diff --git a/makefile b/makefile
new file mode 100644
index 0000000000000000000000000000000000000000..7d35f963f16c73bf9416ab2b1701f7f420f01e98
--- /dev/null
+++ b/makefile
@@ -0,0 +1,6 @@
+CP = parallel_radix.cu
+EXE = radix
+SRC_DIR = src
+LDLIBS += -lcurand
+build:
+	nvcc -o $(EXE) $(SRC_DIR)/$(CP) $(LDLIBS)
diff --git a/src/parallel_radix.cu b/src/parallel_radix.cu
new file mode 100644
--- /dev/null
+++ b/src/parallel_radix.cu
@@ -0,0 +1,184 @@
+#include <cstdio>
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <numeric>
+#include <vector>
+#include "cuda_runtime.h"
+#include "cuda.h"
+#include "curand.h"
+#include "curand_kernel.h"
+#include "device_launch_parameters.h"
+
+#define ARRAY_SIZE 100000
+#define MAX_BLOCK_SIZE 1024
+#define RNG_SEED 13516120
+#define BASE 10
+
+// Abort with a readable message when a CUDA runtime call fails.
+#define CUDA_CHECK(call) \
+    do { \
+        cudaError_t err_ = (call); \
+        if (err_ != cudaSuccess) { \
+            std::fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__, \
+                         cudaGetErrorString(err_)); \
+            std::exit(EXIT_FAILURE); \
+        } \
+    } while (0)
+
+// Abort with the numeric status when a cuRAND host API call fails.
+#define CURAND_CHECK(call) \
+    do { \
+        curandStatus_t st_ = (call); \
+        if (st_ != CURAND_STATUS_SUCCESS) { \
+            std::fprintf(stderr, "cuRAND error %s:%d: status %d\n", __FILE__, \
+                         __LINE__, static_cast<int>(st_)); \
+            std::exit(EXIT_FAILURE); \
+        } \
+    } while (0)
+
+using namespace std;
+
+// Print the first n elements of arr on one line, separated by spaces.
+void printArray(unsigned int * arr, int n) {
+    for (int i = 0; i < n; i++)
+        cout << arr[i] << " ";
+    cout << endl;
+}
+
+// Write the maximum of input[0..n) into input[0]; no other element changes.
+//
+// Launch: 1D grid of any size (grid-stride loop); blockDim.x must be a
+// multiple of 32 because every warp does a full-mask shuffle reduction.
+// Uses no shared memory. Needs __shfl_down_sync (CUDA 9 or newer).
+__global__ void get_max(unsigned int * input, int n)
+{
+    const int stride = gridDim.x * blockDim.x;
+    unsigned int local_max = 0;
+    for (int i = blockDim.x * blockIdx.x + threadIdx.x; i < n; i += stride) {
+        // input[0] is also the result slot that other warps raise with
+        // atomicMax, so read it atomically (max with 0 leaves it unchanged).
+        const unsigned int value = (i == 0) ? atomicMax(&input[0], 0u) : input[i];
+        local_max = max(local_max, value);
+    }
+    // No thread returns early, so all 32 lanes of a warp reach the shuffles.
+    for (int offset = warpSize / 2; offset > 0; offset >>= 1)
+        local_max = max(local_max, __shfl_down_sync(0xffffffffu, local_max, offset));
+    // One atomic per warp; a zero partial can never raise the result.
+    if (threadIdx.x % warpSize == 0 && local_max > 0)
+        atomicMax(&input[0], local_max);
+}
+
+// Add to count[d] the number of elements of arr[0..n) whose base-BASE digit
+// at place value exp (1, BASE, BASE^2, ...) equals d. count holds BASE ints;
+// the kernel only accumulates, so the caller zeroes count before launching.
+//
+// Launch: 1D grid and block of any size (grid-stride loop). Uses BASE ints
+// of static shared memory for a per-block histogram.
+__global__ void count_occurences(unsigned int * arr, int n, int * count, unsigned long long exp){
+    __shared__ int block_count[BASE];
+    for (int d = threadIdx.x; d < BASE; d += blockDim.x)
+        block_count[d] = 0;
+    __syncthreads();
+
+    const int stride = gridDim.x * blockDim.x;
+    for (int i = blockDim.x * blockIdx.x + threadIdx.x; i < n; i += stride) {
+        const int digit = static_cast<int>((arr[i] / exp) % BASE);
+        atomicAdd(&block_count[digit], 1);
+    }
+    __syncthreads();
+
+    // Merge this block's histogram into the global one.
+    for (int d = threadIdx.x; d < BASE; d += blockDim.x)
+        if (block_count[d] != 0)
+            atomicAdd(&count[d], block_count[d]);
+}
+
+// Generate ARRAY_SIZE random 32-bit values on the GPU, sort them with an LSD
+// radix sort (digit counting on the GPU, stable scatter on the host), write
+// them to test/output one per line and print the elapsed time.
+int main(int argc, char *argv[])
+{
+    (void)argc;
+    (void)argv;
+
+    const int count = ARRAY_SIZE;
+    const int block = MAX_BLOCK_SIZE;
+    // Round up so the last, partially filled block is launched as well.
+    const int grid = (count + block - 1) / block;
+    const size_t bytes = count * sizeof(unsigned int);
+
+    curandGenerator_t curand_gen;
+    cudaEvent_t start, stop;
+    float time = 0.0f;
+    CUDA_CHECK(cudaEventCreate(&start));
+    CUDA_CHECK(cudaEventCreate(&stop));
+    CURAND_CHECK(curandCreateGenerator(&curand_gen, CURAND_RNG_PSEUDO_DEFAULT));
+    CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(curand_gen, RNG_SEED));
+
+    // Heap storage: two 400 KB arrays are too large to keep on the stack.
+    vector<unsigned int> hostArr(count);
+    vector<unsigned int> output(count);
+    unsigned int * darr = nullptr;
+    int * dcount = nullptr;
+    CUDA_CHECK(cudaMalloc(&darr, bytes));
+    // Allocated once and reused by every digit pass.
+    CUDA_CHECK(cudaMalloc(&dcount, BASE * sizeof(int)));
+
+    CURAND_CHECK(curandGenerate(curand_gen, darr, count));
+    CUDA_CHECK(cudaDeviceSynchronize());
+    CUDA_CHECK(cudaMemcpy(hostArr.data(), darr, bytes, cudaMemcpyDeviceToHost));
+
+    // Find the largest value; it decides how many digit passes are needed.
+    get_max<<<grid, block>>>(darr, count);
+    CUDA_CHECK(cudaGetLastError());
+    unsigned int global_max = 0;
+    CUDA_CHECK(cudaMemcpy(&global_max, darr, sizeof(unsigned int), cudaMemcpyDeviceToHost));
+
+    CUDA_CHECK(cudaEventRecord(start, 0));
+    // get_max overwrote darr[0] with the maximum, so restore the input.
+    CUDA_CHECK(cudaMemcpy(darr, hostArr.data(), bytes, cudaMemcpyHostToDevice));
+    for (unsigned long long exp = 1; global_max / exp > 0; exp *= BASE) {
+        int hcount[BASE] = {0};
+        CUDA_CHECK(cudaMemset(dcount, 0, BASE * sizeof(int)));
+        count_occurences<<<grid, block>>>(darr, count, dcount, exp);
+        CUDA_CHECK(cudaGetLastError());
+        // Blocking copy: it also waits for the kernel to finish.
+        CUDA_CHECK(cudaMemcpy(hcount, dcount, BASE * sizeof(int), cudaMemcpyDeviceToHost));
+
+        // hcount[d] becomes one past the last output slot for digit d.
+        std::partial_sum(hcount, hcount + BASE, hcount);
+        // Walk backwards so equal digits keep their relative order.
+        for (int i = count - 1; i >= 0; i--) {
+            const int digit = static_cast<int>((hostArr[i] / exp) % BASE);
+            output[--hcount[digit]] = hostArr[i];
+        }
+        hostArr.swap(output);
+        CUDA_CHECK(cudaMemcpy(darr, hostArr.data(), bytes, cudaMemcpyHostToDevice));
+    }
+
+    CUDA_CHECK(cudaMemcpy(hostArr.data(), darr, bytes, cudaMemcpyDeviceToHost));
+    CUDA_CHECK(cudaEventRecord(stop, 0));
+    CUDA_CHECK(cudaEventSynchronize(stop));
+    CUDA_CHECK(cudaEventElapsedTime(&time, start, stop));
+
+    int status = 0;
+    ofstream out("test/output", std::ios::out | std::ios::trunc);
+    if (!out) {
+        cerr << "cannot open test/output for writing" << endl;
+        status = 1;
+    } else {
+        for (int k = 0; k < count; k++)
+            out << hostArr[k] << '\n';
+        out.close();
+    }
+
+    // cudaEventElapsedTime already reports milliseconds.
+    cout << "Time = " << time << " ms" << endl;
+    CUDA_CHECK(cudaFree(dcount));
+    CUDA_CHECK(cudaFree(darr));
+    CURAND_CHECK(curandDestroyGenerator(curand_gen));
+    CUDA_CHECK(cudaEventDestroy(start));
+    CUDA_CHECK(cudaEventDestroy(stop));
+    return status;
+}
diff --git a/test/README.md b/test/README.md
index fcb37d95caa74bc95d1cb01dea18a5eacff5d72e..d10bbd8e7ae1b07d5b8f6414fe498845c0d42a7e 100644
--- a/test/README.md
+++ b/test/README.md
@@ -1,3 +1,16 @@
-# Announcement
+# Radix_Sort
 
-Please place your sample input and output here.
\ No newline at end of file
+Sebuah program serial dan paralel yang berfungsi untuk mengurutkan angka dengan metode Radix
+
+## Petunjuk Penggunaan Program
+
+Untuk menjalankan program,
+>download program
++>Kemudian buka terminal dan compile program dengan
+
+make
+
+### Pembagian Kerja
+
+Dias Akbar Nugraha mengerjakan parallel program, makefile
+Nuha Adinata mengerjakan parallel program, README
\ No newline at end of file