diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..42b6e0cb45d4974c21fec3acb0da112c75042262
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,35 @@
+CXX := gcc
+CUDA := nvcc
+OUTPUT_DIR := output
+BIN_DIR := bin
+LIB := -lm
+EXEC_PARALEL := radixsort_paralel
+EXEC_SERIAL := radixsort_serial
+# Every translation unit of the CUDA program, and the objects --device-c leaves in the working directory.
+CUDA_SRC := src/radixsort_paralel.cu src/cudaUtil/cudaUtil.cu src/radixSort/radixSort.cu src/util/util.cpp
+CUDA_OBJ := radixsort_paralel.o util.o cudaUtil.o radixSort.o
+
+.PHONY: serial-only debug build clean
+
+# Serial reference implementation only; needs no CUDA toolchain.
+serial-only:
+	@mkdir -p ${OUTPUT_DIR}
+	${CXX} src/radixsort_serial.c -o ${EXEC_SERIAL}
+
+# Compile each source to an object, then link the binary that run_cuda.sh starts.
+debug:
+	@mkdir -p ${OUTPUT_DIR} ${BIN_DIR}
+	${CUDA} ${CUDA_SRC} --device-c
+	${CUDA} ${CUDA_OBJ} -o ${BIN_DIR}/radixsort_paralel.out
+
+# Both executables. All CUDA sources are passed so the kernel and helpers resolve at link time.
+build:
+	@mkdir -p ${OUTPUT_DIR}
+	${CUDA} -rdc=true ${CUDA_SRC} -o ${EXEC_PARALEL}
+	${CXX} src/radixsort_serial.c -o ${EXEC_SERIAL}
+
+# rm -f succeeds silently when a file is already gone.
+clean:
+	@rm -rf ${OUTPUT_DIR}
+	@rm -f ${CUDA_OBJ} ${BIN_DIR}/radixsort_paralel.out
+	@rm -f ${EXEC_PARALEL} ${EXEC_SERIAL}
diff --git a/doc/Tugas CUDA.pdf b/doc/Tugas CUDA.pdf
deleted file mode 100644
index 632f8439d6a8cd8c4c1269ade74eb9906e15febe..0000000000000000000000000000000000000000
Binary files a/doc/Tugas CUDA.pdf and /dev/null differ
diff --git a/run_cuda.sh b/run_cuda.sh
new file mode 100644
index 0000000000000000000000000000000000000000..adc6b33b1ab7e8e1fb350d349a3fd54145228fe3
--- /dev/null
+++ b/run_cuda.sh
@@ -0,0 +1 @@
+./bin/radixsort_paralel.out $1
\ No newline at end of file
diff --git a/src/.gitignore b/src/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..4d861c7e4ad62f9a39b316785324aadb1f3be408
--- /dev/null
+++ b/src/.gitignore
@@ -0,0 +1,51 @@
+
+# Created by https://www.gitignore.io/api/c++,visualstudiocode
+# Edit at https://www.gitignore.io/?templates=c++,visualstudiocode
+
+### C++ ###
+# Prerequisites
+*.d
+
+# Compiled Object files
+*.slo
+*.lo
+*.o
+*.obj
+
+# Precompiled Headers
+*.gch
+*.pch
+
+# Compiled Dynamic libraries
+*.so
+*.dylib
+*.dll
+
+# Fortran module files
+*.mod
+*.smod
+
+# Compiled Static libraries
+*.lai
+*.la
+*.a
+*.lib
+
+# 
Executables
+*.exe
+*.out
+*.app
+
+### VisualStudioCode ###
+.vscode/*
+!.vscode/settings.json
+!.vscode/tasks.json
+!.vscode/launch.json
+!.vscode/extensions.json
+
+### VisualStudioCode Patch ###
+# Ignore all local history of files
+.history
+
+# End of https://www.gitignore.io/api/c++,visualstudiocode
+
diff --git a/src/cudaUtil/cudaUtil.cu b/src/cudaUtil/cudaUtil.cu
index 430b9e20846cc74da909e16769a1ed3c0cde63af..b9acee97f159eb2d8daa9238a704dd71bcf2a93f 100644
--- a/src/cudaUtil/cudaUtil.cu
+++ b/src/cudaUtil/cudaUtil.cu
@@ -1,34 +1,11 @@
 #include "cudaUtil.cuh"
 #include <stdio.h>
 
-__device__ void getMax(int* arr, int n, int* maxBitIdx){
-    int myId = threadIdx.x + blockDim.x * blockIdx.x;
-    int tId = threadIdx.x;
-    for (unsigned int s = blockDim.x/2; s>0; s>>=1){
-        if (tId < s){
-            if (arr[myId] < arr[myId + s]){
-                arr[myId] = arr[myId+s];
-            }
-        }
-        __syncthreads();
-    }
-
-    if (tId == 0){
-        int currentNum = arr[myId];
-        int bitIndex = 0;
-        while(currentNum > 0){
-            currentNum >>=1;
-            bitIndex +=1;
-        }
-        *maxBitIdx = bitIndex;
-    }
-}
 
-__global__ void printArr(int* arr, int n){
-    int index = blockIdx.x * blockDim.x + threadIdx.x;
-    int stride = blockDim.x * gridDim.x;
-    for (int i = index; i<n; i+=stride){
+void printArr(int* arr, int n){ // plain host function now (no __global__): prints arr[0..n-1] in index order
+    for (int i = 0; i<n; i++){ // arr is dereferenced here on the host, so it must be a host-readable pointer
         printf("%d ", arr[i]);
     }
+    printf("\n"); // terminate the line after the last element
 }
 
diff --git a/src/cudaUtil/cudaUtil.cuh b/src/cudaUtil/cudaUtil.cuh
index 69ce35c6f46f0c0297960ecfa0b5bd76578eb70f..b6ced14db7a23039e6f189b8d28c885097f54679 100644
--- a/src/cudaUtil/cudaUtil.cuh
+++ b/src/cudaUtil/cudaUtil.cuh
@@ -1,7 +1,6 @@
 #ifndef CUDA_UTIL_CUH
 #define CUDA_UTIL_CUH
 
-__device__ void getMax(int* arr, int n, int* out);
-__global__ void printArr(int* arr, int n);
+void printArr(int* arr, int n); // host-side: prints the n ints of arr on one line, space separated
 
 #endif
\ No newline at end of file
diff --git a/src/main.cu b/src/main.cu
deleted file mode 100644
index f8181a4a627a95886f52c8c70af894b5e5e34904..0000000000000000000000000000000000000000
--- a/src/main.cu
+++ /dev/null
@@ -1,41 +0,0 @@
-#include 
<stdio.h>
-#include "util/util.hpp"
-#include "cudaUtil/cudaUtil.cuh"
-#include "radixSort/radixSort.cuh"
-
-__global__ void cuda_hello(){
-    printf("Hello World from GPU!\n");
-}
-
-int main(int argc, char *argv[]) {
-
-    if (argc != 2){
-        fprintf(stderr, "Usage: <number of elements>\n");
-        exit(1);
-    }
-
-    int n = atoi(argv[1]);
-
-    int* arr = (int *) malloc(sizeof(int) * n);
-
-    int* d_arr;
-    cudaMalloc((void**)&d_arr, sizeof(int) * n);
-
-    int* d_out;
-    cudaMalloc((void**)&d_out, sizeof(int) * n);
-
-    generate_random(arr, n);
-
-    cudaMemcpy(d_arr, arr, sizeof(float) * n, cudaMemcpyHostToDevice);
-    cudaDeviceSynchronize();
-
-    radixSort<<<1,256>>>(d_out, d_arr, n);
-    cudaDeviceSynchronize();
-
-    cuda_hello<<<1,1>>>();
-    cudaDeviceSynchronize();
-
-    cudaFree(d_arr);
-
-    return 0;
-}
diff --git a/src/radixSort/radixSort.cu b/src/radixSort/radixSort.cu
index b539d5dbb22b591cfe72b92c425e1e223c6a3849..0bc3d16a66c0aa480a380535cf9a339d0064de04 100644
--- a/src/radixSort/radixSort.cu
+++ b/src/radixSort/radixSort.cu
@@ -1,15 +1,86 @@
 #include "radixSort.cuh"
 #include "../cudaUtil/cudaUtil.cuh"
 #include <stdio.h>
 
-__global__
-void radixSort(int* out, int* arr, int n){
-    int* maxBitIdx = (int*) malloc(sizeof(int));
-
-    if (threadIdx.x == 0){
-        getMax(arr,n,maxBitIdx);
-        printf("%d\n", *maxBitIdx);
-    }
-    __syncthreads();
-
-}
+using namespace std;
+#define RADIX 10
+
+// Debug helper: prints arr[0..n-1] on one line from device code.
+__device__ void print_arr(int* arr, int n){
+    for (int i = 0; i<n; i++){
+        printf("%d ", arr[i]);
+    }
+    printf("\n");
+}
+
+/*
+ * One stable counting-sort pass of an LSD radix sort, keyed on the decimal
+ * digit (arr[i] / exp) % 10.
+ *
+ * arr : device pointer to n non-negative ints, reordered in place
+ * n   : number of elements
+ * exp : 1, 10, 100, ... selecting the digit to sort on
+ *
+ * Launch with a single block of any size, e.g. countSort<<<1, 32>>>(...).
+ * The histogram lives in shared memory and there is no grid-wide barrier,
+ * so blocks other than block 0 return immediately. The scratch buffer comes
+ * from the device heap (cudaLimitMallocHeapSize) and is released before the
+ * kernel returns.
+ */
+__global__ void countSort(int arr[], int n, int exp)
+{
+    __shared__ int *output;       // scratch copy of the reordered array
+    __shared__ int count[RADIX];  // digit histogram, later running totals
+
+    // Uniform per block, so no thread is left waiting at a barrier below.
+    if (n <= 0 || blockIdx.x != 0)
+        return;
+
+    if (threadIdx.x == 0){
+        output = (int*) malloc(sizeof(int) * n);
+        for (int j = 0; j < RADIX; j++){
+            count[j] = 0;
+        }
+    }
+    // Publish the buffer pointer and the zeroed histogram to every thread.
+    __syncthreads();
+
+    if (output == NULL){
+        if (threadIdx.x == 0){
+            printf("countSort: device malloc of %d ints failed\n", n);
+        }
+        return;
+    }
+
+    // Store count of occurrences in count[]; threads share the bins, hence atomics.
+    for (int i = threadIdx.x; i < n; i += blockDim.x){
+        atomicAdd(&count[ (arr[i]/exp)%RADIX ], 1);
+    }
+    __syncthreads();
+
+    if (threadIdx.x == 0){
+        // Change count[i] so that it holds the position just past the last
+        // element with digit i in output[].
+        for (int j = 1; j < RADIX; j++){
+            count[j] += count[j - 1];
+        }
+        // Build the output array back to front. Stability depends on this
+        // order, so the scatter is done by one thread.
+        for (int i = n - 1; i >= 0; i--){
+            int digit = (arr[i]/exp)%RADIX;
+            output[--count[digit]] = arr[i];
+        }
+    }
+    __syncthreads();
+
+    // Copy the output array back so arr[] is sorted by the current digit.
+    for (int i = threadIdx.x; i < n; i += blockDim.x){
+        arr[i] = output[i];
+    }
+    // Nobody may still be reading output[] when it is freed.
+    __syncthreads();
+
+    if (threadIdx.x == 0){
+        free(output);
+    }
+}
diff --git a/src/radixSort/radixSort.cuh b/src/radixSort/radixSort.cuh
index 2814cd8ca70a9a1b7b4a97e4275af0c27cd86123..e6aec1bf9ce77e57e1865efe67178c5e94e36ec8 100644
--- a/src/radixSort/radixSort.cuh
+++ b/src/radixSort/radixSort.cuh
@@ -1,7 +1,7 @@
 #ifndef RADIX_SORT_CUH
 #define RADIX_SORT_CUH
 
-__global__ void radixSort(int* out, int* arr, int n);
+__global__ void countSort(int arr[], int n, int exp);
 __device__ void sortMSB(int* out, int* arr, int n);
 
 #endif
\ No newline at end of file
diff --git a/src/radixsort_paralel.cu b/src/radixsort_paralel.cu
new file mode 100644
index 0000000000000000000000000000000000000000..f6872cc0b7af3ac3145375aba82ae43e070647c3
--- /dev/null
+++ b/src/radixsort_paralel.cu
@@ -0,0 +1,123 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "util/util.hpp"
+#include "cudaUtil/cudaUtil.cuh"
+#include "radixSort/radixSort.cuh"
+
+#define WSIZE 32
+
+// Abort with file, line and the runtime's message when a CUDA call fails.
+#define CUDA_CHECK(call)                                              \
+    do {                                                              \
+        cudaError_t err_ = (call);                                    \
+        if (err_ != cudaSuccess) {                                    \
+            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__,       \
+                    __LINE__, cudaGetErrorString(err_));              \
+            exit(1);                                                  \
+        }                                                             \
+    } while (0)
+
+// Host counting sort of arr[0..n-1] by the decimal digit selected by exp.
+// Digit occurrences are also added into the caller's count[10].
+void count_sort_paralel_first(int arr[],int count[], int n, int exp)
+{
+    if (n <= 0)
+        return;
+
+    int *output = (int*) malloc (sizeof(int) * n); // output array
+    if (output == NULL){
+        fprintf(stderr, "Out of host memory\n");
+        exit(1);
+    }
+    int i,displacement[10] = {0};
+
+    // Store count of occurrences in count[]
+    for (i = 0; i < n; i++) {
+        count[ (arr[i]/exp)%10 ]++;
+        displacement[ (arr[i]/exp)%10 ]++;
+    }
+
+    // Change displacement[i] so that it now contains the actual
+    // position of this digit in output[]
+    for (i = 1; i < 10; i++)
+        displacement[i] += displacement[i - 1];
+
+    // Build the output array
+    for (i = n - 1; i >= 0; i--)
+    {
+        output[displacement[ (arr[i]/exp)%10 ] - 1] = arr[i];
+        displacement[ (arr[i]/exp)%10 ]--;
+    }
+
+    // Copy the output array to arr[], so that arr[] now
+    // contains sorted numbers according to current digit
+    for (i = 0; i < n; i++)
+        arr[i] = output[i];
+
+    free(output);
+}
+
+// Largest element of arr[0..n-1]; n must be at least 1.
+int getMax(int* arr, int n){
+    int mx = arr[0];
+    for (int i = 1; i < n; i++)
+        if (arr[i] > mx)
+            mx = arr[i];
+    return mx;
+}
+
+
+int main(int argc, char *argv[]) {
+
+    if (argc != 2){
+        fprintf(stderr, "Usage: <number of elements>\n");
+        exit(1);
+    }
+
+    int n = atoi(argv[1]);
+    if (n <= 0){
+        fprintf(stderr, "Number of elements must be a positive integer\n");
+        exit(1);
+    }
+    size_t bytes = sizeof(int) * (size_t) n;
+
+    int* arr = (int *) malloc(bytes);
+    if (arr == NULL){
+        fprintf(stderr, "Out of host memory\n");
+        exit(1);
+    }
+
+    int* d_arr;
+    CUDA_CHECK(cudaMalloc((void**)&d_arr, bytes));
+
+    // countSort takes its scratch buffer from the device malloc heap; grow
+    // the heap when the array would not fit. Must happen before any launch.
+    size_t heap = 0;
+    CUDA_CHECK(cudaDeviceGetLimit(&heap, cudaLimitMallocHeapSize));
+    if (heap < bytes + (1 << 20)){
+        CUDA_CHECK(cudaDeviceSetLimit(cudaLimitMallocHeapSize, bytes + (1 << 20)));
+    }
+
+    generate_random(arr, n);
+
+    printArr(arr,n);
+
+    int m = getMax(arr,n);
+    CUDA_CHECK(cudaMemcpy(d_arr, arr, bytes, cudaMemcpyHostToDevice));
+
+    // One kernel pass per decimal digit of the maximum. exp is kept in a
+    // wider type so the last exp *= 10 cannot overflow int.
+    for (long long exp = 1; m/exp > 0; exp *= 10){
+        countSort<<<1,WSIZE>>>(d_arr, n, (int) exp);
+        CUDA_CHECK(cudaGetLastError());
+    }
+    CUDA_CHECK(cudaDeviceSynchronize());
+
+    CUDA_CHECK(cudaMemcpy(arr, d_arr, bytes, cudaMemcpyDeviceToHost));
+    printArr(arr,n);
+
+    CUDA_CHECK(cudaFree(d_arr));
+    free(arr);
+
+    return 0;
+}
diff --git a/src/radixsort_serial.c b/src/radixsort_serial.c
new file mode 100644
index 0000000000000000000000000000000000000000..a95dac07e271ce214a38719b97061029a999f21a
--- /dev/null
+++ b/src/radixsort_serial.c
@@ -0,0 +1,153 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include <sys/time.h>
+
+#define SEED 13516019
+#define MAX_DIGIT 128
+#define OUTPUT_FILE "output/output_serial.txt"
+
+// A function to randomize arr with size n using seed
+void randomizeArray(int arr[], int n) {
+    int seed = SEED; // Replace with your student number (NIM) as the seed.
+    srand(seed);
+    for(long i = 0; i < n; i++) {
+        arr[i] = (int)rand() % 999999;
+    }
+}
+
+// Largest element of arr[0..n-1]; n must be at least 1.
+int getMax(int arr[], int n)
+{
+    int mx = arr[0];
+    for (int i = 1; i < n; i++)
+        if (arr[i] > mx)
+            mx = arr[i];
+    return mx;
+}
+
+// A function to do counting sort of arr[] according to
+// the digit represented by exp.
+void countSort(int arr[], int n, int exp)
+{
+    int* output = (int*) malloc(sizeof(int) * n); // output array
+    int i, count[10] = {0};
+
+    if (output == NULL) {
+        fprintf(stderr, "Out of memory\n");
+        exit(1);
+    }
+
+    // Store count of occurrences in count[]
+    for (i = 0; i < n; i++) {
+        count[ (arr[i]/exp)%10 ]++;
+    }
+
+    // Change count[i] so that count[i] now contains actual
+    // position of this digit in output[]
+    for (i = 1; i < 10; i++)
+        count[i] += count[i - 1];
+
+    // Build the output array
+    for (i = n - 1; i >= 0; i--)
+    {
+        output[count[ (arr[i]/exp)%10 ] - 1] = arr[i];
+        count[ (arr[i]/exp)%10 ]--;
+    }
+
+    // Copy the output array to arr[], so that arr[] now
+    // contains sorted numbers according to current digit
+    for (i = 0; i < n; i++)
+        arr[i] = output[i];
+
+    free(output);
+}
+
+// The main function that sorts arr[] of size n using
+// Radix Sort
+void radix_sort(int arr[], int n)
+{
+    if (n <= 0)
+        return;
+
+    // Find the maximum number to know number of digits
+    int m = getMax(arr, n);
+
+    // Do counting sort for every digit. Note that instead
+    // of passing digit number, exp is passed. exp is 10^i
+    // where i is current digit number. exp is held in a wider
+    // type so the final exp *= 10 cannot overflow int.
+    for (long long exp = 1; m/exp > 0; exp *= 10)
+        countSort(arr, n, (int) exp);
+}
+
+// A utility function to print an array
+void print(int arr[], int n)
+{
+    for (int i = 0; i < n; i++)
+        printf("%d ", arr[i]);
+}
+
+// A utility function to write array in a output file
+void printToFile(int arr[], int n, char name[50]) {
+    FILE* fOut = fopen(name, "w");
+
+    if (fOut == NULL) {
+        // Nothing was opened, so there is nothing to close.
+        fprintf(stderr, "Unable to write file\n");
+        return;
+    }
+
+    fprintf(fOut, "Array size: %d\n", n);
+    for (int i = 0; i < n; i++) {
+        fprintf(fOut, "%d ", arr[i]);
+    }
+
+    fclose(fOut);
+}
+
+// A utility function to get the difference of two times in microseconds
+long getTimeDiff(struct timeval start, struct timeval end) {
+    long result = ((long) end.tv_sec - (long) start.tv_sec) * 1000000L
+                + ((long) end.tv_usec - (long) start.tv_usec);
+
+    return result;
+}
+
+// Main program to implement RadixSort
+int main(int argc, char** argv)
+{
+    if (argc != 2) {
+        fprintf(stderr, "Usage: <number of elements>\n");
+        exit(1);
+    }
+
+    int n = atoi(argv[1]);
+    if (n <= 0) {
+        fprintf(stderr, "Number of elements must be a positive integer\n");
+        exit(1);
+    }
+    struct timeval start, end;
+    int* arr;
+
+    arr = (int*) malloc(sizeof(int) * n);
+    if (arr == NULL) {
+        fprintf(stderr, "Out of memory\n");
+        exit(1);
+    }
+    randomizeArray(arr, n);
+
+    // Calculate the time and execute the radixsort
+    gettimeofday(&start, NULL);
+    radix_sort(arr, n);
+    gettimeofday(&end, NULL);
+
+    printf("\n");
+    printf("Execution time (microseconds) - Serial: %ld us.\n", getTimeDiff(start, end));
+
+    printToFile(arr, n, OUTPUT_FILE);
+
+    free(arr);
+    return 0;
+}
\ No newline at end of file
diff --git a/src/util/util.cpp b/src/util/util.cpp
index ac95ea1e111ed5e38d1a08f12141171b0474fd8a..c1cc0d51655d7ca5a5ed5a611bf5fd5a2f108f1c 100644
--- a/src/util/util.cpp
+++ b/src/util/util.cpp
@@ -8,19 +8,11 @@ void generate_random(int *arr, int n){
 
     srand(seed);
     for (long i = 0; i < n; i++) {
-        arr[i] = (int)rand();
+        arr[i] = (int)rand()%100;
     }
 }
 
-void print_arr(int *array, int num_elements){
-    {
-        for (int i = 0; i < num_elements; i++)
-        {
-            printf("%d ", array[i]);
-        }
-        printf("\n");
-    }
-}
+
 
 int get_max(int arr[], int n){
     int max=INT_MIN;
diff --git a/src/util/util.hpp b/src/util/util.hpp
index 114c8ee784caec926f88d6aedaa5a8d644415847..97b27dc1da836708425edb77a3d1742c4f84183c 100644
--- a/src/util/util.hpp
+++ b/src/util/util.hpp
@@ -2,7 +2,7 @@
 #define UTIL_HPP
 
 void generate_random(int* arr, int n);
-void print_arr(int * array, int num_elements);
+// void print_arr(int * array, int num_elements);
 void assert_sorted(int arr[], int n, double time_elapsed);
 
 #endif
\ No newline at end of file