From e4d05172b37a880504d85b93eed58dfb08624548 Mon Sep 17 00:00:00 2001 From: Ayrton Cyril <13516019@std.stei.itb.ac.id> Date: Thu, 11 Apr 2019 18:25:35 +0700 Subject: [PATCH] done --- Makefile | 9 ++-- run_cuda.sh | 3 +- src/radixSort/radixSort.cu | 99 +++++++++++++++++++++++-------------- src/radixSort/radixSort.cuh | 2 +- src/radixsort_paralel.cu | 60 +++------------------- src/radixsort_serial.c | 60 ++++++---------------- src/util/util.cpp | 7 ++- src/util/util.hpp | 3 +- 8 files changed, 99 insertions(+), 144 deletions(-) diff --git a/Makefile b/Makefile index 42b6e0c..ffb4e92 100644 --- a/Makefile +++ b/Makefile @@ -5,10 +5,6 @@ LIB := -lm EXEC_PARALEL := radixsort_paralel EXEC_SERIAL := radixsort_serial -serial-only: - @mkdir -p ${OUTPUT_DIR} - ${CXX} src/radixsort_serial.c -o ${EXEC_SERIAL} - debug: @mkdir -p ${OUTPUT_DIR} ${CUDA} src/radixsort_paralel.cu src/cudaUtil/cudaUtil.cu src/radixSort/radixSort.cu src/util/util.cpp --device-c @@ -17,8 +13,9 @@ debug: build: @mkdir -p ${OUTPUT_DIR} - ${CUDA} src/radixsort_paralel.cu -o ${EXEC_PARALEL} - ${CXX} src/radixsort_serial.c -o ${EXEC_SERIAL} + ${CUDA} src/radixsort_paralel.cu src/cudaUtil/cudaUtil.cu src/radixSort/radixSort.cu src/util/util.cpp --device-c + ${CUDA} radixsort_paralel.o util.o cudaUtil.o radixSort.o -o bin/radixsort_paralel.out + ${CXX} src/util/util.cpp src/radixsort_serial.c -o ${EXEC_SERIAL} clean: @rm -r ${OUTPUT_DIR} || true diff --git a/run_cuda.sh b/run_cuda.sh index adc6b33..b0d0937 100644 --- a/run_cuda.sh +++ b/run_cuda.sh @@ -1 +1,2 @@ -./bin/radixsort_paralel.out $1 \ No newline at end of file +./bin/radixsort_paralel.out $1 +./radixsort_serial $1 \ No newline at end of file diff --git a/src/radixSort/radixSort.cu b/src/radixSort/radixSort.cu index 0bc3d16..9a5182a 100644 --- a/src/radixSort/radixSort.cu +++ b/src/radixSort/radixSort.cu @@ -1,54 +1,81 @@ #include "radixSort.cuh" #include "../cudaUtil/cudaUtil.cuh" +#include "../util/util.hpp" #include <stdio.h> using namespace std; #define WSIZE 32 -__device__ int* d_arr; -__device__ void print_arr(int* arr, int n){ - for (int i = 0; i<n; i++){ - printf("%d ", arr[i]); - } - printf("\n"); - } +__global__ void createBucket(int arr[], int count[], int n, int exp){ + int stride = blockDim.x * gridDim.x; + int pointer = blockDim.x * blockIdx.x + threadIdx.x; + for (int i = pointer; i < n; i += stride) { + atomicAdd(&count[ (arr[i]/exp)%10 ], 1); + } +} -__global__ void countSort(int arr[], int n, int exp) -{ - __shared__ int *output; // output d_array - int num_div_per_thread = n / WSIZE; - int i; - __shared__ int count[10]; - if(threadIdx.x == 0){ - output = (int*) malloc(sizeof(int) * n); - for(int j = 0; j<10;j++){ - count[j] = 0; - } - } +__global__ void copyToArray(int arr[], int output[], int n){ + int stride = blockDim.x * gridDim.x; + int pointer = blockDim.x * blockIdx.x + threadIdx.x; + for (int i = pointer; i < n; i += stride) { + arr[i] = output[i]; + __syncthreads(); + } +} + +void countSort(int arr[], int n, int exp) { + dim3 gridSize(20,1,1); + dim3 blockSize(20,1,1); + int *d_output, *output = (int*) malloc(sizeof(int) * n); + int *d_arr, *d_count; + int i, count[10] = {0}; + + cudaMalloc((void **) &d_count, sizeof(int) * 10); + cudaMalloc((void **) &d_arr, sizeof(int) * n); + cudaMalloc((void **) &d_output, sizeof(int) * n); + + cudaMemcpy(d_arr, arr, sizeof(int) * n, cudaMemcpyHostToDevice); + cudaMemcpy(d_count, count, sizeof(int) * 10, cudaMemcpyHostToDevice); // Store count of occurrences in count[] - for (i = threadIdx.x * num_div_per_thread; i < (threadIdx.x + 1) * num_div_per_thread; i++) - count[ (arr[i]/exp)%10 ]++; + createBucket<<<gridSize, blockSize>>>(d_arr, d_count, n, exp); + cudaDeviceSynchronize(); + cudaMemcpy(count, d_count, sizeof(int) * 10, cudaMemcpyDeviceToHost); // Change count[i] so that count[i] now contains actual - // position of this digit in output[] - __syncthreads(); - if(threadIdx.x == 0){ - for (i = 1; i < 10; i++) - count[i] += count[i - 1]; - } - - // Build the output d_array + // position of this digit in output[] + for (i = 1; i < 10; i++) + count[i] += count[i - 1]; + + // Build the output array for (i = n - 1; i >= 0; i--) { - output[count[ (d_arr[i]/exp)%10 ] - 1] = d_arr[i]; - count[ (d_arr[i]/exp)%10 ]--; + output[count[ (arr[i]/exp)%10 ] - 1] = arr[i]; + count[ (arr[i]/exp)%10 ]--; } - // Copy the output d_array to d_arr[], so that d_arr[] now + // Copy the output array to arr[], so that arr[] now // contains sorted numbers according to current digit - for (i = threadIdx.x * num_div_per_thread; i < (threadIdx.x + 1) * num_div_per_thread; i++) - arr[i] = output[i]; - print_arr(arr,n); - __syncthreads(); + cudaMemcpy(d_output, output, sizeof(int) * n, cudaMemcpyHostToDevice); + copyToArray<<<gridSize, blockSize>>>(d_arr, d_output, n); + cudaDeviceSynchronize(); + cudaMemcpy(arr, d_arr, sizeof(int) * n, cudaMemcpyDeviceToHost); + + cudaFree(d_count); + cudaFree(d_output); + cudaFree(d_arr); + free(output); +} + +void radix_sort(int arr[], int n) +{ + // Find the maximum number to know number of digits + int m = get_max(arr, n); + + // Do counting sort for every digit. Note that instead + // of passing digit number, exp is passed. exp is BASE^i + // where i is current digit number + for (int exp = 1; m/exp > 0; exp *= 10) { + countSort(arr, n, exp); + } } diff --git a/src/radixSort/radixSort.cuh b/src/radixSort/radixSort.cuh index e6aec1b..1b73c9c 100644 --- a/src/radixSort/radixSort.cuh +++ b/src/radixSort/radixSort.cuh @@ -1,7 +1,7 @@ #ifndef RADIX_SORT_CUH #define RADIX_SORT_CUH -__global__ void countSort(int arr[], int n, int exp); +void radix_sort(int arr[], int n); __device__ void sortMSB(int* out, int* arr, int n); #endif \ No newline at end of file diff --git a/src/radixsort_paralel.cu b/src/radixsort_paralel.cu index f6872cc..8520fc3 100644 --- a/src/radixsort_paralel.cu +++ b/src/radixsort_paralel.cu @@ -2,44 +2,9 @@ #include "util/util.hpp" #include "cudaUtil/cudaUtil.cuh" #include "radixSort/radixSort.cuh" - +#include <sys/time.h> #define WSIZE 32 -void count_sort_paralel_first(int arr[],int count[], int n, int exp) -{ - int *output = (int*) malloc (sizeof(int) * n); // output array - int i,displacement[10] = {0}; - - // Store count of occurrences in count[] - for (i = 0; i < n; i++) { - count[ (arr[i]/exp)%10 ]++; - displacement[ (arr[i]/exp)%10 ]++; - } - - // Change count[i] so that count[i] now contains actual - // position of this digit in output[] - for (i = 1; i < 10; i++) - displacement[i] += displacement[i - 1]; - - // Build the output array - for (i = n - 1; i >= 0; i--) - { - output[displacement[ (arr[i]/exp)%10 ] - 1] = arr[i]; - displacement[ (arr[i]/exp)%10 ]--; - } - - // Copy the output array to arr[], so that arr[] now - // contains sorted numbers according to current digit - for (i = 0; i < n; i++) - arr[i] = output[i]; -} -int getMax(int* arr, int n){ - int mx = arr[0]; - for (int i = 1; i < n; i++) - if (arr[i] > mx) - mx = arr[i]; - return mx; -} int main(int argc, char *argv[]) { @@ -50,7 +15,7 @@ int main(int argc, char *argv[]) { } int n = atoi(argv[1]); - + struct timeval start, end; int* arr = (int *) malloc(sizeof(int) * n); int* d_arr; @@ -60,25 +25,14 @@ int main(int argc, char *argv[]) { // cudaMalloc((void**)&d_out, sizeof(int) * n); generate_random(arr, n); - - printArr(arr,n); - - int m = getMax(arr,n); - cudaMemcpyToSymbol(d_arr, arr, sizeof(float) * n); - for (int exp = 1; m/exp > 0; exp *= 10) - countSort<<<1,WSIZE>>>(arr, n, exp); - cudaDeviceSynchronize(); - - - - + gettimeofday(&start, NULL); + radix_sort(arr, n); + gettimeofday(&end, NULL); - cudaMemcpyFromSymbol(arr, d_arr, sizeof(float) * n); - cudaDeviceSynchronize(); - printArr(arr,n); + printf("\n"); + printf("Parallel execution time = %ld.\n", getTimeDiff(start, end)); - cudaFree(d_arr); return 0; } diff --git a/src/radixsort_serial.c b/src/radixsort_serial.c index a95dac0..ac2d806 100644 --- a/src/radixsort_serial.c +++ b/src/radixsort_serial.c @@ -1,22 +1,19 @@ -#include <stdlib.h> -#include <stdio.h> -#include <string.h> -#include <time.h> -#define SEED 13516019 -#define MAX_DIGIT 128 -#define OUTPUT_FILE "output/output_serial.txt" -// A function to randomize arr with size n using seed -void randomizeArray(int arr[], int n) { - int seed = SEED; // Ganti dengan NIM anda sebagai seed. - srand(seed); - for(long i = 0; i < n; i++) { - arr[i] = (int)rand() % 999999; - } -} +#include <stdio.h> +#include <stdlib.h> +#include "util/util.hpp" +#include <sys/time.h> +void generate_random(int *arr, int n){ + int seed = 13516016; + srand(seed); + for (long i = 0; i < n; i++) + { + arr[i] = (int)rand(); + } +} int getMax(int arr[], int n) { int mx = arr[0]; @@ -72,32 +69,6 @@ void radix_sort(int arr[], int n) countSort(arr, n, exp); } -// A utility function to print an array -void print(int arr[], int n) -{ - for (int i = 0; i < n; i++) - printf("%d ", arr[i]); -} -// A utility function to write array in a output file -void printToFile(int arr[], int n, char name[50]) { - FILE* fOut = fopen(name, "w"); - char temp[MAX_DIGIT]; - - if (fOut != NULL) { - sprintf(temp, "Array size: %d\n", n); - fputs(temp, fOut); - for (int i = 0; i < n; i++) { - sprintf(temp, "%d ", arr[i]); - fputs(temp, fOut); - } - } else { - printf("Unable to write file"); - } - - fclose(fOut); -} - -// A utility function to get different of two time in nanosecond long getTimeDiff(struct timeval start, struct timeval end) { long result = (long) end.tv_sec * 1000000 + end.tv_usec - \ (long) start.tv_sec * 1000000 + start.tv_usec; @@ -105,6 +76,7 @@ long getTimeDiff(struct timeval start, struct timeval end) { return result; } + // Main program to implement RadixSort int main(int argc, char** argv) { @@ -118,7 +90,7 @@ int main(int argc, char** argv) int* arr; arr = (int*) malloc(sizeof(int) * n); - randomizeArray(arr, n); + generate_random(arr, n); // Calculate the time and execute the radixsort gettimeofday(&start, NULL); @@ -126,9 +98,7 @@ int main(int argc, char** argv) gettimeofday(&end, NULL); printf("\n"); - printf("Execution time (microseconds) - Serial: %ld \xE6s.\n", getTimeDiff(start, end)); - - printToFile(arr, n, OUTPUT_FILE); + printf("Serial execution time = %ld.\n", getTimeDiff(start, end)); return 0; } \ No newline at end of file diff --git a/src/util/util.cpp b/src/util/util.cpp index c1cc0d5..d43dbb1 100644 --- a/src/util/util.cpp +++ b/src/util/util.cpp @@ -8,12 +8,17 @@ void generate_random(int *arr, int n){ srand(seed); for (long i = 0; i < n; i++) { - arr[i] = (int)rand()%100; + arr[i] = (int)rand(); } } +long getTimeDiff(struct timeval start, struct timeval end) { + long result = (long) end.tv_sec * 1000000 + end.tv_usec - \ + (long) start.tv_sec * 1000000 + start.tv_usec; + return result; +} int get_max(int arr[], int n){ int max=INT_MIN; for (int i=0; i<n; i++){ diff --git a/src/util/util.hpp b/src/util/util.hpp index 97b27dc..db69ab8 100644 --- a/src/util/util.hpp +++ b/src/util/util.hpp @@ -4,5 +4,6 @@ void generate_random(int* arr, int n); // void print_arr(int * array, int num_elements); void assert_sorted(int arr[], int n, double time_elapsed); - +int get_max(int arr[], int n); +long getTimeDiff(struct timeval start, struct timeval end); #endif \ No newline at end of file -- GitLab