From 684daffc2726ee2c4c677a025de1e10bcc220643 Mon Sep 17 00:00:00 2001
From: azkanab <azkanabilah@gmail.com>
Date: Thu, 11 Apr 2019 18:25:37 +0700
Subject: [PATCH] try

---
 src/radix_sort_parallel.cu    |  12 +-
 src/radixsort_parallel.cu.cpp | 192 ++++++++++++++++++++++++++++++++++
 2 files changed, 201 insertions(+), 3 deletions(-)
 create mode 100644 src/radixsort_parallel.cu.cpp

diff --git a/src/radix_sort_parallel.cu b/src/radix_sort_parallel.cu
index 950f89f..f9d93af 100644
--- a/src/radix_sort_parallel.cu
+++ b/src/radix_sort_parallel.cu
@@ -3,14 +3,20 @@
 #include "radix_sort_parallel.h"
 
 __global__ void copyArrayParallel(int *arr, int *output, int n) {
-    for (int i = 0; i < n; i++) {
+    int index = threadIdx.x;
+    int stride = blockDim.x;
+
+    for (int i = index; i < n; i+=stride) {
         arr[i] = output[i];
     }
 }
 
 __global__ void getMaxParallel(int *arr, int *max, int n) {
-    int maximum = arr[0];
-    for (int i = 0; i < n; i++) {
+    int index = threadIdx.x;
+    int stride = blockDim.x;
+
+    int maximum = arr[index];
+    for (int i = index; i < n; i+=stride) {
         if (arr[i] > maximum) {
             maximum = arr[i];
         }
diff --git a/src/radixsort_parallel.cu.cpp b/src/radixsort_parallel.cu.cpp
new file mode 100644
index 0000000..8fc2f94
--- /dev/null
+++ b/src/radixsort_parallel.cu.cpp
@@ -0,0 +1,192 @@
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include "radix_sort_parallel.h"
+
+// Threads per block for the getMax launch (a multiple of the warp size).
+#define GET_MAX_THREADS 512
+
+// Abort with file/line context when a CUDA runtime call fails, instead of
+// silently continuing with invalid device state.
+#define CUDA_CHECK(call) \
+    do { \
+        cudaError_t err_ = (call); \
+        if (err_ != cudaSuccess) { \
+            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__, \
+                    cudaGetErrorString(err_)); \
+            exit(EXIT_FAILURE); \
+        } \
+    } while (0)
+
+// Computes the maximum of arr[0..n) into max[0].
+// Thread t scans elements t, t + blockDim.x, ... and folds its partial result
+// into max[0] with atomicMax, so the answer does not depend on which thread
+// writes last. The caller must initialise max[0] (e.g. to INT_MIN) first.
+// Only threadIdx/blockDim are used for indexing: launch with a single block.
+__global__ void getMax(int *arr, int *max, int n) {
+    int index = threadIdx.x;
+    int stride = blockDim.x;
+
+    // Threads beyond a short array must not read arr[index].
+    if (index >= n)
+        return;
+
+    int mx = arr[index];
+    for (int i = index + stride; i < n; i += stride)
+        if (arr[i] > mx)
+            mx = arr[i];
+
+    atomicMax(&max[0], mx);
+}
+
+// One stable counting-sort pass over arr[0..n) keyed on the decimal digit
+// (arr[i] / exp) % 10. output is scratch space for n ints; the sorted result
+// is copied back into arr. Values must be non-negative, otherwise the digit
+// would index count[] out of bounds.
+static __device__ void countSortPass(int *arr, int *output, int n, int exp)
+{
+    int i, count[10] = {0};
+
+    // Histogram of the current digit.
+    for (i = 0; i < n; i++)
+        count[(arr[i] / exp) % 10]++;
+
+    // Prefix sums: count[d] becomes one past the last slot for digit d.
+    for (i = 1; i < 10; i++)
+        count[i] += count[i - 1];
+
+    // Walk backwards so equal digits keep their relative order.
+    for (i = n - 1; i >= 0; i--)
+    {
+        int digit = (arr[i] / exp) % 10;
+        output[--count[digit]] = arr[i];
+    }
+
+    for (i = 0; i < n; i++)
+        arr[i] = output[i];
+}
+
+// Counting-sort pass that allocates its own scratch buffer on the device
+// heap. Kept for existing callers; radix_sort uses countSortWithScratch.
+// Runs serially on thread 0 of block 0.
+__global__ void countSort(int arr[], int n, int exp)
+{
+    if (blockIdx.x != 0 || threadIdx.x != 0 || n <= 0)
+        return;
+
+    int *output = (int *)malloc(n * sizeof(int));
+    if (output == NULL)
+    {
+        // The device heap is small by default; do not dereference NULL.
+        printf("countSort: device malloc of %d ints failed\n", n);
+        return;
+    }
+
+    countSortPass(arr, output, n, exp);
+    free(output);
+}
+
+// Counting-sort pass using a caller-provided scratch buffer of n ints in
+// device memory. Runs serially on thread 0 of block 0.
+__global__ void countSortWithScratch(int *arr, int *scratch, int n, int exp)
+{
+    if (blockIdx.x != 0 || threadIdx.x != 0)
+        return;
+
+    countSortPass(arr, scratch, n, exp);
+}
+
+// Sorts arr[0..n) in ascending order on the GPU with an LSD radix sort
+// (base 10). Values must be non-negative. Exits the process if a CUDA call
+// fails.
+void radix_sort(int arr[], int n)
+{
+    // Zero or one element is already sorted.
+    if (arr == NULL || n <= 1)
+        return;
+
+    size_t bytes = (size_t)n * sizeof(int);
+    int max = INT_MIN; // identity for the atomicMax reduction in getMax
+    int *d_max = NULL, *d_arr = NULL, *d_scratch = NULL;
+
+    // Allocate device memory
+    CUDA_CHECK(cudaMalloc((void**)&d_max, sizeof(int)));
+    CUDA_CHECK(cudaMalloc((void**)&d_arr, bytes));
+    CUDA_CHECK(cudaMalloc((void**)&d_scratch, bytes));
+
+    // Transfer data from host to device
+    CUDA_CHECK(cudaMemcpy(d_arr, arr, bytes, cudaMemcpyHostToDevice));
+    CUDA_CHECK(cudaMemcpy(d_max, &max, sizeof(int), cudaMemcpyHostToDevice));
+
+    // Executing kernel
+    getMax<<<1, GET_MAX_THREADS>>>(d_arr, d_max, n);
+    CUDA_CHECK(cudaGetLastError());
+
+    // Transfer data back to host memory
+    CUDA_CHECK(cudaMemcpy(&max, d_max, sizeof(int), cudaMemcpyDeviceToHost));
+
+    // exp is 64-bit so that exp *= 10 cannot overflow when max is close to
+    // INT_MAX; every value actually passed to the kernel fits in an int.
+    for (long long exp = 1; max / exp > 0; exp *= 10)
+    {
+        countSortWithScratch<<<1, 1>>>(d_arr, d_scratch, n, (int)exp);
+        CUDA_CHECK(cudaGetLastError());
+    }
+
+    CUDA_CHECK(cudaMemcpy(arr, d_arr, bytes, cudaMemcpyDeviceToHost));
+
+    CUDA_CHECK(cudaFree(d_scratch));
+    CUDA_CHECK(cudaFree(d_arr));
+    CUDA_CHECK(cudaFree(d_max));
+}
+
+// Prints every element as "index: value", one per line.
+void print(int arr[], int n)
+{
+    for (int i = 0; i < n; i++)
+        printf("%d: %d\n",i, arr[i]);
+}
+
+// Fills arr[0..n) with pseudo-random non-negative ints from a fixed seed.
+void rng(int* arr, int n) {
+    int seed = 13516013;
+    srand(seed);
+    for(long i = 0; i < n; i++) {
+        arr[i] = (int)rand();
+    }
+}
+
+int main(int argc, char *argv[]) {
+    if (argc != 2) {
+        printf("ERROR: ./radix_sort <array_length>\n");
+        return 1;
+    }
+
+    // Reject non-positive or oversized lengths before allocating.
+    long requested = strtol(argv[1], NULL, 10);
+    if (requested <= 0 || requested > INT_MAX) {
+        printf("ERROR: ./radix_sort <array_length>\n");
+        return 1;
+    }
+    int N = (int)requested;
+
+    // Heap allocation: a variable-length stack array overflows for large N.
+    int *arr = (int*)malloc((size_t)N * sizeof(int));
+    if (arr == NULL) {
+        printf("ERROR: could not allocate %d ints\n", N);
+        return 1;
+    }
+
+    rng(arr,N);
+    clock_t begin = clock();
+    radix_sort(arr, N);
+    clock_t end = clock();
+    double elapsed_ms = (double)(end - begin) * 1000 / CLOCKS_PER_SEC;
+    print(arr,N);
+    printf("Executed in %lf ms\n",elapsed_ms);
+    free(arr);
+    return 0;
+}
-- 
GitLab