diff --git a/src/radix_sort_parallel.cu b/src/radix_sort_parallel.cu index ffa62952d4481ed186b64c4aa1d796ab7970d691..7341405c9c97e0ac1521d5e88454f7850314f40d 100644 --- a/src/radix_sort_parallel.cu +++ b/src/radix_sort_parallel.cu @@ -3,20 +3,14 @@ #include "radix_sort_parallel.h" __global__ void copyArrayParallel(int *arr, int *output, int n) { - int index = threadIdx.x; - int stride = blockDim.x; - - for (int i = index+stride; i < n; i+=stride) { + for (int i = 0; i < n; i++) { arr[i] = output[i]; } } __global__ void getMaxParallel(int *arr, int *max, int n) { - int index = threadIdx.x; - int stride = blockDim.x; - - int maximum = arr[index]; - for (int i = index+stride; i < n; i+=stride) { + int maximum = arr[0]; + for (int i = 0; i < n; i++) { if (arr[i] > maximum) { maximum = arr[i]; } diff --git a/src/radix_sort_parallel.h b/src/radix_sort_parallel.h index 76c36e31a7ae88ddbe6c3829a242b893aeb72d8f..b2477e55f5cb5350db3a7bf1605b94b9c5de8cc8 100644 --- a/src/radix_sort_parallel.h +++ b/src/radix_sort_parallel.h @@ -10,5 +10,4 @@ void rng(int* arr, int n); void radix_sort(int arr[], int n); -void print(int arr[], int n) ; -void countSort(int arr[], int n, int exp); \ No newline at end of file +void print(int arr[], int n) ; \ No newline at end of file diff --git a/src/radixsort_parallel.cu b/src/radixsort_parallel.cu index 00f142434afa356d3a17eebb506eaf92f47e9168..2dabb613902093ba92c1e9c866c66619525b3ac3 100644 --- a/src/radixsort_parallel.cu +++ b/src/radixsort_parallel.cu @@ -13,20 +13,10 @@ __global__ void getMax(int *arr, int *max, int n) { max[0] = mx; } -__global__ void copyArrayParallel(int *arr, int *output, int n) { - for (int i = 0; i < n; i++) { - arr[i] = output[i]; - } -} - -void countSort(int arr[], int n, int exp) { - int *output; - int *d_output, *d_arr; +__global__ void countSort(int *arr, int n, int exp) { + int* output = (int*)malloc(n * sizeof(int)); int i, count[10] = {0}; - // Allocate host memory - output = (int*)malloc(n * sizeof(int)); - for (i = 0; i < n; i++) count[ (arr[i]/exp)%10 ]++; @@ -39,25 +29,8 @@ void countSort(int arr[], int n, int exp) { count[ (arr[i]/exp)%10 ]--; } - // Allocate device memory - cudaMalloc((void**)&d_arr, sizeof(n * sizeof(int))); - cudaMalloc((void**)&d_output, sizeof(n * sizeof(int))); - - // Transfer data from host to device memory - cudaMemcpy(d_arr, arr, n * sizeof(int), cudaMemcpyHostToDevice); - - // Executing kernel - copyArrayParallel<<<1,500>>>(d_arr, d_output, n); - - //Transfer data back to host memory - cudaMemcpy(output, d_output, n * sizeof(int), cudaMemcpyDeviceToHost); - - // Deallocate device memory - cudaFree(d_arr); - cudaFree(d_output); - - // Deallocate host memory - free(output); + for (i = 0; i < n; i++) + arr[i] = output[i]; } void radix_sort(int arr[], int n) @@ -83,7 +56,7 @@ void radix_sort(int arr[], int n) cudaMemcpy(max, d_max, 1 * sizeof(int), cudaMemcpyDeviceToHost); for (int exp = 1; max[0]/exp > 0; exp *= 10) { - countSort(d_arr, n, exp); + countSort<<<1, 500>>>(d_arr, n, exp); } cudaMemcpy(arr, d_arr, n * sizeof(int), cudaMemcpyDeviceToHost);