diff --git a/src/radix_sort_parallel.cu b/src/radix_sort_parallel.cu index f9d93af7e233ed359daa981ace04b4baff7342f1..d7c7fdc0aa3d316f9be715482399df8500e51dc6 100644 --- a/src/radix_sort_parallel.cu +++ b/src/radix_sort_parallel.cu @@ -16,12 +16,12 @@ __global__ void getMaxParallel(int *arr, int *max, int n) { int stride = blockDim.x; int maximum = arr[index]; - for (int i = index; i < n; i+=stride) { + for (int i = index+stride; i < n; i+=stride) { if (arr[i] > maximum) { maximum = arr[i]; } } - max[0] = maximum; + max[index] = maximum; } int getMax(int arr[], int n) diff --git a/src/radixsort_parallel.cu b/src/radixsort_parallel.cu index 2511d39867a0b66db178e86e5e724fba4a416982..9c7c01c30e58729dad394a08113053e1760557b8 100644 --- a/src/radixsort_parallel.cu +++ b/src/radixsort_parallel.cu @@ -2,15 +2,15 @@ #include <cuda_runtime.h> #include "radix_sort_parallel.h" -__global__ void getMax(int *arr, int *max, int n) { +__global__ void getMax(int *arr, int *max, int *n) { int index = threadIdx.x; int stride = blockDim.x; int mx = arr[index]; - for (int i = index+stride; i < n; i+=stride) + for (int i = index+stride; i < n[0]; i+=stride) if (arr[i] > mx) mx = arr[i]; - max[0] = mx; + max[index] = mx; } __global__ void countSort(int *arr, int n, int exp) { @@ -36,21 +36,23 @@ __global__ void countSort(int *arr, int n, int exp) { void radix_sort(int arr[], int n) { int *max; - int *d_max, *d_arr; + int *d_max, *d_arr, *d_n; // Allocate host memory max = (int*)malloc(1 * sizeof(int)); // Allocate device memory cudaMalloc((void**)&d_max, 1 * sizeof(int)); + cudaMalloc((void**)&d_n, 1 * sizeof(int)); cudaMalloc((void**)&d_arr, n * sizeof(int)); // Transfer data from host to device cudaMemcpy(d_arr, arr, n * sizeof(int), cudaMemcpyHostToDevice); cudaMemcpy(d_max, max, sizeof(int) * 1, cudaMemcpyHostToDevice); + cudaMemcpy(d_n, n, sizeof(int) * 1, cudaMemcpyHostToDevice); // Executing kernel - getMax<<<1, 500>>>(d_arr, d_max, n); + getMax<<<1, 500>>>(d_arr, d_max, d_n); // Transfer data back to host memory cudaMemcpy(max, d_max, 1 * sizeof(int), cudaMemcpyDeviceToHost); @@ -60,6 +62,10 @@ void radix_sort(int arr[], int n) } cudaMemcpy(arr, d_arr, n * sizeof(int), cudaMemcpyDeviceToHost); + cudaFree(d_max); + cudaFree(d_arr); + cudaFree(d_n); + free(max); } void print(int arr[], int n)