diff --git a/src/radix_sort_par.cu b/src/radix_sort_par.cu
index ecc6b1814b7c678a37b24bad9fab4e610a324587..a4613c255805dc63210ca3a2a134675fe79e28dd 100644
--- a/src/radix_sort_par.cu
+++ b/src/radix_sort_par.cu
@@ -18,14 +18,14 @@ __device__ int getMax(int arr[], int n)
 // the digit represented by exp.
 __device__ void countSort(int arr[], int n, int exp)
 {
-    int index = threadIdx.x;
-    int stride = blockDim.x;
+//    int index = threadIdx.x;
+//    int stride = blockDim.x;
 
     int *output= (int*)malloc(sizeof(int)*n); // output array
     int i, count[10] = {0};
 
     // Store count of occurrences in count[]
-    for (i = index; i < n; i+=stride)
+    for (i = 0; i < n; i++)
         count[ (arr[i]/exp)%10 ]++;
 
     // Change count[i] so that count[i] now contains actual
@@ -34,7 +34,7 @@ __device__ void countSort(int arr[], int n, int exp)
         count[i] += count[i - 1];
 
     // Build the output array
-    for (i = n - 1; i >= index; i-=stride)
+    for (i = n - 1; i >= 0; i--)
     {
         output[count[ (arr[i]/exp)%10 ] - 1] = arr[i];
         count[ (arr[i]/exp)%10 ]--;
@@ -42,7 +42,7 @@ __device__ void countSort(int arr[], int n, int exp)
 
     // Copy the output array to arr[], so that arr[] now
     // contains sorted numbers according to current digit
-    for (i = index; i < n; i+=stride)
+    for (i = 0; i < n; i++)
         arr[i] = output[i];
 }
 
diff --git a/src/radix_sort_par3.cu b/src/radix_sort_par3.cu
new file mode 100644
index 0000000000000000000000000000000000000000..27c479204fe96950cf7b516bf2399bbd536d2a79
--- /dev/null
+++ b/src/radix_sort_par3.cu
@@ -0,0 +1,189 @@
+// C++ implementation of Radix Sort
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <vector>
+#include <time.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+using namespace std;
+
+// Abort with a readable message when a CUDA runtime call fails.
+#define CUDA_CHECK(call)                                                   \
+    do {                                                                   \
+        cudaError_t err_ = (call);                                         \
+        if (err_ != cudaSuccess) {                                         \
+            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,  \
+                    cudaGetErrorString(err_));                             \
+            exit(EXIT_FAILURE);                                            \
+        }                                                                  \
+    } while (0)
+
+// Number of buckets per pass: one per decimal digit.
+static const int RADIX = 10;
+
+// A utility function to get maximum value in arr[]. Requires n >= 1.
+int getMax(int arr[], int n)
+{
+    int mx = arr[0];
+    for (int i = 1; i < n; i++)
+        if (arr[i] > mx)
+            mx = arr[i];
+    return mx;
+}
+
+// Kernel: adds to count[d] the number of elements of arr[0..n) whose
+// decimal digit selected by exp is d.
+//   count : device pointer to RADIX ints, zeroed by the caller
+//   arr   : device pointer to n non-negative ints
+// Any 1D grid/block size works; uses RADIX ints of static shared memory.
+__global__ void storeCount(int *count, int *arr, int n, int exp){
+    // Per-block histogram keeps most of the atomic traffic in shared memory.
+    __shared__ int local[RADIX];
+    for (int d = threadIdx.x; d < RADIX; d += blockDim.x)
+        local[d] = 0;
+    __syncthreads();
+
+    // 64-bit index so that "i += stride" cannot overflow for large n.
+    long long stride = (long long)gridDim.x * blockDim.x;
+    long long first = (long long)blockIdx.x * blockDim.x + threadIdx.x;
+    for (long long i = first; i < n; i += stride)
+        // Many threads hit the same bucket, so the increment must be atomic.
+        atomicAdd(&local[(arr[i]/exp)%RADIX], 1);
+    __syncthreads();
+
+    // One global atomic per non-empty bucket per block.
+    for (int d = threadIdx.x; d < RADIX; d += blockDim.x)
+        if (local[d] != 0)
+            atomicAdd(&count[d], local[d]);
+}
+
+// A function to do counting sort of arr[] according to
+// the digit represented by exp. The digit histogram is computed on the
+// GPU; the prefix sum and the stable scatter run on the host.
+// All values in arr[] must be non-negative.
+void countSort(int arr[], int n, int exp)
+{
+    if (n <= 0)
+        return;
+
+    const size_t arrBytes = sizeof(int) * (size_t)n;
+    const size_t countBytes = sizeof(int) * RADIX;
+    int *d_arr = NULL;
+    int *d_count = NULL;  // the kernel writes it, so it must be device memory
+    int h_count[RADIX] = {0};
+
+    CUDA_CHECK(cudaMalloc((void**)&d_arr, arrBytes));
+    CUDA_CHECK(cudaMalloc((void**)&d_count, countBytes));
+    CUDA_CHECK(cudaMemcpy(d_arr, arr, arrBytes, cudaMemcpyHostToDevice));
+    CUDA_CHECK(cudaMemset(d_count, 0, countBytes));
+
+    // Store count of occurrences in count[]
+    const int threads = 256;
+    int blocks = (n - 1) / threads + 1;
+    if (blocks > 1024)
+        blocks = 1024;  // the kernel's stride loop covers the rest
+    storeCount<<<blocks, threads>>>(d_count, d_arr, n, exp);
+    CUDA_CHECK(cudaGetLastError());
+    // Blocking copy of all RADIX counters (size is in bytes, not elements).
+    CUDA_CHECK(cudaMemcpy(h_count, d_count, countBytes, cudaMemcpyDeviceToHost));
+    CUDA_CHECK(cudaFree(d_count));
+    CUDA_CHECK(cudaFree(d_arr));
+
+    // Change count[i] so that count[i] now contains actual
+    // position of this digit in output[]
+    for (int i = 1; i < RADIX; i++)
+        h_count[i] += h_count[i - 1];
+
+    // Build the output array (walk backwards to keep the sort stable)
+    vector<int> output(n);
+    for (int i = n - 1; i >= 0; i--)
+    {
+        int digit = (arr[i]/exp)%RADIX;
+        output[--h_count[digit]] = arr[i];
+    }
+
+    // Copy the output array to arr[], so that arr[] now
+    // contains sorted numbers according to current digit
+    for (int i = 0; i < n; i++)
+        arr[i] = output[i];
+}
+
+// The main function to that sorts arr[] of size n using
+// Radix Sort. All values must be non-negative.
+void radixsort(int *arr, int n)
+{
+    if (n <= 1)
+        return;  // nothing to sort; also keeps getMax() off an empty array
+
+    int m = getMax(arr, n);
+
+    // Do counting sort for every digit. Note that instead
+    // of passing digit number, exp is passed. exp is 10^i
+    // where i is current digit number.
+    // exp is 64-bit: for m >= 10^9 the final "exp *= 10" would overflow
+    // an int and cause extra passes with a garbage exp.
+    for (long long exp = 1; m/exp > 0; exp *= 10)
+        countSort(arr, n, (int)exp);
+}
+
+// A utility function to print an array
+void print(int arr[], int n){
+    for (int i = 0; i < n; i++)
+        cout << arr[i] << "\n";
+}
+
+// Fill arr[0..n) with pseudo-random values from a fixed seed.
+void rng(int* arr,int n){
+    int seed = 13516067;
+    srand(seed);
+    for (int i=0; i<n; i++){
+        arr[i] = rand();
+    }
+}
+
+// Returns end - start, borrowing one second when the nanosecond
+// difference is negative.
+timespec diff(timespec start, timespec end)
+{
+    timespec temp;
+    if ((end.tv_nsec - start.tv_nsec) < 0) {
+        temp.tv_sec = end.tv_sec - start.tv_sec - 1;
+        temp.tv_nsec = 1000000000 + end.tv_nsec - start.tv_nsec;
+    } else {
+        temp.tv_sec = end.tv_sec - start.tv_sec;
+        temp.tv_nsec = end.tv_nsec - start.tv_nsec;
+    }
+    return temp;
+}
+
+// Driver program: sorts N seeded random integers, prints them and the
+// elapsed sorting time.
+int main(int argc, char *argv[])
+{
+    if (argc < 2) {
+        fprintf(stderr, "usage: %s N\n", argv[0]);
+        return 1;
+    }
+    int n = atoi(argv[1]);
+    if (n <= 0) {
+        fprintf(stderr, "N must be a positive integer\n");
+        return 1;
+    }
+
+    // Heap storage: a stack array of n ints overflows the stack for large n.
+    vector<int> arr(n);
+    rng(&arr[0], n);
+
+    timespec start, stop;
+    clock_gettime(CLOCK_REALTIME, &start);
+    radixsort(&arr[0], n);
+    clock_gettime(CLOCK_REALTIME, &stop);
+    print(&arr[0], n);
+
+    timespec duration = diff(start, stop);
+    // tv_sec/tv_nsec are not int; cast so the format specifiers match.
+    printf("\n%ld.%09ld s\n", (long)duration.tv_sec, (long)duration.tv_nsec);
+
+    return 0;
+}