diff --git a/.DS_Store b/.DS_Store index 707c39b8ba9621345fc9797d7071c7eea90abfc7..22ae81ec5f55ce735351902b11b164d295b0e603 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/Makefile b/Makefile index 2d663dc0d3f81bb59d4d4e4cc7b6f9d4a39ab6c3..7de4f8a509fde54113d2f73210d01171b91747ce 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ all: - nvcc src/radix_sort_cuda.cu -o radix_sort_cuda + nvcc src/radix_sort_par.cu -o radix_sort_cuda clean: rm -rf radix_sort_cuda diff --git a/src/.DS_Store b/src/.DS_Store index 4912157825bb3ffe363aeada5f93b27d3b9d50db..09bf7c2bc24e9930b0b02dbae5fbbe3a9d080167 100644 Binary files a/src/.DS_Store and b/src/.DS_Store differ diff --git a/src/rad2 b/src/rad2 deleted file mode 100755 index db2a378010b194cd994b309489505efa5fa44e96..0000000000000000000000000000000000000000 Binary files a/src/rad2 and /dev/null differ diff --git a/src/radix_par b/src/radix_par deleted file mode 100755 index 7a027978875c8c9a404b81fda39d4867a1287257..0000000000000000000000000000000000000000 Binary files a/src/radix_par and /dev/null differ diff --git a/src/radix_sort_cuda b/src/radix_sort_cuda deleted file mode 100755 index b7f9a8e77e7da0869d7203a4bdc89c10cddf3768..0000000000000000000000000000000000000000 Binary files a/src/radix_sort_cuda and /dev/null differ diff --git a/src/radix_sort_par.cu b/src/radix_sort_par.cu index 1718b956ea1a3e16802afbbd5874b95540d76ec3..7eb0da76dfa202b87fea945bbd537454c691ba2f 100755 --- a/src/radix_sort_par.cu +++ b/src/radix_sort_par.cu @@ -5,24 +5,30 @@ using namespace std; // A utility function to get maximum value in arr[] -__device__ int getMax(int arr[], int n) +int getMax(int arr[], int n) { int mx = arr[0]; for (int i = 1; i < n; i++) if (arr[i] > mx) mx = arr[i]; return mx; +} + +__global__ void copyArray(int *arr, int *output, int n) +{ + int id = blockIdx.x*blockDim.x+threadIdx.x; + + if (id<n) + arr[id]=output[id]; } // A function to do counting sort of arr[] according to // the digit represented by exp. -__device__ void countSort(int arr[], int n, int exp) +void countSort(int arr[], int n, int exp) { -// int index = threadIdx.x; -// int stride = blockDim.x; - - int *output= (int*)malloc(sizeof(int)*n); // output array - int i, count[10] = {0}; + int output[n]; // output array + long i; + int count[10] = {0}; // Store count of occurrences in count[] for (i = 0; i < n; i++) @@ -42,42 +48,59 @@ __device__ void countSort(int arr[], int n, int exp) // Copy the output array to arr[], so that arr[] now // contains sorted numbers according to current digit - for (i = 0; i < n; i++) - arr[i] = output[i]; + //for (i = 0; i < n; i++) + //arr[i] = output[i]; + + int *h_arr; + int *h_output; + int *d_arr; + int *d_output; + + size_t bytes= n*sizeof(int); + + cudaMalloc(&d_arr, bytes); + cudaMalloc(&d_output, bytes); + + if (d_output==0){ + printf ("hi"); + } + + cudaMemcpy(d_output,output, bytes, cudaMemcpyHostToDevice); + + int blockSize, gridSize; + + // Number of threads in each thread block + blockSize = 256; + + // Number of thread blocks in grid + gridSize = ceil((float)n/blockSize); + + copyArray<<<gridSize,blockSize>>>(d_arr,d_output,n); + + cudaMemcpy(arr,d_arr,bytes,cudaMemcpyDeviceToHost); + + cudaFree(d_arr); + cudaFree(d_output); + } // The main function to that sorts arr[] of size n using -// Radix Sort - -__global__ void radixsort(int *arr, int n) +// Radix Sort +void radixsort(int arr[], int n) { - //int *d_arr; - // Find the maximum number to know number of digits int m = getMax(arr, n); // Do counting sort for every digit. Note that instead // of passing digit number, exp is passed. exp is 10^i // where i is current digit number - - // allocate device memory - //cudaMalloc((void**)&d_arr,sizeof(int)*n); - - //cudaMemcpy(d_arr, arr, sizeof(int)*n,cudaMemcpyHostToDevice); for (int exp = 1; m/exp > 0; exp *= 10) - //countSort<<<1,1024>>>(d_arr, n, exp); - countSort(arr,n,exp); - //transfer data back to host memory - //cudaMemcpy(arr, d_arr, sizeof(int)*n, cudaMemcpyDeviceToHost); - - //deallocate device memory - //cudaFree(d_arr); - __syncthreads(); - + countSort(arr, n, exp); } // A utility function to print an array -void print(int arr[], int n){ +void print(int arr[], int n) +{ for (int i = 0; i < n; i++) cout << arr[i] << "\n"; } @@ -109,18 +132,12 @@ int main(int argc, char *argv[]) { timespec start, stop; - int *d_arr; int n; n= atoi(argv[1]); int arr[n]; rng(arr,n); - - cudaMalloc((void**)&d_arr,sizeof(int)*n); - cudaMemcpy(d_arr, arr, sizeof(int)*n,cudaMemcpyHostToDevice); - clock_gettime(CLOCK_REALTIME, &start); - radixsort<<<1,32>>>(d_arr,n); - cudaMemcpy(arr, d_arr, sizeof(int)*n,cudaMemcpyDeviceToHost); + radixsort(arr,n); clock_gettime(CLOCK_REALTIME, &stop); print(arr,n); @@ -128,9 +145,5 @@ int main(int argc, char *argv[]) long time = duration.tv_sec * 1000000 + duration.tv_nsec/1000; printf("\n%d.%09d s\n", duration.tv_sec, duration.tv_nsec); - //deallocate host memory - cudaFree(d_arr); - return 0; } - diff --git a/src/radix_sort_seq b/src/radix_sort_seq deleted file mode 100755 index 9ad6573e7dcc4676e1900d6c7c830bca4fc5d4eb..0000000000000000000000000000000000000000 Binary files a/src/radix_sort_seq and /dev/null differ