From 5e356e0b4db0f2a46aab1617edacab3b63e55c8b Mon Sep 17 00:00:00 2001
From: Yasya Rusyda <yasyars@Yasyas-MacBook-Pro.local>
Date: Thu, 11 Apr 2019 17:31:30 +0700
Subject: [PATCH] Make radixsort a __global__ kernel and move device memory management into main

---
 src/radix_sort_par.cu | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/src/radix_sort_par.cu b/src/radix_sort_par.cu
index 3718623..ecc6b18 100644
--- a/src/radix_sort_par.cu
+++ b/src/radix_sort_par.cu
@@ -49,9 +49,9 @@ __device__ void countSort(int arr[], int n, int exp)
 // The main function to that sorts arr[] of size n using  
 // Radix Sort
 
-__device__ void radixsort(int *arr, int n) 
+__global__ void radixsort(int *arr, int n) 
 { 
-    int *d_arr;
+    //int *d_arr;
 
     // Find the maximum number to know number of digits 
     int m = getMax(arr, n); 
@@ -61,18 +61,18 @@ __device__ void radixsort(int *arr, int n)
     // where i is current digit number 
 
     // allocate device memory
-    cudaMalloc((void**)&d_arr,sizeof(int)*n);
+    //cudaMalloc((void**)&d_arr,sizeof(int)*n);
 
-    cudaMemcpy(d_arr, arr, sizeof(int)*n,cudaMemcpyHostToDevice);
+    //cudaMemcpy(d_arr, arr, sizeof(int)*n,cudaMemcpyHostToDevice);
     for (int exp = 1; m/exp > 0; exp *= 10) 
-        countSort<<<1,1024>>>(d_arr, n, exp); 
-
+        //countSort<<<1,1024>>>(d_arr, n, exp); 
+        countSort(arr,n,exp);
         //transfer data back to host memory
-        cudaMemcpy(arr, d_arr, sizeof(int)*n, cudaMemcpyDeviceToHost);
+        //cudaMemcpy(arr, d_arr, sizeof(int)*n, cudaMemcpyDeviceToHost);
         
     //deallocate device memory
-    cudaFree(d_arr);
-
+    //cudaFree(d_arr);
+    __syncthreads();
 
 } 
   
@@ -109,12 +109,18 @@ int main(int argc, char *argv[])
 { 
     timespec start, stop;
     
+    int *d_arr;
     int n;
     n= atoi(argv[1]);
     int arr[n];
     rng(arr,n);
+
+    cudaMalloc((void**)&d_arr,sizeof(int)*n);
+    cudaMemcpy(d_arr, arr, sizeof(int)*n,cudaMemcpyHostToDevice);
+
     clock_gettime(CLOCK_REALTIME, &start);
-    radixsort(arr,n);
+    radixsort<<<1,32>>>(d_arr,n);
+    cudaMemcpy(arr, d_arr, sizeof(int)*n,cudaMemcpyDeviceToHost);
     clock_gettime(CLOCK_REALTIME, &stop);
     print(arr,n);
     
@@ -123,6 +129,7 @@ int main(int argc, char *argv[])
     printf("\n%d.%09d s\n", duration.tv_sec, duration.tv_nsec);
     
     //deallocate host memory
+    cudaFree(d_arr);
     
     return 0; 
 } 
-- 
GitLab