From 38861ffe5a61ec3d56c8ffb2fffe400e24edb68a Mon Sep 17 00:00:00 2001
From: azkanab <azkanabilah@gmail.com>
Date: Thu, 11 Apr 2019 18:57:18 +0700
Subject: [PATCH] tes

---
 src_radix_sort_parallel.cu | 147 +++++++++++++++++++++++++++++++++++++
 1 file changed, 147 insertions(+)
 create mode 100644 src_radix_sort_parallel.cu

diff --git a/src_radix_sort_parallel.cu b/src_radix_sort_parallel.cu
new file mode 100644
index 0000000..adc7ab9
--- /dev/null
+++ b/src_radix_sort_parallel.cu
@@ -0,0 +1,147 @@
+#include <stdio.h>
+#include <stdlib.h>
+// #include <omp.h>
+#include <sys/time.h>
+
+void generateArray(int arr[], int n,int seed);  // fills arr[0..n) with rand() values drawn after srand(seed)
+void radixsort(int arr[], int n);  // sorts the host array arr[0..n) using the getMax and countSort kernels
+void print(int arr[], int n);  // prints arr[0..n) to stdout, one value per line
+
+// Generates argv[1] pseudo-random ints, sorts them with radixsort, then prints them and the elapsed wall time.
+int main(int argc, char *argv[]) {
+   struct timeval stop, start;
+   // The element count is a required argument; argv[1] is NULL when it is omitted.
+   int array_size = (argc > 1) ? (int)strtol(argv[1], NULL, 10) : 0;
+   if (array_size <= 0) {
+      fprintf(stderr, "usage: %s <positive array size>\n", argc > 0 ? argv[0] : "radix_sort");
+      return 1;
+   }
+   int *arr = (int*)malloc(sizeof(int) * array_size);
+   if (arr == NULL) { fprintf(stderr, "failed to allocate %d ints\n", array_size); return 1; }
+   // No device buffer is set up here: radixsort allocates and fills its own device copy of arr.
+   generateArray(arr, array_size, 13516127);
+   // The timed region covers everything radixsort does, including its device allocations and copies.
+   gettimeofday(&start, NULL);
+   radixsort(arr, array_size);
+   gettimeofday(&stop, NULL);
+   print(arr, array_size);
+   printf("Sorting selesai\n");
+   printf("took %lu microsecond for parallel radix sort\n", ((stop.tv_sec - start.tv_sec)*1000000)+(stop.tv_usec - start.tv_usec));
+   free(arr);
+   return 0;
+}
+
+// Writes the largest value of the device array arr[0..n) to max[0]; writes 0 when n <= 0.
+// Indexing uses threadIdx.x only, so launch a single block (radixsort uses <<<1,1000>>>).
+// Threads beyond the first 1000 stay idle. Uses 1000 ints of static shared memory.
+__global__
+void getMax(int arr[], int n, int max[]) {
+	__shared__ int cache[1000];
+	// Number of threads that take part, capped so cache is never overrun.
+	unsigned int workers = blockDim.x < 1000 ? blockDim.x : 1000;
+	if (threadIdx.x < workers) {
+		// Seed from arr[0] only if it exists; an empty array yields 0.
+		int best = (n > 0) ? arr[0] : 0;
+		// Each worker visits every workers-th element; the i < n bound keeps the
+		// final stride inside the array when n is not a multiple of workers.
+		for (int i = threadIdx.x; i < n; i += workers) {
+			if (best < arr[i]) best = arr[i];
+		}
+		cache[threadIdx.x] = best;
+	}
+	// Placed outside the branch so every thread of the block reaches the barrier.
+	__syncthreads();
+	// Thread 0 folds the per-thread maxima; only slots that were written are read.
+	if (threadIdx.x == 0) {
+		int result = cache[0];
+		for (unsigned int i = 1; i < workers; i++) {
+			if (result < cache[i]) result = cache[i];
+		}
+		*max = result;
+	}
+}
+// Fills arr[0..n) with the pseudo-random sequence rand() yields after srand(seed).
+//   arr  - host buffer with room for at least n ints
+//   n    - number of elements to write; nothing is written when n <= 0
+//   seed - handed to srand(), so equal seeds reproduce the same array
+// Side effect: reseeds the process-wide rand() generator.
+void generateArray(int arr[], int n,int seed){
+   srand(seed);
+   // Values go straight into arr. Staging them in a malloc'ed scratch buffer
+   // first would add an allocation that has to be NULL-checked and freed,
+   // and it changes nothing about the values produced.
+   for (long i = 0; i < n; i++) {
+      arr[i] = rand();
+   }
+}
+// Stable counting sort of the device array arr[0..n) (non-negative values) by the decimal digit at place value `digit`.
+// Sequential by design: launch as <<<1,1>>>; any additional threads return immediately without touching arr.
+__global__
+void countSort(int arr[], int n, int digit) {
+	if (n <= 0 || digit <= 0 || blockIdx.x != 0 || threadIdx.x != 0) return;
+	// Scratch buffer from the device heap; in-kernel malloc yields NULL when that heap is exhausted.
+	int *output = (int*)malloc(sizeof(int) * n);
+	if (output == NULL) { printf("countSort: out of device heap memory (n=%d)\n", n); return; }
+	int count[10] = {0};
+	for (long i = 0; i < n; i++)
+		count[(arr[i] / digit) % 10]++;
+	// Prefix sums: count[d] becomes one past the last output slot reserved for digit d.
+	for (int d = 1; d < 10; d++)
+		count[d] += count[d - 1];
+	// Walk backwards so elements with equal digits keep their relative order.
+	for (long i = n - 1; i >= 0; i--)
+		output[--count[(arr[i] / digit) % 10]] = arr[i];
+	for (long i = 0; i < n; i++)
+		arr[i] = output[i];
+	free(output);  // return the scratch buffer to the device heap so repeated passes do not exhaust it
+}
+// Sorts the host array arr[0..n) in ascending order; values are expected to be non-negative.
+// Uploads arr, runs one countSort pass per decimal digit of getMax's result, downloads it, and reports any CUDA error on stderr.
+void radixsort(int arr[], int n) {
+  if (n <= 0) return;  // nothing to sort, and getMax must not be run on an empty array
+  int max = 0, *d_max = NULL, *d_arr = NULL;
+  cudaMalloc((void **)&d_max, sizeof(int));
+  cudaMalloc((void **)&d_arr, sizeof(int) * n);
+  cudaMemcpy(d_arr, arr, sizeof(int) * n, cudaMemcpyHostToDevice);
+  getMax<<<1,1000>>>(d_arr, n, d_max);
+  cudaMemcpy(&max, d_max, sizeof(int), cudaMemcpyDeviceToHost);
+  for (long long digit = 1; max / digit > 0; digit *= 10)  // 64-bit: an int digit overflows on *= 10 once max >= 10^9
+      countSort<<<1,1>>>(d_arr, n, (int)digit);
+  cudaError_t err = cudaMemcpy(arr, d_arr, sizeof(int) * n, cudaMemcpyDeviceToHost);
+  if (err != cudaSuccess) fprintf(stderr, "radixsort: CUDA error: %s\n", cudaGetErrorString(err));
+  cudaFree(d_arr);
+  cudaFree(d_max);
+}
+
+// Prints the first n elements of arr to stdout, each as "<value> " on its own line, followed by an empty line.
+void print(int arr[], int n) {
+	const int *cursor = arr;
+	for (long remaining = n; remaining > 0; remaining--)
+		printf("%d \n", *cursor++);
+	printf("\n");
+}
+/*
+int main(int argc, char *argv[]) {
+   struct timeval stop, start;
+   // int thread_count = 5;
+   int array_size = strtol(argv[1], NULL, 10);
+        //printf("NUM THREADS : %d\n", omp_get_num_threads());
+   int *arr,*d_arr;
+   arr = (int*)malloc(sizeof(int)* array_size);
+   cudaMalloc((void **)&d_arr, sizeof(int) * array_size);
+//   cudaMemcpy(d_arr, arr, sizeof(int) * array_size, cudaMemcpyHostToDevice);
+   int seed = 13516127;
+   generateArray(arr,array_size,seed);
+   cudaMemcpy(d_arr, arr, sizeof(int) * array_size, cudaMemcpyHostToDevice);
+   gettimeofday(&start, NULL);
+         radixsort<<<1,1>>>(arr, array_size);
+   gettimeofday(&stop, NULL);
+         print(arr,array_size);
+   printf("Sorting selesai\n");
+   printf("took %lu microsecond for parallel radix sort\n", ((stop.tv_sec - start.tv_sec)*1000000)+(stop.tv_usec - start.tv_usec));
+
+   cudaFree(d_arr);
+   free(arr);
+        return 0;
+}
+*/
-- 
GitLab