From 684daffc2726ee2c4c677a025de1e10bcc220643 Mon Sep 17 00:00:00 2001
From: azkanab <azkanabilah@gmail.com>
Date: Thu, 11 Apr 2019 18:25:37 +0700
Subject: [PATCH] try

---
 src/radix_sort_parallel.cu    | 12 +++--
 src/radixsort_parallel.cu.cpp | 98 +++++++++++++++++++++++++++++++++++
 2 files changed, 107 insertions(+), 3 deletions(-)
 create mode 100644 src/radixsort_parallel.cu.cpp

diff --git a/src/radix_sort_parallel.cu b/src/radix_sort_parallel.cu
index 950f89f..f9d93af 100644
--- a/src/radix_sort_parallel.cu
+++ b/src/radix_sort_parallel.cu
@@ -3,14 +3,20 @@
 #include "radix_sort_parallel.h"
 
 __global__ void copyArrayParallel(int *arr, int *output, int n) {
-	for (int i = 0; i < n; i++) {
+	// Copies output[0..n) into arr[0..n); the grid-stride index covers n for any launch shape.
+	int index = blockIdx.x * blockDim.x + threadIdx.x;
+	int stride = gridDim.x * blockDim.x;
+	for (int i = index; i < n; i+=stride) {
 		arr[i] = output[i];
 	}
 }
 
 __global__ void getMaxParallel(int *arr, int *max, int n) {
-	int maximum = arr[0];
-	for (int i = 0; i < n; i++) {
+	int index = threadIdx.x;
+	int stride = blockDim.x;
+
+	int maximum = arr[index];
+	for (int i = index; i < n; i+=stride) {
 		if (arr[i] > maximum) {
 			maximum = arr[i];
 		}
diff --git a/src/radixsort_parallel.cu.cpp b/src/radixsort_parallel.cu.cpp
new file mode 100644
index 0000000..8fc2f94
--- /dev/null
+++ b/src/radixsort_parallel.cu.cpp
@@ -0,0 +1,98 @@
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include "radix_sort_parallel.h"
+
+// Writes the maximum of arr[0..n) to max[0]; intended for a single-block launch.
+__global__ void getMax(int *arr, int *max, int n) {
+    if (n <= 0) return;                      // nothing to scan; the exit is uniform across the block
+    if (threadIdx.x == 0) max[0] = arr[0];   // seed with an element that is always in bounds
+    __syncthreads();                         // the seed must land before any thread merges into it
+    int mx = arr[0];                         // arr[threadIdx.x] would be out of bounds when n < blockDim.x
+    for (int i = threadIdx.x; i < n; i += blockDim.x)
+        if (arr[i] > mx) mx = arr[i];
+    atomicMax(&max[0], mx);                  // race-free merge of the per-thread maxima
+}
+
+// One stable counting-sort pass over arr[0..n), keyed on the decimal digit (arr[i]/exp)%10.
+// Runs sequentially on thread 0 of block 0; values must be non-negative (the digit indexes count[]).
+__global__ void countSort(int arr[], int n, int exp) 
+{ 
+    if (blockIdx.x != 0 || threadIdx.x != 0 || n <= 0) return;
+    int* output = (int*)malloc(n * sizeof(int));   // scratch buffer taken from the device heap
+    if (output == NULL) __trap();                  // heap exhausted: abort rather than write through NULL
+    int i, count[10] = {0};
+
+    for (i = 0; i < n; i++)                        // histogram of the current digit
+        count[ (arr[i]/exp)%10 ]++; 
+    for (i = 1; i < 10; i++)                       // prefix sums: count[d] = one past digit d's last slot
+        count[i] += count[i - 1]; 
+
+    for (i = n - 1; i >= 0; i--)                   // back to front keeps equal digits in input order
+        output[--count[ (arr[i]/exp)%10 ]] = arr[i]; 
+
+    for (i = 0; i < n; i++) 
+        arr[i] = output[i];
+    free(output);
+} 
+
+// Sorts arr[0..n) ascending on the GPU with a least-significant-digit decimal radix sort.
+void radix_sort(int arr[], int n) 
+{ 
+    if (n <= 0) return;                       // nothing to sort; also avoids zero-byte allocations
+    int max = 0;
+    int *d_max = NULL, *d_arr = NULL;
+
+    // Allocate device memory and upload the input; each step runs only if the previous one succeeded
+    cudaError_t err = cudaMalloc((void**)&d_max, sizeof(int));
+    if (err == cudaSuccess) err = cudaMalloc((void**)&d_arr, n * sizeof(int));
+    if (err == cudaSuccess) err = cudaMemcpy(d_arr, arr, n * sizeof(int), cudaMemcpyHostToDevice);
+
+    // Find the largest key; it decides how many digit passes are needed
+    if (err == cudaSuccess) {
+        getMax<<<1, 500>>>(d_arr, d_max, n);
+        err = cudaMemcpy(&max, d_max, sizeof(int), cudaMemcpyDeviceToHost);
+    }
+
+    // 64-bit exp: a 32-bit exp overflows on "exp *= 10" once max >= 10^9
+    for (long long exp = 1; err == cudaSuccess && max / exp > 0; exp *= 10)
+        countSort<<<1,1>>>(d_arr, n, (int)exp);
+
+    if (err == cudaSuccess) err = cudaMemcpy(arr, d_arr, n * sizeof(int), cudaMemcpyDeviceToHost);
+    if (err != cudaSuccess) fprintf(stderr, "radix_sort: %s\n", cudaGetErrorString(err));
+    cudaFree(d_arr);
+    cudaFree(d_max);
+} 
+
+// Writes every element of arr[0..n) to stdout, one "<index>: <value>" line each.
+void print(int arr[], int n) {
+    int pos = 0;
+    while (pos < n) { printf("%d: %d\n", pos, arr[pos]); pos++; }
+}
+
+// Fills arr[0..n) with rand() values drawn after seeding with the fixed seed 13516013.
+void rng(int* arr, int n) {
+    srand(13516013);
+    long filled = 0;
+    while (filled < n)
+        arr[filled++] = rand();
+}
+
+// Usage: ./radix_sort <array_length> -- sorts that many seeded random ints and times the sort.
+int main(int argc, char *argv[]) {
+    int N = (argc == 2) ? (int)strtol(argv[1], NULL, 10) : 0;
+    if (N <= 0) {                                // absent, non-numeric or non-positive length
+        printf("ERROR: ./radix_sort <array_length>\n");
+        return 1;
+    }
+    int *arr = (int*)malloc(N * sizeof(int));    // heap buffer: a stack VLA overflows for large N
+    if (arr == NULL) { printf("ERROR: out of memory\n"); return 1; }
+    rng(arr,N);
+    clock_t begin = clock();
+    radix_sort(arr, N);
+    double time = (double)(clock() - begin) * 1000 / CLOCKS_PER_SEC;
+    print(arr,N);
+    printf("Executed in %lf ms\n",time);
+    free(arr);
+    return 0;
+}
+
-- 
GitLab