From 38861ffe5a61ec3d56c8ffb2fffe400e24edb68a Mon Sep 17 00:00:00 2001
From: azkanab <azkanabilah@gmail.com>
Date: Thu, 11 Apr 2019 18:57:18 +0700
Subject: [PATCH] tes

---
 src_radix_sort_parallel.cu | 198 +++++++++++++++++++++++++++++++++++++
 1 file changed, 198 insertions(+)
 create mode 100644 src_radix_sort_parallel.cu

diff --git a/src_radix_sort_parallel.cu b/src_radix_sort_parallel.cu
new file mode 100644
index 0000000..adc7ab9
--- /dev/null
+++ b/src_radix_sort_parallel.cu
@@ -0,0 +1,198 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/time.h>
+
+/* Block size used to launch getMax; also sizes its shared-memory array. */
+#define GET_MAX_THREADS 1024
+
+/* Wraps a CUDA runtime call and aborts with file/line context on failure. */
+#define CUDA_CHECK(call) cudaCheck((call), __FILE__, __LINE__)
+
+static void cudaCheck(cudaError_t err, const char *file, int line) {
+    if (err != cudaSuccess) {
+        fprintf(stderr, "CUDA error %s:%d: %s\n", file, line,
+                cudaGetErrorString(err));
+        exit(EXIT_FAILURE);
+    }
+}
+
+void generateArray(int arr[], int n, int seed);
+void radixsort(int arr[], int n);
+void print(int arr[], int n);
+
+/*
+ * Usage: <program> <array_size>
+ *
+ * Fills an array of array_size pseudo-random ints (fixed seed), sorts it
+ * with radixsort(), then prints the sorted values and the elapsed
+ * wall-clock time. The timed region is the whole radixsort() call, so it
+ * includes device allocation and host<->device copies.
+ */
+int main(int argc, char *argv[]) {
+    struct timeval stop, start;
+
+    if (argc < 2) {
+        fprintf(stderr, "usage: %s <array_size>\n",
+                argc > 0 ? argv[0] : "radix_sort");
+        return 1;
+    }
+
+    /* Reject non-numeric, non-positive and int-overflowing sizes. */
+    long requested = strtol(argv[1], NULL, 10);
+    int array_size = (int)requested;
+    if (requested <= 0 || (long)array_size != requested) {
+        fprintf(stderr, "array_size must be a positive int\n");
+        return 1;
+    }
+
+    int *arr = (int *)malloc(sizeof(int) * (size_t)array_size);
+    if (arr == NULL) {
+        fprintf(stderr, "out of host memory\n");
+        return 1;
+    }
+
+    int seed = 13516127;
+    generateArray(arr, array_size, seed);
+
+    gettimeofday(&start, NULL);
+    radixsort(arr, array_size);
+    gettimeofday(&stop, NULL);
+
+    print(arr, array_size);
+    printf("Sorting selesai\n");
+    /* The format below is %lu, so pass an unsigned long explicitly. */
+    unsigned long elapsed_us =
+        (unsigned long)((stop.tv_sec - start.tv_sec) * 1000000L +
+                        (stop.tv_usec - start.tv_usec));
+    printf("took %lu microsecond for parallel radix sort\n", elapsed_us);
+
+    free(arr);
+    return 0;
+}
+
+/*
+ * Writes the largest of arr[0..n-1] to *max.
+ *
+ * Launch with a single block of at most GET_MAX_THREADS threads; uses
+ * GET_MAX_THREADS ints of static shared memory. Requires n >= 1.
+ */
+__global__
+void getMax(int arr[], int n, int max[]) {
+    __shared__ int cache[GET_MAX_THREADS];
+    int tid = threadIdx.x;
+
+    /* Each thread scans a strided slice; the i < n test keeps the final
+       stride inside the array when n is not a multiple of blockDim.x. */
+    int best = arr[0];
+    for (int i = tid; i < n; i += blockDim.x) {
+        if (best < arr[i]) {
+            best = arr[i];
+        }
+    }
+    cache[tid] = best;
+    __syncthreads();
+
+    /* Pairwise reduction in shared memory. The tid + stride test skips
+       partners past blockDim.x, so any block size up to
+       GET_MAX_THREADS works, not only powers of two. */
+    for (int stride = GET_MAX_THREADS / 2; stride > 0; stride /= 2) {
+        if (tid < stride && tid + stride < blockDim.x &&
+            cache[tid] < cache[tid + stride]) {
+            cache[tid] = cache[tid + stride];
+        }
+        __syncthreads();
+    }
+
+    if (tid == 0) {
+        *max = cache[0];
+    }
+}
+
+/*
+ * Fills arr[0..n-1] with rand() values after seeding the C PRNG with seed.
+ * rand() never returns a negative value, which countSort relies on.
+ */
+void generateArray(int arr[], int n, int seed) {
+    srand(seed);
+    for (int i = 0; i < n; i++) {
+        arr[i] = rand();
+    }
+}
+
+/*
+ * One stable counting-sort pass over arr[0..n-1], keyed on the decimal
+ * digit (arr[i] / digit) % 10. output must point to n ints of device
+ * scratch memory; the result of the pass is copied back into arr.
+ *
+ * The pass is sequential: launch with <<<1, 1>>> (any extra threads
+ * return immediately). Values must be non-negative, otherwise the bucket
+ * index would be negative.
+ */
+__global__
+void countSort(int arr[], int n, int digit, int output[]) {
+    if (blockIdx.x != 0 || threadIdx.x != 0) {
+        return;
+    }
+
+    int count[10] = {0};
+    int i;
+
+    for (i = 0; i < n; i++)
+        count[(arr[i] / digit) % 10]++;
+    for (i = 1; i < 10; i++)
+        count[i] += count[i - 1];
+    /* Walk backwards so equal digits keep their relative order. */
+    for (i = n - 1; i >= 0; i--) {
+        int bucket = (arr[i] / digit) % 10;
+        output[--count[bucket]] = arr[i];
+    }
+    for (i = 0; i < n; i++)
+        arr[i] = output[i];
+}
+
+/*
+ * Sorts arr[0..n-1] (host memory, non-negative values) in ascending order
+ * with an LSD decimal radix sort run on the GPU. Returns once the sorted
+ * data has been copied back into arr.
+ */
+void radixsort(int arr[], int n) {
+    if (n < 2) {
+        return; /* already sorted, and getMax needs n >= 1 */
+    }
+
+    size_t bytes = sizeof(int) * (size_t)n;
+    int max = 0;
+    int *d_max = NULL, *d_arr = NULL, *d_out = NULL;
+
+    CUDA_CHECK(cudaMalloc((void **)&d_max, sizeof(int)));
+    CUDA_CHECK(cudaMalloc((void **)&d_arr, bytes));
+    /* Scratch for countSort: allocated once, reused by every pass. */
+    CUDA_CHECK(cudaMalloc((void **)&d_out, bytes));
+    CUDA_CHECK(cudaMemcpy(d_arr, arr, bytes, cudaMemcpyHostToDevice));
+
+    getMax<<<1, GET_MAX_THREADS>>>(d_arr, n, d_max);
+    CUDA_CHECK(cudaGetLastError());
+    CUDA_CHECK(cudaMemcpy(&max, d_max, sizeof(int), cudaMemcpyDeviceToHost));
+
+    /* digit is 64-bit so digit *= 10 cannot overflow after the pass for
+       the top decimal digit of a value close to INT_MAX. */
+    for (long long digit = 1; max / digit > 0; digit *= 10) {
+        countSort<<<1, 1>>>(d_arr, n, (int)digit, d_out);
+        CUDA_CHECK(cudaGetLastError());
+    }
+
+    /* Blocking copy; it also reports errors raised inside the kernels. */
+    CUDA_CHECK(cudaMemcpy(arr, d_arr, bytes, cudaMemcpyDeviceToHost));
+
+    CUDA_CHECK(cudaFree(d_out));
+    CUDA_CHECK(cudaFree(d_arr));
+    CUDA_CHECK(cudaFree(d_max));
+}
+
+/* Prints arr[0..n-1], one value per line, followed by an empty line. */
+void print(int arr[], int n) {
+    for (int i = 0; i < n; i++) {
+        printf("%d \n", arr[i]);
+    }
+    printf("\n");
+}
-- 
GitLab