diff --git a/src/radixsort_parallel.cu b/src/radixsort_parallel.cu index 3b9affe25a727d7f20aaf04b3f27b70424636b22..8ba6fe75fb0b0901c0d092f37e9976d757f2c13c 100644 --- a/src/radixsort_parallel.cu +++ b/src/radixsort_parallel.cu @@ -1,6 +1,35 @@ -#include <cuda.h> -#include <cuda_runtime.h> -#include "radix_sort_parallel.h" +#include <stdio.h> +#include <stdlib.h> +// #include <omp.h> +#include <sys/time.h> + +void generateArray(int arr[], int n,int seed); +void radixsort(int arr[], int n); +void print(int arr[], int n); + +int main(int argc, char *argv[]) { + struct timeval stop, start; + // int thread_count = 5; + int array_size = strtol(argv[1], NULL, 10); + //printf("NUM THREADS : %d\n", omp_get_num_threads()); + int *arr,*d_arr; + arr = (int*)malloc(sizeof(int)* array_size); + cudaMalloc((void **)&d_arr, sizeof(int) * array_size); +// cudaMemcpy(d_arr, arr, sizeof(int) * array_size, cudaMemcpyHostToDevice); + int seed = 13516127; + generateArray(arr,array_size,seed); + cudaMemcpy(d_arr, arr, sizeof(int) * array_size, cudaMemcpyHostToDevice); + gettimeofday(&start, NULL); + radixsort(arr, array_size); + gettimeofday(&stop, NULL); + print(arr,array_size); + printf("Sorting selesai\n"); + printf("took %lu microsecond for parallel radix sort\n", ((stop.tv_sec - start.tv_sec)*1000000)+(stop.tv_usec - start.tv_usec)); + + cudaFree(d_arr); + free(arr); + return 0; +} __global__ void getMax(int arr[], int n, int max[]) { @@ -29,13 +58,23 @@ int temp = arr[0]; } } -} +} +// __global__ +void generateArray(int arr[], int n,int seed){ + int *output; + output = (int*)malloc(sizeof(int) * n); + srand(seed); + for(long i = 0; i < n; i++) { + output[i] = (int)rand(); + } + + for(long i = 0; i < n; i++) { + arr[i] = output[i]; + } +} __global__ void countSort(int arr[], int n, int digit) { - // int my_rank = omp_get_thread_num(); - // int thread_count = omp_get_num_threads(); - //printf("NUMBER OF THREAD NYA BOY %d\n",thread_count); int *output; output = (int*)malloc(sizeof(int)*n); long i; @@ -53,8 +92,7 @@ void countSort(int arr[], int n, int digit) { arr[i] = output[i]; } -void radix_sort(int arr[], int n) -{ +void radixsort(int arr[], int n) { int *max,*d_max,*d_arr; max = (int*)malloc(sizeof(int)* 1); cudaMalloc((void **)&d_max, sizeof(int) * 1); @@ -67,55 +105,13 @@ void radix_sort(int arr[], int n) countSort<<<1,1>>>(d_arr, n, digit); } cudaMemcpy(arr,d_arr, sizeof(int) *n, cudaMemcpyDeviceToHost); -} - -void print(int arr[], int n) -{ - for (int i = 0; i < n; i++) - printf("%d: %d\n",i, arr[i]); -} -void rng(int *arr, int n) { - int seed = 13516013; - srand(seed); - for(long i = 0; i < n; i++) { - arr[i] = (int)rand(); - } } -int main(int argc, char *argv[]) { - int N; - int *arr; - int *d_arr; - - if (argc == 2) { - N = strtol(argv[1], NULL, 10); - } else { - printf("ERROR: ./radix_sort <array_length>\n"); - return 1; - } - - // Allocate host memory - arr = (int*)malloc(N * sizeof(int)); - - // Initialize host memory - rng(arr,N); - - // Allocate device memory - cudaMalloc((void**)&d_arr, N * sizeof(int)); - - // Transfer data from host to device memory - cudaMemcpy(d_arr, arr, N * sizeof(int), cudaMemcpyHostToDevice); - - clock_t begin = clock(); - radix_sort(arr, N); - clock_t end = clock(); - double time = (double)(end - begin) * 1000 / CLOCKS_PER_SEC; - print(arr,N); - printf("Executed in %lf ms\n",time); - - cudaFree(d_arr); - free(arr); - return 0; -} +void print(int arr[], int n) { + for (long i = 0; i < n; i++) { + printf("%d \n",arr[i]); + } + printf("\n"); +} \ No newline at end of file