diff --git a/Makefile b/Makefile index 9243cc03bb476215939655b92cca1edebe1d8365..ff6c8b2a7c6575145b68de97fedc384741ffdf73 100644 --- a/Makefile +++ b/Makefile @@ -1,2 +1,3 @@ radix_sort: + gcc -g -Wall -o serial_radix_sort src/serial_radix_sort.c -lm nvcc src/radix_sort_parallel.cu -o radix_sort diff --git a/src/radix_sort_parallel.cu b/src/radix_sort_parallel.cu index e857a870b26041437b6372998334030e630321ab..fd4227e43b9ec41fbbb1be4e697ab976ef5e2243 100644 --- a/src/radix_sort_parallel.cu +++ b/src/radix_sort_parallel.cu @@ -55,15 +55,15 @@ __global__ void assign_index(int* arr, int* arr_idx, int* down_sweep_i, int* up_ } __global__ void assign_flags(int* arr, int n, int idx, int* flags) { - for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x) { - flags[i] = ((arr[i] & 1 << idx) == 1 << idx) ? 0 : 1; - } + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x) { + flags[i] = ((arr[i] & 1 << idx) == 1 << idx) ? 0 : 1; + } } __global__ void scatter(int* arr, int* arr_temp, int* arr_idx, int n) { - for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x) { - arr[arr_idx[i]] = arr_temp[i]; - } + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x) { + arr[arr_idx[i]] = arr_temp[i]; + } } void count_sort(int* d_arr, int n, int idx) { @@ -129,11 +129,11 @@ int main(int argc, char *argv[]) { // Move sorting CPU int* d_arr; - cudaMalloc(&d_arr, sizeof(int) * n); - cudaMemcpy(d_arr, arr, sizeof(int) * n, cudaMemcpyHostToDevice); + cudaMalloc(&d_arr, sizeof(int) * n); + cudaMemcpy(d_arr, arr, sizeof(int) * n, cudaMemcpyHostToDevice); radix_sort(d_arr, n); cudaMemcpy(arr, d_arr, n * sizeof(int), cudaMemcpyDeviceToHost); - cudaFree(d_arr); + cudaFree(d_arr); end = clock(); cpu_time_used = ((double) (end - start)) * 1000000 / CLOCKS_PER_SEC;