Skip to content
Snippets Groups Projects
Commit dc771107 authored by Nicholas Rianto Putra's avatar Nicholas Rianto Putra
Browse files

edit makefile

parent 9a932d29
Branches
No related merge requests found
radix_sort: radix_sort:
gcc -g -Wall -o serial_radix_sort src/serial_radix_sort.c -lm
nvcc src/radix_sort_parallel.cu -o radix_sort nvcc src/radix_sort_parallel.cu -o radix_sort
...@@ -55,15 +55,15 @@ __global__ void assign_index(int* arr, int* arr_idx, int* down_sweep_i, int* up_ ...@@ -55,15 +55,15 @@ __global__ void assign_index(int* arr, int* arr_idx, int* down_sweep_i, int* up_
} }
// Builds the per-element flag array for the radix pass on bit `idx`:
// flags[i] is 1 when bit `idx` of arr[i] is clear, and 0 when it is set.
//
// Parameters:
//   arr   - input keys, read-only here (n elements)
//   n     - number of elements to process
//   idx   - bit position being examined in this pass
//   flags - output array, one int per element (n elements)
//
// Indexing: grid-stride loop over a 1D grid of 1D blocks, so any launch
// configuration covers all n elements.
__global__ void assign_flags(int* arr, int n, int idx, int* flags) {
    const int bit_mask = 1 << idx;
    // Kept unsigned so the index arithmetic matches the built-in variable types.
    const unsigned int stride = blockDim.x * gridDim.x;

    int pos = blockIdx.x * blockDim.x + threadIdx.x;
    while (pos < n) {
        const bool bit_is_set = (arr[pos] & bit_mask) == bit_mask;
        flags[pos] = bit_is_set ? 0 : 1;
        pos += stride;
    }
}
// Scatters arr_temp into arr: element i of arr_temp is written to slot
// arr_idx[i] of arr.
//
// Parameters:
//   arr      - destination array, written at the positions given by arr_idx
//   arr_temp - source values (n elements)
//   arr_idx  - destination index for each source element (n elements)
//   n        - number of elements to move
//
// NOTE(review): presumably arr_idx holds distinct in-range indices so that no
// two threads write the same slot -- confirm against the caller.
//
// Indexing: grid-stride loop over a 1D grid of 1D blocks.
__global__ void scatter(int* arr, int* arr_temp, int* arr_idx, int n) {
    // Kept unsigned so the index arithmetic matches the built-in variable types.
    const unsigned int stride = blockDim.x * gridDim.x;

    int src = blockIdx.x * blockDim.x + threadIdx.x;
    while (src < n) {
        const int dst = arr_idx[src];
        arr[dst] = arr_temp[src];
        src += stride;
    }
}
void count_sort(int* d_arr, int n, int idx) { void count_sort(int* d_arr, int n, int idx) {
...@@ -129,11 +129,11 @@ int main(int argc, char *argv[]) { ...@@ -129,11 +129,11 @@ int main(int argc, char *argv[]) {
// Move sorting CPU // Move sorting CPU
int* d_arr; int* d_arr;
cudaMalloc(&d_arr, sizeof(int) * n); cudaMalloc(&d_arr, sizeof(int) * n);
cudaMemcpy(d_arr, arr, sizeof(int) * n, cudaMemcpyHostToDevice); cudaMemcpy(d_arr, arr, sizeof(int) * n, cudaMemcpyHostToDevice);
radix_sort(d_arr, n); radix_sort(d_arr, n);
cudaMemcpy(arr, d_arr, n * sizeof(int), cudaMemcpyDeviceToHost); cudaMemcpy(arr, d_arr, n * sizeof(int), cudaMemcpyDeviceToHost);
cudaFree(d_arr); cudaFree(d_arr);
end = clock(); end = clock();
cpu_time_used = ((double) (end - start)) * 1000000 / CLOCKS_PER_SEC; cpu_time_used = ((double) (end - start)) * 1000000 / CLOCKS_PER_SEC;
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment