From d5a4fc8f9bfcb64b60037ac12f11687b183edbee Mon Sep 17 00:00:00 2001 From: azharfatrr <m.azhar.faturahman@gmail.com> Date: Mon, 14 Mar 2022 17:00:55 +0700 Subject: [PATCH] feat: add header for brick_sort --- src/lib/bitonic_sort.cu | 1 - src/lib/brick_sort.cu | 61 +++++++++++++++++++++++------------------ src/lib/brick_sort.cuh | 6 ++++ 3 files changed, 41 insertions(+), 27 deletions(-) create mode 100644 src/lib/brick_sort.cuh diff --git a/src/lib/bitonic_sort.cu b/src/lib/bitonic_sort.cu index c126d18..90a2735 100644 --- a/src/lib/bitonic_sort.cu +++ b/src/lib/bitonic_sort.cu @@ -10,7 +10,6 @@ // PROBLEM: THREADS DAN BLOCKS HARUS KELIPATAN 2 // SOLUTION: PADDING DATA KE DALAM BUFFER DENGAN NILAI MAX_INT - void array_fill(int *arr, int length) { srand(time(NULL)); diff --git a/src/lib/brick_sort.cu b/src/lib/brick_sort.cu index fdf9683..8ae6b15 100644 --- a/src/lib/brick_sort.cu +++ b/src/lib/brick_sort.cu @@ -2,11 +2,10 @@ #include <stdio.h> #include <time.h> -// PROBLEM: INI GA JALAN KALAU BLOCKS > 1 +#define THREADS 512 +#define NUM_VALS 512*5 -/* Every thread gets exactly one value in the unsorted array. */ -#define THREADS 16 -#define NUM_VALS 2000 +// PROBLEM: BLOCKS GA BOLEH LEBIH DARI 1. void print_elapsed(clock_t start, clock_t stop) { @@ -47,39 +46,49 @@ void array_fill(int *arr, int length) } } -__device__ void swap(int *d_arr, int i, int j) + +__device__ void swap(int *arr, int i, int j) { - int temp = d_arr[i]; - d_arr[i] = d_arr[j]; - d_arr[j] = temp; + int temp = arr[i]; + arr[i] = arr[j]; + arr[j] = temp; } + __global__ void brick_sort(int *d_arr, int length) { - int l; - if (length % 2 == 0) - l = length / 2; - else - l = (length / 2) + 1; - for (int i = 0; i < l; i++) + int i = blockIdx.x * blockDim.x + threadIdx.x; + + if (i >= length - 1) + { + return; + } + + __syncthreads(); + + if (i % 2 == 0) { - if ((!(threadIdx.x & 1)) && (threadIdx.x < (length - 1))) // even phase + if (d_arr[i] > d_arr[i + 1]) { - if (d_arr[threadIdx.x] > d_arr[threadIdx.x + 1]) - swap(d_arr, threadIdx.x, threadIdx.x + 1); + swap(d_arr, i, i + 1); } + } + + __syncthreads(); - __syncthreads(); - if ((threadIdx.x & 1) && (threadIdx.x < (length - 1))) // odd phase + if (i % 2 != 0) + { + if (d_arr[i] > d_arr[i + 1]) { - if (d_arr[threadIdx.x] > d_arr[threadIdx.x + 1]) - swap(d_arr, threadIdx.x, threadIdx.x + 1); + swap(d_arr, i, i + 1); } - __syncthreads(); - } // for + } + + __syncthreads(); } -int main(void) + +int driver(void) { clock_t start, stop; size_t size = NUM_VALS * sizeof(int); @@ -100,7 +109,7 @@ int main(void) for (int i = 0; i < NUM_VALS / 2; ++i) { - brick_sort<<<1, THREADS>>>(d_arr, NUM_VALS); + brick_sort<<<blocks, THREADS>>>(d_arr, NUM_VALS); } cudaMemcpy(arr, d_arr, size, cudaMemcpyDeviceToHost); @@ -129,7 +138,7 @@ int main(void) printf("%d != %d\n", checksum1, checksum2); } - array_print(arr, NUM_VALS); + // array_print(arr, NUM_VALS); printf("Test %s\n", passed_sort ? "PASSED SORT" : "FAILED SORT"); printf("Test %s\n", passed_checksum ? "PASSED CHECKSUM" : "FAILED CHECKSUM"); diff --git a/src/lib/brick_sort.cuh b/src/lib/brick_sort.cuh new file mode 100644 index 0000000..a923b78 --- /dev/null +++ b/src/lib/brick_sort.cuh @@ -0,0 +1,6 @@ +#ifndef _BRICK_SORT_H_ +#define _BRICK_SORT_H_ + +__global__ void brick_sort(int *d_arr, int length); + +#endif \ No newline at end of file -- GitLab