diff --git a/src/lib/bitonic_sort.cu b/src/lib/bitonic_sort.cu
index c126d18079df6bceae75d311071f39d63c579815..90a2735b25725af46aebe01fd65ce5d8b61b916a 100644
--- a/src/lib/bitonic_sort.cu
+++ b/src/lib/bitonic_sort.cu
@@ -10,7 +10,6 @@
 // PROBLEM: THREADS DAN BLOCKS HARUS KELIPATAN 2
 // SOLUTION: PADDING DATA KE DALAM BUFFER DENGAN NILAI MAX_INT
 
-
 void array_fill(int *arr, int length)
 {
     srand(time(NULL));
diff --git a/src/lib/brick_sort.cu b/src/lib/brick_sort.cu
--- a/src/lib/brick_sort.cu
+++ b/src/lib/brick_sort.cu
@@ -2,11 +2,10 @@
 #include <stdio.h>
 #include <time.h>
 
-// PROBLEM: INI GA JALAN KALAU BLOCKS > 1
+#define THREADS 512
+#define NUM_VALS (512 * 5)
 
-/* Every thread gets exactly one value in the unsorted array. */
-#define THREADS 16
-#define NUM_VALS 2000
+// PROBLEM: must not be launched with more than one block.
 
 void print_elapsed(clock_t start, clock_t stop)
 {
@@ -47,39 +46,54 @@ void array_fill(int *arr, int length)
     }
 }
 
-__device__ void swap(int *d_arr, int i, int j)
+
+__device__ void swap(int *arr, int i, int j)
 {
-    int temp = d_arr[i];
-    d_arr[i] = d_arr[j];
-    d_arr[j] = temp;
+    int temp = arr[i];
+    arr[i] = arr[j];
+    arr[j] = temp;
 }
+
 
+/*
+ * Runs one even compare-exchange pass followed by one odd pass over d_arr.
+ * Expects a 1-D launch in which global thread i handles the pair (i, i + 1).
+ *
+ * NOTE: __syncthreads() only synchronizes threads of one block, so with more
+ * than one block the even and odd passes can overlap on the pair that
+ * straddles a block boundary. Launching each pass as its own kernel would
+ * order them across the whole grid.
+ */
 __global__ void brick_sort(int *d_arr, int length)
 {
-    int l;
-    if (length % 2 == 0)
-        l = length / 2;
-    else
-        l = (length / 2) + 1;
-    for (int i = 0; i < l; i++)
+    int i = blockIdx.x * blockDim.x + threadIdx.x;
+
+    // Threads past the last pair skip the compares but stay in the kernel:
+    // returning early would leave the barrier below short of participants.
+    bool has_pair = i < length - 1;
+
+    if (has_pair && i % 2 == 0)
     {
-        if ((!(threadIdx.x & 1)) && (threadIdx.x < (length - 1))) // even phase
+        if (d_arr[i] > d_arr[i + 1])
         {
-            if (d_arr[threadIdx.x] > d_arr[threadIdx.x + 1])
-                swap(d_arr, threadIdx.x, threadIdx.x + 1);
+            swap(d_arr, i, i + 1);
         }
+    }
 
-        __syncthreads();
-        if ((threadIdx.x & 1) && (threadIdx.x < (length - 1))) // odd phase
+    // Even-pass writes must land before the odd pass reads the same slots.
+    __syncthreads();
+
+    if (has_pair && i % 2 != 0)
+    {
+        if (d_arr[i] > d_arr[i + 1])
         {
-            if (d_arr[threadIdx.x] > d_arr[threadIdx.x + 1])
-                swap(d_arr, threadIdx.x, threadIdx.x + 1);
+            swap(d_arr, i, i + 1);
         }
-        __syncthreads();
-    } // for
+    }
 }
 
+
-int main(void)
+int driver(void)
 {
     clock_t start, stop;
     size_t size = NUM_VALS * sizeof(int);
@@ -100,7 +114,7 @@ int main(void)
 
     for (int i = 0; i < NUM_VALS / 2; ++i)
     {
-        brick_sort<<<1, THREADS>>>(d_arr, NUM_VALS);
+        brick_sort<<<blocks, THREADS>>>(d_arr, NUM_VALS);
     }
 
     cudaMemcpy(arr, d_arr, size, cudaMemcpyDeviceToHost);
@@ -129,7 +143,7 @@ int main(void)
         printf("%d != %d\n", checksum1, checksum2);
     }
 
-    array_print(arr, NUM_VALS);
+    // array_print(arr, NUM_VALS);
 
     printf("Test %s\n", passed_sort ? "PASSED SORT" : "FAILED SORT");
     printf("Test %s\n", passed_checksum ? "PASSED CHECKSUM" : "FAILED CHECKSUM");
diff --git a/src/lib/brick_sort.cuh b/src/lib/brick_sort.cuh
new file mode 100644
--- /dev/null
+++ b/src/lib/brick_sort.cuh
@@ -0,0 +1,8 @@
+#ifndef BRICK_SORT_CUH_
+#define BRICK_SORT_CUH_
+
+// One even pass plus one odd pass of brick sort over d_arr[0..length).
+// Block barriers do not order separate blocks; launch with a single block.
+__global__ void brick_sort(int *d_arr, int length);
+
+#endif