diff --git a/src/lib/bitonic_sort.cu b/src/lib/bitonic_sort.cu
index c126d18079df6bceae75d311071f39d63c579815..90a2735b25725af46aebe01fd65ce5d8b61b916a 100644
--- a/src/lib/bitonic_sort.cu
+++ b/src/lib/bitonic_sort.cu
@@ -10,7 +10,6 @@
 // PROBLEM: THREADS DAN BLOCKS HARUS KELIPATAN 2
 // SOLUTION: PADDING DATA KE DALAM BUFFER DENGAN NILAI MAX_INT
 
-
 void array_fill(int *arr, int length)
 {
   srand(time(NULL));
diff --git a/src/lib/brick_sort.cu b/src/lib/brick_sort.cu
index fdf9683640c6429780594d0c632ba892d953e558..8ae6b15193a49c2b84868a21e33a1b2adb075965 100644
--- a/src/lib/brick_sort.cu
+++ b/src/lib/brick_sort.cu
@@ -2,11 +2,10 @@
 #include <stdio.h>
 #include <time.h>
 
-// PROBLEM: INI GA JALAN KALAU BLOCKS > 1
+#define THREADS 512 // threads per block used for the brick_sort launch
+#define NUM_VALS (512 * 5) // element count; parenthesized so it expands safely inside larger expressions
 
-/* Every thread gets exactly one value in the unsorted array. */
-#define THREADS 16
-#define NUM_VALS 2000
+// PROBLEM: THE NUMBER OF BLOCKS MUST NOT EXCEED 1.
 
 void print_elapsed(clock_t start, clock_t stop)
 {
@@ -47,39 +46,49 @@ void array_fill(int *arr, int length)
   }
 }
 
-__device__ void swap(int *d_arr, int i, int j)
+// Exchanges the elements stored at indices i and j of arr, in place.
+__device__ void swap(int *arr, int i, int j)
 {
-  int temp = d_arr[i];
-  d_arr[i] = d_arr[j];
-  d_arr[j] = temp;
+  const int held = arr[j];
+  arr[j] = arr[i];
+  arr[i] = held;
 }
 
+
 __global__ void brick_sort(int *d_arr, int length)
 {
-  int l;
-  if (length % 2 == 0)
-    l = length / 2;
-  else
-    l = (length / 2) + 1;
-  for (int i = 0; i < l; i++)
+  int i = blockIdx.x * blockDim.x + threadIdx.x;
+
+  if (i >= length - 1)
+  {
+    return;
+  }
+
+  __syncthreads();
+
+  if (i % 2 == 0)
   {
-    if ((!(threadIdx.x & 1)) && (threadIdx.x < (length - 1))) // even phase
+    if (d_arr[i] > d_arr[i + 1])
     {
-      if (d_arr[threadIdx.x] > d_arr[threadIdx.x + 1])
-        swap(d_arr, threadIdx.x, threadIdx.x + 1);
+      swap(d_arr, i, i + 1);
     }
+  }
+
+  __syncthreads();
 
-    __syncthreads();
-    if ((threadIdx.x & 1) && (threadIdx.x < (length - 1))) // odd phase
+  if (i % 2 != 0)
+  {
+    if (d_arr[i] > d_arr[i + 1])
     {
-      if (d_arr[threadIdx.x] > d_arr[threadIdx.x + 1])
-        swap(d_arr, threadIdx.x, threadIdx.x + 1);
+      swap(d_arr, i, i + 1);
     }
-    __syncthreads();
-  } // for
+  }
+
+  __syncthreads();
 }
 
-int main(void)
+
+int driver(void)
 {
   clock_t start, stop;
   size_t size = NUM_VALS * sizeof(int);
@@ -100,7 +109,7 @@ int main(void)
 
   for (int i = 0; i < NUM_VALS / 2; ++i)
   {
-    brick_sort<<<1, THREADS>>>(d_arr, NUM_VALS);
+    brick_sort<<<blocks, THREADS>>>(d_arr, NUM_VALS);
   }
 
   cudaMemcpy(arr, d_arr, size, cudaMemcpyDeviceToHost);
@@ -129,7 +138,7 @@ int main(void)
     printf("%d != %d\n", checksum1, checksum2);
   }
 
-  array_print(arr, NUM_VALS);
+  // array_print(arr, NUM_VALS);
 
   printf("Test %s\n", passed_sort ? "PASSED SORT" : "FAILED SORT");
   printf("Test %s\n", passed_checksum ? "PASSED CHECKSUM" : "FAILED CHECKSUM");
diff --git a/src/lib/brick_sort.cuh b/src/lib/brick_sort.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..a923b784d8cb252e08b6d03813db1e717b65fcb7
--- /dev/null
+++ b/src/lib/brick_sort.cuh
@@ -0,0 +1,6 @@
+#ifndef BRICK_SORT_CUH
+#define BRICK_SORT_CUH
+// One even + one odd compare-exchange pass of brick sort over d_arr[0..length-1].
+__global__ void brick_sort(int *d_arr, int length);
+
+#endif // BRICK_SORT_CUH
\ No newline at end of file