diff --git a/algorithm/bitonic_sort.cu b/algorithm/bitonic_sort.cu
index 4b0331477cb2df51b9aad871980eee347af5a06f..e35d1c6c26aeba6b5076dd99625f3969a72d815a 100644
--- a/algorithm/bitonic_sort.cu
+++ b/algorithm/bitonic_sort.cu
@@ -9,15 +9,23 @@
 // PROBLEM: THREADS DAN BLOCKS HARUS KELIPATAN 2
 // SOLUTION: PADDING DATA KE DALAM BUFFER DENGAN NILAI MAX_INT
 
-/* Every thread gets exactly one value in the unsorted array. */
-#define THREADS 1024
-#define BLOCKS 16 
-#define NUM_VALS 1000
 
-void print_elapsed(clock_t start, clock_t stop)
+void array_fill(int *arr, int length)
 {
-  double elapsed = ((double) (stop - start)) / CLOCKS_PER_SEC;
-  printf("Elapsed time: %.3fs\n", elapsed);
+  srand(time(NULL));
+  int i;
+  for (i = 0; i < length; ++i) {
+    arr[i] = rand();
+  }
+}
+
+void array_print(int *arr, int length) 
+{
+  int i;
+  for (i = 0; i < length; ++i) {
+    printf("%d ",  arr[i]);
+  }
+  printf("\n");
 }
 
 int checksum(int *arr, int size)
@@ -28,136 +36,170 @@ int checksum(int *arr, int size)
   return sum;
 }
 
-int ceil_power_of_two(int n)
+bool isSorted(int *arr, int size)
 {
-  int i = 1;
-  while (i < n) {
-    i = i << 1;
-  }
-  return i;
+  for (int i = 0; i < size - 1; i++)
+    if (arr[i] > arr[i + 1])
+      return false;
+  return true;
 }
 
-void array_print(int *arr, int length) 
+void print_elapsed(clock_t start, clock_t stop)
 {
-  int i;
-  for (i = 0; i < length; ++i) {
-    printf("%d ",  arr[i]);
-  }
-  printf("\n");
+  double elapsed = ((double) (stop - start)) / CLOCKS_PER_SEC;
+  printf("Elapsed time: %.3fs\n", elapsed);
 }
 
-void array_fill(int *arr, int length)
+/**
+ * Swap the values of two elements in an array.
+ * 
+ * @param d_arr - the array.
+ * @param i - the index of the first element.
+ * @param j - the index of the second element.
+ */
+__device__ void swap(int *d_arr, int i, int j)
 {
-  srand(time(NULL));
-  int i;
-  for (i = 0; i < length; ++i) {
-    arr[i] = rand();
-  }
+  int temp = d_arr[i];
+  d_arr[i] = d_arr[j];
+  d_arr[j] = temp;
 }
 
-void pad_array(int *arr, int length, int size)
+/**
+ * Pad the unused slots at the end of the buffer with the maximum int value (INT_MAX).
+ * 
+ * @param arr - the array.
+ * @param length - the number of elements inside the array.
+ * @param length_buffer - the length of the buffer.
+ */
+void padding_array(int *arr, int length, int length_buffer)
 {
-  int i;
-  for (i = length; i < size; i++)
-  {
+  for (int i = length; i < length_buffer; i++) {
     arr[i] = INT_MAX;
   }
 }
 
-void copy_and_pad(int *dest, int *src, int length, int size)
+/**
+ * Copy the array from src to dest and pad the remaining slots of dest with INT_MAX.
+ * 
+ * @param dest - the destination array.
+ * @param src - the source array.
+ * @param length - the number of elements inside the src array.
+ * @param length_buffer - the length of the buffer.
+ */
+void copy_padding(int *dest, int *src, int length, int length_buffer)
 {
-  int i;
-  for (i = 0; i < length; i++)
-  {
+  for (int i = 0; i < length; i++) {
     dest[i] = src[i];
   }
-  pad_array(dest, length, size);
+  padding_array(dest, length, length_buffer);
 }
 
-__global__ void bitonic_sort_step(int *dev_values, int j, int k)
+/**
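+ * Perform a single compare-and-swap step of the bitonic sort.
+ * 
+ * @param d_arr - array in the device to be sorted.
+ * @param i - major step size; the bit (idx & i) selects ascending or descending order.
+ * @param j - minor step size; each element is paired with the element at index idx ^ j.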
+ */
+__global__ void bitonic_sort_step(int *d_arr, int i, int j)
 {
-  unsigned int i, ixj; /* Sorting partners: i and ixj */
-  i = threadIdx.x + blockDim.x * blockIdx.x;
-
-  ixj = i^j;
-
-  /* The threads with the lowest ids sort the array. */
-  if ((ixj)>i) {
-    if ((i&k)==0) {
-      /* Sort ascending */
-      if (dev_values[i]>dev_values[ixj]) {
-        /* exchange(i,ixj); */
-        int temp = dev_values[i];
-        dev_values[i] = dev_values[ixj];
-        dev_values[ixj] = temp;
+  // The array index handled by this thread and its partner index.
+  unsigned int idx, patner;
+  idx = threadIdx.x + blockDim.x * blockIdx.x;
+
+  // Compute the partner index.
+  patner = idx ^ j;
+
+  // Only the thread holding the lower index of each pair performs the compare-and-swap.
+  if (idx < patner) {
+    if ((idx & i) == 0) {
+      // Sort ascending.
+      if (d_arr[idx] > d_arr[patner]) {
+        swap(d_arr, idx, patner);
       }
     }
-    if ((i&k)!=0) {
-      /* Sort descending */
-      if (dev_values[i]<dev_values[ixj]) {
-        /* exchange(i,ixj); */
-        int temp = dev_values[i];
-        dev_values[i] = dev_values[ixj];
-        dev_values[ixj] = temp;
+    if ((idx & i) != 0) {
+      // Sort descending.
+      if (d_arr[idx] < d_arr[patner]) {
+        swap(d_arr, idx, patner);
       }
     }
   }
 }
 
-void bitonic_sort(int *values)
+/**
+ * Perform a bitonic sort on the array.
+ *
+ * @param h_arr The host array to sort.
+ * @param length The length of the array; if it exceeds threads * blocks (32768) an error is printed and the array is left unsorted.
+ */
+void bitonic_sort(int *h_arr, int length)
 {
-  int buffer_length = BLOCKS * THREADS;
-
-  size_t size = buffer_length * sizeof(int);
-  size_t original_size = NUM_VALS * sizeof(int);
-
-  // Copy and padding the values.
-  int *h_values = (int *)malloc(size);
-  copy_and_pad(h_values, values, NUM_VALS, buffer_length);
-
-  // Allocate device memory.
-  int *dev_values;
-  cudaMalloc((void**) &dev_values, size);
-  cudaMemcpy(dev_values, h_values, size, cudaMemcpyHostToDevice);
-
-  // Sort the values.
-  int j, k;
-  // Major step.
-  for (k = 2; k <= buffer_length; k <<= 1) {
-    // Minor step.
-    for (j=k>>1; j>0; j=j>>1) {
-      bitonic_sort_step<<<BLOCKS, THREADS>>>(dev_values, j, k);
+  // Launch configuration constants; the buffer holds one element per thread.
+  const int threads = 1024;
+  const int blocks = 32;
+  const int buffer_length = threads * blocks;
+
+  // Check that the buffer can hold the array.
+  if (length > buffer_length) {
+    printf("[ERROR] The array length is %d, but the buffer length is %d.\n", length, buffer_length);
+    return;
+  }
+
+  // Initialize the memory size of the array.
+  size_t size = length * sizeof(int);
+  size_t buffer_size = buffer_length * sizeof(int);
+
+  // Create the host buffer and pad its unused tail with INT_MAX.
+  int *h_buffer = (int *)malloc(buffer_size);
+  copy_padding(h_buffer, h_arr, length, buffer_length);
+
+  // Allocate and copy array into device memory.
+  int *d_arr;
+  cudaMalloc((void**) &d_arr, buffer_size);
+  cudaMemcpy(d_arr, h_buffer, buffer_size, cudaMemcpyHostToDevice);
+
+  // Sort the array using bitonic_sort_step.
+  int i, j;
+  // The major step.
+  for (i = 2; i <= buffer_length; i *= 2) {
+    // The minor step.
+    for (j = i / 2; j > 0; j = j / 2) {
+      bitonic_sort_step<<<blocks, threads>>>(d_arr, i, j);
     }
   }
 
   // Copy the values back to the host.
-  cudaMemcpy(values, dev_values, original_size, cudaMemcpyDeviceToHost);
+  cudaMemcpy(h_arr, d_arr, size, cudaMemcpyDeviceToHost);
 
   // Free device memory.
-  cudaFree(dev_values);
-  free(h_values);
+  cudaFree(d_arr);
+  free(h_buffer);
 }
 
 int main(void)
 {
   clock_t start, stop;
+  int length = 1000000;
 
-  int *values = (int*) malloc( NUM_VALS * sizeof(int));
-  array_fill(values, NUM_VALS);
+  int *values = (int*) malloc( length * sizeof(int));
+  array_fill(values, length);
 
-  array_print(values, NUM_VALS);
-  printf("\n");
+  // array_print(values, length);
+  // printf("\n");
 
-  int checksum1 = checksum(values, NUM_VALS);
+  int checksum1 = checksum(values, length);
 
   start = clock();
-  bitonic_sort(values); /* Inplace */
+
+  bitonic_sort(values, length);
+
   stop = clock();
 
-  int checksum2 = checksum(values, NUM_VALS);
+  int checksum2 = checksum(values, length);
 
-  array_print(values, NUM_VALS);
-  printf("\n");
+  // array_print(values, length);
+  // printf("\n");
 
   print_elapsed(start, stop);
 
@@ -165,13 +207,7 @@ int main(void)
   bool passed_sort = true;
   bool passed_checksum = true;
 
-  for (int i = 1; i < NUM_VALS; i++)
-  {
-    if (values[i - 1] > values[i])
-    {
-      passed_sort = false;
-    }
-  }
+  passed_sort = isSorted(values, length);
 
   if (checksum1 != checksum2)
   {
diff --git a/algorithm/brick_sort.cu b/algorithm/brick_sort.cu
index f7295d45eaa22aba2af218a9f69a8dd96c36ce79..aa2c27e2192505d2c01e1ecc1f8d755e7eab8c77 100644
--- a/algorithm/brick_sort.cu
+++ b/algorithm/brick_sort.cu
@@ -1,7 +1,6 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include <time.h>
-#include <cooperative_groups.h>
 
 // PROBLEM: INI GA JALAN KALAU BLOCKS > 1
 
@@ -9,8 +8,6 @@
 #define THREADS 16
 #define NUM_VALS 2000
 
-using namespace cooperative_groups;
-
 void print_elapsed(clock_t start, clock_t stop)
 {
   double elapsed = ((double)(stop - start)) / CLOCKS_PER_SEC;
@@ -50,11 +47,11 @@ void array_fill(int *arr, int length)
   }
 }
 
-__device__ void swap(int *arr, int i, int j)
+__device__ void swap(int *d_arr, int i, int j)
 {
-  int temp = arr[i];
-  arr[i] = arr[j];
-  arr[j] = temp;
+  int temp = d_arr[i];
+  d_arr[i] = d_arr[j];
+  d_arr[j] = temp;
 }
 
 __global__ void brick_sort(int *d_arr, int length)