diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..42b6e0cb45d4974c21fec3acb0da112c75042262
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,26 @@
+CXX := gcc
+CUDA := nvcc
+OUTPUT_DIR := output
+LIB := -lm
+EXEC_PARALEL := radixsort_paralel
+EXEC_SERIAL := radixsort_serial
+
+.PHONY: serial-only debug build clean
+serial-only:
+	@mkdir -p ${OUTPUT_DIR}
+	${CXX} src/radixsort_serial.c -o ${EXEC_SERIAL}
+
+debug:
+	@mkdir -p ${OUTPUT_DIR} bin
+	${CUDA} src/radixsort_paralel.cu src/cudaUtil/cudaUtil.cu src/radixSort/radixSort.cu src/util/util.cpp --device-c
+	${CUDA}  radixsort_paralel.o util.o cudaUtil.o radixSort.o -o bin/radixsort_paralel.out
+
+# countSort is defined in its own .cu file, so all sources are built together with relocatable device code
+build:
+	@mkdir -p ${OUTPUT_DIR}
+	${CUDA} -rdc=true src/radixsort_paralel.cu src/cudaUtil/cudaUtil.cu src/radixSort/radixSort.cu src/util/util.cpp -o ${EXEC_PARALEL}
+	${CXX} src/radixsort_serial.c -o ${EXEC_SERIAL}
+
+clean:
+	@rm -rf ${OUTPUT_DIR}
+	@rm -f ${EXEC_PARALEL} ${EXEC_SERIAL}
diff --git a/doc/Tugas CUDA.pdf b/doc/Tugas CUDA.pdf
deleted file mode 100644
index 632f8439d6a8cd8c4c1269ade74eb9906e15febe..0000000000000000000000000000000000000000
Binary files a/doc/Tugas CUDA.pdf and /dev/null differ
diff --git a/run_cuda.sh b/run_cuda.sh
new file mode 100644
index 0000000000000000000000000000000000000000..adc6b33b1ab7e8e1fb350d349a3fd54145228fe3
--- /dev/null
+++ b/run_cuda.sh
@@ -0,0 +1 @@
+./bin/radixsort_paralel.out $1  # forwards the first script argument (the number of elements) to the binary linked by "make debug"
\ No newline at end of file
diff --git a/src/.gitignore b/src/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..4d861c7e4ad62f9a39b316785324aadb1f3be408
--- /dev/null
+++ b/src/.gitignore
@@ -0,0 +1,51 @@
+
+# Created by https://www.gitignore.io/api/c++,visualstudiocode
+# Edit at https://www.gitignore.io/?templates=c++,visualstudiocode
+
+### C++ ###
+# Prerequisites
+*.d
+
+# Compiled Object files
+*.slo
+*.lo
+*.o
+*.obj
+
+# Precompiled Headers
+*.gch
+*.pch
+
+# Compiled Dynamic libraries
+*.so
+*.dylib
+*.dll
+
+# Fortran module files
+*.mod
+*.smod
+
+# Compiled Static libraries
+*.lai
+*.la
+*.a
+*.lib
+
+# Executables
+*.exe
+*.out
+*.app
+
+### VisualStudioCode ###
+.vscode/*
+!.vscode/settings.json
+!.vscode/tasks.json
+!.vscode/launch.json
+!.vscode/extensions.json
+
+### VisualStudioCode Patch ###
+# Ignore all local history of files
+.history
+
+# End of https://www.gitignore.io/api/c++,visualstudiocode
+
diff --git a/src/cudaUtil/cudaUtil.cu b/src/cudaUtil/cudaUtil.cu
index 430b9e20846cc74da909e16769a1ed3c0cde63af..b9acee97f159eb2d8daa9238a704dd71bcf2a93f 100644
--- a/src/cudaUtil/cudaUtil.cu
+++ b/src/cudaUtil/cudaUtil.cu
@@ -1,34 +1,11 @@
 #include "cudaUtil.cuh"
 #include <stdio.h>
 
-__device__ void getMax(int* arr, int n, int* maxBitIdx){
-  int myId = threadIdx.x + blockDim.x * blockIdx.x;
-  int tId = threadIdx.x;
 
-  for (unsigned int s = blockDim.x/2; s>0; s>>=1){
-    if (tId < s){
-      if (arr[myId] < arr[myId + s]){
-        arr[myId] = arr[myId+s];
-      }
-    }
-    __syncthreads();
-  }
-
-  if (tId == 0){
-    int currentNum = arr[myId];
-    int bitIndex = 0;
-    while(currentNum > 0){
-      currentNum >>=1;
-      bitIndex +=1;
-    }
-    *maxBitIdx = bitIndex;
-  }
-}
 
-__global__ void printArr(int* arr, int n){
-  int index = blockIdx.x * blockDim.x + threadIdx.x;
-  int stride = blockDim.x * gridDim.x;
-  for (int i = index; i<n; i+=stride){
+void printArr(int* arr, int n){  // host function: prints arr[0..n-1] on one line, each value followed by a space
+  for (int i = 0; i<n; i++){
     printf("%d ", arr[i]);
   }
+  printf("\n");  // terminate the line after the last element
 }
diff --git a/src/cudaUtil/cudaUtil.cuh b/src/cudaUtil/cudaUtil.cuh
index 69ce35c6f46f0c0297960ecfa0b5bd76578eb70f..b6ced14db7a23039e6f189b8d28c885097f54679 100644
--- a/src/cudaUtil/cudaUtil.cuh
+++ b/src/cudaUtil/cudaUtil.cuh
@@ -1,7 +1,6 @@
 #ifndef CUDA_UTIL_CUH
 #define CUDA_UTIL_CUH
 
-__device__ void getMax(int* arr, int n, int* out);
-__global__ void printArr(int* arr, int n);
+void printArr(int* arr, int n);
 
 #endif
\ No newline at end of file
diff --git a/src/main.cu b/src/main.cu
deleted file mode 100644
index f8181a4a627a95886f52c8c70af894b5e5e34904..0000000000000000000000000000000000000000
--- a/src/main.cu
+++ /dev/null
@@ -1,41 +0,0 @@
-#include <stdio.h>
-#include "util/util.hpp"
-#include "cudaUtil/cudaUtil.cuh"
-#include "radixSort/radixSort.cuh"
-
-__global__ void cuda_hello(){
-    printf("Hello World from GPU!\n");
-}
-
-int main(int argc, char *argv[]) {
-
-    if (argc != 2){
-        fprintf(stderr, "Usage: <number of elements>\n");
-        exit(1);
-    }
-
-    int n = atoi(argv[1]);
-
-    int* arr = (int *) malloc(sizeof(int) * n);
-    
-    int* d_arr;
-    cudaMalloc((void**)&d_arr, sizeof(int) * n);
-    
-    int* d_out;
-    cudaMalloc((void**)&d_out, sizeof(int) * n);
-    
-    generate_random(arr, n);
-
-    cudaMemcpy(d_arr, arr, sizeof(float) * n, cudaMemcpyHostToDevice);    
-    cudaDeviceSynchronize();
-
-    radixSort<<<1,256>>>(d_out, d_arr, n);
-    cudaDeviceSynchronize();
-
-    cuda_hello<<<1,1>>>(); 
-    cudaDeviceSynchronize();
-
-    cudaFree(d_arr);
-    
-    return 0;
-}
diff --git a/src/radixSort/radixSort.cu b/src/radixSort/radixSort.cu
index b539d5dbb22b591cfe72b92c425e1e223c6a3849..0bc3d16a66c0aa480a380535cf9a339d0064de04 100644
--- a/src/radixSort/radixSort.cu
+++ b/src/radixSort/radixSort.cu
@@ -1,15 +1,54 @@
 #include "radixSort.cuh"
 #include "../cudaUtil/cudaUtil.cuh"
 #include <stdio.h>
+using namespace std; 
+#define WSIZE 32
+__device__ int* d_arr;
 
-__global__
-void radixSort(int* out, int* arr, int n){
-  int* maxBitIdx = (int*) malloc(sizeof(int));
-  
-  if (threadIdx.x == 0){
-    getMax(arr,n,maxBitIdx);
-    printf("%d\n", *maxBitIdx);
+// Device-side debug helper: prints arr[0..n-1] on one line, then a newline.
+__device__ void print_arr(int* arr, int n){
+	for (int idx = 0; idx < n; ++idx)
+	  printf("%d ", arr[idx]);
+	printf("\n");
   }
-  __syncthreads();
+
+// Single-block counting-sort pass keyed on the decimal digit selected by exp
+// (1 = ones, 10 = tens, ...). Launch as countSort<<<1, T>>>(devPtr, n, exp):
+// arr must be device memory holding non-negative values; the n-int scratch
+// buffer comes from the device malloc heap. Blocks other than 0 do nothing.
+__global__ void countSort(int arr[], int n, int exp) 
+{ 
+	__shared__ int *output;     // scratch array: allocated by thread 0, used by the whole block
+	__shared__ int count[10];   // per-digit histogram, later turned into end offsets
+	const int tid = threadIdx.x;
+	int i;
+	if (blockIdx.x != 0 || n <= 0) return;  // uniform per block, so no barrier is skipped
+	if (tid == 0){
+		output = (int*) malloc(sizeof(int) * n);
+		for (int j = 0; j < 10; j++) count[j] = 0;
+	}
+	__syncthreads();                        // publish output/count before any thread uses them
+	if (output == NULL){ if (tid == 0) printf("countSort: device malloc failed\n"); return; }
+	// Histogram: the block-stride loop covers every element, atomics avoid lost updates
+	for (i = tid; i < n; i += blockDim.x)
+		atomicAdd(&count[(arr[i]/exp)%10], 1);
+	__syncthreads();
+	// Prefix sum and back-to-front scatter stay serial in thread 0 so equal digits keep their order
+	if (tid == 0){
+		for (i = 1; i < 10; i++)
+			count[i] += count[i - 1];
+		for (i = n - 1; i >= 0; i--)
+			output[--count[(arr[i]/exp)%10]] = arr[i];
+	}
+	__syncthreads();                        // output[] is complete before the parallel copy-back
+	// Copy the pass result back into arr[], again with a block-stride loop
+	for (i = tid; i < n; i += blockDim.x)
+		arr[i] = output[i];
+	__syncthreads();                        // every read of output[] is done before it is freed
+	if (tid == 0){
+		print_arr(arr,n);                   // debug trace of the pass, printed once per launch
+		free(output);
+	}
+} 
+  
   
-}
diff --git a/src/radixSort/radixSort.cuh b/src/radixSort/radixSort.cuh
index 2814cd8ca70a9a1b7b4a97e4275af0c27cd86123..e6aec1bf9ce77e57e1865efe67178c5e94e36ec8 100644
--- a/src/radixSort/radixSort.cuh
+++ b/src/radixSort/radixSort.cuh
@@ -1,7 +1,7 @@
 #ifndef RADIX_SORT_CUH
 #define RADIX_SORT_CUH
 
-__global__ void radixSort(int* out, int* arr, int n);
+__global__ void countSort(int arr[], int n, int exp); 
 __device__ void sortMSB(int* out, int* arr, int n);
 
 #endif
\ No newline at end of file
diff --git a/src/radixsort_paralel.cu b/src/radixsort_paralel.cu
new file mode 100644
index 0000000000000000000000000000000000000000..f6872cc0b7af3ac3145375aba82ae43e070647c3
--- /dev/null
+++ b/src/radixsort_paralel.cu
@@ -0,0 +1,84 @@
+#include <stdio.h>
+#include "util/util.hpp"
+#include "cudaUtil/cudaUtil.cuh"
+#include "radixSort/radixSort.cuh"
+
+#define WSIZE 32
+
+// Host counting-sort pass on the decimal digit selected by exp (1, 10, 100, ...).
+// Sorts arr[0..n-1] in place, stably, and adds the per-digit occurrence counts
+// into the caller-provided count[10]. Values are assumed to be non-negative.
+void count_sort_paralel_first(int arr[],int count[], int n, int exp) { 
+    if (n <= 0) return;                            // nothing to sort or count
+    int *output = (int*) malloc (sizeof(int) * n); // scratch array, released below
+    if (output == NULL) { fprintf(stderr, "count_sort_paralel_first: out of memory\n"); exit(1); }
+    int i,displacement[10] = {0};
+  
+    // Store count of occurrences in count[] and in the local displacement[] 
+    for (i = 0; i < n; i++) {
+        count[ (arr[i]/exp)%10 ]++; 
+        displacement[ (arr[i]/exp)%10 ]++; 
+    }
+  
+    // Prefix sum: displacement[d] becomes one past the last output slot of digit d 
+    for (i = 1; i < 10; i++) 
+        displacement[i] += displacement[i - 1]; 
+  
+    // Build the output array back to front so equal digits keep their order 
+    for (i = n - 1; i >= 0; i--) 
+        output[--displacement[ (arr[i]/exp)%10 ]] = arr[i]; 
+  
+    // Copy the pass result back into arr[], then release the scratch array 
+    for (i = 0; i < n; i++) 
+        arr[i] = output[i]; 
+    free(output);
+}
+// Returns the largest of arr[0..n-1]; 0 when n <= 0 instead of reading arr[0].
+int getMax(int* arr, int n){
+    int mx = (n > 0) ? arr[0] : 0;
+    for (int i = 1; i < n; i++)
+        if (arr[i] > mx) mx = arr[i];
+    return mx;
+}
+
+
+// Sorts <number of elements> pseudo-random ints on the GPU: one countSort launch
+// per decimal digit of the largest value (LSD radix sort), printing the array
+// before and after. Exits with 1 on bad usage or on any allocation/CUDA failure.
+int main(int argc, char *argv[]) {
+    // n <= 0 also covers non-numeric input, for which atoi() returns 0
+    int n = (argc == 2) ? atoi(argv[1]) : 0;
+    if (n <= 0){
+        fprintf(stderr, "Usage: <number of elements>\n");
+        exit(1);
+    }
+    size_t bytes = sizeof(int) * (size_t) n;
+
+    int* arr = (int *) malloc(bytes);
+    int* d_arr = NULL;
+    if (arr == NULL || cudaMalloc((void**)&d_arr, bytes) != cudaSuccess){
+        fprintf(stderr, "Unable to allocate %zu bytes on host and device\n", bytes);
+        exit(1);
+    }
+
+    generate_random(arr, n);
+    printArr(arr,n);
+    int m = getMax(arr,n);
+
+    // d_arr is a cudaMalloc'd buffer, not a __device__ symbol: copy with cudaMemcpy
+    cudaError_t err = cudaMemcpy(d_arr, arr, bytes, cudaMemcpyHostToDevice);
+    // long long digit weight: an int exp would overflow on exp *= 10 once m >= 10^9
+    for (long long exp = 1; err == cudaSuccess && m/exp > 0; exp *= 10){
+        countSort<<<1,WSIZE>>>(d_arr, n, (int) exp);  // the kernel must get the device pointer
+        err = cudaGetLastError();                     // reports launch-time errors
+    }
+    if (err == cudaSuccess) err = cudaDeviceSynchronize();  // reports errors raised while running
+    if (err == cudaSuccess) err = cudaMemcpy(arr, d_arr, bytes, cudaMemcpyDeviceToHost);
+
+    if (err == cudaSuccess) printArr(arr,n);
+    else fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
+
+    cudaFree(d_arr);
+    free(arr);
+    return err == cudaSuccess ? 0 : 1;
+}
diff --git a/src/radixsort_serial.c b/src/radixsort_serial.c
new file mode 100644
index 0000000000000000000000000000000000000000..a95dac07e271ce214a38719b97061029a999f21a
--- /dev/null
+++ b/src/radixsort_serial.c
@@ -0,0 +1,134 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/time.h>
+#include <time.h>
+#define SEED 13516019
+#define MAX_DIGIT 128
+#define OUTPUT_FILE "output/output_serial.txt"
+
+// Fills arr[0..n-1] with rand() % 999999 after reseeding rand() with SEED
+// (SEED is meant to be replaced with your own student ID number)
+void randomizeArray(int arr[], int n) {
+    long idx = 0;
+    srand(SEED);
+    while (idx < n)
+        arr[idx++] = (int)rand() % 999999;
+}
+
+
+// Returns the largest of arr[0..n-1]; 0 for n <= 0 instead of reading arr[0]
+int getMax(int arr[], int n) 
+{ 
+    int mx = (n > 0) ? arr[0] : 0; 
+    for (int i = 1; i < n; i++) 
+        if (arr[i] > mx) mx = arr[i]; 
+    return mx; 
+} 
+  
+// Stable counting sort of arr[0..n-1] keyed on a single decimal digit.
+// exp selects the digit: 1 = ones, 10 = tens, 100 = hundreds, ...
+void countSort(int arr[], int n, int exp) 
+{ 
+    int count[10] = {0}; 
+    int* output = (int*) malloc(sizeof(int) * n); // scratch buffer, freed below
+    int i, digit;
+    // Histogram: how many keys carry each digit value
+    for (i = 0; i < n; i++) {
+        digit = (arr[i] / exp) % 10;
+        count[digit] += 1; 
+    }
+
+    // Inclusive prefix sum: count[d] becomes one past the last output
+    // slot reserved for digit d
+    for (i = 1; i < 10; i++) 
+        count[i] = count[i] + count[i - 1]; 
+
+    // Scatter from the back so equal digits keep their relative order
+    for (i = n - 1; i >= 0; i--) 
+    { 
+        digit = (arr[i] / exp) % 10;
+        output[--count[digit]] = arr[i]; 
+    } 
+
+    // Write the pass result back into the caller's array
+    for (i = 0; i < n; i++) 
+        arr[i] = output[i]; 
+
+    free(output);
+} 
+  
+// Sorts arr[0..n-1] ascending with an LSD radix sort over base-10 digits.
+// Values are assumed non-negative: countSort() indexes by (value / exp) % 10.
+void radix_sort(int arr[], int n) 
+{ 
+    // The maximum decides how many digit passes are needed 
+    int m = getMax(arr, n); 
+  
+    // One counting-sort pass per digit; exp is 10^i for digit i. The weight
+    // is a long long because an int would overflow on the last exp *= 10
+    // when m >= 10^9 (signed overflow is undefined behaviour). 
+    for (long long exp = 1; m/exp > 0; exp *= 10) 
+        countSort(arr, n, (int) exp); 
+} 
+  
+// Writes the n elements of arr to stdout, each followed by one space
+void print(int arr[], int n) 
+{ 
+    int i = 0;
+    while (i < n) printf("%d ", arr[i++]);
+}
+// Writes "Array size: n" and then the n elements of arr, space-separated,
+// to the file called name (created or truncated). If the file cannot be
+// opened, prints "Unable to write file" and returns without writing.
+void printToFile(int arr[], int n, char name[50]) {
+    FILE* fOut = fopen(name, "w");
+
+    if (fOut == NULL) {
+        // Nothing was opened, so there is nothing to close: calling
+        // fclose(NULL) here would be undefined behaviour.
+        printf("Unable to write file");
+        return;
+    }
+    fprintf(fOut, "Array size: %d\n", n);
+    for (int i = 0; i < n; i++) {
+        fprintf(fOut, "%d ", arr[i]);
+    }
+    fclose(fOut);
+}
+  
+// Returns the elapsed time from start to end in microseconds
+long getTimeDiff(struct timeval start, struct timeval end) {
+    long startUs = (long) start.tv_sec * 1000000 + start.tv_usec;
+    long endUs = (long) end.tv_sec * 1000000 + end.tv_usec;
+    // Subtract the whole start timestamp, its microsecond part included
+    return endUs - startUs;
+}
+  
+// Main program: radix-sorts <number of elements> random ints, prints the
+// elapsed time and writes the sorted array to OUTPUT_FILE
+int main(int argc, char** argv) 
+{ 
+    // n <= 0 also covers non-numeric input, for which atoi() returns 0
+    int n = (argc == 2) ? atoi(argv[1]) : 0;
+    if (n <= 0) {
+        fprintf(stderr, "Usage: <number of elements>\n");
+        exit(1);
+    }
+    struct timeval start, end;
+    int* arr = (int*) malloc(sizeof(int) * n);
+    if (arr == NULL) {
+        fprintf(stderr, "Unable to allocate %d elements\n", n);
+        exit(1);
+    }
+    randomizeArray(arr, n);
+    // Calculate the time and execute the radixsort
+    gettimeofday(&start, NULL);
+    radix_sort(arr, n);
+    gettimeofday(&end, NULL);
+    printf("\n");
+    printf("Execution time (microseconds) - Serial: %ld \xE6s.\n", getTimeDiff(start, end));
+    printToFile(arr, n, OUTPUT_FILE);
+    free(arr);
+    return 0; 
+}
\ No newline at end of file
diff --git a/src/util/util.cpp b/src/util/util.cpp
index ac95ea1e111ed5e38d1a08f12141171b0474fd8a..c1cc0d51655d7ca5a5ed5a611bf5fd5a2f108f1c 100644
--- a/src/util/util.cpp
+++ b/src/util/util.cpp
@@ -8,19 +8,11 @@ void generate_random(int *arr, int n){
   srand(seed);
   for (long i = 0; i < n; i++)
   {
-    arr[i] = (int)rand();
+    arr[i] = (int)rand()%100;
   }
 }
 
-void print_arr(int *array, int num_elements){
-  {
-    for (int i = 0; i < num_elements; i++)
-    {
-      printf("%d ", array[i]);
-    }
-    printf("\n");
-  }
-}
+
 
 int get_max(int arr[], int n){
   int max=INT_MIN;
diff --git a/src/util/util.hpp b/src/util/util.hpp
index 114c8ee784caec926f88d6aedaa5a8d644415847..97b27dc1da836708425edb77a3d1742c4f84183c 100644
--- a/src/util/util.hpp
+++ b/src/util/util.hpp
@@ -2,7 +2,7 @@
 #define UTIL_HPP
 
 void generate_random(int* arr, int n);
-void print_arr(int * array, int num_elements);
+// void print_arr(int * array, int num_elements);
 void assert_sorted(int arr[], int n, double time_elapsed);
 
 #endif
\ No newline at end of file