diff --git a/.DS_Store b/.DS_Store
index 1175da2ac62dc0f47eca32d97a9a2eb3d543a4a9..ea1d8fcae3082442de5386a3a8dd2fa5cbd0955a 100644
Binary files a/.DS_Store and b/.DS_Store differ
diff --git a/src/.DS_Store b/src/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..8d1c6ed5fdc8f98ecae7e03285b3580fa78ccedd
Binary files /dev/null and b/src/.DS_Store differ
diff --git a/src/kernel.cu b/src/kernel.cu
deleted file mode 100644
index b7ded00cc83d428ec509f8fbd76000bb0991070d..0000000000000000000000000000000000000000
--- a/src/kernel.cu
+++ /dev/null
@@ -1,262 +0,0 @@
-#pragma once
-#ifdef __INTELLISENSE__
-void __syncthreads();
-#endif
-
-#include "cuda_runtime.h"
-#include "device_launch_parameters.h"
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <iostream>
-#include <chrono>
-
-using namespace std;
-using namespace std::chrono;
-
-#define WSIZE 32
-#define LOOPS 100
-#define UPPER_BIT 10
-#define LOWER_BIT 0
-
-__device__ unsigned int ddata[WSIZE];
-__device__ int ddata_s[WSIZE];
-
-template <typename T, unsigned S>
-inline unsigned arraysize(const T(&v)[S])
-{
-	return S;
-}
-
-template<typename T>
-void printArray(T &arr)
-{
-	for (int i = 0; i < arraysize(arr); ++i)
-	{
-		cout << "Array[" << i << "]: " << *(arr + i) << endl;
-	}
-}
-
-template<typename T>
-void fillArray(T &arr)
-{
-	srand(time(NULL));
-	for (int i = 0; i < arraysize(arr); ++i)
-	{
-		arr[i] = rand() % 1024;
-	}
-}
-
-void print(int arr[], int n)
-{
-	for (int i = 0; i < n; i++)
-	{
-		cout << arr[i] << " ";
-	}
-	cout << endl;
-}
-
-template<typename T>
-T findMax(T &arr)
-{
-	T max = 0;
-	for (int i = 0; i < arraysize(arr); ++i)
-	{
-		if (arr[i] > max)
-		{
-			max = arr[i];
-		}
-	}
-	return max;
-}
-
-__device__ int getMax(int arr[], int n)
-{
-	int mx = arr[0];
-	for (int i = 1; i < n; i++)
-		if (arr[i] > mx)
-			mx = arr[i];
-	return mx;
-}
-
-__device__ void countSort(int arr[], int n, int exp)
-{
-	int output[1024]; // Output array
-	int i, count[10] = { 0 };
-
-	// Store count of occurrences in count[]
-	for (i = 0; i < n; i++)
-		count[(arr[i] / exp) % 10]++;
-
-	// Change count[i] so that count[i] now contains actual
-	// position of this digit in output[]
-	for (i = 1; i < 10; i++)
-	{
-		count[i] += count[i - 1];
-	}
-
-	// Build the output array
-	for (i = n - 1; i >= 0; i--)
-	{
-		output[count[(arr[i] / exp) % 10] - 1] = arr[i];
-		count[(arr[i] / exp) % 10]--;
-	}
-
-	// Copy the output array to arr[], so that arr[] now
-	// contains sorted numbers according to current digit
-	for (i = 0; i < n; i++)
-		arr[i] = output[i];
-}
-
-__device__ void radixsort(int arr[], int n)
-{
-	// Find the maximum number to know number of digits
-	int m = getMax(arr, n);
-
-	// Do counting sort for every digit. Note that instead
-	// of passing digit number, exp is passed. exp is 10^i
-	// where i is current digit number
-	for (int exp = 1; m / exp > 0; exp *= 10)
-		countSort(arr, n, exp);
-}
-
-__global__ void serialRadix()
-{
-	radixsort(ddata_s, WSIZE);
-	__syncthreads();
-}
-
-__global__ void parallelRadix()
-{
-	// This data in shared memory
-	__shared__ volatile unsigned int sdata[WSIZE * 2];
-
-	// Load from global into shared variable
-	sdata[threadIdx.x] = ddata[threadIdx.x];
-
-	unsigned int bitmask = 1 << LOWER_BIT;
-	unsigned int offset = 0;
-	// -1, -2, -4, -8, -16, -32, -64, -128, -256,...
-	unsigned int thrmask = 0xFFFFFFFFU << threadIdx.x;
-	unsigned int mypos;
-
-	// For each LSB to MSB
-	for (int i = LOWER_BIT; i <= UPPER_BIT; i++)
-	{
-		unsigned int mydata = sdata[((WSIZE - 1) - threadIdx.x) + offset];
-		unsigned int mybit = mydata&bitmask;
-		// Get population of ones and zeroes
-		unsigned int ones = __ballot(mybit);
-		unsigned int zeroes = ~ones;
-		// Switch ping-pong buffers
-		offset ^= WSIZE;
-
-		// Do zeroes, then ones
-		if (!mybit)
-		{
-			mypos = __popc(zeroes&thrmask);
-		}
-		else  {      // Threads with a one bit
-			// Get my position in ping-pong buffer
-			mypos = __popc(zeroes) + __popc(ones&thrmask);
-		}
-
-		// Move to buffer  (or use shfl for cc 3.0)
-		sdata[mypos - 1 + offset] = mydata;
-		// Repeat for next bit
-		bitmask <<= 1;
-	}
-	// Put results to global
-	ddata[threadIdx.x] = sdata[threadIdx.x + offset];
-}
-
-int main() {
-
-	/* Parallel Radix Sort */
-
-	unsigned int hdata[WSIZE];
-	float totalTime = 0;
-
-	for (int lcount = 0; lcount < LOOPS; lcount++)
-	{
-		srand(time(NULL));
-		// Array elements have value in range of 1024
-		unsigned int range = 1U << UPPER_BIT;
-
-		// Fill array with random elements
-		// Range = 1024
-		for (int i = 0; i < WSIZE; i++)
-		{
-			hdata[i] = i;
-		}
-
-		// Copy data from host to device
-		cudaMemcpyToSymbol(ddata, hdata, WSIZE * sizeof(unsigned int));
-
-		// Execution time measurement, that point starts the clock
-		high_resolution_clock::time_point t1 = high_resolution_clock::now();
-		parallelRadix <<< 1, WSIZE >>>();
-		// Make kernel function synchronous
-		cudaDeviceSynchronize();
-		// Execution time measurement, that point stops the clock
-		high_resolution_clock::time_point t2 = high_resolution_clock::now();
-
-		// Execution time measurement, that is the result
-		auto duration = duration_cast<milliseconds>(t2 - t1).count();
-
-		// Summination of each loops' execution time
-		totalTime += (float)duration / 1000.00;
-
-		// Copy data from device to host
-		cudaMemcpyFromSymbol(hdata, ddata, WSIZE * sizeof(unsigned int));
-	}
-
-	printf("Parallel Radix Sort:\n");
-	printf("Array size = %d\n", WSIZE * LOOPS);
-	printf("Time elapsed = %fseconds\n", totalTime);
-
-	/* Serial Radix Sort */
-
-	unsigned int hdata_s[WSIZE];
-	totalTime = 0;
-
-	for (int lcount = 0; lcount < LOOPS; lcount++)
-	{
-		srand(time(NULL));
-		// Array elements have value in range of 1024
-		unsigned int range = 1U << UPPER_BIT;
-
-		// Fill array with random elements
-		// Range = 1024
-		for (int i = 0; i < WSIZE; i++)
-		{
-			hdata_s[i] = i;
-		}
-
-		// Copy data from host to device
-		cudaMemcpyToSymbol(ddata_s, hdata_s, WSIZE * sizeof(unsigned int));
-
-		// Execution time measurement, that point starts the clock
-		high_resolution_clock::time_point t1 = high_resolution_clock::now();
-		serialRadix <<< 1, 1 >>>();
-		// Make kernel function synchronous
-		cudaDeviceSynchronize();
-		// Execution time measurement, that point stops the clock
-		high_resolution_clock::time_point t2 = high_resolution_clock::now();
-
-		// Execution time measurement, that is the result
-		auto duration = duration_cast<milliseconds>(t2 - t1).count();
-
-		// Summination of each loops' execution time
-		totalTime += (float)duration / 1000.00;
-
-		// Copy data from device to host
-		cudaMemcpyFromSymbol(hdata_s, ddata_s, WSIZE * sizeof(unsigned int));
-	}
-
-	printf("\nSerial Radix Sort:\n");
-	printf("Array size = %d\n", WSIZE * LOOPS);
-	printf("Time elapsed = %fseconds\n\n", totalTime);
-
-	return 0;
-}
diff --git a/src/radix_par2.cpp b/src/radix_par2.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..eae49a3713dec42e968c6d4cea75d8dcc9499f6f
--- /dev/null
+++ b/src/radix_par2.cpp
@@ -0,0 +1,184 @@
+// Radix sort (LSD, base 10) of non-negative ints. Each counting-sort pass
+// runs on the GPU in a single thread block.
+// NOTE: this file contains CUDA kernels despite its .cpp extension; nvcc
+// only compiles it as CUDA when invoked with -x cu.
+#include <climits>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <time.h>
+#include <utility>
+#include <vector>
+
+#include <cuda_runtime.h>
+
+using namespace std;
+
+// Number of buckets per pass (one per decimal digit).
+constexpr int RADIX = 10;
+// Threads in the single block that runs countSort; sizes its shared memory.
+constexpr int SORT_THREADS = 32;
+
+// Exit with a readable message when a CUDA runtime call fails.
+#define CUDA_CHECK(call) \
+    do { \
+        cudaError_t err_ = (call); \
+        if (err_ != cudaSuccess) { \
+            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__, \
+                    cudaGetErrorString(err_)); \
+            exit(EXIT_FAILURE); \
+        } \
+    } while (0)
+
+// Returns the largest value in arr[0..n). Requires n >= 1.
+int getMax(int arr[], int n)
+{
+    int mx = arr[0];
+    for (int i = 1; i < n; i++)
+        if (arr[i] > mx)
+            mx = arr[i];
+    return mx;
+}
+
+// Stable counting sort of in[0..n) into out[0..n), keyed on the decimal digit
+// selected by exp (1, 10, 100, ...).
+//
+// Launch layout: exactly one block of at most SORT_THREADS threads; each
+// thread owns one contiguous slice of the input. Uses RADIX * SORT_THREADS
+// ints of static shared memory. in and out must be distinct device buffers
+// of n ints, and every value must be non-negative.
+__global__ void countSort(const int *in, int *out, int n, int exp)
+{
+    // slot[d][t] first holds how many digit-d values thread t's slice has,
+    // then (after the scan) the output index of the first of those values.
+    __shared__ int slot[RADIX][SORT_THREADS];
+
+    const int threads = blockDim.x;
+    const int t = threadIdx.x;
+    const int slice = (n + threads - 1) / threads;
+    const int begin = (t * slice < n) ? t * slice : n;
+    const int end = (n - begin < slice) ? n : begin + slice;
+
+    // Per-thread histogram of the current digit over this thread's slice.
+    int local[RADIX] = {0};
+    for (int i = begin; i < end; i++)
+        local[(in[i] / exp) % RADIX]++;
+    for (int d = 0; d < RADIX; d++)
+        slot[d][t] = local[d];
+    __syncthreads();
+
+    // Exclusive prefix sum in (digit, thread) order. Visiting threads in
+    // slice order within each digit is what keeps the sort stable.
+    if (t == 0) {
+        int next = 0;
+        for (int d = 0; d < RADIX; d++) {
+            for (int k = 0; k < threads; k++) {
+                const int c = slot[d][k];
+                slot[d][k] = next;
+                next += c;
+            }
+        }
+    }
+    __syncthreads();
+
+    // Scatter this thread's slice to its reserved output positions.
+    for (int d = 0; d < RADIX; d++)
+        local[d] = slot[d][t];
+    for (int i = begin; i < end; i++) {
+        const int v = in[i];
+        out[local[(v / exp) % RADIX]++] = v;
+    }
+}
+
+// Sorts arr[0..n) ascending in place. Values must be non-negative.
+// Copies the data to the device once, runs one countSort pass per decimal
+// digit of the maximum while ping-ponging two device buffers, then copies
+// the result back once.
+void radixsort(int arr[], int n)
+{
+    if (n <= 1)
+        return; // nothing to sort; also keeps getMax off an empty array
+
+    const int m = getMax(arr, n);
+    const size_t bytes = static_cast<size_t>(n) * sizeof(int);
+
+    int *d_in = nullptr;
+    int *d_out = nullptr;
+    CUDA_CHECK(cudaMalloc((void**)&d_in, bytes));
+    CUDA_CHECK(cudaMalloc((void**)&d_out, bytes));
+    CUDA_CHECK(cudaMemcpy(d_in, arr, bytes, cudaMemcpyHostToDevice));
+
+    for (int exp = 1; m / exp > 0; exp *= RADIX) {
+        countSort<<<1, SORT_THREADS>>>(d_in, d_out, n, exp);
+        CUDA_CHECK(cudaGetLastError());
+        std::swap(d_in, d_out); // the freshly written buffer feeds the next pass
+
+        // Stop before exp *= RADIX would overflow int (m has 10 digits).
+        if (exp > INT_MAX / RADIX)
+            break;
+    }
+
+    // Blocking copy on the default stream: waits for the last kernel.
+    CUDA_CHECK(cudaMemcpy(arr, d_in, bytes, cudaMemcpyDeviceToHost));
+    CUDA_CHECK(cudaFree(d_in));
+    CUDA_CHECK(cudaFree(d_out));
+}
+
+// Prints arr[0..n), one value per line.
+void print(int arr[], int n)
+{
+    for (int i = 0; i < n; i++)
+        cout << arr[i] << "\n";
+}
+
+// Fills arr[0..n) with pseudo-random values from a fixed seed, so every run
+// sorts the same input.
+void rng(int* arr,int n){
+    int seed = 13516067;
+    srand(seed);
+    for (long i=0; i<n; i++){
+        arr[i] = (int) rand();
+    }
+}
+
+// Returns end - start, with tv_nsec normalized into [0, 1e9).
+timespec diff(timespec start, timespec end)
+{
+    timespec temp;
+    if ((end.tv_nsec - start.tv_nsec) < 0) {
+        temp.tv_sec = end.tv_sec - start.tv_sec - 1;
+        temp.tv_nsec = 1000000000 + end.tv_nsec - start.tv_nsec;
+    } else {
+        temp.tv_sec = end.tv_sec - start.tv_sec;
+        temp.tv_nsec = end.tv_nsec - start.tv_nsec;
+    }
+    return temp;
+}
+
+// Reads N, sorts N pseudo-random ints on the GPU, prints them and the
+// elapsed wall time of the sort (including host<->device copies).
+int main()
+{
+    int n;
+    cout<<"Masukkan nilai N\n";
+    if (!(cin>>n) || n <= 0) {
+        cerr<<"N must be a positive integer\n";
+        return 1;
+    }
+
+    // Heap storage: a stack VLA overflows the stack for large N.
+    vector<int> arr(n);
+    rng(arr.data(),n);
+
+    timespec start, stop;
+    // Monotonic clock: interval timing must not jump with wall-clock changes.
+    clock_gettime(CLOCK_MONOTONIC, &start);
+    radixsort(arr.data(),n);
+    clock_gettime(CLOCK_MONOTONIC, &stop);
+    print(arr.data(),n);
+
+    timespec duration = diff(start, stop);
+    printf("\n%ld.%09ld s\n", (long) duration.tv_sec, (long) duration.tv_nsec);
+
+    return 0;
+}
diff --git a/src/radix_par2.cu b/src/radix_par2.cu
new file mode 100644
index 0000000000000000000000000000000000000000..eae49a3713dec42e968c6d4cea75d8dcc9499f6f
--- /dev/null
+++ b/src/radix_par2.cu
@@ -0,0 +1,182 @@
+// Radix sort (LSD, base 10) of non-negative ints. Each counting-sort pass
+// runs on the GPU in a single thread block.
+#include <climits>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <time.h>
+#include <utility>
+#include <vector>
+
+#include <cuda_runtime.h>
+
+using namespace std;
+
+// Number of buckets per pass (one per decimal digit).
+constexpr int RADIX = 10;
+// Threads in the single block that runs countSort; sizes its shared memory.
+constexpr int SORT_THREADS = 32;
+
+// Exit with a readable message when a CUDA runtime call fails.
+#define CUDA_CHECK(call) \
+    do { \
+        cudaError_t err_ = (call); \
+        if (err_ != cudaSuccess) { \
+            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__, \
+                    cudaGetErrorString(err_)); \
+            exit(EXIT_FAILURE); \
+        } \
+    } while (0)
+
+// Returns the largest value in arr[0..n). Requires n >= 1.
+int getMax(int arr[], int n)
+{
+    int mx = arr[0];
+    for (int i = 1; i < n; i++)
+        if (arr[i] > mx)
+            mx = arr[i];
+    return mx;
+}
+
+// Stable counting sort of in[0..n) into out[0..n), keyed on the decimal digit
+// selected by exp (1, 10, 100, ...).
+//
+// Launch layout: exactly one block of at most SORT_THREADS threads; each
+// thread owns one contiguous slice of the input. Uses RADIX * SORT_THREADS
+// ints of static shared memory. in and out must be distinct device buffers
+// of n ints, and every value must be non-negative.
+__global__ void countSort(const int *in, int *out, int n, int exp)
+{
+    // slot[d][t] first holds how many digit-d values thread t's slice has,
+    // then (after the scan) the output index of the first of those values.
+    __shared__ int slot[RADIX][SORT_THREADS];
+
+    const int threads = blockDim.x;
+    const int t = threadIdx.x;
+    const int slice = (n + threads - 1) / threads;
+    const int begin = (t * slice < n) ? t * slice : n;
+    const int end = (n - begin < slice) ? n : begin + slice;
+
+    // Per-thread histogram of the current digit over this thread's slice.
+    int local[RADIX] = {0};
+    for (int i = begin; i < end; i++)
+        local[(in[i] / exp) % RADIX]++;
+    for (int d = 0; d < RADIX; d++)
+        slot[d][t] = local[d];
+    __syncthreads();
+
+    // Exclusive prefix sum in (digit, thread) order. Visiting threads in
+    // slice order within each digit is what keeps the sort stable.
+    if (t == 0) {
+        int next = 0;
+        for (int d = 0; d < RADIX; d++) {
+            for (int k = 0; k < threads; k++) {
+                const int c = slot[d][k];
+                slot[d][k] = next;
+                next += c;
+            }
+        }
+    }
+    __syncthreads();
+
+    // Scatter this thread's slice to its reserved output positions.
+    for (int d = 0; d < RADIX; d++)
+        local[d] = slot[d][t];
+    for (int i = begin; i < end; i++) {
+        const int v = in[i];
+        out[local[(v / exp) % RADIX]++] = v;
+    }
+}
+
+// Sorts arr[0..n) ascending in place. Values must be non-negative.
+// Copies the data to the device once, runs one countSort pass per decimal
+// digit of the maximum while ping-ponging two device buffers, then copies
+// the result back once.
+void radixsort(int arr[], int n)
+{
+    if (n <= 1)
+        return; // nothing to sort; also keeps getMax off an empty array
+
+    const int m = getMax(arr, n);
+    const size_t bytes = static_cast<size_t>(n) * sizeof(int);
+
+    int *d_in = nullptr;
+    int *d_out = nullptr;
+    CUDA_CHECK(cudaMalloc((void**)&d_in, bytes));
+    CUDA_CHECK(cudaMalloc((void**)&d_out, bytes));
+    CUDA_CHECK(cudaMemcpy(d_in, arr, bytes, cudaMemcpyHostToDevice));
+
+    for (int exp = 1; m / exp > 0; exp *= RADIX) {
+        countSort<<<1, SORT_THREADS>>>(d_in, d_out, n, exp);
+        CUDA_CHECK(cudaGetLastError());
+        std::swap(d_in, d_out); // the freshly written buffer feeds the next pass
+
+        // Stop before exp *= RADIX would overflow int (m has 10 digits).
+        if (exp > INT_MAX / RADIX)
+            break;
+    }
+
+    // Blocking copy on the default stream: waits for the last kernel.
+    CUDA_CHECK(cudaMemcpy(arr, d_in, bytes, cudaMemcpyDeviceToHost));
+    CUDA_CHECK(cudaFree(d_in));
+    CUDA_CHECK(cudaFree(d_out));
+}
+
+// Prints arr[0..n), one value per line.
+void print(int arr[], int n)
+{
+    for (int i = 0; i < n; i++)
+        cout << arr[i] << "\n";
+}
+
+// Fills arr[0..n) with pseudo-random values from a fixed seed, so every run
+// sorts the same input.
+void rng(int* arr,int n){
+    int seed = 13516067;
+    srand(seed);
+    for (long i=0; i<n; i++){
+        arr[i] = (int) rand();
+    }
+}
+
+// Returns end - start, with tv_nsec normalized into [0, 1e9).
+timespec diff(timespec start, timespec end)
+{
+    timespec temp;
+    if ((end.tv_nsec - start.tv_nsec) < 0) {
+        temp.tv_sec = end.tv_sec - start.tv_sec - 1;
+        temp.tv_nsec = 1000000000 + end.tv_nsec - start.tv_nsec;
+    } else {
+        temp.tv_sec = end.tv_sec - start.tv_sec;
+        temp.tv_nsec = end.tv_nsec - start.tv_nsec;
+    }
+    return temp;
+}
+
+// Reads N, sorts N pseudo-random ints on the GPU, prints them and the
+// elapsed wall time of the sort (including host<->device copies).
+int main()
+{
+    int n;
+    cout<<"Masukkan nilai N\n";
+    if (!(cin>>n) || n <= 0) {
+        cerr<<"N must be a positive integer\n";
+        return 1;
+    }
+
+    // Heap storage: a stack VLA overflows the stack for large N.
+    vector<int> arr(n);
+    rng(arr.data(),n);
+
+    timespec start, stop;
+    // Monotonic clock: interval timing must not jump with wall-clock changes.
+    clock_gettime(CLOCK_MONOTONIC, &start);
+    radixsort(arr.data(),n);
+    clock_gettime(CLOCK_MONOTONIC, &stop);
+    print(arr.data(),n);
+
+    timespec duration = diff(start, stop);
+    printf("\n%ld.%09ld s\n", (long) duration.tv_sec, (long) duration.tv_nsec);
+
+    return 0;
+}