diff --git a/src/radix_sort_parallel.cu b/src/radix_sort_parallel.cu
index 9509a5e55081b001e193d507a1b90aecfe694006..e857a870b26041437b6372998334030e630321ab 100644
--- a/src/radix_sort_parallel.cu
+++ b/src/radix_sort_parallel.cu
@@ -25,8 +25,8 @@ void rng(int* arr, int n) {
     }
 }
 
-u_int32_t* down_sweep(u_int32_t* flags, int n) {
-	u_int32_t* prefix_sum = (u_int32_t*) malloc(n * sizeof(u_int32_t));
+int* down_sweep(int* flags, int n) {
+	int* prefix_sum = (int*) malloc(n * sizeof(int));
 	prefix_sum[0] = 0;
 
 	for (int i = 1; i < n; i++) {
@@ -36,8 +36,8 @@ u_int32_t* down_sweep(u_int32_t* flags, int n) {
 	return prefix_sum;
 }
 
-u_int32_t* up_sweep(u_int32_t* flags, int n) {
-	u_int32_t* prefix_sum = (u_int32_t*) malloc(n * sizeof(u_int32_t));
+int* up_sweep(int* flags, int n) {
+	int* prefix_sum = (int*) malloc(n * sizeof(int));
 	prefix_sum[n - 1] = n - 1;
 
 	for (int i = n - 2; i >= 0; i--) {
@@ -48,19 +48,19 @@ u_int32_t* up_sweep(u_int32_t* flags, int n) {
 	return prefix_sum;
 }
 
-__global__ void assign_index(u_int32_t* arr, u_int32_t* arr_idx, u_int32_t* down_sweep_i, u_int32_t* up_sweep_i, u_int32_t* flags, int n) {
+__global__ void assign_index(int* arr, int* arr_idx, int* down_sweep_i, int* up_sweep_i, int* flags, int n) {
 	for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x) {
 		arr_idx[i] = flags[i] ? down_sweep_i[i] : up_sweep_i[i];
 	}
 }
 
-__global__ void assign_flags(u_int32_t* arr, int n, int idx, u_int32_t* flags) {
+__global__ void assign_flags(int* arr, int n, int idx, int* flags) {
 		for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x) {
 			flags[i] = ((arr[i] & 1 << idx) == 1 << idx) ? 0 : 1;
 		}
 }
 
-__global__ void scatter(u_int32_t* arr, u_int32_t* arr_temp, u_int32_t* arr_idx, int n) {
+__global__ void scatter(int* arr, int* arr_temp, int* arr_idx, int n) {
 		for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x) {
 				arr[arr_idx[i]] = arr_temp[i];
 		}
@@ -69,23 +69,23 @@ __global__ void scatter(u_int32_t* arr, u_int32_t* arr_temp, u_int32_t* arr_idx,
 void count_sort(int* d_arr, int n, int idx) {
     int block_dim = 64;
 	int grid_dim = (n + block_dim - 1) / block_dim;
-	u_int32_t*  d_flags;
-	cudaMalloc(&d_flags, n * sizeof(u_int32_t));
+	int*  d_flags;
+	cudaMalloc(&d_flags, n * sizeof(int));
 	assign_flags<<<grid_dim, block_dim>>>(d_arr, n, idx, d_flags);
 	cudaDeviceSynchronize();
-	u_int32_t* flags = (u_int32_t*) malloc(n * sizeof(u_int32_t));
-	cudaMemcpy(flags, d_flags, n * sizeof(u_int32_t), cudaMemcpyDeviceToHost);
-	u_int32_t* down_sweep_i = down_sweep(flags, n);
-	u_int32_t* up_sweep_i = up_sweep(flags, n);
+	int* flags = (int*) malloc(n * sizeof(int));
+	cudaMemcpy(flags, d_flags, n * sizeof(int), cudaMemcpyDeviceToHost);
+	int* down_sweep_i = down_sweep(flags, n);
+	int* up_sweep_i = up_sweep(flags, n);
 	free(flags);
-	u_int32_t *d_arr_temp, *d_arr_idx, *d_idx_down, *d_idx_up;
-	cudaMalloc(&d_arr_temp, n * sizeof(u_int32_t));
-	cudaMalloc(&d_arr_idx, n * sizeof(u_int32_t));
-	cudaMalloc(&d_idx_down, n * sizeof(u_int32_t));
-	cudaMalloc(&d_idx_up, n * sizeof(u_int32_t));
-	cudaMemcpy(d_idx_down, down_sweep_i, n * sizeof(u_int32_t), cudaMemcpyHostToDevice);
-	cudaMemcpy(d_idx_up, up_sweep_i, n * sizeof(u_int32_t), cudaMemcpyHostToDevice);
-	cudaMemcpy(d_arr_temp, d_arr, n * sizeof(u_int32_t), cudaMemcpyDeviceToDevice);
+	int *d_arr_temp, *d_arr_idx, *d_idx_down, *d_idx_up;
+	cudaMalloc(&d_arr_temp, n * sizeof(int));
+	cudaMalloc(&d_arr_idx, n * sizeof(int));
+	cudaMalloc(&d_idx_down, n * sizeof(int));
+	cudaMalloc(&d_idx_up, n * sizeof(int));
+	cudaMemcpy(d_idx_down, down_sweep_i, n * sizeof(int), cudaMemcpyHostToDevice);
+	cudaMemcpy(d_idx_up, up_sweep_i, n * sizeof(int), cudaMemcpyHostToDevice);
+	cudaMemcpy(d_arr_temp, d_arr, n * sizeof(int), cudaMemcpyDeviceToDevice);
 	assign_index<<<grid_dim, block_dim>>>(d_arr, d_arr_idx, d_idx_down, d_idx_up, d_flags, n);
 	cudaDeviceSynchronize();
 	scatter<<<grid_dim, block_dim>>>(d_arr, d_arr_temp, d_arr_idx, n);
@@ -128,11 +128,11 @@ int main(int argc, char *argv[]) {
     start = clock();
 
     // Move sorting CPU
-    u_int32_t* d_arr;
-		cudaMalloc(&d_arr, sizeof(u_int32_t) * n);
-		cudaMemcpy(d_arr, arr, sizeof(u_int32_t) * n, cudaMemcpyHostToDevice);
+    int* d_arr;
+		cudaMalloc(&d_arr, sizeof(int) * n);
+		cudaMemcpy(d_arr, arr, sizeof(int) * n, cudaMemcpyHostToDevice);
     radix_sort(d_arr, n);
-    cudaMemcpy(arr, d_arr, n * sizeof(u_int32_t), cudaMemcpyDeviceToHost);
+    cudaMemcpy(arr, d_arr, n * sizeof(int), cudaMemcpyDeviceToHost);
 		cudaFree(d_arr);
 
     end = clock();