diff --git a/src/radix_sort_par.cu b/src/radix_sort_par.cu
index 160487522e7c7a31d158e8c6bf6c789aefa693c3..0e7620e6c06390b701974e5f1e81b23511a0a703 100644
--- a/src/radix_sort_par.cu
+++ b/src/radix_sort_par.cu
@@ -61,13 +61,13 @@ void radixsort(int *arr, int n)
     // allocate device memory
     cudaMalloc((void**)&d_arr,sizeof(int)*n);
 
-    cudaMemcpy(d_arr, arr, sizeof(int)*n,cudaMemcpyHostToDevie);
-    for (int exp = 1; m/exp > 0; exp *= 10) 
+    cudaMemcpy(d_arr, arr, sizeof(int)*n,cudaMemcpyHostToDevice);
+    for (int exp = 1; m/exp > 0; exp *= 10){ 
         countSort<<<1,32>>>(d_arr, n, exp); 
 
-    //transfer data back to host memory
-    cudaMemcpy(arr, d_arr, sizeof(int)*n, cudaMemcpyDeviceToHost);
-
+        //transfer data back to host memory
+        cudaMemcpy(arr, d_arr, sizeof(int)*n, cudaMemcpyDeviceToHost);
+    }
     //deallocate device memory
     cudaFree(d_arr);
 
@@ -115,13 +115,13 @@ int main(int argc, char *argv[])
     clock_gettime(CLOCK_REALTIME, &start);
     radixsort(arr,n);
     clock_gettime(CLOCK_REALTIME, &stop);
-    print(arr,n);
+    //print(arr,n);
     
     timespec duration = diff(start, stop);
     long time = duration.tv_sec * 1000000 + duration.tv_nsec/1000;
     printf("\n%d.%09d s\n", duration.tv_sec, duration.tv_nsec);
     
     //deallocate host memory
-    free(arr);
+    
     return 0; 
-} 
\ No newline at end of file
+}