diff --git a/src/radixsort_parallel.cu b/src/radixsort_parallel.cu
index b1c69dbd41f581f2ab2b08167a72a8c30a5d8d6d..6e0e18ec96eec68e39d15efd989036f7df9ba980 100644
--- a/src/radixsort_parallel.cu
+++ b/src/radixsort_parallel.cu
@@ -3,14 +3,52 @@
 #include "radix_sort_parallel.h"
 
+/**
+ * Stores the largest value of arr[0..n) in *max.
+ *
+ * Each thread scans the elements at threadIdx.x, threadIdx.x + blockDim.x, ...
+ * and publishes its partial maximum in shared memory; thread 0 then combines
+ * the partials and writes the result.
+ *
+ * Indexing uses threadIdx.x only, so every block scans the whole array and
+ * writes the same result; a single 1D block is sufficient.
+ * If n <= 0 the kernel returns without touching *max.
+ */
 __global__ void getMax(int *arr, int *max, int n) {
-    int index = threadIdx.x;
-    int stride = blockDim.x;
-    int mx = arr[index]; 
-
-    for (int i = index; i < n; i+=stride) 
-        if (arr[i] > mx) 
-            mx = arr[i];
-    max[0] = mx;
+    // One slot per thread; CUDA caps a block at 1024 threads.
+    __shared__ int cache[1024];
+
+    // n is uniform across the block, so all threads leave together and none
+    // is left waiting at the barrier below.
+    if (n <= 0) {
+        return;
+    }
+
+    const int tid = threadIdx.x;
+    const int threads = blockDim.x;
+
+    // Stride by the actual block size: every element is covered and no index
+    // reaches n, whatever the launch configuration.
+    int temp = arr[0];
+    for (int i = tid; i < n; i += threads) {
+        if (temp < arr[i]) {
+            temp = arr[i];
+        }
+    }
+    cache[tid] = temp;
+
+    __syncthreads();
+
+    // Combine only the slots that were written, in a register, and store the
+    // result to global memory once.
+    if (tid == 0) {
+        int best = cache[0];
+        for (int i = 1; i < threads; i++) {
+            if (best < cache[i]) {
+                best = cache[i];
+            }
+        }
+        *max = best;
+    }
 } 
 
 __global__ void countSort(int *arr, int n, int exp) {