diff --git a/src/main.cu b/src/main.cu
index 9b8ec65032b93442db70771dd56c5d3462f12a75..361f6340545c185a284fa210a2133e56b3ef2eeb 100644
--- a/src/main.cu
+++ b/src/main.cu
@@ -17,12 +17,14 @@ int main(int argc, char *argv[])
     }
 
     int node_count = atoi(argv[1]);
-    cudaDeviceSetLimit(cudaLimitMallocHeapSize, node_count * node_count * 3 * sizeof(long int) + node_count * sizeof(long int));
+    cudaDeviceSetLimit(cudaLimitMallocHeapSize, node_count * node_count * 2 * sizeof(long int));
     long int *adj_matrix = create_adj_matrix(node_count, node_count);
 
     long int *sub_dist;
     cudaMallocManaged(&sub_dist, node_count * node_count * sizeof(long int));
-    calculate_sub_matrix<<<16,64>>>(adj_matrix, sub_dist, node_count);
+    calculate_sub_matrix<<<20,128>>>(adj_matrix, sub_dist, node_count);
+
+    cudaDeviceSynchronize();
 
     char print_dist;
     printf("Print distances to stdout? [y/N] ");