From cafa9b609681f2f33aba5e240487d799b2ea393e Mon Sep 17 00:00:00 2001
From: Saskia Imani <imani.saskia@gmail.com>
Date: Sat, 28 Mar 2020 21:41:03 +0700
Subject: [PATCH] Input-based block and thread numbers

---
 parallel.cu | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/parallel.cu b/parallel.cu
index f6f96c1..f2be582 100644
--- a/parallel.cu
+++ b/parallel.cu
@@ -2,7 +2,6 @@
 #include <stdlib.h>
 #include <limits.h>
 #include <time.h>
-#include <omp.h>
 
 #define TRUE 1
 #define FALSE 0
@@ -17,17 +16,16 @@ long *initialize_result(int n);
 void write_output(long matrix[], int n_row, int n_col, double time);
 
 __global__
-void dijkstra(int n, long matrix[], long result[]) {
+void dijkstra(int n, int sub_n, long matrix[], long result[]) {
 	long *connected;         // Array of 'boolean' on whether vertice [i] is connected to source
     long *distance;          // Distance of vertice [i] from source
 
     int my_rank = blockIdx.x * blockDim.x + threadIdx.x;
-    int stride = blockDim.x * gridDim.x;
 
 	connected = (long*)malloc(n * sizeof(long));
     distance = (long *)malloc(n * sizeof(long));
 
-	for (int SOURCE_V = my_rank; SOURCE_V < n; SOURCE_V += stride) {
+	for (int SOURCE_V = my_rank * sub_n; SOURCE_V < (my_rank + 1) * sub_n; SOURCE_V++) {
         for (int i = 0; i < n; i++) {
             if (i == SOURCE_V) {
                 connected[i] = TRUE;
@@ -87,6 +85,8 @@ int main (int argc, char **argv) {
     
     /* Read number of vertices */
     printf("Number of vertices (n): "); scanf("%d", &n);
+    printf("Number of blocks      : "); scanf("%d", &num_blocks);
+    printf("Number of threads     : "); scanf("%d", &num_threads);
     
     matrix = initialize_matrix(13517142, n);
     printf("\nGenerated %d * %d matrix.\n", n, n);
@@ -94,8 +94,10 @@ int main (int argc, char **argv) {
     result = initialize_result(n);
 
     start = clock();
+
+    int sub_n = n / (num_blocks * num_threads);
 	
-	dijkstra<<<n,1>>>(n, matrix, result);
+	dijkstra<<<num_blocks,num_threads>>>(n, sub_n, matrix, result);
 
     cudaDeviceSynchronize();
 
-- 
GitLab