From 93cb9e53f4b8221000de3f0136006cb39e051408 Mon Sep 17 00:00:00 2001
From: I Putu Gede Wirasuta <wirasutat@gmail.com>
Date: Sat, 28 Mar 2020 00:45:12 +0700
Subject: [PATCH] Update Makefile and implement parallel functionality.
 Performance is unstable for high node counts

---
 Makefile              | 17 ++++++++---------
 src/main.cu           | 28 ++++++++++------------------
 src/utils/dijkstra.cu | 10 +++-------
 src/utils/dijkstra.h  |  3 ++-
 src/utils/matrix.cu   |  4 ++--
 src/utils/matrix.h    |  4 ++--
 6 files changed, 27 insertions(+), 39 deletions(-)

diff --git a/Makefile b/Makefile
index 44c03e0..b5fad74 100644
--- a/Makefile
+++ b/Makefile
@@ -1,14 +1,21 @@
-CC=nvcc
-CFLAGS=-g
-SRCFOLDER=src/
-DEPS=src/utils/matrix.h src/utils/dijkstra.h
+CC = nvcc
+# Project headers. Listing them as prerequisites makes a header edit (e.g. MAX_NODE in
+# dijkstra.h) rebuild the objects; each object's own .cu file is already covered by the
+# %.cu prerequisite of the pattern rule below.
+DEPS = src/utils/matrix.h src/utils/dijkstra.h
+OBJECTS = src/main.o src/utils/dijkstra.o src/utils/matrix.o
 
-%.o: %.cu $(DEPS)
-	$(CC) -c -o $@ $< $(CFLAGS)
-
-main: $(SRCFOLDER)main.o $(SRCFOLDER)utils/matrix.o $(SRCFOLDER)utils/dijkstra.o
+# all and clean name actions, not files, so they must run even if a file with that name exists.
+.PHONY: all clean
+
+# Link every object into dist/main.
+all: $(OBJECTS)
 	mkdir -p dist
-	$(CC) $(CFLAGS) -o dist/main $(SRCFOLDER)main.cu $(SRCFOLDER)utils/matrix.o $(SRCFOLDER)utils/dijkstra.o
+	$(CC) $(OBJECTS) -o dist/main
+
+# -dc emits relocatable device code so __device__ functions defined in one .cu file can be called from another.
+%.o: %.cu $(DEPS)
+	$(CC) -x cu -I. -dc $< -o $@
 
 clean:
 	rm -rf dist
diff --git a/src/main.cu b/src/main.cu
index 8cbb1c4..9b8ec65 100644
--- a/src/main.cu
+++ b/src/main.cu
@@ -3,7 +3,7 @@
 #include "utils/matrix.h"
 #include "utils/dijkstra.h"
 
-long int *calculate_sub_matrix(long int *matrix, int node_count);
+__global__ void calculate_sub_matrix(long int *matrix, long int *sub_dist, int node_count);
 
 int main(int argc, char *argv[])
 {
@@ -17,10 +17,31 @@ int main(int argc, char *argv[])
     }
 
     int node_count = atoi(argv[1]);
+    // dijkstra() tracks processed nodes in a fixed MAX_NODE-sized per-thread array,
+    // so reject graphs it cannot hold instead of overflowing that array on the device.
+    if (node_count <= 0 || node_count > MAX_NODE)
+    {
+        fprintf(stderr, "node_count must be between 1 and %d\n", MAX_NODE);
+        return 1;
+    }
-    cudaDeviceSetLimit(cudaLimitMallocHeapSize, node_count * node_count * 2.5f * sizeof(long int));
+    cudaDeviceSetLimit(cudaLimitMallocHeapSize, node_count * node_count * 3 * sizeof(long int) + node_count * sizeof(long int));
     long int *adj_matrix = create_adj_matrix(node_count, node_count);
 
-    long int *sub_dist = calculate_sub_matrix(adj_matrix, node_count);
+    long int *sub_dist;
+    cudaMallocManaged(&sub_dist, node_count * node_count * sizeof(long int));
+    calculate_sub_matrix<<<16,64>>>(adj_matrix, sub_dist, node_count);
+    // A kernel launch returns immediately: wait for it to finish (and report any launch or
+    // execution error) before host code touches the managed sub_dist buffer.
+    cudaError_t kernel_status = cudaGetLastError();
+    if (kernel_status == cudaSuccess)
+    {
+        kernel_status = cudaDeviceSynchronize();
+    }
+    if (kernel_status != cudaSuccess)
+    {
+        fprintf(stderr, "calculate_sub_matrix failed: %s\n", cudaGetErrorString(kernel_status));
+        return 1;
+    }
 
     char print_dist;
     printf("Print distances to stdout? [y/N] ");
@@ -36,22 +57,16 @@ int main(int argc, char *argv[])
     cudaFree(adj_matrix);
 }
 
-long int *calculate_sub_matrix(long int *matrix, int node_count)
+// Kernel: runs dijkstra() once per source node. Each thread starts at its global thread
+// index and advances by the total number of launched threads (grid-stride loop), so any
+// launch configuration covers all node_count sources. For source i, dijkstra() is handed
+// the node_count-element row starting at sub_dist + i * node_count as its output buffer.
+__global__ void calculate_sub_matrix(long int *matrix, long int *sub_dist, int node_count)
 {
-    long int *sub_dist = (long int *)malloc(node_count * node_count * sizeof(long int));
-    cudaMallocManaged(&sub_dist, node_count * node_count * sizeof(long int));
-
-    for (int i = 0; i < node_count; i++)
+    int start = (blockIdx.x * blockDim.x) + threadIdx.x;
+    int stride = blockDim.x * gridDim.x;
+    for (int i = start; i < node_count; i+= stride)
     {
-        long int *temp_dist = dijkstra(matrix, i, node_count);
-
-        for (int j = 0; j < node_count; j++)
-        {
-            set_el(sub_dist, node_count, j, i, temp_dist[j]);
-        }
-
-        cudaFree(temp_dist);
+        dijkstra(matrix, sub_dist + i * node_count, i, node_count);
     }
-
-    return sub_dist;
 }
diff --git a/src/utils/dijkstra.cu b/src/utils/dijkstra.cu
index 930dd5d..0fd4428 100644
--- a/src/utils/dijkstra.cu
+++ b/src/utils/dijkstra.cu
@@ -1,6 +1,6 @@
 #include "dijkstra.h"
 
-long int get_idx_min_dist(long int *dist, short *processed, int len)
+__device__ long int get_idx_min_dist(long int *dist, short *processed, int len)
 {
     long int min = LONG_MAX;
     int idx;
@@ -17,11 +17,9 @@ long int get_idx_min_dist(long int *dist, short *processed, int len)
     return idx;
 }
 
-long int *dijkstra(long int *adj_matrix, int src, int size)
+__device__ void dijkstra(long int *adj_matrix, long int *dist, int src, int size)
 {
-    long int *dist;
-    cudaMallocManaged(&dist, size * sizeof(long int));
-    short processed[size];
+    short processed[MAX_NODE];
 
     for (int i = 0; i < size; i++)
     {
@@ -50,6 +48,4 @@ long int *dijkstra(long int *adj_matrix, int src, int size)
             }
         }
     }
-
-    return dist;
 }
\ No newline at end of file
diff --git a/src/utils/dijkstra.h b/src/utils/dijkstra.h
index d143e6b..6f88983 100644
--- a/src/utils/dijkstra.h
+++ b/src/utils/dijkstra.h
@@ -6,5 +6,6 @@
 
 #define TRUE 1
 #define FALSE 0
+#define MAX_NODE 3000
 
-long int *dijkstra(long int *adj_matrix, int src, int size);
+__device__ void dijkstra(long int *adj_matrix, long int *dist, int src, int size);
\ No newline at end of file
diff --git a/src/utils/matrix.cu b/src/utils/matrix.cu
index f7b9bb0..2651e5c 100644
--- a/src/utils/matrix.cu
+++ b/src/utils/matrix.cu
@@ -61,12 +61,12 @@ long int *create_adj_matrix(int width, int height)
     return matrix;
 }
 
-long int get_el(long int *matrix, int width, int x, int y)
+__device__  long int get_el(long int *matrix, int width, int x, int y)
 {
     return matrix[y * width + x];
 }
 
-void set_el(long int *matrix, int width, int x, int y, long int value)
+__device__ void set_el(long int *matrix, int width, int x, int y, long int value)
 {
     matrix[y * width + x] = value;
 }
diff --git a/src/utils/matrix.h b/src/utils/matrix.h
index c02cbb6..cc9fd1a 100644
--- a/src/utils/matrix.h
+++ b/src/utils/matrix.h
@@ -4,6 +4,6 @@
 void print_array(long int *array, int width);
 void print_matrix(long int *matrix, int width, int height);
 void print_matrix_to_file(long int *matrix, int width, int height, char *filename);
-long int get_el(long int *matrix, int width, int x, int y);
-void set_el(long int *matrix, int width, int x, int y, long int value);
+__device__ long int get_el(long int *matrix, int width, int x, int y);
+__device__ void set_el(long int *matrix, int width, int x, int y, long int value);
 long int *create_adj_matrix(int width, int height);
\ No newline at end of file
-- 
GitLab