diff --git a/src/dijkstra_cuda.cuh b/src/dijkstra_cuda.cuh
new file mode 100644
index 0000000000000000000000000000000000000000..ff4602e56867b2082d3f32ba31fbb0c8474c4ec4
--- /dev/null
+++ b/src/dijkstra_cuda.cuh
@@ -0,0 +1,138 @@
+#ifndef __DIJKSTRA_CUDA__
+#define __DIJKSTRA_CUDA__
+
+#include <stdlib.h>
+#include <limits.h>
+
+/**
+ * Get vertex index with minimum distance which not yet included
+ * in spt_set
+ * @param  dist    distance from origin vertex to vertex with that index
+ * @param  spt_set a set denoting vertices included in spt_set
+ * @param n number of vertices in the graph
+ * @return         index of minimum distance not yet included in spt_set
+ */
+ int min_distance_idx(long dist[], bool spt_set[], int n) {
+	// Initialize min value 
+    int min = INT_MAX, min_index; 
+  
+    for (int i = 0; i < n; i++) {
+    	if (spt_set[i] == false && dist[i] <= min) {
+    		min = dist[i];
+    		min_index = i;
+    	}
+    } 
+        
+  
+    return min_index; 
+}
+
+/**
+ * generate a graph with n vertices
+ * @param  n number of vertices
+ * @return   2D array, graph[i][j] = graph[j][i] = distance from vertex i to j
+ */
+ long **gen_graph(int n) {
+	// alokasi memori untuk matriks yang merepresentasikan graf
+	long **result = (long **)malloc(n * sizeof(long *)); 
+    for (int i = 0; i < n; i++) {
+    	result[i] = (long *)malloc(n * sizeof(long)); 
+    }    
+  
+    // isi matriks dengan bilangan random
+    srand(13517122);
+    
+    for (int i = 0; i < n; i++) {
+    	for (int j = i; j < n; j++) {
+    		if (i == j) {
+    			result[i][j] = 0;
+    		} else {
+    			result[i][j] = result[j][i] = rand();	
+    		}
+    		
+    	}
+    }
+     
+ 	return result; 
+}
+
+long **gen_temp(int r, int c) {
+	// alokasi memori untuk matriks yang merepresentasikan graf
+	long **result = (long **)malloc(r * sizeof(long *)); 
+	// printf("[gen_temp] initiate temp\n");
+    for (int i = 0; i < r; i++) {
+    	result[i] = (long *)malloc(c * sizeof(long)); 
+    	// printf("[gen_temp] initiate each row in temp\n");
+    }    
+    
+    for (int i = 0; i < r; i++) {
+    	for (int j = 0; j < c; j++) {
+    		// printf("[gen_temp] filling temp\n");
+    		result[i][j] = 0;
+    	}
+    }
+     
+ 	return result; 
+}
+
+long *dijkstra(long **graph, int n, int src) {
+
+	// output array, contains shortest distance from src to every vertices
+	long *dist = (long *) malloc (sizeof(long) * n);
+	// spt_set[i] is true if vertex i already included in the shortest path tree
+	bool spt_set[n];
+
+	// initialize dist and spt_set
+	for (int i = 0; i < n; i++) {
+		dist[i] = INT_MAX;
+		spt_set[i] = false;
+	}
+
+	// initiate path searching 
+	dist[src] = 0;
+
+
+	// find the shortest path for all vertices
+	for (int i = 0; i < n; i++) {
+
+		// pick vertex with minimum distance from src from spt_set not yet
+		// processed
+		int processed_vertex = min_distance_idx(dist, spt_set, n);
+
+		// mark vertex as processed
+		spt_set[processed_vertex] = true;
+
+		for (int j = 0; j < n; j++) {
+			// check vertices connected to processed_vertex not yet processed
+			if (!spt_set[j] 
+				&& graph[processed_vertex][j] != 0
+				&& dist[processed_vertex] != INT_MAX 
+				&& dist[processed_vertex] + graph[processed_vertex][j] < dist[j]) {
+
+				dist[j] = dist[processed_vertex] + graph[processed_vertex][j];
+			}
+		}
+	}
+
+	return dist;
+}
+
+/**
+* that one kernel that do it "all"
+*/
+__global__ do_it(**long graph, **long result, int num_vertices) {
+	int start_idx = threadIdx.x + blockDim.x * blockIdx.x;
+	int stride = blockDim.x * gridDim.x;
+	
+	long *temp = (long*) malloc(sizeof(long)*num_vertices);
+	
+	for (int i = start_idx; i < num_vertices; i+= stride) {
+		temp = dijkstra(graph, num_vertices, i);
+		
+		for (int j = 0; j < num_vertices; j++) {
+			result[i][j] = temp[j];
+		}
+	}
+}
+
+#endif
\ No newline at end of file
diff --git a/src/paralel.cu b/src/paralel.cu
index d57fdf09080a20b6ce4acc7d596338ed39e0c42f..de31f9fc426b5e3d62cc4d7f0e83930ed73a4cc2 100644
--- a/src/paralel.cu
+++ b/src/paralel.cu
@@ -1,28 +1,44 @@
 #include <iostream>
 #include <stdlib.h>
 #include <stdio.h>
+#include <time.h>
+#include <util.h>
+#include "dijkstra_cuda.cuh"
 
 
-int main () {
+static double get_micros(void) {
+    struct timespec ts;
+    timespec_get(&ts, TIME_UTC);
+    return ((double)((long)ts.tv_sec * 1000000000L + ts.tv_nsec)/1000);
+}
+
+int main (int argc, char const *argv[]) {
     
+    // initialization
+    int num_vertices = atoi(argv[2]);
+    double start_time, end_time, total_time;
+    total_time = 0;
+
     // allocate memory in host for the graph
     // code goes here
+    long **graph = gen_graph(num_vertices);
 
     // allocate memory in the host for the result matrice
     // code goes here
+    long **result = gen_temp(num_vertices, num_vertices);
 
     // copy graph from host to device
-    // code goes here
-
+    // code goes here, this might be unnecessary
+    // cudaMemCpy()
 
 
     // start timer
     // code goes here
-
+    start_time = get_micros();
 
     // calculate the shortest paths using device
     // code goes here
-
+    do_it();
 
     // copy result array from device to host
     // code goes here