diff --git a/dijkstra_cuda.cu b/dijkstra_cuda.cu
new file mode 100644
index 0000000000000000000000000000000000000000..e94fc08ba21f5c45e89a33a07437b32e10175e04
--- /dev/null
+++ b/dijkstra_cuda.cu
@@ -0,0 +1,178 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <limits.h>
+
+#define INFINITE INT_MAX
+#define RAND_SEED 13517119
+
+/*
+ * Allocate an n x n int matrix in CUDA managed memory.
+ *
+ * The cells live in one contiguous buffer; a second managed array holds one
+ * pointer per row into that buffer, so callers can index it as m[i][j].
+ *
+ * matrix : out-parameter receiving the row-pointer array (NULL on failure)
+ * n      : number of rows and columns, must be positive
+ *
+ * Returns 0 on success, -1 on invalid arguments or allocation failure.
+ * Release the matrix with free_matrix().
+ */
+int malloc_matrix(int ***matrix, int n){
+    if (!matrix) return -1;
+    *matrix = NULL;
+    if (n <= 0) return -1;
+
+    /* Contiguous storage for all n*n cells; trust the CUDA status code, not
+     * just the pointer, and size the request in size_t to avoid int overflow. */
+    int *data = NULL;
+    if (cudaMallocManaged(&data, (size_t)n * n * sizeof(int)) != cudaSuccess || !data)
+        return -1;
+
+    /* One pointer per row; must be validated before it is written to. */
+    int **rows = NULL;
+    if (cudaMallocManaged(&rows, (size_t)n * sizeof(int *)) != cudaSuccess || !rows) {
+        cudaFree(data);
+        return -1;
+    }
+
+    for (int i = 0; i < n; i++) {
+        rows[i] = data + (size_t)i * n;
+    }
+
+    *matrix = rows;
+    return 0;
+}
+
+/*
+ * Release a matrix created by malloc_matrix().
+ *
+ * matrix : address of the row-pointer array; it is reset to NULL afterwards
+ *          so a repeated call is a harmless no-op.
+ *
+ * Returns 0 on success (including when there is nothing to free) and -1 if
+ * either cudaFree call reports an error.
+ */
+int free_matrix(int ***matrix) {
+    if (!matrix || !*matrix) return 0;
+
+    int **rows = *matrix;
+    int status = 0;
+
+    /* free the memory - the first row pointer is the start of the data buffer */
+    if (cudaFree(rows[0]) != cudaSuccess) status = -1;
+
+    /* free the pointers into the memory */
+    if (cudaFree(rows) != cudaSuccess) status = -1;
+
+    *matrix = NULL;
+    return status;
+}
+
+/*
+ * Write an n x n matrix to a text file, one row per line with the values
+ * separated by single spaces.
+ *
+ * matrix   : row pointers of the matrix to dump (must be readable on the host)
+ * n        : number of rows and columns; nothing is written when n <= 0
+ * filename : path of the file to create or truncate
+ *
+ * If the file cannot be opened the error is reported on stderr and the
+ * function returns without writing anything.
+ */
+void matrix_to_file(int **matrix, int n, const char *filename) {
+    FILE *file = fopen(filename, "w");
+    if (!file) {
+        perror(filename);
+        return;
+    }
+
+    for(int i=0; i<n; i++){
+        for(int j=0; j<n-1; j++){
+            fprintf(file, "%d ", matrix[i][j]);
+        }
+        /* last column ends the line instead of adding a trailing space */
+        fprintf(file, "%d\n", matrix[i][n-1]);
+    }
+
+    /* fclose flushes buffered output, so a failure here means data was lost */
+    if (fclose(file) != 0) {
+        perror(filename);
+    }
+}
+
+/*
+ * Fill an n x n graph with pseudo-random edge weights.
+ *
+ * The generator is re-seeded with RAND_SEED on every call, so the same n
+ * always yields the same graph. Off-diagonal cells receive rand() % 100 and
+ * the diagonal is set to 0.
+ */
+void init_random(int **graph, int n){
+    srand(RAND_SEED);
+
+    for (int row = 0; row < n; ++row) {
+        int *cells = graph[row];
+        for (int col = 0; col < n; ++col) {
+            /* rand() is only consumed for off-diagonal cells, in row-major order */
+            cells[col] = (row == col) ? 0 : rand() % 100;
+        }
+    }
+}
+
+/*
+ * Return the index of the vertex with the smallest tentative distance among
+ * those not yet marked in sptSet.
+ *
+ * dist   : tentative distances, n entries
+ * sptSet : sptSet[v] is true once vertex v has been finalized
+ * n      : number of vertices
+ *
+ * Ties are resolved in favour of the highest index because the comparison
+ * is <=. Returns -1 when every vertex is already finalized or n <= 0.
+ */
+__device__ int minDistance(int *dist, bool sptSet[], int n)
+{
+    int min = INFINITE;
+    int min_index = -1; // defined result even when no candidate exists
+
+    for (int v = 0; v < n; v++) {
+        if (!sptSet[v] && dist[v] <= min) {
+            min = dist[v];
+            min_index = v;
+        }
+    }
+
+    return min_index;
+}
+
+/*
+ * Single-source shortest paths from src, using the O(n^2) array form of
+ * Dijkstra's algorithm. The distances are written into result[src].
+ *
+ * graph  : n x n adjacency matrix; graph[u][v] == 0 is treated as "no edge"
+ * result : n x n output matrix; row src receives the distances
+ * src    : source vertex, 0 <= src < n
+ * n      : number of vertices
+ *
+ * Vertices that cannot be reached from src keep the value INFINITE.
+ * If the scratch allocation fails, a message is printed and the row is left
+ * in its initial state (0 for src, INFINITE everywhere else).
+ */
+__device__ void dijkstra(int **graph, int **result, int src, int n)
+{
+    if (n <= 0 || src < 0 || src >= n)
+        return;
+
+    // dist[i] will hold the shortest distance from src to i
+    int *dist = result[src];
+
+    // Initialize the output row first so it is well defined on every exit path
+    for (int i = 0; i < n; i++)
+        dist[i] = INFINITE;
+    dist[src] = 0;
+
+    // sptSet[i] is true once the distance to vertex i is finalized.
+    // In-kernel malloc draws from the device heap and can fail.
+    bool *sptSet = (bool *)malloc(sizeof(bool) * n);
+    if (!sptSet) {
+        printf("dijkstra: device malloc failed for source %d\n", src);
+        return;
+    }
+    for (int i = 0; i < n; i++)
+        sptSet[i] = false;
+
+    // n - 1 rounds suffice: the last remaining vertex has nothing left to relax
+    for (int count = 0; count < n - 1; count++) {
+        // Closest vertex not yet processed; this is src in the first round
+        int u = minDistance(dist, sptSet, n);
+
+        // Nothing left, or everything left is unreachable: no further
+        // relaxation can change any distance, so stop early.
+        if (u < 0 || dist[u] == INFINITE)
+            break;
+
+        sptSet[u] = true;
+
+        // dist[u] and row u are fixed for this round (u is finalized)
+        const int du = dist[u];
+        const int *row = graph[u];
+
+        // Relax every edge u -> v that leads to a not-yet-finalized vertex
+        for (int v = 0; v < n; v++) {
+            if (!sptSet[v] && row[v] && du + row[v] < dist[v])
+                dist[v] = du + row[v];
+        }
+    }
+
+    // Return the scratch buffer to the device heap; it was leaked before
+    free(sptSet);
+}
+
+/*
+ * All Pairs Shortest Path with dijkstra algorithm.
+ *
+ * Each thread starts at its global 1D index and advances by the total
+ * number of launched threads, running dijkstra() once per source vertex it
+ * visits; row i of result receives the distances from vertex i.
+ */
+__global__ void dijkstra_APSP(int **matrix, int **result, int n) {
+    const int first = blockIdx.x * blockDim.x + threadIdx.x;
+    const int step = blockDim.x * gridDim.x;
+
+    int source = first;
+    while (source < n) {
+        dijkstra(matrix, result, source, n);
+        source += step;
+    }
+}
+
+/*
+ * Entry point. Usage: <program> <n>
+ *
+ * Builds a random n-vertex graph, writes it to input.txt, computes all-pairs
+ * shortest paths on the GPU, writes the distances to output.txt and prints
+ * the kernel execution time.
+ *
+ * Returns 0 on success and 1 on bad arguments, allocation failure or a CUDA
+ * error during the kernel.
+ */
+int main(int argc, char **argv){
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s <number_of_vertices>\n",
+                argc > 0 ? argv[0] : "dijkstra_cuda");
+        return 1;
+    }
+
+    int n = atoi(argv[1]);
+    if (n <= 0) {
+        fprintf(stderr, "Number of vertices must be a positive integer\n");
+        return 1;
+    }
+
+    // Initialize graph
+    int **matrix = NULL;
+    if (malloc_matrix(&matrix, n) != 0) {
+        fprintf(stderr, "Failed to allocate the %d x %d input graph\n", n, n);
+        return 1;
+    }
+
+    // Generate random values in root graph
+    init_random(matrix, n);
+
+    matrix_to_file(matrix, n, "input.txt");
+
+    // Initialize result matrix
+    int **result = NULL;
+    if (malloc_matrix(&result, n) != 0) {
+        fprintf(stderr, "Failed to allocate the %d x %d result matrix\n", n, n);
+        free_matrix(&matrix);
+        return 1;
+    }
+
+    // dijkstra() allocates n bytes of scratch per source with in-kernel
+    // malloc. Size the device heap for n such buffers with headroom for
+    // allocator overhead; this has to happen before the kernel is launched.
+    size_t heapBytes = (size_t)n * n * 2 + ((size_t)8 << 20);
+    if (cudaDeviceSetLimit(cudaLimitMallocHeapSize, heapBytes) != cudaSuccess) {
+        fprintf(stderr, "Warning: could not resize the device malloc heap\n");
+        cudaGetLastError(); // clear it so it is not reported as a kernel error
+    }
+
+    cudaEvent_t start, stop;
+    cudaEventCreate(&start);
+    cudaEventCreate(&stop);
+
+    // One thread per source vertex, rounded up to whole blocks
+    int blockSize = 256;
+    int numBlocks = (n + blockSize - 1) / blockSize;
+
+    cudaEventRecord(start);
+    // Start dijkstra APSP
+    dijkstra_APSP<<<numBlocks, blockSize>>>(matrix, result, n);
+    cudaEventRecord(stop);
+
+    // Launch errors are reported by cudaGetLastError; errors raised while the
+    // kernel runs surface at the synchronizing calls. Check before the
+    // result is read on the host.
+    cudaError_t err = cudaGetLastError();
+    if (err == cudaSuccess) err = cudaEventSynchronize(stop);
+    if (err == cudaSuccess) err = cudaDeviceSynchronize();
+
+    int exit_code = 0;
+    if (err != cudaSuccess) {
+        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
+        exit_code = 1;
+    } else {
+        // Flush output to file
+        matrix_to_file(result, n, "output.txt");
+
+        // print execution time in microseconds
+        float milliseconds = 0;
+        cudaEventElapsedTime(&milliseconds, start, stop);
+        printf("Execution time : %f microseconds\n", milliseconds*1000);
+    }
+
+    cudaEventDestroy(start);
+    cudaEventDestroy(stop);
+
+    // Free input graph and result matrices
+    free_matrix(&matrix);
+    free_matrix(&result);
+
+    return exit_code;
+}
\ No newline at end of file