Skip to content
Snippets Groups Projects
Commit ec623aef authored by Hilmi Naufal Yafie's avatar Hilmi Naufal Yafie
Browse files

apply dijkstra algorithm using cuda

parent 5c8c299d
Branches
No related merge requests found
......@@ -4,6 +4,8 @@
#include <stdlib.h>
#include <limits.h>
#define ID 13517122
/**
* Get vertex index with minimum distance which not yet included
* in spt_set
......@@ -11,10 +13,10 @@
* @param spt_set a set denoting vertices included in spt_set
* @param n number of vertices in the graph
* @return index of minimum distance not yet included in spt_set
*/
int min_distance_idx(long dist[], bool spt_set[], int n) {
*//*
long min_distance_idx(long *dist, bool *spt_set, int n) {
// Initialize min value
int min = INT_MAX, min_index;
long min = LONG_MAX, min_index;
for (int i = 0; i < n; i++) {
if (spt_set[i] == false && dist[i] <= min) {
......@@ -22,16 +24,96 @@
min_index = i;
}
}
spt_set[min_index] = true;
dist[src] = 0;
return min_index;
}
*/
// for dijkstra algorithm
/**
 * Initialise the per-source Dijkstra state on the device.
 *
 * For every vertex i:
 *   - visitedNode[i] is set to 1 for the source vertex and 0 otherwise;
 *   - allResult[i] is seeded from graph[i*num_vertices + sourceIdx]; a weight
 *     of 0 for i != sourceIdx is treated as "no edge" and stored as LONG_MAX.
 * *minIndex is reset to -1.
 *
 * The loop advances by blockDim.x * gridDim.x, so any 1-D launch
 * configuration covers all vertices.
 *
 * @param graph        num_vertices * num_vertices weights, flat array
 * @param allResult    output: tentative distance per vertex (num_vertices entries)
 * @param visitedNode  output: visited flag per vertex (num_vertices entries)
 * @param minIndex     output: single int, reset to -1
 * @param sourceIdx    index of the source vertex
 * @param num_vertices number of vertices in the graph
 */
__global__
void initValue(long *graph, long *allResult, int *visitedNode, int *minIndex, int sourceIdx, int num_vertices) {
    int index = threadIdx.x + blockDim.x * blockIdx.x;
    int stride = blockDim.x * gridDim.x;

    for (int i = index; i < num_vertices; i += stride) {
        // Widen before multiplying: i * num_vertices overflows int once
        // num_vertices exceeds ~46340.
        long weight = graph[(size_t) i * num_vertices + sourceIdx];

        // The owning thread marks the source directly instead of every
        // thread rewriting visitedNode[sourceIdx] after the loop.
        visitedNode[i] = (i == sourceIdx) ? 1 : 0;
        allResult[i] = (weight == 0 && i != sourceIdx) ? LONG_MAX : weight;
    }

    // One writer is enough for the scalar.
    if (index == 0) {
        *minIndex = -1;
    }
}
/**
 * Select the unvisited vertex with the smallest tentative distance.
 *
 * Sequential scan over all vertices; only thread 0 of block 0 does any work
 * (the host launches this kernel as <<<1,1>>>).
 *
 * On return *minDistance holds the smallest distance found, or LONG_MAX when
 * no unvisited vertex has a distance below LONG_MAX.  Only when a vertex was
 * found are *minIndex updated and that vertex marked visited; otherwise
 * *minIndex is left untouched.
 *
 * @param allResult    tentative distance per vertex
 * @param visitedNode  visited flag per vertex; the selected vertex is set to 1
 * @param minIndex     output: index of the selected vertex
 * @param minDistance  output: distance of the selected vertex, or LONG_MAX
 * @param num_vertices number of vertices in the graph
 */
__global__
void findMinDistance(long *allResult, int *visitedNode, int *minIndex, long *minDistance, int num_vertices) {
    // The scan is inherently serial; extra threads would only race on the outputs.
    if (threadIdx.x != 0 || blockIdx.x != 0) {
        return;
    }

    long best = LONG_MAX;
    int bestIdx = -1;
    for (int j = 0; j < num_vertices; j++) {
        if (visitedNode[j] == 0 && allResult[j] < best) {
            best = allResult[j];
            bestIdx = j;
        }
    }

    *minDistance = best;

    // Without this guard an unsuccessful scan would write through whatever
    // *minIndex held before (-1 right after initValue), i.e. out of bounds.
    if (bestIdx >= 0) {
        *minIndex = bestIdx;
        visitedNode[bestIdx] = 1;
    }
}
/**
 * Relax the edges leaving the vertex selected by findMinDistance.
 *
 * For every unvisited vertex i, allResult[i] is lowered to
 * graph[i*num_vertices + *minIndex] + *minDistance when that sum is smaller.
 * A weight of 0 is treated as "no edge", the same convention initValue uses.
 * The kernel does nothing when no vertex was selected (*minIndex out of range
 * or *minDistance == LONG_MAX).
 *
 * The loop advances by blockDim.x * gridDim.x, so any 1-D launch
 * configuration covers all vertices.
 *
 * NOTE(review): assumes edge weights are non-negative, as Dijkstra requires.
 *
 * @param graph        num_vertices * num_vertices weights, flat array
 * @param allResult    in/out: tentative distance per vertex
 * @param visitedNode  visited flag per vertex (visited vertices are skipped)
 * @param minIndex     index of the vertex selected in this round
 * @param minDistance  distance of the selected vertex
 * @param num_vertices number of vertices in the graph
 */
__global__
void setNewDistance(long *graph, long *allResult, int *visitedNode, int *minIndex, long *minDistance, int num_vertices) {
    // Read the two scalars once instead of on every loop iteration.
    const int pivot = *minIndex;
    const long pivotDist = *minDistance;

    // Nothing was selected this round: adding LONG_MAX to a weight would
    // overflow, and an invalid pivot would index outside the graph.
    if (pivot < 0 || pivot >= num_vertices || pivotDist == LONG_MAX) {
        return;
    }

    int index = threadIdx.x + blockDim.x * blockIdx.x;
    int stride = blockDim.x * gridDim.x;

    for (int i = index; i < num_vertices; i += stride) {
        if (visitedNode[i]) {
            continue;
        }

        // Widen before multiplying so the flat index cannot overflow int.
        const long weight = graph[(size_t) i * num_vertices + pivot];

        // 0 means "no edge" between distinct vertices.
        if (weight == 0) {
            continue;
        }
        // Skip sums that would overflow long.
        if (pivotDist > 0 && weight > LONG_MAX - pivotDist) {
            continue;
        }

        const long candidate = weight + pivotDist;
        if (candidate < allResult[i]) {
            allResult[i] = candidate;
        }
    }
}
/**
 * Generate a random symmetric graph with n vertices.
 *
 * Weights come from rand(), so the caller controls reproducibility through
 * srand().  The diagonal is 0.
 *
 * @param n number of vertices
 * @return 1D array of n*n longs where graph[i*n + j] = graph[j*n + i] =
 *         distance from vertex i to j, to be released with free();
 *         NULL if n <= 0, the size overflows, or allocation fails
 */
long* create_graph(int n) {
    if (n <= 0) {
        return NULL;
    }

    // Do the size arithmetic in size_t: n * n overflows int for n > 46340.
    const size_t dim = (size_t) n;
    if (dim > ((size_t) -1) / sizeof(long) / dim) {
        return NULL;
    }

    long *graph = (long*) malloc(dim * dim * sizeof(long));
    if (graph == NULL) {
        return NULL;
    }

    for (size_t i = 0; i < dim; i++) {
        graph[i * dim + i] = 0;
        // Fill the upper triangle and mirror it; rand() is consumed in
        // row-major order of the upper triangle.
        for (size_t j = i + 1; j < dim; j++) {
            const long weight = rand();
            graph[i * dim + j] = weight;
            graph[j * dim + i] = weight;
        }
    }
    return graph;
}
//-----
/**
 * Allocate an n x n matrix of longs with every entry set to INT_MAX.
 *
 * @param n number of rows and columns
 * @return 1D array of n*n longs, to be released with free();
 *         NULL if n <= 0, the size overflows, or allocation fails
 */
long* create_temp(int n) {
    if (n <= 0) {
        return NULL;
    }

    // Do the size arithmetic in size_t: n * n overflows int for n > 46340.
    const size_t dim = (size_t) n;
    if (dim > ((size_t) -1) / sizeof(long) / dim) {
        return NULL;
    }
    const size_t cells = dim * dim;

    long *matrix = (long*) malloc(cells * sizeof(long));
    if (matrix == NULL) {
        return NULL;
    }

    // Initialise every cell, not just the upper triangle, so no
    // uninitialised memory is handed back to the caller.
    for (size_t k = 0; k < cells; k++) {
        matrix[k] = INT_MAX;
    }
    return matrix;
}
/*
long **gen_graph(int n) {
// allocate memory for the matrix that represents the graph
long **result = (long **)malloc(n * sizeof(long *));
......@@ -40,7 +122,7 @@
}
// fill the matrix with random numbers
srand(13517122);
for (int i = 0; i < n; i++) {
for (int j = i; j < n; j++) {
......@@ -54,8 +136,11 @@
}
return result;
}
}*/
// The graph is stored as a flat 1D array (graph[i*n + j]) so that it can be allocated on the device with a single cudaMalloc.
//-----
//-----
/*
long **gen_temp(int r, int c) {
// allocate memory for the matrix that represents the graph
long **result = (long **)malloc(r * sizeof(long *));
......@@ -73,14 +158,14 @@ long **gen_temp(int r, int c) {
}
return result;
}
}*/
/*
long *dijkstra(long **graph, int n, int src) {
// output array, contains shortest distance from src to every vertices
long *dist = (long *) malloc (sizeof(long) * n);
// spt_set[i] is true if vertex i already included in the shortest path tree
bool spt_set[n];
bool *spt_set = (bool *) malloc(sizeof(bool) * n);
// initialize dist and spt_set
for (int i = 0; i < n; i++) {
......@@ -113,14 +198,16 @@ long *dijkstra(long **graph, int n, int src) {
}
}
}
free(spt_set);
return dist;
}
*/
/**
* that one kernel that do it "all"
*/
__global__ do_it(**long graph, **long result, int num_vertices) {
/*
__global__ void do_it(**long graph, **long result, int num_vertices) {
int start_idx = threadIdx.x + blockDim.x * blockIdx.x;
int stride = blockDim.x * gridDim.x;
......@@ -134,5 +221,33 @@ __global__ do_it(**long graph, **long result, int num_vertices) {
}
}
}
*/
/**
 * Print an n x n matrix stored as a flat array to stdout, one row per line,
 * each value followed by a single space.
 *
 * @param data flat array with n*n entries, element (r, c) at data[r*n + c]
 * @param n    number of rows and columns
 */
void print_graph(long *data, int n) {
    for (int row = 0; row < n; ++row) {
        const long *cells = data + row * n;
        for (int col = 0; col < n; ++col) {
            printf("%li ", cells[col]);
        }
        printf("\n");
    }
}
/**
 * Write an n x n matrix stored as a flat array to a text file, one row per
 * line, each value followed by a single space.
 *
 * Aborts the program if the file cannot be opened or closed.  The file is
 * closed before the success message is printed, so its contents are on disk
 * by the time the function returns.
 *
 * @param n        number of rows and columns
 * @param graph    flat array with n*n entries, element (i, j) at graph[i*n + j]
 * @param filename path of the file to create or truncate
 */
void write_to_txt(int n, long *const graph, const char* filename) {
    FILE *fout = fopen(filename, "w");
    if (NULL == fout) {
        fprintf(stderr,"error opening output file");
        abort();
    }

    for (int i = 0; i < n; i++) {
        // Widen before multiplying so the row offset cannot overflow int.
        const long *row = graph + (size_t) i * n;
        for (int j = 0; j < n; j++) {
            fprintf(fout,"%li ",row[j]);
        }
        fprintf(fout,"\n");
    }

    // The stream used to be left open: the handle leaked and buffered data
    // was not flushed until process exit.
    if (fclose(fout) != 0) {
        fprintf(stderr,"error closing output file");
        abort();
    }

    printf("Result has been written to %s ...\n",filename);
}
#endif
\ No newline at end of file
......@@ -2,9 +2,9 @@
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#include <util.h>
#include "dijkstra_cuda.cuh"
#define THREADS_BLOCK 256
static double get_micros(void) {
struct timespec ts;
......@@ -14,53 +14,125 @@ static double get_micros(void) {
/**
 * Abort the program with a diagnostic if a CUDA runtime call failed.
 *
 * @param status value returned by the CUDA runtime call
 * @param what   short description of the call, printed with the error
 */
static void check_cuda(cudaError_t status, const char *what) {
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/**
 * Run Dijkstra from every vertex of a random graph on the GPU and write the
 * resulting distance matrix to a text file.
 *
 * Usage: Dijkstra_CUDA num_of_node output_filename
 *
 * The reported processing time covers, for each source vertex, the kernel
 * launches up to the point where the device has finished executing them;
 * host<->device copies of the result are excluded.
 *
 * @return EXIT_SUCCESS on success, EXIT_FAILURE on bad arguments or on
 *         allocation failure (CUDA failures terminate via check_cuda)
 */
int main (int argc, char const *argv[]) {
    // check if argc == 3
    if (argc != 3) {
        fprintf(stderr,"Usage: Dijkstra_CUDA num_of_node output_filename\n");
        return EXIT_FAILURE;
    }

    // initialization
    srand(ID);
    int num_vertices = atoi(argv[1]);
    if (num_vertices <= 0) {
        // A zero-sized grid is an invalid launch configuration, and atoi()
        // returns 0 for non-numeric input.
        fprintf(stderr,"num_of_node must be a positive integer\n");
        return EXIT_FAILURE;
    }
    double start_time, end_time, total_time;
    total_time = 0;

    // Byte counts in size_t: num_vertices * num_vertices overflows int.
    const size_t n = (size_t) num_vertices;
    const size_t row_bytes = n * sizeof(long);
    const size_t matrix_bytes = n * row_bytes;

    // allocate memory in host for the graph
    long *graph = create_graph(num_vertices);
    // allocate memory in the host for the result matrix
    long *result = create_temp(num_vertices);
    if (graph == NULL || result == NULL) {
        fprintf(stderr,"error allocating host memory\n");
        free(graph);
        free(result);
        return EXIT_FAILURE;
    }

    // device buffers
    long *gpu_graph = NULL;
    long *gpu_result = NULL;
    int *gpu_visitedNode = NULL;
    long *minDistance = NULL;
    int *minIndex = NULL;

    // allocate memory in device for the graph
    check_cuda(cudaMalloc((void**)&gpu_graph, matrix_bytes), "cudaMalloc(gpu_graph)");
    // allocate memory in device for the result of dijkstra
    check_cuda(cudaMalloc((void**)&gpu_result, row_bytes), "cudaMalloc(gpu_result)");
    // allocate memory in device for the list of visited node
    check_cuda(cudaMalloc((void**)&gpu_visitedNode, n * sizeof(int)), "cudaMalloc(gpu_visitedNode)");
    // allocate memory in device for the minimal distance used in dijkstra
    check_cuda(cudaMalloc((void**)&minDistance, sizeof(long)), "cudaMalloc(minDistance)");
    // allocate memory in device for the index of minDistance
    check_cuda(cudaMalloc((void**)&minIndex, sizeof(int)), "cudaMalloc(minIndex)");

    // copy data of graph from host to device
    check_cuda(cudaMemcpy(gpu_graph, graph, matrix_bytes, cudaMemcpyHostToDevice),
               "cudaMemcpy(graph -> device)");

    // block size and number of blocks used for the per-vertex kernels
    const int blockSize = THREADS_BLOCK;
    const int numBlocks = (num_vertices + blockSize - 1) / blockSize;

    // dijkstra algorithm for each vertex
    for (int i = 0; i < num_vertices; i++) {
        // set timer
        start_time = get_micros();

        // initialize value for dijkstra in device
        initValue<<<numBlocks, blockSize>>>(
            gpu_graph,
            gpu_result,
            gpu_visitedNode,
            minIndex,
            i,
            num_vertices);
        check_cuda(cudaGetLastError(), "initValue launch");

        // for each vertex except current vertex (source)
        for (int j = 1; j < num_vertices; j++) {
            // find minimal distance
            findMinDistance<<<1,1>>>(
                gpu_result,
                gpu_visitedNode,
                minIndex,
                minDistance,
                num_vertices);
            check_cuda(cudaGetLastError(), "findMinDistance launch");

            // update distance for each vertex if new distance < old distance
            setNewDistance<<<numBlocks, blockSize>>>(
                gpu_graph,
                gpu_result,
                gpu_visitedNode,
                minIndex,
                minDistance,
                num_vertices);
            check_cuda(cudaGetLastError(), "setNewDistance launch");
        }

        // Kernel launches are asynchronous: wait for the device before
        // stopping the timer, otherwise only launch overhead is measured.
        check_cuda(cudaDeviceSynchronize(), "kernel execution");

        // end of timer
        end_time = get_micros();

        // copy the result from device straight into row i of the result matrix
        check_cuda(cudaMemcpy(result + (size_t) i * n, gpu_result, row_bytes, cudaMemcpyDeviceToHost),
                   "cudaMemcpy(result -> host)");

        total_time += end_time - start_time;
    }

    write_to_txt(num_vertices, result, argv[2]);
    printf("processing time: %0.04lf us ...\n",total_time);

    // free device memory allocation
    check_cuda(cudaFree(gpu_graph), "cudaFree(gpu_graph)");
    check_cuda(cudaFree(gpu_result), "cudaFree(gpu_result)");
    check_cuda(cudaFree(gpu_visitedNode), "cudaFree(gpu_visitedNode)");
    check_cuda(cudaFree(minDistance), "cudaFree(minDistance)");
    check_cuda(cudaFree(minIndex), "cudaFree(minIndex)");

    // free host memory allocation
    free(graph);
    free(result);

    return EXIT_SUCCESS;
}
\ No newline at end of file
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment