# binaries
bin/*
# scripts
connect
push
serial: ./src/serial.c ./src/dijkstra.* ./src/util.*
	gcc ./src/serial.c ./src/dijkstra.c ./src/util.c -o ./bin/serial
	./bin/serial $(n)

parallel: ./src/paralel.c ./src/util.* ./src/dijkstra.*
	mpicc ./src/paralel.c ./src/util.c ./src/dijkstra.c -o ./bin/parallel
	mpirun -np $(np) --hostfile mpi_hostfile ./bin/parallel $(np) $(nv)

run_par: ./bin/parallel
	mpirun -np $(np) --hostfile mpi_hostfile ./bin/parallel $(np) $(nv)

hello_omp: ./src/hello_openmp.c
	gcc -g -Wall -fopenmp -o ./bin/hello_omp ./src/hello_openmp.c
	./bin/hello_omp $(nt)

parallel_omp: ./src/paralel_openmp.c
	gcc -g -Wall -fopenmp -o ./bin/parallel_omp ./src/paralel_openmp.c ./src/util.c ./src/dijkstra.c
	./bin/parallel_omp $(nt) $(nv)

parallel_cuda: ./src/paralel.cu
	nvcc ./src/paralel.cu -o ./bin/parallel_cuda
	./bin/parallel_cuda $(nt) $(nv)
<!-- # Announcement
Some of the files that must be present in the repository include:
* The src directory, containing the source code you wrote.
* A README.md file containing:
  * Instructions for using the program.
  * Task division. Present it as a list of work items per student. For example: XXXX worked on functions YYYY, ZZZZ, and YYZZ.
  * A work report, structured according to the description in the previous section. -->
# IF3230 Dijkstra CUDA Assignment
## Usage Instructions
From the root directory, compile the programs with the Makefile:
* To compile and run the serial version of Dijkstra:
  `make serial`
  `bin/serial [number of vertices]`
* To compile and run the parallel (CUDA) version of Dijkstra:
  `make parallel_cuda nt=[number of threads] nv=[number of vertices]`
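For example, `make parallel_cuda nt=256 nv=1000` compiles `src/paralel.cu` with nvcc and then runs `./bin/parallel_cuda` with those two arguments, as wired up in the Makefile above (256 is the block size used in all of the tests reported below).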
## Task Division
* 13517035 - Hilmi Naufal Yafie : parallel Dijkstra (CUDA), report, exploration
* 13517122 - M. ALgah Fattah I. : parallel Dijkstra (CUDA), report, exploration
## Work Report
### Description of the Parallel Solution
Our parallel solution parallelizes the execution of the Dijkstra algorithm as a whole. As is well known, Dijkstra's algorithm finds the shortest distances from one node to every other node. Since the given problem asks for the distances from every node to every other node, our parallelization has each thread run Dijkstra from a different source vertex and then write the resulting distances into the corresponding row of the matrix that represents the final result.

For example, suppose there are 3 worker threads and 3 nodes in the graph being processed: the work is split so that the first thread processes node A, the second thread node B, and the third thread node C. Given a final matrix `result` that stores the end result, the first thread writes to the first row of `result`, which represents the distances from node A to the other nodes, and so on. A minimal sketch of this mapping is given below.
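A minimal sketch of the row-per-source mapping, assuming `graph`, `result`, and `n` as set up in `src/paralel_openmp.c` (which uses the `dijkstra` and `gen_temp` helpers from `src`):

```c
// One worker per source vertex; each worker owns one row of `result`,
// so no synchronization between rows is needed.
#pragma omp parallel for
for (int src = 0; src < n; src++) {
    long *dist = dijkstra(graph, n, src); // distances from src to all vertices
    for (int j = 0; j < n; j++) {
        result[src][j] = dist[j];
    }
    free(dist); // dijkstra() returns a malloc'd array
}
```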
### Solution Analysis
In the context of parallelizing Dijkstra's algorithm, our view is that when the computational load is distributed across threads executed by the GPU cores, the total time needed to compute all of the distances becomes shorter.
### Test Results
Below are the results of our tests with 100, 500, 1000, and 3000 nodes, for both serial Dijkstra and parallel Dijkstra (in microseconds):
* **Serial Dijkstra**

| N | Run 1 | Run 2 | Run 3 | Average |
| ---- | ----------: | ----------: | ----------: | ----------: |
| 100 | 20.1025 | 20.0155 | 18.46475 | 19.52758 |
| 500 | 2537.57925 | 1734.28625 | 1661.24725 | 1977.70425 |
| 1000 | 13798.4025 | 13938.675 | 15880.503 | 14539.1935 |
| 3000 | 762352.86125 | 663569.772 | 790499.578 | 738807.40375 |
* **Parallel Dijkstra (CUDA)**

| N | Run 1 | Run 2 | Run 3 | Average |
| ---- | ----------: | ----------: | ----------: | ----------: |
| 100 | 86.368 | 58.231 | 39.25525 | 61.28475 |
| 500 | 1115.20125 | 941.9165 | 943.5145 | 1000.21075 |
| 1000 | 4191.5325 | 6223.825 | 7646.94175 | 6020.76641666 |
| 3000 | 432189.653 | 438795.246 | 441382.9855 | 437455.9615 |

**For every test case, block size = 256 threads.**
### Test Analysis
Across all of the runs, the parallel program was consistently faster than the serial one. This is expected: parallelization makes better use of the available resources, and with more than one thread the work finishes sooner. For N = 100, however, the parallel timings are larger and visibly unstable. This is because the measured time also includes the latency of dispatching kernel launches to the GPU server, so when a launch takes longer, the measured time grows with it.
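One way to keep launch latency from dominating the measurement is to time with CUDA events, which are recorded on the device stream itself. A minimal sketch, assuming the kernel and device buffers from `src/paralel.cu`:

```c
// Hedged sketch: time device work with CUDA events instead of host clocks.
cudaEvent_t t0, t1;
cudaEventCreate(&t0);
cudaEventCreate(&t1);
cudaEventRecord(t0);
findAndSetNewDistance<<<numBlocks, blockSize>>>(gpu_graph, gpu_result,
        gpu_visitedNode, minIndex, minDistance, num_vertices);
cudaEventRecord(t1);
cudaEventSynchronize(t1);                  // wait until the kernel has finished
float elapsed_ms = 0.0f;
cudaEventElapsedTime(&elapsed_ms, t0, t1); // elapsed device time in milliseconds
```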
#ifndef _BOOLEAN_h
#define _BOOLEAN_h
#define bool unsigned char
#define true 1
#define false 0
#endif
#include "dijkstra.h"
/**
 * Get the index of the vertex with minimum distance that is not yet
 * included in spt_set
 * @param dist distances from the origin vertex
 * @param spt_set set denoting vertices already included in the shortest-path tree
 * @param n number of vertices in the graph
 * @return index of the minimum-distance vertex not yet in spt_set
 */
int min_distance_idx(long dist[], bool spt_set[], int n) {
    // initialize the minimum; dist holds longs, so track it as a long
    long min = LONG_MAX;
    int min_index = -1;
    for (int i = 0; i < n; i++) {
        if (spt_set[i] == false && dist[i] <= min) {
            min = dist[i];
            min_index = i;
        }
    }
    return min_index;
}
/**
 * generate a graph with n vertices
 * @param n number of vertices
 * @return 2D array, graph[i][j] = graph[j][i] = distance from vertex i to j
 */
long **gen_graph(int n) {
    // allocate memory for the matrix representing the graph
    long **result = (long **)malloc(n * sizeof(long *));
    for (int i = 0; i < n; i++) {
        result[i] = (long *)malloc(n * sizeof(long));
    }
    // fill the matrix with random numbers (fixed seed, so every run and every
    // process generates the same graph)
    srand(13517122);
    for (int i = 0; i < n; i++) {
        for (int j = i; j < n; j++) {
            if (i == j) {
                result[i][j] = 0;
            } else {
                result[i][j] = result[j][i] = rand();
            }
        }
    }
    return result;
}

long **gen_temp(int r, int c) {
    // allocate memory for an r x c matrix
    long **result = (long **)malloc(r * sizeof(long *));
    for (int i = 0; i < r; i++) {
        result[i] = (long *)malloc(c * sizeof(long));
    }
    // zero-fill the matrix
    for (int i = 0; i < r; i++) {
        for (int j = 0; j < c; j++) {
            result[i][j] = 0;
        }
    }
    return result;
}
long *dijkstra(long **graph, int n, int src) {
    // output array: shortest distance from src to every vertex
    long *dist = (long *) malloc(sizeof(long) * n);
    // spt_set[i] is true if vertex i is already in the shortest-path tree
    bool spt_set[n];
    // initialize dist and spt_set
    for (int i = 0; i < n; i++) {
        dist[i] = INT_MAX;
        spt_set[i] = false;
    }
    // the distance from the source to itself is zero
    dist[src] = 0;
    // find the shortest path for all vertices
    for (int i = 0; i < n; i++) {
        // pick the unprocessed vertex with minimum distance from src
        int processed_vertex = min_distance_idx(dist, spt_set, n);
        // mark the vertex as processed
        spt_set[processed_vertex] = true;
        for (int j = 0; j < n; j++) {
            // relax vertices adjacent to processed_vertex that are not yet processed
            if (!spt_set[j]
                    && graph[processed_vertex][j] != 0
                    && dist[processed_vertex] != INT_MAX
                    && dist[processed_vertex] + graph[processed_vertex][j] < dist[j]) {
                dist[j] = dist[processed_vertex] + graph[processed_vertex][j];
            }
        }
    }
    return dist;
}
#ifndef DIJKSTRA_H
#define DIJKSTRA_H
#include <stdlib.h>
#include <stdio.h>
#include <limits.h>
#include "boolean.h"
/**
 * Get the index of the vertex with minimum distance that is not yet
 * included in spt_set
 * @param dist distances from the origin vertex
 * @param spt_set set denoting vertices already included in the shortest-path tree
 * @param n number of vertices in the graph
 * @return index of the minimum-distance vertex not yet in spt_set
 */
int min_distance_idx(long dist[], bool spt_set[], int n);

/**
 * generate a graph with n vertices
 * @param n number of vertices
 * @return 2D array, graph[i][j] = graph[j][i] = distance from vertex i to j
 */
long **gen_graph(int n);

/**
 * generate a 2D array with dimensions r x c
 * @param r number of rows
 * @param c number of columns
 * @return 2D array, all entries filled with zero
 */
long **gen_temp(int r, int c);

/**
 * run Dijkstra's algorithm from a single source vertex
 * @param graph adjacency matrix, graph[i][j] = distance from vertex i to j
 * @param n number of vertices in the graph
 * @param src index of the source vertex
 * @return array of length n, shortest distance from src to every vertex
 */
long *dijkstra(long **graph, int n, int src);

#endif
#ifndef __DIJKSTRA_CUDA__
#define __DIJKSTRA_CUDA__
#include <stdlib.h>
#include <stdio.h>
#include <limits.h>
#define ID 13517122
// for the dijkstra algorithm: initialize the tentative distances from the
// source vertex with a grid-stride loop, one array element per thread
__global__
void initValue(long *graph, long *allResult, int *visitedNode, int *minIndex, int sourceIdx, int num_vertices) {
    int index = threadIdx.x + blockDim.x * blockIdx.x;
    int stride = blockDim.x * gridDim.x;
    for (int i = index; i < num_vertices; i += stride) {
        visitedNode[i] = 0;
        // vertices with no direct edge to the source start at "infinity"
        if ((graph[i*num_vertices + sourceIdx] == 0) && i != sourceIdx) {
            allResult[i] = LONG_MAX;
        } else {
            allResult[i] = graph[i*num_vertices + sourceIdx];
        }
    }
    // every thread writes the same values here, which is harmless
    *minIndex = -1;
    visitedNode[sourceIdx] = 1;
}
__global__
void findAndSetNewDistance(long *graph, long *allResult, int *visitedNode, int *minIndex, long *minDistance, int num_vertices) {
    // every thread redundantly scans for the unvisited vertex with the
    // smallest tentative distance (they all compute the same result)
    *minDistance = LONG_MAX;
    for (int j = 0; j < num_vertices; j++) {
        if (visitedNode[j] == 0 && allResult[j] < *minDistance) {
            *minDistance = allResult[j];
            *minIndex = j;
        }
    }
    __syncthreads(); // synchronize the threads within this block
    visitedNode[*minIndex] = 1;
    // grid-stride loop: relax the distance of each unvisited vertex
    int index = threadIdx.x + blockDim.x * blockIdx.x;
    int stride = blockDim.x * gridDim.x;
    for (int i = index; i < num_vertices; i += stride) {
        if (visitedNode[i]) {
            continue;
        } else if ((graph[i*num_vertices + *minIndex] + *minDistance < allResult[i])
                && (graph[i*num_vertices + *minIndex] + *minDistance != 0)) {
            allResult[i] = graph[i*num_vertices + *minIndex] + *minDistance;
        }
    }
}
/**
 * generate a graph with n vertices
 * @param n number of vertices
 * @return 1D array, graph[i*n + j] = graph[j*n + i] = distance from vertex i to j
 */
long* create_graph(int n) {
    // the graph is stored as a flat 1D array (graph[i*n + j]) so that it can
    // be allocated on the device with a single cudaMalloc
    int i, j;
    long *graph = (long*) malloc(n * n * sizeof(long));
    for (i = 0; i < n; i++) {
        for (j = i; j < n; j++) {
            if (i == j) {
                graph[i*n + j] = 0;
            } else {
                graph[i*n + j] = rand();
                graph[j*n + i] = graph[i*n + j];
            }
        }
    }
    return graph;
}

/**
 * generate an n x n result matrix with every entry set to INT_MAX
 */
long* create_temp(int n) {
    int i, j;
    long *graph = (long*) malloc(n * n * sizeof(long));
    for (i = 0; i < n; i++) {
        for (j = 0; j < n; j++) { // fill every entry, not only the upper triangle
            graph[i*n + j] = INT_MAX;
        }
    }
    return graph;
}
void print_graph(long *data, int n) {
    int i, j;
    for (i = 0; i < n; i++) {
        for (j = 0; j < n; j++) {
            printf("%li ", data[i*n + j]);
        }
        printf("\n");
    }
}

void write_to_txt(int n, long *const graph, const char* filename) {
    FILE *fout;
    int i, j;
    if (NULL == (fout = fopen(filename, "w"))) {
        fprintf(stderr, "error opening output file\n");
        abort();
    }
    for (i = 0; i < n; i++) {
        for (j = 0; j < n; j++) {
            fprintf(fout, "%li ", graph[i*n + j]);
        }
        fprintf(fout, "\n");
    }
    fclose(fout); // flush and close the output file
    printf("Result has been written to %s ...\n", filename);
}

#endif
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char *argv[]) {
    int nthreads = atoi(argv[1]), t_id;
    int i;
    omp_set_num_threads(nthreads); // honor the requested thread count
    #pragma omp parallel for private(t_id)
    for (i = 0; i < 10; i++) {
        t_id = omp_get_thread_num(); // get the thread id for each thread
        printf("i : %d, by the way i'm thread %d\n", i, t_id);
    }
    return 0;
}
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(int argc, char *argv[]) {
    int numtasks, rank;
    char processor_name[MPI_MAX_PROCESSOR_NAME];
    int name_len;
    int arr_size = 3;
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numtasks);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Get_processor_name(processor_name, &name_len);
    int *result_arr = (int *) malloc(arr_size * numtasks * sizeof(int));
    int *local_arr = (int *) malloc(arr_size * sizeof(int));
    for (int i = 0; i < arr_size; i++) {
        local_arr[i] = i * rank * 3;
    }
    if (rank == 0) {
        int *temp = (int *) malloc(arr_size * sizeof(int));
        // initiate result_arr with rank 0's own array
        for (int i = 0; i < arr_size; i++) {
            result_arr[i] = local_arr[i];
        }
        // receive an array from every other node
        for (int i = 1; i < numtasks; i++) {
            MPI_Recv(temp,
                     arr_size,        // count is in elements, not bytes
                     MPI_INT,
                     i,
                     0,
                     MPI_COMM_WORLD,
                     MPI_STATUS_IGNORE);
            // copy the received array into its slot in result_arr
            memcpy(result_arr + (arr_size * i),
                   temp,
                   arr_size * sizeof(int));
        }
        free(temp);
    } else {
        MPI_Send(local_arr,
                 arr_size,            // count is in elements, not bytes
                 MPI_INT,
                 0,
                 0,
                 MPI_COMM_WORLD);
    }
    if (rank == 0) {
        printf("here is the array you ordered\n");
        for (int i = 0; i < arr_size * numtasks; i++) {
            printf("%d ", result_arr[i]);
        }
        printf("\n");
    }
    free(local_arr);
    free(result_arr);
    MPI_Finalize();
}
#include "dijkstra.h"
#include "util.h"
#include <stdio.h>
#include <mpi.h>
#include <math.h>
#include <string.h>
int main(int argc, char *argv[])
{
int numprocs, rank;
char processor_name[MPI_MAX_PROCESSOR_NAME];
int name_len;
int np = atoi(argv[1]);
int numvertices = atoi(argv[2]);
int chunk_size = ceil(numvertices/np);
// printf("chunk_size : %d\n", chunk_size);
// printf("np : %d\n", np);
// printf("numvertices : %d\n", numvertices);
MPI_Status Stat;
MPI_Init(&argc,&argv);
MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Get_processor_name(processor_name, &name_len);
double start_time, finish_time;
// generate graph
// printf("about to generate our graph\n");
long **graph = gen_graph(numvertices);
// start the timer
MPI_Barrier(MPI_COMM_WORLD);
start_time = MPI_Wtime();
// set chunk size and local src
// int my_first_src = rank * ceil(numvertices/numprocs);
// int my_last_src = my_first_src + chunk_size;
int my_first_src = rank * chunk_size;
// int my_last_src = if (my_first_src+chunk_size < numvertices)? my_first_src+chunk_size:;
if (rank == 0) {
printf("chunk_size : %d\n", chunk_size);
}
printf("i'm node %d, my_first_src: %d\n", rank, my_first_src);
// printf("i'm node %d, my_last_src: %d\n", rank, my_last_src);
// long **my_results = (long **) malloc(chunk_size * sizeof(long*));
// allocate 2D array for local result
// printf("initiate my result\n");
long **my_results = gen_temp(chunk_size, numvertices);
// printf("done initiating my result\n");
// find shortest path from each src
for (int i = 0; i+my_first_src < numvertices && i < chunk_size; i++) {
// printf("i'm node %d and currently working on row %d \n", rank, i+my_first_src);
long *dist = dijkstra(graph, numvertices, i+my_first_src);
my_results[i] = dist;
}
// printf("hi i'm node %d and i'm done searching\n", rank);
// TBD gathering data from these processes
if ( rank == 0 ) { // gather data from other nodes
long **result = gen_temp(numvertices, numvertices);
// printf("generated empty result matrix\n");
// long **temp = gen_temp(chunk_size, numvertices);
long *temp = (long*) malloc(numvertices * sizeof(long));
// printf("generated temp\n");
memcpy(result,
my_results,
chunk_size*sizeof(long*));
printf("copying temp to result\n");
for (int i = 1; i < numprocs; i++) {
printf("about to receive local result from node %d\n", i);
for (int j = 0; j < chunk_size; j++) { // loop buat nerima tiap baris dari node lain
MPI_Recv(temp,
numvertices*sizeof(long),
MPI_LONG,
i,
0,
MPI_COMM_WORLD,
MPI_STATUS_IGNORE);
// for (int i=0;i<numvertices;i++) {
// printf("%d ", temp[i]);
// }
// printf("\n");
// printf("alamat result yang mau ditulis: result+ %d\n", (i*chunk_size)+(j*numvertices));
// memcpy(result+((i*chunk_size)+(j*numvertices)),
// temp,
// chunk_size*sizeof(long));
for (int k = 0; k < numvertices; k++) {
result[i*chunk_size+j][k] = temp[k];
}
}
printf("done receiving local result from node %d\n", i);
// for (int i =0; i < chunk_size; i++) {
// for (int j= 0 ; j < numvertices; j++){
// printf("%ld ", temp[i][j]);
// }
// printf("\n");
// }
printf("copying local result from node %d to result\n", i);
}
// for (int i=0; i < numvertices; i++) {
// printf("row %d\n", i);
// for (int j = 0; j < numvertices; j++) {
// printf("%d ", result[i][j]);
// }
// printf("\n");
// }
//
char filename[20];
sprintf(filename, "./output_parallel_%d", numvertices);
printf("about to write output file\n");
write_result(result, numvertices, filename);
printf("done writing\n");
//free result (2d arr)
for(int i = 0; i < numvertices; i++){
free(result[i]);
}
printf("freeing result\n");
//free temp (1d arr)
free(temp);
printf("freeing temp\n");
} else { // send my_results to master node
printf("i'm node %d and i'm going to send my result to master\n", rank);
for(int i = 0; i < chunk_size; i++) {
MPI_Send(my_results[i],
chunk_size*sizeof(long),
MPI_LONG,
0,
0,
MPI_COMM_WORLD
);
}
printf("i'm node %d and i'm just sent my result to master\n", rank);
}
// printf("I'm process %d and the distance from vertex 1 to 2 is %d\n",
// rank,
// graph[0][1]);
//
MPI_Barrier(MPI_COMM_WORLD);
finish_time = MPI_Wtime();
if (rank == 0) {
printf("elapsed time : %.lf ms\n", (finish_time-start_time)*1000000);
}
//free my_results (2d arr)
free(my_results);
printf("freed my_results\n");
//free graph (2d arr)
free(graph);
printf("freed graph\n");
MPI_Finalize();
return 0;
}
\ No newline at end of file
#include <iostream>
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#include "dijkstra_cuda.cuh"
#define THREADS_BLOCK 256

// host timer: wall-clock time in microseconds
static double get_micros(void) {
    struct timespec ts;
    timespec_get(&ts, TIME_UTC);
    return ((double)((long)ts.tv_sec * 1000000000L + ts.tv_nsec) / 1000);
}
int main(int argc, char const *argv[]) {
    // check that both arguments are present
    if (argc != 3) {
        fprintf(stderr, "Usage: Dijkstra_CUDA num_of_node output_filename\n");
        return EXIT_FAILURE;
    }
    // initialization
    srand(ID);
    int num_vertices = atoi(argv[1]);
    double start_time, end_time, total_time;
    total_time = 0;
    // allocate memory in the host for the graph
    long *graph = create_graph(num_vertices);
    // allocate memory in the host for the result matrix
    long *result = create_temp(num_vertices);
    // allocate memory in the host for the result array of a single source vertex
    long *tempResult = (long *)malloc(num_vertices * sizeof(long));
    for (int i = 0; i < num_vertices; i++) {
        tempResult[i] = -1;
    }
    // device pointers
    long *gpu_graph;
    long *gpu_result;
    int *gpu_visitedNode;
    long *minDistance;
    int *minIndex;
    // allocate memory in the device for the graph
    cudaMalloc((void**)&gpu_graph, num_vertices * num_vertices * sizeof(long));
    // allocate memory in the device for the result of dijkstra
    cudaMalloc((void**)&gpu_result, num_vertices * sizeof(long));
    // allocate memory in the device for the list of visited nodes
    cudaMalloc((void**)&gpu_visitedNode, num_vertices * sizeof(int));
    // allocate memory in the device for the minimal distance used in dijkstra
    cudaMalloc((void**)&minDistance, sizeof(long));
    // allocate memory in the device for the index of minDistance
    cudaMalloc((void**)&minIndex, sizeof(int));
    // copy the graph from host to device
    cudaMemcpy(gpu_graph, graph, num_vertices * num_vertices * sizeof(long), cudaMemcpyHostToDevice);
    // block size and number of blocks that will be used on the device
    int blockSize = THREADS_BLOCK;
    int numBlocks = (num_vertices + blockSize - 1) / blockSize;
    // run the dijkstra algorithm for each source vertex
    for (int i = 0; i < num_vertices; i++) {
        // initialize values for dijkstra on the device
        initValue<<<numBlocks, blockSize>>>(
            gpu_graph,
            gpu_result,
            gpu_visitedNode,
            minIndex,
            i,
            num_vertices);
        // start the timer
        start_time = get_micros();
        // for each vertex except the current source vertex
        for (int j = 1; j < num_vertices; j++) {
            // find the minimal distance and update each vertex whose new
            // distance is smaller than its old one (an earlier version used a
            // separate findMinDistance kernel; it was merged into this one)
            findAndSetNewDistance<<<numBlocks, blockSize>>>(
                gpu_graph,
                gpu_result,
                gpu_visitedNode,
                minIndex,
                minDistance,
                num_vertices);
        }
        // stop the timer (kernel launches are asynchronous, so launch latency
        // is part of the measured time; see the analysis in the README)
        end_time = get_micros();
        // copy the result from device to host
        cudaMemcpy(tempResult, gpu_result, num_vertices * sizeof(long), cudaMemcpyDeviceToHost);
        // copy the row into the result matrix
        for (int k = 0; k < num_vertices; k++) {
            result[i*num_vertices + k] = tempResult[k];
        }
        total_time += end_time - start_time;
    }
    write_to_txt(num_vertices, result, argv[2]);
    printf("processing time: %0.04lf us ...\n", total_time);
    // free device memory
    cudaFree(gpu_graph);
    cudaFree(gpu_result);
    cudaFree(gpu_visitedNode);
    cudaFree(minDistance);
    cudaFree(minIndex);
    // free host memory
    free(tempResult);
    free(graph);
    free(result);
    return EXIT_SUCCESS;
}
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "util.h"
#include "dijkstra.h"
#include "boolean.h"

// host timer: wall-clock time in microseconds
static double get_micros(void) {
    struct timespec ts;
    timespec_get(&ts, TIME_UTC);
    return ((double)((long)ts.tv_sec * 1000000000L + ts.tv_nsec) / 1000);
}

int main(int argc, char const *argv[])
{
    int thread_count = strtol(argv[1], NULL, 10);
    int num_vertices = atoi(argv[2]);
    // time
    double start_time, end_time, total_time;
    // generate the graph and the result matrix
    long **result = gen_temp(num_vertices, num_vertices);
    long **graph = gen_graph(num_vertices);
    // honor the requested thread count
    omp_set_num_threads(thread_count);
    // start the timer
    start_time = get_micros();
    // share the work among the threads: one source vertex per iteration
    #pragma omp parallel for
    for (int i = 0; i < num_vertices; i++) {
        // shortest paths from vertex i; each thread writes only its own rows
        // of result, so no critical section is needed
        long *dist = dijkstra(graph, num_vertices, i);
        for (int j = 0; j < num_vertices; j++) {
            result[i][j] = dist[j];
        }
        free(dist);
    }
    // stop the timer once every thread is done (the parallel for joins here)
    end_time = get_micros();
    total_time = end_time - start_time;
    char filename[32];
    sprintf(filename, "./output_parallel_%d", num_vertices);
    printf("about to write output file\n");
    write_result(result, num_vertices, filename);
    printf("done writing\n");
    printf("processing time: %0.04lf us ...\n", total_time);
    return 0;
}
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "dijkstra.h"
#include "util.h"

int main(int argc, char const *argv[])
{
    clock_t start, end;
    if (argc > 1) {
        int n = atoi(argv[1]);
        printf("n: %d\n", n);
        // start the timer
        start = clock();
        // generate the graph
        long **graph = gen_graph(n);
        // result matrix
        long **result = gen_temp(n, n);
        for (int i = 0; i < n; i++) {
            long *dist = dijkstra(graph, n, i);
            free(result[i]); // replace the zero-filled row from gen_temp
            result[i] = dist;
        }
        // stop the timer
        end = clock();
        // elapsed time
        float t = (float)(end - start) / CLOCKS_PER_SEC;
        printf("Elapsed time (in microseconds): %f\n", t * 1000000);
        char filename[32];
        sprintf(filename, "./output_serial_%d", n);
        write_result(result, n, filename);
        // free the result matrix and the graph
        for (int i = 0; i < n; i++) {
            free(result[i]);
            free(graph[i]);
        }
        free(result);
        free(graph);
    } else {
        printf("usage : serial [n]\n");
    }
    return 0;
}
#include "dijkstra.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
int main(int argc, char const *argv[])
{
long **result = gen_temp(10,5);
long **first = gen_temp(5,5);
long **second = gen_temp(5,5);
for (int i = 0; i < 5; i++) {
for (int j = 0; j < 5; j++) {
first[i][j] = 1 * i * j;
second[i][j] = 2 * i * j;
}
}
memcpy(result,
first,
5 * sizeof(int*)
);
memcpy(result+5,
second,
5 * sizeof(int*)
);
for (int i =0; i < 10; i++) {
for (int j =0; j < 5; j++) {
printf("%d ", result[i][j]);
}
printf("\n");
}
return 0;
}
\ No newline at end of file
#include "util.h"
/**
* write matrix of shortest distance from vertex i to j
* @param graph 2D array
*/
void write_result(long **m, int n, char filename[]) {
FILE *outfile;
printf("here we go, writing file again\n");
outfile = fopen(filename,"w");
printf("file %s opened\n", filename);
if (outfile == NULL) {
printf("Error!\n");
exit(1);
}
printf("Writing output...\n");
for (int i = 0; i < n; i++) {
// printf("about to write row %d\n", i);
for (int j = 0; j < n; j++) {
fprintf(outfile, "%ld ", m[i][j]);
}
// printf("just write row %d\n", i);
fprintf(outfile, "\n");
}
printf("Done.\n");
}
\ No newline at end of file
#ifndef UTIL_H
#define UTIL_H
#include <stdio.h>
#include <stdlib.h>
/**
 * write the matrix of shortest distances from vertex i to j
 * @param m 2D array, m[i][j] = m[j][i] = shortest distance from vertex i to j
 * @param n dimension of matrix m
 * @param filename path of the output file
 */
void write_result(long **m, int n, char filename[]);

#endif