Review notes (free text ahead of the first "diff --git" header).

Purpose of the patch: calculate_sub_matrix becomes a __global__ kernel, and
dijkstra / get_idx_min_dist / get_el / set_el gain device qualifiers so the
kernel can call them. dijkstra now fills a caller-supplied dist buffer instead
of allocating and returning one.

Layout: the dump this was recovered from had every newline collapsed. The
hunks are re-expanded so that the line counts agree with each @@ header.
Indentation and the position of blank context lines are reconstructed; text
sitting on the original line breaks may have been lost (the printf prompt in
src/main.cu, and the "100644" mode plus old-file marker of
src/utils/dijkstra.h, restored here by analogy with the other files).
TODO: confirm against the base blobs before applying.

Amendments on top of the original patch:
  * Makefile: DEPS names the two headers again. With the .cu sources listed,
    every object was rebuilt on any source edit and header edits (such as
    MAX_NODE) triggered no rebuild.
  * src/main.cu: node_count outside 1..MAX_NODE is rejected, because dijkstra
    indexes a fixed short processed[MAX_NODE] array by size.
  * src/main.cu: the host waits on cudaDeviceSynchronize() after the launch
    and reports a failure; the launch is asynchronous and sub_dist is managed
    memory the host goes on to use.
  * src/utils/matrix.{cu,h}: get_el / set_el are __host__ __device__ rather
    than __device__ only. The host code in matrix.cu is not visible in this
    patch; if it calls these helpers, __device__ alone would break the build.

The post-image blob ids in the "index" lines of Makefile, src/main.cu,
src/utils/matrix.cu and src/utils/matrix.h are those of the original commit
and do not describe the amended content.

Open item, not changed here: get_idx_min_dist declares idx without an initial
value; the loop that assigns it is outside the visible hunks. Confirm it can
never be returned unset.

diff --git a/Makefile b/Makefile
index 44c03e06c4ff1dbda108610ed8fb5b213d93e522..b5fad74d53c1485d0db5e80c7c2f6634d7ac064b 100644
--- a/Makefile
+++ b/Makefile
@@ -1,14 +1,13 @@
-CC=nvcc
-CFLAGS=-g
-SRCFOLDER=src/
-DEPS=src/utils/matrix.h src/utils/dijkstra.h
+CC = nvcc
+DEPS = src/utils/matrix.h src/utils/dijkstra.h
+OBJECTS = src/main.o src/utils/dijkstra.o src/utils/matrix.o
 
-%.o: %.cu $(DEPS)
-	$(CC) -c -o $@ $< $(CFLAGS)
-
-main: $(SRCFOLDER)main.o $(SRCFOLDER)utils/matrix.o $(SRCFOLDER)utils/dijkstra.o
+all: $(OBJECTS)
 	mkdir -p dist
-	$(CC) $(CFLAGS) -o dist/main $(SRCFOLDER)main.cu $(SRCFOLDER)utils/matrix.o $(SRCFOLDER)utils/dijkstra.o
+	$(CC) $(OBJECTS) -o dist/main
+
+%.o: %.cu $(DEPS)
+	$(CC) -x cu -I. -dc $< -o $@
 
 clean:
 	rm -rf dist
diff --git a/src/main.cu b/src/main.cu
index 8cbb1c4629035cce683dda0471ab7cffe20858e6..9b8ec65032b93442db70771dd56c5d3462f12a75 100644
--- a/src/main.cu
+++ b/src/main.cu
@@ -3,7 +3,7 @@
 #include "utils/matrix.h"
 #include "utils/dijkstra.h"
 
-long int *calculate_sub_matrix(long int *matrix, int node_count);
+__global__ void calculate_sub_matrix(long int *matrix, long int *sub_dist, int node_count);
 
 int main(int argc, char *argv[])
 {
@@ -17,10 +17,23 @@ int main(int argc, char *argv[])
     }
 
     int node_count = atoi(argv[1]);
-    cudaDeviceSetLimit(cudaLimitMallocHeapSize, node_count * node_count * 2.5f * sizeof(long int));
+    if (node_count < 1 || node_count > MAX_NODE)
+    {
+        fprintf(stderr, "node_count must be between 1 and %d\n", MAX_NODE);
+        return 1;
+    }
+    cudaDeviceSetLimit(cudaLimitMallocHeapSize, node_count * node_count * 3 * sizeof(long int) + node_count * sizeof(long int));
 
     long int *adj_matrix = create_adj_matrix(node_count, node_count);
-    long int *sub_dist = calculate_sub_matrix(adj_matrix, node_count);
+    long int *sub_dist;
+    cudaMallocManaged(&sub_dist, node_count * node_count * sizeof(long int));
+    calculate_sub_matrix<<<16,64>>>(adj_matrix, sub_dist, node_count);
+    cudaError_t status = cudaDeviceSynchronize();
+    if (status != cudaSuccess)
+    {
+        fprintf(stderr, "calculate_sub_matrix failed: %s\n", cudaGetErrorString(status));
+        return 1;
+    }
 
     char print_dist;
     printf("Print distances to stdout? [y/N] ");
@@ -36,22 +49,14 @@ int main(int argc, char *argv[])
     cudaFree(adj_matrix);
 }
 
-long int *calculate_sub_matrix(long int *matrix, int node_count)
+// Grid-stride loop over source nodes: each thread runs dijkstra for sources
+// start, start + stride, ... and hands it the slice at sub_dist + i * node_count.
+__global__ void calculate_sub_matrix(long int *matrix, long int *sub_dist, int node_count)
 {
-    long int *sub_dist = (long int *)malloc(node_count * node_count * sizeof(long int));
-    cudaMallocManaged(&sub_dist, node_count * node_count * sizeof(long int));
-
-    for (int i = 0; i < node_count; i++)
+    int start = (blockIdx.x * blockDim.x) + threadIdx.x;
+    int stride = blockDim.x * gridDim.x;
+    for (int i = start; i < node_count; i+= stride)
     {
-        long int *temp_dist = dijkstra(matrix, i, node_count);
-
-        for (int j = 0; j < node_count; j++)
-        {
-            set_el(sub_dist, node_count, j, i, temp_dist[j]);
-        }
-
-        cudaFree(temp_dist);
+        dijkstra(matrix, sub_dist + i * node_count, i, node_count);
     }
-
-    return sub_dist;
 }
diff --git a/src/utils/dijkstra.cu b/src/utils/dijkstra.cu
index 930dd5d59c5c88174c7b3f2901a696ef1211268e..0fd4428dccdf8e6d787cd32dad28904cc5c63e43 100644
--- a/src/utils/dijkstra.cu
+++ b/src/utils/dijkstra.cu
@@ -1,6 +1,6 @@
 #include "dijkstra.h"
 
-long int get_idx_min_dist(long int *dist, short *processed, int len)
+__device__ long int get_idx_min_dist(long int *dist, short *processed, int len)
 {
     long int min = LONG_MAX;
     int idx;
@@ -17,11 +17,9 @@ long int get_idx_min_dist(long int *dist, short *processed, int len)
     return idx;
 }
 
-long int *dijkstra(long int *adj_matrix, int src, int size)
+__device__ void dijkstra(long int *adj_matrix, long int *dist, int src, int size)
 {
-    long int *dist;
-    cudaMallocManaged(&dist, size * sizeof(long int));
-    short processed[size];
+    short processed[MAX_NODE];
 
     for (int i = 0; i < size; i++)
     {
@@ -50,6 +48,4 @@ long int *dijkstra(long int *adj_matrix, int src, int size)
             }
         }
     }
-
-    return dist;
 }
\ No newline at end of file
diff --git a/src/utils/dijkstra.h b/src/utils/dijkstra.h
index d143e6b3116b1596989edd62fa1013c881de35c6..6f88983b00402e4f768345dd01e6a8f2835ee35f 100644
--- a/src/utils/dijkstra.h
+++ b/src/utils/dijkstra.h
@@ -6,5 +6,6 @@
 
 #define TRUE 1
 #define FALSE 0
+#define MAX_NODE 3000
 
-long int *dijkstra(long int *adj_matrix, int src, int size);
+__device__ void dijkstra(long int *adj_matrix, long int *dist, int src, int size);
\ No newline at end of file
diff --git a/src/utils/matrix.cu b/src/utils/matrix.cu
index f7b9bb0951e6f6735b212c66ec23cc8f58a03578..2651e5cdecdfd199ad60401c5ac1e558a582e8c5 100644
--- a/src/utils/matrix.cu
+++ b/src/utils/matrix.cu
@@ -61,12 +61,12 @@ long int *create_adj_matrix(int width, int height)
     return matrix;
 }
 
-long int get_el(long int *matrix, int width, int x, int y)
+__host__ __device__ long int get_el(long int *matrix, int width, int x, int y)
 {
     return matrix[y * width + x];
 }
 
-void set_el(long int *matrix, int width, int x, int y, long int value)
+__host__ __device__ void set_el(long int *matrix, int width, int x, int y, long int value)
 {
     matrix[y * width + x] = value;
 }
diff --git a/src/utils/matrix.h b/src/utils/matrix.h
index c02cbb6bb8e3f47f9e05b3838962cb7f510ad94d..cc9fd1aa5f5fa60d8e2735d09fe308edd7a8b932 100644
--- a/src/utils/matrix.h
+++ b/src/utils/matrix.h
@@ -4,6 +4,6 @@
 void print_array(long int *array, int width);
 void print_matrix(long int *matrix, int width, int height);
 void print_matrix_to_file(long int *matrix, int width, int height, char *filename);
-long int get_el(long int *matrix, int width, int x, int y);
-void set_el(long int *matrix, int width, int x, int y, long int value);
+__host__ __device__ long int get_el(long int *matrix, int width, int x, int y);
+__host__ __device__ void set_el(long int *matrix, int width, int x, int y, long int value);
 long int *create_adj_matrix(int width, int height);
\ No newline at end of file