diff --git a/add.c b/add.c
new file mode 100644
index 0000000000000000000000000000000000000000..4df7e38b16e0f2f22ebe66b9f926b299638095ab
--- /dev/null
+++ b/add.c
@@ -0,0 +1,74 @@
+/*
+ * Element-wise vector addition on the GPU using unified (managed) memory.
+ *
+ * The file contains CUDA C++ (__global__, <<<...>>>), so despite its .c
+ * extension it must be compiled as CUDA, e.g. nvcc -x cu add.c -o add
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include "cuda.h"
+#include <cuda_runtime.h>
+
+/* Print the failing step and the CUDA error text, then exit. */
+static void check(cudaError_t err, const char *what)
+{
+    if (err != cudaSuccess) {
+        fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(err));
+        exit(EXIT_FAILURE);
+    }
+}
+
+/*
+ * y[i] = x[i] + y[i] for every 0 <= i < n.
+ *
+ * Each thread starts at its global index and advances by the total number
+ * of launched threads, so any 1-D launch configuration covers all n
+ * elements. Defined ahead of main() because main() launches it and the
+ * file has no prototype for it.
+ */
+__global__
+void add(int n, float *x, float *y)
+{
+    int index = blockIdx.x * blockDim.x + threadIdx.x;
+    int stride = blockDim.x * gridDim.x;
+    for (int i = index; i < n; i += stride)
+        y[i] = x[i] + y[i];
+}
+
+int main(int argc, char const *argv[])
+{
+    int N = 1<<20;
+    float *x, *y;
+
+    // Allocate Unified Memory - accessible from CPU or GPU
+    check(cudaMallocManaged(&x, N*sizeof(float)), "cudaMallocManaged(x)");
+    check(cudaMallocManaged(&y, N*sizeof(float)), "cudaMallocManaged(y)");
+
+    for (int i = 0; i < N; i++) {
+        x[i] = 1.0f;
+        y[i] = 2.0f;
+    }
+
+    // One thread per element, rounded up to a whole number of blocks
+    int blockSize = 256;
+    int numBlocks = (N + blockSize - 1) / blockSize;
+    add<<<numBlocks, blockSize>>>(N, x, y);
+    // A launch returns no status; configuration errors surface here
+    check(cudaGetLastError(), "add kernel launch");
+
+    // Wait for GPU to finish before accessing on host
+    check(cudaDeviceSynchronize(), "cudaDeviceSynchronize");
+
+    // Every element is expected to be 1.0f + 2.0f = 3.0f
+    float maxError = 0.0f;
+    for (int i = 0; i < N; i++)
+        maxError = fmaxf(maxError, fabsf(y[i]-3.0f));
+    printf("Max error: %f", maxError);
+
+    // Free memory
+    check(cudaFree(x), "cudaFree(x)");
+    check(cudaFree(y), "cudaFree(y)");
+
+    return 0;
+}
diff --git a/datawaktu.txt b/datawaktu.txt
new file mode 100644
index 0000000000000000000000000000000000000000..07bdbc1a728e1ae3b272c5e067d1113406e52add
--- /dev/null
+++ b/datawaktu.txt
@@ -0,0 +1,6 @@
+100 :
+12694.000000
+12823.000000
+17699.000000
+
+500 :
diff --git a/parallel.cu b/parallel.cu
index 49cef8fa4e650f33e4b335ddff2886c372fe32aa..db93c31255952df5fbc20b83506014897db4b65e 100644
--- a/parallel.cu
+++ b/parallel.cu
@@ -86,7 +86,7 @@ void dijkstra(int n, long matrix[], long result[]) {
 
 int main (int argc, char **argv) {
     long *matrix, *result; // Matrix size i * j of distance from vertice [i] to vertice [j]
-    int n; // Number of vertices
+    int n, jmlBlock; // Number of vertices; number of CUDA blocks to launch
     // int my_rank, thread_count, sub_n;
     clock_t start, end;
     double cpu_time_used;
@@ -97,13 +97,23 @@ int main (int argc, char **argv) {
 
     matrix = initialize_matrix(13517142, n);
     printf("\nGenerated %d * %d matrix.\n", n, n);
-    print_matrix(matrix, n, n);
-
     result = initialize_result(n);
 
+    /* One single-thread block per vertex, capped at 512 blocks.
+       NOTE(review): for n > 512 the kernel has to loop over the remaining vertices - confirm in dijkstra(). */
+    jmlBlock = (n <= 512) ? n : 512;
+
     start = clock();
 
-    dijkstra<<<1,1>>>(n, matrix, result);
+    dijkstra<<<jmlBlock,1>>>(n, matrix, result);
+
+    /* A kernel launch returns no status, so an invalid configuration (for example jmlBlock == 0 when n == 0) must be polled for. */
+    cudaError_t launch_status = cudaGetLastError();
+    if (launch_status != cudaSuccess) {
+        fprintf(stderr, "dijkstra launch failed: %s\n",
+                cudaGetErrorString(launch_status));
+        return 1;
+    }
 
     cudaDeviceSynchronize();
 
@@ -156,7 +166,7 @@ long *initialize_matrix(int random_number, int n) {
             if (i == j) {
                 matrix[i * n + j] = 0;
             } else {
-                long dist = (rand() % 9) + 1;
+                long dist = rand();
                 matrix[i * n + j] = dist;
                 matrix[j * n + i] = dist;
             }
diff --git a/src/cuda.c b/src/cuda.c
new file mode 100644
index 0000000000000000000000000000000000000000..26d84a24e2ea55fa35733d52c8d2a68c3ae863c0
--- /dev/null
+++ b/src/cuda.c
@@ -0,0 +1,298 @@
+/*
+ * All-pairs shortest paths on a randomly generated undirected graph.
+ * Dijkstra's algorithm is run once per source vertex and the source
+ * vertices are divided among OpenMP threads.
+ *
+ * Usage: <program> <thread_count>  (the vertex count is read from stdin)
+ * Needs OpenMP enabled at build time, e.g. gcc -fopenmp.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <time.h>
+#include <omp.h>
+
+#define TRUE 1
+#define FALSE 0
+#define INF LONG_MAX
+// #define N_THREADS 10
+#define OUTPUT_FILE_PATH "parallel_result.txt"
+
+int read_n(int my_rank);
+void print_matrix(long matrix[], int n_row, int n_col);
+void print_array(long arr[], int len);
+int idx_min_distance(long connd[], long dist[], int len);
+long *initialize_matrix(int random_number, int n);
+long *initialize_result(int n);
+void write_output(long matrix[], int n_row, int n_col, double time);
+void dijkstra(int n, int sub_n, long matrix[], long result[]);
+
+/*
+ * Reads the thread count from argv[1] and the vertex count from stdin,
+ * computes all shortest distances and writes the result matrix together
+ * with the elapsed wall-clock time to OUTPUT_FILE_PATH.
+ */
+int main (int argc, char **argv) {
+    long *matrix, *result; // n * n matrices; [i * n + j] is the distance from vertex i to vertex j
+    int n, sub_n; // Number of vertices; source vertices per thread
+    int thread_count;
+    double start, end;
+    double elapsed_us;
+
+    /* The thread count is the only command-line argument */
+    if (argc < 2) {
+        fprintf(stderr, "Usage: <program> <thread_count>\n");
+        return EXIT_FAILURE;
+    }
+    thread_count = (int) strtol(argv[1], NULL, 10);
+    if (thread_count < 1) {
+        fprintf(stderr, "thread_count must be a positive integer.\n");
+        return EXIT_FAILURE;
+    }
+
+    /* Read number of vertices */
+    printf("Number of vertices (n): ");
+    if (scanf("%d", &n) != 1 || n < 1) {
+        fprintf(stderr, "n must be a positive integer.\n");
+        return EXIT_FAILURE;
+    }
+
+    /* Never use more threads than there are source vertices */
+    if (n < thread_count) {
+        sub_n = 1;
+        thread_count = n;
+    } else {
+        sub_n = n / thread_count;
+    }
+
+    matrix = initialize_matrix(13517142, n);
+    printf("\nGenerated %d * %d matrix.\n", n, n);
+
+    result = initialize_result(n);
+
+    /* Wall-clock time; clock() would add up the CPU time of all threads */
+    start = omp_get_wtime();
+
+    /* dijkstra() selects its rows through omp_get_thread_num(), which is
+       always 0 outside a parallel region, so every thread must call it. */
+    #pragma omp parallel num_threads(thread_count)
+    dijkstra(n, sub_n, matrix, result);
+
+    end = omp_get_wtime();
+    elapsed_us = (end - start) * 1000000;
+    write_output(result, n, n, elapsed_us);
+    printf("\nResults written to %s.\n", OUTPUT_FILE_PATH);
+
+    free(result);
+    free(matrix);
+    return 0;
+}
+
+/*
+ * Prompts for and reads the vertex count, on rank 0 only.
+ * Returns the value read, or 0 on any other rank or if reading fails.
+ */
+int read_n(int my_rank) {
+    int n = 0; // defined result even when nothing is read
+
+    if (my_rank == 0) {
+        printf("Number of vertices (n): ");
+        if (scanf("%d", &n) != 1) {
+            n = 0;
+        }
+    }
+    return n;
+}
+
+/* Prints an n_row * n_col row-major matrix, one row per line. */
+void print_matrix(long matrix[], int n_row, int n_col) {
+    for (int i = 0; i < n_row; i++) {
+        for (int j = 0; j < n_col; j++) {
+            printf("%ld ", matrix[i * n_col + j]);
+        }
+        printf("\n");
+    }
+}
+
+/* Prints the len elements of arr on one line. */
+void print_array(long arr[], int len) {
+    for (int i = 0; i < len; i++) {
+        printf("%ld ", arr[i]);
+    }
+    printf("\n");
+}
+
+/*
+ * Returns the index of the smallest dist[] entry among the vertices whose
+ * connd[] flag is not TRUE, or -1 when no such entry is below INF.
+ * Ties go to the lowest index.
+ */
+int idx_min_distance(long connd[], long dist[], int len) {
+    long min_dist = INF;
+    int idx_min_dist = -1;
+    for (int i = 0; i < len; i++) {
+        if (connd[i] != TRUE) {
+            if (dist[i] < min_dist) {
+                min_dist = dist[i];
+                idx_min_dist = i;
+            }
+        }
+    }
+    return idx_min_dist;
+}
+
+/*
+ * Writes the n_row * n_col matrix and the elapsed time (microseconds) to
+ * OUTPUT_FILE_PATH, replacing any previous content. Exits with an error
+ * message if the file cannot be opened.
+ */
+void write_output(long matrix[], int n_row, int n_col, double time) {
+    FILE * file;
+    file = fopen (OUTPUT_FILE_PATH,"w");
+    if (file == NULL) {
+        perror(OUTPUT_FILE_PATH);
+        exit(EXIT_FAILURE);
+    }
+    for (int i = 0; i < n_row; i++) {
+        for (int j = 0; j < n_col; j++) {
+            fprintf(file, "%ld ", matrix[i * n_col + j]);
+        }
+        fprintf(file, "\n");
+    }
+    fprintf(file, "Process finished in %f microseconds.\n", time);
+    fclose (file);
+}
+
+/*
+ * Allocates an n * n symmetric distance matrix with a zero diagonal and
+ * rand() values elsewhere, after seeding rand() with random_number.
+ * Exits on allocation failure. The caller frees the matrix.
+ */
+long *initialize_matrix(int random_number, int n) {
+    /* Initialize variables */
+    long *matrix = malloc(sizeof(long) * n*n);
+    if (matrix == NULL && n > 0) {
+        fprintf(stderr, "initialize_matrix: out of memory.\n");
+        exit(EXIT_FAILURE);
+    }
+
+    srand(random_number);
+    for (int i = 0; i < n; i++) {
+        for (int j = 0; j <= i; j++) {
+            if (i == j) {
+                matrix[i * n + j] = 0;
+            } else {
+                long dist = rand();
+                matrix[i * n + j] = dist;
+                matrix[j * n + i] = dist;
+            }
+        }
+    }
+    return matrix;
+}
+
+/*
+ * Allocates the n * n result matrix: 0 on the diagonal and INF everywhere
+ * else. Exits on allocation failure. The caller frees the matrix.
+ */
+long *initialize_result(int n) {
+    long *result = malloc(sizeof(long) * n*n);
+    if (result == NULL && n > 0) {
+        fprintf(stderr, "initialize_result: out of memory.\n");
+        exit(EXIT_FAILURE);
+    }
+
+    for (int i = 0; i < n; i++) {
+        for (int j = 0; j < n; j++) {
+            if (i == j) {
+                result[i * n + j] = 0;
+            } else {
+                result[i * n + j] = INF;
+            }
+        }
+    }
+
+    return result;
+}
+
+/*
+ * Runs Dijkstra's algorithm from every source vertex owned by the calling
+ * OpenMP thread and stores the distances in that source's row of result.
+ *
+ * Thread r owns sources [r * sub_n, (r + 1) * sub_n); the last thread of
+ * the team also takes the sources left over when n is not a multiple of
+ * the team size. Call it from every thread of a parallel region; called
+ * outside one, the single caller processes all n sources.
+ *
+ * n      - number of vertices
+ * sub_n  - number of source vertices per thread
+ * matrix - n * n edge weights, only read
+ * result - n * n output; a thread writes only the rows it owns
+ */
+void dijkstra(int n, int sub_n, long matrix[], long result[]) {
+    long *connected; // Array of 'boolean' on whether vertex [i] is connected to source
+    long *distance; // Distance of vertex [i] from source
+    int my_rank = omp_get_thread_num();
+    int team_size = omp_get_num_threads();
+    int first = my_rank * sub_n;
+    int last = (my_rank == team_size - 1) ? n : first + sub_n;
+
+    if (last > n) {
+        last = n;
+    }
+    if (first >= last) {
+        return; // no source vertices for this thread
+    }
+
+    connected = malloc(n * sizeof(long));
+    distance = malloc(n * sizeof(long));
+    if (connected == NULL || distance == NULL) {
+        fprintf(stderr, "dijkstra: out of memory.\n");
+        exit(EXIT_FAILURE);
+    }
+
+    for (int SOURCE_V = first; SOURCE_V < last; SOURCE_V++) {
+        for (int i = 0; i < n; i++) {
+            if (i == SOURCE_V) {
+                connected[i] = TRUE;
+            } else {
+                connected[i] = FALSE;
+            }
+        }
+
+        for (int i = 0; i < n; i++) {
+            distance[i] = matrix[SOURCE_V * n + i];
+        }
+
+        /* Dijkstra: connect one more vertex per iteration */
+        for (int i = 1; i < n; i++) {
+            int closest_v = idx_min_distance(connected, distance, n);
+            if (closest_v < 0) {
+                /* Every remaining vertex is at distance INF */
+                break;
+            }
+            connected[closest_v] = TRUE;
+
+            /* Update distances */
+            /* With unconnected vertices in distance array */
+            for (int j = 0; j < n; j++) {
+                long edge = matrix[closest_v * n + j];
+                /* Skip INF weights: adding to INF would overflow long */
+                if (connected[j] == FALSE && edge != INF) {
+                    long new_dist = distance[closest_v] + edge;
+                    if (new_dist < distance[j]) {
+                        distance[j] = new_dist;
+                    }
+                }
+            }
+        }
+
+        /* Copy the row once, after all distances are final */
+        for (int j = 0; j < n; j++) {
+            result[SOURCE_V * n + j] = distance[j];
+        }
+    }
+
+    free(connected);
+    free(distance);
+}
diff --git a/src/serial.c b/src/serial.c
new file mode 100644
index 0000000000000000000000000000000000000000..08747f21bf5ba7cc9c319c024a507f6af20cbd26
--- /dev/null
+++ b/src/serial.c
@@ -0,0 +1,220 @@
+/*
+ * All-pairs shortest paths on a randomly generated undirected graph,
+ * computed serially by running Dijkstra's algorithm from every vertex.
+ * The vertex count is read from stdin.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <time.h>
+
+#define TRUE 1
+#define FALSE 0
+#define INF LONG_MAX
+#define OUTPUT_FILE_PATH "serial_result.txt"
+
+void print_matrix(long matrix[], int n_row, int n_col);
+void print_array(long arr[], int len);
+int idx_min_distance(long connd[], long dist[], int len);
+long *initialize_matrix(int random_number, int n);
+long *initialize_result(int n);
+void write_output(long matrix[], int n_row, int n_col, double time);
+
+/*
+ * Reads the vertex count, computes every shortest distance and writes the
+ * result matrix plus the CPU time used to OUTPUT_FILE_PATH.
+ */
+int main (int argc, char **argv) {
+    long *matrix, *result; // n * n matrices; [i * n + j] is the distance from vertex i to vertex j
+    long *connected; // connected[i] is TRUE once vertex i has been connected to the current source
+    long *distance; // distance[i] is the best known distance from the current source to vertex i
+    int n; // Number of vertices
+
+    /* Read number of vertices */
+    printf("Number of vertices (n): ");
+    if (scanf("%d", &n) != 1 || n < 1) {
+        fprintf(stderr, "n must be a positive integer.\n");
+        return EXIT_FAILURE;
+    }
+
+    matrix = initialize_matrix(13517142, n);
+    printf("\nGenerated %d * %d matrix.\n", n, n);
+
+    result = initialize_result(n);
+    connected = malloc(n * sizeof(long));
+    distance = malloc(n * sizeof(long));
+    if (connected == NULL || distance == NULL) {
+        fprintf(stderr, "main: out of memory.\n");
+        return EXIT_FAILURE;
+    }
+
+    clock_t start, end;
+    double cpu_time_used;
+    start = clock();
+
+    for (int SOURCE_V = 0; SOURCE_V < n; SOURCE_V++) {
+        for (int i = 0; i < n; i++) {
+            if (i == SOURCE_V) {
+                connected[i] = TRUE;
+            } else {
+                connected[i] = FALSE;
+            }
+        }
+
+        for (int i = 0; i < n; i++) {
+            distance[i] = matrix[SOURCE_V * n + i];
+        }
+
+        /* Dijkstra: connect one more vertex per iteration */
+        for (int i = 1; i < n; i++) {
+            int closest_v = idx_min_distance(connected, distance, n);
+            if (closest_v < 0) {
+                /* Every remaining vertex is at distance INF */
+                break;
+            }
+            connected[closest_v] = TRUE;
+
+            /* Update distances */
+            /* With unconnected vertices in distance array */
+            for (int j = 0; j < n; j++) {
+                long edge = matrix[closest_v * n + j];
+                /* Skip INF weights: adding to INF would overflow long */
+                if (connected[j] == FALSE && edge != INF) {
+                    long new_dist = distance[closest_v] + edge;
+                    if (new_dist < distance[j]) {
+                        distance[j] = new_dist;
+                    }
+                }
+            }
+        }
+
+        for (int j = 0; j < n; j++) {
+            result[SOURCE_V * n + j] = distance[j];
+        }
+    }
+
+    end = clock();
+    cpu_time_used = (((double) (end - start)) / CLOCKS_PER_SEC) * 1000000;
+    write_output(result, n, n, cpu_time_used);
+    printf("\nResults written to %s.\n", OUTPUT_FILE_PATH);
+
+    /* Deallocation */
+    free(matrix);
+    free(result);
+    free(connected);
+    free(distance);
+    return 0;
+}
+
+/* Prints an n_row * n_col row-major matrix, one row per line. */
+void print_matrix(long matrix[], int n_row, int n_col) {
+    for (int i = 0; i < n_row; i++) {
+        for (int j = 0; j < n_col; j++) {
+            printf("%ld ", matrix[i * n_col + j]);
+        }
+        printf("\n");
+    }
+}
+
+/* Prints the len elements of arr on one line. */
+void print_array(long arr[], int len) {
+    for (int i = 0; i < len; i++) {
+        printf("%ld ", arr[i]);
+    }
+    printf("\n");
+}
+
+/*
+ * Returns the index of the smallest dist[] entry among the vertices whose
+ * connd[] flag is not TRUE, or -1 when no such entry is below INF.
+ * Ties go to the lowest index.
+ */
+int idx_min_distance(long connd[], long dist[], int len) {
+    long min_dist = INF;
+    int idx_min_dist = -1;
+    for (int i = 0; i < len; i++) {
+        if (connd[i] != TRUE) {
+            if (dist[i] < min_dist) {
+                min_dist = dist[i];
+                idx_min_dist = i;
+            }
+        }
+    }
+    return idx_min_dist;
+}
+
+/*
+ * Writes the n_row * n_col matrix and the elapsed time (microseconds) to
+ * OUTPUT_FILE_PATH, replacing any previous content. Exits with an error
+ * message if the file cannot be opened.
+ */
+void write_output(long matrix[], int n_row, int n_col, double time) {
+    FILE * file;
+    file = fopen (OUTPUT_FILE_PATH,"w");
+    if (file == NULL) {
+        perror(OUTPUT_FILE_PATH);
+        exit(EXIT_FAILURE);
+    }
+    for (int i = 0; i < n_row; i++) {
+        for (int j = 0; j < n_col; j++) {
+            fprintf(file, "%ld ", matrix[i * n_col + j]);
+        }
+        fprintf(file, "\n");
+    }
+    fprintf(file, "Process finished in %f microseconds.\n", time);
+    fclose (file);
+}
+
+/*
+ * Allocates an n * n symmetric distance matrix with a zero diagonal and
+ * rand() values elsewhere, after seeding rand() with random_number.
+ * Uses plain rand() weights so the graph is generated the same way as in
+ * the parallel programs of this patch. The caller frees the matrix.
+ */
+long *initialize_matrix(int random_number, int n) {
+    /* Initialize variables */
+    long *matrix = malloc(sizeof(long) * n*n);
+    if (matrix == NULL && n > 0) {
+        fprintf(stderr, "initialize_matrix: out of memory.\n");
+        exit(EXIT_FAILURE);
+    }
+
+    srand(random_number);
+    for (int i = 0; i < n; i++) {
+        for (int j = 0; j <= i; j++) {
+            if (i == j) {
+                matrix[i * n + j] = 0;
+            } else {
+                long dist = rand();
+                matrix[i * n + j] = dist;
+                matrix[j * n + i] = dist;
+            }
+        }
+    }
+
+    return matrix;
+}
+
+/*
+ * Allocates the n * n result matrix: 0 on the diagonal and INF everywhere
+ * else. Exits on allocation failure. The caller frees the matrix.
+ */
+long *initialize_result(int n) {
+    long *result = malloc(sizeof(long) * n*n);
+    if (result == NULL && n > 0) {
+        fprintf(stderr, "initialize_result: out of memory.\n");
+        exit(EXIT_FAILURE);
+    }
+
+    for (int i = 0; i < n; i++) {
+        for (int j = 0; j < n; j++) {
+            if (i == j) {
+                result[i * n + j] = 0;
+            } else {
+                result[i * n + j] = INF;
+            }
+        }
+    }
+
+    return result;
+}