diff --git a/usable.cu b/usable.cu
new file mode 100644
index 0000000000000000000000000000000000000000..42988da8bc19bac1698057a46a5c796bf1da2058
--- /dev/null
+++ b/usable.cu
@@ -0,0 +1,165 @@
+
+#include <stdio.h>
+#include <math.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+
+#define inf 9999
+#define N 1000
+
+__global__ void funct(int n, int k, float* x, int* qx) {
+
+    int ix = blockIdx.x * blockDim.x + threadIdx.x;
+    int j = ix & (n - 1);
+    float temp2 = x[ix - j + k] + x[k * n + j];
+    if (x[ix] > temp2) {
+        x[ix] = temp2;
+        qx[ix] = k;
+    }
+    if (x[ix] == inf) {
+        qx[ix] = -2;
+    }
+}
+
+__global__ void funct2(int n, int k, float* x, int* qx) {
+    int ix = blockIdx.x * blockDim.x + threadIdx.x;
+    int j = ix & (n - 1);
+    float temp2 = x[ix - j + k] + x[k * n + j];
+    if (x[ix] > temp2) {
+        x[ix] = temp2;
+        qx[ix] = k;
+    }
+}
+
+
+
+
+int main(int argc, char **argv) {
+
+    struct timeval first, second, lapsed, third;
+    struct timezone tzp, tzp2;
+    float *host_A;
+    int *host_Q;
+    float *dev_x;
+    int *dev_qx;
+    float *A;
+    int *Q;
+    float *D;
+    
+
+    int i, j, bk;
+    int k = 0;
+    //int n = atoi(argv[1]);
+    int n = N;
+
+    cudaEvent_t start,stop;
+    cudaEventCreate(&start);
+    cudaEventCreate(&stop);
+    float milliseconds = 0;
+
+    printf("\n");
+    printf("RUNNING WITH %d VERTICES \n", n);
+    printf("\n");
+
+    cudaMalloc(&dev_x, n * n * sizeof (float));
+    cudaMalloc(&dev_qx, n * n * sizeof (float));
+
+    //CPU arrays
+    A = (float *) malloc(n * n * sizeof (float)); //arxikos pinakas A
+    D = (float *) malloc(n * n * sizeof (float)); //arxikos pinakas D
+    Q = (int *) malloc(n * n * sizeof (int)); //arxikos pinakas Q
+
+    //GPU arrays
+    host_A = (float *) malloc(n * n * sizeof (float));
+    //host_D = (float *) malloc(n * n * sizeof (float));
+    host_Q = (int *) malloc(n * n * sizeof (int));
+
+    srand(time(NULL));
+    for (i = 0; i < n; i++) {
+        for (j = 0; j < n; j++) {
+            if (i == j) {
+                A[i * n + j] = 0;
+            } else {
+                A[i * n + j] = 1200 * (float) rand() / RAND_MAX + 1;
+                if (A[i * n + j] > 1000) {
+                    A[i * n + j] = inf;
+                }
+            }
+        }
+    }
+    for (i = 0; i < n; i++) {
+        for (j = 0; j < n; j++) {
+            Q[i * n + j] = -1;
+        }
+    }
+    for (i = 0; i < n; i++) {
+        for (j = 0; j < n; j++) {
+            D[i * n + j] = A[i * n + j];
+        }
+
+    }
+    for (i = 0; i < n; i++) //Antigrafi tou A ston host_A
+    {
+        for (j = 0; j < n; j++) {
+            host_A[i * n + j] = A[i * n + j];
+        }
+
+    }
+    for (i = 0; i < n; i++) //Antigrafi tou Q ston host_Q
+    {
+        for (j = 0; j < n; j++) {
+            host_Q[i * n + j] = Q[i * n + j];
+        }
+    }
+    gettimeofday(&third, &tzp2);
+    ////////////////////////////First Mem Copy////////////////////
+    gettimeofday(&first, &tzp);
+    cudaMemcpy(dev_x, host_A, n * n * sizeof (float), cudaMemcpyHostToDevice);
+    cudaMemcpy(dev_qx, host_Q, n * n * sizeof (int), cudaMemcpyHostToDevice);
+    gettimeofday(&second, &tzp);
+    if (first.tv_usec > second.tv_usec) {
+        second.tv_usec += 1000000;
+        second.tv_sec--;
+    }
+    lapsed.tv_usec = second.tv_usec - first.tv_usec;
+    lapsed.tv_sec = second.tv_sec - first.tv_sec;
+    printf("First Transfer CPU to GPU  Time elapsed: %lu, %lu s\n", lapsed.tv_sec, lapsed.tv_usec);
+    ////////////////////////////////////////////////////GPU Calculation////////////////////////////////
+
+    bk = (int) (n * n / 512);
+    int gputhreads = 512;
+    if (bk > 0) {
+        gputhreads = 512;
+    } else {
+        bk = 1;
+        gputhreads = n*n;
+    }
+    printf(" \n");
+    printf("BLOCKS :   %d      GPU THREADS:     %d \n", bk, gputhreads);
+    printf(" \n");
+    //gettimeofday(&first, &tzp);
+    cudaEventRecord(start); 
+    funct << <bk, gputhreads>>>(n, k, dev_x, dev_qx);
+    for (k = 1; k < n; k++) {
+        funct2 << <bk, gputhreads>>>(n, k, dev_x, dev_qx);
+    }
+    cudaThreadSynchronize();
+    //gettimeofday(&second, &tzp);
+    cudaEventRecord(stop);
+    cudaEventSynchronize(stop);
+    cudaEventElapsedTime(&milliseconds, start, stop);
+    if (first.tv_usec > second.tv_usec) {
+        second.tv_usec += 1000000;
+        second.tv_sec--;
+    }
+
+    lapsed.tv_usec = second.tv_usec - first.tv_usec;
+    lapsed.tv_sec = second.tv_sec - first.tv_sec;
+    printf("GPU Calculation Time elapsed: %.20f\n", milliseconds * .0001);
+    printf("\n");
+
+    
+    printf("ALL OK WE ARE DONE \n");
+    return 0;
+}
\ No newline at end of file