Skip to content
Snippets Groups Projects
Commit d0bc063c authored by Yasya Rusyda's avatar Yasya Rusyda
Browse files

Merge branch 'master' of https://gitlab.informatika.org/Yora/cuda

parents dd302803 ba25bd68
Branches
No related merge requests found
......@@ -18,14 +18,14 @@ __device__ int getMax(int arr[], int n)
// the digit represented by exp.
__device__ void countSort(int arr[], int n, int exp)
{
int index = threadIdx.x;
int stride = blockDim.x;
// int index = threadIdx.x;
// int stride = blockDim.x;
int *output= (int*)malloc(sizeof(int)*n); // output array
int i, count[10] = {0};
// Store count of occurrences in count[]
for (i = index; i < n; i+=stride)
for (i = 0; i < n; i++)
count[ (arr[i]/exp)%10 ]++;
// Change count[i] so that count[i] now contains actual
......@@ -34,7 +34,7 @@ __device__ void countSort(int arr[], int n, int exp)
count[i] += count[i - 1];
// Build the output array
for (i = n - 1; i >= index; i-=stride)
for (i = n - 1; i >= 0; i--)
{
output[count[ (arr[i]/exp)%10 ] - 1] = arr[i];
count[ (arr[i]/exp)%10 ]--;
......@@ -42,7 +42,7 @@ __device__ void countSort(int arr[], int n, int exp)
// Copy the output array to arr[], so that arr[] now
// contains sorted numbers according to current digit
for (i = index; i < n; i+=stride)
for (i = 0; i < n; i++)
arr[i] = output[i];
}
......
// C++ implementation of Radix Sort
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <time.h>
#include <cuda.h>
#include <cuda_runtime.h>
using namespace std;
// Returns the largest value among the first n entries of arr[].
// arr[0] is read unconditionally, so the caller must pass n >= 1.
int getMax(int arr[], int n)
{
    int best = arr[0];
    int pos = 1;
    while (pos < n) {
        // Keep the current best unless a strictly larger value shows up.
        best = (best < arr[pos]) ? arr[pos] : best;
        ++pos;
    }
    return best;
}
// Digit-histogram kernel: for every element of arr[0..n-1], increments
// count[d] where d = (arr[i] / exp) % 10.
//
//   count : 10 ints, device-accessible; must be zeroed by the caller before
//           the launch, because the kernel only ever adds to it.
//   arr   : n ints, device-accessible, read-only here. The digit is used
//           directly as an index into count, so values must be non-negative.
//   exp   : power of ten selecting the digit (1 = ones, 10 = tens, ...).
//
// Launch layout: any 1-D grid/block configuration works; threads walk the
// array with a grid-wide stride. No shared memory is used.
__global__ void storeCount(int *count, int *arr, int n, int exp){
    int index = blockIdx.x * blockDim.x + threadIdx.x;
    int stride = gridDim.x * blockDim.x;
    for (int i = index; i < n; i += stride) {
        // Many threads hit the same bucket concurrently; a plain ++ is a
        // racy read-modify-write that loses updates, so use an atomic add.
        atomicAdd(&count[(arr[i] / exp) % 10], 1);
    }
}
// Aborts the program with a diagnostic when a CUDA runtime call has failed.
static void countSortCheck(cudaError_t status, const char *what)
{
    if (status != cudaSuccess) {
        std::cerr << "CUDA error in " << what << ": "
                  << cudaGetErrorString(status) << std::endl;
        std::exit(EXIT_FAILURE);
    }
}

// Stable counting sort of arr[0..n-1] (host memory) keyed on the decimal
// digit selected by exp (1 = ones, 10 = tens, ...).
//
// The per-digit histogram is computed on the GPU by storeCount; the prefix
// sum and the scatter into the output buffer run on the host. Values must be
// non-negative because the digit is used directly as a bucket index.
// Does nothing when n <= 0. Terminates the process on allocation or CUDA
// failure instead of continuing with a bogus histogram.
void countSort(int arr[], int n, int exp)
{
    if (n <= 0)
        return;

    const size_t arrBytes = sizeof(int) * (size_t)n;
    const size_t countBytes = sizeof(int) * 10;

    int *output = (int*)malloc(arrBytes); // host scratch for the scatter pass
    if (output == NULL) {
        std::cerr << "countSort: out of host memory" << std::endl;
        std::exit(EXIT_FAILURE);
    }

    int *d_arr = NULL;
    int *d_count = NULL;   // the histogram must live in device memory
    int h_count[10] = {0};
    int i;

    countSortCheck(cudaMalloc((void**)&d_arr, arrBytes), "cudaMalloc(d_arr)");
    countSortCheck(cudaMalloc((void**)&d_count, countBytes), "cudaMalloc(d_count)");
    countSortCheck(cudaMemcpy(d_arr, arr, arrBytes, cudaMemcpyHostToDevice),
                   "cudaMemcpy(arr -> d_arr)");
    // The kernel only accumulates, so the buckets have to start at zero.
    countSortCheck(cudaMemset(d_count, 0, countBytes), "cudaMemset(d_count)");

    // Store count of occurrences in d_count[]
    storeCount<<<1,32>>>(d_count, d_arr, n, exp);
    countSortCheck(cudaGetLastError(), "storeCount launch");

    // Blocking copy on the default stream: also waits for the kernel and
    // surfaces any execution error. Size is in bytes (10 ints, not 10 bytes).
    countSortCheck(cudaMemcpy(h_count, d_count, countBytes, cudaMemcpyDeviceToHost),
                   "cudaMemcpy(d_count -> h_count)");

    countSortCheck(cudaFree(d_arr), "cudaFree(d_arr)");
    countSortCheck(cudaFree(d_count), "cudaFree(d_count)");

    // Change h_count[i] so that it now contains the position just past the
    // last slot of digit i in output[]
    for (i = 1; i < 10; i++)
        h_count[i] += h_count[i - 1];

    // Every element lands in exactly one bucket, so the running total must
    // equal n. Anything else would make the scatter below write out of
    // bounds, so stop here instead.
    if (h_count[9] != n) {
        std::cerr << "countSort: digit histogram sums to " << h_count[9]
                  << ", expected " << n << std::endl;
        free(output);
        std::exit(EXIT_FAILURE);
    }

    // Build the output array; walking backwards keeps equal digits in their
    // original relative order (stability), which radix sort depends on.
    for (i = n - 1; i >= 0; i--)
    {
        int digit = (arr[i] / exp) % 10;
        output[h_count[digit] - 1] = arr[i];
        h_count[digit]--;
    }

    // Copy the output array to arr[], so that arr[] now
    // contains sorted numbers according to current digit
    for (i = 0; i < n; i++)
        arr[i] = output[i];

    free(output);
}
// Sorts arr[0..n-1] with least-significant-digit radix sort in base 10.
// One countSort pass is run per decimal digit of the maximum element,
// starting from the ones digit. Instead of a digit number, exp = 10^i is
// passed, where i is the current digit position.
// Values are assumed to be non-negative (the digit extraction in countSort
// indexes buckets with them). Does nothing when n <= 0.
void radixsort(int *arr, int n)
{
    if (n <= 0)
        return; // getMax would read arr[0] on an empty array

    int m = getMax(arr, n);
    for (int exp = 1; m / exp > 0; exp *= 10) {
        countSort(arr, n, exp);
        // m / exp < 10 means there is no higher digit left. Stopping here
        // is equivalent to testing m / (exp * 10) > 0, but avoids computing
        // exp * 10, which overflows int when m >= 10^9.
        if (m / exp < 10)
            break;
    }
}
// Writes the first n elements of arr to standard output, one value per line.
void print(int arr[], int n){
    int idx = 0;
    while (idx < n) {
        std::cout << arr[idx] << "\n";
        ++idx;
    }
}
// Fills arr[0..n-1] with values from rand() after seeding the generator
// with a fixed constant, so every run produces the same sequence.
void rng(int* arr,int n){
    srand(13516067);
    int filled = 0;
    while (filled < n) {
        arr[filled] = rand();
        ++filled;
    }
}
// Returns end - start as a timespec whose tv_nsec is normalized to be
// non-negative by borrowing one second when the raw nanosecond difference
// is below zero.
timespec diff(timespec start, timespec end)
{
    timespec delta;
    delta.tv_sec = end.tv_sec - start.tv_sec;
    delta.tv_nsec = end.tv_nsec - start.tv_nsec;
    if (delta.tv_nsec < 0) {
        // Borrow one second to bring the nanosecond field back in range.
        delta.tv_sec -= 1;
        delta.tv_nsec += 1000000000;
    }
    return delta;
}
// Driver program: reads the element count n from argv[1], fills an array
// with the fixed-seed pseudo-random sequence from rng, sorts it with
// radixsort, prints the sorted values (one per line) and finally the
// wall-clock time spent inside radixsort as "<sec>.<nanosec> s".
// Returns 1 on a missing/invalid argument or allocation failure.
int main(int argc, char *argv[])
{
    if (argc < 2) {
        std::cerr << "usage: " << (argc > 0 ? argv[0] : "radixsort")
                  << " <n>" << std::endl;
        return 1;
    }

    int n = atoi(argv[1]);
    if (n <= 0) {
        std::cerr << "n must be a positive integer" << std::endl;
        return 1;
    }

    // Heap allocation: a variable-length stack array is not standard C++
    // and overflows the stack for large n.
    int *arr = (int*)malloc(sizeof(int) * (size_t)n);
    if (arr == NULL) {
        std::cerr << "out of host memory" << std::endl;
        return 1;
    }

    rng(arr, n);

    timespec start, stop;
    clock_gettime(CLOCK_REALTIME, &start);
    radixsort(arr, n);
    clock_gettime(CLOCK_REALTIME, &stop);

    print(arr, n);

    timespec duration = diff(start, stop);
    // tv_sec / tv_nsec are not int; cast to long and print with %ld so the
    // format matches the arguments on every platform.
    printf("\n%ld.%09ld s\n", (long)duration.tv_sec, (long)duration.tv_nsec);

    // deallocate host memory
    free(arr);
    return 0;
}
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment