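/*
 * GitLab page header captured together with the source file
 * (not part of the program):
 *
 *   Skip to content
 *   Snippets / Groups / Projects
 *   Commit 38861ffe, authored by Azka Nabilah Mumtaz
 *   Browse files
 *   Commit message: "tes"
 *   Parent: 0a816f3e
 *   Branches
 *   No related merge requests found
 */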
#include <stdio.h>
#include <stdlib.h>
// #include <omp.h>
#include <sys/time.h>
void generateArray(int arr[], int n,int seed);
void radixsort(int arr[], int n);
void print(int arr[], int n);
/*
 * Program entry point.
 *
 * Usage: <program> <array_size>
 *
 * Fills a host array of <array_size> ints with pseudo-random values
 * (fixed seed), sorts it with radixsort(), prints every element, and reports
 * the wall-clock time spent inside radixsort() in microseconds.
 *
 * Returns 0 on success, 1 on a missing/invalid argument or when the host
 * buffer cannot be allocated.
 */
int main(int argc, char *argv[]) {
    struct timeval stop, start;

    if (argc < 2) {
        fprintf(stderr, "usage: %s <array_size>\n",
                argc > 0 ? argv[0] : "radixsort");
        return 1;
    }

    /* Parse the size strictly: whole argument must be a non-negative decimal
     * number that survives the round trip through int. */
    char *end = NULL;
    long requested = strtol(argv[1], &end, 10);
    if (end == argv[1] || *end != '\0' || requested < 0 ||
        requested != (long)(int)requested) {
        fprintf(stderr, "invalid array size: %s\n", argv[1]);
        return 1;
    }
    int array_size = (int)requested;

    int *arr = (int *)malloc(sizeof(int) * (size_t)array_size);
    if (arr == NULL && array_size > 0) {
        fprintf(stderr, "failed to allocate %d ints on the host\n", array_size);
        return 1;
    }

    int seed = 13516127;
    generateArray(arr, array_size, seed);

    /* radixsort() allocates and fills its own device buffer, so no device
     * copy is made here. The CUDA runtime initializes lazily on first use;
     * issue a harmless call now so that one-time start-up cost is not counted
     * in the timed region below. */
    cudaFree(0);

    gettimeofday(&start, NULL);
    radixsort(arr, array_size);
    gettimeofday(&stop, NULL);

    print(arr, array_size);
    printf("Sorting selesai\n");

    /* Signed arithmetic: tv_usec of `stop` may be smaller than that of `start`. */
    long long elapsed_us =
        (long long)(stop.tv_sec - start.tv_sec) * 1000000LL +
        (long long)(stop.tv_usec - start.tv_usec);
    printf("took %lld microsecond for parallel radix sort\n", elapsed_us);

    free(arr);
    return 0;
}
/*
 * Finds the maximum value in arr[0..n) and stores it in max[0].
 *
 * Launch layout: a single block (only threadIdx.x is used for indexing).
 * Up to 1000 threads of the block take part; any threads beyond that idle.
 * Shared memory: a static 1000-int scratch array.
 *
 * arr : device pointer to n ints
 * n   : element count; when n <= 0 nothing is read from arr and max[0] is
 *       set to 0
 * max : device pointer to at least one int that receives the result
 */
__global__
void getMax(int arr[], int n, int max[]) {
    const int kCacheSize = 1000;
    __shared__ int cache[kCacheSize];

    /* n is the same for every thread, so this early exit is uniform and does
     * not skip the barrier for only part of the block. */
    if (n <= 0) {
        if (threadIdx.x == 0) {
            *max = 0;
        }
        return;
    }

    /* Number of threads that actually contribute a partial maximum. */
    const int lanes = ((int)blockDim.x < kCacheSize) ? (int)blockDim.x : kCacheSize;

    if ((int)threadIdx.x < lanes) {
        int temp = arr[0];
        /* Each participating thread scans indices tid, tid+lanes, ...; the
         * i < n test keeps the tail in bounds when n is not a multiple of
         * lanes. 64-bit index so i += lanes cannot overflow near INT_MAX. */
        for (long long i = threadIdx.x; i < n; i += lanes) {
            if (temp < arr[i]) {
                temp = arr[i];
            }
        }
        cache[threadIdx.x] = temp;
    }
    __syncthreads();

    /* Thread 0 folds the partial maxima; only slots that were written
     * (0..lanes-1) are read. */
    if (threadIdx.x == 0) {
        int best = cache[0];
        for (int i = 1; i < lanes; i++) {
            if (best < cache[i]) {
                best = cache[i];
            }
        }
        *max = best;
    }
}
// __global__
/*
 * Fills arr[0..n) with pseudo-random ints.
 *
 * Seeds the C library generator with `seed` and then stores n consecutive
 * rand() results, so the same seed always yields the same contents.
 * Values are written straight into arr; no temporary buffer is used.
 *
 * arr  : host buffer with room for at least n ints
 * n    : number of elements to generate; n <= 0 writes nothing
 * seed : value passed to srand()
 */
void generateArray(int arr[], int n,int seed){
    srand(seed);
    for (long i = 0; i < n; i++) {
        arr[i] = (int)rand();
    }
}
/*
 * One stable counting-sort pass over arr[0..n), keyed on a single decimal
 * digit: key = (arr[i] / digit) % 10 (digit = 1 for units, 10 for tens, ...).
 *
 * Launch layout: the pass is serial and is meant to be launched as <<<1,1>>>.
 * If more threads are launched, only thread 0 of block 0 does the work and
 * the rest return immediately, so they cannot race on arr.
 *
 * Preconditions: arr is a device pointer to n ints; digit > 0; values are
 * non-negative (a negative value would produce a negative bucket index).
 *
 * Scratch space for the reordered elements comes from the device heap via
 * in-kernel malloc and is released before returning. If that allocation
 * fails, a message is printed and arr is left unchanged.
 */
__global__
void countSort(int arr[], int n, int digit) {
    if (blockIdx.x != 0 || threadIdx.x != 0) {
        return;
    }
    if (n <= 0) {
        return;
    }

    int *output = (int *)malloc(sizeof(int) * (size_t)n);
    if (output == NULL) {
        printf("countSort: device heap allocation of %d ints failed\n", n);
        return;
    }

    long i;
    int count[10] = {0};

    /* Histogram of the selected digit. */
    for (i = 0; i < n; i++)
        count[(arr[i] / digit) % 10]++;

    /* Prefix sums: count[d] becomes one past the last slot for digit d. */
    for (i = 1; i < 10; i++)
        count[i] += count[i - 1];

    /* Walk backwards so equal keys keep their relative order (stability). */
    for (i = n - 1; i >= 0; i--) {
        int bucket = (arr[i] / digit) % 10;
        output[count[bucket] - 1] = arr[i];
        count[bucket]--;
    }

    for (i = 0; i < n; i++)
        arr[i] = output[i];

    /* Without this every pass would permanently consume device heap. */
    free(output);
}
//__global__
/*
 * Sorts the host array arr[0..n) in ascending order with an LSD decimal
 * radix sort executed on the GPU.
 *
 * Steps: copy arr to a device buffer, find the maximum with getMax, run one
 * countSort pass per decimal digit of that maximum, then copy the result back
 * into arr. All device memory allocated here is released before returning.
 *
 * arr : host buffer of n ints (values are expected to be non-negative, since
 *       the digit passes index buckets with (value / digit) % 10)
 * n   : element count; n <= 1 (or arr == NULL) returns immediately
 *
 * On any CUDA error a message is written to stderr, the remaining steps are
 * skipped, and arr is left as it was on entry.
 */
void radixsort(int arr[], int n) {
    if (arr == NULL || n <= 1) {
        return;                       /* nothing to sort */
    }

    const size_t bytes = sizeof(int) * (size_t)n;
    int max = 0;
    int *d_max = NULL;
    int *d_arr = NULL;
    cudaError_t err;

    err = cudaMalloc((void **)&d_max, sizeof(int));
    if (err == cudaSuccess) {
        err = cudaMalloc((void **)&d_arr, bytes);
    }
    if (err == cudaSuccess) {
        err = cudaMemcpy(d_arr, arr, bytes, cudaMemcpyHostToDevice);
    }

    if (err == cudaSuccess) {
        /* d_max is write-only for the kernel, so it is not initialized. */
        getMax<<<1,1000>>>(d_arr, n, d_max);
        err = cudaGetLastError();     /* launch-configuration errors */
    }
    if (err == cudaSuccess) {
        /* Blocking copy: also reports faults raised while getMax ran. */
        err = cudaMemcpy(&max, d_max, sizeof(int), cudaMemcpyDeviceToHost);
    }

    if (err == cudaSuccess) {
        for (int digit = 1; max / digit > 0; digit *= 10) {
            countSort<<<1,1>>>(d_arr, n, digit);
            err = cudaGetLastError();
            if (err != cudaSuccess) {
                break;
            }
            /* This was the most significant digit. Stop before digit *= 10,
             * which would overflow int once digit reaches 1,000,000,000. */
            if (max / digit < 10) {
                break;
            }
        }
    }

    if (err == cudaSuccess) {
        err = cudaMemcpy(arr, d_arr, bytes, cudaMemcpyDeviceToHost);
    }

    if (err != cudaSuccess) {
        fprintf(stderr, "radixsort: CUDA error: %s\n", cudaGetErrorString(err));
    }

    /* cudaFree accepts NULL, so this is safe on every path. */
    cudaFree(d_arr);
    cudaFree(d_max);
}
/*
 * Writes the first n elements of arr to stdout, one per line (each value is
 * followed by a space and a newline), then emits one extra newline.
 * Nothing but the final newline is printed when n <= 0.
 */
void print(int arr[], int n) {
    const int *cursor = arr;
    long remaining = n;

    while (remaining > 0) {
        printf("%d \n", *cursor);
        ++cursor;
        --remaining;
    }
    printf("\n");
}
/*
int main(int argc, char *argv[]) {
struct timeval stop, start;
// int thread_count = 5;
int array_size = strtol(argv[1], NULL, 10);
//printf("NUM THREADS : %d\n", omp_get_num_threads());
int *arr,*d_arr;
arr = (int*)malloc(sizeof(int)* array_size);
cudaMalloc((void **)&d_arr, sizeof(int) * array_size);
// cudaMemcpy(d_arr, arr, sizeof(int) * array_size, cudaMemcpyHostToDevice);
int seed = 13516127;
generateArray(arr,array_size,seed);
cudaMemcpy(d_arr, arr, sizeof(int) * array_size, cudaMemcpyHostToDevice);
gettimeofday(&start, NULL);
radixsort<<<1,1>>>(arr, array_size);
gettimeofday(&stop, NULL);
print(arr,array_size);
printf("Sorting selesai\n");
printf("took %lu microsecond for parallel radix sort\n", ((stop.tv_sec - start.tv_sec)*1000000)+(stop.tv_usec - start.tv_usec));
cudaFree(d_arr);
free(arr);
return 0;
}
*/
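/*
 * GitLab page footer captured together with the source file
 * (not part of the program):
 *
 *   0% or .
 *   You are about to add 0 people to the discussion. Proceed with caution.
 *   Finish editing this message first!
 *   Please register or to comment
 */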