Skip to content
Snippets Groups Projects
Commit 8ba34dd5 authored by jasonwiguna's avatar jasonwiguna
Browse files

mantap

parents e7e2a4e1 0e377ce8
No related merge requests found
Pipeline #12346 canceled with stages
File deleted
radix.cu 0 → 100644
#include <stdio.h>
#include <iostream>
#include <math.h>
#include <chrono>
#define THREADS 4
using namespace std;
// Integer exponentiation for device code: multiplies 1 by `x`, `n` times.
//
// x - base.
// n - exponent; for n <= 0 no multiplication happens and 1 is returned.
//
// Returns x raised to the n-th power as an int. There is no overflow check.
__device__ int pow(int x, int n) {
    int product = 1;
    for (int remaining = n; remaining > 0; --remaining) {
        product *= x;
    }
    return product;
}
// Extracts one decimal digit of `number`.
//
// number - value to inspect.
// n      - zero-based digit position, counted from the least-significant
//          digit (n == 0 selects the ones digit).
//
// Returns (number / 10^n) % 10. For a negative `number` the result follows
// C++ truncating division and can itself be negative.
__device__ int getDigitFromNumber(int number, int n) {
    const int place_value = pow(10, n);
    return (number / place_value) % 10;
}
// Scatter step of a counting sort on a single decimal digit.
//
// arr       - input values (read only here).
// temp_arr  - output buffer with room for `size` ints.
// size      - number of elements in `arr`.
// count_arr - per-digit counters; each is pre-decremented and then used as
//             the destination index, so the array is modified. The caller in
//             this file passes cumulative (prefix-summed) digit counts.
// length    - zero-based position of the digit to sort by (despite the name,
//             it is forwarded to getDigitFromNumber as the digit index).
//
// The input is walked from the last element to the first.
__device__ void countingSort(int* arr, int* temp_arr, int size, int* count_arr, int length) {
    int src = size;
    while (src-- > 0) {
        const int value = arr[src];
        const int digit = getDigitFromNumber(value, length);
        // Pre-decrement: the updated counter is the slot for this value.
        const int dst = --count_arr[digit];
        temp_arr[dst] = value;
    }
}
// Base-10 least-significant-digit radix sort of `arr`, in place, performed by
// a single thread block.
//
// Launch layout: one 1-D thread block of any width (blockDim.x >= 1). Any
// block other than block (0,0,0) exits immediately, because the digit
// histogram is block-local shared memory and the array is rewritten in place.
// Shared memory: 10 ints (histogram) plus one pointer, statically allocated.
// Device heap: one scratch buffer of `size` ints is taken with device-side
// malloc and released before the kernel finishes. If that allocation fails
// a message is printed and `arr` is left untouched.
//
// arr    - device-accessible array of `size` ints. Values are expected to be
//          non-negative: a negative value would yield a negative digit and
//          therefore a negative histogram index.
// size   - number of elements in `arr`.
// length - number of decimal digits to process (positions 0 .. length-1).
//
// Per digit: all threads build the histogram with shared-memory atomics, then
// thread 0 turns it into cumulative counts, scatters through the scratch
// buffer, copies the result back and clears the histogram.
__global__ void radixSort(int* arr, int size, int length) {
    __shared__ int count_arr[10];
    __shared__ int* temp_arr;

    // Uniform early exits: every thread of a block takes the same branch, so
    // no thread is left waiting at a later barrier.
    if (blockIdx.x != 0 || blockIdx.y != 0 || blockIdx.z != 0) {
        return;
    }
    if (size <= 0 || length <= 0) {
        return;
    }

    // Only thread 0 ever touches the scratch buffer, so allocate it once
    // instead of once per thread.
    if (threadIdx.x == 0) {
        temp_arr = (int*) malloc((size_t) size * sizeof(int));
        if (temp_arr == NULL) {
            printf("radixSort: device malloc of %d ints failed\n", size);
        }
    }
    for (int j = threadIdx.x; j < 10; j += blockDim.x) {
        count_arr[j] = 0;
    }
    // Publish temp_arr and the zeroed histogram before anyone uses them.
    __syncthreads();

    if (temp_arr == NULL) {
        return; // same shared value seen by all threads -> uniform exit
    }

    for (int device_i = 0; device_i < length; device_i++) {
        // Block-stride loop: covers every element for any size / blockDim.x
        // combination, including sizes that are not a multiple of the width.
        for (int j = threadIdx.x; j < size; j += blockDim.x) {
            int digit_count = getDigitFromNumber(arr[j], device_i);
            atomicAdd(&count_arr[digit_count], 1);
        }
        // Histogram must be complete before it is prefix-summed.
        __syncthreads();

        if (threadIdx.x == 0) {
            for (int j = 1; j < 10; j++) {
                count_arr[j] += count_arr[j - 1];
            }
            countingSort(arr, temp_arr, size, count_arr, device_i);
            for (int j = 0; j < size; j++) {
                arr[j] = temp_arr[j];
            }
            for (int j = 0; j < 10; j++) {
                count_arr[j] = 0;
            }
        }
        // Other threads must not start counting the next digit until arr has
        // been rewritten and the histogram cleared.
        __syncthreads();
    }

    if (threadIdx.x == 0) {
        free(temp_arr);
    }
}
// Returns the largest value among the first `n` elements of `arr`.
//
// arr - array to scan; may be null only when there is nothing to scan.
// n   - number of elements to consider.
//
// For an empty input (null `arr` or n <= 0) the function returns 0 instead of
// reading arr[0]; callers that count decimal digits of the result then see
// zero digits.
int getMax(int arr[], int n) {
    if (!arr || n <= 0) {
        return 0;
    }
    int mx = arr[0];
    for (int i = 1; i < n; i++) {
        if (arr[i] > mx) {
            mx = arr[i];
        }
    }
    return mx;
}
// Fills the first `n` slots of `arr` with pseudo-random ints from rand().
// The generator is re-seeded with a fixed value on every call, so repeated
// calls produce the same sequence.
//
// arr - destination buffer with room for at least `n` ints.
// n   - number of values to generate; nothing is written when n <= 0.
void rng(int* arr, int n) {
    const int seed = 13516117; // Replace with your student ID number (NIM) as the seed.
    srand(seed);
    long idx = 0;
    while (idx < n) {
        arr[idx] = rand();
        ++idx;
    }
}
// Writes the first `size` elements of `arr` to standard output, one per line,
// in the form "Array[<index>] : <value>". Each line is flushed via endl.
//
// arr  - host array to print.
// size - number of elements to print; nothing is printed when size <= 0.
void printHostArray(int* arr, int size) {
    int index = 0;
    while (index < size) {
        std::cout << "Array[" << index << "] : " << arr[index] << std::endl;
        ++index;
    }
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `err` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t err, const char* what) {
    if (err == cudaSuccess) {
        return true;
    }
    std::cerr << what << " failed: " << cudaGetErrorString(err) << std::endl;
    return false;
}

// Entry point: generates `argv[1]` pseudo-random ints, sorts them on the GPU
// with radixSort and prints the elapsed kernel time.
//
// Usage: <program> <array_size>   (array_size must be a positive integer)
//
// Returns 0 on success and 1 on bad arguments, allocation failure or any
// CUDA error.
int main(int argc, char* argv[]) {
    if (argc < 2) {
        std::cerr << "Usage: " << (argc > 0 ? argv[0] : "radix")
                  << " <array_size>" << std::endl;
        return 1;
    }
    const int size = atoi(argv[1]);
    if (size <= 0) {
        std::cerr << "Array size must be a positive integer" << std::endl;
        return 1;
    }
    // size_t keeps the byte count from overflowing int for large arrays.
    const size_t mem_size = (size_t) size * sizeof(int);

    int* host = (int*) malloc(mem_size);
    if (host == 0) {
        std::cout << "Allocating memory failed" << std::endl;
        return 1;
    }
    int* device = 0;
    if (!cudaOk(cudaMallocManaged((void**) &device, mem_size), "cudaMallocManaged") || device == 0) {
        std::cout << "Allocating memory failed" << std::endl;
        free(host);
        return 1;
    }

    // Fill the input only after the host buffer is known to be valid.
    rng(host, size);

    // Number of decimal digits of the largest value = number of sort passes.
    int length = 0;
    for (int max_num = getMax(host, size); max_num > 0; max_num /= 10) {
        length++;
    }

    // cout << "before :" << endl;
    // printHostArray(host, size);

    // radixSort takes its scratch space from the device malloc heap, whose
    // default size is small. Reserve room for up to one `size`-int buffer per
    // launched thread, plus slack for allocator bookkeeping.
    const size_t heap_bytes = (size_t) THREADS * mem_size + ((size_t) 8 << 20);
    bool ok = cudaOk(cudaDeviceSetLimit(cudaLimitMallocHeapSize, heap_bytes), "cudaDeviceSetLimit")
           && cudaOk(cudaMemcpy(device, host, mem_size, cudaMemcpyHostToDevice), "cudaMemcpy (host to device)");

    if (ok) {
        auto start = std::chrono::high_resolution_clock::now();
        radixSort<<<1, THREADS>>>(device, size, length);
        const cudaError_t launch_status = cudaGetLastError();
        // Kernel launches are asynchronous: wait for completion before
        // stopping the clock, otherwise only the launch overhead is timed.
        const cudaError_t run_status = cudaDeviceSynchronize();
        auto finish = std::chrono::high_resolution_clock::now();

        ok = cudaOk(launch_status, "radixSort launch")
          && cudaOk(run_status, "radixSort execution");
        if (ok) {
            std::chrono::duration<double> elapsed = finish - start;
            std::cout << "Execution time : " << elapsed.count() * 1000000 << " microseconds" << std::endl;
        }
    }

    if (ok) {
        ok = cudaOk(cudaMemcpy(host, device, mem_size, cudaMemcpyDeviceToHost), "cudaMemcpy (device to host)");
    }

    // cout << "After :" << endl;
    // printHostArray(host, size);

    // Release both buffers on every path that reaches this point.
    free(host);
    cudaFree(device);
    return ok ? 0 : 1;
}
\ No newline at end of file
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment