/*
 * radix.cu -- least-significant-digit, base-10 radix sort of non-negative
 * ints, executed by a single CUDA thread block.
 *
 * Usage: radix <number_of_elements>
 *
 * Provenance: recovered from a git diff that added radix.cu and deleted the
 * previously committed `radix` binary.
 */
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
#include <math.h>
#include <chrono>

#define THREADS 4
using namespace std;

/* Abort with a readable message when a CUDA runtime call fails. */
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t err_ = (call);                                         \
        if (err_ != cudaSuccess) {                                         \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,  \
                    cudaGetErrorString(err_));                             \
            exit(EXIT_FAILURE);                                            \
        }                                                                  \
    } while (0)

/*
 * Integer power x^n for n >= 0 (returns 1 when n <= 0).
 * No overflow check: with x == 10 the result fits in an int only for n <= 9.
 * Named intPow rather than pow so it does not shadow the math-library pow.
 */
__host__ __device__ int intPow(int x, int n) {
    int result = 1;
    for (int i = 0; i < n; i++) {
        result *= x;
    }
    return result;
}

/*
 * Returns the decimal digit of `number` at position `n`
 * (n == 0 is the least significant digit).
 * `number` must be non-negative; a negative value yields a negative "digit",
 * which callers would then use as an out-of-range histogram index.
 */
__host__ __device__ int getDigitFromNumber(int number, int n) {
    return (number / intPow(10, n)) % 10;
}

/*
 * Scatter step of one counting-sort pass, run by a single thread.
 *
 *   arr       input values (read only here)
 *   temp_arr  output buffer with room for `size` ints
 *   size      number of elements
 *   count_arr inclusive prefix sums of the digit histogram (10 entries);
 *             decremented in place as slots are handed out
 *   length    digit position this pass sorts on
 *
 * The input is walked back to front so that equal digits keep their relative
 * order, which is what makes the multi-pass radix sort correct.
 */
__device__ void countingSort(int* arr, int* temp_arr, int size, int* count_arr, int length) {
    for (int i = size - 1; i >= 0; i--) {
        int digit = getDigitFromNumber(arr[i], length);
        count_arr[digit]--;
        temp_arr[count_arr[digit]] = arr[i];
    }
}

/*
 * Sorts arr[0..size) ascending, in place.
 *
 *   arr     device-accessible array of non-negative ints
 *   size    number of elements
 *   length  number of decimal digits of the largest element (passes to run)
 *
 * Launch layout: exactly ONE block (the histogram lives in shared memory and
 * is combined by thread 0); any 1-D block size works.
 * Memory: thread 0 takes size * sizeof(int) bytes from the device heap for
 * scratch space, so the host must make the heap large enough
 * (cudaLimitMallocHeapSize). If that allocation fails the kernel prints a
 * message and returns with arr untouched.
 */
__global__ void radixSort(int* arr, int size, int length) {
    __shared__ int count_arr[10];
    __shared__ int* temp_arr;

    // Kernel arguments are identical for every thread, so this early exit is
    // uniform and happens before any barrier.
    if (size <= 0 || length <= 0) {
        return;
    }

    // One scratch buffer for the whole block: only thread 0 ever uses it.
    if (threadIdx.x == 0) {
        temp_arr = (int*) malloc((size_t) size * sizeof(int));
    }
    for (int d = threadIdx.x; d < 10; d += blockDim.x) {
        count_arr[d] = 0;
    }
    // Publishes temp_arr and the zeroed histogram before anyone counts.
    __syncthreads();

    // Every thread reads the same shared pointer, so this exit is uniform too.
    if (temp_arr == NULL) {
        if (threadIdx.x == 0) {
            printf("radixSort: device malloc of %d ints failed\n", size);
        }
        return;
    }

    for (int pass = 0; pass < length; pass++) {
        // Block-stride loop: covers every element even when size is not a
        // multiple of the block size.
        for (int j = threadIdx.x; j < size; j += blockDim.x) {
            atomicAdd(&count_arr[getDigitFromNumber(arr[j], pass)], 1);
        }
        // The histogram must be complete before thread 0 consumes it.
        __syncthreads();

        if (threadIdx.x == 0) {
            for (int d = 1; d < 10; d++) {
                count_arr[d] += count_arr[d - 1];
            }
            countingSort(arr, temp_arr, size, count_arr, pass);

            for (int j = 0; j < size; j++) {
                arr[j] = temp_arr[j];
            }
            for (int d = 0; d < 10; d++) {
                count_arr[d] = 0;
            }
        }
        // The other threads must not start the next pass until arr has been
        // rewritten and the histogram reset.
        __syncthreads();
    }

    if (threadIdx.x == 0) {
        free(temp_arr);
    }
}

/* Returns the largest of arr[0..n). Requires n >= 1. */
int getMax(int arr[], int n) {
    int mx = arr[0];
    for (int i = 1; i < n; i++) {
        if (arr[i] > mx) {
            mx = arr[i];
        }
    }
    return mx;
}

/* Number of decimal digits of `value`; 0 for any value <= 0. */
int countDigits(int value) {
    int digits = 0;
    while (value > 0) {
        digits++;
        value /= 10;
    }
    return digits;
}

/* Fills arr[0..n) with pseudo-random ints from rand(), using a fixed seed. */
void rng(int* arr, int n) {
    int seed = 13516117; // Replace with your NIM (student ID number) as the seed.
    srand(seed);
    for (long i = 0; i < n; i++) {
        arr[i] = (int) rand();
    }
}

/* Prints every element of a host array, one per line. */
void printHostArray(int* arr, int size) {
    for (int i = 0; i < size; ++i) {
        cout << "Array[" << i << "] : " << *(arr + i) << endl;
    }
}

/*
 * Generates <number_of_elements> random ints, sorts them on the GPU and
 * reports the kernel's wall-clock time.
 * Returns 0 on success, 1 on bad arguments or allocation failure.
 */
int main(int argc, char* argv[]) {
    if (argc < 2) {
        cerr << "Usage: " << (argc > 0 ? argv[0] : "radix")
             << " <number_of_elements>" << endl;
        return 1;
    }

    int size = atoi(argv[1]);
    if (size <= 0) {
        cerr << "number_of_elements must be a positive integer" << endl;
        return 1;
    }
    // size_t: size * sizeof(int) does not fit in an int for large inputs.
    size_t mem_size = (size_t) size * sizeof(int);

    int* host = (int*) malloc(mem_size);
    if (host == 0) {
        cout << "Allocating memory failed" << endl;
        return 1;
    }
    rng(host, size);

    int* device = 0;
    if (cudaMallocManaged((void**) &device, mem_size) != cudaSuccess || device == 0) {
        cout << "Allocating memory failed" << endl;
        free(host);
        return 1;
    }

    // The kernel takes its scratch buffer from the device heap; grow the heap
    // when the buffer (plus some slack for allocator overhead) would not fit.
    size_t heap_needed = mem_size + (1u << 20);
    size_t heap_size = 0;
    CUDA_CHECK(cudaDeviceGetLimit(&heap_size, cudaLimitMallocHeapSize));
    if (heap_size < heap_needed) {
        CUDA_CHECK(cudaDeviceSetLimit(cudaLimitMallocHeapSize, heap_needed));
    }

    int length = countDigits(getMax(host, size));

    // cout << "before :" << endl;
    // printHostArray(host, size);

    CUDA_CHECK(cudaMemcpy(device, host, mem_size, cudaMemcpyHostToDevice));

    auto start = std::chrono::high_resolution_clock::now();

    radixSort<<<1, THREADS>>>(device, size, length);
    CUDA_CHECK(cudaGetLastError());
    // Kernel launches are asynchronous: wait for completion before stopping
    // the clock, otherwise only the launch overhead is measured.
    CUDA_CHECK(cudaDeviceSynchronize());

    auto finish = std::chrono::high_resolution_clock::now();
    std::chrono::duration<double> elapsed = finish - start;
    cout << "Execution time : " << elapsed.count() * 1000000 << " microseconds" << endl;

    CUDA_CHECK(cudaMemcpy(host, device, mem_size, cudaMemcpyDeviceToHost));

    // cout << "After :" << endl;
    // printHostArray(host, size);

    free(host);
    CUDA_CHECK(cudaFree(device));
    return 0;
}