Skip to content
Snippets Groups Projects
Commit 71280c56 authored by HizRadit07's avatar HizRadit07
Browse files

added sorting implementation

parent 2975a059
Branches issue-55-edit-departemen-new
Tags
No related merge requests found
sort.cu 0 → 100644
#include <stdio.h>
#include <stdlib.h>
//VERY IMPORTANT NOTE
//to compile, use a command like this:
//nvcc -arch=sm_35 -rdc=true -o cuda_sort sort.cu
//add the compute_35 architecture flag, otherwise it will be buggy
//Recursive merge sort kernel built on CUDA dynamic parallelism (kernels
//launching kernels), so it needs relocatable device code (-rdc=true) and a
//GPU architecture that supports device-side launches.
//
//dataArr : device array holding the input; sorted in place over [begin, end)
//dataAux : device scratch array with the same length as dataArr, so that the
//          merge never reads and writes the same array; allocate it on the
//          host before launching
//begin   : first index of the range to sort (inclusive)
//end     : one past the last index of the range to sort (exclusive)
//depth   : recursion depth of this call; pass 0 for the top-level launch
//
//Launch layout: <<<1, 1>>>. The kernel does not use threadIdx/blockIdx, so
//extra threads would all repeat (and race on) the same work.
//
//NOTE(review): device-side cudaDeviceSynchronize() is deprecated in newer CUDA
//toolkits (believed removed in CUDA 12) - confirm before upgrading the toolkit.

//Deepest recursion level the sort relies on. It is repeated here, with the
//same value the host passes to cudaLimitDevRuntimeSyncDepth, because the
//kernel is compiled before the host-side definition is seen.
#ifndef MAX_DEPTH
#define MAX_DEPTH 16
#endif

__global__ void cudaMergeSort(int* dataArr, int* dataAux, int begin, int end, int depth);

//Merges the sorted runs [begin, middle) and [middle, end) of dataArr into
//dataAux, then copies the merged range back into dataArr.
static __device__ void mergeAdjacentRuns(int* dataArr, int* dataAux, int begin, int middle, int end){
    int i0 = begin;
    int i1 = middle;
    for (int index = begin; index < end; index++){
        //take from the left run while it has elements and its head is not
        //larger than the right head (<= keeps equal elements in order)
        if (i0 < middle && (i1 >= end || dataArr[i0] <= dataArr[i1])){
            dataAux[index] = dataArr[i0];
            i0++;
        }else{
            dataAux[index] = dataArr[i1];
            i1++;
        }
    }
    //write the merged range back to the data array
    for (int index = begin; index < end; index++){
        dataArr[index] = dataAux[index];
    }
}

//Sorts [begin, end) entirely inside the calling thread with a bottom-up merge
//sort; used when launching further child grids is not possible.
static __device__ void sortRangeInThread(int* dataArr, int* dataAux, int begin, int end){
    int n = end - begin;
    for (int width = 1; width < n; width *= 2){
        for (int lo = begin; end - lo > width; ){
            int mid = lo + width;
            //clamp the right run to the end of the range
            int hi = (end - mid > width) ? mid + width : end;
            mergeAdjacentRuns(dataArr, dataAux, lo, mid, hi);
            lo = hi;
        }
        //the last pass already covered the whole range; stopping here also
        //keeps width *= 2 from overflowing
        if (width > n - width){
            break;
        }
    }
}

//Launches a one-thread child grid that sorts [begin, end) in its own
//non-blocking stream. Returns true only if the launch was accepted.
static __device__ bool launchChildSort(int* dataArr, int* dataAux, int begin, int end, int depth){
    cudaStream_t stream;
    if (cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking) != cudaSuccess){
        cudaGetLastError(); //discard the recorded error; the caller falls back
        return false;
    }
    cudaMergeSort<<< 1, 1, 0, stream >>>(dataArr, dataAux, begin, end, depth);
    bool launched = (cudaGetLastError() == cudaSuccess);
    cudaStreamDestroy(stream);
    return launched;
}

__global__ void cudaMergeSort(int* dataArr, int* dataAux, int begin, int end, int depth){
    int n = end - begin;
    if (n < 2){
        return;
    }
    //Stop recursing before the configured synchronization depth is exceeded;
    //a deeper cudaDeviceSynchronize() would fail and the merge below would
    //then combine halves that are not sorted yet.
    //NOTE(review): the cutoff is one level conservative - confirm the exact
    //meaning of cudaLimitDevRuntimeSyncDepth if that level matters.
    if (depth >= MAX_DEPTH - 1){
        sortRangeInThread(dataArr, dataAux, begin, end);
        return;
    }
    //same value as (end+begin)/2 for non-negative indices, without overflow
    int middle = begin + n / 2;
    //launch one child grid per half so both halves sort concurrently
    bool leftLaunched = launchChildSort(dataArr, dataAux, begin, middle, depth + 1);
    bool rightLaunched = launchChildSort(dataArr, dataAux, middle, end, depth + 1);
    //a half whose child could not be launched is sorted here instead; the
    //halves are disjoint, so this cannot race with the other child
    if (!leftLaunched){
        sortRangeInThread(dataArr, dataAux, begin, middle);
    }
    if (!rightLaunched){
        sortRangeInThread(dataArr, dataAux, middle, end);
    }
    //wait for the children before reading their output
    if (leftLaunched || rightLaunched){
        if (cudaDeviceSynchronize() != cudaSuccess){
            //children may still be running; abort the kernel rather than
            //merge unsorted data and return a silently wrong result
            __trap();
        }
    }
    //merge the two sorted halves
    mergeAdjacentRuns(dataArr, dataAux, begin, middle, end);
}
#define MAX_DEPTH 16
//Test driver: sorts a fixed five-element array on the GPU with cudaMergeSort
//and prints the result, one value per line.

//Stops the program with a diagnostic when a CUDA runtime call has failed, so
//an early failure is reported instead of printing stale or unsorted data.
static void checkCuda(cudaError_t status, const char* what){
    if (status != cudaSuccess){
        fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

int main(){
    int *gpuData = NULL;
    int *gpuAux = NULL;
    const int left = 0;
    const int right = 5;
    const size_t bytes = right * sizeof(int);
    int *data = (int*)malloc(bytes);
    if (data == NULL){
        fprintf(stderr, "out of host memory\n");
        return EXIT_FAILURE;
    }
    data[0] = 5;
    data[1] = 3;
    data[2] = 4;
    data[3] = 2;
    data[4] = 1;
    //raise the device-side synchronization depth limit before any kernel runs
    checkCuda(cudaDeviceSetLimit(cudaLimitDevRuntimeSyncDepth, MAX_DEPTH), "cudaDeviceSetLimit");
    //allocate the device data array and the scratch array of the same size
    checkCuda(cudaMalloc((void**)&gpuData, bytes), "cudaMalloc(gpuData)");
    checkCuda(cudaMalloc((void**)&gpuAux, bytes), "cudaMalloc(gpuAux)");
    //copy the host data into gpuData on the device
    checkCuda(cudaMemcpy(gpuData, data, bytes, cudaMemcpyHostToDevice), "cudaMemcpy host to device");
    //launch on device
    cudaMergeSort<<<1,1>>>(gpuData,gpuAux, left, right, 0);
    //a bad launch is only reported through cudaGetLastError; faults during
    //execution surface at the synchronizing call
    checkCuda(cudaGetLastError(), "cudaMergeSort launch");
    checkCuda(cudaDeviceSynchronize(), "cudaMergeSort execution");
    //copy back to host
    checkCuda(cudaMemcpy(data, gpuData, bytes, cudaMemcpyDeviceToHost), "cudaMemcpy device to host");
    //free
    checkCuda(cudaFree(gpuAux), "cudaFree(gpuAux)");
    checkCuda(cudaFree(gpuData), "cudaFree(gpuData)");
    checkCuda(cudaDeviceReset(), "cudaDeviceReset");
    for (int i=0; i<right; i++){
        printf("%d\n", data[i]);
    }
    free(data);
    return 0;
}
\ No newline at end of file
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment