diff --git a/src/cuda.cu b/src/cuda.cu index a12e7c82fee29b4c41afa8513fb6d78a9e923e3c..1c994f6c12b1e61a118902204eebbc95d2b78079 100644 --- a/src/cuda.cu +++ b/src/cuda.cu @@ -107,9 +107,11 @@ __global__ void mergeSort(int* output, int *temporary_array, int index, int widt int x = blockIdx.x * blockDim.x + threadIdx.x; int left = width*x*arrayPerThread; int mid,right; + for(int i = 0; i < arrayPerThread; i++){ if(left >= index) break; - mid = fminf(left+(width/2), index); // ini min nya emang defined? @nyamnyam, nggak tau, tpi blm merge sort aja di TC 4 ada segfault, jdi mau solve itu dlu + + mid = fminf(left+(width/2), index); right = fminf(left+width, index); mergeArray(output, temporary_array, left, mid, right); left += width; @@ -164,7 +166,7 @@ void convolutionWithCUDA(int* output, int* kernel, int* target, int rowKernel, i long nThreads = thread_num; for(int width = 2; width < index*2; width*=2){ int arrayPerThread = index/(nThreads*width) + 1; - mergeSort<<<1, dim3(thread_num, 1, 1)>>>(output_datarange, temporary_array, index, width, arrayPerThread); + mergeSort<<<1, thread_num>>>(output_datarange,temporary_array, index, width, arrayPerThread); temp_swap = output_datarange; output_datarange = temporary_array; temporary_array = temp_swap;