diff --git a/src/parallel_radix.cu b/src/parallel_radix.cu index 55f734e5e7c521941605250dfe4e6c11d1a6fc85..074689673e52cd2a6faeee2b5803ba3563bdbfe3 100644 --- a/src/parallel_radix.cu +++ b/src/parallel_radix.cu @@ -7,7 +7,7 @@ #include "device_launch_parameters.h" #define MAX_BLOCK_SIZE 1024 -#define ARRAY_SIZE 200000 +#define ARRAY_SIZE 100000 #define BASE 10 #define RNG_SEED 13516027 @@ -23,7 +23,7 @@ __global__ void get_max(unsigned int * input, int n) { const int tid = blockDim.x * blockIdx.x + threadIdx.x; auto step_size = 1; - int number_of_threads = n / 2; + int number_of_threads = blockDim.x / 2; while (number_of_threads > 0) { if (tid < number_of_threads) // still alive? @@ -68,7 +68,7 @@ int main(int argc, char *argv[]) cudaMemcpy(harr, darr, count * sizeof(int), cudaMemcpyDeviceToHost); //find max if(count > MAX_BLOCK_SIZE){ - grid = count / (MAX_BLOCK_SIZE); + grid = count / (MAX_BLOCK_SIZE) + 1; } else{ grid = 1;