cl_kernel zero_e_fshift = nb->kernel_zero_e_fshift;
local_work_size[0] = 64;
- global_work_size[0] = ((shifts/64)*64) + ((shifts%64) ? 64 : 0);
+ // Round the global work size up to the next multiple of the local work size, so that it is at least the array size
+ global_work_size[0] = ((shifts + local_work_size[0] - 1)/local_work_size[0])*local_work_size[0];
arg_no = 0;
cl_error = clSetKernelArg(zero_e_fshift, arg_no++, sizeof(cl_mem), &(adat->fshift));
*/
static void nbnxn_ocl_clear_f(gmx_nbnxn_ocl_t *nb, int natoms_clear)
{
+ if (natoms_clear == 0)
+ {
+ return;
+ }
cl_int cl_error;
cl_atomdata_t * adat = nb->atdat;
cl_uint natoms_flat = natoms_clear * (sizeof(rvec)/sizeof(real));
local_work_size[0] = 64;
- global_work_size[0] = ((natoms_flat/local_work_size[0])*local_work_size[0]) + ((natoms_flat%local_work_size[0]) ? local_work_size[0] : 0);
+ // Round the global work size up to the next multiple of the local work size, so that it is at least the array size
+ global_work_size[0] = ((natoms_flat + local_work_size[0] - 1)/local_work_size[0])*local_work_size[0];
+
arg_no = 0;
cl_error = clSetKernelArg(memset_f, arg_no++, sizeof(cl_mem), &(adat->f));