Merge branch release-5-1 into release-2016

[alexxy/gromacs.git] / src / gromacs / mdlib / nbnxn_ocl / nbnxn_ocl_data_mgmt.cpp
diff --git a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp

index f9080af4eb120a01e479d682a4847009e7b16e3d..c7e3329a7c210d8dd3a595684d4292c607350c47 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp
+++ b/src/gromacs/mdlib/nbnxn_ocl/nbnxn_ocl_data_mgmt.cpp
@@ -641,7 +641,8 @@ nbnxn_ocl_clear_e_fshift(gmx_nbnxn_ocl_t *nb)
      cl_kernel            zero_e_fshift = nb->kernel_zero_e_fshift;
  
      local_work_size[0]   = 64;
-    global_work_size[0]  = ((shifts/64)*64) + ((shifts%64) ? 64 : 0);
+    // Round the total number of threads up from the array size
+    global_work_size[0]  = ((shifts + local_work_size[0] - 1)/local_work_size[0])*local_work_size[0];
  
      arg_no    = 0;
      cl_error  = clSetKernelArg(zero_e_fshift, arg_no++, sizeof(cl_mem), &(adat->fshift));
@@ -814,6 +815,10 @@ void nbnxn_gpu_init(gmx_nbnxn_ocl_t          **p_nb,
   */
  static void nbnxn_ocl_clear_f(gmx_nbnxn_ocl_t *nb, int natoms_clear)
  {
+    if (natoms_clear == 0)
+    {
+        return;
+    }
  
      cl_int               cl_error;
      cl_atomdata_t *      adat     = nb->atdat;
@@ -830,7 +835,9 @@ static void nbnxn_ocl_clear_f(gmx_nbnxn_ocl_t *nb, int natoms_clear)
      cl_uint              natoms_flat = natoms_clear * (sizeof(rvec)/sizeof(real));
  
      local_work_size[0]  = 64;
-    global_work_size[0] = ((natoms_flat/local_work_size[0])*local_work_size[0]) + ((natoms_flat%local_work_size[0]) ? local_work_size[0] : 0);
+    // Round the total number of threads up from the array size
+    global_work_size[0] = ((natoms_flat + local_work_size[0] - 1)/local_work_size[0])*local_work_size[0];
+
  
      arg_no    = 0;
      cl_error  = clSetKernelArg(memset_f, arg_no++, sizeof(cl_mem), &(adat->f));