Reduce the use of nbnxn_consts.h

[alexxy/gromacs.git] / src / gromacs / mdlib / calc_verletbuf.c
diff --git a/src/gromacs/mdlib/calc_verletbuf.c b/src/gromacs/mdlib/calc_verletbuf.c

index 774a0b138e7ee15a9dbca359123f6c0cb74e31fd..50415436f054f4510b6168ec1ac6d289c3b58199 100644 (file)
--- a/src/gromacs/mdlib/calc_verletbuf.c
+++ b/src/gromacs/mdlib/calc_verletbuf.c
@@ -47,22 +47,12 @@
  #include "gromacs/math/calculate-ewald-splitting-coefficient.h"
  #include "gromacs/math/units.h"
  #include "gromacs/math/vec.h"
-#include "gromacs/mdlib/nbnxn_consts.h"
+#include "gromacs/mdlib/nb_verlet.h"
+#include "gromacs/mdlib/nbnxn_search.h"
  #include "gromacs/mdlib/nbnxn_simd.h"
  #include "gromacs/utility/fatalerror.h"
  #include "gromacs/utility/smalloc.h"
  
-#ifdef GMX_NBNXN_SIMD
-/* The include below sets the SIMD instruction type (precision+width)
- * for all nbnxn SIMD search and non-bonded kernel code.
- */
-#ifdef GMX_NBNXN_HALF_WIDTH_SIMD
-#define GMX_USE_HALF_WIDTH_SIMD_HERE
-#endif
-#include "gromacs/simd/simd.h"
-#endif
-
-
  /* The code in this file estimates a pairlist buffer length
   * given a target energy drift per atom per picosecond.
   * This is done by estimating the drift given a buffer length.
@@ -123,25 +113,40 @@ void verletbuf_get_list_setup(gmx_bool gmx_unused     bSIMD,
                                gmx_bool                bGPU,
                                verletbuf_list_setup_t *list_setup)
  {
+    /* When calling this function we often don't know which kernel type we
+     * are going to use. W choose the kernel type with the smallest possible
+     * i- and j-cluster sizes, so we potentially overestimate, but never
+     * underestimate, the buffer drift.
+     * Note that the current buffer estimation code only handles clusters
+     * of size 1, 2 or 4, so for 4x8 or 8x8 we use the estimate for 4x4.
+     */
+
      if (bGPU)
      {
-        list_setup->cluster_size_i     = NBNXN_GPU_CLUSTER_SIZE;
-        list_setup->cluster_size_j     = NBNXN_GPU_CLUSTER_SIZE;
+        /* The CUDA kernels split the j-clusters in two halves */
+        list_setup->cluster_size_i = nbnxn_kernel_to_ci_size(nbnxnk8x8x8_GPU);
+        list_setup->cluster_size_j = nbnxn_kernel_to_cj_size(nbnxnk8x8x8_GPU)/2;
      }
      else
      {
-        list_setup->cluster_size_i     = NBNXN_CPU_CLUSTER_I_SIZE;
-        list_setup->cluster_size_j     = NBNXN_CPU_CLUSTER_I_SIZE;
+        int kernel_type;
+
+        kernel_type = nbnxnk4x4_PlainC;
+
  #ifdef GMX_NBNXN_SIMD
          if (bSIMD)
          {
-            list_setup->cluster_size_j = GMX_SIMD_REAL_WIDTH;
  #ifdef GMX_NBNXN_SIMD_2XNN
-            /* We assume the smallest cluster size to be on the safe side */
-            list_setup->cluster_size_j /= 2;
+            /* We use the smallest cluster size to be on the safe side */
+            kernel_type = nbnxnk4xN_SIMD_2xNN;
+#else
+            kernel_type = nbnxnk4xN_SIMD_4xN;
  #endif
          }
  #endif
+
+        list_setup->cluster_size_i = nbnxn_kernel_to_ci_size(kernel_type);
+        list_setup->cluster_size_j = nbnxn_kernel_to_cj_size(kernel_type);
      }
  }