Fix overflow in LJ-PME nbnxn kernels
author Christian Wennberg <christian.wennberg@scilifelab.se>
Mon, 28 Jul 2014 16:06:36 +0000 (18:06 +0200)
committer Berk Hess <hess@kth.se>
Mon, 28 Jul 2014 20:26:16 +0000 (22:26 +0200)
The SIMD exp function in the LJ-PME nbnxn kernels could overflow
for pair distances far beyond the cut-off. Added a mask to avoid this.

Fixes #1552

Change-Id: Id87710f3815b341f53a69df0a2990d0bb4edfa74

src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_inner.h
src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_inner.h
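
The fix is the same in both kernels: zero r^2 with the vdW cut-off mask before forming c^2*r^2, so the argument passed to gmx_simd_exp_r stays bounded even for the very distant pairs mentioned above. Below is a minimal scalar sketch of that pattern, not the kernel code itself; the helper name is hypothetical and the scalar variables stand in for the SIMD registers named in the diffs (rsq_S*, rc2_S, lje_c2_S, wco_vdw_S*).

/* Minimal scalar sketch of the fix; the helper name and arguments are
 * illustrative, not the kernel code.  rsq ~ rsq_S*, rc2 ~ rc2_S,
 * lje_c2 ~ lje_c2_S, and within_cutoff plays the role of wco_vdw_S*. */
#include <math.h>

static double lj_pme_grid_exp(double rsq, double rc2, double lje_c2)
{
    /* Zero r^2 for pairs beyond the cut-off, mirroring
     * gmx_simd_blendzero_r(rsq_S*, wco_vdw_S*), so the argument of
     * exp() stays bounded even when rsq is very large. */
    double rsq_masked = (rsq < rc2) ? rsq : 0.0;
    double cr2        = lje_c2 * rsq_masked;   /* cr2_S*     */

    return exp(-cr2);                          /* expmcr2_S* */
}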

src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_inner.h
index 5663260589ca2f8af2c509f58cbdc615503d8b1e..99385ff3cac17b1fe9e1fc6cb3210b588c870f0f 100644 (file)
@@ -65,7 +65,7 @@
 /* Without exclusions and energies we only need to mask the cut-off,
  * this can be faster with blendv.
  */
-#if !(defined CHECK_EXCLS || defined CALC_ENERGIES) && defined GMX_SIMD_HAVE_BLENDV && !defined COUNT_PAIRS
+#if !(defined CHECK_EXCLS || defined CALC_ENERGIES || defined LJ_EWALD_GEOM) && defined GMX_SIMD_HAVE_BLENDV
 /* With RF and tabulated Coulomb we replace cmp+and with sub+blendv.
  * With gcc this is slower, except for RF on Sandy Bridge.
  * Tested with gcc 4.6.2, 4.6.3 and 4.7.1.
         tmp = gmx_simd_align_r(tmpa);
         for (i = 0; i < UNROLLI; i += 2)
         {
-            gmx_simd_store_r(tmp, i == 0 ? wco_S0 : wco_S2);
+            gmx_simd_store_r(tmp, gmx_simd_sub_r(rc2_S, i == 0 ? rsq_S0 : rsq_S2));
             for (j = 0; j < 2*UNROLLJ; j++)
             {
-                if (!(tmp[j] == 0))
+                if (tmp[j] >= 0)
                 {
                     npair++;
                 }
 #endif
 #endif
 
-        cr2_S0        = gmx_simd_mul_r(lje_c2_S, rsq_S0);
+        /* Mask for the cut-off to avoid overflow in gmx_simd_exp_r */
+        cr2_S0        = gmx_simd_mul_r(lje_c2_S, gmx_simd_blendzero_r(rsq_S0, wco_vdw_S0));
 #ifndef HALF_LJ
-        cr2_S2        = gmx_simd_mul_r(lje_c2_S, rsq_S2);
+        cr2_S2        = gmx_simd_mul_r(lje_c2_S, gmx_simd_blendzero_r(rsq_S2, wco_vdw_S2));
 #endif
         expmcr2_S0    = gmx_simd_exp_r(gmx_simd_mul_r(mone_S, cr2_S0));
 #ifndef HALF_LJ
src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_inner.h
index 4b7de1678fae416dfdf61dc275fa3f199342375f..c567589ed5319c66148843e9195a12b913ef6fea 100644 (file)
@@ -53,7 +53,7 @@
  * this can be faster when we have defined gmx_simd_blendv_r, i.e. an instruction
  * that selects from two SIMD registers based on the contents of a third.
  */
-#if !(defined CHECK_EXCLS || defined CALC_ENERGIES) && defined GMX_SIMD_HAVE_BLENDV
+#if !(defined CHECK_EXCLS || defined CALC_ENERGIES || defined LJ_EWALD_GEOM) && defined GMX_SIMD_HAVE_BLENDV
 /* With RF and tabulated Coulomb we replace cmp+and with sub+blendv.
  * With gcc this is slower, except for RF on Sandy Bridge.
  * Tested with gcc 4.6.2, 4.6.3 and 4.7.1.
 #endif
 #endif
 
-        cr2_S0        = gmx_simd_mul_r(lje_c2_S, rsq_S0);
-        cr2_S1        = gmx_simd_mul_r(lje_c2_S, rsq_S1);
+        /* Mask for the cut-off to avoid overflow in gmx_simd_exp_r */
+        cr2_S0        = gmx_simd_mul_r(lje_c2_S, gmx_simd_blendzero_r(rsq_S0, wco_vdw_S0));
+        cr2_S1        = gmx_simd_mul_r(lje_c2_S, gmx_simd_blendzero_r(rsq_S1, wco_vdw_S1));
 #ifndef HALF_LJ
-        cr2_S2        = gmx_simd_mul_r(lje_c2_S, rsq_S2);
-        cr2_S3        = gmx_simd_mul_r(lje_c2_S, rsq_S3);
+        cr2_S2        = gmx_simd_mul_r(lje_c2_S, gmx_simd_blendzero_r(rsq_S2, wco_vdw_S2));
+        cr2_S3        = gmx_simd_mul_r(lje_c2_S, gmx_simd_blendzero_r(rsq_S3, wco_vdw_S3));
 #endif
         expmcr2_S0    = gmx_simd_exp_r(gmx_simd_mul_r(mone_S, cr2_S0));
         expmcr2_S1    = gmx_simd_exp_r(gmx_simd_mul_r(mone_S, cr2_S1));
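
For context, an inference from the diffs rather than something stated in the commit message: on the blendv path the cut-off check is carried as the signed difference rc2 - rsq instead of an all-ones/all-zeros mask, which would explain both why LJ_EWALD_GEOM is now excluded from that path (gmx_simd_blendzero_r needs a real boolean mask) and why the COUNT_PAIRS debug counter stores rc2_S - rsq_S* and tests >= 0 instead of != 0. A scalar sketch of that counting logic, with a hypothetical helper name:

/* Scalar analogue of the updated COUNT_PAIRS check; the function and
 * variable names are illustrative.  On the blendv path the "mask" is the
 * signed difference rc2 - rsq, so a pair is inside the cut-off when that
 * difference is non-negative (cf. the tmp[j] >= 0 test above). */
static int count_pairs_within_cutoff(const double *rsq, int n, double rc2)
{
    int npair = 0;

    for (int j = 0; j < n; j++)
    {
        if (rc2 - rsq[j] >= 0)
        {
            npair++;
        }
    }
    return npair;
}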