Minor fixes to Verlet kernels

author Mark Abraham <mark.j.abraham@gmail.com>

Mon, 28 Jan 2013 15:48:08 +0000 (16:48 +0100)

committer Gerrit Code Review <gerrit@gerrit.gromacs.org>

Fri, 15 Feb 2013 09:25:11 +0000 (10:25 +0100)
author Mark Abraham <mark.j.abraham@gmail.com>
Mon, 28 Jan 2013 15:48:08 +0000 (16:48 +0100)
committer Gerrit Code Review <gerrit@gerrit.gromacs.org>
Fri, 15 Feb 2013 09:25:11 +0000 (10:25 +0100)
diff --git a/include/gmx_simd_macros.h b/include/gmx_simd_macros.h

index 4d7627a027862234554b8447711f11dcf144e710..f7464d18d7be06e74d5fc09de8843de6a2f94ae7 100644 (file)
--- a/include/gmx_simd_macros.h
+++ b/include/gmx_simd_macros.h
@@ -36,7 +36,7 @@
   */
  
  /* The macros in this file are intended to be used for writing
- * architecture independent SIMD intrinsics code.
+ * architecture-independent SIMD intrinsics code.
   * To support a new architecture, adding macros here should be (nearly)
   * all that is needed.
   */
@@ -45,7 +45,7 @@
   * with different settings from the same source file.
   */
  
-/* NOTE: floor and blendv are NOT available with SSE2 only acceleration */
+/* NOTE: SSE2 acceleration does not include floor or blendv */
  
  #undef GMX_SIMD_WIDTH_HERE
  
diff --git a/include/types/nbnxn_pairlist.h b/include/types/nbnxn_pairlist.h

index 98044b6204e3f9c4bc277a0819bbb228f2fdb412..578aaf7f3f19650fa64faa2f42f3921c6c35d68b 100644 (file)
--- a/include/types/nbnxn_pairlist.h
+++ b/include/types/nbnxn_pairlist.h
@@ -219,7 +219,9 @@ typedef struct {
      real                    *nbfp;            /* Lennard-Jones 6*C6 and 12*C12 params, size ntype^2*2 */
      int                      comb_rule;       /* Combination rule, see enum above                   */
      real                    *nbfp_comb;       /* LJ parameter per atom type, size ntype*2           */
-    real                    *nbfp_s4;         /* As nbfp, but with stride 4, size ntype^2*4         */
+    real                    *nbfp_s4;         /* As nbfp, but with stride 4, size ntype^2*4. This
+                                               * might suit 4-wide SIMD loads of two values (e.g.
+                                               * two floats in single precision on x86).            */
      int                      natoms;          /* Number of atoms                                    */
      int                      natoms_local;    /* Number of local atoms                           */
      int                     *type;            /* Atom types                                         */
diff --git a/src/mdlib/nbnxn_atomdata.c b/src/mdlib/nbnxn_atomdata.c

index c6e402c91c7fd6e5caa725d66400c8e639c1001c..6e70d98cdd5cbfe920040f592407a7b866105d7b 100644 (file)
--- a/src/mdlib/nbnxn_atomdata.c
+++ b/src/mdlib/nbnxn_atomdata.c
@@ -405,7 +405,12 @@ static void set_combination_rule_data(nbnxn_atomdata_t *nbat)
              }
              break;
          case ljcrNONE:
-            /* In nbfp_s4 we use a stride of 4 for storing two parameters */
+            /* nbfp_s4 stores two parameters using a stride of 4,
+             * because this would suit x86 SIMD single-precision
+             * quad-load intrinsics. There's a slight inefficiency in
+             * allocating and initializing nbfp_s4 when it might not
+             * be used, but introducing the conditional code is not
+             * really worth it. */
              nbat->alloc((void **)&nbat->nbfp_s4, nt*nt*4*sizeof(*nbat->nbfp_s4));
              for (i = 0; i < nt; i++)
              {
author	Mark Abraham <mark.j.abraham@gmail.com>
	Mon, 28 Jan 2013 15:48:08 +0000 (16:48 +0100)
committer	Gerrit Code Review <gerrit@gerrit.gromacs.org>
	Fri, 15 Feb 2013 09:25:11 +0000 (10:25 +0100)
include/gmx_simd_macros.h		patch | blob | history
include/types/nbnxn_pairlist.h		patch | blob | history
src/mdlib/nbnxn_atomdata.c		patch | blob | history