Minor fixes to Verlet kernels
author: Mark Abraham <mark.j.abraham@gmail.com>
Mon, 28 Jan 2013 15:48:08 +0000 (16:48 +0100)
committer: Gerrit Code Review <gerrit@gerrit.gromacs.org>
Fri, 15 Feb 2013 09:25:11 +0000 (10:25 +0100)
* clarified some comments
* documented use of nbfp_s4 better

Change-Id: I0848f2e570daef881368a45d4a429a3f80fde81b

include/gmx_simd_macros.h
include/types/nbnxn_pairlist.h
src/mdlib/nbnxn_atomdata.c

index 4d7627a027862234554b8447711f11dcf144e710..f7464d18d7be06e74d5fc09de8843de6a2f94ae7 100644 (file)
@@ -36,7 +36,7 @@
  */
 
 /* The macros in this file are intended to be used for writing
- * architecture independent SIMD intrinsics code.
+ * architecture-independent SIMD intrinsics code.
  * To support a new architecture, adding macros here should be (nearly)
  * all that is needed.
  */
@@ -45,7 +45,7 @@
  * with different settings from the same source file.
  */
 
-/* NOTE: floor and blendv are NOT available with SSE2 only acceleration */
+/* NOTE: SSE2 acceleration does not include floor or blendv */
 
 #undef GMX_SIMD_WIDTH_HERE
 
index 98044b6204e3f9c4bc277a0819bbb228f2fdb412..578aaf7f3f19650fa64faa2f42f3921c6c35d68b 100644 (file)
@@ -219,7 +219,9 @@ typedef struct {
     real                    *nbfp;            /* Lennard-Jones 6*C6 and 12*C12 params, size ntype^2*2 */
     int                      comb_rule;       /* Combination rule, see enum above                   */
     real                    *nbfp_comb;       /* LJ parameter per atom type, size ntype*2           */
-    real                    *nbfp_s4;         /* As nbfp, but with stride 4, size ntype^2*4         */
+    real                    *nbfp_s4;         /* As nbfp, but with stride 4, size ntype^2*4. This
+                                               * might suit 4-wide SIMD loads of two values (e.g.
+                                               * two floats in single precision on x86).            */
     int                      natoms;          /* Number of atoms                                    */
     int                      natoms_local;    /* Number of local atoms                           */
     int                     *type;            /* Atom types                                         */
index c6e402c91c7fd6e5caa725d66400c8e639c1001c..6e70d98cdd5cbfe920040f592407a7b866105d7b 100644 (file)
@@ -405,7 +405,12 @@ static void set_combination_rule_data(nbnxn_atomdata_t *nbat)
             }
             break;
         case ljcrNONE:
-            /* In nbfp_s4 we use a stride of 4 for storing two parameters */
+            /* nbfp_s4 stores two parameters using a stride of 4,
+             * because this would suit x86 SIMD single-precision
+             * quad-load intrinsics. There's a slight inefficiency in
+             * allocating and initializing nbfp_s4 when it might not
+             * be used, but introducing the conditional code is not
+             * really worth it. */
             nbat->alloc((void **)&nbat->nbfp_s4, nt*nt*4*sizeof(*nbat->nbfp_s4));
             for (i = 0; i < nt; i++)
             {