fixed nbnxn AVX256 PME kernel for no LJ combination rule
author     Berk Hess <hess@kth.se>
Wed, 2 Jan 2013 16:46:08 +0000 (17:46 +0100)
committer  Gerrit Code Review <gerrit@gerrit.gromacs.org>
Thu, 10 Jan 2013 22:10:17 +0000 (23:10 +0100)
The nbnxn 2x(N+N) kernels performed incorrect LJ parameter lookups
for systems that do not follow LJ combination rules.
These kernels are only used with AVX256 and Ewald-type electrostatics.

Change-Id: I6ccc133e7f21a43c25cf61d7ede43da7889e66de

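For background: when a system's LJ parameters cannot be described by a combination
rule, c6 and c12 must be looked up per atom-type pair from the full nbfp table.
The scalar sketch below shows the lookup that the SIMD macro implements; the helper
name, ntypes and the exact table layout (NBFP_STRIDE floats per i,j type pair) are
assumptions based on the kernel code, not a verbatim copy of it.

    /* Minimal scalar sketch of the per-pair LJ lookup used when no
     * combination rule applies (illustrative only). */
    static void lookup_lj_pair(const float *nbfp, int ntypes,
                               int type_i, int type_j,
                               float *c6, float *c12)
    {
        /* One row of the ntypes x ntypes pair table per i-atom type;
         * the kernels precompute this pointer (nbfp0, nbfp1, ...). */
        const float *nbfp_i = nbfp + type_i*ntypes*NBFP_STRIDE;

        *c6  = nbfp_i[type_j*NBFP_STRIDE + 0];
        *c12 = nbfp_i[type_j*NBFP_STRIDE + 1];
    }
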
src/mdlib/nbnxn_kernels/nbnxn_kernel_simd_2xnn_inner.h
src/mdlib/nbnxn_kernels/nbnxn_kernel_simd_utils.h

index cab66c3e346310461148ab9489e42a5cde9eab5b..fa50cbeb4b7e6346ec0541bf142a6028a6fd4eac 100644 (file)
 #ifdef CALC_LJ
 
 #if !defined LJ_COMB_GEOM && !defined LJ_COMB_LB && !defined FIX_LJ_C
-            load_lj_pair_params2(nbfp0,type,aj,c6_SSE0,c12_SSE0);
+            load_lj_pair_params2(nbfp0,nbfp1,type,aj,c6_SSE0,c12_SSE0);
 #ifndef HALF_LJ
-            load_lj_pair_params2(nbfp2,type,aj,c6_SSE2,c12_SSE2);
+            load_lj_pair_params2(nbfp2,nbfp3,type,aj,c6_SSE2,c12_SSE2);
 #endif
 #endif /* not defined any LJ rule */
 
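The hunk above is the caller side of the fix. In the 2x(N+N) layout one 256-bit
register holds data for two i-atoms, so each register half must read LJ parameters
from its own i-atom's row of the pair table; the old call passed only a single row
pointer per register. A sketch of how the per-i-atom row pointers are set up earlier
in the kernel (variable names such as sci, nbfp_ptr and ntypes are assumptions):

    /* One row pointer per i-cluster atom (sketch; exact names in the
     * outer-loop file may differ). */
    nbfp0 = nbfp_ptr + type[sci+0]*ntypes*NBFP_STRIDE;
    nbfp1 = nbfp_ptr + type[sci+1]*ntypes*NBFP_STRIDE;
    #ifndef HALF_LJ
    nbfp2 = nbfp_ptr + type[sci+2]*ntypes*NBFP_STRIDE;
    nbfp3 = nbfp_ptr + type[sci+3]*ntypes*NBFP_STRIDE;
    #endif
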
index a8e068699952102704e734250a68343b9603dd6d..dc7112e7a3e9d74b6a6a7f0595413234e12e9f55 100644 (file)
@@ -249,18 +249,23 @@ gmx_mm256_invsqrt_ps_single(__m256 x)
     GMX_2_MM_TO_M256(c12t_SSE[0],c12t_SSE[1],c12_SSE);                  \
 }
 
-#define load_lj_pair_params2(nbfp,type,aj,c6_SSE,c12_SSE)               \
+#define load_lj_pair_params2(nbfp0,nbfp1,type,aj,c6_SSE,c12_SSE)        \
 {                                                                       \
-    __m128 clj_SSE[2*UNROLLJ],c6t_SSE[2],c12t_SSE[2];                   \
+    __m128 clj_SSE0[UNROLLJ],clj_SSE1[UNROLLJ],c6t_SSE[2],c12t_SSE[2];  \
     int p;                                                              \
                                                                         \
-    for(p=0; p<2*UNROLLJ; p++)                                          \
+    for(p=0; p<UNROLLJ; p++)                                            \
     {                                                                   \
         /* Here we load 4 aligned floats, but we need just 2 */         \
-        clj_SSE[p] = _mm_load_ps(nbfp+type[aj+p]*NBFP_STRIDE);          \
+        clj_SSE0[p] = _mm_load_ps(nbfp0+type[aj+p]*NBFP_STRIDE);        \
     }                                                                   \
-    GMX_MM_SHUFFLE_4_PS_FIL01_TO_2_PS(clj_SSE[0],clj_SSE[1],clj_SSE[2],clj_SSE[3],c6t_SSE[0],c12t_SSE[0]); \
-    GMX_MM_SHUFFLE_4_PS_FIL01_TO_2_PS(clj_SSE[4],clj_SSE[5],clj_SSE[6],clj_SSE[7],c6t_SSE[1],c12t_SSE[1]); \
+    for(p=0; p<UNROLLJ; p++)                                            \
+    {                                                                   \
+        /* Here we load 4 aligned floats, but we need just 2 */         \
+        clj_SSE1[p] = _mm_load_ps(nbfp1+type[aj+p]*NBFP_STRIDE);        \
+    }                                                                   \
+    GMX_MM_SHUFFLE_4_PS_FIL01_TO_2_PS(clj_SSE0[0],clj_SSE0[1],clj_SSE0[2],clj_SSE0[3],c6t_SSE[0],c12t_SSE[0]); \
+    GMX_MM_SHUFFLE_4_PS_FIL01_TO_2_PS(clj_SSE1[0],clj_SSE1[1],clj_SSE1[2],clj_SSE1[3],c6t_SSE[1],c12t_SSE[1]); \
                                                                         \
     GMX_2_MM_TO_M256(c6t_SSE[0],c6t_SSE[1],c6_SSE);                     \
     GMX_2_MM_TO_M256(c12t_SSE[0],c12t_SSE[1],c12_SSE);                  \
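
In scalar terms, the fixed macro now gathers the following values before the
shuffles pack them into the low and high 128-bit lanes (array form is for
illustration only, not part of the actual kernel):

    /* Scalar equivalent of the fixed load_lj_pair_params2 (sketch):
     * the low lane of c6_SSE/c12_SSE holds j-atoms 0..UNROLLJ-1 paired
     * with i-atom 0 (row nbfp0), the high lane the same j-atoms paired
     * with i-atom 1 (row nbfp1). */
    float c6[2*UNROLLJ], c12[2*UNROLLJ];
    int   p;

    for (p = 0; p < UNROLLJ; p++)
    {
        c6 [          p] = nbfp0[type[aj+p]*NBFP_STRIDE + 0];
        c12[          p] = nbfp0[type[aj+p]*NBFP_STRIDE + 1];
        c6 [UNROLLJ + p] = nbfp1[type[aj+p]*NBFP_STRIDE + 0];
        c12[UNROLLJ + p] = nbfp1[type[aj+p]*NBFP_STRIDE + 1];
    }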