Implemented plain-C SIMD macros for reference
[alexxy/gromacs.git] / src / mdlib / nbnxn_kernels / nbnxn_kernel_simd_2xnn.c
index 87654f9685590277bb76f6cb32e6c8623562cf1d..67a69b00ad88a9d7dc5cef4b31fc3ed671c8032c 100644 (file)
 
 #ifdef GMX_NBNXN_SIMD_2XNN
 
-#include "nbnxn_kernel_simd_2xnn.h"
+/* Include the full width SIMD macros */
+#include "gmx_simd_macros.h"
+#include "gmx_simd_vec.h"
 
-/* Include all flavors of the SSE or AVX 2x(N+N) kernel loops */
+#include "nbnxn_kernel_simd_2xnn.h"
 
-#if GMX_NBNXN_SIMD_BITWIDTH == 128
-#define GMX_MM128_HERE
-#else
-#if GMX_NBNXN_SIMD_BITWIDTH == 256
-#define GMX_MM256_HERE
-#else
-#error "unsupported GMX_NBNXN_SIMD_BITWIDTH"
-#endif
+#if !(GMX_SIMD_WIDTH_HERE == 8 || GMX_SIMD_WIDTH_HERE == 16)
+#error "unsupported SIMD width"
 #endif
 
+
+/* Include all flavors of the SSE or AVX 2x(N+N) kernel loops */
+
 /* Analytical reaction-field kernels */
 #define CALC_COUL_RF
 
@@ -151,8 +150,8 @@ static void reduce_group_energies(int ng, int ng_2log,
                                   const real *VSvdw, const real *VSc,
                                   real *Vvdw, real *Vc)
 {
-    const int simd_width   = GMX_SIMD_WIDTH_HERE;
-    const int unrollj_half = GMX_SIMD_WIDTH_HERE/4;
+    const int unrollj      = GMX_SIMD_WIDTH_HERE/2;
+    const int unrollj_half = unrollj/2;
     int       ng_p2, i, j, j0, j1, c, s;
 
     ng_p2 = (1<<ng_2log);
@@ -172,14 +171,14 @@ static void reduce_group_energies(int ng, int ng_2log,
         {
             for (j0 = 0; j0 < ng; j0++)
             {
-                c = ((i*ng + j1)*ng_p2 + j0)*unrollj_half*simd_width/2;
+                c = ((i*ng + j1)*ng_p2 + j0)*unrollj_half*unrollj;
                 for (s = 0; s < unrollj_half; s++)
                 {
                     Vvdw[i*ng+j0] += VSvdw[c+0];
                     Vvdw[i*ng+j1] += VSvdw[c+1];
                     Vc  [i*ng+j0] += VSc  [c+0];
                     Vc  [i*ng+j1] += VSc  [c+1];
-                    c             += simd_width/2 + 2;
+                    c             += unrollj + 2;
                 }
             }
         }