#ifdef GMX_NBNXN_SIMD_2XNN
-#include "nbnxn_kernel_simd_2xnn.h"
+/* Include the full-width SIMD macros */
+#include "gmx_simd_macros.h"
+#include "gmx_simd_vec.h"
-/* Include all flavors of the SSE or AVX 2x(N+N) kernel loops */
+#include "nbnxn_kernel_simd_2xnn.h"
-#if GMX_NBNXN_SIMD_BITWIDTH == 128
-#define GMX_MM128_HERE
-#else
-#if GMX_NBNXN_SIMD_BITWIDTH == 256
-#define GMX_MM256_HERE
-#else
-#error "unsupported GMX_NBNXN_SIMD_BITWIDTH"
-#endif
+/* Only SIMD widths of 8 and 16 are accepted here; any other value of
+ * GMX_SIMD_WIDTH_HERE stops the build with the #error below.
+ */
+#if !(GMX_SIMD_WIDTH_HERE == 8 || GMX_SIMD_WIDTH_HERE == 16)
+#error "unsupported SIMD width"
#endif
+
+/* Include all flavors of the SSE or AVX 2x(N+N) kernel loops */
+
/* Analytical reaction-field kernels */
#define CALC_COUL_RF
const real *VSvdw, const real *VSc,
real *Vvdw, real *Vc)
{
- const int simd_width = GMX_SIMD_WIDTH_HERE;
- const int unrollj_half = GMX_SIMD_WIDTH_HERE/4;
+ const int unrollj = GMX_SIMD_WIDTH_HERE/2;
+ const int unrollj_half = unrollj/2;
int ng_p2, i, j, j0, j1, c, s;
ng_p2 = (1<<ng_2log);
{
for (j0 = 0; j0 < ng; j0++)
{
- c = ((i*ng + j1)*ng_p2 + j0)*unrollj_half*simd_width/2;
+ c = ((i*ng + j1)*ng_p2 + j0)*unrollj_half*unrollj;
for (s = 0; s < unrollj_half; s++)
{
Vvdw[i*ng+j0] += VSvdw[c+0];
Vvdw[i*ng+j1] += VSvdw[c+1];
Vc [i*ng+j0] += VSc [c+0];
Vc [i*ng+j1] += VSc [c+1];
- c += simd_width/2 + 2;
+ c += unrollj + 2;
}
}
}