Merge branch 'release-4-6'

[alexxy/gromacs.git] / src / gromacs / mdlib / nbnxn_kernels / nbnxn_kernel_simd_2xnn.c
diff --git a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_simd_2xnn.c b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_simd_2xnn.c

index 7baaf5ac5db204c121e7e53b4379e4360cf000f6..fecd353a4ec438ed0306829e3f231509d2f41a93 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_simd_2xnn.c
+++ b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_simd_2xnn.c
@@ -61,6 +61,42 @@
  #error "unsupported SIMD width"
  #endif
  
+#define SUM_SIMD4(x) (x[0]+x[1]+x[2]+x[3])   /* Sum of elements 0..3 of x; x is expanded unparenthesized, so pass a plain array or pointer name */
+
+#define UNROLLI    NBNXN_CPU_CLUSTER_I_SIZE   /* NOTE(review): presumably the i-cluster unroll count used by the included kernel loops - confirm there */
+#define UNROLLJ    (GMX_SIMD_WIDTH_HERE/2)    /* Half the SIMD width (same value as STRIDE below) */
+
+/* The stride of all the atom data arrays is equal to half the SIMD width
+ * (the same value as UNROLLJ).
+ */
+#define STRIDE     (GMX_SIMD_WIDTH_HERE/2)
+
+#if GMX_SIMD_WIDTH_HERE == 8
+#define SUM_SIMD(x) (x[0]+x[1]+x[2]+x[3]+x[4]+x[5]+x[6]+x[7])   /* Sum of elements 0..7 of x */
+#else
+#if GMX_SIMD_WIDTH_HERE == 16
+/* Sum of elements 0..15 of x, written out element by element.
+ * This is getting ridiculous; SIMD horizontal adds would help,
+ * but this is not performance critical (only used to reduce energies).
+ */
+#define SUM_SIMD(x) (x[0]+x[1]+x[2]+x[3]+x[4]+x[5]+x[6]+x[7]+x[8]+x[9]+x[10]+x[11]+x[12]+x[13]+x[14]+x[15])
+#else
+#error "unsupported kernel configuration"
+#endif /* GMX_SIMD_WIDTH_HERE == 16 */
+#endif /* GMX_SIMD_WIDTH_HERE == 8 */
+
+
+#include "nbnxn_kernel_simd_utils.h"
+
+static inline void
+gmx_load_simd_2xnn_interactions(int            excl,
+                                gmx_exclfilter filter_S0,
+                                gmx_exclfilter filter_S2,
+                                gmx_mm_pb     *interact_S0,
+                                gmx_mm_pb     *interact_S2)
+{
+    /* Load integer topology exclusion interaction mask.
+     *
+     * excl is converted to a gmx_exclfilter value once with
+     * gmx_load1_exclfilter() and then tested against filter_S0 and
+     * filter_S2 with gmx_checkbitmask_pb(). The two results are written
+     * through the output pointers interact_S0 and interact_S2, in that
+     * order; nothing is returned.
+     *
+     * NOTE(review): the helpers are not defined in this part of the file
+     * (presumably they come from nbnxn_kernel_simd_utils.h); the name
+     * "load1" suggests excl is replicated to every SIMD element - confirm
+     * against the helper definitions.
+     */
+    gmx_exclfilter mask_pr_S = gmx_load1_exclfilter(excl);
+    *interact_S0  = gmx_checkbitmask_pb(mask_pr_S, filter_S0);
+    *interact_S2  = gmx_checkbitmask_pb(mask_pr_S, filter_S2);
+}
  
  /* Include all flavors of the SSE or AVX 2x(N+N) kernel loops */