* In the kernel we can subtract 1 to generate the subsequent mask.
*/
const int simd_width = GMX_NBNXN_SIMD_BITWIDTH/(sizeof(real)*8);
- int simd_4xn_diag_size, j;
+ int simd_4xn_diag_size, real_excl, simd_excl_size, j, s;
simd_4xn_diag_size = max(NBNXN_CPU_CLUSTER_I_SIZE, simd_width);
snew_aligned(nbat->simd_4xn_diag, simd_4xn_diag_size, NBNXN_MEM_ALIGN);
/* The next half of the SIMD width is for i + 1 */
nbat->simd_2xnn_diag[simd_width/2+j] = j - 1 - 0.5;
}
+
+ /* We always use 32-bit integer exclusion masks. When we use
+ * double precision, we fit two integers in a double SIMD register.
+ */
+ real_excl = sizeof(real)/sizeof(*nbat->simd_excl_mask);
+ /* Set bits for use with both 4xN and 2x(N+N) kernels */
+ simd_excl_size = NBNXN_CPU_CLUSTER_I_SIZE*simd_width*real_excl;
+ snew_aligned(nbat->simd_excl_mask, simd_excl_size*real_excl, NBNXN_MEM_ALIGN);
+ for (j = 0; j < simd_excl_size; j++)
+ {
+ /* Set the consecutive bits for masking pair exclusions.
+ * For double a single-bit mask would be enough.
+ * But using two bits avoids endianness issues.
+ */
+ for (s = 0; s < real_excl; s++)
+ {
+ /* Set the consecutive bits for masking pair exclusions */
+ nbat->simd_excl_mask[j*real_excl + s] = (1U << j);
+ }
+ }
}
#endif
/* The SIMD width here is actually independent of that in the kernels,
* but we use the same width for simplicity (usually optimal anyhow).
*/
-#if GMX_NBNXN_SIMD_BITWIDTH == 128
-#define GMX_MM128_HERE
-#endif
-#if GMX_NBNXN_SIMD_BITWIDTH == 256
-#define GMX_MM256_HERE
+#ifdef GMX_NBNXN_HALF_WIDTH_SIMD
+#define GMX_USE_HALF_WIDTH_SIMD_HERE
#endif
#include "gmx_simd_macros.h"
gmx_store_pr(dest+i, dest_SSE);
}
}
-
-#undef GMX_MM128_HERE
-#undef GMX_MM256_HERE
#endif
}