using namespace gmx;
/* Unpack pointers for output */
- real *f = out->f.data();
- real *fshift = out->fshift.data();
+ real* f = out->f.data();
+ real* fshift = out->fshift.data();
#ifdef CALC_ENERGIES
-#ifdef ENERGY_GROUPS
- real *Vvdw = out->VSvdw.data();
- real *Vc = out->VSc.data();
-#else
- real *Vvdw = out->Vvdw.data();
- real *Vc = out->Vc.data();
-#endif
+# ifdef ENERGY_GROUPS
+ real* Vvdw = out->VSvdw.data();
+ real* Vc = out->VSc.data();
+# else
+ real* Vvdw = out->Vvdw.data();
+ real* Vc = out->Vc.data();
+# endif
#endif
- const nbnxn_cj_t *l_cj;
- int ci, ci_sh;
- int ish, ish3;
- gmx_bool do_LJ, half_LJ, do_coul;
- int cjind0, cjind1, cjind;
+ const nbnxn_cj_t* l_cj;
+ int ci, ci_sh;
+ int ish, ish3;
+ gmx_bool do_LJ, half_LJ, do_coul;
+ int cjind0, cjind1, cjind;
#ifdef ENERGY_GROUPS
- int Vstride_i;
- int egps_ishift, egps_imask;
- int egps_jshift, egps_jmask, egps_jstride;
- int egps_i;
- real *vvdwtp[UNROLLI];
- real *vctp[UNROLLI];
-#endif
-
- SimdReal shX_S;
- SimdReal shY_S;
- SimdReal shZ_S;
- SimdReal ix_S0, iy_S0, iz_S0;
- SimdReal ix_S2, iy_S2, iz_S2;
- SimdReal fix_S0, fiy_S0, fiz_S0;
- SimdReal fix_S2, fiy_S2, fiz_S2;
-
- SimdReal diagonal_jmi_S;
+ int Vstride_i;
+ int egps_ishift, egps_imask;
+ int egps_jshift, egps_jmask, egps_jstride;
+ int egps_i;
+ real* vvdwtp[UNROLLI];
+ real* vctp[UNROLLI];
+#endif
+
+ SimdReal shX_S;
+ SimdReal shY_S;
+ SimdReal shZ_S;
+ SimdReal ix_S0, iy_S0, iz_S0;
+ SimdReal ix_S2, iy_S2, iz_S2;
+ SimdReal fix_S0, fiy_S0, fiz_S0;
+ SimdReal fix_S2, fiy_S2, fiz_S2;
+
+ SimdReal diagonal_jmi_S;
#if UNROLLI == UNROLLJ
- SimdBool diagonal_mask_S0, diagonal_mask_S2;
+ SimdBool diagonal_mask_S0, diagonal_mask_S2;
#else
- SimdBool diagonal_mask0_S0, diagonal_mask0_S2;
- SimdBool diagonal_mask1_S0, diagonal_mask1_S2;
+ SimdBool diagonal_mask0_S0, diagonal_mask0_S2;
+ SimdBool diagonal_mask1_S0, diagonal_mask1_S2;
#endif
- SimdBitMask filter_S0, filter_S2;
+ SimdBitMask filter_S0, filter_S2;
- SimdReal zero_S(0.0);
+ SimdReal zero_S(0.0);
- SimdReal one_S(1.0);
- SimdReal iq_S0 = setZero();
- SimdReal iq_S2 = setZero();
+ SimdReal one_S(1.0);
+ SimdReal iq_S0 = setZero();
+ SimdReal iq_S2 = setZero();
#ifdef CALC_COUL_RF
- SimdReal mrc_3_S;
-#ifdef CALC_ENERGIES
- SimdReal hrc_3_S, moh_rc_S;
-#endif
+ SimdReal mrc_3_S;
+# ifdef CALC_ENERGIES
+ SimdReal hrc_3_S, moh_rc_S;
+# endif
#endif
#ifdef CALC_COUL_TAB
/* Coulomb table variables */
- SimdReal invtsp_S;
- const real *tab_coul_F;
-#if defined CALC_ENERGIES && !defined TAB_FDV0
- const real *tab_coul_V;
-#endif
+ SimdReal invtsp_S;
+ const real* tab_coul_F;
+# if defined CALC_ENERGIES && !defined TAB_FDV0
+ const real* tab_coul_V;
+# endif
-#ifdef CALC_ENERGIES
- SimdReal mhalfsp_S;
-#endif
+# ifdef CALC_ENERGIES
+ SimdReal mhalfsp_S;
+# endif
#endif
#ifdef CALC_COUL_EWALD
#endif
#if defined CALC_ENERGIES && (defined CALC_COUL_EWALD || defined CALC_COUL_TAB)
- SimdReal sh_ewald_S;
+ SimdReal sh_ewald_S;
#endif
#if defined LJ_CUT && defined CALC_ENERGIES
- SimdReal p6_cpot_S, p12_cpot_S;
+ SimdReal p6_cpot_S, p12_cpot_S;
#endif
#ifdef LJ_POT_SWITCH
- SimdReal rswitch_S;
- SimdReal swV3_S, swV4_S, swV5_S;
- SimdReal swF2_S, swF3_S, swF4_S;
+ SimdReal rswitch_S;
+ SimdReal swV3_S, swV4_S, swV5_S;
+ SimdReal swF2_S, swF3_S, swF4_S;
#endif
#ifdef LJ_FORCE_SWITCH
- SimdReal rswitch_S;
- SimdReal p6_fc2_S, p6_fc3_S;
- SimdReal p12_fc2_S, p12_fc3_S;
-#ifdef CALC_ENERGIES
- SimdReal p6_vc3_S, p6_vc4_S;
- SimdReal p12_vc3_S, p12_vc4_S;
- SimdReal p6_6cpot_S, p12_12cpot_S;
-#endif
+ SimdReal rswitch_S;
+ SimdReal p6_fc2_S, p6_fc3_S;
+ SimdReal p12_fc2_S, p12_fc3_S;
+# ifdef CALC_ENERGIES
+ SimdReal p6_vc3_S, p6_vc4_S;
+ SimdReal p12_vc3_S, p12_vc4_S;
+ SimdReal p6_6cpot_S, p12_12cpot_S;
+# endif
#endif
#ifdef LJ_EWALD_GEOM
- real lj_ewaldcoeff2, lj_ewaldcoeff6_6;
- SimdReal mone_S, half_S, lje_c2_S, lje_c6_6_S;
+ real lj_ewaldcoeff2, lj_ewaldcoeff6_6;
+ SimdReal mone_S, half_S, lje_c2_S, lje_c6_6_S;
#endif
#ifdef LJ_COMB_LB
- SimdReal hsig_i_S0, seps_i_S0;
- SimdReal hsig_i_S2, seps_i_S2;
+ SimdReal hsig_i_S0, seps_i_S0;
+ SimdReal hsig_i_S2, seps_i_S2;
#else
-#ifdef FIX_LJ_C
- alignas(GMX_SIMD_ALIGNMENT) real pvdw_c6[2*UNROLLI*UNROLLJ];
- real *pvdw_c12 = pvdw_c6 + UNROLLI*UNROLLJ;
-#endif
+# ifdef FIX_LJ_C
+ alignas(GMX_SIMD_ALIGNMENT) real pvdw_c6[2 * UNROLLI * UNROLLJ];
+ real* pvdw_c12 = pvdw_c6 + UNROLLI * UNROLLJ;
+# endif
#endif /* LJ_COMB_LB */
- SimdReal minRsq_S;
- SimdReal rc2_S;
+ SimdReal minRsq_S;
+ SimdReal rc2_S;
#ifdef VDW_CUTOFF_CHECK
- SimdReal rcvdw2_S;
+ SimdReal rcvdw2_S;
#endif
int ninner;
int npair = 0;
#endif
- const nbnxn_atomdata_t::Params &nbatParams = nbat->params();
+ const nbnxn_atomdata_t::Params& nbatParams = nbat->params();
#if defined LJ_COMB_GEOM || defined LJ_COMB_LB || defined LJ_EWALD_GEOM
- const real * gmx_restrict ljc = nbatParams.lj_comb.data();
+ const real* gmx_restrict ljc = nbatParams.lj_comb.data();
#endif
#if !(defined LJ_COMB_GEOM || defined LJ_COMB_LB || defined FIX_LJ_C)
/* No combination rule used */
- const real * gmx_restrict nbfp_ptr = nbatParams.nbfp_aligned.data();
- const int * gmx_restrict type = nbatParams.type.data();
+ const real* gmx_restrict nbfp_ptr = nbatParams.nbfp_aligned.data();
+ const int* gmx_restrict type = nbatParams.type.data();
#endif
/* Load j-i for the first i */
- diagonal_jmi_S = load<SimdReal>(nbat->simdMasks.diagonal_2xnn_j_minus_i.data());
+ diagonal_jmi_S = load<SimdReal>(nbat->simdMasks.diagonal_2xnn_j_minus_i.data());
/* Generate all the diagonal masks as comparison results */
#if UNROLLI == UNROLLJ
- diagonal_mask_S0 = (zero_S < diagonal_jmi_S);
- diagonal_jmi_S = diagonal_jmi_S - one_S;
- diagonal_jmi_S = diagonal_jmi_S - one_S;
- diagonal_mask_S2 = (zero_S < diagonal_jmi_S);
+ diagonal_mask_S0 = (zero_S < diagonal_jmi_S);
+ diagonal_jmi_S = diagonal_jmi_S - one_S;
+ diagonal_jmi_S = diagonal_jmi_S - one_S;
+ diagonal_mask_S2 = (zero_S < diagonal_jmi_S);
#else
-#if 2*UNROLLI == UNROLLJ
- diagonal_mask0_S0 = (zero_S < diagonal_jmi_S);
- diagonal_jmi_S = diagonal_jmi_S - one_S;
- diagonal_jmi_S = diagonal_jmi_S - one_S;
- diagonal_mask0_S2 = (zero_S < diagonal_jmi_S);
- diagonal_jmi_S = diagonal_jmi_S - one_S;
- diagonal_jmi_S = diagonal_jmi_S - one_S;
- diagonal_mask1_S0 = (zero_S < diagonal_jmi_S);
- diagonal_jmi_S = diagonal_jmi_S - one_S;
- diagonal_jmi_S = diagonal_jmi_S - one_S;
- diagonal_mask1_S2 = (zero_S < diagonal_jmi_S);
-#endif
+# if 2 * UNROLLI == UNROLLJ
+ diagonal_mask0_S0 = (zero_S < diagonal_jmi_S);
+ diagonal_jmi_S = diagonal_jmi_S - one_S;
+ diagonal_jmi_S = diagonal_jmi_S - one_S;
+ diagonal_mask0_S2 = (zero_S < diagonal_jmi_S);
+ diagonal_jmi_S = diagonal_jmi_S - one_S;
+ diagonal_jmi_S = diagonal_jmi_S - one_S;
+ diagonal_mask1_S0 = (zero_S < diagonal_jmi_S);
+ diagonal_jmi_S = diagonal_jmi_S - one_S;
+ diagonal_jmi_S = diagonal_jmi_S - one_S;
+ diagonal_mask1_S2 = (zero_S < diagonal_jmi_S);
+# endif
#endif
/* Load masks for topology exclusion masking. filter_stride is
static const, so the conditional will be optimized away. */
#if GMX_DOUBLE && !GMX_SIMD_HAVE_INT32_LOGICAL
- const std::uint64_t * gmx_restrict exclusion_filter = nbat->simdMasks.exclusion_filter64.data();
+ const std::uint64_t* gmx_restrict exclusion_filter = nbat->simdMasks.exclusion_filter64.data();
#else
- const std::uint32_t * gmx_restrict exclusion_filter = nbat->simdMasks.exclusion_filter.data();
+ const std::uint32_t* gmx_restrict exclusion_filter = nbat->simdMasks.exclusion_filter.data();
#endif
/* Here we cast the exclusion filters from unsigned * to int * or real *.
* Since we only check bits, the actual value they represent does not
* matter, as long as both filter and mask data are treated the same way.
*/
#if GMX_SIMD_HAVE_INT32_LOGICAL
- filter_S0 = load<SimdBitMask>(reinterpret_cast<const int *>(exclusion_filter + 0*UNROLLJ));
- filter_S2 = load<SimdBitMask>(reinterpret_cast<const int *>(exclusion_filter + 2*UNROLLJ));
+ filter_S0 = load<SimdBitMask>(reinterpret_cast<const int*>(exclusion_filter + 0 * UNROLLJ));
+ filter_S2 = load<SimdBitMask>(reinterpret_cast<const int*>(exclusion_filter + 2 * UNROLLJ));
#else
- filter_S0 = load<SimdBitMask>(reinterpret_cast<const real *>(exclusion_filter + 0*UNROLLJ));
- filter_S2 = load<SimdBitMask>(reinterpret_cast<const real *>(exclusion_filter + 2*UNROLLJ));
+ filter_S0 = load<SimdBitMask>(reinterpret_cast<const real*>(exclusion_filter + 0 * UNROLLJ));
+ filter_S2 = load<SimdBitMask>(reinterpret_cast<const real*>(exclusion_filter + 2 * UNROLLJ));
#endif
#ifdef CALC_COUL_RF
/* Reaction-field constants */
- mrc_3_S = SimdReal(-2*ic->k_rf);
-#ifdef CALC_ENERGIES
+ mrc_3_S = SimdReal(-2 * ic->k_rf);
+# ifdef CALC_ENERGIES
hrc_3_S = SimdReal(ic->k_rf);
moh_rc_S = SimdReal(-ic->c_rf);
-#endif
+# endif
#endif
#ifdef CALC_COUL_TAB
- invtsp_S = SimdReal(ic->coulombEwaldTables->scale);
-#ifdef CALC_ENERGIES
- mhalfsp_S = SimdReal(-0.5_real/ic->coulombEwaldTables->scale);
-#endif
+ invtsp_S = SimdReal(ic->coulombEwaldTables->scale);
+# ifdef CALC_ENERGIES
+ mhalfsp_S = SimdReal(-0.5_real / ic->coulombEwaldTables->scale);
+# endif
-#ifdef TAB_FDV0
+# ifdef TAB_FDV0
tab_coul_F = ic->coulombEwaldTables->tableFDV0.data();
-#else
+# else
tab_coul_F = ic->coulombEwaldTables->tableF.data();
-#ifdef CALC_ENERGIES
+# ifdef CALC_ENERGIES
tab_coul_V = ic->coulombEwaldTables->tableV.data();
-#endif
-#endif
+# endif
+# endif
#endif /* CALC_COUL_TAB */
#ifdef CALC_COUL_EWALD
- beta2_S = SimdReal(ic->ewaldcoeff_q*ic->ewaldcoeff_q);
+ beta2_S = SimdReal(ic->ewaldcoeff_q * ic->ewaldcoeff_q);
beta_S = SimdReal(ic->ewaldcoeff_q);
#endif
/* LJ function constants */
#if defined CALC_ENERGIES || defined LJ_POT_SWITCH
- SimdReal sixth_S = SimdReal(1.0/6.0);
- SimdReal twelveth_S = SimdReal(1.0/12.0);
+ SimdReal sixth_S = SimdReal(1.0 / 6.0);
+ SimdReal twelveth_S = SimdReal(1.0 / 12.0);
#endif
#if defined LJ_CUT && defined CALC_ENERGIES
/* We shift the potential by cpot, which can be zero */
- p6_cpot_S = SimdReal(ic->dispersion_shift.cpot);
- p12_cpot_S = SimdReal(ic->repulsion_shift.cpot);
+ p6_cpot_S = SimdReal(ic->dispersion_shift.cpot);
+ p12_cpot_S = SimdReal(ic->repulsion_shift.cpot);
#endif
#ifdef LJ_POT_SWITCH
rswitch_S = SimdReal(ic->rvdw_switch);
swV3_S = SimdReal(ic->vdw_switch.c3);
swV4_S = SimdReal(ic->vdw_switch.c4);
swV5_S = SimdReal(ic->vdw_switch.c5);
- swF2_S = SimdReal(3*ic->vdw_switch.c3);
- swF3_S = SimdReal(4*ic->vdw_switch.c4);
- swF4_S = SimdReal(5*ic->vdw_switch.c5);
+ swF2_S = SimdReal(3 * ic->vdw_switch.c3);
+ swF3_S = SimdReal(4 * ic->vdw_switch.c4);
+ swF4_S = SimdReal(5 * ic->vdw_switch.c5);
#endif
#ifdef LJ_FORCE_SWITCH
rswitch_S = SimdReal(ic->rvdw_switch);
p6_fc3_S = SimdReal(ic->dispersion_shift.c3);
p12_fc2_S = SimdReal(ic->repulsion_shift.c2);
p12_fc3_S = SimdReal(ic->repulsion_shift.c3);
-#ifdef CALC_ENERGIES
+# ifdef CALC_ENERGIES
{
- SimdReal mthird_S = SimdReal(-1.0/3.0);
- SimdReal mfourth_S = SimdReal(-1.0/4.0);
+ SimdReal mthird_S = SimdReal(-1.0 / 3.0);
+ SimdReal mfourth_S = SimdReal(-1.0 / 4.0);
p6_vc3_S = mthird_S * p6_fc2_S;
p6_vc4_S = mfourth_S * p6_fc3_S;
- p6_6cpot_S = SimdReal(ic->dispersion_shift.cpot/6);
+ p6_6cpot_S = SimdReal(ic->dispersion_shift.cpot / 6);
p12_vc3_S = mthird_S * p12_fc2_S;
p12_vc4_S = mfourth_S * p12_fc3_S;
- p12_12cpot_S = SimdReal(ic->repulsion_shift.cpot/12);
+ p12_12cpot_S = SimdReal(ic->repulsion_shift.cpot / 12);
}
-#endif
+# endif
#endif
#ifdef LJ_EWALD_GEOM
mone_S = SimdReal(-1.0);
half_S = SimdReal(0.5);
- lj_ewaldcoeff2 = ic->ewaldcoeff_lj*ic->ewaldcoeff_lj;
- lj_ewaldcoeff6_6 = lj_ewaldcoeff2*lj_ewaldcoeff2*lj_ewaldcoeff2/6;
+ lj_ewaldcoeff2 = ic->ewaldcoeff_lj * ic->ewaldcoeff_lj;
+ lj_ewaldcoeff6_6 = lj_ewaldcoeff2 * lj_ewaldcoeff2 * lj_ewaldcoeff2 / 6;
lje_c2_S = SimdReal(lj_ewaldcoeff2);
lje_c6_6_S = SimdReal(lj_ewaldcoeff6_6);
-#ifdef CALC_ENERGIES
+# ifdef CALC_ENERGIES
/* Determine the grid potential at the cut-off */
SimdReal lje_vc_S = SimdReal(ic->sh_lj_ewald);
-#endif
+# endif
#endif
/* The kernel either supports rcoulomb = rvdw or rcoulomb >= rvdw */
- rc2_S = SimdReal(ic->rcoulomb*ic->rcoulomb);
+ rc2_S = SimdReal(ic->rcoulomb * ic->rcoulomb);
#ifdef VDW_CUTOFF_CHECK
- rcvdw2_S = SimdReal(ic->rvdw*ic->rvdw);
+ rcvdw2_S = SimdReal(ic->rvdw * ic->rvdw);
#endif
- minRsq_S = SimdReal(NBNXN_MIN_RSQ);
+ minRsq_S = SimdReal(NBNXN_MIN_RSQ);
- const real * gmx_restrict q = nbatParams.q.data();
- const real facel = ic->epsfac;
- const real * gmx_restrict shiftvec = shift_vec[0];
- const real * gmx_restrict x = nbat->x().data();
+ const real* gmx_restrict q = nbatParams.q.data();
+ const real facel = ic->epsfac;
+ const real* gmx_restrict shiftvec = shift_vec[0];
+ const real* gmx_restrict x = nbat->x().data();
#ifdef FIX_LJ_C
for (jp = 0; jp < UNROLLJ; jp++)
{
- pvdw_c6 [0*UNROLLJ+jp] = nbat->nbfp[0*2];
- pvdw_c6 [1*UNROLLJ+jp] = nbat->nbfp[0*2];
- pvdw_c6 [2*UNROLLJ+jp] = nbat->nbfp[0*2];
- pvdw_c6 [3*UNROLLJ+jp] = nbat->nbfp[0*2];
-
- pvdw_c12[0*UNROLLJ+jp] = nbat->nbfp[0*2+1];
- pvdw_c12[1*UNROLLJ+jp] = nbat->nbfp[0*2+1];
- pvdw_c12[2*UNROLLJ+jp] = nbat->nbfp[0*2+1];
- pvdw_c12[3*UNROLLJ+jp] = nbat->nbfp[0*2+1];
+ pvdw_c6[0 * UNROLLJ + jp] = nbat->nbfp[0 * 2];
+ pvdw_c6[1 * UNROLLJ + jp] = nbat->nbfp[0 * 2];
+ pvdw_c6[2 * UNROLLJ + jp] = nbat->nbfp[0 * 2];
+ pvdw_c6[3 * UNROLLJ + jp] = nbat->nbfp[0 * 2];
+
+ pvdw_c12[0 * UNROLLJ + jp] = nbat->nbfp[0 * 2 + 1];
+ pvdw_c12[1 * UNROLLJ + jp] = nbat->nbfp[0 * 2 + 1];
+ pvdw_c12[2 * UNROLLJ + jp] = nbat->nbfp[0 * 2 + 1];
+ pvdw_c12[3 * UNROLLJ + jp] = nbat->nbfp[0 * 2 + 1];
}
- SimdReal c6_S0 = load<SimdReal>(pvdw_c6 +0*UNROLLJ);
- SimdReal c6_S1 = load<SimdReal>(pvdw_c6 +1*UNROLLJ);
- SimdReal c6_S2 = load<SimdReal>(pvdw_c6 +2*UNROLLJ);
- SimdReal c6_S3 = load<SimdReal>(pvdw_c6 +3*UNROLLJ);
-
- SimdReal c12_S0 = load<SimdReal>(pvdw_c12+0*UNROLLJ);
- SimdReal c12_S1 = load<SimdReal>(pvdw_c12+1*UNROLLJ);
- SimdReal c12_S2 = load<SimdReal>(pvdw_c12+2*UNROLLJ);
- SimdReal c12_S3 = load<SimdReal>(pvdw_c12+3*UNROLLJ);
+ SimdReal c6_S0 = load<SimdReal>(pvdw_c6 + 0 * UNROLLJ);
+ SimdReal c6_S1 = load<SimdReal>(pvdw_c6 + 1 * UNROLLJ);
+ SimdReal c6_S2 = load<SimdReal>(pvdw_c6 + 2 * UNROLLJ);
+ SimdReal c6_S3 = load<SimdReal>(pvdw_c6 + 3 * UNROLLJ);
+
+ SimdReal c12_S0 = load<SimdReal>(pvdw_c12 + 0 * UNROLLJ);
+ SimdReal c12_S1 = load<SimdReal>(pvdw_c12 + 1 * UNROLLJ);
+ SimdReal c12_S2 = load<SimdReal>(pvdw_c12 + 2 * UNROLLJ);
+ SimdReal c12_S3 = load<SimdReal>(pvdw_c12 + 3 * UNROLLJ);
#endif /* FIX_LJ_C */
#ifdef ENERGY_GROUPS
egps_ishift = nbatParams.neg_2log;
- egps_imask = (1<<egps_ishift) - 1;
- egps_jshift = 2*nbatParams.neg_2log;
- egps_jmask = (1<<egps_jshift) - 1;
- egps_jstride = (UNROLLJ>>1)*UNROLLJ;
+ egps_imask = (1 << egps_ishift) - 1;
+ egps_jshift = 2 * nbatParams.neg_2log;
+ egps_jmask = (1 << egps_jshift) - 1;
+ egps_jstride = (UNROLLJ >> 1) * UNROLLJ;
/* Major division is over i-particle energy groups, determine the stride */
- Vstride_i = nbatParams.nenergrp*(1 << nbatParams.neg_2log)*egps_jstride;
+ Vstride_i = nbatParams.nenergrp * (1 << nbatParams.neg_2log) * egps_jstride;
#endif
l_cj = nbl->cj.data();
ninner = 0;
- for (const nbnxn_ci_t &ciEntry : nbl->ci)
+ for (const nbnxn_ci_t& ciEntry : nbl->ci)
{
- ish = (ciEntry.shift & NBNXN_CI_SHIFT);
- ish3 = ish*3;
- cjind0 = ciEntry.cj_ind_start;
- cjind1 = ciEntry.cj_ind_end;
- ci = ciEntry.ci;
- ci_sh = (ish == CENTRAL ? ci : -1);
+ ish = (ciEntry.shift & NBNXN_CI_SHIFT);
+ ish3 = ish * 3;
+ cjind0 = ciEntry.cj_ind_start;
+ cjind1 = ciEntry.cj_ind_end;
+ ci = ciEntry.ci;
+ ci_sh = (ish == CENTRAL ? ci : -1);
shX_S = SimdReal(shiftvec[ish3]);
- shY_S = SimdReal(shiftvec[ish3+1]);
- shZ_S = SimdReal(shiftvec[ish3+2]);
+ shY_S = SimdReal(shiftvec[ish3 + 1]);
+ shZ_S = SimdReal(shiftvec[ish3 + 2]);
#if UNROLLJ <= 4
- int sci = ci*STRIDE;
- int scix = sci*DIM;
-#if defined LJ_COMB_LB || defined LJ_COMB_GEOM || defined LJ_EWALD_GEOM
- int sci2 = sci*2;
-#endif
+ int sci = ci * STRIDE;
+ int scix = sci * DIM;
+# if defined LJ_COMB_LB || defined LJ_COMB_GEOM || defined LJ_EWALD_GEOM
+ int sci2 = sci * 2;
+# endif
#else
- int sci = (ci>>1)*STRIDE;
- int scix = sci*DIM + (ci & 1)*(STRIDE>>1);
-#if defined LJ_COMB_LB || defined LJ_COMB_GEOM || defined LJ_EWALD_GEOM
- int sci2 = sci*2 + (ci & 1)*(STRIDE>>1);
-#endif
- sci += (ci & 1)*(STRIDE>>1);
+ int sci = (ci >> 1) * STRIDE;
+ int scix = sci * DIM + (ci & 1) * (STRIDE >> 1);
+# if defined LJ_COMB_LB || defined LJ_COMB_GEOM || defined LJ_EWALD_GEOM
+ int sci2 = sci * 2 + (ci & 1) * (STRIDE >> 1);
+# endif
+ sci += (ci & 1) * (STRIDE >> 1);
#endif
/* We have 5 LJ/C combinations, but use only three inner loops,
* as the other combinations are unlikely and/or not much faster.
*/
for (ia = 0; ia < UNROLLI; ia++)
{
- egp_ia = (egps_i >> (ia*egps_ishift)) & egps_imask;
- vvdwtp[ia] = Vvdw + egp_ia*Vstride_i;
- vctp[ia] = Vc + egp_ia*Vstride_i;
+ egp_ia = (egps_i >> (ia * egps_ishift)) & egps_imask;
+ vvdwtp[ia] = Vvdw + egp_ia * Vstride_i;
+ vctp[ia] = Vc + egp_ia * Vstride_i;
}
}
#endif
#ifdef CALC_ENERGIES
-#ifdef LJ_EWALD_GEOM
+# ifdef LJ_EWALD_GEOM
gmx_bool do_self = TRUE;
-#else
+# else
gmx_bool do_self = do_coul;
-#endif
-#if UNROLLJ == 4
+# endif
+# if UNROLLJ == 4
if (do_self && l_cj[ciEntry.cj_ind_start].cj == ci_sh)
-#endif
-#if UNROLLJ == 8
- if (do_self && l_cj[ciEntry.cj_ind_start].cj == (ci_sh>>1))
-#endif
- {
- if (do_coul)
+# endif
+# if UNROLLJ == 8
+ if (do_self && l_cj[ciEntry.cj_ind_start].cj == (ci_sh >> 1))
+# endif
{
- real Vc_sub_self;
- int ia;
-
-#ifdef CALC_COUL_RF
- Vc_sub_self = 0.5*ic->c_rf;
-#endif
-#ifdef CALC_COUL_TAB
-#ifdef TAB_FDV0
- Vc_sub_self = 0.5*tab_coul_F[2];
-#else
- Vc_sub_self = 0.5*tab_coul_V[0];
-#endif
-#endif
-#ifdef CALC_COUL_EWALD
- /* beta/sqrt(pi) */
- Vc_sub_self = 0.5*ic->ewaldcoeff_q*M_2_SQRTPI;
-#endif
-
- for (ia = 0; ia < UNROLLI; ia++)
+ if (do_coul)
{
- real qi;
-
- qi = q[sci+ia];
-#ifdef ENERGY_GROUPS
- vctp[ia][((egps_i>>(ia*egps_ishift)) & egps_imask)*egps_jstride]
-#else
+ real Vc_sub_self;
+ int ia;
+
+# ifdef CALC_COUL_RF
+ Vc_sub_self = 0.5 * ic->c_rf;
+# endif
+# ifdef CALC_COUL_TAB
+# ifdef TAB_FDV0
+ Vc_sub_self = 0.5 * tab_coul_F[2];
+# else
+ Vc_sub_self = 0.5 * tab_coul_V[0];
+# endif
+# endif
+# ifdef CALC_COUL_EWALD
+ /* beta/sqrt(pi) */
+ Vc_sub_self = 0.5 * ic->ewaldcoeff_q * M_2_SQRTPI;
+# endif
+
+ for (ia = 0; ia < UNROLLI; ia++)
+ {
+ real qi;
+
+ qi = q[sci + ia];
+# ifdef ENERGY_GROUPS
+ vctp[ia][((egps_i >> (ia * egps_ishift)) & egps_imask) * egps_jstride]
+# else
Vc[0]
-#endif
- -= facel*qi*qi*Vc_sub_self;
+# endif
+ -= facel * qi * qi * Vc_sub_self;
+ }
}
- }
-
-#ifdef LJ_EWALD_GEOM
- {
- int ia;
- for (ia = 0; ia < UNROLLI; ia++)
+# ifdef LJ_EWALD_GEOM
{
- real c6_i;
-
- c6_i = nbatParams.nbfp[nbatParams.type[sci+ia]*(nbatParams.numTypes + 1)*2]/6;
-#ifdef ENERGY_GROUPS
- vvdwtp[ia][((egps_i>>(ia*egps_ishift)) & egps_imask)*egps_jstride]
-#else
- Vvdw[0]
-#endif
- += 0.5*c6_i*lj_ewaldcoeff6_6;
+ int ia;
+
+ for (ia = 0; ia < UNROLLI; ia++)
+ {
+ real c6_i;
+
+ c6_i = nbatParams.nbfp[nbatParams.type[sci + ia] * (nbatParams.numTypes + 1) * 2]
+ / 6;
+# ifdef ENERGY_GROUPS
+ vvdwtp[ia][((egps_i >> (ia * egps_ishift)) & egps_imask) * egps_jstride]
+# else
+ Vvdw[0]
+# endif
+ += 0.5 * c6_i * lj_ewaldcoeff6_6;
+ }
}
+# endif /* LJ_EWALD */
}
-#endif /* LJ_EWALD */
- }
#endif
/* Load i atom data */
- int sciy = scix + STRIDE;
- int sciz = sciy + STRIDE;
- ix_S0 = loadU1DualHsimd(x+scix);
- ix_S2 = loadU1DualHsimd(x+scix+2);
- iy_S0 = loadU1DualHsimd(x+sciy);
- iy_S2 = loadU1DualHsimd(x+sciy+2);
- iz_S0 = loadU1DualHsimd(x+sciz);
- iz_S2 = loadU1DualHsimd(x+sciz+2);
- ix_S0 = ix_S0 + shX_S;
- ix_S2 = ix_S2 + shX_S;
- iy_S0 = iy_S0 + shY_S;
- iy_S2 = iy_S2 + shY_S;
- iz_S0 = iz_S0 + shZ_S;
- iz_S2 = iz_S2 + shZ_S;
+ int sciy = scix + STRIDE;
+ int sciz = sciy + STRIDE;
+ ix_S0 = loadU1DualHsimd(x + scix);
+ ix_S2 = loadU1DualHsimd(x + scix + 2);
+ iy_S0 = loadU1DualHsimd(x + sciy);
+ iy_S2 = loadU1DualHsimd(x + sciy + 2);
+ iz_S0 = loadU1DualHsimd(x + sciz);
+ iz_S2 = loadU1DualHsimd(x + sciz + 2);
+ ix_S0 = ix_S0 + shX_S;
+ ix_S2 = ix_S2 + shX_S;
+ iy_S0 = iy_S0 + shY_S;
+ iy_S2 = iy_S2 + shY_S;
+ iz_S0 = iz_S0 + shZ_S;
+ iz_S2 = iz_S2 + shZ_S;
if (do_coul)
{
SimdReal facel_S;
- facel_S = SimdReal(facel);
+ facel_S = SimdReal(facel);
- iq_S0 = loadU1DualHsimd(q+sci);
- iq_S2 = loadU1DualHsimd(q+sci+2);
- iq_S0 = facel_S * iq_S0;
- iq_S2 = facel_S * iq_S2;
+ iq_S0 = loadU1DualHsimd(q + sci);
+ iq_S2 = loadU1DualHsimd(q + sci + 2);
+ iq_S0 = facel_S * iq_S0;
+ iq_S2 = facel_S * iq_S2;
}
#ifdef LJ_COMB_LB
- hsig_i_S0 = loadU1DualHsimd(ljc+sci2);
- hsig_i_S2 = loadU1DualHsimd(ljc+sci2+2);
- seps_i_S0 = loadU1DualHsimd(ljc+sci2+STRIDE);
- seps_i_S2 = loadU1DualHsimd(ljc+sci2+STRIDE+2);
+ hsig_i_S0 = loadU1DualHsimd(ljc + sci2);
+ hsig_i_S2 = loadU1DualHsimd(ljc + sci2 + 2);
+ seps_i_S0 = loadU1DualHsimd(ljc + sci2 + STRIDE);
+ seps_i_S2 = loadU1DualHsimd(ljc + sci2 + STRIDE + 2);
#else
-#ifdef LJ_COMB_GEOM
- SimdReal c6s_S0, c12s_S0;
- SimdReal c6s_S2, c12s_S2;
+# ifdef LJ_COMB_GEOM
+ SimdReal c6s_S0, c12s_S0;
+ SimdReal c6s_S2, c12s_S2;
- c6s_S0 = loadU1DualHsimd(ljc+sci2);
+ c6s_S0 = loadU1DualHsimd(ljc + sci2);
if (!half_LJ)
{
- c6s_S2 = loadU1DualHsimd(ljc+sci2+2);
+ c6s_S2 = loadU1DualHsimd(ljc + sci2 + 2);
}
- c12s_S0 = loadU1DualHsimd(ljc+sci2+STRIDE);
+ c12s_S0 = loadU1DualHsimd(ljc + sci2 + STRIDE);
if (!half_LJ)
{
- c12s_S2 = loadU1DualHsimd(ljc+sci2+STRIDE+2);
+ c12s_S2 = loadU1DualHsimd(ljc + sci2 + STRIDE + 2);
}
-#elif !defined LJ_COMB_LB && !defined FIX_LJ_C
- const int numTypes = nbatParams.numTypes;
- const real *nbfp0 = nbfp_ptr + type[sci ]*numTypes*c_simdBestPairAlignment;
- const real *nbfp1 = nbfp_ptr + type[sci+1]*numTypes*c_simdBestPairAlignment;
- const real *nbfp2 = nullptr, *nbfp3 = nullptr;
+# elif !defined LJ_COMB_LB && !defined FIX_LJ_C
+ const int numTypes = nbatParams.numTypes;
+ const real* nbfp0 = nbfp_ptr + type[sci] * numTypes * c_simdBestPairAlignment;
+ const real* nbfp1 = nbfp_ptr + type[sci + 1] * numTypes * c_simdBestPairAlignment;
+ const real *nbfp2 = nullptr, *nbfp3 = nullptr;
if (!half_LJ)
{
- nbfp2 = nbfp_ptr + type[sci+2]*numTypes*c_simdBestPairAlignment;
- nbfp3 = nbfp_ptr + type[sci+3]*numTypes*c_simdBestPairAlignment;
+ nbfp2 = nbfp_ptr + type[sci + 2] * numTypes * c_simdBestPairAlignment;
+ nbfp3 = nbfp_ptr + type[sci + 3] * numTypes * c_simdBestPairAlignment;
}
-#endif
+# endif
#endif
#ifdef LJ_EWALD_GEOM
/* We need the geometrically combined C6 for the PME grid correction */
SimdReal c6s_S0, c6s_S2;
- c6s_S0 = loadU1DualHsimd(ljc+sci2);
+ c6s_S0 = loadU1DualHsimd(ljc + sci2);
if (!half_LJ)
{
- c6s_S2 = loadU1DualHsimd(ljc+sci2+2);
+ c6s_S2 = loadU1DualHsimd(ljc + sci2 + 2);
}
#endif
#endif
/* Clear i atom forces */
- fix_S0 = setZero();
- fix_S2 = setZero();
- fiy_S0 = setZero();
- fiy_S2 = setZero();
- fiz_S0 = setZero();
- fiz_S2 = setZero();
+ fix_S0 = setZero();
+ fix_S2 = setZero();
+ fiy_S0 = setZero();
+ fiy_S2 = setZero();
+ fiz_S0 = setZero();
+ fiz_S2 = setZero();
cjind = cjind0;
ninner += cjind1 - cjind0;
/* Add accumulated i-forces to the force array */
- real fShiftX = reduceIncr4ReturnSumHsimd(f+scix, fix_S0, fix_S2);
- real fShiftY = reduceIncr4ReturnSumHsimd(f+sciy, fiy_S0, fiy_S2);
- real fShiftZ = reduceIncr4ReturnSumHsimd(f+sciz, fiz_S0, fiz_S2);
+ real fShiftX = reduceIncr4ReturnSumHsimd(f + scix, fix_S0, fix_S2);
+ real fShiftY = reduceIncr4ReturnSumHsimd(f + sciy, fiy_S0, fiy_S2);
+ real fShiftZ = reduceIncr4ReturnSumHsimd(f + sciz, fiz_S0, fiz_S2);
#ifdef CALC_SHIFTFORCES
- fshift[ish3+0] += fShiftX;
- fshift[ish3+1] += fShiftY;
- fshift[ish3+2] += fShiftZ;
+ fshift[ish3 + 0] += fShiftX;
+ fshift[ish3 + 1] += fShiftY;
+ fshift[ish3 + 2] += fShiftZ;
#endif
#ifdef CALC_ENERGIES