/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#if (defined CALC_COULOMB && defined CALC_COUL_TAB) || defined LJ_FORCE_SWITCH || defined LJ_POT_SWITCH
gmx_simd_real_t r_S0;
+#if (defined CALC_COULOMB && defined CALC_COUL_TAB) || !defined HALF_LJ
gmx_simd_real_t r_S2;
#endif
+#endif
#if defined LJ_FORCE_SWITCH || defined LJ_POT_SWITCH
- gmx_simd_real_t rsw_S0, rsw2_S0, rsw2_r_S0;
+ gmx_simd_real_t rsw_S0, rsw2_S0;
#ifndef HALF_LJ
- gmx_simd_real_t rsw_S2, rsw2_S2, rsw2_r_S2;
+ gmx_simd_real_t rsw_S2, rsw2_S2;
#endif
#endif
int aj2;
#endif
-#ifndef FIX_LJ_C
- /* LJ C6 and C12 parameters, used with geometric comb. rule */
- gmx_simd_real_t c6_S0, c12_S0;
-#ifndef HALF_LJ
- gmx_simd_real_t c6_S2, c12_S2;
-#endif
-#endif
-
/* Intermediate variables for LJ calculation */
#ifndef LJ_COMB_LB
gmx_simd_real_t rinvsix_S0;
#ifndef HALF_LJ
gmx_simd_real_t FrLJ6_S2, FrLJ12_S2, frLJ_S2;
#endif
-#if defined CALC_ENERGIES || defined LJ_POT_SWITCH
- gmx_simd_real_t VLJ6_S0, VLJ12_S0, VLJ_S0;
-#ifndef HALF_LJ
- gmx_simd_real_t VLJ6_S2, VLJ12_S2, VLJ_S2;
-#endif
-#endif
#endif /* CALC_LJ */
gmx_mm_hpr fjx_S, fjy_S, fjz_S;
#ifdef CALC_LJ
#if !defined LJ_COMB_GEOM && !defined LJ_COMB_LB && !defined FIX_LJ_C
+ gmx_simd_real_t c6_S0, c12_S0;
load_lj_pair_params2(nbfp0, nbfp1, type, aj, &c6_S0, &c12_S0);
#ifndef HALF_LJ
+ gmx_simd_real_t c6_S2, c12_S2;
load_lj_pair_params2(nbfp2, nbfp3, type, aj, &c6_S2, &c12_S2);
#endif
#endif /* not defined any LJ rule */
#ifdef LJ_COMB_GEOM
gmx_loaddh_pr(&c6s_j_S, ljc+aj2+0);
gmx_loaddh_pr(&c12s_j_S, ljc+aj2+STRIDE);
- c6_S0 = gmx_simd_mul_r(c6s_S0, c6s_j_S );
+ gmx_simd_real_t c6_S0 = gmx_simd_mul_r(c6s_S0, c6s_j_S );
#ifndef HALF_LJ
- c6_S2 = gmx_simd_mul_r(c6s_S2, c6s_j_S );
+ gmx_simd_real_t c6_S2 = gmx_simd_mul_r(c6s_S2, c6s_j_S );
#endif
- c12_S0 = gmx_simd_mul_r(c12s_S0, c12s_j_S);
+ gmx_simd_real_t c12_S0 = gmx_simd_mul_r(c12s_S0, c12s_j_S);
#ifndef HALF_LJ
- c12_S2 = gmx_simd_mul_r(c12s_S2, c12s_j_S);
+ gmx_simd_real_t c12_S2 = gmx_simd_mul_r(c12s_S2, c12s_j_S);
#endif
#endif /* LJ_COMB_GEOM */
r_S0 = gmx_simd_mul_r(rsq_S0, rinv_S0);
rsw_S0 = gmx_simd_max_r(gmx_simd_sub_r(r_S0, rswitch_S), zero_S);
rsw2_S0 = gmx_simd_mul_r(rsw_S0, rsw_S0);
- rsw2_r_S0 = gmx_simd_mul_r(rsw2_S0, r_S0);
#ifndef HALF_LJ
r_S2 = gmx_simd_mul_r(rsq_S2, rinv_S2);
rsw_S2 = gmx_simd_max_r(gmx_simd_sub_r(r_S2, rswitch_S), zero_S);
rsw2_S2 = gmx_simd_mul_r(rsw_S2, rsw_S2);
- rsw2_r_S2 = gmx_simd_mul_r(rsw2_S2, r_S2);
#endif
#endif
#ifdef LJ_FORCE_SWITCH
#define add_fr_switch(fr, rsw, rsw2_r, c2, c3) gmx_simd_fmadd_r(gmx_simd_fmadd_r(c3, rsw, c2), rsw2_r, fr)
-
+ gmx_simd_real_t rsw2_r_S0 = gmx_simd_mul_r(rsw2_S0, r_S0);
FrLJ6_S0 = gmx_simd_mul_r(c6_S0, add_fr_switch(rinvsix_S0, rsw_S0, rsw2_r_S0, p6_fc2_S, p6_fc3_S));
#ifndef HALF_LJ
+ gmx_simd_real_t rsw2_r_S2 = gmx_simd_mul_r(rsw2_S2, r_S2);
FrLJ6_S2 = gmx_simd_mul_r(c6_S2, add_fr_switch(rinvsix_S2, rsw_S2, rsw2_r_S2, p6_fc2_S, p6_fc3_S));
#endif
FrLJ12_S0 = gmx_simd_mul_r(c12_S0, add_fr_switch(gmx_simd_mul_r(rinvsix_S0, rinvsix_S0), rsw_S0, rsw2_r_S0, p12_fc2_S, p12_fc3_S));
#ifndef HALF_LJ
sig6_S2 = gmx_simd_mul_r(sig2_S2, gmx_simd_mul_r(sig2_S2, sig2_S2));
#endif
- c6_S0 = gmx_simd_mul_r(eps_S0, sig6_S0);
+ gmx_simd_real_t c6_S0 = gmx_simd_mul_r(eps_S0, sig6_S0);
#ifndef HALF_LJ
- c6_S2 = gmx_simd_mul_r(eps_S2, sig6_S2);
+ gmx_simd_real_t c6_S2 = gmx_simd_mul_r(eps_S2, sig6_S2);
#endif
- c12_S0 = gmx_simd_mul_r(c6_S0, sig6_S0);
+ gmx_simd_real_t c12_S0 = gmx_simd_mul_r(c6_S0, sig6_S0);
#ifndef HALF_LJ
- c12_S2 = gmx_simd_mul_r(c6_S2, sig6_S2);
+ gmx_simd_real_t c12_S2 = gmx_simd_mul_r(c6_S2, sig6_S2);
#endif
#endif
#endif /* LJ_COMB_LB */
#ifdef LJ_CUT
/* Calculate the LJ energies, with constant potential shift */
- VLJ6_S0 = gmx_simd_mul_r(sixth_S, gmx_simd_fmadd_r(c6_S0, p6_cpot_S, FrLJ6_S0));
+ gmx_simd_real_t VLJ6_S0 = gmx_simd_mul_r(sixth_S, gmx_simd_fmadd_r(c6_S0, p6_cpot_S, FrLJ6_S0));
#ifndef HALF_LJ
- VLJ6_S2 = gmx_simd_mul_r(sixth_S, gmx_simd_fmadd_r(c6_S2, p6_cpot_S, FrLJ6_S2));
+ gmx_simd_real_t VLJ6_S2 = gmx_simd_mul_r(sixth_S, gmx_simd_fmadd_r(c6_S2, p6_cpot_S, FrLJ6_S2));
#endif
- VLJ12_S0 = gmx_simd_mul_r(twelveth_S, gmx_simd_fmadd_r(c12_S0, p12_cpot_S, FrLJ12_S0));
+ gmx_simd_real_t VLJ12_S0 = gmx_simd_mul_r(twelveth_S, gmx_simd_fmadd_r(c12_S0, p12_cpot_S, FrLJ12_S0));
#ifndef HALF_LJ
- VLJ12_S2 = gmx_simd_mul_r(twelveth_S, gmx_simd_fmadd_r(c12_S2, p12_cpot_S, FrLJ12_S2));
+ gmx_simd_real_t VLJ12_S2 = gmx_simd_mul_r(twelveth_S, gmx_simd_fmadd_r(c12_S2, p12_cpot_S, FrLJ12_S2));
#endif
#endif /* LJ_CUT */
#ifdef LJ_FORCE_SWITCH
#define v_fswitch_pr(rsw, rsw2, c0, c3, c4) gmx_simd_fmadd_r(gmx_simd_fmadd_r(c4, rsw, c3), gmx_simd_mul_r(rsw2, rsw), c0)
- VLJ6_S0 = gmx_simd_mul_r(c6_S0, gmx_simd_fmadd_r(sixth_S, rinvsix_S0, v_fswitch_pr(rsw_S0, rsw2_S0, p6_6cpot_S, p6_vc3_S, p6_vc4_S)));
+ gmx_simd_real_t VLJ6_S0 = gmx_simd_mul_r(c6_S0, gmx_simd_fmadd_r(sixth_S, rinvsix_S0, v_fswitch_pr(rsw_S0, rsw2_S0, p6_6cpot_S, p6_vc3_S, p6_vc4_S)));
#ifndef HALF_LJ
- VLJ6_S2 = gmx_simd_mul_r(c6_S2, gmx_simd_fmadd_r(sixth_S, rinvsix_S2, v_fswitch_pr(rsw_S2, rsw2_S2, p6_6cpot_S, p6_vc3_S, p6_vc4_S)));
+ gmx_simd_real_t VLJ6_S2 = gmx_simd_mul_r(c6_S2, gmx_simd_fmadd_r(sixth_S, rinvsix_S2, v_fswitch_pr(rsw_S2, rsw2_S2, p6_6cpot_S, p6_vc3_S, p6_vc4_S)));
#endif
- VLJ12_S0 = gmx_simd_mul_r(c12_S0, gmx_simd_fmadd_r(twelveth_S, gmx_simd_mul_r(rinvsix_S0, rinvsix_S0), v_fswitch_pr(rsw_S0, rsw2_S0, p12_12cpot_S, p12_vc3_S, p12_vc4_S)));
+ gmx_simd_real_t VLJ12_S0 = gmx_simd_mul_r(c12_S0, gmx_simd_fmadd_r(twelveth_S, gmx_simd_mul_r(rinvsix_S0, rinvsix_S0), v_fswitch_pr(rsw_S0, rsw2_S0, p12_12cpot_S, p12_vc3_S, p12_vc4_S)));
#ifndef HALF_LJ
- VLJ12_S2 = gmx_simd_mul_r(c12_S2, gmx_simd_fmadd_r(twelveth_S, gmx_simd_mul_r(rinvsix_S2, rinvsix_S2), v_fswitch_pr(rsw_S2, rsw2_S2, p12_12cpot_S, p12_vc3_S, p12_vc4_S)));
+ gmx_simd_real_t VLJ12_S2 = gmx_simd_mul_r(c12_S2, gmx_simd_fmadd_r(twelveth_S, gmx_simd_mul_r(rinvsix_S2, rinvsix_S2), v_fswitch_pr(rsw_S2, rsw2_S2, p12_12cpot_S, p12_vc3_S, p12_vc4_S)));
#endif
#undef v_fswitch_pr
#endif /* LJ_FORCE_SWITCH */
/* Add up the repulsion and dispersion */
- VLJ_S0 = gmx_simd_sub_r(VLJ12_S0, VLJ6_S0);
+ gmx_simd_real_t VLJ_S0 = gmx_simd_sub_r(VLJ12_S0, VLJ6_S0);
#ifndef HALF_LJ
- VLJ_S2 = gmx_simd_sub_r(VLJ12_S2, VLJ6_S2);
+ gmx_simd_real_t VLJ_S2 = gmx_simd_sub_r(VLJ12_S2, VLJ6_S2);
#endif
#endif /* (LJ_CUT || LJ_FORCE_SWITCH) && CALC_ENERGIES */
#ifdef LJ_POT_SWITCH
/* We always need the potential, since it is needed for the force */
- VLJ_S0 = gmx_simd_fnmadd_r(sixth_S, FrLJ6_S0, gmx_simd_mul_r(twelveth_S, FrLJ12_S0));
+ gmx_simd_real_t VLJ_S0 = gmx_simd_fnmadd_r(sixth_S, FrLJ6_S0, gmx_simd_mul_r(twelveth_S, FrLJ12_S0));
#ifndef HALF_LJ
- VLJ_S2 = gmx_simd_fnmadd_r(sixth_S, FrLJ6_S2, gmx_simd_mul_r(twelveth_S, FrLJ12_S2));
+ gmx_simd_real_t VLJ_S2 = gmx_simd_fnmadd_r(sixth_S, FrLJ6_S2, gmx_simd_mul_r(twelveth_S, FrLJ12_S2));
#endif
{
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
{
const nbnxn_ci_t *nbln;
const nbnxn_cj_t *l_cj;
- const int *type;
const real *q;
const real *shiftvec;
const real *x;
- const real *nbfp0, *nbfp1, *nbfp2 = NULL, *nbfp3 = NULL;
real facel;
- real *nbfp_ptr;
int n, ci, ci_sh;
int ish, ish3;
- gmx_bool do_LJ, half_LJ, do_coul, do_self;
- int sci, scix, sciy, sciz, sci2;
+ gmx_bool do_LJ, half_LJ, do_coul;
int cjind0, cjind1, cjind;
- int ip, jp;
#ifdef ENERGY_GROUPS
int Vstride_i;
#endif
#ifdef LJ_EWALD_GEOM
real lj_ewaldcoeff2, lj_ewaldcoeff6_6;
- gmx_simd_real_t mone_S, half_S, lje_c2_S, lje_c6_6_S, lje_vc_S;
+ gmx_simd_real_t mone_S, half_S, lje_c2_S, lje_c6_6_S;
#endif
#ifdef LJ_COMB_LB
#ifdef FIX_LJ_C
real pvdw_array[2*UNROLLI*UNROLLJ+GMX_SIMD_REAL_WIDTH];
real *pvdw_c6, *pvdw_c12;
- gmx_simd_real_t c6_S0, c12_S0;
- gmx_simd_real_t c6_S2, c12_S2;
#endif
#if defined LJ_COMB_GEOM || defined LJ_EWALD_GEOM
const real *ljc;
-
- gmx_simd_real_t c6s_S0, c12s_S0;
- gmx_simd_real_t c6s_S2 = gmx_simd_setzero_r();
- gmx_simd_real_t c12s_S2 = gmx_simd_setzero_r();
#endif
#endif /* LJ_COMB_LB */
- gmx_simd_real_t vctot_S, Vvdwtot_S;
- gmx_simd_real_t sixth_S, twelveth_S;
-
gmx_simd_real_t avoid_sing_S;
gmx_simd_real_t rc2_S;
#ifdef VDW_CUTOFF_CHECK
#if defined LJ_COMB_GEOM || defined LJ_COMB_LB || defined LJ_EWALD_GEOM
ljc = nbat->lj_comb;
#endif
-#if !(defined LJ_COMB_GEOM || defined LJ_COMB_LB)
+#if !(defined LJ_COMB_GEOM || defined LJ_COMB_LB || defined FIX_LJ_C)
/* No combination rule used */
- nbfp_ptr = (4 == nbfp_stride) ? nbat->nbfp_s4 : nbat->nbfp;
+ real *nbfp_ptr = (4 == nbfp_stride) ? nbat->nbfp_s4 : nbat->nbfp;
+ const int *type = nbat->type;
#endif
/* Load j-i for the first i */
/* LJ function constants */
#if defined CALC_ENERGIES || defined LJ_POT_SWITCH
- sixth_S = gmx_simd_set1_r(1.0/6.0);
- twelveth_S = gmx_simd_set1_r(1.0/12.0);
+ gmx_simd_real_t sixth_S = gmx_simd_set1_r(1.0/6.0);
+ gmx_simd_real_t twelveth_S = gmx_simd_set1_r(1.0/12.0);
#endif
#if defined LJ_CUT && defined CALC_ENERGIES
lj_ewaldcoeff6_6 = lj_ewaldcoeff2*lj_ewaldcoeff2*lj_ewaldcoeff2/6;
lje_c2_S = gmx_simd_set1_r(lj_ewaldcoeff2);
lje_c6_6_S = gmx_simd_set1_r(lj_ewaldcoeff6_6);
+#ifdef CALC_ENERGIES
/* Determine the grid potential at the cut-off */
- lje_vc_S = gmx_simd_set1_r(ic->sh_lj_ewald);
+ gmx_simd_real_t lje_vc_S = gmx_simd_set1_r(ic->sh_lj_ewald);
+#endif
#endif
/* The kernel either supports rcoulomb = rvdw or rcoulomb >= rvdw */
avoid_sing_S = gmx_simd_set1_r(NBNXN_AVOID_SING_R2_INC);
q = nbat->q;
- type = nbat->type;
facel = ic->epsfac;
shiftvec = shift_vec[0];
x = nbat->x;
pvdw_c12[2*UNROLLJ+jp] = nbat->nbfp[0*2+1];
pvdw_c12[3*UNROLLJ+jp] = nbat->nbfp[0*2+1];
}
- c6_S0 = gmx_simd_load_r(pvdw_c6 +0*UNROLLJ);
- c6_S1 = gmx_simd_load_r(pvdw_c6 +1*UNROLLJ);
- c6_S2 = gmx_simd_load_r(pvdw_c6 +2*UNROLLJ);
- c6_S3 = gmx_simd_load_r(pvdw_c6 +3*UNROLLJ);
-
- c12_S0 = gmx_simd_load_r(pvdw_c12+0*UNROLLJ);
- c12_S1 = gmx_simd_load_r(pvdw_c12+1*UNROLLJ);
- c12_S2 = gmx_simd_load_r(pvdw_c12+2*UNROLLJ);
- c12_S3 = gmx_simd_load_r(pvdw_c12+3*UNROLLJ);
+ gmx_simd_real_t c6_S0 = gmx_simd_load_r(pvdw_c6 +0*UNROLLJ);
+ gmx_simd_real_t c6_S1 = gmx_simd_load_r(pvdw_c6 +1*UNROLLJ);
+ gmx_simd_real_t c6_S2 = gmx_simd_load_r(pvdw_c6 +2*UNROLLJ);
+ gmx_simd_real_t c6_S3 = gmx_simd_load_r(pvdw_c6 +3*UNROLLJ);
+
+ gmx_simd_real_t c12_S0 = gmx_simd_load_r(pvdw_c12+0*UNROLLJ);
+ gmx_simd_real_t c12_S1 = gmx_simd_load_r(pvdw_c12+1*UNROLLJ);
+ gmx_simd_real_t c12_S2 = gmx_simd_load_r(pvdw_c12+2*UNROLLJ);
+ gmx_simd_real_t c12_S3 = gmx_simd_load_r(pvdw_c12+3*UNROLLJ);
#endif /* FIX_LJ_C */
#ifdef ENERGY_GROUPS
shZ_S = gmx_simd_load1_r(shiftvec+ish3+2);
#if UNROLLJ <= 4
- sci = ci*STRIDE;
- scix = sci*DIM;
- sci2 = sci*2;
+ int sci = ci*STRIDE;
+ int scix = sci*DIM;
+#if defined LJ_COMB_LB || defined LJ_COMB_GEOM || defined LJ_EWALD_GEOM
+ int sci2 = sci*2;
+#endif
#else
- sci = (ci>>1)*STRIDE;
- scix = sci*DIM + (ci & 1)*(STRIDE>>1);
- sci2 = sci*2 + (ci & 1)*(STRIDE>>1);
+ int sci = (ci>>1)*STRIDE;
+ int scix = sci*DIM + (ci & 1)*(STRIDE>>1);
+#if defined LJ_COMB_LB || defined LJ_COMB_GEOM || defined LJ_EWALD_GEOM
+ int sci2 = sci*2 + (ci & 1)*(STRIDE>>1);
+#endif
sci += (ci & 1)*(STRIDE>>1);
#endif
do_LJ = (nbln->shift & NBNXN_CI_DO_LJ(0));
do_coul = (nbln->shift & NBNXN_CI_DO_COUL(0));
half_LJ = ((nbln->shift & NBNXN_CI_HALF_LJ(0)) || !do_LJ) && do_coul;
-#ifdef LJ_EWALD_GEOM
- do_self = TRUE;
-#else
- do_self = do_coul;
-#endif
#ifdef ENERGY_GROUPS
egps_i = nbat->energrp[ci];
#endif
#ifdef CALC_ENERGIES
+#ifdef LJ_EWALD_GEOM
+ gmx_bool do_self = TRUE;
+#else
+ gmx_bool do_self = do_coul;
+#endif
#if UNROLLJ == 4
if (do_self && l_cj[nbln->cj_ind_start].cj == ci_sh)
#endif
#endif
/* Load i atom data */
- sciy = scix + STRIDE;
- sciz = sciy + STRIDE;
+ int sciy = scix + STRIDE;
+ int sciz = sciy + STRIDE;
gmx_load1p1_pr(&ix_S0, x+scix);
gmx_load1p1_pr(&ix_S2, x+scix+2);
gmx_load1p1_pr(&iy_S0, x+sciy);
gmx_load1p1_pr(&seps_i_S2, ljc+sci2+STRIDE+2);
#else
#ifdef LJ_COMB_GEOM
+ gmx_simd_real_t c6s_S0, c12s_S0;
+ gmx_simd_real_t c6s_S2, c12s_S2;
+
gmx_load1p1_pr(&c6s_S0, ljc+sci2+0);
if (!half_LJ)
{
{
gmx_load1p1_pr(&c12s_S2, ljc+sci2+STRIDE+2);
}
-#else
- nbfp0 = nbfp_ptr + type[sci ]*nbat->ntype*nbfp_stride;
- nbfp1 = nbfp_ptr + type[sci+1]*nbat->ntype*nbfp_stride;
+#elif !defined LJ_COMB_LB && !defined FIX_LJ_C
+ const real *nbfp0 = nbfp_ptr + type[sci ]*nbat->ntype*nbfp_stride;
+ const real *nbfp1 = nbfp_ptr + type[sci+1]*nbat->ntype*nbfp_stride;
+ const real *nbfp2 = NULL, *nbfp3 = NULL;
if (!half_LJ)
{
nbfp2 = nbfp_ptr + type[sci+2]*nbat->ntype*nbfp_stride;
#endif
#ifdef LJ_EWALD_GEOM
/* We need the geometrically combined C6 for the PME grid correction */
+ gmx_simd_real_t c6s_S0, c6s_S2;
gmx_load1p1_pr(&c6s_S0, ljc+sci2+0);
if (!half_LJ)
{
#endif
/* Zero the potential energy for this list */
- Vvdwtot_S = gmx_simd_setzero_r();
- vctot_S = gmx_simd_setzero_r();
+#ifdef CALC_ENERGIES
+ gmx_simd_real_t Vvdwtot_S = gmx_simd_setzero_r();
+ gmx_simd_real_t vctot_S = gmx_simd_setzero_r();
+#endif
/* Clear i atom forces */
fix_S0 = gmx_simd_setzero_r();