Mostly useful as a lesson learned.
1) A double precision constant forces the compiler to convert
the single precision variable to double, do the multiplication
in double, and then convert back. Using a single precision
constant in a double precision build reduces the accuracy (the
calculation is still done in double, but the constant has only
single precision).
2) Using a temporary array instead of a temporary scalar causes ICC14 to
generate an extra store.
Change-Id: Ib320ac2ae4ff80ce48277544abff468c483cc83a
int ewitab;
real ewrt, eweps, ewtabscale, ewtabhalfspace, sh_ewald;
int ewitab;
real ewrt, eweps, ewtabscale, ewtabhalfspace, sh_ewald;
+ const real onetwelfth = 1.0/12.0;
+ const real onesixth = 1.0/6.0;
+ const real zero = 0.0;
+ const real half = 0.5;
+ const real one = 1.0;
+ const real two = 2.0;
+ const real six = 6.0;
+ const real fourtyeight = 48.0;
+
sh_ewald = fr->ic->sh_ewald;
ewtab = fr->ic->tabq_coul_FDV0;
ewtabscale = fr->ic->tabq_scale;
sh_ewald = fr->ic->sh_ewald;
ewtab = fr->ic->tabq_coul_FDV0;
ewtabscale = fr->ic->tabq_scale;
- ewtabhalfspace = 0.5/ewtabscale;
+ ewtabhalfspace = half/ewtabscale;
tab_ewald_F_lj = fr->ic->tabq_vdw_F;
tab_ewald_V_lj = fr->ic->tabq_vdw_V;
tab_ewald_F_lj = fr->ic->tabq_vdw_F;
tab_ewald_V_lj = fr->ic->tabq_vdw_V;
dvdl_vdw = 0;
/* Lambda factor for state A, 1-lambda*/
dvdl_vdw = 0;
/* Lambda factor for state A, 1-lambda*/
- LFC[STATE_A] = 1.0 - lambda_coul;
- LFV[STATE_A] = 1.0 - lambda_vdw;
+ LFC[STATE_A] = one - lambda_coul;
+ LFV[STATE_A] = one - lambda_vdw;
/* Lambda factor for state B, lambda*/
LFC[STATE_B] = lambda_coul;
/* Lambda factor for state B, lambda*/
LFC[STATE_B] = lambda_coul;
{
rpm2 = rsq*rsq; /* r4 */
rp = rpm2*rsq; /* r6 */
}
{
rpm2 = rsq*rsq; /* r4 */
rp = rpm2*rsq; /* r6 */
}
- else if (sc_r_power == 48.0)
+ else if (sc_r_power == fourtyeight)
{
rp = rsq*rsq*rsq; /* r6 */
rp = rp*rp; /* r12 */
{
rp = rsq*rsq*rsq; /* r6 */
rp = rp*rp; /* r12 */
if ((c6[i] > 0) && (c12[i] > 0))
{
/* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
if ((c6[i] > 0) && (c12[i] > 0))
{
/* c12 is stored scaled with 12.0 and c6 is scaled with 6.0 - correct for this */
- sigma6[i] = 0.5*c12[i]/c6[i];
+ sigma6[i] = half*c12[i]/c6[i];
sigma2[i] = pow(sigma6[i], 1.0/3.0);
/* should be able to get rid of this ^^^ internal pow call eventually. Will require agreement on
what data to store externally. Can't be fixed without larger scale changes, so not 4.6 */
sigma2[i] = pow(sigma6[i], 1.0/3.0);
/* should be able to get rid of this ^^^ internal pow call eventually. Will require agreement on
what data to store externally. Can't be fixed without larger scale changes, so not 4.6 */
sigma6[i] = sigma6_def;
sigma2[i] = sigma2_def;
}
sigma6[i] = sigma6_def;
sigma2[i] = sigma2_def;
}
{
sigma_pow[i] = sigma6[i];
sigma_powm2[i] = sigma6[i]/sigma2[i];
}
{
sigma_pow[i] = sigma6[i];
sigma_powm2[i] = sigma6[i]/sigma2[i];
}
- else if (sc_r_power == 48.0)
+ else if (sc_r_power == fourtyeight)
{
sigma_pow[i] = sigma6[i]*sigma6[i]; /* sigma^12 */
sigma_pow[i] = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
{
sigma_pow[i] = sigma6[i]*sigma6[i]; /* sigma^12 */
sigma_pow[i] = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
{
/* this section has to be inside the loop because of the dependence on sigma_pow */
if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
{
/* this section has to be inside the loop because of the dependence on sigma_pow */
- rpinvC = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
- rinvC = pow(rpinvC, 1.0/sc_r_power);
- rC = 1.0/rinvC;
+ rpinvC = one/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
+ rinvC = pow(rpinvC, one/sc_r_power);
+ rC = one/rinvC;
- rpinvV = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
- rinvV = pow(rpinvV, 1.0/sc_r_power);
- rV = 1.0/rinvV;
+ rpinvV = one/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
+ rinvV = pow(rpinvV, one/sc_r_power);
+ rV = one/rinvV;
case GMX_NBKERNEL_ELEC_REACTIONFIELD:
/* reaction-field */
Vcoul[i] = qq[i]*(rinvC + krf*rC*rC-crf);
case GMX_NBKERNEL_ELEC_REACTIONFIELD:
/* reaction-field */
Vcoul[i] = qq[i]*(rinvC + krf*rC*rC-crf);
- FscalC[i] = qq[i]*(rinvC - 2.0*krf*rC*rC);
+ FscalC[i] = qq[i]*(rinvC - two*krf*rC*rC);
break;
case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE:
break;
case GMX_NBKERNEL_ELEC_CUBICSPLINETABLE:
Heps2 = eps2C*VFtab[nnn+3];
Fp = F+Geps+Heps2;
VV = Y+epsC*Fp;
Heps2 = eps2C*VFtab[nnn+3];
Fp = F+Geps+Heps2;
VV = Y+epsC*Fp;
- FF = Fp+Geps+2.0*Heps2;
+ FF = Fp+Geps+two*Heps2;
Vcoul[i] = qq[i]*VV;
FscalC[i] = -qq[i]*tabscale*FF*rC;
break;
Vcoul[i] = qq[i]*VV;
FscalC[i] = -qq[i]*tabscale*FF*rC;
break;
break;
case GMX_NBKERNEL_ELEC_NONE:
break;
case GMX_NBKERNEL_ELEC_NONE:
- FscalC[i] = 0.0;
- Vcoul[i] = 0.0;
+ FscalC[i] = zero;
+ Vcoul[i] = zero;
if (fr->coulomb_modifier == eintmodPOTSWITCH)
{
d = rC-fr->rcoulomb_switch;
if (fr->coulomb_modifier == eintmodPOTSWITCH)
{
d = rC-fr->rcoulomb_switch;
- d = (d > 0.0) ? d : 0.0;
+ d = (d > zero) ? d : zero;
- sw = 1.0+d2*d*(elec_swV3+d*(elec_swV4+d*elec_swV5));
+ sw = one+d2*d*(elec_swV3+d*(elec_swV4+d*elec_swV5));
dsw = d2*(elec_swF2+d*(elec_swF3+d*elec_swF4));
FscalC[i] = FscalC[i]*sw - rC*Vcoul[i]*dsw;
Vcoul[i] *= sw;
dsw = d2*(elec_swF2+d*(elec_swF3+d*elec_swF4));
FscalC[i] = FscalC[i]*sw - rC*Vcoul[i]*dsw;
Vcoul[i] *= sw;
- FscalC[i] = (rC < rcoulomb) ? FscalC[i] : 0.0;
- Vcoul[i] = (rC < rcoulomb) ? Vcoul[i] : 0.0;
+ FscalC[i] = (rC < rcoulomb) ? FscalC[i] : zero;
+ Vcoul[i] = (rC < rcoulomb) ? Vcoul[i] : zero;
{
case GMX_NBKERNEL_VDW_LENNARDJONES:
/* cutoff LJ */
{
case GMX_NBKERNEL_VDW_LENNARDJONES:
/* cutoff LJ */
Vvdw6 = c6[i]*rinv6;
Vvdw12 = c12[i]*rinv6*rinv6;
Vvdw6 = c6[i]*rinv6;
Vvdw12 = c12[i]*rinv6*rinv6;
- Vvdw[i] = ( (Vvdw12 - c12[i]*sh_invrc6*sh_invrc6)*(1.0/12.0)
- - (Vvdw6 - c6[i]*sh_invrc6)*(1.0/6.0));
+ Vvdw[i] = ( (Vvdw12 - c12[i]*sh_invrc6*sh_invrc6)*onetwelfth
+ - (Vvdw6 - c6[i]*sh_invrc6)*onesixth);
FscalV[i] = Vvdw12 - Vvdw6;
break;
FscalV[i] = Vvdw12 - Vvdw6;
break;
Heps2 = eps2V*VFtab[nnn+3];
Fp = F+Geps+Heps2;
VV = Y+epsV*Fp;
Heps2 = eps2V*VFtab[nnn+3];
Fp = F+Geps+Heps2;
VV = Y+epsV*Fp;
- FF = Fp+Geps+2.0*Heps2;
+ FF = Fp+Geps+two*Heps2;
Vvdw[i] += c6[i]*VV;
FscalV[i] -= c6[i]*tabscale*FF*rV;
Vvdw[i] += c6[i]*VV;
FscalV[i] -= c6[i]*tabscale*FF*rV;
Heps2 = eps2V*VFtab[nnn+7];
Fp = F+Geps+Heps2;
VV = Y+epsV*Fp;
Heps2 = eps2V*VFtab[nnn+7];
Fp = F+Geps+Heps2;
VV = Y+epsV*Fp;
- FF = Fp+Geps+2.0*Heps2;
+ FF = Fp+Geps+two*Heps2;
Vvdw[i] += c12[i]*VV;
FscalV[i] -= c12[i]*tabscale*FF*rV;
break;
case GMX_NBKERNEL_VDW_LJEWALD:
Vvdw[i] += c12[i]*VV;
FscalV[i] -= c12[i]*tabscale*FF*rV;
break;
case GMX_NBKERNEL_VDW_LJEWALD:
Vvdw6 = c6[i]*rinv6;
Vvdw12 = c12[i]*rinv6*rinv6;
Vvdw6 = c6[i]*rinv6;
Vvdw12 = c12[i]*rinv6*rinv6;
- Vvdw[i] = ( (Vvdw12 - c12[i]*sh_invrc6*sh_invrc6)*(1.0/12.0)
- - (Vvdw6 - c6[i]*sh_invrc6 - c6grid*sh_lj_ewald)*(1.0/6.0));
+ Vvdw[i] = ( (Vvdw12 - c12[i]*sh_invrc6*sh_invrc6)*onetwelfth
+ - (Vvdw6 - c6[i]*sh_invrc6 - c6grid*sh_lj_ewald)*onesixth);
FscalV[i] = Vvdw12 - Vvdw6;
}
else
FscalV[i] = Vvdw12 - Vvdw6;
}
else
/* Normal LJ-PME */
ewcljrsq = ewclj2*rV*rV;
exponent = exp(-ewcljrsq);
/* Normal LJ-PME */
ewcljrsq = ewclj2*rV*rV;
exponent = exp(-ewcljrsq);
- poly = exponent*(1.0 + ewcljrsq + ewcljrsq*ewcljrsq*0.5);
- vvdw_disp = (c6[i]-c6grid*(1.0-poly))*rinv6;
+ poly = exponent*(one + ewcljrsq + ewcljrsq*ewcljrsq*half);
+ vvdw_disp = (c6[i]-c6grid*(one-poly))*rinv6;
vvdw_rep = c12[i]*rinv6*rinv6;
vvdw_rep = c12[i]*rinv6*rinv6;
- FscalV[i] = vvdw_rep - vvdw_disp - c6grid*(1.0/6.0)*exponent*ewclj6;
- Vvdw[i] = (vvdw_rep - c12[i]*sh_invrc6*sh_invrc6)/12.0 - (vvdw_disp - c6[i]*sh_invrc6 - c6grid*sh_lj_ewald)/6.0;
+ FscalV[i] = vvdw_rep - vvdw_disp - c6grid*onesixth*exponent*ewclj6;
+ Vvdw[i] = (vvdw_rep - c12[i]*sh_invrc6*sh_invrc6)*onetwelfth - (vvdw_disp - c6[i]*sh_invrc6 - c6grid*sh_lj_ewald)/six;
}
break;
case GMX_NBKERNEL_VDW_NONE:
}
break;
case GMX_NBKERNEL_VDW_NONE:
- Vvdw[i] = 0.0;
- FscalV[i] = 0.0;
+ Vvdw[i] = zero;
+ FscalV[i] = zero;
if (fr->vdw_modifier == eintmodPOTSWITCH)
{
d = rV-fr->rvdw_switch;
if (fr->vdw_modifier == eintmodPOTSWITCH)
{
d = rV-fr->rvdw_switch;
- d = (d > 0.0) ? d : 0.0;
+ d = (d > zero) ? d : zero;
- sw = 1.0+d2*d*(vdw_swV3+d*(vdw_swV4+d*vdw_swV5));
+ sw = one+d2*d*(vdw_swV3+d*(vdw_swV4+d*vdw_swV5));
dsw = d2*(vdw_swF2+d*(vdw_swF3+d*vdw_swF4));
FscalV[i] = FscalV[i]*sw - rV*Vvdw[i]*dsw;
Vvdw[i] *= sw;
dsw = d2*(vdw_swF2+d*(vdw_swF3+d*vdw_swF4));
FscalV[i] = FscalV[i]*sw - rV*Vvdw[i]*dsw;
Vvdw[i] *= sw;
- FscalV[i] = (rV < rvdw) ? FscalV[i] : 0.0;
- Vvdw[i] = (rV < rvdw) ? Vvdw[i] : 0.0;
+ FscalV[i] = (rV < rvdw) ? FscalV[i] : zero;
+ Vvdw[i] = (rV < rvdw) ? Vvdw[i] : zero;
* As there is no singularity, there is no need for soft-core.
*/
VV = krf*rsq - crf;
* As there is no singularity, there is no need for soft-core.
*/
VV = krf*rsq - crf;
}
for (i = 0; i < NSTATES; i++)
}
for (i = 0; i < NSTATES; i++)
* scheme, and corresponds to a self-interaction that will
* occur twice. Scale it down by 50% to only include it once.
*/
* scheme, and corresponds to a self-interaction that will
* occur twice. Scale it down by 50% to only include it once.
*/
}
for (i = 0; i < NSTATES; i++)
}
for (i = 0; i < NSTATES; i++)
/* TODO: Currently the Ewald LJ table does not contain
* the factor 1/6, we should add this.
*/
/* TODO: Currently the Ewald LJ table does not contain
* the factor 1/6, we should add this.
*/
- FF = f_lr*rinv/6.0;
- VV = (tab_ewald_V_lj[ri] - ewtabhalfspace*frac*(tab_ewald_F_lj[ri] + f_lr))/6.0;
+ FF = f_lr*rinv/six;
+ VV = (tab_ewald_V_lj[ri] - ewtabhalfspace*frac*(tab_ewald_F_lj[ri] + f_lr))/six;
* scheme, and corresponds to a self-interaction that will
* occur twice. Scale it down by 50% to only include it once.
*/
* scheme, and corresponds to a self-interaction that will
* occur twice. Scale it down by 50% to only include it once.
*/
}
for (i = 0; i < NSTATES; i++)
}
for (i = 0; i < NSTATES; i++)
real velec[2], vvdw[2];
int i, ntab;
real velec[2], vvdw[2];
int i, ntab;
+ const real half = 0.5;
+ const real one = 1.0;
+ const real two = 2.0;
+ const real six = 6.0;
+ const real fourtyeight = 48.0;
+
qq[0] = qqA;
qq[1] = qqB;
c6[0] = c6A;
qq[0] = qqA;
qq[1] = qqB;
c6[0] = c6A;
c12[0] = c12A;
c12[1] = c12B;
c12[0] = c12A;
c12[1] = c12B;
{
rpm2 = r2*r2; /* r4 */
rp = rpm2*r2; /* r6 */
}
{
rpm2 = r2*r2; /* r4 */
rp = rpm2*r2; /* r6 */
}
- else if (sc_r_power == 48.0)
+ else if (sc_r_power == fourtyeight)
{
rp = r2*r2*r2; /* r6 */
rp = rp*rp; /* r12 */
{
rp = r2*r2*r2; /* r6 */
rp = rp*rp; /* r12 */
- rp = pow(r2, 0.5*sc_r_power); /* not currently supported as input, but can handle it */
+ rp = pow(r2, half*sc_r_power); /* not currently supported as input, but can handle it */
/* The c6 & c12 coefficients now contain the constants 6.0 and 12.0, respectively.
* Correct for this by multiplying with (1/12.0)/(1/6.0)=6.0/12.0=0.5.
*/
/* The c6 & c12 coefficients now contain the constants 6.0 and 12.0, respectively.
* Correct for this by multiplying with (1/12.0)/(1/6.0)=6.0/12.0=0.5.
*/
- sigma6[i] = 0.5*c12[i]/c6[i];
- sigma2[i] = pow(0.5*c12[i]/c6[i], 1.0/3.0);
+ sigma6[i] = half*c12[i]/c6[i];
+ sigma2[i] = pow(half*c12[i]/c6[i], 1.0/3.0);
/* should be able to get rid of this ^^^ internal pow call eventually. Will require agreement on
what data to store externally. Can't be fixed without larger scale changes, so not 5.0 */
if (sigma6[i] < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */
/* should be able to get rid of this ^^^ internal pow call eventually. Will require agreement on
what data to store externally. Can't be fixed without larger scale changes, so not 5.0 */
if (sigma6[i] < sigma6_min) /* for disappearing coul and vdw with soft core at the same time */
sigma6[i] = sigma6_def;
sigma2[i] = sigma2_def;
}
sigma6[i] = sigma6_def;
sigma2[i] = sigma2_def;
}
{
sigma_pow[i] = sigma6[i];
sigma_powm2[i] = sigma6[i]/sigma2[i];
}
{
sigma_pow[i] = sigma6[i];
sigma_powm2[i] = sigma6[i]/sigma2[i];
}
- else if (sc_r_power == 48.0)
+ else if (sc_r_power == fourtyeight)
{
sigma_pow[i] = sigma6[i]*sigma6[i]; /* sigma^12 */
sigma_pow[i] = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
{
sigma_pow[i] = sigma6[i]*sigma6[i]; /* sigma^12 */
sigma_pow[i] = sigma_pow[i]*sigma_pow[i]; /* sigma^24 */
if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
{
/* Coulomb */
if ( (qq[i] != 0) || (c6[i] != 0) || (c12[i] != 0) )
{
/* Coulomb */
- rpinv = 1.0/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
- r_coul = pow(rpinv, -1.0/sc_r_power);
+ rpinv = one/(alpha_coul_eff*lfac_coul[i]*sigma_pow[i]+rp);
+ r_coul = pow(rpinv, -one/sc_r_power);
/* Electrostatics table lookup data */
rtab = r_coul*tabscale;
/* Electrostatics table lookup data */
rtab = r_coul*tabscale;
Heps2 = eps2*vftab[ntab+3];
Fp = F+Geps+Heps2;
VV = Y+eps*Fp;
Heps2 = eps2*vftab[ntab+3];
Fp = F+Geps+Heps2;
VV = Y+eps*Fp;
- FF = Fp+Geps+2.0*Heps2;
+ FF = Fp+Geps+two*Heps2;
velec[i] = qq[i]*VV;
fscal_elec[i] = -qq[i]*FF*r_coul*rpinv*tabscale;
/* Vdw */
velec[i] = qq[i]*VV;
fscal_elec[i] = -qq[i]*FF*r_coul*rpinv*tabscale;
/* Vdw */
- rpinv = 1.0/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
- r_vdw = pow(rpinv, -1.0/sc_r_power);
+ rpinv = one/(alpha_vdw_eff*lfac_vdw[i]*sigma_pow[i]+rp);
+ r_vdw = pow(rpinv, -one/sc_r_power);
/* Vdw table lookup data */
rtab = r_vdw*tabscale;
ntab = rtab;
/* Vdw table lookup data */
rtab = r_vdw*tabscale;
ntab = rtab;
Heps2 = eps2*vftab[ntab+7];
Fp = F+Geps+Heps2;
VV = Y+eps*Fp;
Heps2 = eps2*vftab[ntab+7];
Fp = F+Geps+Heps2;
VV = Y+eps*Fp;
- FF = Fp+Geps+2.0*Heps2;
+ FF = Fp+Geps+two*Heps2;
vvdw[i] = c6[i]*VV;
fscal_vdw[i] = -c6[i]*FF;
vvdw[i] = c6[i]*VV;
fscal_vdw[i] = -c6[i]*FF;
Heps2 = eps2*vftab[ntab+11];
Fp = F+Geps+Heps2;
VV = Y+eps*Fp;
Heps2 = eps2*vftab[ntab+11];
Fp = F+Geps+Heps2;
VV = Y+eps*Fp;
- FF = Fp+Geps+2.0*Heps2;
+ FF = Fp+Geps+two*Heps2;
vvdw[i] += c12[i]*VV;
fscal_vdw[i] -= c12[i]*FF;
fscal_vdw[i] *= r_vdw*rpinv*tabscale;
vvdw[i] += c12[i]*VV;
fscal_vdw[i] -= c12[i]*FF;
fscal_vdw[i] *= r_vdw*rpinv*tabscale;
real comm_box_frac(ivec dd_nc, real cutoff, gmx_ddbox_t *ddbox)
{
int i, j, k;
real comm_box_frac(ivec dd_nc, real cutoff, gmx_ddbox_t *ddbox)
{
int i, j, k;
real comm_vol;
for (i = 0; i < DIM; i++)
{
real comm_vol;
for (i = 0; i < DIM; i++)
{
- bt[i] = ddbox->box_size[i]*ddbox->skew_fac[i];
- nw[i] = dd_nc[i]*cutoff/bt[i];
+ real bt = ddbox->box_size[i]*ddbox->skew_fac[i];
+ nw[i] = dd_nc[i]*cutoff/bt;