gmx_bool bNEMD;
int ngtc; /* The number of T-coupling groups */
t_grp_tcstat *tcstat; /* T-coupling data */
- tensor **ekin_work_alloc; /* Allocated locations of ekin_work */
+ tensor **ekin_work_alloc; /* Allocated locations for *_work members */
tensor **ekin_work; /* Work arrays for tcstat per thread */
+ real **dekindl_work; /* Work location for dekindl per thread */
int ngacc; /* The number of acceleration groups */
t_grp_acc *grpstat; /* Acceleration data */
tensor ekin; /* overall kinetic energy */
/* could this be done more readably/compactly? */
switch (i)
{
+ case (efptMASS):
+ index = F_DKDL;
+ break;
case (efptCOUL):
index = F_DVDL_COUL;
break;
case (efptRESTRAINT):
index = F_DVDL_RESTRAINT;
break;
- case (efptMASS):
- index = F_DKDL;
- break;
default:
index = F_DVDL;
break;
so we don't need to add anything to the
enerd->enerpart_lambda[0] */
- /* we don't need to worry about dvdl contributions to the current lambda, because
- it's automatically zero */
-
- /* first kinetic energy term */
- dlam = (fepvals->all_lambda[efptMASS][i] - lambda[efptMASS]);
-
- enerd->enerpart_lambda[i+1] += enerd->term[F_DKDL]*dlam;
+ /* we don't need to worry about dvdl_lin contributions to dE at
+ current lambda, because the contributions to the current
+ lambda are automatically zeroed */
for (j = 0; j < efptNR; j++)
{
- if (j == efptMASS)
- {
- continue;
- } /* no other mass term to worry about */
-
+ /* Note that this loop is over all dhdl components, not just the separated ones */
dlam = (fepvals->all_lambda[j][i]-lambda[j]);
enerd->enerpart_lambda[i+1] += dlam*enerd->dvdl_lin[j];
if (debug)
gmx_bool bEner, bPres, bTemp, bVV;
gmx_bool bRerunMD, bStopCM, bGStat, bIterate,
bFirstIterate, bReadEkin, bEkinAveVel, bScaleEkin, bConstrain;
- real ekin, temp, prescorr, enercorr, dvdlcorr;
+ real ekin, temp, prescorr, enercorr, dvdlcorr, dvdl_ekin;
/* translate CGLO flags to gmx_booleans */
bRerunMD = flags & CGLO_RERUNMD;
bSaveEkinOld: If TRUE (in the case of iteration = bIterate is TRUE), we don't reset the ekinscale_nhc.
If FALSE, we go ahead and erase over it.
*/
- enerd->term[F_TEMP] = sum_ekin(&(ir->opts), ekind, &(enerd->term[F_DKDL]),
+ enerd->term[F_TEMP] = sum_ekin(&(ir->opts), ekind, &dvdl_ekin,
bEkinAveVel, bIterate, bScaleEkin);
+ enerd->dvdl_lin[efptMASS] = (double) dvdl_ekin;
enerd->term[F_EKIN] = trace(ekind->ekin);
}
snew(ekind->ekin_work_alloc, nthread);
snew(ekind->ekin_work, nthread);
+ snew(ekind->dekindl_work, nthread);
#pragma omp parallel for num_threads(nthread) schedule(static)
for (thread = 0; thread < nthread; thread++)
{
- /* Allocate 2 elements extra on both sides,
- * so in single precision we have 2*3*3*4=72 bytes buffer
- * on both sides to avoid cache pollution.
+#define EKIN_WORK_BUFFER_SIZE 2
+ /* Allocate 2 extra elements on both sides, so in single
+ * precision we have
+ * EKIN_WORK_BUFFER_SIZE*DIM*DIM*sizeof(real) = 72/144 bytes
+ * buffer on both sides to avoid cache pollution.
*/
- snew(ekind->ekin_work_alloc[thread], ekind->ngtc+4);
- ekind->ekin_work[thread] = ekind->ekin_work_alloc[thread] + 2;
+ snew(ekind->ekin_work_alloc[thread], ekind->ngtc+2*EKIN_WORK_BUFFER_SIZE);
+ ekind->ekin_work[thread] = ekind->ekin_work_alloc[thread] + EKIN_WORK_BUFFER_SIZE;
+ /* Nasty hack so we can have the per-thread accumulation
+ * variable for dekindl in the same thread-local cache lines
+ * as the per-thread accumulation tensors for ekin[fh],
+ * because they are accumulated in the same loop. */
+ ekind->dekindl_work[thread] = &(ekind->ekin_work[thread][ekind->ngtc][0][0]);
+#undef EKIN_WORK_BUFFER_SIZE
}
ekind->ngacc = opts->ngacc;
}
}
else
-
{
/* Calculate the full step Ekin as the average of the half steps */
for (j = 0; (j < DIM); j++)
}
if (dekindlambda)
{
- *dekindlambda = 0.5*(ekind->dekindl + ekind->dekindl_old);
+ if (bEkinAveVel)
+ {
+ *dekindlambda = ekind->dekindl;
+ }
+ else
+ {
+ *dekindlambda = 0.5*(ekind->dekindl + ekind->dekindl_old);
+ }
}
return T;
}
end_t = md->start + ((thread+1)*md->homenr)/nthread;
ekin_sum = ekind->ekin_work[thread];
- dekindl_sum = &ekind->ekin_work[thread][opts->ngtc][0][0];
+ dekindl_sum = ekind->dekindl_work[thread];
for (gt = 0; gt < opts->ngtc; gt++)
{
clear_mat(ekin_sum[gt]);
}
+ *dekindl_sum = 0.0;
ga = 0;
gt = 0;
}
if (md->nMassPerturbed && md->bPerturbed[n])
{
- *dekindl_sum -=
+ *dekindl_sum +=
0.5*(md->massB[n] - md->massA[n])*iprod(v_corrt, v_corrt);
}
}
}
}
- ekind->dekindl += ekind->ekin_work[thread][opts->ngtc][0][0];
+ ekind->dekindl += *ekind->dekindl_work[thread];
}
inc_nrnb(nrnb, eNR_EKIN, md->homenr);
}
if (md->nPerturbed && md->bPerturbed[n])
{
- dekindl -= 0.5*(md->massB[n] - md->massA[n])*iprod(v_corrt, v_corrt);
+ dekindl += 0.5*(md->massB[n] - md->massA[n])*iprod(v_corrt, v_corrt);
}
}
ekind->dekindl = dekindl;