&(state_global->fep_state), lam0,
nrnb, top_global, &upd,
nfile, fnm, &outf, &mdebin,
- force_vir, shake_vir, mu_tot, &bSimAnn, &vcm, Flags);
+ force_vir, shake_vir, mu_tot, &bSimAnn, &vcm, Flags, wcycle);
clear_mat(total_vir);
clear_mat(pres);
do_md_trajectory_writing(fplog, cr, nfile, fnm, step, step_rel, t,
ir, state, state_global, top_global, fr,
outf, mdebin, ekind, f, f_global,
- wcycle, &nchkpt,
+ &nchkpt,
bCPT, bRerunMD, bLastStep, (Flags & MD_CONFOUT),
bSumEkinhOld);
/* Check if IMD step and do IMD communication, if bIMD is TRUE. */
}
dd_bcast(cr->dd, sizeof(gmx_bool), &bPMETuneRunning);
+ if (bPMETuneRunning &&
+ fr->nbv->bUseGPU && DOMAINDECOMP(cr) &&
+ !(cr->duty & DUTY_PME))
+ {
+ /* Lock DLB=auto to off (does nothing when DLB=yes/no).
+ * With GPUs + separate PME ranks, we don't want DLB.
+ * Without the lock, DLB could turn on while we scan coarse
+ * grids and it would then never be turned off again.
+ * This would hurt performance at the final, optimal
+ * grid spacing, where DLB almost never helps.
+ * Also, DLB can limit the cut-off for PME tuning.
+ */
+ dd_dlb_set_lock(cr->dd, TRUE);
+ }
+
if (bPMETuneRunning || step_rel > ir->nstlist*50)
{
bPMETuneTry = FALSE;
{
calc_enervirdiff(NULL, ir->eDispCorr, fr);
}
+
+ if (!bPMETuneRunning &&
+ DOMAINDECOMP(cr) &&
+ dd_dlb_is_locked(cr->dd))
+ {
+ /* PME tuning has finished: unlock DLB=auto, so DLB is allowed
+ * to activate (but we don't expect it to in most cases).
+ */
+ dd_dlb_set_lock(cr->dd, FALSE);
+ }
}
cycles_pmes = 0;
}
/* End of main MD loop */
debug_gmx();
+ /* Closing TNG files can include compressing data. Therefore it is good to do that
+ * before stopping the time measurements. */
+ mdoutf_tng_close(outf);
+
/* Stop measuring walltime */
walltime_accounting_end(walltime_accounting);