simulation stops. If equal to zero, don't
communicate any more between multisims.*/
/* PME load balancing data for GPU kernels */
- pme_load_balancing_t *pme_loadbal = NULL;
- double cycles_pmes;
- gmx_bool bPMETuneTry = FALSE, bPMETuneRunning = FALSE;
+ pme_load_balancing_t *pme_loadbal;
+ gmx_bool bPMETune = FALSE;
+ gmx_bool bPMETunePrinting = FALSE;
/* Interactive MD */
gmx_bool bIMDstep = FALSE;
repl_ex_nst, repl_ex_nex, repl_ex_seed);
}
- /* PME tuning is only supported with GPUs or PME nodes and not with rerun.
- * PME tuning is not supported with PME only for LJ and not for Coulomb.
+ /* PME tuning is only supported with PME for Coulomb. Is is not supported
+ * with only LJ PME, or for reruns.
*/
- if ((Flags & MD_TUNEPME) &&
- EEL_PME(fr->eeltype) &&
- ( use_GPU(fr->nbv) || !(cr->duty & DUTY_PME)) &&
- !bRerunMD)
+ bPMETune = ((Flags & MD_TUNEPME) && EEL_PME(fr->eeltype) && !bRerunMD &&
+ !(Flags & MD_REPRODUCIBLE));
+ if (bPMETune)
{
- pme_loadbal_init(&pme_loadbal, ir, state->box, fr->ic, fr->pmedata);
- cycles_pmes = 0;
- if (cr->duty & DUTY_PME)
- {
- /* Start tuning right away, as we can't measure the load */
- bPMETuneRunning = TRUE;
- }
- else
- {
- /* Separate PME nodes, we can measure the PP/PME load balance */
- bPMETuneTry = TRUE;
- }
+ pme_loadbal_init(&pme_loadbal, ir, state->box, fr->ic, fr->pmedata,
+ use_GPU(fr->nbv), !(cr->duty & DUTY_PME),
+ &bPMETunePrinting);
}
if (!ir->bContinuation && !bRerunMD)
while (!bLastStep || (bRerunMD && bNotLastFrame))
{
+ /* Determine if this is a neighbor search step */
+ bNStList = (ir->nstlist > 0 && step % ir->nstlist == 0);
+
+ if (bPMETune && bNStList)
+ {
+ /* PME grid + cut-off optimization with GPUs or PME nodes */
+ pme_loadbal_do(pme_loadbal, cr,
+ (bVerbose && MASTER(cr)) ? stderr : NULL,
+ fplog,
+ ir, fr, state, wcycle,
+ step, step_rel,
+ &bPMETunePrinting);
+ }
+
wallcycle_start(wcycle, ewcSTEP);
if (bRerunMD)
}
else
{
- /* Determine whether or not to do Neighbour Searching and LR */
- bNStList = (ir->nstlist > 0 && step % ir->nstlist == 0);
-
bNS = (bFirstStep || bExchanged || bNeedRepartition || bNStList || bDoFEP);
}
state, &f, mdatoms, top, fr,
vsite, shellfc, constr,
nrnb, wcycle,
- do_verbose && !bPMETuneRunning);
+ do_verbose && !bPMETunePrinting);
wallcycle_stop(wcycle, ewcDOMDEC);
}
}
state->fep_state = lamnew;
}
/* Print the remaining wall clock time for the run */
- if (MULTIMASTER(cr) && (do_verbose || gmx_got_usr_signal()) && !bPMETuneRunning)
+ if (MULTIMASTER(cr) &&
+ (do_verbose || gmx_got_usr_signal()) &&
+ !bPMETunePrinting)
{
if (shellfc)
{
}
}
- if (!bRerunMD || !rerun_fr.bStep)
- {
- /* increase the MD step number */
- step++;
- step_rel++;
- }
-
cycles = wallcycle_stop(wcycle, ewcSTEP);
if (DOMAINDECOMP(cr) && wcycle)
{
dd_cycles_add(cr->dd, cycles, ddCyclStep);
}
- if (bPMETuneRunning || bPMETuneTry)
+ if (!bRerunMD || !rerun_fr.bStep)
{
- /* PME grid + cut-off optimization with GPUs or PME nodes */
-
- /* Count the total cycles over the last steps */
- cycles_pmes += cycles;
-
- /* We can only switch cut-off at NS steps */
- if (step % ir->nstlist == 0)
- {
- /* PME grid + cut-off optimization with GPUs or PME nodes */
- if (bPMETuneTry)
- {
- if (DDMASTER(cr->dd))
- {
- /* PME node load is too high, start tuning */
- bPMETuneRunning = (dd_pme_f_ratio(cr->dd) >= 1.05);
- }
- dd_bcast(cr->dd, sizeof(gmx_bool), &bPMETuneRunning);
-
- if (bPMETuneRunning &&
- use_GPU(fr->nbv) && DOMAINDECOMP(cr) &&
- !(cr->duty & DUTY_PME))
- {
- /* Lock DLB=auto to off (does nothing when DLB=yes/no).
- * With GPUs + separate PME ranks, we don't want DLB.
- * This could happen when we scan coarse grids and
- * it would then never be turned off again.
- * This would hurt performance at the final, optimal
- * grid spacing, where DLB almost never helps.
- * Also, DLB can limit the cut-off for PME tuning.
- */
- dd_dlb_set_lock(cr->dd, TRUE);
- }
-
- if (bPMETuneRunning || step_rel > ir->nstlist*50)
- {
- bPMETuneTry = FALSE;
- }
- }
- if (bPMETuneRunning)
- {
- /* init_step might not be a multiple of nstlist,
- * but the first cycle is always skipped anyhow.
- */
- bPMETuneRunning =
- pme_load_balance(pme_loadbal, cr,
- (bVerbose && MASTER(cr)) ? stderr : NULL,
- fplog,
- ir, state, cycles_pmes,
- fr->ic, fr->nbv, &fr->pmedata,
- step);
-
- /* Update constants in forcerec/inputrec to keep them in sync with fr->ic */
- fr->ewaldcoeff_q = fr->ic->ewaldcoeff_q;
- fr->ewaldcoeff_lj = fr->ic->ewaldcoeff_lj;
- fr->rlist = fr->ic->rlist;
- fr->rlistlong = fr->ic->rlistlong;
- fr->rcoulomb = fr->ic->rcoulomb;
- fr->rvdw = fr->ic->rvdw;
-
- if (ir->eDispCorr != edispcNO)
- {
- calc_enervirdiff(NULL, ir->eDispCorr, fr);
- }
-
- if (!bPMETuneRunning &&
- DOMAINDECOMP(cr) &&
- dd_dlb_is_locked(cr->dd))
- {
- /* Unlock the DLB=auto, DLB is allowed to activate
- * (but we don't expect it to activate in most cases).
- */
- dd_dlb_set_lock(cr->dd, FALSE);
- }
- }
- cycles_pmes = 0;
- }
+ /* increase the MD step number */
+ step++;
+ step_rel++;
}
if (step_rel == wcycle_get_reset_counters(wcycle) ||
done_mdoutf(outf);
debug_gmx();
- if (pme_loadbal != NULL)
+ if (bPMETune)
{
- pme_loadbal_done(pme_loadbal, cr, fplog,
- use_GPU(fr->nbv));
+ pme_loadbal_done(pme_loadbal, cr, fplog, use_GPU(fr->nbv));
}
if (shellfc && fplog)