+/* Run one PME load-balancing cycle; intended to be called every
+ * ir->nstlist steps.
+ *
+ * Does nothing when pme_lb->bActive is unset, or when the wallcycle
+ * counter for ewcSTEP has not recorded any steps yet. In both of these
+ * early-return cases *bPrinting is left untouched.
+ *
+ * Otherwise the function:
+ *  - with separate PME ranks and balancing not yet running, lets the DD
+ *    master decide from dd_pme_f_ratio() whether to start balancing and
+ *    broadcasts that decision over the DD communicator;
+ *  - when balancing is running, calls pme_load_balance() with the cycle
+ *    count accumulated since the previous call and copies the resulting
+ *    cut-offs and Ewald coefficients from fr->ic into fr;
+ *  - clears pme_lb->bActive once no further tuning is expected.
+ *
+ * pme_lb    load-balancing state, must not be NULL
+ * cr        communication record; cr->dd is used with separate PME ranks
+ * fp_err    passed on to pme_load_balance()
+ * fp_log    passed on to pme_load_balance()
+ * ir        input record; nstlist and eDispCorr are read here
+ * fr        force record, updated to stay in sync with fr->ic
+ * state     passed on to pme_load_balance()
+ * wcycle    wallcycle counters, source of the step cycle counts
+ * step      current step, passed on to pme_load_balance()
+ * step_rel  step count compared against PMETunePeriod*ir->nstlist
+ * bPrinting set to pme_lb->bBalance when the function runs to the end
+ */
+void pme_loadbal_do(pme_load_balancing_t *pme_lb,
+                    t_commrec            *cr,
+                    FILE                 *fp_err,
+                    FILE                 *fp_log,
+                    t_inputrec           *ir,
+                    t_forcerec           *fr,
+                    t_state              *state,
+                    gmx_wallcycle_t       wcycle,
+                    gmx_int64_t           step,
+                    gmx_int64_t           step_rel,
+                    gmx_bool             *bPrinting)
+{
+    int    n_prev;
+    double cycles_prev;
+
+    assert(pme_lb != NULL);
+
+    if (!pme_lb->bActive)
+    {
+        return;
+    }
+
+    /* Remember the previous totals so we can work with the increments */
+    n_prev      = pme_lb->cycles_n;
+    cycles_prev = pme_lb->cycles_c;
+    wallcycle_get(wcycle, ewcSTEP, &pme_lb->cycles_n, &pme_lb->cycles_c);
+    if (pme_lb->cycles_n == 0)
+    {
+        /* Before the first step we haven't done any steps yet */
+        return;
+    }
+    /* Sanity check, we expect nstlist cycle counts */
+    if (pme_lb->cycles_n - n_prev != ir->nstlist)
+    {
+        /* We could return here, but it's safer to issue an error and quit */
+        gmx_incons("pme_loadbal_do called at an interval != nstlist");
+    }
+
+    /* PME grid + cut-off optimization with GPUs or PME ranks */
+    if (!pme_lb->bBalance && pme_lb->bSepPMERanks)
+    {
+        if (DDMASTER(cr->dd))
+        {
+            /* PME rank load is too high, start tuning */
+            pme_lb->bBalance = (dd_pme_f_ratio(cr->dd) >= loadBalanceTriggerFactor);
+        }
+        /* Only the master evaluated the ratio; share its decision */
+        dd_bcast(cr->dd, sizeof(gmx_bool), &pme_lb->bBalance);
+
+        /* bSepPMERanks is already guaranteed by the enclosing condition */
+        if (pme_lb->bBalance &&
+            use_GPU(fr->nbv) && DOMAINDECOMP(cr))
+        {
+            /* Lock DLB=auto to off (does nothing when DLB=yes/no).
+             * With GPUs + separate PME ranks, we don't want DLB.
+             * This could happen when we scan coarse grids and
+             * it would then never be turned off again.
+             * This would hurt performance at the final, optimal
+             * grid spacing, where DLB almost never helps.
+             * Also, DLB can limit the cut-off for PME tuning.
+             */
+            dd_dlb_set_lock(cr->dd, TRUE);
+        }
+    }
+
+    if (pme_lb->bBalance)
+    {
+        /* init_step might not be a multiple of nstlist,
+         * but the first cycle is always skipped anyhow.
+         */
+        pme_lb->bBalance =
+            pme_load_balance(pme_lb, cr,
+                             fp_err, fp_log,
+                             ir, state, pme_lb->cycles_c - cycles_prev,
+                             fr->ic, fr->nbv, &fr->pmedata,
+                             step);
+
+        /* Update constants in forcerec/inputrec to keep them in sync with fr->ic */
+        fr->ewaldcoeff_q  = fr->ic->ewaldcoeff_q;
+        fr->ewaldcoeff_lj = fr->ic->ewaldcoeff_lj;
+        fr->rlist         = fr->ic->rlist;
+        fr->rlistlong     = fr->ic->rlistlong;
+        fr->rcoulomb      = fr->ic->rcoulomb;
+        fr->rvdw          = fr->ic->rvdw;
+
+        if (ir->eDispCorr != edispcNO)
+        {
+            /* Called again because the cut-offs copied above may have changed */
+            calc_enervirdiff(NULL, ir->eDispCorr, fr);
+        }
+
+        if (!pme_lb->bBalance &&
+            DOMAINDECOMP(cr) &&
+            dd_dlb_is_locked(cr->dd))
+        {
+            /* Unlock the DLB=auto, DLB is allowed to activate
+             * (but we don't expect it to activate in most cases).
+             */
+            dd_dlb_set_lock(cr->dd, FALSE);
+        }
+    }
+
+    if (!pme_lb->bBalance &&
+        (!pme_lb->bSepPMERanks || (step_rel > PMETunePeriod*ir->nstlist)))
+    {
+        /* Balancing is not running and we are not (or no longer) measuring
+         * PP/PME imbalance during the first PMETunePeriod*nstlist steps:
+         * deactivate the tuning. With separate PME ranks we must stay
+         * active during that initial period, otherwise the imbalance
+         * trigger above would only ever be evaluated once.
+         */
+        pme_lb->bActive = FALSE;
+    }
+
+    *bPrinting = pme_lb->bBalance;
+}
+