#include "gromacs/domdec/domdec.h"
#include "gromacs/domdec/domdec_network.h"
-#include "gromacs/legacyheaders/calcgrid.h"
-#include "gromacs/legacyheaders/force.h"
-#include "gromacs/legacyheaders/md_logging.h"
-#include "gromacs/legacyheaders/network.h"
-#include "gromacs/legacyheaders/sim_util.h"
-#include "gromacs/legacyheaders/types/commrec.h"
+#include "gromacs/domdec/domdec_struct.h"
+#include "gromacs/fft/calcgrid.h"
+#include "gromacs/gmxlib/md_logging.h"
+#include "gromacs/gmxlib/network.h"
+#include "gromacs/math/functions.h"
#include "gromacs/math/vec.h"
+#include "gromacs/mdlib/forcerec.h"
#include "gromacs/mdlib/nbnxn_gpu_data_mgmt.h"
+#include "gromacs/mdlib/sim_util.h"
+#include "gromacs/mdtypes/commrec.h"
+#include "gromacs/mdtypes/inputrec.h"
+#include "gromacs/mdtypes/md_enums.h"
#include "gromacs/pbcutil/pbc.h"
#include "gromacs/timing/wallcycle.h"
#include "gromacs/utility/cstringutil.h"
+#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/gmxassert.h"
#include "gromacs/utility/smalloc.h"
struct pme_setup_t {
real rcut_coulomb; /**< Coulomb cut-off */
real rlist; /**< pair-list cut-off */
- real rlistlong; /**< LR pair-list cut-off */
- int nstcalclr; /**< frequency of evaluating long-range forces for group scheme */
real spacing; /**< (largest) PME grid spacing */
ivec grid; /**< the PME grid dimensions */
real grid_efficiency; /**< ineffiency factor for non-uniform grids <= 1 */
real cut_spacing; /**< the minimum cutoff / PME grid spacing ratio */
real rcut_vdw; /**< Vdw cutoff (does not change) */
real rcut_coulomb_start; /**< Initial electrostatics cutoff */
- int nstcalclr_start; /**< Initial electrostatics cutoff */
real rbuf_coulomb; /**< the pairlist buffer size */
real rbuf_vdw; /**< the pairlist buffer size */
matrix box_start; /**< the initial simulation box */
real spm, sp;
int d;
+ // Note that we don't (yet) support PME load balancing with LJ-PME only.
+ GMX_RELEASE_ASSERT(EEL_PME(ir->coulombtype), "pme_loadbal_init called without PME electrostatics");
+ // To avoid complexity, we require a single cut-off with PME for q+LJ.
+ // This is checked by grompp, but it doesn't hurt to check again.
+ GMX_RELEASE_ASSERT(!(EEL_PME(ir->coulombtype) && EVDW_PME(ir->vdwtype) && ir->rcoulomb != ir->rvdw), "With Coulomb and LJ PME, rcoulomb should be equal to rvdw");
+
snew(pme_lb, 1);
pme_lb->bSepPMERanks = !(cr->duty & DUTY_PME);
pme_lb->cutoff_scheme = ir->cutoff_scheme;
- if (pme_lb->cutoff_scheme == ecutsVERLET)
- {
- pme_lb->rbuf_coulomb = ic->rlist - ic->rcoulomb;
- pme_lb->rbuf_vdw = pme_lb->rbuf_coulomb;
- }
- else
- {
- if (ic->rcoulomb > ic->rlist)
- {
- pme_lb->rbuf_coulomb = ic->rlistlong - ic->rcoulomb;
- }
- else
- {
- pme_lb->rbuf_coulomb = ic->rlist - ic->rcoulomb;
- }
- if (ic->rvdw > ic->rlist)
- {
- pme_lb->rbuf_vdw = ic->rlistlong - ic->rvdw;
- }
- else
- {
- pme_lb->rbuf_vdw = ic->rlist - ic->rvdw;
- }
- }
+ pme_lb->rbuf_coulomb = ic->rlist - ic->rcoulomb;
+ pme_lb->rbuf_vdw = ic->rlist - ic->rvdw;
copy_mat(box, pme_lb->box_start);
if (ir->ePBC == epbcXY && ir->nwall == 2)
pme_lb->rcut_vdw = ic->rvdw;
pme_lb->rcut_coulomb_start = ir->rcoulomb;
- pme_lb->nstcalclr_start = ir->nstcalclr;
pme_lb->cur = 0;
pme_lb->setup[0].rcut_coulomb = ic->rcoulomb;
pme_lb->setup[0].rlist = ic->rlist;
- pme_lb->setup[0].rlistlong = ic->rlistlong;
- pme_lb->setup[0].nstcalclr = ir->nstcalclr;
pme_lb->setup[0].grid[XX] = ir->nkx;
pme_lb->setup[0].grid[YY] = ir->nky;
pme_lb->setup[0].grid[ZZ] = ir->nkz;
pme_lb->cycles_n = 0;
pme_lb->cycles_c = 0;
+ if (!wallcycle_have_counter())
+ {
+ md_print_warn(cr, fp_log, "NOTE: Cycle counters unsupported or not enabled in kernel. Cannot use PME-PP balancing.\n");
+ }
+
/* Tune with GPUs and/or separate PME ranks.
* When running only on a CPU without PME ranks, PME tuning will only help
* with small numbers of atoms in the cut-off sphere.
if (pme_lb->cutoff_scheme == ecutsVERLET)
{
- set->rlist = set->rcut_coulomb + pme_lb->rbuf_coulomb;
- /* We dont use LR lists with Verlet, but this avoids if-statements in further checks */
- set->rlistlong = set->rlist;
+ /* Never decrease the Coulomb and VdW list buffers */
+ set->rlist = std::max(set->rcut_coulomb + pme_lb->rbuf_coulomb,
+ pme_lb->rcut_vdw + pme_lb->rbuf_vdw);
}
else
{
* - We should set fr->bTwinRange = (fr->rlistlong > fr->rlist).
*/
set->rlist = std::min(tmpr_coulomb, tmpr_vdw);
- set->rlistlong = std::max(tmpr_coulomb, tmpr_vdw);
-
- /* Set the long-range update frequency */
- if (set->rlist == set->rlistlong)
- {
- /* No long-range interactions if the short-/long-range cutoffs are identical */
- set->nstcalclr = 0;
- }
- else if (pme_lb->nstcalclr_start == 0 || pme_lb->nstcalclr_start == 1)
- {
- /* We were not doing long-range before, but now we are since rlist!=rlistlong */
- set->nstcalclr = 1;
- }
- else
- {
- /* We were already doing long-range interactions from the start */
- if (pme_lb->rcut_vdw > pme_lb->rcut_coulomb_start)
- {
- /* We were originally doing long-range VdW-only interactions.
- * If rvdw is still longer than rcoulomb we keep the original nstcalclr,
- * but if the coulomb cutoff has become longer we should update the long-range
- * part every step.
- */
- set->nstcalclr = (tmpr_vdw > tmpr_coulomb) ? pme_lb->nstcalclr_start : 1;
- }
- else
- {
- /* We were not doing any long-range interaction from the start,
- * since it is not possible to do twin-range coulomb for the PME interaction.
- */
- set->nstcalclr = 1;
- }
- }
}
set->spacing = sp;
if (fp_err != NULL)
{
fprintf(fp_err, "\r%s\n", buf);
+ fflush(fp_err);
}
if (fp_log != NULL)
{
if (fp_err != NULL)
{
fprintf(fp_err, "\r%s\n", buf);
+ fflush(fp_err);
}
if (fp_log != NULL)
{
t_commrec *cr,
FILE *fp_err,
FILE *fp_log,
- t_inputrec *ir,
+ const t_inputrec *ir,
t_state *state,
double cycles,
interaction_const_t *ic,
set = &pme_lb->setup[pme_lb->cur];
set->count++;
- rtab = ir->rlistlong + ir->tabext;
+ rtab = ir->rlist + ir->tabext;
if (set->count % 2 == 1)
{
* better overal performance can be obtained with a slightly
* shorter cut-off and better DD load balancing.
*/
- set_dd_dlb_max_cutoff(cr, pme_lb->setup[pme_lb->fastest].rlistlong);
+ set_dd_dlb_max_cutoff(cr, pme_lb->setup[pme_lb->fastest].rlist);
}
}
cycles_fast = pme_lb->setup[pme_lb->fastest].cycles;
if (OK && ir->ePBC != epbcNONE)
{
- OK = (sqr(pme_lb->setup[pme_lb->cur+1].rlistlong)
+ OK = (gmx::square(pme_lb->setup[pme_lb->cur+1].rlist)
<= max_cutoff2(ir->ePBC, state->box));
if (!OK)
{
if (DOMAINDECOMP(cr))
{
OK = change_dd_cutoff(cr, state, ir,
- pme_lb->setup[pme_lb->cur].rlistlong);
+ pme_lb->setup[pme_lb->cur].rlist);
if (!OK)
{
/* Failed: do not use this setup */
if (DOMAINDECOMP(cr) && pme_lb->stage > 0)
{
- OK = change_dd_cutoff(cr, state, ir, pme_lb->setup[pme_lb->cur].rlistlong);
+ OK = change_dd_cutoff(cr, state, ir, pme_lb->setup[pme_lb->cur].rlist);
if (!OK)
{
/* For some reason the chosen cut-off is incompatible with DD.
ic->rcoulomb = set->rcut_coulomb;
ic->rlist = set->rlist;
- ic->rlistlong = set->rlistlong;
- ir->nstcalclr = set->nstcalclr;
ic->ewaldcoeff_q = set->ewaldcoeff_q;
/* TODO: centralize the code that sets the potentials shifts */
if (ic->coulomb_modifier == eintmodPOTSHIFT)
{
GMX_RELEASE_ASSERT(ic->rcoulomb != 0, "Cutoff radius cannot be zero");
- ic->sh_ewald = gmx_erfc(ic->ewaldcoeff_q*ic->rcoulomb) / ic->rcoulomb;
+ ic->sh_ewald = std::erfc(ic->ewaldcoeff_q*ic->rcoulomb) / ic->rcoulomb;
}
if (EVDW_PME(ic->vdwtype))
{
{
real crc2;
- ic->dispersion_shift.cpot = -std::pow(static_cast<double>(ic->rvdw), -6.0);
- ic->repulsion_shift.cpot = -std::pow(static_cast<double>(ic->rvdw), -12.0);
+ ic->dispersion_shift.cpot = -1.0/gmx::power6(static_cast<double>(ic->rvdw));
+ ic->repulsion_shift.cpot = -1.0/gmx::power12(static_cast<double>(ic->rvdw));
ic->sh_invrc6 = -ic->dispersion_shift.cpot;
- crc2 = sqr(ic->ewaldcoeff_lj*ic->rvdw);
- ic->sh_lj_ewald = (exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)*std::pow(static_cast<double>(ic->rvdw), -6.0);
+ crc2 = gmx::square(ic->ewaldcoeff_lj*ic->rvdw);
+ ic->sh_lj_ewald = (std::exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)/gmx::power6(ic->rvdw);
}
}
* texture objects are used), but as this is initialization code, there
* is not point in complicating things.
*/
-#ifdef GMX_THREAD_MPI
+#if GMX_THREAD_MPI
if (PAR(cr) && use_GPU(nbv))
{
gmx_barrier(cr);
t_commrec *cr,
FILE *fp_err,
FILE *fp_log,
- t_inputrec *ir,
+ const t_inputrec *ir,
t_forcerec *fr,
t_state *state,
gmx_wallcycle_t wcycle,
* This also ensures that we won't disable the currently
* optimal setting during a second round of PME balancing.
*/
- set_dd_dlb_max_cutoff(cr, fr->ic->rlistlong);
+ set_dd_dlb_max_cutoff(cr, fr->ic->rlist);
}
}
fr->ewaldcoeff_q = fr->ic->ewaldcoeff_q;
fr->ewaldcoeff_lj = fr->ic->ewaldcoeff_lj;
fr->rlist = fr->ic->rlist;
- fr->rlistlong = fr->ic->rlistlong;
fr->rcoulomb = fr->ic->rcoulomb;
fr->rvdw = fr->ic->rvdw;
return setup->grid[XX]*setup->grid[YY]*setup->grid[ZZ];
}
-/*! \brief Retuern the largest short-range list cut-off radius */
-static real pme_loadbal_rlist(const pme_setup_t *setup)
-{
- /* With the group cut-off scheme we can have twin-range either
- * for Coulomb or for VdW, so we need a check here.
- * With the Verlet cut-off scheme rlist=rlistlong.
- */
- if (setup->rcut_coulomb > setup->rlist)
- {
- return setup->rlistlong;
- }
- else
- {
- return setup->rlist;
- }
-}
-
/*! \brief Print one load-balancing setting */
static void print_pme_loadbal_setting(FILE *fplog,
const char *name,
fprintf(fplog,
" %-7s %6.3f nm %6.3f nm %3d %3d %3d %5.3f nm %5.3f nm\n",
name,
- setup->rcut_coulomb, pme_loadbal_rlist(setup),
+ setup->rcut_coulomb, setup->rlist,
setup->grid[XX], setup->grid[YY], setup->grid[ZZ],
setup->spacing, 1/setup->ewaldcoeff_q);
}
double pp_ratio, grid_ratio;
real pp_ratio_temporary;
- pp_ratio_temporary = pme_loadbal_rlist(&pme_lb->setup[pme_lb->cur])/pme_loadbal_rlist(&pme_lb->setup[0]);
- pp_ratio = std::pow(static_cast<double>(pp_ratio_temporary), 3.0);
+ pp_ratio_temporary = pme_lb->setup[pme_lb->cur].rlist / pme_lb->setup[0].rlist;
+ pp_ratio = gmx::power3(pp_ratio_temporary);
grid_ratio = pme_grid_points(&pme_lb->setup[pme_lb->cur])/
(double)pme_grid_points(&pme_lb->setup[0]);