From ce0fc73a169bd7bcdeb806e3ba5348db819e04fd Mon Sep 17 00:00:00 2001 From: ejjordan Date: Tue, 14 Sep 2021 16:25:21 +0200 Subject: [PATCH] Make CpuPpLongRangeNonbondeds class The CpuPpLongRangeNonbondeds class replaces the calculateLongRangeNonbondeds function. Now simulation constant data is passed to a constructor, domain constant data is passed to an updateAfterPartition function, and per step data is passed to a calculate function. --- src/gromacs/mdlib/force.cpp | 216 ++++++++++-------- src/gromacs/mdlib/force.h | 127 ++++++++-- src/gromacs/mdlib/forcerec.cpp | 10 - src/gromacs/mdlib/forcerec_threading.h | 52 ----- src/gromacs/mdlib/sim_util.cpp | 25 +- src/gromacs/mdrun/md.cpp | 6 + src/gromacs/mdrun/mimic.cpp | 5 + src/gromacs/mdrun/minimize.cpp | 5 + src/gromacs/mdrun/rerun.cpp | 5 + src/gromacs/mdrun/runner.cpp | 10 + src/gromacs/mdrun/shellfc.cpp | 3 + src/gromacs/mdrun/shellfc.h | 2 + src/gromacs/mdrun/tpi.cpp | 2 + src/gromacs/mdtypes/forcerec.h | 11 +- src/gromacs/modularsimulator/forceelement.cpp | 38 ++- src/gromacs/modularsimulator/forceelement.h | 9 +- .../modularsimulator/simulatoralgorithm.cpp | 2 +- 17 files changed, 322 insertions(+), 206 deletions(-) delete mode 100644 src/gromacs/mdlib/forcerec_threading.h diff --git a/src/gromacs/mdlib/force.cpp b/src/gromacs/mdlib/force.cpp index 3ac2484a2b..4507dbc7bc 100644 --- a/src/gromacs/mdlib/force.cpp +++ b/src/gromacs/mdlib/force.cpp @@ -53,7 +53,7 @@ #include "gromacs/gmxlib/nrnb.h" #include "gromacs/math/vec.h" #include "gromacs/math/vecdump.h" -#include "gromacs/mdlib/forcerec_threading.h" +#include "gromacs/mdlib/gmx_omp_nthreads.h" #include "gromacs/mdtypes/commrec.h" #include "gromacs/mdtypes/enerdata.h" #include "gromacs/mdtypes/forceoutput.h" @@ -63,12 +63,10 @@ #include "gromacs/mdtypes/md_enums.h" #include "gromacs/mdtypes/mdatom.h" #include "gromacs/mdtypes/simulation_workload.h" -#include "gromacs/pbcutil/ishift.h" #include "gromacs/pbcutil/pbc.h" #include "gromacs/timing/wallcycle.h" #include "gromacs/utility/exceptions.h" #include "gromacs/utility/fatalerror.h" -#include "gromacs/utility/smalloc.h" using gmx::ArrayRef; using gmx::RVec; @@ -100,56 +98,102 @@ static void reduceEwaldThreadOuput(int nthreads, gmx::ArrayRef coordinates, - gmx::ForceWithVirial* forceWithVirial, - gmx_enerdata_t* enerd, - const matrix box, - gmx::ArrayRef lambda, - gmx::ArrayRef mu_tot, - const gmx::StepWorkload& stepWork, - const DDBalanceRegionHandler& ddBalanceRegionHandler) +CpuPpLongRangeNonbondeds::CpuPpLongRangeNonbondeds(int numberOfTestPaticles, + real ewaldCoeffQ, + real epsilonR, + gmx::ArrayRef chargeC6Sum, + CoulombInteractionType eeltype, + VanDerWaalsType vdwtype, + const t_inputrec& inputrec, + t_nrnb* nrnb, + gmx_wallcycle* wcycle, + FILE* fplog) : + numTpiAtoms_(numberOfTestPaticles), + ewaldCoeffQ_(ewaldCoeffQ), + epsilonR_(epsilonR), + chargeC6Sum_(chargeC6Sum), + coulombInteractionType_(eeltype), + vanDerWaalsType_(vdwtype), + ewaldGeometry_(inputrec.ewald_geometry), + epsilonSurface_(inputrec.epsilon_surface), + haveEwaldSurfaceTerm_(haveEwaldSurfaceContribution(inputrec)), + wallEwaldZfac_(inputrec.wall_ewald_zfac), + havePbcXY2Walls_(inputrecPbcXY2Walls(&inputrec)), + freeEnergyPerturbationType_(inputrec.efep), + nrnb_(nrnb), + wcycle_(wcycle) { - const bool computePmeOnCpu = (EEL_PME(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype)) - && thisRankHasDuty(cr, DUTY_PME) - && (pme_run_mode(fr->pmedata) == PmeRunMode::CPU); + GMX_ASSERT(epsilonR == inputrec.epsilon_r, + "Forcerec and inputrec relative dielectrics don't match"); - const bool haveEwaldSurfaceTerm = haveEwaldSurfaceContribution(ir); + // Use the thread count for the bonded module since reducing CPU-side + // non-bonded contributions does not currently have its own thread count. + outputPerThread_.resize(gmx_omp_nthreads_get(ModuleMultiThread::Bonded)); + + if (inputrec.coulombtype == CoulombInteractionType::Ewald) + { + ewaldTable_ = std::make_unique(inputrec, fplog); + } +} + +CpuPpLongRangeNonbondeds::~CpuPpLongRangeNonbondeds() = default; + +void CpuPpLongRangeNonbondeds::updateAfterPartition(const t_mdatoms& md) +{ + homenr_ = md.homenr; + havePerturbed_ = md.nChargePerturbed != 0; + chargeA_ = md.chargeA ? gmx::constArrayRefFromArray(md.chargeA, md.nr) : gmx::ArrayRef{}; + chargeB_ = md.chargeB ? gmx::constArrayRefFromArray(md.chargeB, md.nr) : gmx::ArrayRef{}; + sqrt_c6A_ = md.sqrt_c6A ? gmx::constArrayRefFromArray(md.sqrt_c6A, md.nr) + : gmx::ArrayRef{}; + sqrt_c6B_ = md.sqrt_c6B ? gmx::constArrayRefFromArray(md.sqrt_c6B, md.nr) + : gmx::ArrayRef{}; + sigmaA_ = md.sigmaA ? gmx::constArrayRefFromArray(md.sigmaA, md.nr) : gmx::ArrayRef{}; + sigmaB_ = md.sigmaB ? gmx::constArrayRefFromArray(md.sigmaB, md.nr) : gmx::ArrayRef{}; +} + +void CpuPpLongRangeNonbondeds::calculate(gmx_pme_t* pmedata, + const t_commrec* commrec, + gmx::ArrayRef coordinates, + gmx::ForceWithVirial* forceWithVirial, + gmx_enerdata_t* enerd, + const matrix box, + gmx::ArrayRef lambda, + gmx::ArrayRef mu_tot, + const gmx::StepWorkload& stepWork, + const DDBalanceRegionHandler& ddBalanceRegionHandler) +{ + const bool computePmeOnCpu = (EEL_PME(coulombInteractionType_) || EVDW_PME(vanDerWaalsType_)) + && thisRankHasDuty(commrec, DUTY_PME) + && (pme_run_mode(pmedata) == PmeRunMode::CPU); /* Do long-range electrostatics and/or LJ-PME * and compute PME surface terms when necessary. */ - if ((computePmeOnCpu || fr->ic->eeltype == CoulombInteractionType::Ewald || haveEwaldSurfaceTerm) + if ((computePmeOnCpu || coulombInteractionType_ == CoulombInteractionType::Ewald || haveEwaldSurfaceTerm_) && stepWork.computeNonbondedForces) { - int status = 0; real Vlr_q = 0, Vlr_lj = 0; - /* We reduce all virial, dV/dlambda and energy contributions, except * for the reciprocal energies (Vlr_q, Vlr_lj) into the same struct. */ - ewald_corr_thread_t& ewaldOutput = fr->ewc_t[0]; + ewald_corr_thread_t& ewaldOutput = outputPerThread_[0]; clearEwaldThreadOutput(&ewaldOutput); - if (EEL_PME_EWALD(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype)) + if (EEL_PME_EWALD(coulombInteractionType_) || EVDW_PME(vanDerWaalsType_)) { /* Calculate the Ewald surface force and energy contributions, when necessary */ - if (haveEwaldSurfaceTerm) + if (haveEwaldSurfaceTerm_) { - wallcycle_sub_start(wcycle, WallCycleSubCounter::EwaldCorrection); + wallcycle_sub_start(wcycle_, WallCycleSubCounter::EwaldCorrection); - int nthreads = fr->nthread_ewc; + int nthreads = gmx::ssize(outputPerThread_); #pragma omp parallel for num_threads(nthreads) schedule(static) for (int t = 0; t < nthreads; t++) { try { - ewald_corr_thread_t& ewc_t = fr->ewc_t[t]; + ewald_corr_thread_t& ewc_t = outputPerThread_[t]; if (t > 0) { clearEwaldThreadOutput(&ewc_t); @@ -161,21 +205,19 @@ void calculateLongRangeNonbondeds(t_forcerec* fr, * the forces in the normal, single forceWithVirial->force_ array. */ ewald_LRcorrection( - md->homenr, - cr, + homenr_, + commrec, nthreads, t, - fr->ic->epsilon_r, - fr->qsum, - ir.ewald_geometry, - ir.epsilon_surface, - inputrecPbcXY2Walls(&ir), - ir.wall_ewald_zfac, - md->chargeA ? gmx::constArrayRefFromArray(md->chargeA, md->nr) - : gmx::ArrayRef{}, - md->chargeB ? gmx::constArrayRefFromArray(md->chargeB, md->nr) - : gmx::ArrayRef{}, - (md->nChargePerturbed != 0), + epsilonR_, + chargeC6Sum_, + ewaldGeometry_, + epsilonSurface_, + havePbcXY2Walls_, + wallEwaldZfac_, + chargeA_, + chargeB_, + havePerturbed_, coordinates, box, mu_tot, @@ -188,20 +230,20 @@ void calculateLongRangeNonbondeds(t_forcerec* fr, } if (nthreads > 1) { - reduceEwaldThreadOuput(nthreads, fr->ewc_t); + reduceEwaldThreadOuput(nthreads, outputPerThread_); } - wallcycle_sub_stop(wcycle, WallCycleSubCounter::EwaldCorrection); + wallcycle_sub_stop(wcycle_, WallCycleSubCounter::EwaldCorrection); } - if (EEL_PME_EWALD(fr->ic->eeltype) && fr->n_tpi == 0) + if (EEL_PME_EWALD(coulombInteractionType_) && numTpiAtoms_ == 0) { /* This is not in a subcounter because it takes a negligible and constant-sized amount of time */ ewaldOutput.Vcorr_q += ewald_charge_correction( - cr, - fr->ic->epsilon_r, - fr->ic->ewaldcoeff_q, - fr->qsum, + commrec, + epsilonR_, + ewaldCoeffQ_, + chargeC6Sum_, lambda[static_cast(FreeEnergyPerturbationCouplingType::Coul)], box, &ewaldOutput.dvdl[FreeEnergyPerturbationCouplingType::Coul], @@ -211,8 +253,8 @@ void calculateLongRangeNonbondeds(t_forcerec* fr, if (computePmeOnCpu) { /* Do reciprocal PME for Coulomb and/or LJ. */ - assert(fr->n_tpi >= 0); - if (fr->n_tpi == 0 || stepWork.stateChanged) + assert(numTpiAtoms_ >= 0); + if (numTpiAtoms_ == 0 || stepWork.stateChanged) { /* With domain decomposition we close the CPU side load * balancing region here, because PME does global @@ -220,29 +262,23 @@ void calculateLongRangeNonbondeds(t_forcerec* fr, */ ddBalanceRegionHandler.closeAfterForceComputationCpu(); - wallcycle_start(wcycle, WallCycleCounter::PmeMesh); - status = gmx_pme_do( - fr->pmedata, - gmx::constArrayRefFromArray(coordinates.data(), md->homenr - fr->n_tpi), + wallcycle_start(wcycle_, WallCycleCounter::PmeMesh); + int status = gmx_pme_do( + pmedata, + gmx::constArrayRefFromArray(coordinates.data(), homenr_ - numTpiAtoms_), forceWithVirial->force_, - md->chargeA ? gmx::constArrayRefFromArray(md->chargeA, md->nr) - : gmx::ArrayRef{}, - md->chargeB ? gmx::constArrayRefFromArray(md->chargeB, md->nr) - : gmx::ArrayRef{}, - md->sqrt_c6A ? gmx::constArrayRefFromArray(md->sqrt_c6A, md->nr) - : gmx::ArrayRef{}, - md->sqrt_c6B ? gmx::constArrayRefFromArray(md->sqrt_c6B, md->nr) - : gmx::ArrayRef{}, - md->sigmaA ? gmx::constArrayRefFromArray(md->sigmaA, md->nr) - : gmx::ArrayRef{}, - md->sigmaB ? gmx::constArrayRefFromArray(md->sigmaB, md->nr) - : gmx::ArrayRef{}, + chargeA_, + chargeB_, + sqrt_c6A_, + sqrt_c6B_, + sigmaA_, + sigmaB_, box, - cr, - DOMAINDECOMP(cr) ? dd_pme_maxshift_x(*cr->dd) : 0, - DOMAINDECOMP(cr) ? dd_pme_maxshift_y(*cr->dd) : 0, - nrnb, - wcycle, + commrec, + DOMAINDECOMP(commrec) ? dd_pme_maxshift_x(*commrec->dd) : 0, + DOMAINDECOMP(commrec) ? dd_pme_maxshift_y(*commrec->dd) : 0, + nrnb_, + wcycle_, ewaldOutput.vir_q, ewaldOutput.vir_lj, &Vlr_q, @@ -252,7 +288,7 @@ void calculateLongRangeNonbondeds(t_forcerec* fr, &ewaldOutput.dvdl[FreeEnergyPerturbationCouplingType::Coul], &ewaldOutput.dvdl[FreeEnergyPerturbationCouplingType::Vdw], stepWork); - wallcycle_stop(wcycle, WallCycleCounter::PmeMesh); + wallcycle_stop(wcycle_, WallCycleCounter::PmeMesh); if (status != 0) { gmx_fatal(FARGS, "Error %d in reciprocal PME routine", status); @@ -266,37 +302,37 @@ void calculateLongRangeNonbondeds(t_forcerec* fr, * of the force call (without PME). */ } - if (fr->n_tpi > 0) + if (numTpiAtoms_ > 0) { /* Determine the PME grid energy of the test molecule * with the PME grid potential of the other charges. */ Vlr_q = gmx_pme_calc_energy( - fr->pmedata, - coordinates.subArray(md->homenr - fr->n_tpi, fr->n_tpi), - gmx::arrayRefFromArray(md->chargeA + md->homenr - fr->n_tpi, fr->n_tpi)); + pmedata, + coordinates.subArray(homenr_ - numTpiAtoms_, numTpiAtoms_), + chargeA_.subArray(homenr_ - numTpiAtoms_, numTpiAtoms_)); } } } - if (fr->ic->eeltype == CoulombInteractionType::Ewald) + if (coulombInteractionType_ == CoulombInteractionType::Ewald) { - Vlr_q = do_ewald(inputrecPbcXY2Walls(&ir), - ir.wall_ewald_zfac, - ir.epsilon_r, - ir.efep, + Vlr_q = do_ewald(havePbcXY2Walls_, + wallEwaldZfac_, + epsilonR_, + freeEnergyPerturbationType_, coordinates, forceWithVirial->force_, - gmx::arrayRefFromArray(md->chargeA, md->nr), - gmx::arrayRefFromArray(md->chargeB, md->nr), + chargeA_, + chargeB_, box, - cr, - md->homenr, + commrec, + homenr_, ewaldOutput.vir_q, - fr->ic->ewaldcoeff_q, + ewaldCoeffQ_, lambda[static_cast(FreeEnergyPerturbationCouplingType::Coul)], &ewaldOutput.dvdl[FreeEnergyPerturbationCouplingType::Coul], - fr->ewald_table.get()); + ewaldTable_.get()); } /* Note that with separate PME nodes we get the real energies later */ @@ -330,6 +366,6 @@ void calculateLongRangeNonbondeds(t_forcerec* fr, if (debug) { - print_nrnb(debug, nrnb); + print_nrnb(debug, nrnb_); } } diff --git a/src/gromacs/mdlib/force.h b/src/gromacs/mdlib/force.h index a25a42d6c2..f9ff390fdc 100644 --- a/src/gromacs/mdlib/force.h +++ b/src/gromacs/mdlib/force.h @@ -40,7 +40,12 @@ #include +#include + #include "gromacs/math/vectypes.h" +#include "gromacs/mdtypes/md_enums.h" +#include "gromacs/utility/arrayref.h" +#include "gromacs/utility/enumerationhelpers.h" class DDBalanceRegionHandler; struct gmx_edsam; @@ -50,6 +55,7 @@ struct SimulationGroups; struct gmx_localtop_t; struct gmx_multisim_t; struct gmx_wallcycle; +struct gmx_pme_t; class history_t; class InteractionDefinitions; struct pull_t; @@ -59,12 +65,12 @@ struct t_inputrec; struct t_lambda; struct t_mdatoms; struct t_nrnb; +struct gmx_ewald_tab_t; +class CpuPpLongRangeNonbondeds; namespace gmx { template -class ArrayRef; -template class ArrayRefWithPadding; class Awh; class ForceBuffersView; @@ -76,6 +82,15 @@ class StepWorkload; class VirtualSitesHandler; } // namespace gmx +struct ewald_corr_thread_t +{ + real Vcorr_q; + real Vcorr_lj; + gmx::EnumerationArray dvdl; + tensor vir_q; + tensor vir_lj; +}; + /* Perform the force and, if requested, energy computation * * Without multiple time stepping the force is returned in force->force(). @@ -115,6 +130,7 @@ void do_force(FILE* log, rvec mu_tot, double t, gmx_edsam* ed, + CpuPpLongRangeNonbondeds* longRangeNonbondeds, int legacyFlags, const DDBalanceRegionHandler& ddBalanceRegionHandler); @@ -128,24 +144,93 @@ void do_force(FILE* log, */ -/* Calculate CPU Ewald or PME-mesh forces when done on this rank and Ewald corrections, when used - * - * Note that Ewald dipole and net charge corrections are always computed here, independently - * on whether the PME-mesh contribution is computed on a separate PME rank or on a GPU. - */ -void calculateLongRangeNonbondeds(t_forcerec* fr, - const t_inputrec& ir, - const t_commrec* cr, - t_nrnb* nrnb, - gmx_wallcycle* wcycle, - const t_mdatoms* md, - gmx::ArrayRef coordinates, - gmx::ForceWithVirial* forceWithVirial, - gmx_enerdata_t* enerd, - const matrix box, - gmx::ArrayRef lambda, - gmx::ArrayRef mu_tot, - const gmx::StepWorkload& stepWork, - const DDBalanceRegionHandler& ddBalanceRegionHandler); +class CpuPpLongRangeNonbondeds +{ +public: + /* \brief Constructor + * + * Should be called after init_forcerec if params come from a populated forcerec + */ + CpuPpLongRangeNonbondeds(int numberOfTestPaticles, + real ewaldCoeffQ, + real epsilonR, + gmx::ArrayRef chargeC6Sum, + CoulombInteractionType eeltype, + VanDerWaalsType vdwtype, + const t_inputrec& inputrec, + t_nrnb* nrnb, + gmx_wallcycle* wcycle, + FILE* fplog); + + ~CpuPpLongRangeNonbondeds(); + + void updateAfterPartition(const t_mdatoms& md); + + /* Calculate CPU Ewald or PME-mesh forces when done on this rank and Ewald corrections, when used + * + * Note that Ewald dipole and net charge corrections are always computed here, independently + * of whether the PME-mesh contribution is computed on a separate PME rank or on a GPU. + */ + void calculate(gmx_pme_t* pmedata, + const t_commrec* commrec, + gmx::ArrayRef coordinates, + gmx::ForceWithVirial* forceWithVirial, + gmx_enerdata_t* enerd, + const matrix box, + gmx::ArrayRef lambda, + gmx::ArrayRef mu_tot, + const gmx::StepWorkload& stepWork, + const DDBalanceRegionHandler& ddBalanceRegionHandler); + +private: + //! Number of particles for test particle insertion + int numTpiAtoms_; + //! Ewald charge coefficient + real ewaldCoeffQ_; + //! Dielectric constant + real epsilonR_; + //! [0]: sum of charges; [1]: sum of C6's + gmx::ArrayRef chargeC6Sum_; + //! Cut-off treatment for Coulomb + CoulombInteractionType coulombInteractionType_; + //! Van der Waals interaction treatment + VanDerWaalsType vanDerWaalsType_; + //! Ewald geometry + EwaldGeometry ewaldGeometry_; + //! Epsilon for PME dipole correction + real epsilonSurface_; + //! Whether a long range correction is used + bool haveEwaldSurfaceTerm_; + //! Scaling factor for the box for Ewald + real wallEwaldZfac_; + //! Whether the simulation is 2D periodic with two walls + bool havePbcXY2Walls_; + //! Free energy perturbation type + FreeEnergyPerturbationType freeEnergyPerturbationType_; + //! Number of atoms on this node + int homenr_; + //! Whether there are perturbed interactions + bool havePerturbed_; + //! State A charge + gmx::ArrayRef chargeA_; + //! State B charge + gmx::ArrayRef chargeB_; + //! State A LJ c6 + gmx::ArrayRef sqrt_c6A_; + //! State B LJ c6 + gmx::ArrayRef sqrt_c6B_; + //! State A LJ sigma + gmx::ArrayRef sigmaA_; + //! State B LJ sigma + gmx::ArrayRef sigmaB_; + //! Ewald correction thread local virial and energy data + std::vector outputPerThread_; + //! Ewald table + std::unique_ptr ewaldTable_; + //! Non bonded kernel flop counters + t_nrnb* nrnb_; + //! Wall cycle counters + gmx_wallcycle* wcycle_; +}; #endif diff --git a/src/gromacs/mdlib/forcerec.cpp b/src/gromacs/mdlib/forcerec.cpp index 23a0639167..166ff08603 100644 --- a/src/gromacs/mdlib/forcerec.cpp +++ b/src/gromacs/mdlib/forcerec.cpp @@ -65,7 +65,6 @@ #include "gromacs/math/vec.h" #include "gromacs/mdlib/dispersioncorrection.h" #include "gromacs/mdlib/force.h" -#include "gromacs/mdlib/forcerec_threading.h" #include "gromacs/mdlib/gmx_omp_nthreads.h" #include "gromacs/mdlib/md_support.h" #include "gromacs/mdlib/wall.h" @@ -790,12 +789,6 @@ void init_forcerec(FILE* fplog, const interaction_const_t* interactionConst = forcerec->ic.get(); - /* TODO: Replace this Ewald table or move it into interaction_const_t */ - if (inputrec.coulombtype == CoulombInteractionType::Ewald) - { - forcerec->ewald_table = std::make_unique(inputrec, fplog); - } - /* Electrostatics: Translate from interaction-setting-in-mdp-file to kernel interaction format */ switch (interactionConst->eeltype) { @@ -1070,9 +1063,6 @@ void init_forcerec(FILE* fplog, forcerec->print_force = print_force; - forcerec->nthread_ewc = gmx_omp_nthreads_get(ModuleMultiThread::Bonded); - forcerec->ewc_t.resize(forcerec->nthread_ewc); - if (inputrec.eDispCorr != DispersionCorrectionType::No) { forcerec->dispersionCorrection = std::make_unique( diff --git a/src/gromacs/mdlib/forcerec_threading.h b/src/gromacs/mdlib/forcerec_threading.h deleted file mode 100644 index 8a0e341648..0000000000 --- a/src/gromacs/mdlib/forcerec_threading.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 2014,2015,2018,2019,2021, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. - * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. - */ - -#ifndef GMX_MDLIB_FORCEREC_THREADING_H -#define GMX_MDLIB_FORCEREC_THREADING_H - -#include "gromacs/math/vectypes.h" -#include "gromacs/mdtypes/md_enums.h" -#include "gromacs/utility/enumerationhelpers.h" - -struct ewald_corr_thread_t -{ - real Vcorr_q; - real Vcorr_lj; - gmx::EnumerationArray dvdl; - tensor vir_q; - tensor vir_lj; -}; - -#endif diff --git a/src/gromacs/mdlib/sim_util.cpp b/src/gromacs/mdlib/sim_util.cpp index 69c380a73f..9b7affec5e 100644 --- a/src/gromacs/mdlib/sim_util.cpp +++ b/src/gromacs/mdlib/sim_util.cpp @@ -1236,6 +1236,7 @@ void do_force(FILE* fplog, rvec muTotal, double t, gmx_edsam* ed, + CpuPpLongRangeNonbondeds* longRangeNonbondeds, int legacyFlags, const DDBalanceRegionHandler& ddBalanceRegionHandler) { @@ -1941,20 +1942,16 @@ void do_force(FILE* fplog, if (stepWork.computeSlowForces) { - calculateLongRangeNonbondeds(fr, - inputrec, - cr, - nrnb, - wcycle, - mdatoms, - x.unpaddedConstArrayRef(), - &forceOutMtsLevel1->forceWithVirial(), - enerd, - box, - lambda, - dipoleData.muStateAB, - stepWork, - ddBalanceRegionHandler); + longRangeNonbondeds->calculate(fr->pmedata, + cr, + x.unpaddedConstArrayRef(), + &forceOutMtsLevel1->forceWithVirial(), + enerd, + box, + lambda, + dipoleData.muStateAB, + stepWork, + ddBalanceRegionHandler); } wallcycle_stop(wcycle, WallCycleCounter::Force); diff --git a/src/gromacs/mdrun/md.cpp b/src/gromacs/mdrun/md.cpp index 687760caa7..58d1da65fe 100644 --- a/src/gromacs/mdrun/md.cpp +++ b/src/gromacs/mdrun/md.cpp @@ -402,6 +402,7 @@ void gmx::LegacySimulator::do_md() : gmx::ArrayRef(), md->cTC ? gmx::arrayRefFromArray(md->cTC, md->nr) : gmx::ArrayRef()); + fr->longRangeNonbondeds->updateAfterPartition(*md); } else { @@ -417,6 +418,7 @@ void gmx::LegacySimulator::do_md() : gmx::ArrayRef(), md->cTC ? gmx::arrayRefFromArray(md->cTC, md->nr) : gmx::ArrayRef()); + fr->longRangeNonbondeds->updateAfterPartition(*md); } std::unique_ptr integrator; @@ -1016,6 +1018,7 @@ void gmx::LegacySimulator::do_md() : gmx::ArrayRef(), md->cTC ? gmx::arrayRefFromArray(md->cTC, md->nr) : gmx::ArrayRef()); + fr->longRangeNonbondeds->updateAfterPartition(*md); } } @@ -1152,6 +1155,7 @@ void gmx::LegacySimulator::do_md() &f.view(), force_vir, *md, + fr->longRangeNonbondeds.get(), nrnb, wcycle, shellfc, @@ -1208,6 +1212,7 @@ void gmx::LegacySimulator::do_md() mu_tot, t, ed ? ed->getLegacyED() : nullptr, + fr->longRangeNonbondeds.get(), (bNS ? GMX_FORCE_NS : 0) | force_flags, ddBalanceRegionHandler); } @@ -1989,6 +1994,7 @@ void gmx::LegacySimulator::do_md() : gmx::ArrayRef(), md->cTC ? gmx::arrayRefFromArray(md->cTC, md->nr) : gmx::ArrayRef()); + fr->longRangeNonbondeds->updateAfterPartition(*md); } bFirstStep = FALSE; diff --git a/src/gromacs/mdrun/mimic.cpp b/src/gromacs/mdrun/mimic.cpp index 995336e157..2c08bdf15f 100644 --- a/src/gromacs/mdrun/mimic.cpp +++ b/src/gromacs/mdrun/mimic.cpp @@ -336,6 +336,7 @@ void gmx::LegacySimulator::do_mimic() // (Global topology should persist.) update_mdatoms(mdatoms, state->lambda[FreeEnergyPerturbationCouplingType::Mass]); + fr->longRangeNonbondeds->updateAfterPartition(*mdatoms); if (ir->efep != FreeEnergyPerturbationType::No && ir->fepvals->nstdhdl != 0) { @@ -515,6 +516,8 @@ void gmx::LegacySimulator::do_mimic() update_mdatoms(mdatoms, state->lambda[FreeEnergyPerturbationCouplingType::Mass]); } + fr->longRangeNonbondeds->updateAfterPartition(*mdatoms); + force_flags = (GMX_FORCE_STATECHANGED | GMX_FORCE_DYNAMICBOX | GMX_FORCE_ALLFORCES | GMX_FORCE_VIRIAL | // TODO: Get rid of this once #2649 is solved GMX_FORCE_ENERGY | (doFreeEnergyPerturbation ? GMX_FORCE_DHDL : 0)); @@ -545,6 +548,7 @@ void gmx::LegacySimulator::do_mimic() &f.view(), force_vir, *mdatoms, + fr->longRangeNonbondeds.get(), nrnb, wcycle, shellfc, @@ -590,6 +594,7 @@ void gmx::LegacySimulator::do_mimic() mu_tot, t, ed, + fr->longRangeNonbondeds.get(), GMX_FORCE_NS | force_flags, ddBalanceRegionHandler); } diff --git a/src/gromacs/mdrun/minimize.cpp b/src/gromacs/mdrun/minimize.cpp index d055c942e6..497b3accd9 100644 --- a/src/gromacs/mdrun/minimize.cpp +++ b/src/gromacs/mdrun/minimize.cpp @@ -996,6 +996,8 @@ void EnergyEvaluator::run(em_state_t* ems, rvec mu_tot, tensor vir, tensor pres, setCoordinates(&pairSearchCoordinates, localCoordinates); } + fr->longRangeNonbondeds->updateAfterPartition(*mdAtoms->mdatoms()); + /* Calc force & energy on new trial position */ /* do_force always puts the charge groups in the box and shifts again * We do not unshift, so molecules are always whole in congrad.c @@ -1026,6 +1028,7 @@ void EnergyEvaluator::run(em_state_t* ems, rvec mu_tot, tensor vir, tensor pres, mu_tot, t, nullptr, + fr->longRangeNonbondeds.get(), GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES | GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY | (bNS ? GMX_FORCE_NS : 0), DDBalanceRegionHandler(cr)); @@ -3147,6 +3150,7 @@ void LegacySimulator::do_nm() em_state_t state_work{}; + fr->longRangeNonbondeds->updateAfterPartition(*mdAtoms->mdatoms()); ObservablesReducer observablesReducer = observablesReducerBuilder->build(); /* Init em and store the local state in state_minimum */ @@ -3360,6 +3364,7 @@ void LegacySimulator::do_nm() &state_work.f.view(), vir, *mdatoms, + fr->longRangeNonbondeds.get(), nrnb, wcycle, shellfc, diff --git a/src/gromacs/mdrun/rerun.cpp b/src/gromacs/mdrun/rerun.cpp index 877f73dddc..799b1b716a 100644 --- a/src/gromacs/mdrun/rerun.cpp +++ b/src/gromacs/mdrun/rerun.cpp @@ -374,6 +374,7 @@ void gmx::LegacySimulator::do_rerun() } auto* mdatoms = mdAtoms->mdatoms(); + fr->longRangeNonbondeds->updateAfterPartition(*mdatoms); // NOTE: The global state is no longer used at this point. // But state_global is still used as temporary storage space for writing @@ -626,6 +627,8 @@ void gmx::LegacySimulator::do_rerun() update_mdatoms(mdatoms, state->lambda[FreeEnergyPerturbationCouplingType::Mass]); } + fr->longRangeNonbondeds->updateAfterPartition(*mdatoms); + force_flags = (GMX_FORCE_STATECHANGED | GMX_FORCE_DYNAMICBOX | GMX_FORCE_ALLFORCES | GMX_FORCE_VIRIAL | // TODO: Get rid of this once #2649 and #3400 are solved GMX_FORCE_ENERGY | (doFreeEnergyPerturbation ? GMX_FORCE_DHDL : 0)); @@ -656,6 +659,7 @@ void gmx::LegacySimulator::do_rerun() &f.view(), force_vir, *mdatoms, + fr->longRangeNonbondeds.get(), nrnb, wcycle, shellfc, @@ -701,6 +705,7 @@ void gmx::LegacySimulator::do_rerun() mu_tot, t, ed, + fr->longRangeNonbondeds.get(), GMX_FORCE_NS | force_flags, ddBalanceRegionHandler); } diff --git a/src/gromacs/mdrun/runner.cpp b/src/gromacs/mdrun/runner.cpp index 09964ee8b7..ffc5db038c 100644 --- a/src/gromacs/mdrun/runner.cpp +++ b/src/gromacs/mdrun/runner.cpp @@ -1712,6 +1712,16 @@ int Mdrunner::mdrunner() deviceStreamManager->bondedStream(havePPDomainDecomposition(cr)), wcycle.get()); } + fr->longRangeNonbondeds = std::make_unique(fr->n_tpi, + fr->ic->ewaldcoeff_q, + fr->ic->epsilon_r, + fr->qsum, + fr->ic->eeltype, + fr->ic->vdwtype, + *inputrec, + &nrnb, + wcycle.get(), + fplog); /* Initialize the mdAtoms structure. * mdAtoms is not filled with atom data, diff --git a/src/gromacs/mdrun/shellfc.cpp b/src/gromacs/mdrun/shellfc.cpp index 3c000e1add..1284b62fb5 100644 --- a/src/gromacs/mdrun/shellfc.cpp +++ b/src/gromacs/mdrun/shellfc.cpp @@ -956,6 +956,7 @@ void relax_shell_flexcon(FILE* fplog, gmx::ForceBuffersView* f, tensor force_vir, const t_mdatoms& md, + CpuPpLongRangeNonbondeds* longRangeNonbondeds, t_nrnb* nrnb, gmx_wallcycle* wcycle, gmx_shellfc_t* shfc, @@ -1084,6 +1085,7 @@ void relax_shell_flexcon(FILE* fplog, mu_tot, t, nullptr, + longRangeNonbondeds, (bDoNS ? GMX_FORCE_NS : 0) | shellfc_flags, ddBalanceRegionHandler); @@ -1224,6 +1226,7 @@ void relax_shell_flexcon(FILE* fplog, mu_tot, t, nullptr, + longRangeNonbondeds, shellfc_flags, ddBalanceRegionHandler); accumulatePotentialEnergies(enerd, lambda, inputrec->fepvals.get()); diff --git a/src/gromacs/mdrun/shellfc.h b/src/gromacs/mdrun/shellfc.h index b8a314679c..e48874f0ca 100644 --- a/src/gromacs/mdrun/shellfc.h +++ b/src/gromacs/mdrun/shellfc.h @@ -59,6 +59,7 @@ struct t_inputrec; struct t_mdatoms; struct t_nrnb; class t_state; +class CpuPpLongRangeNonbondeds; namespace gmx { @@ -115,6 +116,7 @@ void relax_shell_flexcon(FILE* log, gmx::ForceBuffersView* f, tensor force_vir, const t_mdatoms& md, + CpuPpLongRangeNonbondeds* longRangeNonbondeds, t_nrnb* nrnb, gmx_wallcycle* wcycle, gmx_shellfc_t* shfc, diff --git a/src/gromacs/mdrun/tpi.cpp b/src/gromacs/mdrun/tpi.cpp index e84fdff31a..0827b4a03d 100644 --- a/src/gromacs/mdrun/tpi.cpp +++ b/src/gromacs/mdrun/tpi.cpp @@ -751,6 +751,7 @@ void LegacySimulator::do_tpi() /* Note: NonLocal refers to the inserted molecule */ fr->nbv->convertCoordinates(AtomLocality::NonLocal, x); + fr->longRangeNonbondeds->updateAfterPartition(*mdatoms); /* Clear some matrix variables */ clear_mat(force_vir); @@ -794,6 +795,7 @@ void LegacySimulator::do_tpi() mu_tot, t, nullptr, + fr->longRangeNonbondeds.get(), GMX_FORCE_NONBONDED | GMX_FORCE_ENERGY | (bStateChanged ? GMX_FORCE_STATECHANGED : 0), DDBalanceRegionHandler(nullptr)); std::feclearexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); diff --git a/src/gromacs/mdtypes/forcerec.h b/src/gromacs/mdtypes/forcerec.h index d4bcb4c350..dffb712b56 100644 --- a/src/gromacs/mdtypes/forcerec.h +++ b/src/gromacs/mdtypes/forcerec.h @@ -59,6 +59,7 @@ struct bonded_threading_t; class DeviceContext; class DispersionCorrection; class ListedForces; +class CpuPpLongRangeNonbondeds; struct t_fcdata; struct t_forcetable; struct interaction_const_t; @@ -85,8 +86,6 @@ real cutoff_inf(real cutoff); /* Forward declaration of type for managing Ewald tables */ struct gmx_ewald_tab_t; -struct ewald_corr_thread_t; - /*! \brief Helper force buffers for ForceOutputs * * This class stores intermediate force buffers that are used @@ -216,9 +215,6 @@ struct t_forcerec gmx_pme_t* pmedata = nullptr; LongRangeVdW ljpme_combination_rule = LongRangeVdW::Geom; - /* PME/Ewald stuff */ - std::unique_ptr ewald_table; - /* Non bonded Parameter lists */ int ntype = 0; /* Number of atom types */ bool haveBuckingham = false; @@ -260,9 +256,8 @@ struct t_forcerec // The listed forces calculation data for GPU std::unique_ptr listedForcesGpu; - /* Ewald correction thread local virial and energy data */ - int nthread_ewc = 0; - std::vector ewc_t; + // The long range non-bonded forces + std::unique_ptr longRangeNonbondeds; gmx::ForceProviders* forceProviders = nullptr; diff --git a/src/gromacs/modularsimulator/forceelement.cpp b/src/gromacs/modularsimulator/forceelement.cpp index 586e1bc2be..3e4c0f414e 100644 --- a/src/gromacs/modularsimulator/forceelement.cpp +++ b/src/gromacs/modularsimulator/forceelement.cpp @@ -51,7 +51,9 @@ #include "gromacs/mdlib/mdatoms.h" #include "gromacs/mdrun/shellfc.h" #include "gromacs/mdtypes/forcebuffers.h" +#include "gromacs/mdtypes/forcerec.h" #include "gromacs/mdtypes/inputrec.h" +#include "gromacs/mdtypes/interaction_const.h" #include "gromacs/mdtypes/mdatom.h" #include "gromacs/mdtypes/mdrunoptions.h" #include "gromacs/mdtypes/simulation_workload.h" @@ -81,15 +83,14 @@ ForceElement::ForceElement(StatePropagatorData* statePropagatorData, const MDAtoms* mdAtoms, t_nrnb* nrnb, t_forcerec* fr, - - gmx_wallcycle* wcycle, - MdrunScheduleWorkload* runScheduleWork, - VirtualSitesHandler* vsite, - ImdSession* imdSession, - pull_t* pull_work, - Constraints* constr, - const gmx_mtop_t& globalTopology, - gmx_enfrot* enforcedRotation) : + gmx_wallcycle* wcycle, + MdrunScheduleWorkload* runScheduleWork, + VirtualSitesHandler* vsite, + ImdSession* imdSession, + pull_t* pull_work, + Constraints* constr, + const gmx_mtop_t& globalTopology, + gmx_enfrot* enforcedRotation) : shellfc_(init_shell_flexcon(fplog, globalTopology, constr ? constr->numFlexibleConstraints() : 0, @@ -109,6 +110,16 @@ ForceElement::ForceElement(StatePropagatorData* statePropagatorData, isVerbose_(isVerbose), nShellRelaxationSteps_(0), ddBalanceRegionHandler_(cr), + longRangeNonbondeds_(std::make_unique(fr->n_tpi, + fr->ic->ewaldcoeff_q, + fr->ic->epsilon_r, + fr->qsum, + fr->ic->eeltype, + fr->ic->vdwtype, + *inputrec, + nrnb, + wcycle, + fplog)), lambda_(), fplog_(fplog), cr_(cr), @@ -190,6 +201,8 @@ void ForceElement::run(Step step, Time time, unsigned int flags) ArrayRef lambda = freeEnergyPerturbationData_ ? freeEnergyPerturbationData_->lambdaView() : lambda_; + longRangeNonbondeds_->updateAfterPartition(*mdAtoms_->mdatoms()); + if (doShellFC) { auto v = statePropagatorData_->velocitiesView(); @@ -217,6 +230,7 @@ void ForceElement::run(Step step, Time time, unsigned int flags) &forces, force_vir, *mdAtoms_->mdatoms(), + longRangeNonbondeds_.get(), nrnb_, wcycle_, shellfc_, @@ -260,6 +274,7 @@ void ForceElement::run(Step step, Time time, unsigned int flags) energyData_->muTot(), time, ed, + longRangeNonbondeds_.get(), static_cast(flags), ddBalanceRegionHandler_); } @@ -301,6 +316,11 @@ std::optional ForceElement::registerEnergyCallback(EnergySign return std::nullopt; } +DomDecCallback ForceElement::registerDomDecCallback() +{ + return [this]() { longRangeNonbondeds_->updateAfterPartition(*mdAtoms_->mdatoms()); }; +} + ISimulatorElement* ForceElement::getElementPointerImpl(LegacySimulatorData* legacySimulatorData, ModularSimulatorAlgorithmBuilderHelper* builderHelper, diff --git a/src/gromacs/modularsimulator/forceelement.h b/src/gromacs/modularsimulator/forceelement.h index c778d37be9..b9fd205759 100644 --- a/src/gromacs/modularsimulator/forceelement.h +++ b/src/gromacs/modularsimulator/forceelement.h @@ -60,6 +60,7 @@ struct gmx_enfrot; struct gmx_shellfc_t; struct gmx_wallcycle; +class CpuPpLongRangeNonbondeds; struct pull_t; struct t_nrnb; @@ -89,7 +90,8 @@ class ForceElement final : public ISimulatorElement, public ITopologyHolderClient, public INeighborSearchSignallerClient, - public IEnergySignallerClient + public IEnergySignallerClient, + public IDomDecHelperClient { public: //! Constructor @@ -146,6 +148,9 @@ public: GlobalCommunicationHelper* globalCommunicationHelper, ObservablesReducer* observablesReducer); + //! Callback on domain decomposition repartitioning + DomDecCallback registerDomDecCallback() override; + private: //! ITopologyHolderClient implementation void setTopology(const gmx_localtop_t* top) override; @@ -191,6 +196,8 @@ private: //! DD / DLB helper object const DDBalanceRegionHandler ddBalanceRegionHandler_; + //! Long range force calculator + std::unique_ptr longRangeNonbondeds_; /* \brief The FEP lambda vector * diff --git a/src/gromacs/modularsimulator/simulatoralgorithm.cpp b/src/gromacs/modularsimulator/simulatoralgorithm.cpp index 4b035d12ae..8ef941fd18 100644 --- a/src/gromacs/modularsimulator/simulatoralgorithm.cpp +++ b/src/gromacs/modularsimulator/simulatoralgorithm.cpp @@ -585,7 +585,7 @@ ModularSimulatorAlgorithm ModularSimulatorAlgorithmBuilder::build() simulationsShareState); registerWithInfrastructureAndSignallers(trajectoryElement.get()); - // Build domdec helper + // Build domdec helper (free energy element is a client, so keep this after it is built) if (DOMAINDECOMP(legacySimulatorData_->cr)) { algorithm.domDecHelper_ = -- 2.22.0