2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
5 * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
38 * \brief This file defines high-level functions for mdrun to compute
39 * energies and forces for listed interactions.
41 * \author Mark Abraham <mark.j.abraham@gmail.com>
43 * \ingroup module_listed_forces
47 #include "listed_forces.h"
55 #include "gromacs/gmxlib/network.h"
56 #include "gromacs/gmxlib/nrnb.h"
57 #include "gromacs/listed_forces/bonded.h"
58 #include "gromacs/listed_forces/disre.h"
59 #include "gromacs/listed_forces/orires.h"
60 #include "gromacs/listed_forces/pairs.h"
61 #include "gromacs/listed_forces/position_restraints.h"
62 #include "gromacs/math/vec.h"
63 #include "gromacs/mdlib/enerdata_utils.h"
64 #include "gromacs/mdlib/force.h"
65 #include "gromacs/mdlib/gmx_omp_nthreads.h"
66 #include "gromacs/mdtypes/commrec.h"
67 #include "gromacs/mdtypes/fcdata.h"
68 #include "gromacs/mdtypes/forceoutput.h"
69 #include "gromacs/mdtypes/forcerec.h"
70 #include "gromacs/mdtypes/inputrec.h"
71 #include "gromacs/mdtypes/md_enums.h"
72 #include "gromacs/mdtypes/simulation_workload.h"
73 #include "gromacs/pbcutil/ishift.h"
74 #include "gromacs/pbcutil/pbc.h"
75 #include "gromacs/timing/wallcycle.h"
76 #include "gromacs/topology/topology.h"
77 #include "gromacs/utility/exceptions.h"
78 #include "gromacs/utility/fatalerror.h"
80 #include "listed_internal.h"
81 #include "manage_threading.h"
82 #include "utilities.h"
84 ListedForces::ListedForces(const gmx_ffparams_t& ffparams,
85 const int numEnergyGroups,
87 const InteractionSelection interactionSelection,
89 idefSelection_(ffparams),
90 threading_(std::make_unique<bonded_threading_t>(numThreads, numEnergyGroups, fplog)),
91 interactionSelection_(interactionSelection)
95 ListedForces::ListedForces(ListedForces&& o) noexcept = default;
97 ListedForces::~ListedForces() = default;
99 //! Copies the selection interactions from \p idefSrc to \p idef
100 static void selectInteractions(InteractionDefinitions* idef,
101 const InteractionDefinitions& idefSrc,
102 const ListedForces::InteractionSelection interactionSelection)
104 const bool selectPairs =
105 interactionSelection.test(static_cast<int>(ListedForces::InteractionGroup::Pairs));
106 const bool selectDihedrals =
107 interactionSelection.test(static_cast<int>(ListedForces::InteractionGroup::Dihedrals));
108 const bool selectAngles =
109 interactionSelection.test(static_cast<int>(ListedForces::InteractionGroup::Angles));
110 const bool selectRest =
111 interactionSelection.test(static_cast<int>(ListedForces::InteractionGroup::Rest));
113 for (int ftype = 0; ftype < F_NRE; ftype++)
115 const t_interaction_function& ifunc = interaction_function[ftype];
116 if (ifunc.flags & IF_BOND)
119 if (ifunc.flags & IF_PAIR)
121 assign = selectPairs;
123 else if (ifunc.flags & IF_DIHEDRAL)
125 assign = selectDihedrals;
127 else if (ifunc.flags & IF_ATYPE)
129 assign = selectAngles;
137 idef->il[ftype] = idefSrc.il[ftype];
141 idef->il[ftype].clear();
147 void ListedForces::setup(const InteractionDefinitions& domainIdef, const int numAtomsForce, const bool useGpu)
149 if (interactionSelection_.all())
151 // Avoid the overhead of copying all interaction lists by simply setting the reference to the domain idef
156 idef_ = &idefSelection_;
158 selectInteractions(&idefSelection_, domainIdef, interactionSelection_);
160 idefSelection_.ilsort = domainIdef.ilsort;
162 if (interactionSelection_.test(static_cast<int>(ListedForces::InteractionGroup::Rest)))
164 idefSelection_.iparams_posres = domainIdef.iparams_posres;
165 idefSelection_.iparams_fbposres = domainIdef.iparams_fbposres;
169 idefSelection_.iparams_posres.clear();
170 idefSelection_.iparams_fbposres.clear();
174 setup_bonded_threading(threading_.get(), numAtomsForce, useGpu, *idef_);
176 if (idef_->ilsort == ilsortFE_SORTED)
178 forceBufferLambda_.resize(numAtomsForce * sizeof(rvec4) / sizeof(real));
179 shiftForceBufferLambda_.resize(SHIFTS);
188 /*! \brief Return true if ftype is an explicit pair-listed LJ or
189 * COULOMB interaction type: bonded LJ (usually 1-4), or special
190 * listed non-bonded for FEP. */
191 bool isPairInteraction(int ftype)
193 return ((ftype) >= F_LJ14 && (ftype) <= F_LJC_PAIRS_NB);
196 /*! \brief Zero thread-local output buffers */
197 void zero_thread_output(f_thread_t* f_t)
199 constexpr int nelem_fa = sizeof(f_t->f[0]) / sizeof(real);
201 for (int i = 0; i < f_t->nblock_used; i++)
203 int a0 = f_t->block_index[i] * reduction_block_size;
204 int a1 = a0 + reduction_block_size;
205 for (int a = a0; a < a1; a++)
207 for (int d = 0; d < nelem_fa; d++)
214 for (int i = 0; i < SHIFTS; i++)
216 clear_rvec(f_t->fshift[i]);
218 for (int i = 0; i < F_NRE; i++)
222 for (int i = 0; i < egNR; i++)
224 for (int j = 0; j < f_t->grpp.nener; j++)
226 f_t->grpp.ener[i][j] = 0;
229 for (int i = 0; i < efptNR; i++)
235 /*! \brief The max thread number is arbitrary, we used a fixed number
236 * to avoid memory management. Using more than 16 threads is probably
237 * never useful performance wise. */
238 #define MAX_BONDED_THREADS 256
240 /*! \brief Reduce thread-local force buffers */
241 void reduce_thread_forces(gmx::ArrayRef<gmx::RVec> force, const bonded_threading_t* bt, int nthreads)
243 if (nthreads > MAX_BONDED_THREADS)
245 gmx_fatal(FARGS, "Can not reduce bonded forces on more than %d threads", MAX_BONDED_THREADS);
248 rvec* gmx_restrict f = as_rvec_array(force.data());
250 const int numAtomsForce = bt->numAtomsForce;
252 /* This reduction can run on any number of threads,
253 * independently of bt->nthreads.
254 * But if nthreads matches bt->nthreads (which it currently does)
255 * the uniform distribution of the touched blocks over nthreads will
256 * match the distribution of bonded over threads well in most cases,
257 * which means that threads mostly reduce their own data which increases
258 * the number of cache hits.
260 #pragma omp parallel for num_threads(nthreads) schedule(static)
261 for (int b = 0; b < bt->nblock_used; b++)
265 int ind = bt->block_index[b];
266 rvec4* fp[MAX_BONDED_THREADS];
268 /* Determine which threads contribute to this block */
270 for (int ft = 0; ft < bt->nthreads; ft++)
272 if (bitmask_is_set(bt->mask[ind], ft))
274 fp[nfb++] = bt->f_t[ft]->f;
279 /* Reduce force buffers for threads that contribute */
280 int a0 = ind * reduction_block_size;
281 int a1 = (ind + 1) * reduction_block_size;
282 /* It would be nice if we could pad f to avoid this min */
283 a1 = std::min(a1, numAtomsForce);
284 for (int a = a0; a < a1; a++)
286 for (int fb = 0; fb < nfb; fb++)
288 rvec_inc(f[a], fp[fb][a]);
293 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
297 /*! \brief Reduce thread-local forces, shift forces and energies */
298 void reduce_thread_output(gmx::ForceWithShiftForces* forceWithShiftForces,
300 gmx_grppairener_t* grpp,
302 const bonded_threading_t* bt,
303 const gmx::StepWorkload& stepWork)
305 assert(bt->haveBondeds);
307 if (bt->nblock_used > 0)
309 /* Reduce the bonded force buffer */
310 reduce_thread_forces(forceWithShiftForces->force(), bt, bt->nthreads);
313 rvec* gmx_restrict fshift = as_rvec_array(forceWithShiftForces->shiftForces().data());
315 /* When necessary, reduce energy and virial using one thread only */
316 if ((stepWork.computeEnergy || stepWork.computeVirial || stepWork.computeDhdl) && bt->nthreads > 1)
318 gmx::ArrayRef<const std::unique_ptr<f_thread_t>> f_t = bt->f_t;
320 if (stepWork.computeVirial)
322 for (int i = 0; i < SHIFTS; i++)
324 for (int t = 1; t < bt->nthreads; t++)
326 rvec_inc(fshift[i], f_t[t]->fshift[i]);
330 if (stepWork.computeEnergy)
332 for (int i = 0; i < F_NRE; i++)
334 for (int t = 1; t < bt->nthreads; t++)
336 ener[i] += f_t[t]->ener[i];
339 for (int i = 0; i < egNR; i++)
341 for (int j = 0; j < f_t[1]->grpp.nener; j++)
343 for (int t = 1; t < bt->nthreads; t++)
345 grpp->ener[i][j] += f_t[t]->grpp.ener[i][j];
350 if (stepWork.computeDhdl)
352 for (int i = 0; i < efptNR; i++)
355 for (int t = 1; t < bt->nthreads; t++)
357 dvdl[i] += f_t[t]->dvdl[i];
364 /*! \brief Returns the bonded kernel flavor
366 * Note that energies are always requested when the virial
367 * is requested (performance gain would be small).
368 * Note that currently we do not have bonded kernels that
369 * do not compute forces.
371 BondedKernelFlavor selectBondedKernelFlavor(const gmx::StepWorkload& stepWork,
372 const bool useSimdKernels,
373 const bool havePerturbedInteractions)
375 BondedKernelFlavor flavor;
376 if (stepWork.computeEnergy || stepWork.computeVirial)
378 if (stepWork.computeVirial)
380 flavor = BondedKernelFlavor::ForcesAndVirialAndEnergy;
384 flavor = BondedKernelFlavor::ForcesAndEnergy;
389 if (useSimdKernels && !havePerturbedInteractions)
391 flavor = BondedKernelFlavor::ForcesSimdWhenAvailable;
395 flavor = BondedKernelFlavor::ForcesNoSimd;
402 /*! \brief Calculate one element of the list of bonded interactions
404 real calc_one_bond(int thread,
406 const InteractionDefinitions& idef,
407 ArrayRef<const int> iatoms,
408 const int numNonperturbedInteractions,
409 const WorkDivision& workDivision,
413 const t_forcerec* fr,
415 gmx_grppairener_t* grpp,
421 const gmx::StepWorkload& stepWork,
422 int* global_atom_index)
424 GMX_ASSERT(idef.ilsort == ilsortNO_FE || idef.ilsort == ilsortFE_SORTED,
425 "The topology should be marked either as no FE or sorted on FE");
427 const bool havePerturbedInteractions =
428 (idef.ilsort == ilsortFE_SORTED && numNonperturbedInteractions < iatoms.ssize());
429 BondedKernelFlavor flavor =
430 selectBondedKernelFlavor(stepWork, fr->use_simd_kernels, havePerturbedInteractions);
432 if (IS_RESTRAINT_TYPE(ftype))
434 efptFTYPE = efptRESTRAINT;
438 efptFTYPE = efptBONDED;
441 const int nat1 = interaction_function[ftype].nratoms + 1;
442 const int nbonds = iatoms.ssize() / nat1;
444 GMX_ASSERT(fr->gpuBonded != nullptr || workDivision.end(ftype) == iatoms.ssize(),
445 "The thread division should match the topology");
447 const int nb0 = workDivision.bound(ftype, thread);
448 const int nbn = workDivision.bound(ftype, thread + 1) - nb0;
450 ArrayRef<const t_iparams> iparams = idef.iparams;
453 if (!isPairInteraction(ftype))
457 /* TODO The execution time for CMAP dihedrals might be
458 nice to account to its own subtimer, but first
459 wallcycle needs to be extended to support calling from
477 v = calculateSimpleBond(ftype,
495 /* TODO The execution time for pairs might be nice to account
496 to its own subtimer, but first wallcycle needs to be
497 extended to support calling from multiple threads. */
510 havePerturbedInteractions,
518 inc_nrnb(nrnb, nrnbIndex(ftype), nbonds);
526 /*! \brief Compute the bonded part of the listed forces, parallelized over threads
528 static void calcBondedForces(const InteractionDefinitions& idef,
529 bonded_threading_t* bt,
531 const t_forcerec* fr,
532 const t_pbc* pbc_null,
533 rvec* fshiftMasterBuffer,
534 gmx_enerdata_t* enerd,
540 const gmx::StepWorkload& stepWork,
541 int* global_atom_index)
543 #pragma omp parallel for num_threads(bt->nthreads) schedule(static)
544 for (int thread = 0; thread < bt->nthreads; thread++)
548 f_thread_t& threadBuffers = *bt->f_t[thread];
554 gmx_grppairener_t* grpp;
556 zero_thread_output(&threadBuffers);
558 rvec4* ft = threadBuffers.f;
560 /* Thread 0 writes directly to the main output buffers.
561 * We might want to reconsider this.
565 fshift = fshiftMasterBuffer;
572 fshift = as_rvec_array(threadBuffers.fshift.data());
573 epot = threadBuffers.ener;
574 grpp = &threadBuffers.grpp;
575 dvdlt = threadBuffers.dvdl;
577 /* Loop over all bonded force types to calculate the bonded forces */
578 for (ftype = 0; (ftype < F_NRE); ftype++)
580 const InteractionList& ilist = idef.il[ftype];
581 if (!ilist.empty() && ftype_is_bonded_potential(ftype))
583 ArrayRef<const int> iatoms = gmx::makeConstArrayRef(ilist.iatoms);
584 v = calc_one_bond(thread,
588 idef.numNonperturbedInteractions[ftype],
607 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
611 bool ListedForces::haveRestraints(const t_fcdata& fcdata) const
613 GMX_ASSERT(fcdata.orires && fcdata.disres, "NMR restraints objects should be set up");
615 return (!idef_->il[F_POSRES].empty() || !idef_->il[F_FBPOSRES].empty() || fcdata.orires->nr > 0
616 || fcdata.disres->nres > 0);
619 bool ListedForces::haveCpuBondeds() const
621 return threading_->haveBondeds;
624 bool ListedForces::haveCpuListedForces(const t_fcdata& fcdata) const
626 return haveCpuBondeds() || haveRestraints(fcdata);
632 /*! \brief Calculates all listed force interactions. */
633 void calc_listed(struct gmx_wallcycle* wcycle,
634 const InteractionDefinitions& idef,
635 bonded_threading_t* bt,
637 gmx::ForceOutputs* forceOutputs,
638 const t_forcerec* fr,
640 gmx_enerdata_t* enerd,
645 int* global_atom_index,
646 const gmx::StepWorkload& stepWork)
650 gmx::ForceWithShiftForces& forceWithShiftForces = forceOutputs->forceWithShiftForces();
652 wallcycle_sub_start(wcycle, ewcsLISTED);
653 /* The dummy array is to have a place to store the dhdl at other values
654 of lambda, which will be thrown away in the end */
655 real dvdl[efptNR] = { 0 };
656 calcBondedForces(idef,
660 fr->bMolPBC ? pbc : nullptr,
661 as_rvec_array(forceWithShiftForces.shiftForces().data()),
670 wallcycle_sub_stop(wcycle, ewcsLISTED);
672 wallcycle_sub_start(wcycle, ewcsLISTED_BUF_OPS);
673 reduce_thread_output(&forceWithShiftForces, enerd->term, &enerd->grpp, dvdl, bt, stepWork);
675 if (stepWork.computeDhdl)
677 for (int i = 0; i < efptNR; i++)
679 enerd->dvdl_nonlin[i] += dvdl[i];
682 wallcycle_sub_stop(wcycle, ewcsLISTED_BUF_OPS);
685 /* Copy the sum of violations for the distance restraints from fcd */
688 enerd->term[F_DISRESVIOL] = fcd->disres->sumviol;
692 /*! \brief As calc_listed(), but only determines the potential energy
693 * for the perturbed interactions.
695 * The shift forces in fr are not affected.
697 void calc_listed_lambda(const InteractionDefinitions& idef,
698 bonded_threading_t* bt,
700 const t_forcerec* fr,
701 const struct t_pbc* pbc,
702 gmx::ArrayRef<real> forceBufferLambda,
703 gmx::ArrayRef<gmx::RVec> shiftForceBufferLambda,
704 gmx_grppairener_t* grpp,
706 gmx::ArrayRef<real> dvdl,
711 int* global_atom_index)
713 WorkDivision& workDivision = bt->foreignLambdaWorkDivision;
715 const t_pbc* pbc_null;
725 /* We already have the forces, so we use temp buffers here */
726 std::fill(forceBufferLambda.begin(), forceBufferLambda.end(), 0.0_real);
727 std::fill(shiftForceBufferLambda.begin(),
728 shiftForceBufferLambda.end(),
729 gmx::RVec{ 0.0_real, 0.0_real, 0.0_real });
730 rvec4* f = reinterpret_cast<rvec4*>(forceBufferLambda.data());
731 rvec* fshift = as_rvec_array(shiftForceBufferLambda.data());
733 /* Loop over all bonded force types to calculate the bonded energies */
734 for (int ftype = 0; (ftype < F_NRE); ftype++)
736 if (ftype_is_bonded_potential(ftype))
738 const InteractionList& ilist = idef.il[ftype];
739 /* Create a temporary iatom list with only perturbed interactions */
740 const int numNonperturbed = idef.numNonperturbedInteractions[ftype];
741 ArrayRef<const int> iatomsPerturbed = gmx::constArrayRefFromArray(
742 ilist.iatoms.data() + numNonperturbed, ilist.size() - numNonperturbed);
743 if (!iatomsPerturbed.empty())
745 /* Set the work range of thread 0 to the perturbed bondeds */
746 workDivision.setBound(ftype, 0, 0);
747 workDivision.setBound(ftype, 1, iatomsPerturbed.ssize());
749 gmx::StepWorkload tempFlags;
750 tempFlags.computeEnergy = true;
751 real v = calc_one_bond(0,
755 iatomsPerturbed.ssize(),
778 void ListedForces::calculate(struct gmx_wallcycle* wcycle,
780 const t_lambda* fepvals,
782 const gmx_multisim_t* ms,
783 gmx::ArrayRefWithPadding<const gmx::RVec> coordinates,
784 gmx::ArrayRef<const gmx::RVec> xWholeMolecules,
786 const history_t* hist,
787 gmx::ForceOutputs* forceOutputs,
788 const t_forcerec* fr,
789 const struct t_pbc* pbc,
790 gmx_enerdata_t* enerd,
794 int* global_atom_index,
795 const gmx::StepWorkload& stepWork)
797 if (interactionSelection_.none() || !stepWork.computeListedForces)
802 const InteractionDefinitions& idef = *idef_;
804 // Todo: replace all rvec use here with ArrayRefWithPadding
805 const rvec* x = as_rvec_array(coordinates.paddedArrayRef().data());
807 const bool calculateRestInteractions =
808 interactionSelection_.test(static_cast<int>(ListedForces::InteractionGroup::Rest));
810 t_pbc pbc_full; /* Full PBC is needed for position restraints */
811 if (calculateRestInteractions && haveRestraints(*fcdata))
813 if (!idef.il[F_POSRES].empty() || !idef.il[F_FBPOSRES].empty())
815 /* Not enough flops to bother counting */
816 set_pbc(&pbc_full, fr->pbcType, box);
819 /* TODO Use of restraints triggers further function calls
820 inside the loop over calc_one_bond(), but those are too
821 awkward to account to this subtimer properly in the present
822 code. We don't test / care much about performance with
823 restraints, anyway. */
824 wallcycle_sub_start(wcycle, ewcsRESTRAINTS);
826 if (!idef.il[F_POSRES].empty())
828 posres_wrapper(nrnb, idef, &pbc_full, x, enerd, lambda, fr, &forceOutputs->forceWithVirial());
831 if (!idef.il[F_FBPOSRES].empty())
833 fbposres_wrapper(nrnb, idef, &pbc_full, x, enerd, fr, &forceOutputs->forceWithVirial());
836 /* Do pre force calculation stuff which might require communication */
837 if (fcdata->orires->nr > 0)
839 GMX_ASSERT(!xWholeMolecules.empty(), "Need whole molecules for orienation restraints");
840 enerd->term[F_ORIRESDEV] = calc_orires_dev(ms,
841 idef.il[F_ORIRES].size(),
842 idef.il[F_ORIRES].iatoms.data(),
847 fr->bMolPBC ? pbc : nullptr,
851 if (fcdata->disres->nres > 0)
855 idef.il[F_DISRES].size(),
856 idef.il[F_DISRES].iatoms.data(),
858 fr->bMolPBC ? pbc : nullptr,
863 wallcycle_sub_stop(wcycle, ewcsRESTRAINTS);
866 calc_listed(wcycle, idef, threading_.get(), x, forceOutputs, fr, pbc, enerd, nrnb, lambda, md, fcdata, global_atom_index, stepWork);
868 /* Check if we have to determine energy differences
869 * at foreign lambda's.
871 if (fepvals->n_lambda > 0 && stepWork.computeDhdl)
873 real dvdl[efptNR] = { 0 };
874 if (!idef.il[F_POSRES].empty())
876 posres_wrapper_lambda(wcycle, fepvals, idef, &pbc_full, x, enerd, lambda, fr);
878 if (idef.ilsort != ilsortNO_FE)
880 wallcycle_sub_start(wcycle, ewcsLISTED_FEP);
881 if (idef.ilsort != ilsortFE_SORTED)
883 gmx_incons("The bonded interactions are not sorted for free energy");
885 for (int i = 0; i < 1 + enerd->foreignLambdaTerms.numLambdas(); i++)
889 reset_foreign_enerdata(enerd);
890 for (int j = 0; j < efptNR; j++)
892 lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i - 1]);
894 calc_listed_lambda(idef,
900 shiftForceBufferLambda_,
901 &(enerd->foreign_grpp),
909 sum_epot(enerd->foreign_grpp, enerd->foreign_term);
910 const double dvdlSum = std::accumulate(std::begin(dvdl), std::end(dvdl), 0.);
911 std::fill(std::begin(dvdl), std::end(dvdl), 0.0);
912 enerd->foreignLambdaTerms.accumulate(i, enerd->foreign_term[F_EPOT], dvdlSum);
914 wallcycle_sub_stop(wcycle, ewcsLISTED_FEP);