2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
5 * Copyright (c) 2019,2020, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
38 * \brief This file defines high-level functions for mdrun to compute
39 * energies and forces for listed interactions.
41 * \author Mark Abraham <mark.j.abraham@gmail.com>
43 * \ingroup module_listed_forces
47 #include "listed_forces.h"
54 #include "gromacs/gmxlib/network.h"
55 #include "gromacs/gmxlib/nrnb.h"
56 #include "gromacs/listed_forces/bonded.h"
57 #include "gromacs/listed_forces/disre.h"
58 #include "gromacs/listed_forces/orires.h"
59 #include "gromacs/listed_forces/pairs.h"
60 #include "gromacs/listed_forces/position_restraints.h"
61 #include "gromacs/math/vec.h"
62 #include "gromacs/mdlib/enerdata_utils.h"
63 #include "gromacs/mdlib/force.h"
64 #include "gromacs/mdtypes/commrec.h"
65 #include "gromacs/mdtypes/fcdata.h"
66 #include "gromacs/mdtypes/forceoutput.h"
67 #include "gromacs/mdtypes/forcerec.h"
68 #include "gromacs/mdtypes/inputrec.h"
69 #include "gromacs/mdtypes/md_enums.h"
70 #include "gromacs/mdtypes/simulation_workload.h"
71 #include "gromacs/pbcutil/ishift.h"
72 #include "gromacs/pbcutil/pbc.h"
73 #include "gromacs/timing/wallcycle.h"
74 #include "gromacs/topology/topology.h"
75 #include "gromacs/utility/exceptions.h"
76 #include "gromacs/utility/fatalerror.h"
77 #include "gromacs/utility/smalloc.h"
79 #include "listed_internal.h"
80 #include "utilities.h"
87 /*! \brief Return true if ftype is an explicit pair-listed LJ or
88 * COULOMB interaction type: bonded LJ (usually 1-4), or special
89 * listed non-bonded for FEP. */
// Relies on the pair interaction types occupying the contiguous range
// [F_LJ14, F_LJC_PAIRS_NB] in the function-type enumeration.
90 bool isPairInteraction(int ftype)
92 return ((ftype) >= F_LJ14 && (ftype) <= F_LJC_PAIRS_NB);
95 /*! \brief Zero thread-local output buffers */
96 void zero_thread_output(f_thread_t* f_t)
// Number of real components per force element (sizeof(rvec4)/sizeof(real)).
98 constexpr int nelem_fa = sizeof(f_t->f[0]) / sizeof(real);
// Clear only the force-buffer blocks this thread actually touched, as
// recorded in block_index; clearing the whole buffer would be wasteful.
100 for (int i = 0; i < f_t->nblock_used; i++)
102 int a0 = f_t->block_index[i] * reduction_block_size;
103 int a1 = a0 + reduction_block_size;
104 for (int a = a0; a < a1; a++)
106 for (int d = 0; d < nelem_fa; d++)
// Clear the shift-force accumulator for every PBC shift vector.
113 for (int i = 0; i < SHIFTS; i++)
115 clear_rvec(f_t->fshift[i]);
// Clear the per-interaction-type energy terms.
117 for (int i = 0; i < F_NRE; i++)
// Clear the energy-group pair energies for each non-bonded energy class.
121 for (int i = 0; i < egNR; i++)
123 for (int j = 0; j < f_t->grpp.nener; j++)
125 f_t->grpp.ener[i][j] = 0;
// Clear the dV/dlambda accumulators for all FEP coupling types.
128 for (int i = 0; i < efptNR; i++)
134 /*! \brief The max thread number is arbitrary, we used a fixed number
135 * to avoid memory management. Using more than 16 threads is probably
136 * never useful performance wise. */
137 #define MAX_BONDED_THREADS 256
139 /*! \brief Reduce thread-local force buffers */
// \param n        Number of atoms with forces to reduce
// \param force    The main force buffer that receives the reduced forces
// \param bt       Thread-parallelization bookkeeping (per-thread buffers, masks)
// \param nthreads Number of threads to use for this reduction
140 void reduce_thread_forces(int n, gmx::ArrayRef<gmx::RVec> force, const bonded_threading_t* bt, int nthreads)
// The per-block pointer array below (fp) is a fixed-size stack array,
// which is why the thread count is hard-capped here.
142 if (nthreads > MAX_BONDED_THREADS)
144 gmx_fatal(FARGS, "Can not reduce bonded forces on more than %d threads", MAX_BONDED_THREADS);
147 rvec* gmx_restrict f = as_rvec_array(force.data());
149 /* This reduction can run on any number of threads,
150 * independently of bt->nthreads.
151 * But if nthreads matches bt->nthreads (which it currently does)
152 * the uniform distribution of the touched blocks over nthreads will
153 * match the distribution of bonded over threads well in most cases,
154 * which means that threads mostly reduce their own data which increases
155 * the number of cache hits.
157 #pragma omp parallel for num_threads(nthreads) schedule(static)
158 for (int b = 0; b < bt->nblock_used; b++)
162 int ind = bt->block_index[b];
163 rvec4* fp[MAX_BONDED_THREADS];
165 /* Determine which threads contribute to this block */
// NOTE(review): the counter nfb (number of contributing buffers) is
// declared on a line elided from this excerpt; it is zeroed before this
// loop and used as the bound of the inner reduction loop below.
167 for (int ft = 0; ft < bt->nthreads; ft++)
// bt->mask[ind] has one bit per thread marking who wrote to block ind.
169 if (bitmask_is_set(bt->mask[ind], ft))
171 fp[nfb++] = bt->f_t[ft]->f;
176 /* Reduce force buffers for threads that contribute */
177 int a0 = ind * reduction_block_size;
178 int a1 = (ind + 1) * reduction_block_size;
179 /* It would be nice if we could pad f to avoid this min */
180 a1 = std::min(a1, n);
181 for (int a = a0; a < a1; a++)
183 for (int fb = 0; fb < nfb; fb++)
185 rvec_inc(f[a], fp[fb][a]);
// Exceptions must not escape an OpenMP region; convert to fatal error.
190 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
194 /*! \brief Reduce thread-local forces, shift forces and energies */
// NOTE(review): the parameters carrying the per-term energy array (ener)
// and the dV/dlambda array (dvdl) are on lines elided from this excerpt;
// both are written by the reductions below.
195 void reduce_thread_output(int n,
196 gmx::ForceWithShiftForces* forceWithShiftForces,
198 gmx_grppairener_t* grpp,
200 const bonded_threading_t* bt,
201 const gmx::StepWorkload& stepWork)
203 assert(bt->haveBondeds);
205 if (bt->nblock_used > 0)
207 /* Reduce the bonded force buffer */
208 reduce_thread_forces(n, forceWithShiftForces->force(), bt, bt->nthreads);
211 rvec* gmx_restrict fshift = as_rvec_array(forceWithShiftForces->shiftForces().data());
213 /* When necessary, reduce energy and virial using one thread only */
214 if ((stepWork.computeEnergy || stepWork.computeVirial || stepWork.computeDhdl) && bt->nthreads > 1)
216 gmx::ArrayRef<const std::unique_ptr<f_thread_t>> f_t = bt->f_t;
218 if (stepWork.computeVirial)
// Thread 0 wrote directly into the main output buffers (see
// calcBondedForces), so every reduction below starts at t = 1.
220 for (int i = 0; i < SHIFTS; i++)
222 for (int t = 1; t < bt->nthreads; t++)
224 rvec_inc(fshift[i], f_t[t]->fshift[i]);
228 if (stepWork.computeEnergy)
// Accumulate per-interaction-type energies from the worker threads.
230 for (int i = 0; i < F_NRE; i++)
232 for (int t = 1; t < bt->nthreads; t++)
234 ener[i] += f_t[t]->ener[i];
// Accumulate energy-group pair energies; nener is the same for all
// threads, so reading it from thread 1 is safe here.
237 for (int i = 0; i < egNR; i++)
239 for (int j = 0; j < f_t[1]->grpp.nener; j++)
241 for (int t = 1; t < bt->nthreads; t++)
243 grpp->ener[i][j] += f_t[t]->grpp.ener[i][j];
248 if (stepWork.computeDhdl)
// Accumulate dV/dlambda contributions for all FEP coupling types.
250 for (int i = 0; i < efptNR; i++)
253 for (int t = 1; t < bt->nthreads; t++)
255 dvdl[i] += f_t[t]->dvdl[i];
262 /*! \brief Returns the bonded kernel flavor
264 * Note that energies are always requested when the virial
265 * is requested (performance gain would be small).
266 * Note that currently we do not have bonded kernels that
267 * do not compute forces.
269 BondedKernelFlavor selectBondedKernelFlavor(const gmx::StepWorkload& stepWork,
270 const bool useSimdKernels,
271 const bool havePerturbedInteractions)
273 BondedKernelFlavor flavor;
// Energy and/or virial requests force the non-SIMD accumulating kernels.
274 if (stepWork.computeEnergy || stepWork.computeVirial)
276 if (stepWork.computeVirial)
278 flavor = BondedKernelFlavor::ForcesAndVirialAndEnergy;
282 flavor = BondedKernelFlavor::ForcesAndEnergy;
// Force-only path: SIMD kernels can be used, but only when no
// interaction is perturbed (FEP needs the scalar code path).
287 if (useSimdKernels && !havePerturbedInteractions)
289 flavor = BondedKernelFlavor::ForcesSimdWhenAvailable;
293 flavor = BondedKernelFlavor::ForcesNoSimd;
300 /*! \brief Calculate one element of the list of bonded interactions
// Computes forces/energies for one interaction type (ftype) over this
// thread's slice of the interaction list, and returns the potential
// energy contribution. NOTE(review): several parameters (ftype, idef,
// x, f, fshift, pbc, g, nrnb, lambda, dvdl, md, fcd) appear on lines
// elided from this excerpt.
302 real calc_one_bond(int thread,
305 ArrayRef<const int> iatoms,
306 const int numNonperturbedInteractions,
307 const WorkDivision& workDivision,
311 const t_forcerec* fr,
314 gmx_grppairener_t* grpp,
320 const gmx::StepWorkload& stepWork,
321 int* global_atom_index)
323 GMX_ASSERT(idef->ilsort == ilsortNO_FE || idef->ilsort == ilsortFE_SORTED,
324 "The topology should be marked either as no FE or sorted on FE");
// With FE sorting, perturbed interactions are at the tail of the list,
// so a shorter non-perturbed prefix implies perturbed entries exist.
326 const bool havePerturbedInteractions =
327 (idef->ilsort == ilsortFE_SORTED && numNonperturbedInteractions < iatoms.ssize());
328 BondedKernelFlavor flavor =
329 selectBondedKernelFlavor(stepWork, fr->use_simd_kernels, havePerturbedInteractions);
// Select which lambda component (restraint vs. bonded coupling) applies.
331 if (IS_RESTRAINT_TYPE(ftype))
333 efptFTYPE = efptRESTRAINT;
337 efptFTYPE = efptBONDED;
// Each interaction entry stores nratoms atom indices plus one
// parameter-type index, hence the +1 stride.
340 const int nat1 = interaction_function[ftype].nratoms + 1;
341 const int nbonds = iatoms.ssize() / nat1;
343 GMX_ASSERT(fr->gpuBonded != nullptr || workDivision.end(ftype) == iatoms.ssize(),
344 "The thread division should match the topology");
// This thread handles the half-open slice [nb0, nb0 + nbn) of iatoms.
346 const int nb0 = workDivision.bound(ftype, thread);
347 const int nbn = workDivision.bound(ftype, thread + 1) - nb0;
350 if (!isPairInteraction(ftype))
354 /* TODO The execution time for CMAP dihedrals might be
355 nice to account to its own subtimer, but first
356 wallcycle needs to be extended to support calling from
// CMAP dihedrals have a dedicated kernel (branch condition elided).
358 v = cmap_dihs(nbn, iatoms.data() + nb0, idef->iparams, idef->cmap_grid, x, f, fshift,
359 pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]), md, fcd, global_atom_index);
// All other non-pair bondeds go through the generic dispatcher.
363 v = calculateSimpleBond(ftype, nbn, iatoms.data() + nb0, idef->iparams, x, f, fshift,
364 pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]), md, fcd,
365 global_atom_index, flavor);
370 /* TODO The execution time for pairs might be nice to account
371 to its own subtimer, but first wallcycle needs to be
372 extended to support calling from multiple threads. */
// Listed 1-4 / FEP pair interactions accumulate into grpp directly.
373 do_pairs(ftype, nbn, iatoms.data() + nb0, idef->iparams, x, f, fshift, pbc, g, lambda, dvdl,
374 md, fr, havePerturbedInteractions, stepWork, grpp, global_atom_index);
// Account flop counts for this interaction type.
379 inc_nrnb(nrnb, nrnbIndex(ftype), nbonds);
387 /*! \brief Compute the bonded part of the listed forces, parallelized over threads
// NOTE(review): several parameters (x, g, nrnb, lambda, dvdl, md, fcd)
// appear on lines elided from this excerpt.
389 static void calcBondedForces(const t_idef* idef,
391 const t_forcerec* fr,
392 const t_pbc* pbc_null,
394 rvec* fshiftMasterBuffer,
395 gmx_enerdata_t* enerd,
401 const gmx::StepWorkload& stepWork,
402 int* global_atom_index)
404 bonded_threading_t* bt = fr->bondedThreading;
406 #pragma omp parallel for num_threads(bt->nthreads) schedule(static)
407 for (int thread = 0; thread < bt->nthreads; thread++)
411 f_thread_t& threadBuffers = *bt->f_t[thread];
417 gmx_grppairener_t* grpp;
// Reset this thread's accumulators before computing any contribution.
419 zero_thread_output(&threadBuffers);
421 rvec4* ft = threadBuffers.f;
423 /* Thread 0 writes directly to the main output buffers.
424 * We might want to reconsider this.
428 fshift = fshiftMasterBuffer;
// Worker threads accumulate into their private buffers; these are
// summed later by reduce_thread_output().
435 fshift = threadBuffers.fshift;
436 epot = threadBuffers.ener;
437 grpp = &threadBuffers.grpp;
438 dvdlt = threadBuffers.dvdl;
440 /* Loop over all bonded force types to calculate the bonded forces */
441 for (ftype = 0; (ftype < F_NRE); ftype++)
443 const t_ilist& ilist = idef->il[ftype];
444 if (ilist.nr > 0 && ftype_is_bonded_potential(ftype))
446 ArrayRef<const int> iatoms = gmx::constArrayRefFromArray(ilist.iatoms, ilist.nr);
// (Call to calc_one_bond; its opening line is elided in this excerpt.)
448 thread, ftype, idef, iatoms, idef->numNonperturbedInteractions[ftype],
449 fr->bondedThreading->workDivision, x, ft, fshift, fr, pbc_null, g, grpp,
450 nrnb, lambda, dvdlt, md, fcd, stepWork, global_atom_index);
// Exceptions must not escape an OpenMP region; convert to fatal error.
455 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
// Returns whether any restraint potential is active: position restraints,
// flat-bottomed position restraints, orientation restraints, or
// distance restraints.
459 bool haveRestraints(const t_idef& idef, const t_fcdata& fcd)
461 return ((idef.il[F_POSRES].nr > 0) || (idef.il[F_FBPOSRES].nr > 0) || fcd.orires.nr > 0
462 || fcd.disres.nres > 0);
// Returns whether any bonded interactions are computed on the CPU,
// as recorded by the bonded threading setup.
465 bool haveCpuBondeds(const t_forcerec& fr)
467 return fr.bondedThreading->haveBondeds;
// Returns whether any listed-force work (bondeds or restraints) must be
// done on the CPU for this topology.
470 bool haveCpuListedForces(const t_forcerec& fr, const t_idef& idef, const t_fcdata& fcd)
472 return haveCpuBondeds(fr) || haveRestraints(idef, fcd);
// Computes energies and forces for all listed interactions: restraints
// first (positional, orientation, distance), then the thread-parallel
// bonded interactions, followed by the cross-thread reduction.
// NOTE(review): several parameters (idef, x, hist, nrnb, lambda, md,
// fcd) appear on lines elided from this excerpt.
475 void calc_listed(const t_commrec* cr,
476 const gmx_multisim_t* ms,
477 struct gmx_wallcycle* wcycle,
481 gmx::ForceOutputs* forceOutputs,
482 const t_forcerec* fr,
483 const struct t_pbc* pbc,
484 const struct t_pbc* pbc_full,
485 const struct t_graph* g,
486 gmx_enerdata_t* enerd,
491 int* global_atom_index,
492 const gmx::StepWorkload& stepWork)
494 const t_pbc* pbc_null;
495 bonded_threading_t* bt = fr->bondedThreading;
506 if (haveRestraints(*idef, *fcd))
508 /* TODO Use of restraints triggers further function calls
509 inside the loop over calc_one_bond(), but those are too
510 awkward to account to this subtimer properly in the present
511 code. We don't test / care much about performance with
512 restraints, anyway. */
513 wallcycle_sub_start(wcycle, ewcsRESTRAINTS);
// Position restraints need full (non-truncated) PBC, hence pbc_full.
515 if (idef->il[F_POSRES].nr > 0)
517 posres_wrapper(nrnb, idef, pbc_full, x, enerd, lambda, fr, &forceOutputs->forceWithVirial());
520 if (idef->il[F_FBPOSRES].nr > 0)
522 fbposres_wrapper(nrnb, idef, pbc_full, x, enerd, fr, &forceOutputs->forceWithVirial());
525 /* Do pre force calculation stuff which might require communication */
526 if (fcd->orires.nr > 0)
528 /* This assertion is to ensure we have whole molecules.
529 * Unfortunately we do not have an mdrun state variable that tells
530 * us if molecules in x are not broken over PBC, so we have to make
531 * do with checking graph!=nullptr, which should tell us if we made
532 * molecules whole before calling the current function.
534 GMX_RELEASE_ASSERT(fr->pbcType == PbcType::No || g != nullptr,
535 "With orientation restraints molecules should be whole");
536 enerd->term[F_ORIRESDEV] = calc_orires_dev(ms, idef->il[F_ORIRES].nr, idef->il[F_ORIRES].iatoms,
537 idef->iparams, md, x, pbc_null, fcd, hist);
// Distance restraints need the time/ensemble-averaged R^-6 first.
539 if (fcd->disres.nres > 0)
541 calc_disres_R_6(cr, ms, idef->il[F_DISRES].nr, idef->il[F_DISRES].iatoms, x, pbc_null,
545 wallcycle_sub_stop(wcycle, ewcsRESTRAINTS);
548 if (haveCpuBondeds(*fr))
550 gmx::ForceWithShiftForces& forceWithShiftForces = forceOutputs->forceWithShiftForces();
552 wallcycle_sub_start(wcycle, ewcsLISTED);
553 /* The dummy array is to have a place to store the dhdl at other values
554 of lambda, which will be thrown away in the end */
555 real dvdl[efptNR] = { 0 };
556 calcBondedForces(idef, x, fr, pbc_null, g,
557 as_rvec_array(forceWithShiftForces.shiftForces().data()), enerd, nrnb,
558 lambda, dvdl, md, fcd, stepWork, global_atom_index);
559 wallcycle_sub_stop(wcycle, ewcsLISTED);
561 wallcycle_sub_start(wcycle, ewcsLISTED_BUF_OPS);
562 reduce_thread_output(fr->natoms_force, &forceWithShiftForces, enerd->term, &enerd->grpp,
// Fold the per-thread dhdl contributions into the non-linear term.
565 if (stepWork.computeDhdl)
567 for (int i = 0; i < efptNR; i++)
569 enerd->dvdl_nonlin[i] += dvdl[i];
572 wallcycle_sub_stop(wcycle, ewcsLISTED_BUF_OPS);
575 /* Copy the sum of violations for the distance restraints from fcd */
578 enerd->term[F_DISRESVIOL] = fcd->disres.sumviol;
// Recomputes the listed-interaction energies for the perturbed
// interactions only, at a foreign lambda value, using single-threaded
// temporary force buffers (the forces themselves are discarded).
// NOTE(review): several parameters (x, epot/ener output, nrnb, lambda,
// md, fcd) appear on lines elided from this excerpt.
582 void calc_listed_lambda(const t_idef* idef,
584 const t_forcerec* fr,
585 const struct t_pbc* pbc,
586 const struct t_graph* g,
587 gmx_grppairener_t* grpp,
589 gmx::ArrayRef<real> dvdl,
594 int* global_atom_index)
599 const t_pbc* pbc_null;
// Use the dedicated single-thread work division reserved for
// foreign-lambda evaluation.
601 WorkDivision& workDivision = fr->bondedThreading->foreignLambdaWorkDivision;
612 /* Copy the whole idef, so we can modify the contents locally */
615 /* We already have the forces, so we use temp buffers here */
616 // TODO: Get rid of these allocations by using permanent force buffers
617 snew(f, fr->natoms_force);
618 snew(fshift, SHIFTS);
620 /* Loop over all bonded force types to calculate the bonded energies */
621 for (int ftype = 0; (ftype < F_NRE); ftype++)
623 if (ftype_is_bonded_potential(ftype))
625 const t_ilist& ilist = idef->il[ftype];
626 /* Create a temporary iatom list with only perturbed interactions */
// Perturbed entries sit after the non-perturbed prefix (FE-sorted).
627 const int numNonperturbed = idef->numNonperturbedInteractions[ftype];
628 ArrayRef<const int> iatoms = gmx::constArrayRefFromArray(ilist.iatoms + numNonperturbed,
629 ilist.nr - numNonperturbed);
630 t_ilist& ilist_fe = idef_fe.il[ftype];
631 /* Set the work range of thread 0 to the perturbed bondeds */
632 workDivision.setBound(ftype, 0, 0);
633 workDivision.setBound(ftype, 1, iatoms.ssize());
// Foreign-lambda evaluation always needs energies, never virials.
637 gmx::StepWorkload tempFlags;
638 tempFlags.computeEnergy = true;
639 v = calc_one_bond(0, ftype, idef, iatoms, iatoms.ssize(), workDivision, x, f,
640 fshift, fr, pbc_null, g, grpp, nrnb, lambda, dvdl.data(), md, fcd,
641 tempFlags, global_atom_index);
// Top-level entry point for listed forces: sets up full PBC when
// position restraints are present, runs calc_listed(), and then, if
// requested, evaluates energies at all foreign lambda points for FEP.
// NOTE(review): several parameters (cr, box, idef, x, hist, nrnb,
// lambda, md, fcd) appear on lines elided from this excerpt.
651 void do_force_listed(struct gmx_wallcycle* wcycle,
653 const t_lambda* fepvals,
655 const gmx_multisim_t* ms,
659 gmx::ForceOutputs* forceOutputs,
660 const t_forcerec* fr,
661 const struct t_pbc* pbc,
662 const struct t_graph* graph,
663 gmx_enerdata_t* enerd,
668 int* global_atom_index,
669 const gmx::StepWorkload& stepWork)
671 t_pbc pbc_full; /* Full PBC is needed for position restraints */
// Early exit when this step does not need listed forces at all.
673 if (!stepWork.computeListedForces)
678 if ((idef->il[F_POSRES].nr > 0) || (idef->il[F_FBPOSRES].nr > 0))
680 /* Not enough flops to bother counting */
681 set_pbc(&pbc_full, fr->pbcType, box);
683 calc_listed(cr, ms, wcycle, idef, x, hist, forceOutputs, fr, pbc, &pbc_full, graph, enerd, nrnb,
684 lambda, md, fcd, global_atom_index, stepWork);
686 /* Check if we have to determine energy differences
687 * at foreign lambda's.
689 if (fepvals->n_lambda > 0 && stepWork.computeDhdl)
691 real dvdl[efptNR] = { 0 };
692 posres_wrapper_lambda(wcycle, fepvals, idef, &pbc_full, x, enerd, lambda, fr);
694 if (idef->ilsort != ilsortNO_FE)
696 wallcycle_sub_start(wcycle, ewcsLISTED_FEP);
697 if (idef->ilsort != ilsortFE_SORTED)
699 gmx_incons("The bonded interactions are not sorted for free energy");
// One pass per foreign lambda point; index 0 is the current lambda.
701 for (size_t i = 0; i < enerd->enerpart_lambda.size(); i++)
705 reset_foreign_enerdata(enerd);
706 for (int j = 0; j < efptNR; j++)
708 lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i - 1]);
710 calc_listed_lambda(idef, x, fr, pbc, graph, &(enerd->foreign_grpp),
711 enerd->foreign_term, dvdl, nrnb, lam_i, md, fcd, global_atom_index);
712 sum_epot(&(enerd->foreign_grpp), enerd->foreign_term);
713 enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT];
// Sum all coupling-type dV/dlambda contributions for this point.
714 for (int j = 0; j < efptNR; j++)
716 enerd->dhdlLambda[i] += dvdl[j];
720 wallcycle_sub_stop(wcycle, ewcsLISTED_FEP);