2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
37 * \brief This file defines high-level functions for mdrun to compute
38 * energies and forces for listed interactions.
40 * \author Mark Abraham <mark.j.abraham@gmail.com>
42 * \ingroup module_listed_forces
46 #include "listed_forces.h"
53 #include "gromacs/gmxlib/network.h"
54 #include "gromacs/gmxlib/nrnb.h"
55 #include "gromacs/listed_forces/bonded.h"
56 #include "gromacs/listed_forces/disre.h"
57 #include "gromacs/listed_forces/orires.h"
58 #include "gromacs/listed_forces/pairs.h"
59 #include "gromacs/listed_forces/position_restraints.h"
60 #include "gromacs/math/vec.h"
61 #include "gromacs/mdlib/enerdata_utils.h"
62 #include "gromacs/mdlib/force.h"
63 #include "gromacs/mdtypes/commrec.h"
64 #include "gromacs/mdtypes/fcdata.h"
65 #include "gromacs/mdtypes/forcerec.h"
66 #include "gromacs/mdtypes/inputrec.h"
67 #include "gromacs/mdtypes/md_enums.h"
68 #include "gromacs/mdtypes/simulation_workload.h"
69 #include "gromacs/pbcutil/ishift.h"
70 #include "gromacs/pbcutil/pbc.h"
71 #include "gromacs/timing/wallcycle.h"
72 #include "gromacs/topology/topology.h"
73 #include "gromacs/utility/exceptions.h"
74 #include "gromacs/utility/fatalerror.h"
75 #include "gromacs/utility/smalloc.h"
77 #include "listed_internal.h"
78 #include "utilities.h"
83 /*! \brief Return true if ftype is an explicit pair-listed LJ or
84 * COULOMB interaction type: bonded LJ (usually 1-4), or special
85 * listed non-bonded for FEP. */
87 isPairInteraction(int ftype)
89 return ((ftype) >= F_LJ14 && (ftype) <= F_LJC_PAIRS_NB);
92 /*! \brief Zero thread-local output buffers */
94 zero_thread_output(f_thread_t *f_t)
96 constexpr int nelem_fa = sizeof(f_t->f[0])/sizeof(real);
98 for (int i = 0; i < f_t->nblock_used; i++)
100 int a0 = f_t->block_index[i]*reduction_block_size;
101 int a1 = a0 + reduction_block_size;
102 for (int a = a0; a < a1; a++)
104 for (int d = 0; d < nelem_fa; d++)
111 for (int i = 0; i < SHIFTS; i++)
113 clear_rvec(f_t->fshift[i]);
115 for (int i = 0; i < F_NRE; i++)
119 for (int i = 0; i < egNR; i++)
121 for (int j = 0; j < f_t->grpp.nener; j++)
123 f_t->grpp.ener[i][j] = 0;
126 for (int i = 0; i < efptNR; i++)
/*! \brief The maximum thread count is arbitrary; a fixed upper bound is
 * used to avoid dynamic memory management. Using more than 16 threads is
 * probably never useful performance-wise. */
135 #define MAX_BONDED_THREADS 256
137 /*! \brief Reduce thread-local force buffers */
// NOTE(review): this extraction is missing lines (the return type, the
// `nthreads` parameter, braces and the initialization of `nfb`) — the
// comments below annotate only the code that is visible.
139 reduce_thread_forces(int n,
140 gmx::ArrayRef<gmx::RVec> force,
141 const bonded_threading_t *bt,
// Guard: fp[] below is a fixed-size stack array of MAX_BONDED_THREADS
// entries, so we cannot reduce over more threads than that.
144 if (nthreads > MAX_BONDED_THREADS)
146 gmx_fatal(FARGS, "Can not reduce bonded forces on more than %d threads",
// View the output force buffer as plain rvec for the inner reduction loop.
150 rvec * gmx_restrict f = as_rvec_array(force.data());
152 /* This reduction can run on any number of threads,
153 * independently of bt->nthreads.
154 * But if nthreads matches bt->nthreads (which it currently does)
155 * the uniform distribution of the touched blocks over nthreads will
156 * match the distribution of bonded over threads well in most cases,
157 * which means that threads mostly reduce their own data which increases
158 * the number of cache hits.
160 #pragma omp parallel for num_threads(nthreads) schedule(static)
// Only blocks that some thread actually touched are reduced; which
// threads touched block `ind` is recorded as a bitmask in bt->mask[ind].
161 for (int b = 0; b < bt->nblock_used; b++)
165 int ind = bt->block_index[b];
166 rvec4 *fp[MAX_BONDED_THREADS];
168 /* Determine which threads contribute to this block */
170 for (int ft = 0; ft < bt->nthreads; ft++)
172 if (bitmask_is_set(bt->mask[ind], ft))
174 fp[nfb++] = bt->f_t[ft]->f;
179 /* Reduce force buffers for threads that contribute */
// Atom range [a0, a1) covered by this reduction block.
180 int a0 = ind *reduction_block_size;
181 int a1 = (ind + 1)*reduction_block_size;
182 /* It would be nice if we could pad f to avoid this min */
183 a1 = std::min(a1, n);
184 for (int a = a0; a < a1; a++)
186 for (int fb = 0; fb < nfb; fb++)
188 rvec_inc(f[a], fp[fb][a]);
// An OpenMP loop body must not leak exceptions; convert to a fatal error.
193 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
197 /*! \brief Reduce thread-local forces, shift forces and energies */
// NOTE(review): this extraction is missing lines (return type, braces and
// part of the reduction condition at original line 215) — the comments
// below annotate only the visible code.
199 reduce_thread_output(int n, gmx::ForceWithShiftForces *forceWithShiftForces,
200 real *ener, gmx_grppairener_t *grpp, real *dvdl,
201 const bonded_threading_t *bt,
202 const gmx::StepWorkload &stepWork)
// Callers only invoke this when bonded work was assigned to this rank.
204 assert(bt->haveBondeds);
206 if (bt->nblock_used > 0)
208 /* Reduce the bonded force buffer */
209 reduce_thread_forces(n, forceWithShiftForces->force(), bt, bt->nthreads);
212 rvec * gmx_restrict fshift = as_rvec_array(forceWithShiftForces->shiftForces().data());
214 /* When necessary, reduce energy and virial using one thread only */
215 if ((stepWork.computeEnergy || stepWork.computeVirial || stepWork.computeDhdl) &&
218 gmx::ArrayRef < const std::unique_ptr < f_thread_t>> f_t = bt->f_t;
220 if (stepWork.computeVirial)
// Reductions below start at t = 1: thread 0 wrote directly into the
// main output buffers (see calcBondedForces).
222 for (int i = 0; i < SHIFTS; i++)
224 for (int t = 1; t < bt->nthreads; t++)
226 rvec_inc(fshift[i], f_t[t]->fshift[i]);
230 if (stepWork.computeEnergy)
232 for (int i = 0; i < F_NRE; i++)
234 for (int t = 1; t < bt->nthreads; t++)
236 ener[i] += f_t[t]->ener[i];
// Energy-group pair energies: nener is the same for all threads, so
// thread 1's count is used for the loop bound.
239 for (int i = 0; i < egNR; i++)
241 for (int j = 0; j < f_t[1]->grpp.nener; j++)
243 for (int t = 1; t < bt->nthreads; t++)
245 grpp->ener[i][j] += f_t[t]->grpp.ener[i][j];
250 if (stepWork.computeDhdl)
252 for (int i = 0; i < efptNR; i++)
255 for (int t = 1; t < bt->nthreads; t++)
257 dvdl[i] += f_t[t]->dvdl[i];
264 /*! \brief Returns the bonded kernel flavor
266 * Note that energies are always requested when the virial
267 * is requested (performance gain would be small).
268 * Note that currently we do not have bonded kernels that
269 * do not compute forces.
271 BondedKernelFlavor selectBondedKernelFlavor(const gmx::StepWorkload &stepWork,
272 const bool useSimdKernels,
273 const bool havePerturbedInteractions)
275 BondedKernelFlavor flavor;
276 if (stepWork.computeEnergy || stepWork.computeVirial)
278 if (stepWork.computeVirial)
280 flavor = BondedKernelFlavor::ForcesAndVirialAndEnergy;
284 flavor = BondedKernelFlavor::ForcesAndEnergy;
289 if (useSimdKernels && !havePerturbedInteractions)
291 flavor = BondedKernelFlavor::ForcesSimdWhenAvailable;
295 flavor = BondedKernelFlavor::ForcesNoSimd;
302 /*! \brief Calculate one element of the list of bonded interactions
// NOTE(review): this extraction is missing lines (the static return type,
// braces, two parameter lines and the declarations of `v` and `efptFTYPE`)
// — the comments below annotate only the visible code.
305 calc_one_bond(int thread,
306 int ftype, const t_idef *idef,
307 const WorkDivision &workDivision,
308 const rvec x[], rvec4 f[], rvec fshift[],
309 const t_forcerec *fr,
310 const t_pbc *pbc, const t_graph *g,
311 gmx_grppairener_t *grpp,
313 const real *lambda, real *dvdl,
314 const t_mdatoms *md, t_fcdata *fcd,
315 const gmx::StepWorkload &stepWork,
316 int *global_atom_index)
318 GMX_ASSERT(idef->ilsort == ilsortNO_FE || idef->ilsort == ilsortFE_SORTED,
319 "The topology should be marked either as no FE or sorted on FE");
// Perturbed interactions are sorted to the tail of each ilist; entries
// beyond nr_nonperturbed mean this ftype has perturbed interactions.
321 const bool havePerturbedInteractions =
322 (idef->ilsort == ilsortFE_SORTED &&
323 idef->il[ftype].nr_nonperturbed < idef->il[ftype].nr);
324 BondedKernelFlavor flavor =
325 selectBondedKernelFlavor(stepWork, fr->use_simd_kernels, havePerturbedInteractions);
// Restraint-type interactions use the restraint lambda component; all
// other bondeds use the bonded lambda component.
327 if (IS_RESTRAINT_TYPE(ftype))
329 efptFTYPE = efptRESTRAINT;
333 efptFTYPE = efptBONDED;
// Each ilist entry is one interaction-parameter index plus its atoms.
336 const int nat1 = interaction_function[ftype].nratoms + 1;
337 const int nbonds = idef->il[ftype].nr/nat1;
338 const t_iatom *iatoms = idef->il[ftype].iatoms;
340 GMX_ASSERT(fr->gpuBonded != nullptr || workDivision.end(ftype) == idef->il[ftype].nr,
341 "The thread division should match the topology");
// This thread computes entries [nb0, nb0 + nbn) of the ilist.
343 const int nb0 = workDivision.bound(ftype, thread);
344 const int nbn = workDivision.bound(ftype, thread + 1) - nb0;
347 if (!isPairInteraction(ftype))
351 /* TODO The execution time for CMAP dihedrals might be
352 nice to account to its own subtimer, but first
353 wallcycle needs to be extended to support calling from
// CMAP dihedrals have a dedicated kernel; other non-pair bondeds go
// through the generic simple-bond dispatch below.
355 v = cmap_dihs(nbn, iatoms+nb0,
356 idef->iparams, idef->cmap_grid,
358 pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]),
359 md, fcd, global_atom_index);
363 v = calculateSimpleBond(ftype, nbn, iatoms + nb0,
366 pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]),
367 md, fcd, global_atom_index,
// Pair interactions (1-4 LJ/Coulomb and FEP pair lists) are handled by
// the dedicated pairs kernel, which takes the full lambda/dvdl arrays.
373 /* TODO The execution time for pairs might be nice to account
374 to its own subtimer, but first wallcycle needs to be
375 extended to support calling from multiple threads. */
376 do_pairs(ftype, nbn, iatoms+nb0, idef->iparams, x, f, fshift,
377 pbc, g, lambda, dvdl, md, fr,
378 havePerturbedInteractions, stepWork,
379 grpp, global_atom_index);
// Flop accounting uses the total interaction count for this ftype, not
// just this thread's share; presumably guarded to run on one thread only
// in the original (guard not visible here) — TODO confirm.
384 inc_nrnb(nrnb, nrnbIndex(ftype), nbonds);
392 /*! \brief Compute the bonded part of the listed forces, parallelized over threads
// NOTE(review): this extraction is missing lines (several parameters,
// braces, the declarations of ftype/v/epot/dvdlt and the thread-0 branch
// header) — the comments below annotate only the visible code.
395 calcBondedForces(const t_idef *idef,
397 const t_forcerec *fr,
398 const t_pbc *pbc_null,
400 rvec *fshiftMasterBuffer,
401 gmx_enerdata_t *enerd,
407 const gmx::StepWorkload &stepWork,
408 int *global_atom_index)
410 bonded_threading_t *bt = fr->bondedThreading;
// One OpenMP thread per bonded thread; each thread accumulates into its
// own f_thread_t buffers, which are reduced later by reduce_thread_output.
412 #pragma omp parallel for num_threads(bt->nthreads) schedule(static)
413 for (int thread = 0; thread < bt->nthreads; thread++)
417 f_thread_t &threadBuffers = *bt->f_t[thread];
423 gmx_grppairener_t *grpp;
// Start from cleared per-thread accumulators each step.
425 zero_thread_output(&threadBuffers);
427 rvec4 *ft = threadBuffers.f;
429 /* Thread 0 writes directly to the main output buffers.
430 * We might want to reconsider this.
434 fshift = fshiftMasterBuffer;
441 fshift = threadBuffers.fshift;
442 epot = threadBuffers.ener;
443 grpp = &threadBuffers.grpp;
444 dvdlt = threadBuffers.dvdl;
446 /* Loop over all bonded force types to calculate the bonded forces */
447 for (ftype = 0; (ftype < F_NRE); ftype++)
449 if (idef->il[ftype].nr > 0 && ftype_is_bonded_potential(ftype))
451 v = calc_one_bond(thread, ftype, idef,
452 fr->bondedThreading->workDivision, x,
453 ft, fshift, fr, pbc_null, g, grpp,
// An OpenMP region must not leak exceptions; convert to a fatal error.
461 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
// Returns whether restraint-style listed work is present. Despite the
// name, the visible code also returns true when distance restraints are
// present (fcd.disres), so callers take the restraints path for those too.
// NOTE(review): a check on orientation restraints (fcd.orires) appears to
// be missing from this extraction — confirm against the original source.
465 bool havePositionRestraints(const t_idef &idef,
469 ((idef.il[F_POSRES].nr > 0) ||
470 (idef.il[F_FBPOSRES].nr > 0) ||
472 fcd.disres.nres > 0);
475 bool haveCpuBondeds(const t_forcerec &fr)
477 return fr.bondedThreading->haveBondeds;
480 bool haveCpuListedForces(const t_forcerec &fr,
484 return haveCpuBondeds(fr) || havePositionRestraints(idef, fcd);
// Computes all CPU listed forces: restraints first (serial), then the
// threaded bonded kernels, then the per-thread reduction.
// NOTE(review): this extraction is missing lines (the idef/lambda/md
// parameters, the pbc_null setup, and most braces) — the comments below
// annotate only the visible code.
487 void calc_listed(const t_commrec *cr,
488 const gmx_multisim_t *ms,
489 struct gmx_wallcycle *wcycle,
491 const rvec x[], history_t *hist,
492 gmx::ForceOutputs *forceOutputs,
493 const t_forcerec *fr,
494 const struct t_pbc *pbc,
495 const struct t_pbc *pbc_full,
496 const struct t_graph *g,
497 gmx_enerdata_t *enerd, t_nrnb *nrnb,
500 t_fcdata *fcd, int *global_atom_index,
501 const gmx::StepWorkload &stepWork)
503 const t_pbc *pbc_null;
504 bonded_threading_t *bt = fr->bondedThreading;
// Restraints and their pre-calculation run here, outside the threaded
// bonded loop below.
515 if (havePositionRestraints(*idef, *fcd))
517 /* TODO Use of restraints triggers further function calls
518 inside the loop over calc_one_bond(), but those are too
519 awkward to account to this subtimer properly in the present
520 code. We don't test / care much about performance with
521 restraints, anyway. */
522 wallcycle_sub_start(wcycle, ewcsRESTRAINTS)
524 if (idef->il[F_POSRES].nr > 0)
// Position restraints contribute to the virial via ForceWithVirial.
526 posres_wrapper(nrnb, idef, pbc_full, x, enerd, lambda, fr,
527 &forceOutputs->forceWithVirial());
530 if (idef->il[F_FBPOSRES].nr > 0)
532 fbposres_wrapper(nrnb, idef, pbc_full, x, enerd, fr,
533 &forceOutputs->forceWithVirial());
536 /* Do pre force calculation stuff which might require communication */
537 if (fcd->orires.nr > 0)
539 /* This assertion is to ensure we have whole molecules.
540 * Unfortunately we do not have an mdrun state variable that tells
541 * us if molecules in x are not broken over PBC, so we have to make
542 * do with checking graph!=nullptr, which should tell us if we made
543 * molecules whole before calling the current function.
545 GMX_RELEASE_ASSERT(fr->ePBC == epbcNONE || g != nullptr, "With orientation restraints molecules should be whole");
546 enerd->term[F_ORIRESDEV] =
547 calc_orires_dev(ms, idef->il[F_ORIRES].nr,
548 idef->il[F_ORIRES].iatoms,
549 idef->iparams, md, x,
550 pbc_null, fcd, hist);
// Distance-restraint time averages need this pre-calculation, which may
// communicate across simulations (ms).
552 if (fcd->disres.nres > 0)
554 calc_disres_R_6(cr, ms,
555 idef->il[F_DISRES].nr,
556 idef->il[F_DISRES].iatoms,
561 wallcycle_sub_stop(wcycle, ewcsRESTRAINTS);
564 if (haveCpuBondeds(*fr))
566 gmx::ForceWithShiftForces &forceWithShiftForces = forceOutputs->forceWithShiftForces();
568 wallcycle_sub_start(wcycle, ewcsLISTED);
569 /* The dummy array is to have a place to store the dhdl at other values
570 of lambda, which will be thrown away in the end */
571 real dvdl[efptNR] = {0};
// Compute all bonded interactions (multi-threaded), then reduce the
// per-thread force/energy/dvdl buffers into the main outputs.
572 calcBondedForces(idef, x, fr, pbc_null, g,
573 as_rvec_array(forceWithShiftForces.shiftForces().data()),
574 enerd, nrnb, lambda, dvdl, md,
575 fcd, stepWork, global_atom_index);
576 wallcycle_sub_stop(wcycle, ewcsLISTED);
578 wallcycle_sub_start(wcycle, ewcsLISTED_BUF_OPS);
579 reduce_thread_output(fr->natoms_force, &forceWithShiftForces,
580 enerd->term, &enerd->grpp, dvdl,
// Accumulate the reduced dH/dlambda contributions into the non-linear
// dvdl terms of enerd.
584 if (stepWork.computeDhdl)
586 for (int i = 0; i < efptNR; i++)
588 enerd->dvdl_nonlin[i] += dvdl[i];
591 wallcycle_sub_stop(wcycle, ewcsLISTED_BUF_OPS);
594 /* Copy the sum of violations for the distance restraints from fcd */
597 enerd->term[F_DISRESVIOL] = fcd->disres.sumviol;
// Recomputes the listed energies at a (foreign) lambda value, evaluating
// only the perturbed interactions, single-threaded, into temporary buffers.
// NOTE(review): this extraction is missing lines (several parameters, the
// local `idef_fe` copy, the f/fshift declarations, braces and the buffer
// cleanup) — the comments below annotate only the visible code.
601 void calc_listed_lambda(const t_idef *idef,
603 const t_forcerec *fr,
604 const struct t_pbc *pbc, const struct t_graph *g,
605 gmx_grppairener_t *grpp, real *epot, t_nrnb *nrnb,
609 int *global_atom_index)
// dH/dlambda at foreign lambdas is not needed; give the kernels a
// throw-away scratch array.
612 real dvdl_dum[efptNR] = {0};
615 const t_pbc *pbc_null;
// Foreign-lambda evaluation uses the dedicated single-thread work division.
617 WorkDivision &workDivision = fr->bondedThreading->foreignLambdaWorkDivision;
628 /* Copy the whole idef, so we can modify the contents locally */
631 /* We already have the forces, so we use temp buffers here */
632 // TODO: Get rid of these allocations by using permanent force buffers
633 snew(f, fr->natoms_force);
634 snew(fshift, SHIFTS);
636 /* Loop over all bonded force types to calculate the bonded energies */
637 for (int ftype = 0; (ftype < F_NRE); ftype++)
639 if (ftype_is_bonded_potential(ftype))
641 const t_ilist &ilist = idef->il[ftype];
642 /* Create a temporary t_ilist with only perturbed interactions */
643 t_ilist &ilist_fe = idef_fe.il[ftype];
// Perturbed entries are sorted to the tail of each ilist, so the slice
// starting at nr_nonperturbed covers exactly the perturbed interactions.
644 ilist_fe.iatoms = ilist.iatoms + ilist.nr_nonperturbed;
645 ilist_fe.nr_nonperturbed = 0;
646 ilist_fe.nr = ilist.nr - ilist.nr_nonperturbed;
647 /* Set the work range of thread 0 to the perturbed bondeds */
648 workDivision.setBound(ftype, 0, 0);
649 workDivision.setBound(ftype, 1, ilist_fe.nr);
// Only energies are needed at foreign lambda values.
653 gmx::StepWorkload tempFlags;
654 tempFlags.computeEnergy = true;
655 v = calc_one_bond(0, ftype, &idef_fe, workDivision,
656 x, f, fshift, fr, pbc_null, g,
657 grpp, nrnb, lambda, dvdl_dum,
// Top-level entry: computes listed forces at the current lambda and, when
// requested, the listed energies at all foreign lambda values.
// NOTE(review): this extraction is missing lines (return type, several
// parameters such as cr/idef/x/hist/box/lambda/md/fcd, braces, and the
// function's closing lines) — the comments below annotate visible code only.
670 do_force_listed(struct gmx_wallcycle *wcycle,
672 const t_lambda *fepvals,
674 const gmx_multisim_t *ms,
678 gmx::ForceOutputs *forceOutputs,
679 const t_forcerec *fr,
680 const struct t_pbc *pbc,
681 const struct t_graph *graph,
682 gmx_enerdata_t *enerd,
687 int *global_atom_index,
688 const gmx::StepWorkload &stepWork)
690 t_pbc pbc_full; /* Full PBC is needed for position restraints */
// Nothing to do when listed forces are not requested this step.
692 if (!stepWork.computeListedForces)
697 if ((idef->il[F_POSRES].nr > 0) ||
698 (idef->il[F_FBPOSRES].nr > 0))
700 /* Not enough flops to bother counting */
701 set_pbc(&pbc_full, fr->ePBC, box);
703 calc_listed(cr, ms, wcycle, idef, x, hist,
706 graph, enerd, nrnb, lambda, md, fcd,
707 global_atom_index, stepWork);
709 /* Check if we have to determine energy differences
710 * at foreign lambda's.
712 if (fepvals->n_lambda > 0 && stepWork.computeDhdl)
714 posres_wrapper_lambda(wcycle, fepvals, idef, &pbc_full, x, enerd, lambda, fr);
// Foreign-lambda energies are only meaningful when perturbed interactions
// exist, i.e. when the ilists are sorted on free energy.
716 if (idef->ilsort != ilsortNO_FE)
718 wallcycle_sub_start(wcycle, ewcsLISTED_FEP);
719 if (idef->ilsort != ilsortFE_SORTED)
721 gmx_incons("The bonded interactions are not sorted for free energy");
// Index 0 is the current lambda; entries i > 0 use the foreign lambda
// values from fepvals->all_lambda.
723 for (size_t i = 0; i < enerd->enerpart_lambda.size(); i++)
727 reset_foreign_enerdata(enerd);
728 for (int j = 0; j < efptNR; j++)
730 lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
732 calc_listed_lambda(idef, x, fr, pbc, graph, &(enerd->foreign_grpp), enerd->foreign_term, nrnb, lam_i, md,
733 fcd, global_atom_index);
734 sum_epot(&(enerd->foreign_grpp), enerd->foreign_term);
735 enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT];
737 wallcycle_sub_stop(wcycle, ewcsLISTED_FEP);