Merge branch release-2021
[alexxy/gromacs.git] / src / gromacs / listed_forces / listed_forces.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
5  * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
6  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7  * and including many others, as listed in the AUTHORS file in the
8  * top-level source directory and at http://www.gromacs.org.
9  *
10  * GROMACS is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public License
12  * as published by the Free Software Foundation; either version 2.1
13  * of the License, or (at your option) any later version.
14  *
15  * GROMACS is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with GROMACS; if not, see
22  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
24  *
25  * If you want to redistribute modifications to GROMACS, please
26  * consider that scientific software is very special. Version
27  * control is crucial - bugs must be traceable. We will be happy to
28  * consider code for inclusion in the official distribution, but
29  * derived work must not be called official GROMACS. Details are found
30  * in the README & COPYING files - if they are missing, get the
31  * official version at http://www.gromacs.org.
32  *
33  * To help us fund GROMACS development, we humbly ask that you cite
34  * the research papers on the package. Check out http://www.gromacs.org.
35  */
36 /*! \internal \file
37  *
38  * \brief This file defines high-level functions for mdrun to compute
39  * energies and forces for listed interactions.
40  *
41  * \author Mark Abraham <mark.j.abraham@gmail.com>
42  *
43  * \ingroup module_listed_forces
44  */
45 #include "gmxpre.h"
46
47 #include "listed_forces.h"
48
49 #include <cassert>
50
51 #include <algorithm>
52 #include <array>
53 #include <numeric>
54
55 #include "gromacs/gmxlib/network.h"
56 #include "gromacs/gmxlib/nrnb.h"
57 #include "gromacs/listed_forces/bonded.h"
58 #include "gromacs/listed_forces/disre.h"
59 #include "gromacs/listed_forces/orires.h"
60 #include "gromacs/listed_forces/pairs.h"
61 #include "gromacs/listed_forces/position_restraints.h"
62 #include "gromacs/math/vec.h"
63 #include "gromacs/mdlib/enerdata_utils.h"
64 #include "gromacs/mdlib/force.h"
65 #include "gromacs/mdlib/gmx_omp_nthreads.h"
66 #include "gromacs/mdtypes/commrec.h"
67 #include "gromacs/mdtypes/fcdata.h"
68 #include "gromacs/mdtypes/forceoutput.h"
69 #include "gromacs/mdtypes/forcerec.h"
70 #include "gromacs/mdtypes/inputrec.h"
71 #include "gromacs/mdtypes/md_enums.h"
72 #include "gromacs/mdtypes/simulation_workload.h"
73 #include "gromacs/pbcutil/ishift.h"
74 #include "gromacs/pbcutil/pbc.h"
75 #include "gromacs/timing/wallcycle.h"
76 #include "gromacs/topology/topology.h"
77 #include "gromacs/utility/exceptions.h"
78 #include "gromacs/utility/fatalerror.h"
79
80 #include "listed_internal.h"
81 #include "manage_threading.h"
82 #include "utilities.h"
83
84 ListedForces::ListedForces(const gmx_ffparams_t&      ffparams,
85                            const int                  numEnergyGroups,
86                            const int                  numThreads,
87                            const InteractionSelection interactionSelection,
88                            FILE*                      fplog) :
89     idefSelection_(ffparams),
90     threading_(std::make_unique<bonded_threading_t>(numThreads, numEnergyGroups, fplog)),
91     interactionSelection_(interactionSelection)
92 {
93 }
94
95 ListedForces::ListedForces(ListedForces&& o) noexcept = default;
96
97 ListedForces::~ListedForces() = default;
98
99 //! Copies the selection interactions from \p idefSrc to \p idef
100 static void selectInteractions(InteractionDefinitions*                  idef,
101                                const InteractionDefinitions&            idefSrc,
102                                const ListedForces::InteractionSelection interactionSelection)
103 {
104     const bool selectPairs =
105             interactionSelection.test(static_cast<int>(ListedForces::InteractionGroup::Pairs));
106     const bool selectDihedrals =
107             interactionSelection.test(static_cast<int>(ListedForces::InteractionGroup::Dihedrals));
108     const bool selectAngles =
109             interactionSelection.test(static_cast<int>(ListedForces::InteractionGroup::Angles));
110     const bool selectRest =
111             interactionSelection.test(static_cast<int>(ListedForces::InteractionGroup::Rest));
112
113     for (int ftype = 0; ftype < F_NRE; ftype++)
114     {
115         const t_interaction_function& ifunc = interaction_function[ftype];
116         if (ifunc.flags & IF_BOND)
117         {
118             bool assign = false;
119             if (ifunc.flags & IF_PAIR)
120             {
121                 assign = selectPairs;
122             }
123             else if (ifunc.flags & IF_DIHEDRAL)
124             {
125                 assign = selectDihedrals;
126             }
127             else if (ifunc.flags & IF_ATYPE)
128             {
129                 assign = selectAngles;
130             }
131             else
132             {
133                 assign = selectRest;
134             }
135             if (assign)
136             {
137                 idef->il[ftype] = idefSrc.il[ftype];
138             }
139             else
140             {
141                 idef->il[ftype].clear();
142             }
143         }
144     }
145 }
146
147 void ListedForces::setup(const InteractionDefinitions& domainIdef, const int numAtomsForce, const bool useGpu)
148 {
149     if (interactionSelection_.all())
150     {
151         // Avoid the overhead of copying all interaction lists by simply setting the reference to the domain idef
152         idef_ = &domainIdef;
153     }
154     else
155     {
156         idef_ = &idefSelection_;
157
158         selectInteractions(&idefSelection_, domainIdef, interactionSelection_);
159
160         idefSelection_.ilsort = domainIdef.ilsort;
161
162         if (interactionSelection_.test(static_cast<int>(ListedForces::InteractionGroup::Rest)))
163         {
164             idefSelection_.iparams_posres   = domainIdef.iparams_posres;
165             idefSelection_.iparams_fbposres = domainIdef.iparams_fbposres;
166         }
167         else
168         {
169             idefSelection_.iparams_posres.clear();
170             idefSelection_.iparams_fbposres.clear();
171         }
172     }
173
174     setup_bonded_threading(threading_.get(), numAtomsForce, useGpu, *idef_);
175
176     if (idef_->ilsort == ilsortFE_SORTED)
177     {
178         forceBufferLambda_.resize(numAtomsForce * sizeof(rvec4) / sizeof(real));
179         shiftForceBufferLambda_.resize(SHIFTS);
180     }
181 }
182
183 namespace
184 {
185
186 using gmx::ArrayRef;
187
188 /*! \brief Return true if ftype is an explicit pair-listed LJ or
189  * COULOMB interaction type: bonded LJ (usually 1-4), or special
190  * listed non-bonded for FEP. */
191 bool isPairInteraction(int ftype)
192 {
193     return ((ftype) >= F_LJ14 && (ftype) <= F_LJC_PAIRS_NB);
194 }
195
196 /*! \brief Zero thread-local output buffers */
197 void zero_thread_output(f_thread_t* f_t)
198 {
199     constexpr int nelem_fa = sizeof(f_t->f[0]) / sizeof(real);
200
201     for (int i = 0; i < f_t->nblock_used; i++)
202     {
203         int a0 = f_t->block_index[i] * reduction_block_size;
204         int a1 = a0 + reduction_block_size;
205         for (int a = a0; a < a1; a++)
206         {
207             for (int d = 0; d < nelem_fa; d++)
208             {
209                 f_t->f[a][d] = 0;
210             }
211         }
212     }
213
214     for (int i = 0; i < SHIFTS; i++)
215     {
216         clear_rvec(f_t->fshift[i]);
217     }
218     for (int i = 0; i < F_NRE; i++)
219     {
220         f_t->ener[i] = 0;
221     }
222     for (int i = 0; i < egNR; i++)
223     {
224         for (int j = 0; j < f_t->grpp.nener; j++)
225         {
226             f_t->grpp.ener[i][j] = 0;
227         }
228     }
229     for (int i = 0; i < efptNR; i++)
230     {
231         f_t->dvdl[i] = 0;
232     }
233 }
234
235 /*! \brief The max thread number is arbitrary, we used a fixed number
236  * to avoid memory management.  Using more than 16 threads is probably
237  * never useful performance wise. */
238 #define MAX_BONDED_THREADS 256
239
240 /*! \brief Reduce thread-local force buffers */
241 void reduce_thread_forces(gmx::ArrayRef<gmx::RVec> force, const bonded_threading_t* bt, int nthreads)
242 {
243     if (nthreads > MAX_BONDED_THREADS)
244     {
245         gmx_fatal(FARGS, "Can not reduce bonded forces on more than %d threads", MAX_BONDED_THREADS);
246     }
247
248     rvec* gmx_restrict f = as_rvec_array(force.data());
249
250     const int numAtomsForce = bt->numAtomsForce;
251
252     /* This reduction can run on any number of threads,
253      * independently of bt->nthreads.
254      * But if nthreads matches bt->nthreads (which it currently does)
255      * the uniform distribution of the touched blocks over nthreads will
256      * match the distribution of bonded over threads well in most cases,
257      * which means that threads mostly reduce their own data which increases
258      * the number of cache hits.
259      */
260 #pragma omp parallel for num_threads(nthreads) schedule(static)
261     for (int b = 0; b < bt->nblock_used; b++)
262     {
263         try
264         {
265             int    ind = bt->block_index[b];
266             rvec4* fp[MAX_BONDED_THREADS];
267
268             /* Determine which threads contribute to this block */
269             int nfb = 0;
270             for (int ft = 0; ft < bt->nthreads; ft++)
271             {
272                 if (bitmask_is_set(bt->mask[ind], ft))
273                 {
274                     fp[nfb++] = bt->f_t[ft]->f;
275                 }
276             }
277             if (nfb > 0)
278             {
279                 /* Reduce force buffers for threads that contribute */
280                 int a0 = ind * reduction_block_size;
281                 int a1 = (ind + 1) * reduction_block_size;
282                 /* It would be nice if we could pad f to avoid this min */
283                 a1 = std::min(a1, numAtomsForce);
284                 for (int a = a0; a < a1; a++)
285                 {
286                     for (int fb = 0; fb < nfb; fb++)
287                     {
288                         rvec_inc(f[a], fp[fb][a]);
289                     }
290                 }
291             }
292         }
293         GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
294     }
295 }
296
297 /*! \brief Reduce thread-local forces, shift forces and energies */
298 void reduce_thread_output(gmx::ForceWithShiftForces* forceWithShiftForces,
299                           real*                      ener,
300                           gmx_grppairener_t*         grpp,
301                           real*                      dvdl,
302                           const bonded_threading_t*  bt,
303                           const gmx::StepWorkload&   stepWork)
304 {
305     assert(bt->haveBondeds);
306
307     if (bt->nblock_used > 0)
308     {
309         /* Reduce the bonded force buffer */
310         reduce_thread_forces(forceWithShiftForces->force(), bt, bt->nthreads);
311     }
312
313     rvec* gmx_restrict fshift = as_rvec_array(forceWithShiftForces->shiftForces().data());
314
315     /* When necessary, reduce energy and virial using one thread only */
316     if ((stepWork.computeEnergy || stepWork.computeVirial || stepWork.computeDhdl) && bt->nthreads > 1)
317     {
318         gmx::ArrayRef<const std::unique_ptr<f_thread_t>> f_t = bt->f_t;
319
320         if (stepWork.computeVirial)
321         {
322             for (int i = 0; i < SHIFTS; i++)
323             {
324                 for (int t = 1; t < bt->nthreads; t++)
325                 {
326                     rvec_inc(fshift[i], f_t[t]->fshift[i]);
327                 }
328             }
329         }
330         if (stepWork.computeEnergy)
331         {
332             for (int i = 0; i < F_NRE; i++)
333             {
334                 for (int t = 1; t < bt->nthreads; t++)
335                 {
336                     ener[i] += f_t[t]->ener[i];
337                 }
338             }
339             for (int i = 0; i < egNR; i++)
340             {
341                 for (int j = 0; j < f_t[1]->grpp.nener; j++)
342                 {
343                     for (int t = 1; t < bt->nthreads; t++)
344                     {
345                         grpp->ener[i][j] += f_t[t]->grpp.ener[i][j];
346                     }
347                 }
348             }
349         }
350         if (stepWork.computeDhdl)
351         {
352             for (int i = 0; i < efptNR; i++)
353             {
354
355                 for (int t = 1; t < bt->nthreads; t++)
356                 {
357                     dvdl[i] += f_t[t]->dvdl[i];
358                 }
359             }
360         }
361     }
362 }
363
364 /*! \brief Returns the bonded kernel flavor
365  *
366  * Note that energies are always requested when the virial
367  * is requested (performance gain would be small).
368  * Note that currently we do not have bonded kernels that
369  * do not compute forces.
370  */
371 BondedKernelFlavor selectBondedKernelFlavor(const gmx::StepWorkload& stepWork,
372                                             const bool               useSimdKernels,
373                                             const bool               havePerturbedInteractions)
374 {
375     BondedKernelFlavor flavor;
376     if (stepWork.computeEnergy || stepWork.computeVirial)
377     {
378         if (stepWork.computeVirial)
379         {
380             flavor = BondedKernelFlavor::ForcesAndVirialAndEnergy;
381         }
382         else
383         {
384             flavor = BondedKernelFlavor::ForcesAndEnergy;
385         }
386     }
387     else
388     {
389         if (useSimdKernels && !havePerturbedInteractions)
390         {
391             flavor = BondedKernelFlavor::ForcesSimdWhenAvailable;
392         }
393         else
394         {
395             flavor = BondedKernelFlavor::ForcesNoSimd;
396         }
397     }
398
399     return flavor;
400 }
401
402 /*! \brief Calculate one element of the list of bonded interactions
403     for this thread */
404 real calc_one_bond(int                           thread,
405                    int                           ftype,
406                    const InteractionDefinitions& idef,
407                    ArrayRef<const int>           iatoms,
408                    const int                     numNonperturbedInteractions,
409                    const WorkDivision&           workDivision,
410                    const rvec                    x[],
411                    rvec4                         f[],
412                    rvec                          fshift[],
413                    const t_forcerec*             fr,
414                    const t_pbc*                  pbc,
415                    gmx_grppairener_t*            grpp,
416                    t_nrnb*                       nrnb,
417                    const real*                   lambda,
418                    real*                         dvdl,
419                    const t_mdatoms*              md,
420                    t_fcdata*                     fcd,
421                    const gmx::StepWorkload&      stepWork,
422                    int*                          global_atom_index)
423 {
424     GMX_ASSERT(idef.ilsort == ilsortNO_FE || idef.ilsort == ilsortFE_SORTED,
425                "The topology should be marked either as no FE or sorted on FE");
426
427     const bool havePerturbedInteractions =
428             (idef.ilsort == ilsortFE_SORTED && numNonperturbedInteractions < iatoms.ssize());
429     BondedKernelFlavor flavor =
430             selectBondedKernelFlavor(stepWork, fr->use_simd_kernels, havePerturbedInteractions);
431     int efptFTYPE;
432     if (IS_RESTRAINT_TYPE(ftype))
433     {
434         efptFTYPE = efptRESTRAINT;
435     }
436     else
437     {
438         efptFTYPE = efptBONDED;
439     }
440
441     const int nat1   = interaction_function[ftype].nratoms + 1;
442     const int nbonds = iatoms.ssize() / nat1;
443
444     GMX_ASSERT(fr->gpuBonded != nullptr || workDivision.end(ftype) == iatoms.ssize(),
445                "The thread division should match the topology");
446
447     const int nb0 = workDivision.bound(ftype, thread);
448     const int nbn = workDivision.bound(ftype, thread + 1) - nb0;
449
450     ArrayRef<const t_iparams> iparams = idef.iparams;
451
452     real v = 0;
453     if (!isPairInteraction(ftype))
454     {
455         if (ftype == F_CMAP)
456         {
457             /* TODO The execution time for CMAP dihedrals might be
458                nice to account to its own subtimer, but first
459                wallcycle needs to be extended to support calling from
460                multiple threads. */
461             v = cmap_dihs(nbn,
462                           iatoms.data() + nb0,
463                           iparams.data(),
464                           &idef.cmap_grid,
465                           x,
466                           f,
467                           fshift,
468                           pbc,
469                           lambda[efptFTYPE],
470                           &(dvdl[efptFTYPE]),
471                           md,
472                           fcd,
473                           global_atom_index);
474         }
475         else
476         {
477             v = calculateSimpleBond(ftype,
478                                     nbn,
479                                     iatoms.data() + nb0,
480                                     iparams.data(),
481                                     x,
482                                     f,
483                                     fshift,
484                                     pbc,
485                                     lambda[efptFTYPE],
486                                     &(dvdl[efptFTYPE]),
487                                     md,
488                                     fcd,
489                                     global_atom_index,
490                                     flavor);
491         }
492     }
493     else
494     {
495         /* TODO The execution time for pairs might be nice to account
496            to its own subtimer, but first wallcycle needs to be
497            extended to support calling from multiple threads. */
498         do_pairs(ftype,
499                  nbn,
500                  iatoms.data() + nb0,
501                  iparams.data(),
502                  x,
503                  f,
504                  fshift,
505                  pbc,
506                  lambda,
507                  dvdl,
508                  md,
509                  fr,
510                  havePerturbedInteractions,
511                  stepWork,
512                  grpp,
513                  global_atom_index);
514     }
515
516     if (thread == 0)
517     {
518         inc_nrnb(nrnb, nrnbIndex(ftype), nbonds);
519     }
520
521     return v;
522 }
523
524 } // namespace
525
526 /*! \brief Compute the bonded part of the listed forces, parallelized over threads
527  */
528 static void calcBondedForces(const InteractionDefinitions& idef,
529                              bonded_threading_t*           bt,
530                              const rvec                    x[],
531                              const t_forcerec*             fr,
532                              const t_pbc*                  pbc_null,
533                              rvec*                         fshiftMasterBuffer,
534                              gmx_enerdata_t*               enerd,
535                              t_nrnb*                       nrnb,
536                              const real*                   lambda,
537                              real*                         dvdl,
538                              const t_mdatoms*              md,
539                              t_fcdata*                     fcd,
540                              const gmx::StepWorkload&      stepWork,
541                              int*                          global_atom_index)
542 {
543 #pragma omp parallel for num_threads(bt->nthreads) schedule(static)
544     for (int thread = 0; thread < bt->nthreads; thread++)
545     {
546         try
547         {
548             f_thread_t& threadBuffers = *bt->f_t[thread];
549             int         ftype;
550             real *      epot, v;
551             /* thread stuff */
552             rvec*              fshift;
553             real*              dvdlt;
554             gmx_grppairener_t* grpp;
555
556             zero_thread_output(&threadBuffers);
557
558             rvec4* ft = threadBuffers.f;
559
560             /* Thread 0 writes directly to the main output buffers.
561              * We might want to reconsider this.
562              */
563             if (thread == 0)
564             {
565                 fshift = fshiftMasterBuffer;
566                 epot   = enerd->term;
567                 grpp   = &enerd->grpp;
568                 dvdlt  = dvdl;
569             }
570             else
571             {
572                 fshift = as_rvec_array(threadBuffers.fshift.data());
573                 epot   = threadBuffers.ener;
574                 grpp   = &threadBuffers.grpp;
575                 dvdlt  = threadBuffers.dvdl;
576             }
577             /* Loop over all bonded force types to calculate the bonded forces */
578             for (ftype = 0; (ftype < F_NRE); ftype++)
579             {
580                 const InteractionList& ilist = idef.il[ftype];
581                 if (!ilist.empty() && ftype_is_bonded_potential(ftype))
582                 {
583                     ArrayRef<const int> iatoms = gmx::makeConstArrayRef(ilist.iatoms);
584                     v                          = calc_one_bond(thread,
585                                       ftype,
586                                       idef,
587                                       iatoms,
588                                       idef.numNonperturbedInteractions[ftype],
589                                       bt->workDivision,
590                                       x,
591                                       ft,
592                                       fshift,
593                                       fr,
594                                       pbc_null,
595                                       grpp,
596                                       nrnb,
597                                       lambda,
598                                       dvdlt,
599                                       md,
600                                       fcd,
601                                       stepWork,
602                                       global_atom_index);
603                     epot[ftype] += v;
604                 }
605             }
606         }
607         GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
608     }
609 }
610
611 bool ListedForces::haveRestraints(const t_fcdata& fcdata) const
612 {
613     GMX_ASSERT(fcdata.orires && fcdata.disres, "NMR restraints objects should be set up");
614
615     return (!idef_->il[F_POSRES].empty() || !idef_->il[F_FBPOSRES].empty() || fcdata.orires->nr > 0
616             || fcdata.disres->nres > 0);
617 }
618
619 bool ListedForces::haveCpuBondeds() const
620 {
621     return threading_->haveBondeds;
622 }
623
624 bool ListedForces::haveCpuListedForces(const t_fcdata& fcdata) const
625 {
626     return haveCpuBondeds() || haveRestraints(fcdata);
627 }
628
629 namespace
630 {
631
632 /*! \brief Calculates all listed force interactions. */
633 void calc_listed(struct gmx_wallcycle*         wcycle,
634                  const InteractionDefinitions& idef,
635                  bonded_threading_t*           bt,
636                  const rvec                    x[],
637                  gmx::ForceOutputs*            forceOutputs,
638                  const t_forcerec*             fr,
639                  const t_pbc*                  pbc,
640                  gmx_enerdata_t*               enerd,
641                  t_nrnb*                       nrnb,
642                  const real*                   lambda,
643                  const t_mdatoms*              md,
644                  t_fcdata*                     fcd,
645                  int*                          global_atom_index,
646                  const gmx::StepWorkload&      stepWork)
647 {
648     if (bt->haveBondeds)
649     {
650         gmx::ForceWithShiftForces& forceWithShiftForces = forceOutputs->forceWithShiftForces();
651
652         wallcycle_sub_start(wcycle, ewcsLISTED);
653         /* The dummy array is to have a place to store the dhdl at other values
654            of lambda, which will be thrown away in the end */
655         real dvdl[efptNR] = { 0 };
656         calcBondedForces(idef,
657                          bt,
658                          x,
659                          fr,
660                          fr->bMolPBC ? pbc : nullptr,
661                          as_rvec_array(forceWithShiftForces.shiftForces().data()),
662                          enerd,
663                          nrnb,
664                          lambda,
665                          dvdl,
666                          md,
667                          fcd,
668                          stepWork,
669                          global_atom_index);
670         wallcycle_sub_stop(wcycle, ewcsLISTED);
671
672         wallcycle_sub_start(wcycle, ewcsLISTED_BUF_OPS);
673         reduce_thread_output(&forceWithShiftForces, enerd->term, &enerd->grpp, dvdl, bt, stepWork);
674
675         if (stepWork.computeDhdl)
676         {
677             for (int i = 0; i < efptNR; i++)
678             {
679                 enerd->dvdl_nonlin[i] += dvdl[i];
680             }
681         }
682         wallcycle_sub_stop(wcycle, ewcsLISTED_BUF_OPS);
683     }
684
685     /* Copy the sum of violations for the distance restraints from fcd */
686     if (fcd)
687     {
688         enerd->term[F_DISRESVIOL] = fcd->disres->sumviol;
689     }
690 }
691
692 /*! \brief As calc_listed(), but only determines the potential energy
693  * for the perturbed interactions.
694  *
695  * The shift forces in fr are not affected.
696  */
697 void calc_listed_lambda(const InteractionDefinitions& idef,
698                         bonded_threading_t*           bt,
699                         const rvec                    x[],
700                         const t_forcerec*             fr,
701                         const struct t_pbc*           pbc,
702                         gmx::ArrayRef<real>           forceBufferLambda,
703                         gmx::ArrayRef<gmx::RVec>      shiftForceBufferLambda,
704                         gmx_grppairener_t*            grpp,
705                         real*                         epot,
706                         gmx::ArrayRef<real>           dvdl,
707                         t_nrnb*                       nrnb,
708                         const real*                   lambda,
709                         const t_mdatoms*              md,
710                         t_fcdata*                     fcd,
711                         int*                          global_atom_index)
712 {
713     WorkDivision& workDivision = bt->foreignLambdaWorkDivision;
714
715     const t_pbc* pbc_null;
716     if (fr->bMolPBC)
717     {
718         pbc_null = pbc;
719     }
720     else
721     {
722         pbc_null = nullptr;
723     }
724
725     /* We already have the forces, so we use temp buffers here */
726     std::fill(forceBufferLambda.begin(), forceBufferLambda.end(), 0.0_real);
727     std::fill(shiftForceBufferLambda.begin(),
728               shiftForceBufferLambda.end(),
729               gmx::RVec{ 0.0_real, 0.0_real, 0.0_real });
730     rvec4* f      = reinterpret_cast<rvec4*>(forceBufferLambda.data());
731     rvec*  fshift = as_rvec_array(shiftForceBufferLambda.data());
732
733     /* Loop over all bonded force types to calculate the bonded energies */
734     for (int ftype = 0; (ftype < F_NRE); ftype++)
735     {
736         if (ftype_is_bonded_potential(ftype))
737         {
738             const InteractionList& ilist = idef.il[ftype];
739             /* Create a temporary iatom list with only perturbed interactions */
740             const int           numNonperturbed = idef.numNonperturbedInteractions[ftype];
741             ArrayRef<const int> iatomsPerturbed = gmx::constArrayRefFromArray(
742                     ilist.iatoms.data() + numNonperturbed, ilist.size() - numNonperturbed);
743             if (!iatomsPerturbed.empty())
744             {
745                 /* Set the work range of thread 0 to the perturbed bondeds */
746                 workDivision.setBound(ftype, 0, 0);
747                 workDivision.setBound(ftype, 1, iatomsPerturbed.ssize());
748
749                 gmx::StepWorkload tempFlags;
750                 tempFlags.computeEnergy = true;
751                 real v                  = calc_one_bond(0,
752                                        ftype,
753                                        idef,
754                                        iatomsPerturbed,
755                                        iatomsPerturbed.ssize(),
756                                        workDivision,
757                                        x,
758                                        f,
759                                        fshift,
760                                        fr,
761                                        pbc_null,
762                                        grpp,
763                                        nrnb,
764                                        lambda,
765                                        dvdl.data(),
766                                        md,
767                                        fcd,
768                                        tempFlags,
769                                        global_atom_index);
770                 epot[ftype] += v;
771             }
772         }
773     }
774 }
775
776 } // namespace
777
778 void ListedForces::calculate(struct gmx_wallcycle*                     wcycle,
779                              const matrix                              box,
780                              const t_lambda*                           fepvals,
781                              const t_commrec*                          cr,
782                              const gmx_multisim_t*                     ms,
783                              gmx::ArrayRefWithPadding<const gmx::RVec> coordinates,
784                              gmx::ArrayRef<const gmx::RVec>            xWholeMolecules,
785                              t_fcdata*                                 fcdata,
786                              history_t*                                hist,
787                              gmx::ForceOutputs*                        forceOutputs,
788                              const t_forcerec*                         fr,
789                              const struct t_pbc*                       pbc,
790                              gmx_enerdata_t*                           enerd,
791                              t_nrnb*                                   nrnb,
792                              const real*                               lambda,
793                              const t_mdatoms*                          md,
794                              int*                                      global_atom_index,
795                              const gmx::StepWorkload&                  stepWork)
796 {
797     if (interactionSelection_.none() || !stepWork.computeListedForces)
798     {
799         return;
800     }
801
802     const InteractionDefinitions& idef = *idef_;
803
804     // Todo: replace all rvec use here with ArrayRefWithPadding
805     const rvec* x = as_rvec_array(coordinates.paddedArrayRef().data());
806
807     const bool calculateRestInteractions =
808             interactionSelection_.test(static_cast<int>(ListedForces::InteractionGroup::Rest));
809
810     t_pbc pbc_full; /* Full PBC is needed for position restraints */
811     if (calculateRestInteractions && haveRestraints(*fcdata))
812     {
813         if (!idef.il[F_POSRES].empty() || !idef.il[F_FBPOSRES].empty())
814         {
815             /* Not enough flops to bother counting */
816             set_pbc(&pbc_full, fr->pbcType, box);
817         }
818
819         /* TODO Use of restraints triggers further function calls
820            inside the loop over calc_one_bond(), but those are too
821            awkward to account to this subtimer properly in the present
822            code. We don't test / care much about performance with
823            restraints, anyway. */
824         wallcycle_sub_start(wcycle, ewcsRESTRAINTS);
825
826         if (!idef.il[F_POSRES].empty())
827         {
828             posres_wrapper(nrnb, idef, &pbc_full, x, enerd, lambda, fr, &forceOutputs->forceWithVirial());
829         }
830
831         if (!idef.il[F_FBPOSRES].empty())
832         {
833             fbposres_wrapper(nrnb, idef, &pbc_full, x, enerd, fr, &forceOutputs->forceWithVirial());
834         }
835
836         /* Do pre force calculation stuff which might require communication */
837         if (fcdata->orires->nr > 0)
838         {
839             GMX_ASSERT(!xWholeMolecules.empty(), "Need whole molecules for orienation restraints");
840             enerd->term[F_ORIRESDEV] = calc_orires_dev(ms,
841                                                        idef.il[F_ORIRES].size(),
842                                                        idef.il[F_ORIRES].iatoms.data(),
843                                                        idef.iparams.data(),
844                                                        md,
845                                                        xWholeMolecules,
846                                                        x,
847                                                        fr->bMolPBC ? pbc : nullptr,
848                                                        fcdata->orires,
849                                                        hist);
850         }
851         if (fcdata->disres->nres > 0)
852         {
853             calc_disres_R_6(cr,
854                             ms,
855                             idef.il[F_DISRES].size(),
856                             idef.il[F_DISRES].iatoms.data(),
857                             x,
858                             fr->bMolPBC ? pbc : nullptr,
859                             fcdata->disres,
860                             hist);
861         }
862
863         wallcycle_sub_stop(wcycle, ewcsRESTRAINTS);
864     }
865
866     calc_listed(wcycle, idef, threading_.get(), x, forceOutputs, fr, pbc, enerd, nrnb, lambda, md, fcdata, global_atom_index, stepWork);
867
868     /* Check if we have to determine energy differences
869      * at foreign lambda's.
870      */
871     if (fepvals->n_lambda > 0 && stepWork.computeDhdl)
872     {
873         real dvdl[efptNR] = { 0 };
874         if (!idef.il[F_POSRES].empty())
875         {
876             posres_wrapper_lambda(wcycle, fepvals, idef, &pbc_full, x, enerd, lambda, fr);
877         }
878         if (idef.ilsort != ilsortNO_FE)
879         {
880             wallcycle_sub_start(wcycle, ewcsLISTED_FEP);
881             if (idef.ilsort != ilsortFE_SORTED)
882             {
883                 gmx_incons("The bonded interactions are not sorted for free energy");
884             }
885             for (int i = 0; i < 1 + enerd->foreignLambdaTerms.numLambdas(); i++)
886             {
887                 real lam_i[efptNR];
888
889                 reset_foreign_enerdata(enerd);
890                 for (int j = 0; j < efptNR; j++)
891                 {
892                     lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i - 1]);
893                 }
894                 calc_listed_lambda(idef,
895                                    threading_.get(),
896                                    x,
897                                    fr,
898                                    pbc,
899                                    forceBufferLambda_,
900                                    shiftForceBufferLambda_,
901                                    &(enerd->foreign_grpp),
902                                    enerd->foreign_term,
903                                    dvdl,
904                                    nrnb,
905                                    lam_i,
906                                    md,
907                                    fcdata,
908                                    global_atom_index);
909                 sum_epot(enerd->foreign_grpp, enerd->foreign_term);
910                 const double dvdlSum = std::accumulate(std::begin(dvdl), std::end(dvdl), 0.);
911                 std::fill(std::begin(dvdl), std::end(dvdl), 0.0);
912                 enerd->foreignLambdaTerms.accumulate(i, enerd->foreign_term[F_EPOT], dvdlSum);
913             }
914             wallcycle_sub_stop(wcycle, ewcsLISTED_FEP);
915         }
916     }
917 }