82b93fa3e2d256c89bee174e3a0792d876ff7dee
[alexxy/gromacs.git] / src / gromacs / listed_forces / listed_forces.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
5  * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
6  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7  * and including many others, as listed in the AUTHORS file in the
8  * top-level source directory and at http://www.gromacs.org.
9  *
10  * GROMACS is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public License
12  * as published by the Free Software Foundation; either version 2.1
13  * of the License, or (at your option) any later version.
14  *
15  * GROMACS is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with GROMACS; if not, see
22  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
24  *
25  * If you want to redistribute modifications to GROMACS, please
26  * consider that scientific software is very special. Version
27  * control is crucial - bugs must be traceable. We will be happy to
28  * consider code for inclusion in the official distribution, but
29  * derived work must not be called official GROMACS. Details are found
30  * in the README & COPYING files - if they are missing, get the
31  * official version at http://www.gromacs.org.
32  *
33  * To help us fund GROMACS development, we humbly ask that you cite
34  * the research papers on the package. Check out http://www.gromacs.org.
35  */
36 /*! \internal \file
37  *
38  * \brief This file defines high-level functions for mdrun to compute
39  * energies and forces for listed interactions.
40  *
41  * \author Mark Abraham <mark.j.abraham@gmail.com>
42  *
43  * \ingroup module_listed_forces
44  */
45 #include "gmxpre.h"
46
47 #include "gromacs/utility/arrayref.h"
48 #include "gromacs/utility/enumerationhelpers.h"
49 #include "listed_forces.h"
50
51 #include <cassert>
52
53 #include <algorithm>
54 #include <array>
55 #include <numeric>
56
57 #include "gromacs/gmxlib/network.h"
58 #include "gromacs/gmxlib/nrnb.h"
59 #include "gromacs/listed_forces/bonded.h"
60 #include "gromacs/listed_forces/disre.h"
61 #include "gromacs/listed_forces/orires.h"
62 #include "gromacs/listed_forces/pairs.h"
63 #include "gromacs/listed_forces/position_restraints.h"
64 #include "gromacs/math/vec.h"
65 #include "gromacs/mdlib/enerdata_utils.h"
66 #include "gromacs/mdlib/force.h"
67 #include "gromacs/mdlib/gmx_omp_nthreads.h"
68 #include "gromacs/mdtypes/commrec.h"
69 #include "gromacs/mdtypes/mdatom.h"
70 #include "gromacs/mdtypes/fcdata.h"
71 #include "gromacs/mdtypes/forceoutput.h"
72 #include "gromacs/mdtypes/forcerec.h"
73 #include "gromacs/mdtypes/inputrec.h"
74 #include "gromacs/mdtypes/md_enums.h"
75 #include "gromacs/mdtypes/simulation_workload.h"
76 #include "gromacs/pbcutil/ishift.h"
77 #include "gromacs/pbcutil/pbc.h"
78 #include "gromacs/timing/wallcycle.h"
79 #include "gromacs/topology/topology.h"
80 #include "gromacs/utility/exceptions.h"
81 #include "gromacs/utility/fatalerror.h"
82
83 #include "listed_internal.h"
84 #include "manage_threading.h"
85 #include "utilities.h"
86
87 ListedForces::ListedForces(const gmx_ffparams_t&      ffparams,
88                            const int                  numEnergyGroups,
89                            const int                  numThreads,
90                            const InteractionSelection interactionSelection,
91                            FILE*                      fplog) :
92     idefSelection_(ffparams),
93     threading_(std::make_unique<bonded_threading_t>(numThreads, numEnergyGroups, fplog)),
94     interactionSelection_(interactionSelection)
95 {
96 }
97
98 ListedForces::ListedForces(ListedForces&& o) noexcept = default;
99
100 ListedForces::~ListedForces() = default;
101
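/* A minimal caller-side sketch (illustrative only; the names mtop, numEnergyGroups,
 * numOmpThreads and fplog are hypothetical): the bitset-style InteractionSelection
 * decides which interaction groups this instance will compute, matching the
 * test()/all()/none() usage elsewhere in this file.
 *
 * \code
 *   ListedForces::InteractionSelection selection;
 *   selection.set(static_cast<int>(ListedForces::InteractionGroup::Pairs));
 *   selection.set(static_cast<int>(ListedForces::InteractionGroup::Rest));
 *   ListedForces listedForces(mtop.ffparams, numEnergyGroups, numOmpThreads, selection, fplog);
 * \endcode
 */
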
102 //! Copies the selected interactions from \p idefSrc to \p idef
103 static void selectInteractions(InteractionDefinitions*                  idef,
104                                const InteractionDefinitions&            idefSrc,
105                                const ListedForces::InteractionSelection interactionSelection)
106 {
107     const bool selectPairs =
108             interactionSelection.test(static_cast<int>(ListedForces::InteractionGroup::Pairs));
109     const bool selectDihedrals =
110             interactionSelection.test(static_cast<int>(ListedForces::InteractionGroup::Dihedrals));
111     const bool selectAngles =
112             interactionSelection.test(static_cast<int>(ListedForces::InteractionGroup::Angles));
113     const bool selectRest =
114             interactionSelection.test(static_cast<int>(ListedForces::InteractionGroup::Rest));
115
116     for (int ftype = 0; ftype < F_NRE; ftype++)
117     {
118         const t_interaction_function& ifunc = interaction_function[ftype];
119         if (ifunc.flags & IF_BOND)
120         {
121             bool assign = false;
122             if (ifunc.flags & IF_PAIR)
123             {
124                 assign = selectPairs;
125             }
126             else if (ifunc.flags & IF_DIHEDRAL)
127             {
128                 assign = selectDihedrals;
129             }
130             else if (ifunc.flags & IF_ATYPE)
131             {
132                 assign = selectAngles;
133             }
134             else
135             {
136                 assign = selectRest;
137             }
138             if (assign)
139             {
140                 idef->il[ftype] = idefSrc.il[ftype];
141             }
142             else
143             {
144                 idef->il[ftype].clear();
145             }
146         }
147     }
148 }
149
150 void ListedForces::setup(const InteractionDefinitions& domainIdef, const int numAtomsForce, const bool useGpu)
151 {
152     if (interactionSelection_.all())
153     {
154         // Avoid the overhead of copying all interaction lists by simply setting the reference to the domain idef
155         idef_ = &domainIdef;
156     }
157     else
158     {
159         idef_ = &idefSelection_;
160
161         selectInteractions(&idefSelection_, domainIdef, interactionSelection_);
162
163         idefSelection_.ilsort = domainIdef.ilsort;
164
165         if (interactionSelection_.test(static_cast<int>(ListedForces::InteractionGroup::Rest)))
166         {
167             idefSelection_.iparams_posres   = domainIdef.iparams_posres;
168             idefSelection_.iparams_fbposres = domainIdef.iparams_fbposres;
169         }
170         else
171         {
172             idefSelection_.iparams_posres.clear();
173             idefSelection_.iparams_fbposres.clear();
174         }
175     }
176
177     setup_bonded_threading(threading_.get(), numAtomsForce, useGpu, *idef_);
178
179     if (idef_->ilsort == ilsortFE_SORTED)
180     {
181         forceBufferLambda_.resize(numAtomsForce * sizeof(rvec4) / sizeof(real));
182         shiftForceBufferLambda_.resize(SHIFTS);
183     }
184 }
185
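/* Sketch of the expected call sequence (hypothetical caller-side names): after
 * every domain (re)partitioning, point the instance at the fresh interaction
 * definitions and the size of the local force buffer before calling calculate().
 *
 * \code
 *   listedForces.setup(localTopology.idef, numAtomsWithForce, haveGpuBondedWork);
 * \endcode
 */
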
186 namespace
187 {
188
189 using gmx::ArrayRef;
190
191 /*! \brief Return true if ftype is an explicit pair-listed LJ or
192  * COULOMB interaction type: bonded LJ (usually 1-4), or special
193  * listed non-bonded for FEP. */
194 bool isPairInteraction(int ftype)
195 {
196     return ((ftype) >= F_LJ14 && (ftype) <= F_LJC_PAIRS_NB);
197 }
198
199 /*! \brief Zero thread-local output buffers */
200 void zero_thread_output(f_thread_t* f_t)
201 {
202     constexpr int nelem_fa = sizeof(f_t->f[0]) / sizeof(real);
203
204     for (int i = 0; i < f_t->nblock_used; i++)
205     {
206         int a0 = f_t->block_index[i] * reduction_block_size;
207         int a1 = a0 + reduction_block_size;
208         for (int a = a0; a < a1; a++)
209         {
210             for (int d = 0; d < nelem_fa; d++)
211             {
212                 f_t->f[a][d] = 0;
213             }
214         }
215     }
216
217     for (int i = 0; i < SHIFTS; i++)
218     {
219         clear_rvec(f_t->fshift[i]);
220     }
221     for (int i = 0; i < F_NRE; i++)
222     {
223         f_t->ener[i] = 0;
224     }
225     for (int i = 0; i < static_cast<int>(NonBondedEnergyTerms::Count); i++)
226     {
227         for (int j = 0; j < f_t->grpp.nener; j++)
228         {
229             f_t->grpp.energyGroupPairTerms[i][j] = 0;
230         }
231     }
232     for (auto i : keysOf(f_t->dvdl))
233     {
234         f_t->dvdl[i] = 0;
235     }
236 }
237
238 /*! \brief The maximum thread count is arbitrary; we use a fixed number
239  * to avoid dynamic memory management.  Using more than 16 threads is
240  * probably never useful performance-wise. */
241 #define MAX_BONDED_THREADS 256
242
243 /*! \brief Reduce thread-local force buffers */
244 void reduce_thread_forces(gmx::ArrayRef<gmx::RVec> force, const bonded_threading_t* bt, int nthreads)
245 {
246     if (nthreads > MAX_BONDED_THREADS)
247     {
248         gmx_fatal(FARGS, "Cannot reduce bonded forces on more than %d threads", MAX_BONDED_THREADS);
249     }
250
251     rvec* gmx_restrict f = as_rvec_array(force.data());
252
253     const int numAtomsForce = bt->numAtomsForce;
254
255     /* This reduction can run on any number of threads,
256      * independently of bt->nthreads.
257      * But when nthreads matches bt->nthreads (which it currently does),
258      * the uniform distribution of the touched blocks over nthreads
259      * matches the distribution of bonded interactions over threads
260      * well in most cases, so threads mostly reduce their own data,
261      * which increases the number of cache hits.
262      */
263 #pragma omp parallel for num_threads(nthreads) schedule(static)
264     for (int b = 0; b < bt->nblock_used; b++)
265     {
266         try
267         {
268             int    ind = bt->block_index[b];
269             rvec4* fp[MAX_BONDED_THREADS];
270
271             /* Determine which threads contribute to this block */
272             int nfb = 0;
273             for (int ft = 0; ft < bt->nthreads; ft++)
274             {
275                 if (bitmask_is_set(bt->mask[ind], ft))
276                 {
277                     fp[nfb++] = bt->f_t[ft]->f;
278                 }
279             }
280             if (nfb > 0)
281             {
282                 /* Reduce force buffers for threads that contribute */
283                 int a0 = ind * reduction_block_size;
284                 int a1 = (ind + 1) * reduction_block_size;
285                 /* It would be nice if we could pad f to avoid this min */
286                 a1 = std::min(a1, numAtomsForce);
287                 for (int a = a0; a < a1; a++)
288                 {
289                     for (int fb = 0; fb < nfb; fb++)
290                     {
291                         rvec_inc(f[a], fp[fb][a]);
292                     }
293                 }
294             }
295         }
296         GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
297     }
298 }
299
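/* The per-block bitmask bookkeeping used in reduce_thread_forces() above can
 * be pictured with a plain integer (a conceptual sketch only; the real code
 * uses gmx_bitmask_t, which is not limited to 64 threads):
 *
 * \code
 *   const int numThreads = 4;
 *   uint64_t  mask       = 0;
 *   mask |= (1ULL << 0); // thread 0 wrote forces into this reduction block
 *   mask |= (1ULL << 3); // thread 3 wrote forces into this reduction block
 *   for (int t = 0; t < numThreads; t++)
 *   {
 *       if ((mask >> t) & 1ULL) // analogous to bitmask_is_set(mask, t)
 *       {
 *           // accumulate thread t's force buffer for this block into the output
 *       }
 *   }
 * \endcode
 */
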
300 /*! \brief Reduce thread-local forces, shift forces and energies */
301 void reduce_thread_output(gmx::ForceWithShiftForces* forceWithShiftForces,
302                           real*                      ener,
303                           gmx_grppairener_t*         grpp,
304                           gmx::ArrayRef<real>        dvdl,
305                           const bonded_threading_t*  bt,
306                           const gmx::StepWorkload&   stepWork)
307 {
308     assert(bt->haveBondeds);
309
310     if (bt->nblock_used > 0)
311     {
312         /* Reduce the bonded force buffer */
313         reduce_thread_forces(forceWithShiftForces->force(), bt, bt->nthreads);
314     }
315
316     rvec* gmx_restrict fshift = as_rvec_array(forceWithShiftForces->shiftForces().data());
317
318     /* When necessary, reduce energy and virial using one thread only */
319     if ((stepWork.computeEnergy || stepWork.computeVirial || stepWork.computeDhdl) && bt->nthreads > 1)
320     {
321         gmx::ArrayRef<const std::unique_ptr<f_thread_t>> f_t = bt->f_t;
322
323         if (stepWork.computeVirial)
324         {
325             for (int i = 0; i < SHIFTS; i++)
326             {
327                 for (int t = 1; t < bt->nthreads; t++)
328                 {
329                     rvec_inc(fshift[i], f_t[t]->fshift[i]);
330                 }
331             }
332         }
333         if (stepWork.computeEnergy)
334         {
335             for (int i = 0; i < F_NRE; i++)
336             {
337                 for (int t = 1; t < bt->nthreads; t++)
338                 {
339                     ener[i] += f_t[t]->ener[i];
340                 }
341             }
342             for (int i = 0; i < static_cast<int>(NonBondedEnergyTerms::Count); i++)
343             {
344                 for (int j = 0; j < f_t[1]->grpp.nener; j++)
345                 {
346                     for (int t = 1; t < bt->nthreads; t++)
347                     {
348                         grpp->energyGroupPairTerms[i][j] += f_t[t]->grpp.energyGroupPairTerms[i][j];
349                     }
350                 }
351             }
352         }
353         if (stepWork.computeDhdl)
354         {
355             for (auto i : keysOf(f_t[1]->dvdl))
356             {
357
358                 for (int t = 1; t < bt->nthreads; t++)
359                 {
360                     dvdl[static_cast<int>(i)] += f_t[t]->dvdl[i];
361                 }
362             }
363         }
364     }
365 }
366
367 /*! \brief Returns the bonded kernel flavor
368  *
369  * Note that energies are always requested when the virial
370  * is requested (performance gain would be small).
371  * Note that currently we do not have bonded kernels that
372  * do not compute forces.
373  */
374 BondedKernelFlavor selectBondedKernelFlavor(const gmx::StepWorkload& stepWork,
375                                             const bool               useSimdKernels,
376                                             const bool               havePerturbedInteractions)
377 {
378     BondedKernelFlavor flavor;
379     if (stepWork.computeEnergy || stepWork.computeVirial)
380     {
381         if (stepWork.computeVirial)
382         {
383             flavor = BondedKernelFlavor::ForcesAndVirialAndEnergy;
384         }
385         else
386         {
387             flavor = BondedKernelFlavor::ForcesAndEnergy;
388         }
389     }
390     else
391     {
392         if (useSimdKernels && !havePerturbedInteractions)
393         {
394             flavor = BondedKernelFlavor::ForcesSimdWhenAvailable;
395         }
396         else
397         {
398             flavor = BondedKernelFlavor::ForcesNoSimd;
399         }
400     }
401
402     return flavor;
403 }
404
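/* Summary of the flavor selection above (forces are always computed):
 *   computeVirial                                        -> ForcesAndVirialAndEnergy
 *   computeEnergy, no virial                             -> ForcesAndEnergy
 *   neither, SIMD allowed and no perturbed interactions  -> ForcesSimdWhenAvailable
 *   otherwise                                            -> ForcesNoSimd
 */
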
405 /*! \brief Calculate one element of the list of bonded interactions
406     for this thread */
407 real calc_one_bond(int                           thread,
408                    int                           ftype,
409                    const InteractionDefinitions& idef,
410                    ArrayRef<const int>           iatoms,
411                    const int                     numNonperturbedInteractions,
412                    const WorkDivision&           workDivision,
413                    const rvec                    x[],
414                    rvec4                         f[],
415                    rvec                          fshift[],
416                    const t_forcerec*             fr,
417                    const t_pbc*                  pbc,
418                    gmx_grppairener_t*            grpp,
419                    t_nrnb*                       nrnb,
420                    gmx::ArrayRef<const real>     lambda,
421                    gmx::ArrayRef<real>           dvdl,
422                    const t_mdatoms*              md,
423                    t_fcdata*                     fcd,
424                    const gmx::StepWorkload&      stepWork,
425                    int*                          global_atom_index)
426 {
427     GMX_ASSERT(idef.ilsort == ilsortNO_FE || idef.ilsort == ilsortFE_SORTED,
428                "The topology should be marked either as no FE or sorted on FE");
429
430     const bool havePerturbedInteractions =
431             (idef.ilsort == ilsortFE_SORTED && numNonperturbedInteractions < iatoms.ssize());
432     BondedKernelFlavor flavor =
433             selectBondedKernelFlavor(stepWork, fr->use_simd_kernels, havePerturbedInteractions);
434     FreeEnergyPerturbationCouplingType efptFTYPE;
435     if (IS_RESTRAINT_TYPE(ftype))
436     {
437         efptFTYPE = FreeEnergyPerturbationCouplingType::Restraint;
438     }
439     else
440     {
441         efptFTYPE = FreeEnergyPerturbationCouplingType::Bonded;
442     }
443
444     const int nat1   = interaction_function[ftype].nratoms + 1;
445     const int nbonds = iatoms.ssize() / nat1;
446
447     GMX_ASSERT(fr->gpuBonded != nullptr || workDivision.end(ftype) == iatoms.ssize(),
448                "The thread division should match the topology");
449
450     const int nb0 = workDivision.bound(ftype, thread);
451     const int nbn = workDivision.bound(ftype, thread + 1) - nb0;
452
453     ArrayRef<const t_iparams> iparams = idef.iparams;
454
455     real v = 0;
456     if (!isPairInteraction(ftype))
457     {
458         if (ftype == F_CMAP)
459         {
460             /* TODO It would be nice to account the execution time for
461                CMAP dihedrals to its own subtimer, but first wallcycle
462                needs to be extended to support calling from multiple
463                threads. */
464             v = cmap_dihs(nbn,
465                           iatoms.data() + nb0,
466                           iparams.data(),
467                           &idef.cmap_grid,
468                           x,
469                           f,
470                           fshift,
471                           pbc,
472                           lambda[static_cast<int>(efptFTYPE)],
473                           &(dvdl[static_cast<int>(efptFTYPE)]),
474                           gmx::arrayRefFromArray(md->chargeA, md->nr),
475                           fcd,
476                           nullptr,
477                           nullptr,
478                           global_atom_index);
479         }
480         else
481         {
482             v = calculateSimpleBond(ftype,
483                                     nbn,
484                                     iatoms.data() + nb0,
485                                     iparams.data(),
486                                     x,
487                                     f,
488                                     fshift,
489                                     pbc,
490                                     lambda[static_cast<int>(efptFTYPE)],
491                                     &(dvdl[static_cast<int>(efptFTYPE)]),
492                                     gmx::arrayRefFromArray(md->chargeA, md->nr),
493                                     fcd,
494                                     fcd->disres,
495                                     fcd->orires,
496                                     global_atom_index,
497                                     flavor);
498         }
499     }
500     else
501     {
502         /* TODO It would be nice to account the execution time for pairs
503            to its own subtimer, but first wallcycle needs to be
504            extended to support calling from multiple threads. */
505         do_pairs(ftype,
506                  nbn,
507                  iatoms.data() + nb0,
508                  iparams.data(),
509                  x,
510                  f,
511                  fshift,
512                  pbc,
513                  lambda.data(),
514                  dvdl.data(),
515                  md,
516                  fr,
517                  havePerturbedInteractions,
518                  stepWork,
519                  grpp,
520                  global_atom_index);
521     }
522
523     if (thread == 0)
524     {
525         inc_nrnb(nrnb, nrnbIndex(ftype), nbonds);
526     }
527
528     return v;
529 }
530
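/* Worked example of the work division used by calc_one_bond() above, with
 * illustrative numbers only: for F_ANGLES, nratoms is 3, so each angle
 * occupies nat1 = 4 entries of iatoms (one parameter-type index plus three
 * atom indices).  With 100 angles and 4 threads, workDivision could store
 * the bounds {0, 100, 200, 300, 400} in units of iatoms entries (always a
 * multiple of nat1), so thread 1 would process entries [100, 200), i.e.
 * angles 25..49. */
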
531 } // namespace
532
533 /*! \brief Compute the bonded part of the listed forces, parallelized over threads
534  */
535 static void calcBondedForces(const InteractionDefinitions& idef,
536                              bonded_threading_t*           bt,
537                              const rvec                    x[],
538                              const t_forcerec*             fr,
539                              const t_pbc*                  pbc_null,
540                              rvec*                         fshiftMasterBuffer,
541                              gmx_enerdata_t*               enerd,
542                              t_nrnb*                       nrnb,
543                              gmx::ArrayRef<const real>     lambda,
544                              gmx::ArrayRef<real>           dvdl,
545                              const t_mdatoms*              md,
546                              t_fcdata*                     fcd,
547                              const gmx::StepWorkload&      stepWork,
548                              int*                          global_atom_index)
549 {
550 #pragma omp parallel for num_threads(bt->nthreads) schedule(static)
551     for (int thread = 0; thread < bt->nthreads; thread++)
552     {
553         try
554         {
555             f_thread_t& threadBuffers = *bt->f_t[thread];
556             int         ftype;
557             real *      epot, v;
558             /* Per-thread output destinations */
559             rvec*               fshift;
560             gmx::ArrayRef<real> dvdlt;
561             gmx_grppairener_t*  grpp;
562
563             zero_thread_output(&threadBuffers);
564
565             rvec4* ft = threadBuffers.f;
566
567             /* Thread 0 writes directly to the main output buffers.
568              * We might want to reconsider this.
569              */
570             if (thread == 0)
571             {
572                 fshift = fshiftMasterBuffer;
573                 epot   = enerd->term;
574                 grpp   = &enerd->grpp;
575                 dvdlt  = dvdl;
576             }
577             else
578             {
579                 fshift = as_rvec_array(threadBuffers.fshift.data());
580                 epot   = threadBuffers.ener;
581                 grpp   = &threadBuffers.grpp;
582                 dvdlt  = threadBuffers.dvdl;
583             }
584             /* Loop over all bonded force types to calculate the bonded forces */
585             for (ftype = 0; (ftype < F_NRE); ftype++)
586             {
587                 const InteractionList& ilist = idef.il[ftype];
588                 if (!ilist.empty() && ftype_is_bonded_potential(ftype))
589                 {
590                     ArrayRef<const int> iatoms = gmx::makeConstArrayRef(ilist.iatoms);
591                     v                          = calc_one_bond(thread,
592                                       ftype,
593                                       idef,
594                                       iatoms,
595                                       idef.numNonperturbedInteractions[ftype],
596                                       bt->workDivision,
597                                       x,
598                                       ft,
599                                       fshift,
600                                       fr,
601                                       pbc_null,
602                                       grpp,
603                                       nrnb,
604                                       lambda,
605                                       dvdlt,
606                                       md,
607                                       fcd,
608                                       stepWork,
609                                       global_atom_index);
610                     epot[ftype] += v;
611                 }
612             }
613         }
614         GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
615     }
616 }
617
618 bool ListedForces::haveRestraints(const t_fcdata& fcdata) const
619 {
620     GMX_ASSERT(fcdata.orires && fcdata.disres, "NMR restraints objects should be set up");
621
622     return (!idef_->il[F_POSRES].empty() || !idef_->il[F_FBPOSRES].empty() || fcdata.orires->nr > 0
623             || fcdata.disres->nres > 0);
624 }
625
626 bool ListedForces::haveCpuBondeds() const
627 {
628     return threading_->haveBondeds;
629 }
630
631 bool ListedForces::haveCpuListedForces(const t_fcdata& fcdata) const
632 {
633     return haveCpuBondeds() || haveRestraints(fcdata);
634 }
635
636 namespace
637 {
638
639 /*! \brief Calculates all listed force interactions. */
640 void calc_listed(struct gmx_wallcycle*         wcycle,
641                  const InteractionDefinitions& idef,
642                  bonded_threading_t*           bt,
643                  const rvec                    x[],
644                  gmx::ForceOutputs*            forceOutputs,
645                  const t_forcerec*             fr,
646                  const t_pbc*                  pbc,
647                  gmx_enerdata_t*               enerd,
648                  t_nrnb*                       nrnb,
649                  gmx::ArrayRef<const real>     lambda,
650                  const t_mdatoms*              md,
651                  t_fcdata*                     fcd,
652                  int*                          global_atom_index,
653                  const gmx::StepWorkload&      stepWork)
654 {
655     if (bt->haveBondeds)
656     {
657         gmx::ForceWithShiftForces& forceWithShiftForces = forceOutputs->forceWithShiftForces();
658
659         wallcycle_sub_start(wcycle, ewcsLISTED);
660         /* The dvdl array collects dH/dl contributions from the bonded kernels;
661            they are added to enerd->dvdl_nonlin below if needed, else discarded */
662         gmx::EnumerationArray<FreeEnergyPerturbationCouplingType, real> dvdl = { 0 };
663         calcBondedForces(idef,
664                          bt,
665                          x,
666                          fr,
667                          fr->bMolPBC ? pbc : nullptr,
668                          as_rvec_array(forceWithShiftForces.shiftForces().data()),
669                          enerd,
670                          nrnb,
671                          lambda,
672                          dvdl,
673                          md,
674                          fcd,
675                          stepWork,
676                          global_atom_index);
677         wallcycle_sub_stop(wcycle, ewcsLISTED);
678
679         wallcycle_sub_start(wcycle, ewcsLISTED_BUF_OPS);
680         reduce_thread_output(&forceWithShiftForces, enerd->term, &enerd->grpp, dvdl, bt, stepWork);
681
682         if (stepWork.computeDhdl)
683         {
684             for (auto i : keysOf(enerd->dvdl_lin))
685             {
686                 enerd->dvdl_nonlin[i] += dvdl[i];
687             }
688         }
689         wallcycle_sub_stop(wcycle, ewcsLISTED_BUF_OPS);
690     }
691
692     /* Copy the sum of violations for the distance restraints from fcd */
693     if (fcd)
694     {
695         enerd->term[F_DISRESVIOL] = fcd->disres->sumviol;
696     }
697 }
698
699 /*! \brief As calc_listed(), but only determines the potential energy
700  * for the perturbed interactions.
701  *
702  * The shift forces in fr are not affected.
703  */
704 void calc_listed_lambda(const InteractionDefinitions& idef,
705                         bonded_threading_t*           bt,
706                         const rvec                    x[],
707                         const t_forcerec*             fr,
708                         const struct t_pbc*           pbc,
709                         gmx::ArrayRef<real>           forceBufferLambda,
710                         gmx::ArrayRef<gmx::RVec>      shiftForceBufferLambda,
711                         gmx_grppairener_t*            grpp,
712                         real*                         epot,
713                         gmx::ArrayRef<real>           dvdl,
714                         t_nrnb*                       nrnb,
715                         gmx::ArrayRef<const real>     lambda,
716                         const t_mdatoms*              md,
717                         t_fcdata*                     fcd,
718                         int*                          global_atom_index)
719 {
720     WorkDivision& workDivision = bt->foreignLambdaWorkDivision;
721
722     const t_pbc* pbc_null;
723     if (fr->bMolPBC)
724     {
725         pbc_null = pbc;
726     }
727     else
728     {
729         pbc_null = nullptr;
730     }
731
732     /* We already have the forces, so we use temp buffers here */
733     std::fill(forceBufferLambda.begin(), forceBufferLambda.end(), 0.0_real);
734     std::fill(shiftForceBufferLambda.begin(),
735               shiftForceBufferLambda.end(),
736               gmx::RVec{ 0.0_real, 0.0_real, 0.0_real });
737     rvec4* f      = reinterpret_cast<rvec4*>(forceBufferLambda.data());
738     rvec*  fshift = as_rvec_array(shiftForceBufferLambda.data());
739
740     /* Loop over all bonded force types to calculate the bonded energies */
741     for (int ftype = 0; (ftype < F_NRE); ftype++)
742     {
743         if (ftype_is_bonded_potential(ftype))
744         {
745             const InteractionList& ilist = idef.il[ftype];
746             /* Create a view of only the perturbed interactions, which are sorted to the end */
747             const int           numNonperturbed = idef.numNonperturbedInteractions[ftype];
748             ArrayRef<const int> iatomsPerturbed = gmx::constArrayRefFromArray(
749                     ilist.iatoms.data() + numNonperturbed, ilist.size() - numNonperturbed);
750             if (!iatomsPerturbed.empty())
751             {
752                 /* Set the work range of thread 0 to the perturbed bondeds */
753                 workDivision.setBound(ftype, 0, 0);
754                 workDivision.setBound(ftype, 1, iatomsPerturbed.ssize());
755
756                 gmx::StepWorkload tempFlags;
757                 tempFlags.computeEnergy = true;
758                 real v                  = calc_one_bond(0,
759                                        ftype,
760                                        idef,
761                                        iatomsPerturbed,
762                                        iatomsPerturbed.ssize(),
763                                        workDivision,
764                                        x,
765                                        f,
766                                        fshift,
767                                        fr,
768                                        pbc_null,
769                                        grpp,
770                                        nrnb,
771                                        lambda,
772                                        dvdl,
773                                        md,
774                                        fcd,
775                                        tempFlags,
776                                        global_atom_index);
777                 epot[ftype] += v;
778             }
779         }
780     }
781 }
782
783 } // namespace
784
785 void ListedForces::calculate(struct gmx_wallcycle*                     wcycle,
786                              const matrix                              box,
787                              const t_lambda*                           fepvals,
788                              const t_commrec*                          cr,
789                              const gmx_multisim_t*                     ms,
790                              gmx::ArrayRefWithPadding<const gmx::RVec> coordinates,
791                              gmx::ArrayRef<const gmx::RVec>            xWholeMolecules,
792                              t_fcdata*                                 fcdata,
793                              const history_t*                          hist,
794                              gmx::ForceOutputs*                        forceOutputs,
795                              const t_forcerec*                         fr,
796                              const struct t_pbc*                       pbc,
797                              gmx_enerdata_t*                           enerd,
798                              t_nrnb*                                   nrnb,
799                              gmx::ArrayRef<const real>                 lambda,
800                              const t_mdatoms*                          md,
801                              int*                                      global_atom_index,
802                              const gmx::StepWorkload&                  stepWork)
803 {
804     if (interactionSelection_.none() || !stepWork.computeListedForces)
805     {
806         return;
807     }
808
809     const InteractionDefinitions& idef = *idef_;
810
811     // TODO: replace all rvec use here with ArrayRefWithPadding
812     const rvec* x = as_rvec_array(coordinates.paddedArrayRef().data());
813
814     const bool calculateRestInteractions =
815             interactionSelection_.test(static_cast<int>(ListedForces::InteractionGroup::Rest));
816
817     t_pbc pbc_full; /* Full PBC is needed for position restraints */
818     if (calculateRestInteractions && haveRestraints(*fcdata))
819     {
820         if (!idef.il[F_POSRES].empty() || !idef.il[F_FBPOSRES].empty())
821         {
822             /* Not enough flops to bother counting */
823             set_pbc(&pbc_full, fr->pbcType, box);
824         }
825
826         /* TODO Use of restraints triggers further function calls
827            inside the loop over calc_one_bond(), but those are too
828            awkward to account to this subtimer properly in the present
829            code. We don't test or care much about performance with
830            restraints, anyway. */
831         wallcycle_sub_start(wcycle, ewcsRESTRAINTS);
832
833         if (!idef.il[F_POSRES].empty())
834         {
835             posres_wrapper(nrnb, idef, &pbc_full, x, enerd, lambda, fr, &forceOutputs->forceWithVirial());
836         }
837
838         if (!idef.il[F_FBPOSRES].empty())
839         {
840             fbposres_wrapper(nrnb, idef, &pbc_full, x, enerd, fr, &forceOutputs->forceWithVirial());
841         }
842
843         /* Do pre force calculation stuff which might require communication */
844         if (fcdata->orires->nr > 0)
845         {
846             GMX_ASSERT(!xWholeMolecules.empty(), "Need whole molecules for orientation restraints");
847             enerd->term[F_ORIRESDEV] = calc_orires_dev(ms,
848                                                        idef.il[F_ORIRES].size(),
849                                                        idef.il[F_ORIRES].iatoms.data(),
850                                                        idef.iparams.data(),
851                                                        md,
852                                                        xWholeMolecules,
853                                                        x,
854                                                        fr->bMolPBC ? pbc : nullptr,
855                                                        fcdata->orires,
856                                                        hist);
857         }
858         if (fcdata->disres->nres > 0)
859         {
860             calc_disres_R_6(cr,
861                             ms,
862                             idef.il[F_DISRES].size(),
863                             idef.il[F_DISRES].iatoms.data(),
864                             x,
865                             fr->bMolPBC ? pbc : nullptr,
866                             fcdata->disres,
867                             hist);
868         }
869
870         wallcycle_sub_stop(wcycle, ewcsRESTRAINTS);
871     }
872
873     calc_listed(wcycle, idef, threading_.get(), x, forceOutputs, fr, pbc, enerd, nrnb, lambda, md, fcdata, global_atom_index, stepWork);
874
875     /* Check if we have to determine energy differences
876      * at foreign lambdas.
877      */
878     if (fepvals->n_lambda > 0 && stepWork.computeDhdl)
879     {
880         gmx::EnumerationArray<FreeEnergyPerturbationCouplingType, real> dvdl = { 0 };
881         if (!idef.il[F_POSRES].empty())
882         {
883             posres_wrapper_lambda(wcycle, fepvals, idef, &pbc_full, x, enerd, lambda, fr);
884         }
885         if (idef.ilsort != ilsortNO_FE)
886         {
887             wallcycle_sub_start(wcycle, ewcsLISTED_FEP);
888             if (idef.ilsort != ilsortFE_SORTED)
889             {
890                 gmx_incons("The bonded interactions are not sorted for free energy");
891             }
892             for (int i = 0; i < 1 + enerd->foreignLambdaTerms.numLambdas(); i++)
893             {
894                 gmx::EnumerationArray<FreeEnergyPerturbationCouplingType, real> lam_i;
895
896                 reset_foreign_enerdata(enerd);
897                 for (auto j : keysOf(lam_i))
898                 {
899                     lam_i[j] = (i == 0 ? lambda[static_cast<int>(j)] : fepvals->all_lambda[j][i - 1]);
900                 }
901                 calc_listed_lambda(idef,
902                                    threading_.get(),
903                                    x,
904                                    fr,
905                                    pbc,
906                                    forceBufferLambda_,
907                                    shiftForceBufferLambda_,
908                                    &(enerd->foreign_grpp),
909                                    enerd->foreign_term,
910                                    dvdl,
911                                    nrnb,
912                                    lam_i,
913                                    md,
914                                    fcdata,
915                                    global_atom_index);
916                 sum_epot(enerd->foreign_grpp, enerd->foreign_term);
917                 const double dvdlSum = std::accumulate(std::begin(dvdl), std::end(dvdl), 0.);
918                 std::fill(std::begin(dvdl), std::end(dvdl), 0.0);
919                 enerd->foreignLambdaTerms.accumulate(i, enerd->foreign_term[F_EPOT], dvdlSum);
920             }
921             wallcycle_sub_stop(wcycle, ewcsLISTED_FEP);
922         }
923     }
924 }