Modernize wallcycle counting
[alexxy/gromacs.git] / src / gromacs / listed_forces / listed_forces.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
5  * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
6  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7  * and including many others, as listed in the AUTHORS file in the
8  * top-level source directory and at http://www.gromacs.org.
9  *
10  * GROMACS is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public License
12  * as published by the Free Software Foundation; either version 2.1
13  * of the License, or (at your option) any later version.
14  *
15  * GROMACS is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with GROMACS; if not, see
22  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
24  *
25  * If you want to redistribute modifications to GROMACS, please
26  * consider that scientific software is very special. Version
27  * control is crucial - bugs must be traceable. We will be happy to
28  * consider code for inclusion in the official distribution, but
29  * derived work must not be called official GROMACS. Details are found
30  * in the README & COPYING files - if they are missing, get the
31  * official version at http://www.gromacs.org.
32  *
33  * To help us fund GROMACS development, we humbly ask that you cite
34  * the research papers on the package. Check out http://www.gromacs.org.
35  */
36 /*! \internal \file
37  *
38  * \brief This file defines high-level functions for mdrun to compute
39  * energies and forces for listed interactions.
40  *
41  * \author Mark Abraham <mark.j.abraham@gmail.com>
42  *
43  * \ingroup module_listed_forces
44  */
45 #include "gmxpre.h"
46
47 #include "gromacs/utility/arrayref.h"
48 #include "gromacs/utility/enumerationhelpers.h"
49 #include "listed_forces.h"
50
51 #include <cassert>
52
53 #include <algorithm>
54 #include <array>
55 #include <numeric>
56
57 #include "gromacs/gmxlib/network.h"
58 #include "gromacs/gmxlib/nrnb.h"
59 #include "gromacs/listed_forces/bonded.h"
60 #include "gromacs/listed_forces/disre.h"
61 #include "gromacs/listed_forces/orires.h"
62 #include "gromacs/listed_forces/pairs.h"
63 #include "gromacs/listed_forces/position_restraints.h"
64 #include "gromacs/math/vec.h"
65 #include "gromacs/mdlib/enerdata_utils.h"
66 #include "gromacs/mdlib/force.h"
67 #include "gromacs/mdlib/gmx_omp_nthreads.h"
68 #include "gromacs/mdtypes/commrec.h"
69 #include "gromacs/mdtypes/mdatom.h"
70 #include "gromacs/mdtypes/fcdata.h"
71 #include "gromacs/mdtypes/forceoutput.h"
72 #include "gromacs/mdtypes/forcerec.h"
73 #include "gromacs/mdtypes/inputrec.h"
74 #include "gromacs/mdtypes/md_enums.h"
75 #include "gromacs/mdtypes/simulation_workload.h"
76 #include "gromacs/pbcutil/ishift.h"
77 #include "gromacs/pbcutil/pbc.h"
78 #include "gromacs/timing/wallcycle.h"
79 #include "gromacs/topology/topology.h"
80 #include "gromacs/utility/exceptions.h"
81 #include "gromacs/utility/fatalerror.h"
82
83 #include "listed_internal.h"
84 #include "manage_threading.h"
85 #include "utilities.h"
86
87 ListedForces::ListedForces(const gmx_ffparams_t&      ffparams,
88                            const int                  numEnergyGroups,
89                            const int                  numThreads,
90                            const InteractionSelection interactionSelection,
91                            FILE*                      fplog) :
92     idefSelection_(ffparams),
93     threading_(std::make_unique<bonded_threading_t>(numThreads, numEnergyGroups, fplog)),
94     interactionSelection_(interactionSelection)
95 {
96 }
97
98 ListedForces::ListedForces(ListedForces&& o) noexcept = default;
99
100 ListedForces::~ListedForces() = default;
101
102 //! Copies the selection interactions from \p idefSrc to \p idef
103 static void selectInteractions(InteractionDefinitions*                  idef,
104                                const InteractionDefinitions&            idefSrc,
105                                const ListedForces::InteractionSelection interactionSelection)
106 {
107     const bool selectPairs =
108             interactionSelection.test(static_cast<int>(ListedForces::InteractionGroup::Pairs));
109     const bool selectDihedrals =
110             interactionSelection.test(static_cast<int>(ListedForces::InteractionGroup::Dihedrals));
111     const bool selectAngles =
112             interactionSelection.test(static_cast<int>(ListedForces::InteractionGroup::Angles));
113     const bool selectRest =
114             interactionSelection.test(static_cast<int>(ListedForces::InteractionGroup::Rest));
115
116     for (int ftype = 0; ftype < F_NRE; ftype++)
117     {
118         const t_interaction_function& ifunc = interaction_function[ftype];
119         if (ifunc.flags & IF_BOND)
120         {
121             bool assign = false;
122             if (ifunc.flags & IF_PAIR)
123             {
124                 assign = selectPairs;
125             }
126             else if (ifunc.flags & IF_DIHEDRAL)
127             {
128                 assign = selectDihedrals;
129             }
130             else if (ifunc.flags & IF_ATYPE)
131             {
132                 assign = selectAngles;
133             }
134             else
135             {
136                 assign = selectRest;
137             }
138             if (assign)
139             {
140                 idef->il[ftype] = idefSrc.il[ftype];
141             }
142             else
143             {
144                 idef->il[ftype].clear();
145             }
146         }
147     }
148 }
149
150 void ListedForces::setup(const InteractionDefinitions& domainIdef, const int numAtomsForce, const bool useGpu)
151 {
152     if (interactionSelection_.all())
153     {
154         // Avoid the overhead of copying all interaction lists by simply setting the reference to the domain idef
155         idef_ = &domainIdef;
156     }
157     else
158     {
159         idef_ = &idefSelection_;
160
161         selectInteractions(&idefSelection_, domainIdef, interactionSelection_);
162
163         idefSelection_.ilsort = domainIdef.ilsort;
164
165         if (interactionSelection_.test(static_cast<int>(ListedForces::InteractionGroup::Rest)))
166         {
167             idefSelection_.iparams_posres   = domainIdef.iparams_posres;
168             idefSelection_.iparams_fbposres = domainIdef.iparams_fbposres;
169         }
170         else
171         {
172             idefSelection_.iparams_posres.clear();
173             idefSelection_.iparams_fbposres.clear();
174         }
175     }
176
177     setup_bonded_threading(threading_.get(), numAtomsForce, useGpu, *idef_);
178
179     if (idef_->ilsort == ilsortFE_SORTED)
180     {
181         forceBufferLambda_.resize(numAtomsForce * sizeof(rvec4) / sizeof(real));
182         shiftForceBufferLambda_.resize(gmx::c_numShiftVectors);
183     }
184 }
185
186 namespace
187 {
188
189 using gmx::ArrayRef;
190
191 /*! \brief Return true if ftype is an explicit pair-listed LJ or
192  * COULOMB interaction type: bonded LJ (usually 1-4), or special
193  * listed non-bonded for FEP. */
194 bool isPairInteraction(int ftype)
195 {
196     return ((ftype) >= F_LJ14 && (ftype) <= F_LJC_PAIRS_NB);
197 }
198
199 /*! \brief Zero thread-local output buffers */
200 void zero_thread_output(f_thread_t* f_t)
201 {
202     constexpr int nelem_fa = sizeof(f_t->f[0]) / sizeof(real);
203
204     for (int i = 0; i < f_t->nblock_used; i++)
205     {
206         int a0 = f_t->block_index[i] * reduction_block_size;
207         int a1 = a0 + reduction_block_size;
208         for (int a = a0; a < a1; a++)
209         {
210             for (int d = 0; d < nelem_fa; d++)
211             {
212                 f_t->f[a][d] = 0;
213             }
214         }
215     }
216
217     for (int i = 0; i < gmx::c_numShiftVectors; i++)
218     {
219         clear_rvec(f_t->fshift[i]);
220     }
221     for (int i = 0; i < F_NRE; i++)
222     {
223         f_t->ener[i] = 0;
224     }
225     for (int i = 0; i < static_cast<int>(NonBondedEnergyTerms::Count); i++)
226     {
227         for (int j = 0; j < f_t->grpp.nener; j++)
228         {
229             f_t->grpp.energyGroupPairTerms[i][j] = 0;
230         }
231     }
232     for (auto i : keysOf(f_t->dvdl))
233     {
234         f_t->dvdl[i] = 0;
235     }
236 }
237
238 /*! \brief The max thread number is arbitrary, we used a fixed number
239  * to avoid memory management.  Using more than 16 threads is probably
240  * never useful performance wise. */
241 #define MAX_BONDED_THREADS 256
242
243 /*! \brief Reduce thread-local force buffers */
244 void reduce_thread_forces(gmx::ArrayRef<gmx::RVec> force, const bonded_threading_t* bt, int nthreads)
245 {
246     if (nthreads > MAX_BONDED_THREADS)
247     {
248         gmx_fatal(FARGS, "Can not reduce bonded forces on more than %d threads", MAX_BONDED_THREADS);
249     }
250
251     rvec* gmx_restrict f = as_rvec_array(force.data());
252
253     const int numAtomsForce = bt->numAtomsForce;
254
255     /* This reduction can run on any number of threads,
256      * independently of bt->nthreads.
257      * But if nthreads matches bt->nthreads (which it currently does)
258      * the uniform distribution of the touched blocks over nthreads will
259      * match the distribution of bonded over threads well in most cases,
260      * which means that threads mostly reduce their own data which increases
261      * the number of cache hits.
262      */
263 #pragma omp parallel for num_threads(nthreads) schedule(static)
264     for (int b = 0; b < bt->nblock_used; b++)
265     {
266         try
267         {
268             int    ind = bt->block_index[b];
269             rvec4* fp[MAX_BONDED_THREADS];
270
271             /* Determine which threads contribute to this block */
272             int nfb = 0;
273             for (int ft = 0; ft < bt->nthreads; ft++)
274             {
275                 if (bitmask_is_set(bt->mask[ind], ft))
276                 {
277                     fp[nfb++] = bt->f_t[ft]->f;
278                 }
279             }
280             if (nfb > 0)
281             {
282                 /* Reduce force buffers for threads that contribute */
283                 int a0 = ind * reduction_block_size;
284                 int a1 = (ind + 1) * reduction_block_size;
285                 /* It would be nice if we could pad f to avoid this min */
286                 a1 = std::min(a1, numAtomsForce);
287                 for (int a = a0; a < a1; a++)
288                 {
289                     for (int fb = 0; fb < nfb; fb++)
290                     {
291                         rvec_inc(f[a], fp[fb][a]);
292                     }
293                 }
294             }
295         }
296         GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
297     }
298 }
299
300 /*! \brief Reduce thread-local forces, shift forces and energies */
301 void reduce_thread_output(gmx::ForceWithShiftForces* forceWithShiftForces,
302                           real*                      ener,
303                           gmx_grppairener_t*         grpp,
304                           gmx::ArrayRef<real>        dvdl,
305                           const bonded_threading_t*  bt,
306                           const gmx::StepWorkload&   stepWork)
307 {
308     assert(bt->haveBondeds);
309
310     if (bt->nblock_used > 0)
311     {
312         /* Reduce the bonded force buffer */
313         reduce_thread_forces(forceWithShiftForces->force(), bt, bt->nthreads);
314     }
315
316     rvec* gmx_restrict fshift = as_rvec_array(forceWithShiftForces->shiftForces().data());
317
318     /* When necessary, reduce energy and virial using one thread only */
319     if ((stepWork.computeEnergy || stepWork.computeVirial || stepWork.computeDhdl) && bt->nthreads > 1)
320     {
321         gmx::ArrayRef<const std::unique_ptr<f_thread_t>> f_t = bt->f_t;
322
323         if (stepWork.computeVirial)
324         {
325             for (int i = 0; i < gmx::c_numShiftVectors; i++)
326             {
327                 for (int t = 1; t < bt->nthreads; t++)
328                 {
329                     rvec_inc(fshift[i], f_t[t]->fshift[i]);
330                 }
331             }
332         }
333         if (stepWork.computeEnergy)
334         {
335             for (int i = 0; i < F_NRE; i++)
336             {
337                 for (int t = 1; t < bt->nthreads; t++)
338                 {
339                     ener[i] += f_t[t]->ener[i];
340                 }
341             }
342             for (int i = 0; i < static_cast<int>(NonBondedEnergyTerms::Count); i++)
343             {
344                 for (int j = 0; j < f_t[1]->grpp.nener; j++)
345                 {
346                     for (int t = 1; t < bt->nthreads; t++)
347                     {
348                         grpp->energyGroupPairTerms[i][j] += f_t[t]->grpp.energyGroupPairTerms[i][j];
349                     }
350                 }
351             }
352         }
353         if (stepWork.computeDhdl)
354         {
355             for (auto i : keysOf(f_t[1]->dvdl))
356             {
357
358                 for (int t = 1; t < bt->nthreads; t++)
359                 {
360                     dvdl[static_cast<int>(i)] += f_t[t]->dvdl[i];
361                 }
362             }
363         }
364     }
365 }
366
367 /*! \brief Returns the bonded kernel flavor
368  *
369  * Note that energies are always requested when the virial
370  * is requested (performance gain would be small).
371  * Note that currently we do not have bonded kernels that
372  * do not compute forces.
373  */
374 BondedKernelFlavor selectBondedKernelFlavor(const gmx::StepWorkload& stepWork,
375                                             const bool               useSimdKernels,
376                                             const bool               havePerturbedInteractions)
377 {
378     BondedKernelFlavor flavor;
379     if (stepWork.computeEnergy || stepWork.computeVirial)
380     {
381         if (stepWork.computeVirial)
382         {
383             flavor = BondedKernelFlavor::ForcesAndVirialAndEnergy;
384         }
385         else
386         {
387             flavor = BondedKernelFlavor::ForcesAndEnergy;
388         }
389     }
390     else
391     {
392         if (useSimdKernels && !havePerturbedInteractions)
393         {
394             flavor = BondedKernelFlavor::ForcesSimdWhenAvailable;
395         }
396         else
397         {
398             flavor = BondedKernelFlavor::ForcesNoSimd;
399         }
400     }
401
402     return flavor;
403 }
404
405 /*! \brief Calculate one element of the list of bonded interactions
406     for this thread */
407 real calc_one_bond(int                           thread,
408                    int                           ftype,
409                    const InteractionDefinitions& idef,
410                    ArrayRef<const int>           iatoms,
411                    const int                     numNonperturbedInteractions,
412                    const WorkDivision&           workDivision,
413                    const rvec                    x[],
414                    rvec4                         f[],
415                    rvec                          fshift[],
416                    const t_forcerec*             fr,
417                    const t_pbc*                  pbc,
418                    gmx_grppairener_t*            grpp,
419                    t_nrnb*                       nrnb,
420                    gmx::ArrayRef<const real>     lambda,
421                    gmx::ArrayRef<real>           dvdl,
422                    const t_mdatoms*              md,
423                    t_fcdata*                     fcd,
424                    const gmx::StepWorkload&      stepWork,
425                    int*                          global_atom_index)
426 {
427     GMX_ASSERT(idef.ilsort == ilsortNO_FE || idef.ilsort == ilsortFE_SORTED,
428                "The topology should be marked either as no FE or sorted on FE");
429
430     const bool havePerturbedInteractions =
431             (idef.ilsort == ilsortFE_SORTED && numNonperturbedInteractions < iatoms.ssize());
432     BondedKernelFlavor flavor =
433             selectBondedKernelFlavor(stepWork, fr->use_simd_kernels, havePerturbedInteractions);
434     FreeEnergyPerturbationCouplingType efptFTYPE;
435     if (IS_RESTRAINT_TYPE(ftype))
436     {
437         efptFTYPE = FreeEnergyPerturbationCouplingType::Restraint;
438     }
439     else
440     {
441         efptFTYPE = FreeEnergyPerturbationCouplingType::Bonded;
442     }
443
444     const int nat1   = interaction_function[ftype].nratoms + 1;
445     const int nbonds = iatoms.ssize() / nat1;
446
447     GMX_ASSERT(fr->gpuBonded != nullptr || workDivision.end(ftype) == iatoms.ssize(),
448                "The thread division should match the topology");
449
450     const int nb0 = workDivision.bound(ftype, thread);
451     const int nbn = workDivision.bound(ftype, thread + 1) - nb0;
452
453     ArrayRef<const t_iparams> iparams = idef.iparams;
454
455     real v = 0;
456     if (!isPairInteraction(ftype))
457     {
458         if (ftype == F_CMAP)
459         {
460             /* TODO The execution time for CMAP dihedrals might be
461                nice to account to its own subtimer, but first
462                wallcycle needs to be extended to support calling from
463                multiple threads. */
464             v = cmap_dihs(nbn,
465                           iatoms.data() + nb0,
466                           iparams.data(),
467                           &idef.cmap_grid,
468                           x,
469                           f,
470                           fshift,
471                           pbc,
472                           lambda[static_cast<int>(efptFTYPE)],
473                           &(dvdl[static_cast<int>(efptFTYPE)]),
474                           gmx::arrayRefFromArray(md->chargeA, md->nr),
475                           fcd,
476                           nullptr,
477                           nullptr,
478                           global_atom_index);
479         }
480         else
481         {
482             v = calculateSimpleBond(ftype,
483                                     nbn,
484                                     iatoms.data() + nb0,
485                                     iparams.data(),
486                                     x,
487                                     f,
488                                     fshift,
489                                     pbc,
490                                     lambda[static_cast<int>(efptFTYPE)],
491                                     &(dvdl[static_cast<int>(efptFTYPE)]),
492                                     gmx::arrayRefFromArray(md->chargeA, md->nr),
493                                     fcd,
494                                     fcd->disres,
495                                     fcd->orires,
496                                     global_atom_index,
497                                     flavor);
498         }
499     }
500     else
501     {
502         /* TODO The execution time for pairs might be nice to account
503            to its own subtimer, but first wallcycle needs to be
504            extended to support calling from multiple threads. */
505         do_pairs(ftype,
506                  nbn,
507                  iatoms.data() + nb0,
508                  iparams.data(),
509                  x,
510                  f,
511                  fshift,
512                  pbc,
513                  lambda.data(),
514                  dvdl.data(),
515                  md,
516                  fr,
517                  havePerturbedInteractions,
518                  stepWork,
519                  grpp,
520                  global_atom_index);
521     }
522
523     if (thread == 0)
524     {
525         inc_nrnb(nrnb, nrnbIndex(ftype), nbonds);
526     }
527
528     return v;
529 }
530
531 } // namespace
532
533 /*! \brief Compute the bonded part of the listed forces, parallelized over threads
534  */
535 static void calcBondedForces(const InteractionDefinitions& idef,
536                              bonded_threading_t*           bt,
537                              const rvec                    x[],
538                              const t_forcerec*             fr,
539                              const t_pbc*                  pbc_null,
540                              rvec*                         fshiftMasterBuffer,
541                              gmx_enerdata_t*               enerd,
542                              t_nrnb*                       nrnb,
543                              gmx::ArrayRef<const real>     lambda,
544                              gmx::ArrayRef<real>           dvdl,
545                              const t_mdatoms*              md,
546                              t_fcdata*                     fcd,
547                              const gmx::StepWorkload&      stepWork,
548                              int*                          global_atom_index)
549 {
550 #pragma omp parallel for num_threads(bt->nthreads) schedule(static)
551     for (int thread = 0; thread < bt->nthreads; thread++)
552     {
553         try
554         {
555             f_thread_t& threadBuffers = *bt->f_t[thread];
556             int         ftype;
557             real        v;
558             /* thread stuff */
559             rvec*               fshift;
560             gmx::ArrayRef<real> dvdlt;
561             gmx::ArrayRef<real> epot;
562             gmx_grppairener_t*  grpp;
563
564             zero_thread_output(&threadBuffers);
565
566             rvec4* ft = threadBuffers.f;
567
568             /* Thread 0 writes directly to the main output buffers.
569              * We might want to reconsider this.
570              */
571             if (thread == 0)
572             {
573                 fshift = fshiftMasterBuffer;
574                 epot   = enerd->term;
575                 grpp   = &enerd->grpp;
576                 dvdlt  = dvdl;
577             }
578             else
579             {
580                 fshift = as_rvec_array(threadBuffers.fshift.data());
581                 epot   = threadBuffers.ener;
582                 grpp   = &threadBuffers.grpp;
583                 dvdlt  = threadBuffers.dvdl;
584             }
585             /* Loop over all bonded force types to calculate the bonded forces */
586             for (ftype = 0; (ftype < F_NRE); ftype++)
587             {
588                 const InteractionList& ilist = idef.il[ftype];
589                 if (!ilist.empty() && ftype_is_bonded_potential(ftype))
590                 {
591                     ArrayRef<const int> iatoms = gmx::makeConstArrayRef(ilist.iatoms);
592                     v                          = calc_one_bond(thread,
593                                       ftype,
594                                       idef,
595                                       iatoms,
596                                       idef.numNonperturbedInteractions[ftype],
597                                       bt->workDivision,
598                                       x,
599                                       ft,
600                                       fshift,
601                                       fr,
602                                       pbc_null,
603                                       grpp,
604                                       nrnb,
605                                       lambda,
606                                       dvdlt,
607                                       md,
608                                       fcd,
609                                       stepWork,
610                                       global_atom_index);
611                     epot[ftype] += v;
612                 }
613             }
614         }
615         GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
616     }
617 }
618
619 bool ListedForces::haveRestraints(const t_fcdata& fcdata) const
620 {
621     GMX_ASSERT(fcdata.orires && fcdata.disres, "NMR restraints objects should be set up");
622
623     return (!idef_->il[F_POSRES].empty() || !idef_->il[F_FBPOSRES].empty() || fcdata.orires->nr > 0
624             || fcdata.disres->nres > 0);
625 }
626
627 bool ListedForces::haveCpuBondeds() const
628 {
629     return threading_->haveBondeds;
630 }
631
632 bool ListedForces::haveCpuListedForces(const t_fcdata& fcdata) const
633 {
634     return haveCpuBondeds() || haveRestraints(fcdata);
635 }
636
637 namespace
638 {
639
640 /*! \brief Calculates all listed force interactions. */
641 void calc_listed(struct gmx_wallcycle*         wcycle,
642                  const InteractionDefinitions& idef,
643                  bonded_threading_t*           bt,
644                  const rvec                    x[],
645                  gmx::ForceOutputs*            forceOutputs,
646                  const t_forcerec*             fr,
647                  const t_pbc*                  pbc,
648                  gmx_enerdata_t*               enerd,
649                  t_nrnb*                       nrnb,
650                  gmx::ArrayRef<const real>     lambda,
651                  const t_mdatoms*              md,
652                  t_fcdata*                     fcd,
653                  int*                          global_atom_index,
654                  const gmx::StepWorkload&      stepWork)
655 {
656     if (bt->haveBondeds)
657     {
658         gmx::ForceWithShiftForces& forceWithShiftForces = forceOutputs->forceWithShiftForces();
659
660         wallcycle_sub_start(wcycle, WallCycleSubCounter::Listed);
661         /* The dummy array is to have a place to store the dhdl at other values
662            of lambda, which will be thrown away in the end */
663         gmx::EnumerationArray<FreeEnergyPerturbationCouplingType, real> dvdl = { 0 };
664         calcBondedForces(idef,
665                          bt,
666                          x,
667                          fr,
668                          fr->bMolPBC ? pbc : nullptr,
669                          as_rvec_array(forceWithShiftForces.shiftForces().data()),
670                          enerd,
671                          nrnb,
672                          lambda,
673                          dvdl,
674                          md,
675                          fcd,
676                          stepWork,
677                          global_atom_index);
678         wallcycle_sub_stop(wcycle, WallCycleSubCounter::Listed);
679
680         wallcycle_sub_start(wcycle, WallCycleSubCounter::ListedBufOps);
681         reduce_thread_output(&forceWithShiftForces, enerd->term.data(), &enerd->grpp, dvdl, bt, stepWork);
682
683         if (stepWork.computeDhdl)
684         {
685             for (auto i : keysOf(enerd->dvdl_lin))
686             {
687                 enerd->dvdl_nonlin[i] += dvdl[i];
688             }
689         }
690         wallcycle_sub_stop(wcycle, WallCycleSubCounter::ListedBufOps);
691     }
692
693     /* Copy the sum of violations for the distance restraints from fcd */
694     if (fcd)
695     {
696         enerd->term[F_DISRESVIOL] = fcd->disres->sumviol;
697     }
698 }
699
700 /*! \brief As calc_listed(), but only determines the potential energy
701  * for the perturbed interactions.
702  *
703  * The shift forces in fr are not affected.
704  */
705 void calc_listed_lambda(const InteractionDefinitions& idef,
706                         bonded_threading_t*           bt,
707                         const rvec                    x[],
708                         const t_forcerec*             fr,
709                         const struct t_pbc*           pbc,
710                         gmx::ArrayRef<real>           forceBufferLambda,
711                         gmx::ArrayRef<gmx::RVec>      shiftForceBufferLambda,
712                         gmx_grppairener_t*            grpp,
713                         real*                         epot,
714                         gmx::ArrayRef<real>           dvdl,
715                         t_nrnb*                       nrnb,
716                         gmx::ArrayRef<const real>     lambda,
717                         const t_mdatoms*              md,
718                         t_fcdata*                     fcd,
719                         int*                          global_atom_index)
720 {
721     WorkDivision& workDivision = bt->foreignLambdaWorkDivision;
722
723     const t_pbc* pbc_null;
724     if (fr->bMolPBC)
725     {
726         pbc_null = pbc;
727     }
728     else
729     {
730         pbc_null = nullptr;
731     }
732
733     /* We already have the forces, so we use temp buffers here */
734     std::fill(forceBufferLambda.begin(), forceBufferLambda.end(), 0.0_real);
735     std::fill(shiftForceBufferLambda.begin(),
736               shiftForceBufferLambda.end(),
737               gmx::RVec{ 0.0_real, 0.0_real, 0.0_real });
738     rvec4* f      = reinterpret_cast<rvec4*>(forceBufferLambda.data());
739     rvec*  fshift = as_rvec_array(shiftForceBufferLambda.data());
740
741     /* Loop over all bonded force types to calculate the bonded energies */
742     for (int ftype = 0; (ftype < F_NRE); ftype++)
743     {
744         if (ftype_is_bonded_potential(ftype))
745         {
746             const InteractionList& ilist = idef.il[ftype];
747             /* Create a temporary iatom list with only perturbed interactions */
748             const int           numNonperturbed = idef.numNonperturbedInteractions[ftype];
749             ArrayRef<const int> iatomsPerturbed = gmx::constArrayRefFromArray(
750                     ilist.iatoms.data() + numNonperturbed, ilist.size() - numNonperturbed);
751             if (!iatomsPerturbed.empty())
752             {
753                 /* Set the work range of thread 0 to the perturbed bondeds */
754                 workDivision.setBound(ftype, 0, 0);
755                 workDivision.setBound(ftype, 1, iatomsPerturbed.ssize());
756
757                 gmx::StepWorkload tempFlags;
758                 tempFlags.computeEnergy = true;
759                 real v                  = calc_one_bond(0,
760                                        ftype,
761                                        idef,
762                                        iatomsPerturbed,
763                                        iatomsPerturbed.ssize(),
764                                        workDivision,
765                                        x,
766                                        f,
767                                        fshift,
768                                        fr,
769                                        pbc_null,
770                                        grpp,
771                                        nrnb,
772                                        lambda,
773                                        dvdl,
774                                        md,
775                                        fcd,
776                                        tempFlags,
777                                        global_atom_index);
778                 epot[ftype] += v;
779             }
780         }
781     }
782 }
783
784 } // namespace
785
786 void ListedForces::calculate(struct gmx_wallcycle*                     wcycle,
787                              const matrix                              box,
788                              const t_lambda*                           fepvals,
789                              const t_commrec*                          cr,
790                              const gmx_multisim_t*                     ms,
791                              gmx::ArrayRefWithPadding<const gmx::RVec> coordinates,
792                              gmx::ArrayRef<const gmx::RVec>            xWholeMolecules,
793                              t_fcdata*                                 fcdata,
794                              const history_t*                          hist,
795                              gmx::ForceOutputs*                        forceOutputs,
796                              const t_forcerec*                         fr,
797                              const struct t_pbc*                       pbc,
798                              gmx_enerdata_t*                           enerd,
799                              t_nrnb*                                   nrnb,
800                              gmx::ArrayRef<const real>                 lambda,
801                              const t_mdatoms*                          md,
802                              int*                                      global_atom_index,
803                              const gmx::StepWorkload&                  stepWork)
804 {
805     if (interactionSelection_.none() || !stepWork.computeListedForces)
806     {
807         return;
808     }
809
810     const InteractionDefinitions& idef = *idef_;
811
812     // Todo: replace all rvec use here with ArrayRefWithPadding
813     const rvec* x = as_rvec_array(coordinates.paddedArrayRef().data());
814
815     const bool calculateRestInteractions =
816             interactionSelection_.test(static_cast<int>(ListedForces::InteractionGroup::Rest));
817
818     t_pbc pbc_full; /* Full PBC is needed for position restraints */
819     if (calculateRestInteractions && haveRestraints(*fcdata))
820     {
821         if (!idef.il[F_POSRES].empty() || !idef.il[F_FBPOSRES].empty())
822         {
823             /* Not enough flops to bother counting */
824             set_pbc(&pbc_full, fr->pbcType, box);
825         }
826
827         /* TODO Use of restraints triggers further function calls
828            inside the loop over calc_one_bond(), but those are too
829            awkward to account to this subtimer properly in the present
830            code. We don't test / care much about performance with
831            restraints, anyway. */
832         wallcycle_sub_start(wcycle, WallCycleSubCounter::Restraints);
833
834         if (!idef.il[F_POSRES].empty())
835         {
836             posres_wrapper(nrnb, idef, &pbc_full, x, enerd, lambda, fr, &forceOutputs->forceWithVirial());
837         }
838
839         if (!idef.il[F_FBPOSRES].empty())
840         {
841             fbposres_wrapper(nrnb, idef, &pbc_full, x, enerd, fr, &forceOutputs->forceWithVirial());
842         }
843
844         /* Do pre force calculation stuff which might require communication */
845         if (fcdata->orires->nr > 0)
846         {
847             GMX_ASSERT(!xWholeMolecules.empty(), "Need whole molecules for orienation restraints");
848             enerd->term[F_ORIRESDEV] = calc_orires_dev(ms,
849                                                        idef.il[F_ORIRES].size(),
850                                                        idef.il[F_ORIRES].iatoms.data(),
851                                                        idef.iparams.data(),
852                                                        md,
853                                                        xWholeMolecules,
854                                                        x,
855                                                        fr->bMolPBC ? pbc : nullptr,
856                                                        fcdata->orires,
857                                                        hist);
858         }
859         if (fcdata->disres->nres > 0)
860         {
861             calc_disres_R_6(cr,
862                             ms,
863                             idef.il[F_DISRES].size(),
864                             idef.il[F_DISRES].iatoms.data(),
865                             x,
866                             fr->bMolPBC ? pbc : nullptr,
867                             fcdata->disres,
868                             hist);
869         }
870
871         wallcycle_sub_stop(wcycle, WallCycleSubCounter::Restraints);
872     }
873
874     calc_listed(wcycle, idef, threading_.get(), x, forceOutputs, fr, pbc, enerd, nrnb, lambda, md, fcdata, global_atom_index, stepWork);
875
876     /* Check if we have to determine energy differences
877      * at foreign lambda's.
878      */
879     if (fepvals->n_lambda > 0 && stepWork.computeDhdl)
880     {
881         gmx::EnumerationArray<FreeEnergyPerturbationCouplingType, real> dvdl = { 0 };
882         if (!idef.il[F_POSRES].empty())
883         {
884             posres_wrapper_lambda(wcycle, fepvals, idef, &pbc_full, x, enerd, lambda, fr);
885         }
886         if (idef.ilsort != ilsortNO_FE)
887         {
888             wallcycle_sub_start(wcycle, WallCycleSubCounter::ListedFep);
889             if (idef.ilsort != ilsortFE_SORTED)
890             {
891                 gmx_incons("The bonded interactions are not sorted for free energy");
892             }
893             for (int i = 0; i < 1 + enerd->foreignLambdaTerms.numLambdas(); i++)
894             {
895                 gmx::EnumerationArray<FreeEnergyPerturbationCouplingType, real> lam_i;
896
897                 reset_foreign_enerdata(enerd);
898                 for (auto j : keysOf(lam_i))
899                 {
900                     lam_i[j] = (i == 0 ? lambda[static_cast<int>(j)] : fepvals->all_lambda[j][i - 1]);
901                 }
902                 calc_listed_lambda(idef,
903                                    threading_.get(),
904                                    x,
905                                    fr,
906                                    pbc,
907                                    forceBufferLambda_,
908                                    shiftForceBufferLambda_,
909                                    &(enerd->foreign_grpp),
910                                    enerd->foreign_term.data(),
911                                    dvdl,
912                                    nrnb,
913                                    lam_i,
914                                    md,
915                                    fcdata,
916                                    global_atom_index);
917                 sum_epot(enerd->foreign_grpp, enerd->foreign_term.data());
918                 const double dvdlSum = std::accumulate(std::begin(dvdl), std::end(dvdl), 0.);
919                 std::fill(std::begin(dvdl), std::end(dvdl), 0.0);
920                 enerd->foreignLambdaTerms.accumulate(i, enerd->foreign_term[F_EPOT], dvdlSum);
921             }
922             wallcycle_sub_stop(wcycle, WallCycleSubCounter::ListedFep);
923         }
924     }
925 }