Convert t_oriresdata to C++
src/gromacs/listed_forces/listed_forces.cpp
/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
 * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
/*! \internal \file
 *
 * \brief This file defines high-level functions for mdrun to compute
 * energies and forces for listed interactions.
 *
 * \author Mark Abraham <mark.j.abraham@gmail.com>
 *
 * \ingroup module_listed_forces
 */
#include "gmxpre.h"

#include "gromacs/utility/arrayref.h"
#include "gromacs/utility/enumerationhelpers.h"
#include "listed_forces.h"

#include <cassert>

#include <algorithm>
#include <array>
#include <numeric>

#include "gromacs/gmxlib/network.h"
#include "gromacs/gmxlib/nrnb.h"
#include "gromacs/listed_forces/bonded.h"
#include "gromacs/listed_forces/disre.h"
#include "gromacs/listed_forces/orires.h"
#include "gromacs/listed_forces/pairs.h"
#include "gromacs/listed_forces/position_restraints.h"
#include "gromacs/math/vec.h"
#include "gromacs/mdlib/enerdata_utils.h"
#include "gromacs/mdlib/force.h"
#include "gromacs/mdlib/gmx_omp_nthreads.h"
#include "gromacs/mdtypes/commrec.h"
#include "gromacs/mdtypes/mdatom.h"
#include "gromacs/mdtypes/fcdata.h"
#include "gromacs/mdtypes/forceoutput.h"
#include "gromacs/mdtypes/forcerec.h"
#include "gromacs/mdtypes/inputrec.h"
#include "gromacs/mdtypes/md_enums.h"
#include "gromacs/mdtypes/simulation_workload.h"
#include "gromacs/pbcutil/ishift.h"
#include "gromacs/pbcutil/pbc.h"
#include "gromacs/timing/wallcycle.h"
#include "gromacs/topology/topology.h"
#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/fatalerror.h"

#include "listed_internal.h"
#include "manage_threading.h"
#include "utilities.h"

ListedForces::ListedForces(const gmx_ffparams_t&      ffparams,
                           const int                  numEnergyGroups,
                           const int                  numThreads,
                           const InteractionSelection interactionSelection,
                           FILE*                      fplog) :
    idefSelection_(ffparams),
    threading_(std::make_unique<bonded_threading_t>(numThreads, numEnergyGroups, fplog)),
    interactionSelection_(interactionSelection)
{
}

ListedForces::ListedForces(ListedForces&& o) noexcept = default;

ListedForces::~ListedForces() = default;
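
/* A rough sketch of how this class is driven during a run. This is only an
 * illustration for the reader: the real call sites live in the mdrun setup
 * and force-calculation code, the local variable names are made up, and the
 * selection helper shown is an assumption about the companion header, not
 * something defined in this file:
 *
 *   ListedForces listedForces(mtop.ffparams, numEnergyGroups, numBondedThreads,
 *                             ListedForces::interactionSelectionAll(), fplog);
 *   listedForces.setup(localTopology.idef, numAtomsForce, useGpuForBondeds); // after each repartitioning
 *   listedForces.calculate(...);                                             // every MD step
 */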

//! Copies the selection interactions from \p idefSrc to \p idef
static void selectInteractions(InteractionDefinitions*                  idef,
                               const InteractionDefinitions&            idefSrc,
                               const ListedForces::InteractionSelection interactionSelection)
{
    const bool selectPairs =
            interactionSelection.test(static_cast<int>(ListedForces::InteractionGroup::Pairs));
    const bool selectDihedrals =
            interactionSelection.test(static_cast<int>(ListedForces::InteractionGroup::Dihedrals));
    const bool selectAngles =
            interactionSelection.test(static_cast<int>(ListedForces::InteractionGroup::Angles));
    const bool selectRest =
            interactionSelection.test(static_cast<int>(ListedForces::InteractionGroup::Rest));

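    /* Only interaction types flagged IF_BOND are handled by ListedForces.
     * Each such type is either copied from the source idef or cleared,
     * depending on which selection group its flags place it in. */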
    for (int ftype = 0; ftype < F_NRE; ftype++)
    {
        const t_interaction_function& ifunc = interaction_function[ftype];
        if (ifunc.flags & IF_BOND)
        {
            bool assign = false;
            if (ifunc.flags & IF_PAIR)
            {
                assign = selectPairs;
            }
            else if (ifunc.flags & IF_DIHEDRAL)
            {
                assign = selectDihedrals;
            }
            else if (ifunc.flags & IF_ATYPE)
            {
                assign = selectAngles;
            }
            else
            {
                assign = selectRest;
            }
            if (assign)
            {
                idef->il[ftype] = idefSrc.il[ftype];
            }
            else
            {
                idef->il[ftype].clear();
            }
        }
    }
}

void ListedForces::setup(const InteractionDefinitions& domainIdef, const int numAtomsForce, const bool useGpu)
{
    if (interactionSelection_.all())
    {
        // Avoid the overhead of copying all interaction lists by simply setting the reference to the domain idef
        idef_ = &domainIdef;
    }
    else
    {
        idef_ = &idefSelection_;

        selectInteractions(&idefSelection_, domainIdef, interactionSelection_);

        idefSelection_.ilsort = domainIdef.ilsort;

        if (interactionSelection_.test(static_cast<int>(ListedForces::InteractionGroup::Rest)))
        {
            idefSelection_.iparams_posres   = domainIdef.iparams_posres;
            idefSelection_.iparams_fbposres = domainIdef.iparams_fbposres;
        }
        else
        {
            idefSelection_.iparams_posres.clear();
            idefSelection_.iparams_fbposres.clear();
        }
    }

    setup_bonded_threading(threading_.get(), numAtomsForce, useGpu, *idef_);

    if (idef_->ilsort == ilsortFE_SORTED)
    {
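        /* Scratch buffers for evaluating listed energies at foreign lambda
         * values. The flat real buffer is sized so that it can later be
         * reinterpreted as one rvec4 (four reals) per atom. */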
        forceBufferLambda_.resize(numAtomsForce * sizeof(rvec4) / sizeof(real));
        shiftForceBufferLambda_.resize(gmx::c_numShiftVectors);
    }
}

namespace
{

using gmx::ArrayRef;

/*! \brief Return true if ftype is an explicit pair-listed LJ or
 * COULOMB interaction type: bonded LJ (usually 1-4), or special
 * listed non-bonded for FEP. */
bool isPairInteraction(int ftype)
{
    return ((ftype) >= F_LJ14 && (ftype) <= F_LJC_PAIRS_NB);
}

/*! \brief Zero thread-local output buffers */
void zero_thread_output(f_thread_t* f_t)
{
    constexpr int nelem_fa = sizeof(f_t->f[0]) / sizeof(real);

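    /* Clear only the force-reduction blocks this thread actually touched,
     * as recorded in block_index, rather than the whole force buffer. */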
    for (int i = 0; i < f_t->nblock_used; i++)
    {
        int a0 = f_t->block_index[i] * reduction_block_size;
        int a1 = a0 + reduction_block_size;
        for (int a = a0; a < a1; a++)
        {
            for (int d = 0; d < nelem_fa; d++)
            {
                f_t->f[a][d] = 0;
            }
        }
    }

    for (int i = 0; i < gmx::c_numShiftVectors; i++)
    {
        clear_rvec(f_t->fshift[i]);
    }
    for (int i = 0; i < F_NRE; i++)
    {
        f_t->ener[i] = 0;
    }
    for (int i = 0; i < static_cast<int>(NonBondedEnergyTerms::Count); i++)
    {
        for (int j = 0; j < f_t->grpp.nener; j++)
        {
            f_t->grpp.energyGroupPairTerms[i][j] = 0;
        }
    }
    for (auto i : keysOf(f_t->dvdl))
    {
        f_t->dvdl[i] = 0;
    }
}

/*! \brief The maximum thread count is arbitrary; we use a fixed number
 * to avoid memory management. Using more than 16 threads is probably
 * never useful performance-wise. */
#define MAX_BONDED_THREADS 256

/*! \brief Reduce thread-local force buffers */
void reduce_thread_forces(gmx::ArrayRef<gmx::RVec> force, const bonded_threading_t* bt, int nthreads)
{
    if (nthreads > MAX_BONDED_THREADS)
    {
        gmx_fatal(FARGS, "Can not reduce bonded forces on more than %d threads", MAX_BONDED_THREADS);
    }

    rvec* gmx_restrict f = as_rvec_array(force.data());

    const int numAtomsForce = bt->numAtomsForce;

    /* This reduction can run on any number of threads,
     * independently of bt->nthreads.
     * But if nthreads matches bt->nthreads (which it currently does)
     * the uniform distribution of the touched blocks over nthreads will
     * match the distribution of bonded over threads well in most cases,
     * which means that threads mostly reduce their own data which increases
     * the number of cache hits.
     */
#pragma omp parallel for num_threads(nthreads) schedule(static)
    for (int b = 0; b < bt->nblock_used; b++)
    {
        try
        {
            int    ind = bt->block_index[b];
            rvec4* fp[MAX_BONDED_THREADS];

            /* Determine which threads contribute to this block */
            int nfb = 0;
            for (int ft = 0; ft < bt->nthreads; ft++)
            {
                if (bitmask_is_set(bt->mask[ind], ft))
                {
                    fp[nfb++] = bt->f_t[ft]->f;
                }
            }
            if (nfb > 0)
            {
                /* Reduce force buffers for threads that contribute */
                int a0 = ind * reduction_block_size;
                int a1 = (ind + 1) * reduction_block_size;
                /* It would be nice if we could pad f to avoid this min */
                a1 = std::min(a1, numAtomsForce);
                for (int a = a0; a < a1; a++)
                {
                    for (int fb = 0; fb < nfb; fb++)
                    {
                        rvec_inc(f[a], fp[fb][a]);
                    }
                }
            }
        }
        GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
    }
}

/*! \brief Reduce thread-local forces, shift forces and energies */
void reduce_thread_output(gmx::ForceWithShiftForces* forceWithShiftForces,
                          real*                      ener,
                          gmx_grppairener_t*         grpp,
                          gmx::ArrayRef<real>        dvdl,
                          const bonded_threading_t*  bt,
                          const gmx::StepWorkload&   stepWork)
{
    assert(bt->haveBondeds);

    if (bt->nblock_used > 0)
    {
        /* Reduce the bonded force buffer */
        reduce_thread_forces(forceWithShiftForces->force(), bt, bt->nthreads);
    }

    rvec* gmx_restrict fshift = as_rvec_array(forceWithShiftForces->shiftForces().data());

    /* When necessary, reduce energy and virial using one thread only */
    if ((stepWork.computeEnergy || stepWork.computeVirial || stepWork.computeDhdl) && bt->nthreads > 1)
    {
        gmx::ArrayRef<const std::unique_ptr<f_thread_t>> f_t = bt->f_t;

        if (stepWork.computeVirial)
        {
            for (int i = 0; i < gmx::c_numShiftVectors; i++)
            {
                for (int t = 1; t < bt->nthreads; t++)
                {
                    rvec_inc(fshift[i], f_t[t]->fshift[i]);
                }
            }
        }
        if (stepWork.computeEnergy)
        {
            for (int i = 0; i < F_NRE; i++)
            {
                for (int t = 1; t < bt->nthreads; t++)
                {
                    ener[i] += f_t[t]->ener[i];
                }
            }
            for (int i = 0; i < static_cast<int>(NonBondedEnergyTerms::Count); i++)
            {
                for (int j = 0; j < f_t[1]->grpp.nener; j++)
                {
                    for (int t = 1; t < bt->nthreads; t++)
                    {
                        grpp->energyGroupPairTerms[i][j] += f_t[t]->grpp.energyGroupPairTerms[i][j];
                    }
                }
            }
        }
        if (stepWork.computeDhdl)
        {
            for (auto i : keysOf(f_t[1]->dvdl))
            {
                for (int t = 1; t < bt->nthreads; t++)
                {
                    dvdl[static_cast<int>(i)] += f_t[t]->dvdl[i];
                }
            }
        }
    }
}

/*! \brief Returns the bonded kernel flavor
 *
 * Note that energies are always requested when the virial
 * is requested (performance gain would be small).
 * Note that currently we do not have bonded kernels that
 * do not compute forces.
 */
BondedKernelFlavor selectBondedKernelFlavor(const gmx::StepWorkload& stepWork,
                                            const bool               useSimdKernels,
                                            const bool               havePerturbedInteractions)
{
    BondedKernelFlavor flavor;
    if (stepWork.computeEnergy || stepWork.computeVirial)
    {
        if (stepWork.computeVirial)
        {
            flavor = BondedKernelFlavor::ForcesAndVirialAndEnergy;
        }
        else
        {
            flavor = BondedKernelFlavor::ForcesAndEnergy;
        }
    }
    else
    {
        if (useSimdKernels && !havePerturbedInteractions)
        {
            flavor = BondedKernelFlavor::ForcesSimdWhenAvailable;
        }
        else
        {
            flavor = BondedKernelFlavor::ForcesNoSimd;
        }
    }

    return flavor;
}

/*! \brief Calculate one element of the list of bonded interactions
    for this thread */
real calc_one_bond(int                           thread,
                   int                           ftype,
                   const InteractionDefinitions& idef,
                   ArrayRef<const int>           iatoms,
                   const int                     numNonperturbedInteractions,
                   const WorkDivision&           workDivision,
                   const rvec                    x[],
                   rvec4                         f[],
                   rvec                          fshift[],
                   const t_forcerec*             fr,
                   const t_pbc*                  pbc,
                   gmx_grppairener_t*            grpp,
                   t_nrnb*                       nrnb,
                   gmx::ArrayRef<const real>     lambda,
                   gmx::ArrayRef<real>           dvdl,
                   const t_mdatoms*              md,
                   t_fcdata*                     fcd,
                   const gmx::StepWorkload&      stepWork,
                   int*                          global_atom_index)
{
    GMX_ASSERT(idef.ilsort == ilsortNO_FE || idef.ilsort == ilsortFE_SORTED,
               "The topology should be marked either as no FE or sorted on FE");
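    /* With free-energy sorting, the perturbed interactions are stored after
     * the non-perturbed ones in iatoms, so a non-perturbed count smaller than
     * the list size means perturbed entries are present. */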
    const bool havePerturbedInteractions =
            (idef.ilsort == ilsortFE_SORTED && numNonperturbedInteractions < iatoms.ssize());
    BondedKernelFlavor flavor =
            selectBondedKernelFlavor(stepWork, fr->use_simd_kernels, havePerturbedInteractions);
    FreeEnergyPerturbationCouplingType efptFTYPE;
    if (IS_RESTRAINT_TYPE(ftype))
    {
        efptFTYPE = FreeEnergyPerturbationCouplingType::Restraint;
    }
    else
    {
        efptFTYPE = FreeEnergyPerturbationCouplingType::Bonded;
    }

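    /* Each entry in iatoms is a parameter-type index followed by the
     * nratoms atom indices of the interaction, hence the stride nat1. */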
    const int nat1   = interaction_function[ftype].nratoms + 1;
    const int nbonds = iatoms.ssize() / nat1;

    GMX_ASSERT(fr->gpuBonded != nullptr || workDivision.end(ftype) == iatoms.ssize(),
               "The thread division should match the topology");

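    /* This thread handles the half-open range of iatoms entries
     * [nb0, nb0 + nbn) assigned to it by the work division. */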
    const int nb0 = workDivision.bound(ftype, thread);
    const int nbn = workDivision.bound(ftype, thread + 1) - nb0;

    ArrayRef<const t_iparams> iparams = idef.iparams;

    real v = 0;
    if (!isPairInteraction(ftype))
    {
        if (ftype == F_CMAP)
        {
            /* TODO The execution time for CMAP dihedrals might be
               nice to account to its own subtimer, but first
               wallcycle needs to be extended to support calling from
               multiple threads. */
            v = cmap_dihs(nbn,
                          iatoms.data() + nb0,
                          iparams.data(),
                          &idef.cmap_grid,
                          x,
                          f,
                          fshift,
                          pbc,
                          lambda[static_cast<int>(efptFTYPE)],
                          &(dvdl[static_cast<int>(efptFTYPE)]),
                          gmx::arrayRefFromArray(md->chargeA, md->nr),
                          fcd,
                          nullptr,
                          nullptr,
                          global_atom_index);
        }
        else
        {
            v = calculateSimpleBond(ftype,
                                    nbn,
                                    iatoms.data() + nb0,
                                    iparams.data(),
                                    x,
                                    f,
                                    fshift,
                                    pbc,
                                    lambda[static_cast<int>(efptFTYPE)],
                                    &(dvdl[static_cast<int>(efptFTYPE)]),
                                    gmx::arrayRefFromArray(md->chargeA, md->nr),
                                    fcd,
                                    fcd->disres,
                                    fcd->orires.get(),
                                    global_atom_index,
                                    flavor);
        }
    }
    else
    {
        /* TODO The execution time for pairs might be nice to account
           to its own subtimer, but first wallcycle needs to be
           extended to support calling from multiple threads. */
        do_pairs(ftype,
                 nbn,
                 iatoms.data() + nb0,
                 iparams.data(),
                 x,
                 f,
                 fshift,
                 pbc,
                 lambda.data(),
                 dvdl.data(),
                 gmx::arrayRefFromArray(md->chargeA, md->nr),
                 gmx::arrayRefFromArray(md->chargeB, md->nr),
                 md->bPerturbed ? gmx::arrayRefFromArray(md->bPerturbed, md->nr) : gmx::ArrayRef<bool>(),
                 gmx::arrayRefFromArray(md->cENER, md->nr),
                 md->nPerturbed,
                 fr,
                 havePerturbedInteractions,
                 stepWork,
                 grpp,
                 global_atom_index);
    }

    if (thread == 0)
    {
        inc_nrnb(nrnb, nrnbIndex(ftype), nbonds);
    }

    return v;
}

} // namespace

/*! \brief Compute the bonded part of the listed forces, parallelized over threads
 */
static void calcBondedForces(const InteractionDefinitions& idef,
                             bonded_threading_t*           bt,
                             const rvec                    x[],
                             const t_forcerec*             fr,
                             const t_pbc*                  pbc_null,
                             rvec*                         fshiftMasterBuffer,
                             gmx_enerdata_t*               enerd,
                             t_nrnb*                       nrnb,
                             gmx::ArrayRef<const real>     lambda,
                             gmx::ArrayRef<real>           dvdl,
                             const t_mdatoms*              md,
                             t_fcdata*                     fcd,
                             const gmx::StepWorkload&      stepWork,
                             int*                          global_atom_index)
{
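    /* Each OpenMP thread evaluates its assigned slice of every interaction
     * type into thread-local force, shift-force and energy buffers; the
     * per-thread results are combined afterwards in reduce_thread_output(). */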
#pragma omp parallel for num_threads(bt->nthreads) schedule(static)
    for (int thread = 0; thread < bt->nthreads; thread++)
    {
        try
        {
            f_thread_t& threadBuffers = *bt->f_t[thread];
            int         ftype;
            real        v;
            /* thread stuff */
            rvec*               fshift;
            gmx::ArrayRef<real> dvdlt;
            gmx::ArrayRef<real> epot;
            gmx_grppairener_t*  grpp;

            zero_thread_output(&threadBuffers);

            rvec4* ft = threadBuffers.f;

            /* Thread 0 writes directly to the main output buffers.
             * We might want to reconsider this.
             */
            if (thread == 0)
            {
                fshift = fshiftMasterBuffer;
                epot   = enerd->term;
                grpp   = &enerd->grpp;
                dvdlt  = dvdl;
            }
            else
            {
                fshift = as_rvec_array(threadBuffers.fshift.data());
                epot   = threadBuffers.ener;
                grpp   = &threadBuffers.grpp;
                dvdlt  = threadBuffers.dvdl;
            }
            /* Loop over all bonded force types to calculate the bonded forces */
            for (ftype = 0; (ftype < F_NRE); ftype++)
            {
                const InteractionList& ilist = idef.il[ftype];
                if (!ilist.empty() && ftype_is_bonded_potential(ftype))
                {
                    ArrayRef<const int> iatoms = gmx::makeConstArrayRef(ilist.iatoms);
                    v                          = calc_one_bond(thread,
                                      ftype,
                                      idef,
                                      iatoms,
                                      idef.numNonperturbedInteractions[ftype],
                                      bt->workDivision,
                                      x,
                                      ft,
                                      fshift,
                                      fr,
                                      pbc_null,
                                      grpp,
                                      nrnb,
                                      lambda,
                                      dvdlt,
                                      md,
                                      fcd,
                                      stepWork,
                                      global_atom_index);
                    epot[ftype] += v;
                }
            }
        }
        GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
    }
}

bool ListedForces::haveRestraints(const t_fcdata& fcdata) const
{
    GMX_ASSERT(fcdata.disres, "NMR distance restraint object should be set up");

    return (!idef_->il[F_POSRES].empty() || !idef_->il[F_FBPOSRES].empty() || fcdata.orires
            || fcdata.disres->nres > 0);
}

bool ListedForces::haveCpuBondeds() const
{
    return threading_->haveBondeds;
}

bool ListedForces::haveCpuListedForces(const t_fcdata& fcdata) const
{
    return haveCpuBondeds() || haveRestraints(fcdata);
}

namespace
{

/*! \brief Calculates all listed force interactions. */
void calc_listed(struct gmx_wallcycle*         wcycle,
                 const InteractionDefinitions& idef,
                 bonded_threading_t*           bt,
                 const rvec                    x[],
                 gmx::ForceOutputs*            forceOutputs,
                 const t_forcerec*             fr,
                 const t_pbc*                  pbc,
                 gmx_enerdata_t*               enerd,
                 t_nrnb*                       nrnb,
                 gmx::ArrayRef<const real>     lambda,
                 const t_mdatoms*              md,
                 t_fcdata*                     fcd,
                 int*                          global_atom_index,
                 const gmx::StepWorkload&      stepWork)
{
    if (bt->haveBondeds)
    {
        gmx::ForceWithShiftForces& forceWithShiftForces = forceOutputs->forceWithShiftForces();

        wallcycle_sub_start(wcycle, WallCycleSubCounter::Listed);
        /* The dummy array is to have a place to store the dhdl at other values
           of lambda, which will be thrown away in the end */
        gmx::EnumerationArray<FreeEnergyPerturbationCouplingType, real> dvdl = { 0 };
        calcBondedForces(idef,
                         bt,
                         x,
                         fr,
                         fr->bMolPBC ? pbc : nullptr,
                         as_rvec_array(forceWithShiftForces.shiftForces().data()),
                         enerd,
                         nrnb,
                         lambda,
                         dvdl,
                         md,
                         fcd,
                         stepWork,
                         global_atom_index);
        wallcycle_sub_stop(wcycle, WallCycleSubCounter::Listed);

        wallcycle_sub_start(wcycle, WallCycleSubCounter::ListedBufOps);
        reduce_thread_output(&forceWithShiftForces, enerd->term.data(), &enerd->grpp, dvdl, bt, stepWork);

        if (stepWork.computeDhdl)
        {
            for (auto i : keysOf(enerd->dvdl_lin))
            {
                enerd->dvdl_nonlin[i] += dvdl[i];
            }
        }
        wallcycle_sub_stop(wcycle, WallCycleSubCounter::ListedBufOps);
    }

    /* Copy the sum of violations for the distance restraints from fcd */
    if (fcd)
    {
        enerd->term[F_DISRESVIOL] = fcd->disres->sumviol;
    }
}

/*! \brief As calc_listed(), but only determines the potential energy
 * for the perturbed interactions.
 *
 * The shift forces in fr are not affected.
 */
void calc_listed_lambda(const InteractionDefinitions& idef,
                        bonded_threading_t*           bt,
                        const rvec                    x[],
                        const t_forcerec*             fr,
                        const struct t_pbc*           pbc,
                        gmx::ArrayRef<real>           forceBufferLambda,
                        gmx::ArrayRef<gmx::RVec>      shiftForceBufferLambda,
                        gmx_grppairener_t*            grpp,
                        real*                         epot,
                        gmx::ArrayRef<real>           dvdl,
                        t_nrnb*                       nrnb,
                        gmx::ArrayRef<const real>     lambda,
                        const t_mdatoms*              md,
                        t_fcdata*                     fcd,
                        int*                          global_atom_index)
{
    WorkDivision& workDivision = bt->foreignLambdaWorkDivision;

    const t_pbc* pbc_null;
    if (fr->bMolPBC)
    {
        pbc_null = pbc;
    }
    else
    {
        pbc_null = nullptr;
    }

    /* We already have the forces, so we use temp buffers here */
    std::fill(forceBufferLambda.begin(), forceBufferLambda.end(), 0.0_real);
    std::fill(shiftForceBufferLambda.begin(),
              shiftForceBufferLambda.end(),
              gmx::RVec{ 0.0_real, 0.0_real, 0.0_real });
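    /* forceBufferLambda was sized in ListedForces::setup() as
     * numAtomsForce * sizeof(rvec4) / sizeof(real) reals, so it can safely
     * be viewed as an array of rvec4 here. */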
    rvec4* f      = reinterpret_cast<rvec4*>(forceBufferLambda.data());
    rvec*  fshift = as_rvec_array(shiftForceBufferLambda.data());

    /* Loop over all bonded force types to calculate the bonded energies */
    for (int ftype = 0; (ftype < F_NRE); ftype++)
    {
        if (ftype_is_bonded_potential(ftype))
        {
            const InteractionList& ilist = idef.il[ftype];
            /* Create a temporary iatom list with only perturbed interactions */
            const int           numNonperturbed = idef.numNonperturbedInteractions[ftype];
            ArrayRef<const int> iatomsPerturbed = gmx::constArrayRefFromArray(
                    ilist.iatoms.data() + numNonperturbed, ilist.size() - numNonperturbed);
            if (!iatomsPerturbed.empty())
            {
                /* Set the work range of thread 0 to the perturbed bondeds */
                workDivision.setBound(ftype, 0, 0);
                workDivision.setBound(ftype, 1, iatomsPerturbed.ssize());

                gmx::StepWorkload tempFlags;
                tempFlags.computeEnergy = true;
                real v                  = calc_one_bond(0,
                                       ftype,
                                       idef,
                                       iatomsPerturbed,
                                       iatomsPerturbed.ssize(),
                                       workDivision,
                                       x,
                                       f,
                                       fshift,
                                       fr,
                                       pbc_null,
                                       grpp,
                                       nrnb,
                                       lambda,
                                       dvdl,
                                       md,
                                       fcd,
                                       tempFlags,
                                       global_atom_index);
                epot[ftype] += v;
            }
        }
    }
}

} // namespace

void ListedForces::calculate(struct gmx_wallcycle*                     wcycle,
                             const matrix                              box,
                             const t_lambda*                           fepvals,
                             const t_commrec*                          cr,
                             const gmx_multisim_t*                     ms,
                             gmx::ArrayRefWithPadding<const gmx::RVec> coordinates,
                             gmx::ArrayRef<const gmx::RVec>            xWholeMolecules,
                             t_fcdata*                                 fcdata,
                             const history_t*                          hist,
                             gmx::ForceOutputs*                        forceOutputs,
                             const t_forcerec*                         fr,
                             const struct t_pbc*                       pbc,
                             gmx_enerdata_t*                           enerd,
                             t_nrnb*                                   nrnb,
                             gmx::ArrayRef<const real>                 lambda,
                             const t_mdatoms*                          md,
                             int*                                      global_atom_index,
                             const gmx::StepWorkload&                  stepWork)
{
    if (interactionSelection_.none() || !stepWork.computeListedForces)
    {
        return;
    }

    const InteractionDefinitions& idef = *idef_;

    // TODO: replace all rvec use here with ArrayRefWithPadding
    const rvec* x = as_rvec_array(coordinates.paddedArrayRef().data());

    const bool calculateRestInteractions =
            interactionSelection_.test(static_cast<int>(ListedForces::InteractionGroup::Rest));

    t_pbc pbc_full; /* Full PBC is needed for position restraints */
    if (calculateRestInteractions && haveRestraints(*fcdata))
    {
        if (!idef.il[F_POSRES].empty() || !idef.il[F_FBPOSRES].empty())
        {
            /* Not enough flops to bother counting */
            set_pbc(&pbc_full, fr->pbcType, box);
        }

        /* TODO Use of restraints triggers further function calls
           inside the loop over calc_one_bond(), but those are too
           awkward to account to this subtimer properly in the present
           code. We don't test / care much about performance with
           restraints, anyway. */
        wallcycle_sub_start(wcycle, WallCycleSubCounter::Restraints);

        if (!idef.il[F_POSRES].empty())
        {
            posres_wrapper(nrnb, idef, &pbc_full, x, enerd, lambda, fr, &forceOutputs->forceWithVirial());
        }

        if (!idef.il[F_FBPOSRES].empty())
        {
            fbposres_wrapper(nrnb, idef, &pbc_full, x, enerd, fr, &forceOutputs->forceWithVirial());
        }

        /* Do pre force calculation stuff which might require communication */
        if (fcdata->orires)
        {
            GMX_ASSERT(!xWholeMolecules.empty(), "Need whole molecules for orientation restraints");
            enerd->term[F_ORIRESDEV] = calc_orires_dev(ms,
                                                       idef.il[F_ORIRES].size(),
                                                       idef.il[F_ORIRES].iatoms.data(),
                                                       idef.iparams.data(),
                                                       md,
                                                       xWholeMolecules,
                                                       x,
                                                       fr->bMolPBC ? pbc : nullptr,
                                                       fcdata->orires.get(),
                                                       hist);
        }
        if (fcdata->disres->nres > 0)
        {
            calc_disres_R_6(cr,
                            ms,
                            idef.il[F_DISRES].size(),
                            idef.il[F_DISRES].iatoms.data(),
                            x,
                            fr->bMolPBC ? pbc : nullptr,
                            fcdata->disres,
                            hist);
        }

        wallcycle_sub_stop(wcycle, WallCycleSubCounter::Restraints);
    }

    calc_listed(wcycle, idef, threading_.get(), x, forceOutputs, fr, pbc, enerd, nrnb, lambda, md, fcdata, global_atom_index, stepWork);

    /* Check if we have to determine energy differences
     * at foreign lambdas.
     */
    if (fepvals->n_lambda > 0 && stepWork.computeDhdl)
    {
        gmx::EnumerationArray<FreeEnergyPerturbationCouplingType, real> dvdl = { 0 };
        if (!idef.il[F_POSRES].empty())
        {
            posres_wrapper_lambda(wcycle, fepvals, idef, &pbc_full, x, enerd, lambda, fr);
        }
        if (idef.ilsort != ilsortNO_FE)
        {
            wallcycle_sub_start(wcycle, WallCycleSubCounter::ListedFep);
            if (idef.ilsort != ilsortFE_SORTED)
            {
                gmx_incons("The bonded interactions are not sorted for free energy");
            }
            for (int i = 0; i < 1 + enerd->foreignLambdaTerms.numLambdas(); i++)
            {
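                /* Index 0 evaluates at the current lambda values; indices
                 * i >= 1 correspond to the foreign lambda points in fepvals. */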
                gmx::EnumerationArray<FreeEnergyPerturbationCouplingType, real> lam_i;

                reset_foreign_enerdata(enerd);
                for (auto j : keysOf(lam_i))
                {
                    lam_i[j] = (i == 0 ? lambda[static_cast<int>(j)] : fepvals->all_lambda[j][i - 1]);
                }
                calc_listed_lambda(idef,
                                   threading_.get(),
                                   x,
                                   fr,
                                   pbc,
                                   forceBufferLambda_,
                                   shiftForceBufferLambda_,
                                   &(enerd->foreign_grpp),
                                   enerd->foreign_term.data(),
                                   dvdl,
                                   nrnb,
                                   lam_i,
                                   md,
                                   fcdata,
                                   global_atom_index);
                sum_epot(enerd->foreign_grpp, enerd->foreign_term.data());
                const double dvdlSum = std::accumulate(std::begin(dvdl), std::end(dvdl), 0.);
                std::fill(std::begin(dvdl), std::end(dvdl), 0.0);
                enerd->foreignLambdaTerms.accumulate(i, enerd->foreign_term[F_EPOT], dvdlSum);
            }
            wallcycle_sub_stop(wcycle, WallCycleSubCounter::ListedFep);
        }
    }
}
929 }