Enable splitting of listed interaction calculation
src/gromacs/listed_forces/listed_forces.cpp
/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
 * Copyright (c) 2019,2020, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
/*! \internal \file
 *
 * \brief This file defines high-level functions for mdrun to compute
 * energies and forces for listed interactions.
 *
 * \author Mark Abraham <mark.j.abraham@gmail.com>
 *
 * \ingroup module_listed_forces
 */
#include "gmxpre.h"

#include "listed_forces.h"

#include <cassert>

#include <algorithm>
#include <array>
#include <numeric>

#include "gromacs/gmxlib/network.h"
#include "gromacs/gmxlib/nrnb.h"
#include "gromacs/listed_forces/bonded.h"
#include "gromacs/listed_forces/disre.h"
#include "gromacs/listed_forces/orires.h"
#include "gromacs/listed_forces/pairs.h"
#include "gromacs/listed_forces/position_restraints.h"
#include "gromacs/math/vec.h"
#include "gromacs/mdlib/enerdata_utils.h"
#include "gromacs/mdlib/force.h"
#include "gromacs/mdlib/gmx_omp_nthreads.h"
#include "gromacs/mdtypes/commrec.h"
#include "gromacs/mdtypes/fcdata.h"
#include "gromacs/mdtypes/forceoutput.h"
#include "gromacs/mdtypes/forcerec.h"
#include "gromacs/mdtypes/inputrec.h"
#include "gromacs/mdtypes/md_enums.h"
#include "gromacs/mdtypes/simulation_workload.h"
#include "gromacs/pbcutil/ishift.h"
#include "gromacs/pbcutil/pbc.h"
#include "gromacs/timing/wallcycle.h"
#include "gromacs/topology/topology.h"
#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/fatalerror.h"

#include "listed_internal.h"
#include "manage_threading.h"
#include "utilities.h"

ListedForces::ListedForces(const gmx_ffparams_t&      ffparams,
                           const int                  numEnergyGroups,
                           const int                  numThreads,
                           const InteractionSelection interactionSelection,
                           FILE*                      fplog) :
    idefSelection_(ffparams),
    threading_(std::make_unique<bonded_threading_t>(numThreads, numEnergyGroups, fplog)),
    interactionSelection_(interactionSelection)
{
}
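
/* A minimal sketch of the splitting this constructor enables, assuming
 * InteractionSelection follows the std::bitset interface implied by the
 * test()/all()/none() calls in this file; variable names are hypothetical.
 * One instance computes only the pair interactions, a second one computes
 * all remaining groups:
 * \code
 *   ListedForces::InteractionSelection pairsOnly;
 *   pairsOnly.set(static_cast<int>(ListedForces::InteractionGroup::Pairs));
 *   ListedForces pairForces(ffparams, numEnergyGroups, numThreads, pairsOnly, fplog);
 *
 *   // The complementary instance computes the remaining interaction groups
 *   ListedForces::InteractionSelection allButPairs = pairsOnly;
 *   allButPairs.flip();
 *   ListedForces restForces(ffparams, numEnergyGroups, numThreads, allButPairs, fplog);
 * \endcode
 */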

ListedForces::ListedForces(ListedForces&& o) noexcept = default;

ListedForces::~ListedForces() = default;

//! Copies the selected interactions from \p idefSrc to \p idef
static void selectInteractions(InteractionDefinitions*                  idef,
                               const InteractionDefinitions&            idefSrc,
                               const ListedForces::InteractionSelection interactionSelection)
{
    const bool selectPairs =
            interactionSelection.test(static_cast<int>(ListedForces::InteractionGroup::Pairs));
    const bool selectDihedrals =
            interactionSelection.test(static_cast<int>(ListedForces::InteractionGroup::Dihedrals));
    const bool selectRest =
            interactionSelection.test(static_cast<int>(ListedForces::InteractionGroup::Rest));

    for (int ftype = 0; ftype < F_NRE; ftype++)
    {
        const t_interaction_function& ifunc = interaction_function[ftype];
        if (ifunc.flags & IF_BOND)
        {
            bool assign = false;
            if (ifunc.flags & IF_PAIR)
            {
                assign = selectPairs;
            }
            else if (ifunc.flags & IF_DIHEDRAL)
            {
                assign = selectDihedrals;
            }
            else
            {
                assign = selectRest;
            }
            if (assign)
            {
                idef->il[ftype] = idefSrc.il[ftype];
            }
            else
            {
                idef->il[ftype].clear();
            }
        }
    }
}
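
/* For example, with the current interaction_function flag assignments:
 * F_LJ14 carries IF_PAIR and is assigned to the Pairs group, F_PDIHS
 * carries IF_DIHEDRAL and goes to Dihedrals, while F_BONDS (IF_BOND only)
 * falls through to Rest. */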

void ListedForces::setup(const InteractionDefinitions& domainIdef, const int numAtomsForce, const bool useGpu)
{
    if (interactionSelection_.all())
    {
        // Avoid the overhead of copying all interaction lists by simply setting the reference to the domain idef
        idef_ = &domainIdef;
    }
    else
    {
        idef_ = &idefSelection_;

        selectInteractions(&idefSelection_, domainIdef, interactionSelection_);

        idefSelection_.ilsort = domainIdef.ilsort;
    }

    setup_bonded_threading(threading_.get(), numAtomsForce, useGpu, *idef_);

    if (idef_->ilsort == ilsortFE_SORTED)
    {
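        /* The foreign-lambda force buffer stores one rvec4 (4 reals) per
         * atom in a flat buffer of reals, hence the sizeof ratio below. */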
        forceBufferLambda_.resize(numAtomsForce * sizeof(rvec4) / sizeof(real));
        shiftForceBufferLambda_.resize(SHIFTS);
    }
}

namespace
{

using gmx::ArrayRef;

/*! \brief Return true if ftype is an explicit pair-listed LJ or
 * COULOMB interaction type: bonded LJ (usually 1-4), or special
 * listed non-bonded for FEP; in the current ordering of the ftype
 * enum these are F_LJ14, F_COUL14, F_LJC14_Q and F_LJC_PAIRS_NB. */
bool isPairInteraction(int ftype)
{
    return (ftype >= F_LJ14 && ftype <= F_LJC_PAIRS_NB);
}

/*! \brief Zero thread-local output buffers */
void zero_thread_output(f_thread_t* f_t)
{
    constexpr int nelem_fa = sizeof(f_t->f[0]) / sizeof(real);

    for (int i = 0; i < f_t->nblock_used; i++)
    {
        int a0 = f_t->block_index[i] * reduction_block_size;
        int a1 = a0 + reduction_block_size;
        for (int a = a0; a < a1; a++)
        {
            for (int d = 0; d < nelem_fa; d++)
            {
                f_t->f[a][d] = 0;
            }
        }
    }

    for (int i = 0; i < SHIFTS; i++)
    {
        clear_rvec(f_t->fshift[i]);
    }
    for (int i = 0; i < F_NRE; i++)
    {
        f_t->ener[i] = 0;
    }
    for (int i = 0; i < egNR; i++)
    {
        for (int j = 0; j < f_t->grpp.nener; j++)
        {
            f_t->grpp.ener[i][j] = 0;
        }
    }
    for (int i = 0; i < efptNR; i++)
    {
        f_t->dvdl[i] = 0;
    }
}

/*! \brief The max thread number is arbitrary; we use a fixed number
 * to avoid memory management. Using more than 16 threads is probably
 * never useful performance-wise. */
#define MAX_BONDED_THREADS 256

/*! \brief Reduce thread-local force buffers */
void reduce_thread_forces(gmx::ArrayRef<gmx::RVec> force, const bonded_threading_t* bt, int nthreads)
{
    if (nthreads > MAX_BONDED_THREADS)
    {
        gmx_fatal(FARGS, "Can not reduce bonded forces on more than %d threads", MAX_BONDED_THREADS);
    }

    rvec* gmx_restrict f = as_rvec_array(force.data());

    const int numAtomsForce = bt->numAtomsForce;

    /* This reduction can run on any number of threads,
     * independently of bt->nthreads.
     * But if nthreads matches bt->nthreads (which it currently does),
     * the uniform distribution of the touched blocks over nthreads will
     * match the distribution of bondeds over threads well in most cases,
     * which means that threads mostly reduce their own data, increasing
     * the number of cache hits.
     */
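    /* Note: bit ft of bt->mask[block] is set when thread ft touched that
     * reduction block, so below only the buffers of contributing threads
     * are read. */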
#pragma omp parallel for num_threads(nthreads) schedule(static)
    for (int b = 0; b < bt->nblock_used; b++)
    {
        try
        {
            int    ind = bt->block_index[b];
            rvec4* fp[MAX_BONDED_THREADS];

            /* Determine which threads contribute to this block */
            int nfb = 0;
            for (int ft = 0; ft < bt->nthreads; ft++)
            {
                if (bitmask_is_set(bt->mask[ind], ft))
                {
                    fp[nfb++] = bt->f_t[ft]->f;
                }
            }
            if (nfb > 0)
            {
                /* Reduce force buffers for threads that contribute */
                int a0 = ind * reduction_block_size;
                int a1 = (ind + 1) * reduction_block_size;
                /* It would be nice if we could pad f to avoid this min */
                a1 = std::min(a1, numAtomsForce);
                for (int a = a0; a < a1; a++)
                {
                    for (int fb = 0; fb < nfb; fb++)
                    {
                        rvec_inc(f[a], fp[fb][a]);
                    }
                }
            }
        }
        GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
    }
}

/*! \brief Reduce thread-local forces, shift forces and energies */
void reduce_thread_output(gmx::ForceWithShiftForces* forceWithShiftForces,
                          real*                      ener,
                          gmx_grppairener_t*         grpp,
                          real*                      dvdl,
                          const bonded_threading_t*  bt,
                          const gmx::StepWorkload&   stepWork)
{
    assert(bt->haveBondeds);

    if (bt->nblock_used > 0)
    {
        /* Reduce the bonded force buffer */
        reduce_thread_forces(forceWithShiftForces->force(), bt, bt->nthreads);
    }

    rvec* gmx_restrict fshift = as_rvec_array(forceWithShiftForces->shiftForces().data());

    /* When necessary, reduce energy and virial using one thread only */
    if ((stepWork.computeEnergy || stepWork.computeVirial || stepWork.computeDhdl) && bt->nthreads > 1)
    {
        gmx::ArrayRef<const std::unique_ptr<f_thread_t>> f_t = bt->f_t;

        if (stepWork.computeVirial)
        {
            for (int i = 0; i < SHIFTS; i++)
            {
                for (int t = 1; t < bt->nthreads; t++)
                {
                    rvec_inc(fshift[i], f_t[t]->fshift[i]);
                }
            }
        }
        if (stepWork.computeEnergy)
        {
            for (int i = 0; i < F_NRE; i++)
            {
                for (int t = 1; t < bt->nthreads; t++)
                {
                    ener[i] += f_t[t]->ener[i];
                }
            }
            for (int i = 0; i < egNR; i++)
            {
                for (int j = 0; j < f_t[1]->grpp.nener; j++)
                {
                    for (int t = 1; t < bt->nthreads; t++)
                    {
                        grpp->ener[i][j] += f_t[t]->grpp.ener[i][j];
                    }
                }
            }
        }
        if (stepWork.computeDhdl)
        {
            for (int i = 0; i < efptNR; i++)
            {
                for (int t = 1; t < bt->nthreads; t++)
                {
                    dvdl[i] += f_t[t]->dvdl[i];
                }
            }
        }
    }
}

/*! \brief Returns the bonded kernel flavor
 *
 * Note that energies are always requested when the virial
 * is requested (performance gain would be small).
 * Note that currently we do not have bonded kernels that
 * do not compute forces.
 */
BondedKernelFlavor selectBondedKernelFlavor(const gmx::StepWorkload& stepWork,
                                            const bool               useSimdKernels,
                                            const bool               havePerturbedInteractions)
{
    BondedKernelFlavor flavor;
    if (stepWork.computeEnergy || stepWork.computeVirial)
    {
        if (stepWork.computeVirial)
        {
            flavor = BondedKernelFlavor::ForcesAndVirialAndEnergy;
        }
        else
        {
            flavor = BondedKernelFlavor::ForcesAndEnergy;
        }
    }
    else
    {
        if (useSimdKernels && !havePerturbedInteractions)
        {
            flavor = BondedKernelFlavor::ForcesSimdWhenAvailable;
        }
        else
        {
            flavor = BondedKernelFlavor::ForcesNoSimd;
        }
    }

    return flavor;
}
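
/* Summary of the mapping implemented by selectBondedKernelFlavor():
 *   computeVirial set                              -> ForcesAndVirialAndEnergy
 *   computeEnergy set, virial not needed           -> ForcesAndEnergy
 *   forces only, SIMD allowed, nothing perturbed   -> ForcesSimdWhenAvailable
 *   forces only, otherwise                         -> ForcesNoSimd
 */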

/*! \brief Calculate one element of the list of bonded interactions
    for this thread */
real calc_one_bond(int                           thread,
                   int                           ftype,
                   const InteractionDefinitions& idef,
                   ArrayRef<const int>           iatoms,
                   const int                     numNonperturbedInteractions,
                   const WorkDivision&           workDivision,
                   const rvec                    x[],
                   rvec4                         f[],
                   rvec                          fshift[],
                   const t_forcerec*             fr,
                   const t_pbc*                  pbc,
                   gmx_grppairener_t*            grpp,
                   t_nrnb*                       nrnb,
                   const real*                   lambda,
                   real*                         dvdl,
                   const t_mdatoms*              md,
                   t_fcdata*                     fcd,
                   const gmx::StepWorkload&      stepWork,
                   int*                          global_atom_index)
{
    GMX_ASSERT(idef.ilsort == ilsortNO_FE || idef.ilsort == ilsortFE_SORTED,
               "The topology should be marked either as no FE or sorted on FE");

    const bool havePerturbedInteractions =
            (idef.ilsort == ilsortFE_SORTED && numNonperturbedInteractions < iatoms.ssize());
    BondedKernelFlavor flavor =
            selectBondedKernelFlavor(stepWork, fr->use_simd_kernels, havePerturbedInteractions);
    int efptFTYPE;
    if (IS_RESTRAINT_TYPE(ftype))
    {
        efptFTYPE = efptRESTRAINT;
    }
    else
    {
        efptFTYPE = efptBONDED;
    }

    const int nat1   = interaction_function[ftype].nratoms + 1;
    const int nbonds = iatoms.ssize() / nat1;

    GMX_ASSERT(fr->gpuBonded != nullptr || workDivision.end(ftype) == iatoms.ssize(),
               "The thread division should match the topology");

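    /* Each thread computes the half-open range [nb0, nb0 + nbn) of the
     * entries in iatoms that workDivision assigned to it. */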
    const int nb0 = workDivision.bound(ftype, thread);
    const int nbn = workDivision.bound(ftype, thread + 1) - nb0;

    ArrayRef<const t_iparams> iparams = idef.iparams;

    real v = 0;
    if (!isPairInteraction(ftype))
    {
        if (ftype == F_CMAP)
        {
            /* TODO The execution time for CMAP dihedrals might be
               nice to account to its own subtimer, but first
               wallcycle needs to be extended to support calling from
               multiple threads. */
            v = cmap_dihs(nbn, iatoms.data() + nb0, iparams.data(), &idef.cmap_grid, x, f, fshift,
                          pbc, lambda[efptFTYPE], &(dvdl[efptFTYPE]), md, fcd, global_atom_index);
        }
        else
        {
            v = calculateSimpleBond(ftype, nbn, iatoms.data() + nb0, iparams.data(), x, f, fshift,
                                    pbc, lambda[efptFTYPE], &(dvdl[efptFTYPE]), md, fcd,
                                    global_atom_index, flavor);
        }
    }
    else
    {
        /* TODO The execution time for pairs might be nice to account
           to its own subtimer, but first wallcycle needs to be
           extended to support calling from multiple threads. */
        do_pairs(ftype, nbn, iatoms.data() + nb0, iparams.data(), x, f, fshift, pbc, lambda, dvdl,
                 md, fr, havePerturbedInteractions, stepWork, grpp, global_atom_index);
    }

    if (thread == 0)
    {
        inc_nrnb(nrnb, nrnbIndex(ftype), nbonds);
    }

    return v;
}

} // namespace

/*! \brief Compute the bonded part of the listed forces, parallelized over threads
 */
static void calcBondedForces(const InteractionDefinitions& idef,
                             bonded_threading_t*           bt,
                             const rvec                    x[],
                             const t_forcerec*             fr,
                             const t_pbc*                  pbc_null,
                             rvec*                         fshiftMasterBuffer,
                             gmx_enerdata_t*               enerd,
                             t_nrnb*                       nrnb,
                             const real*                   lambda,
                             real*                         dvdl,
                             const t_mdatoms*              md,
                             t_fcdata*                     fcd,
                             const gmx::StepWorkload&      stepWork,
                             int*                          global_atom_index)
{
#pragma omp parallel for num_threads(bt->nthreads) schedule(static)
    for (int thread = 0; thread < bt->nthreads; thread++)
    {
        try
        {
            f_thread_t& threadBuffers = *bt->f_t[thread];
            int         ftype;
            real *      epot, v;
            /* thread stuff */
            rvec*              fshift;
            real*              dvdlt;
            gmx_grppairener_t* grpp;

            zero_thread_output(&threadBuffers);

            rvec4* ft = threadBuffers.f;

            /* Thread 0 writes directly to the main output buffers.
             * We might want to reconsider this.
             */
            if (thread == 0)
            {
                fshift = fshiftMasterBuffer;
                epot   = enerd->term;
                grpp   = &enerd->grpp;
                dvdlt  = dvdl;
            }
            else
            {
                fshift = as_rvec_array(threadBuffers.fshift.data());
                epot   = threadBuffers.ener;
                grpp   = &threadBuffers.grpp;
                dvdlt  = threadBuffers.dvdl;
            }
            /* Loop over all bonded force types to calculate the bonded forces */
            for (ftype = 0; (ftype < F_NRE); ftype++)
            {
                const InteractionList& ilist = idef.il[ftype];
                if (!ilist.empty() && ftype_is_bonded_potential(ftype))
                {
                    ArrayRef<const int> iatoms = gmx::makeConstArrayRef(ilist.iatoms);
                    v = calc_one_bond(thread, ftype, idef, iatoms, idef.numNonperturbedInteractions[ftype],
                                      bt->workDivision, x, ft, fshift, fr, pbc_null, grpp, nrnb,
                                      lambda, dvdlt, md, fcd, stepWork, global_atom_index);
                    epot[ftype] += v;
                }
            }
        }
        GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
    }
}

bool ListedForces::haveRestraints(const t_fcdata& fcdata) const
{
    GMX_ASSERT(fcdata.orires && fcdata.disres, "NMR restraints objects should be set up");

    return (!idef_->il[F_POSRES].empty() || !idef_->il[F_FBPOSRES].empty() || fcdata.orires->nr > 0
            || fcdata.disres->nres > 0);
}

bool ListedForces::haveCpuBondeds() const
{
    return threading_->haveBondeds;
}

bool ListedForces::haveCpuListedForces(const t_fcdata& fcdata) const
{
    return haveCpuBondeds() || haveRestraints(fcdata);
}

namespace
{

/*! \brief Calculates all listed force interactions. */
void calc_listed(struct gmx_wallcycle*         wcycle,
                 const InteractionDefinitions& idef,
                 bonded_threading_t*           bt,
                 const rvec                    x[],
                 gmx::ForceOutputs*            forceOutputs,
                 const t_forcerec*             fr,
                 const t_pbc*                  pbc,
                 gmx_enerdata_t*               enerd,
                 t_nrnb*                       nrnb,
                 const real*                   lambda,
                 const t_mdatoms*              md,
                 t_fcdata*                     fcd,
                 int*                          global_atom_index,
                 const gmx::StepWorkload&      stepWork)
{
    if (bt->haveBondeds)
    {
        gmx::ForceWithShiftForces& forceWithShiftForces = forceOutputs->forceWithShiftForces();

        wallcycle_sub_start(wcycle, ewcsLISTED);
        /* The dummy array is to have a place to store the dhdl at other values
           of lambda, which will be thrown away in the end */
        real dvdl[efptNR] = { 0 };
        calcBondedForces(idef, bt, x, fr, fr->bMolPBC ? pbc : nullptr,
                         as_rvec_array(forceWithShiftForces.shiftForces().data()), enerd, nrnb,
                         lambda, dvdl, md, fcd, stepWork, global_atom_index);
        wallcycle_sub_stop(wcycle, ewcsLISTED);

        wallcycle_sub_start(wcycle, ewcsLISTED_BUF_OPS);
        reduce_thread_output(&forceWithShiftForces, enerd->term, &enerd->grpp, dvdl, bt, stepWork);

        if (stepWork.computeDhdl)
        {
            for (int i = 0; i < efptNR; i++)
            {
                enerd->dvdl_nonlin[i] += dvdl[i];
            }
        }
        wallcycle_sub_stop(wcycle, ewcsLISTED_BUF_OPS);
    }

    /* Copy the sum of violations for the distance restraints from fcd */
    if (fcd)
    {
        enerd->term[F_DISRESVIOL] = fcd->disres->sumviol;
    }
}

/*! \brief As calc_listed(), but only determines the potential energy
 * for the perturbed interactions.
 *
 * The shift forces in fr are not affected.
 */
void calc_listed_lambda(const InteractionDefinitions& idef,
                        bonded_threading_t*           bt,
                        const rvec                    x[],
                        const t_forcerec*             fr,
                        const struct t_pbc*           pbc,
                        gmx::ArrayRef<real>           forceBufferLambda,
                        gmx::ArrayRef<gmx::RVec>      shiftForceBufferLambda,
                        gmx_grppairener_t*            grpp,
                        real*                         epot,
                        gmx::ArrayRef<real>           dvdl,
                        t_nrnb*                       nrnb,
                        const real*                   lambda,
                        const t_mdatoms*              md,
                        t_fcdata*                     fcd,
                        int*                          global_atom_index)
{
    WorkDivision& workDivision = bt->foreignLambdaWorkDivision;

    const t_pbc* pbc_null;
    if (fr->bMolPBC)
    {
        pbc_null = pbc;
    }
    else
    {
        pbc_null = nullptr;
    }

    /* We already have the forces, so we use temp buffers here */
    std::fill(forceBufferLambda.begin(), forceBufferLambda.end(), 0.0_real);
    std::fill(shiftForceBufferLambda.begin(), shiftForceBufferLambda.end(),
              gmx::RVec{ 0.0_real, 0.0_real, 0.0_real });
    rvec4* f      = reinterpret_cast<rvec4*>(forceBufferLambda.data());
    rvec*  fshift = as_rvec_array(shiftForceBufferLambda.data());

    /* Loop over all bonded force types to calculate the bonded energies */
    for (int ftype = 0; (ftype < F_NRE); ftype++)
    {
        if (ftype_is_bonded_potential(ftype))
        {
            const InteractionList& ilist = idef.il[ftype];
            /* Create a temporary iatom list with only perturbed interactions */
            const int           numNonperturbed = idef.numNonperturbedInteractions[ftype];
            ArrayRef<const int> iatomsPerturbed = gmx::constArrayRefFromArray(
                    ilist.iatoms.data() + numNonperturbed, ilist.size() - numNonperturbed);
            if (!iatomsPerturbed.empty())
            {
                /* Set the work range of thread 0 to the perturbed bondeds */
                workDivision.setBound(ftype, 0, 0);
                workDivision.setBound(ftype, 1, iatomsPerturbed.ssize());

                gmx::StepWorkload tempFlags;
                tempFlags.computeEnergy = true;
                real v = calc_one_bond(0, ftype, idef, iatomsPerturbed, iatomsPerturbed.ssize(),
                                       workDivision, x, f, fshift, fr, pbc_null, grpp, nrnb, lambda,
                                       dvdl.data(), md, fcd, tempFlags, global_atom_index);
                epot[ftype] += v;
            }
        }
    }
}

} // namespace

void ListedForces::calculate(struct gmx_wallcycle*          wcycle,
                             const matrix                   box,
                             const t_lambda*                fepvals,
                             const t_commrec*               cr,
                             const gmx_multisim_t*          ms,
                             const rvec                     x[],
                             gmx::ArrayRef<const gmx::RVec> xWholeMolecules,
                             t_fcdata*                      fcdata,
                             history_t*                     hist,
                             gmx::ForceOutputs*             forceOutputs,
                             const t_forcerec*              fr,
                             const struct t_pbc*            pbc,
                             gmx_enerdata_t*                enerd,
                             t_nrnb*                        nrnb,
                             const real*                    lambda,
                             const t_mdatoms*               md,
                             int*                           global_atom_index,
                             const gmx::StepWorkload&       stepWork)
{
    if (interactionSelection_.none() || !stepWork.computeListedForces)
    {
        return;
    }

    const InteractionDefinitions& idef = *idef_;

    t_pbc pbc_full; /* Full PBC is needed for position restraints */
    if (haveRestraints(*fcdata))
    {
        if (!idef.il[F_POSRES].empty() || !idef.il[F_FBPOSRES].empty())
        {
            /* Not enough flops to bother counting */
            set_pbc(&pbc_full, fr->pbcType, box);
        }

        /* TODO Use of restraints triggers further function calls
           inside the loop over calc_one_bond(), but those are too
           awkward to account to this subtimer properly in the present
           code. We don't test / care much about performance with
           restraints, anyway. */
        wallcycle_sub_start(wcycle, ewcsRESTRAINTS);

        if (!idef.il[F_POSRES].empty())
        {
            posres_wrapper(nrnb, idef, &pbc_full, x, enerd, lambda, fr, &forceOutputs->forceWithVirial());
        }

        if (!idef.il[F_FBPOSRES].empty())
        {
            fbposres_wrapper(nrnb, idef, &pbc_full, x, enerd, fr, &forceOutputs->forceWithVirial());
        }

        /* Do pre force calculation stuff which might require communication */
        if (fcdata->orires->nr > 0)
        {
            GMX_ASSERT(!xWholeMolecules.empty(), "Need whole molecules for orientation restraints");
            enerd->term[F_ORIRESDEV] = calc_orires_dev(
                    ms, idef.il[F_ORIRES].size(), idef.il[F_ORIRES].iatoms.data(), idef.iparams.data(),
                    md, xWholeMolecules, x, fr->bMolPBC ? pbc : nullptr, fcdata->orires, hist);
        }
        if (fcdata->disres->nres > 0)
        {
            calc_disres_R_6(cr, ms, idef.il[F_DISRES].size(), idef.il[F_DISRES].iatoms.data(), x,
                            fr->bMolPBC ? pbc : nullptr, fcdata->disres, hist);
        }

        wallcycle_sub_stop(wcycle, ewcsRESTRAINTS);
    }

    calc_listed(wcycle, idef, threading_.get(), x, forceOutputs, fr, pbc, enerd, nrnb, lambda, md,
                fcdata, global_atom_index, stepWork);

    /* Check if we have to determine energy differences
     * at foreign lambdas.
     */
    if (fepvals->n_lambda > 0 && stepWork.computeDhdl)
    {
        real dvdl[efptNR] = { 0 };
        if (!idef.il[F_POSRES].empty())
        {
            posres_wrapper_lambda(wcycle, fepvals, idef, &pbc_full, x, enerd, lambda, fr);
        }
        if (idef.ilsort != ilsortNO_FE)
        {
            wallcycle_sub_start(wcycle, ewcsLISTED_FEP);
            if (idef.ilsort != ilsortFE_SORTED)
            {
                gmx_incons("The bonded interactions are not sorted for free energy");
            }
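            /* Pass 0 re-evaluates the energy at the current lambda values;
             * passes 1..numLambdas() evaluate the foreign lambda values
             * taken from fepvals->all_lambda. */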
            for (int i = 0; i < 1 + enerd->foreignLambdaTerms.numLambdas(); i++)
            {
                real lam_i[efptNR];

                reset_foreign_enerdata(enerd);
                for (int j = 0; j < efptNR; j++)
                {
                    lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i - 1]);
                }
                calc_listed_lambda(idef, threading_.get(), x, fr, pbc, forceBufferLambda_,
                                   shiftForceBufferLambda_, &(enerd->foreign_grpp), enerd->foreign_term,
                                   dvdl, nrnb, lam_i, md, fcdata, global_atom_index);
                sum_epot(enerd->foreign_grpp, enerd->foreign_term);
                const double dvdlSum = std::accumulate(std::begin(dvdl), std::end(dvdl), 0.);
                std::fill(std::begin(dvdl), std::end(dvdl), 0.0);
                enerd->foreignLambdaTerms.accumulate(i, enerd->foreign_term[F_EPOT], dvdlSum);
            }
            wallcycle_sub_stop(wcycle, ewcsLISTED_FEP);
        }
    }
}