893a1d306cd0ceb17b9e612a76ca8f4fcb2dc061
[alexxy/gromacs.git] / src / gromacs / listed_forces / listed_forces.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
5  * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
6  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7  * and including many others, as listed in the AUTHORS file in the
8  * top-level source directory and at http://www.gromacs.org.
9  *
10  * GROMACS is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public License
12  * as published by the Free Software Foundation; either version 2.1
13  * of the License, or (at your option) any later version.
14  *
15  * GROMACS is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with GROMACS; if not, see
22  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
24  *
25  * If you want to redistribute modifications to GROMACS, please
26  * consider that scientific software is very special. Version
27  * control is crucial - bugs must be traceable. We will be happy to
28  * consider code for inclusion in the official distribution, but
29  * derived work must not be called official GROMACS. Details are found
30  * in the README & COPYING files - if they are missing, get the
31  * official version at http://www.gromacs.org.
32  *
33  * To help us fund GROMACS development, we humbly ask that you cite
34  * the research papers on the package. Check out http://www.gromacs.org.
35  */
36 /*! \internal \file
37  *
38  * \brief This file defines high-level functions for mdrun to compute
39  * energies and forces for listed interactions.
40  *
41  * \author Mark Abraham <mark.j.abraham@gmail.com>
42  *
43  * \ingroup module_listed_forces
44  */
45 #include "gmxpre.h"
46
47 #include "gromacs/utility/arrayref.h"
48 #include "gromacs/utility/enumerationhelpers.h"
49 #include "listed_forces.h"
50
51 #include <cassert>
52
53 #include <algorithm>
54 #include <array>
55 #include <numeric>
56
57 #include "gromacs/gmxlib/network.h"
58 #include "gromacs/gmxlib/nrnb.h"
59 #include "gromacs/listed_forces/bonded.h"
60 #include "gromacs/listed_forces/disre.h"
61 #include "gromacs/listed_forces/orires.h"
62 #include "gromacs/listed_forces/pairs.h"
63 #include "gromacs/listed_forces/position_restraints.h"
64 #include "gromacs/math/vec.h"
65 #include "gromacs/mdlib/enerdata_utils.h"
66 #include "gromacs/mdlib/force.h"
67 #include "gromacs/mdlib/gmx_omp_nthreads.h"
68 #include "gromacs/mdtypes/commrec.h"
69 #include "gromacs/mdtypes/mdatom.h"
70 #include "gromacs/mdtypes/fcdata.h"
71 #include "gromacs/mdtypes/forceoutput.h"
72 #include "gromacs/mdtypes/forcerec.h"
73 #include "gromacs/mdtypes/inputrec.h"
74 #include "gromacs/mdtypes/md_enums.h"
75 #include "gromacs/mdtypes/simulation_workload.h"
76 #include "gromacs/pbcutil/ishift.h"
77 #include "gromacs/pbcutil/pbc.h"
78 #include "gromacs/timing/wallcycle.h"
79 #include "gromacs/topology/topology.h"
80 #include "gromacs/utility/exceptions.h"
81 #include "gromacs/utility/fatalerror.h"
82
83 #include "listed_internal.h"
84 #include "manage_threading.h"
85 #include "utilities.h"
86
87 ListedForces::ListedForces(const gmx_ffparams_t&      ffparams,
88                            const int                  numEnergyGroups,
89                            const int                  numThreads,
90                            const InteractionSelection interactionSelection,
91                            FILE*                      fplog) :
92     idefSelection_(ffparams),
93     threading_(std::make_unique<bonded_threading_t>(numThreads, numEnergyGroups, fplog)),
94     interactionSelection_(interactionSelection),
95     foreignEnergyGroups_(std::make_unique<gmx_grppairener_t>(numEnergyGroups))
96 {
97 }
98
99 ListedForces::ListedForces(ListedForces&& o) noexcept = default;
100
101 ListedForces::~ListedForces() = default;
102
103 //! Copies the selection interactions from \p idefSrc to \p idef
104 static void selectInteractions(InteractionDefinitions*                  idef,
105                                const InteractionDefinitions&            idefSrc,
106                                const ListedForces::InteractionSelection interactionSelection)
107 {
108     const bool selectPairs =
109             interactionSelection.test(static_cast<int>(ListedForces::InteractionGroup::Pairs));
110     const bool selectDihedrals =
111             interactionSelection.test(static_cast<int>(ListedForces::InteractionGroup::Dihedrals));
112     const bool selectAngles =
113             interactionSelection.test(static_cast<int>(ListedForces::InteractionGroup::Angles));
114     const bool selectRest =
115             interactionSelection.test(static_cast<int>(ListedForces::InteractionGroup::Rest));
116
117     for (int ftype = 0; ftype < F_NRE; ftype++)
118     {
119         const t_interaction_function& ifunc = interaction_function[ftype];
120         if (ifunc.flags & IF_BOND)
121         {
122             bool assign = false;
123             if (ifunc.flags & IF_PAIR)
124             {
125                 assign = selectPairs;
126             }
127             else if (ifunc.flags & IF_DIHEDRAL)
128             {
129                 assign = selectDihedrals;
130             }
131             else if (ifunc.flags & IF_ATYPE)
132             {
133                 assign = selectAngles;
134             }
135             else
136             {
137                 assign = selectRest;
138             }
139             if (assign)
140             {
141                 idef->il[ftype] = idefSrc.il[ftype];
142             }
143             else
144             {
145                 idef->il[ftype].clear();
146             }
147         }
148     }
149 }
150
151 void ListedForces::setup(const InteractionDefinitions& domainIdef, const int numAtomsForce, const bool useGpu)
152 {
153     if (interactionSelection_.all())
154     {
155         // Avoid the overhead of copying all interaction lists by simply setting the reference to the domain idef
156         idef_ = &domainIdef;
157     }
158     else
159     {
160         idef_ = &idefSelection_;
161
162         selectInteractions(&idefSelection_, domainIdef, interactionSelection_);
163
164         idefSelection_.ilsort = domainIdef.ilsort;
165
166         if (interactionSelection_.test(static_cast<int>(ListedForces::InteractionGroup::Rest)))
167         {
168             idefSelection_.iparams_posres   = domainIdef.iparams_posres;
169             idefSelection_.iparams_fbposres = domainIdef.iparams_fbposres;
170         }
171         else
172         {
173             idefSelection_.iparams_posres.clear();
174             idefSelection_.iparams_fbposres.clear();
175         }
176     }
177
178     setup_bonded_threading(threading_.get(), numAtomsForce, useGpu, *idef_);
179
180     if (idef_->ilsort == ilsortFE_SORTED)
181     {
182         forceBufferLambda_.resize(numAtomsForce * sizeof(rvec4) / sizeof(real));
183         shiftForceBufferLambda_.resize(gmx::c_numShiftVectors);
184     }
185 }
186
187 namespace
188 {
189
190 using gmx::ArrayRef;
191
192 /*! \brief Return true if ftype is an explicit pair-listed LJ or
193  * COULOMB interaction type: bonded LJ (usually 1-4), or special
194  * listed non-bonded for FEP. */
195 bool isPairInteraction(int ftype)
196 {
197     return ((ftype) >= F_LJ14 && (ftype) <= F_LJC_PAIRS_NB);
198 }
199
200 /*! \brief Zero thread-local output buffers */
201 void zero_thread_output(f_thread_t* f_t)
202 {
203     constexpr int nelem_fa = sizeof(f_t->f[0]) / sizeof(real);
204
205     for (int i = 0; i < f_t->nblock_used; i++)
206     {
207         int a0 = f_t->block_index[i] * reduction_block_size;
208         int a1 = a0 + reduction_block_size;
209         for (int a = a0; a < a1; a++)
210         {
211             for (int d = 0; d < nelem_fa; d++)
212             {
213                 f_t->f[a][d] = 0;
214             }
215         }
216     }
217
218     for (int i = 0; i < gmx::c_numShiftVectors; i++)
219     {
220         clear_rvec(f_t->fshift[i]);
221     }
222     for (int i = 0; i < F_NRE; i++)
223     {
224         f_t->ener[i] = 0;
225     }
226     for (int i = 0; i < static_cast<int>(NonBondedEnergyTerms::Count); i++)
227     {
228         for (int j = 0; j < f_t->grpp.nener; j++)
229         {
230             f_t->grpp.energyGroupPairTerms[i][j] = 0;
231         }
232     }
233     for (auto i : keysOf(f_t->dvdl))
234     {
235         f_t->dvdl[i] = 0;
236     }
237 }
238
239 /*! \brief The max thread number is arbitrary, we used a fixed number
240  * to avoid memory management.  Using more than 16 threads is probably
241  * never useful performance wise. */
242 #define MAX_BONDED_THREADS 256
243
244 /*! \brief Reduce thread-local force buffers */
245 void reduce_thread_forces(gmx::ArrayRef<gmx::RVec> force, const bonded_threading_t* bt, int nthreads)
246 {
247     if (nthreads > MAX_BONDED_THREADS)
248     {
249         gmx_fatal(FARGS, "Can not reduce bonded forces on more than %d threads", MAX_BONDED_THREADS);
250     }
251
252     rvec* gmx_restrict f = as_rvec_array(force.data());
253
254     const int numAtomsForce = bt->numAtomsForce;
255
256     /* This reduction can run on any number of threads,
257      * independently of bt->nthreads.
258      * But if nthreads matches bt->nthreads (which it currently does)
259      * the uniform distribution of the touched blocks over nthreads will
260      * match the distribution of bonded over threads well in most cases,
261      * which means that threads mostly reduce their own data which increases
262      * the number of cache hits.
263      */
264 #pragma omp parallel for num_threads(nthreads) schedule(static)
265     for (int b = 0; b < bt->nblock_used; b++)
266     {
267         try
268         {
269             int    ind = bt->block_index[b];
270             rvec4* fp[MAX_BONDED_THREADS];
271
272             /* Determine which threads contribute to this block */
273             int nfb = 0;
274             for (int ft = 0; ft < bt->nthreads; ft++)
275             {
276                 if (bitmask_is_set(bt->mask[ind], ft))
277                 {
278                     fp[nfb++] = bt->f_t[ft]->f;
279                 }
280             }
281             if (nfb > 0)
282             {
283                 /* Reduce force buffers for threads that contribute */
284                 int a0 = ind * reduction_block_size;
285                 int a1 = (ind + 1) * reduction_block_size;
286                 /* It would be nice if we could pad f to avoid this min */
287                 a1 = std::min(a1, numAtomsForce);
288                 for (int a = a0; a < a1; a++)
289                 {
290                     for (int fb = 0; fb < nfb; fb++)
291                     {
292                         rvec_inc(f[a], fp[fb][a]);
293                     }
294                 }
295             }
296         }
297         GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
298     }
299 }
300
301 /*! \brief Reduce thread-local forces, shift forces and energies */
302 void reduce_thread_output(gmx::ForceWithShiftForces* forceWithShiftForces,
303                           real*                      ener,
304                           gmx_grppairener_t*         grpp,
305                           gmx::ArrayRef<real>        dvdl,
306                           const bonded_threading_t*  bt,
307                           const gmx::StepWorkload&   stepWork)
308 {
309     assert(bt->haveBondeds);
310
311     if (bt->nblock_used > 0)
312     {
313         /* Reduce the bonded force buffer */
314         reduce_thread_forces(forceWithShiftForces->force(), bt, bt->nthreads);
315     }
316
317     rvec* gmx_restrict fshift = as_rvec_array(forceWithShiftForces->shiftForces().data());
318
319     /* When necessary, reduce energy and virial using one thread only */
320     if ((stepWork.computeEnergy || stepWork.computeVirial || stepWork.computeDhdl) && bt->nthreads > 1)
321     {
322         gmx::ArrayRef<const std::unique_ptr<f_thread_t>> f_t = bt->f_t;
323
324         if (stepWork.computeVirial)
325         {
326             for (int i = 0; i < gmx::c_numShiftVectors; i++)
327             {
328                 for (int t = 1; t < bt->nthreads; t++)
329                 {
330                     rvec_inc(fshift[i], f_t[t]->fshift[i]);
331                 }
332             }
333         }
334         if (stepWork.computeEnergy)
335         {
336             for (int i = 0; i < F_NRE; i++)
337             {
338                 for (int t = 1; t < bt->nthreads; t++)
339                 {
340                     ener[i] += f_t[t]->ener[i];
341                 }
342             }
343             for (int i = 0; i < static_cast<int>(NonBondedEnergyTerms::Count); i++)
344             {
345                 for (int j = 0; j < f_t[1]->grpp.nener; j++)
346                 {
347                     for (int t = 1; t < bt->nthreads; t++)
348                     {
349                         grpp->energyGroupPairTerms[i][j] += f_t[t]->grpp.energyGroupPairTerms[i][j];
350                     }
351                 }
352             }
353         }
354         if (stepWork.computeDhdl)
355         {
356             for (auto i : keysOf(f_t[1]->dvdl))
357             {
358
359                 for (int t = 1; t < bt->nthreads; t++)
360                 {
361                     dvdl[static_cast<int>(i)] += f_t[t]->dvdl[i];
362                 }
363             }
364         }
365     }
366 }
367
368 /*! \brief Returns the bonded kernel flavor
369  *
370  * Note that energies are always requested when the virial
371  * is requested (performance gain would be small).
372  * Note that currently we do not have bonded kernels that
373  * do not compute forces.
374  */
375 BondedKernelFlavor selectBondedKernelFlavor(const gmx::StepWorkload& stepWork,
376                                             const bool               useSimdKernels,
377                                             const bool               havePerturbedInteractions)
378 {
379     BondedKernelFlavor flavor;
380     if (stepWork.computeEnergy || stepWork.computeVirial)
381     {
382         if (stepWork.computeVirial)
383         {
384             flavor = BondedKernelFlavor::ForcesAndVirialAndEnergy;
385         }
386         else
387         {
388             flavor = BondedKernelFlavor::ForcesAndEnergy;
389         }
390     }
391     else
392     {
393         if (useSimdKernels && !havePerturbedInteractions)
394         {
395             flavor = BondedKernelFlavor::ForcesSimdWhenAvailable;
396         }
397         else
398         {
399             flavor = BondedKernelFlavor::ForcesNoSimd;
400         }
401     }
402
403     return flavor;
404 }
405
406 /*! \brief Calculate one element of the list of bonded interactions
407     for this thread */
408 real calc_one_bond(int                           thread,
409                    int                           ftype,
410                    const InteractionDefinitions& idef,
411                    ArrayRef<const int>           iatoms,
412                    const int                     numNonperturbedInteractions,
413                    const WorkDivision&           workDivision,
414                    const rvec                    x[],
415                    rvec4                         f[],
416                    rvec                          fshift[],
417                    const t_forcerec*             fr,
418                    const t_pbc*                  pbc,
419                    gmx_grppairener_t*            grpp,
420                    t_nrnb*                       nrnb,
421                    gmx::ArrayRef<const real>     lambda,
422                    gmx::ArrayRef<real>           dvdl,
423                    const t_mdatoms*              md,
424                    t_fcdata*                     fcd,
425                    const gmx::StepWorkload&      stepWork,
426                    int*                          global_atom_index)
427 {
428     GMX_ASSERT(idef.ilsort == ilsortNO_FE || idef.ilsort == ilsortFE_SORTED,
429                "The topology should be marked either as no FE or sorted on FE");
430
431     const bool havePerturbedInteractions =
432             (idef.ilsort == ilsortFE_SORTED && numNonperturbedInteractions < iatoms.ssize());
433     BondedKernelFlavor flavor =
434             selectBondedKernelFlavor(stepWork, fr->use_simd_kernels, havePerturbedInteractions);
435     FreeEnergyPerturbationCouplingType efptFTYPE;
436     if (IS_RESTRAINT_TYPE(ftype))
437     {
438         efptFTYPE = FreeEnergyPerturbationCouplingType::Restraint;
439     }
440     else
441     {
442         efptFTYPE = FreeEnergyPerturbationCouplingType::Bonded;
443     }
444
445     const int nat1   = interaction_function[ftype].nratoms + 1;
446     const int nbonds = iatoms.ssize() / nat1;
447
448     GMX_ASSERT(fr->listedForcesGpu != nullptr || workDivision.end(ftype) == iatoms.ssize(),
449                "The thread division should match the topology");
450
451     const int nb0 = workDivision.bound(ftype, thread);
452     const int nbn = workDivision.bound(ftype, thread + 1) - nb0;
453
454     ArrayRef<const t_iparams> iparams = idef.iparams;
455
456     real v = 0;
457     if (!isPairInteraction(ftype))
458     {
459         if (ftype == F_CMAP)
460         {
461             /* TODO The execution time for CMAP dihedrals might be
462                nice to account to its own subtimer, but first
463                wallcycle needs to be extended to support calling from
464                multiple threads. */
465             v = cmap_dihs(nbn,
466                           iatoms.data() + nb0,
467                           iparams.data(),
468                           &idef.cmap_grid,
469                           x,
470                           f,
471                           fshift,
472                           pbc,
473                           lambda[static_cast<int>(efptFTYPE)],
474                           &(dvdl[static_cast<int>(efptFTYPE)]),
475                           gmx::arrayRefFromArray(md->chargeA, md->nr),
476                           fcd,
477                           nullptr,
478                           nullptr,
479                           global_atom_index);
480         }
481         else
482         {
483             v = calculateSimpleBond(ftype,
484                                     nbn,
485                                     iatoms.data() + nb0,
486                                     iparams.data(),
487                                     x,
488                                     f,
489                                     fshift,
490                                     pbc,
491                                     lambda[static_cast<int>(efptFTYPE)],
492                                     &(dvdl[static_cast<int>(efptFTYPE)]),
493                                     gmx::arrayRefFromArray(md->chargeA, md->nr),
494                                     fcd,
495                                     fcd->disres,
496                                     fcd->orires.get(),
497                                     global_atom_index,
498                                     flavor);
499         }
500     }
501     else
502     {
503         /* TODO The execution time for pairs might be nice to account
504            to its own subtimer, but first wallcycle needs to be
505            extended to support calling from multiple threads. */
506         do_pairs(ftype,
507                  nbn,
508                  iatoms.data() + nb0,
509                  iparams.data(),
510                  x,
511                  f,
512                  fshift,
513                  pbc,
514                  lambda.data(),
515                  dvdl.data(),
516                  md->chargeA ? gmx::arrayRefFromArray(md->chargeA, md->nr) : gmx::ArrayRef<real>{},
517                  md->chargeB ? gmx::arrayRefFromArray(md->chargeB, md->nr) : gmx::ArrayRef<real>{},
518                  md->bPerturbed ? gmx::arrayRefFromArray(md->bPerturbed, md->nr) : gmx::ArrayRef<bool>(),
519                  gmx::arrayRefFromArray(md->cENER, md->nr),
520                  md->nPerturbed,
521                  fr,
522                  havePerturbedInteractions,
523                  stepWork,
524                  grpp,
525                  global_atom_index);
526     }
527
528     if (thread == 0)
529     {
530         inc_nrnb(nrnb, nrnbIndex(ftype), nbonds);
531     }
532
533     return v;
534 }
535
536 } // namespace
537
538 /*! \brief Compute the bonded part of the listed forces, parallelized over threads
539  */
540 static void calcBondedForces(const InteractionDefinitions& idef,
541                              bonded_threading_t*           bt,
542                              const rvec                    x[],
543                              const t_forcerec*             fr,
544                              const t_pbc*                  pbc_null,
545                              rvec*                         fshiftMasterBuffer,
546                              gmx_enerdata_t*               enerd,
547                              t_nrnb*                       nrnb,
548                              gmx::ArrayRef<const real>     lambda,
549                              gmx::ArrayRef<real>           dvdl,
550                              const t_mdatoms*              md,
551                              t_fcdata*                     fcd,
552                              const gmx::StepWorkload&      stepWork,
553                              int*                          global_atom_index)
554 {
555 #pragma omp parallel for num_threads(bt->nthreads) schedule(static)
556     for (int thread = 0; thread < bt->nthreads; thread++)
557     {
558         try
559         {
560             f_thread_t& threadBuffers = *bt->f_t[thread];
561             int         ftype;
562             real        v;
563             /* thread stuff */
564             rvec*               fshift;
565             gmx::ArrayRef<real> dvdlt;
566             gmx::ArrayRef<real> epot;
567             gmx_grppairener_t*  grpp;
568
569             zero_thread_output(&threadBuffers);
570
571             rvec4* ft = threadBuffers.f;
572
573             /* Thread 0 writes directly to the main output buffers.
574              * We might want to reconsider this.
575              */
576             if (thread == 0)
577             {
578                 fshift = fshiftMasterBuffer;
579                 epot   = enerd->term;
580                 grpp   = &enerd->grpp;
581                 dvdlt  = dvdl;
582             }
583             else
584             {
585                 fshift = as_rvec_array(threadBuffers.fshift.data());
586                 epot   = threadBuffers.ener;
587                 grpp   = &threadBuffers.grpp;
588                 dvdlt  = threadBuffers.dvdl;
589             }
590             /* Loop over all bonded force types to calculate the bonded forces */
591             for (ftype = 0; (ftype < F_NRE); ftype++)
592             {
593                 const InteractionList& ilist = idef.il[ftype];
594                 if (!ilist.empty() && ftype_is_bonded_potential(ftype))
595                 {
596                     ArrayRef<const int> iatoms = gmx::makeConstArrayRef(ilist.iatoms);
597                     v                          = calc_one_bond(thread,
598                                       ftype,
599                                       idef,
600                                       iatoms,
601                                       idef.numNonperturbedInteractions[ftype],
602                                       bt->workDivision,
603                                       x,
604                                       ft,
605                                       fshift,
606                                       fr,
607                                       pbc_null,
608                                       grpp,
609                                       nrnb,
610                                       lambda,
611                                       dvdlt,
612                                       md,
613                                       fcd,
614                                       stepWork,
615                                       global_atom_index);
616                     epot[ftype] += v;
617                 }
618             }
619         }
620         GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
621     }
622 }
623
624 bool ListedForces::haveRestraints(const t_fcdata& fcdata) const
625 {
626     GMX_ASSERT(fcdata.disres, "NMR distance restraint object should be set up");
627
628     return (!idef_->il[F_POSRES].empty() || !idef_->il[F_FBPOSRES].empty() || fcdata.orires
629             || fcdata.disres->nres > 0);
630 }
631
632 bool ListedForces::haveCpuBondeds() const
633 {
634     return threading_->haveBondeds;
635 }
636
637 bool ListedForces::haveCpuListedForces(const t_fcdata& fcdata) const
638 {
639     return haveCpuBondeds() || haveRestraints(fcdata);
640 }
641
642 namespace
643 {
644
645 /*! \brief Calculates all listed force interactions. */
646 void calc_listed(struct gmx_wallcycle*         wcycle,
647                  const InteractionDefinitions& idef,
648                  bonded_threading_t*           bt,
649                  const rvec                    x[],
650                  gmx::ForceOutputs*            forceOutputs,
651                  const t_forcerec*             fr,
652                  const t_pbc*                  pbc,
653                  gmx_enerdata_t*               enerd,
654                  t_nrnb*                       nrnb,
655                  gmx::ArrayRef<const real>     lambda,
656                  const t_mdatoms*              md,
657                  t_fcdata*                     fcd,
658                  int*                          global_atom_index,
659                  const gmx::StepWorkload&      stepWork)
660 {
661     if (bt->haveBondeds)
662     {
663         gmx::ForceWithShiftForces& forceWithShiftForces = forceOutputs->forceWithShiftForces();
664
665         wallcycle_sub_start(wcycle, WallCycleSubCounter::Listed);
666         /* The dummy array is to have a place to store the dhdl at other values
667            of lambda, which will be thrown away in the end */
668         gmx::EnumerationArray<FreeEnergyPerturbationCouplingType, real> dvdl = { 0 };
669         calcBondedForces(idef,
670                          bt,
671                          x,
672                          fr,
673                          fr->bMolPBC ? pbc : nullptr,
674                          as_rvec_array(forceWithShiftForces.shiftForces().data()),
675                          enerd,
676                          nrnb,
677                          lambda,
678                          dvdl,
679                          md,
680                          fcd,
681                          stepWork,
682                          global_atom_index);
683         wallcycle_sub_stop(wcycle, WallCycleSubCounter::Listed);
684
685         wallcycle_sub_start(wcycle, WallCycleSubCounter::ListedBufOps);
686         reduce_thread_output(&forceWithShiftForces, enerd->term.data(), &enerd->grpp, dvdl, bt, stepWork);
687
688         if (stepWork.computeDhdl)
689         {
690             for (auto i : keysOf(enerd->dvdl_lin))
691             {
692                 enerd->dvdl_nonlin[i] += dvdl[i];
693             }
694         }
695         wallcycle_sub_stop(wcycle, WallCycleSubCounter::ListedBufOps);
696     }
697
698     /* Copy the sum of violations for the distance restraints from fcd */
699     if (fcd)
700     {
701         enerd->term[F_DISRESVIOL] = fcd->disres->sumviol;
702     }
703 }
704
705 /*! \brief As calc_listed(), but only determines the potential energy
706  * for the perturbed interactions.
707  *
708  * The shift forces in fr are not affected.
709  */
710 void calc_listed_lambda(const InteractionDefinitions& idef,
711                         bonded_threading_t*           bt,
712                         const rvec                    x[],
713                         const t_forcerec*             fr,
714                         const struct t_pbc*           pbc,
715                         gmx::ArrayRef<real>           forceBufferLambda,
716                         gmx::ArrayRef<gmx::RVec>      shiftForceBufferLambda,
717                         gmx_grppairener_t*            grpp,
718                         gmx::ArrayRef<real>           epot,
719                         gmx::ArrayRef<real>           dvdl,
720                         t_nrnb*                       nrnb,
721                         gmx::ArrayRef<const real>     lambda,
722                         const t_mdatoms*              md,
723                         t_fcdata*                     fcd,
724                         int*                          global_atom_index)
725 {
726     WorkDivision& workDivision = bt->foreignLambdaWorkDivision;
727
728     const t_pbc* pbc_null;
729     if (fr->bMolPBC)
730     {
731         pbc_null = pbc;
732     }
733     else
734     {
735         pbc_null = nullptr;
736     }
737
738     /* We already have the forces, so we use temp buffers here */
739     std::fill(forceBufferLambda.begin(), forceBufferLambda.end(), 0.0_real);
740     std::fill(shiftForceBufferLambda.begin(),
741               shiftForceBufferLambda.end(),
742               gmx::RVec{ 0.0_real, 0.0_real, 0.0_real });
743     rvec4* f      = reinterpret_cast<rvec4*>(forceBufferLambda.data());
744     rvec*  fshift = as_rvec_array(shiftForceBufferLambda.data());
745
746     /* Loop over all bonded force types to calculate the bonded energies */
747     for (int ftype = 0; (ftype < F_NRE); ftype++)
748     {
749         if (ftype_is_bonded_potential(ftype))
750         {
751             const InteractionList& ilist = idef.il[ftype];
752             /* Create a temporary iatom list with only perturbed interactions */
753             const int           numNonperturbed = idef.numNonperturbedInteractions[ftype];
754             ArrayRef<const int> iatomsPerturbed = gmx::constArrayRefFromArray(
755                     ilist.iatoms.data() + numNonperturbed, ilist.size() - numNonperturbed);
756             if (!iatomsPerturbed.empty())
757             {
758                 /* Set the work range of thread 0 to the perturbed bondeds */
759                 workDivision.setBound(ftype, 0, 0);
760                 workDivision.setBound(ftype, 1, iatomsPerturbed.ssize());
761
762                 gmx::StepWorkload tempFlags;
763                 tempFlags.computeEnergy = true;
764                 real v                  = calc_one_bond(0,
765                                        ftype,
766                                        idef,
767                                        iatomsPerturbed,
768                                        iatomsPerturbed.ssize(),
769                                        workDivision,
770                                        x,
771                                        f,
772                                        fshift,
773                                        fr,
774                                        pbc_null,
775                                        grpp,
776                                        nrnb,
777                                        lambda,
778                                        dvdl,
779                                        md,
780                                        fcd,
781                                        tempFlags,
782                                        global_atom_index);
783                 epot[ftype] += v;
784             }
785         }
786     }
787 }
788
789 } // namespace
790
791 void ListedForces::calculate(struct gmx_wallcycle*                     wcycle,
792                              const matrix                              box,
793                              const t_lambda*                           fepvals,
794                              const t_commrec*                          cr,
795                              const gmx_multisim_t*                     ms,
796                              gmx::ArrayRefWithPadding<const gmx::RVec> coordinates,
797                              gmx::ArrayRef<const gmx::RVec>            xWholeMolecules,
798                              t_fcdata*                                 fcdata,
799                              const history_t*                          hist,
800                              gmx::ForceOutputs*                        forceOutputs,
801                              const t_forcerec*                         fr,
802                              const struct t_pbc*                       pbc,
803                              gmx_enerdata_t*                           enerd,
804                              t_nrnb*                                   nrnb,
805                              gmx::ArrayRef<const real>                 lambda,
806                              const t_mdatoms*                          md,
807                              int*                                      global_atom_index,
808                              const gmx::StepWorkload&                  stepWork)
809 {
810     if (interactionSelection_.none() || !stepWork.computeListedForces)
811     {
812         return;
813     }
814
815     const InteractionDefinitions& idef = *idef_;
816
817     // Todo: replace all rvec use here with ArrayRefWithPadding
818     const rvec* x = as_rvec_array(coordinates.paddedArrayRef().data());
819
820     const bool calculateRestInteractions =
821             interactionSelection_.test(static_cast<int>(ListedForces::InteractionGroup::Rest));
822
823     t_pbc pbc_full; /* Full PBC is needed for position restraints */
824     if (calculateRestInteractions && haveRestraints(*fcdata))
825     {
826         if (!idef.il[F_POSRES].empty() || !idef.il[F_FBPOSRES].empty())
827         {
828             /* Not enough flops to bother counting */
829             set_pbc(&pbc_full, fr->pbcType, box);
830         }
831
832         /* TODO Use of restraints triggers further function calls
833            inside the loop over calc_one_bond(), but those are too
834            awkward to account to this subtimer properly in the present
835            code. We don't test / care much about performance with
836            restraints, anyway. */
837         wallcycle_sub_start(wcycle, WallCycleSubCounter::Restraints);
838
839         if (!idef.il[F_POSRES].empty())
840         {
841             posres_wrapper(nrnb, idef, &pbc_full, x, enerd, lambda, fr, &forceOutputs->forceWithVirial());
842         }
843
844         if (!idef.il[F_FBPOSRES].empty())
845         {
846             fbposres_wrapper(nrnb, idef, &pbc_full, x, enerd, fr, &forceOutputs->forceWithVirial());
847         }
848
849         /* Do pre force calculation stuff which might require communication */
850         if (fcdata->orires)
851         {
852             GMX_ASSERT(!xWholeMolecules.empty(), "Need whole molecules for orienation restraints");
853             enerd->term[F_ORIRESDEV] = calc_orires_dev(ms,
854                                                        idef.il[F_ORIRES].size(),
855                                                        idef.il[F_ORIRES].iatoms.data(),
856                                                        idef.iparams.data(),
857                                                        md,
858                                                        xWholeMolecules,
859                                                        x,
860                                                        fr->bMolPBC ? pbc : nullptr,
861                                                        fcdata->orires.get(),
862                                                        hist);
863         }
864         if (fcdata->disres->nres > 0)
865         {
866             calc_disres_R_6(cr,
867                             ms,
868                             idef.il[F_DISRES].size(),
869                             idef.il[F_DISRES].iatoms.data(),
870                             x,
871                             fr->bMolPBC ? pbc : nullptr,
872                             fcdata->disres,
873                             hist);
874         }
875
876         wallcycle_sub_stop(wcycle, WallCycleSubCounter::Restraints);
877     }
878
879     calc_listed(wcycle, idef, threading_.get(), x, forceOutputs, fr, pbc, enerd, nrnb, lambda, md, fcdata, global_atom_index, stepWork);
880
881     /* Check if we have to determine energy differences
882      * at foreign lambda's.
883      */
884     if (fepvals->n_lambda > 0 && stepWork.computeDhdl)
885     {
886         gmx::EnumerationArray<FreeEnergyPerturbationCouplingType, real> dvdl = { 0 };
887         if (!idef.il[F_POSRES].empty())
888         {
889             posres_wrapper_lambda(wcycle, fepvals, idef, &pbc_full, x, enerd, lambda, fr);
890         }
891         if (idef.ilsort != ilsortNO_FE)
892         {
893             wallcycle_sub_start(wcycle, WallCycleSubCounter::ListedFep);
894             if (idef.ilsort != ilsortFE_SORTED)
895             {
896                 gmx_incons("The bonded interactions are not sorted for free energy");
897             }
898             for (int i = 0; i < 1 + enerd->foreignLambdaTerms.numLambdas(); i++)
899             {
900                 gmx::EnumerationArray<FreeEnergyPerturbationCouplingType, real> lam_i;
901                 foreignEnergyGroups_->clear();
902                 std::array<real, F_NRE> foreign_term = { 0 };
903                 for (auto j : keysOf(lam_i))
904                 {
905                     lam_i[j] = (i == 0 ? lambda[static_cast<int>(j)] : fepvals->all_lambda[j][i - 1]);
906                 }
907                 calc_listed_lambda(idef,
908                                    threading_.get(),
909                                    x,
910                                    fr,
911                                    pbc,
912                                    forceBufferLambda_,
913                                    shiftForceBufferLambda_,
914                                    foreignEnergyGroups_.get(),
915                                    foreign_term,
916                                    dvdl,
917                                    nrnb,
918                                    lam_i,
919                                    md,
920                                    fcdata,
921                                    global_atom_index);
922                 sum_epot(*foreignEnergyGroups_, foreign_term.data());
923                 const double dvdlSum = std::accumulate(std::begin(dvdl), std::end(dvdl), 0.);
924                 std::fill(std::begin(dvdl), std::end(dvdl), 0.0);
925                 enerd->foreignLambdaTerms.accumulate(i, foreign_term[F_EPOT], dvdlSum);
926             }
927             wallcycle_sub_stop(wcycle, WallCycleSubCounter::ListedFep);
928         }
929     }
930 }