src/gromacs/listed_forces/listed_forces.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
   5  * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
   6  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   7  * and including many others, as listed in the AUTHORS file in the
   8  * top-level source directory and at http://www.gromacs.org.
   9  *
  10  * GROMACS is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public License
  12  * as published by the Free Software Foundation; either version 2.1
  13  * of the License, or (at your option) any later version.
  14  *
  15  * GROMACS is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with GROMACS; if not, see
  22  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  23  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  24  *
  25  * If you want to redistribute modifications to GROMACS, please
  26  * consider that scientific software is very special. Version
  27  * control is crucial - bugs must be traceable. We will be happy to
  28  * consider code for inclusion in the official distribution, but
  29  * derived work must not be called official GROMACS. Details are found
  30  * in the README & COPYING files - if they are missing, get the
  31  * official version at http://www.gromacs.org.
  32  *
  33  * To help us fund GROMACS development, we humbly ask that you cite
  34  * the research papers on the package. Check out http://www.gromacs.org.
  35  */
  36 /*! \internal \file
  37  *
  38  * \brief This file defines high-level functions for mdrun to compute
  39  * energies and forces for listed interactions.
  40  *
  41  * \author Mark Abraham <mark.j.abraham@gmail.com>
  42  *
  43  * \ingroup module_listed_forces
  44  */
  45 #include "gmxpre.h"
  46
  47 #include "listed_forces.h"
  48
  49 #include <cassert>
  50
  51 #include <algorithm>
  52 #include <array>
  53 #include <numeric>
  54
  55 #include "gromacs/gmxlib/network.h"
  56 #include "gromacs/gmxlib/nrnb.h"
  57 #include "gromacs/listed_forces/bonded.h"
  58 #include "gromacs/listed_forces/disre.h"
  59 #include "gromacs/listed_forces/orires.h"
  60 #include "gromacs/listed_forces/pairs.h"
  61 #include "gromacs/listed_forces/position_restraints.h"
  62 #include "gromacs/math/vec.h"
  63 #include "gromacs/mdlib/enerdata_utils.h"
  64 #include "gromacs/mdlib/force.h"
  65 #include "gromacs/mdlib/gmx_omp_nthreads.h"
  66 #include "gromacs/mdtypes/commrec.h"
  67 #include "gromacs/mdtypes/fcdata.h"
  68 #include "gromacs/mdtypes/forceoutput.h"
  69 #include "gromacs/mdtypes/forcerec.h"
  70 #include "gromacs/mdtypes/inputrec.h"
  71 #include "gromacs/mdtypes/md_enums.h"
  72 #include "gromacs/mdtypes/simulation_workload.h"
  73 #include "gromacs/pbcutil/ishift.h"
  74 #include "gromacs/pbcutil/pbc.h"
  75 #include "gromacs/timing/wallcycle.h"
  76 #include "gromacs/topology/topology.h"
  77 #include "gromacs/utility/exceptions.h"
  78 #include "gromacs/utility/fatalerror.h"
  79
  80 #include "listed_internal.h"
  81 #include "manage_threading.h"
  82 #include "utilities.h"
  83
  84 ListedForces::ListedForces(const gmx_ffparams_t&      ffparams,
  85                            const int                  numEnergyGroups,
  86                            const int                  numThreads,
  87                            const InteractionSelection interactionSelection,
  88                            FILE*                      fplog) :
  89     idefSelection_(ffparams),
  90     threading_(std::make_unique<bonded_threading_t>(numThreads, numEnergyGroups, fplog)),
  91     interactionSelection_(interactionSelection)
  92 {
  93 }
  94
  95 ListedForces::ListedForces(ListedForces&& o) noexcept = default;
  96
  97 ListedForces::~ListedForces() = default;
  98
  99 //! Copies the selection interactions from \p idefSrc to \p idef
 100 static void selectInteractions(InteractionDefinitions*                  idef,
 101                                const InteractionDefinitions&            idefSrc,
 102                                const ListedForces::InteractionSelection interactionSelection)
 103 {
 104     const bool selectPairs =
 105             interactionSelection.test(static_cast<int>(ListedForces::InteractionGroup::Pairs));
 106     const bool selectDihedrals =
 107             interactionSelection.test(static_cast<int>(ListedForces::InteractionGroup::Dihedrals));
 108     const bool selectAngles =
 109             interactionSelection.test(static_cast<int>(ListedForces::InteractionGroup::Angles));
 110     const bool selectRest =
 111             interactionSelection.test(static_cast<int>(ListedForces::InteractionGroup::Rest));
 112
 113     for (int ftype = 0; ftype < F_NRE; ftype++)
 114     {
 115         const t_interaction_function& ifunc = interaction_function[ftype];
 116         if (ifunc.flags & IF_BOND)
 117         {
 118             bool assign = false;
 119             if (ifunc.flags & IF_PAIR)
 120             {
 121                 assign = selectPairs;
 122             }
 123             else if (ifunc.flags & IF_DIHEDRAL)
 124             {
 125                 assign = selectDihedrals;
 126             }
 127             else if (ifunc.flags & IF_ATYPE)
 128             {
 129                 assign = selectAngles;
 130             }
 131             else
 132             {
 133                 assign = selectRest;
 134             }
 135             if (assign)
 136             {
 137                 idef->il[ftype] = idefSrc.il[ftype];
 138             }
 139             else
 140             {
 141                 idef->il[ftype].clear();
 142             }
 143         }
 144     }
 145 }
 146
 147 void ListedForces::setup(const InteractionDefinitions& domainIdef, const int numAtomsForce, const bool useGpu)
 148 {
 149     if (interactionSelection_.all())
 150     {
 151         // Avoid the overhead of copying all interaction lists by simply setting the reference to the domain idef
 152         idef_ = &domainIdef;
 153     }
 154     else
 155     {
 156         idef_ = &idefSelection_;
 157
 158         selectInteractions(&idefSelection_, domainIdef, interactionSelection_);
 159
 160         idefSelection_.ilsort = domainIdef.ilsort;
 161
 162         if (interactionSelection_.test(static_cast<int>(ListedForces::InteractionGroup::Rest)))
 163         {
 164             idefSelection_.iparams_posres   = domainIdef.iparams_posres;
 165             idefSelection_.iparams_fbposres = domainIdef.iparams_fbposres;
 166         }
 167         else
 168         {
 169             idefSelection_.iparams_posres.clear();
 170             idefSelection_.iparams_fbposres.clear();
 171         }
 172     }
 173
 174     setup_bonded_threading(threading_.get(), numAtomsForce, useGpu, *idef_);
 175
 176     if (idef_->ilsort == ilsortFE_SORTED)
 177     {
 178         forceBufferLambda_.resize(numAtomsForce * sizeof(rvec4) / sizeof(real));
 179         shiftForceBufferLambda_.resize(SHIFTS);
 180     }
 181 }
 182
 183 namespace
 184 {
 185
 186 using gmx::ArrayRef;
 187
 188 /*! \brief Return true if ftype is an explicit pair-listed LJ or
 189  * COULOMB interaction type: bonded LJ (usually 1-4), or special
 190  * listed non-bonded for FEP. */
 191 bool isPairInteraction(int ftype)
 192 {
 193     return ((ftype) >= F_LJ14 && (ftype) <= F_LJC_PAIRS_NB);
 194 }
 195
 196 /*! \brief Zero thread-local output buffers */
 197 void zero_thread_output(f_thread_t* f_t)
 198 {
 199     constexpr int nelem_fa = sizeof(f_t->f[0]) / sizeof(real);
 200
 201     for (int i = 0; i < f_t->nblock_used; i++)
 202     {
 203         int a0 = f_t->block_index[i] * reduction_block_size;
 204         int a1 = a0 + reduction_block_size;
 205         for (int a = a0; a < a1; a++)
 206         {
 207             for (int d = 0; d < nelem_fa; d++)
 208             {
 209                 f_t->f[a][d] = 0;
 210             }
 211         }
 212     }
 213
 214     for (int i = 0; i < SHIFTS; i++)
 215     {
 216         clear_rvec(f_t->fshift[i]);
 217     }
 218     for (int i = 0; i < F_NRE; i++)
 219     {
 220         f_t->ener[i] = 0;
 221     }
 222     for (int i = 0; i < egNR; i++)
 223     {
 224         for (int j = 0; j < f_t->grpp.nener; j++)
 225         {
 226             f_t->grpp.ener[i][j] = 0;
 227         }
 228     }
 229     for (int i = 0; i < efptNR; i++)
 230     {
 231         f_t->dvdl[i] = 0;
 232     }
 233 }
 234
 235 /*! \brief The max thread number is arbitrary, we used a fixed number
 236  * to avoid memory management.  Using more than 16 threads is probably
 237  * never useful performance wise. */
 238 #define MAX_BONDED_THREADS 256
 239
 240 /*! \brief Reduce thread-local force buffers */
 241 void reduce_thread_forces(gmx::ArrayRef<gmx::RVec> force, const bonded_threading_t* bt, int nthreads)
 242 {
 243     if (nthreads > MAX_BONDED_THREADS)
 244     {
 245         gmx_fatal(FARGS, "Can not reduce bonded forces on more than %d threads", MAX_BONDED_THREADS);
 246     }
 247
 248     rvec* gmx_restrict f = as_rvec_array(force.data());
 249
 250     const int numAtomsForce = bt->numAtomsForce;
 251
 252     /* This reduction can run on any number of threads,
 253      * independently of bt->nthreads.
 254      * But if nthreads matches bt->nthreads (which it currently does)
 255      * the uniform distribution of the touched blocks over nthreads will
 256      * match the distribution of bonded over threads well in most cases,
 257      * which means that threads mostly reduce their own data which increases
 258      * the number of cache hits.
 259      */
 260 #pragma omp parallel for num_threads(nthreads) schedule(static)
 261     for (int b = 0; b < bt->nblock_used; b++)
 262     {
 263         try
 264         {
 265             int    ind = bt->block_index[b];
 266             rvec4* fp[MAX_BONDED_THREADS];
 267
 268             /* Determine which threads contribute to this block */
 269             int nfb = 0;
 270             for (int ft = 0; ft < bt->nthreads; ft++)
 271             {
 272                 if (bitmask_is_set(bt->mask[ind], ft))
 273                 {
 274                     fp[nfb++] = bt->f_t[ft]->f;
 275                 }
 276             }
 277             if (nfb > 0)
 278             {
 279                 /* Reduce force buffers for threads that contribute */
 280                 int a0 = ind * reduction_block_size;
 281                 int a1 = (ind + 1) * reduction_block_size;
 282                 /* It would be nice if we could pad f to avoid this min */
 283                 a1 = std::min(a1, numAtomsForce);
 284                 for (int a = a0; a < a1; a++)
 285                 {
 286                     for (int fb = 0; fb < nfb; fb++)
 287                     {
 288                         rvec_inc(f[a], fp[fb][a]);
 289                     }
 290                 }
 291             }
 292         }
 293         GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
 294     }
 295 }
 296
 297 /*! \brief Reduce thread-local forces, shift forces and energies */
 298 void reduce_thread_output(gmx::ForceWithShiftForces* forceWithShiftForces,
 299                           real*                      ener,
 300                           gmx_grppairener_t*         grpp,
 301                           real*                      dvdl,
 302                           const bonded_threading_t*  bt,
 303                           const gmx::StepWorkload&   stepWork)
 304 {
 305     assert(bt->haveBondeds);
 306
 307     if (bt->nblock_used > 0)
 308     {
 309         /* Reduce the bonded force buffer */
 310         reduce_thread_forces(forceWithShiftForces->force(), bt, bt->nthreads);
 311     }
 312
 313     rvec* gmx_restrict fshift = as_rvec_array(forceWithShiftForces->shiftForces().data());
 314
 315     /* When necessary, reduce energy and virial using one thread only */
 316     if ((stepWork.computeEnergy || stepWork.computeVirial || stepWork.computeDhdl) && bt->nthreads > 1)
 317     {
 318         gmx::ArrayRef<const std::unique_ptr<f_thread_t>> f_t = bt->f_t;
 319
 320         if (stepWork.computeVirial)
 321         {
 322             for (int i = 0; i < SHIFTS; i++)
 323             {
 324                 for (int t = 1; t < bt->nthreads; t++)
 325                 {
 326                     rvec_inc(fshift[i], f_t[t]->fshift[i]);
 327                 }
 328             }
 329         }
 330         if (stepWork.computeEnergy)
 331         {
 332             for (int i = 0; i < F_NRE; i++)
 333             {
 334                 for (int t = 1; t < bt->nthreads; t++)
 335                 {
 336                     ener[i] += f_t[t]->ener[i];
 337                 }
 338             }
 339             for (int i = 0; i < egNR; i++)
 340             {
 341                 for (int j = 0; j < f_t[1]->grpp.nener; j++)
 342                 {
 343                     for (int t = 1; t < bt->nthreads; t++)
 344                     {
 345                         grpp->ener[i][j] += f_t[t]->grpp.ener[i][j];
 346                     }
 347                 }
 348             }
 349         }
 350         if (stepWork.computeDhdl)
 351         {
 352             for (int i = 0; i < efptNR; i++)
 353             {
 354
 355                 for (int t = 1; t < bt->nthreads; t++)
 356                 {
 357                     dvdl[i] += f_t[t]->dvdl[i];
 358                 }
 359             }
 360         }
 361     }
 362 }
 363
 364 /*! \brief Returns the bonded kernel flavor
 365  *
 366  * Note that energies are always requested when the virial
 367  * is requested (performance gain would be small).
 368  * Note that currently we do not have bonded kernels that
 369  * do not compute forces.
 370  */
 371 BondedKernelFlavor selectBondedKernelFlavor(const gmx::StepWorkload& stepWork,
 372                                             const bool               useSimdKernels,
 373                                             const bool               havePerturbedInteractions)
 374 {
 375     BondedKernelFlavor flavor;
 376     if (stepWork.computeEnergy || stepWork.computeVirial)
 377     {
 378         if (stepWork.computeVirial)
 379         {
 380             flavor = BondedKernelFlavor::ForcesAndVirialAndEnergy;
 381         }
 382         else
 383         {
 384             flavor = BondedKernelFlavor::ForcesAndEnergy;
 385         }
 386     }
 387     else
 388     {
 389         if (useSimdKernels && !havePerturbedInteractions)
 390         {
 391             flavor = BondedKernelFlavor::ForcesSimdWhenAvailable;
 392         }
 393         else
 394         {
 395             flavor = BondedKernelFlavor::ForcesNoSimd;
 396         }
 397     }
 398
 399     return flavor;
 400 }
 401
 402 /*! \brief Calculate one element of the list of bonded interactions
 403     for this thread */
 404 real calc_one_bond(int                           thread,
 405                    int                           ftype,
 406                    const InteractionDefinitions& idef,
 407                    ArrayRef<const int>           iatoms,
 408                    const int                     numNonperturbedInteractions,
 409                    const WorkDivision&           workDivision,
 410                    const rvec                    x[],
 411                    rvec4                         f[],
 412                    rvec                          fshift[],
 413                    const t_forcerec*             fr,
 414                    const t_pbc*                  pbc,
 415                    gmx_grppairener_t*            grpp,
 416                    t_nrnb*                       nrnb,
 417                    const real*                   lambda,
 418                    real*                         dvdl,
 419                    const t_mdatoms*              md,
 420                    t_fcdata*                     fcd,
 421                    const gmx::StepWorkload&      stepWork,
 422                    int*                          global_atom_index)
 423 {
 424     GMX_ASSERT(idef.ilsort == ilsortNO_FE || idef.ilsort == ilsortFE_SORTED,
 425                "The topology should be marked either as no FE or sorted on FE");
 426
 427     const bool havePerturbedInteractions =
 428             (idef.ilsort == ilsortFE_SORTED && numNonperturbedInteractions < iatoms.ssize());
 429     BondedKernelFlavor flavor =
 430             selectBondedKernelFlavor(stepWork, fr->use_simd_kernels, havePerturbedInteractions);
 431     int efptFTYPE;
 432     if (IS_RESTRAINT_TYPE(ftype))
 433     {
 434         efptFTYPE = efptRESTRAINT;
 435     }
 436     else
 437     {
 438         efptFTYPE = efptBONDED;
 439     }
 440
 441     const int nat1   = interaction_function[ftype].nratoms + 1;
 442     const int nbonds = iatoms.ssize() / nat1;
 443
 444     GMX_ASSERT(fr->gpuBonded != nullptr || workDivision.end(ftype) == iatoms.ssize(),
 445                "The thread division should match the topology");
 446
 447     const int nb0 = workDivision.bound(ftype, thread);
 448     const int nbn = workDivision.bound(ftype, thread + 1) - nb0;
 449
 450     ArrayRef<const t_iparams> iparams = idef.iparams;
 451
 452     real v = 0;
 453     if (!isPairInteraction(ftype))
 454     {
 455         if (ftype == F_CMAP)
 456         {
 457             /* TODO The execution time for CMAP dihedrals might be
 458                nice to account to its own subtimer, but first
 459                wallcycle needs to be extended to support calling from
 460                multiple threads. */
 461             v = cmap_dihs(nbn,
 462                           iatoms.data() + nb0,
 463                           iparams.data(),
 464                           &idef.cmap_grid,
 465                           x,
 466                           f,
 467                           fshift,
 468                           pbc,
 469                           lambda[efptFTYPE],
 470                           &(dvdl[efptFTYPE]),
 471                           md,
 472                           fcd,
 473                           nullptr,
 474                           nullptr,
 475                           global_atom_index);
 476         }
 477         else
 478         {
 479             v = calculateSimpleBond(ftype,
 480                                     nbn,
 481                                     iatoms.data() + nb0,
 482                                     iparams.data(),
 483                                     x,
 484                                     f,
 485                                     fshift,
 486                                     pbc,
 487                                     lambda[efptFTYPE],
 488                                     &(dvdl[efptFTYPE]),
 489                                     md,
 490                                     fcd,
 491                                     fcd->disres,
 492                                     fcd->orires,
 493                                     global_atom_index,
 494                                     flavor);
 495         }
 496     }
 497     else
 498     {
 499         /* TODO The execution time for pairs might be nice to account
 500            to its own subtimer, but first wallcycle needs to be
 501            extended to support calling from multiple threads. */
 502         do_pairs(ftype,
 503                  nbn,
 504                  iatoms.data() + nb0,
 505                  iparams.data(),
 506                  x,
 507                  f,
 508                  fshift,
 509                  pbc,
 510                  lambda,
 511                  dvdl,
 512                  md,
 513                  fr,
 514                  havePerturbedInteractions,
 515                  stepWork,
 516                  grpp,
 517                  global_atom_index);
 518     }
 519
 520     if (thread == 0)
 521     {
 522         inc_nrnb(nrnb, nrnbIndex(ftype), nbonds);
 523     }
 524
 525     return v;
 526 }
 527
 528 } // namespace
 529
 530 /*! \brief Compute the bonded part of the listed forces, parallelized over threads
 531  */
 532 static void calcBondedForces(const InteractionDefinitions& idef,
 533                              bonded_threading_t*           bt,
 534                              const rvec                    x[],
 535                              const t_forcerec*             fr,
 536                              const t_pbc*                  pbc_null,
 537                              rvec*                         fshiftMasterBuffer,
 538                              gmx_enerdata_t*               enerd,
 539                              t_nrnb*                       nrnb,
 540                              const real*                   lambda,
 541                              real*                         dvdl,
 542                              const t_mdatoms*              md,
 543                              t_fcdata*                     fcd,
 544                              const gmx::StepWorkload&      stepWork,
 545                              int*                          global_atom_index)
 546 {
 547 #pragma omp parallel for num_threads(bt->nthreads) schedule(static)
 548     for (int thread = 0; thread < bt->nthreads; thread++)
 549     {
 550         try
 551         {
 552             f_thread_t& threadBuffers = *bt->f_t[thread];
 553             int         ftype;
 554             real *      epot, v;
 555             /* thread stuff */
 556             rvec*              fshift;
 557             real*              dvdlt;
 558             gmx_grppairener_t* grpp;
 559
 560             zero_thread_output(&threadBuffers);
 561
 562             rvec4* ft = threadBuffers.f;
 563
 564             /* Thread 0 writes directly to the main output buffers.
 565              * We might want to reconsider this.
 566              */
 567             if (thread == 0)
 568             {
 569                 fshift = fshiftMasterBuffer;
 570                 epot   = enerd->term;
 571                 grpp   = &enerd->grpp;
 572                 dvdlt  = dvdl;
 573             }
 574             else
 575             {
 576                 fshift = as_rvec_array(threadBuffers.fshift.data());
 577                 epot   = threadBuffers.ener;
 578                 grpp   = &threadBuffers.grpp;
 579                 dvdlt  = threadBuffers.dvdl;
 580             }
 581             /* Loop over all bonded force types to calculate the bonded forces */
 582             for (ftype = 0; (ftype < F_NRE); ftype++)
 583             {
 584                 const InteractionList& ilist = idef.il[ftype];
 585                 if (!ilist.empty() && ftype_is_bonded_potential(ftype))
 586                 {
 587                     ArrayRef<const int> iatoms = gmx::makeConstArrayRef(ilist.iatoms);
 588                     v                          = calc_one_bond(thread,
 589                                       ftype,
 590                                       idef,
 591                                       iatoms,
 592                                       idef.numNonperturbedInteractions[ftype],
 593                                       bt->workDivision,
 594                                       x,
 595                                       ft,
 596                                       fshift,
 597                                       fr,
 598                                       pbc_null,
 599                                       grpp,
 600                                       nrnb,
 601                                       lambda,
 602                                       dvdlt,
 603                                       md,
 604                                       fcd,
 605                                       stepWork,
 606                                       global_atom_index);
 607                     epot[ftype] += v;
 608                 }
 609             }
 610         }
 611         GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
 612     }
 613 }
 614
 615 bool ListedForces::haveRestraints(const t_fcdata& fcdata) const
 616 {
 617     GMX_ASSERT(fcdata.orires && fcdata.disres, "NMR restraints objects should be set up");
 618
 619     return (!idef_->il[F_POSRES].empty() || !idef_->il[F_FBPOSRES].empty() || fcdata.orires->nr > 0
 620             || fcdata.disres->nres > 0);
 621 }
 622
 623 bool ListedForces::haveCpuBondeds() const
 624 {
 625     return threading_->haveBondeds;
 626 }
 627
 628 bool ListedForces::haveCpuListedForces(const t_fcdata& fcdata) const
 629 {
 630     return haveCpuBondeds() || haveRestraints(fcdata);
 631 }
 632
 633 namespace
 634 {
 635
 636 /*! \brief Calculates all listed force interactions. */
 637 void calc_listed(struct gmx_wallcycle*         wcycle,
 638                  const InteractionDefinitions& idef,
 639                  bonded_threading_t*           bt,
 640                  const rvec                    x[],
 641                  gmx::ForceOutputs*            forceOutputs,
 642                  const t_forcerec*             fr,
 643                  const t_pbc*                  pbc,
 644                  gmx_enerdata_t*               enerd,
 645                  t_nrnb*                       nrnb,
 646                  const real*                   lambda,
 647                  const t_mdatoms*              md,
 648                  t_fcdata*                     fcd,
 649                  int*                          global_atom_index,
 650                  const gmx::StepWorkload&      stepWork)
 651 {
 652     if (bt->haveBondeds)
 653     {
 654         gmx::ForceWithShiftForces& forceWithShiftForces = forceOutputs->forceWithShiftForces();
 655
 656         wallcycle_sub_start(wcycle, ewcsLISTED);
 657         /* The dummy array is to have a place to store the dhdl at other values
 658            of lambda, which will be thrown away in the end */
 659         real dvdl[efptNR] = { 0 };
 660         calcBondedForces(idef,
 661                          bt,
 662                          x,
 663                          fr,
 664                          fr->bMolPBC ? pbc : nullptr,
 665                          as_rvec_array(forceWithShiftForces.shiftForces().data()),
 666                          enerd,
 667                          nrnb,
 668                          lambda,
 669                          dvdl,
 670                          md,
 671                          fcd,
 672                          stepWork,
 673                          global_atom_index);
 674         wallcycle_sub_stop(wcycle, ewcsLISTED);
 675
 676         wallcycle_sub_start(wcycle, ewcsLISTED_BUF_OPS);
 677         reduce_thread_output(&forceWithShiftForces, enerd->term, &enerd->grpp, dvdl, bt, stepWork);
 678
 679         if (stepWork.computeDhdl)
 680         {
 681             for (int i = 0; i < efptNR; i++)
 682             {
 683                 enerd->dvdl_nonlin[i] += dvdl[i];
 684             }
 685         }
 686         wallcycle_sub_stop(wcycle, ewcsLISTED_BUF_OPS);
 687     }
 688
 689     /* Copy the sum of violations for the distance restraints from fcd */
 690     if (fcd)
 691     {
 692         enerd->term[F_DISRESVIOL] = fcd->disres->sumviol;
 693     }
 694 }
 695
 696 /*! \brief As calc_listed(), but only determines the potential energy
 697  * for the perturbed interactions.
 698  *
 699  * The shift forces in fr are not affected.
 700  */
 701 void calc_listed_lambda(const InteractionDefinitions& idef,
 702                         bonded_threading_t*           bt,
 703                         const rvec                    x[],
 704                         const t_forcerec*             fr,
 705                         const struct t_pbc*           pbc,
 706                         gmx::ArrayRef<real>           forceBufferLambda,
 707                         gmx::ArrayRef<gmx::RVec>      shiftForceBufferLambda,
 708                         gmx_grppairener_t*            grpp,
 709                         real*                         epot,
 710                         gmx::ArrayRef<real>           dvdl,
 711                         t_nrnb*                       nrnb,
 712                         const real*                   lambda,
 713                         const t_mdatoms*              md,
 714                         t_fcdata*                     fcd,
 715                         int*                          global_atom_index)
 716 {
 717     WorkDivision& workDivision = bt->foreignLambdaWorkDivision;
 718
 719     const t_pbc* pbc_null;
 720     if (fr->bMolPBC)
 721     {
 722         pbc_null = pbc;
 723     }
 724     else
 725     {
 726         pbc_null = nullptr;
 727     }
 728
 729     /* We already have the forces, so we use temp buffers here */
 730     std::fill(forceBufferLambda.begin(), forceBufferLambda.end(), 0.0_real);
 731     std::fill(shiftForceBufferLambda.begin(),
 732               shiftForceBufferLambda.end(),
 733               gmx::RVec{ 0.0_real, 0.0_real, 0.0_real });
 734     rvec4* f      = reinterpret_cast<rvec4*>(forceBufferLambda.data());
 735     rvec*  fshift = as_rvec_array(shiftForceBufferLambda.data());
 736
 737     /* Loop over all bonded force types to calculate the bonded energies */
 738     for (int ftype = 0; (ftype < F_NRE); ftype++)
 739     {
 740         if (ftype_is_bonded_potential(ftype))
 741         {
 742             const InteractionList& ilist = idef.il[ftype];
 743             /* Create a temporary iatom list with only perturbed interactions */
 744             const int           numNonperturbed = idef.numNonperturbedInteractions[ftype];
 745             ArrayRef<const int> iatomsPerturbed = gmx::constArrayRefFromArray(
 746                     ilist.iatoms.data() + numNonperturbed, ilist.size() - numNonperturbed);
 747             if (!iatomsPerturbed.empty())
 748             {
 749                 /* Set the work range of thread 0 to the perturbed bondeds */
 750                 workDivision.setBound(ftype, 0, 0);
 751                 workDivision.setBound(ftype, 1, iatomsPerturbed.ssize());
 752
 753                 gmx::StepWorkload tempFlags;
 754                 tempFlags.computeEnergy = true;
 755                 real v                  = calc_one_bond(0,
 756                                        ftype,
 757                                        idef,
 758                                        iatomsPerturbed,
 759                                        iatomsPerturbed.ssize(),
 760                                        workDivision,
 761                                        x,
 762                                        f,
 763                                        fshift,
 764                                        fr,
 765                                        pbc_null,
 766                                        grpp,
 767                                        nrnb,
 768                                        lambda,
 769                                        dvdl.data(),
 770                                        md,
 771                                        fcd,
 772                                        tempFlags,
 773                                        global_atom_index);
 774                 epot[ftype] += v;
 775             }
 776         }
 777     }
 778 }
 779
 780 } // namespace
 781
 782 void ListedForces::calculate(struct gmx_wallcycle*                     wcycle,
 783                              const matrix                              box,
 784                              const t_lambda*                           fepvals,
 785                              const t_commrec*                          cr,
 786                              const gmx_multisim_t*                     ms,
 787                              gmx::ArrayRefWithPadding<const gmx::RVec> coordinates,
 788                              gmx::ArrayRef<const gmx::RVec>            xWholeMolecules,
 789                              t_fcdata*                                 fcdata,
 790                              const history_t*                          hist,
 791                              gmx::ForceOutputs*                        forceOutputs,
 792                              const t_forcerec*                         fr,
 793                              const struct t_pbc*                       pbc,
 794                              gmx_enerdata_t*                           enerd,
 795                              t_nrnb*                                   nrnb,
 796                              const real*                               lambda,
 797                              const t_mdatoms*                          md,
 798                              int*                                      global_atom_index,
 799                              const gmx::StepWorkload&                  stepWork)
 800 {
 801     if (interactionSelection_.none() || !stepWork.computeListedForces)
 802     {
 803         return;
 804     }
 805
 806     const InteractionDefinitions& idef = *idef_;
 807
 808     // Todo: replace all rvec use here with ArrayRefWithPadding
 809     const rvec* x = as_rvec_array(coordinates.paddedArrayRef().data());
 810
 811     const bool calculateRestInteractions =
 812             interactionSelection_.test(static_cast<int>(ListedForces::InteractionGroup::Rest));
 813
 814     t_pbc pbc_full; /* Full PBC is needed for position restraints */
 815     if (calculateRestInteractions && haveRestraints(*fcdata))
 816     {
 817         if (!idef.il[F_POSRES].empty() || !idef.il[F_FBPOSRES].empty())
 818         {
 819             /* Not enough flops to bother counting */
 820             set_pbc(&pbc_full, fr->pbcType, box);
 821         }
 822
 823         /* TODO Use of restraints triggers further function calls
 824            inside the loop over calc_one_bond(), but those are too
 825            awkward to account to this subtimer properly in the present
 826            code. We don't test / care much about performance with
 827            restraints, anyway. */
 828         wallcycle_sub_start(wcycle, ewcsRESTRAINTS);
 829
 830         if (!idef.il[F_POSRES].empty())
 831         {
 832             posres_wrapper(nrnb, idef, &pbc_full, x, enerd, lambda, fr, &forceOutputs->forceWithVirial());
 833         }
 834
 835         if (!idef.il[F_FBPOSRES].empty())
 836         {
 837             fbposres_wrapper(nrnb, idef, &pbc_full, x, enerd, fr, &forceOutputs->forceWithVirial());
 838         }
 839
 840         /* Do pre force calculation stuff which might require communication */
 841         if (fcdata->orires->nr > 0)
 842         {
 843             GMX_ASSERT(!xWholeMolecules.empty(), "Need whole molecules for orienation restraints");
 844             enerd->term[F_ORIRESDEV] = calc_orires_dev(ms,
 845                                                        idef.il[F_ORIRES].size(),
 846                                                        idef.il[F_ORIRES].iatoms.data(),
 847                                                        idef.iparams.data(),
 848                                                        md,
 849                                                        xWholeMolecules,
 850                                                        x,
 851                                                        fr->bMolPBC ? pbc : nullptr,
 852                                                        fcdata->orires,
 853                                                        hist);
 854         }
 855         if (fcdata->disres->nres > 0)
 856         {
 857             calc_disres_R_6(cr,
 858                             ms,
 859                             idef.il[F_DISRES].size(),
 860                             idef.il[F_DISRES].iatoms.data(),
 861                             x,
 862                             fr->bMolPBC ? pbc : nullptr,
 863                             fcdata->disres,
 864                             hist);
 865         }
 866
 867         wallcycle_sub_stop(wcycle, ewcsRESTRAINTS);
 868     }
 869
 870     calc_listed(wcycle, idef, threading_.get(), x, forceOutputs, fr, pbc, enerd, nrnb, lambda, md, fcdata, global_atom_index, stepWork);
 871
 872     /* Check if we have to determine energy differences
 873      * at foreign lambda's.
 874      */
 875     if (fepvals->n_lambda > 0 && stepWork.computeDhdl)
 876     {
 877         real dvdl[efptNR] = { 0 };
 878         if (!idef.il[F_POSRES].empty())
 879         {
 880             posres_wrapper_lambda(wcycle, fepvals, idef, &pbc_full, x, enerd, lambda, fr);
 881         }
 882         if (idef.ilsort != ilsortNO_FE)
 883         {
 884             wallcycle_sub_start(wcycle, ewcsLISTED_FEP);
 885             if (idef.ilsort != ilsortFE_SORTED)
 886             {
 887                 gmx_incons("The bonded interactions are not sorted for free energy");
 888             }
 889             for (int i = 0; i < 1 + enerd->foreignLambdaTerms.numLambdas(); i++)
 890             {
 891                 real lam_i[efptNR];
 892
 893                 reset_foreign_enerdata(enerd);
 894                 for (int j = 0; j < efptNR; j++)
 895                 {
 896                     lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i - 1]);
 897                 }
 898                 calc_listed_lambda(idef,
 899                                    threading_.get(),
 900                                    x,
 901                                    fr,
 902                                    pbc,
 903                                    forceBufferLambda_,
 904                                    shiftForceBufferLambda_,
 905                                    &(enerd->foreign_grpp),
 906                                    enerd->foreign_term,
 907                                    dvdl,
 908                                    nrnb,
 909                                    lam_i,
 910                                    md,
 911                                    fcdata,
 912                                    global_atom_index);
 913                 sum_epot(enerd->foreign_grpp, enerd->foreign_term);
 914                 const double dvdlSum = std::accumulate(std::begin(dvdl), std::end(dvdl), 0.);
 915                 std::fill(std::begin(dvdl), std::end(dvdl), 0.0);
 916                 enerd->foreignLambdaTerms.accumulate(i, enerd->foreign_term[F_EPOT], dvdlSum);
 917             }
 918             wallcycle_sub_stop(wcycle, ewcsLISTED_FEP);
 919         }
 920     }
 921 }