Refactor md_enums
src/gromacs/listed_forces/listed_forces.cpp
/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
 * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
/*! \internal \file
 *
 * \brief This file defines high-level functions for mdrun to compute
 * energies and forces for listed interactions.
 *
 * \author Mark Abraham <mark.j.abraham@gmail.com>
 *
 * \ingroup module_listed_forces
 */
#include "gmxpre.h"

#include "listed_forces.h"

#include <cassert>

#include <algorithm>
#include <array>
#include <numeric>

#include "gromacs/gmxlib/network.h"
#include "gromacs/gmxlib/nrnb.h"
#include "gromacs/listed_forces/bonded.h"
#include "gromacs/listed_forces/disre.h"
#include "gromacs/listed_forces/orires.h"
#include "gromacs/listed_forces/pairs.h"
#include "gromacs/listed_forces/position_restraints.h"
#include "gromacs/math/vec.h"
#include "gromacs/mdlib/enerdata_utils.h"
#include "gromacs/mdlib/force.h"
#include "gromacs/mdlib/gmx_omp_nthreads.h"
#include "gromacs/mdtypes/commrec.h"
#include "gromacs/mdtypes/fcdata.h"
#include "gromacs/mdtypes/forceoutput.h"
#include "gromacs/mdtypes/forcerec.h"
#include "gromacs/mdtypes/inputrec.h"
#include "gromacs/mdtypes/md_enums.h"
#include "gromacs/mdtypes/simulation_workload.h"
#include "gromacs/pbcutil/ishift.h"
#include "gromacs/pbcutil/pbc.h"
#include "gromacs/timing/wallcycle.h"
#include "gromacs/topology/topology.h"
#include "gromacs/utility/arrayref.h"
#include "gromacs/utility/enumerationhelpers.h"
#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/fatalerror.h"

#include "listed_internal.h"
#include "manage_threading.h"
#include "utilities.h"

ListedForces::ListedForces(const gmx_ffparams_t&      ffparams,
                           const int                  numEnergyGroups,
                           const int                  numThreads,
                           const InteractionSelection interactionSelection,
                           FILE*                      fplog) :
    idefSelection_(ffparams),
    threading_(std::make_unique<bonded_threading_t>(numThreads, numEnergyGroups, fplog)),
    interactionSelection_(interactionSelection)
{
}

ListedForces::ListedForces(ListedForces&& o) noexcept = default;

ListedForces::~ListedForces() = default;

//! Copies the selected interactions from \p idefSrc to \p idef
static void selectInteractions(InteractionDefinitions*                  idef,
                               const InteractionDefinitions&            idefSrc,
                               const ListedForces::InteractionSelection interactionSelection)
{
    const bool selectPairs =
            interactionSelection.test(static_cast<int>(ListedForces::InteractionGroup::Pairs));
    const bool selectDihedrals =
            interactionSelection.test(static_cast<int>(ListedForces::InteractionGroup::Dihedrals));
    const bool selectAngles =
            interactionSelection.test(static_cast<int>(ListedForces::InteractionGroup::Angles));
    const bool selectRest =
            interactionSelection.test(static_cast<int>(ListedForces::InteractionGroup::Rest));

    for (int ftype = 0; ftype < F_NRE; ftype++)
    {
        const t_interaction_function& ifunc = interaction_function[ftype];
        if (ifunc.flags & IF_BOND)
        {
            bool assign = false;
            if (ifunc.flags & IF_PAIR)
            {
                assign = selectPairs;
            }
            else if (ifunc.flags & IF_DIHEDRAL)
            {
                assign = selectDihedrals;
            }
            else if (ifunc.flags & IF_ATYPE)
            {
                assign = selectAngles;
            }
            else
            {
                assign = selectRest;
            }
            if (assign)
            {
                idef->il[ftype] = idefSrc.il[ftype];
            }
            else
            {
                idef->il[ftype].clear();
            }
        }
    }
}
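
/* Usage note (a sketch, not authoritative API documentation): the interaction
 * selection used above behaves like a bitset indexed by
 * ListedForces::InteractionGroup, as the .test()/.all()/.none() calls in this
 * file show. Assuming a std::bitset-like .set() member, selecting only the
 * pair and "rest" interactions for a separate ListedForces instance could
 * look like:
 *
 *   ListedForces::InteractionSelection selection;
 *   selection.set(static_cast<int>(ListedForces::InteractionGroup::Pairs));
 *   selection.set(static_cast<int>(ListedForces::InteractionGroup::Rest));
 *   ListedForces pairsAndRest(ffparams, numEnergyGroups, numThreads, selection, fplog);
 */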

void ListedForces::setup(const InteractionDefinitions& domainIdef, const int numAtomsForce, const bool useGpu)
{
    if (interactionSelection_.all())
    {
        // Avoid the overhead of copying all interaction lists by simply setting the reference to the domain idef
        idef_ = &domainIdef;
    }
    else
    {
        idef_ = &idefSelection_;

        selectInteractions(&idefSelection_, domainIdef, interactionSelection_);

        idefSelection_.ilsort = domainIdef.ilsort;

        if (interactionSelection_.test(static_cast<int>(ListedForces::InteractionGroup::Rest)))
        {
            idefSelection_.iparams_posres   = domainIdef.iparams_posres;
            idefSelection_.iparams_fbposres = domainIdef.iparams_fbposres;
        }
        else
        {
            idefSelection_.iparams_posres.clear();
            idefSelection_.iparams_fbposres.clear();
        }
    }

    setup_bonded_threading(threading_.get(), numAtomsForce, useGpu, *idef_);

    if (idef_->ilsort == ilsortFE_SORTED)
    {
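        /* With a free-energy-sorted topology, allocate temporary buffers used when
         * evaluating energies at foreign lambda values: one rvec4 force per atom
         * (stored as a flat array of reals) and one shift force per PBC shift vector. */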
        forceBufferLambda_.resize(numAtomsForce * sizeof(rvec4) / sizeof(real));
        shiftForceBufferLambda_.resize(SHIFTS);
    }
}

namespace
{

using gmx::ArrayRef;

/*! \brief Return true if ftype is an explicit pair-listed LJ or
 * COULOMB interaction type: bonded LJ (usually 1-4), or special
 * listed non-bonded for FEP. */
bool isPairInteraction(int ftype)
{
    return (ftype >= F_LJ14 && ftype <= F_LJC_PAIRS_NB);
}

/*! \brief Zero thread-local output buffers */
void zero_thread_output(f_thread_t* f_t)
{
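    /* f_t->f is a buffer of rvec4, so nelem_fa is 4 reals per atom
     * (x, y and z plus one padding element). */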
    constexpr int nelem_fa = sizeof(f_t->f[0]) / sizeof(real);

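    /* Clear only the force-reduction blocks that this thread has used */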
    for (int i = 0; i < f_t->nblock_used; i++)
    {
        int a0 = f_t->block_index[i] * reduction_block_size;
        int a1 = a0 + reduction_block_size;
        for (int a = a0; a < a1; a++)
        {
            for (int d = 0; d < nelem_fa; d++)
            {
                f_t->f[a][d] = 0;
            }
        }
    }

    for (int i = 0; i < SHIFTS; i++)
    {
        clear_rvec(f_t->fshift[i]);
    }
    for (int i = 0; i < F_NRE; i++)
    {
        f_t->ener[i] = 0;
    }
    for (int i = 0; i < egNR; i++)
    {
        for (int j = 0; j < f_t->grpp.nener; j++)
        {
            f_t->grpp.ener[i][j] = 0;
        }
    }
    for (auto i : keysOf(f_t->dvdl))
    {
        f_t->dvdl[i] = 0;
    }
}

/*! \brief The maximum thread count is arbitrary; we use a fixed number
 * to avoid memory management. Using more than 16 threads is probably
 * never useful performance-wise. */
#define MAX_BONDED_THREADS 256

/*! \brief Reduce thread-local force buffers */
void reduce_thread_forces(gmx::ArrayRef<gmx::RVec> force, const bonded_threading_t* bt, int nthreads)
{
    if (nthreads > MAX_BONDED_THREADS)
    {
        gmx_fatal(FARGS, "Cannot reduce bonded forces on more than %d threads", MAX_BONDED_THREADS);
    }

    rvec* gmx_restrict f = as_rvec_array(force.data());

    const int numAtomsForce = bt->numAtomsForce;

    /* This reduction can run on any number of threads, independently
     * of bt->nthreads. But when nthreads matches bt->nthreads (which
     * it currently does), the uniform distribution of the touched
     * blocks over nthreads matches the distribution of bondeds over
     * threads well in most cases, so threads mostly reduce their own
     * data, which increases the number of cache hits.
     */
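    /* Each bit ft of bt->mask[block] records whether thread ft wrote to that
     * reduction block, so only contributing buffers are read below. */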
#pragma omp parallel for num_threads(nthreads) schedule(static)
    for (int b = 0; b < bt->nblock_used; b++)
    {
        try
        {
            int    ind = bt->block_index[b];
            rvec4* fp[MAX_BONDED_THREADS];

            /* Determine which threads contribute to this block */
            int nfb = 0;
            for (int ft = 0; ft < bt->nthreads; ft++)
            {
                if (bitmask_is_set(bt->mask[ind], ft))
                {
                    fp[nfb++] = bt->f_t[ft]->f;
                }
            }
            if (nfb > 0)
            {
                /* Reduce force buffers for threads that contribute */
                int a0 = ind * reduction_block_size;
                int a1 = (ind + 1) * reduction_block_size;
                /* It would be nice if we could pad f to avoid this min */
                a1 = std::min(a1, numAtomsForce);
                for (int a = a0; a < a1; a++)
                {
                    for (int fb = 0; fb < nfb; fb++)
                    {
                        rvec_inc(f[a], fp[fb][a]);
                    }
                }
            }
        }
        GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
    }
}

/*! \brief Reduce thread-local forces, shift forces and energies */
void reduce_thread_output(gmx::ForceWithShiftForces* forceWithShiftForces,
                          real*                      ener,
                          gmx_grppairener_t*         grpp,
                          gmx::ArrayRef<real>        dvdl,
                          const bonded_threading_t*  bt,
                          const gmx::StepWorkload&   stepWork)
{
    assert(bt->haveBondeds);

    if (bt->nblock_used > 0)
    {
        /* Reduce the bonded force buffer */
        reduce_thread_forces(forceWithShiftForces->force(), bt, bt->nthreads);
    }

    rvec* gmx_restrict fshift = as_rvec_array(forceWithShiftForces->shiftForces().data());

    /* When necessary, reduce energy and virial using one thread only */
    if ((stepWork.computeEnergy || stepWork.computeVirial || stepWork.computeDhdl) && bt->nthreads > 1)
    {
        gmx::ArrayRef<const std::unique_ptr<f_thread_t>> f_t = bt->f_t;

        if (stepWork.computeVirial)
        {
            for (int i = 0; i < SHIFTS; i++)
            {
                for (int t = 1; t < bt->nthreads; t++)
                {
                    rvec_inc(fshift[i], f_t[t]->fshift[i]);
                }
            }
        }
        if (stepWork.computeEnergy)
        {
            for (int i = 0; i < F_NRE; i++)
            {
                for (int t = 1; t < bt->nthreads; t++)
                {
                    ener[i] += f_t[t]->ener[i];
                }
            }
            for (int i = 0; i < egNR; i++)
            {
                for (int j = 0; j < f_t[1]->grpp.nener; j++)
                {
                    for (int t = 1; t < bt->nthreads; t++)
                    {
                        grpp->ener[i][j] += f_t[t]->grpp.ener[i][j];
                    }
                }
            }
        }
        if (stepWork.computeDhdl)
        {
            for (auto i : keysOf(f_t[1]->dvdl))
            {
                for (int t = 1; t < bt->nthreads; t++)
                {
                    dvdl[static_cast<int>(i)] += f_t[t]->dvdl[i];
                }
            }
        }
    }
}

/*! \brief Returns the bonded kernel flavor
 *
 * Note that energies are always requested when the virial
 * is requested (performance gain would be small).
 * Note that currently we do not have bonded kernels that
 * do not compute forces.
 */
BondedKernelFlavor selectBondedKernelFlavor(const gmx::StepWorkload& stepWork,
                                            const bool               useSimdKernels,
                                            const bool               havePerturbedInteractions)
{
    BondedKernelFlavor flavor;
    if (stepWork.computeEnergy || stepWork.computeVirial)
    {
        if (stepWork.computeVirial)
        {
            flavor = BondedKernelFlavor::ForcesAndVirialAndEnergy;
        }
        else
        {
            flavor = BondedKernelFlavor::ForcesAndEnergy;
        }
    }
    else
    {
        if (useSimdKernels && !havePerturbedInteractions)
        {
            flavor = BondedKernelFlavor::ForcesSimdWhenAvailable;
        }
        else
        {
            flavor = BondedKernelFlavor::ForcesNoSimd;
        }
    }

    return flavor;
}

/*! \brief Calculate one element of the list of bonded interactions
    for this thread */
real calc_one_bond(int                           thread,
                   int                           ftype,
                   const InteractionDefinitions& idef,
                   ArrayRef<const int>           iatoms,
                   const int                     numNonperturbedInteractions,
                   const WorkDivision&           workDivision,
                   const rvec                    x[],
                   rvec4                         f[],
                   rvec                          fshift[],
                   const t_forcerec*             fr,
                   const t_pbc*                  pbc,
                   gmx_grppairener_t*            grpp,
                   t_nrnb*                       nrnb,
                   gmx::ArrayRef<const real>     lambda,
                   gmx::ArrayRef<real>           dvdl,
                   const t_mdatoms*              md,
                   t_fcdata*                     fcd,
                   const gmx::StepWorkload&      stepWork,
                   int*                          global_atom_index)
{
    GMX_ASSERT(idef.ilsort == ilsortNO_FE || idef.ilsort == ilsortFE_SORTED,
               "The topology should be marked either as no FE or sorted on FE");

    const bool havePerturbedInteractions =
            (idef.ilsort == ilsortFE_SORTED && numNonperturbedInteractions < iatoms.ssize());
    BondedKernelFlavor flavor =
            selectBondedKernelFlavor(stepWork, fr->use_simd_kernels, havePerturbedInteractions);
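    /* Restraint interactions are perturbed with the restraint lambda component;
     * all other bonded interactions use the bonded lambda component. */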
    FreeEnergyPerturbationCouplingType efptFTYPE;
    if (IS_RESTRAINT_TYPE(ftype))
    {
        efptFTYPE = FreeEnergyPerturbationCouplingType::Restraint;
    }
    else
    {
        efptFTYPE = FreeEnergyPerturbationCouplingType::Bonded;
    }

    const int nat1   = interaction_function[ftype].nratoms + 1;
    const int nbonds = iatoms.ssize() / nat1;

    GMX_ASSERT(fr->gpuBonded != nullptr || workDivision.end(ftype) == iatoms.ssize(),
               "The thread division should match the topology");

    const int nb0 = workDivision.bound(ftype, thread);
    const int nbn = workDivision.bound(ftype, thread + 1) - nb0;
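    /* This thread processes the iatoms entries in the half-open range
     * [nb0, nb0 + nbn), i.e. nbn/nat1 interactions of this function type. */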

    ArrayRef<const t_iparams> iparams = idef.iparams;

    real v = 0;
    if (!isPairInteraction(ftype))
    {
        if (ftype == F_CMAP)
        {
            /* TODO It might be nice to account the execution time for CMAP
               dihedrals to their own subtimer, but first wallcycle needs to
               be extended to support calling from multiple threads. */
            v = cmap_dihs(nbn,
                          iatoms.data() + nb0,
                          iparams.data(),
                          &idef.cmap_grid,
                          x,
                          f,
                          fshift,
                          pbc,
                          lambda[static_cast<int>(efptFTYPE)],
                          &(dvdl[static_cast<int>(efptFTYPE)]),
                          md,
                          fcd,
                          nullptr,
                          nullptr,
                          global_atom_index);
        }
        else
        {
            v = calculateSimpleBond(ftype,
                                    nbn,
                                    iatoms.data() + nb0,
                                    iparams.data(),
                                    x,
                                    f,
                                    fshift,
                                    pbc,
                                    lambda[static_cast<int>(efptFTYPE)],
                                    &(dvdl[static_cast<int>(efptFTYPE)]),
                                    md,
                                    fcd,
                                    fcd->disres,
                                    fcd->orires,
                                    global_atom_index,
                                    flavor);
        }
    }
    else
    {
        /* TODO It might be nice to account the execution time for pairs to
           their own subtimer, but first wallcycle needs to be extended to
           support calling from multiple threads. */
        do_pairs(ftype,
                 nbn,
                 iatoms.data() + nb0,
                 iparams.data(),
                 x,
                 f,
                 fshift,
                 pbc,
                 lambda.data(),
                 dvdl.data(),
                 md,
                 fr,
                 havePerturbedInteractions,
                 stepWork,
                 grpp,
                 global_atom_index);
    }

    if (thread == 0)
    {
        inc_nrnb(nrnb, nrnbIndex(ftype), nbonds);
    }

    return v;
}

} // namespace

/*! \brief Compute the bonded part of the listed forces, parallelized over threads
 */
static void calcBondedForces(const InteractionDefinitions& idef,
                             bonded_threading_t*           bt,
                             const rvec                    x[],
                             const t_forcerec*             fr,
                             const t_pbc*                  pbc_null,
                             rvec*                         fshiftMasterBuffer,
                             gmx_enerdata_t*               enerd,
                             t_nrnb*                       nrnb,
                             gmx::ArrayRef<const real>     lambda,
                             gmx::ArrayRef<real>           dvdl,
                             const t_mdatoms*              md,
                             t_fcdata*                     fcd,
                             const gmx::StepWorkload&      stepWork,
                             int*                          global_atom_index)
{
#pragma omp parallel for num_threads(bt->nthreads) schedule(static)
    for (int thread = 0; thread < bt->nthreads; thread++)
    {
        try
        {
            f_thread_t& threadBuffers = *bt->f_t[thread];
            int         ftype;
            real *      epot, v;
            /* Thread-local output pointers */
            rvec*               fshift;
            gmx::ArrayRef<real> dvdlt;
            gmx_grppairener_t*  grpp;

            zero_thread_output(&threadBuffers);

            rvec4* ft = threadBuffers.f;

            /* Thread 0 writes directly to the main output buffers.
             * We might want to reconsider this.
             */
            if (thread == 0)
            {
                fshift = fshiftMasterBuffer;
                epot   = enerd->term;
                grpp   = &enerd->grpp;
                dvdlt  = dvdl;
            }
            else
            {
                fshift = as_rvec_array(threadBuffers.fshift.data());
                epot   = threadBuffers.ener;
                grpp   = &threadBuffers.grpp;
                dvdlt  = threadBuffers.dvdl;
            }
            /* Loop over all bonded force types to calculate the bonded forces */
            for (ftype = 0; (ftype < F_NRE); ftype++)
            {
                const InteractionList& ilist = idef.il[ftype];
                if (!ilist.empty() && ftype_is_bonded_potential(ftype))
                {
                    ArrayRef<const int> iatoms = gmx::makeConstArrayRef(ilist.iatoms);
                    v                          = calc_one_bond(thread,
                                      ftype,
                                      idef,
                                      iatoms,
                                      idef.numNonperturbedInteractions[ftype],
                                      bt->workDivision,
                                      x,
                                      ft,
                                      fshift,
                                      fr,
                                      pbc_null,
                                      grpp,
                                      nrnb,
                                      lambda,
                                      dvdlt,
                                      md,
                                      fcd,
                                      stepWork,
                                      global_atom_index);
                    epot[ftype] += v;
                }
            }
        }
        GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
    }
}

bool ListedForces::haveRestraints(const t_fcdata& fcdata) const
{
    GMX_ASSERT(fcdata.orires && fcdata.disres, "NMR restraints objects should be set up");

    return (!idef_->il[F_POSRES].empty() || !idef_->il[F_FBPOSRES].empty() || fcdata.orires->nr > 0
            || fcdata.disres->nres > 0);
}

bool ListedForces::haveCpuBondeds() const
{
    return threading_->haveBondeds;
}

bool ListedForces::haveCpuListedForces(const t_fcdata& fcdata) const
{
    return haveCpuBondeds() || haveRestraints(fcdata);
}

namespace
{

/*! \brief Calculates all listed force interactions. */
void calc_listed(struct gmx_wallcycle*         wcycle,
                 const InteractionDefinitions& idef,
                 bonded_threading_t*           bt,
                 const rvec                    x[],
                 gmx::ForceOutputs*            forceOutputs,
                 const t_forcerec*             fr,
                 const t_pbc*                  pbc,
                 gmx_enerdata_t*               enerd,
                 t_nrnb*                       nrnb,
                 gmx::ArrayRef<const real>     lambda,
                 const t_mdatoms*              md,
                 t_fcdata*                     fcd,
                 int*                          global_atom_index,
                 const gmx::StepWorkload&      stepWork)
{
    if (bt->haveBondeds)
    {
        gmx::ForceWithShiftForces& forceWithShiftForces = forceOutputs->forceWithShiftForces();

        wallcycle_sub_start(wcycle, ewcsLISTED);
        /* The dummy array provides a place to store the dhdl at other values
           of lambda, which is thrown away in the end */
        gmx::EnumerationArray<FreeEnergyPerturbationCouplingType, real> dvdl = { 0 };
        calcBondedForces(idef,
                         bt,
                         x,
                         fr,
                         fr->bMolPBC ? pbc : nullptr,
                         as_rvec_array(forceWithShiftForces.shiftForces().data()),
                         enerd,
                         nrnb,
                         lambda,
                         dvdl,
                         md,
                         fcd,
                         stepWork,
                         global_atom_index);
        wallcycle_sub_stop(wcycle, ewcsLISTED);

        wallcycle_sub_start(wcycle, ewcsLISTED_BUF_OPS);
        reduce_thread_output(&forceWithShiftForces, enerd->term, &enerd->grpp, dvdl, bt, stepWork);

        if (stepWork.computeDhdl)
        {
            for (auto i : keysOf(enerd->dvdl_lin))
            {
                enerd->dvdl_nonlin[i] += dvdl[i];
            }
        }
        wallcycle_sub_stop(wcycle, ewcsLISTED_BUF_OPS);
    }

    /* Copy the sum of violations for the distance restraints from fcd */
    if (fcd)
    {
        enerd->term[F_DISRESVIOL] = fcd->disres->sumviol;
    }
}

/*! \brief As calc_listed(), but only determines the potential energy
 * for the perturbed interactions.
 *
 * The shift forces in fr are not affected.
 */
void calc_listed_lambda(const InteractionDefinitions& idef,
                        bonded_threading_t*           bt,
                        const rvec                    x[],
                        const t_forcerec*             fr,
                        const struct t_pbc*           pbc,
                        gmx::ArrayRef<real>           forceBufferLambda,
                        gmx::ArrayRef<gmx::RVec>      shiftForceBufferLambda,
                        gmx_grppairener_t*            grpp,
                        real*                         epot,
                        gmx::ArrayRef<real>           dvdl,
                        t_nrnb*                       nrnb,
                        gmx::ArrayRef<const real>     lambda,
                        const t_mdatoms*              md,
                        t_fcdata*                     fcd,
                        int*                          global_atom_index)
{
    WorkDivision& workDivision = bt->foreignLambdaWorkDivision;

    const t_pbc* pbc_null;
    if (fr->bMolPBC)
    {
        pbc_null = pbc;
    }
    else
    {
        pbc_null = nullptr;
    }

    /* The forces at the current lambda have already been computed,
     * so temporary buffers are used here and discarded afterwards */
    std::fill(forceBufferLambda.begin(), forceBufferLambda.end(), 0.0_real);
    std::fill(shiftForceBufferLambda.begin(),
              shiftForceBufferLambda.end(),
              gmx::RVec{ 0.0_real, 0.0_real, 0.0_real });
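    /* Reinterpret the flat real buffer as rvec4 forces; only the energies,
     * group-pair energies and dvdl computed below are kept. */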
    rvec4* f      = reinterpret_cast<rvec4*>(forceBufferLambda.data());
    rvec*  fshift = as_rvec_array(shiftForceBufferLambda.data());

    /* Loop over all bonded force types to calculate the bonded energies */
    for (int ftype = 0; (ftype < F_NRE); ftype++)
    {
        if (ftype_is_bonded_potential(ftype))
        {
            const InteractionList& ilist = idef.il[ftype];
            /* Create a temporary iatom list with only the perturbed interactions */
            const int           numNonperturbed = idef.numNonperturbedInteractions[ftype];
            ArrayRef<const int> iatomsPerturbed = gmx::constArrayRefFromArray(
                    ilist.iatoms.data() + numNonperturbed, ilist.size() - numNonperturbed);
            if (!iatomsPerturbed.empty())
            {
                /* Set the work range of thread 0 to the perturbed bondeds */
                workDivision.setBound(ftype, 0, 0);
                workDivision.setBound(ftype, 1, iatomsPerturbed.ssize());

                gmx::StepWorkload tempFlags;
                tempFlags.computeEnergy = true;
                real v                  = calc_one_bond(0,
                                       ftype,
                                       idef,
                                       iatomsPerturbed,
                                       iatomsPerturbed.ssize(),
                                       workDivision,
                                       x,
                                       f,
                                       fshift,
                                       fr,
                                       pbc_null,
                                       grpp,
                                       nrnb,
                                       lambda,
                                       dvdl,
                                       md,
                                       fcd,
                                       tempFlags,
                                       global_atom_index);
                epot[ftype] += v;
            }
        }
    }
}

} // namespace

void ListedForces::calculate(struct gmx_wallcycle*                     wcycle,
                             const matrix                              box,
                             const t_lambda*                           fepvals,
                             const t_commrec*                          cr,
                             const gmx_multisim_t*                     ms,
                             gmx::ArrayRefWithPadding<const gmx::RVec> coordinates,
                             gmx::ArrayRef<const gmx::RVec>            xWholeMolecules,
                             t_fcdata*                                 fcdata,
                             const history_t*                          hist,
                             gmx::ForceOutputs*                        forceOutputs,
                             const t_forcerec*                         fr,
                             const struct t_pbc*                       pbc,
                             gmx_enerdata_t*                           enerd,
                             t_nrnb*                                   nrnb,
                             gmx::ArrayRef<const real>                 lambda,
                             const t_mdatoms*                          md,
                             int*                                      global_atom_index,
                             const gmx::StepWorkload&                  stepWork)
{
    if (interactionSelection_.none() || !stepWork.computeListedForces)
    {
        return;
    }

    const InteractionDefinitions& idef = *idef_;

    // TODO: replace all rvec use here with ArrayRefWithPadding
    const rvec* x = as_rvec_array(coordinates.paddedArrayRef().data());

    const bool calculateRestInteractions =
            interactionSelection_.test(static_cast<int>(ListedForces::InteractionGroup::Rest));

    t_pbc pbc_full; /* Full PBC is needed for position restraints */
    if (calculateRestInteractions && haveRestraints(*fcdata))
    {
        if (!idef.il[F_POSRES].empty() || !idef.il[F_FBPOSRES].empty())
        {
            /* Not enough flops to bother counting */
            set_pbc(&pbc_full, fr->pbcType, box);
        }

        /* TODO Use of restraints triggers further function calls
           inside the loop over calc_one_bond(), but those are too
           awkward to account to this subtimer properly in the present
           code. We don't test / care much about performance with
           restraints, anyway. */
        wallcycle_sub_start(wcycle, ewcsRESTRAINTS);

        if (!idef.il[F_POSRES].empty())
        {
            posres_wrapper(nrnb, idef, &pbc_full, x, enerd, lambda, fr, &forceOutputs->forceWithVirial());
        }

        if (!idef.il[F_FBPOSRES].empty())
        {
            fbposres_wrapper(nrnb, idef, &pbc_full, x, enerd, fr, &forceOutputs->forceWithVirial());
        }

        /* Do pre-force-calculation work that might require communication */
        if (fcdata->orires->nr > 0)
        {
            GMX_ASSERT(!xWholeMolecules.empty(), "Need whole molecules for orientation restraints");
            enerd->term[F_ORIRESDEV] = calc_orires_dev(ms,
                                                       idef.il[F_ORIRES].size(),
                                                       idef.il[F_ORIRES].iatoms.data(),
                                                       idef.iparams.data(),
                                                       md,
                                                       xWholeMolecules,
                                                       x,
                                                       fr->bMolPBC ? pbc : nullptr,
                                                       fcdata->orires,
                                                       hist);
        }
        if (fcdata->disres->nres > 0)
        {
            calc_disres_R_6(cr,
                            ms,
                            idef.il[F_DISRES].size(),
                            idef.il[F_DISRES].iatoms.data(),
                            x,
                            fr->bMolPBC ? pbc : nullptr,
                            fcdata->disres,
                            hist);
        }

        wallcycle_sub_stop(wcycle, ewcsRESTRAINTS);
    }

    calc_listed(wcycle, idef, threading_.get(), x, forceOutputs, fr, pbc, enerd, nrnb, lambda, md, fcdata, global_atom_index, stepWork);

    /* Check whether we have to determine energy differences
     * at foreign lambdas.
     */
    if (fepvals->n_lambda > 0 && stepWork.computeDhdl)
    {
        gmx::EnumerationArray<FreeEnergyPerturbationCouplingType, real> dvdl = { 0 };
        if (!idef.il[F_POSRES].empty())
        {
            posres_wrapper_lambda(wcycle, fepvals, idef, &pbc_full, x, enerd, lambda, fr);
        }
        if (idef.ilsort != ilsortNO_FE)
        {
            wallcycle_sub_start(wcycle, ewcsLISTED_FEP);
            if (idef.ilsort != ilsortFE_SORTED)
            {
                gmx_incons("The bonded interactions are not sorted for free energy");
            }
            for (int i = 0; i < 1 + enerd->foreignLambdaTerms.numLambdas(); i++)
            {
                gmx::EnumerationArray<FreeEnergyPerturbationCouplingType, real> lam_i;

                reset_foreign_enerdata(enerd);
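                /* i == 0 re-evaluates at the current lambda state; i > 0 evaluates
                 * at foreign lambda i - 1 from fepvals->all_lambda */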
                for (auto j : keysOf(lam_i))
                {
                    lam_i[j] = (i == 0 ? lambda[static_cast<int>(j)] : fepvals->all_lambda[j][i - 1]);
                }
                calc_listed_lambda(idef,
                                   threading_.get(),
                                   x,
                                   fr,
                                   pbc,
                                   forceBufferLambda_,
                                   shiftForceBufferLambda_,
                                   &(enerd->foreign_grpp),
                                   enerd->foreign_term,
                                   dvdl,
                                   nrnb,
                                   lam_i,
                                   md,
                                   fcdata,
                                   global_atom_index);
                sum_epot(enerd->foreign_grpp, enerd->foreign_term);
                const double dvdlSum = std::accumulate(std::begin(dvdl), std::end(dvdl), 0.);
                std::fill(std::begin(dvdl), std::end(dvdl), 0.0);
                enerd->foreignLambdaTerms.accumulate(i, enerd->foreign_term[F_EPOT], dvdlSum);
            }
            wallcycle_sub_stop(wcycle, ewcsLISTED_FEP);
        }
    }
}