Copy position restraint parameters with MTS
[alexxy/gromacs.git] / src / gromacs / listed_forces / listed_forces.cpp
/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
 * Copyright (c) 2019,2020, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
/*! \internal \file
 *
 * \brief This file defines high-level functions for mdrun to compute
 * energies and forces for listed interactions.
 *
 * \author Mark Abraham <mark.j.abraham@gmail.com>
 *
 * \ingroup module_listed_forces
 */
#include "gmxpre.h"

#include "listed_forces.h"

#include <cassert>

#include <algorithm>
#include <array>
#include <numeric>

#include "gromacs/gmxlib/network.h"
#include "gromacs/gmxlib/nrnb.h"
#include "gromacs/listed_forces/bonded.h"
#include "gromacs/listed_forces/disre.h"
#include "gromacs/listed_forces/orires.h"
#include "gromacs/listed_forces/pairs.h"
#include "gromacs/listed_forces/position_restraints.h"
#include "gromacs/math/vec.h"
#include "gromacs/mdlib/enerdata_utils.h"
#include "gromacs/mdlib/force.h"
#include "gromacs/mdlib/gmx_omp_nthreads.h"
#include "gromacs/mdtypes/commrec.h"
#include "gromacs/mdtypes/fcdata.h"
#include "gromacs/mdtypes/forceoutput.h"
#include "gromacs/mdtypes/forcerec.h"
#include "gromacs/mdtypes/inputrec.h"
#include "gromacs/mdtypes/md_enums.h"
#include "gromacs/mdtypes/simulation_workload.h"
#include "gromacs/pbcutil/ishift.h"
#include "gromacs/pbcutil/pbc.h"
#include "gromacs/timing/wallcycle.h"
#include "gromacs/topology/topology.h"
#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/fatalerror.h"

#include "listed_internal.h"
#include "manage_threading.h"
#include "utilities.h"

ListedForces::ListedForces(const gmx_ffparams_t&      ffparams,
                           const int                  numEnergyGroups,
                           const int                  numThreads,
                           const InteractionSelection interactionSelection,
                           FILE*                      fplog) :
    idefSelection_(ffparams),
    threading_(std::make_unique<bonded_threading_t>(numThreads, numEnergyGroups, fplog)),
    interactionSelection_(interactionSelection)
{
}

ListedForces::ListedForces(ListedForces&& o) noexcept = default;

ListedForces::~ListedForces() = default;

//! Copies the selected interactions from \p idefSrc to \p idef
static void selectInteractions(InteractionDefinitions*                  idef,
                               const InteractionDefinitions&            idefSrc,
                               const ListedForces::InteractionSelection interactionSelection)
{
    const bool selectPairs =
            interactionSelection.test(static_cast<int>(ListedForces::InteractionGroup::Pairs));
    const bool selectDihedrals =
            interactionSelection.test(static_cast<int>(ListedForces::InteractionGroup::Dihedrals));
    const bool selectAngles =
            interactionSelection.test(static_cast<int>(ListedForces::InteractionGroup::Angles));
    const bool selectRest =
            interactionSelection.test(static_cast<int>(ListedForces::InteractionGroup::Rest));

    for (int ftype = 0; ftype < F_NRE; ftype++)
    {
        const t_interaction_function& ifunc = interaction_function[ftype];
        if (ifunc.flags & IF_BOND)
        {
            bool assign = false;
            if (ifunc.flags & IF_PAIR)
            {
                assign = selectPairs;
            }
            else if (ifunc.flags & IF_DIHEDRAL)
            {
                assign = selectDihedrals;
            }
            else if (ifunc.flags & IF_ATYPE)
            {
                assign = selectAngles;
            }
            else
            {
                assign = selectRest;
            }
            if (assign)
            {
                idef->il[ftype] = idefSrc.il[ftype];
            }
            else
            {
                idef->il[ftype].clear();
            }
        }
    }
}

void ListedForces::setup(const InteractionDefinitions& domainIdef, const int numAtomsForce, const bool useGpu)
{
    if (interactionSelection_.all())
    {
        // Avoid the overhead of copying all interaction lists by simply setting the reference to the domain idef
        idef_ = &domainIdef;
    }
    else
    {
        idef_ = &idefSelection_;

        selectInteractions(&idefSelection_, domainIdef, interactionSelection_);

        idefSelection_.ilsort = domainIdef.ilsort;

        if (interactionSelection_.test(static_cast<int>(ListedForces::InteractionGroup::Rest)))
        {
            idefSelection_.iparams_posres   = domainIdef.iparams_posres;
            idefSelection_.iparams_fbposres = domainIdef.iparams_fbposres;
        }
        else
        {
            idefSelection_.iparams_posres.clear();
            idefSelection_.iparams_fbposres.clear();
        }
    }

    setup_bonded_threading(threading_.get(), numAtomsForce, useGpu, *idef_);

    if (idef_->ilsort == ilsortFE_SORTED)
    {
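        /* Size the flat real buffer to hold one rvec4 (4 reals) per atom;
         * calc_listed_lambda() reinterprets it as an rvec4 force buffer. */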
        forceBufferLambda_.resize(numAtomsForce * sizeof(rvec4) / sizeof(real));
        shiftForceBufferLambda_.resize(SHIFTS);
    }
}

namespace
{

using gmx::ArrayRef;

/*! \brief Return true if ftype is an explicit pair-listed LJ or
 * COULOMB interaction type: bonded LJ (usually 1-4), or special
 * listed non-bonded for FEP. */
bool isPairInteraction(int ftype)
{
    return ((ftype) >= F_LJ14 && (ftype) <= F_LJC_PAIRS_NB);
}

/*! \brief Zero thread-local output buffers */
void zero_thread_output(f_thread_t* f_t)
{
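    /* f_t->f is a buffer of rvec4 entries, so nelem_fa is the number of
     * reals per entry that need clearing */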
    constexpr int nelem_fa = sizeof(f_t->f[0]) / sizeof(real);

    for (int i = 0; i < f_t->nblock_used; i++)
    {
        int a0 = f_t->block_index[i] * reduction_block_size;
        int a1 = a0 + reduction_block_size;
        for (int a = a0; a < a1; a++)
        {
            for (int d = 0; d < nelem_fa; d++)
            {
                f_t->f[a][d] = 0;
            }
        }
    }

    for (int i = 0; i < SHIFTS; i++)
    {
        clear_rvec(f_t->fshift[i]);
    }
    for (int i = 0; i < F_NRE; i++)
    {
        f_t->ener[i] = 0;
    }
    for (int i = 0; i < egNR; i++)
    {
        for (int j = 0; j < f_t->grpp.nener; j++)
        {
            f_t->grpp.ener[i][j] = 0;
        }
    }
    for (int i = 0; i < efptNR; i++)
    {
        f_t->dvdl[i] = 0;
    }
}

/*! \brief The maximum thread count is arbitrary; we use a fixed number
 * to avoid memory management. Using more than 16 threads for the reduction
 * is probably never useful performance-wise, so this limit is generous. */
#define MAX_BONDED_THREADS 256

/*! \brief Reduce thread-local force buffers */
void reduce_thread_forces(gmx::ArrayRef<gmx::RVec> force, const bonded_threading_t* bt, int nthreads)
{
    if (nthreads > MAX_BONDED_THREADS)
    {
        gmx_fatal(FARGS, "Can not reduce bonded forces on more than %d threads", MAX_BONDED_THREADS);
    }

    rvec* gmx_restrict f = as_rvec_array(force.data());

    const int numAtomsForce = bt->numAtomsForce;

    /* This reduction can run on any number of threads,
     * independently of bt->nthreads.
     * But if nthreads matches bt->nthreads (which it currently does),
     * the uniform distribution of the touched blocks over nthreads
     * matches the distribution of the bonded interactions over threads
     * well in most cases, so threads mostly reduce their own data,
     * which increases the number of cache hits.
     */
#pragma omp parallel for num_threads(nthreads) schedule(static)
    for (int b = 0; b < bt->nblock_used; b++)
    {
        try
        {
            int    ind = bt->block_index[b];
            rvec4* fp[MAX_BONDED_THREADS];

            /* Determine which threads contribute to this block */
            int nfb = 0;
            for (int ft = 0; ft < bt->nthreads; ft++)
            {
                if (bitmask_is_set(bt->mask[ind], ft))
                {
                    fp[nfb++] = bt->f_t[ft]->f;
                }
            }
            if (nfb > 0)
            {
                /* Reduce force buffers for threads that contribute */
                int a0 = ind * reduction_block_size;
                int a1 = (ind + 1) * reduction_block_size;
                /* It would be nice if we could pad f to avoid this min */
                a1 = std::min(a1, numAtomsForce);
                for (int a = a0; a < a1; a++)
                {
                    for (int fb = 0; fb < nfb; fb++)
                    {
                        rvec_inc(f[a], fp[fb][a]);
                    }
                }
            }
        }
        GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
    }
}

/*! \brief Reduce thread-local forces, shift forces and energies */
void reduce_thread_output(gmx::ForceWithShiftForces* forceWithShiftForces,
                          real*                      ener,
                          gmx_grppairener_t*         grpp,
                          real*                      dvdl,
                          const bonded_threading_t*  bt,
                          const gmx::StepWorkload&   stepWork)
{
    assert(bt->haveBondeds);

    if (bt->nblock_used > 0)
    {
        /* Reduce the bonded force buffer */
        reduce_thread_forces(forceWithShiftForces->force(), bt, bt->nthreads);
    }

    rvec* gmx_restrict fshift = as_rvec_array(forceWithShiftForces->shiftForces().data());

    /* When necessary, reduce energies, shift forces and dH/dlambda using one
     * thread only. Thread 0 already wrote directly to the main output buffers,
     * so the reductions below start at t = 1. */
    if ((stepWork.computeEnergy || stepWork.computeVirial || stepWork.computeDhdl) && bt->nthreads > 1)
    {
        gmx::ArrayRef<const std::unique_ptr<f_thread_t>> f_t = bt->f_t;

        if (stepWork.computeVirial)
        {
            for (int i = 0; i < SHIFTS; i++)
            {
                for (int t = 1; t < bt->nthreads; t++)
                {
                    rvec_inc(fshift[i], f_t[t]->fshift[i]);
                }
            }
        }
        if (stepWork.computeEnergy)
        {
            for (int i = 0; i < F_NRE; i++)
            {
                for (int t = 1; t < bt->nthreads; t++)
                {
                    ener[i] += f_t[t]->ener[i];
                }
            }
            for (int i = 0; i < egNR; i++)
            {
                for (int j = 0; j < f_t[1]->grpp.nener; j++)
                {
                    for (int t = 1; t < bt->nthreads; t++)
                    {
                        grpp->ener[i][j] += f_t[t]->grpp.ener[i][j];
                    }
                }
            }
        }
        if (stepWork.computeDhdl)
        {
            for (int i = 0; i < efptNR; i++)
            {
                for (int t = 1; t < bt->nthreads; t++)
                {
                    dvdl[i] += f_t[t]->dvdl[i];
                }
            }
        }
    }
}

/*! \brief Returns the bonded kernel flavor
 *
 * Note that energies are always requested when the virial
 * is requested (performance gain would be small).
 * Note that currently we do not have bonded kernels that
 * do not compute forces.
 */
BondedKernelFlavor selectBondedKernelFlavor(const gmx::StepWorkload& stepWork,
                                            const bool               useSimdKernels,
                                            const bool               havePerturbedInteractions)
{
    BondedKernelFlavor flavor;
    if (stepWork.computeEnergy || stepWork.computeVirial)
    {
        if (stepWork.computeVirial)
        {
            flavor = BondedKernelFlavor::ForcesAndVirialAndEnergy;
        }
        else
        {
            flavor = BondedKernelFlavor::ForcesAndEnergy;
        }
    }
    else
    {
        if (useSimdKernels && !havePerturbedInteractions)
        {
            flavor = BondedKernelFlavor::ForcesSimdWhenAvailable;
        }
        else
        {
            flavor = BondedKernelFlavor::ForcesNoSimd;
        }
    }

    return flavor;
}

/*! \brief Calculate one element of the list of bonded interactions
    for this thread */
real calc_one_bond(int                           thread,
                   int                           ftype,
                   const InteractionDefinitions& idef,
                   ArrayRef<const int>           iatoms,
                   const int                     numNonperturbedInteractions,
                   const WorkDivision&           workDivision,
                   const rvec                    x[],
                   rvec4                         f[],
                   rvec                          fshift[],
                   const t_forcerec*             fr,
                   const t_pbc*                  pbc,
                   gmx_grppairener_t*            grpp,
                   t_nrnb*                       nrnb,
                   const real*                   lambda,
                   real*                         dvdl,
                   const t_mdatoms*              md,
                   t_fcdata*                     fcd,
                   const gmx::StepWorkload&      stepWork,
                   int*                          global_atom_index)
{
    GMX_ASSERT(idef.ilsort == ilsortNO_FE || idef.ilsort == ilsortFE_SORTED,
               "The topology should be marked either as no FE or sorted on FE");

    const bool havePerturbedInteractions =
            (idef.ilsort == ilsortFE_SORTED && numNonperturbedInteractions < iatoms.ssize());
    BondedKernelFlavor flavor =
            selectBondedKernelFlavor(stepWork, fr->use_simd_kernels, havePerturbedInteractions);
    int efptFTYPE;
    if (IS_RESTRAINT_TYPE(ftype))
    {
        efptFTYPE = efptRESTRAINT;
    }
    else
    {
        efptFTYPE = efptBONDED;
    }

    const int nat1   = interaction_function[ftype].nratoms + 1;
    const int nbonds = iatoms.ssize() / nat1;

    GMX_ASSERT(fr->gpuBonded != nullptr || workDivision.end(ftype) == iatoms.ssize(),
               "The thread division should match the topology");

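    /* The bounds of this thread's slice of the flattened iatoms list; the
     * work division is in units of iatoms entries, i.e. multiples of nat1 */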
    const int nb0 = workDivision.bound(ftype, thread);
    const int nbn = workDivision.bound(ftype, thread + 1) - nb0;

    ArrayRef<const t_iparams> iparams = idef.iparams;

    real v = 0;
    if (!isPairInteraction(ftype))
    {
        if (ftype == F_CMAP)
        {
            /* TODO It would be nice to account the execution time for CMAP
               dihedrals to its own subtimer, but first wallcycle needs to be
               extended to support calling from multiple threads. */
            v = cmap_dihs(nbn, iatoms.data() + nb0, iparams.data(), &idef.cmap_grid, x, f, fshift,
                          pbc, lambda[efptFTYPE], &(dvdl[efptFTYPE]), md, fcd, global_atom_index);
        }
        else
        {
            v = calculateSimpleBond(ftype, nbn, iatoms.data() + nb0, iparams.data(), x, f, fshift,
                                    pbc, lambda[efptFTYPE], &(dvdl[efptFTYPE]), md, fcd,
                                    global_atom_index, flavor);
        }
    }
    else
    {
        /* TODO It would be nice to account the execution time for pairs to
           its own subtimer, but first wallcycle needs to be extended to
           support calling from multiple threads. */
        do_pairs(ftype, nbn, iatoms.data() + nb0, iparams.data(), x, f, fshift, pbc, lambda, dvdl,
                 md, fr, havePerturbedInteractions, stepWork, grpp, global_atom_index);
    }

    if (thread == 0)
    {
        inc_nrnb(nrnb, nrnbIndex(ftype), nbonds);
    }

    return v;
}

} // namespace

/*! \brief Compute the bonded part of the listed forces, parallelized over threads
 */
static void calcBondedForces(const InteractionDefinitions& idef,
                             bonded_threading_t*           bt,
                             const rvec                    x[],
                             const t_forcerec*             fr,
                             const t_pbc*                  pbc_null,
                             rvec*                         fshiftMasterBuffer,
                             gmx_enerdata_t*               enerd,
                             t_nrnb*                       nrnb,
                             const real*                   lambda,
                             real*                         dvdl,
                             const t_mdatoms*              md,
                             t_fcdata*                     fcd,
                             const gmx::StepWorkload&      stepWork,
                             int*                          global_atom_index)
{
#pragma omp parallel for num_threads(bt->nthreads) schedule(static)
    for (int thread = 0; thread < bt->nthreads; thread++)
    {
        try
        {
            f_thread_t& threadBuffers = *bt->f_t[thread];
            int         ftype;
            real *      epot, v;
            /* Per-thread output buffers */
            rvec*              fshift;
            real*              dvdlt;
            gmx_grppairener_t* grpp;

            zero_thread_output(&threadBuffers);

            rvec4* ft = threadBuffers.f;

            /* Thread 0 writes directly to the main output buffers.
             * We might want to reconsider this.
             */
            if (thread == 0)
            {
                fshift = fshiftMasterBuffer;
                epot   = enerd->term;
                grpp   = &enerd->grpp;
                dvdlt  = dvdl;
            }
            else
            {
                fshift = as_rvec_array(threadBuffers.fshift.data());
                epot   = threadBuffers.ener;
                grpp   = &threadBuffers.grpp;
                dvdlt  = threadBuffers.dvdl;
            }
            /* Loop over all bonded force types to calculate the bonded forces */
            for (ftype = 0; (ftype < F_NRE); ftype++)
            {
                const InteractionList& ilist = idef.il[ftype];
                if (!ilist.empty() && ftype_is_bonded_potential(ftype))
                {
                    ArrayRef<const int> iatoms = gmx::makeConstArrayRef(ilist.iatoms);
                    v = calc_one_bond(thread, ftype, idef, iatoms, idef.numNonperturbedInteractions[ftype],
                                      bt->workDivision, x, ft, fshift, fr, pbc_null, grpp, nrnb,
                                      lambda, dvdlt, md, fcd, stepWork, global_atom_index);
                    epot[ftype] += v;
                }
            }
        }
        GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
    }
}

bool ListedForces::haveRestraints(const t_fcdata& fcdata) const
{
    GMX_ASSERT(fcdata.orires && fcdata.disres, "NMR restraints objects should be set up");

    return (!idef_->il[F_POSRES].empty() || !idef_->il[F_FBPOSRES].empty() || fcdata.orires->nr > 0
            || fcdata.disres->nres > 0);
}

bool ListedForces::haveCpuBondeds() const
{
    return threading_->haveBondeds;
}

bool ListedForces::haveCpuListedForces(const t_fcdata& fcdata) const
{
    return haveCpuBondeds() || haveRestraints(fcdata);
}

namespace
{

/*! \brief Calculates all listed force interactions. */
void calc_listed(struct gmx_wallcycle*         wcycle,
                 const InteractionDefinitions& idef,
                 bonded_threading_t*           bt,
                 const rvec                    x[],
                 gmx::ForceOutputs*            forceOutputs,
                 const t_forcerec*             fr,
                 const t_pbc*                  pbc,
                 gmx_enerdata_t*               enerd,
                 t_nrnb*                       nrnb,
                 const real*                   lambda,
                 const t_mdatoms*              md,
                 t_fcdata*                     fcd,
                 int*                          global_atom_index,
                 const gmx::StepWorkload&      stepWork)
{
    if (bt->haveBondeds)
    {
        gmx::ForceWithShiftForces& forceWithShiftForces = forceOutputs->forceWithShiftForces();

        wallcycle_sub_start(wcycle, ewcsLISTED);
        /* Local accumulation buffer for dH/dlambda; its contents are added to
           enerd->dvdl_nonlin below when dH/dl is requested, and discarded otherwise */
        real dvdl[efptNR] = { 0 };
        calcBondedForces(idef, bt, x, fr, fr->bMolPBC ? pbc : nullptr,
                         as_rvec_array(forceWithShiftForces.shiftForces().data()), enerd, nrnb,
                         lambda, dvdl, md, fcd, stepWork, global_atom_index);
        wallcycle_sub_stop(wcycle, ewcsLISTED);

        wallcycle_sub_start(wcycle, ewcsLISTED_BUF_OPS);
        reduce_thread_output(&forceWithShiftForces, enerd->term, &enerd->grpp, dvdl, bt, stepWork);

        if (stepWork.computeDhdl)
        {
            for (int i = 0; i < efptNR; i++)
            {
                enerd->dvdl_nonlin[i] += dvdl[i];
            }
        }
        wallcycle_sub_stop(wcycle, ewcsLISTED_BUF_OPS);
    }

    /* Copy the sum of violations for the distance restraints from fcd */
    if (fcd)
    {
        enerd->term[F_DISRESVIOL] = fcd->disres->sumviol;
    }
}

/*! \brief As calc_listed(), but only determines the potential energy
 * for the perturbed interactions.
 *
 * The shift forces in fr are not affected.
 */
void calc_listed_lambda(const InteractionDefinitions& idef,
                        bonded_threading_t*           bt,
                        const rvec                    x[],
                        const t_forcerec*             fr,
                        const struct t_pbc*           pbc,
                        gmx::ArrayRef<real>           forceBufferLambda,
                        gmx::ArrayRef<gmx::RVec>      shiftForceBufferLambda,
                        gmx_grppairener_t*            grpp,
                        real*                         epot,
                        gmx::ArrayRef<real>           dvdl,
                        t_nrnb*                       nrnb,
                        const real*                   lambda,
                        const t_mdatoms*              md,
                        t_fcdata*                     fcd,
                        int*                          global_atom_index)
{
    WorkDivision& workDivision = bt->foreignLambdaWorkDivision;

    const t_pbc* pbc_null;
    if (fr->bMolPBC)
    {
        pbc_null = pbc;
    }
    else
    {
        pbc_null = nullptr;
    }

    /* The output forces were already computed at the current lambda, so only
     * energies are needed here; forces go into temporary buffers and are
     * discarded */
    std::fill(forceBufferLambda.begin(), forceBufferLambda.end(), 0.0_real);
    std::fill(shiftForceBufferLambda.begin(), shiftForceBufferLambda.end(),
              gmx::RVec{ 0.0_real, 0.0_real, 0.0_real });
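    /* View the flat real buffer as rvec4 force entries, the layout that
     * calc_one_bond() expects */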
    rvec4* f      = reinterpret_cast<rvec4*>(forceBufferLambda.data());
    rvec*  fshift = as_rvec_array(shiftForceBufferLambda.data());

    /* Loop over all bonded force types to calculate the bonded energies */
    for (int ftype = 0; (ftype < F_NRE); ftype++)
    {
        if (ftype_is_bonded_potential(ftype))
        {
            const InteractionList& ilist = idef.il[ftype];
            /* Create a temporary iatom list with only perturbed interactions */
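            /* With ilsortFE_SORTED the perturbed interactions are stored after
             * the non-perturbed ones, so the tail of each list is exactly the
             * perturbed set */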
            const int           numNonperturbed = idef.numNonperturbedInteractions[ftype];
            ArrayRef<const int> iatomsPerturbed = gmx::constArrayRefFromArray(
                    ilist.iatoms.data() + numNonperturbed, ilist.size() - numNonperturbed);
            if (!iatomsPerturbed.empty())
            {
                /* Set the work range of thread 0 to the perturbed bondeds */
                workDivision.setBound(ftype, 0, 0);
                workDivision.setBound(ftype, 1, iatomsPerturbed.ssize());

                gmx::StepWorkload tempFlags;
                tempFlags.computeEnergy = true;
                real v = calc_one_bond(0, ftype, idef, iatomsPerturbed, iatomsPerturbed.ssize(),
                                       workDivision, x, f, fshift, fr, pbc_null, grpp, nrnb, lambda,
                                       dvdl.data(), md, fcd, tempFlags, global_atom_index);
                epot[ftype] += v;
            }
        }
    }
}

} // namespace

void ListedForces::calculate(struct gmx_wallcycle*                     wcycle,
                             const matrix                              box,
                             const t_lambda*                           fepvals,
                             const t_commrec*                          cr,
                             const gmx_multisim_t*                     ms,
                             gmx::ArrayRefWithPadding<const gmx::RVec> coordinates,
                             gmx::ArrayRef<const gmx::RVec>            xWholeMolecules,
                             t_fcdata*                                 fcdata,
                             history_t*                                hist,
                             gmx::ForceOutputs*                        forceOutputs,
                             const t_forcerec*                         fr,
                             const struct t_pbc*                       pbc,
                             gmx_enerdata_t*                           enerd,
                             t_nrnb*                                   nrnb,
                             const real*                               lambda,
                             const t_mdatoms*                          md,
                             int*                                      global_atom_index,
                             const gmx::StepWorkload&                  stepWork)
{
    if (interactionSelection_.none() || !stepWork.computeListedForces)
    {
        return;
    }

    const InteractionDefinitions& idef = *idef_;

    // TODO: replace all rvec use here with ArrayRefWithPadding
    const rvec* x = as_rvec_array(coordinates.paddedArrayRef().data());

    t_pbc pbc_full; /* Full PBC is needed for position restraints */
    if (haveRestraints(*fcdata))
    {
        if (!idef.il[F_POSRES].empty() || !idef.il[F_FBPOSRES].empty())
        {
            /* Not enough flops to bother counting */
            set_pbc(&pbc_full, fr->pbcType, box);
        }

        /* TODO Use of restraints triggers further function calls
           inside the loop over calc_one_bond(), but those are too
           awkward to account to this subtimer properly in the present
           code. We don't test / care much about performance with
           restraints, anyway. */
        wallcycle_sub_start(wcycle, ewcsRESTRAINTS);

        if (!idef.il[F_POSRES].empty())
        {
            posres_wrapper(nrnb, idef, &pbc_full, x, enerd, lambda, fr, &forceOutputs->forceWithVirial());
        }

        if (!idef.il[F_FBPOSRES].empty())
        {
            fbposres_wrapper(nrnb, idef, &pbc_full, x, enerd, fr, &forceOutputs->forceWithVirial());
        }

        /* Do the pre-force-calculation work for the NMR restraints, which might require communication */
        if (fcdata->orires->nr > 0)
        {
            GMX_ASSERT(!xWholeMolecules.empty(), "Need whole molecules for orientation restraints");
            enerd->term[F_ORIRESDEV] = calc_orires_dev(
                    ms, idef.il[F_ORIRES].size(), idef.il[F_ORIRES].iatoms.data(), idef.iparams.data(),
                    md, xWholeMolecules, x, fr->bMolPBC ? pbc : nullptr, fcdata->orires, hist);
        }
        if (fcdata->disres->nres > 0)
        {
            calc_disres_R_6(cr, ms, idef.il[F_DISRES].size(), idef.il[F_DISRES].iatoms.data(), x,
                            fr->bMolPBC ? pbc : nullptr, fcdata->disres, hist);
        }

        wallcycle_sub_stop(wcycle, ewcsRESTRAINTS);
    }

    calc_listed(wcycle, idef, threading_.get(), x, forceOutputs, fr, pbc, enerd, nrnb, lambda, md,
                fcdata, global_atom_index, stepWork);

    /* Check whether we have to determine energy differences
     * at foreign lambdas.
     */
    if (fepvals->n_lambda > 0 && stepWork.computeDhdl)
    {
        real dvdl[efptNR] = { 0 };
        if (!idef.il[F_POSRES].empty())
        {
            posres_wrapper_lambda(wcycle, fepvals, idef, &pbc_full, x, enerd, lambda, fr);
        }
        if (idef.ilsort != ilsortNO_FE)
        {
            wallcycle_sub_start(wcycle, ewcsLISTED_FEP);
            if (idef.ilsort != ilsortFE_SORTED)
            {
                gmx_incons("The bonded interactions are not sorted for free energy");
            }
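            /* Index 0 is the current lambda state; indices >= 1 are the
             * foreign lambda values taken from fepvals->all_lambda */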
            for (int i = 0; i < 1 + enerd->foreignLambdaTerms.numLambdas(); i++)
            {
                real lam_i[efptNR];

                reset_foreign_enerdata(enerd);
                for (int j = 0; j < efptNR; j++)
                {
                    lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i - 1]);
                }
                calc_listed_lambda(idef, threading_.get(), x, fr, pbc, forceBufferLambda_,
                                   shiftForceBufferLambda_, &(enerd->foreign_grpp), enerd->foreign_term,
                                   dvdl, nrnb, lam_i, md, fcdata, global_atom_index);
                sum_epot(enerd->foreign_grpp, enerd->foreign_term);
                const double dvdlSum = std::accumulate(std::begin(dvdl), std::end(dvdl), 0.);
                std::fill(std::begin(dvdl), std::end(dvdl), 0.0);
                enerd->foreignLambdaTerms.accumulate(i, enerd->foreign_term[F_EPOT], dvdlSum);
            }
            wallcycle_sub_stop(wcycle, ewcsLISTED_FEP);
        }
    }
}