src/gromacs/listed_forces/listed_forces.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
   5  * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
   6  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   7  * and including many others, as listed in the AUTHORS file in the
   8  * top-level source directory and at http://www.gromacs.org.
   9  *
  10  * GROMACS is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public License
  12  * as published by the Free Software Foundation; either version 2.1
  13  * of the License, or (at your option) any later version.
  14  *
  15  * GROMACS is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with GROMACS; if not, see
  22  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  23  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  24  *
  25  * If you want to redistribute modifications to GROMACS, please
  26  * consider that scientific software is very special. Version
  27  * control is crucial - bugs must be traceable. We will be happy to
  28  * consider code for inclusion in the official distribution, but
  29  * derived work must not be called official GROMACS. Details are found
  30  * in the README & COPYING files - if they are missing, get the
  31  * official version at http://www.gromacs.org.
  32  *
  33  * To help us fund GROMACS development, we humbly ask that you cite
  34  * the research papers on the package. Check out http://www.gromacs.org.
  35  */
  36 /*! \internal \file
  37  *
  38  * \brief This file defines high-level functions for mdrun to compute
  39  * energies and forces for listed interactions.
  40  *
  41  * \author Mark Abraham <mark.j.abraham@gmail.com>
  42  *
  43  * \ingroup module_listed_forces
  44  */
  45 #include "gmxpre.h"
  46
  47 #include "listed_forces.h"
  48
  49 #include <cassert>
  50
  51 #include <algorithm>
  52 #include <array>
  53 #include <numeric>
  54
  55 #include "gromacs/gmxlib/network.h"
  56 #include "gromacs/gmxlib/nrnb.h"
  57 #include "gromacs/listed_forces/bonded.h"
  58 #include "gromacs/listed_forces/disre.h"
  59 #include "gromacs/listed_forces/orires.h"
  60 #include "gromacs/listed_forces/pairs.h"
  61 #include "gromacs/listed_forces/position_restraints.h"
  62 #include "gromacs/math/vec.h"
  63 #include "gromacs/mdlib/enerdata_utils.h"
  64 #include "gromacs/mdlib/force.h"
  65 #include "gromacs/mdlib/gmx_omp_nthreads.h"
  66 #include "gromacs/mdtypes/commrec.h"
  67 #include "gromacs/mdtypes/fcdata.h"
  68 #include "gromacs/mdtypes/forceoutput.h"
  69 #include "gromacs/mdtypes/forcerec.h"
  70 #include "gromacs/mdtypes/inputrec.h"
  71 #include "gromacs/mdtypes/md_enums.h"
  72 #include "gromacs/mdtypes/simulation_workload.h"
  73 #include "gromacs/pbcutil/ishift.h"
  74 #include "gromacs/pbcutil/pbc.h"
  75 #include "gromacs/timing/wallcycle.h"
  76 #include "gromacs/topology/topology.h"
  77 #include "gromacs/utility/exceptions.h"
  78 #include "gromacs/utility/fatalerror.h"
  79
  80 #include "listed_internal.h"
  81 #include "manage_threading.h"
  82 #include "utilities.h"
  83
  84 ListedForces::ListedForces(const gmx_ffparams_t&      ffparams,
  85                            const int                  numEnergyGroups,
  86                            const int                  numThreads,
  87                            const InteractionSelection interactionSelection,
  88                            FILE*                      fplog) :
  89     idefSelection_(ffparams),
  90     threading_(std::make_unique<bonded_threading_t>(numThreads, numEnergyGroups, fplog)),
  91     interactionSelection_(interactionSelection)
  92 {
  93 }
  94
  95 ListedForces::ListedForces(ListedForces&& o) noexcept = default;
  96
  97 ListedForces::~ListedForces() = default;
  98
  99 //! Copies the selection interactions from \p idefSrc to \p idef
 100 static void selectInteractions(InteractionDefinitions*                  idef,
 101                                const InteractionDefinitions&            idefSrc,
 102                                const ListedForces::InteractionSelection interactionSelection)
 103 {
 104     const bool selectPairs =
 105             interactionSelection.test(static_cast<int>(ListedForces::InteractionGroup::Pairs));
 106     const bool selectDihedrals =
 107             interactionSelection.test(static_cast<int>(ListedForces::InteractionGroup::Dihedrals));
 108     const bool selectAngles =
 109             interactionSelection.test(static_cast<int>(ListedForces::InteractionGroup::Angles));
 110     const bool selectRest =
 111             interactionSelection.test(static_cast<int>(ListedForces::InteractionGroup::Rest));
 112
 113     for (int ftype = 0; ftype < F_NRE; ftype++)
 114     {
 115         const t_interaction_function& ifunc = interaction_function[ftype];
 116         if (ifunc.flags & IF_BOND)
 117         {
 118             bool assign = false;
 119             if (ifunc.flags & IF_PAIR)
 120             {
 121                 assign = selectPairs;
 122             }
 123             else if (ifunc.flags & IF_DIHEDRAL)
 124             {
 125                 assign = selectDihedrals;
 126             }
 127             else if (ifunc.flags & IF_ATYPE)
 128             {
 129                 assign = selectAngles;
 130             }
 131             else
 132             {
 133                 assign = selectRest;
 134             }
 135             if (assign)
 136             {
 137                 idef->il[ftype] = idefSrc.il[ftype];
 138             }
 139             else
 140             {
 141                 idef->il[ftype].clear();
 142             }
 143         }
 144     }
 145 }
 146
 147 void ListedForces::setup(const InteractionDefinitions& domainIdef, const int numAtomsForce, const bool useGpu)
 148 {
 149     if (interactionSelection_.all())
 150     {
 151         // Avoid the overhead of copying all interaction lists by simply setting the reference to the domain idef
 152         idef_ = &domainIdef;
 153     }
 154     else
 155     {
 156         idef_ = &idefSelection_;
 157
 158         selectInteractions(&idefSelection_, domainIdef, interactionSelection_);
 159
 160         idefSelection_.ilsort = domainIdef.ilsort;
 161
 162         if (interactionSelection_.test(static_cast<int>(ListedForces::InteractionGroup::Rest)))
 163         {
 164             idefSelection_.iparams_posres   = domainIdef.iparams_posres;
 165             idefSelection_.iparams_fbposres = domainIdef.iparams_fbposres;
 166         }
 167         else
 168         {
 169             idefSelection_.iparams_posres.clear();
 170             idefSelection_.iparams_fbposres.clear();
 171         }
 172     }
 173
 174     setup_bonded_threading(threading_.get(), numAtomsForce, useGpu, *idef_);
 175
 176     if (idef_->ilsort == ilsortFE_SORTED)
 177     {
 178         forceBufferLambda_.resize(numAtomsForce * sizeof(rvec4) / sizeof(real));
 179         shiftForceBufferLambda_.resize(SHIFTS);
 180     }
 181 }
 182
 183 namespace
 184 {
 185
 186 using gmx::ArrayRef;
 187
 188 /*! \brief Return true if ftype is an explicit pair-listed LJ or
 189  * COULOMB interaction type: bonded LJ (usually 1-4), or special
 190  * listed non-bonded for FEP. */
 191 bool isPairInteraction(int ftype)
 192 {
 193     return ((ftype) >= F_LJ14 && (ftype) <= F_LJC_PAIRS_NB);
 194 }
 195
 196 /*! \brief Zero thread-local output buffers */
 197 void zero_thread_output(f_thread_t* f_t)
 198 {
 199     constexpr int nelem_fa = sizeof(f_t->f[0]) / sizeof(real);
 200
 201     for (int i = 0; i < f_t->nblock_used; i++)
 202     {
 203         int a0 = f_t->block_index[i] * reduction_block_size;
 204         int a1 = a0 + reduction_block_size;
 205         for (int a = a0; a < a1; a++)
 206         {
 207             for (int d = 0; d < nelem_fa; d++)
 208             {
 209                 f_t->f[a][d] = 0;
 210             }
 211         }
 212     }
 213
 214     for (int i = 0; i < SHIFTS; i++)
 215     {
 216         clear_rvec(f_t->fshift[i]);
 217     }
 218     for (int i = 0; i < F_NRE; i++)
 219     {
 220         f_t->ener[i] = 0;
 221     }
 222     for (int i = 0; i < egNR; i++)
 223     {
 224         for (int j = 0; j < f_t->grpp.nener; j++)
 225         {
 226             f_t->grpp.ener[i][j] = 0;
 227         }
 228     }
 229     for (int i = 0; i < efptNR; i++)
 230     {
 231         f_t->dvdl[i] = 0;
 232     }
 233 }
 234
 235 /*! \brief The max thread number is arbitrary, we used a fixed number
 236  * to avoid memory management.  Using more than 16 threads is probably
 237  * never useful performance wise. */
 238 #define MAX_BONDED_THREADS 256
 239
 240 /*! \brief Reduce thread-local force buffers */
 241 void reduce_thread_forces(gmx::ArrayRef<gmx::RVec> force, const bonded_threading_t* bt, int nthreads)
 242 {
 243     if (nthreads > MAX_BONDED_THREADS)
 244     {
 245         gmx_fatal(FARGS, "Can not reduce bonded forces on more than %d threads", MAX_BONDED_THREADS);
 246     }
 247
 248     rvec* gmx_restrict f = as_rvec_array(force.data());
 249
 250     const int numAtomsForce = bt->numAtomsForce;
 251
 252     /* This reduction can run on any number of threads,
 253      * independently of bt->nthreads.
 254      * But if nthreads matches bt->nthreads (which it currently does)
 255      * the uniform distribution of the touched blocks over nthreads will
 256      * match the distribution of bonded over threads well in most cases,
 257      * which means that threads mostly reduce their own data which increases
 258      * the number of cache hits.
 259      */
 260 #pragma omp parallel for num_threads(nthreads) schedule(static)
 261     for (int b = 0; b < bt->nblock_used; b++)
 262     {
 263         try
 264         {
 265             int    ind = bt->block_index[b];
 266             rvec4* fp[MAX_BONDED_THREADS];
 267
 268             /* Determine which threads contribute to this block */
 269             int nfb = 0;
 270             for (int ft = 0; ft < bt->nthreads; ft++)
 271             {
 272                 if (bitmask_is_set(bt->mask[ind], ft))
 273                 {
 274                     fp[nfb++] = bt->f_t[ft]->f;
 275                 }
 276             }
 277             if (nfb > 0)
 278             {
 279                 /* Reduce force buffers for threads that contribute */
 280                 int a0 = ind * reduction_block_size;
 281                 int a1 = (ind + 1) * reduction_block_size;
 282                 /* It would be nice if we could pad f to avoid this min */
 283                 a1 = std::min(a1, numAtomsForce);
 284                 for (int a = a0; a < a1; a++)
 285                 {
 286                     for (int fb = 0; fb < nfb; fb++)
 287                     {
 288                         rvec_inc(f[a], fp[fb][a]);
 289                     }
 290                 }
 291             }
 292         }
 293         GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
 294     }
 295 }
 296
 297 /*! \brief Reduce thread-local forces, shift forces and energies */
 298 void reduce_thread_output(gmx::ForceWithShiftForces* forceWithShiftForces,
 299                           real*                      ener,
 300                           gmx_grppairener_t*         grpp,
 301                           real*                      dvdl,
 302                           const bonded_threading_t*  bt,
 303                           const gmx::StepWorkload&   stepWork)
 304 {
 305     assert(bt->haveBondeds);
 306
 307     if (bt->nblock_used > 0)
 308     {
 309         /* Reduce the bonded force buffer */
 310         reduce_thread_forces(forceWithShiftForces->force(), bt, bt->nthreads);
 311     }
 312
 313     rvec* gmx_restrict fshift = as_rvec_array(forceWithShiftForces->shiftForces().data());
 314
 315     /* When necessary, reduce energy and virial using one thread only */
 316     if ((stepWork.computeEnergy || stepWork.computeVirial || stepWork.computeDhdl) && bt->nthreads > 1)
 317     {
 318         gmx::ArrayRef<const std::unique_ptr<f_thread_t>> f_t = bt->f_t;
 319
 320         if (stepWork.computeVirial)
 321         {
 322             for (int i = 0; i < SHIFTS; i++)
 323             {
 324                 for (int t = 1; t < bt->nthreads; t++)
 325                 {
 326                     rvec_inc(fshift[i], f_t[t]->fshift[i]);
 327                 }
 328             }
 329         }
 330         if (stepWork.computeEnergy)
 331         {
 332             for (int i = 0; i < F_NRE; i++)
 333             {
 334                 for (int t = 1; t < bt->nthreads; t++)
 335                 {
 336                     ener[i] += f_t[t]->ener[i];
 337                 }
 338             }
 339             for (int i = 0; i < egNR; i++)
 340             {
 341                 for (int j = 0; j < f_t[1]->grpp.nener; j++)
 342                 {
 343                     for (int t = 1; t < bt->nthreads; t++)
 344                     {
 345                         grpp->ener[i][j] += f_t[t]->grpp.ener[i][j];
 346                     }
 347                 }
 348             }
 349         }
 350         if (stepWork.computeDhdl)
 351         {
 352             for (int i = 0; i < efptNR; i++)
 353             {
 354
 355                 for (int t = 1; t < bt->nthreads; t++)
 356                 {
 357                     dvdl[i] += f_t[t]->dvdl[i];
 358                 }
 359             }
 360         }
 361     }
 362 }
 363
 364 /*! \brief Returns the bonded kernel flavor
 365  *
 366  * Note that energies are always requested when the virial
 367  * is requested (performance gain would be small).
 368  * Note that currently we do not have bonded kernels that
 369  * do not compute forces.
 370  */
 371 BondedKernelFlavor selectBondedKernelFlavor(const gmx::StepWorkload& stepWork,
 372                                             const bool               useSimdKernels,
 373                                             const bool               havePerturbedInteractions)
 374 {
 375     BondedKernelFlavor flavor;
 376     if (stepWork.computeEnergy || stepWork.computeVirial)
 377     {
 378         if (stepWork.computeVirial)
 379         {
 380             flavor = BondedKernelFlavor::ForcesAndVirialAndEnergy;
 381         }
 382         else
 383         {
 384             flavor = BondedKernelFlavor::ForcesAndEnergy;
 385         }
 386     }
 387     else
 388     {
 389         if (useSimdKernels && !havePerturbedInteractions)
 390         {
 391             flavor = BondedKernelFlavor::ForcesSimdWhenAvailable;
 392         }
 393         else
 394         {
 395             flavor = BondedKernelFlavor::ForcesNoSimd;
 396         }
 397     }
 398
 399     return flavor;
 400 }
 401
 402 /*! \brief Calculate one element of the list of bonded interactions
 403     for this thread */
 404 real calc_one_bond(int                           thread,
 405                    int                           ftype,
 406                    const InteractionDefinitions& idef,
 407                    ArrayRef<const int>           iatoms,
 408                    const int                     numNonperturbedInteractions,
 409                    const WorkDivision&           workDivision,
 410                    const rvec                    x[],
 411                    rvec4                         f[],
 412                    rvec                          fshift[],
 413                    const t_forcerec*             fr,
 414                    const t_pbc*                  pbc,
 415                    gmx_grppairener_t*            grpp,
 416                    t_nrnb*                       nrnb,
 417                    const real*                   lambda,
 418                    real*                         dvdl,
 419                    const t_mdatoms*              md,
 420                    t_fcdata*                     fcd,
 421                    const gmx::StepWorkload&      stepWork,
 422                    int*                          global_atom_index)
 423 {
 424     GMX_ASSERT(idef.ilsort == ilsortNO_FE || idef.ilsort == ilsortFE_SORTED,
 425                "The topology should be marked either as no FE or sorted on FE");
 426
 427     const bool havePerturbedInteractions =
 428             (idef.ilsort == ilsortFE_SORTED && numNonperturbedInteractions < iatoms.ssize());
 429     BondedKernelFlavor flavor =
 430             selectBondedKernelFlavor(stepWork, fr->use_simd_kernels, havePerturbedInteractions);
 431     int efptFTYPE;
 432     if (IS_RESTRAINT_TYPE(ftype))
 433     {
 434         efptFTYPE = efptRESTRAINT;
 435     }
 436     else
 437     {
 438         efptFTYPE = efptBONDED;
 439     }
 440
 441     const int nat1   = interaction_function[ftype].nratoms + 1;
 442     const int nbonds = iatoms.ssize() / nat1;
 443
 444     GMX_ASSERT(fr->gpuBonded != nullptr || workDivision.end(ftype) == iatoms.ssize(),
 445                "The thread division should match the topology");
 446
 447     const int nb0 = workDivision.bound(ftype, thread);
 448     const int nbn = workDivision.bound(ftype, thread + 1) - nb0;
 449
 450     ArrayRef<const t_iparams> iparams = idef.iparams;
 451
 452     real v = 0;
 453     if (!isPairInteraction(ftype))
 454     {
 455         if (ftype == F_CMAP)
 456         {
 457             /* TODO The execution time for CMAP dihedrals might be
 458                nice to account to its own subtimer, but first
 459                wallcycle needs to be extended to support calling from
 460                multiple threads. */
 461             v = cmap_dihs(nbn,
 462                           iatoms.data() + nb0,
 463                           iparams.data(),
 464                           &idef.cmap_grid,
 465                           x,
 466                           f,
 467                           fshift,
 468                           pbc,
 469                           lambda[efptFTYPE],
 470                           &(dvdl[efptFTYPE]),
 471                           md,
 472                           fcd,
 473                           global_atom_index);
 474         }
 475         else
 476         {
 477             v = calculateSimpleBond(ftype,
 478                                     nbn,
 479                                     iatoms.data() + nb0,
 480                                     iparams.data(),
 481                                     x,
 482                                     f,
 483                                     fshift,
 484                                     pbc,
 485                                     lambda[efptFTYPE],
 486                                     &(dvdl[efptFTYPE]),
 487                                     md,
 488                                     fcd,
 489                                     global_atom_index,
 490                                     flavor);
 491         }
 492     }
 493     else
 494     {
 495         /* TODO The execution time for pairs might be nice to account
 496            to its own subtimer, but first wallcycle needs to be
 497            extended to support calling from multiple threads. */
 498         do_pairs(ftype,
 499                  nbn,
 500                  iatoms.data() + nb0,
 501                  iparams.data(),
 502                  x,
 503                  f,
 504                  fshift,
 505                  pbc,
 506                  lambda,
 507                  dvdl,
 508                  md,
 509                  fr,
 510                  havePerturbedInteractions,
 511                  stepWork,
 512                  grpp,
 513                  global_atom_index);
 514     }
 515
 516     if (thread == 0)
 517     {
 518         inc_nrnb(nrnb, nrnbIndex(ftype), nbonds);
 519     }
 520
 521     return v;
 522 }
 523
 524 } // namespace
 525
 526 /*! \brief Compute the bonded part of the listed forces, parallelized over threads
 527  */
 528 static void calcBondedForces(const InteractionDefinitions& idef,
 529                              bonded_threading_t*           bt,
 530                              const rvec                    x[],
 531                              const t_forcerec*             fr,
 532                              const t_pbc*                  pbc_null,
 533                              rvec*                         fshiftMasterBuffer,
 534                              gmx_enerdata_t*               enerd,
 535                              t_nrnb*                       nrnb,
 536                              const real*                   lambda,
 537                              real*                         dvdl,
 538                              const t_mdatoms*              md,
 539                              t_fcdata*                     fcd,
 540                              const gmx::StepWorkload&      stepWork,
 541                              int*                          global_atom_index)
 542 {
 543 #pragma omp parallel for num_threads(bt->nthreads) schedule(static)
 544     for (int thread = 0; thread < bt->nthreads; thread++)
 545     {
 546         try
 547         {
 548             f_thread_t& threadBuffers = *bt->f_t[thread];
 549             int         ftype;
 550             real *      epot, v;
 551             /* thread stuff */
 552             rvec*              fshift;
 553             real*              dvdlt;
 554             gmx_grppairener_t* grpp;
 555
 556             zero_thread_output(&threadBuffers);
 557
 558             rvec4* ft = threadBuffers.f;
 559
 560             /* Thread 0 writes directly to the main output buffers.
 561              * We might want to reconsider this.
 562              */
 563             if (thread == 0)
 564             {
 565                 fshift = fshiftMasterBuffer;
 566                 epot   = enerd->term;
 567                 grpp   = &enerd->grpp;
 568                 dvdlt  = dvdl;
 569             }
 570             else
 571             {
 572                 fshift = as_rvec_array(threadBuffers.fshift.data());
 573                 epot   = threadBuffers.ener;
 574                 grpp   = &threadBuffers.grpp;
 575                 dvdlt  = threadBuffers.dvdl;
 576             }
 577             /* Loop over all bonded force types to calculate the bonded forces */
 578             for (ftype = 0; (ftype < F_NRE); ftype++)
 579             {
 580                 const InteractionList& ilist = idef.il[ftype];
 581                 if (!ilist.empty() && ftype_is_bonded_potential(ftype))
 582                 {
 583                     ArrayRef<const int> iatoms = gmx::makeConstArrayRef(ilist.iatoms);
 584                     v                          = calc_one_bond(thread,
 585                                       ftype,
 586                                       idef,
 587                                       iatoms,
 588                                       idef.numNonperturbedInteractions[ftype],
 589                                       bt->workDivision,
 590                                       x,
 591                                       ft,
 592                                       fshift,
 593                                       fr,
 594                                       pbc_null,
 595                                       grpp,
 596                                       nrnb,
 597                                       lambda,
 598                                       dvdlt,
 599                                       md,
 600                                       fcd,
 601                                       stepWork,
 602                                       global_atom_index);
 603                     epot[ftype] += v;
 604                 }
 605             }
 606         }
 607         GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
 608     }
 609 }
 610
 611 bool ListedForces::haveRestraints(const t_fcdata& fcdata) const
 612 {
 613     GMX_ASSERT(fcdata.orires && fcdata.disres, "NMR restraints objects should be set up");
 614
 615     return (!idef_->il[F_POSRES].empty() || !idef_->il[F_FBPOSRES].empty() || fcdata.orires->nr > 0
 616             || fcdata.disres->nres > 0);
 617 }
 618
 619 bool ListedForces::haveCpuBondeds() const
 620 {
 621     return threading_->haveBondeds;
 622 }
 623
 624 bool ListedForces::haveCpuListedForces(const t_fcdata& fcdata) const
 625 {
 626     return haveCpuBondeds() || haveRestraints(fcdata);
 627 }
 628
 629 namespace
 630 {
 631
 632 /*! \brief Calculates all listed force interactions. */
 633 void calc_listed(struct gmx_wallcycle*         wcycle,
 634                  const InteractionDefinitions& idef,
 635                  bonded_threading_t*           bt,
 636                  const rvec                    x[],
 637                  gmx::ForceOutputs*            forceOutputs,
 638                  const t_forcerec*             fr,
 639                  const t_pbc*                  pbc,
 640                  gmx_enerdata_t*               enerd,
 641                  t_nrnb*                       nrnb,
 642                  const real*                   lambda,
 643                  const t_mdatoms*              md,
 644                  t_fcdata*                     fcd,
 645                  int*                          global_atom_index,
 646                  const gmx::StepWorkload&      stepWork)
 647 {
 648     if (bt->haveBondeds)
 649     {
 650         gmx::ForceWithShiftForces& forceWithShiftForces = forceOutputs->forceWithShiftForces();
 651
 652         wallcycle_sub_start(wcycle, ewcsLISTED);
 653         /* The dummy array is to have a place to store the dhdl at other values
 654            of lambda, which will be thrown away in the end */
 655         real dvdl[efptNR] = { 0 };
 656         calcBondedForces(idef,
 657                          bt,
 658                          x,
 659                          fr,
 660                          fr->bMolPBC ? pbc : nullptr,
 661                          as_rvec_array(forceWithShiftForces.shiftForces().data()),
 662                          enerd,
 663                          nrnb,
 664                          lambda,
 665                          dvdl,
 666                          md,
 667                          fcd,
 668                          stepWork,
 669                          global_atom_index);
 670         wallcycle_sub_stop(wcycle, ewcsLISTED);
 671
 672         wallcycle_sub_start(wcycle, ewcsLISTED_BUF_OPS);
 673         reduce_thread_output(&forceWithShiftForces, enerd->term, &enerd->grpp, dvdl, bt, stepWork);
 674
 675         if (stepWork.computeDhdl)
 676         {
 677             for (int i = 0; i < efptNR; i++)
 678             {
 679                 enerd->dvdl_nonlin[i] += dvdl[i];
 680             }
 681         }
 682         wallcycle_sub_stop(wcycle, ewcsLISTED_BUF_OPS);
 683     }
 684
 685     /* Copy the sum of violations for the distance restraints from fcd */
 686     if (fcd)
 687     {
 688         enerd->term[F_DISRESVIOL] = fcd->disres->sumviol;
 689     }
 690 }
 691
 692 /*! \brief As calc_listed(), but only determines the potential energy
 693  * for the perturbed interactions.
 694  *
 695  * The shift forces in fr are not affected.
 696  */
 697 void calc_listed_lambda(const InteractionDefinitions& idef,
 698                         bonded_threading_t*           bt,
 699                         const rvec                    x[],
 700                         const t_forcerec*             fr,
 701                         const struct t_pbc*           pbc,
 702                         gmx::ArrayRef<real>           forceBufferLambda,
 703                         gmx::ArrayRef<gmx::RVec>      shiftForceBufferLambda,
 704                         gmx_grppairener_t*            grpp,
 705                         real*                         epot,
 706                         gmx::ArrayRef<real>           dvdl,
 707                         t_nrnb*                       nrnb,
 708                         const real*                   lambda,
 709                         const t_mdatoms*              md,
 710                         t_fcdata*                     fcd,
 711                         int*                          global_atom_index)
 712 {
 713     WorkDivision& workDivision = bt->foreignLambdaWorkDivision;
 714
 715     const t_pbc* pbc_null;
 716     if (fr->bMolPBC)
 717     {
 718         pbc_null = pbc;
 719     }
 720     else
 721     {
 722         pbc_null = nullptr;
 723     }
 724
 725     /* We already have the forces, so we use temp buffers here */
 726     std::fill(forceBufferLambda.begin(), forceBufferLambda.end(), 0.0_real);
 727     std::fill(shiftForceBufferLambda.begin(),
 728               shiftForceBufferLambda.end(),
 729               gmx::RVec{ 0.0_real, 0.0_real, 0.0_real });
 730     rvec4* f      = reinterpret_cast<rvec4*>(forceBufferLambda.data());
 731     rvec*  fshift = as_rvec_array(shiftForceBufferLambda.data());
 732
 733     /* Loop over all bonded force types to calculate the bonded energies */
 734     for (int ftype = 0; (ftype < F_NRE); ftype++)
 735     {
 736         if (ftype_is_bonded_potential(ftype))
 737         {
 738             const InteractionList& ilist = idef.il[ftype];
 739             /* Create a temporary iatom list with only perturbed interactions */
 740             const int           numNonperturbed = idef.numNonperturbedInteractions[ftype];
 741             ArrayRef<const int> iatomsPerturbed = gmx::constArrayRefFromArray(
 742                     ilist.iatoms.data() + numNonperturbed, ilist.size() - numNonperturbed);
 743             if (!iatomsPerturbed.empty())
 744             {
 745                 /* Set the work range of thread 0 to the perturbed bondeds */
 746                 workDivision.setBound(ftype, 0, 0);
 747                 workDivision.setBound(ftype, 1, iatomsPerturbed.ssize());
 748
 749                 gmx::StepWorkload tempFlags;
 750                 tempFlags.computeEnergy = true;
 751                 real v                  = calc_one_bond(0,
 752                                        ftype,
 753                                        idef,
 754                                        iatomsPerturbed,
 755                                        iatomsPerturbed.ssize(),
 756                                        workDivision,
 757                                        x,
 758                                        f,
 759                                        fshift,
 760                                        fr,
 761                                        pbc_null,
 762                                        grpp,
 763                                        nrnb,
 764                                        lambda,
 765                                        dvdl.data(),
 766                                        md,
 767                                        fcd,
 768                                        tempFlags,
 769                                        global_atom_index);
 770                 epot[ftype] += v;
 771             }
 772         }
 773     }
 774 }
 775
 776 } // namespace
 777
 778 void ListedForces::calculate(struct gmx_wallcycle*                     wcycle,
 779                              const matrix                              box,
 780                              const t_lambda*                           fepvals,
 781                              const t_commrec*                          cr,
 782                              const gmx_multisim_t*                     ms,
 783                              gmx::ArrayRefWithPadding<const gmx::RVec> coordinates,
 784                              gmx::ArrayRef<const gmx::RVec>            xWholeMolecules,
 785                              t_fcdata*                                 fcdata,
 786                              const history_t*                          hist,
 787                              gmx::ForceOutputs*                        forceOutputs,
 788                              const t_forcerec*                         fr,
 789                              const struct t_pbc*                       pbc,
 790                              gmx_enerdata_t*                           enerd,
 791                              t_nrnb*                                   nrnb,
 792                              const real*                               lambda,
 793                              const t_mdatoms*                          md,
 794                              int*                                      global_atom_index,
 795                              const gmx::StepWorkload&                  stepWork)
 796 {
 797     if (interactionSelection_.none() || !stepWork.computeListedForces)
 798     {
 799         return;
 800     }
 801
 802     const InteractionDefinitions& idef = *idef_;
 803
 804     // Todo: replace all rvec use here with ArrayRefWithPadding
 805     const rvec* x = as_rvec_array(coordinates.paddedArrayRef().data());
 806
 807     const bool calculateRestInteractions =
 808             interactionSelection_.test(static_cast<int>(ListedForces::InteractionGroup::Rest));
 809
 810     t_pbc pbc_full; /* Full PBC is needed for position restraints */
 811     if (calculateRestInteractions && haveRestraints(*fcdata))
 812     {
 813         if (!idef.il[F_POSRES].empty() || !idef.il[F_FBPOSRES].empty())
 814         {
 815             /* Not enough flops to bother counting */
 816             set_pbc(&pbc_full, fr->pbcType, box);
 817         }
 818
 819         /* TODO Use of restraints triggers further function calls
 820            inside the loop over calc_one_bond(), but those are too
 821            awkward to account to this subtimer properly in the present
 822            code. We don't test / care much about performance with
 823            restraints, anyway. */
 824         wallcycle_sub_start(wcycle, ewcsRESTRAINTS);
 825
 826         if (!idef.il[F_POSRES].empty())
 827         {
 828             posres_wrapper(nrnb, idef, &pbc_full, x, enerd, lambda, fr, &forceOutputs->forceWithVirial());
 829         }
 830
 831         if (!idef.il[F_FBPOSRES].empty())
 832         {
 833             fbposres_wrapper(nrnb, idef, &pbc_full, x, enerd, fr, &forceOutputs->forceWithVirial());
 834         }
 835
 836         /* Do pre force calculation stuff which might require communication */
 837         if (fcdata->orires->nr > 0)
 838         {
 839             GMX_ASSERT(!xWholeMolecules.empty(), "Need whole molecules for orienation restraints");
 840             enerd->term[F_ORIRESDEV] = calc_orires_dev(ms,
 841                                                        idef.il[F_ORIRES].size(),
 842                                                        idef.il[F_ORIRES].iatoms.data(),
 843                                                        idef.iparams.data(),
 844                                                        md,
 845                                                        xWholeMolecules,
 846                                                        x,
 847                                                        fr->bMolPBC ? pbc : nullptr,
 848                                                        fcdata->orires,
 849                                                        hist);
 850         }
 851         if (fcdata->disres->nres > 0)
 852         {
 853             calc_disres_R_6(cr,
 854                             ms,
 855                             idef.il[F_DISRES].size(),
 856                             idef.il[F_DISRES].iatoms.data(),
 857                             x,
 858                             fr->bMolPBC ? pbc : nullptr,
 859                             fcdata->disres,
 860                             hist);
 861         }
 862
 863         wallcycle_sub_stop(wcycle, ewcsRESTRAINTS);
 864     }
 865
 866     calc_listed(wcycle, idef, threading_.get(), x, forceOutputs, fr, pbc, enerd, nrnb, lambda, md, fcdata, global_atom_index, stepWork);
 867
 868     /* Check if we have to determine energy differences
 869      * at foreign lambda's.
 870      */
 871     if (fepvals->n_lambda > 0 && stepWork.computeDhdl)
 872     {
 873         real dvdl[efptNR] = { 0 };
 874         if (!idef.il[F_POSRES].empty())
 875         {
 876             posres_wrapper_lambda(wcycle, fepvals, idef, &pbc_full, x, enerd, lambda, fr);
 877         }
 878         if (idef.ilsort != ilsortNO_FE)
 879         {
 880             wallcycle_sub_start(wcycle, ewcsLISTED_FEP);
 881             if (idef.ilsort != ilsortFE_SORTED)
 882             {
 883                 gmx_incons("The bonded interactions are not sorted for free energy");
 884             }
 885             for (int i = 0; i < 1 + enerd->foreignLambdaTerms.numLambdas(); i++)
 886             {
 887                 real lam_i[efptNR];
 888
 889                 reset_foreign_enerdata(enerd);
 890                 for (int j = 0; j < efptNR; j++)
 891                 {
 892                     lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i - 1]);
 893                 }
 894                 calc_listed_lambda(idef,
 895                                    threading_.get(),
 896                                    x,
 897                                    fr,
 898                                    pbc,
 899                                    forceBufferLambda_,
 900                                    shiftForceBufferLambda_,
 901                                    &(enerd->foreign_grpp),
 902                                    enerd->foreign_term,
 903                                    dvdl,
 904                                    nrnb,
 905                                    lam_i,
 906                                    md,
 907                                    fcdata,
 908                                    global_atom_index);
 909                 sum_epot(enerd->foreign_grpp, enerd->foreign_term);
 910                 const double dvdlSum = std::accumulate(std::begin(dvdl), std::end(dvdl), 0.);
 911                 std::fill(std::begin(dvdl), std::end(dvdl), 0.0);
 912                 enerd->foreignLambdaTerms.accumulate(i, enerd->foreign_term[F_EPOT], dvdlSum);
 913             }
 914             wallcycle_sub_stop(wcycle, ewcsLISTED_FEP);
 915         }
 916     }
 917 }