src/gromacs/listed_forces/listed_forces.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
   5  * Copyright (c) 2019,2020, by the GROMACS development team, led by
   6  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   7  * and including many others, as listed in the AUTHORS file in the
   8  * top-level source directory and at http://www.gromacs.org.
   9  *
  10  * GROMACS is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public License
  12  * as published by the Free Software Foundation; either version 2.1
  13  * of the License, or (at your option) any later version.
  14  *
  15  * GROMACS is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with GROMACS; if not, see
  22  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  23  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  24  *
  25  * If you want to redistribute modifications to GROMACS, please
  26  * consider that scientific software is very special. Version
  27  * control is crucial - bugs must be traceable. We will be happy to
  28  * consider code for inclusion in the official distribution, but
  29  * derived work must not be called official GROMACS. Details are found
  30  * in the README & COPYING files - if they are missing, get the
  31  * official version at http://www.gromacs.org.
  32  *
  33  * To help us fund GROMACS development, we humbly ask that you cite
  34  * the research papers on the package. Check out http://www.gromacs.org.
  35  */
  36 /*! \internal \file
  37  *
  38  * \brief This file defines high-level functions for mdrun to compute
  39  * energies and forces for listed interactions.
  40  *
  41  * \author Mark Abraham <mark.j.abraham@gmail.com>
  42  *
  43  * \ingroup module_listed_forces
  44  */
  45 #include "gmxpre.h"
  46
  47 #include "listed_forces.h"
  48
  49 #include <cassert>
  50
  51 #include <algorithm>
  52 #include <array>
  53
  54 #include "gromacs/gmxlib/network.h"
  55 #include "gromacs/gmxlib/nrnb.h"
  56 #include "gromacs/listed_forces/bonded.h"
  57 #include "gromacs/listed_forces/disre.h"
  58 #include "gromacs/listed_forces/orires.h"
  59 #include "gromacs/listed_forces/pairs.h"
  60 #include "gromacs/listed_forces/position_restraints.h"
  61 #include "gromacs/math/vec.h"
  62 #include "gromacs/mdlib/enerdata_utils.h"
  63 #include "gromacs/mdlib/force.h"
  64 #include "gromacs/mdtypes/commrec.h"
  65 #include "gromacs/mdtypes/fcdata.h"
  66 #include "gromacs/mdtypes/forceoutput.h"
  67 #include "gromacs/mdtypes/forcerec.h"
  68 #include "gromacs/mdtypes/inputrec.h"
  69 #include "gromacs/mdtypes/md_enums.h"
  70 #include "gromacs/mdtypes/simulation_workload.h"
  71 #include "gromacs/pbcutil/ishift.h"
  72 #include "gromacs/pbcutil/pbc.h"
  73 #include "gromacs/timing/wallcycle.h"
  74 #include "gromacs/topology/topology.h"
  75 #include "gromacs/utility/exceptions.h"
  76 #include "gromacs/utility/fatalerror.h"
  77 #include "gromacs/utility/smalloc.h"
  78
  79 #include "listed_internal.h"
  80 #include "utilities.h"
  81
  82 namespace
  83 {
  84
  85 using gmx::ArrayRef;
  86
  87 /*! \brief Return true if ftype is an explicit pair-listed LJ or
  88  * COULOMB interaction type: bonded LJ (usually 1-4), or special
  89  * listed non-bonded for FEP. */
  90 bool isPairInteraction(int ftype)
  91 {
  92     return ((ftype) >= F_LJ14 && (ftype) <= F_LJC_PAIRS_NB);
  93 }
  94
  95 /*! \brief Zero thread-local output buffers */
  96 void zero_thread_output(f_thread_t* f_t)
  97 {
  98     constexpr int nelem_fa = sizeof(f_t->f[0]) / sizeof(real);
  99
 100     for (int i = 0; i < f_t->nblock_used; i++)
 101     {
 102         int a0 = f_t->block_index[i] * reduction_block_size;
 103         int a1 = a0 + reduction_block_size;
 104         for (int a = a0; a < a1; a++)
 105         {
 106             for (int d = 0; d < nelem_fa; d++)
 107             {
 108                 f_t->f[a][d] = 0;
 109             }
 110         }
 111     }
 112
 113     for (int i = 0; i < SHIFTS; i++)
 114     {
 115         clear_rvec(f_t->fshift[i]);
 116     }
 117     for (int i = 0; i < F_NRE; i++)
 118     {
 119         f_t->ener[i] = 0;
 120     }
 121     for (int i = 0; i < egNR; i++)
 122     {
 123         for (int j = 0; j < f_t->grpp.nener; j++)
 124         {
 125             f_t->grpp.ener[i][j] = 0;
 126         }
 127     }
 128     for (int i = 0; i < efptNR; i++)
 129     {
 130         f_t->dvdl[i] = 0;
 131     }
 132 }
 133
 134 /*! \brief The max thread number is arbitrary, we used a fixed number
 135  * to avoid memory management.  Using more than 16 threads is probably
 136  * never useful performance wise. */
 137 #define MAX_BONDED_THREADS 256
 138
 139 /*! \brief Reduce thread-local force buffers */
 140 void reduce_thread_forces(int n, gmx::ArrayRef<gmx::RVec> force, const bonded_threading_t* bt, int nthreads)
 141 {
 142     if (nthreads > MAX_BONDED_THREADS)
 143     {
 144         gmx_fatal(FARGS, "Can not reduce bonded forces on more than %d threads", MAX_BONDED_THREADS);
 145     }
 146
 147     rvec* gmx_restrict f = as_rvec_array(force.data());
 148
 149     /* This reduction can run on any number of threads,
 150      * independently of bt->nthreads.
 151      * But if nthreads matches bt->nthreads (which it currently does)
 152      * the uniform distribution of the touched blocks over nthreads will
 153      * match the distribution of bonded over threads well in most cases,
 154      * which means that threads mostly reduce their own data which increases
 155      * the number of cache hits.
 156      */
 157 #pragma omp parallel for num_threads(nthreads) schedule(static)
 158     for (int b = 0; b < bt->nblock_used; b++)
 159     {
 160         try
 161         {
 162             int    ind = bt->block_index[b];
 163             rvec4* fp[MAX_BONDED_THREADS];
 164
 165             /* Determine which threads contribute to this block */
 166             int nfb = 0;
 167             for (int ft = 0; ft < bt->nthreads; ft++)
 168             {
 169                 if (bitmask_is_set(bt->mask[ind], ft))
 170                 {
 171                     fp[nfb++] = bt->f_t[ft]->f;
 172                 }
 173             }
 174             if (nfb > 0)
 175             {
 176                 /* Reduce force buffers for threads that contribute */
 177                 int a0 = ind * reduction_block_size;
 178                 int a1 = (ind + 1) * reduction_block_size;
 179                 /* It would be nice if we could pad f to avoid this min */
 180                 a1 = std::min(a1, n);
 181                 for (int a = a0; a < a1; a++)
 182                 {
 183                     for (int fb = 0; fb < nfb; fb++)
 184                     {
 185                         rvec_inc(f[a], fp[fb][a]);
 186                     }
 187                 }
 188             }
 189         }
 190         GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
 191     }
 192 }
 193
 194 /*! \brief Reduce thread-local forces, shift forces and energies */
 195 void reduce_thread_output(int                        n,
 196                           gmx::ForceWithShiftForces* forceWithShiftForces,
 197                           real*                      ener,
 198                           gmx_grppairener_t*         grpp,
 199                           real*                      dvdl,
 200                           const bonded_threading_t*  bt,
 201                           const gmx::StepWorkload&   stepWork)
 202 {
 203     assert(bt->haveBondeds);
 204
 205     if (bt->nblock_used > 0)
 206     {
 207         /* Reduce the bonded force buffer */
 208         reduce_thread_forces(n, forceWithShiftForces->force(), bt, bt->nthreads);
 209     }
 210
 211     rvec* gmx_restrict fshift = as_rvec_array(forceWithShiftForces->shiftForces().data());
 212
 213     /* When necessary, reduce energy and virial using one thread only */
 214     if ((stepWork.computeEnergy || stepWork.computeVirial || stepWork.computeDhdl) && bt->nthreads > 1)
 215     {
 216         gmx::ArrayRef<const std::unique_ptr<f_thread_t>> f_t = bt->f_t;
 217
 218         if (stepWork.computeVirial)
 219         {
 220             for (int i = 0; i < SHIFTS; i++)
 221             {
 222                 for (int t = 1; t < bt->nthreads; t++)
 223                 {
 224                     rvec_inc(fshift[i], f_t[t]->fshift[i]);
 225                 }
 226             }
 227         }
 228         if (stepWork.computeEnergy)
 229         {
 230             for (int i = 0; i < F_NRE; i++)
 231             {
 232                 for (int t = 1; t < bt->nthreads; t++)
 233                 {
 234                     ener[i] += f_t[t]->ener[i];
 235                 }
 236             }
 237             for (int i = 0; i < egNR; i++)
 238             {
 239                 for (int j = 0; j < f_t[1]->grpp.nener; j++)
 240                 {
 241                     for (int t = 1; t < bt->nthreads; t++)
 242                     {
 243                         grpp->ener[i][j] += f_t[t]->grpp.ener[i][j];
 244                     }
 245                 }
 246             }
 247         }
 248         if (stepWork.computeDhdl)
 249         {
 250             for (int i = 0; i < efptNR; i++)
 251             {
 252
 253                 for (int t = 1; t < bt->nthreads; t++)
 254                 {
 255                     dvdl[i] += f_t[t]->dvdl[i];
 256                 }
 257             }
 258         }
 259     }
 260 }
 261
 262 /*! \brief Returns the bonded kernel flavor
 263  *
 264  * Note that energies are always requested when the virial
 265  * is requested (performance gain would be small).
 266  * Note that currently we do not have bonded kernels that
 267  * do not compute forces.
 268  */
 269 BondedKernelFlavor selectBondedKernelFlavor(const gmx::StepWorkload& stepWork,
 270                                             const bool               useSimdKernels,
 271                                             const bool               havePerturbedInteractions)
 272 {
 273     BondedKernelFlavor flavor;
 274     if (stepWork.computeEnergy || stepWork.computeVirial)
 275     {
 276         if (stepWork.computeVirial)
 277         {
 278             flavor = BondedKernelFlavor::ForcesAndVirialAndEnergy;
 279         }
 280         else
 281         {
 282             flavor = BondedKernelFlavor::ForcesAndEnergy;
 283         }
 284     }
 285     else
 286     {
 287         if (useSimdKernels && !havePerturbedInteractions)
 288         {
 289             flavor = BondedKernelFlavor::ForcesSimdWhenAvailable;
 290         }
 291         else
 292         {
 293             flavor = BondedKernelFlavor::ForcesNoSimd;
 294         }
 295     }
 296
 297     return flavor;
 298 }
 299
 300 /*! \brief Calculate one element of the list of bonded interactions
 301     for this thread */
 302 real calc_one_bond(int                           thread,
 303                    int                           ftype,
 304                    const InteractionDefinitions& idef,
 305                    ArrayRef<const int>           iatoms,
 306                    const int                     numNonperturbedInteractions,
 307                    const WorkDivision&           workDivision,
 308                    const rvec                    x[],
 309                    rvec4                         f[],
 310                    rvec                          fshift[],
 311                    const t_forcerec*             fr,
 312                    const t_pbc*                  pbc,
 313                    const t_graph*                g,
 314                    gmx_grppairener_t*            grpp,
 315                    t_nrnb*                       nrnb,
 316                    const real*                   lambda,
 317                    real*                         dvdl,
 318                    const t_mdatoms*              md,
 319                    t_fcdata*                     fcd,
 320                    const gmx::StepWorkload&      stepWork,
 321                    int*                          global_atom_index)
 322 {
 323     GMX_ASSERT(idef.ilsort == ilsortNO_FE || idef.ilsort == ilsortFE_SORTED,
 324                "The topology should be marked either as no FE or sorted on FE");
 325
 326     const bool havePerturbedInteractions =
 327             (idef.ilsort == ilsortFE_SORTED && numNonperturbedInteractions < iatoms.ssize());
 328     BondedKernelFlavor flavor =
 329             selectBondedKernelFlavor(stepWork, fr->use_simd_kernels, havePerturbedInteractions);
 330     int efptFTYPE;
 331     if (IS_RESTRAINT_TYPE(ftype))
 332     {
 333         efptFTYPE = efptRESTRAINT;
 334     }
 335     else
 336     {
 337         efptFTYPE = efptBONDED;
 338     }
 339
 340     const int nat1   = interaction_function[ftype].nratoms + 1;
 341     const int nbonds = iatoms.ssize() / nat1;
 342
 343     GMX_ASSERT(fr->gpuBonded != nullptr || workDivision.end(ftype) == iatoms.ssize(),
 344                "The thread division should match the topology");
 345
 346     const int nb0 = workDivision.bound(ftype, thread);
 347     const int nbn = workDivision.bound(ftype, thread + 1) - nb0;
 348
 349     ArrayRef<const t_iparams> iparams = idef.iparams;
 350
 351     real v = 0;
 352     if (!isPairInteraction(ftype))
 353     {
 354         if (ftype == F_CMAP)
 355         {
 356             /* TODO The execution time for CMAP dihedrals might be
 357                nice to account to its own subtimer, but first
 358                wallcycle needs to be extended to support calling from
 359                multiple threads. */
 360             v = cmap_dihs(nbn, iatoms.data() + nb0, iparams.data(), &idef.cmap_grid, x, f, fshift,
 361                           pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]), md, fcd, global_atom_index);
 362         }
 363         else
 364         {
 365             v = calculateSimpleBond(ftype, nbn, iatoms.data() + nb0, iparams.data(), x, f, fshift,
 366                                     pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]), md, fcd,
 367                                     global_atom_index, flavor);
 368         }
 369     }
 370     else
 371     {
 372         /* TODO The execution time for pairs might be nice to account
 373            to its own subtimer, but first wallcycle needs to be
 374            extended to support calling from multiple threads. */
 375         do_pairs(ftype, nbn, iatoms.data() + nb0, iparams.data(), x, f, fshift, pbc, g, lambda,
 376                  dvdl, md, fr, havePerturbedInteractions, stepWork, grpp, global_atom_index);
 377     }
 378
 379     if (thread == 0)
 380     {
 381         inc_nrnb(nrnb, nrnbIndex(ftype), nbonds);
 382     }
 383
 384     return v;
 385 }
 386
 387 } // namespace
 388
 389 /*! \brief Compute the bonded part of the listed forces, parallelized over threads
 390  */
 391 static void calcBondedForces(const InteractionDefinitions& idef,
 392                              const rvec                    x[],
 393                              const t_forcerec*             fr,
 394                              const t_pbc*                  pbc_null,
 395                              const t_graph*                g,
 396                              rvec*                         fshiftMasterBuffer,
 397                              gmx_enerdata_t*               enerd,
 398                              t_nrnb*                       nrnb,
 399                              const real*                   lambda,
 400                              real*                         dvdl,
 401                              const t_mdatoms*              md,
 402                              t_fcdata*                     fcd,
 403                              const gmx::StepWorkload&      stepWork,
 404                              int*                          global_atom_index)
 405 {
 406     bonded_threading_t* bt = fr->bondedThreading;
 407
 408 #pragma omp parallel for num_threads(bt->nthreads) schedule(static)
 409     for (int thread = 0; thread < bt->nthreads; thread++)
 410     {
 411         try
 412         {
 413             f_thread_t& threadBuffers = *bt->f_t[thread];
 414             int         ftype;
 415             real *      epot, v;
 416             /* thread stuff */
 417             rvec*              fshift;
 418             real*              dvdlt;
 419             gmx_grppairener_t* grpp;
 420
 421             zero_thread_output(&threadBuffers);
 422
 423             rvec4* ft = threadBuffers.f;
 424
 425             /* Thread 0 writes directly to the main output buffers.
 426              * We might want to reconsider this.
 427              */
 428             if (thread == 0)
 429             {
 430                 fshift = fshiftMasterBuffer;
 431                 epot   = enerd->term;
 432                 grpp   = &enerd->grpp;
 433                 dvdlt  = dvdl;
 434             }
 435             else
 436             {
 437                 fshift = threadBuffers.fshift;
 438                 epot   = threadBuffers.ener;
 439                 grpp   = &threadBuffers.grpp;
 440                 dvdlt  = threadBuffers.dvdl;
 441             }
 442             /* Loop over all bonded force types to calculate the bonded forces */
 443             for (ftype = 0; (ftype < F_NRE); ftype++)
 444             {
 445                 const InteractionList& ilist = idef.il[ftype];
 446                 if (!ilist.empty() && ftype_is_bonded_potential(ftype))
 447                 {
 448                     ArrayRef<const int> iatoms = gmx::makeConstArrayRef(ilist.iatoms);
 449                     v                          = calc_one_bond(
 450                             thread, ftype, idef, iatoms, idef.numNonperturbedInteractions[ftype],
 451                             fr->bondedThreading->workDivision, x, ft, fshift, fr, pbc_null, g, grpp,
 452                             nrnb, lambda, dvdlt, md, fcd, stepWork, global_atom_index);
 453                     epot[ftype] += v;
 454                 }
 455             }
 456         }
 457         GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
 458     }
 459 }
 460
 461 bool haveRestraints(const InteractionDefinitions& idef, const t_fcdata& fcd)
 462 {
 463     return (!idef.il[F_POSRES].empty() || !idef.il[F_FBPOSRES].empty() || fcd.orires.nr > 0
 464             || fcd.disres.nres > 0);
 465 }
 466
 467 bool haveCpuBondeds(const t_forcerec& fr)
 468 {
 469     return fr.bondedThreading->haveBondeds;
 470 }
 471
 472 bool haveCpuListedForces(const t_forcerec& fr, const InteractionDefinitions& idef, const t_fcdata& fcd)
 473 {
 474     return haveCpuBondeds(fr) || haveRestraints(idef, fcd);
 475 }
 476
 477 namespace
 478 {
 479
 480 /*! \brief Calculates all listed force interactions.
 481  *
 482  * Note that pbc_full is used only for position restraints, and is
 483  * not initialized if there are none.
 484  */
 485 void calc_listed(const t_commrec*              cr,
 486                  const gmx_multisim_t*         ms,
 487                  struct gmx_wallcycle*         wcycle,
 488                  const InteractionDefinitions& idef,
 489                  const rvec                    x[],
 490                  ArrayRef<const gmx::RVec>     xWholeMolecules,
 491                  history_t*                    hist,
 492                  gmx::ForceOutputs*            forceOutputs,
 493                  const t_forcerec*             fr,
 494                  const struct t_pbc*           pbc,
 495                  const struct t_pbc*           pbc_full,
 496                  const struct t_graph*         g,
 497                  gmx_enerdata_t*               enerd,
 498                  t_nrnb*                       nrnb,
 499                  const real*                   lambda,
 500                  const t_mdatoms*              md,
 501                  t_fcdata*                     fcd,
 502                  int*                          global_atom_index,
 503                  const gmx::StepWorkload&      stepWork)
 504 {
 505     const t_pbc*        pbc_null;
 506     bonded_threading_t* bt = fr->bondedThreading;
 507
 508     if (fr->bMolPBC)
 509     {
 510         pbc_null = pbc;
 511     }
 512     else
 513     {
 514         pbc_null = nullptr;
 515     }
 516
 517     if (haveRestraints(idef, *fcd))
 518     {
 519         /* TODO Use of restraints triggers further function calls
 520            inside the loop over calc_one_bond(), but those are too
 521            awkward to account to this subtimer properly in the present
 522            code. We don't test / care much about performance with
 523            restraints, anyway. */
 524         wallcycle_sub_start(wcycle, ewcsRESTRAINTS);
 525
 526         if (!idef.il[F_POSRES].empty())
 527         {
 528             posres_wrapper(nrnb, idef, pbc_full, x, enerd, lambda, fr, &forceOutputs->forceWithVirial());
 529         }
 530
 531         if (!idef.il[F_FBPOSRES].empty())
 532         {
 533             fbposres_wrapper(nrnb, idef, pbc_full, x, enerd, fr, &forceOutputs->forceWithVirial());
 534         }
 535
 536         /* Do pre force calculation stuff which might require communication */
 537         if (fcd->orires.nr > 0)
 538         {
 539             GMX_ASSERT(!xWholeMolecules.empty(), "Need whole molecules for orienation restraints");
 540             enerd->term[F_ORIRESDEV] =
 541                     calc_orires_dev(ms, idef.il[F_ORIRES].size(), idef.il[F_ORIRES].iatoms.data(),
 542                                     idef.iparams.data(), md, xWholeMolecules, x, pbc_null, fcd, hist);
 543         }
 544         if (fcd->disres.nres > 0)
 545         {
 546             calc_disres_R_6(cr, ms, idef.il[F_DISRES].size(), idef.il[F_DISRES].iatoms.data(), x,
 547                             pbc_null, fcd, hist);
 548         }
 549
 550         wallcycle_sub_stop(wcycle, ewcsRESTRAINTS);
 551     }
 552
 553     if (haveCpuBondeds(*fr))
 554     {
 555         gmx::ForceWithShiftForces& forceWithShiftForces = forceOutputs->forceWithShiftForces();
 556
 557         wallcycle_sub_start(wcycle, ewcsLISTED);
 558         /* The dummy array is to have a place to store the dhdl at other values
 559            of lambda, which will be thrown away in the end */
 560         real dvdl[efptNR] = { 0 };
 561         calcBondedForces(idef, x, fr, pbc_null, g,
 562                          as_rvec_array(forceWithShiftForces.shiftForces().data()), enerd, nrnb,
 563                          lambda, dvdl, md, fcd, stepWork, global_atom_index);
 564         wallcycle_sub_stop(wcycle, ewcsLISTED);
 565
 566         wallcycle_sub_start(wcycle, ewcsLISTED_BUF_OPS);
 567         reduce_thread_output(fr->natoms_force, &forceWithShiftForces, enerd->term, &enerd->grpp,
 568                              dvdl, bt, stepWork);
 569
 570         if (stepWork.computeDhdl)
 571         {
 572             for (int i = 0; i < efptNR; i++)
 573             {
 574                 enerd->dvdl_nonlin[i] += dvdl[i];
 575             }
 576         }
 577         wallcycle_sub_stop(wcycle, ewcsLISTED_BUF_OPS);
 578     }
 579
 580     /* Copy the sum of violations for the distance restraints from fcd */
 581     if (fcd)
 582     {
 583         enerd->term[F_DISRESVIOL] = fcd->disres.sumviol;
 584     }
 585 }
 586
 587 /*! \brief As calc_listed(), but only determines the potential energy
 588  * for the perturbed interactions.
 589  *
 590  * The shift forces in fr are not affected.
 591  */
 592 void calc_listed_lambda(const InteractionDefinitions& idef,
 593                         const rvec                    x[],
 594                         const t_forcerec*             fr,
 595                         const struct t_pbc*           pbc,
 596                         const struct t_graph*         g,
 597                         gmx_grppairener_t*            grpp,
 598                         real*                         epot,
 599                         gmx::ArrayRef<real>           dvdl,
 600                         t_nrnb*                       nrnb,
 601                         const real*                   lambda,
 602                         const t_mdatoms*              md,
 603                         t_fcdata*                     fcd,
 604                         int*                          global_atom_index)
 605 {
 606     real          v;
 607     rvec4*        f;
 608     rvec*         fshift;
 609     const t_pbc*  pbc_null;
 610     WorkDivision& workDivision = fr->bondedThreading->foreignLambdaWorkDivision;
 611
 612     if (fr->bMolPBC)
 613     {
 614         pbc_null = pbc;
 615     }
 616     else
 617     {
 618         pbc_null = nullptr;
 619     }
 620
 621     /* We already have the forces, so we use temp buffers here */
 622     // TODO: Get rid of these allocations by using permanent force buffers
 623     snew(f, fr->natoms_force);
 624     snew(fshift, SHIFTS);
 625
 626     /* Loop over all bonded force types to calculate the bonded energies */
 627     for (int ftype = 0; (ftype < F_NRE); ftype++)
 628     {
 629         if (ftype_is_bonded_potential(ftype))
 630         {
 631             const InteractionList& ilist = idef.il[ftype];
 632             /* Create a temporary iatom list with only perturbed interactions */
 633             const int           numNonperturbed = idef.numNonperturbedInteractions[ftype];
 634             ArrayRef<const int> iatomsPerturbed = gmx::constArrayRefFromArray(
 635                     ilist.iatoms.data() + numNonperturbed, ilist.size() - numNonperturbed);
 636             if (!iatomsPerturbed.empty())
 637             {
 638                 /* Set the work range of thread 0 to the perturbed bondeds */
 639                 workDivision.setBound(ftype, 0, 0);
 640                 workDivision.setBound(ftype, 1, iatomsPerturbed.ssize());
 641
 642                 gmx::StepWorkload tempFlags;
 643                 tempFlags.computeEnergy = true;
 644                 v = calc_one_bond(0, ftype, idef, iatomsPerturbed, iatomsPerturbed.ssize(),
 645                                   workDivision, x, f, fshift, fr, pbc_null, g, grpp, nrnb, lambda,
 646                                   dvdl.data(), md, fcd, tempFlags, global_atom_index);
 647                 epot[ftype] += v;
 648             }
 649         }
 650     }
 651
 652     sfree(fshift);
 653     sfree(f);
 654 }
 655
 656 } // namespace
 657
 658 void do_force_listed(struct gmx_wallcycle*          wcycle,
 659                      const matrix                   box,
 660                      const t_lambda*                fepvals,
 661                      const t_commrec*               cr,
 662                      const gmx_multisim_t*          ms,
 663                      const InteractionDefinitions&  idef,
 664                      const rvec                     x[],
 665                      gmx::ArrayRef<const gmx::RVec> xWholeMolecules,
 666                      history_t*                     hist,
 667                      gmx::ForceOutputs*             forceOutputs,
 668                      const t_forcerec*              fr,
 669                      const struct t_pbc*            pbc,
 670                      const struct t_graph*          graph,
 671                      gmx_enerdata_t*                enerd,
 672                      t_nrnb*                        nrnb,
 673                      const real*                    lambda,
 674                      const t_mdatoms*               md,
 675                      t_fcdata*                      fcd,
 676                      int*                           global_atom_index,
 677                      const gmx::StepWorkload&       stepWork)
 678 {
 679     t_pbc pbc_full; /* Full PBC is needed for position restraints */
 680
 681     if (!stepWork.computeListedForces)
 682     {
 683         return;
 684     }
 685
 686     if (!idef.il[F_POSRES].empty() || !idef.il[F_FBPOSRES].empty())
 687     {
 688         /* Not enough flops to bother counting */
 689         set_pbc(&pbc_full, fr->pbcType, box);
 690     }
 691     calc_listed(cr, ms, wcycle, idef, x, xWholeMolecules, hist, forceOutputs, fr, pbc, &pbc_full,
 692                 graph, enerd, nrnb, lambda, md, fcd, global_atom_index, stepWork);
 693
 694     /* Check if we have to determine energy differences
 695      * at foreign lambda's.
 696      */
 697     if (fepvals->n_lambda > 0 && stepWork.computeDhdl)
 698     {
 699         real dvdl[efptNR] = { 0 };
 700         posres_wrapper_lambda(wcycle, fepvals, idef, &pbc_full, x, enerd, lambda, fr);
 701
 702         if (idef.ilsort != ilsortNO_FE)
 703         {
 704             wallcycle_sub_start(wcycle, ewcsLISTED_FEP);
 705             if (idef.ilsort != ilsortFE_SORTED)
 706             {
 707                 gmx_incons("The bonded interactions are not sorted for free energy");
 708             }
 709             for (size_t i = 0; i < enerd->enerpart_lambda.size(); i++)
 710             {
 711                 real lam_i[efptNR];
 712
 713                 reset_foreign_enerdata(enerd);
 714                 for (int j = 0; j < efptNR; j++)
 715                 {
 716                     lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i - 1]);
 717                 }
 718                 calc_listed_lambda(idef, x, fr, pbc, graph, &(enerd->foreign_grpp),
 719                                    enerd->foreign_term, dvdl, nrnb, lam_i, md, fcd, global_atom_index);
 720                 sum_epot(&(enerd->foreign_grpp), enerd->foreign_term);
 721                 enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT];
 722                 for (int j = 0; j < efptNR; j++)
 723                 {
 724                     enerd->dhdlLambda[i] += dvdl[j];
 725                     dvdl[j] = 0;
 726                 }
 727             }
 728             wallcycle_sub_stop(wcycle, ewcsLISTED_FEP);
 729         }
 730     }
 731 }