src/gromacs/listed_forces/bonded.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
   7  * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
   8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   9  * and including many others, as listed in the AUTHORS file in the
  10  * top-level source directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38 /*! \internal \file
  39  *
  40  * \brief This file defines low-level functions necessary for
  41  * computing energies and forces for listed interactions.
  42  *
  43  * \author Mark Abraham <mark.j.abraham@gmail.com>
  44  *
  45  * \ingroup module_listed_forces
  46  */
  47 #include "gmxpre.h"
  48
  49 #include "bonded.h"
  50
  51 #include "config.h"
  52
  53 #include <cassert>
  54 #include <cmath>
  55
  56 #include <algorithm>
  57
  58 #include "gromacs/gmxlib/nrnb.h"
  59 #include "gromacs/listed_forces/disre.h"
  60 #include "gromacs/listed_forces/orires.h"
  61 #include "gromacs/math/functions.h"
  62 #include "gromacs/math/units.h"
  63 #include "gromacs/math/utilities.h"
  64 #include "gromacs/math/vec.h"
  65 #include "gromacs/mdtypes/fcdata.h"
  66 #include "gromacs/mdtypes/mdatom.h"
  67 #include "gromacs/pbcutil/ishift.h"
  68 #include "gromacs/pbcutil/pbc.h"
  69 #include "gromacs/pbcutil/pbc_simd.h"
  70 #include "gromacs/simd/simd.h"
  71 #include "gromacs/simd/simd_math.h"
  72 #include "gromacs/simd/vector_operations.h"
  73 #include "gromacs/utility/basedefinitions.h"
  74 #include "gromacs/utility/enumerationhelpers.h"
  75 #include "gromacs/utility/fatalerror.h"
  76 #include "gromacs/utility/real.h"
  77 #include "gromacs/utility/smalloc.h"
  78
  79 #include "listed_internal.h"
  80 #include "restcbt.h"
  81
  82 using namespace gmx; // TODO: Remove when this file is moved into gmx namespace
  83
  84 namespace
  85 {
  86
  87 //! Type of CPU function to compute a bonded interaction.
  88 using BondedFunction = real (*)(int              nbonds,
  89                                 const t_iatom    iatoms[],
  90                                 const t_iparams  iparams[],
  91                                 const rvec       x[],
  92                                 rvec4            f[],
  93                                 rvec             fshift[],
  94                                 const t_pbc*     pbc,
  95                                 real             lambda,
  96                                 real*            dvdlambda,
  97                                 const t_mdatoms* md,
  98                                 t_fcdata*        fcd,
  99                                 int*             ddgatindex);
 100
 101 /*! \brief Mysterious CMAP coefficient matrix */
 102 const int cmap_coeff_matrix[] = {
 103     1,  0,  -3, 2,  0,  0, 0,  0,  -3, 0,  9,  -6, 2, 0,  -6, 4,  0,  0,  0, 0,  0, 0, 0,  0,
 104     3,  0,  -9, 6,  -2, 0, 6,  -4, 0,  0,  0,  0,  0, 0,  0,  0,  0,  0,  9, -6, 0, 0, -6, 4,
 105     0,  0,  3,  -2, 0,  0, 0,  0,  0,  0,  -9, 6,  0, 0,  6,  -4, 0,  0,  0, 0,  1, 0, -3, 2,
 106     -2, 0,  6,  -4, 1,  0, -3, 2,  0,  0,  0,  0,  0, 0,  0,  0,  -1, 0,  3, -2, 1, 0, -3, 2,
 107     0,  0,  0,  0,  0,  0, 0,  0,  0,  0,  -3, 2,  0, 0,  3,  -2, 0,  0,  0, 0,  0, 0, 3,  -2,
 108     0,  0,  -6, 4,  0,  0, 3,  -2, 0,  1,  -2, 1,  0, 0,  0,  0,  0,  -3, 6, -3, 0, 2, -4, 2,
 109     0,  0,  0,  0,  0,  0, 0,  0,  0,  3,  -6, 3,  0, -2, 4,  -2, 0,  0,  0, 0,  0, 0, 0,  0,
 110     0,  0,  -3, 3,  0,  0, 2,  -2, 0,  0,  -1, 1,  0, 0,  0,  0,  0,  0,  3, -3, 0, 0, -2, 2,
 111     0,  0,  0,  0,  0,  1, -2, 1,  0,  -2, 4,  -2, 0, 1,  -2, 1,  0,  0,  0, 0,  0, 0, 0,  0,
 112     0,  -1, 2,  -1, 0,  1, -2, 1,  0,  0,  0,  0,  0, 0,  0,  0,  0,  0,  1, -1, 0, 0, -1, 1,
 113     0,  0,  0,  0,  0,  0, -1, 1,  0,  0,  2,  -2, 0, 0,  -1, 1
 114 };
 115
 116
 117 /*! \brief Compute dx = xi - xj, modulo PBC if non-NULL
 118  *
 119  * \todo This kind of code appears in many places. Consolidate it */
 120 int pbc_rvec_sub(const t_pbc* pbc, const rvec xi, const rvec xj, rvec dx)
 121 {
 122     if (pbc)
 123     {
 124         return pbc_dx_aiuc(pbc, xi, xj, dx);
 125     }
 126     else
 127     {
 128         rvec_sub(xi, xj, dx);
 129         return CENTRAL;
 130     }
 131 }
 132
 133 } // namespace
 134
 135 //! \cond
 136 real bond_angle(const rvec   xi,
 137                 const rvec   xj,
 138                 const rvec   xk,
 139                 const t_pbc* pbc,
 140                 rvec         r_ij,
 141                 rvec         r_kj,
 142                 real*        costh,
 143                 int*         t1,
 144                 int*         t2)
 145 /* Return value is the angle between the bonds i-j and j-k */
 146 {
 147     /* 41 FLOPS */
 148     real th;
 149
 150     *t1 = pbc_rvec_sub(pbc, xi, xj, r_ij); /*  3                */
 151     *t2 = pbc_rvec_sub(pbc, xk, xj, r_kj); /*  3                */
 152
 153     *costh = cos_angle(r_ij, r_kj); /* 25               */
 154     th     = std::acos(*costh);     /* 10               */
 155     /* 41 TOTAL */
 156     return th;
 157 }
 158
 159 real dih_angle(const rvec   xi,
 160                const rvec   xj,
 161                const rvec   xk,
 162                const rvec   xl,
 163                const t_pbc* pbc,
 164                rvec         r_ij,
 165                rvec         r_kj,
 166                rvec         r_kl,
 167                rvec         m,
 168                rvec         n,
 169                int*         t1,
 170                int*         t2,
 171                int*         t3)
 172 {
 173     *t1 = pbc_rvec_sub(pbc, xi, xj, r_ij); /*  3        */
 174     *t2 = pbc_rvec_sub(pbc, xk, xj, r_kj); /*  3                */
 175     *t3 = pbc_rvec_sub(pbc, xk, xl, r_kl); /*  3                */
 176
 177     cprod(r_ij, r_kj, m);        /*  9        */
 178     cprod(r_kj, r_kl, n);        /*  9          */
 179     real phi  = gmx_angle(m, n); /* 49 (assuming 25 for atan2) */
 180     real ipr  = iprod(r_ij, n);  /*  5        */
 181     real sign = (ipr < 0.0) ? -1.0 : 1.0;
 182     phi       = sign * phi; /*  1               */
 183     /* 82 TOTAL */
 184     return phi;
 185 }
 186 //! \endcond
 187
 188 void make_dp_periodic(real* dp) /* 1 flop? */
 189 {
 190     /* dp cannot be outside (-pi,pi) */
 191     if (*dp >= M_PI)
 192     {
 193         *dp -= 2 * M_PI;
 194     }
 195     else if (*dp < -M_PI)
 196     {
 197         *dp += 2 * M_PI;
 198     }
 199 }
 200
 201 namespace
 202 {
 203
 204 /*! \brief Spreads and accumulates the bonded forces to the two atoms and adds the virial contribution when needed
 205  *
 206  * \p shiftIndex is used as the periodic shift.
 207  */
 208 template<BondedKernelFlavor flavor>
 209 inline void spreadBondForces(const real bondForce,
 210                              const rvec dx,
 211                              const int  ai,
 212                              const int  aj,
 213                              rvec4*     f,
 214                              int        shiftIndex,
 215                              rvec*      fshift)
 216 {
 217     for (int m = 0; m < DIM; m++) /*  15          */
 218     {
 219         const real fij = bondForce * dx[m];
 220         f[ai][m] += fij;
 221         f[aj][m] -= fij;
 222         if (computeVirial(flavor))
 223         {
 224             fshift[shiftIndex][m] += fij;
 225             fshift[CENTRAL][m] -= fij;
 226         }
 227     }
 228 }
 229
 230 /*! \brief Morse potential bond
 231  *
 232  * By Frank Everdij. Three parameters needed:
 233  *
 234  * b0 = equilibrium distance in nm
 235  * be = beta in nm^-1 (actually, it's nu_e*Sqrt(2*pi*pi*mu/D_e))
 236  * cb = well depth in kJ/mol
 237  *
 238  * Note: the potential is referenced to be +cb at infinite separation
 239  *       and zero at the equilibrium distance!
 240  */
 241 template<BondedKernelFlavor flavor>
 242 real morse_bonds(int             nbonds,
 243                  const t_iatom   forceatoms[],
 244                  const t_iparams forceparams[],
 245                  const rvec      x[],
 246                  rvec4           f[],
 247                  rvec            fshift[],
 248                  const t_pbc*    pbc,
 249                  real            lambda,
 250                  real*           dvdlambda,
 251                  const t_mdatoms gmx_unused* md,
 252                  t_fcdata gmx_unused* fcd,
 253                  int gmx_unused* global_atom_index)
 254 {
 255     const real one = 1.0;
 256     const real two = 2.0;
 257     real       dr, dr2, temp, omtemp, cbomtemp, fbond, vbond, vtot;
 258     real       b0, be, cb, b0A, beA, cbA, b0B, beB, cbB, L1;
 259     rvec       dx;
 260     int        i, ki, type, ai, aj;
 261
 262     vtot = 0.0;
 263     for (i = 0; (i < nbonds);)
 264     {
 265         type = forceatoms[i++];
 266         ai   = forceatoms[i++];
 267         aj   = forceatoms[i++];
 268
 269         b0A = forceparams[type].morse.b0A;
 270         beA = forceparams[type].morse.betaA;
 271         cbA = forceparams[type].morse.cbA;
 272
 273         b0B = forceparams[type].morse.b0B;
 274         beB = forceparams[type].morse.betaB;
 275         cbB = forceparams[type].morse.cbB;
 276
 277         L1 = one - lambda;            /* 1 */
 278         b0 = L1 * b0A + lambda * b0B; /* 3 */
 279         be = L1 * beA + lambda * beB; /* 3 */
 280         cb = L1 * cbA + lambda * cbB; /* 3 */
 281
 282         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3          */
 283         dr2  = iprod(dx, dx);                       /*   5          */
 284         dr   = dr2 * gmx::invsqrt(dr2);             /*  10          */
 285         temp = std::exp(-be * (dr - b0));           /*  12          */
 286
 287         if (temp == one)
 288         {
 289             /* bonds are constrainted. This may _not_ include bond constraints if they are lambda dependent */
 290             *dvdlambda += cbB - cbA;
 291             continue;
 292         }
 293
 294         omtemp   = one - temp;                                      /*   1          */
 295         cbomtemp = cb * omtemp;                                     /*   1          */
 296         vbond    = cbomtemp * omtemp;                               /*   1          */
 297         fbond    = -two * be * temp * cbomtemp * gmx::invsqrt(dr2); /*   9          */
 298         vtot += vbond;                                              /*   1          */
 299
 300         *dvdlambda += (cbB - cbA) * omtemp * omtemp
 301                       - (2 - 2 * omtemp) * omtemp * cb * ((b0B - b0A) * be - (beB - beA) * (dr - b0)); /* 15 */
 302
 303         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, fshift); /* 15 */
 304     }                                                               /*  83 TOTAL    */
 305     return vtot;
 306 }
 307
 308 //! \cond
 309 template<BondedKernelFlavor flavor>
 310 real cubic_bonds(int             nbonds,
 311                  const t_iatom   forceatoms[],
 312                  const t_iparams forceparams[],
 313                  const rvec      x[],
 314                  rvec4           f[],
 315                  rvec            fshift[],
 316                  const t_pbc*    pbc,
 317                  real gmx_unused lambda,
 318                  real gmx_unused* dvdlambda,
 319                  const t_mdatoms gmx_unused* md,
 320                  t_fcdata gmx_unused* fcd,
 321                  int gmx_unused* global_atom_index)
 322 {
 323     const real three = 3.0;
 324     const real two   = 2.0;
 325     real       kb, b0, kcub;
 326     real       dr, dr2, dist, kdist, kdist2, fbond, vbond, vtot;
 327     rvec       dx;
 328     int        i, ki, type, ai, aj;
 329
 330     vtot = 0.0;
 331     for (i = 0; (i < nbonds);)
 332     {
 333         type = forceatoms[i++];
 334         ai   = forceatoms[i++];
 335         aj   = forceatoms[i++];
 336
 337         b0   = forceparams[type].cubic.b0;
 338         kb   = forceparams[type].cubic.kb;
 339         kcub = forceparams[type].cubic.kcub;
 340
 341         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3          */
 342         dr2 = iprod(dx, dx);                       /*   5          */
 343
 344         if (dr2 == 0.0)
 345         {
 346             continue;
 347         }
 348
 349         dr     = dr2 * gmx::invsqrt(dr2); /*  10          */
 350         dist   = dr - b0;
 351         kdist  = kb * dist;
 352         kdist2 = kdist * dist;
 353
 354         vbond = kdist2 + kcub * kdist2 * dist;
 355         fbond = -(two * kdist + three * kdist2 * kcub) / dr;
 356
 357         vtot += vbond; /* 21 */
 358
 359         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, fshift); /* 15 */
 360     }                                                               /*  54 TOTAL    */
 361     return vtot;
 362 }
 363
 364 template<BondedKernelFlavor flavor>
 365 real FENE_bonds(int             nbonds,
 366                 const t_iatom   forceatoms[],
 367                 const t_iparams forceparams[],
 368                 const rvec      x[],
 369                 rvec4           f[],
 370                 rvec            fshift[],
 371                 const t_pbc*    pbc,
 372                 real gmx_unused lambda,
 373                 real gmx_unused* dvdlambda,
 374                 const t_mdatoms gmx_unused* md,
 375                 t_fcdata gmx_unused* fcd,
 376                 int*                 global_atom_index)
 377 {
 378     const real half = 0.5;
 379     const real one  = 1.0;
 380     real       bm, kb;
 381     real       dr2, bm2, omdr2obm2, fbond, vbond, vtot;
 382     rvec       dx;
 383     int        i, ki, type, ai, aj;
 384
 385     vtot = 0.0;
 386     for (i = 0; (i < nbonds);)
 387     {
 388         type = forceatoms[i++];
 389         ai   = forceatoms[i++];
 390         aj   = forceatoms[i++];
 391
 392         bm = forceparams[type].fene.bm;
 393         kb = forceparams[type].fene.kb;
 394
 395         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3          */
 396         dr2 = iprod(dx, dx);                       /*   5          */
 397
 398         if (dr2 == 0.0)
 399         {
 400             continue;
 401         }
 402
 403         bm2 = bm * bm;
 404
 405         if (dr2 >= bm2)
 406         {
 407             gmx_fatal(FARGS, "r^2 (%f) >= bm^2 (%f) in FENE bond between atoms %d and %d", dr2, bm2,
 408                       glatnr(global_atom_index, ai), glatnr(global_atom_index, aj));
 409         }
 410
 411         omdr2obm2 = one - dr2 / bm2;
 412
 413         vbond = -half * kb * bm2 * std::log(omdr2obm2);
 414         fbond = -kb / omdr2obm2;
 415
 416         vtot += vbond; /* 35 */
 417
 418         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, fshift); /* 15 */
 419     }                                                               /*  58 TOTAL    */
 420     return vtot;
 421 }
 422
 423 real harmonic(real kA, real kB, real xA, real xB, real x, real lambda, real* V, real* F)
 424 {
 425     const real half = 0.5;
 426     real       L1, kk, x0, dx, dx2;
 427     real       v, f, dvdlambda;
 428
 429     L1 = 1.0 - lambda;
 430     kk = L1 * kA + lambda * kB;
 431     x0 = L1 * xA + lambda * xB;
 432
 433     dx  = x - x0;
 434     dx2 = dx * dx;
 435
 436     f         = -kk * dx;
 437     v         = half * kk * dx2;
 438     dvdlambda = half * (kB - kA) * dx2 + (xA - xB) * kk * dx;
 439
 440     *F = f;
 441     *V = v;
 442
 443     return dvdlambda;
 444
 445     /* That was 19 flops */
 446 }
 447
 448
 449 template<BondedKernelFlavor flavor>
 450 real bonds(int             nbonds,
 451            const t_iatom   forceatoms[],
 452            const t_iparams forceparams[],
 453            const rvec      x[],
 454            rvec4           f[],
 455            rvec            fshift[],
 456            const t_pbc*    pbc,
 457            real            lambda,
 458            real*           dvdlambda,
 459            const t_mdatoms gmx_unused* md,
 460            t_fcdata gmx_unused* fcd,
 461            int gmx_unused* global_atom_index)
 462 {
 463     int  i, ki, ai, aj, type;
 464     real dr, dr2, fbond, vbond, vtot;
 465     rvec dx;
 466
 467     vtot = 0.0;
 468     for (i = 0; (i < nbonds);)
 469     {
 470         type = forceatoms[i++];
 471         ai   = forceatoms[i++];
 472         aj   = forceatoms[i++];
 473
 474         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
 475         dr2 = iprod(dx, dx);                       /*   5               */
 476         dr  = std::sqrt(dr2);                      /*  10               */
 477
 478         *dvdlambda += harmonic(forceparams[type].harmonic.krA, forceparams[type].harmonic.krB,
 479                                forceparams[type].harmonic.rA, forceparams[type].harmonic.rB, dr,
 480                                lambda, &vbond, &fbond); /*  19  */
 481
 482         if (dr2 == 0.0)
 483         {
 484             continue;
 485         }
 486
 487
 488         vtot += vbond;              /* 1*/
 489         fbond *= gmx::invsqrt(dr2); /*   6              */
 490
 491         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, fshift); /* 15 */
 492     }                                                               /* 59 TOTAL */
 493     return vtot;
 494 }
 495
 496 template<BondedKernelFlavor flavor>
 497 real restraint_bonds(int             nbonds,
 498                      const t_iatom   forceatoms[],
 499                      const t_iparams forceparams[],
 500                      const rvec      x[],
 501                      rvec4           f[],
 502                      rvec            fshift[],
 503                      const t_pbc*    pbc,
 504                      real            lambda,
 505                      real*           dvdlambda,
 506                      const t_mdatoms gmx_unused* md,
 507                      t_fcdata gmx_unused* fcd,
 508                      int gmx_unused* global_atom_index)
 509 {
 510     int  i, ki, ai, aj, type;
 511     real dr, dr2, fbond, vbond, vtot;
 512     real L1;
 513     real low, dlow, up1, dup1, up2, dup2, k, dk;
 514     real drh, drh2;
 515     rvec dx;
 516
 517     L1 = 1.0 - lambda;
 518
 519     vtot = 0.0;
 520     for (i = 0; (i < nbonds);)
 521     {
 522         type = forceatoms[i++];
 523         ai   = forceatoms[i++];
 524         aj   = forceatoms[i++];
 525
 526         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
 527         dr2 = iprod(dx, dx);                       /*   5               */
 528         dr  = dr2 * gmx::invsqrt(dr2);             /*  10               */
 529
 530         low  = L1 * forceparams[type].restraint.lowA + lambda * forceparams[type].restraint.lowB;
 531         dlow = -forceparams[type].restraint.lowA + forceparams[type].restraint.lowB;
 532         up1  = L1 * forceparams[type].restraint.up1A + lambda * forceparams[type].restraint.up1B;
 533         dup1 = -forceparams[type].restraint.up1A + forceparams[type].restraint.up1B;
 534         up2  = L1 * forceparams[type].restraint.up2A + lambda * forceparams[type].restraint.up2B;
 535         dup2 = -forceparams[type].restraint.up2A + forceparams[type].restraint.up2B;
 536         k    = L1 * forceparams[type].restraint.kA + lambda * forceparams[type].restraint.kB;
 537         dk   = -forceparams[type].restraint.kA + forceparams[type].restraint.kB;
 538         /* 24 */
 539
 540         if (dr < low)
 541         {
 542             drh   = dr - low;
 543             drh2  = drh * drh;
 544             vbond = 0.5 * k * drh2;
 545             fbond = -k * drh;
 546             *dvdlambda += 0.5 * dk * drh2 - k * dlow * drh;
 547         } /* 11 */
 548         else if (dr <= up1)
 549         {
 550             vbond = 0;
 551             fbond = 0;
 552         }
 553         else if (dr <= up2)
 554         {
 555             drh   = dr - up1;
 556             drh2  = drh * drh;
 557             vbond = 0.5 * k * drh2;
 558             fbond = -k * drh;
 559             *dvdlambda += 0.5 * dk * drh2 - k * dup1 * drh;
 560         } /* 11 */
 561         else
 562         {
 563             drh   = dr - up2;
 564             vbond = k * (up2 - up1) * (0.5 * (up2 - up1) + drh);
 565             fbond = -k * (up2 - up1);
 566             *dvdlambda += dk * (up2 - up1) * (0.5 * (up2 - up1) + drh)
 567                           + k * (dup2 - dup1) * (up2 - up1 + drh) - k * (up2 - up1) * dup2;
 568         }
 569
 570         if (dr2 == 0.0)
 571         {
 572             continue;
 573         }
 574
 575         vtot += vbond;              /* 1*/
 576         fbond *= gmx::invsqrt(dr2); /*   6              */
 577
 578         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, fshift); /* 15 */
 579     }                                                               /* 59 TOTAL */
 580
 581     return vtot;
 582 }
 583
 584 template<BondedKernelFlavor flavor>
 585 real polarize(int              nbonds,
 586               const t_iatom    forceatoms[],
 587               const t_iparams  forceparams[],
 588               const rvec       x[],
 589               rvec4            f[],
 590               rvec             fshift[],
 591               const t_pbc*     pbc,
 592               real             lambda,
 593               real*            dvdlambda,
 594               const t_mdatoms* md,
 595               t_fcdata gmx_unused* fcd,
 596               int gmx_unused* global_atom_index)
 597 {
 598     int  i, ki, ai, aj, type;
 599     real dr, dr2, fbond, vbond, vtot, ksh;
 600     rvec dx;
 601
 602     vtot = 0.0;
 603     for (i = 0; (i < nbonds);)
 604     {
 605         type = forceatoms[i++];
 606         ai   = forceatoms[i++];
 607         aj   = forceatoms[i++];
 608         ksh  = gmx::square(md->chargeA[aj]) * ONE_4PI_EPS0 / forceparams[type].polarize.alpha;
 609
 610         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
 611         dr2 = iprod(dx, dx);                       /*   5               */
 612         dr  = std::sqrt(dr2);                      /*  10               */
 613
 614         *dvdlambda += harmonic(ksh, ksh, 0, 0, dr, lambda, &vbond, &fbond); /*  19  */
 615
 616         if (dr2 == 0.0)
 617         {
 618             continue;
 619         }
 620
 621         vtot += vbond;              /* 1*/
 622         fbond *= gmx::invsqrt(dr2); /*   6              */
 623
 624         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, fshift); /* 15 */
 625     }                                                               /* 59 TOTAL */
 626     return vtot;
 627 }
 628
 629 template<BondedKernelFlavor flavor>
 630 real anharm_polarize(int              nbonds,
 631                      const t_iatom    forceatoms[],
 632                      const t_iparams  forceparams[],
 633                      const rvec       x[],
 634                      rvec4            f[],
 635                      rvec             fshift[],
 636                      const t_pbc*     pbc,
 637                      real             lambda,
 638                      real*            dvdlambda,
 639                      const t_mdatoms* md,
 640                      t_fcdata gmx_unused* fcd,
 641                      int gmx_unused* global_atom_index)
 642 {
 643     int  i, ki, ai, aj, type;
 644     real dr, dr2, fbond, vbond, vtot, ksh, khyp, drcut, ddr, ddr3;
 645     rvec dx;
 646
 647     vtot = 0.0;
 648     for (i = 0; (i < nbonds);)
 649     {
 650         type = forceatoms[i++];
 651         ai   = forceatoms[i++];
 652         aj   = forceatoms[i++];
 653         ksh = gmx::square(md->chargeA[aj]) * ONE_4PI_EPS0 / forceparams[type].anharm_polarize.alpha; /* 7*/
 654         khyp  = forceparams[type].anharm_polarize.khyp;
 655         drcut = forceparams[type].anharm_polarize.drcut;
 656
 657         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
 658         dr2 = iprod(dx, dx);                       /*   5               */
 659         dr  = dr2 * gmx::invsqrt(dr2);             /*  10               */
 660
 661         *dvdlambda += harmonic(ksh, ksh, 0, 0, dr, lambda, &vbond, &fbond); /*  19  */
 662
 663         if (dr2 == 0.0)
 664         {
 665             continue;
 666         }
 667
 668         if (dr > drcut)
 669         {
 670             ddr  = dr - drcut;
 671             ddr3 = ddr * ddr * ddr;
 672             vbond += khyp * ddr * ddr3;
 673             fbond -= 4 * khyp * ddr3;
 674         }
 675         fbond *= gmx::invsqrt(dr2); /*   6              */
 676         vtot += vbond;              /* 1*/
 677
 678         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, fshift); /* 15 */
 679     }                                                               /* 72 TOTAL */
 680     return vtot;
 681 }
 682
 683 template<BondedKernelFlavor flavor>
 684 real water_pol(int             nbonds,
 685                const t_iatom   forceatoms[],
 686                const t_iparams forceparams[],
 687                const rvec      x[],
 688                rvec4           f[],
 689                rvec gmx_unused fshift[],
 690                const t_pbc gmx_unused* pbc,
 691                real gmx_unused lambda,
 692                real gmx_unused* dvdlambda,
 693                const t_mdatoms gmx_unused* md,
 694                t_fcdata gmx_unused* fcd,
 695                int gmx_unused* global_atom_index)
 696 {
 697     /* This routine implements anisotropic polarizibility for water, through
 698      * a shell connected to a dummy with spring constant that differ in the
 699      * three spatial dimensions in the molecular frame.
 700      */
 701     int  i, m, aO, aH1, aH2, aD, aS, type, type0, ki;
 702     rvec dOH1, dOH2, dHH, dOD, dDS, nW, kk, dx, kdx, proj;
 703     real vtot, fij, r_HH, r_OD, r_nW, tx, ty, tz, qS;
 704
 705     vtot = 0.0;
 706     if (nbonds > 0)
 707     {
 708         type0  = forceatoms[0];
 709         aS     = forceatoms[5];
 710         qS     = md->chargeA[aS];
 711         kk[XX] = gmx::square(qS) * ONE_4PI_EPS0 / forceparams[type0].wpol.al_x;
 712         kk[YY] = gmx::square(qS) * ONE_4PI_EPS0 / forceparams[type0].wpol.al_y;
 713         kk[ZZ] = gmx::square(qS) * ONE_4PI_EPS0 / forceparams[type0].wpol.al_z;
 714         r_HH   = 1.0 / forceparams[type0].wpol.rHH;
 715         for (i = 0; (i < nbonds); i += 6)
 716         {
 717             type = forceatoms[i];
 718             if (type != type0)
 719             {
 720                 gmx_fatal(FARGS, "Sorry, type = %d, type0 = %d, file = %s, line = %d", type, type0,
 721                           __FILE__, __LINE__);
 722             }
 723             aO  = forceatoms[i + 1];
 724             aH1 = forceatoms[i + 2];
 725             aH2 = forceatoms[i + 3];
 726             aD  = forceatoms[i + 4];
 727             aS  = forceatoms[i + 5];
 728
 729             /* Compute vectors describing the water frame */
 730             pbc_rvec_sub(pbc, x[aH1], x[aO], dOH1);
 731             pbc_rvec_sub(pbc, x[aH2], x[aO], dOH2);
 732             pbc_rvec_sub(pbc, x[aH2], x[aH1], dHH);
 733             pbc_rvec_sub(pbc, x[aD], x[aO], dOD);
 734             ki = pbc_rvec_sub(pbc, x[aS], x[aD], dDS);
 735             cprod(dOH1, dOH2, nW);
 736
 737             /* Compute inverse length of normal vector
 738              * (this one could be precomputed, but I'm too lazy now)
 739              */
 740             r_nW = gmx::invsqrt(iprod(nW, nW));
 741             /* This is for precision, but does not make a big difference,
 742              * it can go later.
 743              */
 744             r_OD = gmx::invsqrt(iprod(dOD, dOD));
 745
 746             /* Normalize the vectors in the water frame */
 747             svmul(r_nW, nW, nW);
 748             svmul(r_HH, dHH, dHH);
 749             svmul(r_OD, dOD, dOD);
 750
 751             /* Compute displacement of shell along components of the vector */
 752             dx[ZZ] = iprod(dDS, dOD);
 753             /* Compute projection on the XY plane: dDS - dx[ZZ]*dOD */
 754             for (m = 0; (m < DIM); m++)
 755             {
 756                 proj[m] = dDS[m] - dx[ZZ] * dOD[m];
 757             }
 758
 759             /*dx[XX] = iprod(dDS,nW);
 760                dx[YY] = iprod(dDS,dHH);*/
 761             dx[XX] = iprod(proj, nW);
 762             for (m = 0; (m < DIM); m++)
 763             {
 764                 proj[m] -= dx[XX] * nW[m];
 765             }
 766             dx[YY] = iprod(proj, dHH);
 767             /* Now compute the forces and energy */
 768             kdx[XX] = kk[XX] * dx[XX];
 769             kdx[YY] = kk[YY] * dx[YY];
 770             kdx[ZZ] = kk[ZZ] * dx[ZZ];
 771             vtot += iprod(dx, kdx);
 772
 773             for (m = 0; (m < DIM); m++)
 774             {
 775                 /* This is a tensor operation but written out for speed */
 776                 tx  = nW[m] * kdx[XX];
 777                 ty  = dHH[m] * kdx[YY];
 778                 tz  = dOD[m] * kdx[ZZ];
 779                 fij = -tx - ty - tz;
 780                 f[aS][m] += fij;
 781                 f[aD][m] -= fij;
 782                 if (computeVirial(flavor))
 783                 {
 784                     fshift[ki][m] += fij;
 785                     fshift[CENTRAL][m] -= fij;
 786                 }
 787             }
 788         }
 789     }
 790     return 0.5 * vtot;
 791 }
 792
 793 template<BondedKernelFlavor flavor>
 794 static real
 795 do_1_thole(const rvec xi, const rvec xj, rvec fi, rvec fj, const t_pbc* pbc, real qq, rvec fshift[], real afac)
 796 {
 797     rvec r12;
 798     real r12sq, r12_1, r12bar, v0, v1, fscal, ebar, fff;
 799     int  m, t;
 800
 801     t = pbc_rvec_sub(pbc, xi, xj, r12); /*  3 */
 802
 803     r12sq  = iprod(r12, r12);                                                     /*  5 */
 804     r12_1  = gmx::invsqrt(r12sq);                                                 /*  5 */
 805     r12bar = afac / r12_1;                                                        /*  5 */
 806     v0     = qq * ONE_4PI_EPS0 * r12_1;                                           /*  2 */
 807     ebar   = std::exp(-r12bar);                                                   /*  5 */
 808     v1     = (1 - (1 + 0.5 * r12bar) * ebar);                                     /*  4 */
 809     fscal  = ((v0 * r12_1) * v1 - v0 * 0.5 * afac * ebar * (r12bar + 1)) * r12_1; /* 9 */
 810
 811     for (m = 0; (m < DIM); m++)
 812     {
 813         fff = fscal * r12[m];
 814         fi[m] += fff;
 815         fj[m] -= fff;
 816         if (computeVirial(flavor))
 817         {
 818             fshift[t][m] += fff;
 819             fshift[CENTRAL][m] -= fff;
 820         }
 821     } /* 15 */
 822
 823     return v0 * v1; /* 1 */
 824     /* 54 */
 825 }
 826
 827 template<BondedKernelFlavor flavor>
 828 real thole_pol(int             nbonds,
 829                const t_iatom   forceatoms[],
 830                const t_iparams forceparams[],
 831                const rvec      x[],
 832                rvec4           f[],
 833                rvec            fshift[],
 834                const t_pbc*    pbc,
 835                real gmx_unused lambda,
 836                real gmx_unused* dvdlambda,
 837                const t_mdatoms* md,
 838                t_fcdata gmx_unused* fcd,
 839                int gmx_unused* global_atom_index)
 840 {
 841     /* Interaction between two pairs of particles with opposite charge */
 842     int  i, type, a1, da1, a2, da2;
 843     real q1, q2, qq, a, al1, al2, afac;
 844     real V = 0;
 845
 846     for (i = 0; (i < nbonds);)
 847     {
 848         type = forceatoms[i++];
 849         a1   = forceatoms[i++];
 850         da1  = forceatoms[i++];
 851         a2   = forceatoms[i++];
 852         da2  = forceatoms[i++];
 853         q1   = md->chargeA[da1];
 854         q2   = md->chargeA[da2];
 855         a    = forceparams[type].thole.a;
 856         al1  = forceparams[type].thole.alpha1;
 857         al2  = forceparams[type].thole.alpha2;
 858         qq   = q1 * q2;
 859         afac = a * gmx::invsixthroot(al1 * al2);
 860         V += do_1_thole<flavor>(x[a1], x[a2], f[a1], f[a2], pbc, qq, fshift, afac);
 861         V += do_1_thole<flavor>(x[da1], x[a2], f[da1], f[a2], pbc, -qq, fshift, afac);
 862         V += do_1_thole<flavor>(x[a1], x[da2], f[a1], f[da2], pbc, -qq, fshift, afac);
 863         V += do_1_thole<flavor>(x[da1], x[da2], f[da1], f[da2], pbc, qq, fshift, afac);
 864     }
 865     /* 290 flops */
 866     return V;
 867 }
 868
 869 // Avoid gcc 386 -O3 code generation bug in this function (see Issue
 870 // #3205 for more information)
 871 #if defined(__GNUC__) && defined(__i386__) && defined(__OPTIMIZE__)
 872 #    pragma GCC push_options
 873 #    pragma GCC optimize("O1")
 874 #    define avoid_gcc_i386_o3_code_generation_bug
 875 #endif
 876
 877 template<BondedKernelFlavor flavor>
 878 std::enable_if_t<flavor != BondedKernelFlavor::ForcesSimdWhenAvailable || !GMX_SIMD_HAVE_REAL, real>
 879 angles(int             nbonds,
 880        const t_iatom   forceatoms[],
 881        const t_iparams forceparams[],
 882        const rvec      x[],
 883        rvec4           f[],
 884        rvec            fshift[],
 885        const t_pbc*    pbc,
 886        real            lambda,
 887        real*           dvdlambda,
 888        const t_mdatoms gmx_unused* md,
 889        t_fcdata gmx_unused* fcd,
 890        int gmx_unused* global_atom_index)
 891 {
 892     int  i, ai, aj, ak, t1, t2, type;
 893     rvec r_ij, r_kj;
 894     real cos_theta, cos_theta2, theta, dVdt, va, vtot;
 895
 896     vtot = 0.0;
 897     for (i = 0; i < nbonds;)
 898     {
 899         type = forceatoms[i++];
 900         ai   = forceatoms[i++];
 901         aj   = forceatoms[i++];
 902         ak   = forceatoms[i++];
 903
 904         theta = bond_angle(x[ai], x[aj], x[ak], pbc, r_ij, r_kj, &cos_theta, &t1, &t2); /*  41 */
 905
 906         *dvdlambda += harmonic(forceparams[type].harmonic.krA, forceparams[type].harmonic.krB,
 907                                forceparams[type].harmonic.rA * DEG2RAD,
 908                                forceparams[type].harmonic.rB * DEG2RAD, theta, lambda, &va, &dVdt); /*  21  */
 909         vtot += va;
 910
 911         cos_theta2 = gmx::square(cos_theta);
 912         if (cos_theta2 < 1)
 913         {
 914             int  m;
 915             real st, sth;
 916             real cik, cii, ckk;
 917             real nrkj2, nrij2;
 918             real nrkj_1, nrij_1;
 919             rvec f_i, f_j, f_k;
 920
 921             st    = dVdt * gmx::invsqrt(1 - cos_theta2); /*  12         */
 922             sth   = st * cos_theta;                      /*   1         */
 923             nrij2 = iprod(r_ij, r_ij);                   /*   5         */
 924             nrkj2 = iprod(r_kj, r_kj);                   /*   5         */
 925
 926             nrij_1 = gmx::invsqrt(nrij2); /*  10                */
 927             nrkj_1 = gmx::invsqrt(nrkj2); /*  10                */
 928
 929             cik = st * nrij_1 * nrkj_1;  /*   2         */
 930             cii = sth * nrij_1 * nrij_1; /*   2         */
 931             ckk = sth * nrkj_1 * nrkj_1; /*   2         */
 932
 933             for (m = 0; m < DIM; m++)
 934             { /*  39            */
 935                 f_i[m] = -(cik * r_kj[m] - cii * r_ij[m]);
 936                 f_k[m] = -(cik * r_ij[m] - ckk * r_kj[m]);
 937                 f_j[m] = -f_i[m] - f_k[m];
 938                 f[ai][m] += f_i[m];
 939                 f[aj][m] += f_j[m];
 940                 f[ak][m] += f_k[m];
 941             }
 942             if (computeVirial(flavor))
 943             {
 944                 rvec_inc(fshift[t1], f_i);
 945                 rvec_inc(fshift[CENTRAL], f_j);
 946                 rvec_inc(fshift[t2], f_k);
 947             }
 948         } /* 161 TOTAL  */
 949     }
 950
 951     return vtot;
 952 }
 953
 954 #ifdef avoid_gcc_i386_o3_code_generation_bug
 955 #    pragma GCC pop_options
 956 #    undef avoid_gcc_i386_o3_code_generation_bug
 957 #endif
 958
 959 #if GMX_SIMD_HAVE_REAL
 960
 961 /* As angles, but using SIMD to calculate many angles at once.
 962  * This routines does not calculate energies and shift forces.
 963  */
 964 template<BondedKernelFlavor flavor>
 965 std::enable_if_t<flavor == BondedKernelFlavor::ForcesSimdWhenAvailable, real>
 966 angles(int             nbonds,
 967        const t_iatom   forceatoms[],
 968        const t_iparams forceparams[],
 969        const rvec      x[],
 970        rvec4           f[],
 971        rvec gmx_unused fshift[],
 972        const t_pbc*    pbc,
 973        real gmx_unused lambda,
 974        real gmx_unused* dvdlambda,
 975        const t_mdatoms gmx_unused* md,
 976        t_fcdata gmx_unused* fcd,
 977        int gmx_unused* global_atom_index)
 978 {
 979     const int                                nfa1 = 4;
 980     int                                      i, iu, s;
 981     int                                      type;
 982     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ai[GMX_SIMD_REAL_WIDTH];
 983     alignas(GMX_SIMD_ALIGNMENT) std::int32_t aj[GMX_SIMD_REAL_WIDTH];
 984     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ak[GMX_SIMD_REAL_WIDTH];
 985     alignas(GMX_SIMD_ALIGNMENT) real         coeff[2 * GMX_SIMD_REAL_WIDTH];
 986     SimdReal                                 deg2rad_S(DEG2RAD);
 987     SimdReal                                 xi_S, yi_S, zi_S;
 988     SimdReal                                 xj_S, yj_S, zj_S;
 989     SimdReal                                 xk_S, yk_S, zk_S;
 990     SimdReal                                 k_S, theta0_S;
 991     SimdReal                                 rijx_S, rijy_S, rijz_S;
 992     SimdReal                                 rkjx_S, rkjy_S, rkjz_S;
 993     SimdReal                                 one_S(1.0);
 994     SimdReal min_one_plus_eps_S(-1.0 + 2.0 * GMX_REAL_EPS); // Smallest number > -1
 995
 996     SimdReal                         rij_rkj_S;
 997     SimdReal                         nrij2_S, nrij_1_S;
 998     SimdReal                         nrkj2_S, nrkj_1_S;
 999     SimdReal                         cos_S, invsin_S;
1000     SimdReal                         cos2_S;
1001     SimdReal                         theta_S;
1002     SimdReal                         st_S, sth_S;
1003     SimdReal                         cik_S, cii_S, ckk_S;
1004     SimdReal                         f_ix_S, f_iy_S, f_iz_S;
1005     SimdReal                         f_kx_S, f_ky_S, f_kz_S;
1006     alignas(GMX_SIMD_ALIGNMENT) real pbc_simd[9 * GMX_SIMD_REAL_WIDTH];
1007
1008     set_pbc_simd(pbc, pbc_simd);
1009
1010     /* nbonds is the number of angles times nfa1, here we step GMX_SIMD_REAL_WIDTH angles */
1011     for (i = 0; (i < nbonds); i += GMX_SIMD_REAL_WIDTH * nfa1)
1012     {
1013         /* Collect atoms for GMX_SIMD_REAL_WIDTH angles.
1014          * iu indexes into forceatoms, we should not let iu go beyond nbonds.
1015          */
1016         iu = i;
1017         for (s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
1018         {
1019             type  = forceatoms[iu];
1020             ai[s] = forceatoms[iu + 1];
1021             aj[s] = forceatoms[iu + 2];
1022             ak[s] = forceatoms[iu + 3];
1023
1024             /* At the end fill the arrays with the last atoms and 0 params */
1025             if (i + s * nfa1 < nbonds)
1026             {
1027                 coeff[s]                       = forceparams[type].harmonic.krA;
1028                 coeff[GMX_SIMD_REAL_WIDTH + s] = forceparams[type].harmonic.rA;
1029
1030                 if (iu + nfa1 < nbonds)
1031                 {
1032                     iu += nfa1;
1033                 }
1034             }
1035             else
1036             {
1037                 coeff[s]                       = 0;
1038                 coeff[GMX_SIMD_REAL_WIDTH + s] = 0;
1039             }
1040         }
1041
1042         /* Store the non PBC corrected distances packed and aligned */
1043         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ai, &xi_S, &yi_S, &zi_S);
1044         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), aj, &xj_S, &yj_S, &zj_S);
1045         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ak, &xk_S, &yk_S, &zk_S);
1046         rijx_S = xi_S - xj_S;
1047         rijy_S = yi_S - yj_S;
1048         rijz_S = zi_S - zj_S;
1049         rkjx_S = xk_S - xj_S;
1050         rkjy_S = yk_S - yj_S;
1051         rkjz_S = zk_S - zj_S;
1052
1053         k_S      = load<SimdReal>(coeff);
1054         theta0_S = load<SimdReal>(coeff + GMX_SIMD_REAL_WIDTH) * deg2rad_S;
1055
1056         pbc_correct_dx_simd(&rijx_S, &rijy_S, &rijz_S, pbc_simd);
1057         pbc_correct_dx_simd(&rkjx_S, &rkjy_S, &rkjz_S, pbc_simd);
1058
1059         rij_rkj_S = iprod(rijx_S, rijy_S, rijz_S, rkjx_S, rkjy_S, rkjz_S);
1060
1061         nrij2_S = norm2(rijx_S, rijy_S, rijz_S);
1062         nrkj2_S = norm2(rkjx_S, rkjy_S, rkjz_S);
1063
1064         nrij_1_S = invsqrt(nrij2_S);
1065         nrkj_1_S = invsqrt(nrkj2_S);
1066
1067         cos_S = rij_rkj_S * nrij_1_S * nrkj_1_S;
1068
1069         /* We compute cos^2 using a division instead of squaring cos_S,
1070          * as we would then get additional error contributions from
1071          * the two invsqrt operations, which get amplified by
1072          * the 1/sqrt(1-cos^2) for cos values close to 1.
1073          *
1074          * Note that the non-SIMD version of angles() avoids this issue
1075          * by computing the cosine using double precision.
1076          */
1077         cos2_S = rij_rkj_S * rij_rkj_S / (nrij2_S * nrkj2_S);
1078
1079         /* To allow for 180 degrees, we take the max of cos and -1 + 1bit,
1080          * so we can safely get the 1/sin from 1/sqrt(1 - cos^2).
1081          * This also ensures that rounding errors would cause the argument
1082          * of simdAcos to be < -1.
1083          * Note that we do not take precautions for cos(0)=1, so the outer
1084          * atoms in an angle should not be on top of each other.
1085          */
1086         cos_S = max(cos_S, min_one_plus_eps_S);
1087
1088         theta_S = acos(cos_S);
1089
1090         invsin_S = invsqrt(one_S - cos2_S);
1091
1092         st_S  = k_S * (theta0_S - theta_S) * invsin_S;
1093         sth_S = st_S * cos_S;
1094
1095         cik_S = st_S * nrij_1_S * nrkj_1_S;
1096         cii_S = sth_S * nrij_1_S * nrij_1_S;
1097         ckk_S = sth_S * nrkj_1_S * nrkj_1_S;
1098
1099         f_ix_S = cii_S * rijx_S;
1100         f_ix_S = fnma(cik_S, rkjx_S, f_ix_S);
1101         f_iy_S = cii_S * rijy_S;
1102         f_iy_S = fnma(cik_S, rkjy_S, f_iy_S);
1103         f_iz_S = cii_S * rijz_S;
1104         f_iz_S = fnma(cik_S, rkjz_S, f_iz_S);
1105         f_kx_S = ckk_S * rkjx_S;
1106         f_kx_S = fnma(cik_S, rijx_S, f_kx_S);
1107         f_ky_S = ckk_S * rkjy_S;
1108         f_ky_S = fnma(cik_S, rijy_S, f_ky_S);
1109         f_kz_S = ckk_S * rkjz_S;
1110         f_kz_S = fnma(cik_S, rijz_S, f_kz_S);
1111
1112         transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ai, f_ix_S, f_iy_S, f_iz_S);
1113         transposeScatterDecrU<4>(reinterpret_cast<real*>(f), aj, f_ix_S + f_kx_S, f_iy_S + f_ky_S,
1114                                  f_iz_S + f_kz_S);
1115         transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ak, f_kx_S, f_ky_S, f_kz_S);
1116     }
1117
1118     return 0;
1119 }
1120
1121 #endif // GMX_SIMD_HAVE_REAL
1122
1123 template<BondedKernelFlavor flavor>
1124 real linear_angles(int             nbonds,
1125                    const t_iatom   forceatoms[],
1126                    const t_iparams forceparams[],
1127                    const rvec      x[],
1128                    rvec4           f[],
1129                    rvec            fshift[],
1130                    const t_pbc*    pbc,
1131                    real            lambda,
1132                    real*           dvdlambda,
1133                    const t_mdatoms gmx_unused* md,
1134                    t_fcdata gmx_unused* fcd,
1135                    int gmx_unused* global_atom_index)
1136 {
1137     int  i, m, ai, aj, ak, t1, t2, type;
1138     rvec f_i, f_j, f_k;
1139     real L1, kA, kB, aA, aB, dr, dr2, va, vtot, a, b, klin;
1140     rvec r_ij, r_kj, r_ik, dx;
1141
1142     L1   = 1 - lambda;
1143     vtot = 0.0;
1144     for (i = 0; (i < nbonds);)
1145     {
1146         type = forceatoms[i++];
1147         ai   = forceatoms[i++];
1148         aj   = forceatoms[i++];
1149         ak   = forceatoms[i++];
1150
1151         kA   = forceparams[type].linangle.klinA;
1152         kB   = forceparams[type].linangle.klinB;
1153         klin = L1 * kA + lambda * kB;
1154
1155         aA = forceparams[type].linangle.aA;
1156         aB = forceparams[type].linangle.aB;
1157         a  = L1 * aA + lambda * aB;
1158         b  = 1 - a;
1159
1160         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], r_ij);
1161         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], r_kj);
1162         rvec_sub(r_ij, r_kj, r_ik);
1163
1164         dr2 = 0;
1165         for (m = 0; (m < DIM); m++)
1166         {
1167             dr = -a * r_ij[m] - b * r_kj[m];
1168             dr2 += dr * dr;
1169             dx[m]  = dr;
1170             f_i[m] = a * klin * dr;
1171             f_k[m] = b * klin * dr;
1172             f_j[m] = -(f_i[m] + f_k[m]);
1173             f[ai][m] += f_i[m];
1174             f[aj][m] += f_j[m];
1175             f[ak][m] += f_k[m];
1176         }
1177         va = 0.5 * klin * dr2;
1178         *dvdlambda += 0.5 * (kB - kA) * dr2 + klin * (aB - aA) * iprod(dx, r_ik);
1179
1180         vtot += va;
1181
1182         if (computeVirial(flavor))
1183         {
1184             rvec_inc(fshift[t1], f_i);
1185             rvec_inc(fshift[CENTRAL], f_j);
1186             rvec_inc(fshift[t2], f_k);
1187         }
1188     } /* 57 TOTAL       */
1189     return vtot;
1190 }
1191
1192 template<BondedKernelFlavor flavor>
1193 std::enable_if_t<flavor != BondedKernelFlavor::ForcesSimdWhenAvailable || !GMX_SIMD_HAVE_REAL, real>
1194 urey_bradley(int             nbonds,
1195              const t_iatom   forceatoms[],
1196              const t_iparams forceparams[],
1197              const rvec      x[],
1198              rvec4           f[],
1199              rvec            fshift[],
1200              const t_pbc*    pbc,
1201              real            lambda,
1202              real*           dvdlambda,
1203              const t_mdatoms gmx_unused* md,
1204              t_fcdata gmx_unused* fcd,
1205              int gmx_unused* global_atom_index)
1206 {
1207     int  i, m, ai, aj, ak, t1, t2, type, ki;
1208     rvec r_ij, r_kj, r_ik;
1209     real cos_theta, cos_theta2, theta;
1210     real dVdt, va, vtot, dr, dr2, vbond, fbond, fik;
1211     real kthA, th0A, kUBA, r13A, kthB, th0B, kUBB, r13B;
1212
1213     vtot = 0.0;
1214     for (i = 0; (i < nbonds);)
1215     {
1216         type = forceatoms[i++];
1217         ai   = forceatoms[i++];
1218         aj   = forceatoms[i++];
1219         ak   = forceatoms[i++];
1220         th0A = forceparams[type].u_b.thetaA * DEG2RAD;
1221         kthA = forceparams[type].u_b.kthetaA;
1222         r13A = forceparams[type].u_b.r13A;
1223         kUBA = forceparams[type].u_b.kUBA;
1224         th0B = forceparams[type].u_b.thetaB * DEG2RAD;
1225         kthB = forceparams[type].u_b.kthetaB;
1226         r13B = forceparams[type].u_b.r13B;
1227         kUBB = forceparams[type].u_b.kUBB;
1228
1229         theta = bond_angle(x[ai], x[aj], x[ak], pbc, r_ij, r_kj, &cos_theta, &t1, &t2); /*  41 */
1230
1231         *dvdlambda += harmonic(kthA, kthB, th0A, th0B, theta, lambda, &va, &dVdt); /*  21  */
1232         vtot += va;
1233
1234         ki  = pbc_rvec_sub(pbc, x[ai], x[ak], r_ik); /*   3      */
1235         dr2 = iprod(r_ik, r_ik);                     /*   5             */
1236         dr  = dr2 * gmx::invsqrt(dr2);               /*  10             */
1237
1238         *dvdlambda += harmonic(kUBA, kUBB, r13A, r13B, dr, lambda, &vbond, &fbond); /*  19  */
1239
1240         cos_theta2 = gmx::square(cos_theta); /*   1             */
1241         if (cos_theta2 < 1)
1242         {
1243             real st, sth;
1244             real cik, cii, ckk;
1245             real nrkj2, nrij2;
1246             rvec f_i, f_j, f_k;
1247
1248             st    = dVdt * gmx::invsqrt(1 - cos_theta2); /*  12         */
1249             sth   = st * cos_theta;                      /*   1         */
1250             nrkj2 = iprod(r_kj, r_kj);                   /*   5         */
1251             nrij2 = iprod(r_ij, r_ij);
1252
1253             cik = st * gmx::invsqrt(nrkj2 * nrij2); /*  12              */
1254             cii = sth / nrij2;                      /*  10              */
1255             ckk = sth / nrkj2;                      /*  10              */
1256
1257             for (m = 0; (m < DIM); m++) /*  39          */
1258             {
1259                 f_i[m] = -(cik * r_kj[m] - cii * r_ij[m]);
1260                 f_k[m] = -(cik * r_ij[m] - ckk * r_kj[m]);
1261                 f_j[m] = -f_i[m] - f_k[m];
1262                 f[ai][m] += f_i[m];
1263                 f[aj][m] += f_j[m];
1264                 f[ak][m] += f_k[m];
1265             }
1266             if (computeVirial(flavor))
1267             {
1268                 rvec_inc(fshift[t1], f_i);
1269                 rvec_inc(fshift[CENTRAL], f_j);
1270                 rvec_inc(fshift[t2], f_k);
1271             }
1272         } /* 161 TOTAL  */
1273         /* Time for the bond calculations */
1274         if (dr2 == 0.0)
1275         {
1276             continue;
1277         }
1278
1279         vtot += vbond;              /* 1*/
1280         fbond *= gmx::invsqrt(dr2); /*   6              */
1281
1282         for (m = 0; (m < DIM); m++) /*  15              */
1283         {
1284             fik = fbond * r_ik[m];
1285             f[ai][m] += fik;
1286             f[ak][m] -= fik;
1287             if (computeVirial(flavor))
1288             {
1289                 fshift[ki][m] += fik;
1290                 fshift[CENTRAL][m] -= fik;
1291             }
1292         }
1293     }
1294     return vtot;
1295 }
1296
1297 #if GMX_SIMD_HAVE_REAL
1298
1299 /* As urey_bradley, but using SIMD to calculate many potentials at once.
1300  * This routines does not calculate energies and shift forces.
1301  */
1302 template<BondedKernelFlavor flavor>
1303 std::enable_if_t<flavor == BondedKernelFlavor::ForcesSimdWhenAvailable, real>
1304 urey_bradley(int             nbonds,
1305              const t_iatom   forceatoms[],
1306              const t_iparams forceparams[],
1307              const rvec      x[],
1308              rvec4           f[],
1309              rvec gmx_unused fshift[],
1310              const t_pbc*    pbc,
1311              real gmx_unused lambda,
1312              real gmx_unused* dvdlambda,
1313              const t_mdatoms gmx_unused* md,
1314              t_fcdata gmx_unused* fcd,
1315              int gmx_unused* global_atom_index)
1316 {
1317     constexpr int                            nfa1 = 4;
1318     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ai[GMX_SIMD_REAL_WIDTH];
1319     alignas(GMX_SIMD_ALIGNMENT) std::int32_t aj[GMX_SIMD_REAL_WIDTH];
1320     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ak[GMX_SIMD_REAL_WIDTH];
1321     alignas(GMX_SIMD_ALIGNMENT) real         coeff[4 * GMX_SIMD_REAL_WIDTH];
1322     alignas(GMX_SIMD_ALIGNMENT) real         pbc_simd[9 * GMX_SIMD_REAL_WIDTH];
1323
1324     set_pbc_simd(pbc, pbc_simd);
1325
1326     /* nbonds is the number of angles times nfa1, here we step GMX_SIMD_REAL_WIDTH angles */
1327     for (int i = 0; i < nbonds; i += GMX_SIMD_REAL_WIDTH * nfa1)
1328     {
1329         /* Collect atoms for GMX_SIMD_REAL_WIDTH angles.
1330          * iu indexes into forceatoms, we should not let iu go beyond nbonds.
1331          */
1332         int iu = i;
1333         for (int s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
1334         {
1335             const int type = forceatoms[iu];
1336             ai[s]          = forceatoms[iu + 1];
1337             aj[s]          = forceatoms[iu + 2];
1338             ak[s]          = forceatoms[iu + 3];
1339
1340             /* At the end fill the arrays with the last atoms and 0 params */
1341             if (i + s * nfa1 < nbonds)
1342             {
1343                 coeff[s]                           = forceparams[type].u_b.kthetaA;
1344                 coeff[GMX_SIMD_REAL_WIDTH + s]     = forceparams[type].u_b.thetaA;
1345                 coeff[GMX_SIMD_REAL_WIDTH * 2 + s] = forceparams[type].u_b.kUBA;
1346                 coeff[GMX_SIMD_REAL_WIDTH * 3 + s] = forceparams[type].u_b.r13A;
1347
1348                 if (iu + nfa1 < nbonds)
1349                 {
1350                     iu += nfa1;
1351                 }
1352             }
1353             else
1354             {
1355                 coeff[s]                           = 0;
1356                 coeff[GMX_SIMD_REAL_WIDTH + s]     = 0;
1357                 coeff[GMX_SIMD_REAL_WIDTH * 2 + s] = 0;
1358                 coeff[GMX_SIMD_REAL_WIDTH * 3 + s] = 0;
1359             }
1360         }
1361
1362         SimdReal xi_S, yi_S, zi_S;
1363         SimdReal xj_S, yj_S, zj_S;
1364         SimdReal xk_S, yk_S, zk_S;
1365
1366         /* Store the non PBC corrected distances packed and aligned */
1367         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ai, &xi_S, &yi_S, &zi_S);
1368         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), aj, &xj_S, &yj_S, &zj_S);
1369         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ak, &xk_S, &yk_S, &zk_S);
1370         SimdReal rijx_S = xi_S - xj_S;
1371         SimdReal rijy_S = yi_S - yj_S;
1372         SimdReal rijz_S = zi_S - zj_S;
1373         SimdReal rkjx_S = xk_S - xj_S;
1374         SimdReal rkjy_S = yk_S - yj_S;
1375         SimdReal rkjz_S = zk_S - zj_S;
1376         SimdReal rikx_S = xi_S - xk_S;
1377         SimdReal riky_S = yi_S - yk_S;
1378         SimdReal rikz_S = zi_S - zk_S;
1379
1380         const SimdReal ktheta_S = load<SimdReal>(coeff);
1381         const SimdReal theta0_S = load<SimdReal>(coeff + GMX_SIMD_REAL_WIDTH) * DEG2RAD;
1382         const SimdReal kUB_S    = load<SimdReal>(coeff + 2 * GMX_SIMD_REAL_WIDTH);
1383         const SimdReal r13_S    = load<SimdReal>(coeff + 3 * GMX_SIMD_REAL_WIDTH);
1384
1385         pbc_correct_dx_simd(&rijx_S, &rijy_S, &rijz_S, pbc_simd);
1386         pbc_correct_dx_simd(&rkjx_S, &rkjy_S, &rkjz_S, pbc_simd);
1387         pbc_correct_dx_simd(&rikx_S, &riky_S, &rikz_S, pbc_simd);
1388
1389         const SimdReal rij_rkj_S = iprod(rijx_S, rijy_S, rijz_S, rkjx_S, rkjy_S, rkjz_S);
1390
1391         const SimdReal dr2_S = iprod(rikx_S, riky_S, rikz_S, rikx_S, riky_S, rikz_S);
1392
1393         const SimdReal nrij2_S = norm2(rijx_S, rijy_S, rijz_S);
1394         const SimdReal nrkj2_S = norm2(rkjx_S, rkjy_S, rkjz_S);
1395
1396         const SimdReal nrij_1_S = invsqrt(nrij2_S);
1397         const SimdReal nrkj_1_S = invsqrt(nrkj2_S);
1398         const SimdReal invdr2_S = invsqrt(dr2_S);
1399         const SimdReal dr_S     = dr2_S * invdr2_S;
1400
1401         constexpr real min_one_plus_eps = -1.0 + 2.0 * GMX_REAL_EPS; // Smallest number > -1
1402
1403         /* To allow for 180 degrees, we take the max of cos and -1 + 1bit,
1404          * so we can safely get the 1/sin from 1/sqrt(1 - cos^2).
1405          * This also ensures that rounding errors would cause the argument
1406          * of simdAcos to be < -1.
1407          * Note that we do not take precautions for cos(0)=1, so the bonds
1408          * in an angle should not align at an angle of 0 degrees.
1409          */
1410         const SimdReal cos_S = max(rij_rkj_S * nrij_1_S * nrkj_1_S, min_one_plus_eps);
1411
1412         const SimdReal theta_S  = acos(cos_S);
1413         const SimdReal invsin_S = invsqrt(1.0 - cos_S * cos_S);
1414         const SimdReal st_S     = ktheta_S * (theta0_S - theta_S) * invsin_S;
1415         const SimdReal sth_S    = st_S * cos_S;
1416
1417         const SimdReal cik_S = st_S * nrij_1_S * nrkj_1_S;
1418         const SimdReal cii_S = sth_S * nrij_1_S * nrij_1_S;
1419         const SimdReal ckk_S = sth_S * nrkj_1_S * nrkj_1_S;
1420
1421         const SimdReal sUB_S = kUB_S * (r13_S - dr_S) * invdr2_S;
1422
1423         const SimdReal f_ikx_S = sUB_S * rikx_S;
1424         const SimdReal f_iky_S = sUB_S * riky_S;
1425         const SimdReal f_ikz_S = sUB_S * rikz_S;
1426
1427         const SimdReal f_ix_S = fnma(cik_S, rkjx_S, cii_S * rijx_S) + f_ikx_S;
1428         const SimdReal f_iy_S = fnma(cik_S, rkjy_S, cii_S * rijy_S) + f_iky_S;
1429         const SimdReal f_iz_S = fnma(cik_S, rkjz_S, cii_S * rijz_S) + f_ikz_S;
1430         const SimdReal f_kx_S = fnma(cik_S, rijx_S, ckk_S * rkjx_S) - f_ikx_S;
1431         const SimdReal f_ky_S = fnma(cik_S, rijy_S, ckk_S * rkjy_S) - f_iky_S;
1432         const SimdReal f_kz_S = fnma(cik_S, rijz_S, ckk_S * rkjz_S) - f_ikz_S;
1433
1434         transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ai, f_ix_S, f_iy_S, f_iz_S);
1435         transposeScatterDecrU<4>(reinterpret_cast<real*>(f), aj, f_ix_S + f_kx_S, f_iy_S + f_ky_S,
1436                                  f_iz_S + f_kz_S);
1437         transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ak, f_kx_S, f_ky_S, f_kz_S);
1438     }
1439
1440     return 0;
1441 }
1442
1443 #endif // GMX_SIMD_HAVE_REAL
1444
1445 template<BondedKernelFlavor flavor>
1446 real quartic_angles(int             nbonds,
1447                     const t_iatom   forceatoms[],
1448                     const t_iparams forceparams[],
1449                     const rvec      x[],
1450                     rvec4           f[],
1451                     rvec            fshift[],
1452                     const t_pbc*    pbc,
1453                     real gmx_unused lambda,
1454                     real gmx_unused* dvdlambda,
1455                     const t_mdatoms gmx_unused* md,
1456                     t_fcdata gmx_unused* fcd,
1457                     int gmx_unused* global_atom_index)
1458 {
1459     int  i, j, ai, aj, ak, t1, t2, type;
1460     rvec r_ij, r_kj;
1461     real cos_theta, cos_theta2, theta, dt, dVdt, va, dtp, c, vtot;
1462
1463     vtot = 0.0;
1464     for (i = 0; (i < nbonds);)
1465     {
1466         type = forceatoms[i++];
1467         ai   = forceatoms[i++];
1468         aj   = forceatoms[i++];
1469         ak   = forceatoms[i++];
1470
1471         theta = bond_angle(x[ai], x[aj], x[ak], pbc, r_ij, r_kj, &cos_theta, &t1, &t2); /*  41 */
1472
1473         dt = theta - forceparams[type].qangle.theta * DEG2RAD; /* 2          */
1474
1475         dVdt = 0;
1476         va   = forceparams[type].qangle.c[0];
1477         dtp  = 1.0;
1478         for (j = 1; j <= 4; j++)
1479         {
1480             c = forceparams[type].qangle.c[j];
1481             dVdt -= j * c * dtp;
1482             dtp *= dt;
1483             va += c * dtp;
1484         }
1485         /* 20 */
1486
1487         vtot += va;
1488
1489         cos_theta2 = gmx::square(cos_theta); /*   1             */
1490         if (cos_theta2 < 1)
1491         {
1492             int  m;
1493             real st, sth;
1494             real cik, cii, ckk;
1495             real nrkj2, nrij2;
1496             rvec f_i, f_j, f_k;
1497
1498             st    = dVdt * gmx::invsqrt(1 - cos_theta2); /*  12         */
1499             sth   = st * cos_theta;                      /*   1         */
1500             nrkj2 = iprod(r_kj, r_kj);                   /*   5         */
1501             nrij2 = iprod(r_ij, r_ij);
1502
1503             cik = st * gmx::invsqrt(nrkj2 * nrij2); /*  12              */
1504             cii = sth / nrij2;                      /*  10              */
1505             ckk = sth / nrkj2;                      /*  10              */
1506
1507             for (m = 0; (m < DIM); m++) /*  39          */
1508             {
1509                 f_i[m] = -(cik * r_kj[m] - cii * r_ij[m]);
1510                 f_k[m] = -(cik * r_ij[m] - ckk * r_kj[m]);
1511                 f_j[m] = -f_i[m] - f_k[m];
1512                 f[ai][m] += f_i[m];
1513                 f[aj][m] += f_j[m];
1514                 f[ak][m] += f_k[m];
1515             }
1516
1517             if (computeVirial(flavor))
1518             {
1519                 rvec_inc(fshift[t1], f_i);
1520                 rvec_inc(fshift[CENTRAL], f_j);
1521                 rvec_inc(fshift[t2], f_k);
1522             }
1523         } /* 153 TOTAL  */
1524     }
1525     return vtot;
1526 }
1527
1528
1529 #if GMX_SIMD_HAVE_REAL
1530
1531 /* As dih_angle above, but calculates 4 dihedral angles at once using SIMD,
1532  * also calculates the pre-factor required for the dihedral force update.
1533  * Note that bv and buf should be register aligned.
1534  */
1535 inline void dih_angle_simd(const rvec* x,
1536                            const int*  ai,
1537                            const int*  aj,
1538                            const int*  ak,
1539                            const int*  al,
1540                            const real* pbc_simd,
1541                            SimdReal*   phi_S,
1542                            SimdReal*   mx_S,
1543                            SimdReal*   my_S,
1544                            SimdReal*   mz_S,
1545                            SimdReal*   nx_S,
1546                            SimdReal*   ny_S,
1547                            SimdReal*   nz_S,
1548                            SimdReal*   nrkj_m2_S,
1549                            SimdReal*   nrkj_n2_S,
1550                            SimdReal*   p_S,
1551                            SimdReal*   q_S)
1552 {
1553     SimdReal xi_S, yi_S, zi_S;
1554     SimdReal xj_S, yj_S, zj_S;
1555     SimdReal xk_S, yk_S, zk_S;
1556     SimdReal xl_S, yl_S, zl_S;
1557     SimdReal rijx_S, rijy_S, rijz_S;
1558     SimdReal rkjx_S, rkjy_S, rkjz_S;
1559     SimdReal rklx_S, rkly_S, rklz_S;
1560     SimdReal cx_S, cy_S, cz_S;
1561     SimdReal cn_S;
1562     SimdReal s_S;
1563     SimdReal ipr_S;
1564     SimdReal iprm_S, iprn_S;
1565     SimdReal nrkj2_S, nrkj_1_S, nrkj_2_S, nrkj_S;
1566     SimdReal toler_S;
1567     SimdReal nrkj2_min_S;
1568     SimdReal real_eps_S;
1569
1570     /* Used to avoid division by zero.
1571      * We take into acount that we multiply the result by real_eps_S.
1572      */
1573     nrkj2_min_S = SimdReal(GMX_REAL_MIN / (2 * GMX_REAL_EPS));
1574
1575     /* The value of the last significant bit (GMX_REAL_EPS is half of that) */
1576     real_eps_S = SimdReal(2 * GMX_REAL_EPS);
1577
1578     /* Store the non PBC corrected distances packed and aligned */
1579     gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ai, &xi_S, &yi_S, &zi_S);
1580     gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), aj, &xj_S, &yj_S, &zj_S);
1581     gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ak, &xk_S, &yk_S, &zk_S);
1582     gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), al, &xl_S, &yl_S, &zl_S);
1583     rijx_S = xi_S - xj_S;
1584     rijy_S = yi_S - yj_S;
1585     rijz_S = zi_S - zj_S;
1586     rkjx_S = xk_S - xj_S;
1587     rkjy_S = yk_S - yj_S;
1588     rkjz_S = zk_S - zj_S;
1589     rklx_S = xk_S - xl_S;
1590     rkly_S = yk_S - yl_S;
1591     rklz_S = zk_S - zl_S;
1592
1593     pbc_correct_dx_simd(&rijx_S, &rijy_S, &rijz_S, pbc_simd);
1594     pbc_correct_dx_simd(&rkjx_S, &rkjy_S, &rkjz_S, pbc_simd);
1595     pbc_correct_dx_simd(&rklx_S, &rkly_S, &rklz_S, pbc_simd);
1596
1597     cprod(rijx_S, rijy_S, rijz_S, rkjx_S, rkjy_S, rkjz_S, mx_S, my_S, mz_S);
1598
1599     cprod(rkjx_S, rkjy_S, rkjz_S, rklx_S, rkly_S, rklz_S, nx_S, ny_S, nz_S);
1600
1601     cprod(*mx_S, *my_S, *mz_S, *nx_S, *ny_S, *nz_S, &cx_S, &cy_S, &cz_S);
1602
1603     cn_S = sqrt(norm2(cx_S, cy_S, cz_S));
1604
1605     s_S = iprod(*mx_S, *my_S, *mz_S, *nx_S, *ny_S, *nz_S);
1606
1607     /* Determine the dihedral angle, the sign might need correction */
1608     *phi_S = atan2(cn_S, s_S);
1609
1610     ipr_S = iprod(rijx_S, rijy_S, rijz_S, *nx_S, *ny_S, *nz_S);
1611
1612     iprm_S = norm2(*mx_S, *my_S, *mz_S);
1613     iprn_S = norm2(*nx_S, *ny_S, *nz_S);
1614
1615     nrkj2_S = norm2(rkjx_S, rkjy_S, rkjz_S);
1616
1617     /* Avoid division by zero. When zero, the result is multiplied by 0
1618      * anyhow, so the 3 max below do not affect the final result.
1619      */
1620     nrkj2_S  = max(nrkj2_S, nrkj2_min_S);
1621     nrkj_1_S = invsqrt(nrkj2_S);
1622     nrkj_2_S = nrkj_1_S * nrkj_1_S;
1623     nrkj_S   = nrkj2_S * nrkj_1_S;
1624
1625     toler_S = nrkj2_S * real_eps_S;
1626
1627     /* Here the plain-C code uses a conditional, but we can't do that in SIMD.
1628      * So we take a max with the tolerance instead. Since we multiply with
1629      * m or n later, the max does not affect the results.
1630      */
1631     iprm_S     = max(iprm_S, toler_S);
1632     iprn_S     = max(iprn_S, toler_S);
1633     *nrkj_m2_S = nrkj_S * inv(iprm_S);
1634     *nrkj_n2_S = nrkj_S * inv(iprn_S);
1635
1636     /* Set sign of phi_S with the sign of ipr_S; phi_S is currently positive */
1637     *phi_S = copysign(*phi_S, ipr_S);
1638     *p_S   = iprod(rijx_S, rijy_S, rijz_S, rkjx_S, rkjy_S, rkjz_S);
1639     *p_S   = *p_S * nrkj_2_S;
1640
1641     *q_S = iprod(rklx_S, rkly_S, rklz_S, rkjx_S, rkjy_S, rkjz_S);
1642     *q_S = *q_S * nrkj_2_S;
1643 }
1644
1645 #endif // GMX_SIMD_HAVE_REAL
1646
1647 } // namespace
1648
1649 template<BondedKernelFlavor flavor>
1650 void do_dih_fup(int          i,
1651                 int          j,
1652                 int          k,
1653                 int          l,
1654                 real         ddphi,
1655                 rvec         r_ij,
1656                 rvec         r_kj,
1657                 rvec         r_kl,
1658                 rvec         m,
1659                 rvec         n,
1660                 rvec4        f[],
1661                 rvec         fshift[],
1662                 const t_pbc* pbc,
1663                 const rvec   x[],
1664                 int          t1,
1665                 int          t2,
1666                 int          t3)
1667 {
1668     /* 143 FLOPS */
1669     rvec f_i, f_j, f_k, f_l;
1670     rvec uvec, vvec, svec, dx_jl;
1671     real iprm, iprn, nrkj, nrkj2, nrkj_1, nrkj_2;
1672     real a, b, p, q, toler;
1673
1674     iprm  = iprod(m, m);       /*  5    */
1675     iprn  = iprod(n, n);       /*  5    */
1676     nrkj2 = iprod(r_kj, r_kj); /*  5    */
1677     toler = nrkj2 * GMX_REAL_EPS;
1678     if ((iprm > toler) && (iprn > toler))
1679     {
1680         nrkj_1 = gmx::invsqrt(nrkj2);  /* 10    */
1681         nrkj_2 = nrkj_1 * nrkj_1;      /*  1    */
1682         nrkj   = nrkj2 * nrkj_1;       /*  1    */
1683         a      = -ddphi * nrkj / iprm; /* 11    */
1684         svmul(a, m, f_i);              /*  3    */
1685         b = ddphi * nrkj / iprn;       /* 11    */
1686         svmul(b, n, f_l);              /*  3  */
1687         p = iprod(r_ij, r_kj);         /*  5    */
1688         p *= nrkj_2;                   /*  1    */
1689         q = iprod(r_kl, r_kj);         /*  5    */
1690         q *= nrkj_2;                   /*  1    */
1691         svmul(p, f_i, uvec);           /*  3    */
1692         svmul(q, f_l, vvec);           /*  3    */
1693         rvec_sub(uvec, vvec, svec);    /*  3    */
1694         rvec_sub(f_i, svec, f_j);      /*  3    */
1695         rvec_add(f_l, svec, f_k);      /*  3    */
1696         rvec_inc(f[i], f_i);           /*  3    */
1697         rvec_dec(f[j], f_j);           /*  3    */
1698         rvec_dec(f[k], f_k);           /*  3    */
1699         rvec_inc(f[l], f_l);           /*  3    */
1700
1701         if (computeVirial(flavor))
1702         {
1703             if (pbc)
1704             {
1705                 t3 = pbc_rvec_sub(pbc, x[l], x[j], dx_jl);
1706             }
1707             else
1708             {
1709                 t3 = CENTRAL;
1710             }
1711
1712             rvec_inc(fshift[t1], f_i);
1713             rvec_dec(fshift[CENTRAL], f_j);
1714             rvec_dec(fshift[t2], f_k);
1715             rvec_inc(fshift[t3], f_l);
1716         }
1717     }
1718     /* 112 TOTAL    */
1719 }
1720
1721 namespace
1722 {
1723
1724 #if GMX_SIMD_HAVE_REAL
1725 /* As do_dih_fup_noshiftf above, but with SIMD and pre-calculated pre-factors */
1726 inline void gmx_simdcall do_dih_fup_noshiftf_simd(const int* ai,
1727                                                   const int* aj,
1728                                                   const int* ak,
1729                                                   const int* al,
1730                                                   SimdReal   p,
1731                                                   SimdReal   q,
1732                                                   SimdReal   f_i_x,
1733                                                   SimdReal   f_i_y,
1734                                                   SimdReal   f_i_z,
1735                                                   SimdReal   mf_l_x,
1736                                                   SimdReal   mf_l_y,
1737                                                   SimdReal   mf_l_z,
1738                                                   rvec4      f[])
1739 {
1740     SimdReal sx    = p * f_i_x + q * mf_l_x;
1741     SimdReal sy    = p * f_i_y + q * mf_l_y;
1742     SimdReal sz    = p * f_i_z + q * mf_l_z;
1743     SimdReal f_j_x = f_i_x - sx;
1744     SimdReal f_j_y = f_i_y - sy;
1745     SimdReal f_j_z = f_i_z - sz;
1746     SimdReal f_k_x = mf_l_x - sx;
1747     SimdReal f_k_y = mf_l_y - sy;
1748     SimdReal f_k_z = mf_l_z - sz;
1749     transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ai, f_i_x, f_i_y, f_i_z);
1750     transposeScatterDecrU<4>(reinterpret_cast<real*>(f), aj, f_j_x, f_j_y, f_j_z);
1751     transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ak, f_k_x, f_k_y, f_k_z);
1752     transposeScatterDecrU<4>(reinterpret_cast<real*>(f), al, mf_l_x, mf_l_y, mf_l_z);
1753 }
1754 #endif // GMX_SIMD_HAVE_REAL
1755
1756 /*! \brief Computes and returns the proper dihedral force
1757  *
1758  * With the appropriate kernel flavor, also computes and accumulates
1759  * the energy and dV/dlambda.
1760  */
1761 template<BondedKernelFlavor flavor>
1762 real dopdihs(real cpA, real cpB, real phiA, real phiB, int mult, real phi, real lambda, real* V, real* dvdlambda)
1763 {
1764     const real L1   = 1.0 - lambda;
1765     const real ph0  = (L1 * phiA + lambda * phiB) * DEG2RAD;
1766     const real dph0 = (phiB - phiA) * DEG2RAD;
1767     const real cp   = L1 * cpA + lambda * cpB;
1768
1769     const real mdphi = mult * phi - ph0;
1770     const real sdphi = std::sin(mdphi);
1771     const real ddphi = -cp * mult * sdphi;
1772     if (computeEnergy(flavor))
1773     {
1774         const real v1 = 1 + std::cos(mdphi);
1775         *V += cp * v1;
1776
1777         *dvdlambda += (cpB - cpA) * v1 + cp * dph0 * sdphi;
1778     }
1779
1780     return ddphi;
1781     /* That was 40 flops */
1782 }
1783
1784 /*! \brief Similar to \p dopdihs(), except for a minus sign and a different treatment of mult/phi0 */
1785 real dopdihs_min(real cpA, real cpB, real phiA, real phiB, int mult, real phi, real lambda, real* V, real* F)
1786 {
1787     real v, dvdlambda, mdphi, v1, sdphi, ddphi;
1788     real L1   = 1.0 - lambda;
1789     real ph0  = (L1 * phiA + lambda * phiB) * DEG2RAD;
1790     real dph0 = (phiB - phiA) * DEG2RAD;
1791     real cp   = L1 * cpA + lambda * cpB;
1792
1793     mdphi = mult * (phi - ph0);
1794     sdphi = std::sin(mdphi);
1795     ddphi = cp * mult * sdphi;
1796     v1    = 1.0 - std::cos(mdphi);
1797     v     = cp * v1;
1798
1799     dvdlambda = (cpB - cpA) * v1 + cp * dph0 * sdphi;
1800
1801     *V = v;
1802     *F = ddphi;
1803
1804     return dvdlambda;
1805
1806     /* That was 40 flops */
1807 }
1808
1809 template<BondedKernelFlavor flavor>
1810 std::enable_if_t<flavor != BondedKernelFlavor::ForcesSimdWhenAvailable || !GMX_SIMD_HAVE_REAL, real>
1811 pdihs(int             nbonds,
1812       const t_iatom   forceatoms[],
1813       const t_iparams forceparams[],
1814       const rvec      x[],
1815       rvec4           f[],
1816       rvec            fshift[],
1817       const t_pbc*    pbc,
1818       real            lambda,
1819       real*           dvdlambda,
1820       const t_mdatoms gmx_unused* md,
1821       t_fcdata gmx_unused* fcd,
1822       int gmx_unused* global_atom_index)
1823 {
1824     int  t1, t2, t3;
1825     rvec r_ij, r_kj, r_kl, m, n;
1826
1827     real vtot = 0.0;
1828
1829     for (int i = 0; i < nbonds;)
1830     {
1831         const int ai = forceatoms[i + 1];
1832         const int aj = forceatoms[i + 2];
1833         const int ak = forceatoms[i + 3];
1834         const int al = forceatoms[i + 4];
1835
1836         const real phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n, &t1,
1837                                    &t2, &t3); /*  84      */
1838
1839         /* Loop over dihedrals working on the same atoms,
1840          * so we avoid recalculating angles and distributing forces.
1841          */
1842         real ddphi_tot = 0;
1843         do
1844         {
1845             const int type = forceatoms[i];
1846             ddphi_tot += dopdihs<flavor>(forceparams[type].pdihs.cpA, forceparams[type].pdihs.cpB,
1847                                          forceparams[type].pdihs.phiA, forceparams[type].pdihs.phiB,
1848                                          forceparams[type].pdihs.mult, phi, lambda, &vtot, dvdlambda);
1849
1850             i += 5;
1851         } while (i < nbonds && forceatoms[i + 1] == ai && forceatoms[i + 2] == aj
1852                  && forceatoms[i + 3] == ak && forceatoms[i + 4] == al);
1853
1854         do_dih_fup<flavor>(ai, aj, ak, al, ddphi_tot, r_ij, r_kj, r_kl, m, n, f, fshift, pbc, x, t1,
1855                            t2, t3); /* 112              */
1856     }                               /* 223 TOTAL  */
1857
1858     return vtot;
1859 }
1860
1861 #if GMX_SIMD_HAVE_REAL
1862
1863 /* As pdihs above, but using SIMD to calculate multiple dihedrals at once */
1864 template<BondedKernelFlavor flavor>
1865 std::enable_if_t<flavor == BondedKernelFlavor::ForcesSimdWhenAvailable, real>
1866 pdihs(int             nbonds,
1867       const t_iatom   forceatoms[],
1868       const t_iparams forceparams[],
1869       const rvec      x[],
1870       rvec4           f[],
1871       rvec gmx_unused fshift[],
1872       const t_pbc*    pbc,
1873       real gmx_unused lambda,
1874       real gmx_unused* dvdlambda,
1875       const t_mdatoms gmx_unused* md,
1876       t_fcdata gmx_unused* fcd,
1877       int gmx_unused* global_atom_index)
1878 {
1879     const int                                nfa1 = 5;
1880     int                                      i, iu, s;
1881     int                                      type;
1882     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ai[GMX_SIMD_REAL_WIDTH];
1883     alignas(GMX_SIMD_ALIGNMENT) std::int32_t aj[GMX_SIMD_REAL_WIDTH];
1884     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ak[GMX_SIMD_REAL_WIDTH];
1885     alignas(GMX_SIMD_ALIGNMENT) std::int32_t al[GMX_SIMD_REAL_WIDTH];
1886     alignas(GMX_SIMD_ALIGNMENT) real         buf[3 * GMX_SIMD_REAL_WIDTH];
1887     real *                                   cp, *phi0, *mult;
1888     SimdReal                                 deg2rad_S(DEG2RAD);
1889     SimdReal                                 p_S, q_S;
1890     SimdReal                                 phi0_S, phi_S;
1891     SimdReal                                 mx_S, my_S, mz_S;
1892     SimdReal                                 nx_S, ny_S, nz_S;
1893     SimdReal                                 nrkj_m2_S, nrkj_n2_S;
1894     SimdReal                                 cp_S, mdphi_S, mult_S;
1895     SimdReal                                 sin_S, cos_S;
1896     SimdReal                                 mddphi_S;
1897     SimdReal                                 sf_i_S, msf_l_S;
1898     alignas(GMX_SIMD_ALIGNMENT) real         pbc_simd[9 * GMX_SIMD_REAL_WIDTH];
1899
1900     /* Extract aligned pointer for parameters and variables */
1901     cp   = buf + 0 * GMX_SIMD_REAL_WIDTH;
1902     phi0 = buf + 1 * GMX_SIMD_REAL_WIDTH;
1903     mult = buf + 2 * GMX_SIMD_REAL_WIDTH;
1904
1905     set_pbc_simd(pbc, pbc_simd);
1906
1907     /* nbonds is the number of dihedrals times nfa1, here we step GMX_SIMD_REAL_WIDTH dihs */
1908     for (i = 0; (i < nbonds); i += GMX_SIMD_REAL_WIDTH * nfa1)
1909     {
1910         /* Collect atoms quadruplets for GMX_SIMD_REAL_WIDTH dihedrals.
1911          * iu indexes into forceatoms, we should not let iu go beyond nbonds.
1912          */
1913         iu = i;
1914         for (s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
1915         {
1916             type  = forceatoms[iu];
1917             ai[s] = forceatoms[iu + 1];
1918             aj[s] = forceatoms[iu + 2];
1919             ak[s] = forceatoms[iu + 3];
1920             al[s] = forceatoms[iu + 4];
1921
1922             /* At the end fill the arrays with the last atoms and 0 params */
1923             if (i + s * nfa1 < nbonds)
1924             {
1925                 cp[s]   = forceparams[type].pdihs.cpA;
1926                 phi0[s] = forceparams[type].pdihs.phiA;
1927                 mult[s] = forceparams[type].pdihs.mult;
1928
1929                 if (iu + nfa1 < nbonds)
1930                 {
1931                     iu += nfa1;
1932                 }
1933             }
1934             else
1935             {
1936                 cp[s]   = 0;
1937                 phi0[s] = 0;
1938                 mult[s] = 0;
1939             }
1940         }
1941
1942         /* Caclulate GMX_SIMD_REAL_WIDTH dihedral angles at once */
1943         dih_angle_simd(x, ai, aj, ak, al, pbc_simd, &phi_S, &mx_S, &my_S, &mz_S, &nx_S, &ny_S,
1944                        &nz_S, &nrkj_m2_S, &nrkj_n2_S, &p_S, &q_S);
1945
1946         cp_S   = load<SimdReal>(cp);
1947         phi0_S = load<SimdReal>(phi0) * deg2rad_S;
1948         mult_S = load<SimdReal>(mult);
1949
1950         mdphi_S = fms(mult_S, phi_S, phi0_S);
1951
1952         /* Calculate GMX_SIMD_REAL_WIDTH sines at once */
1953         sincos(mdphi_S, &sin_S, &cos_S);
1954         mddphi_S = cp_S * mult_S * sin_S;
1955         sf_i_S   = mddphi_S * nrkj_m2_S;
1956         msf_l_S  = mddphi_S * nrkj_n2_S;
1957
1958         /* After this m?_S will contain f[i] */
1959         mx_S = sf_i_S * mx_S;
1960         my_S = sf_i_S * my_S;
1961         mz_S = sf_i_S * mz_S;
1962
1963         /* After this m?_S will contain -f[l] */
1964         nx_S = msf_l_S * nx_S;
1965         ny_S = msf_l_S * ny_S;
1966         nz_S = msf_l_S * nz_S;
1967
1968         do_dih_fup_noshiftf_simd(ai, aj, ak, al, p_S, q_S, mx_S, my_S, mz_S, nx_S, ny_S, nz_S, f);
1969     }
1970
1971     return 0;
1972 }
1973
1974 /* This is mostly a copy of the SIMD flavor of pdihs above, but with using
1975  * the RB potential instead of a harmonic potential.
1976  * This function can replace rbdihs() when no energy and virial are needed.
1977  */
1978 template<BondedKernelFlavor flavor>
1979 std::enable_if_t<flavor == BondedKernelFlavor::ForcesSimdWhenAvailable, real>
1980 rbdihs(int             nbonds,
1981        const t_iatom   forceatoms[],
1982        const t_iparams forceparams[],
1983        const rvec      x[],
1984        rvec4           f[],
1985        rvec gmx_unused fshift[],
1986        const t_pbc*    pbc,
1987        real gmx_unused lambda,
1988        real gmx_unused* dvdlambda,
1989        const t_mdatoms gmx_unused* md,
1990        t_fcdata gmx_unused* fcd,
1991        int gmx_unused* global_atom_index)
1992 {
1993     const int                                nfa1 = 5;
1994     int                                      i, iu, s, j;
1995     int                                      type;
1996     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ai[GMX_SIMD_REAL_WIDTH];
1997     alignas(GMX_SIMD_ALIGNMENT) std::int32_t aj[GMX_SIMD_REAL_WIDTH];
1998     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ak[GMX_SIMD_REAL_WIDTH];
1999     alignas(GMX_SIMD_ALIGNMENT) std::int32_t al[GMX_SIMD_REAL_WIDTH];
2000     alignas(GMX_SIMD_ALIGNMENT) real         parm[NR_RBDIHS * GMX_SIMD_REAL_WIDTH];
2001
2002     SimdReal                         p_S, q_S;
2003     SimdReal                         phi_S;
2004     SimdReal                         ddphi_S, cosfac_S;
2005     SimdReal                         mx_S, my_S, mz_S;
2006     SimdReal                         nx_S, ny_S, nz_S;
2007     SimdReal                         nrkj_m2_S, nrkj_n2_S;
2008     SimdReal                         parm_S, c_S;
2009     SimdReal                         sin_S, cos_S;
2010     SimdReal                         sf_i_S, msf_l_S;
2011     alignas(GMX_SIMD_ALIGNMENT) real pbc_simd[9 * GMX_SIMD_REAL_WIDTH];
2012
2013     SimdReal pi_S(M_PI);
2014     SimdReal one_S(1.0);
2015
2016     set_pbc_simd(pbc, pbc_simd);
2017
2018     /* nbonds is the number of dihedrals times nfa1, here we step GMX_SIMD_REAL_WIDTH dihs */
2019     for (i = 0; (i < nbonds); i += GMX_SIMD_REAL_WIDTH * nfa1)
2020     {
2021         /* Collect atoms quadruplets for GMX_SIMD_REAL_WIDTH dihedrals.
2022          * iu indexes into forceatoms, we should not let iu go beyond nbonds.
2023          */
2024         iu = i;
2025         for (s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
2026         {
2027             type  = forceatoms[iu];
2028             ai[s] = forceatoms[iu + 1];
2029             aj[s] = forceatoms[iu + 2];
2030             ak[s] = forceatoms[iu + 3];
2031             al[s] = forceatoms[iu + 4];
2032
2033             /* At the end fill the arrays with the last atoms and 0 params */
2034             if (i + s * nfa1 < nbonds)
2035             {
2036                 /* We don't need the first parameter, since that's a constant
2037                  * which only affects the energies, not the forces.
2038                  */
2039                 for (j = 1; j < NR_RBDIHS; j++)
2040                 {
2041                     parm[j * GMX_SIMD_REAL_WIDTH + s] = forceparams[type].rbdihs.rbcA[j];
2042                 }
2043
2044                 if (iu + nfa1 < nbonds)
2045                 {
2046                     iu += nfa1;
2047                 }
2048             }
2049             else
2050             {
2051                 for (j = 1; j < NR_RBDIHS; j++)
2052                 {
2053                     parm[j * GMX_SIMD_REAL_WIDTH + s] = 0;
2054                 }
2055             }
2056         }
2057
2058         /* Caclulate GMX_SIMD_REAL_WIDTH dihedral angles at once */
2059         dih_angle_simd(x, ai, aj, ak, al, pbc_simd, &phi_S, &mx_S, &my_S, &mz_S, &nx_S, &ny_S,
2060                        &nz_S, &nrkj_m2_S, &nrkj_n2_S, &p_S, &q_S);
2061
2062         /* Change to polymer convention */
2063         phi_S = phi_S - pi_S;
2064
2065         sincos(phi_S, &sin_S, &cos_S);
2066
2067         ddphi_S  = setZero();
2068         c_S      = one_S;
2069         cosfac_S = one_S;
2070         for (j = 1; j < NR_RBDIHS; j++)
2071         {
2072             parm_S   = load<SimdReal>(parm + j * GMX_SIMD_REAL_WIDTH);
2073             ddphi_S  = fma(c_S * parm_S, cosfac_S, ddphi_S);
2074             cosfac_S = cosfac_S * cos_S;
2075             c_S      = c_S + one_S;
2076         }
2077
2078         /* Note that here we do not use the minus sign which is present
2079          * in the normal RB code. This is corrected for through (m)sf below.
2080          */
2081         ddphi_S = ddphi_S * sin_S;
2082
2083         sf_i_S  = ddphi_S * nrkj_m2_S;
2084         msf_l_S = ddphi_S * nrkj_n2_S;
2085
2086         /* After this m?_S will contain f[i] */
2087         mx_S = sf_i_S * mx_S;
2088         my_S = sf_i_S * my_S;
2089         mz_S = sf_i_S * mz_S;
2090
2091         /* After this m?_S will contain -f[l] */
2092         nx_S = msf_l_S * nx_S;
2093         ny_S = msf_l_S * ny_S;
2094         nz_S = msf_l_S * nz_S;
2095
2096         do_dih_fup_noshiftf_simd(ai, aj, ak, al, p_S, q_S, mx_S, my_S, mz_S, nx_S, ny_S, nz_S, f);
2097     }
2098
2099     return 0;
2100 }
2101
2102 #endif // GMX_SIMD_HAVE_REAL
2103
2104
2105 template<BondedKernelFlavor flavor>
2106 real idihs(int             nbonds,
2107            const t_iatom   forceatoms[],
2108            const t_iparams forceparams[],
2109            const rvec      x[],
2110            rvec4           f[],
2111            rvec            fshift[],
2112            const t_pbc*    pbc,
2113            real            lambda,
2114            real*           dvdlambda,
2115            const t_mdatoms gmx_unused* md,
2116            t_fcdata gmx_unused* fcd,
2117            int gmx_unused* global_atom_index)
2118 {
2119     int  i, type, ai, aj, ak, al;
2120     int  t1, t2, t3;
2121     real phi, phi0, dphi0, ddphi, vtot;
2122     rvec r_ij, r_kj, r_kl, m, n;
2123     real L1, kk, dp, dp2, kA, kB, pA, pB, dvdl_term;
2124
2125     L1        = 1.0 - lambda;
2126     dvdl_term = 0;
2127     vtot      = 0.0;
2128     for (i = 0; (i < nbonds);)
2129     {
2130         type = forceatoms[i++];
2131         ai   = forceatoms[i++];
2132         aj   = forceatoms[i++];
2133         ak   = forceatoms[i++];
2134         al   = forceatoms[i++];
2135
2136         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n, &t1, &t2, &t3); /*  84         */
2137
2138         /* phi can jump if phi0 is close to Pi/-Pi, which will cause huge
2139          * force changes if we just apply a normal harmonic.
2140          * Instead, we first calculate phi-phi0 and take it modulo (-Pi,Pi).
2141          * This means we will never have the periodicity problem, unless
2142          * the dihedral is Pi away from phiO, which is very unlikely due to
2143          * the potential.
2144          */
2145         kA = forceparams[type].harmonic.krA;
2146         kB = forceparams[type].harmonic.krB;
2147         pA = forceparams[type].harmonic.rA;
2148         pB = forceparams[type].harmonic.rB;
2149
2150         kk    = L1 * kA + lambda * kB;
2151         phi0  = (L1 * pA + lambda * pB) * DEG2RAD;
2152         dphi0 = (pB - pA) * DEG2RAD;
2153
2154         dp = phi - phi0;
2155
2156         make_dp_periodic(&dp);
2157
2158         dp2 = dp * dp;
2159
2160         vtot += 0.5 * kk * dp2;
2161         ddphi = -kk * dp;
2162
2163         dvdl_term += 0.5 * (kB - kA) * dp2 - kk * dphi0 * dp;
2164
2165         do_dih_fup<flavor>(ai, aj, ak, al, -ddphi, r_ij, r_kj, r_kl, m, n, f, fshift, pbc, x, t1,
2166                            t2, t3); /* 112              */
2167         /* 218 TOTAL    */
2168     }
2169
2170     *dvdlambda += dvdl_term;
2171     return vtot;
2172 }
2173
2174 /*! \brief Computes angle restraints of two different types */
2175 template<BondedKernelFlavor flavor>
2176 real low_angres(int             nbonds,
2177                 const t_iatom   forceatoms[],
2178                 const t_iparams forceparams[],
2179                 const rvec      x[],
2180                 rvec4           f[],
2181                 rvec            fshift[],
2182                 const t_pbc*    pbc,
2183                 real            lambda,
2184                 real*           dvdlambda,
2185                 gmx_bool        bZAxis)
2186 {
2187     int  i, m, type, ai, aj, ak, al;
2188     int  t1, t2;
2189     real phi, cos_phi, cos_phi2, vid, vtot, dVdphi;
2190     rvec r_ij, r_kl, f_i, f_k = { 0, 0, 0 };
2191     real st, sth, nrij2, nrkl2, c, cij, ckl;
2192
2193     t2 = 0; /* avoid warning with gcc-3.3. It is never used uninitialized */
2194
2195     vtot = 0.0;
2196     ak = al = 0; /* to avoid warnings */
2197     for (i = 0; i < nbonds;)
2198     {
2199         type = forceatoms[i++];
2200         ai   = forceatoms[i++];
2201         aj   = forceatoms[i++];
2202         t1   = pbc_rvec_sub(pbc, x[aj], x[ai], r_ij); /*  3             */
2203         if (!bZAxis)
2204         {
2205             ak = forceatoms[i++];
2206             al = forceatoms[i++];
2207             t2 = pbc_rvec_sub(pbc, x[al], x[ak], r_kl); /*  3           */
2208         }
2209         else
2210         {
2211             r_kl[XX] = 0;
2212             r_kl[YY] = 0;
2213             r_kl[ZZ] = 1;
2214         }
2215
2216         cos_phi = cos_angle(r_ij, r_kl); /* 25          */
2217         phi     = std::acos(cos_phi);    /* 10           */
2218
2219         *dvdlambda += dopdihs_min(forceparams[type].pdihs.cpA, forceparams[type].pdihs.cpB,
2220                                   forceparams[type].pdihs.phiA, forceparams[type].pdihs.phiB,
2221                                   forceparams[type].pdihs.mult, phi, lambda, &vid, &dVdphi); /*  40 */
2222
2223         vtot += vid;
2224
2225         cos_phi2 = gmx::square(cos_phi); /*   1         */
2226         if (cos_phi2 < 1)
2227         {
2228             st    = -dVdphi * gmx::invsqrt(1 - cos_phi2); /*  12                */
2229             sth   = st * cos_phi;                         /*   1                */
2230             nrij2 = iprod(r_ij, r_ij);                    /*   5                */
2231             nrkl2 = iprod(r_kl, r_kl);                    /*   5          */
2232
2233             c   = st * gmx::invsqrt(nrij2 * nrkl2); /*  11              */
2234             cij = sth / nrij2;                      /*  10              */
2235             ckl = sth / nrkl2;                      /*  10              */
2236
2237             for (m = 0; m < DIM; m++) /*  18+18       */
2238             {
2239                 f_i[m] = (c * r_kl[m] - cij * r_ij[m]);
2240                 f[ai][m] += f_i[m];
2241                 f[aj][m] -= f_i[m];
2242                 if (!bZAxis)
2243                 {
2244                     f_k[m] = (c * r_ij[m] - ckl * r_kl[m]);
2245                     f[ak][m] += f_k[m];
2246                     f[al][m] -= f_k[m];
2247                 }
2248             }
2249
2250             if (computeVirial(flavor))
2251             {
2252                 rvec_inc(fshift[t1], f_i);
2253                 rvec_dec(fshift[CENTRAL], f_i);
2254                 if (!bZAxis)
2255                 {
2256                     rvec_inc(fshift[t2], f_k);
2257                     rvec_dec(fshift[CENTRAL], f_k);
2258                 }
2259             }
2260         }
2261     }
2262
2263     return vtot; /*  184 / 157 (bZAxis)  total  */
2264 }
2265
2266 template<BondedKernelFlavor flavor>
2267 real angres(int             nbonds,
2268             const t_iatom   forceatoms[],
2269             const t_iparams forceparams[],
2270             const rvec      x[],
2271             rvec4           f[],
2272             rvec            fshift[],
2273             const t_pbc*    pbc,
2274             real            lambda,
2275             real*           dvdlambda,
2276             const t_mdatoms gmx_unused* md,
2277             t_fcdata gmx_unused* fcd,
2278             int gmx_unused* global_atom_index)
2279 {
2280     return low_angres<flavor>(nbonds, forceatoms, forceparams, x, f, fshift, pbc, lambda, dvdlambda, FALSE);
2281 }
2282
2283 template<BondedKernelFlavor flavor>
2284 real angresz(int             nbonds,
2285              const t_iatom   forceatoms[],
2286              const t_iparams forceparams[],
2287              const rvec      x[],
2288              rvec4           f[],
2289              rvec            fshift[],
2290              const t_pbc*    pbc,
2291              real            lambda,
2292              real*           dvdlambda,
2293              const t_mdatoms gmx_unused* md,
2294              t_fcdata gmx_unused* fcd,
2295              int gmx_unused* global_atom_index)
2296 {
2297     return low_angres<flavor>(nbonds, forceatoms, forceparams, x, f, fshift, pbc, lambda, dvdlambda, TRUE);
2298 }
2299
2300 template<BondedKernelFlavor flavor>
2301 real dihres(int             nbonds,
2302             const t_iatom   forceatoms[],
2303             const t_iparams forceparams[],
2304             const rvec      x[],
2305             rvec4           f[],
2306             rvec            fshift[],
2307             const t_pbc*    pbc,
2308             real            lambda,
2309             real*           dvdlambda,
2310             const t_mdatoms gmx_unused* md,
2311             t_fcdata gmx_unused* fcd,
2312             int gmx_unused* global_atom_index)
2313 {
2314     real vtot = 0;
2315     int  ai, aj, ak, al, i, type, t1, t2, t3;
2316     real phi0A, phi0B, dphiA, dphiB, kfacA, kfacB, phi0, dphi, kfac;
2317     real phi, ddphi, ddp, ddp2, dp, d2r, L1;
2318     rvec r_ij, r_kj, r_kl, m, n;
2319
2320     L1 = 1.0 - lambda;
2321
2322     d2r = DEG2RAD;
2323
2324     for (i = 0; (i < nbonds);)
2325     {
2326         type = forceatoms[i++];
2327         ai   = forceatoms[i++];
2328         aj   = forceatoms[i++];
2329         ak   = forceatoms[i++];
2330         al   = forceatoms[i++];
2331
2332         phi0A = forceparams[type].dihres.phiA * d2r;
2333         dphiA = forceparams[type].dihres.dphiA * d2r;
2334         kfacA = forceparams[type].dihres.kfacA;
2335
2336         phi0B = forceparams[type].dihres.phiB * d2r;
2337         dphiB = forceparams[type].dihres.dphiB * d2r;
2338         kfacB = forceparams[type].dihres.kfacB;
2339
2340         phi0 = L1 * phi0A + lambda * phi0B;
2341         dphi = L1 * dphiA + lambda * dphiB;
2342         kfac = L1 * kfacA + lambda * kfacB;
2343
2344         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n, &t1, &t2, &t3);
2345         /* 84 flops */
2346
2347         /* phi can jump if phi0 is close to Pi/-Pi, which will cause huge
2348          * force changes if we just apply a normal harmonic.
2349          * Instead, we first calculate phi-phi0 and take it modulo (-Pi,Pi).
2350          * This means we will never have the periodicity problem, unless
2351          * the dihedral is Pi away from phiO, which is very unlikely due to
2352          * the potential.
2353          */
2354         dp = phi - phi0;
2355         make_dp_periodic(&dp);
2356
2357         if (dp > dphi)
2358         {
2359             ddp = dp - dphi;
2360         }
2361         else if (dp < -dphi)
2362         {
2363             ddp = dp + dphi;
2364         }
2365         else
2366         {
2367             ddp = 0;
2368         }
2369
2370         if (ddp != 0.0)
2371         {
2372             ddp2 = ddp * ddp;
2373             vtot += 0.5 * kfac * ddp2;
2374             ddphi = kfac * ddp;
2375
2376             *dvdlambda += 0.5 * (kfacB - kfacA) * ddp2;
2377             /* lambda dependence from changing restraint distances */
2378             if (ddp > 0)
2379             {
2380                 *dvdlambda -= kfac * ddp * ((dphiB - dphiA) + (phi0B - phi0A));
2381             }
2382             else if (ddp < 0)
2383             {
2384                 *dvdlambda += kfac * ddp * ((dphiB - dphiA) - (phi0B - phi0A));
2385             }
2386             do_dih_fup<flavor>(ai, aj, ak, al, ddphi, r_ij, r_kj, r_kl, m, n, f, fshift, pbc, x, t1,
2387                                t2, t3); /* 112          */
2388         }
2389     }
2390     return vtot;
2391 }
2392
2393
2394 real unimplemented(int gmx_unused nbonds,
2395                    const t_iatom gmx_unused forceatoms[],
2396                    const t_iparams gmx_unused forceparams[],
2397                    const rvec gmx_unused x[],
2398                    rvec4 gmx_unused f[],
2399                    rvec gmx_unused fshift[],
2400                    const t_pbc gmx_unused* pbc,
2401                    real gmx_unused lambda,
2402                    real gmx_unused* dvdlambda,
2403                    const t_mdatoms gmx_unused* md,
2404                    t_fcdata gmx_unused* fcd,
2405                    int gmx_unused* global_atom_index)
2406 {
2407     gmx_impl("*** you are using a not implemented function");
2408 }
2409
2410 template<BondedKernelFlavor flavor>
2411 real restrangles(int             nbonds,
2412                  const t_iatom   forceatoms[],
2413                  const t_iparams forceparams[],
2414                  const rvec      x[],
2415                  rvec4           f[],
2416                  rvec            fshift[],
2417                  const t_pbc*    pbc,
2418                  real gmx_unused lambda,
2419                  real gmx_unused* dvdlambda,
2420                  const t_mdatoms gmx_unused* md,
2421                  t_fcdata gmx_unused* fcd,
2422                  int gmx_unused* global_atom_index)
2423 {
2424     int    i, d, ai, aj, ak, type, m;
2425     int    t1, t2;
2426     real   v, vtot;
2427     rvec   f_i, f_j, f_k;
2428     double prefactor, ratio_ante, ratio_post;
2429     rvec   delta_ante, delta_post, vec_temp;
2430
2431     vtot = 0.0;
2432     for (i = 0; (i < nbonds);)
2433     {
2434         type = forceatoms[i++];
2435         ai   = forceatoms[i++];
2436         aj   = forceatoms[i++];
2437         ak   = forceatoms[i++];
2438
2439         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], vec_temp);
2440         pbc_rvec_sub(pbc, x[aj], x[ai], delta_ante);
2441         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], delta_post);
2442
2443
2444         /* This function computes factors needed for restricted angle potential.
2445          * The restricted angle potential is used in coarse-grained simulations to avoid singularities
2446          * when three particles align and the dihedral angle and dihedral potential
2447          * cannot be calculated. This potential is calculated using the formula:
2448            real restrangles(int nbonds,
2449             const t_iatom forceatoms[],const t_iparams forceparams[],
2450             const rvec x[],rvec4 f[],rvec fshift[],
2451             const t_pbc *pbc,
2452             real gmx_unused lambda,real gmx_unused *dvdlambda,
2453             const t_mdatoms gmx_unused *md,t_fcdata gmx_unused *fcd,
2454             int gmx_unused *global_atom_index)
2455            {
2456            int  i, d, ai, aj, ak, type, m;
2457            int t1, t2;
2458            rvec r_ij,r_kj;
2459            real v, vtot;
2460            rvec f_i, f_j, f_k;
2461            real prefactor, ratio_ante, ratio_post;
2462            rvec delta_ante, delta_post, vec_temp;
2463
2464            vtot = 0.0;
2465            for(i=0; (i<nbonds); )
2466            {
2467            type = forceatoms[i++];
2468            ai   = forceatoms[i++];
2469            aj   = forceatoms[i++];
2470            ak   = forceatoms[i++];
2471
2472          * \f[V_{\rm ReB}(\theta_i) = \frac{1}{2} k_{\theta} \frac{(\cos\theta_i - \cos\theta_0)^2}
2473          * {\sin^2\theta_i}\f] ({eq:ReB} and ref \cite{MonicaGoga2013} from the manual).
2474          * For more explanations see comments file "restcbt.h". */
2475
2476         compute_factors_restangles(type, forceparams, delta_ante, delta_post, &prefactor,
2477                                    &ratio_ante, &ratio_post, &v);
2478
2479         /*   Forces are computed per component */
2480         for (d = 0; d < DIM; d++)
2481         {
2482             f_i[d] = prefactor * (ratio_ante * delta_ante[d] - delta_post[d]);
2483             f_j[d] = prefactor
2484                      * ((ratio_post + 1.0) * delta_post[d] - (ratio_ante + 1.0) * delta_ante[d]);
2485             f_k[d] = prefactor * (delta_ante[d] - ratio_post * delta_post[d]);
2486         }
2487
2488         /*   Computation of potential energy   */
2489
2490         vtot += v;
2491
2492         /*   Update forces */
2493
2494         for (m = 0; (m < DIM); m++)
2495         {
2496             f[ai][m] += f_i[m];
2497             f[aj][m] += f_j[m];
2498             f[ak][m] += f_k[m];
2499         }
2500
2501         if (computeVirial(flavor))
2502         {
2503             rvec_inc(fshift[t1], f_i);
2504             rvec_inc(fshift[CENTRAL], f_j);
2505             rvec_inc(fshift[t2], f_k);
2506         }
2507     }
2508     return vtot;
2509 }
2510
2511
2512 template<BondedKernelFlavor flavor>
2513 real restrdihs(int             nbonds,
2514                const t_iatom   forceatoms[],
2515                const t_iparams forceparams[],
2516                const rvec      x[],
2517                rvec4           f[],
2518                rvec            fshift[],
2519                const t_pbc*    pbc,
2520                real gmx_unused lambda,
2521                real gmx_unused* dvlambda,
2522                const t_mdatoms gmx_unused* md,
2523                t_fcdata gmx_unused* fcd,
2524                int gmx_unused* global_atom_index)
2525 {
2526     int  i, d, type, ai, aj, ak, al;
2527     rvec f_i, f_j, f_k, f_l;
2528     rvec dx_jl;
2529     int  t1, t2, t3;
2530     real v, vtot;
2531     rvec delta_ante, delta_crnt, delta_post, vec_temp;
2532     real factor_phi_ai_ante, factor_phi_ai_crnt, factor_phi_ai_post;
2533     real factor_phi_aj_ante, factor_phi_aj_crnt, factor_phi_aj_post;
2534     real factor_phi_ak_ante, factor_phi_ak_crnt, factor_phi_ak_post;
2535     real factor_phi_al_ante, factor_phi_al_crnt, factor_phi_al_post;
2536     real prefactor_phi;
2537
2538
2539     vtot = 0.0;
2540     for (i = 0; (i < nbonds);)
2541     {
2542         type = forceatoms[i++];
2543         ai   = forceatoms[i++];
2544         aj   = forceatoms[i++];
2545         ak   = forceatoms[i++];
2546         al   = forceatoms[i++];
2547
2548         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], vec_temp);
2549         pbc_rvec_sub(pbc, x[aj], x[ai], delta_ante);
2550         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], delta_crnt);
2551         pbc_rvec_sub(pbc, x[ak], x[al], vec_temp);
2552         pbc_rvec_sub(pbc, x[al], x[ak], delta_post);
2553
2554         /* This function computes factors needed for restricted angle potential.
2555          * The restricted angle potential is used in coarse-grained simulations to avoid singularities
2556          * when three particles align and the dihedral angle and dihedral potential cannot be calculated.
2557          * This potential is calculated using the formula:
2558          * \f[V_{\rm ReB}(\theta_i) = \frac{1}{2} k_{\theta}
2559          * \frac{(\cos\theta_i - \cos\theta_0)^2}{\sin^2\theta_i}\f]
2560          * ({eq:ReB} and ref \cite{MonicaGoga2013} from the manual).
2561          * For more explanations see comments file "restcbt.h" */
2562
2563         compute_factors_restrdihs(
2564                 type, forceparams, delta_ante, delta_crnt, delta_post, &factor_phi_ai_ante,
2565                 &factor_phi_ai_crnt, &factor_phi_ai_post, &factor_phi_aj_ante, &factor_phi_aj_crnt,
2566                 &factor_phi_aj_post, &factor_phi_ak_ante, &factor_phi_ak_crnt, &factor_phi_ak_post,
2567                 &factor_phi_al_ante, &factor_phi_al_crnt, &factor_phi_al_post, &prefactor_phi, &v);
2568
2569
2570         /*      Computation of forces per component */
2571         for (d = 0; d < DIM; d++)
2572         {
2573             f_i[d] = prefactor_phi
2574                      * (factor_phi_ai_ante * delta_ante[d] + factor_phi_ai_crnt * delta_crnt[d]
2575                         + factor_phi_ai_post * delta_post[d]);
2576             f_j[d] = prefactor_phi
2577                      * (factor_phi_aj_ante * delta_ante[d] + factor_phi_aj_crnt * delta_crnt[d]
2578                         + factor_phi_aj_post * delta_post[d]);
2579             f_k[d] = prefactor_phi
2580                      * (factor_phi_ak_ante * delta_ante[d] + factor_phi_ak_crnt * delta_crnt[d]
2581                         + factor_phi_ak_post * delta_post[d]);
2582             f_l[d] = prefactor_phi
2583                      * (factor_phi_al_ante * delta_ante[d] + factor_phi_al_crnt * delta_crnt[d]
2584                         + factor_phi_al_post * delta_post[d]);
2585         }
2586         /*      Computation of the energy */
2587
2588         vtot += v;
2589
2590
2591         /*    Updating the forces */
2592
2593         rvec_inc(f[ai], f_i);
2594         rvec_inc(f[aj], f_j);
2595         rvec_inc(f[ak], f_k);
2596         rvec_inc(f[al], f_l);
2597
2598         if (computeVirial(flavor))
2599         {
2600             if (pbc)
2601             {
2602                 t3 = pbc_rvec_sub(pbc, x[al], x[aj], dx_jl);
2603             }
2604             else
2605             {
2606                 t3 = CENTRAL;
2607             }
2608
2609             rvec_inc(fshift[t1], f_i);
2610             rvec_inc(fshift[CENTRAL], f_j);
2611             rvec_inc(fshift[t2], f_k);
2612             rvec_inc(fshift[t3], f_l);
2613         }
2614     }
2615
2616     return vtot;
2617 }
2618
2619
2620 template<BondedKernelFlavor flavor>
2621 real cbtdihs(int             nbonds,
2622              const t_iatom   forceatoms[],
2623              const t_iparams forceparams[],
2624              const rvec      x[],
2625              rvec4           f[],
2626              rvec            fshift[],
2627              const t_pbc*    pbc,
2628              real gmx_unused lambda,
2629              real gmx_unused* dvdlambda,
2630              const t_mdatoms gmx_unused* md,
2631              t_fcdata gmx_unused* fcd,
2632              int gmx_unused* global_atom_index)
2633 {
2634     int  type, ai, aj, ak, al, i, d;
2635     int  t1, t2, t3;
2636     real v, vtot;
2637     rvec vec_temp;
2638     rvec f_i, f_j, f_k, f_l;
2639     rvec dx_jl;
2640     rvec delta_ante, delta_crnt, delta_post;
2641     rvec f_phi_ai, f_phi_aj, f_phi_ak, f_phi_al;
2642     rvec f_theta_ante_ai, f_theta_ante_aj, f_theta_ante_ak;
2643     rvec f_theta_post_aj, f_theta_post_ak, f_theta_post_al;
2644
2645
2646     vtot = 0.0;
2647     for (i = 0; (i < nbonds);)
2648     {
2649         type = forceatoms[i++];
2650         ai   = forceatoms[i++];
2651         aj   = forceatoms[i++];
2652         ak   = forceatoms[i++];
2653         al   = forceatoms[i++];
2654
2655
2656         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], vec_temp);
2657         pbc_rvec_sub(pbc, x[aj], x[ai], delta_ante);
2658         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], vec_temp);
2659         pbc_rvec_sub(pbc, x[ak], x[aj], delta_crnt);
2660         pbc_rvec_sub(pbc, x[ak], x[al], vec_temp);
2661         pbc_rvec_sub(pbc, x[al], x[ak], delta_post);
2662
2663         /* \brief Compute factors for CBT potential
2664          * The combined bending-torsion potential goes to zero in a very smooth manner, eliminating the numerical
2665          * instabilities, when three coarse-grained particles align and the dihedral angle and standard
2666          * dihedral potentials cannot be calculated. The CBT potential is calculated using the formula:
2667          * \f[V_{\rm CBT}(\theta_{i-1}, \theta_i, \phi_i) = k_{\phi} \sin^3\theta_{i-1} \sin^3\theta_{i}
2668          * \sum_{n=0}^4 { a_n \cos^n\phi_i}\f] ({eq:CBT} and ref \cite{MonicaGoga2013} from the manual).
2669          * It contains in its expression not only the dihedral angle \f$\phi\f$
2670          * but also \f[\theta_{i-1}\f] (theta_ante bellow) and \f[\theta_{i}\f] (theta_post bellow)
2671          * --- the adjacent bending angles.
2672          * For more explanations see comments file "restcbt.h". */
2673
2674         compute_factors_cbtdihs(type, forceparams, delta_ante, delta_crnt, delta_post, f_phi_ai,
2675                                 f_phi_aj, f_phi_ak, f_phi_al, f_theta_ante_ai, f_theta_ante_aj,
2676                                 f_theta_ante_ak, f_theta_post_aj, f_theta_post_ak, f_theta_post_al, &v);
2677
2678
2679         /*      Acumulate the resuts per beads */
2680         for (d = 0; d < DIM; d++)
2681         {
2682             f_i[d] = f_phi_ai[d] + f_theta_ante_ai[d];
2683             f_j[d] = f_phi_aj[d] + f_theta_ante_aj[d] + f_theta_post_aj[d];
2684             f_k[d] = f_phi_ak[d] + f_theta_ante_ak[d] + f_theta_post_ak[d];
2685             f_l[d] = f_phi_al[d] + f_theta_post_al[d];
2686         }
2687
2688         /*      Compute the potential energy */
2689
2690         vtot += v;
2691
2692
2693         /*  Updating the forces */
2694         rvec_inc(f[ai], f_i);
2695         rvec_inc(f[aj], f_j);
2696         rvec_inc(f[ak], f_k);
2697         rvec_inc(f[al], f_l);
2698
2699
2700         if (computeVirial(flavor))
2701         {
2702             if (pbc)
2703             {
2704                 t3 = pbc_rvec_sub(pbc, x[al], x[aj], dx_jl);
2705             }
2706             else
2707             {
2708                 t3 = CENTRAL;
2709             }
2710
2711             rvec_inc(fshift[t1], f_i);
2712             rvec_inc(fshift[CENTRAL], f_j);
2713             rvec_inc(fshift[t2], f_k);
2714             rvec_inc(fshift[t3], f_l);
2715         }
2716     }
2717
2718     return vtot;
2719 }
2720
2721 template<BondedKernelFlavor flavor>
2722 std::enable_if_t<flavor != BondedKernelFlavor::ForcesSimdWhenAvailable || !GMX_SIMD_HAVE_REAL, real>
2723 rbdihs(int             nbonds,
2724        const t_iatom   forceatoms[],
2725        const t_iparams forceparams[],
2726        const rvec      x[],
2727        rvec4           f[],
2728        rvec            fshift[],
2729        const t_pbc*    pbc,
2730        real            lambda,
2731        real*           dvdlambda,
2732        const t_mdatoms gmx_unused* md,
2733        t_fcdata gmx_unused* fcd,
2734        int gmx_unused* global_atom_index)
2735 {
2736     const real c0 = 0.0, c1 = 1.0, c2 = 2.0, c3 = 3.0, c4 = 4.0, c5 = 5.0;
2737     int        type, ai, aj, ak, al, i, j;
2738     int        t1, t2, t3;
2739     rvec       r_ij, r_kj, r_kl, m, n;
2740     real       parmA[NR_RBDIHS];
2741     real       parmB[NR_RBDIHS];
2742     real       parm[NR_RBDIHS];
2743     real       cos_phi, phi, rbp, rbpBA;
2744     real       v, ddphi, sin_phi;
2745     real       cosfac, vtot;
2746     real       L1        = 1.0 - lambda;
2747     real       dvdl_term = 0;
2748
2749     vtot = 0.0;
2750     for (i = 0; (i < nbonds);)
2751     {
2752         type = forceatoms[i++];
2753         ai   = forceatoms[i++];
2754         aj   = forceatoms[i++];
2755         ak   = forceatoms[i++];
2756         al   = forceatoms[i++];
2757
2758         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n, &t1, &t2, &t3); /*  84         */
2759
2760         /* Change to polymer convention */
2761         if (phi < c0)
2762         {
2763             phi += M_PI;
2764         }
2765         else
2766         {
2767             phi -= M_PI; /*   1         */
2768         }
2769         cos_phi = std::cos(phi);
2770         /* Beware of accuracy loss, cannot use 1-sqrt(cos^2) ! */
2771         sin_phi = std::sin(phi);
2772
2773         for (j = 0; (j < NR_RBDIHS); j++)
2774         {
2775             parmA[j] = forceparams[type].rbdihs.rbcA[j];
2776             parmB[j] = forceparams[type].rbdihs.rbcB[j];
2777             parm[j]  = L1 * parmA[j] + lambda * parmB[j];
2778         }
2779         /* Calculate cosine powers */
2780         /* Calculate the energy */
2781         /* Calculate the derivative */
2782
2783         v = parm[0];
2784         dvdl_term += (parmB[0] - parmA[0]);
2785         ddphi  = c0;
2786         cosfac = c1;
2787
2788         rbp   = parm[1];
2789         rbpBA = parmB[1] - parmA[1];
2790         ddphi += rbp * cosfac;
2791         cosfac *= cos_phi;
2792         v += cosfac * rbp;
2793         dvdl_term += cosfac * rbpBA;
2794         rbp   = parm[2];
2795         rbpBA = parmB[2] - parmA[2];
2796         ddphi += c2 * rbp * cosfac;
2797         cosfac *= cos_phi;
2798         v += cosfac * rbp;
2799         dvdl_term += cosfac * rbpBA;
2800         rbp   = parm[3];
2801         rbpBA = parmB[3] - parmA[3];
2802         ddphi += c3 * rbp * cosfac;
2803         cosfac *= cos_phi;
2804         v += cosfac * rbp;
2805         dvdl_term += cosfac * rbpBA;
2806         rbp   = parm[4];
2807         rbpBA = parmB[4] - parmA[4];
2808         ddphi += c4 * rbp * cosfac;
2809         cosfac *= cos_phi;
2810         v += cosfac * rbp;
2811         dvdl_term += cosfac * rbpBA;
2812         rbp   = parm[5];
2813         rbpBA = parmB[5] - parmA[5];
2814         ddphi += c5 * rbp * cosfac;
2815         cosfac *= cos_phi;
2816         v += cosfac * rbp;
2817         dvdl_term += cosfac * rbpBA;
2818
2819         ddphi = -ddphi * sin_phi; /*  11                */
2820
2821         do_dih_fup<flavor>(ai, aj, ak, al, ddphi, r_ij, r_kj, r_kl, m, n, f, fshift, pbc, x, t1, t2,
2822                            t3); /* 112          */
2823         vtot += v;
2824     }
2825     *dvdlambda += dvdl_term;
2826
2827     return vtot;
2828 }
2829
2830 //! \endcond
2831
2832 /*! \brief Mysterious undocumented function */
2833 int cmap_setup_grid_index(int ip, int grid_spacing, int* ipm1, int* ipp1, int* ipp2)
2834 {
2835     int im1, ip1, ip2;
2836
2837     if (ip < 0)
2838     {
2839         ip = ip + grid_spacing - 1;
2840     }
2841     else if (ip > grid_spacing)
2842     {
2843         ip = ip - grid_spacing - 1;
2844     }
2845
2846     im1 = ip - 1;
2847     ip1 = ip + 1;
2848     ip2 = ip + 2;
2849
2850     if (ip == 0)
2851     {
2852         im1 = grid_spacing - 1;
2853     }
2854     else if (ip == grid_spacing - 2)
2855     {
2856         ip2 = 0;
2857     }
2858     else if (ip == grid_spacing - 1)
2859     {
2860         ip1 = 0;
2861         ip2 = 1;
2862     }
2863
2864     *ipm1 = im1;
2865     *ipp1 = ip1;
2866     *ipp2 = ip2;
2867
2868     return ip;
2869 }
2870
2871 } // namespace
2872
2873 real cmap_dihs(int                 nbonds,
2874                const t_iatom       forceatoms[],
2875                const t_iparams     forceparams[],
2876                const gmx_cmap_t*   cmap_grid,
2877                const rvec          x[],
2878                rvec4               f[],
2879                rvec                fshift[],
2880                const struct t_pbc* pbc,
2881                real gmx_unused lambda,
2882                real gmx_unused* dvdlambda,
2883                const t_mdatoms gmx_unused* md,
2884                t_fcdata gmx_unused* fcd,
2885                int gmx_unused* global_atom_index)
2886 {
2887     int i, n;
2888     int ai, aj, ak, al, am;
2889     int a1i, a1j, a1k, a1l, a2i, a2j, a2k, a2l;
2890     int type;
2891     int t11, t21, t31, t12, t22, t32;
2892     int iphi1, ip1m1, ip1p1, ip1p2;
2893     int iphi2, ip2m1, ip2p1, ip2p2;
2894     int l1, l2, l3;
2895     int pos1, pos2, pos3, pos4;
2896
2897     real ty[4], ty1[4], ty2[4], ty12[4], tx[16];
2898     real phi1, cos_phi1, sin_phi1, xphi1;
2899     real phi2, cos_phi2, sin_phi2, xphi2;
2900     real dx, tt, tu, e, df1, df2, vtot;
2901     real ra21, rb21, rg21, rg1, rgr1, ra2r1, rb2r1, rabr1;
2902     real ra22, rb22, rg22, rg2, rgr2, ra2r2, rb2r2, rabr2;
2903     real fg1, hg1, fga1, hgb1, gaa1, gbb1;
2904     real fg2, hg2, fga2, hgb2, gaa2, gbb2;
2905     real fac;
2906
2907     rvec r1_ij, r1_kj, r1_kl, m1, n1;
2908     rvec r2_ij, r2_kj, r2_kl, m2, n2;
2909     rvec f1_i, f1_j, f1_k, f1_l;
2910     rvec f2_i, f2_j, f2_k, f2_l;
2911     rvec a1, b1, a2, b2;
2912     rvec f1, g1, h1, f2, g2, h2;
2913     rvec dtf1, dtg1, dth1, dtf2, dtg2, dth2;
2914
2915     int loop_index[4][4] = { { 0, 4, 8, 12 }, { 1, 5, 9, 13 }, { 2, 6, 10, 14 }, { 3, 7, 11, 15 } };
2916
2917     /* Total CMAP energy */
2918     vtot = 0;
2919
2920     for (n = 0; n < nbonds;)
2921     {
2922         /* Five atoms are involved in the two torsions */
2923         type = forceatoms[n++];
2924         ai   = forceatoms[n++];
2925         aj   = forceatoms[n++];
2926         ak   = forceatoms[n++];
2927         al   = forceatoms[n++];
2928         am   = forceatoms[n++];
2929
2930         /* Which CMAP type is this */
2931         const int   cmapA = forceparams[type].cmap.cmapA;
2932         const real* cmapd = cmap_grid->cmapdata[cmapA].cmap.data();
2933
2934         /* First torsion */
2935         a1i = ai;
2936         a1j = aj;
2937         a1k = ak;
2938         a1l = al;
2939
2940         phi1 = dih_angle(x[a1i], x[a1j], x[a1k], x[a1l], pbc, r1_ij, r1_kj, r1_kl, m1, n1, &t11,
2941                          &t21, &t31); /* 84 */
2942
2943         cos_phi1 = std::cos(phi1);
2944
2945         a1[0] = r1_ij[1] * r1_kj[2] - r1_ij[2] * r1_kj[1];
2946         a1[1] = r1_ij[2] * r1_kj[0] - r1_ij[0] * r1_kj[2];
2947         a1[2] = r1_ij[0] * r1_kj[1] - r1_ij[1] * r1_kj[0]; /* 9 */
2948
2949         b1[0] = r1_kl[1] * r1_kj[2] - r1_kl[2] * r1_kj[1];
2950         b1[1] = r1_kl[2] * r1_kj[0] - r1_kl[0] * r1_kj[2];
2951         b1[2] = r1_kl[0] * r1_kj[1] - r1_kl[1] * r1_kj[0]; /* 9 */
2952
2953         pbc_rvec_sub(pbc, x[a1l], x[a1k], h1);
2954
2955         ra21 = iprod(a1, a1);       /* 5 */
2956         rb21 = iprod(b1, b1);       /* 5 */
2957         rg21 = iprod(r1_kj, r1_kj); /* 5 */
2958         rg1  = sqrt(rg21);
2959
2960         rgr1  = 1.0 / rg1;
2961         ra2r1 = 1.0 / ra21;
2962         rb2r1 = 1.0 / rb21;
2963         rabr1 = sqrt(ra2r1 * rb2r1);
2964
2965         sin_phi1 = rg1 * rabr1 * iprod(a1, h1) * (-1);
2966
2967         if (cos_phi1 < -0.5 || cos_phi1 > 0.5)
2968         {
2969             phi1 = std::asin(sin_phi1);
2970
2971             if (cos_phi1 < 0)
2972             {
2973                 if (phi1 > 0)
2974                 {
2975                     phi1 = M_PI - phi1;
2976                 }
2977                 else
2978                 {
2979                     phi1 = -M_PI - phi1;
2980                 }
2981             }
2982         }
2983         else
2984         {
2985             phi1 = std::acos(cos_phi1);
2986
2987             if (sin_phi1 < 0)
2988             {
2989                 phi1 = -phi1;
2990             }
2991         }
2992
2993         xphi1 = phi1 + M_PI; /* 1 */
2994
2995         /* Second torsion */
2996         a2i = aj;
2997         a2j = ak;
2998         a2k = al;
2999         a2l = am;
3000
3001         phi2 = dih_angle(x[a2i], x[a2j], x[a2k], x[a2l], pbc, r2_ij, r2_kj, r2_kl, m2, n2, &t12,
3002                          &t22, &t32); /* 84 */
3003
3004         cos_phi2 = std::cos(phi2);
3005
3006         a2[0] = r2_ij[1] * r2_kj[2] - r2_ij[2] * r2_kj[1];
3007         a2[1] = r2_ij[2] * r2_kj[0] - r2_ij[0] * r2_kj[2];
3008         a2[2] = r2_ij[0] * r2_kj[1] - r2_ij[1] * r2_kj[0]; /* 9 */
3009
3010         b2[0] = r2_kl[1] * r2_kj[2] - r2_kl[2] * r2_kj[1];
3011         b2[1] = r2_kl[2] * r2_kj[0] - r2_kl[0] * r2_kj[2];
3012         b2[2] = r2_kl[0] * r2_kj[1] - r2_kl[1] * r2_kj[0]; /* 9 */
3013
3014         pbc_rvec_sub(pbc, x[a2l], x[a2k], h2);
3015
3016         ra22 = iprod(a2, a2);       /* 5 */
3017         rb22 = iprod(b2, b2);       /* 5 */
3018         rg22 = iprod(r2_kj, r2_kj); /* 5 */
3019         rg2  = sqrt(rg22);
3020
3021         rgr2  = 1.0 / rg2;
3022         ra2r2 = 1.0 / ra22;
3023         rb2r2 = 1.0 / rb22;
3024         rabr2 = sqrt(ra2r2 * rb2r2);
3025
3026         sin_phi2 = rg2 * rabr2 * iprod(a2, h2) * (-1);
3027
3028         if (cos_phi2 < -0.5 || cos_phi2 > 0.5)
3029         {
3030             phi2 = std::asin(sin_phi2);
3031
3032             if (cos_phi2 < 0)
3033             {
3034                 if (phi2 > 0)
3035                 {
3036                     phi2 = M_PI - phi2;
3037                 }
3038                 else
3039                 {
3040                     phi2 = -M_PI - phi2;
3041                 }
3042             }
3043         }
3044         else
3045         {
3046             phi2 = std::acos(cos_phi2);
3047
3048             if (sin_phi2 < 0)
3049             {
3050                 phi2 = -phi2;
3051             }
3052         }
3053
3054         xphi2 = phi2 + M_PI; /* 1 */
3055
3056         /* Range mangling */
3057         if (xphi1 < 0)
3058         {
3059             xphi1 = xphi1 + 2 * M_PI;
3060         }
3061         else if (xphi1 >= 2 * M_PI)
3062         {
3063             xphi1 = xphi1 - 2 * M_PI;
3064         }
3065
3066         if (xphi2 < 0)
3067         {
3068             xphi2 = xphi2 + 2 * M_PI;
3069         }
3070         else if (xphi2 >= 2 * M_PI)
3071         {
3072             xphi2 = xphi2 - 2 * M_PI;
3073         }
3074
3075         /* Number of grid points */
3076         dx = 2 * M_PI / cmap_grid->grid_spacing;
3077
3078         /* Where on the grid are we */
3079         iphi1 = static_cast<int>(xphi1 / dx);
3080         iphi2 = static_cast<int>(xphi2 / dx);
3081
3082         iphi1 = cmap_setup_grid_index(iphi1, cmap_grid->grid_spacing, &ip1m1, &ip1p1, &ip1p2);
3083         iphi2 = cmap_setup_grid_index(iphi2, cmap_grid->grid_spacing, &ip2m1, &ip2p1, &ip2p2);
3084
3085         pos1 = iphi1 * cmap_grid->grid_spacing + iphi2;
3086         pos2 = ip1p1 * cmap_grid->grid_spacing + iphi2;
3087         pos3 = ip1p1 * cmap_grid->grid_spacing + ip2p1;
3088         pos4 = iphi1 * cmap_grid->grid_spacing + ip2p1;
3089
3090         ty[0] = cmapd[pos1 * 4];
3091         ty[1] = cmapd[pos2 * 4];
3092         ty[2] = cmapd[pos3 * 4];
3093         ty[3] = cmapd[pos4 * 4];
3094
3095         ty1[0] = cmapd[pos1 * 4 + 1];
3096         ty1[1] = cmapd[pos2 * 4 + 1];
3097         ty1[2] = cmapd[pos3 * 4 + 1];
3098         ty1[3] = cmapd[pos4 * 4 + 1];
3099
3100         ty2[0] = cmapd[pos1 * 4 + 2];
3101         ty2[1] = cmapd[pos2 * 4 + 2];
3102         ty2[2] = cmapd[pos3 * 4 + 2];
3103         ty2[3] = cmapd[pos4 * 4 + 2];
3104
3105         ty12[0] = cmapd[pos1 * 4 + 3];
3106         ty12[1] = cmapd[pos2 * 4 + 3];
3107         ty12[2] = cmapd[pos3 * 4 + 3];
3108         ty12[3] = cmapd[pos4 * 4 + 3];
3109
3110         /* Switch to degrees */
3111         dx    = 360.0 / cmap_grid->grid_spacing;
3112         xphi1 = xphi1 * RAD2DEG;
3113         xphi2 = xphi2 * RAD2DEG;
3114
3115         for (i = 0; i < 4; i++) /* 16 */
3116         {
3117             tx[i]      = ty[i];
3118             tx[i + 4]  = ty1[i] * dx;
3119             tx[i + 8]  = ty2[i] * dx;
3120             tx[i + 12] = ty12[i] * dx * dx;
3121         }
3122
3123         real tc[16] = { 0 };
3124         for (int idx = 0; idx < 16; idx++) /* 1056 */
3125         {
3126             for (int k = 0; k < 16; k++)
3127             {
3128                 tc[idx] += cmap_coeff_matrix[k * 16 + idx] * tx[k];
3129             }
3130         }
3131
3132         tt = (xphi1 - iphi1 * dx) / dx;
3133         tu = (xphi2 - iphi2 * dx) / dx;
3134
3135         e   = 0;
3136         df1 = 0;
3137         df2 = 0;
3138
3139         for (i = 3; i >= 0; i--)
3140         {
3141             l1 = loop_index[i][3];
3142             l2 = loop_index[i][2];
3143             l3 = loop_index[i][1];
3144
3145             e = tt * e + ((tc[i * 4 + 3] * tu + tc[i * 4 + 2]) * tu + tc[i * 4 + 1]) * tu + tc[i * 4];
3146             df1 = tu * df1 + (3.0 * tc[l1] * tt + 2.0 * tc[l2]) * tt + tc[l3];
3147             df2 = tt * df2 + (3.0 * tc[i * 4 + 3] * tu + 2.0 * tc[i * 4 + 2]) * tu + tc[i * 4 + 1];
3148         }
3149
3150         fac = RAD2DEG / dx;
3151         df1 = df1 * fac;
3152         df2 = df2 * fac;
3153
3154         /* CMAP energy */
3155         vtot += e;
3156
3157         /* Do forces - first torsion */
3158         fg1  = iprod(r1_ij, r1_kj);
3159         hg1  = iprod(r1_kl, r1_kj);
3160         fga1 = fg1 * ra2r1 * rgr1;
3161         hgb1 = hg1 * rb2r1 * rgr1;
3162         gaa1 = -ra2r1 * rg1;
3163         gbb1 = rb2r1 * rg1;
3164
3165         for (i = 0; i < DIM; i++)
3166         {
3167             dtf1[i] = gaa1 * a1[i];
3168             dtg1[i] = fga1 * a1[i] - hgb1 * b1[i];
3169             dth1[i] = gbb1 * b1[i];
3170
3171             f1[i] = df1 * dtf1[i];
3172             g1[i] = df1 * dtg1[i];
3173             h1[i] = df1 * dth1[i];
3174
3175             f1_i[i] = f1[i];
3176             f1_j[i] = -f1[i] - g1[i];
3177             f1_k[i] = h1[i] + g1[i];
3178             f1_l[i] = -h1[i];
3179
3180             f[a1i][i] = f[a1i][i] + f1_i[i];
3181             f[a1j][i] = f[a1j][i] + f1_j[i]; /* - f1[i] - g1[i] */
3182             f[a1k][i] = f[a1k][i] + f1_k[i]; /* h1[i] + g1[i] */
3183             f[a1l][i] = f[a1l][i] + f1_l[i]; /* h1[i] */
3184         }
3185
3186         /* Do forces - second torsion */
3187         fg2  = iprod(r2_ij, r2_kj);
3188         hg2  = iprod(r2_kl, r2_kj);
3189         fga2 = fg2 * ra2r2 * rgr2;
3190         hgb2 = hg2 * rb2r2 * rgr2;
3191         gaa2 = -ra2r2 * rg2;
3192         gbb2 = rb2r2 * rg2;
3193
3194         for (i = 0; i < DIM; i++)
3195         {
3196             dtf2[i] = gaa2 * a2[i];
3197             dtg2[i] = fga2 * a2[i] - hgb2 * b2[i];
3198             dth2[i] = gbb2 * b2[i];
3199
3200             f2[i] = df2 * dtf2[i];
3201             g2[i] = df2 * dtg2[i];
3202             h2[i] = df2 * dth2[i];
3203
3204             f2_i[i] = f2[i];
3205             f2_j[i] = -f2[i] - g2[i];
3206             f2_k[i] = h2[i] + g2[i];
3207             f2_l[i] = -h2[i];
3208
3209             f[a2i][i] = f[a2i][i] + f2_i[i]; /* f2[i] */
3210             f[a2j][i] = f[a2j][i] + f2_j[i]; /* - f2[i] - g2[i] */
3211             f[a2k][i] = f[a2k][i] + f2_k[i]; /* h2[i] + g2[i] */
3212             f[a2l][i] = f[a2l][i] + f2_l[i]; /* - h2[i] */
3213         }
3214
3215         /* Shift forces */
3216         if (fshift != nullptr)
3217         {
3218             if (pbc)
3219             {
3220                 t31 = pbc_rvec_sub(pbc, x[a1l], x[a1j], h1);
3221                 t32 = pbc_rvec_sub(pbc, x[a2l], x[a2j], h2);
3222             }
3223             else
3224             {
3225                 t31 = CENTRAL;
3226                 t32 = CENTRAL;
3227             }
3228
3229             rvec_inc(fshift[t11], f1_i);
3230             rvec_inc(fshift[CENTRAL], f1_j);
3231             rvec_inc(fshift[t21], f1_k);
3232             rvec_inc(fshift[t31], f1_l);
3233
3234             rvec_inc(fshift[t12], f2_i);
3235             rvec_inc(fshift[CENTRAL], f2_j);
3236             rvec_inc(fshift[t22], f2_k);
3237             rvec_inc(fshift[t32], f2_l);
3238         }
3239     }
3240     return vtot;
3241 }
3242
3243 namespace
3244 {
3245
3246 //! \cond
3247 /***********************************************************
3248  *
3249  *   G R O M O S  9 6   F U N C T I O N S
3250  *
3251  ***********************************************************/
3252 real g96harmonic(real kA, real kB, real xA, real xB, real x, real lambda, real* V, real* F)
3253 {
3254     const real half = 0.5;
3255     real       L1, kk, x0, dx, dx2;
3256     real       v, f, dvdlambda;
3257
3258     L1 = 1.0 - lambda;
3259     kk = L1 * kA + lambda * kB;
3260     x0 = L1 * xA + lambda * xB;
3261
3262     dx  = x - x0;
3263     dx2 = dx * dx;
3264
3265     f         = -kk * dx;
3266     v         = half * kk * dx2;
3267     dvdlambda = half * (kB - kA) * dx2 + (xA - xB) * kk * dx;
3268
3269     *F = f;
3270     *V = v;
3271
3272     return dvdlambda;
3273
3274     /* That was 21 flops */
3275 }
3276
3277 template<BondedKernelFlavor flavor>
3278 real g96bonds(int             nbonds,
3279               const t_iatom   forceatoms[],
3280               const t_iparams forceparams[],
3281               const rvec      x[],
3282               rvec4           f[],
3283               rvec            fshift[],
3284               const t_pbc*    pbc,
3285               real            lambda,
3286               real*           dvdlambda,
3287               const t_mdatoms gmx_unused* md,
3288               t_fcdata gmx_unused* fcd,
3289               int gmx_unused* global_atom_index)
3290 {
3291     int  i, ki, ai, aj, type;
3292     real dr2, fbond, vbond, vtot;
3293     rvec dx;
3294
3295     vtot = 0.0;
3296     for (i = 0; (i < nbonds);)
3297     {
3298         type = forceatoms[i++];
3299         ai   = forceatoms[i++];
3300         aj   = forceatoms[i++];
3301
3302         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
3303         dr2 = iprod(dx, dx);                       /*   5               */
3304
3305         *dvdlambda += g96harmonic(forceparams[type].harmonic.krA, forceparams[type].harmonic.krB,
3306                                   forceparams[type].harmonic.rA, forceparams[type].harmonic.rB, dr2,
3307                                   lambda, &vbond, &fbond);
3308
3309         vtot += 0.5 * vbond; /* 1*/
3310
3311         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, fshift); /* 15 */
3312     }                                                               /* 44 TOTAL */
3313     return vtot;
3314 }
3315
3316 real g96bond_angle(const rvec xi, const rvec xj, const rvec xk, const t_pbc* pbc, rvec r_ij, rvec r_kj, int* t1, int* t2)
3317 /* Return value is the angle between the bonds i-j and j-k */
3318 {
3319     real costh;
3320
3321     *t1 = pbc_rvec_sub(pbc, xi, xj, r_ij); /*  3                */
3322     *t2 = pbc_rvec_sub(pbc, xk, xj, r_kj); /*  3                */
3323
3324     costh = cos_angle(r_ij, r_kj); /* 25                */
3325     /* 41 TOTAL */
3326     return costh;
3327 }
3328
3329 template<BondedKernelFlavor flavor>
3330 real g96angles(int             nbonds,
3331                const t_iatom   forceatoms[],
3332                const t_iparams forceparams[],
3333                const rvec      x[],
3334                rvec4           f[],
3335                rvec            fshift[],
3336                const t_pbc*    pbc,
3337                real            lambda,
3338                real*           dvdlambda,
3339                const t_mdatoms gmx_unused* md,
3340                t_fcdata gmx_unused* fcd,
3341                int gmx_unused* global_atom_index)
3342 {
3343     int  i, ai, aj, ak, type, m, t1, t2;
3344     rvec r_ij, r_kj;
3345     real cos_theta, dVdt, va, vtot;
3346     real rij_1, rij_2, rkj_1, rkj_2, rijrkj_1;
3347     rvec f_i, f_j, f_k;
3348
3349     vtot = 0.0;
3350     for (i = 0; (i < nbonds);)
3351     {
3352         type = forceatoms[i++];
3353         ai   = forceatoms[i++];
3354         aj   = forceatoms[i++];
3355         ak   = forceatoms[i++];
3356
3357         cos_theta = g96bond_angle(x[ai], x[aj], x[ak], pbc, r_ij, r_kj, &t1, &t2);
3358
3359         *dvdlambda += g96harmonic(forceparams[type].harmonic.krA, forceparams[type].harmonic.krB,
3360                                   forceparams[type].harmonic.rA, forceparams[type].harmonic.rB,
3361                                   cos_theta, lambda, &va, &dVdt);
3362         vtot += va;
3363
3364         rij_1    = gmx::invsqrt(iprod(r_ij, r_ij));
3365         rkj_1    = gmx::invsqrt(iprod(r_kj, r_kj));
3366         rij_2    = rij_1 * rij_1;
3367         rkj_2    = rkj_1 * rkj_1;
3368         rijrkj_1 = rij_1 * rkj_1; /* 23 */
3369
3370         for (m = 0; (m < DIM); m++) /*  42      */
3371         {
3372             f_i[m] = dVdt * (r_kj[m] * rijrkj_1 - r_ij[m] * rij_2 * cos_theta);
3373             f_k[m] = dVdt * (r_ij[m] * rijrkj_1 - r_kj[m] * rkj_2 * cos_theta);
3374             f_j[m] = -f_i[m] - f_k[m];
3375             f[ai][m] += f_i[m];
3376             f[aj][m] += f_j[m];
3377             f[ak][m] += f_k[m];
3378         }
3379
3380         if (computeVirial(flavor))
3381         {
3382             rvec_inc(fshift[t1], f_i);
3383             rvec_inc(fshift[CENTRAL], f_j);
3384             rvec_inc(fshift[t2], f_k); /* 9 */
3385         }
3386         /* 163 TOTAL    */
3387     }
3388     return vtot;
3389 }
3390
3391 template<BondedKernelFlavor flavor>
3392 real cross_bond_bond(int             nbonds,
3393                      const t_iatom   forceatoms[],
3394                      const t_iparams forceparams[],
3395                      const rvec      x[],
3396                      rvec4           f[],
3397                      rvec            fshift[],
3398                      const t_pbc*    pbc,
3399                      real gmx_unused lambda,
3400                      real gmx_unused* dvdlambda,
3401                      const t_mdatoms gmx_unused* md,
3402                      t_fcdata gmx_unused* fcd,
3403                      int gmx_unused* global_atom_index)
3404 {
3405     /* Potential from Lawrence and Skimmer, Chem. Phys. Lett. 372 (2003)
3406      * pp. 842-847
3407      */
3408     int  i, ai, aj, ak, type, m, t1, t2;
3409     rvec r_ij, r_kj;
3410     real vtot, vrr, s1, s2, r1, r2, r1e, r2e, krr;
3411     rvec f_i, f_j, f_k;
3412
3413     vtot = 0.0;
3414     for (i = 0; (i < nbonds);)
3415     {
3416         type = forceatoms[i++];
3417         ai   = forceatoms[i++];
3418         aj   = forceatoms[i++];
3419         ak   = forceatoms[i++];
3420         r1e  = forceparams[type].cross_bb.r1e;
3421         r2e  = forceparams[type].cross_bb.r2e;
3422         krr  = forceparams[type].cross_bb.krr;
3423
3424         /* Compute distance vectors ... */
3425         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], r_ij);
3426         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], r_kj);
3427
3428         /* ... and their lengths */
3429         r1 = norm(r_ij);
3430         r2 = norm(r_kj);
3431
3432         /* Deviations from ideality */
3433         s1 = r1 - r1e;
3434         s2 = r2 - r2e;
3435
3436         /* Energy (can be negative!) */
3437         vrr = krr * s1 * s2;
3438         vtot += vrr;
3439
3440         /* Forces */
3441         svmul(-krr * s2 / r1, r_ij, f_i);
3442         svmul(-krr * s1 / r2, r_kj, f_k);
3443
3444         for (m = 0; (m < DIM); m++) /*  12      */
3445         {
3446             f_j[m] = -f_i[m] - f_k[m];
3447             f[ai][m] += f_i[m];
3448             f[aj][m] += f_j[m];
3449             f[ak][m] += f_k[m];
3450         }
3451
3452         if (computeVirial(flavor))
3453         {
3454             rvec_inc(fshift[t1], f_i);
3455             rvec_inc(fshift[CENTRAL], f_j);
3456             rvec_inc(fshift[t2], f_k); /* 9 */
3457         }
3458         /* 163 TOTAL    */
3459     }
3460     return vtot;
3461 }
3462
3463 template<BondedKernelFlavor flavor>
3464 real cross_bond_angle(int             nbonds,
3465                       const t_iatom   forceatoms[],
3466                       const t_iparams forceparams[],
3467                       const rvec      x[],
3468                       rvec4           f[],
3469                       rvec            fshift[],
3470                       const t_pbc*    pbc,
3471                       real gmx_unused lambda,
3472                       real gmx_unused* dvdlambda,
3473                       const t_mdatoms gmx_unused* md,
3474                       t_fcdata gmx_unused* fcd,
3475                       int gmx_unused* global_atom_index)
3476 {
3477     /* Potential from Lawrence and Skimmer, Chem. Phys. Lett. 372 (2003)
3478      * pp. 842-847
3479      */
3480     int  i, ai, aj, ak, type, m, t1, t2;
3481     rvec r_ij, r_kj, r_ik;
3482     real vtot, vrt, s1, s2, s3, r1, r2, r3, r1e, r2e, r3e, krt, k1, k2, k3;
3483     rvec f_i, f_j, f_k;
3484
3485     vtot = 0.0;
3486     for (i = 0; (i < nbonds);)
3487     {
3488         type = forceatoms[i++];
3489         ai   = forceatoms[i++];
3490         aj   = forceatoms[i++];
3491         ak   = forceatoms[i++];
3492         r1e  = forceparams[type].cross_ba.r1e;
3493         r2e  = forceparams[type].cross_ba.r2e;
3494         r3e  = forceparams[type].cross_ba.r3e;
3495         krt  = forceparams[type].cross_ba.krt;
3496
3497         /* Compute distance vectors ... */
3498         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], r_ij);
3499         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], r_kj);
3500         pbc_rvec_sub(pbc, x[ai], x[ak], r_ik);
3501
3502         /* ... and their lengths */
3503         r1 = norm(r_ij);
3504         r2 = norm(r_kj);
3505         r3 = norm(r_ik);
3506
3507         /* Deviations from ideality */
3508         s1 = r1 - r1e;
3509         s2 = r2 - r2e;
3510         s3 = r3 - r3e;
3511
3512         /* Energy (can be negative!) */
3513         vrt = krt * s3 * (s1 + s2);
3514         vtot += vrt;
3515
3516         /* Forces */
3517         k1 = -krt * (s3 / r1);
3518         k2 = -krt * (s3 / r2);
3519         k3 = -krt * (s1 + s2) / r3;
3520         for (m = 0; (m < DIM); m++)
3521         {
3522             f_i[m] = k1 * r_ij[m] + k3 * r_ik[m];
3523             f_k[m] = k2 * r_kj[m] - k3 * r_ik[m];
3524             f_j[m] = -f_i[m] - f_k[m];
3525         }
3526
3527         for (m = 0; (m < DIM); m++) /*  12      */
3528         {
3529             f[ai][m] += f_i[m];
3530             f[aj][m] += f_j[m];
3531             f[ak][m] += f_k[m];
3532         }
3533
3534         if (computeVirial(flavor))
3535         {
3536             rvec_inc(fshift[t1], f_i);
3537             rvec_inc(fshift[CENTRAL], f_j);
3538             rvec_inc(fshift[t2], f_k); /* 9 */
3539         }
3540         /* 163 TOTAL    */
3541     }
3542     return vtot;
3543 }
3544
3545 /*! \brief Computes the potential and force for a tabulated potential */
3546 real bonded_tab(const char*          type,
3547                 int                  table_nr,
3548                 const bondedtable_t* table,
3549                 real                 kA,
3550                 real                 kB,
3551                 real                 r,
3552                 real                 lambda,
3553                 real*                V,
3554                 real*                F)
3555 {
3556     real k, tabscale, rt, eps, eps2, Yt, Ft, Geps, Heps2, Fp, VV, FF;
3557     int  n0, nnn;
3558     real dvdlambda;
3559
3560     k = (1.0 - lambda) * kA + lambda * kB;
3561
3562     tabscale          = table->scale;
3563     const real* VFtab = table->data.data();
3564
3565     rt = r * tabscale;
3566     n0 = static_cast<int>(rt);
3567     if (n0 >= table->n)
3568     {
3569         gmx_fatal(FARGS,
3570                   "A tabulated %s interaction table number %d is out of the table range: r %f, "
3571                   "between table indices %d and %d, table length %d",
3572                   type, table_nr, r, n0, n0 + 1, table->n);
3573     }
3574     eps   = rt - n0;
3575     eps2  = eps * eps;
3576     nnn   = 4 * n0;
3577     Yt    = VFtab[nnn];
3578     Ft    = VFtab[nnn + 1];
3579     Geps  = VFtab[nnn + 2] * eps;
3580     Heps2 = VFtab[nnn + 3] * eps2;
3581     Fp    = Ft + Geps + Heps2;
3582     VV    = Yt + Fp * eps;
3583     FF    = Fp + Geps + 2.0 * Heps2;
3584
3585     *F        = -k * FF * tabscale;
3586     *V        = k * VV;
3587     dvdlambda = (kB - kA) * VV;
3588
3589     return dvdlambda;
3590
3591     /* That was 22 flops */
3592 }
3593
3594 template<BondedKernelFlavor flavor>
3595 real tab_bonds(int             nbonds,
3596                const t_iatom   forceatoms[],
3597                const t_iparams forceparams[],
3598                const rvec      x[],
3599                rvec4           f[],
3600                rvec            fshift[],
3601                const t_pbc*    pbc,
3602                real            lambda,
3603                real*           dvdlambda,
3604                const t_mdatoms gmx_unused* md,
3605                t_fcdata*                   fcd,
3606                int gmx_unused* global_atom_index)
3607 {
3608     int  i, ki, ai, aj, type, table;
3609     real dr, dr2, fbond, vbond, vtot;
3610     rvec dx;
3611
3612     vtot = 0.0;
3613     for (i = 0; (i < nbonds);)
3614     {
3615         type = forceatoms[i++];
3616         ai   = forceatoms[i++];
3617         aj   = forceatoms[i++];
3618
3619         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
3620         dr2 = iprod(dx, dx);                       /*   5               */
3621         dr  = dr2 * gmx::invsqrt(dr2);             /*  10               */
3622
3623         table = forceparams[type].tab.table;
3624
3625         *dvdlambda += bonded_tab("bond", table, &fcd->bondtab[table], forceparams[type].tab.kA,
3626                                  forceparams[type].tab.kB, dr, lambda, &vbond, &fbond); /*  22 */
3627
3628         if (dr2 == 0.0)
3629         {
3630             continue;
3631         }
3632
3633
3634         vtot += vbond;              /* 1*/
3635         fbond *= gmx::invsqrt(dr2); /*   6              */
3636
3637         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, fshift); /* 15 */
3638     }                                                               /* 62 TOTAL */
3639     return vtot;
3640 }
3641
3642 template<BondedKernelFlavor flavor>
3643 real tab_angles(int             nbonds,
3644                 const t_iatom   forceatoms[],
3645                 const t_iparams forceparams[],
3646                 const rvec      x[],
3647                 rvec4           f[],
3648                 rvec            fshift[],
3649                 const t_pbc*    pbc,
3650                 real            lambda,
3651                 real*           dvdlambda,
3652                 const t_mdatoms gmx_unused* md,
3653                 t_fcdata*                   fcd,
3654                 int gmx_unused* global_atom_index)
3655 {
3656     int  i, ai, aj, ak, t1, t2, type, table;
3657     rvec r_ij, r_kj;
3658     real cos_theta, cos_theta2, theta, dVdt, va, vtot;
3659
3660     vtot = 0.0;
3661     for (i = 0; (i < nbonds);)
3662     {
3663         type = forceatoms[i++];
3664         ai   = forceatoms[i++];
3665         aj   = forceatoms[i++];
3666         ak   = forceatoms[i++];
3667
3668         theta = bond_angle(x[ai], x[aj], x[ak], pbc, r_ij, r_kj, &cos_theta, &t1, &t2); /*  41 */
3669
3670         table = forceparams[type].tab.table;
3671
3672         *dvdlambda += bonded_tab("angle", table, &fcd->angletab[table], forceparams[type].tab.kA,
3673                                  forceparams[type].tab.kB, theta, lambda, &va, &dVdt); /*  22  */
3674         vtot += va;
3675
3676         cos_theta2 = gmx::square(cos_theta); /*   1             */
3677         if (cos_theta2 < 1)
3678         {
3679             int  m;
3680             real st, sth;
3681             real cik, cii, ckk;
3682             real nrkj2, nrij2;
3683             rvec f_i, f_j, f_k;
3684
3685             st    = dVdt * gmx::invsqrt(1 - cos_theta2); /*  12         */
3686             sth   = st * cos_theta;                      /*   1         */
3687             nrkj2 = iprod(r_kj, r_kj);                   /*   5         */
3688             nrij2 = iprod(r_ij, r_ij);
3689
3690             cik = st * gmx::invsqrt(nrkj2 * nrij2); /*  12              */
3691             cii = sth / nrij2;                      /*  10              */
3692             ckk = sth / nrkj2;                      /*  10              */
3693
3694             for (m = 0; (m < DIM); m++) /*  39          */
3695             {
3696                 f_i[m] = -(cik * r_kj[m] - cii * r_ij[m]);
3697                 f_k[m] = -(cik * r_ij[m] - ckk * r_kj[m]);
3698                 f_j[m] = -f_i[m] - f_k[m];
3699                 f[ai][m] += f_i[m];
3700                 f[aj][m] += f_j[m];
3701                 f[ak][m] += f_k[m];
3702             }
3703
3704             if (computeVirial(flavor))
3705             {
3706                 rvec_inc(fshift[t1], f_i);
3707                 rvec_inc(fshift[CENTRAL], f_j);
3708                 rvec_inc(fshift[t2], f_k);
3709             }
3710         } /* 169 TOTAL  */
3711     }
3712     return vtot;
3713 }
3714
3715 template<BondedKernelFlavor flavor>
3716 real tab_dihs(int             nbonds,
3717               const t_iatom   forceatoms[],
3718               const t_iparams forceparams[],
3719               const rvec      x[],
3720               rvec4           f[],
3721               rvec            fshift[],
3722               const t_pbc*    pbc,
3723               real            lambda,
3724               real*           dvdlambda,
3725               const t_mdatoms gmx_unused* md,
3726               t_fcdata*                   fcd,
3727               int gmx_unused* global_atom_index)
3728 {
3729     int  i, type, ai, aj, ak, al, table;
3730     int  t1, t2, t3;
3731     rvec r_ij, r_kj, r_kl, m, n;
3732     real phi, ddphi, vpd, vtot;
3733
3734     vtot = 0.0;
3735     for (i = 0; (i < nbonds);)
3736     {
3737         type = forceatoms[i++];
3738         ai   = forceatoms[i++];
3739         aj   = forceatoms[i++];
3740         ak   = forceatoms[i++];
3741         al   = forceatoms[i++];
3742
3743         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n, &t1, &t2, &t3); /*  84  */
3744
3745         table = forceparams[type].tab.table;
3746
3747         /* Hopefully phi+M_PI never results in values < 0 */
3748         *dvdlambda += bonded_tab("dihedral", table, &fcd->dihtab[table], forceparams[type].tab.kA,
3749                                  forceparams[type].tab.kB, phi + M_PI, lambda, &vpd, &ddphi);
3750
3751         vtot += vpd;
3752         do_dih_fup<flavor>(ai, aj, ak, al, -ddphi, r_ij, r_kj, r_kl, m, n, f, fshift, pbc, x, t1,
3753                            t2, t3); /* 112      */
3754
3755     } /* 227 TOTAL  */
3756
3757     return vtot;
3758 }
3759
3760 struct BondedInteractions
3761 {
3762     BondedFunction function;
3763     int            nrnbIndex;
3764 };
3765
3766 // Bug in old clang versions prevents constexpr. constexpr is needed for MSVC.
3767 #if defined(__clang__) && __clang_major__ < 6
3768 #    define CONSTEXPR_EXCL_OLD_CLANG const
3769 #else
3770 #    define CONSTEXPR_EXCL_OLD_CLANG constexpr
3771 #endif
3772
3773 /*! \brief Lookup table of bonded interaction functions
3774  *
3775  * This must have as many entries as interaction_function in ifunc.cpp */
3776 template<BondedKernelFlavor flavor>
3777 CONSTEXPR_EXCL_OLD_CLANG std::array<BondedInteractions, F_NRE> c_bondedInteractionFunctions = {
3778     BondedInteractions{ bonds<flavor>, eNR_BONDS },                       // F_BONDS
3779     BondedInteractions{ g96bonds<flavor>, eNR_BONDS },                    // F_G96BONDS
3780     BondedInteractions{ morse_bonds<flavor>, eNR_MORSE },                 // F_MORSE
3781     BondedInteractions{ cubic_bonds<flavor>, eNR_CUBICBONDS },            // F_CUBICBONDS
3782     BondedInteractions{ unimplemented, -1 },                              // F_CONNBONDS
3783     BondedInteractions{ bonds<flavor>, eNR_BONDS },                       // F_HARMONIC
3784     BondedInteractions{ FENE_bonds<flavor>, eNR_FENEBONDS },              // F_FENEBONDS
3785     BondedInteractions{ tab_bonds<flavor>, eNR_TABBONDS },                // F_TABBONDS
3786     BondedInteractions{ tab_bonds<flavor>, eNR_TABBONDS },                // F_TABBONDSNC
3787     BondedInteractions{ restraint_bonds<flavor>, eNR_RESTRBONDS },        // F_RESTRBONDS
3788     BondedInteractions{ angles<flavor>, eNR_ANGLES },                     // F_ANGLES
3789     BondedInteractions{ g96angles<flavor>, eNR_ANGLES },                  // F_G96ANGLES
3790     BondedInteractions{ restrangles<flavor>, eNR_ANGLES },                // F_RESTRANGLES
3791     BondedInteractions{ linear_angles<flavor>, eNR_ANGLES },              // F_LINEAR_ANGLES
3792     BondedInteractions{ cross_bond_bond<flavor>, eNR_CROSS_BOND_BOND },   // F_CROSS_BOND_BONDS
3793     BondedInteractions{ cross_bond_angle<flavor>, eNR_CROSS_BOND_ANGLE }, // F_CROSS_BOND_ANGLES
3794     BondedInteractions{ urey_bradley<flavor>, eNR_UREY_BRADLEY },         // F_UREY_BRADLEY
3795     BondedInteractions{ quartic_angles<flavor>, eNR_QANGLES },            // F_QUARTIC_ANGLES
3796     BondedInteractions{ tab_angles<flavor>, eNR_TABANGLES },              // F_TABANGLES
3797     BondedInteractions{ pdihs<flavor>, eNR_PROPER },                      // F_PDIHS
3798     BondedInteractions{ rbdihs<flavor>, eNR_RB },                         // F_RBDIHS
3799     BondedInteractions{ restrdihs<flavor>, eNR_PROPER },                  // F_RESTRDIHS
3800     BondedInteractions{ cbtdihs<flavor>, eNR_RB },                        // F_CBTDIHS
3801     BondedInteractions{ rbdihs<flavor>, eNR_FOURDIH },                    // F_FOURDIHS
3802     BondedInteractions{ idihs<flavor>, eNR_IMPROPER },                    // F_IDIHS
3803     BondedInteractions{ pdihs<flavor>, eNR_IMPROPER },                    // F_PIDIHS
3804     BondedInteractions{ tab_dihs<flavor>, eNR_TABDIHS },                  // F_TABDIHS
3805     BondedInteractions{ unimplemented, eNR_CMAP },                        // F_CMAP
3806     BondedInteractions{ unimplemented, -1 },                              // F_GB12_NOLONGERUSED
3807     BondedInteractions{ unimplemented, -1 },                              // F_GB13_NOLONGERUSED
3808     BondedInteractions{ unimplemented, -1 },                              // F_GB14_NOLONGERUSED
3809     BondedInteractions{ unimplemented, -1 },                              // F_GBPOL_NOLONGERUSED
3810     BondedInteractions{ unimplemented, -1 },                       // F_NPSOLVATION_NOLONGERUSED
3811     BondedInteractions{ unimplemented, eNR_NB14 },                 // F_LJ14
3812     BondedInteractions{ unimplemented, -1 },                       // F_COUL14
3813     BondedInteractions{ unimplemented, eNR_NB14 },                 // F_LJC14_Q
3814     BondedInteractions{ unimplemented, eNR_NB14 },                 // F_LJC_PAIRS_NB
3815     BondedInteractions{ unimplemented, -1 },                       // F_LJ
3816     BondedInteractions{ unimplemented, -1 },                       // F_BHAM
3817     BondedInteractions{ unimplemented, -1 },                       // F_LJ_LR_NOLONGERUSED
3818     BondedInteractions{ unimplemented, -1 },                       // F_BHAM_LR_NOLONGERUSED
3819     BondedInteractions{ unimplemented, -1 },                       // F_DISPCORR
3820     BondedInteractions{ unimplemented, -1 },                       // F_COUL_SR
3821     BondedInteractions{ unimplemented, -1 },                       // F_COUL_LR_NOLONGERUSED
3822     BondedInteractions{ unimplemented, -1 },                       // F_RF_EXCL
3823     BondedInteractions{ unimplemented, -1 },                       // F_COUL_RECIP
3824     BondedInteractions{ unimplemented, -1 },                       // F_LJ_RECIP
3825     BondedInteractions{ unimplemented, -1 },                       // F_DPD
3826     BondedInteractions{ polarize<flavor>, eNR_POLARIZE },          // F_POLARIZATION
3827     BondedInteractions{ water_pol<flavor>, eNR_WPOL },             // F_WATER_POL
3828     BondedInteractions{ thole_pol<flavor>, eNR_THOLE },            // F_THOLE_POL
3829     BondedInteractions{ anharm_polarize<flavor>, eNR_ANHARM_POL }, // F_ANHARM_POL
3830     BondedInteractions{ unimplemented, -1 },                       // F_POSRES
3831     BondedInteractions{ unimplemented, -1 },                       // F_FBPOSRES
3832     BondedInteractions{ ta_disres, eNR_DISRES },                   // F_DISRES
3833     BondedInteractions{ unimplemented, -1 },                       // F_DISRESVIOL
3834     BondedInteractions{ orires, eNR_ORIRES },                      // F_ORIRES
3835     BondedInteractions{ unimplemented, -1 },                       // F_ORIRESDEV
3836     BondedInteractions{ angres<flavor>, eNR_ANGRES },              // F_ANGRES
3837     BondedInteractions{ angresz<flavor>, eNR_ANGRESZ },            // F_ANGRESZ
3838     BondedInteractions{ dihres<flavor>, eNR_DIHRES },              // F_DIHRES
3839     BondedInteractions{ unimplemented, -1 },                       // F_DIHRESVIOL
3840     BondedInteractions{ unimplemented, -1 },                       // F_CONSTR
3841     BondedInteractions{ unimplemented, -1 },                       // F_CONSTRNC
3842     BondedInteractions{ unimplemented, -1 },                       // F_SETTLE
3843     BondedInteractions{ unimplemented, -1 },                       // F_VSITE2
3844     BondedInteractions{ unimplemented, -1 },                       // F_VSITE3
3845     BondedInteractions{ unimplemented, -1 },                       // F_VSITE3FD
3846     BondedInteractions{ unimplemented, -1 },                       // F_VSITE3FAD
3847     BondedInteractions{ unimplemented, -1 },                       // F_VSITE3OUT
3848     BondedInteractions{ unimplemented, -1 },                       // F_VSITE4FD
3849     BondedInteractions{ unimplemented, -1 },                       // F_VSITE4FDN
3850     BondedInteractions{ unimplemented, -1 },                       // F_VSITEN
3851     BondedInteractions{ unimplemented, -1 },                       // F_COM_PULL
3852     BondedInteractions{ unimplemented, -1 },                       // F_DENSITYFITTING
3853     BondedInteractions{ unimplemented, -1 },                       // F_EQM
3854     BondedInteractions{ unimplemented, -1 },                       // F_EPOT
3855     BondedInteractions{ unimplemented, -1 },                       // F_EKIN
3856     BondedInteractions{ unimplemented, -1 },                       // F_ETOT
3857     BondedInteractions{ unimplemented, -1 },                       // F_ECONSERVED
3858     BondedInteractions{ unimplemented, -1 },                       // F_TEMP
3859     BondedInteractions{ unimplemented, -1 },                       // F_VTEMP_NOLONGERUSED
3860     BondedInteractions{ unimplemented, -1 },                       // F_PDISPCORR
3861     BondedInteractions{ unimplemented, -1 },                       // F_PRES
3862     BondedInteractions{ unimplemented, -1 },                       // F_DVDL_CONSTR
3863     BondedInteractions{ unimplemented, -1 },                       // F_DVDL
3864     BondedInteractions{ unimplemented, -1 },                       // F_DKDL
3865     BondedInteractions{ unimplemented, -1 },                       // F_DVDL_COUL
3866     BondedInteractions{ unimplemented, -1 },                       // F_DVDL_VDW
3867     BondedInteractions{ unimplemented, -1 },                       // F_DVDL_BONDED
3868     BondedInteractions{ unimplemented, -1 },                       // F_DVDL_RESTRAINT
3869     BondedInteractions{ unimplemented, -1 },                       // F_DVDL_TEMPERATURE
3870 };
3871
3872 /*! \brief List of instantiated BondedInteractions list */
3873 CONSTEXPR_EXCL_OLD_CLANG
3874 gmx::EnumerationArray<BondedKernelFlavor, std::array<BondedInteractions, F_NRE>> c_bondedInteractionFunctionsPerFlavor = {
3875     c_bondedInteractionFunctions<BondedKernelFlavor::ForcesSimdWhenAvailable>,
3876     c_bondedInteractionFunctions<BondedKernelFlavor::ForcesNoSimd>,
3877     c_bondedInteractionFunctions<BondedKernelFlavor::ForcesAndVirialAndEnergy>,
3878     c_bondedInteractionFunctions<BondedKernelFlavor::ForcesAndEnergy>
3879 };
3880
3881 //! \endcond
3882
3883 } // namespace
3884
3885 real calculateSimpleBond(const int           ftype,
3886                          const int           numForceatoms,
3887                          const t_iatom       forceatoms[],
3888                          const t_iparams     forceparams[],
3889                          const rvec          x[],
3890                          rvec4               f[],
3891                          rvec                fshift[],
3892                          const struct t_pbc* pbc,
3893                          const real          lambda,
3894                          real*               dvdlambda,
3895                          const t_mdatoms*    md,
3896                          t_fcdata*           fcd,
3897                          int gmx_unused*          global_atom_index,
3898                          const BondedKernelFlavor bondedKernelFlavor)
3899 {
3900     const BondedInteractions& bonded = c_bondedInteractionFunctionsPerFlavor[bondedKernelFlavor][ftype];
3901
3902     real v = bonded.function(numForceatoms, forceatoms, forceparams, x, f, fshift, pbc, lambda,
3903                              dvdlambda, md, fcd, global_atom_index);
3904
3905     return v;
3906 }
3907
3908 int nrnbIndex(int ftype)
3909 {
3910     return c_bondedInteractionFunctions<BondedKernelFlavor::ForcesAndVirialAndEnergy>[ftype].nrnbIndex;
3911 }