src/gromacs/listed_forces/bonded.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
   7  * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
   8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   9  * and including many others, as listed in the AUTHORS file in the
  10  * top-level source directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38 /*! \internal \file
  39  *
  40  * \brief This file defines low-level functions necessary for
  41  * computing energies and forces for listed interactions.
  42  *
  43  * \author Mark Abraham <mark.j.abraham@gmail.com>
  44  *
  45  * \ingroup module_listed_forces
  46  */
  47 #include "gmxpre.h"
  48
  49 #include "bonded.h"
  50
  51 #include "config.h"
  52
  53 #include <cassert>
  54 #include <cmath>
  55
  56 #include <algorithm>
  57
  58 #include "gromacs/gmxlib/nrnb.h"
  59 #include "gromacs/listed_forces/disre.h"
  60 #include "gromacs/listed_forces/orires.h"
  61 #include "gromacs/math/functions.h"
  62 #include "gromacs/math/units.h"
  63 #include "gromacs/math/utilities.h"
  64 #include "gromacs/math/vec.h"
  65 #include "gromacs/mdtypes/fcdata.h"
  66 #include "gromacs/mdtypes/mdatom.h"
  67 #include "gromacs/pbcutil/ishift.h"
  68 #include "gromacs/pbcutil/pbc.h"
  69 #include "gromacs/pbcutil/pbc_simd.h"
  70 #include "gromacs/simd/simd.h"
  71 #include "gromacs/simd/simd_math.h"
  72 #include "gromacs/simd/vector_operations.h"
  73 #include "gromacs/utility/basedefinitions.h"
  74 #include "gromacs/utility/enumerationhelpers.h"
  75 #include "gromacs/utility/fatalerror.h"
  76 #include "gromacs/utility/real.h"
  77 #include "gromacs/utility/smalloc.h"
  78
  79 #include "listed_internal.h"
  80 #include "restcbt.h"
  81
  82 using namespace gmx; // TODO: Remove when this file is moved into gmx namespace
  83
  84 namespace
  85 {
  86
  87 //! Type of CPU function to compute a bonded interaction.
  88 using BondedFunction = real (*)(int              nbonds,
  89                                 const t_iatom    iatoms[],
  90                                 const t_iparams  iparams[],
  91                                 const rvec       x[],
  92                                 rvec4            f[],
  93                                 rvec             fshift[],
  94                                 const t_pbc*     pbc,
  95                                 real             lambda,
  96                                 real*            dvdlambda,
  97                                 const t_mdatoms* md,
  98                                 t_fcdata*        fcd,
  99                                 int*             ddgatindex);
 100
 101 /*! \brief Mysterious CMAP coefficient matrix */
 102 const int cmap_coeff_matrix[] = {
 103     1,  0,  -3, 2,  0,  0, 0,  0,  -3, 0,  9,  -6, 2, 0,  -6, 4,  0,  0,  0, 0,  0, 0, 0,  0,
 104     3,  0,  -9, 6,  -2, 0, 6,  -4, 0,  0,  0,  0,  0, 0,  0,  0,  0,  0,  9, -6, 0, 0, -6, 4,
 105     0,  0,  3,  -2, 0,  0, 0,  0,  0,  0,  -9, 6,  0, 0,  6,  -4, 0,  0,  0, 0,  1, 0, -3, 2,
 106     -2, 0,  6,  -4, 1,  0, -3, 2,  0,  0,  0,  0,  0, 0,  0,  0,  -1, 0,  3, -2, 1, 0, -3, 2,
 107     0,  0,  0,  0,  0,  0, 0,  0,  0,  0,  -3, 2,  0, 0,  3,  -2, 0,  0,  0, 0,  0, 0, 3,  -2,
 108     0,  0,  -6, 4,  0,  0, 3,  -2, 0,  1,  -2, 1,  0, 0,  0,  0,  0,  -3, 6, -3, 0, 2, -4, 2,
 109     0,  0,  0,  0,  0,  0, 0,  0,  0,  3,  -6, 3,  0, -2, 4,  -2, 0,  0,  0, 0,  0, 0, 0,  0,
 110     0,  0,  -3, 3,  0,  0, 2,  -2, 0,  0,  -1, 1,  0, 0,  0,  0,  0,  0,  3, -3, 0, 0, -2, 2,
 111     0,  0,  0,  0,  0,  1, -2, 1,  0,  -2, 4,  -2, 0, 1,  -2, 1,  0,  0,  0, 0,  0, 0, 0,  0,
 112     0,  -1, 2,  -1, 0,  1, -2, 1,  0,  0,  0,  0,  0, 0,  0,  0,  0,  0,  1, -1, 0, 0, -1, 1,
 113     0,  0,  0,  0,  0,  0, -1, 1,  0,  0,  2,  -2, 0, 0,  -1, 1
 114 };
 115
 116
 117 /*! \brief Compute dx = xi - xj, modulo PBC if non-NULL
 118  *
 119  * \todo This kind of code appears in many places. Consolidate it */
 120 int pbc_rvec_sub(const t_pbc* pbc, const rvec xi, const rvec xj, rvec dx)
 121 {
 122     if (pbc)
 123     {
 124         return pbc_dx_aiuc(pbc, xi, xj, dx);
 125     }
 126     else
 127     {
 128         rvec_sub(xi, xj, dx);
 129         return CENTRAL;
 130     }
 131 }
 132
 133 } // namespace
 134
 135 //! \cond
 136 real bond_angle(const rvec   xi,
 137                 const rvec   xj,
 138                 const rvec   xk,
 139                 const t_pbc* pbc,
 140                 rvec         r_ij,
 141                 rvec         r_kj,
 142                 real*        costh,
 143                 int*         t1,
 144                 int*         t2)
 145 /* Return value is the angle between the bonds i-j and j-k */
 146 {
 147     /* 41 FLOPS */
 148     real th;
 149
 150     *t1 = pbc_rvec_sub(pbc, xi, xj, r_ij); /*  3                */
 151     *t2 = pbc_rvec_sub(pbc, xk, xj, r_kj); /*  3                */
 152
 153     *costh = cos_angle(r_ij, r_kj); /* 25               */
 154     th     = std::acos(*costh);     /* 10               */
 155     /* 41 TOTAL */
 156     return th;
 157 }
 158
 159 real dih_angle(const rvec   xi,
 160                const rvec   xj,
 161                const rvec   xk,
 162                const rvec   xl,
 163                const t_pbc* pbc,
 164                rvec         r_ij,
 165                rvec         r_kj,
 166                rvec         r_kl,
 167                rvec         m,
 168                rvec         n,
 169                int*         t1,
 170                int*         t2,
 171                int*         t3)
 172 {
 173     *t1 = pbc_rvec_sub(pbc, xi, xj, r_ij); /*  3        */
 174     *t2 = pbc_rvec_sub(pbc, xk, xj, r_kj); /*  3                */
 175     *t3 = pbc_rvec_sub(pbc, xk, xl, r_kl); /*  3                */
 176
 177     cprod(r_ij, r_kj, m);        /*  9        */
 178     cprod(r_kj, r_kl, n);        /*  9          */
 179     real phi  = gmx_angle(m, n); /* 49 (assuming 25 for atan2) */
 180     real ipr  = iprod(r_ij, n);  /*  5        */
 181     real sign = (ipr < 0.0) ? -1.0 : 1.0;
 182     phi       = sign * phi; /*  1               */
 183     /* 82 TOTAL */
 184     return phi;
 185 }
 186 //! \endcond
 187
 188 void make_dp_periodic(real* dp) /* 1 flop? */
 189 {
 190     /* dp cannot be outside (-pi,pi) */
 191     if (*dp >= M_PI)
 192     {
 193         *dp -= 2 * M_PI;
 194     }
 195     else if (*dp < -M_PI)
 196     {
 197         *dp += 2 * M_PI;
 198     }
 199 }
 200
 201 namespace
 202 {
 203
 204 /*! \brief Spreads and accumulates the bonded forces to the two atoms and adds the virial contribution when needed
 205  *
 206  * \p shiftIndex is used as the periodic shift.
 207  */
 208 template<BondedKernelFlavor flavor>
 209 inline void spreadBondForces(const real bondForce,
 210                              const rvec dx,
 211                              const int  ai,
 212                              const int  aj,
 213                              rvec4*     f,
 214                              int        shiftIndex,
 215                              rvec*      fshift)
 216 {
 217     for (int m = 0; m < DIM; m++) /*  15          */
 218     {
 219         const real fij = bondForce * dx[m];
 220         f[ai][m] += fij;
 221         f[aj][m] -= fij;
 222         if (computeVirial(flavor))
 223         {
 224             fshift[shiftIndex][m] += fij;
 225             fshift[CENTRAL][m] -= fij;
 226         }
 227     }
 228 }
 229
 230 /*! \brief Morse potential bond
 231  *
 232  * By Frank Everdij. Three parameters needed:
 233  *
 234  * b0 = equilibrium distance in nm
 235  * be = beta in nm^-1 (actually, it's nu_e*Sqrt(2*pi*pi*mu/D_e))
 236  * cb = well depth in kJ/mol
 237  *
 238  * Note: the potential is referenced to be +cb at infinite separation
 239  *       and zero at the equilibrium distance!
 240  */
 241 template<BondedKernelFlavor flavor>
 242 real morse_bonds(int             nbonds,
 243                  const t_iatom   forceatoms[],
 244                  const t_iparams forceparams[],
 245                  const rvec      x[],
 246                  rvec4           f[],
 247                  rvec            fshift[],
 248                  const t_pbc*    pbc,
 249                  real            lambda,
 250                  real*           dvdlambda,
 251                  const t_mdatoms gmx_unused* md,
 252                  t_fcdata gmx_unused* fcd,
 253                  int gmx_unused* global_atom_index)
 254 {
 255     const real one = 1.0;
 256     const real two = 2.0;
 257     real       dr, dr2, temp, omtemp, cbomtemp, fbond, vbond, vtot;
 258     real       b0, be, cb, b0A, beA, cbA, b0B, beB, cbB, L1;
 259     rvec       dx;
 260     int        i, ki, type, ai, aj;
 261
 262     vtot = 0.0;
 263     for (i = 0; (i < nbonds);)
 264     {
 265         type = forceatoms[i++];
 266         ai   = forceatoms[i++];
 267         aj   = forceatoms[i++];
 268
 269         b0A = forceparams[type].morse.b0A;
 270         beA = forceparams[type].morse.betaA;
 271         cbA = forceparams[type].morse.cbA;
 272
 273         b0B = forceparams[type].morse.b0B;
 274         beB = forceparams[type].morse.betaB;
 275         cbB = forceparams[type].morse.cbB;
 276
 277         L1 = one - lambda;            /* 1 */
 278         b0 = L1 * b0A + lambda * b0B; /* 3 */
 279         be = L1 * beA + lambda * beB; /* 3 */
 280         cb = L1 * cbA + lambda * cbB; /* 3 */
 281
 282         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3          */
 283         dr2  = iprod(dx, dx);                       /*   5          */
 284         dr   = dr2 * gmx::invsqrt(dr2);             /*  10          */
 285         temp = std::exp(-be * (dr - b0));           /*  12          */
 286
 287         if (temp == one)
 288         {
 289             /* bonds are constrainted. This may _not_ include bond constraints if they are lambda dependent */
 290             *dvdlambda += cbB - cbA;
 291             continue;
 292         }
 293
 294         omtemp   = one - temp;                                      /*   1          */
 295         cbomtemp = cb * omtemp;                                     /*   1          */
 296         vbond    = cbomtemp * omtemp;                               /*   1          */
 297         fbond    = -two * be * temp * cbomtemp * gmx::invsqrt(dr2); /*   9          */
 298         vtot += vbond;                                              /*   1          */
 299
 300         *dvdlambda += (cbB - cbA) * omtemp * omtemp
 301                       - (2 - 2 * omtemp) * omtemp * cb * ((b0B - b0A) * be - (beB - beA) * (dr - b0)); /* 15 */
 302
 303         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, fshift); /* 15 */
 304     }                                                               /*  83 TOTAL    */
 305     return vtot;
 306 }
 307
 308 //! \cond
 309 template<BondedKernelFlavor flavor>
 310 real cubic_bonds(int             nbonds,
 311                  const t_iatom   forceatoms[],
 312                  const t_iparams forceparams[],
 313                  const rvec      x[],
 314                  rvec4           f[],
 315                  rvec            fshift[],
 316                  const t_pbc*    pbc,
 317                  real gmx_unused lambda,
 318                  real gmx_unused* dvdlambda,
 319                  const t_mdatoms gmx_unused* md,
 320                  t_fcdata gmx_unused* fcd,
 321                  int gmx_unused* global_atom_index)
 322 {
 323     const real three = 3.0;
 324     const real two   = 2.0;
 325     real       kb, b0, kcub;
 326     real       dr, dr2, dist, kdist, kdist2, fbond, vbond, vtot;
 327     rvec       dx;
 328     int        i, ki, type, ai, aj;
 329
 330     vtot = 0.0;
 331     for (i = 0; (i < nbonds);)
 332     {
 333         type = forceatoms[i++];
 334         ai   = forceatoms[i++];
 335         aj   = forceatoms[i++];
 336
 337         b0   = forceparams[type].cubic.b0;
 338         kb   = forceparams[type].cubic.kb;
 339         kcub = forceparams[type].cubic.kcub;
 340
 341         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3          */
 342         dr2 = iprod(dx, dx);                       /*   5          */
 343
 344         if (dr2 == 0.0)
 345         {
 346             continue;
 347         }
 348
 349         dr     = dr2 * gmx::invsqrt(dr2); /*  10          */
 350         dist   = dr - b0;
 351         kdist  = kb * dist;
 352         kdist2 = kdist * dist;
 353
 354         vbond = kdist2 + kcub * kdist2 * dist;
 355         fbond = -(two * kdist + three * kdist2 * kcub) / dr;
 356
 357         vtot += vbond; /* 21 */
 358
 359         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, fshift); /* 15 */
 360     }                                                               /*  54 TOTAL    */
 361     return vtot;
 362 }
 363
 364 template<BondedKernelFlavor flavor>
 365 real FENE_bonds(int             nbonds,
 366                 const t_iatom   forceatoms[],
 367                 const t_iparams forceparams[],
 368                 const rvec      x[],
 369                 rvec4           f[],
 370                 rvec            fshift[],
 371                 const t_pbc*    pbc,
 372                 real gmx_unused lambda,
 373                 real gmx_unused* dvdlambda,
 374                 const t_mdatoms gmx_unused* md,
 375                 t_fcdata gmx_unused* fcd,
 376                 int*                 global_atom_index)
 377 {
 378     const real half = 0.5;
 379     const real one  = 1.0;
 380     real       bm, kb;
 381     real       dr2, bm2, omdr2obm2, fbond, vbond, vtot;
 382     rvec       dx;
 383     int        i, ki, type, ai, aj;
 384
 385     vtot = 0.0;
 386     for (i = 0; (i < nbonds);)
 387     {
 388         type = forceatoms[i++];
 389         ai   = forceatoms[i++];
 390         aj   = forceatoms[i++];
 391
 392         bm = forceparams[type].fene.bm;
 393         kb = forceparams[type].fene.kb;
 394
 395         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3          */
 396         dr2 = iprod(dx, dx);                       /*   5          */
 397
 398         if (dr2 == 0.0)
 399         {
 400             continue;
 401         }
 402
 403         bm2 = bm * bm;
 404
 405         if (dr2 >= bm2)
 406         {
 407             gmx_fatal(FARGS, "r^2 (%f) >= bm^2 (%f) in FENE bond between atoms %d and %d", dr2, bm2,
 408                       glatnr(global_atom_index, ai), glatnr(global_atom_index, aj));
 409         }
 410
 411         omdr2obm2 = one - dr2 / bm2;
 412
 413         vbond = -half * kb * bm2 * std::log(omdr2obm2);
 414         fbond = -kb / omdr2obm2;
 415
 416         vtot += vbond; /* 35 */
 417
 418         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, fshift); /* 15 */
 419     }                                                               /*  58 TOTAL    */
 420     return vtot;
 421 }
 422
 423 real harmonic(real kA, real kB, real xA, real xB, real x, real lambda, real* V, real* F)
 424 {
 425     const real half = 0.5;
 426     real       L1, kk, x0, dx, dx2;
 427     real       v, f, dvdlambda;
 428
 429     L1 = 1.0 - lambda;
 430     kk = L1 * kA + lambda * kB;
 431     x0 = L1 * xA + lambda * xB;
 432
 433     dx  = x - x0;
 434     dx2 = dx * dx;
 435
 436     f         = -kk * dx;
 437     v         = half * kk * dx2;
 438     dvdlambda = half * (kB - kA) * dx2 + (xA - xB) * kk * dx;
 439
 440     *F = f;
 441     *V = v;
 442
 443     return dvdlambda;
 444
 445     /* That was 19 flops */
 446 }
 447
 448
 449 template<BondedKernelFlavor flavor>
 450 real bonds(int             nbonds,
 451            const t_iatom   forceatoms[],
 452            const t_iparams forceparams[],
 453            const rvec      x[],
 454            rvec4           f[],
 455            rvec            fshift[],
 456            const t_pbc*    pbc,
 457            real            lambda,
 458            real*           dvdlambda,
 459            const t_mdatoms gmx_unused* md,
 460            t_fcdata gmx_unused* fcd,
 461            int gmx_unused* global_atom_index)
 462 {
 463     int  i, ki, ai, aj, type;
 464     real dr, dr2, fbond, vbond, vtot;
 465     rvec dx;
 466
 467     vtot = 0.0;
 468     for (i = 0; (i < nbonds);)
 469     {
 470         type = forceatoms[i++];
 471         ai   = forceatoms[i++];
 472         aj   = forceatoms[i++];
 473
 474         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
 475         dr2 = iprod(dx, dx);                       /*   5               */
 476         dr  = std::sqrt(dr2);                      /*  10               */
 477
 478         *dvdlambda += harmonic(forceparams[type].harmonic.krA, forceparams[type].harmonic.krB,
 479                                forceparams[type].harmonic.rA, forceparams[type].harmonic.rB, dr,
 480                                lambda, &vbond, &fbond); /*  19  */
 481
 482         if (dr2 == 0.0)
 483         {
 484             continue;
 485         }
 486
 487
 488         vtot += vbond;              /* 1*/
 489         fbond *= gmx::invsqrt(dr2); /*   6              */
 490
 491         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, fshift); /* 15 */
 492     }                                                               /* 59 TOTAL */
 493     return vtot;
 494 }
 495
 496 template<BondedKernelFlavor flavor>
 497 real restraint_bonds(int             nbonds,
 498                      const t_iatom   forceatoms[],
 499                      const t_iparams forceparams[],
 500                      const rvec      x[],
 501                      rvec4           f[],
 502                      rvec            fshift[],
 503                      const t_pbc*    pbc,
 504                      real            lambda,
 505                      real*           dvdlambda,
 506                      const t_mdatoms gmx_unused* md,
 507                      t_fcdata gmx_unused* fcd,
 508                      int gmx_unused* global_atom_index)
 509 {
 510     int  i, ki, ai, aj, type;
 511     real dr, dr2, fbond, vbond, vtot;
 512     real L1;
 513     real low, dlow, up1, dup1, up2, dup2, k, dk;
 514     real drh, drh2;
 515     rvec dx;
 516
 517     L1 = 1.0 - lambda;
 518
 519     vtot = 0.0;
 520     for (i = 0; (i < nbonds);)
 521     {
 522         type = forceatoms[i++];
 523         ai   = forceatoms[i++];
 524         aj   = forceatoms[i++];
 525
 526         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
 527         dr2 = iprod(dx, dx);                       /*   5               */
 528         dr  = dr2 * gmx::invsqrt(dr2);             /*  10               */
 529
 530         low  = L1 * forceparams[type].restraint.lowA + lambda * forceparams[type].restraint.lowB;
 531         dlow = -forceparams[type].restraint.lowA + forceparams[type].restraint.lowB;
 532         up1  = L1 * forceparams[type].restraint.up1A + lambda * forceparams[type].restraint.up1B;
 533         dup1 = -forceparams[type].restraint.up1A + forceparams[type].restraint.up1B;
 534         up2  = L1 * forceparams[type].restraint.up2A + lambda * forceparams[type].restraint.up2B;
 535         dup2 = -forceparams[type].restraint.up2A + forceparams[type].restraint.up2B;
 536         k    = L1 * forceparams[type].restraint.kA + lambda * forceparams[type].restraint.kB;
 537         dk   = -forceparams[type].restraint.kA + forceparams[type].restraint.kB;
 538         /* 24 */
 539
 540         if (dr < low)
 541         {
 542             drh   = dr - low;
 543             drh2  = drh * drh;
 544             vbond = 0.5 * k * drh2;
 545             fbond = -k * drh;
 546             *dvdlambda += 0.5 * dk * drh2 - k * dlow * drh;
 547         } /* 11 */
 548         else if (dr <= up1)
 549         {
 550             vbond = 0;
 551             fbond = 0;
 552         }
 553         else if (dr <= up2)
 554         {
 555             drh   = dr - up1;
 556             drh2  = drh * drh;
 557             vbond = 0.5 * k * drh2;
 558             fbond = -k * drh;
 559             *dvdlambda += 0.5 * dk * drh2 - k * dup1 * drh;
 560         } /* 11 */
 561         else
 562         {
 563             drh   = dr - up2;
 564             vbond = k * (up2 - up1) * (0.5 * (up2 - up1) + drh);
 565             fbond = -k * (up2 - up1);
 566             *dvdlambda += dk * (up2 - up1) * (0.5 * (up2 - up1) + drh)
 567                           + k * (dup2 - dup1) * (up2 - up1 + drh) - k * (up2 - up1) * dup2;
 568         }
 569
 570         if (dr2 == 0.0)
 571         {
 572             continue;
 573         }
 574
 575         vtot += vbond;              /* 1*/
 576         fbond *= gmx::invsqrt(dr2); /*   6              */
 577
 578         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, fshift); /* 15 */
 579     }                                                               /* 59 TOTAL */
 580
 581     return vtot;
 582 }
 583
 584 template<BondedKernelFlavor flavor>
 585 real polarize(int              nbonds,
 586               const t_iatom    forceatoms[],
 587               const t_iparams  forceparams[],
 588               const rvec       x[],
 589               rvec4            f[],
 590               rvec             fshift[],
 591               const t_pbc*     pbc,
 592               real             lambda,
 593               real*            dvdlambda,
 594               const t_mdatoms* md,
 595               t_fcdata gmx_unused* fcd,
 596               int gmx_unused* global_atom_index)
 597 {
 598     int  i, ki, ai, aj, type;
 599     real dr, dr2, fbond, vbond, vtot, ksh;
 600     rvec dx;
 601
 602     vtot = 0.0;
 603     for (i = 0; (i < nbonds);)
 604     {
 605         type = forceatoms[i++];
 606         ai   = forceatoms[i++];
 607         aj   = forceatoms[i++];
 608         ksh  = gmx::square(md->chargeA[aj]) * ONE_4PI_EPS0 / forceparams[type].polarize.alpha;
 609
 610         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
 611         dr2 = iprod(dx, dx);                       /*   5               */
 612         dr  = std::sqrt(dr2);                      /*  10               */
 613
 614         *dvdlambda += harmonic(ksh, ksh, 0, 0, dr, lambda, &vbond, &fbond); /*  19  */
 615
 616         if (dr2 == 0.0)
 617         {
 618             continue;
 619         }
 620
 621         vtot += vbond;              /* 1*/
 622         fbond *= gmx::invsqrt(dr2); /*   6              */
 623
 624         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, fshift); /* 15 */
 625     }                                                               /* 59 TOTAL */
 626     return vtot;
 627 }
 628
 629 template<BondedKernelFlavor flavor>
 630 real anharm_polarize(int              nbonds,
 631                      const t_iatom    forceatoms[],
 632                      const t_iparams  forceparams[],
 633                      const rvec       x[],
 634                      rvec4            f[],
 635                      rvec             fshift[],
 636                      const t_pbc*     pbc,
 637                      real             lambda,
 638                      real*            dvdlambda,
 639                      const t_mdatoms* md,
 640                      t_fcdata gmx_unused* fcd,
 641                      int gmx_unused* global_atom_index)
 642 {
 643     int  i, ki, ai, aj, type;
 644     real dr, dr2, fbond, vbond, vtot, ksh, khyp, drcut, ddr, ddr3;
 645     rvec dx;
 646
 647     vtot = 0.0;
 648     for (i = 0; (i < nbonds);)
 649     {
 650         type = forceatoms[i++];
 651         ai   = forceatoms[i++];
 652         aj   = forceatoms[i++];
 653         ksh = gmx::square(md->chargeA[aj]) * ONE_4PI_EPS0 / forceparams[type].anharm_polarize.alpha; /* 7*/
 654         khyp  = forceparams[type].anharm_polarize.khyp;
 655         drcut = forceparams[type].anharm_polarize.drcut;
 656
 657         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
 658         dr2 = iprod(dx, dx);                       /*   5               */
 659         dr  = dr2 * gmx::invsqrt(dr2);             /*  10               */
 660
 661         *dvdlambda += harmonic(ksh, ksh, 0, 0, dr, lambda, &vbond, &fbond); /*  19  */
 662
 663         if (dr2 == 0.0)
 664         {
 665             continue;
 666         }
 667
 668         if (dr > drcut)
 669         {
 670             ddr  = dr - drcut;
 671             ddr3 = ddr * ddr * ddr;
 672             vbond += khyp * ddr * ddr3;
 673             fbond -= 4 * khyp * ddr3;
 674         }
 675         fbond *= gmx::invsqrt(dr2); /*   6              */
 676         vtot += vbond;              /* 1*/
 677
 678         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, fshift); /* 15 */
 679     }                                                               /* 72 TOTAL */
 680     return vtot;
 681 }
 682
 683 template<BondedKernelFlavor flavor>
 684 real water_pol(int             nbonds,
 685                const t_iatom   forceatoms[],
 686                const t_iparams forceparams[],
 687                const rvec      x[],
 688                rvec4           f[],
 689                rvec gmx_unused fshift[],
 690                const t_pbc gmx_unused* pbc,
 691                real gmx_unused lambda,
 692                real gmx_unused* dvdlambda,
 693                const t_mdatoms gmx_unused* md,
 694                t_fcdata gmx_unused* fcd,
 695                int gmx_unused* global_atom_index)
 696 {
 697     /* This routine implements anisotropic polarizibility for water, through
 698      * a shell connected to a dummy with spring constant that differ in the
 699      * three spatial dimensions in the molecular frame.
 700      */
 701     int  i, m, aO, aH1, aH2, aD, aS, type, type0, ki;
 702     rvec dOH1, dOH2, dHH, dOD, dDS, nW, kk, dx, kdx, proj;
 703     real vtot, fij, r_HH, r_OD, r_nW, tx, ty, tz, qS;
 704
 705     vtot = 0.0;
 706     if (nbonds > 0)
 707     {
 708         type0  = forceatoms[0];
 709         aS     = forceatoms[5];
 710         qS     = md->chargeA[aS];
 711         kk[XX] = gmx::square(qS) * ONE_4PI_EPS0 / forceparams[type0].wpol.al_x;
 712         kk[YY] = gmx::square(qS) * ONE_4PI_EPS0 / forceparams[type0].wpol.al_y;
 713         kk[ZZ] = gmx::square(qS) * ONE_4PI_EPS0 / forceparams[type0].wpol.al_z;
 714         r_HH   = 1.0 / forceparams[type0].wpol.rHH;
 715         for (i = 0; (i < nbonds); i += 6)
 716         {
 717             type = forceatoms[i];
 718             if (type != type0)
 719             {
 720                 gmx_fatal(FARGS, "Sorry, type = %d, type0 = %d, file = %s, line = %d", type, type0,
 721                           __FILE__, __LINE__);
 722             }
 723             aO  = forceatoms[i + 1];
 724             aH1 = forceatoms[i + 2];
 725             aH2 = forceatoms[i + 3];
 726             aD  = forceatoms[i + 4];
 727             aS  = forceatoms[i + 5];
 728
 729             /* Compute vectors describing the water frame */
 730             pbc_rvec_sub(pbc, x[aH1], x[aO], dOH1);
 731             pbc_rvec_sub(pbc, x[aH2], x[aO], dOH2);
 732             pbc_rvec_sub(pbc, x[aH2], x[aH1], dHH);
 733             pbc_rvec_sub(pbc, x[aD], x[aO], dOD);
 734             ki = pbc_rvec_sub(pbc, x[aS], x[aD], dDS);
 735             cprod(dOH1, dOH2, nW);
 736
 737             /* Compute inverse length of normal vector
 738              * (this one could be precomputed, but I'm too lazy now)
 739              */
 740             r_nW = gmx::invsqrt(iprod(nW, nW));
 741             /* This is for precision, but does not make a big difference,
 742              * it can go later.
 743              */
 744             r_OD = gmx::invsqrt(iprod(dOD, dOD));
 745
 746             /* Normalize the vectors in the water frame */
 747             svmul(r_nW, nW, nW);
 748             svmul(r_HH, dHH, dHH);
 749             svmul(r_OD, dOD, dOD);
 750
 751             /* Compute displacement of shell along components of the vector */
 752             dx[ZZ] = iprod(dDS, dOD);
 753             /* Compute projection on the XY plane: dDS - dx[ZZ]*dOD */
 754             for (m = 0; (m < DIM); m++)
 755             {
 756                 proj[m] = dDS[m] - dx[ZZ] * dOD[m];
 757             }
 758
 759             /*dx[XX] = iprod(dDS,nW);
 760                dx[YY] = iprod(dDS,dHH);*/
 761             dx[XX] = iprod(proj, nW);
 762             for (m = 0; (m < DIM); m++)
 763             {
 764                 proj[m] -= dx[XX] * nW[m];
 765             }
 766             dx[YY] = iprod(proj, dHH);
 767             /* Now compute the forces and energy */
 768             kdx[XX] = kk[XX] * dx[XX];
 769             kdx[YY] = kk[YY] * dx[YY];
 770             kdx[ZZ] = kk[ZZ] * dx[ZZ];
 771             vtot += iprod(dx, kdx);
 772
 773             for (m = 0; (m < DIM); m++)
 774             {
 775                 /* This is a tensor operation but written out for speed */
 776                 tx  = nW[m] * kdx[XX];
 777                 ty  = dHH[m] * kdx[YY];
 778                 tz  = dOD[m] * kdx[ZZ];
 779                 fij = -tx - ty - tz;
 780                 f[aS][m] += fij;
 781                 f[aD][m] -= fij;
 782                 if (computeVirial(flavor))
 783                 {
 784                     fshift[ki][m] += fij;
 785                     fshift[CENTRAL][m] -= fij;
 786                 }
 787             }
 788         }
 789     }
 790     return 0.5 * vtot;
 791 }
 792
 793 template<BondedKernelFlavor flavor>
 794 static real
 795 do_1_thole(const rvec xi, const rvec xj, rvec fi, rvec fj, const t_pbc* pbc, real qq, rvec fshift[], real afac)
 796 {
 797     rvec r12;
 798     real r12sq, r12_1, r12bar, v0, v1, fscal, ebar, fff;
 799     int  m, t;
 800
 801     t = pbc_rvec_sub(pbc, xi, xj, r12); /*  3 */
 802
 803     r12sq  = iprod(r12, r12);                                                     /*  5 */
 804     r12_1  = gmx::invsqrt(r12sq);                                                 /*  5 */
 805     r12bar = afac / r12_1;                                                        /*  5 */
 806     v0     = qq * ONE_4PI_EPS0 * r12_1;                                           /*  2 */
 807     ebar   = std::exp(-r12bar);                                                   /*  5 */
 808     v1     = (1 - (1 + 0.5 * r12bar) * ebar);                                     /*  4 */
 809     fscal  = ((v0 * r12_1) * v1 - v0 * 0.5 * afac * ebar * (r12bar + 1)) * r12_1; /* 9 */
 810
 811     for (m = 0; (m < DIM); m++)
 812     {
 813         fff = fscal * r12[m];
 814         fi[m] += fff;
 815         fj[m] -= fff;
 816         if (computeVirial(flavor))
 817         {
 818             fshift[t][m] += fff;
 819             fshift[CENTRAL][m] -= fff;
 820         }
 821     } /* 15 */
 822
 823     return v0 * v1; /* 1 */
 824     /* 54 */
 825 }
 826
 827 template<BondedKernelFlavor flavor>
 828 real thole_pol(int             nbonds,
 829                const t_iatom   forceatoms[],
 830                const t_iparams forceparams[],
 831                const rvec      x[],
 832                rvec4           f[],
 833                rvec            fshift[],
 834                const t_pbc*    pbc,
 835                real gmx_unused lambda,
 836                real gmx_unused* dvdlambda,
 837                const t_mdatoms* md,
 838                t_fcdata gmx_unused* fcd,
 839                int gmx_unused* global_atom_index)
 840 {
 841     /* Interaction between two pairs of particles with opposite charge */
 842     int  i, type, a1, da1, a2, da2;
 843     real q1, q2, qq, a, al1, al2, afac;
 844     real V = 0;
 845
 846     for (i = 0; (i < nbonds);)
 847     {
 848         type = forceatoms[i++];
 849         a1   = forceatoms[i++];
 850         da1  = forceatoms[i++];
 851         a2   = forceatoms[i++];
 852         da2  = forceatoms[i++];
 853         q1   = md->chargeA[da1];
 854         q2   = md->chargeA[da2];
 855         a    = forceparams[type].thole.a;
 856         al1  = forceparams[type].thole.alpha1;
 857         al2  = forceparams[type].thole.alpha2;
 858         qq   = q1 * q2;
 859         afac = a * gmx::invsixthroot(al1 * al2);
 860         V += do_1_thole<flavor>(x[a1], x[a2], f[a1], f[a2], pbc, qq, fshift, afac);
 861         V += do_1_thole<flavor>(x[da1], x[a2], f[da1], f[a2], pbc, -qq, fshift, afac);
 862         V += do_1_thole<flavor>(x[a1], x[da2], f[a1], f[da2], pbc, -qq, fshift, afac);
 863         V += do_1_thole<flavor>(x[da1], x[da2], f[da1], f[da2], pbc, qq, fshift, afac);
 864     }
 865     /* 290 flops */
 866     return V;
 867 }
 868
 869 // Avoid gcc 386 -O3 code generation bug in this function (see Issue
 870 // #3205 for more information)
 871 #if defined(__GNUC__) && defined(__i386__) && defined(__OPTIMIZE__)
 872 #    pragma GCC push_options
 873 #    pragma GCC optimize("O1")
 874 #    define avoid_gcc_i386_o3_code_generation_bug
 875 #endif
 876
 877 template<BondedKernelFlavor flavor>
 878 std::enable_if_t<flavor != BondedKernelFlavor::ForcesSimdWhenAvailable || !GMX_SIMD_HAVE_REAL, real>
 879 angles(int             nbonds,
 880        const t_iatom   forceatoms[],
 881        const t_iparams forceparams[],
 882        const rvec      x[],
 883        rvec4           f[],
 884        rvec            fshift[],
 885        const t_pbc*    pbc,
 886        real            lambda,
 887        real*           dvdlambda,
 888        const t_mdatoms gmx_unused* md,
 889        t_fcdata gmx_unused* fcd,
 890        int gmx_unused* global_atom_index)
 891 {
 892     int  i, ai, aj, ak, t1, t2, type;
 893     rvec r_ij, r_kj;
 894     real cos_theta, cos_theta2, theta, dVdt, va, vtot;
 895
 896     vtot = 0.0;
 897     for (i = 0; i < nbonds;)
 898     {
 899         type = forceatoms[i++];
 900         ai   = forceatoms[i++];
 901         aj   = forceatoms[i++];
 902         ak   = forceatoms[i++];
 903
 904         theta = bond_angle(x[ai], x[aj], x[ak], pbc, r_ij, r_kj, &cos_theta, &t1, &t2); /*  41 */
 905
 906         *dvdlambda += harmonic(forceparams[type].harmonic.krA, forceparams[type].harmonic.krB,
 907                                forceparams[type].harmonic.rA * DEG2RAD,
 908                                forceparams[type].harmonic.rB * DEG2RAD, theta, lambda, &va, &dVdt); /*  21  */
 909         vtot += va;
 910
 911         cos_theta2 = gmx::square(cos_theta);
 912         if (cos_theta2 < 1)
 913         {
 914             int  m;
 915             real st, sth;
 916             real cik, cii, ckk;
 917             real nrkj2, nrij2;
 918             real nrkj_1, nrij_1;
 919             rvec f_i, f_j, f_k;
 920
 921             st    = dVdt * gmx::invsqrt(1 - cos_theta2); /*  12         */
 922             sth   = st * cos_theta;                      /*   1         */
 923             nrij2 = iprod(r_ij, r_ij);                   /*   5         */
 924             nrkj2 = iprod(r_kj, r_kj);                   /*   5         */
 925
 926             nrij_1 = gmx::invsqrt(nrij2); /*  10                */
 927             nrkj_1 = gmx::invsqrt(nrkj2); /*  10                */
 928
 929             cik = st * nrij_1 * nrkj_1;  /*   2         */
 930             cii = sth * nrij_1 * nrij_1; /*   2         */
 931             ckk = sth * nrkj_1 * nrkj_1; /*   2         */
 932
 933             for (m = 0; m < DIM; m++)
 934             { /*  39            */
 935                 f_i[m] = -(cik * r_kj[m] - cii * r_ij[m]);
 936                 f_k[m] = -(cik * r_ij[m] - ckk * r_kj[m]);
 937                 f_j[m] = -f_i[m] - f_k[m];
 938                 f[ai][m] += f_i[m];
 939                 f[aj][m] += f_j[m];
 940                 f[ak][m] += f_k[m];
 941             }
 942             if (computeVirial(flavor))
 943             {
 944                 rvec_inc(fshift[t1], f_i);
 945                 rvec_inc(fshift[CENTRAL], f_j);
 946                 rvec_inc(fshift[t2], f_k);
 947             }
 948         } /* 161 TOTAL  */
 949     }
 950
 951     return vtot;
 952 }
 953
 954 #ifdef avoid_gcc_i386_o3_code_generation_bug
 955 #    pragma GCC pop_options
 956 #    undef avoid_gcc_i386_o3_code_generation_bug
 957 #endif
 958
 959 #if GMX_SIMD_HAVE_REAL
 960
 961 /* As angles, but using SIMD to calculate many angles at once.
 962  * This routines does not calculate energies and shift forces.
 963  */
 964 template<BondedKernelFlavor flavor>
 965 std::enable_if_t<flavor == BondedKernelFlavor::ForcesSimdWhenAvailable, real>
 966 angles(int             nbonds,
 967        const t_iatom   forceatoms[],
 968        const t_iparams forceparams[],
 969        const rvec      x[],
 970        rvec4           f[],
 971        rvec gmx_unused fshift[],
 972        const t_pbc*    pbc,
 973        real gmx_unused lambda,
 974        real gmx_unused* dvdlambda,
 975        const t_mdatoms gmx_unused* md,
 976        t_fcdata gmx_unused* fcd,
 977        int gmx_unused* global_atom_index)
 978 {
 979     const int                                nfa1 = 4;
 980     int                                      i, iu, s;
 981     int                                      type;
 982     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ai[GMX_SIMD_REAL_WIDTH];
 983     alignas(GMX_SIMD_ALIGNMENT) std::int32_t aj[GMX_SIMD_REAL_WIDTH];
 984     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ak[GMX_SIMD_REAL_WIDTH];
 985     alignas(GMX_SIMD_ALIGNMENT) real         coeff[2 * GMX_SIMD_REAL_WIDTH];
 986     SimdReal                                 deg2rad_S(DEG2RAD);
 987     SimdReal                                 xi_S, yi_S, zi_S;
 988     SimdReal                                 xj_S, yj_S, zj_S;
 989     SimdReal                                 xk_S, yk_S, zk_S;
 990     SimdReal                                 k_S, theta0_S;
 991     SimdReal                                 rijx_S, rijy_S, rijz_S;
 992     SimdReal                                 rkjx_S, rkjy_S, rkjz_S;
 993     SimdReal                                 one_S(1.0);
 994     SimdReal min_one_plus_eps_S(-1.0 + 2.0 * GMX_REAL_EPS); // Smallest number > -1
 995
 996     SimdReal                         rij_rkj_S;
 997     SimdReal                         nrij2_S, nrij_1_S;
 998     SimdReal                         nrkj2_S, nrkj_1_S;
 999     SimdReal                         cos_S, invsin_S;
1000     SimdReal                         theta_S;
1001     SimdReal                         st_S, sth_S;
1002     SimdReal                         cik_S, cii_S, ckk_S;
1003     SimdReal                         f_ix_S, f_iy_S, f_iz_S;
1004     SimdReal                         f_kx_S, f_ky_S, f_kz_S;
1005     alignas(GMX_SIMD_ALIGNMENT) real pbc_simd[9 * GMX_SIMD_REAL_WIDTH];
1006
1007     set_pbc_simd(pbc, pbc_simd);
1008
1009     /* nbonds is the number of angles times nfa1, here we step GMX_SIMD_REAL_WIDTH angles */
1010     for (i = 0; (i < nbonds); i += GMX_SIMD_REAL_WIDTH * nfa1)
1011     {
1012         /* Collect atoms for GMX_SIMD_REAL_WIDTH angles.
1013          * iu indexes into forceatoms, we should not let iu go beyond nbonds.
1014          */
1015         iu = i;
1016         for (s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
1017         {
1018             type  = forceatoms[iu];
1019             ai[s] = forceatoms[iu + 1];
1020             aj[s] = forceatoms[iu + 2];
1021             ak[s] = forceatoms[iu + 3];
1022
1023             /* At the end fill the arrays with the last atoms and 0 params */
1024             if (i + s * nfa1 < nbonds)
1025             {
1026                 coeff[s]                       = forceparams[type].harmonic.krA;
1027                 coeff[GMX_SIMD_REAL_WIDTH + s] = forceparams[type].harmonic.rA;
1028
1029                 if (iu + nfa1 < nbonds)
1030                 {
1031                     iu += nfa1;
1032                 }
1033             }
1034             else
1035             {
1036                 coeff[s]                       = 0;
1037                 coeff[GMX_SIMD_REAL_WIDTH + s] = 0;
1038             }
1039         }
1040
1041         /* Store the non PBC corrected distances packed and aligned */
1042         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ai, &xi_S, &yi_S, &zi_S);
1043         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), aj, &xj_S, &yj_S, &zj_S);
1044         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ak, &xk_S, &yk_S, &zk_S);
1045         rijx_S = xi_S - xj_S;
1046         rijy_S = yi_S - yj_S;
1047         rijz_S = zi_S - zj_S;
1048         rkjx_S = xk_S - xj_S;
1049         rkjy_S = yk_S - yj_S;
1050         rkjz_S = zk_S - zj_S;
1051
1052         k_S      = load<SimdReal>(coeff);
1053         theta0_S = load<SimdReal>(coeff + GMX_SIMD_REAL_WIDTH) * deg2rad_S;
1054
1055         pbc_correct_dx_simd(&rijx_S, &rijy_S, &rijz_S, pbc_simd);
1056         pbc_correct_dx_simd(&rkjx_S, &rkjy_S, &rkjz_S, pbc_simd);
1057
1058         rij_rkj_S = iprod(rijx_S, rijy_S, rijz_S, rkjx_S, rkjy_S, rkjz_S);
1059
1060         nrij2_S = norm2(rijx_S, rijy_S, rijz_S);
1061         nrkj2_S = norm2(rkjx_S, rkjy_S, rkjz_S);
1062
1063         nrij_1_S = invsqrt(nrij2_S);
1064         nrkj_1_S = invsqrt(nrkj2_S);
1065
1066         cos_S = rij_rkj_S * nrij_1_S * nrkj_1_S;
1067
1068         /* To allow for 180 degrees, we take the max of cos and -1 + 1bit,
1069          * so we can safely get the 1/sin from 1/sqrt(1 - cos^2).
1070          * This also ensures that rounding errors would cause the argument
1071          * of simdAcos to be < -1.
1072          * Note that we do not take precautions for cos(0)=1, so the outer
1073          * atoms in an angle should not be on top of each other.
1074          */
1075         cos_S = max(cos_S, min_one_plus_eps_S);
1076
1077         theta_S = acos(cos_S);
1078
1079         invsin_S = invsqrt(one_S - cos_S * cos_S);
1080
1081         st_S  = k_S * (theta0_S - theta_S) * invsin_S;
1082         sth_S = st_S * cos_S;
1083
1084         cik_S = st_S * nrij_1_S * nrkj_1_S;
1085         cii_S = sth_S * nrij_1_S * nrij_1_S;
1086         ckk_S = sth_S * nrkj_1_S * nrkj_1_S;
1087
1088         f_ix_S = cii_S * rijx_S;
1089         f_ix_S = fnma(cik_S, rkjx_S, f_ix_S);
1090         f_iy_S = cii_S * rijy_S;
1091         f_iy_S = fnma(cik_S, rkjy_S, f_iy_S);
1092         f_iz_S = cii_S * rijz_S;
1093         f_iz_S = fnma(cik_S, rkjz_S, f_iz_S);
1094         f_kx_S = ckk_S * rkjx_S;
1095         f_kx_S = fnma(cik_S, rijx_S, f_kx_S);
1096         f_ky_S = ckk_S * rkjy_S;
1097         f_ky_S = fnma(cik_S, rijy_S, f_ky_S);
1098         f_kz_S = ckk_S * rkjz_S;
1099         f_kz_S = fnma(cik_S, rijz_S, f_kz_S);
1100
1101         transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ai, f_ix_S, f_iy_S, f_iz_S);
1102         transposeScatterDecrU<4>(reinterpret_cast<real*>(f), aj, f_ix_S + f_kx_S, f_iy_S + f_ky_S,
1103                                  f_iz_S + f_kz_S);
1104         transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ak, f_kx_S, f_ky_S, f_kz_S);
1105     }
1106
1107     return 0;
1108 }
1109
1110 #endif // GMX_SIMD_HAVE_REAL
1111
1112 template<BondedKernelFlavor flavor>
1113 real linear_angles(int             nbonds,
1114                    const t_iatom   forceatoms[],
1115                    const t_iparams forceparams[],
1116                    const rvec      x[],
1117                    rvec4           f[],
1118                    rvec            fshift[],
1119                    const t_pbc*    pbc,
1120                    real            lambda,
1121                    real*           dvdlambda,
1122                    const t_mdatoms gmx_unused* md,
1123                    t_fcdata gmx_unused* fcd,
1124                    int gmx_unused* global_atom_index)
1125 {
1126     int  i, m, ai, aj, ak, t1, t2, type;
1127     rvec f_i, f_j, f_k;
1128     real L1, kA, kB, aA, aB, dr, dr2, va, vtot, a, b, klin;
1129     rvec r_ij, r_kj, r_ik, dx;
1130
1131     L1   = 1 - lambda;
1132     vtot = 0.0;
1133     for (i = 0; (i < nbonds);)
1134     {
1135         type = forceatoms[i++];
1136         ai   = forceatoms[i++];
1137         aj   = forceatoms[i++];
1138         ak   = forceatoms[i++];
1139
1140         kA   = forceparams[type].linangle.klinA;
1141         kB   = forceparams[type].linangle.klinB;
1142         klin = L1 * kA + lambda * kB;
1143
1144         aA = forceparams[type].linangle.aA;
1145         aB = forceparams[type].linangle.aB;
1146         a  = L1 * aA + lambda * aB;
1147         b  = 1 - a;
1148
1149         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], r_ij);
1150         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], r_kj);
1151         rvec_sub(r_ij, r_kj, r_ik);
1152
1153         dr2 = 0;
1154         for (m = 0; (m < DIM); m++)
1155         {
1156             dr = -a * r_ij[m] - b * r_kj[m];
1157             dr2 += dr * dr;
1158             dx[m]  = dr;
1159             f_i[m] = a * klin * dr;
1160             f_k[m] = b * klin * dr;
1161             f_j[m] = -(f_i[m] + f_k[m]);
1162             f[ai][m] += f_i[m];
1163             f[aj][m] += f_j[m];
1164             f[ak][m] += f_k[m];
1165         }
1166         va = 0.5 * klin * dr2;
1167         *dvdlambda += 0.5 * (kB - kA) * dr2 + klin * (aB - aA) * iprod(dx, r_ik);
1168
1169         vtot += va;
1170
1171         if (computeVirial(flavor))
1172         {
1173             rvec_inc(fshift[t1], f_i);
1174             rvec_inc(fshift[CENTRAL], f_j);
1175             rvec_inc(fshift[t2], f_k);
1176         }
1177     } /* 57 TOTAL       */
1178     return vtot;
1179 }
1180
1181 template<BondedKernelFlavor flavor>
1182 std::enable_if_t<flavor != BondedKernelFlavor::ForcesSimdWhenAvailable || !GMX_SIMD_HAVE_REAL, real>
1183 urey_bradley(int             nbonds,
1184              const t_iatom   forceatoms[],
1185              const t_iparams forceparams[],
1186              const rvec      x[],
1187              rvec4           f[],
1188              rvec            fshift[],
1189              const t_pbc*    pbc,
1190              real            lambda,
1191              real*           dvdlambda,
1192              const t_mdatoms gmx_unused* md,
1193              t_fcdata gmx_unused* fcd,
1194              int gmx_unused* global_atom_index)
1195 {
1196     int  i, m, ai, aj, ak, t1, t2, type, ki;
1197     rvec r_ij, r_kj, r_ik;
1198     real cos_theta, cos_theta2, theta;
1199     real dVdt, va, vtot, dr, dr2, vbond, fbond, fik;
1200     real kthA, th0A, kUBA, r13A, kthB, th0B, kUBB, r13B;
1201
1202     vtot = 0.0;
1203     for (i = 0; (i < nbonds);)
1204     {
1205         type = forceatoms[i++];
1206         ai   = forceatoms[i++];
1207         aj   = forceatoms[i++];
1208         ak   = forceatoms[i++];
1209         th0A = forceparams[type].u_b.thetaA * DEG2RAD;
1210         kthA = forceparams[type].u_b.kthetaA;
1211         r13A = forceparams[type].u_b.r13A;
1212         kUBA = forceparams[type].u_b.kUBA;
1213         th0B = forceparams[type].u_b.thetaB * DEG2RAD;
1214         kthB = forceparams[type].u_b.kthetaB;
1215         r13B = forceparams[type].u_b.r13B;
1216         kUBB = forceparams[type].u_b.kUBB;
1217
1218         theta = bond_angle(x[ai], x[aj], x[ak], pbc, r_ij, r_kj, &cos_theta, &t1, &t2); /*  41 */
1219
1220         *dvdlambda += harmonic(kthA, kthB, th0A, th0B, theta, lambda, &va, &dVdt); /*  21  */
1221         vtot += va;
1222
1223         ki  = pbc_rvec_sub(pbc, x[ai], x[ak], r_ik); /*   3      */
1224         dr2 = iprod(r_ik, r_ik);                     /*   5             */
1225         dr  = dr2 * gmx::invsqrt(dr2);               /*  10             */
1226
1227         *dvdlambda += harmonic(kUBA, kUBB, r13A, r13B, dr, lambda, &vbond, &fbond); /*  19  */
1228
1229         cos_theta2 = gmx::square(cos_theta); /*   1             */
1230         if (cos_theta2 < 1)
1231         {
1232             real st, sth;
1233             real cik, cii, ckk;
1234             real nrkj2, nrij2;
1235             rvec f_i, f_j, f_k;
1236
1237             st    = dVdt * gmx::invsqrt(1 - cos_theta2); /*  12         */
1238             sth   = st * cos_theta;                      /*   1         */
1239             nrkj2 = iprod(r_kj, r_kj);                   /*   5         */
1240             nrij2 = iprod(r_ij, r_ij);
1241
1242             cik = st * gmx::invsqrt(nrkj2 * nrij2); /*  12              */
1243             cii = sth / nrij2;                      /*  10              */
1244             ckk = sth / nrkj2;                      /*  10              */
1245
1246             for (m = 0; (m < DIM); m++) /*  39          */
1247             {
1248                 f_i[m] = -(cik * r_kj[m] - cii * r_ij[m]);
1249                 f_k[m] = -(cik * r_ij[m] - ckk * r_kj[m]);
1250                 f_j[m] = -f_i[m] - f_k[m];
1251                 f[ai][m] += f_i[m];
1252                 f[aj][m] += f_j[m];
1253                 f[ak][m] += f_k[m];
1254             }
1255             if (computeVirial(flavor))
1256             {
1257                 rvec_inc(fshift[t1], f_i);
1258                 rvec_inc(fshift[CENTRAL], f_j);
1259                 rvec_inc(fshift[t2], f_k);
1260             }
1261         } /* 161 TOTAL  */
1262         /* Time for the bond calculations */
1263         if (dr2 == 0.0)
1264         {
1265             continue;
1266         }
1267
1268         vtot += vbond;              /* 1*/
1269         fbond *= gmx::invsqrt(dr2); /*   6              */
1270
1271         for (m = 0; (m < DIM); m++) /*  15              */
1272         {
1273             fik = fbond * r_ik[m];
1274             f[ai][m] += fik;
1275             f[ak][m] -= fik;
1276             if (computeVirial(flavor))
1277             {
1278                 fshift[ki][m] += fik;
1279                 fshift[CENTRAL][m] -= fik;
1280             }
1281         }
1282     }
1283     return vtot;
1284 }
1285
1286 #if GMX_SIMD_HAVE_REAL
1287
1288 /* As urey_bradley, but using SIMD to calculate many potentials at once.
1289  * This routines does not calculate energies and shift forces.
1290  */
1291 template<BondedKernelFlavor flavor>
1292 std::enable_if_t<flavor == BondedKernelFlavor::ForcesSimdWhenAvailable, real>
1293 urey_bradley(int             nbonds,
1294              const t_iatom   forceatoms[],
1295              const t_iparams forceparams[],
1296              const rvec      x[],
1297              rvec4           f[],
1298              rvec gmx_unused fshift[],
1299              const t_pbc*    pbc,
1300              real gmx_unused lambda,
1301              real gmx_unused* dvdlambda,
1302              const t_mdatoms gmx_unused* md,
1303              t_fcdata gmx_unused* fcd,
1304              int gmx_unused* global_atom_index)
1305 {
1306     constexpr int                            nfa1 = 4;
1307     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ai[GMX_SIMD_REAL_WIDTH];
1308     alignas(GMX_SIMD_ALIGNMENT) std::int32_t aj[GMX_SIMD_REAL_WIDTH];
1309     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ak[GMX_SIMD_REAL_WIDTH];
1310     alignas(GMX_SIMD_ALIGNMENT) real         coeff[4 * GMX_SIMD_REAL_WIDTH];
1311     alignas(GMX_SIMD_ALIGNMENT) real         pbc_simd[9 * GMX_SIMD_REAL_WIDTH];
1312
1313     set_pbc_simd(pbc, pbc_simd);
1314
1315     /* nbonds is the number of angles times nfa1, here we step GMX_SIMD_REAL_WIDTH angles */
1316     for (int i = 0; i < nbonds; i += GMX_SIMD_REAL_WIDTH * nfa1)
1317     {
1318         /* Collect atoms for GMX_SIMD_REAL_WIDTH angles.
1319          * iu indexes into forceatoms, we should not let iu go beyond nbonds.
1320          */
1321         int iu = i;
1322         for (int s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
1323         {
1324             const int type = forceatoms[iu];
1325             ai[s]          = forceatoms[iu + 1];
1326             aj[s]          = forceatoms[iu + 2];
1327             ak[s]          = forceatoms[iu + 3];
1328
1329             /* At the end fill the arrays with the last atoms and 0 params */
1330             if (i + s * nfa1 < nbonds)
1331             {
1332                 coeff[s]                           = forceparams[type].u_b.kthetaA;
1333                 coeff[GMX_SIMD_REAL_WIDTH + s]     = forceparams[type].u_b.thetaA;
1334                 coeff[GMX_SIMD_REAL_WIDTH * 2 + s] = forceparams[type].u_b.kUBA;
1335                 coeff[GMX_SIMD_REAL_WIDTH * 3 + s] = forceparams[type].u_b.r13A;
1336
1337                 if (iu + nfa1 < nbonds)
1338                 {
1339                     iu += nfa1;
1340                 }
1341             }
1342             else
1343             {
1344                 coeff[s]                           = 0;
1345                 coeff[GMX_SIMD_REAL_WIDTH + s]     = 0;
1346                 coeff[GMX_SIMD_REAL_WIDTH * 2 + s] = 0;
1347                 coeff[GMX_SIMD_REAL_WIDTH * 3 + s] = 0;
1348             }
1349         }
1350
1351         SimdReal xi_S, yi_S, zi_S;
1352         SimdReal xj_S, yj_S, zj_S;
1353         SimdReal xk_S, yk_S, zk_S;
1354
1355         /* Store the non PBC corrected distances packed and aligned */
1356         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ai, &xi_S, &yi_S, &zi_S);
1357         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), aj, &xj_S, &yj_S, &zj_S);
1358         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ak, &xk_S, &yk_S, &zk_S);
1359         SimdReal rijx_S = xi_S - xj_S;
1360         SimdReal rijy_S = yi_S - yj_S;
1361         SimdReal rijz_S = zi_S - zj_S;
1362         SimdReal rkjx_S = xk_S - xj_S;
1363         SimdReal rkjy_S = yk_S - yj_S;
1364         SimdReal rkjz_S = zk_S - zj_S;
1365         SimdReal rikx_S = xi_S - xk_S;
1366         SimdReal riky_S = yi_S - yk_S;
1367         SimdReal rikz_S = zi_S - zk_S;
1368
1369         const SimdReal ktheta_S = load<SimdReal>(coeff);
1370         const SimdReal theta0_S = load<SimdReal>(coeff + GMX_SIMD_REAL_WIDTH) * DEG2RAD;
1371         const SimdReal kUB_S    = load<SimdReal>(coeff + 2 * GMX_SIMD_REAL_WIDTH);
1372         const SimdReal r13_S    = load<SimdReal>(coeff + 3 * GMX_SIMD_REAL_WIDTH);
1373
1374         pbc_correct_dx_simd(&rijx_S, &rijy_S, &rijz_S, pbc_simd);
1375         pbc_correct_dx_simd(&rkjx_S, &rkjy_S, &rkjz_S, pbc_simd);
1376         pbc_correct_dx_simd(&rikx_S, &riky_S, &rikz_S, pbc_simd);
1377
1378         const SimdReal rij_rkj_S = iprod(rijx_S, rijy_S, rijz_S, rkjx_S, rkjy_S, rkjz_S);
1379
1380         const SimdReal dr2_S = iprod(rikx_S, riky_S, rikz_S, rikx_S, riky_S, rikz_S);
1381
1382         const SimdReal nrij2_S = norm2(rijx_S, rijy_S, rijz_S);
1383         const SimdReal nrkj2_S = norm2(rkjx_S, rkjy_S, rkjz_S);
1384
1385         const SimdReal nrij_1_S = invsqrt(nrij2_S);
1386         const SimdReal nrkj_1_S = invsqrt(nrkj2_S);
1387         const SimdReal invdr2_S = invsqrt(dr2_S);
1388         const SimdReal dr_S     = dr2_S * invdr2_S;
1389
1390         constexpr real min_one_plus_eps = -1.0 + 2.0 * GMX_REAL_EPS; // Smallest number > -1
1391
1392         /* To allow for 180 degrees, we take the max of cos and -1 + 1bit,
1393          * so we can safely get the 1/sin from 1/sqrt(1 - cos^2).
1394          * This also ensures that rounding errors would cause the argument
1395          * of simdAcos to be < -1.
1396          * Note that we do not take precautions for cos(0)=1, so the bonds
1397          * in an angle should not align at an angle of 0 degrees.
1398          */
1399         const SimdReal cos_S = max(rij_rkj_S * nrij_1_S * nrkj_1_S, min_one_plus_eps);
1400
1401         const SimdReal theta_S  = acos(cos_S);
1402         const SimdReal invsin_S = invsqrt(1.0 - cos_S * cos_S);
1403         const SimdReal st_S     = ktheta_S * (theta0_S - theta_S) * invsin_S;
1404         const SimdReal sth_S    = st_S * cos_S;
1405
1406         const SimdReal cik_S = st_S * nrij_1_S * nrkj_1_S;
1407         const SimdReal cii_S = sth_S * nrij_1_S * nrij_1_S;
1408         const SimdReal ckk_S = sth_S * nrkj_1_S * nrkj_1_S;
1409
1410         const SimdReal sUB_S = kUB_S * (r13_S - dr_S) * invdr2_S;
1411
1412         const SimdReal f_ikx_S = sUB_S * rikx_S;
1413         const SimdReal f_iky_S = sUB_S * riky_S;
1414         const SimdReal f_ikz_S = sUB_S * rikz_S;
1415
1416         const SimdReal f_ix_S = fnma(cik_S, rkjx_S, cii_S * rijx_S) + f_ikx_S;
1417         const SimdReal f_iy_S = fnma(cik_S, rkjy_S, cii_S * rijy_S) + f_iky_S;
1418         const SimdReal f_iz_S = fnma(cik_S, rkjz_S, cii_S * rijz_S) + f_ikz_S;
1419         const SimdReal f_kx_S = fnma(cik_S, rijx_S, ckk_S * rkjx_S) - f_ikx_S;
1420         const SimdReal f_ky_S = fnma(cik_S, rijy_S, ckk_S * rkjy_S) - f_iky_S;
1421         const SimdReal f_kz_S = fnma(cik_S, rijz_S, ckk_S * rkjz_S) - f_ikz_S;
1422
1423         transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ai, f_ix_S, f_iy_S, f_iz_S);
1424         transposeScatterDecrU<4>(reinterpret_cast<real*>(f), aj, f_ix_S + f_kx_S, f_iy_S + f_ky_S,
1425                                  f_iz_S + f_kz_S);
1426         transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ak, f_kx_S, f_ky_S, f_kz_S);
1427     }
1428
1429     return 0;
1430 }
1431
1432 #endif // GMX_SIMD_HAVE_REAL
1433
1434 template<BondedKernelFlavor flavor>
1435 real quartic_angles(int             nbonds,
1436                     const t_iatom   forceatoms[],
1437                     const t_iparams forceparams[],
1438                     const rvec      x[],
1439                     rvec4           f[],
1440                     rvec            fshift[],
1441                     const t_pbc*    pbc,
1442                     real gmx_unused lambda,
1443                     real gmx_unused* dvdlambda,
1444                     const t_mdatoms gmx_unused* md,
1445                     t_fcdata gmx_unused* fcd,
1446                     int gmx_unused* global_atom_index)
1447 {
1448     int  i, j, ai, aj, ak, t1, t2, type;
1449     rvec r_ij, r_kj;
1450     real cos_theta, cos_theta2, theta, dt, dVdt, va, dtp, c, vtot;
1451
1452     vtot = 0.0;
1453     for (i = 0; (i < nbonds);)
1454     {
1455         type = forceatoms[i++];
1456         ai   = forceatoms[i++];
1457         aj   = forceatoms[i++];
1458         ak   = forceatoms[i++];
1459
1460         theta = bond_angle(x[ai], x[aj], x[ak], pbc, r_ij, r_kj, &cos_theta, &t1, &t2); /*  41 */
1461
1462         dt = theta - forceparams[type].qangle.theta * DEG2RAD; /* 2          */
1463
1464         dVdt = 0;
1465         va   = forceparams[type].qangle.c[0];
1466         dtp  = 1.0;
1467         for (j = 1; j <= 4; j++)
1468         {
1469             c = forceparams[type].qangle.c[j];
1470             dVdt -= j * c * dtp;
1471             dtp *= dt;
1472             va += c * dtp;
1473         }
1474         /* 20 */
1475
1476         vtot += va;
1477
1478         cos_theta2 = gmx::square(cos_theta); /*   1             */
1479         if (cos_theta2 < 1)
1480         {
1481             int  m;
1482             real st, sth;
1483             real cik, cii, ckk;
1484             real nrkj2, nrij2;
1485             rvec f_i, f_j, f_k;
1486
1487             st    = dVdt * gmx::invsqrt(1 - cos_theta2); /*  12         */
1488             sth   = st * cos_theta;                      /*   1         */
1489             nrkj2 = iprod(r_kj, r_kj);                   /*   5         */
1490             nrij2 = iprod(r_ij, r_ij);
1491
1492             cik = st * gmx::invsqrt(nrkj2 * nrij2); /*  12              */
1493             cii = sth / nrij2;                      /*  10              */
1494             ckk = sth / nrkj2;                      /*  10              */
1495
1496             for (m = 0; (m < DIM); m++) /*  39          */
1497             {
1498                 f_i[m] = -(cik * r_kj[m] - cii * r_ij[m]);
1499                 f_k[m] = -(cik * r_ij[m] - ckk * r_kj[m]);
1500                 f_j[m] = -f_i[m] - f_k[m];
1501                 f[ai][m] += f_i[m];
1502                 f[aj][m] += f_j[m];
1503                 f[ak][m] += f_k[m];
1504             }
1505
1506             if (computeVirial(flavor))
1507             {
1508                 rvec_inc(fshift[t1], f_i);
1509                 rvec_inc(fshift[CENTRAL], f_j);
1510                 rvec_inc(fshift[t2], f_k);
1511             }
1512         } /* 153 TOTAL  */
1513     }
1514     return vtot;
1515 }
1516
1517
1518 #if GMX_SIMD_HAVE_REAL
1519
1520 /* As dih_angle above, but calculates 4 dihedral angles at once using SIMD,
1521  * also calculates the pre-factor required for the dihedral force update.
1522  * Note that bv and buf should be register aligned.
1523  */
1524 inline void dih_angle_simd(const rvec* x,
1525                            const int*  ai,
1526                            const int*  aj,
1527                            const int*  ak,
1528                            const int*  al,
1529                            const real* pbc_simd,
1530                            SimdReal*   phi_S,
1531                            SimdReal*   mx_S,
1532                            SimdReal*   my_S,
1533                            SimdReal*   mz_S,
1534                            SimdReal*   nx_S,
1535                            SimdReal*   ny_S,
1536                            SimdReal*   nz_S,
1537                            SimdReal*   nrkj_m2_S,
1538                            SimdReal*   nrkj_n2_S,
1539                            SimdReal*   p_S,
1540                            SimdReal*   q_S)
1541 {
1542     SimdReal xi_S, yi_S, zi_S;
1543     SimdReal xj_S, yj_S, zj_S;
1544     SimdReal xk_S, yk_S, zk_S;
1545     SimdReal xl_S, yl_S, zl_S;
1546     SimdReal rijx_S, rijy_S, rijz_S;
1547     SimdReal rkjx_S, rkjy_S, rkjz_S;
1548     SimdReal rklx_S, rkly_S, rklz_S;
1549     SimdReal cx_S, cy_S, cz_S;
1550     SimdReal cn_S;
1551     SimdReal s_S;
1552     SimdReal ipr_S;
1553     SimdReal iprm_S, iprn_S;
1554     SimdReal nrkj2_S, nrkj_1_S, nrkj_2_S, nrkj_S;
1555     SimdReal toler_S;
1556     SimdReal nrkj2_min_S;
1557     SimdReal real_eps_S;
1558
1559     /* Used to avoid division by zero.
1560      * We take into acount that we multiply the result by real_eps_S.
1561      */
1562     nrkj2_min_S = SimdReal(GMX_REAL_MIN / (2 * GMX_REAL_EPS));
1563
1564     /* The value of the last significant bit (GMX_REAL_EPS is half of that) */
1565     real_eps_S = SimdReal(2 * GMX_REAL_EPS);
1566
1567     /* Store the non PBC corrected distances packed and aligned */
1568     gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ai, &xi_S, &yi_S, &zi_S);
1569     gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), aj, &xj_S, &yj_S, &zj_S);
1570     gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ak, &xk_S, &yk_S, &zk_S);
1571     gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), al, &xl_S, &yl_S, &zl_S);
1572     rijx_S = xi_S - xj_S;
1573     rijy_S = yi_S - yj_S;
1574     rijz_S = zi_S - zj_S;
1575     rkjx_S = xk_S - xj_S;
1576     rkjy_S = yk_S - yj_S;
1577     rkjz_S = zk_S - zj_S;
1578     rklx_S = xk_S - xl_S;
1579     rkly_S = yk_S - yl_S;
1580     rklz_S = zk_S - zl_S;
1581
1582     pbc_correct_dx_simd(&rijx_S, &rijy_S, &rijz_S, pbc_simd);
1583     pbc_correct_dx_simd(&rkjx_S, &rkjy_S, &rkjz_S, pbc_simd);
1584     pbc_correct_dx_simd(&rklx_S, &rkly_S, &rklz_S, pbc_simd);
1585
1586     cprod(rijx_S, rijy_S, rijz_S, rkjx_S, rkjy_S, rkjz_S, mx_S, my_S, mz_S);
1587
1588     cprod(rkjx_S, rkjy_S, rkjz_S, rklx_S, rkly_S, rklz_S, nx_S, ny_S, nz_S);
1589
1590     cprod(*mx_S, *my_S, *mz_S, *nx_S, *ny_S, *nz_S, &cx_S, &cy_S, &cz_S);
1591
1592     cn_S = sqrt(norm2(cx_S, cy_S, cz_S));
1593
1594     s_S = iprod(*mx_S, *my_S, *mz_S, *nx_S, *ny_S, *nz_S);
1595
1596     /* Determine the dihedral angle, the sign might need correction */
1597     *phi_S = atan2(cn_S, s_S);
1598
1599     ipr_S = iprod(rijx_S, rijy_S, rijz_S, *nx_S, *ny_S, *nz_S);
1600
1601     iprm_S = norm2(*mx_S, *my_S, *mz_S);
1602     iprn_S = norm2(*nx_S, *ny_S, *nz_S);
1603
1604     nrkj2_S = norm2(rkjx_S, rkjy_S, rkjz_S);
1605
1606     /* Avoid division by zero. When zero, the result is multiplied by 0
1607      * anyhow, so the 3 max below do not affect the final result.
1608      */
1609     nrkj2_S  = max(nrkj2_S, nrkj2_min_S);
1610     nrkj_1_S = invsqrt(nrkj2_S);
1611     nrkj_2_S = nrkj_1_S * nrkj_1_S;
1612     nrkj_S   = nrkj2_S * nrkj_1_S;
1613
1614     toler_S = nrkj2_S * real_eps_S;
1615
1616     /* Here the plain-C code uses a conditional, but we can't do that in SIMD.
1617      * So we take a max with the tolerance instead. Since we multiply with
1618      * m or n later, the max does not affect the results.
1619      */
1620     iprm_S     = max(iprm_S, toler_S);
1621     iprn_S     = max(iprn_S, toler_S);
1622     *nrkj_m2_S = nrkj_S * inv(iprm_S);
1623     *nrkj_n2_S = nrkj_S * inv(iprn_S);
1624
1625     /* Set sign of phi_S with the sign of ipr_S; phi_S is currently positive */
1626     *phi_S = copysign(*phi_S, ipr_S);
1627     *p_S   = iprod(rijx_S, rijy_S, rijz_S, rkjx_S, rkjy_S, rkjz_S);
1628     *p_S   = *p_S * nrkj_2_S;
1629
1630     *q_S = iprod(rklx_S, rkly_S, rklz_S, rkjx_S, rkjy_S, rkjz_S);
1631     *q_S = *q_S * nrkj_2_S;
1632 }
1633
1634 #endif // GMX_SIMD_HAVE_REAL
1635
1636 } // namespace
1637
1638 template<BondedKernelFlavor flavor>
1639 void do_dih_fup(int          i,
1640                 int          j,
1641                 int          k,
1642                 int          l,
1643                 real         ddphi,
1644                 rvec         r_ij,
1645                 rvec         r_kj,
1646                 rvec         r_kl,
1647                 rvec         m,
1648                 rvec         n,
1649                 rvec4        f[],
1650                 rvec         fshift[],
1651                 const t_pbc* pbc,
1652                 const rvec   x[],
1653                 int          t1,
1654                 int          t2,
1655                 int          t3)
1656 {
1657     /* 143 FLOPS */
1658     rvec f_i, f_j, f_k, f_l;
1659     rvec uvec, vvec, svec, dx_jl;
1660     real iprm, iprn, nrkj, nrkj2, nrkj_1, nrkj_2;
1661     real a, b, p, q, toler;
1662
1663     iprm  = iprod(m, m);       /*  5    */
1664     iprn  = iprod(n, n);       /*  5    */
1665     nrkj2 = iprod(r_kj, r_kj); /*  5    */
1666     toler = nrkj2 * GMX_REAL_EPS;
1667     if ((iprm > toler) && (iprn > toler))
1668     {
1669         nrkj_1 = gmx::invsqrt(nrkj2);  /* 10    */
1670         nrkj_2 = nrkj_1 * nrkj_1;      /*  1    */
1671         nrkj   = nrkj2 * nrkj_1;       /*  1    */
1672         a      = -ddphi * nrkj / iprm; /* 11    */
1673         svmul(a, m, f_i);              /*  3    */
1674         b = ddphi * nrkj / iprn;       /* 11    */
1675         svmul(b, n, f_l);              /*  3  */
1676         p = iprod(r_ij, r_kj);         /*  5    */
1677         p *= nrkj_2;                   /*  1    */
1678         q = iprod(r_kl, r_kj);         /*  5    */
1679         q *= nrkj_2;                   /*  1    */
1680         svmul(p, f_i, uvec);           /*  3    */
1681         svmul(q, f_l, vvec);           /*  3    */
1682         rvec_sub(uvec, vvec, svec);    /*  3    */
1683         rvec_sub(f_i, svec, f_j);      /*  3    */
1684         rvec_add(f_l, svec, f_k);      /*  3    */
1685         rvec_inc(f[i], f_i);           /*  3    */
1686         rvec_dec(f[j], f_j);           /*  3    */
1687         rvec_dec(f[k], f_k);           /*  3    */
1688         rvec_inc(f[l], f_l);           /*  3    */
1689
1690         if (computeVirial(flavor))
1691         {
1692             if (pbc)
1693             {
1694                 t3 = pbc_rvec_sub(pbc, x[l], x[j], dx_jl);
1695             }
1696             else
1697             {
1698                 t3 = CENTRAL;
1699             }
1700
1701             rvec_inc(fshift[t1], f_i);
1702             rvec_dec(fshift[CENTRAL], f_j);
1703             rvec_dec(fshift[t2], f_k);
1704             rvec_inc(fshift[t3], f_l);
1705         }
1706     }
1707     /* 112 TOTAL    */
1708 }
1709
1710 namespace
1711 {
1712
1713 #if GMX_SIMD_HAVE_REAL
1714 /* As do_dih_fup_noshiftf above, but with SIMD and pre-calculated pre-factors */
1715 inline void gmx_simdcall do_dih_fup_noshiftf_simd(const int* ai,
1716                                                   const int* aj,
1717                                                   const int* ak,
1718                                                   const int* al,
1719                                                   SimdReal   p,
1720                                                   SimdReal   q,
1721                                                   SimdReal   f_i_x,
1722                                                   SimdReal   f_i_y,
1723                                                   SimdReal   f_i_z,
1724                                                   SimdReal   mf_l_x,
1725                                                   SimdReal   mf_l_y,
1726                                                   SimdReal   mf_l_z,
1727                                                   rvec4      f[])
1728 {
1729     SimdReal sx    = p * f_i_x + q * mf_l_x;
1730     SimdReal sy    = p * f_i_y + q * mf_l_y;
1731     SimdReal sz    = p * f_i_z + q * mf_l_z;
1732     SimdReal f_j_x = f_i_x - sx;
1733     SimdReal f_j_y = f_i_y - sy;
1734     SimdReal f_j_z = f_i_z - sz;
1735     SimdReal f_k_x = mf_l_x - sx;
1736     SimdReal f_k_y = mf_l_y - sy;
1737     SimdReal f_k_z = mf_l_z - sz;
1738     transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ai, f_i_x, f_i_y, f_i_z);
1739     transposeScatterDecrU<4>(reinterpret_cast<real*>(f), aj, f_j_x, f_j_y, f_j_z);
1740     transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ak, f_k_x, f_k_y, f_k_z);
1741     transposeScatterDecrU<4>(reinterpret_cast<real*>(f), al, mf_l_x, mf_l_y, mf_l_z);
1742 }
1743 #endif // GMX_SIMD_HAVE_REAL
1744
1745 /*! \brief Computes and returns the proper dihedral force
1746  *
1747  * With the appropriate kernel flavor, also computes and accumulates
1748  * the energy and dV/dlambda.
1749  */
1750 template<BondedKernelFlavor flavor>
1751 real dopdihs(real cpA, real cpB, real phiA, real phiB, int mult, real phi, real lambda, real* V, real* dvdlambda)
1752 {
1753     const real L1   = 1.0 - lambda;
1754     const real ph0  = (L1 * phiA + lambda * phiB) * DEG2RAD;
1755     const real dph0 = (phiB - phiA) * DEG2RAD;
1756     const real cp   = L1 * cpA + lambda * cpB;
1757
1758     const real mdphi = mult * phi - ph0;
1759     const real sdphi = std::sin(mdphi);
1760     const real ddphi = -cp * mult * sdphi;
1761     if (computeEnergy(flavor))
1762     {
1763         const real v1 = 1 + std::cos(mdphi);
1764         *V += cp * v1;
1765
1766         *dvdlambda += (cpB - cpA) * v1 + cp * dph0 * sdphi;
1767     }
1768
1769     return ddphi;
1770     /* That was 40 flops */
1771 }
1772
1773 /*! \brief Similar to \p dopdihs(), except for a minus sign and a different treatment of mult/phi0 */
1774 real dopdihs_min(real cpA, real cpB, real phiA, real phiB, int mult, real phi, real lambda, real* V, real* F)
1775 {
1776     real v, dvdlambda, mdphi, v1, sdphi, ddphi;
1777     real L1   = 1.0 - lambda;
1778     real ph0  = (L1 * phiA + lambda * phiB) * DEG2RAD;
1779     real dph0 = (phiB - phiA) * DEG2RAD;
1780     real cp   = L1 * cpA + lambda * cpB;
1781
1782     mdphi = mult * (phi - ph0);
1783     sdphi = std::sin(mdphi);
1784     ddphi = cp * mult * sdphi;
1785     v1    = 1.0 - std::cos(mdphi);
1786     v     = cp * v1;
1787
1788     dvdlambda = (cpB - cpA) * v1 + cp * dph0 * sdphi;
1789
1790     *V = v;
1791     *F = ddphi;
1792
1793     return dvdlambda;
1794
1795     /* That was 40 flops */
1796 }
1797
1798 template<BondedKernelFlavor flavor>
1799 std::enable_if_t<flavor != BondedKernelFlavor::ForcesSimdWhenAvailable || !GMX_SIMD_HAVE_REAL, real>
1800 pdihs(int             nbonds,
1801       const t_iatom   forceatoms[],
1802       const t_iparams forceparams[],
1803       const rvec      x[],
1804       rvec4           f[],
1805       rvec            fshift[],
1806       const t_pbc*    pbc,
1807       real            lambda,
1808       real*           dvdlambda,
1809       const t_mdatoms gmx_unused* md,
1810       t_fcdata gmx_unused* fcd,
1811       int gmx_unused* global_atom_index)
1812 {
1813     int  t1, t2, t3;
1814     rvec r_ij, r_kj, r_kl, m, n;
1815
1816     real vtot = 0.0;
1817
1818     for (int i = 0; i < nbonds;)
1819     {
1820         const int ai = forceatoms[i + 1];
1821         const int aj = forceatoms[i + 2];
1822         const int ak = forceatoms[i + 3];
1823         const int al = forceatoms[i + 4];
1824
1825         const real phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n, &t1,
1826                                    &t2, &t3); /*  84      */
1827
1828         /* Loop over dihedrals working on the same atoms,
1829          * so we avoid recalculating angles and distributing forces.
1830          */
1831         real ddphi_tot = 0;
1832         do
1833         {
1834             const int type = forceatoms[i];
1835             ddphi_tot += dopdihs<flavor>(forceparams[type].pdihs.cpA, forceparams[type].pdihs.cpB,
1836                                          forceparams[type].pdihs.phiA, forceparams[type].pdihs.phiB,
1837                                          forceparams[type].pdihs.mult, phi, lambda, &vtot, dvdlambda);
1838
1839             i += 5;
1840         } while (i < nbonds && forceatoms[i + 1] == ai && forceatoms[i + 2] == aj
1841                  && forceatoms[i + 3] == ak && forceatoms[i + 4] == al);
1842
1843         do_dih_fup<flavor>(ai, aj, ak, al, ddphi_tot, r_ij, r_kj, r_kl, m, n, f, fshift, pbc, x, t1,
1844                            t2, t3); /* 112              */
1845     }                               /* 223 TOTAL  */
1846
1847     return vtot;
1848 }
1849
1850 #if GMX_SIMD_HAVE_REAL
1851
1852 /* As pdihs above, but using SIMD to calculate multiple dihedrals at once */
1853 template<BondedKernelFlavor flavor>
1854 std::enable_if_t<flavor == BondedKernelFlavor::ForcesSimdWhenAvailable, real>
1855 pdihs(int             nbonds,
1856       const t_iatom   forceatoms[],
1857       const t_iparams forceparams[],
1858       const rvec      x[],
1859       rvec4           f[],
1860       rvec gmx_unused fshift[],
1861       const t_pbc*    pbc,
1862       real gmx_unused lambda,
1863       real gmx_unused* dvdlambda,
1864       const t_mdatoms gmx_unused* md,
1865       t_fcdata gmx_unused* fcd,
1866       int gmx_unused* global_atom_index)
1867 {
1868     const int                                nfa1 = 5;
1869     int                                      i, iu, s;
1870     int                                      type;
1871     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ai[GMX_SIMD_REAL_WIDTH];
1872     alignas(GMX_SIMD_ALIGNMENT) std::int32_t aj[GMX_SIMD_REAL_WIDTH];
1873     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ak[GMX_SIMD_REAL_WIDTH];
1874     alignas(GMX_SIMD_ALIGNMENT) std::int32_t al[GMX_SIMD_REAL_WIDTH];
1875     alignas(GMX_SIMD_ALIGNMENT) real         buf[3 * GMX_SIMD_REAL_WIDTH];
1876     real *                                   cp, *phi0, *mult;
1877     SimdReal                                 deg2rad_S(DEG2RAD);
1878     SimdReal                                 p_S, q_S;
1879     SimdReal                                 phi0_S, phi_S;
1880     SimdReal                                 mx_S, my_S, mz_S;
1881     SimdReal                                 nx_S, ny_S, nz_S;
1882     SimdReal                                 nrkj_m2_S, nrkj_n2_S;
1883     SimdReal                                 cp_S, mdphi_S, mult_S;
1884     SimdReal                                 sin_S, cos_S;
1885     SimdReal                                 mddphi_S;
1886     SimdReal                                 sf_i_S, msf_l_S;
1887     alignas(GMX_SIMD_ALIGNMENT) real         pbc_simd[9 * GMX_SIMD_REAL_WIDTH];
1888
1889     /* Extract aligned pointer for parameters and variables */
1890     cp   = buf + 0 * GMX_SIMD_REAL_WIDTH;
1891     phi0 = buf + 1 * GMX_SIMD_REAL_WIDTH;
1892     mult = buf + 2 * GMX_SIMD_REAL_WIDTH;
1893
1894     set_pbc_simd(pbc, pbc_simd);
1895
1896     /* nbonds is the number of dihedrals times nfa1, here we step GMX_SIMD_REAL_WIDTH dihs */
1897     for (i = 0; (i < nbonds); i += GMX_SIMD_REAL_WIDTH * nfa1)
1898     {
1899         /* Collect atoms quadruplets for GMX_SIMD_REAL_WIDTH dihedrals.
1900          * iu indexes into forceatoms, we should not let iu go beyond nbonds.
1901          */
1902         iu = i;
1903         for (s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
1904         {
1905             type  = forceatoms[iu];
1906             ai[s] = forceatoms[iu + 1];
1907             aj[s] = forceatoms[iu + 2];
1908             ak[s] = forceatoms[iu + 3];
1909             al[s] = forceatoms[iu + 4];
1910
1911             /* At the end fill the arrays with the last atoms and 0 params */
1912             if (i + s * nfa1 < nbonds)
1913             {
1914                 cp[s]   = forceparams[type].pdihs.cpA;
1915                 phi0[s] = forceparams[type].pdihs.phiA;
1916                 mult[s] = forceparams[type].pdihs.mult;
1917
1918                 if (iu + nfa1 < nbonds)
1919                 {
1920                     iu += nfa1;
1921                 }
1922             }
1923             else
1924             {
1925                 cp[s]   = 0;
1926                 phi0[s] = 0;
1927                 mult[s] = 0;
1928             }
1929         }
1930
1931         /* Caclulate GMX_SIMD_REAL_WIDTH dihedral angles at once */
1932         dih_angle_simd(x, ai, aj, ak, al, pbc_simd, &phi_S, &mx_S, &my_S, &mz_S, &nx_S, &ny_S,
1933                        &nz_S, &nrkj_m2_S, &nrkj_n2_S, &p_S, &q_S);
1934
1935         cp_S   = load<SimdReal>(cp);
1936         phi0_S = load<SimdReal>(phi0) * deg2rad_S;
1937         mult_S = load<SimdReal>(mult);
1938
1939         mdphi_S = fms(mult_S, phi_S, phi0_S);
1940
1941         /* Calculate GMX_SIMD_REAL_WIDTH sines at once */
1942         sincos(mdphi_S, &sin_S, &cos_S);
1943         mddphi_S = cp_S * mult_S * sin_S;
1944         sf_i_S   = mddphi_S * nrkj_m2_S;
1945         msf_l_S  = mddphi_S * nrkj_n2_S;
1946
1947         /* After this m?_S will contain f[i] */
1948         mx_S = sf_i_S * mx_S;
1949         my_S = sf_i_S * my_S;
1950         mz_S = sf_i_S * mz_S;
1951
1952         /* After this m?_S will contain -f[l] */
1953         nx_S = msf_l_S * nx_S;
1954         ny_S = msf_l_S * ny_S;
1955         nz_S = msf_l_S * nz_S;
1956
1957         do_dih_fup_noshiftf_simd(ai, aj, ak, al, p_S, q_S, mx_S, my_S, mz_S, nx_S, ny_S, nz_S, f);
1958     }
1959
1960     return 0;
1961 }
1962
1963 /* This is mostly a copy of the SIMD flavor of pdihs above, but with using
1964  * the RB potential instead of a harmonic potential.
1965  * This function can replace rbdihs() when no energy and virial are needed.
1966  */
1967 template<BondedKernelFlavor flavor>
1968 std::enable_if_t<flavor == BondedKernelFlavor::ForcesSimdWhenAvailable, real>
1969 rbdihs(int             nbonds,
1970        const t_iatom   forceatoms[],
1971        const t_iparams forceparams[],
1972        const rvec      x[],
1973        rvec4           f[],
1974        rvec gmx_unused fshift[],
1975        const t_pbc*    pbc,
1976        real gmx_unused lambda,
1977        real gmx_unused* dvdlambda,
1978        const t_mdatoms gmx_unused* md,
1979        t_fcdata gmx_unused* fcd,
1980        int gmx_unused* global_atom_index)
1981 {
1982     const int                                nfa1 = 5;
1983     int                                      i, iu, s, j;
1984     int                                      type;
1985     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ai[GMX_SIMD_REAL_WIDTH];
1986     alignas(GMX_SIMD_ALIGNMENT) std::int32_t aj[GMX_SIMD_REAL_WIDTH];
1987     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ak[GMX_SIMD_REAL_WIDTH];
1988     alignas(GMX_SIMD_ALIGNMENT) std::int32_t al[GMX_SIMD_REAL_WIDTH];
1989     alignas(GMX_SIMD_ALIGNMENT) real         parm[NR_RBDIHS * GMX_SIMD_REAL_WIDTH];
1990
1991     SimdReal                         p_S, q_S;
1992     SimdReal                         phi_S;
1993     SimdReal                         ddphi_S, cosfac_S;
1994     SimdReal                         mx_S, my_S, mz_S;
1995     SimdReal                         nx_S, ny_S, nz_S;
1996     SimdReal                         nrkj_m2_S, nrkj_n2_S;
1997     SimdReal                         parm_S, c_S;
1998     SimdReal                         sin_S, cos_S;
1999     SimdReal                         sf_i_S, msf_l_S;
2000     alignas(GMX_SIMD_ALIGNMENT) real pbc_simd[9 * GMX_SIMD_REAL_WIDTH];
2001
2002     SimdReal pi_S(M_PI);
2003     SimdReal one_S(1.0);
2004
2005     set_pbc_simd(pbc, pbc_simd);
2006
2007     /* nbonds is the number of dihedrals times nfa1, here we step GMX_SIMD_REAL_WIDTH dihs */
2008     for (i = 0; (i < nbonds); i += GMX_SIMD_REAL_WIDTH * nfa1)
2009     {
2010         /* Collect atoms quadruplets for GMX_SIMD_REAL_WIDTH dihedrals.
2011          * iu indexes into forceatoms, we should not let iu go beyond nbonds.
2012          */
2013         iu = i;
2014         for (s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
2015         {
2016             type  = forceatoms[iu];
2017             ai[s] = forceatoms[iu + 1];
2018             aj[s] = forceatoms[iu + 2];
2019             ak[s] = forceatoms[iu + 3];
2020             al[s] = forceatoms[iu + 4];
2021
2022             /* At the end fill the arrays with the last atoms and 0 params */
2023             if (i + s * nfa1 < nbonds)
2024             {
2025                 /* We don't need the first parameter, since that's a constant
2026                  * which only affects the energies, not the forces.
2027                  */
2028                 for (j = 1; j < NR_RBDIHS; j++)
2029                 {
2030                     parm[j * GMX_SIMD_REAL_WIDTH + s] = forceparams[type].rbdihs.rbcA[j];
2031                 }
2032
2033                 if (iu + nfa1 < nbonds)
2034                 {
2035                     iu += nfa1;
2036                 }
2037             }
2038             else
2039             {
2040                 for (j = 1; j < NR_RBDIHS; j++)
2041                 {
2042                     parm[j * GMX_SIMD_REAL_WIDTH + s] = 0;
2043                 }
2044             }
2045         }
2046
2047         /* Caclulate GMX_SIMD_REAL_WIDTH dihedral angles at once */
2048         dih_angle_simd(x, ai, aj, ak, al, pbc_simd, &phi_S, &mx_S, &my_S, &mz_S, &nx_S, &ny_S,
2049                        &nz_S, &nrkj_m2_S, &nrkj_n2_S, &p_S, &q_S);
2050
2051         /* Change to polymer convention */
2052         phi_S = phi_S - pi_S;
2053
2054         sincos(phi_S, &sin_S, &cos_S);
2055
2056         ddphi_S  = setZero();
2057         c_S      = one_S;
2058         cosfac_S = one_S;
2059         for (j = 1; j < NR_RBDIHS; j++)
2060         {
2061             parm_S   = load<SimdReal>(parm + j * GMX_SIMD_REAL_WIDTH);
2062             ddphi_S  = fma(c_S * parm_S, cosfac_S, ddphi_S);
2063             cosfac_S = cosfac_S * cos_S;
2064             c_S      = c_S + one_S;
2065         }
2066
2067         /* Note that here we do not use the minus sign which is present
2068          * in the normal RB code. This is corrected for through (m)sf below.
2069          */
2070         ddphi_S = ddphi_S * sin_S;
2071
2072         sf_i_S  = ddphi_S * nrkj_m2_S;
2073         msf_l_S = ddphi_S * nrkj_n2_S;
2074
2075         /* After this m?_S will contain f[i] */
2076         mx_S = sf_i_S * mx_S;
2077         my_S = sf_i_S * my_S;
2078         mz_S = sf_i_S * mz_S;
2079
2080         /* After this m?_S will contain -f[l] */
2081         nx_S = msf_l_S * nx_S;
2082         ny_S = msf_l_S * ny_S;
2083         nz_S = msf_l_S * nz_S;
2084
2085         do_dih_fup_noshiftf_simd(ai, aj, ak, al, p_S, q_S, mx_S, my_S, mz_S, nx_S, ny_S, nz_S, f);
2086     }
2087
2088     return 0;
2089 }
2090
2091 #endif // GMX_SIMD_HAVE_REAL
2092
2093
2094 template<BondedKernelFlavor flavor>
2095 real idihs(int             nbonds,
2096            const t_iatom   forceatoms[],
2097            const t_iparams forceparams[],
2098            const rvec      x[],
2099            rvec4           f[],
2100            rvec            fshift[],
2101            const t_pbc*    pbc,
2102            real            lambda,
2103            real*           dvdlambda,
2104            const t_mdatoms gmx_unused* md,
2105            t_fcdata gmx_unused* fcd,
2106            int gmx_unused* global_atom_index)
2107 {
2108     int  i, type, ai, aj, ak, al;
2109     int  t1, t2, t3;
2110     real phi, phi0, dphi0, ddphi, vtot;
2111     rvec r_ij, r_kj, r_kl, m, n;
2112     real L1, kk, dp, dp2, kA, kB, pA, pB, dvdl_term;
2113
2114     L1        = 1.0 - lambda;
2115     dvdl_term = 0;
2116     vtot      = 0.0;
2117     for (i = 0; (i < nbonds);)
2118     {
2119         type = forceatoms[i++];
2120         ai   = forceatoms[i++];
2121         aj   = forceatoms[i++];
2122         ak   = forceatoms[i++];
2123         al   = forceatoms[i++];
2124
2125         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n, &t1, &t2, &t3); /*  84         */
2126
2127         /* phi can jump if phi0 is close to Pi/-Pi, which will cause huge
2128          * force changes if we just apply a normal harmonic.
2129          * Instead, we first calculate phi-phi0 and take it modulo (-Pi,Pi).
2130          * This means we will never have the periodicity problem, unless
2131          * the dihedral is Pi away from phiO, which is very unlikely due to
2132          * the potential.
2133          */
2134         kA = forceparams[type].harmonic.krA;
2135         kB = forceparams[type].harmonic.krB;
2136         pA = forceparams[type].harmonic.rA;
2137         pB = forceparams[type].harmonic.rB;
2138
2139         kk    = L1 * kA + lambda * kB;
2140         phi0  = (L1 * pA + lambda * pB) * DEG2RAD;
2141         dphi0 = (pB - pA) * DEG2RAD;
2142
2143         dp = phi - phi0;
2144
2145         make_dp_periodic(&dp);
2146
2147         dp2 = dp * dp;
2148
2149         vtot += 0.5 * kk * dp2;
2150         ddphi = -kk * dp;
2151
2152         dvdl_term += 0.5 * (kB - kA) * dp2 - kk * dphi0 * dp;
2153
2154         do_dih_fup<flavor>(ai, aj, ak, al, -ddphi, r_ij, r_kj, r_kl, m, n, f, fshift, pbc, x, t1,
2155                            t2, t3); /* 112              */
2156         /* 218 TOTAL    */
2157     }
2158
2159     *dvdlambda += dvdl_term;
2160     return vtot;
2161 }
2162
2163 /*! \brief Computes angle restraints of two different types */
2164 template<BondedKernelFlavor flavor>
2165 real low_angres(int             nbonds,
2166                 const t_iatom   forceatoms[],
2167                 const t_iparams forceparams[],
2168                 const rvec      x[],
2169                 rvec4           f[],
2170                 rvec            fshift[],
2171                 const t_pbc*    pbc,
2172                 real            lambda,
2173                 real*           dvdlambda,
2174                 gmx_bool        bZAxis)
2175 {
2176     int  i, m, type, ai, aj, ak, al;
2177     int  t1, t2;
2178     real phi, cos_phi, cos_phi2, vid, vtot, dVdphi;
2179     rvec r_ij, r_kl, f_i, f_k = { 0, 0, 0 };
2180     real st, sth, nrij2, nrkl2, c, cij, ckl;
2181
2182     t2 = 0; /* avoid warning with gcc-3.3. It is never used uninitialized */
2183
2184     vtot = 0.0;
2185     ak = al = 0; /* to avoid warnings */
2186     for (i = 0; i < nbonds;)
2187     {
2188         type = forceatoms[i++];
2189         ai   = forceatoms[i++];
2190         aj   = forceatoms[i++];
2191         t1   = pbc_rvec_sub(pbc, x[aj], x[ai], r_ij); /*  3             */
2192         if (!bZAxis)
2193         {
2194             ak = forceatoms[i++];
2195             al = forceatoms[i++];
2196             t2 = pbc_rvec_sub(pbc, x[al], x[ak], r_kl); /*  3           */
2197         }
2198         else
2199         {
2200             r_kl[XX] = 0;
2201             r_kl[YY] = 0;
2202             r_kl[ZZ] = 1;
2203         }
2204
2205         cos_phi = cos_angle(r_ij, r_kl); /* 25          */
2206         phi     = std::acos(cos_phi);    /* 10           */
2207
2208         *dvdlambda += dopdihs_min(forceparams[type].pdihs.cpA, forceparams[type].pdihs.cpB,
2209                                   forceparams[type].pdihs.phiA, forceparams[type].pdihs.phiB,
2210                                   forceparams[type].pdihs.mult, phi, lambda, &vid, &dVdphi); /*  40 */
2211
2212         vtot += vid;
2213
2214         cos_phi2 = gmx::square(cos_phi); /*   1         */
2215         if (cos_phi2 < 1)
2216         {
2217             st    = -dVdphi * gmx::invsqrt(1 - cos_phi2); /*  12                */
2218             sth   = st * cos_phi;                         /*   1                */
2219             nrij2 = iprod(r_ij, r_ij);                    /*   5                */
2220             nrkl2 = iprod(r_kl, r_kl);                    /*   5          */
2221
2222             c   = st * gmx::invsqrt(nrij2 * nrkl2); /*  11              */
2223             cij = sth / nrij2;                      /*  10              */
2224             ckl = sth / nrkl2;                      /*  10              */
2225
2226             for (m = 0; m < DIM; m++) /*  18+18       */
2227             {
2228                 f_i[m] = (c * r_kl[m] - cij * r_ij[m]);
2229                 f[ai][m] += f_i[m];
2230                 f[aj][m] -= f_i[m];
2231                 if (!bZAxis)
2232                 {
2233                     f_k[m] = (c * r_ij[m] - ckl * r_kl[m]);
2234                     f[ak][m] += f_k[m];
2235                     f[al][m] -= f_k[m];
2236                 }
2237             }
2238
2239             if (computeVirial(flavor))
2240             {
2241                 rvec_inc(fshift[t1], f_i);
2242                 rvec_dec(fshift[CENTRAL], f_i);
2243                 if (!bZAxis)
2244                 {
2245                     rvec_inc(fshift[t2], f_k);
2246                     rvec_dec(fshift[CENTRAL], f_k);
2247                 }
2248             }
2249         }
2250     }
2251
2252     return vtot; /*  184 / 157 (bZAxis)  total  */
2253 }
2254
2255 template<BondedKernelFlavor flavor>
2256 real angres(int             nbonds,
2257             const t_iatom   forceatoms[],
2258             const t_iparams forceparams[],
2259             const rvec      x[],
2260             rvec4           f[],
2261             rvec            fshift[],
2262             const t_pbc*    pbc,
2263             real            lambda,
2264             real*           dvdlambda,
2265             const t_mdatoms gmx_unused* md,
2266             t_fcdata gmx_unused* fcd,
2267             int gmx_unused* global_atom_index)
2268 {
2269     return low_angres<flavor>(nbonds, forceatoms, forceparams, x, f, fshift, pbc, lambda, dvdlambda, FALSE);
2270 }
2271
2272 template<BondedKernelFlavor flavor>
2273 real angresz(int             nbonds,
2274              const t_iatom   forceatoms[],
2275              const t_iparams forceparams[],
2276              const rvec      x[],
2277              rvec4           f[],
2278              rvec            fshift[],
2279              const t_pbc*    pbc,
2280              real            lambda,
2281              real*           dvdlambda,
2282              const t_mdatoms gmx_unused* md,
2283              t_fcdata gmx_unused* fcd,
2284              int gmx_unused* global_atom_index)
2285 {
2286     return low_angres<flavor>(nbonds, forceatoms, forceparams, x, f, fshift, pbc, lambda, dvdlambda, TRUE);
2287 }
2288
2289 template<BondedKernelFlavor flavor>
2290 real dihres(int             nbonds,
2291             const t_iatom   forceatoms[],
2292             const t_iparams forceparams[],
2293             const rvec      x[],
2294             rvec4           f[],
2295             rvec            fshift[],
2296             const t_pbc*    pbc,
2297             real            lambda,
2298             real*           dvdlambda,
2299             const t_mdatoms gmx_unused* md,
2300             t_fcdata gmx_unused* fcd,
2301             int gmx_unused* global_atom_index)
2302 {
2303     real vtot = 0;
2304     int  ai, aj, ak, al, i, type, t1, t2, t3;
2305     real phi0A, phi0B, dphiA, dphiB, kfacA, kfacB, phi0, dphi, kfac;
2306     real phi, ddphi, ddp, ddp2, dp, d2r, L1;
2307     rvec r_ij, r_kj, r_kl, m, n;
2308
2309     L1 = 1.0 - lambda;
2310
2311     d2r = DEG2RAD;
2312
2313     for (i = 0; (i < nbonds);)
2314     {
2315         type = forceatoms[i++];
2316         ai   = forceatoms[i++];
2317         aj   = forceatoms[i++];
2318         ak   = forceatoms[i++];
2319         al   = forceatoms[i++];
2320
2321         phi0A = forceparams[type].dihres.phiA * d2r;
2322         dphiA = forceparams[type].dihres.dphiA * d2r;
2323         kfacA = forceparams[type].dihres.kfacA;
2324
2325         phi0B = forceparams[type].dihres.phiB * d2r;
2326         dphiB = forceparams[type].dihres.dphiB * d2r;
2327         kfacB = forceparams[type].dihres.kfacB;
2328
2329         phi0 = L1 * phi0A + lambda * phi0B;
2330         dphi = L1 * dphiA + lambda * dphiB;
2331         kfac = L1 * kfacA + lambda * kfacB;
2332
2333         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n, &t1, &t2, &t3);
2334         /* 84 flops */
2335
2336         /* phi can jump if phi0 is close to Pi/-Pi, which will cause huge
2337          * force changes if we just apply a normal harmonic.
2338          * Instead, we first calculate phi-phi0 and take it modulo (-Pi,Pi).
2339          * This means we will never have the periodicity problem, unless
2340          * the dihedral is Pi away from phiO, which is very unlikely due to
2341          * the potential.
2342          */
2343         dp = phi - phi0;
2344         make_dp_periodic(&dp);
2345
2346         if (dp > dphi)
2347         {
2348             ddp = dp - dphi;
2349         }
2350         else if (dp < -dphi)
2351         {
2352             ddp = dp + dphi;
2353         }
2354         else
2355         {
2356             ddp = 0;
2357         }
2358
2359         if (ddp != 0.0)
2360         {
2361             ddp2 = ddp * ddp;
2362             vtot += 0.5 * kfac * ddp2;
2363             ddphi = kfac * ddp;
2364
2365             *dvdlambda += 0.5 * (kfacB - kfacA) * ddp2;
2366             /* lambda dependence from changing restraint distances */
2367             if (ddp > 0)
2368             {
2369                 *dvdlambda -= kfac * ddp * ((dphiB - dphiA) + (phi0B - phi0A));
2370             }
2371             else if (ddp < 0)
2372             {
2373                 *dvdlambda += kfac * ddp * ((dphiB - dphiA) - (phi0B - phi0A));
2374             }
2375             do_dih_fup<flavor>(ai, aj, ak, al, ddphi, r_ij, r_kj, r_kl, m, n, f, fshift, pbc, x, t1,
2376                                t2, t3); /* 112          */
2377         }
2378     }
2379     return vtot;
2380 }
2381
2382
2383 real unimplemented(int gmx_unused nbonds,
2384                    const t_iatom gmx_unused forceatoms[],
2385                    const t_iparams gmx_unused forceparams[],
2386                    const rvec gmx_unused x[],
2387                    rvec4 gmx_unused f[],
2388                    rvec gmx_unused fshift[],
2389                    const t_pbc gmx_unused* pbc,
2390                    real gmx_unused lambda,
2391                    real gmx_unused* dvdlambda,
2392                    const t_mdatoms gmx_unused* md,
2393                    t_fcdata gmx_unused* fcd,
2394                    int gmx_unused* global_atom_index)
2395 {
2396     gmx_impl("*** you are using a not implemented function");
2397 }
2398
2399 template<BondedKernelFlavor flavor>
2400 real restrangles(int             nbonds,
2401                  const t_iatom   forceatoms[],
2402                  const t_iparams forceparams[],
2403                  const rvec      x[],
2404                  rvec4           f[],
2405                  rvec            fshift[],
2406                  const t_pbc*    pbc,
2407                  real gmx_unused lambda,
2408                  real gmx_unused* dvdlambda,
2409                  const t_mdatoms gmx_unused* md,
2410                  t_fcdata gmx_unused* fcd,
2411                  int gmx_unused* global_atom_index)
2412 {
2413     int    i, d, ai, aj, ak, type, m;
2414     int    t1, t2;
2415     real   v, vtot;
2416     rvec   f_i, f_j, f_k;
2417     double prefactor, ratio_ante, ratio_post;
2418     rvec   delta_ante, delta_post, vec_temp;
2419
2420     vtot = 0.0;
2421     for (i = 0; (i < nbonds);)
2422     {
2423         type = forceatoms[i++];
2424         ai   = forceatoms[i++];
2425         aj   = forceatoms[i++];
2426         ak   = forceatoms[i++];
2427
2428         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], vec_temp);
2429         pbc_rvec_sub(pbc, x[aj], x[ai], delta_ante);
2430         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], delta_post);
2431
2432
2433         /* This function computes factors needed for restricted angle potential.
2434          * The restricted angle potential is used in coarse-grained simulations to avoid singularities
2435          * when three particles align and the dihedral angle and dihedral potential
2436          * cannot be calculated. This potential is calculated using the formula:
2437            real restrangles(int nbonds,
2438             const t_iatom forceatoms[],const t_iparams forceparams[],
2439             const rvec x[],rvec4 f[],rvec fshift[],
2440             const t_pbc *pbc,
2441             real gmx_unused lambda,real gmx_unused *dvdlambda,
2442             const t_mdatoms gmx_unused *md,t_fcdata gmx_unused *fcd,
2443             int gmx_unused *global_atom_index)
2444            {
2445            int  i, d, ai, aj, ak, type, m;
2446            int t1, t2;
2447            rvec r_ij,r_kj;
2448            real v, vtot;
2449            rvec f_i, f_j, f_k;
2450            real prefactor, ratio_ante, ratio_post;
2451            rvec delta_ante, delta_post, vec_temp;
2452
2453            vtot = 0.0;
2454            for(i=0; (i<nbonds); )
2455            {
2456            type = forceatoms[i++];
2457            ai   = forceatoms[i++];
2458            aj   = forceatoms[i++];
2459            ak   = forceatoms[i++];
2460
2461          * \f[V_{\rm ReB}(\theta_i) = \frac{1}{2} k_{\theta} \frac{(\cos\theta_i - \cos\theta_0)^2}
2462          * {\sin^2\theta_i}\f] ({eq:ReB} and ref \cite{MonicaGoga2013} from the manual).
2463          * For more explanations see comments file "restcbt.h". */
2464
2465         compute_factors_restangles(type, forceparams, delta_ante, delta_post, &prefactor,
2466                                    &ratio_ante, &ratio_post, &v);
2467
2468         /*   Forces are computed per component */
2469         for (d = 0; d < DIM; d++)
2470         {
2471             f_i[d] = prefactor * (ratio_ante * delta_ante[d] - delta_post[d]);
2472             f_j[d] = prefactor
2473                      * ((ratio_post + 1.0) * delta_post[d] - (ratio_ante + 1.0) * delta_ante[d]);
2474             f_k[d] = prefactor * (delta_ante[d] - ratio_post * delta_post[d]);
2475         }
2476
2477         /*   Computation of potential energy   */
2478
2479         vtot += v;
2480
2481         /*   Update forces */
2482
2483         for (m = 0; (m < DIM); m++)
2484         {
2485             f[ai][m] += f_i[m];
2486             f[aj][m] += f_j[m];
2487             f[ak][m] += f_k[m];
2488         }
2489
2490         if (computeVirial(flavor))
2491         {
2492             rvec_inc(fshift[t1], f_i);
2493             rvec_inc(fshift[CENTRAL], f_j);
2494             rvec_inc(fshift[t2], f_k);
2495         }
2496     }
2497     return vtot;
2498 }
2499
2500
2501 template<BondedKernelFlavor flavor>
2502 real restrdihs(int             nbonds,
2503                const t_iatom   forceatoms[],
2504                const t_iparams forceparams[],
2505                const rvec      x[],
2506                rvec4           f[],
2507                rvec            fshift[],
2508                const t_pbc*    pbc,
2509                real gmx_unused lambda,
2510                real gmx_unused* dvlambda,
2511                const t_mdatoms gmx_unused* md,
2512                t_fcdata gmx_unused* fcd,
2513                int gmx_unused* global_atom_index)
2514 {
2515     int  i, d, type, ai, aj, ak, al;
2516     rvec f_i, f_j, f_k, f_l;
2517     rvec dx_jl;
2518     int  t1, t2, t3;
2519     real v, vtot;
2520     rvec delta_ante, delta_crnt, delta_post, vec_temp;
2521     real factor_phi_ai_ante, factor_phi_ai_crnt, factor_phi_ai_post;
2522     real factor_phi_aj_ante, factor_phi_aj_crnt, factor_phi_aj_post;
2523     real factor_phi_ak_ante, factor_phi_ak_crnt, factor_phi_ak_post;
2524     real factor_phi_al_ante, factor_phi_al_crnt, factor_phi_al_post;
2525     real prefactor_phi;
2526
2527
2528     vtot = 0.0;
2529     for (i = 0; (i < nbonds);)
2530     {
2531         type = forceatoms[i++];
2532         ai   = forceatoms[i++];
2533         aj   = forceatoms[i++];
2534         ak   = forceatoms[i++];
2535         al   = forceatoms[i++];
2536
2537         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], vec_temp);
2538         pbc_rvec_sub(pbc, x[aj], x[ai], delta_ante);
2539         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], delta_crnt);
2540         pbc_rvec_sub(pbc, x[ak], x[al], vec_temp);
2541         pbc_rvec_sub(pbc, x[al], x[ak], delta_post);
2542
2543         /* This function computes factors needed for restricted angle potential.
2544          * The restricted angle potential is used in coarse-grained simulations to avoid singularities
2545          * when three particles align and the dihedral angle and dihedral potential cannot be calculated.
2546          * This potential is calculated using the formula:
2547          * \f[V_{\rm ReB}(\theta_i) = \frac{1}{2} k_{\theta}
2548          * \frac{(\cos\theta_i - \cos\theta_0)^2}{\sin^2\theta_i}\f]
2549          * ({eq:ReB} and ref \cite{MonicaGoga2013} from the manual).
2550          * For more explanations see comments file "restcbt.h" */
2551
2552         compute_factors_restrdihs(
2553                 type, forceparams, delta_ante, delta_crnt, delta_post, &factor_phi_ai_ante,
2554                 &factor_phi_ai_crnt, &factor_phi_ai_post, &factor_phi_aj_ante, &factor_phi_aj_crnt,
2555                 &factor_phi_aj_post, &factor_phi_ak_ante, &factor_phi_ak_crnt, &factor_phi_ak_post,
2556                 &factor_phi_al_ante, &factor_phi_al_crnt, &factor_phi_al_post, &prefactor_phi, &v);
2557
2558
2559         /*      Computation of forces per component */
2560         for (d = 0; d < DIM; d++)
2561         {
2562             f_i[d] = prefactor_phi
2563                      * (factor_phi_ai_ante * delta_ante[d] + factor_phi_ai_crnt * delta_crnt[d]
2564                         + factor_phi_ai_post * delta_post[d]);
2565             f_j[d] = prefactor_phi
2566                      * (factor_phi_aj_ante * delta_ante[d] + factor_phi_aj_crnt * delta_crnt[d]
2567                         + factor_phi_aj_post * delta_post[d]);
2568             f_k[d] = prefactor_phi
2569                      * (factor_phi_ak_ante * delta_ante[d] + factor_phi_ak_crnt * delta_crnt[d]
2570                         + factor_phi_ak_post * delta_post[d]);
2571             f_l[d] = prefactor_phi
2572                      * (factor_phi_al_ante * delta_ante[d] + factor_phi_al_crnt * delta_crnt[d]
2573                         + factor_phi_al_post * delta_post[d]);
2574         }
2575         /*      Computation of the energy */
2576
2577         vtot += v;
2578
2579
2580         /*    Updating the forces */
2581
2582         rvec_inc(f[ai], f_i);
2583         rvec_inc(f[aj], f_j);
2584         rvec_inc(f[ak], f_k);
2585         rvec_inc(f[al], f_l);
2586
2587         if (computeVirial(flavor))
2588         {
2589             if (pbc)
2590             {
2591                 t3 = pbc_rvec_sub(pbc, x[al], x[aj], dx_jl);
2592             }
2593             else
2594             {
2595                 t3 = CENTRAL;
2596             }
2597
2598             rvec_inc(fshift[t1], f_i);
2599             rvec_inc(fshift[CENTRAL], f_j);
2600             rvec_inc(fshift[t2], f_k);
2601             rvec_inc(fshift[t3], f_l);
2602         }
2603     }
2604
2605     return vtot;
2606 }
2607
2608
2609 template<BondedKernelFlavor flavor>
2610 real cbtdihs(int             nbonds,
2611              const t_iatom   forceatoms[],
2612              const t_iparams forceparams[],
2613              const rvec      x[],
2614              rvec4           f[],
2615              rvec            fshift[],
2616              const t_pbc*    pbc,
2617              real gmx_unused lambda,
2618              real gmx_unused* dvdlambda,
2619              const t_mdatoms gmx_unused* md,
2620              t_fcdata gmx_unused* fcd,
2621              int gmx_unused* global_atom_index)
2622 {
2623     int  type, ai, aj, ak, al, i, d;
2624     int  t1, t2, t3;
2625     real v, vtot;
2626     rvec vec_temp;
2627     rvec f_i, f_j, f_k, f_l;
2628     rvec dx_jl;
2629     rvec delta_ante, delta_crnt, delta_post;
2630     rvec f_phi_ai, f_phi_aj, f_phi_ak, f_phi_al;
2631     rvec f_theta_ante_ai, f_theta_ante_aj, f_theta_ante_ak;
2632     rvec f_theta_post_aj, f_theta_post_ak, f_theta_post_al;
2633
2634
2635     vtot = 0.0;
2636     for (i = 0; (i < nbonds);)
2637     {
2638         type = forceatoms[i++];
2639         ai   = forceatoms[i++];
2640         aj   = forceatoms[i++];
2641         ak   = forceatoms[i++];
2642         al   = forceatoms[i++];
2643
2644
2645         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], vec_temp);
2646         pbc_rvec_sub(pbc, x[aj], x[ai], delta_ante);
2647         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], vec_temp);
2648         pbc_rvec_sub(pbc, x[ak], x[aj], delta_crnt);
2649         pbc_rvec_sub(pbc, x[ak], x[al], vec_temp);
2650         pbc_rvec_sub(pbc, x[al], x[ak], delta_post);
2651
2652         /* \brief Compute factors for CBT potential
2653          * The combined bending-torsion potential goes to zero in a very smooth manner, eliminating the numerical
2654          * instabilities, when three coarse-grained particles align and the dihedral angle and standard
2655          * dihedral potentials cannot be calculated. The CBT potential is calculated using the formula:
2656          * \f[V_{\rm CBT}(\theta_{i-1}, \theta_i, \phi_i) = k_{\phi} \sin^3\theta_{i-1} \sin^3\theta_{i}
2657          * \sum_{n=0}^4 { a_n \cos^n\phi_i}\f] ({eq:CBT} and ref \cite{MonicaGoga2013} from the manual).
2658          * It contains in its expression not only the dihedral angle \f$\phi\f$
2659          * but also \f[\theta_{i-1}\f] (theta_ante bellow) and \f[\theta_{i}\f] (theta_post bellow)
2660          * --- the adjacent bending angles.
2661          * For more explanations see comments file "restcbt.h". */
2662
2663         compute_factors_cbtdihs(type, forceparams, delta_ante, delta_crnt, delta_post, f_phi_ai,
2664                                 f_phi_aj, f_phi_ak, f_phi_al, f_theta_ante_ai, f_theta_ante_aj,
2665                                 f_theta_ante_ak, f_theta_post_aj, f_theta_post_ak, f_theta_post_al, &v);
2666
2667
2668         /*      Acumulate the resuts per beads */
2669         for (d = 0; d < DIM; d++)
2670         {
2671             f_i[d] = f_phi_ai[d] + f_theta_ante_ai[d];
2672             f_j[d] = f_phi_aj[d] + f_theta_ante_aj[d] + f_theta_post_aj[d];
2673             f_k[d] = f_phi_ak[d] + f_theta_ante_ak[d] + f_theta_post_ak[d];
2674             f_l[d] = f_phi_al[d] + f_theta_post_al[d];
2675         }
2676
2677         /*      Compute the potential energy */
2678
2679         vtot += v;
2680
2681
2682         /*  Updating the forces */
2683         rvec_inc(f[ai], f_i);
2684         rvec_inc(f[aj], f_j);
2685         rvec_inc(f[ak], f_k);
2686         rvec_inc(f[al], f_l);
2687
2688
2689         if (computeVirial(flavor))
2690         {
2691             if (pbc)
2692             {
2693                 t3 = pbc_rvec_sub(pbc, x[al], x[aj], dx_jl);
2694             }
2695             else
2696             {
2697                 t3 = CENTRAL;
2698             }
2699
2700             rvec_inc(fshift[t1], f_i);
2701             rvec_inc(fshift[CENTRAL], f_j);
2702             rvec_inc(fshift[t2], f_k);
2703             rvec_inc(fshift[t3], f_l);
2704         }
2705     }
2706
2707     return vtot;
2708 }
2709
2710 template<BondedKernelFlavor flavor>
2711 std::enable_if_t<flavor != BondedKernelFlavor::ForcesSimdWhenAvailable || !GMX_SIMD_HAVE_REAL, real>
2712 rbdihs(int             nbonds,
2713        const t_iatom   forceatoms[],
2714        const t_iparams forceparams[],
2715        const rvec      x[],
2716        rvec4           f[],
2717        rvec            fshift[],
2718        const t_pbc*    pbc,
2719        real            lambda,
2720        real*           dvdlambda,
2721        const t_mdatoms gmx_unused* md,
2722        t_fcdata gmx_unused* fcd,
2723        int gmx_unused* global_atom_index)
2724 {
2725     const real c0 = 0.0, c1 = 1.0, c2 = 2.0, c3 = 3.0, c4 = 4.0, c5 = 5.0;
2726     int        type, ai, aj, ak, al, i, j;
2727     int        t1, t2, t3;
2728     rvec       r_ij, r_kj, r_kl, m, n;
2729     real       parmA[NR_RBDIHS];
2730     real       parmB[NR_RBDIHS];
2731     real       parm[NR_RBDIHS];
2732     real       cos_phi, phi, rbp, rbpBA;
2733     real       v, ddphi, sin_phi;
2734     real       cosfac, vtot;
2735     real       L1        = 1.0 - lambda;
2736     real       dvdl_term = 0;
2737
2738     vtot = 0.0;
2739     for (i = 0; (i < nbonds);)
2740     {
2741         type = forceatoms[i++];
2742         ai   = forceatoms[i++];
2743         aj   = forceatoms[i++];
2744         ak   = forceatoms[i++];
2745         al   = forceatoms[i++];
2746
2747         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n, &t1, &t2, &t3); /*  84         */
2748
2749         /* Change to polymer convention */
2750         if (phi < c0)
2751         {
2752             phi += M_PI;
2753         }
2754         else
2755         {
2756             phi -= M_PI; /*   1         */
2757         }
2758         cos_phi = std::cos(phi);
2759         /* Beware of accuracy loss, cannot use 1-sqrt(cos^2) ! */
2760         sin_phi = std::sin(phi);
2761
2762         for (j = 0; (j < NR_RBDIHS); j++)
2763         {
2764             parmA[j] = forceparams[type].rbdihs.rbcA[j];
2765             parmB[j] = forceparams[type].rbdihs.rbcB[j];
2766             parm[j]  = L1 * parmA[j] + lambda * parmB[j];
2767         }
2768         /* Calculate cosine powers */
2769         /* Calculate the energy */
2770         /* Calculate the derivative */
2771
2772         v = parm[0];
2773         dvdl_term += (parmB[0] - parmA[0]);
2774         ddphi  = c0;
2775         cosfac = c1;
2776
2777         rbp   = parm[1];
2778         rbpBA = parmB[1] - parmA[1];
2779         ddphi += rbp * cosfac;
2780         cosfac *= cos_phi;
2781         v += cosfac * rbp;
2782         dvdl_term += cosfac * rbpBA;
2783         rbp   = parm[2];
2784         rbpBA = parmB[2] - parmA[2];
2785         ddphi += c2 * rbp * cosfac;
2786         cosfac *= cos_phi;
2787         v += cosfac * rbp;
2788         dvdl_term += cosfac * rbpBA;
2789         rbp   = parm[3];
2790         rbpBA = parmB[3] - parmA[3];
2791         ddphi += c3 * rbp * cosfac;
2792         cosfac *= cos_phi;
2793         v += cosfac * rbp;
2794         dvdl_term += cosfac * rbpBA;
2795         rbp   = parm[4];
2796         rbpBA = parmB[4] - parmA[4];
2797         ddphi += c4 * rbp * cosfac;
2798         cosfac *= cos_phi;
2799         v += cosfac * rbp;
2800         dvdl_term += cosfac * rbpBA;
2801         rbp   = parm[5];
2802         rbpBA = parmB[5] - parmA[5];
2803         ddphi += c5 * rbp * cosfac;
2804         cosfac *= cos_phi;
2805         v += cosfac * rbp;
2806         dvdl_term += cosfac * rbpBA;
2807
2808         ddphi = -ddphi * sin_phi; /*  11                */
2809
2810         do_dih_fup<flavor>(ai, aj, ak, al, ddphi, r_ij, r_kj, r_kl, m, n, f, fshift, pbc, x, t1, t2,
2811                            t3); /* 112          */
2812         vtot += v;
2813     }
2814     *dvdlambda += dvdl_term;
2815
2816     return vtot;
2817 }
2818
2819 //! \endcond
2820
2821 /*! \brief Mysterious undocumented function */
2822 int cmap_setup_grid_index(int ip, int grid_spacing, int* ipm1, int* ipp1, int* ipp2)
2823 {
2824     int im1, ip1, ip2;
2825
2826     if (ip < 0)
2827     {
2828         ip = ip + grid_spacing - 1;
2829     }
2830     else if (ip > grid_spacing)
2831     {
2832         ip = ip - grid_spacing - 1;
2833     }
2834
2835     im1 = ip - 1;
2836     ip1 = ip + 1;
2837     ip2 = ip + 2;
2838
2839     if (ip == 0)
2840     {
2841         im1 = grid_spacing - 1;
2842     }
2843     else if (ip == grid_spacing - 2)
2844     {
2845         ip2 = 0;
2846     }
2847     else if (ip == grid_spacing - 1)
2848     {
2849         ip1 = 0;
2850         ip2 = 1;
2851     }
2852
2853     *ipm1 = im1;
2854     *ipp1 = ip1;
2855     *ipp2 = ip2;
2856
2857     return ip;
2858 }
2859
2860 } // namespace
2861
2862 real cmap_dihs(int                 nbonds,
2863                const t_iatom       forceatoms[],
2864                const t_iparams     forceparams[],
2865                const gmx_cmap_t*   cmap_grid,
2866                const rvec          x[],
2867                rvec4               f[],
2868                rvec                fshift[],
2869                const struct t_pbc* pbc,
2870                real gmx_unused lambda,
2871                real gmx_unused* dvdlambda,
2872                const t_mdatoms gmx_unused* md,
2873                t_fcdata gmx_unused* fcd,
2874                int gmx_unused* global_atom_index)
2875 {
2876     int i, n;
2877     int ai, aj, ak, al, am;
2878     int a1i, a1j, a1k, a1l, a2i, a2j, a2k, a2l;
2879     int type;
2880     int t11, t21, t31, t12, t22, t32;
2881     int iphi1, ip1m1, ip1p1, ip1p2;
2882     int iphi2, ip2m1, ip2p1, ip2p2;
2883     int l1, l2, l3;
2884     int pos1, pos2, pos3, pos4;
2885
2886     real ty[4], ty1[4], ty2[4], ty12[4], tx[16];
2887     real phi1, cos_phi1, sin_phi1, xphi1;
2888     real phi2, cos_phi2, sin_phi2, xphi2;
2889     real dx, tt, tu, e, df1, df2, vtot;
2890     real ra21, rb21, rg21, rg1, rgr1, ra2r1, rb2r1, rabr1;
2891     real ra22, rb22, rg22, rg2, rgr2, ra2r2, rb2r2, rabr2;
2892     real fg1, hg1, fga1, hgb1, gaa1, gbb1;
2893     real fg2, hg2, fga2, hgb2, gaa2, gbb2;
2894     real fac;
2895
2896     rvec r1_ij, r1_kj, r1_kl, m1, n1;
2897     rvec r2_ij, r2_kj, r2_kl, m2, n2;
2898     rvec f1_i, f1_j, f1_k, f1_l;
2899     rvec f2_i, f2_j, f2_k, f2_l;
2900     rvec a1, b1, a2, b2;
2901     rvec f1, g1, h1, f2, g2, h2;
2902     rvec dtf1, dtg1, dth1, dtf2, dtg2, dth2;
2903
2904     int loop_index[4][4] = { { 0, 4, 8, 12 }, { 1, 5, 9, 13 }, { 2, 6, 10, 14 }, { 3, 7, 11, 15 } };
2905
2906     /* Total CMAP energy */
2907     vtot = 0;
2908
2909     for (n = 0; n < nbonds;)
2910     {
2911         /* Five atoms are involved in the two torsions */
2912         type = forceatoms[n++];
2913         ai   = forceatoms[n++];
2914         aj   = forceatoms[n++];
2915         ak   = forceatoms[n++];
2916         al   = forceatoms[n++];
2917         am   = forceatoms[n++];
2918
2919         /* Which CMAP type is this */
2920         const int   cmapA = forceparams[type].cmap.cmapA;
2921         const real* cmapd = cmap_grid->cmapdata[cmapA].cmap.data();
2922
2923         /* First torsion */
2924         a1i = ai;
2925         a1j = aj;
2926         a1k = ak;
2927         a1l = al;
2928
2929         phi1 = dih_angle(x[a1i], x[a1j], x[a1k], x[a1l], pbc, r1_ij, r1_kj, r1_kl, m1, n1, &t11,
2930                          &t21, &t31); /* 84 */
2931
2932         cos_phi1 = std::cos(phi1);
2933
2934         a1[0] = r1_ij[1] * r1_kj[2] - r1_ij[2] * r1_kj[1];
2935         a1[1] = r1_ij[2] * r1_kj[0] - r1_ij[0] * r1_kj[2];
2936         a1[2] = r1_ij[0] * r1_kj[1] - r1_ij[1] * r1_kj[0]; /* 9 */
2937
2938         b1[0] = r1_kl[1] * r1_kj[2] - r1_kl[2] * r1_kj[1];
2939         b1[1] = r1_kl[2] * r1_kj[0] - r1_kl[0] * r1_kj[2];
2940         b1[2] = r1_kl[0] * r1_kj[1] - r1_kl[1] * r1_kj[0]; /* 9 */
2941
2942         pbc_rvec_sub(pbc, x[a1l], x[a1k], h1);
2943
2944         ra21 = iprod(a1, a1);       /* 5 */
2945         rb21 = iprod(b1, b1);       /* 5 */
2946         rg21 = iprod(r1_kj, r1_kj); /* 5 */
2947         rg1  = sqrt(rg21);
2948
2949         rgr1  = 1.0 / rg1;
2950         ra2r1 = 1.0 / ra21;
2951         rb2r1 = 1.0 / rb21;
2952         rabr1 = sqrt(ra2r1 * rb2r1);
2953
2954         sin_phi1 = rg1 * rabr1 * iprod(a1, h1) * (-1);
2955
2956         if (cos_phi1 < -0.5 || cos_phi1 > 0.5)
2957         {
2958             phi1 = std::asin(sin_phi1);
2959
2960             if (cos_phi1 < 0)
2961             {
2962                 if (phi1 > 0)
2963                 {
2964                     phi1 = M_PI - phi1;
2965                 }
2966                 else
2967                 {
2968                     phi1 = -M_PI - phi1;
2969                 }
2970             }
2971         }
2972         else
2973         {
2974             phi1 = std::acos(cos_phi1);
2975
2976             if (sin_phi1 < 0)
2977             {
2978                 phi1 = -phi1;
2979             }
2980         }
2981
2982         xphi1 = phi1 + M_PI; /* 1 */
2983
2984         /* Second torsion */
2985         a2i = aj;
2986         a2j = ak;
2987         a2k = al;
2988         a2l = am;
2989
2990         phi2 = dih_angle(x[a2i], x[a2j], x[a2k], x[a2l], pbc, r2_ij, r2_kj, r2_kl, m2, n2, &t12,
2991                          &t22, &t32); /* 84 */
2992
2993         cos_phi2 = std::cos(phi2);
2994
2995         a2[0] = r2_ij[1] * r2_kj[2] - r2_ij[2] * r2_kj[1];
2996         a2[1] = r2_ij[2] * r2_kj[0] - r2_ij[0] * r2_kj[2];
2997         a2[2] = r2_ij[0] * r2_kj[1] - r2_ij[1] * r2_kj[0]; /* 9 */
2998
2999         b2[0] = r2_kl[1] * r2_kj[2] - r2_kl[2] * r2_kj[1];
3000         b2[1] = r2_kl[2] * r2_kj[0] - r2_kl[0] * r2_kj[2];
3001         b2[2] = r2_kl[0] * r2_kj[1] - r2_kl[1] * r2_kj[0]; /* 9 */
3002
3003         pbc_rvec_sub(pbc, x[a2l], x[a2k], h2);
3004
3005         ra22 = iprod(a2, a2);       /* 5 */
3006         rb22 = iprod(b2, b2);       /* 5 */
3007         rg22 = iprod(r2_kj, r2_kj); /* 5 */
3008         rg2  = sqrt(rg22);
3009
3010         rgr2  = 1.0 / rg2;
3011         ra2r2 = 1.0 / ra22;
3012         rb2r2 = 1.0 / rb22;
3013         rabr2 = sqrt(ra2r2 * rb2r2);
3014
3015         sin_phi2 = rg2 * rabr2 * iprod(a2, h2) * (-1);
3016
3017         if (cos_phi2 < -0.5 || cos_phi2 > 0.5)
3018         {
3019             phi2 = std::asin(sin_phi2);
3020
3021             if (cos_phi2 < 0)
3022             {
3023                 if (phi2 > 0)
3024                 {
3025                     phi2 = M_PI - phi2;
3026                 }
3027                 else
3028                 {
3029                     phi2 = -M_PI - phi2;
3030                 }
3031             }
3032         }
3033         else
3034         {
3035             phi2 = std::acos(cos_phi2);
3036
3037             if (sin_phi2 < 0)
3038             {
3039                 phi2 = -phi2;
3040             }
3041         }
3042
3043         xphi2 = phi2 + M_PI; /* 1 */
3044
3045         /* Range mangling */
3046         if (xphi1 < 0)
3047         {
3048             xphi1 = xphi1 + 2 * M_PI;
3049         }
3050         else if (xphi1 >= 2 * M_PI)
3051         {
3052             xphi1 = xphi1 - 2 * M_PI;
3053         }
3054
3055         if (xphi2 < 0)
3056         {
3057             xphi2 = xphi2 + 2 * M_PI;
3058         }
3059         else if (xphi2 >= 2 * M_PI)
3060         {
3061             xphi2 = xphi2 - 2 * M_PI;
3062         }
3063
3064         /* Number of grid points */
3065         dx = 2 * M_PI / cmap_grid->grid_spacing;
3066
3067         /* Where on the grid are we */
3068         iphi1 = static_cast<int>(xphi1 / dx);
3069         iphi2 = static_cast<int>(xphi2 / dx);
3070
3071         iphi1 = cmap_setup_grid_index(iphi1, cmap_grid->grid_spacing, &ip1m1, &ip1p1, &ip1p2);
3072         iphi2 = cmap_setup_grid_index(iphi2, cmap_grid->grid_spacing, &ip2m1, &ip2p1, &ip2p2);
3073
3074         pos1 = iphi1 * cmap_grid->grid_spacing + iphi2;
3075         pos2 = ip1p1 * cmap_grid->grid_spacing + iphi2;
3076         pos3 = ip1p1 * cmap_grid->grid_spacing + ip2p1;
3077         pos4 = iphi1 * cmap_grid->grid_spacing + ip2p1;
3078
3079         ty[0] = cmapd[pos1 * 4];
3080         ty[1] = cmapd[pos2 * 4];
3081         ty[2] = cmapd[pos3 * 4];
3082         ty[3] = cmapd[pos4 * 4];
3083
3084         ty1[0] = cmapd[pos1 * 4 + 1];
3085         ty1[1] = cmapd[pos2 * 4 + 1];
3086         ty1[2] = cmapd[pos3 * 4 + 1];
3087         ty1[3] = cmapd[pos4 * 4 + 1];
3088
3089         ty2[0] = cmapd[pos1 * 4 + 2];
3090         ty2[1] = cmapd[pos2 * 4 + 2];
3091         ty2[2] = cmapd[pos3 * 4 + 2];
3092         ty2[3] = cmapd[pos4 * 4 + 2];
3093
3094         ty12[0] = cmapd[pos1 * 4 + 3];
3095         ty12[1] = cmapd[pos2 * 4 + 3];
3096         ty12[2] = cmapd[pos3 * 4 + 3];
3097         ty12[3] = cmapd[pos4 * 4 + 3];
3098
3099         /* Switch to degrees */
3100         dx    = 360.0 / cmap_grid->grid_spacing;
3101         xphi1 = xphi1 * RAD2DEG;
3102         xphi2 = xphi2 * RAD2DEG;
3103
3104         for (i = 0; i < 4; i++) /* 16 */
3105         {
3106             tx[i]      = ty[i];
3107             tx[i + 4]  = ty1[i] * dx;
3108             tx[i + 8]  = ty2[i] * dx;
3109             tx[i + 12] = ty12[i] * dx * dx;
3110         }
3111
3112         real tc[16] = { 0 };
3113         for (int idx = 0; idx < 16; idx++) /* 1056 */
3114         {
3115             for (int k = 0; k < 16; k++)
3116             {
3117                 tc[idx] += cmap_coeff_matrix[k * 16 + idx] * tx[k];
3118             }
3119         }
3120
3121         tt = (xphi1 - iphi1 * dx) / dx;
3122         tu = (xphi2 - iphi2 * dx) / dx;
3123
3124         e   = 0;
3125         df1 = 0;
3126         df2 = 0;
3127
3128         for (i = 3; i >= 0; i--)
3129         {
3130             l1 = loop_index[i][3];
3131             l2 = loop_index[i][2];
3132             l3 = loop_index[i][1];
3133
3134             e = tt * e + ((tc[i * 4 + 3] * tu + tc[i * 4 + 2]) * tu + tc[i * 4 + 1]) * tu + tc[i * 4];
3135             df1 = tu * df1 + (3.0 * tc[l1] * tt + 2.0 * tc[l2]) * tt + tc[l3];
3136             df2 = tt * df2 + (3.0 * tc[i * 4 + 3] * tu + 2.0 * tc[i * 4 + 2]) * tu + tc[i * 4 + 1];
3137         }
3138
3139         fac = RAD2DEG / dx;
3140         df1 = df1 * fac;
3141         df2 = df2 * fac;
3142
3143         /* CMAP energy */
3144         vtot += e;
3145
3146         /* Do forces - first torsion */
3147         fg1  = iprod(r1_ij, r1_kj);
3148         hg1  = iprod(r1_kl, r1_kj);
3149         fga1 = fg1 * ra2r1 * rgr1;
3150         hgb1 = hg1 * rb2r1 * rgr1;
3151         gaa1 = -ra2r1 * rg1;
3152         gbb1 = rb2r1 * rg1;
3153
3154         for (i = 0; i < DIM; i++)
3155         {
3156             dtf1[i] = gaa1 * a1[i];
3157             dtg1[i] = fga1 * a1[i] - hgb1 * b1[i];
3158             dth1[i] = gbb1 * b1[i];
3159
3160             f1[i] = df1 * dtf1[i];
3161             g1[i] = df1 * dtg1[i];
3162             h1[i] = df1 * dth1[i];
3163
3164             f1_i[i] = f1[i];
3165             f1_j[i] = -f1[i] - g1[i];
3166             f1_k[i] = h1[i] + g1[i];
3167             f1_l[i] = -h1[i];
3168
3169             f[a1i][i] = f[a1i][i] + f1_i[i];
3170             f[a1j][i] = f[a1j][i] + f1_j[i]; /* - f1[i] - g1[i] */
3171             f[a1k][i] = f[a1k][i] + f1_k[i]; /* h1[i] + g1[i] */
3172             f[a1l][i] = f[a1l][i] + f1_l[i]; /* h1[i] */
3173         }
3174
3175         /* Do forces - second torsion */
3176         fg2  = iprod(r2_ij, r2_kj);
3177         hg2  = iprod(r2_kl, r2_kj);
3178         fga2 = fg2 * ra2r2 * rgr2;
3179         hgb2 = hg2 * rb2r2 * rgr2;
3180         gaa2 = -ra2r2 * rg2;
3181         gbb2 = rb2r2 * rg2;
3182
3183         for (i = 0; i < DIM; i++)
3184         {
3185             dtf2[i] = gaa2 * a2[i];
3186             dtg2[i] = fga2 * a2[i] - hgb2 * b2[i];
3187             dth2[i] = gbb2 * b2[i];
3188
3189             f2[i] = df2 * dtf2[i];
3190             g2[i] = df2 * dtg2[i];
3191             h2[i] = df2 * dth2[i];
3192
3193             f2_i[i] = f2[i];
3194             f2_j[i] = -f2[i] - g2[i];
3195             f2_k[i] = h2[i] + g2[i];
3196             f2_l[i] = -h2[i];
3197
3198             f[a2i][i] = f[a2i][i] + f2_i[i]; /* f2[i] */
3199             f[a2j][i] = f[a2j][i] + f2_j[i]; /* - f2[i] - g2[i] */
3200             f[a2k][i] = f[a2k][i] + f2_k[i]; /* h2[i] + g2[i] */
3201             f[a2l][i] = f[a2l][i] + f2_l[i]; /* - h2[i] */
3202         }
3203
3204         /* Shift forces */
3205         if (fshift != nullptr)
3206         {
3207             if (pbc)
3208             {
3209                 t31 = pbc_rvec_sub(pbc, x[a1l], x[a1j], h1);
3210                 t32 = pbc_rvec_sub(pbc, x[a2l], x[a2j], h2);
3211             }
3212             else
3213             {
3214                 t31 = CENTRAL;
3215                 t32 = CENTRAL;
3216             }
3217
3218             rvec_inc(fshift[t11], f1_i);
3219             rvec_inc(fshift[CENTRAL], f1_j);
3220             rvec_inc(fshift[t21], f1_k);
3221             rvec_inc(fshift[t31], f1_l);
3222
3223             rvec_inc(fshift[t12], f2_i);
3224             rvec_inc(fshift[CENTRAL], f2_j);
3225             rvec_inc(fshift[t22], f2_k);
3226             rvec_inc(fshift[t32], f2_l);
3227         }
3228     }
3229     return vtot;
3230 }
3231
3232 namespace
3233 {
3234
3235 //! \cond
3236 /***********************************************************
3237  *
3238  *   G R O M O S  9 6   F U N C T I O N S
3239  *
3240  ***********************************************************/
3241 real g96harmonic(real kA, real kB, real xA, real xB, real x, real lambda, real* V, real* F)
3242 {
3243     const real half = 0.5;
3244     real       L1, kk, x0, dx, dx2;
3245     real       v, f, dvdlambda;
3246
3247     L1 = 1.0 - lambda;
3248     kk = L1 * kA + lambda * kB;
3249     x0 = L1 * xA + lambda * xB;
3250
3251     dx  = x - x0;
3252     dx2 = dx * dx;
3253
3254     f         = -kk * dx;
3255     v         = half * kk * dx2;
3256     dvdlambda = half * (kB - kA) * dx2 + (xA - xB) * kk * dx;
3257
3258     *F = f;
3259     *V = v;
3260
3261     return dvdlambda;
3262
3263     /* That was 21 flops */
3264 }
3265
3266 template<BondedKernelFlavor flavor>
3267 real g96bonds(int             nbonds,
3268               const t_iatom   forceatoms[],
3269               const t_iparams forceparams[],
3270               const rvec      x[],
3271               rvec4           f[],
3272               rvec            fshift[],
3273               const t_pbc*    pbc,
3274               real            lambda,
3275               real*           dvdlambda,
3276               const t_mdatoms gmx_unused* md,
3277               t_fcdata gmx_unused* fcd,
3278               int gmx_unused* global_atom_index)
3279 {
3280     int  i, ki, ai, aj, type;
3281     real dr2, fbond, vbond, vtot;
3282     rvec dx;
3283
3284     vtot = 0.0;
3285     for (i = 0; (i < nbonds);)
3286     {
3287         type = forceatoms[i++];
3288         ai   = forceatoms[i++];
3289         aj   = forceatoms[i++];
3290
3291         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
3292         dr2 = iprod(dx, dx);                       /*   5               */
3293
3294         *dvdlambda += g96harmonic(forceparams[type].harmonic.krA, forceparams[type].harmonic.krB,
3295                                   forceparams[type].harmonic.rA, forceparams[type].harmonic.rB, dr2,
3296                                   lambda, &vbond, &fbond);
3297
3298         vtot += 0.5 * vbond; /* 1*/
3299
3300         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, fshift); /* 15 */
3301     }                                                               /* 44 TOTAL */
3302     return vtot;
3303 }
3304
3305 real g96bond_angle(const rvec xi, const rvec xj, const rvec xk, const t_pbc* pbc, rvec r_ij, rvec r_kj, int* t1, int* t2)
3306 /* Return value is the angle between the bonds i-j and j-k */
3307 {
3308     real costh;
3309
3310     *t1 = pbc_rvec_sub(pbc, xi, xj, r_ij); /*  3                */
3311     *t2 = pbc_rvec_sub(pbc, xk, xj, r_kj); /*  3                */
3312
3313     costh = cos_angle(r_ij, r_kj); /* 25                */
3314     /* 41 TOTAL */
3315     return costh;
3316 }
3317
3318 template<BondedKernelFlavor flavor>
3319 real g96angles(int             nbonds,
3320                const t_iatom   forceatoms[],
3321                const t_iparams forceparams[],
3322                const rvec      x[],
3323                rvec4           f[],
3324                rvec            fshift[],
3325                const t_pbc*    pbc,
3326                real            lambda,
3327                real*           dvdlambda,
3328                const t_mdatoms gmx_unused* md,
3329                t_fcdata gmx_unused* fcd,
3330                int gmx_unused* global_atom_index)
3331 {
3332     int  i, ai, aj, ak, type, m, t1, t2;
3333     rvec r_ij, r_kj;
3334     real cos_theta, dVdt, va, vtot;
3335     real rij_1, rij_2, rkj_1, rkj_2, rijrkj_1;
3336     rvec f_i, f_j, f_k;
3337
3338     vtot = 0.0;
3339     for (i = 0; (i < nbonds);)
3340     {
3341         type = forceatoms[i++];
3342         ai   = forceatoms[i++];
3343         aj   = forceatoms[i++];
3344         ak   = forceatoms[i++];
3345
3346         cos_theta = g96bond_angle(x[ai], x[aj], x[ak], pbc, r_ij, r_kj, &t1, &t2);
3347
3348         *dvdlambda += g96harmonic(forceparams[type].harmonic.krA, forceparams[type].harmonic.krB,
3349                                   forceparams[type].harmonic.rA, forceparams[type].harmonic.rB,
3350                                   cos_theta, lambda, &va, &dVdt);
3351         vtot += va;
3352
3353         rij_1    = gmx::invsqrt(iprod(r_ij, r_ij));
3354         rkj_1    = gmx::invsqrt(iprod(r_kj, r_kj));
3355         rij_2    = rij_1 * rij_1;
3356         rkj_2    = rkj_1 * rkj_1;
3357         rijrkj_1 = rij_1 * rkj_1; /* 23 */
3358
3359         for (m = 0; (m < DIM); m++) /*  42      */
3360         {
3361             f_i[m] = dVdt * (r_kj[m] * rijrkj_1 - r_ij[m] * rij_2 * cos_theta);
3362             f_k[m] = dVdt * (r_ij[m] * rijrkj_1 - r_kj[m] * rkj_2 * cos_theta);
3363             f_j[m] = -f_i[m] - f_k[m];
3364             f[ai][m] += f_i[m];
3365             f[aj][m] += f_j[m];
3366             f[ak][m] += f_k[m];
3367         }
3368
3369         if (computeVirial(flavor))
3370         {
3371             rvec_inc(fshift[t1], f_i);
3372             rvec_inc(fshift[CENTRAL], f_j);
3373             rvec_inc(fshift[t2], f_k); /* 9 */
3374         }
3375         /* 163 TOTAL    */
3376     }
3377     return vtot;
3378 }
3379
3380 template<BondedKernelFlavor flavor>
3381 real cross_bond_bond(int             nbonds,
3382                      const t_iatom   forceatoms[],
3383                      const t_iparams forceparams[],
3384                      const rvec      x[],
3385                      rvec4           f[],
3386                      rvec            fshift[],
3387                      const t_pbc*    pbc,
3388                      real gmx_unused lambda,
3389                      real gmx_unused* dvdlambda,
3390                      const t_mdatoms gmx_unused* md,
3391                      t_fcdata gmx_unused* fcd,
3392                      int gmx_unused* global_atom_index)
3393 {
3394     /* Potential from Lawrence and Skimmer, Chem. Phys. Lett. 372 (2003)
3395      * pp. 842-847
3396      */
3397     int  i, ai, aj, ak, type, m, t1, t2;
3398     rvec r_ij, r_kj;
3399     real vtot, vrr, s1, s2, r1, r2, r1e, r2e, krr;
3400     rvec f_i, f_j, f_k;
3401
3402     vtot = 0.0;
3403     for (i = 0; (i < nbonds);)
3404     {
3405         type = forceatoms[i++];
3406         ai   = forceatoms[i++];
3407         aj   = forceatoms[i++];
3408         ak   = forceatoms[i++];
3409         r1e  = forceparams[type].cross_bb.r1e;
3410         r2e  = forceparams[type].cross_bb.r2e;
3411         krr  = forceparams[type].cross_bb.krr;
3412
3413         /* Compute distance vectors ... */
3414         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], r_ij);
3415         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], r_kj);
3416
3417         /* ... and their lengths */
3418         r1 = norm(r_ij);
3419         r2 = norm(r_kj);
3420
3421         /* Deviations from ideality */
3422         s1 = r1 - r1e;
3423         s2 = r2 - r2e;
3424
3425         /* Energy (can be negative!) */
3426         vrr = krr * s1 * s2;
3427         vtot += vrr;
3428
3429         /* Forces */
3430         svmul(-krr * s2 / r1, r_ij, f_i);
3431         svmul(-krr * s1 / r2, r_kj, f_k);
3432
3433         for (m = 0; (m < DIM); m++) /*  12      */
3434         {
3435             f_j[m] = -f_i[m] - f_k[m];
3436             f[ai][m] += f_i[m];
3437             f[aj][m] += f_j[m];
3438             f[ak][m] += f_k[m];
3439         }
3440
3441         if (computeVirial(flavor))
3442         {
3443             rvec_inc(fshift[t1], f_i);
3444             rvec_inc(fshift[CENTRAL], f_j);
3445             rvec_inc(fshift[t2], f_k); /* 9 */
3446         }
3447         /* 163 TOTAL    */
3448     }
3449     return vtot;
3450 }
3451
3452 template<BondedKernelFlavor flavor>
3453 real cross_bond_angle(int             nbonds,
3454                       const t_iatom   forceatoms[],
3455                       const t_iparams forceparams[],
3456                       const rvec      x[],
3457                       rvec4           f[],
3458                       rvec            fshift[],
3459                       const t_pbc*    pbc,
3460                       real gmx_unused lambda,
3461                       real gmx_unused* dvdlambda,
3462                       const t_mdatoms gmx_unused* md,
3463                       t_fcdata gmx_unused* fcd,
3464                       int gmx_unused* global_atom_index)
3465 {
3466     /* Potential from Lawrence and Skimmer, Chem. Phys. Lett. 372 (2003)
3467      * pp. 842-847
3468      */
3469     int  i, ai, aj, ak, type, m, t1, t2;
3470     rvec r_ij, r_kj, r_ik;
3471     real vtot, vrt, s1, s2, s3, r1, r2, r3, r1e, r2e, r3e, krt, k1, k2, k3;
3472     rvec f_i, f_j, f_k;
3473
3474     vtot = 0.0;
3475     for (i = 0; (i < nbonds);)
3476     {
3477         type = forceatoms[i++];
3478         ai   = forceatoms[i++];
3479         aj   = forceatoms[i++];
3480         ak   = forceatoms[i++];
3481         r1e  = forceparams[type].cross_ba.r1e;
3482         r2e  = forceparams[type].cross_ba.r2e;
3483         r3e  = forceparams[type].cross_ba.r3e;
3484         krt  = forceparams[type].cross_ba.krt;
3485
3486         /* Compute distance vectors ... */
3487         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], r_ij);
3488         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], r_kj);
3489         pbc_rvec_sub(pbc, x[ai], x[ak], r_ik);
3490
3491         /* ... and their lengths */
3492         r1 = norm(r_ij);
3493         r2 = norm(r_kj);
3494         r3 = norm(r_ik);
3495
3496         /* Deviations from ideality */
3497         s1 = r1 - r1e;
3498         s2 = r2 - r2e;
3499         s3 = r3 - r3e;
3500
3501         /* Energy (can be negative!) */
3502         vrt = krt * s3 * (s1 + s2);
3503         vtot += vrt;
3504
3505         /* Forces */
3506         k1 = -krt * (s3 / r1);
3507         k2 = -krt * (s3 / r2);
3508         k3 = -krt * (s1 + s2) / r3;
3509         for (m = 0; (m < DIM); m++)
3510         {
3511             f_i[m] = k1 * r_ij[m] + k3 * r_ik[m];
3512             f_k[m] = k2 * r_kj[m] - k3 * r_ik[m];
3513             f_j[m] = -f_i[m] - f_k[m];
3514         }
3515
3516         for (m = 0; (m < DIM); m++) /*  12      */
3517         {
3518             f[ai][m] += f_i[m];
3519             f[aj][m] += f_j[m];
3520             f[ak][m] += f_k[m];
3521         }
3522
3523         if (computeVirial(flavor))
3524         {
3525             rvec_inc(fshift[t1], f_i);
3526             rvec_inc(fshift[CENTRAL], f_j);
3527             rvec_inc(fshift[t2], f_k); /* 9 */
3528         }
3529         /* 163 TOTAL    */
3530     }
3531     return vtot;
3532 }
3533
3534 /*! \brief Computes the potential and force for a tabulated potential */
3535 real bonded_tab(const char*          type,
3536                 int                  table_nr,
3537                 const bondedtable_t* table,
3538                 real                 kA,
3539                 real                 kB,
3540                 real                 r,
3541                 real                 lambda,
3542                 real*                V,
3543                 real*                F)
3544 {
3545     real k, tabscale, *VFtab, rt, eps, eps2, Yt, Ft, Geps, Heps2, Fp, VV, FF;
3546     int  n0, nnn;
3547     real dvdlambda;
3548
3549     k = (1.0 - lambda) * kA + lambda * kB;
3550
3551     tabscale = table->scale;
3552     VFtab    = table->data;
3553
3554     rt = r * tabscale;
3555     n0 = static_cast<int>(rt);
3556     if (n0 >= table->n)
3557     {
3558         gmx_fatal(FARGS,
3559                   "A tabulated %s interaction table number %d is out of the table range: r %f, "
3560                   "between table indices %d and %d, table length %d",
3561                   type, table_nr, r, n0, n0 + 1, table->n);
3562     }
3563     eps   = rt - n0;
3564     eps2  = eps * eps;
3565     nnn   = 4 * n0;
3566     Yt    = VFtab[nnn];
3567     Ft    = VFtab[nnn + 1];
3568     Geps  = VFtab[nnn + 2] * eps;
3569     Heps2 = VFtab[nnn + 3] * eps2;
3570     Fp    = Ft + Geps + Heps2;
3571     VV    = Yt + Fp * eps;
3572     FF    = Fp + Geps + 2.0 * Heps2;
3573
3574     *F        = -k * FF * tabscale;
3575     *V        = k * VV;
3576     dvdlambda = (kB - kA) * VV;
3577
3578     return dvdlambda;
3579
3580     /* That was 22 flops */
3581 }
3582
3583 template<BondedKernelFlavor flavor>
3584 real tab_bonds(int             nbonds,
3585                const t_iatom   forceatoms[],
3586                const t_iparams forceparams[],
3587                const rvec      x[],
3588                rvec4           f[],
3589                rvec            fshift[],
3590                const t_pbc*    pbc,
3591                real            lambda,
3592                real*           dvdlambda,
3593                const t_mdatoms gmx_unused* md,
3594                t_fcdata*                   fcd,
3595                int gmx_unused* global_atom_index)
3596 {
3597     int  i, ki, ai, aj, type, table;
3598     real dr, dr2, fbond, vbond, vtot;
3599     rvec dx;
3600
3601     vtot = 0.0;
3602     for (i = 0; (i < nbonds);)
3603     {
3604         type = forceatoms[i++];
3605         ai   = forceatoms[i++];
3606         aj   = forceatoms[i++];
3607
3608         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
3609         dr2 = iprod(dx, dx);                       /*   5               */
3610         dr  = dr2 * gmx::invsqrt(dr2);             /*  10               */
3611
3612         table = forceparams[type].tab.table;
3613
3614         *dvdlambda += bonded_tab("bond", table, &fcd->bondtab[table], forceparams[type].tab.kA,
3615                                  forceparams[type].tab.kB, dr, lambda, &vbond, &fbond); /*  22 */
3616
3617         if (dr2 == 0.0)
3618         {
3619             continue;
3620         }
3621
3622
3623         vtot += vbond;              /* 1*/
3624         fbond *= gmx::invsqrt(dr2); /*   6              */
3625
3626         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, fshift); /* 15 */
3627     }                                                               /* 62 TOTAL */
3628     return vtot;
3629 }
3630
3631 template<BondedKernelFlavor flavor>
3632 real tab_angles(int             nbonds,
3633                 const t_iatom   forceatoms[],
3634                 const t_iparams forceparams[],
3635                 const rvec      x[],
3636                 rvec4           f[],
3637                 rvec            fshift[],
3638                 const t_pbc*    pbc,
3639                 real            lambda,
3640                 real*           dvdlambda,
3641                 const t_mdatoms gmx_unused* md,
3642                 t_fcdata*                   fcd,
3643                 int gmx_unused* global_atom_index)
3644 {
3645     int  i, ai, aj, ak, t1, t2, type, table;
3646     rvec r_ij, r_kj;
3647     real cos_theta, cos_theta2, theta, dVdt, va, vtot;
3648
3649     vtot = 0.0;
3650     for (i = 0; (i < nbonds);)
3651     {
3652         type = forceatoms[i++];
3653         ai   = forceatoms[i++];
3654         aj   = forceatoms[i++];
3655         ak   = forceatoms[i++];
3656
3657         theta = bond_angle(x[ai], x[aj], x[ak], pbc, r_ij, r_kj, &cos_theta, &t1, &t2); /*  41 */
3658
3659         table = forceparams[type].tab.table;
3660
3661         *dvdlambda += bonded_tab("angle", table, &fcd->angletab[table], forceparams[type].tab.kA,
3662                                  forceparams[type].tab.kB, theta, lambda, &va, &dVdt); /*  22  */
3663         vtot += va;
3664
3665         cos_theta2 = gmx::square(cos_theta); /*   1             */
3666         if (cos_theta2 < 1)
3667         {
3668             int  m;
3669             real st, sth;
3670             real cik, cii, ckk;
3671             real nrkj2, nrij2;
3672             rvec f_i, f_j, f_k;
3673
3674             st    = dVdt * gmx::invsqrt(1 - cos_theta2); /*  12         */
3675             sth   = st * cos_theta;                      /*   1         */
3676             nrkj2 = iprod(r_kj, r_kj);                   /*   5         */
3677             nrij2 = iprod(r_ij, r_ij);
3678
3679             cik = st * gmx::invsqrt(nrkj2 * nrij2); /*  12              */
3680             cii = sth / nrij2;                      /*  10              */
3681             ckk = sth / nrkj2;                      /*  10              */
3682
3683             for (m = 0; (m < DIM); m++) /*  39          */
3684             {
3685                 f_i[m] = -(cik * r_kj[m] - cii * r_ij[m]);
3686                 f_k[m] = -(cik * r_ij[m] - ckk * r_kj[m]);
3687                 f_j[m] = -f_i[m] - f_k[m];
3688                 f[ai][m] += f_i[m];
3689                 f[aj][m] += f_j[m];
3690                 f[ak][m] += f_k[m];
3691             }
3692
3693             if (computeVirial(flavor))
3694             {
3695                 rvec_inc(fshift[t1], f_i);
3696                 rvec_inc(fshift[CENTRAL], f_j);
3697                 rvec_inc(fshift[t2], f_k);
3698             }
3699         } /* 169 TOTAL  */
3700     }
3701     return vtot;
3702 }
3703
3704 template<BondedKernelFlavor flavor>
3705 real tab_dihs(int             nbonds,
3706               const t_iatom   forceatoms[],
3707               const t_iparams forceparams[],
3708               const rvec      x[],
3709               rvec4           f[],
3710               rvec            fshift[],
3711               const t_pbc*    pbc,
3712               real            lambda,
3713               real*           dvdlambda,
3714               const t_mdatoms gmx_unused* md,
3715               t_fcdata*                   fcd,
3716               int gmx_unused* global_atom_index)
3717 {
3718     int  i, type, ai, aj, ak, al, table;
3719     int  t1, t2, t3;
3720     rvec r_ij, r_kj, r_kl, m, n;
3721     real phi, ddphi, vpd, vtot;
3722
3723     vtot = 0.0;
3724     for (i = 0; (i < nbonds);)
3725     {
3726         type = forceatoms[i++];
3727         ai   = forceatoms[i++];
3728         aj   = forceatoms[i++];
3729         ak   = forceatoms[i++];
3730         al   = forceatoms[i++];
3731
3732         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n, &t1, &t2, &t3); /*  84  */
3733
3734         table = forceparams[type].tab.table;
3735
3736         /* Hopefully phi+M_PI never results in values < 0 */
3737         *dvdlambda += bonded_tab("dihedral", table, &fcd->dihtab[table], forceparams[type].tab.kA,
3738                                  forceparams[type].tab.kB, phi + M_PI, lambda, &vpd, &ddphi);
3739
3740         vtot += vpd;
3741         do_dih_fup<flavor>(ai, aj, ak, al, -ddphi, r_ij, r_kj, r_kl, m, n, f, fshift, pbc, x, t1,
3742                            t2, t3); /* 112      */
3743
3744     } /* 227 TOTAL  */
3745
3746     return vtot;
3747 }
3748
3749 struct BondedInteractions
3750 {
3751     BondedFunction function;
3752     int            nrnbIndex;
3753 };
3754
3755 // Bug in old clang versions prevents constexpr. constexpr is needed for MSVC.
3756 #if defined(__clang__) && __clang_major__ < 6
3757 #    define CONSTEXPR_EXCL_OLD_CLANG const
3758 #else
3759 #    define CONSTEXPR_EXCL_OLD_CLANG constexpr
3760 #endif
3761
3762 /*! \brief Lookup table of bonded interaction functions
3763  *
3764  * This must have as many entries as interaction_function in ifunc.cpp */
3765 template<BondedKernelFlavor flavor>
3766 CONSTEXPR_EXCL_OLD_CLANG std::array<BondedInteractions, F_NRE> c_bondedInteractionFunctions = {
3767     BondedInteractions{ bonds<flavor>, eNR_BONDS },                       // F_BONDS
3768     BondedInteractions{ g96bonds<flavor>, eNR_BONDS },                    // F_G96BONDS
3769     BondedInteractions{ morse_bonds<flavor>, eNR_MORSE },                 // F_MORSE
3770     BondedInteractions{ cubic_bonds<flavor>, eNR_CUBICBONDS },            // F_CUBICBONDS
3771     BondedInteractions{ unimplemented, -1 },                              // F_CONNBONDS
3772     BondedInteractions{ bonds<flavor>, eNR_BONDS },                       // F_HARMONIC
3773     BondedInteractions{ FENE_bonds<flavor>, eNR_FENEBONDS },              // F_FENEBONDS
3774     BondedInteractions{ tab_bonds<flavor>, eNR_TABBONDS },                // F_TABBONDS
3775     BondedInteractions{ tab_bonds<flavor>, eNR_TABBONDS },                // F_TABBONDSNC
3776     BondedInteractions{ restraint_bonds<flavor>, eNR_RESTRBONDS },        // F_RESTRBONDS
3777     BondedInteractions{ angles<flavor>, eNR_ANGLES },                     // F_ANGLES
3778     BondedInteractions{ g96angles<flavor>, eNR_ANGLES },                  // F_G96ANGLES
3779     BondedInteractions{ restrangles<flavor>, eNR_ANGLES },                // F_RESTRANGLES
3780     BondedInteractions{ linear_angles<flavor>, eNR_ANGLES },              // F_LINEAR_ANGLES
3781     BondedInteractions{ cross_bond_bond<flavor>, eNR_CROSS_BOND_BOND },   // F_CROSS_BOND_BONDS
3782     BondedInteractions{ cross_bond_angle<flavor>, eNR_CROSS_BOND_ANGLE }, // F_CROSS_BOND_ANGLES
3783     BondedInteractions{ urey_bradley<flavor>, eNR_UREY_BRADLEY },         // F_UREY_BRADLEY
3784     BondedInteractions{ quartic_angles<flavor>, eNR_QANGLES },            // F_QUARTIC_ANGLES
3785     BondedInteractions{ tab_angles<flavor>, eNR_TABANGLES },              // F_TABANGLES
3786     BondedInteractions{ pdihs<flavor>, eNR_PROPER },                      // F_PDIHS
3787     BondedInteractions{ rbdihs<flavor>, eNR_RB },                         // F_RBDIHS
3788     BondedInteractions{ restrdihs<flavor>, eNR_PROPER },                  // F_RESTRDIHS
3789     BondedInteractions{ cbtdihs<flavor>, eNR_RB },                        // F_CBTDIHS
3790     BondedInteractions{ rbdihs<flavor>, eNR_FOURDIH },                    // F_FOURDIHS
3791     BondedInteractions{ idihs<flavor>, eNR_IMPROPER },                    // F_IDIHS
3792     BondedInteractions{ pdihs<flavor>, eNR_IMPROPER },                    // F_PIDIHS
3793     BondedInteractions{ tab_dihs<flavor>, eNR_TABDIHS },                  // F_TABDIHS
3794     BondedInteractions{ unimplemented, eNR_CMAP },                        // F_CMAP
3795     BondedInteractions{ unimplemented, -1 },                              // F_GB12_NOLONGERUSED
3796     BondedInteractions{ unimplemented, -1 },                              // F_GB13_NOLONGERUSED
3797     BondedInteractions{ unimplemented, -1 },                              // F_GB14_NOLONGERUSED
3798     BondedInteractions{ unimplemented, -1 },                              // F_GBPOL_NOLONGERUSED
3799     BondedInteractions{ unimplemented, -1 },                       // F_NPSOLVATION_NOLONGERUSED
3800     BondedInteractions{ unimplemented, eNR_NB14 },                 // F_LJ14
3801     BondedInteractions{ unimplemented, -1 },                       // F_COUL14
3802     BondedInteractions{ unimplemented, eNR_NB14 },                 // F_LJC14_Q
3803     BondedInteractions{ unimplemented, eNR_NB14 },                 // F_LJC_PAIRS_NB
3804     BondedInteractions{ unimplemented, -1 },                       // F_LJ
3805     BondedInteractions{ unimplemented, -1 },                       // F_BHAM
3806     BondedInteractions{ unimplemented, -1 },                       // F_LJ_LR_NOLONGERUSED
3807     BondedInteractions{ unimplemented, -1 },                       // F_BHAM_LR_NOLONGERUSED
3808     BondedInteractions{ unimplemented, -1 },                       // F_DISPCORR
3809     BondedInteractions{ unimplemented, -1 },                       // F_COUL_SR
3810     BondedInteractions{ unimplemented, -1 },                       // F_COUL_LR_NOLONGERUSED
3811     BondedInteractions{ unimplemented, -1 },                       // F_RF_EXCL
3812     BondedInteractions{ unimplemented, -1 },                       // F_COUL_RECIP
3813     BondedInteractions{ unimplemented, -1 },                       // F_LJ_RECIP
3814     BondedInteractions{ unimplemented, -1 },                       // F_DPD
3815     BondedInteractions{ polarize<flavor>, eNR_POLARIZE },          // F_POLARIZATION
3816     BondedInteractions{ water_pol<flavor>, eNR_WPOL },             // F_WATER_POL
3817     BondedInteractions{ thole_pol<flavor>, eNR_THOLE },            // F_THOLE_POL
3818     BondedInteractions{ anharm_polarize<flavor>, eNR_ANHARM_POL }, // F_ANHARM_POL
3819     BondedInteractions{ unimplemented, -1 },                       // F_POSRES
3820     BondedInteractions{ unimplemented, -1 },                       // F_FBPOSRES
3821     BondedInteractions{ ta_disres, eNR_DISRES },                   // F_DISRES
3822     BondedInteractions{ unimplemented, -1 },                       // F_DISRESVIOL
3823     BondedInteractions{ orires, eNR_ORIRES },                      // F_ORIRES
3824     BondedInteractions{ unimplemented, -1 },                       // F_ORIRESDEV
3825     BondedInteractions{ angres<flavor>, eNR_ANGRES },              // F_ANGRES
3826     BondedInteractions{ angresz<flavor>, eNR_ANGRESZ },            // F_ANGRESZ
3827     BondedInteractions{ dihres<flavor>, eNR_DIHRES },              // F_DIHRES
3828     BondedInteractions{ unimplemented, -1 },                       // F_DIHRESVIOL
3829     BondedInteractions{ unimplemented, -1 },                       // F_CONSTR
3830     BondedInteractions{ unimplemented, -1 },                       // F_CONSTRNC
3831     BondedInteractions{ unimplemented, -1 },                       // F_SETTLE
3832     BondedInteractions{ unimplemented, -1 },                       // F_VSITE2
3833     BondedInteractions{ unimplemented, -1 },                       // F_VSITE3
3834     BondedInteractions{ unimplemented, -1 },                       // F_VSITE3FD
3835     BondedInteractions{ unimplemented, -1 },                       // F_VSITE3FAD
3836     BondedInteractions{ unimplemented, -1 },                       // F_VSITE3OUT
3837     BondedInteractions{ unimplemented, -1 },                       // F_VSITE4FD
3838     BondedInteractions{ unimplemented, -1 },                       // F_VSITE4FDN
3839     BondedInteractions{ unimplemented, -1 },                       // F_VSITEN
3840     BondedInteractions{ unimplemented, -1 },                       // F_COM_PULL
3841     BondedInteractions{ unimplemented, -1 },                       // F_DENSITYFITTING
3842     BondedInteractions{ unimplemented, -1 },                       // F_EQM
3843     BondedInteractions{ unimplemented, -1 },                       // F_EPOT
3844     BondedInteractions{ unimplemented, -1 },                       // F_EKIN
3845     BondedInteractions{ unimplemented, -1 },                       // F_ETOT
3846     BondedInteractions{ unimplemented, -1 },                       // F_ECONSERVED
3847     BondedInteractions{ unimplemented, -1 },                       // F_TEMP
3848     BondedInteractions{ unimplemented, -1 },                       // F_VTEMP_NOLONGERUSED
3849     BondedInteractions{ unimplemented, -1 },                       // F_PDISPCORR
3850     BondedInteractions{ unimplemented, -1 },                       // F_PRES
3851     BondedInteractions{ unimplemented, -1 },                       // F_DVDL_CONSTR
3852     BondedInteractions{ unimplemented, -1 },                       // F_DVDL
3853     BondedInteractions{ unimplemented, -1 },                       // F_DKDL
3854     BondedInteractions{ unimplemented, -1 },                       // F_DVDL_COUL
3855     BondedInteractions{ unimplemented, -1 },                       // F_DVDL_VDW
3856     BondedInteractions{ unimplemented, -1 },                       // F_DVDL_BONDED
3857     BondedInteractions{ unimplemented, -1 },                       // F_DVDL_RESTRAINT
3858     BondedInteractions{ unimplemented, -1 },                       // F_DVDL_TEMPERATURE
3859 };
3860
3861 /*! \brief List of instantiated BondedInteractions list */
3862 CONSTEXPR_EXCL_OLD_CLANG
3863 gmx::EnumerationArray<BondedKernelFlavor, std::array<BondedInteractions, F_NRE>> c_bondedInteractionFunctionsPerFlavor = {
3864     c_bondedInteractionFunctions<BondedKernelFlavor::ForcesSimdWhenAvailable>,
3865     c_bondedInteractionFunctions<BondedKernelFlavor::ForcesNoSimd>,
3866     c_bondedInteractionFunctions<BondedKernelFlavor::ForcesAndVirialAndEnergy>,
3867     c_bondedInteractionFunctions<BondedKernelFlavor::ForcesAndEnergy>
3868 };
3869
3870 //! \endcond
3871
3872 } // namespace
3873
3874 real calculateSimpleBond(const int           ftype,
3875                          const int           numForceatoms,
3876                          const t_iatom       forceatoms[],
3877                          const t_iparams     forceparams[],
3878                          const rvec          x[],
3879                          rvec4               f[],
3880                          rvec                fshift[],
3881                          const struct t_pbc* pbc,
3882                          const real          lambda,
3883                          real*               dvdlambda,
3884                          const t_mdatoms*    md,
3885                          t_fcdata*           fcd,
3886                          int gmx_unused*          global_atom_index,
3887                          const BondedKernelFlavor bondedKernelFlavor)
3888 {
3889     const BondedInteractions& bonded = c_bondedInteractionFunctionsPerFlavor[bondedKernelFlavor][ftype];
3890
3891     real v = bonded.function(numForceatoms, forceatoms, forceparams, x, f, fshift, pbc, lambda,
3892                              dvdlambda, md, fcd, global_atom_index);
3893
3894     return v;
3895 }
3896
3897 int nrnbIndex(int ftype)
3898 {
3899     return c_bondedInteractionFunctions<BondedKernelFlavor::ForcesAndVirialAndEnergy>[ftype].nrnbIndex;
3900 }