src/gromacs/listed_forces/bonded.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
   7  * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
   8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   9  * and including many others, as listed in the AUTHORS file in the
  10  * top-level source directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38 /*! \internal \file
  39  *
  40  * \brief This file defines low-level functions necessary for
  41  * computing energies and forces for listed interactions.
  42  *
  43  * \author Mark Abraham <mark.j.abraham@gmail.com>
  44  *
  45  * \ingroup module_listed_forces
  46  */
  47 #include "gmxpre.h"
  48
  49 #include "bonded.h"
  50
  51 #include "config.h"
  52
  53 #include <cassert>
  54 #include <cmath>
  55
  56 #include <algorithm>
  57
  58 #include "gromacs/gmxlib/nrnb.h"
  59 #include "gromacs/listed_forces/disre.h"
  60 #include "gromacs/listed_forces/orires.h"
  61 #include "gromacs/math/functions.h"
  62 #include "gromacs/math/units.h"
  63 #include "gromacs/math/utilities.h"
  64 #include "gromacs/math/vec.h"
  65 #include "gromacs/mdtypes/fcdata.h"
  66 #include "gromacs/mdtypes/mdatom.h"
  67 #include "gromacs/pbcutil/ishift.h"
  68 #include "gromacs/pbcutil/pbc.h"
  69 #include "gromacs/pbcutil/pbc_simd.h"
  70 #include "gromacs/simd/simd.h"
  71 #include "gromacs/simd/simd_math.h"
  72 #include "gromacs/simd/vector_operations.h"
  73 #include "gromacs/utility/basedefinitions.h"
  74 #include "gromacs/utility/enumerationhelpers.h"
  75 #include "gromacs/utility/fatalerror.h"
  76 #include "gromacs/utility/real.h"
  77 #include "gromacs/utility/smalloc.h"
  78
  79 #include "listed_internal.h"
  80 #include "restcbt.h"
  81
  82 using namespace gmx; // TODO: Remove when this file is moved into gmx namespace
  83
  84 const EnumerationArray<BondedKernelFlavor, std::string> c_bondedKernelFlavorStrings = {
  85     "forces, using SIMD when available",
  86     "forces, not using SIMD",
  87     "forces, virial, and energy (ie. not using SIMD)",
  88     "forces and energy (ie. not using SIMD)"
  89 };
  90 namespace
  91 {
  92
  93 //! Type of CPU function to compute a bonded interaction.
  94 using BondedFunction = real (*)(int              nbonds,
  95                                 const t_iatom    iatoms[],
  96                                 const t_iparams  iparams[],
  97                                 const rvec       x[],
  98                                 rvec4            f[],
  99                                 rvec             fshift[],
 100                                 const t_pbc*     pbc,
 101                                 real             lambda,
 102                                 real*            dvdlambda,
 103                                 const t_mdatoms* md,
 104                                 t_fcdata*        fcd,
 105                                 t_disresdata*    disresdata,
 106                                 t_oriresdata*    oriresdata,
 107                                 int*             ddgatindex);
 108
 109 /*! \brief Mysterious CMAP coefficient matrix */
 110 const int cmap_coeff_matrix[] = {
 111     1,  0,  -3, 2,  0,  0, 0,  0,  -3, 0,  9,  -6, 2, 0,  -6, 4,  0,  0,  0, 0,  0, 0, 0,  0,
 112     3,  0,  -9, 6,  -2, 0, 6,  -4, 0,  0,  0,  0,  0, 0,  0,  0,  0,  0,  9, -6, 0, 0, -6, 4,
 113     0,  0,  3,  -2, 0,  0, 0,  0,  0,  0,  -9, 6,  0, 0,  6,  -4, 0,  0,  0, 0,  1, 0, -3, 2,
 114     -2, 0,  6,  -4, 1,  0, -3, 2,  0,  0,  0,  0,  0, 0,  0,  0,  -1, 0,  3, -2, 1, 0, -3, 2,
 115     0,  0,  0,  0,  0,  0, 0,  0,  0,  0,  -3, 2,  0, 0,  3,  -2, 0,  0,  0, 0,  0, 0, 3,  -2,
 116     0,  0,  -6, 4,  0,  0, 3,  -2, 0,  1,  -2, 1,  0, 0,  0,  0,  0,  -3, 6, -3, 0, 2, -4, 2,
 117     0,  0,  0,  0,  0,  0, 0,  0,  0,  3,  -6, 3,  0, -2, 4,  -2, 0,  0,  0, 0,  0, 0, 0,  0,
 118     0,  0,  -3, 3,  0,  0, 2,  -2, 0,  0,  -1, 1,  0, 0,  0,  0,  0,  0,  3, -3, 0, 0, -2, 2,
 119     0,  0,  0,  0,  0,  1, -2, 1,  0,  -2, 4,  -2, 0, 1,  -2, 1,  0,  0,  0, 0,  0, 0, 0,  0,
 120     0,  -1, 2,  -1, 0,  1, -2, 1,  0,  0,  0,  0,  0, 0,  0,  0,  0,  0,  1, -1, 0, 0, -1, 1,
 121     0,  0,  0,  0,  0,  0, -1, 1,  0,  0,  2,  -2, 0, 0,  -1, 1
 122 };
 123
 124
 125 /*! \brief Compute dx = xi - xj, modulo PBC if non-NULL
 126  *
 127  * \todo This kind of code appears in many places. Consolidate it */
 128 int pbc_rvec_sub(const t_pbc* pbc, const rvec xi, const rvec xj, rvec dx)
 129 {
 130     if (pbc)
 131     {
 132         return pbc_dx_aiuc(pbc, xi, xj, dx);
 133     }
 134     else
 135     {
 136         rvec_sub(xi, xj, dx);
 137         return CENTRAL;
 138     }
 139 }
 140
 141 } // namespace
 142
 143 //! \cond
 144 real bond_angle(const rvec   xi,
 145                 const rvec   xj,
 146                 const rvec   xk,
 147                 const t_pbc* pbc,
 148                 rvec         r_ij,
 149                 rvec         r_kj,
 150                 real*        costh,
 151                 int*         t1,
 152                 int*         t2)
 153 /* Return value is the angle between the bonds i-j and j-k */
 154 {
 155     /* 41 FLOPS */
 156     real th;
 157
 158     *t1 = pbc_rvec_sub(pbc, xi, xj, r_ij); /*  3                */
 159     *t2 = pbc_rvec_sub(pbc, xk, xj, r_kj); /*  3                */
 160
 161     *costh = cos_angle(r_ij, r_kj); /* 25               */
 162     th     = std::acos(*costh);     /* 10               */
 163     /* 41 TOTAL */
 164     return th;
 165 }
 166
 167 real dih_angle(const rvec   xi,
 168                const rvec   xj,
 169                const rvec   xk,
 170                const rvec   xl,
 171                const t_pbc* pbc,
 172                rvec         r_ij,
 173                rvec         r_kj,
 174                rvec         r_kl,
 175                rvec         m,
 176                rvec         n,
 177                int*         t1,
 178                int*         t2,
 179                int*         t3)
 180 {
 181     *t1 = pbc_rvec_sub(pbc, xi, xj, r_ij); /*  3        */
 182     *t2 = pbc_rvec_sub(pbc, xk, xj, r_kj); /*  3                */
 183     *t3 = pbc_rvec_sub(pbc, xk, xl, r_kl); /*  3                */
 184
 185     cprod(r_ij, r_kj, m);        /*  9        */
 186     cprod(r_kj, r_kl, n);        /*  9          */
 187     real phi  = gmx_angle(m, n); /* 49 (assuming 25 for atan2) */
 188     real ipr  = iprod(r_ij, n);  /*  5        */
 189     real sign = (ipr < 0.0) ? -1.0 : 1.0;
 190     phi       = sign * phi; /*  1               */
 191     /* 82 TOTAL */
 192     return phi;
 193 }
 194 //! \endcond
 195
 196 void make_dp_periodic(real* dp) /* 1 flop? */
 197 {
 198     /* dp cannot be outside (-pi,pi) */
 199     if (*dp >= M_PI)
 200     {
 201         *dp -= 2 * M_PI;
 202     }
 203     else if (*dp < -M_PI)
 204     {
 205         *dp += 2 * M_PI;
 206     }
 207 }
 208
 209 namespace
 210 {
 211
 212 /*! \brief Spreads and accumulates the bonded forces to the two atoms and adds the virial contribution when needed
 213  *
 214  * \p shiftIndex is used as the periodic shift.
 215  */
 216 template<BondedKernelFlavor flavor>
 217 inline void spreadBondForces(const real bondForce,
 218                              const rvec dx,
 219                              const int  ai,
 220                              const int  aj,
 221                              rvec4*     f,
 222                              int        shiftIndex,
 223                              rvec*      fshift)
 224 {
 225     for (int m = 0; m < DIM; m++) /*  15          */
 226     {
 227         const real fij = bondForce * dx[m];
 228         f[ai][m] += fij;
 229         f[aj][m] -= fij;
 230         if (computeVirial(flavor))
 231         {
 232             fshift[shiftIndex][m] += fij;
 233             fshift[CENTRAL][m] -= fij;
 234         }
 235     }
 236 }
 237
 238 /*! \brief Morse potential bond
 239  *
 240  * By Frank Everdij. Three parameters needed:
 241  *
 242  * b0 = equilibrium distance in nm
 243  * be = beta in nm^-1 (actually, it's nu_e*Sqrt(2*pi*pi*mu/D_e))
 244  * cb = well depth in kJ/mol
 245  *
 246  * Note: the potential is referenced to be +cb at infinite separation
 247  *       and zero at the equilibrium distance!
 248  */
 249 template<BondedKernelFlavor flavor>
 250 real morse_bonds(int             nbonds,
 251                  const t_iatom   forceatoms[],
 252                  const t_iparams forceparams[],
 253                  const rvec      x[],
 254                  rvec4           f[],
 255                  rvec            fshift[],
 256                  const t_pbc*    pbc,
 257                  real            lambda,
 258                  real*           dvdlambda,
 259                  const t_mdatoms gmx_unused* md,
 260                  t_fcdata gmx_unused* fcd,
 261                  t_disresdata gmx_unused* disresdata,
 262                  t_oriresdata gmx_unused* oriresdata,
 263                  int gmx_unused* global_atom_index)
 264 {
 265     const real one = 1.0;
 266     const real two = 2.0;
 267     real       dr, dr2, temp, omtemp, cbomtemp, fbond, vbond, vtot;
 268     real       b0, be, cb, b0A, beA, cbA, b0B, beB, cbB, L1;
 269     rvec       dx;
 270     int        i, ki, type, ai, aj;
 271
 272     vtot = 0.0;
 273     for (i = 0; (i < nbonds);)
 274     {
 275         type = forceatoms[i++];
 276         ai   = forceatoms[i++];
 277         aj   = forceatoms[i++];
 278
 279         b0A = forceparams[type].morse.b0A;
 280         beA = forceparams[type].morse.betaA;
 281         cbA = forceparams[type].morse.cbA;
 282
 283         b0B = forceparams[type].morse.b0B;
 284         beB = forceparams[type].morse.betaB;
 285         cbB = forceparams[type].morse.cbB;
 286
 287         L1 = one - lambda;            /* 1 */
 288         b0 = L1 * b0A + lambda * b0B; /* 3 */
 289         be = L1 * beA + lambda * beB; /* 3 */
 290         cb = L1 * cbA + lambda * cbB; /* 3 */
 291
 292         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3          */
 293         dr2  = iprod(dx, dx);                       /*   5          */
 294         dr   = dr2 * gmx::invsqrt(dr2);             /*  10          */
 295         temp = std::exp(-be * (dr - b0));           /*  12          */
 296
 297         if (temp == one)
 298         {
 299             /* bonds are constrainted. This may _not_ include bond constraints if they are lambda dependent */
 300             *dvdlambda += cbB - cbA;
 301             continue;
 302         }
 303
 304         omtemp   = one - temp;                                      /*   1          */
 305         cbomtemp = cb * omtemp;                                     /*   1          */
 306         vbond    = cbomtemp * omtemp;                               /*   1          */
 307         fbond    = -two * be * temp * cbomtemp * gmx::invsqrt(dr2); /*   9          */
 308         vtot += vbond;                                              /*   1          */
 309
 310         *dvdlambda += (cbB - cbA) * omtemp * omtemp
 311                       - (2 - 2 * omtemp) * omtemp * cb * ((b0B - b0A) * be - (beB - beA) * (dr - b0)); /* 15 */
 312
 313         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, fshift); /* 15 */
 314     }                                                               /*  83 TOTAL    */
 315     return vtot;
 316 }
 317
 318 //! \cond
 319 template<BondedKernelFlavor flavor>
 320 real cubic_bonds(int             nbonds,
 321                  const t_iatom   forceatoms[],
 322                  const t_iparams forceparams[],
 323                  const rvec      x[],
 324                  rvec4           f[],
 325                  rvec            fshift[],
 326                  const t_pbc*    pbc,
 327                  real gmx_unused lambda,
 328                  real gmx_unused* dvdlambda,
 329                  const t_mdatoms gmx_unused* md,
 330                  t_fcdata gmx_unused* fcd,
 331                  t_disresdata gmx_unused* disresdata,
 332                  t_oriresdata gmx_unused* oriresdata,
 333                  int gmx_unused* global_atom_index)
 334 {
 335     const real three = 3.0;
 336     const real two   = 2.0;
 337     real       kb, b0, kcub;
 338     real       dr, dr2, dist, kdist, kdist2, fbond, vbond, vtot;
 339     rvec       dx;
 340     int        i, ki, type, ai, aj;
 341
 342     vtot = 0.0;
 343     for (i = 0; (i < nbonds);)
 344     {
 345         type = forceatoms[i++];
 346         ai   = forceatoms[i++];
 347         aj   = forceatoms[i++];
 348
 349         b0   = forceparams[type].cubic.b0;
 350         kb   = forceparams[type].cubic.kb;
 351         kcub = forceparams[type].cubic.kcub;
 352
 353         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3          */
 354         dr2 = iprod(dx, dx);                       /*   5          */
 355
 356         if (dr2 == 0.0)
 357         {
 358             continue;
 359         }
 360
 361         dr     = dr2 * gmx::invsqrt(dr2); /*  10          */
 362         dist   = dr - b0;
 363         kdist  = kb * dist;
 364         kdist2 = kdist * dist;
 365
 366         vbond = kdist2 + kcub * kdist2 * dist;
 367         fbond = -(two * kdist + three * kdist2 * kcub) / dr;
 368
 369         vtot += vbond; /* 21 */
 370
 371         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, fshift); /* 15 */
 372     }                                                               /*  54 TOTAL    */
 373     return vtot;
 374 }
 375
 376 template<BondedKernelFlavor flavor>
 377 real FENE_bonds(int             nbonds,
 378                 const t_iatom   forceatoms[],
 379                 const t_iparams forceparams[],
 380                 const rvec      x[],
 381                 rvec4           f[],
 382                 rvec            fshift[],
 383                 const t_pbc*    pbc,
 384                 real gmx_unused lambda,
 385                 real gmx_unused* dvdlambda,
 386                 const t_mdatoms gmx_unused* md,
 387                 t_fcdata gmx_unused* fcd,
 388                 t_disresdata gmx_unused* disresdata,
 389                 t_oriresdata gmx_unused* oriresdata,
 390                 int*                     global_atom_index)
 391 {
 392     const real half = 0.5;
 393     const real one  = 1.0;
 394     real       bm, kb;
 395     real       dr2, bm2, omdr2obm2, fbond, vbond, vtot;
 396     rvec       dx;
 397     int        i, ki, type, ai, aj;
 398
 399     vtot = 0.0;
 400     for (i = 0; (i < nbonds);)
 401     {
 402         type = forceatoms[i++];
 403         ai   = forceatoms[i++];
 404         aj   = forceatoms[i++];
 405
 406         bm = forceparams[type].fene.bm;
 407         kb = forceparams[type].fene.kb;
 408
 409         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3          */
 410         dr2 = iprod(dx, dx);                       /*   5          */
 411
 412         if (dr2 == 0.0)
 413         {
 414             continue;
 415         }
 416
 417         bm2 = bm * bm;
 418
 419         if (dr2 >= bm2)
 420         {
 421             gmx_fatal(FARGS,
 422                       "r^2 (%f) >= bm^2 (%f) in FENE bond between atoms %d and %d",
 423                       dr2,
 424                       bm2,
 425                       glatnr(global_atom_index, ai),
 426                       glatnr(global_atom_index, aj));
 427         }
 428
 429         omdr2obm2 = one - dr2 / bm2;
 430
 431         vbond = -half * kb * bm2 * std::log(omdr2obm2);
 432         fbond = -kb / omdr2obm2;
 433
 434         vtot += vbond; /* 35 */
 435
 436         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, fshift); /* 15 */
 437     }                                                               /*  58 TOTAL    */
 438     return vtot;
 439 }
 440
 441 real harmonic(real kA, real kB, real xA, real xB, real x, real lambda, real* V, real* F)
 442 {
 443     const real half = 0.5;
 444     real       L1, kk, x0, dx, dx2;
 445     real       v, f, dvdlambda;
 446
 447     L1 = 1.0 - lambda;
 448     kk = L1 * kA + lambda * kB;
 449     x0 = L1 * xA + lambda * xB;
 450
 451     dx  = x - x0;
 452     dx2 = dx * dx;
 453
 454     f         = -kk * dx;
 455     v         = half * kk * dx2;
 456     dvdlambda = half * (kB - kA) * dx2 + (xA - xB) * kk * dx;
 457
 458     *F = f;
 459     *V = v;
 460
 461     return dvdlambda;
 462
 463     /* That was 19 flops */
 464 }
 465
 466 template<BondedKernelFlavor flavor>
 467 std::enable_if_t<flavor != BondedKernelFlavor::ForcesSimdWhenAvailable || !GMX_SIMD_HAVE_REAL, real>
 468 bonds(int             nbonds,
 469       const t_iatom   forceatoms[],
 470       const t_iparams forceparams[],
 471       const rvec      x[],
 472       rvec4           f[],
 473       rvec            fshift[],
 474       const t_pbc*    pbc,
 475       real            lambda,
 476       real*           dvdlambda,
 477       const t_mdatoms gmx_unused* md,
 478       t_fcdata gmx_unused* fcd,
 479       t_disresdata gmx_unused* disresdata,
 480       t_oriresdata gmx_unused* oriresdata,
 481       int gmx_unused* global_atom_index)
 482 {
 483     int  i, ki, ai, aj, type;
 484     real dr, dr2, fbond, vbond, vtot;
 485     rvec dx;
 486
 487     vtot = 0.0;
 488     for (i = 0; (i < nbonds);)
 489     {
 490         type = forceatoms[i++];
 491         ai   = forceatoms[i++];
 492         aj   = forceatoms[i++];
 493
 494         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
 495         dr2 = iprod(dx, dx);                       /*   5               */
 496         dr  = std::sqrt(dr2);                      /*  10               */
 497
 498         *dvdlambda += harmonic(forceparams[type].harmonic.krA,
 499                                forceparams[type].harmonic.krB,
 500                                forceparams[type].harmonic.rA,
 501                                forceparams[type].harmonic.rB,
 502                                dr,
 503                                lambda,
 504                                &vbond,
 505                                &fbond); /*  19  */
 506
 507         if (dr2 == 0.0)
 508         {
 509             continue;
 510         }
 511
 512
 513         vtot += vbond;              /* 1*/
 514         fbond *= gmx::invsqrt(dr2); /*   6              */
 515
 516         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, fshift); /* 15 */
 517     }                                                               /* 59 TOTAL */
 518     return vtot;
 519 }
 520
 521 #if GMX_SIMD_HAVE_REAL
 522
 523 /*! \brief Computes forces (only) for harmonic bonds using SIMD intrinsics
 524  *
 525  * As plain-C bonds(), but using SIMD to calculate many bonds at once.
 526  * This routines does not calculate energies and shift forces.
 527  */
 528 template<BondedKernelFlavor flavor>
 529 std::enable_if_t<flavor == BondedKernelFlavor::ForcesSimdWhenAvailable, real>
 530 bonds(int             nbonds,
 531       const t_iatom   forceatoms[],
 532       const t_iparams forceparams[],
 533       const rvec      x[],
 534       rvec4           f[],
 535       rvec gmx_unused fshift[],
 536       const t_pbc*    pbc,
 537       real gmx_unused lambda,
 538       real gmx_unused* dvdlambda,
 539       const t_mdatoms gmx_unused* md,
 540       t_fcdata gmx_unused* fcd,
 541       t_disresdata gmx_unused* disresdata,
 542       t_oriresdata gmx_unused* oriresdata,
 543       int gmx_unused* global_atom_index)
 544 {
 545     constexpr int                            nfa1 = 3;
 546     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ai[GMX_SIMD_REAL_WIDTH];
 547     alignas(GMX_SIMD_ALIGNMENT) std::int32_t aj[GMX_SIMD_REAL_WIDTH];
 548     alignas(GMX_SIMD_ALIGNMENT) real         coeff[2 * GMX_SIMD_REAL_WIDTH];
 549
 550     alignas(GMX_SIMD_ALIGNMENT) real pbc_simd[9 * GMX_SIMD_REAL_WIDTH];
 551
 552     set_pbc_simd(pbc, pbc_simd);
 553
 554     const SimdReal real_eps = SimdReal(GMX_REAL_EPS);
 555
 556     /* nbonds is the number of bonds times nfa1, here we step GMX_SIMD_REAL_WIDTH angles */
 557     for (int i = 0; i < nbonds; i += GMX_SIMD_REAL_WIDTH * nfa1)
 558     {
 559         /* Collect atoms for GMX_SIMD_REAL_WIDTH angles.
 560          * iu indexes into forceatoms, we should not let iu go beyond nbonds.
 561          */
 562         int iu = i;
 563         for (int s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
 564         {
 565             const int type = forceatoms[iu];
 566             ai[s]          = forceatoms[iu + 1];
 567             aj[s]          = forceatoms[iu + 2];
 568
 569             /* At the end fill the arrays with the last atoms and 0 params */
 570             if (i + s * nfa1 < nbonds)
 571             {
 572                 coeff[s]                       = forceparams[type].harmonic.krA;
 573                 coeff[GMX_SIMD_REAL_WIDTH + s] = forceparams[type].harmonic.rA;
 574
 575                 if (iu + nfa1 < nbonds)
 576                 {
 577                     iu += nfa1;
 578                 }
 579             }
 580             else
 581             {
 582                 coeff[s]                       = 0;
 583                 coeff[GMX_SIMD_REAL_WIDTH + s] = 0;
 584             }
 585         }
 586
 587         /* Store the non PBC corrected distances packed and aligned */
 588         SimdReal xi, yi, zi;
 589         SimdReal xj, yj, zj;
 590         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ai, &xi, &yi, &zi);
 591         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), aj, &xj, &yj, &zj);
 592         SimdReal rij_x = xi - xj;
 593         SimdReal rij_y = yi - yj;
 594         SimdReal rij_z = zi - zj;
 595
 596         pbc_correct_dx_simd(&rij_x, &rij_y, &rij_z, pbc_simd);
 597
 598         const SimdReal dist2 = rij_x * rij_x + rij_y * rij_y + rij_z * rij_z;
 599         // Here we avoid sqrt(0), the force will be zero because rij=0
 600         const SimdReal invDist = invsqrt(max(dist2, real_eps));
 601
 602         const SimdReal k  = load<SimdReal>(coeff);
 603         const SimdReal r0 = load<SimdReal>(coeff + GMX_SIMD_REAL_WIDTH);
 604
 605         // Compute the force divided by the distance
 606         const SimdReal forceOverR = -k * (dist2 * invDist - r0) * invDist;
 607
 608         const SimdReal f_x = forceOverR * rij_x;
 609         const SimdReal f_y = forceOverR * rij_y;
 610         const SimdReal f_z = forceOverR * rij_z;
 611
 612         transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ai, f_x, f_y, f_z);
 613         transposeScatterDecrU<4>(reinterpret_cast<real*>(f), aj, f_x, f_y, f_z);
 614     }
 615
 616     return 0;
 617 }
 618
 619 #endif // GMX_SIMD_HAVE_REAL
 620
 621 template<BondedKernelFlavor flavor>
 622 real restraint_bonds(int             nbonds,
 623                      const t_iatom   forceatoms[],
 624                      const t_iparams forceparams[],
 625                      const rvec      x[],
 626                      rvec4           f[],
 627                      rvec            fshift[],
 628                      const t_pbc*    pbc,
 629                      real            lambda,
 630                      real*           dvdlambda,
 631                      const t_mdatoms gmx_unused* md,
 632                      t_fcdata gmx_unused* fcd,
 633                      t_disresdata gmx_unused* disresdata,
 634                      t_oriresdata gmx_unused* oriresdata,
 635                      int gmx_unused* global_atom_index)
 636 {
 637     int  i, ki, ai, aj, type;
 638     real dr, dr2, fbond, vbond, vtot;
 639     real L1;
 640     real low, dlow, up1, dup1, up2, dup2, k, dk;
 641     real drh, drh2;
 642     rvec dx;
 643
 644     L1 = 1.0 - lambda;
 645
 646     vtot = 0.0;
 647     for (i = 0; (i < nbonds);)
 648     {
 649         type = forceatoms[i++];
 650         ai   = forceatoms[i++];
 651         aj   = forceatoms[i++];
 652
 653         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
 654         dr2 = iprod(dx, dx);                       /*   5               */
 655         dr  = dr2 * gmx::invsqrt(dr2);             /*  10               */
 656
 657         low  = L1 * forceparams[type].restraint.lowA + lambda * forceparams[type].restraint.lowB;
 658         dlow = -forceparams[type].restraint.lowA + forceparams[type].restraint.lowB;
 659         up1  = L1 * forceparams[type].restraint.up1A + lambda * forceparams[type].restraint.up1B;
 660         dup1 = -forceparams[type].restraint.up1A + forceparams[type].restraint.up1B;
 661         up2  = L1 * forceparams[type].restraint.up2A + lambda * forceparams[type].restraint.up2B;
 662         dup2 = -forceparams[type].restraint.up2A + forceparams[type].restraint.up2B;
 663         k    = L1 * forceparams[type].restraint.kA + lambda * forceparams[type].restraint.kB;
 664         dk   = -forceparams[type].restraint.kA + forceparams[type].restraint.kB;
 665         /* 24 */
 666
 667         if (dr < low)
 668         {
 669             drh   = dr - low;
 670             drh2  = drh * drh;
 671             vbond = 0.5 * k * drh2;
 672             fbond = -k * drh;
 673             *dvdlambda += 0.5 * dk * drh2 - k * dlow * drh;
 674         } /* 11 */
 675         else if (dr <= up1)
 676         {
 677             vbond = 0;
 678             fbond = 0;
 679         }
 680         else if (dr <= up2)
 681         {
 682             drh   = dr - up1;
 683             drh2  = drh * drh;
 684             vbond = 0.5 * k * drh2;
 685             fbond = -k * drh;
 686             *dvdlambda += 0.5 * dk * drh2 - k * dup1 * drh;
 687         } /* 11 */
 688         else
 689         {
 690             drh   = dr - up2;
 691             vbond = k * (up2 - up1) * (0.5 * (up2 - up1) + drh);
 692             fbond = -k * (up2 - up1);
 693             *dvdlambda += dk * (up2 - up1) * (0.5 * (up2 - up1) + drh)
 694                           + k * (dup2 - dup1) * (up2 - up1 + drh) - k * (up2 - up1) * dup2;
 695         }
 696
 697         if (dr2 == 0.0)
 698         {
 699             continue;
 700         }
 701
 702         vtot += vbond;              /* 1*/
 703         fbond *= gmx::invsqrt(dr2); /*   6              */
 704
 705         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, fshift); /* 15 */
 706     }                                                               /* 59 TOTAL */
 707
 708     return vtot;
 709 }
 710
 711 template<BondedKernelFlavor flavor>
 712 real polarize(int              nbonds,
 713               const t_iatom    forceatoms[],
 714               const t_iparams  forceparams[],
 715               const rvec       x[],
 716               rvec4            f[],
 717               rvec             fshift[],
 718               const t_pbc*     pbc,
 719               real             lambda,
 720               real*            dvdlambda,
 721               const t_mdatoms* md,
 722               t_fcdata gmx_unused* fcd,
 723               t_disresdata gmx_unused* disresdata,
 724               t_oriresdata gmx_unused* oriresdata,
 725               int gmx_unused* global_atom_index)
 726 {
 727     int  i, ki, ai, aj, type;
 728     real dr, dr2, fbond, vbond, vtot, ksh;
 729     rvec dx;
 730
 731     vtot = 0.0;
 732     for (i = 0; (i < nbonds);)
 733     {
 734         type = forceatoms[i++];
 735         ai   = forceatoms[i++];
 736         aj   = forceatoms[i++];
 737         ksh  = gmx::square(md->chargeA[aj]) * gmx::c_one4PiEps0 / forceparams[type].polarize.alpha;
 738
 739         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
 740         dr2 = iprod(dx, dx);                       /*   5               */
 741         dr  = std::sqrt(dr2);                      /*  10               */
 742
 743         *dvdlambda += harmonic(ksh, ksh, 0, 0, dr, lambda, &vbond, &fbond); /*  19  */
 744
 745         if (dr2 == 0.0)
 746         {
 747             continue;
 748         }
 749
 750         vtot += vbond;              /* 1*/
 751         fbond *= gmx::invsqrt(dr2); /*   6              */
 752
 753         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, fshift); /* 15 */
 754     }                                                               /* 59 TOTAL */
 755     return vtot;
 756 }
 757
 758 template<BondedKernelFlavor flavor>
 759 real anharm_polarize(int              nbonds,
 760                      const t_iatom    forceatoms[],
 761                      const t_iparams  forceparams[],
 762                      const rvec       x[],
 763                      rvec4            f[],
 764                      rvec             fshift[],
 765                      const t_pbc*     pbc,
 766                      real             lambda,
 767                      real*            dvdlambda,
 768                      const t_mdatoms* md,
 769                      t_fcdata gmx_unused* fcd,
 770                      t_disresdata gmx_unused* disresdata,
 771                      t_oriresdata gmx_unused* oriresdata,
 772                      int gmx_unused* global_atom_index)
 773 {
 774     int  i, ki, ai, aj, type;
 775     real dr, dr2, fbond, vbond, vtot, ksh, khyp, drcut, ddr, ddr3;
 776     rvec dx;
 777
 778     vtot = 0.0;
 779     for (i = 0; (i < nbonds);)
 780     {
 781         type = forceatoms[i++];
 782         ai   = forceatoms[i++];
 783         aj   = forceatoms[i++];
 784         ksh = gmx::square(md->chargeA[aj]) * gmx::c_one4PiEps0 / forceparams[type].anharm_polarize.alpha; /* 7*/
 785         khyp  = forceparams[type].anharm_polarize.khyp;
 786         drcut = forceparams[type].anharm_polarize.drcut;
 787
 788         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
 789         dr2 = iprod(dx, dx);                       /*   5               */
 790         dr  = dr2 * gmx::invsqrt(dr2);             /*  10               */
 791
 792         *dvdlambda += harmonic(ksh, ksh, 0, 0, dr, lambda, &vbond, &fbond); /*  19  */
 793
 794         if (dr2 == 0.0)
 795         {
 796             continue;
 797         }
 798
 799         if (dr > drcut)
 800         {
 801             ddr  = dr - drcut;
 802             ddr3 = ddr * ddr * ddr;
 803             vbond += khyp * ddr * ddr3;
 804             fbond -= 4 * khyp * ddr3;
 805         }
 806         fbond *= gmx::invsqrt(dr2); /*   6              */
 807         vtot += vbond;              /* 1*/
 808
 809         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, fshift); /* 15 */
 810     }                                                               /* 72 TOTAL */
 811     return vtot;
 812 }
 813
 814 template<BondedKernelFlavor flavor>
 815 real water_pol(int             nbonds,
 816                const t_iatom   forceatoms[],
 817                const t_iparams forceparams[],
 818                const rvec      x[],
 819                rvec4           f[],
 820                rvec gmx_unused fshift[],
 821                const t_pbc gmx_unused* pbc,
 822                real gmx_unused lambda,
 823                real gmx_unused* dvdlambda,
 824                const t_mdatoms gmx_unused* md,
 825                t_fcdata gmx_unused* fcd,
 826                t_disresdata gmx_unused* disresdata,
 827                t_oriresdata gmx_unused* oriresdata,
 828                int gmx_unused* global_atom_index)
 829 {
 830     /* This routine implements anisotropic polarizibility for water, through
 831      * a shell connected to a dummy with spring constant that differ in the
 832      * three spatial dimensions in the molecular frame.
 833      */
 834     int  i, m, aO, aH1, aH2, aD, aS, type, type0, ki;
 835     rvec dOH1, dOH2, dHH, dOD, dDS, nW, kk, dx, kdx, proj;
 836     real vtot, fij, r_HH, r_OD, r_nW, tx, ty, tz, qS;
 837
 838     vtot = 0.0;
 839     if (nbonds > 0)
 840     {
 841         type0  = forceatoms[0];
 842         aS     = forceatoms[5];
 843         qS     = md->chargeA[aS];
 844         kk[XX] = gmx::square(qS) * gmx::c_one4PiEps0 / forceparams[type0].wpol.al_x;
 845         kk[YY] = gmx::square(qS) * gmx::c_one4PiEps0 / forceparams[type0].wpol.al_y;
 846         kk[ZZ] = gmx::square(qS) * gmx::c_one4PiEps0 / forceparams[type0].wpol.al_z;
 847         r_HH   = 1.0 / forceparams[type0].wpol.rHH;
 848         for (i = 0; (i < nbonds); i += 6)
 849         {
 850             type = forceatoms[i];
 851             if (type != type0)
 852             {
 853                 gmx_fatal(FARGS, "Sorry, type = %d, type0 = %d, file = %s, line = %d", type, type0, __FILE__, __LINE__);
 854             }
 855             aO  = forceatoms[i + 1];
 856             aH1 = forceatoms[i + 2];
 857             aH2 = forceatoms[i + 3];
 858             aD  = forceatoms[i + 4];
 859             aS  = forceatoms[i + 5];
 860
 861             /* Compute vectors describing the water frame */
 862             pbc_rvec_sub(pbc, x[aH1], x[aO], dOH1);
 863             pbc_rvec_sub(pbc, x[aH2], x[aO], dOH2);
 864             pbc_rvec_sub(pbc, x[aH2], x[aH1], dHH);
 865             pbc_rvec_sub(pbc, x[aD], x[aO], dOD);
 866             ki = pbc_rvec_sub(pbc, x[aS], x[aD], dDS);
 867             cprod(dOH1, dOH2, nW);
 868
 869             /* Compute inverse length of normal vector
 870              * (this one could be precomputed, but I'm too lazy now)
 871              */
 872             r_nW = gmx::invsqrt(iprod(nW, nW));
 873             /* This is for precision, but does not make a big difference,
 874              * it can go later.
 875              */
 876             r_OD = gmx::invsqrt(iprod(dOD, dOD));
 877
 878             /* Normalize the vectors in the water frame */
 879             svmul(r_nW, nW, nW);
 880             svmul(r_HH, dHH, dHH);
 881             svmul(r_OD, dOD, dOD);
 882
 883             /* Compute displacement of shell along components of the vector */
 884             dx[ZZ] = iprod(dDS, dOD);
 885             /* Compute projection on the XY plane: dDS - dx[ZZ]*dOD */
 886             for (m = 0; (m < DIM); m++)
 887             {
 888                 proj[m] = dDS[m] - dx[ZZ] * dOD[m];
 889             }
 890
 891             /*dx[XX] = iprod(dDS,nW);
 892                dx[YY] = iprod(dDS,dHH);*/
 893             dx[XX] = iprod(proj, nW);
 894             for (m = 0; (m < DIM); m++)
 895             {
 896                 proj[m] -= dx[XX] * nW[m];
 897             }
 898             dx[YY] = iprod(proj, dHH);
 899             /* Now compute the forces and energy */
 900             kdx[XX] = kk[XX] * dx[XX];
 901             kdx[YY] = kk[YY] * dx[YY];
 902             kdx[ZZ] = kk[ZZ] * dx[ZZ];
 903             vtot += iprod(dx, kdx);
 904
 905             for (m = 0; (m < DIM); m++)
 906             {
 907                 /* This is a tensor operation but written out for speed */
 908                 tx  = nW[m] * kdx[XX];
 909                 ty  = dHH[m] * kdx[YY];
 910                 tz  = dOD[m] * kdx[ZZ];
 911                 fij = -tx - ty - tz;
 912                 f[aS][m] += fij;
 913                 f[aD][m] -= fij;
 914                 if (computeVirial(flavor))
 915                 {
 916                     fshift[ki][m] += fij;
 917                     fshift[CENTRAL][m] -= fij;
 918                 }
 919             }
 920         }
 921     }
 922     return 0.5 * vtot;
 923 }
 924
 925 template<BondedKernelFlavor flavor>
 926 static real
 927 do_1_thole(const rvec xi, const rvec xj, rvec fi, rvec fj, const t_pbc* pbc, real qq, rvec fshift[], real afac)
 928 {
 929     rvec r12;
 930     real r12sq, r12_1, r12bar, v0, v1, fscal, ebar, fff;
 931     int  m, t;
 932
 933     t = pbc_rvec_sub(pbc, xi, xj, r12); /*  3 */
 934
 935     r12sq  = iprod(r12, r12);                                                     /*  5 */
 936     r12_1  = gmx::invsqrt(r12sq);                                                 /*  5 */
 937     r12bar = afac / r12_1;                                                        /*  5 */
 938     v0     = qq * gmx::c_one4PiEps0 * r12_1;                                      /*  2 */
 939     ebar   = std::exp(-r12bar);                                                   /*  5 */
 940     v1     = (1 - (1 + 0.5 * r12bar) * ebar);                                     /*  4 */
 941     fscal  = ((v0 * r12_1) * v1 - v0 * 0.5 * afac * ebar * (r12bar + 1)) * r12_1; /* 9 */
 942
 943     for (m = 0; (m < DIM); m++)
 944     {
 945         fff = fscal * r12[m];
 946         fi[m] += fff;
 947         fj[m] -= fff;
 948         if (computeVirial(flavor))
 949         {
 950             fshift[t][m] += fff;
 951             fshift[CENTRAL][m] -= fff;
 952         }
 953     } /* 15 */
 954
 955     return v0 * v1; /* 1 */
 956     /* 54 */
 957 }
 958
 959 template<BondedKernelFlavor flavor>
 960 real thole_pol(int             nbonds,
 961                const t_iatom   forceatoms[],
 962                const t_iparams forceparams[],
 963                const rvec      x[],
 964                rvec4           f[],
 965                rvec            fshift[],
 966                const t_pbc*    pbc,
 967                real gmx_unused lambda,
 968                real gmx_unused* dvdlambda,
 969                const t_mdatoms* md,
 970                t_fcdata gmx_unused* fcd,
 971                t_disresdata gmx_unused* disresdata,
 972                t_oriresdata gmx_unused* oriresdata,
 973                int gmx_unused* global_atom_index)
 974 {
 975     /* Interaction between two pairs of particles with opposite charge */
 976     int  i, type, a1, da1, a2, da2;
 977     real q1, q2, qq, a, al1, al2, afac;
 978     real V = 0;
 979
 980     for (i = 0; (i < nbonds);)
 981     {
 982         type = forceatoms[i++];
 983         a1   = forceatoms[i++];
 984         da1  = forceatoms[i++];
 985         a2   = forceatoms[i++];
 986         da2  = forceatoms[i++];
 987         q1   = md->chargeA[da1];
 988         q2   = md->chargeA[da2];
 989         a    = forceparams[type].thole.a;
 990         al1  = forceparams[type].thole.alpha1;
 991         al2  = forceparams[type].thole.alpha2;
 992         qq   = q1 * q2;
 993         afac = a * gmx::invsixthroot(al1 * al2);
 994         V += do_1_thole<flavor>(x[a1], x[a2], f[a1], f[a2], pbc, qq, fshift, afac);
 995         V += do_1_thole<flavor>(x[da1], x[a2], f[da1], f[a2], pbc, -qq, fshift, afac);
 996         V += do_1_thole<flavor>(x[a1], x[da2], f[a1], f[da2], pbc, -qq, fshift, afac);
 997         V += do_1_thole<flavor>(x[da1], x[da2], f[da1], f[da2], pbc, qq, fshift, afac);
 998     }
 999     /* 290 flops */
1000     return V;
1001 }
1002
1003 // Avoid gcc 386 -O3 code generation bug in this function (see Issue
1004 // #3205 for more information)
1005 #if defined(__GNUC__) && defined(__i386__) && defined(__OPTIMIZE__)
1006 #    pragma GCC push_options
1007 #    pragma GCC optimize("O1")
1008 #    define avoid_gcc_i386_o3_code_generation_bug
1009 #endif
1010
1011 template<BondedKernelFlavor flavor>
1012 std::enable_if_t<flavor != BondedKernelFlavor::ForcesSimdWhenAvailable || !GMX_SIMD_HAVE_REAL, real>
1013 angles(int             nbonds,
1014        const t_iatom   forceatoms[],
1015        const t_iparams forceparams[],
1016        const rvec      x[],
1017        rvec4           f[],
1018        rvec            fshift[],
1019        const t_pbc*    pbc,
1020        real            lambda,
1021        real*           dvdlambda,
1022        const t_mdatoms gmx_unused* md,
1023        t_fcdata gmx_unused* fcd,
1024        t_disresdata gmx_unused* disresdata,
1025        t_oriresdata gmx_unused* oriresdata,
1026        int gmx_unused* global_atom_index)
1027 {
1028     int  i, ai, aj, ak, t1, t2, type;
1029     rvec r_ij, r_kj;
1030     real cos_theta, cos_theta2, theta, dVdt, va, vtot;
1031
1032     vtot = 0.0;
1033     for (i = 0; i < nbonds;)
1034     {
1035         type = forceatoms[i++];
1036         ai   = forceatoms[i++];
1037         aj   = forceatoms[i++];
1038         ak   = forceatoms[i++];
1039
1040         theta = bond_angle(x[ai], x[aj], x[ak], pbc, r_ij, r_kj, &cos_theta, &t1, &t2); /*  41 */
1041
1042         *dvdlambda += harmonic(forceparams[type].harmonic.krA,
1043                                forceparams[type].harmonic.krB,
1044                                forceparams[type].harmonic.rA * gmx::c_deg2Rad,
1045                                forceparams[type].harmonic.rB * gmx::c_deg2Rad,
1046                                theta,
1047                                lambda,
1048                                &va,
1049                                &dVdt); /*  21  */
1050         vtot += va;
1051
1052         cos_theta2 = gmx::square(cos_theta);
1053         if (cos_theta2 < 1)
1054         {
1055             int  m;
1056             real st, sth;
1057             real cik, cii, ckk;
1058             real nrkj2, nrij2;
1059             real nrkj_1, nrij_1;
1060             rvec f_i, f_j, f_k;
1061
1062             st    = dVdt * gmx::invsqrt(1 - cos_theta2); /*  12         */
1063             sth   = st * cos_theta;                      /*   1         */
1064             nrij2 = iprod(r_ij, r_ij);                   /*   5         */
1065             nrkj2 = iprod(r_kj, r_kj);                   /*   5         */
1066
1067             nrij_1 = gmx::invsqrt(nrij2); /*  10                */
1068             nrkj_1 = gmx::invsqrt(nrkj2); /*  10                */
1069
1070             cik = st * nrij_1 * nrkj_1;  /*   2         */
1071             cii = sth * nrij_1 * nrij_1; /*   2         */
1072             ckk = sth * nrkj_1 * nrkj_1; /*   2         */
1073
1074             for (m = 0; m < DIM; m++)
1075             { /*  39            */
1076                 f_i[m] = -(cik * r_kj[m] - cii * r_ij[m]);
1077                 f_k[m] = -(cik * r_ij[m] - ckk * r_kj[m]);
1078                 f_j[m] = -f_i[m] - f_k[m];
1079                 f[ai][m] += f_i[m];
1080                 f[aj][m] += f_j[m];
1081                 f[ak][m] += f_k[m];
1082             }
1083             if (computeVirial(flavor))
1084             {
1085                 rvec_inc(fshift[t1], f_i);
1086                 rvec_inc(fshift[CENTRAL], f_j);
1087                 rvec_inc(fshift[t2], f_k);
1088             }
1089         } /* 161 TOTAL  */
1090     }
1091
1092     return vtot;
1093 }
1094
1095 #ifdef avoid_gcc_i386_o3_code_generation_bug
1096 #    pragma GCC pop_options
1097 #    undef avoid_gcc_i386_o3_code_generation_bug
1098 #endif
1099
1100 #if GMX_SIMD_HAVE_REAL
1101
1102 /* As angles, but using SIMD to calculate many angles at once.
1103  * This routines does not calculate energies and shift forces.
1104  */
1105 template<BondedKernelFlavor flavor>
1106 std::enable_if_t<flavor == BondedKernelFlavor::ForcesSimdWhenAvailable, real>
1107 angles(int             nbonds,
1108        const t_iatom   forceatoms[],
1109        const t_iparams forceparams[],
1110        const rvec      x[],
1111        rvec4           f[],
1112        rvec gmx_unused fshift[],
1113        const t_pbc*    pbc,
1114        real gmx_unused lambda,
1115        real gmx_unused* dvdlambda,
1116        const t_mdatoms gmx_unused* md,
1117        t_fcdata gmx_unused* fcd,
1118        t_disresdata gmx_unused* disresdata,
1119        t_oriresdata gmx_unused* oriresdata,
1120        int gmx_unused* global_atom_index)
1121 {
1122     const int                                nfa1 = 4;
1123     int                                      i, iu, s;
1124     int                                      type;
1125     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ai[GMX_SIMD_REAL_WIDTH];
1126     alignas(GMX_SIMD_ALIGNMENT) std::int32_t aj[GMX_SIMD_REAL_WIDTH];
1127     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ak[GMX_SIMD_REAL_WIDTH];
1128     alignas(GMX_SIMD_ALIGNMENT) real         coeff[2 * GMX_SIMD_REAL_WIDTH];
1129     SimdReal                                 deg2rad_S(gmx::c_deg2Rad);
1130     SimdReal                                 xi_S, yi_S, zi_S;
1131     SimdReal                                 xj_S, yj_S, zj_S;
1132     SimdReal                                 xk_S, yk_S, zk_S;
1133     SimdReal                                 k_S, theta0_S;
1134     SimdReal                                 rijx_S, rijy_S, rijz_S;
1135     SimdReal                                 rkjx_S, rkjy_S, rkjz_S;
1136     SimdReal                                 one_S(1.0);
1137     SimdReal one_min_eps_S(1.0_real - GMX_REAL_EPS); // Largest number < 1
1138
1139     SimdReal                         rij_rkj_S;
1140     SimdReal                         nrij2_S, nrij_1_S;
1141     SimdReal                         nrkj2_S, nrkj_1_S;
1142     SimdReal                         cos_S, invsin_S;
1143     SimdReal                         cos2_S;
1144     SimdReal                         theta_S;
1145     SimdReal                         st_S, sth_S;
1146     SimdReal                         cik_S, cii_S, ckk_S;
1147     SimdReal                         f_ix_S, f_iy_S, f_iz_S;
1148     SimdReal                         f_kx_S, f_ky_S, f_kz_S;
1149     alignas(GMX_SIMD_ALIGNMENT) real pbc_simd[9 * GMX_SIMD_REAL_WIDTH];
1150
1151     set_pbc_simd(pbc, pbc_simd);
1152
1153     /* nbonds is the number of angles times nfa1, here we step GMX_SIMD_REAL_WIDTH angles */
1154     for (i = 0; (i < nbonds); i += GMX_SIMD_REAL_WIDTH * nfa1)
1155     {
1156         /* Collect atoms for GMX_SIMD_REAL_WIDTH angles.
1157          * iu indexes into forceatoms, we should not let iu go beyond nbonds.
1158          */
1159         iu = i;
1160         for (s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
1161         {
1162             type  = forceatoms[iu];
1163             ai[s] = forceatoms[iu + 1];
1164             aj[s] = forceatoms[iu + 2];
1165             ak[s] = forceatoms[iu + 3];
1166
1167             /* At the end fill the arrays with the last atoms and 0 params */
1168             if (i + s * nfa1 < nbonds)
1169             {
1170                 coeff[s]                       = forceparams[type].harmonic.krA;
1171                 coeff[GMX_SIMD_REAL_WIDTH + s] = forceparams[type].harmonic.rA;
1172
1173                 if (iu + nfa1 < nbonds)
1174                 {
1175                     iu += nfa1;
1176                 }
1177             }
1178             else
1179             {
1180                 coeff[s]                       = 0;
1181                 coeff[GMX_SIMD_REAL_WIDTH + s] = 0;
1182             }
1183         }
1184
1185         /* Store the non PBC corrected distances packed and aligned */
1186         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ai, &xi_S, &yi_S, &zi_S);
1187         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), aj, &xj_S, &yj_S, &zj_S);
1188         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ak, &xk_S, &yk_S, &zk_S);
1189         rijx_S = xi_S - xj_S;
1190         rijy_S = yi_S - yj_S;
1191         rijz_S = zi_S - zj_S;
1192         rkjx_S = xk_S - xj_S;
1193         rkjy_S = yk_S - yj_S;
1194         rkjz_S = zk_S - zj_S;
1195
1196         k_S      = load<SimdReal>(coeff);
1197         theta0_S = load<SimdReal>(coeff + GMX_SIMD_REAL_WIDTH) * deg2rad_S;
1198
1199         pbc_correct_dx_simd(&rijx_S, &rijy_S, &rijz_S, pbc_simd);
1200         pbc_correct_dx_simd(&rkjx_S, &rkjy_S, &rkjz_S, pbc_simd);
1201
1202         rij_rkj_S = iprod(rijx_S, rijy_S, rijz_S, rkjx_S, rkjy_S, rkjz_S);
1203
1204         nrij2_S = norm2(rijx_S, rijy_S, rijz_S);
1205         nrkj2_S = norm2(rkjx_S, rkjy_S, rkjz_S);
1206
1207         nrij_1_S = invsqrt(nrij2_S);
1208         nrkj_1_S = invsqrt(nrkj2_S);
1209
1210         cos_S = rij_rkj_S * nrij_1_S * nrkj_1_S;
1211
1212         /* We compute cos^2 using a division instead of squaring cos_S,
1213          * as we would then get additional error contributions from
1214          * the two invsqrt operations, which get amplified by
1215          * the 1/sqrt(1-cos^2) for cos values close to 1.
1216          *
1217          * Note that the non-SIMD version of angles() avoids this issue
1218          * by computing the cosine using double precision.
1219          */
1220         cos2_S = rij_rkj_S * rij_rkj_S / (nrij2_S * nrkj2_S);
1221
1222         /* To allow for 0 and 180 degrees, we need to avoid issues when
1223          * the cosine is close to -1 and 1. For acos() the argument needs
1224          * to be in that range. For cos^2 we take the min of cos^2 and 1 - 1bit,
1225          * so we can safely compute 1/sin as 1/sqrt(1 - cos^2).
1226          */
1227         cos_S  = max(cos_S, -one_S);
1228         cos_S  = min(cos_S, one_S);
1229         cos2_S = min(cos2_S, one_min_eps_S);
1230
1231         theta_S = acos(cos_S);
1232
1233         invsin_S = invsqrt(one_S - cos2_S);
1234
1235         st_S  = k_S * (theta0_S - theta_S) * invsin_S;
1236         sth_S = st_S * cos_S;
1237
1238         cik_S = st_S * nrij_1_S * nrkj_1_S;
1239         cii_S = sth_S * nrij_1_S * nrij_1_S;
1240         ckk_S = sth_S * nrkj_1_S * nrkj_1_S;
1241
1242         f_ix_S = cii_S * rijx_S;
1243         f_ix_S = fnma(cik_S, rkjx_S, f_ix_S);
1244         f_iy_S = cii_S * rijy_S;
1245         f_iy_S = fnma(cik_S, rkjy_S, f_iy_S);
1246         f_iz_S = cii_S * rijz_S;
1247         f_iz_S = fnma(cik_S, rkjz_S, f_iz_S);
1248         f_kx_S = ckk_S * rkjx_S;
1249         f_kx_S = fnma(cik_S, rijx_S, f_kx_S);
1250         f_ky_S = ckk_S * rkjy_S;
1251         f_ky_S = fnma(cik_S, rijy_S, f_ky_S);
1252         f_kz_S = ckk_S * rkjz_S;
1253         f_kz_S = fnma(cik_S, rijz_S, f_kz_S);
1254
1255         transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ai, f_ix_S, f_iy_S, f_iz_S);
1256         transposeScatterDecrU<4>(
1257                 reinterpret_cast<real*>(f), aj, f_ix_S + f_kx_S, f_iy_S + f_ky_S, f_iz_S + f_kz_S);
1258         transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ak, f_kx_S, f_ky_S, f_kz_S);
1259     }
1260
1261     return 0;
1262 }
1263
1264 #endif // GMX_SIMD_HAVE_REAL
1265
1266 template<BondedKernelFlavor flavor>
1267 real linear_angles(int             nbonds,
1268                    const t_iatom   forceatoms[],
1269                    const t_iparams forceparams[],
1270                    const rvec      x[],
1271                    rvec4           f[],
1272                    rvec            fshift[],
1273                    const t_pbc*    pbc,
1274                    real            lambda,
1275                    real*           dvdlambda,
1276                    const t_mdatoms gmx_unused* md,
1277                    t_fcdata gmx_unused* fcd,
1278                    t_disresdata gmx_unused* disresdata,
1279                    t_oriresdata gmx_unused* oriresdata,
1280                    int gmx_unused* global_atom_index)
1281 {
1282     int  i, m, ai, aj, ak, t1, t2, type;
1283     rvec f_i, f_j, f_k;
1284     real L1, kA, kB, aA, aB, dr, dr2, va, vtot, a, b, klin;
1285     rvec r_ij, r_kj, r_ik, dx;
1286
1287     L1   = 1 - lambda;
1288     vtot = 0.0;
1289     for (i = 0; (i < nbonds);)
1290     {
1291         type = forceatoms[i++];
1292         ai   = forceatoms[i++];
1293         aj   = forceatoms[i++];
1294         ak   = forceatoms[i++];
1295
1296         kA   = forceparams[type].linangle.klinA;
1297         kB   = forceparams[type].linangle.klinB;
1298         klin = L1 * kA + lambda * kB;
1299
1300         aA = forceparams[type].linangle.aA;
1301         aB = forceparams[type].linangle.aB;
1302         a  = L1 * aA + lambda * aB;
1303         b  = 1 - a;
1304
1305         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], r_ij);
1306         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], r_kj);
1307         rvec_sub(r_ij, r_kj, r_ik);
1308
1309         dr2 = 0;
1310         for (m = 0; (m < DIM); m++)
1311         {
1312             dr = -a * r_ij[m] - b * r_kj[m];
1313             dr2 += dr * dr;
1314             dx[m]  = dr;
1315             f_i[m] = a * klin * dr;
1316             f_k[m] = b * klin * dr;
1317             f_j[m] = -(f_i[m] + f_k[m]);
1318             f[ai][m] += f_i[m];
1319             f[aj][m] += f_j[m];
1320             f[ak][m] += f_k[m];
1321         }
1322         va = 0.5 * klin * dr2;
1323         *dvdlambda += 0.5 * (kB - kA) * dr2 + klin * (aB - aA) * iprod(dx, r_ik);
1324
1325         vtot += va;
1326
1327         if (computeVirial(flavor))
1328         {
1329             rvec_inc(fshift[t1], f_i);
1330             rvec_inc(fshift[CENTRAL], f_j);
1331             rvec_inc(fshift[t2], f_k);
1332         }
1333     } /* 57 TOTAL       */
1334     return vtot;
1335 }
1336
1337 template<BondedKernelFlavor flavor>
1338 std::enable_if_t<flavor != BondedKernelFlavor::ForcesSimdWhenAvailable || !GMX_SIMD_HAVE_REAL, real>
1339 urey_bradley(int             nbonds,
1340              const t_iatom   forceatoms[],
1341              const t_iparams forceparams[],
1342              const rvec      x[],
1343              rvec4           f[],
1344              rvec            fshift[],
1345              const t_pbc*    pbc,
1346              real            lambda,
1347              real*           dvdlambda,
1348              const t_mdatoms gmx_unused* md,
1349              t_fcdata gmx_unused* fcd,
1350              t_disresdata gmx_unused* disresdata,
1351              t_oriresdata gmx_unused* oriresdata,
1352              int gmx_unused* global_atom_index)
1353 {
1354     int  i, m, ai, aj, ak, t1, t2, type, ki;
1355     rvec r_ij, r_kj, r_ik;
1356     real cos_theta, cos_theta2, theta;
1357     real dVdt, va, vtot, dr, dr2, vbond, fbond, fik;
1358     real kthA, th0A, kUBA, r13A, kthB, th0B, kUBB, r13B;
1359
1360     vtot = 0.0;
1361     for (i = 0; (i < nbonds);)
1362     {
1363         type = forceatoms[i++];
1364         ai   = forceatoms[i++];
1365         aj   = forceatoms[i++];
1366         ak   = forceatoms[i++];
1367         th0A = forceparams[type].u_b.thetaA * gmx::c_deg2Rad;
1368         kthA = forceparams[type].u_b.kthetaA;
1369         r13A = forceparams[type].u_b.r13A;
1370         kUBA = forceparams[type].u_b.kUBA;
1371         th0B = forceparams[type].u_b.thetaB * gmx::c_deg2Rad;
1372         kthB = forceparams[type].u_b.kthetaB;
1373         r13B = forceparams[type].u_b.r13B;
1374         kUBB = forceparams[type].u_b.kUBB;
1375
1376         theta = bond_angle(x[ai], x[aj], x[ak], pbc, r_ij, r_kj, &cos_theta, &t1, &t2); /*  41 */
1377
1378         *dvdlambda += harmonic(kthA, kthB, th0A, th0B, theta, lambda, &va, &dVdt); /*  21  */
1379         vtot += va;
1380
1381         ki  = pbc_rvec_sub(pbc, x[ai], x[ak], r_ik); /*   3      */
1382         dr2 = iprod(r_ik, r_ik);                     /*   5             */
1383         dr  = dr2 * gmx::invsqrt(dr2);               /*  10             */
1384
1385         *dvdlambda += harmonic(kUBA, kUBB, r13A, r13B, dr, lambda, &vbond, &fbond); /*  19  */
1386
1387         cos_theta2 = gmx::square(cos_theta); /*   1             */
1388         if (cos_theta2 < 1)
1389         {
1390             real st, sth;
1391             real cik, cii, ckk;
1392             real nrkj2, nrij2;
1393             rvec f_i, f_j, f_k;
1394
1395             st    = dVdt * gmx::invsqrt(1 - cos_theta2); /*  12         */
1396             sth   = st * cos_theta;                      /*   1         */
1397             nrkj2 = iprod(r_kj, r_kj);                   /*   5         */
1398             nrij2 = iprod(r_ij, r_ij);
1399
1400             cik = st * gmx::invsqrt(nrkj2 * nrij2); /*  12              */
1401             cii = sth / nrij2;                      /*  10              */
1402             ckk = sth / nrkj2;                      /*  10              */
1403
1404             for (m = 0; (m < DIM); m++) /*  39          */
1405             {
1406                 f_i[m] = -(cik * r_kj[m] - cii * r_ij[m]);
1407                 f_k[m] = -(cik * r_ij[m] - ckk * r_kj[m]);
1408                 f_j[m] = -f_i[m] - f_k[m];
1409                 f[ai][m] += f_i[m];
1410                 f[aj][m] += f_j[m];
1411                 f[ak][m] += f_k[m];
1412             }
1413             if (computeVirial(flavor))
1414             {
1415                 rvec_inc(fshift[t1], f_i);
1416                 rvec_inc(fshift[CENTRAL], f_j);
1417                 rvec_inc(fshift[t2], f_k);
1418             }
1419         } /* 161 TOTAL  */
1420         /* Time for the bond calculations */
1421         if (dr2 == 0.0)
1422         {
1423             continue;
1424         }
1425
1426         vtot += vbond;              /* 1*/
1427         fbond *= gmx::invsqrt(dr2); /*   6              */
1428
1429         for (m = 0; (m < DIM); m++) /*  15              */
1430         {
1431             fik = fbond * r_ik[m];
1432             f[ai][m] += fik;
1433             f[ak][m] -= fik;
1434             if (computeVirial(flavor))
1435             {
1436                 fshift[ki][m] += fik;
1437                 fshift[CENTRAL][m] -= fik;
1438             }
1439         }
1440     }
1441     return vtot;
1442 }
1443
1444 #if GMX_SIMD_HAVE_REAL
1445
1446 /* As urey_bradley, but using SIMD to calculate many potentials at once.
1447  * This routines does not calculate energies and shift forces.
1448  */
1449 template<BondedKernelFlavor flavor>
1450 std::enable_if_t<flavor == BondedKernelFlavor::ForcesSimdWhenAvailable, real>
1451 urey_bradley(int             nbonds,
1452              const t_iatom   forceatoms[],
1453              const t_iparams forceparams[],
1454              const rvec      x[],
1455              rvec4           f[],
1456              rvec gmx_unused fshift[],
1457              const t_pbc*    pbc,
1458              real gmx_unused lambda,
1459              real gmx_unused* dvdlambda,
1460              const t_mdatoms gmx_unused* md,
1461              t_fcdata gmx_unused* fcd,
1462              t_disresdata gmx_unused* disresdata,
1463              t_oriresdata gmx_unused* oriresdata,
1464              int gmx_unused* global_atom_index)
1465 {
1466     constexpr int                            nfa1 = 4;
1467     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ai[GMX_SIMD_REAL_WIDTH];
1468     alignas(GMX_SIMD_ALIGNMENT) std::int32_t aj[GMX_SIMD_REAL_WIDTH];
1469     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ak[GMX_SIMD_REAL_WIDTH];
1470     alignas(GMX_SIMD_ALIGNMENT) real         coeff[4 * GMX_SIMD_REAL_WIDTH];
1471     alignas(GMX_SIMD_ALIGNMENT) real         pbc_simd[9 * GMX_SIMD_REAL_WIDTH];
1472
1473     set_pbc_simd(pbc, pbc_simd);
1474
1475     /* nbonds is the number of angles times nfa1, here we step GMX_SIMD_REAL_WIDTH angles */
1476     for (int i = 0; i < nbonds; i += GMX_SIMD_REAL_WIDTH * nfa1)
1477     {
1478         /* Collect atoms for GMX_SIMD_REAL_WIDTH angles.
1479          * iu indexes into forceatoms, we should not let iu go beyond nbonds.
1480          */
1481         int iu = i;
1482         for (int s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
1483         {
1484             const int type = forceatoms[iu];
1485             ai[s]          = forceatoms[iu + 1];
1486             aj[s]          = forceatoms[iu + 2];
1487             ak[s]          = forceatoms[iu + 3];
1488
1489             /* At the end fill the arrays with the last atoms and 0 params */
1490             if (i + s * nfa1 < nbonds)
1491             {
1492                 coeff[s]                           = forceparams[type].u_b.kthetaA;
1493                 coeff[GMX_SIMD_REAL_WIDTH + s]     = forceparams[type].u_b.thetaA;
1494                 coeff[GMX_SIMD_REAL_WIDTH * 2 + s] = forceparams[type].u_b.kUBA;
1495                 coeff[GMX_SIMD_REAL_WIDTH * 3 + s] = forceparams[type].u_b.r13A;
1496
1497                 if (iu + nfa1 < nbonds)
1498                 {
1499                     iu += nfa1;
1500                 }
1501             }
1502             else
1503             {
1504                 coeff[s]                           = 0;
1505                 coeff[GMX_SIMD_REAL_WIDTH + s]     = 0;
1506                 coeff[GMX_SIMD_REAL_WIDTH * 2 + s] = 0;
1507                 coeff[GMX_SIMD_REAL_WIDTH * 3 + s] = 0;
1508             }
1509         }
1510
1511         SimdReal xi_S, yi_S, zi_S;
1512         SimdReal xj_S, yj_S, zj_S;
1513         SimdReal xk_S, yk_S, zk_S;
1514
1515         /* Store the non PBC corrected distances packed and aligned */
1516         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ai, &xi_S, &yi_S, &zi_S);
1517         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), aj, &xj_S, &yj_S, &zj_S);
1518         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ak, &xk_S, &yk_S, &zk_S);
1519         SimdReal rijx_S = xi_S - xj_S;
1520         SimdReal rijy_S = yi_S - yj_S;
1521         SimdReal rijz_S = zi_S - zj_S;
1522         SimdReal rkjx_S = xk_S - xj_S;
1523         SimdReal rkjy_S = yk_S - yj_S;
1524         SimdReal rkjz_S = zk_S - zj_S;
1525         SimdReal rikx_S = xi_S - xk_S;
1526         SimdReal riky_S = yi_S - yk_S;
1527         SimdReal rikz_S = zi_S - zk_S;
1528
1529         const SimdReal ktheta_S = load<SimdReal>(coeff);
1530         const SimdReal theta0_S = load<SimdReal>(coeff + GMX_SIMD_REAL_WIDTH) * gmx::c_deg2Rad;
1531         const SimdReal kUB_S    = load<SimdReal>(coeff + 2 * GMX_SIMD_REAL_WIDTH);
1532         const SimdReal r13_S    = load<SimdReal>(coeff + 3 * GMX_SIMD_REAL_WIDTH);
1533
1534         pbc_correct_dx_simd(&rijx_S, &rijy_S, &rijz_S, pbc_simd);
1535         pbc_correct_dx_simd(&rkjx_S, &rkjy_S, &rkjz_S, pbc_simd);
1536         pbc_correct_dx_simd(&rikx_S, &riky_S, &rikz_S, pbc_simd);
1537
1538         const SimdReal rij_rkj_S = iprod(rijx_S, rijy_S, rijz_S, rkjx_S, rkjy_S, rkjz_S);
1539
1540         const SimdReal dr2_S = iprod(rikx_S, riky_S, rikz_S, rikx_S, riky_S, rikz_S);
1541
1542         const SimdReal nrij2_S = norm2(rijx_S, rijy_S, rijz_S);
1543         const SimdReal nrkj2_S = norm2(rkjx_S, rkjy_S, rkjz_S);
1544
1545         const SimdReal nrij_1_S = invsqrt(nrij2_S);
1546         const SimdReal nrkj_1_S = invsqrt(nrkj2_S);
1547         const SimdReal invdr2_S = invsqrt(dr2_S);
1548         const SimdReal dr_S     = dr2_S * invdr2_S;
1549
1550         constexpr real min_one_plus_eps = -1.0 + 2.0 * GMX_REAL_EPS; // Smallest number > -1
1551
1552         /* To allow for 180 degrees, we take the max of cos and -1 + 1bit,
1553          * so we can safely get the 1/sin from 1/sqrt(1 - cos^2).
1554          * This also ensures that rounding errors would cause the argument
1555          * of simdAcos to be < -1.
1556          * Note that we do not take precautions for cos(0)=1, so the bonds
1557          * in an angle should not align at an angle of 0 degrees.
1558          */
1559         const SimdReal cos_S = max(rij_rkj_S * nrij_1_S * nrkj_1_S, min_one_plus_eps);
1560
1561         const SimdReal theta_S  = acos(cos_S);
1562         const SimdReal invsin_S = invsqrt(1.0 - cos_S * cos_S);
1563         const SimdReal st_S     = ktheta_S * (theta0_S - theta_S) * invsin_S;
1564         const SimdReal sth_S    = st_S * cos_S;
1565
1566         const SimdReal cik_S = st_S * nrij_1_S * nrkj_1_S;
1567         const SimdReal cii_S = sth_S * nrij_1_S * nrij_1_S;
1568         const SimdReal ckk_S = sth_S * nrkj_1_S * nrkj_1_S;
1569
1570         const SimdReal sUB_S = kUB_S * (r13_S - dr_S) * invdr2_S;
1571
1572         const SimdReal f_ikx_S = sUB_S * rikx_S;
1573         const SimdReal f_iky_S = sUB_S * riky_S;
1574         const SimdReal f_ikz_S = sUB_S * rikz_S;
1575
1576         const SimdReal f_ix_S = fnma(cik_S, rkjx_S, cii_S * rijx_S) + f_ikx_S;
1577         const SimdReal f_iy_S = fnma(cik_S, rkjy_S, cii_S * rijy_S) + f_iky_S;
1578         const SimdReal f_iz_S = fnma(cik_S, rkjz_S, cii_S * rijz_S) + f_ikz_S;
1579         const SimdReal f_kx_S = fnma(cik_S, rijx_S, ckk_S * rkjx_S) - f_ikx_S;
1580         const SimdReal f_ky_S = fnma(cik_S, rijy_S, ckk_S * rkjy_S) - f_iky_S;
1581         const SimdReal f_kz_S = fnma(cik_S, rijz_S, ckk_S * rkjz_S) - f_ikz_S;
1582
1583         transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ai, f_ix_S, f_iy_S, f_iz_S);
1584         transposeScatterDecrU<4>(
1585                 reinterpret_cast<real*>(f), aj, f_ix_S + f_kx_S, f_iy_S + f_ky_S, f_iz_S + f_kz_S);
1586         transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ak, f_kx_S, f_ky_S, f_kz_S);
1587     }
1588
1589     return 0;
1590 }
1591
1592 #endif // GMX_SIMD_HAVE_REAL
1593
1594 template<BondedKernelFlavor flavor>
1595 real quartic_angles(int             nbonds,
1596                     const t_iatom   forceatoms[],
1597                     const t_iparams forceparams[],
1598                     const rvec      x[],
1599                     rvec4           f[],
1600                     rvec            fshift[],
1601                     const t_pbc*    pbc,
1602                     real gmx_unused lambda,
1603                     real gmx_unused* dvdlambda,
1604                     const t_mdatoms gmx_unused* md,
1605                     t_fcdata gmx_unused* fcd,
1606                     t_disresdata gmx_unused* disresdata,
1607                     t_oriresdata gmx_unused* oriresdata,
1608                     int gmx_unused* global_atom_index)
1609 {
1610     int  i, j, ai, aj, ak, t1, t2, type;
1611     rvec r_ij, r_kj;
1612     real cos_theta, cos_theta2, theta, dt, dVdt, va, dtp, c, vtot;
1613
1614     vtot = 0.0;
1615     for (i = 0; (i < nbonds);)
1616     {
1617         type = forceatoms[i++];
1618         ai   = forceatoms[i++];
1619         aj   = forceatoms[i++];
1620         ak   = forceatoms[i++];
1621
1622         theta = bond_angle(x[ai], x[aj], x[ak], pbc, r_ij, r_kj, &cos_theta, &t1, &t2); /*  41 */
1623
1624         dt = theta - forceparams[type].qangle.theta * gmx::c_deg2Rad; /* 2          */
1625
1626         dVdt = 0;
1627         va   = forceparams[type].qangle.c[0];
1628         dtp  = 1.0;
1629         for (j = 1; j <= 4; j++)
1630         {
1631             c = forceparams[type].qangle.c[j];
1632             dVdt -= j * c * dtp;
1633             dtp *= dt;
1634             va += c * dtp;
1635         }
1636         /* 20 */
1637
1638         vtot += va;
1639
1640         cos_theta2 = gmx::square(cos_theta); /*   1             */
1641         if (cos_theta2 < 1)
1642         {
1643             int  m;
1644             real st, sth;
1645             real cik, cii, ckk;
1646             real nrkj2, nrij2;
1647             rvec f_i, f_j, f_k;
1648
1649             st    = dVdt * gmx::invsqrt(1 - cos_theta2); /*  12         */
1650             sth   = st * cos_theta;                      /*   1         */
1651             nrkj2 = iprod(r_kj, r_kj);                   /*   5         */
1652             nrij2 = iprod(r_ij, r_ij);
1653
1654             cik = st * gmx::invsqrt(nrkj2 * nrij2); /*  12              */
1655             cii = sth / nrij2;                      /*  10              */
1656             ckk = sth / nrkj2;                      /*  10              */
1657
1658             for (m = 0; (m < DIM); m++) /*  39          */
1659             {
1660                 f_i[m] = -(cik * r_kj[m] - cii * r_ij[m]);
1661                 f_k[m] = -(cik * r_ij[m] - ckk * r_kj[m]);
1662                 f_j[m] = -f_i[m] - f_k[m];
1663                 f[ai][m] += f_i[m];
1664                 f[aj][m] += f_j[m];
1665                 f[ak][m] += f_k[m];
1666             }
1667
1668             if (computeVirial(flavor))
1669             {
1670                 rvec_inc(fshift[t1], f_i);
1671                 rvec_inc(fshift[CENTRAL], f_j);
1672                 rvec_inc(fshift[t2], f_k);
1673             }
1674         } /* 153 TOTAL  */
1675     }
1676     return vtot;
1677 }
1678
1679
1680 #if GMX_SIMD_HAVE_REAL
1681
1682 /* As dih_angle above, but calculates 4 dihedral angles at once using SIMD,
1683  * also calculates the pre-factor required for the dihedral force update.
1684  * Note that bv and buf should be register aligned.
1685  */
1686 inline void dih_angle_simd(const rvec* x,
1687                            const int*  ai,
1688                            const int*  aj,
1689                            const int*  ak,
1690                            const int*  al,
1691                            const real* pbc_simd,
1692                            SimdReal*   phi_S,
1693                            SimdReal*   mx_S,
1694                            SimdReal*   my_S,
1695                            SimdReal*   mz_S,
1696                            SimdReal*   nx_S,
1697                            SimdReal*   ny_S,
1698                            SimdReal*   nz_S,
1699                            SimdReal*   nrkj_m2_S,
1700                            SimdReal*   nrkj_n2_S,
1701                            SimdReal*   p_S,
1702                            SimdReal*   q_S)
1703 {
1704     SimdReal xi_S, yi_S, zi_S;
1705     SimdReal xj_S, yj_S, zj_S;
1706     SimdReal xk_S, yk_S, zk_S;
1707     SimdReal xl_S, yl_S, zl_S;
1708     SimdReal rijx_S, rijy_S, rijz_S;
1709     SimdReal rkjx_S, rkjy_S, rkjz_S;
1710     SimdReal rklx_S, rkly_S, rklz_S;
1711     SimdReal cx_S, cy_S, cz_S;
1712     SimdReal cn_S;
1713     SimdReal s_S;
1714     SimdReal ipr_S;
1715     SimdReal iprm_S, iprn_S;
1716     SimdReal nrkj2_S, nrkj_1_S, nrkj_2_S, nrkj_S;
1717     SimdReal toler_S;
1718     SimdReal nrkj2_min_S;
1719     SimdReal real_eps_S;
1720
1721     /* Used to avoid division by zero.
1722      * We take into acount that we multiply the result by real_eps_S.
1723      */
1724     nrkj2_min_S = SimdReal(GMX_REAL_MIN / (2 * GMX_REAL_EPS));
1725
1726     /* The value of the last significant bit (GMX_REAL_EPS is half of that) */
1727     real_eps_S = SimdReal(2 * GMX_REAL_EPS);
1728
1729     /* Store the non PBC corrected distances packed and aligned */
1730     gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ai, &xi_S, &yi_S, &zi_S);
1731     gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), aj, &xj_S, &yj_S, &zj_S);
1732     gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ak, &xk_S, &yk_S, &zk_S);
1733     gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), al, &xl_S, &yl_S, &zl_S);
1734     rijx_S = xi_S - xj_S;
1735     rijy_S = yi_S - yj_S;
1736     rijz_S = zi_S - zj_S;
1737     rkjx_S = xk_S - xj_S;
1738     rkjy_S = yk_S - yj_S;
1739     rkjz_S = zk_S - zj_S;
1740     rklx_S = xk_S - xl_S;
1741     rkly_S = yk_S - yl_S;
1742     rklz_S = zk_S - zl_S;
1743
1744     pbc_correct_dx_simd(&rijx_S, &rijy_S, &rijz_S, pbc_simd);
1745     pbc_correct_dx_simd(&rkjx_S, &rkjy_S, &rkjz_S, pbc_simd);
1746     pbc_correct_dx_simd(&rklx_S, &rkly_S, &rklz_S, pbc_simd);
1747
1748     cprod(rijx_S, rijy_S, rijz_S, rkjx_S, rkjy_S, rkjz_S, mx_S, my_S, mz_S);
1749
1750     cprod(rkjx_S, rkjy_S, rkjz_S, rklx_S, rkly_S, rklz_S, nx_S, ny_S, nz_S);
1751
1752     cprod(*mx_S, *my_S, *mz_S, *nx_S, *ny_S, *nz_S, &cx_S, &cy_S, &cz_S);
1753
1754     cn_S = sqrt(norm2(cx_S, cy_S, cz_S));
1755
1756     s_S = iprod(*mx_S, *my_S, *mz_S, *nx_S, *ny_S, *nz_S);
1757
1758     /* Determine the dihedral angle, the sign might need correction */
1759     *phi_S = atan2(cn_S, s_S);
1760
1761     ipr_S = iprod(rijx_S, rijy_S, rijz_S, *nx_S, *ny_S, *nz_S);
1762
1763     iprm_S = norm2(*mx_S, *my_S, *mz_S);
1764     iprn_S = norm2(*nx_S, *ny_S, *nz_S);
1765
1766     nrkj2_S = norm2(rkjx_S, rkjy_S, rkjz_S);
1767
1768     /* Avoid division by zero. When zero, the result is multiplied by 0
1769      * anyhow, so the 3 max below do not affect the final result.
1770      */
1771     nrkj2_S  = max(nrkj2_S, nrkj2_min_S);
1772     nrkj_1_S = invsqrt(nrkj2_S);
1773     nrkj_2_S = nrkj_1_S * nrkj_1_S;
1774     nrkj_S   = nrkj2_S * nrkj_1_S;
1775
1776     toler_S = nrkj2_S * real_eps_S;
1777
1778     /* Here the plain-C code uses a conditional, but we can't do that in SIMD.
1779      * So we take a max with the tolerance instead. Since we multiply with
1780      * m or n later, the max does not affect the results.
1781      */
1782     iprm_S     = max(iprm_S, toler_S);
1783     iprn_S     = max(iprn_S, toler_S);
1784     *nrkj_m2_S = nrkj_S * inv(iprm_S);
1785     *nrkj_n2_S = nrkj_S * inv(iprn_S);
1786
1787     /* Set sign of phi_S with the sign of ipr_S; phi_S is currently positive */
1788     *phi_S = copysign(*phi_S, ipr_S);
1789     *p_S   = iprod(rijx_S, rijy_S, rijz_S, rkjx_S, rkjy_S, rkjz_S);
1790     *p_S   = *p_S * nrkj_2_S;
1791
1792     *q_S = iprod(rklx_S, rkly_S, rklz_S, rkjx_S, rkjy_S, rkjz_S);
1793     *q_S = *q_S * nrkj_2_S;
1794 }
1795
1796 #endif // GMX_SIMD_HAVE_REAL
1797
1798 } // namespace
1799
1800 template<BondedKernelFlavor flavor>
1801 void do_dih_fup(int          i,
1802                 int          j,
1803                 int          k,
1804                 int          l,
1805                 real         ddphi,
1806                 rvec         r_ij,
1807                 rvec         r_kj,
1808                 rvec         r_kl,
1809                 rvec         m,
1810                 rvec         n,
1811                 rvec4        f[],
1812                 rvec         fshift[],
1813                 const t_pbc* pbc,
1814                 const rvec   x[],
1815                 int          t1,
1816                 int          t2,
1817                 int          t3)
1818 {
1819     /* 143 FLOPS */
1820     rvec f_i, f_j, f_k, f_l;
1821     rvec uvec, vvec, svec, dx_jl;
1822     real iprm, iprn, nrkj, nrkj2, nrkj_1, nrkj_2;
1823     real a, b, p, q, toler;
1824
1825     iprm  = iprod(m, m);       /*  5    */
1826     iprn  = iprod(n, n);       /*  5    */
1827     nrkj2 = iprod(r_kj, r_kj); /*  5    */
1828     toler = nrkj2 * GMX_REAL_EPS;
1829     if ((iprm > toler) && (iprn > toler))
1830     {
1831         nrkj_1 = gmx::invsqrt(nrkj2);  /* 10    */
1832         nrkj_2 = nrkj_1 * nrkj_1;      /*  1    */
1833         nrkj   = nrkj2 * nrkj_1;       /*  1    */
1834         a      = -ddphi * nrkj / iprm; /* 11    */
1835         svmul(a, m, f_i);              /*  3    */
1836         b = ddphi * nrkj / iprn;       /* 11    */
1837         svmul(b, n, f_l);              /*  3  */
1838         p = iprod(r_ij, r_kj);         /*  5    */
1839         p *= nrkj_2;                   /*  1    */
1840         q = iprod(r_kl, r_kj);         /*  5    */
1841         q *= nrkj_2;                   /*  1    */
1842         svmul(p, f_i, uvec);           /*  3    */
1843         svmul(q, f_l, vvec);           /*  3    */
1844         rvec_sub(uvec, vvec, svec);    /*  3    */
1845         rvec_sub(f_i, svec, f_j);      /*  3    */
1846         rvec_add(f_l, svec, f_k);      /*  3    */
1847         rvec_inc(f[i], f_i);           /*  3    */
1848         rvec_dec(f[j], f_j);           /*  3    */
1849         rvec_dec(f[k], f_k);           /*  3    */
1850         rvec_inc(f[l], f_l);           /*  3    */
1851
1852         if (computeVirial(flavor))
1853         {
1854             if (pbc)
1855             {
1856                 t3 = pbc_rvec_sub(pbc, x[l], x[j], dx_jl);
1857             }
1858             else
1859             {
1860                 t3 = CENTRAL;
1861             }
1862
1863             rvec_inc(fshift[t1], f_i);
1864             rvec_dec(fshift[CENTRAL], f_j);
1865             rvec_dec(fshift[t2], f_k);
1866             rvec_inc(fshift[t3], f_l);
1867         }
1868     }
1869     /* 112 TOTAL    */
1870 }
1871
1872 namespace
1873 {
1874
1875 #if GMX_SIMD_HAVE_REAL
1876 /* As do_dih_fup_noshiftf above, but with SIMD and pre-calculated pre-factors */
1877 inline void gmx_simdcall do_dih_fup_noshiftf_simd(const int* ai,
1878                                                   const int* aj,
1879                                                   const int* ak,
1880                                                   const int* al,
1881                                                   SimdReal   p,
1882                                                   SimdReal   q,
1883                                                   SimdReal   f_i_x,
1884                                                   SimdReal   f_i_y,
1885                                                   SimdReal   f_i_z,
1886                                                   SimdReal   mf_l_x,
1887                                                   SimdReal   mf_l_y,
1888                                                   SimdReal   mf_l_z,
1889                                                   rvec4      f[])
1890 {
1891     SimdReal sx    = p * f_i_x + q * mf_l_x;
1892     SimdReal sy    = p * f_i_y + q * mf_l_y;
1893     SimdReal sz    = p * f_i_z + q * mf_l_z;
1894     SimdReal f_j_x = f_i_x - sx;
1895     SimdReal f_j_y = f_i_y - sy;
1896     SimdReal f_j_z = f_i_z - sz;
1897     SimdReal f_k_x = mf_l_x - sx;
1898     SimdReal f_k_y = mf_l_y - sy;
1899     SimdReal f_k_z = mf_l_z - sz;
1900     transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ai, f_i_x, f_i_y, f_i_z);
1901     transposeScatterDecrU<4>(reinterpret_cast<real*>(f), aj, f_j_x, f_j_y, f_j_z);
1902     transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ak, f_k_x, f_k_y, f_k_z);
1903     transposeScatterDecrU<4>(reinterpret_cast<real*>(f), al, mf_l_x, mf_l_y, mf_l_z);
1904 }
1905 #endif // GMX_SIMD_HAVE_REAL
1906
1907 /*! \brief Computes and returns the proper dihedral force
1908  *
1909  * With the appropriate kernel flavor, also computes and accumulates
1910  * the energy and dV/dlambda.
1911  */
1912 template<BondedKernelFlavor flavor>
1913 real dopdihs(real cpA, real cpB, real phiA, real phiB, int mult, real phi, real lambda, real* V, real* dvdlambda)
1914 {
1915     const real L1   = 1.0 - lambda;
1916     const real ph0  = (L1 * phiA + lambda * phiB) * gmx::c_deg2Rad;
1917     const real dph0 = (phiB - phiA) * gmx::c_deg2Rad;
1918     const real cp   = L1 * cpA + lambda * cpB;
1919
1920     const real mdphi = mult * phi - ph0;
1921     const real sdphi = std::sin(mdphi);
1922     const real ddphi = -cp * mult * sdphi;
1923     if (computeEnergy(flavor))
1924     {
1925         const real v1 = 1 + std::cos(mdphi);
1926         *V += cp * v1;
1927
1928         *dvdlambda += (cpB - cpA) * v1 + cp * dph0 * sdphi;
1929     }
1930
1931     return ddphi;
1932     /* That was 40 flops */
1933 }
1934
1935 /*! \brief Similar to \p dopdihs(), except for a minus sign and a different treatment of mult/phi0 */
1936 real dopdihs_min(real cpA, real cpB, real phiA, real phiB, int mult, real phi, real lambda, real* V, real* F)
1937 {
1938     real v, dvdlambda, mdphi, v1, sdphi, ddphi;
1939     real L1   = 1.0 - lambda;
1940     real ph0  = (L1 * phiA + lambda * phiB) * gmx::c_deg2Rad;
1941     real dph0 = (phiB - phiA) * gmx::c_deg2Rad;
1942     real cp   = L1 * cpA + lambda * cpB;
1943
1944     mdphi = mult * (phi - ph0);
1945     sdphi = std::sin(mdphi);
1946     ddphi = cp * mult * sdphi;
1947     v1    = 1.0 - std::cos(mdphi);
1948     v     = cp * v1;
1949
1950     dvdlambda = (cpB - cpA) * v1 + cp * dph0 * sdphi;
1951
1952     *V = v;
1953     *F = ddphi;
1954
1955     return dvdlambda;
1956
1957     /* That was 40 flops */
1958 }
1959
1960 template<BondedKernelFlavor flavor>
1961 std::enable_if_t<flavor != BondedKernelFlavor::ForcesSimdWhenAvailable || !GMX_SIMD_HAVE_REAL, real>
1962 pdihs(int             nbonds,
1963       const t_iatom   forceatoms[],
1964       const t_iparams forceparams[],
1965       const rvec      x[],
1966       rvec4           f[],
1967       rvec            fshift[],
1968       const t_pbc*    pbc,
1969       real            lambda,
1970       real*           dvdlambda,
1971       const t_mdatoms gmx_unused* md,
1972       t_fcdata gmx_unused* fcd,
1973       t_disresdata gmx_unused* disresdata,
1974       t_oriresdata gmx_unused* oriresdata,
1975       int gmx_unused* global_atom_index)
1976 {
1977     int  t1, t2, t3;
1978     rvec r_ij, r_kj, r_kl, m, n;
1979
1980     real vtot = 0.0;
1981
1982     for (int i = 0; i < nbonds;)
1983     {
1984         const int ai = forceatoms[i + 1];
1985         const int aj = forceatoms[i + 2];
1986         const int ak = forceatoms[i + 3];
1987         const int al = forceatoms[i + 4];
1988
1989         const real phi =
1990                 dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n, &t1, &t2, &t3); /*  84 */
1991
1992         /* Loop over dihedrals working on the same atoms,
1993          * so we avoid recalculating angles and distributing forces.
1994          */
1995         real ddphi_tot = 0;
1996         do
1997         {
1998             const int type = forceatoms[i];
1999             ddphi_tot += dopdihs<flavor>(forceparams[type].pdihs.cpA,
2000                                          forceparams[type].pdihs.cpB,
2001                                          forceparams[type].pdihs.phiA,
2002                                          forceparams[type].pdihs.phiB,
2003                                          forceparams[type].pdihs.mult,
2004                                          phi,
2005                                          lambda,
2006                                          &vtot,
2007                                          dvdlambda);
2008
2009             i += 5;
2010         } while (i < nbonds && forceatoms[i + 1] == ai && forceatoms[i + 2] == aj
2011                  && forceatoms[i + 3] == ak && forceatoms[i + 4] == al);
2012
2013         do_dih_fup<flavor>(
2014                 ai, aj, ak, al, ddphi_tot, r_ij, r_kj, r_kl, m, n, f, fshift, pbc, x, t1, t2, t3); /* 112               */
2015     } /* 223 TOTAL  */
2016
2017     return vtot;
2018 }
2019
2020 #if GMX_SIMD_HAVE_REAL
2021
2022 /* As pdihs above, but using SIMD to calculate multiple dihedrals at once */
2023 template<BondedKernelFlavor flavor>
2024 std::enable_if_t<flavor == BondedKernelFlavor::ForcesSimdWhenAvailable, real>
2025 pdihs(int             nbonds,
2026       const t_iatom   forceatoms[],
2027       const t_iparams forceparams[],
2028       const rvec      x[],
2029       rvec4           f[],
2030       rvec gmx_unused fshift[],
2031       const t_pbc*    pbc,
2032       real gmx_unused lambda,
2033       real gmx_unused* dvdlambda,
2034       const t_mdatoms gmx_unused* md,
2035       t_fcdata gmx_unused* fcd,
2036       t_disresdata gmx_unused* disresdata,
2037       t_oriresdata gmx_unused* oriresdata,
2038       int gmx_unused* global_atom_index)
2039 {
2040     const int                                nfa1 = 5;
2041     int                                      i, iu, s;
2042     int                                      type;
2043     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ai[GMX_SIMD_REAL_WIDTH];
2044     alignas(GMX_SIMD_ALIGNMENT) std::int32_t aj[GMX_SIMD_REAL_WIDTH];
2045     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ak[GMX_SIMD_REAL_WIDTH];
2046     alignas(GMX_SIMD_ALIGNMENT) std::int32_t al[GMX_SIMD_REAL_WIDTH];
2047     alignas(GMX_SIMD_ALIGNMENT) real         buf[3 * GMX_SIMD_REAL_WIDTH];
2048     real *                                   cp, *phi0, *mult;
2049     SimdReal                                 deg2rad_S(gmx::c_deg2Rad);
2050     SimdReal                                 p_S, q_S;
2051     SimdReal                                 phi0_S, phi_S;
2052     SimdReal                                 mx_S, my_S, mz_S;
2053     SimdReal                                 nx_S, ny_S, nz_S;
2054     SimdReal                                 nrkj_m2_S, nrkj_n2_S;
2055     SimdReal                                 cp_S, mdphi_S, mult_S;
2056     SimdReal                                 sin_S, cos_S;
2057     SimdReal                                 mddphi_S;
2058     SimdReal                                 sf_i_S, msf_l_S;
2059     alignas(GMX_SIMD_ALIGNMENT) real         pbc_simd[9 * GMX_SIMD_REAL_WIDTH];
2060
2061     /* Extract aligned pointer for parameters and variables */
2062     cp   = buf + 0 * GMX_SIMD_REAL_WIDTH;
2063     phi0 = buf + 1 * GMX_SIMD_REAL_WIDTH;
2064     mult = buf + 2 * GMX_SIMD_REAL_WIDTH;
2065
2066     set_pbc_simd(pbc, pbc_simd);
2067
2068     /* nbonds is the number of dihedrals times nfa1, here we step GMX_SIMD_REAL_WIDTH dihs */
2069     for (i = 0; (i < nbonds); i += GMX_SIMD_REAL_WIDTH * nfa1)
2070     {
2071         /* Collect atoms quadruplets for GMX_SIMD_REAL_WIDTH dihedrals.
2072          * iu indexes into forceatoms, we should not let iu go beyond nbonds.
2073          */
2074         iu = i;
2075         for (s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
2076         {
2077             type  = forceatoms[iu];
2078             ai[s] = forceatoms[iu + 1];
2079             aj[s] = forceatoms[iu + 2];
2080             ak[s] = forceatoms[iu + 3];
2081             al[s] = forceatoms[iu + 4];
2082
2083             /* At the end fill the arrays with the last atoms and 0 params */
2084             if (i + s * nfa1 < nbonds)
2085             {
2086                 cp[s]   = forceparams[type].pdihs.cpA;
2087                 phi0[s] = forceparams[type].pdihs.phiA;
2088                 mult[s] = forceparams[type].pdihs.mult;
2089
2090                 if (iu + nfa1 < nbonds)
2091                 {
2092                     iu += nfa1;
2093                 }
2094             }
2095             else
2096             {
2097                 cp[s]   = 0;
2098                 phi0[s] = 0;
2099                 mult[s] = 0;
2100             }
2101         }
2102
2103         /* Calculate GMX_SIMD_REAL_WIDTH dihedral angles at once */
2104         dih_angle_simd(
2105                 x, ai, aj, ak, al, pbc_simd, &phi_S, &mx_S, &my_S, &mz_S, &nx_S, &ny_S, &nz_S, &nrkj_m2_S, &nrkj_n2_S, &p_S, &q_S);
2106
2107         cp_S   = load<SimdReal>(cp);
2108         phi0_S = load<SimdReal>(phi0) * deg2rad_S;
2109         mult_S = load<SimdReal>(mult);
2110
2111         mdphi_S = fms(mult_S, phi_S, phi0_S);
2112
2113         /* Calculate GMX_SIMD_REAL_WIDTH sines at once */
2114         sincos(mdphi_S, &sin_S, &cos_S);
2115         mddphi_S = cp_S * mult_S * sin_S;
2116         sf_i_S   = mddphi_S * nrkj_m2_S;
2117         msf_l_S  = mddphi_S * nrkj_n2_S;
2118
2119         /* After this m?_S will contain f[i] */
2120         mx_S = sf_i_S * mx_S;
2121         my_S = sf_i_S * my_S;
2122         mz_S = sf_i_S * mz_S;
2123
2124         /* After this m?_S will contain -f[l] */
2125         nx_S = msf_l_S * nx_S;
2126         ny_S = msf_l_S * ny_S;
2127         nz_S = msf_l_S * nz_S;
2128
2129         do_dih_fup_noshiftf_simd(ai, aj, ak, al, p_S, q_S, mx_S, my_S, mz_S, nx_S, ny_S, nz_S, f);
2130     }
2131
2132     return 0;
2133 }
2134
2135 /* This is mostly a copy of the SIMD flavor of pdihs above, but with using
2136  * the RB potential instead of a harmonic potential.
2137  * This function can replace rbdihs() when no energy and virial are needed.
2138  */
2139 template<BondedKernelFlavor flavor>
2140 std::enable_if_t<flavor == BondedKernelFlavor::ForcesSimdWhenAvailable, real>
2141 rbdihs(int             nbonds,
2142        const t_iatom   forceatoms[],
2143        const t_iparams forceparams[],
2144        const rvec      x[],
2145        rvec4           f[],
2146        rvec gmx_unused fshift[],
2147        const t_pbc*    pbc,
2148        real gmx_unused lambda,
2149        real gmx_unused* dvdlambda,
2150        const t_mdatoms gmx_unused* md,
2151        t_fcdata gmx_unused* fcd,
2152        t_disresdata gmx_unused* disresdata,
2153        t_oriresdata gmx_unused* oriresdata,
2154        int gmx_unused* global_atom_index)
2155 {
2156     const int                                nfa1 = 5;
2157     int                                      i, iu, s, j;
2158     int                                      type;
2159     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ai[GMX_SIMD_REAL_WIDTH];
2160     alignas(GMX_SIMD_ALIGNMENT) std::int32_t aj[GMX_SIMD_REAL_WIDTH];
2161     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ak[GMX_SIMD_REAL_WIDTH];
2162     alignas(GMX_SIMD_ALIGNMENT) std::int32_t al[GMX_SIMD_REAL_WIDTH];
2163     alignas(GMX_SIMD_ALIGNMENT) real         parm[NR_RBDIHS * GMX_SIMD_REAL_WIDTH];
2164
2165     SimdReal                         p_S, q_S;
2166     SimdReal                         phi_S;
2167     SimdReal                         ddphi_S, cosfac_S;
2168     SimdReal                         mx_S, my_S, mz_S;
2169     SimdReal                         nx_S, ny_S, nz_S;
2170     SimdReal                         nrkj_m2_S, nrkj_n2_S;
2171     SimdReal                         parm_S, c_S;
2172     SimdReal                         sin_S, cos_S;
2173     SimdReal                         sf_i_S, msf_l_S;
2174     alignas(GMX_SIMD_ALIGNMENT) real pbc_simd[9 * GMX_SIMD_REAL_WIDTH];
2175
2176     SimdReal pi_S(M_PI);
2177     SimdReal one_S(1.0);
2178
2179     set_pbc_simd(pbc, pbc_simd);
2180
2181     /* nbonds is the number of dihedrals times nfa1, here we step GMX_SIMD_REAL_WIDTH dihs */
2182     for (i = 0; (i < nbonds); i += GMX_SIMD_REAL_WIDTH * nfa1)
2183     {
2184         /* Collect atoms quadruplets for GMX_SIMD_REAL_WIDTH dihedrals.
2185          * iu indexes into forceatoms, we should not let iu go beyond nbonds.
2186          */
2187         iu = i;
2188         for (s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
2189         {
2190             type  = forceatoms[iu];
2191             ai[s] = forceatoms[iu + 1];
2192             aj[s] = forceatoms[iu + 2];
2193             ak[s] = forceatoms[iu + 3];
2194             al[s] = forceatoms[iu + 4];
2195
2196             /* At the end fill the arrays with the last atoms and 0 params */
2197             if (i + s * nfa1 < nbonds)
2198             {
2199                 /* We don't need the first parameter, since that's a constant
2200                  * which only affects the energies, not the forces.
2201                  */
2202                 for (j = 1; j < NR_RBDIHS; j++)
2203                 {
2204                     parm[j * GMX_SIMD_REAL_WIDTH + s] = forceparams[type].rbdihs.rbcA[j];
2205                 }
2206
2207                 if (iu + nfa1 < nbonds)
2208                 {
2209                     iu += nfa1;
2210                 }
2211             }
2212             else
2213             {
2214                 for (j = 1; j < NR_RBDIHS; j++)
2215                 {
2216                     parm[j * GMX_SIMD_REAL_WIDTH + s] = 0;
2217                 }
2218             }
2219         }
2220
2221         /* Calculate GMX_SIMD_REAL_WIDTH dihedral angles at once */
2222         dih_angle_simd(
2223                 x, ai, aj, ak, al, pbc_simd, &phi_S, &mx_S, &my_S, &mz_S, &nx_S, &ny_S, &nz_S, &nrkj_m2_S, &nrkj_n2_S, &p_S, &q_S);
2224
2225         /* Change to polymer convention */
2226         phi_S = phi_S - pi_S;
2227
2228         sincos(phi_S, &sin_S, &cos_S);
2229
2230         ddphi_S  = setZero();
2231         c_S      = one_S;
2232         cosfac_S = one_S;
2233         for (j = 1; j < NR_RBDIHS; j++)
2234         {
2235             parm_S   = load<SimdReal>(parm + j * GMX_SIMD_REAL_WIDTH);
2236             ddphi_S  = fma(c_S * parm_S, cosfac_S, ddphi_S);
2237             cosfac_S = cosfac_S * cos_S;
2238             c_S      = c_S + one_S;
2239         }
2240
2241         /* Note that here we do not use the minus sign which is present
2242          * in the normal RB code. This is corrected for through (m)sf below.
2243          */
2244         ddphi_S = ddphi_S * sin_S;
2245
2246         sf_i_S  = ddphi_S * nrkj_m2_S;
2247         msf_l_S = ddphi_S * nrkj_n2_S;
2248
2249         /* After this m?_S will contain f[i] */
2250         mx_S = sf_i_S * mx_S;
2251         my_S = sf_i_S * my_S;
2252         mz_S = sf_i_S * mz_S;
2253
2254         /* After this m?_S will contain -f[l] */
2255         nx_S = msf_l_S * nx_S;
2256         ny_S = msf_l_S * ny_S;
2257         nz_S = msf_l_S * nz_S;
2258
2259         do_dih_fup_noshiftf_simd(ai, aj, ak, al, p_S, q_S, mx_S, my_S, mz_S, nx_S, ny_S, nz_S, f);
2260     }
2261
2262     return 0;
2263 }
2264
2265 #endif // GMX_SIMD_HAVE_REAL
2266
2267
2268 template<BondedKernelFlavor flavor>
2269 real idihs(int             nbonds,
2270            const t_iatom   forceatoms[],
2271            const t_iparams forceparams[],
2272            const rvec      x[],
2273            rvec4           f[],
2274            rvec            fshift[],
2275            const t_pbc*    pbc,
2276            real            lambda,
2277            real*           dvdlambda,
2278            const t_mdatoms gmx_unused* md,
2279            t_fcdata gmx_unused* fcd,
2280            t_disresdata gmx_unused* disresdata,
2281            t_oriresdata gmx_unused* oriresdata,
2282            int gmx_unused* global_atom_index)
2283 {
2284     int  i, type, ai, aj, ak, al;
2285     int  t1, t2, t3;
2286     real phi, phi0, dphi0, ddphi, vtot;
2287     rvec r_ij, r_kj, r_kl, m, n;
2288     real L1, kk, dp, dp2, kA, kB, pA, pB, dvdl_term;
2289
2290     L1        = 1.0 - lambda;
2291     dvdl_term = 0;
2292     vtot      = 0.0;
2293     for (i = 0; (i < nbonds);)
2294     {
2295         type = forceatoms[i++];
2296         ai   = forceatoms[i++];
2297         aj   = forceatoms[i++];
2298         ak   = forceatoms[i++];
2299         al   = forceatoms[i++];
2300
2301         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n, &t1, &t2, &t3); /*  84         */
2302
2303         /* phi can jump if phi0 is close to Pi/-Pi, which will cause huge
2304          * force changes if we just apply a normal harmonic.
2305          * Instead, we first calculate phi-phi0 and take it modulo (-Pi,Pi).
2306          * This means we will never have the periodicity problem, unless
2307          * the dihedral is Pi away from phiO, which is very unlikely due to
2308          * the potential.
2309          */
2310         kA = forceparams[type].harmonic.krA;
2311         kB = forceparams[type].harmonic.krB;
2312         pA = forceparams[type].harmonic.rA;
2313         pB = forceparams[type].harmonic.rB;
2314
2315         kk    = L1 * kA + lambda * kB;
2316         phi0  = (L1 * pA + lambda * pB) * gmx::c_deg2Rad;
2317         dphi0 = (pB - pA) * gmx::c_deg2Rad;
2318
2319         dp = phi - phi0;
2320
2321         make_dp_periodic(&dp);
2322
2323         dp2 = dp * dp;
2324
2325         vtot += 0.5 * kk * dp2;
2326         ddphi = -kk * dp;
2327
2328         dvdl_term += 0.5 * (kB - kA) * dp2 - kk * dphi0 * dp;
2329
2330         do_dih_fup<flavor>(ai, aj, ak, al, -ddphi, r_ij, r_kj, r_kl, m, n, f, fshift, pbc, x, t1, t2, t3); /* 112               */
2331         /* 218 TOTAL    */
2332     }
2333
2334     *dvdlambda += dvdl_term;
2335     return vtot;
2336 }
2337
2338 /*! \brief Computes angle restraints of two different types */
2339 template<BondedKernelFlavor flavor>
2340 real low_angres(int             nbonds,
2341                 const t_iatom   forceatoms[],
2342                 const t_iparams forceparams[],
2343                 const rvec      x[],
2344                 rvec4           f[],
2345                 rvec            fshift[],
2346                 const t_pbc*    pbc,
2347                 real            lambda,
2348                 real*           dvdlambda,
2349                 gmx_bool        bZAxis)
2350 {
2351     int  i, m, type, ai, aj, ak, al;
2352     int  t1, t2;
2353     real phi, cos_phi, cos_phi2, vid, vtot, dVdphi;
2354     rvec r_ij, r_kl, f_i, f_k = { 0, 0, 0 };
2355     real st, sth, nrij2, nrkl2, c, cij, ckl;
2356
2357     t2 = 0; /* avoid warning with gcc-3.3. It is never used uninitialized */
2358
2359     vtot = 0.0;
2360     ak = al = 0; /* to avoid warnings */
2361     for (i = 0; i < nbonds;)
2362     {
2363         type = forceatoms[i++];
2364         ai   = forceatoms[i++];
2365         aj   = forceatoms[i++];
2366         t1   = pbc_rvec_sub(pbc, x[aj], x[ai], r_ij); /*  3             */
2367         if (!bZAxis)
2368         {
2369             ak = forceatoms[i++];
2370             al = forceatoms[i++];
2371             t2 = pbc_rvec_sub(pbc, x[al], x[ak], r_kl); /*  3           */
2372         }
2373         else
2374         {
2375             r_kl[XX] = 0;
2376             r_kl[YY] = 0;
2377             r_kl[ZZ] = 1;
2378         }
2379
2380         cos_phi = cos_angle(r_ij, r_kl); /* 25          */
2381         phi     = std::acos(cos_phi);    /* 10           */
2382
2383         *dvdlambda += dopdihs_min(forceparams[type].pdihs.cpA,
2384                                   forceparams[type].pdihs.cpB,
2385                                   forceparams[type].pdihs.phiA,
2386                                   forceparams[type].pdihs.phiB,
2387                                   forceparams[type].pdihs.mult,
2388                                   phi,
2389                                   lambda,
2390                                   &vid,
2391                                   &dVdphi); /*  40 */
2392
2393         vtot += vid;
2394
2395         cos_phi2 = gmx::square(cos_phi); /*   1         */
2396         if (cos_phi2 < 1)
2397         {
2398             st    = -dVdphi * gmx::invsqrt(1 - cos_phi2); /*  12                */
2399             sth   = st * cos_phi;                         /*   1                */
2400             nrij2 = iprod(r_ij, r_ij);                    /*   5                */
2401             nrkl2 = iprod(r_kl, r_kl);                    /*   5          */
2402
2403             c   = st * gmx::invsqrt(nrij2 * nrkl2); /*  11              */
2404             cij = sth / nrij2;                      /*  10              */
2405             ckl = sth / nrkl2;                      /*  10              */
2406
2407             for (m = 0; m < DIM; m++) /*  18+18       */
2408             {
2409                 f_i[m] = (c * r_kl[m] - cij * r_ij[m]);
2410                 f[ai][m] += f_i[m];
2411                 f[aj][m] -= f_i[m];
2412                 if (!bZAxis)
2413                 {
2414                     f_k[m] = (c * r_ij[m] - ckl * r_kl[m]);
2415                     f[ak][m] += f_k[m];
2416                     f[al][m] -= f_k[m];
2417                 }
2418             }
2419
2420             if (computeVirial(flavor))
2421             {
2422                 rvec_inc(fshift[t1], f_i);
2423                 rvec_dec(fshift[CENTRAL], f_i);
2424                 if (!bZAxis)
2425                 {
2426                     rvec_inc(fshift[t2], f_k);
2427                     rvec_dec(fshift[CENTRAL], f_k);
2428                 }
2429             }
2430         }
2431     }
2432
2433     return vtot; /*  184 / 157 (bZAxis)  total  */
2434 }
2435
2436 template<BondedKernelFlavor flavor>
2437 real angres(int             nbonds,
2438             const t_iatom   forceatoms[],
2439             const t_iparams forceparams[],
2440             const rvec      x[],
2441             rvec4           f[],
2442             rvec            fshift[],
2443             const t_pbc*    pbc,
2444             real            lambda,
2445             real*           dvdlambda,
2446             const t_mdatoms gmx_unused* md,
2447             t_fcdata gmx_unused* fcd,
2448             t_disresdata gmx_unused* disresdata,
2449             t_oriresdata gmx_unused* oriresdata,
2450             int gmx_unused* global_atom_index)
2451 {
2452     return low_angres<flavor>(nbonds, forceatoms, forceparams, x, f, fshift, pbc, lambda, dvdlambda, FALSE);
2453 }
2454
2455 template<BondedKernelFlavor flavor>
2456 real angresz(int             nbonds,
2457              const t_iatom   forceatoms[],
2458              const t_iparams forceparams[],
2459              const rvec      x[],
2460              rvec4           f[],
2461              rvec            fshift[],
2462              const t_pbc*    pbc,
2463              real            lambda,
2464              real*           dvdlambda,
2465              const t_mdatoms gmx_unused* md,
2466              t_fcdata gmx_unused* fcd,
2467              t_disresdata gmx_unused* disresdata,
2468              t_oriresdata gmx_unused* oriresdata,
2469              int gmx_unused* global_atom_index)
2470 {
2471     return low_angres<flavor>(nbonds, forceatoms, forceparams, x, f, fshift, pbc, lambda, dvdlambda, TRUE);
2472 }
2473
2474 template<BondedKernelFlavor flavor>
2475 real dihres(int             nbonds,
2476             const t_iatom   forceatoms[],
2477             const t_iparams forceparams[],
2478             const rvec      x[],
2479             rvec4           f[],
2480             rvec            fshift[],
2481             const t_pbc*    pbc,
2482             real            lambda,
2483             real*           dvdlambda,
2484             const t_mdatoms gmx_unused* md,
2485             t_fcdata gmx_unused* fcd,
2486             t_disresdata gmx_unused* disresdata,
2487             t_oriresdata gmx_unused* oriresdata,
2488             int gmx_unused* global_atom_index)
2489 {
2490     real vtot = 0;
2491     int  ai, aj, ak, al, i, type, t1, t2, t3;
2492     real phi0A, phi0B, dphiA, dphiB, kfacA, kfacB, phi0, dphi, kfac;
2493     real phi, ddphi, ddp, ddp2, dp, d2r, L1;
2494     rvec r_ij, r_kj, r_kl, m, n;
2495
2496     L1 = 1.0 - lambda;
2497
2498     d2r = gmx::c_deg2Rad;
2499
2500     for (i = 0; (i < nbonds);)
2501     {
2502         type = forceatoms[i++];
2503         ai   = forceatoms[i++];
2504         aj   = forceatoms[i++];
2505         ak   = forceatoms[i++];
2506         al   = forceatoms[i++];
2507
2508         phi0A = forceparams[type].dihres.phiA * d2r;
2509         dphiA = forceparams[type].dihres.dphiA * d2r;
2510         kfacA = forceparams[type].dihres.kfacA;
2511
2512         phi0B = forceparams[type].dihres.phiB * d2r;
2513         dphiB = forceparams[type].dihres.dphiB * d2r;
2514         kfacB = forceparams[type].dihres.kfacB;
2515
2516         phi0 = L1 * phi0A + lambda * phi0B;
2517         dphi = L1 * dphiA + lambda * dphiB;
2518         kfac = L1 * kfacA + lambda * kfacB;
2519
2520         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n, &t1, &t2, &t3);
2521         /* 84 flops */
2522
2523         /* phi can jump if phi0 is close to Pi/-Pi, which will cause huge
2524          * force changes if we just apply a normal harmonic.
2525          * Instead, we first calculate phi-phi0 and take it modulo (-Pi,Pi).
2526          * This means we will never have the periodicity problem, unless
2527          * the dihedral is Pi away from phiO, which is very unlikely due to
2528          * the potential.
2529          */
2530         dp = phi - phi0;
2531         make_dp_periodic(&dp);
2532
2533         if (dp > dphi)
2534         {
2535             ddp = dp - dphi;
2536         }
2537         else if (dp < -dphi)
2538         {
2539             ddp = dp + dphi;
2540         }
2541         else
2542         {
2543             ddp = 0;
2544         }
2545
2546         if (ddp != 0.0)
2547         {
2548             ddp2 = ddp * ddp;
2549             vtot += 0.5 * kfac * ddp2;
2550             ddphi = kfac * ddp;
2551
2552             *dvdlambda += 0.5 * (kfacB - kfacA) * ddp2;
2553             /* lambda dependence from changing restraint distances */
2554             if (ddp > 0)
2555             {
2556                 *dvdlambda -= kfac * ddp * ((dphiB - dphiA) + (phi0B - phi0A));
2557             }
2558             else if (ddp < 0)
2559             {
2560                 *dvdlambda += kfac * ddp * ((dphiB - dphiA) - (phi0B - phi0A));
2561             }
2562             do_dih_fup<flavor>(
2563                     ai, aj, ak, al, ddphi, r_ij, r_kj, r_kl, m, n, f, fshift, pbc, x, t1, t2, t3); /* 112               */
2564         }
2565     }
2566     return vtot;
2567 }
2568
2569
2570 real unimplemented(int gmx_unused nbonds,
2571                    const t_iatom gmx_unused forceatoms[],
2572                    const t_iparams gmx_unused forceparams[],
2573                    const rvec gmx_unused x[],
2574                    rvec4 gmx_unused f[],
2575                    rvec gmx_unused fshift[],
2576                    const t_pbc gmx_unused* pbc,
2577                    real gmx_unused lambda,
2578                    real gmx_unused* dvdlambda,
2579                    const t_mdatoms gmx_unused* md,
2580                    t_fcdata gmx_unused* fcd,
2581                    t_disresdata gmx_unused* disresdata,
2582                    t_oriresdata gmx_unused* oriresdata,
2583                    int gmx_unused* global_atom_index)
2584 {
2585     gmx_impl("*** you are using a not implemented function");
2586 }
2587
2588 template<BondedKernelFlavor flavor>
2589 real restrangles(int             nbonds,
2590                  const t_iatom   forceatoms[],
2591                  const t_iparams forceparams[],
2592                  const rvec      x[],
2593                  rvec4           f[],
2594                  rvec            fshift[],
2595                  const t_pbc*    pbc,
2596                  real gmx_unused lambda,
2597                  real gmx_unused* dvdlambda,
2598                  const t_mdatoms gmx_unused* md,
2599                  t_fcdata gmx_unused* fcd,
2600                  t_disresdata gmx_unused* disresdata,
2601                  t_oriresdata gmx_unused* oriresdata,
2602                  int gmx_unused* global_atom_index)
2603 {
2604     int    i, d, ai, aj, ak, type, m;
2605     int    t1, t2;
2606     real   v, vtot;
2607     rvec   f_i, f_j, f_k;
2608     double prefactor, ratio_ante, ratio_post;
2609     rvec   delta_ante, delta_post, vec_temp;
2610
2611     vtot = 0.0;
2612     for (i = 0; (i < nbonds);)
2613     {
2614         type = forceatoms[i++];
2615         ai   = forceatoms[i++];
2616         aj   = forceatoms[i++];
2617         ak   = forceatoms[i++];
2618
2619         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], vec_temp);
2620         pbc_rvec_sub(pbc, x[aj], x[ai], delta_ante);
2621         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], delta_post);
2622
2623
2624         /* This function computes factors needed for restricted angle potential.
2625          * The restricted angle potential is used in coarse-grained simulations to avoid singularities
2626          * when three particles align and the dihedral angle and dihedral potential
2627          * cannot be calculated. This potential is calculated using the formula:
2628            real restrangles(int nbonds,
2629             const t_iatom forceatoms[],const t_iparams forceparams[],
2630             const rvec x[],rvec4 f[],rvec fshift[],
2631             const t_pbc *pbc,
2632             real gmx_unused lambda,real gmx_unused *dvdlambda,
2633             const t_mdatoms gmx_unused *md,t_fcdata gmx_unused *fcd,
2634             int gmx_unused *global_atom_index)
2635            {
2636            int  i, d, ai, aj, ak, type, m;
2637            int t1, t2;
2638            rvec r_ij,r_kj;
2639            real v, vtot;
2640            rvec f_i, f_j, f_k;
2641            real prefactor, ratio_ante, ratio_post;
2642            rvec delta_ante, delta_post, vec_temp;
2643
2644            vtot = 0.0;
2645            for(i=0; (i<nbonds); )
2646            {
2647            type = forceatoms[i++];
2648            ai   = forceatoms[i++];
2649            aj   = forceatoms[i++];
2650            ak   = forceatoms[i++];
2651
2652          * \f[V_{\rm ReB}(\theta_i) = \frac{1}{2} k_{\theta} \frac{(\cos\theta_i - \cos\theta_0)^2}
2653          * {\sin^2\theta_i}\f] ({eq:ReB} and ref \cite{MonicaGoga2013} from the manual).
2654          * For more explanations see comments file "restcbt.h". */
2655
2656         compute_factors_restangles(
2657                 type, forceparams, delta_ante, delta_post, &prefactor, &ratio_ante, &ratio_post, &v);
2658
2659         /*   Forces are computed per component */
2660         for (d = 0; d < DIM; d++)
2661         {
2662             f_i[d] = prefactor * (ratio_ante * delta_ante[d] - delta_post[d]);
2663             f_j[d] = prefactor
2664                      * ((ratio_post + 1.0) * delta_post[d] - (ratio_ante + 1.0) * delta_ante[d]);
2665             f_k[d] = prefactor * (delta_ante[d] - ratio_post * delta_post[d]);
2666         }
2667
2668         /*   Computation of potential energy   */
2669
2670         vtot += v;
2671
2672         /*   Update forces */
2673
2674         for (m = 0; (m < DIM); m++)
2675         {
2676             f[ai][m] += f_i[m];
2677             f[aj][m] += f_j[m];
2678             f[ak][m] += f_k[m];
2679         }
2680
2681         if (computeVirial(flavor))
2682         {
2683             rvec_inc(fshift[t1], f_i);
2684             rvec_inc(fshift[CENTRAL], f_j);
2685             rvec_inc(fshift[t2], f_k);
2686         }
2687     }
2688     return vtot;
2689 }
2690
2691
2692 template<BondedKernelFlavor flavor>
2693 real restrdihs(int             nbonds,
2694                const t_iatom   forceatoms[],
2695                const t_iparams forceparams[],
2696                const rvec      x[],
2697                rvec4           f[],
2698                rvec            fshift[],
2699                const t_pbc*    pbc,
2700                real gmx_unused lambda,
2701                real gmx_unused* dvlambda,
2702                const t_mdatoms gmx_unused* md,
2703                t_fcdata gmx_unused* fcd,
2704                t_disresdata gmx_unused* disresdata,
2705                t_oriresdata gmx_unused* oriresdata,
2706                int gmx_unused* global_atom_index)
2707 {
2708     int  i, d, type, ai, aj, ak, al;
2709     rvec f_i, f_j, f_k, f_l;
2710     rvec dx_jl;
2711     int  t1, t2, t3;
2712     real v, vtot;
2713     rvec delta_ante, delta_crnt, delta_post, vec_temp;
2714     real factor_phi_ai_ante, factor_phi_ai_crnt, factor_phi_ai_post;
2715     real factor_phi_aj_ante, factor_phi_aj_crnt, factor_phi_aj_post;
2716     real factor_phi_ak_ante, factor_phi_ak_crnt, factor_phi_ak_post;
2717     real factor_phi_al_ante, factor_phi_al_crnt, factor_phi_al_post;
2718     real prefactor_phi;
2719
2720
2721     vtot = 0.0;
2722     for (i = 0; (i < nbonds);)
2723     {
2724         type = forceatoms[i++];
2725         ai   = forceatoms[i++];
2726         aj   = forceatoms[i++];
2727         ak   = forceatoms[i++];
2728         al   = forceatoms[i++];
2729
2730         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], vec_temp);
2731         pbc_rvec_sub(pbc, x[aj], x[ai], delta_ante);
2732         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], delta_crnt);
2733         pbc_rvec_sub(pbc, x[ak], x[al], vec_temp);
2734         pbc_rvec_sub(pbc, x[al], x[ak], delta_post);
2735
2736         /* This function computes factors needed for restricted angle potential.
2737          * The restricted angle potential is used in coarse-grained simulations to avoid singularities
2738          * when three particles align and the dihedral angle and dihedral potential cannot be calculated.
2739          * This potential is calculated using the formula:
2740          * \f[V_{\rm ReB}(\theta_i) = \frac{1}{2} k_{\theta}
2741          * \frac{(\cos\theta_i - \cos\theta_0)^2}{\sin^2\theta_i}\f]
2742          * ({eq:ReB} and ref \cite{MonicaGoga2013} from the manual).
2743          * For more explanations see comments file "restcbt.h" */
2744
2745         compute_factors_restrdihs(type,
2746                                   forceparams,
2747                                   delta_ante,
2748                                   delta_crnt,
2749                                   delta_post,
2750                                   &factor_phi_ai_ante,
2751                                   &factor_phi_ai_crnt,
2752                                   &factor_phi_ai_post,
2753                                   &factor_phi_aj_ante,
2754                                   &factor_phi_aj_crnt,
2755                                   &factor_phi_aj_post,
2756                                   &factor_phi_ak_ante,
2757                                   &factor_phi_ak_crnt,
2758                                   &factor_phi_ak_post,
2759                                   &factor_phi_al_ante,
2760                                   &factor_phi_al_crnt,
2761                                   &factor_phi_al_post,
2762                                   &prefactor_phi,
2763                                   &v);
2764
2765
2766         /*      Computation of forces per component */
2767         for (d = 0; d < DIM; d++)
2768         {
2769             f_i[d] = prefactor_phi
2770                      * (factor_phi_ai_ante * delta_ante[d] + factor_phi_ai_crnt * delta_crnt[d]
2771                         + factor_phi_ai_post * delta_post[d]);
2772             f_j[d] = prefactor_phi
2773                      * (factor_phi_aj_ante * delta_ante[d] + factor_phi_aj_crnt * delta_crnt[d]
2774                         + factor_phi_aj_post * delta_post[d]);
2775             f_k[d] = prefactor_phi
2776                      * (factor_phi_ak_ante * delta_ante[d] + factor_phi_ak_crnt * delta_crnt[d]
2777                         + factor_phi_ak_post * delta_post[d]);
2778             f_l[d] = prefactor_phi
2779                      * (factor_phi_al_ante * delta_ante[d] + factor_phi_al_crnt * delta_crnt[d]
2780                         + factor_phi_al_post * delta_post[d]);
2781         }
2782         /*      Computation of the energy */
2783
2784         vtot += v;
2785
2786
2787         /*    Updating the forces */
2788
2789         rvec_inc(f[ai], f_i);
2790         rvec_inc(f[aj], f_j);
2791         rvec_inc(f[ak], f_k);
2792         rvec_inc(f[al], f_l);
2793
2794         if (computeVirial(flavor))
2795         {
2796             if (pbc)
2797             {
2798                 t3 = pbc_rvec_sub(pbc, x[al], x[aj], dx_jl);
2799             }
2800             else
2801             {
2802                 t3 = CENTRAL;
2803             }
2804
2805             rvec_inc(fshift[t1], f_i);
2806             rvec_inc(fshift[CENTRAL], f_j);
2807             rvec_inc(fshift[t2], f_k);
2808             rvec_inc(fshift[t3], f_l);
2809         }
2810     }
2811
2812     return vtot;
2813 }
2814
2815
2816 template<BondedKernelFlavor flavor>
2817 real cbtdihs(int             nbonds,
2818              const t_iatom   forceatoms[],
2819              const t_iparams forceparams[],
2820              const rvec      x[],
2821              rvec4           f[],
2822              rvec            fshift[],
2823              const t_pbc*    pbc,
2824              real gmx_unused lambda,
2825              real gmx_unused* dvdlambda,
2826              const t_mdatoms gmx_unused* md,
2827              t_fcdata gmx_unused* fcd,
2828              t_disresdata gmx_unused* disresdata,
2829              t_oriresdata gmx_unused* oriresdata,
2830              int gmx_unused* global_atom_index)
2831 {
2832     int  type, ai, aj, ak, al, i, d;
2833     int  t1, t2, t3;
2834     real v, vtot;
2835     rvec vec_temp;
2836     rvec f_i, f_j, f_k, f_l;
2837     rvec dx_jl;
2838     rvec delta_ante, delta_crnt, delta_post;
2839     rvec f_phi_ai, f_phi_aj, f_phi_ak, f_phi_al;
2840     rvec f_theta_ante_ai, f_theta_ante_aj, f_theta_ante_ak;
2841     rvec f_theta_post_aj, f_theta_post_ak, f_theta_post_al;
2842
2843
2844     vtot = 0.0;
2845     for (i = 0; (i < nbonds);)
2846     {
2847         type = forceatoms[i++];
2848         ai   = forceatoms[i++];
2849         aj   = forceatoms[i++];
2850         ak   = forceatoms[i++];
2851         al   = forceatoms[i++];
2852
2853
2854         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], vec_temp);
2855         pbc_rvec_sub(pbc, x[aj], x[ai], delta_ante);
2856         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], vec_temp);
2857         pbc_rvec_sub(pbc, x[ak], x[aj], delta_crnt);
2858         pbc_rvec_sub(pbc, x[ak], x[al], vec_temp);
2859         pbc_rvec_sub(pbc, x[al], x[ak], delta_post);
2860
2861         /* \brief Compute factors for CBT potential
2862          * The combined bending-torsion potential goes to zero in a very smooth manner, eliminating the numerical
2863          * instabilities, when three coarse-grained particles align and the dihedral angle and standard
2864          * dihedral potentials cannot be calculated. The CBT potential is calculated using the formula:
2865          * \f[V_{\rm CBT}(\theta_{i-1}, \theta_i, \phi_i) = k_{\phi} \sin^3\theta_{i-1} \sin^3\theta_{i}
2866          * \sum_{n=0}^4 { a_n \cos^n\phi_i}\f] ({eq:CBT} and ref \cite{MonicaGoga2013} from the manual).
2867          * It contains in its expression not only the dihedral angle \f$\phi\f$
2868          * but also \f[\theta_{i-1}\f] (theta_ante bellow) and \f[\theta_{i}\f] (theta_post bellow)
2869          * --- the adjacent bending angles.
2870          * For more explanations see comments file "restcbt.h". */
2871
2872         compute_factors_cbtdihs(type,
2873                                 forceparams,
2874                                 delta_ante,
2875                                 delta_crnt,
2876                                 delta_post,
2877                                 f_phi_ai,
2878                                 f_phi_aj,
2879                                 f_phi_ak,
2880                                 f_phi_al,
2881                                 f_theta_ante_ai,
2882                                 f_theta_ante_aj,
2883                                 f_theta_ante_ak,
2884                                 f_theta_post_aj,
2885                                 f_theta_post_ak,
2886                                 f_theta_post_al,
2887                                 &v);
2888
2889
2890         /*      Acumulate the resuts per beads */
2891         for (d = 0; d < DIM; d++)
2892         {
2893             f_i[d] = f_phi_ai[d] + f_theta_ante_ai[d];
2894             f_j[d] = f_phi_aj[d] + f_theta_ante_aj[d] + f_theta_post_aj[d];
2895             f_k[d] = f_phi_ak[d] + f_theta_ante_ak[d] + f_theta_post_ak[d];
2896             f_l[d] = f_phi_al[d] + f_theta_post_al[d];
2897         }
2898
2899         /*      Compute the potential energy */
2900
2901         vtot += v;
2902
2903
2904         /*  Updating the forces */
2905         rvec_inc(f[ai], f_i);
2906         rvec_inc(f[aj], f_j);
2907         rvec_inc(f[ak], f_k);
2908         rvec_inc(f[al], f_l);
2909
2910
2911         if (computeVirial(flavor))
2912         {
2913             if (pbc)
2914             {
2915                 t3 = pbc_rvec_sub(pbc, x[al], x[aj], dx_jl);
2916             }
2917             else
2918             {
2919                 t3 = CENTRAL;
2920             }
2921
2922             rvec_inc(fshift[t1], f_i);
2923             rvec_inc(fshift[CENTRAL], f_j);
2924             rvec_inc(fshift[t2], f_k);
2925             rvec_inc(fshift[t3], f_l);
2926         }
2927     }
2928
2929     return vtot;
2930 }
2931
2932 template<BondedKernelFlavor flavor>
2933 std::enable_if_t<flavor != BondedKernelFlavor::ForcesSimdWhenAvailable || !GMX_SIMD_HAVE_REAL, real>
2934 rbdihs(int             nbonds,
2935        const t_iatom   forceatoms[],
2936        const t_iparams forceparams[],
2937        const rvec      x[],
2938        rvec4           f[],
2939        rvec            fshift[],
2940        const t_pbc*    pbc,
2941        real            lambda,
2942        real*           dvdlambda,
2943        const t_mdatoms gmx_unused* md,
2944        t_fcdata gmx_unused* fcd,
2945        t_disresdata gmx_unused* disresdata,
2946        t_oriresdata gmx_unused* oriresdata,
2947        int gmx_unused* global_atom_index)
2948 {
2949     const real c0 = 0.0, c1 = 1.0, c2 = 2.0, c3 = 3.0, c4 = 4.0, c5 = 5.0;
2950     int        type, ai, aj, ak, al, i, j;
2951     int        t1, t2, t3;
2952     rvec       r_ij, r_kj, r_kl, m, n;
2953     real       parmA[NR_RBDIHS];
2954     real       parmB[NR_RBDIHS];
2955     real       parm[NR_RBDIHS];
2956     real       cos_phi, phi, rbp, rbpBA;
2957     real       v, ddphi, sin_phi;
2958     real       cosfac, vtot;
2959     real       L1        = 1.0 - lambda;
2960     real       dvdl_term = 0;
2961
2962     vtot = 0.0;
2963     for (i = 0; (i < nbonds);)
2964     {
2965         type = forceatoms[i++];
2966         ai   = forceatoms[i++];
2967         aj   = forceatoms[i++];
2968         ak   = forceatoms[i++];
2969         al   = forceatoms[i++];
2970
2971         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n, &t1, &t2, &t3); /*  84         */
2972
2973         /* Change to polymer convention */
2974         if (phi < c0)
2975         {
2976             phi += M_PI;
2977         }
2978         else
2979         {
2980             phi -= M_PI; /*   1         */
2981         }
2982         cos_phi = std::cos(phi);
2983         /* Beware of accuracy loss, cannot use 1-sqrt(cos^2) ! */
2984         sin_phi = std::sin(phi);
2985
2986         for (j = 0; (j < NR_RBDIHS); j++)
2987         {
2988             parmA[j] = forceparams[type].rbdihs.rbcA[j];
2989             parmB[j] = forceparams[type].rbdihs.rbcB[j];
2990             parm[j]  = L1 * parmA[j] + lambda * parmB[j];
2991         }
2992         /* Calculate cosine powers */
2993         /* Calculate the energy */
2994         /* Calculate the derivative */
2995
2996         v = parm[0];
2997         dvdl_term += (parmB[0] - parmA[0]);
2998         ddphi  = c0;
2999         cosfac = c1;
3000
3001         rbp   = parm[1];
3002         rbpBA = parmB[1] - parmA[1];
3003         ddphi += rbp * cosfac;
3004         cosfac *= cos_phi;
3005         v += cosfac * rbp;
3006         dvdl_term += cosfac * rbpBA;
3007         rbp   = parm[2];
3008         rbpBA = parmB[2] - parmA[2];
3009         ddphi += c2 * rbp * cosfac;
3010         cosfac *= cos_phi;
3011         v += cosfac * rbp;
3012         dvdl_term += cosfac * rbpBA;
3013         rbp   = parm[3];
3014         rbpBA = parmB[3] - parmA[3];
3015         ddphi += c3 * rbp * cosfac;
3016         cosfac *= cos_phi;
3017         v += cosfac * rbp;
3018         dvdl_term += cosfac * rbpBA;
3019         rbp   = parm[4];
3020         rbpBA = parmB[4] - parmA[4];
3021         ddphi += c4 * rbp * cosfac;
3022         cosfac *= cos_phi;
3023         v += cosfac * rbp;
3024         dvdl_term += cosfac * rbpBA;
3025         rbp   = parm[5];
3026         rbpBA = parmB[5] - parmA[5];
3027         ddphi += c5 * rbp * cosfac;
3028         cosfac *= cos_phi;
3029         v += cosfac * rbp;
3030         dvdl_term += cosfac * rbpBA;
3031
3032         ddphi = -ddphi * sin_phi; /*  11                */
3033
3034         do_dih_fup<flavor>(ai, aj, ak, al, ddphi, r_ij, r_kj, r_kl, m, n, f, fshift, pbc, x, t1, t2, t3); /* 112                */
3035         vtot += v;
3036     }
3037     *dvdlambda += dvdl_term;
3038
3039     return vtot;
3040 }
3041
3042 //! \endcond
3043
3044 /*! \brief Mysterious undocumented function */
3045 int cmap_setup_grid_index(int ip, int grid_spacing, int* ipm1, int* ipp1, int* ipp2)
3046 {
3047     int im1, ip1, ip2;
3048
3049     if (ip < 0)
3050     {
3051         ip = ip + grid_spacing - 1;
3052     }
3053     else if (ip > grid_spacing)
3054     {
3055         ip = ip - grid_spacing - 1;
3056     }
3057
3058     im1 = ip - 1;
3059     ip1 = ip + 1;
3060     ip2 = ip + 2;
3061
3062     if (ip == 0)
3063     {
3064         im1 = grid_spacing - 1;
3065     }
3066     else if (ip == grid_spacing - 2)
3067     {
3068         ip2 = 0;
3069     }
3070     else if (ip == grid_spacing - 1)
3071     {
3072         ip1 = 0;
3073         ip2 = 1;
3074     }
3075
3076     *ipm1 = im1;
3077     *ipp1 = ip1;
3078     *ipp2 = ip2;
3079
3080     return ip;
3081 }
3082
3083 } // namespace
3084
3085 real cmap_dihs(int                 nbonds,
3086                const t_iatom       forceatoms[],
3087                const t_iparams     forceparams[],
3088                const gmx_cmap_t*   cmap_grid,
3089                const rvec          x[],
3090                rvec4               f[],
3091                rvec                fshift[],
3092                const struct t_pbc* pbc,
3093                real gmx_unused lambda,
3094                real gmx_unused* dvdlambda,
3095                const t_mdatoms gmx_unused* md,
3096                t_fcdata gmx_unused* fcd,
3097                t_disresdata gmx_unused* disresdata,
3098                t_oriresdata gmx_unused* oriresdata,
3099                int gmx_unused* global_atom_index)
3100 {
3101     int i, n;
3102     int ai, aj, ak, al, am;
3103     int a1i, a1j, a1k, a1l, a2i, a2j, a2k, a2l;
3104     int type;
3105     int t11, t21, t31, t12, t22, t32;
3106     int iphi1, ip1m1, ip1p1, ip1p2;
3107     int iphi2, ip2m1, ip2p1, ip2p2;
3108     int l1, l2, l3;
3109     int pos1, pos2, pos3, pos4;
3110
3111     real ty[4], ty1[4], ty2[4], ty12[4], tx[16];
3112     real phi1, cos_phi1, sin_phi1, xphi1;
3113     real phi2, cos_phi2, sin_phi2, xphi2;
3114     real dx, tt, tu, e, df1, df2, vtot;
3115     real ra21, rb21, rg21, rg1, rgr1, ra2r1, rb2r1, rabr1;
3116     real ra22, rb22, rg22, rg2, rgr2, ra2r2, rb2r2, rabr2;
3117     real fg1, hg1, fga1, hgb1, gaa1, gbb1;
3118     real fg2, hg2, fga2, hgb2, gaa2, gbb2;
3119     real fac;
3120
3121     rvec r1_ij, r1_kj, r1_kl, m1, n1;
3122     rvec r2_ij, r2_kj, r2_kl, m2, n2;
3123     rvec f1_i, f1_j, f1_k, f1_l;
3124     rvec f2_i, f2_j, f2_k, f2_l;
3125     rvec a1, b1, a2, b2;
3126     rvec f1, g1, h1, f2, g2, h2;
3127     rvec dtf1, dtg1, dth1, dtf2, dtg2, dth2;
3128
3129     int loop_index[4][4] = { { 0, 4, 8, 12 }, { 1, 5, 9, 13 }, { 2, 6, 10, 14 }, { 3, 7, 11, 15 } };
3130
3131     /* Total CMAP energy */
3132     vtot = 0;
3133
3134     for (n = 0; n < nbonds;)
3135     {
3136         /* Five atoms are involved in the two torsions */
3137         type = forceatoms[n++];
3138         ai   = forceatoms[n++];
3139         aj   = forceatoms[n++];
3140         ak   = forceatoms[n++];
3141         al   = forceatoms[n++];
3142         am   = forceatoms[n++];
3143
3144         /* Which CMAP type is this */
3145         const int   cmapA = forceparams[type].cmap.cmapA;
3146         const real* cmapd = cmap_grid->cmapdata[cmapA].cmap.data();
3147
3148         /* First torsion */
3149         a1i = ai;
3150         a1j = aj;
3151         a1k = ak;
3152         a1l = al;
3153
3154         phi1 = dih_angle(
3155                 x[a1i], x[a1j], x[a1k], x[a1l], pbc, r1_ij, r1_kj, r1_kl, m1, n1, &t11, &t21, &t31); /* 84 */
3156
3157         cos_phi1 = std::cos(phi1);
3158
3159         a1[0] = r1_ij[1] * r1_kj[2] - r1_ij[2] * r1_kj[1];
3160         a1[1] = r1_ij[2] * r1_kj[0] - r1_ij[0] * r1_kj[2];
3161         a1[2] = r1_ij[0] * r1_kj[1] - r1_ij[1] * r1_kj[0]; /* 9 */
3162
3163         b1[0] = r1_kl[1] * r1_kj[2] - r1_kl[2] * r1_kj[1];
3164         b1[1] = r1_kl[2] * r1_kj[0] - r1_kl[0] * r1_kj[2];
3165         b1[2] = r1_kl[0] * r1_kj[1] - r1_kl[1] * r1_kj[0]; /* 9 */
3166
3167         pbc_rvec_sub(pbc, x[a1l], x[a1k], h1);
3168
3169         ra21 = iprod(a1, a1);       /* 5 */
3170         rb21 = iprod(b1, b1);       /* 5 */
3171         rg21 = iprod(r1_kj, r1_kj); /* 5 */
3172         rg1  = sqrt(rg21);
3173
3174         rgr1  = 1.0 / rg1;
3175         ra2r1 = 1.0 / ra21;
3176         rb2r1 = 1.0 / rb21;
3177         rabr1 = sqrt(ra2r1 * rb2r1);
3178
3179         sin_phi1 = rg1 * rabr1 * iprod(a1, h1) * (-1);
3180
3181         if (cos_phi1 < -0.5 || cos_phi1 > 0.5)
3182         {
3183             phi1 = std::asin(sin_phi1);
3184
3185             if (cos_phi1 < 0)
3186             {
3187                 if (phi1 > 0)
3188                 {
3189                     phi1 = M_PI - phi1;
3190                 }
3191                 else
3192                 {
3193                     phi1 = -M_PI - phi1;
3194                 }
3195             }
3196         }
3197         else
3198         {
3199             phi1 = std::acos(cos_phi1);
3200
3201             if (sin_phi1 < 0)
3202             {
3203                 phi1 = -phi1;
3204             }
3205         }
3206
3207         xphi1 = phi1 + M_PI; /* 1 */
3208
3209         /* Second torsion */
3210         a2i = aj;
3211         a2j = ak;
3212         a2k = al;
3213         a2l = am;
3214
3215         phi2 = dih_angle(
3216                 x[a2i], x[a2j], x[a2k], x[a2l], pbc, r2_ij, r2_kj, r2_kl, m2, n2, &t12, &t22, &t32); /* 84 */
3217
3218         cos_phi2 = std::cos(phi2);
3219
3220         a2[0] = r2_ij[1] * r2_kj[2] - r2_ij[2] * r2_kj[1];
3221         a2[1] = r2_ij[2] * r2_kj[0] - r2_ij[0] * r2_kj[2];
3222         a2[2] = r2_ij[0] * r2_kj[1] - r2_ij[1] * r2_kj[0]; /* 9 */
3223
3224         b2[0] = r2_kl[1] * r2_kj[2] - r2_kl[2] * r2_kj[1];
3225         b2[1] = r2_kl[2] * r2_kj[0] - r2_kl[0] * r2_kj[2];
3226         b2[2] = r2_kl[0] * r2_kj[1] - r2_kl[1] * r2_kj[0]; /* 9 */
3227
3228         pbc_rvec_sub(pbc, x[a2l], x[a2k], h2);
3229
3230         ra22 = iprod(a2, a2);       /* 5 */
3231         rb22 = iprod(b2, b2);       /* 5 */
3232         rg22 = iprod(r2_kj, r2_kj); /* 5 */
3233         rg2  = sqrt(rg22);
3234
3235         rgr2  = 1.0 / rg2;
3236         ra2r2 = 1.0 / ra22;
3237         rb2r2 = 1.0 / rb22;
3238         rabr2 = sqrt(ra2r2 * rb2r2);
3239
3240         sin_phi2 = rg2 * rabr2 * iprod(a2, h2) * (-1);
3241
3242         if (cos_phi2 < -0.5 || cos_phi2 > 0.5)
3243         {
3244             phi2 = std::asin(sin_phi2);
3245
3246             if (cos_phi2 < 0)
3247             {
3248                 if (phi2 > 0)
3249                 {
3250                     phi2 = M_PI - phi2;
3251                 }
3252                 else
3253                 {
3254                     phi2 = -M_PI - phi2;
3255                 }
3256             }
3257         }
3258         else
3259         {
3260             phi2 = std::acos(cos_phi2);
3261
3262             if (sin_phi2 < 0)
3263             {
3264                 phi2 = -phi2;
3265             }
3266         }
3267
3268         xphi2 = phi2 + M_PI; /* 1 */
3269
3270         /* Range mangling */
3271         if (xphi1 < 0)
3272         {
3273             xphi1 = xphi1 + 2 * M_PI;
3274         }
3275         else if (xphi1 >= 2 * M_PI)
3276         {
3277             xphi1 = xphi1 - 2 * M_PI;
3278         }
3279
3280         if (xphi2 < 0)
3281         {
3282             xphi2 = xphi2 + 2 * M_PI;
3283         }
3284         else if (xphi2 >= 2 * M_PI)
3285         {
3286             xphi2 = xphi2 - 2 * M_PI;
3287         }
3288
3289         /* Number of grid points */
3290         dx = 2 * M_PI / cmap_grid->grid_spacing;
3291
3292         /* Where on the grid are we */
3293         iphi1 = static_cast<int>(xphi1 / dx);
3294         iphi2 = static_cast<int>(xphi2 / dx);
3295
3296         iphi1 = cmap_setup_grid_index(iphi1, cmap_grid->grid_spacing, &ip1m1, &ip1p1, &ip1p2);
3297         iphi2 = cmap_setup_grid_index(iphi2, cmap_grid->grid_spacing, &ip2m1, &ip2p1, &ip2p2);
3298
3299         pos1 = iphi1 * cmap_grid->grid_spacing + iphi2;
3300         pos2 = ip1p1 * cmap_grid->grid_spacing + iphi2;
3301         pos3 = ip1p1 * cmap_grid->grid_spacing + ip2p1;
3302         pos4 = iphi1 * cmap_grid->grid_spacing + ip2p1;
3303
3304         ty[0] = cmapd[pos1 * 4];
3305         ty[1] = cmapd[pos2 * 4];
3306         ty[2] = cmapd[pos3 * 4];
3307         ty[3] = cmapd[pos4 * 4];
3308
3309         ty1[0] = cmapd[pos1 * 4 + 1];
3310         ty1[1] = cmapd[pos2 * 4 + 1];
3311         ty1[2] = cmapd[pos3 * 4 + 1];
3312         ty1[3] = cmapd[pos4 * 4 + 1];
3313
3314         ty2[0] = cmapd[pos1 * 4 + 2];
3315         ty2[1] = cmapd[pos2 * 4 + 2];
3316         ty2[2] = cmapd[pos3 * 4 + 2];
3317         ty2[3] = cmapd[pos4 * 4 + 2];
3318
3319         ty12[0] = cmapd[pos1 * 4 + 3];
3320         ty12[1] = cmapd[pos2 * 4 + 3];
3321         ty12[2] = cmapd[pos3 * 4 + 3];
3322         ty12[3] = cmapd[pos4 * 4 + 3];
3323
3324         /* Switch to degrees */
3325         dx    = 360.0 / cmap_grid->grid_spacing;
3326         xphi1 = xphi1 * gmx::c_rad2Deg;
3327         xphi2 = xphi2 * gmx::c_rad2Deg;
3328
3329         for (i = 0; i < 4; i++) /* 16 */
3330         {
3331             tx[i]      = ty[i];
3332             tx[i + 4]  = ty1[i] * dx;
3333             tx[i + 8]  = ty2[i] * dx;
3334             tx[i + 12] = ty12[i] * dx * dx;
3335         }
3336
3337         real tc[16] = { 0 };
3338         for (int idx = 0; idx < 16; idx++) /* 1056 */
3339         {
3340             for (int k = 0; k < 16; k++)
3341             {
3342                 tc[idx] += cmap_coeff_matrix[k * 16 + idx] * tx[k];
3343             }
3344         }
3345
3346         tt = (xphi1 - iphi1 * dx) / dx;
3347         tu = (xphi2 - iphi2 * dx) / dx;
3348
3349         e   = 0;
3350         df1 = 0;
3351         df2 = 0;
3352
3353         for (i = 3; i >= 0; i--)
3354         {
3355             l1 = loop_index[i][3];
3356             l2 = loop_index[i][2];
3357             l3 = loop_index[i][1];
3358
3359             e = tt * e + ((tc[i * 4 + 3] * tu + tc[i * 4 + 2]) * tu + tc[i * 4 + 1]) * tu + tc[i * 4];
3360             df1 = tu * df1 + (3.0 * tc[l1] * tt + 2.0 * tc[l2]) * tt + tc[l3];
3361             df2 = tt * df2 + (3.0 * tc[i * 4 + 3] * tu + 2.0 * tc[i * 4 + 2]) * tu + tc[i * 4 + 1];
3362         }
3363
3364         fac = gmx::c_rad2Deg / dx;
3365         df1 = df1 * fac;
3366         df2 = df2 * fac;
3367
3368         /* CMAP energy */
3369         vtot += e;
3370
3371         /* Do forces - first torsion */
3372         fg1  = iprod(r1_ij, r1_kj);
3373         hg1  = iprod(r1_kl, r1_kj);
3374         fga1 = fg1 * ra2r1 * rgr1;
3375         hgb1 = hg1 * rb2r1 * rgr1;
3376         gaa1 = -ra2r1 * rg1;
3377         gbb1 = rb2r1 * rg1;
3378
3379         for (i = 0; i < DIM; i++)
3380         {
3381             dtf1[i] = gaa1 * a1[i];
3382             dtg1[i] = fga1 * a1[i] - hgb1 * b1[i];
3383             dth1[i] = gbb1 * b1[i];
3384
3385             f1[i] = df1 * dtf1[i];
3386             g1[i] = df1 * dtg1[i];
3387             h1[i] = df1 * dth1[i];
3388
3389             f1_i[i] = f1[i];
3390             f1_j[i] = -f1[i] - g1[i];
3391             f1_k[i] = h1[i] + g1[i];
3392             f1_l[i] = -h1[i];
3393
3394             f[a1i][i] = f[a1i][i] + f1_i[i];
3395             f[a1j][i] = f[a1j][i] + f1_j[i]; /* - f1[i] - g1[i] */
3396             f[a1k][i] = f[a1k][i] + f1_k[i]; /* h1[i] + g1[i] */
3397             f[a1l][i] = f[a1l][i] + f1_l[i]; /* h1[i] */
3398         }
3399
3400         /* Do forces - second torsion */
3401         fg2  = iprod(r2_ij, r2_kj);
3402         hg2  = iprod(r2_kl, r2_kj);
3403         fga2 = fg2 * ra2r2 * rgr2;
3404         hgb2 = hg2 * rb2r2 * rgr2;
3405         gaa2 = -ra2r2 * rg2;
3406         gbb2 = rb2r2 * rg2;
3407
3408         for (i = 0; i < DIM; i++)
3409         {
3410             dtf2[i] = gaa2 * a2[i];
3411             dtg2[i] = fga2 * a2[i] - hgb2 * b2[i];
3412             dth2[i] = gbb2 * b2[i];
3413
3414             f2[i] = df2 * dtf2[i];
3415             g2[i] = df2 * dtg2[i];
3416             h2[i] = df2 * dth2[i];
3417
3418             f2_i[i] = f2[i];
3419             f2_j[i] = -f2[i] - g2[i];
3420             f2_k[i] = h2[i] + g2[i];
3421             f2_l[i] = -h2[i];
3422
3423             f[a2i][i] = f[a2i][i] + f2_i[i]; /* f2[i] */
3424             f[a2j][i] = f[a2j][i] + f2_j[i]; /* - f2[i] - g2[i] */
3425             f[a2k][i] = f[a2k][i] + f2_k[i]; /* h2[i] + g2[i] */
3426             f[a2l][i] = f[a2l][i] + f2_l[i]; /* - h2[i] */
3427         }
3428
3429         /* Shift forces */
3430         if (fshift != nullptr)
3431         {
3432             if (pbc)
3433             {
3434                 t31 = pbc_rvec_sub(pbc, x[a1l], x[a1j], h1);
3435                 t32 = pbc_rvec_sub(pbc, x[a2l], x[a2j], h2);
3436             }
3437             else
3438             {
3439                 t31 = CENTRAL;
3440                 t32 = CENTRAL;
3441             }
3442
3443             rvec_inc(fshift[t11], f1_i);
3444             rvec_inc(fshift[CENTRAL], f1_j);
3445             rvec_inc(fshift[t21], f1_k);
3446             rvec_inc(fshift[t31], f1_l);
3447
3448             rvec_inc(fshift[t12], f2_i);
3449             rvec_inc(fshift[CENTRAL], f2_j);
3450             rvec_inc(fshift[t22], f2_k);
3451             rvec_inc(fshift[t32], f2_l);
3452         }
3453     }
3454     return vtot;
3455 }
3456
3457 namespace
3458 {
3459
3460 //! \cond
3461 /***********************************************************
3462  *
3463  *   G R O M O S  9 6   F U N C T I O N S
3464  *
3465  ***********************************************************/
3466 real g96harmonic(real kA, real kB, real xA, real xB, real x, real lambda, real* V, real* F)
3467 {
3468     const real half = 0.5;
3469     real       L1, kk, x0, dx, dx2;
3470     real       v, f, dvdlambda;
3471
3472     L1 = 1.0 - lambda;
3473     kk = L1 * kA + lambda * kB;
3474     x0 = L1 * xA + lambda * xB;
3475
3476     dx  = x - x0;
3477     dx2 = dx * dx;
3478
3479     f         = -kk * dx;
3480     v         = half * kk * dx2;
3481     dvdlambda = half * (kB - kA) * dx2 + (xA - xB) * kk * dx;
3482
3483     *F = f;
3484     *V = v;
3485
3486     return dvdlambda;
3487
3488     /* That was 21 flops */
3489 }
3490
3491 template<BondedKernelFlavor flavor>
3492 real g96bonds(int             nbonds,
3493               const t_iatom   forceatoms[],
3494               const t_iparams forceparams[],
3495               const rvec      x[],
3496               rvec4           f[],
3497               rvec            fshift[],
3498               const t_pbc*    pbc,
3499               real            lambda,
3500               real*           dvdlambda,
3501               const t_mdatoms gmx_unused* md,
3502               t_fcdata gmx_unused* fcd,
3503               t_disresdata gmx_unused* disresdata,
3504               t_oriresdata gmx_unused* oriresdata,
3505               int gmx_unused* global_atom_index)
3506 {
3507     int  i, ki, ai, aj, type;
3508     real dr2, fbond, vbond, vtot;
3509     rvec dx;
3510
3511     vtot = 0.0;
3512     for (i = 0; (i < nbonds);)
3513     {
3514         type = forceatoms[i++];
3515         ai   = forceatoms[i++];
3516         aj   = forceatoms[i++];
3517
3518         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
3519         dr2 = iprod(dx, dx);                       /*   5               */
3520
3521         *dvdlambda += g96harmonic(forceparams[type].harmonic.krA,
3522                                   forceparams[type].harmonic.krB,
3523                                   forceparams[type].harmonic.rA,
3524                                   forceparams[type].harmonic.rB,
3525                                   dr2,
3526                                   lambda,
3527                                   &vbond,
3528                                   &fbond);
3529
3530         vtot += 0.5 * vbond; /* 1*/
3531
3532         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, fshift); /* 15 */
3533     }                                                               /* 44 TOTAL */
3534     return vtot;
3535 }
3536
3537 real g96bond_angle(const rvec xi, const rvec xj, const rvec xk, const t_pbc* pbc, rvec r_ij, rvec r_kj, int* t1, int* t2)
3538 /* Return value is the angle between the bonds i-j and j-k */
3539 {
3540     real costh;
3541
3542     *t1 = pbc_rvec_sub(pbc, xi, xj, r_ij); /*  3                */
3543     *t2 = pbc_rvec_sub(pbc, xk, xj, r_kj); /*  3                */
3544
3545     costh = cos_angle(r_ij, r_kj); /* 25                */
3546     /* 41 TOTAL */
3547     return costh;
3548 }
3549
3550 template<BondedKernelFlavor flavor>
3551 real g96angles(int             nbonds,
3552                const t_iatom   forceatoms[],
3553                const t_iparams forceparams[],
3554                const rvec      x[],
3555                rvec4           f[],
3556                rvec            fshift[],
3557                const t_pbc*    pbc,
3558                real            lambda,
3559                real*           dvdlambda,
3560                const t_mdatoms gmx_unused* md,
3561                t_fcdata gmx_unused* fcd,
3562                t_disresdata gmx_unused* disresdata,
3563                t_oriresdata gmx_unused* oriresdata,
3564                int gmx_unused* global_atom_index)
3565 {
3566     int  i, ai, aj, ak, type, m, t1, t2;
3567     rvec r_ij, r_kj;
3568     real cos_theta, dVdt, va, vtot;
3569     real rij_1, rij_2, rkj_1, rkj_2, rijrkj_1;
3570     rvec f_i, f_j, f_k;
3571
3572     vtot = 0.0;
3573     for (i = 0; (i < nbonds);)
3574     {
3575         type = forceatoms[i++];
3576         ai   = forceatoms[i++];
3577         aj   = forceatoms[i++];
3578         ak   = forceatoms[i++];
3579
3580         cos_theta = g96bond_angle(x[ai], x[aj], x[ak], pbc, r_ij, r_kj, &t1, &t2);
3581
3582         *dvdlambda += g96harmonic(forceparams[type].harmonic.krA,
3583                                   forceparams[type].harmonic.krB,
3584                                   forceparams[type].harmonic.rA,
3585                                   forceparams[type].harmonic.rB,
3586                                   cos_theta,
3587                                   lambda,
3588                                   &va,
3589                                   &dVdt);
3590         vtot += va;
3591
3592         rij_1    = gmx::invsqrt(iprod(r_ij, r_ij));
3593         rkj_1    = gmx::invsqrt(iprod(r_kj, r_kj));
3594         rij_2    = rij_1 * rij_1;
3595         rkj_2    = rkj_1 * rkj_1;
3596         rijrkj_1 = rij_1 * rkj_1; /* 23 */
3597
3598         for (m = 0; (m < DIM); m++) /*  42      */
3599         {
3600             f_i[m] = dVdt * (r_kj[m] * rijrkj_1 - r_ij[m] * rij_2 * cos_theta);
3601             f_k[m] = dVdt * (r_ij[m] * rijrkj_1 - r_kj[m] * rkj_2 * cos_theta);
3602             f_j[m] = -f_i[m] - f_k[m];
3603             f[ai][m] += f_i[m];
3604             f[aj][m] += f_j[m];
3605             f[ak][m] += f_k[m];
3606         }
3607
3608         if (computeVirial(flavor))
3609         {
3610             rvec_inc(fshift[t1], f_i);
3611             rvec_inc(fshift[CENTRAL], f_j);
3612             rvec_inc(fshift[t2], f_k); /* 9 */
3613         }
3614         /* 163 TOTAL    */
3615     }
3616     return vtot;
3617 }
3618
3619 template<BondedKernelFlavor flavor>
3620 real cross_bond_bond(int             nbonds,
3621                      const t_iatom   forceatoms[],
3622                      const t_iparams forceparams[],
3623                      const rvec      x[],
3624                      rvec4           f[],
3625                      rvec            fshift[],
3626                      const t_pbc*    pbc,
3627                      real gmx_unused lambda,
3628                      real gmx_unused* dvdlambda,
3629                      const t_mdatoms gmx_unused* md,
3630                      t_fcdata gmx_unused* fcd,
3631                      t_disresdata gmx_unused* disresdata,
3632                      t_oriresdata gmx_unused* oriresdata,
3633                      int gmx_unused* global_atom_index)
3634 {
3635     /* Potential from Lawrence and Skimmer, Chem. Phys. Lett. 372 (2003)
3636      * pp. 842-847
3637      */
3638     int  i, ai, aj, ak, type, m, t1, t2;
3639     rvec r_ij, r_kj;
3640     real vtot, vrr, s1, s2, r1, r2, r1e, r2e, krr;
3641     rvec f_i, f_j, f_k;
3642
3643     vtot = 0.0;
3644     for (i = 0; (i < nbonds);)
3645     {
3646         type = forceatoms[i++];
3647         ai   = forceatoms[i++];
3648         aj   = forceatoms[i++];
3649         ak   = forceatoms[i++];
3650         r1e  = forceparams[type].cross_bb.r1e;
3651         r2e  = forceparams[type].cross_bb.r2e;
3652         krr  = forceparams[type].cross_bb.krr;
3653
3654         /* Compute distance vectors ... */
3655         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], r_ij);
3656         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], r_kj);
3657
3658         /* ... and their lengths */
3659         r1 = norm(r_ij);
3660         r2 = norm(r_kj);
3661
3662         /* Deviations from ideality */
3663         s1 = r1 - r1e;
3664         s2 = r2 - r2e;
3665
3666         /* Energy (can be negative!) */
3667         vrr = krr * s1 * s2;
3668         vtot += vrr;
3669
3670         /* Forces */
3671         svmul(-krr * s2 / r1, r_ij, f_i);
3672         svmul(-krr * s1 / r2, r_kj, f_k);
3673
3674         for (m = 0; (m < DIM); m++) /*  12      */
3675         {
3676             f_j[m] = -f_i[m] - f_k[m];
3677             f[ai][m] += f_i[m];
3678             f[aj][m] += f_j[m];
3679             f[ak][m] += f_k[m];
3680         }
3681
3682         if (computeVirial(flavor))
3683         {
3684             rvec_inc(fshift[t1], f_i);
3685             rvec_inc(fshift[CENTRAL], f_j);
3686             rvec_inc(fshift[t2], f_k); /* 9 */
3687         }
3688         /* 163 TOTAL    */
3689     }
3690     return vtot;
3691 }
3692
3693 template<BondedKernelFlavor flavor>
3694 real cross_bond_angle(int             nbonds,
3695                       const t_iatom   forceatoms[],
3696                       const t_iparams forceparams[],
3697                       const rvec      x[],
3698                       rvec4           f[],
3699                       rvec            fshift[],
3700                       const t_pbc*    pbc,
3701                       real gmx_unused lambda,
3702                       real gmx_unused* dvdlambda,
3703                       const t_mdatoms gmx_unused* md,
3704                       t_fcdata gmx_unused* fcd,
3705                       t_disresdata gmx_unused* disresdata,
3706                       t_oriresdata gmx_unused* oriresdata,
3707                       int gmx_unused* global_atom_index)
3708 {
3709     /* Potential from Lawrence and Skimmer, Chem. Phys. Lett. 372 (2003)
3710      * pp. 842-847
3711      */
3712     int  i, ai, aj, ak, type, m, t1, t2;
3713     rvec r_ij, r_kj, r_ik;
3714     real vtot, vrt, s1, s2, s3, r1, r2, r3, r1e, r2e, r3e, krt, k1, k2, k3;
3715     rvec f_i, f_j, f_k;
3716
3717     vtot = 0.0;
3718     for (i = 0; (i < nbonds);)
3719     {
3720         type = forceatoms[i++];
3721         ai   = forceatoms[i++];
3722         aj   = forceatoms[i++];
3723         ak   = forceatoms[i++];
3724         r1e  = forceparams[type].cross_ba.r1e;
3725         r2e  = forceparams[type].cross_ba.r2e;
3726         r3e  = forceparams[type].cross_ba.r3e;
3727         krt  = forceparams[type].cross_ba.krt;
3728
3729         /* Compute distance vectors ... */
3730         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], r_ij);
3731         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], r_kj);
3732         pbc_rvec_sub(pbc, x[ai], x[ak], r_ik);
3733
3734         /* ... and their lengths */
3735         r1 = norm(r_ij);
3736         r2 = norm(r_kj);
3737         r3 = norm(r_ik);
3738
3739         /* Deviations from ideality */
3740         s1 = r1 - r1e;
3741         s2 = r2 - r2e;
3742         s3 = r3 - r3e;
3743
3744         /* Energy (can be negative!) */
3745         vrt = krt * s3 * (s1 + s2);
3746         vtot += vrt;
3747
3748         /* Forces */
3749         k1 = -krt * (s3 / r1);
3750         k2 = -krt * (s3 / r2);
3751         k3 = -krt * (s1 + s2) / r3;
3752         for (m = 0; (m < DIM); m++)
3753         {
3754             f_i[m] = k1 * r_ij[m] + k3 * r_ik[m];
3755             f_k[m] = k2 * r_kj[m] - k3 * r_ik[m];
3756             f_j[m] = -f_i[m] - f_k[m];
3757         }
3758
3759         for (m = 0; (m < DIM); m++) /*  12      */
3760         {
3761             f[ai][m] += f_i[m];
3762             f[aj][m] += f_j[m];
3763             f[ak][m] += f_k[m];
3764         }
3765
3766         if (computeVirial(flavor))
3767         {
3768             rvec_inc(fshift[t1], f_i);
3769             rvec_inc(fshift[CENTRAL], f_j);
3770             rvec_inc(fshift[t2], f_k); /* 9 */
3771         }
3772         /* 163 TOTAL    */
3773     }
3774     return vtot;
3775 }
3776
3777 /*! \brief Computes the potential and force for a tabulated potential */
3778 real bonded_tab(const char*          type,
3779                 int                  table_nr,
3780                 const bondedtable_t* table,
3781                 real                 kA,
3782                 real                 kB,
3783                 real                 r,
3784                 real                 lambda,
3785                 real*                V,
3786                 real*                F)
3787 {
3788     real k, tabscale, rt, eps, eps2, Yt, Ft, Geps, Heps2, Fp, VV, FF;
3789     int  n0, nnn;
3790     real dvdlambda;
3791
3792     k = (1.0 - lambda) * kA + lambda * kB;
3793
3794     tabscale          = table->scale;
3795     const real* VFtab = table->data.data();
3796
3797     rt = r * tabscale;
3798     n0 = static_cast<int>(rt);
3799     if (n0 >= table->n)
3800     {
3801         gmx_fatal(FARGS,
3802                   "A tabulated %s interaction table number %d is out of the table range: r %f, "
3803                   "between table indices %d and %d, table length %d",
3804                   type,
3805                   table_nr,
3806                   r,
3807                   n0,
3808                   n0 + 1,
3809                   table->n);
3810     }
3811     eps   = rt - n0;
3812     eps2  = eps * eps;
3813     nnn   = 4 * n0;
3814     Yt    = VFtab[nnn];
3815     Ft    = VFtab[nnn + 1];
3816     Geps  = VFtab[nnn + 2] * eps;
3817     Heps2 = VFtab[nnn + 3] * eps2;
3818     Fp    = Ft + Geps + Heps2;
3819     VV    = Yt + Fp * eps;
3820     FF    = Fp + Geps + 2.0 * Heps2;
3821
3822     *F        = -k * FF * tabscale;
3823     *V        = k * VV;
3824     dvdlambda = (kB - kA) * VV;
3825
3826     return dvdlambda;
3827
3828     /* That was 22 flops */
3829 }
3830
3831 template<BondedKernelFlavor flavor>
3832 real tab_bonds(int             nbonds,
3833                const t_iatom   forceatoms[],
3834                const t_iparams forceparams[],
3835                const rvec      x[],
3836                rvec4           f[],
3837                rvec            fshift[],
3838                const t_pbc*    pbc,
3839                real            lambda,
3840                real*           dvdlambda,
3841                const t_mdatoms gmx_unused* md,
3842                t_fcdata*                   fcd,
3843                t_disresdata gmx_unused* disresdata,
3844                t_oriresdata gmx_unused* oriresdata,
3845                int gmx_unused* global_atom_index)
3846 {
3847     int  i, ki, ai, aj, type, table;
3848     real dr, dr2, fbond, vbond, vtot;
3849     rvec dx;
3850
3851     vtot = 0.0;
3852     for (i = 0; (i < nbonds);)
3853     {
3854         type = forceatoms[i++];
3855         ai   = forceatoms[i++];
3856         aj   = forceatoms[i++];
3857
3858         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
3859         dr2 = iprod(dx, dx);                       /*   5               */
3860         dr  = dr2 * gmx::invsqrt(dr2);             /*  10               */
3861
3862         table = forceparams[type].tab.table;
3863
3864         *dvdlambda += bonded_tab("bond",
3865                                  table,
3866                                  &fcd->bondtab[table],
3867                                  forceparams[type].tab.kA,
3868                                  forceparams[type].tab.kB,
3869                                  dr,
3870                                  lambda,
3871                                  &vbond,
3872                                  &fbond); /*  22 */
3873
3874         if (dr2 == 0.0)
3875         {
3876             continue;
3877         }
3878
3879
3880         vtot += vbond;              /* 1*/
3881         fbond *= gmx::invsqrt(dr2); /*   6              */
3882
3883         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, fshift); /* 15 */
3884     }                                                               /* 62 TOTAL */
3885     return vtot;
3886 }
3887
3888 template<BondedKernelFlavor flavor>
3889 real tab_angles(int             nbonds,
3890                 const t_iatom   forceatoms[],
3891                 const t_iparams forceparams[],
3892                 const rvec      x[],
3893                 rvec4           f[],
3894                 rvec            fshift[],
3895                 const t_pbc*    pbc,
3896                 real            lambda,
3897                 real*           dvdlambda,
3898                 const t_mdatoms gmx_unused* md,
3899                 t_fcdata*                   fcd,
3900                 t_disresdata gmx_unused* disresdata,
3901                 t_oriresdata gmx_unused* oriresdata,
3902                 int gmx_unused* global_atom_index)
3903 {
3904     int  i, ai, aj, ak, t1, t2, type, table;
3905     rvec r_ij, r_kj;
3906     real cos_theta, cos_theta2, theta, dVdt, va, vtot;
3907
3908     vtot = 0.0;
3909     for (i = 0; (i < nbonds);)
3910     {
3911         type = forceatoms[i++];
3912         ai   = forceatoms[i++];
3913         aj   = forceatoms[i++];
3914         ak   = forceatoms[i++];
3915
3916         theta = bond_angle(x[ai], x[aj], x[ak], pbc, r_ij, r_kj, &cos_theta, &t1, &t2); /*  41 */
3917
3918         table = forceparams[type].tab.table;
3919
3920         *dvdlambda += bonded_tab("angle",
3921                                  table,
3922                                  &fcd->angletab[table],
3923                                  forceparams[type].tab.kA,
3924                                  forceparams[type].tab.kB,
3925                                  theta,
3926                                  lambda,
3927                                  &va,
3928                                  &dVdt); /*  22  */
3929         vtot += va;
3930
3931         cos_theta2 = gmx::square(cos_theta); /*   1             */
3932         if (cos_theta2 < 1)
3933         {
3934             int  m;
3935             real st, sth;
3936             real cik, cii, ckk;
3937             real nrkj2, nrij2;
3938             rvec f_i, f_j, f_k;
3939
3940             st    = dVdt * gmx::invsqrt(1 - cos_theta2); /*  12         */
3941             sth   = st * cos_theta;                      /*   1         */
3942             nrkj2 = iprod(r_kj, r_kj);                   /*   5         */
3943             nrij2 = iprod(r_ij, r_ij);
3944
3945             cik = st * gmx::invsqrt(nrkj2 * nrij2); /*  12              */
3946             cii = sth / nrij2;                      /*  10              */
3947             ckk = sth / nrkj2;                      /*  10              */
3948
3949             for (m = 0; (m < DIM); m++) /*  39          */
3950             {
3951                 f_i[m] = -(cik * r_kj[m] - cii * r_ij[m]);
3952                 f_k[m] = -(cik * r_ij[m] - ckk * r_kj[m]);
3953                 f_j[m] = -f_i[m] - f_k[m];
3954                 f[ai][m] += f_i[m];
3955                 f[aj][m] += f_j[m];
3956                 f[ak][m] += f_k[m];
3957             }
3958
3959             if (computeVirial(flavor))
3960             {
3961                 rvec_inc(fshift[t1], f_i);
3962                 rvec_inc(fshift[CENTRAL], f_j);
3963                 rvec_inc(fshift[t2], f_k);
3964             }
3965         } /* 169 TOTAL  */
3966     }
3967     return vtot;
3968 }
3969
3970 template<BondedKernelFlavor flavor>
3971 real tab_dihs(int             nbonds,
3972               const t_iatom   forceatoms[],
3973               const t_iparams forceparams[],
3974               const rvec      x[],
3975               rvec4           f[],
3976               rvec            fshift[],
3977               const t_pbc*    pbc,
3978               real            lambda,
3979               real*           dvdlambda,
3980               const t_mdatoms gmx_unused* md,
3981               t_fcdata*                   fcd,
3982               t_disresdata gmx_unused* disresdata,
3983               t_oriresdata gmx_unused* oriresdata,
3984               int gmx_unused* global_atom_index)
3985 {
3986     int  i, type, ai, aj, ak, al, table;
3987     int  t1, t2, t3;
3988     rvec r_ij, r_kj, r_kl, m, n;
3989     real phi, ddphi, vpd, vtot;
3990
3991     vtot = 0.0;
3992     for (i = 0; (i < nbonds);)
3993     {
3994         type = forceatoms[i++];
3995         ai   = forceatoms[i++];
3996         aj   = forceatoms[i++];
3997         ak   = forceatoms[i++];
3998         al   = forceatoms[i++];
3999
4000         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n, &t1, &t2, &t3); /*  84  */
4001
4002         table = forceparams[type].tab.table;
4003
4004         /* Hopefully phi+M_PI never results in values < 0 */
4005         *dvdlambda += bonded_tab("dihedral",
4006                                  table,
4007                                  &fcd->dihtab[table],
4008                                  forceparams[type].tab.kA,
4009                                  forceparams[type].tab.kB,
4010                                  phi + M_PI,
4011                                  lambda,
4012                                  &vpd,
4013                                  &ddphi);
4014
4015         vtot += vpd;
4016         do_dih_fup<flavor>(ai, aj, ak, al, -ddphi, r_ij, r_kj, r_kl, m, n, f, fshift, pbc, x, t1, t2, t3); /* 112       */
4017
4018     } /* 227 TOTAL  */
4019
4020     return vtot;
4021 }
4022
4023 struct BondedInteractions
4024 {
4025     BondedFunction function;
4026     int            nrnbIndex;
4027 };
4028
4029 // Bug in old clang versions prevents constexpr. constexpr is needed for MSVC.
4030 #if defined(__clang__) && __clang_major__ < 6
4031 #    define CONSTEXPR_EXCL_OLD_CLANG const
4032 #else
4033 #    define CONSTEXPR_EXCL_OLD_CLANG constexpr
4034 #endif
4035
4036 /*! \brief Lookup table of bonded interaction functions
4037  *
4038  * This must have as many entries as interaction_function in ifunc.cpp */
4039 template<BondedKernelFlavor flavor>
4040 CONSTEXPR_EXCL_OLD_CLANG std::array<BondedInteractions, F_NRE> c_bondedInteractionFunctions = {
4041     BondedInteractions{ bonds<flavor>, eNR_BONDS },                       // F_BONDS
4042     BondedInteractions{ g96bonds<flavor>, eNR_BONDS },                    // F_G96BONDS
4043     BondedInteractions{ morse_bonds<flavor>, eNR_MORSE },                 // F_MORSE
4044     BondedInteractions{ cubic_bonds<flavor>, eNR_CUBICBONDS },            // F_CUBICBONDS
4045     BondedInteractions{ unimplemented, -1 },                              // F_CONNBONDS
4046     BondedInteractions{ bonds<flavor>, eNR_BONDS },                       // F_HARMONIC
4047     BondedInteractions{ FENE_bonds<flavor>, eNR_FENEBONDS },              // F_FENEBONDS
4048     BondedInteractions{ tab_bonds<flavor>, eNR_TABBONDS },                // F_TABBONDS
4049     BondedInteractions{ tab_bonds<flavor>, eNR_TABBONDS },                // F_TABBONDSNC
4050     BondedInteractions{ restraint_bonds<flavor>, eNR_RESTRBONDS },        // F_RESTRBONDS
4051     BondedInteractions{ angles<flavor>, eNR_ANGLES },                     // F_ANGLES
4052     BondedInteractions{ g96angles<flavor>, eNR_ANGLES },                  // F_G96ANGLES
4053     BondedInteractions{ restrangles<flavor>, eNR_ANGLES },                // F_RESTRANGLES
4054     BondedInteractions{ linear_angles<flavor>, eNR_ANGLES },              // F_LINEAR_ANGLES
4055     BondedInteractions{ cross_bond_bond<flavor>, eNR_CROSS_BOND_BOND },   // F_CROSS_BOND_BONDS
4056     BondedInteractions{ cross_bond_angle<flavor>, eNR_CROSS_BOND_ANGLE }, // F_CROSS_BOND_ANGLES
4057     BondedInteractions{ urey_bradley<flavor>, eNR_UREY_BRADLEY },         // F_UREY_BRADLEY
4058     BondedInteractions{ quartic_angles<flavor>, eNR_QANGLES },            // F_QUARTIC_ANGLES
4059     BondedInteractions{ tab_angles<flavor>, eNR_TABANGLES },              // F_TABANGLES
4060     BondedInteractions{ pdihs<flavor>, eNR_PROPER },                      // F_PDIHS
4061     BondedInteractions{ rbdihs<flavor>, eNR_RB },                         // F_RBDIHS
4062     BondedInteractions{ restrdihs<flavor>, eNR_PROPER },                  // F_RESTRDIHS
4063     BondedInteractions{ cbtdihs<flavor>, eNR_RB },                        // F_CBTDIHS
4064     BondedInteractions{ rbdihs<flavor>, eNR_FOURDIH },                    // F_FOURDIHS
4065     BondedInteractions{ idihs<flavor>, eNR_IMPROPER },                    // F_IDIHS
4066     BondedInteractions{ pdihs<flavor>, eNR_IMPROPER },                    // F_PIDIHS
4067     BondedInteractions{ tab_dihs<flavor>, eNR_TABDIHS },                  // F_TABDIHS
4068     BondedInteractions{ unimplemented, eNR_CMAP },                        // F_CMAP
4069     BondedInteractions{ unimplemented, -1 },                              // F_GB12_NOLONGERUSED
4070     BondedInteractions{ unimplemented, -1 },                              // F_GB13_NOLONGERUSED
4071     BondedInteractions{ unimplemented, -1 },                              // F_GB14_NOLONGERUSED
4072     BondedInteractions{ unimplemented, -1 },                              // F_GBPOL_NOLONGERUSED
4073     BondedInteractions{ unimplemented, -1 },                       // F_NPSOLVATION_NOLONGERUSED
4074     BondedInteractions{ unimplemented, eNR_NB14 },                 // F_LJ14
4075     BondedInteractions{ unimplemented, -1 },                       // F_COUL14
4076     BondedInteractions{ unimplemented, eNR_NB14 },                 // F_LJC14_Q
4077     BondedInteractions{ unimplemented, eNR_NB14 },                 // F_LJC_PAIRS_NB
4078     BondedInteractions{ unimplemented, -1 },                       // F_LJ
4079     BondedInteractions{ unimplemented, -1 },                       // F_BHAM
4080     BondedInteractions{ unimplemented, -1 },                       // F_LJ_LR_NOLONGERUSED
4081     BondedInteractions{ unimplemented, -1 },                       // F_BHAM_LR_NOLONGERUSED
4082     BondedInteractions{ unimplemented, -1 },                       // F_DISPCORR
4083     BondedInteractions{ unimplemented, -1 },                       // F_COUL_SR
4084     BondedInteractions{ unimplemented, -1 },                       // F_COUL_LR_NOLONGERUSED
4085     BondedInteractions{ unimplemented, -1 },                       // F_RF_EXCL
4086     BondedInteractions{ unimplemented, -1 },                       // F_COUL_RECIP
4087     BondedInteractions{ unimplemented, -1 },                       // F_LJ_RECIP
4088     BondedInteractions{ unimplemented, -1 },                       // F_DPD
4089     BondedInteractions{ polarize<flavor>, eNR_POLARIZE },          // F_POLARIZATION
4090     BondedInteractions{ water_pol<flavor>, eNR_WPOL },             // F_WATER_POL
4091     BondedInteractions{ thole_pol<flavor>, eNR_THOLE },            // F_THOLE_POL
4092     BondedInteractions{ anharm_polarize<flavor>, eNR_ANHARM_POL }, // F_ANHARM_POL
4093     BondedInteractions{ unimplemented, -1 },                       // F_POSRES
4094     BondedInteractions{ unimplemented, -1 },                       // F_FBPOSRES
4095     BondedInteractions{ ta_disres, eNR_DISRES },                   // F_DISRES
4096     BondedInteractions{ unimplemented, -1 },                       // F_DISRESVIOL
4097     BondedInteractions{ orires, eNR_ORIRES },                      // F_ORIRES
4098     BondedInteractions{ unimplemented, -1 },                       // F_ORIRESDEV
4099     BondedInteractions{ angres<flavor>, eNR_ANGRES },              // F_ANGRES
4100     BondedInteractions{ angresz<flavor>, eNR_ANGRESZ },            // F_ANGRESZ
4101     BondedInteractions{ dihres<flavor>, eNR_DIHRES },              // F_DIHRES
4102     BondedInteractions{ unimplemented, -1 },                       // F_DIHRESVIOL
4103     BondedInteractions{ unimplemented, -1 },                       // F_CONSTR
4104     BondedInteractions{ unimplemented, -1 },                       // F_CONSTRNC
4105     BondedInteractions{ unimplemented, -1 },                       // F_SETTLE
4106     BondedInteractions{ unimplemented, -1 },                       // F_VSITE2
4107     BondedInteractions{ unimplemented, -1 },                       // F_VSITE3
4108     BondedInteractions{ unimplemented, -1 },                       // F_VSITE3FD
4109     BondedInteractions{ unimplemented, -1 },                       // F_VSITE3FAD
4110     BondedInteractions{ unimplemented, -1 },                       // F_VSITE3OUT
4111     BondedInteractions{ unimplemented, -1 },                       // F_VSITE4FD
4112     BondedInteractions{ unimplemented, -1 },                       // F_VSITE4FDN
4113     BondedInteractions{ unimplemented, -1 },                       // F_VSITEN
4114     BondedInteractions{ unimplemented, -1 },                       // F_COM_PULL
4115     BondedInteractions{ unimplemented, -1 },                       // F_DENSITYFITTING
4116     BondedInteractions{ unimplemented, -1 },                       // F_EQM
4117     BondedInteractions{ unimplemented, -1 },                       // F_EPOT
4118     BondedInteractions{ unimplemented, -1 },                       // F_EKIN
4119     BondedInteractions{ unimplemented, -1 },                       // F_ETOT
4120     BondedInteractions{ unimplemented, -1 },                       // F_ECONSERVED
4121     BondedInteractions{ unimplemented, -1 },                       // F_TEMP
4122     BondedInteractions{ unimplemented, -1 },                       // F_VTEMP_NOLONGERUSED
4123     BondedInteractions{ unimplemented, -1 },                       // F_PDISPCORR
4124     BondedInteractions{ unimplemented, -1 },                       // F_PRES
4125     BondedInteractions{ unimplemented, -1 },                       // F_DVDL_CONSTR
4126     BondedInteractions{ unimplemented, -1 },                       // F_DVDL
4127     BondedInteractions{ unimplemented, -1 },                       // F_DKDL
4128     BondedInteractions{ unimplemented, -1 },                       // F_DVDL_COUL
4129     BondedInteractions{ unimplemented, -1 },                       // F_DVDL_VDW
4130     BondedInteractions{ unimplemented, -1 },                       // F_DVDL_BONDED
4131     BondedInteractions{ unimplemented, -1 },                       // F_DVDL_RESTRAINT
4132     BondedInteractions{ unimplemented, -1 },                       // F_DVDL_TEMPERATURE
4133 };
4134
4135 /*! \brief List of instantiated BondedInteractions list */
4136 CONSTEXPR_EXCL_OLD_CLANG
4137 gmx::EnumerationArray<BondedKernelFlavor, std::array<BondedInteractions, F_NRE>> c_bondedInteractionFunctionsPerFlavor = {
4138     c_bondedInteractionFunctions<BondedKernelFlavor::ForcesSimdWhenAvailable>,
4139     c_bondedInteractionFunctions<BondedKernelFlavor::ForcesNoSimd>,
4140     c_bondedInteractionFunctions<BondedKernelFlavor::ForcesAndVirialAndEnergy>,
4141     c_bondedInteractionFunctions<BondedKernelFlavor::ForcesAndEnergy>
4142 };
4143
4144 //! \endcond
4145
4146 } // namespace
4147
4148 real calculateSimpleBond(const int           ftype,
4149                          const int           numForceatoms,
4150                          const t_iatom       forceatoms[],
4151                          const t_iparams     forceparams[],
4152                          const rvec          x[],
4153                          rvec4               f[],
4154                          rvec                fshift[],
4155                          const struct t_pbc* pbc,
4156                          const real          lambda,
4157                          real*               dvdlambda,
4158                          const t_mdatoms*    md,
4159                          t_fcdata*           fcd,
4160                          t_disresdata*       disresdata,
4161                          t_oriresdata*       oriresdata,
4162                          int gmx_unused*          global_atom_index,
4163                          const BondedKernelFlavor bondedKernelFlavor)
4164 {
4165     const BondedInteractions& bonded = c_bondedInteractionFunctionsPerFlavor[bondedKernelFlavor][ftype];
4166
4167     real v = bonded.function(
4168             numForceatoms, forceatoms, forceparams, x, f, fshift, pbc, lambda, dvdlambda, md, fcd, disresdata, oriresdata, global_atom_index);
4169
4170     return v;
4171 }
4172
4173 int nrnbIndex(int ftype)
4174 {
4175     return c_bondedInteractionFunctions<BondedKernelFlavor::ForcesAndVirialAndEnergy>[ftype].nrnbIndex;
4176 }