src/gromacs/listed_forces/bonded.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
   7  * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
   8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   9  * and including many others, as listed in the AUTHORS file in the
  10  * top-level source directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38 /*! \internal \file
  39  *
  40  * \brief This file defines low-level functions necessary for
  41  * computing energies and forces for listed interactions.
  42  *
  43  * \author Mark Abraham <mark.j.abraham@gmail.com>
  44  *
  45  * \ingroup module_listed_forces
  46  */
  47 #include "gmxpre.h"
  48
  49 #include "bonded.h"
  50
  51 #include "config.h"
  52
  53 #include <cassert>
  54 #include <cmath>
  55
  56 #include <algorithm>
  57
  58 #include "gromacs/gmxlib/nrnb.h"
  59 #include "gromacs/listed_forces/disre.h"
  60 #include "gromacs/listed_forces/orires.h"
  61 #include "gromacs/math/functions.h"
  62 #include "gromacs/math/units.h"
  63 #include "gromacs/math/utilities.h"
  64 #include "gromacs/math/vec.h"
  65 #include "gromacs/mdtypes/fcdata.h"
  66 #include "gromacs/mdtypes/mdatom.h"
  67 #include "gromacs/pbcutil/ishift.h"
  68 #include "gromacs/pbcutil/pbc.h"
  69 #include "gromacs/pbcutil/pbc_simd.h"
  70 #include "gromacs/simd/simd.h"
  71 #include "gromacs/simd/simd_math.h"
  72 #include "gromacs/simd/vector_operations.h"
  73 #include "gromacs/utility/arrayref.h"
  74 #include "gromacs/utility/basedefinitions.h"
  75 #include "gromacs/utility/enumerationhelpers.h"
  76 #include "gromacs/utility/fatalerror.h"
  77 #include "gromacs/utility/real.h"
  78 #include "gromacs/utility/smalloc.h"
  79
  80 #include "listed_internal.h"
  81 #include "restcbt.h"
  82
  83 using namespace gmx; // TODO: Remove when this file is moved into gmx namespace
  84
  85 const EnumerationArray<BondedKernelFlavor, std::string> c_bondedKernelFlavorStrings = {
  86     "forces, using SIMD when available",
  87     "forces, not using SIMD",
  88     "forces, virial, and energy (ie. not using SIMD)",
  89     "forces and energy (ie. not using SIMD)"
  90 };
  91 namespace
  92 {
  93
  94 //! Type of CPU function to compute a bonded interaction.
  95 using BondedFunction = real (*)(int                       nbonds,
  96                                 const t_iatom             iatoms[],
  97                                 const t_iparams           iparams[],
  98                                 const rvec                x[],
  99                                 rvec4                     f[],
 100                                 rvec                      fshift[],
 101                                 const t_pbc*              pbc,
 102                                 real                      lambda,
 103                                 real*                     dvdlambda,
 104                                 gmx::ArrayRef<const real> charge,
 105                                 t_fcdata*                 fcd,
 106                                 t_disresdata*             disresdata,
 107                                 t_oriresdata*             oriresdata,
 108                                 int*                      ddgatindex);
 109
 110 /*! \brief Mysterious CMAP coefficient matrix */
 111 const int cmap_coeff_matrix[] = {
 112     1,  0,  -3, 2,  0,  0, 0,  0,  -3, 0,  9,  -6, 2, 0,  -6, 4,  0,  0,  0, 0,  0, 0, 0,  0,
 113     3,  0,  -9, 6,  -2, 0, 6,  -4, 0,  0,  0,  0,  0, 0,  0,  0,  0,  0,  9, -6, 0, 0, -6, 4,
 114     0,  0,  3,  -2, 0,  0, 0,  0,  0,  0,  -9, 6,  0, 0,  6,  -4, 0,  0,  0, 0,  1, 0, -3, 2,
 115     -2, 0,  6,  -4, 1,  0, -3, 2,  0,  0,  0,  0,  0, 0,  0,  0,  -1, 0,  3, -2, 1, 0, -3, 2,
 116     0,  0,  0,  0,  0,  0, 0,  0,  0,  0,  -3, 2,  0, 0,  3,  -2, 0,  0,  0, 0,  0, 0, 3,  -2,
 117     0,  0,  -6, 4,  0,  0, 3,  -2, 0,  1,  -2, 1,  0, 0,  0,  0,  0,  -3, 6, -3, 0, 2, -4, 2,
 118     0,  0,  0,  0,  0,  0, 0,  0,  0,  3,  -6, 3,  0, -2, 4,  -2, 0,  0,  0, 0,  0, 0, 0,  0,
 119     0,  0,  -3, 3,  0,  0, 2,  -2, 0,  0,  -1, 1,  0, 0,  0,  0,  0,  0,  3, -3, 0, 0, -2, 2,
 120     0,  0,  0,  0,  0,  1, -2, 1,  0,  -2, 4,  -2, 0, 1,  -2, 1,  0,  0,  0, 0,  0, 0, 0,  0,
 121     0,  -1, 2,  -1, 0,  1, -2, 1,  0,  0,  0,  0,  0, 0,  0,  0,  0,  0,  1, -1, 0, 0, -1, 1,
 122     0,  0,  0,  0,  0,  0, -1, 1,  0,  0,  2,  -2, 0, 0,  -1, 1
 123 };
 124
 125
 126 /*! \brief Compute dx = xi - xj, modulo PBC if non-NULL
 127  *
 128  * \todo This kind of code appears in many places. Consolidate it */
 129 int pbc_rvec_sub(const t_pbc* pbc, const rvec xi, const rvec xj, rvec dx)
 130 {
 131     if (pbc)
 132     {
 133         return pbc_dx_aiuc(pbc, xi, xj, dx);
 134     }
 135     else
 136     {
 137         rvec_sub(xi, xj, dx);
 138         return c_centralShiftIndex;
 139     }
 140 }
 141
 142 } // namespace
 143
 144 //! \cond
 145 real bond_angle(const rvec   xi,
 146                 const rvec   xj,
 147                 const rvec   xk,
 148                 const t_pbc* pbc,
 149                 rvec         r_ij,
 150                 rvec         r_kj,
 151                 real*        costh,
 152                 int*         t1,
 153                 int*         t2)
 154 /* Return value is the angle between the bonds i-j and j-k */
 155 {
 156     /* 41 FLOPS */
 157     real th;
 158
 159     *t1 = pbc_rvec_sub(pbc, xi, xj, r_ij); /*  3                */
 160     *t2 = pbc_rvec_sub(pbc, xk, xj, r_kj); /*  3                */
 161
 162     *costh = cos_angle(r_ij, r_kj); /* 25               */
 163     th     = std::acos(*costh);     /* 10               */
 164     /* 41 TOTAL */
 165     return th;
 166 }
 167
 168 real dih_angle(const rvec   xi,
 169                const rvec   xj,
 170                const rvec   xk,
 171                const rvec   xl,
 172                const t_pbc* pbc,
 173                rvec         r_ij,
 174                rvec         r_kj,
 175                rvec         r_kl,
 176                rvec         m,
 177                rvec         n,
 178                int*         t1,
 179                int*         t2,
 180                int*         t3)
 181 {
 182     *t1 = pbc_rvec_sub(pbc, xi, xj, r_ij); /*  3        */
 183     *t2 = pbc_rvec_sub(pbc, xk, xj, r_kj); /*  3                */
 184     *t3 = pbc_rvec_sub(pbc, xk, xl, r_kl); /*  3                */
 185
 186     cprod(r_ij, r_kj, m);        /*  9        */
 187     cprod(r_kj, r_kl, n);        /*  9          */
 188     real phi  = gmx_angle(m, n); /* 49 (assuming 25 for atan2) */
 189     real ipr  = iprod(r_ij, n);  /*  5        */
 190     real sign = (ipr < 0.0) ? -1.0 : 1.0;
 191     phi       = sign * phi; /*  1               */
 192     /* 82 TOTAL */
 193     return phi;
 194 }
 195 //! \endcond
 196
 197 void make_dp_periodic(real* dp) /* 1 flop? */
 198 {
 199     /* dp cannot be outside (-pi,pi) */
 200     if (*dp >= M_PI)
 201     {
 202         *dp -= 2 * M_PI;
 203     }
 204     else if (*dp < -M_PI)
 205     {
 206         *dp += 2 * M_PI;
 207     }
 208 }
 209
 210 namespace
 211 {
 212
 213 /*! \brief Spreads and accumulates the bonded forces to the two atoms and adds the virial contribution when needed
 214  *
 215  * \p shiftIndex is used as the periodic shift.
 216  */
 217 template<BondedKernelFlavor flavor>
 218 inline void spreadBondForces(const real bondForce,
 219                              const rvec dx,
 220                              const int  ai,
 221                              const int  aj,
 222                              rvec4*     f,
 223                              int        shiftIndex,
 224                              rvec*      fshift)
 225 {
 226     for (int m = 0; m < DIM; m++) /*  15          */
 227     {
 228         const real fij = bondForce * dx[m];
 229         f[ai][m] += fij;
 230         f[aj][m] -= fij;
 231         if (computeVirial(flavor))
 232         {
 233             fshift[shiftIndex][m] += fij;
 234             fshift[c_centralShiftIndex][m] -= fij;
 235         }
 236     }
 237 }
 238
 239 /*! \brief Morse potential bond
 240  *
 241  * By Frank Everdij. Three parameters needed:
 242  *
 243  * b0 = equilibrium distance in nm
 244  * be = beta in nm^-1 (actually, it's nu_e*Sqrt(2*pi*pi*mu/D_e))
 245  * cb = well depth in kJ/mol
 246  *
 247  * Note: the potential is referenced to be +cb at infinite separation
 248  *       and zero at the equilibrium distance!
 249  */
 250 template<BondedKernelFlavor flavor>
 251 real morse_bonds(int             nbonds,
 252                  const t_iatom   forceatoms[],
 253                  const t_iparams forceparams[],
 254                  const rvec      x[],
 255                  rvec4           f[],
 256                  rvec            fshift[],
 257                  const t_pbc*    pbc,
 258                  real            lambda,
 259                  real*           dvdlambda,
 260                  gmx::ArrayRef<const real> /*charge*/,
 261                  t_fcdata gmx_unused* fcd,
 262                  t_disresdata gmx_unused* disresdata,
 263                  t_oriresdata gmx_unused* oriresdata,
 264                  int gmx_unused* global_atom_index)
 265 {
 266     const real one = 1.0;
 267     const real two = 2.0;
 268     real       dr, dr2, temp, omtemp, cbomtemp, fbond, vbond, vtot;
 269     real       b0, be, cb, b0A, beA, cbA, b0B, beB, cbB, L1;
 270     rvec       dx;
 271     int        i, ki, type, ai, aj;
 272
 273     vtot = 0.0;
 274     for (i = 0; (i < nbonds);)
 275     {
 276         type = forceatoms[i++];
 277         ai   = forceatoms[i++];
 278         aj   = forceatoms[i++];
 279
 280         b0A = forceparams[type].morse.b0A;
 281         beA = forceparams[type].morse.betaA;
 282         cbA = forceparams[type].morse.cbA;
 283
 284         b0B = forceparams[type].morse.b0B;
 285         beB = forceparams[type].morse.betaB;
 286         cbB = forceparams[type].morse.cbB;
 287
 288         L1 = one - lambda;            /* 1 */
 289         b0 = L1 * b0A + lambda * b0B; /* 3 */
 290         be = L1 * beA + lambda * beB; /* 3 */
 291         cb = L1 * cbA + lambda * cbB; /* 3 */
 292
 293         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3          */
 294         dr2  = iprod(dx, dx);                       /*   5          */
 295         dr   = dr2 * gmx::invsqrt(dr2);             /*  10          */
 296         temp = std::exp(-be * (dr - b0));           /*  12          */
 297
 298         if (temp == one)
 299         {
 300             /* bonds are constrainted. This may _not_ include bond constraints if they are lambda dependent */
 301             *dvdlambda += cbB - cbA;
 302             continue;
 303         }
 304
 305         omtemp   = one - temp;                                      /*   1          */
 306         cbomtemp = cb * omtemp;                                     /*   1          */
 307         vbond    = cbomtemp * omtemp;                               /*   1          */
 308         fbond    = -two * be * temp * cbomtemp * gmx::invsqrt(dr2); /*   9          */
 309         vtot += vbond;                                              /*   1          */
 310
 311         *dvdlambda += (cbB - cbA) * omtemp * omtemp
 312                       - (2 - 2 * omtemp) * omtemp * cb * ((b0B - b0A) * be - (beB - beA) * (dr - b0)); /* 15 */
 313
 314         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, fshift); /* 15 */
 315     }                                                               /*  83 TOTAL    */
 316     return vtot;
 317 }
 318
 319 //! \cond
 320 template<BondedKernelFlavor flavor>
 321 real cubic_bonds(int             nbonds,
 322                  const t_iatom   forceatoms[],
 323                  const t_iparams forceparams[],
 324                  const rvec      x[],
 325                  rvec4           f[],
 326                  rvec            fshift[],
 327                  const t_pbc*    pbc,
 328                  real gmx_unused lambda,
 329                  real gmx_unused* dvdlambda,
 330                  gmx::ArrayRef<const real> /*charge*/,
 331                  t_fcdata gmx_unused* fcd,
 332                  t_disresdata gmx_unused* disresdata,
 333                  t_oriresdata gmx_unused* oriresdata,
 334                  int gmx_unused* global_atom_index)
 335 {
 336     const real three = 3.0;
 337     const real two   = 2.0;
 338     real       kb, b0, kcub;
 339     real       dr, dr2, dist, kdist, kdist2, fbond, vbond, vtot;
 340     rvec       dx;
 341     int        i, ki, type, ai, aj;
 342
 343     vtot = 0.0;
 344     for (i = 0; (i < nbonds);)
 345     {
 346         type = forceatoms[i++];
 347         ai   = forceatoms[i++];
 348         aj   = forceatoms[i++];
 349
 350         b0   = forceparams[type].cubic.b0;
 351         kb   = forceparams[type].cubic.kb;
 352         kcub = forceparams[type].cubic.kcub;
 353
 354         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3          */
 355         dr2 = iprod(dx, dx);                       /*   5          */
 356
 357         if (dr2 == 0.0)
 358         {
 359             continue;
 360         }
 361
 362         dr     = dr2 * gmx::invsqrt(dr2); /*  10          */
 363         dist   = dr - b0;
 364         kdist  = kb * dist;
 365         kdist2 = kdist * dist;
 366
 367         vbond = kdist2 + kcub * kdist2 * dist;
 368         fbond = -(two * kdist + three * kdist2 * kcub) / dr;
 369
 370         vtot += vbond; /* 21 */
 371
 372         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, fshift); /* 15 */
 373     }                                                               /*  54 TOTAL    */
 374     return vtot;
 375 }
 376
 377 template<BondedKernelFlavor flavor>
 378 real FENE_bonds(int             nbonds,
 379                 const t_iatom   forceatoms[],
 380                 const t_iparams forceparams[],
 381                 const rvec      x[],
 382                 rvec4           f[],
 383                 rvec            fshift[],
 384                 const t_pbc*    pbc,
 385                 real gmx_unused lambda,
 386                 real gmx_unused* dvdlambda,
 387                 gmx::ArrayRef<const real> /*charge*/,
 388                 t_fcdata gmx_unused* fcd,
 389                 t_disresdata gmx_unused* disresdata,
 390                 t_oriresdata gmx_unused* oriresdata,
 391                 int*                     global_atom_index)
 392 {
 393     const real half = 0.5;
 394     const real one  = 1.0;
 395     real       bm, kb;
 396     real       dr2, bm2, omdr2obm2, fbond, vbond, vtot;
 397     rvec       dx;
 398     int        i, ki, type, ai, aj;
 399
 400     vtot = 0.0;
 401     for (i = 0; (i < nbonds);)
 402     {
 403         type = forceatoms[i++];
 404         ai   = forceatoms[i++];
 405         aj   = forceatoms[i++];
 406
 407         bm = forceparams[type].fene.bm;
 408         kb = forceparams[type].fene.kb;
 409
 410         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3          */
 411         dr2 = iprod(dx, dx);                       /*   5          */
 412
 413         if (dr2 == 0.0)
 414         {
 415             continue;
 416         }
 417
 418         bm2 = bm * bm;
 419
 420         if (dr2 >= bm2)
 421         {
 422             gmx_fatal(FARGS,
 423                       "r^2 (%f) >= bm^2 (%f) in FENE bond between atoms %d and %d",
 424                       dr2,
 425                       bm2,
 426                       glatnr(global_atom_index, ai),
 427                       glatnr(global_atom_index, aj));
 428         }
 429
 430         omdr2obm2 = one - dr2 / bm2;
 431
 432         vbond = -half * kb * bm2 * std::log(omdr2obm2);
 433         fbond = -kb / omdr2obm2;
 434
 435         vtot += vbond; /* 35 */
 436
 437         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, fshift); /* 15 */
 438     }                                                               /*  58 TOTAL    */
 439     return vtot;
 440 }
 441
 442 real harmonic(real kA, real kB, real xA, real xB, real x, real lambda, real* V, real* F)
 443 {
 444     const real half = 0.5;
 445     real       L1, kk, x0, dx, dx2;
 446     real       v, f, dvdlambda;
 447
 448     L1 = 1.0 - lambda;
 449     kk = L1 * kA + lambda * kB;
 450     x0 = L1 * xA + lambda * xB;
 451
 452     dx  = x - x0;
 453     dx2 = dx * dx;
 454
 455     f         = -kk * dx;
 456     v         = half * kk * dx2;
 457     dvdlambda = half * (kB - kA) * dx2 + (xA - xB) * kk * dx;
 458
 459     *F = f;
 460     *V = v;
 461
 462     return dvdlambda;
 463
 464     /* That was 19 flops */
 465 }
 466
 467 template<BondedKernelFlavor flavor>
 468 std::enable_if_t<flavor != BondedKernelFlavor::ForcesSimdWhenAvailable || !GMX_SIMD_HAVE_REAL, real>
 469 bonds(int             nbonds,
 470       const t_iatom   forceatoms[],
 471       const t_iparams forceparams[],
 472       const rvec      x[],
 473       rvec4           f[],
 474       rvec            fshift[],
 475       const t_pbc*    pbc,
 476       real            lambda,
 477       real*           dvdlambda,
 478       gmx::ArrayRef<const real> /*charge*/,
 479       t_fcdata gmx_unused* fcd,
 480       t_disresdata gmx_unused* disresdata,
 481       t_oriresdata gmx_unused* oriresdata,
 482       int gmx_unused* global_atom_index)
 483 {
 484     int  i, ki, ai, aj, type;
 485     real dr, dr2, fbond, vbond, vtot;
 486     rvec dx;
 487
 488     vtot = 0.0;
 489     for (i = 0; (i < nbonds);)
 490     {
 491         type = forceatoms[i++];
 492         ai   = forceatoms[i++];
 493         aj   = forceatoms[i++];
 494
 495         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
 496         dr2 = iprod(dx, dx);                       /*   5               */
 497         dr  = std::sqrt(dr2);                      /*  10               */
 498
 499         *dvdlambda += harmonic(forceparams[type].harmonic.krA,
 500                                forceparams[type].harmonic.krB,
 501                                forceparams[type].harmonic.rA,
 502                                forceparams[type].harmonic.rB,
 503                                dr,
 504                                lambda,
 505                                &vbond,
 506                                &fbond); /*  19  */
 507
 508         if (dr2 == 0.0)
 509         {
 510             continue;
 511         }
 512
 513
 514         vtot += vbond;              /* 1*/
 515         fbond *= gmx::invsqrt(dr2); /*   6              */
 516
 517         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, fshift); /* 15 */
 518     }                                                               /* 59 TOTAL */
 519     return vtot;
 520 }
 521
 522 #if GMX_SIMD_HAVE_REAL
 523
 524 /*! \brief Computes forces (only) for harmonic bonds using SIMD intrinsics
 525  *
 526  * As plain-C bonds(), but using SIMD to calculate many bonds at once.
 527  * This routines does not calculate energies and shift forces.
 528  */
 529 template<BondedKernelFlavor flavor>
 530 std::enable_if_t<flavor == BondedKernelFlavor::ForcesSimdWhenAvailable, real>
 531 bonds(int             nbonds,
 532       const t_iatom   forceatoms[],
 533       const t_iparams forceparams[],
 534       const rvec      x[],
 535       rvec4           f[],
 536       rvec gmx_unused fshift[],
 537       const t_pbc*    pbc,
 538       real gmx_unused lambda,
 539       real gmx_unused* dvdlambda,
 540       gmx::ArrayRef<const real> /*charge*/,
 541       t_fcdata gmx_unused* fcd,
 542       t_disresdata gmx_unused* disresdata,
 543       t_oriresdata gmx_unused* oriresdata,
 544       int gmx_unused* global_atom_index)
 545 {
 546     constexpr int                            nfa1 = 3;
 547     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ai[GMX_SIMD_REAL_WIDTH];
 548     alignas(GMX_SIMD_ALIGNMENT) std::int32_t aj[GMX_SIMD_REAL_WIDTH];
 549     alignas(GMX_SIMD_ALIGNMENT) real         coeff[2 * GMX_SIMD_REAL_WIDTH];
 550
 551     alignas(GMX_SIMD_ALIGNMENT) real pbc_simd[9 * GMX_SIMD_REAL_WIDTH];
 552
 553     set_pbc_simd(pbc, pbc_simd);
 554
 555     const SimdReal real_eps = SimdReal(GMX_REAL_EPS);
 556
 557     /* nbonds is the number of bonds times nfa1, here we step GMX_SIMD_REAL_WIDTH angles */
 558     for (int i = 0; i < nbonds; i += GMX_SIMD_REAL_WIDTH * nfa1)
 559     {
 560         /* Collect atoms for GMX_SIMD_REAL_WIDTH angles.
 561          * iu indexes into forceatoms, we should not let iu go beyond nbonds.
 562          */
 563         int iu = i;
 564         for (int s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
 565         {
 566             const int type = forceatoms[iu];
 567             ai[s]          = forceatoms[iu + 1];
 568             aj[s]          = forceatoms[iu + 2];
 569
 570             /* At the end fill the arrays with the last atoms and 0 params */
 571             if (i + s * nfa1 < nbonds)
 572             {
 573                 coeff[s]                       = forceparams[type].harmonic.krA;
 574                 coeff[GMX_SIMD_REAL_WIDTH + s] = forceparams[type].harmonic.rA;
 575
 576                 if (iu + nfa1 < nbonds)
 577                 {
 578                     iu += nfa1;
 579                 }
 580             }
 581             else
 582             {
 583                 coeff[s]                       = 0;
 584                 coeff[GMX_SIMD_REAL_WIDTH + s] = 0;
 585             }
 586         }
 587
 588         /* Store the non PBC corrected distances packed and aligned */
 589         SimdReal xi, yi, zi;
 590         SimdReal xj, yj, zj;
 591         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ai, &xi, &yi, &zi);
 592         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), aj, &xj, &yj, &zj);
 593         SimdReal rij_x = xi - xj;
 594         SimdReal rij_y = yi - yj;
 595         SimdReal rij_z = zi - zj;
 596
 597         pbc_correct_dx_simd(&rij_x, &rij_y, &rij_z, pbc_simd);
 598
 599         const SimdReal dist2 = rij_x * rij_x + rij_y * rij_y + rij_z * rij_z;
 600         // Here we avoid sqrt(0), the force will be zero because rij=0
 601         const SimdReal invDist = invsqrt(max(dist2, real_eps));
 602
 603         const SimdReal k  = load<SimdReal>(coeff);
 604         const SimdReal r0 = load<SimdReal>(coeff + GMX_SIMD_REAL_WIDTH);
 605
 606         // Compute the force divided by the distance
 607         const SimdReal forceOverR = -k * (dist2 * invDist - r0) * invDist;
 608
 609         const SimdReal f_x = forceOverR * rij_x;
 610         const SimdReal f_y = forceOverR * rij_y;
 611         const SimdReal f_z = forceOverR * rij_z;
 612
 613         transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ai, f_x, f_y, f_z);
 614         transposeScatterDecrU<4>(reinterpret_cast<real*>(f), aj, f_x, f_y, f_z);
 615     }
 616
 617     return 0;
 618 }
 619
 620 #endif // GMX_SIMD_HAVE_REAL
 621
 622 template<BondedKernelFlavor flavor>
 623 real restraint_bonds(int             nbonds,
 624                      const t_iatom   forceatoms[],
 625                      const t_iparams forceparams[],
 626                      const rvec      x[],
 627                      rvec4           f[],
 628                      rvec            fshift[],
 629                      const t_pbc*    pbc,
 630                      real            lambda,
 631                      real*           dvdlambda,
 632                      gmx::ArrayRef<const real> /*charge*/,
 633                      t_fcdata gmx_unused* fcd,
 634                      t_disresdata gmx_unused* disresdata,
 635                      t_oriresdata gmx_unused* oriresdata,
 636                      int gmx_unused* global_atom_index)
 637 {
 638     int  i, ki, ai, aj, type;
 639     real dr, dr2, fbond, vbond, vtot;
 640     real L1;
 641     real low, dlow, up1, dup1, up2, dup2, k, dk;
 642     real drh, drh2;
 643     rvec dx;
 644
 645     L1 = 1.0 - lambda;
 646
 647     vtot = 0.0;
 648     for (i = 0; (i < nbonds);)
 649     {
 650         type = forceatoms[i++];
 651         ai   = forceatoms[i++];
 652         aj   = forceatoms[i++];
 653
 654         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
 655         dr2 = iprod(dx, dx);                       /*   5               */
 656         dr  = dr2 * gmx::invsqrt(dr2);             /*  10               */
 657
 658         low  = L1 * forceparams[type].restraint.lowA + lambda * forceparams[type].restraint.lowB;
 659         dlow = -forceparams[type].restraint.lowA + forceparams[type].restraint.lowB;
 660         up1  = L1 * forceparams[type].restraint.up1A + lambda * forceparams[type].restraint.up1B;
 661         dup1 = -forceparams[type].restraint.up1A + forceparams[type].restraint.up1B;
 662         up2  = L1 * forceparams[type].restraint.up2A + lambda * forceparams[type].restraint.up2B;
 663         dup2 = -forceparams[type].restraint.up2A + forceparams[type].restraint.up2B;
 664         k    = L1 * forceparams[type].restraint.kA + lambda * forceparams[type].restraint.kB;
 665         dk   = -forceparams[type].restraint.kA + forceparams[type].restraint.kB;
 666         /* 24 */
 667
 668         if (dr < low)
 669         {
 670             drh   = dr - low;
 671             drh2  = drh * drh;
 672             vbond = 0.5 * k * drh2;
 673             fbond = -k * drh;
 674             *dvdlambda += 0.5 * dk * drh2 - k * dlow * drh;
 675         } /* 11 */
 676         else if (dr <= up1)
 677         {
 678             vbond = 0;
 679             fbond = 0;
 680         }
 681         else if (dr <= up2)
 682         {
 683             drh   = dr - up1;
 684             drh2  = drh * drh;
 685             vbond = 0.5 * k * drh2;
 686             fbond = -k * drh;
 687             *dvdlambda += 0.5 * dk * drh2 - k * dup1 * drh;
 688         } /* 11 */
 689         else
 690         {
 691             drh   = dr - up2;
 692             vbond = k * (up2 - up1) * (0.5 * (up2 - up1) + drh);
 693             fbond = -k * (up2 - up1);
 694             *dvdlambda += dk * (up2 - up1) * (0.5 * (up2 - up1) + drh)
 695                           + k * (dup2 - dup1) * (up2 - up1 + drh) - k * (up2 - up1) * dup2;
 696         }
 697
 698         if (dr2 == 0.0)
 699         {
 700             continue;
 701         }
 702
 703         vtot += vbond;              /* 1*/
 704         fbond *= gmx::invsqrt(dr2); /*   6              */
 705
 706         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, fshift); /* 15 */
 707     }                                                               /* 59 TOTAL */
 708
 709     return vtot;
 710 }
 711
 712 template<BondedKernelFlavor flavor>
 713 real polarize(int                       nbonds,
 714               const t_iatom             forceatoms[],
 715               const t_iparams           forceparams[],
 716               const rvec                x[],
 717               rvec4                     f[],
 718               rvec                      fshift[],
 719               const t_pbc*              pbc,
 720               real                      lambda,
 721               real*                     dvdlambda,
 722               gmx::ArrayRef<const real> charge,
 723               t_fcdata gmx_unused* fcd,
 724               t_disresdata gmx_unused* disresdata,
 725               t_oriresdata gmx_unused* oriresdata,
 726               int gmx_unused* global_atom_index)
 727 {
 728     int  i, ki, ai, aj, type;
 729     real dr, dr2, fbond, vbond, vtot, ksh;
 730     rvec dx;
 731
 732     vtot = 0.0;
 733     for (i = 0; (i < nbonds);)
 734     {
 735         type = forceatoms[i++];
 736         ai   = forceatoms[i++];
 737         aj   = forceatoms[i++];
 738         ksh  = gmx::square(charge[aj]) * gmx::c_one4PiEps0 / forceparams[type].polarize.alpha;
 739
 740         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
 741         dr2 = iprod(dx, dx);                       /*   5               */
 742         dr  = std::sqrt(dr2);                      /*  10               */
 743
 744         *dvdlambda += harmonic(ksh, ksh, 0, 0, dr, lambda, &vbond, &fbond); /*  19  */
 745
 746         if (dr2 == 0.0)
 747         {
 748             continue;
 749         }
 750
 751         vtot += vbond;              /* 1*/
 752         fbond *= gmx::invsqrt(dr2); /*   6              */
 753
 754         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, fshift); /* 15 */
 755     }                                                               /* 59 TOTAL */
 756     return vtot;
 757 }
 758
 759 template<BondedKernelFlavor flavor>
 760 real anharm_polarize(int                       nbonds,
 761                      const t_iatom             forceatoms[],
 762                      const t_iparams           forceparams[],
 763                      const rvec                x[],
 764                      rvec4                     f[],
 765                      rvec                      fshift[],
 766                      const t_pbc*              pbc,
 767                      real                      lambda,
 768                      real*                     dvdlambda,
 769                      gmx::ArrayRef<const real> charge,
 770                      t_fcdata gmx_unused* fcd,
 771                      t_disresdata gmx_unused* disresdata,
 772                      t_oriresdata gmx_unused* oriresdata,
 773                      int gmx_unused* global_atom_index)
 774 {
 775     int  i, ki, ai, aj, type;
 776     real dr, dr2, fbond, vbond, vtot, ksh, khyp, drcut, ddr, ddr3;
 777     rvec dx;
 778
 779     vtot = 0.0;
 780     for (i = 0; (i < nbonds);)
 781     {
 782         type = forceatoms[i++];
 783         ai   = forceatoms[i++];
 784         aj   = forceatoms[i++];
 785         ksh = gmx::square(charge[aj]) * gmx::c_one4PiEps0 / forceparams[type].anharm_polarize.alpha; /* 7*/
 786         khyp  = forceparams[type].anharm_polarize.khyp;
 787         drcut = forceparams[type].anharm_polarize.drcut;
 788
 789         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
 790         dr2 = iprod(dx, dx);                       /*   5               */
 791         dr  = dr2 * gmx::invsqrt(dr2);             /*  10               */
 792
 793         *dvdlambda += harmonic(ksh, ksh, 0, 0, dr, lambda, &vbond, &fbond); /*  19  */
 794
 795         if (dr2 == 0.0)
 796         {
 797             continue;
 798         }
 799
 800         if (dr > drcut)
 801         {
 802             ddr  = dr - drcut;
 803             ddr3 = ddr * ddr * ddr;
 804             vbond += khyp * ddr * ddr3;
 805             fbond -= 4 * khyp * ddr3;
 806         }
 807         fbond *= gmx::invsqrt(dr2); /*   6              */
 808         vtot += vbond;              /* 1*/
 809
 810         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, fshift); /* 15 */
 811     }                                                               /* 72 TOTAL */
 812     return vtot;
 813 }
 814
 815 template<BondedKernelFlavor flavor>
 816 real water_pol(int             nbonds,
 817                const t_iatom   forceatoms[],
 818                const t_iparams forceparams[],
 819                const rvec      x[],
 820                rvec4           f[],
 821                rvec gmx_unused fshift[],
 822                const t_pbc gmx_unused* pbc,
 823                real gmx_unused         lambda,
 824                real gmx_unused*          dvdlambda,
 825                gmx::ArrayRef<const real> charge,
 826                t_fcdata gmx_unused* fcd,
 827                t_disresdata gmx_unused* disresdata,
 828                t_oriresdata gmx_unused* oriresdata,
 829                int gmx_unused* global_atom_index)
 830 {
 831     /* This routine implements anisotropic polarizibility for water, through
 832      * a shell connected to a dummy with spring constant that differ in the
 833      * three spatial dimensions in the molecular frame.
 834      */
 835     int  i, m, aO, aH1, aH2, aD, aS, type, type0, ki;
 836     rvec dOH1, dOH2, dHH, dOD, dDS, nW, kk, dx, kdx, proj;
 837     real vtot, fij, r_HH, r_OD, r_nW, tx, ty, tz, qS;
 838
 839     vtot = 0.0;
 840     if (nbonds > 0)
 841     {
 842         type0  = forceatoms[0];
 843         aS     = forceatoms[5];
 844         qS     = charge[aS];
 845         kk[XX] = gmx::square(qS) * gmx::c_one4PiEps0 / forceparams[type0].wpol.al_x;
 846         kk[YY] = gmx::square(qS) * gmx::c_one4PiEps0 / forceparams[type0].wpol.al_y;
 847         kk[ZZ] = gmx::square(qS) * gmx::c_one4PiEps0 / forceparams[type0].wpol.al_z;
 848         r_HH   = 1.0 / forceparams[type0].wpol.rHH;
 849         for (i = 0; (i < nbonds); i += 6)
 850         {
 851             type = forceatoms[i];
 852             if (type != type0)
 853             {
 854                 gmx_fatal(FARGS, "Sorry, type = %d, type0 = %d, file = %s, line = %d", type, type0, __FILE__, __LINE__);
 855             }
 856             aO  = forceatoms[i + 1];
 857             aH1 = forceatoms[i + 2];
 858             aH2 = forceatoms[i + 3];
 859             aD  = forceatoms[i + 4];
 860             aS  = forceatoms[i + 5];
 861
 862             /* Compute vectors describing the water frame */
 863             pbc_rvec_sub(pbc, x[aH1], x[aO], dOH1);
 864             pbc_rvec_sub(pbc, x[aH2], x[aO], dOH2);
 865             pbc_rvec_sub(pbc, x[aH2], x[aH1], dHH);
 866             pbc_rvec_sub(pbc, x[aD], x[aO], dOD);
 867             ki = pbc_rvec_sub(pbc, x[aS], x[aD], dDS);
 868             cprod(dOH1, dOH2, nW);
 869
 870             /* Compute inverse length of normal vector
 871              * (this one could be precomputed, but I'm too lazy now)
 872              */
 873             r_nW = gmx::invsqrt(iprod(nW, nW));
 874             /* This is for precision, but does not make a big difference,
 875              * it can go later.
 876              */
 877             r_OD = gmx::invsqrt(iprod(dOD, dOD));
 878
 879             /* Normalize the vectors in the water frame */
 880             svmul(r_nW, nW, nW);
 881             svmul(r_HH, dHH, dHH);
 882             svmul(r_OD, dOD, dOD);
 883
 884             /* Compute displacement of shell along components of the vector */
 885             dx[ZZ] = iprod(dDS, dOD);
 886             /* Compute projection on the XY plane: dDS - dx[ZZ]*dOD */
 887             for (m = 0; (m < DIM); m++)
 888             {
 889                 proj[m] = dDS[m] - dx[ZZ] * dOD[m];
 890             }
 891
 892             /*dx[XX] = iprod(dDS,nW);
 893                dx[YY] = iprod(dDS,dHH);*/
 894             dx[XX] = iprod(proj, nW);
 895             for (m = 0; (m < DIM); m++)
 896             {
 897                 proj[m] -= dx[XX] * nW[m];
 898             }
 899             dx[YY] = iprod(proj, dHH);
 900             /* Now compute the forces and energy */
 901             kdx[XX] = kk[XX] * dx[XX];
 902             kdx[YY] = kk[YY] * dx[YY];
 903             kdx[ZZ] = kk[ZZ] * dx[ZZ];
 904             vtot += iprod(dx, kdx);
 905
 906             for (m = 0; (m < DIM); m++)
 907             {
 908                 /* This is a tensor operation but written out for speed */
 909                 tx  = nW[m] * kdx[XX];
 910                 ty  = dHH[m] * kdx[YY];
 911                 tz  = dOD[m] * kdx[ZZ];
 912                 fij = -tx - ty - tz;
 913                 f[aS][m] += fij;
 914                 f[aD][m] -= fij;
 915                 if (computeVirial(flavor))
 916                 {
 917                     fshift[ki][m] += fij;
 918                     fshift[c_centralShiftIndex][m] -= fij;
 919                 }
 920             }
 921         }
 922     }
 923     return 0.5 * vtot;
 924 }
 925
 926 template<BondedKernelFlavor flavor>
 927 static real
 928 do_1_thole(const rvec xi, const rvec xj, rvec fi, rvec fj, const t_pbc* pbc, real qq, rvec fshift[], real afac)
 929 {
 930     rvec r12;
 931     real r12sq, r12_1, r12bar, v0, v1, fscal, ebar, fff;
 932     int  m, t;
 933
 934     t = pbc_rvec_sub(pbc, xi, xj, r12); /*  3 */
 935
 936     r12sq  = iprod(r12, r12);                                                     /*  5 */
 937     r12_1  = gmx::invsqrt(r12sq);                                                 /*  5 */
 938     r12bar = afac / r12_1;                                                        /*  5 */
 939     v0     = qq * gmx::c_one4PiEps0 * r12_1;                                      /*  2 */
 940     ebar   = std::exp(-r12bar);                                                   /*  5 */
 941     v1     = (1 - (1 + 0.5 * r12bar) * ebar);                                     /*  4 */
 942     fscal  = ((v0 * r12_1) * v1 - v0 * 0.5 * afac * ebar * (r12bar + 1)) * r12_1; /* 9 */
 943
 944     for (m = 0; (m < DIM); m++)
 945     {
 946         fff = fscal * r12[m];
 947         fi[m] += fff;
 948         fj[m] -= fff;
 949         if (computeVirial(flavor))
 950         {
 951             fshift[t][m] += fff;
 952             fshift[c_centralShiftIndex][m] -= fff;
 953         }
 954     } /* 15 */
 955
 956     return v0 * v1; /* 1 */
 957     /* 54 */
 958 }
 959
 960 template<BondedKernelFlavor flavor>
 961 real thole_pol(int             nbonds,
 962                const t_iatom   forceatoms[],
 963                const t_iparams forceparams[],
 964                const rvec      x[],
 965                rvec4           f[],
 966                rvec            fshift[],
 967                const t_pbc*    pbc,
 968                real gmx_unused lambda,
 969                real gmx_unused*          dvdlambda,
 970                gmx::ArrayRef<const real> charge,
 971                t_fcdata gmx_unused* fcd,
 972                t_disresdata gmx_unused* disresdata,
 973                t_oriresdata gmx_unused* oriresdata,
 974                int gmx_unused* global_atom_index)
 975 {
 976     /* Interaction between two pairs of particles with opposite charge */
 977     int  i, type, a1, da1, a2, da2;
 978     real q1, q2, qq, a, al1, al2, afac;
 979     real V = 0;
 980
 981     for (i = 0; (i < nbonds);)
 982     {
 983         type = forceatoms[i++];
 984         a1   = forceatoms[i++];
 985         da1  = forceatoms[i++];
 986         a2   = forceatoms[i++];
 987         da2  = forceatoms[i++];
 988         q1   = charge[da1];
 989         q2   = charge[da2];
 990         a    = forceparams[type].thole.a;
 991         al1  = forceparams[type].thole.alpha1;
 992         al2  = forceparams[type].thole.alpha2;
 993         qq   = q1 * q2;
 994         afac = a * gmx::invsixthroot(al1 * al2);
 995         V += do_1_thole<flavor>(x[a1], x[a2], f[a1], f[a2], pbc, qq, fshift, afac);
 996         V += do_1_thole<flavor>(x[da1], x[a2], f[da1], f[a2], pbc, -qq, fshift, afac);
 997         V += do_1_thole<flavor>(x[a1], x[da2], f[a1], f[da2], pbc, -qq, fshift, afac);
 998         V += do_1_thole<flavor>(x[da1], x[da2], f[da1], f[da2], pbc, qq, fshift, afac);
 999     }
1000     /* 290 flops */
1001     return V;
1002 }
1003
1004 // Avoid gcc 386 -O3 code generation bug in this function (see Issue
1005 // #3205 for more information)
1006 #if defined(__GNUC__) && defined(__i386__) && defined(__OPTIMIZE__)
1007 #    pragma GCC push_options
1008 #    pragma GCC optimize("O1")
1009 #    define avoid_gcc_i386_o3_code_generation_bug
1010 #endif
1011
1012 template<BondedKernelFlavor flavor>
1013 std::enable_if_t<flavor != BondedKernelFlavor::ForcesSimdWhenAvailable || !GMX_SIMD_HAVE_REAL, real>
1014 angles(int             nbonds,
1015        const t_iatom   forceatoms[],
1016        const t_iparams forceparams[],
1017        const rvec      x[],
1018        rvec4           f[],
1019        rvec            fshift[],
1020        const t_pbc*    pbc,
1021        real            lambda,
1022        real*           dvdlambda,
1023        gmx::ArrayRef<const real> /*charge*/,
1024        t_fcdata gmx_unused* fcd,
1025        t_disresdata gmx_unused* disresdata,
1026        t_oriresdata gmx_unused* oriresdata,
1027        int gmx_unused* global_atom_index)
1028 {
1029     int  i, ai, aj, ak, t1, t2, type;
1030     rvec r_ij, r_kj;
1031     real cos_theta, cos_theta2, theta, dVdt, va, vtot;
1032
1033     vtot = 0.0;
1034     for (i = 0; i < nbonds;)
1035     {
1036         type = forceatoms[i++];
1037         ai   = forceatoms[i++];
1038         aj   = forceatoms[i++];
1039         ak   = forceatoms[i++];
1040
1041         theta = bond_angle(x[ai], x[aj], x[ak], pbc, r_ij, r_kj, &cos_theta, &t1, &t2); /*  41 */
1042
1043         *dvdlambda += harmonic(forceparams[type].harmonic.krA,
1044                                forceparams[type].harmonic.krB,
1045                                forceparams[type].harmonic.rA * gmx::c_deg2Rad,
1046                                forceparams[type].harmonic.rB * gmx::c_deg2Rad,
1047                                theta,
1048                                lambda,
1049                                &va,
1050                                &dVdt); /*  21  */
1051         vtot += va;
1052
1053         cos_theta2 = gmx::square(cos_theta);
1054         if (cos_theta2 < 1)
1055         {
1056             int  m;
1057             real st, sth;
1058             real cik, cii, ckk;
1059             real nrkj2, nrij2;
1060             real nrkj_1, nrij_1;
1061             rvec f_i, f_j, f_k;
1062
1063             st    = dVdt * gmx::invsqrt(1 - cos_theta2); /*  12         */
1064             sth   = st * cos_theta;                      /*   1         */
1065             nrij2 = iprod(r_ij, r_ij);                   /*   5         */
1066             nrkj2 = iprod(r_kj, r_kj);                   /*   5         */
1067
1068             nrij_1 = gmx::invsqrt(nrij2); /*  10                */
1069             nrkj_1 = gmx::invsqrt(nrkj2); /*  10                */
1070
1071             cik = st * nrij_1 * nrkj_1;  /*   2         */
1072             cii = sth * nrij_1 * nrij_1; /*   2         */
1073             ckk = sth * nrkj_1 * nrkj_1; /*   2         */
1074
1075             for (m = 0; m < DIM; m++)
1076             { /*  39            */
1077                 f_i[m] = -(cik * r_kj[m] - cii * r_ij[m]);
1078                 f_k[m] = -(cik * r_ij[m] - ckk * r_kj[m]);
1079                 f_j[m] = -f_i[m] - f_k[m];
1080                 f[ai][m] += f_i[m];
1081                 f[aj][m] += f_j[m];
1082                 f[ak][m] += f_k[m];
1083             }
1084             if (computeVirial(flavor))
1085             {
1086                 rvec_inc(fshift[t1], f_i);
1087                 rvec_inc(fshift[c_centralShiftIndex], f_j);
1088                 rvec_inc(fshift[t2], f_k);
1089             }
1090         } /* 161 TOTAL  */
1091     }
1092
1093     return vtot;
1094 }
1095
1096 #ifdef avoid_gcc_i386_o3_code_generation_bug
1097 #    pragma GCC pop_options
1098 #    undef avoid_gcc_i386_o3_code_generation_bug
1099 #endif
1100
1101 #if GMX_SIMD_HAVE_REAL
1102
1103 /* As angles, but using SIMD to calculate many angles at once.
1104  * This routines does not calculate energies and shift forces.
1105  */
1106 template<BondedKernelFlavor flavor>
1107 std::enable_if_t<flavor == BondedKernelFlavor::ForcesSimdWhenAvailable, real>
1108 angles(int             nbonds,
1109        const t_iatom   forceatoms[],
1110        const t_iparams forceparams[],
1111        const rvec      x[],
1112        rvec4           f[],
1113        rvec gmx_unused fshift[],
1114        const t_pbc*    pbc,
1115        real gmx_unused lambda,
1116        real gmx_unused* dvdlambda,
1117        gmx::ArrayRef<const real> /*charge*/,
1118        t_fcdata gmx_unused* fcd,
1119        t_disresdata gmx_unused* disresdata,
1120        t_oriresdata gmx_unused* oriresdata,
1121        int gmx_unused* global_atom_index)
1122 {
1123     const int                                nfa1 = 4;
1124     int                                      i, iu, s;
1125     int                                      type;
1126     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ai[GMX_SIMD_REAL_WIDTH];
1127     alignas(GMX_SIMD_ALIGNMENT) std::int32_t aj[GMX_SIMD_REAL_WIDTH];
1128     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ak[GMX_SIMD_REAL_WIDTH];
1129     alignas(GMX_SIMD_ALIGNMENT) real         coeff[2 * GMX_SIMD_REAL_WIDTH];
1130     SimdReal                                 deg2rad_S(gmx::c_deg2Rad);
1131     SimdReal                                 xi_S, yi_S, zi_S;
1132     SimdReal                                 xj_S, yj_S, zj_S;
1133     SimdReal                                 xk_S, yk_S, zk_S;
1134     SimdReal                                 k_S, theta0_S;
1135     SimdReal                                 rijx_S, rijy_S, rijz_S;
1136     SimdReal                                 rkjx_S, rkjy_S, rkjz_S;
1137     SimdReal                                 one_S(1.0);
1138     SimdReal one_min_eps_S(1.0_real - GMX_REAL_EPS); // Largest number < 1
1139
1140     SimdReal                         rij_rkj_S;
1141     SimdReal                         nrij2_S, nrij_1_S;
1142     SimdReal                         nrkj2_S, nrkj_1_S;
1143     SimdReal                         cos_S, invsin_S;
1144     SimdReal                         cos2_S;
1145     SimdReal                         theta_S;
1146     SimdReal                         st_S, sth_S;
1147     SimdReal                         cik_S, cii_S, ckk_S;
1148     SimdReal                         f_ix_S, f_iy_S, f_iz_S;
1149     SimdReal                         f_kx_S, f_ky_S, f_kz_S;
1150     alignas(GMX_SIMD_ALIGNMENT) real pbc_simd[9 * GMX_SIMD_REAL_WIDTH];
1151
1152     set_pbc_simd(pbc, pbc_simd);
1153
1154     /* nbonds is the number of angles times nfa1, here we step GMX_SIMD_REAL_WIDTH angles */
1155     for (i = 0; (i < nbonds); i += GMX_SIMD_REAL_WIDTH * nfa1)
1156     {
1157         /* Collect atoms for GMX_SIMD_REAL_WIDTH angles.
1158          * iu indexes into forceatoms, we should not let iu go beyond nbonds.
1159          */
1160         iu = i;
1161         for (s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
1162         {
1163             type  = forceatoms[iu];
1164             ai[s] = forceatoms[iu + 1];
1165             aj[s] = forceatoms[iu + 2];
1166             ak[s] = forceatoms[iu + 3];
1167
1168             /* At the end fill the arrays with the last atoms and 0 params */
1169             if (i + s * nfa1 < nbonds)
1170             {
1171                 coeff[s]                       = forceparams[type].harmonic.krA;
1172                 coeff[GMX_SIMD_REAL_WIDTH + s] = forceparams[type].harmonic.rA;
1173
1174                 if (iu + nfa1 < nbonds)
1175                 {
1176                     iu += nfa1;
1177                 }
1178             }
1179             else
1180             {
1181                 coeff[s]                       = 0;
1182                 coeff[GMX_SIMD_REAL_WIDTH + s] = 0;
1183             }
1184         }
1185
1186         /* Store the non PBC corrected distances packed and aligned */
1187         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ai, &xi_S, &yi_S, &zi_S);
1188         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), aj, &xj_S, &yj_S, &zj_S);
1189         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ak, &xk_S, &yk_S, &zk_S);
1190         rijx_S = xi_S - xj_S;
1191         rijy_S = yi_S - yj_S;
1192         rijz_S = zi_S - zj_S;
1193         rkjx_S = xk_S - xj_S;
1194         rkjy_S = yk_S - yj_S;
1195         rkjz_S = zk_S - zj_S;
1196
1197         k_S      = load<SimdReal>(coeff);
1198         theta0_S = load<SimdReal>(coeff + GMX_SIMD_REAL_WIDTH) * deg2rad_S;
1199
1200         pbc_correct_dx_simd(&rijx_S, &rijy_S, &rijz_S, pbc_simd);
1201         pbc_correct_dx_simd(&rkjx_S, &rkjy_S, &rkjz_S, pbc_simd);
1202
1203         rij_rkj_S = iprod(rijx_S, rijy_S, rijz_S, rkjx_S, rkjy_S, rkjz_S);
1204
1205         nrij2_S = norm2(rijx_S, rijy_S, rijz_S);
1206         nrkj2_S = norm2(rkjx_S, rkjy_S, rkjz_S);
1207
1208         nrij_1_S = invsqrt(nrij2_S);
1209         nrkj_1_S = invsqrt(nrkj2_S);
1210
1211         cos_S = rij_rkj_S * nrij_1_S * nrkj_1_S;
1212
1213         /* We compute cos^2 using a division instead of squaring cos_S,
1214          * as we would then get additional error contributions from
1215          * the two invsqrt operations, which get amplified by
1216          * the 1/sqrt(1-cos^2) for cos values close to 1.
1217          *
1218          * Note that the non-SIMD version of angles() avoids this issue
1219          * by computing the cosine using double precision.
1220          */
1221         cos2_S = rij_rkj_S * rij_rkj_S / (nrij2_S * nrkj2_S);
1222
1223         /* To allow for 0 and 180 degrees, we need to avoid issues when
1224          * the cosine is close to -1 and 1. For acos() the argument needs
1225          * to be in that range. For cos^2 we take the min of cos^2 and 1 - 1bit,
1226          * so we can safely compute 1/sin as 1/sqrt(1 - cos^2).
1227          */
1228         cos_S  = max(cos_S, -one_S);
1229         cos_S  = min(cos_S, one_S);
1230         cos2_S = min(cos2_S, one_min_eps_S);
1231
1232         theta_S = acos(cos_S);
1233
1234         invsin_S = invsqrt(one_S - cos2_S);
1235
1236         st_S  = k_S * (theta0_S - theta_S) * invsin_S;
1237         sth_S = st_S * cos_S;
1238
1239         cik_S = st_S * nrij_1_S * nrkj_1_S;
1240         cii_S = sth_S * nrij_1_S * nrij_1_S;
1241         ckk_S = sth_S * nrkj_1_S * nrkj_1_S;
1242
1243         f_ix_S = cii_S * rijx_S;
1244         f_ix_S = fnma(cik_S, rkjx_S, f_ix_S);
1245         f_iy_S = cii_S * rijy_S;
1246         f_iy_S = fnma(cik_S, rkjy_S, f_iy_S);
1247         f_iz_S = cii_S * rijz_S;
1248         f_iz_S = fnma(cik_S, rkjz_S, f_iz_S);
1249         f_kx_S = ckk_S * rkjx_S;
1250         f_kx_S = fnma(cik_S, rijx_S, f_kx_S);
1251         f_ky_S = ckk_S * rkjy_S;
1252         f_ky_S = fnma(cik_S, rijy_S, f_ky_S);
1253         f_kz_S = ckk_S * rkjz_S;
1254         f_kz_S = fnma(cik_S, rijz_S, f_kz_S);
1255
1256         transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ai, f_ix_S, f_iy_S, f_iz_S);
1257         transposeScatterDecrU<4>(
1258                 reinterpret_cast<real*>(f), aj, f_ix_S + f_kx_S, f_iy_S + f_ky_S, f_iz_S + f_kz_S);
1259         transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ak, f_kx_S, f_ky_S, f_kz_S);
1260     }
1261
1262     return 0;
1263 }
1264
1265 #endif // GMX_SIMD_HAVE_REAL
1266
1267 template<BondedKernelFlavor flavor>
1268 real linear_angles(int             nbonds,
1269                    const t_iatom   forceatoms[],
1270                    const t_iparams forceparams[],
1271                    const rvec      x[],
1272                    rvec4           f[],
1273                    rvec            fshift[],
1274                    const t_pbc*    pbc,
1275                    real            lambda,
1276                    real*           dvdlambda,
1277                    gmx::ArrayRef<const real> /*charge*/,
1278                    t_fcdata gmx_unused* fcd,
1279                    t_disresdata gmx_unused* disresdata,
1280                    t_oriresdata gmx_unused* oriresdata,
1281                    int gmx_unused* global_atom_index)
1282 {
1283     int  i, m, ai, aj, ak, t1, t2, type;
1284     rvec f_i, f_j, f_k;
1285     real L1, kA, kB, aA, aB, dr, dr2, va, vtot, a, b, klin;
1286     rvec r_ij, r_kj, r_ik, dx;
1287
1288     L1   = 1 - lambda;
1289     vtot = 0.0;
1290     for (i = 0; (i < nbonds);)
1291     {
1292         type = forceatoms[i++];
1293         ai   = forceatoms[i++];
1294         aj   = forceatoms[i++];
1295         ak   = forceatoms[i++];
1296
1297         kA   = forceparams[type].linangle.klinA;
1298         kB   = forceparams[type].linangle.klinB;
1299         klin = L1 * kA + lambda * kB;
1300
1301         aA = forceparams[type].linangle.aA;
1302         aB = forceparams[type].linangle.aB;
1303         a  = L1 * aA + lambda * aB;
1304         b  = 1 - a;
1305
1306         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], r_ij);
1307         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], r_kj);
1308         rvec_sub(r_ij, r_kj, r_ik);
1309
1310         dr2 = 0;
1311         for (m = 0; (m < DIM); m++)
1312         {
1313             dr = -a * r_ij[m] - b * r_kj[m];
1314             dr2 += dr * dr;
1315             dx[m]  = dr;
1316             f_i[m] = a * klin * dr;
1317             f_k[m] = b * klin * dr;
1318             f_j[m] = -(f_i[m] + f_k[m]);
1319             f[ai][m] += f_i[m];
1320             f[aj][m] += f_j[m];
1321             f[ak][m] += f_k[m];
1322         }
1323         va = 0.5 * klin * dr2;
1324         *dvdlambda += 0.5 * (kB - kA) * dr2 + klin * (aB - aA) * iprod(dx, r_ik);
1325
1326         vtot += va;
1327
1328         if (computeVirial(flavor))
1329         {
1330             rvec_inc(fshift[t1], f_i);
1331             rvec_inc(fshift[c_centralShiftIndex], f_j);
1332             rvec_inc(fshift[t2], f_k);
1333         }
1334     } /* 57 TOTAL       */
1335     return vtot;
1336 }
1337
1338 template<BondedKernelFlavor flavor>
1339 std::enable_if_t<flavor != BondedKernelFlavor::ForcesSimdWhenAvailable || !GMX_SIMD_HAVE_REAL, real>
1340 urey_bradley(int             nbonds,
1341              const t_iatom   forceatoms[],
1342              const t_iparams forceparams[],
1343              const rvec      x[],
1344              rvec4           f[],
1345              rvec            fshift[],
1346              const t_pbc*    pbc,
1347              real            lambda,
1348              real*           dvdlambda,
1349              gmx::ArrayRef<const real> /*charge*/,
1350              t_fcdata gmx_unused* fcd,
1351              t_disresdata gmx_unused* disresdata,
1352              t_oriresdata gmx_unused* oriresdata,
1353              int gmx_unused* global_atom_index)
1354 {
1355     int  i, m, ai, aj, ak, t1, t2, type, ki;
1356     rvec r_ij, r_kj, r_ik;
1357     real cos_theta, cos_theta2, theta;
1358     real dVdt, va, vtot, dr, dr2, vbond, fbond, fik;
1359     real kthA, th0A, kUBA, r13A, kthB, th0B, kUBB, r13B;
1360
1361     vtot = 0.0;
1362     for (i = 0; (i < nbonds);)
1363     {
1364         type = forceatoms[i++];
1365         ai   = forceatoms[i++];
1366         aj   = forceatoms[i++];
1367         ak   = forceatoms[i++];
1368         th0A = forceparams[type].u_b.thetaA * gmx::c_deg2Rad;
1369         kthA = forceparams[type].u_b.kthetaA;
1370         r13A = forceparams[type].u_b.r13A;
1371         kUBA = forceparams[type].u_b.kUBA;
1372         th0B = forceparams[type].u_b.thetaB * gmx::c_deg2Rad;
1373         kthB = forceparams[type].u_b.kthetaB;
1374         r13B = forceparams[type].u_b.r13B;
1375         kUBB = forceparams[type].u_b.kUBB;
1376
1377         theta = bond_angle(x[ai], x[aj], x[ak], pbc, r_ij, r_kj, &cos_theta, &t1, &t2); /*  41 */
1378
1379         *dvdlambda += harmonic(kthA, kthB, th0A, th0B, theta, lambda, &va, &dVdt); /*  21  */
1380         vtot += va;
1381
1382         ki  = pbc_rvec_sub(pbc, x[ai], x[ak], r_ik); /*   3      */
1383         dr2 = iprod(r_ik, r_ik);                     /*   5             */
1384         dr  = dr2 * gmx::invsqrt(dr2);               /*  10             */
1385
1386         *dvdlambda += harmonic(kUBA, kUBB, r13A, r13B, dr, lambda, &vbond, &fbond); /*  19  */
1387
1388         cos_theta2 = gmx::square(cos_theta); /*   1             */
1389         if (cos_theta2 < 1)
1390         {
1391             real st, sth;
1392             real cik, cii, ckk;
1393             real nrkj2, nrij2;
1394             rvec f_i, f_j, f_k;
1395
1396             st    = dVdt * gmx::invsqrt(1 - cos_theta2); /*  12         */
1397             sth   = st * cos_theta;                      /*   1         */
1398             nrkj2 = iprod(r_kj, r_kj);                   /*   5         */
1399             nrij2 = iprod(r_ij, r_ij);
1400
1401             cik = st * gmx::invsqrt(nrkj2 * nrij2); /*  12              */
1402             cii = sth / nrij2;                      /*  10              */
1403             ckk = sth / nrkj2;                      /*  10              */
1404
1405             for (m = 0; (m < DIM); m++) /*  39          */
1406             {
1407                 f_i[m] = -(cik * r_kj[m] - cii * r_ij[m]);
1408                 f_k[m] = -(cik * r_ij[m] - ckk * r_kj[m]);
1409                 f_j[m] = -f_i[m] - f_k[m];
1410                 f[ai][m] += f_i[m];
1411                 f[aj][m] += f_j[m];
1412                 f[ak][m] += f_k[m];
1413             }
1414             if (computeVirial(flavor))
1415             {
1416                 rvec_inc(fshift[t1], f_i);
1417                 rvec_inc(fshift[c_centralShiftIndex], f_j);
1418                 rvec_inc(fshift[t2], f_k);
1419             }
1420         } /* 161 TOTAL  */
1421         /* Time for the bond calculations */
1422         if (dr2 == 0.0)
1423         {
1424             continue;
1425         }
1426
1427         vtot += vbond;              /* 1*/
1428         fbond *= gmx::invsqrt(dr2); /*   6              */
1429
1430         for (m = 0; (m < DIM); m++) /*  15              */
1431         {
1432             fik = fbond * r_ik[m];
1433             f[ai][m] += fik;
1434             f[ak][m] -= fik;
1435             if (computeVirial(flavor))
1436             {
1437                 fshift[ki][m] += fik;
1438                 fshift[c_centralShiftIndex][m] -= fik;
1439             }
1440         }
1441     }
1442     return vtot;
1443 }
1444
1445 #if GMX_SIMD_HAVE_REAL
1446
1447 /* As urey_bradley, but using SIMD to calculate many potentials at once.
1448  * This routines does not calculate energies and shift forces.
1449  */
1450 template<BondedKernelFlavor flavor>
1451 std::enable_if_t<flavor == BondedKernelFlavor::ForcesSimdWhenAvailable, real>
1452 urey_bradley(int             nbonds,
1453              const t_iatom   forceatoms[],
1454              const t_iparams forceparams[],
1455              const rvec      x[],
1456              rvec4           f[],
1457              rvec gmx_unused fshift[],
1458              const t_pbc*    pbc,
1459              real gmx_unused lambda,
1460              real gmx_unused* dvdlambda,
1461              gmx::ArrayRef<const real> /*charge*/,
1462              t_fcdata gmx_unused* fcd,
1463              t_disresdata gmx_unused* disresdata,
1464              t_oriresdata gmx_unused* oriresdata,
1465              int gmx_unused* global_atom_index)
1466 {
1467     constexpr int                            nfa1 = 4;
1468     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ai[GMX_SIMD_REAL_WIDTH];
1469     alignas(GMX_SIMD_ALIGNMENT) std::int32_t aj[GMX_SIMD_REAL_WIDTH];
1470     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ak[GMX_SIMD_REAL_WIDTH];
1471     alignas(GMX_SIMD_ALIGNMENT) real         coeff[4 * GMX_SIMD_REAL_WIDTH];
1472     alignas(GMX_SIMD_ALIGNMENT) real         pbc_simd[9 * GMX_SIMD_REAL_WIDTH];
1473
1474     set_pbc_simd(pbc, pbc_simd);
1475
1476     /* nbonds is the number of angles times nfa1, here we step GMX_SIMD_REAL_WIDTH angles */
1477     for (int i = 0; i < nbonds; i += GMX_SIMD_REAL_WIDTH * nfa1)
1478     {
1479         /* Collect atoms for GMX_SIMD_REAL_WIDTH angles.
1480          * iu indexes into forceatoms, we should not let iu go beyond nbonds.
1481          */
1482         int iu = i;
1483         for (int s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
1484         {
1485             const int type = forceatoms[iu];
1486             ai[s]          = forceatoms[iu + 1];
1487             aj[s]          = forceatoms[iu + 2];
1488             ak[s]          = forceatoms[iu + 3];
1489
1490             /* At the end fill the arrays with the last atoms and 0 params */
1491             if (i + s * nfa1 < nbonds)
1492             {
1493                 coeff[s]                           = forceparams[type].u_b.kthetaA;
1494                 coeff[GMX_SIMD_REAL_WIDTH + s]     = forceparams[type].u_b.thetaA;
1495                 coeff[GMX_SIMD_REAL_WIDTH * 2 + s] = forceparams[type].u_b.kUBA;
1496                 coeff[GMX_SIMD_REAL_WIDTH * 3 + s] = forceparams[type].u_b.r13A;
1497
1498                 if (iu + nfa1 < nbonds)
1499                 {
1500                     iu += nfa1;
1501                 }
1502             }
1503             else
1504             {
1505                 coeff[s]                           = 0;
1506                 coeff[GMX_SIMD_REAL_WIDTH + s]     = 0;
1507                 coeff[GMX_SIMD_REAL_WIDTH * 2 + s] = 0;
1508                 coeff[GMX_SIMD_REAL_WIDTH * 3 + s] = 0;
1509             }
1510         }
1511
1512         SimdReal xi_S, yi_S, zi_S;
1513         SimdReal xj_S, yj_S, zj_S;
1514         SimdReal xk_S, yk_S, zk_S;
1515
1516         /* Store the non PBC corrected distances packed and aligned */
1517         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ai, &xi_S, &yi_S, &zi_S);
1518         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), aj, &xj_S, &yj_S, &zj_S);
1519         gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ak, &xk_S, &yk_S, &zk_S);
1520         SimdReal rijx_S = xi_S - xj_S;
1521         SimdReal rijy_S = yi_S - yj_S;
1522         SimdReal rijz_S = zi_S - zj_S;
1523         SimdReal rkjx_S = xk_S - xj_S;
1524         SimdReal rkjy_S = yk_S - yj_S;
1525         SimdReal rkjz_S = zk_S - zj_S;
1526         SimdReal rikx_S = xi_S - xk_S;
1527         SimdReal riky_S = yi_S - yk_S;
1528         SimdReal rikz_S = zi_S - zk_S;
1529
1530         const SimdReal ktheta_S = load<SimdReal>(coeff);
1531         const SimdReal theta0_S = load<SimdReal>(coeff + GMX_SIMD_REAL_WIDTH) * gmx::c_deg2Rad;
1532         const SimdReal kUB_S    = load<SimdReal>(coeff + 2 * GMX_SIMD_REAL_WIDTH);
1533         const SimdReal r13_S    = load<SimdReal>(coeff + 3 * GMX_SIMD_REAL_WIDTH);
1534
1535         pbc_correct_dx_simd(&rijx_S, &rijy_S, &rijz_S, pbc_simd);
1536         pbc_correct_dx_simd(&rkjx_S, &rkjy_S, &rkjz_S, pbc_simd);
1537         pbc_correct_dx_simd(&rikx_S, &riky_S, &rikz_S, pbc_simd);
1538
1539         const SimdReal rij_rkj_S = iprod(rijx_S, rijy_S, rijz_S, rkjx_S, rkjy_S, rkjz_S);
1540
1541         const SimdReal dr2_S = iprod(rikx_S, riky_S, rikz_S, rikx_S, riky_S, rikz_S);
1542
1543         const SimdReal nrij2_S = norm2(rijx_S, rijy_S, rijz_S);
1544         const SimdReal nrkj2_S = norm2(rkjx_S, rkjy_S, rkjz_S);
1545
1546         const SimdReal nrij_1_S = invsqrt(nrij2_S);
1547         const SimdReal nrkj_1_S = invsqrt(nrkj2_S);
1548         const SimdReal invdr2_S = invsqrt(dr2_S);
1549         const SimdReal dr_S     = dr2_S * invdr2_S;
1550
1551         constexpr real min_one_plus_eps = -1.0 + 2.0 * GMX_REAL_EPS; // Smallest number > -1
1552
1553         /* To allow for 180 degrees, we take the max of cos and -1 + 1bit,
1554          * so we can safely get the 1/sin from 1/sqrt(1 - cos^2).
1555          * This also ensures that rounding errors would cause the argument
1556          * of simdAcos to be < -1.
1557          * Note that we do not take precautions for cos(0)=1, so the bonds
1558          * in an angle should not align at an angle of 0 degrees.
1559          */
1560         const SimdReal cos_S = max(rij_rkj_S * nrij_1_S * nrkj_1_S, min_one_plus_eps);
1561
1562         const SimdReal theta_S  = acos(cos_S);
1563         const SimdReal invsin_S = invsqrt(1.0 - cos_S * cos_S);
1564         const SimdReal st_S     = ktheta_S * (theta0_S - theta_S) * invsin_S;
1565         const SimdReal sth_S    = st_S * cos_S;
1566
1567         const SimdReal cik_S = st_S * nrij_1_S * nrkj_1_S;
1568         const SimdReal cii_S = sth_S * nrij_1_S * nrij_1_S;
1569         const SimdReal ckk_S = sth_S * nrkj_1_S * nrkj_1_S;
1570
1571         const SimdReal sUB_S = kUB_S * (r13_S - dr_S) * invdr2_S;
1572
1573         const SimdReal f_ikx_S = sUB_S * rikx_S;
1574         const SimdReal f_iky_S = sUB_S * riky_S;
1575         const SimdReal f_ikz_S = sUB_S * rikz_S;
1576
1577         const SimdReal f_ix_S = fnma(cik_S, rkjx_S, cii_S * rijx_S) + f_ikx_S;
1578         const SimdReal f_iy_S = fnma(cik_S, rkjy_S, cii_S * rijy_S) + f_iky_S;
1579         const SimdReal f_iz_S = fnma(cik_S, rkjz_S, cii_S * rijz_S) + f_ikz_S;
1580         const SimdReal f_kx_S = fnma(cik_S, rijx_S, ckk_S * rkjx_S) - f_ikx_S;
1581         const SimdReal f_ky_S = fnma(cik_S, rijy_S, ckk_S * rkjy_S) - f_iky_S;
1582         const SimdReal f_kz_S = fnma(cik_S, rijz_S, ckk_S * rkjz_S) - f_ikz_S;
1583
1584         transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ai, f_ix_S, f_iy_S, f_iz_S);
1585         transposeScatterDecrU<4>(
1586                 reinterpret_cast<real*>(f), aj, f_ix_S + f_kx_S, f_iy_S + f_ky_S, f_iz_S + f_kz_S);
1587         transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ak, f_kx_S, f_ky_S, f_kz_S);
1588     }
1589
1590     return 0;
1591 }
1592
1593 #endif // GMX_SIMD_HAVE_REAL
1594
1595 template<BondedKernelFlavor flavor>
1596 real quartic_angles(int             nbonds,
1597                     const t_iatom   forceatoms[],
1598                     const t_iparams forceparams[],
1599                     const rvec      x[],
1600                     rvec4           f[],
1601                     rvec            fshift[],
1602                     const t_pbc*    pbc,
1603                     real gmx_unused lambda,
1604                     real gmx_unused* dvdlambda,
1605                     gmx::ArrayRef<const real> /*charge*/,
1606                     t_fcdata gmx_unused* fcd,
1607                     t_disresdata gmx_unused* disresdata,
1608                     t_oriresdata gmx_unused* oriresdata,
1609                     int gmx_unused* global_atom_index)
1610 {
1611     int  i, j, ai, aj, ak, t1, t2, type;
1612     rvec r_ij, r_kj;
1613     real cos_theta, cos_theta2, theta, dt, dVdt, va, dtp, c, vtot;
1614
1615     vtot = 0.0;
1616     for (i = 0; (i < nbonds);)
1617     {
1618         type = forceatoms[i++];
1619         ai   = forceatoms[i++];
1620         aj   = forceatoms[i++];
1621         ak   = forceatoms[i++];
1622
1623         theta = bond_angle(x[ai], x[aj], x[ak], pbc, r_ij, r_kj, &cos_theta, &t1, &t2); /*  41 */
1624
1625         dt = theta - forceparams[type].qangle.theta * gmx::c_deg2Rad; /* 2          */
1626
1627         dVdt = 0;
1628         va   = forceparams[type].qangle.c[0];
1629         dtp  = 1.0;
1630         for (j = 1; j <= 4; j++)
1631         {
1632             c = forceparams[type].qangle.c[j];
1633             dVdt -= j * c * dtp;
1634             dtp *= dt;
1635             va += c * dtp;
1636         }
1637         /* 20 */
1638
1639         vtot += va;
1640
1641         cos_theta2 = gmx::square(cos_theta); /*   1             */
1642         if (cos_theta2 < 1)
1643         {
1644             int  m;
1645             real st, sth;
1646             real cik, cii, ckk;
1647             real nrkj2, nrij2;
1648             rvec f_i, f_j, f_k;
1649
1650             st    = dVdt * gmx::invsqrt(1 - cos_theta2); /*  12         */
1651             sth   = st * cos_theta;                      /*   1         */
1652             nrkj2 = iprod(r_kj, r_kj);                   /*   5         */
1653             nrij2 = iprod(r_ij, r_ij);
1654
1655             cik = st * gmx::invsqrt(nrkj2 * nrij2); /*  12              */
1656             cii = sth / nrij2;                      /*  10              */
1657             ckk = sth / nrkj2;                      /*  10              */
1658
1659             for (m = 0; (m < DIM); m++) /*  39          */
1660             {
1661                 f_i[m] = -(cik * r_kj[m] - cii * r_ij[m]);
1662                 f_k[m] = -(cik * r_ij[m] - ckk * r_kj[m]);
1663                 f_j[m] = -f_i[m] - f_k[m];
1664                 f[ai][m] += f_i[m];
1665                 f[aj][m] += f_j[m];
1666                 f[ak][m] += f_k[m];
1667             }
1668
1669             if (computeVirial(flavor))
1670             {
1671                 rvec_inc(fshift[t1], f_i);
1672                 rvec_inc(fshift[c_centralShiftIndex], f_j);
1673                 rvec_inc(fshift[t2], f_k);
1674             }
1675         } /* 153 TOTAL  */
1676     }
1677     return vtot;
1678 }
1679
1680
1681 #if GMX_SIMD_HAVE_REAL
1682
1683 /* As dih_angle above, but calculates 4 dihedral angles at once using SIMD,
1684  * also calculates the pre-factor required for the dihedral force update.
1685  * Note that bv and buf should be register aligned.
1686  */
1687 inline void dih_angle_simd(const rvec* x,
1688                            const int*  ai,
1689                            const int*  aj,
1690                            const int*  ak,
1691                            const int*  al,
1692                            const real* pbc_simd,
1693                            SimdReal*   phi_S,
1694                            SimdReal*   mx_S,
1695                            SimdReal*   my_S,
1696                            SimdReal*   mz_S,
1697                            SimdReal*   nx_S,
1698                            SimdReal*   ny_S,
1699                            SimdReal*   nz_S,
1700                            SimdReal*   nrkj_m2_S,
1701                            SimdReal*   nrkj_n2_S,
1702                            SimdReal*   p_S,
1703                            SimdReal*   q_S)
1704 {
1705     SimdReal xi_S, yi_S, zi_S;
1706     SimdReal xj_S, yj_S, zj_S;
1707     SimdReal xk_S, yk_S, zk_S;
1708     SimdReal xl_S, yl_S, zl_S;
1709     SimdReal rijx_S, rijy_S, rijz_S;
1710     SimdReal rkjx_S, rkjy_S, rkjz_S;
1711     SimdReal rklx_S, rkly_S, rklz_S;
1712     SimdReal cx_S, cy_S, cz_S;
1713     SimdReal cn_S;
1714     SimdReal s_S;
1715     SimdReal ipr_S;
1716     SimdReal iprm_S, iprn_S;
1717     SimdReal nrkj2_S, nrkj_1_S, nrkj_2_S, nrkj_S;
1718     SimdReal toler_S;
1719     SimdReal nrkj2_min_S;
1720     SimdReal real_eps_S;
1721
1722     /* Used to avoid division by zero.
1723      * We take into acount that we multiply the result by real_eps_S.
1724      */
1725     nrkj2_min_S = SimdReal(GMX_REAL_MIN / (2 * GMX_REAL_EPS));
1726
1727     /* The value of the last significant bit (GMX_REAL_EPS is half of that) */
1728     real_eps_S = SimdReal(2 * GMX_REAL_EPS);
1729
1730     /* Store the non PBC corrected distances packed and aligned */
1731     gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ai, &xi_S, &yi_S, &zi_S);
1732     gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), aj, &xj_S, &yj_S, &zj_S);
1733     gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), ak, &xk_S, &yk_S, &zk_S);
1734     gatherLoadUTranspose<3>(reinterpret_cast<const real*>(x), al, &xl_S, &yl_S, &zl_S);
1735     rijx_S = xi_S - xj_S;
1736     rijy_S = yi_S - yj_S;
1737     rijz_S = zi_S - zj_S;
1738     rkjx_S = xk_S - xj_S;
1739     rkjy_S = yk_S - yj_S;
1740     rkjz_S = zk_S - zj_S;
1741     rklx_S = xk_S - xl_S;
1742     rkly_S = yk_S - yl_S;
1743     rklz_S = zk_S - zl_S;
1744
1745     pbc_correct_dx_simd(&rijx_S, &rijy_S, &rijz_S, pbc_simd);
1746     pbc_correct_dx_simd(&rkjx_S, &rkjy_S, &rkjz_S, pbc_simd);
1747     pbc_correct_dx_simd(&rklx_S, &rkly_S, &rklz_S, pbc_simd);
1748
1749     cprod(rijx_S, rijy_S, rijz_S, rkjx_S, rkjy_S, rkjz_S, mx_S, my_S, mz_S);
1750
1751     cprod(rkjx_S, rkjy_S, rkjz_S, rklx_S, rkly_S, rklz_S, nx_S, ny_S, nz_S);
1752
1753     cprod(*mx_S, *my_S, *mz_S, *nx_S, *ny_S, *nz_S, &cx_S, &cy_S, &cz_S);
1754
1755     cn_S = sqrt(norm2(cx_S, cy_S, cz_S));
1756
1757     s_S = iprod(*mx_S, *my_S, *mz_S, *nx_S, *ny_S, *nz_S);
1758
1759     /* Determine the dihedral angle, the sign might need correction */
1760     *phi_S = atan2(cn_S, s_S);
1761
1762     ipr_S = iprod(rijx_S, rijy_S, rijz_S, *nx_S, *ny_S, *nz_S);
1763
1764     iprm_S = norm2(*mx_S, *my_S, *mz_S);
1765     iprn_S = norm2(*nx_S, *ny_S, *nz_S);
1766
1767     nrkj2_S = norm2(rkjx_S, rkjy_S, rkjz_S);
1768
1769     /* Avoid division by zero. When zero, the result is multiplied by 0
1770      * anyhow, so the 3 max below do not affect the final result.
1771      */
1772     nrkj2_S  = max(nrkj2_S, nrkj2_min_S);
1773     nrkj_1_S = invsqrt(nrkj2_S);
1774     nrkj_2_S = nrkj_1_S * nrkj_1_S;
1775     nrkj_S   = nrkj2_S * nrkj_1_S;
1776
1777     toler_S = nrkj2_S * real_eps_S;
1778
1779     /* Here the plain-C code uses a conditional, but we can't do that in SIMD.
1780      * So we take a max with the tolerance instead. Since we multiply with
1781      * m or n later, the max does not affect the results.
1782      */
1783     iprm_S     = max(iprm_S, toler_S);
1784     iprn_S     = max(iprn_S, toler_S);
1785     *nrkj_m2_S = nrkj_S * inv(iprm_S);
1786     *nrkj_n2_S = nrkj_S * inv(iprn_S);
1787
1788     /* Set sign of phi_S with the sign of ipr_S; phi_S is currently positive */
1789     *phi_S = copysign(*phi_S, ipr_S);
1790     *p_S   = iprod(rijx_S, rijy_S, rijz_S, rkjx_S, rkjy_S, rkjz_S);
1791     *p_S   = *p_S * nrkj_2_S;
1792
1793     *q_S = iprod(rklx_S, rkly_S, rklz_S, rkjx_S, rkjy_S, rkjz_S);
1794     *q_S = *q_S * nrkj_2_S;
1795 }
1796
1797 #endif // GMX_SIMD_HAVE_REAL
1798
1799 } // namespace
1800
1801 template<BondedKernelFlavor flavor>
1802 void do_dih_fup(int          i,
1803                 int          j,
1804                 int          k,
1805                 int          l,
1806                 real         ddphi,
1807                 rvec         r_ij,
1808                 rvec         r_kj,
1809                 rvec         r_kl,
1810                 rvec         m,
1811                 rvec         n,
1812                 rvec4        f[],
1813                 rvec         fshift[],
1814                 const t_pbc* pbc,
1815                 const rvec   x[],
1816                 int          t1,
1817                 int          t2,
1818                 int          t3)
1819 {
1820     /* 143 FLOPS */
1821     rvec f_i, f_j, f_k, f_l;
1822     rvec uvec, vvec, svec, dx_jl;
1823     real iprm, iprn, nrkj, nrkj2, nrkj_1, nrkj_2;
1824     real a, b, p, q, toler;
1825
1826     iprm  = iprod(m, m);       /*  5    */
1827     iprn  = iprod(n, n);       /*  5    */
1828     nrkj2 = iprod(r_kj, r_kj); /*  5    */
1829     toler = nrkj2 * GMX_REAL_EPS;
1830     if ((iprm > toler) && (iprn > toler))
1831     {
1832         nrkj_1 = gmx::invsqrt(nrkj2);  /* 10    */
1833         nrkj_2 = nrkj_1 * nrkj_1;      /*  1    */
1834         nrkj   = nrkj2 * nrkj_1;       /*  1    */
1835         a      = -ddphi * nrkj / iprm; /* 11    */
1836         svmul(a, m, f_i);              /*  3    */
1837         b = ddphi * nrkj / iprn;       /* 11    */
1838         svmul(b, n, f_l);              /*  3  */
1839         p = iprod(r_ij, r_kj);         /*  5    */
1840         p *= nrkj_2;                   /*  1    */
1841         q = iprod(r_kl, r_kj);         /*  5    */
1842         q *= nrkj_2;                   /*  1    */
1843         svmul(p, f_i, uvec);           /*  3    */
1844         svmul(q, f_l, vvec);           /*  3    */
1845         rvec_sub(uvec, vvec, svec);    /*  3    */
1846         rvec_sub(f_i, svec, f_j);      /*  3    */
1847         rvec_add(f_l, svec, f_k);      /*  3    */
1848         rvec_inc(f[i], f_i);           /*  3    */
1849         rvec_dec(f[j], f_j);           /*  3    */
1850         rvec_dec(f[k], f_k);           /*  3    */
1851         rvec_inc(f[l], f_l);           /*  3    */
1852
1853         if (computeVirial(flavor))
1854         {
1855             if (pbc)
1856             {
1857                 t3 = pbc_rvec_sub(pbc, x[l], x[j], dx_jl);
1858             }
1859             else
1860             {
1861                 t3 = c_centralShiftIndex;
1862             }
1863
1864             rvec_inc(fshift[t1], f_i);
1865             rvec_dec(fshift[c_centralShiftIndex], f_j);
1866             rvec_dec(fshift[t2], f_k);
1867             rvec_inc(fshift[t3], f_l);
1868         }
1869     }
1870     /* 112 TOTAL    */
1871 }
1872
1873 namespace
1874 {
1875
1876 #if GMX_SIMD_HAVE_REAL
1877 /* As do_dih_fup_noshiftf above, but with SIMD and pre-calculated pre-factors */
1878 inline void gmx_simdcall do_dih_fup_noshiftf_simd(const int* ai,
1879                                                   const int* aj,
1880                                                   const int* ak,
1881                                                   const int* al,
1882                                                   SimdReal   p,
1883                                                   SimdReal   q,
1884                                                   SimdReal   f_i_x,
1885                                                   SimdReal   f_i_y,
1886                                                   SimdReal   f_i_z,
1887                                                   SimdReal   mf_l_x,
1888                                                   SimdReal   mf_l_y,
1889                                                   SimdReal   mf_l_z,
1890                                                   rvec4      f[])
1891 {
1892     SimdReal sx    = p * f_i_x + q * mf_l_x;
1893     SimdReal sy    = p * f_i_y + q * mf_l_y;
1894     SimdReal sz    = p * f_i_z + q * mf_l_z;
1895     SimdReal f_j_x = f_i_x - sx;
1896     SimdReal f_j_y = f_i_y - sy;
1897     SimdReal f_j_z = f_i_z - sz;
1898     SimdReal f_k_x = mf_l_x - sx;
1899     SimdReal f_k_y = mf_l_y - sy;
1900     SimdReal f_k_z = mf_l_z - sz;
1901     transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ai, f_i_x, f_i_y, f_i_z);
1902     transposeScatterDecrU<4>(reinterpret_cast<real*>(f), aj, f_j_x, f_j_y, f_j_z);
1903     transposeScatterIncrU<4>(reinterpret_cast<real*>(f), ak, f_k_x, f_k_y, f_k_z);
1904     transposeScatterDecrU<4>(reinterpret_cast<real*>(f), al, mf_l_x, mf_l_y, mf_l_z);
1905 }
1906 #endif // GMX_SIMD_HAVE_REAL
1907
1908 /*! \brief Computes and returns the proper dihedral force
1909  *
1910  * With the appropriate kernel flavor, also computes and accumulates
1911  * the energy and dV/dlambda.
1912  */
1913 template<BondedKernelFlavor flavor>
1914 real dopdihs(real cpA, real cpB, real phiA, real phiB, int mult, real phi, real lambda, real* V, real* dvdlambda)
1915 {
1916     const real L1   = 1.0 - lambda;
1917     const real ph0  = (L1 * phiA + lambda * phiB) * gmx::c_deg2Rad;
1918     const real dph0 = (phiB - phiA) * gmx::c_deg2Rad;
1919     const real cp   = L1 * cpA + lambda * cpB;
1920
1921     const real mdphi = mult * phi - ph0;
1922     const real sdphi = std::sin(mdphi);
1923     const real ddphi = -cp * mult * sdphi;
1924     if (computeEnergy(flavor))
1925     {
1926         const real v1 = 1 + std::cos(mdphi);
1927         *V += cp * v1;
1928
1929         *dvdlambda += (cpB - cpA) * v1 + cp * dph0 * sdphi;
1930     }
1931
1932     return ddphi;
1933     /* That was 40 flops */
1934 }
1935
1936 /*! \brief Similar to \p dopdihs(), except for a minus sign and a different treatment of mult/phi0 */
1937 real dopdihs_min(real cpA, real cpB, real phiA, real phiB, int mult, real phi, real lambda, real* V, real* F)
1938 {
1939     real v, dvdlambda, mdphi, v1, sdphi, ddphi;
1940     real L1   = 1.0 - lambda;
1941     real ph0  = (L1 * phiA + lambda * phiB) * gmx::c_deg2Rad;
1942     real dph0 = (phiB - phiA) * gmx::c_deg2Rad;
1943     real cp   = L1 * cpA + lambda * cpB;
1944
1945     mdphi = mult * (phi - ph0);
1946     sdphi = std::sin(mdphi);
1947     ddphi = cp * mult * sdphi;
1948     v1    = 1.0 - std::cos(mdphi);
1949     v     = cp * v1;
1950
1951     dvdlambda = (cpB - cpA) * v1 + cp * dph0 * sdphi;
1952
1953     *V = v;
1954     *F = ddphi;
1955
1956     return dvdlambda;
1957
1958     /* That was 40 flops */
1959 }
1960
1961 template<BondedKernelFlavor flavor>
1962 std::enable_if_t<flavor != BondedKernelFlavor::ForcesSimdWhenAvailable || !GMX_SIMD_HAVE_REAL, real>
1963 pdihs(int             nbonds,
1964       const t_iatom   forceatoms[],
1965       const t_iparams forceparams[],
1966       const rvec      x[],
1967       rvec4           f[],
1968       rvec            fshift[],
1969       const t_pbc*    pbc,
1970       real            lambda,
1971       real*           dvdlambda,
1972       gmx::ArrayRef<const real> /*charge*/,
1973       t_fcdata gmx_unused* fcd,
1974       t_disresdata gmx_unused* disresdata,
1975       t_oriresdata gmx_unused* oriresdata,
1976       int gmx_unused* global_atom_index)
1977 {
1978     int  t1, t2, t3;
1979     rvec r_ij, r_kj, r_kl, m, n;
1980
1981     real vtot = 0.0;
1982
1983     for (int i = 0; i < nbonds;)
1984     {
1985         const int ai = forceatoms[i + 1];
1986         const int aj = forceatoms[i + 2];
1987         const int ak = forceatoms[i + 3];
1988         const int al = forceatoms[i + 4];
1989
1990         const real phi =
1991                 dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n, &t1, &t2, &t3); /*  84 */
1992
1993         /* Loop over dihedrals working on the same atoms,
1994          * so we avoid recalculating angles and distributing forces.
1995          */
1996         real ddphi_tot = 0;
1997         do
1998         {
1999             const int type = forceatoms[i];
2000             ddphi_tot += dopdihs<flavor>(forceparams[type].pdihs.cpA,
2001                                          forceparams[type].pdihs.cpB,
2002                                          forceparams[type].pdihs.phiA,
2003                                          forceparams[type].pdihs.phiB,
2004                                          forceparams[type].pdihs.mult,
2005                                          phi,
2006                                          lambda,
2007                                          &vtot,
2008                                          dvdlambda);
2009
2010             i += 5;
2011         } while (i < nbonds && forceatoms[i + 1] == ai && forceatoms[i + 2] == aj
2012                  && forceatoms[i + 3] == ak && forceatoms[i + 4] == al);
2013
2014         do_dih_fup<flavor>(
2015                 ai, aj, ak, al, ddphi_tot, r_ij, r_kj, r_kl, m, n, f, fshift, pbc, x, t1, t2, t3); /* 112               */
2016     } /* 223 TOTAL  */
2017
2018     return vtot;
2019 }
2020
2021 #if GMX_SIMD_HAVE_REAL
2022
2023 /* As pdihs above, but using SIMD to calculate multiple dihedrals at once */
2024 template<BondedKernelFlavor flavor>
2025 std::enable_if_t<flavor == BondedKernelFlavor::ForcesSimdWhenAvailable, real>
2026 pdihs(int             nbonds,
2027       const t_iatom   forceatoms[],
2028       const t_iparams forceparams[],
2029       const rvec      x[],
2030       rvec4           f[],
2031       rvec gmx_unused fshift[],
2032       const t_pbc*    pbc,
2033       real gmx_unused lambda,
2034       real gmx_unused* dvdlambda,
2035       gmx::ArrayRef<const real> /*charge*/,
2036       t_fcdata gmx_unused* fcd,
2037       t_disresdata gmx_unused* disresdata,
2038       t_oriresdata gmx_unused* oriresdata,
2039       int gmx_unused* global_atom_index)
2040 {
2041     const int                                nfa1 = 5;
2042     int                                      i, iu, s;
2043     int                                      type;
2044     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ai[GMX_SIMD_REAL_WIDTH];
2045     alignas(GMX_SIMD_ALIGNMENT) std::int32_t aj[GMX_SIMD_REAL_WIDTH];
2046     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ak[GMX_SIMD_REAL_WIDTH];
2047     alignas(GMX_SIMD_ALIGNMENT) std::int32_t al[GMX_SIMD_REAL_WIDTH];
2048     alignas(GMX_SIMD_ALIGNMENT) real         buf[3 * GMX_SIMD_REAL_WIDTH];
2049     real *                                   cp, *phi0, *mult;
2050     SimdReal                                 deg2rad_S(gmx::c_deg2Rad);
2051     SimdReal                                 p_S, q_S;
2052     SimdReal                                 phi0_S, phi_S;
2053     SimdReal                                 mx_S, my_S, mz_S;
2054     SimdReal                                 nx_S, ny_S, nz_S;
2055     SimdReal                                 nrkj_m2_S, nrkj_n2_S;
2056     SimdReal                                 cp_S, mdphi_S, mult_S;
2057     SimdReal                                 sin_S, cos_S;
2058     SimdReal                                 mddphi_S;
2059     SimdReal                                 sf_i_S, msf_l_S;
2060     alignas(GMX_SIMD_ALIGNMENT) real         pbc_simd[9 * GMX_SIMD_REAL_WIDTH];
2061
2062     /* Extract aligned pointer for parameters and variables */
2063     cp   = buf + 0 * GMX_SIMD_REAL_WIDTH;
2064     phi0 = buf + 1 * GMX_SIMD_REAL_WIDTH;
2065     mult = buf + 2 * GMX_SIMD_REAL_WIDTH;
2066
2067     set_pbc_simd(pbc, pbc_simd);
2068
2069     /* nbonds is the number of dihedrals times nfa1, here we step GMX_SIMD_REAL_WIDTH dihs */
2070     for (i = 0; (i < nbonds); i += GMX_SIMD_REAL_WIDTH * nfa1)
2071     {
2072         /* Collect atoms quadruplets for GMX_SIMD_REAL_WIDTH dihedrals.
2073          * iu indexes into forceatoms, we should not let iu go beyond nbonds.
2074          */
2075         iu = i;
2076         for (s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
2077         {
2078             type  = forceatoms[iu];
2079             ai[s] = forceatoms[iu + 1];
2080             aj[s] = forceatoms[iu + 2];
2081             ak[s] = forceatoms[iu + 3];
2082             al[s] = forceatoms[iu + 4];
2083
2084             /* At the end fill the arrays with the last atoms and 0 params */
2085             if (i + s * nfa1 < nbonds)
2086             {
2087                 cp[s]   = forceparams[type].pdihs.cpA;
2088                 phi0[s] = forceparams[type].pdihs.phiA;
2089                 mult[s] = forceparams[type].pdihs.mult;
2090
2091                 if (iu + nfa1 < nbonds)
2092                 {
2093                     iu += nfa1;
2094                 }
2095             }
2096             else
2097             {
2098                 cp[s]   = 0;
2099                 phi0[s] = 0;
2100                 mult[s] = 0;
2101             }
2102         }
2103
2104         /* Calculate GMX_SIMD_REAL_WIDTH dihedral angles at once */
2105         dih_angle_simd(
2106                 x, ai, aj, ak, al, pbc_simd, &phi_S, &mx_S, &my_S, &mz_S, &nx_S, &ny_S, &nz_S, &nrkj_m2_S, &nrkj_n2_S, &p_S, &q_S);
2107
2108         cp_S   = load<SimdReal>(cp);
2109         phi0_S = load<SimdReal>(phi0) * deg2rad_S;
2110         mult_S = load<SimdReal>(mult);
2111
2112         mdphi_S = fms(mult_S, phi_S, phi0_S);
2113
2114         /* Calculate GMX_SIMD_REAL_WIDTH sines at once */
2115         sincos(mdphi_S, &sin_S, &cos_S);
2116         mddphi_S = cp_S * mult_S * sin_S;
2117         sf_i_S   = mddphi_S * nrkj_m2_S;
2118         msf_l_S  = mddphi_S * nrkj_n2_S;
2119
2120         /* After this m?_S will contain f[i] */
2121         mx_S = sf_i_S * mx_S;
2122         my_S = sf_i_S * my_S;
2123         mz_S = sf_i_S * mz_S;
2124
2125         /* After this m?_S will contain -f[l] */
2126         nx_S = msf_l_S * nx_S;
2127         ny_S = msf_l_S * ny_S;
2128         nz_S = msf_l_S * nz_S;
2129
2130         do_dih_fup_noshiftf_simd(ai, aj, ak, al, p_S, q_S, mx_S, my_S, mz_S, nx_S, ny_S, nz_S, f);
2131     }
2132
2133     return 0;
2134 }
2135
2136 /* This is mostly a copy of the SIMD flavor of pdihs above, but with using
2137  * the RB potential instead of a harmonic potential.
2138  * This function can replace rbdihs() when no energy and virial are needed.
2139  */
2140 template<BondedKernelFlavor flavor>
2141 std::enable_if_t<flavor == BondedKernelFlavor::ForcesSimdWhenAvailable, real>
2142 rbdihs(int             nbonds,
2143        const t_iatom   forceatoms[],
2144        const t_iparams forceparams[],
2145        const rvec      x[],
2146        rvec4           f[],
2147        rvec gmx_unused fshift[],
2148        const t_pbc*    pbc,
2149        real gmx_unused lambda,
2150        real gmx_unused* dvdlambda,
2151        gmx::ArrayRef<const real> /*charge*/,
2152        t_fcdata gmx_unused* fcd,
2153        t_disresdata gmx_unused* disresdata,
2154        t_oriresdata gmx_unused* oriresdata,
2155        int gmx_unused* global_atom_index)
2156 {
2157     const int                                nfa1 = 5;
2158     int                                      i, iu, s, j;
2159     int                                      type;
2160     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ai[GMX_SIMD_REAL_WIDTH];
2161     alignas(GMX_SIMD_ALIGNMENT) std::int32_t aj[GMX_SIMD_REAL_WIDTH];
2162     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ak[GMX_SIMD_REAL_WIDTH];
2163     alignas(GMX_SIMD_ALIGNMENT) std::int32_t al[GMX_SIMD_REAL_WIDTH];
2164     alignas(GMX_SIMD_ALIGNMENT) real         parm[NR_RBDIHS * GMX_SIMD_REAL_WIDTH];
2165
2166     SimdReal                         p_S, q_S;
2167     SimdReal                         phi_S;
2168     SimdReal                         ddphi_S, cosfac_S;
2169     SimdReal                         mx_S, my_S, mz_S;
2170     SimdReal                         nx_S, ny_S, nz_S;
2171     SimdReal                         nrkj_m2_S, nrkj_n2_S;
2172     SimdReal                         parm_S, c_S;
2173     SimdReal                         sin_S, cos_S;
2174     SimdReal                         sf_i_S, msf_l_S;
2175     alignas(GMX_SIMD_ALIGNMENT) real pbc_simd[9 * GMX_SIMD_REAL_WIDTH];
2176
2177     SimdReal pi_S(M_PI);
2178     SimdReal one_S(1.0);
2179
2180     set_pbc_simd(pbc, pbc_simd);
2181
2182     /* nbonds is the number of dihedrals times nfa1, here we step GMX_SIMD_REAL_WIDTH dihs */
2183     for (i = 0; (i < nbonds); i += GMX_SIMD_REAL_WIDTH * nfa1)
2184     {
2185         /* Collect atoms quadruplets for GMX_SIMD_REAL_WIDTH dihedrals.
2186          * iu indexes into forceatoms, we should not let iu go beyond nbonds.
2187          */
2188         iu = i;
2189         for (s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
2190         {
2191             type  = forceatoms[iu];
2192             ai[s] = forceatoms[iu + 1];
2193             aj[s] = forceatoms[iu + 2];
2194             ak[s] = forceatoms[iu + 3];
2195             al[s] = forceatoms[iu + 4];
2196
2197             /* At the end fill the arrays with the last atoms and 0 params */
2198             if (i + s * nfa1 < nbonds)
2199             {
2200                 /* We don't need the first parameter, since that's a constant
2201                  * which only affects the energies, not the forces.
2202                  */
2203                 for (j = 1; j < NR_RBDIHS; j++)
2204                 {
2205                     parm[j * GMX_SIMD_REAL_WIDTH + s] = forceparams[type].rbdihs.rbcA[j];
2206                 }
2207
2208                 if (iu + nfa1 < nbonds)
2209                 {
2210                     iu += nfa1;
2211                 }
2212             }
2213             else
2214             {
2215                 for (j = 1; j < NR_RBDIHS; j++)
2216                 {
2217                     parm[j * GMX_SIMD_REAL_WIDTH + s] = 0;
2218                 }
2219             }
2220         }
2221
2222         /* Calculate GMX_SIMD_REAL_WIDTH dihedral angles at once */
2223         dih_angle_simd(
2224                 x, ai, aj, ak, al, pbc_simd, &phi_S, &mx_S, &my_S, &mz_S, &nx_S, &ny_S, &nz_S, &nrkj_m2_S, &nrkj_n2_S, &p_S, &q_S);
2225
2226         /* Change to polymer convention */
2227         phi_S = phi_S - pi_S;
2228
2229         sincos(phi_S, &sin_S, &cos_S);
2230
2231         ddphi_S  = setZero();
2232         c_S      = one_S;
2233         cosfac_S = one_S;
2234         for (j = 1; j < NR_RBDIHS; j++)
2235         {
2236             parm_S   = load<SimdReal>(parm + j * GMX_SIMD_REAL_WIDTH);
2237             ddphi_S  = fma(c_S * parm_S, cosfac_S, ddphi_S);
2238             cosfac_S = cosfac_S * cos_S;
2239             c_S      = c_S + one_S;
2240         }
2241
2242         /* Note that here we do not use the minus sign which is present
2243          * in the normal RB code. This is corrected for through (m)sf below.
2244          */
2245         ddphi_S = ddphi_S * sin_S;
2246
2247         sf_i_S  = ddphi_S * nrkj_m2_S;
2248         msf_l_S = ddphi_S * nrkj_n2_S;
2249
2250         /* After this m?_S will contain f[i] */
2251         mx_S = sf_i_S * mx_S;
2252         my_S = sf_i_S * my_S;
2253         mz_S = sf_i_S * mz_S;
2254
2255         /* After this m?_S will contain -f[l] */
2256         nx_S = msf_l_S * nx_S;
2257         ny_S = msf_l_S * ny_S;
2258         nz_S = msf_l_S * nz_S;
2259
2260         do_dih_fup_noshiftf_simd(ai, aj, ak, al, p_S, q_S, mx_S, my_S, mz_S, nx_S, ny_S, nz_S, f);
2261     }
2262
2263     return 0;
2264 }
2265
2266 #endif // GMX_SIMD_HAVE_REAL
2267
2268
2269 template<BondedKernelFlavor flavor>
2270 real idihs(int             nbonds,
2271            const t_iatom   forceatoms[],
2272            const t_iparams forceparams[],
2273            const rvec      x[],
2274            rvec4           f[],
2275            rvec            fshift[],
2276            const t_pbc*    pbc,
2277            real            lambda,
2278            real*           dvdlambda,
2279            gmx::ArrayRef<const real> /*charge*/,
2280            t_fcdata gmx_unused* fcd,
2281            t_disresdata gmx_unused* disresdata,
2282            t_oriresdata gmx_unused* oriresdata,
2283            int gmx_unused* global_atom_index)
2284 {
2285     int  i, type, ai, aj, ak, al;
2286     int  t1, t2, t3;
2287     real phi, phi0, dphi0, ddphi, vtot;
2288     rvec r_ij, r_kj, r_kl, m, n;
2289     real L1, kk, dp, dp2, kA, kB, pA, pB, dvdl_term;
2290
2291     L1        = 1.0 - lambda;
2292     dvdl_term = 0;
2293     vtot      = 0.0;
2294     for (i = 0; (i < nbonds);)
2295     {
2296         type = forceatoms[i++];
2297         ai   = forceatoms[i++];
2298         aj   = forceatoms[i++];
2299         ak   = forceatoms[i++];
2300         al   = forceatoms[i++];
2301
2302         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n, &t1, &t2, &t3); /*  84         */
2303
2304         /* phi can jump if phi0 is close to Pi/-Pi, which will cause huge
2305          * force changes if we just apply a normal harmonic.
2306          * Instead, we first calculate phi-phi0 and take it modulo (-Pi,Pi).
2307          * This means we will never have the periodicity problem, unless
2308          * the dihedral is Pi away from phiO, which is very unlikely due to
2309          * the potential.
2310          */
2311         kA = forceparams[type].harmonic.krA;
2312         kB = forceparams[type].harmonic.krB;
2313         pA = forceparams[type].harmonic.rA;
2314         pB = forceparams[type].harmonic.rB;
2315
2316         kk    = L1 * kA + lambda * kB;
2317         phi0  = (L1 * pA + lambda * pB) * gmx::c_deg2Rad;
2318         dphi0 = (pB - pA) * gmx::c_deg2Rad;
2319
2320         dp = phi - phi0;
2321
2322         make_dp_periodic(&dp);
2323
2324         dp2 = dp * dp;
2325
2326         vtot += 0.5 * kk * dp2;
2327         ddphi = -kk * dp;
2328
2329         dvdl_term += 0.5 * (kB - kA) * dp2 - kk * dphi0 * dp;
2330
2331         do_dih_fup<flavor>(ai, aj, ak, al, -ddphi, r_ij, r_kj, r_kl, m, n, f, fshift, pbc, x, t1, t2, t3); /* 112               */
2332         /* 218 TOTAL    */
2333     }
2334
2335     *dvdlambda += dvdl_term;
2336     return vtot;
2337 }
2338
2339 /*! \brief Computes angle restraints of two different types */
2340 template<BondedKernelFlavor flavor>
2341 real low_angres(int             nbonds,
2342                 const t_iatom   forceatoms[],
2343                 const t_iparams forceparams[],
2344                 const rvec      x[],
2345                 rvec4           f[],
2346                 rvec            fshift[],
2347                 const t_pbc*    pbc,
2348                 real            lambda,
2349                 real*           dvdlambda,
2350                 gmx_bool        bZAxis)
2351 {
2352     int  i, m, type, ai, aj, ak, al;
2353     int  t1, t2;
2354     real phi, cos_phi, cos_phi2, vid, vtot, dVdphi;
2355     rvec r_ij, r_kl, f_i, f_k = { 0, 0, 0 };
2356     real st, sth, nrij2, nrkl2, c, cij, ckl;
2357
2358     t2 = 0; /* avoid warning with gcc-3.3. It is never used uninitialized */
2359
2360     vtot = 0.0;
2361     ak = al = 0; /* to avoid warnings */
2362     for (i = 0; i < nbonds;)
2363     {
2364         type = forceatoms[i++];
2365         ai   = forceatoms[i++];
2366         aj   = forceatoms[i++];
2367         t1   = pbc_rvec_sub(pbc, x[aj], x[ai], r_ij); /*  3             */
2368         if (!bZAxis)
2369         {
2370             ak = forceatoms[i++];
2371             al = forceatoms[i++];
2372             t2 = pbc_rvec_sub(pbc, x[al], x[ak], r_kl); /*  3           */
2373         }
2374         else
2375         {
2376             r_kl[XX] = 0;
2377             r_kl[YY] = 0;
2378             r_kl[ZZ] = 1;
2379         }
2380
2381         cos_phi = cos_angle(r_ij, r_kl); /* 25          */
2382         phi     = std::acos(cos_phi);    /* 10           */
2383
2384         *dvdlambda += dopdihs_min(forceparams[type].pdihs.cpA,
2385                                   forceparams[type].pdihs.cpB,
2386                                   forceparams[type].pdihs.phiA,
2387                                   forceparams[type].pdihs.phiB,
2388                                   forceparams[type].pdihs.mult,
2389                                   phi,
2390                                   lambda,
2391                                   &vid,
2392                                   &dVdphi); /*  40 */
2393
2394         vtot += vid;
2395
2396         cos_phi2 = gmx::square(cos_phi); /*   1         */
2397         if (cos_phi2 < 1)
2398         {
2399             st    = -dVdphi * gmx::invsqrt(1 - cos_phi2); /*  12                */
2400             sth   = st * cos_phi;                         /*   1                */
2401             nrij2 = iprod(r_ij, r_ij);                    /*   5                */
2402             nrkl2 = iprod(r_kl, r_kl);                    /*   5          */
2403
2404             c   = st * gmx::invsqrt(nrij2 * nrkl2); /*  11              */
2405             cij = sth / nrij2;                      /*  10              */
2406             ckl = sth / nrkl2;                      /*  10              */
2407
2408             for (m = 0; m < DIM; m++) /*  18+18       */
2409             {
2410                 f_i[m] = (c * r_kl[m] - cij * r_ij[m]);
2411                 f[ai][m] += f_i[m];
2412                 f[aj][m] -= f_i[m];
2413                 if (!bZAxis)
2414                 {
2415                     f_k[m] = (c * r_ij[m] - ckl * r_kl[m]);
2416                     f[ak][m] += f_k[m];
2417                     f[al][m] -= f_k[m];
2418                 }
2419             }
2420
2421             if (computeVirial(flavor))
2422             {
2423                 rvec_inc(fshift[t1], f_i);
2424                 rvec_dec(fshift[c_centralShiftIndex], f_i);
2425                 if (!bZAxis)
2426                 {
2427                     rvec_inc(fshift[t2], f_k);
2428                     rvec_dec(fshift[c_centralShiftIndex], f_k);
2429                 }
2430             }
2431         }
2432     }
2433
2434     return vtot; /*  184 / 157 (bZAxis)  total  */
2435 }
2436
2437 template<BondedKernelFlavor flavor>
2438 real angres(int             nbonds,
2439             const t_iatom   forceatoms[],
2440             const t_iparams forceparams[],
2441             const rvec      x[],
2442             rvec4           f[],
2443             rvec            fshift[],
2444             const t_pbc*    pbc,
2445             real            lambda,
2446             real*           dvdlambda,
2447             gmx::ArrayRef<const real> /*charge*/,
2448             t_fcdata gmx_unused* fcd,
2449             t_disresdata gmx_unused* disresdata,
2450             t_oriresdata gmx_unused* oriresdata,
2451             int gmx_unused* global_atom_index)
2452 {
2453     return low_angres<flavor>(nbonds, forceatoms, forceparams, x, f, fshift, pbc, lambda, dvdlambda, FALSE);
2454 }
2455
2456 template<BondedKernelFlavor flavor>
2457 real angresz(int             nbonds,
2458              const t_iatom   forceatoms[],
2459              const t_iparams forceparams[],
2460              const rvec      x[],
2461              rvec4           f[],
2462              rvec            fshift[],
2463              const t_pbc*    pbc,
2464              real            lambda,
2465              real*           dvdlambda,
2466              gmx::ArrayRef<const real> /*charge*/,
2467              t_fcdata gmx_unused* fcd,
2468              t_disresdata gmx_unused* disresdata,
2469              t_oriresdata gmx_unused* oriresdata,
2470              int gmx_unused* global_atom_index)
2471 {
2472     return low_angres<flavor>(nbonds, forceatoms, forceparams, x, f, fshift, pbc, lambda, dvdlambda, TRUE);
2473 }
2474
2475 template<BondedKernelFlavor flavor>
2476 real dihres(int             nbonds,
2477             const t_iatom   forceatoms[],
2478             const t_iparams forceparams[],
2479             const rvec      x[],
2480             rvec4           f[],
2481             rvec            fshift[],
2482             const t_pbc*    pbc,
2483             real            lambda,
2484             real*           dvdlambda,
2485             gmx::ArrayRef<const real> /*charge*/,
2486             t_fcdata gmx_unused* fcd,
2487             t_disresdata gmx_unused* disresdata,
2488             t_oriresdata gmx_unused* oriresdata,
2489             int gmx_unused* global_atom_index)
2490 {
2491     real vtot = 0;
2492     int  ai, aj, ak, al, i, type, t1, t2, t3;
2493     real phi0A, phi0B, dphiA, dphiB, kfacA, kfacB, phi0, dphi, kfac;
2494     real phi, ddphi, ddp, ddp2, dp, d2r, L1;
2495     rvec r_ij, r_kj, r_kl, m, n;
2496
2497     L1 = 1.0 - lambda;
2498
2499     d2r = gmx::c_deg2Rad;
2500
2501     for (i = 0; (i < nbonds);)
2502     {
2503         type = forceatoms[i++];
2504         ai   = forceatoms[i++];
2505         aj   = forceatoms[i++];
2506         ak   = forceatoms[i++];
2507         al   = forceatoms[i++];
2508
2509         phi0A = forceparams[type].dihres.phiA * d2r;
2510         dphiA = forceparams[type].dihres.dphiA * d2r;
2511         kfacA = forceparams[type].dihres.kfacA;
2512
2513         phi0B = forceparams[type].dihres.phiB * d2r;
2514         dphiB = forceparams[type].dihres.dphiB * d2r;
2515         kfacB = forceparams[type].dihres.kfacB;
2516
2517         phi0 = L1 * phi0A + lambda * phi0B;
2518         dphi = L1 * dphiA + lambda * dphiB;
2519         kfac = L1 * kfacA + lambda * kfacB;
2520
2521         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n, &t1, &t2, &t3);
2522         /* 84 flops */
2523
2524         /* phi can jump if phi0 is close to Pi/-Pi, which will cause huge
2525          * force changes if we just apply a normal harmonic.
2526          * Instead, we first calculate phi-phi0 and take it modulo (-Pi,Pi).
2527          * This means we will never have the periodicity problem, unless
2528          * the dihedral is Pi away from phiO, which is very unlikely due to
2529          * the potential.
2530          */
2531         dp = phi - phi0;
2532         make_dp_periodic(&dp);
2533
2534         if (dp > dphi)
2535         {
2536             ddp = dp - dphi;
2537         }
2538         else if (dp < -dphi)
2539         {
2540             ddp = dp + dphi;
2541         }
2542         else
2543         {
2544             ddp = 0;
2545         }
2546
2547         if (ddp != 0.0)
2548         {
2549             ddp2 = ddp * ddp;
2550             vtot += 0.5 * kfac * ddp2;
2551             ddphi = kfac * ddp;
2552
2553             *dvdlambda += 0.5 * (kfacB - kfacA) * ddp2;
2554             /* lambda dependence from changing restraint distances */
2555             if (ddp > 0)
2556             {
2557                 *dvdlambda -= kfac * ddp * ((dphiB - dphiA) + (phi0B - phi0A));
2558             }
2559             else if (ddp < 0)
2560             {
2561                 *dvdlambda += kfac * ddp * ((dphiB - dphiA) - (phi0B - phi0A));
2562             }
2563             do_dih_fup<flavor>(
2564                     ai, aj, ak, al, ddphi, r_ij, r_kj, r_kl, m, n, f, fshift, pbc, x, t1, t2, t3); /* 112               */
2565         }
2566     }
2567     return vtot;
2568 }
2569
2570
2571 real unimplemented(int gmx_unused             nbonds,
2572                    const t_iatom gmx_unused   forceatoms[],
2573                    const t_iparams gmx_unused forceparams[],
2574                    const rvec gmx_unused      x[],
2575                    rvec4 gmx_unused           f[],
2576                    rvec gmx_unused            fshift[],
2577                    const t_pbc gmx_unused* pbc,
2578                    real gmx_unused         lambda,
2579                    real gmx_unused* dvdlambda,
2580                    gmx::ArrayRef<const real> /*charge*/,
2581                    t_fcdata gmx_unused* fcd,
2582                    t_disresdata gmx_unused* disresdata,
2583                    t_oriresdata gmx_unused* oriresdata,
2584                    int gmx_unused* global_atom_index)
2585 {
2586     gmx_impl("*** you are using a not implemented function");
2587 }
2588
2589 template<BondedKernelFlavor flavor>
2590 real restrangles(int             nbonds,
2591                  const t_iatom   forceatoms[],
2592                  const t_iparams forceparams[],
2593                  const rvec      x[],
2594                  rvec4           f[],
2595                  rvec            fshift[],
2596                  const t_pbc*    pbc,
2597                  real gmx_unused lambda,
2598                  real gmx_unused* dvdlambda,
2599                  gmx::ArrayRef<const real> /*charge*/,
2600                  t_fcdata gmx_unused* fcd,
2601                  t_disresdata gmx_unused* disresdata,
2602                  t_oriresdata gmx_unused* oriresdata,
2603                  int gmx_unused* global_atom_index)
2604 {
2605     int    i, d, ai, aj, ak, type, m;
2606     int    t1, t2;
2607     real   v, vtot;
2608     rvec   f_i, f_j, f_k;
2609     double prefactor, ratio_ante, ratio_post;
2610     rvec   delta_ante, delta_post, vec_temp;
2611
2612     vtot = 0.0;
2613     for (i = 0; (i < nbonds);)
2614     {
2615         type = forceatoms[i++];
2616         ai   = forceatoms[i++];
2617         aj   = forceatoms[i++];
2618         ak   = forceatoms[i++];
2619
2620         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], vec_temp);
2621         pbc_rvec_sub(pbc, x[aj], x[ai], delta_ante);
2622         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], delta_post);
2623
2624
2625         /* This function computes factors needed for restricted angle potential.
2626          * The restricted angle potential is used in coarse-grained simulations to avoid singularities
2627          * when three particles align and the dihedral angle and dihedral potential
2628          * cannot be calculated. This potential is calculated using the formula:
2629            real restrangles(int nbonds,
2630             const t_iatom forceatoms[],const t_iparams forceparams[],
2631             const rvec x[],rvec4 f[],rvec fshift[],
2632             const t_pbc *pbc,
2633             real gmx_unused lambda,real gmx_unused *dvdlambda,
2634             const t_mdatoms gmx_unused *md,t_fcdata gmx_unused *fcd,
2635             int gmx_unused *global_atom_index)
2636            {
2637            int  i, d, ai, aj, ak, type, m;
2638            int t1, t2;
2639            rvec r_ij,r_kj;
2640            real v, vtot;
2641            rvec f_i, f_j, f_k;
2642            real prefactor, ratio_ante, ratio_post;
2643            rvec delta_ante, delta_post, vec_temp;
2644
2645            vtot = 0.0;
2646            for(i=0; (i<nbonds); )
2647            {
2648            type = forceatoms[i++];
2649            ai   = forceatoms[i++];
2650            aj   = forceatoms[i++];
2651            ak   = forceatoms[i++];
2652
2653          * \f[V_{\rm ReB}(\theta_i) = \frac{1}{2} k_{\theta} \frac{(\cos\theta_i - \cos\theta_0)^2}
2654          * {\sin^2\theta_i}\f] ({eq:ReB} and ref \cite{MonicaGoga2013} from the manual).
2655          * For more explanations see comments file "restcbt.h". */
2656
2657         compute_factors_restangles(
2658                 type, forceparams, delta_ante, delta_post, &prefactor, &ratio_ante, &ratio_post, &v);
2659
2660         /*   Forces are computed per component */
2661         for (d = 0; d < DIM; d++)
2662         {
2663             f_i[d] = prefactor * (ratio_ante * delta_ante[d] - delta_post[d]);
2664             f_j[d] = prefactor
2665                      * ((ratio_post + 1.0) * delta_post[d] - (ratio_ante + 1.0) * delta_ante[d]);
2666             f_k[d] = prefactor * (delta_ante[d] - ratio_post * delta_post[d]);
2667         }
2668
2669         /*   Computation of potential energy   */
2670
2671         vtot += v;
2672
2673         /*   Update forces */
2674
2675         for (m = 0; (m < DIM); m++)
2676         {
2677             f[ai][m] += f_i[m];
2678             f[aj][m] += f_j[m];
2679             f[ak][m] += f_k[m];
2680         }
2681
2682         if (computeVirial(flavor))
2683         {
2684             rvec_inc(fshift[t1], f_i);
2685             rvec_inc(fshift[c_centralShiftIndex], f_j);
2686             rvec_inc(fshift[t2], f_k);
2687         }
2688     }
2689     return vtot;
2690 }
2691
2692
2693 template<BondedKernelFlavor flavor>
2694 real restrdihs(int             nbonds,
2695                const t_iatom   forceatoms[],
2696                const t_iparams forceparams[],
2697                const rvec      x[],
2698                rvec4           f[],
2699                rvec            fshift[],
2700                const t_pbc*    pbc,
2701                real gmx_unused lambda,
2702                real gmx_unused* dvlambda,
2703                gmx::ArrayRef<const real> /*charge*/,
2704                t_fcdata gmx_unused* fcd,
2705                t_disresdata gmx_unused* disresdata,
2706                t_oriresdata gmx_unused* oriresdata,
2707                int gmx_unused* global_atom_index)
2708 {
2709     int  i, d, type, ai, aj, ak, al;
2710     rvec f_i, f_j, f_k, f_l;
2711     rvec dx_jl;
2712     int  t1, t2, t3;
2713     real v, vtot;
2714     rvec delta_ante, delta_crnt, delta_post, vec_temp;
2715     real factor_phi_ai_ante, factor_phi_ai_crnt, factor_phi_ai_post;
2716     real factor_phi_aj_ante, factor_phi_aj_crnt, factor_phi_aj_post;
2717     real factor_phi_ak_ante, factor_phi_ak_crnt, factor_phi_ak_post;
2718     real factor_phi_al_ante, factor_phi_al_crnt, factor_phi_al_post;
2719     real prefactor_phi;
2720
2721
2722     vtot = 0.0;
2723     for (i = 0; (i < nbonds);)
2724     {
2725         type = forceatoms[i++];
2726         ai   = forceatoms[i++];
2727         aj   = forceatoms[i++];
2728         ak   = forceatoms[i++];
2729         al   = forceatoms[i++];
2730
2731         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], vec_temp);
2732         pbc_rvec_sub(pbc, x[aj], x[ai], delta_ante);
2733         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], delta_crnt);
2734         pbc_rvec_sub(pbc, x[ak], x[al], vec_temp);
2735         pbc_rvec_sub(pbc, x[al], x[ak], delta_post);
2736
2737         /* This function computes factors needed for restricted angle potential.
2738          * The restricted angle potential is used in coarse-grained simulations to avoid singularities
2739          * when three particles align and the dihedral angle and dihedral potential cannot be calculated.
2740          * This potential is calculated using the formula:
2741          * \f[V_{\rm ReB}(\theta_i) = \frac{1}{2} k_{\theta}
2742          * \frac{(\cos\theta_i - \cos\theta_0)^2}{\sin^2\theta_i}\f]
2743          * ({eq:ReB} and ref \cite{MonicaGoga2013} from the manual).
2744          * For more explanations see comments file "restcbt.h" */
2745
2746         compute_factors_restrdihs(type,
2747                                   forceparams,
2748                                   delta_ante,
2749                                   delta_crnt,
2750                                   delta_post,
2751                                   &factor_phi_ai_ante,
2752                                   &factor_phi_ai_crnt,
2753                                   &factor_phi_ai_post,
2754                                   &factor_phi_aj_ante,
2755                                   &factor_phi_aj_crnt,
2756                                   &factor_phi_aj_post,
2757                                   &factor_phi_ak_ante,
2758                                   &factor_phi_ak_crnt,
2759                                   &factor_phi_ak_post,
2760                                   &factor_phi_al_ante,
2761                                   &factor_phi_al_crnt,
2762                                   &factor_phi_al_post,
2763                                   &prefactor_phi,
2764                                   &v);
2765
2766
2767         /*      Computation of forces per component */
2768         for (d = 0; d < DIM; d++)
2769         {
2770             f_i[d] = prefactor_phi
2771                      * (factor_phi_ai_ante * delta_ante[d] + factor_phi_ai_crnt * delta_crnt[d]
2772                         + factor_phi_ai_post * delta_post[d]);
2773             f_j[d] = prefactor_phi
2774                      * (factor_phi_aj_ante * delta_ante[d] + factor_phi_aj_crnt * delta_crnt[d]
2775                         + factor_phi_aj_post * delta_post[d]);
2776             f_k[d] = prefactor_phi
2777                      * (factor_phi_ak_ante * delta_ante[d] + factor_phi_ak_crnt * delta_crnt[d]
2778                         + factor_phi_ak_post * delta_post[d]);
2779             f_l[d] = prefactor_phi
2780                      * (factor_phi_al_ante * delta_ante[d] + factor_phi_al_crnt * delta_crnt[d]
2781                         + factor_phi_al_post * delta_post[d]);
2782         }
2783         /*      Computation of the energy */
2784
2785         vtot += v;
2786
2787
2788         /*    Updating the forces */
2789
2790         rvec_inc(f[ai], f_i);
2791         rvec_inc(f[aj], f_j);
2792         rvec_inc(f[ak], f_k);
2793         rvec_inc(f[al], f_l);
2794
2795         if (computeVirial(flavor))
2796         {
2797             if (pbc)
2798             {
2799                 t3 = pbc_rvec_sub(pbc, x[al], x[aj], dx_jl);
2800             }
2801             else
2802             {
2803                 t3 = c_centralShiftIndex;
2804             }
2805
2806             rvec_inc(fshift[t1], f_i);
2807             rvec_inc(fshift[c_centralShiftIndex], f_j);
2808             rvec_inc(fshift[t2], f_k);
2809             rvec_inc(fshift[t3], f_l);
2810         }
2811     }
2812
2813     return vtot;
2814 }
2815
2816
2817 template<BondedKernelFlavor flavor>
2818 real cbtdihs(int             nbonds,
2819              const t_iatom   forceatoms[],
2820              const t_iparams forceparams[],
2821              const rvec      x[],
2822              rvec4           f[],
2823              rvec            fshift[],
2824              const t_pbc*    pbc,
2825              real gmx_unused lambda,
2826              real gmx_unused* dvdlambda,
2827              gmx::ArrayRef<const real> /*charge*/,
2828              t_fcdata gmx_unused* fcd,
2829              t_disresdata gmx_unused* disresdata,
2830              t_oriresdata gmx_unused* oriresdata,
2831              int gmx_unused* global_atom_index)
2832 {
2833     int  type, ai, aj, ak, al, i, d;
2834     int  t1, t2, t3;
2835     real v, vtot;
2836     rvec vec_temp;
2837     rvec f_i, f_j, f_k, f_l;
2838     rvec dx_jl;
2839     rvec delta_ante, delta_crnt, delta_post;
2840     rvec f_phi_ai, f_phi_aj, f_phi_ak, f_phi_al;
2841     rvec f_theta_ante_ai, f_theta_ante_aj, f_theta_ante_ak;
2842     rvec f_theta_post_aj, f_theta_post_ak, f_theta_post_al;
2843
2844
2845     vtot = 0.0;
2846     for (i = 0; (i < nbonds);)
2847     {
2848         type = forceatoms[i++];
2849         ai   = forceatoms[i++];
2850         aj   = forceatoms[i++];
2851         ak   = forceatoms[i++];
2852         al   = forceatoms[i++];
2853
2854
2855         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], vec_temp);
2856         pbc_rvec_sub(pbc, x[aj], x[ai], delta_ante);
2857         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], vec_temp);
2858         pbc_rvec_sub(pbc, x[ak], x[aj], delta_crnt);
2859         pbc_rvec_sub(pbc, x[ak], x[al], vec_temp);
2860         pbc_rvec_sub(pbc, x[al], x[ak], delta_post);
2861
2862         /* \brief Compute factors for CBT potential
2863          * The combined bending-torsion potential goes to zero in a very smooth manner, eliminating the numerical
2864          * instabilities, when three coarse-grained particles align and the dihedral angle and standard
2865          * dihedral potentials cannot be calculated. The CBT potential is calculated using the formula:
2866          * \f[V_{\rm CBT}(\theta_{i-1}, \theta_i, \phi_i) = k_{\phi} \sin^3\theta_{i-1} \sin^3\theta_{i}
2867          * \sum_{n=0}^4 { a_n \cos^n\phi_i}\f] ({eq:CBT} and ref \cite{MonicaGoga2013} from the manual).
2868          * It contains in its expression not only the dihedral angle \f$\phi\f$
2869          * but also \f[\theta_{i-1}\f] (theta_ante bellow) and \f[\theta_{i}\f] (theta_post bellow)
2870          * --- the adjacent bending angles.
2871          * For more explanations see comments file "restcbt.h". */
2872
2873         compute_factors_cbtdihs(type,
2874                                 forceparams,
2875                                 delta_ante,
2876                                 delta_crnt,
2877                                 delta_post,
2878                                 f_phi_ai,
2879                                 f_phi_aj,
2880                                 f_phi_ak,
2881                                 f_phi_al,
2882                                 f_theta_ante_ai,
2883                                 f_theta_ante_aj,
2884                                 f_theta_ante_ak,
2885                                 f_theta_post_aj,
2886                                 f_theta_post_ak,
2887                                 f_theta_post_al,
2888                                 &v);
2889
2890
2891         /*      Acumulate the resuts per beads */
2892         for (d = 0; d < DIM; d++)
2893         {
2894             f_i[d] = f_phi_ai[d] + f_theta_ante_ai[d];
2895             f_j[d] = f_phi_aj[d] + f_theta_ante_aj[d] + f_theta_post_aj[d];
2896             f_k[d] = f_phi_ak[d] + f_theta_ante_ak[d] + f_theta_post_ak[d];
2897             f_l[d] = f_phi_al[d] + f_theta_post_al[d];
2898         }
2899
2900         /*      Compute the potential energy */
2901
2902         vtot += v;
2903
2904
2905         /*  Updating the forces */
2906         rvec_inc(f[ai], f_i);
2907         rvec_inc(f[aj], f_j);
2908         rvec_inc(f[ak], f_k);
2909         rvec_inc(f[al], f_l);
2910
2911
2912         if (computeVirial(flavor))
2913         {
2914             if (pbc)
2915             {
2916                 t3 = pbc_rvec_sub(pbc, x[al], x[aj], dx_jl);
2917             }
2918             else
2919             {
2920                 t3 = c_centralShiftIndex;
2921             }
2922
2923             rvec_inc(fshift[t1], f_i);
2924             rvec_inc(fshift[c_centralShiftIndex], f_j);
2925             rvec_inc(fshift[t2], f_k);
2926             rvec_inc(fshift[t3], f_l);
2927         }
2928     }
2929
2930     return vtot;
2931 }
2932
2933 template<BondedKernelFlavor flavor>
2934 std::enable_if_t<flavor != BondedKernelFlavor::ForcesSimdWhenAvailable || !GMX_SIMD_HAVE_REAL, real>
2935 rbdihs(int             nbonds,
2936        const t_iatom   forceatoms[],
2937        const t_iparams forceparams[],
2938        const rvec      x[],
2939        rvec4           f[],
2940        rvec            fshift[],
2941        const t_pbc*    pbc,
2942        real            lambda,
2943        real*           dvdlambda,
2944        gmx::ArrayRef<const real> /*charge*/,
2945        t_fcdata gmx_unused* fcd,
2946        t_disresdata gmx_unused* disresdata,
2947        t_oriresdata gmx_unused* oriresdata,
2948        int gmx_unused* global_atom_index)
2949 {
2950     const real c0 = 0.0, c1 = 1.0, c2 = 2.0, c3 = 3.0, c4 = 4.0, c5 = 5.0;
2951     int        type, ai, aj, ak, al, i, j;
2952     int        t1, t2, t3;
2953     rvec       r_ij, r_kj, r_kl, m, n;
2954     real       parmA[NR_RBDIHS];
2955     real       parmB[NR_RBDIHS];
2956     real       parm[NR_RBDIHS];
2957     real       cos_phi, phi, rbp, rbpBA;
2958     real       v, ddphi, sin_phi;
2959     real       cosfac, vtot;
2960     real       L1        = 1.0 - lambda;
2961     real       dvdl_term = 0;
2962
2963     vtot = 0.0;
2964     for (i = 0; (i < nbonds);)
2965     {
2966         type = forceatoms[i++];
2967         ai   = forceatoms[i++];
2968         aj   = forceatoms[i++];
2969         ak   = forceatoms[i++];
2970         al   = forceatoms[i++];
2971
2972         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n, &t1, &t2, &t3); /*  84         */
2973
2974         /* Change to polymer convention */
2975         if (phi < c0)
2976         {
2977             phi += M_PI;
2978         }
2979         else
2980         {
2981             phi -= M_PI; /*   1         */
2982         }
2983         cos_phi = std::cos(phi);
2984         /* Beware of accuracy loss, cannot use 1-sqrt(cos^2) ! */
2985         sin_phi = std::sin(phi);
2986
2987         for (j = 0; (j < NR_RBDIHS); j++)
2988         {
2989             parmA[j] = forceparams[type].rbdihs.rbcA[j];
2990             parmB[j] = forceparams[type].rbdihs.rbcB[j];
2991             parm[j]  = L1 * parmA[j] + lambda * parmB[j];
2992         }
2993         /* Calculate cosine powers */
2994         /* Calculate the energy */
2995         /* Calculate the derivative */
2996
2997         v = parm[0];
2998         dvdl_term += (parmB[0] - parmA[0]);
2999         ddphi  = c0;
3000         cosfac = c1;
3001
3002         rbp   = parm[1];
3003         rbpBA = parmB[1] - parmA[1];
3004         ddphi += rbp * cosfac;
3005         cosfac *= cos_phi;
3006         v += cosfac * rbp;
3007         dvdl_term += cosfac * rbpBA;
3008         rbp   = parm[2];
3009         rbpBA = parmB[2] - parmA[2];
3010         ddphi += c2 * rbp * cosfac;
3011         cosfac *= cos_phi;
3012         v += cosfac * rbp;
3013         dvdl_term += cosfac * rbpBA;
3014         rbp   = parm[3];
3015         rbpBA = parmB[3] - parmA[3];
3016         ddphi += c3 * rbp * cosfac;
3017         cosfac *= cos_phi;
3018         v += cosfac * rbp;
3019         dvdl_term += cosfac * rbpBA;
3020         rbp   = parm[4];
3021         rbpBA = parmB[4] - parmA[4];
3022         ddphi += c4 * rbp * cosfac;
3023         cosfac *= cos_phi;
3024         v += cosfac * rbp;
3025         dvdl_term += cosfac * rbpBA;
3026         rbp   = parm[5];
3027         rbpBA = parmB[5] - parmA[5];
3028         ddphi += c5 * rbp * cosfac;
3029         cosfac *= cos_phi;
3030         v += cosfac * rbp;
3031         dvdl_term += cosfac * rbpBA;
3032
3033         ddphi = -ddphi * sin_phi; /*  11                */
3034
3035         do_dih_fup<flavor>(ai, aj, ak, al, ddphi, r_ij, r_kj, r_kl, m, n, f, fshift, pbc, x, t1, t2, t3); /* 112                */
3036         vtot += v;
3037     }
3038     *dvdlambda += dvdl_term;
3039
3040     return vtot;
3041 }
3042
3043 //! \endcond
3044
3045 /*! \brief Mysterious undocumented function */
3046 int cmap_setup_grid_index(int ip, int grid_spacing, int* ipm1, int* ipp1, int* ipp2)
3047 {
3048     int im1, ip1, ip2;
3049
3050     if (ip < 0)
3051     {
3052         ip = ip + grid_spacing - 1;
3053     }
3054     else if (ip > grid_spacing)
3055     {
3056         ip = ip - grid_spacing - 1;
3057     }
3058
3059     im1 = ip - 1;
3060     ip1 = ip + 1;
3061     ip2 = ip + 2;
3062
3063     if (ip == 0)
3064     {
3065         im1 = grid_spacing - 1;
3066     }
3067     else if (ip == grid_spacing - 2)
3068     {
3069         ip2 = 0;
3070     }
3071     else if (ip == grid_spacing - 1)
3072     {
3073         ip1 = 0;
3074         ip2 = 1;
3075     }
3076
3077     *ipm1 = im1;
3078     *ipp1 = ip1;
3079     *ipp2 = ip2;
3080
3081     return ip;
3082 }
3083
3084 using CmapForceStructure = std::array<real, 4>;
3085
3086 gmx::RVec processCmapForceComponent(const gmx::RVec a,
3087                                     const gmx::RVec b,
3088                                     const real      df,
3089                                     const real      gaa,
3090                                     const real      fga,
3091                                     const real      gbb,
3092                                     const real      hgb,
3093                                     const int       dim)
3094 {
3095     gmx::RVec result; // mapping XX <-> f, YY <-> g, ZZ <-> h
3096     result[XX] = gaa * a[dim];
3097     result[YY] = fga * a[dim] - hgb * b[dim];
3098     result[ZZ] = gbb * b[dim];
3099     return result * df;
3100 }
3101
3102 CmapForceStructure applyCmapForceComponent(const gmx::RVec forceComponent)
3103 {
3104     // forceComponent mapping is XX <-> f, YY <-> g, ZZ <-> h
3105     CmapForceStructure forces;
3106     forces[0] = forceComponent[XX];
3107     forces[1] = -forceComponent[XX] - forceComponent[YY];
3108     forces[2] = forceComponent[ZZ] + forceComponent[YY];
3109     forces[3] = -forceComponent[ZZ];
3110     return forces;
3111 }
3112
3113 void accumulateCmapForces(const rvec      x[],
3114                           rvec4           f[],
3115                           rvec            fshift[],
3116                           const t_pbc*    pbc,
3117                           const gmx::RVec r_ij,
3118                           const gmx::RVec r_kj,
3119                           const gmx::RVec r_kl,
3120                           const gmx::RVec a,
3121                           const gmx::RVec b,
3122                           gmx::RVec       h,
3123                           const real      ra2r,
3124                           const real      rb2r,
3125                           const real      rgr,
3126                           const real      rg,
3127                           const int       ai,
3128                           const int       aj,
3129                           const int       ak,
3130                           const int       al,
3131                           const real      df,
3132                           const int       t1,
3133                           const int       t2)
3134 {
3135     const real fg  = iprod(r_ij, r_kj);
3136     const real hg  = iprod(r_kl, r_kj);
3137     const real fga = fg * ra2r * rgr;
3138     const real hgb = hg * rb2r * rgr;
3139     const real gaa = -ra2r * rg;
3140     const real gbb = rb2r * rg;
3141
3142     gmx::RVec f_i, f_j, f_k, f_l;
3143     for (int i = 0; i < DIM; i++)
3144     {
3145         CmapForceStructure forces =
3146                 applyCmapForceComponent(processCmapForceComponent(a, b, df, gaa, fga, gbb, hgb, i));
3147         f_i[i] = forces[0];
3148         f_j[i] = forces[1];
3149         f_k[i] = forces[2];
3150         f_l[i] = forces[3];
3151     }
3152     rvec_inc(f[ai], f_i);
3153     rvec_inc(f[aj], f_j);
3154     rvec_inc(f[ak], f_k);
3155     rvec_inc(f[al], f_l);
3156
3157     /* Shift forces */
3158     if (fshift != nullptr)
3159     {
3160         const int t3 = pbc ? pbc_rvec_sub(pbc, x[al], x[aj], h) : c_centralShiftIndex;
3161         rvec_inc(fshift[t1], f_i);
3162         rvec_inc(fshift[c_centralShiftIndex], f_j);
3163         rvec_inc(fshift[t2], f_k);
3164         rvec_inc(fshift[t3], f_l);
3165     }
3166 }
3167
3168
3169 } // namespace
3170
3171 real cmap_dihs(int                 nbonds,
3172                const t_iatom       forceatoms[],
3173                const t_iparams     forceparams[],
3174                const gmx_cmap_t*   cmap_grid,
3175                const rvec          x[],
3176                rvec4               f[],
3177                rvec                fshift[],
3178                const struct t_pbc* pbc,
3179                real gmx_unused     lambda,
3180                real gmx_unused* dvdlambda,
3181                gmx::ArrayRef<const real> /*charge*/,
3182                t_fcdata gmx_unused* fcd,
3183                t_disresdata gmx_unused* disresdata,
3184                t_oriresdata gmx_unused* oriresdata,
3185                int gmx_unused* global_atom_index)
3186 {
3187     int t11, t21, t31, t12, t22, t32;
3188     int ip1m1, ip1p1, ip1p2;
3189     int ip2m1, ip2p1, ip2p2;
3190     int l1, l2, l3;
3191
3192     real ty[4], ty1[4], ty2[4], ty12[4], tx[16];
3193
3194     rvec r1_ij, r1_kj, r1_kl, m1, n1;
3195     rvec r2_ij, r2_kj, r2_kl, m2, n2;
3196
3197     int loop_index[4][4] = { { 0, 4, 8, 12 }, { 1, 5, 9, 13 }, { 2, 6, 10, 14 }, { 3, 7, 11, 15 } };
3198
3199     /* Total CMAP energy */
3200     real vtot = 0;
3201
3202     for (int n = 0; n < nbonds;)
3203     {
3204         /* Five atoms are involved in the two torsions */
3205         const int type = forceatoms[n++];
3206         const int ai   = forceatoms[n++];
3207         const int aj   = forceatoms[n++];
3208         const int ak   = forceatoms[n++];
3209         const int al   = forceatoms[n++];
3210         const int am   = forceatoms[n++];
3211
3212         /* Which CMAP type is this */
3213         const int                 cmapA = forceparams[type].cmap.cmapA;
3214         gmx::ArrayRef<const real> cmapd = cmap_grid->cmapdata[cmapA].cmap;
3215
3216         /* First torsion */
3217         const int a1i = ai;
3218         const int a1j = aj;
3219         const int a1k = ak;
3220         const int a1l = al;
3221
3222         real phi1 = dih_angle(
3223                 x[a1i], x[a1j], x[a1k], x[a1l], pbc, r1_ij, r1_kj, r1_kl, m1, n1, &t11, &t21, &t31); /* 84 */
3224
3225         const real cos_phi1 = std::cos(phi1);
3226
3227         gmx::RVec a1, b1, h1;
3228         cprod(r1_ij, r1_kj, a1); /* 9 */
3229         cprod(r1_kl, r1_kj, b1); /* 9 */
3230
3231         pbc_rvec_sub(pbc, x[a1l], x[a1k], h1);
3232
3233         const real ra21 = iprod(a1, a1);       /* 5 */
3234         const real rb21 = iprod(b1, b1);       /* 5 */
3235         const real rg21 = iprod(r1_kj, r1_kj); /* 5 */
3236         const real rg1  = sqrt(rg21);
3237
3238         const real rgr1  = 1.0 / rg1;
3239         const real ra2r1 = 1.0 / ra21;
3240         const real rb2r1 = 1.0 / rb21;
3241         const real rabr1 = sqrt(ra2r1 * rb2r1);
3242
3243         const real sin_phi1 = rg1 * rabr1 * iprod(a1, h1) * (-1);
3244
3245         if (cos_phi1 < -0.5 || cos_phi1 > 0.5)
3246         {
3247             phi1 = std::asin(sin_phi1);
3248
3249             if (cos_phi1 < 0)
3250             {
3251                 if (phi1 > 0)
3252                 {
3253                     phi1 = M_PI - phi1;
3254                 }
3255                 else
3256                 {
3257                     phi1 = -M_PI - phi1;
3258                 }
3259             }
3260         }
3261         else
3262         {
3263             phi1 = std::acos(cos_phi1);
3264
3265             if (sin_phi1 < 0)
3266             {
3267                 phi1 = -phi1;
3268             }
3269         }
3270
3271         real xphi1 = phi1 + M_PI; /* 1 */
3272
3273         /* Second torsion */
3274         const int a2i = aj;
3275         const int a2j = ak;
3276         const int a2k = al;
3277         const int a2l = am;
3278
3279         real phi2 = dih_angle(
3280                 x[a2i], x[a2j], x[a2k], x[a2l], pbc, r2_ij, r2_kj, r2_kl, m2, n2, &t12, &t22, &t32); /* 84 */
3281
3282         const real cos_phi2 = std::cos(phi2);
3283
3284         gmx::RVec a2, b2, h2;
3285         cprod(r2_ij, r2_kj, a2); /* 9 */
3286         cprod(r2_kl, r2_kj, b2); /* 9 */
3287
3288         pbc_rvec_sub(pbc, x[a2l], x[a2k], h2);
3289
3290         const real ra22 = iprod(a2, a2);       /* 5 */
3291         const real rb22 = iprod(b2, b2);       /* 5 */
3292         const real rg22 = iprod(r2_kj, r2_kj); /* 5 */
3293         const real rg2  = sqrt(rg22);
3294
3295         const real rgr2  = 1.0 / rg2;
3296         const real ra2r2 = 1.0 / ra22;
3297         const real rb2r2 = 1.0 / rb22;
3298         const real rabr2 = sqrt(ra2r2 * rb2r2);
3299
3300         const real sin_phi2 = rg2 * rabr2 * iprod(a2, h2) * (-1);
3301
3302         if (cos_phi2 < -0.5 || cos_phi2 > 0.5)
3303         {
3304             phi2 = std::asin(sin_phi2);
3305
3306             if (cos_phi2 < 0)
3307             {
3308                 if (phi2 > 0)
3309                 {
3310                     phi2 = M_PI - phi2;
3311                 }
3312                 else
3313                 {
3314                     phi2 = -M_PI - phi2;
3315                 }
3316             }
3317         }
3318         else
3319         {
3320             phi2 = std::acos(cos_phi2);
3321
3322             if (sin_phi2 < 0)
3323             {
3324                 phi2 = -phi2;
3325             }
3326         }
3327
3328         real xphi2 = phi2 + M_PI; /* 1 */
3329
3330         /* Range mangling */
3331         if (xphi1 < 0)
3332         {
3333             xphi1 = xphi1 + 2 * M_PI;
3334         }
3335         else if (xphi1 >= 2 * M_PI)
3336         {
3337             xphi1 = xphi1 - 2 * M_PI;
3338         }
3339
3340         if (xphi2 < 0)
3341         {
3342             xphi2 = xphi2 + 2 * M_PI;
3343         }
3344         else if (xphi2 >= 2 * M_PI)
3345         {
3346             xphi2 = xphi2 - 2 * M_PI;
3347         }
3348
3349         /* Number of grid points */
3350         real dx = 2 * M_PI / cmap_grid->grid_spacing;
3351
3352         /* Where on the grid are we */
3353         int iphi1 = static_cast<int>(xphi1 / dx);
3354         int iphi2 = static_cast<int>(xphi2 / dx);
3355
3356         iphi1 = cmap_setup_grid_index(iphi1, cmap_grid->grid_spacing, &ip1m1, &ip1p1, &ip1p2);
3357         iphi2 = cmap_setup_grid_index(iphi2, cmap_grid->grid_spacing, &ip2m1, &ip2p1, &ip2p2);
3358
3359         const int pos1 = iphi1 * cmap_grid->grid_spacing + iphi2;
3360         const int pos2 = ip1p1 * cmap_grid->grid_spacing + iphi2;
3361         const int pos3 = ip1p1 * cmap_grid->grid_spacing + ip2p1;
3362         const int pos4 = iphi1 * cmap_grid->grid_spacing + ip2p1;
3363
3364         ty[0] = cmapd[pos1 * 4];
3365         ty[1] = cmapd[pos2 * 4];
3366         ty[2] = cmapd[pos3 * 4];
3367         ty[3] = cmapd[pos4 * 4];
3368
3369         ty1[0] = cmapd[pos1 * 4 + 1];
3370         ty1[1] = cmapd[pos2 * 4 + 1];
3371         ty1[2] = cmapd[pos3 * 4 + 1];
3372         ty1[3] = cmapd[pos4 * 4 + 1];
3373
3374         ty2[0] = cmapd[pos1 * 4 + 2];
3375         ty2[1] = cmapd[pos2 * 4 + 2];
3376         ty2[2] = cmapd[pos3 * 4 + 2];
3377         ty2[3] = cmapd[pos4 * 4 + 2];
3378
3379         ty12[0] = cmapd[pos1 * 4 + 3];
3380         ty12[1] = cmapd[pos2 * 4 + 3];
3381         ty12[2] = cmapd[pos3 * 4 + 3];
3382         ty12[3] = cmapd[pos4 * 4 + 3];
3383
3384         /* Switch to degrees */
3385         dx    = 360.0 / cmap_grid->grid_spacing;
3386         xphi1 = xphi1 * gmx::c_rad2Deg;
3387         xphi2 = xphi2 * gmx::c_rad2Deg;
3388
3389         for (int i = 0; i < 4; i++) /* 16 */
3390         {
3391             tx[i]      = ty[i];
3392             tx[i + 4]  = ty1[i] * dx;
3393             tx[i + 8]  = ty2[i] * dx;
3394             tx[i + 12] = ty12[i] * dx * dx;
3395         }
3396
3397         std::array<real, 16> tc = { 0 };
3398         for (int idx = 0; idx < 16; idx++) /* 1056 */
3399         {
3400             for (int k = 0; k < 16; k++)
3401             {
3402                 tc[idx] += cmap_coeff_matrix[k * 16 + idx] * tx[k];
3403             }
3404         }
3405
3406         const real tt = (xphi1 - iphi1 * dx) / dx;
3407         const real tu = (xphi2 - iphi2 * dx) / dx;
3408
3409         real e   = 0;
3410         real df1 = 0;
3411         real df2 = 0;
3412
3413         for (int i = 3; i >= 0; i--)
3414         {
3415             l1 = loop_index[i][3];
3416             l2 = loop_index[i][2];
3417             l3 = loop_index[i][1];
3418
3419             e = tt * e + ((tc[i * 4 + 3] * tu + tc[i * 4 + 2]) * tu + tc[i * 4 + 1]) * tu + tc[i * 4];
3420             df1 = tu * df1 + (3.0 * tc[l1] * tt + 2.0 * tc[l2]) * tt + tc[l3];
3421             df2 = tt * df2 + (3.0 * tc[i * 4 + 3] * tu + 2.0 * tc[i * 4 + 2]) * tu + tc[i * 4 + 1];
3422         }
3423
3424         const real fac = gmx::c_rad2Deg / dx;
3425         df1            = df1 * fac;
3426         df2            = df2 * fac;
3427
3428         /* CMAP energy */
3429         vtot += e;
3430
3431         /* Do forces - first torsion */
3432         accumulateCmapForces(
3433                 x, f, fshift, pbc, r1_ij, r1_kj, r1_kl, a1, b1, h1, ra2r1, rb2r1, rgr1, rg1, a1i, a1j, a1k, a1l, df1, t11, t21);
3434
3435         /* Do forces - second torsion */
3436         accumulateCmapForces(
3437                 x, f, fshift, pbc, r2_ij, r2_kj, r2_kl, a2, b2, h2, ra2r2, rb2r2, rgr2, rg2, a2i, a2j, a2k, a2l, df2, t12, t22);
3438     }
3439     return vtot;
3440 }
3441
3442 namespace
3443 {
3444
3445 //! \cond
3446 /***********************************************************
3447  *
3448  *   G R O M O S  9 6   F U N C T I O N S
3449  *
3450  ***********************************************************/
3451 real g96harmonic(real kA, real kB, real xA, real xB, real x, real lambda, real* V, real* F)
3452 {
3453     const real half = 0.5;
3454     real       L1, kk, x0, dx, dx2;
3455     real       v, f, dvdlambda;
3456
3457     L1 = 1.0 - lambda;
3458     kk = L1 * kA + lambda * kB;
3459     x0 = L1 * xA + lambda * xB;
3460
3461     dx  = x - x0;
3462     dx2 = dx * dx;
3463
3464     f         = -kk * dx;
3465     v         = half * kk * dx2;
3466     dvdlambda = half * (kB - kA) * dx2 + (xA - xB) * kk * dx;
3467
3468     *F = f;
3469     *V = v;
3470
3471     return dvdlambda;
3472
3473     /* That was 21 flops */
3474 }
3475
3476 template<BondedKernelFlavor flavor>
3477 real g96bonds(int             nbonds,
3478               const t_iatom   forceatoms[],
3479               const t_iparams forceparams[],
3480               const rvec      x[],
3481               rvec4           f[],
3482               rvec            fshift[],
3483               const t_pbc*    pbc,
3484               real            lambda,
3485               real*           dvdlambda,
3486               gmx::ArrayRef<const real> /*charge*/,
3487               t_fcdata gmx_unused* fcd,
3488               t_disresdata gmx_unused* disresdata,
3489               t_oriresdata gmx_unused* oriresdata,
3490               int gmx_unused* global_atom_index)
3491 {
3492     int  i, ki, ai, aj, type;
3493     real dr2, fbond, vbond, vtot;
3494     rvec dx;
3495
3496     vtot = 0.0;
3497     for (i = 0; (i < nbonds);)
3498     {
3499         type = forceatoms[i++];
3500         ai   = forceatoms[i++];
3501         aj   = forceatoms[i++];
3502
3503         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
3504         dr2 = iprod(dx, dx);                       /*   5               */
3505
3506         *dvdlambda += g96harmonic(forceparams[type].harmonic.krA,
3507                                   forceparams[type].harmonic.krB,
3508                                   forceparams[type].harmonic.rA,
3509                                   forceparams[type].harmonic.rB,
3510                                   dr2,
3511                                   lambda,
3512                                   &vbond,
3513                                   &fbond);
3514
3515         vtot += 0.5 * vbond; /* 1*/
3516
3517         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, fshift); /* 15 */
3518     }                                                               /* 44 TOTAL */
3519     return vtot;
3520 }
3521
3522 real g96bond_angle(const rvec xi, const rvec xj, const rvec xk, const t_pbc* pbc, rvec r_ij, rvec r_kj, int* t1, int* t2)
3523 /* Return value is the angle between the bonds i-j and j-k */
3524 {
3525     real costh;
3526
3527     *t1 = pbc_rvec_sub(pbc, xi, xj, r_ij); /*  3                */
3528     *t2 = pbc_rvec_sub(pbc, xk, xj, r_kj); /*  3                */
3529
3530     costh = cos_angle(r_ij, r_kj); /* 25                */
3531     /* 41 TOTAL */
3532     return costh;
3533 }
3534
3535 template<BondedKernelFlavor flavor>
3536 real g96angles(int             nbonds,
3537                const t_iatom   forceatoms[],
3538                const t_iparams forceparams[],
3539                const rvec      x[],
3540                rvec4           f[],
3541                rvec            fshift[],
3542                const t_pbc*    pbc,
3543                real            lambda,
3544                real*           dvdlambda,
3545                gmx::ArrayRef<const real> /*charge*/,
3546                t_fcdata gmx_unused* fcd,
3547                t_disresdata gmx_unused* disresdata,
3548                t_oriresdata gmx_unused* oriresdata,
3549                int gmx_unused* global_atom_index)
3550 {
3551     int  i, ai, aj, ak, type, m, t1, t2;
3552     rvec r_ij, r_kj;
3553     real cos_theta, dVdt, va, vtot;
3554     real rij_1, rij_2, rkj_1, rkj_2, rijrkj_1;
3555     rvec f_i, f_j, f_k;
3556
3557     vtot = 0.0;
3558     for (i = 0; (i < nbonds);)
3559     {
3560         type = forceatoms[i++];
3561         ai   = forceatoms[i++];
3562         aj   = forceatoms[i++];
3563         ak   = forceatoms[i++];
3564
3565         cos_theta = g96bond_angle(x[ai], x[aj], x[ak], pbc, r_ij, r_kj, &t1, &t2);
3566
3567         *dvdlambda += g96harmonic(forceparams[type].harmonic.krA,
3568                                   forceparams[type].harmonic.krB,
3569                                   forceparams[type].harmonic.rA,
3570                                   forceparams[type].harmonic.rB,
3571                                   cos_theta,
3572                                   lambda,
3573                                   &va,
3574                                   &dVdt);
3575         vtot += va;
3576
3577         rij_1    = gmx::invsqrt(iprod(r_ij, r_ij));
3578         rkj_1    = gmx::invsqrt(iprod(r_kj, r_kj));
3579         rij_2    = rij_1 * rij_1;
3580         rkj_2    = rkj_1 * rkj_1;
3581         rijrkj_1 = rij_1 * rkj_1; /* 23 */
3582
3583         for (m = 0; (m < DIM); m++) /*  42      */
3584         {
3585             f_i[m] = dVdt * (r_kj[m] * rijrkj_1 - r_ij[m] * rij_2 * cos_theta);
3586             f_k[m] = dVdt * (r_ij[m] * rijrkj_1 - r_kj[m] * rkj_2 * cos_theta);
3587             f_j[m] = -f_i[m] - f_k[m];
3588             f[ai][m] += f_i[m];
3589             f[aj][m] += f_j[m];
3590             f[ak][m] += f_k[m];
3591         }
3592
3593         if (computeVirial(flavor))
3594         {
3595             rvec_inc(fshift[t1], f_i);
3596             rvec_inc(fshift[c_centralShiftIndex], f_j);
3597             rvec_inc(fshift[t2], f_k); /* 9 */
3598         }
3599         /* 163 TOTAL    */
3600     }
3601     return vtot;
3602 }
3603
3604 template<BondedKernelFlavor flavor>
3605 real cross_bond_bond(int             nbonds,
3606                      const t_iatom   forceatoms[],
3607                      const t_iparams forceparams[],
3608                      const rvec      x[],
3609                      rvec4           f[],
3610                      rvec            fshift[],
3611                      const t_pbc*    pbc,
3612                      real gmx_unused lambda,
3613                      real gmx_unused* dvdlambda,
3614                      gmx::ArrayRef<const real> /*charge*/,
3615                      t_fcdata gmx_unused* fcd,
3616                      t_disresdata gmx_unused* disresdata,
3617                      t_oriresdata gmx_unused* oriresdata,
3618                      int gmx_unused* global_atom_index)
3619 {
3620     /* Potential from Lawrence and Skimmer, Chem. Phys. Lett. 372 (2003)
3621      * pp. 842-847
3622      */
3623     int  i, ai, aj, ak, type, m, t1, t2;
3624     rvec r_ij, r_kj;
3625     real vtot, vrr, s1, s2, r1, r2, r1e, r2e, krr;
3626     rvec f_i, f_j, f_k;
3627
3628     vtot = 0.0;
3629     for (i = 0; (i < nbonds);)
3630     {
3631         type = forceatoms[i++];
3632         ai   = forceatoms[i++];
3633         aj   = forceatoms[i++];
3634         ak   = forceatoms[i++];
3635         r1e  = forceparams[type].cross_bb.r1e;
3636         r2e  = forceparams[type].cross_bb.r2e;
3637         krr  = forceparams[type].cross_bb.krr;
3638
3639         /* Compute distance vectors ... */
3640         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], r_ij);
3641         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], r_kj);
3642
3643         /* ... and their lengths */
3644         r1 = norm(r_ij);
3645         r2 = norm(r_kj);
3646
3647         /* Deviations from ideality */
3648         s1 = r1 - r1e;
3649         s2 = r2 - r2e;
3650
3651         /* Energy (can be negative!) */
3652         vrr = krr * s1 * s2;
3653         vtot += vrr;
3654
3655         /* Forces */
3656         svmul(-krr * s2 / r1, r_ij, f_i);
3657         svmul(-krr * s1 / r2, r_kj, f_k);
3658
3659         for (m = 0; (m < DIM); m++) /*  12      */
3660         {
3661             f_j[m] = -f_i[m] - f_k[m];
3662             f[ai][m] += f_i[m];
3663             f[aj][m] += f_j[m];
3664             f[ak][m] += f_k[m];
3665         }
3666
3667         if (computeVirial(flavor))
3668         {
3669             rvec_inc(fshift[t1], f_i);
3670             rvec_inc(fshift[c_centralShiftIndex], f_j);
3671             rvec_inc(fshift[t2], f_k); /* 9 */
3672         }
3673         /* 163 TOTAL    */
3674     }
3675     return vtot;
3676 }
3677
3678 template<BondedKernelFlavor flavor>
3679 real cross_bond_angle(int             nbonds,
3680                       const t_iatom   forceatoms[],
3681                       const t_iparams forceparams[],
3682                       const rvec      x[],
3683                       rvec4           f[],
3684                       rvec            fshift[],
3685                       const t_pbc*    pbc,
3686                       real gmx_unused lambda,
3687                       real gmx_unused* dvdlambda,
3688                       gmx::ArrayRef<const real> /*charge*/,
3689                       t_fcdata gmx_unused* fcd,
3690                       t_disresdata gmx_unused* disresdata,
3691                       t_oriresdata gmx_unused* oriresdata,
3692                       int gmx_unused* global_atom_index)
3693 {
3694     /* Potential from Lawrence and Skimmer, Chem. Phys. Lett. 372 (2003)
3695      * pp. 842-847
3696      */
3697     int  i, ai, aj, ak, type, m, t1, t2;
3698     rvec r_ij, r_kj, r_ik;
3699     real vtot, vrt, s1, s2, s3, r1, r2, r3, r1e, r2e, r3e, krt, k1, k2, k3;
3700     rvec f_i, f_j, f_k;
3701
3702     vtot = 0.0;
3703     for (i = 0; (i < nbonds);)
3704     {
3705         type = forceatoms[i++];
3706         ai   = forceatoms[i++];
3707         aj   = forceatoms[i++];
3708         ak   = forceatoms[i++];
3709         r1e  = forceparams[type].cross_ba.r1e;
3710         r2e  = forceparams[type].cross_ba.r2e;
3711         r3e  = forceparams[type].cross_ba.r3e;
3712         krt  = forceparams[type].cross_ba.krt;
3713
3714         /* Compute distance vectors ... */
3715         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], r_ij);
3716         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], r_kj);
3717         pbc_rvec_sub(pbc, x[ai], x[ak], r_ik);
3718
3719         /* ... and their lengths */
3720         r1 = norm(r_ij);
3721         r2 = norm(r_kj);
3722         r3 = norm(r_ik);
3723
3724         /* Deviations from ideality */
3725         s1 = r1 - r1e;
3726         s2 = r2 - r2e;
3727         s3 = r3 - r3e;
3728
3729         /* Energy (can be negative!) */
3730         vrt = krt * s3 * (s1 + s2);
3731         vtot += vrt;
3732
3733         /* Forces */
3734         k1 = -krt * (s3 / r1);
3735         k2 = -krt * (s3 / r2);
3736         k3 = -krt * (s1 + s2) / r3;
3737         for (m = 0; (m < DIM); m++)
3738         {
3739             f_i[m] = k1 * r_ij[m] + k3 * r_ik[m];
3740             f_k[m] = k2 * r_kj[m] - k3 * r_ik[m];
3741             f_j[m] = -f_i[m] - f_k[m];
3742         }
3743
3744         for (m = 0; (m < DIM); m++) /*  12      */
3745         {
3746             f[ai][m] += f_i[m];
3747             f[aj][m] += f_j[m];
3748             f[ak][m] += f_k[m];
3749         }
3750
3751         if (computeVirial(flavor))
3752         {
3753             rvec_inc(fshift[t1], f_i);
3754             rvec_inc(fshift[c_centralShiftIndex], f_j);
3755             rvec_inc(fshift[t2], f_k); /* 9 */
3756         }
3757         /* 163 TOTAL    */
3758     }
3759     return vtot;
3760 }
3761
3762 /*! \brief Computes the potential and force for a tabulated potential */
3763 real bonded_tab(const char*          type,
3764                 int                  table_nr,
3765                 const bondedtable_t* table,
3766                 real                 kA,
3767                 real                 kB,
3768                 real                 r,
3769                 real                 lambda,
3770                 real*                V,
3771                 real*                F)
3772 {
3773     real k, tabscale, rt, eps, eps2, Yt, Ft, Geps, Heps2, Fp, VV, FF;
3774     int  n0, nnn;
3775     real dvdlambda;
3776
3777     k = (1.0 - lambda) * kA + lambda * kB;
3778
3779     tabscale          = table->scale;
3780     const real* VFtab = table->data.data();
3781
3782     rt = r * tabscale;
3783     n0 = static_cast<int>(rt);
3784     if (n0 >= table->n)
3785     {
3786         gmx_fatal(FARGS,
3787                   "A tabulated %s interaction table number %d is out of the table range: r %f, "
3788                   "between table indices %d and %d, table length %d",
3789                   type,
3790                   table_nr,
3791                   r,
3792                   n0,
3793                   n0 + 1,
3794                   table->n);
3795     }
3796     eps   = rt - n0;
3797     eps2  = eps * eps;
3798     nnn   = 4 * n0;
3799     Yt    = VFtab[nnn];
3800     Ft    = VFtab[nnn + 1];
3801     Geps  = VFtab[nnn + 2] * eps;
3802     Heps2 = VFtab[nnn + 3] * eps2;
3803     Fp    = Ft + Geps + Heps2;
3804     VV    = Yt + Fp * eps;
3805     FF    = Fp + Geps + 2.0 * Heps2;
3806
3807     *F        = -k * FF * tabscale;
3808     *V        = k * VV;
3809     dvdlambda = (kB - kA) * VV;
3810
3811     return dvdlambda;
3812
3813     /* That was 22 flops */
3814 }
3815
3816 template<BondedKernelFlavor flavor>
3817 real tab_bonds(int             nbonds,
3818                const t_iatom   forceatoms[],
3819                const t_iparams forceparams[],
3820                const rvec      x[],
3821                rvec4           f[],
3822                rvec            fshift[],
3823                const t_pbc*    pbc,
3824                real            lambda,
3825                real*           dvdlambda,
3826                gmx::ArrayRef<const real> /*charge*/,
3827                t_fcdata*    fcd,
3828                t_disresdata gmx_unused* disresdata,
3829                t_oriresdata gmx_unused* oriresdata,
3830                int gmx_unused* global_atom_index)
3831 {
3832     int  i, ki, ai, aj, type, table;
3833     real dr, dr2, fbond, vbond, vtot;
3834     rvec dx;
3835
3836     vtot = 0.0;
3837     for (i = 0; (i < nbonds);)
3838     {
3839         type = forceatoms[i++];
3840         ai   = forceatoms[i++];
3841         aj   = forceatoms[i++];
3842
3843         ki  = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
3844         dr2 = iprod(dx, dx);                       /*   5               */
3845         dr  = dr2 * gmx::invsqrt(dr2);             /*  10               */
3846
3847         table = forceparams[type].tab.table;
3848
3849         *dvdlambda += bonded_tab("bond",
3850                                  table,
3851                                  &fcd->bondtab[table],
3852                                  forceparams[type].tab.kA,
3853                                  forceparams[type].tab.kB,
3854                                  dr,
3855                                  lambda,
3856                                  &vbond,
3857                                  &fbond); /*  22 */
3858
3859         if (dr2 == 0.0)
3860         {
3861             continue;
3862         }
3863
3864
3865         vtot += vbond;              /* 1*/
3866         fbond *= gmx::invsqrt(dr2); /*   6              */
3867
3868         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, fshift); /* 15 */
3869     }                                                               /* 62 TOTAL */
3870     return vtot;
3871 }
3872
3873 template<BondedKernelFlavor flavor>
3874 real tab_angles(int             nbonds,
3875                 const t_iatom   forceatoms[],
3876                 const t_iparams forceparams[],
3877                 const rvec      x[],
3878                 rvec4           f[],
3879                 rvec            fshift[],
3880                 const t_pbc*    pbc,
3881                 real            lambda,
3882                 real*           dvdlambda,
3883                 gmx::ArrayRef<const real> /*charge*/,
3884                 t_fcdata*    fcd,
3885                 t_disresdata gmx_unused* disresdata,
3886                 t_oriresdata gmx_unused* oriresdata,
3887                 int gmx_unused* global_atom_index)
3888 {
3889     int  i, ai, aj, ak, t1, t2, type, table;
3890     rvec r_ij, r_kj;
3891     real cos_theta, cos_theta2, theta, dVdt, va, vtot;
3892
3893     vtot = 0.0;
3894     for (i = 0; (i < nbonds);)
3895     {
3896         type = forceatoms[i++];
3897         ai   = forceatoms[i++];
3898         aj   = forceatoms[i++];
3899         ak   = forceatoms[i++];
3900
3901         theta = bond_angle(x[ai], x[aj], x[ak], pbc, r_ij, r_kj, &cos_theta, &t1, &t2); /*  41 */
3902
3903         table = forceparams[type].tab.table;
3904
3905         *dvdlambda += bonded_tab("angle",
3906                                  table,
3907                                  &fcd->angletab[table],
3908                                  forceparams[type].tab.kA,
3909                                  forceparams[type].tab.kB,
3910                                  theta,
3911                                  lambda,
3912                                  &va,
3913                                  &dVdt); /*  22  */
3914         vtot += va;
3915
3916         cos_theta2 = gmx::square(cos_theta); /*   1             */
3917         if (cos_theta2 < 1)
3918         {
3919             int  m;
3920             real st, sth;
3921             real cik, cii, ckk;
3922             real nrkj2, nrij2;
3923             rvec f_i, f_j, f_k;
3924
3925             st    = dVdt * gmx::invsqrt(1 - cos_theta2); /*  12         */
3926             sth   = st * cos_theta;                      /*   1         */
3927             nrkj2 = iprod(r_kj, r_kj);                   /*   5         */
3928             nrij2 = iprod(r_ij, r_ij);
3929
3930             cik = st * gmx::invsqrt(nrkj2 * nrij2); /*  12              */
3931             cii = sth / nrij2;                      /*  10              */
3932             ckk = sth / nrkj2;                      /*  10              */
3933
3934             for (m = 0; (m < DIM); m++) /*  39          */
3935             {
3936                 f_i[m] = -(cik * r_kj[m] - cii * r_ij[m]);
3937                 f_k[m] = -(cik * r_ij[m] - ckk * r_kj[m]);
3938                 f_j[m] = -f_i[m] - f_k[m];
3939                 f[ai][m] += f_i[m];
3940                 f[aj][m] += f_j[m];
3941                 f[ak][m] += f_k[m];
3942             }
3943
3944             if (computeVirial(flavor))
3945             {
3946                 rvec_inc(fshift[t1], f_i);
3947                 rvec_inc(fshift[c_centralShiftIndex], f_j);
3948                 rvec_inc(fshift[t2], f_k);
3949             }
3950         } /* 169 TOTAL  */
3951     }
3952     return vtot;
3953 }
3954
3955 template<BondedKernelFlavor flavor>
3956 real tab_dihs(int             nbonds,
3957               const t_iatom   forceatoms[],
3958               const t_iparams forceparams[],
3959               const rvec      x[],
3960               rvec4           f[],
3961               rvec            fshift[],
3962               const t_pbc*    pbc,
3963               real            lambda,
3964               real*           dvdlambda,
3965               gmx::ArrayRef<const real> /*charge*/,
3966               t_fcdata*    fcd,
3967               t_disresdata gmx_unused* disresdata,
3968               t_oriresdata gmx_unused* oriresdata,
3969               int gmx_unused* global_atom_index)
3970 {
3971     int  i, type, ai, aj, ak, al, table;
3972     int  t1, t2, t3;
3973     rvec r_ij, r_kj, r_kl, m, n;
3974     real phi, ddphi, vpd, vtot;
3975
3976     vtot = 0.0;
3977     for (i = 0; (i < nbonds);)
3978     {
3979         type = forceatoms[i++];
3980         ai   = forceatoms[i++];
3981         aj   = forceatoms[i++];
3982         ak   = forceatoms[i++];
3983         al   = forceatoms[i++];
3984
3985         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n, &t1, &t2, &t3); /*  84  */
3986
3987         table = forceparams[type].tab.table;
3988
3989         /* Hopefully phi+M_PI never results in values < 0 */
3990         *dvdlambda += bonded_tab("dihedral",
3991                                  table,
3992                                  &fcd->dihtab[table],
3993                                  forceparams[type].tab.kA,
3994                                  forceparams[type].tab.kB,
3995                                  phi + M_PI,
3996                                  lambda,
3997                                  &vpd,
3998                                  &ddphi);
3999
4000         vtot += vpd;
4001         do_dih_fup<flavor>(ai, aj, ak, al, -ddphi, r_ij, r_kj, r_kl, m, n, f, fshift, pbc, x, t1, t2, t3); /* 112       */
4002
4003     } /* 227 TOTAL  */
4004
4005     return vtot;
4006 }
4007
4008 struct BondedInteractions
4009 {
4010     BondedFunction function;
4011     int            nrnbIndex;
4012 };
4013
4014 /*! \brief Lookup table of bonded interaction functions
4015  *
4016  * This must have as many entries as interaction_function in ifunc.cpp */
4017 template<BondedKernelFlavor flavor>
4018 constexpr std::array<BondedInteractions, F_NRE> c_bondedInteractionFunctions = {
4019     BondedInteractions{ bonds<flavor>, eNR_BONDS },                       // F_BONDS
4020     BondedInteractions{ g96bonds<flavor>, eNR_BONDS },                    // F_G96BONDS
4021     BondedInteractions{ morse_bonds<flavor>, eNR_MORSE },                 // F_MORSE
4022     BondedInteractions{ cubic_bonds<flavor>, eNR_CUBICBONDS },            // F_CUBICBONDS
4023     BondedInteractions{ unimplemented, -1 },                              // F_CONNBONDS
4024     BondedInteractions{ bonds<flavor>, eNR_BONDS },                       // F_HARMONIC
4025     BondedInteractions{ FENE_bonds<flavor>, eNR_FENEBONDS },              // F_FENEBONDS
4026     BondedInteractions{ tab_bonds<flavor>, eNR_TABBONDS },                // F_TABBONDS
4027     BondedInteractions{ tab_bonds<flavor>, eNR_TABBONDS },                // F_TABBONDSNC
4028     BondedInteractions{ restraint_bonds<flavor>, eNR_RESTRBONDS },        // F_RESTRBONDS
4029     BondedInteractions{ angles<flavor>, eNR_ANGLES },                     // F_ANGLES
4030     BondedInteractions{ g96angles<flavor>, eNR_ANGLES },                  // F_G96ANGLES
4031     BondedInteractions{ restrangles<flavor>, eNR_ANGLES },                // F_RESTRANGLES
4032     BondedInteractions{ linear_angles<flavor>, eNR_ANGLES },              // F_LINEAR_ANGLES
4033     BondedInteractions{ cross_bond_bond<flavor>, eNR_CROSS_BOND_BOND },   // F_CROSS_BOND_BONDS
4034     BondedInteractions{ cross_bond_angle<flavor>, eNR_CROSS_BOND_ANGLE }, // F_CROSS_BOND_ANGLES
4035     BondedInteractions{ urey_bradley<flavor>, eNR_UREY_BRADLEY },         // F_UREY_BRADLEY
4036     BondedInteractions{ quartic_angles<flavor>, eNR_QANGLES },            // F_QUARTIC_ANGLES
4037     BondedInteractions{ tab_angles<flavor>, eNR_TABANGLES },              // F_TABANGLES
4038     BondedInteractions{ pdihs<flavor>, eNR_PROPER },                      // F_PDIHS
4039     BondedInteractions{ rbdihs<flavor>, eNR_RB },                         // F_RBDIHS
4040     BondedInteractions{ restrdihs<flavor>, eNR_PROPER },                  // F_RESTRDIHS
4041     BondedInteractions{ cbtdihs<flavor>, eNR_RB },                        // F_CBTDIHS
4042     BondedInteractions{ rbdihs<flavor>, eNR_FOURDIH },                    // F_FOURDIHS
4043     BondedInteractions{ idihs<flavor>, eNR_IMPROPER },                    // F_IDIHS
4044     BondedInteractions{ pdihs<flavor>, eNR_IMPROPER },                    // F_PIDIHS
4045     BondedInteractions{ tab_dihs<flavor>, eNR_TABDIHS },                  // F_TABDIHS
4046     BondedInteractions{ unimplemented, eNR_CMAP },                        // F_CMAP
4047     BondedInteractions{ unimplemented, -1 },                              // F_GB12_NOLONGERUSED
4048     BondedInteractions{ unimplemented, -1 },                              // F_GB13_NOLONGERUSED
4049     BondedInteractions{ unimplemented, -1 },                              // F_GB14_NOLONGERUSED
4050     BondedInteractions{ unimplemented, -1 },                              // F_GBPOL_NOLONGERUSED
4051     BondedInteractions{ unimplemented, -1 },                       // F_NPSOLVATION_NOLONGERUSED
4052     BondedInteractions{ unimplemented, eNR_NB14 },                 // F_LJ14
4053     BondedInteractions{ unimplemented, -1 },                       // F_COUL14
4054     BondedInteractions{ unimplemented, eNR_NB14 },                 // F_LJC14_Q
4055     BondedInteractions{ unimplemented, eNR_NB14 },                 // F_LJC_PAIRS_NB
4056     BondedInteractions{ unimplemented, -1 },                       // F_LJ
4057     BondedInteractions{ unimplemented, -1 },                       // F_BHAM
4058     BondedInteractions{ unimplemented, -1 },                       // F_LJ_LR_NOLONGERUSED
4059     BondedInteractions{ unimplemented, -1 },                       // F_BHAM_LR_NOLONGERUSED
4060     BondedInteractions{ unimplemented, -1 },                       // F_DISPCORR
4061     BondedInteractions{ unimplemented, -1 },                       // F_COUL_SR
4062     BondedInteractions{ unimplemented, -1 },                       // F_COUL_LR_NOLONGERUSED
4063     BondedInteractions{ unimplemented, -1 },                       // F_RF_EXCL
4064     BondedInteractions{ unimplemented, -1 },                       // F_COUL_RECIP
4065     BondedInteractions{ unimplemented, -1 },                       // F_LJ_RECIP
4066     BondedInteractions{ unimplemented, -1 },                       // F_DPD
4067     BondedInteractions{ polarize<flavor>, eNR_POLARIZE },          // F_POLARIZATION
4068     BondedInteractions{ water_pol<flavor>, eNR_WPOL },             // F_WATER_POL
4069     BondedInteractions{ thole_pol<flavor>, eNR_THOLE },            // F_THOLE_POL
4070     BondedInteractions{ anharm_polarize<flavor>, eNR_ANHARM_POL }, // F_ANHARM_POL
4071     BondedInteractions{ unimplemented, -1 },                       // F_POSRES
4072     BondedInteractions{ unimplemented, -1 },                       // F_FBPOSRES
4073     BondedInteractions{ ta_disres, eNR_DISRES },                   // F_DISRES
4074     BondedInteractions{ unimplemented, -1 },                       // F_DISRESVIOL
4075     BondedInteractions{ orires, eNR_ORIRES },                      // F_ORIRES
4076     BondedInteractions{ unimplemented, -1 },                       // F_ORIRESDEV
4077     BondedInteractions{ angres<flavor>, eNR_ANGRES },              // F_ANGRES
4078     BondedInteractions{ angresz<flavor>, eNR_ANGRESZ },            // F_ANGRESZ
4079     BondedInteractions{ dihres<flavor>, eNR_DIHRES },              // F_DIHRES
4080     BondedInteractions{ unimplemented, -1 },                       // F_DIHRESVIOL
4081     BondedInteractions{ unimplemented, -1 },                       // F_CONSTR
4082     BondedInteractions{ unimplemented, -1 },                       // F_CONSTRNC
4083     BondedInteractions{ unimplemented, -1 },                       // F_SETTLE
4084     BondedInteractions{ unimplemented, -1 },                       // F_VSITE2
4085     BondedInteractions{ unimplemented, -1 },                       // F_VSITE3
4086     BondedInteractions{ unimplemented, -1 },                       // F_VSITE3FD
4087     BondedInteractions{ unimplemented, -1 },                       // F_VSITE3FAD
4088     BondedInteractions{ unimplemented, -1 },                       // F_VSITE3OUT
4089     BondedInteractions{ unimplemented, -1 },                       // F_VSITE4FD
4090     BondedInteractions{ unimplemented, -1 },                       // F_VSITE4FDN
4091     BondedInteractions{ unimplemented, -1 },                       // F_VSITEN
4092     BondedInteractions{ unimplemented, -1 },                       // F_COM_PULL
4093     BondedInteractions{ unimplemented, -1 },                       // F_DENSITYFITTING
4094     BondedInteractions{ unimplemented, -1 },                       // F_EQM
4095     BondedInteractions{ unimplemented, -1 },                       // F_EPOT
4096     BondedInteractions{ unimplemented, -1 },                       // F_EKIN
4097     BondedInteractions{ unimplemented, -1 },                       // F_ETOT
4098     BondedInteractions{ unimplemented, -1 },                       // F_ECONSERVED
4099     BondedInteractions{ unimplemented, -1 },                       // F_TEMP
4100     BondedInteractions{ unimplemented, -1 },                       // F_VTEMP_NOLONGERUSED
4101     BondedInteractions{ unimplemented, -1 },                       // F_PDISPCORR
4102     BondedInteractions{ unimplemented, -1 },                       // F_PRES
4103     BondedInteractions{ unimplemented, -1 },                       // F_DVDL_CONSTR
4104     BondedInteractions{ unimplemented, -1 },                       // F_DVDL
4105     BondedInteractions{ unimplemented, -1 },                       // F_DKDL
4106     BondedInteractions{ unimplemented, -1 },                       // F_DVDL_COUL
4107     BondedInteractions{ unimplemented, -1 },                       // F_DVDL_VDW
4108     BondedInteractions{ unimplemented, -1 },                       // F_DVDL_BONDED
4109     BondedInteractions{ unimplemented, -1 },                       // F_DVDL_RESTRAINT
4110     BondedInteractions{ unimplemented, -1 },                       // F_DVDL_TEMPERATURE
4111 };
4112
4113 /*! \brief List of instantiated BondedInteractions list */
4114 constexpr gmx::EnumerationArray<BondedKernelFlavor, std::array<BondedInteractions, F_NRE>> c_bondedInteractionFunctionsPerFlavor = {
4115     c_bondedInteractionFunctions<BondedKernelFlavor::ForcesSimdWhenAvailable>,
4116     c_bondedInteractionFunctions<BondedKernelFlavor::ForcesNoSimd>,
4117     c_bondedInteractionFunctions<BondedKernelFlavor::ForcesAndVirialAndEnergy>,
4118     c_bondedInteractionFunctions<BondedKernelFlavor::ForcesAndEnergy>
4119 };
4120
4121 //! \endcond
4122
4123 } // namespace
4124
4125 real calculateSimpleBond(const int                 ftype,
4126                          const int                 numForceatoms,
4127                          const t_iatom             forceatoms[],
4128                          const t_iparams           forceparams[],
4129                          const rvec                x[],
4130                          rvec4                     f[],
4131                          rvec                      fshift[],
4132                          const struct t_pbc*       pbc,
4133                          const real                lambda,
4134                          real*                     dvdlambda,
4135                          gmx::ArrayRef<const real> charge,
4136                          t_fcdata*                 fcd,
4137                          t_disresdata*             disresdata,
4138                          t_oriresdata*             oriresdata,
4139                          int gmx_unused*          global_atom_index,
4140                          const BondedKernelFlavor bondedKernelFlavor)
4141 {
4142     const BondedInteractions& bonded = c_bondedInteractionFunctionsPerFlavor[bondedKernelFlavor][ftype];
4143
4144     real v = bonded.function(
4145             numForceatoms, forceatoms, forceparams, x, f, fshift, pbc, lambda, dvdlambda, charge, fcd, disresdata, oriresdata, global_atom_index);
4146
4147     return v;
4148 }
4149
4150 int nrnbIndex(int ftype)
4151 {
4152     return c_bondedInteractionFunctions<BondedKernelFlavor::ForcesAndVirialAndEnergy>[ftype].nrnbIndex;
4153 }