src/gromacs/listed_forces/bonded.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 /*! \internal \file
  38  *
  39  * \brief This file defines low-level functions necessary for
  40  * computing energies and forces for listed interactions.
  41  *
  42  * \author Mark Abraham <mark.j.abraham@gmail.com>
  43  *
  44  * \ingroup module_listed_forces
  45  */
  46 #include "gmxpre.h"
  47
  48 #include "bonded.h"
  49
  50 #include "config.h"
  51
  52 #include <cassert>
  53 #include <cmath>
  54
  55 #include <algorithm>
  56
  57 #include "gromacs/gmxlib/nrnb.h"
  58 #include "gromacs/listed_forces/disre.h"
  59 #include "gromacs/listed_forces/orires.h"
  60 #include "gromacs/math/functions.h"
  61 #include "gromacs/math/units.h"
  62 #include "gromacs/math/utilities.h"
  63 #include "gromacs/math/vec.h"
  64 #include "gromacs/mdtypes/fcdata.h"
  65 #include "gromacs/mdtypes/mdatom.h"
  66 #include "gromacs/pbcutil/ishift.h"
  67 #include "gromacs/pbcutil/mshift.h"
  68 #include "gromacs/pbcutil/pbc.h"
  69 #include "gromacs/pbcutil/pbc_simd.h"
  70 #include "gromacs/simd/simd.h"
  71 #include "gromacs/simd/simd_math.h"
  72 #include "gromacs/simd/vector_operations.h"
  73 #include "gromacs/utility/basedefinitions.h"
  74 #include "gromacs/utility/enumerationhelpers.h"
  75 #include "gromacs/utility/fatalerror.h"
  76 #include "gromacs/utility/real.h"
  77 #include "gromacs/utility/smalloc.h"
  78
  79 #include "listed_internal.h"
  80 #include "restcbt.h"
  81
  82 using namespace gmx; // TODO: Remove when this file is moved into gmx namespace
  83
  84 namespace
  85 {
  86
  87 //! Type of CPU function to compute a bonded interaction.
  88 using BondedFunction = real(*)(int nbonds, const t_iatom iatoms[],
  89                                const t_iparams iparams[],
  90                                const rvec x[], rvec4 f[], rvec fshift[],
  91                                const t_pbc *pbc, const t_graph *g,
  92                                real lambda, real *dvdlambda,
  93                                const t_mdatoms *md, t_fcdata *fcd,
  94                                int *ddgatindex);
  95
  96 /*! \brief Mysterious CMAP coefficient matrix */
  97 const int cmap_coeff_matrix[] = {
  98     1, 0, -3,  2, 0, 0,  0,  0, -3,  0,  9, -6,  2,  0, -6,  4,
  99     0, 0,  0,  0, 0, 0,  0,  0,  3,  0, -9,  6, -2,  0,  6, -4,
 100     0, 0,  0,  0, 0, 0,  0,  0,  0,  0,  9, -6,  0,  0, -6,  4,
 101     0, 0,  3, -2, 0, 0,  0,  0,  0,  0, -9,  6,  0,  0,  6, -4,
 102     0, 0,  0,  0, 1, 0, -3,  2, -2,  0,  6, -4,  1,  0, -3,  2,
 103     0, 0,  0,  0, 0, 0,  0,  0, -1,  0,  3, -2,  1,  0, -3,  2,
 104     0, 0,  0,  0, 0, 0,  0,  0,  0,  0, -3,  2,  0,  0,  3, -2,
 105     0, 0,  0,  0, 0, 0,  3, -2,  0,  0, -6,  4,  0,  0,  3, -2,
 106     0, 1, -2,  1, 0, 0,  0,  0,  0, -3,  6, -3,  0,  2, -4,  2,
 107     0, 0,  0,  0, 0, 0,  0,  0,  0,  3, -6,  3,  0, -2,  4, -2,
 108     0, 0,  0,  0, 0, 0,  0,  0,  0,  0, -3,  3,  0,  0,  2, -2,
 109     0, 0, -1,  1, 0, 0,  0,  0,  0,  0,  3, -3,  0,  0, -2,  2,
 110     0, 0,  0,  0, 0, 1, -2,  1,  0, -2,  4, -2,  0,  1, -2,  1,
 111     0, 0,  0,  0, 0, 0,  0,  0,  0, -1,  2, -1,  0,  1, -2,  1,
 112     0, 0,  0,  0, 0, 0,  0,  0,  0,  0,  1, -1,  0,  0, -1,  1,
 113     0, 0,  0,  0, 0, 0, -1,  1,  0,  0,  2, -2,  0,  0, -1,  1
 114 };
 115
 116
 117 /*! \brief Compute dx = xi - xj, modulo PBC if non-NULL
 118  *
 119  * \todo This kind of code appears in many places. Consolidate it */
 120 int pbc_rvec_sub(const t_pbc *pbc, const rvec xi, const rvec xj, rvec dx)
 121 {
 122     if (pbc)
 123     {
 124         return pbc_dx_aiuc(pbc, xi, xj, dx);
 125     }
 126     else
 127     {
 128         rvec_sub(xi, xj, dx);
 129         return CENTRAL;
 130     }
 131 }
 132
 133 } // namespace
 134
 135 //! \cond
 136 real bond_angle(const rvec xi, const rvec xj, const rvec xk, const t_pbc *pbc,
 137                 rvec r_ij, rvec r_kj, real *costh,
 138                 int *t1, int *t2)
 139 /* Return value is the angle between the bonds i-j and j-k */
 140 {
 141     /* 41 FLOPS */
 142     real th;
 143
 144     *t1 = pbc_rvec_sub(pbc, xi, xj, r_ij); /*  3                */
 145     *t2 = pbc_rvec_sub(pbc, xk, xj, r_kj); /*  3                */
 146
 147     *costh = cos_angle(r_ij, r_kj);        /* 25                */
 148     th     = std::acos(*costh);            /* 10                */
 149     /* 41 TOTAL */
 150     return th;
 151 }
 152
 153 real dih_angle(const rvec xi, const rvec xj, const rvec xk, const rvec xl,
 154                const t_pbc *pbc,
 155                rvec r_ij, rvec r_kj, rvec r_kl, rvec m, rvec n,
 156                int *t1, int *t2, int *t3)
 157 {
 158     *t1 = pbc_rvec_sub(pbc, xi, xj, r_ij); /*  3        */
 159     *t2 = pbc_rvec_sub(pbc, xk, xj, r_kj); /*  3                */
 160     *t3 = pbc_rvec_sub(pbc, xk, xl, r_kl); /*  3                */
 161
 162     cprod(r_ij, r_kj, m);                  /*  9        */
 163     cprod(r_kj, r_kl, n);                  /*  9                */
 164     real phi  = gmx_angle(m, n);           /* 49 (assuming 25 for atan2) */
 165     real ipr  = iprod(r_ij, n);            /*  5        */
 166     real sign = (ipr < 0.0) ? -1.0 : 1.0;
 167     phi       = sign*phi;                  /*  1                */
 168     /* 82 TOTAL */
 169     return phi;
 170 }
 171 //! \endcond
 172
 173 void make_dp_periodic(real *dp)  /* 1 flop? */
 174 {
 175     /* dp cannot be outside (-pi,pi) */
 176     if (*dp >= M_PI)
 177     {
 178         *dp -= 2*M_PI;
 179     }
 180     else if (*dp < -M_PI)
 181     {
 182         *dp += 2*M_PI;
 183     }
 184 }
 185
 186 namespace
 187 {
 188
 189 /*! \brief Spreads and accumulates the bonded forces to the two atoms and adds the virial contribution when needed
 190  *
 191  * When \p g==nullptr, \p shiftIndex is used as the periodic shift.
 192  * When \p g!=nullptr, the graph is used to compute the periodic shift.
 193  */
 194 template <BondedKernelFlavor flavor>
 195 inline void spreadBondForces(const real     bondForce,
 196                              const rvec     dx,
 197                              const int      ai,
 198                              const int      aj,
 199                              rvec4         *f,
 200                              int            shiftIndex,
 201                              const t_graph *g,
 202                              rvec          *fshift)
 203 {
 204     if (computeVirial(flavor) && g)
 205     {
 206         ivec dt;
 207         ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
 208         shiftIndex = IVEC2IS(dt);
 209     }
 210
 211     for (int m = 0; m < DIM; m++)                    /*  15          */
 212     {
 213         const real fij             = bondForce*dx[m];
 214         f[ai][m]                  += fij;
 215         f[aj][m]                  -= fij;
 216         if (computeVirial(flavor))
 217         {
 218             fshift[shiftIndex][m] += fij;
 219             fshift[CENTRAL][m]    -= fij;
 220         }
 221     }
 222 }
 223
 224 /*! \brief Morse potential bond
 225  *
 226  * By Frank Everdij. Three parameters needed:
 227  *
 228  * b0 = equilibrium distance in nm
 229  * be = beta in nm^-1 (actually, it's nu_e*Sqrt(2*pi*pi*mu/D_e))
 230  * cb = well depth in kJ/mol
 231  *
 232  * Note: the potential is referenced to be +cb at infinite separation
 233  *       and zero at the equilibrium distance!
 234  */
 235 template <BondedKernelFlavor flavor>
 236 real morse_bonds(int nbonds,
 237                  const t_iatom forceatoms[], const t_iparams forceparams[],
 238                  const rvec x[], rvec4 f[], rvec fshift[],
 239                  const t_pbc *pbc, const t_graph *g,
 240                  real lambda, real *dvdlambda,
 241                  const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
 242                  int gmx_unused *global_atom_index)
 243 {
 244     const real one = 1.0;
 245     const real two = 2.0;
 246     real       dr, dr2, temp, omtemp, cbomtemp, fbond, vbond, vtot;
 247     real       b0, be, cb, b0A, beA, cbA, b0B, beB, cbB, L1;
 248     rvec       dx;
 249     int        i, ki, type, ai, aj;
 250
 251     vtot = 0.0;
 252     for (i = 0; (i < nbonds); )
 253     {
 254         type = forceatoms[i++];
 255         ai   = forceatoms[i++];
 256         aj   = forceatoms[i++];
 257
 258         b0A   = forceparams[type].morse.b0A;
 259         beA   = forceparams[type].morse.betaA;
 260         cbA   = forceparams[type].morse.cbA;
 261
 262         b0B   = forceparams[type].morse.b0B;
 263         beB   = forceparams[type].morse.betaB;
 264         cbB   = forceparams[type].morse.cbB;
 265
 266         L1 = one-lambda;                            /* 1 */
 267         b0 = L1*b0A + lambda*b0B;                   /* 3 */
 268         be = L1*beA + lambda*beB;                   /* 3 */
 269         cb = L1*cbA + lambda*cbB;                   /* 3 */
 270
 271         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3          */
 272         dr2  = iprod(dx, dx);                       /*   5          */
 273         dr   = dr2*gmx::invsqrt(dr2);               /*  10          */
 274         temp = std::exp(-be*(dr-b0));               /*  12          */
 275
 276         if (temp == one)
 277         {
 278             /* bonds are constrainted. This may _not_ include bond constraints if they are lambda dependent */
 279             *dvdlambda += cbB-cbA;
 280             continue;
 281         }
 282
 283         omtemp    = one-temp;                                                                                        /*   1          */
 284         cbomtemp  = cb*omtemp;                                                                                       /*   1          */
 285         vbond     = cbomtemp*omtemp;                                                                                 /*   1          */
 286         fbond     = -two*be*temp*cbomtemp*gmx::invsqrt(dr2);                                                         /*   9          */
 287         vtot     += vbond;                                                                                           /*   1          */
 288
 289         *dvdlambda += (cbB - cbA) * omtemp * omtemp - (2-2*omtemp)*omtemp * cb * ((b0B-b0A)*be - (beB-beA)*(dr-b0)); /* 15 */
 290
 291         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, g, fshift);                                               /* 15 */
 292     }                                                                                                                /*  83 TOTAL    */
 293     return vtot;
 294 }
 295
 296 //! \cond
 297 template <BondedKernelFlavor flavor>
 298 real cubic_bonds(int nbonds,
 299                  const t_iatom forceatoms[], const t_iparams forceparams[],
 300                  const rvec x[], rvec4 f[], rvec fshift[],
 301                  const t_pbc *pbc, const t_graph *g,
 302                  real gmx_unused lambda, real gmx_unused *dvdlambda,
 303                  const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
 304                  int gmx_unused *global_atom_index)
 305 {
 306     const real three = 3.0;
 307     const real two   = 2.0;
 308     real       kb, b0, kcub;
 309     real       dr, dr2, dist, kdist, kdist2, fbond, vbond, vtot;
 310     rvec       dx;
 311     int        i, ki, type, ai, aj;
 312
 313     vtot = 0.0;
 314     for (i = 0; (i < nbonds); )
 315     {
 316         type = forceatoms[i++];
 317         ai   = forceatoms[i++];
 318         aj   = forceatoms[i++];
 319
 320         b0   = forceparams[type].cubic.b0;
 321         kb   = forceparams[type].cubic.kb;
 322         kcub = forceparams[type].cubic.kcub;
 323
 324         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx);     /*   3          */
 325         dr2  = iprod(dx, dx);                           /*   5          */
 326
 327         if (dr2 == 0.0)
 328         {
 329             continue;
 330         }
 331
 332         dr         = dr2*gmx::invsqrt(dr2);                  /*  10          */
 333         dist       = dr-b0;
 334         kdist      = kb*dist;
 335         kdist2     = kdist*dist;
 336
 337         vbond      = kdist2 + kcub*kdist2*dist;
 338         fbond      = -(two*kdist + three*kdist2*kcub)/dr;
 339
 340         vtot      += vbond;                                            /* 21 */
 341
 342         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, g, fshift); /* 15 */
 343     }                                                                  /*  54 TOTAL    */
 344     return vtot;
 345 }
 346
 347 template <BondedKernelFlavor flavor>
 348 real FENE_bonds(int nbonds,
 349                 const t_iatom forceatoms[], const t_iparams forceparams[],
 350                 const rvec x[], rvec4 f[], rvec fshift[],
 351                 const t_pbc *pbc, const t_graph *g,
 352                 real gmx_unused lambda, real gmx_unused *dvdlambda,
 353                 const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
 354                 int *global_atom_index)
 355 {
 356     const real half = 0.5;
 357     const real one  = 1.0;
 358     real       bm, kb;
 359     real       dr2, bm2, omdr2obm2, fbond, vbond, vtot;
 360     rvec       dx;
 361     int        i, ki, type, ai, aj;
 362
 363     vtot = 0.0;
 364     for (i = 0; (i < nbonds); )
 365     {
 366         type = forceatoms[i++];
 367         ai   = forceatoms[i++];
 368         aj   = forceatoms[i++];
 369
 370         bm   = forceparams[type].fene.bm;
 371         kb   = forceparams[type].fene.kb;
 372
 373         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx);     /*   3          */
 374         dr2  = iprod(dx, dx);                           /*   5          */
 375
 376         if (dr2 == 0.0)
 377         {
 378             continue;
 379         }
 380
 381         bm2 = bm*bm;
 382
 383         if (dr2 >= bm2)
 384         {
 385             gmx_fatal(FARGS,
 386                       "r^2 (%f) >= bm^2 (%f) in FENE bond between atoms %d and %d",
 387                       dr2, bm2,
 388                       glatnr(global_atom_index, ai),
 389                       glatnr(global_atom_index, aj));
 390         }
 391
 392         omdr2obm2  = one - dr2/bm2;
 393
 394         vbond      = -half*kb*bm2*std::log(omdr2obm2);
 395         fbond      = -kb/omdr2obm2;
 396
 397         vtot      += vbond;                                            /* 35 */
 398
 399         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, g, fshift); /* 15 */
 400     }                                                                  /*  58 TOTAL    */
 401     return vtot;
 402 }
 403
 404 real harmonic(real kA, real kB, real xA, real xB, real x, real lambda,
 405               real *V, real *F)
 406 {
 407     const real half = 0.5;
 408     real       L1, kk, x0, dx, dx2;
 409     real       v, f, dvdlambda;
 410
 411     L1    = 1.0-lambda;
 412     kk    = L1*kA+lambda*kB;
 413     x0    = L1*xA+lambda*xB;
 414
 415     dx    = x-x0;
 416     dx2   = dx*dx;
 417
 418     f          = -kk*dx;
 419     v          = half*kk*dx2;
 420     dvdlambda  = half*(kB-kA)*dx2 + (xA-xB)*kk*dx;
 421
 422     *F    = f;
 423     *V    = v;
 424
 425     return dvdlambda;
 426
 427     /* That was 19 flops */
 428 }
 429
 430
 431 template <BondedKernelFlavor flavor>
 432 real bonds(int nbonds,
 433            const t_iatom forceatoms[], const t_iparams forceparams[],
 434            const rvec x[], rvec4 f[], rvec fshift[],
 435            const t_pbc *pbc, const t_graph *g,
 436            real lambda, real *dvdlambda,
 437            const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
 438            int gmx_unused *global_atom_index)
 439 {
 440     int  i, ki, ai, aj, type;
 441     real dr, dr2, fbond, vbond, vtot;
 442     rvec dx;
 443
 444     vtot = 0.0;
 445     for (i = 0; (i < nbonds); )
 446     {
 447         type = forceatoms[i++];
 448         ai   = forceatoms[i++];
 449         aj   = forceatoms[i++];
 450
 451         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
 452         dr2  = iprod(dx, dx);                       /*   5              */
 453         dr   = std::sqrt(dr2);                      /*  10              */
 454
 455         *dvdlambda += harmonic(forceparams[type].harmonic.krA,
 456                                forceparams[type].harmonic.krB,
 457                                forceparams[type].harmonic.rA,
 458                                forceparams[type].harmonic.rB,
 459                                dr, lambda, &vbond, &fbond); /*  19  */
 460
 461         if (dr2 == 0.0)
 462         {
 463             continue;
 464         }
 465
 466
 467         vtot  += vbond;                                                /* 1*/
 468         fbond *= gmx::invsqrt(dr2);                                    /*   6           */
 469
 470         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, g, fshift); /* 15 */
 471     }                                                                  /* 59 TOTAL      */
 472     return vtot;
 473 }
 474
 475 template <BondedKernelFlavor flavor>
 476 real restraint_bonds(int nbonds,
 477                      const t_iatom forceatoms[], const t_iparams forceparams[],
 478                      const rvec x[], rvec4 f[], rvec fshift[],
 479                      const t_pbc *pbc, const t_graph *g,
 480                      real lambda, real *dvdlambda,
 481                      const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
 482                      int gmx_unused *global_atom_index)
 483 {
 484     int  i, ki, ai, aj, type;
 485     real dr, dr2, fbond, vbond, vtot;
 486     real L1;
 487     real low, dlow, up1, dup1, up2, dup2, k, dk;
 488     real drh, drh2;
 489     rvec dx;
 490
 491     L1   = 1.0 - lambda;
 492
 493     vtot = 0.0;
 494     for (i = 0; (i < nbonds); )
 495     {
 496         type = forceatoms[i++];
 497         ai   = forceatoms[i++];
 498         aj   = forceatoms[i++];
 499
 500         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
 501         dr2  = iprod(dx, dx);                       /*   5              */
 502         dr   = dr2*gmx::invsqrt(dr2);               /*  10              */
 503
 504         low  = L1*forceparams[type].restraint.lowA + lambda*forceparams[type].restraint.lowB;
 505         dlow =   -forceparams[type].restraint.lowA +        forceparams[type].restraint.lowB;
 506         up1  = L1*forceparams[type].restraint.up1A + lambda*forceparams[type].restraint.up1B;
 507         dup1 =   -forceparams[type].restraint.up1A +        forceparams[type].restraint.up1B;
 508         up2  = L1*forceparams[type].restraint.up2A + lambda*forceparams[type].restraint.up2B;
 509         dup2 =   -forceparams[type].restraint.up2A +        forceparams[type].restraint.up2B;
 510         k    = L1*forceparams[type].restraint.kA   + lambda*forceparams[type].restraint.kB;
 511         dk   =   -forceparams[type].restraint.kA   +        forceparams[type].restraint.kB;
 512         /* 24 */
 513
 514         if (dr < low)
 515         {
 516             drh         = dr - low;
 517             drh2        = drh*drh;
 518             vbond       = 0.5*k*drh2;
 519             fbond       = -k*drh;
 520             *dvdlambda += 0.5*dk*drh2 - k*dlow*drh;
 521         }   /* 11 */
 522         else if (dr <= up1)
 523         {
 524             vbond = 0;
 525             fbond = 0;
 526         }
 527         else if (dr <= up2)
 528         {
 529             drh         = dr - up1;
 530             drh2        = drh*drh;
 531             vbond       = 0.5*k*drh2;
 532             fbond       = -k*drh;
 533             *dvdlambda += 0.5*dk*drh2 - k*dup1*drh;
 534         }   /* 11       */
 535         else
 536         {
 537             drh         = dr - up2;
 538             vbond       = k*(up2 - up1)*(0.5*(up2 - up1) + drh);
 539             fbond       = -k*(up2 - up1);
 540             *dvdlambda += dk*(up2 - up1)*(0.5*(up2 - up1) + drh)
 541                 + k*(dup2 - dup1)*(up2 - up1 + drh)
 542                 - k*(up2 - up1)*dup2;
 543         }
 544
 545         if (dr2 == 0.0)
 546         {
 547             continue;
 548         }
 549
 550         vtot  += vbond;                                                /* 1*/
 551         fbond *= gmx::invsqrt(dr2);                                    /*   6           */
 552
 553         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, g, fshift); /* 15 */
 554     }                                                                  /* 59 TOTAL      */
 555
 556     return vtot;
 557 }
 558
 559 template <BondedKernelFlavor flavor>
 560 real polarize(int nbonds,
 561               const t_iatom forceatoms[], const t_iparams forceparams[],
 562               const rvec x[], rvec4 f[], rvec fshift[],
 563               const t_pbc *pbc, const t_graph *g,
 564               real lambda, real *dvdlambda,
 565               const t_mdatoms *md, t_fcdata gmx_unused *fcd,
 566               int gmx_unused *global_atom_index)
 567 {
 568     int  i, ki, ai, aj, type;
 569     real dr, dr2, fbond, vbond, vtot, ksh;
 570     rvec dx;
 571
 572     vtot = 0.0;
 573     for (i = 0; (i < nbonds); )
 574     {
 575         type = forceatoms[i++];
 576         ai   = forceatoms[i++];
 577         aj   = forceatoms[i++];
 578         ksh  = gmx::square(md->chargeA[aj])*ONE_4PI_EPS0/forceparams[type].polarize.alpha;
 579
 580         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx);                         /*   3      */
 581         dr2  = iprod(dx, dx);                                               /*   5              */
 582         dr   = std::sqrt(dr2);                                              /*  10              */
 583
 584         *dvdlambda += harmonic(ksh, ksh, 0, 0, dr, lambda, &vbond, &fbond); /*  19  */
 585
 586         if (dr2 == 0.0)
 587         {
 588             continue;
 589         }
 590
 591         vtot  += vbond;                                                /* 1*/
 592         fbond *= gmx::invsqrt(dr2);                                    /*   6           */
 593
 594         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, g, fshift); /* 15 */
 595     }                                                                  /* 59 TOTAL      */
 596     return vtot;
 597 }
 598
 599 template <BondedKernelFlavor flavor>
 600 real anharm_polarize(int nbonds,
 601                      const t_iatom forceatoms[], const t_iparams forceparams[],
 602                      const rvec x[], rvec4 f[], rvec fshift[],
 603                      const t_pbc *pbc, const t_graph *g,
 604                      real lambda, real *dvdlambda,
 605                      const t_mdatoms *md, t_fcdata gmx_unused *fcd,
 606                      int gmx_unused *global_atom_index)
 607 {
 608     int  i, ki, ai, aj, type;
 609     real dr, dr2, fbond, vbond, vtot, ksh, khyp, drcut, ddr, ddr3;
 610     rvec dx;
 611
 612     vtot = 0.0;
 613     for (i = 0; (i < nbonds); )
 614     {
 615         type  = forceatoms[i++];
 616         ai    = forceatoms[i++];
 617         aj    = forceatoms[i++];
 618         ksh   = gmx::square(md->chargeA[aj])*ONE_4PI_EPS0/forceparams[type].anharm_polarize.alpha; /* 7*/
 619         khyp  = forceparams[type].anharm_polarize.khyp;
 620         drcut = forceparams[type].anharm_polarize.drcut;
 621
 622         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx);                         /*   3      */
 623         dr2  = iprod(dx, dx);                                               /*   5              */
 624         dr   = dr2*gmx::invsqrt(dr2);                                       /*  10              */
 625
 626         *dvdlambda += harmonic(ksh, ksh, 0, 0, dr, lambda, &vbond, &fbond); /*  19  */
 627
 628         if (dr2 == 0.0)
 629         {
 630             continue;
 631         }
 632
 633         if (dr > drcut)
 634         {
 635             ddr    = dr-drcut;
 636             ddr3   = ddr*ddr*ddr;
 637             vbond += khyp*ddr*ddr3;
 638             fbond -= 4*khyp*ddr3;
 639         }
 640         fbond *= gmx::invsqrt(dr2);                                    /*   6           */
 641         vtot  += vbond;                                                /* 1*/
 642
 643         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, g, fshift); /* 15 */
 644     }                                                                  /* 72 TOTAL      */
 645     return vtot;
 646 }
 647
 648 template <BondedKernelFlavor flavor>
 649 real water_pol(int nbonds,
 650                const t_iatom forceatoms[], const t_iparams forceparams[],
 651                const rvec x[], rvec4 f[], rvec gmx_unused fshift[],
 652                const t_pbc gmx_unused *pbc, const t_graph gmx_unused *g,
 653                real gmx_unused lambda, real gmx_unused *dvdlambda,
 654                const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
 655                int gmx_unused *global_atom_index)
 656 {
 657     /* This routine implements anisotropic polarizibility for water, through
 658      * a shell connected to a dummy with spring constant that differ in the
 659      * three spatial dimensions in the molecular frame.
 660      */
 661     int  i, m, aO, aH1, aH2, aD, aS, type, type0, ki;
 662     ivec dt;
 663     rvec dOH1, dOH2, dHH, dOD, dDS, nW, kk, dx, kdx, proj;
 664     real vtot, fij, r_HH, r_OD, r_nW, tx, ty, tz, qS;
 665
 666     vtot = 0.0;
 667     if (nbonds > 0)
 668     {
 669         type0  = forceatoms[0];
 670         aS     = forceatoms[5];
 671         qS     = md->chargeA[aS];
 672         kk[XX] = gmx::square(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_x;
 673         kk[YY] = gmx::square(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_y;
 674         kk[ZZ] = gmx::square(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_z;
 675         r_HH   = 1.0/forceparams[type0].wpol.rHH;
 676         for (i = 0; (i < nbonds); i += 6)
 677         {
 678             type = forceatoms[i];
 679             if (type != type0)
 680             {
 681                 gmx_fatal(FARGS, "Sorry, type = %d, type0 = %d, file = %s, line = %d",
 682                           type, type0, __FILE__, __LINE__);
 683             }
 684             aO   = forceatoms[i+1];
 685             aH1  = forceatoms[i+2];
 686             aH2  = forceatoms[i+3];
 687             aD   = forceatoms[i+4];
 688             aS   = forceatoms[i+5];
 689
 690             /* Compute vectors describing the water frame */
 691             pbc_rvec_sub(pbc, x[aH1], x[aO], dOH1);
 692             pbc_rvec_sub(pbc, x[aH2], x[aO], dOH2);
 693             pbc_rvec_sub(pbc, x[aH2], x[aH1], dHH);
 694             pbc_rvec_sub(pbc, x[aD], x[aO], dOD);
 695             ki = pbc_rvec_sub(pbc, x[aS], x[aD], dDS);
 696             cprod(dOH1, dOH2, nW);
 697
 698             /* Compute inverse length of normal vector
 699              * (this one could be precomputed, but I'm too lazy now)
 700              */
 701             r_nW = gmx::invsqrt(iprod(nW, nW));
 702             /* This is for precision, but does not make a big difference,
 703              * it can go later.
 704              */
 705             r_OD = gmx::invsqrt(iprod(dOD, dOD));
 706
 707             /* Normalize the vectors in the water frame */
 708             svmul(r_nW, nW, nW);
 709             svmul(r_HH, dHH, dHH);
 710             svmul(r_OD, dOD, dOD);
 711
 712             /* Compute displacement of shell along components of the vector */
 713             dx[ZZ] = iprod(dDS, dOD);
 714             /* Compute projection on the XY plane: dDS - dx[ZZ]*dOD */
 715             for (m = 0; (m < DIM); m++)
 716             {
 717                 proj[m] = dDS[m]-dx[ZZ]*dOD[m];
 718             }
 719
 720             /*dx[XX] = iprod(dDS,nW);
 721                dx[YY] = iprod(dDS,dHH);*/
 722             dx[XX] = iprod(proj, nW);
 723             for (m = 0; (m < DIM); m++)
 724             {
 725                 proj[m] -= dx[XX]*nW[m];
 726             }
 727             dx[YY] = iprod(proj, dHH);
 728             /* Now compute the forces and energy */
 729             kdx[XX] = kk[XX]*dx[XX];
 730             kdx[YY] = kk[YY]*dx[YY];
 731             kdx[ZZ] = kk[ZZ]*dx[ZZ];
 732             vtot   += iprod(dx, kdx);
 733
 734             if (computeVirial(flavor) && g)
 735             {
 736                 ivec_sub(SHIFT_IVEC(g, aS), SHIFT_IVEC(g, aD), dt);
 737                 ki = IVEC2IS(dt);
 738             }
 739
 740             for (m = 0; (m < DIM); m++)
 741             {
 742                 /* This is a tensor operation but written out for speed */
 743                 tx                  =  nW[m]*kdx[XX];
 744                 ty                  = dHH[m]*kdx[YY];
 745                 tz                  = dOD[m]*kdx[ZZ];
 746                 fij                 = -tx-ty-tz;
 747                 f[aS][m]           += fij;
 748                 f[aD][m]           -= fij;
 749                 if (computeVirial(flavor))
 750                 {
 751                     fshift[ki][m]      += fij;
 752                     fshift[CENTRAL][m] -= fij;
 753                 }
 754             }
 755         }
 756     }
 757     return 0.5*vtot;
 758 }
 759
 760 template <BondedKernelFlavor flavor>
 761 static real do_1_thole(const rvec xi, const rvec xj, rvec fi, rvec fj,
 762                        const t_pbc *pbc, real qq,
 763                        rvec fshift[], real afac)
 764 {
 765     rvec r12;
 766     real r12sq, r12_1, r12bar, v0, v1, fscal, ebar, fff;
 767     int  m, t;
 768
 769     t      = pbc_rvec_sub(pbc, xi, xj, r12);                      /*  3 */
 770
 771     r12sq  = iprod(r12, r12);                                     /*  5 */
 772     r12_1  = gmx::invsqrt(r12sq);                                 /*  5 */
 773     r12bar = afac/r12_1;                                          /*  5 */
 774     v0     = qq*ONE_4PI_EPS0*r12_1;                               /*  2 */
 775     ebar   = std::exp(-r12bar);                                   /*  5 */
 776     v1     = (1-(1+0.5*r12bar)*ebar);                             /*  4 */
 777     fscal  = ((v0*r12_1)*v1 - v0*0.5*afac*ebar*(r12bar+1))*r12_1; /* 9 */
 778
 779     for (m = 0; (m < DIM); m++)
 780     {
 781         fff                 = fscal*r12[m];
 782         fi[m]              += fff;
 783         fj[m]              -= fff;
 784         if (computeVirial(flavor))
 785         {
 786             fshift[t][m]       += fff;
 787             fshift[CENTRAL][m] -= fff;
 788         }
 789     }             /* 15 */
 790
 791     return v0*v1; /* 1 */
 792     /* 54 */
 793 }
 794
 795 template <BondedKernelFlavor flavor>
 796 real thole_pol(int nbonds,
 797                const t_iatom forceatoms[], const t_iparams forceparams[],
 798                const rvec x[], rvec4 f[], rvec fshift[],
 799                const t_pbc *pbc, const t_graph gmx_unused *g,
 800                real gmx_unused lambda, real gmx_unused *dvdlambda,
 801                const t_mdatoms *md, t_fcdata gmx_unused *fcd,
 802                int gmx_unused *global_atom_index)
 803 {
 804     /* Interaction between two pairs of particles with opposite charge */
 805     int        i, type, a1, da1, a2, da2;
 806     real       q1, q2, qq, a, al1, al2, afac;
 807     real       V             = 0;
 808
 809     for (i = 0; (i < nbonds); )
 810     {
 811         type  = forceatoms[i++];
 812         a1    = forceatoms[i++];
 813         da1   = forceatoms[i++];
 814         a2    = forceatoms[i++];
 815         da2   = forceatoms[i++];
 816         q1    = md->chargeA[da1];
 817         q2    = md->chargeA[da2];
 818         a     = forceparams[type].thole.a;
 819         al1   = forceparams[type].thole.alpha1;
 820         al2   = forceparams[type].thole.alpha2;
 821         qq    = q1*q2;
 822         afac  = a*gmx::invsixthroot(al1*al2);
 823         V    += do_1_thole<flavor>(x[a1], x[a2], f[a1], f[a2], pbc, qq, fshift, afac);
 824         V    += do_1_thole<flavor>(x[da1], x[a2], f[da1], f[a2], pbc, -qq, fshift, afac);
 825         V    += do_1_thole<flavor>(x[a1], x[da2], f[a1], f[da2], pbc, -qq, fshift, afac);
 826         V    += do_1_thole<flavor>(x[da1], x[da2], f[da1], f[da2], pbc, qq, fshift, afac);
 827     }
 828     /* 290 flops */
 829     return V;
 830 }
 831
 832 template <BondedKernelFlavor flavor>
 833 std::enable_if_t<flavor != BondedKernelFlavor::ForcesSimdWhenAvailable || !GMX_SIMD_HAVE_REAL, real>
 834 angles(int nbonds,
 835        const t_iatom forceatoms[], const t_iparams forceparams[],
 836        const rvec x[], rvec4 f[], rvec fshift[],
 837        const t_pbc *pbc, const t_graph *g,
 838        real lambda, real *dvdlambda,
 839        const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
 840        int gmx_unused *global_atom_index)
 841 {
 842     int  i, ai, aj, ak, t1, t2, type;
 843     rvec r_ij, r_kj;
 844     real cos_theta, cos_theta2, theta, dVdt, va, vtot;
 845     ivec jt, dt_ij, dt_kj;
 846
 847     vtot = 0.0;
 848     for (i = 0; i < nbonds; )
 849     {
 850         type = forceatoms[i++];
 851         ai   = forceatoms[i++];
 852         aj   = forceatoms[i++];
 853         ak   = forceatoms[i++];
 854
 855         theta  = bond_angle(x[ai], x[aj], x[ak], pbc,
 856                             r_ij, r_kj, &cos_theta, &t1, &t2);  /*  41          */
 857
 858         *dvdlambda += harmonic(forceparams[type].harmonic.krA,
 859                                forceparams[type].harmonic.krB,
 860                                forceparams[type].harmonic.rA*DEG2RAD,
 861                                forceparams[type].harmonic.rB*DEG2RAD,
 862                                theta, lambda, &va, &dVdt);  /*  21  */
 863         vtot += va;
 864
 865         cos_theta2 = gmx::square(cos_theta);
 866         if (cos_theta2 < 1)
 867         {
 868             int  m;
 869             real st, sth;
 870             real cik, cii, ckk;
 871             real nrkj2, nrij2;
 872             real nrkj_1, nrij_1;
 873             rvec f_i, f_j, f_k;
 874
 875             st    = dVdt*gmx::invsqrt(1 - cos_theta2); /*  12           */
 876             sth   = st*cos_theta;                      /*   1           */
 877             nrij2 = iprod(r_ij, r_ij);                 /*   5           */
 878             nrkj2 = iprod(r_kj, r_kj);                 /*   5           */
 879
 880             nrij_1 = gmx::invsqrt(nrij2);              /*  10           */
 881             nrkj_1 = gmx::invsqrt(nrkj2);              /*  10           */
 882
 883             cik = st*nrij_1*nrkj_1;                    /*   2           */
 884             cii = sth*nrij_1*nrij_1;                   /*   2           */
 885             ckk = sth*nrkj_1*nrkj_1;                   /*   2           */
 886
 887             for (m = 0; m < DIM; m++)
 888             {           /*  39          */
 889                 f_i[m]    = -(cik*r_kj[m] - cii*r_ij[m]);
 890                 f_k[m]    = -(cik*r_ij[m] - ckk*r_kj[m]);
 891                 f_j[m]    = -f_i[m] - f_k[m];
 892                 f[ai][m] += f_i[m];
 893                 f[aj][m] += f_j[m];
 894                 f[ak][m] += f_k[m];
 895             }
 896             if (computeVirial(flavor))
 897             {
 898                 if (g != nullptr)
 899                 {
 900                     copy_ivec(SHIFT_IVEC(g, aj), jt);
 901
 902                     ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
 903                     ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
 904                     t1 = IVEC2IS(dt_ij);
 905                     t2 = IVEC2IS(dt_kj);
 906                 }
 907                 rvec_inc(fshift[t1], f_i);
 908                 rvec_inc(fshift[CENTRAL], f_j);
 909                 rvec_inc(fshift[t2], f_k);
 910             }
 911         }                                           /* 161 TOTAL        */
 912     }
 913
 914     return vtot;
 915 }
 916
 917 #if GMX_SIMD_HAVE_REAL
 918
 919 /* As angles, but using SIMD to calculate many angles at once.
 920  * This routines does not calculate energies and shift forces.
 921  */
 922 template <BondedKernelFlavor flavor>
 923 std::enable_if_t<flavor == BondedKernelFlavor::ForcesSimdWhenAvailable, real>
 924 angles(int nbonds,
 925        const t_iatom forceatoms[], const t_iparams forceparams[],
 926        const rvec x[], rvec4 f[], rvec gmx_unused fshift[],
 927        const t_pbc *pbc, const t_graph gmx_unused *g,
 928        real gmx_unused lambda, real gmx_unused *dvdlambda,
 929        const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
 930        int gmx_unused *global_atom_index)
 931 {
 932     const int            nfa1 = 4;
 933     int                  i, iu, s;
 934     int                  type;
 935     alignas(GMX_SIMD_ALIGNMENT) std::int32_t    ai[GMX_SIMD_REAL_WIDTH];
 936     alignas(GMX_SIMD_ALIGNMENT) std::int32_t    aj[GMX_SIMD_REAL_WIDTH];
 937     alignas(GMX_SIMD_ALIGNMENT) std::int32_t    ak[GMX_SIMD_REAL_WIDTH];
 938     alignas(GMX_SIMD_ALIGNMENT) real            coeff[2*GMX_SIMD_REAL_WIDTH];
 939     SimdReal             deg2rad_S(DEG2RAD);
 940     SimdReal             xi_S, yi_S, zi_S;
 941     SimdReal             xj_S, yj_S, zj_S;
 942     SimdReal             xk_S, yk_S, zk_S;
 943     SimdReal             k_S, theta0_S;
 944     SimdReal             rijx_S, rijy_S, rijz_S;
 945     SimdReal             rkjx_S, rkjy_S, rkjz_S;
 946     SimdReal             one_S(1.0);
 947     SimdReal             min_one_plus_eps_S(-1.0 + 2.0*GMX_REAL_EPS); // Smallest number > -1
 948
 949     SimdReal             rij_rkj_S;
 950     SimdReal             nrij2_S, nrij_1_S;
 951     SimdReal             nrkj2_S, nrkj_1_S;
 952     SimdReal             cos_S, invsin_S;
 953     SimdReal             theta_S;
 954     SimdReal             st_S, sth_S;
 955     SimdReal             cik_S, cii_S, ckk_S;
 956     SimdReal             f_ix_S, f_iy_S, f_iz_S;
 957     SimdReal             f_kx_S, f_ky_S, f_kz_S;
 958     alignas(GMX_SIMD_ALIGNMENT) real    pbc_simd[9*GMX_SIMD_REAL_WIDTH];
 959
 960     set_pbc_simd(pbc, pbc_simd);
 961
 962     /* nbonds is the number of angles times nfa1, here we step GMX_SIMD_REAL_WIDTH angles */
 963     for (i = 0; (i < nbonds); i += GMX_SIMD_REAL_WIDTH*nfa1)
 964     {
 965         /* Collect atoms for GMX_SIMD_REAL_WIDTH angles.
 966          * iu indexes into forceatoms, we should not let iu go beyond nbonds.
 967          */
 968         iu = i;
 969         for (s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
 970         {
 971             type  = forceatoms[iu];
 972             ai[s] = forceatoms[iu+1];
 973             aj[s] = forceatoms[iu+2];
 974             ak[s] = forceatoms[iu+3];
 975
 976             /* At the end fill the arrays with the last atoms and 0 params */
 977             if (i + s*nfa1 < nbonds)
 978             {
 979                 coeff[s]                     = forceparams[type].harmonic.krA;
 980                 coeff[GMX_SIMD_REAL_WIDTH+s] = forceparams[type].harmonic.rA;
 981
 982                 if (iu + nfa1 < nbonds)
 983                 {
 984                     iu += nfa1;
 985                 }
 986             }
 987             else
 988             {
 989                 coeff[s]                     = 0;
 990                 coeff[GMX_SIMD_REAL_WIDTH+s] = 0;
 991             }
 992         }
 993
 994         /* Store the non PBC corrected distances packed and aligned */
 995         gatherLoadUTranspose<3>(reinterpret_cast<const real *>(x), ai, &xi_S, &yi_S, &zi_S);
 996         gatherLoadUTranspose<3>(reinterpret_cast<const real *>(x), aj, &xj_S, &yj_S, &zj_S);
 997         gatherLoadUTranspose<3>(reinterpret_cast<const real *>(x), ak, &xk_S, &yk_S, &zk_S);
 998         rijx_S = xi_S - xj_S;
 999         rijy_S = yi_S - yj_S;
1000         rijz_S = zi_S - zj_S;
1001         rkjx_S = xk_S - xj_S;
1002         rkjy_S = yk_S - yj_S;
1003         rkjz_S = zk_S - zj_S;
1004
1005         k_S       = load<SimdReal>(coeff);
1006         theta0_S  = load<SimdReal>(coeff+GMX_SIMD_REAL_WIDTH) * deg2rad_S;
1007
1008         pbc_correct_dx_simd(&rijx_S, &rijy_S, &rijz_S, pbc_simd);
1009         pbc_correct_dx_simd(&rkjx_S, &rkjy_S, &rkjz_S, pbc_simd);
1010
1011         rij_rkj_S = iprod(rijx_S, rijy_S, rijz_S,
1012                           rkjx_S, rkjy_S, rkjz_S);
1013
1014         nrij2_S   = norm2(rijx_S, rijy_S, rijz_S);
1015         nrkj2_S   = norm2(rkjx_S, rkjy_S, rkjz_S);
1016
1017         nrij_1_S  = invsqrt(nrij2_S);
1018         nrkj_1_S  = invsqrt(nrkj2_S);
1019
1020         cos_S     = rij_rkj_S * nrij_1_S * nrkj_1_S;
1021
1022         /* To allow for 180 degrees, we take the max of cos and -1 + 1bit,
1023          * so we can safely get the 1/sin from 1/sqrt(1 - cos^2).
1024          * This also ensures that rounding errors would cause the argument
1025          * of simdAcos to be < -1.
1026          * Note that we do not take precautions for cos(0)=1, so the outer
1027          * atoms in an angle should not be on top of each other.
1028          */
1029         cos_S     = max(cos_S, min_one_plus_eps_S);
1030
1031         theta_S   = acos(cos_S);
1032
1033         invsin_S  = invsqrt( one_S - cos_S * cos_S );
1034
1035         st_S      = k_S * (theta0_S - theta_S) * invsin_S;
1036         sth_S     = st_S * cos_S;
1037
1038         cik_S     = st_S  * nrij_1_S * nrkj_1_S;
1039         cii_S     = sth_S * nrij_1_S * nrij_1_S;
1040         ckk_S     = sth_S * nrkj_1_S * nrkj_1_S;
1041
1042         f_ix_S    = cii_S * rijx_S;
1043         f_ix_S    = fnma(cik_S, rkjx_S, f_ix_S);
1044         f_iy_S    = cii_S * rijy_S;
1045         f_iy_S    = fnma(cik_S, rkjy_S, f_iy_S);
1046         f_iz_S    = cii_S * rijz_S;
1047         f_iz_S    = fnma(cik_S, rkjz_S, f_iz_S);
1048         f_kx_S    = ckk_S * rkjx_S;
1049         f_kx_S    = fnma(cik_S, rijx_S, f_kx_S);
1050         f_ky_S    = ckk_S * rkjy_S;
1051         f_ky_S    = fnma(cik_S, rijy_S, f_ky_S);
1052         f_kz_S    = ckk_S * rkjz_S;
1053         f_kz_S    = fnma(cik_S, rijz_S, f_kz_S);
1054
1055         transposeScatterIncrU<4>(reinterpret_cast<real *>(f), ai, f_ix_S, f_iy_S, f_iz_S);
1056         transposeScatterDecrU<4>(reinterpret_cast<real *>(f), aj, f_ix_S + f_kx_S, f_iy_S + f_ky_S, f_iz_S + f_kz_S);
1057         transposeScatterIncrU<4>(reinterpret_cast<real *>(f), ak, f_kx_S, f_ky_S, f_kz_S);
1058     }
1059
1060     return 0;
1061 }
1062
1063 #endif // GMX_SIMD_HAVE_REAL
1064
1065 template <BondedKernelFlavor flavor>
1066 real linear_angles(int nbonds,
1067                    const t_iatom forceatoms[], const t_iparams forceparams[],
1068                    const rvec x[], rvec4 f[], rvec fshift[],
1069                    const t_pbc *pbc, const t_graph *g,
1070                    real lambda, real *dvdlambda,
1071                    const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
1072                    int gmx_unused *global_atom_index)
1073 {
1074     int  i, m, ai, aj, ak, t1, t2, type;
1075     rvec f_i, f_j, f_k;
1076     real L1, kA, kB, aA, aB, dr, dr2, va, vtot, a, b, klin;
1077     ivec jt, dt_ij, dt_kj;
1078     rvec r_ij, r_kj, r_ik, dx;
1079
1080     L1   = 1-lambda;
1081     vtot = 0.0;
1082     for (i = 0; (i < nbonds); )
1083     {
1084         type = forceatoms[i++];
1085         ai   = forceatoms[i++];
1086         aj   = forceatoms[i++];
1087         ak   = forceatoms[i++];
1088
1089         kA   = forceparams[type].linangle.klinA;
1090         kB   = forceparams[type].linangle.klinB;
1091         klin = L1*kA + lambda*kB;
1092
1093         aA   = forceparams[type].linangle.aA;
1094         aB   = forceparams[type].linangle.aB;
1095         a    = L1*aA+lambda*aB;
1096         b    = 1-a;
1097
1098         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], r_ij);
1099         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], r_kj);
1100         rvec_sub(r_ij, r_kj, r_ik);
1101
1102         dr2 = 0;
1103         for (m = 0; (m < DIM); m++)
1104         {
1105             dr        = -a * r_ij[m] - b * r_kj[m];
1106             dr2      += dr*dr;
1107             dx[m]     = dr;
1108             f_i[m]    = a*klin*dr;
1109             f_k[m]    = b*klin*dr;
1110             f_j[m]    = -(f_i[m]+f_k[m]);
1111             f[ai][m] += f_i[m];
1112             f[aj][m] += f_j[m];
1113             f[ak][m] += f_k[m];
1114         }
1115         va          = 0.5*klin*dr2;
1116         *dvdlambda += 0.5*(kB-kA)*dr2 + klin*(aB-aA)*iprod(dx, r_ik);
1117
1118         vtot += va;
1119
1120         if (computeVirial(flavor))
1121         {
1122             if (g)
1123             {
1124                 copy_ivec(SHIFT_IVEC(g, aj), jt);
1125
1126                 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
1127                 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
1128                 t1 = IVEC2IS(dt_ij);
1129                 t2 = IVEC2IS(dt_kj);
1130             }
1131             rvec_inc(fshift[t1], f_i);
1132             rvec_inc(fshift[CENTRAL], f_j);
1133             rvec_inc(fshift[t2], f_k);
1134         }
1135     }                                         /* 57 TOTAL       */
1136     return vtot;
1137 }
1138
1139 template <BondedKernelFlavor flavor>
1140 std::enable_if_t<flavor != BondedKernelFlavor::ForcesSimdWhenAvailable || !GMX_SIMD_HAVE_REAL, real>
1141 urey_bradley(int nbonds,
1142              const t_iatom forceatoms[], const t_iparams forceparams[],
1143              const rvec x[], rvec4 f[], rvec fshift[],
1144              const t_pbc *pbc, const t_graph *g,
1145              real lambda, real *dvdlambda,
1146              const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
1147              int gmx_unused *global_atom_index)
1148 {
1149     int  i, m, ai, aj, ak, t1, t2, type, ki;
1150     rvec r_ij, r_kj, r_ik;
1151     real cos_theta, cos_theta2, theta;
1152     real dVdt, va, vtot, dr, dr2, vbond, fbond, fik;
1153     real kthA, th0A, kUBA, r13A, kthB, th0B, kUBB, r13B;
1154     ivec jt, dt_ij, dt_kj, dt_ik;
1155
1156     vtot = 0.0;
1157     for (i = 0; (i < nbonds); )
1158     {
1159         type  = forceatoms[i++];
1160         ai    = forceatoms[i++];
1161         aj    = forceatoms[i++];
1162         ak    = forceatoms[i++];
1163         th0A  = forceparams[type].u_b.thetaA*DEG2RAD;
1164         kthA  = forceparams[type].u_b.kthetaA;
1165         r13A  = forceparams[type].u_b.r13A;
1166         kUBA  = forceparams[type].u_b.kUBA;
1167         th0B  = forceparams[type].u_b.thetaB*DEG2RAD;
1168         kthB  = forceparams[type].u_b.kthetaB;
1169         r13B  = forceparams[type].u_b.r13B;
1170         kUBB  = forceparams[type].u_b.kUBB;
1171
1172         theta  = bond_angle(x[ai], x[aj], x[ak], pbc,
1173                             r_ij, r_kj, &cos_theta, &t1, &t2);                     /*  41               */
1174
1175         *dvdlambda += harmonic(kthA, kthB, th0A, th0B, theta, lambda, &va, &dVdt); /*  21  */
1176         vtot       += va;
1177
1178         ki   = pbc_rvec_sub(pbc, x[ai], x[ak], r_ik);                               /*   3      */
1179         dr2  = iprod(r_ik, r_ik);                                                   /*   5              */
1180         dr   = dr2*gmx::invsqrt(dr2);                                               /*  10              */
1181
1182         *dvdlambda += harmonic(kUBA, kUBB, r13A, r13B, dr, lambda, &vbond, &fbond); /*  19  */
1183
1184         cos_theta2 = gmx::square(cos_theta);                                        /*   1              */
1185         if (cos_theta2 < 1)
1186         {
1187             real st, sth;
1188             real cik, cii, ckk;
1189             real nrkj2, nrij2;
1190             rvec f_i, f_j, f_k;
1191
1192             st    = dVdt*gmx::invsqrt(1 - cos_theta2); /*  12           */
1193             sth   = st*cos_theta;                      /*   1           */
1194             nrkj2 = iprod(r_kj, r_kj);                 /*   5           */
1195             nrij2 = iprod(r_ij, r_ij);
1196
1197             cik = st*gmx::invsqrt(nrkj2*nrij2); /*  12          */
1198             cii = sth/nrij2;                    /*  10          */
1199             ckk = sth/nrkj2;                    /*  10          */
1200
1201             for (m = 0; (m < DIM); m++)         /*  39          */
1202             {
1203                 f_i[m]    = -(cik*r_kj[m]-cii*r_ij[m]);
1204                 f_k[m]    = -(cik*r_ij[m]-ckk*r_kj[m]);
1205                 f_j[m]    = -f_i[m]-f_k[m];
1206                 f[ai][m] += f_i[m];
1207                 f[aj][m] += f_j[m];
1208                 f[ak][m] += f_k[m];
1209             }
1210             if (computeVirial(flavor))
1211             {
1212                 if (g)
1213                 {
1214                     copy_ivec(SHIFT_IVEC(g, aj), jt);
1215
1216                     ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
1217                     ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
1218                     t1 = IVEC2IS(dt_ij);
1219                     t2 = IVEC2IS(dt_kj);
1220                 }
1221                 rvec_inc(fshift[t1], f_i);
1222                 rvec_inc(fshift[CENTRAL], f_j);
1223                 rvec_inc(fshift[t2], f_k);
1224             }
1225         }                                       /* 161 TOTAL    */
1226         /* Time for the bond calculations */
1227         if (dr2 == 0.0)
1228         {
1229             continue;
1230         }
1231
1232         vtot  += vbond;             /* 1*/
1233         fbond *= gmx::invsqrt(dr2); /*   6              */
1234
1235         if (computeVirial(flavor) && g)
1236         {
1237             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, ak), dt_ik);
1238             ki = IVEC2IS(dt_ik);
1239         }
1240         for (m = 0; (m < DIM); m++)     /*  15          */
1241         {
1242             fik                 = fbond*r_ik[m];
1243             f[ai][m]           += fik;
1244             f[ak][m]           -= fik;
1245             if (computeVirial(flavor))
1246             {
1247                 fshift[ki][m]      += fik;
1248                 fshift[CENTRAL][m] -= fik;
1249             }
1250         }
1251     }
1252     return vtot;
1253 }
1254
1255 #if GMX_SIMD_HAVE_REAL
1256
1257 /* As urey_bradley, but using SIMD to calculate many potentials at once.
1258  * This routines does not calculate energies and shift forces.
1259  */
1260 template <BondedKernelFlavor flavor>
1261 std::enable_if_t<flavor == BondedKernelFlavor::ForcesSimdWhenAvailable, real>
1262 urey_bradley(int nbonds,
1263              const t_iatom forceatoms[], const t_iparams forceparams[],
1264              const rvec x[], rvec4 f[], rvec gmx_unused fshift[],
1265              const t_pbc *pbc, const t_graph gmx_unused *g,
1266              real gmx_unused lambda, real gmx_unused *dvdlambda,
1267              const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
1268              int gmx_unused *global_atom_index)
1269 {
1270     constexpr int            nfa1 = 4;
1271     alignas(GMX_SIMD_ALIGNMENT) std::int32_t    ai[GMX_SIMD_REAL_WIDTH];
1272     alignas(GMX_SIMD_ALIGNMENT) std::int32_t    aj[GMX_SIMD_REAL_WIDTH];
1273     alignas(GMX_SIMD_ALIGNMENT) std::int32_t    ak[GMX_SIMD_REAL_WIDTH];
1274     alignas(GMX_SIMD_ALIGNMENT) real            coeff[4*GMX_SIMD_REAL_WIDTH];
1275     alignas(GMX_SIMD_ALIGNMENT) real            pbc_simd[9*GMX_SIMD_REAL_WIDTH];
1276
1277     set_pbc_simd(pbc, pbc_simd);
1278
1279     /* nbonds is the number of angles times nfa1, here we step GMX_SIMD_REAL_WIDTH angles */
1280     for (int i = 0; i < nbonds; i += GMX_SIMD_REAL_WIDTH*nfa1)
1281     {
1282         /* Collect atoms for GMX_SIMD_REAL_WIDTH angles.
1283          * iu indexes into forceatoms, we should not let iu go beyond nbonds.
1284          */
1285         int iu = i;
1286         for (int s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
1287         {
1288             const int type                     = forceatoms[iu];
1289             ai[s] = forceatoms[iu+1];
1290             aj[s] = forceatoms[iu+2];
1291             ak[s] = forceatoms[iu+3];
1292
1293             /* At the end fill the arrays with the last atoms and 0 params */
1294             if (i + s*nfa1 < nbonds)
1295             {
1296                 coeff[s]                       = forceparams[type].u_b.kthetaA;
1297                 coeff[GMX_SIMD_REAL_WIDTH+s]   = forceparams[type].u_b.thetaA;
1298                 coeff[GMX_SIMD_REAL_WIDTH*2+s] = forceparams[type].u_b.kUBA;
1299                 coeff[GMX_SIMD_REAL_WIDTH*3+s] = forceparams[type].u_b.r13A;
1300
1301                 if (iu + nfa1 < nbonds)
1302                 {
1303                     iu += nfa1;
1304                 }
1305             }
1306             else
1307             {
1308                 coeff[s]                       = 0;
1309                 coeff[GMX_SIMD_REAL_WIDTH+s]   = 0;
1310                 coeff[GMX_SIMD_REAL_WIDTH*2+s] = 0;
1311                 coeff[GMX_SIMD_REAL_WIDTH*3+s] = 0;
1312             }
1313         }
1314
1315         SimdReal xi_S, yi_S, zi_S;
1316         SimdReal xj_S, yj_S, zj_S;
1317         SimdReal xk_S, yk_S, zk_S;
1318
1319         /* Store the non PBC corrected distances packed and aligned */
1320         gatherLoadUTranspose<3>(reinterpret_cast<const real *>(x), ai, &xi_S, &yi_S, &zi_S);
1321         gatherLoadUTranspose<3>(reinterpret_cast<const real *>(x), aj, &xj_S, &yj_S, &zj_S);
1322         gatherLoadUTranspose<3>(reinterpret_cast<const real *>(x), ak, &xk_S, &yk_S, &zk_S);
1323         SimdReal       rijx_S = xi_S - xj_S;
1324         SimdReal       rijy_S = yi_S - yj_S;
1325         SimdReal       rijz_S = zi_S - zj_S;
1326         SimdReal       rkjx_S = xk_S - xj_S;
1327         SimdReal       rkjy_S = yk_S - yj_S;
1328         SimdReal       rkjz_S = zk_S - zj_S;
1329         SimdReal       rikx_S = xi_S - xk_S;
1330         SimdReal       riky_S = yi_S - yk_S;
1331         SimdReal       rikz_S = zi_S - zk_S;
1332
1333         const SimdReal ktheta_S = load<SimdReal>(coeff);
1334         const SimdReal theta0_S = load<SimdReal>(coeff+GMX_SIMD_REAL_WIDTH) * DEG2RAD;
1335         const SimdReal kUB_S    = load<SimdReal>(coeff+2*GMX_SIMD_REAL_WIDTH);
1336         const SimdReal r13_S    = load<SimdReal>(coeff+3*GMX_SIMD_REAL_WIDTH);
1337
1338         pbc_correct_dx_simd(&rijx_S, &rijy_S, &rijz_S, pbc_simd);
1339         pbc_correct_dx_simd(&rkjx_S, &rkjy_S, &rkjz_S, pbc_simd);
1340         pbc_correct_dx_simd(&rikx_S, &riky_S, &rikz_S, pbc_simd);
1341
1342         const SimdReal rij_rkj_S = iprod(rijx_S, rijy_S, rijz_S,
1343                                          rkjx_S, rkjy_S, rkjz_S);
1344
1345         const SimdReal dr2_S     = iprod(rikx_S, riky_S, rikz_S,
1346                                          rikx_S, riky_S, rikz_S);
1347
1348         const SimdReal nrij2_S   = norm2(rijx_S, rijy_S, rijz_S);
1349         const SimdReal nrkj2_S   = norm2(rkjx_S, rkjy_S, rkjz_S);
1350
1351         const SimdReal nrij_1_S  = invsqrt(nrij2_S);
1352         const SimdReal nrkj_1_S  = invsqrt(nrkj2_S);
1353         const SimdReal invdr2_S  = invsqrt(dr2_S);
1354         const SimdReal dr_S      = dr2_S*invdr2_S;
1355
1356         constexpr real min_one_plus_eps = -1.0 + 2.0*GMX_REAL_EPS; // Smallest number > -1
1357
1358         /* To allow for 180 degrees, we take the max of cos and -1 + 1bit,
1359          * so we can safely get the 1/sin from 1/sqrt(1 - cos^2).
1360          * This also ensures that rounding errors would cause the argument
1361          * of simdAcos to be < -1.
1362          * Note that we do not take precautions for cos(0)=1, so the bonds
1363          * in an angle should not align at an angle of 0 degrees.
1364          */
1365         const SimdReal cos_S     = max(rij_rkj_S * nrij_1_S * nrkj_1_S, min_one_plus_eps);
1366
1367         const SimdReal theta_S   = acos(cos_S);
1368         const SimdReal invsin_S  = invsqrt( 1.0 - cos_S * cos_S );
1369         const SimdReal st_S      = ktheta_S * (theta0_S - theta_S) * invsin_S;
1370         const SimdReal sth_S     = st_S * cos_S;
1371
1372         const SimdReal cik_S     = st_S  * nrij_1_S * nrkj_1_S;
1373         const SimdReal cii_S     = sth_S * nrij_1_S * nrij_1_S;
1374         const SimdReal ckk_S     = sth_S * nrkj_1_S * nrkj_1_S;
1375
1376         const SimdReal sUB_S     = kUB_S * (r13_S - dr_S) * invdr2_S;
1377
1378         const SimdReal f_ikx_S   = sUB_S * rikx_S;
1379         const SimdReal f_iky_S   = sUB_S * riky_S;
1380         const SimdReal f_ikz_S   = sUB_S * rikz_S;
1381
1382         const SimdReal f_ix_S    = fnma(cik_S, rkjx_S, cii_S * rijx_S) + f_ikx_S;
1383         const SimdReal f_iy_S    = fnma(cik_S, rkjy_S, cii_S * rijy_S) + f_iky_S;
1384         const SimdReal f_iz_S    = fnma(cik_S, rkjz_S, cii_S * rijz_S) + f_ikz_S;
1385         const SimdReal f_kx_S    = fnma(cik_S, rijx_S, ckk_S * rkjx_S) - f_ikx_S;
1386         const SimdReal f_ky_S    = fnma(cik_S, rijy_S, ckk_S * rkjy_S) - f_iky_S;
1387         const SimdReal f_kz_S    = fnma(cik_S, rijz_S, ckk_S * rkjz_S) - f_ikz_S;
1388
1389         transposeScatterIncrU<4>(reinterpret_cast<real *>(f), ai, f_ix_S, f_iy_S, f_iz_S);
1390         transposeScatterDecrU<4>(reinterpret_cast<real *>(f), aj, f_ix_S + f_kx_S, f_iy_S + f_ky_S, f_iz_S + f_kz_S);
1391         transposeScatterIncrU<4>(reinterpret_cast<real *>(f), ak, f_kx_S, f_ky_S, f_kz_S);
1392     }
1393
1394     return 0;
1395 }
1396
1397 #endif // GMX_SIMD_HAVE_REAL
1398
1399 template <BondedKernelFlavor flavor>
1400 real quartic_angles(int nbonds,
1401                     const t_iatom forceatoms[], const t_iparams forceparams[],
1402                     const rvec x[], rvec4 f[], rvec fshift[],
1403                     const t_pbc *pbc, const t_graph *g,
1404                     real gmx_unused lambda, real gmx_unused *dvdlambda,
1405                     const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
1406                     int gmx_unused *global_atom_index)
1407 {
1408     int  i, j, ai, aj, ak, t1, t2, type;
1409     rvec r_ij, r_kj;
1410     real cos_theta, cos_theta2, theta, dt, dVdt, va, dtp, c, vtot;
1411     ivec jt, dt_ij, dt_kj;
1412
1413     vtot = 0.0;
1414     for (i = 0; (i < nbonds); )
1415     {
1416         type = forceatoms[i++];
1417         ai   = forceatoms[i++];
1418         aj   = forceatoms[i++];
1419         ak   = forceatoms[i++];
1420
1421         theta  = bond_angle(x[ai], x[aj], x[ak], pbc,
1422                             r_ij, r_kj, &cos_theta, &t1, &t2); /*  41           */
1423
1424         dt = theta - forceparams[type].qangle.theta*DEG2RAD;   /* 2          */
1425
1426         dVdt = 0;
1427         va   = forceparams[type].qangle.c[0];
1428         dtp  = 1.0;
1429         for (j = 1; j <= 4; j++)
1430         {
1431             c     = forceparams[type].qangle.c[j];
1432             dVdt -= j*c*dtp;
1433             dtp  *= dt;
1434             va   += c*dtp;
1435         }
1436         /* 20 */
1437
1438         vtot += va;
1439
1440         cos_theta2 = gmx::square(cos_theta);            /*   1          */
1441         if (cos_theta2 < 1)
1442         {
1443             int  m;
1444             real st, sth;
1445             real cik, cii, ckk;
1446             real nrkj2, nrij2;
1447             rvec f_i, f_j, f_k;
1448
1449             st    = dVdt*gmx::invsqrt(1 - cos_theta2); /*  12           */
1450             sth   = st*cos_theta;                      /*   1           */
1451             nrkj2 = iprod(r_kj, r_kj);                 /*   5           */
1452             nrij2 = iprod(r_ij, r_ij);
1453
1454             cik = st*gmx::invsqrt(nrkj2*nrij2); /*  12          */
1455             cii = sth/nrij2;                    /*  10          */
1456             ckk = sth/nrkj2;                    /*  10          */
1457
1458             for (m = 0; (m < DIM); m++)         /*  39          */
1459             {
1460                 f_i[m]    = -(cik*r_kj[m]-cii*r_ij[m]);
1461                 f_k[m]    = -(cik*r_ij[m]-ckk*r_kj[m]);
1462                 f_j[m]    = -f_i[m]-f_k[m];
1463                 f[ai][m] += f_i[m];
1464                 f[aj][m] += f_j[m];
1465                 f[ak][m] += f_k[m];
1466             }
1467
1468             if (computeVirial(flavor))
1469             {
1470                 if (g)
1471                 {
1472                     copy_ivec(SHIFT_IVEC(g, aj), jt);
1473
1474                     ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
1475                     ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
1476                     t1 = IVEC2IS(dt_ij);
1477                     t2 = IVEC2IS(dt_kj);
1478                 }
1479                 rvec_inc(fshift[t1], f_i);
1480                 rvec_inc(fshift[CENTRAL], f_j);
1481                 rvec_inc(fshift[t2], f_k);
1482             }
1483         }                                       /* 153 TOTAL    */
1484     }
1485     return vtot;
1486 }
1487
1488
1489 #if GMX_SIMD_HAVE_REAL
1490
1491 /* As dih_angle above, but calculates 4 dihedral angles at once using SIMD,
1492  * also calculates the pre-factor required for the dihedral force update.
1493  * Note that bv and buf should be register aligned.
1494  */
1495 inline void
1496 dih_angle_simd(const rvec *x,
1497                const int *ai, const int *aj, const int *ak, const int *al,
1498                const real *pbc_simd,
1499                SimdReal *phi_S,
1500                SimdReal *mx_S, SimdReal *my_S, SimdReal *mz_S,
1501                SimdReal *nx_S, SimdReal *ny_S, SimdReal *nz_S,
1502                SimdReal *nrkj_m2_S,
1503                SimdReal *nrkj_n2_S,
1504                SimdReal *p_S,
1505                SimdReal *q_S)
1506 {
1507     SimdReal xi_S, yi_S, zi_S;
1508     SimdReal xj_S, yj_S, zj_S;
1509     SimdReal xk_S, yk_S, zk_S;
1510     SimdReal xl_S, yl_S, zl_S;
1511     SimdReal rijx_S, rijy_S, rijz_S;
1512     SimdReal rkjx_S, rkjy_S, rkjz_S;
1513     SimdReal rklx_S, rkly_S, rklz_S;
1514     SimdReal cx_S, cy_S, cz_S;
1515     SimdReal cn_S;
1516     SimdReal s_S;
1517     SimdReal ipr_S;
1518     SimdReal iprm_S, iprn_S;
1519     SimdReal nrkj2_S, nrkj_1_S, nrkj_2_S, nrkj_S;
1520     SimdReal toler_S;
1521     SimdReal nrkj2_min_S;
1522     SimdReal real_eps_S;
1523
1524     /* Used to avoid division by zero.
1525      * We take into acount that we multiply the result by real_eps_S.
1526      */
1527     nrkj2_min_S = SimdReal(GMX_REAL_MIN/(2*GMX_REAL_EPS));
1528
1529     /* The value of the last significant bit (GMX_REAL_EPS is half of that) */
1530     real_eps_S  = SimdReal(2*GMX_REAL_EPS);
1531
1532     /* Store the non PBC corrected distances packed and aligned */
1533     gatherLoadUTranspose<3>(reinterpret_cast<const real *>(x), ai, &xi_S, &yi_S, &zi_S);
1534     gatherLoadUTranspose<3>(reinterpret_cast<const real *>(x), aj, &xj_S, &yj_S, &zj_S);
1535     gatherLoadUTranspose<3>(reinterpret_cast<const real *>(x), ak, &xk_S, &yk_S, &zk_S);
1536     gatherLoadUTranspose<3>(reinterpret_cast<const real *>(x), al, &xl_S, &yl_S, &zl_S);
1537     rijx_S = xi_S - xj_S;
1538     rijy_S = yi_S - yj_S;
1539     rijz_S = zi_S - zj_S;
1540     rkjx_S = xk_S - xj_S;
1541     rkjy_S = yk_S - yj_S;
1542     rkjz_S = zk_S - zj_S;
1543     rklx_S = xk_S - xl_S;
1544     rkly_S = yk_S - yl_S;
1545     rklz_S = zk_S - zl_S;
1546
1547     pbc_correct_dx_simd(&rijx_S, &rijy_S, &rijz_S, pbc_simd);
1548     pbc_correct_dx_simd(&rkjx_S, &rkjy_S, &rkjz_S, pbc_simd);
1549     pbc_correct_dx_simd(&rklx_S, &rkly_S, &rklz_S, pbc_simd);
1550
1551     cprod(rijx_S, rijy_S, rijz_S,
1552           rkjx_S, rkjy_S, rkjz_S,
1553           mx_S, my_S, mz_S);
1554
1555     cprod(rkjx_S, rkjy_S, rkjz_S,
1556           rklx_S, rkly_S, rklz_S,
1557           nx_S, ny_S, nz_S);
1558
1559     cprod(*mx_S, *my_S, *mz_S,
1560           *nx_S, *ny_S, *nz_S,
1561           &cx_S, &cy_S, &cz_S);
1562
1563     cn_S       = sqrt(norm2(cx_S, cy_S, cz_S));
1564
1565     s_S        = iprod(*mx_S, *my_S, *mz_S, *nx_S, *ny_S, *nz_S);
1566
1567     /* Determine the dihedral angle, the sign might need correction */
1568     *phi_S     = atan2(cn_S, s_S);
1569
1570     ipr_S      = iprod(rijx_S, rijy_S, rijz_S,
1571                        *nx_S, *ny_S, *nz_S);
1572
1573     iprm_S     = norm2(*mx_S, *my_S, *mz_S);
1574     iprn_S     = norm2(*nx_S, *ny_S, *nz_S);
1575
1576     nrkj2_S    = norm2(rkjx_S, rkjy_S, rkjz_S);
1577
1578     /* Avoid division by zero. When zero, the result is multiplied by 0
1579      * anyhow, so the 3 max below do not affect the final result.
1580      */
1581     nrkj2_S    = max(nrkj2_S, nrkj2_min_S);
1582     nrkj_1_S   = invsqrt(nrkj2_S);
1583     nrkj_2_S   = nrkj_1_S * nrkj_1_S;
1584     nrkj_S     = nrkj2_S * nrkj_1_S;
1585
1586     toler_S    = nrkj2_S * real_eps_S;
1587
1588     /* Here the plain-C code uses a conditional, but we can't do that in SIMD.
1589      * So we take a max with the tolerance instead. Since we multiply with
1590      * m or n later, the max does not affect the results.
1591      */
1592     iprm_S     = max(iprm_S, toler_S);
1593     iprn_S     = max(iprn_S, toler_S);
1594     *nrkj_m2_S = nrkj_S * inv(iprm_S);
1595     *nrkj_n2_S = nrkj_S * inv(iprn_S);
1596
1597     /* Set sign of phi_S with the sign of ipr_S; phi_S is currently positive */
1598     *phi_S     = copysign(*phi_S, ipr_S);
1599     *p_S       = iprod(rijx_S, rijy_S, rijz_S, rkjx_S, rkjy_S, rkjz_S);
1600     *p_S       = *p_S * nrkj_2_S;
1601
1602     *q_S       = iprod(rklx_S, rkly_S, rklz_S, rkjx_S, rkjy_S, rkjz_S);
1603     *q_S       = *q_S * nrkj_2_S;
1604 }
1605
1606 #endif // GMX_SIMD_HAVE_REAL
1607
1608 }      // namespace
1609
1610 template <BondedKernelFlavor flavor>
1611 void do_dih_fup(int i, int j, int k, int l, real ddphi,
1612                 rvec r_ij, rvec r_kj, rvec r_kl,
1613                 rvec m, rvec n, rvec4 f[], rvec fshift[],
1614                 const t_pbc *pbc, const t_graph *g,
1615                 const rvec x[], int t1, int t2, int t3)
1616 {
1617     /* 143 FLOPS */
1618     rvec f_i, f_j, f_k, f_l;
1619     rvec uvec, vvec, svec, dx_jl;
1620     real iprm, iprn, nrkj, nrkj2, nrkj_1, nrkj_2;
1621     real a, b, p, q, toler;
1622     ivec jt, dt_ij, dt_kj, dt_lj;
1623
1624     iprm  = iprod(m, m);       /*  5    */
1625     iprn  = iprod(n, n);       /*  5    */
1626     nrkj2 = iprod(r_kj, r_kj); /*  5    */
1627     toler = nrkj2*GMX_REAL_EPS;
1628     if ((iprm > toler) && (iprn > toler))
1629     {
1630         nrkj_1 = gmx::invsqrt(nrkj2); /* 10     */
1631         nrkj_2 = nrkj_1*nrkj_1;       /*  1     */
1632         nrkj   = nrkj2*nrkj_1;        /*  1     */
1633         a      = -ddphi*nrkj/iprm;    /* 11     */
1634         svmul(a, m, f_i);             /*  3     */
1635         b     = ddphi*nrkj/iprn;      /* 11     */
1636         svmul(b, n, f_l);             /*  3  */
1637         p     = iprod(r_ij, r_kj);    /*  5     */
1638         p    *= nrkj_2;               /*  1     */
1639         q     = iprod(r_kl, r_kj);    /*  5     */
1640         q    *= nrkj_2;               /*  1     */
1641         svmul(p, f_i, uvec);          /*  3     */
1642         svmul(q, f_l, vvec);          /*  3     */
1643         rvec_sub(uvec, vvec, svec);   /*  3     */
1644         rvec_sub(f_i, svec, f_j);     /*  3     */
1645         rvec_add(f_l, svec, f_k);     /*  3     */
1646         rvec_inc(f[i], f_i);          /*  3     */
1647         rvec_dec(f[j], f_j);          /*  3     */
1648         rvec_dec(f[k], f_k);          /*  3     */
1649         rvec_inc(f[l], f_l);          /*  3     */
1650
1651         if (computeVirial(flavor))
1652         {
1653             if (g)
1654             {
1655                 copy_ivec(SHIFT_IVEC(g, j), jt);
1656                 ivec_sub(SHIFT_IVEC(g, i), jt, dt_ij);
1657                 ivec_sub(SHIFT_IVEC(g, k), jt, dt_kj);
1658                 ivec_sub(SHIFT_IVEC(g, l), jt, dt_lj);
1659                 t1 = IVEC2IS(dt_ij);
1660                 t2 = IVEC2IS(dt_kj);
1661                 t3 = IVEC2IS(dt_lj);
1662             }
1663             else if (pbc)
1664             {
1665                 t3 = pbc_rvec_sub(pbc, x[l], x[j], dx_jl);
1666             }
1667             else
1668             {
1669                 t3 = CENTRAL;
1670             }
1671
1672             rvec_inc(fshift[t1], f_i);
1673             rvec_dec(fshift[CENTRAL], f_j);
1674             rvec_dec(fshift[t2], f_k);
1675             rvec_inc(fshift[t3], f_l);
1676         }
1677     }
1678     /* 112 TOTAL    */
1679 }
1680
1681 namespace
1682 {
1683
1684 #if GMX_SIMD_HAVE_REAL
1685 /* As do_dih_fup_noshiftf above, but with SIMD and pre-calculated pre-factors */
1686 inline void gmx_simdcall
1687 do_dih_fup_noshiftf_simd(const int *ai, const int *aj, const int *ak, const int *al,
1688                          SimdReal p, SimdReal q,
1689                          SimdReal f_i_x,  SimdReal f_i_y,  SimdReal f_i_z,
1690                          SimdReal mf_l_x, SimdReal mf_l_y, SimdReal mf_l_z,
1691                          rvec4 f[])
1692 {
1693     SimdReal sx    = p * f_i_x + q * mf_l_x;
1694     SimdReal sy    = p * f_i_y + q * mf_l_y;
1695     SimdReal sz    = p * f_i_z + q * mf_l_z;
1696     SimdReal f_j_x = f_i_x - sx;
1697     SimdReal f_j_y = f_i_y - sy;
1698     SimdReal f_j_z = f_i_z - sz;
1699     SimdReal f_k_x = mf_l_x - sx;
1700     SimdReal f_k_y = mf_l_y - sy;
1701     SimdReal f_k_z = mf_l_z - sz;
1702     transposeScatterIncrU<4>(reinterpret_cast<real *>(f), ai, f_i_x, f_i_y, f_i_z);
1703     transposeScatterDecrU<4>(reinterpret_cast<real *>(f), aj, f_j_x, f_j_y, f_j_z);
1704     transposeScatterIncrU<4>(reinterpret_cast<real *>(f), ak, f_k_x, f_k_y, f_k_z);
1705     transposeScatterDecrU<4>(reinterpret_cast<real *>(f), al, mf_l_x, mf_l_y, mf_l_z);
1706 }
1707 #endif // GMX_SIMD_HAVE_REAL
1708
1709 /*! \brief Computes and returns the proper dihedral force
1710  *
1711  * With the appropriate kernel flavor, also computes and accumulates
1712  * the energy and dV/dlambda.
1713  */
1714 template <BondedKernelFlavor flavor>
1715 real dopdihs(real cpA, real cpB, real phiA, real phiB, int mult,
1716              real phi, real lambda, real *V, real *dvdlambda)
1717 {
1718     const real L1    = 1.0 - lambda;
1719     const real ph0   = (L1*phiA + lambda*phiB)*DEG2RAD;
1720     const real dph0  = (phiB - phiA)*DEG2RAD;
1721     const real cp    = L1*cpA + lambda*cpB;
1722
1723     const real mdphi =  mult*phi - ph0;
1724     const real sdphi = std::sin(mdphi);
1725     const real ddphi = -cp*mult*sdphi;
1726     if (computeEnergy(flavor))
1727     {
1728         const real v1  = 1 + std::cos(mdphi);
1729         *V            += cp*v1;
1730
1731         *dvdlambda    += (cpB - cpA)*v1 + cp*dph0*sdphi;
1732     }
1733
1734     return ddphi;
1735     /* That was 40 flops */
1736 }
1737
1738 /*! \brief Similar to \p dopdihs(), except for a minus sign and a different treatment of mult/phi0 */
1739 real dopdihs_min(real cpA, real cpB, real phiA, real phiB, int mult,
1740                  real phi, real lambda, real *V, real *F)
1741 {
1742     real v, dvdlambda, mdphi, v1, sdphi, ddphi;
1743     real L1   = 1.0 - lambda;
1744     real ph0  = (L1*phiA + lambda*phiB)*DEG2RAD;
1745     real dph0 = (phiB - phiA)*DEG2RAD;
1746     real cp   = L1*cpA + lambda*cpB;
1747
1748     mdphi = mult*(phi-ph0);
1749     sdphi = std::sin(mdphi);
1750     ddphi = cp*mult*sdphi;
1751     v1    = 1.0-std::cos(mdphi);
1752     v     = cp*v1;
1753
1754     dvdlambda  = (cpB-cpA)*v1 + cp*dph0*sdphi;
1755
1756     *V = v;
1757     *F = ddphi;
1758
1759     return dvdlambda;
1760
1761     /* That was 40 flops */
1762 }
1763
1764 template <BondedKernelFlavor flavor>
1765 std::enable_if_t<flavor != BondedKernelFlavor::ForcesSimdWhenAvailable || !GMX_SIMD_HAVE_REAL, real>
1766 pdihs(int nbonds,
1767       const t_iatom forceatoms[], const t_iparams forceparams[],
1768       const rvec x[], rvec4 f[], rvec fshift[],
1769       const t_pbc *pbc, const t_graph *g,
1770       real lambda, real *dvdlambda,
1771       const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
1772       int gmx_unused *global_atom_index)
1773 {
1774     int  t1, t2, t3;
1775     rvec r_ij, r_kj, r_kl, m, n;
1776
1777     real vtot = 0.0;
1778
1779     for (int i = 0; i < nbonds; )
1780     {
1781         const int  ai  = forceatoms[i + 1];
1782         const int  aj  = forceatoms[i + 2];
1783         const int  ak  = forceatoms[i + 3];
1784         const int  al  = forceatoms[i + 4];
1785
1786         const real phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
1787                                    &t1, &t2, &t3); /*  84      */
1788
1789         /* Loop over dihedrals working on the same atoms,
1790          * so we avoid recalculating angles and distributing forces.
1791          */
1792         real ddphi_tot = 0;
1793         do
1794         {
1795             const int type = forceatoms[i];
1796             ddphi_tot +=
1797                 dopdihs<flavor>(forceparams[type].pdihs.cpA,
1798                                 forceparams[type].pdihs.cpB,
1799                                 forceparams[type].pdihs.phiA,
1800                                 forceparams[type].pdihs.phiB,
1801                                 forceparams[type].pdihs.mult,
1802                                 phi, lambda, &vtot, dvdlambda);
1803
1804             i += 5;
1805         }
1806         while (i < nbonds &&
1807                forceatoms[i + 1] == ai &&
1808                forceatoms[i + 2] == aj &&
1809                forceatoms[i + 3] == ak &&
1810                forceatoms[i + 4] == al);
1811
1812         do_dih_fup<flavor>(ai, aj, ak, al, ddphi_tot, r_ij, r_kj, r_kl, m, n,
1813                            f, fshift, pbc, g, x, t1, t2, t3); /* 112            */
1814     }                                                         /* 223 TOTAL  */
1815
1816     return vtot;
1817 }
1818
1819 #if GMX_SIMD_HAVE_REAL
1820
1821 /* As pdihs above, but using SIMD to calculate multiple dihedrals at once */
1822 template <BondedKernelFlavor flavor>
1823 std::enable_if_t<flavor == BondedKernelFlavor::ForcesSimdWhenAvailable, real>
1824 pdihs(int nbonds,
1825       const t_iatom forceatoms[], const t_iparams forceparams[],
1826       const rvec x[], rvec4 f[], rvec gmx_unused fshift[],
1827       const t_pbc *pbc, const t_graph gmx_unused *g,
1828       real gmx_unused lambda, real gmx_unused *dvdlambda,
1829       const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
1830       int gmx_unused *global_atom_index)
1831 {
1832     const int             nfa1 = 5;
1833     int                   i, iu, s;
1834     int                   type;
1835     alignas(GMX_SIMD_ALIGNMENT) std::int32_t    ai[GMX_SIMD_REAL_WIDTH];
1836     alignas(GMX_SIMD_ALIGNMENT) std::int32_t    aj[GMX_SIMD_REAL_WIDTH];
1837     alignas(GMX_SIMD_ALIGNMENT) std::int32_t    ak[GMX_SIMD_REAL_WIDTH];
1838     alignas(GMX_SIMD_ALIGNMENT) std::int32_t    al[GMX_SIMD_REAL_WIDTH];
1839     alignas(GMX_SIMD_ALIGNMENT) real            buf[3*GMX_SIMD_REAL_WIDTH];
1840     real                 *cp, *phi0, *mult;
1841     SimdReal              deg2rad_S(DEG2RAD);
1842     SimdReal              p_S, q_S;
1843     SimdReal              phi0_S, phi_S;
1844     SimdReal              mx_S, my_S, mz_S;
1845     SimdReal              nx_S, ny_S, nz_S;
1846     SimdReal              nrkj_m2_S, nrkj_n2_S;
1847     SimdReal              cp_S, mdphi_S, mult_S;
1848     SimdReal              sin_S, cos_S;
1849     SimdReal              mddphi_S;
1850     SimdReal              sf_i_S, msf_l_S;
1851     alignas(GMX_SIMD_ALIGNMENT) real            pbc_simd[9*GMX_SIMD_REAL_WIDTH];
1852
1853     /* Extract aligned pointer for parameters and variables */
1854     cp    = buf + 0*GMX_SIMD_REAL_WIDTH;
1855     phi0  = buf + 1*GMX_SIMD_REAL_WIDTH;
1856     mult  = buf + 2*GMX_SIMD_REAL_WIDTH;
1857
1858     set_pbc_simd(pbc, pbc_simd);
1859
1860     /* nbonds is the number of dihedrals times nfa1, here we step GMX_SIMD_REAL_WIDTH dihs */
1861     for (i = 0; (i < nbonds); i += GMX_SIMD_REAL_WIDTH*nfa1)
1862     {
1863         /* Collect atoms quadruplets for GMX_SIMD_REAL_WIDTH dihedrals.
1864          * iu indexes into forceatoms, we should not let iu go beyond nbonds.
1865          */
1866         iu = i;
1867         for (s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
1868         {
1869             type  = forceatoms[iu];
1870             ai[s] = forceatoms[iu+1];
1871             aj[s] = forceatoms[iu+2];
1872             ak[s] = forceatoms[iu+3];
1873             al[s] = forceatoms[iu+4];
1874
1875             /* At the end fill the arrays with the last atoms and 0 params */
1876             if (i + s*nfa1 < nbonds)
1877             {
1878                 cp[s]   = forceparams[type].pdihs.cpA;
1879                 phi0[s] = forceparams[type].pdihs.phiA;
1880                 mult[s] = forceparams[type].pdihs.mult;
1881
1882                 if (iu + nfa1 < nbonds)
1883                 {
1884                     iu += nfa1;
1885                 }
1886             }
1887             else
1888             {
1889                 cp[s]   = 0;
1890                 phi0[s] = 0;
1891                 mult[s] = 0;
1892             }
1893         }
1894
1895         /* Caclulate GMX_SIMD_REAL_WIDTH dihedral angles at once */
1896         dih_angle_simd(x, ai, aj, ak, al, pbc_simd,
1897                        &phi_S,
1898                        &mx_S, &my_S, &mz_S,
1899                        &nx_S, &ny_S, &nz_S,
1900                        &nrkj_m2_S,
1901                        &nrkj_n2_S,
1902                        &p_S, &q_S);
1903
1904         cp_S     = load<SimdReal>(cp);
1905         phi0_S   = load<SimdReal>(phi0) * deg2rad_S;
1906         mult_S   = load<SimdReal>(mult);
1907
1908         mdphi_S  = fms(mult_S, phi_S, phi0_S);
1909
1910         /* Calculate GMX_SIMD_REAL_WIDTH sines at once */
1911         sincos(mdphi_S, &sin_S, &cos_S);
1912         mddphi_S = cp_S * mult_S * sin_S;
1913         sf_i_S   = mddphi_S * nrkj_m2_S;
1914         msf_l_S  = mddphi_S * nrkj_n2_S;
1915
1916         /* After this m?_S will contain f[i] */
1917         mx_S     = sf_i_S * mx_S;
1918         my_S     = sf_i_S * my_S;
1919         mz_S     = sf_i_S * mz_S;
1920
1921         /* After this m?_S will contain -f[l] */
1922         nx_S     = msf_l_S * nx_S;
1923         ny_S     = msf_l_S * ny_S;
1924         nz_S     = msf_l_S * nz_S;
1925
1926         do_dih_fup_noshiftf_simd(ai, aj, ak, al,
1927                                  p_S, q_S,
1928                                  mx_S, my_S, mz_S,
1929                                  nx_S, ny_S, nz_S,
1930                                  f);
1931     }
1932
1933     return 0;
1934 }
1935
1936 /* This is mostly a copy of the SIMD flavor of pdihs above, but with using
1937  * the RB potential instead of a harmonic potential.
1938  * This function can replace rbdihs() when no energy and virial are needed.
1939  */
1940 template <BondedKernelFlavor flavor>
1941 std::enable_if_t<flavor == BondedKernelFlavor::ForcesSimdWhenAvailable, real>
1942 rbdihs(int nbonds,
1943        const t_iatom forceatoms[], const t_iparams forceparams[],
1944        const rvec x[], rvec4 f[], rvec gmx_unused fshift[],
1945        const t_pbc *pbc, const t_graph gmx_unused *g,
1946        real gmx_unused lambda, real gmx_unused *dvdlambda,
1947        const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
1948        int gmx_unused *global_atom_index)
1949 {
1950     const int             nfa1 = 5;
1951     int                   i, iu, s, j;
1952     int                   type;
1953     alignas(GMX_SIMD_ALIGNMENT) std::int32_t  ai[GMX_SIMD_REAL_WIDTH];
1954     alignas(GMX_SIMD_ALIGNMENT) std::int32_t  aj[GMX_SIMD_REAL_WIDTH];
1955     alignas(GMX_SIMD_ALIGNMENT) std::int32_t  ak[GMX_SIMD_REAL_WIDTH];
1956     alignas(GMX_SIMD_ALIGNMENT) std::int32_t  al[GMX_SIMD_REAL_WIDTH];
1957     alignas(GMX_SIMD_ALIGNMENT) real          parm[NR_RBDIHS*GMX_SIMD_REAL_WIDTH];
1958
1959     SimdReal              p_S, q_S;
1960     SimdReal              phi_S;
1961     SimdReal              ddphi_S, cosfac_S;
1962     SimdReal              mx_S, my_S, mz_S;
1963     SimdReal              nx_S, ny_S, nz_S;
1964     SimdReal              nrkj_m2_S, nrkj_n2_S;
1965     SimdReal              parm_S, c_S;
1966     SimdReal              sin_S, cos_S;
1967     SimdReal              sf_i_S, msf_l_S;
1968     alignas(GMX_SIMD_ALIGNMENT) real          pbc_simd[9*GMX_SIMD_REAL_WIDTH];
1969
1970     SimdReal              pi_S(M_PI);
1971     SimdReal              one_S(1.0);
1972
1973     set_pbc_simd(pbc, pbc_simd);
1974
1975     /* nbonds is the number of dihedrals times nfa1, here we step GMX_SIMD_REAL_WIDTH dihs */
1976     for (i = 0; (i < nbonds); i += GMX_SIMD_REAL_WIDTH*nfa1)
1977     {
1978         /* Collect atoms quadruplets for GMX_SIMD_REAL_WIDTH dihedrals.
1979          * iu indexes into forceatoms, we should not let iu go beyond nbonds.
1980          */
1981         iu = i;
1982         for (s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
1983         {
1984             type  = forceatoms[iu];
1985             ai[s] = forceatoms[iu+1];
1986             aj[s] = forceatoms[iu+2];
1987             ak[s] = forceatoms[iu+3];
1988             al[s] = forceatoms[iu+4];
1989
1990             /* At the end fill the arrays with the last atoms and 0 params */
1991             if (i + s*nfa1 < nbonds)
1992             {
1993                 /* We don't need the first parameter, since that's a constant
1994                  * which only affects the energies, not the forces.
1995                  */
1996                 for (j = 1; j < NR_RBDIHS; j++)
1997                 {
1998                     parm[j*GMX_SIMD_REAL_WIDTH + s] =
1999                         forceparams[type].rbdihs.rbcA[j];
2000                 }
2001
2002                 if (iu + nfa1 < nbonds)
2003                 {
2004                     iu += nfa1;
2005                 }
2006             }
2007             else
2008             {
2009                 for (j = 1; j < NR_RBDIHS; j++)
2010                 {
2011                     parm[j*GMX_SIMD_REAL_WIDTH + s] = 0;
2012                 }
2013             }
2014         }
2015
2016         /* Caclulate GMX_SIMD_REAL_WIDTH dihedral angles at once */
2017         dih_angle_simd(x, ai, aj, ak, al, pbc_simd,
2018                        &phi_S,
2019                        &mx_S, &my_S, &mz_S,
2020                        &nx_S, &ny_S, &nz_S,
2021                        &nrkj_m2_S,
2022                        &nrkj_n2_S,
2023                        &p_S, &q_S);
2024
2025         /* Change to polymer convention */
2026         phi_S = phi_S - pi_S;
2027
2028         sincos(phi_S, &sin_S, &cos_S);
2029
2030         ddphi_S   = setZero();
2031         c_S       = one_S;
2032         cosfac_S  = one_S;
2033         for (j = 1; j < NR_RBDIHS; j++)
2034         {
2035             parm_S   = load<SimdReal>(parm + j*GMX_SIMD_REAL_WIDTH);
2036             ddphi_S  = fma(c_S * parm_S, cosfac_S, ddphi_S);
2037             cosfac_S = cosfac_S * cos_S;
2038             c_S      = c_S + one_S;
2039         }
2040
2041         /* Note that here we do not use the minus sign which is present
2042          * in the normal RB code. This is corrected for through (m)sf below.
2043          */
2044         ddphi_S  = ddphi_S * sin_S;
2045
2046         sf_i_S   = ddphi_S * nrkj_m2_S;
2047         msf_l_S  = ddphi_S * nrkj_n2_S;
2048
2049         /* After this m?_S will contain f[i] */
2050         mx_S     = sf_i_S * mx_S;
2051         my_S     = sf_i_S * my_S;
2052         mz_S     = sf_i_S * mz_S;
2053
2054         /* After this m?_S will contain -f[l] */
2055         nx_S     = msf_l_S * nx_S;
2056         ny_S     = msf_l_S * ny_S;
2057         nz_S     = msf_l_S * nz_S;
2058
2059         do_dih_fup_noshiftf_simd(ai, aj, ak, al,
2060                                  p_S, q_S,
2061                                  mx_S, my_S, mz_S,
2062                                  nx_S, ny_S, nz_S,
2063                                  f);
2064     }
2065
2066     return 0;
2067 }
2068
2069 #endif // GMX_SIMD_HAVE_REAL
2070
2071
2072 template <BondedKernelFlavor flavor>
2073 real idihs(int nbonds,
2074            const t_iatom forceatoms[], const t_iparams forceparams[],
2075            const rvec x[], rvec4 f[], rvec fshift[],
2076            const t_pbc *pbc, const t_graph *g,
2077            real lambda, real *dvdlambda,
2078            const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2079            int gmx_unused *global_atom_index)
2080 {
2081     int  i, type, ai, aj, ak, al;
2082     int  t1, t2, t3;
2083     real phi, phi0, dphi0, ddphi, vtot;
2084     rvec r_ij, r_kj, r_kl, m, n;
2085     real L1, kk, dp, dp2, kA, kB, pA, pB, dvdl_term;
2086
2087     L1        = 1.0-lambda;
2088     dvdl_term = 0;
2089     vtot      = 0.0;
2090     for (i = 0; (i < nbonds); )
2091     {
2092         type = forceatoms[i++];
2093         ai   = forceatoms[i++];
2094         aj   = forceatoms[i++];
2095         ak   = forceatoms[i++];
2096         al   = forceatoms[i++];
2097
2098         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
2099                         &t1, &t2, &t3);  /*  84         */
2100
2101         /* phi can jump if phi0 is close to Pi/-Pi, which will cause huge
2102          * force changes if we just apply a normal harmonic.
2103          * Instead, we first calculate phi-phi0 and take it modulo (-Pi,Pi).
2104          * This means we will never have the periodicity problem, unless
2105          * the dihedral is Pi away from phiO, which is very unlikely due to
2106          * the potential.
2107          */
2108         kA = forceparams[type].harmonic.krA;
2109         kB = forceparams[type].harmonic.krB;
2110         pA = forceparams[type].harmonic.rA;
2111         pB = forceparams[type].harmonic.rB;
2112
2113         kk    = L1*kA + lambda*kB;
2114         phi0  = (L1*pA + lambda*pB)*DEG2RAD;
2115         dphi0 = (pB - pA)*DEG2RAD;
2116
2117         dp = phi-phi0;
2118
2119         make_dp_periodic(&dp);
2120
2121         dp2 = dp*dp;
2122
2123         vtot += 0.5*kk*dp2;
2124         ddphi = -kk*dp;
2125
2126         dvdl_term += 0.5*(kB - kA)*dp2 - kk*dphi0*dp;
2127
2128         do_dih_fup<flavor>(ai, aj, ak, al, -ddphi, r_ij, r_kj, r_kl, m, n,
2129                            f, fshift, pbc, g, x, t1, t2, t3); /* 112            */
2130         /* 218 TOTAL    */
2131     }
2132
2133     *dvdlambda += dvdl_term;
2134     return vtot;
2135 }
2136
2137 /*! \brief Computes angle restraints of two different types */
2138 template <BondedKernelFlavor flavor>
2139 real low_angres(int nbonds,
2140                 const t_iatom forceatoms[], const t_iparams forceparams[],
2141                 const rvec x[], rvec4 f[], rvec fshift[],
2142                 const t_pbc *pbc, const t_graph *g,
2143                 real lambda, real *dvdlambda,
2144                 gmx_bool bZAxis)
2145 {
2146     int  i, m, type, ai, aj, ak, al;
2147     int  t1, t2;
2148     real phi, cos_phi, cos_phi2, vid, vtot, dVdphi;
2149     rvec r_ij, r_kl, f_i, f_k = {0, 0, 0};
2150     real st, sth, nrij2, nrkl2, c, cij, ckl;
2151
2152     ivec dt;
2153     t2 = 0; /* avoid warning with gcc-3.3. It is never used uninitialized */
2154
2155     vtot = 0.0;
2156     ak   = al = 0; /* to avoid warnings */
2157     for (i = 0; i < nbonds; )
2158     {
2159         type = forceatoms[i++];
2160         ai   = forceatoms[i++];
2161         aj   = forceatoms[i++];
2162         t1   = pbc_rvec_sub(pbc, x[aj], x[ai], r_ij);       /*  3               */
2163         if (!bZAxis)
2164         {
2165             ak   = forceatoms[i++];
2166             al   = forceatoms[i++];
2167             t2   = pbc_rvec_sub(pbc, x[al], x[ak], r_kl);  /*  3                */
2168         }
2169         else
2170         {
2171             r_kl[XX] = 0;
2172             r_kl[YY] = 0;
2173             r_kl[ZZ] = 1;
2174         }
2175
2176         cos_phi = cos_angle(r_ij, r_kl); /* 25          */
2177         phi     = std::acos(cos_phi);    /* 10           */
2178
2179         *dvdlambda += dopdihs_min(forceparams[type].pdihs.cpA,
2180                                   forceparams[type].pdihs.cpB,
2181                                   forceparams[type].pdihs.phiA,
2182                                   forceparams[type].pdihs.phiB,
2183                                   forceparams[type].pdihs.mult,
2184                                   phi, lambda, &vid, &dVdphi); /*  40  */
2185
2186         vtot += vid;
2187
2188         cos_phi2 = gmx::square(cos_phi);                /*   1          */
2189         if (cos_phi2 < 1)
2190         {
2191             st    = -dVdphi*gmx::invsqrt(1 - cos_phi2); /*  12          */
2192             sth   = st*cos_phi;                         /*   1          */
2193             nrij2 = iprod(r_ij, r_ij);                  /*   5          */
2194             nrkl2 = iprod(r_kl, r_kl);                  /*   5          */
2195
2196             c   = st*gmx::invsqrt(nrij2*nrkl2);         /*  11          */
2197             cij = sth/nrij2;                            /*  10          */
2198             ckl = sth/nrkl2;                            /*  10          */
2199
2200             for (m = 0; m < DIM; m++)                   /*  18+18       */
2201             {
2202                 f_i[m]    = (c*r_kl[m]-cij*r_ij[m]);
2203                 f[ai][m] += f_i[m];
2204                 f[aj][m] -= f_i[m];
2205                 if (!bZAxis)
2206                 {
2207                     f_k[m]    = (c*r_ij[m]-ckl*r_kl[m]);
2208                     f[ak][m] += f_k[m];
2209                     f[al][m] -= f_k[m];
2210                 }
2211             }
2212
2213             if (computeVirial(flavor))
2214             {
2215                 if (g)
2216                 {
2217                     ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
2218                     t1 = IVEC2IS(dt);
2219                 }
2220                 rvec_inc(fshift[t1], f_i);
2221                 rvec_dec(fshift[CENTRAL], f_i);
2222                 if (!bZAxis)
2223                 {
2224                     if (g)
2225                     {
2226                         ivec_sub(SHIFT_IVEC(g, ak), SHIFT_IVEC(g, al), dt);
2227                         t2 = IVEC2IS(dt);
2228                     }
2229                     rvec_inc(fshift[t2], f_k);
2230                     rvec_dec(fshift[CENTRAL], f_k);
2231                 }
2232             }
2233         }
2234     }
2235
2236     return vtot; /*  184 / 157 (bZAxis)  total  */
2237 }
2238
2239 template <BondedKernelFlavor flavor>
2240 real angres(int nbonds,
2241             const t_iatom forceatoms[], const t_iparams forceparams[],
2242             const rvec x[], rvec4 f[], rvec fshift[],
2243             const t_pbc *pbc, const t_graph *g,
2244             real lambda, real *dvdlambda,
2245             const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2246             int gmx_unused *global_atom_index)
2247 {
2248     return low_angres<flavor>(nbonds, forceatoms, forceparams, x, f, fshift, pbc, g,
2249                               lambda, dvdlambda, FALSE);
2250 }
2251
2252 template <BondedKernelFlavor flavor>
2253 real angresz(int nbonds,
2254              const t_iatom forceatoms[], const t_iparams forceparams[],
2255              const rvec x[], rvec4 f[], rvec fshift[],
2256              const t_pbc *pbc, const t_graph *g,
2257              real lambda, real *dvdlambda,
2258              const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2259              int gmx_unused *global_atom_index)
2260 {
2261     return low_angres<flavor>(nbonds, forceatoms, forceparams, x, f, fshift, pbc, g,
2262                               lambda, dvdlambda, TRUE);
2263 }
2264
2265 template <BondedKernelFlavor flavor>
2266 real dihres(int nbonds,
2267             const t_iatom forceatoms[], const t_iparams forceparams[],
2268             const rvec x[], rvec4 f[], rvec fshift[],
2269             const t_pbc *pbc, const t_graph *g,
2270             real lambda, real *dvdlambda,
2271             const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2272             int gmx_unused  *global_atom_index)
2273 {
2274     real vtot = 0;
2275     int  ai, aj, ak, al, i, type, t1, t2, t3;
2276     real phi0A, phi0B, dphiA, dphiB, kfacA, kfacB, phi0, dphi, kfac;
2277     real phi, ddphi, ddp, ddp2, dp, d2r, L1;
2278     rvec r_ij, r_kj, r_kl, m, n;
2279
2280     L1 = 1.0-lambda;
2281
2282     d2r = DEG2RAD;
2283
2284     for (i = 0; (i < nbonds); )
2285     {
2286         type = forceatoms[i++];
2287         ai   = forceatoms[i++];
2288         aj   = forceatoms[i++];
2289         ak   = forceatoms[i++];
2290         al   = forceatoms[i++];
2291
2292         phi0A  = forceparams[type].dihres.phiA*d2r;
2293         dphiA  = forceparams[type].dihres.dphiA*d2r;
2294         kfacA  = forceparams[type].dihres.kfacA;
2295
2296         phi0B  = forceparams[type].dihres.phiB*d2r;
2297         dphiB  = forceparams[type].dihres.dphiB*d2r;
2298         kfacB  = forceparams[type].dihres.kfacB;
2299
2300         phi0  = L1*phi0A + lambda*phi0B;
2301         dphi  = L1*dphiA + lambda*dphiB;
2302         kfac  = L1*kfacA + lambda*kfacB;
2303
2304         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
2305                         &t1, &t2, &t3);
2306         /* 84 flops */
2307
2308         /* phi can jump if phi0 is close to Pi/-Pi, which will cause huge
2309          * force changes if we just apply a normal harmonic.
2310          * Instead, we first calculate phi-phi0 and take it modulo (-Pi,Pi).
2311          * This means we will never have the periodicity problem, unless
2312          * the dihedral is Pi away from phiO, which is very unlikely due to
2313          * the potential.
2314          */
2315         dp = phi-phi0;
2316         make_dp_periodic(&dp);
2317
2318         if (dp > dphi)
2319         {
2320             ddp = dp-dphi;
2321         }
2322         else if (dp < -dphi)
2323         {
2324             ddp = dp+dphi;
2325         }
2326         else
2327         {
2328             ddp = 0;
2329         }
2330
2331         if (ddp != 0.0)
2332         {
2333             ddp2  = ddp*ddp;
2334             vtot += 0.5*kfac*ddp2;
2335             ddphi = kfac*ddp;
2336
2337             *dvdlambda += 0.5*(kfacB - kfacA)*ddp2;
2338             /* lambda dependence from changing restraint distances */
2339             if (ddp > 0)
2340             {
2341                 *dvdlambda -= kfac*ddp*((dphiB - dphiA)+(phi0B - phi0A));
2342             }
2343             else if (ddp < 0)
2344             {
2345                 *dvdlambda += kfac*ddp*((dphiB - dphiA)-(phi0B - phi0A));
2346             }
2347             do_dih_fup<flavor>(ai, aj, ak, al, ddphi, r_ij, r_kj, r_kl, m, n,
2348                                f, fshift, pbc, g, x, t1, t2, t3);      /* 112           */
2349         }
2350     }
2351     return vtot;
2352 }
2353
2354
2355 real unimplemented(int gmx_unused nbonds,
2356                    const t_iatom gmx_unused forceatoms[], const t_iparams gmx_unused forceparams[],
2357                    const rvec gmx_unused x[], rvec4 gmx_unused f[], rvec gmx_unused fshift[],
2358                    const t_pbc gmx_unused *pbc, const t_graph  gmx_unused *g,
2359                    real gmx_unused lambda, real gmx_unused *dvdlambda,
2360                    const t_mdatoms  gmx_unused *md, t_fcdata gmx_unused *fcd,
2361                    int gmx_unused *global_atom_index)
2362 {
2363     gmx_impl("*** you are using a not implemented function");
2364 }
2365
2366 template <BondedKernelFlavor flavor>
2367 real restrangles(int nbonds,
2368                  const t_iatom forceatoms[], const t_iparams forceparams[],
2369                  const rvec x[], rvec4 f[], rvec fshift[],
2370                  const t_pbc *pbc, const t_graph *g,
2371                  real gmx_unused lambda, real gmx_unused *dvdlambda,
2372                  const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2373                  int gmx_unused *global_atom_index)
2374 {
2375     int    i, d, ai, aj, ak, type, m;
2376     int    t1, t2;
2377     real   v, vtot;
2378     ivec   jt, dt_ij, dt_kj;
2379     rvec   f_i, f_j, f_k;
2380     double prefactor, ratio_ante, ratio_post;
2381     rvec   delta_ante, delta_post, vec_temp;
2382
2383     vtot = 0.0;
2384     for (i = 0; (i < nbonds); )
2385     {
2386         type = forceatoms[i++];
2387         ai   = forceatoms[i++];
2388         aj   = forceatoms[i++];
2389         ak   = forceatoms[i++];
2390
2391         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], vec_temp);
2392         pbc_rvec_sub(pbc, x[aj], x[ai], delta_ante);
2393         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], delta_post);
2394
2395
2396         /* This function computes factors needed for restricted angle potential.
2397          * The restricted angle potential is used in coarse-grained simulations to avoid singularities
2398          * when three particles align and the dihedral angle and dihedral potential
2399          * cannot be calculated. This potential is calculated using the formula:
2400            real restrangles(int nbonds,
2401             const t_iatom forceatoms[],const t_iparams forceparams[],
2402             const rvec x[],rvec4 f[],rvec fshift[],
2403             const t_pbc *pbc,const t_graph *g,
2404             real gmx_unused lambda,real gmx_unused *dvdlambda,
2405             const t_mdatoms gmx_unused *md,t_fcdata gmx_unused *fcd,
2406             int gmx_unused *global_atom_index)
2407            {
2408            int  i, d, ai, aj, ak, type, m;
2409            int t1, t2;
2410            rvec r_ij,r_kj;
2411            real v, vtot;
2412            ivec jt,dt_ij,dt_kj;
2413            rvec f_i, f_j, f_k;
2414            real prefactor, ratio_ante, ratio_post;
2415            rvec delta_ante, delta_post, vec_temp;
2416
2417            vtot = 0.0;
2418            for(i=0; (i<nbonds); )
2419            {
2420            type = forceatoms[i++];
2421            ai   = forceatoms[i++];
2422            aj   = forceatoms[i++];
2423            ak   = forceatoms[i++];
2424
2425          * \f[V_{\rm ReB}(\theta_i) = \frac{1}{2} k_{\theta} \frac{(\cos\theta_i - \cos\theta_0)^2}
2426          * {\sin^2\theta_i}\f] ({eq:ReB} and ref \cite{MonicaGoga2013} from the manual).
2427          * For more explanations see comments file "restcbt.h". */
2428
2429         compute_factors_restangles(type, forceparams,  delta_ante, delta_post,
2430                                    &prefactor, &ratio_ante, &ratio_post, &v);
2431
2432         /*   Forces are computed per component */
2433         for (d = 0; d < DIM; d++)
2434         {
2435             f_i[d] = prefactor * (ratio_ante * delta_ante[d] - delta_post[d]);
2436             f_j[d] = prefactor * ((ratio_post + 1.0) * delta_post[d] - (ratio_ante + 1.0) * delta_ante[d]);
2437             f_k[d] = prefactor * (delta_ante[d] - ratio_post * delta_post[d]);
2438         }
2439
2440         /*   Computation of potential energy   */
2441
2442         vtot += v;
2443
2444         /*   Update forces */
2445
2446         for (m = 0; (m < DIM); m++)
2447         {
2448             f[ai][m] += f_i[m];
2449             f[aj][m] += f_j[m];
2450             f[ak][m] += f_k[m];
2451         }
2452
2453         if (computeVirial(flavor))
2454         {
2455             if (g)
2456             {
2457                 copy_ivec(SHIFT_IVEC(g, aj), jt);
2458                 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
2459                 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
2460                 t1 = IVEC2IS(dt_ij);
2461                 t2 = IVEC2IS(dt_kj);
2462             }
2463
2464             rvec_inc(fshift[t1], f_i);
2465             rvec_inc(fshift[CENTRAL], f_j);
2466             rvec_inc(fshift[t2], f_k);
2467         }
2468     }
2469     return vtot;
2470 }
2471
2472
2473 template <BondedKernelFlavor flavor>
2474 real restrdihs(int nbonds,
2475                const t_iatom forceatoms[], const t_iparams forceparams[],
2476                const rvec x[], rvec4 f[], rvec fshift[],
2477                const t_pbc *pbc, const t_graph *g,
2478                real gmx_unused lambda, real gmx_unused *dvlambda,
2479                const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2480                int gmx_unused *global_atom_index)
2481 {
2482     int  i, d, type, ai, aj, ak, al;
2483     rvec f_i, f_j, f_k, f_l;
2484     rvec dx_jl;
2485     ivec jt, dt_ij, dt_kj, dt_lj;
2486     int  t1, t2, t3;
2487     real v, vtot;
2488     rvec delta_ante,  delta_crnt, delta_post, vec_temp;
2489     real factor_phi_ai_ante, factor_phi_ai_crnt, factor_phi_ai_post;
2490     real factor_phi_aj_ante, factor_phi_aj_crnt, factor_phi_aj_post;
2491     real factor_phi_ak_ante, factor_phi_ak_crnt, factor_phi_ak_post;
2492     real factor_phi_al_ante, factor_phi_al_crnt, factor_phi_al_post;
2493     real prefactor_phi;
2494
2495
2496     vtot = 0.0;
2497     for (i = 0; (i < nbonds); )
2498     {
2499         type = forceatoms[i++];
2500         ai   = forceatoms[i++];
2501         aj   = forceatoms[i++];
2502         ak   = forceatoms[i++];
2503         al   = forceatoms[i++];
2504
2505         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], vec_temp);
2506         pbc_rvec_sub(pbc, x[aj], x[ai], delta_ante);
2507         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], delta_crnt);
2508         pbc_rvec_sub(pbc, x[ak], x[al], vec_temp);
2509         pbc_rvec_sub(pbc, x[al], x[ak], delta_post);
2510
2511         /* This function computes factors needed for restricted angle potential.
2512          * The restricted angle potential is used in coarse-grained simulations to avoid singularities
2513          * when three particles align and the dihedral angle and dihedral potential cannot be calculated.
2514          * This potential is calculated using the formula:
2515          * \f[V_{\rm ReB}(\theta_i) = \frac{1}{2} k_{\theta}
2516          * \frac{(\cos\theta_i - \cos\theta_0)^2}{\sin^2\theta_i}\f]
2517          * ({eq:ReB} and ref \cite{MonicaGoga2013} from the manual).
2518          * For more explanations see comments file "restcbt.h" */
2519
2520         compute_factors_restrdihs(type, forceparams,
2521                                   delta_ante, delta_crnt, delta_post,
2522                                   &factor_phi_ai_ante, &factor_phi_ai_crnt, &factor_phi_ai_post,
2523                                   &factor_phi_aj_ante, &factor_phi_aj_crnt, &factor_phi_aj_post,
2524                                   &factor_phi_ak_ante, &factor_phi_ak_crnt, &factor_phi_ak_post,
2525                                   &factor_phi_al_ante, &factor_phi_al_crnt, &factor_phi_al_post,
2526                                   &prefactor_phi, &v);
2527
2528
2529         /*      Computation of forces per component */
2530         for (d = 0; d < DIM; d++)
2531         {
2532             f_i[d] = prefactor_phi * (factor_phi_ai_ante * delta_ante[d] + factor_phi_ai_crnt * delta_crnt[d] + factor_phi_ai_post * delta_post[d]);
2533             f_j[d] = prefactor_phi * (factor_phi_aj_ante * delta_ante[d] + factor_phi_aj_crnt * delta_crnt[d] + factor_phi_aj_post * delta_post[d]);
2534             f_k[d] = prefactor_phi * (factor_phi_ak_ante * delta_ante[d] + factor_phi_ak_crnt * delta_crnt[d] + factor_phi_ak_post * delta_post[d]);
2535             f_l[d] = prefactor_phi * (factor_phi_al_ante * delta_ante[d] + factor_phi_al_crnt * delta_crnt[d] + factor_phi_al_post * delta_post[d]);
2536         }
2537         /*      Computation of the energy */
2538
2539         vtot += v;
2540
2541
2542
2543         /*    Updating the forces */
2544
2545         rvec_inc(f[ai], f_i);
2546         rvec_inc(f[aj], f_j);
2547         rvec_inc(f[ak], f_k);
2548         rvec_inc(f[al], f_l);
2549
2550         if (computeVirial(flavor))
2551         {
2552             if (g)
2553             {
2554                 copy_ivec(SHIFT_IVEC(g, aj), jt);
2555                 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
2556                 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
2557                 ivec_sub(SHIFT_IVEC(g, al), jt, dt_lj);
2558                 t1 = IVEC2IS(dt_ij);
2559                 t2 = IVEC2IS(dt_kj);
2560                 t3 = IVEC2IS(dt_lj);
2561             }
2562             else if (pbc)
2563             {
2564                 t3 = pbc_rvec_sub(pbc, x[al], x[aj], dx_jl);
2565             }
2566             else
2567             {
2568                 t3 = CENTRAL;
2569             }
2570
2571             rvec_inc(fshift[t1], f_i);
2572             rvec_inc(fshift[CENTRAL], f_j);
2573             rvec_inc(fshift[t2], f_k);
2574             rvec_inc(fshift[t3], f_l);
2575         }
2576     }
2577
2578     return vtot;
2579 }
2580
2581
2582 template <BondedKernelFlavor flavor>
2583 real cbtdihs(int nbonds,
2584              const t_iatom forceatoms[], const t_iparams forceparams[],
2585              const rvec x[], rvec4 f[], rvec fshift[],
2586              const t_pbc *pbc, const t_graph *g,
2587              real gmx_unused lambda, real gmx_unused *dvdlambda,
2588              const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2589              int gmx_unused *global_atom_index)
2590 {
2591     int  type, ai, aj, ak, al, i, d;
2592     int  t1, t2, t3;
2593     real v, vtot;
2594     rvec vec_temp;
2595     rvec f_i, f_j, f_k, f_l;
2596     ivec jt, dt_ij, dt_kj, dt_lj;
2597     rvec dx_jl;
2598     rvec delta_ante, delta_crnt, delta_post;
2599     rvec f_phi_ai, f_phi_aj, f_phi_ak, f_phi_al;
2600     rvec f_theta_ante_ai, f_theta_ante_aj, f_theta_ante_ak;
2601     rvec f_theta_post_aj, f_theta_post_ak, f_theta_post_al;
2602
2603
2604
2605
2606     vtot = 0.0;
2607     for (i = 0; (i < nbonds); )
2608     {
2609         type = forceatoms[i++];
2610         ai   = forceatoms[i++];
2611         aj   = forceatoms[i++];
2612         ak   = forceatoms[i++];
2613         al   = forceatoms[i++];
2614
2615
2616         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], vec_temp);
2617         pbc_rvec_sub(pbc, x[aj], x[ai], delta_ante);
2618         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], vec_temp);
2619         pbc_rvec_sub(pbc, x[ak], x[aj], delta_crnt);
2620         pbc_rvec_sub(pbc, x[ak], x[al], vec_temp);
2621         pbc_rvec_sub(pbc, x[al], x[ak], delta_post);
2622
2623         /* \brief Compute factors for CBT potential
2624          * The combined bending-torsion potential goes to zero in a very smooth manner, eliminating the numerical
2625          * instabilities, when three coarse-grained particles align and the dihedral angle and standard
2626          * dihedral potentials cannot be calculated. The CBT potential is calculated using the formula:
2627          * \f[V_{\rm CBT}(\theta_{i-1}, \theta_i, \phi_i) = k_{\phi} \sin^3\theta_{i-1} \sin^3\theta_{i}
2628          * \sum_{n=0}^4 { a_n \cos^n\phi_i}\f] ({eq:CBT} and ref \cite{MonicaGoga2013} from the manual).
2629          * It contains in its expression not only the dihedral angle \f$\phi\f$
2630          * but also \f[\theta_{i-1}\f] (theta_ante bellow) and \f[\theta_{i}\f] (theta_post bellow)
2631          * --- the adjacent bending angles.
2632          * For more explanations see comments file "restcbt.h". */
2633
2634         compute_factors_cbtdihs(type, forceparams, delta_ante, delta_crnt, delta_post,
2635                                 f_phi_ai, f_phi_aj, f_phi_ak, f_phi_al,
2636                                 f_theta_ante_ai, f_theta_ante_aj, f_theta_ante_ak,
2637                                 f_theta_post_aj, f_theta_post_ak, f_theta_post_al,
2638                                 &v);
2639
2640
2641         /*      Acumulate the resuts per beads */
2642         for (d = 0; d < DIM; d++)
2643         {
2644             f_i[d] = f_phi_ai[d] + f_theta_ante_ai[d];
2645             f_j[d] = f_phi_aj[d] + f_theta_ante_aj[d] + f_theta_post_aj[d];
2646             f_k[d] = f_phi_ak[d] + f_theta_ante_ak[d] + f_theta_post_ak[d];
2647             f_l[d] = f_phi_al[d] + f_theta_post_al[d];
2648         }
2649
2650         /*      Compute the potential energy */
2651
2652         vtot += v;
2653
2654
2655         /*  Updating the forces */
2656         rvec_inc(f[ai], f_i);
2657         rvec_inc(f[aj], f_j);
2658         rvec_inc(f[ak], f_k);
2659         rvec_inc(f[al], f_l);
2660
2661
2662         if (computeVirial(flavor))
2663         {
2664             if (g)
2665             {
2666                 copy_ivec(SHIFT_IVEC(g, aj), jt);
2667                 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
2668                 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
2669                 ivec_sub(SHIFT_IVEC(g, al), jt, dt_lj);
2670                 t1 = IVEC2IS(dt_ij);
2671                 t2 = IVEC2IS(dt_kj);
2672                 t3 = IVEC2IS(dt_lj);
2673             }
2674             else if (pbc)
2675             {
2676                 t3 = pbc_rvec_sub(pbc, x[al], x[aj], dx_jl);
2677             }
2678             else
2679             {
2680                 t3 = CENTRAL;
2681             }
2682
2683             rvec_inc(fshift[t1], f_i);
2684             rvec_inc(fshift[CENTRAL], f_j);
2685             rvec_inc(fshift[t2], f_k);
2686             rvec_inc(fshift[t3], f_l);
2687         }
2688     }
2689
2690     return vtot;
2691 }
2692
2693 template <BondedKernelFlavor flavor>
2694 std::enable_if_t<flavor != BondedKernelFlavor::ForcesSimdWhenAvailable || !GMX_SIMD_HAVE_REAL, real>
2695 rbdihs(int nbonds,
2696        const t_iatom forceatoms[], const t_iparams forceparams[],
2697        const rvec x[], rvec4 f[], rvec fshift[],
2698        const t_pbc *pbc, const t_graph *g,
2699        real lambda, real *dvdlambda,
2700        const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2701        int gmx_unused *global_atom_index)
2702 {
2703     const real c0 = 0.0, c1 = 1.0, c2 = 2.0, c3 = 3.0, c4 = 4.0, c5 = 5.0;
2704     int        type, ai, aj, ak, al, i, j;
2705     int        t1, t2, t3;
2706     rvec       r_ij, r_kj, r_kl, m, n;
2707     real       parmA[NR_RBDIHS];
2708     real       parmB[NR_RBDIHS];
2709     real       parm[NR_RBDIHS];
2710     real       cos_phi, phi, rbp, rbpBA;
2711     real       v, ddphi, sin_phi;
2712     real       cosfac, vtot;
2713     real       L1        = 1.0-lambda;
2714     real       dvdl_term = 0;
2715
2716     vtot = 0.0;
2717     for (i = 0; (i < nbonds); )
2718     {
2719         type = forceatoms[i++];
2720         ai   = forceatoms[i++];
2721         aj   = forceatoms[i++];
2722         ak   = forceatoms[i++];
2723         al   = forceatoms[i++];
2724
2725         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
2726                         &t1, &t2, &t3);  /*  84         */
2727
2728         /* Change to polymer convention */
2729         if (phi < c0)
2730         {
2731             phi += M_PI;
2732         }
2733         else
2734         {
2735             phi -= M_PI;    /*   1              */
2736
2737         }
2738         cos_phi = std::cos(phi);
2739         /* Beware of accuracy loss, cannot use 1-sqrt(cos^2) ! */
2740         sin_phi = std::sin(phi);
2741
2742         for (j = 0; (j < NR_RBDIHS); j++)
2743         {
2744             parmA[j] = forceparams[type].rbdihs.rbcA[j];
2745             parmB[j] = forceparams[type].rbdihs.rbcB[j];
2746             parm[j]  = L1*parmA[j]+lambda*parmB[j];
2747         }
2748         /* Calculate cosine powers */
2749         /* Calculate the energy */
2750         /* Calculate the derivative */
2751
2752         v            = parm[0];
2753         dvdl_term   += (parmB[0]-parmA[0]);
2754         ddphi        = c0;
2755         cosfac       = c1;
2756
2757         rbp          = parm[1];
2758         rbpBA        = parmB[1]-parmA[1];
2759         ddphi       += rbp*cosfac;
2760         cosfac      *= cos_phi;
2761         v           += cosfac*rbp;
2762         dvdl_term   += cosfac*rbpBA;
2763         rbp          = parm[2];
2764         rbpBA        = parmB[2]-parmA[2];
2765         ddphi       += c2*rbp*cosfac;
2766         cosfac      *= cos_phi;
2767         v           += cosfac*rbp;
2768         dvdl_term   += cosfac*rbpBA;
2769         rbp          = parm[3];
2770         rbpBA        = parmB[3]-parmA[3];
2771         ddphi       += c3*rbp*cosfac;
2772         cosfac      *= cos_phi;
2773         v           += cosfac*rbp;
2774         dvdl_term   += cosfac*rbpBA;
2775         rbp          = parm[4];
2776         rbpBA        = parmB[4]-parmA[4];
2777         ddphi       += c4*rbp*cosfac;
2778         cosfac      *= cos_phi;
2779         v           += cosfac*rbp;
2780         dvdl_term   += cosfac*rbpBA;
2781         rbp          = parm[5];
2782         rbpBA        = parmB[5]-parmA[5];
2783         ddphi       += c5*rbp*cosfac;
2784         cosfac      *= cos_phi;
2785         v           += cosfac*rbp;
2786         dvdl_term   += cosfac*rbpBA;
2787
2788         ddphi = -ddphi*sin_phi;         /*  11          */
2789
2790         do_dih_fup<flavor>(ai, aj, ak, al, ddphi, r_ij, r_kj, r_kl, m, n,
2791                            f, fshift, pbc, g, x, t1, t2, t3); /* 112            */
2792         vtot += v;
2793     }
2794     *dvdlambda += dvdl_term;
2795
2796     return vtot;
2797 }
2798
2799 //! \endcond
2800
2801 /*! \brief Mysterious undocumented function */
2802 int
2803 cmap_setup_grid_index(int ip, int grid_spacing, int *ipm1, int *ipp1, int *ipp2)
2804 {
2805     int im1, ip1, ip2;
2806
2807     if (ip < 0)
2808     {
2809         ip = ip + grid_spacing - 1;
2810     }
2811     else if (ip > grid_spacing)
2812     {
2813         ip = ip - grid_spacing - 1;
2814     }
2815
2816     im1 = ip - 1;
2817     ip1 = ip + 1;
2818     ip2 = ip + 2;
2819
2820     if (ip == 0)
2821     {
2822         im1 = grid_spacing - 1;
2823     }
2824     else if (ip == grid_spacing-2)
2825     {
2826         ip2 = 0;
2827     }
2828     else if (ip == grid_spacing-1)
2829     {
2830         ip1 = 0;
2831         ip2 = 1;
2832     }
2833
2834     *ipm1 = im1;
2835     *ipp1 = ip1;
2836     *ipp2 = ip2;
2837
2838     return ip;
2839
2840 }
2841
2842 } // namespace
2843
2844 real
2845 cmap_dihs(int nbonds,
2846           const t_iatom forceatoms[], const t_iparams forceparams[],
2847           const gmx_cmap_t *cmap_grid,
2848           const rvec x[], rvec4 f[], rvec fshift[],
2849           const struct t_pbc *pbc, const struct t_graph *g,
2850           real gmx_unused lambda, real gmx_unused *dvdlambda,
2851           const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2852           int  gmx_unused *global_atom_index)
2853 {
2854     int         i, n;
2855     int         ai, aj, ak, al, am;
2856     int         a1i, a1j, a1k, a1l, a2i, a2j, a2k, a2l;
2857     int         type;
2858     int         t11, t21, t31, t12, t22, t32;
2859     int         iphi1, ip1m1, ip1p1, ip1p2;
2860     int         iphi2, ip2m1, ip2p1, ip2p2;
2861     int         l1, l2, l3;
2862     int         pos1, pos2, pos3, pos4;
2863
2864     real        ty[4], ty1[4], ty2[4], ty12[4], tx[16];
2865     real        phi1, cos_phi1, sin_phi1, xphi1;
2866     real        phi2, cos_phi2, sin_phi2, xphi2;
2867     real        dx, tt, tu, e, df1, df2, vtot;
2868     real        ra21, rb21, rg21, rg1, rgr1, ra2r1, rb2r1, rabr1;
2869     real        ra22, rb22, rg22, rg2, rgr2, ra2r2, rb2r2, rabr2;
2870     real        fg1, hg1, fga1, hgb1, gaa1, gbb1;
2871     real        fg2, hg2, fga2, hgb2, gaa2, gbb2;
2872     real        fac;
2873
2874     rvec        r1_ij, r1_kj, r1_kl, m1, n1;
2875     rvec        r2_ij, r2_kj, r2_kl, m2, n2;
2876     rvec        f1_i, f1_j, f1_k, f1_l;
2877     rvec        f2_i, f2_j, f2_k, f2_l;
2878     rvec        a1, b1, a2, b2;
2879     rvec        f1, g1, h1, f2, g2, h2;
2880     rvec        dtf1, dtg1, dth1, dtf2, dtg2, dth2;
2881     ivec        jt1, dt1_ij, dt1_kj, dt1_lj;
2882     ivec        jt2, dt2_ij, dt2_kj, dt2_lj;
2883
2884     int         loop_index[4][4] = {
2885         {0, 4, 8, 12},
2886         {1, 5, 9, 13},
2887         {2, 6, 10, 14},
2888         {3, 7, 11, 15}
2889     };
2890
2891     /* Total CMAP energy */
2892     vtot = 0;
2893
2894     for (n = 0; n < nbonds; )
2895     {
2896         /* Five atoms are involved in the two torsions */
2897         type   = forceatoms[n++];
2898         ai     = forceatoms[n++];
2899         aj     = forceatoms[n++];
2900         ak     = forceatoms[n++];
2901         al     = forceatoms[n++];
2902         am     = forceatoms[n++];
2903
2904         /* Which CMAP type is this */
2905         const int   cmapA = forceparams[type].cmap.cmapA;
2906         const real *cmapd = cmap_grid->cmapdata[cmapA].cmap.data();
2907
2908         /* First torsion */
2909         a1i   = ai;
2910         a1j   = aj;
2911         a1k   = ak;
2912         a1l   = al;
2913
2914         phi1  = dih_angle(x[a1i], x[a1j], x[a1k], x[a1l], pbc, r1_ij, r1_kj, r1_kl, m1, n1,
2915                           &t11, &t21, &t31);  /* 84 */
2916
2917         cos_phi1 = std::cos(phi1);
2918
2919         a1[0] = r1_ij[1]*r1_kj[2]-r1_ij[2]*r1_kj[1];
2920         a1[1] = r1_ij[2]*r1_kj[0]-r1_ij[0]*r1_kj[2];
2921         a1[2] = r1_ij[0]*r1_kj[1]-r1_ij[1]*r1_kj[0]; /* 9 */
2922
2923         b1[0] = r1_kl[1]*r1_kj[2]-r1_kl[2]*r1_kj[1];
2924         b1[1] = r1_kl[2]*r1_kj[0]-r1_kl[0]*r1_kj[2];
2925         b1[2] = r1_kl[0]*r1_kj[1]-r1_kl[1]*r1_kj[0]; /* 9 */
2926
2927         pbc_rvec_sub(pbc, x[a1l], x[a1k], h1);
2928
2929         ra21  = iprod(a1, a1);       /* 5 */
2930         rb21  = iprod(b1, b1);       /* 5 */
2931         rg21  = iprod(r1_kj, r1_kj); /* 5 */
2932         rg1   = sqrt(rg21);
2933
2934         rgr1  = 1.0/rg1;
2935         ra2r1 = 1.0/ra21;
2936         rb2r1 = 1.0/rb21;
2937         rabr1 = sqrt(ra2r1*rb2r1);
2938
2939         sin_phi1 = rg1 * rabr1 * iprod(a1, h1) * (-1);
2940
2941         if (cos_phi1 < -0.5 || cos_phi1 > 0.5)
2942         {
2943             phi1 = std::asin(sin_phi1);
2944
2945             if (cos_phi1 < 0)
2946             {
2947                 if (phi1 > 0)
2948                 {
2949                     phi1 = M_PI - phi1;
2950                 }
2951                 else
2952                 {
2953                     phi1 = -M_PI - phi1;
2954                 }
2955             }
2956         }
2957         else
2958         {
2959             phi1 = std::acos(cos_phi1);
2960
2961             if (sin_phi1 < 0)
2962             {
2963                 phi1 = -phi1;
2964             }
2965         }
2966
2967         xphi1 = phi1 + M_PI; /* 1 */
2968
2969         /* Second torsion */
2970         a2i   = aj;
2971         a2j   = ak;
2972         a2k   = al;
2973         a2l   = am;
2974
2975         phi2  = dih_angle(x[a2i], x[a2j], x[a2k], x[a2l], pbc, r2_ij, r2_kj, r2_kl, m2, n2,
2976                           &t12, &t22, &t32); /* 84 */
2977
2978         cos_phi2 = std::cos(phi2);
2979
2980         a2[0] = r2_ij[1]*r2_kj[2]-r2_ij[2]*r2_kj[1];
2981         a2[1] = r2_ij[2]*r2_kj[0]-r2_ij[0]*r2_kj[2];
2982         a2[2] = r2_ij[0]*r2_kj[1]-r2_ij[1]*r2_kj[0]; /* 9 */
2983
2984         b2[0] = r2_kl[1]*r2_kj[2]-r2_kl[2]*r2_kj[1];
2985         b2[1] = r2_kl[2]*r2_kj[0]-r2_kl[0]*r2_kj[2];
2986         b2[2] = r2_kl[0]*r2_kj[1]-r2_kl[1]*r2_kj[0]; /* 9 */
2987
2988         pbc_rvec_sub(pbc, x[a2l], x[a2k], h2);
2989
2990         ra22  = iprod(a2, a2);         /* 5 */
2991         rb22  = iprod(b2, b2);         /* 5 */
2992         rg22  = iprod(r2_kj, r2_kj);   /* 5 */
2993         rg2   = sqrt(rg22);
2994
2995         rgr2  = 1.0/rg2;
2996         ra2r2 = 1.0/ra22;
2997         rb2r2 = 1.0/rb22;
2998         rabr2 = sqrt(ra2r2*rb2r2);
2999
3000         sin_phi2 = rg2 * rabr2 * iprod(a2, h2) * (-1);
3001
3002         if (cos_phi2 < -0.5 || cos_phi2 > 0.5)
3003         {
3004             phi2 = std::asin(sin_phi2);
3005
3006             if (cos_phi2 < 0)
3007             {
3008                 if (phi2 > 0)
3009                 {
3010                     phi2 = M_PI - phi2;
3011                 }
3012                 else
3013                 {
3014                     phi2 = -M_PI - phi2;
3015                 }
3016             }
3017         }
3018         else
3019         {
3020             phi2 = std::acos(cos_phi2);
3021
3022             if (sin_phi2 < 0)
3023             {
3024                 phi2 = -phi2;
3025             }
3026         }
3027
3028         xphi2 = phi2 + M_PI; /* 1 */
3029
3030         /* Range mangling */
3031         if (xphi1 < 0)
3032         {
3033             xphi1 = xphi1 + 2*M_PI;
3034         }
3035         else if (xphi1 >= 2*M_PI)
3036         {
3037             xphi1 = xphi1 - 2*M_PI;
3038         }
3039
3040         if (xphi2 < 0)
3041         {
3042             xphi2 = xphi2 + 2*M_PI;
3043         }
3044         else if (xphi2 >= 2*M_PI)
3045         {
3046             xphi2 = xphi2 - 2*M_PI;
3047         }
3048
3049         /* Number of grid points */
3050         dx = 2*M_PI / cmap_grid->grid_spacing;
3051
3052         /* Where on the grid are we */
3053         iphi1 = static_cast<int>(xphi1/dx);
3054         iphi2 = static_cast<int>(xphi2/dx);
3055
3056         iphi1 = cmap_setup_grid_index(iphi1, cmap_grid->grid_spacing, &ip1m1, &ip1p1, &ip1p2);
3057         iphi2 = cmap_setup_grid_index(iphi2, cmap_grid->grid_spacing, &ip2m1, &ip2p1, &ip2p2);
3058
3059         pos1    = iphi1*cmap_grid->grid_spacing+iphi2;
3060         pos2    = ip1p1*cmap_grid->grid_spacing+iphi2;
3061         pos3    = ip1p1*cmap_grid->grid_spacing+ip2p1;
3062         pos4    = iphi1*cmap_grid->grid_spacing+ip2p1;
3063
3064         ty[0]   = cmapd[pos1*4];
3065         ty[1]   = cmapd[pos2*4];
3066         ty[2]   = cmapd[pos3*4];
3067         ty[3]   = cmapd[pos4*4];
3068
3069         ty1[0]   = cmapd[pos1*4+1];
3070         ty1[1]   = cmapd[pos2*4+1];
3071         ty1[2]   = cmapd[pos3*4+1];
3072         ty1[3]   = cmapd[pos4*4+1];
3073
3074         ty2[0]   = cmapd[pos1*4+2];
3075         ty2[1]   = cmapd[pos2*4+2];
3076         ty2[2]   = cmapd[pos3*4+2];
3077         ty2[3]   = cmapd[pos4*4+2];
3078
3079         ty12[0]   = cmapd[pos1*4+3];
3080         ty12[1]   = cmapd[pos2*4+3];
3081         ty12[2]   = cmapd[pos3*4+3];
3082         ty12[3]   = cmapd[pos4*4+3];
3083
3084         /* Switch to degrees */
3085         dx    = 360.0 / cmap_grid->grid_spacing;
3086         xphi1 = xphi1 * RAD2DEG;
3087         xphi2 = xphi2 * RAD2DEG;
3088
3089         for (i = 0; i < 4; i++) /* 16 */
3090         {
3091             tx[i]    = ty[i];
3092             tx[i+4]  = ty1[i]*dx;
3093             tx[i+8]  = ty2[i]*dx;
3094             tx[i+12] = ty12[i]*dx*dx;
3095         }
3096
3097         real tc[16] = {0};
3098         for (int idx = 0; idx < 16; idx++) /* 1056 */
3099         {
3100             for (int k = 0; k < 16; k++)
3101             {
3102                 tc[idx] += cmap_coeff_matrix[k*16+idx]*tx[k];
3103             }
3104         }
3105
3106         tt    = (xphi1-iphi1*dx)/dx;
3107         tu    = (xphi2-iphi2*dx)/dx;
3108
3109         e     = 0;
3110         df1   = 0;
3111         df2   = 0;
3112
3113         for (i = 3; i >= 0; i--)
3114         {
3115             l1 = loop_index[i][3];
3116             l2 = loop_index[i][2];
3117             l3 = loop_index[i][1];
3118
3119             e     = tt * e    + ((tc[i*4+3]*tu+tc[i*4+2])*tu + tc[i*4+1])*tu+tc[i*4];
3120             df1   = tu * df1  + (3.0*tc[l1]*tt+2.0*tc[l2])*tt+tc[l3];
3121             df2   = tt * df2  + (3.0*tc[i*4+3]*tu+2.0*tc[i*4+2])*tu+tc[i*4+1];
3122         }
3123
3124         fac     = RAD2DEG/dx;
3125         df1     = df1   * fac;
3126         df2     = df2   * fac;
3127
3128         /* CMAP energy */
3129         vtot += e;
3130
3131         /* Do forces - first torsion */
3132         fg1       = iprod(r1_ij, r1_kj);
3133         hg1       = iprod(r1_kl, r1_kj);
3134         fga1      = fg1*ra2r1*rgr1;
3135         hgb1      = hg1*rb2r1*rgr1;
3136         gaa1      = -ra2r1*rg1;
3137         gbb1      = rb2r1*rg1;
3138
3139         for (i = 0; i < DIM; i++)
3140         {
3141             dtf1[i]   = gaa1 * a1[i];
3142             dtg1[i]   = fga1 * a1[i] - hgb1 * b1[i];
3143             dth1[i]   = gbb1 * b1[i];
3144
3145             f1[i]     = df1  * dtf1[i];
3146             g1[i]     = df1  * dtg1[i];
3147             h1[i]     = df1  * dth1[i];
3148
3149             f1_i[i]   =  f1[i];
3150             f1_j[i]   = -f1[i] - g1[i];
3151             f1_k[i]   =  h1[i] + g1[i];
3152             f1_l[i]   = -h1[i];
3153
3154             f[a1i][i] = f[a1i][i] + f1_i[i];
3155             f[a1j][i] = f[a1j][i] + f1_j[i]; /* - f1[i] - g1[i] */
3156             f[a1k][i] = f[a1k][i] + f1_k[i]; /* h1[i] + g1[i] */
3157             f[a1l][i] = f[a1l][i] + f1_l[i]; /* h1[i] */
3158         }
3159
3160         /* Do forces - second torsion */
3161         fg2       = iprod(r2_ij, r2_kj);
3162         hg2       = iprod(r2_kl, r2_kj);
3163         fga2      = fg2*ra2r2*rgr2;
3164         hgb2      = hg2*rb2r2*rgr2;
3165         gaa2      = -ra2r2*rg2;
3166         gbb2      = rb2r2*rg2;
3167
3168         for (i = 0; i < DIM; i++)
3169         {
3170             dtf2[i]   = gaa2 * a2[i];
3171             dtg2[i]   = fga2 * a2[i] - hgb2 * b2[i];
3172             dth2[i]   = gbb2 * b2[i];
3173
3174             f2[i]     = df2  * dtf2[i];
3175             g2[i]     = df2  * dtg2[i];
3176             h2[i]     = df2  * dth2[i];
3177
3178             f2_i[i]   =  f2[i];
3179             f2_j[i]   = -f2[i] - g2[i];
3180             f2_k[i]   =  h2[i] + g2[i];
3181             f2_l[i]   = -h2[i];
3182
3183             f[a2i][i] = f[a2i][i] + f2_i[i]; /* f2[i] */
3184             f[a2j][i] = f[a2j][i] + f2_j[i]; /* - f2[i] - g2[i] */
3185             f[a2k][i] = f[a2k][i] + f2_k[i]; /* h2[i] + g2[i] */
3186             f[a2l][i] = f[a2l][i] + f2_l[i]; /* - h2[i] */
3187         }
3188
3189         /* Shift forces */
3190         if (fshift != nullptr)
3191         {
3192             if (g)
3193             {
3194                 copy_ivec(SHIFT_IVEC(g, a1j), jt1);
3195                 ivec_sub(SHIFT_IVEC(g, a1i),  jt1, dt1_ij);
3196                 ivec_sub(SHIFT_IVEC(g, a1k),  jt1, dt1_kj);
3197                 ivec_sub(SHIFT_IVEC(g, a1l),  jt1, dt1_lj);
3198                 t11 = IVEC2IS(dt1_ij);
3199                 t21 = IVEC2IS(dt1_kj);
3200                 t31 = IVEC2IS(dt1_lj);
3201
3202                 copy_ivec(SHIFT_IVEC(g, a2j), jt2);
3203                 ivec_sub(SHIFT_IVEC(g, a2i),  jt2, dt2_ij);
3204                 ivec_sub(SHIFT_IVEC(g, a2k),  jt2, dt2_kj);
3205                 ivec_sub(SHIFT_IVEC(g, a2l),  jt2, dt2_lj);
3206                 t12 = IVEC2IS(dt2_ij);
3207                 t22 = IVEC2IS(dt2_kj);
3208                 t32 = IVEC2IS(dt2_lj);
3209             }
3210             else if (pbc)
3211             {
3212                 t31 = pbc_rvec_sub(pbc, x[a1l], x[a1j], h1);
3213                 t32 = pbc_rvec_sub(pbc, x[a2l], x[a2j], h2);
3214             }
3215             else
3216             {
3217                 t31 = CENTRAL;
3218                 t32 = CENTRAL;
3219             }
3220
3221             rvec_inc(fshift[t11], f1_i);
3222             rvec_inc(fshift[CENTRAL], f1_j);
3223             rvec_inc(fshift[t21], f1_k);
3224             rvec_inc(fshift[t31], f1_l);
3225
3226             rvec_inc(fshift[t21], f2_i);
3227             rvec_inc(fshift[CENTRAL], f2_j);
3228             rvec_inc(fshift[t22], f2_k);
3229             rvec_inc(fshift[t32], f2_l);
3230         }
3231     }
3232     return vtot;
3233 }
3234
3235 namespace
3236 {
3237
3238 //! \cond
3239 /***********************************************************
3240  *
3241  *   G R O M O S  9 6   F U N C T I O N S
3242  *
3243  ***********************************************************/
3244 real g96harmonic(real kA, real kB, real xA, real xB, real x, real lambda,
3245                  real *V, real *F)
3246 {
3247     const real half = 0.5;
3248     real       L1, kk, x0, dx, dx2;
3249     real       v, f, dvdlambda;
3250
3251     L1    = 1.0-lambda;
3252     kk    = L1*kA+lambda*kB;
3253     x0    = L1*xA+lambda*xB;
3254
3255     dx    = x-x0;
3256     dx2   = dx*dx;
3257
3258     f          = -kk*dx;
3259     v          = half*kk*dx2;
3260     dvdlambda  = half*(kB-kA)*dx2 + (xA-xB)*kk*dx;
3261
3262     *F    = f;
3263     *V    = v;
3264
3265     return dvdlambda;
3266
3267     /* That was 21 flops */
3268 }
3269
3270 template <BondedKernelFlavor flavor>
3271 real g96bonds(int nbonds,
3272               const t_iatom forceatoms[], const t_iparams forceparams[],
3273               const rvec x[], rvec4 f[], rvec fshift[],
3274               const t_pbc *pbc, const t_graph *g,
3275               real lambda, real *dvdlambda,
3276               const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
3277               int gmx_unused *global_atom_index)
3278 {
3279     int  i, ki, ai, aj, type;
3280     real dr2, fbond, vbond, vtot;
3281     rvec dx;
3282
3283     vtot = 0.0;
3284     for (i = 0; (i < nbonds); )
3285     {
3286         type = forceatoms[i++];
3287         ai   = forceatoms[i++];
3288         aj   = forceatoms[i++];
3289
3290         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
3291         dr2  = iprod(dx, dx);                       /*   5              */
3292
3293         *dvdlambda += g96harmonic(forceparams[type].harmonic.krA,
3294                                   forceparams[type].harmonic.krB,
3295                                   forceparams[type].harmonic.rA,
3296                                   forceparams[type].harmonic.rB,
3297                                   dr2, lambda, &vbond, &fbond);
3298
3299         vtot  += 0.5*vbond;                                            /* 1*/
3300
3301         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, g, fshift); /* 15 */
3302     }                                                                  /* 44 TOTAL      */
3303     return vtot;
3304 }
3305
3306 real g96bond_angle(const rvec xi, const rvec xj, const rvec xk, const t_pbc *pbc,
3307                    rvec r_ij, rvec r_kj,
3308                    int *t1, int *t2)
3309 /* Return value is the angle between the bonds i-j and j-k */
3310 {
3311     real costh;
3312
3313     *t1 = pbc_rvec_sub(pbc, xi, xj, r_ij); /*  3                */
3314     *t2 = pbc_rvec_sub(pbc, xk, xj, r_kj); /*  3                */
3315
3316     costh = cos_angle(r_ij, r_kj);         /* 25                */
3317     /* 41 TOTAL */
3318     return costh;
3319 }
3320
3321 template <BondedKernelFlavor flavor>
3322 real g96angles(int nbonds,
3323                const t_iatom forceatoms[], const t_iparams forceparams[],
3324                const rvec x[], rvec4 f[], rvec fshift[],
3325                const t_pbc *pbc, const t_graph *g,
3326                real lambda, real *dvdlambda,
3327                const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
3328                int gmx_unused *global_atom_index)
3329 {
3330     int  i, ai, aj, ak, type, m, t1, t2;
3331     rvec r_ij, r_kj;
3332     real cos_theta, dVdt, va, vtot;
3333     real rij_1, rij_2, rkj_1, rkj_2, rijrkj_1;
3334     rvec f_i, f_j, f_k;
3335     ivec jt, dt_ij, dt_kj;
3336
3337     vtot = 0.0;
3338     for (i = 0; (i < nbonds); )
3339     {
3340         type = forceatoms[i++];
3341         ai   = forceatoms[i++];
3342         aj   = forceatoms[i++];
3343         ak   = forceatoms[i++];
3344
3345         cos_theta  = g96bond_angle(x[ai], x[aj], x[ak], pbc, r_ij, r_kj, &t1, &t2);
3346
3347         *dvdlambda += g96harmonic(forceparams[type].harmonic.krA,
3348                                   forceparams[type].harmonic.krB,
3349                                   forceparams[type].harmonic.rA,
3350                                   forceparams[type].harmonic.rB,
3351                                   cos_theta, lambda, &va, &dVdt);
3352         vtot    += va;
3353
3354         rij_1    = gmx::invsqrt(iprod(r_ij, r_ij));
3355         rkj_1    = gmx::invsqrt(iprod(r_kj, r_kj));
3356         rij_2    = rij_1*rij_1;
3357         rkj_2    = rkj_1*rkj_1;
3358         rijrkj_1 = rij_1*rkj_1;         /* 23 */
3359
3360         for (m = 0; (m < DIM); m++)     /*  42  */
3361         {
3362             f_i[m]    = dVdt*(r_kj[m]*rijrkj_1 - r_ij[m]*rij_2*cos_theta);
3363             f_k[m]    = dVdt*(r_ij[m]*rijrkj_1 - r_kj[m]*rkj_2*cos_theta);
3364             f_j[m]    = -f_i[m]-f_k[m];
3365             f[ai][m] += f_i[m];
3366             f[aj][m] += f_j[m];
3367             f[ak][m] += f_k[m];
3368         }
3369
3370         if (computeVirial(flavor))
3371         {
3372             if (g)
3373             {
3374                 copy_ivec(SHIFT_IVEC(g, aj), jt);
3375
3376                 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
3377                 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
3378                 t1 = IVEC2IS(dt_ij);
3379                 t2 = IVEC2IS(dt_kj);
3380             }
3381             rvec_inc(fshift[t1], f_i);
3382             rvec_inc(fshift[CENTRAL], f_j);
3383             rvec_inc(fshift[t2], f_k);          /* 9 */
3384         }
3385         /* 163 TOTAL    */
3386     }
3387     return vtot;
3388 }
3389
3390 template <BondedKernelFlavor flavor>
3391 real cross_bond_bond(int nbonds,
3392                      const t_iatom forceatoms[], const t_iparams forceparams[],
3393                      const rvec x[], rvec4 f[], rvec fshift[],
3394                      const t_pbc *pbc, const t_graph *g,
3395                      real gmx_unused lambda, real gmx_unused *dvdlambda,
3396                      const t_mdatoms gmx_unused *md, t_fcdata gmx_unused  *fcd,
3397                      int gmx_unused *global_atom_index)
3398 {
3399     /* Potential from Lawrence and Skimmer, Chem. Phys. Lett. 372 (2003)
3400      * pp. 842-847
3401      */
3402     int  i, ai, aj, ak, type, m, t1, t2;
3403     rvec r_ij, r_kj;
3404     real vtot, vrr, s1, s2, r1, r2, r1e, r2e, krr;
3405     rvec f_i, f_j, f_k;
3406     ivec jt, dt_ij, dt_kj;
3407
3408     vtot = 0.0;
3409     for (i = 0; (i < nbonds); )
3410     {
3411         type = forceatoms[i++];
3412         ai   = forceatoms[i++];
3413         aj   = forceatoms[i++];
3414         ak   = forceatoms[i++];
3415         r1e  = forceparams[type].cross_bb.r1e;
3416         r2e  = forceparams[type].cross_bb.r2e;
3417         krr  = forceparams[type].cross_bb.krr;
3418
3419         /* Compute distance vectors ... */
3420         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], r_ij);
3421         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], r_kj);
3422
3423         /* ... and their lengths */
3424         r1 = norm(r_ij);
3425         r2 = norm(r_kj);
3426
3427         /* Deviations from ideality */
3428         s1 = r1-r1e;
3429         s2 = r2-r2e;
3430
3431         /* Energy (can be negative!) */
3432         vrr   = krr*s1*s2;
3433         vtot += vrr;
3434
3435         /* Forces */
3436         svmul(-krr*s2/r1, r_ij, f_i);
3437         svmul(-krr*s1/r2, r_kj, f_k);
3438
3439         for (m = 0; (m < DIM); m++)     /*  12  */
3440         {
3441             f_j[m]    = -f_i[m] - f_k[m];
3442             f[ai][m] += f_i[m];
3443             f[aj][m] += f_j[m];
3444             f[ak][m] += f_k[m];
3445         }
3446
3447         if (computeVirial(flavor))
3448         {
3449             if (g)
3450             {
3451                 copy_ivec(SHIFT_IVEC(g, aj), jt);
3452
3453                 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
3454                 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
3455                 t1 = IVEC2IS(dt_ij);
3456                 t2 = IVEC2IS(dt_kj);
3457             }
3458             rvec_inc(fshift[t1], f_i);
3459             rvec_inc(fshift[CENTRAL], f_j);
3460             rvec_inc(fshift[t2], f_k);          /* 9 */
3461         }
3462         /* 163 TOTAL    */
3463     }
3464     return vtot;
3465 }
3466
3467 template <BondedKernelFlavor flavor>
3468 real cross_bond_angle(int nbonds,
3469                       const t_iatom forceatoms[], const t_iparams forceparams[],
3470                       const rvec x[], rvec4 f[], rvec fshift[],
3471                       const t_pbc *pbc, const t_graph *g,
3472                       real gmx_unused lambda, real gmx_unused *dvdlambda,
3473                       const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
3474                       int gmx_unused *global_atom_index)
3475 {
3476     /* Potential from Lawrence and Skimmer, Chem. Phys. Lett. 372 (2003)
3477      * pp. 842-847
3478      */
3479     int  i, ai, aj, ak, type, m, t1, t2;
3480     rvec r_ij, r_kj, r_ik;
3481     real vtot, vrt, s1, s2, s3, r1, r2, r3, r1e, r2e, r3e, krt, k1, k2, k3;
3482     rvec f_i, f_j, f_k;
3483     ivec jt, dt_ij, dt_kj;
3484
3485     vtot = 0.0;
3486     for (i = 0; (i < nbonds); )
3487     {
3488         type = forceatoms[i++];
3489         ai   = forceatoms[i++];
3490         aj   = forceatoms[i++];
3491         ak   = forceatoms[i++];
3492         r1e  = forceparams[type].cross_ba.r1e;
3493         r2e  = forceparams[type].cross_ba.r2e;
3494         r3e  = forceparams[type].cross_ba.r3e;
3495         krt  = forceparams[type].cross_ba.krt;
3496
3497         /* Compute distance vectors ... */
3498         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], r_ij);
3499         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], r_kj);
3500         pbc_rvec_sub(pbc, x[ai], x[ak], r_ik);
3501
3502         /* ... and their lengths */
3503         r1 = norm(r_ij);
3504         r2 = norm(r_kj);
3505         r3 = norm(r_ik);
3506
3507         /* Deviations from ideality */
3508         s1 = r1-r1e;
3509         s2 = r2-r2e;
3510         s3 = r3-r3e;
3511
3512         /* Energy (can be negative!) */
3513         vrt   = krt*s3*(s1+s2);
3514         vtot += vrt;
3515
3516         /* Forces */
3517         k1 = -krt*(s3/r1);
3518         k2 = -krt*(s3/r2);
3519         k3 = -krt*(s1+s2)/r3;
3520         for (m = 0; (m < DIM); m++)
3521         {
3522             f_i[m] = k1*r_ij[m] + k3*r_ik[m];
3523             f_k[m] = k2*r_kj[m] - k3*r_ik[m];
3524             f_j[m] = -f_i[m] - f_k[m];
3525         }
3526
3527         for (m = 0; (m < DIM); m++)     /*  12  */
3528         {
3529             f[ai][m] += f_i[m];
3530             f[aj][m] += f_j[m];
3531             f[ak][m] += f_k[m];
3532         }
3533
3534         if (computeVirial(flavor))
3535         {
3536             if (g)
3537             {
3538                 copy_ivec(SHIFT_IVEC(g, aj), jt);
3539
3540                 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
3541                 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
3542                 t1 = IVEC2IS(dt_ij);
3543                 t2 = IVEC2IS(dt_kj);
3544             }
3545             rvec_inc(fshift[t1], f_i);
3546             rvec_inc(fshift[CENTRAL], f_j);
3547             rvec_inc(fshift[t2], f_k);          /* 9 */
3548         }
3549         /* 163 TOTAL    */
3550     }
3551     return vtot;
3552 }
3553
3554 /*! \brief Computes the potential and force for a tabulated potential */
3555 real bonded_tab(const char *type, int table_nr,
3556                 const bondedtable_t *table, real kA, real kB, real r,
3557                 real lambda, real *V, real *F)
3558 {
3559     real k, tabscale, *VFtab, rt, eps, eps2, Yt, Ft, Geps, Heps2, Fp, VV, FF;
3560     int  n0, nnn;
3561     real dvdlambda;
3562
3563     k = (1.0 - lambda)*kA + lambda*kB;
3564
3565     tabscale = table->scale;
3566     VFtab    = table->data;
3567
3568     rt    = r*tabscale;
3569     n0    = static_cast<int>(rt);
3570     if (n0 >= table->n)
3571     {
3572         gmx_fatal(FARGS, "A tabulated %s interaction table number %d is out of the table range: r %f, between table indices %d and %d, table length %d",
3573                   type, table_nr, r, n0, n0+1, table->n);
3574     }
3575     eps   = rt - n0;
3576     eps2  = eps*eps;
3577     nnn   = 4*n0;
3578     Yt    = VFtab[nnn];
3579     Ft    = VFtab[nnn+1];
3580     Geps  = VFtab[nnn+2]*eps;
3581     Heps2 = VFtab[nnn+3]*eps2;
3582     Fp    = Ft + Geps + Heps2;
3583     VV    = Yt + Fp*eps;
3584     FF    = Fp + Geps + 2.0*Heps2;
3585
3586     *F         = -k*FF*tabscale;
3587     *V         = k*VV;
3588     dvdlambda  = (kB - kA)*VV;
3589
3590     return dvdlambda;
3591
3592     /* That was 22 flops */
3593 }
3594
3595 template <BondedKernelFlavor flavor>
3596 real tab_bonds(int nbonds,
3597                const t_iatom forceatoms[], const t_iparams forceparams[],
3598                const rvec x[], rvec4 f[], rvec fshift[],
3599                const t_pbc *pbc, const t_graph *g,
3600                real lambda, real *dvdlambda,
3601                const t_mdatoms gmx_unused *md, t_fcdata *fcd,
3602                int gmx_unused  *global_atom_index)
3603 {
3604     int  i, ki, ai, aj, type, table;
3605     real dr, dr2, fbond, vbond, vtot;
3606     rvec dx;
3607
3608     vtot = 0.0;
3609     for (i = 0; (i < nbonds); )
3610     {
3611         type = forceatoms[i++];
3612         ai   = forceatoms[i++];
3613         aj   = forceatoms[i++];
3614
3615         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
3616         dr2  = iprod(dx, dx);                       /*   5              */
3617         dr   = dr2*gmx::invsqrt(dr2);               /*  10              */
3618
3619         table = forceparams[type].tab.table;
3620
3621         *dvdlambda += bonded_tab("bond", table,
3622                                  &fcd->bondtab[table],
3623                                  forceparams[type].tab.kA,
3624                                  forceparams[type].tab.kB,
3625                                  dr, lambda, &vbond, &fbond); /*  22 */
3626
3627         if (dr2 == 0.0)
3628         {
3629             continue;
3630         }
3631
3632
3633         vtot  += vbond;                                                /* 1*/
3634         fbond *= gmx::invsqrt(dr2);                                    /*   6           */
3635
3636         spreadBondForces<flavor>(fbond, dx, ai, aj, f, ki, g, fshift); /* 15 */
3637     }                                                                  /* 62 TOTAL      */
3638     return vtot;
3639 }
3640
3641 template <BondedKernelFlavor flavor>
3642 real tab_angles(int nbonds,
3643                 const t_iatom forceatoms[], const t_iparams forceparams[],
3644                 const rvec x[], rvec4 f[], rvec fshift[],
3645                 const t_pbc *pbc, const t_graph *g,
3646                 real lambda, real *dvdlambda,
3647                 const t_mdatoms gmx_unused  *md, t_fcdata *fcd,
3648                 int gmx_unused *global_atom_index)
3649 {
3650     int  i, ai, aj, ak, t1, t2, type, table;
3651     rvec r_ij, r_kj;
3652     real cos_theta, cos_theta2, theta, dVdt, va, vtot;
3653     ivec jt, dt_ij, dt_kj;
3654
3655     vtot = 0.0;
3656     for (i = 0; (i < nbonds); )
3657     {
3658         type = forceatoms[i++];
3659         ai   = forceatoms[i++];
3660         aj   = forceatoms[i++];
3661         ak   = forceatoms[i++];
3662
3663         theta  = bond_angle(x[ai], x[aj], x[ak], pbc,
3664                             r_ij, r_kj, &cos_theta, &t1, &t2); /*  41           */
3665
3666         table = forceparams[type].tab.table;
3667
3668         *dvdlambda += bonded_tab("angle", table,
3669                                  &fcd->angletab[table],
3670                                  forceparams[type].tab.kA,
3671                                  forceparams[type].tab.kB,
3672                                  theta, lambda, &va, &dVdt); /*  22  */
3673         vtot += va;
3674
3675         cos_theta2 = gmx::square(cos_theta);            /*   1          */
3676         if (cos_theta2 < 1)
3677         {
3678             int  m;
3679             real st, sth;
3680             real cik, cii, ckk;
3681             real nrkj2, nrij2;
3682             rvec f_i, f_j, f_k;
3683
3684             st    = dVdt*gmx::invsqrt(1 - cos_theta2); /*  12           */
3685             sth   = st*cos_theta;                      /*   1           */
3686             nrkj2 = iprod(r_kj, r_kj);                 /*   5           */
3687             nrij2 = iprod(r_ij, r_ij);
3688
3689             cik = st*gmx::invsqrt(nrkj2*nrij2); /*  12          */
3690             cii = sth/nrij2;                    /*  10          */
3691             ckk = sth/nrkj2;                    /*  10          */
3692
3693             for (m = 0; (m < DIM); m++)         /*  39          */
3694             {
3695                 f_i[m]    = -(cik*r_kj[m]-cii*r_ij[m]);
3696                 f_k[m]    = -(cik*r_ij[m]-ckk*r_kj[m]);
3697                 f_j[m]    = -f_i[m]-f_k[m];
3698                 f[ai][m] += f_i[m];
3699                 f[aj][m] += f_j[m];
3700                 f[ak][m] += f_k[m];
3701             }
3702
3703             if (computeVirial(flavor))
3704             {
3705                 if (g)
3706                 {
3707                     copy_ivec(SHIFT_IVEC(g, aj), jt);
3708
3709                     ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
3710                     ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
3711                     t1 = IVEC2IS(dt_ij);
3712                     t2 = IVEC2IS(dt_kj);
3713                 }
3714                 rvec_inc(fshift[t1], f_i);
3715                 rvec_inc(fshift[CENTRAL], f_j);
3716                 rvec_inc(fshift[t2], f_k);
3717             }
3718         }                                       /* 169 TOTAL    */
3719     }
3720     return vtot;
3721 }
3722
3723 template <BondedKernelFlavor flavor>
3724 real tab_dihs(int nbonds,
3725               const t_iatom forceatoms[], const t_iparams forceparams[],
3726               const rvec x[], rvec4 f[], rvec fshift[],
3727               const t_pbc *pbc, const t_graph *g,
3728               real lambda, real *dvdlambda,
3729               const t_mdatoms gmx_unused *md, t_fcdata *fcd,
3730               int gmx_unused *global_atom_index)
3731 {
3732     int  i, type, ai, aj, ak, al, table;
3733     int  t1, t2, t3;
3734     rvec r_ij, r_kj, r_kl, m, n;
3735     real phi, ddphi, vpd, vtot;
3736
3737     vtot = 0.0;
3738     for (i = 0; (i < nbonds); )
3739     {
3740         type = forceatoms[i++];
3741         ai   = forceatoms[i++];
3742         aj   = forceatoms[i++];
3743         ak   = forceatoms[i++];
3744         al   = forceatoms[i++];
3745
3746         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
3747                         &t1, &t2, &t3);  /*  84  */
3748
3749         table = forceparams[type].tab.table;
3750
3751         /* Hopefully phi+M_PI never results in values < 0 */
3752         *dvdlambda += bonded_tab("dihedral", table,
3753                                  &fcd->dihtab[table],
3754                                  forceparams[type].tab.kA,
3755                                  forceparams[type].tab.kB,
3756                                  phi+M_PI, lambda, &vpd, &ddphi);
3757
3758         vtot += vpd;
3759         do_dih_fup<flavor>(ai, aj, ak, al, -ddphi, r_ij, r_kj, r_kl, m, n,
3760                            f, fshift, pbc, g, x, t1, t2, t3); /* 112    */
3761
3762     }                                                         /* 227 TOTAL  */
3763
3764     return vtot;
3765 }
3766
3767 struct BondedInteractions
3768 {
3769     BondedFunction function;
3770     int            nrnbIndex;
3771 };
3772
3773 /*! \brief Lookup table of bonded interaction functions
3774  *
3775  * This must have as many entries as interaction_function in ifunc.cpp */
3776 template<BondedKernelFlavor flavor>
3777 const std::array<BondedInteractions, F_NRE> c_bondedInteractionFunctions
3778     = {
3779     BondedInteractions {bonds<flavor>, eNR_BONDS },                       // F_BONDS
3780     BondedInteractions {g96bonds<flavor>, eNR_BONDS },                    // F_G96BONDS
3781     BondedInteractions {morse_bonds<flavor>, eNR_MORSE },                 // F_MORSE
3782     BondedInteractions {cubic_bonds<flavor>, eNR_CUBICBONDS },            // F_CUBICBONDS
3783     BondedInteractions {unimplemented, -1 },                              // F_CONNBONDS
3784     BondedInteractions {bonds<flavor>, eNR_BONDS },                       // F_HARMONIC
3785     BondedInteractions {FENE_bonds<flavor>, eNR_FENEBONDS },              // F_FENEBONDS
3786     BondedInteractions {tab_bonds<flavor>, eNR_TABBONDS },                // F_TABBONDS
3787     BondedInteractions {tab_bonds<flavor>, eNR_TABBONDS },                // F_TABBONDSNC
3788     BondedInteractions {restraint_bonds<flavor>, eNR_RESTRBONDS },        // F_RESTRBONDS
3789     BondedInteractions {angles<flavor>, eNR_ANGLES },                     // F_ANGLES
3790     BondedInteractions {g96angles<flavor>, eNR_ANGLES },                  // F_G96ANGLES
3791     BondedInteractions {restrangles<flavor>, eNR_ANGLES },                // F_RESTRANGLES
3792     BondedInteractions {linear_angles<flavor>, eNR_ANGLES },              // F_LINEAR_ANGLES
3793     BondedInteractions {cross_bond_bond<flavor>, eNR_CROSS_BOND_BOND },   // F_CROSS_BOND_BONDS
3794     BondedInteractions {cross_bond_angle<flavor>, eNR_CROSS_BOND_ANGLE }, // F_CROSS_BOND_ANGLES
3795     BondedInteractions {urey_bradley<flavor>, eNR_UREY_BRADLEY },         // F_UREY_BRADLEY
3796     BondedInteractions {quartic_angles<flavor>, eNR_QANGLES },            // F_QUARTIC_ANGLES
3797     BondedInteractions {tab_angles<flavor>, eNR_TABANGLES },              // F_TABANGLES
3798     BondedInteractions {pdihs<flavor>, eNR_PROPER },                      // F_PDIHS
3799     BondedInteractions {rbdihs<flavor>, eNR_RB },                         // F_RBDIHS
3800     BondedInteractions {restrdihs<flavor>, eNR_PROPER },                  // F_RESTRDIHS
3801     BondedInteractions {cbtdihs<flavor>, eNR_RB },                        // F_CBTDIHS
3802     BondedInteractions {rbdihs<flavor>, eNR_FOURDIH },                    // F_FOURDIHS
3803     BondedInteractions {idihs<flavor>, eNR_IMPROPER },                    // F_IDIHS
3804     BondedInteractions {pdihs<flavor>, eNR_IMPROPER },                    // F_PIDIHS
3805     BondedInteractions {tab_dihs<flavor>, eNR_TABDIHS },                  // F_TABDIHS
3806     BondedInteractions {unimplemented, eNR_CMAP },                        // F_CMAP
3807     BondedInteractions {unimplemented, -1 },                              // F_GB12_NOLONGERUSED
3808     BondedInteractions {unimplemented, -1 },                              // F_GB13_NOLONGERUSED
3809     BondedInteractions {unimplemented, -1 },                              // F_GB14_NOLONGERUSED
3810     BondedInteractions {unimplemented, -1 },                              // F_GBPOL_NOLONGERUSED
3811     BondedInteractions {unimplemented, -1 },                              // F_NPSOLVATION_NOLONGERUSED
3812     BondedInteractions {unimplemented, eNR_NB14 },                        // F_LJ14
3813     BondedInteractions {unimplemented, -1 },                              // F_COUL14
3814     BondedInteractions {unimplemented, eNR_NB14 },                        // F_LJC14_Q
3815     BondedInteractions {unimplemented, eNR_NB14 },                        // F_LJC_PAIRS_NB
3816     BondedInteractions {unimplemented, -1 },                              // F_LJ
3817     BondedInteractions {unimplemented, -1 },                              // F_BHAM
3818     BondedInteractions {unimplemented, -1 },                              // F_LJ_LR_NOLONGERUSED
3819     BondedInteractions {unimplemented, -1 },                              // F_BHAM_LR_NOLONGERUSED
3820     BondedInteractions {unimplemented, -1 },                              // F_DISPCORR
3821     BondedInteractions {unimplemented, -1 },                              // F_COUL_SR
3822     BondedInteractions {unimplemented, -1 },                              // F_COUL_LR_NOLONGERUSED
3823     BondedInteractions {unimplemented, -1 },                              // F_RF_EXCL
3824     BondedInteractions {unimplemented, -1 },                              // F_COUL_RECIP
3825     BondedInteractions {unimplemented, -1 },                              // F_LJ_RECIP
3826     BondedInteractions {unimplemented, -1 },                              // F_DPD
3827     BondedInteractions {polarize<flavor>, eNR_POLARIZE },                 // F_POLARIZATION
3828     BondedInteractions {water_pol<flavor>, eNR_WPOL },                    // F_WATER_POL
3829     BondedInteractions {thole_pol<flavor>, eNR_THOLE },                   // F_THOLE_POL
3830     BondedInteractions {anharm_polarize<flavor>, eNR_ANHARM_POL },        // F_ANHARM_POL
3831     BondedInteractions {unimplemented, -1 },                              // F_POSRES
3832     BondedInteractions {unimplemented, -1 },                              // F_FBPOSRES
3833     BondedInteractions {ta_disres, eNR_DISRES },                          // F_DISRES
3834     BondedInteractions {unimplemented, -1 },                              // F_DISRESVIOL
3835     BondedInteractions {orires, eNR_ORIRES },                             // F_ORIRES
3836     BondedInteractions {unimplemented, -1 },                              // F_ORIRESDEV
3837     BondedInteractions {angres<flavor>, eNR_ANGRES },                     // F_ANGRES
3838     BondedInteractions {angresz<flavor>, eNR_ANGRESZ },                   // F_ANGRESZ
3839     BondedInteractions {dihres<flavor>, eNR_DIHRES },                     // F_DIHRES
3840     BondedInteractions {unimplemented, -1 },                              // F_DIHRESVIOL
3841     BondedInteractions {unimplemented, -1 },                              // F_CONSTR
3842     BondedInteractions {unimplemented, -1 },                              // F_CONSTRNC
3843     BondedInteractions {unimplemented, -1 },                              // F_SETTLE
3844     BondedInteractions {unimplemented, -1 },                              // F_VSITE2
3845     BondedInteractions {unimplemented, -1 },                              // F_VSITE3
3846     BondedInteractions {unimplemented, -1 },                              // F_VSITE3FD
3847     BondedInteractions {unimplemented, -1 },                              // F_VSITE3FAD
3848     BondedInteractions {unimplemented, -1 },                              // F_VSITE3OUT
3849     BondedInteractions {unimplemented, -1 },                              // F_VSITE4FD
3850     BondedInteractions {unimplemented, -1 },                              // F_VSITE4FDN
3851     BondedInteractions {unimplemented, -1 },                              // F_VSITEN
3852     BondedInteractions {unimplemented, -1 },                              // F_COM_PULL
3853     BondedInteractions {unimplemented, -1 },                              // F_DENSITYFITTING
3854     BondedInteractions {unimplemented, -1 },                              // F_EQM
3855     BondedInteractions {unimplemented, -1 },                              // F_EPOT
3856     BondedInteractions {unimplemented, -1 },                              // F_EKIN
3857     BondedInteractions {unimplemented, -1 },                              // F_ETOT
3858     BondedInteractions {unimplemented, -1 },                              // F_ECONSERVED
3859     BondedInteractions {unimplemented, -1 },                              // F_TEMP
3860     BondedInteractions {unimplemented, -1 },                              // F_VTEMP_NOLONGERUSED
3861     BondedInteractions {unimplemented, -1 },                              // F_PDISPCORR
3862     BondedInteractions {unimplemented, -1 },                              // F_PRES
3863     BondedInteractions {unimplemented, -1 },                              // F_DVDL_CONSTR
3864     BondedInteractions {unimplemented, -1 },                              // F_DVDL
3865     BondedInteractions {unimplemented, -1 },                              // F_DKDL
3866     BondedInteractions {unimplemented, -1 },                              // F_DVDL_COUL
3867     BondedInteractions {unimplemented, -1 },                              // F_DVDL_VDW
3868     BondedInteractions {unimplemented, -1 },                              // F_DVDL_BONDED
3869     BondedInteractions {unimplemented, -1 },                              // F_DVDL_RESTRAINT
3870     BondedInteractions {unimplemented, -1 },                              // F_DVDL_TEMPERATURE
3871     };
3872
3873 /*! \brief List of instantiated BondedInteractions list */
3874 const gmx::EnumerationArray < BondedKernelFlavor, std::array < BondedInteractions, F_NRE>> c_bondedInteractionFunctionsPerFlavor =
3875 {
3876     c_bondedInteractionFunctions<BondedKernelFlavor::ForcesSimdWhenAvailable>,
3877     c_bondedInteractionFunctions<BondedKernelFlavor::ForcesNoSimd>,
3878     c_bondedInteractionFunctions<BondedKernelFlavor::ForcesAndVirialAndEnergy>,
3879     c_bondedInteractionFunctions<BondedKernelFlavor::ForcesAndEnergy>
3880 };
3881
3882 //! \endcond
3883
3884 } // namespace
3885
3886 real calculateSimpleBond(const int ftype,
3887                          const int numForceatoms, const t_iatom forceatoms[],
3888                          const t_iparams forceparams[],
3889                          const rvec x[], rvec4 f[], rvec fshift[],
3890                          const struct t_pbc *pbc,
3891                          const struct t_graph gmx_unused *g,
3892                          const real lambda, real *dvdlambda,
3893                          const t_mdatoms *md, t_fcdata *fcd,
3894                          int gmx_unused *global_atom_index,
3895                          const BondedKernelFlavor bondedKernelFlavor)
3896 {
3897     const BondedInteractions &bonded =
3898         c_bondedInteractionFunctionsPerFlavor[bondedKernelFlavor][ftype];
3899
3900     real v = bonded.function(numForceatoms, forceatoms,
3901                              forceparams,
3902                              x, f, fshift,
3903                              pbc, g, lambda, dvdlambda,
3904                              md, fcd, global_atom_index);
3905
3906     return v;
3907 }
3908
3909 int nrnbIndex(int ftype)
3910 {
3911     return c_bondedInteractionFunctions<BondedKernelFlavor::ForcesAndVirialAndEnergy>[ftype].nrnbIndex;
3912 }