src/gromacs/listed-forces/bonded.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 /*! \internal \file
  38  *
  39  * \brief This file defines low-level functions necessary for
  40  * computing energies and forces for listed interactions.
  41  *
  42  * \author Mark Abraham <mark.j.abraham@gmail.com>
  43  *
  44  * \ingroup module_listed-forces
  45  */
  46 #include "gmxpre.h"
  47
  48 #include "bonded.h"
  49
  50 #include <assert.h>
  51
  52 #include <cmath>
  53
  54 #include <algorithm>
  55
  56 #include "gromacs/math/utilities.h"
  57 #include "gromacs/math/vec.h"
  58 #include "gromacs/pbcutil/ishift.h"
  59 #include "gromacs/pbcutil/mshift.h"
  60 #include "gromacs/pbcutil/pbc.h"
  61 #include "gromacs/simd/simd.h"
  62 #include "gromacs/simd/simd_math.h"
  63 #include "gromacs/simd/vector_operations.h"
  64 #include "gromacs/utility/fatalerror.h"
  65 #include "gromacs/utility/smalloc.h"
  66
  67 #include "listed-internal.h"
  68 #include "pairs.h"
  69 #include "restcbt.h"
  70
  71 /*! \brief Mysterious CMAP coefficient matrix */
  72 const int cmap_coeff_matrix[] = {
  73     1, 0, -3,  2, 0, 0,  0,  0, -3,  0,  9, -6,  2,  0, -6,  4,
  74     0, 0,  0,  0, 0, 0,  0,  0,  3,  0, -9,  6, -2,  0,  6, -4,
  75     0, 0,  0,  0, 0, 0,  0,  0,  0,  0,  9, -6,  0,  0, -6,  4,
  76     0, 0,  3, -2, 0, 0,  0,  0,  0,  0, -9,  6,  0,  0,  6, -4,
  77     0, 0,  0,  0, 1, 0, -3,  2, -2,  0,  6, -4,  1,  0, -3,  2,
  78     0, 0,  0,  0, 0, 0,  0,  0, -1,  0,  3, -2,  1,  0, -3,  2,
  79     0, 0,  0,  0, 0, 0,  0,  0,  0,  0, -3,  2,  0,  0,  3, -2,
  80     0, 0,  0,  0, 0, 0,  3, -2,  0,  0, -6,  4,  0,  0,  3, -2,
  81     0, 1, -2,  1, 0, 0,  0,  0,  0, -3,  6, -3,  0,  2, -4,  2,
  82     0, 0,  0,  0, 0, 0,  0,  0,  0,  3, -6,  3,  0, -2,  4, -2,
  83     0, 0,  0,  0, 0, 0,  0,  0,  0,  0, -3,  3,  0,  0,  2, -2,
  84     0, 0, -1,  1, 0, 0,  0,  0,  0,  0,  3, -3,  0,  0, -2,  2,
  85     0, 0,  0,  0, 0, 1, -2,  1,  0, -2,  4, -2,  0,  1, -2,  1,
  86     0, 0,  0,  0, 0, 0,  0,  0,  0, -1,  2, -1,  0,  1, -2,  1,
  87     0, 0,  0,  0, 0, 0,  0,  0,  0,  0,  1, -1,  0,  0, -1,  1,
  88     0, 0,  0,  0, 0, 0, -1,  1,  0,  0,  2, -2,  0,  0, -1,  1
  89 };
  90
  91
  92 /*! \brief Compute dx = xi - xj, modulo PBC if non-NULL
  93  *
  94  * \todo This kind of code appears in many places. Consolidate it */
  95 static int pbc_rvec_sub(const t_pbc *pbc, const rvec xi, const rvec xj, rvec dx)
  96 {
  97     if (pbc)
  98     {
  99         return pbc_dx_aiuc(pbc, xi, xj, dx);
 100     }
 101     else
 102     {
 103         rvec_sub(xi, xj, dx);
 104         return CENTRAL;
 105     }
 106 }
 107
 108 #ifdef GMX_SIMD_HAVE_REAL
 109
 110 /* SIMD PBC data structure, containing 1/boxdiag and the box vectors */
 111 typedef struct {
 112     gmx_simd_real_t inv_bzz;
 113     gmx_simd_real_t inv_byy;
 114     gmx_simd_real_t inv_bxx;
 115     gmx_simd_real_t bzx;
 116     gmx_simd_real_t bzy;
 117     gmx_simd_real_t bzz;
 118     gmx_simd_real_t byx;
 119     gmx_simd_real_t byy;
 120     gmx_simd_real_t bxx;
 121 } pbc_simd_t;
 122
 123 /*! \brief Set the SIMD pbc data from a normal t_pbc struct */
 124 static void set_pbc_simd(const t_pbc *pbc, pbc_simd_t *pbc_simd)
 125 {
 126     rvec inv_bdiag;
 127     int  d;
 128
 129     /* Setting inv_bdiag to 0 effectively turns off PBC */
 130     clear_rvec(inv_bdiag);
 131     if (pbc != NULL)
 132     {
 133         for (d = 0; d < pbc->ndim_ePBC; d++)
 134         {
 135             inv_bdiag[d] = 1.0/pbc->box[d][d];
 136         }
 137     }
 138
 139     pbc_simd->inv_bzz = gmx_simd_set1_r(inv_bdiag[ZZ]);
 140     pbc_simd->inv_byy = gmx_simd_set1_r(inv_bdiag[YY]);
 141     pbc_simd->inv_bxx = gmx_simd_set1_r(inv_bdiag[XX]);
 142
 143     if (pbc != NULL)
 144     {
 145         pbc_simd->bzx = gmx_simd_set1_r(pbc->box[ZZ][XX]);
 146         pbc_simd->bzy = gmx_simd_set1_r(pbc->box[ZZ][YY]);
 147         pbc_simd->bzz = gmx_simd_set1_r(pbc->box[ZZ][ZZ]);
 148         pbc_simd->byx = gmx_simd_set1_r(pbc->box[YY][XX]);
 149         pbc_simd->byy = gmx_simd_set1_r(pbc->box[YY][YY]);
 150         pbc_simd->bxx = gmx_simd_set1_r(pbc->box[XX][XX]);
 151     }
 152     else
 153     {
 154         pbc_simd->bzx = gmx_simd_setzero_r();
 155         pbc_simd->bzy = gmx_simd_setzero_r();
 156         pbc_simd->bzz = gmx_simd_setzero_r();
 157         pbc_simd->byx = gmx_simd_setzero_r();
 158         pbc_simd->byy = gmx_simd_setzero_r();
 159         pbc_simd->bxx = gmx_simd_setzero_r();
 160     }
 161 }
 162
 163 /*! \brief Correct distance vector *dx,*dy,*dz for PBC using SIMD */
 164 static gmx_inline void
 165 pbc_dx_simd(gmx_simd_real_t *dx, gmx_simd_real_t *dy, gmx_simd_real_t *dz,
 166             const pbc_simd_t *pbc)
 167 {
 168     gmx_simd_real_t sh;
 169
 170     sh  = gmx_simd_round_r(gmx_simd_mul_r(*dz, pbc->inv_bzz));
 171     *dx = gmx_simd_fnmadd_r(sh, pbc->bzx, *dx);
 172     *dy = gmx_simd_fnmadd_r(sh, pbc->bzy, *dy);
 173     *dz = gmx_simd_fnmadd_r(sh, pbc->bzz, *dz);
 174
 175     sh  = gmx_simd_round_r(gmx_simd_mul_r(*dy, pbc->inv_byy));
 176     *dx = gmx_simd_fnmadd_r(sh, pbc->byx, *dx);
 177     *dy = gmx_simd_fnmadd_r(sh, pbc->byy, *dy);
 178
 179     sh  = gmx_simd_round_r(gmx_simd_mul_r(*dx, pbc->inv_bxx));
 180     *dx = gmx_simd_fnmadd_r(sh, pbc->bxx, *dx);
 181 }
 182
 183 #endif /* GMX_SIMD_HAVE_REAL */
 184
 185 /*! \brief Morse potential bond
 186  *
 187  * By Frank Everdij. Three parameters needed:
 188  *
 189  * b0 = equilibrium distance in nm
 190  * be = beta in nm^-1 (actually, it's nu_e*Sqrt(2*pi*pi*mu/D_e))
 191  * cb = well depth in kJ/mol
 192  *
 193  * Note: the potential is referenced to be +cb at infinite separation
 194  *       and zero at the equilibrium distance!
 195  */
 196 real morse_bonds(int nbonds,
 197                  const t_iatom forceatoms[], const t_iparams forceparams[],
 198                  const rvec x[], rvec f[], rvec fshift[],
 199                  const t_pbc *pbc, const t_graph *g,
 200                  real lambda, real *dvdlambda,
 201                  const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
 202                  int gmx_unused *global_atom_index)
 203 {
 204     const real one = 1.0;
 205     const real two = 2.0;
 206     real       dr, dr2, temp, omtemp, cbomtemp, fbond, vbond, fij, vtot;
 207     real       b0, be, cb, b0A, beA, cbA, b0B, beB, cbB, L1;
 208     rvec       dx;
 209     int        i, m, ki, type, ai, aj;
 210     ivec       dt;
 211
 212     vtot = 0.0;
 213     for (i = 0; (i < nbonds); )
 214     {
 215         type = forceatoms[i++];
 216         ai   = forceatoms[i++];
 217         aj   = forceatoms[i++];
 218
 219         b0A   = forceparams[type].morse.b0A;
 220         beA   = forceparams[type].morse.betaA;
 221         cbA   = forceparams[type].morse.cbA;
 222
 223         b0B   = forceparams[type].morse.b0B;
 224         beB   = forceparams[type].morse.betaB;
 225         cbB   = forceparams[type].morse.cbB;
 226
 227         L1 = one-lambda;                            /* 1 */
 228         b0 = L1*b0A + lambda*b0B;                   /* 3 */
 229         be = L1*beA + lambda*beB;                   /* 3 */
 230         cb = L1*cbA + lambda*cbB;                   /* 3 */
 231
 232         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3          */
 233         dr2  = iprod(dx, dx);                       /*   5          */
 234         dr   = dr2*gmx_invsqrt(dr2);                /*  10          */
 235         temp = exp(-be*(dr-b0));                    /*  12          */
 236
 237         if (temp == one)
 238         {
 239             /* bonds are constrainted. This may _not_ include bond constraints if they are lambda dependent */
 240             *dvdlambda += cbB-cbA;
 241             continue;
 242         }
 243
 244         omtemp    = one-temp;                                                                                        /*   1          */
 245         cbomtemp  = cb*omtemp;                                                                                       /*   1          */
 246         vbond     = cbomtemp*omtemp;                                                                                 /*   1          */
 247         fbond     = -two*be*temp*cbomtemp*gmx_invsqrt(dr2);                                                          /*   9          */
 248         vtot     += vbond;                                                                                           /*   1          */
 249
 250         *dvdlambda += (cbB - cbA) * omtemp * omtemp - (2-2*omtemp)*omtemp * cb * ((b0B-b0A)*be - (beB-beA)*(dr-b0)); /* 15 */
 251
 252         if (g)
 253         {
 254             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
 255             ki = IVEC2IS(dt);
 256         }
 257
 258         for (m = 0; (m < DIM); m++)                    /*  15          */
 259         {
 260             fij                 = fbond*dx[m];
 261             f[ai][m]           += fij;
 262             f[aj][m]           -= fij;
 263             fshift[ki][m]      += fij;
 264             fshift[CENTRAL][m] -= fij;
 265         }
 266     }                                         /*  83 TOTAL    */
 267     return vtot;
 268 }
 269
 270 //! \cond
 271 real cubic_bonds(int nbonds,
 272                  const t_iatom forceatoms[], const t_iparams forceparams[],
 273                  const rvec x[], rvec f[], rvec fshift[],
 274                  const t_pbc *pbc, const t_graph *g,
 275                  real gmx_unused lambda, real gmx_unused *dvdlambda,
 276                  const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
 277                  int gmx_unused *global_atom_index)
 278 {
 279     const real three = 3.0;
 280     const real two   = 2.0;
 281     real       kb, b0, kcub;
 282     real       dr, dr2, dist, kdist, kdist2, fbond, vbond, fij, vtot;
 283     rvec       dx;
 284     int        i, m, ki, type, ai, aj;
 285     ivec       dt;
 286
 287     vtot = 0.0;
 288     for (i = 0; (i < nbonds); )
 289     {
 290         type = forceatoms[i++];
 291         ai   = forceatoms[i++];
 292         aj   = forceatoms[i++];
 293
 294         b0   = forceparams[type].cubic.b0;
 295         kb   = forceparams[type].cubic.kb;
 296         kcub = forceparams[type].cubic.kcub;
 297
 298         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx);     /*   3          */
 299         dr2  = iprod(dx, dx);                           /*   5          */
 300
 301         if (dr2 == 0.0)
 302         {
 303             continue;
 304         }
 305
 306         dr         = dr2*gmx_invsqrt(dr2);                  /*  10          */
 307         dist       = dr-b0;
 308         kdist      = kb*dist;
 309         kdist2     = kdist*dist;
 310
 311         vbond      = kdist2 + kcub*kdist2*dist;
 312         fbond      = -(two*kdist + three*kdist2*kcub)/dr;
 313
 314         vtot      += vbond;   /* 21 */
 315
 316         if (g)
 317         {
 318             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
 319             ki = IVEC2IS(dt);
 320         }
 321         for (m = 0; (m < DIM); m++)                    /*  15          */
 322         {
 323             fij                 = fbond*dx[m];
 324             f[ai][m]           += fij;
 325             f[aj][m]           -= fij;
 326             fshift[ki][m]      += fij;
 327             fshift[CENTRAL][m] -= fij;
 328         }
 329     }                                         /*  54 TOTAL    */
 330     return vtot;
 331 }
 332
 333 real FENE_bonds(int nbonds,
 334                 const t_iatom forceatoms[], const t_iparams forceparams[],
 335                 const rvec x[], rvec f[], rvec fshift[],
 336                 const t_pbc *pbc, const t_graph *g,
 337                 real gmx_unused lambda, real gmx_unused *dvdlambda,
 338                 const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
 339                 int *global_atom_index)
 340 {
 341     const real half = 0.5;
 342     const real one  = 1.0;
 343     real       bm, kb;
 344     real       dr2, bm2, omdr2obm2, fbond, vbond, fij, vtot;
 345     rvec       dx;
 346     int        i, m, ki, type, ai, aj;
 347     ivec       dt;
 348
 349     vtot = 0.0;
 350     for (i = 0; (i < nbonds); )
 351     {
 352         type = forceatoms[i++];
 353         ai   = forceatoms[i++];
 354         aj   = forceatoms[i++];
 355
 356         bm   = forceparams[type].fene.bm;
 357         kb   = forceparams[type].fene.kb;
 358
 359         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx);     /*   3          */
 360         dr2  = iprod(dx, dx);                           /*   5          */
 361
 362         if (dr2 == 0.0)
 363         {
 364             continue;
 365         }
 366
 367         bm2 = bm*bm;
 368
 369         if (dr2 >= bm2)
 370         {
 371             gmx_fatal(FARGS,
 372                       "r^2 (%f) >= bm^2 (%f) in FENE bond between atoms %d and %d",
 373                       dr2, bm2,
 374                       glatnr(global_atom_index, ai),
 375                       glatnr(global_atom_index, aj));
 376         }
 377
 378         omdr2obm2  = one - dr2/bm2;
 379
 380         vbond      = -half*kb*bm2*log(omdr2obm2);
 381         fbond      = -kb/omdr2obm2;
 382
 383         vtot      += vbond;   /* 35 */
 384
 385         if (g)
 386         {
 387             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
 388             ki = IVEC2IS(dt);
 389         }
 390         for (m = 0; (m < DIM); m++)                    /*  15          */
 391         {
 392             fij                 = fbond*dx[m];
 393             f[ai][m]           += fij;
 394             f[aj][m]           -= fij;
 395             fshift[ki][m]      += fij;
 396             fshift[CENTRAL][m] -= fij;
 397         }
 398     }                                         /*  58 TOTAL    */
 399     return vtot;
 400 }
 401
 402 real harmonic(real kA, real kB, real xA, real xB, real x, real lambda,
 403               real *V, real *F)
 404 {
 405     const real half = 0.5;
 406     real       L1, kk, x0, dx, dx2;
 407     real       v, f, dvdlambda;
 408
 409     L1    = 1.0-lambda;
 410     kk    = L1*kA+lambda*kB;
 411     x0    = L1*xA+lambda*xB;
 412
 413     dx    = x-x0;
 414     dx2   = dx*dx;
 415
 416     f          = -kk*dx;
 417     v          = half*kk*dx2;
 418     dvdlambda  = half*(kB-kA)*dx2 + (xA-xB)*kk*dx;
 419
 420     *F    = f;
 421     *V    = v;
 422
 423     return dvdlambda;
 424
 425     /* That was 19 flops */
 426 }
 427
 428
 429 real bonds(int nbonds,
 430            const t_iatom forceatoms[], const t_iparams forceparams[],
 431            const rvec x[], rvec f[], rvec fshift[],
 432            const t_pbc *pbc, const t_graph *g,
 433            real lambda, real *dvdlambda,
 434            const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
 435            int gmx_unused *global_atom_index)
 436 {
 437     int  i, m, ki, ai, aj, type;
 438     real dr, dr2, fbond, vbond, fij, vtot;
 439     rvec dx;
 440     ivec dt;
 441
 442     vtot = 0.0;
 443     for (i = 0; (i < nbonds); )
 444     {
 445         type = forceatoms[i++];
 446         ai   = forceatoms[i++];
 447         aj   = forceatoms[i++];
 448
 449         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
 450         dr2  = iprod(dx, dx);                       /*   5              */
 451         dr   = dr2*gmx_invsqrt(dr2);                /*  10              */
 452
 453         *dvdlambda += harmonic(forceparams[type].harmonic.krA,
 454                                forceparams[type].harmonic.krB,
 455                                forceparams[type].harmonic.rA,
 456                                forceparams[type].harmonic.rB,
 457                                dr, lambda, &vbond, &fbond); /*  19  */
 458
 459         if (dr2 == 0.0)
 460         {
 461             continue;
 462         }
 463
 464
 465         vtot  += vbond;            /* 1*/
 466         fbond *= gmx_invsqrt(dr2); /*   6               */
 467 #ifdef DEBUG
 468         if (debug)
 469         {
 470             fprintf(debug, "BONDS: dr = %10g  vbond = %10g  fbond = %10g\n",
 471                     dr, vbond, fbond);
 472         }
 473 #endif
 474         if (g)
 475         {
 476             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
 477             ki = IVEC2IS(dt);
 478         }
 479         for (m = 0; (m < DIM); m++)     /*  15          */
 480         {
 481             fij                 = fbond*dx[m];
 482             f[ai][m]           += fij;
 483             f[aj][m]           -= fij;
 484             fshift[ki][m]      += fij;
 485             fshift[CENTRAL][m] -= fij;
 486         }
 487     }               /* 59 TOTAL */
 488     return vtot;
 489 }
 490
 491 real restraint_bonds(int nbonds,
 492                      const t_iatom forceatoms[], const t_iparams forceparams[],
 493                      const rvec x[], rvec f[], rvec fshift[],
 494                      const t_pbc *pbc, const t_graph *g,
 495                      real lambda, real *dvdlambda,
 496                      const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
 497                      int gmx_unused *global_atom_index)
 498 {
 499     int  i, m, ki, ai, aj, type;
 500     real dr, dr2, fbond, vbond, fij, vtot;
 501     real L1;
 502     real low, dlow, up1, dup1, up2, dup2, k, dk;
 503     real drh, drh2;
 504     rvec dx;
 505     ivec dt;
 506
 507     L1   = 1.0 - lambda;
 508
 509     vtot = 0.0;
 510     for (i = 0; (i < nbonds); )
 511     {
 512         type = forceatoms[i++];
 513         ai   = forceatoms[i++];
 514         aj   = forceatoms[i++];
 515
 516         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
 517         dr2  = iprod(dx, dx);                       /*   5              */
 518         dr   = dr2*gmx_invsqrt(dr2);                /*  10              */
 519
 520         low  = L1*forceparams[type].restraint.lowA + lambda*forceparams[type].restraint.lowB;
 521         dlow =   -forceparams[type].restraint.lowA +        forceparams[type].restraint.lowB;
 522         up1  = L1*forceparams[type].restraint.up1A + lambda*forceparams[type].restraint.up1B;
 523         dup1 =   -forceparams[type].restraint.up1A +        forceparams[type].restraint.up1B;
 524         up2  = L1*forceparams[type].restraint.up2A + lambda*forceparams[type].restraint.up2B;
 525         dup2 =   -forceparams[type].restraint.up2A +        forceparams[type].restraint.up2B;
 526         k    = L1*forceparams[type].restraint.kA   + lambda*forceparams[type].restraint.kB;
 527         dk   =   -forceparams[type].restraint.kA   +        forceparams[type].restraint.kB;
 528         /* 24 */
 529
 530         if (dr < low)
 531         {
 532             drh         = dr - low;
 533             drh2        = drh*drh;
 534             vbond       = 0.5*k*drh2;
 535             fbond       = -k*drh;
 536             *dvdlambda += 0.5*dk*drh2 - k*dlow*drh;
 537         } /* 11 */
 538         else if (dr <= up1)
 539         {
 540             vbond = 0;
 541             fbond = 0;
 542         }
 543         else if (dr <= up2)
 544         {
 545             drh         = dr - up1;
 546             drh2        = drh*drh;
 547             vbond       = 0.5*k*drh2;
 548             fbond       = -k*drh;
 549             *dvdlambda += 0.5*dk*drh2 - k*dup1*drh;
 550         } /* 11 */
 551         else
 552         {
 553             drh         = dr - up2;
 554             vbond       = k*(up2 - up1)*(0.5*(up2 - up1) + drh);
 555             fbond       = -k*(up2 - up1);
 556             *dvdlambda += dk*(up2 - up1)*(0.5*(up2 - up1) + drh)
 557                 + k*(dup2 - dup1)*(up2 - up1 + drh)
 558                 - k*(up2 - up1)*dup2;
 559         }
 560
 561         if (dr2 == 0.0)
 562         {
 563             continue;
 564         }
 565
 566         vtot  += vbond;            /* 1*/
 567         fbond *= gmx_invsqrt(dr2); /*   6               */
 568 #ifdef DEBUG
 569         if (debug)
 570         {
 571             fprintf(debug, "BONDS: dr = %10g  vbond = %10g  fbond = %10g\n",
 572                     dr, vbond, fbond);
 573         }
 574 #endif
 575         if (g)
 576         {
 577             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
 578             ki = IVEC2IS(dt);
 579         }
 580         for (m = 0; (m < DIM); m++)             /*  15          */
 581         {
 582             fij                 = fbond*dx[m];
 583             f[ai][m]           += fij;
 584             f[aj][m]           -= fij;
 585             fshift[ki][m]      += fij;
 586             fshift[CENTRAL][m] -= fij;
 587         }
 588     }                   /* 59 TOTAL     */
 589
 590     return vtot;
 591 }
 592
 593 real polarize(int nbonds,
 594               const t_iatom forceatoms[], const t_iparams forceparams[],
 595               const rvec x[], rvec f[], rvec fshift[],
 596               const t_pbc *pbc, const t_graph *g,
 597               real lambda, real *dvdlambda,
 598               const t_mdatoms *md, t_fcdata gmx_unused *fcd,
 599               int gmx_unused *global_atom_index)
 600 {
 601     int  i, m, ki, ai, aj, type;
 602     real dr, dr2, fbond, vbond, fij, vtot, ksh;
 603     rvec dx;
 604     ivec dt;
 605
 606     vtot = 0.0;
 607     for (i = 0; (i < nbonds); )
 608     {
 609         type = forceatoms[i++];
 610         ai   = forceatoms[i++];
 611         aj   = forceatoms[i++];
 612         ksh  = sqr(md->chargeA[aj])*ONE_4PI_EPS0/forceparams[type].polarize.alpha;
 613         if (debug)
 614         {
 615             fprintf(debug, "POL: local ai = %d aj = %d ksh = %.3f\n", ai, aj, ksh);
 616         }
 617
 618         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx);                         /*   3      */
 619         dr2  = iprod(dx, dx);                                               /*   5              */
 620         dr   = dr2*gmx_invsqrt(dr2);                                        /*  10              */
 621
 622         *dvdlambda += harmonic(ksh, ksh, 0, 0, dr, lambda, &vbond, &fbond); /*  19  */
 623
 624         if (dr2 == 0.0)
 625         {
 626             continue;
 627         }
 628
 629         vtot  += vbond;            /* 1*/
 630         fbond *= gmx_invsqrt(dr2); /*   6               */
 631
 632         if (g)
 633         {
 634             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
 635             ki = IVEC2IS(dt);
 636         }
 637         for (m = 0; (m < DIM); m++)     /*  15          */
 638         {
 639             fij                 = fbond*dx[m];
 640             f[ai][m]           += fij;
 641             f[aj][m]           -= fij;
 642             fshift[ki][m]      += fij;
 643             fshift[CENTRAL][m] -= fij;
 644         }
 645     }               /* 59 TOTAL */
 646     return vtot;
 647 }
 648
 649 real anharm_polarize(int nbonds,
 650                      const t_iatom forceatoms[], const t_iparams forceparams[],
 651                      const rvec x[], rvec f[], rvec fshift[],
 652                      const t_pbc *pbc, const t_graph *g,
 653                      real lambda, real *dvdlambda,
 654                      const t_mdatoms *md, t_fcdata gmx_unused *fcd,
 655                      int gmx_unused *global_atom_index)
 656 {
 657     int  i, m, ki, ai, aj, type;
 658     real dr, dr2, fbond, vbond, fij, vtot, ksh, khyp, drcut, ddr, ddr3;
 659     rvec dx;
 660     ivec dt;
 661
 662     vtot = 0.0;
 663     for (i = 0; (i < nbonds); )
 664     {
 665         type  = forceatoms[i++];
 666         ai    = forceatoms[i++];
 667         aj    = forceatoms[i++];
 668         ksh   = sqr(md->chargeA[aj])*ONE_4PI_EPS0/forceparams[type].anharm_polarize.alpha; /* 7*/
 669         khyp  = forceparams[type].anharm_polarize.khyp;
 670         drcut = forceparams[type].anharm_polarize.drcut;
 671         if (debug)
 672         {
 673             fprintf(debug, "POL: local ai = %d aj = %d ksh = %.3f\n", ai, aj, ksh);
 674         }
 675
 676         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx);                         /*   3      */
 677         dr2  = iprod(dx, dx);                                               /*   5              */
 678         dr   = dr2*gmx_invsqrt(dr2);                                        /*  10              */
 679
 680         *dvdlambda += harmonic(ksh, ksh, 0, 0, dr, lambda, &vbond, &fbond); /*  19  */
 681
 682         if (dr2 == 0.0)
 683         {
 684             continue;
 685         }
 686
 687         if (dr > drcut)
 688         {
 689             ddr    = dr-drcut;
 690             ddr3   = ddr*ddr*ddr;
 691             vbond += khyp*ddr*ddr3;
 692             fbond -= 4*khyp*ddr3;
 693         }
 694         fbond *= gmx_invsqrt(dr2); /*   6               */
 695         vtot  += vbond;            /* 1*/
 696
 697         if (g)
 698         {
 699             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
 700             ki = IVEC2IS(dt);
 701         }
 702         for (m = 0; (m < DIM); m++)     /*  15          */
 703         {
 704             fij                 = fbond*dx[m];
 705             f[ai][m]           += fij;
 706             f[aj][m]           -= fij;
 707             fshift[ki][m]      += fij;
 708             fshift[CENTRAL][m] -= fij;
 709         }
 710     }               /* 72 TOTAL */
 711     return vtot;
 712 }
 713
 714 real water_pol(int nbonds,
 715                const t_iatom forceatoms[], const t_iparams forceparams[],
 716                const rvec x[], rvec f[], rvec gmx_unused fshift[],
 717                const t_pbc gmx_unused *pbc, const t_graph gmx_unused *g,
 718                real gmx_unused lambda, real gmx_unused *dvdlambda,
 719                const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
 720                int gmx_unused *global_atom_index)
 721 {
 722     /* This routine implements anisotropic polarizibility for water, through
 723      * a shell connected to a dummy with spring constant that differ in the
 724      * three spatial dimensions in the molecular frame.
 725      */
 726     int  i, m, aO, aH1, aH2, aD, aS, type, type0, ki;
 727     ivec dt;
 728     rvec dOH1, dOH2, dHH, dOD, dDS, nW, kk, dx, kdx, proj;
 729 #ifdef DEBUG
 730     rvec df;
 731 #endif
 732     real vtot, fij, r_HH, r_OD, r_nW, tx, ty, tz, qS;
 733
 734     vtot = 0.0;
 735     if (nbonds > 0)
 736     {
 737         type0  = forceatoms[0];
 738         aS     = forceatoms[5];
 739         qS     = md->chargeA[aS];
 740         kk[XX] = sqr(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_x;
 741         kk[YY] = sqr(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_y;
 742         kk[ZZ] = sqr(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_z;
 743         r_HH   = 1.0/forceparams[type0].wpol.rHH;
 744         if (debug)
 745         {
 746             fprintf(debug, "WPOL: qS  = %10.5f aS = %5d\n", qS, aS);
 747             fprintf(debug, "WPOL: kk  = %10.3f        %10.3f        %10.3f\n",
 748                     kk[XX], kk[YY], kk[ZZ]);
 749             fprintf(debug, "WPOL: rOH = %10.3f  rHH = %10.3f  rOD = %10.3f\n",
 750                     forceparams[type0].wpol.rOH,
 751                     forceparams[type0].wpol.rHH,
 752                     forceparams[type0].wpol.rOD);
 753         }
 754         for (i = 0; (i < nbonds); i += 6)
 755         {
 756             type = forceatoms[i];
 757             if (type != type0)
 758             {
 759                 gmx_fatal(FARGS, "Sorry, type = %d, type0 = %d, file = %s, line = %d",
 760                           type, type0, __FILE__, __LINE__);
 761             }
 762             aO   = forceatoms[i+1];
 763             aH1  = forceatoms[i+2];
 764             aH2  = forceatoms[i+3];
 765             aD   = forceatoms[i+4];
 766             aS   = forceatoms[i+5];
 767
 768             /* Compute vectors describing the water frame */
 769             pbc_rvec_sub(pbc, x[aH1], x[aO], dOH1);
 770             pbc_rvec_sub(pbc, x[aH2], x[aO], dOH2);
 771             pbc_rvec_sub(pbc, x[aH2], x[aH1], dHH);
 772             pbc_rvec_sub(pbc, x[aD], x[aO], dOD);
 773             ki = pbc_rvec_sub(pbc, x[aS], x[aD], dDS);
 774             cprod(dOH1, dOH2, nW);
 775
 776             /* Compute inverse length of normal vector
 777              * (this one could be precomputed, but I'm too lazy now)
 778              */
 779             r_nW = gmx_invsqrt(iprod(nW, nW));
 780             /* This is for precision, but does not make a big difference,
 781              * it can go later.
 782              */
 783             r_OD = gmx_invsqrt(iprod(dOD, dOD));
 784
 785             /* Normalize the vectors in the water frame */
 786             svmul(r_nW, nW, nW);
 787             svmul(r_HH, dHH, dHH);
 788             svmul(r_OD, dOD, dOD);
 789
 790             /* Compute displacement of shell along components of the vector */
 791             dx[ZZ] = iprod(dDS, dOD);
 792             /* Compute projection on the XY plane: dDS - dx[ZZ]*dOD */
 793             for (m = 0; (m < DIM); m++)
 794             {
 795                 proj[m] = dDS[m]-dx[ZZ]*dOD[m];
 796             }
 797
 798             /*dx[XX] = iprod(dDS,nW);
 799                dx[YY] = iprod(dDS,dHH);*/
 800             dx[XX] = iprod(proj, nW);
 801             for (m = 0; (m < DIM); m++)
 802             {
 803                 proj[m] -= dx[XX]*nW[m];
 804             }
 805             dx[YY] = iprod(proj, dHH);
 806             /*#define DEBUG*/
 807 #ifdef DEBUG
 808             if (debug)
 809             {
 810                 fprintf(debug, "WPOL: dx2=%10g  dy2=%10g  dz2=%10g  sum=%10g  dDS^2=%10g\n",
 811                         sqr(dx[XX]), sqr(dx[YY]), sqr(dx[ZZ]), iprod(dx, dx), iprod(dDS, dDS));
 812                 fprintf(debug, "WPOL: dHH=(%10g,%10g,%10g)\n", dHH[XX], dHH[YY], dHH[ZZ]);
 813                 fprintf(debug, "WPOL: dOD=(%10g,%10g,%10g), 1/r_OD = %10g\n",
 814                         dOD[XX], dOD[YY], dOD[ZZ], 1/r_OD);
 815                 fprintf(debug, "WPOL: nW =(%10g,%10g,%10g), 1/r_nW = %10g\n",
 816                         nW[XX], nW[YY], nW[ZZ], 1/r_nW);
 817                 fprintf(debug, "WPOL: dx  =%10g, dy  =%10g, dz  =%10g\n",
 818                         dx[XX], dx[YY], dx[ZZ]);
 819                 fprintf(debug, "WPOL: dDSx=%10g, dDSy=%10g, dDSz=%10g\n",
 820                         dDS[XX], dDS[YY], dDS[ZZ]);
 821             }
 822 #endif
 823             /* Now compute the forces and energy */
 824             kdx[XX] = kk[XX]*dx[XX];
 825             kdx[YY] = kk[YY]*dx[YY];
 826             kdx[ZZ] = kk[ZZ]*dx[ZZ];
 827             vtot   += iprod(dx, kdx);
 828
 829             if (g)
 830             {
 831                 ivec_sub(SHIFT_IVEC(g, aS), SHIFT_IVEC(g, aD), dt);
 832                 ki = IVEC2IS(dt);
 833             }
 834
 835             for (m = 0; (m < DIM); m++)
 836             {
 837                 /* This is a tensor operation but written out for speed */
 838                 tx        =  nW[m]*kdx[XX];
 839                 ty        = dHH[m]*kdx[YY];
 840                 tz        = dOD[m]*kdx[ZZ];
 841                 fij       = -tx-ty-tz;
 842 #ifdef DEBUG
 843                 df[m] = fij;
 844 #endif
 845                 f[aS][m]           += fij;
 846                 f[aD][m]           -= fij;
 847                 fshift[ki][m]      += fij;
 848                 fshift[CENTRAL][m] -= fij;
 849             }
 850 #ifdef DEBUG
 851             if (debug)
 852             {
 853                 fprintf(debug, "WPOL: vwpol=%g\n", 0.5*iprod(dx, kdx));
 854                 fprintf(debug, "WPOL: df = (%10g, %10g, %10g)\n", df[XX], df[YY], df[ZZ]);
 855             }
 856 #endif
 857         }
 858     }
 859     return 0.5*vtot;
 860 }
 861
 862 static real do_1_thole(const rvec xi, const rvec xj, rvec fi, rvec fj,
 863                        const t_pbc *pbc, real qq,
 864                        rvec fshift[], real afac)
 865 {
 866     rvec r12;
 867     real r12sq, r12_1, r12bar, v0, v1, fscal, ebar, fff;
 868     int  m, t;
 869
 870     t      = pbc_rvec_sub(pbc, xi, xj, r12);                      /*  3 */
 871
 872     r12sq  = iprod(r12, r12);                                     /*  5 */
 873     r12_1  = gmx_invsqrt(r12sq);                                  /*  5 */
 874     r12bar = afac/r12_1;                                          /*  5 */
 875     v0     = qq*ONE_4PI_EPS0*r12_1;                               /*  2 */
 876     ebar   = exp(-r12bar);                                        /*  5 */
 877     v1     = (1-(1+0.5*r12bar)*ebar);                             /*  4 */
 878     fscal  = ((v0*r12_1)*v1 - v0*0.5*afac*ebar*(r12bar+1))*r12_1; /* 9 */
 879     if (debug)
 880     {
 881         fprintf(debug, "THOLE: v0 = %.3f v1 = %.3f r12= % .3f r12bar = %.3f fscal = %.3f  ebar = %.3f\n", v0, v1, 1/r12_1, r12bar, fscal, ebar);
 882     }
 883
 884     for (m = 0; (m < DIM); m++)
 885     {
 886         fff                 = fscal*r12[m];
 887         fi[m]              += fff;
 888         fj[m]              -= fff;
 889         fshift[t][m]       += fff;
 890         fshift[CENTRAL][m] -= fff;
 891     }             /* 15 */
 892
 893     return v0*v1; /* 1 */
 894     /* 54 */
 895 }
 896
 897 real thole_pol(int nbonds,
 898                const t_iatom forceatoms[], const t_iparams forceparams[],
 899                const rvec x[], rvec f[], rvec fshift[],
 900                const t_pbc *pbc, const t_graph gmx_unused *g,
 901                real gmx_unused lambda, real gmx_unused *dvdlambda,
 902                const t_mdatoms *md, t_fcdata gmx_unused *fcd,
 903                int gmx_unused *global_atom_index)
 904 {
 905     /* Interaction between two pairs of particles with opposite charge */
 906     int        i, type, a1, da1, a2, da2;
 907     real       q1, q2, qq, a, al1, al2, afac;
 908     real       V             = 0;
 909     const real minusOneOnSix = -1.0/6.0;
 910
 911     for (i = 0; (i < nbonds); )
 912     {
 913         type  = forceatoms[i++];
 914         a1    = forceatoms[i++];
 915         da1   = forceatoms[i++];
 916         a2    = forceatoms[i++];
 917         da2   = forceatoms[i++];
 918         q1    = md->chargeA[da1];
 919         q2    = md->chargeA[da2];
 920         a     = forceparams[type].thole.a;
 921         al1   = forceparams[type].thole.alpha1;
 922         al2   = forceparams[type].thole.alpha2;
 923         qq    = q1*q2;
 924         afac  = a*pow(al1*al2, minusOneOnSix);
 925         V    += do_1_thole(x[a1], x[a2], f[a1], f[a2], pbc, qq, fshift, afac);
 926         V    += do_1_thole(x[da1], x[a2], f[da1], f[a2], pbc, -qq, fshift, afac);
 927         V    += do_1_thole(x[a1], x[da2], f[a1], f[da2], pbc, -qq, fshift, afac);
 928         V    += do_1_thole(x[da1], x[da2], f[da1], f[da2], pbc, qq, fshift, afac);
 929     }
 930     /* 290 flops */
 931     return V;
 932 }
 933
 934 real bond_angle(const rvec xi, const rvec xj, const rvec xk, const t_pbc *pbc,
 935                 rvec r_ij, rvec r_kj, real *costh,
 936                 int *t1, int *t2)
 937 /* Return value is the angle between the bonds i-j and j-k */
 938 {
 939     /* 41 FLOPS */
 940     real th;
 941
 942     *t1 = pbc_rvec_sub(pbc, xi, xj, r_ij); /*  3                */
 943     *t2 = pbc_rvec_sub(pbc, xk, xj, r_kj); /*  3                */
 944
 945     *costh = cos_angle(r_ij, r_kj);        /* 25                */
 946     th     = acos(*costh);                 /* 10                */
 947     /* 41 TOTAL */
 948     return th;
 949 }
 950
 951 real angles(int nbonds,
 952             const t_iatom forceatoms[], const t_iparams forceparams[],
 953             const rvec x[], rvec f[], rvec fshift[],
 954             const t_pbc *pbc, const t_graph *g,
 955             real lambda, real *dvdlambda,
 956             const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
 957             int gmx_unused *global_atom_index)
 958 {
 959     int  i, ai, aj, ak, t1, t2, type;
 960     rvec r_ij, r_kj;
 961     real cos_theta, cos_theta2, theta, dVdt, va, vtot;
 962     ivec jt, dt_ij, dt_kj;
 963
 964     vtot = 0.0;
 965     for (i = 0; i < nbonds; )
 966     {
 967         type = forceatoms[i++];
 968         ai   = forceatoms[i++];
 969         aj   = forceatoms[i++];
 970         ak   = forceatoms[i++];
 971
 972         theta  = bond_angle(x[ai], x[aj], x[ak], pbc,
 973                             r_ij, r_kj, &cos_theta, &t1, &t2);  /*  41          */
 974
 975         *dvdlambda += harmonic(forceparams[type].harmonic.krA,
 976                                forceparams[type].harmonic.krB,
 977                                forceparams[type].harmonic.rA*DEG2RAD,
 978                                forceparams[type].harmonic.rB*DEG2RAD,
 979                                theta, lambda, &va, &dVdt);  /*  21  */
 980         vtot += va;
 981
 982         cos_theta2 = sqr(cos_theta);
 983         if (cos_theta2 < 1)
 984         {
 985             int  m;
 986             real st, sth;
 987             real cik, cii, ckk;
 988             real nrkj2, nrij2;
 989             real nrkj_1, nrij_1;
 990             rvec f_i, f_j, f_k;
 991
 992             st  = dVdt*gmx_invsqrt(1 - cos_theta2); /*  12              */
 993             sth = st*cos_theta;                     /*   1              */
 994 #ifdef DEBUG
 995             if (debug)
 996             {
 997                 fprintf(debug, "ANGLES: theta = %10g  vth = %10g  dV/dtheta = %10g\n",
 998                         theta*RAD2DEG, va, dVdt);
 999             }
1000 #endif
1001             nrij2 = iprod(r_ij, r_ij);      /*   5              */
1002             nrkj2 = iprod(r_kj, r_kj);      /*   5              */
1003
1004             nrij_1 = gmx_invsqrt(nrij2);    /*  10              */
1005             nrkj_1 = gmx_invsqrt(nrkj2);    /*  10              */
1006
1007             cik = st*nrij_1*nrkj_1;         /*   2              */
1008             cii = sth*nrij_1*nrij_1;        /*   2              */
1009             ckk = sth*nrkj_1*nrkj_1;        /*   2              */
1010
1011             for (m = 0; m < DIM; m++)
1012             {           /*  39          */
1013                 f_i[m]    = -(cik*r_kj[m] - cii*r_ij[m]);
1014                 f_k[m]    = -(cik*r_ij[m] - ckk*r_kj[m]);
1015                 f_j[m]    = -f_i[m] - f_k[m];
1016                 f[ai][m] += f_i[m];
1017                 f[aj][m] += f_j[m];
1018                 f[ak][m] += f_k[m];
1019             }
1020             if (g != NULL)
1021             {
1022                 copy_ivec(SHIFT_IVEC(g, aj), jt);
1023
1024                 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
1025                 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
1026                 t1 = IVEC2IS(dt_ij);
1027                 t2 = IVEC2IS(dt_kj);
1028             }
1029             rvec_inc(fshift[t1], f_i);
1030             rvec_inc(fshift[CENTRAL], f_j);
1031             rvec_inc(fshift[t2], f_k);
1032         }                                           /* 161 TOTAL        */
1033     }
1034
1035     return vtot;
1036 }
1037
1038 #ifdef GMX_SIMD_HAVE_REAL
1039
1040 /* As angles, but using SIMD to calculate many angles at once.
1041  * This routines does not calculate energies and shift forces.
1042  */
1043 void
1044 angles_noener_simd(int nbonds,
1045                    const t_iatom forceatoms[], const t_iparams forceparams[],
1046                    const rvec x[], rvec f[],
1047                    const t_pbc *pbc, const t_graph gmx_unused *g,
1048                    real gmx_unused lambda,
1049                    const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
1050                    int gmx_unused *global_atom_index)
1051 {
1052     const int            nfa1 = 4;
1053     int                  i, iu, s, m;
1054     int                  type, ai[GMX_SIMD_REAL_WIDTH], aj[GMX_SIMD_REAL_WIDTH];
1055     int                  ak[GMX_SIMD_REAL_WIDTH];
1056     real                 coeff_array[2*GMX_SIMD_REAL_WIDTH+GMX_SIMD_REAL_WIDTH], *coeff;
1057     real                 dr_array[2*DIM*GMX_SIMD_REAL_WIDTH+GMX_SIMD_REAL_WIDTH], *dr;
1058     real                 f_buf_array[6*GMX_SIMD_REAL_WIDTH+GMX_SIMD_REAL_WIDTH], *f_buf;
1059     gmx_simd_real_t      k_S, theta0_S;
1060     gmx_simd_real_t      rijx_S, rijy_S, rijz_S;
1061     gmx_simd_real_t      rkjx_S, rkjy_S, rkjz_S;
1062     gmx_simd_real_t      one_S;
1063     gmx_simd_real_t      min_one_plus_eps_S;
1064     gmx_simd_real_t      rij_rkj_S;
1065     gmx_simd_real_t      nrij2_S, nrij_1_S;
1066     gmx_simd_real_t      nrkj2_S, nrkj_1_S;
1067     gmx_simd_real_t      cos_S, invsin_S;
1068     gmx_simd_real_t      theta_S;
1069     gmx_simd_real_t      st_S, sth_S;
1070     gmx_simd_real_t      cik_S, cii_S, ckk_S;
1071     gmx_simd_real_t      f_ix_S, f_iy_S, f_iz_S;
1072     gmx_simd_real_t      f_kx_S, f_ky_S, f_kz_S;
1073     pbc_simd_t           pbc_simd;
1074
1075     /* Ensure register memory alignment */
1076     coeff = gmx_simd_align_r(coeff_array);
1077     dr    = gmx_simd_align_r(dr_array);
1078     f_buf = gmx_simd_align_r(f_buf_array);
1079
1080     set_pbc_simd(pbc, &pbc_simd);
1081
1082     one_S = gmx_simd_set1_r(1.0);
1083
1084     /* The smallest number > -1 */
1085     min_one_plus_eps_S = gmx_simd_set1_r(-1.0 + 2*GMX_REAL_EPS);
1086
1087     /* nbonds is the number of angles times nfa1, here we step GMX_SIMD_REAL_WIDTH angles */
1088     for (i = 0; (i < nbonds); i += GMX_SIMD_REAL_WIDTH*nfa1)
1089     {
1090         /* Collect atoms for GMX_SIMD_REAL_WIDTH angles.
1091          * iu indexes into forceatoms, we should not let iu go beyond nbonds.
1092          */
1093         iu = i;
1094         for (s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
1095         {
1096             type  = forceatoms[iu];
1097             ai[s] = forceatoms[iu+1];
1098             aj[s] = forceatoms[iu+2];
1099             ak[s] = forceatoms[iu+3];
1100
1101             coeff[s]                     = forceparams[type].harmonic.krA;
1102             coeff[GMX_SIMD_REAL_WIDTH+s] = forceparams[type].harmonic.rA*DEG2RAD;
1103
1104             /* If you can't use pbc_dx_simd below for PBC, e.g. because
1105              * you can't round in SIMD, use pbc_rvec_sub here.
1106              */
1107             /* Store the non PBC corrected distances packed and aligned */
1108             for (m = 0; m < DIM; m++)
1109             {
1110                 dr[s +      m *GMX_SIMD_REAL_WIDTH] = x[ai[s]][m] - x[aj[s]][m];
1111                 dr[s + (DIM+m)*GMX_SIMD_REAL_WIDTH] = x[ak[s]][m] - x[aj[s]][m];
1112             }
1113
1114             /* At the end fill the arrays with identical entries */
1115             if (iu + nfa1 < nbonds)
1116             {
1117                 iu += nfa1;
1118             }
1119         }
1120
1121         k_S       = gmx_simd_load_r(coeff);
1122         theta0_S  = gmx_simd_load_r(coeff+GMX_SIMD_REAL_WIDTH);
1123
1124         rijx_S    = gmx_simd_load_r(dr + 0*GMX_SIMD_REAL_WIDTH);
1125         rijy_S    = gmx_simd_load_r(dr + 1*GMX_SIMD_REAL_WIDTH);
1126         rijz_S    = gmx_simd_load_r(dr + 2*GMX_SIMD_REAL_WIDTH);
1127         rkjx_S    = gmx_simd_load_r(dr + 3*GMX_SIMD_REAL_WIDTH);
1128         rkjy_S    = gmx_simd_load_r(dr + 4*GMX_SIMD_REAL_WIDTH);
1129         rkjz_S    = gmx_simd_load_r(dr + 5*GMX_SIMD_REAL_WIDTH);
1130
1131         pbc_dx_simd(&rijx_S, &rijy_S, &rijz_S, &pbc_simd);
1132         pbc_dx_simd(&rkjx_S, &rkjy_S, &rkjz_S, &pbc_simd);
1133
1134         rij_rkj_S = gmx_simd_iprod_r(rijx_S, rijy_S, rijz_S,
1135                                      rkjx_S, rkjy_S, rkjz_S);
1136
1137         nrij2_S   = gmx_simd_norm2_r(rijx_S, rijy_S, rijz_S);
1138         nrkj2_S   = gmx_simd_norm2_r(rkjx_S, rkjy_S, rkjz_S);
1139
1140         nrij_1_S  = gmx_simd_invsqrt_r(nrij2_S);
1141         nrkj_1_S  = gmx_simd_invsqrt_r(nrkj2_S);
1142
1143         cos_S     = gmx_simd_mul_r(rij_rkj_S, gmx_simd_mul_r(nrij_1_S, nrkj_1_S));
1144
1145         /* To allow for 180 degrees, we take the max of cos and -1 + 1bit,
1146          * so we can safely get the 1/sin from 1/sqrt(1 - cos^2).
1147          * This also ensures that rounding errors would cause the argument
1148          * of gmx_simd_acos_r to be < -1.
1149          * Note that we do not take precautions for cos(0)=1, so the outer
1150          * atoms in an angle should not be on top of each other.
1151          */
1152         cos_S     = gmx_simd_max_r(cos_S, min_one_plus_eps_S);
1153
1154         theta_S   = gmx_simd_acos_r(cos_S);
1155
1156         invsin_S  = gmx_simd_invsqrt_r(gmx_simd_sub_r(one_S, gmx_simd_mul_r(cos_S, cos_S)));
1157
1158         st_S      = gmx_simd_mul_r(gmx_simd_mul_r(k_S, gmx_simd_sub_r(theta0_S, theta_S)),
1159                                    invsin_S);
1160         sth_S     = gmx_simd_mul_r(st_S, cos_S);
1161
1162         cik_S     = gmx_simd_mul_r(st_S,  gmx_simd_mul_r(nrij_1_S, nrkj_1_S));
1163         cii_S     = gmx_simd_mul_r(sth_S, gmx_simd_mul_r(nrij_1_S, nrij_1_S));
1164         ckk_S     = gmx_simd_mul_r(sth_S, gmx_simd_mul_r(nrkj_1_S, nrkj_1_S));
1165
1166         f_ix_S    = gmx_simd_mul_r(cii_S, rijx_S);
1167         f_ix_S    = gmx_simd_fnmadd_r(cik_S, rkjx_S, f_ix_S);
1168         f_iy_S    = gmx_simd_mul_r(cii_S, rijy_S);
1169         f_iy_S    = gmx_simd_fnmadd_r(cik_S, rkjy_S, f_iy_S);
1170         f_iz_S    = gmx_simd_mul_r(cii_S, rijz_S);
1171         f_iz_S    = gmx_simd_fnmadd_r(cik_S, rkjz_S, f_iz_S);
1172         f_kx_S    = gmx_simd_mul_r(ckk_S, rkjx_S);
1173         f_kx_S    = gmx_simd_fnmadd_r(cik_S, rijx_S, f_kx_S);
1174         f_ky_S    = gmx_simd_mul_r(ckk_S, rkjy_S);
1175         f_ky_S    = gmx_simd_fnmadd_r(cik_S, rijy_S, f_ky_S);
1176         f_kz_S    = gmx_simd_mul_r(ckk_S, rkjz_S);
1177         f_kz_S    = gmx_simd_fnmadd_r(cik_S, rijz_S, f_kz_S);
1178
1179         gmx_simd_store_r(f_buf + 0*GMX_SIMD_REAL_WIDTH, f_ix_S);
1180         gmx_simd_store_r(f_buf + 1*GMX_SIMD_REAL_WIDTH, f_iy_S);
1181         gmx_simd_store_r(f_buf + 2*GMX_SIMD_REAL_WIDTH, f_iz_S);
1182         gmx_simd_store_r(f_buf + 3*GMX_SIMD_REAL_WIDTH, f_kx_S);
1183         gmx_simd_store_r(f_buf + 4*GMX_SIMD_REAL_WIDTH, f_ky_S);
1184         gmx_simd_store_r(f_buf + 5*GMX_SIMD_REAL_WIDTH, f_kz_S);
1185
1186         iu = i;
1187         s  = 0;
1188         do
1189         {
1190             for (m = 0; m < DIM; m++)
1191             {
1192                 f[ai[s]][m] += f_buf[s + m*GMX_SIMD_REAL_WIDTH];
1193                 f[aj[s]][m] -= f_buf[s + m*GMX_SIMD_REAL_WIDTH] + f_buf[s + (DIM+m)*GMX_SIMD_REAL_WIDTH];
1194                 f[ak[s]][m] += f_buf[s + (DIM+m)*GMX_SIMD_REAL_WIDTH];
1195             }
1196             s++;
1197             iu += nfa1;
1198         }
1199         while (s < GMX_SIMD_REAL_WIDTH && iu < nbonds);
1200     }
1201 }
1202
1203 #endif /* GMX_SIMD_HAVE_REAL */
1204
1205 real linear_angles(int nbonds,
1206                    const t_iatom forceatoms[], const t_iparams forceparams[],
1207                    const rvec x[], rvec f[], rvec fshift[],
1208                    const t_pbc *pbc, const t_graph *g,
1209                    real lambda, real *dvdlambda,
1210                    const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
1211                    int gmx_unused *global_atom_index)
1212 {
1213     int  i, m, ai, aj, ak, t1, t2, type;
1214     rvec f_i, f_j, f_k;
1215     real L1, kA, kB, aA, aB, dr, dr2, va, vtot, a, b, klin;
1216     ivec jt, dt_ij, dt_kj;
1217     rvec r_ij, r_kj, r_ik, dx;
1218
1219     L1   = 1-lambda;
1220     vtot = 0.0;
1221     for (i = 0; (i < nbonds); )
1222     {
1223         type = forceatoms[i++];
1224         ai   = forceatoms[i++];
1225         aj   = forceatoms[i++];
1226         ak   = forceatoms[i++];
1227
1228         kA   = forceparams[type].linangle.klinA;
1229         kB   = forceparams[type].linangle.klinB;
1230         klin = L1*kA + lambda*kB;
1231
1232         aA   = forceparams[type].linangle.aA;
1233         aB   = forceparams[type].linangle.aB;
1234         a    = L1*aA+lambda*aB;
1235         b    = 1-a;
1236
1237         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], r_ij);
1238         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], r_kj);
1239         rvec_sub(r_ij, r_kj, r_ik);
1240
1241         dr2 = 0;
1242         for (m = 0; (m < DIM); m++)
1243         {
1244             dr        = -a * r_ij[m] - b * r_kj[m];
1245             dr2      += dr*dr;
1246             dx[m]     = dr;
1247             f_i[m]    = a*klin*dr;
1248             f_k[m]    = b*klin*dr;
1249             f_j[m]    = -(f_i[m]+f_k[m]);
1250             f[ai][m] += f_i[m];
1251             f[aj][m] += f_j[m];
1252             f[ak][m] += f_k[m];
1253         }
1254         va          = 0.5*klin*dr2;
1255         *dvdlambda += 0.5*(kB-kA)*dr2 + klin*(aB-aA)*iprod(dx, r_ik);
1256
1257         vtot += va;
1258
1259         if (g)
1260         {
1261             copy_ivec(SHIFT_IVEC(g, aj), jt);
1262
1263             ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
1264             ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
1265             t1 = IVEC2IS(dt_ij);
1266             t2 = IVEC2IS(dt_kj);
1267         }
1268         rvec_inc(fshift[t1], f_i);
1269         rvec_inc(fshift[CENTRAL], f_j);
1270         rvec_inc(fshift[t2], f_k);
1271     }                                         /* 57 TOTAL       */
1272     return vtot;
1273 }
1274
1275 real urey_bradley(int nbonds,
1276                   const t_iatom forceatoms[], const t_iparams forceparams[],
1277                   const rvec x[], rvec f[], rvec fshift[],
1278                   const t_pbc *pbc, const t_graph *g,
1279                   real lambda, real *dvdlambda,
1280                   const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
1281                   int gmx_unused *global_atom_index)
1282 {
1283     int  i, m, ai, aj, ak, t1, t2, type, ki;
1284     rvec r_ij, r_kj, r_ik;
1285     real cos_theta, cos_theta2, theta;
1286     real dVdt, va, vtot, dr, dr2, vbond, fbond, fik;
1287     real kthA, th0A, kUBA, r13A, kthB, th0B, kUBB, r13B;
1288     ivec jt, dt_ij, dt_kj, dt_ik;
1289
1290     vtot = 0.0;
1291     for (i = 0; (i < nbonds); )
1292     {
1293         type  = forceatoms[i++];
1294         ai    = forceatoms[i++];
1295         aj    = forceatoms[i++];
1296         ak    = forceatoms[i++];
1297         th0A  = forceparams[type].u_b.thetaA*DEG2RAD;
1298         kthA  = forceparams[type].u_b.kthetaA;
1299         r13A  = forceparams[type].u_b.r13A;
1300         kUBA  = forceparams[type].u_b.kUBA;
1301         th0B  = forceparams[type].u_b.thetaB*DEG2RAD;
1302         kthB  = forceparams[type].u_b.kthetaB;
1303         r13B  = forceparams[type].u_b.r13B;
1304         kUBB  = forceparams[type].u_b.kUBB;
1305
1306         theta  = bond_angle(x[ai], x[aj], x[ak], pbc,
1307                             r_ij, r_kj, &cos_theta, &t1, &t2);                     /*  41               */
1308
1309         *dvdlambda += harmonic(kthA, kthB, th0A, th0B, theta, lambda, &va, &dVdt); /*  21  */
1310         vtot       += va;
1311
1312         ki   = pbc_rvec_sub(pbc, x[ai], x[ak], r_ik);                               /*   3      */
1313         dr2  = iprod(r_ik, r_ik);                                                   /*   5              */
1314         dr   = dr2*gmx_invsqrt(dr2);                                                /*  10              */
1315
1316         *dvdlambda += harmonic(kUBA, kUBB, r13A, r13B, dr, lambda, &vbond, &fbond); /*  19  */
1317
1318         cos_theta2 = sqr(cos_theta);                                                /*   1              */
1319         if (cos_theta2 < 1)
1320         {
1321             real st, sth;
1322             real cik, cii, ckk;
1323             real nrkj2, nrij2;
1324             rvec f_i, f_j, f_k;
1325
1326             st  = dVdt*gmx_invsqrt(1 - cos_theta2); /*  12              */
1327             sth = st*cos_theta;                     /*   1              */
1328 #ifdef DEBUG
1329             if (debug)
1330             {
1331                 fprintf(debug, "ANGLES: theta = %10g  vth = %10g  dV/dtheta = %10g\n",
1332                         theta*RAD2DEG, va, dVdt);
1333             }
1334 #endif
1335             nrkj2 = iprod(r_kj, r_kj);  /*   5          */
1336             nrij2 = iprod(r_ij, r_ij);
1337
1338             cik = st*gmx_invsqrt(nrkj2*nrij2); /*  12           */
1339             cii = sth/nrij2;                   /*  10           */
1340             ckk = sth/nrkj2;                   /*  10           */
1341
1342             for (m = 0; (m < DIM); m++)        /*  39           */
1343             {
1344                 f_i[m]    = -(cik*r_kj[m]-cii*r_ij[m]);
1345                 f_k[m]    = -(cik*r_ij[m]-ckk*r_kj[m]);
1346                 f_j[m]    = -f_i[m]-f_k[m];
1347                 f[ai][m] += f_i[m];
1348                 f[aj][m] += f_j[m];
1349                 f[ak][m] += f_k[m];
1350             }
1351             if (g)
1352             {
1353                 copy_ivec(SHIFT_IVEC(g, aj), jt);
1354
1355                 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
1356                 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
1357                 t1 = IVEC2IS(dt_ij);
1358                 t2 = IVEC2IS(dt_kj);
1359             }
1360             rvec_inc(fshift[t1], f_i);
1361             rvec_inc(fshift[CENTRAL], f_j);
1362             rvec_inc(fshift[t2], f_k);
1363         }                                       /* 161 TOTAL    */
1364         /* Time for the bond calculations */
1365         if (dr2 == 0.0)
1366         {
1367             continue;
1368         }
1369
1370         vtot  += vbond;            /* 1*/
1371         fbond *= gmx_invsqrt(dr2); /*   6               */
1372
1373         if (g)
1374         {
1375             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, ak), dt_ik);
1376             ki = IVEC2IS(dt_ik);
1377         }
1378         for (m = 0; (m < DIM); m++)     /*  15          */
1379         {
1380             fik                 = fbond*r_ik[m];
1381             f[ai][m]           += fik;
1382             f[ak][m]           -= fik;
1383             fshift[ki][m]      += fik;
1384             fshift[CENTRAL][m] -= fik;
1385         }
1386     }
1387     return vtot;
1388 }
1389
1390 real quartic_angles(int nbonds,
1391                     const t_iatom forceatoms[], const t_iparams forceparams[],
1392                     const rvec x[], rvec f[], rvec fshift[],
1393                     const t_pbc *pbc, const t_graph *g,
1394                     real gmx_unused lambda, real gmx_unused *dvdlambda,
1395                     const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
1396                     int gmx_unused *global_atom_index)
1397 {
1398     int  i, j, ai, aj, ak, t1, t2, type;
1399     rvec r_ij, r_kj;
1400     real cos_theta, cos_theta2, theta, dt, dVdt, va, dtp, c, vtot;
1401     ivec jt, dt_ij, dt_kj;
1402
1403     vtot = 0.0;
1404     for (i = 0; (i < nbonds); )
1405     {
1406         type = forceatoms[i++];
1407         ai   = forceatoms[i++];
1408         aj   = forceatoms[i++];
1409         ak   = forceatoms[i++];
1410
1411         theta  = bond_angle(x[ai], x[aj], x[ak], pbc,
1412                             r_ij, r_kj, &cos_theta, &t1, &t2); /*  41           */
1413
1414         dt = theta - forceparams[type].qangle.theta*DEG2RAD;   /* 2          */
1415
1416         dVdt = 0;
1417         va   = forceparams[type].qangle.c[0];
1418         dtp  = 1.0;
1419         for (j = 1; j <= 4; j++)
1420         {
1421             c     = forceparams[type].qangle.c[j];
1422             dVdt -= j*c*dtp;
1423             dtp  *= dt;
1424             va   += c*dtp;
1425         }
1426         /* 20 */
1427
1428         vtot += va;
1429
1430         cos_theta2 = sqr(cos_theta);            /*   1          */
1431         if (cos_theta2 < 1)
1432         {
1433             int  m;
1434             real st, sth;
1435             real cik, cii, ckk;
1436             real nrkj2, nrij2;
1437             rvec f_i, f_j, f_k;
1438
1439             st  = dVdt*gmx_invsqrt(1 - cos_theta2); /*  12              */
1440             sth = st*cos_theta;                     /*   1              */
1441 #ifdef DEBUG
1442             if (debug)
1443             {
1444                 fprintf(debug, "ANGLES: theta = %10g  vth = %10g  dV/dtheta = %10g\n",
1445                         theta*RAD2DEG, va, dVdt);
1446             }
1447 #endif
1448             nrkj2 = iprod(r_kj, r_kj);  /*   5          */
1449             nrij2 = iprod(r_ij, r_ij);
1450
1451             cik = st*gmx_invsqrt(nrkj2*nrij2); /*  12           */
1452             cii = sth/nrij2;                   /*  10           */
1453             ckk = sth/nrkj2;                   /*  10           */
1454
1455             for (m = 0; (m < DIM); m++)        /*  39           */
1456             {
1457                 f_i[m]    = -(cik*r_kj[m]-cii*r_ij[m]);
1458                 f_k[m]    = -(cik*r_ij[m]-ckk*r_kj[m]);
1459                 f_j[m]    = -f_i[m]-f_k[m];
1460                 f[ai][m] += f_i[m];
1461                 f[aj][m] += f_j[m];
1462                 f[ak][m] += f_k[m];
1463             }
1464             if (g)
1465             {
1466                 copy_ivec(SHIFT_IVEC(g, aj), jt);
1467
1468                 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
1469                 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
1470                 t1 = IVEC2IS(dt_ij);
1471                 t2 = IVEC2IS(dt_kj);
1472             }
1473             rvec_inc(fshift[t1], f_i);
1474             rvec_inc(fshift[CENTRAL], f_j);
1475             rvec_inc(fshift[t2], f_k);
1476         }                                       /* 153 TOTAL    */
1477     }
1478     return vtot;
1479 }
1480
1481 real dih_angle(const rvec xi, const rvec xj, const rvec xk, const rvec xl,
1482                const t_pbc *pbc,
1483                rvec r_ij, rvec r_kj, rvec r_kl, rvec m, rvec n,
1484                real *sign, int *t1, int *t2, int *t3)
1485 {
1486     real ipr, phi;
1487
1488     *t1 = pbc_rvec_sub(pbc, xi, xj, r_ij); /*  3        */
1489     *t2 = pbc_rvec_sub(pbc, xk, xj, r_kj); /*  3                */
1490     *t3 = pbc_rvec_sub(pbc, xk, xl, r_kl); /*  3                */
1491
1492     cprod(r_ij, r_kj, m);                  /*  9        */
1493     cprod(r_kj, r_kl, n);                  /*  9                */
1494     phi     = gmx_angle(m, n);             /* 49 (assuming 25 for atan2) */
1495     ipr     = iprod(r_ij, n);              /*  5        */
1496     (*sign) = (ipr < 0.0) ? -1.0 : 1.0;
1497     phi     = (*sign)*phi;                 /*  1                */
1498     /* 82 TOTAL */
1499     return phi;
1500 }
1501
1502
1503 #ifdef GMX_SIMD_HAVE_REAL
1504
1505 /* As dih_angle above, but calculates 4 dihedral angles at once using SIMD,
1506  * also calculates the pre-factor required for the dihedral force update.
1507  * Note that bv and buf should be register aligned.
1508  */
1509 static gmx_inline void
1510 dih_angle_simd(const rvec *x,
1511                const int *ai, const int *aj, const int *ak, const int *al,
1512                const pbc_simd_t *pbc,
1513                real *dr,
1514                gmx_simd_real_t *phi_S,
1515                gmx_simd_real_t *mx_S, gmx_simd_real_t *my_S, gmx_simd_real_t *mz_S,
1516                gmx_simd_real_t *nx_S, gmx_simd_real_t *ny_S, gmx_simd_real_t *nz_S,
1517                gmx_simd_real_t *nrkj_m2_S,
1518                gmx_simd_real_t *nrkj_n2_S,
1519                real *p,
1520                real *q)
1521 {
1522     int             s, m;
1523     gmx_simd_real_t rijx_S, rijy_S, rijz_S;
1524     gmx_simd_real_t rkjx_S, rkjy_S, rkjz_S;
1525     gmx_simd_real_t rklx_S, rkly_S, rklz_S;
1526     gmx_simd_real_t cx_S, cy_S, cz_S;
1527     gmx_simd_real_t cn_S;
1528     gmx_simd_real_t s_S;
1529     gmx_simd_real_t ipr_S;
1530     gmx_simd_real_t iprm_S, iprn_S;
1531     gmx_simd_real_t nrkj2_S, nrkj_1_S, nrkj_2_S, nrkj_S;
1532     gmx_simd_real_t toler_S;
1533     gmx_simd_real_t p_S, q_S;
1534     gmx_simd_real_t nrkj2_min_S;
1535     gmx_simd_real_t real_eps_S;
1536
1537     /* Used to avoid division by zero.
1538      * We take into acount that we multiply the result by real_eps_S.
1539      */
1540     nrkj2_min_S = gmx_simd_set1_r(GMX_REAL_MIN/(2*GMX_REAL_EPS));
1541
1542     /* The value of the last significant bit (GMX_REAL_EPS is half of that) */
1543     real_eps_S  = gmx_simd_set1_r(2*GMX_REAL_EPS);
1544
1545     for (s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
1546     {
1547         /* If you can't use pbc_dx_simd below for PBC, e.g. because
1548          * you can't round in SIMD, use pbc_rvec_sub here.
1549          */
1550         for (m = 0; m < DIM; m++)
1551         {
1552             dr[s + (0*DIM + m)*GMX_SIMD_REAL_WIDTH] = x[ai[s]][m] - x[aj[s]][m];
1553             dr[s + (1*DIM + m)*GMX_SIMD_REAL_WIDTH] = x[ak[s]][m] - x[aj[s]][m];
1554             dr[s + (2*DIM + m)*GMX_SIMD_REAL_WIDTH] = x[ak[s]][m] - x[al[s]][m];
1555         }
1556     }
1557
1558     rijx_S = gmx_simd_load_r(dr + 0*GMX_SIMD_REAL_WIDTH);
1559     rijy_S = gmx_simd_load_r(dr + 1*GMX_SIMD_REAL_WIDTH);
1560     rijz_S = gmx_simd_load_r(dr + 2*GMX_SIMD_REAL_WIDTH);
1561     rkjx_S = gmx_simd_load_r(dr + 3*GMX_SIMD_REAL_WIDTH);
1562     rkjy_S = gmx_simd_load_r(dr + 4*GMX_SIMD_REAL_WIDTH);
1563     rkjz_S = gmx_simd_load_r(dr + 5*GMX_SIMD_REAL_WIDTH);
1564     rklx_S = gmx_simd_load_r(dr + 6*GMX_SIMD_REAL_WIDTH);
1565     rkly_S = gmx_simd_load_r(dr + 7*GMX_SIMD_REAL_WIDTH);
1566     rklz_S = gmx_simd_load_r(dr + 8*GMX_SIMD_REAL_WIDTH);
1567
1568     pbc_dx_simd(&rijx_S, &rijy_S, &rijz_S, pbc);
1569     pbc_dx_simd(&rkjx_S, &rkjy_S, &rkjz_S, pbc);
1570     pbc_dx_simd(&rklx_S, &rkly_S, &rklz_S, pbc);
1571
1572     gmx_simd_cprod_r(rijx_S, rijy_S, rijz_S,
1573                      rkjx_S, rkjy_S, rkjz_S,
1574                      mx_S, my_S, mz_S);
1575
1576     gmx_simd_cprod_r(rkjx_S, rkjy_S, rkjz_S,
1577                      rklx_S, rkly_S, rklz_S,
1578                      nx_S, ny_S, nz_S);
1579
1580     gmx_simd_cprod_r(*mx_S, *my_S, *mz_S,
1581                      *nx_S, *ny_S, *nz_S,
1582                      &cx_S, &cy_S, &cz_S);
1583
1584     cn_S       = gmx_simd_sqrt_r(gmx_simd_norm2_r(cx_S, cy_S, cz_S));
1585
1586     s_S        = gmx_simd_iprod_r(*mx_S, *my_S, *mz_S, *nx_S, *ny_S, *nz_S);
1587
1588     /* Determine the dihedral angle, the sign might need correction */
1589     *phi_S     = gmx_simd_atan2_r(cn_S, s_S);
1590
1591     ipr_S      = gmx_simd_iprod_r(rijx_S, rijy_S, rijz_S,
1592                                   *nx_S, *ny_S, *nz_S);
1593
1594     iprm_S     = gmx_simd_norm2_r(*mx_S, *my_S, *mz_S);
1595     iprn_S     = gmx_simd_norm2_r(*nx_S, *ny_S, *nz_S);
1596
1597     nrkj2_S    = gmx_simd_norm2_r(rkjx_S, rkjy_S, rkjz_S);
1598
1599     /* Avoid division by zero. When zero, the result is multiplied by 0
1600      * anyhow, so the 3 max below do not affect the final result.
1601      */
1602     nrkj2_S    = gmx_simd_max_r(nrkj2_S, nrkj2_min_S);
1603     nrkj_1_S   = gmx_simd_invsqrt_r(nrkj2_S);
1604     nrkj_2_S   = gmx_simd_mul_r(nrkj_1_S, nrkj_1_S);
1605     nrkj_S     = gmx_simd_mul_r(nrkj2_S, nrkj_1_S);
1606
1607     toler_S    = gmx_simd_mul_r(nrkj2_S, real_eps_S);
1608
1609     /* Here the plain-C code uses a conditional, but we can't do that in SIMD.
1610      * So we take a max with the tolerance instead. Since we multiply with
1611      * m or n later, the max does not affect the results.
1612      */
1613     iprm_S     = gmx_simd_max_r(iprm_S, toler_S);
1614     iprn_S     = gmx_simd_max_r(iprn_S, toler_S);
1615     *nrkj_m2_S = gmx_simd_mul_r(nrkj_S, gmx_simd_inv_r(iprm_S));
1616     *nrkj_n2_S = gmx_simd_mul_r(nrkj_S, gmx_simd_inv_r(iprn_S));
1617
1618     /* Set sign of phi_S with the sign of ipr_S; phi_S is currently positive */
1619     *phi_S     = gmx_simd_xor_sign_r(*phi_S, ipr_S);
1620     p_S        = gmx_simd_iprod_r(rijx_S, rijy_S, rijz_S,
1621                                   rkjx_S, rkjy_S, rkjz_S);
1622     p_S        = gmx_simd_mul_r(p_S, nrkj_2_S);
1623
1624     q_S        = gmx_simd_iprod_r(rklx_S, rkly_S, rklz_S,
1625                                   rkjx_S, rkjy_S, rkjz_S);
1626     q_S        = gmx_simd_mul_r(q_S, nrkj_2_S);
1627
1628     gmx_simd_store_r(p, p_S);
1629     gmx_simd_store_r(q, q_S);
1630 }
1631
1632 #endif /* GMX_SIMD_HAVE_REAL */
1633
1634
1635 void do_dih_fup(int i, int j, int k, int l, real ddphi,
1636                 rvec r_ij, rvec r_kj, rvec r_kl,
1637                 rvec m, rvec n, rvec f[], rvec fshift[],
1638                 const t_pbc *pbc, const t_graph *g,
1639                 const rvec x[], int t1, int t2, int t3)
1640 {
1641     /* 143 FLOPS */
1642     rvec f_i, f_j, f_k, f_l;
1643     rvec uvec, vvec, svec, dx_jl;
1644     real iprm, iprn, nrkj, nrkj2, nrkj_1, nrkj_2;
1645     real a, b, p, q, toler;
1646     ivec jt, dt_ij, dt_kj, dt_lj;
1647
1648     iprm  = iprod(m, m);       /*  5    */
1649     iprn  = iprod(n, n);       /*  5    */
1650     nrkj2 = iprod(r_kj, r_kj); /*  5    */
1651     toler = nrkj2*GMX_REAL_EPS;
1652     if ((iprm > toler) && (iprn > toler))
1653     {
1654         nrkj_1 = gmx_invsqrt(nrkj2); /* 10      */
1655         nrkj_2 = nrkj_1*nrkj_1;      /*  1      */
1656         nrkj   = nrkj2*nrkj_1;       /*  1      */
1657         a      = -ddphi*nrkj/iprm;   /* 11      */
1658         svmul(a, m, f_i);            /*  3      */
1659         b     = ddphi*nrkj/iprn;     /* 11      */
1660         svmul(b, n, f_l);            /*  3  */
1661         p     = iprod(r_ij, r_kj);   /*  5      */
1662         p    *= nrkj_2;              /*  1      */
1663         q     = iprod(r_kl, r_kj);   /*  5      */
1664         q    *= nrkj_2;              /*  1      */
1665         svmul(p, f_i, uvec);         /*  3      */
1666         svmul(q, f_l, vvec);         /*  3      */
1667         rvec_sub(uvec, vvec, svec);  /*  3      */
1668         rvec_sub(f_i, svec, f_j);    /*  3      */
1669         rvec_add(f_l, svec, f_k);    /*  3      */
1670         rvec_inc(f[i], f_i);         /*  3      */
1671         rvec_dec(f[j], f_j);         /*  3      */
1672         rvec_dec(f[k], f_k);         /*  3      */
1673         rvec_inc(f[l], f_l);         /*  3      */
1674
1675         if (g)
1676         {
1677             copy_ivec(SHIFT_IVEC(g, j), jt);
1678             ivec_sub(SHIFT_IVEC(g, i), jt, dt_ij);
1679             ivec_sub(SHIFT_IVEC(g, k), jt, dt_kj);
1680             ivec_sub(SHIFT_IVEC(g, l), jt, dt_lj);
1681             t1 = IVEC2IS(dt_ij);
1682             t2 = IVEC2IS(dt_kj);
1683             t3 = IVEC2IS(dt_lj);
1684         }
1685         else if (pbc)
1686         {
1687             t3 = pbc_rvec_sub(pbc, x[l], x[j], dx_jl);
1688         }
1689         else
1690         {
1691             t3 = CENTRAL;
1692         }
1693
1694         rvec_inc(fshift[t1], f_i);
1695         rvec_dec(fshift[CENTRAL], f_j);
1696         rvec_dec(fshift[t2], f_k);
1697         rvec_inc(fshift[t3], f_l);
1698     }
1699     /* 112 TOTAL    */
1700 }
1701
1702 /* As do_dih_fup above, but without shift forces */
1703 static void
1704 do_dih_fup_noshiftf(int i, int j, int k, int l, real ddphi,
1705                     rvec r_ij, rvec r_kj, rvec r_kl,
1706                     rvec m, rvec n, rvec f[])
1707 {
1708     rvec f_i, f_j, f_k, f_l;
1709     rvec uvec, vvec, svec;
1710     real iprm, iprn, nrkj, nrkj2, nrkj_1, nrkj_2;
1711     real a, b, p, q, toler;
1712
1713     iprm  = iprod(m, m);       /*  5    */
1714     iprn  = iprod(n, n);       /*  5    */
1715     nrkj2 = iprod(r_kj, r_kj); /*  5    */
1716     toler = nrkj2*GMX_REAL_EPS;
1717     if ((iprm > toler) && (iprn > toler))
1718     {
1719         nrkj_1 = gmx_invsqrt(nrkj2); /* 10      */
1720         nrkj_2 = nrkj_1*nrkj_1;      /*  1      */
1721         nrkj   = nrkj2*nrkj_1;       /*  1      */
1722         a      = -ddphi*nrkj/iprm;   /* 11      */
1723         svmul(a, m, f_i);            /*  3      */
1724         b     = ddphi*nrkj/iprn;     /* 11      */
1725         svmul(b, n, f_l);            /*  3  */
1726         p     = iprod(r_ij, r_kj);   /*  5      */
1727         p    *= nrkj_2;              /*  1      */
1728         q     = iprod(r_kl, r_kj);   /*  5      */
1729         q    *= nrkj_2;              /*  1      */
1730         svmul(p, f_i, uvec);         /*  3      */
1731         svmul(q, f_l, vvec);         /*  3      */
1732         rvec_sub(uvec, vvec, svec);  /*  3      */
1733         rvec_sub(f_i, svec, f_j);    /*  3      */
1734         rvec_add(f_l, svec, f_k);    /*  3      */
1735         rvec_inc(f[i], f_i);         /*  3      */
1736         rvec_dec(f[j], f_j);         /*  3      */
1737         rvec_dec(f[k], f_k);         /*  3      */
1738         rvec_inc(f[l], f_l);         /*  3      */
1739     }
1740 }
1741
1742 /* As do_dih_fup_noshiftf above, but with pre-calculated pre-factors */
1743 static gmx_inline void
1744 do_dih_fup_noshiftf_precalc(int i, int j, int k, int l,
1745                             real p, real q,
1746                             real f_i_x, real f_i_y, real f_i_z,
1747                             real mf_l_x, real mf_l_y, real mf_l_z,
1748                             rvec f[])
1749 {
1750     rvec f_i, f_j, f_k, f_l;
1751     rvec uvec, vvec, svec;
1752
1753     f_i[XX] = f_i_x;
1754     f_i[YY] = f_i_y;
1755     f_i[ZZ] = f_i_z;
1756     f_l[XX] = -mf_l_x;
1757     f_l[YY] = -mf_l_y;
1758     f_l[ZZ] = -mf_l_z;
1759     svmul(p, f_i, uvec);
1760     svmul(q, f_l, vvec);
1761     rvec_sub(uvec, vvec, svec);
1762     rvec_sub(f_i, svec, f_j);
1763     rvec_add(f_l, svec, f_k);
1764     rvec_inc(f[i], f_i);
1765     rvec_dec(f[j], f_j);
1766     rvec_dec(f[k], f_k);
1767     rvec_inc(f[l], f_l);
1768 }
1769
1770
1771 real dopdihs(real cpA, real cpB, real phiA, real phiB, int mult,
1772              real phi, real lambda, real *V, real *F)
1773 {
1774     real v, dvdlambda, mdphi, v1, sdphi, ddphi;
1775     real L1   = 1.0 - lambda;
1776     real ph0  = (L1*phiA + lambda*phiB)*DEG2RAD;
1777     real dph0 = (phiB - phiA)*DEG2RAD;
1778     real cp   = L1*cpA + lambda*cpB;
1779
1780     mdphi =  mult*phi - ph0;
1781     sdphi = sin(mdphi);
1782     ddphi = -cp*mult*sdphi;
1783     v1    = 1.0 + cos(mdphi);
1784     v     = cp*v1;
1785
1786     dvdlambda  = (cpB - cpA)*v1 + cp*dph0*sdphi;
1787
1788     *V = v;
1789     *F = ddphi;
1790
1791     return dvdlambda;
1792
1793     /* That was 40 flops */
1794 }
1795
1796 static void
1797 dopdihs_noener(real cpA, real cpB, real phiA, real phiB, int mult,
1798                real phi, real lambda, real *F)
1799 {
1800     real mdphi, sdphi, ddphi;
1801     real L1   = 1.0 - lambda;
1802     real ph0  = (L1*phiA + lambda*phiB)*DEG2RAD;
1803     real cp   = L1*cpA + lambda*cpB;
1804
1805     mdphi = mult*phi - ph0;
1806     sdphi = sin(mdphi);
1807     ddphi = -cp*mult*sdphi;
1808
1809     *F = ddphi;
1810
1811     /* That was 20 flops */
1812 }
1813
1814 static void
1815 dopdihs_mdphi(real cpA, real cpB, real phiA, real phiB, int mult,
1816               real phi, real lambda, real *cp, real *mdphi)
1817 {
1818     real L1   = 1.0 - lambda;
1819     real ph0  = (L1*phiA + lambda*phiB)*DEG2RAD;
1820
1821     *cp    = L1*cpA + lambda*cpB;
1822
1823     *mdphi = mult*phi - ph0;
1824 }
1825
1826 static real dopdihs_min(real cpA, real cpB, real phiA, real phiB, int mult,
1827                         real phi, real lambda, real *V, real *F)
1828 /* similar to dopdihs, except for a minus sign  *
1829  * and a different treatment of mult/phi0       */
1830 {
1831     real v, dvdlambda, mdphi, v1, sdphi, ddphi;
1832     real L1   = 1.0 - lambda;
1833     real ph0  = (L1*phiA + lambda*phiB)*DEG2RAD;
1834     real dph0 = (phiB - phiA)*DEG2RAD;
1835     real cp   = L1*cpA + lambda*cpB;
1836
1837     mdphi = mult*(phi-ph0);
1838     sdphi = sin(mdphi);
1839     ddphi = cp*mult*sdphi;
1840     v1    = 1.0-cos(mdphi);
1841     v     = cp*v1;
1842
1843     dvdlambda  = (cpB-cpA)*v1 + cp*dph0*sdphi;
1844
1845     *V = v;
1846     *F = ddphi;
1847
1848     return dvdlambda;
1849
1850     /* That was 40 flops */
1851 }
1852
1853 real pdihs(int nbonds,
1854            const t_iatom forceatoms[], const t_iparams forceparams[],
1855            const rvec x[], rvec f[], rvec fshift[],
1856            const t_pbc *pbc, const t_graph *g,
1857            real lambda, real *dvdlambda,
1858            const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
1859            int gmx_unused *global_atom_index)
1860 {
1861     int  i, type, ai, aj, ak, al;
1862     int  t1, t2, t3;
1863     rvec r_ij, r_kj, r_kl, m, n;
1864     real phi, sign, ddphi, vpd, vtot;
1865
1866     vtot = 0.0;
1867
1868     for (i = 0; (i < nbonds); )
1869     {
1870         type = forceatoms[i++];
1871         ai   = forceatoms[i++];
1872         aj   = forceatoms[i++];
1873         ak   = forceatoms[i++];
1874         al   = forceatoms[i++];
1875
1876         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
1877                         &sign, &t1, &t2, &t3);  /*  84      */
1878         *dvdlambda += dopdihs(forceparams[type].pdihs.cpA,
1879                               forceparams[type].pdihs.cpB,
1880                               forceparams[type].pdihs.phiA,
1881                               forceparams[type].pdihs.phiB,
1882                               forceparams[type].pdihs.mult,
1883                               phi, lambda, &vpd, &ddphi);
1884
1885         vtot += vpd;
1886         do_dih_fup(ai, aj, ak, al, ddphi, r_ij, r_kj, r_kl, m, n,
1887                    f, fshift, pbc, g, x, t1, t2, t3); /* 112            */
1888
1889 #ifdef DEBUG
1890         fprintf(debug, "pdih: (%d,%d,%d,%d) phi=%g\n",
1891                 ai, aj, ak, al, phi);
1892 #endif
1893     } /* 223 TOTAL  */
1894
1895     return vtot;
1896 }
1897
1898 void make_dp_periodic(real *dp)  /* 1 flop? */
1899 {
1900     /* dp cannot be outside (-pi,pi) */
1901     if (*dp >= M_PI)
1902     {
1903         *dp -= 2*M_PI;
1904     }
1905     else if (*dp < -M_PI)
1906     {
1907         *dp += 2*M_PI;
1908     }
1909     return;
1910 }
1911
1912 /* As pdihs above, but without calculating energies and shift forces */
1913 void
1914 pdihs_noener(int nbonds,
1915              const t_iatom forceatoms[], const t_iparams forceparams[],
1916              const rvec x[], rvec f[],
1917              const t_pbc gmx_unused *pbc, const t_graph gmx_unused *g,
1918              real lambda,
1919              const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
1920              int gmx_unused *global_atom_index)
1921 {
1922     int  i, type, ai, aj, ak, al;
1923     int  t1, t2, t3;
1924     rvec r_ij, r_kj, r_kl, m, n;
1925     real phi, sign, ddphi_tot, ddphi;
1926
1927     for (i = 0; (i < nbonds); )
1928     {
1929         ai   = forceatoms[i+1];
1930         aj   = forceatoms[i+2];
1931         ak   = forceatoms[i+3];
1932         al   = forceatoms[i+4];
1933
1934         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
1935                         &sign, &t1, &t2, &t3);
1936
1937         ddphi_tot = 0;
1938
1939         /* Loop over dihedrals working on the same atoms,
1940          * so we avoid recalculating angles and force distributions.
1941          */
1942         do
1943         {
1944             type = forceatoms[i];
1945             dopdihs_noener(forceparams[type].pdihs.cpA,
1946                            forceparams[type].pdihs.cpB,
1947                            forceparams[type].pdihs.phiA,
1948                            forceparams[type].pdihs.phiB,
1949                            forceparams[type].pdihs.mult,
1950                            phi, lambda, &ddphi);
1951             ddphi_tot += ddphi;
1952
1953             i += 5;
1954         }
1955         while (i < nbonds &&
1956                forceatoms[i+1] == ai &&
1957                forceatoms[i+2] == aj &&
1958                forceatoms[i+3] == ak &&
1959                forceatoms[i+4] == al);
1960
1961         do_dih_fup_noshiftf(ai, aj, ak, al, ddphi_tot, r_ij, r_kj, r_kl, m, n, f);
1962     }
1963 }
1964
1965
1966 #ifdef GMX_SIMD_HAVE_REAL
1967
1968 /* As pdihs_noner above, but using SIMD to calculate many dihedrals at once */
1969 void
1970 pdihs_noener_simd(int nbonds,
1971                   const t_iatom forceatoms[], const t_iparams forceparams[],
1972                   const rvec x[], rvec f[],
1973                   const t_pbc *pbc, const t_graph gmx_unused *g,
1974                   real gmx_unused lambda,
1975                   const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
1976                   int gmx_unused *global_atom_index)
1977 {
1978     const int             nfa1 = 5;
1979     int                   i, iu, s;
1980     int                   type, ai[GMX_SIMD_REAL_WIDTH], aj[GMX_SIMD_REAL_WIDTH], ak[GMX_SIMD_REAL_WIDTH], al[GMX_SIMD_REAL_WIDTH];
1981     real                  dr_array[3*DIM*GMX_SIMD_REAL_WIDTH+GMX_SIMD_REAL_WIDTH], *dr;
1982     real                  buf_array[7*GMX_SIMD_REAL_WIDTH+GMX_SIMD_REAL_WIDTH], *buf;
1983     real                 *cp, *phi0, *mult, *p, *q;
1984     gmx_simd_real_t       phi0_S, phi_S;
1985     gmx_simd_real_t       mx_S, my_S, mz_S;
1986     gmx_simd_real_t       nx_S, ny_S, nz_S;
1987     gmx_simd_real_t       nrkj_m2_S, nrkj_n2_S;
1988     gmx_simd_real_t       cp_S, mdphi_S, mult_S;
1989     gmx_simd_real_t       sin_S, cos_S;
1990     gmx_simd_real_t       mddphi_S;
1991     gmx_simd_real_t       sf_i_S, msf_l_S;
1992     pbc_simd_t            pbc_simd;
1993
1994     /* Ensure SIMD register alignment */
1995     dr  = gmx_simd_align_r(dr_array);
1996     buf = gmx_simd_align_r(buf_array);
1997
1998     /* Extract aligned pointer for parameters and variables */
1999     cp    = buf + 0*GMX_SIMD_REAL_WIDTH;
2000     phi0  = buf + 1*GMX_SIMD_REAL_WIDTH;
2001     mult  = buf + 2*GMX_SIMD_REAL_WIDTH;
2002     p     = buf + 3*GMX_SIMD_REAL_WIDTH;
2003     q     = buf + 4*GMX_SIMD_REAL_WIDTH;
2004
2005     set_pbc_simd(pbc, &pbc_simd);
2006
2007     /* nbonds is the number of dihedrals times nfa1, here we step GMX_SIMD_REAL_WIDTH dihs */
2008     for (i = 0; (i < nbonds); i += GMX_SIMD_REAL_WIDTH*nfa1)
2009     {
2010         /* Collect atoms quadruplets for GMX_SIMD_REAL_WIDTH dihedrals.
2011          * iu indexes into forceatoms, we should not let iu go beyond nbonds.
2012          */
2013         iu = i;
2014         for (s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
2015         {
2016             type  = forceatoms[iu];
2017             ai[s] = forceatoms[iu+1];
2018             aj[s] = forceatoms[iu+2];
2019             ak[s] = forceatoms[iu+3];
2020             al[s] = forceatoms[iu+4];
2021
2022             cp[s]   = forceparams[type].pdihs.cpA;
2023             phi0[s] = forceparams[type].pdihs.phiA*DEG2RAD;
2024             mult[s] = forceparams[type].pdihs.mult;
2025
2026             /* At the end fill the arrays with identical entries */
2027             if (iu + nfa1 < nbonds)
2028             {
2029                 iu += nfa1;
2030             }
2031         }
2032
2033         /* Caclulate GMX_SIMD_REAL_WIDTH dihedral angles at once */
2034         dih_angle_simd(x, ai, aj, ak, al, &pbc_simd,
2035                        dr,
2036                        &phi_S,
2037                        &mx_S, &my_S, &mz_S,
2038                        &nx_S, &ny_S, &nz_S,
2039                        &nrkj_m2_S,
2040                        &nrkj_n2_S,
2041                        p, q);
2042
2043         cp_S     = gmx_simd_load_r(cp);
2044         phi0_S   = gmx_simd_load_r(phi0);
2045         mult_S   = gmx_simd_load_r(mult);
2046
2047         mdphi_S  = gmx_simd_sub_r(gmx_simd_mul_r(mult_S, phi_S), phi0_S);
2048
2049         /* Calculate GMX_SIMD_REAL_WIDTH sines at once */
2050         gmx_simd_sincos_r(mdphi_S, &sin_S, &cos_S);
2051         mddphi_S = gmx_simd_mul_r(gmx_simd_mul_r(cp_S, mult_S), sin_S);
2052         sf_i_S   = gmx_simd_mul_r(mddphi_S, nrkj_m2_S);
2053         msf_l_S  = gmx_simd_mul_r(mddphi_S, nrkj_n2_S);
2054
2055         /* After this m?_S will contain f[i] */
2056         mx_S     = gmx_simd_mul_r(sf_i_S, mx_S);
2057         my_S     = gmx_simd_mul_r(sf_i_S, my_S);
2058         mz_S     = gmx_simd_mul_r(sf_i_S, mz_S);
2059
2060         /* After this m?_S will contain -f[l] */
2061         nx_S     = gmx_simd_mul_r(msf_l_S, nx_S);
2062         ny_S     = gmx_simd_mul_r(msf_l_S, ny_S);
2063         nz_S     = gmx_simd_mul_r(msf_l_S, nz_S);
2064
2065         gmx_simd_store_r(dr + 0*GMX_SIMD_REAL_WIDTH, mx_S);
2066         gmx_simd_store_r(dr + 1*GMX_SIMD_REAL_WIDTH, my_S);
2067         gmx_simd_store_r(dr + 2*GMX_SIMD_REAL_WIDTH, mz_S);
2068         gmx_simd_store_r(dr + 3*GMX_SIMD_REAL_WIDTH, nx_S);
2069         gmx_simd_store_r(dr + 4*GMX_SIMD_REAL_WIDTH, ny_S);
2070         gmx_simd_store_r(dr + 5*GMX_SIMD_REAL_WIDTH, nz_S);
2071
2072         iu = i;
2073         s  = 0;
2074         do
2075         {
2076             do_dih_fup_noshiftf_precalc(ai[s], aj[s], ak[s], al[s],
2077                                         p[s], q[s],
2078                                         dr[     XX *GMX_SIMD_REAL_WIDTH+s],
2079                                         dr[     YY *GMX_SIMD_REAL_WIDTH+s],
2080                                         dr[     ZZ *GMX_SIMD_REAL_WIDTH+s],
2081                                         dr[(DIM+XX)*GMX_SIMD_REAL_WIDTH+s],
2082                                         dr[(DIM+YY)*GMX_SIMD_REAL_WIDTH+s],
2083                                         dr[(DIM+ZZ)*GMX_SIMD_REAL_WIDTH+s],
2084                                         f);
2085             s++;
2086             iu += nfa1;
2087         }
2088         while (s < GMX_SIMD_REAL_WIDTH && iu < nbonds);
2089     }
2090 }
2091
2092 /* This is mostly a copy of pdihs_noener_simd above, but with using
2093  * the RB potential instead of a harmonic potential.
2094  * This function can replace rbdihs() when no energy and virial are needed.
2095  */
2096 void
2097 rbdihs_noener_simd(int nbonds,
2098                    const t_iatom forceatoms[], const t_iparams forceparams[],
2099                    const rvec x[], rvec f[],
2100                    const t_pbc *pbc, const t_graph gmx_unused *g,
2101                    real gmx_unused lambda,
2102                    const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2103                    int gmx_unused *global_atom_index)
2104 {
2105     const int             nfa1 = 5;
2106     int                   i, iu, s, j;
2107     int                   type, ai[GMX_SIMD_REAL_WIDTH], aj[GMX_SIMD_REAL_WIDTH], ak[GMX_SIMD_REAL_WIDTH], al[GMX_SIMD_REAL_WIDTH];
2108     real                  dr_array[3*DIM*GMX_SIMD_REAL_WIDTH+GMX_SIMD_REAL_WIDTH], *dr;
2109     real                  buf_array[(NR_RBDIHS + 4)*GMX_SIMD_REAL_WIDTH+GMX_SIMD_REAL_WIDTH], *buf;
2110     real                 *parm, *p, *q;
2111
2112     gmx_simd_real_t       phi_S;
2113     gmx_simd_real_t       ddphi_S, cosfac_S;
2114     gmx_simd_real_t       mx_S, my_S, mz_S;
2115     gmx_simd_real_t       nx_S, ny_S, nz_S;
2116     gmx_simd_real_t       nrkj_m2_S, nrkj_n2_S;
2117     gmx_simd_real_t       parm_S, c_S;
2118     gmx_simd_real_t       sin_S, cos_S;
2119     gmx_simd_real_t       sf_i_S, msf_l_S;
2120     pbc_simd_t            pbc_simd;
2121
2122     gmx_simd_real_t       pi_S  = gmx_simd_set1_r(M_PI);
2123     gmx_simd_real_t       one_S = gmx_simd_set1_r(1.0);
2124
2125     /* Ensure SIMD register alignment */
2126     dr  = gmx_simd_align_r(dr_array);
2127     buf = gmx_simd_align_r(buf_array);
2128
2129     /* Extract aligned pointer for parameters and variables */
2130     parm  = buf;
2131     p     = buf + (NR_RBDIHS + 0)*GMX_SIMD_REAL_WIDTH;
2132     q     = buf + (NR_RBDIHS + 1)*GMX_SIMD_REAL_WIDTH;
2133
2134     set_pbc_simd(pbc, &pbc_simd);
2135
2136     /* nbonds is the number of dihedrals times nfa1, here we step GMX_SIMD_REAL_WIDTH dihs */
2137     for (i = 0; (i < nbonds); i += GMX_SIMD_REAL_WIDTH*nfa1)
2138     {
2139         /* Collect atoms quadruplets for GMX_SIMD_REAL_WIDTH dihedrals.
2140          * iu indexes into forceatoms, we should not let iu go beyond nbonds.
2141          */
2142         iu = i;
2143         for (s = 0; s < GMX_SIMD_REAL_WIDTH; s++)
2144         {
2145             type  = forceatoms[iu];
2146             ai[s] = forceatoms[iu+1];
2147             aj[s] = forceatoms[iu+2];
2148             ak[s] = forceatoms[iu+3];
2149             al[s] = forceatoms[iu+4];
2150
2151             /* We don't need the first parameter, since that's a constant
2152              * which only affects the energies, not the forces.
2153              */
2154             for (j = 1; j < NR_RBDIHS; j++)
2155             {
2156                 parm[j*GMX_SIMD_REAL_WIDTH + s] =
2157                     forceparams[type].rbdihs.rbcA[j];
2158             }
2159
2160             /* At the end fill the arrays with identical entries */
2161             if (iu + nfa1 < nbonds)
2162             {
2163                 iu += nfa1;
2164             }
2165         }
2166
2167         /* Caclulate GMX_SIMD_REAL_WIDTH dihedral angles at once */
2168         dih_angle_simd(x, ai, aj, ak, al, &pbc_simd,
2169                        dr,
2170                        &phi_S,
2171                        &mx_S, &my_S, &mz_S,
2172                        &nx_S, &ny_S, &nz_S,
2173                        &nrkj_m2_S,
2174                        &nrkj_n2_S,
2175                        p, q);
2176
2177         /* Change to polymer convention */
2178         phi_S = gmx_simd_sub_r(phi_S, pi_S);
2179
2180         gmx_simd_sincos_r(phi_S, &sin_S, &cos_S);
2181
2182         ddphi_S   = gmx_simd_setzero_r();
2183         c_S       = one_S;
2184         cosfac_S  = one_S;
2185         for (j = 1; j < NR_RBDIHS; j++)
2186         {
2187             parm_S   = gmx_simd_load_r(parm + j*GMX_SIMD_REAL_WIDTH);
2188             ddphi_S  = gmx_simd_fmadd_r(gmx_simd_mul_r(c_S, parm_S), cosfac_S, ddphi_S);
2189             cosfac_S = gmx_simd_mul_r(cosfac_S, cos_S);
2190             c_S      = gmx_simd_add_r(c_S, one_S);
2191         }
2192
2193         /* Note that here we do not use the minus sign which is present
2194          * in the normal RB code. This is corrected for through (m)sf below.
2195          */
2196         ddphi_S  = gmx_simd_mul_r(ddphi_S, sin_S);
2197
2198         sf_i_S   = gmx_simd_mul_r(ddphi_S, nrkj_m2_S);
2199         msf_l_S  = gmx_simd_mul_r(ddphi_S, nrkj_n2_S);
2200
2201         /* After this m?_S will contain f[i] */
2202         mx_S     = gmx_simd_mul_r(sf_i_S, mx_S);
2203         my_S     = gmx_simd_mul_r(sf_i_S, my_S);
2204         mz_S     = gmx_simd_mul_r(sf_i_S, mz_S);
2205
2206         /* After this m?_S will contain -f[l] */
2207         nx_S     = gmx_simd_mul_r(msf_l_S, nx_S);
2208         ny_S     = gmx_simd_mul_r(msf_l_S, ny_S);
2209         nz_S     = gmx_simd_mul_r(msf_l_S, nz_S);
2210
2211         gmx_simd_store_r(dr + 0*GMX_SIMD_REAL_WIDTH, mx_S);
2212         gmx_simd_store_r(dr + 1*GMX_SIMD_REAL_WIDTH, my_S);
2213         gmx_simd_store_r(dr + 2*GMX_SIMD_REAL_WIDTH, mz_S);
2214         gmx_simd_store_r(dr + 3*GMX_SIMD_REAL_WIDTH, nx_S);
2215         gmx_simd_store_r(dr + 4*GMX_SIMD_REAL_WIDTH, ny_S);
2216         gmx_simd_store_r(dr + 5*GMX_SIMD_REAL_WIDTH, nz_S);
2217
2218         iu = i;
2219         s  = 0;
2220         do
2221         {
2222             do_dih_fup_noshiftf_precalc(ai[s], aj[s], ak[s], al[s],
2223                                         p[s], q[s],
2224                                         dr[     XX *GMX_SIMD_REAL_WIDTH+s],
2225                                         dr[     YY *GMX_SIMD_REAL_WIDTH+s],
2226                                         dr[     ZZ *GMX_SIMD_REAL_WIDTH+s],
2227                                         dr[(DIM+XX)*GMX_SIMD_REAL_WIDTH+s],
2228                                         dr[(DIM+YY)*GMX_SIMD_REAL_WIDTH+s],
2229                                         dr[(DIM+ZZ)*GMX_SIMD_REAL_WIDTH+s],
2230                                         f);
2231             s++;
2232             iu += nfa1;
2233         }
2234         while (s < GMX_SIMD_REAL_WIDTH && iu < nbonds);
2235     }
2236 }
2237
2238 #endif /* GMX_SIMD_HAVE_REAL */
2239
2240
2241 real idihs(int nbonds,
2242            const t_iatom forceatoms[], const t_iparams forceparams[],
2243            const rvec x[], rvec f[], rvec fshift[],
2244            const t_pbc *pbc, const t_graph *g,
2245            real lambda, real *dvdlambda,
2246            const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2247            int gmx_unused *global_atom_index)
2248 {
2249     int  i, type, ai, aj, ak, al;
2250     int  t1, t2, t3;
2251     real phi, phi0, dphi0, ddphi, sign, vtot;
2252     rvec r_ij, r_kj, r_kl, m, n;
2253     real L1, kk, dp, dp2, kA, kB, pA, pB, dvdl_term;
2254
2255     L1        = 1.0-lambda;
2256     dvdl_term = 0;
2257     vtot      = 0.0;
2258     for (i = 0; (i < nbonds); )
2259     {
2260         type = forceatoms[i++];
2261         ai   = forceatoms[i++];
2262         aj   = forceatoms[i++];
2263         ak   = forceatoms[i++];
2264         al   = forceatoms[i++];
2265
2266         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
2267                         &sign, &t1, &t2, &t3);  /*  84          */
2268
2269         /* phi can jump if phi0 is close to Pi/-Pi, which will cause huge
2270          * force changes if we just apply a normal harmonic.
2271          * Instead, we first calculate phi-phi0 and take it modulo (-Pi,Pi).
2272          * This means we will never have the periodicity problem, unless
2273          * the dihedral is Pi away from phiO, which is very unlikely due to
2274          * the potential.
2275          */
2276         kA = forceparams[type].harmonic.krA;
2277         kB = forceparams[type].harmonic.krB;
2278         pA = forceparams[type].harmonic.rA;
2279         pB = forceparams[type].harmonic.rB;
2280
2281         kk    = L1*kA + lambda*kB;
2282         phi0  = (L1*pA + lambda*pB)*DEG2RAD;
2283         dphi0 = (pB - pA)*DEG2RAD;
2284
2285         dp = phi-phi0;
2286
2287         make_dp_periodic(&dp);
2288
2289         dp2 = dp*dp;
2290
2291         vtot += 0.5*kk*dp2;
2292         ddphi = -kk*dp;
2293
2294         dvdl_term += 0.5*(kB - kA)*dp2 - kk*dphi0*dp;
2295
2296         do_dih_fup(ai, aj, ak, al, -ddphi, r_ij, r_kj, r_kl, m, n,
2297                    f, fshift, pbc, g, x, t1, t2, t3); /* 112            */
2298         /* 218 TOTAL    */
2299 #ifdef DEBUG
2300         if (debug)
2301         {
2302             fprintf(debug, "idih: (%d,%d,%d,%d) phi=%g\n",
2303                     ai, aj, ak, al, phi);
2304         }
2305 #endif
2306     }
2307
2308     *dvdlambda += dvdl_term;
2309     return vtot;
2310 }
2311
2312 static real low_angres(int nbonds,
2313                        const t_iatom forceatoms[], const t_iparams forceparams[],
2314                        const rvec x[], rvec f[], rvec fshift[],
2315                        const t_pbc *pbc, const t_graph *g,
2316                        real lambda, real *dvdlambda,
2317                        gmx_bool bZAxis)
2318 {
2319     int  i, m, type, ai, aj, ak, al;
2320     int  t1, t2;
2321     real phi, cos_phi, cos_phi2, vid, vtot, dVdphi;
2322     rvec r_ij, r_kl, f_i, f_k = {0, 0, 0};
2323     real st, sth, nrij2, nrkl2, c, cij, ckl;
2324
2325     ivec dt;
2326     t2 = 0; /* avoid warning with gcc-3.3. It is never used uninitialized */
2327
2328     vtot = 0.0;
2329     ak   = al = 0; /* to avoid warnings */
2330     for (i = 0; i < nbonds; )
2331     {
2332         type = forceatoms[i++];
2333         ai   = forceatoms[i++];
2334         aj   = forceatoms[i++];
2335         t1   = pbc_rvec_sub(pbc, x[aj], x[ai], r_ij);       /*  3               */
2336         if (!bZAxis)
2337         {
2338             ak   = forceatoms[i++];
2339             al   = forceatoms[i++];
2340             t2   = pbc_rvec_sub(pbc, x[al], x[ak], r_kl);  /*  3                */
2341         }
2342         else
2343         {
2344             r_kl[XX] = 0;
2345             r_kl[YY] = 0;
2346             r_kl[ZZ] = 1;
2347         }
2348
2349         cos_phi = cos_angle(r_ij, r_kl); /* 25          */
2350         phi     = acos(cos_phi);         /* 10           */
2351
2352         *dvdlambda += dopdihs_min(forceparams[type].pdihs.cpA,
2353                                   forceparams[type].pdihs.cpB,
2354                                   forceparams[type].pdihs.phiA,
2355                                   forceparams[type].pdihs.phiB,
2356                                   forceparams[type].pdihs.mult,
2357                                   phi, lambda, &vid, &dVdphi); /*  40  */
2358
2359         vtot += vid;
2360
2361         cos_phi2 = sqr(cos_phi);                /*   1          */
2362         if (cos_phi2 < 1)
2363         {
2364             st    = -dVdphi*gmx_invsqrt(1 - cos_phi2); /*  12           */
2365             sth   = st*cos_phi;                        /*   1           */
2366             nrij2 = iprod(r_ij, r_ij);                 /*   5           */
2367             nrkl2 = iprod(r_kl, r_kl);                 /*   5          */
2368
2369             c   = st*gmx_invsqrt(nrij2*nrkl2);         /*  11           */
2370             cij = sth/nrij2;                           /*  10           */
2371             ckl = sth/nrkl2;                           /*  10           */
2372
2373             for (m = 0; m < DIM; m++)                  /*  18+18       */
2374             {
2375                 f_i[m]    = (c*r_kl[m]-cij*r_ij[m]);
2376                 f[ai][m] += f_i[m];
2377                 f[aj][m] -= f_i[m];
2378                 if (!bZAxis)
2379                 {
2380                     f_k[m]    = (c*r_ij[m]-ckl*r_kl[m]);
2381                     f[ak][m] += f_k[m];
2382                     f[al][m] -= f_k[m];
2383                 }
2384             }
2385
2386             if (g)
2387             {
2388                 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
2389                 t1 = IVEC2IS(dt);
2390             }
2391             rvec_inc(fshift[t1], f_i);
2392             rvec_dec(fshift[CENTRAL], f_i);
2393             if (!bZAxis)
2394             {
2395                 if (g)
2396                 {
2397                     ivec_sub(SHIFT_IVEC(g, ak), SHIFT_IVEC(g, al), dt);
2398                     t2 = IVEC2IS(dt);
2399                 }
2400                 rvec_inc(fshift[t2], f_k);
2401                 rvec_dec(fshift[CENTRAL], f_k);
2402             }
2403         }
2404     }
2405
2406     return vtot; /*  184 / 157 (bZAxis)  total  */
2407 }
2408
2409 real angres(int nbonds,
2410             const t_iatom forceatoms[], const t_iparams forceparams[],
2411             const rvec x[], rvec f[], rvec fshift[],
2412             const t_pbc *pbc, const t_graph *g,
2413             real lambda, real *dvdlambda,
2414             const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2415             int gmx_unused *global_atom_index)
2416 {
2417     return low_angres(nbonds, forceatoms, forceparams, x, f, fshift, pbc, g,
2418                       lambda, dvdlambda, FALSE);
2419 }
2420
2421 real angresz(int nbonds,
2422              const t_iatom forceatoms[], const t_iparams forceparams[],
2423              const rvec x[], rvec f[], rvec fshift[],
2424              const t_pbc *pbc, const t_graph *g,
2425              real lambda, real *dvdlambda,
2426              const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2427              int gmx_unused *global_atom_index)
2428 {
2429     return low_angres(nbonds, forceatoms, forceparams, x, f, fshift, pbc, g,
2430                       lambda, dvdlambda, TRUE);
2431 }
2432
2433 real dihres(int nbonds,
2434             const t_iatom forceatoms[], const t_iparams forceparams[],
2435             const rvec x[], rvec f[], rvec fshift[],
2436             const t_pbc *pbc, const t_graph *g,
2437             real lambda, real *dvdlambda,
2438             const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2439             int gmx_unused  *global_atom_index)
2440 {
2441     real vtot = 0;
2442     int  ai, aj, ak, al, i, k, type, t1, t2, t3;
2443     real phi0A, phi0B, dphiA, dphiB, kfacA, kfacB, phi0, dphi, kfac;
2444     real phi, ddphi, ddp, ddp2, dp, sign, d2r, L1;
2445     rvec r_ij, r_kj, r_kl, m, n;
2446
2447     L1 = 1.0-lambda;
2448
2449     d2r = DEG2RAD;
2450     k   = 0;
2451
2452     for (i = 0; (i < nbonds); )
2453     {
2454         type = forceatoms[i++];
2455         ai   = forceatoms[i++];
2456         aj   = forceatoms[i++];
2457         ak   = forceatoms[i++];
2458         al   = forceatoms[i++];
2459
2460         phi0A  = forceparams[type].dihres.phiA*d2r;
2461         dphiA  = forceparams[type].dihres.dphiA*d2r;
2462         kfacA  = forceparams[type].dihres.kfacA;
2463
2464         phi0B  = forceparams[type].dihres.phiB*d2r;
2465         dphiB  = forceparams[type].dihres.dphiB*d2r;
2466         kfacB  = forceparams[type].dihres.kfacB;
2467
2468         phi0  = L1*phi0A + lambda*phi0B;
2469         dphi  = L1*dphiA + lambda*dphiB;
2470         kfac  = L1*kfacA + lambda*kfacB;
2471
2472         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
2473                         &sign, &t1, &t2, &t3);
2474         /* 84 flops */
2475
2476         if (debug)
2477         {
2478             fprintf(debug, "dihres[%d]: %d %d %d %d : phi=%f, dphi=%f, kfac=%f\n",
2479                     k++, ai, aj, ak, al, phi0, dphi, kfac);
2480         }
2481         /* phi can jump if phi0 is close to Pi/-Pi, which will cause huge
2482          * force changes if we just apply a normal harmonic.
2483          * Instead, we first calculate phi-phi0 and take it modulo (-Pi,Pi).
2484          * This means we will never have the periodicity problem, unless
2485          * the dihedral is Pi away from phiO, which is very unlikely due to
2486          * the potential.
2487          */
2488         dp = phi-phi0;
2489         make_dp_periodic(&dp);
2490
2491         if (dp > dphi)
2492         {
2493             ddp = dp-dphi;
2494         }
2495         else if (dp < -dphi)
2496         {
2497             ddp = dp+dphi;
2498         }
2499         else
2500         {
2501             ddp = 0;
2502         }
2503
2504         if (ddp != 0.0)
2505         {
2506             ddp2  = ddp*ddp;
2507             vtot += 0.5*kfac*ddp2;
2508             ddphi = kfac*ddp;
2509
2510             *dvdlambda += 0.5*(kfacB - kfacA)*ddp2;
2511             /* lambda dependence from changing restraint distances */
2512             if (ddp > 0)
2513             {
2514                 *dvdlambda -= kfac*ddp*((dphiB - dphiA)+(phi0B - phi0A));
2515             }
2516             else if (ddp < 0)
2517             {
2518                 *dvdlambda += kfac*ddp*((dphiB - dphiA)-(phi0B - phi0A));
2519             }
2520             do_dih_fup(ai, aj, ak, al, ddphi, r_ij, r_kj, r_kl, m, n,
2521                        f, fshift, pbc, g, x, t1, t2, t3);      /* 112           */
2522         }
2523     }
2524     return vtot;
2525 }
2526
2527
2528 real unimplemented(int gmx_unused nbonds,
2529                    const t_iatom gmx_unused forceatoms[], const t_iparams gmx_unused forceparams[],
2530                    const rvec gmx_unused x[], rvec gmx_unused f[], rvec gmx_unused fshift[],
2531                    const t_pbc gmx_unused *pbc, const t_graph  gmx_unused *g,
2532                    real gmx_unused lambda, real gmx_unused *dvdlambda,
2533                    const t_mdatoms  gmx_unused *md, t_fcdata gmx_unused *fcd,
2534                    int gmx_unused *global_atom_index)
2535 {
2536     gmx_impl("*** you are using a not implemented function");
2537
2538     return 0.0; /* To make the compiler happy */
2539 }
2540
2541 real restrangles(int nbonds,
2542                  const t_iatom forceatoms[], const t_iparams forceparams[],
2543                  const rvec x[], rvec f[], rvec fshift[],
2544                  const t_pbc *pbc, const t_graph *g,
2545                  real gmx_unused lambda, real gmx_unused *dvdlambda,
2546                  const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2547                  int gmx_unused *global_atom_index)
2548 {
2549     int  i, d, ai, aj, ak, type, m;
2550     int  t1, t2;
2551     real v, vtot;
2552     ivec jt, dt_ij, dt_kj;
2553     rvec f_i, f_j, f_k;
2554     real prefactor, ratio_ante, ratio_post;
2555     rvec delta_ante, delta_post, vec_temp;
2556
2557     vtot = 0.0;
2558     for (i = 0; (i < nbonds); )
2559     {
2560         type = forceatoms[i++];
2561         ai   = forceatoms[i++];
2562         aj   = forceatoms[i++];
2563         ak   = forceatoms[i++];
2564
2565         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], vec_temp);
2566         pbc_rvec_sub(pbc, x[aj], x[ai], delta_ante);
2567         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], delta_post);
2568
2569
2570         /* This function computes factors needed for restricted angle potential.
2571          * The restricted angle potential is used in coarse-grained simulations to avoid singularities
2572          * when three particles align and the dihedral angle and dihedral potential
2573          * cannot be calculated. This potential is calculated using the formula:
2574            real restrangles(int nbonds,
2575             const t_iatom forceatoms[],const t_iparams forceparams[],
2576             const rvec x[],rvec f[],rvec fshift[],
2577             const t_pbc *pbc,const t_graph *g,
2578             real gmx_unused lambda,real gmx_unused *dvdlambda,
2579             const t_mdatoms gmx_unused *md,t_fcdata gmx_unused *fcd,
2580             int gmx_unused *global_atom_index)
2581            {
2582            int  i, d, ai, aj, ak, type, m;
2583            int t1, t2;
2584            rvec r_ij,r_kj;
2585            real v, vtot;
2586            ivec jt,dt_ij,dt_kj;
2587            rvec f_i, f_j, f_k;
2588            real prefactor, ratio_ante, ratio_post;
2589            rvec delta_ante, delta_post, vec_temp;
2590
2591            vtot = 0.0;
2592            for(i=0; (i<nbonds); )
2593            {
2594            type = forceatoms[i++];
2595            ai   = forceatoms[i++];
2596            aj   = forceatoms[i++];
2597            ak   = forceatoms[i++];
2598
2599          * \f[V_{\rm ReB}(\theta_i) = \frac{1}{2} k_{\theta} \frac{(\cos\theta_i - \cos\theta_0)^2}
2600          * {\sin^2\theta_i}\f] ({eq:ReB} and ref \cite{MonicaGoga2013} from the manual).
2601          * For more explanations see comments file "restcbt.h". */
2602
2603         compute_factors_restangles(type, forceparams,  delta_ante, delta_post,
2604                                    &prefactor, &ratio_ante, &ratio_post, &v);
2605
2606         /*   Forces are computed per component */
2607         for (d = 0; d < DIM; d++)
2608         {
2609             f_i[d] = prefactor * (ratio_ante * delta_ante[d] - delta_post[d]);
2610             f_j[d] = prefactor * ((ratio_post + 1.0) * delta_post[d] - (ratio_ante + 1.0) * delta_ante[d]);
2611             f_k[d] = prefactor * (delta_ante[d] - ratio_post * delta_post[d]);
2612         }
2613
2614         /*   Computation of potential energy   */
2615
2616         vtot += v;
2617
2618         /*   Update forces */
2619
2620         for (m = 0; (m < DIM); m++)
2621         {
2622             f[ai][m] += f_i[m];
2623             f[aj][m] += f_j[m];
2624             f[ak][m] += f_k[m];
2625         }
2626
2627         if (g)
2628         {
2629             copy_ivec(SHIFT_IVEC(g, aj), jt);
2630             ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
2631             ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
2632             t1 = IVEC2IS(dt_ij);
2633             t2 = IVEC2IS(dt_kj);
2634         }
2635
2636         rvec_inc(fshift[t1], f_i);
2637         rvec_inc(fshift[CENTRAL], f_j);
2638         rvec_inc(fshift[t2], f_k);
2639     }
2640     return vtot;
2641 }
2642
2643
2644 real restrdihs(int nbonds,
2645                const t_iatom forceatoms[], const t_iparams forceparams[],
2646                const rvec x[], rvec f[], rvec fshift[],
2647                const t_pbc *pbc, const t_graph *g,
2648                real gmx_unused lambda, real gmx_unused *dvlambda,
2649                const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2650                int gmx_unused *global_atom_index)
2651 {
2652     int  i, d, type, ai, aj, ak, al;
2653     rvec f_i, f_j, f_k, f_l;
2654     rvec dx_jl;
2655     ivec jt, dt_ij, dt_kj, dt_lj;
2656     int  t1, t2, t3;
2657     real v, vtot;
2658     rvec delta_ante,  delta_crnt, delta_post, vec_temp;
2659     real factor_phi_ai_ante, factor_phi_ai_crnt, factor_phi_ai_post;
2660     real factor_phi_aj_ante, factor_phi_aj_crnt, factor_phi_aj_post;
2661     real factor_phi_ak_ante, factor_phi_ak_crnt, factor_phi_ak_post;
2662     real factor_phi_al_ante, factor_phi_al_crnt, factor_phi_al_post;
2663     real prefactor_phi;
2664
2665
2666     vtot = 0.0;
2667     for (i = 0; (i < nbonds); )
2668     {
2669         type = forceatoms[i++];
2670         ai   = forceatoms[i++];
2671         aj   = forceatoms[i++];
2672         ak   = forceatoms[i++];
2673         al   = forceatoms[i++];
2674
2675         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], vec_temp);
2676         pbc_rvec_sub(pbc, x[aj], x[ai], delta_ante);
2677         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], delta_crnt);
2678         pbc_rvec_sub(pbc, x[ak], x[al], vec_temp);
2679         pbc_rvec_sub(pbc, x[al], x[ak], delta_post);
2680
2681         /* This function computes factors needed for restricted angle potential.
2682          * The restricted angle potential is used in coarse-grained simulations to avoid singularities
2683          * when three particles align and the dihedral angle and dihedral potential cannot be calculated.
2684          * This potential is calculated using the formula:
2685          * \f[V_{\rm ReB}(\theta_i) = \frac{1}{2} k_{\theta}
2686          * \frac{(\cos\theta_i - \cos\theta_0)^2}{\sin^2\theta_i}\f]
2687          * ({eq:ReB} and ref \cite{MonicaGoga2013} from the manual).
2688          * For more explanations see comments file "restcbt.h" */
2689
2690         compute_factors_restrdihs(type, forceparams,
2691                                   delta_ante, delta_crnt, delta_post,
2692                                   &factor_phi_ai_ante, &factor_phi_ai_crnt, &factor_phi_ai_post,
2693                                   &factor_phi_aj_ante, &factor_phi_aj_crnt, &factor_phi_aj_post,
2694                                   &factor_phi_ak_ante, &factor_phi_ak_crnt, &factor_phi_ak_post,
2695                                   &factor_phi_al_ante, &factor_phi_al_crnt, &factor_phi_al_post,
2696                                   &prefactor_phi, &v);
2697
2698
2699         /*      Computation of forces per component */
2700         for (d = 0; d < DIM; d++)
2701         {
2702             f_i[d] = prefactor_phi * (factor_phi_ai_ante * delta_ante[d] + factor_phi_ai_crnt * delta_crnt[d] + factor_phi_ai_post * delta_post[d]);
2703             f_j[d] = prefactor_phi * (factor_phi_aj_ante * delta_ante[d] + factor_phi_aj_crnt * delta_crnt[d] + factor_phi_aj_post * delta_post[d]);
2704             f_k[d] = prefactor_phi * (factor_phi_ak_ante * delta_ante[d] + factor_phi_ak_crnt * delta_crnt[d] + factor_phi_ak_post * delta_post[d]);
2705             f_l[d] = prefactor_phi * (factor_phi_al_ante * delta_ante[d] + factor_phi_al_crnt * delta_crnt[d] + factor_phi_al_post * delta_post[d]);
2706         }
2707         /*      Computation of the energy */
2708
2709         vtot += v;
2710
2711
2712
2713         /*    Updating the forces */
2714
2715         rvec_inc(f[ai], f_i);
2716         rvec_inc(f[aj], f_j);
2717         rvec_inc(f[ak], f_k);
2718         rvec_inc(f[al], f_l);
2719
2720
2721         /* Updating the fshift forces for the pressure coupling */
2722         if (g)
2723         {
2724             copy_ivec(SHIFT_IVEC(g, aj), jt);
2725             ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
2726             ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
2727             ivec_sub(SHIFT_IVEC(g, al), jt, dt_lj);
2728             t1 = IVEC2IS(dt_ij);
2729             t2 = IVEC2IS(dt_kj);
2730             t3 = IVEC2IS(dt_lj);
2731         }
2732         else if (pbc)
2733         {
2734             t3 = pbc_rvec_sub(pbc, x[al], x[aj], dx_jl);
2735         }
2736         else
2737         {
2738             t3 = CENTRAL;
2739         }
2740
2741         rvec_inc(fshift[t1], f_i);
2742         rvec_inc(fshift[CENTRAL], f_j);
2743         rvec_inc(fshift[t2], f_k);
2744         rvec_inc(fshift[t3], f_l);
2745
2746     }
2747
2748     return vtot;
2749 }
2750
2751
2752 real cbtdihs(int nbonds,
2753              const t_iatom forceatoms[], const t_iparams forceparams[],
2754              const rvec x[], rvec f[], rvec fshift[],
2755              const t_pbc *pbc, const t_graph *g,
2756              real gmx_unused lambda, real gmx_unused *dvdlambda,
2757              const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2758              int gmx_unused *global_atom_index)
2759 {
2760     int  type, ai, aj, ak, al, i, d;
2761     int  t1, t2, t3;
2762     real v, vtot;
2763     rvec vec_temp;
2764     rvec f_i, f_j, f_k, f_l;
2765     ivec jt, dt_ij, dt_kj, dt_lj;
2766     rvec dx_jl;
2767     rvec delta_ante, delta_crnt, delta_post;
2768     rvec f_phi_ai, f_phi_aj, f_phi_ak, f_phi_al;
2769     rvec f_theta_ante_ai, f_theta_ante_aj, f_theta_ante_ak;
2770     rvec f_theta_post_aj, f_theta_post_ak, f_theta_post_al;
2771
2772
2773
2774
2775     vtot = 0.0;
2776     for (i = 0; (i < nbonds); )
2777     {
2778         type = forceatoms[i++];
2779         ai   = forceatoms[i++];
2780         aj   = forceatoms[i++];
2781         ak   = forceatoms[i++];
2782         al   = forceatoms[i++];
2783
2784
2785         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], vec_temp);
2786         pbc_rvec_sub(pbc, x[aj], x[ai], delta_ante);
2787         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], vec_temp);
2788         pbc_rvec_sub(pbc, x[ak], x[aj], delta_crnt);
2789         pbc_rvec_sub(pbc, x[ak], x[al], vec_temp);
2790         pbc_rvec_sub(pbc, x[al], x[ak], delta_post);
2791
2792         /* \brief Compute factors for CBT potential
2793          * The combined bending-torsion potential goes to zero in a very smooth manner, eliminating the numerical
2794          * instabilities, when three coarse-grained particles align and the dihedral angle and standard
2795          * dihedral potentials cannot be calculated. The CBT potential is calculated using the formula:
2796          * \f[V_{\rm CBT}(\theta_{i-1}, \theta_i, \phi_i) = k_{\phi} \sin^3\theta_{i-1} \sin^3\theta_{i}
2797          * \sum_{n=0}^4 { a_n \cos^n\phi_i}\f] ({eq:CBT} and ref \cite{MonicaGoga2013} from the manual).
2798          * It contains in its expression not only the dihedral angle \f$\phi\f$
2799          * but also \f[\theta_{i-1}\f] (theta_ante bellow) and \f[\theta_{i}\f] (theta_post bellow)
2800          * --- the adjacent bending angles.
2801          * For more explanations see comments file "restcbt.h". */
2802
2803         compute_factors_cbtdihs(type, forceparams, delta_ante, delta_crnt, delta_post,
2804                                 f_phi_ai, f_phi_aj, f_phi_ak, f_phi_al,
2805                                 f_theta_ante_ai, f_theta_ante_aj, f_theta_ante_ak,
2806                                 f_theta_post_aj, f_theta_post_ak, f_theta_post_al,
2807                                 &v);
2808
2809
2810         /*      Acumulate the resuts per beads */
2811         for (d = 0; d < DIM; d++)
2812         {
2813             f_i[d] = f_phi_ai[d] + f_theta_ante_ai[d];
2814             f_j[d] = f_phi_aj[d] + f_theta_ante_aj[d] + f_theta_post_aj[d];
2815             f_k[d] = f_phi_ak[d] + f_theta_ante_ak[d] + f_theta_post_ak[d];
2816             f_l[d] = f_phi_al[d] + f_theta_post_al[d];
2817         }
2818
2819         /*      Compute the potential energy */
2820
2821         vtot += v;
2822
2823
2824         /*  Updating the forces */
2825         rvec_inc(f[ai], f_i);
2826         rvec_inc(f[aj], f_j);
2827         rvec_inc(f[ak], f_k);
2828         rvec_inc(f[al], f_l);
2829
2830
2831         /* Updating the fshift forces for the pressure coupling */
2832         if (g)
2833         {
2834             copy_ivec(SHIFT_IVEC(g, aj), jt);
2835             ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
2836             ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
2837             ivec_sub(SHIFT_IVEC(g, al), jt, dt_lj);
2838             t1 = IVEC2IS(dt_ij);
2839             t2 = IVEC2IS(dt_kj);
2840             t3 = IVEC2IS(dt_lj);
2841         }
2842         else if (pbc)
2843         {
2844             t3 = pbc_rvec_sub(pbc, x[al], x[aj], dx_jl);
2845         }
2846         else
2847         {
2848             t3 = CENTRAL;
2849         }
2850
2851         rvec_inc(fshift[t1], f_i);
2852         rvec_inc(fshift[CENTRAL], f_j);
2853         rvec_inc(fshift[t2], f_k);
2854         rvec_inc(fshift[t3], f_l);
2855     }
2856
2857     return vtot;
2858 }
2859
2860 real rbdihs(int nbonds,
2861             const t_iatom forceatoms[], const t_iparams forceparams[],
2862             const rvec x[], rvec f[], rvec fshift[],
2863             const t_pbc *pbc, const t_graph *g,
2864             real lambda, real *dvdlambda,
2865             const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2866             int gmx_unused *global_atom_index)
2867 {
2868     const real c0 = 0.0, c1 = 1.0, c2 = 2.0, c3 = 3.0, c4 = 4.0, c5 = 5.0;
2869     int        type, ai, aj, ak, al, i, j;
2870     int        t1, t2, t3;
2871     rvec       r_ij, r_kj, r_kl, m, n;
2872     real       parmA[NR_RBDIHS];
2873     real       parmB[NR_RBDIHS];
2874     real       parm[NR_RBDIHS];
2875     real       cos_phi, phi, rbp, rbpBA;
2876     real       v, sign, ddphi, sin_phi;
2877     real       cosfac, vtot;
2878     real       L1        = 1.0-lambda;
2879     real       dvdl_term = 0;
2880
2881     vtot = 0.0;
2882     for (i = 0; (i < nbonds); )
2883     {
2884         type = forceatoms[i++];
2885         ai   = forceatoms[i++];
2886         aj   = forceatoms[i++];
2887         ak   = forceatoms[i++];
2888         al   = forceatoms[i++];
2889
2890         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
2891                         &sign, &t1, &t2, &t3);  /*  84          */
2892
2893         /* Change to polymer convention */
2894         if (phi < c0)
2895         {
2896             phi += M_PI;
2897         }
2898         else
2899         {
2900             phi -= M_PI;    /*   1              */
2901
2902         }
2903         cos_phi = cos(phi);
2904         /* Beware of accuracy loss, cannot use 1-sqrt(cos^2) ! */
2905         sin_phi = sin(phi);
2906
2907         for (j = 0; (j < NR_RBDIHS); j++)
2908         {
2909             parmA[j] = forceparams[type].rbdihs.rbcA[j];
2910             parmB[j] = forceparams[type].rbdihs.rbcB[j];
2911             parm[j]  = L1*parmA[j]+lambda*parmB[j];
2912         }
2913         /* Calculate cosine powers */
2914         /* Calculate the energy */
2915         /* Calculate the derivative */
2916
2917         v            = parm[0];
2918         dvdl_term   += (parmB[0]-parmA[0]);
2919         ddphi        = c0;
2920         cosfac       = c1;
2921
2922         rbp          = parm[1];
2923         rbpBA        = parmB[1]-parmA[1];
2924         ddphi       += rbp*cosfac;
2925         cosfac      *= cos_phi;
2926         v           += cosfac*rbp;
2927         dvdl_term   += cosfac*rbpBA;
2928         rbp          = parm[2];
2929         rbpBA        = parmB[2]-parmA[2];
2930         ddphi       += c2*rbp*cosfac;
2931         cosfac      *= cos_phi;
2932         v           += cosfac*rbp;
2933         dvdl_term   += cosfac*rbpBA;
2934         rbp          = parm[3];
2935         rbpBA        = parmB[3]-parmA[3];
2936         ddphi       += c3*rbp*cosfac;
2937         cosfac      *= cos_phi;
2938         v           += cosfac*rbp;
2939         dvdl_term   += cosfac*rbpBA;
2940         rbp          = parm[4];
2941         rbpBA        = parmB[4]-parmA[4];
2942         ddphi       += c4*rbp*cosfac;
2943         cosfac      *= cos_phi;
2944         v           += cosfac*rbp;
2945         dvdl_term   += cosfac*rbpBA;
2946         rbp          = parm[5];
2947         rbpBA        = parmB[5]-parmA[5];
2948         ddphi       += c5*rbp*cosfac;
2949         cosfac      *= cos_phi;
2950         v           += cosfac*rbp;
2951         dvdl_term   += cosfac*rbpBA;
2952
2953         ddphi = -ddphi*sin_phi;         /*  11          */
2954
2955         do_dih_fup(ai, aj, ak, al, ddphi, r_ij, r_kj, r_kl, m, n,
2956                    f, fshift, pbc, g, x, t1, t2, t3); /* 112            */
2957         vtot += v;
2958     }
2959     *dvdlambda += dvdl_term;
2960
2961     return vtot;
2962 }
2963
2964 //! \endcond
2965
2966 /*! \brief Mysterious undocumented function */
2967 static int
2968 cmap_setup_grid_index(int ip, int grid_spacing, int *ipm1, int *ipp1, int *ipp2)
2969 {
2970     int im1, ip1, ip2;
2971
2972     if (ip < 0)
2973     {
2974         ip = ip + grid_spacing - 1;
2975     }
2976     else if (ip > grid_spacing)
2977     {
2978         ip = ip - grid_spacing - 1;
2979     }
2980
2981     im1 = ip - 1;
2982     ip1 = ip + 1;
2983     ip2 = ip + 2;
2984
2985     if (ip == 0)
2986     {
2987         im1 = grid_spacing - 1;
2988     }
2989     else if (ip == grid_spacing-2)
2990     {
2991         ip2 = 0;
2992     }
2993     else if (ip == grid_spacing-1)
2994     {
2995         ip1 = 0;
2996         ip2 = 1;
2997     }
2998
2999     *ipm1 = im1;
3000     *ipp1 = ip1;
3001     *ipp2 = ip2;
3002
3003     return ip;
3004
3005 }
3006
3007 real
3008 cmap_dihs(int nbonds,
3009           const t_iatom forceatoms[], const t_iparams forceparams[],
3010           const gmx_cmap_t *cmap_grid,
3011           const rvec x[], rvec f[], rvec fshift[],
3012           const struct t_pbc *pbc, const struct t_graph *g,
3013           real gmx_unused lambda, real gmx_unused *dvdlambda,
3014           const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
3015           int  gmx_unused *global_atom_index)
3016 {
3017     int         i, j, k, n, idx;
3018     int         ai, aj, ak, al, am;
3019     int         a1i, a1j, a1k, a1l, a2i, a2j, a2k, a2l;
3020     int         type, cmapA;
3021     int         t11, t21, t31, t12, t22, t32;
3022     int         iphi1, ip1m1, ip1p1, ip1p2;
3023     int         iphi2, ip2m1, ip2p1, ip2p2;
3024     int         l1, l2, l3;
3025     int         pos1, pos2, pos3, pos4;
3026
3027     real        ty[4], ty1[4], ty2[4], ty12[4], tc[16], tx[16];
3028     real        phi1, cos_phi1, sin_phi1, sign1, xphi1;
3029     real        phi2, cos_phi2, sin_phi2, sign2, xphi2;
3030     real        dx, xx, tt, tu, e, df1, df2, vtot;
3031     real        ra21, rb21, rg21, rg1, rgr1, ra2r1, rb2r1, rabr1;
3032     real        ra22, rb22, rg22, rg2, rgr2, ra2r2, rb2r2, rabr2;
3033     real        fg1, hg1, fga1, hgb1, gaa1, gbb1;
3034     real        fg2, hg2, fga2, hgb2, gaa2, gbb2;
3035     real        fac;
3036
3037     rvec        r1_ij, r1_kj, r1_kl, m1, n1;
3038     rvec        r2_ij, r2_kj, r2_kl, m2, n2;
3039     rvec        f1_i, f1_j, f1_k, f1_l;
3040     rvec        f2_i, f2_j, f2_k, f2_l;
3041     rvec        a1, b1, a2, b2;
3042     rvec        f1, g1, h1, f2, g2, h2;
3043     rvec        dtf1, dtg1, dth1, dtf2, dtg2, dth2;
3044     ivec        jt1, dt1_ij, dt1_kj, dt1_lj;
3045     ivec        jt2, dt2_ij, dt2_kj, dt2_lj;
3046
3047     const real *cmapd;
3048
3049     int         loop_index[4][4] = {
3050         {0, 4, 8, 12},
3051         {1, 5, 9, 13},
3052         {2, 6, 10, 14},
3053         {3, 7, 11, 15}
3054     };
3055
3056     /* Total CMAP energy */
3057     vtot = 0;
3058
3059     for (n = 0; n < nbonds; )
3060     {
3061         /* Five atoms are involved in the two torsions */
3062         type   = forceatoms[n++];
3063         ai     = forceatoms[n++];
3064         aj     = forceatoms[n++];
3065         ak     = forceatoms[n++];
3066         al     = forceatoms[n++];
3067         am     = forceatoms[n++];
3068
3069         /* Which CMAP type is this */
3070         cmapA = forceparams[type].cmap.cmapA;
3071         cmapd = cmap_grid->cmapdata[cmapA].cmap;
3072
3073         /* First torsion */
3074         a1i   = ai;
3075         a1j   = aj;
3076         a1k   = ak;
3077         a1l   = al;
3078
3079         phi1  = dih_angle(x[a1i], x[a1j], x[a1k], x[a1l], pbc, r1_ij, r1_kj, r1_kl, m1, n1,
3080                           &sign1, &t11, &t21, &t31);  /* 84 */
3081
3082         cos_phi1 = cos(phi1);
3083
3084         a1[0] = r1_ij[1]*r1_kj[2]-r1_ij[2]*r1_kj[1];
3085         a1[1] = r1_ij[2]*r1_kj[0]-r1_ij[0]*r1_kj[2];
3086         a1[2] = r1_ij[0]*r1_kj[1]-r1_ij[1]*r1_kj[0]; /* 9 */
3087
3088         b1[0] = r1_kl[1]*r1_kj[2]-r1_kl[2]*r1_kj[1];
3089         b1[1] = r1_kl[2]*r1_kj[0]-r1_kl[0]*r1_kj[2];
3090         b1[2] = r1_kl[0]*r1_kj[1]-r1_kl[1]*r1_kj[0]; /* 9 */
3091
3092         pbc_rvec_sub(pbc, x[a1l], x[a1k], h1);
3093
3094         ra21  = iprod(a1, a1);       /* 5 */
3095         rb21  = iprod(b1, b1);       /* 5 */
3096         rg21  = iprod(r1_kj, r1_kj); /* 5 */
3097         rg1   = sqrt(rg21);
3098
3099         rgr1  = 1.0/rg1;
3100         ra2r1 = 1.0/ra21;
3101         rb2r1 = 1.0/rb21;
3102         rabr1 = sqrt(ra2r1*rb2r1);
3103
3104         sin_phi1 = rg1 * rabr1 * iprod(a1, h1) * (-1);
3105
3106         if (cos_phi1 < -0.5 || cos_phi1 > 0.5)
3107         {
3108             phi1 = asin(sin_phi1);
3109
3110             if (cos_phi1 < 0)
3111             {
3112                 if (phi1 > 0)
3113                 {
3114                     phi1 = M_PI - phi1;
3115                 }
3116                 else
3117                 {
3118                     phi1 = -M_PI - phi1;
3119                 }
3120             }
3121         }
3122         else
3123         {
3124             phi1 = acos(cos_phi1);
3125
3126             if (sin_phi1 < 0)
3127             {
3128                 phi1 = -phi1;
3129             }
3130         }
3131
3132         xphi1 = phi1 + M_PI; /* 1 */
3133
3134         /* Second torsion */
3135         a2i   = aj;
3136         a2j   = ak;
3137         a2k   = al;
3138         a2l   = am;
3139
3140         phi2  = dih_angle(x[a2i], x[a2j], x[a2k], x[a2l], pbc, r2_ij, r2_kj, r2_kl, m2, n2,
3141                           &sign2, &t12, &t22, &t32); /* 84 */
3142
3143         cos_phi2 = cos(phi2);
3144
3145         a2[0] = r2_ij[1]*r2_kj[2]-r2_ij[2]*r2_kj[1];
3146         a2[1] = r2_ij[2]*r2_kj[0]-r2_ij[0]*r2_kj[2];
3147         a2[2] = r2_ij[0]*r2_kj[1]-r2_ij[1]*r2_kj[0]; /* 9 */
3148
3149         b2[0] = r2_kl[1]*r2_kj[2]-r2_kl[2]*r2_kj[1];
3150         b2[1] = r2_kl[2]*r2_kj[0]-r2_kl[0]*r2_kj[2];
3151         b2[2] = r2_kl[0]*r2_kj[1]-r2_kl[1]*r2_kj[0]; /* 9 */
3152
3153         pbc_rvec_sub(pbc, x[a2l], x[a2k], h2);
3154
3155         ra22  = iprod(a2, a2);         /* 5 */
3156         rb22  = iprod(b2, b2);         /* 5 */
3157         rg22  = iprod(r2_kj, r2_kj);   /* 5 */
3158         rg2   = sqrt(rg22);
3159
3160         rgr2  = 1.0/rg2;
3161         ra2r2 = 1.0/ra22;
3162         rb2r2 = 1.0/rb22;
3163         rabr2 = sqrt(ra2r2*rb2r2);
3164
3165         sin_phi2 = rg2 * rabr2 * iprod(a2, h2) * (-1);
3166
3167         if (cos_phi2 < -0.5 || cos_phi2 > 0.5)
3168         {
3169             phi2 = asin(sin_phi2);
3170
3171             if (cos_phi2 < 0)
3172             {
3173                 if (phi2 > 0)
3174                 {
3175                     phi2 = M_PI - phi2;
3176                 }
3177                 else
3178                 {
3179                     phi2 = -M_PI - phi2;
3180                 }
3181             }
3182         }
3183         else
3184         {
3185             phi2 = acos(cos_phi2);
3186
3187             if (sin_phi2 < 0)
3188             {
3189                 phi2 = -phi2;
3190             }
3191         }
3192
3193         xphi2 = phi2 + M_PI; /* 1 */
3194
3195         /* Range mangling */
3196         if (xphi1 < 0)
3197         {
3198             xphi1 = xphi1 + 2*M_PI;
3199         }
3200         else if (xphi1 >= 2*M_PI)
3201         {
3202             xphi1 = xphi1 - 2*M_PI;
3203         }
3204
3205         if (xphi2 < 0)
3206         {
3207             xphi2 = xphi2 + 2*M_PI;
3208         }
3209         else if (xphi2 >= 2*M_PI)
3210         {
3211             xphi2 = xphi2 - 2*M_PI;
3212         }
3213
3214         /* Number of grid points */
3215         dx = 2*M_PI / cmap_grid->grid_spacing;
3216
3217         /* Where on the grid are we */
3218         iphi1 = static_cast<int>(xphi1/dx);
3219         iphi2 = static_cast<int>(xphi2/dx);
3220
3221         iphi1 = cmap_setup_grid_index(iphi1, cmap_grid->grid_spacing, &ip1m1, &ip1p1, &ip1p2);
3222         iphi2 = cmap_setup_grid_index(iphi2, cmap_grid->grid_spacing, &ip2m1, &ip2p1, &ip2p2);
3223
3224         pos1    = iphi1*cmap_grid->grid_spacing+iphi2;
3225         pos2    = ip1p1*cmap_grid->grid_spacing+iphi2;
3226         pos3    = ip1p1*cmap_grid->grid_spacing+ip2p1;
3227         pos4    = iphi1*cmap_grid->grid_spacing+ip2p1;
3228
3229         ty[0]   = cmapd[pos1*4];
3230         ty[1]   = cmapd[pos2*4];
3231         ty[2]   = cmapd[pos3*4];
3232         ty[3]   = cmapd[pos4*4];
3233
3234         ty1[0]   = cmapd[pos1*4+1];
3235         ty1[1]   = cmapd[pos2*4+1];
3236         ty1[2]   = cmapd[pos3*4+1];
3237         ty1[3]   = cmapd[pos4*4+1];
3238
3239         ty2[0]   = cmapd[pos1*4+2];
3240         ty2[1]   = cmapd[pos2*4+2];
3241         ty2[2]   = cmapd[pos3*4+2];
3242         ty2[3]   = cmapd[pos4*4+2];
3243
3244         ty12[0]   = cmapd[pos1*4+3];
3245         ty12[1]   = cmapd[pos2*4+3];
3246         ty12[2]   = cmapd[pos3*4+3];
3247         ty12[3]   = cmapd[pos4*4+3];
3248
3249         /* Switch to degrees */
3250         dx    = 360.0 / cmap_grid->grid_spacing;
3251         xphi1 = xphi1 * RAD2DEG;
3252         xphi2 = xphi2 * RAD2DEG;
3253
3254         for (i = 0; i < 4; i++) /* 16 */
3255         {
3256             tx[i]    = ty[i];
3257             tx[i+4]  = ty1[i]*dx;
3258             tx[i+8]  = ty2[i]*dx;
3259             tx[i+12] = ty12[i]*dx*dx;
3260         }
3261
3262         idx = 0;
3263         for (i = 0; i < 4; i++) /* 1056 */
3264         {
3265             for (j = 0; j < 4; j++)
3266             {
3267                 xx = 0;
3268                 for (k = 0; k < 16; k++)
3269                 {
3270                     xx = xx + cmap_coeff_matrix[k*16+idx]*tx[k];
3271                 }
3272
3273                 idx++;
3274                 tc[i*4+j] = xx;
3275             }
3276         }
3277
3278         tt    = (xphi1-iphi1*dx)/dx;
3279         tu    = (xphi2-iphi2*dx)/dx;
3280
3281         e     = 0;
3282         df1   = 0;
3283         df2   = 0;
3284
3285         for (i = 3; i >= 0; i--)
3286         {
3287             l1 = loop_index[i][3];
3288             l2 = loop_index[i][2];
3289             l3 = loop_index[i][1];
3290
3291             e     = tt * e    + ((tc[i*4+3]*tu+tc[i*4+2])*tu + tc[i*4+1])*tu+tc[i*4];
3292             df1   = tu * df1  + (3.0*tc[l1]*tt+2.0*tc[l2])*tt+tc[l3];
3293             df2   = tt * df2  + (3.0*tc[i*4+3]*tu+2.0*tc[i*4+2])*tu+tc[i*4+1];
3294         }
3295
3296         fac     = RAD2DEG/dx;
3297         df1     = df1   * fac;
3298         df2     = df2   * fac;
3299
3300         /* CMAP energy */
3301         vtot += e;
3302
3303         /* Do forces - first torsion */
3304         fg1       = iprod(r1_ij, r1_kj);
3305         hg1       = iprod(r1_kl, r1_kj);
3306         fga1      = fg1*ra2r1*rgr1;
3307         hgb1      = hg1*rb2r1*rgr1;
3308         gaa1      = -ra2r1*rg1;
3309         gbb1      = rb2r1*rg1;
3310
3311         for (i = 0; i < DIM; i++)
3312         {
3313             dtf1[i]   = gaa1 * a1[i];
3314             dtg1[i]   = fga1 * a1[i] - hgb1 * b1[i];
3315             dth1[i]   = gbb1 * b1[i];
3316
3317             f1[i]     = df1  * dtf1[i];
3318             g1[i]     = df1  * dtg1[i];
3319             h1[i]     = df1  * dth1[i];
3320
3321             f1_i[i]   =  f1[i];
3322             f1_j[i]   = -f1[i] - g1[i];
3323             f1_k[i]   =  h1[i] + g1[i];
3324             f1_l[i]   = -h1[i];
3325
3326             f[a1i][i] = f[a1i][i] + f1_i[i];
3327             f[a1j][i] = f[a1j][i] + f1_j[i]; /* - f1[i] - g1[i] */
3328             f[a1k][i] = f[a1k][i] + f1_k[i]; /* h1[i] + g1[i] */
3329             f[a1l][i] = f[a1l][i] + f1_l[i]; /* h1[i] */
3330         }
3331
3332         /* Do forces - second torsion */
3333         fg2       = iprod(r2_ij, r2_kj);
3334         hg2       = iprod(r2_kl, r2_kj);
3335         fga2      = fg2*ra2r2*rgr2;
3336         hgb2      = hg2*rb2r2*rgr2;
3337         gaa2      = -ra2r2*rg2;
3338         gbb2      = rb2r2*rg2;
3339
3340         for (i = 0; i < DIM; i++)
3341         {
3342             dtf2[i]   = gaa2 * a2[i];
3343             dtg2[i]   = fga2 * a2[i] - hgb2 * b2[i];
3344             dth2[i]   = gbb2 * b2[i];
3345
3346             f2[i]     = df2  * dtf2[i];
3347             g2[i]     = df2  * dtg2[i];
3348             h2[i]     = df2  * dth2[i];
3349
3350             f2_i[i]   =  f2[i];
3351             f2_j[i]   = -f2[i] - g2[i];
3352             f2_k[i]   =  h2[i] + g2[i];
3353             f2_l[i]   = -h2[i];
3354
3355             f[a2i][i] = f[a2i][i] + f2_i[i]; /* f2[i] */
3356             f[a2j][i] = f[a2j][i] + f2_j[i]; /* - f2[i] - g2[i] */
3357             f[a2k][i] = f[a2k][i] + f2_k[i]; /* h2[i] + g2[i] */
3358             f[a2l][i] = f[a2l][i] + f2_l[i]; /* - h2[i] */
3359         }
3360
3361         /* Shift forces */
3362         if (g)
3363         {
3364             copy_ivec(SHIFT_IVEC(g, a1j), jt1);
3365             ivec_sub(SHIFT_IVEC(g, a1i),  jt1, dt1_ij);
3366             ivec_sub(SHIFT_IVEC(g, a1k),  jt1, dt1_kj);
3367             ivec_sub(SHIFT_IVEC(g, a1l),  jt1, dt1_lj);
3368             t11 = IVEC2IS(dt1_ij);
3369             t21 = IVEC2IS(dt1_kj);
3370             t31 = IVEC2IS(dt1_lj);
3371
3372             copy_ivec(SHIFT_IVEC(g, a2j), jt2);
3373             ivec_sub(SHIFT_IVEC(g, a2i),  jt2, dt2_ij);
3374             ivec_sub(SHIFT_IVEC(g, a2k),  jt2, dt2_kj);
3375             ivec_sub(SHIFT_IVEC(g, a2l),  jt2, dt2_lj);
3376             t12 = IVEC2IS(dt2_ij);
3377             t22 = IVEC2IS(dt2_kj);
3378             t32 = IVEC2IS(dt2_lj);
3379         }
3380         else if (pbc)
3381         {
3382             t31 = pbc_rvec_sub(pbc, x[a1l], x[a1j], h1);
3383             t32 = pbc_rvec_sub(pbc, x[a2l], x[a2j], h2);
3384         }
3385         else
3386         {
3387             t31 = CENTRAL;
3388             t32 = CENTRAL;
3389         }
3390
3391         rvec_inc(fshift[t11], f1_i);
3392         rvec_inc(fshift[CENTRAL], f1_j);
3393         rvec_inc(fshift[t21], f1_k);
3394         rvec_inc(fshift[t31], f1_l);
3395
3396         rvec_inc(fshift[t21], f2_i);
3397         rvec_inc(fshift[CENTRAL], f2_j);
3398         rvec_inc(fshift[t22], f2_k);
3399         rvec_inc(fshift[t32], f2_l);
3400     }
3401     return vtot;
3402 }
3403
3404
3405 //! \cond
3406 /***********************************************************
3407  *
3408  *   G R O M O S  9 6   F U N C T I O N S
3409  *
3410  ***********************************************************/
3411 real g96harmonic(real kA, real kB, real xA, real xB, real x, real lambda,
3412                  real *V, real *F)
3413 {
3414     const real half = 0.5;
3415     real       L1, kk, x0, dx, dx2;
3416     real       v, f, dvdlambda;
3417
3418     L1    = 1.0-lambda;
3419     kk    = L1*kA+lambda*kB;
3420     x0    = L1*xA+lambda*xB;
3421
3422     dx    = x-x0;
3423     dx2   = dx*dx;
3424
3425     f          = -kk*dx;
3426     v          = half*kk*dx2;
3427     dvdlambda  = half*(kB-kA)*dx2 + (xA-xB)*kk*dx;
3428
3429     *F    = f;
3430     *V    = v;
3431
3432     return dvdlambda;
3433
3434     /* That was 21 flops */
3435 }
3436
3437 real g96bonds(int nbonds,
3438               const t_iatom forceatoms[], const t_iparams forceparams[],
3439               const rvec x[], rvec f[], rvec fshift[],
3440               const t_pbc *pbc, const t_graph *g,
3441               real lambda, real *dvdlambda,
3442               const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
3443               int gmx_unused *global_atom_index)
3444 {
3445     int  i, m, ki, ai, aj, type;
3446     real dr2, fbond, vbond, fij, vtot;
3447     rvec dx;
3448     ivec dt;
3449
3450     vtot = 0.0;
3451     for (i = 0; (i < nbonds); )
3452     {
3453         type = forceatoms[i++];
3454         ai   = forceatoms[i++];
3455         aj   = forceatoms[i++];
3456
3457         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
3458         dr2  = iprod(dx, dx);                       /*   5              */
3459
3460         *dvdlambda += g96harmonic(forceparams[type].harmonic.krA,
3461                                   forceparams[type].harmonic.krB,
3462                                   forceparams[type].harmonic.rA,
3463                                   forceparams[type].harmonic.rB,
3464                                   dr2, lambda, &vbond, &fbond);
3465
3466         vtot  += 0.5*vbond;                         /* 1*/
3467 #ifdef DEBUG
3468         if (debug)
3469         {
3470             fprintf(debug, "G96-BONDS: dr = %10g  vbond = %10g  fbond = %10g\n",
3471                     sqrt(dr2), vbond, fbond);
3472         }
3473 #endif
3474
3475         if (g)
3476         {
3477             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
3478             ki = IVEC2IS(dt);
3479         }
3480         for (m = 0; (m < DIM); m++)     /*  15          */
3481         {
3482             fij                 = fbond*dx[m];
3483             f[ai][m]           += fij;
3484             f[aj][m]           -= fij;
3485             fshift[ki][m]      += fij;
3486             fshift[CENTRAL][m] -= fij;
3487         }
3488     }               /* 44 TOTAL */
3489     return vtot;
3490 }
3491
3492 real g96bond_angle(const rvec xi, const rvec xj, const rvec xk, const t_pbc *pbc,
3493                    rvec r_ij, rvec r_kj,
3494                    int *t1, int *t2)
3495 /* Return value is the angle between the bonds i-j and j-k */
3496 {
3497     real costh;
3498
3499     *t1 = pbc_rvec_sub(pbc, xi, xj, r_ij); /*  3                */
3500     *t2 = pbc_rvec_sub(pbc, xk, xj, r_kj); /*  3                */
3501
3502     costh = cos_angle(r_ij, r_kj);         /* 25                */
3503     /* 41 TOTAL */
3504     return costh;
3505 }
3506
3507 real g96angles(int nbonds,
3508                const t_iatom forceatoms[], const t_iparams forceparams[],
3509                const rvec x[], rvec f[], rvec fshift[],
3510                const t_pbc *pbc, const t_graph *g,
3511                real lambda, real *dvdlambda,
3512                const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
3513                int gmx_unused *global_atom_index)
3514 {
3515     int  i, ai, aj, ak, type, m, t1, t2;
3516     rvec r_ij, r_kj;
3517     real cos_theta, dVdt, va, vtot;
3518     real rij_1, rij_2, rkj_1, rkj_2, rijrkj_1;
3519     rvec f_i, f_j, f_k;
3520     ivec jt, dt_ij, dt_kj;
3521
3522     vtot = 0.0;
3523     for (i = 0; (i < nbonds); )
3524     {
3525         type = forceatoms[i++];
3526         ai   = forceatoms[i++];
3527         aj   = forceatoms[i++];
3528         ak   = forceatoms[i++];
3529
3530         cos_theta  = g96bond_angle(x[ai], x[aj], x[ak], pbc, r_ij, r_kj, &t1, &t2);
3531
3532         *dvdlambda += g96harmonic(forceparams[type].harmonic.krA,
3533                                   forceparams[type].harmonic.krB,
3534                                   forceparams[type].harmonic.rA,
3535                                   forceparams[type].harmonic.rB,
3536                                   cos_theta, lambda, &va, &dVdt);
3537         vtot    += va;
3538
3539         rij_1    = gmx_invsqrt(iprod(r_ij, r_ij));
3540         rkj_1    = gmx_invsqrt(iprod(r_kj, r_kj));
3541         rij_2    = rij_1*rij_1;
3542         rkj_2    = rkj_1*rkj_1;
3543         rijrkj_1 = rij_1*rkj_1;                 /* 23 */
3544
3545 #ifdef DEBUG
3546         if (debug)
3547         {
3548             fprintf(debug, "G96ANGLES: costheta = %10g  vth = %10g  dV/dct = %10g\n",
3549                     cos_theta, va, dVdt);
3550         }
3551 #endif
3552         for (m = 0; (m < DIM); m++)     /*  42  */
3553         {
3554             f_i[m]    = dVdt*(r_kj[m]*rijrkj_1 - r_ij[m]*rij_2*cos_theta);
3555             f_k[m]    = dVdt*(r_ij[m]*rijrkj_1 - r_kj[m]*rkj_2*cos_theta);
3556             f_j[m]    = -f_i[m]-f_k[m];
3557             f[ai][m] += f_i[m];
3558             f[aj][m] += f_j[m];
3559             f[ak][m] += f_k[m];
3560         }
3561
3562         if (g)
3563         {
3564             copy_ivec(SHIFT_IVEC(g, aj), jt);
3565
3566             ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
3567             ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
3568             t1 = IVEC2IS(dt_ij);
3569             t2 = IVEC2IS(dt_kj);
3570         }
3571         rvec_inc(fshift[t1], f_i);
3572         rvec_inc(fshift[CENTRAL], f_j);
3573         rvec_inc(fshift[t2], f_k);          /* 9 */
3574         /* 163 TOTAL    */
3575     }
3576     return vtot;
3577 }
3578
3579 real cross_bond_bond(int nbonds,
3580                      const t_iatom forceatoms[], const t_iparams forceparams[],
3581                      const rvec x[], rvec f[], rvec fshift[],
3582                      const t_pbc *pbc, const t_graph *g,
3583                      real gmx_unused lambda, real gmx_unused *dvdlambda,
3584                      const t_mdatoms gmx_unused *md, t_fcdata gmx_unused  *fcd,
3585                      int gmx_unused *global_atom_index)
3586 {
3587     /* Potential from Lawrence and Skimmer, Chem. Phys. Lett. 372 (2003)
3588      * pp. 842-847
3589      */
3590     int  i, ai, aj, ak, type, m, t1, t2;
3591     rvec r_ij, r_kj;
3592     real vtot, vrr, s1, s2, r1, r2, r1e, r2e, krr;
3593     rvec f_i, f_j, f_k;
3594     ivec jt, dt_ij, dt_kj;
3595
3596     vtot = 0.0;
3597     for (i = 0; (i < nbonds); )
3598     {
3599         type = forceatoms[i++];
3600         ai   = forceatoms[i++];
3601         aj   = forceatoms[i++];
3602         ak   = forceatoms[i++];
3603         r1e  = forceparams[type].cross_bb.r1e;
3604         r2e  = forceparams[type].cross_bb.r2e;
3605         krr  = forceparams[type].cross_bb.krr;
3606
3607         /* Compute distance vectors ... */
3608         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], r_ij);
3609         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], r_kj);
3610
3611         /* ... and their lengths */
3612         r1 = norm(r_ij);
3613         r2 = norm(r_kj);
3614
3615         /* Deviations from ideality */
3616         s1 = r1-r1e;
3617         s2 = r2-r2e;
3618
3619         /* Energy (can be negative!) */
3620         vrr   = krr*s1*s2;
3621         vtot += vrr;
3622
3623         /* Forces */
3624         svmul(-krr*s2/r1, r_ij, f_i);
3625         svmul(-krr*s1/r2, r_kj, f_k);
3626
3627         for (m = 0; (m < DIM); m++)     /*  12  */
3628         {
3629             f_j[m]    = -f_i[m] - f_k[m];
3630             f[ai][m] += f_i[m];
3631             f[aj][m] += f_j[m];
3632             f[ak][m] += f_k[m];
3633         }
3634
3635         /* Virial stuff */
3636         if (g)
3637         {
3638             copy_ivec(SHIFT_IVEC(g, aj), jt);
3639
3640             ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
3641             ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
3642             t1 = IVEC2IS(dt_ij);
3643             t2 = IVEC2IS(dt_kj);
3644         }
3645         rvec_inc(fshift[t1], f_i);
3646         rvec_inc(fshift[CENTRAL], f_j);
3647         rvec_inc(fshift[t2], f_k);          /* 9 */
3648         /* 163 TOTAL    */
3649     }
3650     return vtot;
3651 }
3652
3653 real cross_bond_angle(int nbonds,
3654                       const t_iatom forceatoms[], const t_iparams forceparams[],
3655                       const rvec x[], rvec f[], rvec fshift[],
3656                       const t_pbc *pbc, const t_graph *g,
3657                       real gmx_unused lambda, real gmx_unused *dvdlambda,
3658                       const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
3659                       int gmx_unused *global_atom_index)
3660 {
3661     /* Potential from Lawrence and Skimmer, Chem. Phys. Lett. 372 (2003)
3662      * pp. 842-847
3663      */
3664     int  i, ai, aj, ak, type, m, t1, t2;
3665     rvec r_ij, r_kj, r_ik;
3666     real vtot, vrt, s1, s2, s3, r1, r2, r3, r1e, r2e, r3e, krt, k1, k2, k3;
3667     rvec f_i, f_j, f_k;
3668     ivec jt, dt_ij, dt_kj;
3669
3670     vtot = 0.0;
3671     for (i = 0; (i < nbonds); )
3672     {
3673         type = forceatoms[i++];
3674         ai   = forceatoms[i++];
3675         aj   = forceatoms[i++];
3676         ak   = forceatoms[i++];
3677         r1e  = forceparams[type].cross_ba.r1e;
3678         r2e  = forceparams[type].cross_ba.r2e;
3679         r3e  = forceparams[type].cross_ba.r3e;
3680         krt  = forceparams[type].cross_ba.krt;
3681
3682         /* Compute distance vectors ... */
3683         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], r_ij);
3684         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], r_kj);
3685         pbc_rvec_sub(pbc, x[ai], x[ak], r_ik);
3686
3687         /* ... and their lengths */
3688         r1 = norm(r_ij);
3689         r2 = norm(r_kj);
3690         r3 = norm(r_ik);
3691
3692         /* Deviations from ideality */
3693         s1 = r1-r1e;
3694         s2 = r2-r2e;
3695         s3 = r3-r3e;
3696
3697         /* Energy (can be negative!) */
3698         vrt   = krt*s3*(s1+s2);
3699         vtot += vrt;
3700
3701         /* Forces */
3702         k1 = -krt*(s3/r1);
3703         k2 = -krt*(s3/r2);
3704         k3 = -krt*(s1+s2)/r3;
3705         for (m = 0; (m < DIM); m++)
3706         {
3707             f_i[m] = k1*r_ij[m] + k3*r_ik[m];
3708             f_k[m] = k2*r_kj[m] - k3*r_ik[m];
3709             f_j[m] = -f_i[m] - f_k[m];
3710         }
3711
3712         for (m = 0; (m < DIM); m++)     /*  12  */
3713         {
3714             f[ai][m] += f_i[m];
3715             f[aj][m] += f_j[m];
3716             f[ak][m] += f_k[m];
3717         }
3718
3719         /* Virial stuff */
3720         if (g)
3721         {
3722             copy_ivec(SHIFT_IVEC(g, aj), jt);
3723
3724             ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
3725             ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
3726             t1 = IVEC2IS(dt_ij);
3727             t2 = IVEC2IS(dt_kj);
3728         }
3729         rvec_inc(fshift[t1], f_i);
3730         rvec_inc(fshift[CENTRAL], f_j);
3731         rvec_inc(fshift[t2], f_k);          /* 9 */
3732         /* 163 TOTAL    */
3733     }
3734     return vtot;
3735 }
3736
3737 static real bonded_tab(const char *type, int table_nr,
3738                        const bondedtable_t *table, real kA, real kB, real r,
3739                        real lambda, real *V, real *F)
3740 {
3741     real k, tabscale, *VFtab, rt, eps, eps2, Yt, Ft, Geps, Heps2, Fp, VV, FF;
3742     int  n0, nnn;
3743     real dvdlambda;
3744
3745     k = (1.0 - lambda)*kA + lambda*kB;
3746
3747     tabscale = table->scale;
3748     VFtab    = table->data;
3749
3750     rt    = r*tabscale;
3751     n0    = static_cast<int>(rt);
3752     if (n0 >= table->n)
3753     {
3754         gmx_fatal(FARGS, "A tabulated %s interaction table number %d is out of the table range: r %f, between table indices %d and %d, table length %d",
3755                   type, table_nr, r, n0, n0+1, table->n);
3756     }
3757     eps   = rt - n0;
3758     eps2  = eps*eps;
3759     nnn   = 4*n0;
3760     Yt    = VFtab[nnn];
3761     Ft    = VFtab[nnn+1];
3762     Geps  = VFtab[nnn+2]*eps;
3763     Heps2 = VFtab[nnn+3]*eps2;
3764     Fp    = Ft + Geps + Heps2;
3765     VV    = Yt + Fp*eps;
3766     FF    = Fp + Geps + 2.0*Heps2;
3767
3768     *F         = -k*FF*tabscale;
3769     *V         = k*VV;
3770     dvdlambda  = (kB - kA)*VV;
3771
3772     return dvdlambda;
3773
3774     /* That was 22 flops */
3775 }
3776
3777 real tab_bonds(int nbonds,
3778                const t_iatom forceatoms[], const t_iparams forceparams[],
3779                const rvec x[], rvec f[], rvec fshift[],
3780                const t_pbc *pbc, const t_graph *g,
3781                real lambda, real *dvdlambda,
3782                const t_mdatoms gmx_unused *md, t_fcdata *fcd,
3783                int gmx_unused  *global_atom_index)
3784 {
3785     int  i, m, ki, ai, aj, type, table;
3786     real dr, dr2, fbond, vbond, fij, vtot;
3787     rvec dx;
3788     ivec dt;
3789
3790     vtot = 0.0;
3791     for (i = 0; (i < nbonds); )
3792     {
3793         type = forceatoms[i++];
3794         ai   = forceatoms[i++];
3795         aj   = forceatoms[i++];
3796
3797         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
3798         dr2  = iprod(dx, dx);                       /*   5              */
3799         dr   = dr2*gmx_invsqrt(dr2);                /*  10              */
3800
3801         table = forceparams[type].tab.table;
3802
3803         *dvdlambda += bonded_tab("bond", table,
3804                                  &fcd->bondtab[table],
3805                                  forceparams[type].tab.kA,
3806                                  forceparams[type].tab.kB,
3807                                  dr, lambda, &vbond, &fbond); /*  22 */
3808
3809         if (dr2 == 0.0)
3810         {
3811             continue;
3812         }
3813
3814
3815         vtot  += vbond;            /* 1*/
3816         fbond *= gmx_invsqrt(dr2); /*   6               */
3817 #ifdef DEBUG
3818         if (debug)
3819         {
3820             fprintf(debug, "TABBONDS: dr = %10g  vbond = %10g  fbond = %10g\n",
3821                     dr, vbond, fbond);
3822         }
3823 #endif
3824         if (g)
3825         {
3826             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
3827             ki = IVEC2IS(dt);
3828         }
3829         for (m = 0; (m < DIM); m++)     /*  15          */
3830         {
3831             fij                 = fbond*dx[m];
3832             f[ai][m]           += fij;
3833             f[aj][m]           -= fij;
3834             fshift[ki][m]      += fij;
3835             fshift[CENTRAL][m] -= fij;
3836         }
3837     }               /* 62 TOTAL */
3838     return vtot;
3839 }
3840
3841 real tab_angles(int nbonds,
3842                 const t_iatom forceatoms[], const t_iparams forceparams[],
3843                 const rvec x[], rvec f[], rvec fshift[],
3844                 const t_pbc *pbc, const t_graph *g,
3845                 real lambda, real *dvdlambda,
3846                 const t_mdatoms gmx_unused  *md, t_fcdata *fcd,
3847                 int gmx_unused *global_atom_index)
3848 {
3849     int  i, ai, aj, ak, t1, t2, type, table;
3850     rvec r_ij, r_kj;
3851     real cos_theta, cos_theta2, theta, dVdt, va, vtot;
3852     ivec jt, dt_ij, dt_kj;
3853
3854     vtot = 0.0;
3855     for (i = 0; (i < nbonds); )
3856     {
3857         type = forceatoms[i++];
3858         ai   = forceatoms[i++];
3859         aj   = forceatoms[i++];
3860         ak   = forceatoms[i++];
3861
3862         theta  = bond_angle(x[ai], x[aj], x[ak], pbc,
3863                             r_ij, r_kj, &cos_theta, &t1, &t2); /*  41           */
3864
3865         table = forceparams[type].tab.table;
3866
3867         *dvdlambda += bonded_tab("angle", table,
3868                                  &fcd->angletab[table],
3869                                  forceparams[type].tab.kA,
3870                                  forceparams[type].tab.kB,
3871                                  theta, lambda, &va, &dVdt); /*  22  */
3872         vtot += va;
3873
3874         cos_theta2 = sqr(cos_theta);            /*   1          */
3875         if (cos_theta2 < 1)
3876         {
3877             int  m;
3878             real st, sth;
3879             real cik, cii, ckk;
3880             real nrkj2, nrij2;
3881             rvec f_i, f_j, f_k;
3882
3883             st  = dVdt*gmx_invsqrt(1 - cos_theta2); /*  12              */
3884             sth = st*cos_theta;                     /*   1              */
3885 #ifdef DEBUG
3886             if (debug)
3887             {
3888                 fprintf(debug, "ANGLES: theta = %10g  vth = %10g  dV/dtheta = %10g\n",
3889                         theta*RAD2DEG, va, dVdt);
3890             }
3891 #endif
3892             nrkj2 = iprod(r_kj, r_kj);  /*   5          */
3893             nrij2 = iprod(r_ij, r_ij);
3894
3895             cik = st*gmx_invsqrt(nrkj2*nrij2); /*  12           */
3896             cii = sth/nrij2;                   /*  10           */
3897             ckk = sth/nrkj2;                   /*  10           */
3898
3899             for (m = 0; (m < DIM); m++)        /*  39           */
3900             {
3901                 f_i[m]    = -(cik*r_kj[m]-cii*r_ij[m]);
3902                 f_k[m]    = -(cik*r_ij[m]-ckk*r_kj[m]);
3903                 f_j[m]    = -f_i[m]-f_k[m];
3904                 f[ai][m] += f_i[m];
3905                 f[aj][m] += f_j[m];
3906                 f[ak][m] += f_k[m];
3907             }
3908             if (g)
3909             {
3910                 copy_ivec(SHIFT_IVEC(g, aj), jt);
3911
3912                 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
3913                 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
3914                 t1 = IVEC2IS(dt_ij);
3915                 t2 = IVEC2IS(dt_kj);
3916             }
3917             rvec_inc(fshift[t1], f_i);
3918             rvec_inc(fshift[CENTRAL], f_j);
3919             rvec_inc(fshift[t2], f_k);
3920         }                                       /* 169 TOTAL    */
3921     }
3922     return vtot;
3923 }
3924
3925 real tab_dihs(int nbonds,
3926               const t_iatom forceatoms[], const t_iparams forceparams[],
3927               const rvec x[], rvec f[], rvec fshift[],
3928               const t_pbc *pbc, const t_graph *g,
3929               real lambda, real *dvdlambda,
3930               const t_mdatoms gmx_unused *md, t_fcdata *fcd,
3931               int gmx_unused *global_atom_index)
3932 {
3933     int  i, type, ai, aj, ak, al, table;
3934     int  t1, t2, t3;
3935     rvec r_ij, r_kj, r_kl, m, n;
3936     real phi, sign, ddphi, vpd, vtot;
3937
3938     vtot = 0.0;
3939     for (i = 0; (i < nbonds); )
3940     {
3941         type = forceatoms[i++];
3942         ai   = forceatoms[i++];
3943         aj   = forceatoms[i++];
3944         ak   = forceatoms[i++];
3945         al   = forceatoms[i++];
3946
3947         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
3948                         &sign, &t1, &t2, &t3);  /*  84  */
3949
3950         table = forceparams[type].tab.table;
3951
3952         /* Hopefully phi+M_PI never results in values < 0 */
3953         *dvdlambda += bonded_tab("dihedral", table,
3954                                  &fcd->dihtab[table],
3955                                  forceparams[type].tab.kA,
3956                                  forceparams[type].tab.kB,
3957                                  phi+M_PI, lambda, &vpd, &ddphi);
3958
3959         vtot += vpd;
3960         do_dih_fup(ai, aj, ak, al, -ddphi, r_ij, r_kj, r_kl, m, n,
3961                    f, fshift, pbc, g, x, t1, t2, t3); /* 112    */
3962
3963 #ifdef DEBUG
3964         fprintf(debug, "pdih: (%d,%d,%d,%d) phi=%g\n",
3965                 ai, aj, ak, al, phi);
3966 #endif
3967     } /* 227 TOTAL  */
3968
3969     return vtot;
3970 }
3971
3972 //! \endcond