src/gromacs/gmxlib/bondfree.c

   1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
   2  *
   3  *
   4  *                This source code is part of
   5  *
   6  *                 G   R   O   M   A   C   S
   7  *
   8  *          GROningen MAchine for Chemical Simulations
   9  *
  10  *                        VERSION 3.2.0
  11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  13  * Copyright (c) 2001-2004, The GROMACS development team,
  14  * check out http://www.gromacs.org for more information.
  15
  16  * This program is free software; you can redistribute it and/or
  17  * modify it under the terms of the GNU General Public License
  18  * as published by the Free Software Foundation; either version 2
  19  * of the License, or (at your option) any later version.
  20  *
  21  * If you want to redistribute modifications, please consider that
  22  * scientific software is very special. Version control is crucial -
  23  * bugs must be traceable. We will be happy to consider code for
  24  * inclusion in the official distribution, but derived work must not
  25  * be called official GROMACS. Details are found in the README & COPYING
  26  * files - if they are missing, get the official version at www.gromacs.org.
  27  *
  28  * To help us fund GROMACS development, we humbly ask that you cite
  29  * the papers on the package - you can find them in the top README file.
  30  *
  31  * For more info, check our website at http://www.gromacs.org
  32  *
  33  * And Hey:
  34  * GROningen Mixture of Alchemy and Childrens' Stories
  35  */
  36 #ifdef HAVE_CONFIG_H
  37 #include <config.h>
  38 #endif
  39
  40 #include <math.h>
  41 #include "physics.h"
  42 #include "vec.h"
  43 #include "maths.h"
  44 #include "txtdump.h"
  45 #include "bondf.h"
  46 #include "smalloc.h"
  47 #include "pbc.h"
  48 #include "ns.h"
  49 #include "macros.h"
  50 #include "names.h"
  51 #include "gmx_fatal.h"
  52 #include "mshift.h"
  53 #include "main.h"
  54 #include "disre.h"
  55 #include "orires.h"
  56 #include "force.h"
  57 #include "nonbonded.h"
  58
  59 /* Include the SIMD macro file and then check for support */
  60 #include "gmx_simd_macros.h"
  61 #if defined GMX_HAVE_SIMD_MACROS && defined GMX_SIMD_HAVE_TRIGONOMETRIC
  62 #define SIMD_BONDEDS
  63 #include "gmx_simd_vec.h"
  64 #endif
  65
  66 /* Find a better place for this? */
  67 const int cmap_coeff_matrix[] = {
  68     1, 0, -3,  2, 0, 0,  0,  0, -3,  0,  9, -6,  2,  0, -6,  4,
  69     0, 0,  0,  0, 0, 0,  0,  0,  3,  0, -9,  6, -2,  0,  6, -4,
  70     0, 0,  0,  0, 0, 0,  0,  0,  0,  0,  9, -6,  0,  0, -6,  4,
  71     0, 0,  3, -2, 0, 0,  0,  0,  0,  0, -9,  6,  0,  0,  6, -4,
  72     0, 0,  0,  0, 1, 0, -3,  2, -2,  0,  6, -4,  1,  0, -3,  2,
  73     0, 0,  0,  0, 0, 0,  0,  0, -1,  0,  3, -2,  1,  0, -3,  2,
  74     0, 0,  0,  0, 0, 0,  0,  0,  0,  0, -3,  2,  0,  0,  3, -2,
  75     0, 0,  0,  0, 0, 0,  3, -2,  0,  0, -6,  4,  0,  0,  3, -2,
  76     0, 1, -2,  1, 0, 0,  0,  0,  0, -3,  6, -3,  0,  2, -4,  2,
  77     0, 0,  0,  0, 0, 0,  0,  0,  0,  3, -6,  3,  0, -2,  4, -2,
  78     0, 0,  0,  0, 0, 0,  0,  0,  0,  0, -3,  3,  0,  0,  2, -2,
  79     0, 0, -1,  1, 0, 0,  0,  0,  0,  0,  3, -3,  0,  0, -2,  2,
  80     0, 0,  0,  0, 0, 1, -2,  1,  0, -2,  4, -2,  0,  1, -2,  1,
  81     0, 0,  0,  0, 0, 0,  0,  0,  0, -1,  2, -1,  0,  1, -2,  1,
  82     0, 0,  0,  0, 0, 0,  0,  0,  0,  0,  1, -1,  0,  0, -1,  1,
  83     0, 0,  0,  0, 0, 0, -1,  1,  0,  0,  2, -2,  0,  0, -1,  1
  84 };
  85
  86
  87
  88 int glatnr(int *global_atom_index, int i)
  89 {
  90     int atnr;
  91
  92     if (global_atom_index == NULL)
  93     {
  94         atnr = i + 1;
  95     }
  96     else
  97     {
  98         atnr = global_atom_index[i] + 1;
  99     }
 100
 101     return atnr;
 102 }
 103
 104 static int pbc_rvec_sub(const t_pbc *pbc, const rvec xi, const rvec xj, rvec dx)
 105 {
 106     if (pbc)
 107     {
 108         return pbc_dx_aiuc(pbc, xi, xj, dx);
 109     }
 110     else
 111     {
 112         rvec_sub(xi, xj, dx);
 113         return CENTRAL;
 114     }
 115 }
 116
 117 #ifdef SIMD_BONDEDS
 118
 119 /* SIMD PBC data structure, containing 1/boxdiag and the box vectors */
 120 typedef struct {
 121     gmx_mm_pr inv_bzz;
 122     gmx_mm_pr inv_byy;
 123     gmx_mm_pr inv_bxx;
 124     gmx_mm_pr bzx;
 125     gmx_mm_pr bzy;
 126     gmx_mm_pr bzz;
 127     gmx_mm_pr byx;
 128     gmx_mm_pr byy;
 129     gmx_mm_pr bxx;
 130 } pbc_simd_t;
 131
 132 /* Set the SIMD pbc data from a normal t_pbc struct */
 133 static void set_pbc_simd(const t_pbc *pbc, pbc_simd_t *pbc_simd)
 134 {
 135     rvec inv_bdiag;
 136     int  d;
 137
 138     /* Setting inv_bdiag to 0 effectively turns off PBC */
 139     clear_rvec(inv_bdiag);
 140     if (pbc != NULL)
 141     {
 142         for (d = 0; d < pbc->ndim_ePBC; d++)
 143         {
 144             inv_bdiag[d] = 1.0/pbc->box[d][d];
 145         }
 146     }
 147
 148     pbc_simd->inv_bzz = gmx_set1_pr(inv_bdiag[ZZ]);
 149     pbc_simd->inv_byy = gmx_set1_pr(inv_bdiag[YY]);
 150     pbc_simd->inv_bxx = gmx_set1_pr(inv_bdiag[XX]);
 151
 152     if (pbc != NULL)
 153     {
 154         pbc_simd->bzx = gmx_set1_pr(pbc->box[ZZ][XX]);
 155         pbc_simd->bzy = gmx_set1_pr(pbc->box[ZZ][YY]);
 156         pbc_simd->bzz = gmx_set1_pr(pbc->box[ZZ][ZZ]);
 157         pbc_simd->byx = gmx_set1_pr(pbc->box[YY][XX]);
 158         pbc_simd->byy = gmx_set1_pr(pbc->box[YY][YY]);
 159         pbc_simd->bxx = gmx_set1_pr(pbc->box[XX][XX]);
 160     }
 161     else
 162     {
 163         pbc_simd->bzx = gmx_setzero_pr();
 164         pbc_simd->bzy = gmx_setzero_pr();
 165         pbc_simd->bzz = gmx_setzero_pr();
 166         pbc_simd->byx = gmx_setzero_pr();
 167         pbc_simd->byy = gmx_setzero_pr();
 168         pbc_simd->bxx = gmx_setzero_pr();
 169     }
 170 }
 171
 172 /* Correct distance vector *dx,*dy,*dz for PBC using SIMD */
 173 static gmx_inline void
 174 pbc_dx_simd(gmx_mm_pr *dx, gmx_mm_pr *dy, gmx_mm_pr *dz,
 175             const pbc_simd_t *pbc)
 176 {
 177     gmx_mm_pr sh;
 178
 179     sh  = gmx_round_pr(gmx_mul_pr(*dz, pbc->inv_bzz));
 180     *dx = gmx_nmsub_pr(sh, pbc->bzx, *dx);
 181     *dy = gmx_nmsub_pr(sh, pbc->bzy, *dy);
 182     *dz = gmx_nmsub_pr(sh, pbc->bzz, *dz);
 183
 184     sh  = gmx_round_pr(gmx_mul_pr(*dy, pbc->inv_byy));
 185     *dx = gmx_nmsub_pr(sh, pbc->byx, *dx);
 186     *dy = gmx_nmsub_pr(sh, pbc->byy, *dy);
 187
 188     sh  = gmx_round_pr(gmx_mul_pr(*dx, pbc->inv_bxx));
 189     *dx = gmx_nmsub_pr(sh, pbc->bxx, *dx);
 190 }
 191
 192 #endif /* SIMD_BONDEDS */
 193
 194 /*
 195  * Morse potential bond by Frank Everdij
 196  *
 197  * Three parameters needed:
 198  *
 199  * b0 = equilibrium distance in nm
 200  * be = beta in nm^-1 (actually, it's nu_e*Sqrt(2*pi*pi*mu/D_e))
 201  * cb = well depth in kJ/mol
 202  *
 203  * Note: the potential is referenced to be +cb at infinite separation
 204  *       and zero at the equilibrium distance!
 205  */
 206
 207 real morse_bonds(int nbonds,
 208                  const t_iatom forceatoms[], const t_iparams forceparams[],
 209                  const rvec x[], rvec f[], rvec fshift[],
 210                  const t_pbc *pbc, const t_graph *g,
 211                  real lambda, real *dvdlambda,
 212                  const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
 213                  int gmx_unused *global_atom_index)
 214 {
 215     const real one = 1.0;
 216     const real two = 2.0;
 217     real       dr, dr2, temp, omtemp, cbomtemp, fbond, vbond, fij, vtot;
 218     real       b0, be, cb, b0A, beA, cbA, b0B, beB, cbB, L1;
 219     rvec       dx;
 220     int        i, m, ki, type, ai, aj;
 221     ivec       dt;
 222
 223     vtot = 0.0;
 224     for (i = 0; (i < nbonds); )
 225     {
 226         type = forceatoms[i++];
 227         ai   = forceatoms[i++];
 228         aj   = forceatoms[i++];
 229
 230         b0A   = forceparams[type].morse.b0A;
 231         beA   = forceparams[type].morse.betaA;
 232         cbA   = forceparams[type].morse.cbA;
 233
 234         b0B   = forceparams[type].morse.b0B;
 235         beB   = forceparams[type].morse.betaB;
 236         cbB   = forceparams[type].morse.cbB;
 237
 238         L1 = one-lambda;                            /* 1 */
 239         b0 = L1*b0A + lambda*b0B;                   /* 3 */
 240         be = L1*beA + lambda*beB;                   /* 3 */
 241         cb = L1*cbA + lambda*cbB;                   /* 3 */
 242
 243         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3          */
 244         dr2  = iprod(dx, dx);                       /*   5          */
 245         dr   = dr2*gmx_invsqrt(dr2);                /*  10          */
 246         temp = exp(-be*(dr-b0));                    /*  12          */
 247
 248         if (temp == one)
 249         {
 250             /* bonds are constrainted. This may _not_ include bond constraints if they are lambda dependent */
 251             *dvdlambda += cbB-cbA;
 252             continue;
 253         }
 254
 255         omtemp    = one-temp;                                                                                        /*   1          */
 256         cbomtemp  = cb*omtemp;                                                                                       /*   1          */
 257         vbond     = cbomtemp*omtemp;                                                                                 /*   1          */
 258         fbond     = -two*be*temp*cbomtemp*gmx_invsqrt(dr2);                                                          /*   9          */
 259         vtot     += vbond;                                                                                           /*   1          */
 260
 261         *dvdlambda += (cbB - cbA) * omtemp * omtemp - (2-2*omtemp)*omtemp * cb * ((b0B-b0A)*be - (beB-beA)*(dr-b0)); /* 15 */
 262
 263         if (g)
 264         {
 265             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
 266             ki = IVEC2IS(dt);
 267         }
 268
 269         for (m = 0; (m < DIM); m++)                    /*  15          */
 270         {
 271             fij                 = fbond*dx[m];
 272             f[ai][m]           += fij;
 273             f[aj][m]           -= fij;
 274             fshift[ki][m]      += fij;
 275             fshift[CENTRAL][m] -= fij;
 276         }
 277     }                                         /*  83 TOTAL    */
 278     return vtot;
 279 }
 280
 281 real cubic_bonds(int nbonds,
 282                  const t_iatom forceatoms[], const t_iparams forceparams[],
 283                  const rvec x[], rvec f[], rvec fshift[],
 284                  const t_pbc *pbc, const t_graph *g,
 285                  real gmx_unused lambda, real gmx_unused *dvdlambda,
 286                  const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
 287                  int gmx_unused *global_atom_index)
 288 {
 289     const real three = 3.0;
 290     const real two   = 2.0;
 291     real       kb, b0, kcub;
 292     real       dr, dr2, dist, kdist, kdist2, fbond, vbond, fij, vtot;
 293     rvec       dx;
 294     int        i, m, ki, type, ai, aj;
 295     ivec       dt;
 296
 297     vtot = 0.0;
 298     for (i = 0; (i < nbonds); )
 299     {
 300         type = forceatoms[i++];
 301         ai   = forceatoms[i++];
 302         aj   = forceatoms[i++];
 303
 304         b0   = forceparams[type].cubic.b0;
 305         kb   = forceparams[type].cubic.kb;
 306         kcub = forceparams[type].cubic.kcub;
 307
 308         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx);     /*   3          */
 309         dr2  = iprod(dx, dx);                           /*   5          */
 310
 311         if (dr2 == 0.0)
 312         {
 313             continue;
 314         }
 315
 316         dr         = dr2*gmx_invsqrt(dr2);                  /*  10          */
 317         dist       = dr-b0;
 318         kdist      = kb*dist;
 319         kdist2     = kdist*dist;
 320
 321         vbond      = kdist2 + kcub*kdist2*dist;
 322         fbond      = -(two*kdist + three*kdist2*kcub)/dr;
 323
 324         vtot      += vbond;   /* 21 */
 325
 326         if (g)
 327         {
 328             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
 329             ki = IVEC2IS(dt);
 330         }
 331         for (m = 0; (m < DIM); m++)                    /*  15          */
 332         {
 333             fij                 = fbond*dx[m];
 334             f[ai][m]           += fij;
 335             f[aj][m]           -= fij;
 336             fshift[ki][m]      += fij;
 337             fshift[CENTRAL][m] -= fij;
 338         }
 339     }                                         /*  54 TOTAL    */
 340     return vtot;
 341 }
 342
 343 real FENE_bonds(int nbonds,
 344                 const t_iatom forceatoms[], const t_iparams forceparams[],
 345                 const rvec x[], rvec f[], rvec fshift[],
 346                 const t_pbc *pbc, const t_graph *g,
 347                 real gmx_unused lambda, real gmx_unused *dvdlambda,
 348                 const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
 349                 int *global_atom_index)
 350 {
 351     const real half = 0.5;
 352     const real one  = 1.0;
 353     real       bm, kb;
 354     real       dr, dr2, bm2, omdr2obm2, fbond, vbond, fij, vtot;
 355     rvec       dx;
 356     int        i, m, ki, type, ai, aj;
 357     ivec       dt;
 358
 359     vtot = 0.0;
 360     for (i = 0; (i < nbonds); )
 361     {
 362         type = forceatoms[i++];
 363         ai   = forceatoms[i++];
 364         aj   = forceatoms[i++];
 365
 366         bm   = forceparams[type].fene.bm;
 367         kb   = forceparams[type].fene.kb;
 368
 369         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx);     /*   3          */
 370         dr2  = iprod(dx, dx);                           /*   5          */
 371
 372         if (dr2 == 0.0)
 373         {
 374             continue;
 375         }
 376
 377         bm2 = bm*bm;
 378
 379         if (dr2 >= bm2)
 380         {
 381             gmx_fatal(FARGS,
 382                       "r^2 (%f) >= bm^2 (%f) in FENE bond between atoms %d and %d",
 383                       dr2, bm2,
 384                       glatnr(global_atom_index, ai),
 385                       glatnr(global_atom_index, aj));
 386         }
 387
 388         omdr2obm2  = one - dr2/bm2;
 389
 390         vbond      = -half*kb*bm2*log(omdr2obm2);
 391         fbond      = -kb/omdr2obm2;
 392
 393         vtot      += vbond;   /* 35 */
 394
 395         if (g)
 396         {
 397             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
 398             ki = IVEC2IS(dt);
 399         }
 400         for (m = 0; (m < DIM); m++)                    /*  15          */
 401         {
 402             fij                 = fbond*dx[m];
 403             f[ai][m]           += fij;
 404             f[aj][m]           -= fij;
 405             fshift[ki][m]      += fij;
 406             fshift[CENTRAL][m] -= fij;
 407         }
 408     }                                         /*  58 TOTAL    */
 409     return vtot;
 410 }
 411
 412 real harmonic(real kA, real kB, real xA, real xB, real x, real lambda,
 413               real *V, real *F)
 414 {
 415     const real half = 0.5;
 416     real       L1, kk, x0, dx, dx2;
 417     real       v, f, dvdlambda;
 418
 419     L1    = 1.0-lambda;
 420     kk    = L1*kA+lambda*kB;
 421     x0    = L1*xA+lambda*xB;
 422
 423     dx    = x-x0;
 424     dx2   = dx*dx;
 425
 426     f          = -kk*dx;
 427     v          = half*kk*dx2;
 428     dvdlambda  = half*(kB-kA)*dx2 + (xA-xB)*kk*dx;
 429
 430     *F    = f;
 431     *V    = v;
 432
 433     return dvdlambda;
 434
 435     /* That was 19 flops */
 436 }
 437
 438
 439 real bonds(int nbonds,
 440            const t_iatom forceatoms[], const t_iparams forceparams[],
 441            const rvec x[], rvec f[], rvec fshift[],
 442            const t_pbc *pbc, const t_graph *g,
 443            real lambda, real *dvdlambda,
 444            const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
 445            int gmx_unused *global_atom_index)
 446 {
 447     int  i, m, ki, ai, aj, type;
 448     real dr, dr2, fbond, vbond, fij, vtot;
 449     rvec dx;
 450     ivec dt;
 451
 452     vtot = 0.0;
 453     for (i = 0; (i < nbonds); )
 454     {
 455         type = forceatoms[i++];
 456         ai   = forceatoms[i++];
 457         aj   = forceatoms[i++];
 458
 459         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
 460         dr2  = iprod(dx, dx);                       /*   5              */
 461         dr   = dr2*gmx_invsqrt(dr2);                /*  10              */
 462
 463         *dvdlambda += harmonic(forceparams[type].harmonic.krA,
 464                                forceparams[type].harmonic.krB,
 465                                forceparams[type].harmonic.rA,
 466                                forceparams[type].harmonic.rB,
 467                                dr, lambda, &vbond, &fbond); /*  19  */
 468
 469         if (dr2 == 0.0)
 470         {
 471             continue;
 472         }
 473
 474
 475         vtot  += vbond;            /* 1*/
 476         fbond *= gmx_invsqrt(dr2); /*   6               */
 477 #ifdef DEBUG
 478         if (debug)
 479         {
 480             fprintf(debug, "BONDS: dr = %10g  vbond = %10g  fbond = %10g\n",
 481                     dr, vbond, fbond);
 482         }
 483 #endif
 484         if (g)
 485         {
 486             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
 487             ki = IVEC2IS(dt);
 488         }
 489         for (m = 0; (m < DIM); m++)     /*  15          */
 490         {
 491             fij                 = fbond*dx[m];
 492             f[ai][m]           += fij;
 493             f[aj][m]           -= fij;
 494             fshift[ki][m]      += fij;
 495             fshift[CENTRAL][m] -= fij;
 496         }
 497     }               /* 59 TOTAL */
 498     return vtot;
 499 }
 500
 501 real restraint_bonds(int nbonds,
 502                      const t_iatom forceatoms[], const t_iparams forceparams[],
 503                      const rvec x[], rvec f[], rvec fshift[],
 504                      const t_pbc *pbc, const t_graph *g,
 505                      real lambda, real *dvdlambda,
 506                      const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
 507                      int gmx_unused *global_atom_index)
 508 {
 509     int  i, m, ki, ai, aj, type;
 510     real dr, dr2, fbond, vbond, fij, vtot;
 511     real L1;
 512     real low, dlow, up1, dup1, up2, dup2, k, dk;
 513     real drh, drh2;
 514     rvec dx;
 515     ivec dt;
 516
 517     L1   = 1.0 - lambda;
 518
 519     vtot = 0.0;
 520     for (i = 0; (i < nbonds); )
 521     {
 522         type = forceatoms[i++];
 523         ai   = forceatoms[i++];
 524         aj   = forceatoms[i++];
 525
 526         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
 527         dr2  = iprod(dx, dx);                       /*   5              */
 528         dr   = dr2*gmx_invsqrt(dr2);                /*  10              */
 529
 530         low  = L1*forceparams[type].restraint.lowA + lambda*forceparams[type].restraint.lowB;
 531         dlow =   -forceparams[type].restraint.lowA +        forceparams[type].restraint.lowB;
 532         up1  = L1*forceparams[type].restraint.up1A + lambda*forceparams[type].restraint.up1B;
 533         dup1 =   -forceparams[type].restraint.up1A +        forceparams[type].restraint.up1B;
 534         up2  = L1*forceparams[type].restraint.up2A + lambda*forceparams[type].restraint.up2B;
 535         dup2 =   -forceparams[type].restraint.up2A +        forceparams[type].restraint.up2B;
 536         k    = L1*forceparams[type].restraint.kA   + lambda*forceparams[type].restraint.kB;
 537         dk   =   -forceparams[type].restraint.kA   +        forceparams[type].restraint.kB;
 538         /* 24 */
 539
 540         if (dr < low)
 541         {
 542             drh         = dr - low;
 543             drh2        = drh*drh;
 544             vbond       = 0.5*k*drh2;
 545             fbond       = -k*drh;
 546             *dvdlambda += 0.5*dk*drh2 - k*dlow*drh;
 547         } /* 11 */
 548         else if (dr <= up1)
 549         {
 550             vbond = 0;
 551             fbond = 0;
 552         }
 553         else if (dr <= up2)
 554         {
 555             drh         = dr - up1;
 556             drh2        = drh*drh;
 557             vbond       = 0.5*k*drh2;
 558             fbond       = -k*drh;
 559             *dvdlambda += 0.5*dk*drh2 - k*dup1*drh;
 560         } /* 11 */
 561         else
 562         {
 563             drh         = dr - up2;
 564             vbond       = k*(up2 - up1)*(0.5*(up2 - up1) + drh);
 565             fbond       = -k*(up2 - up1);
 566             *dvdlambda += dk*(up2 - up1)*(0.5*(up2 - up1) + drh)
 567                 + k*(dup2 - dup1)*(up2 - up1 + drh)
 568                 - k*(up2 - up1)*dup2;
 569         }
 570
 571         if (dr2 == 0.0)
 572         {
 573             continue;
 574         }
 575
 576         vtot  += vbond;            /* 1*/
 577         fbond *= gmx_invsqrt(dr2); /*   6               */
 578 #ifdef DEBUG
 579         if (debug)
 580         {
 581             fprintf(debug, "BONDS: dr = %10g  vbond = %10g  fbond = %10g\n",
 582                     dr, vbond, fbond);
 583         }
 584 #endif
 585         if (g)
 586         {
 587             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
 588             ki = IVEC2IS(dt);
 589         }
 590         for (m = 0; (m < DIM); m++)             /*  15          */
 591         {
 592             fij                 = fbond*dx[m];
 593             f[ai][m]           += fij;
 594             f[aj][m]           -= fij;
 595             fshift[ki][m]      += fij;
 596             fshift[CENTRAL][m] -= fij;
 597         }
 598     }                   /* 59 TOTAL     */
 599
 600     return vtot;
 601 }
 602
 603 real polarize(int nbonds,
 604               const t_iatom forceatoms[], const t_iparams forceparams[],
 605               const rvec x[], rvec f[], rvec fshift[],
 606               const t_pbc *pbc, const t_graph *g,
 607               real lambda, real *dvdlambda,
 608               const t_mdatoms *md, t_fcdata gmx_unused *fcd,
 609               int gmx_unused *global_atom_index)
 610 {
 611     int  i, m, ki, ai, aj, type;
 612     real dr, dr2, fbond, vbond, fij, vtot, ksh;
 613     rvec dx;
 614     ivec dt;
 615
 616     vtot = 0.0;
 617     for (i = 0; (i < nbonds); )
 618     {
 619         type = forceatoms[i++];
 620         ai   = forceatoms[i++];
 621         aj   = forceatoms[i++];
 622         ksh  = sqr(md->chargeA[aj])*ONE_4PI_EPS0/forceparams[type].polarize.alpha;
 623         if (debug)
 624         {
 625             fprintf(debug, "POL: local ai = %d aj = %d ksh = %.3f\n", ai, aj, ksh);
 626         }
 627
 628         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx);                         /*   3      */
 629         dr2  = iprod(dx, dx);                                               /*   5              */
 630         dr   = dr2*gmx_invsqrt(dr2);                                        /*  10              */
 631
 632         *dvdlambda += harmonic(ksh, ksh, 0, 0, dr, lambda, &vbond, &fbond); /*  19  */
 633
 634         if (dr2 == 0.0)
 635         {
 636             continue;
 637         }
 638
 639         vtot  += vbond;            /* 1*/
 640         fbond *= gmx_invsqrt(dr2); /*   6               */
 641
 642         if (g)
 643         {
 644             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
 645             ki = IVEC2IS(dt);
 646         }
 647         for (m = 0; (m < DIM); m++)     /*  15          */
 648         {
 649             fij                 = fbond*dx[m];
 650             f[ai][m]           += fij;
 651             f[aj][m]           -= fij;
 652             fshift[ki][m]      += fij;
 653             fshift[CENTRAL][m] -= fij;
 654         }
 655     }               /* 59 TOTAL */
 656     return vtot;
 657 }
 658
 659 real anharm_polarize(int nbonds,
 660                      const t_iatom forceatoms[], const t_iparams forceparams[],
 661                      const rvec x[], rvec f[], rvec fshift[],
 662                      const t_pbc *pbc, const t_graph *g,
 663                      real lambda, real *dvdlambda,
 664                      const t_mdatoms *md, t_fcdata gmx_unused *fcd,
 665                      int gmx_unused *global_atom_index)
 666 {
 667     int  i, m, ki, ai, aj, type;
 668     real dr, dr2, fbond, vbond, fij, vtot, ksh, khyp, drcut, ddr, ddr3;
 669     rvec dx;
 670     ivec dt;
 671
 672     vtot = 0.0;
 673     for (i = 0; (i < nbonds); )
 674     {
 675         type  = forceatoms[i++];
 676         ai    = forceatoms[i++];
 677         aj    = forceatoms[i++];
 678         ksh   = sqr(md->chargeA[aj])*ONE_4PI_EPS0/forceparams[type].anharm_polarize.alpha; /* 7*/
 679         khyp  = forceparams[type].anharm_polarize.khyp;
 680         drcut = forceparams[type].anharm_polarize.drcut;
 681         if (debug)
 682         {
 683             fprintf(debug, "POL: local ai = %d aj = %d ksh = %.3f\n", ai, aj, ksh);
 684         }
 685
 686         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx);                         /*   3      */
 687         dr2  = iprod(dx, dx);                                               /*   5              */
 688         dr   = dr2*gmx_invsqrt(dr2);                                        /*  10              */
 689
 690         *dvdlambda += harmonic(ksh, ksh, 0, 0, dr, lambda, &vbond, &fbond); /*  19  */
 691
 692         if (dr2 == 0.0)
 693         {
 694             continue;
 695         }
 696
 697         if (dr > drcut)
 698         {
 699             ddr    = dr-drcut;
 700             ddr3   = ddr*ddr*ddr;
 701             vbond += khyp*ddr*ddr3;
 702             fbond -= 4*khyp*ddr3;
 703         }
 704         fbond *= gmx_invsqrt(dr2); /*   6               */
 705         vtot  += vbond;            /* 1*/
 706
 707         if (g)
 708         {
 709             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
 710             ki = IVEC2IS(dt);
 711         }
 712         for (m = 0; (m < DIM); m++)     /*  15          */
 713         {
 714             fij                 = fbond*dx[m];
 715             f[ai][m]           += fij;
 716             f[aj][m]           -= fij;
 717             fshift[ki][m]      += fij;
 718             fshift[CENTRAL][m] -= fij;
 719         }
 720     }               /* 72 TOTAL */
 721     return vtot;
 722 }
 723
 724 real water_pol(int nbonds,
 725                const t_iatom forceatoms[], const t_iparams forceparams[],
 726                const rvec x[], rvec f[], rvec gmx_unused fshift[],
 727                const t_pbc gmx_unused *pbc, const t_graph gmx_unused *g,
 728                real gmx_unused lambda, real gmx_unused *dvdlambda,
 729                const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
 730                int gmx_unused *global_atom_index)
 731 {
 732     /* This routine implements anisotropic polarizibility for water, through
 733      * a shell connected to a dummy with spring constant that differ in the
 734      * three spatial dimensions in the molecular frame.
 735      */
 736     int  i, m, aO, aH1, aH2, aD, aS, type, type0;
 737     rvec dOH1, dOH2, dHH, dOD, dDS, nW, kk, dx, kdx, proj;
 738 #ifdef DEBUG
 739     rvec df;
 740 #endif
 741     real vtot, fij, r_HH, r_OD, r_nW, tx, ty, tz, qS;
 742
 743     vtot = 0.0;
 744     if (nbonds > 0)
 745     {
 746         type0  = forceatoms[0];
 747         aS     = forceatoms[5];
 748         qS     = md->chargeA[aS];
 749         kk[XX] = sqr(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_x;
 750         kk[YY] = sqr(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_y;
 751         kk[ZZ] = sqr(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_z;
 752         r_HH   = 1.0/forceparams[type0].wpol.rHH;
 753         r_OD   = 1.0/forceparams[type0].wpol.rOD;
 754         if (debug)
 755         {
 756             fprintf(debug, "WPOL: qS  = %10.5f aS = %5d\n", qS, aS);
 757             fprintf(debug, "WPOL: kk  = %10.3f        %10.3f        %10.3f\n",
 758                     kk[XX], kk[YY], kk[ZZ]);
 759             fprintf(debug, "WPOL: rOH = %10.3f  rHH = %10.3f  rOD = %10.3f\n",
 760                     forceparams[type0].wpol.rOH,
 761                     forceparams[type0].wpol.rHH,
 762                     forceparams[type0].wpol.rOD);
 763         }
 764         for (i = 0; (i < nbonds); i += 6)
 765         {
 766             type = forceatoms[i];
 767             if (type != type0)
 768             {
 769                 gmx_fatal(FARGS, "Sorry, type = %d, type0 = %d, file = %s, line = %d",
 770                           type, type0, __FILE__, __LINE__);
 771             }
 772             aO   = forceatoms[i+1];
 773             aH1  = forceatoms[i+2];
 774             aH2  = forceatoms[i+3];
 775             aD   = forceatoms[i+4];
 776             aS   = forceatoms[i+5];
 777
 778             /* Compute vectors describing the water frame */
 779             rvec_sub(x[aH1], x[aO], dOH1);
 780             rvec_sub(x[aH2], x[aO], dOH2);
 781             rvec_sub(x[aH2], x[aH1], dHH);
 782             rvec_sub(x[aD], x[aO], dOD);
 783             rvec_sub(x[aS], x[aD], dDS);
 784             cprod(dOH1, dOH2, nW);
 785
 786             /* Compute inverse length of normal vector
 787              * (this one could be precomputed, but I'm too lazy now)
 788              */
 789             r_nW = gmx_invsqrt(iprod(nW, nW));
 790             /* This is for precision, but does not make a big difference,
 791              * it can go later.
 792              */
 793             r_OD = gmx_invsqrt(iprod(dOD, dOD));
 794
 795             /* Normalize the vectors in the water frame */
 796             svmul(r_nW, nW, nW);
 797             svmul(r_HH, dHH, dHH);
 798             svmul(r_OD, dOD, dOD);
 799
 800             /* Compute displacement of shell along components of the vector */
 801             dx[ZZ] = iprod(dDS, dOD);
 802             /* Compute projection on the XY plane: dDS - dx[ZZ]*dOD */
 803             for (m = 0; (m < DIM); m++)
 804             {
 805                 proj[m] = dDS[m]-dx[ZZ]*dOD[m];
 806             }
 807
 808             /*dx[XX] = iprod(dDS,nW);
 809                dx[YY] = iprod(dDS,dHH);*/
 810             dx[XX] = iprod(proj, nW);
 811             for (m = 0; (m < DIM); m++)
 812             {
 813                 proj[m] -= dx[XX]*nW[m];
 814             }
 815             dx[YY] = iprod(proj, dHH);
 816             /*#define DEBUG*/
 817 #ifdef DEBUG
 818             if (debug)
 819             {
 820                 fprintf(debug, "WPOL: dx2=%10g  dy2=%10g  dz2=%10g  sum=%10g  dDS^2=%10g\n",
 821                         sqr(dx[XX]), sqr(dx[YY]), sqr(dx[ZZ]), iprod(dx, dx), iprod(dDS, dDS));
 822                 fprintf(debug, "WPOL: dHH=(%10g,%10g,%10g)\n", dHH[XX], dHH[YY], dHH[ZZ]);
 823                 fprintf(debug, "WPOL: dOD=(%10g,%10g,%10g), 1/r_OD = %10g\n",
 824                         dOD[XX], dOD[YY], dOD[ZZ], 1/r_OD);
 825                 fprintf(debug, "WPOL: nW =(%10g,%10g,%10g), 1/r_nW = %10g\n",
 826                         nW[XX], nW[YY], nW[ZZ], 1/r_nW);
 827                 fprintf(debug, "WPOL: dx  =%10g, dy  =%10g, dz  =%10g\n",
 828                         dx[XX], dx[YY], dx[ZZ]);
 829                 fprintf(debug, "WPOL: dDSx=%10g, dDSy=%10g, dDSz=%10g\n",
 830                         dDS[XX], dDS[YY], dDS[ZZ]);
 831             }
 832 #endif
 833             /* Now compute the forces and energy */
 834             kdx[XX] = kk[XX]*dx[XX];
 835             kdx[YY] = kk[YY]*dx[YY];
 836             kdx[ZZ] = kk[ZZ]*dx[ZZ];
 837             vtot   += iprod(dx, kdx);
 838             for (m = 0; (m < DIM); m++)
 839             {
 840                 /* This is a tensor operation but written out for speed */
 841                 tx        =  nW[m]*kdx[XX];
 842                 ty        = dHH[m]*kdx[YY];
 843                 tz        = dOD[m]*kdx[ZZ];
 844                 fij       = -tx-ty-tz;
 845 #ifdef DEBUG
 846                 df[m] = fij;
 847 #endif
 848                 f[aS][m] += fij;
 849                 f[aD][m] -= fij;
 850             }
 851 #ifdef DEBUG
 852             if (debug)
 853             {
 854                 fprintf(debug, "WPOL: vwpol=%g\n", 0.5*iprod(dx, kdx));
 855                 fprintf(debug, "WPOL: df = (%10g, %10g, %10g)\n", df[XX], df[YY], df[ZZ]);
 856             }
 857 #endif
 858         }
 859     }
 860     return 0.5*vtot;
 861 }
 862
 863 static real do_1_thole(const rvec xi, const rvec xj, rvec fi, rvec fj,
 864                        const t_pbc *pbc, real qq,
 865                        rvec fshift[], real afac)
 866 {
 867     rvec r12;
 868     real r12sq, r12_1, r12n, r12bar, v0, v1, fscal, ebar, fff;
 869     int  m, t;
 870
 871     t      = pbc_rvec_sub(pbc, xi, xj, r12);                      /*  3 */
 872
 873     r12sq  = iprod(r12, r12);                                     /*  5 */
 874     r12_1  = gmx_invsqrt(r12sq);                                  /*  5 */
 875     r12bar = afac/r12_1;                                          /*  5 */
 876     v0     = qq*ONE_4PI_EPS0*r12_1;                               /*  2 */
 877     ebar   = exp(-r12bar);                                        /*  5 */
 878     v1     = (1-(1+0.5*r12bar)*ebar);                             /*  4 */
 879     fscal  = ((v0*r12_1)*v1 - v0*0.5*afac*ebar*(r12bar+1))*r12_1; /* 9 */
 880     if (debug)
 881     {
 882         fprintf(debug, "THOLE: v0 = %.3f v1 = %.3f r12= % .3f r12bar = %.3f fscal = %.3f  ebar = %.3f\n", v0, v1, 1/r12_1, r12bar, fscal, ebar);
 883     }
 884
 885     for (m = 0; (m < DIM); m++)
 886     {
 887         fff                 = fscal*r12[m];
 888         fi[m]              += fff;
 889         fj[m]              -= fff;
 890         fshift[t][m]       += fff;
 891         fshift[CENTRAL][m] -= fff;
 892     }             /* 15 */
 893
 894     return v0*v1; /* 1 */
 895     /* 54 */
 896 }
 897
 898 real thole_pol(int nbonds,
 899                const t_iatom forceatoms[], const t_iparams forceparams[],
 900                const rvec x[], rvec f[], rvec fshift[],
 901                const t_pbc *pbc, const t_graph gmx_unused *g,
 902                real gmx_unused lambda, real gmx_unused *dvdlambda,
 903                const t_mdatoms *md, t_fcdata gmx_unused *fcd,
 904                int gmx_unused *global_atom_index)
 905 {
 906     /* Interaction between two pairs of particles with opposite charge */
 907     int  i, type, a1, da1, a2, da2;
 908     real q1, q2, qq, a, al1, al2, afac;
 909     real V = 0;
 910
 911     for (i = 0; (i < nbonds); )
 912     {
 913         type  = forceatoms[i++];
 914         a1    = forceatoms[i++];
 915         da1   = forceatoms[i++];
 916         a2    = forceatoms[i++];
 917         da2   = forceatoms[i++];
 918         q1    = md->chargeA[da1];
 919         q2    = md->chargeA[da2];
 920         a     = forceparams[type].thole.a;
 921         al1   = forceparams[type].thole.alpha1;
 922         al2   = forceparams[type].thole.alpha2;
 923         qq    = q1*q2;
 924         afac  = a*pow(al1*al2, -1.0/6.0);
 925         V    += do_1_thole(x[a1], x[a2], f[a1], f[a2], pbc, qq, fshift, afac);
 926         V    += do_1_thole(x[da1], x[a2], f[da1], f[a2], pbc, -qq, fshift, afac);
 927         V    += do_1_thole(x[a1], x[da2], f[a1], f[da2], pbc, -qq, fshift, afac);
 928         V    += do_1_thole(x[da1], x[da2], f[da1], f[da2], pbc, qq, fshift, afac);
 929     }
 930     /* 290 flops */
 931     return V;
 932 }
 933
 934 real bond_angle(const rvec xi, const rvec xj, const rvec xk, const t_pbc *pbc,
 935                 rvec r_ij, rvec r_kj, real *costh,
 936                 int *t1, int *t2)
 937 /* Return value is the angle between the bonds i-j and j-k */
 938 {
 939     /* 41 FLOPS */
 940     real th;
 941
 942     *t1 = pbc_rvec_sub(pbc, xi, xj, r_ij); /*  3                */
 943     *t2 = pbc_rvec_sub(pbc, xk, xj, r_kj); /*  3                */
 944
 945     *costh = cos_angle(r_ij, r_kj);        /* 25                */
 946     th     = acos(*costh);                 /* 10                */
 947     /* 41 TOTAL */
 948     return th;
 949 }
 950
 951 real angles(int nbonds,
 952             const t_iatom forceatoms[], const t_iparams forceparams[],
 953             const rvec x[], rvec f[], rvec fshift[],
 954             const t_pbc *pbc, const t_graph *g,
 955             real lambda, real *dvdlambda,
 956             const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
 957             int gmx_unused *global_atom_index)
 958 {
 959     int  i, ai, aj, ak, t1, t2, type;
 960     rvec r_ij, r_kj;
 961     real cos_theta, cos_theta2, theta, dVdt, va, vtot;
 962     ivec jt, dt_ij, dt_kj;
 963
 964     vtot = 0.0;
 965     for (i = 0; i < nbonds; )
 966     {
 967         type = forceatoms[i++];
 968         ai   = forceatoms[i++];
 969         aj   = forceatoms[i++];
 970         ak   = forceatoms[i++];
 971
 972         theta  = bond_angle(x[ai], x[aj], x[ak], pbc,
 973                             r_ij, r_kj, &cos_theta, &t1, &t2);  /*  41          */
 974
 975         *dvdlambda += harmonic(forceparams[type].harmonic.krA,
 976                                forceparams[type].harmonic.krB,
 977                                forceparams[type].harmonic.rA*DEG2RAD,
 978                                forceparams[type].harmonic.rB*DEG2RAD,
 979                                theta, lambda, &va, &dVdt);  /*  21  */
 980         vtot += va;
 981
 982         cos_theta2 = sqr(cos_theta);
 983         if (cos_theta2 < 1)
 984         {
 985             int  m;
 986             real st, sth;
 987             real cik, cii, ckk;
 988             real nrkj2, nrij2;
 989             real nrkj_1, nrij_1;
 990             rvec f_i, f_j, f_k;
 991
 992             st  = dVdt*gmx_invsqrt(1 - cos_theta2); /*  12              */
 993             sth = st*cos_theta;                     /*   1              */
 994 #ifdef DEBUG
 995             if (debug)
 996             {
 997                 fprintf(debug, "ANGLES: theta = %10g  vth = %10g  dV/dtheta = %10g\n",
 998                         theta*RAD2DEG, va, dVdt);
 999             }
1000 #endif
1001             nrij2 = iprod(r_ij, r_ij);      /*   5              */
1002             nrkj2 = iprod(r_kj, r_kj);      /*   5              */
1003
1004             nrij_1 = gmx_invsqrt(nrij2);    /*  10              */
1005             nrkj_1 = gmx_invsqrt(nrkj2);    /*  10              */
1006
1007             cik = st*nrij_1*nrkj_1;         /*   2              */
1008             cii = sth*nrij_1*nrij_1;        /*   2              */
1009             ckk = sth*nrkj_1*nrkj_1;        /*   2              */
1010
1011             for (m = 0; m < DIM; m++)
1012             {           /*  39          */
1013                 f_i[m]    = -(cik*r_kj[m] - cii*r_ij[m]);
1014                 f_k[m]    = -(cik*r_ij[m] - ckk*r_kj[m]);
1015                 f_j[m]    = -f_i[m] - f_k[m];
1016                 f[ai][m] += f_i[m];
1017                 f[aj][m] += f_j[m];
1018                 f[ak][m] += f_k[m];
1019             }
1020             if (g != NULL)
1021             {
1022                 copy_ivec(SHIFT_IVEC(g, aj), jt);
1023
1024                 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
1025                 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
1026                 t1 = IVEC2IS(dt_ij);
1027                 t2 = IVEC2IS(dt_kj);
1028             }
1029             rvec_inc(fshift[t1], f_i);
1030             rvec_inc(fshift[CENTRAL], f_j);
1031             rvec_inc(fshift[t2], f_k);
1032         }                                           /* 161 TOTAL        */
1033     }
1034
1035     return vtot;
1036 }
1037
1038 #ifdef SIMD_BONDEDS
1039
1040 /* As angles, but using SIMD to calculate many dihedrals at once.
1041  * This routines does not calculate energies and shift forces.
1042  */
1043 static gmx_inline void
1044 angles_noener_simd(int nbonds,
1045                    const t_iatom forceatoms[], const t_iparams forceparams[],
1046                    const rvec x[], rvec f[],
1047                    const t_pbc *pbc, const t_graph gmx_unused *g,
1048                    real gmx_unused lambda,
1049                    const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
1050                    int gmx_unused *global_atom_index)
1051 {
1052 #define UNROLL GMX_SIMD_WIDTH_HERE
1053     const int      nfa1 = 4;
1054     int            i, iu, s, m;
1055     int            type, ai[UNROLL], aj[UNROLL], ak[UNROLL];
1056     real           coeff_array[2*UNROLL+UNROLL], *coeff;
1057     real           dr_array[2*DIM*UNROLL+UNROLL], *dr;
1058     real           f_buf_array[6*UNROLL+UNROLL], *f_buf;
1059     gmx_mm_pr      k_S, theta0_S;
1060     gmx_mm_pr      rijx_S, rijy_S, rijz_S;
1061     gmx_mm_pr      rkjx_S, rkjy_S, rkjz_S;
1062     gmx_mm_pr      one_S;
1063     gmx_mm_pr      rij_rkj_S;
1064     gmx_mm_pr      nrij2_S, nrij_1_S;
1065     gmx_mm_pr      nrkj2_S, nrkj_1_S;
1066     gmx_mm_pr      cos_S, sin_S;
1067     gmx_mm_pr      theta_S;
1068     gmx_mm_pr      st_S, sth_S;
1069     gmx_mm_pr      cik_S, cii_S, ckk_S;
1070     gmx_mm_pr      f_ix_S, f_iy_S, f_iz_S;
1071     gmx_mm_pr      f_kx_S, f_ky_S, f_kz_S;
1072     pbc_simd_t     pbc_simd;
1073
1074     /* Ensure register memory alignment */
1075     coeff = gmx_simd_align_real(coeff_array);
1076     dr    = gmx_simd_align_real(dr_array);
1077     f_buf = gmx_simd_align_real(f_buf_array);
1078
1079     set_pbc_simd(pbc, &pbc_simd);
1080
1081     one_S = gmx_set1_pr(1.0);
1082
1083     /* nbonds is the number of angles times nfa1, here we step UNROLL angles */
1084     for (i = 0; (i < nbonds); i += UNROLL*nfa1)
1085     {
1086         /* Collect atoms for UNROLL angles.
1087          * iu indexes into forceatoms, we should not let iu go beyond nbonds.
1088          */
1089         iu = i;
1090         for (s = 0; s < UNROLL; s++)
1091         {
1092             type  = forceatoms[iu];
1093             ai[s] = forceatoms[iu+1];
1094             aj[s] = forceatoms[iu+2];
1095             ak[s] = forceatoms[iu+3];
1096
1097             coeff[s]        = forceparams[type].harmonic.krA;
1098             coeff[UNROLL+s] = forceparams[type].harmonic.rA*DEG2RAD;
1099
1100             /* If you can't use pbc_dx_simd below for PBC, e.g. because
1101              * you can't round in SIMD, use pbc_rvec_sub here.
1102              */
1103             /* Store the non PBC corrected distances packed and aligned */
1104             for (m = 0; m < DIM; m++)
1105             {
1106                 dr[s +      m *UNROLL] = x[ai[s]][m] - x[aj[s]][m];
1107                 dr[s + (DIM+m)*UNROLL] = x[ak[s]][m] - x[aj[s]][m];
1108             }
1109
1110             /* At the end fill the arrays with identical entries */
1111             if (iu + nfa1 < nbonds)
1112             {
1113                 iu += nfa1;
1114             }
1115         }
1116
1117         k_S       = gmx_load_pr(coeff);
1118         theta0_S  = gmx_load_pr(coeff+UNROLL);
1119
1120         rijx_S    = gmx_load_pr(dr + 0*UNROLL);
1121         rijy_S    = gmx_load_pr(dr + 1*UNROLL);
1122         rijz_S    = gmx_load_pr(dr + 2*UNROLL);
1123         rkjx_S    = gmx_load_pr(dr + 3*UNROLL);
1124         rkjy_S    = gmx_load_pr(dr + 4*UNROLL);
1125         rkjz_S    = gmx_load_pr(dr + 5*UNROLL);
1126
1127         pbc_dx_simd(&rijx_S, &rijy_S, &rijz_S, &pbc_simd);
1128         pbc_dx_simd(&rkjx_S, &rkjy_S, &rkjz_S, &pbc_simd);
1129
1130         rij_rkj_S = gmx_iprod_pr(rijx_S, rijy_S, rijz_S,
1131                                  rkjx_S, rkjy_S, rkjz_S);
1132
1133         nrij2_S   = gmx_norm2_pr(rijx_S, rijy_S, rijz_S);
1134         nrkj2_S   = gmx_norm2_pr(rkjx_S, rkjy_S, rkjz_S);
1135
1136         nrij_1_S  = gmx_invsqrt_pr(nrij2_S);
1137         nrkj_1_S  = gmx_invsqrt_pr(nrkj2_S);
1138
1139         cos_S     = gmx_mul_pr(rij_rkj_S, gmx_mul_pr(nrij_1_S, nrkj_1_S));
1140
1141         theta_S   = gmx_acos_pr(cos_S);
1142
1143         sin_S     = gmx_invsqrt_pr(gmx_max_pr(gmx_sub_pr(one_S, gmx_mul_pr(cos_S, cos_S)),
1144                                               gmx_setzero_pr()));
1145         st_S      = gmx_mul_pr(gmx_mul_pr(k_S, gmx_sub_pr(theta0_S, theta_S)),
1146                                sin_S);
1147         sth_S     = gmx_mul_pr(st_S, cos_S);
1148
1149         cik_S     = gmx_mul_pr(st_S,  gmx_mul_pr(nrij_1_S, nrkj_1_S));
1150         cii_S     = gmx_mul_pr(sth_S, gmx_mul_pr(nrij_1_S, nrij_1_S));
1151         ckk_S     = gmx_mul_pr(sth_S, gmx_mul_pr(nrkj_1_S, nrkj_1_S));
1152
1153         f_ix_S    = gmx_mul_pr(cii_S, rijx_S);
1154         f_ix_S    = gmx_nmsub_pr(cik_S, rkjx_S, f_ix_S);
1155         f_iy_S    = gmx_mul_pr(cii_S, rijy_S);
1156         f_iy_S    = gmx_nmsub_pr(cik_S, rkjy_S, f_iy_S);
1157         f_iz_S    = gmx_mul_pr(cii_S, rijz_S);
1158         f_iz_S    = gmx_nmsub_pr(cik_S, rkjz_S, f_iz_S);
1159         f_kx_S    = gmx_mul_pr(ckk_S, rkjx_S);
1160         f_kx_S    = gmx_nmsub_pr(cik_S, rijx_S, f_kx_S);
1161         f_ky_S    = gmx_mul_pr(ckk_S, rkjy_S);
1162         f_ky_S    = gmx_nmsub_pr(cik_S, rijy_S, f_ky_S);
1163         f_kz_S    = gmx_mul_pr(ckk_S, rkjz_S);
1164         f_kz_S    = gmx_nmsub_pr(cik_S, rijz_S, f_kz_S);
1165
1166         gmx_store_pr(f_buf + 0*UNROLL, f_ix_S);
1167         gmx_store_pr(f_buf + 1*UNROLL, f_iy_S);
1168         gmx_store_pr(f_buf + 2*UNROLL, f_iz_S);
1169         gmx_store_pr(f_buf + 3*UNROLL, f_kx_S);
1170         gmx_store_pr(f_buf + 4*UNROLL, f_ky_S);
1171         gmx_store_pr(f_buf + 5*UNROLL, f_kz_S);
1172
1173         iu = i;
1174         s  = 0;
1175         do
1176         {
1177             for (m = 0; m < DIM; m++)
1178             {
1179                 f[ai[s]][m] += f_buf[s + m*UNROLL];
1180                 f[aj[s]][m] -= f_buf[s + m*UNROLL] + f_buf[s + (DIM+m)*UNROLL];
1181                 f[ak[s]][m] += f_buf[s + (DIM+m)*UNROLL];
1182             }
1183             s++;
1184             iu += nfa1;
1185         }
1186         while (s < UNROLL && iu < nbonds);
1187     }
1188 #undef UNROLL
1189 }
1190
1191 #endif /* SIMD_BONDEDS */
1192
1193 real linear_angles(int nbonds,
1194                    const t_iatom forceatoms[], const t_iparams forceparams[],
1195                    const rvec x[], rvec f[], rvec fshift[],
1196                    const t_pbc *pbc, const t_graph *g,
1197                    real lambda, real *dvdlambda,
1198                    const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
1199                    int gmx_unused *global_atom_index)
1200 {
1201     int  i, m, ai, aj, ak, t1, t2, type;
1202     rvec f_i, f_j, f_k;
1203     real L1, kA, kB, aA, aB, dr, dr2, va, vtot, a, b, klin;
1204     ivec jt, dt_ij, dt_kj;
1205     rvec r_ij, r_kj, r_ik, dx;
1206
1207     L1   = 1-lambda;
1208     vtot = 0.0;
1209     for (i = 0; (i < nbonds); )
1210     {
1211         type = forceatoms[i++];
1212         ai   = forceatoms[i++];
1213         aj   = forceatoms[i++];
1214         ak   = forceatoms[i++];
1215
1216         kA   = forceparams[type].linangle.klinA;
1217         kB   = forceparams[type].linangle.klinB;
1218         klin = L1*kA + lambda*kB;
1219
1220         aA   = forceparams[type].linangle.aA;
1221         aB   = forceparams[type].linangle.aB;
1222         a    = L1*aA+lambda*aB;
1223         b    = 1-a;
1224
1225         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], r_ij);
1226         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], r_kj);
1227         rvec_sub(r_ij, r_kj, r_ik);
1228
1229         dr2 = 0;
1230         for (m = 0; (m < DIM); m++)
1231         {
1232             dr        = -a * r_ij[m] - b * r_kj[m];
1233             dr2      += dr*dr;
1234             dx[m]     = dr;
1235             f_i[m]    = a*klin*dr;
1236             f_k[m]    = b*klin*dr;
1237             f_j[m]    = -(f_i[m]+f_k[m]);
1238             f[ai][m] += f_i[m];
1239             f[aj][m] += f_j[m];
1240             f[ak][m] += f_k[m];
1241         }
1242         va          = 0.5*klin*dr2;
1243         *dvdlambda += 0.5*(kB-kA)*dr2 + klin*(aB-aA)*iprod(dx, r_ik);
1244
1245         vtot += va;
1246
1247         if (g)
1248         {
1249             copy_ivec(SHIFT_IVEC(g, aj), jt);
1250
1251             ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
1252             ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
1253             t1 = IVEC2IS(dt_ij);
1254             t2 = IVEC2IS(dt_kj);
1255         }
1256         rvec_inc(fshift[t1], f_i);
1257         rvec_inc(fshift[CENTRAL], f_j);
1258         rvec_inc(fshift[t2], f_k);
1259     }                                         /* 57 TOTAL       */
1260     return vtot;
1261 }
1262
1263 real urey_bradley(int nbonds,
1264                   const t_iatom forceatoms[], const t_iparams forceparams[],
1265                   const rvec x[], rvec f[], rvec fshift[],
1266                   const t_pbc *pbc, const t_graph *g,
1267                   real lambda, real *dvdlambda,
1268                   const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
1269                   int gmx_unused *global_atom_index)
1270 {
1271     int  i, m, ai, aj, ak, t1, t2, type, ki;
1272     rvec r_ij, r_kj, r_ik;
1273     real cos_theta, cos_theta2, theta;
1274     real dVdt, va, vtot, dr, dr2, vbond, fbond, fik;
1275     real kthA, th0A, kUBA, r13A, kthB, th0B, kUBB, r13B;
1276     ivec jt, dt_ij, dt_kj, dt_ik;
1277
1278     vtot = 0.0;
1279     for (i = 0; (i < nbonds); )
1280     {
1281         type  = forceatoms[i++];
1282         ai    = forceatoms[i++];
1283         aj    = forceatoms[i++];
1284         ak    = forceatoms[i++];
1285         th0A  = forceparams[type].u_b.thetaA*DEG2RAD;
1286         kthA  = forceparams[type].u_b.kthetaA;
1287         r13A  = forceparams[type].u_b.r13A;
1288         kUBA  = forceparams[type].u_b.kUBA;
1289         th0B  = forceparams[type].u_b.thetaB*DEG2RAD;
1290         kthB  = forceparams[type].u_b.kthetaB;
1291         r13B  = forceparams[type].u_b.r13B;
1292         kUBB  = forceparams[type].u_b.kUBB;
1293
1294         theta  = bond_angle(x[ai], x[aj], x[ak], pbc,
1295                             r_ij, r_kj, &cos_theta, &t1, &t2);                     /*  41               */
1296
1297         *dvdlambda += harmonic(kthA, kthB, th0A, th0B, theta, lambda, &va, &dVdt); /*  21  */
1298         vtot       += va;
1299
1300         ki   = pbc_rvec_sub(pbc, x[ai], x[ak], r_ik);                               /*   3      */
1301         dr2  = iprod(r_ik, r_ik);                                                   /*   5              */
1302         dr   = dr2*gmx_invsqrt(dr2);                                                /*  10              */
1303
1304         *dvdlambda += harmonic(kUBA, kUBB, r13A, r13B, dr, lambda, &vbond, &fbond); /*  19  */
1305
1306         cos_theta2 = sqr(cos_theta);                                                /*   1              */
1307         if (cos_theta2 < 1)
1308         {
1309             real st, sth;
1310             real cik, cii, ckk;
1311             real nrkj2, nrij2;
1312             rvec f_i, f_j, f_k;
1313
1314             st  = dVdt*gmx_invsqrt(1 - cos_theta2); /*  12              */
1315             sth = st*cos_theta;                     /*   1              */
1316 #ifdef DEBUG
1317             if (debug)
1318             {
1319                 fprintf(debug, "ANGLES: theta = %10g  vth = %10g  dV/dtheta = %10g\n",
1320                         theta*RAD2DEG, va, dVdt);
1321             }
1322 #endif
1323             nrkj2 = iprod(r_kj, r_kj);  /*   5          */
1324             nrij2 = iprod(r_ij, r_ij);
1325
1326             cik = st*gmx_invsqrt(nrkj2*nrij2); /*  12           */
1327             cii = sth/nrij2;                   /*  10           */
1328             ckk = sth/nrkj2;                   /*  10           */
1329
1330             for (m = 0; (m < DIM); m++)        /*  39           */
1331             {
1332                 f_i[m]    = -(cik*r_kj[m]-cii*r_ij[m]);
1333                 f_k[m]    = -(cik*r_ij[m]-ckk*r_kj[m]);
1334                 f_j[m]    = -f_i[m]-f_k[m];
1335                 f[ai][m] += f_i[m];
1336                 f[aj][m] += f_j[m];
1337                 f[ak][m] += f_k[m];
1338             }
1339             if (g)
1340             {
1341                 copy_ivec(SHIFT_IVEC(g, aj), jt);
1342
1343                 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
1344                 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
1345                 t1 = IVEC2IS(dt_ij);
1346                 t2 = IVEC2IS(dt_kj);
1347             }
1348             rvec_inc(fshift[t1], f_i);
1349             rvec_inc(fshift[CENTRAL], f_j);
1350             rvec_inc(fshift[t2], f_k);
1351         }                                       /* 161 TOTAL    */
1352         /* Time for the bond calculations */
1353         if (dr2 == 0.0)
1354         {
1355             continue;
1356         }
1357
1358         vtot  += vbond;            /* 1*/
1359         fbond *= gmx_invsqrt(dr2); /*   6               */
1360
1361         if (g)
1362         {
1363             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, ak), dt_ik);
1364             ki = IVEC2IS(dt_ik);
1365         }
1366         for (m = 0; (m < DIM); m++)     /*  15          */
1367         {
1368             fik                 = fbond*r_ik[m];
1369             f[ai][m]           += fik;
1370             f[ak][m]           -= fik;
1371             fshift[ki][m]      += fik;
1372             fshift[CENTRAL][m] -= fik;
1373         }
1374     }
1375     return vtot;
1376 }
1377
1378 real quartic_angles(int nbonds,
1379                     const t_iatom forceatoms[], const t_iparams forceparams[],
1380                     const rvec x[], rvec f[], rvec fshift[],
1381                     const t_pbc *pbc, const t_graph *g,
1382                     real gmx_unused lambda, real gmx_unused *dvdlambda,
1383                     const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
1384                     int gmx_unused *global_atom_index)
1385 {
1386     int  i, j, ai, aj, ak, t1, t2, type;
1387     rvec r_ij, r_kj;
1388     real cos_theta, cos_theta2, theta, dt, dVdt, va, dtp, c, vtot;
1389     ivec jt, dt_ij, dt_kj;
1390
1391     vtot = 0.0;
1392     for (i = 0; (i < nbonds); )
1393     {
1394         type = forceatoms[i++];
1395         ai   = forceatoms[i++];
1396         aj   = forceatoms[i++];
1397         ak   = forceatoms[i++];
1398
1399         theta  = bond_angle(x[ai], x[aj], x[ak], pbc,
1400                             r_ij, r_kj, &cos_theta, &t1, &t2); /*  41           */
1401
1402         dt = theta - forceparams[type].qangle.theta*DEG2RAD;   /* 2          */
1403
1404         dVdt = 0;
1405         va   = forceparams[type].qangle.c[0];
1406         dtp  = 1.0;
1407         for (j = 1; j <= 4; j++)
1408         {
1409             c     = forceparams[type].qangle.c[j];
1410             dVdt -= j*c*dtp;
1411             dtp  *= dt;
1412             va   += c*dtp;
1413         }
1414         /* 20 */
1415
1416         vtot += va;
1417
1418         cos_theta2 = sqr(cos_theta);            /*   1          */
1419         if (cos_theta2 < 1)
1420         {
1421             int  m;
1422             real st, sth;
1423             real cik, cii, ckk;
1424             real nrkj2, nrij2;
1425             rvec f_i, f_j, f_k;
1426
1427             st  = dVdt*gmx_invsqrt(1 - cos_theta2); /*  12              */
1428             sth = st*cos_theta;                     /*   1              */
1429 #ifdef DEBUG
1430             if (debug)
1431             {
1432                 fprintf(debug, "ANGLES: theta = %10g  vth = %10g  dV/dtheta = %10g\n",
1433                         theta*RAD2DEG, va, dVdt);
1434             }
1435 #endif
1436             nrkj2 = iprod(r_kj, r_kj);  /*   5          */
1437             nrij2 = iprod(r_ij, r_ij);
1438
1439             cik = st*gmx_invsqrt(nrkj2*nrij2); /*  12           */
1440             cii = sth/nrij2;                   /*  10           */
1441             ckk = sth/nrkj2;                   /*  10           */
1442
1443             for (m = 0; (m < DIM); m++)        /*  39           */
1444             {
1445                 f_i[m]    = -(cik*r_kj[m]-cii*r_ij[m]);
1446                 f_k[m]    = -(cik*r_ij[m]-ckk*r_kj[m]);
1447                 f_j[m]    = -f_i[m]-f_k[m];
1448                 f[ai][m] += f_i[m];
1449                 f[aj][m] += f_j[m];
1450                 f[ak][m] += f_k[m];
1451             }
1452             if (g)
1453             {
1454                 copy_ivec(SHIFT_IVEC(g, aj), jt);
1455
1456                 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
1457                 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
1458                 t1 = IVEC2IS(dt_ij);
1459                 t2 = IVEC2IS(dt_kj);
1460             }
1461             rvec_inc(fshift[t1], f_i);
1462             rvec_inc(fshift[CENTRAL], f_j);
1463             rvec_inc(fshift[t2], f_k);
1464         }                                       /* 153 TOTAL    */
1465     }
1466     return vtot;
1467 }
1468
1469 real dih_angle(const rvec xi, const rvec xj, const rvec xk, const rvec xl,
1470                const t_pbc *pbc,
1471                rvec r_ij, rvec r_kj, rvec r_kl, rvec m, rvec n,
1472                real *sign, int *t1, int *t2, int *t3)
1473 {
1474     real ipr, phi;
1475
1476     *t1 = pbc_rvec_sub(pbc, xi, xj, r_ij); /*  3        */
1477     *t2 = pbc_rvec_sub(pbc, xk, xj, r_kj); /*  3                */
1478     *t3 = pbc_rvec_sub(pbc, xk, xl, r_kl); /*  3                */
1479
1480     cprod(r_ij, r_kj, m);                  /*  9        */
1481     cprod(r_kj, r_kl, n);                  /*  9                */
1482     phi     = gmx_angle(m, n);             /* 49 (assuming 25 for atan2) */
1483     ipr     = iprod(r_ij, n);              /*  5        */
1484     (*sign) = (ipr < 0.0) ? -1.0 : 1.0;
1485     phi     = (*sign)*phi;                 /*  1                */
1486     /* 82 TOTAL */
1487     return phi;
1488 }
1489
1490
1491 #ifdef SIMD_BONDEDS
1492
1493 /* As dih_angle above, but calculates 4 dihedral angles at once using SIMD,
1494  * also calculates the pre-factor required for the dihedral force update.
1495  * Note that bv and buf should be register aligned.
1496  */
1497 static gmx_inline void
1498 dih_angle_simd(const rvec *x,
1499                const int *ai, const int *aj, const int *ak, const int *al,
1500                const pbc_simd_t *pbc,
1501                real *dr,
1502                gmx_mm_pr *phi_S,
1503                gmx_mm_pr *mx_S, gmx_mm_pr *my_S, gmx_mm_pr *mz_S,
1504                gmx_mm_pr *nx_S, gmx_mm_pr *ny_S, gmx_mm_pr *nz_S,
1505                gmx_mm_pr *nrkj_m2_S,
1506                gmx_mm_pr *nrkj_n2_S,
1507                real *p,
1508                real *q)
1509 {
1510 #define UNROLL GMX_SIMD_WIDTH_HERE
1511     int       s, m;
1512     gmx_mm_pr rijx_S, rijy_S, rijz_S;
1513     gmx_mm_pr rkjx_S, rkjy_S, rkjz_S;
1514     gmx_mm_pr rklx_S, rkly_S, rklz_S;
1515     gmx_mm_pr cx_S, cy_S, cz_S;
1516     gmx_mm_pr cn_S;
1517     gmx_mm_pr s_S;
1518     gmx_mm_pr ipr_S;
1519     gmx_mm_pr iprm_S, iprn_S;
1520     gmx_mm_pr nrkj2_S, nrkj_1_S, nrkj_2_S, nrkj_S;
1521     gmx_mm_pr p_S, q_S;
1522     gmx_mm_pr fmin_S = gmx_set1_pr(GMX_FLOAT_MIN);
1523
1524     for (s = 0; s < UNROLL; s++)
1525     {
1526         /* If you can't use pbc_dx_simd below for PBC, e.g. because
1527          * you can't round in SIMD, use pbc_rvec_sub here.
1528          */
1529         for (m = 0; m < DIM; m++)
1530         {
1531             dr[s + (0*DIM + m)*UNROLL] = x[ai[s]][m] - x[aj[s]][m];
1532             dr[s + (1*DIM + m)*UNROLL] = x[ak[s]][m] - x[aj[s]][m];
1533             dr[s + (2*DIM + m)*UNROLL] = x[ak[s]][m] - x[al[s]][m];
1534         }
1535     }
1536
1537     rijx_S = gmx_load_pr(dr + 0*UNROLL);
1538     rijy_S = gmx_load_pr(dr + 1*UNROLL);
1539     rijz_S = gmx_load_pr(dr + 2*UNROLL);
1540     rkjx_S = gmx_load_pr(dr + 3*UNROLL);
1541     rkjy_S = gmx_load_pr(dr + 4*UNROLL);
1542     rkjz_S = gmx_load_pr(dr + 5*UNROLL);
1543     rklx_S = gmx_load_pr(dr + 6*UNROLL);
1544     rkly_S = gmx_load_pr(dr + 7*UNROLL);
1545     rklz_S = gmx_load_pr(dr + 8*UNROLL);
1546
1547     pbc_dx_simd(&rijx_S, &rijy_S, &rijz_S, pbc);
1548     pbc_dx_simd(&rkjx_S, &rkjy_S, &rkjz_S, pbc);
1549     pbc_dx_simd(&rklx_S, &rkly_S, &rklz_S, pbc);
1550
1551     gmx_cprod_pr(rijx_S, rijy_S, rijz_S,
1552                  rkjx_S, rkjy_S, rkjz_S,
1553                  mx_S, my_S, mz_S);
1554
1555     gmx_cprod_pr(rkjx_S, rkjy_S, rkjz_S,
1556                  rklx_S, rkly_S, rklz_S,
1557                  nx_S, ny_S, nz_S);
1558
1559     gmx_cprod_pr(*mx_S, *my_S, *mz_S,
1560                  *nx_S, *ny_S, *nz_S,
1561                  &cx_S, &cy_S, &cz_S);
1562
1563     cn_S       = gmx_sqrt_pr(gmx_norm2_pr(cx_S, cy_S, cz_S));
1564
1565     s_S        = gmx_iprod_pr(*mx_S, *my_S, *mz_S, *nx_S, *ny_S, *nz_S);
1566
1567     /* Determine the dihedral angle, the sign might need correction */
1568     *phi_S     = gmx_atan2_pr(cn_S, s_S);
1569
1570     ipr_S      = gmx_iprod_pr(rijx_S, rijy_S, rijz_S,
1571                               *nx_S, *ny_S, *nz_S);
1572
1573     iprm_S     = gmx_norm2_pr(*mx_S, *my_S, *mz_S);
1574     iprn_S     = gmx_norm2_pr(*nx_S, *ny_S, *nz_S);
1575
1576     nrkj2_S    = gmx_norm2_pr(rkjx_S, rkjy_S, rkjz_S);
1577
1578     /* Avoid division by zero. When zero, the result is multiplied by 0
1579      * anyhow, so the 3 max below do not affect the final result.
1580      */
1581     nrkj2_S    = gmx_max_pr(nrkj2_S, fmin_S);
1582     nrkj_1_S   = gmx_invsqrt_pr(nrkj2_S);
1583     nrkj_2_S   = gmx_mul_pr(nrkj_1_S, nrkj_1_S);
1584     nrkj_S     = gmx_mul_pr(nrkj2_S, nrkj_1_S);
1585
1586     iprm_S     = gmx_max_pr(iprm_S, fmin_S);
1587     iprn_S     = gmx_max_pr(iprn_S, fmin_S);
1588     *nrkj_m2_S = gmx_mul_pr(nrkj_S, gmx_inv_pr(iprm_S));
1589     *nrkj_n2_S = gmx_mul_pr(nrkj_S, gmx_inv_pr(iprn_S));
1590
1591     /* Set sign of phi_S with the sign of ipr_S; phi_S is currently positive */
1592     *phi_S     = gmx_cpsgn_nonneg_pr(ipr_S, *phi_S);
1593
1594     p_S        = gmx_iprod_pr(rijx_S, rijy_S, rijz_S,
1595                               rkjx_S, rkjy_S, rkjz_S);
1596     p_S        = gmx_mul_pr(p_S, nrkj_2_S);
1597
1598     q_S        = gmx_iprod_pr(rklx_S, rkly_S, rklz_S,
1599                               rkjx_S, rkjy_S, rkjz_S);
1600     q_S        = gmx_mul_pr(q_S, nrkj_2_S);
1601
1602     gmx_store_pr(p, p_S);
1603     gmx_store_pr(q, q_S);
1604 #undef UNROLL
1605 }
1606
1607 #endif /* SIMD_BONDEDS */
1608
1609
1610 void do_dih_fup(int i, int j, int k, int l, real ddphi,
1611                 rvec r_ij, rvec r_kj, rvec r_kl,
1612                 rvec m, rvec n, rvec f[], rvec fshift[],
1613                 const t_pbc *pbc, const t_graph *g,
1614                 const rvec x[], int t1, int t2, int t3)
1615 {
1616     /* 143 FLOPS */
1617     rvec f_i, f_j, f_k, f_l;
1618     rvec uvec, vvec, svec, dx_jl;
1619     real iprm, iprn, nrkj, nrkj2, nrkj_1, nrkj_2;
1620     real a, b, p, q, toler;
1621     ivec jt, dt_ij, dt_kj, dt_lj;
1622
1623     iprm  = iprod(m, m);       /*  5    */
1624     iprn  = iprod(n, n);       /*  5    */
1625     nrkj2 = iprod(r_kj, r_kj); /*  5    */
1626     toler = nrkj2*GMX_REAL_EPS;
1627     if ((iprm > toler) && (iprn > toler))
1628     {
1629         nrkj_1 = gmx_invsqrt(nrkj2); /* 10      */
1630         nrkj_2 = nrkj_1*nrkj_1;      /*  1      */
1631         nrkj   = nrkj2*nrkj_1;       /*  1      */
1632         a      = -ddphi*nrkj/iprm;   /* 11      */
1633         svmul(a, m, f_i);            /*  3      */
1634         b     = ddphi*nrkj/iprn;     /* 11      */
1635         svmul(b, n, f_l);            /*  3  */
1636         p     = iprod(r_ij, r_kj);   /*  5      */
1637         p    *= nrkj_2;              /*  1      */
1638         q     = iprod(r_kl, r_kj);   /*  5      */
1639         q    *= nrkj_2;              /*  1      */
1640         svmul(p, f_i, uvec);         /*  3      */
1641         svmul(q, f_l, vvec);         /*  3      */
1642         rvec_sub(uvec, vvec, svec);  /*  3      */
1643         rvec_sub(f_i, svec, f_j);    /*  3      */
1644         rvec_add(f_l, svec, f_k);    /*  3      */
1645         rvec_inc(f[i], f_i);         /*  3      */
1646         rvec_dec(f[j], f_j);         /*  3      */
1647         rvec_dec(f[k], f_k);         /*  3      */
1648         rvec_inc(f[l], f_l);         /*  3      */
1649
1650         if (g)
1651         {
1652             copy_ivec(SHIFT_IVEC(g, j), jt);
1653             ivec_sub(SHIFT_IVEC(g, i), jt, dt_ij);
1654             ivec_sub(SHIFT_IVEC(g, k), jt, dt_kj);
1655             ivec_sub(SHIFT_IVEC(g, l), jt, dt_lj);
1656             t1 = IVEC2IS(dt_ij);
1657             t2 = IVEC2IS(dt_kj);
1658             t3 = IVEC2IS(dt_lj);
1659         }
1660         else if (pbc)
1661         {
1662             t3 = pbc_rvec_sub(pbc, x[l], x[j], dx_jl);
1663         }
1664         else
1665         {
1666             t3 = CENTRAL;
1667         }
1668
1669         rvec_inc(fshift[t1], f_i);
1670         rvec_dec(fshift[CENTRAL], f_j);
1671         rvec_dec(fshift[t2], f_k);
1672         rvec_inc(fshift[t3], f_l);
1673     }
1674     /* 112 TOTAL    */
1675 }
1676
1677 /* As do_dih_fup above, but without shift forces */
1678 static void
1679 do_dih_fup_noshiftf(int i, int j, int k, int l, real ddphi,
1680                     rvec r_ij, rvec r_kj, rvec r_kl,
1681                     rvec m, rvec n, rvec f[])
1682 {
1683     rvec f_i, f_j, f_k, f_l;
1684     rvec uvec, vvec, svec, dx_jl;
1685     real iprm, iprn, nrkj, nrkj2, nrkj_1, nrkj_2;
1686     real a, b, p, q, toler;
1687     ivec jt, dt_ij, dt_kj, dt_lj;
1688
1689     iprm  = iprod(m, m);       /*  5    */
1690     iprn  = iprod(n, n);       /*  5    */
1691     nrkj2 = iprod(r_kj, r_kj); /*  5    */
1692     toler = nrkj2*GMX_REAL_EPS;
1693     if ((iprm > toler) && (iprn > toler))
1694     {
1695         nrkj_1 = gmx_invsqrt(nrkj2); /* 10      */
1696         nrkj_2 = nrkj_1*nrkj_1;      /*  1      */
1697         nrkj   = nrkj2*nrkj_1;       /*  1      */
1698         a      = -ddphi*nrkj/iprm;   /* 11      */
1699         svmul(a, m, f_i);            /*  3      */
1700         b     = ddphi*nrkj/iprn;     /* 11      */
1701         svmul(b, n, f_l);            /*  3  */
1702         p     = iprod(r_ij, r_kj);   /*  5      */
1703         p    *= nrkj_2;              /*  1      */
1704         q     = iprod(r_kl, r_kj);   /*  5      */
1705         q    *= nrkj_2;              /*  1      */
1706         svmul(p, f_i, uvec);         /*  3      */
1707         svmul(q, f_l, vvec);         /*  3      */
1708         rvec_sub(uvec, vvec, svec);  /*  3      */
1709         rvec_sub(f_i, svec, f_j);    /*  3      */
1710         rvec_add(f_l, svec, f_k);    /*  3      */
1711         rvec_inc(f[i], f_i);         /*  3      */
1712         rvec_dec(f[j], f_j);         /*  3      */
1713         rvec_dec(f[k], f_k);         /*  3      */
1714         rvec_inc(f[l], f_l);         /*  3      */
1715     }
1716 }
1717
1718 /* As do_dih_fup_noshiftf above, but with pre-calculated pre-factors */
1719 static gmx_inline void
1720 do_dih_fup_noshiftf_precalc(int i, int j, int k, int l,
1721                             real p, real q,
1722                             real f_i_x, real f_i_y, real f_i_z,
1723                             real mf_l_x, real mf_l_y, real mf_l_z,
1724                             rvec f[])
1725 {
1726     rvec f_i, f_j, f_k, f_l;
1727     rvec uvec, vvec, svec;
1728
1729     f_i[XX] = f_i_x;
1730     f_i[YY] = f_i_y;
1731     f_i[ZZ] = f_i_z;
1732     f_l[XX] = -mf_l_x;
1733     f_l[YY] = -mf_l_y;
1734     f_l[ZZ] = -mf_l_z;
1735     svmul(p, f_i, uvec);
1736     svmul(q, f_l, vvec);
1737     rvec_sub(uvec, vvec, svec);
1738     rvec_sub(f_i, svec, f_j);
1739     rvec_add(f_l, svec, f_k);
1740     rvec_inc(f[i], f_i);
1741     rvec_dec(f[j], f_j);
1742     rvec_dec(f[k], f_k);
1743     rvec_inc(f[l], f_l);
1744 }
1745
1746
1747 real dopdihs(real cpA, real cpB, real phiA, real phiB, int mult,
1748              real phi, real lambda, real *V, real *F)
1749 {
1750     real v, dvdlambda, mdphi, v1, sdphi, ddphi;
1751     real L1   = 1.0 - lambda;
1752     real ph0  = (L1*phiA + lambda*phiB)*DEG2RAD;
1753     real dph0 = (phiB - phiA)*DEG2RAD;
1754     real cp   = L1*cpA + lambda*cpB;
1755
1756     mdphi =  mult*phi - ph0;
1757     sdphi = sin(mdphi);
1758     ddphi = -cp*mult*sdphi;
1759     v1    = 1.0 + cos(mdphi);
1760     v     = cp*v1;
1761
1762     dvdlambda  = (cpB - cpA)*v1 + cp*dph0*sdphi;
1763
1764     *V = v;
1765     *F = ddphi;
1766
1767     return dvdlambda;
1768
1769     /* That was 40 flops */
1770 }
1771
1772 static void
1773 dopdihs_noener(real cpA, real cpB, real phiA, real phiB, int mult,
1774                real phi, real lambda, real *F)
1775 {
1776     real mdphi, sdphi, ddphi;
1777     real L1   = 1.0 - lambda;
1778     real ph0  = (L1*phiA + lambda*phiB)*DEG2RAD;
1779     real cp   = L1*cpA + lambda*cpB;
1780
1781     mdphi = mult*phi - ph0;
1782     sdphi = sin(mdphi);
1783     ddphi = -cp*mult*sdphi;
1784
1785     *F = ddphi;
1786
1787     /* That was 20 flops */
1788 }
1789
1790 static void
1791 dopdihs_mdphi(real cpA, real cpB, real phiA, real phiB, int mult,
1792               real phi, real lambda, real *cp, real *mdphi)
1793 {
1794     real L1   = 1.0 - lambda;
1795     real ph0  = (L1*phiA + lambda*phiB)*DEG2RAD;
1796
1797     *cp    = L1*cpA + lambda*cpB;
1798
1799     *mdphi = mult*phi - ph0;
1800 }
1801
1802 static real dopdihs_min(real cpA, real cpB, real phiA, real phiB, int mult,
1803                         real phi, real lambda, real *V, real *F)
1804 /* similar to dopdihs, except for a minus sign  *
1805  * and a different treatment of mult/phi0       */
1806 {
1807     real v, dvdlambda, mdphi, v1, sdphi, ddphi;
1808     real L1   = 1.0 - lambda;
1809     real ph0  = (L1*phiA + lambda*phiB)*DEG2RAD;
1810     real dph0 = (phiB - phiA)*DEG2RAD;
1811     real cp   = L1*cpA + lambda*cpB;
1812
1813     mdphi = mult*(phi-ph0);
1814     sdphi = sin(mdphi);
1815     ddphi = cp*mult*sdphi;
1816     v1    = 1.0-cos(mdphi);
1817     v     = cp*v1;
1818
1819     dvdlambda  = (cpB-cpA)*v1 + cp*dph0*sdphi;
1820
1821     *V = v;
1822     *F = ddphi;
1823
1824     return dvdlambda;
1825
1826     /* That was 40 flops */
1827 }
1828
1829 real pdihs(int nbonds,
1830            const t_iatom forceatoms[], const t_iparams forceparams[],
1831            const rvec x[], rvec f[], rvec fshift[],
1832            const t_pbc *pbc, const t_graph *g,
1833            real lambda, real *dvdlambda,
1834            const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
1835            int gmx_unused *global_atom_index)
1836 {
1837     int  i, type, ai, aj, ak, al;
1838     int  t1, t2, t3;
1839     rvec r_ij, r_kj, r_kl, m, n;
1840     real phi, sign, ddphi, vpd, vtot;
1841
1842     vtot = 0.0;
1843
1844     for (i = 0; (i < nbonds); )
1845     {
1846         type = forceatoms[i++];
1847         ai   = forceatoms[i++];
1848         aj   = forceatoms[i++];
1849         ak   = forceatoms[i++];
1850         al   = forceatoms[i++];
1851
1852         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
1853                         &sign, &t1, &t2, &t3);  /*  84      */
1854         *dvdlambda += dopdihs(forceparams[type].pdihs.cpA,
1855                               forceparams[type].pdihs.cpB,
1856                               forceparams[type].pdihs.phiA,
1857                               forceparams[type].pdihs.phiB,
1858                               forceparams[type].pdihs.mult,
1859                               phi, lambda, &vpd, &ddphi);
1860
1861         vtot += vpd;
1862         do_dih_fup(ai, aj, ak, al, ddphi, r_ij, r_kj, r_kl, m, n,
1863                    f, fshift, pbc, g, x, t1, t2, t3); /* 112            */
1864
1865 #ifdef DEBUG
1866         fprintf(debug, "pdih: (%d,%d,%d,%d) phi=%g\n",
1867                 ai, aj, ak, al, phi);
1868 #endif
1869     } /* 223 TOTAL  */
1870
1871     return vtot;
1872 }
1873
1874 void make_dp_periodic(real *dp)  /* 1 flop? */
1875 {
1876     /* dp cannot be outside (-pi,pi) */
1877     if (*dp >= M_PI)
1878     {
1879         *dp -= 2*M_PI;
1880     }
1881     else if (*dp < -M_PI)
1882     {
1883         *dp += 2*M_PI;
1884     }
1885     return;
1886 }
1887
1888 /* As pdihs above, but without calculating energies and shift forces */
1889 static void
1890 pdihs_noener(int nbonds,
1891              const t_iatom forceatoms[], const t_iparams forceparams[],
1892              const rvec x[], rvec f[],
1893              const t_pbc gmx_unused *pbc, const t_graph gmx_unused *g,
1894              real lambda,
1895              const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
1896              int gmx_unused *global_atom_index)
1897 {
1898     int  i, type, ai, aj, ak, al;
1899     int  t1, t2, t3;
1900     rvec r_ij, r_kj, r_kl, m, n;
1901     real phi, sign, ddphi_tot, ddphi;
1902
1903     for (i = 0; (i < nbonds); )
1904     {
1905         ai   = forceatoms[i+1];
1906         aj   = forceatoms[i+2];
1907         ak   = forceatoms[i+3];
1908         al   = forceatoms[i+4];
1909
1910         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
1911                         &sign, &t1, &t2, &t3);
1912
1913         ddphi_tot = 0;
1914
1915         /* Loop over dihedrals working on the same atoms,
1916          * so we avoid recalculating angles and force distributions.
1917          */
1918         do
1919         {
1920             type = forceatoms[i];
1921             dopdihs_noener(forceparams[type].pdihs.cpA,
1922                            forceparams[type].pdihs.cpB,
1923                            forceparams[type].pdihs.phiA,
1924                            forceparams[type].pdihs.phiB,
1925                            forceparams[type].pdihs.mult,
1926                            phi, lambda, &ddphi);
1927             ddphi_tot += ddphi;
1928
1929             i += 5;
1930         }
1931         while (i < nbonds &&
1932                forceatoms[i+1] == ai &&
1933                forceatoms[i+2] == aj &&
1934                forceatoms[i+3] == ak &&
1935                forceatoms[i+4] == al);
1936
1937         do_dih_fup_noshiftf(ai, aj, ak, al, ddphi_tot, r_ij, r_kj, r_kl, m, n, f);
1938     }
1939 }
1940
1941
1942 #ifdef SIMD_BONDEDS
1943
1944 /* As pdihs_noner above, but using SIMD to calculate many dihedrals at once */
1945 static void
1946 pdihs_noener_simd(int nbonds,
1947                   const t_iatom forceatoms[], const t_iparams forceparams[],
1948                   const rvec x[], rvec f[],
1949                   const t_pbc *pbc, const t_graph gmx_unused *g,
1950                   real gmx_unused lambda,
1951                   const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
1952                   int gmx_unused *global_atom_index)
1953 {
1954 #define UNROLL GMX_SIMD_WIDTH_HERE
1955     const int       nfa1 = 5;
1956     int             i, iu, s;
1957     int             type, ai[UNROLL], aj[UNROLL], ak[UNROLL], al[UNROLL];
1958     int             t1[UNROLL], t2[UNROLL], t3[UNROLL];
1959     real            ddphi;
1960     real            dr_array[3*DIM*UNROLL+UNROLL], *dr;
1961     real            buf_array[7*UNROLL+UNROLL], *buf;
1962     real           *cp, *phi0, *mult, *phi, *p, *q, *sf_i, *msf_l;
1963     gmx_mm_pr       phi0_S, phi_S;
1964     gmx_mm_pr       mx_S, my_S, mz_S;
1965     gmx_mm_pr       nx_S, ny_S, nz_S;
1966     gmx_mm_pr       nrkj_m2_S, nrkj_n2_S;
1967     gmx_mm_pr       cp_S, mdphi_S, mult_S;
1968     gmx_mm_pr       sin_S, cos_S;
1969     gmx_mm_pr       mddphi_S;
1970     gmx_mm_pr       sf_i_S, msf_l_S;
1971     pbc_simd_t      pbc_simd;
1972
1973     /* Ensure SIMD register alignment */
1974     dr  = gmx_simd_align_real(dr_array);
1975     buf = gmx_simd_align_real(buf_array);
1976
1977     /* Extract aligned pointer for parameters and variables */
1978     cp    = buf + 0*UNROLL;
1979     phi0  = buf + 1*UNROLL;
1980     mult  = buf + 2*UNROLL;
1981     p     = buf + 3*UNROLL;
1982     q     = buf + 4*UNROLL;
1983     sf_i  = buf + 5*UNROLL;
1984     msf_l = buf + 6*UNROLL;
1985
1986     set_pbc_simd(pbc, &pbc_simd);
1987
1988     /* nbonds is the number of dihedrals times nfa1, here we step UNROLL dihs */
1989     for (i = 0; (i < nbonds); i += UNROLL*nfa1)
1990     {
1991         /* Collect atoms quadruplets for UNROLL dihedrals.
1992          * iu indexes into forceatoms, we should not let iu go beyond nbonds.
1993          */
1994         iu = i;
1995         for (s = 0; s < UNROLL; s++)
1996         {
1997             type  = forceatoms[iu];
1998             ai[s] = forceatoms[iu+1];
1999             aj[s] = forceatoms[iu+2];
2000             ak[s] = forceatoms[iu+3];
2001             al[s] = forceatoms[iu+4];
2002
2003             cp[s]   = forceparams[type].pdihs.cpA;
2004             phi0[s] = forceparams[type].pdihs.phiA*DEG2RAD;
2005             mult[s] = forceparams[type].pdihs.mult;
2006
2007             /* At the end fill the arrays with identical entries */
2008             if (iu + nfa1 < nbonds)
2009             {
2010                 iu += nfa1;
2011             }
2012         }
2013
2014         /* Caclulate UNROLL dihedral angles at once */
2015         dih_angle_simd(x, ai, aj, ak, al, &pbc_simd,
2016                        dr,
2017                        &phi_S,
2018                        &mx_S, &my_S, &mz_S,
2019                        &nx_S, &ny_S, &nz_S,
2020                        &nrkj_m2_S,
2021                        &nrkj_n2_S,
2022                        p, q);
2023
2024         cp_S     = gmx_load_pr(cp);
2025         phi0_S   = gmx_load_pr(phi0);
2026         mult_S   = gmx_load_pr(mult);
2027
2028         mdphi_S  = gmx_sub_pr(gmx_mul_pr(mult_S, phi_S), phi0_S);
2029
2030         /* Calculate UNROLL sines at once */
2031         gmx_sincos_pr(mdphi_S, &sin_S, &cos_S);
2032         mddphi_S = gmx_mul_pr(gmx_mul_pr(cp_S, mult_S), sin_S);
2033         sf_i_S   = gmx_mul_pr(mddphi_S, nrkj_m2_S);
2034         msf_l_S  = gmx_mul_pr(mddphi_S, nrkj_n2_S);
2035
2036         /* After this m?_S will contain f[i] */
2037         mx_S     = gmx_mul_pr(sf_i_S, mx_S);
2038         my_S     = gmx_mul_pr(sf_i_S, my_S);
2039         mz_S     = gmx_mul_pr(sf_i_S, mz_S);
2040
2041         /* After this m?_S will contain -f[l] */
2042         nx_S     = gmx_mul_pr(msf_l_S, nx_S);
2043         ny_S     = gmx_mul_pr(msf_l_S, ny_S);
2044         nz_S     = gmx_mul_pr(msf_l_S, nz_S);
2045
2046         gmx_store_pr(dr + 0*UNROLL, mx_S);
2047         gmx_store_pr(dr + 1*UNROLL, my_S);
2048         gmx_store_pr(dr + 2*UNROLL, mz_S);
2049         gmx_store_pr(dr + 3*UNROLL, nx_S);
2050         gmx_store_pr(dr + 4*UNROLL, ny_S);
2051         gmx_store_pr(dr + 5*UNROLL, nz_S);
2052
2053         iu = i;
2054         s  = 0;
2055         do
2056         {
2057             do_dih_fup_noshiftf_precalc(ai[s], aj[s], ak[s], al[s],
2058                                         p[s], q[s],
2059                                         dr[     XX *UNROLL+s],
2060                                         dr[     YY *UNROLL+s],
2061                                         dr[     ZZ *UNROLL+s],
2062                                         dr[(DIM+XX)*UNROLL+s],
2063                                         dr[(DIM+YY)*UNROLL+s],
2064                                         dr[(DIM+ZZ)*UNROLL+s],
2065                                         f);
2066             s++;
2067             iu += nfa1;
2068         }
2069         while (s < UNROLL && iu < nbonds);
2070     }
2071 #undef UNROLL
2072 }
2073
2074 #endif /* SIMD_BONDEDS */
2075
2076
2077 real idihs(int nbonds,
2078            const t_iatom forceatoms[], const t_iparams forceparams[],
2079            const rvec x[], rvec f[], rvec fshift[],
2080            const t_pbc *pbc, const t_graph *g,
2081            real lambda, real *dvdlambda,
2082            const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2083            int gmx_unused *global_atom_index)
2084 {
2085     int  i, type, ai, aj, ak, al;
2086     int  t1, t2, t3;
2087     real phi, phi0, dphi0, ddphi, sign, vtot;
2088     rvec r_ij, r_kj, r_kl, m, n;
2089     real L1, kk, dp, dp2, kA, kB, pA, pB, dvdl_term;
2090
2091     L1        = 1.0-lambda;
2092     dvdl_term = 0;
2093     vtot      = 0.0;
2094     for (i = 0; (i < nbonds); )
2095     {
2096         type = forceatoms[i++];
2097         ai   = forceatoms[i++];
2098         aj   = forceatoms[i++];
2099         ak   = forceatoms[i++];
2100         al   = forceatoms[i++];
2101
2102         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
2103                         &sign, &t1, &t2, &t3);  /*  84          */
2104
2105         /* phi can jump if phi0 is close to Pi/-Pi, which will cause huge
2106          * force changes if we just apply a normal harmonic.
2107          * Instead, we first calculate phi-phi0 and take it modulo (-Pi,Pi).
2108          * This means we will never have the periodicity problem, unless
2109          * the dihedral is Pi away from phiO, which is very unlikely due to
2110          * the potential.
2111          */
2112         kA = forceparams[type].harmonic.krA;
2113         kB = forceparams[type].harmonic.krB;
2114         pA = forceparams[type].harmonic.rA;
2115         pB = forceparams[type].harmonic.rB;
2116
2117         kk    = L1*kA + lambda*kB;
2118         phi0  = (L1*pA + lambda*pB)*DEG2RAD;
2119         dphi0 = (pB - pA)*DEG2RAD;
2120
2121         dp = phi-phi0;
2122
2123         make_dp_periodic(&dp);
2124
2125         dp2 = dp*dp;
2126
2127         vtot += 0.5*kk*dp2;
2128         ddphi = -kk*dp;
2129
2130         dvdl_term += 0.5*(kB - kA)*dp2 - kk*dphi0*dp;
2131
2132         do_dih_fup(ai, aj, ak, al, (real)(-ddphi), r_ij, r_kj, r_kl, m, n,
2133                    f, fshift, pbc, g, x, t1, t2, t3); /* 112            */
2134         /* 218 TOTAL    */
2135 #ifdef DEBUG
2136         if (debug)
2137         {
2138             fprintf(debug, "idih: (%d,%d,%d,%d) phi=%g\n",
2139                     ai, aj, ak, al, phi);
2140         }
2141 #endif
2142     }
2143
2144     *dvdlambda += dvdl_term;
2145     return vtot;
2146 }
2147
2148
2149 /*! \brief returns dx, rdist, and dpdl for functions posres() and fbposres()
2150  */
2151 static void posres_dx(const rvec x, const rvec pos0A, const rvec pos0B,
2152                       const rvec comA_sc, const rvec comB_sc,
2153                       real lambda,
2154                       t_pbc *pbc, int refcoord_scaling, int npbcdim,
2155                       rvec dx, rvec rdist, rvec dpdl)
2156 {
2157     int  m, d;
2158     real posA, posB, L1, ref = 0.;
2159     rvec pos;
2160
2161     L1 = 1.0-lambda;
2162
2163     for (m = 0; m < DIM; m++)
2164     {
2165         posA = pos0A[m];
2166         posB = pos0B[m];
2167         if (m < npbcdim)
2168         {
2169             switch (refcoord_scaling)
2170             {
2171                 case erscNO:
2172                     ref      = 0;
2173                     rdist[m] = L1*posA + lambda*posB;
2174                     dpdl[m]  = posB - posA;
2175                     break;
2176                 case erscALL:
2177                     /* Box relative coordinates are stored for dimensions with pbc */
2178                     posA *= pbc->box[m][m];
2179                     posB *= pbc->box[m][m];
2180                     for (d = m+1; d < npbcdim; d++)
2181                     {
2182                         posA += pos0A[d]*pbc->box[d][m];
2183                         posB += pos0B[d]*pbc->box[d][m];
2184                     }
2185                     ref      = L1*posA + lambda*posB;
2186                     rdist[m] = 0;
2187                     dpdl[m]  = posB - posA;
2188                     break;
2189                 case erscCOM:
2190                     ref      = L1*comA_sc[m] + lambda*comB_sc[m];
2191                     rdist[m] = L1*posA       + lambda*posB;
2192                     dpdl[m]  = comB_sc[m] - comA_sc[m] + posB - posA;
2193                     break;
2194                 default:
2195                     gmx_fatal(FARGS, "No such scaling method implemented");
2196             }
2197         }
2198         else
2199         {
2200             ref      = L1*posA + lambda*posB;
2201             rdist[m] = 0;
2202             dpdl[m]  = posB - posA;
2203         }
2204
2205         /* We do pbc_dx with ref+rdist,
2206          * since with only ref we can be up to half a box vector wrong.
2207          */
2208         pos[m] = ref + rdist[m];
2209     }
2210
2211     if (pbc)
2212     {
2213         pbc_dx(pbc, x, pos, dx);
2214     }
2215     else
2216     {
2217         rvec_sub(x, pos, dx);
2218     }
2219 }
2220
2221 /*! \brief Adds forces of flat-bottomed positions restraints to f[]
2222  *         and fixes vir_diag. Returns the flat-bottomed potential. */
2223 real fbposres(int nbonds,
2224               const t_iatom forceatoms[], const t_iparams forceparams[],
2225               const rvec x[], rvec f[], rvec vir_diag,
2226               t_pbc *pbc,
2227               int refcoord_scaling, int ePBC, rvec com)
2228 /* compute flat-bottomed positions restraints */
2229 {
2230     int              i, ai, m, d, type, npbcdim = 0, fbdim;
2231     const t_iparams *pr;
2232     real             vtot, kk, v;
2233     real             ref = 0, dr, dr2, rpot, rfb, rfb2, fact, invdr;
2234     rvec             com_sc, rdist, pos, dx, dpdl, fm;
2235     gmx_bool         bInvert;
2236
2237     npbcdim = ePBC2npbcdim(ePBC);
2238
2239     if (refcoord_scaling == erscCOM)
2240     {
2241         clear_rvec(com_sc);
2242         for (m = 0; m < npbcdim; m++)
2243         {
2244             for (d = m; d < npbcdim; d++)
2245             {
2246                 com_sc[m] += com[d]*pbc->box[d][m];
2247             }
2248         }
2249     }
2250
2251     vtot = 0.0;
2252     for (i = 0; (i < nbonds); )
2253     {
2254         type = forceatoms[i++];
2255         ai   = forceatoms[i++];
2256         pr   = &forceparams[type];
2257
2258         /* same calculation as for normal posres, but with identical A and B states, and lambda==0 */
2259         posres_dx(x[ai], forceparams[type].fbposres.pos0, forceparams[type].fbposres.pos0,
2260                   com_sc, com_sc, 0.0,
2261                   pbc, refcoord_scaling, npbcdim,
2262                   dx, rdist, dpdl);
2263
2264         clear_rvec(fm);
2265         v = 0.0;
2266
2267         kk   = pr->fbposres.k;
2268         rfb  = pr->fbposres.r;
2269         rfb2 = sqr(rfb);
2270
2271         /* with rfb<0, push particle out of the sphere/cylinder/layer */
2272         bInvert = FALSE;
2273         if (rfb < 0.)
2274         {
2275             bInvert = TRUE;
2276             rfb     = -rfb;
2277         }
2278
2279         switch (pr->fbposres.geom)
2280         {
2281             case efbposresSPHERE:
2282                 /* spherical flat-bottom posres */
2283                 dr2 = norm2(dx);
2284                 if (dr2 > 0.0 &&
2285                     ( (dr2 > rfb2 && bInvert == FALSE ) || (dr2 < rfb2 && bInvert == TRUE ) )
2286                     )
2287                 {
2288                     dr   = sqrt(dr2);
2289                     v    = 0.5*kk*sqr(dr - rfb);
2290                     fact = -kk*(dr-rfb)/dr; /* Force pointing to the center pos0 */
2291                     svmul(fact, dx, fm);
2292                 }
2293                 break;
2294             case efbposresCYLINDER:
2295                 /* cylidrical flat-bottom posres in x-y plane. fm[ZZ] = 0. */
2296                 dr2 = sqr(dx[XX])+sqr(dx[YY]);
2297                 if  (dr2 > 0.0 &&
2298                      ( (dr2 > rfb2 && bInvert == FALSE ) || (dr2 < rfb2 && bInvert == TRUE ) )
2299                      )
2300                 {
2301                     dr     = sqrt(dr2);
2302                     invdr  = 1./dr;
2303                     v      = 0.5*kk*sqr(dr - rfb);
2304                     fm[XX] = -kk*(dr-rfb)*dx[XX]*invdr; /* Force pointing to the center */
2305                     fm[YY] = -kk*(dr-rfb)*dx[YY]*invdr;
2306                 }
2307                 break;
2308             case efbposresX: /* fbdim=XX */
2309             case efbposresY: /* fbdim=YY */
2310             case efbposresZ: /* fbdim=ZZ */
2311                 /* 1D flat-bottom potential */
2312                 fbdim = pr->fbposres.geom - efbposresX;
2313                 dr    = dx[fbdim];
2314                 if ( ( dr > rfb && bInvert == FALSE ) || ( 0 < dr && dr < rfb && bInvert == TRUE )  )
2315                 {
2316                     v         = 0.5*kk*sqr(dr - rfb);
2317                     fm[fbdim] = -kk*(dr - rfb);
2318                 }
2319                 else if ( (dr < (-rfb) && bInvert == FALSE ) || ( (-rfb) < dr && dr < 0 && bInvert == TRUE ))
2320                 {
2321                     v         = 0.5*kk*sqr(dr + rfb);
2322                     fm[fbdim] = -kk*(dr + rfb);
2323                 }
2324                 break;
2325         }
2326
2327         vtot += v;
2328
2329         for (m = 0; (m < DIM); m++)
2330         {
2331             f[ai][m]   += fm[m];
2332             /* Here we correct for the pbc_dx which included rdist */
2333             vir_diag[m] -= 0.5*(dx[m] + rdist[m])*fm[m];
2334         }
2335     }
2336
2337     return vtot;
2338 }
2339
2340
2341 real posres(int nbonds,
2342             const t_iatom forceatoms[], const t_iparams forceparams[],
2343             const rvec x[], rvec f[], rvec vir_diag,
2344             t_pbc *pbc,
2345             real lambda, real *dvdlambda,
2346             int refcoord_scaling, int ePBC, rvec comA, rvec comB)
2347 {
2348     int              i, ai, m, d, type, ki, npbcdim = 0;
2349     const t_iparams *pr;
2350     real             L1;
2351     real             vtot, kk, fm;
2352     real             posA, posB, ref = 0;
2353     rvec             comA_sc, comB_sc, rdist, dpdl, pos, dx;
2354     gmx_bool         bForceValid = TRUE;
2355
2356     if ((f == NULL) || (vir_diag == NULL))    /* should both be null together! */
2357     {
2358         bForceValid = FALSE;
2359     }
2360
2361     npbcdim = ePBC2npbcdim(ePBC);
2362
2363     if (refcoord_scaling == erscCOM)
2364     {
2365         clear_rvec(comA_sc);
2366         clear_rvec(comB_sc);
2367         for (m = 0; m < npbcdim; m++)
2368         {
2369             for (d = m; d < npbcdim; d++)
2370             {
2371                 comA_sc[m] += comA[d]*pbc->box[d][m];
2372                 comB_sc[m] += comB[d]*pbc->box[d][m];
2373             }
2374         }
2375     }
2376
2377     L1 = 1.0 - lambda;
2378
2379     vtot = 0.0;
2380     for (i = 0; (i < nbonds); )
2381     {
2382         type = forceatoms[i++];
2383         ai   = forceatoms[i++];
2384         pr   = &forceparams[type];
2385
2386         /* return dx, rdist, and dpdl */
2387         posres_dx(x[ai], forceparams[type].posres.pos0A, forceparams[type].posres.pos0B,
2388                   comA_sc, comB_sc, lambda,
2389                   pbc, refcoord_scaling, npbcdim,
2390                   dx, rdist, dpdl);
2391
2392         for (m = 0; (m < DIM); m++)
2393         {
2394             kk          = L1*pr->posres.fcA[m] + lambda*pr->posres.fcB[m];
2395             fm          = -kk*dx[m];
2396             vtot       += 0.5*kk*dx[m]*dx[m];
2397             *dvdlambda +=
2398                 0.5*(pr->posres.fcB[m] - pr->posres.fcA[m])*dx[m]*dx[m]
2399                 -fm*dpdl[m];
2400
2401             /* Here we correct for the pbc_dx which included rdist */
2402             if (bForceValid)
2403             {
2404                 f[ai][m]    += fm;
2405                 vir_diag[m] -= 0.5*(dx[m] + rdist[m])*fm;
2406             }
2407         }
2408     }
2409
2410     return vtot;
2411 }
2412
2413 static real low_angres(int nbonds,
2414                        const t_iatom forceatoms[], const t_iparams forceparams[],
2415                        const rvec x[], rvec f[], rvec fshift[],
2416                        const t_pbc *pbc, const t_graph *g,
2417                        real lambda, real *dvdlambda,
2418                        gmx_bool bZAxis)
2419 {
2420     int  i, m, type, ai, aj, ak, al;
2421     int  t1, t2;
2422     real phi, cos_phi, cos_phi2, vid, vtot, dVdphi;
2423     rvec r_ij, r_kl, f_i, f_k = {0, 0, 0};
2424     real st, sth, nrij2, nrkl2, c, cij, ckl;
2425
2426     ivec dt;
2427     t2 = 0; /* avoid warning with gcc-3.3. It is never used uninitialized */
2428
2429     vtot = 0.0;
2430     ak   = al = 0; /* to avoid warnings */
2431     for (i = 0; i < nbonds; )
2432     {
2433         type = forceatoms[i++];
2434         ai   = forceatoms[i++];
2435         aj   = forceatoms[i++];
2436         t1   = pbc_rvec_sub(pbc, x[aj], x[ai], r_ij);       /*  3               */
2437         if (!bZAxis)
2438         {
2439             ak   = forceatoms[i++];
2440             al   = forceatoms[i++];
2441             t2   = pbc_rvec_sub(pbc, x[al], x[ak], r_kl);  /*  3                */
2442         }
2443         else
2444         {
2445             r_kl[XX] = 0;
2446             r_kl[YY] = 0;
2447             r_kl[ZZ] = 1;
2448         }
2449
2450         cos_phi = cos_angle(r_ij, r_kl); /* 25          */
2451         phi     = acos(cos_phi);         /* 10           */
2452
2453         *dvdlambda += dopdihs_min(forceparams[type].pdihs.cpA,
2454                                   forceparams[type].pdihs.cpB,
2455                                   forceparams[type].pdihs.phiA,
2456                                   forceparams[type].pdihs.phiB,
2457                                   forceparams[type].pdihs.mult,
2458                                   phi, lambda, &vid, &dVdphi); /*  40  */
2459
2460         vtot += vid;
2461
2462         cos_phi2 = sqr(cos_phi);                /*   1          */
2463         if (cos_phi2 < 1)
2464         {
2465             st    = -dVdphi*gmx_invsqrt(1 - cos_phi2); /*  12           */
2466             sth   = st*cos_phi;                        /*   1           */
2467             nrij2 = iprod(r_ij, r_ij);                 /*   5           */
2468             nrkl2 = iprod(r_kl, r_kl);                 /*   5          */
2469
2470             c   = st*gmx_invsqrt(nrij2*nrkl2);         /*  11           */
2471             cij = sth/nrij2;                           /*  10           */
2472             ckl = sth/nrkl2;                           /*  10           */
2473
2474             for (m = 0; m < DIM; m++)                  /*  18+18       */
2475             {
2476                 f_i[m]    = (c*r_kl[m]-cij*r_ij[m]);
2477                 f[ai][m] += f_i[m];
2478                 f[aj][m] -= f_i[m];
2479                 if (!bZAxis)
2480                 {
2481                     f_k[m]    = (c*r_ij[m]-ckl*r_kl[m]);
2482                     f[ak][m] += f_k[m];
2483                     f[al][m] -= f_k[m];
2484                 }
2485             }
2486
2487             if (g)
2488             {
2489                 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
2490                 t1 = IVEC2IS(dt);
2491             }
2492             rvec_inc(fshift[t1], f_i);
2493             rvec_dec(fshift[CENTRAL], f_i);
2494             if (!bZAxis)
2495             {
2496                 if (g)
2497                 {
2498                     ivec_sub(SHIFT_IVEC(g, ak), SHIFT_IVEC(g, al), dt);
2499                     t2 = IVEC2IS(dt);
2500                 }
2501                 rvec_inc(fshift[t2], f_k);
2502                 rvec_dec(fshift[CENTRAL], f_k);
2503             }
2504         }
2505     }
2506
2507     return vtot; /*  184 / 157 (bZAxis)  total  */
2508 }
2509
2510 real angres(int nbonds,
2511             const t_iatom forceatoms[], const t_iparams forceparams[],
2512             const rvec x[], rvec f[], rvec fshift[],
2513             const t_pbc *pbc, const t_graph *g,
2514             real lambda, real *dvdlambda,
2515             const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2516             int gmx_unused *global_atom_index)
2517 {
2518     return low_angres(nbonds, forceatoms, forceparams, x, f, fshift, pbc, g,
2519                       lambda, dvdlambda, FALSE);
2520 }
2521
2522 real angresz(int nbonds,
2523              const t_iatom forceatoms[], const t_iparams forceparams[],
2524              const rvec x[], rvec f[], rvec fshift[],
2525              const t_pbc *pbc, const t_graph *g,
2526              real lambda, real *dvdlambda,
2527              const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2528              int gmx_unused *global_atom_index)
2529 {
2530     return low_angres(nbonds, forceatoms, forceparams, x, f, fshift, pbc, g,
2531                       lambda, dvdlambda, TRUE);
2532 }
2533
2534 real dihres(int nbonds,
2535             const t_iatom forceatoms[], const t_iparams forceparams[],
2536             const rvec x[], rvec f[], rvec fshift[],
2537             const t_pbc *pbc, const t_graph *g,
2538             real lambda, real *dvdlambda,
2539             const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2540             int gmx_unused  *global_atom_index)
2541 {
2542     real vtot = 0;
2543     int  ai, aj, ak, al, i, k, type, t1, t2, t3;
2544     real phi0A, phi0B, dphiA, dphiB, kfacA, kfacB, phi0, dphi, kfac;
2545     real phi, ddphi, ddp, ddp2, dp, sign, d2r, fc, L1;
2546     rvec r_ij, r_kj, r_kl, m, n;
2547
2548     L1 = 1.0-lambda;
2549
2550     d2r = DEG2RAD;
2551     k   = 0;
2552
2553     for (i = 0; (i < nbonds); )
2554     {
2555         type = forceatoms[i++];
2556         ai   = forceatoms[i++];
2557         aj   = forceatoms[i++];
2558         ak   = forceatoms[i++];
2559         al   = forceatoms[i++];
2560
2561         phi0A  = forceparams[type].dihres.phiA*d2r;
2562         dphiA  = forceparams[type].dihres.dphiA*d2r;
2563         kfacA  = forceparams[type].dihres.kfacA;
2564
2565         phi0B  = forceparams[type].dihres.phiB*d2r;
2566         dphiB  = forceparams[type].dihres.dphiB*d2r;
2567         kfacB  = forceparams[type].dihres.kfacB;
2568
2569         phi0  = L1*phi0A + lambda*phi0B;
2570         dphi  = L1*dphiA + lambda*dphiB;
2571         kfac  = L1*kfacA + lambda*kfacB;
2572
2573         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
2574                         &sign, &t1, &t2, &t3);
2575         /* 84 flops */
2576
2577         if (debug)
2578         {
2579             fprintf(debug, "dihres[%d]: %d %d %d %d : phi=%f, dphi=%f, kfac=%f\n",
2580                     k++, ai, aj, ak, al, phi0, dphi, kfac);
2581         }
2582         /* phi can jump if phi0 is close to Pi/-Pi, which will cause huge
2583          * force changes if we just apply a normal harmonic.
2584          * Instead, we first calculate phi-phi0 and take it modulo (-Pi,Pi).
2585          * This means we will never have the periodicity problem, unless
2586          * the dihedral is Pi away from phiO, which is very unlikely due to
2587          * the potential.
2588          */
2589         dp = phi-phi0;
2590         make_dp_periodic(&dp);
2591
2592         if (dp > dphi)
2593         {
2594             ddp = dp-dphi;
2595         }
2596         else if (dp < -dphi)
2597         {
2598             ddp = dp+dphi;
2599         }
2600         else
2601         {
2602             ddp = 0;
2603         }
2604
2605         if (ddp != 0.0)
2606         {
2607             ddp2  = ddp*ddp;
2608             vtot += 0.5*kfac*ddp2;
2609             ddphi = kfac*ddp;
2610
2611             *dvdlambda += 0.5*(kfacB - kfacA)*ddp2;
2612             /* lambda dependence from changing restraint distances */
2613             if (ddp > 0)
2614             {
2615                 *dvdlambda -= kfac*ddp*((dphiB - dphiA)+(phi0B - phi0A));
2616             }
2617             else if (ddp < 0)
2618             {
2619                 *dvdlambda += kfac*ddp*((dphiB - dphiA)-(phi0B - phi0A));
2620             }
2621             do_dih_fup(ai, aj, ak, al, ddphi, r_ij, r_kj, r_kl, m, n,
2622                        f, fshift, pbc, g, x, t1, t2, t3);      /* 112           */
2623         }
2624     }
2625     return vtot;
2626 }
2627
2628
2629 real unimplemented(int gmx_unused nbonds,
2630                    const t_iatom gmx_unused forceatoms[], const t_iparams gmx_unused forceparams[],
2631                    const rvec gmx_unused x[], rvec gmx_unused f[], rvec gmx_unused fshift[],
2632                    const t_pbc gmx_unused *pbc, const t_graph  gmx_unused *g,
2633                    real gmx_unused lambda, real gmx_unused *dvdlambda,
2634                    const t_mdatoms  gmx_unused *md, t_fcdata gmx_unused *fcd,
2635                    int gmx_unused *global_atom_index)
2636 {
2637     gmx_impl("*** you are using a not implemented function");
2638
2639     return 0.0; /* To make the compiler happy */
2640 }
2641
2642 real rbdihs(int nbonds,
2643             const t_iatom forceatoms[], const t_iparams forceparams[],
2644             const rvec x[], rvec f[], rvec fshift[],
2645             const t_pbc *pbc, const t_graph *g,
2646             real lambda, real *dvdlambda,
2647             const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2648             int gmx_unused *global_atom_index)
2649 {
2650     const real c0 = 0.0, c1 = 1.0, c2 = 2.0, c3 = 3.0, c4 = 4.0, c5 = 5.0;
2651     int        type, ai, aj, ak, al, i, j;
2652     int        t1, t2, t3;
2653     rvec       r_ij, r_kj, r_kl, m, n;
2654     real       parmA[NR_RBDIHS];
2655     real       parmB[NR_RBDIHS];
2656     real       parm[NR_RBDIHS];
2657     real       cos_phi, phi, rbp, rbpBA;
2658     real       v, sign, ddphi, sin_phi;
2659     real       cosfac, vtot;
2660     real       L1        = 1.0-lambda;
2661     real       dvdl_term = 0;
2662
2663     vtot = 0.0;
2664     for (i = 0; (i < nbonds); )
2665     {
2666         type = forceatoms[i++];
2667         ai   = forceatoms[i++];
2668         aj   = forceatoms[i++];
2669         ak   = forceatoms[i++];
2670         al   = forceatoms[i++];
2671
2672         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
2673                         &sign, &t1, &t2, &t3);  /*  84          */
2674
2675         /* Change to polymer convention */
2676         if (phi < c0)
2677         {
2678             phi += M_PI;
2679         }
2680         else
2681         {
2682             phi -= M_PI;    /*   1              */
2683
2684         }
2685         cos_phi = cos(phi);
2686         /* Beware of accuracy loss, cannot use 1-sqrt(cos^2) ! */
2687         sin_phi = sin(phi);
2688
2689         for (j = 0; (j < NR_RBDIHS); j++)
2690         {
2691             parmA[j] = forceparams[type].rbdihs.rbcA[j];
2692             parmB[j] = forceparams[type].rbdihs.rbcB[j];
2693             parm[j]  = L1*parmA[j]+lambda*parmB[j];
2694         }
2695         /* Calculate cosine powers */
2696         /* Calculate the energy */
2697         /* Calculate the derivative */
2698
2699         v            = parm[0];
2700         dvdl_term   += (parmB[0]-parmA[0]);
2701         ddphi        = c0;
2702         cosfac       = c1;
2703
2704         rbp          = parm[1];
2705         rbpBA        = parmB[1]-parmA[1];
2706         ddphi       += rbp*cosfac;
2707         cosfac      *= cos_phi;
2708         v           += cosfac*rbp;
2709         dvdl_term   += cosfac*rbpBA;
2710         rbp          = parm[2];
2711         rbpBA        = parmB[2]-parmA[2];
2712         ddphi       += c2*rbp*cosfac;
2713         cosfac      *= cos_phi;
2714         v           += cosfac*rbp;
2715         dvdl_term   += cosfac*rbpBA;
2716         rbp          = parm[3];
2717         rbpBA        = parmB[3]-parmA[3];
2718         ddphi       += c3*rbp*cosfac;
2719         cosfac      *= cos_phi;
2720         v           += cosfac*rbp;
2721         dvdl_term   += cosfac*rbpBA;
2722         rbp          = parm[4];
2723         rbpBA        = parmB[4]-parmA[4];
2724         ddphi       += c4*rbp*cosfac;
2725         cosfac      *= cos_phi;
2726         v           += cosfac*rbp;
2727         dvdl_term   += cosfac*rbpBA;
2728         rbp          = parm[5];
2729         rbpBA        = parmB[5]-parmA[5];
2730         ddphi       += c5*rbp*cosfac;
2731         cosfac      *= cos_phi;
2732         v           += cosfac*rbp;
2733         dvdl_term   += cosfac*rbpBA;
2734
2735         ddphi = -ddphi*sin_phi;         /*  11          */
2736
2737         do_dih_fup(ai, aj, ak, al, ddphi, r_ij, r_kj, r_kl, m, n,
2738                    f, fshift, pbc, g, x, t1, t2, t3); /* 112            */
2739         vtot += v;
2740     }
2741     *dvdlambda += dvdl_term;
2742
2743     return vtot;
2744 }
2745
2746 int cmap_setup_grid_index(int ip, int grid_spacing, int *ipm1, int *ipp1, int *ipp2)
2747 {
2748     int im1, ip1, ip2;
2749
2750     if (ip < 0)
2751     {
2752         ip = ip + grid_spacing - 1;
2753     }
2754     else if (ip > grid_spacing)
2755     {
2756         ip = ip - grid_spacing - 1;
2757     }
2758
2759     im1 = ip - 1;
2760     ip1 = ip + 1;
2761     ip2 = ip + 2;
2762
2763     if (ip == 0)
2764     {
2765         im1 = grid_spacing - 1;
2766     }
2767     else if (ip == grid_spacing-2)
2768     {
2769         ip2 = 0;
2770     }
2771     else if (ip == grid_spacing-1)
2772     {
2773         ip1 = 0;
2774         ip2 = 1;
2775     }
2776
2777     *ipm1 = im1;
2778     *ipp1 = ip1;
2779     *ipp2 = ip2;
2780
2781     return ip;
2782
2783 }
2784
2785 real cmap_dihs(int nbonds,
2786                const t_iatom forceatoms[], const t_iparams forceparams[],
2787                const gmx_cmap_t *cmap_grid,
2788                const rvec x[], rvec f[], rvec fshift[],
2789                const t_pbc *pbc, const t_graph *g,
2790                real gmx_unused lambda, real gmx_unused *dvdlambda,
2791                const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2792                int  gmx_unused *global_atom_index)
2793 {
2794     int         i, j, k, n, idx;
2795     int         ai, aj, ak, al, am;
2796     int         a1i, a1j, a1k, a1l, a2i, a2j, a2k, a2l;
2797     int         type, cmapA;
2798     int         t11, t21, t31, t12, t22, t32;
2799     int         iphi1, ip1m1, ip1p1, ip1p2;
2800     int         iphi2, ip2m1, ip2p1, ip2p2;
2801     int         l1, l2, l3, l4;
2802     int         pos1, pos2, pos3, pos4, tmp;
2803
2804     real        ty[4], ty1[4], ty2[4], ty12[4], tc[16], tx[16];
2805     real        phi1, psi1, cos_phi1, sin_phi1, sign1, xphi1;
2806     real        phi2, psi2, cos_phi2, sin_phi2, sign2, xphi2;
2807     real        dx, xx, tt, tu, e, df1, df2, ddf1, ddf2, ddf12, vtot;
2808     real        ra21, rb21, rg21, rg1, rgr1, ra2r1, rb2r1, rabr1;
2809     real        ra22, rb22, rg22, rg2, rgr2, ra2r2, rb2r2, rabr2;
2810     real        fg1, hg1, fga1, hgb1, gaa1, gbb1;
2811     real        fg2, hg2, fga2, hgb2, gaa2, gbb2;
2812     real        fac;
2813
2814     rvec        r1_ij, r1_kj, r1_kl, m1, n1;
2815     rvec        r2_ij, r2_kj, r2_kl, m2, n2;
2816     rvec        f1_i, f1_j, f1_k, f1_l;
2817     rvec        f2_i, f2_j, f2_k, f2_l;
2818     rvec        a1, b1, a2, b2;
2819     rvec        f1, g1, h1, f2, g2, h2;
2820     rvec        dtf1, dtg1, dth1, dtf2, dtg2, dth2;
2821     ivec        jt1, dt1_ij, dt1_kj, dt1_lj;
2822     ivec        jt2, dt2_ij, dt2_kj, dt2_lj;
2823
2824     const real *cmapd;
2825
2826     int         loop_index[4][4] = {
2827         {0, 4, 8, 12},
2828         {1, 5, 9, 13},
2829         {2, 6, 10, 14},
2830         {3, 7, 11, 15}
2831     };
2832
2833     /* Total CMAP energy */
2834     vtot = 0;
2835
2836     for (n = 0; n < nbonds; )
2837     {
2838         /* Five atoms are involved in the two torsions */
2839         type   = forceatoms[n++];
2840         ai     = forceatoms[n++];
2841         aj     = forceatoms[n++];
2842         ak     = forceatoms[n++];
2843         al     = forceatoms[n++];
2844         am     = forceatoms[n++];
2845
2846         /* Which CMAP type is this */
2847         cmapA = forceparams[type].cmap.cmapA;
2848         cmapd = cmap_grid->cmapdata[cmapA].cmap;
2849
2850         /* First torsion */
2851         a1i   = ai;
2852         a1j   = aj;
2853         a1k   = ak;
2854         a1l   = al;
2855
2856         phi1  = dih_angle(x[a1i], x[a1j], x[a1k], x[a1l], pbc, r1_ij, r1_kj, r1_kl, m1, n1,
2857                           &sign1, &t11, &t21, &t31);  /* 84 */
2858
2859         cos_phi1 = cos(phi1);
2860
2861         a1[0] = r1_ij[1]*r1_kj[2]-r1_ij[2]*r1_kj[1];
2862         a1[1] = r1_ij[2]*r1_kj[0]-r1_ij[0]*r1_kj[2];
2863         a1[2] = r1_ij[0]*r1_kj[1]-r1_ij[1]*r1_kj[0]; /* 9 */
2864
2865         b1[0] = r1_kl[1]*r1_kj[2]-r1_kl[2]*r1_kj[1];
2866         b1[1] = r1_kl[2]*r1_kj[0]-r1_kl[0]*r1_kj[2];
2867         b1[2] = r1_kl[0]*r1_kj[1]-r1_kl[1]*r1_kj[0]; /* 9 */
2868
2869         tmp = pbc_rvec_sub(pbc, x[a1l], x[a1k], h1);
2870
2871         ra21  = iprod(a1, a1);       /* 5 */
2872         rb21  = iprod(b1, b1);       /* 5 */
2873         rg21  = iprod(r1_kj, r1_kj); /* 5 */
2874         rg1   = sqrt(rg21);
2875
2876         rgr1  = 1.0/rg1;
2877         ra2r1 = 1.0/ra21;
2878         rb2r1 = 1.0/rb21;
2879         rabr1 = sqrt(ra2r1*rb2r1);
2880
2881         sin_phi1 = rg1 * rabr1 * iprod(a1, h1) * (-1);
2882
2883         if (cos_phi1 < -0.5 || cos_phi1 > 0.5)
2884         {
2885             phi1 = asin(sin_phi1);
2886
2887             if (cos_phi1 < 0)
2888             {
2889                 if (phi1 > 0)
2890                 {
2891                     phi1 = M_PI - phi1;
2892                 }
2893                 else
2894                 {
2895                     phi1 = -M_PI - phi1;
2896                 }
2897             }
2898         }
2899         else
2900         {
2901             phi1 = acos(cos_phi1);
2902
2903             if (sin_phi1 < 0)
2904             {
2905                 phi1 = -phi1;
2906             }
2907         }
2908
2909         xphi1 = phi1 + M_PI; /* 1 */
2910
2911         /* Second torsion */
2912         a2i   = aj;
2913         a2j   = ak;
2914         a2k   = al;
2915         a2l   = am;
2916
2917         phi2  = dih_angle(x[a2i], x[a2j], x[a2k], x[a2l], pbc, r2_ij, r2_kj, r2_kl, m2, n2,
2918                           &sign2, &t12, &t22, &t32); /* 84 */
2919
2920         cos_phi2 = cos(phi2);
2921
2922         a2[0] = r2_ij[1]*r2_kj[2]-r2_ij[2]*r2_kj[1];
2923         a2[1] = r2_ij[2]*r2_kj[0]-r2_ij[0]*r2_kj[2];
2924         a2[2] = r2_ij[0]*r2_kj[1]-r2_ij[1]*r2_kj[0]; /* 9 */
2925
2926         b2[0] = r2_kl[1]*r2_kj[2]-r2_kl[2]*r2_kj[1];
2927         b2[1] = r2_kl[2]*r2_kj[0]-r2_kl[0]*r2_kj[2];
2928         b2[2] = r2_kl[0]*r2_kj[1]-r2_kl[1]*r2_kj[0]; /* 9 */
2929
2930         tmp = pbc_rvec_sub(pbc, x[a2l], x[a2k], h2);
2931
2932         ra22  = iprod(a2, a2);         /* 5 */
2933         rb22  = iprod(b2, b2);         /* 5 */
2934         rg22  = iprod(r2_kj, r2_kj);   /* 5 */
2935         rg2   = sqrt(rg22);
2936
2937         rgr2  = 1.0/rg2;
2938         ra2r2 = 1.0/ra22;
2939         rb2r2 = 1.0/rb22;
2940         rabr2 = sqrt(ra2r2*rb2r2);
2941
2942         sin_phi2 = rg2 * rabr2 * iprod(a2, h2) * (-1);
2943
2944         if (cos_phi2 < -0.5 || cos_phi2 > 0.5)
2945         {
2946             phi2 = asin(sin_phi2);
2947
2948             if (cos_phi2 < 0)
2949             {
2950                 if (phi2 > 0)
2951                 {
2952                     phi2 = M_PI - phi2;
2953                 }
2954                 else
2955                 {
2956                     phi2 = -M_PI - phi2;
2957                 }
2958             }
2959         }
2960         else
2961         {
2962             phi2 = acos(cos_phi2);
2963
2964             if (sin_phi2 < 0)
2965             {
2966                 phi2 = -phi2;
2967             }
2968         }
2969
2970         xphi2 = phi2 + M_PI; /* 1 */
2971
2972         /* Range mangling */
2973         if (xphi1 < 0)
2974         {
2975             xphi1 = xphi1 + 2*M_PI;
2976         }
2977         else if (xphi1 >= 2*M_PI)
2978         {
2979             xphi1 = xphi1 - 2*M_PI;
2980         }
2981
2982         if (xphi2 < 0)
2983         {
2984             xphi2 = xphi2 + 2*M_PI;
2985         }
2986         else if (xphi2 >= 2*M_PI)
2987         {
2988             xphi2 = xphi2 - 2*M_PI;
2989         }
2990
2991         /* Number of grid points */
2992         dx = 2*M_PI / cmap_grid->grid_spacing;
2993
2994         /* Where on the grid are we */
2995         iphi1 = (int)(xphi1/dx);
2996         iphi2 = (int)(xphi2/dx);
2997
2998         iphi1 = cmap_setup_grid_index(iphi1, cmap_grid->grid_spacing, &ip1m1, &ip1p1, &ip1p2);
2999         iphi2 = cmap_setup_grid_index(iphi2, cmap_grid->grid_spacing, &ip2m1, &ip2p1, &ip2p2);
3000
3001         pos1    = iphi1*cmap_grid->grid_spacing+iphi2;
3002         pos2    = ip1p1*cmap_grid->grid_spacing+iphi2;
3003         pos3    = ip1p1*cmap_grid->grid_spacing+ip2p1;
3004         pos4    = iphi1*cmap_grid->grid_spacing+ip2p1;
3005
3006         ty[0]   = cmapd[pos1*4];
3007         ty[1]   = cmapd[pos2*4];
3008         ty[2]   = cmapd[pos3*4];
3009         ty[3]   = cmapd[pos4*4];
3010
3011         ty1[0]   = cmapd[pos1*4+1];
3012         ty1[1]   = cmapd[pos2*4+1];
3013         ty1[2]   = cmapd[pos3*4+1];
3014         ty1[3]   = cmapd[pos4*4+1];
3015
3016         ty2[0]   = cmapd[pos1*4+2];
3017         ty2[1]   = cmapd[pos2*4+2];
3018         ty2[2]   = cmapd[pos3*4+2];
3019         ty2[3]   = cmapd[pos4*4+2];
3020
3021         ty12[0]   = cmapd[pos1*4+3];
3022         ty12[1]   = cmapd[pos2*4+3];
3023         ty12[2]   = cmapd[pos3*4+3];
3024         ty12[3]   = cmapd[pos4*4+3];
3025
3026         /* Switch to degrees */
3027         dx    = 360.0 / cmap_grid->grid_spacing;
3028         xphi1 = xphi1 * RAD2DEG;
3029         xphi2 = xphi2 * RAD2DEG;
3030
3031         for (i = 0; i < 4; i++) /* 16 */
3032         {
3033             tx[i]    = ty[i];
3034             tx[i+4]  = ty1[i]*dx;
3035             tx[i+8]  = ty2[i]*dx;
3036             tx[i+12] = ty12[i]*dx*dx;
3037         }
3038
3039         idx = 0;
3040         for (i = 0; i < 4; i++) /* 1056 */
3041         {
3042             for (j = 0; j < 4; j++)
3043             {
3044                 xx = 0;
3045                 for (k = 0; k < 16; k++)
3046                 {
3047                     xx = xx + cmap_coeff_matrix[k*16+idx]*tx[k];
3048                 }
3049
3050                 idx++;
3051                 tc[i*4+j] = xx;
3052             }
3053         }
3054
3055         tt    = (xphi1-iphi1*dx)/dx;
3056         tu    = (xphi2-iphi2*dx)/dx;
3057
3058         e     = 0;
3059         df1   = 0;
3060         df2   = 0;
3061         ddf1  = 0;
3062         ddf2  = 0;
3063         ddf12 = 0;
3064
3065         for (i = 3; i >= 0; i--)
3066         {
3067             l1 = loop_index[i][3];
3068             l2 = loop_index[i][2];
3069             l3 = loop_index[i][1];
3070
3071             e     = tt * e    + ((tc[i*4+3]*tu+tc[i*4+2])*tu + tc[i*4+1])*tu+tc[i*4];
3072             df1   = tu * df1  + (3.0*tc[l1]*tt+2.0*tc[l2])*tt+tc[l3];
3073             df2   = tt * df2  + (3.0*tc[i*4+3]*tu+2.0*tc[i*4+2])*tu+tc[i*4+1];
3074             ddf1  = tu * ddf1 + 2.0*3.0*tc[l1]*tt+2.0*tc[l2];
3075             ddf2  = tt * ddf2 + 2.0*3.0*tc[4*i+3]*tu+2.0*tc[4*i+2];
3076         }
3077
3078         ddf12 = tc[5] + 2.0*tc[9]*tt + 3.0*tc[13]*tt*tt + 2.0*tu*(tc[6]+2.0*tc[10]*tt+3.0*tc[14]*tt*tt) +
3079             3.0*tu*tu*(tc[7]+2.0*tc[11]*tt+3.0*tc[15]*tt*tt);
3080
3081         fac     = RAD2DEG/dx;
3082         df1     = df1   * fac;
3083         df2     = df2   * fac;
3084         ddf1    = ddf1  * fac * fac;
3085         ddf2    = ddf2  * fac * fac;
3086         ddf12   = ddf12 * fac * fac;
3087
3088         /* CMAP energy */
3089         vtot += e;
3090
3091         /* Do forces - first torsion */
3092         fg1       = iprod(r1_ij, r1_kj);
3093         hg1       = iprod(r1_kl, r1_kj);
3094         fga1      = fg1*ra2r1*rgr1;
3095         hgb1      = hg1*rb2r1*rgr1;
3096         gaa1      = -ra2r1*rg1;
3097         gbb1      = rb2r1*rg1;
3098
3099         for (i = 0; i < DIM; i++)
3100         {
3101             dtf1[i]   = gaa1 * a1[i];
3102             dtg1[i]   = fga1 * a1[i] - hgb1 * b1[i];
3103             dth1[i]   = gbb1 * b1[i];
3104
3105             f1[i]     = df1  * dtf1[i];
3106             g1[i]     = df1  * dtg1[i];
3107             h1[i]     = df1  * dth1[i];
3108
3109             f1_i[i]   =  f1[i];
3110             f1_j[i]   = -f1[i] - g1[i];
3111             f1_k[i]   =  h1[i] + g1[i];
3112             f1_l[i]   = -h1[i];
3113
3114             f[a1i][i] = f[a1i][i] + f1_i[i];
3115             f[a1j][i] = f[a1j][i] + f1_j[i]; /* - f1[i] - g1[i] */
3116             f[a1k][i] = f[a1k][i] + f1_k[i]; /* h1[i] + g1[i] */
3117             f[a1l][i] = f[a1l][i] + f1_l[i]; /* h1[i] */
3118         }
3119
3120         /* Do forces - second torsion */
3121         fg2       = iprod(r2_ij, r2_kj);
3122         hg2       = iprod(r2_kl, r2_kj);
3123         fga2      = fg2*ra2r2*rgr2;
3124         hgb2      = hg2*rb2r2*rgr2;
3125         gaa2      = -ra2r2*rg2;
3126         gbb2      = rb2r2*rg2;
3127
3128         for (i = 0; i < DIM; i++)
3129         {
3130             dtf2[i]   = gaa2 * a2[i];
3131             dtg2[i]   = fga2 * a2[i] - hgb2 * b2[i];
3132             dth2[i]   = gbb2 * b2[i];
3133
3134             f2[i]     = df2  * dtf2[i];
3135             g2[i]     = df2  * dtg2[i];
3136             h2[i]     = df2  * dth2[i];
3137
3138             f2_i[i]   =  f2[i];
3139             f2_j[i]   = -f2[i] - g2[i];
3140             f2_k[i]   =  h2[i] + g2[i];
3141             f2_l[i]   = -h2[i];
3142
3143             f[a2i][i] = f[a2i][i] + f2_i[i]; /* f2[i] */
3144             f[a2j][i] = f[a2j][i] + f2_j[i]; /* - f2[i] - g2[i] */
3145             f[a2k][i] = f[a2k][i] + f2_k[i]; /* h2[i] + g2[i] */
3146             f[a2l][i] = f[a2l][i] + f2_l[i]; /* - h2[i] */
3147         }
3148
3149         /* Shift forces */
3150         if (g)
3151         {
3152             copy_ivec(SHIFT_IVEC(g, a1j), jt1);
3153             ivec_sub(SHIFT_IVEC(g, a1i),  jt1, dt1_ij);
3154             ivec_sub(SHIFT_IVEC(g, a1k),  jt1, dt1_kj);
3155             ivec_sub(SHIFT_IVEC(g, a1l),  jt1, dt1_lj);
3156             t11 = IVEC2IS(dt1_ij);
3157             t21 = IVEC2IS(dt1_kj);
3158             t31 = IVEC2IS(dt1_lj);
3159
3160             copy_ivec(SHIFT_IVEC(g, a2j), jt2);
3161             ivec_sub(SHIFT_IVEC(g, a2i),  jt2, dt2_ij);
3162             ivec_sub(SHIFT_IVEC(g, a2k),  jt2, dt2_kj);
3163             ivec_sub(SHIFT_IVEC(g, a2l),  jt2, dt2_lj);
3164             t12 = IVEC2IS(dt2_ij);
3165             t22 = IVEC2IS(dt2_kj);
3166             t32 = IVEC2IS(dt2_lj);
3167         }
3168         else if (pbc)
3169         {
3170             t31 = pbc_rvec_sub(pbc, x[a1l], x[a1j], h1);
3171             t32 = pbc_rvec_sub(pbc, x[a2l], x[a2j], h2);
3172         }
3173         else
3174         {
3175             t31 = CENTRAL;
3176             t32 = CENTRAL;
3177         }
3178
3179         rvec_inc(fshift[t11], f1_i);
3180         rvec_inc(fshift[CENTRAL], f1_j);
3181         rvec_inc(fshift[t21], f1_k);
3182         rvec_inc(fshift[t31], f1_l);
3183
3184         rvec_inc(fshift[t21], f2_i);
3185         rvec_inc(fshift[CENTRAL], f2_j);
3186         rvec_inc(fshift[t22], f2_k);
3187         rvec_inc(fshift[t32], f2_l);
3188     }
3189     return vtot;
3190 }
3191
3192
3193
3194 /***********************************************************
3195  *
3196  *   G R O M O S  9 6   F U N C T I O N S
3197  *
3198  ***********************************************************/
3199 real g96harmonic(real kA, real kB, real xA, real xB, real x, real lambda,
3200                  real *V, real *F)
3201 {
3202     const real half = 0.5;
3203     real       L1, kk, x0, dx, dx2;
3204     real       v, f, dvdlambda;
3205
3206     L1    = 1.0-lambda;
3207     kk    = L1*kA+lambda*kB;
3208     x0    = L1*xA+lambda*xB;
3209
3210     dx    = x-x0;
3211     dx2   = dx*dx;
3212
3213     f          = -kk*dx;
3214     v          = half*kk*dx2;
3215     dvdlambda  = half*(kB-kA)*dx2 + (xA-xB)*kk*dx;
3216
3217     *F    = f;
3218     *V    = v;
3219
3220     return dvdlambda;
3221
3222     /* That was 21 flops */
3223 }
3224
3225 real g96bonds(int nbonds,
3226               const t_iatom forceatoms[], const t_iparams forceparams[],
3227               const rvec x[], rvec f[], rvec fshift[],
3228               const t_pbc *pbc, const t_graph *g,
3229               real lambda, real *dvdlambda,
3230               const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
3231               int gmx_unused *global_atom_index)
3232 {
3233     int  i, m, ki, ai, aj, type;
3234     real dr2, fbond, vbond, fij, vtot;
3235     rvec dx;
3236     ivec dt;
3237
3238     vtot = 0.0;
3239     for (i = 0; (i < nbonds); )
3240     {
3241         type = forceatoms[i++];
3242         ai   = forceatoms[i++];
3243         aj   = forceatoms[i++];
3244
3245         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
3246         dr2  = iprod(dx, dx);                       /*   5              */
3247
3248         *dvdlambda += g96harmonic(forceparams[type].harmonic.krA,
3249                                   forceparams[type].harmonic.krB,
3250                                   forceparams[type].harmonic.rA,
3251                                   forceparams[type].harmonic.rB,
3252                                   dr2, lambda, &vbond, &fbond);
3253
3254         vtot  += 0.5*vbond;                         /* 1*/
3255 #ifdef DEBUG
3256         if (debug)
3257         {
3258             fprintf(debug, "G96-BONDS: dr = %10g  vbond = %10g  fbond = %10g\n",
3259                     sqrt(dr2), vbond, fbond);
3260         }
3261 #endif
3262
3263         if (g)
3264         {
3265             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
3266             ki = IVEC2IS(dt);
3267         }
3268         for (m = 0; (m < DIM); m++)     /*  15          */
3269         {
3270             fij                 = fbond*dx[m];
3271             f[ai][m]           += fij;
3272             f[aj][m]           -= fij;
3273             fshift[ki][m]      += fij;
3274             fshift[CENTRAL][m] -= fij;
3275         }
3276     }               /* 44 TOTAL */
3277     return vtot;
3278 }
3279
3280 real g96bond_angle(const rvec xi, const rvec xj, const rvec xk, const t_pbc *pbc,
3281                    rvec r_ij, rvec r_kj,
3282                    int *t1, int *t2)
3283 /* Return value is the angle between the bonds i-j and j-k */
3284 {
3285     real costh;
3286
3287     *t1 = pbc_rvec_sub(pbc, xi, xj, r_ij); /*  3                */
3288     *t2 = pbc_rvec_sub(pbc, xk, xj, r_kj); /*  3                */
3289
3290     costh = cos_angle(r_ij, r_kj);         /* 25                */
3291     /* 41 TOTAL */
3292     return costh;
3293 }
3294
3295 real g96angles(int nbonds,
3296                const t_iatom forceatoms[], const t_iparams forceparams[],
3297                const rvec x[], rvec f[], rvec fshift[],
3298                const t_pbc *pbc, const t_graph *g,
3299                real lambda, real *dvdlambda,
3300                const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
3301                int gmx_unused *global_atom_index)
3302 {
3303     int  i, ai, aj, ak, type, m, t1, t2;
3304     rvec r_ij, r_kj;
3305     real cos_theta, dVdt, va, vtot;
3306     real rij_1, rij_2, rkj_1, rkj_2, rijrkj_1;
3307     rvec f_i, f_j, f_k;
3308     ivec jt, dt_ij, dt_kj;
3309
3310     vtot = 0.0;
3311     for (i = 0; (i < nbonds); )
3312     {
3313         type = forceatoms[i++];
3314         ai   = forceatoms[i++];
3315         aj   = forceatoms[i++];
3316         ak   = forceatoms[i++];
3317
3318         cos_theta  = g96bond_angle(x[ai], x[aj], x[ak], pbc, r_ij, r_kj, &t1, &t2);
3319
3320         *dvdlambda += g96harmonic(forceparams[type].harmonic.krA,
3321                                   forceparams[type].harmonic.krB,
3322                                   forceparams[type].harmonic.rA,
3323                                   forceparams[type].harmonic.rB,
3324                                   cos_theta, lambda, &va, &dVdt);
3325         vtot    += va;
3326
3327         rij_1    = gmx_invsqrt(iprod(r_ij, r_ij));
3328         rkj_1    = gmx_invsqrt(iprod(r_kj, r_kj));
3329         rij_2    = rij_1*rij_1;
3330         rkj_2    = rkj_1*rkj_1;
3331         rijrkj_1 = rij_1*rkj_1;                 /* 23 */
3332
3333 #ifdef DEBUG
3334         if (debug)
3335         {
3336             fprintf(debug, "G96ANGLES: costheta = %10g  vth = %10g  dV/dct = %10g\n",
3337                     cos_theta, va, dVdt);
3338         }
3339 #endif
3340         for (m = 0; (m < DIM); m++)     /*  42  */
3341         {
3342             f_i[m]    = dVdt*(r_kj[m]*rijrkj_1 - r_ij[m]*rij_2*cos_theta);
3343             f_k[m]    = dVdt*(r_ij[m]*rijrkj_1 - r_kj[m]*rkj_2*cos_theta);
3344             f_j[m]    = -f_i[m]-f_k[m];
3345             f[ai][m] += f_i[m];
3346             f[aj][m] += f_j[m];
3347             f[ak][m] += f_k[m];
3348         }
3349
3350         if (g)
3351         {
3352             copy_ivec(SHIFT_IVEC(g, aj), jt);
3353
3354             ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
3355             ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
3356             t1 = IVEC2IS(dt_ij);
3357             t2 = IVEC2IS(dt_kj);
3358         }
3359         rvec_inc(fshift[t1], f_i);
3360         rvec_inc(fshift[CENTRAL], f_j);
3361         rvec_inc(fshift[t2], f_k);          /* 9 */
3362         /* 163 TOTAL    */
3363     }
3364     return vtot;
3365 }
3366
3367 real cross_bond_bond(int nbonds,
3368                      const t_iatom forceatoms[], const t_iparams forceparams[],
3369                      const rvec x[], rvec f[], rvec fshift[],
3370                      const t_pbc *pbc, const t_graph *g,
3371                      real gmx_unused lambda, real gmx_unused *dvdlambda,
3372                      const t_mdatoms gmx_unused *md, t_fcdata gmx_unused  *fcd,
3373                      int gmx_unused *global_atom_index)
3374 {
3375     /* Potential from Lawrence and Skimmer, Chem. Phys. Lett. 372 (2003)
3376      * pp. 842-847
3377      */
3378     int  i, ai, aj, ak, type, m, t1, t2;
3379     rvec r_ij, r_kj;
3380     real vtot, vrr, s1, s2, r1, r2, r1e, r2e, krr;
3381     rvec f_i, f_j, f_k;
3382     ivec jt, dt_ij, dt_kj;
3383
3384     vtot = 0.0;
3385     for (i = 0; (i < nbonds); )
3386     {
3387         type = forceatoms[i++];
3388         ai   = forceatoms[i++];
3389         aj   = forceatoms[i++];
3390         ak   = forceatoms[i++];
3391         r1e  = forceparams[type].cross_bb.r1e;
3392         r2e  = forceparams[type].cross_bb.r2e;
3393         krr  = forceparams[type].cross_bb.krr;
3394
3395         /* Compute distance vectors ... */
3396         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], r_ij);
3397         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], r_kj);
3398
3399         /* ... and their lengths */
3400         r1 = norm(r_ij);
3401         r2 = norm(r_kj);
3402
3403         /* Deviations from ideality */
3404         s1 = r1-r1e;
3405         s2 = r2-r2e;
3406
3407         /* Energy (can be negative!) */
3408         vrr   = krr*s1*s2;
3409         vtot += vrr;
3410
3411         /* Forces */
3412         svmul(-krr*s2/r1, r_ij, f_i);
3413         svmul(-krr*s1/r2, r_kj, f_k);
3414
3415         for (m = 0; (m < DIM); m++)     /*  12  */
3416         {
3417             f_j[m]    = -f_i[m] - f_k[m];
3418             f[ai][m] += f_i[m];
3419             f[aj][m] += f_j[m];
3420             f[ak][m] += f_k[m];
3421         }
3422
3423         /* Virial stuff */
3424         if (g)
3425         {
3426             copy_ivec(SHIFT_IVEC(g, aj), jt);
3427
3428             ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
3429             ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
3430             t1 = IVEC2IS(dt_ij);
3431             t2 = IVEC2IS(dt_kj);
3432         }
3433         rvec_inc(fshift[t1], f_i);
3434         rvec_inc(fshift[CENTRAL], f_j);
3435         rvec_inc(fshift[t2], f_k);          /* 9 */
3436         /* 163 TOTAL    */
3437     }
3438     return vtot;
3439 }
3440
3441 real cross_bond_angle(int nbonds,
3442                       const t_iatom forceatoms[], const t_iparams forceparams[],
3443                       const rvec x[], rvec f[], rvec fshift[],
3444                       const t_pbc *pbc, const t_graph *g,
3445                       real gmx_unused lambda, real gmx_unused *dvdlambda,
3446                       const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
3447                       int gmx_unused *global_atom_index)
3448 {
3449     /* Potential from Lawrence and Skimmer, Chem. Phys. Lett. 372 (2003)
3450      * pp. 842-847
3451      */
3452     int  i, ai, aj, ak, type, m, t1, t2, t3;
3453     rvec r_ij, r_kj, r_ik;
3454     real vtot, vrt, s1, s2, s3, r1, r2, r3, r1e, r2e, r3e, krt, k1, k2, k3;
3455     rvec f_i, f_j, f_k;
3456     ivec jt, dt_ij, dt_kj;
3457
3458     vtot = 0.0;
3459     for (i = 0; (i < nbonds); )
3460     {
3461         type = forceatoms[i++];
3462         ai   = forceatoms[i++];
3463         aj   = forceatoms[i++];
3464         ak   = forceatoms[i++];
3465         r1e  = forceparams[type].cross_ba.r1e;
3466         r2e  = forceparams[type].cross_ba.r2e;
3467         r3e  = forceparams[type].cross_ba.r3e;
3468         krt  = forceparams[type].cross_ba.krt;
3469
3470         /* Compute distance vectors ... */
3471         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], r_ij);
3472         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], r_kj);
3473         t3 = pbc_rvec_sub(pbc, x[ai], x[ak], r_ik);
3474
3475         /* ... and their lengths */
3476         r1 = norm(r_ij);
3477         r2 = norm(r_kj);
3478         r3 = norm(r_ik);
3479
3480         /* Deviations from ideality */
3481         s1 = r1-r1e;
3482         s2 = r2-r2e;
3483         s3 = r3-r3e;
3484
3485         /* Energy (can be negative!) */
3486         vrt   = krt*s3*(s1+s2);
3487         vtot += vrt;
3488
3489         /* Forces */
3490         k1 = -krt*(s3/r1);
3491         k2 = -krt*(s3/r2);
3492         k3 = -krt*(s1+s2)/r3;
3493         for (m = 0; (m < DIM); m++)
3494         {
3495             f_i[m] = k1*r_ij[m] + k3*r_ik[m];
3496             f_k[m] = k2*r_kj[m] - k3*r_ik[m];
3497             f_j[m] = -f_i[m] - f_k[m];
3498         }
3499
3500         for (m = 0; (m < DIM); m++)     /*  12  */
3501         {
3502             f[ai][m] += f_i[m];
3503             f[aj][m] += f_j[m];
3504             f[ak][m] += f_k[m];
3505         }
3506
3507         /* Virial stuff */
3508         if (g)
3509         {
3510             copy_ivec(SHIFT_IVEC(g, aj), jt);
3511
3512             ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
3513             ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
3514             t1 = IVEC2IS(dt_ij);
3515             t2 = IVEC2IS(dt_kj);
3516         }
3517         rvec_inc(fshift[t1], f_i);
3518         rvec_inc(fshift[CENTRAL], f_j);
3519         rvec_inc(fshift[t2], f_k);          /* 9 */
3520         /* 163 TOTAL    */
3521     }
3522     return vtot;
3523 }
3524
3525 static real bonded_tab(const char *type, int table_nr,
3526                        const bondedtable_t *table, real kA, real kB, real r,
3527                        real lambda, real *V, real *F)
3528 {
3529     real k, tabscale, *VFtab, rt, eps, eps2, Yt, Ft, Geps, Heps2, Fp, VV, FF;
3530     int  n0, nnn;
3531     real v, f, dvdlambda;
3532
3533     k = (1.0 - lambda)*kA + lambda*kB;
3534
3535     tabscale = table->scale;
3536     VFtab    = table->data;
3537
3538     rt    = r*tabscale;
3539     n0    = rt;
3540     if (n0 >= table->n)
3541     {
3542         gmx_fatal(FARGS, "A tabulated %s interaction table number %d is out of the table range: r %f, between table indices %d and %d, table length %d",
3543                   type, table_nr, r, n0, n0+1, table->n);
3544     }
3545     eps   = rt - n0;
3546     eps2  = eps*eps;
3547     nnn   = 4*n0;
3548     Yt    = VFtab[nnn];
3549     Ft    = VFtab[nnn+1];
3550     Geps  = VFtab[nnn+2]*eps;
3551     Heps2 = VFtab[nnn+3]*eps2;
3552     Fp    = Ft + Geps + Heps2;
3553     VV    = Yt + Fp*eps;
3554     FF    = Fp + Geps + 2.0*Heps2;
3555
3556     *F         = -k*FF*tabscale;
3557     *V         = k*VV;
3558     dvdlambda  = (kB - kA)*VV;
3559
3560     return dvdlambda;
3561
3562     /* That was 22 flops */
3563 }
3564
3565 real tab_bonds(int nbonds,
3566                const t_iatom forceatoms[], const t_iparams forceparams[],
3567                const rvec x[], rvec f[], rvec fshift[],
3568                const t_pbc *pbc, const t_graph *g,
3569                real lambda, real *dvdlambda,
3570                const t_mdatoms gmx_unused *md, t_fcdata *fcd,
3571                int gmx_unused  *global_atom_index)
3572 {
3573     int  i, m, ki, ai, aj, type, table;
3574     real dr, dr2, fbond, vbond, fij, vtot;
3575     rvec dx;
3576     ivec dt;
3577
3578     vtot = 0.0;
3579     for (i = 0; (i < nbonds); )
3580     {
3581         type = forceatoms[i++];
3582         ai   = forceatoms[i++];
3583         aj   = forceatoms[i++];
3584
3585         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
3586         dr2  = iprod(dx, dx);                       /*   5              */
3587         dr   = dr2*gmx_invsqrt(dr2);                /*  10              */
3588
3589         table = forceparams[type].tab.table;
3590
3591         *dvdlambda += bonded_tab("bond", table,
3592                                  &fcd->bondtab[table],
3593                                  forceparams[type].tab.kA,
3594                                  forceparams[type].tab.kB,
3595                                  dr, lambda, &vbond, &fbond); /*  22 */
3596
3597         if (dr2 == 0.0)
3598         {
3599             continue;
3600         }
3601
3602
3603         vtot  += vbond;            /* 1*/
3604         fbond *= gmx_invsqrt(dr2); /*   6               */
3605 #ifdef DEBUG
3606         if (debug)
3607         {
3608             fprintf(debug, "TABBONDS: dr = %10g  vbond = %10g  fbond = %10g\n",
3609                     dr, vbond, fbond);
3610         }
3611 #endif
3612         if (g)
3613         {
3614             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
3615             ki = IVEC2IS(dt);
3616         }
3617         for (m = 0; (m < DIM); m++)     /*  15          */
3618         {
3619             fij                 = fbond*dx[m];
3620             f[ai][m]           += fij;
3621             f[aj][m]           -= fij;
3622             fshift[ki][m]      += fij;
3623             fshift[CENTRAL][m] -= fij;
3624         }
3625     }               /* 62 TOTAL */
3626     return vtot;
3627 }
3628
3629 real tab_angles(int nbonds,
3630                 const t_iatom forceatoms[], const t_iparams forceparams[],
3631                 const rvec x[], rvec f[], rvec fshift[],
3632                 const t_pbc *pbc, const t_graph *g,
3633                 real lambda, real *dvdlambda,
3634                 const t_mdatoms gmx_unused  *md, t_fcdata *fcd,
3635                 int gmx_unused *global_atom_index)
3636 {
3637     int  i, ai, aj, ak, t1, t2, type, table;
3638     rvec r_ij, r_kj;
3639     real cos_theta, cos_theta2, theta, dVdt, va, vtot;
3640     ivec jt, dt_ij, dt_kj;
3641
3642     vtot = 0.0;
3643     for (i = 0; (i < nbonds); )
3644     {
3645         type = forceatoms[i++];
3646         ai   = forceatoms[i++];
3647         aj   = forceatoms[i++];
3648         ak   = forceatoms[i++];
3649
3650         theta  = bond_angle(x[ai], x[aj], x[ak], pbc,
3651                             r_ij, r_kj, &cos_theta, &t1, &t2); /*  41           */
3652
3653         table = forceparams[type].tab.table;
3654
3655         *dvdlambda += bonded_tab("angle", table,
3656                                  &fcd->angletab[table],
3657                                  forceparams[type].tab.kA,
3658                                  forceparams[type].tab.kB,
3659                                  theta, lambda, &va, &dVdt); /*  22  */
3660         vtot += va;
3661
3662         cos_theta2 = sqr(cos_theta);            /*   1          */
3663         if (cos_theta2 < 1)
3664         {
3665             int  m;
3666             real snt, st, sth;
3667             real cik, cii, ckk;
3668             real nrkj2, nrij2;
3669             rvec f_i, f_j, f_k;
3670
3671             st  = dVdt*gmx_invsqrt(1 - cos_theta2); /*  12              */
3672             sth = st*cos_theta;                     /*   1              */
3673 #ifdef DEBUG
3674             if (debug)
3675             {
3676                 fprintf(debug, "ANGLES: theta = %10g  vth = %10g  dV/dtheta = %10g\n",
3677                         theta*RAD2DEG, va, dVdt);
3678             }
3679 #endif
3680             nrkj2 = iprod(r_kj, r_kj);  /*   5          */
3681             nrij2 = iprod(r_ij, r_ij);
3682
3683             cik = st*gmx_invsqrt(nrkj2*nrij2); /*  12           */
3684             cii = sth/nrij2;                   /*  10           */
3685             ckk = sth/nrkj2;                   /*  10           */
3686
3687             for (m = 0; (m < DIM); m++)        /*  39           */
3688             {
3689                 f_i[m]    = -(cik*r_kj[m]-cii*r_ij[m]);
3690                 f_k[m]    = -(cik*r_ij[m]-ckk*r_kj[m]);
3691                 f_j[m]    = -f_i[m]-f_k[m];
3692                 f[ai][m] += f_i[m];
3693                 f[aj][m] += f_j[m];
3694                 f[ak][m] += f_k[m];
3695             }
3696             if (g)
3697             {
3698                 copy_ivec(SHIFT_IVEC(g, aj), jt);
3699
3700                 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
3701                 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
3702                 t1 = IVEC2IS(dt_ij);
3703                 t2 = IVEC2IS(dt_kj);
3704             }
3705             rvec_inc(fshift[t1], f_i);
3706             rvec_inc(fshift[CENTRAL], f_j);
3707             rvec_inc(fshift[t2], f_k);
3708         }                                       /* 169 TOTAL    */
3709     }
3710     return vtot;
3711 }
3712
3713 real tab_dihs(int nbonds,
3714               const t_iatom forceatoms[], const t_iparams forceparams[],
3715               const rvec x[], rvec f[], rvec fshift[],
3716               const t_pbc *pbc, const t_graph *g,
3717               real lambda, real *dvdlambda,
3718               const t_mdatoms gmx_unused *md, t_fcdata *fcd,
3719               int gmx_unused *global_atom_index)
3720 {
3721     int  i, type, ai, aj, ak, al, table;
3722     int  t1, t2, t3;
3723     rvec r_ij, r_kj, r_kl, m, n;
3724     real phi, sign, ddphi, vpd, vtot;
3725
3726     vtot = 0.0;
3727     for (i = 0; (i < nbonds); )
3728     {
3729         type = forceatoms[i++];
3730         ai   = forceatoms[i++];
3731         aj   = forceatoms[i++];
3732         ak   = forceatoms[i++];
3733         al   = forceatoms[i++];
3734
3735         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
3736                         &sign, &t1, &t2, &t3);  /*  84  */
3737
3738         table = forceparams[type].tab.table;
3739
3740         /* Hopefully phi+M_PI never results in values < 0 */
3741         *dvdlambda += bonded_tab("dihedral", table,
3742                                  &fcd->dihtab[table],
3743                                  forceparams[type].tab.kA,
3744                                  forceparams[type].tab.kB,
3745                                  phi+M_PI, lambda, &vpd, &ddphi);
3746
3747         vtot += vpd;
3748         do_dih_fup(ai, aj, ak, al, -ddphi, r_ij, r_kj, r_kl, m, n,
3749                    f, fshift, pbc, g, x, t1, t2, t3); /* 112    */
3750
3751 #ifdef DEBUG
3752         fprintf(debug, "pdih: (%d,%d,%d,%d) phi=%g\n",
3753                 ai, aj, ak, al, phi);
3754 #endif
3755     } /* 227 TOTAL  */
3756
3757     return vtot;
3758 }
3759
3760 static unsigned
3761 calc_bonded_reduction_mask(const t_idef *idef,
3762                            int shift,
3763                            int t, int nt)
3764 {
3765     unsigned mask;
3766     int      ftype, nb, nat1, nb0, nb1, i, a;
3767
3768     mask = 0;
3769
3770     for (ftype = 0; ftype < F_NRE; ftype++)
3771     {
3772         if (interaction_function[ftype].flags & IF_BOND &&
3773             !(ftype == F_CONNBONDS || ftype == F_POSRES) &&
3774             (ftype<F_GB12 || ftype>F_GB14))
3775         {
3776             nb = idef->il[ftype].nr;
3777             if (nb > 0)
3778             {
3779                 nat1 = interaction_function[ftype].nratoms + 1;
3780
3781                 /* Divide this interaction equally over the threads.
3782                  * This is not stored: should match division in calc_bonds.
3783                  */
3784                 nb0 = (((nb/nat1)* t   )/nt)*nat1;
3785                 nb1 = (((nb/nat1)*(t+1))/nt)*nat1;
3786
3787                 for (i = nb0; i < nb1; i += nat1)
3788                 {
3789                     for (a = 1; a < nat1; a++)
3790                     {
3791                         mask |= (1U << (idef->il[ftype].iatoms[i+a]>>shift));
3792                     }
3793                 }
3794             }
3795         }
3796     }
3797
3798     return mask;
3799 }
3800
3801 void init_bonded_thread_force_reduction(t_forcerec   *fr,
3802                                         const t_idef *idef)
3803 {
3804 #define MAX_BLOCK_BITS 32
3805     int t;
3806     int ctot, c, b;
3807
3808     if (fr->nthreads <= 1)
3809     {
3810         fr->red_nblock = 0;
3811
3812         return;
3813     }
3814
3815     /* We divide the force array in a maximum of 32 blocks.
3816      * Minimum force block reduction size is 2^6=64.
3817      */
3818     fr->red_ashift = 6;
3819     while (fr->natoms_force > (int)(MAX_BLOCK_BITS*(1U<<fr->red_ashift)))
3820     {
3821         fr->red_ashift++;
3822     }
3823     if (debug)
3824     {
3825         fprintf(debug, "bonded force buffer block atom shift %d bits\n",
3826                 fr->red_ashift);
3827     }
3828
3829     /* Determine to which blocks each thread's bonded force calculation
3830      * contributes. Store this is a mask for each thread.
3831      */
3832 #pragma omp parallel for num_threads(fr->nthreads) schedule(static)
3833     for (t = 1; t < fr->nthreads; t++)
3834     {
3835         fr->f_t[t].red_mask =
3836             calc_bonded_reduction_mask(idef, fr->red_ashift, t, fr->nthreads);
3837     }
3838
3839     /* Determine the maximum number of blocks we need to reduce over */
3840     fr->red_nblock = 0;
3841     ctot           = 0;
3842     for (t = 0; t < fr->nthreads; t++)
3843     {
3844         c = 0;
3845         for (b = 0; b < MAX_BLOCK_BITS; b++)
3846         {
3847             if (fr->f_t[t].red_mask & (1U<<b))
3848             {
3849                 fr->red_nblock = max(fr->red_nblock, b+1);
3850                 c++;
3851             }
3852         }
3853         if (debug)
3854         {
3855             fprintf(debug, "thread %d flags %x count %d\n",
3856                     t, fr->f_t[t].red_mask, c);
3857         }
3858         ctot += c;
3859     }
3860     if (debug)
3861     {
3862         fprintf(debug, "Number of blocks to reduce: %d of size %d\n",
3863                 fr->red_nblock, 1<<fr->red_ashift);
3864         fprintf(debug, "Reduction density %.2f density/#thread %.2f\n",
3865                 ctot*(1<<fr->red_ashift)/(double)fr->natoms_force,
3866                 ctot*(1<<fr->red_ashift)/(double)(fr->natoms_force*fr->nthreads));
3867     }
3868 }
3869
3870 static void zero_thread_forces(f_thread_t *f_t, int n,
3871                                int nblock, int blocksize)
3872 {
3873     int b, a0, a1, a, i, j;
3874
3875     if (n > f_t->f_nalloc)
3876     {
3877         f_t->f_nalloc = over_alloc_large(n);
3878         srenew(f_t->f, f_t->f_nalloc);
3879     }
3880
3881     if (f_t->red_mask != 0)
3882     {
3883         for (b = 0; b < nblock; b++)
3884         {
3885             if (f_t->red_mask && (1U<<b))
3886             {
3887                 a0 = b*blocksize;
3888                 a1 = min((b+1)*blocksize, n);
3889                 for (a = a0; a < a1; a++)
3890                 {
3891                     clear_rvec(f_t->f[a]);
3892                 }
3893             }
3894         }
3895     }
3896     for (i = 0; i < SHIFTS; i++)
3897     {
3898         clear_rvec(f_t->fshift[i]);
3899     }
3900     for (i = 0; i < F_NRE; i++)
3901     {
3902         f_t->ener[i] = 0;
3903     }
3904     for (i = 0; i < egNR; i++)
3905     {
3906         for (j = 0; j < f_t->grpp.nener; j++)
3907         {
3908             f_t->grpp.ener[i][j] = 0;
3909         }
3910     }
3911     for (i = 0; i < efptNR; i++)
3912     {
3913         f_t->dvdl[i] = 0;
3914     }
3915 }
3916
3917 static void reduce_thread_force_buffer(int n, rvec *f,
3918                                        int nthreads, f_thread_t *f_t,
3919                                        int nblock, int block_size)
3920 {
3921     /* The max thread number is arbitrary,
3922      * we used a fixed number to avoid memory management.
3923      * Using more than 16 threads is probably never useful performance wise.
3924      */
3925 #define MAX_BONDED_THREADS 256
3926     int b;
3927
3928     if (nthreads > MAX_BONDED_THREADS)
3929     {
3930         gmx_fatal(FARGS, "Can not reduce bonded forces on more than %d threads",
3931                   MAX_BONDED_THREADS);
3932     }
3933
3934     /* This reduction can run on any number of threads,
3935      * independently of nthreads.
3936      */
3937 #pragma omp parallel for num_threads(nthreads) schedule(static)
3938     for (b = 0; b < nblock; b++)
3939     {
3940         rvec *fp[MAX_BONDED_THREADS];
3941         int   nfb, ft, fb;
3942         int   a0, a1, a;
3943
3944         /* Determine which threads contribute to this block */
3945         nfb = 0;
3946         for (ft = 1; ft < nthreads; ft++)
3947         {
3948             if (f_t[ft].red_mask & (1U<<b))
3949             {
3950                 fp[nfb++] = f_t[ft].f;
3951             }
3952         }
3953         if (nfb > 0)
3954         {
3955             /* Reduce force buffers for threads that contribute */
3956             a0 =  b   *block_size;
3957             a1 = (b+1)*block_size;
3958             a1 = min(a1, n);
3959             for (a = a0; a < a1; a++)
3960             {
3961                 for (fb = 0; fb < nfb; fb++)
3962                 {
3963                     rvec_inc(f[a], fp[fb][a]);
3964                 }
3965             }
3966         }
3967     }
3968 }
3969
3970 static void reduce_thread_forces(int n, rvec *f, rvec *fshift,
3971                                  real *ener, gmx_grppairener_t *grpp, real *dvdl,
3972                                  int nthreads, f_thread_t *f_t,
3973                                  int nblock, int block_size,
3974                                  gmx_bool bCalcEnerVir,
3975                                  gmx_bool bDHDL)
3976 {
3977     if (nblock > 0)
3978     {
3979         /* Reduce the bonded force buffer */
3980         reduce_thread_force_buffer(n, f, nthreads, f_t, nblock, block_size);
3981     }
3982
3983     /* When necessary, reduce energy and virial using one thread only */
3984     if (bCalcEnerVir)
3985     {
3986         int t, i, j;
3987
3988         for (i = 0; i < SHIFTS; i++)
3989         {
3990             for (t = 1; t < nthreads; t++)
3991             {
3992                 rvec_inc(fshift[i], f_t[t].fshift[i]);
3993             }
3994         }
3995         for (i = 0; i < F_NRE; i++)
3996         {
3997             for (t = 1; t < nthreads; t++)
3998             {
3999                 ener[i] += f_t[t].ener[i];
4000             }
4001         }
4002         for (i = 0; i < egNR; i++)
4003         {
4004             for (j = 0; j < f_t[1].grpp.nener; j++)
4005             {
4006                 for (t = 1; t < nthreads; t++)
4007                 {
4008
4009                     grpp->ener[i][j] += f_t[t].grpp.ener[i][j];
4010                 }
4011             }
4012         }
4013         if (bDHDL)
4014         {
4015             for (i = 0; i < efptNR; i++)
4016             {
4017
4018                 for (t = 1; t < nthreads; t++)
4019                 {
4020                     dvdl[i] += f_t[t].dvdl[i];
4021                 }
4022             }
4023         }
4024     }
4025 }
4026
4027 static real calc_one_bond(FILE *fplog, int thread,
4028                           int ftype, const t_idef *idef,
4029                           rvec x[], rvec f[], rvec fshift[],
4030                           t_forcerec *fr,
4031                           const t_pbc *pbc, const t_graph *g,
4032                           gmx_enerdata_t gmx_unused *enerd, gmx_grppairener_t *grpp,
4033                           t_nrnb *nrnb,
4034                           real *lambda, real *dvdl,
4035                           const t_mdatoms *md, t_fcdata *fcd,
4036                           gmx_bool bCalcEnerVir,
4037                           int *global_atom_index, gmx_bool bPrintSepPot)
4038 {
4039     int      ind, nat1, nbonds, efptFTYPE;
4040     real     v = 0;
4041     t_iatom *iatoms;
4042     int      nb0, nbn;
4043
4044     if (IS_RESTRAINT_TYPE(ftype))
4045     {
4046         efptFTYPE = efptRESTRAINT;
4047     }
4048     else
4049     {
4050         efptFTYPE = efptBONDED;
4051     }
4052
4053     if (interaction_function[ftype].flags & IF_BOND &&
4054         !(ftype == F_CONNBONDS || ftype == F_POSRES))
4055     {
4056         ind       = interaction_function[ftype].nrnb_ind;
4057         nat1      = interaction_function[ftype].nratoms + 1;
4058         nbonds    = idef->il[ftype].nr/nat1;
4059         iatoms    = idef->il[ftype].iatoms;
4060
4061         nb0 = ((nbonds* thread   )/(fr->nthreads))*nat1;
4062         nbn = ((nbonds*(thread+1))/(fr->nthreads))*nat1 - nb0;
4063
4064         if (!IS_LISTED_LJ_C(ftype))
4065         {
4066             if (ftype == F_CMAP)
4067             {
4068                 v = cmap_dihs(nbn, iatoms+nb0,
4069                               idef->iparams, &idef->cmap_grid,
4070                               (const rvec*)x, f, fshift,
4071                               pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]),
4072                               md, fcd, global_atom_index);
4073             }
4074 #ifdef SIMD_BONDEDS
4075             else if (ftype == F_ANGLES &&
4076                      !bCalcEnerVir && fr->efep == efepNO)
4077             {
4078                 /* No energies, shift forces, dvdl */
4079                 angles_noener_simd(nbn, idef->il[ftype].iatoms+nb0,
4080                                    idef->iparams,
4081                                    (const rvec*)x, f,
4082                                    pbc, g, lambda[efptFTYPE], md, fcd,
4083                                    global_atom_index);
4084                 v = 0;
4085             }
4086 #endif
4087             else if (ftype == F_PDIHS &&
4088                      !bCalcEnerVir && fr->efep == efepNO)
4089             {
4090                 /* No energies, shift forces, dvdl */
4091 #ifndef SIMD_BONDEDS
4092                 pdihs_noener
4093 #else
4094                 pdihs_noener_simd
4095 #endif
4096                     (nbn, idef->il[ftype].iatoms+nb0,
4097                     idef->iparams,
4098                     (const rvec*)x, f,
4099                     pbc, g, lambda[efptFTYPE], md, fcd,
4100                     global_atom_index);
4101                 v = 0;
4102             }
4103             else
4104             {
4105                 v = interaction_function[ftype].ifunc(nbn, iatoms+nb0,
4106                                                       idef->iparams,
4107                                                       (const rvec*)x, f, fshift,
4108                                                       pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]),
4109                                                       md, fcd, global_atom_index);
4110             }
4111             if (bPrintSepPot)
4112             {
4113                 fprintf(fplog, "  %-23s #%4d  V %12.5e  dVdl %12.5e\n",
4114                         interaction_function[ftype].longname,
4115                         nbonds/nat1, v, lambda[efptFTYPE]);
4116             }
4117         }
4118         else
4119         {
4120             v = do_nonbonded_listed(ftype, nbn, iatoms+nb0, idef->iparams, (const rvec*)x, f, fshift,
4121                                     pbc, g, lambda, dvdl, md, fr, grpp, global_atom_index);
4122
4123             if (bPrintSepPot)
4124             {
4125                 fprintf(fplog, "  %-5s + %-15s #%4d                  dVdl %12.5e\n",
4126                         interaction_function[ftype].longname,
4127                         interaction_function[F_LJ14].longname, nbonds/nat1, dvdl[efptVDW]);
4128                 fprintf(fplog, "  %-5s + %-15s #%4d                  dVdl %12.5e\n",
4129                         interaction_function[ftype].longname,
4130                         interaction_function[F_COUL14].longname, nbonds/nat1, dvdl[efptCOUL]);
4131             }
4132         }
4133         if (ind != -1 && thread == 0)
4134         {
4135             inc_nrnb(nrnb, ind, nbonds);
4136         }
4137     }
4138
4139     return v;
4140 }
4141
4142 /* WARNING!  THIS FUNCTION MUST EXACTLY TRACK THE calc
4143    function, or horrible things will happen when doing free energy
4144    calculations!  In a good coding world, this would not be a
4145    different function, but for speed reasons, it needs to be made a
4146    separate function.  TODO for 5.0 - figure out a way to reorganize
4147    to reduce duplication.
4148  */
4149
4150 static real calc_one_bond_foreign(FILE gmx_unused *fplog, int ftype, const t_idef *idef,
4151                                   rvec x[], rvec f[], t_forcerec *fr,
4152                                   const t_pbc *pbc, const t_graph *g,
4153                                   gmx_grppairener_t *grpp, t_nrnb *nrnb,
4154                                   real *lambda, real *dvdl,
4155                                   const t_mdatoms *md, t_fcdata *fcd,
4156                                   int *global_atom_index, gmx_bool gmx_unused bPrintSepPot)
4157 {
4158     int      ind, nat1, nbonds, efptFTYPE, nbonds_np;
4159     real     v = 0;
4160     t_iatom *iatoms;
4161
4162     if (IS_RESTRAINT_TYPE(ftype))
4163     {
4164         efptFTYPE = efptRESTRAINT;
4165     }
4166     else
4167     {
4168         efptFTYPE = efptBONDED;
4169     }
4170
4171     if (ftype < F_GB12 || ftype > F_GB14)
4172     {
4173         if (interaction_function[ftype].flags & IF_BOND &&
4174             !(ftype == F_CONNBONDS || ftype == F_POSRES || ftype == F_FBPOSRES))
4175         {
4176             ind       = interaction_function[ftype].nrnb_ind;
4177             nat1      = interaction_function[ftype].nratoms+1;
4178             nbonds_np = idef->il[ftype].nr_nonperturbed;
4179             nbonds    = idef->il[ftype].nr - nbonds_np;
4180             iatoms    = idef->il[ftype].iatoms + nbonds_np;
4181             if (nbonds > 0)
4182             {
4183                 if (!IS_LISTED_LJ_C(ftype))
4184                 {
4185                     if (ftype == F_CMAP)
4186                     {
4187                         v = cmap_dihs(nbonds, iatoms,
4188                                       idef->iparams, &idef->cmap_grid,
4189                                       (const rvec*)x, f, fr->fshift,
4190                                       pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]), md, fcd,
4191                                       global_atom_index);
4192                     }
4193                     else
4194                     {
4195                         v =     interaction_function[ftype].ifunc(nbonds, iatoms,
4196                                                                   idef->iparams,
4197                                                                   (const rvec*)x, f, fr->fshift,
4198                                                                   pbc, g, lambda[efptFTYPE], &dvdl[efptFTYPE],
4199                                                                   md, fcd, global_atom_index);
4200                     }
4201                 }
4202                 else
4203                 {
4204                     v = do_nonbonded_listed(ftype, nbonds, iatoms,
4205                                             idef->iparams,
4206                                             (const rvec*)x, f, fr->fshift,
4207                                             pbc, g, lambda, dvdl,
4208                                             md, fr, grpp, global_atom_index);
4209                 }
4210                 if (ind != -1)
4211                 {
4212                     inc_nrnb(nrnb, ind, nbonds/nat1);
4213                 }
4214             }
4215         }
4216     }
4217     return v;
4218 }
4219
4220 void calc_bonds(FILE *fplog, const gmx_multisim_t *ms,
4221                 const t_idef *idef,
4222                 rvec x[], history_t *hist,
4223                 rvec f[], t_forcerec *fr,
4224                 const t_pbc *pbc, const t_graph *g,
4225                 gmx_enerdata_t *enerd, t_nrnb *nrnb,
4226                 real *lambda,
4227                 const t_mdatoms *md,
4228                 t_fcdata *fcd, int *global_atom_index,
4229                 t_atomtypes gmx_unused *atype, gmx_genborn_t gmx_unused *born,
4230                 int force_flags,
4231                 gmx_bool bPrintSepPot, gmx_large_int_t step)
4232 {
4233     gmx_bool      bCalcEnerVir;
4234     int           i;
4235     real          v, dvdl[efptNR], dvdl_dum[efptNR]; /* The dummy array is to have a place to store the dhdl at other values
4236                                                         of lambda, which will be thrown away in the end*/
4237     const  t_pbc *pbc_null;
4238     char          buf[22];
4239     int           thread;
4240
4241     bCalcEnerVir = (force_flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY));
4242
4243     for (i = 0; i < efptNR; i++)
4244     {
4245         dvdl[i] = 0.0;
4246     }
4247     if (fr->bMolPBC)
4248     {
4249         pbc_null = pbc;
4250     }
4251     else
4252     {
4253         pbc_null = NULL;
4254     }
4255     if (bPrintSepPot)
4256     {
4257         fprintf(fplog, "Step %s: bonded V and dVdl for this node\n",
4258                 gmx_step_str(step, buf));
4259     }
4260
4261 #ifdef DEBUG
4262     if (g && debug)
4263     {
4264         p_graph(debug, "Bondage is fun", g);
4265     }
4266 #endif
4267
4268     /* Do pre force calculation stuff which might require communication */
4269     if (idef->il[F_ORIRES].nr)
4270     {
4271         enerd->term[F_ORIRESDEV] =
4272             calc_orires_dev(ms, idef->il[F_ORIRES].nr,
4273                             idef->il[F_ORIRES].iatoms,
4274                             idef->iparams, md, (const rvec*)x,
4275                             pbc_null, fcd, hist);
4276     }
4277     if (idef->il[F_DISRES].nr)
4278     {
4279         calc_disres_R_6(ms, idef->il[F_DISRES].nr,
4280                         idef->il[F_DISRES].iatoms,
4281                         idef->iparams, (const rvec*)x, pbc_null,
4282                         fcd, hist);
4283     }
4284
4285 #pragma omp parallel for num_threads(fr->nthreads) schedule(static)
4286     for (thread = 0; thread < fr->nthreads; thread++)
4287     {
4288         int                ftype, nbonds, ind, nat1;
4289         real              *epot, v;
4290         /* thread stuff */
4291         rvec              *ft, *fshift;
4292         real              *dvdlt;
4293         gmx_grppairener_t *grpp;
4294         int                nb0, nbn;
4295
4296         if (thread == 0)
4297         {
4298             ft     = f;
4299             fshift = fr->fshift;
4300             epot   = enerd->term;
4301             grpp   = &enerd->grpp;
4302             dvdlt  = dvdl;
4303         }
4304         else
4305         {
4306             zero_thread_forces(&fr->f_t[thread], fr->natoms_force,
4307                                fr->red_nblock, 1<<fr->red_ashift);
4308
4309             ft     = fr->f_t[thread].f;
4310             fshift = fr->f_t[thread].fshift;
4311             epot   = fr->f_t[thread].ener;
4312             grpp   = &fr->f_t[thread].grpp;
4313             dvdlt  = fr->f_t[thread].dvdl;
4314         }
4315         /* Loop over all bonded force types to calculate the bonded forces */
4316         for (ftype = 0; (ftype < F_NRE); ftype++)
4317         {
4318             if (idef->il[ftype].nr > 0 &&
4319                 (interaction_function[ftype].flags & IF_BOND) &&
4320                 (ftype < F_GB12 || ftype > F_GB14) &&
4321                 !(ftype == F_CONNBONDS || ftype == F_POSRES))
4322             {
4323                 v = calc_one_bond(fplog, thread, ftype, idef, x,
4324                                   ft, fshift, fr, pbc_null, g, enerd, grpp,
4325                                   nrnb, lambda, dvdlt,
4326                                   md, fcd, bCalcEnerVir,
4327                                   global_atom_index, bPrintSepPot);
4328                 epot[ftype]        += v;
4329             }
4330         }
4331     }
4332     if (fr->nthreads > 1)
4333     {
4334         reduce_thread_forces(fr->natoms_force, f, fr->fshift,
4335                              enerd->term, &enerd->grpp, dvdl,
4336                              fr->nthreads, fr->f_t,
4337                              fr->red_nblock, 1<<fr->red_ashift,
4338                              bCalcEnerVir,
4339                              force_flags & GMX_FORCE_DHDL);
4340     }
4341     if (force_flags & GMX_FORCE_DHDL)
4342     {
4343         for (i = 0; i < efptNR; i++)
4344         {
4345             enerd->dvdl_nonlin[i] += dvdl[i];
4346         }
4347     }
4348
4349     /* Copy the sum of violations for the distance restraints from fcd */
4350     if (fcd)
4351     {
4352         enerd->term[F_DISRESVIOL] = fcd->disres.sumviol;
4353
4354     }
4355 }
4356
4357 void calc_bonds_lambda(FILE *fplog,
4358                        const t_idef *idef,
4359                        rvec x[],
4360                        t_forcerec *fr,
4361                        const t_pbc *pbc, const t_graph *g,
4362                        gmx_grppairener_t *grpp, real *epot, t_nrnb *nrnb,
4363                        real *lambda,
4364                        const t_mdatoms *md,
4365                        t_fcdata *fcd,
4366                        int *global_atom_index)
4367 {
4368     int           i, ftype, nbonds_np, nbonds, ind, nat;
4369     real          v, dr, dr2;
4370     real          dvdl_dum[efptNR];
4371     rvec         *f, *fshift_orig;
4372     const  t_pbc *pbc_null;
4373     t_iatom      *iatom_fe;
4374
4375     if (fr->bMolPBC)
4376     {
4377         pbc_null = pbc;
4378     }
4379     else
4380     {
4381         pbc_null = NULL;
4382     }
4383
4384     snew(f, fr->natoms_force);
4385     /* We want to preserve the fshift array in forcerec */
4386     fshift_orig = fr->fshift;
4387     snew(fr->fshift, SHIFTS);
4388
4389     /* Loop over all bonded force types to calculate the bonded forces */
4390     for (ftype = 0; (ftype < F_NRE); ftype++)
4391     {
4392         v = calc_one_bond_foreign(fplog, ftype, idef, x,
4393                                   f, fr, pbc_null, g, grpp, nrnb, lambda, dvdl_dum,
4394                                   md, fcd, global_atom_index, FALSE);
4395         epot[ftype] += v;
4396     }
4397
4398     sfree(fr->fshift);
4399     fr->fshift = fshift_orig;
4400     sfree(f);
4401 }