src/gromacs/gmxlib/bondfree.c

   1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
   2  *
   3  *
   4  *                This source code is part of
   5  *
   6  *                 G   R   O   M   A   C   S
   7  *
   8  *          GROningen MAchine for Chemical Simulations
   9  *
  10  *                        VERSION 3.2.0
  11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  13  * Copyright (c) 2001-2004, The GROMACS development team,
  14  * check out http://www.gromacs.org for more information.
  15
  16  * This program is free software; you can redistribute it and/or
  17  * modify it under the terms of the GNU General Public License
  18  * as published by the Free Software Foundation; either version 2
  19  * of the License, or (at your option) any later version.
  20  *
  21  * If you want to redistribute modifications, please consider that
  22  * scientific software is very special. Version control is crucial -
  23  * bugs must be traceable. We will be happy to consider code for
  24  * inclusion in the official distribution, but derived work must not
  25  * be called official GROMACS. Details are found in the README & COPYING
  26  * files - if they are missing, get the official version at www.gromacs.org.
  27  *
  28  * To help us fund GROMACS development, we humbly ask that you cite
  29  * the papers on the package - you can find them in the top README file.
  30  *
  31  * For more info, check our website at http://www.gromacs.org
  32  *
  33  * And Hey:
  34  * GROningen Mixture of Alchemy and Childrens' Stories
  35  */
  36 #ifdef HAVE_CONFIG_H
  37 #include <config.h>
  38 #endif
  39
  40 #include <math.h>
  41 #include <assert.h>
  42 #include "physics.h"
  43 #include "vec.h"
  44 #include "maths.h"
  45 #include "txtdump.h"
  46 #include "bondf.h"
  47 #include "smalloc.h"
  48 #include "pbc.h"
  49 #include "ns.h"
  50 #include "macros.h"
  51 #include "names.h"
  52 #include "gmx_fatal.h"
  53 #include "mshift.h"
  54 #include "main.h"
  55 #include "disre.h"
  56 #include "orires.h"
  57 #include "force.h"
  58 #include "nonbonded.h"
  59
  60 /* Include the SIMD macro file and then check for support */
  61 #include "gmx_simd_macros.h"
  62 #if defined GMX_HAVE_SIMD_MACROS && defined GMX_SIMD_HAVE_TRIGONOMETRIC
  63 #define SIMD_BONDEDS
  64 #include "gmx_simd_vec.h"
  65 #endif
  66
/* Constant integer coefficient table, written out below as 16 rows of 16
 * entries and stored as one flat array of 256 values.
 * NOTE(review): judging by the name this is the weight matrix used for the
 * CMAP correction-map interpolation - confirm against the code that reads it.
 * Find a better place for this?
 */
const int cmap_coeff_matrix[] = {
    1, 0, -3,  2, 0, 0,  0,  0, -3,  0,  9, -6,  2,  0, -6,  4,
    0, 0,  0,  0, 0, 0,  0,  0,  3,  0, -9,  6, -2,  0,  6, -4,
    0, 0,  0,  0, 0, 0,  0,  0,  0,  0,  9, -6,  0,  0, -6,  4,
    0, 0,  3, -2, 0, 0,  0,  0,  0,  0, -9,  6,  0,  0,  6, -4,
    0, 0,  0,  0, 1, 0, -3,  2, -2,  0,  6, -4,  1,  0, -3,  2,
    0, 0,  0,  0, 0, 0,  0,  0, -1,  0,  3, -2,  1,  0, -3,  2,
    0, 0,  0,  0, 0, 0,  0,  0,  0,  0, -3,  2,  0,  0,  3, -2,
    0, 0,  0,  0, 0, 0,  3, -2,  0,  0, -6,  4,  0,  0,  3, -2,
    0, 1, -2,  1, 0, 0,  0,  0,  0, -3,  6, -3,  0,  2, -4,  2,
    0, 0,  0,  0, 0, 0,  0,  0,  0,  3, -6,  3,  0, -2,  4, -2,
    0, 0,  0,  0, 0, 0,  0,  0,  0,  0, -3,  3,  0,  0,  2, -2,
    0, 0, -1,  1, 0, 0,  0,  0,  0,  0,  3, -3,  0,  0, -2,  2,
    0, 0,  0,  0, 0, 1, -2,  1,  0, -2,  4, -2,  0,  1, -2,  1,
    0, 0,  0,  0, 0, 0,  0,  0,  0, -1,  2, -1,  0,  1, -2,  1,
    0, 0,  0,  0, 0, 0,  0,  0,  0,  0,  1, -1,  0,  0, -1,  1,
    0, 0,  0,  0, 0, 0, -1,  1,  0,  0,  2, -2,  0,  0, -1,  1
};
  86
  87
  88
  89 int glatnr(int *global_atom_index, int i)
  90 {
  91     int atnr;
  92
  93     if (global_atom_index == NULL)
  94     {
  95         atnr = i + 1;
  96     }
  97     else
  98     {
  99         atnr = global_atom_index[i] + 1;
 100     }
 101
 102     return atnr;
 103 }
 104
 105 static int pbc_rvec_sub(const t_pbc *pbc, const rvec xi, const rvec xj, rvec dx)
 106 {
 107     if (pbc)
 108     {
 109         return pbc_dx_aiuc(pbc, xi, xj, dx);
 110     }
 111     else
 112     {
 113         rvec_sub(xi, xj, dx);
 114         return CENTRAL;
 115     }
 116 }
 117
 118 #ifdef SIMD_BONDEDS
 119
/* SIMD PBC data structure, containing 1/boxdiag and the box vectors.
 * All members are filled by set_pbc_simd(), each with a single value passed
 * through gmx_set1_pr() (or gmx_setzero_pr() when no pbc struct is given).
 */
typedef struct {
    gmx_mm_pr inv_bzz; /* 1/box[ZZ][ZZ], or 0 when z is not a PBC dimension */
    gmx_mm_pr inv_byy; /* 1/box[YY][YY], or 0 when y is not a PBC dimension */
    gmx_mm_pr inv_bxx; /* 1/box[XX][XX], or 0 when x is not a PBC dimension */
    gmx_mm_pr bzx;     /* box[ZZ][XX] */
    gmx_mm_pr bzy;     /* box[ZZ][YY] */
    gmx_mm_pr bzz;     /* box[ZZ][ZZ] */
    gmx_mm_pr byx;     /* box[YY][XX] */
    gmx_mm_pr byy;     /* box[YY][YY] */
    gmx_mm_pr bxx;     /* box[XX][XX] */
} pbc_simd_t;
 132
 133 /* Set the SIMD pbc data from a normal t_pbc struct */
 134 static void set_pbc_simd(const t_pbc *pbc, pbc_simd_t *pbc_simd)
 135 {
 136     rvec inv_bdiag;
 137     int  d;
 138
 139     /* Setting inv_bdiag to 0 effectively turns off PBC */
 140     clear_rvec(inv_bdiag);
 141     if (pbc != NULL)
 142     {
 143         for (d = 0; d < pbc->ndim_ePBC; d++)
 144         {
 145             inv_bdiag[d] = 1.0/pbc->box[d][d];
 146         }
 147     }
 148
 149     pbc_simd->inv_bzz = gmx_set1_pr(inv_bdiag[ZZ]);
 150     pbc_simd->inv_byy = gmx_set1_pr(inv_bdiag[YY]);
 151     pbc_simd->inv_bxx = gmx_set1_pr(inv_bdiag[XX]);
 152
 153     if (pbc != NULL)
 154     {
 155         pbc_simd->bzx = gmx_set1_pr(pbc->box[ZZ][XX]);
 156         pbc_simd->bzy = gmx_set1_pr(pbc->box[ZZ][YY]);
 157         pbc_simd->bzz = gmx_set1_pr(pbc->box[ZZ][ZZ]);
 158         pbc_simd->byx = gmx_set1_pr(pbc->box[YY][XX]);
 159         pbc_simd->byy = gmx_set1_pr(pbc->box[YY][YY]);
 160         pbc_simd->bxx = gmx_set1_pr(pbc->box[XX][XX]);
 161     }
 162     else
 163     {
 164         pbc_simd->bzx = gmx_setzero_pr();
 165         pbc_simd->bzy = gmx_setzero_pr();
 166         pbc_simd->bzz = gmx_setzero_pr();
 167         pbc_simd->byx = gmx_setzero_pr();
 168         pbc_simd->byy = gmx_setzero_pr();
 169         pbc_simd->bxx = gmx_setzero_pr();
 170     }
 171 }
 172
 173 /* Correct distance vector *dx,*dy,*dz for PBC using SIMD */
 174 static gmx_inline void
 175 pbc_dx_simd(gmx_mm_pr *dx, gmx_mm_pr *dy, gmx_mm_pr *dz,
 176             const pbc_simd_t *pbc)
 177 {
 178     gmx_mm_pr sh;
 179
 180     sh  = gmx_round_pr(gmx_mul_pr(*dz, pbc->inv_bzz));
 181     *dx = gmx_nmsub_pr(sh, pbc->bzx, *dx);
 182     *dy = gmx_nmsub_pr(sh, pbc->bzy, *dy);
 183     *dz = gmx_nmsub_pr(sh, pbc->bzz, *dz);
 184
 185     sh  = gmx_round_pr(gmx_mul_pr(*dy, pbc->inv_byy));
 186     *dx = gmx_nmsub_pr(sh, pbc->byx, *dx);
 187     *dy = gmx_nmsub_pr(sh, pbc->byy, *dy);
 188
 189     sh  = gmx_round_pr(gmx_mul_pr(*dx, pbc->inv_bxx));
 190     *dx = gmx_nmsub_pr(sh, pbc->bxx, *dx);
 191 }
 192
 193 #endif /* SIMD_BONDEDS */
 194
 195 /*
 196  * Morse potential bond by Frank Everdij
 197  *
 198  * Three parameters needed:
 199  *
 200  * b0 = equilibrium distance in nm
 201  * be = beta in nm^-1 (actually, it's nu_e*Sqrt(2*pi*pi*mu/D_e))
 202  * cb = well depth in kJ/mol
 203  *
 204  * Note: the potential is referenced to be +cb at infinite separation
 205  *       and zero at the equilibrium distance!
 206  */
 207
 208 real morse_bonds(int nbonds,
 209                  const t_iatom forceatoms[], const t_iparams forceparams[],
 210                  const rvec x[], rvec f[], rvec fshift[],
 211                  const t_pbc *pbc, const t_graph *g,
 212                  real lambda, real *dvdlambda,
 213                  const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
 214                  int gmx_unused *global_atom_index)
 215 {
 216     const real one = 1.0;
 217     const real two = 2.0;
 218     real       dr, dr2, temp, omtemp, cbomtemp, fbond, vbond, fij, vtot;
 219     real       b0, be, cb, b0A, beA, cbA, b0B, beB, cbB, L1;
 220     rvec       dx;
 221     int        i, m, ki, type, ai, aj;
 222     ivec       dt;
 223
 224     vtot = 0.0;
 225     for (i = 0; (i < nbonds); )
 226     {
 227         type = forceatoms[i++];
 228         ai   = forceatoms[i++];
 229         aj   = forceatoms[i++];
 230
 231         b0A   = forceparams[type].morse.b0A;
 232         beA   = forceparams[type].morse.betaA;
 233         cbA   = forceparams[type].morse.cbA;
 234
 235         b0B   = forceparams[type].morse.b0B;
 236         beB   = forceparams[type].morse.betaB;
 237         cbB   = forceparams[type].morse.cbB;
 238
 239         L1 = one-lambda;                            /* 1 */
 240         b0 = L1*b0A + lambda*b0B;                   /* 3 */
 241         be = L1*beA + lambda*beB;                   /* 3 */
 242         cb = L1*cbA + lambda*cbB;                   /* 3 */
 243
 244         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3          */
 245         dr2  = iprod(dx, dx);                       /*   5          */
 246         dr   = dr2*gmx_invsqrt(dr2);                /*  10          */
 247         temp = exp(-be*(dr-b0));                    /*  12          */
 248
 249         if (temp == one)
 250         {
 251             /* bonds are constrainted. This may _not_ include bond constraints if they are lambda dependent */
 252             *dvdlambda += cbB-cbA;
 253             continue;
 254         }
 255
 256         omtemp    = one-temp;                                                                                        /*   1          */
 257         cbomtemp  = cb*omtemp;                                                                                       /*   1          */
 258         vbond     = cbomtemp*omtemp;                                                                                 /*   1          */
 259         fbond     = -two*be*temp*cbomtemp*gmx_invsqrt(dr2);                                                          /*   9          */
 260         vtot     += vbond;                                                                                           /*   1          */
 261
 262         *dvdlambda += (cbB - cbA) * omtemp * omtemp - (2-2*omtemp)*omtemp * cb * ((b0B-b0A)*be - (beB-beA)*(dr-b0)); /* 15 */
 263
 264         if (g)
 265         {
 266             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
 267             ki = IVEC2IS(dt);
 268         }
 269
 270         for (m = 0; (m < DIM); m++)                    /*  15          */
 271         {
 272             fij                 = fbond*dx[m];
 273             f[ai][m]           += fij;
 274             f[aj][m]           -= fij;
 275             fshift[ki][m]      += fij;
 276             fshift[CENTRAL][m] -= fij;
 277         }
 278     }                                         /*  83 TOTAL    */
 279     return vtot;
 280 }
 281
 282 real cubic_bonds(int nbonds,
 283                  const t_iatom forceatoms[], const t_iparams forceparams[],
 284                  const rvec x[], rvec f[], rvec fshift[],
 285                  const t_pbc *pbc, const t_graph *g,
 286                  real gmx_unused lambda, real gmx_unused *dvdlambda,
 287                  const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
 288                  int gmx_unused *global_atom_index)
 289 {
 290     const real three = 3.0;
 291     const real two   = 2.0;
 292     real       kb, b0, kcub;
 293     real       dr, dr2, dist, kdist, kdist2, fbond, vbond, fij, vtot;
 294     rvec       dx;
 295     int        i, m, ki, type, ai, aj;
 296     ivec       dt;
 297
 298     vtot = 0.0;
 299     for (i = 0; (i < nbonds); )
 300     {
 301         type = forceatoms[i++];
 302         ai   = forceatoms[i++];
 303         aj   = forceatoms[i++];
 304
 305         b0   = forceparams[type].cubic.b0;
 306         kb   = forceparams[type].cubic.kb;
 307         kcub = forceparams[type].cubic.kcub;
 308
 309         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx);     /*   3          */
 310         dr2  = iprod(dx, dx);                           /*   5          */
 311
 312         if (dr2 == 0.0)
 313         {
 314             continue;
 315         }
 316
 317         dr         = dr2*gmx_invsqrt(dr2);                  /*  10          */
 318         dist       = dr-b0;
 319         kdist      = kb*dist;
 320         kdist2     = kdist*dist;
 321
 322         vbond      = kdist2 + kcub*kdist2*dist;
 323         fbond      = -(two*kdist + three*kdist2*kcub)/dr;
 324
 325         vtot      += vbond;   /* 21 */
 326
 327         if (g)
 328         {
 329             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
 330             ki = IVEC2IS(dt);
 331         }
 332         for (m = 0; (m < DIM); m++)                    /*  15          */
 333         {
 334             fij                 = fbond*dx[m];
 335             f[ai][m]           += fij;
 336             f[aj][m]           -= fij;
 337             fshift[ki][m]      += fij;
 338             fshift[CENTRAL][m] -= fij;
 339         }
 340     }                                         /*  54 TOTAL    */
 341     return vtot;
 342 }
 343
 344 real FENE_bonds(int nbonds,
 345                 const t_iatom forceatoms[], const t_iparams forceparams[],
 346                 const rvec x[], rvec f[], rvec fshift[],
 347                 const t_pbc *pbc, const t_graph *g,
 348                 real gmx_unused lambda, real gmx_unused *dvdlambda,
 349                 const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
 350                 int *global_atom_index)
 351 {
 352     const real half = 0.5;
 353     const real one  = 1.0;
 354     real       bm, kb;
 355     real       dr, dr2, bm2, omdr2obm2, fbond, vbond, fij, vtot;
 356     rvec       dx;
 357     int        i, m, ki, type, ai, aj;
 358     ivec       dt;
 359
 360     vtot = 0.0;
 361     for (i = 0; (i < nbonds); )
 362     {
 363         type = forceatoms[i++];
 364         ai   = forceatoms[i++];
 365         aj   = forceatoms[i++];
 366
 367         bm   = forceparams[type].fene.bm;
 368         kb   = forceparams[type].fene.kb;
 369
 370         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx);     /*   3          */
 371         dr2  = iprod(dx, dx);                           /*   5          */
 372
 373         if (dr2 == 0.0)
 374         {
 375             continue;
 376         }
 377
 378         bm2 = bm*bm;
 379
 380         if (dr2 >= bm2)
 381         {
 382             gmx_fatal(FARGS,
 383                       "r^2 (%f) >= bm^2 (%f) in FENE bond between atoms %d and %d",
 384                       dr2, bm2,
 385                       glatnr(global_atom_index, ai),
 386                       glatnr(global_atom_index, aj));
 387         }
 388
 389         omdr2obm2  = one - dr2/bm2;
 390
 391         vbond      = -half*kb*bm2*log(omdr2obm2);
 392         fbond      = -kb/omdr2obm2;
 393
 394         vtot      += vbond;   /* 35 */
 395
 396         if (g)
 397         {
 398             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
 399             ki = IVEC2IS(dt);
 400         }
 401         for (m = 0; (m < DIM); m++)                    /*  15          */
 402         {
 403             fij                 = fbond*dx[m];
 404             f[ai][m]           += fij;
 405             f[aj][m]           -= fij;
 406             fshift[ki][m]      += fij;
 407             fshift[CENTRAL][m] -= fij;
 408         }
 409     }                                         /*  58 TOTAL    */
 410     return vtot;
 411 }
 412
 413 real harmonic(real kA, real kB, real xA, real xB, real x, real lambda,
 414               real *V, real *F)
 415 {
 416     const real half = 0.5;
 417     real       L1, kk, x0, dx, dx2;
 418     real       v, f, dvdlambda;
 419
 420     L1    = 1.0-lambda;
 421     kk    = L1*kA+lambda*kB;
 422     x0    = L1*xA+lambda*xB;
 423
 424     dx    = x-x0;
 425     dx2   = dx*dx;
 426
 427     f          = -kk*dx;
 428     v          = half*kk*dx2;
 429     dvdlambda  = half*(kB-kA)*dx2 + (xA-xB)*kk*dx;
 430
 431     *F    = f;
 432     *V    = v;
 433
 434     return dvdlambda;
 435
 436     /* That was 19 flops */
 437 }
 438
 439
 440 real bonds(int nbonds,
 441            const t_iatom forceatoms[], const t_iparams forceparams[],
 442            const rvec x[], rvec f[], rvec fshift[],
 443            const t_pbc *pbc, const t_graph *g,
 444            real lambda, real *dvdlambda,
 445            const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
 446            int gmx_unused *global_atom_index)
 447 {
 448     int  i, m, ki, ai, aj, type;
 449     real dr, dr2, fbond, vbond, fij, vtot;
 450     rvec dx;
 451     ivec dt;
 452
 453     vtot = 0.0;
 454     for (i = 0; (i < nbonds); )
 455     {
 456         type = forceatoms[i++];
 457         ai   = forceatoms[i++];
 458         aj   = forceatoms[i++];
 459
 460         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
 461         dr2  = iprod(dx, dx);                       /*   5              */
 462         dr   = dr2*gmx_invsqrt(dr2);                /*  10              */
 463
 464         *dvdlambda += harmonic(forceparams[type].harmonic.krA,
 465                                forceparams[type].harmonic.krB,
 466                                forceparams[type].harmonic.rA,
 467                                forceparams[type].harmonic.rB,
 468                                dr, lambda, &vbond, &fbond); /*  19  */
 469
 470         if (dr2 == 0.0)
 471         {
 472             continue;
 473         }
 474
 475
 476         vtot  += vbond;            /* 1*/
 477         fbond *= gmx_invsqrt(dr2); /*   6               */
 478 #ifdef DEBUG
 479         if (debug)
 480         {
 481             fprintf(debug, "BONDS: dr = %10g  vbond = %10g  fbond = %10g\n",
 482                     dr, vbond, fbond);
 483         }
 484 #endif
 485         if (g)
 486         {
 487             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
 488             ki = IVEC2IS(dt);
 489         }
 490         for (m = 0; (m < DIM); m++)     /*  15          */
 491         {
 492             fij                 = fbond*dx[m];
 493             f[ai][m]           += fij;
 494             f[aj][m]           -= fij;
 495             fshift[ki][m]      += fij;
 496             fshift[CENTRAL][m] -= fij;
 497         }
 498     }               /* 59 TOTAL */
 499     return vtot;
 500 }
 501
 502 real restraint_bonds(int nbonds,
 503                      const t_iatom forceatoms[], const t_iparams forceparams[],
 504                      const rvec x[], rvec f[], rvec fshift[],
 505                      const t_pbc *pbc, const t_graph *g,
 506                      real lambda, real *dvdlambda,
 507                      const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
 508                      int gmx_unused *global_atom_index)
 509 {
 510     int  i, m, ki, ai, aj, type;
 511     real dr, dr2, fbond, vbond, fij, vtot;
 512     real L1;
 513     real low, dlow, up1, dup1, up2, dup2, k, dk;
 514     real drh, drh2;
 515     rvec dx;
 516     ivec dt;
 517
 518     L1   = 1.0 - lambda;
 519
 520     vtot = 0.0;
 521     for (i = 0; (i < nbonds); )
 522     {
 523         type = forceatoms[i++];
 524         ai   = forceatoms[i++];
 525         aj   = forceatoms[i++];
 526
 527         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
 528         dr2  = iprod(dx, dx);                       /*   5              */
 529         dr   = dr2*gmx_invsqrt(dr2);                /*  10              */
 530
 531         low  = L1*forceparams[type].restraint.lowA + lambda*forceparams[type].restraint.lowB;
 532         dlow =   -forceparams[type].restraint.lowA +        forceparams[type].restraint.lowB;
 533         up1  = L1*forceparams[type].restraint.up1A + lambda*forceparams[type].restraint.up1B;
 534         dup1 =   -forceparams[type].restraint.up1A +        forceparams[type].restraint.up1B;
 535         up2  = L1*forceparams[type].restraint.up2A + lambda*forceparams[type].restraint.up2B;
 536         dup2 =   -forceparams[type].restraint.up2A +        forceparams[type].restraint.up2B;
 537         k    = L1*forceparams[type].restraint.kA   + lambda*forceparams[type].restraint.kB;
 538         dk   =   -forceparams[type].restraint.kA   +        forceparams[type].restraint.kB;
 539         /* 24 */
 540
 541         if (dr < low)
 542         {
 543             drh         = dr - low;
 544             drh2        = drh*drh;
 545             vbond       = 0.5*k*drh2;
 546             fbond       = -k*drh;
 547             *dvdlambda += 0.5*dk*drh2 - k*dlow*drh;
 548         } /* 11 */
 549         else if (dr <= up1)
 550         {
 551             vbond = 0;
 552             fbond = 0;
 553         }
 554         else if (dr <= up2)
 555         {
 556             drh         = dr - up1;
 557             drh2        = drh*drh;
 558             vbond       = 0.5*k*drh2;
 559             fbond       = -k*drh;
 560             *dvdlambda += 0.5*dk*drh2 - k*dup1*drh;
 561         } /* 11 */
 562         else
 563         {
 564             drh         = dr - up2;
 565             vbond       = k*(up2 - up1)*(0.5*(up2 - up1) + drh);
 566             fbond       = -k*(up2 - up1);
 567             *dvdlambda += dk*(up2 - up1)*(0.5*(up2 - up1) + drh)
 568                 + k*(dup2 - dup1)*(up2 - up1 + drh)
 569                 - k*(up2 - up1)*dup2;
 570         }
 571
 572         if (dr2 == 0.0)
 573         {
 574             continue;
 575         }
 576
 577         vtot  += vbond;            /* 1*/
 578         fbond *= gmx_invsqrt(dr2); /*   6               */
 579 #ifdef DEBUG
 580         if (debug)
 581         {
 582             fprintf(debug, "BONDS: dr = %10g  vbond = %10g  fbond = %10g\n",
 583                     dr, vbond, fbond);
 584         }
 585 #endif
 586         if (g)
 587         {
 588             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
 589             ki = IVEC2IS(dt);
 590         }
 591         for (m = 0; (m < DIM); m++)             /*  15          */
 592         {
 593             fij                 = fbond*dx[m];
 594             f[ai][m]           += fij;
 595             f[aj][m]           -= fij;
 596             fshift[ki][m]      += fij;
 597             fshift[CENTRAL][m] -= fij;
 598         }
 599     }                   /* 59 TOTAL     */
 600
 601     return vtot;
 602 }
 603
 604 real polarize(int nbonds,
 605               const t_iatom forceatoms[], const t_iparams forceparams[],
 606               const rvec x[], rvec f[], rvec fshift[],
 607               const t_pbc *pbc, const t_graph *g,
 608               real lambda, real *dvdlambda,
 609               const t_mdatoms *md, t_fcdata gmx_unused *fcd,
 610               int gmx_unused *global_atom_index)
 611 {
 612     int  i, m, ki, ai, aj, type;
 613     real dr, dr2, fbond, vbond, fij, vtot, ksh;
 614     rvec dx;
 615     ivec dt;
 616
 617     vtot = 0.0;
 618     for (i = 0; (i < nbonds); )
 619     {
 620         type = forceatoms[i++];
 621         ai   = forceatoms[i++];
 622         aj   = forceatoms[i++];
 623         ksh  = sqr(md->chargeA[aj])*ONE_4PI_EPS0/forceparams[type].polarize.alpha;
 624         if (debug)
 625         {
 626             fprintf(debug, "POL: local ai = %d aj = %d ksh = %.3f\n", ai, aj, ksh);
 627         }
 628
 629         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx);                         /*   3      */
 630         dr2  = iprod(dx, dx);                                               /*   5              */
 631         dr   = dr2*gmx_invsqrt(dr2);                                        /*  10              */
 632
 633         *dvdlambda += harmonic(ksh, ksh, 0, 0, dr, lambda, &vbond, &fbond); /*  19  */
 634
 635         if (dr2 == 0.0)
 636         {
 637             continue;
 638         }
 639
 640         vtot  += vbond;            /* 1*/
 641         fbond *= gmx_invsqrt(dr2); /*   6               */
 642
 643         if (g)
 644         {
 645             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
 646             ki = IVEC2IS(dt);
 647         }
 648         for (m = 0; (m < DIM); m++)     /*  15          */
 649         {
 650             fij                 = fbond*dx[m];
 651             f[ai][m]           += fij;
 652             f[aj][m]           -= fij;
 653             fshift[ki][m]      += fij;
 654             fshift[CENTRAL][m] -= fij;
 655         }
 656     }               /* 59 TOTAL */
 657     return vtot;
 658 }
 659
 660 real anharm_polarize(int nbonds,
 661                      const t_iatom forceatoms[], const t_iparams forceparams[],
 662                      const rvec x[], rvec f[], rvec fshift[],
 663                      const t_pbc *pbc, const t_graph *g,
 664                      real lambda, real *dvdlambda,
 665                      const t_mdatoms *md, t_fcdata gmx_unused *fcd,
 666                      int gmx_unused *global_atom_index)
 667 {
 668     int  i, m, ki, ai, aj, type;
 669     real dr, dr2, fbond, vbond, fij, vtot, ksh, khyp, drcut, ddr, ddr3;
 670     rvec dx;
 671     ivec dt;
 672
 673     vtot = 0.0;
 674     for (i = 0; (i < nbonds); )
 675     {
 676         type  = forceatoms[i++];
 677         ai    = forceatoms[i++];
 678         aj    = forceatoms[i++];
 679         ksh   = sqr(md->chargeA[aj])*ONE_4PI_EPS0/forceparams[type].anharm_polarize.alpha; /* 7*/
 680         khyp  = forceparams[type].anharm_polarize.khyp;
 681         drcut = forceparams[type].anharm_polarize.drcut;
 682         if (debug)
 683         {
 684             fprintf(debug, "POL: local ai = %d aj = %d ksh = %.3f\n", ai, aj, ksh);
 685         }
 686
 687         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx);                         /*   3      */
 688         dr2  = iprod(dx, dx);                                               /*   5              */
 689         dr   = dr2*gmx_invsqrt(dr2);                                        /*  10              */
 690
 691         *dvdlambda += harmonic(ksh, ksh, 0, 0, dr, lambda, &vbond, &fbond); /*  19  */
 692
 693         if (dr2 == 0.0)
 694         {
 695             continue;
 696         }
 697
 698         if (dr > drcut)
 699         {
 700             ddr    = dr-drcut;
 701             ddr3   = ddr*ddr*ddr;
 702             vbond += khyp*ddr*ddr3;
 703             fbond -= 4*khyp*ddr3;
 704         }
 705         fbond *= gmx_invsqrt(dr2); /*   6               */
 706         vtot  += vbond;            /* 1*/
 707
 708         if (g)
 709         {
 710             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
 711             ki = IVEC2IS(dt);
 712         }
 713         for (m = 0; (m < DIM); m++)     /*  15          */
 714         {
 715             fij                 = fbond*dx[m];
 716             f[ai][m]           += fij;
 717             f[aj][m]           -= fij;
 718             fshift[ki][m]      += fij;
 719             fshift[CENTRAL][m] -= fij;
 720         }
 721     }               /* 72 TOTAL */
 722     return vtot;
 723 }
 724
 725 real water_pol(int nbonds,
 726                const t_iatom forceatoms[], const t_iparams forceparams[],
 727                const rvec x[], rvec f[], rvec gmx_unused fshift[],
 728                const t_pbc gmx_unused *pbc, const t_graph gmx_unused *g,
 729                real gmx_unused lambda, real gmx_unused *dvdlambda,
 730                const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
 731                int gmx_unused *global_atom_index)
 732 {
 733     /* This routine implements anisotropic polarizibility for water, through
 734      * a shell connected to a dummy with spring constant that differ in the
 735      * three spatial dimensions in the molecular frame.
 736      */
 737     int  i, m, aO, aH1, aH2, aD, aS, type, type0;
 738     rvec dOH1, dOH2, dHH, dOD, dDS, nW, kk, dx, kdx, proj;
 739 #ifdef DEBUG
 740     rvec df;
 741 #endif
 742     real vtot, fij, r_HH, r_OD, r_nW, tx, ty, tz, qS;
 743
 744     vtot = 0.0;
 745     if (nbonds > 0)
 746     {
 747         type0  = forceatoms[0];
 748         aS     = forceatoms[5];
 749         qS     = md->chargeA[aS];
 750         kk[XX] = sqr(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_x;
 751         kk[YY] = sqr(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_y;
 752         kk[ZZ] = sqr(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_z;
 753         r_HH   = 1.0/forceparams[type0].wpol.rHH;
 754         r_OD   = 1.0/forceparams[type0].wpol.rOD;
 755         if (debug)
 756         {
 757             fprintf(debug, "WPOL: qS  = %10.5f aS = %5d\n", qS, aS);
 758             fprintf(debug, "WPOL: kk  = %10.3f        %10.3f        %10.3f\n",
 759                     kk[XX], kk[YY], kk[ZZ]);
 760             fprintf(debug, "WPOL: rOH = %10.3f  rHH = %10.3f  rOD = %10.3f\n",
 761                     forceparams[type0].wpol.rOH,
 762                     forceparams[type0].wpol.rHH,
 763                     forceparams[type0].wpol.rOD);
 764         }
 765         for (i = 0; (i < nbonds); i += 6)
 766         {
 767             type = forceatoms[i];
 768             if (type != type0)
 769             {
 770                 gmx_fatal(FARGS, "Sorry, type = %d, type0 = %d, file = %s, line = %d",
 771                           type, type0, __FILE__, __LINE__);
 772             }
 773             aO   = forceatoms[i+1];
 774             aH1  = forceatoms[i+2];
 775             aH2  = forceatoms[i+3];
 776             aD   = forceatoms[i+4];
 777             aS   = forceatoms[i+5];
 778
 779             /* Compute vectors describing the water frame */
 780             rvec_sub(x[aH1], x[aO], dOH1);
 781             rvec_sub(x[aH2], x[aO], dOH2);
 782             rvec_sub(x[aH2], x[aH1], dHH);
 783             rvec_sub(x[aD], x[aO], dOD);
 784             rvec_sub(x[aS], x[aD], dDS);
 785             cprod(dOH1, dOH2, nW);
 786
 787             /* Compute inverse length of normal vector
 788              * (this one could be precomputed, but I'm too lazy now)
 789              */
 790             r_nW = gmx_invsqrt(iprod(nW, nW));
 791             /* This is for precision, but does not make a big difference,
 792              * it can go later.
 793              */
 794             r_OD = gmx_invsqrt(iprod(dOD, dOD));
 795
 796             /* Normalize the vectors in the water frame */
 797             svmul(r_nW, nW, nW);
 798             svmul(r_HH, dHH, dHH);
 799             svmul(r_OD, dOD, dOD);
 800
 801             /* Compute displacement of shell along components of the vector */
 802             dx[ZZ] = iprod(dDS, dOD);
 803             /* Compute projection on the XY plane: dDS - dx[ZZ]*dOD */
 804             for (m = 0; (m < DIM); m++)
 805             {
 806                 proj[m] = dDS[m]-dx[ZZ]*dOD[m];
 807             }
 808
 809             /*dx[XX] = iprod(dDS,nW);
 810                dx[YY] = iprod(dDS,dHH);*/
 811             dx[XX] = iprod(proj, nW);
 812             for (m = 0; (m < DIM); m++)
 813             {
 814                 proj[m] -= dx[XX]*nW[m];
 815             }
 816             dx[YY] = iprod(proj, dHH);
 817             /*#define DEBUG*/
 818 #ifdef DEBUG
 819             if (debug)
 820             {
 821                 fprintf(debug, "WPOL: dx2=%10g  dy2=%10g  dz2=%10g  sum=%10g  dDS^2=%10g\n",
 822                         sqr(dx[XX]), sqr(dx[YY]), sqr(dx[ZZ]), iprod(dx, dx), iprod(dDS, dDS));
 823                 fprintf(debug, "WPOL: dHH=(%10g,%10g,%10g)\n", dHH[XX], dHH[YY], dHH[ZZ]);
 824                 fprintf(debug, "WPOL: dOD=(%10g,%10g,%10g), 1/r_OD = %10g\n",
 825                         dOD[XX], dOD[YY], dOD[ZZ], 1/r_OD);
 826                 fprintf(debug, "WPOL: nW =(%10g,%10g,%10g), 1/r_nW = %10g\n",
 827                         nW[XX], nW[YY], nW[ZZ], 1/r_nW);
 828                 fprintf(debug, "WPOL: dx  =%10g, dy  =%10g, dz  =%10g\n",
 829                         dx[XX], dx[YY], dx[ZZ]);
 830                 fprintf(debug, "WPOL: dDSx=%10g, dDSy=%10g, dDSz=%10g\n",
 831                         dDS[XX], dDS[YY], dDS[ZZ]);
 832             }
 833 #endif
 834             /* Now compute the forces and energy */
 835             kdx[XX] = kk[XX]*dx[XX];
 836             kdx[YY] = kk[YY]*dx[YY];
 837             kdx[ZZ] = kk[ZZ]*dx[ZZ];
 838             vtot   += iprod(dx, kdx);
 839             for (m = 0; (m < DIM); m++)
 840             {
 841                 /* This is a tensor operation but written out for speed */
 842                 tx        =  nW[m]*kdx[XX];
 843                 ty        = dHH[m]*kdx[YY];
 844                 tz        = dOD[m]*kdx[ZZ];
 845                 fij       = -tx-ty-tz;
 846 #ifdef DEBUG
 847                 df[m] = fij;
 848 #endif
 849                 f[aS][m] += fij;
 850                 f[aD][m] -= fij;
 851             }
 852 #ifdef DEBUG
 853             if (debug)
 854             {
 855                 fprintf(debug, "WPOL: vwpol=%g\n", 0.5*iprod(dx, kdx));
 856                 fprintf(debug, "WPOL: df = (%10g, %10g, %10g)\n", df[XX], df[YY], df[ZZ]);
 857             }
 858 #endif
 859         }
 860     }
 861     return 0.5*vtot;
 862 }
 863
 864 static real do_1_thole(const rvec xi, const rvec xj, rvec fi, rvec fj,
 865                        const t_pbc *pbc, real qq,
 866                        rvec fshift[], real afac)
 867 {
 868     rvec r12;
 869     real r12sq, r12_1, r12n, r12bar, v0, v1, fscal, ebar, fff;
 870     int  m, t;
 871
 872     t      = pbc_rvec_sub(pbc, xi, xj, r12);                      /*  3 */
 873
 874     r12sq  = iprod(r12, r12);                                     /*  5 */
 875     r12_1  = gmx_invsqrt(r12sq);                                  /*  5 */
 876     r12bar = afac/r12_1;                                          /*  5 */
 877     v0     = qq*ONE_4PI_EPS0*r12_1;                               /*  2 */
 878     ebar   = exp(-r12bar);                                        /*  5 */
 879     v1     = (1-(1+0.5*r12bar)*ebar);                             /*  4 */
 880     fscal  = ((v0*r12_1)*v1 - v0*0.5*afac*ebar*(r12bar+1))*r12_1; /* 9 */
 881     if (debug)
 882     {
 883         fprintf(debug, "THOLE: v0 = %.3f v1 = %.3f r12= % .3f r12bar = %.3f fscal = %.3f  ebar = %.3f\n", v0, v1, 1/r12_1, r12bar, fscal, ebar);
 884     }
 885
 886     for (m = 0; (m < DIM); m++)
 887     {
 888         fff                 = fscal*r12[m];
 889         fi[m]              += fff;
 890         fj[m]              -= fff;
 891         fshift[t][m]       += fff;
 892         fshift[CENTRAL][m] -= fff;
 893     }             /* 15 */
 894
 895     return v0*v1; /* 1 */
 896     /* 54 */
 897 }
 898
 899 real thole_pol(int nbonds,
 900                const t_iatom forceatoms[], const t_iparams forceparams[],
 901                const rvec x[], rvec f[], rvec fshift[],
 902                const t_pbc *pbc, const t_graph gmx_unused *g,
 903                real gmx_unused lambda, real gmx_unused *dvdlambda,
 904                const t_mdatoms *md, t_fcdata gmx_unused *fcd,
 905                int gmx_unused *global_atom_index)
 906 {
 907     /* Interaction between two pairs of particles with opposite charge */
 908     int  i, type, a1, da1, a2, da2;
 909     real q1, q2, qq, a, al1, al2, afac;
 910     real V = 0;
 911
 912     for (i = 0; (i < nbonds); )
 913     {
 914         type  = forceatoms[i++];
 915         a1    = forceatoms[i++];
 916         da1   = forceatoms[i++];
 917         a2    = forceatoms[i++];
 918         da2   = forceatoms[i++];
 919         q1    = md->chargeA[da1];
 920         q2    = md->chargeA[da2];
 921         a     = forceparams[type].thole.a;
 922         al1   = forceparams[type].thole.alpha1;
 923         al2   = forceparams[type].thole.alpha2;
 924         qq    = q1*q2;
 925         afac  = a*pow(al1*al2, -1.0/6.0);
 926         V    += do_1_thole(x[a1], x[a2], f[a1], f[a2], pbc, qq, fshift, afac);
 927         V    += do_1_thole(x[da1], x[a2], f[da1], f[a2], pbc, -qq, fshift, afac);
 928         V    += do_1_thole(x[a1], x[da2], f[a1], f[da2], pbc, -qq, fshift, afac);
 929         V    += do_1_thole(x[da1], x[da2], f[da1], f[da2], pbc, qq, fshift, afac);
 930     }
 931     /* 290 flops */
 932     return V;
 933 }
 934
 935 real bond_angle(const rvec xi, const rvec xj, const rvec xk, const t_pbc *pbc,
 936                 rvec r_ij, rvec r_kj, real *costh,
 937                 int *t1, int *t2)
 938 /* Return value is the angle between the bonds i-j and j-k */
 939 {
 940     /* 41 FLOPS */
 941     real th;
 942
 943     *t1 = pbc_rvec_sub(pbc, xi, xj, r_ij); /*  3                */
 944     *t2 = pbc_rvec_sub(pbc, xk, xj, r_kj); /*  3                */
 945
 946     *costh = cos_angle(r_ij, r_kj);        /* 25                */
 947     th     = acos(*costh);                 /* 10                */
 948     /* 41 TOTAL */
 949     return th;
 950 }
 951
 952 real angles(int nbonds,
 953             const t_iatom forceatoms[], const t_iparams forceparams[],
 954             const rvec x[], rvec f[], rvec fshift[],
 955             const t_pbc *pbc, const t_graph *g,
 956             real lambda, real *dvdlambda,
 957             const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
 958             int gmx_unused *global_atom_index)
 959 {
 960     int  i, ai, aj, ak, t1, t2, type;
 961     rvec r_ij, r_kj;
 962     real cos_theta, cos_theta2, theta, dVdt, va, vtot;
 963     ivec jt, dt_ij, dt_kj;
 964
 965     vtot = 0.0;
 966     for (i = 0; i < nbonds; )
 967     {
 968         type = forceatoms[i++];
 969         ai   = forceatoms[i++];
 970         aj   = forceatoms[i++];
 971         ak   = forceatoms[i++];
 972
 973         theta  = bond_angle(x[ai], x[aj], x[ak], pbc,
 974                             r_ij, r_kj, &cos_theta, &t1, &t2);  /*  41          */
 975
 976         *dvdlambda += harmonic(forceparams[type].harmonic.krA,
 977                                forceparams[type].harmonic.krB,
 978                                forceparams[type].harmonic.rA*DEG2RAD,
 979                                forceparams[type].harmonic.rB*DEG2RAD,
 980                                theta, lambda, &va, &dVdt);  /*  21  */
 981         vtot += va;
 982
 983         cos_theta2 = sqr(cos_theta);
 984         if (cos_theta2 < 1)
 985         {
 986             int  m;
 987             real st, sth;
 988             real cik, cii, ckk;
 989             real nrkj2, nrij2;
 990             real nrkj_1, nrij_1;
 991             rvec f_i, f_j, f_k;
 992
 993             st  = dVdt*gmx_invsqrt(1 - cos_theta2); /*  12              */
 994             sth = st*cos_theta;                     /*   1              */
 995 #ifdef DEBUG
 996             if (debug)
 997             {
 998                 fprintf(debug, "ANGLES: theta = %10g  vth = %10g  dV/dtheta = %10g\n",
 999                         theta*RAD2DEG, va, dVdt);
1000             }
1001 #endif
1002             nrij2 = iprod(r_ij, r_ij);      /*   5              */
1003             nrkj2 = iprod(r_kj, r_kj);      /*   5              */
1004
1005             nrij_1 = gmx_invsqrt(nrij2);    /*  10              */
1006             nrkj_1 = gmx_invsqrt(nrkj2);    /*  10              */
1007
1008             cik = st*nrij_1*nrkj_1;         /*   2              */
1009             cii = sth*nrij_1*nrij_1;        /*   2              */
1010             ckk = sth*nrkj_1*nrkj_1;        /*   2              */
1011
1012             for (m = 0; m < DIM; m++)
1013             {           /*  39          */
1014                 f_i[m]    = -(cik*r_kj[m] - cii*r_ij[m]);
1015                 f_k[m]    = -(cik*r_ij[m] - ckk*r_kj[m]);
1016                 f_j[m]    = -f_i[m] - f_k[m];
1017                 f[ai][m] += f_i[m];
1018                 f[aj][m] += f_j[m];
1019                 f[ak][m] += f_k[m];
1020             }
1021             if (g != NULL)
1022             {
1023                 copy_ivec(SHIFT_IVEC(g, aj), jt);
1024
1025                 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
1026                 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
1027                 t1 = IVEC2IS(dt_ij);
1028                 t2 = IVEC2IS(dt_kj);
1029             }
1030             rvec_inc(fshift[t1], f_i);
1031             rvec_inc(fshift[CENTRAL], f_j);
1032             rvec_inc(fshift[t2], f_k);
1033         }                                           /* 161 TOTAL        */
1034     }
1035
1036     return vtot;
1037 }
1038
1039 #ifdef SIMD_BONDEDS
1040
1041 /* As angles, but using SIMD to calculate many dihedrals at once.
1042  * This routines does not calculate energies and shift forces.
1043  */
1044 static gmx_inline void
1045 angles_noener_simd(int nbonds,
1046                    const t_iatom forceatoms[], const t_iparams forceparams[],
1047                    const rvec x[], rvec f[],
1048                    const t_pbc *pbc, const t_graph gmx_unused *g,
1049                    real gmx_unused lambda,
1050                    const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
1051                    int gmx_unused *global_atom_index)
1052 {
1053 #define UNROLL GMX_SIMD_WIDTH_HERE
1054     const int      nfa1 = 4;
1055     int            i, iu, s, m;
1056     int            type, ai[UNROLL], aj[UNROLL], ak[UNROLL];
1057     real           coeff_array[2*UNROLL+UNROLL], *coeff;
1058     real           dr_array[2*DIM*UNROLL+UNROLL], *dr;
1059     real           f_buf_array[6*UNROLL+UNROLL], *f_buf;
1060     gmx_mm_pr      k_S, theta0_S;
1061     gmx_mm_pr      rijx_S, rijy_S, rijz_S;
1062     gmx_mm_pr      rkjx_S, rkjy_S, rkjz_S;
1063     gmx_mm_pr      one_S;
1064     gmx_mm_pr      rij_rkj_S;
1065     gmx_mm_pr      nrij2_S, nrij_1_S;
1066     gmx_mm_pr      nrkj2_S, nrkj_1_S;
1067     gmx_mm_pr      cos_S, sin_S;
1068     gmx_mm_pr      theta_S;
1069     gmx_mm_pr      st_S, sth_S;
1070     gmx_mm_pr      cik_S, cii_S, ckk_S;
1071     gmx_mm_pr      f_ix_S, f_iy_S, f_iz_S;
1072     gmx_mm_pr      f_kx_S, f_ky_S, f_kz_S;
1073     pbc_simd_t     pbc_simd;
1074
1075     /* Ensure register memory alignment */
1076     coeff = gmx_simd_align_real(coeff_array);
1077     dr    = gmx_simd_align_real(dr_array);
1078     f_buf = gmx_simd_align_real(f_buf_array);
1079
1080     set_pbc_simd(pbc, &pbc_simd);
1081
1082     one_S = gmx_set1_pr(1.0);
1083
1084     /* nbonds is the number of angles times nfa1, here we step UNROLL angles */
1085     for (i = 0; (i < nbonds); i += UNROLL*nfa1)
1086     {
1087         /* Collect atoms for UNROLL angles.
1088          * iu indexes into forceatoms, we should not let iu go beyond nbonds.
1089          */
1090         iu = i;
1091         for (s = 0; s < UNROLL; s++)
1092         {
1093             type  = forceatoms[iu];
1094             ai[s] = forceatoms[iu+1];
1095             aj[s] = forceatoms[iu+2];
1096             ak[s] = forceatoms[iu+3];
1097
1098             coeff[s]        = forceparams[type].harmonic.krA;
1099             coeff[UNROLL+s] = forceparams[type].harmonic.rA*DEG2RAD;
1100
1101             /* If you can't use pbc_dx_simd below for PBC, e.g. because
1102              * you can't round in SIMD, use pbc_rvec_sub here.
1103              */
1104             /* Store the non PBC corrected distances packed and aligned */
1105             for (m = 0; m < DIM; m++)
1106             {
1107                 dr[s +      m *UNROLL] = x[ai[s]][m] - x[aj[s]][m];
1108                 dr[s + (DIM+m)*UNROLL] = x[ak[s]][m] - x[aj[s]][m];
1109             }
1110
1111             /* At the end fill the arrays with identical entries */
1112             if (iu + nfa1 < nbonds)
1113             {
1114                 iu += nfa1;
1115             }
1116         }
1117
1118         k_S       = gmx_load_pr(coeff);
1119         theta0_S  = gmx_load_pr(coeff+UNROLL);
1120
1121         rijx_S    = gmx_load_pr(dr + 0*UNROLL);
1122         rijy_S    = gmx_load_pr(dr + 1*UNROLL);
1123         rijz_S    = gmx_load_pr(dr + 2*UNROLL);
1124         rkjx_S    = gmx_load_pr(dr + 3*UNROLL);
1125         rkjy_S    = gmx_load_pr(dr + 4*UNROLL);
1126         rkjz_S    = gmx_load_pr(dr + 5*UNROLL);
1127
1128         pbc_dx_simd(&rijx_S, &rijy_S, &rijz_S, &pbc_simd);
1129         pbc_dx_simd(&rkjx_S, &rkjy_S, &rkjz_S, &pbc_simd);
1130
1131         rij_rkj_S = gmx_iprod_pr(rijx_S, rijy_S, rijz_S,
1132                                  rkjx_S, rkjy_S, rkjz_S);
1133
1134         nrij2_S   = gmx_norm2_pr(rijx_S, rijy_S, rijz_S);
1135         nrkj2_S   = gmx_norm2_pr(rkjx_S, rkjy_S, rkjz_S);
1136
1137         nrij_1_S  = gmx_invsqrt_pr(nrij2_S);
1138         nrkj_1_S  = gmx_invsqrt_pr(nrkj2_S);
1139
1140         cos_S     = gmx_mul_pr(rij_rkj_S, gmx_mul_pr(nrij_1_S, nrkj_1_S));
1141
1142         theta_S   = gmx_acos_pr(cos_S);
1143
1144         sin_S     = gmx_invsqrt_pr(gmx_max_pr(gmx_sub_pr(one_S, gmx_mul_pr(cos_S, cos_S)),
1145                                               gmx_setzero_pr()));
1146         st_S      = gmx_mul_pr(gmx_mul_pr(k_S, gmx_sub_pr(theta0_S, theta_S)),
1147                                sin_S);
1148         sth_S     = gmx_mul_pr(st_S, cos_S);
1149
1150         cik_S     = gmx_mul_pr(st_S,  gmx_mul_pr(nrij_1_S, nrkj_1_S));
1151         cii_S     = gmx_mul_pr(sth_S, gmx_mul_pr(nrij_1_S, nrij_1_S));
1152         ckk_S     = gmx_mul_pr(sth_S, gmx_mul_pr(nrkj_1_S, nrkj_1_S));
1153
1154         f_ix_S    = gmx_mul_pr(cii_S, rijx_S);
1155         f_ix_S    = gmx_nmsub_pr(cik_S, rkjx_S, f_ix_S);
1156         f_iy_S    = gmx_mul_pr(cii_S, rijy_S);
1157         f_iy_S    = gmx_nmsub_pr(cik_S, rkjy_S, f_iy_S);
1158         f_iz_S    = gmx_mul_pr(cii_S, rijz_S);
1159         f_iz_S    = gmx_nmsub_pr(cik_S, rkjz_S, f_iz_S);
1160         f_kx_S    = gmx_mul_pr(ckk_S, rkjx_S);
1161         f_kx_S    = gmx_nmsub_pr(cik_S, rijx_S, f_kx_S);
1162         f_ky_S    = gmx_mul_pr(ckk_S, rkjy_S);
1163         f_ky_S    = gmx_nmsub_pr(cik_S, rijy_S, f_ky_S);
1164         f_kz_S    = gmx_mul_pr(ckk_S, rkjz_S);
1165         f_kz_S    = gmx_nmsub_pr(cik_S, rijz_S, f_kz_S);
1166
1167         gmx_store_pr(f_buf + 0*UNROLL, f_ix_S);
1168         gmx_store_pr(f_buf + 1*UNROLL, f_iy_S);
1169         gmx_store_pr(f_buf + 2*UNROLL, f_iz_S);
1170         gmx_store_pr(f_buf + 3*UNROLL, f_kx_S);
1171         gmx_store_pr(f_buf + 4*UNROLL, f_ky_S);
1172         gmx_store_pr(f_buf + 5*UNROLL, f_kz_S);
1173
1174         iu = i;
1175         s  = 0;
1176         do
1177         {
1178             for (m = 0; m < DIM; m++)
1179             {
1180                 f[ai[s]][m] += f_buf[s + m*UNROLL];
1181                 f[aj[s]][m] -= f_buf[s + m*UNROLL] + f_buf[s + (DIM+m)*UNROLL];
1182                 f[ak[s]][m] += f_buf[s + (DIM+m)*UNROLL];
1183             }
1184             s++;
1185             iu += nfa1;
1186         }
1187         while (s < UNROLL && iu < nbonds);
1188     }
1189 #undef UNROLL
1190 }
1191
1192 #endif /* SIMD_BONDEDS */
1193
1194 real linear_angles(int nbonds,
1195                    const t_iatom forceatoms[], const t_iparams forceparams[],
1196                    const rvec x[], rvec f[], rvec fshift[],
1197                    const t_pbc *pbc, const t_graph *g,
1198                    real lambda, real *dvdlambda,
1199                    const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
1200                    int gmx_unused *global_atom_index)
1201 {
1202     int  i, m, ai, aj, ak, t1, t2, type;
1203     rvec f_i, f_j, f_k;
1204     real L1, kA, kB, aA, aB, dr, dr2, va, vtot, a, b, klin;
1205     ivec jt, dt_ij, dt_kj;
1206     rvec r_ij, r_kj, r_ik, dx;
1207
1208     L1   = 1-lambda;
1209     vtot = 0.0;
1210     for (i = 0; (i < nbonds); )
1211     {
1212         type = forceatoms[i++];
1213         ai   = forceatoms[i++];
1214         aj   = forceatoms[i++];
1215         ak   = forceatoms[i++];
1216
1217         kA   = forceparams[type].linangle.klinA;
1218         kB   = forceparams[type].linangle.klinB;
1219         klin = L1*kA + lambda*kB;
1220
1221         aA   = forceparams[type].linangle.aA;
1222         aB   = forceparams[type].linangle.aB;
1223         a    = L1*aA+lambda*aB;
1224         b    = 1-a;
1225
1226         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], r_ij);
1227         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], r_kj);
1228         rvec_sub(r_ij, r_kj, r_ik);
1229
1230         dr2 = 0;
1231         for (m = 0; (m < DIM); m++)
1232         {
1233             dr        = -a * r_ij[m] - b * r_kj[m];
1234             dr2      += dr*dr;
1235             dx[m]     = dr;
1236             f_i[m]    = a*klin*dr;
1237             f_k[m]    = b*klin*dr;
1238             f_j[m]    = -(f_i[m]+f_k[m]);
1239             f[ai][m] += f_i[m];
1240             f[aj][m] += f_j[m];
1241             f[ak][m] += f_k[m];
1242         }
1243         va          = 0.5*klin*dr2;
1244         *dvdlambda += 0.5*(kB-kA)*dr2 + klin*(aB-aA)*iprod(dx, r_ik);
1245
1246         vtot += va;
1247
1248         if (g)
1249         {
1250             copy_ivec(SHIFT_IVEC(g, aj), jt);
1251
1252             ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
1253             ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
1254             t1 = IVEC2IS(dt_ij);
1255             t2 = IVEC2IS(dt_kj);
1256         }
1257         rvec_inc(fshift[t1], f_i);
1258         rvec_inc(fshift[CENTRAL], f_j);
1259         rvec_inc(fshift[t2], f_k);
1260     }                                         /* 57 TOTAL       */
1261     return vtot;
1262 }
1263
1264 real urey_bradley(int nbonds,
1265                   const t_iatom forceatoms[], const t_iparams forceparams[],
1266                   const rvec x[], rvec f[], rvec fshift[],
1267                   const t_pbc *pbc, const t_graph *g,
1268                   real lambda, real *dvdlambda,
1269                   const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
1270                   int gmx_unused *global_atom_index)
1271 {
1272     int  i, m, ai, aj, ak, t1, t2, type, ki;
1273     rvec r_ij, r_kj, r_ik;
1274     real cos_theta, cos_theta2, theta;
1275     real dVdt, va, vtot, dr, dr2, vbond, fbond, fik;
1276     real kthA, th0A, kUBA, r13A, kthB, th0B, kUBB, r13B;
1277     ivec jt, dt_ij, dt_kj, dt_ik;
1278
1279     vtot = 0.0;
1280     for (i = 0; (i < nbonds); )
1281     {
1282         type  = forceatoms[i++];
1283         ai    = forceatoms[i++];
1284         aj    = forceatoms[i++];
1285         ak    = forceatoms[i++];
1286         th0A  = forceparams[type].u_b.thetaA*DEG2RAD;
1287         kthA  = forceparams[type].u_b.kthetaA;
1288         r13A  = forceparams[type].u_b.r13A;
1289         kUBA  = forceparams[type].u_b.kUBA;
1290         th0B  = forceparams[type].u_b.thetaB*DEG2RAD;
1291         kthB  = forceparams[type].u_b.kthetaB;
1292         r13B  = forceparams[type].u_b.r13B;
1293         kUBB  = forceparams[type].u_b.kUBB;
1294
1295         theta  = bond_angle(x[ai], x[aj], x[ak], pbc,
1296                             r_ij, r_kj, &cos_theta, &t1, &t2);                     /*  41               */
1297
1298         *dvdlambda += harmonic(kthA, kthB, th0A, th0B, theta, lambda, &va, &dVdt); /*  21  */
1299         vtot       += va;
1300
1301         ki   = pbc_rvec_sub(pbc, x[ai], x[ak], r_ik);                               /*   3      */
1302         dr2  = iprod(r_ik, r_ik);                                                   /*   5              */
1303         dr   = dr2*gmx_invsqrt(dr2);                                                /*  10              */
1304
1305         *dvdlambda += harmonic(kUBA, kUBB, r13A, r13B, dr, lambda, &vbond, &fbond); /*  19  */
1306
1307         cos_theta2 = sqr(cos_theta);                                                /*   1              */
1308         if (cos_theta2 < 1)
1309         {
1310             real st, sth;
1311             real cik, cii, ckk;
1312             real nrkj2, nrij2;
1313             rvec f_i, f_j, f_k;
1314
1315             st  = dVdt*gmx_invsqrt(1 - cos_theta2); /*  12              */
1316             sth = st*cos_theta;                     /*   1              */
1317 #ifdef DEBUG
1318             if (debug)
1319             {
1320                 fprintf(debug, "ANGLES: theta = %10g  vth = %10g  dV/dtheta = %10g\n",
1321                         theta*RAD2DEG, va, dVdt);
1322             }
1323 #endif
1324             nrkj2 = iprod(r_kj, r_kj);  /*   5          */
1325             nrij2 = iprod(r_ij, r_ij);
1326
1327             cik = st*gmx_invsqrt(nrkj2*nrij2); /*  12           */
1328             cii = sth/nrij2;                   /*  10           */
1329             ckk = sth/nrkj2;                   /*  10           */
1330
1331             for (m = 0; (m < DIM); m++)        /*  39           */
1332             {
1333                 f_i[m]    = -(cik*r_kj[m]-cii*r_ij[m]);
1334                 f_k[m]    = -(cik*r_ij[m]-ckk*r_kj[m]);
1335                 f_j[m]    = -f_i[m]-f_k[m];
1336                 f[ai][m] += f_i[m];
1337                 f[aj][m] += f_j[m];
1338                 f[ak][m] += f_k[m];
1339             }
1340             if (g)
1341             {
1342                 copy_ivec(SHIFT_IVEC(g, aj), jt);
1343
1344                 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
1345                 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
1346                 t1 = IVEC2IS(dt_ij);
1347                 t2 = IVEC2IS(dt_kj);
1348             }
1349             rvec_inc(fshift[t1], f_i);
1350             rvec_inc(fshift[CENTRAL], f_j);
1351             rvec_inc(fshift[t2], f_k);
1352         }                                       /* 161 TOTAL    */
1353         /* Time for the bond calculations */
1354         if (dr2 == 0.0)
1355         {
1356             continue;
1357         }
1358
1359         vtot  += vbond;            /* 1*/
1360         fbond *= gmx_invsqrt(dr2); /*   6               */
1361
1362         if (g)
1363         {
1364             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, ak), dt_ik);
1365             ki = IVEC2IS(dt_ik);
1366         }
1367         for (m = 0; (m < DIM); m++)     /*  15          */
1368         {
1369             fik                 = fbond*r_ik[m];
1370             f[ai][m]           += fik;
1371             f[ak][m]           -= fik;
1372             fshift[ki][m]      += fik;
1373             fshift[CENTRAL][m] -= fik;
1374         }
1375     }
1376     return vtot;
1377 }
1378
1379 real quartic_angles(int nbonds,
1380                     const t_iatom forceatoms[], const t_iparams forceparams[],
1381                     const rvec x[], rvec f[], rvec fshift[],
1382                     const t_pbc *pbc, const t_graph *g,
1383                     real gmx_unused lambda, real gmx_unused *dvdlambda,
1384                     const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
1385                     int gmx_unused *global_atom_index)
1386 {
1387     int  i, j, ai, aj, ak, t1, t2, type;
1388     rvec r_ij, r_kj;
1389     real cos_theta, cos_theta2, theta, dt, dVdt, va, dtp, c, vtot;
1390     ivec jt, dt_ij, dt_kj;
1391
1392     vtot = 0.0;
1393     for (i = 0; (i < nbonds); )
1394     {
1395         type = forceatoms[i++];
1396         ai   = forceatoms[i++];
1397         aj   = forceatoms[i++];
1398         ak   = forceatoms[i++];
1399
1400         theta  = bond_angle(x[ai], x[aj], x[ak], pbc,
1401                             r_ij, r_kj, &cos_theta, &t1, &t2); /*  41           */
1402
1403         dt = theta - forceparams[type].qangle.theta*DEG2RAD;   /* 2          */
1404
1405         dVdt = 0;
1406         va   = forceparams[type].qangle.c[0];
1407         dtp  = 1.0;
1408         for (j = 1; j <= 4; j++)
1409         {
1410             c     = forceparams[type].qangle.c[j];
1411             dVdt -= j*c*dtp;
1412             dtp  *= dt;
1413             va   += c*dtp;
1414         }
1415         /* 20 */
1416
1417         vtot += va;
1418
1419         cos_theta2 = sqr(cos_theta);            /*   1          */
1420         if (cos_theta2 < 1)
1421         {
1422             int  m;
1423             real st, sth;
1424             real cik, cii, ckk;
1425             real nrkj2, nrij2;
1426             rvec f_i, f_j, f_k;
1427
1428             st  = dVdt*gmx_invsqrt(1 - cos_theta2); /*  12              */
1429             sth = st*cos_theta;                     /*   1              */
1430 #ifdef DEBUG
1431             if (debug)
1432             {
1433                 fprintf(debug, "ANGLES: theta = %10g  vth = %10g  dV/dtheta = %10g\n",
1434                         theta*RAD2DEG, va, dVdt);
1435             }
1436 #endif
1437             nrkj2 = iprod(r_kj, r_kj);  /*   5          */
1438             nrij2 = iprod(r_ij, r_ij);
1439
1440             cik = st*gmx_invsqrt(nrkj2*nrij2); /*  12           */
1441             cii = sth/nrij2;                   /*  10           */
1442             ckk = sth/nrkj2;                   /*  10           */
1443
1444             for (m = 0; (m < DIM); m++)        /*  39           */
1445             {
1446                 f_i[m]    = -(cik*r_kj[m]-cii*r_ij[m]);
1447                 f_k[m]    = -(cik*r_ij[m]-ckk*r_kj[m]);
1448                 f_j[m]    = -f_i[m]-f_k[m];
1449                 f[ai][m] += f_i[m];
1450                 f[aj][m] += f_j[m];
1451                 f[ak][m] += f_k[m];
1452             }
1453             if (g)
1454             {
1455                 copy_ivec(SHIFT_IVEC(g, aj), jt);
1456
1457                 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
1458                 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
1459                 t1 = IVEC2IS(dt_ij);
1460                 t2 = IVEC2IS(dt_kj);
1461             }
1462             rvec_inc(fshift[t1], f_i);
1463             rvec_inc(fshift[CENTRAL], f_j);
1464             rvec_inc(fshift[t2], f_k);
1465         }                                       /* 153 TOTAL    */
1466     }
1467     return vtot;
1468 }
1469
1470 real dih_angle(const rvec xi, const rvec xj, const rvec xk, const rvec xl,
1471                const t_pbc *pbc,
1472                rvec r_ij, rvec r_kj, rvec r_kl, rvec m, rvec n,
1473                real *sign, int *t1, int *t2, int *t3)
1474 {
1475     real ipr, phi;
1476
1477     *t1 = pbc_rvec_sub(pbc, xi, xj, r_ij); /*  3        */
1478     *t2 = pbc_rvec_sub(pbc, xk, xj, r_kj); /*  3                */
1479     *t3 = pbc_rvec_sub(pbc, xk, xl, r_kl); /*  3                */
1480
1481     cprod(r_ij, r_kj, m);                  /*  9        */
1482     cprod(r_kj, r_kl, n);                  /*  9                */
1483     phi     = gmx_angle(m, n);             /* 49 (assuming 25 for atan2) */
1484     ipr     = iprod(r_ij, n);              /*  5        */
1485     (*sign) = (ipr < 0.0) ? -1.0 : 1.0;
1486     phi     = (*sign)*phi;                 /*  1                */
1487     /* 82 TOTAL */
1488     return phi;
1489 }
1490
1491
1492 #ifdef SIMD_BONDEDS
1493
/* As dih_angle above, but calculates GMX_SIMD_WIDTH_HERE dihedral angles at
 * once using SIMD, also calculates the pre-factors required for the dihedral
 * force update.
 *
 * ai, aj, ak and al are each read for GMX_SIMD_WIDTH_HERE entries and index
 * into x.
 * dr is scratch space; 3*DIM*GMX_SIMD_WIDTH_HERE reals of it are written.
 * Outputs: the signed angles in *phi_S, the cross products r_ij x r_kj in
 * (*mx_S, *my_S, *mz_S) and r_kj x r_kl in (*nx_S, *ny_S, *nz_S),
 * |r_kj|/|m|^2 in *nrkj_m2_S and |r_kj|/|n|^2 in *nrkj_n2_S, and
 * r_ij.r_kj/|r_kj|^2 and r_kl.r_kj/|r_kj|^2 stored to p and q
 * (GMX_SIMD_WIDTH_HERE reals each).
 * Note that dr, p and q are accessed with gmx_load_pr/gmx_store_pr and
 * should be register aligned.
 */
static gmx_inline void
dih_angle_simd(const rvec *x,
               const int *ai, const int *aj, const int *ak, const int *al,
               const pbc_simd_t *pbc,
               real *dr,
               gmx_mm_pr *phi_S,
               gmx_mm_pr *mx_S, gmx_mm_pr *my_S, gmx_mm_pr *mz_S,
               gmx_mm_pr *nx_S, gmx_mm_pr *ny_S, gmx_mm_pr *nz_S,
               gmx_mm_pr *nrkj_m2_S,
               gmx_mm_pr *nrkj_n2_S,
               real *p,
               real *q)
{
#define UNROLL GMX_SIMD_WIDTH_HERE
    int       s, m;
    gmx_mm_pr rijx_S, rijy_S, rijz_S;
    gmx_mm_pr rkjx_S, rkjy_S, rkjz_S;
    gmx_mm_pr rklx_S, rkly_S, rklz_S;
    gmx_mm_pr cx_S, cy_S, cz_S;
    gmx_mm_pr cn_S;
    gmx_mm_pr s_S;
    gmx_mm_pr ipr_S;
    gmx_mm_pr iprm_S, iprn_S;
    gmx_mm_pr nrkj2_S, nrkj_1_S, nrkj_2_S, nrkj_S;
    gmx_mm_pr p_S, q_S;
    gmx_mm_pr fmin_S = gmx_set1_pr(GMX_FLOAT_MIN);

    /* Store the non PBC corrected difference vectors packed per component:
     * r_ij in rows 0-2, r_kj in rows 3-5 and r_kl in rows 6-8 of dr.
     */
    for (s = 0; s < UNROLL; s++)
    {
        /* If you can't use pbc_dx_simd below for PBC, e.g. because
         * you can't round in SIMD, use pbc_rvec_sub here.
         */
        for (m = 0; m < DIM; m++)
        {
            dr[s + (0*DIM + m)*UNROLL] = x[ai[s]][m] - x[aj[s]][m];
            dr[s + (1*DIM + m)*UNROLL] = x[ak[s]][m] - x[aj[s]][m];
            dr[s + (2*DIM + m)*UNROLL] = x[ak[s]][m] - x[al[s]][m];
        }
    }

    rijx_S = gmx_load_pr(dr + 0*UNROLL);
    rijy_S = gmx_load_pr(dr + 1*UNROLL);
    rijz_S = gmx_load_pr(dr + 2*UNROLL);
    rkjx_S = gmx_load_pr(dr + 3*UNROLL);
    rkjy_S = gmx_load_pr(dr + 4*UNROLL);
    rkjz_S = gmx_load_pr(dr + 5*UNROLL);
    rklx_S = gmx_load_pr(dr + 6*UNROLL);
    rkly_S = gmx_load_pr(dr + 7*UNROLL);
    rklz_S = gmx_load_pr(dr + 8*UNROLL);

    pbc_dx_simd(&rijx_S, &rijy_S, &rijz_S, pbc);
    pbc_dx_simd(&rkjx_S, &rkjy_S, &rkjz_S, pbc);
    pbc_dx_simd(&rklx_S, &rkly_S, &rklz_S, pbc);

    /* m = r_ij x r_kj */
    gmx_cprod_pr(rijx_S, rijy_S, rijz_S,
                 rkjx_S, rkjy_S, rkjz_S,
                 mx_S, my_S, mz_S);

    /* n = r_kj x r_kl */
    gmx_cprod_pr(rkjx_S, rkjy_S, rkjz_S,
                 rklx_S, rkly_S, rklz_S,
                 nx_S, ny_S, nz_S);

    /* c = m x n, only its length is used below */
    gmx_cprod_pr(*mx_S, *my_S, *mz_S,
                 *nx_S, *ny_S, *nz_S,
                 &cx_S, &cy_S, &cz_S);

    cn_S       = gmx_sqrt_pr(gmx_norm2_pr(cx_S, cy_S, cz_S));

    s_S        = gmx_iprod_pr(*mx_S, *my_S, *mz_S, *nx_S, *ny_S, *nz_S);

    /* Determine the dihedral angle, the sign might need correction */
    *phi_S     = gmx_atan2_pr(cn_S, s_S);

    ipr_S      = gmx_iprod_pr(rijx_S, rijy_S, rijz_S,
                              *nx_S, *ny_S, *nz_S);

    iprm_S     = gmx_norm2_pr(*mx_S, *my_S, *mz_S);
    iprn_S     = gmx_norm2_pr(*nx_S, *ny_S, *nz_S);

    nrkj2_S    = gmx_norm2_pr(rkjx_S, rkjy_S, rkjz_S);

    /* Avoid division by zero. When zero, the result is multiplied by 0
     * anyhow, so the 3 max below do not affect the final result.
     */
    nrkj2_S    = gmx_max_pr(nrkj2_S, fmin_S);
    nrkj_1_S   = gmx_invsqrt_pr(nrkj2_S);
    nrkj_2_S   = gmx_mul_pr(nrkj_1_S, nrkj_1_S);
    nrkj_S     = gmx_mul_pr(nrkj2_S, nrkj_1_S);

    iprm_S     = gmx_max_pr(iprm_S, fmin_S);
    iprn_S     = gmx_max_pr(iprn_S, fmin_S);
    *nrkj_m2_S = gmx_mul_pr(nrkj_S, gmx_inv_pr(iprm_S));
    *nrkj_n2_S = gmx_mul_pr(nrkj_S, gmx_inv_pr(iprn_S));

    /* Set sign of phi_S with the sign of ipr_S; phi_S is currently positive */
    *phi_S     = gmx_cpsgn_nonneg_pr(ipr_S, *phi_S);

    /* p = r_ij.r_kj/|r_kj|^2 */
    p_S        = gmx_iprod_pr(rijx_S, rijy_S, rijz_S,
                              rkjx_S, rkjy_S, rkjz_S);
    p_S        = gmx_mul_pr(p_S, nrkj_2_S);

    /* q = r_kl.r_kj/|r_kj|^2 */
    q_S        = gmx_iprod_pr(rklx_S, rkly_S, rklz_S,
                              rkjx_S, rkjy_S, rkjz_S);
    q_S        = gmx_mul_pr(q_S, nrkj_2_S);

    gmx_store_pr(p, p_S);
    gmx_store_pr(q, q_S);
#undef UNROLL
}
1607
1608 #endif /* SIMD_BONDEDS */
1609
1610
1611 void do_dih_fup(int i, int j, int k, int l, real ddphi,
1612                 rvec r_ij, rvec r_kj, rvec r_kl,
1613                 rvec m, rvec n, rvec f[], rvec fshift[],
1614                 const t_pbc *pbc, const t_graph *g,
1615                 const rvec x[], int t1, int t2, int t3)
1616 {
1617     /* 143 FLOPS */
1618     rvec f_i, f_j, f_k, f_l;
1619     rvec uvec, vvec, svec, dx_jl;
1620     real iprm, iprn, nrkj, nrkj2, nrkj_1, nrkj_2;
1621     real a, b, p, q, toler;
1622     ivec jt, dt_ij, dt_kj, dt_lj;
1623
1624     iprm  = iprod(m, m);       /*  5    */
1625     iprn  = iprod(n, n);       /*  5    */
1626     nrkj2 = iprod(r_kj, r_kj); /*  5    */
1627     toler = nrkj2*GMX_REAL_EPS;
1628     if ((iprm > toler) && (iprn > toler))
1629     {
1630         nrkj_1 = gmx_invsqrt(nrkj2); /* 10      */
1631         nrkj_2 = nrkj_1*nrkj_1;      /*  1      */
1632         nrkj   = nrkj2*nrkj_1;       /*  1      */
1633         a      = -ddphi*nrkj/iprm;   /* 11      */
1634         svmul(a, m, f_i);            /*  3      */
1635         b     = ddphi*nrkj/iprn;     /* 11      */
1636         svmul(b, n, f_l);            /*  3  */
1637         p     = iprod(r_ij, r_kj);   /*  5      */
1638         p    *= nrkj_2;              /*  1      */
1639         q     = iprod(r_kl, r_kj);   /*  5      */
1640         q    *= nrkj_2;              /*  1      */
1641         svmul(p, f_i, uvec);         /*  3      */
1642         svmul(q, f_l, vvec);         /*  3      */
1643         rvec_sub(uvec, vvec, svec);  /*  3      */
1644         rvec_sub(f_i, svec, f_j);    /*  3      */
1645         rvec_add(f_l, svec, f_k);    /*  3      */
1646         rvec_inc(f[i], f_i);         /*  3      */
1647         rvec_dec(f[j], f_j);         /*  3      */
1648         rvec_dec(f[k], f_k);         /*  3      */
1649         rvec_inc(f[l], f_l);         /*  3      */
1650
1651         if (g)
1652         {
1653             copy_ivec(SHIFT_IVEC(g, j), jt);
1654             ivec_sub(SHIFT_IVEC(g, i), jt, dt_ij);
1655             ivec_sub(SHIFT_IVEC(g, k), jt, dt_kj);
1656             ivec_sub(SHIFT_IVEC(g, l), jt, dt_lj);
1657             t1 = IVEC2IS(dt_ij);
1658             t2 = IVEC2IS(dt_kj);
1659             t3 = IVEC2IS(dt_lj);
1660         }
1661         else if (pbc)
1662         {
1663             t3 = pbc_rvec_sub(pbc, x[l], x[j], dx_jl);
1664         }
1665         else
1666         {
1667             t3 = CENTRAL;
1668         }
1669
1670         rvec_inc(fshift[t1], f_i);
1671         rvec_dec(fshift[CENTRAL], f_j);
1672         rvec_dec(fshift[t2], f_k);
1673         rvec_inc(fshift[t3], f_l);
1674     }
1675     /* 112 TOTAL    */
1676 }
1677
1678 /* As do_dih_fup above, but without shift forces */
1679 static void
1680 do_dih_fup_noshiftf(int i, int j, int k, int l, real ddphi,
1681                     rvec r_ij, rvec r_kj, rvec r_kl,
1682                     rvec m, rvec n, rvec f[])
1683 {
1684     rvec f_i, f_j, f_k, f_l;
1685     rvec uvec, vvec, svec, dx_jl;
1686     real iprm, iprn, nrkj, nrkj2, nrkj_1, nrkj_2;
1687     real a, b, p, q, toler;
1688     ivec jt, dt_ij, dt_kj, dt_lj;
1689
1690     iprm  = iprod(m, m);       /*  5    */
1691     iprn  = iprod(n, n);       /*  5    */
1692     nrkj2 = iprod(r_kj, r_kj); /*  5    */
1693     toler = nrkj2*GMX_REAL_EPS;
1694     if ((iprm > toler) && (iprn > toler))
1695     {
1696         nrkj_1 = gmx_invsqrt(nrkj2); /* 10      */
1697         nrkj_2 = nrkj_1*nrkj_1;      /*  1      */
1698         nrkj   = nrkj2*nrkj_1;       /*  1      */
1699         a      = -ddphi*nrkj/iprm;   /* 11      */
1700         svmul(a, m, f_i);            /*  3      */
1701         b     = ddphi*nrkj/iprn;     /* 11      */
1702         svmul(b, n, f_l);            /*  3  */
1703         p     = iprod(r_ij, r_kj);   /*  5      */
1704         p    *= nrkj_2;              /*  1      */
1705         q     = iprod(r_kl, r_kj);   /*  5      */
1706         q    *= nrkj_2;              /*  1      */
1707         svmul(p, f_i, uvec);         /*  3      */
1708         svmul(q, f_l, vvec);         /*  3      */
1709         rvec_sub(uvec, vvec, svec);  /*  3      */
1710         rvec_sub(f_i, svec, f_j);    /*  3      */
1711         rvec_add(f_l, svec, f_k);    /*  3      */
1712         rvec_inc(f[i], f_i);         /*  3      */
1713         rvec_dec(f[j], f_j);         /*  3      */
1714         rvec_dec(f[k], f_k);         /*  3      */
1715         rvec_inc(f[l], f_l);         /*  3      */
1716     }
1717 }
1718
1719 /* As do_dih_fup_noshiftf above, but with pre-calculated pre-factors */
1720 static gmx_inline void
1721 do_dih_fup_noshiftf_precalc(int i, int j, int k, int l,
1722                             real p, real q,
1723                             real f_i_x, real f_i_y, real f_i_z,
1724                             real mf_l_x, real mf_l_y, real mf_l_z,
1725                             rvec f[])
1726 {
1727     rvec f_i, f_j, f_k, f_l;
1728     rvec uvec, vvec, svec;
1729
1730     f_i[XX] = f_i_x;
1731     f_i[YY] = f_i_y;
1732     f_i[ZZ] = f_i_z;
1733     f_l[XX] = -mf_l_x;
1734     f_l[YY] = -mf_l_y;
1735     f_l[ZZ] = -mf_l_z;
1736     svmul(p, f_i, uvec);
1737     svmul(q, f_l, vvec);
1738     rvec_sub(uvec, vvec, svec);
1739     rvec_sub(f_i, svec, f_j);
1740     rvec_add(f_l, svec, f_k);
1741     rvec_inc(f[i], f_i);
1742     rvec_dec(f[j], f_j);
1743     rvec_dec(f[k], f_k);
1744     rvec_inc(f[l], f_l);
1745 }
1746
1747
1748 real dopdihs(real cpA, real cpB, real phiA, real phiB, int mult,
1749              real phi, real lambda, real *V, real *F)
1750 {
1751     real v, dvdlambda, mdphi, v1, sdphi, ddphi;
1752     real L1   = 1.0 - lambda;
1753     real ph0  = (L1*phiA + lambda*phiB)*DEG2RAD;
1754     real dph0 = (phiB - phiA)*DEG2RAD;
1755     real cp   = L1*cpA + lambda*cpB;
1756
1757     mdphi =  mult*phi - ph0;
1758     sdphi = sin(mdphi);
1759     ddphi = -cp*mult*sdphi;
1760     v1    = 1.0 + cos(mdphi);
1761     v     = cp*v1;
1762
1763     dvdlambda  = (cpB - cpA)*v1 + cp*dph0*sdphi;
1764
1765     *V = v;
1766     *F = ddphi;
1767
1768     return dvdlambda;
1769
1770     /* That was 40 flops */
1771 }
1772
1773 static void
1774 dopdihs_noener(real cpA, real cpB, real phiA, real phiB, int mult,
1775                real phi, real lambda, real *F)
1776 {
1777     real mdphi, sdphi, ddphi;
1778     real L1   = 1.0 - lambda;
1779     real ph0  = (L1*phiA + lambda*phiB)*DEG2RAD;
1780     real cp   = L1*cpA + lambda*cpB;
1781
1782     mdphi = mult*phi - ph0;
1783     sdphi = sin(mdphi);
1784     ddphi = -cp*mult*sdphi;
1785
1786     *F = ddphi;
1787
1788     /* That was 20 flops */
1789 }
1790
1791 static void
1792 dopdihs_mdphi(real cpA, real cpB, real phiA, real phiB, int mult,
1793               real phi, real lambda, real *cp, real *mdphi)
1794 {
1795     real L1   = 1.0 - lambda;
1796     real ph0  = (L1*phiA + lambda*phiB)*DEG2RAD;
1797
1798     *cp    = L1*cpA + lambda*cpB;
1799
1800     *mdphi = mult*phi - ph0;
1801 }
1802
1803 static real dopdihs_min(real cpA, real cpB, real phiA, real phiB, int mult,
1804                         real phi, real lambda, real *V, real *F)
1805 /* similar to dopdihs, except for a minus sign  *
1806  * and a different treatment of mult/phi0       */
1807 {
1808     real v, dvdlambda, mdphi, v1, sdphi, ddphi;
1809     real L1   = 1.0 - lambda;
1810     real ph0  = (L1*phiA + lambda*phiB)*DEG2RAD;
1811     real dph0 = (phiB - phiA)*DEG2RAD;
1812     real cp   = L1*cpA + lambda*cpB;
1813
1814     mdphi = mult*(phi-ph0);
1815     sdphi = sin(mdphi);
1816     ddphi = cp*mult*sdphi;
1817     v1    = 1.0-cos(mdphi);
1818     v     = cp*v1;
1819
1820     dvdlambda  = (cpB-cpA)*v1 + cp*dph0*sdphi;
1821
1822     *V = v;
1823     *F = ddphi;
1824
1825     return dvdlambda;
1826
1827     /* That was 40 flops */
1828 }
1829
1830 real pdihs(int nbonds,
1831            const t_iatom forceatoms[], const t_iparams forceparams[],
1832            const rvec x[], rvec f[], rvec fshift[],
1833            const t_pbc *pbc, const t_graph *g,
1834            real lambda, real *dvdlambda,
1835            const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
1836            int gmx_unused *global_atom_index)
1837 {
1838     int  i, type, ai, aj, ak, al;
1839     int  t1, t2, t3;
1840     rvec r_ij, r_kj, r_kl, m, n;
1841     real phi, sign, ddphi, vpd, vtot;
1842
1843     vtot = 0.0;
1844
1845     for (i = 0; (i < nbonds); )
1846     {
1847         type = forceatoms[i++];
1848         ai   = forceatoms[i++];
1849         aj   = forceatoms[i++];
1850         ak   = forceatoms[i++];
1851         al   = forceatoms[i++];
1852
1853         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
1854                         &sign, &t1, &t2, &t3);  /*  84      */
1855         *dvdlambda += dopdihs(forceparams[type].pdihs.cpA,
1856                               forceparams[type].pdihs.cpB,
1857                               forceparams[type].pdihs.phiA,
1858                               forceparams[type].pdihs.phiB,
1859                               forceparams[type].pdihs.mult,
1860                               phi, lambda, &vpd, &ddphi);
1861
1862         vtot += vpd;
1863         do_dih_fup(ai, aj, ak, al, ddphi, r_ij, r_kj, r_kl, m, n,
1864                    f, fshift, pbc, g, x, t1, t2, t3); /* 112            */
1865
1866 #ifdef DEBUG
1867         fprintf(debug, "pdih: (%d,%d,%d,%d) phi=%g\n",
1868                 ai, aj, ak, al, phi);
1869 #endif
1870     } /* 223 TOTAL  */
1871
1872     return vtot;
1873 }
1874
1875 void make_dp_periodic(real *dp)  /* 1 flop? */
1876 {
1877     /* dp cannot be outside (-pi,pi) */
1878     if (*dp >= M_PI)
1879     {
1880         *dp -= 2*M_PI;
1881     }
1882     else if (*dp < -M_PI)
1883     {
1884         *dp += 2*M_PI;
1885     }
1886     return;
1887 }
1888
1889 /* As pdihs above, but without calculating energies and shift forces */
1890 static void
1891 pdihs_noener(int nbonds,
1892              const t_iatom forceatoms[], const t_iparams forceparams[],
1893              const rvec x[], rvec f[],
1894              const t_pbc gmx_unused *pbc, const t_graph gmx_unused *g,
1895              real lambda,
1896              const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
1897              int gmx_unused *global_atom_index)
1898 {
1899     int  i, type, ai, aj, ak, al;
1900     int  t1, t2, t3;
1901     rvec r_ij, r_kj, r_kl, m, n;
1902     real phi, sign, ddphi_tot, ddphi;
1903
1904     for (i = 0; (i < nbonds); )
1905     {
1906         ai   = forceatoms[i+1];
1907         aj   = forceatoms[i+2];
1908         ak   = forceatoms[i+3];
1909         al   = forceatoms[i+4];
1910
1911         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
1912                         &sign, &t1, &t2, &t3);
1913
1914         ddphi_tot = 0;
1915
1916         /* Loop over dihedrals working on the same atoms,
1917          * so we avoid recalculating angles and force distributions.
1918          */
1919         do
1920         {
1921             type = forceatoms[i];
1922             dopdihs_noener(forceparams[type].pdihs.cpA,
1923                            forceparams[type].pdihs.cpB,
1924                            forceparams[type].pdihs.phiA,
1925                            forceparams[type].pdihs.phiB,
1926                            forceparams[type].pdihs.mult,
1927                            phi, lambda, &ddphi);
1928             ddphi_tot += ddphi;
1929
1930             i += 5;
1931         }
1932         while (i < nbonds &&
1933                forceatoms[i+1] == ai &&
1934                forceatoms[i+2] == aj &&
1935                forceatoms[i+3] == ak &&
1936                forceatoms[i+4] == al);
1937
1938         do_dih_fup_noshiftf(ai, aj, ak, al, ddphi_tot, r_ij, r_kj, r_kl, m, n, f);
1939     }
1940 }
1941
1942
1943 #ifdef SIMD_BONDEDS
1944
1945 /* As pdihs_noner above, but using SIMD to calculate many dihedrals at once */
1946 static void
1947 pdihs_noener_simd(int nbonds,
1948                   const t_iatom forceatoms[], const t_iparams forceparams[],
1949                   const rvec x[], rvec f[],
1950                   const t_pbc *pbc, const t_graph gmx_unused *g,
1951                   real gmx_unused lambda,
1952                   const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
1953                   int gmx_unused *global_atom_index)
1954 {
1955 #define UNROLL GMX_SIMD_WIDTH_HERE
1956     const int       nfa1 = 5;
1957     int             i, iu, s;
1958     int             type, ai[UNROLL], aj[UNROLL], ak[UNROLL], al[UNROLL];
1959     int             t1[UNROLL], t2[UNROLL], t3[UNROLL];
1960     real            ddphi;
1961     real            dr_array[3*DIM*UNROLL+UNROLL], *dr;
1962     real            buf_array[7*UNROLL+UNROLL], *buf;
1963     real           *cp, *phi0, *mult, *phi, *p, *q, *sf_i, *msf_l;
1964     gmx_mm_pr       phi0_S, phi_S;
1965     gmx_mm_pr       mx_S, my_S, mz_S;
1966     gmx_mm_pr       nx_S, ny_S, nz_S;
1967     gmx_mm_pr       nrkj_m2_S, nrkj_n2_S;
1968     gmx_mm_pr       cp_S, mdphi_S, mult_S;
1969     gmx_mm_pr       sin_S, cos_S;
1970     gmx_mm_pr       mddphi_S;
1971     gmx_mm_pr       sf_i_S, msf_l_S;
1972     pbc_simd_t      pbc_simd;
1973
1974     /* Ensure SIMD register alignment */
1975     dr  = gmx_simd_align_real(dr_array);
1976     buf = gmx_simd_align_real(buf_array);
1977
1978     /* Extract aligned pointer for parameters and variables */
1979     cp    = buf + 0*UNROLL;
1980     phi0  = buf + 1*UNROLL;
1981     mult  = buf + 2*UNROLL;
1982     p     = buf + 3*UNROLL;
1983     q     = buf + 4*UNROLL;
1984     sf_i  = buf + 5*UNROLL;
1985     msf_l = buf + 6*UNROLL;
1986
1987     set_pbc_simd(pbc, &pbc_simd);
1988
1989     /* nbonds is the number of dihedrals times nfa1, here we step UNROLL dihs */
1990     for (i = 0; (i < nbonds); i += UNROLL*nfa1)
1991     {
1992         /* Collect atoms quadruplets for UNROLL dihedrals.
1993          * iu indexes into forceatoms, we should not let iu go beyond nbonds.
1994          */
1995         iu = i;
1996         for (s = 0; s < UNROLL; s++)
1997         {
1998             type  = forceatoms[iu];
1999             ai[s] = forceatoms[iu+1];
2000             aj[s] = forceatoms[iu+2];
2001             ak[s] = forceatoms[iu+3];
2002             al[s] = forceatoms[iu+4];
2003
2004             cp[s]   = forceparams[type].pdihs.cpA;
2005             phi0[s] = forceparams[type].pdihs.phiA*DEG2RAD;
2006             mult[s] = forceparams[type].pdihs.mult;
2007
2008             /* At the end fill the arrays with identical entries */
2009             if (iu + nfa1 < nbonds)
2010             {
2011                 iu += nfa1;
2012             }
2013         }
2014
2015         /* Caclulate UNROLL dihedral angles at once */
2016         dih_angle_simd(x, ai, aj, ak, al, &pbc_simd,
2017                        dr,
2018                        &phi_S,
2019                        &mx_S, &my_S, &mz_S,
2020                        &nx_S, &ny_S, &nz_S,
2021                        &nrkj_m2_S,
2022                        &nrkj_n2_S,
2023                        p, q);
2024
2025         cp_S     = gmx_load_pr(cp);
2026         phi0_S   = gmx_load_pr(phi0);
2027         mult_S   = gmx_load_pr(mult);
2028
2029         mdphi_S  = gmx_sub_pr(gmx_mul_pr(mult_S, phi_S), phi0_S);
2030
2031         /* Calculate UNROLL sines at once */
2032         gmx_sincos_pr(mdphi_S, &sin_S, &cos_S);
2033         mddphi_S = gmx_mul_pr(gmx_mul_pr(cp_S, mult_S), sin_S);
2034         sf_i_S   = gmx_mul_pr(mddphi_S, nrkj_m2_S);
2035         msf_l_S  = gmx_mul_pr(mddphi_S, nrkj_n2_S);
2036
2037         /* After this m?_S will contain f[i] */
2038         mx_S     = gmx_mul_pr(sf_i_S, mx_S);
2039         my_S     = gmx_mul_pr(sf_i_S, my_S);
2040         mz_S     = gmx_mul_pr(sf_i_S, mz_S);
2041
2042         /* After this m?_S will contain -f[l] */
2043         nx_S     = gmx_mul_pr(msf_l_S, nx_S);
2044         ny_S     = gmx_mul_pr(msf_l_S, ny_S);
2045         nz_S     = gmx_mul_pr(msf_l_S, nz_S);
2046
2047         gmx_store_pr(dr + 0*UNROLL, mx_S);
2048         gmx_store_pr(dr + 1*UNROLL, my_S);
2049         gmx_store_pr(dr + 2*UNROLL, mz_S);
2050         gmx_store_pr(dr + 3*UNROLL, nx_S);
2051         gmx_store_pr(dr + 4*UNROLL, ny_S);
2052         gmx_store_pr(dr + 5*UNROLL, nz_S);
2053
2054         iu = i;
2055         s  = 0;
2056         do
2057         {
2058             do_dih_fup_noshiftf_precalc(ai[s], aj[s], ak[s], al[s],
2059                                         p[s], q[s],
2060                                         dr[     XX *UNROLL+s],
2061                                         dr[     YY *UNROLL+s],
2062                                         dr[     ZZ *UNROLL+s],
2063                                         dr[(DIM+XX)*UNROLL+s],
2064                                         dr[(DIM+YY)*UNROLL+s],
2065                                         dr[(DIM+ZZ)*UNROLL+s],
2066                                         f);
2067             s++;
2068             iu += nfa1;
2069         }
2070         while (s < UNROLL && iu < nbonds);
2071     }
2072 #undef UNROLL
2073 }
2074
2075 #endif /* SIMD_BONDEDS */
2076
2077
2078 real idihs(int nbonds,
2079            const t_iatom forceatoms[], const t_iparams forceparams[],
2080            const rvec x[], rvec f[], rvec fshift[],
2081            const t_pbc *pbc, const t_graph *g,
2082            real lambda, real *dvdlambda,
2083            const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2084            int gmx_unused *global_atom_index)
2085 {
2086     int  i, type, ai, aj, ak, al;
2087     int  t1, t2, t3;
2088     real phi, phi0, dphi0, ddphi, sign, vtot;
2089     rvec r_ij, r_kj, r_kl, m, n;
2090     real L1, kk, dp, dp2, kA, kB, pA, pB, dvdl_term;
2091
2092     L1        = 1.0-lambda;
2093     dvdl_term = 0;
2094     vtot      = 0.0;
2095     for (i = 0; (i < nbonds); )
2096     {
2097         type = forceatoms[i++];
2098         ai   = forceatoms[i++];
2099         aj   = forceatoms[i++];
2100         ak   = forceatoms[i++];
2101         al   = forceatoms[i++];
2102
2103         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
2104                         &sign, &t1, &t2, &t3);  /*  84          */
2105
2106         /* phi can jump if phi0 is close to Pi/-Pi, which will cause huge
2107          * force changes if we just apply a normal harmonic.
2108          * Instead, we first calculate phi-phi0 and take it modulo (-Pi,Pi).
2109          * This means we will never have the periodicity problem, unless
2110          * the dihedral is Pi away from phiO, which is very unlikely due to
2111          * the potential.
2112          */
2113         kA = forceparams[type].harmonic.krA;
2114         kB = forceparams[type].harmonic.krB;
2115         pA = forceparams[type].harmonic.rA;
2116         pB = forceparams[type].harmonic.rB;
2117
2118         kk    = L1*kA + lambda*kB;
2119         phi0  = (L1*pA + lambda*pB)*DEG2RAD;
2120         dphi0 = (pB - pA)*DEG2RAD;
2121
2122         dp = phi-phi0;
2123
2124         make_dp_periodic(&dp);
2125
2126         dp2 = dp*dp;
2127
2128         vtot += 0.5*kk*dp2;
2129         ddphi = -kk*dp;
2130
2131         dvdl_term += 0.5*(kB - kA)*dp2 - kk*dphi0*dp;
2132
2133         do_dih_fup(ai, aj, ak, al, (real)(-ddphi), r_ij, r_kj, r_kl, m, n,
2134                    f, fshift, pbc, g, x, t1, t2, t3); /* 112            */
2135         /* 218 TOTAL    */
2136 #ifdef DEBUG
2137         if (debug)
2138         {
2139             fprintf(debug, "idih: (%d,%d,%d,%d) phi=%g\n",
2140                     ai, aj, ak, al, phi);
2141         }
2142 #endif
2143     }
2144
2145     *dvdlambda += dvdl_term;
2146     return vtot;
2147 }
2148
2149
2150 /*! \brief returns dx, rdist, and dpdl for functions posres() and fbposres()
2151  */
2152 static void posres_dx(const rvec x, const rvec pos0A, const rvec pos0B,
2153                       const rvec comA_sc, const rvec comB_sc,
2154                       real lambda,
2155                       t_pbc *pbc, int refcoord_scaling, int npbcdim,
2156                       rvec dx, rvec rdist, rvec dpdl)
2157 {
2158     int  m, d;
2159     real posA, posB, L1, ref = 0.;
2160     rvec pos;
2161
2162     L1 = 1.0-lambda;
2163
2164     for (m = 0; m < DIM; m++)
2165     {
2166         posA = pos0A[m];
2167         posB = pos0B[m];
2168         if (m < npbcdim)
2169         {
2170             switch (refcoord_scaling)
2171             {
2172                 case erscNO:
2173                     ref      = 0;
2174                     rdist[m] = L1*posA + lambda*posB;
2175                     dpdl[m]  = posB - posA;
2176                     break;
2177                 case erscALL:
2178                     /* Box relative coordinates are stored for dimensions with pbc */
2179                     posA *= pbc->box[m][m];
2180                     posB *= pbc->box[m][m];
2181                     for (d = m+1; d < npbcdim; d++)
2182                     {
2183                         posA += pos0A[d]*pbc->box[d][m];
2184                         posB += pos0B[d]*pbc->box[d][m];
2185                     }
2186                     ref      = L1*posA + lambda*posB;
2187                     rdist[m] = 0;
2188                     dpdl[m]  = posB - posA;
2189                     break;
2190                 case erscCOM:
2191                     ref      = L1*comA_sc[m] + lambda*comB_sc[m];
2192                     rdist[m] = L1*posA       + lambda*posB;
2193                     dpdl[m]  = comB_sc[m] - comA_sc[m] + posB - posA;
2194                     break;
2195                 default:
2196                     gmx_fatal(FARGS, "No such scaling method implemented");
2197             }
2198         }
2199         else
2200         {
2201             ref      = L1*posA + lambda*posB;
2202             rdist[m] = 0;
2203             dpdl[m]  = posB - posA;
2204         }
2205
2206         /* We do pbc_dx with ref+rdist,
2207          * since with only ref we can be up to half a box vector wrong.
2208          */
2209         pos[m] = ref + rdist[m];
2210     }
2211
2212     if (pbc)
2213     {
2214         pbc_dx(pbc, x, pos, dx);
2215     }
2216     else
2217     {
2218         rvec_sub(x, pos, dx);
2219     }
2220 }
2221
2222 /*! \brief Adds forces of flat-bottomed positions restraints to f[]
2223  *         and fixes vir_diag. Returns the flat-bottomed potential. */
2224 real fbposres(int nbonds,
2225               const t_iatom forceatoms[], const t_iparams forceparams[],
2226               const rvec x[], rvec f[], rvec vir_diag,
2227               t_pbc *pbc,
2228               int refcoord_scaling, int ePBC, rvec com)
2229 /* compute flat-bottomed positions restraints */
2230 {
2231     int              i, ai, m, d, type, npbcdim = 0, fbdim;
2232     const t_iparams *pr;
2233     real             vtot, kk, v;
2234     real             ref = 0, dr, dr2, rpot, rfb, rfb2, fact, invdr;
2235     rvec             com_sc, rdist, pos, dx, dpdl, fm;
2236     gmx_bool         bInvert;
2237
2238     npbcdim = ePBC2npbcdim(ePBC);
2239
2240     if (refcoord_scaling == erscCOM)
2241     {
2242         clear_rvec(com_sc);
2243         for (m = 0; m < npbcdim; m++)
2244         {
2245             for (d = m; d < npbcdim; d++)
2246             {
2247                 com_sc[m] += com[d]*pbc->box[d][m];
2248             }
2249         }
2250     }
2251
2252     vtot = 0.0;
2253     for (i = 0; (i < nbonds); )
2254     {
2255         type = forceatoms[i++];
2256         ai   = forceatoms[i++];
2257         pr   = &forceparams[type];
2258
2259         /* same calculation as for normal posres, but with identical A and B states, and lambda==0 */
2260         posres_dx(x[ai], forceparams[type].fbposres.pos0, forceparams[type].fbposres.pos0,
2261                   com_sc, com_sc, 0.0,
2262                   pbc, refcoord_scaling, npbcdim,
2263                   dx, rdist, dpdl);
2264
2265         clear_rvec(fm);
2266         v = 0.0;
2267
2268         kk   = pr->fbposres.k;
2269         rfb  = pr->fbposres.r;
2270         rfb2 = sqr(rfb);
2271
2272         /* with rfb<0, push particle out of the sphere/cylinder/layer */
2273         bInvert = FALSE;
2274         if (rfb < 0.)
2275         {
2276             bInvert = TRUE;
2277             rfb     = -rfb;
2278         }
2279
2280         switch (pr->fbposres.geom)
2281         {
2282             case efbposresSPHERE:
2283                 /* spherical flat-bottom posres */
2284                 dr2 = norm2(dx);
2285                 if (dr2 > 0.0 &&
2286                     ( (dr2 > rfb2 && bInvert == FALSE ) || (dr2 < rfb2 && bInvert == TRUE ) )
2287                     )
2288                 {
2289                     dr   = sqrt(dr2);
2290                     v    = 0.5*kk*sqr(dr - rfb);
2291                     fact = -kk*(dr-rfb)/dr; /* Force pointing to the center pos0 */
2292                     svmul(fact, dx, fm);
2293                 }
2294                 break;
2295             case efbposresCYLINDER:
2296                 /* cylidrical flat-bottom posres in x-y plane. fm[ZZ] = 0. */
2297                 dr2 = sqr(dx[XX])+sqr(dx[YY]);
2298                 if  (dr2 > 0.0 &&
2299                      ( (dr2 > rfb2 && bInvert == FALSE ) || (dr2 < rfb2 && bInvert == TRUE ) )
2300                      )
2301                 {
2302                     dr     = sqrt(dr2);
2303                     invdr  = 1./dr;
2304                     v      = 0.5*kk*sqr(dr - rfb);
2305                     fm[XX] = -kk*(dr-rfb)*dx[XX]*invdr; /* Force pointing to the center */
2306                     fm[YY] = -kk*(dr-rfb)*dx[YY]*invdr;
2307                 }
2308                 break;
2309             case efbposresX: /* fbdim=XX */
2310             case efbposresY: /* fbdim=YY */
2311             case efbposresZ: /* fbdim=ZZ */
2312                 /* 1D flat-bottom potential */
2313                 fbdim = pr->fbposres.geom - efbposresX;
2314                 dr    = dx[fbdim];
2315                 if ( ( dr > rfb && bInvert == FALSE ) || ( 0 < dr && dr < rfb && bInvert == TRUE )  )
2316                 {
2317                     v         = 0.5*kk*sqr(dr - rfb);
2318                     fm[fbdim] = -kk*(dr - rfb);
2319                 }
2320                 else if ( (dr < (-rfb) && bInvert == FALSE ) || ( (-rfb) < dr && dr < 0 && bInvert == TRUE ))
2321                 {
2322                     v         = 0.5*kk*sqr(dr + rfb);
2323                     fm[fbdim] = -kk*(dr + rfb);
2324                 }
2325                 break;
2326         }
2327
2328         vtot += v;
2329
2330         for (m = 0; (m < DIM); m++)
2331         {
2332             f[ai][m]   += fm[m];
2333             /* Here we correct for the pbc_dx which included rdist */
2334             vir_diag[m] -= 0.5*(dx[m] + rdist[m])*fm[m];
2335         }
2336     }
2337
2338     return vtot;
2339 }
2340
2341
2342 real posres(int nbonds,
2343             const t_iatom forceatoms[], const t_iparams forceparams[],
2344             const rvec x[], rvec f[], rvec vir_diag,
2345             t_pbc *pbc,
2346             real lambda, real *dvdlambda,
2347             int refcoord_scaling, int ePBC, rvec comA, rvec comB)
2348 {
2349     int              i, ai, m, d, type, ki, npbcdim = 0;
2350     const t_iparams *pr;
2351     real             L1;
2352     real             vtot, kk, fm;
2353     real             posA, posB, ref = 0;
2354     rvec             comA_sc, comB_sc, rdist, dpdl, pos, dx;
2355     gmx_bool         bForceValid = TRUE;
2356
2357     if ((f == NULL) || (vir_diag == NULL))    /* should both be null together! */
2358     {
2359         bForceValid = FALSE;
2360     }
2361
2362     npbcdim = ePBC2npbcdim(ePBC);
2363
2364     if (refcoord_scaling == erscCOM)
2365     {
2366         clear_rvec(comA_sc);
2367         clear_rvec(comB_sc);
2368         for (m = 0; m < npbcdim; m++)
2369         {
2370             for (d = m; d < npbcdim; d++)
2371             {
2372                 comA_sc[m] += comA[d]*pbc->box[d][m];
2373                 comB_sc[m] += comB[d]*pbc->box[d][m];
2374             }
2375         }
2376     }
2377
2378     L1 = 1.0 - lambda;
2379
2380     vtot = 0.0;
2381     for (i = 0; (i < nbonds); )
2382     {
2383         type = forceatoms[i++];
2384         ai   = forceatoms[i++];
2385         pr   = &forceparams[type];
2386
2387         /* return dx, rdist, and dpdl */
2388         posres_dx(x[ai], forceparams[type].posres.pos0A, forceparams[type].posres.pos0B,
2389                   comA_sc, comB_sc, lambda,
2390                   pbc, refcoord_scaling, npbcdim,
2391                   dx, rdist, dpdl);
2392
2393         for (m = 0; (m < DIM); m++)
2394         {
2395             kk          = L1*pr->posres.fcA[m] + lambda*pr->posres.fcB[m];
2396             fm          = -kk*dx[m];
2397             vtot       += 0.5*kk*dx[m]*dx[m];
2398             *dvdlambda +=
2399                 0.5*(pr->posres.fcB[m] - pr->posres.fcA[m])*dx[m]*dx[m]
2400                 -fm*dpdl[m];
2401
2402             /* Here we correct for the pbc_dx which included rdist */
2403             if (bForceValid)
2404             {
2405                 f[ai][m]    += fm;
2406                 vir_diag[m] -= 0.5*(dx[m] + rdist[m])*fm;
2407             }
2408         }
2409     }
2410
2411     return vtot;
2412 }
2413
2414 static real low_angres(int nbonds,
2415                        const t_iatom forceatoms[], const t_iparams forceparams[],
2416                        const rvec x[], rvec f[], rvec fshift[],
2417                        const t_pbc *pbc, const t_graph *g,
2418                        real lambda, real *dvdlambda,
2419                        gmx_bool bZAxis)
2420 {
2421     int  i, m, type, ai, aj, ak, al;
2422     int  t1, t2;
2423     real phi, cos_phi, cos_phi2, vid, vtot, dVdphi;
2424     rvec r_ij, r_kl, f_i, f_k = {0, 0, 0};
2425     real st, sth, nrij2, nrkl2, c, cij, ckl;
2426
2427     ivec dt;
2428     t2 = 0; /* avoid warning with gcc-3.3. It is never used uninitialized */
2429
2430     vtot = 0.0;
2431     ak   = al = 0; /* to avoid warnings */
2432     for (i = 0; i < nbonds; )
2433     {
2434         type = forceatoms[i++];
2435         ai   = forceatoms[i++];
2436         aj   = forceatoms[i++];
2437         t1   = pbc_rvec_sub(pbc, x[aj], x[ai], r_ij);       /*  3               */
2438         if (!bZAxis)
2439         {
2440             ak   = forceatoms[i++];
2441             al   = forceatoms[i++];
2442             t2   = pbc_rvec_sub(pbc, x[al], x[ak], r_kl);  /*  3                */
2443         }
2444         else
2445         {
2446             r_kl[XX] = 0;
2447             r_kl[YY] = 0;
2448             r_kl[ZZ] = 1;
2449         }
2450
2451         cos_phi = cos_angle(r_ij, r_kl); /* 25          */
2452         phi     = acos(cos_phi);         /* 10           */
2453
2454         *dvdlambda += dopdihs_min(forceparams[type].pdihs.cpA,
2455                                   forceparams[type].pdihs.cpB,
2456                                   forceparams[type].pdihs.phiA,
2457                                   forceparams[type].pdihs.phiB,
2458                                   forceparams[type].pdihs.mult,
2459                                   phi, lambda, &vid, &dVdphi); /*  40  */
2460
2461         vtot += vid;
2462
2463         cos_phi2 = sqr(cos_phi);                /*   1          */
2464         if (cos_phi2 < 1)
2465         {
2466             st    = -dVdphi*gmx_invsqrt(1 - cos_phi2); /*  12           */
2467             sth   = st*cos_phi;                        /*   1           */
2468             nrij2 = iprod(r_ij, r_ij);                 /*   5           */
2469             nrkl2 = iprod(r_kl, r_kl);                 /*   5          */
2470
2471             c   = st*gmx_invsqrt(nrij2*nrkl2);         /*  11           */
2472             cij = sth/nrij2;                           /*  10           */
2473             ckl = sth/nrkl2;                           /*  10           */
2474
2475             for (m = 0; m < DIM; m++)                  /*  18+18       */
2476             {
2477                 f_i[m]    = (c*r_kl[m]-cij*r_ij[m]);
2478                 f[ai][m] += f_i[m];
2479                 f[aj][m] -= f_i[m];
2480                 if (!bZAxis)
2481                 {
2482                     f_k[m]    = (c*r_ij[m]-ckl*r_kl[m]);
2483                     f[ak][m] += f_k[m];
2484                     f[al][m] -= f_k[m];
2485                 }
2486             }
2487
2488             if (g)
2489             {
2490                 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
2491                 t1 = IVEC2IS(dt);
2492             }
2493             rvec_inc(fshift[t1], f_i);
2494             rvec_dec(fshift[CENTRAL], f_i);
2495             if (!bZAxis)
2496             {
2497                 if (g)
2498                 {
2499                     ivec_sub(SHIFT_IVEC(g, ak), SHIFT_IVEC(g, al), dt);
2500                     t2 = IVEC2IS(dt);
2501                 }
2502                 rvec_inc(fshift[t2], f_k);
2503                 rvec_dec(fshift[CENTRAL], f_k);
2504             }
2505         }
2506     }
2507
2508     return vtot; /*  184 / 157 (bZAxis)  total  */
2509 }
2510
2511 real angres(int nbonds,
2512             const t_iatom forceatoms[], const t_iparams forceparams[],
2513             const rvec x[], rvec f[], rvec fshift[],
2514             const t_pbc *pbc, const t_graph *g,
2515             real lambda, real *dvdlambda,
2516             const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2517             int gmx_unused *global_atom_index)
2518 {
2519     return low_angres(nbonds, forceatoms, forceparams, x, f, fshift, pbc, g,
2520                       lambda, dvdlambda, FALSE);
2521 }
2522
2523 real angresz(int nbonds,
2524              const t_iatom forceatoms[], const t_iparams forceparams[],
2525              const rvec x[], rvec f[], rvec fshift[],
2526              const t_pbc *pbc, const t_graph *g,
2527              real lambda, real *dvdlambda,
2528              const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2529              int gmx_unused *global_atom_index)
2530 {
2531     return low_angres(nbonds, forceatoms, forceparams, x, f, fshift, pbc, g,
2532                       lambda, dvdlambda, TRUE);
2533 }
2534
2535 real dihres(int nbonds,
2536             const t_iatom forceatoms[], const t_iparams forceparams[],
2537             const rvec x[], rvec f[], rvec fshift[],
2538             const t_pbc *pbc, const t_graph *g,
2539             real lambda, real *dvdlambda,
2540             const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2541             int gmx_unused  *global_atom_index)
2542 {
2543     real vtot = 0;
2544     int  ai, aj, ak, al, i, k, type, t1, t2, t3;
2545     real phi0A, phi0B, dphiA, dphiB, kfacA, kfacB, phi0, dphi, kfac;
2546     real phi, ddphi, ddp, ddp2, dp, sign, d2r, fc, L1;
2547     rvec r_ij, r_kj, r_kl, m, n;
2548
2549     L1 = 1.0-lambda;
2550
2551     d2r = DEG2RAD;
2552     k   = 0;
2553
2554     for (i = 0; (i < nbonds); )
2555     {
2556         type = forceatoms[i++];
2557         ai   = forceatoms[i++];
2558         aj   = forceatoms[i++];
2559         ak   = forceatoms[i++];
2560         al   = forceatoms[i++];
2561
2562         phi0A  = forceparams[type].dihres.phiA*d2r;
2563         dphiA  = forceparams[type].dihres.dphiA*d2r;
2564         kfacA  = forceparams[type].dihres.kfacA;
2565
2566         phi0B  = forceparams[type].dihres.phiB*d2r;
2567         dphiB  = forceparams[type].dihres.dphiB*d2r;
2568         kfacB  = forceparams[type].dihres.kfacB;
2569
2570         phi0  = L1*phi0A + lambda*phi0B;
2571         dphi  = L1*dphiA + lambda*dphiB;
2572         kfac  = L1*kfacA + lambda*kfacB;
2573
2574         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
2575                         &sign, &t1, &t2, &t3);
2576         /* 84 flops */
2577
2578         if (debug)
2579         {
2580             fprintf(debug, "dihres[%d]: %d %d %d %d : phi=%f, dphi=%f, kfac=%f\n",
2581                     k++, ai, aj, ak, al, phi0, dphi, kfac);
2582         }
2583         /* phi can jump if phi0 is close to Pi/-Pi, which will cause huge
2584          * force changes if we just apply a normal harmonic.
2585          * Instead, we first calculate phi-phi0 and take it modulo (-Pi,Pi).
2586          * This means we will never have the periodicity problem, unless
2587          * the dihedral is Pi away from phiO, which is very unlikely due to
2588          * the potential.
2589          */
2590         dp = phi-phi0;
2591         make_dp_periodic(&dp);
2592
2593         if (dp > dphi)
2594         {
2595             ddp = dp-dphi;
2596         }
2597         else if (dp < -dphi)
2598         {
2599             ddp = dp+dphi;
2600         }
2601         else
2602         {
2603             ddp = 0;
2604         }
2605
2606         if (ddp != 0.0)
2607         {
2608             ddp2  = ddp*ddp;
2609             vtot += 0.5*kfac*ddp2;
2610             ddphi = kfac*ddp;
2611
2612             *dvdlambda += 0.5*(kfacB - kfacA)*ddp2;
2613             /* lambda dependence from changing restraint distances */
2614             if (ddp > 0)
2615             {
2616                 *dvdlambda -= kfac*ddp*((dphiB - dphiA)+(phi0B - phi0A));
2617             }
2618             else if (ddp < 0)
2619             {
2620                 *dvdlambda += kfac*ddp*((dphiB - dphiA)-(phi0B - phi0A));
2621             }
2622             do_dih_fup(ai, aj, ak, al, ddphi, r_ij, r_kj, r_kl, m, n,
2623                        f, fshift, pbc, g, x, t1, t2, t3);      /* 112           */
2624         }
2625     }
2626     return vtot;
2627 }
2628
2629
2630 real unimplemented(int gmx_unused nbonds,
2631                    const t_iatom gmx_unused forceatoms[], const t_iparams gmx_unused forceparams[],
2632                    const rvec gmx_unused x[], rvec gmx_unused f[], rvec gmx_unused fshift[],
2633                    const t_pbc gmx_unused *pbc, const t_graph  gmx_unused *g,
2634                    real gmx_unused lambda, real gmx_unused *dvdlambda,
2635                    const t_mdatoms  gmx_unused *md, t_fcdata gmx_unused *fcd,
2636                    int gmx_unused *global_atom_index)
2637 {
2638     gmx_impl("*** you are using a not implemented function");
2639
2640     return 0.0; /* To make the compiler happy */
2641 }
2642
2643 real rbdihs(int nbonds,
2644             const t_iatom forceatoms[], const t_iparams forceparams[],
2645             const rvec x[], rvec f[], rvec fshift[],
2646             const t_pbc *pbc, const t_graph *g,
2647             real lambda, real *dvdlambda,
2648             const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2649             int gmx_unused *global_atom_index)
2650 {
2651     const real c0 = 0.0, c1 = 1.0, c2 = 2.0, c3 = 3.0, c4 = 4.0, c5 = 5.0;
2652     int        type, ai, aj, ak, al, i, j;
2653     int        t1, t2, t3;
2654     rvec       r_ij, r_kj, r_kl, m, n;
2655     real       parmA[NR_RBDIHS];
2656     real       parmB[NR_RBDIHS];
2657     real       parm[NR_RBDIHS];
2658     real       cos_phi, phi, rbp, rbpBA;
2659     real       v, sign, ddphi, sin_phi;
2660     real       cosfac, vtot;
2661     real       L1        = 1.0-lambda;
2662     real       dvdl_term = 0;
2663
2664     vtot = 0.0;
2665     for (i = 0; (i < nbonds); )
2666     {
2667         type = forceatoms[i++];
2668         ai   = forceatoms[i++];
2669         aj   = forceatoms[i++];
2670         ak   = forceatoms[i++];
2671         al   = forceatoms[i++];
2672
2673         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
2674                         &sign, &t1, &t2, &t3);  /*  84          */
2675
2676         /* Change to polymer convention */
2677         if (phi < c0)
2678         {
2679             phi += M_PI;
2680         }
2681         else
2682         {
2683             phi -= M_PI;    /*   1              */
2684
2685         }
2686         cos_phi = cos(phi);
2687         /* Beware of accuracy loss, cannot use 1-sqrt(cos^2) ! */
2688         sin_phi = sin(phi);
2689
2690         for (j = 0; (j < NR_RBDIHS); j++)
2691         {
2692             parmA[j] = forceparams[type].rbdihs.rbcA[j];
2693             parmB[j] = forceparams[type].rbdihs.rbcB[j];
2694             parm[j]  = L1*parmA[j]+lambda*parmB[j];
2695         }
2696         /* Calculate cosine powers */
2697         /* Calculate the energy */
2698         /* Calculate the derivative */
2699
2700         v            = parm[0];
2701         dvdl_term   += (parmB[0]-parmA[0]);
2702         ddphi        = c0;
2703         cosfac       = c1;
2704
2705         rbp          = parm[1];
2706         rbpBA        = parmB[1]-parmA[1];
2707         ddphi       += rbp*cosfac;
2708         cosfac      *= cos_phi;
2709         v           += cosfac*rbp;
2710         dvdl_term   += cosfac*rbpBA;
2711         rbp          = parm[2];
2712         rbpBA        = parmB[2]-parmA[2];
2713         ddphi       += c2*rbp*cosfac;
2714         cosfac      *= cos_phi;
2715         v           += cosfac*rbp;
2716         dvdl_term   += cosfac*rbpBA;
2717         rbp          = parm[3];
2718         rbpBA        = parmB[3]-parmA[3];
2719         ddphi       += c3*rbp*cosfac;
2720         cosfac      *= cos_phi;
2721         v           += cosfac*rbp;
2722         dvdl_term   += cosfac*rbpBA;
2723         rbp          = parm[4];
2724         rbpBA        = parmB[4]-parmA[4];
2725         ddphi       += c4*rbp*cosfac;
2726         cosfac      *= cos_phi;
2727         v           += cosfac*rbp;
2728         dvdl_term   += cosfac*rbpBA;
2729         rbp          = parm[5];
2730         rbpBA        = parmB[5]-parmA[5];
2731         ddphi       += c5*rbp*cosfac;
2732         cosfac      *= cos_phi;
2733         v           += cosfac*rbp;
2734         dvdl_term   += cosfac*rbpBA;
2735
2736         ddphi = -ddphi*sin_phi;         /*  11          */
2737
2738         do_dih_fup(ai, aj, ak, al, ddphi, r_ij, r_kj, r_kl, m, n,
2739                    f, fshift, pbc, g, x, t1, t2, t3); /* 112            */
2740         vtot += v;
2741     }
2742     *dvdlambda += dvdl_term;
2743
2744     return vtot;
2745 }
2746
/* Work out the neighbouring grid indices of CMAP grid position ip.
 *
 * ip            index along one grid dimension
 * grid_spacing  used here as the number of grid points along that dimension
 * ipm1          out: index before ip
 * ipp1          out: index after ip
 * ipp2          out: second index after ip
 *
 * A negative ip is shifted by grid_spacing-1 and an ip larger than
 * grid_spacing by -(grid_spacing+1). Neighbours wrap around only at the
 * first, second-to-last and last index; the first matching case wins.
 * NOTE(review): ip == grid_spacing is not remapped, so its neighbours come
 * out as grid_spacing-1, grid_spacing+1 and grid_spacing+2 - confirm that
 * callers can never pass that value.
 *
 * Returns the (possibly shifted) ip.
 */
int cmap_setup_grid_index(int ip, int grid_spacing, int *ipm1, int *ipp1, int *ipp2)
{
    const int last = grid_spacing - 1;
    int       below, above, above2;

    /* Shift indices that fall outside the grid */
    if (ip < 0)
    {
        ip += last;
    }
    else if (ip > grid_spacing)
    {
        ip -= grid_spacing + 1;
    }

    /* Plain neighbours first ... */
    below  = ip - 1;
    above  = ip + 1;
    above2 = ip + 2;

    /* ... then the periodic corrections at the grid edges */
    if (ip == 0)
    {
        below = last;
    }
    else if (ip == last)
    {
        above  = 0;
        above2 = 1;
    }
    else if (ip == last - 1)
    {
        above2 = 0;
    }

    *ipm1 = below;
    *ipp1 = above;
    *ipp2 = above2;

    return ip;
}
2785
2786 real cmap_dihs(int nbonds,
2787                const t_iatom forceatoms[], const t_iparams forceparams[],
2788                const gmx_cmap_t *cmap_grid,
2789                const rvec x[], rvec f[], rvec fshift[],
2790                const t_pbc *pbc, const t_graph *g,
2791                real gmx_unused lambda, real gmx_unused *dvdlambda,
2792                const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
2793                int  gmx_unused *global_atom_index)
2794 {
2795     int         i, j, k, n, idx;
2796     int         ai, aj, ak, al, am;
2797     int         a1i, a1j, a1k, a1l, a2i, a2j, a2k, a2l;
2798     int         type, cmapA;
2799     int         t11, t21, t31, t12, t22, t32;
2800     int         iphi1, ip1m1, ip1p1, ip1p2;
2801     int         iphi2, ip2m1, ip2p1, ip2p2;
2802     int         l1, l2, l3, l4;
2803     int         pos1, pos2, pos3, pos4, tmp;
2804
2805     real        ty[4], ty1[4], ty2[4], ty12[4], tc[16], tx[16];
2806     real        phi1, psi1, cos_phi1, sin_phi1, sign1, xphi1;
2807     real        phi2, psi2, cos_phi2, sin_phi2, sign2, xphi2;
2808     real        dx, xx, tt, tu, e, df1, df2, ddf1, ddf2, ddf12, vtot;
2809     real        ra21, rb21, rg21, rg1, rgr1, ra2r1, rb2r1, rabr1;
2810     real        ra22, rb22, rg22, rg2, rgr2, ra2r2, rb2r2, rabr2;
2811     real        fg1, hg1, fga1, hgb1, gaa1, gbb1;
2812     real        fg2, hg2, fga2, hgb2, gaa2, gbb2;
2813     real        fac;
2814
2815     rvec        r1_ij, r1_kj, r1_kl, m1, n1;
2816     rvec        r2_ij, r2_kj, r2_kl, m2, n2;
2817     rvec        f1_i, f1_j, f1_k, f1_l;
2818     rvec        f2_i, f2_j, f2_k, f2_l;
2819     rvec        a1, b1, a2, b2;
2820     rvec        f1, g1, h1, f2, g2, h2;
2821     rvec        dtf1, dtg1, dth1, dtf2, dtg2, dth2;
2822     ivec        jt1, dt1_ij, dt1_kj, dt1_lj;
2823     ivec        jt2, dt2_ij, dt2_kj, dt2_lj;
2824
2825     const real *cmapd;
2826
2827     int         loop_index[4][4] = {
2828         {0, 4, 8, 12},
2829         {1, 5, 9, 13},
2830         {2, 6, 10, 14},
2831         {3, 7, 11, 15}
2832     };
2833
2834     /* Total CMAP energy */
2835     vtot = 0;
2836
2837     for (n = 0; n < nbonds; )
2838     {
2839         /* Five atoms are involved in the two torsions */
2840         type   = forceatoms[n++];
2841         ai     = forceatoms[n++];
2842         aj     = forceatoms[n++];
2843         ak     = forceatoms[n++];
2844         al     = forceatoms[n++];
2845         am     = forceatoms[n++];
2846
2847         /* Which CMAP type is this */
2848         cmapA = forceparams[type].cmap.cmapA;
2849         cmapd = cmap_grid->cmapdata[cmapA].cmap;
2850
2851         /* First torsion */
2852         a1i   = ai;
2853         a1j   = aj;
2854         a1k   = ak;
2855         a1l   = al;
2856
2857         phi1  = dih_angle(x[a1i], x[a1j], x[a1k], x[a1l], pbc, r1_ij, r1_kj, r1_kl, m1, n1,
2858                           &sign1, &t11, &t21, &t31);  /* 84 */
2859
2860         cos_phi1 = cos(phi1);
2861
2862         a1[0] = r1_ij[1]*r1_kj[2]-r1_ij[2]*r1_kj[1];
2863         a1[1] = r1_ij[2]*r1_kj[0]-r1_ij[0]*r1_kj[2];
2864         a1[2] = r1_ij[0]*r1_kj[1]-r1_ij[1]*r1_kj[0]; /* 9 */
2865
2866         b1[0] = r1_kl[1]*r1_kj[2]-r1_kl[2]*r1_kj[1];
2867         b1[1] = r1_kl[2]*r1_kj[0]-r1_kl[0]*r1_kj[2];
2868         b1[2] = r1_kl[0]*r1_kj[1]-r1_kl[1]*r1_kj[0]; /* 9 */
2869
2870         tmp = pbc_rvec_sub(pbc, x[a1l], x[a1k], h1);
2871
2872         ra21  = iprod(a1, a1);       /* 5 */
2873         rb21  = iprod(b1, b1);       /* 5 */
2874         rg21  = iprod(r1_kj, r1_kj); /* 5 */
2875         rg1   = sqrt(rg21);
2876
2877         rgr1  = 1.0/rg1;
2878         ra2r1 = 1.0/ra21;
2879         rb2r1 = 1.0/rb21;
2880         rabr1 = sqrt(ra2r1*rb2r1);
2881
2882         sin_phi1 = rg1 * rabr1 * iprod(a1, h1) * (-1);
2883
2884         if (cos_phi1 < -0.5 || cos_phi1 > 0.5)
2885         {
2886             phi1 = asin(sin_phi1);
2887
2888             if (cos_phi1 < 0)
2889             {
2890                 if (phi1 > 0)
2891                 {
2892                     phi1 = M_PI - phi1;
2893                 }
2894                 else
2895                 {
2896                     phi1 = -M_PI - phi1;
2897                 }
2898             }
2899         }
2900         else
2901         {
2902             phi1 = acos(cos_phi1);
2903
2904             if (sin_phi1 < 0)
2905             {
2906                 phi1 = -phi1;
2907             }
2908         }
2909
2910         xphi1 = phi1 + M_PI; /* 1 */
2911
2912         /* Second torsion */
2913         a2i   = aj;
2914         a2j   = ak;
2915         a2k   = al;
2916         a2l   = am;
2917
2918         phi2  = dih_angle(x[a2i], x[a2j], x[a2k], x[a2l], pbc, r2_ij, r2_kj, r2_kl, m2, n2,
2919                           &sign2, &t12, &t22, &t32); /* 84 */
2920
2921         cos_phi2 = cos(phi2);
2922
2923         a2[0] = r2_ij[1]*r2_kj[2]-r2_ij[2]*r2_kj[1];
2924         a2[1] = r2_ij[2]*r2_kj[0]-r2_ij[0]*r2_kj[2];
2925         a2[2] = r2_ij[0]*r2_kj[1]-r2_ij[1]*r2_kj[0]; /* 9 */
2926
2927         b2[0] = r2_kl[1]*r2_kj[2]-r2_kl[2]*r2_kj[1];
2928         b2[1] = r2_kl[2]*r2_kj[0]-r2_kl[0]*r2_kj[2];
2929         b2[2] = r2_kl[0]*r2_kj[1]-r2_kl[1]*r2_kj[0]; /* 9 */
2930
2931         tmp = pbc_rvec_sub(pbc, x[a2l], x[a2k], h2);
2932
2933         ra22  = iprod(a2, a2);         /* 5 */
2934         rb22  = iprod(b2, b2);         /* 5 */
2935         rg22  = iprod(r2_kj, r2_kj);   /* 5 */
2936         rg2   = sqrt(rg22);
2937
2938         rgr2  = 1.0/rg2;
2939         ra2r2 = 1.0/ra22;
2940         rb2r2 = 1.0/rb22;
2941         rabr2 = sqrt(ra2r2*rb2r2);
2942
2943         sin_phi2 = rg2 * rabr2 * iprod(a2, h2) * (-1);
2944
2945         if (cos_phi2 < -0.5 || cos_phi2 > 0.5)
2946         {
2947             phi2 = asin(sin_phi2);
2948
2949             if (cos_phi2 < 0)
2950             {
2951                 if (phi2 > 0)
2952                 {
2953                     phi2 = M_PI - phi2;
2954                 }
2955                 else
2956                 {
2957                     phi2 = -M_PI - phi2;
2958                 }
2959             }
2960         }
2961         else
2962         {
2963             phi2 = acos(cos_phi2);
2964
2965             if (sin_phi2 < 0)
2966             {
2967                 phi2 = -phi2;
2968             }
2969         }
2970
2971         xphi2 = phi2 + M_PI; /* 1 */
2972
2973         /* Range mangling */
2974         if (xphi1 < 0)
2975         {
2976             xphi1 = xphi1 + 2*M_PI;
2977         }
2978         else if (xphi1 >= 2*M_PI)
2979         {
2980             xphi1 = xphi1 - 2*M_PI;
2981         }
2982
2983         if (xphi2 < 0)
2984         {
2985             xphi2 = xphi2 + 2*M_PI;
2986         }
2987         else if (xphi2 >= 2*M_PI)
2988         {
2989             xphi2 = xphi2 - 2*M_PI;
2990         }
2991
2992         /* Number of grid points */
2993         dx = 2*M_PI / cmap_grid->grid_spacing;
2994
2995         /* Where on the grid are we */
2996         iphi1 = (int)(xphi1/dx);
2997         iphi2 = (int)(xphi2/dx);
2998
2999         iphi1 = cmap_setup_grid_index(iphi1, cmap_grid->grid_spacing, &ip1m1, &ip1p1, &ip1p2);
3000         iphi2 = cmap_setup_grid_index(iphi2, cmap_grid->grid_spacing, &ip2m1, &ip2p1, &ip2p2);
3001
3002         pos1    = iphi1*cmap_grid->grid_spacing+iphi2;
3003         pos2    = ip1p1*cmap_grid->grid_spacing+iphi2;
3004         pos3    = ip1p1*cmap_grid->grid_spacing+ip2p1;
3005         pos4    = iphi1*cmap_grid->grid_spacing+ip2p1;
3006
3007         ty[0]   = cmapd[pos1*4];
3008         ty[1]   = cmapd[pos2*4];
3009         ty[2]   = cmapd[pos3*4];
3010         ty[3]   = cmapd[pos4*4];
3011
3012         ty1[0]   = cmapd[pos1*4+1];
3013         ty1[1]   = cmapd[pos2*4+1];
3014         ty1[2]   = cmapd[pos3*4+1];
3015         ty1[3]   = cmapd[pos4*4+1];
3016
3017         ty2[0]   = cmapd[pos1*4+2];
3018         ty2[1]   = cmapd[pos2*4+2];
3019         ty2[2]   = cmapd[pos3*4+2];
3020         ty2[3]   = cmapd[pos4*4+2];
3021
3022         ty12[0]   = cmapd[pos1*4+3];
3023         ty12[1]   = cmapd[pos2*4+3];
3024         ty12[2]   = cmapd[pos3*4+3];
3025         ty12[3]   = cmapd[pos4*4+3];
3026
3027         /* Switch to degrees */
3028         dx    = 360.0 / cmap_grid->grid_spacing;
3029         xphi1 = xphi1 * RAD2DEG;
3030         xphi2 = xphi2 * RAD2DEG;
3031
3032         for (i = 0; i < 4; i++) /* 16 */
3033         {
3034             tx[i]    = ty[i];
3035             tx[i+4]  = ty1[i]*dx;
3036             tx[i+8]  = ty2[i]*dx;
3037             tx[i+12] = ty12[i]*dx*dx;
3038         }
3039
3040         idx = 0;
3041         for (i = 0; i < 4; i++) /* 1056 */
3042         {
3043             for (j = 0; j < 4; j++)
3044             {
3045                 xx = 0;
3046                 for (k = 0; k < 16; k++)
3047                 {
3048                     xx = xx + cmap_coeff_matrix[k*16+idx]*tx[k];
3049                 }
3050
3051                 idx++;
3052                 tc[i*4+j] = xx;
3053             }
3054         }
3055
3056         tt    = (xphi1-iphi1*dx)/dx;
3057         tu    = (xphi2-iphi2*dx)/dx;
3058
3059         e     = 0;
3060         df1   = 0;
3061         df2   = 0;
3062         ddf1  = 0;
3063         ddf2  = 0;
3064         ddf12 = 0;
3065
3066         for (i = 3; i >= 0; i--)
3067         {
3068             l1 = loop_index[i][3];
3069             l2 = loop_index[i][2];
3070             l3 = loop_index[i][1];
3071
3072             e     = tt * e    + ((tc[i*4+3]*tu+tc[i*4+2])*tu + tc[i*4+1])*tu+tc[i*4];
3073             df1   = tu * df1  + (3.0*tc[l1]*tt+2.0*tc[l2])*tt+tc[l3];
3074             df2   = tt * df2  + (3.0*tc[i*4+3]*tu+2.0*tc[i*4+2])*tu+tc[i*4+1];
3075             ddf1  = tu * ddf1 + 2.0*3.0*tc[l1]*tt+2.0*tc[l2];
3076             ddf2  = tt * ddf2 + 2.0*3.0*tc[4*i+3]*tu+2.0*tc[4*i+2];
3077         }
3078
3079         ddf12 = tc[5] + 2.0*tc[9]*tt + 3.0*tc[13]*tt*tt + 2.0*tu*(tc[6]+2.0*tc[10]*tt+3.0*tc[14]*tt*tt) +
3080             3.0*tu*tu*(tc[7]+2.0*tc[11]*tt+3.0*tc[15]*tt*tt);
3081
3082         fac     = RAD2DEG/dx;
3083         df1     = df1   * fac;
3084         df2     = df2   * fac;
3085         ddf1    = ddf1  * fac * fac;
3086         ddf2    = ddf2  * fac * fac;
3087         ddf12   = ddf12 * fac * fac;
3088
3089         /* CMAP energy */
3090         vtot += e;
3091
3092         /* Do forces - first torsion */
3093         fg1       = iprod(r1_ij, r1_kj);
3094         hg1       = iprod(r1_kl, r1_kj);
3095         fga1      = fg1*ra2r1*rgr1;
3096         hgb1      = hg1*rb2r1*rgr1;
3097         gaa1      = -ra2r1*rg1;
3098         gbb1      = rb2r1*rg1;
3099
3100         for (i = 0; i < DIM; i++)
3101         {
3102             dtf1[i]   = gaa1 * a1[i];
3103             dtg1[i]   = fga1 * a1[i] - hgb1 * b1[i];
3104             dth1[i]   = gbb1 * b1[i];
3105
3106             f1[i]     = df1  * dtf1[i];
3107             g1[i]     = df1  * dtg1[i];
3108             h1[i]     = df1  * dth1[i];
3109
3110             f1_i[i]   =  f1[i];
3111             f1_j[i]   = -f1[i] - g1[i];
3112             f1_k[i]   =  h1[i] + g1[i];
3113             f1_l[i]   = -h1[i];
3114
3115             f[a1i][i] = f[a1i][i] + f1_i[i];
3116             f[a1j][i] = f[a1j][i] + f1_j[i]; /* - f1[i] - g1[i] */
3117             f[a1k][i] = f[a1k][i] + f1_k[i]; /* h1[i] + g1[i] */
3118             f[a1l][i] = f[a1l][i] + f1_l[i]; /* h1[i] */
3119         }
3120
3121         /* Do forces - second torsion */
3122         fg2       = iprod(r2_ij, r2_kj);
3123         hg2       = iprod(r2_kl, r2_kj);
3124         fga2      = fg2*ra2r2*rgr2;
3125         hgb2      = hg2*rb2r2*rgr2;
3126         gaa2      = -ra2r2*rg2;
3127         gbb2      = rb2r2*rg2;
3128
3129         for (i = 0; i < DIM; i++)
3130         {
3131             dtf2[i]   = gaa2 * a2[i];
3132             dtg2[i]   = fga2 * a2[i] - hgb2 * b2[i];
3133             dth2[i]   = gbb2 * b2[i];
3134
3135             f2[i]     = df2  * dtf2[i];
3136             g2[i]     = df2  * dtg2[i];
3137             h2[i]     = df2  * dth2[i];
3138
3139             f2_i[i]   =  f2[i];
3140             f2_j[i]   = -f2[i] - g2[i];
3141             f2_k[i]   =  h2[i] + g2[i];
3142             f2_l[i]   = -h2[i];
3143
3144             f[a2i][i] = f[a2i][i] + f2_i[i]; /* f2[i] */
3145             f[a2j][i] = f[a2j][i] + f2_j[i]; /* - f2[i] - g2[i] */
3146             f[a2k][i] = f[a2k][i] + f2_k[i]; /* h2[i] + g2[i] */
3147             f[a2l][i] = f[a2l][i] + f2_l[i]; /* - h2[i] */
3148         }
3149
3150         /* Shift forces */
3151         if (g)
3152         {
3153             copy_ivec(SHIFT_IVEC(g, a1j), jt1);
3154             ivec_sub(SHIFT_IVEC(g, a1i),  jt1, dt1_ij);
3155             ivec_sub(SHIFT_IVEC(g, a1k),  jt1, dt1_kj);
3156             ivec_sub(SHIFT_IVEC(g, a1l),  jt1, dt1_lj);
3157             t11 = IVEC2IS(dt1_ij);
3158             t21 = IVEC2IS(dt1_kj);
3159             t31 = IVEC2IS(dt1_lj);
3160
3161             copy_ivec(SHIFT_IVEC(g, a2j), jt2);
3162             ivec_sub(SHIFT_IVEC(g, a2i),  jt2, dt2_ij);
3163             ivec_sub(SHIFT_IVEC(g, a2k),  jt2, dt2_kj);
3164             ivec_sub(SHIFT_IVEC(g, a2l),  jt2, dt2_lj);
3165             t12 = IVEC2IS(dt2_ij);
3166             t22 = IVEC2IS(dt2_kj);
3167             t32 = IVEC2IS(dt2_lj);
3168         }
3169         else if (pbc)
3170         {
3171             t31 = pbc_rvec_sub(pbc, x[a1l], x[a1j], h1);
3172             t32 = pbc_rvec_sub(pbc, x[a2l], x[a2j], h2);
3173         }
3174         else
3175         {
3176             t31 = CENTRAL;
3177             t32 = CENTRAL;
3178         }
3179
3180         rvec_inc(fshift[t11], f1_i);
3181         rvec_inc(fshift[CENTRAL], f1_j);
3182         rvec_inc(fshift[t21], f1_k);
3183         rvec_inc(fshift[t31], f1_l);
3184
3185         rvec_inc(fshift[t21], f2_i);
3186         rvec_inc(fshift[CENTRAL], f2_j);
3187         rvec_inc(fshift[t22], f2_k);
3188         rvec_inc(fshift[t32], f2_l);
3189     }
3190     return vtot;
3191 }
3192
3193
3194
3195 /***********************************************************
3196  *
3197  *   G R O M O S  9 6   F U N C T I O N S
3198  *
3199  ***********************************************************/
3200 real g96harmonic(real kA, real kB, real xA, real xB, real x, real lambda,
3201                  real *V, real *F)
3202 {
3203     const real half = 0.5;
3204     real       L1, kk, x0, dx, dx2;
3205     real       v, f, dvdlambda;
3206
3207     L1    = 1.0-lambda;
3208     kk    = L1*kA+lambda*kB;
3209     x0    = L1*xA+lambda*xB;
3210
3211     dx    = x-x0;
3212     dx2   = dx*dx;
3213
3214     f          = -kk*dx;
3215     v          = half*kk*dx2;
3216     dvdlambda  = half*(kB-kA)*dx2 + (xA-xB)*kk*dx;
3217
3218     *F    = f;
3219     *V    = v;
3220
3221     return dvdlambda;
3222
3223     /* That was 21 flops */
3224 }
3225
3226 real g96bonds(int nbonds,
3227               const t_iatom forceatoms[], const t_iparams forceparams[],
3228               const rvec x[], rvec f[], rvec fshift[],
3229               const t_pbc *pbc, const t_graph *g,
3230               real lambda, real *dvdlambda,
3231               const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
3232               int gmx_unused *global_atom_index)
3233 {
3234     int  i, m, ki, ai, aj, type;
3235     real dr2, fbond, vbond, fij, vtot;
3236     rvec dx;
3237     ivec dt;
3238
3239     vtot = 0.0;
3240     for (i = 0; (i < nbonds); )
3241     {
3242         type = forceatoms[i++];
3243         ai   = forceatoms[i++];
3244         aj   = forceatoms[i++];
3245
3246         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
3247         dr2  = iprod(dx, dx);                       /*   5              */
3248
3249         *dvdlambda += g96harmonic(forceparams[type].harmonic.krA,
3250                                   forceparams[type].harmonic.krB,
3251                                   forceparams[type].harmonic.rA,
3252                                   forceparams[type].harmonic.rB,
3253                                   dr2, lambda, &vbond, &fbond);
3254
3255         vtot  += 0.5*vbond;                         /* 1*/
3256 #ifdef DEBUG
3257         if (debug)
3258         {
3259             fprintf(debug, "G96-BONDS: dr = %10g  vbond = %10g  fbond = %10g\n",
3260                     sqrt(dr2), vbond, fbond);
3261         }
3262 #endif
3263
3264         if (g)
3265         {
3266             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
3267             ki = IVEC2IS(dt);
3268         }
3269         for (m = 0; (m < DIM); m++)     /*  15          */
3270         {
3271             fij                 = fbond*dx[m];
3272             f[ai][m]           += fij;
3273             f[aj][m]           -= fij;
3274             fshift[ki][m]      += fij;
3275             fshift[CENTRAL][m] -= fij;
3276         }
3277     }               /* 44 TOTAL */
3278     return vtot;
3279 }
3280
3281 real g96bond_angle(const rvec xi, const rvec xj, const rvec xk, const t_pbc *pbc,
3282                    rvec r_ij, rvec r_kj,
3283                    int *t1, int *t2)
3284 /* Return value is the angle between the bonds i-j and j-k */
3285 {
3286     real costh;
3287
3288     *t1 = pbc_rvec_sub(pbc, xi, xj, r_ij); /*  3                */
3289     *t2 = pbc_rvec_sub(pbc, xk, xj, r_kj); /*  3                */
3290
3291     costh = cos_angle(r_ij, r_kj);         /* 25                */
3292     /* 41 TOTAL */
3293     return costh;
3294 }
3295
3296 real g96angles(int nbonds,
3297                const t_iatom forceatoms[], const t_iparams forceparams[],
3298                const rvec x[], rvec f[], rvec fshift[],
3299                const t_pbc *pbc, const t_graph *g,
3300                real lambda, real *dvdlambda,
3301                const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
3302                int gmx_unused *global_atom_index)
3303 {
3304     int  i, ai, aj, ak, type, m, t1, t2;
3305     rvec r_ij, r_kj;
3306     real cos_theta, dVdt, va, vtot;
3307     real rij_1, rij_2, rkj_1, rkj_2, rijrkj_1;
3308     rvec f_i, f_j, f_k;
3309     ivec jt, dt_ij, dt_kj;
3310
3311     vtot = 0.0;
3312     for (i = 0; (i < nbonds); )
3313     {
3314         type = forceatoms[i++];
3315         ai   = forceatoms[i++];
3316         aj   = forceatoms[i++];
3317         ak   = forceatoms[i++];
3318
3319         cos_theta  = g96bond_angle(x[ai], x[aj], x[ak], pbc, r_ij, r_kj, &t1, &t2);
3320
3321         *dvdlambda += g96harmonic(forceparams[type].harmonic.krA,
3322                                   forceparams[type].harmonic.krB,
3323                                   forceparams[type].harmonic.rA,
3324                                   forceparams[type].harmonic.rB,
3325                                   cos_theta, lambda, &va, &dVdt);
3326         vtot    += va;
3327
3328         rij_1    = gmx_invsqrt(iprod(r_ij, r_ij));
3329         rkj_1    = gmx_invsqrt(iprod(r_kj, r_kj));
3330         rij_2    = rij_1*rij_1;
3331         rkj_2    = rkj_1*rkj_1;
3332         rijrkj_1 = rij_1*rkj_1;                 /* 23 */
3333
3334 #ifdef DEBUG
3335         if (debug)
3336         {
3337             fprintf(debug, "G96ANGLES: costheta = %10g  vth = %10g  dV/dct = %10g\n",
3338                     cos_theta, va, dVdt);
3339         }
3340 #endif
3341         for (m = 0; (m < DIM); m++)     /*  42  */
3342         {
3343             f_i[m]    = dVdt*(r_kj[m]*rijrkj_1 - r_ij[m]*rij_2*cos_theta);
3344             f_k[m]    = dVdt*(r_ij[m]*rijrkj_1 - r_kj[m]*rkj_2*cos_theta);
3345             f_j[m]    = -f_i[m]-f_k[m];
3346             f[ai][m] += f_i[m];
3347             f[aj][m] += f_j[m];
3348             f[ak][m] += f_k[m];
3349         }
3350
3351         if (g)
3352         {
3353             copy_ivec(SHIFT_IVEC(g, aj), jt);
3354
3355             ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
3356             ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
3357             t1 = IVEC2IS(dt_ij);
3358             t2 = IVEC2IS(dt_kj);
3359         }
3360         rvec_inc(fshift[t1], f_i);
3361         rvec_inc(fshift[CENTRAL], f_j);
3362         rvec_inc(fshift[t2], f_k);          /* 9 */
3363         /* 163 TOTAL    */
3364     }
3365     return vtot;
3366 }
3367
3368 real cross_bond_bond(int nbonds,
3369                      const t_iatom forceatoms[], const t_iparams forceparams[],
3370                      const rvec x[], rvec f[], rvec fshift[],
3371                      const t_pbc *pbc, const t_graph *g,
3372                      real gmx_unused lambda, real gmx_unused *dvdlambda,
3373                      const t_mdatoms gmx_unused *md, t_fcdata gmx_unused  *fcd,
3374                      int gmx_unused *global_atom_index)
3375 {
3376     /* Potential from Lawrence and Skimmer, Chem. Phys. Lett. 372 (2003)
3377      * pp. 842-847
3378      */
3379     int  i, ai, aj, ak, type, m, t1, t2;
3380     rvec r_ij, r_kj;
3381     real vtot, vrr, s1, s2, r1, r2, r1e, r2e, krr;
3382     rvec f_i, f_j, f_k;
3383     ivec jt, dt_ij, dt_kj;
3384
3385     vtot = 0.0;
3386     for (i = 0; (i < nbonds); )
3387     {
3388         type = forceatoms[i++];
3389         ai   = forceatoms[i++];
3390         aj   = forceatoms[i++];
3391         ak   = forceatoms[i++];
3392         r1e  = forceparams[type].cross_bb.r1e;
3393         r2e  = forceparams[type].cross_bb.r2e;
3394         krr  = forceparams[type].cross_bb.krr;
3395
3396         /* Compute distance vectors ... */
3397         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], r_ij);
3398         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], r_kj);
3399
3400         /* ... and their lengths */
3401         r1 = norm(r_ij);
3402         r2 = norm(r_kj);
3403
3404         /* Deviations from ideality */
3405         s1 = r1-r1e;
3406         s2 = r2-r2e;
3407
3408         /* Energy (can be negative!) */
3409         vrr   = krr*s1*s2;
3410         vtot += vrr;
3411
3412         /* Forces */
3413         svmul(-krr*s2/r1, r_ij, f_i);
3414         svmul(-krr*s1/r2, r_kj, f_k);
3415
3416         for (m = 0; (m < DIM); m++)     /*  12  */
3417         {
3418             f_j[m]    = -f_i[m] - f_k[m];
3419             f[ai][m] += f_i[m];
3420             f[aj][m] += f_j[m];
3421             f[ak][m] += f_k[m];
3422         }
3423
3424         /* Virial stuff */
3425         if (g)
3426         {
3427             copy_ivec(SHIFT_IVEC(g, aj), jt);
3428
3429             ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
3430             ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
3431             t1 = IVEC2IS(dt_ij);
3432             t2 = IVEC2IS(dt_kj);
3433         }
3434         rvec_inc(fshift[t1], f_i);
3435         rvec_inc(fshift[CENTRAL], f_j);
3436         rvec_inc(fshift[t2], f_k);          /* 9 */
3437         /* 163 TOTAL    */
3438     }
3439     return vtot;
3440 }
3441
3442 real cross_bond_angle(int nbonds,
3443                       const t_iatom forceatoms[], const t_iparams forceparams[],
3444                       const rvec x[], rvec f[], rvec fshift[],
3445                       const t_pbc *pbc, const t_graph *g,
3446                       real gmx_unused lambda, real gmx_unused *dvdlambda,
3447                       const t_mdatoms gmx_unused *md, t_fcdata gmx_unused *fcd,
3448                       int gmx_unused *global_atom_index)
3449 {
3450     /* Potential from Lawrence and Skimmer, Chem. Phys. Lett. 372 (2003)
3451      * pp. 842-847
3452      */
3453     int  i, ai, aj, ak, type, m, t1, t2, t3;
3454     rvec r_ij, r_kj, r_ik;
3455     real vtot, vrt, s1, s2, s3, r1, r2, r3, r1e, r2e, r3e, krt, k1, k2, k3;
3456     rvec f_i, f_j, f_k;
3457     ivec jt, dt_ij, dt_kj;
3458
3459     vtot = 0.0;
3460     for (i = 0; (i < nbonds); )
3461     {
3462         type = forceatoms[i++];
3463         ai   = forceatoms[i++];
3464         aj   = forceatoms[i++];
3465         ak   = forceatoms[i++];
3466         r1e  = forceparams[type].cross_ba.r1e;
3467         r2e  = forceparams[type].cross_ba.r2e;
3468         r3e  = forceparams[type].cross_ba.r3e;
3469         krt  = forceparams[type].cross_ba.krt;
3470
3471         /* Compute distance vectors ... */
3472         t1 = pbc_rvec_sub(pbc, x[ai], x[aj], r_ij);
3473         t2 = pbc_rvec_sub(pbc, x[ak], x[aj], r_kj);
3474         t3 = pbc_rvec_sub(pbc, x[ai], x[ak], r_ik);
3475
3476         /* ... and their lengths */
3477         r1 = norm(r_ij);
3478         r2 = norm(r_kj);
3479         r3 = norm(r_ik);
3480
3481         /* Deviations from ideality */
3482         s1 = r1-r1e;
3483         s2 = r2-r2e;
3484         s3 = r3-r3e;
3485
3486         /* Energy (can be negative!) */
3487         vrt   = krt*s3*(s1+s2);
3488         vtot += vrt;
3489
3490         /* Forces */
3491         k1 = -krt*(s3/r1);
3492         k2 = -krt*(s3/r2);
3493         k3 = -krt*(s1+s2)/r3;
3494         for (m = 0; (m < DIM); m++)
3495         {
3496             f_i[m] = k1*r_ij[m] + k3*r_ik[m];
3497             f_k[m] = k2*r_kj[m] - k3*r_ik[m];
3498             f_j[m] = -f_i[m] - f_k[m];
3499         }
3500
3501         for (m = 0; (m < DIM); m++)     /*  12  */
3502         {
3503             f[ai][m] += f_i[m];
3504             f[aj][m] += f_j[m];
3505             f[ak][m] += f_k[m];
3506         }
3507
3508         /* Virial stuff */
3509         if (g)
3510         {
3511             copy_ivec(SHIFT_IVEC(g, aj), jt);
3512
3513             ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
3514             ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
3515             t1 = IVEC2IS(dt_ij);
3516             t2 = IVEC2IS(dt_kj);
3517         }
3518         rvec_inc(fshift[t1], f_i);
3519         rvec_inc(fshift[CENTRAL], f_j);
3520         rvec_inc(fshift[t2], f_k);          /* 9 */
3521         /* 163 TOTAL    */
3522     }
3523     return vtot;
3524 }
3525
3526 static real bonded_tab(const char *type, int table_nr,
3527                        const bondedtable_t *table, real kA, real kB, real r,
3528                        real lambda, real *V, real *F)
3529 {
3530     real k, tabscale, *VFtab, rt, eps, eps2, Yt, Ft, Geps, Heps2, Fp, VV, FF;
3531     int  n0, nnn;
3532     real v, f, dvdlambda;
3533
3534     k = (1.0 - lambda)*kA + lambda*kB;
3535
3536     tabscale = table->scale;
3537     VFtab    = table->data;
3538
3539     rt    = r*tabscale;
3540     n0    = rt;
3541     if (n0 >= table->n)
3542     {
3543         gmx_fatal(FARGS, "A tabulated %s interaction table number %d is out of the table range: r %f, between table indices %d and %d, table length %d",
3544                   type, table_nr, r, n0, n0+1, table->n);
3545     }
3546     eps   = rt - n0;
3547     eps2  = eps*eps;
3548     nnn   = 4*n0;
3549     Yt    = VFtab[nnn];
3550     Ft    = VFtab[nnn+1];
3551     Geps  = VFtab[nnn+2]*eps;
3552     Heps2 = VFtab[nnn+3]*eps2;
3553     Fp    = Ft + Geps + Heps2;
3554     VV    = Yt + Fp*eps;
3555     FF    = Fp + Geps + 2.0*Heps2;
3556
3557     *F         = -k*FF*tabscale;
3558     *V         = k*VV;
3559     dvdlambda  = (kB - kA)*VV;
3560
3561     return dvdlambda;
3562
3563     /* That was 22 flops */
3564 }
3565
3566 real tab_bonds(int nbonds,
3567                const t_iatom forceatoms[], const t_iparams forceparams[],
3568                const rvec x[], rvec f[], rvec fshift[],
3569                const t_pbc *pbc, const t_graph *g,
3570                real lambda, real *dvdlambda,
3571                const t_mdatoms gmx_unused *md, t_fcdata *fcd,
3572                int gmx_unused  *global_atom_index)
3573 {
3574     int  i, m, ki, ai, aj, type, table;
3575     real dr, dr2, fbond, vbond, fij, vtot;
3576     rvec dx;
3577     ivec dt;
3578
3579     vtot = 0.0;
3580     for (i = 0; (i < nbonds); )
3581     {
3582         type = forceatoms[i++];
3583         ai   = forceatoms[i++];
3584         aj   = forceatoms[i++];
3585
3586         ki   = pbc_rvec_sub(pbc, x[ai], x[aj], dx); /*   3      */
3587         dr2  = iprod(dx, dx);                       /*   5              */
3588         dr   = dr2*gmx_invsqrt(dr2);                /*  10              */
3589
3590         table = forceparams[type].tab.table;
3591
3592         *dvdlambda += bonded_tab("bond", table,
3593                                  &fcd->bondtab[table],
3594                                  forceparams[type].tab.kA,
3595                                  forceparams[type].tab.kB,
3596                                  dr, lambda, &vbond, &fbond); /*  22 */
3597
3598         if (dr2 == 0.0)
3599         {
3600             continue;
3601         }
3602
3603
3604         vtot  += vbond;            /* 1*/
3605         fbond *= gmx_invsqrt(dr2); /*   6               */
3606 #ifdef DEBUG
3607         if (debug)
3608         {
3609             fprintf(debug, "TABBONDS: dr = %10g  vbond = %10g  fbond = %10g\n",
3610                     dr, vbond, fbond);
3611         }
3612 #endif
3613         if (g)
3614         {
3615             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
3616             ki = IVEC2IS(dt);
3617         }
3618         for (m = 0; (m < DIM); m++)     /*  15          */
3619         {
3620             fij                 = fbond*dx[m];
3621             f[ai][m]           += fij;
3622             f[aj][m]           -= fij;
3623             fshift[ki][m]      += fij;
3624             fshift[CENTRAL][m] -= fij;
3625         }
3626     }               /* 62 TOTAL */
3627     return vtot;
3628 }
3629
3630 real tab_angles(int nbonds,
3631                 const t_iatom forceatoms[], const t_iparams forceparams[],
3632                 const rvec x[], rvec f[], rvec fshift[],
3633                 const t_pbc *pbc, const t_graph *g,
3634                 real lambda, real *dvdlambda,
3635                 const t_mdatoms gmx_unused  *md, t_fcdata *fcd,
3636                 int gmx_unused *global_atom_index)
3637 {
3638     int  i, ai, aj, ak, t1, t2, type, table;
3639     rvec r_ij, r_kj;
3640     real cos_theta, cos_theta2, theta, dVdt, va, vtot;
3641     ivec jt, dt_ij, dt_kj;
3642
3643     vtot = 0.0;
3644     for (i = 0; (i < nbonds); )
3645     {
3646         type = forceatoms[i++];
3647         ai   = forceatoms[i++];
3648         aj   = forceatoms[i++];
3649         ak   = forceatoms[i++];
3650
3651         theta  = bond_angle(x[ai], x[aj], x[ak], pbc,
3652                             r_ij, r_kj, &cos_theta, &t1, &t2); /*  41           */
3653
3654         table = forceparams[type].tab.table;
3655
3656         *dvdlambda += bonded_tab("angle", table,
3657                                  &fcd->angletab[table],
3658                                  forceparams[type].tab.kA,
3659                                  forceparams[type].tab.kB,
3660                                  theta, lambda, &va, &dVdt); /*  22  */
3661         vtot += va;
3662
3663         cos_theta2 = sqr(cos_theta);            /*   1          */
3664         if (cos_theta2 < 1)
3665         {
3666             int  m;
3667             real snt, st, sth;
3668             real cik, cii, ckk;
3669             real nrkj2, nrij2;
3670             rvec f_i, f_j, f_k;
3671
3672             st  = dVdt*gmx_invsqrt(1 - cos_theta2); /*  12              */
3673             sth = st*cos_theta;                     /*   1              */
3674 #ifdef DEBUG
3675             if (debug)
3676             {
3677                 fprintf(debug, "ANGLES: theta = %10g  vth = %10g  dV/dtheta = %10g\n",
3678                         theta*RAD2DEG, va, dVdt);
3679             }
3680 #endif
3681             nrkj2 = iprod(r_kj, r_kj);  /*   5          */
3682             nrij2 = iprod(r_ij, r_ij);
3683
3684             cik = st*gmx_invsqrt(nrkj2*nrij2); /*  12           */
3685             cii = sth/nrij2;                   /*  10           */
3686             ckk = sth/nrkj2;                   /*  10           */
3687
3688             for (m = 0; (m < DIM); m++)        /*  39           */
3689             {
3690                 f_i[m]    = -(cik*r_kj[m]-cii*r_ij[m]);
3691                 f_k[m]    = -(cik*r_ij[m]-ckk*r_kj[m]);
3692                 f_j[m]    = -f_i[m]-f_k[m];
3693                 f[ai][m] += f_i[m];
3694                 f[aj][m] += f_j[m];
3695                 f[ak][m] += f_k[m];
3696             }
3697             if (g)
3698             {
3699                 copy_ivec(SHIFT_IVEC(g, aj), jt);
3700
3701                 ivec_sub(SHIFT_IVEC(g, ai), jt, dt_ij);
3702                 ivec_sub(SHIFT_IVEC(g, ak), jt, dt_kj);
3703                 t1 = IVEC2IS(dt_ij);
3704                 t2 = IVEC2IS(dt_kj);
3705             }
3706             rvec_inc(fshift[t1], f_i);
3707             rvec_inc(fshift[CENTRAL], f_j);
3708             rvec_inc(fshift[t2], f_k);
3709         }                                       /* 169 TOTAL    */
3710     }
3711     return vtot;
3712 }
3713
3714 real tab_dihs(int nbonds,
3715               const t_iatom forceatoms[], const t_iparams forceparams[],
3716               const rvec x[], rvec f[], rvec fshift[],
3717               const t_pbc *pbc, const t_graph *g,
3718               real lambda, real *dvdlambda,
3719               const t_mdatoms gmx_unused *md, t_fcdata *fcd,
3720               int gmx_unused *global_atom_index)
3721 {
3722     int  i, type, ai, aj, ak, al, table;
3723     int  t1, t2, t3;
3724     rvec r_ij, r_kj, r_kl, m, n;
3725     real phi, sign, ddphi, vpd, vtot;
3726
3727     vtot = 0.0;
3728     for (i = 0; (i < nbonds); )
3729     {
3730         type = forceatoms[i++];
3731         ai   = forceatoms[i++];
3732         aj   = forceatoms[i++];
3733         ak   = forceatoms[i++];
3734         al   = forceatoms[i++];
3735
3736         phi = dih_angle(x[ai], x[aj], x[ak], x[al], pbc, r_ij, r_kj, r_kl, m, n,
3737                         &sign, &t1, &t2, &t3);  /*  84  */
3738
3739         table = forceparams[type].tab.table;
3740
3741         /* Hopefully phi+M_PI never results in values < 0 */
3742         *dvdlambda += bonded_tab("dihedral", table,
3743                                  &fcd->dihtab[table],
3744                                  forceparams[type].tab.kA,
3745                                  forceparams[type].tab.kB,
3746                                  phi+M_PI, lambda, &vpd, &ddphi);
3747
3748         vtot += vpd;
3749         do_dih_fup(ai, aj, ak, al, -ddphi, r_ij, r_kj, r_kl, m, n,
3750                    f, fshift, pbc, g, x, t1, t2, t3); /* 112    */
3751
3752 #ifdef DEBUG
3753         fprintf(debug, "pdih: (%d,%d,%d,%d) phi=%g\n",
3754                 ai, aj, ak, al, phi);
3755 #endif
3756     } /* 227 TOTAL  */
3757
3758     return vtot;
3759 }
3760
3761 /* Return if this is a potential calculated in bondfree.c,
3762  * i.e. an interaction that actually calculates a potential and
3763  * works on multiple atoms (not e.g. a connection or a position restraint).
3764  */
3765 static gmx_inline gmx_bool ftype_is_bonded_potential(int ftype)
3766 {
3767     return
3768         (interaction_function[ftype].flags & IF_BOND) &&
3769         !(ftype == F_CONNBONDS || ftype == F_POSRES) &&
3770         (ftype < F_GB12 || ftype > F_GB14);
3771 }
3772
3773 static void divide_bondeds_over_threads(t_idef *idef, int nthreads)
3774 {
3775     int ftype;
3776     int nat1;
3777     int t;
3778     int il_nr_thread;
3779
3780     idef->nthreads = nthreads;
3781
3782     if (F_NRE*(nthreads+1) > idef->il_thread_division_nalloc)
3783     {
3784         idef->il_thread_division_nalloc = F_NRE*(nthreads+1);
3785         snew(idef->il_thread_division, idef->il_thread_division_nalloc);
3786     }
3787
3788     for (ftype = 0; ftype < F_NRE; ftype++)
3789     {
3790         if (ftype_is_bonded_potential(ftype))
3791         {
3792             nat1 = interaction_function[ftype].nratoms + 1;
3793
3794             for (t = 0; t <= nthreads; t++)
3795             {
3796                 /* Divide the interactions equally over the threads.
3797                  * When the different types of bonded interactions
3798                  * are distributed roughly equally over the threads,
3799                  * this should lead to well localized output into
3800                  * the force buffer on each thread.
3801                  * If this is not the case, a more advanced scheme
3802                  * (not implemented yet) will do better.
3803                  */
3804                 il_nr_thread = (((idef->il[ftype].nr/nat1)*t)/nthreads)*nat1;
3805
3806                 /* Ensure that distance restraint pairs with the same label
3807                  * end up on the same thread.
3808                  * This is slighlty tricky code, since the next for iteration
3809                  * may have an initial il_nr_thread lower than the final value
3810                  * in the previous iteration, but this will anyhow be increased
3811                  * to the approriate value again by this while loop.
3812                  */
3813                 while (ftype == F_DISRES &&
3814                        il_nr_thread > 0 &&
3815                        il_nr_thread < idef->il[ftype].nr &&
3816                        idef->iparams[idef->il[ftype].iatoms[il_nr_thread]].disres.label ==
3817                        idef->iparams[idef->il[ftype].iatoms[il_nr_thread-nat1]].disres.label)
3818                 {
3819                     il_nr_thread += nat1;
3820                 }
3821
3822                 idef->il_thread_division[ftype*(nthreads+1)+t] = il_nr_thread;
3823             }
3824         }
3825     }
3826 }
3827
3828 static unsigned
3829 calc_bonded_reduction_mask(const t_idef *idef,
3830                            int shift,
3831                            int t, int nt)
3832 {
3833     unsigned mask;
3834     int      ftype, nb, nat1, nb0, nb1, i, a;
3835
3836     mask = 0;
3837
3838     for (ftype = 0; ftype < F_NRE; ftype++)
3839     {
3840         if (ftype_is_bonded_potential(ftype))
3841         {
3842             nb = idef->il[ftype].nr;
3843             if (nb > 0)
3844             {
3845                 nat1 = interaction_function[ftype].nratoms + 1;
3846
3847                 /* Divide this interaction equally over the threads.
3848                  * This is not stored: should match division in calc_bonds.
3849                  */
3850                 nb0 = idef->il_thread_division[ftype*(nt+1)+t];
3851                 nb1 = idef->il_thread_division[ftype*(nt+1)+t+1];
3852
3853                 for (i = nb0; i < nb1; i += nat1)
3854                 {
3855                     for (a = 1; a < nat1; a++)
3856                     {
3857                         mask |= (1U << (idef->il[ftype].iatoms[i+a]>>shift));
3858                     }
3859                 }
3860             }
3861         }
3862     }
3863
3864     return mask;
3865 }
3866
3867 void setup_bonded_threading(t_forcerec   *fr, t_idef *idef)
3868 {
3869 #define MAX_BLOCK_BITS 32
3870     int t;
3871     int ctot, c, b;
3872
3873     assert(fr->nthreads >= 1);
3874
3875     /* Divide the bonded interaction over the threads */
3876     divide_bondeds_over_threads(idef, fr->nthreads);
3877
3878     if (fr->nthreads == 1)
3879     {
3880         fr->red_nblock = 0;
3881
3882         return;
3883     }
3884
3885     /* We divide the force array in a maximum of 32 blocks.
3886      * Minimum force block reduction size is 2^6=64.
3887      */
3888     fr->red_ashift = 6;
3889     while (fr->natoms_force > (int)(MAX_BLOCK_BITS*(1U<<fr->red_ashift)))
3890     {
3891         fr->red_ashift++;
3892     }
3893     if (debug)
3894     {
3895         fprintf(debug, "bonded force buffer block atom shift %d bits\n",
3896                 fr->red_ashift);
3897     }
3898
3899     /* Determine to which blocks each thread's bonded force calculation
3900      * contributes. Store this is a mask for each thread.
3901      */
3902 #pragma omp parallel for num_threads(fr->nthreads) schedule(static)
3903     for (t = 1; t < fr->nthreads; t++)
3904     {
3905         fr->f_t[t].red_mask =
3906             calc_bonded_reduction_mask(idef, fr->red_ashift, t, fr->nthreads);
3907     }
3908
3909     /* Determine the maximum number of blocks we need to reduce over */
3910     fr->red_nblock = 0;
3911     ctot           = 0;
3912     for (t = 0; t < fr->nthreads; t++)
3913     {
3914         c = 0;
3915         for (b = 0; b < MAX_BLOCK_BITS; b++)
3916         {
3917             if (fr->f_t[t].red_mask & (1U<<b))
3918             {
3919                 fr->red_nblock = max(fr->red_nblock, b+1);
3920                 c++;
3921             }
3922         }
3923         if (debug)
3924         {
3925             fprintf(debug, "thread %d flags %x count %d\n",
3926                     t, fr->f_t[t].red_mask, c);
3927         }
3928         ctot += c;
3929     }
3930     if (debug)
3931     {
3932         fprintf(debug, "Number of blocks to reduce: %d of size %d\n",
3933                 fr->red_nblock, 1<<fr->red_ashift);
3934         fprintf(debug, "Reduction density %.2f density/#thread %.2f\n",
3935                 ctot*(1<<fr->red_ashift)/(double)fr->natoms_force,
3936                 ctot*(1<<fr->red_ashift)/(double)(fr->natoms_force*fr->nthreads));
3937     }
3938 }
3939
3940 static void zero_thread_forces(f_thread_t *f_t, int n,
3941                                int nblock, int blocksize)
3942 {
3943     int b, a0, a1, a, i, j;
3944
3945     if (n > f_t->f_nalloc)
3946     {
3947         f_t->f_nalloc = over_alloc_large(n);
3948         srenew(f_t->f, f_t->f_nalloc);
3949     }
3950
3951     if (f_t->red_mask != 0)
3952     {
3953         for (b = 0; b < nblock; b++)
3954         {
3955             if (f_t->red_mask && (1U<<b))
3956             {
3957                 a0 = b*blocksize;
3958                 a1 = min((b+1)*blocksize, n);
3959                 for (a = a0; a < a1; a++)
3960                 {
3961                     clear_rvec(f_t->f[a]);
3962                 }
3963             }
3964         }
3965     }
3966     for (i = 0; i < SHIFTS; i++)
3967     {
3968         clear_rvec(f_t->fshift[i]);
3969     }
3970     for (i = 0; i < F_NRE; i++)
3971     {
3972         f_t->ener[i] = 0;
3973     }
3974     for (i = 0; i < egNR; i++)
3975     {
3976         for (j = 0; j < f_t->grpp.nener; j++)
3977         {
3978             f_t->grpp.ener[i][j] = 0;
3979         }
3980     }
3981     for (i = 0; i < efptNR; i++)
3982     {
3983         f_t->dvdl[i] = 0;
3984     }
3985 }
3986
3987 static void reduce_thread_force_buffer(int n, rvec *f,
3988                                        int nthreads, f_thread_t *f_t,
3989                                        int nblock, int block_size)
3990 {
3991     /* The max thread number is arbitrary,
3992      * we used a fixed number to avoid memory management.
3993      * Using more than 16 threads is probably never useful performance wise.
3994      */
3995 #define MAX_BONDED_THREADS 256
3996     int b;
3997
3998     if (nthreads > MAX_BONDED_THREADS)
3999     {
4000         gmx_fatal(FARGS, "Can not reduce bonded forces on more than %d threads",
4001                   MAX_BONDED_THREADS);
4002     }
4003
4004     /* This reduction can run on any number of threads,
4005      * independently of nthreads.
4006      */
4007 #pragma omp parallel for num_threads(nthreads) schedule(static)
4008     for (b = 0; b < nblock; b++)
4009     {
4010         rvec *fp[MAX_BONDED_THREADS];
4011         int   nfb, ft, fb;
4012         int   a0, a1, a;
4013
4014         /* Determine which threads contribute to this block */
4015         nfb = 0;
4016         for (ft = 1; ft < nthreads; ft++)
4017         {
4018             if (f_t[ft].red_mask & (1U<<b))
4019             {
4020                 fp[nfb++] = f_t[ft].f;
4021             }
4022         }
4023         if (nfb > 0)
4024         {
4025             /* Reduce force buffers for threads that contribute */
4026             a0 =  b   *block_size;
4027             a1 = (b+1)*block_size;
4028             a1 = min(a1, n);
4029             for (a = a0; a < a1; a++)
4030             {
4031                 for (fb = 0; fb < nfb; fb++)
4032                 {
4033                     rvec_inc(f[a], fp[fb][a]);
4034                 }
4035             }
4036         }
4037     }
4038 }
4039
4040 static void reduce_thread_forces(int n, rvec *f, rvec *fshift,
4041                                  real *ener, gmx_grppairener_t *grpp, real *dvdl,
4042                                  int nthreads, f_thread_t *f_t,
4043                                  int nblock, int block_size,
4044                                  gmx_bool bCalcEnerVir,
4045                                  gmx_bool bDHDL)
4046 {
4047     if (nblock > 0)
4048     {
4049         /* Reduce the bonded force buffer */
4050         reduce_thread_force_buffer(n, f, nthreads, f_t, nblock, block_size);
4051     }
4052
4053     /* When necessary, reduce energy and virial using one thread only */
4054     if (bCalcEnerVir)
4055     {
4056         int t, i, j;
4057
4058         for (i = 0; i < SHIFTS; i++)
4059         {
4060             for (t = 1; t < nthreads; t++)
4061             {
4062                 rvec_inc(fshift[i], f_t[t].fshift[i]);
4063             }
4064         }
4065         for (i = 0; i < F_NRE; i++)
4066         {
4067             for (t = 1; t < nthreads; t++)
4068             {
4069                 ener[i] += f_t[t].ener[i];
4070             }
4071         }
4072         for (i = 0; i < egNR; i++)
4073         {
4074             for (j = 0; j < f_t[1].grpp.nener; j++)
4075             {
4076                 for (t = 1; t < nthreads; t++)
4077                 {
4078
4079                     grpp->ener[i][j] += f_t[t].grpp.ener[i][j];
4080                 }
4081             }
4082         }
4083         if (bDHDL)
4084         {
4085             for (i = 0; i < efptNR; i++)
4086             {
4087
4088                 for (t = 1; t < nthreads; t++)
4089                 {
4090                     dvdl[i] += f_t[t].dvdl[i];
4091                 }
4092             }
4093         }
4094     }
4095 }
4096
4097 static real calc_one_bond(FILE *fplog, int thread,
4098                           int ftype, const t_idef *idef,
4099                           rvec x[], rvec f[], rvec fshift[],
4100                           t_forcerec *fr,
4101                           const t_pbc *pbc, const t_graph *g,
4102                           gmx_grppairener_t *grpp,
4103                           t_nrnb *nrnb,
4104                           real *lambda, real *dvdl,
4105                           const t_mdatoms *md, t_fcdata *fcd,
4106                           gmx_bool bCalcEnerVir,
4107                           int *global_atom_index, gmx_bool bPrintSepPot)
4108 {
4109     int      nat1, nbonds, efptFTYPE;
4110     real     v = 0;
4111     t_iatom *iatoms;
4112     int      nb0, nbn;
4113
4114     if (IS_RESTRAINT_TYPE(ftype))
4115     {
4116         efptFTYPE = efptRESTRAINT;
4117     }
4118     else
4119     {
4120         efptFTYPE = efptBONDED;
4121     }
4122
4123     nat1      = interaction_function[ftype].nratoms + 1;
4124     nbonds    = idef->il[ftype].nr/nat1;
4125     iatoms    = idef->il[ftype].iatoms;
4126
4127     nb0 = idef->il_thread_division[ftype*(idef->nthreads+1)+thread];
4128     nbn = idef->il_thread_division[ftype*(idef->nthreads+1)+thread+1] - nb0;
4129
4130     if (!IS_LISTED_LJ_C(ftype))
4131     {
4132         if (ftype == F_CMAP)
4133         {
4134             v = cmap_dihs(nbn, iatoms+nb0,
4135                           idef->iparams, &idef->cmap_grid,
4136                           (const rvec*)x, f, fshift,
4137                           pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]),
4138                           md, fcd, global_atom_index);
4139         }
4140 #ifdef SIMD_BONDEDS
4141         else if (ftype == F_ANGLES &&
4142                  !bCalcEnerVir && fr->efep == efepNO)
4143         {
4144             /* No energies, shift forces, dvdl */
4145             angles_noener_simd(nbn, idef->il[ftype].iatoms+nb0,
4146                                idef->iparams,
4147                                (const rvec*)x, f,
4148                                pbc, g, lambda[efptFTYPE], md, fcd,
4149                                global_atom_index);
4150             v = 0;
4151         }
4152 #endif
4153         else if (ftype == F_PDIHS &&
4154                  !bCalcEnerVir && fr->efep == efepNO)
4155         {
4156             /* No energies, shift forces, dvdl */
4157 #ifndef SIMD_BONDEDS
4158             pdihs_noener
4159 #else
4160             pdihs_noener_simd
4161 #endif
4162                 (nbn, idef->il[ftype].iatoms+nb0,
4163                  idef->iparams,
4164                  (const rvec*)x, f,
4165                  pbc, g, lambda[efptFTYPE], md, fcd,
4166                  global_atom_index);
4167             v = 0;
4168         }
4169         else
4170         {
4171             v = interaction_function[ftype].ifunc(nbn, iatoms+nb0,
4172                                                   idef->iparams,
4173                                                   (const rvec*)x, f, fshift,
4174                                                   pbc, g, lambda[efptFTYPE], &(dvdl[efptFTYPE]),
4175                                                   md, fcd, global_atom_index);
4176         }
4177         if (bPrintSepPot)
4178         {
4179             fprintf(fplog, "  %-23s #%4d  V %12.5e  dVdl %12.5e\n",
4180                     interaction_function[ftype].longname,
4181                     nbonds, v, lambda[efptFTYPE]);
4182         }
4183     }
4184     else
4185     {
4186         v = do_nonbonded_listed(ftype, nbn, iatoms+nb0, idef->iparams, (const rvec*)x, f, fshift,
4187                                 pbc, g, lambda, dvdl, md, fr, grpp, global_atom_index);
4188
4189         if (bPrintSepPot)
4190         {
4191             fprintf(fplog, "  %-5s + %-15s #%4d                  dVdl %12.5e\n",
4192                     interaction_function[ftype].longname,
4193                     interaction_function[F_LJ14].longname, nbonds, dvdl[efptVDW]);
4194             fprintf(fplog, "  %-5s + %-15s #%4d                  dVdl %12.5e\n",
4195                     interaction_function[ftype].longname,
4196                     interaction_function[F_COUL14].longname, nbonds, dvdl[efptCOUL]);
4197         }
4198     }
4199
4200     if (thread == 0)
4201     {
4202         inc_nrnb(nrnb, interaction_function[ftype].nrnb_ind, nbonds);
4203     }
4204
4205     return v;
4206 }
4207
4208 void calc_bonds(FILE *fplog, const gmx_multisim_t *ms,
4209                 const t_idef *idef,
4210                 rvec x[], history_t *hist,
4211                 rvec f[], t_forcerec *fr,
4212                 const t_pbc *pbc, const t_graph *g,
4213                 gmx_enerdata_t *enerd, t_nrnb *nrnb,
4214                 real *lambda,
4215                 const t_mdatoms *md,
4216                 t_fcdata *fcd, int *global_atom_index,
4217                 t_atomtypes gmx_unused *atype, gmx_genborn_t gmx_unused *born,
4218                 int force_flags,
4219                 gmx_bool bPrintSepPot, gmx_large_int_t step)
4220 {
4221     gmx_bool      bCalcEnerVir;
4222     int           i;
4223     real          v, dvdl[efptNR], dvdl_dum[efptNR]; /* The dummy array is to have a place to store the dhdl at other values
4224                                                         of lambda, which will be thrown away in the end*/
4225     const  t_pbc *pbc_null;
4226     char          buf[22];
4227     int           thread;
4228
4229     assert(fr->nthreads == idef->nthreads);
4230
4231     bCalcEnerVir = (force_flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY));
4232
4233     for (i = 0; i < efptNR; i++)
4234     {
4235         dvdl[i] = 0.0;
4236     }
4237     if (fr->bMolPBC)
4238     {
4239         pbc_null = pbc;
4240     }
4241     else
4242     {
4243         pbc_null = NULL;
4244     }
4245     if (bPrintSepPot)
4246     {
4247         fprintf(fplog, "Step %s: bonded V and dVdl for this node\n",
4248                 gmx_step_str(step, buf));
4249     }
4250
4251 #ifdef DEBUG
4252     if (g && debug)
4253     {
4254         p_graph(debug, "Bondage is fun", g);
4255     }
4256 #endif
4257
4258     /* Do pre force calculation stuff which might require communication */
4259     if (idef->il[F_ORIRES].nr)
4260     {
4261         enerd->term[F_ORIRESDEV] =
4262             calc_orires_dev(ms, idef->il[F_ORIRES].nr,
4263                             idef->il[F_ORIRES].iatoms,
4264                             idef->iparams, md, (const rvec*)x,
4265                             pbc_null, fcd, hist);
4266     }
4267     if (idef->il[F_DISRES].nr)
4268     {
4269         calc_disres_R_6(ms, idef->il[F_DISRES].nr,
4270                         idef->il[F_DISRES].iatoms,
4271                         idef->iparams, (const rvec*)x, pbc_null,
4272                         fcd, hist);
4273     }
4274
4275 #pragma omp parallel for num_threads(fr->nthreads) schedule(static)
4276     for (thread = 0; thread < fr->nthreads; thread++)
4277     {
4278         int                ftype;
4279         real              *epot, v;
4280         /* thread stuff */
4281         rvec              *ft, *fshift;
4282         real              *dvdlt;
4283         gmx_grppairener_t *grpp;
4284
4285         if (thread == 0)
4286         {
4287             ft     = f;
4288             fshift = fr->fshift;
4289             epot   = enerd->term;
4290             grpp   = &enerd->grpp;
4291             dvdlt  = dvdl;
4292         }
4293         else
4294         {
4295             zero_thread_forces(&fr->f_t[thread], fr->natoms_force,
4296                                fr->red_nblock, 1<<fr->red_ashift);
4297
4298             ft     = fr->f_t[thread].f;
4299             fshift = fr->f_t[thread].fshift;
4300             epot   = fr->f_t[thread].ener;
4301             grpp   = &fr->f_t[thread].grpp;
4302             dvdlt  = fr->f_t[thread].dvdl;
4303         }
4304         /* Loop over all bonded force types to calculate the bonded forces */
4305         for (ftype = 0; (ftype < F_NRE); ftype++)
4306         {
4307             if (idef->il[ftype].nr > 0 && ftype_is_bonded_potential(ftype))
4308             {
4309                 v = calc_one_bond(fplog, thread, ftype, idef, x,
4310                                   ft, fshift, fr, pbc_null, g, grpp,
4311                                   nrnb, lambda, dvdlt,
4312                                   md, fcd, bCalcEnerVir,
4313                                   global_atom_index, bPrintSepPot);
4314                 epot[ftype] += v;
4315             }
4316         }
4317     }
4318     if (fr->nthreads > 1)
4319     {
4320         reduce_thread_forces(fr->natoms_force, f, fr->fshift,
4321                              enerd->term, &enerd->grpp, dvdl,
4322                              fr->nthreads, fr->f_t,
4323                              fr->red_nblock, 1<<fr->red_ashift,
4324                              bCalcEnerVir,
4325                              force_flags & GMX_FORCE_DHDL);
4326     }
4327     if (force_flags & GMX_FORCE_DHDL)
4328     {
4329         for (i = 0; i < efptNR; i++)
4330         {
4331             enerd->dvdl_nonlin[i] += dvdl[i];
4332         }
4333     }
4334
4335     /* Copy the sum of violations for the distance restraints from fcd */
4336     if (fcd)
4337     {
4338         enerd->term[F_DISRESVIOL] = fcd->disres.sumviol;
4339
4340     }
4341 }
4342
4343 void calc_bonds_lambda(FILE *fplog,
4344                        const t_idef *idef,
4345                        rvec x[],
4346                        t_forcerec *fr,
4347                        const t_pbc *pbc, const t_graph *g,
4348                        gmx_grppairener_t *grpp, real *epot, t_nrnb *nrnb,
4349                        real *lambda,
4350                        const t_mdatoms *md,
4351                        t_fcdata *fcd,
4352                        int *global_atom_index)
4353 {
4354     int           i, ftype, nr_nonperturbed, nr;
4355     real          v;
4356     real          dvdl_dum[efptNR];
4357     rvec         *f, *fshift;
4358     const  t_pbc *pbc_null;
4359     t_idef       idef_fe;
4360
4361     if (fr->bMolPBC)
4362     {
4363         pbc_null = pbc;
4364     }
4365     else
4366     {
4367         pbc_null = NULL;
4368     }
4369
4370     /* Copy the whole idef, so we can modify the contents locally */
4371     idef_fe          = *idef;
4372     idef_fe.nthreads = 1;
4373     snew(idef_fe.il_thread_division, F_NRE*(idef_fe.nthreads+1));
4374
4375     /* We already have the forces, so we use temp buffers here */
4376     snew(f, fr->natoms_force);
4377     snew(fshift, SHIFTS);
4378
4379     /* Loop over all bonded force types to calculate the bonded energies */
4380     for (ftype = 0; (ftype < F_NRE); ftype++)
4381     {
4382         if (ftype_is_bonded_potential(ftype))
4383         {
4384             /* Set the work range of thread 0 to the perturbed bondeds only */
4385             nr_nonperturbed                       = idef->il[ftype].nr_nonperturbed;
4386             nr                                    = idef->il[ftype].nr;
4387             idef_fe.il_thread_division[ftype*2+0] = nr_nonperturbed;
4388             idef_fe.il_thread_division[ftype*2+1] = nr;
4389
4390             /* This is only to get the flop count correct */
4391             idef_fe.il[ftype].nr = nr - nr_nonperturbed;
4392
4393             if (nr - nr_nonperturbed > 0)
4394             {
4395                 v = calc_one_bond(fplog, 0, ftype, &idef_fe,
4396                                   x, f, fshift, fr, pbc_null, g,
4397                                   grpp, nrnb, lambda, dvdl_dum,
4398                                   md, fcd, TRUE,
4399                                   global_atom_index, FALSE);
4400                 epot[ftype] += v;
4401             }
4402         }
4403     }
4404
4405     sfree(fshift);
4406     sfree(f);
4407
4408     sfree(idef_fe.il_thread_division);
4409 }