src/gmxlib/bondfree.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team,
   6  * check out http://www.gromacs.org for more information.
   7  * Copyright (c) 2012, by the GROMACS development team, led by
   8  * David van der Spoel, Berk Hess, Erik Lindahl, and including many
   9  * others, as listed in the AUTHORS file in the top-level source
  10  * directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38 #ifdef HAVE_CONFIG_H
  39 #include <config.h>
  40 #endif
  41
  42 #include <math.h>
  43 #include "physics.h"
  44 #include "vec.h"
  45 #include "maths.h"
  46 #include "txtdump.h"
  47 #include "bondf.h"
  48 #include "smalloc.h"
  49 #include "pbc.h"
  50 #include "ns.h"
  51 #include "macros.h"
  52 #include "names.h"
  53 #include "gmx_fatal.h"
  54 #include "mshift.h"
  55 #include "main.h"
  56 #include "disre.h"
  57 #include "orires.h"
  58 #include "force.h"
  59 #include "nonbonded.h"
  60
  61 #if !defined GMX_DOUBLE && defined GMX_X86_SSE2
  62 #include "gmx_x86_simd_single.h"
  63 #define SSE_PROPER_DIHEDRALS
  64 #endif
  65
  66 /* Find a better place for this? */
  67 const int cmap_coeff_matrix[] = {
  68 1, 0, -3,  2, 0, 0,  0,  0, -3,  0,  9, -6,  2,  0, -6,  4 ,
  69 0, 0,  0,  0, 0, 0,  0,  0,  3,  0, -9,  6, -2,  0,  6, -4,
  70 0, 0,  0,  0, 0, 0,  0,  0,  0,  0,  9, -6,  0,  0, -6,  4 ,
  71 0, 0,  3, -2, 0, 0,  0,  0,  0,  0, -9,  6,  0,  0,  6, -4,
  72 0, 0,  0,  0, 1, 0, -3,  2, -2,  0,  6, -4,  1,  0, -3,  2 ,
  73 0, 0,  0,  0, 0, 0,  0,  0, -1,  0,  3, -2,  1,  0, -3,  2 ,
  74 0, 0,  0,  0, 0, 0,  0,  0,  0,  0, -3,  2,  0,  0,  3, -2,
  75 0, 0,  0,  0, 0, 0,  3, -2,  0,  0, -6,  4,  0,  0,  3, -2,
  76 0, 1, -2,  1, 0, 0,  0,  0,  0, -3,  6, -3,  0,  2, -4,  2 ,
  77 0, 0,  0,  0, 0, 0,  0,  0,  0,  3, -6,  3,  0, -2,  4, -2,
  78 0, 0,  0,  0, 0, 0,  0,  0,  0,  0, -3,  3,  0,  0,  2, -2,
  79 0, 0, -1,  1, 0, 0,  0,  0,  0,  0,  3, -3,  0,  0, -2,  2 ,
  80 0, 0,  0,  0, 0, 1, -2,  1,  0, -2,  4, -2,  0,  1, -2,  1,
  81 0, 0,  0,  0, 0, 0,  0,  0,  0, -1,  2, -1,  0,  1, -2,  1,
  82 0, 0,  0,  0, 0, 0,  0,  0,  0,  0,  1, -1,  0,  0, -1,  1,
  83 0, 0,  0,  0, 0, 0, -1,  1,  0,  0,  2, -2,  0,  0, -1,  1
  84 };
  85
  86
  87
  88 int glatnr(int *global_atom_index,int i)
  89 {
  90     int atnr;
  91
  92     if (global_atom_index == NULL) {
  93         atnr = i + 1;
  94     } else {
  95         atnr = global_atom_index[i] + 1;
  96     }
  97
  98     return atnr;
  99 }
 100
 101 static int pbc_rvec_sub(const t_pbc *pbc,const rvec xi,const rvec xj,rvec dx)
 102 {
 103   if (pbc) {
 104     return pbc_dx_aiuc(pbc,xi,xj,dx);
 105   }
 106   else {
 107     rvec_sub(xi,xj,dx);
 108     return CENTRAL;
 109   }
 110 }
 111
 112 /*
 113  * Morse potential bond by Frank Everdij
 114  *
 115  * Three parameters needed:
 116  *
 117  * b0 = equilibrium distance in nm
 118  * be = beta in nm^-1 (actually, it's nu_e*Sqrt(2*pi*pi*mu/D_e))
 119  * cb = well depth in kJ/mol
 120  *
 121  * Note: the potential is referenced to be +cb at infinite separation
 122  *       and zero at the equilibrium distance!
 123  */
 124
 125 real morse_bonds(int nbonds,
 126                  const t_iatom forceatoms[],const t_iparams forceparams[],
 127                  const rvec x[],rvec f[],rvec fshift[],
 128                  const t_pbc *pbc,const t_graph *g,
 129                  real lambda,real *dvdlambda,
 130                  const t_mdatoms *md,t_fcdata *fcd,
 131                  int *global_atom_index)
 132 {
 133   const real one=1.0;
 134   const real two=2.0;
 135   real  dr,dr2,temp,omtemp,cbomtemp,fbond,vbond,fij,vtot;
 136   real  b0,be,cb,b0A,beA,cbA,b0B,beB,cbB,L1;
 137   rvec  dx;
 138   int   i,m,ki,type,ai,aj;
 139   ivec  dt;
 140
 141   vtot = 0.0;
 142   for(i=0; (i<nbonds); ) {
 143     type = forceatoms[i++];
 144     ai   = forceatoms[i++];
 145     aj   = forceatoms[i++];
 146
 147     b0A   = forceparams[type].morse.b0A;
 148     beA   = forceparams[type].morse.betaA;
 149     cbA   = forceparams[type].morse.cbA;
 150
 151     b0B   = forceparams[type].morse.b0B;
 152     beB   = forceparams[type].morse.betaB;
 153     cbB   = forceparams[type].morse.cbB;
 154
 155     L1 = one-lambda;                      /* 1 */
 156     b0 = L1*b0A + lambda*b0B;             /* 3 */
 157     be = L1*beA + lambda*beB;             /* 3 */
 158     cb = L1*cbA + lambda*cbB;             /* 3 */
 159
 160     ki   = pbc_rvec_sub(pbc,x[ai],x[aj],dx);            /*   3          */
 161     dr2  = iprod(dx,dx);                            /*   5          */
 162     dr   = dr2*gmx_invsqrt(dr2);                        /*  10          */
 163     temp = exp(-be*(dr-b0));                        /*  12          */
 164
 165     if (temp == one)
 166     {
 167         /* bonds are constrainted. This may _not_ include bond constraints if they are lambda dependent */
 168         *dvdlambda += cbB-cbA;
 169         continue;
 170     }
 171
 172     omtemp   = one-temp;                               /*   1          */
 173     cbomtemp = cb*omtemp;                              /*   1          */
 174     vbond    = cbomtemp*omtemp;                        /*   1          */
 175     fbond    = -two*be*temp*cbomtemp*gmx_invsqrt(dr2); /*   9          */
 176     vtot     += vbond;                                 /*   1          */
 177
 178     *dvdlambda += (cbB - cbA) * omtemp * omtemp - (2-2*omtemp)*omtemp * cb * ((b0B-b0A)*be - (beB-beA)*(dr-b0)); /* 15 */
 179
 180     if (g) {
 181       ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
 182       ki = IVEC2IS(dt);
 183     }
 184
 185     for (m=0; (m<DIM); m++) {                          /*  15          */
 186       fij=fbond*dx[m];
 187       f[ai][m]+=fij;
 188       f[aj][m]-=fij;
 189       fshift[ki][m]+=fij;
 190       fshift[CENTRAL][m]-=fij;
 191     }
 192   }                                           /*  83 TOTAL    */
 193   return vtot;
 194 }
 195
 196 real cubic_bonds(int nbonds,
 197                  const t_iatom forceatoms[],const t_iparams forceparams[],
 198                  const rvec x[],rvec f[],rvec fshift[],
 199                  const t_pbc *pbc,const t_graph *g,
 200                  real lambda,real *dvdlambda,
 201                  const t_mdatoms *md,t_fcdata *fcd,
 202                  int *global_atom_index)
 203 {
 204   const real three = 3.0;
 205   const real two   = 2.0;
 206   real  kb,b0,kcub;
 207   real  dr,dr2,dist,kdist,kdist2,fbond,vbond,fij,vtot;
 208   rvec  dx;
 209   int   i,m,ki,type,ai,aj;
 210   ivec  dt;
 211
 212   vtot = 0.0;
 213   for(i=0; (i<nbonds); ) {
 214     type = forceatoms[i++];
 215     ai   = forceatoms[i++];
 216     aj   = forceatoms[i++];
 217
 218     b0   = forceparams[type].cubic.b0;
 219     kb   = forceparams[type].cubic.kb;
 220     kcub = forceparams[type].cubic.kcub;
 221
 222     ki   = pbc_rvec_sub(pbc,x[ai],x[aj],dx);                /*   3          */
 223     dr2  = iprod(dx,dx);                                /*   5          */
 224
 225     if (dr2 == 0.0)
 226       continue;
 227
 228     dr         = dr2*gmx_invsqrt(dr2);                      /*  10          */
 229     dist       = dr-b0;
 230     kdist      = kb*dist;
 231     kdist2     = kdist*dist;
 232
 233     vbond      = kdist2 + kcub*kdist2*dist;
 234     fbond      = -(two*kdist + three*kdist2*kcub)/dr;
 235
 236     vtot      += vbond;       /* 21 */
 237
 238     if (g) {
 239       ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
 240       ki=IVEC2IS(dt);
 241     }
 242     for (m=0; (m<DIM); m++) {                          /*  15          */
 243       fij=fbond*dx[m];
 244       f[ai][m]+=fij;
 245       f[aj][m]-=fij;
 246       fshift[ki][m]+=fij;
 247       fshift[CENTRAL][m]-=fij;
 248     }
 249   }                                           /*  54 TOTAL    */
 250   return vtot;
 251 }
 252
 253 real FENE_bonds(int nbonds,
 254                 const t_iatom forceatoms[],const t_iparams forceparams[],
 255                 const rvec x[],rvec f[],rvec fshift[],
 256                 const t_pbc *pbc,const t_graph *g,
 257                 real lambda,real *dvdlambda,
 258                 const t_mdatoms *md,t_fcdata *fcd,
 259                 int *global_atom_index)
 260 {
 261   const real half=0.5;
 262   const real one=1.0;
 263   real  bm,kb;
 264   real  dr,dr2,bm2,omdr2obm2,fbond,vbond,fij,vtot;
 265   rvec  dx;
 266   int   i,m,ki,type,ai,aj;
 267   ivec  dt;
 268
 269   vtot = 0.0;
 270   for(i=0; (i<nbonds); ) {
 271     type = forceatoms[i++];
 272     ai   = forceatoms[i++];
 273     aj   = forceatoms[i++];
 274
 275     bm   = forceparams[type].fene.bm;
 276     kb   = forceparams[type].fene.kb;
 277
 278     ki   = pbc_rvec_sub(pbc,x[ai],x[aj],dx);            /*   3          */
 279     dr2  = iprod(dx,dx);                                /*   5          */
 280
 281     if (dr2 == 0.0)
 282       continue;
 283
 284     bm2 = bm*bm;
 285
 286     if (dr2 >= bm2)
 287       gmx_fatal(FARGS,
 288                 "r^2 (%f) >= bm^2 (%f) in FENE bond between atoms %d and %d",
 289                 dr2,bm2,
 290                 glatnr(global_atom_index,ai),
 291                 glatnr(global_atom_index,aj));
 292
 293     omdr2obm2  = one - dr2/bm2;
 294
 295     vbond      = -half*kb*bm2*log(omdr2obm2);
 296     fbond      = -kb/omdr2obm2;
 297
 298     vtot      += vbond;       /* 35 */
 299
 300     if (g) {
 301       ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
 302       ki=IVEC2IS(dt);
 303     }
 304     for (m=0; (m<DIM); m++) {                          /*  15          */
 305       fij=fbond*dx[m];
 306       f[ai][m]+=fij;
 307       f[aj][m]-=fij;
 308       fshift[ki][m]+=fij;
 309       fshift[CENTRAL][m]-=fij;
 310     }
 311   }                                           /*  58 TOTAL    */
 312   return vtot;
 313 }
 314
 315 real harmonic(real kA,real kB,real xA,real xB,real x,real lambda,
 316               real *V,real *F)
 317 {
 318   const real half=0.5;
 319   real  L1,kk,x0,dx,dx2;
 320   real  v,f,dvdlambda;
 321
 322   L1    = 1.0-lambda;
 323   kk    = L1*kA+lambda*kB;
 324   x0    = L1*xA+lambda*xB;
 325
 326   dx    = x-x0;
 327   dx2   = dx*dx;
 328
 329   f     = -kk*dx;
 330   v     = half*kk*dx2;
 331   dvdlambda  = half*(kB-kA)*dx2 + (xA-xB)*kk*dx;
 332
 333   *F    = f;
 334   *V    = v;
 335
 336   return dvdlambda;
 337
 338   /* That was 19 flops */
 339 }
 340
 341
 342 real bonds(int nbonds,
 343            const t_iatom forceatoms[],const t_iparams forceparams[],
 344            const rvec x[],rvec f[],rvec fshift[],
 345            const t_pbc *pbc,const t_graph *g,
 346            real lambda,real *dvdlambda,
 347            const t_mdatoms *md,t_fcdata *fcd,
 348            int *global_atom_index)
 349 {
 350   int  i,m,ki,ai,aj,type;
 351   real dr,dr2,fbond,vbond,fij,vtot;
 352   rvec dx;
 353   ivec dt;
 354
 355   vtot = 0.0;
 356   for(i=0; (i<nbonds); ) {
 357     type = forceatoms[i++];
 358     ai   = forceatoms[i++];
 359     aj   = forceatoms[i++];
 360
 361     ki   = pbc_rvec_sub(pbc,x[ai],x[aj],dx);    /*   3          */
 362     dr2  = iprod(dx,dx);                        /*   5          */
 363     dr   = dr2*gmx_invsqrt(dr2);                        /*  10          */
 364
 365     *dvdlambda += harmonic(forceparams[type].harmonic.krA,
 366                            forceparams[type].harmonic.krB,
 367                            forceparams[type].harmonic.rA,
 368                            forceparams[type].harmonic.rB,
 369                            dr,lambda,&vbond,&fbond);  /*  19  */
 370
 371     if (dr2 == 0.0)
 372       continue;
 373
 374
 375     vtot  += vbond;/* 1*/
 376     fbond *= gmx_invsqrt(dr2);                  /*   6          */
 377 #ifdef DEBUG
 378     if (debug)
 379       fprintf(debug,"BONDS: dr = %10g  vbond = %10g  fbond = %10g\n",
 380               dr,vbond,fbond);
 381 #endif
 382     if (g) {
 383       ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
 384       ki=IVEC2IS(dt);
 385     }
 386     for (m=0; (m<DIM); m++) {                   /*  15          */
 387       fij=fbond*dx[m];
 388       f[ai][m]+=fij;
 389       f[aj][m]-=fij;
 390       fshift[ki][m]+=fij;
 391       fshift[CENTRAL][m]-=fij;
 392     }
 393   }                                     /* 59 TOTAL     */
 394   return vtot;
 395 }
 396
 397 real restraint_bonds(int nbonds,
 398                      const t_iatom forceatoms[],const t_iparams forceparams[],
 399                      const rvec x[],rvec f[],rvec fshift[],
 400                      const t_pbc *pbc,const t_graph *g,
 401                      real lambda,real *dvdlambda,
 402                      const t_mdatoms *md,t_fcdata *fcd,
 403                      int *global_atom_index)
 404 {
 405     int  i,m,ki,ai,aj,type;
 406     real dr,dr2,fbond,vbond,fij,vtot;
 407     real L1;
 408     real low,dlow,up1,dup1,up2,dup2,k,dk;
 409     real drh,drh2;
 410     rvec dx;
 411     ivec dt;
 412
 413     L1   = 1.0 - lambda;
 414
 415     vtot = 0.0;
 416     for(i=0; (i<nbonds); )
 417     {
 418         type = forceatoms[i++];
 419         ai   = forceatoms[i++];
 420         aj   = forceatoms[i++];
 421
 422         ki   = pbc_rvec_sub(pbc,x[ai],x[aj],dx);        /*   3          */
 423         dr2  = iprod(dx,dx);                            /*   5          */
 424         dr   = dr2*gmx_invsqrt(dr2);                    /*  10          */
 425
 426         low  = L1*forceparams[type].restraint.lowA + lambda*forceparams[type].restraint.lowB;
 427         dlow =   -forceparams[type].restraint.lowA +        forceparams[type].restraint.lowB;
 428         up1  = L1*forceparams[type].restraint.up1A + lambda*forceparams[type].restraint.up1B;
 429         dup1 =   -forceparams[type].restraint.up1A +        forceparams[type].restraint.up1B;
 430         up2  = L1*forceparams[type].restraint.up2A + lambda*forceparams[type].restraint.up2B;
 431         dup2 =   -forceparams[type].restraint.up2A +        forceparams[type].restraint.up2B;
 432         k    = L1*forceparams[type].restraint.kA   + lambda*forceparams[type].restraint.kB;
 433         dk   =   -forceparams[type].restraint.kA   +        forceparams[type].restraint.kB;
 434         /* 24 */
 435
 436         if (dr < low)
 437         {
 438             drh   = dr - low;
 439             drh2  = drh*drh;
 440             vbond = 0.5*k*drh2;
 441             fbond = -k*drh;
 442             *dvdlambda += 0.5*dk*drh2 - k*dlow*drh;
 443         } /* 11 */
 444         else if (dr <= up1)
 445         {
 446             vbond = 0;
 447             fbond = 0;
 448         }
 449         else if (dr <= up2)
 450         {
 451             drh   = dr - up1;
 452             drh2  = drh*drh;
 453             vbond = 0.5*k*drh2;
 454             fbond = -k*drh;
 455             *dvdlambda += 0.5*dk*drh2 - k*dup1*drh;
 456         } /* 11 */
 457         else
 458         {
 459             drh   = dr - up2;
 460             vbond = k*(up2 - up1)*(0.5*(up2 - up1) + drh);
 461             fbond = -k*(up2 - up1);
 462             *dvdlambda += dk*(up2 - up1)*(0.5*(up2 - up1) + drh)
 463                 + k*(dup2 - dup1)*(up2 - up1 + drh)
 464                 - k*(up2 - up1)*dup2;
 465         }
 466
 467         if (dr2 == 0.0)
 468             continue;
 469
 470         vtot  += vbond;/* 1*/
 471         fbond *= gmx_invsqrt(dr2);                      /*   6          */
 472 #ifdef DEBUG
 473         if (debug)
 474             fprintf(debug,"BONDS: dr = %10g  vbond = %10g  fbond = %10g\n",
 475                     dr,vbond,fbond);
 476 #endif
 477         if (g) {
 478             ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
 479             ki=IVEC2IS(dt);
 480         }
 481         for (m=0; (m<DIM); m++) {                       /*  15          */
 482             fij=fbond*dx[m];
 483             f[ai][m]+=fij;
 484             f[aj][m]-=fij;
 485             fshift[ki][m]+=fij;
 486             fshift[CENTRAL][m]-=fij;
 487         }
 488     }                                   /* 59 TOTAL     */
 489
 490     return vtot;
 491 }
 492
 493 real polarize(int nbonds,
 494               const t_iatom forceatoms[],const t_iparams forceparams[],
 495               const rvec x[],rvec f[],rvec fshift[],
 496               const t_pbc *pbc,const t_graph *g,
 497               real lambda,real *dvdlambda,
 498               const t_mdatoms *md,t_fcdata *fcd,
 499               int *global_atom_index)
 500 {
 501   int  i,m,ki,ai,aj,type;
 502   real dr,dr2,fbond,vbond,fij,vtot,ksh;
 503   rvec dx;
 504   ivec dt;
 505
 506   vtot = 0.0;
 507   for(i=0; (i<nbonds); ) {
 508     type = forceatoms[i++];
 509     ai   = forceatoms[i++];
 510     aj   = forceatoms[i++];
 511     ksh  = sqr(md->chargeA[aj])*ONE_4PI_EPS0/forceparams[type].polarize.alpha;
 512     if (debug)
 513       fprintf(debug,"POL: local ai = %d aj = %d ksh = %.3f\n",ai,aj,ksh);
 514
 515     ki   = pbc_rvec_sub(pbc,x[ai],x[aj],dx);    /*   3          */
 516     dr2  = iprod(dx,dx);                        /*   5          */
 517     dr   = dr2*gmx_invsqrt(dr2);                        /*  10          */
 518
 519     *dvdlambda += harmonic(ksh,ksh,0,0,dr,lambda,&vbond,&fbond);  /*  19  */
 520
 521     if (dr2 == 0.0)
 522       continue;
 523
 524     vtot  += vbond;/* 1*/
 525     fbond *= gmx_invsqrt(dr2);                  /*   6          */
 526
 527     if (g) {
 528       ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
 529       ki=IVEC2IS(dt);
 530     }
 531     for (m=0; (m<DIM); m++) {                   /*  15          */
 532       fij=fbond*dx[m];
 533       f[ai][m]+=fij;
 534       f[aj][m]-=fij;
 535       fshift[ki][m]+=fij;
 536       fshift[CENTRAL][m]-=fij;
 537     }
 538   }                                     /* 59 TOTAL     */
 539   return vtot;
 540 }
 541
 542 real anharm_polarize(int nbonds,
 543                      const t_iatom forceatoms[],const t_iparams forceparams[],
 544                      const rvec x[],rvec f[],rvec fshift[],
 545                      const t_pbc *pbc,const t_graph *g,
 546                      real lambda,real *dvdlambda,
 547                      const t_mdatoms *md,t_fcdata *fcd,
 548                      int *global_atom_index)
 549 {
 550   int  i,m,ki,ai,aj,type;
 551   real dr,dr2,fbond,vbond,fij,vtot,ksh,khyp,drcut,ddr,ddr3;
 552   rvec dx;
 553   ivec dt;
 554
 555   vtot = 0.0;
 556   for(i=0; (i<nbonds); ) {
 557     type  = forceatoms[i++];
 558     ai    = forceatoms[i++];
 559     aj    = forceatoms[i++];
 560     ksh   = sqr(md->chargeA[aj])*ONE_4PI_EPS0/forceparams[type].anharm_polarize.alpha; /* 7*/
 561     khyp  = forceparams[type].anharm_polarize.khyp;
 562     drcut = forceparams[type].anharm_polarize.drcut;
 563     if (debug)
 564       fprintf(debug,"POL: local ai = %d aj = %d ksh = %.3f\n",ai,aj,ksh);
 565
 566     ki   = pbc_rvec_sub(pbc,x[ai],x[aj],dx);    /*   3          */
 567     dr2  = iprod(dx,dx);                        /*   5          */
 568     dr   = dr2*gmx_invsqrt(dr2);                        /*  10          */
 569
 570     *dvdlambda += harmonic(ksh,ksh,0,0,dr,lambda,&vbond,&fbond);  /*  19  */
 571
 572     if (dr2 == 0.0)
 573       continue;
 574
 575     if (dr > drcut) {
 576         ddr    = dr-drcut;
 577         ddr3   = ddr*ddr*ddr;
 578         vbond += khyp*ddr*ddr3;
 579         fbond -= 4*khyp*ddr3;
 580     }
 581     fbond *= gmx_invsqrt(dr2);                  /*   6          */
 582     vtot  += vbond;/* 1*/
 583
 584     if (g) {
 585       ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
 586       ki=IVEC2IS(dt);
 587     }
 588     for (m=0; (m<DIM); m++) {                   /*  15          */
 589       fij=fbond*dx[m];
 590       f[ai][m]+=fij;
 591       f[aj][m]-=fij;
 592       fshift[ki][m]+=fij;
 593       fshift[CENTRAL][m]-=fij;
 594     }
 595   }                                     /* 72 TOTAL     */
 596   return vtot;
 597 }
 598
 599 real water_pol(int nbonds,
 600                const t_iatom forceatoms[],const t_iparams forceparams[],
 601                const rvec x[],rvec f[],rvec fshift[],
 602                const t_pbc *pbc,const t_graph *g,
 603                real lambda,real *dvdlambda,
 604                const t_mdatoms *md,t_fcdata *fcd,
 605                int *global_atom_index)
 606 {
 607   /* This routine implements anisotropic polarizibility for water, through
 608    * a shell connected to a dummy with spring constant that differ in the
 609    * three spatial dimensions in the molecular frame.
 610    */
 611   int  i,m,aO,aH1,aH2,aD,aS,type,type0;
 612   rvec dOH1,dOH2,dHH,dOD,dDS,nW,kk,dx,kdx,proj;
 613 #ifdef DEBUG
 614   rvec df;
 615 #endif
 616   real vtot,fij,r_HH,r_OD,r_nW,tx,ty,tz,qS;
 617
 618   vtot = 0.0;
 619   if (nbonds > 0) {
 620     type0  = forceatoms[0];
 621     aS     = forceatoms[5];
 622     qS     = md->chargeA[aS];
 623     kk[XX] = sqr(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_x;
 624     kk[YY] = sqr(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_y;
 625     kk[ZZ] = sqr(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_z;
 626     r_HH   = 1.0/forceparams[type0].wpol.rHH;
 627     r_OD   = 1.0/forceparams[type0].wpol.rOD;
 628     if (debug) {
 629       fprintf(debug,"WPOL: qS  = %10.5f aS = %5d\n",qS,aS);
 630       fprintf(debug,"WPOL: kk  = %10.3f        %10.3f        %10.3f\n",
 631               kk[XX],kk[YY],kk[ZZ]);
 632       fprintf(debug,"WPOL: rOH = %10.3f  rHH = %10.3f  rOD = %10.3f\n",
 633               forceparams[type0].wpol.rOH,
 634               forceparams[type0].wpol.rHH,
 635               forceparams[type0].wpol.rOD);
 636     }
 637     for(i=0; (i<nbonds); i+=6) {
 638       type = forceatoms[i];
 639       if (type != type0)
 640         gmx_fatal(FARGS,"Sorry, type = %d, type0 = %d, file = %s, line = %d",
 641                     type,type0,__FILE__,__LINE__);
 642       aO   = forceatoms[i+1];
 643       aH1  = forceatoms[i+2];
 644       aH2  = forceatoms[i+3];
 645       aD   = forceatoms[i+4];
 646       aS   = forceatoms[i+5];
 647
 648       /* Compute vectors describing the water frame */
 649       rvec_sub(x[aH1],x[aO], dOH1);
 650       rvec_sub(x[aH2],x[aO], dOH2);
 651       rvec_sub(x[aH2],x[aH1],dHH);
 652       rvec_sub(x[aD], x[aO], dOD);
 653       rvec_sub(x[aS], x[aD], dDS);
 654       cprod(dOH1,dOH2,nW);
 655
 656       /* Compute inverse length of normal vector
 657        * (this one could be precomputed, but I'm too lazy now)
 658        */
 659       r_nW = gmx_invsqrt(iprod(nW,nW));
 660       /* This is for precision, but does not make a big difference,
 661        * it can go later.
 662        */
 663       r_OD = gmx_invsqrt(iprod(dOD,dOD));
 664
 665       /* Normalize the vectors in the water frame */
 666       svmul(r_nW,nW,nW);
 667       svmul(r_HH,dHH,dHH);
 668       svmul(r_OD,dOD,dOD);
 669
 670       /* Compute displacement of shell along components of the vector */
 671       dx[ZZ] = iprod(dDS,dOD);
 672       /* Compute projection on the XY plane: dDS - dx[ZZ]*dOD */
 673       for(m=0; (m<DIM); m++)
 674         proj[m] = dDS[m]-dx[ZZ]*dOD[m];
 675
 676       /*dx[XX] = iprod(dDS,nW);
 677         dx[YY] = iprod(dDS,dHH);*/
 678       dx[XX] = iprod(proj,nW);
 679       for(m=0; (m<DIM); m++)
 680         proj[m] -= dx[XX]*nW[m];
 681       dx[YY] = iprod(proj,dHH);
 682       /*#define DEBUG*/
 683 #ifdef DEBUG
 684       if (debug) {
 685         fprintf(debug,"WPOL: dx2=%10g  dy2=%10g  dz2=%10g  sum=%10g  dDS^2=%10g\n",
 686                 sqr(dx[XX]),sqr(dx[YY]),sqr(dx[ZZ]),iprod(dx,dx),iprod(dDS,dDS));
 687         fprintf(debug,"WPOL: dHH=(%10g,%10g,%10g)\n",dHH[XX],dHH[YY],dHH[ZZ]);
 688         fprintf(debug,"WPOL: dOD=(%10g,%10g,%10g), 1/r_OD = %10g\n",
 689                 dOD[XX],dOD[YY],dOD[ZZ],1/r_OD);
 690         fprintf(debug,"WPOL: nW =(%10g,%10g,%10g), 1/r_nW = %10g\n",
 691                 nW[XX],nW[YY],nW[ZZ],1/r_nW);
 692         fprintf(debug,"WPOL: dx  =%10g, dy  =%10g, dz  =%10g\n",
 693                 dx[XX],dx[YY],dx[ZZ]);
 694         fprintf(debug,"WPOL: dDSx=%10g, dDSy=%10g, dDSz=%10g\n",
 695                 dDS[XX],dDS[YY],dDS[ZZ]);
 696       }
 697 #endif
 698       /* Now compute the forces and energy */
 699       kdx[XX] = kk[XX]*dx[XX];
 700       kdx[YY] = kk[YY]*dx[YY];
 701       kdx[ZZ] = kk[ZZ]*dx[ZZ];
 702       vtot   += iprod(dx,kdx);
 703       for(m=0; (m<DIM); m++) {
 704         /* This is a tensor operation but written out for speed */
 705         tx        =  nW[m]*kdx[XX];
 706         ty        = dHH[m]*kdx[YY];
 707         tz        = dOD[m]*kdx[ZZ];
 708         fij       = -tx-ty-tz;
 709 #ifdef DEBUG
 710         df[m] = fij;
 711 #endif
 712         f[aS][m] += fij;
 713         f[aD][m] -= fij;
 714       }
 715 #ifdef DEBUG
 716       if (debug) {
 717         fprintf(debug,"WPOL: vwpol=%g\n",0.5*iprod(dx,kdx));
 718         fprintf(debug,"WPOL: df = (%10g, %10g, %10g)\n",df[XX],df[YY],df[ZZ]);
 719       }
 720 #endif
 721     }
 722   }
 723   return 0.5*vtot;
 724 }
 725
 726 static real do_1_thole(const rvec xi,const rvec xj,rvec fi,rvec fj,
 727                        const t_pbc *pbc,real qq,
 728                        rvec fshift[],real afac)
 729 {
 730   rvec r12;
 731   real r12sq,r12_1,r12n,r12bar,v0,v1,fscal,ebar,fff;
 732   int  m,t;
 733
 734   t      = pbc_rvec_sub(pbc,xi,xj,r12); /*  3 */
 735
 736   r12sq  = iprod(r12,r12);              /*  5 */
 737   r12_1  = gmx_invsqrt(r12sq);              /*  5 */
 738   r12bar = afac/r12_1;                  /*  5 */
 739   v0     = qq*ONE_4PI_EPS0*r12_1;       /*  2 */
 740   ebar   = exp(-r12bar);                /*  5 */
 741   v1     = (1-(1+0.5*r12bar)*ebar);     /*  4 */
 742   fscal  = ((v0*r12_1)*v1 - v0*0.5*afac*ebar*(r12bar+1))*r12_1; /* 9 */
 743   if (debug)
 744     fprintf(debug,"THOLE: v0 = %.3f v1 = %.3f r12= % .3f r12bar = %.3f fscal = %.3f  ebar = %.3f\n",v0,v1,1/r12_1,r12bar,fscal,ebar);
 745
 746   for(m=0; (m<DIM); m++) {
 747     fff    = fscal*r12[m];
 748     fi[m] += fff;
 749     fj[m] -= fff;
 750     fshift[t][m]       += fff;
 751     fshift[CENTRAL][m] -= fff;
 752   } /* 15 */
 753
 754   return v0*v1; /* 1 */
 755   /* 54 */
 756 }
 757
 758 real thole_pol(int nbonds,
 759                const t_iatom forceatoms[],const t_iparams forceparams[],
 760                const rvec x[],rvec f[],rvec fshift[],
 761                const t_pbc *pbc,const t_graph *g,
 762                real lambda,real *dvdlambda,
 763                const t_mdatoms *md,t_fcdata *fcd,
 764                int *global_atom_index)
 765 {
 766   /* Interaction between two pairs of particles with opposite charge */
 767   int i,type,a1,da1,a2,da2;
 768   real q1,q2,qq,a,al1,al2,afac;
 769   real V=0;
 770
 771   for(i=0; (i<nbonds); ) {
 772     type  = forceatoms[i++];
 773     a1    = forceatoms[i++];
 774     da1   = forceatoms[i++];
 775     a2    = forceatoms[i++];
 776     da2   = forceatoms[i++];
 777     q1    = md->chargeA[da1];
 778     q2    = md->chargeA[da2];
 779     a     = forceparams[type].thole.a;
 780     al1   = forceparams[type].thole.alpha1;
 781     al2   = forceparams[type].thole.alpha2;
 782     qq    = q1*q2;
 783     afac  = a*pow(al1*al2,-1.0/6.0);
 784     V += do_1_thole(x[a1], x[a2], f[a1], f[a2], pbc, qq,fshift,afac);
 785     V += do_1_thole(x[da1],x[a2], f[da1],f[a2], pbc,-qq,fshift,afac);
 786     V += do_1_thole(x[a1], x[da2],f[a1], f[da2],pbc,-qq,fshift,afac);
 787     V += do_1_thole(x[da1],x[da2],f[da1],f[da2],pbc, qq,fshift,afac);
 788   }
 789   /* 290 flops */
 790   return V;
 791 }
 792
 793 real bond_angle(const rvec xi,const rvec xj,const rvec xk,const t_pbc *pbc,
 794                 rvec r_ij,rvec r_kj,real *costh,
 795                 int *t1,int *t2)
 796 /* Return value is the angle between the bonds i-j and j-k */
 797 {
 798   /* 41 FLOPS */
 799   real th;
 800
 801   *t1 = pbc_rvec_sub(pbc,xi,xj,r_ij);                   /*  3           */
 802   *t2 = pbc_rvec_sub(pbc,xk,xj,r_kj);                   /*  3           */
 803
 804   *costh=cos_angle(r_ij,r_kj);          /* 25           */
 805   th=acos(*costh);                      /* 10           */
 806                                         /* 41 TOTAL     */
 807   return th;
 808 }
 809
 810 real angles(int nbonds,
 811             const t_iatom forceatoms[],const t_iparams forceparams[],
 812             const rvec x[],rvec f[],rvec fshift[],
 813             const t_pbc *pbc,const t_graph *g,
 814             real lambda,real *dvdlambda,
 815             const t_mdatoms *md,t_fcdata *fcd,
 816             int *global_atom_index)
 817 {
 818     int  i,ai,aj,ak,t1,t2,type;
 819     rvec r_ij,r_kj;
 820     real cos_theta,cos_theta2,theta,dVdt,va,vtot;
 821     ivec jt,dt_ij,dt_kj;
 822
 823     vtot = 0.0;
 824     for(i=0; i<nbonds; )
 825     {
 826         type = forceatoms[i++];
 827         ai   = forceatoms[i++];
 828         aj   = forceatoms[i++];
 829         ak   = forceatoms[i++];
 830
 831         theta  = bond_angle(x[ai],x[aj],x[ak],pbc,
 832                             r_ij,r_kj,&cos_theta,&t1,&t2);      /*  41          */
 833
 834         *dvdlambda += harmonic(forceparams[type].harmonic.krA,
 835                                forceparams[type].harmonic.krB,
 836                                forceparams[type].harmonic.rA*DEG2RAD,
 837                                forceparams[type].harmonic.rB*DEG2RAD,
 838                                theta,lambda,&va,&dVdt);  /*  21  */
 839         vtot += va;
 840
 841         cos_theta2 = sqr(cos_theta);
 842         if (cos_theta2 < 1)
 843         {
 844             int  m;
 845             real st,sth;
 846             real cik,cii,ckk;
 847             real nrkj2,nrij2;
 848             real nrkj_1,nrij_1;
 849             rvec f_i,f_j,f_k;
 850
 851             st  = dVdt*gmx_invsqrt(1 - cos_theta2);     /*  12          */
 852             sth = st*cos_theta;                 /*   1          */
 853 #ifdef DEBUG
 854             if (debug)
 855                 fprintf(debug,"ANGLES: theta = %10g  vth = %10g  dV/dtheta = %10g\n",
 856                         theta*RAD2DEG,va,dVdt);
 857 #endif
 858             nrij2 = iprod(r_ij,r_ij);                   /*   5          */
 859             nrkj2 = iprod(r_kj,r_kj);                   /*   5          */
 860
 861             nrij_1 = gmx_invsqrt(nrij2);                /*  10          */
 862             nrkj_1 = gmx_invsqrt(nrkj2);                /*  10          */
 863
 864             cik = st*nrij_1*nrkj_1;                     /*   2          */
 865             cii = sth*nrij_1*nrij_1;                    /*   2          */
 866             ckk = sth*nrkj_1*nrkj_1;                    /*   2          */
 867
 868             for (m=0; m<DIM; m++)
 869             {                   /*  39          */
 870                 f_i[m]    = -(cik*r_kj[m] - cii*r_ij[m]);
 871                 f_k[m]    = -(cik*r_ij[m] - ckk*r_kj[m]);
 872                 f_j[m]    = -f_i[m] - f_k[m];
 873                 f[ai][m] += f_i[m];
 874                 f[aj][m] += f_j[m];
 875                 f[ak][m] += f_k[m];
 876             }
 877             if (g != NULL)
 878             {
 879                 copy_ivec(SHIFT_IVEC(g,aj),jt);
 880
 881                 ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
 882                 ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
 883                 t1 = IVEC2IS(dt_ij);
 884                 t2 = IVEC2IS(dt_kj);
 885             }
 886             rvec_inc(fshift[t1],f_i);
 887             rvec_inc(fshift[CENTRAL],f_j);
 888             rvec_inc(fshift[t2],f_k);
 889         }                                           /* 161 TOTAL        */
 890     }
 891
 892     return vtot;
 893 }
 894
 895 real linear_angles(int nbonds,
 896                    const t_iatom forceatoms[],const t_iparams forceparams[],
 897                    const rvec x[],rvec f[],rvec fshift[],
 898                    const t_pbc *pbc,const t_graph *g,
 899                    real lambda,real *dvdlambda,
 900                    const t_mdatoms *md,t_fcdata *fcd,
 901                    int *global_atom_index)
 902 {
 903   int  i,m,ai,aj,ak,t1,t2,type;
 904   rvec f_i,f_j,f_k;
 905   real L1,kA,kB,aA,aB,dr,dr2,va,vtot,a,b,klin;
 906   ivec jt,dt_ij,dt_kj;
 907   rvec r_ij,r_kj,r_ik,dx;
 908
 909   L1   = 1-lambda;
 910   vtot = 0.0;
 911   for(i=0; (i<nbonds); ) {
 912     type = forceatoms[i++];
 913     ai   = forceatoms[i++];
 914     aj   = forceatoms[i++];
 915     ak   = forceatoms[i++];
 916
 917     kA = forceparams[type].linangle.klinA;
 918     kB = forceparams[type].linangle.klinB;
 919     klin = L1*kA + lambda*kB;
 920
 921     aA   = forceparams[type].linangle.aA;
 922     aB   = forceparams[type].linangle.aB;
 923     a    = L1*aA+lambda*aB;
 924     b    = 1-a;
 925
 926     t1 = pbc_rvec_sub(pbc,x[ai],x[aj],r_ij);
 927     t2 = pbc_rvec_sub(pbc,x[ak],x[aj],r_kj);
 928     rvec_sub(r_ij,r_kj,r_ik);
 929
 930     dr2 = 0;
 931     for(m=0; (m<DIM); m++)
 932     {
 933         dr     = - a * r_ij[m] - b * r_kj[m];
 934         dr2   += dr*dr;
 935         dx[m]  = dr;
 936         f_i[m] = a*klin*dr;
 937         f_k[m] = b*klin*dr;
 938         f_j[m] = -(f_i[m]+f_k[m]);
 939         f[ai][m] += f_i[m];
 940         f[aj][m] += f_j[m];
 941         f[ak][m] += f_k[m];
 942     }
 943     va    = 0.5*klin*dr2;
 944     *dvdlambda += 0.5*(kB-kA)*dr2 + klin*(aB-aA)*iprod(dx,r_ik);
 945
 946     vtot += va;
 947
 948     if (g) {
 949         copy_ivec(SHIFT_IVEC(g,aj),jt);
 950
 951         ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
 952         ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
 953         t1=IVEC2IS(dt_ij);
 954         t2=IVEC2IS(dt_kj);
 955     }
 956     rvec_inc(fshift[t1],f_i);
 957     rvec_inc(fshift[CENTRAL],f_j);
 958     rvec_inc(fshift[t2],f_k);
 959   }                                           /* 57 TOTAL       */
 960   return vtot;
 961 }
 962
 963 real urey_bradley(int nbonds,
 964                   const t_iatom forceatoms[],const t_iparams forceparams[],
 965                   const rvec x[],rvec f[],rvec fshift[],
 966                   const t_pbc *pbc,const t_graph *g,
 967                   real lambda,real *dvdlambda,
 968                   const t_mdatoms *md,t_fcdata *fcd,
 969                   int *global_atom_index)
 970 {
 971   int  i,m,ai,aj,ak,t1,t2,type,ki;
 972   rvec r_ij,r_kj,r_ik;
 973   real cos_theta,cos_theta2,theta;
 974   real dVdt,va,vtot,dr,dr2,vbond,fbond,fik;
 975   real kthA,th0A,kUBA,r13A,kthB,th0B,kUBB,r13B;
 976   ivec jt,dt_ij,dt_kj,dt_ik;
 977
 978   vtot = 0.0;
 979   for(i=0; (i<nbonds); ) {
 980     type = forceatoms[i++];
 981     ai   = forceatoms[i++];
 982     aj   = forceatoms[i++];
 983     ak   = forceatoms[i++];
 984     th0A  = forceparams[type].u_b.thetaA*DEG2RAD;
 985     kthA  = forceparams[type].u_b.kthetaA;
 986     r13A  = forceparams[type].u_b.r13A;
 987     kUBA  = forceparams[type].u_b.kUBA;
 988     th0B  = forceparams[type].u_b.thetaB*DEG2RAD;
 989     kthB  = forceparams[type].u_b.kthetaB;
 990     r13B  = forceparams[type].u_b.r13B;
 991     kUBB  = forceparams[type].u_b.kUBB;
 992
 993     theta  = bond_angle(x[ai],x[aj],x[ak],pbc,
 994                         r_ij,r_kj,&cos_theta,&t1,&t2);  /*  41          */
 995
 996     *dvdlambda += harmonic(kthA,kthB,th0A,th0B,theta,lambda,&va,&dVdt);  /*  21  */
 997     vtot += va;
 998
 999     ki   = pbc_rvec_sub(pbc,x[ai],x[ak],r_ik);  /*   3          */
1000     dr2  = iprod(r_ik,r_ik);                    /*   5          */
1001     dr   = dr2*gmx_invsqrt(dr2);                        /*  10          */
1002
1003     *dvdlambda += harmonic(kUBA,kUBB,r13A,r13B,dr,lambda,&vbond,&fbond); /*  19  */
1004
1005     cos_theta2 = sqr(cos_theta);                /*   1          */
1006     if (cos_theta2 < 1) {
1007       real st,sth;
1008       real cik,cii,ckk;
1009       real nrkj2,nrij2;
1010       rvec f_i,f_j,f_k;
1011
1012       st  = dVdt*gmx_invsqrt(1 - cos_theta2);   /*  12          */
1013       sth = st*cos_theta;                       /*   1          */
1014 #ifdef DEBUG
1015       if (debug)
1016         fprintf(debug,"ANGLES: theta = %10g  vth = %10g  dV/dtheta = %10g\n",
1017                 theta*RAD2DEG,va,dVdt);
1018 #endif
1019       nrkj2=iprod(r_kj,r_kj);                   /*   5          */
1020       nrij2=iprod(r_ij,r_ij);
1021
1022       cik=st*gmx_invsqrt(nrkj2*nrij2);          /*  12          */
1023       cii=sth/nrij2;                            /*  10          */
1024       ckk=sth/nrkj2;                            /*  10          */
1025
1026       for (m=0; (m<DIM); m++) {                 /*  39          */
1027         f_i[m]=-(cik*r_kj[m]-cii*r_ij[m]);
1028         f_k[m]=-(cik*r_ij[m]-ckk*r_kj[m]);
1029         f_j[m]=-f_i[m]-f_k[m];
1030         f[ai][m]+=f_i[m];
1031         f[aj][m]+=f_j[m];
1032         f[ak][m]+=f_k[m];
1033       }
1034       if (g) {
1035         copy_ivec(SHIFT_IVEC(g,aj),jt);
1036
1037         ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
1038         ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
1039         t1=IVEC2IS(dt_ij);
1040         t2=IVEC2IS(dt_kj);
1041       }
1042       rvec_inc(fshift[t1],f_i);
1043       rvec_inc(fshift[CENTRAL],f_j);
1044       rvec_inc(fshift[t2],f_k);
1045     }                                           /* 161 TOTAL    */
1046     /* Time for the bond calculations */
1047     if (dr2 == 0.0)
1048       continue;
1049
1050     vtot  += vbond;  /* 1*/
1051     fbond *= gmx_invsqrt(dr2);                  /*   6          */
1052
1053     if (g) {
1054       ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,ak),dt_ik);
1055       ki=IVEC2IS(dt_ik);
1056     }
1057     for (m=0; (m<DIM); m++) {                   /*  15          */
1058       fik=fbond*r_ik[m];
1059       f[ai][m]+=fik;
1060       f[ak][m]-=fik;
1061       fshift[ki][m]+=fik;
1062       fshift[CENTRAL][m]-=fik;
1063     }
1064   }
1065   return vtot;
1066 }
1067
1068 real quartic_angles(int nbonds,
1069                     const t_iatom forceatoms[],const t_iparams forceparams[],
1070                     const rvec x[],rvec f[],rvec fshift[],
1071                     const t_pbc *pbc,const t_graph *g,
1072                     real lambda,real *dvdlambda,
1073                     const t_mdatoms *md,t_fcdata *fcd,
1074                     int *global_atom_index)
1075 {
1076   int  i,j,ai,aj,ak,t1,t2,type;
1077   rvec r_ij,r_kj;
1078   real cos_theta,cos_theta2,theta,dt,dVdt,va,dtp,c,vtot;
1079   ivec jt,dt_ij,dt_kj;
1080
1081   vtot = 0.0;
1082   for(i=0; (i<nbonds); ) {
1083     type = forceatoms[i++];
1084     ai   = forceatoms[i++];
1085     aj   = forceatoms[i++];
1086     ak   = forceatoms[i++];
1087
1088     theta  = bond_angle(x[ai],x[aj],x[ak],pbc,
1089                         r_ij,r_kj,&cos_theta,&t1,&t2);  /*  41          */
1090
1091     dt = theta - forceparams[type].qangle.theta*DEG2RAD; /* 2          */
1092
1093     dVdt = 0;
1094     va = forceparams[type].qangle.c[0];
1095     dtp = 1.0;
1096     for(j=1; j<=4; j++) {
1097       c = forceparams[type].qangle.c[j];
1098       dVdt -= j*c*dtp;
1099       dtp *= dt;
1100       va += c*dtp;
1101     }
1102     /* 20 */
1103
1104     vtot += va;
1105
1106     cos_theta2 = sqr(cos_theta);                /*   1          */
1107     if (cos_theta2 < 1) {
1108       int  m;
1109       real st,sth;
1110       real cik,cii,ckk;
1111       real nrkj2,nrij2;
1112       rvec f_i,f_j,f_k;
1113
1114       st  = dVdt*gmx_invsqrt(1 - cos_theta2);           /*  12          */
1115       sth = st*cos_theta;                       /*   1          */
1116 #ifdef DEBUG
1117       if (debug)
1118         fprintf(debug,"ANGLES: theta = %10g  vth = %10g  dV/dtheta = %10g\n",
1119                 theta*RAD2DEG,va,dVdt);
1120 #endif
1121       nrkj2=iprod(r_kj,r_kj);                   /*   5          */
1122       nrij2=iprod(r_ij,r_ij);
1123
1124       cik=st*gmx_invsqrt(nrkj2*nrij2);          /*  12          */
1125       cii=sth/nrij2;                            /*  10          */
1126       ckk=sth/nrkj2;                            /*  10          */
1127
1128       for (m=0; (m<DIM); m++) {                 /*  39          */
1129         f_i[m]=-(cik*r_kj[m]-cii*r_ij[m]);
1130         f_k[m]=-(cik*r_ij[m]-ckk*r_kj[m]);
1131         f_j[m]=-f_i[m]-f_k[m];
1132         f[ai][m]+=f_i[m];
1133         f[aj][m]+=f_j[m];
1134         f[ak][m]+=f_k[m];
1135       }
1136       if (g) {
1137         copy_ivec(SHIFT_IVEC(g,aj),jt);
1138
1139         ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
1140         ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
1141         t1=IVEC2IS(dt_ij);
1142         t2=IVEC2IS(dt_kj);
1143       }
1144       rvec_inc(fshift[t1],f_i);
1145       rvec_inc(fshift[CENTRAL],f_j);
1146       rvec_inc(fshift[t2],f_k);
1147     }                                           /* 153 TOTAL    */
1148   }
1149   return vtot;
1150 }
1151
1152 real dih_angle(const rvec xi,const rvec xj,const rvec xk,const rvec xl,
1153                const t_pbc *pbc,
1154                rvec r_ij,rvec r_kj,rvec r_kl,rvec m,rvec n,
1155                real *sign,int *t1,int *t2,int *t3)
1156 {
1157   real ipr,phi;
1158
1159   *t1 = pbc_rvec_sub(pbc,xi,xj,r_ij);                   /*  3           */
1160   *t2 = pbc_rvec_sub(pbc,xk,xj,r_kj);                   /*  3           */
1161   *t3 = pbc_rvec_sub(pbc,xk,xl,r_kl);                   /*  3           */
1162
1163   cprod(r_ij,r_kj,m);                   /*  9           */
1164   cprod(r_kj,r_kl,n);                   /*  9           */
1165   phi=gmx_angle(m,n);                   /* 49 (assuming 25 for atan2) */
1166   ipr=iprod(r_ij,n);                    /*  5           */
1167   (*sign)=(ipr<0.0)?-1.0:1.0;
1168   phi=(*sign)*phi;                      /*  1           */
1169                                         /* 82 TOTAL     */
1170   return phi;
1171 }
1172
1173
1174 #ifdef SSE_PROPER_DIHEDRALS
1175
1176 /* x86 SIMD inner-product of 4 float vectors */
1177 #define GMX_MM_IPROD_PS(ax,ay,az,bx,by,bz)                 \
1178     _mm_add_ps(_mm_add_ps(_mm_mul_ps(ax,bx),_mm_mul_ps(ay,by)),_mm_mul_ps(az,bz))
1179
1180 /* x86 SIMD norm^2 of 4 float vectors */
1181 #define GMX_MM_NORM2_PS(ax,ay,az) GMX_MM_IPROD_PS(ax,ay,az,ax,ay,az)
1182
1183 /* x86 SIMD cross-product of 4 float vectors */
1184 #define GMX_MM_CPROD_PS(ax,ay,az,bx,by,bz,cx,cy,cz)        \
1185 {                                                          \
1186     cx = _mm_sub_ps(_mm_mul_ps(ay,bz),_mm_mul_ps(az,by));  \
1187     cy = _mm_sub_ps(_mm_mul_ps(az,bx),_mm_mul_ps(ax,bz));  \
1188     cz = _mm_sub_ps(_mm_mul_ps(ax,by),_mm_mul_ps(ay,bx));  \
1189 }
1190
1191 /* load 4 rvec's into 3 x86 SIMD float registers */
1192 #define load_rvec4(r0,r1,r2,r3,rx_SSE,ry_SSE,rz_SSE)          \
1193 {                                                             \
1194     __m128 tmp;                                               \
1195     rx_SSE = _mm_load_ps(r0);                                 \
1196     ry_SSE = _mm_load_ps(r1);                                 \
1197     rz_SSE = _mm_load_ps(r2);                                 \
1198     tmp    = _mm_load_ps(r3);                                 \
1199     _MM_TRANSPOSE4_PS(rx_SSE,ry_SSE,rz_SSE,tmp);              \
1200 }
1201
1202 #define store_rvec4(rx_SSE,ry_SSE,rz_SSE,r0,r1,r2,r3)         \
1203 {                                                             \
1204     __m128 tmp=_mm_setzero_ps();                              \
1205     _MM_TRANSPOSE4_PS(rx_SSE,ry_SSE,rz_SSE,tmp);              \
1206     _mm_store_ps(r0,rx_SSE);                                  \
1207     _mm_store_ps(r1,ry_SSE);                                  \
1208     _mm_store_ps(r2,rz_SSE);                                  \
1209     _mm_store_ps(r3,tmp   );                                  \
1210 }
1211
1212 /* An rvec in a structure which can be allocated 16-byte aligned */
1213 typedef struct {
1214     rvec  v;
1215     float f;
1216 } rvec_sse_t;
1217
1218 /* As dih_angle above, but calculates 4 dihedral angles at once using SSE,
1219  * also calculates the pre-factor required for the dihedral force update.
1220  * Note that bv and buf should be 16-byte aligned.
1221  */
1222 static void
1223 dih_angle_sse(const rvec *x,
1224               int ai[4],int aj[4],int ak[4],int al[4],
1225               const t_pbc *pbc,
1226               int t1[4],int t2[4],int t3[4],
1227               rvec_sse_t *bv,
1228               real *buf)
1229 {
1230     int s;
1231     __m128 rijx_SSE,rijy_SSE,rijz_SSE;
1232     __m128 rkjx_SSE,rkjy_SSE,rkjz_SSE;
1233     __m128 rklx_SSE,rkly_SSE,rklz_SSE;
1234     __m128 mx_SSE,my_SSE,mz_SSE;
1235     __m128 nx_SSE,ny_SSE,nz_SSE;
1236     __m128 cx_SSE,cy_SSE,cz_SSE;
1237     __m128 cn_SSE;
1238     __m128 s_SSE;
1239     __m128 phi_SSE;
1240     __m128 ipr_SSE;
1241     int signs;
1242     __m128 iprm_SSE,iprn_SSE;
1243     __m128 nrkj2_SSE,nrkj_1_SSE,nrkj_2_SSE,nrkj_SSE;
1244     __m128 nrkj_m2_SSE,nrkj_n2_SSE;
1245     __m128 p_SSE,q_SSE;
1246     __m128 fmin_SSE=_mm_set1_ps(GMX_FLOAT_MIN);
1247
1248     for(s=0; s<4; s++)
1249     {
1250         t1[s] = pbc_rvec_sub(pbc,x[ai[s]],x[aj[s]],bv[0+s].v);
1251         t2[s] = pbc_rvec_sub(pbc,x[ak[s]],x[aj[s]],bv[4+s].v);
1252         t3[s] = pbc_rvec_sub(pbc,x[ak[s]],x[al[s]],bv[8+s].v);
1253     }
1254
1255     load_rvec4(bv[0].v,bv[1].v,bv[2].v,bv[3].v,rijx_SSE,rijy_SSE,rijz_SSE);
1256     load_rvec4(bv[4].v,bv[5].v,bv[6].v,bv[7].v,rkjx_SSE,rkjy_SSE,rkjz_SSE);
1257     load_rvec4(bv[8].v,bv[9].v,bv[10].v,bv[11].v,rklx_SSE,rkly_SSE,rklz_SSE);
1258
1259     GMX_MM_CPROD_PS(rijx_SSE,rijy_SSE,rijz_SSE,
1260                     rkjx_SSE,rkjy_SSE,rkjz_SSE,
1261                     mx_SSE,my_SSE,mz_SSE);
1262
1263     GMX_MM_CPROD_PS(rkjx_SSE,rkjy_SSE,rkjz_SSE,
1264                     rklx_SSE,rkly_SSE,rklz_SSE,
1265                     nx_SSE,ny_SSE,nz_SSE);
1266
1267     GMX_MM_CPROD_PS(mx_SSE,my_SSE,mz_SSE,
1268                     nx_SSE,ny_SSE,nz_SSE,
1269                     cx_SSE,cy_SSE,cz_SSE);
1270
1271     cn_SSE = gmx_mm_sqrt_ps(GMX_MM_NORM2_PS(cx_SSE,cy_SSE,cz_SSE));
1272
1273     s_SSE = GMX_MM_IPROD_PS(mx_SSE,my_SSE,mz_SSE,nx_SSE,ny_SSE,nz_SSE);
1274
1275     phi_SSE = gmx_mm_atan2_ps(cn_SSE,s_SSE);
1276     _mm_store_ps(buf+16,phi_SSE);
1277
1278     ipr_SSE = GMX_MM_IPROD_PS(rijx_SSE,rijy_SSE,rijz_SSE,
1279                               nx_SSE,ny_SSE,nz_SSE);
1280
1281     signs = _mm_movemask_ps(ipr_SSE);
1282
1283     for(s=0; s<4; s++)
1284     {
1285         if (signs & (1<<s))
1286         {
1287             buf[16+s] = -buf[16+s];
1288         }
1289     }
1290
1291     iprm_SSE    = GMX_MM_NORM2_PS(mx_SSE,my_SSE,mz_SSE);
1292     iprn_SSE    = GMX_MM_NORM2_PS(nx_SSE,ny_SSE,nz_SSE);
1293
1294     /* store_rvec4 messes with the input, don't use it after this! */
1295     store_rvec4(mx_SSE,my_SSE,mz_SSE,bv[0].v,bv[1].v,bv[2].v,bv[3].v);
1296     store_rvec4(nx_SSE,ny_SSE,nz_SSE,bv[4].v,bv[5].v,bv[6].v,bv[7].v);
1297
1298     nrkj2_SSE   = GMX_MM_NORM2_PS(rkjx_SSE,rkjy_SSE,rkjz_SSE);
1299
1300     /* Avoid division by zero. When zero, the result is multiplied by 0
1301      * anyhow, so the 3 max below do not affect the final result.
1302      */
1303     nrkj2_SSE   = _mm_max_ps(nrkj2_SSE,fmin_SSE);
1304     nrkj_1_SSE  = gmx_mm_invsqrt_ps(nrkj2_SSE);
1305     nrkj_2_SSE  = _mm_mul_ps(nrkj_1_SSE,nrkj_1_SSE);
1306     nrkj_SSE    = _mm_mul_ps(nrkj2_SSE,nrkj_1_SSE);
1307
1308     iprm_SSE    = _mm_max_ps(iprm_SSE,fmin_SSE);
1309     iprn_SSE    = _mm_max_ps(iprn_SSE,fmin_SSE);
1310     nrkj_m2_SSE = _mm_mul_ps(nrkj_SSE,gmx_mm_inv_ps(iprm_SSE));
1311     nrkj_n2_SSE = _mm_mul_ps(nrkj_SSE,gmx_mm_inv_ps(iprn_SSE));
1312
1313     _mm_store_ps(buf+0,nrkj_m2_SSE);
1314     _mm_store_ps(buf+4,nrkj_n2_SSE);
1315
1316     p_SSE       = GMX_MM_IPROD_PS(rijx_SSE,rijy_SSE,rijz_SSE,
1317                                   rkjx_SSE,rkjy_SSE,rkjz_SSE);
1318     p_SSE       = _mm_mul_ps(p_SSE,nrkj_2_SSE);
1319
1320     q_SSE       = GMX_MM_IPROD_PS(rklx_SSE,rkly_SSE,rklz_SSE,
1321                                   rkjx_SSE,rkjy_SSE,rkjz_SSE);
1322     q_SSE       = _mm_mul_ps(q_SSE,nrkj_2_SSE);
1323
1324     _mm_store_ps(buf+8 ,p_SSE);
1325     _mm_store_ps(buf+12,q_SSE);
1326 }
1327
1328 #endif /* SSE_PROPER_DIHEDRALS */
1329
1330
1331 void do_dih_fup(int i,int j,int k,int l,real ddphi,
1332                 rvec r_ij,rvec r_kj,rvec r_kl,
1333                 rvec m,rvec n,rvec f[],rvec fshift[],
1334                 const t_pbc *pbc,const t_graph *g,
1335                 const rvec x[],int t1,int t2,int t3)
1336 {
1337   /* 143 FLOPS */
1338   rvec f_i,f_j,f_k,f_l;
1339   rvec uvec,vvec,svec,dx_jl;
1340   real iprm,iprn,nrkj,nrkj2,nrkj_1,nrkj_2;
1341   real a,b,p,q,toler;
1342   ivec jt,dt_ij,dt_kj,dt_lj;
1343
1344   iprm  = iprod(m,m);           /*  5   */
1345   iprn  = iprod(n,n);           /*  5   */
1346   nrkj2 = iprod(r_kj,r_kj);     /*  5   */
1347   toler = nrkj2*GMX_REAL_EPS;
1348   if ((iprm > toler) && (iprn > toler)) {
1349     nrkj_1 = gmx_invsqrt(nrkj2);        /* 10   */
1350     nrkj_2 = nrkj_1*nrkj_1;     /*  1   */
1351     nrkj  = nrkj2*nrkj_1;       /*  1   */
1352     a     = -ddphi*nrkj/iprm;   /* 11   */
1353     svmul(a,m,f_i);             /*  3   */
1354     b     = ddphi*nrkj/iprn;    /* 11   */
1355     svmul(b,n,f_l);             /*  3   */
1356     p     = iprod(r_ij,r_kj);   /*  5   */
1357     p    *= nrkj_2;             /*  1   */
1358     q     = iprod(r_kl,r_kj);   /*  5   */
1359     q    *= nrkj_2;             /*  1   */
1360     svmul(p,f_i,uvec);          /*  3   */
1361     svmul(q,f_l,vvec);          /*  3   */
1362     rvec_sub(uvec,vvec,svec);   /*  3   */
1363     rvec_sub(f_i,svec,f_j);     /*  3   */
1364     rvec_add(f_l,svec,f_k);     /*  3   */
1365     rvec_inc(f[i],f_i);         /*  3   */
1366     rvec_dec(f[j],f_j);         /*  3   */
1367     rvec_dec(f[k],f_k);         /*  3   */
1368     rvec_inc(f[l],f_l);         /*  3   */
1369
1370     if (g) {
1371       copy_ivec(SHIFT_IVEC(g,j),jt);
1372       ivec_sub(SHIFT_IVEC(g,i),jt,dt_ij);
1373       ivec_sub(SHIFT_IVEC(g,k),jt,dt_kj);
1374       ivec_sub(SHIFT_IVEC(g,l),jt,dt_lj);
1375       t1=IVEC2IS(dt_ij);
1376       t2=IVEC2IS(dt_kj);
1377       t3=IVEC2IS(dt_lj);
1378     } else if (pbc) {
1379       t3 = pbc_rvec_sub(pbc,x[l],x[j],dx_jl);
1380     } else {
1381       t3 = CENTRAL;
1382     }
1383
1384     rvec_inc(fshift[t1],f_i);
1385     rvec_dec(fshift[CENTRAL],f_j);
1386     rvec_dec(fshift[t2],f_k);
1387     rvec_inc(fshift[t3],f_l);
1388   }
1389   /* 112 TOTAL  */
1390 }
1391
1392 /* As do_dih_fup above, but without shift forces */
1393 static void
1394 do_dih_fup_noshiftf(int i,int j,int k,int l,real ddphi,
1395                     rvec r_ij,rvec r_kj,rvec r_kl,
1396                     rvec m,rvec n,rvec f[])
1397 {
1398   rvec f_i,f_j,f_k,f_l;
1399   rvec uvec,vvec,svec,dx_jl;
1400   real iprm,iprn,nrkj,nrkj2,nrkj_1,nrkj_2;
1401   real a,b,p,q,toler;
1402   ivec jt,dt_ij,dt_kj,dt_lj;
1403
1404   iprm  = iprod(m,m);           /*  5   */
1405   iprn  = iprod(n,n);           /*  5   */
1406   nrkj2 = iprod(r_kj,r_kj);     /*  5   */
1407   toler = nrkj2*GMX_REAL_EPS;
1408   if ((iprm > toler) && (iprn > toler)) {
1409     nrkj_1 = gmx_invsqrt(nrkj2);        /* 10   */
1410     nrkj_2 = nrkj_1*nrkj_1;     /*  1   */
1411     nrkj  = nrkj2*nrkj_1;       /*  1   */
1412     a     = -ddphi*nrkj/iprm;   /* 11   */
1413     svmul(a,m,f_i);             /*  3   */
1414     b     = ddphi*nrkj/iprn;    /* 11   */
1415     svmul(b,n,f_l);             /*  3   */
1416     p     = iprod(r_ij,r_kj);   /*  5   */
1417     p    *= nrkj_2;             /*  1   */
1418     q     = iprod(r_kl,r_kj);   /*  5   */
1419     q    *= nrkj_2;             /*  1   */
1420     svmul(p,f_i,uvec);          /*  3   */
1421     svmul(q,f_l,vvec);          /*  3   */
1422     rvec_sub(uvec,vvec,svec);   /*  3   */
1423     rvec_sub(f_i,svec,f_j);     /*  3   */
1424     rvec_add(f_l,svec,f_k);     /*  3   */
1425     rvec_inc(f[i],f_i);         /*  3   */
1426     rvec_dec(f[j],f_j);         /*  3   */
1427     rvec_dec(f[k],f_k);         /*  3   */
1428     rvec_inc(f[l],f_l);         /*  3   */
1429   }
1430 }
1431
1432 /* As do_dih_fup_noshiftf above, but with pre-calculated pre-factors */
1433 static void
1434 do_dih_fup_noshiftf_precalc(int i,int j,int k,int l,real ddphi,
1435                             real nrkj_m2,real nrkj_n2,
1436                             real p,real q,
1437                             rvec m,rvec n,rvec f[])
1438 {
1439     rvec f_i,f_j,f_k,f_l;
1440     rvec uvec,vvec,svec,dx_jl;
1441     real a,b,toler;
1442     ivec jt,dt_ij,dt_kj,dt_lj;
1443
1444     a = -ddphi*nrkj_m2;
1445     svmul(a,m,f_i);
1446     b =  ddphi*nrkj_n2;
1447     svmul(b,n,f_l);
1448     svmul(p,f_i,uvec);
1449     svmul(q,f_l,vvec);
1450     rvec_sub(uvec,vvec,svec);
1451     rvec_sub(f_i,svec,f_j);
1452     rvec_add(f_l,svec,f_k);
1453     rvec_inc(f[i],f_i);
1454     rvec_dec(f[j],f_j);
1455     rvec_dec(f[k],f_k);
1456     rvec_inc(f[l],f_l);
1457 }
1458
1459
1460 real dopdihs(real cpA,real cpB,real phiA,real phiB,int mult,
1461              real phi,real lambda,real *V,real *F)
1462 {
1463   real v,dvdlambda,mdphi,v1,sdphi,ddphi;
1464   real L1   = 1.0 - lambda;
1465   real ph0  = (L1*phiA + lambda*phiB)*DEG2RAD;
1466   real dph0 = (phiB - phiA)*DEG2RAD;
1467   real cp   = L1*cpA + lambda*cpB;
1468
1469   mdphi =  mult*phi - ph0;
1470   sdphi = sin(mdphi);
1471   ddphi = -cp*mult*sdphi;
1472   v1    = 1.0 + cos(mdphi);
1473   v     = cp*v1;
1474
1475   dvdlambda  = (cpB - cpA)*v1 + cp*dph0*sdphi;
1476
1477   *V = v;
1478   *F = ddphi;
1479
1480   return dvdlambda;
1481
1482   /* That was 40 flops */
1483 }
1484
1485 static void
1486 dopdihs_noener(real cpA,real cpB,real phiA,real phiB,int mult,
1487                real phi,real lambda,real *F)
1488 {
1489   real mdphi,sdphi,ddphi;
1490   real L1   = 1.0 - lambda;
1491   real ph0  = (L1*phiA + lambda*phiB)*DEG2RAD;
1492   real cp   = L1*cpA + lambda*cpB;
1493
1494   mdphi = mult*phi - ph0;
1495   sdphi = sin(mdphi);
1496   ddphi = -cp*mult*sdphi;
1497
1498   *F = ddphi;
1499
1500   /* That was 20 flops */
1501 }
1502
1503 static void
1504 dopdihs_mdphi(real cpA,real cpB,real phiA,real phiB,int mult,
1505               real phi,real lambda,real *cp,real *mdphi)
1506 {
1507     real L1   = 1.0 - lambda;
1508     real ph0  = (L1*phiA + lambda*phiB)*DEG2RAD;
1509
1510     *cp    = L1*cpA + lambda*cpB;
1511
1512     *mdphi = mult*phi - ph0;
1513 }
1514
1515 static real dopdihs_min(real cpA,real cpB,real phiA,real phiB,int mult,
1516                         real phi,real lambda,real *V,real *F)
1517      /* similar to dopdihs, except for a minus sign  *
1518       * and a different treatment of mult/phi0       */
1519 {
1520   real v,dvdlambda,mdphi,v1,sdphi,ddphi;
1521   real L1   = 1.0 - lambda;
1522   real ph0  = (L1*phiA + lambda*phiB)*DEG2RAD;
1523   real dph0 = (phiB - phiA)*DEG2RAD;
1524   real cp   = L1*cpA + lambda*cpB;
1525
1526   mdphi = mult*(phi-ph0);
1527   sdphi = sin(mdphi);
1528   ddphi = cp*mult*sdphi;
1529   v1    = 1.0-cos(mdphi);
1530   v     = cp*v1;
1531
1532   dvdlambda  = (cpB-cpA)*v1 + cp*dph0*sdphi;
1533
1534   *V = v;
1535   *F = ddphi;
1536
1537   return dvdlambda;
1538
1539   /* That was 40 flops */
1540 }
1541
1542 real pdihs(int nbonds,
1543            const t_iatom forceatoms[],const t_iparams forceparams[],
1544            const rvec x[],rvec f[],rvec fshift[],
1545            const t_pbc *pbc,const t_graph *g,
1546            real lambda,real *dvdlambda,
1547            const t_mdatoms *md,t_fcdata *fcd,
1548            int *global_atom_index)
1549 {
1550   int  i,type,ai,aj,ak,al;
1551   int  t1,t2,t3;
1552   rvec r_ij,r_kj,r_kl,m,n;
1553   real phi,sign,ddphi,vpd,vtot;
1554
1555   vtot = 0.0;
1556
1557   for(i=0; (i<nbonds); ) {
1558     type = forceatoms[i++];
1559     ai   = forceatoms[i++];
1560     aj   = forceatoms[i++];
1561     ak   = forceatoms[i++];
1562     al   = forceatoms[i++];
1563
1564     phi=dih_angle(x[ai],x[aj],x[ak],x[al],pbc,r_ij,r_kj,r_kl,m,n,
1565                   &sign,&t1,&t2,&t3);                   /*  84          */
1566     *dvdlambda += dopdihs(forceparams[type].pdihs.cpA,
1567                           forceparams[type].pdihs.cpB,
1568                           forceparams[type].pdihs.phiA,
1569                           forceparams[type].pdihs.phiB,
1570                           forceparams[type].pdihs.mult,
1571                           phi,lambda,&vpd,&ddphi);
1572
1573     vtot += vpd;
1574     do_dih_fup(ai,aj,ak,al,ddphi,r_ij,r_kj,r_kl,m,n,
1575                f,fshift,pbc,g,x,t1,t2,t3);                      /* 112          */
1576
1577 #ifdef DEBUG
1578     fprintf(debug,"pdih: (%d,%d,%d,%d) phi=%g\n",
1579             ai,aj,ak,al,phi);
1580 #endif
1581   } /* 223 TOTAL        */
1582
1583   return vtot;
1584 }
1585
1586 void make_dp_periodic(real *dp)  /* 1 flop? */
1587 {
1588     /* dp cannot be outside (-pi,pi) */
1589     if (*dp >= M_PI)
1590     {
1591         *dp -= 2*M_PI;
1592     }
1593     else if (*dp < -M_PI)
1594     {
1595         *dp += 2*M_PI;
1596     }
1597     return;
1598 }
1599
1600 /* As pdihs above, but without calculating energies and shift forces */
1601 static void
1602 pdihs_noener(int nbonds,
1603              const t_iatom forceatoms[],const t_iparams forceparams[],
1604              const rvec x[],rvec f[],
1605              const t_pbc *pbc,const t_graph *g,
1606              real lambda,
1607              const t_mdatoms *md,t_fcdata *fcd,
1608              int *global_atom_index)
1609 {
1610     int  i,type,ai,aj,ak,al;
1611     int  t1,t2,t3;
1612     rvec r_ij,r_kj,r_kl,m,n;
1613     real phi,sign,ddphi_tot,ddphi;
1614
1615     for(i=0; (i<nbonds); )
1616     {
1617         ai   = forceatoms[i+1];
1618         aj   = forceatoms[i+2];
1619         ak   = forceatoms[i+3];
1620         al   = forceatoms[i+4];
1621
1622         phi = dih_angle(x[ai],x[aj],x[ak],x[al],pbc,r_ij,r_kj,r_kl,m,n,
1623                         &sign,&t1,&t2,&t3);
1624
1625         ddphi_tot = 0;
1626
1627         /* Loop over dihedrals working on the same atoms,
1628          * so we avoid recalculating angles and force distributions.
1629          */
1630         do
1631         {
1632             type = forceatoms[i];
1633             dopdihs_noener(forceparams[type].pdihs.cpA,
1634                            forceparams[type].pdihs.cpB,
1635                            forceparams[type].pdihs.phiA,
1636                            forceparams[type].pdihs.phiB,
1637                            forceparams[type].pdihs.mult,
1638                            phi,lambda,&ddphi);
1639             ddphi_tot += ddphi;
1640
1641             i += 5;
1642         }
1643         while(i < nbonds &&
1644               forceatoms[i+1] == ai &&
1645               forceatoms[i+2] == aj &&
1646               forceatoms[i+3] == ak &&
1647               forceatoms[i+4] == al);
1648
1649         do_dih_fup_noshiftf(ai,aj,ak,al,ddphi_tot,r_ij,r_kj,r_kl,m,n,f);
1650     }
1651 }
1652
1653
1654 #ifdef SSE_PROPER_DIHEDRALS
1655
1656 /* As pdihs_noner above, but using SSE to calculate 4 dihedrals at once */
1657 static void
1658 pdihs_noener_sse(int nbonds,
1659                  const t_iatom forceatoms[],const t_iparams forceparams[],
1660                  const rvec x[],rvec f[],
1661                  const t_pbc *pbc,const t_graph *g,
1662                  real lambda,
1663                  const t_mdatoms *md,t_fcdata *fcd,
1664                  int *global_atom_index)
1665 {
1666     int  i,i4,s;
1667     int  type,ai[4],aj[4],ak[4],al[4];
1668     int  t1[4],t2[4],t3[4];
1669     int  mult[4];
1670     real cp[4],mdphi[4];
1671     real ddphi;
1672     rvec_sse_t rs_array[13],*rs;
1673     real buf_array[24],*buf;
1674     __m128 mdphi_SSE,sin_SSE,cos_SSE;
1675
1676     /* Ensure 16-byte alignment */
1677     rs  = (rvec_sse_t *)(((size_t)(rs_array +1)) & (~((size_t)15)));
1678     buf =      (float *)(((size_t)(buf_array+3)) & (~((size_t)15)));
1679
1680     for(i=0; (i<nbonds); i+=20)
1681     {
1682         /* Collect atoms quadruplets for 4 dihedrals */
1683         i4 = i;
1684         for(s=0; s<4; s++)
1685         {
1686             ai[s] = forceatoms[i4+1];
1687             aj[s] = forceatoms[i4+2];
1688             ak[s] = forceatoms[i4+3];
1689             al[s] = forceatoms[i4+4];
1690             /* At the end fill the arrays with identical entries */
1691             if (i4 + 5 < nbonds)
1692             {
1693                 i4 += 5;
1694             }
1695         }
1696
1697         /* Caclulate 4 dihedral angles at once */
1698         dih_angle_sse(x,ai,aj,ak,al,pbc,t1,t2,t3,rs,buf);
1699
1700         i4 = i;
1701         for(s=0; s<4; s++)
1702         {
1703             if (i4 < nbonds)
1704             {
1705                 /* Calculate the coefficient and angle deviation */
1706                 type = forceatoms[i4];
1707                 dopdihs_mdphi(forceparams[type].pdihs.cpA,
1708                               forceparams[type].pdihs.cpB,
1709                               forceparams[type].pdihs.phiA,
1710                               forceparams[type].pdihs.phiB,
1711                               forceparams[type].pdihs.mult,
1712                               buf[16+s],lambda,&cp[s],&buf[16+s]);
1713                 mult[s] = forceparams[type].pdihs.mult;
1714             }
1715             else
1716             {
1717                 buf[16+s] = 0;
1718             }
1719             i4 += 5;
1720         }
1721
1722         /* Calculate 4 sines at once */
1723         mdphi_SSE = _mm_load_ps(buf+16);
1724         gmx_mm_sincos_ps(mdphi_SSE,&sin_SSE,&cos_SSE);
1725         _mm_store_ps(buf+16,sin_SSE);
1726
1727         i4 = i;
1728         s = 0;
1729         do
1730         {
1731             ddphi = -cp[s]*mult[s]*buf[16+s];
1732
1733             do_dih_fup_noshiftf_precalc(ai[s],aj[s],ak[s],al[s],ddphi,
1734                                         buf[ 0+s],buf[ 4+s],
1735                                         buf[ 8+s],buf[12+s],
1736                                         rs[0+s].v,rs[4+s].v,
1737                                         f);
1738             s++;
1739             i4 += 5;
1740         }
1741         while (s < 4 && i4 < nbonds);
1742     }
1743 }
1744
1745 #endif /* SSE_PROPER_DIHEDRALS */
1746
1747
1748 real idihs(int nbonds,
1749            const t_iatom forceatoms[],const t_iparams forceparams[],
1750            const rvec x[],rvec f[],rvec fshift[],
1751            const t_pbc *pbc,const t_graph *g,
1752            real lambda,real *dvdlambda,
1753            const t_mdatoms *md,t_fcdata *fcd,
1754            int *global_atom_index)
1755 {
1756   int  i,type,ai,aj,ak,al;
1757   int  t1,t2,t3;
1758   real phi,phi0,dphi0,ddphi,sign,vtot;
1759   rvec r_ij,r_kj,r_kl,m,n;
1760   real L1,kk,dp,dp2,kA,kB,pA,pB,dvdl_term;
1761
1762   L1 = 1.0-lambda;
1763   dvdl_term = 0;
1764   vtot = 0.0;
1765   for(i=0; (i<nbonds); ) {
1766     type = forceatoms[i++];
1767     ai   = forceatoms[i++];
1768     aj   = forceatoms[i++];
1769     ak   = forceatoms[i++];
1770     al   = forceatoms[i++];
1771
1772     phi=dih_angle(x[ai],x[aj],x[ak],x[al],pbc,r_ij,r_kj,r_kl,m,n,
1773                   &sign,&t1,&t2,&t3);                   /*  84          */
1774
1775     /* phi can jump if phi0 is close to Pi/-Pi, which will cause huge
1776      * force changes if we just apply a normal harmonic.
1777      * Instead, we first calculate phi-phi0 and take it modulo (-Pi,Pi).
1778      * This means we will never have the periodicity problem, unless
1779      * the dihedral is Pi away from phiO, which is very unlikely due to
1780      * the potential.
1781      */
1782     kA = forceparams[type].harmonic.krA;
1783     kB = forceparams[type].harmonic.krB;
1784     pA = forceparams[type].harmonic.rA;
1785     pB = forceparams[type].harmonic.rB;
1786
1787     kk    = L1*kA + lambda*kB;
1788     phi0  = (L1*pA + lambda*pB)*DEG2RAD;
1789     dphi0 = (pB - pA)*DEG2RAD;
1790
1791     dp = phi-phi0;
1792
1793     make_dp_periodic(&dp);
1794
1795     dp2 = dp*dp;
1796
1797     vtot += 0.5*kk*dp2;
1798     ddphi = -kk*dp;
1799
1800     dvdl_term += 0.5*(kB - kA)*dp2 - kk*dphi0*dp;
1801
1802     do_dih_fup(ai,aj,ak,al,(real)(-ddphi),r_ij,r_kj,r_kl,m,n,
1803                f,fshift,pbc,g,x,t1,t2,t3);                      /* 112          */
1804     /* 218 TOTAL        */
1805 #ifdef DEBUG
1806     if (debug)
1807       fprintf(debug,"idih: (%d,%d,%d,%d) phi=%g\n",
1808               ai,aj,ak,al,phi);
1809 #endif
1810   }
1811
1812   *dvdlambda += dvdl_term;
1813   return vtot;
1814 }
1815
1816
1817 real posres(int nbonds,
1818             const t_iatom forceatoms[],const t_iparams forceparams[],
1819             const rvec x[],rvec f[],rvec vir_diag,
1820             t_pbc *pbc,
1821             real lambda,real *dvdlambda,
1822             int refcoord_scaling,int ePBC,rvec comA,rvec comB)
1823 {
1824     int  i,ai,m,d,type,ki,npbcdim=0;
1825     const t_iparams *pr;
1826     real L1;
1827     real vtot,kk,fm;
1828     real posA,posB,ref=0;
1829     rvec comA_sc,comB_sc,rdist,dpdl,pos,dx;
1830     gmx_bool bForceValid = TRUE;
1831
1832     if ((f==NULL) || (vir_diag==NULL)) {  /* should both be null together! */
1833         bForceValid = FALSE;
1834     }
1835
1836     npbcdim = ePBC2npbcdim(ePBC);
1837
1838     if (refcoord_scaling == erscCOM)
1839     {
1840         clear_rvec(comA_sc);
1841         clear_rvec(comB_sc);
1842         for(m=0; m<npbcdim; m++)
1843         {
1844             for(d=m; d<npbcdim; d++)
1845             {
1846                 comA_sc[m] += comA[d]*pbc->box[d][m];
1847                 comB_sc[m] += comB[d]*pbc->box[d][m];
1848             }
1849         }
1850     }
1851
1852     L1 = 1.0 - lambda;
1853
1854     vtot = 0.0;
1855     for(i=0; (i<nbonds); )
1856     {
1857         type = forceatoms[i++];
1858         ai   = forceatoms[i++];
1859         pr   = &forceparams[type];
1860
1861         for(m=0; m<DIM; m++)
1862         {
1863             posA = forceparams[type].posres.pos0A[m];
1864             posB = forceparams[type].posres.pos0B[m];
1865             if (m < npbcdim)
1866             {
1867                 switch (refcoord_scaling)
1868                 {
1869                 case erscNO:
1870                     ref      = 0;
1871                     rdist[m] = L1*posA + lambda*posB;
1872                     dpdl[m]  = posB - posA;
1873                     break;
1874                 case erscALL:
1875                     /* Box relative coordinates are stored for dimensions with pbc */
1876                     posA *= pbc->box[m][m];
1877                     posB *= pbc->box[m][m];
1878                     for(d=m+1; d<npbcdim; d++)
1879                     {
1880                         posA += forceparams[type].posres.pos0A[d]*pbc->box[d][m];
1881                         posB += forceparams[type].posres.pos0B[d]*pbc->box[d][m];
1882                     }
1883                     ref      = L1*posA + lambda*posB;
1884                     rdist[m] = 0;
1885                     dpdl[m]  = posB - posA;
1886                     break;
1887                 case erscCOM:
1888                     ref      = L1*comA_sc[m] + lambda*comB_sc[m];
1889                     rdist[m] = L1*posA       + lambda*posB;
1890                     dpdl[m]  = comB_sc[m] - comA_sc[m] + posB - posA;
1891                     break;
1892                 default:
1893                     gmx_fatal(FARGS, "No such scaling method implemented");
1894                 }
1895             }
1896             else
1897             {
1898                 ref      = L1*posA + lambda*posB;
1899                 rdist[m] = 0;
1900                 dpdl[m]  = posB - posA;
1901             }
1902
1903             /* We do pbc_dx with ref+rdist,
1904              * since with only ref we can be up to half a box vector wrong.
1905              */
1906             pos[m] = ref + rdist[m];
1907         }
1908
1909         if (pbc)
1910         {
1911             pbc_dx(pbc,x[ai],pos,dx);
1912         }
1913         else
1914         {
1915             rvec_sub(x[ai],pos,dx);
1916         }
1917
1918         for (m=0; (m<DIM); m++)
1919         {
1920             kk          = L1*pr->posres.fcA[m] + lambda*pr->posres.fcB[m];
1921             fm          = -kk*dx[m];
1922             vtot       += 0.5*kk*dx[m]*dx[m];
1923             *dvdlambda +=
1924                 0.5*(pr->posres.fcB[m] - pr->posres.fcA[m])*dx[m]*dx[m]
1925                 -fm*dpdl[m];
1926
1927             /* Here we correct for the pbc_dx which included rdist */
1928             if (bForceValid) {
1929                 f[ai][m]   += fm;
1930                 vir_diag[m] -= 0.5*(dx[m] + rdist[m])*fm;
1931             }
1932         }
1933     }
1934
1935     return vtot;
1936 }
1937
1938 static real low_angres(int nbonds,
1939                        const t_iatom forceatoms[],const t_iparams forceparams[],
1940                        const rvec x[],rvec f[],rvec fshift[],
1941                        const t_pbc *pbc,const t_graph *g,
1942                        real lambda,real *dvdlambda,
1943                        gmx_bool bZAxis)
1944 {
1945   int  i,m,type,ai,aj,ak,al;
1946   int  t1,t2;
1947   real phi,cos_phi,cos_phi2,vid,vtot,dVdphi;
1948   rvec r_ij,r_kl,f_i,f_k={0,0,0};
1949   real st,sth,nrij2,nrkl2,c,cij,ckl;
1950
1951   ivec dt;
1952   t2 = 0; /* avoid warning with gcc-3.3. It is never used uninitialized */
1953
1954   vtot = 0.0;
1955   ak=al=0; /* to avoid warnings */
1956   for(i=0; i<nbonds; ) {
1957     type = forceatoms[i++];
1958     ai   = forceatoms[i++];
1959     aj   = forceatoms[i++];
1960     t1   = pbc_rvec_sub(pbc,x[aj],x[ai],r_ij);                  /*  3           */
1961     if (!bZAxis) {
1962       ak   = forceatoms[i++];
1963       al   = forceatoms[i++];
1964       t2   = pbc_rvec_sub(pbc,x[al],x[ak],r_kl);           /*  3                */
1965     } else {
1966       r_kl[XX] = 0;
1967       r_kl[YY] = 0;
1968       r_kl[ZZ] = 1;
1969     }
1970
1971     cos_phi = cos_angle(r_ij,r_kl);             /* 25           */
1972     phi     = acos(cos_phi);                    /* 10           */
1973
1974     *dvdlambda += dopdihs_min(forceparams[type].pdihs.cpA,
1975                               forceparams[type].pdihs.cpB,
1976                               forceparams[type].pdihs.phiA,
1977                               forceparams[type].pdihs.phiB,
1978                               forceparams[type].pdihs.mult,
1979                               phi,lambda,&vid,&dVdphi); /*  40  */
1980
1981     vtot += vid;
1982
1983     cos_phi2 = sqr(cos_phi);                    /*   1          */
1984     if (cos_phi2 < 1) {
1985       st  = -dVdphi*gmx_invsqrt(1 - cos_phi2);      /*  12              */
1986       sth = st*cos_phi;                         /*   1          */
1987       nrij2 = iprod(r_ij,r_ij);                 /*   5          */
1988       nrkl2 = iprod(r_kl,r_kl);                 /*   5          */
1989
1990       c   = st*gmx_invsqrt(nrij2*nrkl2);                /*  11          */
1991       cij = sth/nrij2;                          /*  10          */
1992       ckl = sth/nrkl2;                          /*  10          */
1993
1994       for (m=0; m<DIM; m++) {                   /*  18+18       */
1995         f_i[m] = (c*r_kl[m]-cij*r_ij[m]);
1996         f[ai][m] += f_i[m];
1997         f[aj][m] -= f_i[m];
1998         if (!bZAxis) {
1999           f_k[m] = (c*r_ij[m]-ckl*r_kl[m]);
2000           f[ak][m] += f_k[m];
2001           f[al][m] -= f_k[m];
2002         }
2003       }
2004
2005       if (g) {
2006         ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
2007         t1=IVEC2IS(dt);
2008       }
2009       rvec_inc(fshift[t1],f_i);
2010       rvec_dec(fshift[CENTRAL],f_i);
2011       if (!bZAxis) {
2012         if (g) {
2013           ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,al),dt);
2014           t2=IVEC2IS(dt);
2015         }
2016         rvec_inc(fshift[t2],f_k);
2017         rvec_dec(fshift[CENTRAL],f_k);
2018       }
2019     }
2020   }
2021
2022   return vtot;  /*  184 / 157 (bZAxis)  total  */
2023 }
2024
2025 real angres(int nbonds,
2026             const t_iatom forceatoms[],const t_iparams forceparams[],
2027             const rvec x[],rvec f[],rvec fshift[],
2028             const t_pbc *pbc,const t_graph *g,
2029             real lambda,real *dvdlambda,
2030             const t_mdatoms *md,t_fcdata *fcd,
2031             int *global_atom_index)
2032 {
2033   return low_angres(nbonds,forceatoms,forceparams,x,f,fshift,pbc,g,
2034                     lambda,dvdlambda,FALSE);
2035 }
2036
2037 real angresz(int nbonds,
2038              const t_iatom forceatoms[],const t_iparams forceparams[],
2039              const rvec x[],rvec f[],rvec fshift[],
2040              const t_pbc *pbc,const t_graph *g,
2041              real lambda,real *dvdlambda,
2042              const t_mdatoms *md,t_fcdata *fcd,
2043              int *global_atom_index)
2044 {
2045   return low_angres(nbonds,forceatoms,forceparams,x,f,fshift,pbc,g,
2046                     lambda,dvdlambda,TRUE);
2047 }
2048
2049 real dihres(int nbonds,
2050             const t_iatom forceatoms[],const t_iparams forceparams[],
2051             const rvec x[],rvec f[],rvec fshift[],
2052             const t_pbc *pbc,const t_graph *g,
2053             real lambda,real *dvdlambda,
2054             const t_mdatoms *md,t_fcdata *fcd,
2055             int *global_atom_index)
2056 {
2057     real vtot = 0;
2058     int  ai,aj,ak,al,i,k,type,t1,t2,t3;
2059     real phi0A,phi0B,dphiA,dphiB,kfacA,kfacB,phi0,dphi,kfac;
2060     real phi,ddphi,ddp,ddp2,dp,sign,d2r,fc,L1;
2061     rvec r_ij,r_kj,r_kl,m,n;
2062
2063     L1 = 1.0-lambda;
2064
2065     d2r = DEG2RAD;
2066     k   = 0;
2067
2068     for (i=0; (i<nbonds); )
2069     {
2070         type = forceatoms[i++];
2071         ai   = forceatoms[i++];
2072         aj   = forceatoms[i++];
2073         ak   = forceatoms[i++];
2074         al   = forceatoms[i++];
2075
2076         phi0A  = forceparams[type].dihres.phiA*d2r;
2077         dphiA  = forceparams[type].dihres.dphiA*d2r;
2078         kfacA  = forceparams[type].dihres.kfacA;
2079
2080         phi0B  = forceparams[type].dihres.phiB*d2r;
2081         dphiB  = forceparams[type].dihres.dphiB*d2r;
2082         kfacB  = forceparams[type].dihres.kfacB;
2083
2084         phi0  = L1*phi0A + lambda*phi0B;
2085         dphi  = L1*dphiA + lambda*dphiB;
2086         kfac = L1*kfacA + lambda*kfacB;
2087
2088         phi = dih_angle(x[ai],x[aj],x[ak],x[al],pbc,r_ij,r_kj,r_kl,m,n,
2089                         &sign,&t1,&t2,&t3);
2090         /* 84 flops */
2091
2092         if (debug)
2093         {
2094             fprintf(debug,"dihres[%d]: %d %d %d %d : phi=%f, dphi=%f, kfac=%f\n",
2095                     k++,ai,aj,ak,al,phi0,dphi,kfac);
2096         }
2097         /* phi can jump if phi0 is close to Pi/-Pi, which will cause huge
2098          * force changes if we just apply a normal harmonic.
2099          * Instead, we first calculate phi-phi0 and take it modulo (-Pi,Pi).
2100          * This means we will never have the periodicity problem, unless
2101          * the dihedral is Pi away from phiO, which is very unlikely due to
2102          * the potential.
2103          */
2104         dp = phi-phi0;
2105         make_dp_periodic(&dp);
2106
2107         if (dp > dphi)
2108         {
2109             ddp = dp-dphi;
2110         }
2111         else if (dp < -dphi)
2112         {
2113             ddp = dp+dphi;
2114         }
2115         else
2116         {
2117             ddp = 0;
2118         }
2119
2120         if (ddp != 0.0)
2121         {
2122             ddp2 = ddp*ddp;
2123             vtot += 0.5*kfac*ddp2;
2124             ddphi = kfac*ddp;
2125
2126             *dvdlambda += 0.5*(kfacB - kfacA)*ddp2;
2127             /* lambda dependence from changing restraint distances */
2128             if (ddp > 0)
2129             {
2130                 *dvdlambda -= kfac*ddp*((dphiB - dphiA)+(phi0B - phi0A));
2131             }
2132             else if (ddp < 0)
2133             {
2134                 *dvdlambda += kfac*ddp*((dphiB - dphiA)-(phi0B - phi0A));
2135             }
2136             do_dih_fup(ai,aj,ak,al,ddphi,r_ij,r_kj,r_kl,m,n,
2137                        f,fshift,pbc,g,x,t1,t2,t3);              /* 112          */
2138         }
2139     }
2140     return vtot;
2141 }
2142
2143
2144 real unimplemented(int nbonds,
2145                    const t_iatom forceatoms[],const t_iparams forceparams[],
2146                    const rvec x[],rvec f[],rvec fshift[],
2147                    const t_pbc *pbc,const t_graph *g,
2148                    real lambda,real *dvdlambda,
2149                    const t_mdatoms *md,t_fcdata *fcd,
2150                    int *global_atom_index)
2151 {
2152   gmx_impl("*** you are using a not implemented function");
2153
2154   return 0.0; /* To make the compiler happy */
2155 }
2156
2157 real rbdihs(int nbonds,
2158             const t_iatom forceatoms[],const t_iparams forceparams[],
2159             const rvec x[],rvec f[],rvec fshift[],
2160             const t_pbc *pbc,const t_graph *g,
2161             real lambda,real *dvdlambda,
2162             const t_mdatoms *md,t_fcdata *fcd,
2163             int *global_atom_index)
2164 {
2165   const real c0=0.0,c1=1.0,c2=2.0,c3=3.0,c4=4.0,c5=5.0;
2166   int  type,ai,aj,ak,al,i,j;
2167   int  t1,t2,t3;
2168   rvec r_ij,r_kj,r_kl,m,n;
2169   real parmA[NR_RBDIHS];
2170   real parmB[NR_RBDIHS];
2171   real parm[NR_RBDIHS];
2172   real cos_phi,phi,rbp,rbpBA;
2173   real v,sign,ddphi,sin_phi;
2174   real cosfac,vtot;
2175   real L1   = 1.0-lambda;
2176   real dvdl_term=0;
2177
2178   vtot = 0.0;
2179   for(i=0; (i<nbonds); ) {
2180     type = forceatoms[i++];
2181     ai   = forceatoms[i++];
2182     aj   = forceatoms[i++];
2183     ak   = forceatoms[i++];
2184     al   = forceatoms[i++];
2185
2186       phi=dih_angle(x[ai],x[aj],x[ak],x[al],pbc,r_ij,r_kj,r_kl,m,n,
2187                     &sign,&t1,&t2,&t3);                 /*  84          */
2188
2189     /* Change to polymer convention */
2190     if (phi < c0)
2191       phi += M_PI;
2192     else
2193       phi -= M_PI;                      /*   1          */
2194
2195     cos_phi = cos(phi);
2196     /* Beware of accuracy loss, cannot use 1-sqrt(cos^2) ! */
2197     sin_phi = sin(phi);
2198
2199     for(j=0; (j<NR_RBDIHS); j++) {
2200       parmA[j] = forceparams[type].rbdihs.rbcA[j];
2201       parmB[j] = forceparams[type].rbdihs.rbcB[j];
2202       parm[j]  = L1*parmA[j]+lambda*parmB[j];
2203     }
2204     /* Calculate cosine powers */
2205     /* Calculate the energy */
2206     /* Calculate the derivative */
2207
2208     v       = parm[0];
2209     dvdl_term   += (parmB[0]-parmA[0]);
2210     ddphi   = c0;
2211     cosfac  = c1;
2212
2213     rbp     = parm[1];
2214     rbpBA   = parmB[1]-parmA[1];
2215     ddphi  += rbp*cosfac;
2216     cosfac *= cos_phi;
2217     v      += cosfac*rbp;
2218     dvdl_term   += cosfac*rbpBA;
2219     rbp     = parm[2];
2220     rbpBA   = parmB[2]-parmA[2];
2221     ddphi  += c2*rbp*cosfac;
2222     cosfac *= cos_phi;
2223     v      += cosfac*rbp;
2224     dvdl_term   += cosfac*rbpBA;
2225     rbp     = parm[3];
2226     rbpBA   = parmB[3]-parmA[3];
2227     ddphi  += c3*rbp*cosfac;
2228     cosfac *= cos_phi;
2229     v      += cosfac*rbp;
2230     dvdl_term   += cosfac*rbpBA;
2231     rbp     = parm[4];
2232     rbpBA   = parmB[4]-parmA[4];
2233     ddphi  += c4*rbp*cosfac;
2234     cosfac *= cos_phi;
2235     v      += cosfac*rbp;
2236     dvdl_term   += cosfac*rbpBA;
2237     rbp     = parm[5];
2238     rbpBA   = parmB[5]-parmA[5];
2239     ddphi  += c5*rbp*cosfac;
2240     cosfac *= cos_phi;
2241     v      += cosfac*rbp;
2242     dvdl_term   += cosfac*rbpBA;
2243
2244     ddphi = -ddphi*sin_phi;                             /*  11          */
2245
2246     do_dih_fup(ai,aj,ak,al,ddphi,r_ij,r_kj,r_kl,m,n,
2247                f,fshift,pbc,g,x,t1,t2,t3);              /* 112          */
2248     vtot += v;
2249   }
2250   *dvdlambda += dvdl_term;
2251
2252   return vtot;
2253 }
2254
2255 int cmap_setup_grid_index(int ip, int grid_spacing, int *ipm1, int *ipp1, int *ipp2)
2256 {
2257         int im1, ip1, ip2;
2258
2259         if(ip<0)
2260         {
2261                 ip = ip + grid_spacing - 1;
2262         }
2263         else if(ip > grid_spacing)
2264         {
2265                 ip = ip - grid_spacing - 1;
2266         }
2267
2268         im1 = ip - 1;
2269         ip1 = ip + 1;
2270         ip2 = ip + 2;
2271
2272         if(ip == 0)
2273         {
2274                 im1 = grid_spacing - 1;
2275         }
2276         else if(ip == grid_spacing-2)
2277         {
2278                 ip2 = 0;
2279         }
2280         else if(ip == grid_spacing-1)
2281         {
2282                 ip1 = 0;
2283                 ip2 = 1;
2284         }
2285
2286         *ipm1 = im1;
2287         *ipp1 = ip1;
2288         *ipp2 = ip2;
2289
2290         return ip;
2291
2292 }
2293
2294 real cmap_dihs(int nbonds,
2295                            const t_iatom forceatoms[],const t_iparams forceparams[],
2296                const gmx_cmap_t *cmap_grid,
2297                            const rvec x[],rvec f[],rvec fshift[],
2298                            const t_pbc *pbc,const t_graph *g,
2299                            real lambda,real *dvdlambda,
2300                            const t_mdatoms *md,t_fcdata *fcd,
2301                            int *global_atom_index)
2302 {
2303         int i,j,k,n,idx;
2304         int ai,aj,ak,al,am;
2305         int a1i,a1j,a1k,a1l,a2i,a2j,a2k,a2l;
2306         int type,cmapA;
2307         int t11,t21,t31,t12,t22,t32;
2308         int iphi1,ip1m1,ip1p1,ip1p2;
2309         int iphi2,ip2m1,ip2p1,ip2p2;
2310         int l1,l2,l3,l4;
2311         int pos1,pos2,pos3,pos4,tmp;
2312
2313         real ty[4],ty1[4],ty2[4],ty12[4],tc[16],tx[16];
2314         real phi1,psi1,cos_phi1,sin_phi1,sign1,xphi1;
2315         real phi2,psi2,cos_phi2,sin_phi2,sign2,xphi2;
2316         real dx,xx,tt,tu,e,df1,df2,ddf1,ddf2,ddf12,vtot;
2317         real ra21,rb21,rg21,rg1,rgr1,ra2r1,rb2r1,rabr1;
2318         real ra22,rb22,rg22,rg2,rgr2,ra2r2,rb2r2,rabr2;
2319         real fg1,hg1,fga1,hgb1,gaa1,gbb1;
2320         real fg2,hg2,fga2,hgb2,gaa2,gbb2;
2321         real fac;
2322
2323         rvec r1_ij, r1_kj, r1_kl,m1,n1;
2324         rvec r2_ij, r2_kj, r2_kl,m2,n2;
2325         rvec f1_i,f1_j,f1_k,f1_l;
2326         rvec f2_i,f2_j,f2_k,f2_l;
2327         rvec a1,b1,a2,b2;
2328         rvec f1,g1,h1,f2,g2,h2;
2329         rvec dtf1,dtg1,dth1,dtf2,dtg2,dth2;
2330         ivec jt1,dt1_ij,dt1_kj,dt1_lj;
2331         ivec jt2,dt2_ij,dt2_kj,dt2_lj;
2332
2333     const real *cmapd;
2334
2335         int loop_index[4][4] = {
2336                 {0,4,8,12},
2337                 {1,5,9,13},
2338                 {2,6,10,14},
2339                 {3,7,11,15}
2340         };
2341
2342         /* Total CMAP energy */
2343         vtot = 0;
2344
2345         for(n=0;n<nbonds; )
2346         {
2347                 /* Five atoms are involved in the two torsions */
2348                 type   = forceatoms[n++];
2349                 ai     = forceatoms[n++];
2350                 aj     = forceatoms[n++];
2351                 ak     = forceatoms[n++];
2352                 al     = forceatoms[n++];
2353                 am     = forceatoms[n++];
2354
2355                 /* Which CMAP type is this */
2356                 cmapA = forceparams[type].cmap.cmapA;
2357         cmapd = cmap_grid->cmapdata[cmapA].cmap;
2358
2359                 /* First torsion */
2360                 a1i   = ai;
2361                 a1j   = aj;
2362                 a1k   = ak;
2363                 a1l   = al;
2364
2365                 phi1  = dih_angle(x[a1i], x[a1j], x[a1k], x[a1l], pbc, r1_ij, r1_kj, r1_kl, m1, n1,
2366                                                    &sign1, &t11, &t21, &t31); /* 84 */
2367
2368         cos_phi1 = cos(phi1);
2369
2370                 a1[0] = r1_ij[1]*r1_kj[2]-r1_ij[2]*r1_kj[1];
2371                 a1[1] = r1_ij[2]*r1_kj[0]-r1_ij[0]*r1_kj[2];
2372                 a1[2] = r1_ij[0]*r1_kj[1]-r1_ij[1]*r1_kj[0]; /* 9 */
2373
2374                 b1[0] = r1_kl[1]*r1_kj[2]-r1_kl[2]*r1_kj[1];
2375                 b1[1] = r1_kl[2]*r1_kj[0]-r1_kl[0]*r1_kj[2];
2376                 b1[2] = r1_kl[0]*r1_kj[1]-r1_kl[1]*r1_kj[0]; /* 9 */
2377
2378                 tmp = pbc_rvec_sub(pbc,x[a1l],x[a1k],h1);
2379
2380                 ra21  = iprod(a1,a1);       /* 5 */
2381                 rb21  = iprod(b1,b1);       /* 5 */
2382                 rg21  = iprod(r1_kj,r1_kj); /* 5 */
2383                 rg1   = sqrt(rg21);
2384
2385                 rgr1  = 1.0/rg1;
2386                 ra2r1 = 1.0/ra21;
2387                 rb2r1 = 1.0/rb21;
2388                 rabr1 = sqrt(ra2r1*rb2r1);
2389
2390                 sin_phi1 = rg1 * rabr1 * iprod(a1,h1) * (-1);
2391
2392                 if(cos_phi1 < -0.5 || cos_phi1 > 0.5)
2393                 {
2394                         phi1 = asin(sin_phi1);
2395
2396                         if(cos_phi1 < 0)
2397                         {
2398                                 if(phi1 > 0)
2399                                 {
2400                                         phi1 = M_PI - phi1;
2401                                 }
2402                                 else
2403                                 {
2404                                         phi1 = -M_PI - phi1;
2405                                 }
2406                         }
2407                 }
2408                 else
2409                 {
2410                         phi1 = acos(cos_phi1);
2411
2412                         if(sin_phi1 < 0)
2413                         {
2414                                 phi1 = -phi1;
2415                         }
2416                 }
2417
2418                 xphi1 = phi1 + M_PI; /* 1 */
2419
2420                 /* Second torsion */
2421                 a2i   = aj;
2422                 a2j   = ak;
2423                 a2k   = al;
2424                 a2l   = am;
2425
2426                 phi2  = dih_angle(x[a2i], x[a2j], x[a2k], x[a2l], pbc, r2_ij, r2_kj, r2_kl, m2, n2,
2427                                                   &sign2, &t12, &t22, &t32); /* 84 */
2428
2429         cos_phi2 = cos(phi2);
2430
2431                 a2[0] = r2_ij[1]*r2_kj[2]-r2_ij[2]*r2_kj[1];
2432                 a2[1] = r2_ij[2]*r2_kj[0]-r2_ij[0]*r2_kj[2];
2433                 a2[2] = r2_ij[0]*r2_kj[1]-r2_ij[1]*r2_kj[0]; /* 9 */
2434
2435                 b2[0] = r2_kl[1]*r2_kj[2]-r2_kl[2]*r2_kj[1];
2436                 b2[1] = r2_kl[2]*r2_kj[0]-r2_kl[0]*r2_kj[2];
2437                 b2[2] = r2_kl[0]*r2_kj[1]-r2_kl[1]*r2_kj[0]; /* 9 */
2438
2439                 tmp = pbc_rvec_sub(pbc,x[a2l],x[a2k],h2);
2440
2441                 ra22  = iprod(a2,a2);         /* 5 */
2442                 rb22  = iprod(b2,b2);         /* 5 */
2443                 rg22  = iprod(r2_kj,r2_kj);   /* 5 */
2444                 rg2   = sqrt(rg22);
2445
2446                 rgr2  = 1.0/rg2;
2447                 ra2r2 = 1.0/ra22;
2448                 rb2r2 = 1.0/rb22;
2449                 rabr2 = sqrt(ra2r2*rb2r2);
2450
2451                 sin_phi2 = rg2 * rabr2 * iprod(a2,h2) * (-1);
2452
2453                 if(cos_phi2 < -0.5 || cos_phi2 > 0.5)
2454                 {
2455                         phi2 = asin(sin_phi2);
2456
2457                         if(cos_phi2 < 0)
2458                         {
2459                                 if(phi2 > 0)
2460                                 {
2461                                         phi2 = M_PI - phi2;
2462                                 }
2463                                 else
2464                                 {
2465                                         phi2 = -M_PI - phi2;
2466                                 }
2467                         }
2468                 }
2469                 else
2470                 {
2471                         phi2 = acos(cos_phi2);
2472
2473                         if(sin_phi2 < 0)
2474                         {
2475                                 phi2 = -phi2;
2476                         }
2477                 }
2478
2479                 xphi2 = phi2 + M_PI; /* 1 */
2480
2481                 /* Range mangling */
2482                 if(xphi1<0)
2483                 {
2484                         xphi1 = xphi1 + 2*M_PI;
2485                 }
2486                 else if(xphi1>=2*M_PI)
2487                 {
2488                         xphi1 = xphi1 - 2*M_PI;
2489                 }
2490
2491                 if(xphi2<0)
2492                 {
2493                         xphi2 = xphi2 + 2*M_PI;
2494                 }
2495                 else if(xphi2>=2*M_PI)
2496                 {
2497                         xphi2 = xphi2 - 2*M_PI;
2498                 }
2499
2500                 /* Number of grid points */
2501                 dx = 2*M_PI / cmap_grid->grid_spacing;
2502
2503                 /* Where on the grid are we */
2504                 iphi1 = (int)(xphi1/dx);
2505                 iphi2 = (int)(xphi2/dx);
2506
2507                 iphi1 = cmap_setup_grid_index(iphi1, cmap_grid->grid_spacing, &ip1m1,&ip1p1,&ip1p2);
2508                 iphi2 = cmap_setup_grid_index(iphi2, cmap_grid->grid_spacing, &ip2m1,&ip2p1,&ip2p2);
2509
2510                 pos1    = iphi1*cmap_grid->grid_spacing+iphi2;
2511                 pos2    = ip1p1*cmap_grid->grid_spacing+iphi2;
2512                 pos3    = ip1p1*cmap_grid->grid_spacing+ip2p1;
2513                 pos4    = iphi1*cmap_grid->grid_spacing+ip2p1;
2514
2515                 ty[0]   = cmapd[pos1*4];
2516                 ty[1]   = cmapd[pos2*4];
2517                 ty[2]   = cmapd[pos3*4];
2518                 ty[3]   = cmapd[pos4*4];
2519
2520                 ty1[0]   = cmapd[pos1*4+1];
2521                 ty1[1]   = cmapd[pos2*4+1];
2522                 ty1[2]   = cmapd[pos3*4+1];
2523                 ty1[3]   = cmapd[pos4*4+1];
2524
2525                 ty2[0]   = cmapd[pos1*4+2];
2526                 ty2[1]   = cmapd[pos2*4+2];
2527                 ty2[2]   = cmapd[pos3*4+2];
2528                 ty2[3]   = cmapd[pos4*4+2];
2529
2530                 ty12[0]   = cmapd[pos1*4+3];
2531                 ty12[1]   = cmapd[pos2*4+3];
2532                 ty12[2]   = cmapd[pos3*4+3];
2533                 ty12[3]   = cmapd[pos4*4+3];
2534
2535                 /* Switch to degrees */
2536                 dx = 360.0 / cmap_grid->grid_spacing;
2537                 xphi1 = xphi1 * RAD2DEG;
2538                 xphi2 = xphi2 * RAD2DEG;
2539
2540                 for(i=0;i<4;i++) /* 16 */
2541                 {
2542                         tx[i] = ty[i];
2543                         tx[i+4] = ty1[i]*dx;
2544                         tx[i+8] = ty2[i]*dx;
2545                         tx[i+12] = ty12[i]*dx*dx;
2546                 }
2547
2548                 idx=0;
2549                 for(i=0;i<4;i++) /* 1056 */
2550                 {
2551                         for(j=0;j<4;j++)
2552                         {
2553                                 xx = 0;
2554                                 for(k=0;k<16;k++)
2555                                 {
2556                                         xx = xx + cmap_coeff_matrix[k*16+idx]*tx[k];
2557                                 }
2558
2559                                 idx++;
2560                                 tc[i*4+j]=xx;
2561                         }
2562                 }
2563
2564                 tt    = (xphi1-iphi1*dx)/dx;
2565                 tu    = (xphi2-iphi2*dx)/dx;
2566
2567                 e     = 0;
2568                 df1   = 0;
2569                 df2   = 0;
2570                 ddf1  = 0;
2571                 ddf2  = 0;
2572                 ddf12 = 0;
2573
2574                 for(i=3;i>=0;i--)
2575                 {
2576                         l1 = loop_index[i][3];
2577                         l2 = loop_index[i][2];
2578                         l3 = loop_index[i][1];
2579
2580                         e     = tt * e    + ((tc[i*4+3]*tu+tc[i*4+2])*tu + tc[i*4+1])*tu+tc[i*4];
2581                         df1   = tu * df1  + (3.0*tc[l1]*tt+2.0*tc[l2])*tt+tc[l3];
2582                         df2   = tt * df2  + (3.0*tc[i*4+3]*tu+2.0*tc[i*4+2])*tu+tc[i*4+1];
2583                         ddf1  = tu * ddf1 + 2.0*3.0*tc[l1]*tt+2.0*tc[l2];
2584                         ddf2  = tt * ddf2 + 2.0*3.0*tc[4*i+3]*tu+2.0*tc[4*i+2];
2585                 }
2586
2587                 ddf12 = tc[5] + 2.0*tc[9]*tt + 3.0*tc[13]*tt*tt + 2.0*tu*(tc[6]+2.0*tc[10]*tt+3.0*tc[14]*tt*tt) +
2588                 3.0*tu*tu*(tc[7]+2.0*tc[11]*tt+3.0*tc[15]*tt*tt);
2589
2590                 fac     = RAD2DEG/dx;
2591                 df1     = df1   * fac;
2592                 df2     = df2   * fac;
2593                 ddf1    = ddf1  * fac * fac;
2594                 ddf2    = ddf2  * fac * fac;
2595                 ddf12   = ddf12 * fac * fac;
2596
2597                 /* CMAP energy */
2598                 vtot += e;
2599
2600                 /* Do forces - first torsion */
2601                 fg1       = iprod(r1_ij,r1_kj);
2602                 hg1       = iprod(r1_kl,r1_kj);
2603                 fga1      = fg1*ra2r1*rgr1;
2604                 hgb1      = hg1*rb2r1*rgr1;
2605                 gaa1      = -ra2r1*rg1;
2606                 gbb1      = rb2r1*rg1;
2607
2608                 for(i=0;i<DIM;i++)
2609                 {
2610                         dtf1[i]   = gaa1 * a1[i];
2611                         dtg1[i]   = fga1 * a1[i] - hgb1 * b1[i];
2612                         dth1[i]   = gbb1 * b1[i];
2613
2614                         f1[i]     = df1  * dtf1[i];
2615                         g1[i]     = df1  * dtg1[i];
2616                         h1[i]     = df1  * dth1[i];
2617
2618                         f1_i[i]   =  f1[i];
2619                         f1_j[i]   = -f1[i] - g1[i];
2620                         f1_k[i]   =  h1[i] + g1[i];
2621                         f1_l[i]   = -h1[i];
2622
2623                         f[a1i][i] = f[a1i][i] + f1_i[i];
2624                         f[a1j][i] = f[a1j][i] + f1_j[i]; /* - f1[i] - g1[i] */
2625                         f[a1k][i] = f[a1k][i] + f1_k[i]; /* h1[i] + g1[i] */
2626                         f[a1l][i] = f[a1l][i] + f1_l[i]; /* h1[i] */
2627                 }
2628
2629                 /* Do forces - second torsion */
2630                 fg2       = iprod(r2_ij,r2_kj);
2631                 hg2       = iprod(r2_kl,r2_kj);
2632                 fga2      = fg2*ra2r2*rgr2;
2633                 hgb2      = hg2*rb2r2*rgr2;
2634                 gaa2      = -ra2r2*rg2;
2635                 gbb2      = rb2r2*rg2;
2636
2637                 for(i=0;i<DIM;i++)
2638                 {
2639                         dtf2[i]   = gaa2 * a2[i];
2640                         dtg2[i]   = fga2 * a2[i] - hgb2 * b2[i];
2641                         dth2[i]   = gbb2 * b2[i];
2642
2643                         f2[i]     = df2  * dtf2[i];
2644                         g2[i]     = df2  * dtg2[i];
2645                         h2[i]     = df2  * dth2[i];
2646
2647                         f2_i[i]   =  f2[i];
2648                         f2_j[i]   = -f2[i] - g2[i];
2649                         f2_k[i]   =  h2[i] + g2[i];
2650                         f2_l[i]   = -h2[i];
2651
2652                         f[a2i][i] = f[a2i][i] + f2_i[i]; /* f2[i] */
2653                         f[a2j][i] = f[a2j][i] + f2_j[i]; /* - f2[i] - g2[i] */
2654                         f[a2k][i] = f[a2k][i] + f2_k[i]; /* h2[i] + g2[i] */
2655                         f[a2l][i] = f[a2l][i] + f2_l[i]; /* - h2[i] */
2656                 }
2657
2658                 /* Shift forces */
2659                 if(g)
2660                 {
2661                         copy_ivec(SHIFT_IVEC(g,a1j), jt1);
2662                         ivec_sub(SHIFT_IVEC(g,a1i),  jt1,dt1_ij);
2663                         ivec_sub(SHIFT_IVEC(g,a1k),  jt1,dt1_kj);
2664                         ivec_sub(SHIFT_IVEC(g,a1l),  jt1,dt1_lj);
2665                         t11 = IVEC2IS(dt1_ij);
2666                         t21 = IVEC2IS(dt1_kj);
2667                         t31 = IVEC2IS(dt1_lj);
2668
2669                         copy_ivec(SHIFT_IVEC(g,a2j), jt2);
2670                         ivec_sub(SHIFT_IVEC(g,a2i),  jt2,dt2_ij);
2671                         ivec_sub(SHIFT_IVEC(g,a2k),  jt2,dt2_kj);
2672                         ivec_sub(SHIFT_IVEC(g,a2l),  jt2,dt2_lj);
2673                         t12 = IVEC2IS(dt2_ij);
2674                         t22 = IVEC2IS(dt2_kj);
2675                         t32 = IVEC2IS(dt2_lj);
2676                 }
2677                 else if(pbc)
2678                 {
2679                         t31 = pbc_rvec_sub(pbc,x[a1l],x[a1j],h1);
2680                         t32 = pbc_rvec_sub(pbc,x[a2l],x[a2j],h2);
2681                 }
2682                 else
2683                 {
2684                         t31 = CENTRAL;
2685                         t32 = CENTRAL;
2686                 }
2687
2688                 rvec_inc(fshift[t11],f1_i);
2689                 rvec_inc(fshift[CENTRAL],f1_j);
2690                 rvec_inc(fshift[t21],f1_k);
2691                 rvec_inc(fshift[t31],f1_l);
2692
2693                 rvec_inc(fshift[t21],f2_i);
2694                 rvec_inc(fshift[CENTRAL],f2_j);
2695                 rvec_inc(fshift[t22],f2_k);
2696                 rvec_inc(fshift[t32],f2_l);
2697         }
2698         return vtot;
2699 }
2700
2701
2702
2703 /***********************************************************
2704  *
2705  *   G R O M O S  9 6   F U N C T I O N S
2706  *
2707  ***********************************************************/
2708 real g96harmonic(real kA,real kB,real xA,real xB,real x,real lambda,
2709                  real *V,real *F)
2710 {
2711   const real half=0.5;
2712   real  L1,kk,x0,dx,dx2;
2713   real  v,f,dvdlambda;
2714
2715   L1    = 1.0-lambda;
2716   kk    = L1*kA+lambda*kB;
2717   x0    = L1*xA+lambda*xB;
2718
2719   dx    = x-x0;
2720   dx2   = dx*dx;
2721
2722   f     = -kk*dx;
2723   v     = half*kk*dx2;
2724   dvdlambda  = half*(kB-kA)*dx2 + (xA-xB)*kk*dx;
2725
2726   *F    = f;
2727   *V    = v;
2728
2729   return dvdlambda;
2730
2731   /* That was 21 flops */
2732 }
2733
2734 real g96bonds(int nbonds,
2735               const t_iatom forceatoms[],const t_iparams forceparams[],
2736               const rvec x[],rvec f[],rvec fshift[],
2737               const t_pbc *pbc,const t_graph *g,
2738               real lambda,real *dvdlambda,
2739               const t_mdatoms *md,t_fcdata *fcd,
2740               int *global_atom_index)
2741 {
2742   int  i,m,ki,ai,aj,type;
2743   real dr2,fbond,vbond,fij,vtot;
2744   rvec dx;
2745   ivec dt;
2746
2747   vtot = 0.0;
2748   for(i=0; (i<nbonds); ) {
2749     type = forceatoms[i++];
2750     ai   = forceatoms[i++];
2751     aj   = forceatoms[i++];
2752
2753     ki   = pbc_rvec_sub(pbc,x[ai],x[aj],dx);            /*   3          */
2754     dr2  = iprod(dx,dx);                                /*   5          */
2755
2756     *dvdlambda += g96harmonic(forceparams[type].harmonic.krA,
2757                               forceparams[type].harmonic.krB,
2758                               forceparams[type].harmonic.rA,
2759                               forceparams[type].harmonic.rB,
2760                               dr2,lambda,&vbond,&fbond);
2761
2762     vtot  += 0.5*vbond;                             /* 1*/
2763 #ifdef DEBUG
2764     if (debug)
2765       fprintf(debug,"G96-BONDS: dr = %10g  vbond = %10g  fbond = %10g\n",
2766               sqrt(dr2),vbond,fbond);
2767 #endif
2768
2769     if (g) {
2770       ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
2771       ki=IVEC2IS(dt);
2772     }
2773     for (m=0; (m<DIM); m++) {                   /*  15          */
2774       fij=fbond*dx[m];
2775       f[ai][m]+=fij;
2776       f[aj][m]-=fij;
2777       fshift[ki][m]+=fij;
2778       fshift[CENTRAL][m]-=fij;
2779     }
2780   }                                     /* 44 TOTAL     */
2781   return vtot;
2782 }
2783
2784 real g96bond_angle(const rvec xi,const rvec xj,const rvec xk,const t_pbc *pbc,
2785                    rvec r_ij,rvec r_kj,
2786                    int *t1,int *t2)
2787 /* Return value is the angle between the bonds i-j and j-k */
2788 {
2789   real costh;
2790
2791   *t1 = pbc_rvec_sub(pbc,xi,xj,r_ij);                   /*  3           */
2792   *t2 = pbc_rvec_sub(pbc,xk,xj,r_kj);                   /*  3           */
2793
2794   costh=cos_angle(r_ij,r_kj);           /* 25           */
2795                                         /* 41 TOTAL     */
2796   return costh;
2797 }
2798
2799 real g96angles(int nbonds,
2800                const t_iatom forceatoms[],const t_iparams forceparams[],
2801                const rvec x[],rvec f[],rvec fshift[],
2802                const t_pbc *pbc,const t_graph *g,
2803                real lambda,real *dvdlambda,
2804                const t_mdatoms *md,t_fcdata *fcd,
2805                int *global_atom_index)
2806 {
2807   int  i,ai,aj,ak,type,m,t1,t2;
2808   rvec r_ij,r_kj;
2809   real cos_theta,dVdt,va,vtot;
2810   real rij_1,rij_2,rkj_1,rkj_2,rijrkj_1;
2811   rvec f_i,f_j,f_k;
2812   ivec jt,dt_ij,dt_kj;
2813
2814   vtot = 0.0;
2815   for(i=0; (i<nbonds); ) {
2816     type = forceatoms[i++];
2817     ai   = forceatoms[i++];
2818     aj   = forceatoms[i++];
2819     ak   = forceatoms[i++];
2820
2821     cos_theta  = g96bond_angle(x[ai],x[aj],x[ak],pbc,r_ij,r_kj,&t1,&t2);
2822
2823     *dvdlambda += g96harmonic(forceparams[type].harmonic.krA,
2824                               forceparams[type].harmonic.krB,
2825                               forceparams[type].harmonic.rA,
2826                               forceparams[type].harmonic.rB,
2827                               cos_theta,lambda,&va,&dVdt);
2828     vtot    += va;
2829
2830     rij_1    = gmx_invsqrt(iprod(r_ij,r_ij));
2831     rkj_1    = gmx_invsqrt(iprod(r_kj,r_kj));
2832     rij_2    = rij_1*rij_1;
2833     rkj_2    = rkj_1*rkj_1;
2834     rijrkj_1 = rij_1*rkj_1;                     /* 23 */
2835
2836 #ifdef DEBUG
2837     if (debug)
2838       fprintf(debug,"G96ANGLES: costheta = %10g  vth = %10g  dV/dct = %10g\n",
2839               cos_theta,va,dVdt);
2840 #endif
2841     for (m=0; (m<DIM); m++) {                   /*  42  */
2842       f_i[m]=dVdt*(r_kj[m]*rijrkj_1 - r_ij[m]*rij_2*cos_theta);
2843       f_k[m]=dVdt*(r_ij[m]*rijrkj_1 - r_kj[m]*rkj_2*cos_theta);
2844       f_j[m]=-f_i[m]-f_k[m];
2845       f[ai][m]+=f_i[m];
2846       f[aj][m]+=f_j[m];
2847       f[ak][m]+=f_k[m];
2848     }
2849
2850     if (g) {
2851       copy_ivec(SHIFT_IVEC(g,aj),jt);
2852
2853       ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
2854       ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
2855       t1=IVEC2IS(dt_ij);
2856       t2=IVEC2IS(dt_kj);
2857     }
2858     rvec_inc(fshift[t1],f_i);
2859     rvec_inc(fshift[CENTRAL],f_j);
2860     rvec_inc(fshift[t2],f_k);               /* 9 */
2861     /* 163 TOTAL        */
2862   }
2863   return vtot;
2864 }
2865
2866 real cross_bond_bond(int nbonds,
2867                      const t_iatom forceatoms[],const t_iparams forceparams[],
2868                      const rvec x[],rvec f[],rvec fshift[],
2869                      const t_pbc *pbc,const t_graph *g,
2870                      real lambda,real *dvdlambda,
2871                      const t_mdatoms *md,t_fcdata *fcd,
2872                      int *global_atom_index)
2873 {
2874   /* Potential from Lawrence and Skimmer, Chem. Phys. Lett. 372 (2003)
2875    * pp. 842-847
2876    */
2877   int  i,ai,aj,ak,type,m,t1,t2;
2878   rvec r_ij,r_kj;
2879   real vtot,vrr,s1,s2,r1,r2,r1e,r2e,krr;
2880   rvec f_i,f_j,f_k;
2881   ivec jt,dt_ij,dt_kj;
2882
2883   vtot = 0.0;
2884   for(i=0; (i<nbonds); ) {
2885     type = forceatoms[i++];
2886     ai   = forceatoms[i++];
2887     aj   = forceatoms[i++];
2888     ak   = forceatoms[i++];
2889     r1e  = forceparams[type].cross_bb.r1e;
2890     r2e  = forceparams[type].cross_bb.r2e;
2891     krr  = forceparams[type].cross_bb.krr;
2892
2893     /* Compute distance vectors ... */
2894     t1 = pbc_rvec_sub(pbc,x[ai],x[aj],r_ij);
2895     t2 = pbc_rvec_sub(pbc,x[ak],x[aj],r_kj);
2896
2897     /* ... and their lengths */
2898     r1 = norm(r_ij);
2899     r2 = norm(r_kj);
2900
2901     /* Deviations from ideality */
2902     s1 = r1-r1e;
2903     s2 = r2-r2e;
2904
2905     /* Energy (can be negative!) */
2906     vrr   = krr*s1*s2;
2907     vtot += vrr;
2908
2909     /* Forces */
2910     svmul(-krr*s2/r1,r_ij,f_i);
2911     svmul(-krr*s1/r2,r_kj,f_k);
2912
2913     for (m=0; (m<DIM); m++) {                   /*  12  */
2914       f_j[m]    = -f_i[m] - f_k[m];
2915       f[ai][m] += f_i[m];
2916       f[aj][m] += f_j[m];
2917       f[ak][m] += f_k[m];
2918     }
2919
2920     /* Virial stuff */
2921     if (g) {
2922       copy_ivec(SHIFT_IVEC(g,aj),jt);
2923
2924       ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
2925       ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
2926       t1=IVEC2IS(dt_ij);
2927       t2=IVEC2IS(dt_kj);
2928     }
2929     rvec_inc(fshift[t1],f_i);
2930     rvec_inc(fshift[CENTRAL],f_j);
2931     rvec_inc(fshift[t2],f_k);               /* 9 */
2932     /* 163 TOTAL        */
2933   }
2934   return vtot;
2935 }
2936
2937 real cross_bond_angle(int nbonds,
2938                       const t_iatom forceatoms[],const t_iparams forceparams[],
2939                       const rvec x[],rvec f[],rvec fshift[],
2940                       const t_pbc *pbc,const t_graph *g,
2941                       real lambda,real *dvdlambda,
2942                       const t_mdatoms *md,t_fcdata *fcd,
2943                       int *global_atom_index)
2944 {
2945   /* Potential from Lawrence and Skimmer, Chem. Phys. Lett. 372 (2003)
2946    * pp. 842-847
2947    */
2948   int  i,ai,aj,ak,type,m,t1,t2,t3;
2949   rvec r_ij,r_kj,r_ik;
2950   real vtot,vrt,s1,s2,s3,r1,r2,r3,r1e,r2e,r3e,krt,k1,k2,k3;
2951   rvec f_i,f_j,f_k;
2952   ivec jt,dt_ij,dt_kj;
2953
2954   vtot = 0.0;
2955   for(i=0; (i<nbonds); ) {
2956     type = forceatoms[i++];
2957     ai   = forceatoms[i++];
2958     aj   = forceatoms[i++];
2959     ak   = forceatoms[i++];
2960     r1e  = forceparams[type].cross_ba.r1e;
2961     r2e  = forceparams[type].cross_ba.r2e;
2962     r3e  = forceparams[type].cross_ba.r3e;
2963     krt  = forceparams[type].cross_ba.krt;
2964
2965     /* Compute distance vectors ... */
2966     t1 = pbc_rvec_sub(pbc,x[ai],x[aj],r_ij);
2967     t2 = pbc_rvec_sub(pbc,x[ak],x[aj],r_kj);
2968     t3 = pbc_rvec_sub(pbc,x[ai],x[ak],r_ik);
2969
2970     /* ... and their lengths */
2971     r1 = norm(r_ij);
2972     r2 = norm(r_kj);
2973     r3 = norm(r_ik);
2974
2975     /* Deviations from ideality */
2976     s1 = r1-r1e;
2977     s2 = r2-r2e;
2978     s3 = r3-r3e;
2979
2980     /* Energy (can be negative!) */
2981     vrt   = krt*s3*(s1+s2);
2982     vtot += vrt;
2983
2984     /* Forces */
2985     k1 = -krt*(s3/r1);
2986     k2 = -krt*(s3/r2);
2987     k3 = -krt*(s1+s2)/r3;
2988     for(m=0; (m<DIM); m++) {
2989       f_i[m] = k1*r_ij[m] + k3*r_ik[m];
2990       f_k[m] = k2*r_kj[m] - k3*r_ik[m];
2991       f_j[m] = -f_i[m] - f_k[m];
2992     }
2993
2994     for (m=0; (m<DIM); m++) {                   /*  12  */
2995       f[ai][m] += f_i[m];
2996       f[aj][m] += f_j[m];
2997       f[ak][m] += f_k[m];
2998     }
2999
3000     /* Virial stuff */
3001     if (g) {
3002       copy_ivec(SHIFT_IVEC(g,aj),jt);
3003
3004       ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
3005       ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
3006       t1=IVEC2IS(dt_ij);
3007       t2=IVEC2IS(dt_kj);
3008     }
3009     rvec_inc(fshift[t1],f_i);
3010     rvec_inc(fshift[CENTRAL],f_j);
3011     rvec_inc(fshift[t2],f_k);               /* 9 */
3012     /* 163 TOTAL        */
3013   }
3014   return vtot;
3015 }
3016
3017 static real bonded_tab(const char *type,int table_nr,
3018                        const bondedtable_t *table,real kA,real kB,real r,
3019                        real lambda,real *V,real *F)
3020 {
3021   real k,tabscale,*VFtab,rt,eps,eps2,Yt,Ft,Geps,Heps2,Fp,VV,FF;
3022   int  n0,nnn;
3023   real v,f,dvdlambda;
3024
3025   k = (1.0 - lambda)*kA + lambda*kB;
3026
3027   tabscale = table->scale;
3028   VFtab    = table->data;
3029
3030   rt    = r*tabscale;
3031   n0    = rt;
3032   if (n0 >= table->n) {
3033     gmx_fatal(FARGS,"A tabulated %s interaction table number %d is out of the table range: r %f, between table indices %d and %d, table length %d",
3034               type,table_nr,r,n0,n0+1,table->n);
3035   }
3036   eps   = rt - n0;
3037   eps2  = eps*eps;
3038   nnn   = 4*n0;
3039   Yt    = VFtab[nnn];
3040   Ft    = VFtab[nnn+1];
3041   Geps  = VFtab[nnn+2]*eps;
3042   Heps2 = VFtab[nnn+3]*eps2;
3043   Fp    = Ft + Geps + Heps2;
3044   VV    = Yt + Fp*eps;
3045   FF    = Fp + Geps + 2.0*Heps2;
3046
3047   *F    = -k*FF*tabscale;
3048   *V    = k*VV;
3049   dvdlambda  = (kB - kA)*VV;
3050
3051   return dvdlambda;
3052
3053   /* That was 22 flops */
3054 }
3055
3056 real tab_bonds(int nbonds,
3057                const t_iatom forceatoms[],const t_iparams forceparams[],
3058                const rvec x[],rvec f[],rvec fshift[],
3059                const t_pbc *pbc,const t_graph *g,
3060                real lambda,real *dvdlambda,
3061                const t_mdatoms *md,t_fcdata *fcd,
3062                int *global_atom_index)
3063 {
3064   int  i,m,ki,ai,aj,type,table;
3065   real dr,dr2,fbond,vbond,fij,vtot;
3066   rvec dx;
3067   ivec dt;
3068
3069   vtot = 0.0;
3070   for(i=0; (i<nbonds); ) {
3071     type = forceatoms[i++];
3072     ai   = forceatoms[i++];
3073     aj   = forceatoms[i++];
3074
3075     ki   = pbc_rvec_sub(pbc,x[ai],x[aj],dx);    /*   3          */
3076     dr2  = iprod(dx,dx);                        /*   5          */
3077     dr   = dr2*gmx_invsqrt(dr2);                        /*  10          */
3078
3079     table = forceparams[type].tab.table;
3080
3081     *dvdlambda += bonded_tab("bond",table,
3082                              &fcd->bondtab[table],
3083                              forceparams[type].tab.kA,
3084                              forceparams[type].tab.kB,
3085                              dr,lambda,&vbond,&fbond);  /*  22 */
3086
3087     if (dr2 == 0.0)
3088       continue;
3089
3090
3091     vtot  += vbond;/* 1*/
3092     fbond *= gmx_invsqrt(dr2);                  /*   6          */
3093 #ifdef DEBUG
3094     if (debug)
3095       fprintf(debug,"TABBONDS: dr = %10g  vbond = %10g  fbond = %10g\n",
3096               dr,vbond,fbond);
3097 #endif
3098     if (g) {
3099       ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
3100       ki=IVEC2IS(dt);
3101     }
3102     for (m=0; (m<DIM); m++) {                   /*  15          */
3103       fij=fbond*dx[m];
3104       f[ai][m]+=fij;
3105       f[aj][m]-=fij;
3106       fshift[ki][m]+=fij;
3107       fshift[CENTRAL][m]-=fij;
3108     }
3109   }                                     /* 62 TOTAL     */
3110   return vtot;
3111 }
3112
3113 real tab_angles(int nbonds,
3114                 const t_iatom forceatoms[],const t_iparams forceparams[],
3115                 const rvec x[],rvec f[],rvec fshift[],
3116                 const t_pbc *pbc,const t_graph *g,
3117                 real lambda,real *dvdlambda,
3118                 const t_mdatoms *md,t_fcdata *fcd,
3119                 int *global_atom_index)
3120 {
3121   int  i,ai,aj,ak,t1,t2,type,table;
3122   rvec r_ij,r_kj;
3123   real cos_theta,cos_theta2,theta,dVdt,va,vtot;
3124   ivec jt,dt_ij,dt_kj;
3125
3126   vtot = 0.0;
3127   for(i=0; (i<nbonds); ) {
3128     type = forceatoms[i++];
3129     ai   = forceatoms[i++];
3130     aj   = forceatoms[i++];
3131     ak   = forceatoms[i++];
3132
3133     theta  = bond_angle(x[ai],x[aj],x[ak],pbc,
3134                         r_ij,r_kj,&cos_theta,&t1,&t2);  /*  41          */
3135
3136     table = forceparams[type].tab.table;
3137
3138     *dvdlambda += bonded_tab("angle",table,
3139                              &fcd->angletab[table],
3140                              forceparams[type].tab.kA,
3141                              forceparams[type].tab.kB,
3142                              theta,lambda,&va,&dVdt);  /*  22  */
3143     vtot += va;
3144
3145     cos_theta2 = sqr(cos_theta);                /*   1          */
3146     if (cos_theta2 < 1) {
3147       int  m;
3148       real snt,st,sth;
3149       real cik,cii,ckk;
3150       real nrkj2,nrij2;
3151       rvec f_i,f_j,f_k;
3152
3153       st  = dVdt*gmx_invsqrt(1 - cos_theta2);   /*  12          */
3154       sth = st*cos_theta;                       /*   1          */
3155 #ifdef DEBUG
3156       if (debug)
3157         fprintf(debug,"ANGLES: theta = %10g  vth = %10g  dV/dtheta = %10g\n",
3158                 theta*RAD2DEG,va,dVdt);
3159 #endif
3160       nrkj2=iprod(r_kj,r_kj);                   /*   5          */
3161       nrij2=iprod(r_ij,r_ij);
3162
3163       cik=st*gmx_invsqrt(nrkj2*nrij2);          /*  12          */
3164       cii=sth/nrij2;                            /*  10          */
3165       ckk=sth/nrkj2;                            /*  10          */
3166
3167       for (m=0; (m<DIM); m++) {                 /*  39          */
3168         f_i[m]=-(cik*r_kj[m]-cii*r_ij[m]);
3169         f_k[m]=-(cik*r_ij[m]-ckk*r_kj[m]);
3170         f_j[m]=-f_i[m]-f_k[m];
3171         f[ai][m]+=f_i[m];
3172         f[aj][m]+=f_j[m];
3173         f[ak][m]+=f_k[m];
3174       }
3175       if (g) {
3176         copy_ivec(SHIFT_IVEC(g,aj),jt);
3177
3178         ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
3179         ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
3180         t1=IVEC2IS(dt_ij);
3181         t2=IVEC2IS(dt_kj);
3182       }
3183       rvec_inc(fshift[t1],f_i);
3184       rvec_inc(fshift[CENTRAL],f_j);
3185       rvec_inc(fshift[t2],f_k);
3186     }                                           /* 169 TOTAL    */
3187   }
3188   return vtot;
3189 }
3190
3191 real tab_dihs(int nbonds,
3192               const t_iatom forceatoms[],const t_iparams forceparams[],
3193               const rvec x[],rvec f[],rvec fshift[],
3194               const t_pbc *pbc,const t_graph *g,
3195               real lambda,real *dvdlambda,
3196               const t_mdatoms *md,t_fcdata *fcd,
3197               int *global_atom_index)
3198 {
3199   int  i,type,ai,aj,ak,al,table;
3200   int  t1,t2,t3;
3201   rvec r_ij,r_kj,r_kl,m,n;
3202   real phi,sign,ddphi,vpd,vtot;
3203
3204   vtot = 0.0;
3205   for(i=0; (i<nbonds); ) {
3206     type = forceatoms[i++];
3207     ai   = forceatoms[i++];
3208     aj   = forceatoms[i++];
3209     ak   = forceatoms[i++];
3210     al   = forceatoms[i++];
3211
3212     phi=dih_angle(x[ai],x[aj],x[ak],x[al],pbc,r_ij,r_kj,r_kl,m,n,
3213                   &sign,&t1,&t2,&t3);                   /*  84  */
3214
3215     table = forceparams[type].tab.table;
3216
3217     /* Hopefully phi+M_PI never results in values < 0 */
3218     *dvdlambda += bonded_tab("dihedral",table,
3219                              &fcd->dihtab[table],
3220                              forceparams[type].tab.kA,
3221                              forceparams[type].tab.kB,
3222                              phi+M_PI,lambda,&vpd,&ddphi);
3223
3224     vtot += vpd;
3225     do_dih_fup(ai,aj,ak,al,-ddphi,r_ij,r_kj,r_kl,m,n,
3226                f,fshift,pbc,g,x,t1,t2,t3);                      /* 112  */
3227
3228 #ifdef DEBUG
3229     fprintf(debug,"pdih: (%d,%d,%d,%d) phi=%g\n",
3230             ai,aj,ak,al,phi);
3231 #endif
3232   } /* 227 TOTAL        */
3233
3234   return vtot;
3235 }
3236
3237 static unsigned
3238 calc_bonded_reduction_mask(const t_idef *idef,
3239                            int shift,
3240                            int t,int nt)
3241 {
3242     unsigned mask;
3243     int ftype,nb,nat1,nb0,nb1,i,a;
3244
3245     mask = 0;
3246
3247     for(ftype=0; ftype<F_NRE; ftype++)
3248     {
3249         if (interaction_function[ftype].flags & IF_BOND &&
3250             !(ftype == F_CONNBONDS || ftype == F_POSRES) &&
3251             (ftype<F_GB12 || ftype>F_GB14))
3252         {
3253             nb = idef->il[ftype].nr;
3254             if (nb > 0)
3255             {
3256                 nat1 = interaction_function[ftype].nratoms + 1;
3257
3258                 /* Divide this interaction equally over the threads.
3259                  * This is not stored: should match division in calc_bonds.
3260                  */
3261                 nb0 = (((nb/nat1)* t   )/nt)*nat1;
3262                 nb1 = (((nb/nat1)*(t+1))/nt)*nat1;
3263
3264                 for(i=nb0; i<nb1; i+=nat1)
3265                 {
3266                     for(a=1; a<nat1; a++)
3267                     {
3268                         mask |= (1U << (idef->il[ftype].iatoms[i+a]>>shift));
3269                     }
3270                 }
3271             }
3272         }
3273     }
3274
3275     return mask;
3276 }
3277
3278 void init_bonded_thread_force_reduction(t_forcerec *fr,
3279                                         const t_idef *idef)
3280 {
3281 #define MAX_BLOCK_BITS 32
3282     int t;
3283     int ctot,c,b;
3284
3285     if (fr->nthreads <= 1)
3286     {
3287         fr->red_nblock = 0;
3288
3289         return;
3290     }
3291
3292     /* We divide the force array in a maximum of 32 blocks.
3293      * Minimum force block reduction size is 2^6=64.
3294      */
3295     fr->red_ashift = 6;
3296     while (fr->natoms_force > (int)(MAX_BLOCK_BITS*(1U<<fr->red_ashift)))
3297     {
3298         fr->red_ashift++;
3299     }
3300     if (debug)
3301     {
3302         fprintf(debug,"bonded force buffer block atom shift %d bits\n",
3303                 fr->red_ashift);
3304     }
3305
3306     /* Determine to which blocks each thread's bonded force calculation
3307      * contributes. Store this is a mask for each thread.
3308      */
3309 #pragma omp parallel for num_threads(fr->nthreads) schedule(static)
3310     for(t=1; t<fr->nthreads; t++)
3311     {
3312         fr->f_t[t].red_mask =
3313             calc_bonded_reduction_mask(idef,fr->red_ashift,t,fr->nthreads);
3314     }
3315
3316     /* Determine the maximum number of blocks we need to reduce over */
3317     fr->red_nblock = 0;
3318     ctot = 0;
3319     for(t=0; t<fr->nthreads; t++)
3320     {
3321         c = 0;
3322         for(b=0; b<MAX_BLOCK_BITS; b++)
3323         {
3324             if (fr->f_t[t].red_mask & (1U<<b))
3325             {
3326                 fr->red_nblock = max(fr->red_nblock,b+1);
3327                 c++;
3328             }
3329         }
3330         if (debug)
3331         {
3332             fprintf(debug,"thread %d flags %x count %d\n",
3333                     t,fr->f_t[t].red_mask,c);
3334         }
3335         ctot += c;
3336     }
3337     if (debug)
3338     {
3339         fprintf(debug,"Number of blocks to reduce: %d of size %d\n",
3340                 fr->red_nblock,1<<fr->red_ashift);
3341         fprintf(debug,"Reduction density %.2f density/#thread %.2f\n",
3342                 ctot*(1<<fr->red_ashift)/(double)fr->natoms_force,
3343                 ctot*(1<<fr->red_ashift)/(double)(fr->natoms_force*fr->nthreads));
3344     }
3345 }
3346
3347 static void zero_thread_forces(f_thread_t *f_t,int n,
3348                                int nblock,int blocksize)
3349 {
3350     int b,a0,a1,a,i,j;
3351
3352     if (n > f_t->f_nalloc)
3353     {
3354         f_t->f_nalloc = over_alloc_large(n);
3355         srenew(f_t->f,f_t->f_nalloc);
3356     }
3357
3358     if (f_t->red_mask != 0)
3359     {
3360         for(b=0; b<nblock; b++)
3361         {
3362             if (f_t->red_mask && (1U<<b))
3363             {
3364                 a0 = b*blocksize;
3365                 a1 = min((b+1)*blocksize,n);
3366                 for(a=a0; a<a1; a++)
3367                 {
3368                     clear_rvec(f_t->f[a]);
3369                 }
3370             }
3371         }
3372     }
3373     for(i=0; i<SHIFTS; i++)
3374     {
3375         clear_rvec(f_t->fshift[i]);
3376     }
3377     for(i=0; i<F_NRE; i++)
3378     {
3379         f_t->ener[i] = 0;
3380     }
3381     for(i=0; i<egNR; i++)
3382     {
3383         for(j=0; j<f_t->grpp.nener; j++)
3384         {
3385             f_t->grpp.ener[i][j] = 0;
3386         }
3387     }
3388     for(i=0; i<efptNR; i++)
3389     {
3390         f_t->dvdl[i] = 0;
3391     }
3392 }
3393
3394 static void reduce_thread_force_buffer(int n,rvec *f,
3395                                        int nthreads,f_thread_t *f_t,
3396                                        int nblock,int block_size)
3397 {
3398     /* The max thread number is arbitrary,
3399      * we used a fixed number to avoid memory management.
3400      * Using more than 16 threads is probably never useful performance wise.
3401      */
3402 #define MAX_BONDED_THREADS 256
3403     int b;
3404
3405     if (nthreads > MAX_BONDED_THREADS)
3406     {
3407         gmx_fatal(FARGS,"Can not reduce bonded forces on more than %d threads",
3408                   MAX_BONDED_THREADS);
3409     }
3410
3411     /* This reduction can run on any number of threads,
3412      * independently of nthreads.
3413      */
3414 #pragma omp parallel for num_threads(nthreads) schedule(static)
3415     for(b=0; b<nblock; b++)
3416     {
3417         rvec *fp[MAX_BONDED_THREADS];
3418         int nfb,ft,fb;
3419         int a0,a1,a;
3420
3421         /* Determine which threads contribute to this block */
3422         nfb = 0;
3423         for(ft=1; ft<nthreads; ft++)
3424         {
3425             if (f_t[ft].red_mask & (1U<<b))
3426             {
3427                 fp[nfb++] = f_t[ft].f;
3428             }
3429         }
3430         if (nfb > 0)
3431         {
3432             /* Reduce force buffers for threads that contribute */
3433             a0 =  b   *block_size;
3434             a1 = (b+1)*block_size;
3435             a1 = min(a1,n);
3436             for(a=a0; a<a1; a++)
3437             {
3438                 for(fb=0; fb<nfb; fb++)
3439                 {
3440                     rvec_inc(f[a],fp[fb][a]);
3441                 }
3442             }
3443         }
3444     }
3445 }
3446
3447 static void reduce_thread_forces(int n,rvec *f,rvec *fshift,
3448                                  real *ener,gmx_grppairener_t *grpp,real *dvdl,
3449                                  int nthreads,f_thread_t *f_t,
3450                                  int nblock,int block_size,
3451                                  gmx_bool bCalcEnerVir,
3452                                  gmx_bool bDHDL)
3453 {
3454     if (nblock > 0)
3455     {
3456         /* Reduce the bonded force buffer */
3457         reduce_thread_force_buffer(n,f,nthreads,f_t,nblock,block_size);
3458     }
3459
3460     /* When necessary, reduce energy and virial using one thread only */
3461     if (bCalcEnerVir)
3462     {
3463         int t,i,j;
3464
3465         for(i=0; i<SHIFTS; i++)
3466         {
3467             for(t=1; t<nthreads; t++)
3468             {
3469                 rvec_inc(fshift[i],f_t[t].fshift[i]);
3470             }
3471         }
3472         for(i=0; i<F_NRE; i++)
3473         {
3474             for(t=1; t<nthreads; t++)
3475             {
3476                 ener[i] += f_t[t].ener[i];
3477             }
3478         }
3479         for(i=0; i<egNR; i++)
3480         {
3481             for(j=0; j<f_t[1].grpp.nener; j++)
3482             {
3483                 for(t=1; t<nthreads; t++)
3484                 {
3485
3486                     grpp->ener[i][j] += f_t[t].grpp.ener[i][j];
3487                 }
3488             }
3489         }
3490         if (bDHDL)
3491         {
3492             for(i=0; i<efptNR; i++)
3493             {
3494
3495                 for(t=1; t<nthreads; t++)
3496                 {
3497                     dvdl[i] += f_t[t].dvdl[i];
3498                 }
3499             }
3500         }
3501     }
3502 }
3503
3504 static real calc_one_bond(FILE *fplog,int thread,
3505                           int ftype,const t_idef *idef,
3506                           rvec x[], rvec f[], rvec fshift[],
3507                           t_forcerec *fr,
3508                           const t_pbc *pbc,const t_graph *g,
3509                           gmx_enerdata_t *enerd, gmx_grppairener_t *grpp,
3510                           t_nrnb *nrnb,
3511                           real *lambda, real *dvdl,
3512                           const t_mdatoms *md,t_fcdata *fcd,
3513                           gmx_bool bCalcEnerVir,
3514                           int *global_atom_index, gmx_bool bPrintSepPot)
3515 {
3516     int ind,nat1,nbonds,efptFTYPE;
3517     real v=0;
3518     t_iatom *iatoms;
3519     int nb0,nbn;
3520
3521     if (IS_RESTRAINT_TYPE(ftype))
3522     {
3523         efptFTYPE = efptRESTRAINT;
3524     }
3525     else
3526     {
3527         efptFTYPE = efptBONDED;
3528     }
3529
3530     if (interaction_function[ftype].flags & IF_BOND &&
3531         !(ftype == F_CONNBONDS || ftype == F_POSRES))
3532     {
3533         ind  = interaction_function[ftype].nrnb_ind;
3534         nat1 = interaction_function[ftype].nratoms + 1;
3535         nbonds    = idef->il[ftype].nr/nat1;
3536         iatoms    = idef->il[ftype].iatoms;
3537
3538         nb0 = ((nbonds* thread   )/(fr->nthreads))*nat1;
3539         nbn = ((nbonds*(thread+1))/(fr->nthreads))*nat1 - nb0;
3540
3541         if (!IS_LISTED_LJ_C(ftype))
3542         {
3543             if(ftype==F_CMAP)
3544             {
3545                 v = cmap_dihs(nbn,iatoms+nb0,
3546                               idef->iparams,&idef->cmap_grid,
3547                               (const rvec*)x,f,fshift,
3548                               pbc,g,lambda[efptFTYPE],&(dvdl[efptFTYPE]),
3549                               md,fcd,global_atom_index);
3550             }
3551             else if (ftype == F_PDIHS &&
3552                      !bCalcEnerVir && fr->efep==efepNO)
3553             {
3554                 /* No energies, shift forces, dvdl */
3555 #ifndef SSE_PROPER_DIHEDRALS
3556                 pdihs_noener
3557 #else
3558                 pdihs_noener_sse
3559 #endif
3560                     (nbn,idef->il[ftype].iatoms+nb0,
3561                      idef->iparams,
3562                      (const rvec*)x,f,
3563                      pbc,g,lambda[efptFTYPE],md,fcd,
3564                      global_atom_index);
3565                 v = 0;
3566             }
3567             else
3568             {
3569                 v = interaction_function[ftype].ifunc(nbn,iatoms+nb0,
3570                                                       idef->iparams,
3571                                                       (const rvec*)x,f,fshift,
3572                                                       pbc,g,lambda[efptFTYPE],&(dvdl[efptFTYPE]),
3573                                                       md,fcd,global_atom_index);
3574             }
3575             if (bPrintSepPot)
3576             {
3577                 fprintf(fplog,"  %-23s #%4d  V %12.5e  dVdl %12.5e\n",
3578                         interaction_function[ftype].longname,
3579                         nbonds/nat1,v,lambda[efptFTYPE]);
3580             }
3581         }
3582         else
3583         {
3584             v = do_nonbonded_listed(ftype,nbn,iatoms+nb0,idef->iparams,(const rvec*)x,f,fshift,
3585                                     pbc,g,lambda,dvdl,md,fr,grpp,global_atom_index);
3586
3587             enerd->dvdl_nonlin[efptCOUL] += dvdl[efptCOUL];
3588             enerd->dvdl_nonlin[efptVDW] += dvdl[efptVDW];
3589
3590             if (bPrintSepPot)
3591             {
3592                 fprintf(fplog,"  %-5s + %-15s #%4d                  dVdl %12.5e\n",
3593                         interaction_function[ftype].longname,
3594                         interaction_function[F_LJ14].longname,nbonds/nat1,dvdl[efptVDW]);
3595                 fprintf(fplog,"  %-5s + %-15s #%4d                  dVdl %12.5e\n",
3596                         interaction_function[ftype].longname,
3597                         interaction_function[F_COUL14].longname,nbonds/nat1,dvdl[efptCOUL]);
3598             }
3599         }
3600         if (ind != -1 && thread == 0)
3601         {
3602             inc_nrnb(nrnb,ind,nbonds);
3603         }
3604     }
3605
3606     return v;
3607 }
3608
3609 /* WARNING!  THIS FUNCTION MUST EXACTLY TRACK THE calc
3610    function, or horrible things will happen when doing free energy
3611    calculations!  In a good coding world, this would not be a
3612    different function, but for speed reasons, it needs to be made a
3613    separate function.  TODO for 5.0 - figure out a way to reorganize
3614    to reduce duplication.
3615 */
3616
3617 static real calc_one_bond_foreign(FILE *fplog,int ftype, const t_idef *idef,
3618                                   rvec x[], rvec f[], t_forcerec *fr,
3619                                   const t_pbc *pbc,const t_graph *g,
3620                                   gmx_grppairener_t *grpp, t_nrnb *nrnb,
3621                                   real *lambda, real *dvdl,
3622                                   const t_mdatoms *md,t_fcdata *fcd,
3623                                   int *global_atom_index, gmx_bool bPrintSepPot)
3624 {
3625     int ind,nat1,nbonds,efptFTYPE,nbonds_np;
3626     real v=0;
3627     t_iatom *iatoms;
3628
3629     if (IS_RESTRAINT_TYPE(ftype))
3630     {
3631         efptFTYPE = efptRESTRAINT;
3632     }
3633     else
3634     {
3635         efptFTYPE = efptBONDED;
3636     }
3637
3638     if (ftype<F_GB12 || ftype>F_GB14)
3639     {
3640         if (interaction_function[ftype].flags & IF_BOND &&
3641             !(ftype == F_CONNBONDS || ftype == F_POSRES))
3642         {
3643             ind  = interaction_function[ftype].nrnb_ind;
3644             nat1 = interaction_function[ftype].nratoms+1;
3645             nbonds_np = idef->il[ftype].nr_nonperturbed;
3646             nbonds    = idef->il[ftype].nr - nbonds_np;
3647             iatoms    = idef->il[ftype].iatoms + nbonds_np;
3648             if (nbonds > 0)
3649             {
3650                 if (!IS_LISTED_LJ_C(ftype))
3651                 {
3652                     if(ftype==F_CMAP)
3653                     {
3654                         v = cmap_dihs(nbonds,iatoms,
3655                                       idef->iparams,&idef->cmap_grid,
3656                                       (const rvec*)x,f,fr->fshift,
3657                                       pbc,g,lambda[efptFTYPE],&(dvdl[efptFTYPE]),md,fcd,
3658                                       global_atom_index);
3659                     }
3660                     else
3661                     {
3662                         v =         interaction_function[ftype].ifunc(nbonds,iatoms,
3663                                                                   idef->iparams,
3664                                                                   (const rvec*)x,f,fr->fshift,
3665                                                                   pbc,g,lambda[efptFTYPE],&dvdl[efptFTYPE],
3666                                                                   md,fcd,global_atom_index);
3667                     }
3668                 }
3669                 else
3670                 {
3671                     v = do_nonbonded_listed(ftype,nbonds,iatoms,
3672                                             idef->iparams,
3673                                             (const rvec*)x,f,fr->fshift,
3674                                             pbc,g,lambda,dvdl,
3675                                             md,fr,grpp,global_atom_index);
3676                 }
3677                 if (ind != -1)
3678                 {
3679                     inc_nrnb(nrnb,ind,nbonds/nat1);
3680                 }
3681             }
3682         }
3683     }
3684     return v;
3685 }
3686
3687 void calc_bonds(FILE *fplog,const gmx_multisim_t *ms,
3688                 const t_idef *idef,
3689                 rvec x[],history_t *hist,
3690                 rvec f[],t_forcerec *fr,
3691                 const t_pbc *pbc,const t_graph *g,
3692                 gmx_enerdata_t *enerd,t_nrnb *nrnb,
3693                 real *lambda,
3694                 const t_mdatoms *md,
3695                 t_fcdata *fcd,int *global_atom_index,
3696                 t_atomtypes *atype, gmx_genborn_t *born,
3697                 int force_flags,
3698                 gmx_bool bPrintSepPot,gmx_large_int_t step)
3699 {
3700     gmx_bool bCalcEnerVir;
3701     int    i;
3702     real   v,dvdl[efptNR],dvdl_dum[efptNR]; /* The dummy array is to have a place to store the dhdl at other values
3703                                                of lambda, which will be thrown away in the end*/
3704     const  t_pbc *pbc_null;
3705     char   buf[22];
3706     int    thread;
3707
3708     bCalcEnerVir = (force_flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY));
3709
3710     for (i=0;i<efptNR;i++)
3711     {
3712         dvdl[i] = 0.0;
3713     }
3714     if (fr->bMolPBC)
3715     {
3716         pbc_null = pbc;
3717     }
3718     else
3719     {
3720         pbc_null = NULL;
3721     }
3722     if (bPrintSepPot)
3723     {
3724         fprintf(fplog,"Step %s: bonded V and dVdl for this node\n",
3725                 gmx_step_str(step,buf));
3726     }
3727
3728 #ifdef DEBUG
3729     if (g && debug)
3730     {
3731         p_graph(debug,"Bondage is fun",g);
3732     }
3733 #endif
3734
3735     /* Do pre force calculation stuff which might require communication */
3736     if (idef->il[F_ORIRES].nr)
3737     {
3738         enerd->term[F_ORIRESDEV] =
3739             calc_orires_dev(ms,idef->il[F_ORIRES].nr,
3740                             idef->il[F_ORIRES].iatoms,
3741                             idef->iparams,md,(const rvec*)x,
3742                             pbc_null,fcd,hist);
3743     }
3744     if (idef->il[F_DISRES].nr)
3745     {
3746         calc_disres_R_6(ms,idef->il[F_DISRES].nr,
3747                         idef->il[F_DISRES].iatoms,
3748                         idef->iparams,(const rvec*)x,pbc_null,
3749                         fcd,hist);
3750     }
3751
3752 #pragma omp parallel for num_threads(fr->nthreads) schedule(static)
3753     for(thread=0; thread<fr->nthreads; thread++)
3754     {
3755         int    ftype,nbonds,ind,nat1;
3756         real   *epot,v;
3757         /* thread stuff */
3758         rvec   *ft,*fshift;
3759         real   *dvdlt;
3760         gmx_grppairener_t *grpp;
3761         int    nb0,nbn;
3762
3763         if (thread == 0)
3764         {
3765             ft     = f;
3766             fshift = fr->fshift;
3767             epot   = enerd->term;
3768             grpp   = &enerd->grpp;
3769             dvdlt  = dvdl;
3770         }
3771         else
3772         {
3773             zero_thread_forces(&fr->f_t[thread],fr->natoms_force,
3774                                fr->red_nblock,1<<fr->red_ashift);
3775
3776             ft     = fr->f_t[thread].f;
3777             fshift = fr->f_t[thread].fshift;
3778             epot   = fr->f_t[thread].ener;
3779             grpp   = &fr->f_t[thread].grpp;
3780             dvdlt  = fr->f_t[thread].dvdl;
3781         }
3782         /* Loop over all bonded force types to calculate the bonded forces */
3783         for(ftype=0; (ftype<F_NRE); ftype++)
3784         {
3785             if (idef->il[ftype].nr > 0 &&
3786                 (interaction_function[ftype].flags & IF_BOND) &&
3787                 (ftype < F_GB12 || ftype > F_GB14) &&
3788                 !(ftype == F_CONNBONDS || ftype == F_POSRES))
3789             {
3790                 v = calc_one_bond(fplog,thread,ftype,idef,x,
3791                                   ft,fshift,fr,pbc_null,g,enerd,grpp,
3792                                   nrnb,lambda,dvdlt,
3793                                   md,fcd,bCalcEnerVir,
3794                                   global_atom_index,bPrintSepPot);
3795                 epot[ftype]        += v;
3796             }
3797         }
3798     }
3799     if (fr->nthreads > 1)
3800     {
3801         reduce_thread_forces(fr->natoms_force,f,fr->fshift,
3802                              enerd->term,&enerd->grpp,dvdl,
3803                              fr->nthreads,fr->f_t,
3804                              fr->red_nblock,1<<fr->red_ashift,
3805                              bCalcEnerVir,
3806                              force_flags & GMX_FORCE_DHDL);
3807     }
3808     if (force_flags & GMX_FORCE_DHDL)
3809     {
3810         for(i=0; i<efptNR; i++)
3811         {
3812             enerd->dvdl_nonlin[i] += dvdl[i];
3813         }
3814     }
3815
3816     /* Copy the sum of violations for the distance restraints from fcd */
3817     if (fcd)
3818     {
3819         enerd->term[F_DISRESVIOL] = fcd->disres.sumviol;
3820
3821     }
3822 }
3823
3824 void calc_bonds_lambda(FILE *fplog,
3825                        const t_idef *idef,
3826                        rvec x[],
3827                        t_forcerec *fr,
3828                        const t_pbc *pbc,const t_graph *g,
3829                        gmx_grppairener_t *grpp, real *epot, t_nrnb *nrnb,
3830                        real *lambda,
3831                        const t_mdatoms *md,
3832                        t_fcdata *fcd,
3833                        int *global_atom_index)
3834 {
3835     int    i,ftype,nbonds_np,nbonds,ind,nat;
3836     real   v,dr,dr2;
3837     real   dvdl_dum[efptNR];
3838     rvec   *f,*fshift_orig;
3839     const  t_pbc *pbc_null;
3840     t_iatom *iatom_fe;
3841
3842     if (fr->bMolPBC)
3843     {
3844         pbc_null = pbc;
3845     }
3846     else
3847     {
3848         pbc_null = NULL;
3849     }
3850
3851     snew(f,fr->natoms_force);
3852     /* We want to preserve the fshift array in forcerec */
3853     fshift_orig = fr->fshift;
3854     snew(fr->fshift,SHIFTS);
3855
3856     /* Loop over all bonded force types to calculate the bonded forces */
3857     for(ftype=0; (ftype<F_NRE); ftype++)
3858     {
3859         v = calc_one_bond_foreign(fplog,ftype,idef,x,
3860                                   f,fr,pbc_null,g,grpp,nrnb,lambda,dvdl_dum,
3861                                   md,fcd,global_atom_index,FALSE);
3862         epot[ftype] += v;
3863     }
3864
3865     sfree(fr->fshift);
3866     fr->fshift = fshift_orig;
3867     sfree(f);
3868 }