Merge origin/release-4-6 into master
[alexxy/gromacs.git] / src / gromacs / gmxlib / bondfree.c
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
2  *
3  * 
4  *                This source code is part of
5  * 
6  *                 G   R   O   M   A   C   S
7  * 
8  *          GROningen MAchine for Chemical Simulations
9  * 
10  *                        VERSION 3.2.0
11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13  * Copyright (c) 2001-2004, The GROMACS development team,
14  * check out http://www.gromacs.org for more information.
15
16  * This program is free software; you can redistribute it and/or
17  * modify it under the terms of the GNU General Public License
18  * as published by the Free Software Foundation; either version 2
19  * of the License, or (at your option) any later version.
20  * 
21  * If you want to redistribute modifications, please consider that
22  * scientific software is very special. Version control is crucial -
23  * bugs must be traceable. We will be happy to consider code for
24  * inclusion in the official distribution, but derived work must not
25  * be called official GROMACS. Details are found in the README & COPYING
26  * files - if they are missing, get the official version at www.gromacs.org.
27  * 
28  * To help us fund GROMACS development, we humbly ask that you cite
29  * the papers on the package - you can find them in the top README file.
30  * 
31  * For more info, check our website at http://www.gromacs.org
32  * 
33  * And Hey:
34  * GROningen Mixture of Alchemy and Childrens' Stories
35  */
36 #ifdef HAVE_CONFIG_H
37 #include <config.h>
38 #endif
39
40 #include <math.h>
41 #include "physics.h"
42 #include "vec.h"
43 #include "maths.h"
44 #include "txtdump.h"
45 #include "bondf.h"
46 #include "smalloc.h"
47 #include "pbc.h"
48 #include "ns.h"
49 #include "macros.h"
50 #include "names.h"
51 #include "gmx_fatal.h"
52 #include "mshift.h"
53 #include "main.h"
54 #include "disre.h"
55 #include "orires.h"
56 #include "force.h"
57 #include "nonbonded.h"
58
59 #if !defined GMX_DOUBLE && defined GMX_X86_SSE2
60 #include "gmx_x86_simd_single.h"
61 #define SSE_PROPER_DIHEDRALS
62 #endif
63
64 /* Find a better place for this? */
65 const int cmap_coeff_matrix[] = {
66 1, 0, -3,  2, 0, 0,  0,  0, -3,  0,  9, -6,  2,  0, -6,  4 ,
67 0, 0,  0,  0, 0, 0,  0,  0,  3,  0, -9,  6, -2,  0,  6, -4,
68 0, 0,  0,  0, 0, 0,  0,  0,  0,  0,  9, -6,  0,  0, -6,  4 ,
69 0, 0,  3, -2, 0, 0,  0,  0,  0,  0, -9,  6,  0,  0,  6, -4,
70 0, 0,  0,  0, 1, 0, -3,  2, -2,  0,  6, -4,  1,  0, -3,  2 ,
71 0, 0,  0,  0, 0, 0,  0,  0, -1,  0,  3, -2,  1,  0, -3,  2 ,
72 0, 0,  0,  0, 0, 0,  0,  0,  0,  0, -3,  2,  0,  0,  3, -2,
73 0, 0,  0,  0, 0, 0,  3, -2,  0,  0, -6,  4,  0,  0,  3, -2,
74 0, 1, -2,  1, 0, 0,  0,  0,  0, -3,  6, -3,  0,  2, -4,  2 ,
75 0, 0,  0,  0, 0, 0,  0,  0,  0,  3, -6,  3,  0, -2,  4, -2,
76 0, 0,  0,  0, 0, 0,  0,  0,  0,  0, -3,  3,  0,  0,  2, -2,
77 0, 0, -1,  1, 0, 0,  0,  0,  0,  0,  3, -3,  0,  0, -2,  2 ,
78 0, 0,  0,  0, 0, 1, -2,  1,  0, -2,  4, -2,  0,  1, -2,  1,
79 0, 0,  0,  0, 0, 0,  0,  0,  0, -1,  2, -1,  0,  1, -2,  1,
80 0, 0,  0,  0, 0, 0,  0,  0,  0,  0,  1, -1,  0,  0, -1,  1,
81 0, 0,  0,  0, 0, 0, -1,  1,  0,  0,  2, -2,  0,  0, -1,  1
82 };
83
84
85
86 int glatnr(int *global_atom_index,int i)
87 {
88     int atnr;
89
90     if (global_atom_index == NULL) {
91         atnr = i + 1;
92     } else {
93         atnr = global_atom_index[i] + 1;
94     }
95
96     return atnr;
97 }
98
99 static int pbc_rvec_sub(const t_pbc *pbc,const rvec xi,const rvec xj,rvec dx)
100 {
101   if (pbc) {
102     return pbc_dx_aiuc(pbc,xi,xj,dx);
103   }
104   else {
105     rvec_sub(xi,xj,dx);
106     return CENTRAL;
107   }
108 }
109
110 /*
111  * Morse potential bond by Frank Everdij
112  *
113  * Three parameters needed:
114  *
115  * b0 = equilibrium distance in nm
116  * be = beta in nm^-1 (actually, it's nu_e*Sqrt(2*pi*pi*mu/D_e))
117  * cb = well depth in kJ/mol
118  *
119  * Note: the potential is referenced to be +cb at infinite separation
120  *       and zero at the equilibrium distance!
121  */
122
123 real morse_bonds(int nbonds,
124                  const t_iatom forceatoms[],const t_iparams forceparams[],
125                  const rvec x[],rvec f[],rvec fshift[],
126                  const t_pbc *pbc,const t_graph *g,
127                  real lambda,real *dvdlambda,
128                  const t_mdatoms *md,t_fcdata *fcd,
129                  int *global_atom_index)
130 {
131   const real one=1.0;
132   const real two=2.0;
133   real  dr,dr2,temp,omtemp,cbomtemp,fbond,vbond,fij,vtot;
134   real  b0,be,cb,b0A,beA,cbA,b0B,beB,cbB,L1;
135   rvec  dx;
136   int   i,m,ki,type,ai,aj;
137   ivec  dt;
138
139   vtot = 0.0;
140   for(i=0; (i<nbonds); ) {
141     type = forceatoms[i++];
142     ai   = forceatoms[i++];
143     aj   = forceatoms[i++];
144     
145     b0A   = forceparams[type].morse.b0A;
146     beA   = forceparams[type].morse.betaA;
147     cbA   = forceparams[type].morse.cbA;
148
149     b0B   = forceparams[type].morse.b0B;
150     beB   = forceparams[type].morse.betaB;
151     cbB   = forceparams[type].morse.cbB;
152
153     L1 = one-lambda;                      /* 1 */
154     b0 = L1*b0A + lambda*b0B;             /* 3 */
155     be = L1*beA + lambda*beB;             /* 3 */
156     cb = L1*cbA + lambda*cbB;             /* 3 */
157
158     ki   = pbc_rvec_sub(pbc,x[ai],x[aj],dx);            /*   3          */
159     dr2  = iprod(dx,dx);                            /*   5          */
160     dr   = dr2*gmx_invsqrt(dr2);                        /*  10          */
161     temp = exp(-be*(dr-b0));                        /*  12          */
162     
163     if (temp == one)
164     {
165         /* bonds are constrainted. This may _not_ include bond constraints if they are lambda dependent */
166         *dvdlambda += cbB-cbA;
167         continue;
168     }
169
170     omtemp   = one-temp;                               /*   1          */
171     cbomtemp = cb*omtemp;                              /*   1          */
172     vbond    = cbomtemp*omtemp;                        /*   1          */
173     fbond    = -two*be*temp*cbomtemp*gmx_invsqrt(dr2); /*   9          */
174     vtot     += vbond;                                 /*   1          */
175
176     *dvdlambda += (cbB - cbA) * omtemp * omtemp - (2-2*omtemp)*omtemp * cb * ((b0B-b0A)*be - (beB-beA)*(dr-b0)); /* 15 */
177     
178     if (g) {
179       ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
180       ki = IVEC2IS(dt);
181     }
182
183     for (m=0; (m<DIM); m++) {                          /*  15          */
184       fij=fbond*dx[m];
185       f[ai][m]+=fij;
186       f[aj][m]-=fij;
187       fshift[ki][m]+=fij;
188       fshift[CENTRAL][m]-=fij;
189     }
190   }                                           /*  83 TOTAL    */
191   return vtot;
192 }
193
194 real cubic_bonds(int nbonds,
195                  const t_iatom forceatoms[],const t_iparams forceparams[],
196                  const rvec x[],rvec f[],rvec fshift[],
197                  const t_pbc *pbc,const t_graph *g,
198                  real lambda,real *dvdlambda,
199                  const t_mdatoms *md,t_fcdata *fcd,
200                  int *global_atom_index)
201 {
202   const real three = 3.0;
203   const real two   = 2.0;
204   real  kb,b0,kcub;
205   real  dr,dr2,dist,kdist,kdist2,fbond,vbond,fij,vtot;
206   rvec  dx;
207   int   i,m,ki,type,ai,aj;
208   ivec  dt;
209
210   vtot = 0.0;
211   for(i=0; (i<nbonds); ) {
212     type = forceatoms[i++];
213     ai   = forceatoms[i++];
214     aj   = forceatoms[i++];
215     
216     b0   = forceparams[type].cubic.b0;
217     kb   = forceparams[type].cubic.kb;
218     kcub = forceparams[type].cubic.kcub;
219
220     ki   = pbc_rvec_sub(pbc,x[ai],x[aj],dx);                /*   3          */
221     dr2  = iprod(dx,dx);                                /*   5          */
222     
223     if (dr2 == 0.0)
224       continue;
225       
226     dr         = dr2*gmx_invsqrt(dr2);                      /*  10          */
227     dist       = dr-b0;
228     kdist      = kb*dist;
229     kdist2     = kdist*dist;
230     
231     vbond      = kdist2 + kcub*kdist2*dist;
232     fbond      = -(two*kdist + three*kdist2*kcub)/dr;
233
234     vtot      += vbond;       /* 21 */
235     
236     if (g) {
237       ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
238       ki=IVEC2IS(dt);
239     }
240     for (m=0; (m<DIM); m++) {                          /*  15          */
241       fij=fbond*dx[m];
242       f[ai][m]+=fij;
243       f[aj][m]-=fij;
244       fshift[ki][m]+=fij;
245       fshift[CENTRAL][m]-=fij;
246     }
247   }                                           /*  54 TOTAL    */
248   return vtot;
249 }
250
251 real FENE_bonds(int nbonds,
252                 const t_iatom forceatoms[],const t_iparams forceparams[],
253                 const rvec x[],rvec f[],rvec fshift[],
254                 const t_pbc *pbc,const t_graph *g,
255                 real lambda,real *dvdlambda,
256                 const t_mdatoms *md,t_fcdata *fcd,
257                 int *global_atom_index)
258 {
259   const real half=0.5;
260   const real one=1.0;
261   real  bm,kb;
262   real  dr,dr2,bm2,omdr2obm2,fbond,vbond,fij,vtot;
263   rvec  dx;
264   int   i,m,ki,type,ai,aj;
265   ivec  dt;
266
267   vtot = 0.0;
268   for(i=0; (i<nbonds); ) {
269     type = forceatoms[i++];
270     ai   = forceatoms[i++];
271     aj   = forceatoms[i++];
272     
273     bm   = forceparams[type].fene.bm;
274     kb   = forceparams[type].fene.kb;
275
276     ki   = pbc_rvec_sub(pbc,x[ai],x[aj],dx);            /*   3          */
277     dr2  = iprod(dx,dx);                                /*   5          */
278     
279     if (dr2 == 0.0)
280       continue;
281
282     bm2 = bm*bm;
283
284     if (dr2 >= bm2)
285       gmx_fatal(FARGS,
286                 "r^2 (%f) >= bm^2 (%f) in FENE bond between atoms %d and %d",
287                 dr2,bm2,
288                 glatnr(global_atom_index,ai),
289                 glatnr(global_atom_index,aj));
290       
291     omdr2obm2  = one - dr2/bm2;
292     
293     vbond      = -half*kb*bm2*log(omdr2obm2);
294     fbond      = -kb/omdr2obm2;
295
296     vtot      += vbond;       /* 35 */
297     
298     if (g) {
299       ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
300       ki=IVEC2IS(dt);
301     }
302     for (m=0; (m<DIM); m++) {                          /*  15          */
303       fij=fbond*dx[m];
304       f[ai][m]+=fij;
305       f[aj][m]-=fij;
306       fshift[ki][m]+=fij;
307       fshift[CENTRAL][m]-=fij;
308     }
309   }                                           /*  58 TOTAL    */
310   return vtot;
311 }
312
313 real harmonic(real kA,real kB,real xA,real xB,real x,real lambda,
314               real *V,real *F)
315 {
316   const real half=0.5;
317   real  L1,kk,x0,dx,dx2;
318   real  v,f,dvdlambda;
319   
320   L1    = 1.0-lambda;
321   kk    = L1*kA+lambda*kB;
322   x0    = L1*xA+lambda*xB;
323
324   dx    = x-x0;
325   dx2   = dx*dx;
326
327   f     = -kk*dx;
328   v     = half*kk*dx2;
329   dvdlambda  = half*(kB-kA)*dx2 + (xA-xB)*kk*dx;
330
331   *F    = f;
332   *V    = v;
333
334   return dvdlambda;
335
336   /* That was 19 flops */
337 }
338
339
340 real bonds(int nbonds,
341            const t_iatom forceatoms[],const t_iparams forceparams[],
342            const rvec x[],rvec f[],rvec fshift[],
343            const t_pbc *pbc,const t_graph *g,
344            real lambda,real *dvdlambda,
345            const t_mdatoms *md,t_fcdata *fcd,
346            int *global_atom_index)
347 {
348   int  i,m,ki,ai,aj,type;
349   real dr,dr2,fbond,vbond,fij,vtot;
350   rvec dx;
351   ivec dt;
352
353   vtot = 0.0;
354   for(i=0; (i<nbonds); ) {
355     type = forceatoms[i++];
356     ai   = forceatoms[i++];
357     aj   = forceatoms[i++];
358   
359     ki   = pbc_rvec_sub(pbc,x[ai],x[aj],dx);    /*   3          */
360     dr2  = iprod(dx,dx);                        /*   5          */
361     dr   = dr2*gmx_invsqrt(dr2);                        /*  10          */
362
363     *dvdlambda += harmonic(forceparams[type].harmonic.krA,
364                            forceparams[type].harmonic.krB,
365                            forceparams[type].harmonic.rA,
366                            forceparams[type].harmonic.rB,
367                            dr,lambda,&vbond,&fbond);  /*  19  */
368
369     if (dr2 == 0.0)
370       continue;
371
372     
373     vtot  += vbond;/* 1*/
374     fbond *= gmx_invsqrt(dr2);                  /*   6          */
375 #ifdef DEBUG
376     if (debug)
377       fprintf(debug,"BONDS: dr = %10g  vbond = %10g  fbond = %10g\n",
378               dr,vbond,fbond);
379 #endif
380     if (g) {
381       ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
382       ki=IVEC2IS(dt);
383     }
384     for (m=0; (m<DIM); m++) {                   /*  15          */
385       fij=fbond*dx[m];
386       f[ai][m]+=fij;
387       f[aj][m]-=fij;
388       fshift[ki][m]+=fij;
389       fshift[CENTRAL][m]-=fij;
390     }
391   }                                     /* 59 TOTAL     */
392   return vtot;
393 }
394
395 real restraint_bonds(int nbonds,
396                      const t_iatom forceatoms[],const t_iparams forceparams[],
397                      const rvec x[],rvec f[],rvec fshift[],
398                      const t_pbc *pbc,const t_graph *g,
399                      real lambda,real *dvdlambda,
400                      const t_mdatoms *md,t_fcdata *fcd,
401                      int *global_atom_index)
402 {
403     int  i,m,ki,ai,aj,type;
404     real dr,dr2,fbond,vbond,fij,vtot;
405     real L1;
406     real low,dlow,up1,dup1,up2,dup2,k,dk;
407     real drh,drh2;
408     rvec dx;
409     ivec dt;
410
411     L1   = 1.0 - lambda;
412
413     vtot = 0.0;
414     for(i=0; (i<nbonds); )
415     {
416         type = forceatoms[i++];
417         ai   = forceatoms[i++];
418         aj   = forceatoms[i++];
419         
420         ki   = pbc_rvec_sub(pbc,x[ai],x[aj],dx);        /*   3          */
421         dr2  = iprod(dx,dx);                            /*   5          */
422         dr   = dr2*gmx_invsqrt(dr2);                    /*  10          */
423
424         low  = L1*forceparams[type].restraint.lowA + lambda*forceparams[type].restraint.lowB;
425         dlow =   -forceparams[type].restraint.lowA +        forceparams[type].restraint.lowB;
426         up1  = L1*forceparams[type].restraint.up1A + lambda*forceparams[type].restraint.up1B;
427         dup1 =   -forceparams[type].restraint.up1A +        forceparams[type].restraint.up1B;
428         up2  = L1*forceparams[type].restraint.up2A + lambda*forceparams[type].restraint.up2B;
429         dup2 =   -forceparams[type].restraint.up2A +        forceparams[type].restraint.up2B;
430         k    = L1*forceparams[type].restraint.kA   + lambda*forceparams[type].restraint.kB;
431         dk   =   -forceparams[type].restraint.kA   +        forceparams[type].restraint.kB;
432         /* 24 */
433
434         if (dr < low)
435         {
436             drh   = dr - low;
437             drh2  = drh*drh;
438             vbond = 0.5*k*drh2;
439             fbond = -k*drh;
440             *dvdlambda += 0.5*dk*drh2 - k*dlow*drh;
441         } /* 11 */
442         else if (dr <= up1)
443         {
444             vbond = 0;
445             fbond = 0;
446         }
447         else if (dr <= up2)
448         {
449             drh   = dr - up1;
450             drh2  = drh*drh;
451             vbond = 0.5*k*drh2;
452             fbond = -k*drh;
453             *dvdlambda += 0.5*dk*drh2 - k*dup1*drh;
454         } /* 11 */
455         else
456         {
457             drh   = dr - up2;
458             vbond = k*(up2 - up1)*(0.5*(up2 - up1) + drh);
459             fbond = -k*(up2 - up1);
460             *dvdlambda += dk*(up2 - up1)*(0.5*(up2 - up1) + drh)
461                 + k*(dup2 - dup1)*(up2 - up1 + drh)
462                 - k*(up2 - up1)*dup2;
463         }
464    
465         if (dr2 == 0.0)
466             continue;
467         
468         vtot  += vbond;/* 1*/
469         fbond *= gmx_invsqrt(dr2);                      /*   6          */
470 #ifdef DEBUG
471         if (debug)
472             fprintf(debug,"BONDS: dr = %10g  vbond = %10g  fbond = %10g\n",
473                     dr,vbond,fbond);
474 #endif
475         if (g) {
476             ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
477             ki=IVEC2IS(dt);
478         }
479         for (m=0; (m<DIM); m++) {                       /*  15          */
480             fij=fbond*dx[m];
481             f[ai][m]+=fij;
482             f[aj][m]-=fij;
483             fshift[ki][m]+=fij;
484             fshift[CENTRAL][m]-=fij;
485         }
486     }                                   /* 59 TOTAL     */
487
488     return vtot;
489 }
490
491 real polarize(int nbonds,
492               const t_iatom forceatoms[],const t_iparams forceparams[],
493               const rvec x[],rvec f[],rvec fshift[],
494               const t_pbc *pbc,const t_graph *g,
495               real lambda,real *dvdlambda,
496               const t_mdatoms *md,t_fcdata *fcd,
497               int *global_atom_index)
498 {
499   int  i,m,ki,ai,aj,type;
500   real dr,dr2,fbond,vbond,fij,vtot,ksh;
501   rvec dx;
502   ivec dt;
503
504   vtot = 0.0;
505   for(i=0; (i<nbonds); ) {
506     type = forceatoms[i++];
507     ai   = forceatoms[i++];
508     aj   = forceatoms[i++];
509     ksh  = sqr(md->chargeA[aj])*ONE_4PI_EPS0/forceparams[type].polarize.alpha;
510     if (debug)
511       fprintf(debug,"POL: local ai = %d aj = %d ksh = %.3f\n",ai,aj,ksh);
512   
513     ki   = pbc_rvec_sub(pbc,x[ai],x[aj],dx);    /*   3          */
514     dr2  = iprod(dx,dx);                        /*   5          */
515     dr   = dr2*gmx_invsqrt(dr2);                        /*  10          */
516
517     *dvdlambda += harmonic(ksh,ksh,0,0,dr,lambda,&vbond,&fbond);  /*  19  */
518
519     if (dr2 == 0.0)
520       continue;
521     
522     vtot  += vbond;/* 1*/
523     fbond *= gmx_invsqrt(dr2);                  /*   6          */
524
525     if (g) {
526       ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
527       ki=IVEC2IS(dt);
528     }
529     for (m=0; (m<DIM); m++) {                   /*  15          */
530       fij=fbond*dx[m];
531       f[ai][m]+=fij;
532       f[aj][m]-=fij;
533       fshift[ki][m]+=fij;
534       fshift[CENTRAL][m]-=fij;
535     }
536   }                                     /* 59 TOTAL     */
537   return vtot;
538 }
539
540 real anharm_polarize(int nbonds,
541                      const t_iatom forceatoms[],const t_iparams forceparams[],
542                      const rvec x[],rvec f[],rvec fshift[],
543                      const t_pbc *pbc,const t_graph *g,
544                      real lambda,real *dvdlambda,
545                      const t_mdatoms *md,t_fcdata *fcd,
546                      int *global_atom_index)
547 {
548   int  i,m,ki,ai,aj,type;
549   real dr,dr2,fbond,vbond,fij,vtot,ksh,khyp,drcut,ddr,ddr3;
550   rvec dx;
551   ivec dt;
552
553   vtot = 0.0;
554   for(i=0; (i<nbonds); ) {
555     type  = forceatoms[i++];
556     ai    = forceatoms[i++];
557     aj    = forceatoms[i++];
558     ksh   = sqr(md->chargeA[aj])*ONE_4PI_EPS0/forceparams[type].anharm_polarize.alpha; /* 7*/
559     khyp  = forceparams[type].anharm_polarize.khyp;
560     drcut = forceparams[type].anharm_polarize.drcut;
561     if (debug)
562       fprintf(debug,"POL: local ai = %d aj = %d ksh = %.3f\n",ai,aj,ksh);
563   
564     ki   = pbc_rvec_sub(pbc,x[ai],x[aj],dx);    /*   3          */
565     dr2  = iprod(dx,dx);                        /*   5          */
566     dr   = dr2*gmx_invsqrt(dr2);                        /*  10          */
567
568     *dvdlambda += harmonic(ksh,ksh,0,0,dr,lambda,&vbond,&fbond);  /*  19  */
569
570     if (dr2 == 0.0)
571       continue;
572     
573     if (dr > drcut) {
574         ddr    = dr-drcut;
575         ddr3   = ddr*ddr*ddr;
576         vbond += khyp*ddr*ddr3;
577         fbond -= 4*khyp*ddr3;
578     }
579     fbond *= gmx_invsqrt(dr2);                  /*   6          */
580     vtot  += vbond;/* 1*/
581
582     if (g) {
583       ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
584       ki=IVEC2IS(dt);
585     }
586     for (m=0; (m<DIM); m++) {                   /*  15          */
587       fij=fbond*dx[m];
588       f[ai][m]+=fij;
589       f[aj][m]-=fij;
590       fshift[ki][m]+=fij;
591       fshift[CENTRAL][m]-=fij;
592     }
593   }                                     /* 72 TOTAL     */
594   return vtot;
595 }
596
597 real water_pol(int nbonds,
598                const t_iatom forceatoms[],const t_iparams forceparams[],
599                const rvec x[],rvec f[],rvec fshift[],
600                const t_pbc *pbc,const t_graph *g,
601                real lambda,real *dvdlambda,
602                const t_mdatoms *md,t_fcdata *fcd,
603                int *global_atom_index)
604 {
605   /* This routine implements anisotropic polarizibility for water, through
606    * a shell connected to a dummy with spring constant that differ in the
607    * three spatial dimensions in the molecular frame.
608    */
609   int  i,m,aO,aH1,aH2,aD,aS,type,type0;
610   rvec dOH1,dOH2,dHH,dOD,dDS,nW,kk,dx,kdx,proj;
611 #ifdef DEBUG
612   rvec df;
613 #endif
614   real vtot,fij,r_HH,r_OD,r_nW,tx,ty,tz,qS;
615
616   vtot = 0.0;
617   if (nbonds > 0) {
618     type0  = forceatoms[0];
619     aS     = forceatoms[5];
620     qS     = md->chargeA[aS];
621     kk[XX] = sqr(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_x;
622     kk[YY] = sqr(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_y;
623     kk[ZZ] = sqr(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_z;
624     r_HH   = 1.0/forceparams[type0].wpol.rHH;
625     r_OD   = 1.0/forceparams[type0].wpol.rOD;
626     if (debug) {
627       fprintf(debug,"WPOL: qS  = %10.5f aS = %5d\n",qS,aS);
628       fprintf(debug,"WPOL: kk  = %10.3f        %10.3f        %10.3f\n",
629               kk[XX],kk[YY],kk[ZZ]);
630       fprintf(debug,"WPOL: rOH = %10.3f  rHH = %10.3f  rOD = %10.3f\n",
631               forceparams[type0].wpol.rOH,
632               forceparams[type0].wpol.rHH,
633               forceparams[type0].wpol.rOD);
634     }
635     for(i=0; (i<nbonds); i+=6) {
636       type = forceatoms[i];
637       if (type != type0)
638         gmx_fatal(FARGS,"Sorry, type = %d, type0 = %d, file = %s, line = %d",
639                     type,type0,__FILE__,__LINE__);
640       aO   = forceatoms[i+1];
641       aH1  = forceatoms[i+2];
642       aH2  = forceatoms[i+3];
643       aD   = forceatoms[i+4];
644       aS   = forceatoms[i+5];
645       
646       /* Compute vectors describing the water frame */
647       rvec_sub(x[aH1],x[aO], dOH1);
648       rvec_sub(x[aH2],x[aO], dOH2);
649       rvec_sub(x[aH2],x[aH1],dHH);
650       rvec_sub(x[aD], x[aO], dOD);
651       rvec_sub(x[aS], x[aD], dDS);
652       cprod(dOH1,dOH2,nW);
653       
654       /* Compute inverse length of normal vector 
655        * (this one could be precomputed, but I'm too lazy now)
656        */
657       r_nW = gmx_invsqrt(iprod(nW,nW));
658       /* This is for precision, but does not make a big difference,
659        * it can go later.
660        */
661       r_OD = gmx_invsqrt(iprod(dOD,dOD)); 
662       
663       /* Normalize the vectors in the water frame */
664       svmul(r_nW,nW,nW);
665       svmul(r_HH,dHH,dHH);
666       svmul(r_OD,dOD,dOD);
667       
668       /* Compute displacement of shell along components of the vector */
669       dx[ZZ] = iprod(dDS,dOD);
670       /* Compute projection on the XY plane: dDS - dx[ZZ]*dOD */
671       for(m=0; (m<DIM); m++)
672         proj[m] = dDS[m]-dx[ZZ]*dOD[m];
673       
674       /*dx[XX] = iprod(dDS,nW);
675         dx[YY] = iprod(dDS,dHH);*/
676       dx[XX] = iprod(proj,nW);
677       for(m=0; (m<DIM); m++)
678         proj[m] -= dx[XX]*nW[m];
679       dx[YY] = iprod(proj,dHH);
680       /*#define DEBUG*/
681 #ifdef DEBUG
682       if (debug) {
683         fprintf(debug,"WPOL: dx2=%10g  dy2=%10g  dz2=%10g  sum=%10g  dDS^2=%10g\n",
684                 sqr(dx[XX]),sqr(dx[YY]),sqr(dx[ZZ]),iprod(dx,dx),iprod(dDS,dDS));
685         fprintf(debug,"WPOL: dHH=(%10g,%10g,%10g)\n",dHH[XX],dHH[YY],dHH[ZZ]);
686         fprintf(debug,"WPOL: dOD=(%10g,%10g,%10g), 1/r_OD = %10g\n",
687                 dOD[XX],dOD[YY],dOD[ZZ],1/r_OD);
688         fprintf(debug,"WPOL: nW =(%10g,%10g,%10g), 1/r_nW = %10g\n",
689                 nW[XX],nW[YY],nW[ZZ],1/r_nW);
690         fprintf(debug,"WPOL: dx  =%10g, dy  =%10g, dz  =%10g\n",
691                 dx[XX],dx[YY],dx[ZZ]);
692         fprintf(debug,"WPOL: dDSx=%10g, dDSy=%10g, dDSz=%10g\n",
693                 dDS[XX],dDS[YY],dDS[ZZ]);
694       }
695 #endif
696       /* Now compute the forces and energy */
697       kdx[XX] = kk[XX]*dx[XX];
698       kdx[YY] = kk[YY]*dx[YY];
699       kdx[ZZ] = kk[ZZ]*dx[ZZ];
700       vtot   += iprod(dx,kdx);
701       for(m=0; (m<DIM); m++) {
702         /* This is a tensor operation but written out for speed */
703         tx        =  nW[m]*kdx[XX];
704         ty        = dHH[m]*kdx[YY];
705         tz        = dOD[m]*kdx[ZZ];
706         fij       = -tx-ty-tz;
707 #ifdef DEBUG
708         df[m] = fij;
709 #endif
710         f[aS][m] += fij;
711         f[aD][m] -= fij;
712       }
713 #ifdef DEBUG
714       if (debug) {
715         fprintf(debug,"WPOL: vwpol=%g\n",0.5*iprod(dx,kdx));
716         fprintf(debug,"WPOL: df = (%10g, %10g, %10g)\n",df[XX],df[YY],df[ZZ]);
717       }
718 #endif
719     }   
720   }
721   return 0.5*vtot;
722 }
723
724 static real do_1_thole(const rvec xi,const rvec xj,rvec fi,rvec fj,
725                        const t_pbc *pbc,real qq,
726                        rvec fshift[],real afac)
727 {
728   rvec r12;
729   real r12sq,r12_1,r12n,r12bar,v0,v1,fscal,ebar,fff;
730   int  m,t;
731     
732   t      = pbc_rvec_sub(pbc,xi,xj,r12); /*  3 */
733   
734   r12sq  = iprod(r12,r12);              /*  5 */
735   r12_1  = gmx_invsqrt(r12sq);              /*  5 */
736   r12bar = afac/r12_1;                  /*  5 */
737   v0     = qq*ONE_4PI_EPS0*r12_1;       /*  2 */
738   ebar   = exp(-r12bar);                /*  5 */
739   v1     = (1-(1+0.5*r12bar)*ebar);     /*  4 */
740   fscal  = ((v0*r12_1)*v1 - v0*0.5*afac*ebar*(r12bar+1))*r12_1; /* 9 */
741   if (debug)
742     fprintf(debug,"THOLE: v0 = %.3f v1 = %.3f r12= % .3f r12bar = %.3f fscal = %.3f  ebar = %.3f\n",v0,v1,1/r12_1,r12bar,fscal,ebar);
743   
744   for(m=0; (m<DIM); m++) {
745     fff    = fscal*r12[m];
746     fi[m] += fff;
747     fj[m] -= fff;             
748     fshift[t][m]       += fff;
749     fshift[CENTRAL][m] -= fff;
750   } /* 15 */
751   
752   return v0*v1; /* 1 */
753   /* 54 */
754 }
755
756 real thole_pol(int nbonds,
757                const t_iatom forceatoms[],const t_iparams forceparams[],
758                const rvec x[],rvec f[],rvec fshift[],
759                const t_pbc *pbc,const t_graph *g,
760                real lambda,real *dvdlambda,
761                const t_mdatoms *md,t_fcdata *fcd,
762                int *global_atom_index)
763 {
764   /* Interaction between two pairs of particles with opposite charge */
765   int i,type,a1,da1,a2,da2;
766   real q1,q2,qq,a,al1,al2,afac;
767   real V=0;
768   
769   for(i=0; (i<nbonds); ) {
770     type  = forceatoms[i++];
771     a1    = forceatoms[i++];
772     da1   = forceatoms[i++];
773     a2    = forceatoms[i++];
774     da2   = forceatoms[i++];
775     q1    = md->chargeA[da1];
776     q2    = md->chargeA[da2];
777     a     = forceparams[type].thole.a;
778     al1   = forceparams[type].thole.alpha1;
779     al2   = forceparams[type].thole.alpha2;
780     qq    = q1*q2;
781     afac  = a*pow(al1*al2,-1.0/6.0);
782     V += do_1_thole(x[a1], x[a2], f[a1], f[a2], pbc, qq,fshift,afac);
783     V += do_1_thole(x[da1],x[a2], f[da1],f[a2], pbc,-qq,fshift,afac);
784     V += do_1_thole(x[a1], x[da2],f[a1], f[da2],pbc,-qq,fshift,afac);
785     V += do_1_thole(x[da1],x[da2],f[da1],f[da2],pbc, qq,fshift,afac);
786   }
787   /* 290 flops */
788   return V;
789 }
790
791 real bond_angle(const rvec xi,const rvec xj,const rvec xk,const t_pbc *pbc,
792                 rvec r_ij,rvec r_kj,real *costh,
793                 int *t1,int *t2)
794 /* Return value is the angle between the bonds i-j and j-k */
795 {
796   /* 41 FLOPS */
797   real th;
798   
799   *t1 = pbc_rvec_sub(pbc,xi,xj,r_ij);                   /*  3           */
800   *t2 = pbc_rvec_sub(pbc,xk,xj,r_kj);                   /*  3           */
801
802   *costh=cos_angle(r_ij,r_kj);          /* 25           */
803   th=acos(*costh);                      /* 10           */
804                                         /* 41 TOTAL     */
805   return th;
806 }
807
808 real angles(int nbonds,
809             const t_iatom forceatoms[],const t_iparams forceparams[],
810             const rvec x[],rvec f[],rvec fshift[],
811             const t_pbc *pbc,const t_graph *g,
812             real lambda,real *dvdlambda,
813             const t_mdatoms *md,t_fcdata *fcd,
814             int *global_atom_index)
815 {
816     int  i,ai,aj,ak,t1,t2,type;
817     rvec r_ij,r_kj;
818     real cos_theta,cos_theta2,theta,dVdt,va,vtot;
819     ivec jt,dt_ij,dt_kj;
820
821     vtot = 0.0;
822     for(i=0; i<nbonds; )
823     {
824         type = forceatoms[i++];
825         ai   = forceatoms[i++];
826         aj   = forceatoms[i++];
827         ak   = forceatoms[i++];
828
829         theta  = bond_angle(x[ai],x[aj],x[ak],pbc,
830                             r_ij,r_kj,&cos_theta,&t1,&t2);      /*  41          */
831   
832         *dvdlambda += harmonic(forceparams[type].harmonic.krA,
833                                forceparams[type].harmonic.krB,
834                                forceparams[type].harmonic.rA*DEG2RAD,
835                                forceparams[type].harmonic.rB*DEG2RAD,
836                                theta,lambda,&va,&dVdt);  /*  21  */
837         vtot += va;
838
839         cos_theta2 = sqr(cos_theta);
840         if (cos_theta2 < 1)
841         {
842             int  m;
843             real st,sth;
844             real cik,cii,ckk;
845             real nrkj2,nrij2;
846             real nrkj_1,nrij_1;
847             rvec f_i,f_j,f_k;
848
849             st  = dVdt*gmx_invsqrt(1 - cos_theta2);     /*  12          */
850             sth = st*cos_theta;                 /*   1          */
851 #ifdef DEBUG
852             if (debug)
853                 fprintf(debug,"ANGLES: theta = %10g  vth = %10g  dV/dtheta = %10g\n",
854                         theta*RAD2DEG,va,dVdt);
855 #endif
856             nrij2 = iprod(r_ij,r_ij);                   /*   5          */
857             nrkj2 = iprod(r_kj,r_kj);                   /*   5          */
858
859             nrij_1 = gmx_invsqrt(nrij2);                /*  10          */
860             nrkj_1 = gmx_invsqrt(nrkj2);                /*  10          */
861
862             cik = st*nrij_1*nrkj_1;                     /*   2          */
863             cii = sth*nrij_1*nrij_1;                    /*   2          */
864             ckk = sth*nrkj_1*nrkj_1;                    /*   2          */
865       
866             for (m=0; m<DIM; m++)
867             {                   /*  39          */
868                 f_i[m]    = -(cik*r_kj[m] - cii*r_ij[m]);
869                 f_k[m]    = -(cik*r_ij[m] - ckk*r_kj[m]);
870                 f_j[m]    = -f_i[m] - f_k[m];
871                 f[ai][m] += f_i[m];
872                 f[aj][m] += f_j[m];
873                 f[ak][m] += f_k[m];
874             }
875             if (g != NULL)
876             {
877                 copy_ivec(SHIFT_IVEC(g,aj),jt);
878
879                 ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
880                 ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
881                 t1 = IVEC2IS(dt_ij);
882                 t2 = IVEC2IS(dt_kj);
883             }
884             rvec_inc(fshift[t1],f_i);
885             rvec_inc(fshift[CENTRAL],f_j);
886             rvec_inc(fshift[t2],f_k);
887         }                                           /* 161 TOTAL        */
888     }
889
890     return vtot;
891 }
892
893 real linear_angles(int nbonds,
894                    const t_iatom forceatoms[],const t_iparams forceparams[],
895                    const rvec x[],rvec f[],rvec fshift[],
896                    const t_pbc *pbc,const t_graph *g,
897                    real lambda,real *dvdlambda,
898                    const t_mdatoms *md,t_fcdata *fcd,
899                    int *global_atom_index)
900 {
901   int  i,m,ai,aj,ak,t1,t2,type;
902   rvec f_i,f_j,f_k;
903   real L1,kA,kB,aA,aB,dr,dr2,va,vtot,a,b,klin;
904   ivec jt,dt_ij,dt_kj;
905   rvec r_ij,r_kj,r_ik,dx;
906     
907   L1   = 1-lambda;
908   vtot = 0.0;
909   for(i=0; (i<nbonds); ) {
910     type = forceatoms[i++];
911     ai   = forceatoms[i++];
912     aj   = forceatoms[i++];
913     ak   = forceatoms[i++];
914     
915     kA = forceparams[type].linangle.klinA;
916     kB = forceparams[type].linangle.klinB;
917     klin = L1*kA + lambda*kB;
918     
919     aA   = forceparams[type].linangle.aA;
920     aB   = forceparams[type].linangle.aB;
921     a    = L1*aA+lambda*aB;
922     b    = 1-a;
923     
924     t1 = pbc_rvec_sub(pbc,x[ai],x[aj],r_ij);
925     t2 = pbc_rvec_sub(pbc,x[ak],x[aj],r_kj);
926     rvec_sub(r_ij,r_kj,r_ik);
927     
928     dr2 = 0;
929     for(m=0; (m<DIM); m++) 
930     {
931         dr     = - a * r_ij[m] - b * r_kj[m];
932         dr2   += dr*dr;
933         dx[m]  = dr;
934         f_i[m] = a*klin*dr;
935         f_k[m] = b*klin*dr;
936         f_j[m] = -(f_i[m]+f_k[m]);
937         f[ai][m] += f_i[m];
938         f[aj][m] += f_j[m];
939         f[ak][m] += f_k[m];
940     }
941     va    = 0.5*klin*dr2;
942     *dvdlambda += 0.5*(kB-kA)*dr2 + klin*(aB-aA)*iprod(dx,r_ik); 
943             
944     vtot += va;
945     
946     if (g) {
947         copy_ivec(SHIFT_IVEC(g,aj),jt);
948       
949         ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
950         ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
951         t1=IVEC2IS(dt_ij);
952         t2=IVEC2IS(dt_kj);
953     }
954     rvec_inc(fshift[t1],f_i);
955     rvec_inc(fshift[CENTRAL],f_j);
956     rvec_inc(fshift[t2],f_k);
957   }                                           /* 57 TOTAL       */
958   return vtot;
959 }
960
961 real urey_bradley(int nbonds,
962                   const t_iatom forceatoms[],const t_iparams forceparams[],
963                   const rvec x[],rvec f[],rvec fshift[],
964                   const t_pbc *pbc,const t_graph *g,
965                   real lambda,real *dvdlambda,
966                   const t_mdatoms *md,t_fcdata *fcd,
967                   int *global_atom_index)
968 {
969   int  i,m,ai,aj,ak,t1,t2,type,ki;
970   rvec r_ij,r_kj,r_ik;
971   real cos_theta,cos_theta2,theta;
972   real dVdt,va,vtot,dr,dr2,vbond,fbond,fik;
973   real kthA,th0A,kUBA,r13A,kthB,th0B,kUBB,r13B;
974   ivec jt,dt_ij,dt_kj,dt_ik;
975   
976   vtot = 0.0;
977   for(i=0; (i<nbonds); ) {
978     type = forceatoms[i++];
979     ai   = forceatoms[i++];
980     aj   = forceatoms[i++];
981     ak   = forceatoms[i++];
982     th0A  = forceparams[type].u_b.thetaA*DEG2RAD;
983     kthA  = forceparams[type].u_b.kthetaA;
984     r13A  = forceparams[type].u_b.r13A;
985     kUBA  = forceparams[type].u_b.kUBA;
986     th0B  = forceparams[type].u_b.thetaB*DEG2RAD;
987     kthB  = forceparams[type].u_b.kthetaB;
988     r13B  = forceparams[type].u_b.r13B;
989     kUBB  = forceparams[type].u_b.kUBB;
990     
991     theta  = bond_angle(x[ai],x[aj],x[ak],pbc,
992                         r_ij,r_kj,&cos_theta,&t1,&t2);  /*  41          */
993   
994     *dvdlambda += harmonic(kthA,kthB,th0A,th0B,theta,lambda,&va,&dVdt);  /*  21  */
995     vtot += va;
996     
997     ki   = pbc_rvec_sub(pbc,x[ai],x[ak],r_ik);  /*   3          */
998     dr2  = iprod(r_ik,r_ik);                    /*   5          */
999     dr   = dr2*gmx_invsqrt(dr2);                        /*  10          */
1000
1001     *dvdlambda += harmonic(kUBA,kUBB,r13A,r13B,dr,lambda,&vbond,&fbond); /*  19  */
1002
1003     cos_theta2 = sqr(cos_theta);                /*   1          */
1004     if (cos_theta2 < 1) {
1005       real st,sth;
1006       real cik,cii,ckk;
1007       real nrkj2,nrij2;
1008       rvec f_i,f_j,f_k;
1009       
1010       st  = dVdt*gmx_invsqrt(1 - cos_theta2);   /*  12          */
1011       sth = st*cos_theta;                       /*   1          */
1012 #ifdef DEBUG
1013       if (debug)
1014         fprintf(debug,"ANGLES: theta = %10g  vth = %10g  dV/dtheta = %10g\n",
1015                 theta*RAD2DEG,va,dVdt);
1016 #endif
1017       nrkj2=iprod(r_kj,r_kj);                   /*   5          */
1018       nrij2=iprod(r_ij,r_ij);
1019       
1020       cik=st*gmx_invsqrt(nrkj2*nrij2);          /*  12          */ 
1021       cii=sth/nrij2;                            /*  10          */
1022       ckk=sth/nrkj2;                            /*  10          */
1023       
1024       for (m=0; (m<DIM); m++) {                 /*  39          */
1025         f_i[m]=-(cik*r_kj[m]-cii*r_ij[m]);
1026         f_k[m]=-(cik*r_ij[m]-ckk*r_kj[m]);
1027         f_j[m]=-f_i[m]-f_k[m];
1028         f[ai][m]+=f_i[m];
1029         f[aj][m]+=f_j[m];
1030         f[ak][m]+=f_k[m];
1031       }
1032       if (g) {
1033         copy_ivec(SHIFT_IVEC(g,aj),jt);
1034       
1035         ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
1036         ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
1037         t1=IVEC2IS(dt_ij);
1038         t2=IVEC2IS(dt_kj);
1039       }
1040       rvec_inc(fshift[t1],f_i);
1041       rvec_inc(fshift[CENTRAL],f_j);
1042       rvec_inc(fshift[t2],f_k);
1043     }                                           /* 161 TOTAL    */
1044     /* Time for the bond calculations */
1045     if (dr2 == 0.0)
1046       continue;
1047
1048     vtot  += vbond;  /* 1*/
1049     fbond *= gmx_invsqrt(dr2);                  /*   6          */
1050     
1051     if (g) {
1052       ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,ak),dt_ik);
1053       ki=IVEC2IS(dt_ik);
1054     }
1055     for (m=0; (m<DIM); m++) {                   /*  15          */
1056       fik=fbond*r_ik[m];
1057       f[ai][m]+=fik;
1058       f[ak][m]-=fik;
1059       fshift[ki][m]+=fik;
1060       fshift[CENTRAL][m]-=fik;
1061     }
1062   }
1063   return vtot;
1064 }
1065
1066 real quartic_angles(int nbonds,
1067                     const t_iatom forceatoms[],const t_iparams forceparams[],
1068                     const rvec x[],rvec f[],rvec fshift[],
1069                     const t_pbc *pbc,const t_graph *g,
1070                     real lambda,real *dvdlambda,
1071                     const t_mdatoms *md,t_fcdata *fcd,
1072                     int *global_atom_index)
1073 {
1074   int  i,j,ai,aj,ak,t1,t2,type;
1075   rvec r_ij,r_kj;
1076   real cos_theta,cos_theta2,theta,dt,dVdt,va,dtp,c,vtot;
1077   ivec jt,dt_ij,dt_kj;
1078   
1079   vtot = 0.0;
1080   for(i=0; (i<nbonds); ) {
1081     type = forceatoms[i++];
1082     ai   = forceatoms[i++];
1083     aj   = forceatoms[i++];
1084     ak   = forceatoms[i++];
1085
1086     theta  = bond_angle(x[ai],x[aj],x[ak],pbc,
1087                         r_ij,r_kj,&cos_theta,&t1,&t2);  /*  41          */
1088
1089     dt = theta - forceparams[type].qangle.theta*DEG2RAD; /* 2          */
1090
1091     dVdt = 0;
1092     va = forceparams[type].qangle.c[0];
1093     dtp = 1.0;
1094     for(j=1; j<=4; j++) {
1095       c = forceparams[type].qangle.c[j];
1096       dVdt -= j*c*dtp;
1097       dtp *= dt;
1098       va += c*dtp;
1099     }
1100     /* 20 */
1101
1102     vtot += va;
1103     
1104     cos_theta2 = sqr(cos_theta);                /*   1          */
1105     if (cos_theta2 < 1) {
1106       int  m;
1107       real st,sth;
1108       real cik,cii,ckk;
1109       real nrkj2,nrij2;
1110       rvec f_i,f_j,f_k;
1111       
1112       st  = dVdt*gmx_invsqrt(1 - cos_theta2);           /*  12          */
1113       sth = st*cos_theta;                       /*   1          */
1114 #ifdef DEBUG
1115       if (debug)
1116         fprintf(debug,"ANGLES: theta = %10g  vth = %10g  dV/dtheta = %10g\n",
1117                 theta*RAD2DEG,va,dVdt);
1118 #endif
1119       nrkj2=iprod(r_kj,r_kj);                   /*   5          */
1120       nrij2=iprod(r_ij,r_ij);
1121       
1122       cik=st*gmx_invsqrt(nrkj2*nrij2);          /*  12          */ 
1123       cii=sth/nrij2;                            /*  10          */
1124       ckk=sth/nrkj2;                            /*  10          */
1125       
1126       for (m=0; (m<DIM); m++) {                 /*  39          */
1127         f_i[m]=-(cik*r_kj[m]-cii*r_ij[m]);
1128         f_k[m]=-(cik*r_ij[m]-ckk*r_kj[m]);
1129         f_j[m]=-f_i[m]-f_k[m];
1130         f[ai][m]+=f_i[m];
1131         f[aj][m]+=f_j[m];
1132         f[ak][m]+=f_k[m];
1133       }
1134       if (g) {
1135         copy_ivec(SHIFT_IVEC(g,aj),jt);
1136       
1137         ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
1138         ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
1139         t1=IVEC2IS(dt_ij);
1140         t2=IVEC2IS(dt_kj);
1141       }
1142       rvec_inc(fshift[t1],f_i);
1143       rvec_inc(fshift[CENTRAL],f_j);
1144       rvec_inc(fshift[t2],f_k);
1145     }                                           /* 153 TOTAL    */
1146   }
1147   return vtot;
1148 }
1149
1150 real dih_angle(const rvec xi,const rvec xj,const rvec xk,const rvec xl,
1151                const t_pbc *pbc,
1152                rvec r_ij,rvec r_kj,rvec r_kl,rvec m,rvec n,
1153                real *sign,int *t1,int *t2,int *t3)
1154 {
1155   real ipr,phi;
1156
1157   *t1 = pbc_rvec_sub(pbc,xi,xj,r_ij);                   /*  3           */
1158   *t2 = pbc_rvec_sub(pbc,xk,xj,r_kj);                   /*  3           */
1159   *t3 = pbc_rvec_sub(pbc,xk,xl,r_kl);                   /*  3           */
1160
1161   cprod(r_ij,r_kj,m);                   /*  9           */
1162   cprod(r_kj,r_kl,n);                   /*  9           */
1163   phi=gmx_angle(m,n);                   /* 49 (assuming 25 for atan2) */
1164   ipr=iprod(r_ij,n);                    /*  5           */
1165   (*sign)=(ipr<0.0)?-1.0:1.0;
1166   phi=(*sign)*phi;                      /*  1           */
1167                                         /* 82 TOTAL     */
1168   return phi;
1169 }
1170
1171
1172 #ifdef SSE_PROPER_DIHEDRALS
1173
1174 /* x86 SIMD inner-product of 4 float vectors */
1175 #define GMX_MM_IPROD_PS(ax,ay,az,bx,by,bz)                 \
1176     _mm_add_ps(_mm_add_ps(_mm_mul_ps(ax,bx),_mm_mul_ps(ay,by)),_mm_mul_ps(az,bz))
1177
1178 /* x86 SIMD norm^2 of 4 float vectors */
1179 #define GMX_MM_NORM2_PS(ax,ay,az) GMX_MM_IPROD_PS(ax,ay,az,ax,ay,az)
1180
1181 /* x86 SIMD cross-product of 4 float vectors */
1182 #define GMX_MM_CPROD_PS(ax,ay,az,bx,by,bz,cx,cy,cz)        \
1183 {                                                          \
1184     cx = _mm_sub_ps(_mm_mul_ps(ay,bz),_mm_mul_ps(az,by));  \
1185     cy = _mm_sub_ps(_mm_mul_ps(az,bx),_mm_mul_ps(ax,bz));  \
1186     cz = _mm_sub_ps(_mm_mul_ps(ax,by),_mm_mul_ps(ay,bx));  \
1187 }
1188
1189 /* load 4 rvec's into 3 x86 SIMD float registers */
1190 #define load_rvec4(r0,r1,r2,r3,rx_SSE,ry_SSE,rz_SSE)          \
1191 {                                                             \
1192     __m128 tmp;                                               \
1193     rx_SSE = _mm_load_ps(r0);                                 \
1194     ry_SSE = _mm_load_ps(r1);                                 \
1195     rz_SSE = _mm_load_ps(r2);                                 \
1196     tmp    = _mm_load_ps(r3);                                 \
1197     _MM_TRANSPOSE4_PS(rx_SSE,ry_SSE,rz_SSE,tmp);              \
1198 }
1199
1200 #define store_rvec4(rx_SSE,ry_SSE,rz_SSE,r0,r1,r2,r3)         \
1201 {                                                             \
1202     __m128 tmp=_mm_setzero_ps();                              \
1203     _MM_TRANSPOSE4_PS(rx_SSE,ry_SSE,rz_SSE,tmp);              \
1204     _mm_store_ps(r0,rx_SSE);                                  \
1205     _mm_store_ps(r1,ry_SSE);                                  \
1206     _mm_store_ps(r2,rz_SSE);                                  \
1207     _mm_store_ps(r3,tmp   );                                  \
1208 }
1209
1210 /* An rvec in a structure which can be allocated 16-byte aligned */
1211 typedef struct {
1212     rvec  v;
1213     float f;
1214 } rvec_sse_t;
1215
1216 /* As dih_angle above, but calculates 4 dihedral angles at once using SSE,
1217  * also calculates the pre-factor required for the dihedral force update.
1218  * Note that bv and buf should be 16-byte aligned.
1219  */
1220 static void
1221 dih_angle_sse(const rvec *x,
1222               int ai[4],int aj[4],int ak[4],int al[4],
1223               const t_pbc *pbc,
1224               int t1[4],int t2[4],int t3[4],
1225               rvec_sse_t *bv,
1226               real *buf)
1227 {
1228     int s;
1229     __m128 rijx_SSE,rijy_SSE,rijz_SSE;
1230     __m128 rkjx_SSE,rkjy_SSE,rkjz_SSE;
1231     __m128 rklx_SSE,rkly_SSE,rklz_SSE;
1232     __m128 mx_SSE,my_SSE,mz_SSE;
1233     __m128 nx_SSE,ny_SSE,nz_SSE;
1234     __m128 cx_SSE,cy_SSE,cz_SSE;
1235     __m128 cn_SSE;
1236     __m128 s_SSE;
1237     __m128 phi_SSE;
1238     __m128 ipr_SSE;
1239     int signs;
1240     __m128 iprm_SSE,iprn_SSE;
1241     __m128 nrkj2_SSE,nrkj_1_SSE,nrkj_2_SSE,nrkj_SSE;
1242     __m128 nrkj_m2_SSE,nrkj_n2_SSE;
1243     __m128 p_SSE,q_SSE;
1244     __m128 fmin_SSE=_mm_set1_ps(GMX_FLOAT_MIN);
1245
1246     for(s=0; s<4; s++)
1247     {
1248         t1[s] = pbc_rvec_sub(pbc,x[ai[s]],x[aj[s]],bv[0+s].v);
1249         t2[s] = pbc_rvec_sub(pbc,x[ak[s]],x[aj[s]],bv[4+s].v);
1250         t3[s] = pbc_rvec_sub(pbc,x[ak[s]],x[al[s]],bv[8+s].v);
1251     }
1252
1253     load_rvec4(bv[0].v,bv[1].v,bv[2].v,bv[3].v,rijx_SSE,rijy_SSE,rijz_SSE);
1254     load_rvec4(bv[4].v,bv[5].v,bv[6].v,bv[7].v,rkjx_SSE,rkjy_SSE,rkjz_SSE);
1255     load_rvec4(bv[8].v,bv[9].v,bv[10].v,bv[11].v,rklx_SSE,rkly_SSE,rklz_SSE);
1256
1257     GMX_MM_CPROD_PS(rijx_SSE,rijy_SSE,rijz_SSE,
1258                     rkjx_SSE,rkjy_SSE,rkjz_SSE,
1259                     mx_SSE,my_SSE,mz_SSE);
1260
1261     GMX_MM_CPROD_PS(rkjx_SSE,rkjy_SSE,rkjz_SSE,
1262                     rklx_SSE,rkly_SSE,rklz_SSE,
1263                     nx_SSE,ny_SSE,nz_SSE);
1264
1265     GMX_MM_CPROD_PS(mx_SSE,my_SSE,mz_SSE,
1266                     nx_SSE,ny_SSE,nz_SSE,
1267                     cx_SSE,cy_SSE,cz_SSE);
1268
1269     cn_SSE = gmx_mm_sqrt_ps(GMX_MM_NORM2_PS(cx_SSE,cy_SSE,cz_SSE));
1270     
1271     s_SSE = GMX_MM_IPROD_PS(mx_SSE,my_SSE,mz_SSE,nx_SSE,ny_SSE,nz_SSE);
1272
1273     phi_SSE = gmx_mm_atan2_ps(cn_SSE,s_SSE);
1274     _mm_store_ps(buf+16,phi_SSE);
1275
1276     ipr_SSE = GMX_MM_IPROD_PS(rijx_SSE,rijy_SSE,rijz_SSE,
1277                               nx_SSE,ny_SSE,nz_SSE);
1278
1279     signs = _mm_movemask_ps(ipr_SSE);
1280     
1281     for(s=0; s<4; s++)
1282     {
1283         if (signs & (1<<s))
1284         {
1285             buf[16+s] = -buf[16+s];
1286         }
1287     }
1288
1289     iprm_SSE    = GMX_MM_NORM2_PS(mx_SSE,my_SSE,mz_SSE);
1290     iprn_SSE    = GMX_MM_NORM2_PS(nx_SSE,ny_SSE,nz_SSE);
1291
1292     /* store_rvec4 messes with the input, don't use it after this! */
1293     store_rvec4(mx_SSE,my_SSE,mz_SSE,bv[0].v,bv[1].v,bv[2].v,bv[3].v);
1294     store_rvec4(nx_SSE,ny_SSE,nz_SSE,bv[4].v,bv[5].v,bv[6].v,bv[7].v);
1295
1296     nrkj2_SSE   = GMX_MM_NORM2_PS(rkjx_SSE,rkjy_SSE,rkjz_SSE);
1297
1298     /* Avoid division by zero. When zero, the result is multiplied by 0
1299      * anyhow, so the 3 max below do not affect the final result.
1300      */
1301     nrkj2_SSE   = _mm_max_ps(nrkj2_SSE,fmin_SSE);
1302     nrkj_1_SSE  = gmx_mm_invsqrt_ps(nrkj2_SSE);
1303     nrkj_2_SSE  = _mm_mul_ps(nrkj_1_SSE,nrkj_1_SSE);
1304     nrkj_SSE    = _mm_mul_ps(nrkj2_SSE,nrkj_1_SSE);
1305
1306     iprm_SSE    = _mm_max_ps(iprm_SSE,fmin_SSE);
1307     iprn_SSE    = _mm_max_ps(iprn_SSE,fmin_SSE);
1308     nrkj_m2_SSE = _mm_mul_ps(nrkj_SSE,gmx_mm_inv_ps(iprm_SSE));
1309     nrkj_n2_SSE = _mm_mul_ps(nrkj_SSE,gmx_mm_inv_ps(iprn_SSE));
1310
1311     _mm_store_ps(buf+0,nrkj_m2_SSE);
1312     _mm_store_ps(buf+4,nrkj_n2_SSE);
1313
1314     p_SSE       = GMX_MM_IPROD_PS(rijx_SSE,rijy_SSE,rijz_SSE,
1315                                   rkjx_SSE,rkjy_SSE,rkjz_SSE);
1316     p_SSE       = _mm_mul_ps(p_SSE,nrkj_2_SSE);
1317
1318     q_SSE       = GMX_MM_IPROD_PS(rklx_SSE,rkly_SSE,rklz_SSE,
1319                                   rkjx_SSE,rkjy_SSE,rkjz_SSE);
1320     q_SSE       = _mm_mul_ps(q_SSE,nrkj_2_SSE);
1321
1322     _mm_store_ps(buf+8 ,p_SSE);
1323     _mm_store_ps(buf+12,q_SSE);
1324 }
1325
1326 #endif /* SSE_PROPER_DIHEDRALS */
1327
1328
1329 void do_dih_fup(int i,int j,int k,int l,real ddphi,
1330                 rvec r_ij,rvec r_kj,rvec r_kl,
1331                 rvec m,rvec n,rvec f[],rvec fshift[],
1332                 const t_pbc *pbc,const t_graph *g,
1333                 const rvec x[],int t1,int t2,int t3)
1334 {
1335   /* 143 FLOPS */
1336   rvec f_i,f_j,f_k,f_l;
1337   rvec uvec,vvec,svec,dx_jl;
1338   real iprm,iprn,nrkj,nrkj2,nrkj_1,nrkj_2;
1339   real a,b,p,q,toler;
1340   ivec jt,dt_ij,dt_kj,dt_lj;  
1341   
1342   iprm  = iprod(m,m);           /*  5   */
1343   iprn  = iprod(n,n);           /*  5   */
1344   nrkj2 = iprod(r_kj,r_kj);     /*  5   */
1345   toler = nrkj2*GMX_REAL_EPS;
1346   if ((iprm > toler) && (iprn > toler)) {
1347     nrkj_1 = gmx_invsqrt(nrkj2);        /* 10   */
1348     nrkj_2 = nrkj_1*nrkj_1;     /*  1   */
1349     nrkj  = nrkj2*nrkj_1;       /*  1   */
1350     a     = -ddphi*nrkj/iprm;   /* 11   */
1351     svmul(a,m,f_i);             /*  3   */
1352     b     = ddphi*nrkj/iprn;    /* 11   */
1353     svmul(b,n,f_l);             /*  3   */
1354     p     = iprod(r_ij,r_kj);   /*  5   */
1355     p    *= nrkj_2;             /*  1   */
1356     q     = iprod(r_kl,r_kj);   /*  5   */
1357     q    *= nrkj_2;             /*  1   */
1358     svmul(p,f_i,uvec);          /*  3   */
1359     svmul(q,f_l,vvec);          /*  3   */
1360     rvec_sub(uvec,vvec,svec);   /*  3   */
1361     rvec_sub(f_i,svec,f_j);     /*  3   */
1362     rvec_add(f_l,svec,f_k);     /*  3   */
1363     rvec_inc(f[i],f_i);         /*  3   */
1364     rvec_dec(f[j],f_j);         /*  3   */
1365     rvec_dec(f[k],f_k);         /*  3   */
1366     rvec_inc(f[l],f_l);         /*  3   */
1367     
1368     if (g) {
1369       copy_ivec(SHIFT_IVEC(g,j),jt);
1370       ivec_sub(SHIFT_IVEC(g,i),jt,dt_ij);
1371       ivec_sub(SHIFT_IVEC(g,k),jt,dt_kj);
1372       ivec_sub(SHIFT_IVEC(g,l),jt,dt_lj);
1373       t1=IVEC2IS(dt_ij);
1374       t2=IVEC2IS(dt_kj);
1375       t3=IVEC2IS(dt_lj);
1376     } else if (pbc) {
1377       t3 = pbc_rvec_sub(pbc,x[l],x[j],dx_jl);
1378     } else {
1379       t3 = CENTRAL;
1380     }
1381     
1382     rvec_inc(fshift[t1],f_i);
1383     rvec_dec(fshift[CENTRAL],f_j);
1384     rvec_dec(fshift[t2],f_k);
1385     rvec_inc(fshift[t3],f_l);
1386   }
1387   /* 112 TOTAL  */
1388 }
1389
1390 /* As do_dih_fup above, but without shift forces */
1391 static void
1392 do_dih_fup_noshiftf(int i,int j,int k,int l,real ddphi,
1393                     rvec r_ij,rvec r_kj,rvec r_kl,
1394                     rvec m,rvec n,rvec f[])
1395 {
1396   rvec f_i,f_j,f_k,f_l;
1397   rvec uvec,vvec,svec,dx_jl;
1398   real iprm,iprn,nrkj,nrkj2,nrkj_1,nrkj_2;
1399   real a,b,p,q,toler;
1400   ivec jt,dt_ij,dt_kj,dt_lj;  
1401   
1402   iprm  = iprod(m,m);           /*  5   */
1403   iprn  = iprod(n,n);           /*  5   */
1404   nrkj2 = iprod(r_kj,r_kj);     /*  5   */
1405   toler = nrkj2*GMX_REAL_EPS;
1406   if ((iprm > toler) && (iprn > toler)) {
1407     nrkj_1 = gmx_invsqrt(nrkj2);        /* 10   */
1408     nrkj_2 = nrkj_1*nrkj_1;     /*  1   */
1409     nrkj  = nrkj2*nrkj_1;       /*  1   */
1410     a     = -ddphi*nrkj/iprm;   /* 11   */
1411     svmul(a,m,f_i);             /*  3   */
1412     b     = ddphi*nrkj/iprn;    /* 11   */
1413     svmul(b,n,f_l);             /*  3   */
1414     p     = iprod(r_ij,r_kj);   /*  5   */
1415     p    *= nrkj_2;             /*  1   */
1416     q     = iprod(r_kl,r_kj);   /*  5   */
1417     q    *= nrkj_2;             /*  1   */
1418     svmul(p,f_i,uvec);          /*  3   */
1419     svmul(q,f_l,vvec);          /*  3   */
1420     rvec_sub(uvec,vvec,svec);   /*  3   */
1421     rvec_sub(f_i,svec,f_j);     /*  3   */
1422     rvec_add(f_l,svec,f_k);     /*  3   */
1423     rvec_inc(f[i],f_i);         /*  3   */
1424     rvec_dec(f[j],f_j);         /*  3   */
1425     rvec_dec(f[k],f_k);         /*  3   */
1426     rvec_inc(f[l],f_l);         /*  3   */
1427   }
1428 }
1429
1430 /* As do_dih_fup_noshiftf above, but with pre-calculated pre-factors */
1431 static void
1432 do_dih_fup_noshiftf_precalc(int i,int j,int k,int l,real ddphi,
1433                             real nrkj_m2,real nrkj_n2,
1434                             real p,real q,
1435                             rvec m,rvec n,rvec f[])
1436 {
1437     rvec f_i,f_j,f_k,f_l;
1438     rvec uvec,vvec,svec,dx_jl;
1439     real a,b,toler;
1440     ivec jt,dt_ij,dt_kj,dt_lj;  
1441   
1442     a = -ddphi*nrkj_m2;
1443     svmul(a,m,f_i);
1444     b =  ddphi*nrkj_n2;
1445     svmul(b,n,f_l);
1446     svmul(p,f_i,uvec);
1447     svmul(q,f_l,vvec);
1448     rvec_sub(uvec,vvec,svec);
1449     rvec_sub(f_i,svec,f_j);
1450     rvec_add(f_l,svec,f_k);
1451     rvec_inc(f[i],f_i);
1452     rvec_dec(f[j],f_j);
1453     rvec_dec(f[k],f_k);
1454     rvec_inc(f[l],f_l);
1455 }
1456
1457
1458 real dopdihs(real cpA,real cpB,real phiA,real phiB,int mult,
1459              real phi,real lambda,real *V,real *F)
1460 {
1461   real v,dvdlambda,mdphi,v1,sdphi,ddphi;
1462   real L1   = 1.0 - lambda;
1463   real ph0  = (L1*phiA + lambda*phiB)*DEG2RAD;
1464   real dph0 = (phiB - phiA)*DEG2RAD;
1465   real cp   = L1*cpA + lambda*cpB;
1466   
1467   mdphi =  mult*phi - ph0;
1468   sdphi = sin(mdphi);
1469   ddphi = -cp*mult*sdphi;
1470   v1    = 1.0 + cos(mdphi);
1471   v     = cp*v1;
1472   
1473   dvdlambda  = (cpB - cpA)*v1 + cp*dph0*sdphi;
1474   
1475   *V = v;
1476   *F = ddphi;
1477   
1478   return dvdlambda;
1479   
1480   /* That was 40 flops */
1481 }
1482
1483 static void
1484 dopdihs_noener(real cpA,real cpB,real phiA,real phiB,int mult,
1485                real phi,real lambda,real *F)
1486 {
1487   real mdphi,sdphi,ddphi;
1488   real L1   = 1.0 - lambda;
1489   real ph0  = (L1*phiA + lambda*phiB)*DEG2RAD;
1490   real cp   = L1*cpA + lambda*cpB;
1491   
1492   mdphi = mult*phi - ph0;
1493   sdphi = sin(mdphi);
1494   ddphi = -cp*mult*sdphi;
1495   
1496   *F = ddphi;
1497   
1498   /* That was 20 flops */
1499 }
1500
1501 static void
1502 dopdihs_mdphi(real cpA,real cpB,real phiA,real phiB,int mult,
1503               real phi,real lambda,real *cp,real *mdphi)
1504 {
1505     real L1   = 1.0 - lambda;
1506     real ph0  = (L1*phiA + lambda*phiB)*DEG2RAD;
1507
1508     *cp    = L1*cpA + lambda*cpB;
1509
1510     *mdphi = mult*phi - ph0;
1511 }
1512
1513 static real dopdihs_min(real cpA,real cpB,real phiA,real phiB,int mult,
1514                         real phi,real lambda,real *V,real *F)
1515      /* similar to dopdihs, except for a minus sign  *
1516       * and a different treatment of mult/phi0       */
1517 {
1518   real v,dvdlambda,mdphi,v1,sdphi,ddphi;
1519   real L1   = 1.0 - lambda;
1520   real ph0  = (L1*phiA + lambda*phiB)*DEG2RAD;
1521   real dph0 = (phiB - phiA)*DEG2RAD;
1522   real cp   = L1*cpA + lambda*cpB;
1523   
1524   mdphi = mult*(phi-ph0);
1525   sdphi = sin(mdphi);
1526   ddphi = cp*mult*sdphi;
1527   v1    = 1.0-cos(mdphi);
1528   v     = cp*v1;
1529   
1530   dvdlambda  = (cpB-cpA)*v1 + cp*dph0*sdphi;
1531   
1532   *V = v;
1533   *F = ddphi;
1534   
1535   return dvdlambda;
1536   
1537   /* That was 40 flops */
1538 }
1539
1540 real pdihs(int nbonds,
1541            const t_iatom forceatoms[],const t_iparams forceparams[],
1542            const rvec x[],rvec f[],rvec fshift[],
1543            const t_pbc *pbc,const t_graph *g,
1544            real lambda,real *dvdlambda,
1545            const t_mdatoms *md,t_fcdata *fcd,
1546            int *global_atom_index)
1547 {
1548   int  i,type,ai,aj,ak,al;
1549   int  t1,t2,t3;
1550   rvec r_ij,r_kj,r_kl,m,n;
1551   real phi,sign,ddphi,vpd,vtot;
1552
1553   vtot = 0.0;
1554
1555   for(i=0; (i<nbonds); ) {
1556     type = forceatoms[i++];
1557     ai   = forceatoms[i++];
1558     aj   = forceatoms[i++];
1559     ak   = forceatoms[i++];
1560     al   = forceatoms[i++];
1561     
1562     phi=dih_angle(x[ai],x[aj],x[ak],x[al],pbc,r_ij,r_kj,r_kl,m,n,
1563                   &sign,&t1,&t2,&t3);                   /*  84          */
1564     *dvdlambda += dopdihs(forceparams[type].pdihs.cpA,
1565                           forceparams[type].pdihs.cpB,
1566                           forceparams[type].pdihs.phiA,
1567                           forceparams[type].pdihs.phiB,
1568                           forceparams[type].pdihs.mult,
1569                           phi,lambda,&vpd,&ddphi);
1570
1571     vtot += vpd;
1572     do_dih_fup(ai,aj,ak,al,ddphi,r_ij,r_kj,r_kl,m,n,
1573                f,fshift,pbc,g,x,t1,t2,t3);                      /* 112          */
1574
1575 #ifdef DEBUG
1576     fprintf(debug,"pdih: (%d,%d,%d,%d) phi=%g\n",
1577             ai,aj,ak,al,phi);
1578 #endif
1579   } /* 223 TOTAL        */
1580
1581   return vtot;
1582 }
1583
1584 void make_dp_periodic(real *dp)  /* 1 flop? */
1585 {
1586     /* dp cannot be outside (-pi,pi) */
1587     if (*dp >= M_PI)
1588     {
1589         *dp -= 2*M_PI;
1590     }
1591     else if (*dp < -M_PI)
1592     {
1593         *dp += 2*M_PI;
1594     }
1595     return;
1596 }
1597
1598 /* As pdihs above, but without calculating energies and shift forces */
1599 static void
1600 pdihs_noener(int nbonds,
1601              const t_iatom forceatoms[],const t_iparams forceparams[],
1602              const rvec x[],rvec f[],
1603              const t_pbc *pbc,const t_graph *g,
1604              real lambda,
1605              const t_mdatoms *md,t_fcdata *fcd,
1606              int *global_atom_index)
1607 {
1608     int  i,type,ai,aj,ak,al;
1609     int  t1,t2,t3;
1610     rvec r_ij,r_kj,r_kl,m,n;
1611     real phi,sign,ddphi_tot,ddphi;
1612
1613     for(i=0; (i<nbonds); )
1614     {
1615         ai   = forceatoms[i+1];
1616         aj   = forceatoms[i+2];
1617         ak   = forceatoms[i+3];
1618         al   = forceatoms[i+4];
1619
1620         phi = dih_angle(x[ai],x[aj],x[ak],x[al],pbc,r_ij,r_kj,r_kl,m,n,
1621                         &sign,&t1,&t2,&t3);
1622
1623         ddphi_tot = 0;
1624
1625         /* Loop over dihedrals working on the same atoms,
1626          * so we avoid recalculating angles and force distributions.
1627          */
1628         do
1629         {
1630             type = forceatoms[i];
1631             dopdihs_noener(forceparams[type].pdihs.cpA,
1632                            forceparams[type].pdihs.cpB,
1633                            forceparams[type].pdihs.phiA,
1634                            forceparams[type].pdihs.phiB,
1635                            forceparams[type].pdihs.mult,
1636                            phi,lambda,&ddphi);
1637             ddphi_tot += ddphi;
1638
1639             i += 5;
1640         }
1641         while(i < nbonds &&
1642               forceatoms[i+1] == ai &&
1643               forceatoms[i+2] == aj &&
1644               forceatoms[i+3] == ak &&
1645               forceatoms[i+4] == al);
1646
1647         do_dih_fup_noshiftf(ai,aj,ak,al,ddphi_tot,r_ij,r_kj,r_kl,m,n,f);
1648     }
1649 }
1650
1651
1652 #ifdef SSE_PROPER_DIHEDRALS
1653
1654 /* As pdihs_noner above, but using SSE to calculate 4 dihedrals at once */
1655 static void
1656 pdihs_noener_sse(int nbonds,
1657                  const t_iatom forceatoms[],const t_iparams forceparams[],
1658                  const rvec x[],rvec f[],
1659                  const t_pbc *pbc,const t_graph *g,
1660                  real lambda,
1661                  const t_mdatoms *md,t_fcdata *fcd,
1662                  int *global_atom_index)
1663 {
1664     int  i,i4,s;
1665     int  type,ai[4],aj[4],ak[4],al[4];
1666     int  t1[4],t2[4],t3[4];
1667     int  mult[4];
1668     real cp[4],mdphi[4];
1669     real ddphi;
1670     rvec_sse_t rs_array[13],*rs;
1671     real buf_array[24],*buf;
1672     __m128 mdphi_SSE,sin_SSE,cos_SSE;
1673
1674     /* Ensure 16-byte alignment */
1675     rs  = (rvec_sse_t *)(((size_t)(rs_array +1)) & (~((size_t)15)));
1676     buf =      (float *)(((size_t)(buf_array+3)) & (~((size_t)15)));
1677
1678     for(i=0; (i<nbonds); i+=20)
1679     {
1680         /* Collect atoms quadruplets for 4 dihedrals */
1681         i4 = i;
1682         for(s=0; s<4; s++)
1683         {
1684             ai[s] = forceatoms[i4+1];
1685             aj[s] = forceatoms[i4+2];
1686             ak[s] = forceatoms[i4+3];
1687             al[s] = forceatoms[i4+4];
1688             /* At the end fill the arrays with identical entries */
1689             if (i4 + 5 < nbonds)
1690             {
1691                 i4 += 5;
1692             }
1693         }
1694
1695         /* Caclulate 4 dihedral angles at once */
1696         dih_angle_sse(x,ai,aj,ak,al,pbc,t1,t2,t3,rs,buf);
1697
1698         i4 = i;
1699         for(s=0; s<4; s++)
1700         {
1701             if (i4 < nbonds)
1702             {
1703                 /* Calculate the coefficient and angle deviation */
1704                 type = forceatoms[i4];
1705                 dopdihs_mdphi(forceparams[type].pdihs.cpA,
1706                               forceparams[type].pdihs.cpB,
1707                               forceparams[type].pdihs.phiA,
1708                               forceparams[type].pdihs.phiB,
1709                               forceparams[type].pdihs.mult,
1710                               buf[16+s],lambda,&cp[s],&buf[16+s]);
1711                 mult[s] = forceparams[type].pdihs.mult;
1712             }
1713             else
1714             {
1715                 buf[16+s] = 0;
1716             }
1717             i4 += 5;
1718         }
1719
1720         /* Calculate 4 sines at once */
1721         mdphi_SSE = _mm_load_ps(buf+16);
1722         gmx_mm_sincos_ps(mdphi_SSE,&sin_SSE,&cos_SSE);
1723         _mm_store_ps(buf+16,sin_SSE);
1724
1725         i4 = i;
1726         s = 0;
1727         do
1728         {
1729             ddphi = -cp[s]*mult[s]*buf[16+s];
1730
1731             do_dih_fup_noshiftf_precalc(ai[s],aj[s],ak[s],al[s],ddphi,
1732                                         buf[ 0+s],buf[ 4+s],
1733                                         buf[ 8+s],buf[12+s],
1734                                         rs[0+s].v,rs[4+s].v,
1735                                         f);
1736             s++;
1737             i4 += 5;
1738         }
1739         while (s < 4 && i4 < nbonds);
1740     }
1741 }
1742
1743 #endif /* SSE_PROPER_DIHEDRALS */
1744
1745
1746 real idihs(int nbonds,
1747            const t_iatom forceatoms[],const t_iparams forceparams[],
1748            const rvec x[],rvec f[],rvec fshift[],
1749            const t_pbc *pbc,const t_graph *g,
1750            real lambda,real *dvdlambda,
1751            const t_mdatoms *md,t_fcdata *fcd,
1752            int *global_atom_index)
1753 {
1754   int  i,type,ai,aj,ak,al;
1755   int  t1,t2,t3;
1756   real phi,phi0,dphi0,ddphi,sign,vtot;
1757   rvec r_ij,r_kj,r_kl,m,n;
1758   real L1,kk,dp,dp2,kA,kB,pA,pB,dvdl_term;
1759
1760   L1 = 1.0-lambda;
1761   dvdl_term = 0;
1762   vtot = 0.0;
1763   for(i=0; (i<nbonds); ) {
1764     type = forceatoms[i++];
1765     ai   = forceatoms[i++];
1766     aj   = forceatoms[i++];
1767     ak   = forceatoms[i++];
1768     al   = forceatoms[i++];
1769     
1770     phi=dih_angle(x[ai],x[aj],x[ak],x[al],pbc,r_ij,r_kj,r_kl,m,n,
1771                   &sign,&t1,&t2,&t3);                   /*  84          */
1772     
1773     /* phi can jump if phi0 is close to Pi/-Pi, which will cause huge
1774      * force changes if we just apply a normal harmonic.
1775      * Instead, we first calculate phi-phi0 and take it modulo (-Pi,Pi).
1776      * This means we will never have the periodicity problem, unless
1777      * the dihedral is Pi away from phiO, which is very unlikely due to
1778      * the potential.
1779      */
1780     kA = forceparams[type].harmonic.krA;
1781     kB = forceparams[type].harmonic.krB;
1782     pA = forceparams[type].harmonic.rA;
1783     pB = forceparams[type].harmonic.rB;
1784
1785     kk    = L1*kA + lambda*kB;
1786     phi0  = (L1*pA + lambda*pB)*DEG2RAD;
1787     dphi0 = (pB - pA)*DEG2RAD;
1788
1789     dp = phi-phi0;  
1790
1791     make_dp_periodic(&dp);
1792     
1793     dp2 = dp*dp;
1794
1795     vtot += 0.5*kk*dp2;
1796     ddphi = -kk*dp;
1797     
1798     dvdl_term += 0.5*(kB - kA)*dp2 - kk*dphi0*dp;
1799
1800     do_dih_fup(ai,aj,ak,al,(real)(-ddphi),r_ij,r_kj,r_kl,m,n,
1801                f,fshift,pbc,g,x,t1,t2,t3);                      /* 112          */
1802     /* 218 TOTAL        */
1803 #ifdef DEBUG
1804     if (debug)
1805       fprintf(debug,"idih: (%d,%d,%d,%d) phi=%g\n",
1806               ai,aj,ak,al,phi);
1807 #endif
1808   }
1809   
1810   *dvdlambda += dvdl_term;
1811   return vtot;
1812 }
1813
1814
1815 /*! \brief returns dx, rdist, and dpdl for functions posres() and fbposres()        
1816  */
1817 static void posres_dx(const rvec x, const rvec pos0A, const rvec pos0B,
1818                       const rvec comA_sc, const rvec comB_sc,
1819                       real lambda,
1820                       t_pbc *pbc, int refcoord_scaling,int npbcdim,
1821                       rvec dx, rvec rdist, rvec dpdl)
1822 {
1823     int m,d;
1824     real posA, posB, L1, ref=0.;
1825     rvec pos;
1826
1827     L1=1.0-lambda;
1828
1829     for(m=0; m<DIM; m++)
1830     {
1831         posA = pos0A[m];
1832         posB = pos0B[m];
1833         if (m < npbcdim)
1834         {
1835             switch (refcoord_scaling)
1836             {
1837             case erscNO:
1838                 ref      = 0;
1839                 rdist[m] = L1*posA + lambda*posB;
1840                 dpdl[m]  = posB - posA;
1841                     break;
1842             case erscALL:
1843                 /* Box relative coordinates are stored for dimensions with pbc */
1844                 posA *= pbc->box[m][m];
1845                 posB *= pbc->box[m][m];
1846                 for(d=m+1; d<npbcdim; d++)
1847                 {
1848                     posA += pos0A[d]*pbc->box[d][m];
1849                     posB += pos0B[d]*pbc->box[d][m];
1850                 }
1851                 ref      = L1*posA + lambda*posB;
1852                 rdist[m] = 0;
1853                 dpdl[m]  = posB - posA;
1854                 break;
1855             case erscCOM:
1856                 ref      = L1*comA_sc[m] + lambda*comB_sc[m];
1857                 rdist[m] = L1*posA       + lambda*posB;
1858                 dpdl[m]  = comB_sc[m] - comA_sc[m] + posB - posA;
1859                 break;
1860             default:
1861                 gmx_fatal(FARGS, "No such scaling method implemented");
1862             }
1863         }
1864         else
1865         {
1866             ref      = L1*posA + lambda*posB;
1867             rdist[m] = 0;
1868             dpdl[m]  = posB - posA;
1869         }
1870
1871         /* We do pbc_dx with ref+rdist,
1872          * since with only ref we can be up to half a box vector wrong.
1873          */
1874         pos[m] = ref + rdist[m];
1875     }
1876
1877     if (pbc)
1878     {
1879         pbc_dx(pbc,x,pos,dx);
1880     }
1881     else
1882     {
1883         rvec_sub(x,pos,dx);
1884     }
1885 }
1886
1887 /*! \brief Adds forces of flat-bottomed positions restraints to f[]
1888  *         and fixes vir_diag. Returns the flat-bottomed potential. */
1889 real fbposres(int nbonds,
1890               const t_iatom forceatoms[],const t_iparams forceparams[],
1891               const rvec x[],rvec f[],rvec vir_diag,
1892               t_pbc *pbc,
1893               int refcoord_scaling,int ePBC,rvec com)
1894 /* compute flat-bottomed positions restraints */
1895 {
1896     int  i,ai,m,d,type,npbcdim=0,fbdim;
1897     const t_iparams *pr;
1898     real vtot,kk,v;
1899     real ref=0,dr,dr2,rpot,rfb,rfb2,fact,invdr;
1900     rvec com_sc,rdist,pos,dx,dpdl,fm;
1901     gmx_bool bInvert;
1902
1903     npbcdim = ePBC2npbcdim(ePBC);
1904
1905     if (refcoord_scaling == erscCOM)
1906     {
1907         clear_rvec(com_sc);
1908         for(m=0; m<npbcdim; m++)
1909         {
1910             for(d=m; d<npbcdim; d++)
1911             {
1912                 com_sc[m] += com[d]*pbc->box[d][m];
1913             }
1914         }
1915     }
1916
1917     vtot = 0.0;
1918     for(i=0; (i<nbonds); )
1919     {
1920         type = forceatoms[i++];
1921         ai   = forceatoms[i++];
1922         pr   = &forceparams[type];
1923
1924         /* same calculation as for normal posres, but with identical A and B states, and lambda==0 */
1925         posres_dx(x[ai],forceparams[type].fbposres.pos0, forceparams[type].fbposres.pos0,
1926                   com_sc, com_sc, 0.0,
1927                   pbc, refcoord_scaling, npbcdim,
1928                   dx, rdist, dpdl);
1929
1930         clear_rvec(fm);
1931         v=0.0;
1932
1933         kk=pr->fbposres.k;
1934         rfb=pr->fbposres.r;
1935         rfb2=sqr(rfb);
1936
1937         /* with rfb<0, push particle out of the sphere/cylinder/layer */
1938         bInvert=FALSE;
1939         if (rfb<0.){
1940             bInvert=TRUE;
1941             rfb=-rfb;
1942         }
1943
1944         switch (pr->fbposres.geom)
1945         {
1946         case efbposresSPHERE:
1947             /* spherical flat-bottom posres */
1948             dr2=norm2(dx);
1949             if ( dr2 > 0.0 &&
1950                  ( (dr2 > rfb2 && bInvert==FALSE ) || (dr2 < rfb2 && bInvert==TRUE ) )
1951                 )
1952             {
1953                 dr=sqrt(dr2);
1954                 v = 0.5*kk*sqr(dr - rfb);
1955                 fact = -kk*(dr-rfb)/dr;  /* Force pointing to the center pos0 */
1956                 svmul(fact,dx,fm);
1957             }
1958             break;
1959         case efbposresCYLINDER:
1960             /* cylidrical flat-bottom posres in x-y plane. fm[ZZ] = 0. */
1961             dr2=sqr(dx[XX])+sqr(dx[YY]);
1962             if  ( dr2 > 0.0 &&
1963                   ( (dr2 > rfb2 && bInvert==FALSE ) || (dr2 < rfb2 && bInvert==TRUE ) )
1964                 )
1965             {
1966                 dr=sqrt(dr2);
1967                 invdr=1./dr;
1968                 v = 0.5*kk*sqr(dr - rfb);
1969                 fm[XX] = -kk*(dr-rfb)*dx[XX]*invdr;  /* Force pointing to the center */
1970                 fm[YY] = -kk*(dr-rfb)*dx[YY]*invdr;
1971             }
1972             break;
1973         case efbposresX: /* fbdim=XX */
1974         case efbposresY: /* fbdim=YY */
1975         case efbposresZ: /* fbdim=ZZ */
1976             /* 1D flat-bottom potential */
1977             fbdim = pr->fbposres.geom - efbposresX;
1978             dr=dx[fbdim];
1979             if ( ( dr>rfb && bInvert==FALSE ) || ( 0<dr && dr<rfb && bInvert==TRUE )  )
1980             {
1981                 v = 0.5*kk*sqr(dr - rfb);
1982                 fm[fbdim] = -kk*(dr - rfb);
1983             }
1984             else if ( (dr < (-rfb) && bInvert==FALSE ) || ( (-rfb)<dr && dr<0 && bInvert==TRUE ))
1985             {
1986                 v = 0.5*kk*sqr(dr + rfb);
1987                 fm[fbdim] = -kk*(dr + rfb);
1988             }
1989             break;
1990         }
1991
1992         vtot += v;
1993
1994         for (m=0; (m<DIM); m++)
1995         {
1996             f[ai][m]   += fm[m];
1997             /* Here we correct for the pbc_dx which included rdist */
1998             vir_diag[m] -= 0.5*(dx[m] + rdist[m])*fm[m];
1999         }
2000     }
2001
2002     return vtot;
2003 }
2004
2005
2006 real posres(int nbonds,
2007             const t_iatom forceatoms[],const t_iparams forceparams[],
2008             const rvec x[],rvec f[],rvec vir_diag,
2009             t_pbc *pbc,
2010             real lambda,real *dvdlambda,
2011             int refcoord_scaling,int ePBC,rvec comA,rvec comB)
2012 {
2013     int  i,ai,m,d,type,ki,npbcdim=0;
2014     const t_iparams *pr;
2015     real L1;
2016     real vtot,kk,fm;
2017     real posA,posB,ref=0;
2018     rvec comA_sc,comB_sc,rdist,dpdl,pos,dx;
2019     gmx_bool bForceValid = TRUE;
2020
2021     if ((f==NULL) || (vir_diag==NULL)) {  /* should both be null together! */
2022         bForceValid = FALSE;
2023     }
2024
2025     npbcdim = ePBC2npbcdim(ePBC);
2026
2027     if (refcoord_scaling == erscCOM)
2028     {
2029         clear_rvec(comA_sc);
2030         clear_rvec(comB_sc);
2031         for(m=0; m<npbcdim; m++)
2032         {
2033             for(d=m; d<npbcdim; d++)
2034             {
2035                 comA_sc[m] += comA[d]*pbc->box[d][m];
2036                 comB_sc[m] += comB[d]*pbc->box[d][m];
2037             }
2038         }
2039     }
2040
2041     L1 = 1.0 - lambda;
2042
2043     vtot = 0.0;
2044     for(i=0; (i<nbonds); )
2045     {
2046         type = forceatoms[i++];
2047         ai   = forceatoms[i++];
2048         pr   = &forceparams[type];
2049         
2050         /* return dx, rdist, and dpdl */
2051         posres_dx(x[ai],forceparams[type].posres.pos0A, forceparams[type].posres.pos0B,
2052                   comA_sc, comB_sc, lambda,
2053                   pbc, refcoord_scaling, npbcdim,
2054                   dx, rdist, dpdl);
2055
2056         for (m=0; (m<DIM); m++)
2057         {
2058             kk          = L1*pr->posres.fcA[m] + lambda*pr->posres.fcB[m];
2059             fm          = -kk*dx[m];
2060             vtot       += 0.5*kk*dx[m]*dx[m];
2061             *dvdlambda +=
2062                 0.5*(pr->posres.fcB[m] - pr->posres.fcA[m])*dx[m]*dx[m]
2063                 -fm*dpdl[m];
2064
2065             /* Here we correct for the pbc_dx which included rdist */
2066             if (bForceValid) {
2067                 f[ai][m]   += fm;
2068                 vir_diag[m] -= 0.5*(dx[m] + rdist[m])*fm;
2069             }
2070         }
2071     }
2072
2073     return vtot;
2074 }
2075
2076 static real low_angres(int nbonds,
2077                        const t_iatom forceatoms[],const t_iparams forceparams[],
2078                        const rvec x[],rvec f[],rvec fshift[],
2079                        const t_pbc *pbc,const t_graph *g,
2080                        real lambda,real *dvdlambda,
2081                        gmx_bool bZAxis)
2082 {
2083   int  i,m,type,ai,aj,ak,al;
2084   int  t1,t2;
2085   real phi,cos_phi,cos_phi2,vid,vtot,dVdphi;
2086   rvec r_ij,r_kl,f_i,f_k={0,0,0};
2087   real st,sth,nrij2,nrkl2,c,cij,ckl;
2088
2089   ivec dt;  
2090   t2 = 0; /* avoid warning with gcc-3.3. It is never used uninitialized */
2091
2092   vtot = 0.0;
2093   ak=al=0; /* to avoid warnings */
2094   for(i=0; i<nbonds; ) {
2095     type = forceatoms[i++];
2096     ai   = forceatoms[i++];
2097     aj   = forceatoms[i++];
2098     t1   = pbc_rvec_sub(pbc,x[aj],x[ai],r_ij);                  /*  3           */
2099     if (!bZAxis) {      
2100       ak   = forceatoms[i++];
2101       al   = forceatoms[i++];
2102       t2   = pbc_rvec_sub(pbc,x[al],x[ak],r_kl);           /*  3                */
2103     } else {
2104       r_kl[XX] = 0;
2105       r_kl[YY] = 0;
2106       r_kl[ZZ] = 1;
2107     }
2108
2109     cos_phi = cos_angle(r_ij,r_kl);             /* 25           */
2110     phi     = acos(cos_phi);                    /* 10           */
2111
2112     *dvdlambda += dopdihs_min(forceparams[type].pdihs.cpA,
2113                               forceparams[type].pdihs.cpB,
2114                               forceparams[type].pdihs.phiA,
2115                               forceparams[type].pdihs.phiB,
2116                               forceparams[type].pdihs.mult,
2117                               phi,lambda,&vid,&dVdphi); /*  40  */
2118     
2119     vtot += vid;
2120
2121     cos_phi2 = sqr(cos_phi);                    /*   1          */
2122     if (cos_phi2 < 1) {
2123       st  = -dVdphi*gmx_invsqrt(1 - cos_phi2);      /*  12              */
2124       sth = st*cos_phi;                         /*   1          */
2125       nrij2 = iprod(r_ij,r_ij);                 /*   5          */
2126       nrkl2 = iprod(r_kl,r_kl);                 /*   5          */
2127       
2128       c   = st*gmx_invsqrt(nrij2*nrkl2);                /*  11          */ 
2129       cij = sth/nrij2;                          /*  10          */
2130       ckl = sth/nrkl2;                          /*  10          */
2131       
2132       for (m=0; m<DIM; m++) {                   /*  18+18       */
2133         f_i[m] = (c*r_kl[m]-cij*r_ij[m]);
2134         f[ai][m] += f_i[m];
2135         f[aj][m] -= f_i[m];
2136         if (!bZAxis) {
2137           f_k[m] = (c*r_ij[m]-ckl*r_kl[m]);
2138           f[ak][m] += f_k[m];
2139           f[al][m] -= f_k[m];
2140         }
2141       }
2142       
2143       if (g) {
2144         ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
2145         t1=IVEC2IS(dt);
2146       }
2147       rvec_inc(fshift[t1],f_i);
2148       rvec_dec(fshift[CENTRAL],f_i);
2149       if (!bZAxis) {
2150         if (g) {
2151           ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,al),dt);
2152           t2=IVEC2IS(dt);
2153         }
2154         rvec_inc(fshift[t2],f_k);
2155         rvec_dec(fshift[CENTRAL],f_k);
2156       }
2157     }
2158   }
2159
2160   return vtot;  /*  184 / 157 (bZAxis)  total  */
2161 }
2162
2163 real angres(int nbonds,
2164             const t_iatom forceatoms[],const t_iparams forceparams[],
2165             const rvec x[],rvec f[],rvec fshift[],
2166             const t_pbc *pbc,const t_graph *g,
2167             real lambda,real *dvdlambda,
2168             const t_mdatoms *md,t_fcdata *fcd,
2169             int *global_atom_index)
2170 {
2171   return low_angres(nbonds,forceatoms,forceparams,x,f,fshift,pbc,g,
2172                     lambda,dvdlambda,FALSE);
2173 }
2174
2175 real angresz(int nbonds,
2176              const t_iatom forceatoms[],const t_iparams forceparams[],
2177              const rvec x[],rvec f[],rvec fshift[],
2178              const t_pbc *pbc,const t_graph *g,
2179              real lambda,real *dvdlambda,
2180              const t_mdatoms *md,t_fcdata *fcd,
2181              int *global_atom_index)
2182 {
2183   return low_angres(nbonds,forceatoms,forceparams,x,f,fshift,pbc,g,
2184                     lambda,dvdlambda,TRUE);
2185 }
2186
2187 real dihres(int nbonds,
2188             const t_iatom forceatoms[],const t_iparams forceparams[],
2189             const rvec x[],rvec f[],rvec fshift[],
2190             const t_pbc *pbc,const t_graph *g,
2191             real lambda,real *dvdlambda,
2192             const t_mdatoms *md,t_fcdata *fcd,
2193             int *global_atom_index)
2194 {
2195     real vtot = 0;
2196     int  ai,aj,ak,al,i,k,type,t1,t2,t3;
2197     real phi0A,phi0B,dphiA,dphiB,kfacA,kfacB,phi0,dphi,kfac;
2198     real phi,ddphi,ddp,ddp2,dp,sign,d2r,fc,L1;
2199     rvec r_ij,r_kj,r_kl,m,n;
2200
2201     L1 = 1.0-lambda;
2202
2203     d2r = DEG2RAD;
2204     k   = 0;
2205
2206     for (i=0; (i<nbonds); )
2207     {
2208         type = forceatoms[i++];
2209         ai   = forceatoms[i++];
2210         aj   = forceatoms[i++];
2211         ak   = forceatoms[i++];
2212         al   = forceatoms[i++];
2213
2214         phi0A  = forceparams[type].dihres.phiA*d2r;
2215         dphiA  = forceparams[type].dihres.dphiA*d2r;
2216         kfacA  = forceparams[type].dihres.kfacA;
2217
2218         phi0B  = forceparams[type].dihres.phiB*d2r;
2219         dphiB  = forceparams[type].dihres.dphiB*d2r;
2220         kfacB  = forceparams[type].dihres.kfacB;
2221
2222         phi0  = L1*phi0A + lambda*phi0B;
2223         dphi  = L1*dphiA + lambda*dphiB;
2224         kfac = L1*kfacA + lambda*kfacB;
2225
2226         phi = dih_angle(x[ai],x[aj],x[ak],x[al],pbc,r_ij,r_kj,r_kl,m,n,
2227                         &sign,&t1,&t2,&t3);
2228         /* 84 flops */
2229
2230         if (debug)
2231         {
2232             fprintf(debug,"dihres[%d]: %d %d %d %d : phi=%f, dphi=%f, kfac=%f\n",
2233                     k++,ai,aj,ak,al,phi0,dphi,kfac);
2234         }
2235         /* phi can jump if phi0 is close to Pi/-Pi, which will cause huge
2236          * force changes if we just apply a normal harmonic.
2237          * Instead, we first calculate phi-phi0 and take it modulo (-Pi,Pi).
2238          * This means we will never have the periodicity problem, unless
2239          * the dihedral is Pi away from phiO, which is very unlikely due to
2240          * the potential.
2241          */
2242         dp = phi-phi0;
2243         make_dp_periodic(&dp);
2244
2245         if (dp > dphi)
2246         {
2247             ddp = dp-dphi;
2248         }
2249         else if (dp < -dphi)
2250         {
2251             ddp = dp+dphi;
2252         }
2253         else
2254         {
2255             ddp = 0;
2256         }
2257
2258         if (ddp != 0.0)
2259         {
2260             ddp2 = ddp*ddp;
2261             vtot += 0.5*kfac*ddp2;
2262             ddphi = kfac*ddp;
2263
2264             *dvdlambda += 0.5*(kfacB - kfacA)*ddp2;
2265             /* lambda dependence from changing restraint distances */
2266             if (ddp > 0)
2267             {
2268                 *dvdlambda -= kfac*ddp*((dphiB - dphiA)+(phi0B - phi0A));
2269             }
2270             else if (ddp < 0)
2271             {
2272                 *dvdlambda += kfac*ddp*((dphiB - dphiA)-(phi0B - phi0A));
2273             }
2274             do_dih_fup(ai,aj,ak,al,ddphi,r_ij,r_kj,r_kl,m,n,
2275                        f,fshift,pbc,g,x,t1,t2,t3);              /* 112          */
2276         }
2277     }
2278     return vtot;
2279 }
2280
2281
2282 real unimplemented(int nbonds,
2283                    const t_iatom forceatoms[],const t_iparams forceparams[],
2284                    const rvec x[],rvec f[],rvec fshift[],
2285                    const t_pbc *pbc,const t_graph *g,
2286                    real lambda,real *dvdlambda,
2287                    const t_mdatoms *md,t_fcdata *fcd,
2288                    int *global_atom_index)
2289 {
2290   gmx_impl("*** you are using a not implemented function");
2291
2292   return 0.0; /* To make the compiler happy */
2293 }
2294
2295 real rbdihs(int nbonds,
2296             const t_iatom forceatoms[],const t_iparams forceparams[],
2297             const rvec x[],rvec f[],rvec fshift[],
2298             const t_pbc *pbc,const t_graph *g,
2299             real lambda,real *dvdlambda,
2300             const t_mdatoms *md,t_fcdata *fcd,
2301             int *global_atom_index)
2302 {
2303   const real c0=0.0,c1=1.0,c2=2.0,c3=3.0,c4=4.0,c5=5.0;
2304   int  type,ai,aj,ak,al,i,j;
2305   int  t1,t2,t3;
2306   rvec r_ij,r_kj,r_kl,m,n;
2307   real parmA[NR_RBDIHS];
2308   real parmB[NR_RBDIHS];
2309   real parm[NR_RBDIHS];
2310   real cos_phi,phi,rbp,rbpBA;
2311   real v,sign,ddphi,sin_phi;
2312   real cosfac,vtot;
2313   real L1   = 1.0-lambda;
2314   real dvdl_term=0;
2315
2316   vtot = 0.0;
2317   for(i=0; (i<nbonds); ) {
2318     type = forceatoms[i++];
2319     ai   = forceatoms[i++];
2320     aj   = forceatoms[i++];
2321     ak   = forceatoms[i++];
2322     al   = forceatoms[i++];
2323
2324       phi=dih_angle(x[ai],x[aj],x[ak],x[al],pbc,r_ij,r_kj,r_kl,m,n,
2325                     &sign,&t1,&t2,&t3);                 /*  84          */
2326
2327     /* Change to polymer convention */
2328     if (phi < c0)
2329       phi += M_PI;
2330     else
2331       phi -= M_PI;                      /*   1          */
2332       
2333     cos_phi = cos(phi);         
2334     /* Beware of accuracy loss, cannot use 1-sqrt(cos^2) ! */
2335     sin_phi = sin(phi);
2336
2337     for(j=0; (j<NR_RBDIHS); j++) {
2338       parmA[j] = forceparams[type].rbdihs.rbcA[j];
2339       parmB[j] = forceparams[type].rbdihs.rbcB[j];
2340       parm[j]  = L1*parmA[j]+lambda*parmB[j];
2341     }
2342     /* Calculate cosine powers */
2343     /* Calculate the energy */
2344     /* Calculate the derivative */
2345
2346     v       = parm[0];
2347     dvdl_term   += (parmB[0]-parmA[0]);
2348     ddphi   = c0;
2349     cosfac  = c1;
2350     
2351     rbp     = parm[1];
2352     rbpBA   = parmB[1]-parmA[1];
2353     ddphi  += rbp*cosfac;
2354     cosfac *= cos_phi;
2355     v      += cosfac*rbp;
2356     dvdl_term   += cosfac*rbpBA;
2357     rbp     = parm[2];
2358     rbpBA   = parmB[2]-parmA[2];    
2359     ddphi  += c2*rbp*cosfac;
2360     cosfac *= cos_phi;
2361     v      += cosfac*rbp;
2362     dvdl_term   += cosfac*rbpBA;
2363     rbp     = parm[3];
2364     rbpBA   = parmB[3]-parmA[3];
2365     ddphi  += c3*rbp*cosfac;
2366     cosfac *= cos_phi;
2367     v      += cosfac*rbp;
2368     dvdl_term   += cosfac*rbpBA;
2369     rbp     = parm[4];
2370     rbpBA   = parmB[4]-parmA[4];
2371     ddphi  += c4*rbp*cosfac;
2372     cosfac *= cos_phi;
2373     v      += cosfac*rbp;
2374     dvdl_term   += cosfac*rbpBA;
2375     rbp     = parm[5];
2376     rbpBA   = parmB[5]-parmA[5];
2377     ddphi  += c5*rbp*cosfac;
2378     cosfac *= cos_phi;
2379     v      += cosfac*rbp;
2380     dvdl_term   += cosfac*rbpBA;
2381    
2382     ddphi = -ddphi*sin_phi;                             /*  11          */
2383     
2384     do_dih_fup(ai,aj,ak,al,ddphi,r_ij,r_kj,r_kl,m,n,
2385                f,fshift,pbc,g,x,t1,t2,t3);              /* 112          */
2386     vtot += v;
2387   }  
2388   *dvdlambda += dvdl_term;
2389
2390   return vtot;
2391 }
2392
2393 int cmap_setup_grid_index(int ip, int grid_spacing, int *ipm1, int *ipp1, int *ipp2)
2394 {
2395         int im1, ip1, ip2;
2396         
2397         if(ip<0)
2398         {
2399                 ip = ip + grid_spacing - 1;
2400         }
2401         else if(ip > grid_spacing)
2402         {
2403                 ip = ip - grid_spacing - 1;
2404         }
2405         
2406         im1 = ip - 1;
2407         ip1 = ip + 1;
2408         ip2 = ip + 2;
2409         
2410         if(ip == 0)
2411         {
2412                 im1 = grid_spacing - 1;
2413         }
2414         else if(ip == grid_spacing-2)
2415         {
2416                 ip2 = 0;
2417         }
2418         else if(ip == grid_spacing-1)
2419         {
2420                 ip1 = 0;
2421                 ip2 = 1;
2422         }
2423         
2424         *ipm1 = im1;
2425         *ipp1 = ip1;
2426         *ipp2 = ip2;
2427         
2428         return ip;
2429         
2430 }
2431
2432 real cmap_dihs(int nbonds,
2433                            const t_iatom forceatoms[],const t_iparams forceparams[],
2434                const gmx_cmap_t *cmap_grid,
2435                            const rvec x[],rvec f[],rvec fshift[],
2436                            const t_pbc *pbc,const t_graph *g,
2437                            real lambda,real *dvdlambda,
2438                            const t_mdatoms *md,t_fcdata *fcd,
2439                            int *global_atom_index)
2440 {
2441         int i,j,k,n,idx;
2442         int ai,aj,ak,al,am;
2443         int a1i,a1j,a1k,a1l,a2i,a2j,a2k,a2l;
2444         int type,cmapA;
2445         int t11,t21,t31,t12,t22,t32;
2446         int iphi1,ip1m1,ip1p1,ip1p2;
2447         int iphi2,ip2m1,ip2p1,ip2p2;
2448         int l1,l2,l3,l4;
2449         int pos1,pos2,pos3,pos4,tmp;
2450         
2451         real ty[4],ty1[4],ty2[4],ty12[4],tc[16],tx[16];
2452         real phi1,psi1,cos_phi1,sin_phi1,sign1,xphi1;
2453         real phi2,psi2,cos_phi2,sin_phi2,sign2,xphi2;
2454         real dx,xx,tt,tu,e,df1,df2,ddf1,ddf2,ddf12,vtot;
2455         real ra21,rb21,rg21,rg1,rgr1,ra2r1,rb2r1,rabr1;
2456         real ra22,rb22,rg22,rg2,rgr2,ra2r2,rb2r2,rabr2;
2457         real fg1,hg1,fga1,hgb1,gaa1,gbb1;
2458         real fg2,hg2,fga2,hgb2,gaa2,gbb2;
2459         real fac;
2460         
2461         rvec r1_ij, r1_kj, r1_kl,m1,n1;
2462         rvec r2_ij, r2_kj, r2_kl,m2,n2;
2463         rvec f1_i,f1_j,f1_k,f1_l;
2464         rvec f2_i,f2_j,f2_k,f2_l;
2465         rvec a1,b1,a2,b2;
2466         rvec f1,g1,h1,f2,g2,h2;
2467         rvec dtf1,dtg1,dth1,dtf2,dtg2,dth2;
2468         ivec jt1,dt1_ij,dt1_kj,dt1_lj;
2469         ivec jt2,dt2_ij,dt2_kj,dt2_lj;
2470
2471     const real *cmapd;
2472
2473         int loop_index[4][4] = {
2474                 {0,4,8,12},
2475                 {1,5,9,13},
2476                 {2,6,10,14},
2477                 {3,7,11,15}
2478         };
2479         
2480         /* Total CMAP energy */
2481         vtot = 0;
2482         
2483         for(n=0;n<nbonds; )
2484         {
2485                 /* Five atoms are involved in the two torsions */
2486                 type   = forceatoms[n++];
2487                 ai     = forceatoms[n++];
2488                 aj     = forceatoms[n++];
2489                 ak     = forceatoms[n++];
2490                 al     = forceatoms[n++];
2491                 am     = forceatoms[n++];
2492                 
2493                 /* Which CMAP type is this */
2494                 cmapA = forceparams[type].cmap.cmapA;
2495         cmapd = cmap_grid->cmapdata[cmapA].cmap;
2496
2497                 /* First torsion */
2498                 a1i   = ai;
2499                 a1j   = aj;
2500                 a1k   = ak;
2501                 a1l   = al;
2502                 
2503                 phi1  = dih_angle(x[a1i], x[a1j], x[a1k], x[a1l], pbc, r1_ij, r1_kj, r1_kl, m1, n1,
2504                                                    &sign1, &t11, &t21, &t31); /* 84 */
2505                 
2506         cos_phi1 = cos(phi1);
2507         
2508                 a1[0] = r1_ij[1]*r1_kj[2]-r1_ij[2]*r1_kj[1];
2509                 a1[1] = r1_ij[2]*r1_kj[0]-r1_ij[0]*r1_kj[2];
2510                 a1[2] = r1_ij[0]*r1_kj[1]-r1_ij[1]*r1_kj[0]; /* 9 */
2511                 
2512                 b1[0] = r1_kl[1]*r1_kj[2]-r1_kl[2]*r1_kj[1];
2513                 b1[1] = r1_kl[2]*r1_kj[0]-r1_kl[0]*r1_kj[2];
2514                 b1[2] = r1_kl[0]*r1_kj[1]-r1_kl[1]*r1_kj[0]; /* 9 */
2515                 
2516                 tmp = pbc_rvec_sub(pbc,x[a1l],x[a1k],h1);
2517                 
2518                 ra21  = iprod(a1,a1);       /* 5 */
2519                 rb21  = iprod(b1,b1);       /* 5 */
2520                 rg21  = iprod(r1_kj,r1_kj); /* 5 */
2521                 rg1   = sqrt(rg21);
2522                 
2523                 rgr1  = 1.0/rg1;
2524                 ra2r1 = 1.0/ra21;
2525                 rb2r1 = 1.0/rb21;
2526                 rabr1 = sqrt(ra2r1*rb2r1);
2527                 
2528                 sin_phi1 = rg1 * rabr1 * iprod(a1,h1) * (-1);
2529                 
2530                 if(cos_phi1 < -0.5 || cos_phi1 > 0.5)
2531                 {
2532                         phi1 = asin(sin_phi1);
2533                         
2534                         if(cos_phi1 < 0)
2535                         {
2536                                 if(phi1 > 0)
2537                                 {
2538                                         phi1 = M_PI - phi1;
2539                                 }
2540                                 else
2541                                 {
2542                                         phi1 = -M_PI - phi1;
2543                                 }
2544                         }
2545                 }
2546                 else
2547                 {
2548                         phi1 = acos(cos_phi1);
2549                         
2550                         if(sin_phi1 < 0)
2551                         {
2552                                 phi1 = -phi1;
2553                         }
2554                 }
2555                 
2556                 xphi1 = phi1 + M_PI; /* 1 */
2557                 
2558                 /* Second torsion */
2559                 a2i   = aj;
2560                 a2j   = ak;
2561                 a2k   = al;
2562                 a2l   = am;
2563                 
2564                 phi2  = dih_angle(x[a2i], x[a2j], x[a2k], x[a2l], pbc, r2_ij, r2_kj, r2_kl, m2, n2,
2565                                                   &sign2, &t12, &t22, &t32); /* 84 */
2566                 
2567         cos_phi2 = cos(phi2);
2568
2569                 a2[0] = r2_ij[1]*r2_kj[2]-r2_ij[2]*r2_kj[1];
2570                 a2[1] = r2_ij[2]*r2_kj[0]-r2_ij[0]*r2_kj[2];
2571                 a2[2] = r2_ij[0]*r2_kj[1]-r2_ij[1]*r2_kj[0]; /* 9 */
2572                 
2573                 b2[0] = r2_kl[1]*r2_kj[2]-r2_kl[2]*r2_kj[1];
2574                 b2[1] = r2_kl[2]*r2_kj[0]-r2_kl[0]*r2_kj[2];
2575                 b2[2] = r2_kl[0]*r2_kj[1]-r2_kl[1]*r2_kj[0]; /* 9 */
2576                 
2577                 tmp = pbc_rvec_sub(pbc,x[a2l],x[a2k],h2);
2578                 
2579                 ra22  = iprod(a2,a2);         /* 5 */
2580                 rb22  = iprod(b2,b2);         /* 5 */
2581                 rg22  = iprod(r2_kj,r2_kj);   /* 5 */
2582                 rg2   = sqrt(rg22);
2583                 
2584                 rgr2  = 1.0/rg2;
2585                 ra2r2 = 1.0/ra22;
2586                 rb2r2 = 1.0/rb22;
2587                 rabr2 = sqrt(ra2r2*rb2r2);
2588                 
2589                 sin_phi2 = rg2 * rabr2 * iprod(a2,h2) * (-1);
2590                 
2591                 if(cos_phi2 < -0.5 || cos_phi2 > 0.5)
2592                 {
2593                         phi2 = asin(sin_phi2);
2594                         
2595                         if(cos_phi2 < 0)
2596                         {
2597                                 if(phi2 > 0)
2598                                 {
2599                                         phi2 = M_PI - phi2;
2600                                 }
2601                                 else
2602                                 {
2603                                         phi2 = -M_PI - phi2;
2604                                 }
2605                         }
2606                 }
2607                 else
2608                 {
2609                         phi2 = acos(cos_phi2);
2610                         
2611                         if(sin_phi2 < 0)
2612                         {
2613                                 phi2 = -phi2;
2614                         }
2615                 }
2616                 
2617                 xphi2 = phi2 + M_PI; /* 1 */
2618                 
2619                 /* Range mangling */
2620                 if(xphi1<0)
2621                 {
2622                         xphi1 = xphi1 + 2*M_PI;
2623                 }
2624                 else if(xphi1>=2*M_PI)
2625                 {
2626                         xphi1 = xphi1 - 2*M_PI;
2627                 }
2628                 
2629                 if(xphi2<0)
2630                 {
2631                         xphi2 = xphi2 + 2*M_PI;
2632                 }
2633                 else if(xphi2>=2*M_PI)
2634                 {
2635                         xphi2 = xphi2 - 2*M_PI;
2636                 }
2637                 
2638                 /* Number of grid points */
2639                 dx = 2*M_PI / cmap_grid->grid_spacing;
2640                 
2641                 /* Where on the grid are we */
2642                 iphi1 = (int)(xphi1/dx);
2643                 iphi2 = (int)(xphi2/dx);
2644                 
2645                 iphi1 = cmap_setup_grid_index(iphi1, cmap_grid->grid_spacing, &ip1m1,&ip1p1,&ip1p2);
2646                 iphi2 = cmap_setup_grid_index(iphi2, cmap_grid->grid_spacing, &ip2m1,&ip2p1,&ip2p2);
2647                 
2648                 pos1    = iphi1*cmap_grid->grid_spacing+iphi2;
2649                 pos2    = ip1p1*cmap_grid->grid_spacing+iphi2;
2650                 pos3    = ip1p1*cmap_grid->grid_spacing+ip2p1;
2651                 pos4    = iphi1*cmap_grid->grid_spacing+ip2p1;
2652
2653                 ty[0]   = cmapd[pos1*4];
2654                 ty[1]   = cmapd[pos2*4];
2655                 ty[2]   = cmapd[pos3*4];
2656                 ty[3]   = cmapd[pos4*4];
2657                 
2658                 ty1[0]   = cmapd[pos1*4+1];
2659                 ty1[1]   = cmapd[pos2*4+1];
2660                 ty1[2]   = cmapd[pos3*4+1];
2661                 ty1[3]   = cmapd[pos4*4+1];
2662                 
2663                 ty2[0]   = cmapd[pos1*4+2];
2664                 ty2[1]   = cmapd[pos2*4+2];
2665                 ty2[2]   = cmapd[pos3*4+2];
2666                 ty2[3]   = cmapd[pos4*4+2];
2667                 
2668                 ty12[0]   = cmapd[pos1*4+3];
2669                 ty12[1]   = cmapd[pos2*4+3];
2670                 ty12[2]   = cmapd[pos3*4+3];
2671                 ty12[3]   = cmapd[pos4*4+3];
2672                 
2673                 /* Switch to degrees */
2674                 dx = 360.0 / cmap_grid->grid_spacing;
2675                 xphi1 = xphi1 * RAD2DEG;
2676                 xphi2 = xphi2 * RAD2DEG; 
2677                 
2678                 for(i=0;i<4;i++) /* 16 */
2679                 {
2680                         tx[i] = ty[i];
2681                         tx[i+4] = ty1[i]*dx;
2682                         tx[i+8] = ty2[i]*dx;
2683                         tx[i+12] = ty12[i]*dx*dx;
2684                 }
2685                 
2686                 idx=0;
2687                 for(i=0;i<4;i++) /* 1056 */
2688                 {
2689                         for(j=0;j<4;j++)
2690                         {
2691                                 xx = 0;
2692                                 for(k=0;k<16;k++)
2693                                 {
2694                                         xx = xx + cmap_coeff_matrix[k*16+idx]*tx[k];
2695                                 }
2696                                 
2697                                 idx++;
2698                                 tc[i*4+j]=xx;
2699                         }
2700                 }
2701                 
2702                 tt    = (xphi1-iphi1*dx)/dx;
2703                 tu    = (xphi2-iphi2*dx)/dx;
2704                 
2705                 e     = 0;
2706                 df1   = 0;
2707                 df2   = 0;
2708                 ddf1  = 0;
2709                 ddf2  = 0;
2710                 ddf12 = 0;
2711                 
2712                 for(i=3;i>=0;i--)
2713                 {
2714                         l1 = loop_index[i][3];
2715                         l2 = loop_index[i][2];
2716                         l3 = loop_index[i][1];
2717                         
2718                         e     = tt * e    + ((tc[i*4+3]*tu+tc[i*4+2])*tu + tc[i*4+1])*tu+tc[i*4];
2719                         df1   = tu * df1  + (3.0*tc[l1]*tt+2.0*tc[l2])*tt+tc[l3];
2720                         df2   = tt * df2  + (3.0*tc[i*4+3]*tu+2.0*tc[i*4+2])*tu+tc[i*4+1];
2721                         ddf1  = tu * ddf1 + 2.0*3.0*tc[l1]*tt+2.0*tc[l2];
2722                         ddf2  = tt * ddf2 + 2.0*3.0*tc[4*i+3]*tu+2.0*tc[4*i+2];
2723                 }
2724                 
2725                 ddf12 = tc[5] + 2.0*tc[9]*tt + 3.0*tc[13]*tt*tt + 2.0*tu*(tc[6]+2.0*tc[10]*tt+3.0*tc[14]*tt*tt) +
2726                 3.0*tu*tu*(tc[7]+2.0*tc[11]*tt+3.0*tc[15]*tt*tt);
2727                 
2728                 fac     = RAD2DEG/dx;
2729                 df1     = df1   * fac;
2730                 df2     = df2   * fac;
2731                 ddf1    = ddf1  * fac * fac;
2732                 ddf2    = ddf2  * fac * fac;
2733                 ddf12   = ddf12 * fac * fac;
2734                 
2735                 /* CMAP energy */
2736                 vtot += e;
2737                 
2738                 /* Do forces - first torsion */
2739                 fg1       = iprod(r1_ij,r1_kj);
2740                 hg1       = iprod(r1_kl,r1_kj);
2741                 fga1      = fg1*ra2r1*rgr1;
2742                 hgb1      = hg1*rb2r1*rgr1;
2743                 gaa1      = -ra2r1*rg1;
2744                 gbb1      = rb2r1*rg1;
2745                 
2746                 for(i=0;i<DIM;i++)
2747                 {
2748                         dtf1[i]   = gaa1 * a1[i];
2749                         dtg1[i]   = fga1 * a1[i] - hgb1 * b1[i];
2750                         dth1[i]   = gbb1 * b1[i];
2751                         
2752                         f1[i]     = df1  * dtf1[i];
2753                         g1[i]     = df1  * dtg1[i];
2754                         h1[i]     = df1  * dth1[i];
2755                         
2756                         f1_i[i]   =  f1[i];
2757                         f1_j[i]   = -f1[i] - g1[i];
2758                         f1_k[i]   =  h1[i] + g1[i];
2759                         f1_l[i]   = -h1[i];
2760                         
2761                         f[a1i][i] = f[a1i][i] + f1_i[i];
2762                         f[a1j][i] = f[a1j][i] + f1_j[i]; /* - f1[i] - g1[i] */                                                            
2763                         f[a1k][i] = f[a1k][i] + f1_k[i]; /* h1[i] + g1[i] */                                                            
2764                         f[a1l][i] = f[a1l][i] + f1_l[i]; /* h1[i] */                                                                       
2765                 }
2766                 
2767                 /* Do forces - second torsion */
2768                 fg2       = iprod(r2_ij,r2_kj);
2769                 hg2       = iprod(r2_kl,r2_kj);
2770                 fga2      = fg2*ra2r2*rgr2;
2771                 hgb2      = hg2*rb2r2*rgr2;
2772                 gaa2      = -ra2r2*rg2;
2773                 gbb2      = rb2r2*rg2;
2774                 
2775                 for(i=0;i<DIM;i++)
2776                 {
2777                         dtf2[i]   = gaa2 * a2[i];
2778                         dtg2[i]   = fga2 * a2[i] - hgb2 * b2[i];
2779                         dth2[i]   = gbb2 * b2[i];
2780                         
2781                         f2[i]     = df2  * dtf2[i];
2782                         g2[i]     = df2  * dtg2[i];
2783                         h2[i]     = df2  * dth2[i];
2784                         
2785                         f2_i[i]   =  f2[i];
2786                         f2_j[i]   = -f2[i] - g2[i];
2787                         f2_k[i]   =  h2[i] + g2[i];
2788                         f2_l[i]   = -h2[i];
2789                         
2790                         f[a2i][i] = f[a2i][i] + f2_i[i]; /* f2[i] */                                                                        
2791                         f[a2j][i] = f[a2j][i] + f2_j[i]; /* - f2[i] - g2[i] */                                                              
2792                         f[a2k][i] = f[a2k][i] + f2_k[i]; /* h2[i] + g2[i] */                            
2793                         f[a2l][i] = f[a2l][i] + f2_l[i]; /* - h2[i] */                                                                      
2794                 }
2795                 
2796                 /* Shift forces */
2797                 if(g)
2798                 {
2799                         copy_ivec(SHIFT_IVEC(g,a1j), jt1);
2800                         ivec_sub(SHIFT_IVEC(g,a1i),  jt1,dt1_ij);
2801                         ivec_sub(SHIFT_IVEC(g,a1k),  jt1,dt1_kj);
2802                         ivec_sub(SHIFT_IVEC(g,a1l),  jt1,dt1_lj);
2803                         t11 = IVEC2IS(dt1_ij);
2804                         t21 = IVEC2IS(dt1_kj);
2805                         t31 = IVEC2IS(dt1_lj);
2806                         
2807                         copy_ivec(SHIFT_IVEC(g,a2j), jt2);
2808                         ivec_sub(SHIFT_IVEC(g,a2i),  jt2,dt2_ij);
2809                         ivec_sub(SHIFT_IVEC(g,a2k),  jt2,dt2_kj);
2810                         ivec_sub(SHIFT_IVEC(g,a2l),  jt2,dt2_lj);
2811                         t12 = IVEC2IS(dt2_ij);
2812                         t22 = IVEC2IS(dt2_kj);
2813                         t32 = IVEC2IS(dt2_lj);
2814                 }
2815                 else if(pbc)
2816                 {
2817                         t31 = pbc_rvec_sub(pbc,x[a1l],x[a1j],h1);
2818                         t32 = pbc_rvec_sub(pbc,x[a2l],x[a2j],h2);
2819                 }
2820                 else
2821                 {
2822                         t31 = CENTRAL;
2823                         t32 = CENTRAL;
2824                 }
2825                 
2826                 rvec_inc(fshift[t11],f1_i);
2827                 rvec_inc(fshift[CENTRAL],f1_j);
2828                 rvec_inc(fshift[t21],f1_k);
2829                 rvec_inc(fshift[t31],f1_l);
2830                 
2831                 rvec_inc(fshift[t21],f2_i);
2832                 rvec_inc(fshift[CENTRAL],f2_j);
2833                 rvec_inc(fshift[t22],f2_k);
2834                 rvec_inc(fshift[t32],f2_l);
2835         }       
2836         return vtot;
2837 }
2838
2839
2840
2841 /***********************************************************
2842  *
2843  *   G R O M O S  9 6   F U N C T I O N S
2844  *
2845  ***********************************************************/
2846 real g96harmonic(real kA,real kB,real xA,real xB,real x,real lambda,
2847                  real *V,real *F)
2848 {
2849   const real half=0.5;
2850   real  L1,kk,x0,dx,dx2;
2851   real  v,f,dvdlambda;
2852   
2853   L1    = 1.0-lambda;
2854   kk    = L1*kA+lambda*kB;
2855   x0    = L1*xA+lambda*xB;
2856   
2857   dx    = x-x0;
2858   dx2   = dx*dx;
2859   
2860   f     = -kk*dx;
2861   v     = half*kk*dx2;
2862   dvdlambda  = half*(kB-kA)*dx2 + (xA-xB)*kk*dx;
2863   
2864   *F    = f;
2865   *V    = v;
2866   
2867   return dvdlambda;
2868   
2869   /* That was 21 flops */
2870 }
2871
2872 real g96bonds(int nbonds,
2873               const t_iatom forceatoms[],const t_iparams forceparams[],
2874               const rvec x[],rvec f[],rvec fshift[],
2875               const t_pbc *pbc,const t_graph *g,
2876               real lambda,real *dvdlambda,
2877               const t_mdatoms *md,t_fcdata *fcd,
2878               int *global_atom_index)
2879 {
2880   int  i,m,ki,ai,aj,type;
2881   real dr2,fbond,vbond,fij,vtot;
2882   rvec dx;
2883   ivec dt;
2884   
2885   vtot = 0.0;
2886   for(i=0; (i<nbonds); ) {
2887     type = forceatoms[i++];
2888     ai   = forceatoms[i++];
2889     aj   = forceatoms[i++];
2890   
2891     ki   = pbc_rvec_sub(pbc,x[ai],x[aj],dx);            /*   3          */
2892     dr2  = iprod(dx,dx);                                /*   5          */
2893       
2894     *dvdlambda += g96harmonic(forceparams[type].harmonic.krA,
2895                               forceparams[type].harmonic.krB,
2896                               forceparams[type].harmonic.rA,
2897                               forceparams[type].harmonic.rB,
2898                               dr2,lambda,&vbond,&fbond);
2899
2900     vtot  += 0.5*vbond;                             /* 1*/
2901 #ifdef DEBUG
2902     if (debug)
2903       fprintf(debug,"G96-BONDS: dr = %10g  vbond = %10g  fbond = %10g\n",
2904               sqrt(dr2),vbond,fbond);
2905 #endif
2906    
2907     if (g) {
2908       ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
2909       ki=IVEC2IS(dt);
2910     }
2911     for (m=0; (m<DIM); m++) {                   /*  15          */
2912       fij=fbond*dx[m];
2913       f[ai][m]+=fij;
2914       f[aj][m]-=fij;
2915       fshift[ki][m]+=fij;
2916       fshift[CENTRAL][m]-=fij;
2917     }
2918   }                                     /* 44 TOTAL     */
2919   return vtot;
2920 }
2921
2922 real g96bond_angle(const rvec xi,const rvec xj,const rvec xk,const t_pbc *pbc,
2923                    rvec r_ij,rvec r_kj,
2924                    int *t1,int *t2)
2925 /* Return value is the angle between the bonds i-j and j-k */
2926 {
2927   real costh;
2928   
2929   *t1 = pbc_rvec_sub(pbc,xi,xj,r_ij);                   /*  3           */
2930   *t2 = pbc_rvec_sub(pbc,xk,xj,r_kj);                   /*  3           */
2931
2932   costh=cos_angle(r_ij,r_kj);           /* 25           */
2933                                         /* 41 TOTAL     */
2934   return costh;
2935 }
2936
2937 real g96angles(int nbonds,
2938                const t_iatom forceatoms[],const t_iparams forceparams[],
2939                const rvec x[],rvec f[],rvec fshift[],
2940                const t_pbc *pbc,const t_graph *g,
2941                real lambda,real *dvdlambda,
2942                const t_mdatoms *md,t_fcdata *fcd,
2943                int *global_atom_index)
2944 {
2945   int  i,ai,aj,ak,type,m,t1,t2;
2946   rvec r_ij,r_kj;
2947   real cos_theta,dVdt,va,vtot;
2948   real rij_1,rij_2,rkj_1,rkj_2,rijrkj_1;
2949   rvec f_i,f_j,f_k;
2950   ivec jt,dt_ij,dt_kj;
2951   
2952   vtot = 0.0;
2953   for(i=0; (i<nbonds); ) {
2954     type = forceatoms[i++];
2955     ai   = forceatoms[i++];
2956     aj   = forceatoms[i++];
2957     ak   = forceatoms[i++];
2958     
2959     cos_theta  = g96bond_angle(x[ai],x[aj],x[ak],pbc,r_ij,r_kj,&t1,&t2);
2960
2961     *dvdlambda += g96harmonic(forceparams[type].harmonic.krA,
2962                               forceparams[type].harmonic.krB,
2963                               forceparams[type].harmonic.rA,
2964                               forceparams[type].harmonic.rB,
2965                               cos_theta,lambda,&va,&dVdt);
2966     vtot    += va;
2967     
2968     rij_1    = gmx_invsqrt(iprod(r_ij,r_ij));
2969     rkj_1    = gmx_invsqrt(iprod(r_kj,r_kj));
2970     rij_2    = rij_1*rij_1;
2971     rkj_2    = rkj_1*rkj_1;
2972     rijrkj_1 = rij_1*rkj_1;                     /* 23 */
2973     
2974 #ifdef DEBUG
2975     if (debug)
2976       fprintf(debug,"G96ANGLES: costheta = %10g  vth = %10g  dV/dct = %10g\n",
2977               cos_theta,va,dVdt);
2978 #endif
2979     for (m=0; (m<DIM); m++) {                   /*  42  */
2980       f_i[m]=dVdt*(r_kj[m]*rijrkj_1 - r_ij[m]*rij_2*cos_theta);
2981       f_k[m]=dVdt*(r_ij[m]*rijrkj_1 - r_kj[m]*rkj_2*cos_theta);
2982       f_j[m]=-f_i[m]-f_k[m];
2983       f[ai][m]+=f_i[m];
2984       f[aj][m]+=f_j[m];
2985       f[ak][m]+=f_k[m];
2986     }
2987     
2988     if (g) {
2989       copy_ivec(SHIFT_IVEC(g,aj),jt);
2990       
2991       ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
2992       ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
2993       t1=IVEC2IS(dt_ij);
2994       t2=IVEC2IS(dt_kj);
2995     }      
2996     rvec_inc(fshift[t1],f_i);
2997     rvec_inc(fshift[CENTRAL],f_j);
2998     rvec_inc(fshift[t2],f_k);               /* 9 */
2999     /* 163 TOTAL        */
3000   }
3001   return vtot;
3002 }
3003
3004 real cross_bond_bond(int nbonds,
3005                      const t_iatom forceatoms[],const t_iparams forceparams[],
3006                      const rvec x[],rvec f[],rvec fshift[],
3007                      const t_pbc *pbc,const t_graph *g,
3008                      real lambda,real *dvdlambda,
3009                      const t_mdatoms *md,t_fcdata *fcd,
3010                      int *global_atom_index)
3011 {
3012   /* Potential from Lawrence and Skimmer, Chem. Phys. Lett. 372 (2003)
3013    * pp. 842-847
3014    */
3015   int  i,ai,aj,ak,type,m,t1,t2;
3016   rvec r_ij,r_kj;
3017   real vtot,vrr,s1,s2,r1,r2,r1e,r2e,krr;
3018   rvec f_i,f_j,f_k;
3019   ivec jt,dt_ij,dt_kj;
3020   
3021   vtot = 0.0;
3022   for(i=0; (i<nbonds); ) {
3023     type = forceatoms[i++];
3024     ai   = forceatoms[i++];
3025     aj   = forceatoms[i++];
3026     ak   = forceatoms[i++];
3027     r1e  = forceparams[type].cross_bb.r1e;
3028     r2e  = forceparams[type].cross_bb.r2e;
3029     krr  = forceparams[type].cross_bb.krr;
3030     
3031     /* Compute distance vectors ... */
3032     t1 = pbc_rvec_sub(pbc,x[ai],x[aj],r_ij);
3033     t2 = pbc_rvec_sub(pbc,x[ak],x[aj],r_kj);
3034     
3035     /* ... and their lengths */
3036     r1 = norm(r_ij);
3037     r2 = norm(r_kj);
3038     
3039     /* Deviations from ideality */
3040     s1 = r1-r1e;
3041     s2 = r2-r2e;
3042     
3043     /* Energy (can be negative!) */
3044     vrr   = krr*s1*s2;
3045     vtot += vrr;
3046     
3047     /* Forces */
3048     svmul(-krr*s2/r1,r_ij,f_i);
3049     svmul(-krr*s1/r2,r_kj,f_k);
3050     
3051     for (m=0; (m<DIM); m++) {                   /*  12  */
3052       f_j[m]    = -f_i[m] - f_k[m];
3053       f[ai][m] += f_i[m];
3054       f[aj][m] += f_j[m];
3055       f[ak][m] += f_k[m];
3056     }
3057     
3058     /* Virial stuff */
3059     if (g) {
3060       copy_ivec(SHIFT_IVEC(g,aj),jt);
3061       
3062       ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
3063       ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
3064       t1=IVEC2IS(dt_ij);
3065       t2=IVEC2IS(dt_kj);
3066     }      
3067     rvec_inc(fshift[t1],f_i);
3068     rvec_inc(fshift[CENTRAL],f_j);
3069     rvec_inc(fshift[t2],f_k);               /* 9 */
3070     /* 163 TOTAL        */
3071   }
3072   return vtot;
3073 }
3074
3075 real cross_bond_angle(int nbonds,
3076                       const t_iatom forceatoms[],const t_iparams forceparams[],
3077                       const rvec x[],rvec f[],rvec fshift[],
3078                       const t_pbc *pbc,const t_graph *g,
3079                       real lambda,real *dvdlambda,
3080                       const t_mdatoms *md,t_fcdata *fcd,
3081                       int *global_atom_index)
3082 {
3083   /* Potential from Lawrence and Skimmer, Chem. Phys. Lett. 372 (2003)
3084    * pp. 842-847
3085    */
3086   int  i,ai,aj,ak,type,m,t1,t2,t3;
3087   rvec r_ij,r_kj,r_ik;
3088   real vtot,vrt,s1,s2,s3,r1,r2,r3,r1e,r2e,r3e,krt,k1,k2,k3;
3089   rvec f_i,f_j,f_k;
3090   ivec jt,dt_ij,dt_kj;
3091   
3092   vtot = 0.0;
3093   for(i=0; (i<nbonds); ) {
3094     type = forceatoms[i++];
3095     ai   = forceatoms[i++];
3096     aj   = forceatoms[i++];
3097     ak   = forceatoms[i++];
3098     r1e  = forceparams[type].cross_ba.r1e;
3099     r2e  = forceparams[type].cross_ba.r2e;
3100     r3e  = forceparams[type].cross_ba.r3e;
3101     krt  = forceparams[type].cross_ba.krt;
3102     
3103     /* Compute distance vectors ... */
3104     t1 = pbc_rvec_sub(pbc,x[ai],x[aj],r_ij);
3105     t2 = pbc_rvec_sub(pbc,x[ak],x[aj],r_kj);
3106     t3 = pbc_rvec_sub(pbc,x[ai],x[ak],r_ik);
3107     
3108     /* ... and their lengths */
3109     r1 = norm(r_ij);
3110     r2 = norm(r_kj);
3111     r3 = norm(r_ik);
3112     
3113     /* Deviations from ideality */
3114     s1 = r1-r1e;
3115     s2 = r2-r2e;
3116     s3 = r3-r3e;
3117     
3118     /* Energy (can be negative!) */
3119     vrt   = krt*s3*(s1+s2);
3120     vtot += vrt;
3121     
3122     /* Forces */
3123     k1 = -krt*(s3/r1);
3124     k2 = -krt*(s3/r2);
3125     k3 = -krt*(s1+s2)/r3;
3126     for(m=0; (m<DIM); m++) {
3127       f_i[m] = k1*r_ij[m] + k3*r_ik[m];
3128       f_k[m] = k2*r_kj[m] - k3*r_ik[m];
3129       f_j[m] = -f_i[m] - f_k[m];
3130     }
3131     
3132     for (m=0; (m<DIM); m++) {                   /*  12  */
3133       f[ai][m] += f_i[m];
3134       f[aj][m] += f_j[m];
3135       f[ak][m] += f_k[m];
3136     }
3137     
3138     /* Virial stuff */
3139     if (g) {
3140       copy_ivec(SHIFT_IVEC(g,aj),jt);
3141       
3142       ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
3143       ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
3144       t1=IVEC2IS(dt_ij);
3145       t2=IVEC2IS(dt_kj);
3146     }      
3147     rvec_inc(fshift[t1],f_i);
3148     rvec_inc(fshift[CENTRAL],f_j);
3149     rvec_inc(fshift[t2],f_k);               /* 9 */
3150     /* 163 TOTAL        */
3151   }
3152   return vtot;
3153 }
3154
3155 static real bonded_tab(const char *type,int table_nr,
3156                        const bondedtable_t *table,real kA,real kB,real r,
3157                        real lambda,real *V,real *F)
3158 {
3159   real k,tabscale,*VFtab,rt,eps,eps2,Yt,Ft,Geps,Heps2,Fp,VV,FF;
3160   int  n0,nnn;
3161   real v,f,dvdlambda;
3162
3163   k = (1.0 - lambda)*kA + lambda*kB;
3164
3165   tabscale = table->scale;
3166   VFtab    = table->data;
3167   
3168   rt    = r*tabscale;
3169   n0    = rt;
3170   if (n0 >= table->n) {
3171     gmx_fatal(FARGS,"A tabulated %s interaction table number %d is out of the table range: r %f, between table indices %d and %d, table length %d",
3172               type,table_nr,r,n0,n0+1,table->n);
3173   }
3174   eps   = rt - n0;
3175   eps2  = eps*eps;
3176   nnn   = 4*n0;
3177   Yt    = VFtab[nnn];
3178   Ft    = VFtab[nnn+1];
3179   Geps  = VFtab[nnn+2]*eps;
3180   Heps2 = VFtab[nnn+3]*eps2;
3181   Fp    = Ft + Geps + Heps2;
3182   VV    = Yt + Fp*eps;
3183   FF    = Fp + Geps + 2.0*Heps2;
3184   
3185   *F    = -k*FF*tabscale;
3186   *V    = k*VV;
3187   dvdlambda  = (kB - kA)*VV;
3188   
3189   return dvdlambda;
3190   
3191   /* That was 22 flops */
3192 }
3193
3194 real tab_bonds(int nbonds,
3195                const t_iatom forceatoms[],const t_iparams forceparams[],
3196                const rvec x[],rvec f[],rvec fshift[],
3197                const t_pbc *pbc,const t_graph *g,
3198                real lambda,real *dvdlambda,
3199                const t_mdatoms *md,t_fcdata *fcd,
3200                int *global_atom_index)
3201 {
3202   int  i,m,ki,ai,aj,type,table;
3203   real dr,dr2,fbond,vbond,fij,vtot;
3204   rvec dx;
3205   ivec dt;
3206
3207   vtot = 0.0;
3208   for(i=0; (i<nbonds); ) {
3209     type = forceatoms[i++];
3210     ai   = forceatoms[i++];
3211     aj   = forceatoms[i++];
3212   
3213     ki   = pbc_rvec_sub(pbc,x[ai],x[aj],dx);    /*   3          */
3214     dr2  = iprod(dx,dx);                        /*   5          */
3215     dr   = dr2*gmx_invsqrt(dr2);                        /*  10          */
3216
3217     table = forceparams[type].tab.table;
3218
3219     *dvdlambda += bonded_tab("bond",table,
3220                              &fcd->bondtab[table],
3221                              forceparams[type].tab.kA,
3222                              forceparams[type].tab.kB,
3223                              dr,lambda,&vbond,&fbond);  /*  22 */
3224
3225     if (dr2 == 0.0)
3226       continue;
3227
3228     
3229     vtot  += vbond;/* 1*/
3230     fbond *= gmx_invsqrt(dr2);                  /*   6          */
3231 #ifdef DEBUG
3232     if (debug)
3233       fprintf(debug,"TABBONDS: dr = %10g  vbond = %10g  fbond = %10g\n",
3234               dr,vbond,fbond);
3235 #endif
3236     if (g) {
3237       ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
3238       ki=IVEC2IS(dt);
3239     }
3240     for (m=0; (m<DIM); m++) {                   /*  15          */
3241       fij=fbond*dx[m];
3242       f[ai][m]+=fij;
3243       f[aj][m]-=fij;
3244       fshift[ki][m]+=fij;
3245       fshift[CENTRAL][m]-=fij;
3246     }
3247   }                                     /* 62 TOTAL     */
3248   return vtot;
3249 }
3250
3251 real tab_angles(int nbonds,
3252                 const t_iatom forceatoms[],const t_iparams forceparams[],
3253                 const rvec x[],rvec f[],rvec fshift[],
3254                 const t_pbc *pbc,const t_graph *g,
3255                 real lambda,real *dvdlambda,
3256                 const t_mdatoms *md,t_fcdata *fcd,
3257                 int *global_atom_index)
3258 {
3259   int  i,ai,aj,ak,t1,t2,type,table;
3260   rvec r_ij,r_kj;
3261   real cos_theta,cos_theta2,theta,dVdt,va,vtot;
3262   ivec jt,dt_ij,dt_kj;
3263   
3264   vtot = 0.0;
3265   for(i=0; (i<nbonds); ) {
3266     type = forceatoms[i++];
3267     ai   = forceatoms[i++];
3268     aj   = forceatoms[i++];
3269     ak   = forceatoms[i++];
3270     
3271     theta  = bond_angle(x[ai],x[aj],x[ak],pbc,
3272                         r_ij,r_kj,&cos_theta,&t1,&t2);  /*  41          */
3273
3274     table = forceparams[type].tab.table;
3275   
3276     *dvdlambda += bonded_tab("angle",table,
3277                              &fcd->angletab[table],
3278                              forceparams[type].tab.kA,
3279                              forceparams[type].tab.kB,
3280                              theta,lambda,&va,&dVdt);  /*  22  */
3281     vtot += va;
3282     
3283     cos_theta2 = sqr(cos_theta);                /*   1          */
3284     if (cos_theta2 < 1) {
3285       int  m;
3286       real snt,st,sth;
3287       real cik,cii,ckk;
3288       real nrkj2,nrij2;
3289       rvec f_i,f_j,f_k;
3290       
3291       st  = dVdt*gmx_invsqrt(1 - cos_theta2);   /*  12          */
3292       sth = st*cos_theta;                       /*   1          */
3293 #ifdef DEBUG
3294       if (debug)
3295         fprintf(debug,"ANGLES: theta = %10g  vth = %10g  dV/dtheta = %10g\n",
3296                 theta*RAD2DEG,va,dVdt);
3297 #endif
3298       nrkj2=iprod(r_kj,r_kj);                   /*   5          */
3299       nrij2=iprod(r_ij,r_ij);
3300       
3301       cik=st*gmx_invsqrt(nrkj2*nrij2);          /*  12          */ 
3302       cii=sth/nrij2;                            /*  10          */
3303       ckk=sth/nrkj2;                            /*  10          */
3304       
3305       for (m=0; (m<DIM); m++) {                 /*  39          */
3306         f_i[m]=-(cik*r_kj[m]-cii*r_ij[m]);
3307         f_k[m]=-(cik*r_ij[m]-ckk*r_kj[m]);
3308         f_j[m]=-f_i[m]-f_k[m];
3309         f[ai][m]+=f_i[m];
3310         f[aj][m]+=f_j[m];
3311         f[ak][m]+=f_k[m];
3312       }
3313       if (g) {
3314         copy_ivec(SHIFT_IVEC(g,aj),jt);
3315       
3316         ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
3317         ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
3318         t1=IVEC2IS(dt_ij);
3319         t2=IVEC2IS(dt_kj);
3320       }
3321       rvec_inc(fshift[t1],f_i);
3322       rvec_inc(fshift[CENTRAL],f_j);
3323       rvec_inc(fshift[t2],f_k);
3324     }                                           /* 169 TOTAL    */
3325   }
3326   return vtot;
3327 }
3328
3329 real tab_dihs(int nbonds,
3330               const t_iatom forceatoms[],const t_iparams forceparams[],
3331               const rvec x[],rvec f[],rvec fshift[],
3332               const t_pbc *pbc,const t_graph *g,
3333               real lambda,real *dvdlambda,
3334               const t_mdatoms *md,t_fcdata *fcd,
3335               int *global_atom_index)
3336 {
3337   int  i,type,ai,aj,ak,al,table;
3338   int  t1,t2,t3;
3339   rvec r_ij,r_kj,r_kl,m,n;
3340   real phi,sign,ddphi,vpd,vtot;
3341
3342   vtot = 0.0;
3343   for(i=0; (i<nbonds); ) {
3344     type = forceatoms[i++];
3345     ai   = forceatoms[i++];
3346     aj   = forceatoms[i++];
3347     ak   = forceatoms[i++];
3348     al   = forceatoms[i++];
3349     
3350     phi=dih_angle(x[ai],x[aj],x[ak],x[al],pbc,r_ij,r_kj,r_kl,m,n,
3351                   &sign,&t1,&t2,&t3);                   /*  84  */
3352
3353     table = forceparams[type].tab.table;
3354
3355     /* Hopefully phi+M_PI never results in values < 0 */
3356     *dvdlambda += bonded_tab("dihedral",table,
3357                              &fcd->dihtab[table],
3358                              forceparams[type].tab.kA,
3359                              forceparams[type].tab.kB,
3360                              phi+M_PI,lambda,&vpd,&ddphi);
3361                        
3362     vtot += vpd;
3363     do_dih_fup(ai,aj,ak,al,-ddphi,r_ij,r_kj,r_kl,m,n,
3364                f,fshift,pbc,g,x,t1,t2,t3);                      /* 112  */
3365
3366 #ifdef DEBUG
3367     fprintf(debug,"pdih: (%d,%d,%d,%d) phi=%g\n",
3368             ai,aj,ak,al,phi);
3369 #endif
3370   } /* 227 TOTAL        */
3371
3372   return vtot;
3373 }
3374
3375 static unsigned
3376 calc_bonded_reduction_mask(const t_idef *idef,
3377                            int shift,
3378                            int t,int nt)
3379 {
3380     unsigned mask;
3381     int ftype,nb,nat1,nb0,nb1,i,a;
3382
3383     mask = 0;
3384
3385     for(ftype=0; ftype<F_NRE; ftype++)
3386     {
3387         if (interaction_function[ftype].flags & IF_BOND &&
3388             !(ftype == F_CONNBONDS || ftype == F_POSRES) &&
3389             (ftype<F_GB12 || ftype>F_GB14))
3390         {
3391             nb = idef->il[ftype].nr;
3392             if (nb > 0)
3393             {
3394                 nat1 = interaction_function[ftype].nratoms + 1;
3395
3396                 /* Divide this interaction equally over the threads.
3397                  * This is not stored: should match division in calc_bonds.
3398                  */
3399                 nb0 = (((nb/nat1)* t   )/nt)*nat1;
3400                 nb1 = (((nb/nat1)*(t+1))/nt)*nat1;
3401
3402                 for(i=nb0; i<nb1; i+=nat1)
3403                 {
3404                     for(a=1; a<nat1; a++)
3405                     {
3406                         mask |= (1U << (idef->il[ftype].iatoms[i+a]>>shift));
3407                     }
3408                 }
3409             }
3410         }
3411     }
3412
3413     return mask;
3414 }
3415
3416 void init_bonded_thread_force_reduction(t_forcerec *fr,
3417                                         const t_idef *idef)
3418 {
3419 #define MAX_BLOCK_BITS 32
3420     int t;
3421     int ctot,c,b;
3422
3423     if (fr->nthreads <= 1)
3424     {
3425         fr->red_nblock = 0;
3426
3427         return;
3428     }
3429
3430     /* We divide the force array in a maximum of 32 blocks.
3431      * Minimum force block reduction size is 2^6=64.
3432      */
3433     fr->red_ashift = 6;
3434     while (fr->natoms_force > (int)(MAX_BLOCK_BITS*(1U<<fr->red_ashift)))
3435     {
3436         fr->red_ashift++;
3437     }
3438     if (debug)
3439     {
3440         fprintf(debug,"bonded force buffer block atom shift %d bits\n",
3441                 fr->red_ashift);
3442     }
3443
3444     /* Determine to which blocks each thread's bonded force calculation
3445      * contributes. Store this is a mask for each thread.
3446      */
3447 #pragma omp parallel for num_threads(fr->nthreads) schedule(static)
3448     for(t=1; t<fr->nthreads; t++)
3449     {
3450         fr->f_t[t].red_mask =
3451             calc_bonded_reduction_mask(idef,fr->red_ashift,t,fr->nthreads);
3452     }
3453
3454     /* Determine the maximum number of blocks we need to reduce over */
3455     fr->red_nblock = 0;
3456     ctot = 0;
3457     for(t=0; t<fr->nthreads; t++)
3458     {
3459         c = 0;
3460         for(b=0; b<MAX_BLOCK_BITS; b++)
3461         {
3462             if (fr->f_t[t].red_mask & (1U<<b))
3463             {
3464                 fr->red_nblock = max(fr->red_nblock,b+1);
3465                 c++;
3466             }
3467         }
3468         if (debug)
3469         {
3470             fprintf(debug,"thread %d flags %x count %d\n",
3471                     t,fr->f_t[t].red_mask,c);
3472         }
3473         ctot += c;
3474     }
3475     if (debug)
3476     {
3477         fprintf(debug,"Number of blocks to reduce: %d of size %d\n",
3478                 fr->red_nblock,1<<fr->red_ashift);
3479         fprintf(debug,"Reduction density %.2f density/#thread %.2f\n",
3480                 ctot*(1<<fr->red_ashift)/(double)fr->natoms_force,
3481                 ctot*(1<<fr->red_ashift)/(double)(fr->natoms_force*fr->nthreads));
3482     }
3483 }
3484
3485 static void zero_thread_forces(f_thread_t *f_t,int n,
3486                                int nblock,int blocksize)
3487 {
3488     int b,a0,a1,a,i,j;
3489
3490     if (n > f_t->f_nalloc)
3491     {
3492         f_t->f_nalloc = over_alloc_large(n);
3493         srenew(f_t->f,f_t->f_nalloc);
3494     }
3495
3496     if (f_t->red_mask != 0)
3497     {
3498         for(b=0; b<nblock; b++)
3499         {
3500             if (f_t->red_mask && (1U<<b))
3501             {
3502                 a0 = b*blocksize;
3503                 a1 = min((b+1)*blocksize,n);
3504                 for(a=a0; a<a1; a++)
3505                 {
3506                     clear_rvec(f_t->f[a]);
3507                 }
3508             }
3509         }
3510     }
3511     for(i=0; i<SHIFTS; i++)
3512     {
3513         clear_rvec(f_t->fshift[i]);
3514     }
3515     for(i=0; i<F_NRE; i++)
3516     {
3517         f_t->ener[i] = 0;
3518     }
3519     for(i=0; i<egNR; i++)
3520     {
3521         for(j=0; j<f_t->grpp.nener; j++)
3522         {
3523             f_t->grpp.ener[i][j] = 0;
3524         }
3525     }
3526     for(i=0; i<efptNR; i++)
3527     {
3528         f_t->dvdl[i] = 0;
3529     }
3530 }
3531
3532 static void reduce_thread_force_buffer(int n,rvec *f,
3533                                        int nthreads,f_thread_t *f_t,
3534                                        int nblock,int block_size)
3535 {
3536     /* The max thread number is arbitrary,
3537      * we used a fixed number to avoid memory management.
3538      * Using more than 16 threads is probably never useful performance wise.
3539      */
3540 #define MAX_BONDED_THREADS 256
3541     int b;
3542
3543     if (nthreads > MAX_BONDED_THREADS)
3544     {
3545         gmx_fatal(FARGS,"Can not reduce bonded forces on more than %d threads",
3546                   MAX_BONDED_THREADS);
3547     }
3548
3549     /* This reduction can run on any number of threads,
3550      * independently of nthreads.
3551      */
3552 #pragma omp parallel for num_threads(nthreads) schedule(static)
3553     for(b=0; b<nblock; b++)
3554     {
3555         rvec *fp[MAX_BONDED_THREADS];
3556         int nfb,ft,fb;
3557         int a0,a1,a;
3558
3559         /* Determine which threads contribute to this block */
3560         nfb = 0;
3561         for(ft=1; ft<nthreads; ft++)
3562         {
3563             if (f_t[ft].red_mask & (1U<<b))
3564             {
3565                 fp[nfb++] = f_t[ft].f;
3566             }
3567         }
3568         if (nfb > 0)
3569         {
3570             /* Reduce force buffers for threads that contribute */
3571             a0 =  b   *block_size;
3572             a1 = (b+1)*block_size;
3573             a1 = min(a1,n);
3574             for(a=a0; a<a1; a++)
3575             {
3576                 for(fb=0; fb<nfb; fb++)
3577                 {
3578                     rvec_inc(f[a],fp[fb][a]);
3579                 }
3580             }
3581         }
3582     }
3583 }
3584
3585 static void reduce_thread_forces(int n,rvec *f,rvec *fshift,
3586                                  real *ener,gmx_grppairener_t *grpp,real *dvdl,
3587                                  int nthreads,f_thread_t *f_t,
3588                                  int nblock,int block_size,
3589                                  gmx_bool bCalcEnerVir,
3590                                  gmx_bool bDHDL)
3591 {
3592     if (nblock > 0)
3593     {
3594         /* Reduce the bonded force buffer */
3595         reduce_thread_force_buffer(n,f,nthreads,f_t,nblock,block_size);
3596     }
3597
3598     /* When necessary, reduce energy and virial using one thread only */
3599     if (bCalcEnerVir)
3600     {
3601         int t,i,j;
3602
3603         for(i=0; i<SHIFTS; i++)
3604         {
3605             for(t=1; t<nthreads; t++)
3606             {
3607                 rvec_inc(fshift[i],f_t[t].fshift[i]);
3608             }
3609         }
3610         for(i=0; i<F_NRE; i++)
3611         {
3612             for(t=1; t<nthreads; t++)
3613             {
3614                 ener[i] += f_t[t].ener[i];
3615             }
3616         }
3617         for(i=0; i<egNR; i++)
3618         {
3619             for(j=0; j<f_t[1].grpp.nener; j++)
3620             {
3621                 for(t=1; t<nthreads; t++)
3622                 {
3623                     
3624                     grpp->ener[i][j] += f_t[t].grpp.ener[i][j];
3625                 }
3626             }
3627         }
3628         if (bDHDL)
3629         {
3630             for(i=0; i<efptNR; i++)
3631             {
3632                 
3633                 for(t=1; t<nthreads; t++)
3634                 {
3635                     dvdl[i] += f_t[t].dvdl[i];
3636                 }
3637             }
3638         }
3639     }
3640 }
3641
3642 static real calc_one_bond(FILE *fplog,int thread,
3643                           int ftype,const t_idef *idef,
3644                           rvec x[], rvec f[], rvec fshift[],
3645                           t_forcerec *fr,
3646                           const t_pbc *pbc,const t_graph *g,
3647                           gmx_enerdata_t *enerd, gmx_grppairener_t *grpp,
3648                           t_nrnb *nrnb,
3649                           real *lambda, real *dvdl,
3650                           const t_mdatoms *md,t_fcdata *fcd,
3651                           gmx_bool bCalcEnerVir,
3652                           int *global_atom_index, gmx_bool bPrintSepPot)
3653 {
3654     int ind,nat1,nbonds,efptFTYPE;
3655     real v=0;
3656     t_iatom *iatoms;
3657     int nb0,nbn;
3658
3659     if (IS_RESTRAINT_TYPE(ftype))
3660     {
3661         efptFTYPE = efptRESTRAINT;
3662     }
3663     else
3664     {
3665         efptFTYPE = efptBONDED;
3666     }
3667
3668     if (interaction_function[ftype].flags & IF_BOND &&
3669         !(ftype == F_CONNBONDS || ftype == F_POSRES))
3670     {
3671         ind  = interaction_function[ftype].nrnb_ind;
3672         nat1 = interaction_function[ftype].nratoms + 1;
3673         nbonds    = idef->il[ftype].nr/nat1;
3674         iatoms    = idef->il[ftype].iatoms;
3675
3676         nb0 = ((nbonds* thread   )/(fr->nthreads))*nat1;
3677         nbn = ((nbonds*(thread+1))/(fr->nthreads))*nat1 - nb0;
3678
3679         if (!IS_LISTED_LJ_C(ftype))
3680         {
3681             if(ftype==F_CMAP)
3682             {
3683                 v = cmap_dihs(nbn,iatoms+nb0,
3684                               idef->iparams,&idef->cmap_grid,
3685                               (const rvec*)x,f,fshift,
3686                               pbc,g,lambda[efptFTYPE],&(dvdl[efptFTYPE]),
3687                               md,fcd,global_atom_index);
3688             }
3689             else if (ftype == F_PDIHS &&
3690                      !bCalcEnerVir && fr->efep==efepNO)
3691             {
3692                 /* No energies, shift forces, dvdl */
3693 #ifndef SSE_PROPER_DIHEDRALS
3694                 pdihs_noener
3695 #else
3696                 pdihs_noener_sse
3697 #endif
3698                     (nbn,idef->il[ftype].iatoms+nb0,
3699                      idef->iparams,
3700                      (const rvec*)x,f,
3701                      pbc,g,lambda[efptFTYPE],md,fcd,
3702                      global_atom_index);
3703                 v = 0;
3704             }
3705             else
3706             {
3707                 v = interaction_function[ftype].ifunc(nbn,iatoms+nb0,
3708                                                       idef->iparams,
3709                                                       (const rvec*)x,f,fshift,
3710                                                       pbc,g,lambda[efptFTYPE],&(dvdl[efptFTYPE]),
3711                                                       md,fcd,global_atom_index);
3712             }
3713             if (bPrintSepPot)
3714             {
3715                 fprintf(fplog,"  %-23s #%4d  V %12.5e  dVdl %12.5e\n",
3716                         interaction_function[ftype].longname,
3717                         nbonds/nat1,v,lambda[efptFTYPE]);
3718             }
3719         }
3720         else
3721         {
3722             v = do_nonbonded_listed(ftype,nbn,iatoms+nb0,idef->iparams,(const rvec*)x,f,fshift,
3723                                     pbc,g,lambda,dvdl,md,fr,grpp,global_atom_index);
3724
3725             enerd->dvdl_nonlin[efptCOUL] += dvdl[efptCOUL];
3726             enerd->dvdl_nonlin[efptVDW] += dvdl[efptVDW];
3727             
3728             if (bPrintSepPot)
3729             {
3730                 fprintf(fplog,"  %-5s + %-15s #%4d                  dVdl %12.5e\n",
3731                         interaction_function[ftype].longname,
3732                         interaction_function[F_LJ14].longname,nbonds/nat1,dvdl[efptVDW]);
3733                 fprintf(fplog,"  %-5s + %-15s #%4d                  dVdl %12.5e\n",
3734                         interaction_function[ftype].longname,
3735                         interaction_function[F_COUL14].longname,nbonds/nat1,dvdl[efptCOUL]);
3736             }
3737         }
3738         if (ind != -1 && thread == 0)
3739         {
3740             inc_nrnb(nrnb,ind,nbonds);
3741         }
3742     }
3743
3744     return v;
3745 }
3746
3747 /* WARNING!  THIS FUNCTION MUST EXACTLY TRACK THE calc
3748    function, or horrible things will happen when doing free energy
3749    calculations!  In a good coding world, this would not be a
3750    different function, but for speed reasons, it needs to be made a
3751    separate function.  TODO for 5.0 - figure out a way to reorganize
3752    to reduce duplication.
3753 */
3754
3755 static real calc_one_bond_foreign(FILE *fplog,int ftype, const t_idef *idef,
3756                                   rvec x[], rvec f[], t_forcerec *fr,
3757                                   const t_pbc *pbc,const t_graph *g,
3758                                   gmx_enerdata_t *enerd, t_nrnb *nrnb,
3759                                   real *lambda, real *dvdl,
3760                                   const t_mdatoms *md,t_fcdata *fcd,
3761                                   int *global_atom_index, gmx_bool bPrintSepPot)
3762 {
3763     int ind,nat1,nbonds,efptFTYPE,nbonds_np;
3764     real v=0;
3765     t_iatom *iatoms;
3766
3767     if (IS_RESTRAINT_TYPE(ftype))
3768     {
3769         efptFTYPE = efptRESTRAINT;
3770     }
3771     else
3772     {
3773         efptFTYPE = efptBONDED;
3774     }
3775
3776     if (ftype<F_GB12 || ftype>F_GB14)
3777     {
3778         if (interaction_function[ftype].flags & IF_BOND &&
3779             !(ftype == F_CONNBONDS || ftype == F_POSRES || ftype == F_FBPOSRES))
3780         {
3781             ind  = interaction_function[ftype].nrnb_ind;
3782             nat1 = interaction_function[ftype].nratoms+1;
3783             nbonds_np = idef->il[ftype].nr_nonperturbed;
3784             nbonds    = idef->il[ftype].nr - nbonds_np;
3785             iatoms    = idef->il[ftype].iatoms + nbonds_np;
3786             if (nbonds > 0)
3787             {
3788                 if (!IS_LISTED_LJ_C(ftype))
3789                 {
3790                     if(ftype==F_CMAP)
3791                     {
3792                         v = cmap_dihs(nbonds,iatoms,
3793                                       idef->iparams,&idef->cmap_grid,
3794                                       (const rvec*)x,f,fr->fshift,
3795                                       pbc,g,lambda[efptFTYPE],&(dvdl[efptFTYPE]),md,fcd,
3796                                       global_atom_index);
3797                     }
3798                     else
3799                     {
3800                         v =         interaction_function[ftype].ifunc(nbonds,iatoms,
3801                                                                   idef->iparams,
3802                                                                   (const rvec*)x,f,fr->fshift,
3803                                                                   pbc,g,lambda[efptFTYPE],&dvdl[efptFTYPE],
3804                                                                   md,fcd,global_atom_index);
3805                     }
3806                 }
3807                 else
3808                 {
3809                     v = do_nonbonded_listed(ftype,nbonds,iatoms,
3810                                             idef->iparams,
3811                                             (const rvec*)x,f,fr->fshift,
3812                                             pbc,g,lambda,dvdl,
3813                                             md,fr,&enerd->grpp,global_atom_index);
3814                 }
3815                 if (ind != -1)
3816                 {
3817                     inc_nrnb(nrnb,ind,nbonds/nat1);
3818                 }
3819             }
3820         }
3821     }
3822     return v;
3823 }
3824
3825 void calc_bonds(FILE *fplog,const gmx_multisim_t *ms,
3826                 const t_idef *idef,
3827                 rvec x[],history_t *hist,
3828                 rvec f[],t_forcerec *fr,
3829                 const t_pbc *pbc,const t_graph *g,
3830                 gmx_enerdata_t *enerd,t_nrnb *nrnb,
3831                 real *lambda,
3832                 const t_mdatoms *md,
3833                 t_fcdata *fcd,int *global_atom_index,
3834                 t_atomtypes *atype, gmx_genborn_t *born,
3835                 int force_flags,
3836                 gmx_bool bPrintSepPot,gmx_large_int_t step)
3837 {
3838     gmx_bool bCalcEnerVir;
3839     int    i;
3840     real   v,dvdl[efptNR],dvdl_dum[efptNR]; /* The dummy array is to have a place to store the dhdl at other values
3841                                                of lambda, which will be thrown away in the end*/
3842     const  t_pbc *pbc_null;
3843     char   buf[22];
3844     int    thread;
3845
3846     bCalcEnerVir = (force_flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY));
3847
3848     for (i=0;i<efptNR;i++)
3849     {
3850         dvdl[i] = 0.0;
3851     }
3852     if (fr->bMolPBC)
3853     {
3854         pbc_null = pbc;
3855     }
3856     else
3857     {
3858         pbc_null = NULL;
3859     }
3860     if (bPrintSepPot)
3861     {
3862         fprintf(fplog,"Step %s: bonded V and dVdl for this node\n",
3863                 gmx_step_str(step,buf));
3864     }
3865
3866 #ifdef DEBUG
3867     if (g && debug)
3868     {
3869         p_graph(debug,"Bondage is fun",g);
3870     }
3871 #endif
3872
3873     /* Do pre force calculation stuff which might require communication */
3874     if (idef->il[F_ORIRES].nr)
3875     {
3876         enerd->term[F_ORIRESDEV] =
3877             calc_orires_dev(ms,idef->il[F_ORIRES].nr,
3878                             idef->il[F_ORIRES].iatoms,
3879                             idef->iparams,md,(const rvec*)x,
3880                             pbc_null,fcd,hist);
3881     }
3882     if (idef->il[F_DISRES].nr)
3883     {
3884         calc_disres_R_6(ms,idef->il[F_DISRES].nr,
3885                         idef->il[F_DISRES].iatoms,
3886                         idef->iparams,(const rvec*)x,pbc_null,
3887                         fcd,hist);
3888     }
3889
3890 #pragma omp parallel for num_threads(fr->nthreads) schedule(static)
3891     for(thread=0; thread<fr->nthreads; thread++)
3892     {
3893         int    ftype,nbonds,ind,nat1;
3894         real   *epot,v;
3895         /* thread stuff */
3896         rvec   *ft,*fshift;
3897         real   *dvdlt;
3898         gmx_grppairener_t *grpp;
3899         int    nb0,nbn;
3900
3901         if (thread == 0)
3902         {
3903             ft     = f;
3904             fshift = fr->fshift;
3905             epot   = enerd->term;
3906             grpp   = &enerd->grpp;
3907             dvdlt  = dvdl;
3908         }
3909         else
3910         {
3911             zero_thread_forces(&fr->f_t[thread],fr->natoms_force,
3912                                fr->red_nblock,1<<fr->red_ashift);
3913
3914             ft     = fr->f_t[thread].f;
3915             fshift = fr->f_t[thread].fshift;
3916             epot   = fr->f_t[thread].ener;
3917             grpp   = &fr->f_t[thread].grpp;
3918             dvdlt  = fr->f_t[thread].dvdl;
3919         }
3920         /* Loop over all bonded force types to calculate the bonded forces */
3921         for(ftype=0; (ftype<F_NRE); ftype++)
3922         {
3923             if (idef->il[ftype].nr > 0 &&
3924                 (interaction_function[ftype].flags & IF_BOND) &&
3925                 (ftype < F_GB12 || ftype > F_GB14) &&
3926                 !(ftype == F_CONNBONDS || ftype == F_POSRES))
3927             {
3928                 v = calc_one_bond(fplog,thread,ftype,idef,x, 
3929                                   ft,fshift,fr,pbc_null,g,enerd,grpp,
3930                                   nrnb,lambda,dvdlt,
3931                                   md,fcd,bCalcEnerVir,
3932                                   global_atom_index,bPrintSepPot);
3933                 epot[ftype]        += v;
3934             }
3935         }
3936     }
3937     if (fr->nthreads > 1)
3938     {
3939         reduce_thread_forces(fr->natoms_force,f,fr->fshift,
3940                              enerd->term,&enerd->grpp,dvdl,
3941                              fr->nthreads,fr->f_t,
3942                              fr->red_nblock,1<<fr->red_ashift,
3943                              bCalcEnerVir,
3944                              force_flags & GMX_FORCE_DHDL);
3945     }
3946     if (force_flags & GMX_FORCE_DHDL)
3947     {
3948         for(i=0; i<efptNR; i++)
3949         {
3950             enerd->dvdl_nonlin[i] += dvdl[i];
3951         }
3952     }
3953
3954     /* Copy the sum of violations for the distance restraints from fcd */
3955     if (fcd)
3956     {
3957         enerd->term[F_DISRESVIOL] = fcd->disres.sumviol;
3958
3959     }
3960 }
3961
3962 void calc_bonds_lambda(FILE *fplog,
3963                        const t_idef *idef,
3964                        rvec x[],
3965                        t_forcerec *fr,
3966                        const t_pbc *pbc,const t_graph *g,
3967                        gmx_enerdata_t *enerd,t_nrnb *nrnb,
3968                        real *lambda,
3969                        const t_mdatoms *md,
3970                        t_fcdata *fcd,
3971                        int *global_atom_index)
3972 {
3973     int    i,ftype,nbonds_np,nbonds,ind,nat;
3974     real   v,dr,dr2,*epot;
3975     real   dvdl_dum[efptNR];
3976     rvec   *f,*fshift_orig;
3977     const  t_pbc *pbc_null;
3978     t_iatom *iatom_fe;
3979
3980     if (fr->bMolPBC)
3981     {
3982         pbc_null = pbc;
3983     }
3984     else
3985     {
3986         pbc_null = NULL;
3987     }
3988
3989     epot = enerd->term;
3990
3991     snew(f,fr->natoms_force);
3992     /* We want to preserve the fshift array in forcerec */
3993     fshift_orig = fr->fshift;
3994     snew(fr->fshift,SHIFTS);
3995
3996     /* Loop over all bonded force types to calculate the bonded forces */
3997     for(ftype=0; (ftype<F_NRE); ftype++) 
3998     {
3999         v = calc_one_bond_foreign(fplog,ftype,idef,x, 
4000                                   f,fr,pbc_null,g,enerd,nrnb,lambda,dvdl_dum,
4001                                   md,fcd,global_atom_index,FALSE);
4002         epot[ftype] += v;
4003     }
4004
4005     sfree(fr->fshift);
4006     fr->fshift = fshift_orig;
4007     sfree(f);
4008 }