Update copyright statements and change license to LGPL
[alexxy/gromacs.git] / src / gmxlib / bondfree.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team,
6  * check out http://www.gromacs.org for more information.
7  * Copyright (c) 2012, by the GROMACS development team, led by
8  * David van der Spoel, Berk Hess, Erik Lindahl, and including many
9  * others, as listed in the AUTHORS file in the top-level source
10  * directory and at http://www.gromacs.org.
11  *
12  * GROMACS is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public License
14  * as published by the Free Software Foundation; either version 2.1
15  * of the License, or (at your option) any later version.
16  *
17  * GROMACS is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with GROMACS; if not, see
24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
26  *
27  * If you want to redistribute modifications to GROMACS, please
28  * consider that scientific software is very special. Version
29  * control is crucial - bugs must be traceable. We will be happy to
30  * consider code for inclusion in the official distribution, but
31  * derived work must not be called official GROMACS. Details are found
32  * in the README & COPYING files - if they are missing, get the
33  * official version at http://www.gromacs.org.
34  *
35  * To help us fund GROMACS development, we humbly ask that you cite
36  * the research papers on the package. Check out http://www.gromacs.org.
37  */
38 #ifdef HAVE_CONFIG_H
39 #include <config.h>
40 #endif
41
42 #include <math.h>
43 #include "physics.h"
44 #include "vec.h"
45 #include "maths.h"
46 #include "txtdump.h"
47 #include "bondf.h"
48 #include "smalloc.h"
49 #include "pbc.h"
50 #include "ns.h"
51 #include "macros.h"
52 #include "names.h"
53 #include "gmx_fatal.h"
54 #include "mshift.h"
55 #include "main.h"
56 #include "disre.h"
57 #include "orires.h"
58 #include "force.h"
59 #include "nonbonded.h"
60
61 #if !defined GMX_DOUBLE && defined GMX_X86_SSE2
62 #include "gmx_x86_simd_single.h"
63 #define SSE_PROPER_DIHEDRALS
64 #endif
65
66 /* Find a better place for this? */
67 const int cmap_coeff_matrix[] = {
68 1, 0, -3,  2, 0, 0,  0,  0, -3,  0,  9, -6,  2,  0, -6,  4 ,
69 0, 0,  0,  0, 0, 0,  0,  0,  3,  0, -9,  6, -2,  0,  6, -4,
70 0, 0,  0,  0, 0, 0,  0,  0,  0,  0,  9, -6,  0,  0, -6,  4 ,
71 0, 0,  3, -2, 0, 0,  0,  0,  0,  0, -9,  6,  0,  0,  6, -4,
72 0, 0,  0,  0, 1, 0, -3,  2, -2,  0,  6, -4,  1,  0, -3,  2 ,
73 0, 0,  0,  0, 0, 0,  0,  0, -1,  0,  3, -2,  1,  0, -3,  2 ,
74 0, 0,  0,  0, 0, 0,  0,  0,  0,  0, -3,  2,  0,  0,  3, -2,
75 0, 0,  0,  0, 0, 0,  3, -2,  0,  0, -6,  4,  0,  0,  3, -2,
76 0, 1, -2,  1, 0, 0,  0,  0,  0, -3,  6, -3,  0,  2, -4,  2 ,
77 0, 0,  0,  0, 0, 0,  0,  0,  0,  3, -6,  3,  0, -2,  4, -2,
78 0, 0,  0,  0, 0, 0,  0,  0,  0,  0, -3,  3,  0,  0,  2, -2,
79 0, 0, -1,  1, 0, 0,  0,  0,  0,  0,  3, -3,  0,  0, -2,  2 ,
80 0, 0,  0,  0, 0, 1, -2,  1,  0, -2,  4, -2,  0,  1, -2,  1,
81 0, 0,  0,  0, 0, 0,  0,  0,  0, -1,  2, -1,  0,  1, -2,  1,
82 0, 0,  0,  0, 0, 0,  0,  0,  0,  0,  1, -1,  0,  0, -1,  1,
83 0, 0,  0,  0, 0, 0, -1,  1,  0,  0,  2, -2,  0,  0, -1,  1
84 };
85
86
87
88 int glatnr(int *global_atom_index,int i)
89 {
90     int atnr;
91
92     if (global_atom_index == NULL) {
93         atnr = i + 1;
94     } else {
95         atnr = global_atom_index[i] + 1;
96     }
97
98     return atnr;
99 }
100
101 static int pbc_rvec_sub(const t_pbc *pbc,const rvec xi,const rvec xj,rvec dx)
102 {
103   if (pbc) {
104     return pbc_dx_aiuc(pbc,xi,xj,dx);
105   }
106   else {
107     rvec_sub(xi,xj,dx);
108     return CENTRAL;
109   }
110 }
111
112 /*
113  * Morse potential bond by Frank Everdij
114  *
115  * Three parameters needed:
116  *
117  * b0 = equilibrium distance in nm
118  * be = beta in nm^-1 (actually, it's nu_e*Sqrt(2*pi*pi*mu/D_e))
119  * cb = well depth in kJ/mol
120  *
121  * Note: the potential is referenced to be +cb at infinite separation
122  *       and zero at the equilibrium distance!
123  */
124
125 real morse_bonds(int nbonds,
126                  const t_iatom forceatoms[],const t_iparams forceparams[],
127                  const rvec x[],rvec f[],rvec fshift[],
128                  const t_pbc *pbc,const t_graph *g,
129                  real lambda,real *dvdlambda,
130                  const t_mdatoms *md,t_fcdata *fcd,
131                  int *global_atom_index)
132 {
133   const real one=1.0;
134   const real two=2.0;
135   real  dr,dr2,temp,omtemp,cbomtemp,fbond,vbond,fij,vtot;
136   real  b0,be,cb,b0A,beA,cbA,b0B,beB,cbB,L1;
137   rvec  dx;
138   int   i,m,ki,type,ai,aj;
139   ivec  dt;
140
141   vtot = 0.0;
142   for(i=0; (i<nbonds); ) {
143     type = forceatoms[i++];
144     ai   = forceatoms[i++];
145     aj   = forceatoms[i++];
146     
147     b0A   = forceparams[type].morse.b0A;
148     beA   = forceparams[type].morse.betaA;
149     cbA   = forceparams[type].morse.cbA;
150
151     b0B   = forceparams[type].morse.b0B;
152     beB   = forceparams[type].morse.betaB;
153     cbB   = forceparams[type].morse.cbB;
154
155     L1 = one-lambda;                      /* 1 */
156     b0 = L1*b0A + lambda*b0B;             /* 3 */
157     be = L1*beA + lambda*beB;             /* 3 */
158     cb = L1*cbA + lambda*cbB;             /* 3 */
159
160     ki   = pbc_rvec_sub(pbc,x[ai],x[aj],dx);            /*   3          */
161     dr2  = iprod(dx,dx);                            /*   5          */
162     dr   = dr2*gmx_invsqrt(dr2);                        /*  10          */
163     temp = exp(-be*(dr-b0));                        /*  12          */
164     
165     if (temp == one)
166     {
167         /* bonds are constrainted. This may _not_ include bond constraints if they are lambda dependent */
168         *dvdlambda += cbB-cbA;
169         continue;
170     }
171
172     omtemp   = one-temp;                               /*   1          */
173     cbomtemp = cb*omtemp;                              /*   1          */
174     vbond    = cbomtemp*omtemp;                        /*   1          */
175     fbond    = -two*be*temp*cbomtemp*gmx_invsqrt(dr2); /*   9          */
176     vtot     += vbond;                                 /*   1          */
177
178     *dvdlambda += (cbB - cbA) * omtemp * omtemp - (2-2*omtemp)*omtemp * cb * ((b0B-b0A)*be - (beB-beA)*(dr-b0)); /* 15 */
179     
180     if (g) {
181       ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
182       ki = IVEC2IS(dt);
183     }
184
185     for (m=0; (m<DIM); m++) {                          /*  15          */
186       fij=fbond*dx[m];
187       f[ai][m]+=fij;
188       f[aj][m]-=fij;
189       fshift[ki][m]+=fij;
190       fshift[CENTRAL][m]-=fij;
191     }
192   }                                           /*  83 TOTAL    */
193   return vtot;
194 }
195
196 real cubic_bonds(int nbonds,
197                  const t_iatom forceatoms[],const t_iparams forceparams[],
198                  const rvec x[],rvec f[],rvec fshift[],
199                  const t_pbc *pbc,const t_graph *g,
200                  real lambda,real *dvdlambda,
201                  const t_mdatoms *md,t_fcdata *fcd,
202                  int *global_atom_index)
203 {
204   const real three = 3.0;
205   const real two   = 2.0;
206   real  kb,b0,kcub;
207   real  dr,dr2,dist,kdist,kdist2,fbond,vbond,fij,vtot;
208   rvec  dx;
209   int   i,m,ki,type,ai,aj;
210   ivec  dt;
211
212   vtot = 0.0;
213   for(i=0; (i<nbonds); ) {
214     type = forceatoms[i++];
215     ai   = forceatoms[i++];
216     aj   = forceatoms[i++];
217     
218     b0   = forceparams[type].cubic.b0;
219     kb   = forceparams[type].cubic.kb;
220     kcub = forceparams[type].cubic.kcub;
221
222     ki   = pbc_rvec_sub(pbc,x[ai],x[aj],dx);                /*   3          */
223     dr2  = iprod(dx,dx);                                /*   5          */
224     
225     if (dr2 == 0.0)
226       continue;
227       
228     dr         = dr2*gmx_invsqrt(dr2);                      /*  10          */
229     dist       = dr-b0;
230     kdist      = kb*dist;
231     kdist2     = kdist*dist;
232     
233     vbond      = kdist2 + kcub*kdist2*dist;
234     fbond      = -(two*kdist + three*kdist2*kcub)/dr;
235
236     vtot      += vbond;       /* 21 */
237     
238     if (g) {
239       ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
240       ki=IVEC2IS(dt);
241     }
242     for (m=0; (m<DIM); m++) {                          /*  15          */
243       fij=fbond*dx[m];
244       f[ai][m]+=fij;
245       f[aj][m]-=fij;
246       fshift[ki][m]+=fij;
247       fshift[CENTRAL][m]-=fij;
248     }
249   }                                           /*  54 TOTAL    */
250   return vtot;
251 }
252
253 real FENE_bonds(int nbonds,
254                 const t_iatom forceatoms[],const t_iparams forceparams[],
255                 const rvec x[],rvec f[],rvec fshift[],
256                 const t_pbc *pbc,const t_graph *g,
257                 real lambda,real *dvdlambda,
258                 const t_mdatoms *md,t_fcdata *fcd,
259                 int *global_atom_index)
260 {
261   const real half=0.5;
262   const real one=1.0;
263   real  bm,kb;
264   real  dr,dr2,bm2,omdr2obm2,fbond,vbond,fij,vtot;
265   rvec  dx;
266   int   i,m,ki,type,ai,aj;
267   ivec  dt;
268
269   vtot = 0.0;
270   for(i=0; (i<nbonds); ) {
271     type = forceatoms[i++];
272     ai   = forceatoms[i++];
273     aj   = forceatoms[i++];
274     
275     bm   = forceparams[type].fene.bm;
276     kb   = forceparams[type].fene.kb;
277
278     ki   = pbc_rvec_sub(pbc,x[ai],x[aj],dx);            /*   3          */
279     dr2  = iprod(dx,dx);                                /*   5          */
280     
281     if (dr2 == 0.0)
282       continue;
283
284     bm2 = bm*bm;
285
286     if (dr2 >= bm2)
287       gmx_fatal(FARGS,
288                 "r^2 (%f) >= bm^2 (%f) in FENE bond between atoms %d and %d",
289                 dr2,bm2,
290                 glatnr(global_atom_index,ai),
291                 glatnr(global_atom_index,aj));
292       
293     omdr2obm2  = one - dr2/bm2;
294     
295     vbond      = -half*kb*bm2*log(omdr2obm2);
296     fbond      = -kb/omdr2obm2;
297
298     vtot      += vbond;       /* 35 */
299     
300     if (g) {
301       ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
302       ki=IVEC2IS(dt);
303     }
304     for (m=0; (m<DIM); m++) {                          /*  15          */
305       fij=fbond*dx[m];
306       f[ai][m]+=fij;
307       f[aj][m]-=fij;
308       fshift[ki][m]+=fij;
309       fshift[CENTRAL][m]-=fij;
310     }
311   }                                           /*  58 TOTAL    */
312   return vtot;
313 }
314
315 real harmonic(real kA,real kB,real xA,real xB,real x,real lambda,
316               real *V,real *F)
317 {
318   const real half=0.5;
319   real  L1,kk,x0,dx,dx2;
320   real  v,f,dvdlambda;
321   
322   L1    = 1.0-lambda;
323   kk    = L1*kA+lambda*kB;
324   x0    = L1*xA+lambda*xB;
325
326   dx    = x-x0;
327   dx2   = dx*dx;
328
329   f     = -kk*dx;
330   v     = half*kk*dx2;
331   dvdlambda  = half*(kB-kA)*dx2 + (xA-xB)*kk*dx;
332
333   *F    = f;
334   *V    = v;
335
336   return dvdlambda;
337
338   /* That was 19 flops */
339 }
340
341
342 real bonds(int nbonds,
343            const t_iatom forceatoms[],const t_iparams forceparams[],
344            const rvec x[],rvec f[],rvec fshift[],
345            const t_pbc *pbc,const t_graph *g,
346            real lambda,real *dvdlambda,
347            const t_mdatoms *md,t_fcdata *fcd,
348            int *global_atom_index)
349 {
350   int  i,m,ki,ai,aj,type;
351   real dr,dr2,fbond,vbond,fij,vtot;
352   rvec dx;
353   ivec dt;
354
355   vtot = 0.0;
356   for(i=0; (i<nbonds); ) {
357     type = forceatoms[i++];
358     ai   = forceatoms[i++];
359     aj   = forceatoms[i++];
360   
361     ki   = pbc_rvec_sub(pbc,x[ai],x[aj],dx);    /*   3          */
362     dr2  = iprod(dx,dx);                        /*   5          */
363     dr   = dr2*gmx_invsqrt(dr2);                        /*  10          */
364
365     *dvdlambda += harmonic(forceparams[type].harmonic.krA,
366                            forceparams[type].harmonic.krB,
367                            forceparams[type].harmonic.rA,
368                            forceparams[type].harmonic.rB,
369                            dr,lambda,&vbond,&fbond);  /*  19  */
370
371     if (dr2 == 0.0)
372       continue;
373
374     
375     vtot  += vbond;/* 1*/
376     fbond *= gmx_invsqrt(dr2);                  /*   6          */
377 #ifdef DEBUG
378     if (debug)
379       fprintf(debug,"BONDS: dr = %10g  vbond = %10g  fbond = %10g\n",
380               dr,vbond,fbond);
381 #endif
382     if (g) {
383       ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
384       ki=IVEC2IS(dt);
385     }
386     for (m=0; (m<DIM); m++) {                   /*  15          */
387       fij=fbond*dx[m];
388       f[ai][m]+=fij;
389       f[aj][m]-=fij;
390       fshift[ki][m]+=fij;
391       fshift[CENTRAL][m]-=fij;
392     }
393   }                                     /* 59 TOTAL     */
394   return vtot;
395 }
396
397 real restraint_bonds(int nbonds,
398                      const t_iatom forceatoms[],const t_iparams forceparams[],
399                      const rvec x[],rvec f[],rvec fshift[],
400                      const t_pbc *pbc,const t_graph *g,
401                      real lambda,real *dvdlambda,
402                      const t_mdatoms *md,t_fcdata *fcd,
403                      int *global_atom_index)
404 {
405     int  i,m,ki,ai,aj,type;
406     real dr,dr2,fbond,vbond,fij,vtot;
407     real L1;
408     real low,dlow,up1,dup1,up2,dup2,k,dk;
409     real drh,drh2;
410     rvec dx;
411     ivec dt;
412
413     L1   = 1.0 - lambda;
414
415     vtot = 0.0;
416     for(i=0; (i<nbonds); )
417     {
418         type = forceatoms[i++];
419         ai   = forceatoms[i++];
420         aj   = forceatoms[i++];
421         
422         ki   = pbc_rvec_sub(pbc,x[ai],x[aj],dx);        /*   3          */
423         dr2  = iprod(dx,dx);                            /*   5          */
424         dr   = dr2*gmx_invsqrt(dr2);                    /*  10          */
425
426         low  = L1*forceparams[type].restraint.lowA + lambda*forceparams[type].restraint.lowB;
427         dlow =   -forceparams[type].restraint.lowA +        forceparams[type].restraint.lowB;
428         up1  = L1*forceparams[type].restraint.up1A + lambda*forceparams[type].restraint.up1B;
429         dup1 =   -forceparams[type].restraint.up1A +        forceparams[type].restraint.up1B;
430         up2  = L1*forceparams[type].restraint.up2A + lambda*forceparams[type].restraint.up2B;
431         dup2 =   -forceparams[type].restraint.up2A +        forceparams[type].restraint.up2B;
432         k    = L1*forceparams[type].restraint.kA   + lambda*forceparams[type].restraint.kB;
433         dk   =   -forceparams[type].restraint.kA   +        forceparams[type].restraint.kB;
434         /* 24 */
435
436         if (dr < low)
437         {
438             drh   = dr - low;
439             drh2  = drh*drh;
440             vbond = 0.5*k*drh2;
441             fbond = -k*drh;
442             *dvdlambda += 0.5*dk*drh2 - k*dlow*drh;
443         } /* 11 */
444         else if (dr <= up1)
445         {
446             vbond = 0;
447             fbond = 0;
448         }
449         else if (dr <= up2)
450         {
451             drh   = dr - up1;
452             drh2  = drh*drh;
453             vbond = 0.5*k*drh2;
454             fbond = -k*drh;
455             *dvdlambda += 0.5*dk*drh2 - k*dup1*drh;
456         } /* 11 */
457         else
458         {
459             drh   = dr - up2;
460             vbond = k*(up2 - up1)*(0.5*(up2 - up1) + drh);
461             fbond = -k*(up2 - up1);
462             *dvdlambda += dk*(up2 - up1)*(0.5*(up2 - up1) + drh)
463                 + k*(dup2 - dup1)*(up2 - up1 + drh)
464                 - k*(up2 - up1)*dup2;
465         }
466    
467         if (dr2 == 0.0)
468             continue;
469         
470         vtot  += vbond;/* 1*/
471         fbond *= gmx_invsqrt(dr2);                      /*   6          */
472 #ifdef DEBUG
473         if (debug)
474             fprintf(debug,"BONDS: dr = %10g  vbond = %10g  fbond = %10g\n",
475                     dr,vbond,fbond);
476 #endif
477         if (g) {
478             ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
479             ki=IVEC2IS(dt);
480         }
481         for (m=0; (m<DIM); m++) {                       /*  15          */
482             fij=fbond*dx[m];
483             f[ai][m]+=fij;
484             f[aj][m]-=fij;
485             fshift[ki][m]+=fij;
486             fshift[CENTRAL][m]-=fij;
487         }
488     }                                   /* 59 TOTAL     */
489
490     return vtot;
491 }
492
493 real polarize(int nbonds,
494               const t_iatom forceatoms[],const t_iparams forceparams[],
495               const rvec x[],rvec f[],rvec fshift[],
496               const t_pbc *pbc,const t_graph *g,
497               real lambda,real *dvdlambda,
498               const t_mdatoms *md,t_fcdata *fcd,
499               int *global_atom_index)
500 {
501   int  i,m,ki,ai,aj,type;
502   real dr,dr2,fbond,vbond,fij,vtot,ksh;
503   rvec dx;
504   ivec dt;
505
506   vtot = 0.0;
507   for(i=0; (i<nbonds); ) {
508     type = forceatoms[i++];
509     ai   = forceatoms[i++];
510     aj   = forceatoms[i++];
511     ksh  = sqr(md->chargeA[aj])*ONE_4PI_EPS0/forceparams[type].polarize.alpha;
512     if (debug)
513       fprintf(debug,"POL: local ai = %d aj = %d ksh = %.3f\n",ai,aj,ksh);
514   
515     ki   = pbc_rvec_sub(pbc,x[ai],x[aj],dx);    /*   3          */
516     dr2  = iprod(dx,dx);                        /*   5          */
517     dr   = dr2*gmx_invsqrt(dr2);                        /*  10          */
518
519     *dvdlambda += harmonic(ksh,ksh,0,0,dr,lambda,&vbond,&fbond);  /*  19  */
520
521     if (dr2 == 0.0)
522       continue;
523     
524     vtot  += vbond;/* 1*/
525     fbond *= gmx_invsqrt(dr2);                  /*   6          */
526
527     if (g) {
528       ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
529       ki=IVEC2IS(dt);
530     }
531     for (m=0; (m<DIM); m++) {                   /*  15          */
532       fij=fbond*dx[m];
533       f[ai][m]+=fij;
534       f[aj][m]-=fij;
535       fshift[ki][m]+=fij;
536       fshift[CENTRAL][m]-=fij;
537     }
538   }                                     /* 59 TOTAL     */
539   return vtot;
540 }
541
542 real anharm_polarize(int nbonds,
543                      const t_iatom forceatoms[],const t_iparams forceparams[],
544                      const rvec x[],rvec f[],rvec fshift[],
545                      const t_pbc *pbc,const t_graph *g,
546                      real lambda,real *dvdlambda,
547                      const t_mdatoms *md,t_fcdata *fcd,
548                      int *global_atom_index)
549 {
550   int  i,m,ki,ai,aj,type;
551   real dr,dr2,fbond,vbond,fij,vtot,ksh,khyp,drcut,ddr,ddr3;
552   rvec dx;
553   ivec dt;
554
555   vtot = 0.0;
556   for(i=0; (i<nbonds); ) {
557     type  = forceatoms[i++];
558     ai    = forceatoms[i++];
559     aj    = forceatoms[i++];
560     ksh   = sqr(md->chargeA[aj])*ONE_4PI_EPS0/forceparams[type].anharm_polarize.alpha; /* 7*/
561     khyp  = forceparams[type].anharm_polarize.khyp;
562     drcut = forceparams[type].anharm_polarize.drcut;
563     if (debug)
564       fprintf(debug,"POL: local ai = %d aj = %d ksh = %.3f\n",ai,aj,ksh);
565   
566     ki   = pbc_rvec_sub(pbc,x[ai],x[aj],dx);    /*   3          */
567     dr2  = iprod(dx,dx);                        /*   5          */
568     dr   = dr2*gmx_invsqrt(dr2);                        /*  10          */
569
570     *dvdlambda += harmonic(ksh,ksh,0,0,dr,lambda,&vbond,&fbond);  /*  19  */
571
572     if (dr2 == 0.0)
573       continue;
574     
575     if (dr > drcut) {
576         ddr    = dr-drcut;
577         ddr3   = ddr*ddr*ddr;
578         vbond += khyp*ddr*ddr3;
579         fbond -= 4*khyp*ddr3;
580     }
581     fbond *= gmx_invsqrt(dr2);                  /*   6          */
582     vtot  += vbond;/* 1*/
583
584     if (g) {
585       ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
586       ki=IVEC2IS(dt);
587     }
588     for (m=0; (m<DIM); m++) {                   /*  15          */
589       fij=fbond*dx[m];
590       f[ai][m]+=fij;
591       f[aj][m]-=fij;
592       fshift[ki][m]+=fij;
593       fshift[CENTRAL][m]-=fij;
594     }
595   }                                     /* 72 TOTAL     */
596   return vtot;
597 }
598
599 real water_pol(int nbonds,
600                const t_iatom forceatoms[],const t_iparams forceparams[],
601                const rvec x[],rvec f[],rvec fshift[],
602                const t_pbc *pbc,const t_graph *g,
603                real lambda,real *dvdlambda,
604                const t_mdatoms *md,t_fcdata *fcd,
605                int *global_atom_index)
606 {
607   /* This routine implements anisotropic polarizibility for water, through
608    * a shell connected to a dummy with spring constant that differ in the
609    * three spatial dimensions in the molecular frame.
610    */
611   int  i,m,aO,aH1,aH2,aD,aS,type,type0;
612   rvec dOH1,dOH2,dHH,dOD,dDS,nW,kk,dx,kdx,proj;
613 #ifdef DEBUG
614   rvec df;
615 #endif
616   real vtot,fij,r_HH,r_OD,r_nW,tx,ty,tz,qS;
617
618   vtot = 0.0;
619   if (nbonds > 0) {
620     type0  = forceatoms[0];
621     aS     = forceatoms[5];
622     qS     = md->chargeA[aS];
623     kk[XX] = sqr(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_x;
624     kk[YY] = sqr(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_y;
625     kk[ZZ] = sqr(qS)*ONE_4PI_EPS0/forceparams[type0].wpol.al_z;
626     r_HH   = 1.0/forceparams[type0].wpol.rHH;
627     r_OD   = 1.0/forceparams[type0].wpol.rOD;
628     if (debug) {
629       fprintf(debug,"WPOL: qS  = %10.5f aS = %5d\n",qS,aS);
630       fprintf(debug,"WPOL: kk  = %10.3f        %10.3f        %10.3f\n",
631               kk[XX],kk[YY],kk[ZZ]);
632       fprintf(debug,"WPOL: rOH = %10.3f  rHH = %10.3f  rOD = %10.3f\n",
633               forceparams[type0].wpol.rOH,
634               forceparams[type0].wpol.rHH,
635               forceparams[type0].wpol.rOD);
636     }
637     for(i=0; (i<nbonds); i+=6) {
638       type = forceatoms[i];
639       if (type != type0)
640         gmx_fatal(FARGS,"Sorry, type = %d, type0 = %d, file = %s, line = %d",
641                     type,type0,__FILE__,__LINE__);
642       aO   = forceatoms[i+1];
643       aH1  = forceatoms[i+2];
644       aH2  = forceatoms[i+3];
645       aD   = forceatoms[i+4];
646       aS   = forceatoms[i+5];
647       
648       /* Compute vectors describing the water frame */
649       rvec_sub(x[aH1],x[aO], dOH1);
650       rvec_sub(x[aH2],x[aO], dOH2);
651       rvec_sub(x[aH2],x[aH1],dHH);
652       rvec_sub(x[aD], x[aO], dOD);
653       rvec_sub(x[aS], x[aD], dDS);
654       cprod(dOH1,dOH2,nW);
655       
656       /* Compute inverse length of normal vector 
657        * (this one could be precomputed, but I'm too lazy now)
658        */
659       r_nW = gmx_invsqrt(iprod(nW,nW));
660       /* This is for precision, but does not make a big difference,
661        * it can go later.
662        */
663       r_OD = gmx_invsqrt(iprod(dOD,dOD)); 
664       
665       /* Normalize the vectors in the water frame */
666       svmul(r_nW,nW,nW);
667       svmul(r_HH,dHH,dHH);
668       svmul(r_OD,dOD,dOD);
669       
670       /* Compute displacement of shell along components of the vector */
671       dx[ZZ] = iprod(dDS,dOD);
672       /* Compute projection on the XY plane: dDS - dx[ZZ]*dOD */
673       for(m=0; (m<DIM); m++)
674         proj[m] = dDS[m]-dx[ZZ]*dOD[m];
675       
676       /*dx[XX] = iprod(dDS,nW);
677         dx[YY] = iprod(dDS,dHH);*/
678       dx[XX] = iprod(proj,nW);
679       for(m=0; (m<DIM); m++)
680         proj[m] -= dx[XX]*nW[m];
681       dx[YY] = iprod(proj,dHH);
682       /*#define DEBUG*/
683 #ifdef DEBUG
684       if (debug) {
685         fprintf(debug,"WPOL: dx2=%10g  dy2=%10g  dz2=%10g  sum=%10g  dDS^2=%10g\n",
686                 sqr(dx[XX]),sqr(dx[YY]),sqr(dx[ZZ]),iprod(dx,dx),iprod(dDS,dDS));
687         fprintf(debug,"WPOL: dHH=(%10g,%10g,%10g)\n",dHH[XX],dHH[YY],dHH[ZZ]);
688         fprintf(debug,"WPOL: dOD=(%10g,%10g,%10g), 1/r_OD = %10g\n",
689                 dOD[XX],dOD[YY],dOD[ZZ],1/r_OD);
690         fprintf(debug,"WPOL: nW =(%10g,%10g,%10g), 1/r_nW = %10g\n",
691                 nW[XX],nW[YY],nW[ZZ],1/r_nW);
692         fprintf(debug,"WPOL: dx  =%10g, dy  =%10g, dz  =%10g\n",
693                 dx[XX],dx[YY],dx[ZZ]);
694         fprintf(debug,"WPOL: dDSx=%10g, dDSy=%10g, dDSz=%10g\n",
695                 dDS[XX],dDS[YY],dDS[ZZ]);
696       }
697 #endif
698       /* Now compute the forces and energy */
699       kdx[XX] = kk[XX]*dx[XX];
700       kdx[YY] = kk[YY]*dx[YY];
701       kdx[ZZ] = kk[ZZ]*dx[ZZ];
702       vtot   += iprod(dx,kdx);
703       for(m=0; (m<DIM); m++) {
704         /* This is a tensor operation but written out for speed */
705         tx        =  nW[m]*kdx[XX];
706         ty        = dHH[m]*kdx[YY];
707         tz        = dOD[m]*kdx[ZZ];
708         fij       = -tx-ty-tz;
709 #ifdef DEBUG
710         df[m] = fij;
711 #endif
712         f[aS][m] += fij;
713         f[aD][m] -= fij;
714       }
715 #ifdef DEBUG
716       if (debug) {
717         fprintf(debug,"WPOL: vwpol=%g\n",0.5*iprod(dx,kdx));
718         fprintf(debug,"WPOL: df = (%10g, %10g, %10g)\n",df[XX],df[YY],df[ZZ]);
719       }
720 #endif
721     }   
722   }
723   return 0.5*vtot;
724 }
725
726 static real do_1_thole(const rvec xi,const rvec xj,rvec fi,rvec fj,
727                        const t_pbc *pbc,real qq,
728                        rvec fshift[],real afac)
729 {
730   rvec r12;
731   real r12sq,r12_1,r12n,r12bar,v0,v1,fscal,ebar,fff;
732   int  m,t;
733     
734   t      = pbc_rvec_sub(pbc,xi,xj,r12); /*  3 */
735   
736   r12sq  = iprod(r12,r12);              /*  5 */
737   r12_1  = gmx_invsqrt(r12sq);              /*  5 */
738   r12bar = afac/r12_1;                  /*  5 */
739   v0     = qq*ONE_4PI_EPS0*r12_1;       /*  2 */
740   ebar   = exp(-r12bar);                /*  5 */
741   v1     = (1-(1+0.5*r12bar)*ebar);     /*  4 */
742   fscal  = ((v0*r12_1)*v1 - v0*0.5*afac*ebar*(r12bar+1))*r12_1; /* 9 */
743   if (debug)
744     fprintf(debug,"THOLE: v0 = %.3f v1 = %.3f r12= % .3f r12bar = %.3f fscal = %.3f  ebar = %.3f\n",v0,v1,1/r12_1,r12bar,fscal,ebar);
745   
746   for(m=0; (m<DIM); m++) {
747     fff    = fscal*r12[m];
748     fi[m] += fff;
749     fj[m] -= fff;             
750     fshift[t][m]       += fff;
751     fshift[CENTRAL][m] -= fff;
752   } /* 15 */
753   
754   return v0*v1; /* 1 */
755   /* 54 */
756 }
757
758 real thole_pol(int nbonds,
759                const t_iatom forceatoms[],const t_iparams forceparams[],
760                const rvec x[],rvec f[],rvec fshift[],
761                const t_pbc *pbc,const t_graph *g,
762                real lambda,real *dvdlambda,
763                const t_mdatoms *md,t_fcdata *fcd,
764                int *global_atom_index)
765 {
766   /* Interaction between two pairs of particles with opposite charge */
767   int i,type,a1,da1,a2,da2;
768   real q1,q2,qq,a,al1,al2,afac;
769   real V=0;
770   
771   for(i=0; (i<nbonds); ) {
772     type  = forceatoms[i++];
773     a1    = forceatoms[i++];
774     da1   = forceatoms[i++];
775     a2    = forceatoms[i++];
776     da2   = forceatoms[i++];
777     q1    = md->chargeA[da1];
778     q2    = md->chargeA[da2];
779     a     = forceparams[type].thole.a;
780     al1   = forceparams[type].thole.alpha1;
781     al2   = forceparams[type].thole.alpha2;
782     qq    = q1*q2;
783     afac  = a*pow(al1*al2,-1.0/6.0);
784     V += do_1_thole(x[a1], x[a2], f[a1], f[a2], pbc, qq,fshift,afac);
785     V += do_1_thole(x[da1],x[a2], f[da1],f[a2], pbc,-qq,fshift,afac);
786     V += do_1_thole(x[a1], x[da2],f[a1], f[da2],pbc,-qq,fshift,afac);
787     V += do_1_thole(x[da1],x[da2],f[da1],f[da2],pbc, qq,fshift,afac);
788   }
789   /* 290 flops */
790   return V;
791 }
792
793 real bond_angle(const rvec xi,const rvec xj,const rvec xk,const t_pbc *pbc,
794                 rvec r_ij,rvec r_kj,real *costh,
795                 int *t1,int *t2)
796 /* Return value is the angle between the bonds i-j and j-k */
797 {
798   /* 41 FLOPS */
799   real th;
800   
801   *t1 = pbc_rvec_sub(pbc,xi,xj,r_ij);                   /*  3           */
802   *t2 = pbc_rvec_sub(pbc,xk,xj,r_kj);                   /*  3           */
803
804   *costh=cos_angle(r_ij,r_kj);          /* 25           */
805   th=acos(*costh);                      /* 10           */
806                                         /* 41 TOTAL     */
807   return th;
808 }
809
810 real angles(int nbonds,
811             const t_iatom forceatoms[],const t_iparams forceparams[],
812             const rvec x[],rvec f[],rvec fshift[],
813             const t_pbc *pbc,const t_graph *g,
814             real lambda,real *dvdlambda,
815             const t_mdatoms *md,t_fcdata *fcd,
816             int *global_atom_index)
817 {
818     int  i,ai,aj,ak,t1,t2,type;
819     rvec r_ij,r_kj;
820     real cos_theta,cos_theta2,theta,dVdt,va,vtot;
821     ivec jt,dt_ij,dt_kj;
822
823     vtot = 0.0;
824     for(i=0; i<nbonds; )
825     {
826         type = forceatoms[i++];
827         ai   = forceatoms[i++];
828         aj   = forceatoms[i++];
829         ak   = forceatoms[i++];
830
831         theta  = bond_angle(x[ai],x[aj],x[ak],pbc,
832                             r_ij,r_kj,&cos_theta,&t1,&t2);      /*  41          */
833   
834         *dvdlambda += harmonic(forceparams[type].harmonic.krA,
835                                forceparams[type].harmonic.krB,
836                                forceparams[type].harmonic.rA*DEG2RAD,
837                                forceparams[type].harmonic.rB*DEG2RAD,
838                                theta,lambda,&va,&dVdt);  /*  21  */
839         vtot += va;
840
841         cos_theta2 = sqr(cos_theta);
842         if (cos_theta2 < 1)
843         {
844             int  m;
845             real st,sth;
846             real cik,cii,ckk;
847             real nrkj2,nrij2;
848             real nrkj_1,nrij_1;
849             rvec f_i,f_j,f_k;
850
851             st  = dVdt*gmx_invsqrt(1 - cos_theta2);     /*  12          */
852             sth = st*cos_theta;                 /*   1          */
853 #ifdef DEBUG
854             if (debug)
855                 fprintf(debug,"ANGLES: theta = %10g  vth = %10g  dV/dtheta = %10g\n",
856                         theta*RAD2DEG,va,dVdt);
857 #endif
858             nrij2 = iprod(r_ij,r_ij);                   /*   5          */
859             nrkj2 = iprod(r_kj,r_kj);                   /*   5          */
860
861             nrij_1 = gmx_invsqrt(nrij2);                /*  10          */
862             nrkj_1 = gmx_invsqrt(nrkj2);                /*  10          */
863
864             cik = st*nrij_1*nrkj_1;                     /*   2          */
865             cii = sth*nrij_1*nrij_1;                    /*   2          */
866             ckk = sth*nrkj_1*nrkj_1;                    /*   2          */
867       
868             for (m=0; m<DIM; m++)
869             {                   /*  39          */
870                 f_i[m]    = -(cik*r_kj[m] - cii*r_ij[m]);
871                 f_k[m]    = -(cik*r_ij[m] - ckk*r_kj[m]);
872                 f_j[m]    = -f_i[m] - f_k[m];
873                 f[ai][m] += f_i[m];
874                 f[aj][m] += f_j[m];
875                 f[ak][m] += f_k[m];
876             }
877             if (g != NULL)
878             {
879                 copy_ivec(SHIFT_IVEC(g,aj),jt);
880
881                 ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
882                 ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
883                 t1 = IVEC2IS(dt_ij);
884                 t2 = IVEC2IS(dt_kj);
885             }
886             rvec_inc(fshift[t1],f_i);
887             rvec_inc(fshift[CENTRAL],f_j);
888             rvec_inc(fshift[t2],f_k);
889         }                                           /* 161 TOTAL        */
890     }
891
892     return vtot;
893 }
894
895 real linear_angles(int nbonds,
896                    const t_iatom forceatoms[],const t_iparams forceparams[],
897                    const rvec x[],rvec f[],rvec fshift[],
898                    const t_pbc *pbc,const t_graph *g,
899                    real lambda,real *dvdlambda,
900                    const t_mdatoms *md,t_fcdata *fcd,
901                    int *global_atom_index)
902 {
903   int  i,m,ai,aj,ak,t1,t2,type;
904   rvec f_i,f_j,f_k;
905   real L1,kA,kB,aA,aB,dr,dr2,va,vtot,a,b,klin;
906   ivec jt,dt_ij,dt_kj;
907   rvec r_ij,r_kj,r_ik,dx;
908     
909   L1   = 1-lambda;
910   vtot = 0.0;
911   for(i=0; (i<nbonds); ) {
912     type = forceatoms[i++];
913     ai   = forceatoms[i++];
914     aj   = forceatoms[i++];
915     ak   = forceatoms[i++];
916     
917     kA = forceparams[type].linangle.klinA;
918     kB = forceparams[type].linangle.klinB;
919     klin = L1*kA + lambda*kB;
920     
921     aA   = forceparams[type].linangle.aA;
922     aB   = forceparams[type].linangle.aB;
923     a    = L1*aA+lambda*aB;
924     b    = 1-a;
925     
926     t1 = pbc_rvec_sub(pbc,x[ai],x[aj],r_ij);
927     t2 = pbc_rvec_sub(pbc,x[ak],x[aj],r_kj);
928     rvec_sub(r_ij,r_kj,r_ik);
929     
930     dr2 = 0;
931     for(m=0; (m<DIM); m++) 
932     {
933         dr     = - a * r_ij[m] - b * r_kj[m];
934         dr2   += dr*dr;
935         dx[m]  = dr;
936         f_i[m] = a*klin*dr;
937         f_k[m] = b*klin*dr;
938         f_j[m] = -(f_i[m]+f_k[m]);
939         f[ai][m] += f_i[m];
940         f[aj][m] += f_j[m];
941         f[ak][m] += f_k[m];
942     }
943     va    = 0.5*klin*dr2;
944     *dvdlambda += 0.5*(kB-kA)*dr2 + klin*(aB-aA)*iprod(dx,r_ik); 
945             
946     vtot += va;
947     
948     if (g) {
949         copy_ivec(SHIFT_IVEC(g,aj),jt);
950       
951         ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
952         ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
953         t1=IVEC2IS(dt_ij);
954         t2=IVEC2IS(dt_kj);
955     }
956     rvec_inc(fshift[t1],f_i);
957     rvec_inc(fshift[CENTRAL],f_j);
958     rvec_inc(fshift[t2],f_k);
959   }                                           /* 57 TOTAL       */
960   return vtot;
961 }
962
963 real urey_bradley(int nbonds,
964                   const t_iatom forceatoms[],const t_iparams forceparams[],
965                   const rvec x[],rvec f[],rvec fshift[],
966                   const t_pbc *pbc,const t_graph *g,
967                   real lambda,real *dvdlambda,
968                   const t_mdatoms *md,t_fcdata *fcd,
969                   int *global_atom_index)
970 {
971   int  i,m,ai,aj,ak,t1,t2,type,ki;
972   rvec r_ij,r_kj,r_ik;
973   real cos_theta,cos_theta2,theta;
974   real dVdt,va,vtot,dr,dr2,vbond,fbond,fik;
975   real kthA,th0A,kUBA,r13A,kthB,th0B,kUBB,r13B;
976   ivec jt,dt_ij,dt_kj,dt_ik;
977   
978   vtot = 0.0;
979   for(i=0; (i<nbonds); ) {
980     type = forceatoms[i++];
981     ai   = forceatoms[i++];
982     aj   = forceatoms[i++];
983     ak   = forceatoms[i++];
984     th0A  = forceparams[type].u_b.thetaA*DEG2RAD;
985     kthA  = forceparams[type].u_b.kthetaA;
986     r13A  = forceparams[type].u_b.r13A;
987     kUBA  = forceparams[type].u_b.kUBA;
988     th0B  = forceparams[type].u_b.thetaB*DEG2RAD;
989     kthB  = forceparams[type].u_b.kthetaB;
990     r13B  = forceparams[type].u_b.r13B;
991     kUBB  = forceparams[type].u_b.kUBB;
992     
993     theta  = bond_angle(x[ai],x[aj],x[ak],pbc,
994                         r_ij,r_kj,&cos_theta,&t1,&t2);  /*  41          */
995   
996     *dvdlambda += harmonic(kthA,kthB,th0A,th0B,theta,lambda,&va,&dVdt);  /*  21  */
997     vtot += va;
998     
999     ki   = pbc_rvec_sub(pbc,x[ai],x[ak],r_ik);  /*   3          */
1000     dr2  = iprod(r_ik,r_ik);                    /*   5          */
1001     dr   = dr2*gmx_invsqrt(dr2);                        /*  10          */
1002
1003     *dvdlambda += harmonic(kUBA,kUBB,r13A,r13B,dr,lambda,&vbond,&fbond); /*  19  */
1004
1005     cos_theta2 = sqr(cos_theta);                /*   1          */
1006     if (cos_theta2 < 1) {
1007       real st,sth;
1008       real cik,cii,ckk;
1009       real nrkj2,nrij2;
1010       rvec f_i,f_j,f_k;
1011       
1012       st  = dVdt*gmx_invsqrt(1 - cos_theta2);   /*  12          */
1013       sth = st*cos_theta;                       /*   1          */
1014 #ifdef DEBUG
1015       if (debug)
1016         fprintf(debug,"ANGLES: theta = %10g  vth = %10g  dV/dtheta = %10g\n",
1017                 theta*RAD2DEG,va,dVdt);
1018 #endif
1019       nrkj2=iprod(r_kj,r_kj);                   /*   5          */
1020       nrij2=iprod(r_ij,r_ij);
1021       
1022       cik=st*gmx_invsqrt(nrkj2*nrij2);          /*  12          */ 
1023       cii=sth/nrij2;                            /*  10          */
1024       ckk=sth/nrkj2;                            /*  10          */
1025       
1026       for (m=0; (m<DIM); m++) {                 /*  39          */
1027         f_i[m]=-(cik*r_kj[m]-cii*r_ij[m]);
1028         f_k[m]=-(cik*r_ij[m]-ckk*r_kj[m]);
1029         f_j[m]=-f_i[m]-f_k[m];
1030         f[ai][m]+=f_i[m];
1031         f[aj][m]+=f_j[m];
1032         f[ak][m]+=f_k[m];
1033       }
1034       if (g) {
1035         copy_ivec(SHIFT_IVEC(g,aj),jt);
1036       
1037         ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
1038         ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
1039         t1=IVEC2IS(dt_ij);
1040         t2=IVEC2IS(dt_kj);
1041       }
1042       rvec_inc(fshift[t1],f_i);
1043       rvec_inc(fshift[CENTRAL],f_j);
1044       rvec_inc(fshift[t2],f_k);
1045     }                                           /* 161 TOTAL    */
1046     /* Time for the bond calculations */
1047     if (dr2 == 0.0)
1048       continue;
1049
1050     vtot  += vbond;  /* 1*/
1051     fbond *= gmx_invsqrt(dr2);                  /*   6          */
1052     
1053     if (g) {
1054       ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,ak),dt_ik);
1055       ki=IVEC2IS(dt_ik);
1056     }
1057     for (m=0; (m<DIM); m++) {                   /*  15          */
1058       fik=fbond*r_ik[m];
1059       f[ai][m]+=fik;
1060       f[ak][m]-=fik;
1061       fshift[ki][m]+=fik;
1062       fshift[CENTRAL][m]-=fik;
1063     }
1064   }
1065   return vtot;
1066 }
1067
1068 real quartic_angles(int nbonds,
1069                     const t_iatom forceatoms[],const t_iparams forceparams[],
1070                     const rvec x[],rvec f[],rvec fshift[],
1071                     const t_pbc *pbc,const t_graph *g,
1072                     real lambda,real *dvdlambda,
1073                     const t_mdatoms *md,t_fcdata *fcd,
1074                     int *global_atom_index)
1075 {
1076   int  i,j,ai,aj,ak,t1,t2,type;
1077   rvec r_ij,r_kj;
1078   real cos_theta,cos_theta2,theta,dt,dVdt,va,dtp,c,vtot;
1079   ivec jt,dt_ij,dt_kj;
1080   
1081   vtot = 0.0;
1082   for(i=0; (i<nbonds); ) {
1083     type = forceatoms[i++];
1084     ai   = forceatoms[i++];
1085     aj   = forceatoms[i++];
1086     ak   = forceatoms[i++];
1087
1088     theta  = bond_angle(x[ai],x[aj],x[ak],pbc,
1089                         r_ij,r_kj,&cos_theta,&t1,&t2);  /*  41          */
1090
1091     dt = theta - forceparams[type].qangle.theta*DEG2RAD; /* 2          */
1092
1093     dVdt = 0;
1094     va = forceparams[type].qangle.c[0];
1095     dtp = 1.0;
1096     for(j=1; j<=4; j++) {
1097       c = forceparams[type].qangle.c[j];
1098       dVdt -= j*c*dtp;
1099       dtp *= dt;
1100       va += c*dtp;
1101     }
1102     /* 20 */
1103
1104     vtot += va;
1105     
1106     cos_theta2 = sqr(cos_theta);                /*   1          */
1107     if (cos_theta2 < 1) {
1108       int  m;
1109       real st,sth;
1110       real cik,cii,ckk;
1111       real nrkj2,nrij2;
1112       rvec f_i,f_j,f_k;
1113       
1114       st  = dVdt*gmx_invsqrt(1 - cos_theta2);           /*  12          */
1115       sth = st*cos_theta;                       /*   1          */
1116 #ifdef DEBUG
1117       if (debug)
1118         fprintf(debug,"ANGLES: theta = %10g  vth = %10g  dV/dtheta = %10g\n",
1119                 theta*RAD2DEG,va,dVdt);
1120 #endif
1121       nrkj2=iprod(r_kj,r_kj);                   /*   5          */
1122       nrij2=iprod(r_ij,r_ij);
1123       
1124       cik=st*gmx_invsqrt(nrkj2*nrij2);          /*  12          */ 
1125       cii=sth/nrij2;                            /*  10          */
1126       ckk=sth/nrkj2;                            /*  10          */
1127       
1128       for (m=0; (m<DIM); m++) {                 /*  39          */
1129         f_i[m]=-(cik*r_kj[m]-cii*r_ij[m]);
1130         f_k[m]=-(cik*r_ij[m]-ckk*r_kj[m]);
1131         f_j[m]=-f_i[m]-f_k[m];
1132         f[ai][m]+=f_i[m];
1133         f[aj][m]+=f_j[m];
1134         f[ak][m]+=f_k[m];
1135       }
1136       if (g) {
1137         copy_ivec(SHIFT_IVEC(g,aj),jt);
1138       
1139         ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
1140         ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
1141         t1=IVEC2IS(dt_ij);
1142         t2=IVEC2IS(dt_kj);
1143       }
1144       rvec_inc(fshift[t1],f_i);
1145       rvec_inc(fshift[CENTRAL],f_j);
1146       rvec_inc(fshift[t2],f_k);
1147     }                                           /* 153 TOTAL    */
1148   }
1149   return vtot;
1150 }
1151
1152 real dih_angle(const rvec xi,const rvec xj,const rvec xk,const rvec xl,
1153                const t_pbc *pbc,
1154                rvec r_ij,rvec r_kj,rvec r_kl,rvec m,rvec n,
1155                real *sign,int *t1,int *t2,int *t3)
1156 {
1157   real ipr,phi;
1158
1159   *t1 = pbc_rvec_sub(pbc,xi,xj,r_ij);                   /*  3           */
1160   *t2 = pbc_rvec_sub(pbc,xk,xj,r_kj);                   /*  3           */
1161   *t3 = pbc_rvec_sub(pbc,xk,xl,r_kl);                   /*  3           */
1162
1163   cprod(r_ij,r_kj,m);                   /*  9           */
1164   cprod(r_kj,r_kl,n);                   /*  9           */
1165   phi=gmx_angle(m,n);                   /* 49 (assuming 25 for atan2) */
1166   ipr=iprod(r_ij,n);                    /*  5           */
1167   (*sign)=(ipr<0.0)?-1.0:1.0;
1168   phi=(*sign)*phi;                      /*  1           */
1169                                         /* 82 TOTAL     */
1170   return phi;
1171 }
1172
1173
1174 #ifdef SSE_PROPER_DIHEDRALS
1175
1176 /* x86 SIMD inner-product of 4 float vectors */
1177 #define GMX_MM_IPROD_PS(ax,ay,az,bx,by,bz)                 \
1178     _mm_add_ps(_mm_add_ps(_mm_mul_ps(ax,bx),_mm_mul_ps(ay,by)),_mm_mul_ps(az,bz))
1179
1180 /* x86 SIMD norm^2 of 4 float vectors */
1181 #define GMX_MM_NORM2_PS(ax,ay,az) GMX_MM_IPROD_PS(ax,ay,az,ax,ay,az)
1182
1183 /* x86 SIMD cross-product of 4 float vectors */
1184 #define GMX_MM_CPROD_PS(ax,ay,az,bx,by,bz,cx,cy,cz)        \
1185 {                                                          \
1186     cx = _mm_sub_ps(_mm_mul_ps(ay,bz),_mm_mul_ps(az,by));  \
1187     cy = _mm_sub_ps(_mm_mul_ps(az,bx),_mm_mul_ps(ax,bz));  \
1188     cz = _mm_sub_ps(_mm_mul_ps(ax,by),_mm_mul_ps(ay,bx));  \
1189 }
1190
1191 /* load 4 rvec's into 3 x86 SIMD float registers */
1192 #define load_rvec4(r0,r1,r2,r3,rx_SSE,ry_SSE,rz_SSE)          \
1193 {                                                             \
1194     __m128 tmp;                                               \
1195     rx_SSE = _mm_load_ps(r0);                                 \
1196     ry_SSE = _mm_load_ps(r1);                                 \
1197     rz_SSE = _mm_load_ps(r2);                                 \
1198     tmp    = _mm_load_ps(r3);                                 \
1199     _MM_TRANSPOSE4_PS(rx_SSE,ry_SSE,rz_SSE,tmp);              \
1200 }
1201
1202 #define store_rvec4(rx_SSE,ry_SSE,rz_SSE,r0,r1,r2,r3)         \
1203 {                                                             \
1204     __m128 tmp=_mm_setzero_ps();                              \
1205     _MM_TRANSPOSE4_PS(rx_SSE,ry_SSE,rz_SSE,tmp);              \
1206     _mm_store_ps(r0,rx_SSE);                                  \
1207     _mm_store_ps(r1,ry_SSE);                                  \
1208     _mm_store_ps(r2,rz_SSE);                                  \
1209     _mm_store_ps(r3,tmp   );                                  \
1210 }
1211
1212 /* An rvec in a structure which can be allocated 16-byte aligned */
1213 typedef struct {
1214     rvec  v;
1215     float f;
1216 } rvec_sse_t;
1217
1218 /* As dih_angle above, but calculates 4 dihedral angles at once using SSE,
1219  * also calculates the pre-factor required for the dihedral force update.
1220  * Note that bv and buf should be 16-byte aligned.
1221  */
1222 static void
1223 dih_angle_sse(const rvec *x,
1224               int ai[4],int aj[4],int ak[4],int al[4],
1225               const t_pbc *pbc,
1226               int t1[4],int t2[4],int t3[4],
1227               rvec_sse_t *bv,
1228               real *buf)
1229 {
1230     int s;
1231     __m128 rijx_SSE,rijy_SSE,rijz_SSE;
1232     __m128 rkjx_SSE,rkjy_SSE,rkjz_SSE;
1233     __m128 rklx_SSE,rkly_SSE,rklz_SSE;
1234     __m128 mx_SSE,my_SSE,mz_SSE;
1235     __m128 nx_SSE,ny_SSE,nz_SSE;
1236     __m128 cx_SSE,cy_SSE,cz_SSE;
1237     __m128 cn_SSE;
1238     __m128 s_SSE;
1239     __m128 phi_SSE;
1240     __m128 ipr_SSE;
1241     int signs;
1242     __m128 iprm_SSE,iprn_SSE;
1243     __m128 nrkj2_SSE,nrkj_1_SSE,nrkj_2_SSE,nrkj_SSE;
1244     __m128 nrkj_m2_SSE,nrkj_n2_SSE;
1245     __m128 p_SSE,q_SSE;
1246     __m128 fmin_SSE=_mm_set1_ps(GMX_FLOAT_MIN);
1247
1248     for(s=0; s<4; s++)
1249     {
1250         t1[s] = pbc_rvec_sub(pbc,x[ai[s]],x[aj[s]],bv[0+s].v);
1251         t2[s] = pbc_rvec_sub(pbc,x[ak[s]],x[aj[s]],bv[4+s].v);
1252         t3[s] = pbc_rvec_sub(pbc,x[ak[s]],x[al[s]],bv[8+s].v);
1253     }
1254
1255     load_rvec4(bv[0].v,bv[1].v,bv[2].v,bv[3].v,rijx_SSE,rijy_SSE,rijz_SSE);
1256     load_rvec4(bv[4].v,bv[5].v,bv[6].v,bv[7].v,rkjx_SSE,rkjy_SSE,rkjz_SSE);
1257     load_rvec4(bv[8].v,bv[9].v,bv[10].v,bv[11].v,rklx_SSE,rkly_SSE,rklz_SSE);
1258
1259     GMX_MM_CPROD_PS(rijx_SSE,rijy_SSE,rijz_SSE,
1260                     rkjx_SSE,rkjy_SSE,rkjz_SSE,
1261                     mx_SSE,my_SSE,mz_SSE);
1262
1263     GMX_MM_CPROD_PS(rkjx_SSE,rkjy_SSE,rkjz_SSE,
1264                     rklx_SSE,rkly_SSE,rklz_SSE,
1265                     nx_SSE,ny_SSE,nz_SSE);
1266
1267     GMX_MM_CPROD_PS(mx_SSE,my_SSE,mz_SSE,
1268                     nx_SSE,ny_SSE,nz_SSE,
1269                     cx_SSE,cy_SSE,cz_SSE);
1270
1271     cn_SSE = gmx_mm_sqrt_ps(GMX_MM_NORM2_PS(cx_SSE,cy_SSE,cz_SSE));
1272     
1273     s_SSE = GMX_MM_IPROD_PS(mx_SSE,my_SSE,mz_SSE,nx_SSE,ny_SSE,nz_SSE);
1274
1275     phi_SSE = gmx_mm_atan2_ps(cn_SSE,s_SSE);
1276     _mm_store_ps(buf+16,phi_SSE);
1277
1278     ipr_SSE = GMX_MM_IPROD_PS(rijx_SSE,rijy_SSE,rijz_SSE,
1279                               nx_SSE,ny_SSE,nz_SSE);
1280
1281     signs = _mm_movemask_ps(ipr_SSE);
1282     
1283     for(s=0; s<4; s++)
1284     {
1285         if (signs & (1<<s))
1286         {
1287             buf[16+s] = -buf[16+s];
1288         }
1289     }
1290
1291     iprm_SSE    = GMX_MM_NORM2_PS(mx_SSE,my_SSE,mz_SSE);
1292     iprn_SSE    = GMX_MM_NORM2_PS(nx_SSE,ny_SSE,nz_SSE);
1293
1294     /* store_rvec4 messes with the input, don't use it after this! */
1295     store_rvec4(mx_SSE,my_SSE,mz_SSE,bv[0].v,bv[1].v,bv[2].v,bv[3].v);
1296     store_rvec4(nx_SSE,ny_SSE,nz_SSE,bv[4].v,bv[5].v,bv[6].v,bv[7].v);
1297
1298     nrkj2_SSE   = GMX_MM_NORM2_PS(rkjx_SSE,rkjy_SSE,rkjz_SSE);
1299
1300     /* Avoid division by zero. When zero, the result is multiplied by 0
1301      * anyhow, so the 3 max below do not affect the final result.
1302      */
1303     nrkj2_SSE   = _mm_max_ps(nrkj2_SSE,fmin_SSE);
1304     nrkj_1_SSE  = gmx_mm_invsqrt_ps(nrkj2_SSE);
1305     nrkj_2_SSE  = _mm_mul_ps(nrkj_1_SSE,nrkj_1_SSE);
1306     nrkj_SSE    = _mm_mul_ps(nrkj2_SSE,nrkj_1_SSE);
1307
1308     iprm_SSE    = _mm_max_ps(iprm_SSE,fmin_SSE);
1309     iprn_SSE    = _mm_max_ps(iprn_SSE,fmin_SSE);
1310     nrkj_m2_SSE = _mm_mul_ps(nrkj_SSE,gmx_mm_inv_ps(iprm_SSE));
1311     nrkj_n2_SSE = _mm_mul_ps(nrkj_SSE,gmx_mm_inv_ps(iprn_SSE));
1312
1313     _mm_store_ps(buf+0,nrkj_m2_SSE);
1314     _mm_store_ps(buf+4,nrkj_n2_SSE);
1315
1316     p_SSE       = GMX_MM_IPROD_PS(rijx_SSE,rijy_SSE,rijz_SSE,
1317                                   rkjx_SSE,rkjy_SSE,rkjz_SSE);
1318     p_SSE       = _mm_mul_ps(p_SSE,nrkj_2_SSE);
1319
1320     q_SSE       = GMX_MM_IPROD_PS(rklx_SSE,rkly_SSE,rklz_SSE,
1321                                   rkjx_SSE,rkjy_SSE,rkjz_SSE);
1322     q_SSE       = _mm_mul_ps(q_SSE,nrkj_2_SSE);
1323
1324     _mm_store_ps(buf+8 ,p_SSE);
1325     _mm_store_ps(buf+12,q_SSE);
1326 }
1327
1328 #endif /* SSE_PROPER_DIHEDRALS */
1329
1330
1331 void do_dih_fup(int i,int j,int k,int l,real ddphi,
1332                 rvec r_ij,rvec r_kj,rvec r_kl,
1333                 rvec m,rvec n,rvec f[],rvec fshift[],
1334                 const t_pbc *pbc,const t_graph *g,
1335                 const rvec x[],int t1,int t2,int t3)
1336 {
1337   /* 143 FLOPS */
1338   rvec f_i,f_j,f_k,f_l;
1339   rvec uvec,vvec,svec,dx_jl;
1340   real iprm,iprn,nrkj,nrkj2,nrkj_1,nrkj_2;
1341   real a,b,p,q,toler;
1342   ivec jt,dt_ij,dt_kj,dt_lj;  
1343   
1344   iprm  = iprod(m,m);           /*  5   */
1345   iprn  = iprod(n,n);           /*  5   */
1346   nrkj2 = iprod(r_kj,r_kj);     /*  5   */
1347   toler = nrkj2*GMX_REAL_EPS;
1348   if ((iprm > toler) && (iprn > toler)) {
1349     nrkj_1 = gmx_invsqrt(nrkj2);        /* 10   */
1350     nrkj_2 = nrkj_1*nrkj_1;     /*  1   */
1351     nrkj  = nrkj2*nrkj_1;       /*  1   */
1352     a     = -ddphi*nrkj/iprm;   /* 11   */
1353     svmul(a,m,f_i);             /*  3   */
1354     b     = ddphi*nrkj/iprn;    /* 11   */
1355     svmul(b,n,f_l);             /*  3   */
1356     p     = iprod(r_ij,r_kj);   /*  5   */
1357     p    *= nrkj_2;             /*  1   */
1358     q     = iprod(r_kl,r_kj);   /*  5   */
1359     q    *= nrkj_2;             /*  1   */
1360     svmul(p,f_i,uvec);          /*  3   */
1361     svmul(q,f_l,vvec);          /*  3   */
1362     rvec_sub(uvec,vvec,svec);   /*  3   */
1363     rvec_sub(f_i,svec,f_j);     /*  3   */
1364     rvec_add(f_l,svec,f_k);     /*  3   */
1365     rvec_inc(f[i],f_i);         /*  3   */
1366     rvec_dec(f[j],f_j);         /*  3   */
1367     rvec_dec(f[k],f_k);         /*  3   */
1368     rvec_inc(f[l],f_l);         /*  3   */
1369     
1370     if (g) {
1371       copy_ivec(SHIFT_IVEC(g,j),jt);
1372       ivec_sub(SHIFT_IVEC(g,i),jt,dt_ij);
1373       ivec_sub(SHIFT_IVEC(g,k),jt,dt_kj);
1374       ivec_sub(SHIFT_IVEC(g,l),jt,dt_lj);
1375       t1=IVEC2IS(dt_ij);
1376       t2=IVEC2IS(dt_kj);
1377       t3=IVEC2IS(dt_lj);
1378     } else if (pbc) {
1379       t3 = pbc_rvec_sub(pbc,x[l],x[j],dx_jl);
1380     } else {
1381       t3 = CENTRAL;
1382     }
1383     
1384     rvec_inc(fshift[t1],f_i);
1385     rvec_dec(fshift[CENTRAL],f_j);
1386     rvec_dec(fshift[t2],f_k);
1387     rvec_inc(fshift[t3],f_l);
1388   }
1389   /* 112 TOTAL  */
1390 }
1391
1392 /* As do_dih_fup above, but without shift forces */
1393 static void
1394 do_dih_fup_noshiftf(int i,int j,int k,int l,real ddphi,
1395                     rvec r_ij,rvec r_kj,rvec r_kl,
1396                     rvec m,rvec n,rvec f[])
1397 {
1398   rvec f_i,f_j,f_k,f_l;
1399   rvec uvec,vvec,svec,dx_jl;
1400   real iprm,iprn,nrkj,nrkj2,nrkj_1,nrkj_2;
1401   real a,b,p,q,toler;
1402   ivec jt,dt_ij,dt_kj,dt_lj;  
1403   
1404   iprm  = iprod(m,m);           /*  5   */
1405   iprn  = iprod(n,n);           /*  5   */
1406   nrkj2 = iprod(r_kj,r_kj);     /*  5   */
1407   toler = nrkj2*GMX_REAL_EPS;
1408   if ((iprm > toler) && (iprn > toler)) {
1409     nrkj_1 = gmx_invsqrt(nrkj2);        /* 10   */
1410     nrkj_2 = nrkj_1*nrkj_1;     /*  1   */
1411     nrkj  = nrkj2*nrkj_1;       /*  1   */
1412     a     = -ddphi*nrkj/iprm;   /* 11   */
1413     svmul(a,m,f_i);             /*  3   */
1414     b     = ddphi*nrkj/iprn;    /* 11   */
1415     svmul(b,n,f_l);             /*  3   */
1416     p     = iprod(r_ij,r_kj);   /*  5   */
1417     p    *= nrkj_2;             /*  1   */
1418     q     = iprod(r_kl,r_kj);   /*  5   */
1419     q    *= nrkj_2;             /*  1   */
1420     svmul(p,f_i,uvec);          /*  3   */
1421     svmul(q,f_l,vvec);          /*  3   */
1422     rvec_sub(uvec,vvec,svec);   /*  3   */
1423     rvec_sub(f_i,svec,f_j);     /*  3   */
1424     rvec_add(f_l,svec,f_k);     /*  3   */
1425     rvec_inc(f[i],f_i);         /*  3   */
1426     rvec_dec(f[j],f_j);         /*  3   */
1427     rvec_dec(f[k],f_k);         /*  3   */
1428     rvec_inc(f[l],f_l);         /*  3   */
1429   }
1430 }
1431
1432 /* As do_dih_fup_noshiftf above, but with pre-calculated pre-factors */
1433 static void
1434 do_dih_fup_noshiftf_precalc(int i,int j,int k,int l,real ddphi,
1435                             real nrkj_m2,real nrkj_n2,
1436                             real p,real q,
1437                             rvec m,rvec n,rvec f[])
1438 {
1439     rvec f_i,f_j,f_k,f_l;
1440     rvec uvec,vvec,svec,dx_jl;
1441     real a,b,toler;
1442     ivec jt,dt_ij,dt_kj,dt_lj;  
1443   
1444     a = -ddphi*nrkj_m2;
1445     svmul(a,m,f_i);
1446     b =  ddphi*nrkj_n2;
1447     svmul(b,n,f_l);
1448     svmul(p,f_i,uvec);
1449     svmul(q,f_l,vvec);
1450     rvec_sub(uvec,vvec,svec);
1451     rvec_sub(f_i,svec,f_j);
1452     rvec_add(f_l,svec,f_k);
1453     rvec_inc(f[i],f_i);
1454     rvec_dec(f[j],f_j);
1455     rvec_dec(f[k],f_k);
1456     rvec_inc(f[l],f_l);
1457 }
1458
1459
1460 real dopdihs(real cpA,real cpB,real phiA,real phiB,int mult,
1461              real phi,real lambda,real *V,real *F)
1462 {
1463   real v,dvdlambda,mdphi,v1,sdphi,ddphi;
1464   real L1   = 1.0 - lambda;
1465   real ph0  = (L1*phiA + lambda*phiB)*DEG2RAD;
1466   real dph0 = (phiB - phiA)*DEG2RAD;
1467   real cp   = L1*cpA + lambda*cpB;
1468   
1469   mdphi =  mult*phi - ph0;
1470   sdphi = sin(mdphi);
1471   ddphi = -cp*mult*sdphi;
1472   v1    = 1.0 + cos(mdphi);
1473   v     = cp*v1;
1474   
1475   dvdlambda  = (cpB - cpA)*v1 + cp*dph0*sdphi;
1476   
1477   *V = v;
1478   *F = ddphi;
1479   
1480   return dvdlambda;
1481   
1482   /* That was 40 flops */
1483 }
1484
1485 static void
1486 dopdihs_noener(real cpA,real cpB,real phiA,real phiB,int mult,
1487                real phi,real lambda,real *F)
1488 {
1489   real mdphi,sdphi,ddphi;
1490   real L1   = 1.0 - lambda;
1491   real ph0  = (L1*phiA + lambda*phiB)*DEG2RAD;
1492   real cp   = L1*cpA + lambda*cpB;
1493   
1494   mdphi = mult*phi - ph0;
1495   sdphi = sin(mdphi);
1496   ddphi = -cp*mult*sdphi;
1497   
1498   *F = ddphi;
1499   
1500   /* That was 20 flops */
1501 }
1502
1503 static void
1504 dopdihs_mdphi(real cpA,real cpB,real phiA,real phiB,int mult,
1505               real phi,real lambda,real *cp,real *mdphi)
1506 {
1507     real L1   = 1.0 - lambda;
1508     real ph0  = (L1*phiA + lambda*phiB)*DEG2RAD;
1509
1510     *cp    = L1*cpA + lambda*cpB;
1511
1512     *mdphi = mult*phi - ph0;
1513 }
1514
1515 static real dopdihs_min(real cpA,real cpB,real phiA,real phiB,int mult,
1516                         real phi,real lambda,real *V,real *F)
1517      /* similar to dopdihs, except for a minus sign  *
1518       * and a different treatment of mult/phi0       */
1519 {
1520   real v,dvdlambda,mdphi,v1,sdphi,ddphi;
1521   real L1   = 1.0 - lambda;
1522   real ph0  = (L1*phiA + lambda*phiB)*DEG2RAD;
1523   real dph0 = (phiB - phiA)*DEG2RAD;
1524   real cp   = L1*cpA + lambda*cpB;
1525   
1526   mdphi = mult*(phi-ph0);
1527   sdphi = sin(mdphi);
1528   ddphi = cp*mult*sdphi;
1529   v1    = 1.0-cos(mdphi);
1530   v     = cp*v1;
1531   
1532   dvdlambda  = (cpB-cpA)*v1 + cp*dph0*sdphi;
1533   
1534   *V = v;
1535   *F = ddphi;
1536   
1537   return dvdlambda;
1538   
1539   /* That was 40 flops */
1540 }
1541
1542 real pdihs(int nbonds,
1543            const t_iatom forceatoms[],const t_iparams forceparams[],
1544            const rvec x[],rvec f[],rvec fshift[],
1545            const t_pbc *pbc,const t_graph *g,
1546            real lambda,real *dvdlambda,
1547            const t_mdatoms *md,t_fcdata *fcd,
1548            int *global_atom_index)
1549 {
1550   int  i,type,ai,aj,ak,al;
1551   int  t1,t2,t3;
1552   rvec r_ij,r_kj,r_kl,m,n;
1553   real phi,sign,ddphi,vpd,vtot;
1554
1555   vtot = 0.0;
1556
1557   for(i=0; (i<nbonds); ) {
1558     type = forceatoms[i++];
1559     ai   = forceatoms[i++];
1560     aj   = forceatoms[i++];
1561     ak   = forceatoms[i++];
1562     al   = forceatoms[i++];
1563     
1564     phi=dih_angle(x[ai],x[aj],x[ak],x[al],pbc,r_ij,r_kj,r_kl,m,n,
1565                   &sign,&t1,&t2,&t3);                   /*  84          */
1566     *dvdlambda += dopdihs(forceparams[type].pdihs.cpA,
1567                           forceparams[type].pdihs.cpB,
1568                           forceparams[type].pdihs.phiA,
1569                           forceparams[type].pdihs.phiB,
1570                           forceparams[type].pdihs.mult,
1571                           phi,lambda,&vpd,&ddphi);
1572
1573     vtot += vpd;
1574     do_dih_fup(ai,aj,ak,al,ddphi,r_ij,r_kj,r_kl,m,n,
1575                f,fshift,pbc,g,x,t1,t2,t3);                      /* 112          */
1576
1577 #ifdef DEBUG
1578     fprintf(debug,"pdih: (%d,%d,%d,%d) phi=%g\n",
1579             ai,aj,ak,al,phi);
1580 #endif
1581   } /* 223 TOTAL        */
1582
1583   return vtot;
1584 }
1585
1586 void make_dp_periodic(real *dp)  /* 1 flop? */
1587 {
1588     /* dp cannot be outside (-pi,pi) */
1589     if (*dp >= M_PI)
1590     {
1591         *dp -= 2*M_PI;
1592     }
1593     else if (*dp < -M_PI)
1594     {
1595         *dp += 2*M_PI;
1596     }
1597     return;
1598 }
1599
1600 /* As pdihs above, but without calculating energies and shift forces */
1601 static void
1602 pdihs_noener(int nbonds,
1603              const t_iatom forceatoms[],const t_iparams forceparams[],
1604              const rvec x[],rvec f[],
1605              const t_pbc *pbc,const t_graph *g,
1606              real lambda,
1607              const t_mdatoms *md,t_fcdata *fcd,
1608              int *global_atom_index)
1609 {
1610     int  i,type,ai,aj,ak,al;
1611     int  t1,t2,t3;
1612     rvec r_ij,r_kj,r_kl,m,n;
1613     real phi,sign,ddphi_tot,ddphi;
1614
1615     for(i=0; (i<nbonds); )
1616     {
1617         ai   = forceatoms[i+1];
1618         aj   = forceatoms[i+2];
1619         ak   = forceatoms[i+3];
1620         al   = forceatoms[i+4];
1621
1622         phi = dih_angle(x[ai],x[aj],x[ak],x[al],pbc,r_ij,r_kj,r_kl,m,n,
1623                         &sign,&t1,&t2,&t3);
1624
1625         ddphi_tot = 0;
1626
1627         /* Loop over dihedrals working on the same atoms,
1628          * so we avoid recalculating angles and force distributions.
1629          */
1630         do
1631         {
1632             type = forceatoms[i];
1633             dopdihs_noener(forceparams[type].pdihs.cpA,
1634                            forceparams[type].pdihs.cpB,
1635                            forceparams[type].pdihs.phiA,
1636                            forceparams[type].pdihs.phiB,
1637                            forceparams[type].pdihs.mult,
1638                            phi,lambda,&ddphi);
1639             ddphi_tot += ddphi;
1640
1641             i += 5;
1642         }
1643         while(i < nbonds &&
1644               forceatoms[i+1] == ai &&
1645               forceatoms[i+2] == aj &&
1646               forceatoms[i+3] == ak &&
1647               forceatoms[i+4] == al);
1648
1649         do_dih_fup_noshiftf(ai,aj,ak,al,ddphi_tot,r_ij,r_kj,r_kl,m,n,f);
1650     }
1651 }
1652
1653
1654 #ifdef SSE_PROPER_DIHEDRALS
1655
1656 /* As pdihs_noner above, but using SSE to calculate 4 dihedrals at once */
1657 static void
1658 pdihs_noener_sse(int nbonds,
1659                  const t_iatom forceatoms[],const t_iparams forceparams[],
1660                  const rvec x[],rvec f[],
1661                  const t_pbc *pbc,const t_graph *g,
1662                  real lambda,
1663                  const t_mdatoms *md,t_fcdata *fcd,
1664                  int *global_atom_index)
1665 {
1666     int  i,i4,s;
1667     int  type,ai[4],aj[4],ak[4],al[4];
1668     int  t1[4],t2[4],t3[4];
1669     int  mult[4];
1670     real cp[4],mdphi[4];
1671     real ddphi;
1672     rvec_sse_t rs_array[13],*rs;
1673     real buf_array[24],*buf;
1674     __m128 mdphi_SSE,sin_SSE,cos_SSE;
1675
1676     /* Ensure 16-byte alignment */
1677     rs  = (rvec_sse_t *)(((size_t)(rs_array +1)) & (~((size_t)15)));
1678     buf =      (float *)(((size_t)(buf_array+3)) & (~((size_t)15)));
1679
1680     for(i=0; (i<nbonds); i+=20)
1681     {
1682         /* Collect atoms quadruplets for 4 dihedrals */
1683         i4 = i;
1684         for(s=0; s<4; s++)
1685         {
1686             ai[s] = forceatoms[i4+1];
1687             aj[s] = forceatoms[i4+2];
1688             ak[s] = forceatoms[i4+3];
1689             al[s] = forceatoms[i4+4];
1690             /* At the end fill the arrays with identical entries */
1691             if (i4 + 5 < nbonds)
1692             {
1693                 i4 += 5;
1694             }
1695         }
1696
1697         /* Caclulate 4 dihedral angles at once */
1698         dih_angle_sse(x,ai,aj,ak,al,pbc,t1,t2,t3,rs,buf);
1699
1700         i4 = i;
1701         for(s=0; s<4; s++)
1702         {
1703             if (i4 < nbonds)
1704             {
1705                 /* Calculate the coefficient and angle deviation */
1706                 type = forceatoms[i4];
1707                 dopdihs_mdphi(forceparams[type].pdihs.cpA,
1708                               forceparams[type].pdihs.cpB,
1709                               forceparams[type].pdihs.phiA,
1710                               forceparams[type].pdihs.phiB,
1711                               forceparams[type].pdihs.mult,
1712                               buf[16+s],lambda,&cp[s],&buf[16+s]);
1713                 mult[s] = forceparams[type].pdihs.mult;
1714             }
1715             else
1716             {
1717                 buf[16+s] = 0;
1718             }
1719             i4 += 5;
1720         }
1721
1722         /* Calculate 4 sines at once */
1723         mdphi_SSE = _mm_load_ps(buf+16);
1724         gmx_mm_sincos_ps(mdphi_SSE,&sin_SSE,&cos_SSE);
1725         _mm_store_ps(buf+16,sin_SSE);
1726
1727         i4 = i;
1728         s = 0;
1729         do
1730         {
1731             ddphi = -cp[s]*mult[s]*buf[16+s];
1732
1733             do_dih_fup_noshiftf_precalc(ai[s],aj[s],ak[s],al[s],ddphi,
1734                                         buf[ 0+s],buf[ 4+s],
1735                                         buf[ 8+s],buf[12+s],
1736                                         rs[0+s].v,rs[4+s].v,
1737                                         f);
1738             s++;
1739             i4 += 5;
1740         }
1741         while (s < 4 && i4 < nbonds);
1742     }
1743 }
1744
1745 #endif /* SSE_PROPER_DIHEDRALS */
1746
1747
1748 real idihs(int nbonds,
1749            const t_iatom forceatoms[],const t_iparams forceparams[],
1750            const rvec x[],rvec f[],rvec fshift[],
1751            const t_pbc *pbc,const t_graph *g,
1752            real lambda,real *dvdlambda,
1753            const t_mdatoms *md,t_fcdata *fcd,
1754            int *global_atom_index)
1755 {
1756   int  i,type,ai,aj,ak,al;
1757   int  t1,t2,t3;
1758   real phi,phi0,dphi0,ddphi,sign,vtot;
1759   rvec r_ij,r_kj,r_kl,m,n;
1760   real L1,kk,dp,dp2,kA,kB,pA,pB,dvdl_term;
1761
1762   L1 = 1.0-lambda;
1763   dvdl_term = 0;
1764   vtot = 0.0;
1765   for(i=0; (i<nbonds); ) {
1766     type = forceatoms[i++];
1767     ai   = forceatoms[i++];
1768     aj   = forceatoms[i++];
1769     ak   = forceatoms[i++];
1770     al   = forceatoms[i++];
1771     
1772     phi=dih_angle(x[ai],x[aj],x[ak],x[al],pbc,r_ij,r_kj,r_kl,m,n,
1773                   &sign,&t1,&t2,&t3);                   /*  84          */
1774     
1775     /* phi can jump if phi0 is close to Pi/-Pi, which will cause huge
1776      * force changes if we just apply a normal harmonic.
1777      * Instead, we first calculate phi-phi0 and take it modulo (-Pi,Pi).
1778      * This means we will never have the periodicity problem, unless
1779      * the dihedral is Pi away from phiO, which is very unlikely due to
1780      * the potential.
1781      */
1782     kA = forceparams[type].harmonic.krA;
1783     kB = forceparams[type].harmonic.krB;
1784     pA = forceparams[type].harmonic.rA;
1785     pB = forceparams[type].harmonic.rB;
1786
1787     kk    = L1*kA + lambda*kB;
1788     phi0  = (L1*pA + lambda*pB)*DEG2RAD;
1789     dphi0 = (pB - pA)*DEG2RAD;
1790
1791     dp = phi-phi0;  
1792
1793     make_dp_periodic(&dp);
1794     
1795     dp2 = dp*dp;
1796
1797     vtot += 0.5*kk*dp2;
1798     ddphi = -kk*dp;
1799     
1800     dvdl_term += 0.5*(kB - kA)*dp2 - kk*dphi0*dp;
1801
1802     do_dih_fup(ai,aj,ak,al,(real)(-ddphi),r_ij,r_kj,r_kl,m,n,
1803                f,fshift,pbc,g,x,t1,t2,t3);                      /* 112          */
1804     /* 218 TOTAL        */
1805 #ifdef DEBUG
1806     if (debug)
1807       fprintf(debug,"idih: (%d,%d,%d,%d) phi=%g\n",
1808               ai,aj,ak,al,phi);
1809 #endif
1810   }
1811   
1812   *dvdlambda += dvdl_term;
1813   return vtot;
1814 }
1815
1816
1817 real posres(int nbonds,
1818             const t_iatom forceatoms[],const t_iparams forceparams[],
1819             const rvec x[],rvec f[],rvec vir_diag,
1820             t_pbc *pbc,
1821             real lambda,real *dvdlambda,
1822             int refcoord_scaling,int ePBC,rvec comA,rvec comB)
1823 {
1824     int  i,ai,m,d,type,ki,npbcdim=0;
1825     const t_iparams *pr;
1826     real L1;
1827     real vtot,kk,fm;
1828     real posA,posB,ref=0;
1829     rvec comA_sc,comB_sc,rdist,dpdl,pos,dx;
1830     gmx_bool bForceValid = TRUE;
1831
1832     if ((f==NULL) || (vir_diag==NULL)) {  /* should both be null together! */
1833         bForceValid = FALSE;
1834     }
1835
1836     npbcdim = ePBC2npbcdim(ePBC);
1837
1838     if (refcoord_scaling == erscCOM)
1839     {
1840         clear_rvec(comA_sc);
1841         clear_rvec(comB_sc);
1842         for(m=0; m<npbcdim; m++)
1843         {
1844             for(d=m; d<npbcdim; d++)
1845             {
1846                 comA_sc[m] += comA[d]*pbc->box[d][m];
1847                 comB_sc[m] += comB[d]*pbc->box[d][m];
1848             }
1849         }
1850     }
1851
1852     L1 = 1.0 - lambda;
1853
1854     vtot = 0.0;
1855     for(i=0; (i<nbonds); )
1856     {
1857         type = forceatoms[i++];
1858         ai   = forceatoms[i++];
1859         pr   = &forceparams[type];
1860         
1861         for(m=0; m<DIM; m++)
1862         {
1863             posA = forceparams[type].posres.pos0A[m];
1864             posB = forceparams[type].posres.pos0B[m];
1865             if (m < npbcdim)
1866             {
1867                 switch (refcoord_scaling)
1868                 {
1869                 case erscNO:
1870                     ref      = 0;
1871                     rdist[m] = L1*posA + lambda*posB;
1872                     dpdl[m]  = posB - posA;
1873                     break;
1874                 case erscALL:
1875                     /* Box relative coordinates are stored for dimensions with pbc */
1876                     posA *= pbc->box[m][m];
1877                     posB *= pbc->box[m][m];
1878                     for(d=m+1; d<npbcdim; d++)
1879                     {
1880                         posA += forceparams[type].posres.pos0A[d]*pbc->box[d][m];
1881                         posB += forceparams[type].posres.pos0B[d]*pbc->box[d][m];
1882                     }
1883                     ref      = L1*posA + lambda*posB;
1884                     rdist[m] = 0;
1885                     dpdl[m]  = posB - posA;
1886                     break;
1887                 case erscCOM:
1888                     ref      = L1*comA_sc[m] + lambda*comB_sc[m];
1889                     rdist[m] = L1*posA       + lambda*posB;
1890                     dpdl[m]  = comB_sc[m] - comA_sc[m] + posB - posA;
1891                     break;
1892                 default:
1893                     gmx_fatal(FARGS, "No such scaling method implemented");
1894                 }
1895             }
1896             else
1897             {
1898                 ref      = L1*posA + lambda*posB;
1899                 rdist[m] = 0;
1900                 dpdl[m]  = posB - posA;
1901             }
1902
1903             /* We do pbc_dx with ref+rdist,
1904              * since with only ref we can be up to half a box vector wrong.
1905              */
1906             pos[m] = ref + rdist[m];
1907         }
1908
1909         if (pbc)
1910         {
1911             pbc_dx(pbc,x[ai],pos,dx);
1912         }
1913         else
1914         {
1915             rvec_sub(x[ai],pos,dx);
1916         }
1917
1918         for (m=0; (m<DIM); m++)
1919         {
1920             kk          = L1*pr->posres.fcA[m] + lambda*pr->posres.fcB[m];
1921             fm          = -kk*dx[m];
1922             vtot       += 0.5*kk*dx[m]*dx[m];
1923             *dvdlambda +=
1924                 0.5*(pr->posres.fcB[m] - pr->posres.fcA[m])*dx[m]*dx[m]
1925                 -fm*dpdl[m];
1926
1927             /* Here we correct for the pbc_dx which included rdist */
1928             if (bForceValid) {
1929                 f[ai][m]   += fm;
1930                 vir_diag[m] -= 0.5*(dx[m] + rdist[m])*fm;
1931             }
1932         }
1933     }
1934
1935     return vtot;
1936 }
1937
1938 static real low_angres(int nbonds,
1939                        const t_iatom forceatoms[],const t_iparams forceparams[],
1940                        const rvec x[],rvec f[],rvec fshift[],
1941                        const t_pbc *pbc,const t_graph *g,
1942                        real lambda,real *dvdlambda,
1943                        gmx_bool bZAxis)
1944 {
1945   int  i,m,type,ai,aj,ak,al;
1946   int  t1,t2;
1947   real phi,cos_phi,cos_phi2,vid,vtot,dVdphi;
1948   rvec r_ij,r_kl,f_i,f_k={0,0,0};
1949   real st,sth,nrij2,nrkl2,c,cij,ckl;
1950
1951   ivec dt;  
1952   t2 = 0; /* avoid warning with gcc-3.3. It is never used uninitialized */
1953
1954   vtot = 0.0;
1955   ak=al=0; /* to avoid warnings */
1956   for(i=0; i<nbonds; ) {
1957     type = forceatoms[i++];
1958     ai   = forceatoms[i++];
1959     aj   = forceatoms[i++];
1960     t1   = pbc_rvec_sub(pbc,x[aj],x[ai],r_ij);                  /*  3           */
1961     if (!bZAxis) {      
1962       ak   = forceatoms[i++];
1963       al   = forceatoms[i++];
1964       t2   = pbc_rvec_sub(pbc,x[al],x[ak],r_kl);           /*  3                */
1965     } else {
1966       r_kl[XX] = 0;
1967       r_kl[YY] = 0;
1968       r_kl[ZZ] = 1;
1969     }
1970
1971     cos_phi = cos_angle(r_ij,r_kl);             /* 25           */
1972     phi     = acos(cos_phi);                    /* 10           */
1973
1974     *dvdlambda += dopdihs_min(forceparams[type].pdihs.cpA,
1975                               forceparams[type].pdihs.cpB,
1976                               forceparams[type].pdihs.phiA,
1977                               forceparams[type].pdihs.phiB,
1978                               forceparams[type].pdihs.mult,
1979                               phi,lambda,&vid,&dVdphi); /*  40  */
1980     
1981     vtot += vid;
1982
1983     cos_phi2 = sqr(cos_phi);                    /*   1          */
1984     if (cos_phi2 < 1) {
1985       st  = -dVdphi*gmx_invsqrt(1 - cos_phi2);      /*  12              */
1986       sth = st*cos_phi;                         /*   1          */
1987       nrij2 = iprod(r_ij,r_ij);                 /*   5          */
1988       nrkl2 = iprod(r_kl,r_kl);                 /*   5          */
1989       
1990       c   = st*gmx_invsqrt(nrij2*nrkl2);                /*  11          */ 
1991       cij = sth/nrij2;                          /*  10          */
1992       ckl = sth/nrkl2;                          /*  10          */
1993       
1994       for (m=0; m<DIM; m++) {                   /*  18+18       */
1995         f_i[m] = (c*r_kl[m]-cij*r_ij[m]);
1996         f[ai][m] += f_i[m];
1997         f[aj][m] -= f_i[m];
1998         if (!bZAxis) {
1999           f_k[m] = (c*r_ij[m]-ckl*r_kl[m]);
2000           f[ak][m] += f_k[m];
2001           f[al][m] -= f_k[m];
2002         }
2003       }
2004       
2005       if (g) {
2006         ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
2007         t1=IVEC2IS(dt);
2008       }
2009       rvec_inc(fshift[t1],f_i);
2010       rvec_dec(fshift[CENTRAL],f_i);
2011       if (!bZAxis) {
2012         if (g) {
2013           ivec_sub(SHIFT_IVEC(g,ak),SHIFT_IVEC(g,al),dt);
2014           t2=IVEC2IS(dt);
2015         }
2016         rvec_inc(fshift[t2],f_k);
2017         rvec_dec(fshift[CENTRAL],f_k);
2018       }
2019     }
2020   }
2021
2022   return vtot;  /*  184 / 157 (bZAxis)  total  */
2023 }
2024
2025 real angres(int nbonds,
2026             const t_iatom forceatoms[],const t_iparams forceparams[],
2027             const rvec x[],rvec f[],rvec fshift[],
2028             const t_pbc *pbc,const t_graph *g,
2029             real lambda,real *dvdlambda,
2030             const t_mdatoms *md,t_fcdata *fcd,
2031             int *global_atom_index)
2032 {
2033   return low_angres(nbonds,forceatoms,forceparams,x,f,fshift,pbc,g,
2034                     lambda,dvdlambda,FALSE);
2035 }
2036
2037 real angresz(int nbonds,
2038              const t_iatom forceatoms[],const t_iparams forceparams[],
2039              const rvec x[],rvec f[],rvec fshift[],
2040              const t_pbc *pbc,const t_graph *g,
2041              real lambda,real *dvdlambda,
2042              const t_mdatoms *md,t_fcdata *fcd,
2043              int *global_atom_index)
2044 {
2045   return low_angres(nbonds,forceatoms,forceparams,x,f,fshift,pbc,g,
2046                     lambda,dvdlambda,TRUE);
2047 }
2048
2049 real dihres(int nbonds,
2050             const t_iatom forceatoms[],const t_iparams forceparams[],
2051             const rvec x[],rvec f[],rvec fshift[],
2052             const t_pbc *pbc,const t_graph *g,
2053             real lambda,real *dvdlambda,
2054             const t_mdatoms *md,t_fcdata *fcd,
2055             int *global_atom_index)
2056 {
2057     real vtot = 0;
2058     int  ai,aj,ak,al,i,k,type,t1,t2,t3;
2059     real phi0A,phi0B,dphiA,dphiB,kfacA,kfacB,phi0,dphi,kfac;
2060     real phi,ddphi,ddp,ddp2,dp,sign,d2r,fc,L1;
2061     rvec r_ij,r_kj,r_kl,m,n;
2062
2063     L1 = 1.0-lambda;
2064
2065     d2r = DEG2RAD;
2066     k   = 0;
2067
2068     for (i=0; (i<nbonds); )
2069     {
2070         type = forceatoms[i++];
2071         ai   = forceatoms[i++];
2072         aj   = forceatoms[i++];
2073         ak   = forceatoms[i++];
2074         al   = forceatoms[i++];
2075
2076         phi0A  = forceparams[type].dihres.phiA*d2r;
2077         dphiA  = forceparams[type].dihres.dphiA*d2r;
2078         kfacA  = forceparams[type].dihres.kfacA;
2079
2080         phi0B  = forceparams[type].dihres.phiB*d2r;
2081         dphiB  = forceparams[type].dihres.dphiB*d2r;
2082         kfacB  = forceparams[type].dihres.kfacB;
2083
2084         phi0  = L1*phi0A + lambda*phi0B;
2085         dphi  = L1*dphiA + lambda*dphiB;
2086         kfac = L1*kfacA + lambda*kfacB;
2087
2088         phi = dih_angle(x[ai],x[aj],x[ak],x[al],pbc,r_ij,r_kj,r_kl,m,n,
2089                         &sign,&t1,&t2,&t3);
2090         /* 84 flops */
2091
2092         if (debug)
2093         {
2094             fprintf(debug,"dihres[%d]: %d %d %d %d : phi=%f, dphi=%f, kfac=%f\n",
2095                     k++,ai,aj,ak,al,phi0,dphi,kfac);
2096         }
2097         /* phi can jump if phi0 is close to Pi/-Pi, which will cause huge
2098          * force changes if we just apply a normal harmonic.
2099          * Instead, we first calculate phi-phi0 and take it modulo (-Pi,Pi).
2100          * This means we will never have the periodicity problem, unless
2101          * the dihedral is Pi away from phiO, which is very unlikely due to
2102          * the potential.
2103          */
2104         dp = phi-phi0;
2105         make_dp_periodic(&dp);
2106
2107         if (dp > dphi)
2108         {
2109             ddp = dp-dphi;
2110         }
2111         else if (dp < -dphi)
2112         {
2113             ddp = dp+dphi;
2114         }
2115         else
2116         {
2117             ddp = 0;
2118         }
2119
2120         if (ddp != 0.0)
2121         {
2122             ddp2 = ddp*ddp;
2123             vtot += 0.5*kfac*ddp2;
2124             ddphi = kfac*ddp;
2125
2126             *dvdlambda += 0.5*(kfacB - kfacA)*ddp2;
2127             /* lambda dependence from changing restraint distances */
2128             if (ddp > 0)
2129             {
2130                 *dvdlambda -= kfac*ddp*((dphiB - dphiA)+(phi0B - phi0A));
2131             }
2132             else if (ddp < 0)
2133             {
2134                 *dvdlambda += kfac*ddp*((dphiB - dphiA)-(phi0B - phi0A));
2135             }
2136             do_dih_fup(ai,aj,ak,al,ddphi,r_ij,r_kj,r_kl,m,n,
2137                        f,fshift,pbc,g,x,t1,t2,t3);              /* 112          */
2138         }
2139     }
2140     return vtot;
2141 }
2142
2143
2144 real unimplemented(int nbonds,
2145                    const t_iatom forceatoms[],const t_iparams forceparams[],
2146                    const rvec x[],rvec f[],rvec fshift[],
2147                    const t_pbc *pbc,const t_graph *g,
2148                    real lambda,real *dvdlambda,
2149                    const t_mdatoms *md,t_fcdata *fcd,
2150                    int *global_atom_index)
2151 {
2152   gmx_impl("*** you are using a not implemented function");
2153
2154   return 0.0; /* To make the compiler happy */
2155 }
2156
2157 real rbdihs(int nbonds,
2158             const t_iatom forceatoms[],const t_iparams forceparams[],
2159             const rvec x[],rvec f[],rvec fshift[],
2160             const t_pbc *pbc,const t_graph *g,
2161             real lambda,real *dvdlambda,
2162             const t_mdatoms *md,t_fcdata *fcd,
2163             int *global_atom_index)
2164 {
2165   const real c0=0.0,c1=1.0,c2=2.0,c3=3.0,c4=4.0,c5=5.0;
2166   int  type,ai,aj,ak,al,i,j;
2167   int  t1,t2,t3;
2168   rvec r_ij,r_kj,r_kl,m,n;
2169   real parmA[NR_RBDIHS];
2170   real parmB[NR_RBDIHS];
2171   real parm[NR_RBDIHS];
2172   real cos_phi,phi,rbp,rbpBA;
2173   real v,sign,ddphi,sin_phi;
2174   real cosfac,vtot;
2175   real L1   = 1.0-lambda;
2176   real dvdl_term=0;
2177
2178   vtot = 0.0;
2179   for(i=0; (i<nbonds); ) {
2180     type = forceatoms[i++];
2181     ai   = forceatoms[i++];
2182     aj   = forceatoms[i++];
2183     ak   = forceatoms[i++];
2184     al   = forceatoms[i++];
2185
2186       phi=dih_angle(x[ai],x[aj],x[ak],x[al],pbc,r_ij,r_kj,r_kl,m,n,
2187                     &sign,&t1,&t2,&t3);                 /*  84          */
2188
2189     /* Change to polymer convention */
2190     if (phi < c0)
2191       phi += M_PI;
2192     else
2193       phi -= M_PI;                      /*   1          */
2194       
2195     cos_phi = cos(phi);         
2196     /* Beware of accuracy loss, cannot use 1-sqrt(cos^2) ! */
2197     sin_phi = sin(phi);
2198
2199     for(j=0; (j<NR_RBDIHS); j++) {
2200       parmA[j] = forceparams[type].rbdihs.rbcA[j];
2201       parmB[j] = forceparams[type].rbdihs.rbcB[j];
2202       parm[j]  = L1*parmA[j]+lambda*parmB[j];
2203     }
2204     /* Calculate cosine powers */
2205     /* Calculate the energy */
2206     /* Calculate the derivative */
2207
2208     v       = parm[0];
2209     dvdl_term   += (parmB[0]-parmA[0]);
2210     ddphi   = c0;
2211     cosfac  = c1;
2212     
2213     rbp     = parm[1];
2214     rbpBA   = parmB[1]-parmA[1];
2215     ddphi  += rbp*cosfac;
2216     cosfac *= cos_phi;
2217     v      += cosfac*rbp;
2218     dvdl_term   += cosfac*rbpBA;
2219     rbp     = parm[2];
2220     rbpBA   = parmB[2]-parmA[2];    
2221     ddphi  += c2*rbp*cosfac;
2222     cosfac *= cos_phi;
2223     v      += cosfac*rbp;
2224     dvdl_term   += cosfac*rbpBA;
2225     rbp     = parm[3];
2226     rbpBA   = parmB[3]-parmA[3];
2227     ddphi  += c3*rbp*cosfac;
2228     cosfac *= cos_phi;
2229     v      += cosfac*rbp;
2230     dvdl_term   += cosfac*rbpBA;
2231     rbp     = parm[4];
2232     rbpBA   = parmB[4]-parmA[4];
2233     ddphi  += c4*rbp*cosfac;
2234     cosfac *= cos_phi;
2235     v      += cosfac*rbp;
2236     dvdl_term   += cosfac*rbpBA;
2237     rbp     = parm[5];
2238     rbpBA   = parmB[5]-parmA[5];
2239     ddphi  += c5*rbp*cosfac;
2240     cosfac *= cos_phi;
2241     v      += cosfac*rbp;
2242     dvdl_term   += cosfac*rbpBA;
2243    
2244     ddphi = -ddphi*sin_phi;                             /*  11          */
2245     
2246     do_dih_fup(ai,aj,ak,al,ddphi,r_ij,r_kj,r_kl,m,n,
2247                f,fshift,pbc,g,x,t1,t2,t3);              /* 112          */
2248     vtot += v;
2249   }  
2250   *dvdlambda += dvdl_term;
2251
2252   return vtot;
2253 }
2254
2255 int cmap_setup_grid_index(int ip, int grid_spacing, int *ipm1, int *ipp1, int *ipp2)
2256 {
2257         int im1, ip1, ip2;
2258         
2259         if(ip<0)
2260         {
2261                 ip = ip + grid_spacing - 1;
2262         }
2263         else if(ip > grid_spacing)
2264         {
2265                 ip = ip - grid_spacing - 1;
2266         }
2267         
2268         im1 = ip - 1;
2269         ip1 = ip + 1;
2270         ip2 = ip + 2;
2271         
2272         if(ip == 0)
2273         {
2274                 im1 = grid_spacing - 1;
2275         }
2276         else if(ip == grid_spacing-2)
2277         {
2278                 ip2 = 0;
2279         }
2280         else if(ip == grid_spacing-1)
2281         {
2282                 ip1 = 0;
2283                 ip2 = 1;
2284         }
2285         
2286         *ipm1 = im1;
2287         *ipp1 = ip1;
2288         *ipp2 = ip2;
2289         
2290         return ip;
2291         
2292 }
2293
2294 real cmap_dihs(int nbonds,
2295                            const t_iatom forceatoms[],const t_iparams forceparams[],
2296                const gmx_cmap_t *cmap_grid,
2297                            const rvec x[],rvec f[],rvec fshift[],
2298                            const t_pbc *pbc,const t_graph *g,
2299                            real lambda,real *dvdlambda,
2300                            const t_mdatoms *md,t_fcdata *fcd,
2301                            int *global_atom_index)
2302 {
2303         int i,j,k,n,idx;
2304         int ai,aj,ak,al,am;
2305         int a1i,a1j,a1k,a1l,a2i,a2j,a2k,a2l;
2306         int type,cmapA;
2307         int t11,t21,t31,t12,t22,t32;
2308         int iphi1,ip1m1,ip1p1,ip1p2;
2309         int iphi2,ip2m1,ip2p1,ip2p2;
2310         int l1,l2,l3,l4;
2311         int pos1,pos2,pos3,pos4,tmp;
2312         
2313         real ty[4],ty1[4],ty2[4],ty12[4],tc[16],tx[16];
2314         real phi1,psi1,cos_phi1,sin_phi1,sign1,xphi1;
2315         real phi2,psi2,cos_phi2,sin_phi2,sign2,xphi2;
2316         real dx,xx,tt,tu,e,df1,df2,ddf1,ddf2,ddf12,vtot;
2317         real ra21,rb21,rg21,rg1,rgr1,ra2r1,rb2r1,rabr1;
2318         real ra22,rb22,rg22,rg2,rgr2,ra2r2,rb2r2,rabr2;
2319         real fg1,hg1,fga1,hgb1,gaa1,gbb1;
2320         real fg2,hg2,fga2,hgb2,gaa2,gbb2;
2321         real fac;
2322         
2323         rvec r1_ij, r1_kj, r1_kl,m1,n1;
2324         rvec r2_ij, r2_kj, r2_kl,m2,n2;
2325         rvec f1_i,f1_j,f1_k,f1_l;
2326         rvec f2_i,f2_j,f2_k,f2_l;
2327         rvec a1,b1,a2,b2;
2328         rvec f1,g1,h1,f2,g2,h2;
2329         rvec dtf1,dtg1,dth1,dtf2,dtg2,dth2;
2330         ivec jt1,dt1_ij,dt1_kj,dt1_lj;
2331         ivec jt2,dt2_ij,dt2_kj,dt2_lj;
2332
2333     const real *cmapd;
2334
2335         int loop_index[4][4] = {
2336                 {0,4,8,12},
2337                 {1,5,9,13},
2338                 {2,6,10,14},
2339                 {3,7,11,15}
2340         };
2341         
2342         /* Total CMAP energy */
2343         vtot = 0;
2344         
2345         for(n=0;n<nbonds; )
2346         {
2347                 /* Five atoms are involved in the two torsions */
2348                 type   = forceatoms[n++];
2349                 ai     = forceatoms[n++];
2350                 aj     = forceatoms[n++];
2351                 ak     = forceatoms[n++];
2352                 al     = forceatoms[n++];
2353                 am     = forceatoms[n++];
2354                 
2355                 /* Which CMAP type is this */
2356                 cmapA = forceparams[type].cmap.cmapA;
2357         cmapd = cmap_grid->cmapdata[cmapA].cmap;
2358
2359                 /* First torsion */
2360                 a1i   = ai;
2361                 a1j   = aj;
2362                 a1k   = ak;
2363                 a1l   = al;
2364                 
2365                 phi1  = dih_angle(x[a1i], x[a1j], x[a1k], x[a1l], pbc, r1_ij, r1_kj, r1_kl, m1, n1,
2366                                                    &sign1, &t11, &t21, &t31); /* 84 */
2367                 
2368         cos_phi1 = cos(phi1);
2369         
2370                 a1[0] = r1_ij[1]*r1_kj[2]-r1_ij[2]*r1_kj[1];
2371                 a1[1] = r1_ij[2]*r1_kj[0]-r1_ij[0]*r1_kj[2];
2372                 a1[2] = r1_ij[0]*r1_kj[1]-r1_ij[1]*r1_kj[0]; /* 9 */
2373                 
2374                 b1[0] = r1_kl[1]*r1_kj[2]-r1_kl[2]*r1_kj[1];
2375                 b1[1] = r1_kl[2]*r1_kj[0]-r1_kl[0]*r1_kj[2];
2376                 b1[2] = r1_kl[0]*r1_kj[1]-r1_kl[1]*r1_kj[0]; /* 9 */
2377                 
2378                 tmp = pbc_rvec_sub(pbc,x[a1l],x[a1k],h1);
2379                 
2380                 ra21  = iprod(a1,a1);       /* 5 */
2381                 rb21  = iprod(b1,b1);       /* 5 */
2382                 rg21  = iprod(r1_kj,r1_kj); /* 5 */
2383                 rg1   = sqrt(rg21);
2384                 
2385                 rgr1  = 1.0/rg1;
2386                 ra2r1 = 1.0/ra21;
2387                 rb2r1 = 1.0/rb21;
2388                 rabr1 = sqrt(ra2r1*rb2r1);
2389                 
2390                 sin_phi1 = rg1 * rabr1 * iprod(a1,h1) * (-1);
2391                 
2392                 if(cos_phi1 < -0.5 || cos_phi1 > 0.5)
2393                 {
2394                         phi1 = asin(sin_phi1);
2395                         
2396                         if(cos_phi1 < 0)
2397                         {
2398                                 if(phi1 > 0)
2399                                 {
2400                                         phi1 = M_PI - phi1;
2401                                 }
2402                                 else
2403                                 {
2404                                         phi1 = -M_PI - phi1;
2405                                 }
2406                         }
2407                 }
2408                 else
2409                 {
2410                         phi1 = acos(cos_phi1);
2411                         
2412                         if(sin_phi1 < 0)
2413                         {
2414                                 phi1 = -phi1;
2415                         }
2416                 }
2417                 
2418                 xphi1 = phi1 + M_PI; /* 1 */
2419                 
2420                 /* Second torsion */
2421                 a2i   = aj;
2422                 a2j   = ak;
2423                 a2k   = al;
2424                 a2l   = am;
2425                 
2426                 phi2  = dih_angle(x[a2i], x[a2j], x[a2k], x[a2l], pbc, r2_ij, r2_kj, r2_kl, m2, n2,
2427                                                   &sign2, &t12, &t22, &t32); /* 84 */
2428                 
2429         cos_phi2 = cos(phi2);
2430
2431                 a2[0] = r2_ij[1]*r2_kj[2]-r2_ij[2]*r2_kj[1];
2432                 a2[1] = r2_ij[2]*r2_kj[0]-r2_ij[0]*r2_kj[2];
2433                 a2[2] = r2_ij[0]*r2_kj[1]-r2_ij[1]*r2_kj[0]; /* 9 */
2434                 
2435                 b2[0] = r2_kl[1]*r2_kj[2]-r2_kl[2]*r2_kj[1];
2436                 b2[1] = r2_kl[2]*r2_kj[0]-r2_kl[0]*r2_kj[2];
2437                 b2[2] = r2_kl[0]*r2_kj[1]-r2_kl[1]*r2_kj[0]; /* 9 */
2438                 
2439                 tmp = pbc_rvec_sub(pbc,x[a2l],x[a2k],h2);
2440                 
2441                 ra22  = iprod(a2,a2);         /* 5 */
2442                 rb22  = iprod(b2,b2);         /* 5 */
2443                 rg22  = iprod(r2_kj,r2_kj);   /* 5 */
2444                 rg2   = sqrt(rg22);
2445                 
2446                 rgr2  = 1.0/rg2;
2447                 ra2r2 = 1.0/ra22;
2448                 rb2r2 = 1.0/rb22;
2449                 rabr2 = sqrt(ra2r2*rb2r2);
2450                 
2451                 sin_phi2 = rg2 * rabr2 * iprod(a2,h2) * (-1);
2452                 
2453                 if(cos_phi2 < -0.5 || cos_phi2 > 0.5)
2454                 {
2455                         phi2 = asin(sin_phi2);
2456                         
2457                         if(cos_phi2 < 0)
2458                         {
2459                                 if(phi2 > 0)
2460                                 {
2461                                         phi2 = M_PI - phi2;
2462                                 }
2463                                 else
2464                                 {
2465                                         phi2 = -M_PI - phi2;
2466                                 }
2467                         }
2468                 }
2469                 else
2470                 {
2471                         phi2 = acos(cos_phi2);
2472                         
2473                         if(sin_phi2 < 0)
2474                         {
2475                                 phi2 = -phi2;
2476                         }
2477                 }
2478                 
2479                 xphi2 = phi2 + M_PI; /* 1 */
2480                 
2481                 /* Range mangling */
2482                 if(xphi1<0)
2483                 {
2484                         xphi1 = xphi1 + 2*M_PI;
2485                 }
2486                 else if(xphi1>=2*M_PI)
2487                 {
2488                         xphi1 = xphi1 - 2*M_PI;
2489                 }
2490                 
2491                 if(xphi2<0)
2492                 {
2493                         xphi2 = xphi2 + 2*M_PI;
2494                 }
2495                 else if(xphi2>=2*M_PI)
2496                 {
2497                         xphi2 = xphi2 - 2*M_PI;
2498                 }
2499                 
2500                 /* Number of grid points */
2501                 dx = 2*M_PI / cmap_grid->grid_spacing;
2502                 
2503                 /* Where on the grid are we */
2504                 iphi1 = (int)(xphi1/dx);
2505                 iphi2 = (int)(xphi2/dx);
2506                 
2507                 iphi1 = cmap_setup_grid_index(iphi1, cmap_grid->grid_spacing, &ip1m1,&ip1p1,&ip1p2);
2508                 iphi2 = cmap_setup_grid_index(iphi2, cmap_grid->grid_spacing, &ip2m1,&ip2p1,&ip2p2);
2509                 
2510                 pos1    = iphi1*cmap_grid->grid_spacing+iphi2;
2511                 pos2    = ip1p1*cmap_grid->grid_spacing+iphi2;
2512                 pos3    = ip1p1*cmap_grid->grid_spacing+ip2p1;
2513                 pos4    = iphi1*cmap_grid->grid_spacing+ip2p1;
2514
2515                 ty[0]   = cmapd[pos1*4];
2516                 ty[1]   = cmapd[pos2*4];
2517                 ty[2]   = cmapd[pos3*4];
2518                 ty[3]   = cmapd[pos4*4];
2519                 
2520                 ty1[0]   = cmapd[pos1*4+1];
2521                 ty1[1]   = cmapd[pos2*4+1];
2522                 ty1[2]   = cmapd[pos3*4+1];
2523                 ty1[3]   = cmapd[pos4*4+1];
2524                 
2525                 ty2[0]   = cmapd[pos1*4+2];
2526                 ty2[1]   = cmapd[pos2*4+2];
2527                 ty2[2]   = cmapd[pos3*4+2];
2528                 ty2[3]   = cmapd[pos4*4+2];
2529                 
2530                 ty12[0]   = cmapd[pos1*4+3];
2531                 ty12[1]   = cmapd[pos2*4+3];
2532                 ty12[2]   = cmapd[pos3*4+3];
2533                 ty12[3]   = cmapd[pos4*4+3];
2534                 
2535                 /* Switch to degrees */
2536                 dx = 360.0 / cmap_grid->grid_spacing;
2537                 xphi1 = xphi1 * RAD2DEG;
2538                 xphi2 = xphi2 * RAD2DEG; 
2539                 
2540                 for(i=0;i<4;i++) /* 16 */
2541                 {
2542                         tx[i] = ty[i];
2543                         tx[i+4] = ty1[i]*dx;
2544                         tx[i+8] = ty2[i]*dx;
2545                         tx[i+12] = ty12[i]*dx*dx;
2546                 }
2547                 
2548                 idx=0;
2549                 for(i=0;i<4;i++) /* 1056 */
2550                 {
2551                         for(j=0;j<4;j++)
2552                         {
2553                                 xx = 0;
2554                                 for(k=0;k<16;k++)
2555                                 {
2556                                         xx = xx + cmap_coeff_matrix[k*16+idx]*tx[k];
2557                                 }
2558                                 
2559                                 idx++;
2560                                 tc[i*4+j]=xx;
2561                         }
2562                 }
2563                 
2564                 tt    = (xphi1-iphi1*dx)/dx;
2565                 tu    = (xphi2-iphi2*dx)/dx;
2566                 
2567                 e     = 0;
2568                 df1   = 0;
2569                 df2   = 0;
2570                 ddf1  = 0;
2571                 ddf2  = 0;
2572                 ddf12 = 0;
2573                 
2574                 for(i=3;i>=0;i--)
2575                 {
2576                         l1 = loop_index[i][3];
2577                         l2 = loop_index[i][2];
2578                         l3 = loop_index[i][1];
2579                         
2580                         e     = tt * e    + ((tc[i*4+3]*tu+tc[i*4+2])*tu + tc[i*4+1])*tu+tc[i*4];
2581                         df1   = tu * df1  + (3.0*tc[l1]*tt+2.0*tc[l2])*tt+tc[l3];
2582                         df2   = tt * df2  + (3.0*tc[i*4+3]*tu+2.0*tc[i*4+2])*tu+tc[i*4+1];
2583                         ddf1  = tu * ddf1 + 2.0*3.0*tc[l1]*tt+2.0*tc[l2];
2584                         ddf2  = tt * ddf2 + 2.0*3.0*tc[4*i+3]*tu+2.0*tc[4*i+2];
2585                 }
2586                 
2587                 ddf12 = tc[5] + 2.0*tc[9]*tt + 3.0*tc[13]*tt*tt + 2.0*tu*(tc[6]+2.0*tc[10]*tt+3.0*tc[14]*tt*tt) +
2588                 3.0*tu*tu*(tc[7]+2.0*tc[11]*tt+3.0*tc[15]*tt*tt);
2589                 
2590                 fac     = RAD2DEG/dx;
2591                 df1     = df1   * fac;
2592                 df2     = df2   * fac;
2593                 ddf1    = ddf1  * fac * fac;
2594                 ddf2    = ddf2  * fac * fac;
2595                 ddf12   = ddf12 * fac * fac;
2596                 
2597                 /* CMAP energy */
2598                 vtot += e;
2599                 
2600                 /* Do forces - first torsion */
2601                 fg1       = iprod(r1_ij,r1_kj);
2602                 hg1       = iprod(r1_kl,r1_kj);
2603                 fga1      = fg1*ra2r1*rgr1;
2604                 hgb1      = hg1*rb2r1*rgr1;
2605                 gaa1      = -ra2r1*rg1;
2606                 gbb1      = rb2r1*rg1;
2607                 
2608                 for(i=0;i<DIM;i++)
2609                 {
2610                         dtf1[i]   = gaa1 * a1[i];
2611                         dtg1[i]   = fga1 * a1[i] - hgb1 * b1[i];
2612                         dth1[i]   = gbb1 * b1[i];
2613                         
2614                         f1[i]     = df1  * dtf1[i];
2615                         g1[i]     = df1  * dtg1[i];
2616                         h1[i]     = df1  * dth1[i];
2617                         
2618                         f1_i[i]   =  f1[i];
2619                         f1_j[i]   = -f1[i] - g1[i];
2620                         f1_k[i]   =  h1[i] + g1[i];
2621                         f1_l[i]   = -h1[i];
2622                         
2623                         f[a1i][i] = f[a1i][i] + f1_i[i];
2624                         f[a1j][i] = f[a1j][i] + f1_j[i]; /* - f1[i] - g1[i] */                                                            
2625                         f[a1k][i] = f[a1k][i] + f1_k[i]; /* h1[i] + g1[i] */                                                            
2626                         f[a1l][i] = f[a1l][i] + f1_l[i]; /* h1[i] */                                                                       
2627                 }
2628                 
2629                 /* Do forces - second torsion */
2630                 fg2       = iprod(r2_ij,r2_kj);
2631                 hg2       = iprod(r2_kl,r2_kj);
2632                 fga2      = fg2*ra2r2*rgr2;
2633                 hgb2      = hg2*rb2r2*rgr2;
2634                 gaa2      = -ra2r2*rg2;
2635                 gbb2      = rb2r2*rg2;
2636                 
2637                 for(i=0;i<DIM;i++)
2638                 {
2639                         dtf2[i]   = gaa2 * a2[i];
2640                         dtg2[i]   = fga2 * a2[i] - hgb2 * b2[i];
2641                         dth2[i]   = gbb2 * b2[i];
2642                         
2643                         f2[i]     = df2  * dtf2[i];
2644                         g2[i]     = df2  * dtg2[i];
2645                         h2[i]     = df2  * dth2[i];
2646                         
2647                         f2_i[i]   =  f2[i];
2648                         f2_j[i]   = -f2[i] - g2[i];
2649                         f2_k[i]   =  h2[i] + g2[i];
2650                         f2_l[i]   = -h2[i];
2651                         
2652                         f[a2i][i] = f[a2i][i] + f2_i[i]; /* f2[i] */                                                                        
2653                         f[a2j][i] = f[a2j][i] + f2_j[i]; /* - f2[i] - g2[i] */                                                              
2654                         f[a2k][i] = f[a2k][i] + f2_k[i]; /* h2[i] + g2[i] */                            
2655                         f[a2l][i] = f[a2l][i] + f2_l[i]; /* - h2[i] */                                                                      
2656                 }
2657                 
2658                 /* Shift forces */
2659                 if(g)
2660                 {
2661                         copy_ivec(SHIFT_IVEC(g,a1j), jt1);
2662                         ivec_sub(SHIFT_IVEC(g,a1i),  jt1,dt1_ij);
2663                         ivec_sub(SHIFT_IVEC(g,a1k),  jt1,dt1_kj);
2664                         ivec_sub(SHIFT_IVEC(g,a1l),  jt1,dt1_lj);
2665                         t11 = IVEC2IS(dt1_ij);
2666                         t21 = IVEC2IS(dt1_kj);
2667                         t31 = IVEC2IS(dt1_lj);
2668                         
2669                         copy_ivec(SHIFT_IVEC(g,a2j), jt2);
2670                         ivec_sub(SHIFT_IVEC(g,a2i),  jt2,dt2_ij);
2671                         ivec_sub(SHIFT_IVEC(g,a2k),  jt2,dt2_kj);
2672                         ivec_sub(SHIFT_IVEC(g,a2l),  jt2,dt2_lj);
2673                         t12 = IVEC2IS(dt2_ij);
2674                         t22 = IVEC2IS(dt2_kj);
2675                         t32 = IVEC2IS(dt2_lj);
2676                 }
2677                 else if(pbc)
2678                 {
2679                         t31 = pbc_rvec_sub(pbc,x[a1l],x[a1j],h1);
2680                         t32 = pbc_rvec_sub(pbc,x[a2l],x[a2j],h2);
2681                 }
2682                 else
2683                 {
2684                         t31 = CENTRAL;
2685                         t32 = CENTRAL;
2686                 }
2687                 
2688                 rvec_inc(fshift[t11],f1_i);
2689                 rvec_inc(fshift[CENTRAL],f1_j);
2690                 rvec_inc(fshift[t21],f1_k);
2691                 rvec_inc(fshift[t31],f1_l);
2692                 
2693                 rvec_inc(fshift[t21],f2_i);
2694                 rvec_inc(fshift[CENTRAL],f2_j);
2695                 rvec_inc(fshift[t22],f2_k);
2696                 rvec_inc(fshift[t32],f2_l);
2697         }       
2698         return vtot;
2699 }
2700
2701
2702
2703 /***********************************************************
2704  *
2705  *   G R O M O S  9 6   F U N C T I O N S
2706  *
2707  ***********************************************************/
2708 real g96harmonic(real kA,real kB,real xA,real xB,real x,real lambda,
2709                  real *V,real *F)
2710 {
2711   const real half=0.5;
2712   real  L1,kk,x0,dx,dx2;
2713   real  v,f,dvdlambda;
2714   
2715   L1    = 1.0-lambda;
2716   kk    = L1*kA+lambda*kB;
2717   x0    = L1*xA+lambda*xB;
2718   
2719   dx    = x-x0;
2720   dx2   = dx*dx;
2721   
2722   f     = -kk*dx;
2723   v     = half*kk*dx2;
2724   dvdlambda  = half*(kB-kA)*dx2 + (xA-xB)*kk*dx;
2725   
2726   *F    = f;
2727   *V    = v;
2728   
2729   return dvdlambda;
2730   
2731   /* That was 21 flops */
2732 }
2733
2734 real g96bonds(int nbonds,
2735               const t_iatom forceatoms[],const t_iparams forceparams[],
2736               const rvec x[],rvec f[],rvec fshift[],
2737               const t_pbc *pbc,const t_graph *g,
2738               real lambda,real *dvdlambda,
2739               const t_mdatoms *md,t_fcdata *fcd,
2740               int *global_atom_index)
2741 {
2742   int  i,m,ki,ai,aj,type;
2743   real dr2,fbond,vbond,fij,vtot;
2744   rvec dx;
2745   ivec dt;
2746   
2747   vtot = 0.0;
2748   for(i=0; (i<nbonds); ) {
2749     type = forceatoms[i++];
2750     ai   = forceatoms[i++];
2751     aj   = forceatoms[i++];
2752   
2753     ki   = pbc_rvec_sub(pbc,x[ai],x[aj],dx);            /*   3          */
2754     dr2  = iprod(dx,dx);                                /*   5          */
2755       
2756     *dvdlambda += g96harmonic(forceparams[type].harmonic.krA,
2757                               forceparams[type].harmonic.krB,
2758                               forceparams[type].harmonic.rA,
2759                               forceparams[type].harmonic.rB,
2760                               dr2,lambda,&vbond,&fbond);
2761
2762     vtot  += 0.5*vbond;                             /* 1*/
2763 #ifdef DEBUG
2764     if (debug)
2765       fprintf(debug,"G96-BONDS: dr = %10g  vbond = %10g  fbond = %10g\n",
2766               sqrt(dr2),vbond,fbond);
2767 #endif
2768    
2769     if (g) {
2770       ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
2771       ki=IVEC2IS(dt);
2772     }
2773     for (m=0; (m<DIM); m++) {                   /*  15          */
2774       fij=fbond*dx[m];
2775       f[ai][m]+=fij;
2776       f[aj][m]-=fij;
2777       fshift[ki][m]+=fij;
2778       fshift[CENTRAL][m]-=fij;
2779     }
2780   }                                     /* 44 TOTAL     */
2781   return vtot;
2782 }
2783
2784 real g96bond_angle(const rvec xi,const rvec xj,const rvec xk,const t_pbc *pbc,
2785                    rvec r_ij,rvec r_kj,
2786                    int *t1,int *t2)
2787 /* Return value is the angle between the bonds i-j and j-k */
2788 {
2789   real costh;
2790   
2791   *t1 = pbc_rvec_sub(pbc,xi,xj,r_ij);                   /*  3           */
2792   *t2 = pbc_rvec_sub(pbc,xk,xj,r_kj);                   /*  3           */
2793
2794   costh=cos_angle(r_ij,r_kj);           /* 25           */
2795                                         /* 41 TOTAL     */
2796   return costh;
2797 }
2798
2799 real g96angles(int nbonds,
2800                const t_iatom forceatoms[],const t_iparams forceparams[],
2801                const rvec x[],rvec f[],rvec fshift[],
2802                const t_pbc *pbc,const t_graph *g,
2803                real lambda,real *dvdlambda,
2804                const t_mdatoms *md,t_fcdata *fcd,
2805                int *global_atom_index)
2806 {
2807   int  i,ai,aj,ak,type,m,t1,t2;
2808   rvec r_ij,r_kj;
2809   real cos_theta,dVdt,va,vtot;
2810   real rij_1,rij_2,rkj_1,rkj_2,rijrkj_1;
2811   rvec f_i,f_j,f_k;
2812   ivec jt,dt_ij,dt_kj;
2813   
2814   vtot = 0.0;
2815   for(i=0; (i<nbonds); ) {
2816     type = forceatoms[i++];
2817     ai   = forceatoms[i++];
2818     aj   = forceatoms[i++];
2819     ak   = forceatoms[i++];
2820     
2821     cos_theta  = g96bond_angle(x[ai],x[aj],x[ak],pbc,r_ij,r_kj,&t1,&t2);
2822
2823     *dvdlambda += g96harmonic(forceparams[type].harmonic.krA,
2824                               forceparams[type].harmonic.krB,
2825                               forceparams[type].harmonic.rA,
2826                               forceparams[type].harmonic.rB,
2827                               cos_theta,lambda,&va,&dVdt);
2828     vtot    += va;
2829     
2830     rij_1    = gmx_invsqrt(iprod(r_ij,r_ij));
2831     rkj_1    = gmx_invsqrt(iprod(r_kj,r_kj));
2832     rij_2    = rij_1*rij_1;
2833     rkj_2    = rkj_1*rkj_1;
2834     rijrkj_1 = rij_1*rkj_1;                     /* 23 */
2835     
2836 #ifdef DEBUG
2837     if (debug)
2838       fprintf(debug,"G96ANGLES: costheta = %10g  vth = %10g  dV/dct = %10g\n",
2839               cos_theta,va,dVdt);
2840 #endif
2841     for (m=0; (m<DIM); m++) {                   /*  42  */
2842       f_i[m]=dVdt*(r_kj[m]*rijrkj_1 - r_ij[m]*rij_2*cos_theta);
2843       f_k[m]=dVdt*(r_ij[m]*rijrkj_1 - r_kj[m]*rkj_2*cos_theta);
2844       f_j[m]=-f_i[m]-f_k[m];
2845       f[ai][m]+=f_i[m];
2846       f[aj][m]+=f_j[m];
2847       f[ak][m]+=f_k[m];
2848     }
2849     
2850     if (g) {
2851       copy_ivec(SHIFT_IVEC(g,aj),jt);
2852       
2853       ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
2854       ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
2855       t1=IVEC2IS(dt_ij);
2856       t2=IVEC2IS(dt_kj);
2857     }      
2858     rvec_inc(fshift[t1],f_i);
2859     rvec_inc(fshift[CENTRAL],f_j);
2860     rvec_inc(fshift[t2],f_k);               /* 9 */
2861     /* 163 TOTAL        */
2862   }
2863   return vtot;
2864 }
2865
2866 real cross_bond_bond(int nbonds,
2867                      const t_iatom forceatoms[],const t_iparams forceparams[],
2868                      const rvec x[],rvec f[],rvec fshift[],
2869                      const t_pbc *pbc,const t_graph *g,
2870                      real lambda,real *dvdlambda,
2871                      const t_mdatoms *md,t_fcdata *fcd,
2872                      int *global_atom_index)
2873 {
2874   /* Potential from Lawrence and Skimmer, Chem. Phys. Lett. 372 (2003)
2875    * pp. 842-847
2876    */
2877   int  i,ai,aj,ak,type,m,t1,t2;
2878   rvec r_ij,r_kj;
2879   real vtot,vrr,s1,s2,r1,r2,r1e,r2e,krr;
2880   rvec f_i,f_j,f_k;
2881   ivec jt,dt_ij,dt_kj;
2882   
2883   vtot = 0.0;
2884   for(i=0; (i<nbonds); ) {
2885     type = forceatoms[i++];
2886     ai   = forceatoms[i++];
2887     aj   = forceatoms[i++];
2888     ak   = forceatoms[i++];
2889     r1e  = forceparams[type].cross_bb.r1e;
2890     r2e  = forceparams[type].cross_bb.r2e;
2891     krr  = forceparams[type].cross_bb.krr;
2892     
2893     /* Compute distance vectors ... */
2894     t1 = pbc_rvec_sub(pbc,x[ai],x[aj],r_ij);
2895     t2 = pbc_rvec_sub(pbc,x[ak],x[aj],r_kj);
2896     
2897     /* ... and their lengths */
2898     r1 = norm(r_ij);
2899     r2 = norm(r_kj);
2900     
2901     /* Deviations from ideality */
2902     s1 = r1-r1e;
2903     s2 = r2-r2e;
2904     
2905     /* Energy (can be negative!) */
2906     vrr   = krr*s1*s2;
2907     vtot += vrr;
2908     
2909     /* Forces */
2910     svmul(-krr*s2/r1,r_ij,f_i);
2911     svmul(-krr*s1/r2,r_kj,f_k);
2912     
2913     for (m=0; (m<DIM); m++) {                   /*  12  */
2914       f_j[m]    = -f_i[m] - f_k[m];
2915       f[ai][m] += f_i[m];
2916       f[aj][m] += f_j[m];
2917       f[ak][m] += f_k[m];
2918     }
2919     
2920     /* Virial stuff */
2921     if (g) {
2922       copy_ivec(SHIFT_IVEC(g,aj),jt);
2923       
2924       ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
2925       ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
2926       t1=IVEC2IS(dt_ij);
2927       t2=IVEC2IS(dt_kj);
2928     }      
2929     rvec_inc(fshift[t1],f_i);
2930     rvec_inc(fshift[CENTRAL],f_j);
2931     rvec_inc(fshift[t2],f_k);               /* 9 */
2932     /* 163 TOTAL        */
2933   }
2934   return vtot;
2935 }
2936
2937 real cross_bond_angle(int nbonds,
2938                       const t_iatom forceatoms[],const t_iparams forceparams[],
2939                       const rvec x[],rvec f[],rvec fshift[],
2940                       const t_pbc *pbc,const t_graph *g,
2941                       real lambda,real *dvdlambda,
2942                       const t_mdatoms *md,t_fcdata *fcd,
2943                       int *global_atom_index)
2944 {
2945   /* Potential from Lawrence and Skimmer, Chem. Phys. Lett. 372 (2003)
2946    * pp. 842-847
2947    */
2948   int  i,ai,aj,ak,type,m,t1,t2,t3;
2949   rvec r_ij,r_kj,r_ik;
2950   real vtot,vrt,s1,s2,s3,r1,r2,r3,r1e,r2e,r3e,krt,k1,k2,k3;
2951   rvec f_i,f_j,f_k;
2952   ivec jt,dt_ij,dt_kj;
2953   
2954   vtot = 0.0;
2955   for(i=0; (i<nbonds); ) {
2956     type = forceatoms[i++];
2957     ai   = forceatoms[i++];
2958     aj   = forceatoms[i++];
2959     ak   = forceatoms[i++];
2960     r1e  = forceparams[type].cross_ba.r1e;
2961     r2e  = forceparams[type].cross_ba.r2e;
2962     r3e  = forceparams[type].cross_ba.r3e;
2963     krt  = forceparams[type].cross_ba.krt;
2964     
2965     /* Compute distance vectors ... */
2966     t1 = pbc_rvec_sub(pbc,x[ai],x[aj],r_ij);
2967     t2 = pbc_rvec_sub(pbc,x[ak],x[aj],r_kj);
2968     t3 = pbc_rvec_sub(pbc,x[ai],x[ak],r_ik);
2969     
2970     /* ... and their lengths */
2971     r1 = norm(r_ij);
2972     r2 = norm(r_kj);
2973     r3 = norm(r_ik);
2974     
2975     /* Deviations from ideality */
2976     s1 = r1-r1e;
2977     s2 = r2-r2e;
2978     s3 = r3-r3e;
2979     
2980     /* Energy (can be negative!) */
2981     vrt   = krt*s3*(s1+s2);
2982     vtot += vrt;
2983     
2984     /* Forces */
2985     k1 = -krt*(s3/r1);
2986     k2 = -krt*(s3/r2);
2987     k3 = -krt*(s1+s2)/r3;
2988     for(m=0; (m<DIM); m++) {
2989       f_i[m] = k1*r_ij[m] + k3*r_ik[m];
2990       f_k[m] = k2*r_kj[m] - k3*r_ik[m];
2991       f_j[m] = -f_i[m] - f_k[m];
2992     }
2993     
2994     for (m=0; (m<DIM); m++) {                   /*  12  */
2995       f[ai][m] += f_i[m];
2996       f[aj][m] += f_j[m];
2997       f[ak][m] += f_k[m];
2998     }
2999     
3000     /* Virial stuff */
3001     if (g) {
3002       copy_ivec(SHIFT_IVEC(g,aj),jt);
3003       
3004       ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
3005       ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
3006       t1=IVEC2IS(dt_ij);
3007       t2=IVEC2IS(dt_kj);
3008     }      
3009     rvec_inc(fshift[t1],f_i);
3010     rvec_inc(fshift[CENTRAL],f_j);
3011     rvec_inc(fshift[t2],f_k);               /* 9 */
3012     /* 163 TOTAL        */
3013   }
3014   return vtot;
3015 }
3016
3017 static real bonded_tab(const char *type,int table_nr,
3018                        const bondedtable_t *table,real kA,real kB,real r,
3019                        real lambda,real *V,real *F)
3020 {
3021   real k,tabscale,*VFtab,rt,eps,eps2,Yt,Ft,Geps,Heps2,Fp,VV,FF;
3022   int  n0,nnn;
3023   real v,f,dvdlambda;
3024
3025   k = (1.0 - lambda)*kA + lambda*kB;
3026
3027   tabscale = table->scale;
3028   VFtab    = table->data;
3029   
3030   rt    = r*tabscale;
3031   n0    = rt;
3032   if (n0 >= table->n) {
3033     gmx_fatal(FARGS,"A tabulated %s interaction table number %d is out of the table range: r %f, between table indices %d and %d, table length %d",
3034               type,table_nr,r,n0,n0+1,table->n);
3035   }
3036   eps   = rt - n0;
3037   eps2  = eps*eps;
3038   nnn   = 4*n0;
3039   Yt    = VFtab[nnn];
3040   Ft    = VFtab[nnn+1];
3041   Geps  = VFtab[nnn+2]*eps;
3042   Heps2 = VFtab[nnn+3]*eps2;
3043   Fp    = Ft + Geps + Heps2;
3044   VV    = Yt + Fp*eps;
3045   FF    = Fp + Geps + 2.0*Heps2;
3046   
3047   *F    = -k*FF*tabscale;
3048   *V    = k*VV;
3049   dvdlambda  = (kB - kA)*VV;
3050   
3051   return dvdlambda;
3052   
3053   /* That was 22 flops */
3054 }
3055
3056 real tab_bonds(int nbonds,
3057                const t_iatom forceatoms[],const t_iparams forceparams[],
3058                const rvec x[],rvec f[],rvec fshift[],
3059                const t_pbc *pbc,const t_graph *g,
3060                real lambda,real *dvdlambda,
3061                const t_mdatoms *md,t_fcdata *fcd,
3062                int *global_atom_index)
3063 {
3064   int  i,m,ki,ai,aj,type,table;
3065   real dr,dr2,fbond,vbond,fij,vtot;
3066   rvec dx;
3067   ivec dt;
3068
3069   vtot = 0.0;
3070   for(i=0; (i<nbonds); ) {
3071     type = forceatoms[i++];
3072     ai   = forceatoms[i++];
3073     aj   = forceatoms[i++];
3074   
3075     ki   = pbc_rvec_sub(pbc,x[ai],x[aj],dx);    /*   3          */
3076     dr2  = iprod(dx,dx);                        /*   5          */
3077     dr   = dr2*gmx_invsqrt(dr2);                        /*  10          */
3078
3079     table = forceparams[type].tab.table;
3080
3081     *dvdlambda += bonded_tab("bond",table,
3082                              &fcd->bondtab[table],
3083                              forceparams[type].tab.kA,
3084                              forceparams[type].tab.kB,
3085                              dr,lambda,&vbond,&fbond);  /*  22 */
3086
3087     if (dr2 == 0.0)
3088       continue;
3089
3090     
3091     vtot  += vbond;/* 1*/
3092     fbond *= gmx_invsqrt(dr2);                  /*   6          */
3093 #ifdef DEBUG
3094     if (debug)
3095       fprintf(debug,"TABBONDS: dr = %10g  vbond = %10g  fbond = %10g\n",
3096               dr,vbond,fbond);
3097 #endif
3098     if (g) {
3099       ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
3100       ki=IVEC2IS(dt);
3101     }
3102     for (m=0; (m<DIM); m++) {                   /*  15          */
3103       fij=fbond*dx[m];
3104       f[ai][m]+=fij;
3105       f[aj][m]-=fij;
3106       fshift[ki][m]+=fij;
3107       fshift[CENTRAL][m]-=fij;
3108     }
3109   }                                     /* 62 TOTAL     */
3110   return vtot;
3111 }
3112
3113 real tab_angles(int nbonds,
3114                 const t_iatom forceatoms[],const t_iparams forceparams[],
3115                 const rvec x[],rvec f[],rvec fshift[],
3116                 const t_pbc *pbc,const t_graph *g,
3117                 real lambda,real *dvdlambda,
3118                 const t_mdatoms *md,t_fcdata *fcd,
3119                 int *global_atom_index)
3120 {
3121   int  i,ai,aj,ak,t1,t2,type,table;
3122   rvec r_ij,r_kj;
3123   real cos_theta,cos_theta2,theta,dVdt,va,vtot;
3124   ivec jt,dt_ij,dt_kj;
3125   
3126   vtot = 0.0;
3127   for(i=0; (i<nbonds); ) {
3128     type = forceatoms[i++];
3129     ai   = forceatoms[i++];
3130     aj   = forceatoms[i++];
3131     ak   = forceatoms[i++];
3132     
3133     theta  = bond_angle(x[ai],x[aj],x[ak],pbc,
3134                         r_ij,r_kj,&cos_theta,&t1,&t2);  /*  41          */
3135
3136     table = forceparams[type].tab.table;
3137   
3138     *dvdlambda += bonded_tab("angle",table,
3139                              &fcd->angletab[table],
3140                              forceparams[type].tab.kA,
3141                              forceparams[type].tab.kB,
3142                              theta,lambda,&va,&dVdt);  /*  22  */
3143     vtot += va;
3144     
3145     cos_theta2 = sqr(cos_theta);                /*   1          */
3146     if (cos_theta2 < 1) {
3147       int  m;
3148       real snt,st,sth;
3149       real cik,cii,ckk;
3150       real nrkj2,nrij2;
3151       rvec f_i,f_j,f_k;
3152       
3153       st  = dVdt*gmx_invsqrt(1 - cos_theta2);   /*  12          */
3154       sth = st*cos_theta;                       /*   1          */
3155 #ifdef DEBUG
3156       if (debug)
3157         fprintf(debug,"ANGLES: theta = %10g  vth = %10g  dV/dtheta = %10g\n",
3158                 theta*RAD2DEG,va,dVdt);
3159 #endif
3160       nrkj2=iprod(r_kj,r_kj);                   /*   5          */
3161       nrij2=iprod(r_ij,r_ij);
3162       
3163       cik=st*gmx_invsqrt(nrkj2*nrij2);          /*  12          */ 
3164       cii=sth/nrij2;                            /*  10          */
3165       ckk=sth/nrkj2;                            /*  10          */
3166       
3167       for (m=0; (m<DIM); m++) {                 /*  39          */
3168         f_i[m]=-(cik*r_kj[m]-cii*r_ij[m]);
3169         f_k[m]=-(cik*r_ij[m]-ckk*r_kj[m]);
3170         f_j[m]=-f_i[m]-f_k[m];
3171         f[ai][m]+=f_i[m];
3172         f[aj][m]+=f_j[m];
3173         f[ak][m]+=f_k[m];
3174       }
3175       if (g) {
3176         copy_ivec(SHIFT_IVEC(g,aj),jt);
3177       
3178         ivec_sub(SHIFT_IVEC(g,ai),jt,dt_ij);
3179         ivec_sub(SHIFT_IVEC(g,ak),jt,dt_kj);
3180         t1=IVEC2IS(dt_ij);
3181         t2=IVEC2IS(dt_kj);
3182       }
3183       rvec_inc(fshift[t1],f_i);
3184       rvec_inc(fshift[CENTRAL],f_j);
3185       rvec_inc(fshift[t2],f_k);
3186     }                                           /* 169 TOTAL    */
3187   }
3188   return vtot;
3189 }
3190
3191 real tab_dihs(int nbonds,
3192               const t_iatom forceatoms[],const t_iparams forceparams[],
3193               const rvec x[],rvec f[],rvec fshift[],
3194               const t_pbc *pbc,const t_graph *g,
3195               real lambda,real *dvdlambda,
3196               const t_mdatoms *md,t_fcdata *fcd,
3197               int *global_atom_index)
3198 {
3199   int  i,type,ai,aj,ak,al,table;
3200   int  t1,t2,t3;
3201   rvec r_ij,r_kj,r_kl,m,n;
3202   real phi,sign,ddphi,vpd,vtot;
3203
3204   vtot = 0.0;
3205   for(i=0; (i<nbonds); ) {
3206     type = forceatoms[i++];
3207     ai   = forceatoms[i++];
3208     aj   = forceatoms[i++];
3209     ak   = forceatoms[i++];
3210     al   = forceatoms[i++];
3211     
3212     phi=dih_angle(x[ai],x[aj],x[ak],x[al],pbc,r_ij,r_kj,r_kl,m,n,
3213                   &sign,&t1,&t2,&t3);                   /*  84  */
3214
3215     table = forceparams[type].tab.table;
3216
3217     /* Hopefully phi+M_PI never results in values < 0 */
3218     *dvdlambda += bonded_tab("dihedral",table,
3219                              &fcd->dihtab[table],
3220                              forceparams[type].tab.kA,
3221                              forceparams[type].tab.kB,
3222                              phi+M_PI,lambda,&vpd,&ddphi);
3223                        
3224     vtot += vpd;
3225     do_dih_fup(ai,aj,ak,al,-ddphi,r_ij,r_kj,r_kl,m,n,
3226                f,fshift,pbc,g,x,t1,t2,t3);                      /* 112  */
3227
3228 #ifdef DEBUG
3229     fprintf(debug,"pdih: (%d,%d,%d,%d) phi=%g\n",
3230             ai,aj,ak,al,phi);
3231 #endif
3232   } /* 227 TOTAL        */
3233
3234   return vtot;
3235 }
3236
3237 static unsigned
3238 calc_bonded_reduction_mask(const t_idef *idef,
3239                            int shift,
3240                            int t,int nt)
3241 {
3242     unsigned mask;
3243     int ftype,nb,nat1,nb0,nb1,i,a;
3244
3245     mask = 0;
3246
3247     for(ftype=0; ftype<F_NRE; ftype++)
3248     {
3249         if (interaction_function[ftype].flags & IF_BOND &&
3250             !(ftype == F_CONNBONDS || ftype == F_POSRES) &&
3251             (ftype<F_GB12 || ftype>F_GB14))
3252         {
3253             nb = idef->il[ftype].nr;
3254             if (nb > 0)
3255             {
3256                 nat1 = interaction_function[ftype].nratoms + 1;
3257
3258                 /* Divide this interaction equally over the threads.
3259                  * This is not stored: should match division in calc_bonds.
3260                  */
3261                 nb0 = (((nb/nat1)* t   )/nt)*nat1;
3262                 nb1 = (((nb/nat1)*(t+1))/nt)*nat1;
3263
3264                 for(i=nb0; i<nb1; i+=nat1)
3265                 {
3266                     for(a=1; a<nat1; a++)
3267                     {
3268                         mask |= (1U << (idef->il[ftype].iatoms[i+a]>>shift));
3269                     }
3270                 }
3271             }
3272         }
3273     }
3274
3275     return mask;
3276 }
3277
3278 void init_bonded_thread_force_reduction(t_forcerec *fr,
3279                                         const t_idef *idef)
3280 {
3281 #define MAX_BLOCK_BITS 32
3282     int t;
3283     int ctot,c,b;
3284
3285     if (fr->nthreads <= 1)
3286     {
3287         fr->red_nblock = 0;
3288
3289         return;
3290     }
3291
3292     /* We divide the force array in a maximum of 32 blocks.
3293      * Minimum force block reduction size is 2^6=64.
3294      */
3295     fr->red_ashift = 6;
3296     while (fr->natoms_force > (int)(MAX_BLOCK_BITS*(1U<<fr->red_ashift)))
3297     {
3298         fr->red_ashift++;
3299     }
3300     if (debug)
3301     {
3302         fprintf(debug,"bonded force buffer block atom shift %d bits\n",
3303                 fr->red_ashift);
3304     }
3305
3306     /* Determine to which blocks each thread's bonded force calculation
3307      * contributes. Store this is a mask for each thread.
3308      */
3309 #pragma omp parallel for num_threads(fr->nthreads) schedule(static)
3310     for(t=1; t<fr->nthreads; t++)
3311     {
3312         fr->f_t[t].red_mask =
3313             calc_bonded_reduction_mask(idef,fr->red_ashift,t,fr->nthreads);
3314     }
3315
3316     /* Determine the maximum number of blocks we need to reduce over */
3317     fr->red_nblock = 0;
3318     ctot = 0;
3319     for(t=0; t<fr->nthreads; t++)
3320     {
3321         c = 0;
3322         for(b=0; b<MAX_BLOCK_BITS; b++)
3323         {
3324             if (fr->f_t[t].red_mask & (1U<<b))
3325             {
3326                 fr->red_nblock = max(fr->red_nblock,b+1);
3327                 c++;
3328             }
3329         }
3330         if (debug)
3331         {
3332             fprintf(debug,"thread %d flags %x count %d\n",
3333                     t,fr->f_t[t].red_mask,c);
3334         }
3335         ctot += c;
3336     }
3337     if (debug)
3338     {
3339         fprintf(debug,"Number of blocks to reduce: %d of size %d\n",
3340                 fr->red_nblock,1<<fr->red_ashift);
3341         fprintf(debug,"Reduction density %.2f density/#thread %.2f\n",
3342                 ctot*(1<<fr->red_ashift)/(double)fr->natoms_force,
3343                 ctot*(1<<fr->red_ashift)/(double)(fr->natoms_force*fr->nthreads));
3344     }
3345 }
3346
3347 static void zero_thread_forces(f_thread_t *f_t,int n,
3348                                int nblock,int blocksize)
3349 {
3350     int b,a0,a1,a,i,j;
3351
3352     if (n > f_t->f_nalloc)
3353     {
3354         f_t->f_nalloc = over_alloc_large(n);
3355         srenew(f_t->f,f_t->f_nalloc);
3356     }
3357
3358     if (f_t->red_mask != 0)
3359     {
3360         for(b=0; b<nblock; b++)
3361         {
3362             if (f_t->red_mask && (1U<<b))
3363             {
3364                 a0 = b*blocksize;
3365                 a1 = min((b+1)*blocksize,n);
3366                 for(a=a0; a<a1; a++)
3367                 {
3368                     clear_rvec(f_t->f[a]);
3369                 }
3370             }
3371         }
3372     }
3373     for(i=0; i<SHIFTS; i++)
3374     {
3375         clear_rvec(f_t->fshift[i]);
3376     }
3377     for(i=0; i<F_NRE; i++)
3378     {
3379         f_t->ener[i] = 0;
3380     }
3381     for(i=0; i<egNR; i++)
3382     {
3383         for(j=0; j<f_t->grpp.nener; j++)
3384         {
3385             f_t->grpp.ener[i][j] = 0;
3386         }
3387     }
3388     for(i=0; i<efptNR; i++)
3389     {
3390         f_t->dvdl[i] = 0;
3391     }
3392 }
3393
3394 static void reduce_thread_force_buffer(int n,rvec *f,
3395                                        int nthreads,f_thread_t *f_t,
3396                                        int nblock,int block_size)
3397 {
3398     /* The max thread number is arbitrary,
3399      * we used a fixed number to avoid memory management.
3400      * Using more than 16 threads is probably never useful performance wise.
3401      */
3402 #define MAX_BONDED_THREADS 256
3403     int b;
3404
3405     if (nthreads > MAX_BONDED_THREADS)
3406     {
3407         gmx_fatal(FARGS,"Can not reduce bonded forces on more than %d threads",
3408                   MAX_BONDED_THREADS);
3409     }
3410
3411     /* This reduction can run on any number of threads,
3412      * independently of nthreads.
3413      */
3414 #pragma omp parallel for num_threads(nthreads) schedule(static)
3415     for(b=0; b<nblock; b++)
3416     {
3417         rvec *fp[MAX_BONDED_THREADS];
3418         int nfb,ft,fb;
3419         int a0,a1,a;
3420
3421         /* Determine which threads contribute to this block */
3422         nfb = 0;
3423         for(ft=1; ft<nthreads; ft++)
3424         {
3425             if (f_t[ft].red_mask & (1U<<b))
3426             {
3427                 fp[nfb++] = f_t[ft].f;
3428             }
3429         }
3430         if (nfb > 0)
3431         {
3432             /* Reduce force buffers for threads that contribute */
3433             a0 =  b   *block_size;
3434             a1 = (b+1)*block_size;
3435             a1 = min(a1,n);
3436             for(a=a0; a<a1; a++)
3437             {
3438                 for(fb=0; fb<nfb; fb++)
3439                 {
3440                     rvec_inc(f[a],fp[fb][a]);
3441                 }
3442             }
3443         }
3444     }
3445 }
3446
3447 static void reduce_thread_forces(int n,rvec *f,rvec *fshift,
3448                                  real *ener,gmx_grppairener_t *grpp,real *dvdl,
3449                                  int nthreads,f_thread_t *f_t,
3450                                  int nblock,int block_size,
3451                                  gmx_bool bCalcEnerVir,
3452                                  gmx_bool bDHDL)
3453 {
3454     if (nblock > 0)
3455     {
3456         /* Reduce the bonded force buffer */
3457         reduce_thread_force_buffer(n,f,nthreads,f_t,nblock,block_size);
3458     }
3459
3460     /* When necessary, reduce energy and virial using one thread only */
3461     if (bCalcEnerVir)
3462     {
3463         int t,i,j;
3464
3465         for(i=0; i<SHIFTS; i++)
3466         {
3467             for(t=1; t<nthreads; t++)
3468             {
3469                 rvec_inc(fshift[i],f_t[t].fshift[i]);
3470             }
3471         }
3472         for(i=0; i<F_NRE; i++)
3473         {
3474             for(t=1; t<nthreads; t++)
3475             {
3476                 ener[i] += f_t[t].ener[i];
3477             }
3478         }
3479         for(i=0; i<egNR; i++)
3480         {
3481             for(j=0; j<f_t[1].grpp.nener; j++)
3482             {
3483                 for(t=1; t<nthreads; t++)
3484                 {
3485                     
3486                     grpp->ener[i][j] += f_t[t].grpp.ener[i][j];
3487                 }
3488             }
3489         }
3490         if (bDHDL)
3491         {
3492             for(i=0; i<efptNR; i++)
3493             {
3494                 
3495                 for(t=1; t<nthreads; t++)
3496                 {
3497                     dvdl[i] += f_t[t].dvdl[i];
3498                 }
3499             }
3500         }
3501     }
3502 }
3503
3504 static real calc_one_bond(FILE *fplog,int thread,
3505                           int ftype,const t_idef *idef,
3506                           rvec x[], rvec f[], rvec fshift[],
3507                           t_forcerec *fr,
3508                           const t_pbc *pbc,const t_graph *g,
3509                           gmx_enerdata_t *enerd, gmx_grppairener_t *grpp,
3510                           t_nrnb *nrnb,
3511                           real *lambda, real *dvdl,
3512                           const t_mdatoms *md,t_fcdata *fcd,
3513                           gmx_bool bCalcEnerVir,
3514                           int *global_atom_index, gmx_bool bPrintSepPot)
3515 {
3516     int ind,nat1,nbonds,efptFTYPE;
3517     real v=0;
3518     t_iatom *iatoms;
3519     int nb0,nbn;
3520
3521     if (IS_RESTRAINT_TYPE(ftype))
3522     {
3523         efptFTYPE = efptRESTRAINT;
3524     }
3525     else
3526     {
3527         efptFTYPE = efptBONDED;
3528     }
3529
3530     if (interaction_function[ftype].flags & IF_BOND &&
3531         !(ftype == F_CONNBONDS || ftype == F_POSRES))
3532     {
3533         ind  = interaction_function[ftype].nrnb_ind;
3534         nat1 = interaction_function[ftype].nratoms + 1;
3535         nbonds    = idef->il[ftype].nr/nat1;
3536         iatoms    = idef->il[ftype].iatoms;
3537
3538         nb0 = ((nbonds* thread   )/(fr->nthreads))*nat1;
3539         nbn = ((nbonds*(thread+1))/(fr->nthreads))*nat1 - nb0;
3540
3541         if (!IS_LISTED_LJ_C(ftype))
3542         {
3543             if(ftype==F_CMAP)
3544             {
3545                 v = cmap_dihs(nbn,iatoms+nb0,
3546                               idef->iparams,&idef->cmap_grid,
3547                               (const rvec*)x,f,fshift,
3548                               pbc,g,lambda[efptFTYPE],&(dvdl[efptFTYPE]),
3549                               md,fcd,global_atom_index);
3550             }
3551             else if (ftype == F_PDIHS &&
3552                      !bCalcEnerVir && fr->efep==efepNO)
3553             {
3554                 /* No energies, shift forces, dvdl */
3555 #ifndef SSE_PROPER_DIHEDRALS
3556                 pdihs_noener
3557 #else
3558                 pdihs_noener_sse
3559 #endif
3560                     (nbn,idef->il[ftype].iatoms+nb0,
3561                      idef->iparams,
3562                      (const rvec*)x,f,
3563                      pbc,g,lambda[efptFTYPE],md,fcd,
3564                      global_atom_index);
3565                 v = 0;
3566             }
3567             else
3568             {
3569                 v = interaction_function[ftype].ifunc(nbn,iatoms+nb0,
3570                                                       idef->iparams,
3571                                                       (const rvec*)x,f,fshift,
3572                                                       pbc,g,lambda[efptFTYPE],&(dvdl[efptFTYPE]),
3573                                                       md,fcd,global_atom_index);
3574             }
3575             if (bPrintSepPot)
3576             {
3577                 fprintf(fplog,"  %-23s #%4d  V %12.5e  dVdl %12.5e\n",
3578                         interaction_function[ftype].longname,
3579                         nbonds/nat1,v,lambda[efptFTYPE]);
3580             }
3581         }
3582         else
3583         {
3584             v = do_nonbonded_listed(ftype,nbn,iatoms+nb0,idef->iparams,(const rvec*)x,f,fshift,
3585                                     pbc,g,lambda,dvdl,md,fr,grpp,global_atom_index);
3586
3587             enerd->dvdl_nonlin[efptCOUL] += dvdl[efptCOUL];
3588             enerd->dvdl_nonlin[efptVDW] += dvdl[efptVDW];
3589             
3590             if (bPrintSepPot)
3591             {
3592                 fprintf(fplog,"  %-5s + %-15s #%4d                  dVdl %12.5e\n",
3593                         interaction_function[ftype].longname,
3594                         interaction_function[F_LJ14].longname,nbonds/nat1,dvdl[efptVDW]);
3595                 fprintf(fplog,"  %-5s + %-15s #%4d                  dVdl %12.5e\n",
3596                         interaction_function[ftype].longname,
3597                         interaction_function[F_COUL14].longname,nbonds/nat1,dvdl[efptCOUL]);
3598             }
3599         }
3600         if (ind != -1 && thread == 0)
3601         {
3602             inc_nrnb(nrnb,ind,nbonds);
3603         }
3604     }
3605
3606     return v;
3607 }
3608
3609 /* WARNING!  THIS FUNCTION MUST EXACTLY TRACK THE calc
3610    function, or horrible things will happen when doing free energy
3611    calculations!  In a good coding world, this would not be a
3612    different function, but for speed reasons, it needs to be made a
3613    separate function.  TODO for 5.0 - figure out a way to reorganize
3614    to reduce duplication.
3615 */
3616
3617 static real calc_one_bond_foreign(FILE *fplog,int ftype, const t_idef *idef,
3618                                   rvec x[], rvec f[], t_forcerec *fr,
3619                                   const t_pbc *pbc,const t_graph *g,
3620                                   gmx_grppairener_t *grpp, t_nrnb *nrnb,
3621                                   real *lambda, real *dvdl,
3622                                   const t_mdatoms *md,t_fcdata *fcd,
3623                                   int *global_atom_index, gmx_bool bPrintSepPot)
3624 {
3625     int ind,nat1,nbonds,efptFTYPE,nbonds_np;
3626     real v=0;
3627     t_iatom *iatoms;
3628
3629     if (IS_RESTRAINT_TYPE(ftype))
3630     {
3631         efptFTYPE = efptRESTRAINT;
3632     }
3633     else
3634     {
3635         efptFTYPE = efptBONDED;
3636     }
3637
3638     if (ftype<F_GB12 || ftype>F_GB14)
3639     {
3640         if (interaction_function[ftype].flags & IF_BOND &&
3641             !(ftype == F_CONNBONDS || ftype == F_POSRES))
3642         {
3643             ind  = interaction_function[ftype].nrnb_ind;
3644             nat1 = interaction_function[ftype].nratoms+1;
3645             nbonds_np = idef->il[ftype].nr_nonperturbed;
3646             nbonds    = idef->il[ftype].nr - nbonds_np;
3647             iatoms    = idef->il[ftype].iatoms + nbonds_np;
3648             if (nbonds > 0)
3649             {
3650                 if (!IS_LISTED_LJ_C(ftype))
3651                 {
3652                     if(ftype==F_CMAP)
3653                     {
3654                         v = cmap_dihs(nbonds,iatoms,
3655                                       idef->iparams,&idef->cmap_grid,
3656                                       (const rvec*)x,f,fr->fshift,
3657                                       pbc,g,lambda[efptFTYPE],&(dvdl[efptFTYPE]),md,fcd,
3658                                       global_atom_index);
3659                     }
3660                     else
3661                     {
3662                         v =         interaction_function[ftype].ifunc(nbonds,iatoms,
3663                                                                   idef->iparams,
3664                                                                   (const rvec*)x,f,fr->fshift,
3665                                                                   pbc,g,lambda[efptFTYPE],&dvdl[efptFTYPE],
3666                                                                   md,fcd,global_atom_index);
3667                     }
3668                 }
3669                 else
3670                 {
3671                     v = do_nonbonded_listed(ftype,nbonds,iatoms,
3672                                             idef->iparams,
3673                                             (const rvec*)x,f,fr->fshift,
3674                                             pbc,g,lambda,dvdl,
3675                                             md,fr,grpp,global_atom_index);
3676                 }
3677                 if (ind != -1)
3678                 {
3679                     inc_nrnb(nrnb,ind,nbonds/nat1);
3680                 }
3681             }
3682         }
3683     }
3684     return v;
3685 }
3686
3687 void calc_bonds(FILE *fplog,const gmx_multisim_t *ms,
3688                 const t_idef *idef,
3689                 rvec x[],history_t *hist,
3690                 rvec f[],t_forcerec *fr,
3691                 const t_pbc *pbc,const t_graph *g,
3692                 gmx_enerdata_t *enerd,t_nrnb *nrnb,
3693                 real *lambda,
3694                 const t_mdatoms *md,
3695                 t_fcdata *fcd,int *global_atom_index,
3696                 t_atomtypes *atype, gmx_genborn_t *born,
3697                 int force_flags,
3698                 gmx_bool bPrintSepPot,gmx_large_int_t step)
3699 {
3700     gmx_bool bCalcEnerVir;
3701     int    i;
3702     real   v,dvdl[efptNR],dvdl_dum[efptNR]; /* The dummy array is to have a place to store the dhdl at other values
3703                                                of lambda, which will be thrown away in the end*/
3704     const  t_pbc *pbc_null;
3705     char   buf[22];
3706     int    thread;
3707
3708     bCalcEnerVir = (force_flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY));
3709
3710     for (i=0;i<efptNR;i++)
3711     {
3712         dvdl[i] = 0.0;
3713     }
3714     if (fr->bMolPBC)
3715     {
3716         pbc_null = pbc;
3717     }
3718     else
3719     {
3720         pbc_null = NULL;
3721     }
3722     if (bPrintSepPot)
3723     {
3724         fprintf(fplog,"Step %s: bonded V and dVdl for this node\n",
3725                 gmx_step_str(step,buf));
3726     }
3727
3728 #ifdef DEBUG
3729     if (g && debug)
3730     {
3731         p_graph(debug,"Bondage is fun",g);
3732     }
3733 #endif
3734
3735     /* Do pre force calculation stuff which might require communication */
3736     if (idef->il[F_ORIRES].nr)
3737     {
3738         enerd->term[F_ORIRESDEV] =
3739             calc_orires_dev(ms,idef->il[F_ORIRES].nr,
3740                             idef->il[F_ORIRES].iatoms,
3741                             idef->iparams,md,(const rvec*)x,
3742                             pbc_null,fcd,hist);
3743     }
3744     if (idef->il[F_DISRES].nr)
3745     {
3746         calc_disres_R_6(ms,idef->il[F_DISRES].nr,
3747                         idef->il[F_DISRES].iatoms,
3748                         idef->iparams,(const rvec*)x,pbc_null,
3749                         fcd,hist);
3750     }
3751
3752 #pragma omp parallel for num_threads(fr->nthreads) schedule(static)
3753     for(thread=0; thread<fr->nthreads; thread++)
3754     {
3755         int    ftype,nbonds,ind,nat1;
3756         real   *epot,v;
3757         /* thread stuff */
3758         rvec   *ft,*fshift;
3759         real   *dvdlt;
3760         gmx_grppairener_t *grpp;
3761         int    nb0,nbn;
3762
3763         if (thread == 0)
3764         {
3765             ft     = f;
3766             fshift = fr->fshift;
3767             epot   = enerd->term;
3768             grpp   = &enerd->grpp;
3769             dvdlt  = dvdl;
3770         }
3771         else
3772         {
3773             zero_thread_forces(&fr->f_t[thread],fr->natoms_force,
3774                                fr->red_nblock,1<<fr->red_ashift);
3775
3776             ft     = fr->f_t[thread].f;
3777             fshift = fr->f_t[thread].fshift;
3778             epot   = fr->f_t[thread].ener;
3779             grpp   = &fr->f_t[thread].grpp;
3780             dvdlt  = fr->f_t[thread].dvdl;
3781         }
3782         /* Loop over all bonded force types to calculate the bonded forces */
3783         for(ftype=0; (ftype<F_NRE); ftype++)
3784         {
3785             if (idef->il[ftype].nr > 0 &&
3786                 (interaction_function[ftype].flags & IF_BOND) &&
3787                 (ftype < F_GB12 || ftype > F_GB14) &&
3788                 !(ftype == F_CONNBONDS || ftype == F_POSRES))
3789             {
3790                 v = calc_one_bond(fplog,thread,ftype,idef,x, 
3791                                   ft,fshift,fr,pbc_null,g,enerd,grpp,
3792                                   nrnb,lambda,dvdlt,
3793                                   md,fcd,bCalcEnerVir,
3794                                   global_atom_index,bPrintSepPot);
3795                 epot[ftype]        += v;
3796             }
3797         }
3798     }
3799     if (fr->nthreads > 1)
3800     {
3801         reduce_thread_forces(fr->natoms_force,f,fr->fshift,
3802                              enerd->term,&enerd->grpp,dvdl,
3803                              fr->nthreads,fr->f_t,
3804                              fr->red_nblock,1<<fr->red_ashift,
3805                              bCalcEnerVir,
3806                              force_flags & GMX_FORCE_DHDL);
3807     }
3808     if (force_flags & GMX_FORCE_DHDL)
3809     {
3810         for(i=0; i<efptNR; i++)
3811         {
3812             enerd->dvdl_nonlin[i] += dvdl[i];
3813         }
3814     }
3815
3816     /* Copy the sum of violations for the distance restraints from fcd */
3817     if (fcd)
3818     {
3819         enerd->term[F_DISRESVIOL] = fcd->disres.sumviol;
3820
3821     }
3822 }
3823
3824 void calc_bonds_lambda(FILE *fplog,
3825                        const t_idef *idef,
3826                        rvec x[],
3827                        t_forcerec *fr,
3828                        const t_pbc *pbc,const t_graph *g,
3829                        gmx_grppairener_t *grpp, real *epot, t_nrnb *nrnb,
3830                        real *lambda,
3831                        const t_mdatoms *md,
3832                        t_fcdata *fcd,
3833                        int *global_atom_index)
3834 {
3835     int    i,ftype,nbonds_np,nbonds,ind,nat;
3836     real   v,dr,dr2;
3837     real   dvdl_dum[efptNR];
3838     rvec   *f,*fshift_orig;
3839     const  t_pbc *pbc_null;
3840     t_iatom *iatom_fe;
3841
3842     if (fr->bMolPBC)
3843     {
3844         pbc_null = pbc;
3845     }
3846     else
3847     {
3848         pbc_null = NULL;
3849     }
3850
3851     snew(f,fr->natoms_force);
3852     /* We want to preserve the fshift array in forcerec */
3853     fshift_orig = fr->fshift;
3854     snew(fr->fshift,SHIFTS);
3855
3856     /* Loop over all bonded force types to calculate the bonded forces */
3857     for(ftype=0; (ftype<F_NRE); ftype++) 
3858     {
3859         v = calc_one_bond_foreign(fplog,ftype,idef,x, 
3860                                   f,fr,pbc_null,g,grpp,nrnb,lambda,dvdl_dum,
3861                                   md,fcd,global_atom_index,FALSE);
3862         epot[ftype] += v;
3863     }
3864
3865     sfree(fr->fshift);
3866     fr->fshift = fshift_orig;
3867     sfree(f);
3868 }