Merge branch 'release-4-6'
[alexxy/gromacs.git] / src / gromacs / mdlib / force.c
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
2  *
3  *
4  *                This source code is part of
5  *
6  *                 G   R   O   M   A   C   S
7  *
8  *          GROningen MAchine for Chemical Simulations
9  *
10  *                        VERSION 3.2.0
11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13  * Copyright (c) 2001-2004, The GROMACS development team,
14  * check out http://www.gromacs.org for more information.
15
16  * This program is free software; you can redistribute it and/or
17  * modify it under the terms of the GNU General Public License
18  * as published by the Free Software Foundation; either version 2
19  * of the License, or (at your option) any later version.
20  *
21  * If you want to redistribute modifications, please consider that
22  * scientific software is very special. Version control is crucial -
23  * bugs must be traceable. We will be happy to consider code for
24  * inclusion in the official distribution, but derived work must not
25  * be called official GROMACS. Details are found in the README & COPYING
26  * files - if they are missing, get the official version at www.gromacs.org.
27  *
28  * To help us fund GROMACS development, we humbly ask that you cite
29  * the papers on the package - you can find them in the top README file.
30  *
31  * For more info, check our website at http://www.gromacs.org
32  *
33  * And Hey:
34  * GROwing Monsters And Cloning Shrimps
35  */
36 #ifdef HAVE_CONFIG_H
37 #include <config.h>
38 #endif
39
40 #include <math.h>
41 #include <string.h>
42 #include <assert.h>
43 #include "sysstuff.h"
44 #include "typedefs.h"
45 #include "macros.h"
46 #include "smalloc.h"
47 #include "macros.h"
48 #include "physics.h"
49 #include "force.h"
50 #include "nonbonded.h"
51 #include "names.h"
52 #include "network.h"
53 #include "pbc.h"
54 #include "ns.h"
55 #include "nrnb.h"
56 #include "bondf.h"
57 #include "mshift.h"
58 #include "txtdump.h"
59 #include "coulomb.h"
60 #include "pme.h"
61 #include "mdrun.h"
62 #include "domdec.h"
63 #include "partdec.h"
64 #include "qmmm.h"
65 #include "gmx_omp_nthreads.h"
66
67
68 void ns(FILE *fp,
69         t_forcerec *fr,
70         rvec       x[],
71         matrix     box,
72         gmx_groups_t *groups,
73         t_grpopts  *opts,
74         gmx_localtop_t *top,
75         t_mdatoms  *md,
76         t_commrec  *cr,
77         t_nrnb     *nrnb,
78         real       *lambda,
79         real       *dvdlambda,
80         gmx_grppairener_t *grppener,
81         gmx_bool       bFillGrid,
82         gmx_bool       bDoLongRange,
83         gmx_bool       bDoForces,
84         rvec       *f)
85 {
86   char   *ptr;
87   int    nsearch;
88
89
90   if (!fr->ns.nblist_initialized)
91   {
92       init_neighbor_list(fp, fr, md->homenr);
93   }
94
95   if (fr->bTwinRange)
96     fr->nlr=0;
97
98     nsearch = search_neighbours(fp,fr,x,box,top,groups,cr,nrnb,md,
99                                 lambda,dvdlambda,grppener,
100                                 bFillGrid,bDoLongRange,
101                                 bDoForces,f);
102   if (debug)
103     fprintf(debug,"nsearch = %d\n",nsearch);
104
105   /* Check whether we have to do dynamic load balancing */
106   /*if ((nsb->nstDlb > 0) && (mod(step,nsb->nstDlb) == 0))
107     count_nb(cr,nsb,&(top->blocks[ebCGS]),nns,fr->nlr,
108     &(top->idef),opts->ngener);
109   */
110   if (fr->ns.dump_nl > 0)
111     dump_nblist(fp,cr,fr,fr->ns.dump_nl);
112 }
113
114 static void reduce_thread_forces(int n,rvec *f,
115                                  tensor vir,
116                                  real *Vcorr,
117                                  int efpt_ind,real *dvdl,
118                                  int nthreads,f_thread_t *f_t)
119 {
120     int t,i;
121
122     /* This reduction can run over any number of threads */
123 #pragma omp parallel for num_threads(gmx_omp_nthreads_get(emntBonded)) private(t) schedule(static)
124     for(i=0; i<n; i++)
125     {
126         for(t=1; t<nthreads; t++)
127         {
128             rvec_inc(f[i],f_t[t].f[i]);
129         }
130     }
131     for(t=1; t<nthreads; t++)
132     {
133         *Vcorr += f_t[t].Vcorr;
134         *dvdl  += f_t[t].dvdl[efpt_ind];
135         m_add(vir,f_t[t].vir,vir);
136     }
137 }
138
139 void do_force_lowlevel(FILE       *fplog,   gmx_large_int_t step,
140                        t_forcerec *fr,      t_inputrec *ir,
141                        t_idef     *idef,    t_commrec  *cr,
142                        t_nrnb     *nrnb,    gmx_wallcycle_t wcycle,
143                        t_mdatoms  *md,
144                        t_grpopts  *opts,
145                        rvec       x[],      history_t  *hist,
146                        rvec       f[],
147                        gmx_enerdata_t *enerd,
148                        t_fcdata   *fcd,
149                        gmx_mtop_t     *mtop,
150                        gmx_localtop_t *top,
151                        gmx_genborn_t *born,
152                        t_atomtypes *atype,
153                        gmx_bool       bBornRadii,
154                        matrix     box,
155                        t_lambda   *fepvals,
156                        real       *lambda,
157                        t_graph    *graph,
158                        t_blocka   *excl,
159                        rvec       mu_tot[],
160                        int        flags,
161                        float      *cycles_pme)
162 {
163     int     i,j,status;
164     int     donb_flags;
165     gmx_bool    bDoEpot,bSepDVDL,bSB;
166     int     pme_flags;
167     matrix  boxs;
168     rvec    box_size;
169     real    Vsr,Vlr,Vcorr=0;
170     t_pbc   pbc;
171     real    dvdgb;
172     char    buf[22];
173     gmx_enerdata_t ed_lam;
174     double  clam_i,vlam_i;
175     real    dvdl_dum[efptNR], dvdl, dvdl_nb[efptNR], lam_i[efptNR];
176     real    dvdlsum;
177
178 #ifdef GMX_MPI
179     double  t0=0.0,t1,t2,t3; /* time measurement for coarse load balancing */
180 #endif
181
182 #define PRINT_SEPDVDL(s,v,dvdlambda) if (bSepDVDL) fprintf(fplog,sepdvdlformat,s,v,dvdlambda);
183
184
185     set_pbc(&pbc,fr->ePBC,box);
186
187     /* reset free energy components */
188     for (i=0;i<efptNR;i++)
189     {
190         dvdl_nb[i]  = 0;
191         dvdl_dum[i] = 0;
192     }
193
194     /* Reset box */
195     for(i=0; (i<DIM); i++)
196     {
197         box_size[i]=box[i][i];
198     }
199
200     bSepDVDL=(fr->bSepDVDL && do_per_step(step,ir->nstlog));
201     debug_gmx();
202
203     /* do QMMM first if requested */
204     if(fr->bQMMM)
205     {
206         enerd->term[F_EQM] = calculate_QMMM(cr,x,f,fr,md);
207     }
208
209     if (bSepDVDL)
210     {
211         fprintf(fplog,"Step %s: non-bonded V and dVdl for node %d:\n",
212                 gmx_step_str(step,buf),cr->nodeid);
213     }
214
215     /* Call the short range functions all in one go. */
216
217 #ifdef GMX_MPI
218     /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/
219 #define TAKETIME FALSE
220     if (TAKETIME)
221     {
222         MPI_Barrier(cr->mpi_comm_mygroup);
223         t0=MPI_Wtime();
224     }
225 #endif
226
227     if (ir->nwall)
228     {
229         /* foreign lambda component for walls */
230         dvdl = do_walls(ir,fr,box,md,x,f,lambda[efptVDW],
231                         enerd->grpp.ener[egLJSR],nrnb);
232         PRINT_SEPDVDL("Walls",0.0,dvdl);
233         enerd->dvdl_lin[efptVDW] += dvdl;
234     }
235
236         /* If doing GB, reset dvda and calculate the Born radii */
237         if (ir->implicit_solvent)
238         {
239         wallcycle_sub_start(wcycle, ewcsNONBONDED);
240
241                 for(i=0;i<born->nr;i++)
242                 {
243                         fr->dvda[i]=0;
244                 }
245
246                 if(bBornRadii)
247                 {
248                         calc_gb_rad(cr,fr,ir,top,atype,x,&(fr->gblist),born,md,nrnb);
249                 }
250
251         wallcycle_sub_stop(wcycle, ewcsNONBONDED);
252         }
253
254     where();
255     if (flags & GMX_FORCE_NONBONDED)
256     {
257         donb_flags = 0;
258         if (flags & GMX_FORCE_FORCES)
259         {
260             donb_flags |= GMX_DONB_FORCES;
261         }
262
263         wallcycle_sub_start(wcycle, ewcsNONBONDED);
264         do_nonbonded(cr,fr,x,f,md,excl,
265                     fr->bBHAM ?
266                     enerd->grpp.ener[egBHAMSR] :
267                     enerd->grpp.ener[egLJSR],
268                     enerd->grpp.ener[egCOULSR],
269                     enerd->grpp.ener[egGB],box_size,nrnb,
270                     lambda,dvdl_nb,-1,-1,donb_flags);
271         wallcycle_sub_stop(wcycle, ewcsNONBONDED);
272     }
273
274     /* If we do foreign lambda and we have soft-core interactions
275      * we have to recalculate the (non-linear) energies contributions.
276      */
277     if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0)
278     {
279         wallcycle_sub_start(wcycle, ewcsNONBONDED);
280         init_enerdata(mtop->groups.grps[egcENER].nr,fepvals->n_lambda,&ed_lam);
281
282         for(i=0; i<enerd->n_lambda; i++)
283         {
284             for (j=0;j<efptNR;j++)
285             {
286                 lam_i[j] = (i==0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
287             }
288             reset_enerdata(&ir->opts,fr,TRUE,&ed_lam,FALSE);
289             do_nonbonded(cr,fr,x,f,md,excl,
290                          fr->bBHAM ?
291                          ed_lam.grpp.ener[egBHAMSR] :
292                          ed_lam.grpp.ener[egLJSR],
293                          ed_lam.grpp.ener[egCOULSR],
294                          enerd->grpp.ener[egGB], box_size,nrnb,
295                          lam_i,dvdl_dum,-1,-1,
296                          GMX_DONB_FOREIGNLAMBDA);
297             sum_epot(&ir->opts,&ed_lam);
298             enerd->enerpart_lambda[i] += ed_lam.term[F_EPOT];
299         }
300         destroy_enerdata(&ed_lam);
301         wallcycle_sub_stop(wcycle, ewcsNONBONDED);
302     }
303     where();
304
305         /* If we are doing GB, calculate bonded forces and apply corrections
306          * to the solvation forces */
307     /* MRS: Eventually, many need to include free energy contribution here! */
308         if (ir->implicit_solvent)
309     {
310                 calc_gb_forces(cr,md,born,top,atype,x,f,fr,idef,
311                        ir->gb_algorithm,ir->sa_algorithm,nrnb,bBornRadii,&pbc,graph,enerd);
312         wallcycle_sub_stop(wcycle, ewcsBONDED);
313     }
314
315 #ifdef GMX_MPI
316     if (TAKETIME)
317     {
318         t1=MPI_Wtime();
319         fr->t_fnbf += t1-t0;
320     }
321 #endif
322
323     if (fepvals->sc_alpha!=0)
324     {
325         enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW];
326     }
327     else
328     {
329         enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW];
330     }
331
332     if (fepvals->sc_alpha!=0)
333
334         /* even though coulomb part is linear, we already added it, beacuse we
335            need to go through the vdw calculation anyway */
336     {
337         enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL];
338     }
339     else
340     {
341         enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL];
342     }
343
344     Vsr = 0;
345     if (bSepDVDL)
346     {
347         for(i=0; i<enerd->grpp.nener; i++)
348         {
349             Vsr +=
350                 (fr->bBHAM ?
351                  enerd->grpp.ener[egBHAMSR][i] :
352                  enerd->grpp.ener[egLJSR][i])
353                 + enerd->grpp.ener[egCOULSR][i] + enerd->grpp.ener[egGB][i];
354         }
355         dvdlsum = dvdl_nb[efptVDW] + dvdl_nb[efptCOUL];
356         PRINT_SEPDVDL("VdW and Coulomb SR particle-p.",Vsr,dvdlsum);
357     }
358     debug_gmx();
359
360
361     if (debug)
362     {
363         pr_rvecs(debug,0,"fshift after SR",fr->fshift,SHIFTS);
364     }
365
366     /* Shift the coordinates. Must be done before bonded forces and PPPM,
367      * but is also necessary for SHAKE and update, therefore it can NOT
368      * go when no bonded forces have to be evaluated.
369      */
370
371     /* Here sometimes we would not need to shift with NBFonly,
372      * but we do so anyhow for consistency of the returned coordinates.
373      */
374     if (graph)
375     {
376         shift_self(graph,box,x);
377         if (TRICLINIC(box))
378         {
379             inc_nrnb(nrnb,eNR_SHIFTX,2*graph->nnodes);
380         }
381         else
382         {
383             inc_nrnb(nrnb,eNR_SHIFTX,graph->nnodes);
384         }
385     }
386     /* Check whether we need to do bondeds or correct for exclusions */
387     if (fr->bMolPBC &&
388         ((flags & GMX_FORCE_BONDED)
389          || EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype)))
390     {
391         /* Since all atoms are in the rectangular or triclinic unit-cell,
392          * only single box vector shifts (2 in x) are required.
393          */
394         set_pbc_dd(&pbc,fr->ePBC,cr->dd,TRUE,box);
395     }
396     debug_gmx();
397
398     if (flags & GMX_FORCE_BONDED)
399     {
400         wallcycle_sub_start(wcycle, ewcsBONDED);
401         calc_bonds(fplog,cr->ms,
402                    idef,x,hist,f,fr,&pbc,graph,enerd,nrnb,lambda,md,fcd,
403                    DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL, atype, born,
404                    flags,
405                    fr->bSepDVDL && do_per_step(step,ir->nstlog),step);
406
407         /* Check if we have to determine energy differences
408          * at foreign lambda's.
409          */
410         if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) &&
411             idef->ilsort != ilsortNO_FE)
412         {
413             if (idef->ilsort != ilsortFE_SORTED)
414             {
415                 gmx_incons("The bonded interactions are not sorted for free energy");
416             }
417             init_enerdata(mtop->groups.grps[egcENER].nr,fepvals->n_lambda,&ed_lam);
418
419             for(i=0; i<enerd->n_lambda; i++)
420             {
421                 reset_enerdata(&ir->opts,fr,TRUE,&ed_lam,FALSE);
422                 for (j=0;j<efptNR;j++)
423                 {
424                     lam_i[j] = (i==0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
425                 }
426                 calc_bonds_lambda(fplog,idef,x,fr,&pbc,graph,&ed_lam,nrnb,lam_i,md,
427                                   fcd,DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL);
428                 sum_epot(&ir->opts,&ed_lam);
429                 enerd->enerpart_lambda[i] += ed_lam.term[F_EPOT];
430             }
431             destroy_enerdata(&ed_lam);
432         }
433         debug_gmx();
434
435         wallcycle_sub_stop(wcycle, ewcsBONDED);
436     }
437
438     where();
439
440     *cycles_pme = 0;
441     if (EEL_FULL(fr->eeltype))
442     {
443         bSB = (ir->nwall == 2);
444         if (bSB)
445         {
446             copy_mat(box,boxs);
447             svmul(ir->wall_ewald_zfac,boxs[ZZ],boxs[ZZ]);
448             box_size[ZZ] *= ir->wall_ewald_zfac;
449         }
450
451         clear_mat(fr->vir_el_recip);
452
453         if (fr->bEwald)
454         {
455             Vcorr = 0;
456             dvdl  = 0;
457
458             /* With the Verlet scheme exclusion forces are calculated
459              * in the non-bonded kernel.
460              */
461             /* The TPI molecule does not have exclusions with the rest
462              * of the system and no intra-molecular PME grid contributions
463              * will be calculated in gmx_pme_calc_energy.
464              */
465             if ((ir->cutoff_scheme == ecutsGROUP && fr->n_tpi == 0) ||
466                 ir->ewald_geometry != eewg3D ||
467                 ir->epsilon_surface != 0)
468             {
469                 int nthreads,t;
470
471                 wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION);
472
473                 if (fr->n_tpi > 0)
474                 {
475                     gmx_fatal(FARGS,"TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions");
476                 }
477
478                 nthreads = gmx_omp_nthreads_get(emntBonded);
479 #pragma omp parallel for num_threads(nthreads) schedule(static)
480                 for(t=0; t<nthreads; t++)
481                 {
482                     int s,e,i;
483                     rvec *fnv;
484                     tensor *vir;
485                     real *Vcorrt,*dvdlt;
486                     if (t == 0)
487                     {
488                         fnv    = fr->f_novirsum;
489                         vir    = &fr->vir_el_recip;
490                         Vcorrt = &Vcorr;
491                         dvdlt  = &dvdl;
492                     }
493                     else
494                     {
495                         fnv    = fr->f_t[t].f;
496                         vir    = &fr->f_t[t].vir;
497                         Vcorrt = &fr->f_t[t].Vcorr;
498                         dvdlt  = &fr->f_t[t].dvdl[efptCOUL];
499                         for(i=0; i<fr->natoms_force; i++)
500                         {
501                             clear_rvec(fnv[i]);
502                         }
503                         clear_mat(*vir);
504                     }
505                     *dvdlt = 0;
506                     *Vcorrt =
507                         ewald_LRcorrection(fplog,
508                                            fr->excl_load[t],fr->excl_load[t+1],
509                                            cr,t,fr,
510                                            md->chargeA,
511                                            md->nChargePerturbed ? md->chargeB : NULL,
512                                            ir->cutoff_scheme != ecutsVERLET,
513                                            excl,x,bSB ? boxs : box,mu_tot,
514                                            ir->ewald_geometry,
515                                            ir->epsilon_surface,
516                                            fnv,*vir,
517                                            lambda[efptCOUL],dvdlt);
518                 }
519                 if (nthreads > 1)
520                 {
521                     reduce_thread_forces(fr->natoms_force,fr->f_novirsum,
522                                          fr->vir_el_recip,
523                                          &Vcorr,efptCOUL,&dvdl,
524                                          nthreads,fr->f_t);
525                 }
526
527                 wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION);
528             }
529
530             if (fr->n_tpi == 0)
531             {
532                 Vcorr += ewald_charge_correction(cr,fr,lambda[efptCOUL],box,
533                                                  &dvdl,fr->vir_el_recip);
534             }
535
536             PRINT_SEPDVDL("Ewald excl./charge/dip. corr.",Vcorr,dvdl);
537             enerd->dvdl_lin[efptCOUL] += dvdl;
538         }
539
540         status = 0;
541         dvdl = 0;
542         switch (fr->eeltype)
543         {
544         case eelPME:
545         case eelPMESWITCH:
546         case eelPMEUSER:
547         case eelPMEUSERSWITCH:
548         case eelP3M_AD:
549             if (cr->duty & DUTY_PME)
550             {
551                 assert(fr->n_tpi >= 0);
552                 if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED))
553                 {
554                     pme_flags = GMX_PME_SPREAD_Q | GMX_PME_SOLVE;
555                     if (flags & GMX_FORCE_FORCES)
556                     {
557                         pme_flags |= GMX_PME_CALC_F;
558                     }
559                     if (flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY))
560                     {
561                         pme_flags |= GMX_PME_CALC_ENER_VIR;
562                     }
563                     if (fr->n_tpi > 0)
564                     {
565                         /* We don't calculate f, but we do want the potential */
566                         pme_flags |= GMX_PME_CALC_POT;
567                     }
568                     wallcycle_start(wcycle,ewcPMEMESH);
569                     status = gmx_pme_do(fr->pmedata,
570                                         md->start,md->homenr - fr->n_tpi,
571                                         x,fr->f_novirsum,
572                                         md->chargeA,md->chargeB,
573                                         bSB ? boxs : box,cr,
574                                         DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0,
575                                         DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0,
576                                         nrnb,wcycle,
577                                         fr->vir_el_recip,fr->ewaldcoeff,
578                                         &Vlr,lambda[efptCOUL],&dvdl,
579                                         pme_flags);
580                     *cycles_pme = wallcycle_stop(wcycle,ewcPMEMESH);
581
582                     /* We should try to do as little computation after
583                      * this as possible, because parallel PME synchronizes
584                      * the nodes, so we want all load imbalance of the rest
585                      * of the force calculation to be before the PME call.
586                      * DD load balancing is done on the whole time of
587                      * the force call (without PME).
588                      */
589                 }
590                 if (fr->n_tpi > 0)
591                 {
592                     /* Determine the PME grid energy of the test molecule
593                      * with the PME grid potential of the other charges.
594                      */
595                     gmx_pme_calc_energy(fr->pmedata,fr->n_tpi,
596                                         x + md->homenr - fr->n_tpi,
597                                         md->chargeA + md->homenr - fr->n_tpi,
598                                         &Vlr);
599                 }
600                 PRINT_SEPDVDL("PME mesh",Vlr,dvdl);
601             }
602             else
603             {
604                 /* Energies and virial are obtained later from the PME nodes */
605                 /* but values have to be zeroed out here */
606                 Vlr=0.0;
607             }
608             break;
609         case eelEWALD:
610             Vlr = do_ewald(fplog,FALSE,ir,x,fr->f_novirsum,
611                            md->chargeA,md->chargeB,
612                            box_size,cr,md->homenr,
613                            fr->vir_el_recip,fr->ewaldcoeff,
614                            lambda[efptCOUL],&dvdl,fr->ewald_table);
615             PRINT_SEPDVDL("Ewald long-range",Vlr,dvdl);
616             break;
617         default:
618             Vlr = 0;
619             gmx_fatal(FARGS,"No such electrostatics method implemented %s",
620                       eel_names[fr->eeltype]);
621         }
622         if (status != 0)
623         {
624             gmx_fatal(FARGS,"Error %d in long range electrostatics routine %s",
625                       status,EELTYPE(fr->eeltype));
626                 }
627         enerd->dvdl_lin[efptCOUL] += dvdl;
628         enerd->term[F_COUL_RECIP] = Vlr + Vcorr;
629         if (debug)
630         {
631             fprintf(debug,"Vlr = %g, Vcorr = %g, Vlr_corr = %g\n",
632                     Vlr,Vcorr,enerd->term[F_COUL_RECIP]);
633             pr_rvecs(debug,0,"vir_el_recip after corr",fr->vir_el_recip,DIM);
634             pr_rvecs(debug,0,"fshift after LR Corrections",fr->fshift,SHIFTS);
635         }
636     }
637     else
638     {
639         if (EEL_RF(fr->eeltype))
640         {
641             /* With the Verlet scheme exclusion forces are calculated
642              * in the non-bonded kernel.
643              */
644             if (ir->cutoff_scheme != ecutsVERLET && fr->eeltype != eelRF_NEC)
645             {
646                 dvdl = 0;
647                 enerd->term[F_RF_EXCL] =
648                     RF_excl_correction(fplog,fr,graph,md,excl,x,f,
649                                        fr->fshift,&pbc,lambda[efptCOUL],&dvdl);
650             }
651
652             enerd->dvdl_lin[efptCOUL] += dvdl;
653             PRINT_SEPDVDL("RF exclusion correction",
654                           enerd->term[F_RF_EXCL],dvdl);
655         }
656     }
657     where();
658     debug_gmx();
659
660     if (debug)
661     {
662         print_nrnb(debug,nrnb);
663     }
664     debug_gmx();
665
666 #ifdef GMX_MPI
667     if (TAKETIME)
668     {
669         t2=MPI_Wtime();
670         MPI_Barrier(cr->mpi_comm_mygroup);
671         t3=MPI_Wtime();
672         fr->t_wait += t3-t2;
673         if (fr->timesteps == 11)
674         {
675             fprintf(stderr,"* PP load balancing info: node %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n",
676                     cr->nodeid, gmx_step_str(fr->timesteps,buf),
677                     100*fr->t_wait/(fr->t_wait+fr->t_fnbf),
678                     (fr->t_fnbf+fr->t_wait)/fr->t_fnbf);
679         }
680         fr->timesteps++;
681     }
682 #endif
683
684     if (debug)
685     {
686         pr_rvecs(debug,0,"fshift after bondeds",fr->fshift,SHIFTS);
687     }
688
689 }
690
691 void init_enerdata(int ngener,int n_lambda,gmx_enerdata_t *enerd)
692 {
693     int i,n2;
694
695     for(i=0; i<F_NRE; i++)
696     {
697         enerd->term[i] = 0;
698     }
699
700
701     for(i=0; i<efptNR; i++) {
702         enerd->dvdl_lin[i]  = 0;
703         enerd->dvdl_nonlin[i]  = 0;
704     }
705
706     n2=ngener*ngener;
707     if (debug)
708     {
709         fprintf(debug,"Creating %d sized group matrix for energies\n",n2);
710     }
711     enerd->grpp.nener = n2;
712     for(i=0; (i<egNR); i++)
713     {
714         snew(enerd->grpp.ener[i],n2);
715     }
716
717     if (n_lambda)
718     {
719         enerd->n_lambda = 1 + n_lambda;
720         snew(enerd->enerpart_lambda,enerd->n_lambda);
721     }
722     else
723     {
724         enerd->n_lambda = 0;
725     }
726 }
727
728 void destroy_enerdata(gmx_enerdata_t *enerd)
729 {
730     int i;
731
732     for(i=0; (i<egNR); i++)
733     {
734         sfree(enerd->grpp.ener[i]);
735     }
736
737     if (enerd->n_lambda)
738     {
739         sfree(enerd->enerpart_lambda);
740     }
741 }
742
743 static real sum_v(int n,real v[])
744 {
745   real t;
746   int  i;
747
748   t = 0.0;
749   for(i=0; (i<n); i++)
750     t = t + v[i];
751
752   return t;
753 }
754
755 void sum_epot(t_grpopts *opts,gmx_enerdata_t *enerd)
756 {
757   gmx_grppairener_t *grpp;
758   real *epot;
759   int i;
760
761   grpp = &enerd->grpp;
762   epot = enerd->term;
763
764   /* Accumulate energies */
765   epot[F_COUL_SR]  = sum_v(grpp->nener,grpp->ener[egCOULSR]);
766   epot[F_LJ]       = sum_v(grpp->nener,grpp->ener[egLJSR]);
767   epot[F_LJ14]     = sum_v(grpp->nener,grpp->ener[egLJ14]);
768   epot[F_COUL14]   = sum_v(grpp->nener,grpp->ener[egCOUL14]);
769   epot[F_COUL_LR]  = sum_v(grpp->nener,grpp->ener[egCOULLR]);
770   epot[F_LJ_LR]    = sum_v(grpp->nener,grpp->ener[egLJLR]);
771   /* We have already added 1-2,1-3, and 1-4 terms to F_GBPOL */
772   epot[F_GBPOL]   += sum_v(grpp->nener,grpp->ener[egGB]);
773
774 /* lattice part of LR doesnt belong to any group
775  * and has been added earlier
776  */
777   epot[F_BHAM]     = sum_v(grpp->nener,grpp->ener[egBHAMSR]);
778   epot[F_BHAM_LR]  = sum_v(grpp->nener,grpp->ener[egBHAMLR]);
779
780   epot[F_EPOT] = 0;
781   for(i=0; (i<F_EPOT); i++)
782   {
783       if (i != F_DISRESVIOL && i != F_ORIRESDEV)
784       {
785           epot[F_EPOT] += epot[i];
786       }
787   }
788 }
789
790 void sum_dhdl(gmx_enerdata_t *enerd, real *lambda, t_lambda *fepvals)
791 {
792     int i,j,index;
793     double dlam;
794
795     enerd->dvdl_lin[efptVDW] += enerd->term[F_DVDL_VDW];  /* include dispersion correction */
796     enerd->term[F_DVDL] = 0.0;
797     for (i=0;i<efptNR;i++)
798     {
799         if (fepvals->separate_dvdl[i])
800         {
801             /* could this be done more readably/compactly? */
802             switch (i) {
803             case (efptCOUL):
804                 index = F_DVDL_COUL;
805                 break;
806             case (efptVDW):
807                 index = F_DVDL_VDW;
808                 break;
809             case (efptBONDED):
810                 index = F_DVDL_BONDED;
811                 break;
812             case (efptRESTRAINT):
813                 index = F_DVDL_RESTRAINT;
814                 break;
815             case (efptMASS):
816                 index = F_DKDL;
817                 break;
818             default:
819                 index = F_DVDL;
820                 break;
821             }
822             enerd->term[index] = enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i];
823             if (debug)
824             {
825                 fprintf(debug,"dvdl-%s[%2d]: %f: non-linear %f + linear %f\n",
826                         efpt_names[i],i,enerd->term[index],enerd->dvdl_nonlin[i],enerd->dvdl_lin[i]);
827             }
828         }
829         else
830         {
831             enerd->term[F_DVDL] += enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i];
832             if (debug)
833             {
834                 fprintf(debug,"dvd-%sl[%2d]: %f: non-linear %f + linear %f\n",
835                         efpt_names[0],i,enerd->term[F_DVDL],enerd->dvdl_nonlin[i],enerd->dvdl_lin[i]);
836             }
837         }
838     }
839
840     /* Notes on the foreign lambda free energy difference evaluation:
841      * Adding the potential and ekin terms that depend linearly on lambda
842      * as delta lam * dvdl to the energy differences is exact.
843      * For the constraints this is not exact, but we have no other option
844      * without literally changing the lengths and reevaluating the energies at each step.
845      * (try to remedy this post 4.6 - MRS)
846      * For the non-bonded LR term we assume that the soft-core (if present)
847      * no longer affects the energy beyond the short-range cut-off,
848      * which is a very good approximation (except for exotic settings).
849      * (investigate how to overcome this post 4.6 - MRS)
850      */
851
852     for(i=0; i<fepvals->n_lambda; i++)
853     {                                         /* note we are iterating over fepvals here!
854                                                  For the current lam, dlam = 0 automatically,
855                                                  so we don't need to add anything to the
856                                                  enerd->enerpart_lambda[0] */
857
858         /* we don't need to worry about dvdl contributions to the current lambda, because
859            it's automatically zero */
860
861         /* first kinetic energy term */
862         dlam = (fepvals->all_lambda[efptMASS][i] - lambda[efptMASS]);
863
864         enerd->enerpart_lambda[i+1] += enerd->term[F_DKDL]*dlam;
865
866         for (j=0;j<efptNR;j++)
867         {
868             if (j==efptMASS) {continue;} /* no other mass term to worry about */
869
870             dlam = (fepvals->all_lambda[j][i]-lambda[j]);
871             enerd->enerpart_lambda[i+1] += dlam*enerd->dvdl_lin[j];
872             if (debug)
873             {
874                 fprintf(debug,"enerdiff lam %g: (%15s), non-linear %f linear %f*%f\n",
875                         fepvals->all_lambda[j][i],efpt_names[j],
876                         (enerd->enerpart_lambda[i+1] - enerd->enerpart_lambda[0]),
877                         dlam,enerd->dvdl_lin[j]);
878             }
879         }
880     }
881 }
882
883 void reset_enerdata(t_grpopts *opts,
884                     t_forcerec *fr,gmx_bool bNS,
885                     gmx_enerdata_t *enerd,
886                     gmx_bool bMaster)
887 {
888     gmx_bool bKeepLR;
889     int  i,j;
890
891     /* First reset all energy components, except for the long range terms
892      * on the master at non neighbor search steps, since the long range
893      * terms have already been summed at the last neighbor search step.
894      */
895     bKeepLR = (fr->bTwinRange && !bNS);
896     for(i=0; (i<egNR); i++) {
897         if (!(bKeepLR && bMaster && (i == egCOULLR || i == egLJLR))) {
898             for(j=0; (j<enerd->grpp.nener); j++)
899                 enerd->grpp.ener[i][j] = 0.0;
900         }
901     }
902     for (i=0;i<efptNR;i++)
903     {
904         enerd->dvdl_lin[i]    = 0.0;
905         enerd->dvdl_nonlin[i] = 0.0;
906     }
907
908     /* Normal potential energy components */
909     for(i=0; (i<=F_EPOT); i++) {
910         enerd->term[i] = 0.0;
911     }
912     /* Initialize the dVdlambda term with the long range contribution */
913     /* Initialize the dvdl term with the long range contribution */
914     enerd->term[F_DVDL]            = 0.0;
915     enerd->term[F_DVDL_COUL]       = 0.0;
916     enerd->term[F_DVDL_VDW]        = 0.0;
917     enerd->term[F_DVDL_BONDED]     = 0.0;
918     enerd->term[F_DVDL_RESTRAINT]  = 0.0;
919     enerd->term[F_DKDL]            = 0.0;
920     if (enerd->n_lambda > 0)
921     {
922         for(i=0; i<enerd->n_lambda; i++)
923         {
924             enerd->enerpart_lambda[i] = 0.0;
925         }
926     }
927 }