Merge release-4-6 into master
[alexxy/gromacs.git] / src / gromacs / mdlib / force.c
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
2  *
3  *
4  *                This source code is part of
5  *
6  *                 G   R   O   M   A   C   S
7  *
8  *          GROningen MAchine for Chemical Simulations
9  *
10  *                        VERSION 3.2.0
11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13  * Copyright (c) 2001-2004, The GROMACS development team,
14  * check out http://www.gromacs.org for more information.
15
16  * This program is free software; you can redistribute it and/or
17  * modify it under the terms of the GNU General Public License
18  * as published by the Free Software Foundation; either version 2
19  * of the License, or (at your option) any later version.
20  *
21  * If you want to redistribute modifications, please consider that
22  * scientific software is very special. Version control is crucial -
23  * bugs must be traceable. We will be happy to consider code for
24  * inclusion in the official distribution, but derived work must not
25  * be called official GROMACS. Details are found in the README & COPYING
26  * files - if they are missing, get the official version at www.gromacs.org.
27  *
28  * To help us fund GROMACS development, we humbly ask that you cite
29  * the papers on the package - you can find them in the top README file.
30  *
31  * For more info, check our website at http://www.gromacs.org
32  *
33  * And Hey:
34  * GROwing Monsters And Cloning Shrimps
35  */
36 #ifdef HAVE_CONFIG_H
37 #include <config.h>
38 #endif
39
40 #include <math.h>
41 #include <string.h>
42 #include <assert.h>
43 #include "sysstuff.h"
44 #include "typedefs.h"
45 #include "macros.h"
46 #include "smalloc.h"
47 #include "macros.h"
48 #include "physics.h"
49 #include "force.h"
50 #include "nonbonded.h"
51 #include "names.h"
52 #include "network.h"
53 #include "pbc.h"
54 #include "ns.h"
55 #include "nrnb.h"
56 #include "bondf.h"
57 #include "mshift.h"
58 #include "txtdump.h"
59 #include "coulomb.h"
60 #include "pme.h"
61 #include "mdrun.h"
62 #include "domdec.h"
63 #include "partdec.h"
64 #include "qmmm.h"
65 #include "gmx_omp_nthreads.h"
66
67
68 void ns(FILE *fp,
69         t_forcerec *fr,
70         rvec       x[],
71         matrix     box,
72         gmx_groups_t *groups,
73         t_grpopts  *opts,
74         gmx_localtop_t *top,
75         t_mdatoms  *md,
76         t_commrec  *cr,
77         t_nrnb     *nrnb,
78         real       *lambda,
79         real       *dvdlambda,
80         gmx_grppairener_t *grppener,
81         gmx_bool       bFillGrid,
82         gmx_bool       bDoLongRangeNS)
83 {
84   char   *ptr;
85   int    nsearch;
86
87
88   if (!fr->ns.nblist_initialized)
89   {
90       init_neighbor_list(fp, fr, md->homenr);
91   }
92
93   if (fr->bTwinRange)
94     fr->nlr=0;
95
96     nsearch = search_neighbours(fp,fr,x,box,top,groups,cr,nrnb,md,
97                                 lambda,dvdlambda,grppener,
98                                 bFillGrid,bDoLongRangeNS);
99   if (debug)
100     fprintf(debug,"nsearch = %d\n",nsearch);
101
102   /* Check whether we have to do dynamic load balancing */
103   /*if ((nsb->nstDlb > 0) && (mod(step,nsb->nstDlb) == 0))
104     count_nb(cr,nsb,&(top->blocks[ebCGS]),nns,fr->nlr,
105     &(top->idef),opts->ngener);
106   */
107   if (fr->ns.dump_nl > 0)
108     dump_nblist(fp,cr,fr,fr->ns.dump_nl);
109 }
110
111 static void reduce_thread_forces(int n,rvec *f,
112                                  tensor vir,
113                                  real *Vcorr,
114                                  int efpt_ind,real *dvdl,
115                                  int nthreads,f_thread_t *f_t)
116 {
117     int t,i;
118
119     /* This reduction can run over any number of threads */
120 #pragma omp parallel for num_threads(gmx_omp_nthreads_get(emntBonded)) private(t) schedule(static)
121     for(i=0; i<n; i++)
122     {
123         for(t=1; t<nthreads; t++)
124         {
125             rvec_inc(f[i],f_t[t].f[i]);
126         }
127     }
128     for(t=1; t<nthreads; t++)
129     {
130         *Vcorr += f_t[t].Vcorr;
131         *dvdl  += f_t[t].dvdl[efpt_ind];
132         m_add(vir,f_t[t].vir,vir);
133     }
134 }
135
136 void do_force_lowlevel(FILE       *fplog,   gmx_large_int_t step,
137                        t_forcerec *fr,      t_inputrec *ir,
138                        t_idef     *idef,    t_commrec  *cr,
139                        t_nrnb     *nrnb,    gmx_wallcycle_t wcycle,
140                        t_mdatoms  *md,
141                        t_grpopts  *opts,
142                        rvec       x[],      history_t  *hist,
143                        rvec       f[],
144                        rvec       f_longrange[],
145                        gmx_enerdata_t *enerd,
146                        t_fcdata   *fcd,
147                        gmx_mtop_t     *mtop,
148                        gmx_localtop_t *top,
149                        gmx_genborn_t *born,
150                        t_atomtypes *atype,
151                        gmx_bool       bBornRadii,
152                        matrix     box,
153                        t_lambda   *fepvals,
154                        real       *lambda,
155                        t_graph    *graph,
156                        t_blocka   *excl,
157                        rvec       mu_tot[],
158                        int        flags,
159                        float      *cycles_pme)
160 {
161     int     i,j,status;
162     int     donb_flags;
163     gmx_bool    bDoEpot,bSepDVDL,bSB;
164     int     pme_flags;
165     matrix  boxs;
166     rvec    box_size;
167     real    Vsr,Vlr,Vcorr=0;
168     t_pbc   pbc;
169     real    dvdgb;
170     char    buf[22];
171     gmx_enerdata_t ed_lam;
172     double  clam_i,vlam_i;
173     real    dvdl_dum[efptNR], dvdl, dvdl_nb[efptNR], lam_i[efptNR];
174     real    dvdlsum;
175
176 #ifdef GMX_MPI
177     double  t0=0.0,t1,t2,t3; /* time measurement for coarse load balancing */
178 #endif
179
180 #define PRINT_SEPDVDL(s,v,dvdlambda) if (bSepDVDL) fprintf(fplog,sepdvdlformat,s,v,dvdlambda);
181
182
183     set_pbc(&pbc,fr->ePBC,box);
184
185     /* reset free energy components */
186     for (i=0;i<efptNR;i++)
187     {
188         dvdl_nb[i]  = 0;
189         dvdl_dum[i] = 0;
190     }
191
192     /* Reset box */
193     for(i=0; (i<DIM); i++)
194     {
195         box_size[i]=box[i][i];
196     }
197
198     bSepDVDL=(fr->bSepDVDL && do_per_step(step,ir->nstlog));
199     debug_gmx();
200
201     /* do QMMM first if requested */
202     if(fr->bQMMM)
203     {
204         enerd->term[F_EQM] = calculate_QMMM(cr,x,f,fr,md);
205     }
206
207     if (bSepDVDL)
208     {
209         fprintf(fplog,"Step %s: non-bonded V and dVdl for node %d:\n",
210                 gmx_step_str(step,buf),cr->nodeid);
211     }
212
213     /* Call the short range functions all in one go. */
214
215 #ifdef GMX_MPI
216     /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/
217 #define TAKETIME FALSE
218     if (TAKETIME)
219     {
220         MPI_Barrier(cr->mpi_comm_mygroup);
221         t0=MPI_Wtime();
222     }
223 #endif
224
225     if (ir->nwall)
226     {
227         /* foreign lambda component for walls */
228         dvdl = do_walls(ir,fr,box,md,x,f,lambda[efptVDW],
229                         enerd->grpp.ener[egLJSR],nrnb);
230         PRINT_SEPDVDL("Walls",0.0,dvdl);
231         enerd->dvdl_lin[efptVDW] += dvdl;
232     }
233
234         /* If doing GB, reset dvda and calculate the Born radii */
235         if (ir->implicit_solvent)
236         {
237         wallcycle_sub_start(wcycle, ewcsNONBONDED);
238
239                 for(i=0;i<born->nr;i++)
240                 {
241                         fr->dvda[i]=0;
242                 }
243
244                 if(bBornRadii)
245                 {
246                         calc_gb_rad(cr,fr,ir,top,atype,x,&(fr->gblist),born,md,nrnb);
247                 }
248
249         wallcycle_sub_stop(wcycle, ewcsNONBONDED);
250         }
251
252     where();
253     if (flags & GMX_FORCE_NONBONDED)
254     {
255         donb_flags = 0;
256         /* Add short-range interactions */
257         donb_flags |= GMX_NONBONDED_DO_SR;
258
259         if (flags & GMX_FORCE_FORCES)
260         {
261             donb_flags |= GMX_NONBONDED_DO_FORCE;
262         }
263         if (flags & GMX_FORCE_ENERGY)
264         {
265             donb_flags |= GMX_NONBONDED_DO_POTENTIAL;
266         }
267         if (flags & GMX_FORCE_DO_LR)
268         {
269             donb_flags |= GMX_NONBONDED_DO_LR;
270         }
271
272         wallcycle_sub_start(wcycle, ewcsNONBONDED);
273         do_nonbonded(cr,fr,x,f,f_longrange,md,excl,
274                     &enerd->grpp,box_size,nrnb,
275                     lambda,dvdl_nb,-1,-1,donb_flags);
276         wallcycle_sub_stop(wcycle, ewcsNONBONDED);
277     }
278
279     /* If we do foreign lambda and we have soft-core interactions
280      * we have to recalculate the (non-linear) energies contributions.
281      */
282     if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0)
283     {
284         wallcycle_sub_start(wcycle, ewcsNONBONDED);
285         init_enerdata(mtop->groups.grps[egcENER].nr,fepvals->n_lambda,&ed_lam);
286
287         for(i=0; i<enerd->n_lambda; i++)
288         {
289             for (j=0;j<efptNR;j++)
290             {
291                 lam_i[j] = (i==0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
292             }
293             reset_enerdata(&ir->opts,fr,TRUE,&ed_lam,FALSE);
294             do_nonbonded(cr,fr,x,f,f_longrange,md,excl,
295                          &(ed_lam.grpp), box_size,nrnb,
296                          lam_i,dvdl_dum,-1,-1,
297                          GMX_NONBONDED_DO_FOREIGNLAMBDA | GMX_NONBONDED_DO_SR);
298             sum_epot(&ir->opts,&ed_lam);
299             enerd->enerpart_lambda[i] += ed_lam.term[F_EPOT];
300         }
301         destroy_enerdata(&ed_lam);
302         wallcycle_sub_stop(wcycle, ewcsNONBONDED);
303     }
304     where();
305
306         /* If we are doing GB, calculate bonded forces and apply corrections
307          * to the solvation forces */
308     /* MRS: Eventually, many need to include free energy contribution here! */
309         if (ir->implicit_solvent)
310     {
311                 calc_gb_forces(cr,md,born,top,atype,x,f,fr,idef,
312                        ir->gb_algorithm,ir->sa_algorithm,nrnb,bBornRadii,&pbc,graph,enerd);
313         wallcycle_sub_stop(wcycle, ewcsBONDED);
314     }
315
316 #ifdef GMX_MPI
317     if (TAKETIME)
318     {
319         t1=MPI_Wtime();
320         fr->t_fnbf += t1-t0;
321     }
322 #endif
323
324     if (fepvals->sc_alpha!=0)
325     {
326         enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW];
327     }
328     else
329     {
330         enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW];
331     }
332
333     if (fepvals->sc_alpha!=0)
334
335         /* even though coulomb part is linear, we already added it, beacuse we
336            need to go through the vdw calculation anyway */
337     {
338         enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL];
339     }
340     else
341     {
342         enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL];
343     }
344
345     Vsr = 0;
346     if (bSepDVDL)
347     {
348         for(i=0; i<enerd->grpp.nener; i++)
349         {
350             Vsr +=
351                 (fr->bBHAM ?
352                  enerd->grpp.ener[egBHAMSR][i] :
353                  enerd->grpp.ener[egLJSR][i])
354                 + enerd->grpp.ener[egCOULSR][i] + enerd->grpp.ener[egGB][i];
355         }
356         dvdlsum = dvdl_nb[efptVDW] + dvdl_nb[efptCOUL];
357         PRINT_SEPDVDL("VdW and Coulomb SR particle-p.",Vsr,dvdlsum);
358     }
359     debug_gmx();
360
361
362     if (debug)
363     {
364         pr_rvecs(debug,0,"fshift after SR",fr->fshift,SHIFTS);
365     }
366
367     /* Shift the coordinates. Must be done before bonded forces and PPPM,
368      * but is also necessary for SHAKE and update, therefore it can NOT
369      * go when no bonded forces have to be evaluated.
370      */
371
372     /* Here sometimes we would not need to shift with NBFonly,
373      * but we do so anyhow for consistency of the returned coordinates.
374      */
375     if (graph)
376     {
377         shift_self(graph,box,x);
378         if (TRICLINIC(box))
379         {
380             inc_nrnb(nrnb,eNR_SHIFTX,2*graph->nnodes);
381         }
382         else
383         {
384             inc_nrnb(nrnb,eNR_SHIFTX,graph->nnodes);
385         }
386     }
387     /* Check whether we need to do bondeds or correct for exclusions */
388     if (fr->bMolPBC &&
389         ((flags & GMX_FORCE_BONDED)
390          || EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype)))
391     {
392         /* Since all atoms are in the rectangular or triclinic unit-cell,
393          * only single box vector shifts (2 in x) are required.
394          */
395         set_pbc_dd(&pbc,fr->ePBC,cr->dd,TRUE,box);
396     }
397     debug_gmx();
398
399     if (flags & GMX_FORCE_BONDED)
400     {
401         wallcycle_sub_start(wcycle, ewcsBONDED);
402         calc_bonds(fplog,cr->ms,
403                    idef,x,hist,f,fr,&pbc,graph,enerd,nrnb,lambda,md,fcd,
404                    DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL, atype, born,
405                    flags,
406                    fr->bSepDVDL && do_per_step(step,ir->nstlog),step);
407
408         /* Check if we have to determine energy differences
409          * at foreign lambda's.
410          */
411         if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) &&
412             idef->ilsort != ilsortNO_FE)
413         {
414             if (idef->ilsort != ilsortFE_SORTED)
415             {
416                 gmx_incons("The bonded interactions are not sorted for free energy");
417             }
418             init_enerdata(mtop->groups.grps[egcENER].nr,fepvals->n_lambda,&ed_lam);
419
420             for(i=0; i<enerd->n_lambda; i++)
421             {
422                 reset_enerdata(&ir->opts,fr,TRUE,&ed_lam,FALSE);
423                 for (j=0;j<efptNR;j++)
424                 {
425                     lam_i[j] = (i==0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
426                 }
427                 calc_bonds_lambda(fplog,idef,x,fr,&pbc,graph,&ed_lam,nrnb,lam_i,md,
428                                   fcd,DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL);
429                 sum_epot(&ir->opts,&ed_lam);
430                 enerd->enerpart_lambda[i] += ed_lam.term[F_EPOT];
431             }
432             destroy_enerdata(&ed_lam);
433         }
434         debug_gmx();
435
436         wallcycle_sub_stop(wcycle, ewcsBONDED);
437     }
438
439     where();
440
441     *cycles_pme = 0;
442     if (EEL_FULL(fr->eeltype))
443     {
444         bSB = (ir->nwall == 2);
445         if (bSB)
446         {
447             copy_mat(box,boxs);
448             svmul(ir->wall_ewald_zfac,boxs[ZZ],boxs[ZZ]);
449             box_size[ZZ] *= ir->wall_ewald_zfac;
450         }
451
452         clear_mat(fr->vir_el_recip);
453
454         if (fr->bEwald)
455         {
456             Vcorr = 0;
457             dvdl  = 0;
458
459             /* With the Verlet scheme exclusion forces are calculated
460              * in the non-bonded kernel.
461              */
462             /* The TPI molecule does not have exclusions with the rest
463              * of the system and no intra-molecular PME grid contributions
464              * will be calculated in gmx_pme_calc_energy.
465              */
466             if ((ir->cutoff_scheme == ecutsGROUP && fr->n_tpi == 0) ||
467                 ir->ewald_geometry != eewg3D ||
468                 ir->epsilon_surface != 0)
469             {
470                 int nthreads,t;
471
472                 wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION);
473
474                 if (fr->n_tpi > 0)
475                 {
476                     gmx_fatal(FARGS,"TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions");
477                 }
478
479                 nthreads = gmx_omp_nthreads_get(emntBonded);
480 #pragma omp parallel for num_threads(nthreads) schedule(static)
481                 for(t=0; t<nthreads; t++)
482                 {
483                     int s,e,i;
484                     rvec *fnv;
485                     tensor *vir;
486                     real *Vcorrt,*dvdlt;
487                     if (t == 0)
488                     {
489                         fnv    = fr->f_novirsum;
490                         vir    = &fr->vir_el_recip;
491                         Vcorrt = &Vcorr;
492                         dvdlt  = &dvdl;
493                     }
494                     else
495                     {
496                         fnv    = fr->f_t[t].f;
497                         vir    = &fr->f_t[t].vir;
498                         Vcorrt = &fr->f_t[t].Vcorr;
499                         dvdlt  = &fr->f_t[t].dvdl[efptCOUL];
500                         for(i=0; i<fr->natoms_force; i++)
501                         {
502                             clear_rvec(fnv[i]);
503                         }
504                         clear_mat(*vir);
505                     }
506                     *dvdlt = 0;
507                     *Vcorrt =
508                         ewald_LRcorrection(fplog,
509                                            fr->excl_load[t],fr->excl_load[t+1],
510                                            cr,t,fr,
511                                            md->chargeA,
512                                            md->nChargePerturbed ? md->chargeB : NULL,
513                                            ir->cutoff_scheme != ecutsVERLET,
514                                            excl,x,bSB ? boxs : box,mu_tot,
515                                            ir->ewald_geometry,
516                                            ir->epsilon_surface,
517                                            fnv,*vir,
518                                            lambda[efptCOUL],dvdlt);
519                 }
520                 if (nthreads > 1)
521                 {
522                     reduce_thread_forces(fr->natoms_force,fr->f_novirsum,
523                                          fr->vir_el_recip,
524                                          &Vcorr,efptCOUL,&dvdl,
525                                          nthreads,fr->f_t);
526                 }
527
528                 wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION);
529             }
530
531             if (fr->n_tpi == 0)
532             {
533                 Vcorr += ewald_charge_correction(cr,fr,lambda[efptCOUL],box,
534                                                  &dvdl,fr->vir_el_recip);
535             }
536
537             PRINT_SEPDVDL("Ewald excl./charge/dip. corr.",Vcorr,dvdl);
538             enerd->dvdl_lin[efptCOUL] += dvdl;
539         }
540
541         status = 0;
542         Vlr  = 0;
543         dvdl = 0;
544         switch (fr->eeltype)
545         {
546         case eelPME:
547         case eelPMESWITCH:
548         case eelPMEUSER:
549         case eelPMEUSERSWITCH:
550         case eelP3M_AD:
551             if (cr->duty & DUTY_PME)
552             {
553                 assert(fr->n_tpi >= 0);
554                 if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED))
555                 {
556                     pme_flags = GMX_PME_SPREAD_Q | GMX_PME_SOLVE;
557                     if (flags & GMX_FORCE_FORCES)
558                     {
559                         pme_flags |= GMX_PME_CALC_F;
560                     }
561                     if (flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY))
562                     {
563                         pme_flags |= GMX_PME_CALC_ENER_VIR;
564                     }
565                     if (fr->n_tpi > 0)
566                     {
567                         /* We don't calculate f, but we do want the potential */
568                         pme_flags |= GMX_PME_CALC_POT;
569                     }
570                     wallcycle_start(wcycle,ewcPMEMESH);
571                     status = gmx_pme_do(fr->pmedata,
572                                         md->start,md->homenr - fr->n_tpi,
573                                         x,fr->f_novirsum,
574                                         md->chargeA,md->chargeB,
575                                         bSB ? boxs : box,cr,
576                                         DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0,
577                                         DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0,
578                                         nrnb,wcycle,
579                                         fr->vir_el_recip,fr->ewaldcoeff,
580                                         &Vlr,lambda[efptCOUL],&dvdl,
581                                         pme_flags);
582                     *cycles_pme = wallcycle_stop(wcycle,ewcPMEMESH);
583
584                     /* We should try to do as little computation after
585                      * this as possible, because parallel PME synchronizes
586                      * the nodes, so we want all load imbalance of the rest
587                      * of the force calculation to be before the PME call.
588                      * DD load balancing is done on the whole time of
589                      * the force call (without PME).
590                      */
591                 }
592                 if (fr->n_tpi > 0)
593                 {
594                     /* Determine the PME grid energy of the test molecule
595                      * with the PME grid potential of the other charges.
596                      */
597                     gmx_pme_calc_energy(fr->pmedata,fr->n_tpi,
598                                         x + md->homenr - fr->n_tpi,
599                                         md->chargeA + md->homenr - fr->n_tpi,
600                                         &Vlr);
601                 }
602                 PRINT_SEPDVDL("PME mesh",Vlr,dvdl);
603             }
604             break;
605         case eelEWALD:
606             Vlr = do_ewald(fplog,FALSE,ir,x,fr->f_novirsum,
607                            md->chargeA,md->chargeB,
608                            box_size,cr,md->homenr,
609                            fr->vir_el_recip,fr->ewaldcoeff,
610                            lambda[efptCOUL],&dvdl,fr->ewald_table);
611             PRINT_SEPDVDL("Ewald long-range",Vlr,dvdl);
612             break;
613         default:
614             gmx_fatal(FARGS,"No such electrostatics method implemented %s",
615                       eel_names[fr->eeltype]);
616         }
617         if (status != 0)
618         {
619             gmx_fatal(FARGS,"Error %d in long range electrostatics routine %s",
620                       status,EELTYPE(fr->eeltype));
621                 }
622         /* Note that with separate PME nodes we get the real energies later */
623         enerd->dvdl_lin[efptCOUL] += dvdl;
624         enerd->term[F_COUL_RECIP] = Vlr + Vcorr;
625         if (debug)
626         {
627             fprintf(debug,"Vlr = %g, Vcorr = %g, Vlr_corr = %g\n",
628                     Vlr,Vcorr,enerd->term[F_COUL_RECIP]);
629             pr_rvecs(debug,0,"vir_el_recip after corr",fr->vir_el_recip,DIM);
630             pr_rvecs(debug,0,"fshift after LR Corrections",fr->fshift,SHIFTS);
631         }
632     }
633     else
634     {
635         if (EEL_RF(fr->eeltype))
636         {
637             /* With the Verlet scheme exclusion forces are calculated
638              * in the non-bonded kernel.
639              */
640             if (ir->cutoff_scheme != ecutsVERLET && fr->eeltype != eelRF_NEC)
641             {
642                 dvdl = 0;
643                 enerd->term[F_RF_EXCL] =
644                     RF_excl_correction(fplog,fr,graph,md,excl,x,f,
645                                        fr->fshift,&pbc,lambda[efptCOUL],&dvdl);
646             }
647
648             enerd->dvdl_lin[efptCOUL] += dvdl;
649             PRINT_SEPDVDL("RF exclusion correction",
650                           enerd->term[F_RF_EXCL],dvdl);
651         }
652     }
653     where();
654     debug_gmx();
655
656     if (debug)
657     {
658         print_nrnb(debug,nrnb);
659     }
660     debug_gmx();
661
662 #ifdef GMX_MPI
663     if (TAKETIME)
664     {
665         t2=MPI_Wtime();
666         MPI_Barrier(cr->mpi_comm_mygroup);
667         t3=MPI_Wtime();
668         fr->t_wait += t3-t2;
669         if (fr->timesteps == 11)
670         {
671             fprintf(stderr,"* PP load balancing info: node %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n",
672                     cr->nodeid, gmx_step_str(fr->timesteps,buf),
673                     100*fr->t_wait/(fr->t_wait+fr->t_fnbf),
674                     (fr->t_fnbf+fr->t_wait)/fr->t_fnbf);
675         }
676         fr->timesteps++;
677     }
678 #endif
679
680     if (debug)
681     {
682         pr_rvecs(debug,0,"fshift after bondeds",fr->fshift,SHIFTS);
683     }
684
685 }
686
687 void init_enerdata(int ngener,int n_lambda,gmx_enerdata_t *enerd)
688 {
689     int i,n2;
690
691     for(i=0; i<F_NRE; i++)
692     {
693         enerd->term[i] = 0;
694     }
695
696
697     for(i=0; i<efptNR; i++) {
698         enerd->dvdl_lin[i]  = 0;
699         enerd->dvdl_nonlin[i]  = 0;
700     }
701
702     n2=ngener*ngener;
703     if (debug)
704     {
705         fprintf(debug,"Creating %d sized group matrix for energies\n",n2);
706     }
707     enerd->grpp.nener = n2;
708     for(i=0; (i<egNR); i++)
709     {
710         snew(enerd->grpp.ener[i],n2);
711     }
712
713     if (n_lambda)
714     {
715         enerd->n_lambda = 1 + n_lambda;
716         snew(enerd->enerpart_lambda,enerd->n_lambda);
717     }
718     else
719     {
720         enerd->n_lambda = 0;
721     }
722 }
723
724 void destroy_enerdata(gmx_enerdata_t *enerd)
725 {
726     int i;
727
728     for(i=0; (i<egNR); i++)
729     {
730         sfree(enerd->grpp.ener[i]);
731     }
732
733     if (enerd->n_lambda)
734     {
735         sfree(enerd->enerpart_lambda);
736     }
737 }
738
739 static real sum_v(int n,real v[])
740 {
741   real t;
742   int  i;
743
744   t = 0.0;
745   for(i=0; (i<n); i++)
746     t = t + v[i];
747
748   return t;
749 }
750
751 void sum_epot(t_grpopts *opts,gmx_enerdata_t *enerd)
752 {
753   gmx_grppairener_t *grpp;
754   real *epot;
755   int i;
756
757   grpp = &enerd->grpp;
758   epot = enerd->term;
759
760   /* Accumulate energies */
761   epot[F_COUL_SR]  = sum_v(grpp->nener,grpp->ener[egCOULSR]);
762   epot[F_LJ]       = sum_v(grpp->nener,grpp->ener[egLJSR]);
763   epot[F_LJ14]     = sum_v(grpp->nener,grpp->ener[egLJ14]);
764   epot[F_COUL14]   = sum_v(grpp->nener,grpp->ener[egCOUL14]);
765   epot[F_COUL_LR]  = sum_v(grpp->nener,grpp->ener[egCOULLR]);
766   epot[F_LJ_LR]    = sum_v(grpp->nener,grpp->ener[egLJLR]);
767   /* We have already added 1-2,1-3, and 1-4 terms to F_GBPOL */
768   epot[F_GBPOL]   += sum_v(grpp->nener,grpp->ener[egGB]);
769
770 /* lattice part of LR doesnt belong to any group
771  * and has been added earlier
772  */
773   epot[F_BHAM]     = sum_v(grpp->nener,grpp->ener[egBHAMSR]);
774   epot[F_BHAM_LR]  = sum_v(grpp->nener,grpp->ener[egBHAMLR]);
775
776   epot[F_EPOT] = 0;
777   for(i=0; (i<F_EPOT); i++)
778   {
779       if (i != F_DISRESVIOL && i != F_ORIRESDEV)
780       {
781           epot[F_EPOT] += epot[i];
782       }
783   }
784 }
785
786 void sum_dhdl(gmx_enerdata_t *enerd, real *lambda, t_lambda *fepvals)
787 {
788     int i,j,index;
789     double dlam;
790
791     enerd->dvdl_lin[efptVDW] += enerd->term[F_DVDL_VDW];  /* include dispersion correction */
792     enerd->term[F_DVDL] = 0.0;
793     for (i=0;i<efptNR;i++)
794     {
795         if (fepvals->separate_dvdl[i])
796         {
797             /* could this be done more readably/compactly? */
798             switch (i) {
799             case (efptCOUL):
800                 index = F_DVDL_COUL;
801                 break;
802             case (efptVDW):
803                 index = F_DVDL_VDW;
804                 break;
805             case (efptBONDED):
806                 index = F_DVDL_BONDED;
807                 break;
808             case (efptRESTRAINT):
809                 index = F_DVDL_RESTRAINT;
810                 break;
811             case (efptMASS):
812                 index = F_DKDL;
813                 break;
814             default:
815                 index = F_DVDL;
816                 break;
817             }
818             enerd->term[index] = enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i];
819             if (debug)
820             {
821                 fprintf(debug,"dvdl-%s[%2d]: %f: non-linear %f + linear %f\n",
822                         efpt_names[i],i,enerd->term[index],enerd->dvdl_nonlin[i],enerd->dvdl_lin[i]);
823             }
824         }
825         else
826         {
827             enerd->term[F_DVDL] += enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i];
828             if (debug)
829             {
830                 fprintf(debug,"dvd-%sl[%2d]: %f: non-linear %f + linear %f\n",
831                         efpt_names[0],i,enerd->term[F_DVDL],enerd->dvdl_nonlin[i],enerd->dvdl_lin[i]);
832             }
833         }
834     }
835
836     /* Notes on the foreign lambda free energy difference evaluation:
837      * Adding the potential and ekin terms that depend linearly on lambda
838      * as delta lam * dvdl to the energy differences is exact.
839      * For the constraints this is not exact, but we have no other option
840      * without literally changing the lengths and reevaluating the energies at each step.
841      * (try to remedy this post 4.6 - MRS)
842      * For the non-bonded LR term we assume that the soft-core (if present)
843      * no longer affects the energy beyond the short-range cut-off,
844      * which is a very good approximation (except for exotic settings).
845      * (investigate how to overcome this post 4.6 - MRS)
846      */
847
848     for(i=0; i<fepvals->n_lambda; i++)
849     {                                         /* note we are iterating over fepvals here!
850                                                  For the current lam, dlam = 0 automatically,
851                                                  so we don't need to add anything to the
852                                                  enerd->enerpart_lambda[0] */
853
854         /* we don't need to worry about dvdl contributions to the current lambda, because
855            it's automatically zero */
856
857         /* first kinetic energy term */
858         dlam = (fepvals->all_lambda[efptMASS][i] - lambda[efptMASS]);
859
860         enerd->enerpart_lambda[i+1] += enerd->term[F_DKDL]*dlam;
861
862         for (j=0;j<efptNR;j++)
863         {
864             if (j==efptMASS) {continue;} /* no other mass term to worry about */
865
866             dlam = (fepvals->all_lambda[j][i]-lambda[j]);
867             enerd->enerpart_lambda[i+1] += dlam*enerd->dvdl_lin[j];
868             if (debug)
869             {
870                 fprintf(debug,"enerdiff lam %g: (%15s), non-linear %f linear %f*%f\n",
871                         fepvals->all_lambda[j][i],efpt_names[j],
872                         (enerd->enerpart_lambda[i+1] - enerd->enerpart_lambda[0]),
873                         dlam,enerd->dvdl_lin[j]);
874             }
875         }
876     }
877 }
878
879 void reset_enerdata(t_grpopts *opts,
880                     t_forcerec *fr,gmx_bool bNS,
881                     gmx_enerdata_t *enerd,
882                     gmx_bool bMaster)
883 {
884     gmx_bool bKeepLR;
885     int  i,j;
886
887     /* First reset all energy components, except for the long range terms
888      * on the master at non neighbor search steps, since the long range
889      * terms have already been summed at the last neighbor search step.
890      */
891     bKeepLR = (fr->bTwinRange && !bNS);
892     for(i=0; (i<egNR); i++) {
893         if (!(bKeepLR && bMaster && (i == egCOULLR || i == egLJLR))) {
894             for(j=0; (j<enerd->grpp.nener); j++)
895                 enerd->grpp.ener[i][j] = 0.0;
896         }
897     }
898     for (i=0;i<efptNR;i++)
899     {
900         enerd->dvdl_lin[i]    = 0.0;
901         enerd->dvdl_nonlin[i] = 0.0;
902     }
903
904     /* Normal potential energy components */
905     for(i=0; (i<=F_EPOT); i++) {
906         enerd->term[i] = 0.0;
907     }
908     /* Initialize the dVdlambda term with the long range contribution */
909     /* Initialize the dvdl term with the long range contribution */
910     enerd->term[F_DVDL]            = 0.0;
911     enerd->term[F_DVDL_COUL]       = 0.0;
912     enerd->term[F_DVDL_VDW]        = 0.0;
913     enerd->term[F_DVDL_BONDED]     = 0.0;
914     enerd->term[F_DVDL_RESTRAINT]  = 0.0;
915     enerd->term[F_DKDL]            = 0.0;
916     if (enerd->n_lambda > 0)
917     {
918         for(i=0; i<enerd->n_lambda; i++)
919         {
920             enerd->enerpart_lambda[i] = 0.0;
921         }
922     }
923 }