Merge release-4-6 into master
[alexxy/gromacs.git] / src / gromacs / mdlib / force.c
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
2  *
3  *
4  *                This source code is part of
5  *
6  *                 G   R   O   M   A   C   S
7  *
8  *          GROningen MAchine for Chemical Simulations
9  *
10  *                        VERSION 3.2.0
11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13  * Copyright (c) 2001-2004, The GROMACS development team,
14  * check out http://www.gromacs.org for more information.
15
16  * This program is free software; you can redistribute it and/or
17  * modify it under the terms of the GNU General Public License
18  * as published by the Free Software Foundation; either version 2
19  * of the License, or (at your option) any later version.
20  *
21  * If you want to redistribute modifications, please consider that
22  * scientific software is very special. Version control is crucial -
23  * bugs must be traceable. We will be happy to consider code for
24  * inclusion in the official distribution, but derived work must not
25  * be called official GROMACS. Details are found in the README & COPYING
26  * files - if they are missing, get the official version at www.gromacs.org.
27  *
28  * To help us fund GROMACS development, we humbly ask that you cite
29  * the papers on the package - you can find them in the top README file.
30  *
31  * For more info, check our website at http://www.gromacs.org
32  *
33  * And Hey:
34  * GROwing Monsters And Cloning Shrimps
35  */
36 #ifdef HAVE_CONFIG_H
37 #include <config.h>
38 #endif
39
40 #include <math.h>
41 #include <string.h>
42 #include <assert.h>
43 #include "sysstuff.h"
44 #include "typedefs.h"
45 #include "macros.h"
46 #include "smalloc.h"
47 #include "macros.h"
48 #include "physics.h"
49 #include "force.h"
50 #include "nonbonded.h"
51 #include "names.h"
52 #include "network.h"
53 #include "pbc.h"
54 #include "ns.h"
55 #include "nrnb.h"
56 #include "bondf.h"
57 #include "mshift.h"
58 #include "txtdump.h"
59 #include "coulomb.h"
60 #include "pme.h"
61 #include "mdrun.h"
62 #include "domdec.h"
63 #include "partdec.h"
64 #include "qmmm.h"
65 #include "gmx_omp_nthreads.h"
66
67
68 void ns(FILE *fp,
69         t_forcerec *fr,
70         rvec       x[],
71         matrix     box,
72         gmx_groups_t *groups,
73         t_grpopts  *opts,
74         gmx_localtop_t *top,
75         t_mdatoms  *md,
76         t_commrec  *cr,
77         t_nrnb     *nrnb,
78         real       *lambda,
79         real       *dvdlambda,
80         gmx_grppairener_t *grppener,
81         gmx_bool       bFillGrid,
82         gmx_bool       bDoLongRangeNS)
83 {
84   char   *ptr;
85   int    nsearch;
86
87
88   if (!fr->ns.nblist_initialized)
89   {
90       init_neighbor_list(fp, fr, md->homenr);
91   }
92
93   if (fr->bTwinRange)
94     fr->nlr=0;
95
96     nsearch = search_neighbours(fp,fr,x,box,top,groups,cr,nrnb,md,
97                                 lambda,dvdlambda,grppener,
98                                 bFillGrid,bDoLongRangeNS,TRUE);
99   if (debug)
100     fprintf(debug,"nsearch = %d\n",nsearch);
101
102   /* Check whether we have to do dynamic load balancing */
103   /*if ((nsb->nstDlb > 0) && (mod(step,nsb->nstDlb) == 0))
104     count_nb(cr,nsb,&(top->blocks[ebCGS]),nns,fr->nlr,
105     &(top->idef),opts->ngener);
106   */
107   if (fr->ns.dump_nl > 0)
108     dump_nblist(fp,cr,fr,fr->ns.dump_nl);
109 }
110
111 static void reduce_thread_forces(int n,rvec *f,
112                                  tensor vir,
113                                  real *Vcorr,
114                                  int efpt_ind,real *dvdl,
115                                  int nthreads,f_thread_t *f_t)
116 {
117     int t,i;
118
119     /* This reduction can run over any number of threads */
120 #pragma omp parallel for num_threads(gmx_omp_nthreads_get(emntBonded)) private(t) schedule(static)
121     for(i=0; i<n; i++)
122     {
123         for(t=1; t<nthreads; t++)
124         {
125             rvec_inc(f[i],f_t[t].f[i]);
126         }
127     }
128     for(t=1; t<nthreads; t++)
129     {
130         *Vcorr += f_t[t].Vcorr;
131         *dvdl  += f_t[t].dvdl[efpt_ind];
132         m_add(vir,f_t[t].vir,vir);
133     }
134 }
135
136 void do_force_lowlevel(FILE       *fplog,   gmx_large_int_t step,
137                        t_forcerec *fr,      t_inputrec *ir,
138                        t_idef     *idef,    t_commrec  *cr,
139                        t_nrnb     *nrnb,    gmx_wallcycle_t wcycle,
140                        t_mdatoms  *md,
141                        t_grpopts  *opts,
142                        rvec       x[],      history_t  *hist,
143                        rvec       f[],
144                        rvec       f_longrange[],
145                        gmx_enerdata_t *enerd,
146                        t_fcdata   *fcd,
147                        gmx_mtop_t     *mtop,
148                        gmx_localtop_t *top,
149                        gmx_genborn_t *born,
150                        t_atomtypes *atype,
151                        gmx_bool       bBornRadii,
152                        matrix     box,
153                        t_lambda   *fepvals,
154                        real       *lambda,
155                        t_graph    *graph,
156                        t_blocka   *excl,
157                        rvec       mu_tot[],
158                        int        flags,
159                        float      *cycles_pme)
160 {
161     int     i,j,status;
162     int     donb_flags;
163     gmx_bool    bDoEpot,bSepDVDL,bSB;
164     int     pme_flags;
165     matrix  boxs;
166     rvec    box_size;
167     real    Vsr,Vlr,Vcorr=0;
168     t_pbc   pbc;
169     real    dvdgb;
170     char    buf[22];
171     double  clam_i,vlam_i;
172     real    dvdl_dum[efptNR], dvdl, dvdl_nb[efptNR], lam_i[efptNR];
173     real    dvdlsum;
174
175 #ifdef GMX_MPI
176     double  t0=0.0,t1,t2,t3; /* time measurement for coarse load balancing */
177 #endif
178
179 #define PRINT_SEPDVDL(s,v,dvdlambda) if (bSepDVDL) fprintf(fplog,sepdvdlformat,s,v,dvdlambda);
180
181
182     set_pbc(&pbc,fr->ePBC,box);
183
184     /* reset free energy components */
185     for (i=0;i<efptNR;i++)
186     {
187         dvdl_nb[i]  = 0;
188         dvdl_dum[i] = 0;
189     }
190
191     /* Reset box */
192     for(i=0; (i<DIM); i++)
193     {
194         box_size[i]=box[i][i];
195     }
196
197     bSepDVDL=(fr->bSepDVDL && do_per_step(step,ir->nstlog));
198     debug_gmx();
199
200     /* do QMMM first if requested */
201     if(fr->bQMMM)
202     {
203         enerd->term[F_EQM] = calculate_QMMM(cr,x,f,fr,md);
204     }
205
206     if (bSepDVDL)
207     {
208         fprintf(fplog,"Step %s: non-bonded V and dVdl for node %d:\n",
209                 gmx_step_str(step,buf),cr->nodeid);
210     }
211
212     /* Call the short range functions all in one go. */
213
214 #ifdef GMX_MPI
215     /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/
216 #define TAKETIME FALSE
217     if (TAKETIME)
218     {
219         MPI_Barrier(cr->mpi_comm_mygroup);
220         t0=MPI_Wtime();
221     }
222 #endif
223
224     if (ir->nwall)
225     {
226         /* foreign lambda component for walls */
227         dvdl = do_walls(ir,fr,box,md,x,f,lambda[efptVDW],
228                         enerd->grpp.ener[egLJSR],nrnb);
229         PRINT_SEPDVDL("Walls",0.0,dvdl);
230         enerd->dvdl_lin[efptVDW] += dvdl;
231     }
232
233         /* If doing GB, reset dvda and calculate the Born radii */
234         if (ir->implicit_solvent)
235         {
236         wallcycle_sub_start(wcycle, ewcsNONBONDED);
237
238                 for(i=0;i<born->nr;i++)
239                 {
240                         fr->dvda[i]=0;
241                 }
242
243                 if(bBornRadii)
244                 {
245                         calc_gb_rad(cr,fr,ir,top,atype,x,&(fr->gblist),born,md,nrnb);
246                 }
247
248         wallcycle_sub_stop(wcycle, ewcsNONBONDED);
249         }
250
251     where();
252     /* We only do non-bonded calculation with group scheme here, the verlet
253      * calls are done from do_force_cutsVERLET(). */
254     if (fr->cutoff_scheme == ecutsGROUP && (flags & GMX_FORCE_NONBONDED))
255     {
256         donb_flags = 0;
257         /* Add short-range interactions */
258         donb_flags |= GMX_NONBONDED_DO_SR;
259
260         if (flags & GMX_FORCE_FORCES)
261         {
262             donb_flags |= GMX_NONBONDED_DO_FORCE;
263         }
264         if (flags & GMX_FORCE_ENERGY)
265         {
266             donb_flags |= GMX_NONBONDED_DO_POTENTIAL;
267         }
268         if (flags & GMX_FORCE_DO_LR)
269         {
270             donb_flags |= GMX_NONBONDED_DO_LR;
271         }
272
273         wallcycle_sub_start(wcycle, ewcsNONBONDED);
274         do_nonbonded(cr,fr,x,f,f_longrange,md,excl,
275                     &enerd->grpp,box_size,nrnb,
276                     lambda,dvdl_nb,-1,-1,donb_flags);
277
278         /* If we do foreign lambda and we have soft-core interactions
279          * we have to recalculate the (non-linear) energies contributions.
280          */
281         if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0)
282         {
283             for(i=0; i<enerd->n_lambda; i++)
284             {
285                 for (j=0;j<efptNR;j++)
286                 {
287                     lam_i[j] = (i==0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
288                 }
289                 reset_foreign_enerdata(enerd);
290                 do_nonbonded(cr,fr,x,f,f_longrange,md,excl,
291                              &(enerd->foreign_grpp),box_size,nrnb,
292                              lam_i,dvdl_dum,-1,-1,
293                              (donb_flags & ~GMX_NONBONDED_DO_FORCE) | GMX_NONBONDED_DO_FOREIGNLAMBDA);
294                 sum_epot(&ir->opts,&(enerd->foreign_grpp),enerd->foreign_term);
295                 enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT];
296             }
297         }
298         wallcycle_sub_stop(wcycle, ewcsNONBONDED);
299         where();
300     }
301
302         /* If we are doing GB, calculate bonded forces and apply corrections
303          * to the solvation forces */
304     /* MRS: Eventually, many need to include free energy contribution here! */
305         if (ir->implicit_solvent)
306     {
307         wallcycle_sub_start(wcycle, ewcsBONDED);
308                 calc_gb_forces(cr,md,born,top,atype,x,f,fr,idef,
309                        ir->gb_algorithm,ir->sa_algorithm,nrnb,bBornRadii,&pbc,graph,enerd);
310         wallcycle_sub_stop(wcycle, ewcsBONDED);
311     }
312
313 #ifdef GMX_MPI
314     if (TAKETIME)
315     {
316         t1=MPI_Wtime();
317         fr->t_fnbf += t1-t0;
318     }
319 #endif
320
321     if (fepvals->sc_alpha!=0)
322     {
323         enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW];
324     }
325     else
326     {
327         enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW];
328     }
329
330     if (fepvals->sc_alpha!=0)
331
332         /* even though coulomb part is linear, we already added it, beacuse we
333            need to go through the vdw calculation anyway */
334     {
335         enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL];
336     }
337     else
338     {
339         enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL];
340     }
341
342     Vsr = 0;
343     if (bSepDVDL)
344     {
345         for(i=0; i<enerd->grpp.nener; i++)
346         {
347             Vsr +=
348                 (fr->bBHAM ?
349                  enerd->grpp.ener[egBHAMSR][i] :
350                  enerd->grpp.ener[egLJSR][i])
351                 + enerd->grpp.ener[egCOULSR][i] + enerd->grpp.ener[egGB][i];
352         }
353         dvdlsum = dvdl_nb[efptVDW] + dvdl_nb[efptCOUL];
354         PRINT_SEPDVDL("VdW and Coulomb SR particle-p.",Vsr,dvdlsum);
355     }
356     debug_gmx();
357
358
359     if (debug)
360     {
361         pr_rvecs(debug,0,"fshift after SR",fr->fshift,SHIFTS);
362     }
363
364     /* Shift the coordinates. Must be done before bonded forces and PPPM,
365      * but is also necessary for SHAKE and update, therefore it can NOT
366      * go when no bonded forces have to be evaluated.
367      */
368
369     /* Here sometimes we would not need to shift with NBFonly,
370      * but we do so anyhow for consistency of the returned coordinates.
371      */
372     if (graph)
373     {
374         shift_self(graph,box,x);
375         if (TRICLINIC(box))
376         {
377             inc_nrnb(nrnb,eNR_SHIFTX,2*graph->nnodes);
378         }
379         else
380         {
381             inc_nrnb(nrnb,eNR_SHIFTX,graph->nnodes);
382         }
383     }
384     /* Check whether we need to do bondeds or correct for exclusions */
385     if (fr->bMolPBC &&
386         ((flags & GMX_FORCE_BONDED)
387          || EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype)))
388     {
389         /* Since all atoms are in the rectangular or triclinic unit-cell,
390          * only single box vector shifts (2 in x) are required.
391          */
392         set_pbc_dd(&pbc,fr->ePBC,cr->dd,TRUE,box);
393     }
394     debug_gmx();
395
396     if (flags & GMX_FORCE_BONDED)
397     {
398         wallcycle_sub_start(wcycle, ewcsBONDED);
399         calc_bonds(fplog,cr->ms,
400                    idef,x,hist,f,fr,&pbc,graph,enerd,nrnb,lambda,md,fcd,
401                    DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL, atype, born,
402                    flags,
403                    fr->bSepDVDL && do_per_step(step,ir->nstlog),step);
404
405         /* Check if we have to determine energy differences
406          * at foreign lambda's.
407          */
408         if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) &&
409             idef->ilsort != ilsortNO_FE)
410         {
411             if (idef->ilsort != ilsortFE_SORTED)
412             {
413                 gmx_incons("The bonded interactions are not sorted for free energy");
414             }
415             for(i=0; i<enerd->n_lambda; i++)
416             {
417                 reset_foreign_enerdata(enerd);
418                 for (j=0;j<efptNR;j++)
419                 {
420                     lam_i[j] = (i==0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
421                 }
422                 calc_bonds_lambda(fplog,idef,x,fr,&pbc,graph,&(enerd->foreign_grpp),enerd->foreign_term,nrnb,lam_i,md,
423                                   fcd,DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL);
424                 sum_epot(&ir->opts,&(enerd->foreign_grpp),enerd->foreign_term);
425                 enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT];
426             }
427         }
428         debug_gmx();
429
430         wallcycle_sub_stop(wcycle, ewcsBONDED);
431     }
432
433     where();
434
435     *cycles_pme = 0;
436     if (EEL_FULL(fr->eeltype))
437     {
438         bSB = (ir->nwall == 2);
439         if (bSB)
440         {
441             copy_mat(box,boxs);
442             svmul(ir->wall_ewald_zfac,boxs[ZZ],boxs[ZZ]);
443             box_size[ZZ] *= ir->wall_ewald_zfac;
444         }
445
446         clear_mat(fr->vir_el_recip);
447
448         if (fr->bEwald)
449         {
450             Vcorr = 0;
451             dvdl  = 0;
452
453             /* With the Verlet scheme exclusion forces are calculated
454              * in the non-bonded kernel.
455              */
456             /* The TPI molecule does not have exclusions with the rest
457              * of the system and no intra-molecular PME grid contributions
458              * will be calculated in gmx_pme_calc_energy.
459              */
460             if ((ir->cutoff_scheme == ecutsGROUP && fr->n_tpi == 0) ||
461                 ir->ewald_geometry != eewg3D ||
462                 ir->epsilon_surface != 0)
463             {
464                 int nthreads,t;
465
466                 wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION);
467
468                 if (fr->n_tpi > 0)
469                 {
470                     gmx_fatal(FARGS,"TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions");
471                 }
472
473                 nthreads = gmx_omp_nthreads_get(emntBonded);
474 #pragma omp parallel for num_threads(nthreads) schedule(static)
475                 for(t=0; t<nthreads; t++)
476                 {
477                     int s,e,i;
478                     rvec *fnv;
479                     tensor *vir;
480                     real *Vcorrt,*dvdlt;
481                     if (t == 0)
482                     {
483                         fnv    = fr->f_novirsum;
484                         vir    = &fr->vir_el_recip;
485                         Vcorrt = &Vcorr;
486                         dvdlt  = &dvdl;
487                     }
488                     else
489                     {
490                         fnv    = fr->f_t[t].f;
491                         vir    = &fr->f_t[t].vir;
492                         Vcorrt = &fr->f_t[t].Vcorr;
493                         dvdlt  = &fr->f_t[t].dvdl[efptCOUL];
494                         for(i=0; i<fr->natoms_force; i++)
495                         {
496                             clear_rvec(fnv[i]);
497                         }
498                         clear_mat(*vir);
499                     }
500                     *dvdlt = 0;
501                     *Vcorrt =
502                         ewald_LRcorrection(fplog,
503                                            fr->excl_load[t],fr->excl_load[t+1],
504                                            cr,t,fr,
505                                            md->chargeA,
506                                            md->nChargePerturbed ? md->chargeB : NULL,
507                                            ir->cutoff_scheme != ecutsVERLET,
508                                            excl,x,bSB ? boxs : box,mu_tot,
509                                            ir->ewald_geometry,
510                                            ir->epsilon_surface,
511                                            fnv,*vir,
512                                            lambda[efptCOUL],dvdlt);
513                 }
514                 if (nthreads > 1)
515                 {
516                     reduce_thread_forces(fr->natoms_force,fr->f_novirsum,
517                                          fr->vir_el_recip,
518                                          &Vcorr,efptCOUL,&dvdl,
519                                          nthreads,fr->f_t);
520                 }
521
522                 wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION);
523             }
524
525             if (fr->n_tpi == 0)
526             {
527                 Vcorr += ewald_charge_correction(cr,fr,lambda[efptCOUL],box,
528                                                  &dvdl,fr->vir_el_recip);
529             }
530
531             PRINT_SEPDVDL("Ewald excl./charge/dip. corr.",Vcorr,dvdl);
532             enerd->dvdl_lin[efptCOUL] += dvdl;
533         }
534
535         status = 0;
536         Vlr  = 0;
537         dvdl = 0;
538         switch (fr->eeltype)
539         {
540         case eelPME:
541         case eelPMESWITCH:
542         case eelPMEUSER:
543         case eelPMEUSERSWITCH:
544         case eelP3M_AD:
545             if (cr->duty & DUTY_PME)
546             {
547                 assert(fr->n_tpi >= 0);
548                 if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED))
549                 {
550                     pme_flags = GMX_PME_SPREAD_Q | GMX_PME_SOLVE;
551                     if (flags & GMX_FORCE_FORCES)
552                     {
553                         pme_flags |= GMX_PME_CALC_F;
554                     }
555                     if (flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY))
556                     {
557                         pme_flags |= GMX_PME_CALC_ENER_VIR;
558                     }
559                     if (fr->n_tpi > 0)
560                     {
561                         /* We don't calculate f, but we do want the potential */
562                         pme_flags |= GMX_PME_CALC_POT;
563                     }
564                     wallcycle_start(wcycle,ewcPMEMESH);
565                     status = gmx_pme_do(fr->pmedata,
566                                         md->start,md->homenr - fr->n_tpi,
567                                         x,fr->f_novirsum,
568                                         md->chargeA,md->chargeB,
569                                         bSB ? boxs : box,cr,
570                                         DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0,
571                                         DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0,
572                                         nrnb,wcycle,
573                                         fr->vir_el_recip,fr->ewaldcoeff,
574                                         &Vlr,lambda[efptCOUL],&dvdl,
575                                         pme_flags);
576                     *cycles_pme = wallcycle_stop(wcycle,ewcPMEMESH);
577
578                     /* We should try to do as little computation after
579                      * this as possible, because parallel PME synchronizes
580                      * the nodes, so we want all load imbalance of the rest
581                      * of the force calculation to be before the PME call.
582                      * DD load balancing is done on the whole time of
583                      * the force call (without PME).
584                      */
585                 }
586                 if (fr->n_tpi > 0)
587                 {
588                     /* Determine the PME grid energy of the test molecule
589                      * with the PME grid potential of the other charges.
590                      */
591                     gmx_pme_calc_energy(fr->pmedata,fr->n_tpi,
592                                         x + md->homenr - fr->n_tpi,
593                                         md->chargeA + md->homenr - fr->n_tpi,
594                                         &Vlr);
595                 }
596                 PRINT_SEPDVDL("PME mesh",Vlr,dvdl);
597             }
598             break;
599         case eelEWALD:
600             Vlr = do_ewald(fplog,FALSE,ir,x,fr->f_novirsum,
601                            md->chargeA,md->chargeB,
602                            box_size,cr,md->homenr,
603                            fr->vir_el_recip,fr->ewaldcoeff,
604                            lambda[efptCOUL],&dvdl,fr->ewald_table);
605             PRINT_SEPDVDL("Ewald long-range",Vlr,dvdl);
606             break;
607         default:
608             gmx_fatal(FARGS,"No such electrostatics method implemented %s",
609                       eel_names[fr->eeltype]);
610         }
611         if (status != 0)
612         {
613             gmx_fatal(FARGS,"Error %d in long range electrostatics routine %s",
614                       status,EELTYPE(fr->eeltype));
615                 }
616         /* Note that with separate PME nodes we get the real energies later */
617         enerd->dvdl_lin[efptCOUL] += dvdl;
618         enerd->term[F_COUL_RECIP] = Vlr + Vcorr;
619         if (debug)
620         {
621             fprintf(debug,"Vlr = %g, Vcorr = %g, Vlr_corr = %g\n",
622                     Vlr,Vcorr,enerd->term[F_COUL_RECIP]);
623             pr_rvecs(debug,0,"vir_el_recip after corr",fr->vir_el_recip,DIM);
624             pr_rvecs(debug,0,"fshift after LR Corrections",fr->fshift,SHIFTS);
625         }
626     }
627     else
628     {
629         if (EEL_RF(fr->eeltype))
630         {
631             /* With the Verlet scheme exclusion forces are calculated
632              * in the non-bonded kernel.
633              */
634             if (ir->cutoff_scheme != ecutsVERLET && fr->eeltype != eelRF_NEC)
635             {
636                 dvdl = 0;
637                 enerd->term[F_RF_EXCL] =
638                     RF_excl_correction(fplog,fr,graph,md,excl,x,f,
639                                        fr->fshift,&pbc,lambda[efptCOUL],&dvdl);
640             }
641
642             enerd->dvdl_lin[efptCOUL] += dvdl;
643             PRINT_SEPDVDL("RF exclusion correction",
644                           enerd->term[F_RF_EXCL],dvdl);
645         }
646     }
647     where();
648     debug_gmx();
649
650     if (debug)
651     {
652         print_nrnb(debug,nrnb);
653     }
654     debug_gmx();
655
656 #ifdef GMX_MPI
657     if (TAKETIME)
658     {
659         t2=MPI_Wtime();
660         MPI_Barrier(cr->mpi_comm_mygroup);
661         t3=MPI_Wtime();
662         fr->t_wait += t3-t2;
663         if (fr->timesteps == 11)
664         {
665             fprintf(stderr,"* PP load balancing info: node %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n",
666                     cr->nodeid, gmx_step_str(fr->timesteps,buf),
667                     100*fr->t_wait/(fr->t_wait+fr->t_fnbf),
668                     (fr->t_fnbf+fr->t_wait)/fr->t_fnbf);
669         }
670         fr->timesteps++;
671     }
672 #endif
673
674     if (debug)
675     {
676         pr_rvecs(debug,0,"fshift after bondeds",fr->fshift,SHIFTS);
677     }
678
679 }
680
681 void init_enerdata(int ngener,int n_lambda,gmx_enerdata_t *enerd)
682 {
683     int i,n2;
684
685     for(i=0; i<F_NRE; i++)
686     {
687         enerd->term[i] = 0;
688         enerd->foreign_term[i] = 0;
689     }
690
691
692     for(i=0; i<efptNR; i++) {
693         enerd->dvdl_lin[i]  = 0;
694         enerd->dvdl_nonlin[i]  = 0;
695     }
696
697     n2=ngener*ngener;
698     if (debug)
699     {
700         fprintf(debug,"Creating %d sized group matrix for energies\n",n2);
701     }
702     enerd->grpp.nener = n2;
703     enerd->foreign_grpp.nener = n2;
704     for(i=0; (i<egNR); i++)
705     {
706         snew(enerd->grpp.ener[i],n2);
707         snew(enerd->foreign_grpp.ener[i],n2);
708     }
709
710     if (n_lambda)
711     {
712         enerd->n_lambda = 1 + n_lambda;
713         snew(enerd->enerpart_lambda,enerd->n_lambda);
714     }
715     else
716     {
717         enerd->n_lambda = 0;
718     }
719 }
720
721 void destroy_enerdata(gmx_enerdata_t *enerd)
722 {
723     int i;
724
725     for(i=0; (i<egNR); i++)
726     {
727         sfree(enerd->grpp.ener[i]);
728     }
729
730     for(i=0; (i<egNR); i++)
731     {
732         sfree(enerd->foreign_grpp.ener[i]);
733     }
734
735     if (enerd->n_lambda)
736     {
737         sfree(enerd->enerpart_lambda);
738     }
739 }
740
741 static real sum_v(int n,real v[])
742 {
743   real t;
744   int  i;
745
746   t = 0.0;
747   for(i=0; (i<n); i++)
748     t = t + v[i];
749
750   return t;
751 }
752
753 void sum_epot(t_grpopts *opts, gmx_grppairener_t *grpp, real *epot)
754 {
755   int i;
756
757   /* Accumulate energies */
758   epot[F_COUL_SR]  = sum_v(grpp->nener,grpp->ener[egCOULSR]);
759   epot[F_LJ]       = sum_v(grpp->nener,grpp->ener[egLJSR]);
760   epot[F_LJ14]     = sum_v(grpp->nener,grpp->ener[egLJ14]);
761   epot[F_COUL14]   = sum_v(grpp->nener,grpp->ener[egCOUL14]);
762   epot[F_COUL_LR]  = sum_v(grpp->nener,grpp->ener[egCOULLR]);
763   epot[F_LJ_LR]    = sum_v(grpp->nener,grpp->ener[egLJLR]);
764   /* We have already added 1-2,1-3, and 1-4 terms to F_GBPOL */
765   epot[F_GBPOL]   += sum_v(grpp->nener,grpp->ener[egGB]);
766
767 /* lattice part of LR doesnt belong to any group
768  * and has been added earlier
769  */
770   epot[F_BHAM]     = sum_v(grpp->nener,grpp->ener[egBHAMSR]);
771   epot[F_BHAM_LR]  = sum_v(grpp->nener,grpp->ener[egBHAMLR]);
772
773   epot[F_EPOT] = 0;
774   for(i=0; (i<F_EPOT); i++)
775   {
776       if (i != F_DISRESVIOL && i != F_ORIRESDEV)
777       {
778           epot[F_EPOT] += epot[i];
779       }
780   }
781 }
782
783 void sum_dhdl(gmx_enerdata_t *enerd, real *lambda, t_lambda *fepvals)
784 {
785     int i,j,index;
786     double dlam;
787
788     enerd->dvdl_lin[efptVDW] += enerd->term[F_DVDL_VDW];  /* include dispersion correction */
789     enerd->term[F_DVDL] = 0.0;
790     for (i=0;i<efptNR;i++)
791     {
792         if (fepvals->separate_dvdl[i])
793         {
794             /* could this be done more readably/compactly? */
795             switch (i) {
796             case (efptCOUL):
797                 index = F_DVDL_COUL;
798                 break;
799             case (efptVDW):
800                 index = F_DVDL_VDW;
801                 break;
802             case (efptBONDED):
803                 index = F_DVDL_BONDED;
804                 break;
805             case (efptRESTRAINT):
806                 index = F_DVDL_RESTRAINT;
807                 break;
808             case (efptMASS):
809                 index = F_DKDL;
810                 break;
811             default:
812                 index = F_DVDL;
813                 break;
814             }
815             enerd->term[index] = enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i];
816             if (debug)
817             {
818                 fprintf(debug,"dvdl-%s[%2d]: %f: non-linear %f + linear %f\n",
819                         efpt_names[i],i,enerd->term[index],enerd->dvdl_nonlin[i],enerd->dvdl_lin[i]);
820             }
821         }
822         else
823         {
824             enerd->term[F_DVDL] += enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i];
825             if (debug)
826             {
827                 fprintf(debug,"dvd-%sl[%2d]: %f: non-linear %f + linear %f\n",
828                         efpt_names[0],i,enerd->term[F_DVDL],enerd->dvdl_nonlin[i],enerd->dvdl_lin[i]);
829             }
830         }
831     }
832
833     /* Notes on the foreign lambda free energy difference evaluation:
834      * Adding the potential and ekin terms that depend linearly on lambda
835      * as delta lam * dvdl to the energy differences is exact.
836      * For the constraints this is not exact, but we have no other option
837      * without literally changing the lengths and reevaluating the energies at each step.
838      * (try to remedy this post 4.6 - MRS)
839      * For the non-bonded LR term we assume that the soft-core (if present)
840      * no longer affects the energy beyond the short-range cut-off,
841      * which is a very good approximation (except for exotic settings).
842      * (investigate how to overcome this post 4.6 - MRS)
843      */
844
845     for(i=0; i<fepvals->n_lambda; i++)
846     {                                         /* note we are iterating over fepvals here!
847                                                  For the current lam, dlam = 0 automatically,
848                                                  so we don't need to add anything to the
849                                                  enerd->enerpart_lambda[0] */
850
851         /* we don't need to worry about dvdl contributions to the current lambda, because
852            it's automatically zero */
853
854         /* first kinetic energy term */
855         dlam = (fepvals->all_lambda[efptMASS][i] - lambda[efptMASS]);
856
857         enerd->enerpart_lambda[i+1] += enerd->term[F_DKDL]*dlam;
858
859         for (j=0;j<efptNR;j++)
860         {
861             if (j==efptMASS) {continue;} /* no other mass term to worry about */
862
863             dlam = (fepvals->all_lambda[j][i]-lambda[j]);
864             enerd->enerpart_lambda[i+1] += dlam*enerd->dvdl_lin[j];
865             if (debug)
866             {
867                 fprintf(debug,"enerdiff lam %g: (%15s), non-linear %f linear %f*%f\n",
868                         fepvals->all_lambda[j][i],efpt_names[j],
869                         (enerd->enerpart_lambda[i+1] - enerd->enerpart_lambda[0]),
870                         dlam,enerd->dvdl_lin[j]);
871             }
872         }
873     }
874 }
875
876
877 void reset_foreign_enerdata(gmx_enerdata_t *enerd)
878 {
879     int  i,j;
880
881     /* First reset all foreign energy components.  Foreign energies always called on
882        neighbor search steps */
883     for(i=0; (i<egNR); i++)
884     {
885         for(j=0; (j<enerd->grpp.nener); j++)
886         {
887             enerd->foreign_grpp.ener[i][j] = 0.0;
888         }
889     }
890
891     /* potential energy components */
892     for(i=0; (i<=F_EPOT); i++)
893     {
894         enerd->foreign_term[i] = 0.0;
895     }
896 }
897
898 void reset_enerdata(t_grpopts *opts,
899                     t_forcerec *fr,gmx_bool bNS,
900                     gmx_enerdata_t *enerd,
901                     gmx_bool bMaster)
902 {
903     gmx_bool bKeepLR;
904     int  i,j;
905
906     /* First reset all energy components, except for the long range terms
907      * on the master at non neighbor search steps, since the long range
908      * terms have already been summed at the last neighbor search step.
909      */
910     bKeepLR = (fr->bTwinRange && !bNS);
911     for(i=0; (i<egNR); i++) {
912         if (!(bKeepLR && bMaster && (i == egCOULLR || i == egLJLR))) {
913             for(j=0; (j<enerd->grpp.nener); j++)
914                 enerd->grpp.ener[i][j] = 0.0;
915         }
916     }
917     for (i=0;i<efptNR;i++)
918     {
919         enerd->dvdl_lin[i]    = 0.0;
920         enerd->dvdl_nonlin[i] = 0.0;
921     }
922
923     /* Normal potential energy components */
924     for(i=0; (i<=F_EPOT); i++) {
925         enerd->term[i] = 0.0;
926     }
927     /* Initialize the dVdlambda term with the long range contribution */
928     /* Initialize the dvdl term with the long range contribution */
929     enerd->term[F_DVDL]            = 0.0;
930     enerd->term[F_DVDL_COUL]       = 0.0;
931     enerd->term[F_DVDL_VDW]        = 0.0;
932     enerd->term[F_DVDL_BONDED]     = 0.0;
933     enerd->term[F_DVDL_RESTRAINT]  = 0.0;
934     enerd->term[F_DKDL]            = 0.0;
935     if (enerd->n_lambda > 0)
936     {
937         for(i=0; i<enerd->n_lambda; i++)
938         {
939             enerd->enerpart_lambda[i] = 0.0;
940         }
941     }
942     /* reset foreign energy data - separate function since we also call it elsewhere */
943     reset_foreign_enerdata(enerd);
944 }