Fix Parrinello-Rahman with nstpcouple>1
[alexxy/gromacs.git] / src / gromacs / mdlib / force.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2013,2014,2015,2016, by the GROMACS development team, led by
7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8  * and including many others, as listed in the AUTHORS file in the
9  * top-level source directory and at http://www.gromacs.org.
10  *
11  * GROMACS is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public License
13  * as published by the Free Software Foundation; either version 2.1
14  * of the License, or (at your option) any later version.
15  *
16  * GROMACS is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with GROMACS; if not, see
23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
25  *
26  * If you want to redistribute modifications to GROMACS, please
27  * consider that scientific software is very special. Version
28  * control is crucial - bugs must be traceable. We will be happy to
29  * consider code for inclusion in the official distribution, but
30  * derived work must not be called official GROMACS. Details are found
31  * in the README & COPYING files - if they are missing, get the
32  * official version at http://www.gromacs.org.
33  *
34  * To help us fund GROMACS development, we humbly ask that you cite
35  * the research papers on the package. Check out http://www.gromacs.org.
36  */
37 #include "gmxpre.h"
38
39 #include "gromacs/legacyheaders/force.h"
40
41 #include "config.h"
42
43 #include <assert.h>
44 #include <math.h>
45 #include <string.h>
46
47 #include "gromacs/domdec/domdec.h"
48 #include "gromacs/ewald/ewald.h"
49 #include "gromacs/ewald/long-range-correction.h"
50 #include "gromacs/ewald/pme.h"
51 #include "gromacs/legacyheaders/gmx_omp_nthreads.h"
52 #include "gromacs/legacyheaders/macros.h"
53 #include "gromacs/legacyheaders/mdrun.h"
54 #include "gromacs/legacyheaders/names.h"
55 #include "gromacs/legacyheaders/network.h"
56 #include "gromacs/legacyheaders/nonbonded.h"
57 #include "gromacs/legacyheaders/nrnb.h"
58 #include "gromacs/legacyheaders/ns.h"
59 #include "gromacs/legacyheaders/qmmm.h"
60 #include "gromacs/legacyheaders/txtdump.h"
61 #include "gromacs/legacyheaders/typedefs.h"
62 #include "gromacs/legacyheaders/types/commrec.h"
63 #include "gromacs/listed-forces/listed-forces.h"
64 #include "gromacs/math/vec.h"
65 #include "gromacs/mdlib/forcerec-threading.h"
66 #include "gromacs/pbcutil/ishift.h"
67 #include "gromacs/pbcutil/mshift.h"
68 #include "gromacs/pbcutil/pbc.h"
69 #include "gromacs/timing/wallcycle.h"
70 #include "gromacs/utility/fatalerror.h"
71 #include "gromacs/utility/smalloc.h"
72
73 void ns(FILE              *fp,
74         t_forcerec        *fr,
75         matrix             box,
76         gmx_groups_t      *groups,
77         gmx_localtop_t    *top,
78         t_mdatoms         *md,
79         t_commrec         *cr,
80         t_nrnb            *nrnb,
81         gmx_bool           bFillGrid,
82         gmx_bool           bDoLongRangeNS)
83 {
84     int     nsearch;
85
86
87     if (!fr->ns.nblist_initialized)
88     {
89         init_neighbor_list(fp, fr, md->homenr);
90     }
91
92     if (fr->bTwinRange)
93     {
94         fr->nlr = 0;
95     }
96
97     nsearch = search_neighbours(fp, fr, box, top, groups, cr, nrnb, md,
98                                 bFillGrid, bDoLongRangeNS);
99     if (debug)
100     {
101         fprintf(debug, "nsearch = %d\n", nsearch);
102     }
103
104     /* Check whether we have to do dynamic load balancing */
105     /*if ((nsb->nstDlb > 0) && (mod(step,nsb->nstDlb) == 0))
106        count_nb(cr,nsb,&(top->blocks[ebCGS]),nns,fr->nlr,
107        &(top->idef),opts->ngener);
108      */
109     if (fr->ns.dump_nl > 0)
110     {
111         dump_nblist(fp, cr, fr, fr->ns.dump_nl);
112     }
113 }
114
115 static void reduce_thread_forces(int n, rvec *f,
116                                  tensor vir_q, tensor vir_lj,
117                                  real *Vcorr_q, real *Vcorr_lj,
118                                  real *dvdl_q, real *dvdl_lj,
119                                  int nthreads, f_thread_t *f_t)
120 {
121     int t, i;
122     int nthreads_loop gmx_unused;
123
124     // cppcheck-suppress unreadVariable
125     nthreads_loop = gmx_omp_nthreads_get(emntBonded);
126     /* This reduction can run over any number of threads */
127 #pragma omp parallel for num_threads(nthreads_loop) private(t) schedule(static)
128     for (i = 0; i < n; i++)
129     {
130         for (t = 1; t < nthreads; t++)
131         {
132             rvec_inc(f[i], f_t[t].f[i]);
133         }
134     }
135     for (t = 1; t < nthreads; t++)
136     {
137         *Vcorr_q  += f_t[t].Vcorr_q;
138         *Vcorr_lj += f_t[t].Vcorr_lj;
139         *dvdl_q   += f_t[t].dvdl[efptCOUL];
140         *dvdl_lj  += f_t[t].dvdl[efptVDW];
141         m_add(vir_q, f_t[t].vir_q, vir_q);
142         m_add(vir_lj, f_t[t].vir_lj, vir_lj);
143     }
144 }
145
146 void do_force_lowlevel(t_forcerec *fr,      t_inputrec *ir,
147                        t_idef     *idef,    t_commrec  *cr,
148                        t_nrnb     *nrnb,    gmx_wallcycle_t wcycle,
149                        t_mdatoms  *md,
150                        rvec       x[],      history_t  *hist,
151                        rvec       f[],
152                        rvec       f_longrange[],
153                        gmx_enerdata_t *enerd,
154                        t_fcdata   *fcd,
155                        gmx_localtop_t *top,
156                        gmx_genborn_t *born,
157                        gmx_bool       bBornRadii,
158                        matrix     box,
159                        t_lambda   *fepvals,
160                        real       *lambda,
161                        t_graph    *graph,
162                        t_blocka   *excl,
163                        rvec       mu_tot[],
164                        int        flags,
165                        float      *cycles_pme)
166 {
167     int         i, j;
168     int         donb_flags;
169     gmx_bool    bSB;
170     int         pme_flags;
171     matrix      boxs;
172     rvec        box_size;
173     t_pbc       pbc;
174     real        dvdl_dum[efptNR], dvdl_nb[efptNR];
175
176 #ifdef GMX_MPI
177     double  t0 = 0.0, t1, t2, t3; /* time measurement for coarse load balancing */
178 #endif
179
180     set_pbc(&pbc, fr->ePBC, box);
181
182     /* reset free energy components */
183     for (i = 0; i < efptNR; i++)
184     {
185         dvdl_nb[i]  = 0;
186         dvdl_dum[i] = 0;
187     }
188
189     /* Reset box */
190     for (i = 0; (i < DIM); i++)
191     {
192         box_size[i] = box[i][i];
193     }
194
195     debug_gmx();
196
197     /* do QMMM first if requested */
198     if (fr->bQMMM)
199     {
200         enerd->term[F_EQM] = calculate_QMMM(cr, x, f, fr);
201     }
202
203     /* Call the short range functions all in one go. */
204
205 #ifdef GMX_MPI
206     /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/
207 #define TAKETIME FALSE
208     if (TAKETIME)
209     {
210         MPI_Barrier(cr->mpi_comm_mygroup);
211         t0 = MPI_Wtime();
212     }
213 #endif
214
215     if (ir->nwall)
216     {
217         /* foreign lambda component for walls */
218         real dvdl_walls = do_walls(ir, fr, box, md, x, f, lambda[efptVDW],
219                                    enerd->grpp.ener[egLJSR], nrnb);
220         enerd->dvdl_lin[efptVDW] += dvdl_walls;
221     }
222
223     /* If doing GB, reset dvda and calculate the Born radii */
224     if (ir->implicit_solvent)
225     {
226         wallcycle_sub_start(wcycle, ewcsNONBONDED);
227
228         for (i = 0; i < born->nr; i++)
229         {
230             fr->dvda[i] = 0;
231         }
232
233         if (bBornRadii)
234         {
235             calc_gb_rad(cr, fr, ir, top, x, &(fr->gblist), born, md, nrnb);
236         }
237
238         wallcycle_sub_stop(wcycle, ewcsNONBONDED);
239     }
240
241     where();
242     /* We only do non-bonded calculation with group scheme here, the verlet
243      * calls are done from do_force_cutsVERLET(). */
244     if (fr->cutoff_scheme == ecutsGROUP && (flags & GMX_FORCE_NONBONDED))
245     {
246         donb_flags = 0;
247         /* Add short-range interactions */
248         donb_flags |= GMX_NONBONDED_DO_SR;
249
250         /* Currently all group scheme kernels always calculate (shift-)forces */
251         if (flags & GMX_FORCE_FORCES)
252         {
253             donb_flags |= GMX_NONBONDED_DO_FORCE;
254         }
255         if (flags & GMX_FORCE_VIRIAL)
256         {
257             donb_flags |= GMX_NONBONDED_DO_SHIFTFORCE;
258         }
259         if (flags & GMX_FORCE_ENERGY)
260         {
261             donb_flags |= GMX_NONBONDED_DO_POTENTIAL;
262         }
263         if (flags & GMX_FORCE_DO_LR)
264         {
265             donb_flags |= GMX_NONBONDED_DO_LR;
266         }
267
268         wallcycle_sub_start(wcycle, ewcsNONBONDED);
269         do_nonbonded(fr, x, f, f_longrange, md, excl,
270                      &enerd->grpp, nrnb,
271                      lambda, dvdl_nb, -1, -1, donb_flags);
272
273         /* If we do foreign lambda and we have soft-core interactions
274          * we have to recalculate the (non-linear) energies contributions.
275          */
276         if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0)
277         {
278             for (i = 0; i < enerd->n_lambda; i++)
279             {
280                 real lam_i[efptNR];
281
282                 for (j = 0; j < efptNR; j++)
283                 {
284                     lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
285                 }
286                 reset_foreign_enerdata(enerd);
287                 do_nonbonded(fr, x, f, f_longrange, md, excl,
288                              &(enerd->foreign_grpp), nrnb,
289                              lam_i, dvdl_dum, -1, -1,
290                              (donb_flags & ~GMX_NONBONDED_DO_FORCE) | GMX_NONBONDED_DO_FOREIGNLAMBDA);
291                 sum_epot(&(enerd->foreign_grpp), enerd->foreign_term);
292                 enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT];
293             }
294         }
295         wallcycle_sub_stop(wcycle, ewcsNONBONDED);
296         where();
297     }
298
299     /* If we are doing GB, calculate bonded forces and apply corrections
300      * to the solvation forces */
301     /* MRS: Eventually, many need to include free energy contribution here! */
302     if (ir->implicit_solvent)
303     {
304         wallcycle_sub_start(wcycle, ewcsLISTED);
305         calc_gb_forces(cr, md, born, top, x, f, fr, idef,
306                        ir->gb_algorithm, ir->sa_algorithm, nrnb, &pbc, graph, enerd);
307         wallcycle_sub_stop(wcycle, ewcsLISTED);
308     }
309
310 #ifdef GMX_MPI
311     if (TAKETIME)
312     {
313         t1          = MPI_Wtime();
314         fr->t_fnbf += t1-t0;
315     }
316 #endif
317
318     if (fepvals->sc_alpha != 0)
319     {
320         enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW];
321     }
322     else
323     {
324         enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW];
325     }
326
327     if (fepvals->sc_alpha != 0)
328
329     /* even though coulomb part is linear, we already added it, beacuse we
330        need to go through the vdw calculation anyway */
331     {
332         enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL];
333     }
334     else
335     {
336         enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL];
337     }
338
339     debug_gmx();
340
341
342     if (debug)
343     {
344         pr_rvecs(debug, 0, "fshift after SR", fr->fshift, SHIFTS);
345     }
346
347     /* Shift the coordinates. Must be done before listed forces and PPPM,
348      * but is also necessary for SHAKE and update, therefore it can NOT
349      * go when no listed forces have to be evaluated.
350      *
351      * The shifting and PBC code is deliberately not timed, since with
352      * the Verlet scheme it only takes non-zero time with triclinic
353      * boxes, and even then the time is around a factor of 100 less
354      * than the next smallest counter.
355      */
356
357
358     /* Here sometimes we would not need to shift with NBFonly,
359      * but we do so anyhow for consistency of the returned coordinates.
360      */
361     if (graph)
362     {
363         shift_self(graph, box, x);
364         if (TRICLINIC(box))
365         {
366             inc_nrnb(nrnb, eNR_SHIFTX, 2*graph->nnodes);
367         }
368         else
369         {
370             inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes);
371         }
372     }
373     /* Check whether we need to do listed interactions or correct for exclusions */
374     if (fr->bMolPBC &&
375         ((flags & GMX_FORCE_LISTED)
376          || EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype)))
377     {
378         /* TODO There are no electrostatics methods that require this
379            transformation, when using the Verlet scheme, so update the
380            above conditional. */
381         /* Since all atoms are in the rectangular or triclinic unit-cell,
382          * only single box vector shifts (2 in x) are required.
383          */
384         set_pbc_dd(&pbc, fr->ePBC, cr->dd, TRUE, box);
385     }
386     debug_gmx();
387
388     do_force_listed(wcycle, box, ir->fepvals, cr->ms,
389                     idef, (const rvec *) x, hist, f, fr,
390                     &pbc, graph, enerd, nrnb, lambda, md, fcd,
391                     DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL,
392                     flags);
393
394     where();
395
396     *cycles_pme = 0;
397     clear_mat(fr->vir_el_recip);
398     clear_mat(fr->vir_lj_recip);
399
400     /* Do long-range electrostatics and/or LJ-PME, including related short-range
401      * corrections.
402      */
403     if (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype))
404     {
405         int  status            = 0;
406         real Vlr_q             = 0, Vlr_lj = 0, Vcorr_q = 0, Vcorr_lj = 0;
407         real dvdl_long_range_q = 0, dvdl_long_range_lj = 0;
408
409         bSB = (ir->nwall == 2);
410         if (bSB)
411         {
412             copy_mat(box, boxs);
413             svmul(ir->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]);
414             box_size[ZZ] *= ir->wall_ewald_zfac;
415         }
416
417         if (EEL_PME_EWALD(fr->eeltype) || EVDW_PME(fr->vdwtype))
418         {
419             real dvdl_long_range_correction_q   = 0;
420             real dvdl_long_range_correction_lj  = 0;
421             /* With the Verlet scheme exclusion forces are calculated
422              * in the non-bonded kernel.
423              */
424             /* The TPI molecule does not have exclusions with the rest
425              * of the system and no intra-molecular PME grid
426              * contributions will be calculated in
427              * gmx_pme_calc_energy.
428              */
429             if ((ir->cutoff_scheme == ecutsGROUP && fr->n_tpi == 0) ||
430                 ir->ewald_geometry != eewg3D ||
431                 ir->epsilon_surface != 0)
432             {
433                 int nthreads, t;
434
435                 wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION);
436
437                 if (fr->n_tpi > 0)
438                 {
439                     gmx_fatal(FARGS, "TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions");
440                 }
441
442                 nthreads = gmx_omp_nthreads_get(emntBonded);
443 #pragma omp parallel for num_threads(nthreads) schedule(static)
444                 for (t = 0; t < nthreads; t++)
445                 {
446                     int     i;
447                     rvec   *fnv;
448                     tensor *vir_q, *vir_lj;
449                     real   *Vcorrt_q, *Vcorrt_lj, *dvdlt_q, *dvdlt_lj;
450                     if (t == 0)
451                     {
452                         fnv       = fr->f_novirsum;
453                         vir_q     = &fr->vir_el_recip;
454                         vir_lj    = &fr->vir_lj_recip;
455                         Vcorrt_q  = &Vcorr_q;
456                         Vcorrt_lj = &Vcorr_lj;
457                         dvdlt_q   = &dvdl_long_range_correction_q;
458                         dvdlt_lj  = &dvdl_long_range_correction_lj;
459                     }
460                     else
461                     {
462                         fnv       = fr->f_t[t].f;
463                         vir_q     = &fr->f_t[t].vir_q;
464                         vir_lj    = &fr->f_t[t].vir_lj;
465                         Vcorrt_q  = &fr->f_t[t].Vcorr_q;
466                         Vcorrt_lj = &fr->f_t[t].Vcorr_lj;
467                         dvdlt_q   = &fr->f_t[t].dvdl[efptCOUL];
468                         dvdlt_lj  = &fr->f_t[t].dvdl[efptVDW];
469                         for (i = 0; i < fr->natoms_force; i++)
470                         {
471                             clear_rvec(fnv[i]);
472                         }
473                         clear_mat(*vir_q);
474                         clear_mat(*vir_lj);
475                     }
476                     *dvdlt_q  = 0;
477                     *dvdlt_lj = 0;
478
479                     ewald_LRcorrection(md->homenr, cr, nthreads, t, fr,
480                                        md->chargeA, md->chargeB,
481                                        md->sqrt_c6A, md->sqrt_c6B,
482                                        md->sigmaA, md->sigmaB,
483                                        md->sigma3A, md->sigma3B,
484                                        md->nChargePerturbed || md->nTypePerturbed,
485                                        ir->cutoff_scheme != ecutsVERLET,
486                                        excl, x, bSB ? boxs : box, mu_tot,
487                                        ir->ewald_geometry,
488                                        ir->epsilon_surface,
489                                        fnv, *vir_q, *vir_lj,
490                                        Vcorrt_q, Vcorrt_lj,
491                                        lambda[efptCOUL], lambda[efptVDW],
492                                        dvdlt_q, dvdlt_lj);
493                 }
494                 if (nthreads > 1)
495                 {
496                     reduce_thread_forces(fr->natoms_force, fr->f_novirsum,
497                                          fr->vir_el_recip, fr->vir_lj_recip,
498                                          &Vcorr_q, &Vcorr_lj,
499                                          &dvdl_long_range_correction_q,
500                                          &dvdl_long_range_correction_lj,
501                                          nthreads, fr->f_t);
502                 }
503                 wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION);
504             }
505
506             if (EEL_PME_EWALD(fr->eeltype) && fr->n_tpi == 0)
507             {
508                 /* This is not in a subcounter because it takes a
509                    negligible and constant-sized amount of time */
510                 Vcorr_q += ewald_charge_correction(cr, fr, lambda[efptCOUL], box,
511                                                    &dvdl_long_range_correction_q,
512                                                    fr->vir_el_recip);
513             }
514
515             enerd->dvdl_lin[efptCOUL] += dvdl_long_range_correction_q;
516             enerd->dvdl_lin[efptVDW]  += dvdl_long_range_correction_lj;
517
518             if ((EEL_PME(fr->eeltype) || EVDW_PME(fr->vdwtype)) && (cr->duty & DUTY_PME))
519             {
520                 /* Do reciprocal PME for Coulomb and/or LJ. */
521                 assert(fr->n_tpi >= 0);
522                 if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED))
523                 {
524                     pme_flags = GMX_PME_SPREAD | GMX_PME_SOLVE;
525                     if (EEL_PME(fr->eeltype))
526                     {
527                         pme_flags     |= GMX_PME_DO_COULOMB;
528                     }
529                     if (EVDW_PME(fr->vdwtype))
530                     {
531                         pme_flags |= GMX_PME_DO_LJ;
532                     }
533                     if (flags & GMX_FORCE_FORCES)
534                     {
535                         pme_flags |= GMX_PME_CALC_F;
536                     }
537                     if (flags & GMX_FORCE_VIRIAL)
538                     {
539                         pme_flags |= GMX_PME_CALC_ENER_VIR;
540                     }
541                     if (fr->n_tpi > 0)
542                     {
543                         /* We don't calculate f, but we do want the potential */
544                         pme_flags |= GMX_PME_CALC_POT;
545                     }
546                     wallcycle_start(wcycle, ewcPMEMESH);
547                     status = gmx_pme_do(fr->pmedata,
548                                         0, md->homenr - fr->n_tpi,
549                                         x, fr->f_novirsum,
550                                         md->chargeA, md->chargeB,
551                                         md->sqrt_c6A, md->sqrt_c6B,
552                                         md->sigmaA, md->sigmaB,
553                                         bSB ? boxs : box, cr,
554                                         DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0,
555                                         DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0,
556                                         nrnb, wcycle,
557                                         fr->vir_el_recip, fr->ewaldcoeff_q,
558                                         fr->vir_lj_recip, fr->ewaldcoeff_lj,
559                                         &Vlr_q, &Vlr_lj,
560                                         lambda[efptCOUL], lambda[efptVDW],
561                                         &dvdl_long_range_q, &dvdl_long_range_lj, pme_flags);
562                     *cycles_pme = wallcycle_stop(wcycle, ewcPMEMESH);
563                     if (status != 0)
564                     {
565                         gmx_fatal(FARGS, "Error %d in reciprocal PME routine", status);
566                     }
567                     /* We should try to do as little computation after
568                      * this as possible, because parallel PME synchronizes
569                      * the nodes, so we want all load imbalance of the
570                      * rest of the force calculation to be before the PME
571                      * call.  DD load balancing is done on the whole time
572                      * of the force call (without PME).
573                      */
574                 }
575                 if (fr->n_tpi > 0)
576                 {
577                     if (EVDW_PME(ir->vdwtype))
578                     {
579
580                         gmx_fatal(FARGS, "Test particle insertion not implemented with LJ-PME");
581                     }
582                     /* Determine the PME grid energy of the test molecule
583                      * with the PME grid potential of the other charges.
584                      */
585                     gmx_pme_calc_energy(fr->pmedata, fr->n_tpi,
586                                         x + md->homenr - fr->n_tpi,
587                                         md->chargeA + md->homenr - fr->n_tpi,
588                                         &Vlr_q);
589                 }
590             }
591         }
592
593         if (!EEL_PME(fr->eeltype) && EEL_PME_EWALD(fr->eeltype))
594         {
595             Vlr_q = do_ewald(ir, x, fr->f_novirsum,
596                              md->chargeA, md->chargeB,
597                              box_size, cr, md->homenr,
598                              fr->vir_el_recip, fr->ewaldcoeff_q,
599                              lambda[efptCOUL], &dvdl_long_range_q, fr->ewald_table);
600         }
601
602         /* Note that with separate PME nodes we get the real energies later */
603         enerd->dvdl_lin[efptCOUL] += dvdl_long_range_q;
604         enerd->dvdl_lin[efptVDW]  += dvdl_long_range_lj;
605         enerd->term[F_COUL_RECIP]  = Vlr_q + Vcorr_q;
606         enerd->term[F_LJ_RECIP]    = Vlr_lj + Vcorr_lj;
607         if (debug)
608         {
609             fprintf(debug, "Vlr_q = %g, Vcorr_q = %g, Vlr_corr_q = %g\n",
610                     Vlr_q, Vcorr_q, enerd->term[F_COUL_RECIP]);
611             pr_rvecs(debug, 0, "vir_el_recip after corr", fr->vir_el_recip, DIM);
612             pr_rvecs(debug, 0, "fshift after LR Corrections", fr->fshift, SHIFTS);
613             fprintf(debug, "Vlr_lj: %g, Vcorr_lj = %g, Vlr_corr_lj = %g\n",
614                     Vlr_lj, Vcorr_lj, enerd->term[F_LJ_RECIP]);
615             pr_rvecs(debug, 0, "vir_lj_recip after corr", fr->vir_lj_recip, DIM);
616         }
617     }
618     else
619     {
620         /* Is there a reaction-field exclusion correction needed? */
621         if (EEL_RF(fr->eeltype) && eelRF_NEC != fr->eeltype)
622         {
623             /* With the Verlet scheme, exclusion forces are calculated
624              * in the non-bonded kernel.
625              */
626             if (ir->cutoff_scheme != ecutsVERLET)
627             {
628                 real dvdl_rf_excl      = 0;
629                 enerd->term[F_RF_EXCL] =
630                     RF_excl_correction(fr, graph, md, excl, x, f,
631                                        fr->fshift, &pbc, lambda[efptCOUL], &dvdl_rf_excl);
632
633                 enerd->dvdl_lin[efptCOUL] += dvdl_rf_excl;
634             }
635         }
636     }
637     where();
638     debug_gmx();
639
640     if (debug)
641     {
642         print_nrnb(debug, nrnb);
643     }
644     debug_gmx();
645
646 #ifdef GMX_MPI
647     if (TAKETIME)
648     {
649         t2 = MPI_Wtime();
650         MPI_Barrier(cr->mpi_comm_mygroup);
651         t3          = MPI_Wtime();
652         fr->t_wait += t3-t2;
653         if (fr->timesteps == 11)
654         {
655             char buf[22];
656             fprintf(stderr, "* PP load balancing info: rank %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n",
657                     cr->nodeid, gmx_step_str(fr->timesteps, buf),
658                     100*fr->t_wait/(fr->t_wait+fr->t_fnbf),
659                     (fr->t_fnbf+fr->t_wait)/fr->t_fnbf);
660         }
661         fr->timesteps++;
662     }
663 #endif
664
665     if (debug)
666     {
667         pr_rvecs(debug, 0, "fshift after bondeds", fr->fshift, SHIFTS);
668     }
669
670 }
671
672 void init_enerdata(int ngener, int n_lambda, gmx_enerdata_t *enerd)
673 {
674     int i, n2;
675
676     for (i = 0; i < F_NRE; i++)
677     {
678         enerd->term[i]         = 0;
679         enerd->foreign_term[i] = 0;
680     }
681
682
683     for (i = 0; i < efptNR; i++)
684     {
685         enerd->dvdl_lin[i]     = 0;
686         enerd->dvdl_nonlin[i]  = 0;
687     }
688
689     n2 = ngener*ngener;
690     if (debug)
691     {
692         fprintf(debug, "Creating %d sized group matrix for energies\n", n2);
693     }
694     enerd->grpp.nener         = n2;
695     enerd->foreign_grpp.nener = n2;
696     for (i = 0; (i < egNR); i++)
697     {
698         snew(enerd->grpp.ener[i], n2);
699         snew(enerd->foreign_grpp.ener[i], n2);
700     }
701
702     if (n_lambda)
703     {
704         enerd->n_lambda = 1 + n_lambda;
705         snew(enerd->enerpart_lambda, enerd->n_lambda);
706     }
707     else
708     {
709         enerd->n_lambda = 0;
710     }
711 }
712
713 void destroy_enerdata(gmx_enerdata_t *enerd)
714 {
715     int i;
716
717     for (i = 0; (i < egNR); i++)
718     {
719         sfree(enerd->grpp.ener[i]);
720     }
721
722     for (i = 0; (i < egNR); i++)
723     {
724         sfree(enerd->foreign_grpp.ener[i]);
725     }
726
727     if (enerd->n_lambda)
728     {
729         sfree(enerd->enerpart_lambda);
730     }
731 }
732
733 static real sum_v(int n, real v[])
734 {
735     real t;
736     int  i;
737
738     t = 0.0;
739     for (i = 0; (i < n); i++)
740     {
741         t = t + v[i];
742     }
743
744     return t;
745 }
746
747 void sum_epot(gmx_grppairener_t *grpp, real *epot)
748 {
749     int i;
750
751     /* Accumulate energies */
752     epot[F_COUL_SR]  = sum_v(grpp->nener, grpp->ener[egCOULSR]);
753     epot[F_LJ]       = sum_v(grpp->nener, grpp->ener[egLJSR]);
754     epot[F_LJ14]     = sum_v(grpp->nener, grpp->ener[egLJ14]);
755     epot[F_COUL14]   = sum_v(grpp->nener, grpp->ener[egCOUL14]);
756     epot[F_COUL_LR]  = sum_v(grpp->nener, grpp->ener[egCOULLR]);
757     epot[F_LJ_LR]    = sum_v(grpp->nener, grpp->ener[egLJLR]);
758     /* We have already added 1-2,1-3, and 1-4 terms to F_GBPOL */
759     epot[F_GBPOL]   += sum_v(grpp->nener, grpp->ener[egGB]);
760
761 /* lattice part of LR doesnt belong to any group
762  * and has been added earlier
763  */
764     epot[F_BHAM]     = sum_v(grpp->nener, grpp->ener[egBHAMSR]);
765     epot[F_BHAM_LR]  = sum_v(grpp->nener, grpp->ener[egBHAMLR]);
766
767     epot[F_EPOT] = 0;
768     for (i = 0; (i < F_EPOT); i++)
769     {
770         if (i != F_DISRESVIOL && i != F_ORIRESDEV)
771         {
772             epot[F_EPOT] += epot[i];
773         }
774     }
775 }
776
777 void sum_dhdl(gmx_enerdata_t *enerd, real *lambda, t_lambda *fepvals)
778 {
779     int    i, j, index;
780     double dlam;
781
782     enerd->dvdl_lin[efptVDW] += enerd->term[F_DVDL_VDW];  /* include dispersion correction */
783     enerd->term[F_DVDL]       = 0.0;
784     for (i = 0; i < efptNR; i++)
785     {
786         if (fepvals->separate_dvdl[i])
787         {
788             /* could this be done more readably/compactly? */
789             switch (i)
790             {
791                 case (efptMASS):
792                     index = F_DKDL;
793                     break;
794                 case (efptCOUL):
795                     index = F_DVDL_COUL;
796                     break;
797                 case (efptVDW):
798                     index = F_DVDL_VDW;
799                     break;
800                 case (efptBONDED):
801                     index = F_DVDL_BONDED;
802                     break;
803                 case (efptRESTRAINT):
804                     index = F_DVDL_RESTRAINT;
805                     break;
806                 default:
807                     index = F_DVDL;
808                     break;
809             }
810             enerd->term[index] = enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i];
811             if (debug)
812             {
813                 fprintf(debug, "dvdl-%s[%2d]: %f: non-linear %f + linear %f\n",
814                         efpt_names[i], i, enerd->term[index], enerd->dvdl_nonlin[i], enerd->dvdl_lin[i]);
815             }
816         }
817         else
818         {
819             enerd->term[F_DVDL] += enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i];
820             if (debug)
821             {
822                 fprintf(debug, "dvd-%sl[%2d]: %f: non-linear %f + linear %f\n",
823                         efpt_names[0], i, enerd->term[F_DVDL], enerd->dvdl_nonlin[i], enerd->dvdl_lin[i]);
824             }
825         }
826     }
827
828     /* Notes on the foreign lambda free energy difference evaluation:
829      * Adding the potential and ekin terms that depend linearly on lambda
830      * as delta lam * dvdl to the energy differences is exact.
831      * For the constraints this is not exact, but we have no other option
832      * without literally changing the lengths and reevaluating the energies at each step.
833      * (try to remedy this post 4.6 - MRS)
834      * For the non-bonded LR term we assume that the soft-core (if present)
835      * no longer affects the energy beyond the short-range cut-off,
836      * which is a very good approximation (except for exotic settings).
837      * (investigate how to overcome this post 4.6 - MRS)
838      */
839     if (fepvals->separate_dvdl[efptBONDED])
840     {
841         enerd->term[F_DVDL_BONDED] += enerd->term[F_DVDL_CONSTR];
842     }
843     else
844     {
845         enerd->term[F_DVDL] += enerd->term[F_DVDL_CONSTR];
846     }
847     enerd->term[F_DVDL_CONSTR] = 0;
848
849     for (i = 0; i < fepvals->n_lambda; i++)
850     {
851         /* note we are iterating over fepvals here!
852            For the current lam, dlam = 0 automatically,
853            so we don't need to add anything to the
854            enerd->enerpart_lambda[0] */
855
856         /* we don't need to worry about dvdl_lin contributions to dE at
857            current lambda, because the contributions to the current
858            lambda are automatically zeroed */
859
860         for (j = 0; j < efptNR; j++)
861         {
862             /* Note that this loop is over all dhdl components, not just the separated ones */
863             dlam = (fepvals->all_lambda[j][i]-lambda[j]);
864             enerd->enerpart_lambda[i+1] += dlam*enerd->dvdl_lin[j];
865             if (debug)
866             {
867                 fprintf(debug, "enerdiff lam %g: (%15s), non-linear %f linear %f*%f\n",
868                         fepvals->all_lambda[j][i], efpt_names[j],
869                         (enerd->enerpart_lambda[i+1] - enerd->enerpart_lambda[0]),
870                         dlam, enerd->dvdl_lin[j]);
871             }
872         }
873     }
874 }
875
876
877 void reset_foreign_enerdata(gmx_enerdata_t *enerd)
878 {
879     int  i, j;
880
881     /* First reset all foreign energy components.  Foreign energies always called on
882        neighbor search steps */
883     for (i = 0; (i < egNR); i++)
884     {
885         for (j = 0; (j < enerd->grpp.nener); j++)
886         {
887             enerd->foreign_grpp.ener[i][j] = 0.0;
888         }
889     }
890
891     /* potential energy components */
892     for (i = 0; (i <= F_EPOT); i++)
893     {
894         enerd->foreign_term[i] = 0.0;
895     }
896 }
897
898 void reset_enerdata(t_forcerec *fr, gmx_bool bNS,
899                     gmx_enerdata_t *enerd,
900                     gmx_bool bMaster)
901 {
902     gmx_bool bKeepLR;
903     int      i, j;
904
905     /* First reset all energy components, except for the long range terms
906      * on the master at non neighbor search steps, since the long range
907      * terms have already been summed at the last neighbor search step.
908      */
909     bKeepLR = (fr->bTwinRange && !bNS);
910     for (i = 0; (i < egNR); i++)
911     {
912         if (!(bKeepLR && bMaster && (i == egCOULLR || i == egLJLR)))
913         {
914             for (j = 0; (j < enerd->grpp.nener); j++)
915             {
916                 enerd->grpp.ener[i][j] = 0.0;
917             }
918         }
919     }
920     for (i = 0; i < efptNR; i++)
921     {
922         enerd->dvdl_lin[i]    = 0.0;
923         enerd->dvdl_nonlin[i] = 0.0;
924     }
925
926     /* Normal potential energy components */
927     for (i = 0; (i <= F_EPOT); i++)
928     {
929         enerd->term[i] = 0.0;
930     }
931     /* Initialize the dVdlambda term with the long range contribution */
932     /* Initialize the dvdl term with the long range contribution */
933     enerd->term[F_DVDL]            = 0.0;
934     enerd->term[F_DVDL_COUL]       = 0.0;
935     enerd->term[F_DVDL_VDW]        = 0.0;
936     enerd->term[F_DVDL_BONDED]     = 0.0;
937     enerd->term[F_DVDL_RESTRAINT]  = 0.0;
938     enerd->term[F_DKDL]            = 0.0;
939     if (enerd->n_lambda > 0)
940     {
941         for (i = 0; i < enerd->n_lambda; i++)
942         {
943             enerd->enerpart_lambda[i] = 0.0;
944         }
945     }
946     /* reset foreign energy data - separate function since we also call it elsewhere */
947     reset_foreign_enerdata(enerd);
948 }