Extend Force sub-counters
[alexxy/gromacs.git] / src / gromacs / mdlib / force.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8  * and including many others, as listed in the AUTHORS file in the
9  * top-level source directory and at http://www.gromacs.org.
10  *
11  * GROMACS is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public License
13  * as published by the Free Software Foundation; either version 2.1
14  * of the License, or (at your option) any later version.
15  *
16  * GROMACS is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with GROMACS; if not, see
23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
25  *
26  * If you want to redistribute modifications to GROMACS, please
27  * consider that scientific software is very special. Version
28  * control is crucial - bugs must be traceable. We will be happy to
29  * consider code for inclusion in the official distribution, but
30  * derived work must not be called official GROMACS. Details are found
31  * in the README & COPYING files - if they are missing, get the
32  * official version at http://www.gromacs.org.
33  *
34  * To help us fund GROMACS development, we humbly ask that you cite
35  * the research papers on the package. Check out http://www.gromacs.org.
36  */
37 #include "gmxpre.h"
38
39 #include "gromacs/legacyheaders/force.h"
40
41 #include "config.h"
42
43 #include <assert.h>
44 #include <math.h>
45 #include <string.h>
46
47 #include "gromacs/domdec/domdec.h"
48 #include "gromacs/ewald/ewald.h"
49 #include "gromacs/ewald/long-range-correction.h"
50 #include "gromacs/ewald/pme.h"
51 #include "gromacs/legacyheaders/gmx_omp_nthreads.h"
52 #include "gromacs/legacyheaders/macros.h"
53 #include "gromacs/legacyheaders/mdrun.h"
54 #include "gromacs/legacyheaders/names.h"
55 #include "gromacs/legacyheaders/network.h"
56 #include "gromacs/legacyheaders/nonbonded.h"
57 #include "gromacs/legacyheaders/nrnb.h"
58 #include "gromacs/legacyheaders/ns.h"
59 #include "gromacs/legacyheaders/qmmm.h"
60 #include "gromacs/legacyheaders/txtdump.h"
61 #include "gromacs/legacyheaders/typedefs.h"
62 #include "gromacs/legacyheaders/types/commrec.h"
63 #include "gromacs/listed-forces/listed-forces.h"
64 #include "gromacs/math/vec.h"
65 #include "gromacs/mdlib/forcerec-threading.h"
66 #include "gromacs/pbcutil/ishift.h"
67 #include "gromacs/pbcutil/mshift.h"
68 #include "gromacs/pbcutil/pbc.h"
69 #include "gromacs/timing/wallcycle.h"
70 #include "gromacs/utility/fatalerror.h"
71 #include "gromacs/utility/smalloc.h"
72
73 void ns(FILE              *fp,
74         t_forcerec        *fr,
75         matrix             box,
76         gmx_groups_t      *groups,
77         gmx_localtop_t    *top,
78         t_mdatoms         *md,
79         t_commrec         *cr,
80         t_nrnb            *nrnb,
81         gmx_bool           bFillGrid,
82         gmx_bool           bDoLongRangeNS)
83 {
84     int     nsearch;
85
86
87     if (!fr->ns.nblist_initialized)
88     {
89         init_neighbor_list(fp, fr, md->homenr);
90     }
91
92     if (fr->bTwinRange)
93     {
94         fr->nlr = 0;
95     }
96
97     nsearch = search_neighbours(fp, fr, box, top, groups, cr, nrnb, md,
98                                 bFillGrid, bDoLongRangeNS);
99     if (debug)
100     {
101         fprintf(debug, "nsearch = %d\n", nsearch);
102     }
103
104     /* Check whether we have to do dynamic load balancing */
105     /*if ((nsb->nstDlb > 0) && (mod(step,nsb->nstDlb) == 0))
106        count_nb(cr,nsb,&(top->blocks[ebCGS]),nns,fr->nlr,
107        &(top->idef),opts->ngener);
108      */
109     if (fr->ns.dump_nl > 0)
110     {
111         dump_nblist(fp, cr, fr, fr->ns.dump_nl);
112     }
113 }
114
115 static void reduce_thread_forces(int n, rvec *f,
116                                  tensor vir_q, tensor vir_lj,
117                                  real *Vcorr_q, real *Vcorr_lj,
118                                  real *dvdl_q, real *dvdl_lj,
119                                  int nthreads, f_thread_t *f_t)
120 {
121     int t, i;
122     int nthreads_loop gmx_unused;
123
124     // cppcheck-suppress unreadVariable
125     nthreads_loop = gmx_omp_nthreads_get(emntBonded);
126     /* This reduction can run over any number of threads */
127 #pragma omp parallel for num_threads(nthreads_loop) private(t) schedule(static)
128     for (i = 0; i < n; i++)
129     {
130         for (t = 1; t < nthreads; t++)
131         {
132             rvec_inc(f[i], f_t[t].f[i]);
133         }
134     }
135     for (t = 1; t < nthreads; t++)
136     {
137         *Vcorr_q  += f_t[t].Vcorr_q;
138         *Vcorr_lj += f_t[t].Vcorr_lj;
139         *dvdl_q   += f_t[t].dvdl[efptCOUL];
140         *dvdl_lj  += f_t[t].dvdl[efptVDW];
141         m_add(vir_q, f_t[t].vir_q, vir_q);
142         m_add(vir_lj, f_t[t].vir_lj, vir_lj);
143     }
144 }
145
146 void do_force_lowlevel(t_forcerec *fr,      t_inputrec *ir,
147                        t_idef     *idef,    t_commrec  *cr,
148                        t_nrnb     *nrnb,    gmx_wallcycle_t wcycle,
149                        t_mdatoms  *md,
150                        rvec       x[],      history_t  *hist,
151                        rvec       f[],
152                        rvec       f_longrange[],
153                        gmx_enerdata_t *enerd,
154                        t_fcdata   *fcd,
155                        gmx_localtop_t *top,
156                        gmx_genborn_t *born,
157                        gmx_bool       bBornRadii,
158                        matrix     box,
159                        t_lambda   *fepvals,
160                        real       *lambda,
161                        t_graph    *graph,
162                        t_blocka   *excl,
163                        rvec       mu_tot[],
164                        int        flags,
165                        float      *cycles_pme)
166 {
167     int         i, j;
168     int         donb_flags;
169     gmx_bool    bSB;
170     int         pme_flags;
171     matrix      boxs;
172     rvec        box_size;
173     t_pbc       pbc;
174     real        dvdl_dum[efptNR], dvdl_nb[efptNR];
175
176 #ifdef GMX_MPI
177     double  t0 = 0.0, t1, t2, t3; /* time measurement for coarse load balancing */
178 #endif
179
180     set_pbc(&pbc, fr->ePBC, box);
181
182     /* reset free energy components */
183     for (i = 0; i < efptNR; i++)
184     {
185         dvdl_nb[i]  = 0;
186         dvdl_dum[i] = 0;
187     }
188
189     /* Reset box */
190     for (i = 0; (i < DIM); i++)
191     {
192         box_size[i] = box[i][i];
193     }
194
195     debug_gmx();
196
197     /* do QMMM first if requested */
198     if (fr->bQMMM)
199     {
200         enerd->term[F_EQM] = calculate_QMMM(cr, x, f, fr);
201     }
202
203     /* Call the short range functions all in one go. */
204
205 #ifdef GMX_MPI
206     /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/
207 #define TAKETIME FALSE
208     if (TAKETIME)
209     {
210         MPI_Barrier(cr->mpi_comm_mygroup);
211         t0 = MPI_Wtime();
212     }
213 #endif
214
215     if (ir->nwall)
216     {
217         /* foreign lambda component for walls */
218         real dvdl_walls = do_walls(ir, fr, box, md, x, f, lambda[efptVDW],
219                                    enerd->grpp.ener[egLJSR], nrnb);
220         enerd->dvdl_lin[efptVDW] += dvdl_walls;
221     }
222
223     /* If doing GB, reset dvda and calculate the Born radii */
224     if (ir->implicit_solvent)
225     {
226         wallcycle_sub_start(wcycle, ewcsNONBONDED);
227
228         for (i = 0; i < born->nr; i++)
229         {
230             fr->dvda[i] = 0;
231         }
232
233         if (bBornRadii)
234         {
235             calc_gb_rad(cr, fr, ir, top, x, &(fr->gblist), born, md, nrnb);
236         }
237
238         wallcycle_sub_stop(wcycle, ewcsNONBONDED);
239     }
240
241     where();
242     /* We only do non-bonded calculation with group scheme here, the verlet
243      * calls are done from do_force_cutsVERLET(). */
244     if (fr->cutoff_scheme == ecutsGROUP && (flags & GMX_FORCE_NONBONDED))
245     {
246         donb_flags = 0;
247         /* Add short-range interactions */
248         donb_flags |= GMX_NONBONDED_DO_SR;
249
250         /* Currently all group scheme kernels always calculate (shift-)forces */
251         if (flags & GMX_FORCE_FORCES)
252         {
253             donb_flags |= GMX_NONBONDED_DO_FORCE;
254         }
255         if (flags & GMX_FORCE_VIRIAL)
256         {
257             donb_flags |= GMX_NONBONDED_DO_SHIFTFORCE;
258         }
259         if (flags & GMX_FORCE_ENERGY)
260         {
261             donb_flags |= GMX_NONBONDED_DO_POTENTIAL;
262         }
263         if (flags & GMX_FORCE_DO_LR)
264         {
265             donb_flags |= GMX_NONBONDED_DO_LR;
266         }
267
268         wallcycle_sub_start(wcycle, ewcsNONBONDED);
269         do_nonbonded(fr, x, f, f_longrange, md, excl,
270                      &enerd->grpp, nrnb,
271                      lambda, dvdl_nb, -1, -1, donb_flags);
272
273         /* If we do foreign lambda and we have soft-core interactions
274          * we have to recalculate the (non-linear) energies contributions.
275          */
276         if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0)
277         {
278             for (i = 0; i < enerd->n_lambda; i++)
279             {
280                 real lam_i[efptNR];
281
282                 for (j = 0; j < efptNR; j++)
283                 {
284                     lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
285                 }
286                 reset_foreign_enerdata(enerd);
287                 do_nonbonded(fr, x, f, f_longrange, md, excl,
288                              &(enerd->foreign_grpp), nrnb,
289                              lam_i, dvdl_dum, -1, -1,
290                              (donb_flags & ~GMX_NONBONDED_DO_FORCE) | GMX_NONBONDED_DO_FOREIGNLAMBDA);
291                 sum_epot(&(enerd->foreign_grpp), enerd->foreign_term);
292                 enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT];
293             }
294         }
295         wallcycle_sub_stop(wcycle, ewcsNONBONDED);
296         where();
297     }
298
299     /* If we are doing GB, calculate bonded forces and apply corrections
300      * to the solvation forces */
301     /* MRS: Eventually, many need to include free energy contribution here! */
302     if (ir->implicit_solvent)
303     {
304         wallcycle_sub_start(wcycle, ewcsLISTED);
305         calc_gb_forces(cr, md, born, top, x, f, fr, idef,
306                        ir->gb_algorithm, ir->sa_algorithm, nrnb, &pbc, graph, enerd);
307         wallcycle_sub_stop(wcycle, ewcsLISTED);
308     }
309
310 #ifdef GMX_MPI
311     if (TAKETIME)
312     {
313         t1          = MPI_Wtime();
314         fr->t_fnbf += t1-t0;
315     }
316 #endif
317
318     if (fepvals->sc_alpha != 0)
319     {
320         enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW];
321     }
322     else
323     {
324         enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW];
325     }
326
327     if (fepvals->sc_alpha != 0)
328
329     /* even though coulomb part is linear, we already added it, beacuse we
330        need to go through the vdw calculation anyway */
331     {
332         enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL];
333     }
334     else
335     {
336         enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL];
337     }
338
339     debug_gmx();
340
341
342     if (debug)
343     {
344         pr_rvecs(debug, 0, "fshift after SR", fr->fshift, SHIFTS);
345     }
346
347     /* Shift the coordinates. Must be done before listed forces and PPPM,
348      * but is also necessary for SHAKE and update, therefore it can NOT
349      * go when no listed forces have to be evaluated.
350      *
351      * The shifting and PBC code is deliberately not timed, since with
352      * the Verlet scheme it only takes non-zero time with triclinic
353      * boxes, and even then the time is around a factor of 100 less
354      * than the next smallest counter.
355      */
356
357
358     /* Here sometimes we would not need to shift with NBFonly,
359      * but we do so anyhow for consistency of the returned coordinates.
360      */
361     if (graph)
362     {
363         shift_self(graph, box, x);
364         if (TRICLINIC(box))
365         {
366             inc_nrnb(nrnb, eNR_SHIFTX, 2*graph->nnodes);
367         }
368         else
369         {
370             inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes);
371         }
372     }
373     /* Check whether we need to do listed interactions or correct for exclusions */
374     if (fr->bMolPBC &&
375         ((flags & GMX_FORCE_LISTED)
376          || EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype)))
377     {
378         /* TODO There are no electrostatics methods that require this
379            transformation, when using the Verlet scheme, so update the
380            above conditional. */
381         /* Since all atoms are in the rectangular or triclinic unit-cell,
382          * only single box vector shifts (2 in x) are required.
383          */
384         set_pbc_dd(&pbc, fr->ePBC, cr->dd, TRUE, box);
385     }
386     debug_gmx();
387
388     do_force_listed(wcycle, box, ir->fepvals, cr->ms,
389                     idef, (const rvec *) x, hist, f, fr,
390                     &pbc, graph, enerd, nrnb, lambda, md, fcd,
391                     DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL,
392                     flags);
393
394     where();
395
396     *cycles_pme = 0;
397     clear_mat(fr->vir_el_recip);
398     clear_mat(fr->vir_lj_recip);
399
400     /* Do long-range electrostatics and/or LJ-PME, including related short-range
401      * corrections.
402      */
403     if (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype))
404     {
405         int  status            = 0;
406         real Vlr_q             = 0, Vlr_lj = 0, Vcorr_q = 0, Vcorr_lj = 0;
407         real dvdl_long_range_q = 0, dvdl_long_range_lj = 0;
408
409         bSB = (ir->nwall == 2);
410         if (bSB)
411         {
412             copy_mat(box, boxs);
413             svmul(ir->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]);
414             box_size[ZZ] *= ir->wall_ewald_zfac;
415         }
416
417         if (EEL_PME_EWALD(fr->eeltype) || EVDW_PME(fr->vdwtype))
418         {
419             real dvdl_long_range_correction_q   = 0;
420             real dvdl_long_range_correction_lj  = 0;
421             /* With the Verlet scheme exclusion forces are calculated
422              * in the non-bonded kernel.
423              */
424             /* The TPI molecule does not have exclusions with the rest
425              * of the system and no intra-molecular PME grid
426              * contributions will be calculated in
427              * gmx_pme_calc_energy.
428              */
429             if ((ir->cutoff_scheme == ecutsGROUP && fr->n_tpi == 0) ||
430                 ir->ewald_geometry != eewg3D ||
431                 ir->epsilon_surface != 0)
432             {
433                 int nthreads, t;
434
435                 wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION);
436
437                 if (fr->n_tpi > 0)
438                 {
439                     gmx_fatal(FARGS, "TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions");
440                 }
441
442                 nthreads = gmx_omp_nthreads_get(emntBonded);
443 #pragma omp parallel for num_threads(nthreads) schedule(static)
444                 for (t = 0; t < nthreads; t++)
445                 {
446                     int     i;
447                     rvec   *fnv;
448                     tensor *vir_q, *vir_lj;
449                     real   *Vcorrt_q, *Vcorrt_lj, *dvdlt_q, *dvdlt_lj;
450                     if (t == 0)
451                     {
452                         fnv       = fr->f_novirsum;
453                         vir_q     = &fr->vir_el_recip;
454                         vir_lj    = &fr->vir_lj_recip;
455                         Vcorrt_q  = &Vcorr_q;
456                         Vcorrt_lj = &Vcorr_lj;
457                         dvdlt_q   = &dvdl_long_range_correction_q;
458                         dvdlt_lj  = &dvdl_long_range_correction_lj;
459                     }
460                     else
461                     {
462                         fnv       = fr->f_t[t].f;
463                         vir_q     = &fr->f_t[t].vir_q;
464                         vir_lj    = &fr->f_t[t].vir_lj;
465                         Vcorrt_q  = &fr->f_t[t].Vcorr_q;
466                         Vcorrt_lj = &fr->f_t[t].Vcorr_lj;
467                         dvdlt_q   = &fr->f_t[t].dvdl[efptCOUL];
468                         dvdlt_lj  = &fr->f_t[t].dvdl[efptVDW];
469                         for (i = 0; i < fr->natoms_force; i++)
470                         {
471                             clear_rvec(fnv[i]);
472                         }
473                         clear_mat(*vir_q);
474                         clear_mat(*vir_lj);
475                     }
476                     *dvdlt_q  = 0;
477                     *dvdlt_lj = 0;
478
479                     ewald_LRcorrection(fr->excl_load[t], fr->excl_load[t+1],
480                                        cr, t, fr,
481                                        md->chargeA, md->chargeB,
482                                        md->sqrt_c6A, md->sqrt_c6B,
483                                        md->sigmaA, md->sigmaB,
484                                        md->sigma3A, md->sigma3B,
485                                        md->nChargePerturbed || md->nTypePerturbed,
486                                        ir->cutoff_scheme != ecutsVERLET,
487                                        excl, x, bSB ? boxs : box, mu_tot,
488                                        ir->ewald_geometry,
489                                        ir->epsilon_surface,
490                                        fnv, *vir_q, *vir_lj,
491                                        Vcorrt_q, Vcorrt_lj,
492                                        lambda[efptCOUL], lambda[efptVDW],
493                                        dvdlt_q, dvdlt_lj);
494                 }
495                 if (nthreads > 1)
496                 {
497                     reduce_thread_forces(fr->natoms_force, fr->f_novirsum,
498                                          fr->vir_el_recip, fr->vir_lj_recip,
499                                          &Vcorr_q, &Vcorr_lj,
500                                          &dvdl_long_range_correction_q,
501                                          &dvdl_long_range_correction_lj,
502                                          nthreads, fr->f_t);
503                 }
504                 wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION);
505             }
506
507             if (EEL_PME_EWALD(fr->eeltype) && fr->n_tpi == 0)
508             {
509                 /* This is not in a subcounter because it takes a
510                    negligible and constant-sized amount of time */
511                 Vcorr_q += ewald_charge_correction(cr, fr, lambda[efptCOUL], box,
512                                                    &dvdl_long_range_correction_q,
513                                                    fr->vir_el_recip);
514             }
515
516             enerd->dvdl_lin[efptCOUL] += dvdl_long_range_correction_q;
517             enerd->dvdl_lin[efptVDW]  += dvdl_long_range_correction_lj;
518
519             if ((EEL_PME(fr->eeltype) || EVDW_PME(fr->vdwtype)) && (cr->duty & DUTY_PME))
520             {
521                 /* Do reciprocal PME for Coulomb and/or LJ. */
522                 assert(fr->n_tpi >= 0);
523                 if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED))
524                 {
525                     pme_flags = GMX_PME_SPREAD | GMX_PME_SOLVE;
526                     if (EEL_PME(fr->eeltype))
527                     {
528                         pme_flags     |= GMX_PME_DO_COULOMB;
529                     }
530                     if (EVDW_PME(fr->vdwtype))
531                     {
532                         pme_flags |= GMX_PME_DO_LJ;
533                     }
534                     if (flags & GMX_FORCE_FORCES)
535                     {
536                         pme_flags |= GMX_PME_CALC_F;
537                     }
538                     if (flags & GMX_FORCE_VIRIAL)
539                     {
540                         pme_flags |= GMX_PME_CALC_ENER_VIR;
541                     }
542                     if (fr->n_tpi > 0)
543                     {
544                         /* We don't calculate f, but we do want the potential */
545                         pme_flags |= GMX_PME_CALC_POT;
546                     }
547                     wallcycle_start(wcycle, ewcPMEMESH);
548                     status = gmx_pme_do(fr->pmedata,
549                                         0, md->homenr - fr->n_tpi,
550                                         x, fr->f_novirsum,
551                                         md->chargeA, md->chargeB,
552                                         md->sqrt_c6A, md->sqrt_c6B,
553                                         md->sigmaA, md->sigmaB,
554                                         bSB ? boxs : box, cr,
555                                         DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0,
556                                         DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0,
557                                         nrnb, wcycle,
558                                         fr->vir_el_recip, fr->ewaldcoeff_q,
559                                         fr->vir_lj_recip, fr->ewaldcoeff_lj,
560                                         &Vlr_q, &Vlr_lj,
561                                         lambda[efptCOUL], lambda[efptVDW],
562                                         &dvdl_long_range_q, &dvdl_long_range_lj, pme_flags);
563                     *cycles_pme = wallcycle_stop(wcycle, ewcPMEMESH);
564                     if (status != 0)
565                     {
566                         gmx_fatal(FARGS, "Error %d in reciprocal PME routine", status);
567                     }
568                     /* We should try to do as little computation after
569                      * this as possible, because parallel PME synchronizes
570                      * the nodes, so we want all load imbalance of the
571                      * rest of the force calculation to be before the PME
572                      * call.  DD load balancing is done on the whole time
573                      * of the force call (without PME).
574                      */
575                 }
576                 if (fr->n_tpi > 0)
577                 {
578                     if (EVDW_PME(ir->vdwtype))
579                     {
580
581                         gmx_fatal(FARGS, "Test particle insertion not implemented with LJ-PME");
582                     }
583                     /* Determine the PME grid energy of the test molecule
584                      * with the PME grid potential of the other charges.
585                      */
586                     gmx_pme_calc_energy(fr->pmedata, fr->n_tpi,
587                                         x + md->homenr - fr->n_tpi,
588                                         md->chargeA + md->homenr - fr->n_tpi,
589                                         &Vlr_q);
590                 }
591             }
592         }
593
594         if (!EEL_PME(fr->eeltype) && EEL_PME_EWALD(fr->eeltype))
595         {
596             Vlr_q = do_ewald(ir, x, fr->f_novirsum,
597                              md->chargeA, md->chargeB,
598                              box_size, cr, md->homenr,
599                              fr->vir_el_recip, fr->ewaldcoeff_q,
600                              lambda[efptCOUL], &dvdl_long_range_q, fr->ewald_table);
601         }
602
603         /* Note that with separate PME nodes we get the real energies later */
604         enerd->dvdl_lin[efptCOUL] += dvdl_long_range_q;
605         enerd->dvdl_lin[efptVDW]  += dvdl_long_range_lj;
606         enerd->term[F_COUL_RECIP]  = Vlr_q + Vcorr_q;
607         enerd->term[F_LJ_RECIP]    = Vlr_lj + Vcorr_lj;
608         if (debug)
609         {
610             fprintf(debug, "Vlr_q = %g, Vcorr_q = %g, Vlr_corr_q = %g\n",
611                     Vlr_q, Vcorr_q, enerd->term[F_COUL_RECIP]);
612             pr_rvecs(debug, 0, "vir_el_recip after corr", fr->vir_el_recip, DIM);
613             pr_rvecs(debug, 0, "fshift after LR Corrections", fr->fshift, SHIFTS);
614             fprintf(debug, "Vlr_lj: %g, Vcorr_lj = %g, Vlr_corr_lj = %g\n",
615                     Vlr_lj, Vcorr_lj, enerd->term[F_LJ_RECIP]);
616             pr_rvecs(debug, 0, "vir_lj_recip after corr", fr->vir_lj_recip, DIM);
617         }
618     }
619     else
620     {
621         /* Is there a reaction-field exclusion correction needed? */
622         if (EEL_RF(fr->eeltype) && eelRF_NEC != fr->eeltype)
623         {
624             /* With the Verlet scheme, exclusion forces are calculated
625              * in the non-bonded kernel.
626              */
627             if (ir->cutoff_scheme != ecutsVERLET)
628             {
629                 real dvdl_rf_excl      = 0;
630                 enerd->term[F_RF_EXCL] =
631                     RF_excl_correction(fr, graph, md, excl, x, f,
632                                        fr->fshift, &pbc, lambda[efptCOUL], &dvdl_rf_excl);
633
634                 enerd->dvdl_lin[efptCOUL] += dvdl_rf_excl;
635             }
636         }
637     }
638     where();
639     debug_gmx();
640
641     if (debug)
642     {
643         print_nrnb(debug, nrnb);
644     }
645     debug_gmx();
646
647 #ifdef GMX_MPI
648     if (TAKETIME)
649     {
650         t2 = MPI_Wtime();
651         MPI_Barrier(cr->mpi_comm_mygroup);
652         t3          = MPI_Wtime();
653         fr->t_wait += t3-t2;
654         if (fr->timesteps == 11)
655         {
656             char buf[22];
657             fprintf(stderr, "* PP load balancing info: rank %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n",
658                     cr->nodeid, gmx_step_str(fr->timesteps, buf),
659                     100*fr->t_wait/(fr->t_wait+fr->t_fnbf),
660                     (fr->t_fnbf+fr->t_wait)/fr->t_fnbf);
661         }
662         fr->timesteps++;
663     }
664 #endif
665
666     if (debug)
667     {
668         pr_rvecs(debug, 0, "fshift after bondeds", fr->fshift, SHIFTS);
669     }
670
671 }
672
673 void init_enerdata(int ngener, int n_lambda, gmx_enerdata_t *enerd)
674 {
675     int i, n2;
676
677     for (i = 0; i < F_NRE; i++)
678     {
679         enerd->term[i]         = 0;
680         enerd->foreign_term[i] = 0;
681     }
682
683
684     for (i = 0; i < efptNR; i++)
685     {
686         enerd->dvdl_lin[i]     = 0;
687         enerd->dvdl_nonlin[i]  = 0;
688     }
689
690     n2 = ngener*ngener;
691     if (debug)
692     {
693         fprintf(debug, "Creating %d sized group matrix for energies\n", n2);
694     }
695     enerd->grpp.nener         = n2;
696     enerd->foreign_grpp.nener = n2;
697     for (i = 0; (i < egNR); i++)
698     {
699         snew(enerd->grpp.ener[i], n2);
700         snew(enerd->foreign_grpp.ener[i], n2);
701     }
702
703     if (n_lambda)
704     {
705         enerd->n_lambda = 1 + n_lambda;
706         snew(enerd->enerpart_lambda, enerd->n_lambda);
707     }
708     else
709     {
710         enerd->n_lambda = 0;
711     }
712 }
713
714 void destroy_enerdata(gmx_enerdata_t *enerd)
715 {
716     int i;
717
718     for (i = 0; (i < egNR); i++)
719     {
720         sfree(enerd->grpp.ener[i]);
721     }
722
723     for (i = 0; (i < egNR); i++)
724     {
725         sfree(enerd->foreign_grpp.ener[i]);
726     }
727
728     if (enerd->n_lambda)
729     {
730         sfree(enerd->enerpart_lambda);
731     }
732 }
733
734 static real sum_v(int n, real v[])
735 {
736     real t;
737     int  i;
738
739     t = 0.0;
740     for (i = 0; (i < n); i++)
741     {
742         t = t + v[i];
743     }
744
745     return t;
746 }
747
748 void sum_epot(gmx_grppairener_t *grpp, real *epot)
749 {
750     int i;
751
752     /* Accumulate energies */
753     epot[F_COUL_SR]  = sum_v(grpp->nener, grpp->ener[egCOULSR]);
754     epot[F_LJ]       = sum_v(grpp->nener, grpp->ener[egLJSR]);
755     epot[F_LJ14]     = sum_v(grpp->nener, grpp->ener[egLJ14]);
756     epot[F_COUL14]   = sum_v(grpp->nener, grpp->ener[egCOUL14]);
757     epot[F_COUL_LR]  = sum_v(grpp->nener, grpp->ener[egCOULLR]);
758     epot[F_LJ_LR]    = sum_v(grpp->nener, grpp->ener[egLJLR]);
759     /* We have already added 1-2,1-3, and 1-4 terms to F_GBPOL */
760     epot[F_GBPOL]   += sum_v(grpp->nener, grpp->ener[egGB]);
761
762 /* lattice part of LR doesnt belong to any group
763  * and has been added earlier
764  */
765     epot[F_BHAM]     = sum_v(grpp->nener, grpp->ener[egBHAMSR]);
766     epot[F_BHAM_LR]  = sum_v(grpp->nener, grpp->ener[egBHAMLR]);
767
768     epot[F_EPOT] = 0;
769     for (i = 0; (i < F_EPOT); i++)
770     {
771         if (i != F_DISRESVIOL && i != F_ORIRESDEV)
772         {
773             epot[F_EPOT] += epot[i];
774         }
775     }
776 }
777
778 void sum_dhdl(gmx_enerdata_t *enerd, real *lambda, t_lambda *fepvals)
779 {
780     int    i, j, index;
781     double dlam;
782
783     enerd->dvdl_lin[efptVDW] += enerd->term[F_DVDL_VDW];  /* include dispersion correction */
784     enerd->term[F_DVDL]       = 0.0;
785     for (i = 0; i < efptNR; i++)
786     {
787         if (fepvals->separate_dvdl[i])
788         {
789             /* could this be done more readably/compactly? */
790             switch (i)
791             {
792                 case (efptMASS):
793                     index = F_DKDL;
794                     break;
795                 case (efptCOUL):
796                     index = F_DVDL_COUL;
797                     break;
798                 case (efptVDW):
799                     index = F_DVDL_VDW;
800                     break;
801                 case (efptBONDED):
802                     index = F_DVDL_BONDED;
803                     break;
804                 case (efptRESTRAINT):
805                     index = F_DVDL_RESTRAINT;
806                     break;
807                 default:
808                     index = F_DVDL;
809                     break;
810             }
811             enerd->term[index] = enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i];
812             if (debug)
813             {
814                 fprintf(debug, "dvdl-%s[%2d]: %f: non-linear %f + linear %f\n",
815                         efpt_names[i], i, enerd->term[index], enerd->dvdl_nonlin[i], enerd->dvdl_lin[i]);
816             }
817         }
818         else
819         {
820             enerd->term[F_DVDL] += enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i];
821             if (debug)
822             {
823                 fprintf(debug, "dvd-%sl[%2d]: %f: non-linear %f + linear %f\n",
824                         efpt_names[0], i, enerd->term[F_DVDL], enerd->dvdl_nonlin[i], enerd->dvdl_lin[i]);
825             }
826         }
827     }
828
829     /* Notes on the foreign lambda free energy difference evaluation:
830      * Adding the potential and ekin terms that depend linearly on lambda
831      * as delta lam * dvdl to the energy differences is exact.
832      * For the constraints this is not exact, but we have no other option
833      * without literally changing the lengths and reevaluating the energies at each step.
834      * (try to remedy this post 4.6 - MRS)
835      * For the non-bonded LR term we assume that the soft-core (if present)
836      * no longer affects the energy beyond the short-range cut-off,
837      * which is a very good approximation (except for exotic settings).
838      * (investigate how to overcome this post 4.6 - MRS)
839      */
840     if (fepvals->separate_dvdl[efptBONDED])
841     {
842         enerd->term[F_DVDL_BONDED] += enerd->term[F_DVDL_CONSTR];
843     }
844     else
845     {
846         enerd->term[F_DVDL] += enerd->term[F_DVDL_CONSTR];
847     }
848     enerd->term[F_DVDL_CONSTR] = 0;
849
850     for (i = 0; i < fepvals->n_lambda; i++)
851     {
852         /* note we are iterating over fepvals here!
853            For the current lam, dlam = 0 automatically,
854            so we don't need to add anything to the
855            enerd->enerpart_lambda[0] */
856
857         /* we don't need to worry about dvdl_lin contributions to dE at
858            current lambda, because the contributions to the current
859            lambda are automatically zeroed */
860
861         for (j = 0; j < efptNR; j++)
862         {
863             /* Note that this loop is over all dhdl components, not just the separated ones */
864             dlam = (fepvals->all_lambda[j][i]-lambda[j]);
865             enerd->enerpart_lambda[i+1] += dlam*enerd->dvdl_lin[j];
866             if (debug)
867             {
868                 fprintf(debug, "enerdiff lam %g: (%15s), non-linear %f linear %f*%f\n",
869                         fepvals->all_lambda[j][i], efpt_names[j],
870                         (enerd->enerpart_lambda[i+1] - enerd->enerpart_lambda[0]),
871                         dlam, enerd->dvdl_lin[j]);
872             }
873         }
874     }
875 }
876
877
878 void reset_foreign_enerdata(gmx_enerdata_t *enerd)
879 {
880     int  i, j;
881
882     /* First reset all foreign energy components.  Foreign energies always called on
883        neighbor search steps */
884     for (i = 0; (i < egNR); i++)
885     {
886         for (j = 0; (j < enerd->grpp.nener); j++)
887         {
888             enerd->foreign_grpp.ener[i][j] = 0.0;
889         }
890     }
891
892     /* potential energy components */
893     for (i = 0; (i <= F_EPOT); i++)
894     {
895         enerd->foreign_term[i] = 0.0;
896     }
897 }
898
899 void reset_enerdata(t_forcerec *fr, gmx_bool bNS,
900                     gmx_enerdata_t *enerd,
901                     gmx_bool bMaster)
902 {
903     gmx_bool bKeepLR;
904     int      i, j;
905
906     /* First reset all energy components, except for the long range terms
907      * on the master at non neighbor search steps, since the long range
908      * terms have already been summed at the last neighbor search step.
909      */
910     bKeepLR = (fr->bTwinRange && !bNS);
911     for (i = 0; (i < egNR); i++)
912     {
913         if (!(bKeepLR && bMaster && (i == egCOULLR || i == egLJLR)))
914         {
915             for (j = 0; (j < enerd->grpp.nener); j++)
916             {
917                 enerd->grpp.ener[i][j] = 0.0;
918             }
919         }
920     }
921     for (i = 0; i < efptNR; i++)
922     {
923         enerd->dvdl_lin[i]    = 0.0;
924         enerd->dvdl_nonlin[i] = 0.0;
925     }
926
927     /* Normal potential energy components */
928     for (i = 0; (i <= F_EPOT); i++)
929     {
930         enerd->term[i] = 0.0;
931     }
932     /* Initialize the dVdlambda term with the long range contribution */
933     /* Initialize the dvdl term with the long range contribution */
934     enerd->term[F_DVDL]            = 0.0;
935     enerd->term[F_DVDL_COUL]       = 0.0;
936     enerd->term[F_DVDL_VDW]        = 0.0;
937     enerd->term[F_DVDL_BONDED]     = 0.0;
938     enerd->term[F_DVDL_RESTRAINT]  = 0.0;
939     enerd->term[F_DKDL]            = 0.0;
940     if (enerd->n_lambda > 0)
941     {
942         for (i = 0; i < enerd->n_lambda; i++)
943         {
944             enerd->enerpart_lambda[i] = 0.0;
945         }
946     }
947     /* reset foreign energy data - separate function since we also call it elsewhere */
948     reset_foreign_enerdata(enerd);
949 }