Revert "Eliminate mdlib/mdrun.h"
[alexxy/gromacs.git] / src / gromacs / mdlib / force.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8  * and including many others, as listed in the AUTHORS file in the
9  * top-level source directory and at http://www.gromacs.org.
10  *
11  * GROMACS is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public License
13  * as published by the Free Software Foundation; either version 2.1
14  * of the License, or (at your option) any later version.
15  *
16  * GROMACS is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with GROMACS; if not, see
23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
25  *
26  * If you want to redistribute modifications to GROMACS, please
27  * consider that scientific software is very special. Version
28  * control is crucial - bugs must be traceable. We will be happy to
29  * consider code for inclusion in the official distribution, but
30  * derived work must not be called official GROMACS. Details are found
31  * in the README & COPYING files - if they are missing, get the
32  * official version at http://www.gromacs.org.
33  *
34  * To help us fund GROMACS development, we humbly ask that you cite
35  * the research papers on the package. Check out http://www.gromacs.org.
36  */
37 #include "gmxpre.h"
38
39 #include "force.h"
40
41 #include "config.h"
42
43 #include <cassert>
44 #include <cmath>
45 #include <cstring>
46
47 #include "gromacs/domdec/dlbtiming.h"
48 #include "gromacs/domdec/domdec.h"
49 #include "gromacs/domdec/domdec_struct.h"
50 #include "gromacs/ewald/ewald.h"
51 #include "gromacs/ewald/long_range_correction.h"
52 #include "gromacs/ewald/pme.h"
53 #include "gromacs/gmxlib/network.h"
54 #include "gromacs/gmxlib/nrnb.h"
55 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
56 #include "gromacs/listed_forces/listed_forces.h"
57 #include "gromacs/math/vec.h"
58 #include "gromacs/math/vecdump.h"
59 #include "gromacs/mdlib/force_flags.h"
60 #include "gromacs/mdlib/forcerec_threading.h"
61 #include "gromacs/mdlib/mdrun.h"
62 #include "gromacs/mdlib/ns.h"
63 #include "gromacs/mdlib/qmmm.h"
64 #include "gromacs/mdlib/rf_util.h"
65 #include "gromacs/mdlib/wall.h"
66 #include "gromacs/mdtypes/commrec.h"
67 #include "gromacs/mdtypes/enerdata.h"
68 #include "gromacs/mdtypes/forceoutput.h"
69 #include "gromacs/mdtypes/forcerec.h"
70 #include "gromacs/mdtypes/inputrec.h"
71 #include "gromacs/mdtypes/md_enums.h"
72 #include "gromacs/pbcutil/ishift.h"
73 #include "gromacs/pbcutil/mshift.h"
74 #include "gromacs/pbcutil/pbc.h"
75 #include "gromacs/timing/wallcycle.h"
76 #include "gromacs/utility/cstringutil.h"
77 #include "gromacs/utility/exceptions.h"
78 #include "gromacs/utility/fatalerror.h"
79 #include "gromacs/utility/smalloc.h"
80
81 void ns(FILE               *fp,
82         t_forcerec         *fr,
83         matrix              box,
84         const gmx_groups_t *groups,
85         gmx_localtop_t     *top,
86         const t_mdatoms    *md,
87         const t_commrec    *cr,
88         t_nrnb             *nrnb,
89         gmx_bool            bFillGrid)
90 {
91     int     nsearch;
92
93
94     if (!fr->ns->nblist_initialized)
95     {
96         init_neighbor_list(fp, fr, md->homenr);
97     }
98
99     nsearch = search_neighbours(fp, fr, box, top, groups, cr, nrnb, md,
100                                 bFillGrid);
101     if (debug)
102     {
103         fprintf(debug, "nsearch = %d\n", nsearch);
104     }
105
106     /* Check whether we have to do dynamic load balancing */
107     /*if ((nsb->nstDlb > 0) && (mod(step,nsb->nstDlb) == 0))
108        count_nb(cr,nsb,&(top->blocks[ebCGS]),nns,fr->nlr,
109        &(top->idef),opts->ngener);
110      */
111     if (fr->ns->dump_nl > 0)
112     {
113         dump_nblist(fp, cr, fr, fr->ns->dump_nl);
114     }
115 }
116
117 static void clearEwaldThreadOutput(ewald_corr_thread_t *ewc_t)
118 {
119     ewc_t->Vcorr_q        = 0;
120     ewc_t->Vcorr_lj       = 0;
121     ewc_t->dvdl[efptCOUL] = 0;
122     ewc_t->dvdl[efptVDW]  = 0;
123     clear_mat(ewc_t->vir_q);
124     clear_mat(ewc_t->vir_lj);
125 }
126
127 static void reduceEwaldThreadOuput(int nthreads, ewald_corr_thread_t *ewc_t)
128 {
129     ewald_corr_thread_t &dest = ewc_t[0];
130
131     for (int t = 1; t < nthreads; t++)
132     {
133         dest.Vcorr_q        += ewc_t[t].Vcorr_q;
134         dest.Vcorr_lj       += ewc_t[t].Vcorr_lj;
135         dest.dvdl[efptCOUL] += ewc_t[t].dvdl[efptCOUL];
136         dest.dvdl[efptVDW]  += ewc_t[t].dvdl[efptVDW];
137         m_add(dest.vir_q,  ewc_t[t].vir_q,  dest.vir_q);
138         m_add(dest.vir_lj, ewc_t[t].vir_lj, dest.vir_lj);
139     }
140 }
141
142 void do_force_lowlevel(t_forcerec                   *fr,
143                        const t_inputrec             *ir,
144                        const t_idef                 *idef,
145                        const t_commrec              *cr,
146                        const gmx_multisim_t         *ms,
147                        t_nrnb                       *nrnb,
148                        gmx_wallcycle_t               wcycle,
149                        const t_mdatoms              *md,
150                        rvec                          x[],
151                        history_t                    *hist,
152                        rvec                         *forceForUseWithShiftForces,
153                        gmx::ForceWithVirial         *forceWithVirial,
154                        gmx_enerdata_t               *enerd,
155                        t_fcdata                     *fcd,
156                        matrix                        box,
157                        t_lambda                     *fepvals,
158                        real                         *lambda,
159                        const t_graph                *graph,
160                        const t_blocka               *excl,
161                        rvec                          mu_tot[],
162                        int                           flags,
163                        float                        *cycles_pme,
164                        const DDBalanceRegionHandler &ddBalanceRegionHandler)
165 {
166     int         i, j;
167     int         donb_flags;
168     int         pme_flags;
169     t_pbc       pbc;
170     real        dvdl_dum[efptNR], dvdl_nb[efptNR];
171
172 #if GMX_MPI
173     double  t0 = 0.0, t1, t2, t3; /* time measurement for coarse load balancing */
174 #endif
175
176     set_pbc(&pbc, fr->ePBC, box);
177
178     /* reset free energy components */
179     for (i = 0; i < efptNR; i++)
180     {
181         dvdl_nb[i]  = 0;
182         dvdl_dum[i] = 0;
183     }
184
185     /* do QMMM first if requested */
186     if (fr->bQMMM)
187     {
188         enerd->term[F_EQM] = calculate_QMMM(cr, forceForUseWithShiftForces, fr);
189     }
190
191     /* Call the short range functions all in one go. */
192
193 #if GMX_MPI
194     /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/
195 #define TAKETIME FALSE
196     if (TAKETIME)
197     {
198         MPI_Barrier(cr->mpi_comm_mygroup);
199         t0 = MPI_Wtime();
200     }
201 #endif
202
203     if (ir->nwall)
204     {
205         /* foreign lambda component for walls */
206         real dvdl_walls = do_walls(*ir, *fr, box, *md, x,
207                                    forceWithVirial, lambda[efptVDW],
208                                    enerd->grpp.ener[egLJSR], nrnb);
209         enerd->dvdl_lin[efptVDW] += dvdl_walls;
210     }
211
212     /* We only do non-bonded calculation with group scheme here, the verlet
213      * calls are done from do_force_cutsVERLET(). */
214     if (fr->cutoff_scheme == ecutsGROUP && (flags & GMX_FORCE_NONBONDED))
215     {
216         donb_flags = 0;
217         /* Add short-range interactions */
218         donb_flags |= GMX_NONBONDED_DO_SR;
219
220         /* Currently all group scheme kernels always calculate (shift-)forces */
221         if (flags & GMX_FORCE_FORCES)
222         {
223             donb_flags |= GMX_NONBONDED_DO_FORCE;
224         }
225         if (flags & GMX_FORCE_VIRIAL)
226         {
227             donb_flags |= GMX_NONBONDED_DO_SHIFTFORCE;
228         }
229         if (flags & GMX_FORCE_ENERGY)
230         {
231             donb_flags |= GMX_NONBONDED_DO_POTENTIAL;
232         }
233
234         wallcycle_sub_start(wcycle, ewcsNONBONDED);
235         do_nonbonded(fr, x, forceForUseWithShiftForces, md, excl,
236                      &enerd->grpp, nrnb,
237                      lambda, dvdl_nb, -1, -1, donb_flags);
238
239         /* If we do foreign lambda and we have soft-core interactions
240          * we have to recalculate the (non-linear) energies contributions.
241          */
242         if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0)
243         {
244             for (i = 0; i < enerd->n_lambda; i++)
245             {
246                 real lam_i[efptNR];
247
248                 for (j = 0; j < efptNR; j++)
249                 {
250                     lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
251                 }
252                 reset_foreign_enerdata(enerd);
253                 do_nonbonded(fr, x, forceForUseWithShiftForces, md, excl,
254                              &(enerd->foreign_grpp), nrnb,
255                              lam_i, dvdl_dum, -1, -1,
256                              (donb_flags & ~GMX_NONBONDED_DO_FORCE) | GMX_NONBONDED_DO_FOREIGNLAMBDA);
257                 sum_epot(&(enerd->foreign_grpp), enerd->foreign_term);
258                 enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT];
259             }
260         }
261         wallcycle_sub_stop(wcycle, ewcsNONBONDED);
262     }
263
264 #if GMX_MPI
265     if (TAKETIME)
266     {
267         t1          = MPI_Wtime();
268         fr->t_fnbf += t1-t0;
269     }
270 #endif
271
272     if (fepvals->sc_alpha != 0)
273     {
274         enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW];
275     }
276     else
277     {
278         enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW];
279     }
280
281     if (fepvals->sc_alpha != 0)
282
283     /* even though coulomb part is linear, we already added it, beacuse we
284        need to go through the vdw calculation anyway */
285     {
286         enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL];
287     }
288     else
289     {
290         enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL];
291     }
292
293     if (debug)
294     {
295         pr_rvecs(debug, 0, "fshift after SR", fr->fshift, SHIFTS);
296     }
297
298     /* Shift the coordinates. Must be done before listed forces and PPPM,
299      * but is also necessary for SHAKE and update, therefore it can NOT
300      * go when no listed forces have to be evaluated.
301      *
302      * The shifting and PBC code is deliberately not timed, since with
303      * the Verlet scheme it only takes non-zero time with triclinic
304      * boxes, and even then the time is around a factor of 100 less
305      * than the next smallest counter.
306      */
307
308
309     /* Here sometimes we would not need to shift with NBFonly,
310      * but we do so anyhow for consistency of the returned coordinates.
311      */
312     if (graph)
313     {
314         shift_self(graph, box, x);
315         if (TRICLINIC(box))
316         {
317             inc_nrnb(nrnb, eNR_SHIFTX, 2*graph->nnodes);
318         }
319         else
320         {
321             inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes);
322         }
323     }
324     /* Check whether we need to do listed interactions or correct for exclusions */
325     if (fr->bMolPBC &&
326         ((flags & GMX_FORCE_LISTED)
327          || EEL_RF(fr->ic->eeltype) || EEL_FULL(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype)))
328     {
329         /* TODO There are no electrostatics methods that require this
330            transformation, when using the Verlet scheme, so update the
331            above conditional. */
332         /* Since all atoms are in the rectangular or triclinic unit-cell,
333          * only single box vector shifts (2 in x) are required.
334          */
335         set_pbc_dd(&pbc, fr->ePBC, DOMAINDECOMP(cr) ? cr->dd->nc : nullptr,
336                    TRUE, box);
337     }
338
339     do_force_listed(wcycle, box, ir->fepvals, cr, ms,
340                     idef, x, hist,
341                     forceForUseWithShiftForces, forceWithVirial,
342                     fr, &pbc, graph, enerd, nrnb, lambda, md, fcd,
343                     DOMAINDECOMP(cr) ? cr->dd->globalAtomIndices.data() : nullptr,
344                     flags);
345
346
347     *cycles_pme = 0;
348
349     /* Do long-range electrostatics and/or LJ-PME, including related short-range
350      * corrections.
351      */
352     if (EEL_FULL(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype))
353     {
354         int  status            = 0;
355         real Vlr_q             = 0, Vlr_lj = 0;
356
357         /* We reduce all virial, dV/dlambda and energy contributions, except
358          * for the reciprocal energies (Vlr_q, Vlr_lj) into the same struct.
359          */
360         ewald_corr_thread_t &ewaldOutput = fr->ewc_t[0];
361         clearEwaldThreadOutput(&ewaldOutput);
362
363         if (EEL_PME_EWALD(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype))
364         {
365             /* With the Verlet scheme exclusion forces are calculated
366              * in the non-bonded kernel.
367              */
368             /* The TPI molecule does not have exclusions with the rest
369              * of the system and no intra-molecular PME grid
370              * contributions will be calculated in
371              * gmx_pme_calc_energy.
372              */
373             if ((ir->cutoff_scheme == ecutsGROUP && fr->n_tpi == 0) ||
374                 ir->ewald_geometry != eewg3D ||
375                 ir->epsilon_surface != 0)
376             {
377                 int nthreads, t;
378
379                 wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION);
380
381                 if (fr->n_tpi > 0)
382                 {
383                     gmx_fatal(FARGS, "TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions");
384                 }
385
386                 nthreads = fr->nthread_ewc;
387 #pragma omp parallel for num_threads(nthreads) schedule(static)
388                 for (t = 0; t < nthreads; t++)
389                 {
390                     try
391                     {
392                         ewald_corr_thread_t &ewc_t = fr->ewc_t[t];
393                         if (t > 0)
394                         {
395                             clearEwaldThreadOutput(&ewc_t);
396                         }
397
398                         /* Threading is only supported with the Verlet cut-off
399                          * scheme and then only single particle forces (no
400                          * exclusion forces) are calculated, so we can store
401                          * the forces in the normal, single forceWithVirial->force_ array.
402                          */
403                         ewald_LRcorrection(md->homenr, cr, nthreads, t, fr, ir,
404                                            md->chargeA, md->chargeB,
405                                            md->sqrt_c6A, md->sqrt_c6B,
406                                            md->sigmaA, md->sigmaB,
407                                            md->sigma3A, md->sigma3B,
408                                            (md->nChargePerturbed != 0) || (md->nTypePerturbed != 0),
409                                            ir->cutoff_scheme != ecutsVERLET,
410                                            excl, x, box, mu_tot,
411                                            ir->ewald_geometry,
412                                            ir->epsilon_surface,
413                                            as_rvec_array(forceWithVirial->force_.data()),
414                                            ewc_t.vir_q, ewc_t.vir_lj,
415                                            &ewc_t.Vcorr_q, &ewc_t.Vcorr_lj,
416                                            lambda[efptCOUL], lambda[efptVDW],
417                                            &ewc_t.dvdl[efptCOUL], &ewc_t.dvdl[efptVDW]);
418                     }
419                     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
420                 }
421                 if (nthreads > 1)
422                 {
423                     reduceEwaldThreadOuput(nthreads, fr->ewc_t);
424                 }
425                 wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION);
426             }
427
428             if (EEL_PME_EWALD(fr->ic->eeltype) && fr->n_tpi == 0)
429             {
430                 /* This is not in a subcounter because it takes a
431                    negligible and constant-sized amount of time */
432                 ewaldOutput.Vcorr_q +=
433                     ewald_charge_correction(cr, fr, lambda[efptCOUL], box,
434                                             &ewaldOutput.dvdl[efptCOUL],
435                                             ewaldOutput.vir_q);
436             }
437
438             if ((EEL_PME(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype)) &&
439                 thisRankHasDuty(cr, DUTY_PME) && (pme_run_mode(fr->pmedata) == PmeRunMode::CPU))
440             {
441                 /* Do reciprocal PME for Coulomb and/or LJ. */
442                 assert(fr->n_tpi >= 0);
443                 if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED))
444                 {
445                     pme_flags = GMX_PME_SPREAD | GMX_PME_SOLVE;
446
447                     if (flags & GMX_FORCE_FORCES)
448                     {
449                         pme_flags |= GMX_PME_CALC_F;
450                     }
451                     if (flags & GMX_FORCE_VIRIAL)
452                     {
453                         pme_flags |= GMX_PME_CALC_ENER_VIR;
454                     }
455                     if (fr->n_tpi > 0)
456                     {
457                         /* We don't calculate f, but we do want the potential */
458                         pme_flags |= GMX_PME_CALC_POT;
459                     }
460
461                     /* With domain decomposition we close the CPU side load
462                      * balancing region here, because PME does global
463                      * communication that acts as a global barrier.
464                      */
465                     ddBalanceRegionHandler.closeAfterForceComputationCpu();
466
467                     wallcycle_start(wcycle, ewcPMEMESH);
468                     status = gmx_pme_do(fr->pmedata,
469                                         0, md->homenr - fr->n_tpi,
470                                         x,
471                                         as_rvec_array(forceWithVirial->force_.data()),
472                                         md->chargeA, md->chargeB,
473                                         md->sqrt_c6A, md->sqrt_c6B,
474                                         md->sigmaA, md->sigmaB,
475                                         box, cr,
476                                         DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0,
477                                         DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0,
478                                         nrnb, wcycle,
479                                         ewaldOutput.vir_q, ewaldOutput.vir_lj,
480                                         &Vlr_q, &Vlr_lj,
481                                         lambda[efptCOUL], lambda[efptVDW],
482                                         &ewaldOutput.dvdl[efptCOUL],
483                                         &ewaldOutput.dvdl[efptVDW],
484                                         pme_flags);
485                     *cycles_pme = wallcycle_stop(wcycle, ewcPMEMESH);
486                     if (status != 0)
487                     {
488                         gmx_fatal(FARGS, "Error %d in reciprocal PME routine", status);
489                     }
490
491                     /* We should try to do as little computation after
492                      * this as possible, because parallel PME synchronizes
493                      * the nodes, so we want all load imbalance of the
494                      * rest of the force calculation to be before the PME
495                      * call.  DD load balancing is done on the whole time
496                      * of the force call (without PME).
497                      */
498                 }
499                 if (fr->n_tpi > 0)
500                 {
501                     if (EVDW_PME(ir->vdwtype))
502                     {
503
504                         gmx_fatal(FARGS, "Test particle insertion not implemented with LJ-PME");
505                     }
506                     /* Determine the PME grid energy of the test molecule
507                      * with the PME grid potential of the other charges.
508                      */
509                     gmx_pme_calc_energy(fr->pmedata, fr->n_tpi,
510                                         x + md->homenr - fr->n_tpi,
511                                         md->chargeA + md->homenr - fr->n_tpi,
512                                         &Vlr_q);
513                 }
514             }
515         }
516
517         if (!EEL_PME(fr->ic->eeltype) && EEL_PME_EWALD(fr->ic->eeltype))
518         {
519             Vlr_q = do_ewald(ir, x, as_rvec_array(forceWithVirial->force_.data()),
520                              md->chargeA, md->chargeB,
521                              box, cr, md->homenr,
522                              ewaldOutput.vir_q, fr->ic->ewaldcoeff_q,
523                              lambda[efptCOUL], &ewaldOutput.dvdl[efptCOUL],
524                              fr->ewald_table);
525         }
526
527         /* Note that with separate PME nodes we get the real energies later */
528         // TODO it would be simpler if we just accumulated a single
529         // long-range virial contribution.
530         forceWithVirial->addVirialContribution(ewaldOutput.vir_q);
531         forceWithVirial->addVirialContribution(ewaldOutput.vir_lj);
532         enerd->dvdl_lin[efptCOUL] += ewaldOutput.dvdl[efptCOUL];
533         enerd->dvdl_lin[efptVDW]  += ewaldOutput.dvdl[efptVDW];
534         enerd->term[F_COUL_RECIP]  = Vlr_q + ewaldOutput.Vcorr_q;
535         enerd->term[F_LJ_RECIP]    = Vlr_lj + ewaldOutput.Vcorr_lj;
536
537         if (debug)
538         {
539             fprintf(debug, "Vlr_q = %g, Vcorr_q = %g, Vlr_corr_q = %g\n",
540                     Vlr_q, ewaldOutput.Vcorr_q, enerd->term[F_COUL_RECIP]);
541             pr_rvecs(debug, 0, "vir_el_recip after corr", ewaldOutput.vir_q, DIM);
542             pr_rvecs(debug, 0, "fshift after LR Corrections", fr->fshift, SHIFTS);
543             fprintf(debug, "Vlr_lj: %g, Vcorr_lj = %g, Vlr_corr_lj = %g\n",
544                     Vlr_lj, ewaldOutput.Vcorr_lj, enerd->term[F_LJ_RECIP]);
545             pr_rvecs(debug, 0, "vir_lj_recip after corr", ewaldOutput.vir_lj, DIM);
546         }
547     }
548     else
549     {
550         /* Is there a reaction-field exclusion correction needed?
551          * With the Verlet scheme, exclusion forces are calculated
552          * in the non-bonded kernel.
553          */
554         if (ir->cutoff_scheme != ecutsVERLET && EEL_RF(fr->ic->eeltype))
555         {
556             real dvdl_rf_excl      = 0;
557             enerd->term[F_RF_EXCL] =
558                 RF_excl_correction(fr, graph, md, excl, DOMAINDECOMP(cr),
559                                    x, forceForUseWithShiftForces,
560                                    fr->fshift, &pbc, lambda[efptCOUL], &dvdl_rf_excl);
561
562             enerd->dvdl_lin[efptCOUL] += dvdl_rf_excl;
563         }
564     }
565
566     if (debug)
567     {
568         print_nrnb(debug, nrnb);
569     }
570
571 #if GMX_MPI
572     if (TAKETIME)
573     {
574         t2 = MPI_Wtime();
575         MPI_Barrier(cr->mpi_comm_mygroup);
576         t3          = MPI_Wtime();
577         fr->t_wait += t3-t2;
578         if (fr->timesteps == 11)
579         {
580             char buf[22];
581             fprintf(stderr, "* PP load balancing info: rank %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n",
582                     cr->nodeid, gmx_step_str(fr->timesteps, buf),
583                     100*fr->t_wait/(fr->t_wait+fr->t_fnbf),
584                     (fr->t_fnbf+fr->t_wait)/fr->t_fnbf);
585         }
586         fr->timesteps++;
587     }
588 #endif
589
590     if (debug)
591     {
592         pr_rvecs(debug, 0, "fshift after bondeds", fr->fshift, SHIFTS);
593     }
594
595 }
596
597 void init_enerdata(int ngener, int n_lambda, gmx_enerdata_t *enerd)
598 {
599     int i, n2;
600
601     for (i = 0; i < F_NRE; i++)
602     {
603         enerd->term[i]         = 0;
604         enerd->foreign_term[i] = 0;
605     }
606
607
608     for (i = 0; i < efptNR; i++)
609     {
610         enerd->dvdl_lin[i]     = 0;
611         enerd->dvdl_nonlin[i]  = 0;
612     }
613
614     n2 = ngener*ngener;
615     if (debug)
616     {
617         fprintf(debug, "Creating %d sized group matrix for energies\n", n2);
618     }
619     enerd->grpp.nener         = n2;
620     enerd->foreign_grpp.nener = n2;
621     for (i = 0; (i < egNR); i++)
622     {
623         snew(enerd->grpp.ener[i], n2);
624         snew(enerd->foreign_grpp.ener[i], n2);
625     }
626
627     if (n_lambda)
628     {
629         enerd->n_lambda = 1 + n_lambda;
630         snew(enerd->enerpart_lambda, enerd->n_lambda);
631     }
632     else
633     {
634         enerd->n_lambda = 0;
635     }
636 }
637
638 void destroy_enerdata(gmx_enerdata_t *enerd)
639 {
640     int i;
641
642     for (i = 0; (i < egNR); i++)
643     {
644         sfree(enerd->grpp.ener[i]);
645     }
646
647     for (i = 0; (i < egNR); i++)
648     {
649         sfree(enerd->foreign_grpp.ener[i]);
650     }
651
652     if (enerd->n_lambda)
653     {
654         sfree(enerd->enerpart_lambda);
655     }
656 }
657
658 static real sum_v(int n, const real v[])
659 {
660     real t;
661     int  i;
662
663     t = 0.0;
664     for (i = 0; (i < n); i++)
665     {
666         t = t + v[i];
667     }
668
669     return t;
670 }
671
672 void sum_epot(gmx_grppairener_t *grpp, real *epot)
673 {
674     int i;
675
676     /* Accumulate energies */
677     epot[F_COUL_SR]  = sum_v(grpp->nener, grpp->ener[egCOULSR]);
678     epot[F_LJ]       = sum_v(grpp->nener, grpp->ener[egLJSR]);
679     epot[F_LJ14]     = sum_v(grpp->nener, grpp->ener[egLJ14]);
680     epot[F_COUL14]   = sum_v(grpp->nener, grpp->ener[egCOUL14]);
681
682 /* lattice part of LR doesnt belong to any group
683  * and has been added earlier
684  */
685     epot[F_BHAM]     = sum_v(grpp->nener, grpp->ener[egBHAMSR]);
686
687     epot[F_EPOT] = 0;
688     for (i = 0; (i < F_EPOT); i++)
689     {
690         if (i != F_DISRESVIOL && i != F_ORIRESDEV)
691         {
692             epot[F_EPOT] += epot[i];
693         }
694     }
695 }
696
697 void sum_dhdl(gmx_enerdata_t *enerd, gmx::ArrayRef<const real> lambda, t_lambda *fepvals)
698 {
699     int    index;
700
701     enerd->dvdl_lin[efptVDW] += enerd->term[F_DVDL_VDW];  /* include dispersion correction */
702     enerd->term[F_DVDL]       = 0.0;
703     for (int i = 0; i < efptNR; i++)
704     {
705         if (fepvals->separate_dvdl[i])
706         {
707             /* could this be done more readably/compactly? */
708             switch (i)
709             {
710                 case (efptMASS):
711                     index = F_DKDL;
712                     break;
713                 case (efptCOUL):
714                     index = F_DVDL_COUL;
715                     break;
716                 case (efptVDW):
717                     index = F_DVDL_VDW;
718                     break;
719                 case (efptBONDED):
720                     index = F_DVDL_BONDED;
721                     break;
722                 case (efptRESTRAINT):
723                     index = F_DVDL_RESTRAINT;
724                     break;
725                 default:
726                     index = F_DVDL;
727                     break;
728             }
729             enerd->term[index] = enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i];
730             if (debug)
731             {
732                 fprintf(debug, "dvdl-%s[%2d]: %f: non-linear %f + linear %f\n",
733                         efpt_names[i], i, enerd->term[index], enerd->dvdl_nonlin[i], enerd->dvdl_lin[i]);
734             }
735         }
736         else
737         {
738             enerd->term[F_DVDL] += enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i];
739             if (debug)
740             {
741                 fprintf(debug, "dvd-%sl[%2d]: %f: non-linear %f + linear %f\n",
742                         efpt_names[0], i, enerd->term[F_DVDL], enerd->dvdl_nonlin[i], enerd->dvdl_lin[i]);
743             }
744         }
745     }
746
747     if (fepvals->separate_dvdl[efptBONDED])
748     {
749         enerd->term[F_DVDL_BONDED] += enerd->term[F_DVDL_CONSTR];
750     }
751     else
752     {
753         enerd->term[F_DVDL] += enerd->term[F_DVDL_CONSTR];
754     }
755
756     for (int i = 0; i < fepvals->n_lambda; i++)
757     {
758         /* note we are iterating over fepvals here!
759            For the current lam, dlam = 0 automatically,
760            so we don't need to add anything to the
761            enerd->enerpart_lambda[0] */
762
763         /* we don't need to worry about dvdl_lin contributions to dE at
764            current lambda, because the contributions to the current
765            lambda are automatically zeroed */
766
767         double &enerpart_lambda = enerd->enerpart_lambda[i + 1];
768
769         for (gmx::index j = 0; j < lambda.ssize(); j++)
770         {
771             /* Note that this loop is over all dhdl components, not just the separated ones */
772             const double dlam  = fepvals->all_lambda[j][i] - lambda[j];
773
774             enerpart_lambda   += dlam*enerd->dvdl_lin[j];
775
776             /* Constraints can not be evaluated at foreign lambdas, so we add
777              * a linear extrapolation. This is an approximation, but usually
778              * quite accurate since constraints change little between lambdas.
779              */
780             if ((j == efptBONDED && fepvals->separate_dvdl[efptBONDED]) ||
781                 (j == efptFEP && !fepvals->separate_dvdl[efptBONDED]))
782             {
783                 enerpart_lambda += dlam*enerd->term[F_DVDL_CONSTR];
784             }
785
786             if (j == efptMASS && !fepvals->separate_dvdl[j])
787             {
788                 enerpart_lambda += dlam*enerd->term[F_DKDL];
789             }
790
791             if (debug)
792             {
793                 fprintf(debug, "enerdiff lam %g: (%15s), non-linear %f linear %f*%f\n",
794                         fepvals->all_lambda[j][i], efpt_names[j],
795                         enerpart_lambda - enerd->enerpart_lambda[0],
796                         dlam, enerd->dvdl_lin[j]);
797             }
798         }
799     }
800
801     /* The constrain contribution is now included in other terms, so clear it */
802     enerd->term[F_DVDL_CONSTR] = 0;
803 }
804
805
806 void reset_foreign_enerdata(gmx_enerdata_t *enerd)
807 {
808     int  i, j;
809
810     /* First reset all foreign energy components.  Foreign energies always called on
811        neighbor search steps */
812     for (i = 0; (i < egNR); i++)
813     {
814         for (j = 0; (j < enerd->grpp.nener); j++)
815         {
816             enerd->foreign_grpp.ener[i][j] = 0.0;
817         }
818     }
819
820     /* potential energy components */
821     for (i = 0; (i <= F_EPOT); i++)
822     {
823         enerd->foreign_term[i] = 0.0;
824     }
825 }
826
827 void reset_enerdata(gmx_enerdata_t *enerd)
828 {
829     int      i, j;
830
831     /* First reset all energy components. */
832     for (i = 0; (i < egNR); i++)
833     {
834         for (j = 0; (j < enerd->grpp.nener); j++)
835         {
836             enerd->grpp.ener[i][j] = 0.0;
837         }
838     }
839     for (i = 0; i < efptNR; i++)
840     {
841         enerd->dvdl_lin[i]    = 0.0;
842         enerd->dvdl_nonlin[i] = 0.0;
843     }
844
845     /* Normal potential energy components */
846     for (i = 0; (i <= F_EPOT); i++)
847     {
848         enerd->term[i] = 0.0;
849     }
850     enerd->term[F_DVDL]            = 0.0;
851     enerd->term[F_DVDL_COUL]       = 0.0;
852     enerd->term[F_DVDL_VDW]        = 0.0;
853     enerd->term[F_DVDL_BONDED]     = 0.0;
854     enerd->term[F_DVDL_RESTRAINT]  = 0.0;
855     enerd->term[F_DKDL]            = 0.0;
856     if (enerd->n_lambda > 0)
857     {
858         for (i = 0; i < enerd->n_lambda; i++)
859         {
860             enerd->enerpart_lambda[i] = 0.0;
861         }
862     }
863     /* reset foreign energy data - separate function since we also call it elsewhere */
864     reset_foreign_enerdata(enerd);
865 }