src/gromacs/mdlib/force.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #include "gmxpre.h"
  38
  39 #include "force.h"
  40
  41 #include "config.h"
  42
  43 #include <cassert>
  44 #include <cmath>
  45 #include <cstring>
  46
  47 #include "gromacs/domdec/dlbtiming.h"
  48 #include "gromacs/domdec/domdec.h"
  49 #include "gromacs/domdec/domdec_struct.h"
  50 #include "gromacs/ewald/ewald.h"
  51 #include "gromacs/ewald/long_range_correction.h"
  52 #include "gromacs/ewald/pme.h"
  53 #include "gromacs/gmxlib/network.h"
  54 #include "gromacs/gmxlib/nrnb.h"
  55 #include "gromacs/gmxlib/nonbonded/nonbonded.h"
  56 #include "gromacs/listed_forces/listed_forces.h"
  57 #include "gromacs/math/vec.h"
  58 #include "gromacs/math/vecdump.h"
  59 #include "gromacs/mdlib/force_flags.h"
  60 #include "gromacs/mdlib/forcerec_threading.h"
  61 #include "gromacs/mdlib/mdrun.h"
  62 #include "gromacs/mdlib/ns.h"
  63 #include "gromacs/mdlib/qmmm.h"
  64 #include "gromacs/mdlib/rf_util.h"
  65 #include "gromacs/mdlib/wall.h"
  66 #include "gromacs/mdtypes/commrec.h"
  67 #include "gromacs/mdtypes/enerdata.h"
  68 #include "gromacs/mdtypes/forceoutput.h"
  69 #include "gromacs/mdtypes/forcerec.h"
  70 #include "gromacs/mdtypes/inputrec.h"
  71 #include "gromacs/mdtypes/md_enums.h"
  72 #include "gromacs/pbcutil/ishift.h"
  73 #include "gromacs/pbcutil/mshift.h"
  74 #include "gromacs/pbcutil/pbc.h"
  75 #include "gromacs/timing/wallcycle.h"
  76 #include "gromacs/utility/cstringutil.h"
  77 #include "gromacs/utility/exceptions.h"
  78 #include "gromacs/utility/fatalerror.h"
  79 #include "gromacs/utility/smalloc.h"
  80
  81 void ns(FILE               *fp,
  82         t_forcerec         *fr,
  83         matrix              box,
  84         const gmx_groups_t *groups,
  85         gmx_localtop_t     *top,
  86         const t_mdatoms    *md,
  87         const t_commrec    *cr,
  88         t_nrnb             *nrnb,
  89         gmx_bool            bFillGrid)
  90 {
  91     int     nsearch;
  92
  93
  94     if (!fr->ns->nblist_initialized)
  95     {
  96         init_neighbor_list(fp, fr, md->homenr);
  97     }
  98
  99     nsearch = search_neighbours(fp, fr, box, top, groups, cr, nrnb, md,
 100                                 bFillGrid);
 101     if (debug)
 102     {
 103         fprintf(debug, "nsearch = %d\n", nsearch);
 104     }
 105
 106     /* Check whether we have to do dynamic load balancing */
 107     /*if ((nsb->nstDlb > 0) && (mod(step,nsb->nstDlb) == 0))
 108        count_nb(cr,nsb,&(top->blocks[ebCGS]),nns,fr->nlr,
 109        &(top->idef),opts->ngener);
 110      */
 111     if (fr->ns->dump_nl > 0)
 112     {
 113         dump_nblist(fp, cr, fr, fr->ns->dump_nl);
 114     }
 115 }
 116
 117 static void clearEwaldThreadOutput(ewald_corr_thread_t *ewc_t)
 118 {
 119     ewc_t->Vcorr_q        = 0;
 120     ewc_t->Vcorr_lj       = 0;
 121     ewc_t->dvdl[efptCOUL] = 0;
 122     ewc_t->dvdl[efptVDW]  = 0;
 123     clear_mat(ewc_t->vir_q);
 124     clear_mat(ewc_t->vir_lj);
 125 }
 126
 127 static void reduceEwaldThreadOuput(int nthreads, ewald_corr_thread_t *ewc_t)
 128 {
 129     ewald_corr_thread_t &dest = ewc_t[0];
 130
 131     for (int t = 1; t < nthreads; t++)
 132     {
 133         dest.Vcorr_q        += ewc_t[t].Vcorr_q;
 134         dest.Vcorr_lj       += ewc_t[t].Vcorr_lj;
 135         dest.dvdl[efptCOUL] += ewc_t[t].dvdl[efptCOUL];
 136         dest.dvdl[efptVDW]  += ewc_t[t].dvdl[efptVDW];
 137         m_add(dest.vir_q,  ewc_t[t].vir_q,  dest.vir_q);
 138         m_add(dest.vir_lj, ewc_t[t].vir_lj, dest.vir_lj);
 139     }
 140 }
 141
 142 void do_force_lowlevel(t_forcerec                   *fr,
 143                        const t_inputrec             *ir,
 144                        const t_idef                 *idef,
 145                        const t_commrec              *cr,
 146                        const gmx_multisim_t         *ms,
 147                        t_nrnb                       *nrnb,
 148                        gmx_wallcycle_t               wcycle,
 149                        const t_mdatoms              *md,
 150                        rvec                          x[],
 151                        history_t                    *hist,
 152                        rvec                         *forceForUseWithShiftForces,
 153                        gmx::ForceWithVirial         *forceWithVirial,
 154                        gmx_enerdata_t               *enerd,
 155                        t_fcdata                     *fcd,
 156                        matrix                        box,
 157                        t_lambda                     *fepvals,
 158                        real                         *lambda,
 159                        const t_graph                *graph,
 160                        const t_blocka               *excl,
 161                        rvec                          mu_tot[],
 162                        int                           flags,
 163                        float                        *cycles_pme,
 164                        const DDBalanceRegionHandler &ddBalanceRegionHandler)
 165 {
 166     int         i, j;
 167     int         donb_flags;
 168     int         pme_flags;
 169     t_pbc       pbc;
 170     real        dvdl_dum[efptNR], dvdl_nb[efptNR];
 171
 172 #if GMX_MPI
 173     double  t0 = 0.0, t1, t2, t3; /* time measurement for coarse load balancing */
 174 #endif
 175
 176     set_pbc(&pbc, fr->ePBC, box);
 177
 178     /* reset free energy components */
 179     for (i = 0; i < efptNR; i++)
 180     {
 181         dvdl_nb[i]  = 0;
 182         dvdl_dum[i] = 0;
 183     }
 184
 185     /* do QMMM first if requested */
 186     if (fr->bQMMM)
 187     {
 188         enerd->term[F_EQM] = calculate_QMMM(cr, forceForUseWithShiftForces, fr);
 189     }
 190
 191     /* Call the short range functions all in one go. */
 192
 193 #if GMX_MPI
 194     /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/
 195 #define TAKETIME FALSE
 196     if (TAKETIME)
 197     {
 198         MPI_Barrier(cr->mpi_comm_mygroup);
 199         t0 = MPI_Wtime();
 200     }
 201 #endif
 202
 203     if (ir->nwall)
 204     {
 205         /* foreign lambda component for walls */
 206         real dvdl_walls = do_walls(*ir, *fr, box, *md, x,
 207                                    forceWithVirial, lambda[efptVDW],
 208                                    enerd->grpp.ener[egLJSR], nrnb);
 209         enerd->dvdl_lin[efptVDW] += dvdl_walls;
 210     }
 211
 212     /* We only do non-bonded calculation with group scheme here, the verlet
 213      * calls are done from do_force_cutsVERLET(). */
 214     if (fr->cutoff_scheme == ecutsGROUP && (flags & GMX_FORCE_NONBONDED))
 215     {
 216         donb_flags = 0;
 217         /* Add short-range interactions */
 218         donb_flags |= GMX_NONBONDED_DO_SR;
 219
 220         /* Currently all group scheme kernels always calculate (shift-)forces */
 221         if (flags & GMX_FORCE_FORCES)
 222         {
 223             donb_flags |= GMX_NONBONDED_DO_FORCE;
 224         }
 225         if (flags & GMX_FORCE_VIRIAL)
 226         {
 227             donb_flags |= GMX_NONBONDED_DO_SHIFTFORCE;
 228         }
 229         if (flags & GMX_FORCE_ENERGY)
 230         {
 231             donb_flags |= GMX_NONBONDED_DO_POTENTIAL;
 232         }
 233
 234         wallcycle_sub_start(wcycle, ewcsNONBONDED);
 235         do_nonbonded(fr, x, forceForUseWithShiftForces, md, excl,
 236                      &enerd->grpp, nrnb,
 237                      lambda, dvdl_nb, -1, -1, donb_flags);
 238
 239         /* If we do foreign lambda and we have soft-core interactions
 240          * we have to recalculate the (non-linear) energies contributions.
 241          */
 242         if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0)
 243         {
 244             for (i = 0; i < enerd->n_lambda; i++)
 245             {
 246                 real lam_i[efptNR];
 247
 248                 for (j = 0; j < efptNR; j++)
 249                 {
 250                     lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
 251                 }
 252                 reset_foreign_enerdata(enerd);
 253                 do_nonbonded(fr, x, forceForUseWithShiftForces, md, excl,
 254                              &(enerd->foreign_grpp), nrnb,
 255                              lam_i, dvdl_dum, -1, -1,
 256                              (donb_flags & ~GMX_NONBONDED_DO_FORCE) | GMX_NONBONDED_DO_FOREIGNLAMBDA);
 257                 sum_epot(&(enerd->foreign_grpp), enerd->foreign_term);
 258                 enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT];
 259             }
 260         }
 261         wallcycle_sub_stop(wcycle, ewcsNONBONDED);
 262     }
 263
 264 #if GMX_MPI
 265     if (TAKETIME)
 266     {
 267         t1          = MPI_Wtime();
 268         fr->t_fnbf += t1-t0;
 269     }
 270 #endif
 271
 272     if (fepvals->sc_alpha != 0)
 273     {
 274         enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW];
 275     }
 276     else
 277     {
 278         enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW];
 279     }
 280
 281     if (fepvals->sc_alpha != 0)
 282
 283     /* even though coulomb part is linear, we already added it, beacuse we
 284        need to go through the vdw calculation anyway */
 285     {
 286         enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL];
 287     }
 288     else
 289     {
 290         enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL];
 291     }
 292
 293     if (debug)
 294     {
 295         pr_rvecs(debug, 0, "fshift after SR", fr->fshift, SHIFTS);
 296     }
 297
 298     /* Shift the coordinates. Must be done before listed forces and PPPM,
 299      * but is also necessary for SHAKE and update, therefore it can NOT
 300      * go when no listed forces have to be evaluated.
 301      *
 302      * The shifting and PBC code is deliberately not timed, since with
 303      * the Verlet scheme it only takes non-zero time with triclinic
 304      * boxes, and even then the time is around a factor of 100 less
 305      * than the next smallest counter.
 306      */
 307
 308
 309     /* Here sometimes we would not need to shift with NBFonly,
 310      * but we do so anyhow for consistency of the returned coordinates.
 311      */
 312     if (graph)
 313     {
 314         shift_self(graph, box, x);
 315         if (TRICLINIC(box))
 316         {
 317             inc_nrnb(nrnb, eNR_SHIFTX, 2*graph->nnodes);
 318         }
 319         else
 320         {
 321             inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes);
 322         }
 323     }
 324     /* Check whether we need to do listed interactions or correct for exclusions */
 325     if (fr->bMolPBC &&
 326         ((flags & GMX_FORCE_LISTED)
 327          || EEL_RF(fr->ic->eeltype) || EEL_FULL(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype)))
 328     {
 329         /* TODO There are no electrostatics methods that require this
 330            transformation, when using the Verlet scheme, so update the
 331            above conditional. */
 332         /* Since all atoms are in the rectangular or triclinic unit-cell,
 333          * only single box vector shifts (2 in x) are required.
 334          */
 335         set_pbc_dd(&pbc, fr->ePBC, DOMAINDECOMP(cr) ? cr->dd->nc : nullptr,
 336                    TRUE, box);
 337     }
 338
 339     do_force_listed(wcycle, box, ir->fepvals, cr, ms,
 340                     idef, x, hist,
 341                     forceForUseWithShiftForces, forceWithVirial,
 342                     fr, &pbc, graph, enerd, nrnb, lambda, md, fcd,
 343                     DOMAINDECOMP(cr) ? cr->dd->globalAtomIndices.data() : nullptr,
 344                     flags);
 345
 346
 347     *cycles_pme = 0;
 348
 349     /* Do long-range electrostatics and/or LJ-PME, including related short-range
 350      * corrections.
 351      */
 352     if (EEL_FULL(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype))
 353     {
 354         int  status            = 0;
 355         real Vlr_q             = 0, Vlr_lj = 0;
 356
 357         /* We reduce all virial, dV/dlambda and energy contributions, except
 358          * for the reciprocal energies (Vlr_q, Vlr_lj) into the same struct.
 359          */
 360         ewald_corr_thread_t &ewaldOutput = fr->ewc_t[0];
 361         clearEwaldThreadOutput(&ewaldOutput);
 362
 363         if (EEL_PME_EWALD(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype))
 364         {
 365             /* With the Verlet scheme exclusion forces are calculated
 366              * in the non-bonded kernel.
 367              */
 368             /* The TPI molecule does not have exclusions with the rest
 369              * of the system and no intra-molecular PME grid
 370              * contributions will be calculated in
 371              * gmx_pme_calc_energy.
 372              */
 373             if ((ir->cutoff_scheme == ecutsGROUP && fr->n_tpi == 0) ||
 374                 ir->ewald_geometry != eewg3D ||
 375                 ir->epsilon_surface != 0)
 376             {
 377                 int nthreads, t;
 378
 379                 wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION);
 380
 381                 if (fr->n_tpi > 0)
 382                 {
 383                     gmx_fatal(FARGS, "TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions");
 384                 }
 385
 386                 nthreads = fr->nthread_ewc;
 387 #pragma omp parallel for num_threads(nthreads) schedule(static)
 388                 for (t = 0; t < nthreads; t++)
 389                 {
 390                     try
 391                     {
 392                         ewald_corr_thread_t &ewc_t = fr->ewc_t[t];
 393                         if (t > 0)
 394                         {
 395                             clearEwaldThreadOutput(&ewc_t);
 396                         }
 397
 398                         /* Threading is only supported with the Verlet cut-off
 399                          * scheme and then only single particle forces (no
 400                          * exclusion forces) are calculated, so we can store
 401                          * the forces in the normal, single forceWithVirial->force_ array.
 402                          */
 403                         ewald_LRcorrection(md->homenr, cr, nthreads, t, fr, ir,
 404                                            md->chargeA, md->chargeB,
 405                                            md->sqrt_c6A, md->sqrt_c6B,
 406                                            md->sigmaA, md->sigmaB,
 407                                            md->sigma3A, md->sigma3B,
 408                                            (md->nChargePerturbed != 0) || (md->nTypePerturbed != 0),
 409                                            ir->cutoff_scheme != ecutsVERLET,
 410                                            excl, x, box, mu_tot,
 411                                            ir->ewald_geometry,
 412                                            ir->epsilon_surface,
 413                                            as_rvec_array(forceWithVirial->force_.data()),
 414                                            ewc_t.vir_q, ewc_t.vir_lj,
 415                                            &ewc_t.Vcorr_q, &ewc_t.Vcorr_lj,
 416                                            lambda[efptCOUL], lambda[efptVDW],
 417                                            &ewc_t.dvdl[efptCOUL], &ewc_t.dvdl[efptVDW]);
 418                     }
 419                     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 420                 }
 421                 if (nthreads > 1)
 422                 {
 423                     reduceEwaldThreadOuput(nthreads, fr->ewc_t);
 424                 }
 425                 wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION);
 426             }
 427
 428             if (EEL_PME_EWALD(fr->ic->eeltype) && fr->n_tpi == 0)
 429             {
 430                 /* This is not in a subcounter because it takes a
 431                    negligible and constant-sized amount of time */
 432                 ewaldOutput.Vcorr_q +=
 433                     ewald_charge_correction(cr, fr, lambda[efptCOUL], box,
 434                                             &ewaldOutput.dvdl[efptCOUL],
 435                                             ewaldOutput.vir_q);
 436             }
 437
 438             if ((EEL_PME(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype)) &&
 439                 thisRankHasDuty(cr, DUTY_PME) && (pme_run_mode(fr->pmedata) == PmeRunMode::CPU))
 440             {
 441                 /* Do reciprocal PME for Coulomb and/or LJ. */
 442                 assert(fr->n_tpi >= 0);
 443                 if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED))
 444                 {
 445                     pme_flags = GMX_PME_SPREAD | GMX_PME_SOLVE;
 446
 447                     if (flags & GMX_FORCE_FORCES)
 448                     {
 449                         pme_flags |= GMX_PME_CALC_F;
 450                     }
 451                     if (flags & GMX_FORCE_VIRIAL)
 452                     {
 453                         pme_flags |= GMX_PME_CALC_ENER_VIR;
 454                     }
 455                     if (fr->n_tpi > 0)
 456                     {
 457                         /* We don't calculate f, but we do want the potential */
 458                         pme_flags |= GMX_PME_CALC_POT;
 459                     }
 460
 461                     /* With domain decomposition we close the CPU side load
 462                      * balancing region here, because PME does global
 463                      * communication that acts as a global barrier.
 464                      */
 465                     ddBalanceRegionHandler.closeAfterForceComputationCpu();
 466
 467                     wallcycle_start(wcycle, ewcPMEMESH);
 468                     status = gmx_pme_do(fr->pmedata,
 469                                         0, md->homenr - fr->n_tpi,
 470                                         x,
 471                                         as_rvec_array(forceWithVirial->force_.data()),
 472                                         md->chargeA, md->chargeB,
 473                                         md->sqrt_c6A, md->sqrt_c6B,
 474                                         md->sigmaA, md->sigmaB,
 475                                         box, cr,
 476                                         DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0,
 477                                         DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0,
 478                                         nrnb, wcycle,
 479                                         ewaldOutput.vir_q, ewaldOutput.vir_lj,
 480                                         &Vlr_q, &Vlr_lj,
 481                                         lambda[efptCOUL], lambda[efptVDW],
 482                                         &ewaldOutput.dvdl[efptCOUL],
 483                                         &ewaldOutput.dvdl[efptVDW],
 484                                         pme_flags);
 485                     *cycles_pme = wallcycle_stop(wcycle, ewcPMEMESH);
 486                     if (status != 0)
 487                     {
 488                         gmx_fatal(FARGS, "Error %d in reciprocal PME routine", status);
 489                     }
 490
 491                     /* We should try to do as little computation after
 492                      * this as possible, because parallel PME synchronizes
 493                      * the nodes, so we want all load imbalance of the
 494                      * rest of the force calculation to be before the PME
 495                      * call.  DD load balancing is done on the whole time
 496                      * of the force call (without PME).
 497                      */
 498                 }
 499                 if (fr->n_tpi > 0)
 500                 {
 501                     if (EVDW_PME(ir->vdwtype))
 502                     {
 503
 504                         gmx_fatal(FARGS, "Test particle insertion not implemented with LJ-PME");
 505                     }
 506                     /* Determine the PME grid energy of the test molecule
 507                      * with the PME grid potential of the other charges.
 508                      */
 509                     gmx_pme_calc_energy(fr->pmedata, fr->n_tpi,
 510                                         x + md->homenr - fr->n_tpi,
 511                                         md->chargeA + md->homenr - fr->n_tpi,
 512                                         &Vlr_q);
 513                 }
 514             }
 515         }
 516
 517         if (!EEL_PME(fr->ic->eeltype) && EEL_PME_EWALD(fr->ic->eeltype))
 518         {
 519             Vlr_q = do_ewald(ir, x, as_rvec_array(forceWithVirial->force_.data()),
 520                              md->chargeA, md->chargeB,
 521                              box, cr, md->homenr,
 522                              ewaldOutput.vir_q, fr->ic->ewaldcoeff_q,
 523                              lambda[efptCOUL], &ewaldOutput.dvdl[efptCOUL],
 524                              fr->ewald_table);
 525         }
 526
 527         /* Note that with separate PME nodes we get the real energies later */
 528         // TODO it would be simpler if we just accumulated a single
 529         // long-range virial contribution.
 530         forceWithVirial->addVirialContribution(ewaldOutput.vir_q);
 531         forceWithVirial->addVirialContribution(ewaldOutput.vir_lj);
 532         enerd->dvdl_lin[efptCOUL] += ewaldOutput.dvdl[efptCOUL];
 533         enerd->dvdl_lin[efptVDW]  += ewaldOutput.dvdl[efptVDW];
 534         enerd->term[F_COUL_RECIP]  = Vlr_q + ewaldOutput.Vcorr_q;
 535         enerd->term[F_LJ_RECIP]    = Vlr_lj + ewaldOutput.Vcorr_lj;
 536
 537         if (debug)
 538         {
 539             fprintf(debug, "Vlr_q = %g, Vcorr_q = %g, Vlr_corr_q = %g\n",
 540                     Vlr_q, ewaldOutput.Vcorr_q, enerd->term[F_COUL_RECIP]);
 541             pr_rvecs(debug, 0, "vir_el_recip after corr", ewaldOutput.vir_q, DIM);
 542             pr_rvecs(debug, 0, "fshift after LR Corrections", fr->fshift, SHIFTS);
 543             fprintf(debug, "Vlr_lj: %g, Vcorr_lj = %g, Vlr_corr_lj = %g\n",
 544                     Vlr_lj, ewaldOutput.Vcorr_lj, enerd->term[F_LJ_RECIP]);
 545             pr_rvecs(debug, 0, "vir_lj_recip after corr", ewaldOutput.vir_lj, DIM);
 546         }
 547     }
 548     else
 549     {
 550         /* Is there a reaction-field exclusion correction needed?
 551          * With the Verlet scheme, exclusion forces are calculated
 552          * in the non-bonded kernel.
 553          */
 554         if (ir->cutoff_scheme != ecutsVERLET && EEL_RF(fr->ic->eeltype))
 555         {
 556             real dvdl_rf_excl      = 0;
 557             enerd->term[F_RF_EXCL] =
 558                 RF_excl_correction(fr, graph, md, excl, DOMAINDECOMP(cr),
 559                                    x, forceForUseWithShiftForces,
 560                                    fr->fshift, &pbc, lambda[efptCOUL], &dvdl_rf_excl);
 561
 562             enerd->dvdl_lin[efptCOUL] += dvdl_rf_excl;
 563         }
 564     }
 565
 566     if (debug)
 567     {
 568         print_nrnb(debug, nrnb);
 569     }
 570
 571 #if GMX_MPI
 572     if (TAKETIME)
 573     {
 574         t2 = MPI_Wtime();
 575         MPI_Barrier(cr->mpi_comm_mygroup);
 576         t3          = MPI_Wtime();
 577         fr->t_wait += t3-t2;
 578         if (fr->timesteps == 11)
 579         {
 580             char buf[22];
 581             fprintf(stderr, "* PP load balancing info: rank %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n",
 582                     cr->nodeid, gmx_step_str(fr->timesteps, buf),
 583                     100*fr->t_wait/(fr->t_wait+fr->t_fnbf),
 584                     (fr->t_fnbf+fr->t_wait)/fr->t_fnbf);
 585         }
 586         fr->timesteps++;
 587     }
 588 #endif
 589
 590     if (debug)
 591     {
 592         pr_rvecs(debug, 0, "fshift after bondeds", fr->fshift, SHIFTS);
 593     }
 594
 595 }
 596
 597 void init_enerdata(int ngener, int n_lambda, gmx_enerdata_t *enerd)
 598 {
 599     int i, n2;
 600
 601     for (i = 0; i < F_NRE; i++)
 602     {
 603         enerd->term[i]         = 0;
 604         enerd->foreign_term[i] = 0;
 605     }
 606
 607
 608     for (i = 0; i < efptNR; i++)
 609     {
 610         enerd->dvdl_lin[i]     = 0;
 611         enerd->dvdl_nonlin[i]  = 0;
 612     }
 613
 614     n2 = ngener*ngener;
 615     if (debug)
 616     {
 617         fprintf(debug, "Creating %d sized group matrix for energies\n", n2);
 618     }
 619     enerd->grpp.nener         = n2;
 620     enerd->foreign_grpp.nener = n2;
 621     for (i = 0; (i < egNR); i++)
 622     {
 623         snew(enerd->grpp.ener[i], n2);
 624         snew(enerd->foreign_grpp.ener[i], n2);
 625     }
 626
 627     if (n_lambda)
 628     {
 629         enerd->n_lambda = 1 + n_lambda;
 630         snew(enerd->enerpart_lambda, enerd->n_lambda);
 631     }
 632     else
 633     {
 634         enerd->n_lambda = 0;
 635     }
 636 }
 637
 638 void destroy_enerdata(gmx_enerdata_t *enerd)
 639 {
 640     int i;
 641
 642     for (i = 0; (i < egNR); i++)
 643     {
 644         sfree(enerd->grpp.ener[i]);
 645     }
 646
 647     for (i = 0; (i < egNR); i++)
 648     {
 649         sfree(enerd->foreign_grpp.ener[i]);
 650     }
 651
 652     if (enerd->n_lambda)
 653     {
 654         sfree(enerd->enerpart_lambda);
 655     }
 656 }
 657
 658 static real sum_v(int n, const real v[])
 659 {
 660     real t;
 661     int  i;
 662
 663     t = 0.0;
 664     for (i = 0; (i < n); i++)
 665     {
 666         t = t + v[i];
 667     }
 668
 669     return t;
 670 }
 671
 672 void sum_epot(gmx_grppairener_t *grpp, real *epot)
 673 {
 674     int i;
 675
 676     /* Accumulate energies */
 677     epot[F_COUL_SR]  = sum_v(grpp->nener, grpp->ener[egCOULSR]);
 678     epot[F_LJ]       = sum_v(grpp->nener, grpp->ener[egLJSR]);
 679     epot[F_LJ14]     = sum_v(grpp->nener, grpp->ener[egLJ14]);
 680     epot[F_COUL14]   = sum_v(grpp->nener, grpp->ener[egCOUL14]);
 681
 682 /* lattice part of LR doesnt belong to any group
 683  * and has been added earlier
 684  */
 685     epot[F_BHAM]     = sum_v(grpp->nener, grpp->ener[egBHAMSR]);
 686
 687     epot[F_EPOT] = 0;
 688     for (i = 0; (i < F_EPOT); i++)
 689     {
 690         if (i != F_DISRESVIOL && i != F_ORIRESDEV)
 691         {
 692             epot[F_EPOT] += epot[i];
 693         }
 694     }
 695 }
 696
 697 void sum_dhdl(gmx_enerdata_t *enerd, gmx::ArrayRef<const real> lambda, t_lambda *fepvals)
 698 {
 699     int    index;
 700
 701     enerd->dvdl_lin[efptVDW] += enerd->term[F_DVDL_VDW];  /* include dispersion correction */
 702     enerd->term[F_DVDL]       = 0.0;
 703     for (int i = 0; i < efptNR; i++)
 704     {
 705         if (fepvals->separate_dvdl[i])
 706         {
 707             /* could this be done more readably/compactly? */
 708             switch (i)
 709             {
 710                 case (efptMASS):
 711                     index = F_DKDL;
 712                     break;
 713                 case (efptCOUL):
 714                     index = F_DVDL_COUL;
 715                     break;
 716                 case (efptVDW):
 717                     index = F_DVDL_VDW;
 718                     break;
 719                 case (efptBONDED):
 720                     index = F_DVDL_BONDED;
 721                     break;
 722                 case (efptRESTRAINT):
 723                     index = F_DVDL_RESTRAINT;
 724                     break;
 725                 default:
 726                     index = F_DVDL;
 727                     break;
 728             }
 729             enerd->term[index] = enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i];
 730             if (debug)
 731             {
 732                 fprintf(debug, "dvdl-%s[%2d]: %f: non-linear %f + linear %f\n",
 733                         efpt_names[i], i, enerd->term[index], enerd->dvdl_nonlin[i], enerd->dvdl_lin[i]);
 734             }
 735         }
 736         else
 737         {
 738             enerd->term[F_DVDL] += enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i];
 739             if (debug)
 740             {
 741                 fprintf(debug, "dvd-%sl[%2d]: %f: non-linear %f + linear %f\n",
 742                         efpt_names[0], i, enerd->term[F_DVDL], enerd->dvdl_nonlin[i], enerd->dvdl_lin[i]);
 743             }
 744         }
 745     }
 746
 747     if (fepvals->separate_dvdl[efptBONDED])
 748     {
 749         enerd->term[F_DVDL_BONDED] += enerd->term[F_DVDL_CONSTR];
 750     }
 751     else
 752     {
 753         enerd->term[F_DVDL] += enerd->term[F_DVDL_CONSTR];
 754     }
 755
 756     for (int i = 0; i < fepvals->n_lambda; i++)
 757     {
 758         /* note we are iterating over fepvals here!
 759            For the current lam, dlam = 0 automatically,
 760            so we don't need to add anything to the
 761            enerd->enerpart_lambda[0] */
 762
 763         /* we don't need to worry about dvdl_lin contributions to dE at
 764            current lambda, because the contributions to the current
 765            lambda are automatically zeroed */
 766
 767         double &enerpart_lambda = enerd->enerpart_lambda[i + 1];
 768
 769         for (gmx::index j = 0; j < lambda.ssize(); j++)
 770         {
 771             /* Note that this loop is over all dhdl components, not just the separated ones */
 772             const double dlam  = fepvals->all_lambda[j][i] - lambda[j];
 773
 774             enerpart_lambda   += dlam*enerd->dvdl_lin[j];
 775
 776             /* Constraints can not be evaluated at foreign lambdas, so we add
 777              * a linear extrapolation. This is an approximation, but usually
 778              * quite accurate since constraints change little between lambdas.
 779              */
 780             if ((j == efptBONDED && fepvals->separate_dvdl[efptBONDED]) ||
 781                 (j == efptFEP && !fepvals->separate_dvdl[efptBONDED]))
 782             {
 783                 enerpart_lambda += dlam*enerd->term[F_DVDL_CONSTR];
 784             }
 785
 786             if (j == efptMASS && !fepvals->separate_dvdl[j])
 787             {
 788                 enerpart_lambda += dlam*enerd->term[F_DKDL];
 789             }
 790
 791             if (debug)
 792             {
 793                 fprintf(debug, "enerdiff lam %g: (%15s), non-linear %f linear %f*%f\n",
 794                         fepvals->all_lambda[j][i], efpt_names[j],
 795                         enerpart_lambda - enerd->enerpart_lambda[0],
 796                         dlam, enerd->dvdl_lin[j]);
 797             }
 798         }
 799     }
 800
 801     /* The constrain contribution is now included in other terms, so clear it */
 802     enerd->term[F_DVDL_CONSTR] = 0;
 803 }
 804
 805
 806 void reset_foreign_enerdata(gmx_enerdata_t *enerd)
 807 {
 808     int  i, j;
 809
 810     /* First reset all foreign energy components.  Foreign energies always called on
 811        neighbor search steps */
 812     for (i = 0; (i < egNR); i++)
 813     {
 814         for (j = 0; (j < enerd->grpp.nener); j++)
 815         {
 816             enerd->foreign_grpp.ener[i][j] = 0.0;
 817         }
 818     }
 819
 820     /* potential energy components */
 821     for (i = 0; (i <= F_EPOT); i++)
 822     {
 823         enerd->foreign_term[i] = 0.0;
 824     }
 825 }
 826
 827 void reset_enerdata(gmx_enerdata_t *enerd)
 828 {
 829     int      i, j;
 830
 831     /* First reset all energy components. */
 832     for (i = 0; (i < egNR); i++)
 833     {
 834         for (j = 0; (j < enerd->grpp.nener); j++)
 835         {
 836             enerd->grpp.ener[i][j] = 0.0;
 837         }
 838     }
 839     for (i = 0; i < efptNR; i++)
 840     {
 841         enerd->dvdl_lin[i]    = 0.0;
 842         enerd->dvdl_nonlin[i] = 0.0;
 843     }
 844
 845     /* Normal potential energy components */
 846     for (i = 0; (i <= F_EPOT); i++)
 847     {
 848         enerd->term[i] = 0.0;
 849     }
 850     enerd->term[F_DVDL]            = 0.0;
 851     enerd->term[F_DVDL_COUL]       = 0.0;
 852     enerd->term[F_DVDL_VDW]        = 0.0;
 853     enerd->term[F_DVDL_BONDED]     = 0.0;
 854     enerd->term[F_DVDL_RESTRAINT]  = 0.0;
 855     enerd->term[F_DKDL]            = 0.0;
 856     if (enerd->n_lambda > 0)
 857     {
 858         for (i = 0; i < enerd->n_lambda; i++)
 859         {
 860             enerd->enerpart_lambda[i] = 0.0;
 861         }
 862     }
 863     /* reset foreign energy data - separate function since we also call it elsewhere */
 864     reset_foreign_enerdata(enerd);
 865 }