1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8  * and including many others, as listed in the AUTHORS file in the
9  * top-level source directory and at http://www.gromacs.org.
10  *
11  * GROMACS is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public License
13  * as published by the Free Software Foundation; either version 2.1
14  * of the License, or (at your option) any later version.
15  *
16  * GROMACS is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with GROMACS; if not, see
23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
25  *
26  * If you want to redistribute modifications to GROMACS, please
27  * consider that scientific software is very special. Version
28  * control is crucial - bugs must be traceable. We will be happy to
29  * consider code for inclusion in the official distribution, but
30  * derived work must not be called official GROMACS. Details are found
31  * in the README & COPYING files - if they are missing, get the
32  * official version at http://www.gromacs.org.
33  *
34  * To help us fund GROMACS development, we humbly ask that you cite
35  * the research papers on the package. Check out http://www.gromacs.org.
36  */
37 #include "gmxpre.h"
38
39 #include "gromacs/legacyheaders/force.h"
40
41 #include "config.h"
42
43 #include <assert.h>
44 #include <math.h>
45 #include <string.h>
46
47 #include "gromacs/domdec/domdec.h"
48 #include "gromacs/ewald/ewald.h"
49 #include "gromacs/ewald/long-range-correction.h"
50 #include "gromacs/ewald/pme.h"
51 #include "gromacs/legacyheaders/gmx_omp_nthreads.h"
52 #include "gromacs/legacyheaders/macros.h"
53 #include "gromacs/legacyheaders/mdrun.h"
54 #include "gromacs/legacyheaders/names.h"
55 #include "gromacs/legacyheaders/network.h"
56 #include "gromacs/legacyheaders/nonbonded.h"
57 #include "gromacs/legacyheaders/nrnb.h"
58 #include "gromacs/legacyheaders/ns.h"
59 #include "gromacs/legacyheaders/qmmm.h"
60 #include "gromacs/legacyheaders/txtdump.h"
61 #include "gromacs/legacyheaders/typedefs.h"
62 #include "gromacs/legacyheaders/types/commrec.h"
63 #include "gromacs/listed-forces/listed-forces.h"
64 #include "gromacs/math/vec.h"
65 #include "gromacs/mdlib/forcerec-threading.h"
66 #include "gromacs/pbcutil/ishift.h"
67 #include "gromacs/pbcutil/mshift.h"
68 #include "gromacs/pbcutil/pbc.h"
69 #include "gromacs/timing/wallcycle.h"
70 #include "gromacs/utility/cstringutil.h"
71 #include "gromacs/utility/fatalerror.h"
72 #include "gromacs/utility/smalloc.h"
73
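/* Perform neighbour searching for the group cut-off scheme.  On the first
 * call the neighbour lists are initialized; with twin-range cut-offs the
 * long-range pair count is reset before the search.  When requested via
 * fr->ns.dump_nl, the resulting neighbour lists are dumped for debugging.
 */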
74 void ns(FILE              *fp,
75         t_forcerec        *fr,
76         matrix             box,
77         gmx_groups_t      *groups,
78         gmx_localtop_t    *top,
79         t_mdatoms         *md,
80         t_commrec         *cr,
81         t_nrnb            *nrnb,
82         gmx_bool           bFillGrid,
83         gmx_bool           bDoLongRangeNS)
84 {
85     int     nsearch;
86
87
88     if (!fr->ns.nblist_initialized)
89     {
90         init_neighbor_list(fp, fr, md->homenr);
91     }
92
93     if (fr->bTwinRange)
94     {
95         fr->nlr = 0;
96     }
97
98     nsearch = search_neighbours(fp, fr, box, top, groups, cr, nrnb, md,
99                                 bFillGrid, bDoLongRangeNS);
100     if (debug)
101     {
102         fprintf(debug, "nsearch = %d\n", nsearch);
103     }
104
105     /* Check whether we have to do dynamic load balancing */
106     /*if ((nsb->nstDlb > 0) && (mod(step,nsb->nstDlb) == 0))
107        count_nb(cr,nsb,&(top->blocks[ebCGS]),nns,fr->nlr,
108        &(top->idef),opts->ngener);
109      */
110     if (fr->ns.dump_nl > 0)
111     {
112         dump_nblist(fp, cr, fr, fr->ns.dump_nl);
113     }
114 }
115
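/* Reduce the Ewald-correction contributions of OpenMP threads 1..nthreads-1
 * into the thread-0 accumulators passed in by the caller: correction
 * energies, dV/dlambda terms and virial tensors for both Coulomb and LJ.
 */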
116 static void reduce_thread_energies(tensor vir_q, tensor vir_lj,
117                                    real *Vcorr_q, real *Vcorr_lj,
118                                    real *dvdl_q, real *dvdl_lj,
119                                    int nthreads,
120                                    ewald_corr_thread_t *ewc_t)
121 {
122     int t;
123
124     for (t = 1; t < nthreads; t++)
125     {
126         *Vcorr_q  += ewc_t[t].Vcorr_q;
127         *Vcorr_lj += ewc_t[t].Vcorr_lj;
128         *dvdl_q   += ewc_t[t].dvdl[efptCOUL];
129         *dvdl_lj  += ewc_t[t].dvdl[efptVDW];
130         m_add(vir_q, ewc_t[t].vir_q, vir_q);
131         m_add(vir_lj, ewc_t[t].vir_lj, vir_lj);
132     }
133 }
134
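/* Compute all "low-level" contributions to the forces and energies:
 * QM/MM, walls, generalized Born terms, the group-scheme short-range
 * non-bonded interactions, the listed (bonded) interactions, and the
 * long-range Coulomb and LJ parts (Ewald/PME) with their corrections.
 * The accumulated PME cycle count is returned in *cycles_pme.
 */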
135 void do_force_lowlevel(t_forcerec *fr,      t_inputrec *ir,
136                        t_idef     *idef,    t_commrec  *cr,
137                        t_nrnb     *nrnb,    gmx_wallcycle_t wcycle,
138                        t_mdatoms  *md,
139                        rvec       x[],      history_t  *hist,
140                        rvec       f[],
141                        rvec       f_longrange[],
142                        gmx_enerdata_t *enerd,
143                        t_fcdata   *fcd,
144                        gmx_localtop_t *top,
145                        gmx_genborn_t *born,
146                        gmx_bool       bBornRadii,
147                        matrix     box,
148                        t_lambda   *fepvals,
149                        real       *lambda,
150                        t_graph    *graph,
151                        t_blocka   *excl,
152                        rvec       mu_tot[],
153                        int        flags,
154                        float      *cycles_pme)
155 {
156     int         i, j;
157     int         donb_flags;
158     gmx_bool    bSB;
159     int         pme_flags;
160     matrix      boxs;
161     rvec        box_size;
162     t_pbc       pbc;
163     real        dvdl_dum[efptNR], dvdl_nb[efptNR];
164
165 #ifdef GMX_MPI
166     double  t0 = 0.0, t1, t2, t3; /* time measurement for coarse load balancing */
167 #endif
168
169     set_pbc(&pbc, fr->ePBC, box);
170
171     /* reset free energy components */
172     for (i = 0; i < efptNR; i++)
173     {
174         dvdl_nb[i]  = 0;
175         dvdl_dum[i] = 0;
176     }
177
178     /* Store the diagonal box lengths in box_size */
179     for (i = 0; (i < DIM); i++)
180     {
181         box_size[i] = box[i][i];
182     }
183
184     debug_gmx();
185
186     /* do QMMM first if requested */
187     if (fr->bQMMM)
188     {
189         enerd->term[F_EQM] = calculate_QMMM(cr, x, f, fr);
190     }
191
192     /* Call the short range functions all in one go. */
193
194 #ifdef GMX_MPI
195     /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/
196 #define TAKETIME FALSE
197     if (TAKETIME)
198     {
199         MPI_Barrier(cr->mpi_comm_mygroup);
200         t0 = MPI_Wtime();
201     }
202 #endif
203
204     if (ir->nwall)
205     {
206         /* foreign lambda component for walls */
207         real dvdl_walls = do_walls(ir, fr, box, md, x, f, lambda[efptVDW],
208                                    enerd->grpp.ener[egLJSR], nrnb);
209         enerd->dvdl_lin[efptVDW] += dvdl_walls;
210     }
211
212     /* If doing GB, reset dvda and calculate the Born radii */
213     if (ir->implicit_solvent)
214     {
215         wallcycle_sub_start(wcycle, ewcsNONBONDED);
216
217         for (i = 0; i < born->nr; i++)
218         {
219             fr->dvda[i] = 0;
220         }
221
222         if (bBornRadii)
223         {
224             calc_gb_rad(cr, fr, ir, top, x, &(fr->gblist), born, md, nrnb);
225         }
226
227         wallcycle_sub_stop(wcycle, ewcsNONBONDED);
228     }
229
230     where();
231     /* We only do the non-bonded calculation with the group scheme here;
232      * the Verlet-scheme calls are done from do_force_cutsVERLET(). */
233     if (fr->cutoff_scheme == ecutsGROUP && (flags & GMX_FORCE_NONBONDED))
234     {
235         donb_flags = 0;
236         /* Add short-range interactions */
237         donb_flags |= GMX_NONBONDED_DO_SR;
238
239         /* Currently all group scheme kernels always calculate (shift-)forces */
240         if (flags & GMX_FORCE_FORCES)
241         {
242             donb_flags |= GMX_NONBONDED_DO_FORCE;
243         }
244         if (flags & GMX_FORCE_VIRIAL)
245         {
246             donb_flags |= GMX_NONBONDED_DO_SHIFTFORCE;
247         }
248         if (flags & GMX_FORCE_ENERGY)
249         {
250             donb_flags |= GMX_NONBONDED_DO_POTENTIAL;
251         }
252         if (flags & GMX_FORCE_DO_LR)
253         {
254             donb_flags |= GMX_NONBONDED_DO_LR;
255         }
256
257         wallcycle_sub_start(wcycle, ewcsNONBONDED);
258         do_nonbonded(fr, x, f, f_longrange, md, excl,
259                      &enerd->grpp, nrnb,
260                      lambda, dvdl_nb, -1, -1, donb_flags);
261
262         /* If we do foreign lambdas and have soft-core interactions,
263          * we have to recalculate the (non-linear) energy contributions.
264          */
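        /* Index 0 of enerd->enerpart_lambda corresponds to the current lambda
         * state; the remaining entries use the foreign lambda vectors from
         * fepvals->all_lambda, as set up in the lam_i construction below.
         */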
265         if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0)
266         {
267             for (i = 0; i < enerd->n_lambda; i++)
268             {
269                 real lam_i[efptNR];
270
271                 for (j = 0; j < efptNR; j++)
272                 {
273                     lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
274                 }
275                 reset_foreign_enerdata(enerd);
276                 do_nonbonded(fr, x, f, f_longrange, md, excl,
277                              &(enerd->foreign_grpp), nrnb,
278                              lam_i, dvdl_dum, -1, -1,
279                              (donb_flags & ~GMX_NONBONDED_DO_FORCE) | GMX_NONBONDED_DO_FOREIGNLAMBDA);
280                 sum_epot(&(enerd->foreign_grpp), enerd->foreign_term);
281                 enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT];
282             }
283         }
284         wallcycle_sub_stop(wcycle, ewcsNONBONDED);
285         where();
286     }
287
288     /* If we are doing GB, calculate bonded forces and apply corrections
289      * to the solvation forces */
290     /* MRS: Eventually, we may need to include the free energy contribution here! */
291     if (ir->implicit_solvent)
292     {
293         wallcycle_sub_start(wcycle, ewcsLISTED);
294         calc_gb_forces(cr, md, born, top, x, f, fr, idef,
295                        ir->gb_algorithm, ir->sa_algorithm, nrnb, &pbc, graph, enerd);
296         wallcycle_sub_stop(wcycle, ewcsLISTED);
297     }
298
299 #ifdef GMX_MPI
300     if (TAKETIME)
301     {
302         t1          = MPI_Wtime();
303         fr->t_fnbf += t1-t0;
304     }
305 #endif
306
307     if (fepvals->sc_alpha != 0)
308     {
309         enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW];
310     }
311     else
312     {
313         enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW];
314     }
315
316     if (fepvals->sc_alpha != 0)
317     {
318         /* Even though the Coulomb part is linear, we already added it,
319          * because we need to go through the vdW calculation anyway.
320          */
321         enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL];
322     }
323     else
324     {
325         enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL];
326     }
327
328     debug_gmx();
329
330
331     if (debug)
332     {
333         pr_rvecs(debug, 0, "fshift after SR", fr->fshift, SHIFTS);
334     }
335
336     /* Shift the coordinates. This must be done before listed forces and PPPM,
337      * but is also needed for SHAKE and the update, so it cannot be skipped
338      * even when no listed forces have to be evaluated.
339      *
340      * The shifting and PBC code is deliberately not timed, since with
341      * the Verlet scheme it only takes non-zero time with triclinic
342      * boxes, and even then the time is around a factor of 100 less
343      * than the next smallest counter.
344      */
345
346
347     /* Sometimes, e.g. when only non-bonded forces are needed, we would not need
348      * to shift here, but we do so anyhow for consistency of the returned coordinates.
349      */
350     if (graph)
351     {
352         shift_self(graph, box, x);
353         if (TRICLINIC(box))
354         {
355             inc_nrnb(nrnb, eNR_SHIFTX, 2*graph->nnodes);
356         }
357         else
358         {
359             inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes);
360         }
361     }
362     /* Check whether we need to do listed interactions or correct for exclusions */
363     if (fr->bMolPBC &&
364         ((flags & GMX_FORCE_LISTED)
365          || EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype)))
366     {
367         /* TODO With the Verlet scheme there are no electrostatics methods
368            that require this transformation, so update the above
369            conditional. */
370         /* Since all atoms are in the rectangular or triclinic unit-cell,
371          * only single box vector shifts (2 in x) are required.
372          */
373         set_pbc_dd(&pbc, fr->ePBC, cr->dd, TRUE, box);
374     }
375     debug_gmx();
376
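    /* Compute the listed (bonded) interactions; with domain decomposition the
     * global atom index table is passed along with the local coordinates.
     */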
377     do_force_listed(wcycle, box, ir->fepvals, cr->ms,
378                     idef, (const rvec *) x, hist, f, fr,
379                     &pbc, graph, enerd, nrnb, lambda, md, fcd,
380                     DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL,
381                     flags);
382
383     where();
384
385     *cycles_pme = 0;
386     clear_mat(fr->vir_el_recip);
387     clear_mat(fr->vir_lj_recip);
388
389     /* Do long-range electrostatics and/or LJ-PME, including related short-range
390      * corrections.
391      */
392     if (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype))
393     {
394         int  status            = 0;
395         real Vlr_q             = 0, Vlr_lj = 0, Vcorr_q = 0, Vcorr_lj = 0;
396         real dvdl_long_range_q = 0, dvdl_long_range_lj = 0;
397
398         bSB = (ir->nwall == 2);
399         if (bSB)
400         {
401             copy_mat(box, boxs);
402             svmul(ir->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]);
403             box_size[ZZ] *= ir->wall_ewald_zfac;
404         }
405
406         if (EEL_PME_EWALD(fr->eeltype) || EVDW_PME(fr->vdwtype))
407         {
408             real dvdl_long_range_correction_q   = 0;
409             real dvdl_long_range_correction_lj  = 0;
410             /* With the Verlet scheme exclusion forces are calculated
411              * in the non-bonded kernel.
412              */
413             /* The TPI molecule does not have exclusions with the rest
414              * of the system and no intra-molecular PME grid
415              * contributions will be calculated in
416              * gmx_pme_calc_energy.
417              */
418             if ((ir->cutoff_scheme == ecutsGROUP && fr->n_tpi == 0) ||
419                 ir->ewald_geometry != eewg3D ||
420                 ir->epsilon_surface != 0)
421             {
422                 int nthreads, t;
423
424                 wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION);
425
426                 if (fr->n_tpi > 0)
427                 {
428                     gmx_fatal(FARGS, "TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions");
429                 }
430
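                /* The Ewald exclusion/surface corrections are computed in
                 * parallel: thread 0 accumulates directly into the output
                 * variables, the other threads into fr->ewc_t scratch buffers
                 * that are summed afterwards by reduce_thread_energies().
                 */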
431                 nthreads = fr->nthread_ewc;
432 #pragma omp parallel for num_threads(nthreads) schedule(static)
433                 for (t = 0; t < nthreads; t++)
434                 {
435                     tensor *vir_q, *vir_lj;
436                     real   *Vcorrt_q, *Vcorrt_lj, *dvdlt_q, *dvdlt_lj;
437                     if (t == 0)
438                     {
439                         vir_q     = &fr->vir_el_recip;
440                         vir_lj    = &fr->vir_lj_recip;
441                         Vcorrt_q  = &Vcorr_q;
442                         Vcorrt_lj = &Vcorr_lj;
443                         dvdlt_q   = &dvdl_long_range_correction_q;
444                         dvdlt_lj  = &dvdl_long_range_correction_lj;
445                     }
446                     else
447                     {
448                         vir_q     = &fr->ewc_t[t].vir_q;
449                         vir_lj    = &fr->ewc_t[t].vir_lj;
450                         Vcorrt_q  = &fr->ewc_t[t].Vcorr_q;
451                         Vcorrt_lj = &fr->ewc_t[t].Vcorr_lj;
452                         dvdlt_q   = &fr->ewc_t[t].dvdl[efptCOUL];
453                         dvdlt_lj  = &fr->ewc_t[t].dvdl[efptVDW];
454                         clear_mat(*vir_q);
455                         clear_mat(*vir_lj);
456                     }
457                     *dvdlt_q  = 0;
458                     *dvdlt_lj = 0;
459
460                     /* Threading is only supported with the Verlet cut-off
461                      * scheme and then only single particle forces (no
462                      * exclusion forces) are calculated, so we can store
463                      * the forces in the normal, single fr->f_novirsum array.
464                      */
465                     ewald_LRcorrection(fr->excl_load[t], fr->excl_load[t+1],
466                                        cr, t, fr,
467                                        md->chargeA, md->chargeB,
468                                        md->sqrt_c6A, md->sqrt_c6B,
469                                        md->sigmaA, md->sigmaB,
470                                        md->sigma3A, md->sigma3B,
471                                        md->nChargePerturbed || md->nTypePerturbed,
472                                        ir->cutoff_scheme != ecutsVERLET,
473                                        excl, x, bSB ? boxs : box, mu_tot,
474                                        ir->ewald_geometry,
475                                        ir->epsilon_surface,
476                                        fr->f_novirsum, *vir_q, *vir_lj,
477                                        Vcorrt_q, Vcorrt_lj,
478                                        lambda[efptCOUL], lambda[efptVDW],
479                                        dvdlt_q, dvdlt_lj);
480                 }
481                 if (nthreads > 1)
482                 {
483                     reduce_thread_energies(fr->vir_el_recip, fr->vir_lj_recip,
484                                            &Vcorr_q, &Vcorr_lj,
485                                            &dvdl_long_range_correction_q,
486                                            &dvdl_long_range_correction_lj,
487                                            nthreads, fr->ewc_t);
488                 }
489                 wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION);
490             }
491
492             if (EEL_PME_EWALD(fr->eeltype) && fr->n_tpi == 0)
493             {
494                 /* This is not in a subcounter because it takes a
495                    negligible and constant-sized amount of time */
496                 Vcorr_q += ewald_charge_correction(cr, fr, lambda[efptCOUL], box,
497                                                    &dvdl_long_range_correction_q,
498                                                    fr->vir_el_recip);
499             }
500
501             enerd->dvdl_lin[efptCOUL] += dvdl_long_range_correction_q;
502             enerd->dvdl_lin[efptVDW]  += dvdl_long_range_correction_lj;
503
504             if ((EEL_PME(fr->eeltype) || EVDW_PME(fr->vdwtype)) && (cr->duty & DUTY_PME))
505             {
506                 /* Do reciprocal PME for Coulomb and/or LJ. */
507                 assert(fr->n_tpi >= 0);
508                 if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED))
509                 {
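                    /* Assemble the PME task flags: always spread and solve,
                     * then add Coulomb and/or LJ grids, forces, energy/virial
                     * and, for TPI, potential-only output as requested.
                     */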
510                     pme_flags = GMX_PME_SPREAD | GMX_PME_SOLVE;
511                     if (EEL_PME(fr->eeltype))
512                     {
513                         pme_flags     |= GMX_PME_DO_COULOMB;
514                     }
515                     if (EVDW_PME(fr->vdwtype))
516                     {
517                         pme_flags |= GMX_PME_DO_LJ;
518                     }
519                     if (flags & GMX_FORCE_FORCES)
520                     {
521                         pme_flags |= GMX_PME_CALC_F;
522                     }
523                     if (flags & GMX_FORCE_VIRIAL)
524                     {
525                         pme_flags |= GMX_PME_CALC_ENER_VIR;
526                     }
527                     if (fr->n_tpi > 0)
528                     {
529                         /* We don't calculate f, but we do want the potential */
530                         pme_flags |= GMX_PME_CALC_POT;
531                     }
532                     wallcycle_start(wcycle, ewcPMEMESH);
533                     status = gmx_pme_do(fr->pmedata,
534                                         0, md->homenr - fr->n_tpi,
535                                         x, fr->f_novirsum,
536                                         md->chargeA, md->chargeB,
537                                         md->sqrt_c6A, md->sqrt_c6B,
538                                         md->sigmaA, md->sigmaB,
539                                         bSB ? boxs : box, cr,
540                                         DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0,
541                                         DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0,
542                                         nrnb, wcycle,
543                                         fr->vir_el_recip, fr->ewaldcoeff_q,
544                                         fr->vir_lj_recip, fr->ewaldcoeff_lj,
545                                         &Vlr_q, &Vlr_lj,
546                                         lambda[efptCOUL], lambda[efptVDW],
547                                         &dvdl_long_range_q, &dvdl_long_range_lj, pme_flags);
548                     *cycles_pme = wallcycle_stop(wcycle, ewcPMEMESH);
549                     if (status != 0)
550                     {
551                         gmx_fatal(FARGS, "Error %d in reciprocal PME routine", status);
552                     }
553                     /* We should try to do as little computation after
554                      * this as possible, because parallel PME synchronizes
555                      * the nodes, so we want all load imbalance of the
556                      * rest of the force calculation to be before the PME
557                      * call.  DD load balancing is done on the whole time
558                      * of the force call (without PME).
559                      */
560                 }
561                 if (fr->n_tpi > 0)
562                 {
563                     if (EVDW_PME(ir->vdwtype))
564                     {
565                         gmx_fatal(FARGS, "Test particle insertion not implemented with LJ-PME");
567                     }
568                     /* Determine the PME grid energy of the test molecule
569                      * with the PME grid potential of the other charges.
570                      */
571                     gmx_pme_calc_energy(fr->pmedata, fr->n_tpi,
572                                         x + md->homenr - fr->n_tpi,
573                                         md->chargeA + md->homenr - fr->n_tpi,
574                                         &Vlr_q);
575                 }
576             }
577         }
578
579         if (!EEL_PME(fr->eeltype) && EEL_PME_EWALD(fr->eeltype))
580         {
581             Vlr_q = do_ewald(ir, x, fr->f_novirsum,
582                              md->chargeA, md->chargeB,
583                              box_size, cr, md->homenr,
584                              fr->vir_el_recip, fr->ewaldcoeff_q,
585                              lambda[efptCOUL], &dvdl_long_range_q, fr->ewald_table);
586         }
587
588         /* Note that with separate PME nodes we get the real energies later */
589         enerd->dvdl_lin[efptCOUL] += dvdl_long_range_q;
590         enerd->dvdl_lin[efptVDW]  += dvdl_long_range_lj;
591         enerd->term[F_COUL_RECIP]  = Vlr_q + Vcorr_q;
592         enerd->term[F_LJ_RECIP]    = Vlr_lj + Vcorr_lj;
593         if (debug)
594         {
595             fprintf(debug, "Vlr_q = %g, Vcorr_q = %g, Vlr_corr_q = %g\n",
596                     Vlr_q, Vcorr_q, enerd->term[F_COUL_RECIP]);
597             pr_rvecs(debug, 0, "vir_el_recip after corr", fr->vir_el_recip, DIM);
598             pr_rvecs(debug, 0, "fshift after LR Corrections", fr->fshift, SHIFTS);
599             fprintf(debug, "Vlr_lj: %g, Vcorr_lj = %g, Vlr_corr_lj = %g\n",
600                     Vlr_lj, Vcorr_lj, enerd->term[F_LJ_RECIP]);
601             pr_rvecs(debug, 0, "vir_lj_recip after corr", fr->vir_lj_recip, DIM);
602         }
603     }
604     else
605     {
606         /* Is there a reaction-field exclusion correction needed? */
607         if (EEL_RF(fr->eeltype) && eelRF_NEC != fr->eeltype)
608         {
609             /* With the Verlet scheme, exclusion forces are calculated
610              * in the non-bonded kernel.
611              */
612             if (ir->cutoff_scheme != ecutsVERLET)
613             {
614                 real dvdl_rf_excl      = 0;
615                 enerd->term[F_RF_EXCL] =
616                     RF_excl_correction(fr, graph, md, excl, x, f,
617                                        fr->fshift, &pbc, lambda[efptCOUL], &dvdl_rf_excl);
618
619                 enerd->dvdl_lin[efptCOUL] += dvdl_rf_excl;
620             }
621         }
622     }
623     where();
624     debug_gmx();
625
626     if (debug)
627     {
628         print_nrnb(debug, nrnb);
629     }
630     debug_gmx();
631
632 #ifdef GMX_MPI
633     if (TAKETIME)
634     {
635         t2 = MPI_Wtime();
636         MPI_Barrier(cr->mpi_comm_mygroup);
637         t3          = MPI_Wtime();
638         fr->t_wait += t3-t2;
639         if (fr->timesteps == 11)
640         {
641             char buf[22];
642             fprintf(stderr, "* PP load balancing info: rank %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n",
643                     cr->nodeid, gmx_step_str(fr->timesteps, buf),
644                     100*fr->t_wait/(fr->t_wait+fr->t_fnbf),
645                     (fr->t_fnbf+fr->t_wait)/fr->t_fnbf);
646         }
647         fr->timesteps++;
648     }
649 #endif
650
651     if (debug)
652     {
653         pr_rvecs(debug, 0, "fshift after bondeds", fr->fshift, SHIFTS);
654     }
655
656 }
657
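/* Allocate and zero the energy data: the per-group pair-energy matrices are
 * sized ngener*ngener, and with free-energy calculations n_lambda+1 foreign
 * energy slots are allocated (index 0 holds the current lambda state).
 */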
658 void init_enerdata(int ngener, int n_lambda, gmx_enerdata_t *enerd)
659 {
660     int i, n2;
661
662     for (i = 0; i < F_NRE; i++)
663     {
664         enerd->term[i]         = 0;
665         enerd->foreign_term[i] = 0;
666     }
667
668
669     for (i = 0; i < efptNR; i++)
670     {
671         enerd->dvdl_lin[i]     = 0;
672         enerd->dvdl_nonlin[i]  = 0;
673     }
674
675     n2 = ngener*ngener;
676     if (debug)
677     {
678         fprintf(debug, "Creating %d sized group matrix for energies\n", n2);
679     }
680     enerd->grpp.nener         = n2;
681     enerd->foreign_grpp.nener = n2;
682     for (i = 0; (i < egNR); i++)
683     {
684         snew(enerd->grpp.ener[i], n2);
685         snew(enerd->foreign_grpp.ener[i], n2);
686     }
687
688     if (n_lambda)
689     {
690         enerd->n_lambda = 1 + n_lambda;
691         snew(enerd->enerpart_lambda, enerd->n_lambda);
692     }
693     else
694     {
695         enerd->n_lambda = 0;
696     }
697 }
698
699 void destroy_enerdata(gmx_enerdata_t *enerd)
700 {
701     int i;
702
703     for (i = 0; (i < egNR); i++)
704     {
705         sfree(enerd->grpp.ener[i]);
706     }
707
708     for (i = 0; (i < egNR); i++)
709     {
710         sfree(enerd->foreign_grpp.ener[i]);
711     }
712
713     if (enerd->n_lambda)
714     {
715         sfree(enerd->enerpart_lambda);
716     }
717 }
718
719 static real sum_v(int n, real v[])
720 {
721     real t;
722     int  i;
723
724     t = 0.0;
725     for (i = 0; (i < n); i++)
726     {
727         t = t + v[i];
728     }
729
730     return t;
731 }
732
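/* Sum the group-pair energy contributions into the total energy terms and
 * accumulate everything below F_EPOT into epot[F_EPOT], skipping the
 * restraint-violation terms F_DISRESVIOL and F_ORIRESDEV.
 */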
733 void sum_epot(gmx_grppairener_t *grpp, real *epot)
734 {
735     int i;
736
737     /* Accumulate energies */
738     epot[F_COUL_SR]  = sum_v(grpp->nener, grpp->ener[egCOULSR]);
739     epot[F_LJ]       = sum_v(grpp->nener, grpp->ener[egLJSR]);
740     epot[F_LJ14]     = sum_v(grpp->nener, grpp->ener[egLJ14]);
741     epot[F_COUL14]   = sum_v(grpp->nener, grpp->ener[egCOUL14]);
742     epot[F_COUL_LR]  = sum_v(grpp->nener, grpp->ener[egCOULLR]);
743     epot[F_LJ_LR]    = sum_v(grpp->nener, grpp->ener[egLJLR]);
744     /* We have already added the 1-2, 1-3, and 1-4 terms to F_GBPOL */
745     epot[F_GBPOL]   += sum_v(grpp->nener, grpp->ener[egGB]);
746
747     /* The lattice part of the long-range contribution doesn't belong to any
748      * group and has been added earlier.
749      */
750     epot[F_BHAM]     = sum_v(grpp->nener, grpp->ener[egBHAMSR]);
751     epot[F_BHAM_LR]  = sum_v(grpp->nener, grpp->ener[egBHAMLR]);
752
753     epot[F_EPOT] = 0;
754     for (i = 0; (i < F_EPOT); i++)
755     {
756         if (i != F_DISRESVIOL && i != F_ORIRESDEV)
757         {
758             epot[F_EPOT] += epot[i];
759         }
760     }
761 }
762
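/* Collect the dV/dlambda contributions into the energy terms: components with
 * separate_dvdl set go to their own F_DVDL_* term, the rest are summed into
 * F_DVDL.  Also add the linear dvdl part, weighted by the lambda difference,
 * to the foreign-lambda energy differences.
 */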
763 void sum_dhdl(gmx_enerdata_t *enerd, real *lambda, t_lambda *fepvals)
764 {
765     int    i, j, index;
766     double dlam;
767
768     enerd->dvdl_lin[efptVDW] += enerd->term[F_DVDL_VDW];  /* include dispersion correction */
769     enerd->term[F_DVDL]       = 0.0;
770     for (i = 0; i < efptNR; i++)
771     {
772         if (fepvals->separate_dvdl[i])
773         {
774             /* could this be done more readably/compactly? */
775             switch (i)
776             {
777                 case (efptMASS):
778                     index = F_DKDL;
779                     break;
780                 case (efptCOUL):
781                     index = F_DVDL_COUL;
782                     break;
783                 case (efptVDW):
784                     index = F_DVDL_VDW;
785                     break;
786                 case (efptBONDED):
787                     index = F_DVDL_BONDED;
788                     break;
789                 case (efptRESTRAINT):
790                     index = F_DVDL_RESTRAINT;
791                     break;
792                 default:
793                     index = F_DVDL;
794                     break;
795             }
796             enerd->term[index] = enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i];
797             if (debug)
798             {
799                 fprintf(debug, "dvdl-%s[%2d]: %f: non-linear %f + linear %f\n",
800                         efpt_names[i], i, enerd->term[index], enerd->dvdl_nonlin[i], enerd->dvdl_lin[i]);
801             }
802         }
803         else
804         {
805             enerd->term[F_DVDL] += enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i];
806             if (debug)
807             {
808                 fprintf(debug, "dvdl-%s[%2d]: %f: non-linear %f + linear %f\n",
809                         efpt_names[0], i, enerd->term[F_DVDL], enerd->dvdl_nonlin[i], enerd->dvdl_lin[i]);
810             }
811         }
812     }
813
814     /* Notes on the foreign lambda free energy difference evaluation:
815      * Adding the potential and ekin terms that depend linearly on lambda
816      * as delta lam * dvdl to the energy differences is exact.
817      * For the constraints this is not exact, but we have no other option
818      * without literally changing the lengths and reevaluating the energies at each step.
819      * (try to remedy this post 4.6 - MRS)
820      * For the non-bonded LR term we assume that the soft-core (if present)
821      * no longer affects the energy beyond the short-range cut-off,
822      * which is a very good approximation (except for exotic settings).
823      * (investigate how to overcome this post 4.6 - MRS)
824      */
825     if (fepvals->separate_dvdl[efptBONDED])
826     {
827         enerd->term[F_DVDL_BONDED] += enerd->term[F_DVDL_CONSTR];
828     }
829     else
830     {
831         enerd->term[F_DVDL] += enerd->term[F_DVDL_CONSTR];
832     }
833     enerd->term[F_DVDL_CONSTR] = 0;
834
835     for (i = 0; i < fepvals->n_lambda; i++)
836     {
837         /* note we are iterating over fepvals here!
838            For the current lam, dlam = 0 automatically,
839            so we don't need to add anything to the
840            enerd->enerpart_lambda[0] */
841
842         /* we don't need to worry about dvdl_lin contributions to dE at
843            current lambda, because the contributions to the current
844            lambda are automatically zeroed */
845
846         for (j = 0; j < efptNR; j++)
847         {
848             /* Note that this loop is over all dhdl components, not just the separated ones */
849             dlam = (fepvals->all_lambda[j][i]-lambda[j]);
850             enerd->enerpart_lambda[i+1] += dlam*enerd->dvdl_lin[j];
851             if (debug)
852             {
853                 fprintf(debug, "enerdiff lam %g: (%15s), non-linear %f linear %f*%f\n",
854                         fepvals->all_lambda[j][i], efpt_names[j],
855                         (enerd->enerpart_lambda[i+1] - enerd->enerpart_lambda[0]),
856                         dlam, enerd->dvdl_lin[j]);
857             }
858         }
859     }
860 }
861
862
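/* Zero all foreign-lambda energy accumulators (group-pair energies and the
 * potential-energy terms up to and including F_EPOT).
 */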
863 void reset_foreign_enerdata(gmx_enerdata_t *enerd)
864 {
865     int  i, j;
866
867     /* First reset all foreign energy components.  Foreign energies are
868        always calculated on neighbor-search steps. */
869     for (i = 0; (i < egNR); i++)
870     {
871         for (j = 0; (j < enerd->grpp.nener); j++)
872         {
873             enerd->foreign_grpp.ener[i][j] = 0.0;
874         }
875     }
876
877     /* potential energy components */
878     for (i = 0; (i <= F_EPOT); i++)
879     {
880         enerd->foreign_term[i] = 0.0;
881     }
882 }
883
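/* Zero the energy accumulators for a new step.  With twin-range cut-offs the
 * long-range group energies on the master are kept on non-NS steps, since
 * they were summed at the last neighbour-search step.
 */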
884 void reset_enerdata(t_forcerec *fr, gmx_bool bNS,
885                     gmx_enerdata_t *enerd,
886                     gmx_bool bMaster)
887 {
888     gmx_bool bKeepLR;
889     int      i, j;
890
891     /* First reset all energy components, except for the long range terms
892      * on the master at non neighbor search steps, since the long range
893      * terms have already been summed at the last neighbor search step.
894      */
895     bKeepLR = (fr->bTwinRange && !bNS);
896     for (i = 0; (i < egNR); i++)
897     {
898         if (!(bKeepLR && bMaster && (i == egCOULLR || i == egLJLR)))
899         {
900             for (j = 0; (j < enerd->grpp.nener); j++)
901             {
902                 enerd->grpp.ener[i][j] = 0.0;
903             }
904         }
905     }
906     for (i = 0; i < efptNR; i++)
907     {
908         enerd->dvdl_lin[i]    = 0.0;
909         enerd->dvdl_nonlin[i] = 0.0;
910     }
911
912     /* Normal potential energy components */
913     for (i = 0; (i <= F_EPOT); i++)
914     {
915         enerd->term[i] = 0.0;
916     }
917     /* Reset the dVdlambda terms */
919     enerd->term[F_DVDL]            = 0.0;
920     enerd->term[F_DVDL_COUL]       = 0.0;
921     enerd->term[F_DVDL_VDW]        = 0.0;
922     enerd->term[F_DVDL_BONDED]     = 0.0;
923     enerd->term[F_DVDL_RESTRAINT]  = 0.0;
924     enerd->term[F_DKDL]            = 0.0;
925     if (enerd->n_lambda > 0)
926     {
927         for (i = 0; i < enerd->n_lambda; i++)
928         {
929             enerd->enerpart_lambda[i] = 0.0;
930         }
931     }
932     /* reset foreign energy data - separate function since we also call it elsewhere */
933     reset_foreign_enerdata(enerd);
934 }