Merge branch release-5-1 into release-2016
authorBerk Hess <hess@kth.se>
Fri, 2 Sep 2016 15:28:25 +0000 (17:28 +0200)
committerBerk Hess <hess@kth.se>
Fri, 2 Sep 2016 15:28:25 +0000 (17:28 +0200)
Change-Id: Ia1a7fad67f0ff11175ea24c46f813a445cd49ed6

1  2 
src/gromacs/domdec/domdec_topology.cpp
src/gromacs/ewald/long-range-correction.cpp
src/gromacs/ewald/long-range-correction.h
src/gromacs/mdlib/force.cpp
src/gromacs/mdlib/forcerec.cpp
src/gromacs/mdlib/minimize.cpp
src/gromacs/mdlib/update.cpp
src/gromacs/mdtypes/forcerec.h
src/programs/mdrun/md.cpp

index 7f54c1f9e5a63c1bb886d6aaf1e3bae8fff55910,f31c3f4fc10772ceac8ef46a7c1fc497bbb749ed..ae5ff5ed23c2116d688420cff6852c0a1bbaab58
  #include "gromacs/math/units.h"
  #include "gromacs/math/utilities.h"
  #include "gromacs/math/vec.h"
 +#include "gromacs/mdtypes/commrec.h"
 +#include "gromacs/mdtypes/forcerec.h"
 +#include "gromacs/mdtypes/md_enums.h"
 +#include "gromacs/utility/fatalerror.h"
+ #include "gromacs/utility/gmxassert.h"
  
  /* There's nothing special to do here if just masses are perturbed,
   * but if either charge or type is perturbed then the implementation
@@@ -75,8 -77,24 +78,24 @@@ void ewald_LRcorrection(int numAtomsLoc
                          real lambda_q, real lambda_lj,
                          real *dvdlambda_q, real *dvdlambda_lj)
  {
+     int numAtomsToBeCorrected;
+     if (calc_excl_corr)
+     {
+         /* We need to correct all exclusion pairs (cutoff-scheme = group) */
+         numAtomsToBeCorrected = excl->nr;
+         GMX_RELEASE_ASSERT(numAtomsToBeCorrected >= numAtomsLocal, "We might need to do self-corrections");
+     }
+     else
+     {
+         /* We need to correct only self interactions */
+         numAtomsToBeCorrected = numAtomsLocal;
+     }
+     int         start =  (numAtomsToBeCorrected* thread     )/numThreads;
+     int         end   =  (numAtomsToBeCorrected*(thread + 1))/numThreads;
      int         i, i1, i2, j, k, m, iv, jv, q;
 -    atom_id    *AA;
 +    int        *AA;
      double      Vexcl_q, dvdl_excl_q, dvdl_excl_lj; /* Necessary for precision */
      double      Vexcl_lj;
      real        one_4pi_eps;
index 107e00a8e7b3393ec0eaf42a07f584d29c6ea4eb,c6d18a862ee109a32130b14b50c08caa66b9dcee..90c4b02f5772714ec0be982d49ea8a27a1b365b1
@@@ -417,55 -443,53 +417,54 @@@ void do_force_lowlevel(t_forcerec *fr
  #pragma omp parallel for num_threads(nthreads) schedule(static)
                  for (t = 0; t < nthreads; t++)
                  {
 -                    int     i;
 -                    rvec   *fnv;
 -                    tensor *vir_q, *vir_lj;
 -                    real   *Vcorrt_q, *Vcorrt_lj, *dvdlt_q, *dvdlt_lj;
 -                    if (t == 0)
 +                    try
                      {
 -                        fnv       = fr->f_novirsum;
 -                        vir_q     = &fr->vir_el_recip;
 -                        vir_lj    = &fr->vir_lj_recip;
 -                        Vcorrt_q  = &Vcorr_q;
 -                        Vcorrt_lj = &Vcorr_lj;
 -                        dvdlt_q   = &dvdl_long_range_correction_q;
 -                        dvdlt_lj  = &dvdl_long_range_correction_lj;
 -                    }
 -                    else
 -                    {
 -                        fnv       = fr->f_t[t].f;
 -                        vir_q     = &fr->f_t[t].vir_q;
 -                        vir_lj    = &fr->f_t[t].vir_lj;
 -                        Vcorrt_q  = &fr->f_t[t].Vcorr_q;
 -                        Vcorrt_lj = &fr->f_t[t].Vcorr_lj;
 -                        dvdlt_q   = &fr->f_t[t].dvdl[efptCOUL];
 -                        dvdlt_lj  = &fr->f_t[t].dvdl[efptVDW];
 -                        for (i = 0; i < fr->natoms_force; i++)
 +                        tensor *vir_q, *vir_lj;
 +                        real   *Vcorrt_q, *Vcorrt_lj, *dvdlt_q, *dvdlt_lj;
 +                        if (t == 0)
 +                        {
 +                            vir_q     = &fr->vir_el_recip;
 +                            vir_lj    = &fr->vir_lj_recip;
 +                            Vcorrt_q  = &Vcorr_q;
 +                            Vcorrt_lj = &Vcorr_lj;
 +                            dvdlt_q   = &dvdl_long_range_correction_q;
 +                            dvdlt_lj  = &dvdl_long_range_correction_lj;
 +                        }
 +                        else
                          {
 -                            clear_rvec(fnv[i]);
 +                            vir_q     = &fr->ewc_t[t].vir_q;
 +                            vir_lj    = &fr->ewc_t[t].vir_lj;
 +                            Vcorrt_q  = &fr->ewc_t[t].Vcorr_q;
 +                            Vcorrt_lj = &fr->ewc_t[t].Vcorr_lj;
 +                            dvdlt_q   = &fr->ewc_t[t].dvdl[efptCOUL];
 +                            dvdlt_lj  = &fr->ewc_t[t].dvdl[efptVDW];
 +                            clear_mat(*vir_q);
 +                            clear_mat(*vir_lj);
                          }
 -                        clear_mat(*vir_q);
 -                        clear_mat(*vir_lj);
 +                        *dvdlt_q  = 0;
 +                        *dvdlt_lj = 0;
 +
 +                        /* Threading is only supported with the Verlet cut-off
 +                         * scheme and then only single particle forces (no
 +                         * exclusion forces) are calculated, so we can store
 +                         * the forces in the normal, single fr->f_novirsum array.
 +                         */
-                         ewald_LRcorrection(fr->excl_load[t], fr->excl_load[t+1],
-                                            cr, t, fr,
++                        ewald_LRcorrection(md->homenr, cr, nthreads, t, fr,
 +                                           md->chargeA, md->chargeB,
 +                                           md->sqrt_c6A, md->sqrt_c6B,
 +                                           md->sigmaA, md->sigmaB,
 +                                           md->sigma3A, md->sigma3B,
 +                                           md->nChargePerturbed || md->nTypePerturbed,
 +                                           ir->cutoff_scheme != ecutsVERLET,
 +                                           excl, x, bSB ? boxs : box, mu_tot,
 +                                           ir->ewald_geometry,
 +                                           ir->epsilon_surface,
 +                                           fr->f_novirsum, *vir_q, *vir_lj,
 +                                           Vcorrt_q, Vcorrt_lj,
 +                                           lambda[efptCOUL], lambda[efptVDW],
 +                                           dvdlt_q, dvdlt_lj);
                      }
 -                    *dvdlt_q  = 0;
 -                    *dvdlt_lj = 0;
 -
 -                    ewald_LRcorrection(md->homenr, cr, nthreads, t, fr,
 -                                       md->chargeA, md->chargeB,
 -                                       md->sqrt_c6A, md->sqrt_c6B,
 -                                       md->sigmaA, md->sigmaB,
 -                                       md->sigma3A, md->sigma3B,
 -                                       md->nChargePerturbed || md->nTypePerturbed,
 -                                       ir->cutoff_scheme != ecutsVERLET,
 -                                       excl, x, bSB ? boxs : box, mu_tot,
 -                                       ir->ewald_geometry,
 -                                       ir->epsilon_surface,
 -                                       fnv, *vir_q, *vir_lj,
 -                                       Vcorrt_q, Vcorrt_lj,
 -                                       lambda[efptCOUL], lambda[efptVDW],
 -                                       dvdlt_q, dvdlt_lj);
 +                    GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
                  }
                  if (nthreads > 1)
                  {
index 703a1cd471ad50196031a097fa5ad0d9349ed62e,e63381eaf1ab89ea0ca5bb96640300b630ca2673..6b5a102f83a2f9adc68960e8b672e1c253384ca1
@@@ -3181,12 -3288,13 +3181,11 @@@ void init_forcerec(FILE              *f
      }
  
      /* Initialize the thread working data for bonded interactions */
 -    init_bonded_threading(fp, fr, mtop->groups.grps[egcENER].nr);
 +    init_bonded_threading(fp, mtop->groups.grps[egcENER].nr,
 +                          &fr->bonded_threading);
 +
 +    fr->nthread_ewc = gmx_omp_nthreads_get(emntBonded);
 +    snew(fr->ewc_t, fr->nthread_ewc);
-     snew(fr->excl_load, fr->nthread_ewc + 1);
  
      /* fr->ic is used both by verlet and group kernels (to some extent) now */
      init_interaction_const(fp, &fr->ic, fr);
index 3890e24e3734a167e3e6c9efab91a204945dcbc1,fb60991a55779433b1f6ec5e80b4f7acc684d67d..591e20a7dc88cbfa338f971b35b97ed014b2a718
@@@ -399,10 -373,9 +399,8 @@@ void init_em(FILE *fplog, const char *t
          }
          copy_mat(state_global->box, ems->s.box);
  
 -        *top      = gmx_mtop_generate_local_top(top_global, ir);
 -        *f_global = *f;
 +        *top      = gmx_mtop_generate_local_top(top_global, ir->efep != efepNO);
  
-         forcerec_set_excl_load(fr, *top);
          setup_bonded_threading(fr, &(*top)->idef);
  
          if (ir->ePBC != epbcNONE && !fr->bMolPBC)
index 0a65e04d0b396f68d1ab5fbb81558fe750e8ce76,f6f86195667019bebd416e1026c1704468b988e9..cc01f266c007e30ea5691bdc99942026f6c9950c
@@@ -110,10 -111,11 +110,11 @@@ struct gmx_update_
      /* Variables for the deform algorithm */
      gmx_int64_t     deformref_step;
      matrix          deformref_box;
 -} t_gmx_update;
 +};
  
  
- static void do_update_md(int start, int nrend, double dt,
+ static void do_update_md(int start, int nrend,
+                          double dt, int nstpcouple,
                           t_grp_tcstat *tcstat,
                           double nh_vxi[],
                           gmx_bool bNEMD, t_grp_acc *gstat, rvec accel[],
@@@ -1657,88 -1996,101 +1659,90 @@@ void update_coords(FILE             *fp
  #pragma omp parallel for num_threads(nth) schedule(static) private(alpha)
      for (th = 0; th < nth; th++)
      {
 -        int start_th, end_th;
 +        try
 +        {
 +            int start_th, end_th;
  
 -        start_th = start + ((nrend-start)* th   )/nth;
 -        end_th   = start + ((nrend-start)*(th+1))/nth;
 +            start_th = start + ((nrend-start)* th   )/nth;
 +            end_th   = start + ((nrend-start)*(th+1))/nth;
  
 -        switch (inputrec->eI)
 -        {
 -            case (eiMD):
 -                if (ekind->cosacc.cos_accel == 0)
 -                {
 -                    do_update_md(start_th, end_th,
 -                                 dt, inputrec->nstpcouple,
 -                                 ekind->tcstat, state->nosehoover_vxi,
 -                                 ekind->bNEMD, ekind->grpstat, inputrec->opts.acc,
 -                                 inputrec->opts.nFreeze,
 -                                 md->invmass, md->ptype,
 -                                 md->cFREEZE, md->cACC, md->cTC,
 -                                 state->x, xprime, state->v, force, M,
 -                                 bNH, bPR);
 -                }
 -                else
 -                {
 -                    do_update_visc(start_th, end_th,
 -                                   dt, inputrec->nstpcouple,
 -                                   ekind->tcstat, state->nosehoover_vxi,
 -                                   md->invmass, md->ptype,
 -                                   md->cTC, state->x, xprime, state->v, force, M,
 -                                   state->box,
 -                                   ekind->cosacc.cos_accel,
 -                                   ekind->cosacc.vcos,
 -                                   bNH, bPR);
 -                }
 -                break;
 -            case (eiSD1):
 -                /* With constraints, the SD1 update is done in 2 parts */
 -                do_update_sd1(upd->sd,
 -                              start_th, end_th, dt,
 -                              inputrec->opts.acc, inputrec->opts.nFreeze,
 -                              md->invmass, md->ptype,
 -                              md->cFREEZE, md->cACC, md->cTC,
 -                              state->x, xprime, state->v, force,
 -                              inputrec->opts.ngtc, inputrec->opts.ref_t,
 -                              bDoConstr, TRUE,
 -                              step, inputrec->ld_seed, DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL);
 -                break;
 -            case (eiSD2):
 -                /* The SD2 update is always done in 2 parts,
 -                 * because an extra constraint step is needed
 -                 */
 -                do_update_sd2(upd->sd,
 -                              bInitStep, start_th, end_th,
 -                              inputrec->opts.acc, inputrec->opts.nFreeze,
 -                              md->invmass, md->ptype,
 -                              md->cFREEZE, md->cACC, md->cTC,
 -                              state->x, xprime, state->v, force, state->sd_X,
 -                              inputrec->opts.tau_t,
 -                              TRUE, step, inputrec->ld_seed,
 -                              DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL);
 -                break;
 -            case (eiBD):
 -                do_update_bd(start_th, end_th, dt,
 -                             inputrec->opts.nFreeze, md->invmass, md->ptype,
 -                             md->cFREEZE, md->cTC,
 -                             state->x, xprime, state->v, force,
 -                             inputrec->bd_fric,
 -                             upd->sd->bd_rf,
 -                             step, inputrec->ld_seed, DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL);
 -                break;
 -            case (eiVV):
 -            case (eiVVAK):
 -                alpha = 1.0 + DIM/((double)inputrec->opts.nrdf[0]); /* assuming barostat coupled to group 0. */
 -                switch (UpdatePart)
 -                {
 -                    case etrtVELOCITY1:
 -                    case etrtVELOCITY2:
 -                        do_update_vv_vel(start_th, end_th, dt,
 -                                         inputrec->opts.acc, inputrec->opts.nFreeze,
 -                                         md->invmass, md->ptype,
 -                                         md->cFREEZE, md->cACC,
 -                                         state->v, force,
 -                                         (bNH || bPR), state->veta, alpha);
 -                        break;
 -                    case etrtPOSITION:
 -                        do_update_vv_pos(start_th, end_th, dt,
 -                                         inputrec->opts.nFreeze,
 -                                         md->ptype, md->cFREEZE,
 -                                         state->x, xprime, state->v,
 -                                         (bNH || bPR), state->veta);
 -                        break;
 -                }
 -                break;
 -            default:
 -                gmx_fatal(FARGS, "Don't know how to update coordinates");
 -                break;
 +            switch (inputrec->eI)
 +            {
 +                case (eiMD):
 +                    if (ekind->cosacc.cos_accel == 0)
 +                    {
-                         do_update_md(start_th, end_th, dt,
++                        do_update_md(start_th, end_th,
++                                     dt, inputrec->nstpcouple,
 +                                     ekind->tcstat, state->nosehoover_vxi,
 +                                     ekind->bNEMD, ekind->grpstat, inputrec->opts.acc,
 +                                     inputrec->opts.nFreeze,
 +                                     md->invmass, md->ptype,
 +                                     md->cFREEZE, md->cACC, md->cTC,
 +                                     state->x, upd->xp, state->v, f, M,
 +                                     bNH, bPR);
 +                    }
 +                    else
 +                    {
-                         do_update_visc(start_th, end_th, dt,
++                        do_update_visc(start_th, end_th,
++                                       dt, inputrec->nstpcouple,
 +                                       ekind->tcstat, state->nosehoover_vxi,
 +                                       md->invmass, md->ptype,
 +                                       md->cTC, state->x, upd->xp, state->v, f, M,
 +                                       state->box,
 +                                       ekind->cosacc.cos_accel,
 +                                       ekind->cosacc.vcos,
 +                                       bNH, bPR);
 +                    }
 +                    break;
 +                case (eiSD1):
 +                    /* With constraints, the SD1 update is done in 2 parts */
 +                    do_update_sd1(upd->sd,
 +                                  start_th, end_th, dt,
 +                                  inputrec->opts.acc, inputrec->opts.nFreeze,
 +                                  md->invmass, md->ptype,
 +                                  md->cFREEZE, md->cACC, md->cTC,
 +                                  state->x, upd->xp, state->v, f,
 +                                  bDoConstr, TRUE,
 +                                  step, inputrec->ld_seed, DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL);
 +                    break;
 +                case (eiBD):
 +                    do_update_bd(start_th, end_th, dt,
 +                                 inputrec->opts.nFreeze, md->invmass, md->ptype,
 +                                 md->cFREEZE, md->cTC,
 +                                 state->x, upd->xp, state->v, f,
 +                                 inputrec->bd_fric,
 +                                 upd->sd->bd_rf,
 +                                 step, inputrec->ld_seed, DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL);
 +                    break;
 +                case (eiVV):
 +                case (eiVVAK):
 +                    alpha = 1.0 + DIM/((double)inputrec->opts.nrdf[0]); /* assuming barostat coupled to group 0. */
 +                    switch (UpdatePart)
 +                    {
 +                        case etrtVELOCITY1:
 +                        case etrtVELOCITY2:
 +                            do_update_vv_vel(start_th, end_th, dt,
 +                                             inputrec->opts.acc, inputrec->opts.nFreeze,
 +                                             md->invmass, md->ptype,
 +                                             md->cFREEZE, md->cACC,
 +                                             state->v, f,
 +                                             (bNH || bPR), state->veta, alpha);
 +                            break;
 +                        case etrtPOSITION:
 +                            do_update_vv_pos(start_th, end_th, dt,
 +                                             inputrec->opts.nFreeze,
 +                                             md->ptype, md->cFREEZE,
 +                                             state->x, upd->xp, state->v,
 +                                             (bNH || bPR), state->veta);
 +                            break;
 +                    }
 +                    break;
 +                default:
 +                    gmx_fatal(FARGS, "Don't know how to update coordinates");
 +                    break;
 +            }
          }
 +        GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
      }
  
  }
index a2aa4f8d747cebce5b599234e970f6a16d2411a7,0000000000000000000000000000000000000000..1ba04de5238db7940e6865baa5d6a92a2e1851d4
mode 100644,000000..100644
--- /dev/null
@@@ -1,443 -1,0 +1,441 @@@
-     /* Ewald charge correction load distribution over the threads */
-     int                 *excl_load;
 +/*
 + * This file is part of the GROMACS molecular simulation package.
 + *
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team.
 + * Copyright (c) 2013,2014,2015,2016, by the GROMACS development team, led by
 + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 + * and including many others, as listed in the AUTHORS file in the
 + * top-level source directory and at http://www.gromacs.org.
 + *
 + * GROMACS is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU Lesser General Public License
 + * as published by the Free Software Foundation; either version 2.1
 + * of the License, or (at your option) any later version.
 + *
 + * GROMACS is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 + * Lesser General Public License for more details.
 + *
 + * You should have received a copy of the GNU Lesser General Public
 + * License along with GROMACS; if not, see
 + * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 + *
 + * If you want to redistribute modifications to GROMACS, please
 + * consider that scientific software is very special. Version
 + * control is crucial - bugs must be traceable. We will be happy to
 + * consider code for inclusion in the official distribution, but
 + * derived work must not be called official GROMACS. Details are found
 + * in the README & COPYING files - if they are missing, get the
 + * official version at http://www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the research papers on the package. Check out http://www.gromacs.org.
 + */
 +#ifndef GMX_MDTYPES_TYPES_FORCEREC_H
 +#define GMX_MDTYPES_TYPES_FORCEREC_H
 +
 +#include "gromacs/math/vectypes.h"
 +#include "gromacs/mdtypes/interaction_const.h"
 +#include "gromacs/mdtypes/md_enums.h"
 +#include "gromacs/topology/idef.h"
 +#include "gromacs/utility/basedefinitions.h"
 +#include "gromacs/utility/real.h"
 +
 +#ifdef __cplusplus
 +extern "C" {
 +#endif
 +#if 0
 +} /* fixes auto-indentation problems */
 +#endif
 +
 +/* Abstract type for PME that is defined only in the routine that use them. */
 +struct gmx_genborn_t;
 +struct gmx_ns_t;
 +struct gmx_pme_t;
 +struct nonbonded_verlet_t;
 +struct bonded_threading_t;
 +struct t_forcetable;
 +struct t_nblist;
 +struct t_nblists;
 +struct t_QMMMrec;
 +struct gmx_hw_info_t;
 +struct gmx_gpu_opt_t;
 +
 +/* macros for the cginfo data in forcerec
 + *
 + * Since the tpx format support max 256 energy groups, we do the same here.
 + * Note that we thus have bits 8-14 still unused.
 + *
 + * The maximum cg size in cginfo is 63
 + * because we only have space for 6 bits in cginfo,
 + * this cg size entry is actually only read with domain decomposition.
 + * But there is a smaller limit due to the t_excl data structure
 + * which is defined in nblist.h.
 + */
 +#define SET_CGINFO_GID(cgi, gid)     (cgi) = (((cgi)  &  ~255) | (gid))
 +#define GET_CGINFO_GID(cgi)        ( (cgi)            &   255)
 +#define SET_CGINFO_FEP(cgi)          (cgi) =  ((cgi)  |  (1<<15))
 +#define GET_CGINFO_FEP(cgi)        ( (cgi)            &  (1<<15))
 +#define SET_CGINFO_EXCL_INTRA(cgi)   (cgi) =  ((cgi)  |  (1<<16))
 +#define GET_CGINFO_EXCL_INTRA(cgi) ( (cgi)            &  (1<<16))
 +#define SET_CGINFO_EXCL_INTER(cgi)   (cgi) =  ((cgi)  |  (1<<17))
 +#define GET_CGINFO_EXCL_INTER(cgi) ( (cgi)            &  (1<<17))
 +#define SET_CGINFO_SOLOPT(cgi, opt)  (cgi) = (((cgi)  & ~(3<<18)) | ((opt)<<18))
 +#define GET_CGINFO_SOLOPT(cgi)     (((cgi)>>18)       &   3)
 +#define SET_CGINFO_CONSTR(cgi)       (cgi) =  ((cgi)  |  (1<<20))
 +#define GET_CGINFO_CONSTR(cgi)     ( (cgi)            &  (1<<20))
 +#define SET_CGINFO_SETTLE(cgi)       (cgi) =  ((cgi)  |  (1<<21))
 +#define GET_CGINFO_SETTLE(cgi)     ( (cgi)            &  (1<<21))
 +/* This bit is only used with bBondComm in the domain decomposition */
 +#define SET_CGINFO_BOND_INTER(cgi)   (cgi) =  ((cgi)  |  (1<<22))
 +#define GET_CGINFO_BOND_INTER(cgi) ( (cgi)            &  (1<<22))
 +#define SET_CGINFO_HAS_VDW(cgi)      (cgi) =  ((cgi)  |  (1<<23))
 +#define GET_CGINFO_HAS_VDW(cgi)    ( (cgi)            &  (1<<23))
 +#define SET_CGINFO_HAS_Q(cgi)        (cgi) =  ((cgi)  |  (1<<24))
 +#define GET_CGINFO_HAS_Q(cgi)      ( (cgi)            &  (1<<24))
 +#define SET_CGINFO_NATOMS(cgi, opt)  (cgi) = (((cgi)  & ~(63<<25)) | ((opt)<<25))
 +#define GET_CGINFO_NATOMS(cgi)     (((cgi)>>25)       &   63)
 +
 +
 +/* Value to be used in mdrun for an infinite cut-off.
 + * Since we need to compare with the cut-off squared,
 + * this value should be slighlty smaller than sqrt(GMX_FLOAT_MAX).
 + */
 +#define GMX_CUTOFF_INF 1E+18
 +
 +/* enums for the neighborlist type */
 +enum {
 +    enbvdwNONE, enbvdwLJ, enbvdwBHAM, enbvdwTAB, enbvdwNR
 +};
 +/* OOR is "one over r" -- standard coul */
 +enum {
 +    enbcoulNONE, enbcoulOOR, enbcoulRF, enbcoulTAB, enbcoulGB, enbcoulFEWALD, enbcoulNR
 +};
 +
 +enum {
 +    egCOULSR, egLJSR, egBHAMSR,
 +    egCOUL14, egLJ14, egGB, egNR
 +};
 +extern const char *egrp_nm[egNR+1];
 +
 +typedef struct gmx_grppairener_t {
 +    int   nener;      /* The number of energy group pairs     */
 +    real *ener[egNR]; /* Energy terms for each pair of groups */
 +} gmx_grppairener_t;
 +
 +typedef struct gmx_enerdata_t {
 +    real              term[F_NRE];         /* The energies for all different interaction types */
 +    gmx_grppairener_t grpp;
 +    double            dvdl_lin[efptNR];    /* Contributions to dvdl with linear lam-dependence */
 +    double            dvdl_nonlin[efptNR]; /* Idem, but non-linear dependence                  */
 +    int               n_lambda;
 +    int               fep_state;           /*current fep state -- just for printing */
 +    double           *enerpart_lambda;     /* Partial energy for lambda and flambda[] */
 +    real              foreign_term[F_NRE]; /* alternate array for storing foreign lambda energies */
 +    gmx_grppairener_t foreign_grpp;        /* alternate array for storing foreign lambda energies */
 +} gmx_enerdata_t;
 +/* The idea is that dvdl terms with linear lambda dependence will be added
 + * automatically to enerpart_lambda. Terms with non-linear lambda dependence
 + * should explicitly determine the energies at foreign lambda points
 + * when n_lambda > 0.
 + */
 +
 +typedef struct {
 +    int  cg_start;
 +    int  cg_end;
 +    int  cg_mod;
 +    int *cginfo;
 +} cginfo_mb_t;
 +
 +
 +/* Forward declaration of type for managing Ewald tables */
 +struct gmx_ewald_tab_t;
 +
 +typedef struct ewald_corr_thread_t ewald_corr_thread_t;
 +
 +typedef struct t_forcerec {
 +    interaction_const_t *ic;
 +
 +    /* Domain Decomposition */
 +    gmx_bool bDomDec;
 +
 +    /* PBC stuff */
 +    int                         ePBC;
 +    gmx_bool                    bMolPBC;
 +    int                         rc_scaling;
 +    rvec                        posres_com;
 +    rvec                        posres_comB;
 +
 +    const struct gmx_hw_info_t *hwinfo;
 +    const struct gmx_gpu_opt_t *gpu_opt;
 +    gmx_bool                    use_simd_kernels;
 +
 +    /* Interaction for calculated in kernels. In many cases this is similar to
 +     * the electrostatics settings in the inputrecord, but the difference is that
 +     * these variables always specify the actual interaction in the kernel - if
 +     * we are tabulating reaction-field the inputrec will say reaction-field, but
 +     * the kernel interaction will say cubic-spline-table. To be safe we also
 +     * have a kernel-specific setting for the modifiers - if the interaction is
 +     * tabulated we already included the inputrec modification there, so the kernel
 +     * modification setting will say 'none' in that case.
 +     */
 +    int nbkernel_elec_interaction;
 +    int nbkernel_vdw_interaction;
 +    int nbkernel_elec_modifier;
 +    int nbkernel_vdw_modifier;
 +
 +    /* Use special N*N kernels? */
 +    gmx_bool bAllvsAll;
 +    /* Private work data */
 +    void    *AllvsAll_work;
 +    void    *AllvsAll_workgb;
 +
 +    /* Cut-Off stuff.
 +     * Infinite cut-off's will be GMX_CUTOFF_INF (unlike in t_inputrec: 0).
 +     */
 +    real rlist;
 +
 +    /* Dielectric constant resp. multiplication factor for charges */
 +    real zsquare, temp;
 +    real epsilon_r, epsilon_rf, epsfac;
 +
 +    /* Constants for reaction fields */
 +    real kappa, k_rf, c_rf;
 +
 +    /* Charge sum and dipole for topology A/B ([0]/[1]) for Ewald corrections */
 +    double qsum[2];
 +    double q2sum[2];
 +    double c6sum[2];
 +    rvec   mu_tot[2];
 +
 +    /* Dispersion correction stuff */
 +    int                  eDispCorr;
 +    int                  numAtomsForDispersionCorrection;
 +    struct t_forcetable *dispersionCorrectionTable;
 +
 +    /* The shift of the shift or user potentials */
 +    real enershiftsix;
 +    real enershifttwelve;
 +    /* Integrated differces for energy and virial with cut-off functions */
 +    real enerdiffsix;
 +    real enerdifftwelve;
 +    real virdiffsix;
 +    real virdifftwelve;
 +    /* Constant for long range dispersion correction (average dispersion)
 +     * for topology A/B ([0]/[1]) */
 +    real avcsix[2];
 +    /* Constant for long range repulsion term. Relative difference of about
 +     * 0.1 percent with 0.8 nm cutoffs. But hey, it's cheap anyway...
 +     */
 +    real avctwelve[2];
 +
 +    /* Fudge factors */
 +    real fudgeQQ;
 +
 +    /* Table stuff */
 +    gmx_bool             bcoultab;
 +    gmx_bool             bvdwtab;
 +    /* The normal tables are in the nblists struct(s) below */
 +
 +    struct t_forcetable *pairsTable; /* for 1-4 interactions, [pairs] and [pairs_nb] */
 +
 +    /* PPPM & Shifting stuff */
 +    int   coulomb_modifier;
 +    real  rcoulomb_switch, rcoulomb;
 +    real *phi;
 +
 +    /* VdW stuff */
 +    int    vdw_modifier;
 +    double reppow;
 +    real   rvdw_switch, rvdw;
 +    real   bham_b_max;
 +
 +    /* Free energy */
 +    int      efep;
 +    real     sc_alphavdw;
 +    real     sc_alphacoul;
 +    int      sc_power;
 +    real     sc_r_power;
 +    real     sc_sigma6_def;
 +    real     sc_sigma6_min;
 +
 +    /* NS Stuff */
 +    int  eeltype;
 +    int  vdwtype;
 +    int  cg0, hcg;
 +    /* solvent_opt contains the enum for the most common solvent
 +     * in the system, which will be optimized.
 +     * It can be set to esolNO to disable all water optimization */
 +    int          solvent_opt;
 +    int          nWatMol;
 +    gmx_bool     bGrid;
 +    gmx_bool     bExcl_IntraCGAll_InterCGNone;
 +    cginfo_mb_t *cginfo_mb;
 +    int         *cginfo;
 +    rvec        *cg_cm;
 +    int          cg_nalloc;
 +    rvec        *shift_vec;
 +
 +    /* The neighborlists including tables */
 +    int                        nnblists;
 +    int                       *gid2nblists;
 +    struct t_nblists          *nblists;
 +
 +    int                        cutoff_scheme; /* group- or Verlet-style cutoff */
 +    gmx_bool                   bNonbonded;    /* true if nonbonded calculations are *not* turned off */
 +    struct nonbonded_verlet_t *nbv;
 +
 +    /* The wall tables (if used) */
 +    int                    nwall;
 +    struct t_forcetable ***wall_tab;
 +
 +    /* The number of charge groups participating in do_force_lowlevel */
 +    int ncg_force;
 +    /* The number of atoms participating in do_force_lowlevel */
 +    int natoms_force;
 +    /* The number of atoms participating in force and constraints */
 +    int natoms_force_constr;
 +    /* The allocation size of vectors of size natoms_force */
 +    int nalloc_force;
 +
 +    /* Forces that should not enter into the virial summation:
 +     * PPPM/PME/Ewald/posres
 +     */
 +    gmx_bool bF_NoVirSum;
 +    int      f_novirsum_n;
 +    int      f_novirsum_nalloc;
 +    rvec    *f_novirsum_alloc;
 +    /* Pointer that points to f_novirsum_alloc when pressure is calcaluted,
 +     * points to the normal force vectors wen pressure is not requested.
 +     */
 +    rvec *f_novirsum;
 +
 +    /* Long-range forces and virial for PPPM/PME/Ewald */
 +    struct gmx_pme_t *pmedata;
 +    int               ljpme_combination_rule;
 +    tensor            vir_el_recip;
 +    tensor            vir_lj_recip;
 +
 +    /* PME/Ewald stuff */
 +    gmx_bool                bEwald;
 +    real                    ewaldcoeff_q;
 +    real                    ewaldcoeff_lj;
 +    struct gmx_ewald_tab_t *ewald_table;
 +
 +    /* Virial Stuff */
 +    rvec *fshift;
 +    rvec  vir_diag_posres;
 +    dvec  vir_wall_z;
 +
 +    /* Non bonded Parameter lists */
 +    int      ntype; /* Number of atom types */
 +    gmx_bool bBHAM;
 +    real    *nbfp;
 +    real    *ljpme_c6grid; /* C6-values used on grid in LJPME */
 +
 +    /* Energy group pair flags */
 +    int *egp_flags;
 +
 +    /* Shell molecular dynamics flexible constraints */
 +    real fc_stepsize;
 +
 +    /* Generalized born implicit solvent */
 +    gmx_bool              bGB;
 +    /* Generalized born stuff */
 +    real                  gb_epsilon_solvent;
 +    /* Table data for GB */
 +    struct t_forcetable  *gbtab;
 +    /* VdW radius for each atomtype (dim is thus ntype) */
 +    real                 *atype_radius;
 +    /* Effective radius (derived from effective volume) for each type */
 +    real                 *atype_vol;
 +    /* Implicit solvent - surface tension for each atomtype */
 +    real                 *atype_surftens;
 +    /* Implicit solvent - radius for GB calculation */
 +    real                 *atype_gb_radius;
 +    /* Implicit solvent - overlap for HCT model */
 +    real                 *atype_S_hct;
 +    /* Generalized born interaction data */
 +    struct gmx_genborn_t *born;
 +
 +    /* Table scale for GB */
 +    real gbtabscale;
 +    /* Table range for GB */
 +    real gbtabr;
 +    /* GB neighborlists (the sr list will contain for each atom all other atoms
 +     * (for use in the SA calculation) and the lr list will contain
 +     * for each atom all atoms 1-4 or greater (for use in the GB calculation)
 +     */
 +    struct t_nblist *gblist_sr;
 +    struct t_nblist *gblist_lr;
 +    struct t_nblist *gblist;
 +
 +    /* Inverse square root of the Born radii for implicit solvent */
 +    real *invsqrta;
 +    /* Derivatives of the potential with respect to the Born radii */
 +    real *dvda;
 +    /* Derivatives of the Born radii with respect to coordinates */
 +    real *dadx;
 +    real *dadx_rawptr;
 +    int   nalloc_dadx; /* Allocated size of dadx */
 +
 +    /* If > 0 signals Test Particle Insertion,
 +     * the value is the number of atoms of the molecule to insert
 +     * Only the energy difference due to the addition of the last molecule
 +     * should be calculated.
 +     */
 +    gmx_bool n_tpi;
 +
 +    /* Neighbor searching stuff */
 +    struct gmx_ns_t *ns;
 +
 +    /* QMMM stuff */
 +    gmx_bool          bQMMM;
 +    struct t_QMMMrec *qr;
 +
 +    /* QM-MM neighborlists */
 +    struct t_nblist        *QMMMlist;
 +
 +    /* Limit for printing large forces, negative is don't print */
 +    real print_force;
 +
 +    /* coarse load balancing time measurement */
 +    double t_fnbf;
 +    double t_wait;
 +    int    timesteps;
 +
 +    /* User determined parameters, copied from the inputrec */
 +    int  userint1;
 +    int  userint2;
 +    int  userint3;
 +    int  userint4;
 +    real userreal1;
 +    real userreal2;
 +    real userreal3;
 +    real userreal4;
 +
 +    /* Pointer to struct for managing threading of bonded force calculation */
 +    struct bonded_threading_t *bonded_threading;
 +
 +    /* Ewald correction thread local virial and energy data */
 +    int                  nthread_ewc;
 +    ewald_corr_thread_t *ewc_t;
 +} t_forcerec;
 +
 +/* Important: Starting with Gromacs-4.6, the values of c6 and c12 in the nbfp array have
 + * been scaled by 6.0 or 12.0 to save flops in the kernels. We have corrected this everywhere
 + * in the code, but beware if you are using these macros externally.
 + */
 +#define C6(nbfp, ntp, ai, aj)     (nbfp)[2*((ntp)*(ai)+(aj))]
 +#define C12(nbfp, ntp, ai, aj)    (nbfp)[2*((ntp)*(ai)+(aj))+1]
 +#define BHAMC(nbfp, ntp, ai, aj)  (nbfp)[3*((ntp)*(ai)+(aj))]
 +#define BHAMA(nbfp, ntp, ai, aj)  (nbfp)[3*((ntp)*(ai)+(aj))+1]
 +#define BHAMB(nbfp, ntp, ai, aj)  (nbfp)[3*((ntp)*(ai)+(aj))+2]
 +
 +#ifdef __cplusplus
 +}
 +#endif
 +#endif
index 2a49aba4a37b3d20df5bb7b49f472fbf1f96000c,afa3d62a75a4e225841c05283444184851e8fa20..06c25f321404e012b870cfff79c5044308431b7e
@@@ -419,11 -366,15 +419,9 @@@ double gmx::do_md(FILE *fplog, t_commre
      }
      else
      {
 -        top = gmx_mtop_generate_local_top(top_global, ir);
 +        top = gmx_mtop_generate_local_top(top_global, ir->efep != efepNO);
  
-         forcerec_set_excl_load(fr, top);
          state    = serial_init_local_state(state_global);
 -        f_global = f;
  
          atoms2md(top_global, ir, 0, NULL, top_global->natoms, mdatoms);