Merge branch release-5-1 into release-2016
author Berk Hess <hess@kth.se>
Fri, 2 Sep 2016 15:28:25 +0000 (17:28 +0200)
committer Berk Hess <hess@kth.se>
Fri, 2 Sep 2016 15:28:25 +0000 (17:28 +0200)
Change-Id: Ia1a7fad67f0ff11175ea24c46f813a445cd49ed6

1  2 
src/gromacs/domdec/domdec_topology.cpp
src/gromacs/ewald/long-range-correction.cpp
src/gromacs/ewald/long-range-correction.h
src/gromacs/mdlib/force.cpp
src/gromacs/mdlib/forcerec.cpp
src/gromacs/mdlib/minimize.cpp
src/gromacs/mdlib/update.cpp
src/gromacs/mdtypes/forcerec.h
src/programs/mdrun/md.cpp

index 548e373e8564e9237308a1ace4970eccb2fe027f,53b86035ec2dee927826cb5c23138d4f0db6bae0..46b05ddd71ddf01e40ece4a02723cd8fa42f0143
  #include <string.h>
  
  #include <algorithm>
 +#include <string>
  
  #include "gromacs/domdec/domdec.h"
  #include "gromacs/domdec/domdec_network.h"
 -#include "gromacs/legacyheaders/chargegroup.h"
 -#include "gromacs/legacyheaders/force.h"
 -#include "gromacs/legacyheaders/gmx_ga2la.h"
 -#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
 -#include "gromacs/legacyheaders/names.h"
 -#include "gromacs/legacyheaders/network.h"
 -#include "gromacs/legacyheaders/typedefs.h"
 -#include "gromacs/legacyheaders/vsite.h"
 -#include "gromacs/legacyheaders/types/commrec.h"
 +#include "gromacs/domdec/ga2la.h"
 +#include "gromacs/gmxlib/chargegroup.h"
 +#include "gromacs/gmxlib/network.h"
  #include "gromacs/math/vec.h"
 +#include "gromacs/mdlib/force.h"
 +#include "gromacs/mdlib/forcerec.h"
 +#include "gromacs/mdlib/gmx_omp_nthreads.h"
 +#include "gromacs/mdlib/vsite.h"
 +#include "gromacs/mdtypes/commrec.h"
 +#include "gromacs/mdtypes/inputrec.h"
 +#include "gromacs/mdtypes/md_enums.h"
 +#include "gromacs/mdtypes/state.h"
  #include "gromacs/pbcutil/mshift.h"
  #include "gromacs/pbcutil/pbc.h"
  #include "gromacs/topology/mtop_util.h"
  #include "gromacs/topology/topsort.h"
  #include "gromacs/utility/cstringutil.h"
 +#include "gromacs/utility/exceptions.h"
  #include "gromacs/utility/fatalerror.h"
  #include "gromacs/utility/gmxassert.h"
  #include "gromacs/utility/smalloc.h"
 +#include "gromacs/utility/stringutil.h"
  
  #include "domdec_constraints.h"
  #include "domdec_internal.h"
@@@ -107,8 -102,7 +107,8 @@@ typedef struct 
  } thread_work_t;
  
  /*! \brief Struct for the reverse topology: links bonded interactions to atoms */
 -typedef struct gmx_reverse_top {
 +struct gmx_reverse_top_t
 +{
      //! @cond Doxygen_Suppress
      gmx_bool         bExclRequired;               /**< Do we require all exclusions to be assigned? */
      int              n_excl_at_max;               /**< The maximum number of exclusions one atom can have */
      /* Work data structures for multi-threading */
      int            nthread;           /**< The number of threads to be used */
      thread_work_t *th_work;           /**< Thread work array for local topology generation */
 -
 -    /* Pointers only used for an error message */
 -    gmx_mtop_t     *err_top_global; /**< Pointer to the global top, only used for error reporting */
 -    gmx_localtop_t *err_top_local;  /**< Pointer to the local top, only used for error reporting */
      //! @endcond
 -} gmx_reverse_top_t;
 +};
  
  /*! \brief Returns the number of atom entries for il in gmx_reverse_top_t */
  static int nral_rt(int ftype)
@@@ -160,7 -158,7 +160,7 @@@ static gmx_bool dd_check_ftype(int ftyp
  }
  
  /*! \brief Print a header on error messages */
 -static void print_error_header(FILE *fplog, char *moltypename, int nprint)
 +static void print_error_header(FILE *fplog, const char *moltypename, int nprint)
  {
      fprintf(fplog, "\nMolecule type '%s'\n", moltypename);
      fprintf(stderr, "\nMolecule type '%s'\n", moltypename);
  
  /*! \brief Help print error output when interactions are missing */
  static void print_missing_interactions_mb(FILE *fplog, t_commrec *cr,
 -                                          gmx_reverse_top_t *rt,
 -                                          char *moltypename,
 -                                          reverse_ilist_t *ril,
 +                                          const gmx_reverse_top_t *rt,
 +                                          const char *moltypename,
 +                                          const reverse_ilist_t *ril,
                                            int a_start, int a_end,
                                            int nat_mol, int nmol,
 -                                          t_idef *idef)
 +                                          const t_idef *idef)
  {
 -    int      nril_mol, *assigned, *gatindex;
 -    int      ftype, ftype_j, nral, i, j_mol, j, a0, a0_mol, mol, a;
 -    int      nprint;
 -    t_ilist *il;
 -    t_iatom *ia;
 -    gmx_bool bFound;
 -
 -    nril_mol = ril->index[nat_mol];
 +    int *assigned;
 +    int  nril_mol = ril->index[nat_mol];
      snew(assigned, nmol*nril_mol);
  
 -    gatindex = cr->dd->gatindex;
 -    for (ftype = 0; ftype < F_NRE; ftype++)
 +    int *gatindex = cr->dd->gatindex;
 +    for (int ftype = 0; ftype < F_NRE; ftype++)
      {
          if (dd_check_ftype(ftype, rt->bBCheck, rt->bConstr, rt->bSettle))
          {
 -            nral = NRAL(ftype);
 -            il   = &idef->il[ftype];
 -            ia   = il->iatoms;
 -            for (i = 0; i < il->nr; i += 1+nral)
 +            int            nral = NRAL(ftype);
 +            const t_ilist *il   = &idef->il[ftype];
 +            const t_iatom *ia   = il->iatoms;
 +            for (int i = 0; i < il->nr; i += 1+nral)
              {
 -                a0     = gatindex[ia[1]];
 +                int a0 = gatindex[ia[1]];
                  /* Check if this interaction is in
                   * the currently checked molblock.
                   */
                  if (a0 >= a_start && a0 < a_end)
                  {
 -                    mol    = (a0 - a_start)/nat_mol;
 -                    a0_mol = (a0 - a_start) - mol*nat_mol;
 -                    j_mol  = ril->index[a0_mol];
 -                    bFound = FALSE;
 -                    while (j_mol < ril->index[a0_mol+1] && !bFound)
 +                    int  mol    = (a0 - a_start)/nat_mol;
 +                    int  a0_mol = (a0 - a_start) - mol*nat_mol;
 +                    int  j_mol  = ril->index[a0_mol];
 +                    bool found  = false;
 +                    while (j_mol < ril->index[a0_mol+1] && !found)
                      {
 -                        j       = mol*nril_mol + j_mol;
 -                        ftype_j = ril->il[j_mol];
 +                        int j       = mol*nril_mol + j_mol;
 +                        int ftype_j = ril->il[j_mol];
                          /* Here we need to check if this interaction has
                           * not already been assigned, since we could have
                           * multiply defined interactions.
                              assigned[j] == 0)
                          {
                              /* Check the atoms */
 -                            bFound = TRUE;
 -                            for (a = 0; a < nral; a++)
 +                            found = true;
 +                            for (int a = 0; a < nral; a++)
                              {
                                  if (gatindex[ia[1+a]] !=
                                      a_start + mol*nat_mol + ril->il[j_mol+2+a])
                                  {
 -                                    bFound = FALSE;
 +                                    found = false;
                                  }
                              }
 -                            if (bFound)
 +                            if (found)
                              {
                                  assigned[j] = 1;
                              }
                          }
                          j_mol += 2 + nral_rt(ftype_j);
                      }
 -                    if (!bFound)
 +                    if (!found)
                      {
                          gmx_incons("Some interactions seem to be assigned multiple times");
                      }
  
      gmx_sumi(nmol*nril_mol, assigned, cr);
  
 -    nprint = 10;
 -    i      = 0;
 -    for (mol = 0; mol < nmol; mol++)
 +    int nprint = 10;
 +    int i      = 0;
 +    for (int mol = 0; mol < nmol; mol++)
      {
 -        j_mol = 0;
 +        int j_mol = 0;
          while (j_mol < nril_mol)
          {
 -            ftype = ril->il[j_mol];
 -            nral  = NRAL(ftype);
 -            j     = mol*nril_mol + j_mol;
 +            int ftype = ril->il[j_mol];
 +            int nral  = NRAL(ftype);
 +            int j     = mol*nril_mol + j_mol;
              if (assigned[j] == 0 &&
                  !(interaction_function[ftype].flags & IF_VSITE))
              {
                              interaction_function[ftype].longname);
                      fprintf(stderr, "%20s atoms",
                              interaction_function[ftype].longname);
 +                    int a;
                      for (a = 0; a < nral; a++)
                      {
                          fprintf(fplog, "%5d", ril->il[j_mol+2+a]+1);
  
  /*! \brief Help print error output when interactions are missing */
  static void print_missing_interactions_atoms(FILE *fplog, t_commrec *cr,
 -                                             gmx_mtop_t *mtop, t_idef *idef)
 +                                             const gmx_mtop_t *mtop,
 +                                             const t_idef *idef)
  {
 -    int                mb, a_start, a_end;
 -    gmx_molblock_t    *molb;
 -    gmx_reverse_top_t *rt;
 +    int                      mb, a_start, a_end;
 +    const gmx_molblock_t    *molb;
 +    const gmx_reverse_top_t *rt;
  
      rt = cr->dd->reverse_top;
  
      }
  }
  
 -void dd_print_missing_interactions(FILE *fplog, t_commrec *cr, int local_count,  gmx_mtop_t *top_global, t_state *state_local)
 +void dd_print_missing_interactions(FILE *fplog, t_commrec *cr,
 +                                   int local_count,
 +                                   const gmx_mtop_t *top_global,
 +                                   const gmx_localtop_t *top_local,
 +                                   t_state *state_local)
  {
      int             ndiff_tot, cl[F_NRE], n, ndiff, rest_global, rest_local;
      int             ftype, nral;
      char            buf[STRLEN];
      gmx_domdec_t   *dd;
 -    gmx_mtop_t     *err_top_global;
 -    gmx_localtop_t *err_top_local;
  
      dd = cr->dd;
  
 -    err_top_global = dd->reverse_top->err_top_global;
 -    err_top_local  = dd->reverse_top->err_top_local;
 -
      if (fplog)
      {
          fprintf(fplog, "\nNot all bonded interactions have been properly assigned to the domain decomposition cells\n");
      for (ftype = 0; ftype < F_NRE; ftype++)
      {
          nral      = NRAL(ftype);
 -        cl[ftype] = err_top_local->idef.il[ftype].nr/(1+nral);
 +        cl[ftype] = top_local->idef.il[ftype].nr/(1+nral);
      }
  
      gmx_sumi(F_NRE, cl, cr);
                  || (dd->reverse_top->bConstr && ftype == F_CONSTR)
                  || (dd->reverse_top->bSettle && ftype == F_SETTLE))
              {
 -                n    = gmx_mtop_ftype_count(err_top_global, ftype);
 +                n    = gmx_mtop_ftype_count(top_global, ftype);
                  if (ftype == F_CONSTR)
                  {
 -                    n += gmx_mtop_ftype_count(err_top_global, F_CONSTRNC);
 +                    n += gmx_mtop_ftype_count(top_global, F_CONSTRNC);
                  }
                  ndiff = cl[ftype] - n;
                  if (ndiff != 0)
          }
      }
  
 -    print_missing_interactions_atoms(fplog, cr, err_top_global,
 -                                     &err_top_local->idef);
 +    print_missing_interactions_atoms(fplog, cr, top_global, &top_local->idef);
      write_dd_pdb("dd_dump_err", 0, "dump", top_global, cr,
                   -1, state_local->x, state_local->box);
 -    if (DDMASTER(dd))
 +
 +    std::string errorMessage;
 +
 +    if (ndiff_tot > 0)
      {
 -        if (ndiff_tot > 0)
 -        {
 -            gmx_incons("One or more interactions were multiple assigned in the domain decompostion");
 -        }
 -        else
 -        {
 -            gmx_fatal(FARGS, "%d of the %d bonded interactions could not be calculated because some atoms involved moved further apart than the multi-body cut-off distance (%g nm) or the two-body cut-off distance (%g nm), see option -rdd, for pairs and tabulated bonds also see option -ddcheck", -ndiff_tot, cr->dd->nbonded_global, dd_cutoff_multibody(cr->dd), dd_cutoff_twobody(cr->dd));
 -        }
 +        errorMessage = "One or more interactions were assigned to multiple domains of the domain decompostion. Please report this bug.";
 +    }
 +    else
 +    {
 +        errorMessage = gmx::formatString("%d of the %d bonded interactions could not be calculated because some atoms involved moved further apart than the multi-body cut-off distance (%g nm) or the two-body cut-off distance (%g nm), see option -rdd, for pairs and tabulated bonds also see option -ddcheck", -ndiff_tot, cr->dd->nbonded_global, dd_cutoff_multibody(dd), dd_cutoff_twobody(dd));
      }
 +    gmx_fatal_collective(FARGS, cr->mpi_comm_mygroup, MASTER(cr), errorMessage.c_str());
  }
  
  /*! \brief Return global topology molecule information for global atom index \p i_gl */
@@@ -504,8 -507,9 +504,8 @@@ static void count_excls(const t_block *
  }
  
  /*! \brief Run the reverse ilist generation and store it when \p bAssign = TRUE */
 -static int low_make_reverse_ilist(const t_ilist *il_mt,
 -                                  const t_atom *atom,
 -                                  int **vsite_pbc, /* should be const */
 +static int low_make_reverse_ilist(const t_ilist *il_mt, const t_atom *atom,
 +                                  const int * const * vsite_pbc,
                                    int *count,
                                    gmx_bool bConstr, gmx_bool bSettle,
                                    gmx_bool bBCheck,
  {
      int            ftype, nral, i, j, nlink, link;
      const t_ilist *il;
 -    t_iatom       *ia;
 -    atom_id        a;
 +    const t_iatom *ia;
 +    int            a;
      int            nint;
      gmx_bool       bVSite;
  
  /*! \brief Make the reverse ilist: a list of bonded interactions linked to atoms */
  static int make_reverse_ilist(const t_ilist *ilist,
                                const t_atoms *atoms,
 -                              int **vsite_pbc, /* should be const (C issue) */
 +                              const int * const * vsite_pbc,
                                gmx_bool bConstr, gmx_bool bSettle,
                                gmx_bool bBCheck,
                                gmx_bool bLinkToAllAtoms,
@@@ -655,8 -659,8 +655,8 @@@ static void destroy_reverse_ilist(rever
  }
  
  /*! \brief Generate the reverse topology */
 -static gmx_reverse_top_t *make_reverse_top(gmx_mtop_t *mtop, gmx_bool bFE,
 -                                           int ***vsite_pbc_molt,
 +static gmx_reverse_top_t *make_reverse_top(const gmx_mtop_t *mtop, gmx_bool bFE,
 +                                           const int * const * const * vsite_pbc_molt,
                                             gmx_bool bConstr, gmx_bool bSettle,
                                             gmx_bool bBCheck, int *nint)
  {
  }
  
  void dd_make_reverse_top(FILE *fplog,
 -                         gmx_domdec_t *dd, gmx_mtop_t *mtop,
 -                         gmx_vsite_t *vsite,
 -                         t_inputrec *ir, gmx_bool bBCheck)
 +                         gmx_domdec_t *dd, const gmx_mtop_t *mtop,
 +                         const gmx_vsite_t *vsite,
 +                         const t_inputrec *ir, gmx_bool bBCheck)
  {
      if (fplog)
      {
       * excluded pair should appear exactly once.
       */
      rt->bExclRequired = (ir->cutoff_scheme == ecutsGROUP &&
 -                         IR_EXCL_FORCES(*ir));
 +                         inputrecExclForces(ir));
  
      int nexcl, mb;
  
   * atom-indexing organization code with the ifunc-adding code, so that
   * they can see that nral is the same value. */
  static gmx_inline void
 -add_ifunc_for_vsites(t_iatom *tiatoms, gmx_ga2la_t ga2la,
 +add_ifunc_for_vsites(t_iatom *tiatoms, gmx_ga2la_t *ga2la,
                       int nral, gmx_bool bHomeA,
                       int a, int a_gl, int a_mol,
                       const t_iatom *iatoms,
@@@ -1022,7 -1026,7 +1022,7 @@@ static void add_fbposres(int mol, int a
  }
  
  /*! \brief Store a virtual site interaction, complex because of PBC and recursion */
 -static void add_vsite(gmx_ga2la_t ga2la, const int *index, const int *rtil,
 +static void add_vsite(gmx_ga2la_t *ga2la, const int *index, const int *rtil,
                        int ftype, int nral,
                        gmx_bool bHomeA, int a, int a_gl, int a_mol,
                        const t_iatom *iatoms,
@@@ -1657,11 -1661,11 +1657,11 @@@ static int make_exclusions_zone_cg(gmx_
                                     int iz,
                                     int cg_start, int cg_end)
  {
 -    int             n_excl_at_max, n, count, jla0, jla1, jla;
 -    int             cg, la0, la1, la, a_gl, mb, mt, mol, a_mol, j, aj_mol;
 -    const t_blocka *excls;
 -    gmx_ga2la_t     ga2la;
 -    int             cell;
 +    int               n_excl_at_max, n, count, jla0, jla1, jla;
 +    int               cg, la0, la1, la, a_gl, mb, mt, mol, a_mol, j, aj_mol;
 +    const t_blocka   *excls;
 +    gmx_ga2la_t      *ga2la;
 +    int               cell;
  
      ga2la = dd->ga2la;
  
@@@ -1794,9 -1798,9 +1794,9 @@@ static void make_exclusions_zone(gmx_do
                                   int iz,
                                   int at_start, int at_end)
  {
 -    gmx_ga2la_t ga2la;
 -    int         jla0, jla1;
 -    int         n_excl_at_max, n, at;
 +    gmx_ga2la_t *ga2la;
 +    int          jla0, jla1;
 +    int          n_excl_at_max, n, at;
  
      ga2la = dd->ga2la;
  
@@@ -1992,92 -1996,88 +1992,92 @@@ static int make_local_bondeds_excls(gmx
  #pragma omp parallel for num_threads(rt->nthread) schedule(static)
          for (thread = 0; thread < rt->nthread; thread++)
          {
 -            int       cg0t, cg1t;
 -            t_idef   *idef_t;
 -            int     **vsite_pbc;
 -            int      *vsite_pbc_nalloc;
 -            t_blocka *excl_t;
 -
 -            cg0t = cg0 + ((cg1 - cg0)* thread   )/rt->nthread;
 -            cg1t = cg0 + ((cg1 - cg0)*(thread+1))/rt->nthread;
 -
 -            if (thread == 0)
 +            try
              {
 -                idef_t = idef;
 -            }
 -            else
 -            {
 -                idef_t = &rt->th_work[thread].idef;
 -                clear_idef(idef_t);
 -            }
 +                int       cg0t, cg1t;
 +                t_idef   *idef_t;
 +                int     **vsite_pbc;
 +                int      *vsite_pbc_nalloc;
 +                t_blocka *excl_t;
 +
 +                cg0t = cg0 + ((cg1 - cg0)* thread   )/rt->nthread;
 +                cg1t = cg0 + ((cg1 - cg0)*(thread+1))/rt->nthread;
  
 -            if (vsite && vsite->bHaveChargeGroups && vsite->n_intercg_vsite > 0)
 -            {
                  if (thread == 0)
                  {
 -                    vsite_pbc        = vsite->vsite_pbc_loc;
 -                    vsite_pbc_nalloc = vsite->vsite_pbc_loc_nalloc;
 +                    idef_t = idef;
                  }
                  else
                  {
 -                    vsite_pbc        = rt->th_work[thread].vsite_pbc;
 -                    vsite_pbc_nalloc = rt->th_work[thread].vsite_pbc_nalloc;
 +                    idef_t = &rt->th_work[thread].idef;
 +                    clear_idef(idef_t);
                  }
 -            }
 -            else
 -            {
 -                vsite_pbc        = NULL;
 -                vsite_pbc_nalloc = NULL;
 -            }
  
 -            rt->th_work[thread].nbonded =
 -                make_bondeds_zone(dd, zones,
 -                                  mtop->molblock,
 -                                  bRCheckMB, rcheck, bRCheck2B, rc2,
 -                                  la2lc, pbc_null, cg_cm, idef->iparams,
 -                                  idef_t,
 -                                  vsite_pbc, vsite_pbc_nalloc,
 -                                  izone,
 -                                  dd->cgindex[cg0t], dd->cgindex[cg1t]);
 -
 -            if (izone < nzone_excl)
 -            {
 -                if (thread == 0)
 +                if (vsite && vsite->bHaveChargeGroups && vsite->n_intercg_vsite > 0)
                  {
 -                    excl_t = lexcls;
 +                    if (thread == 0)
 +                    {
 +                        vsite_pbc        = vsite->vsite_pbc_loc;
 +                        vsite_pbc_nalloc = vsite->vsite_pbc_loc_nalloc;
 +                    }
 +                    else
 +                    {
 +                        vsite_pbc        = rt->th_work[thread].vsite_pbc;
 +                        vsite_pbc_nalloc = rt->th_work[thread].vsite_pbc_nalloc;
 +                    }
                  }
                  else
                  {
 -                    excl_t      = &rt->th_work[thread].excl;
 -                    excl_t->nr  = 0;
 -                    excl_t->nra = 0;
 +                    vsite_pbc        = NULL;
 +                    vsite_pbc_nalloc = NULL;
                  }
  
 -                if (dd->cgindex[dd->ncg_tot] == dd->ncg_tot &&
 -                    !rt->bExclRequired)
 -                {
 -                    /* No charge groups and no distance check required */
 -                    make_exclusions_zone(dd, zones,
 -                                         mtop->moltype, cginfo,
 -                                         excl_t,
 -                                         izone,
 -                                         cg0t, cg1t);
 -                }
 -                else
 +                rt->th_work[thread].nbonded =
 +                    make_bondeds_zone(dd, zones,
 +                                      mtop->molblock,
 +                                      bRCheckMB, rcheck, bRCheck2B, rc2,
 +                                      la2lc, pbc_null, cg_cm, idef->iparams,
 +                                      idef_t,
 +                                      vsite_pbc, vsite_pbc_nalloc,
 +                                      izone,
 +                                      dd->cgindex[cg0t], dd->cgindex[cg1t]);
 +
 +                if (izone < nzone_excl)
                  {
 -                    rt->th_work[thread].excl_count =
 -                        make_exclusions_zone_cg(dd, zones,
 -                                                mtop->moltype, bRCheck2B, rc2,
 -                                                la2lc, pbc_null, cg_cm, cginfo,
 -                                                excl_t,
 -                                                izone,
 -                                                cg0t, cg1t);
 +                    if (thread == 0)
 +                    {
 +                        excl_t = lexcls;
 +                    }
 +                    else
 +                    {
 +                        excl_t      = &rt->th_work[thread].excl;
 +                        excl_t->nr  = 0;
 +                        excl_t->nra = 0;
 +                    }
 +
 +                    if (dd->cgindex[dd->ncg_tot] == dd->ncg_tot &&
 +                        !rt->bExclRequired)
 +                    {
 +                        /* No charge groups and no distance check required */
 +                        make_exclusions_zone(dd, zones,
 +                                             mtop->moltype, cginfo,
 +                                             excl_t,
 +                                             izone,
 +                                             cg0t, cg1t);
 +                    }
 +                    else
 +                    {
 +                        rt->th_work[thread].excl_count =
 +                            make_exclusions_zone_cg(dd, zones,
 +                                                    mtop->moltype, bRCheck2B, rc2,
 +                                                    la2lc, pbc_null, cg_cm, cginfo,
 +                                                    excl_t,
 +                                                    izone,
 +                                                    cg0t, cg1t);
 +                    }
                  }
              }
 +            GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
          }
  
          if (rt->nthread > 1)
@@@ -2134,7 -2134,7 +2134,7 @@@ void dd_make_local_top(gmx_domdec_t *dd
                         t_forcerec *fr,
                         rvec *cgcm_or_x,
                         gmx_vsite_t *vsite,
 -                       gmx_mtop_t *mtop, gmx_localtop_t *ltop)
 +                       const gmx_mtop_t *mtop, gmx_localtop_t *ltop)
  {
      gmx_bool bRCheckMB, bRCheck2B;
      real     rc = -1;
              make_la2lc(dd);
              if (fr->bMolPBC)
              {
 -                set_pbc_dd(&pbc, fr->ePBC, dd, TRUE, box);
 -                pbc_null = &pbc;
 +                pbc_null = set_pbc_dd(&pbc, fr->ePBC, dd->nc, TRUE, box);
              }
              else
              {
      if (dd->reverse_top->bExclRequired)
      {
          dd->nbonded_local += nexcl;
-         forcerec_set_excl_load(fr, ltop);
      }
  
      ltop->atomtypes  = mtop->atomtypes;
 -
 -    /* For an error message only */
 -    dd->reverse_top->err_top_global = mtop;
 -    dd->reverse_top->err_top_local  = ltop;
  }
  
 -void dd_sort_local_top(gmx_domdec_t *dd, t_mdatoms *mdatoms,
 +void dd_sort_local_top(gmx_domdec_t *dd, const t_mdatoms *mdatoms,
                         gmx_localtop_t *ltop)
  {
      if (dd->reverse_top->ilsort == ilsortNO_FE)
      }
  }
  
 -gmx_localtop_t *dd_init_local_top(gmx_mtop_t *top_global)
 +gmx_localtop_t *dd_init_local_top(const gmx_mtop_t *top_global)
  {
      gmx_localtop_t *top;
      int             i;
@@@ -2329,7 -2332,7 +2327,7 @@@ static int *make_at2cg(t_block *cgs
      return at2cg;
  }
  
 -t_blocka *make_charge_group_links(gmx_mtop_t *mtop, gmx_domdec_t *dd,
 +t_blocka *make_charge_group_links(const gmx_mtop_t *mtop, gmx_domdec_t *dd,
                                    cginfo_mb_t *cginfo_mb)
  {
      gmx_bool            bExclRequired;
@@@ -2594,7 -2597,7 +2592,7 @@@ static void bonded_cg_distance_mol(gmx_
  /*! \brief Set the distance, function type and atom indices for the longest atom distance involved in intermolecular interactions for two-body and multi-body bonded interactions */
  static void bonded_distance_intermol(const t_ilist *ilists_intermol,
                                       gmx_bool bBCheck,
 -                                     rvec *x, int ePBC, matrix box,
 +                                     const rvec *x, int ePBC, matrix box,
                                       bonded_distance_t *bd_2b,
                                       bonded_distance_t *bd_mb)
  {
  }
  
  //! Compute charge group centers of mass for molecule \p molt
 -static void get_cgcm_mol(gmx_moltype_t *molt, gmx_ffparams_t *ffparams,
 +static void get_cgcm_mol(const gmx_moltype_t *molt,
 +                         const gmx_ffparams_t *ffparams,
                           int ePBC, t_graph *graph, matrix box,
 -                         gmx_vsite_t *vsite,
 -                         rvec *x, rvec *xs, rvec *cg_cm)
 +                         const gmx_vsite_t *vsite,
 +                         const rvec *x, rvec *xs, rvec *cg_cm)
  {
      int n, i;
  
@@@ -2703,9 -2705,8 +2701,9 @@@ static int have_vsite_molt(gmx_moltype_
  }
  
  void dd_bonded_cg_distance(FILE *fplog,
 -                           gmx_mtop_t *mtop,
 -                           t_inputrec *ir, rvec *x, matrix box,
 +                           const gmx_mtop_t *mtop,
 +                           const t_inputrec *ir,
 +                           const rvec *x, matrix box,
                             gmx_bool bBCheck,
                             real *r_2b, real *r_mb)
  {
      bonded_distance_t  bd_2b = { 0, -1, -1, -1 };
      bonded_distance_t  bd_mb = { 0, -1, -1, -1 };
  
 -    bExclRequired = IR_EXCL_FORCES(*ir);
 +    bExclRequired = inputrecExclForces(ir);
  
      vsite = init_vsite(mtop, NULL, TRUE);
  
index 7f54c1f9e5a63c1bb886d6aaf1e3bae8fff55910,f31c3f4fc10772ceac8ef46a7c1fc497bbb749ed..ae5ff5ed23c2116d688420cff6852c0a1bbaab58
@@@ -3,7 -3,7 +3,7 @@@
   *
   * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   * Copyright (c) 2001-2004, The GROMACS development team.
-  * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
+  * Copyright (c) 2013,2014,2015,2016, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
  
  #include "long-range-correction.h"
  
 -#include <math.h>
 +#include <cmath>
  
 -#include "gromacs/legacyheaders/macros.h"
 -#include "gromacs/legacyheaders/names.h"
 -#include "gromacs/legacyheaders/types/commrec.h"
 -#include "gromacs/legacyheaders/types/forcerec.h"
 +#include "gromacs/math/functions.h"
  #include "gromacs/math/units.h"
  #include "gromacs/math/utilities.h"
  #include "gromacs/math/vec.h"
 +#include "gromacs/mdtypes/commrec.h"
 +#include "gromacs/mdtypes/forcerec.h"
 +#include "gromacs/mdtypes/md_enums.h"
 +#include "gromacs/utility/fatalerror.h"
+ #include "gromacs/utility/gmxassert.h"
  
  /* There's nothing special to do here if just masses are perturbed,
   * but if either charge or type is perturbed then the implementation
   * perturbations. The parameter vectors for LJ-PME are likewise
   * undefined when LJ-PME is not active. This works because
   * bHaveChargeOrTypePerturbed handles the control flow. */
- void ewald_LRcorrection(int start, int end,
-                         t_commrec *cr, int thread, t_forcerec *fr,
+ void ewald_LRcorrection(int numAtomsLocal,
+                         t_commrec *cr,
+                         int numThreads, int thread,
+                         t_forcerec *fr,
                          real *chargeA, real *chargeB,
                          real *C6A, real *C6B,
                          real *sigmaA, real *sigmaB,
                          real lambda_q, real lambda_lj,
                          real *dvdlambda_q, real *dvdlambda_lj)
  {
+     int numAtomsToBeCorrected;
+     if (calc_excl_corr)
+     {
+         /* We need to correct all exclusion pairs (cutoff-scheme = group) */
+         numAtomsToBeCorrected = excl->nr;
+         GMX_RELEASE_ASSERT(numAtomsToBeCorrected >= numAtomsLocal, "We might need to do self-corrections");
+     }
+     else
+     {
+         /* We need to correct only self interactions */
+         numAtomsToBeCorrected = numAtomsLocal;
+     }
+     int         start =  (numAtomsToBeCorrected* thread     )/numThreads;
+     int         end   =  (numAtomsToBeCorrected*(thread + 1))/numThreads;
      int         i, i1, i2, j, k, m, iv, jv, q;
 -    atom_id    *AA;
 +    int        *AA;
      double      Vexcl_q, dvdl_excl_q, dvdl_excl_lj; /* Necessary for precision */
      double      Vexcl_lj;
      real        one_4pi_eps;
      vr0_q         = ewc_q*M_2_SQRTPI;
      if (EVDW_PME(fr->vdwtype))
      {
 -        vr0_lj    = -pow(ewc_lj, 6)/6.0;
 +        vr0_lj    = -gmx::power6(ewc_lj)/6.0;
      }
  
      AA           = excl->a;
                              c6A  = c6Ai * C6A[k];
                              if (bDoingLBRule)
                              {
 -                                c6A *= pow(0.5*(sigmaA[i]+sigmaA[k]), 6)*sigma3A[k];
 +                                c6A *= gmx::power6(0.5*(sigmaA[i]+sigmaA[k]))*sigma3A[k];
                              }
                          }
                          if (qqA != 0.0 || c6A != 0.0)
                               */
                              if (dr2 != 0)
                              {
 -                                rinv              = gmx_invsqrt(dr2);
 +                                rinv              = gmx::invsqrt(dr2);
                                  rinv2             = rinv*rinv;
                                  if (qqA != 0.0)
                                  {
  
                                      dr       = 1.0/rinv;
                                      ewcdr    = ewc_q*dr;
 -                                    vc       = qqA*gmx_erf(ewcdr)*rinv;
 +                                    vc       = qqA*std::erf(ewcdr)*rinv;
                                      Vexcl_q += vc;
 -#ifdef GMX_DOUBLE
 +#if GMX_DOUBLE
                                      /* Relative accuracy at R_ERF_R_INACC of 3e-10 */
  #define       R_ERF_R_INACC 0.006
  #else
                  }
              }
              /* Dipole correction on force */
-             if (dipole_coeff != 0)
+             if (dipole_coeff != 0 && i < numAtomsLocal)
              {
                  for (j = 0; (j < DIM); j++)
                  {
                              c6B = c6Bi*C6B[k];
                              if (bDoingLBRule)
                              {
 -                                c6A *= pow(0.5*(sigmaA[i]+sigmaA[k]), 6)*sigma3A[k];
 -                                c6B *= pow(0.5*(sigmaB[i]+sigmaB[k]), 6)*sigma3B[k];
 +                                c6A *= gmx::power6(0.5*(sigmaA[i]+sigmaA[k]))*sigma3A[k];
 +                                c6B *= gmx::power6(0.5*(sigmaB[i]+sigmaB[k]))*sigma3B[k];
                              }
                          }
                          if (qqA != 0.0 || qqB != 0.0 || c6A != 0.0 || c6B != 0.0)
                              dr2 = norm2(dx);
                              if (dr2 != 0)
                              {
 -                                rinv    = gmx_invsqrt(dr2);
 +                                rinv    = gmx::invsqrt(dr2);
                                  rinv2   = rinv*rinv;
                                  if (qqA != 0.0 || qqB != 0.0)
                                  {
                                      real dr;
  
                                      dr           = 1.0/rinv;
 -                                    v            = gmx_erf(ewc_q*dr)*rinv;
 +                                    v            = std::erf(ewc_q*dr)*rinv;
                                      vc           = qqL*v;
                                      Vexcl_q     += vc;
                                      /* fscal is the scalar force pre-multiplied by rinv,
                  }
              }
              /* Dipole correction on force */
-             if (dipole_coeff != 0)
+             if (dipole_coeff != 0 && i < numAtomsLocal)
              {
                  for (j = 0; (j < DIM); j++)
                  {
      if (debug)
      {
          fprintf(debug, "Long Range corrections for Ewald interactions:\n");
-         fprintf(debug, "start=%d,natoms=%d\n", start, end-start);
          fprintf(debug, "q2sum = %g, Vself_q=%g c6sum = %g, Vself_lj=%g\n",
                  L1_q*fr->q2sum[0]+lambda_q*fr->q2sum[1], L1_q*Vself_q[0]+lambda_q*Vself_q[1], L1_lj*fr->c6sum[0]+lambda_lj*fr->c6sum[1], L1_lj*Vself_lj[0]+lambda_lj*Vself_lj[1]);
          fprintf(debug, "Electrostatic Long Range correction: Vexcl=%g\n", Vexcl_q);
index fe076ad81a12eadbd527689576afc0d1374225d6,4fdc7825e674628835d26c8650c93c055c61884f..37ca4fb4d0422eda75f69b14f09d6eb74a5fc4f9
@@@ -3,7 -3,7 +3,7 @@@
   *
   * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   * Copyright (c) 2001-2004, The GROMACS development team.
-  * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
+  * Copyright (c) 2013,2014,2015,2016, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@@ -50,9 -50,9 +50,9 @@@
  #ifndef GMX_EWALD_LONG_RANGE_CORRECTION_H
  #define GMX_EWALD_LONG_RANGE_CORRECTION_H
  
 -#include "gromacs/legacyheaders/types/commrec.h"
 -#include "gromacs/legacyheaders/types/forcerec.h"
  #include "gromacs/math/vectypes.h"
 +#include "gromacs/mdtypes/commrec.h"
 +#include "gromacs/mdtypes/forcerec.h"
  #include "gromacs/topology/block.h"
  #include "gromacs/utility/basedefinitions.h"
  #include "gromacs/utility/real.h"
   * For both cutoff schemes, but only for Coulomb interactions,
   * calculates correction for surface dipole terms. */
  void
- ewald_LRcorrection(int start, int end,
-                    t_commrec *cr, int thread, t_forcerec *fr,
+ ewald_LRcorrection(int numAtomsLocal,
+                    t_commrec *cr,
+                    int numThreads, int thread,
+                    t_forcerec *fr,
                     real *chargeA, real *chargeB,
                     real *C6A, real *C6B,
                     real *sigmaA, real *sigmaB,
index 107e00a8e7b3393ec0eaf42a07f584d29c6ea4eb,c6d18a862ee109a32130b14b50c08caa66b9dcee..90c4b02f5772714ec0be982d49ea8a27a1b365b1
@@@ -36,7 -36,7 +36,7 @@@
   */
  #include "gmxpre.h"
  
 -#include "gromacs/legacyheaders/force.h"
 +#include "force.h"
  
  #include "config.h"
  
  #include <string.h>
  
  #include "gromacs/domdec/domdec.h"
 +#include "gromacs/domdec/domdec_struct.h"
  #include "gromacs/ewald/ewald.h"
  #include "gromacs/ewald/long-range-correction.h"
  #include "gromacs/ewald/pme.h"
 -#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
 -#include "gromacs/legacyheaders/macros.h"
 -#include "gromacs/legacyheaders/mdrun.h"
 -#include "gromacs/legacyheaders/names.h"
 -#include "gromacs/legacyheaders/network.h"
 -#include "gromacs/legacyheaders/nonbonded.h"
 -#include "gromacs/legacyheaders/nrnb.h"
 -#include "gromacs/legacyheaders/ns.h"
 -#include "gromacs/legacyheaders/qmmm.h"
 -#include "gromacs/legacyheaders/txtdump.h"
 -#include "gromacs/legacyheaders/typedefs.h"
 -#include "gromacs/legacyheaders/types/commrec.h"
 +#include "gromacs/gmxlib/network.h"
 +#include "gromacs/gmxlib/nrnb.h"
 +#include "gromacs/gmxlib/nonbonded/nonbonded.h"
  #include "gromacs/listed-forces/listed-forces.h"
  #include "gromacs/math/vec.h"
 +#include "gromacs/math/vecdump.h"
  #include "gromacs/mdlib/forcerec-threading.h"
 +#include "gromacs/mdlib/genborn.h"
 +#include "gromacs/mdlib/mdrun.h"
 +#include "gromacs/mdlib/ns.h"
 +#include "gromacs/mdlib/qmmm.h"
 +#include "gromacs/mdtypes/commrec.h"
 +#include "gromacs/mdtypes/inputrec.h"
 +#include "gromacs/mdtypes/md_enums.h"
  #include "gromacs/pbcutil/ishift.h"
  #include "gromacs/pbcutil/mshift.h"
  #include "gromacs/pbcutil/pbc.h"
  #include "gromacs/timing/wallcycle.h"
 +#include "gromacs/utility/cstringutil.h"
 +#include "gromacs/utility/exceptions.h"
  #include "gromacs/utility/fatalerror.h"
  #include "gromacs/utility/smalloc.h"
  
@@@ -80,18 -78,24 +80,18 @@@ void ns(FILE              *fp
          t_mdatoms         *md,
          t_commrec         *cr,
          t_nrnb            *nrnb,
 -        gmx_bool           bFillGrid,
 -        gmx_bool           bDoLongRangeNS)
 +        gmx_bool           bFillGrid)
  {
      int     nsearch;
  
  
 -    if (!fr->ns.nblist_initialized)
 +    if (!fr->ns->nblist_initialized)
      {
          init_neighbor_list(fp, fr, md->homenr);
      }
  
 -    if (fr->bTwinRange)
 -    {
 -        fr->nlr = 0;
 -    }
 -
      nsearch = search_neighbours(fp, fr, box, top, groups, cr, nrnb, md,
 -                                bFillGrid, bDoLongRangeNS);
 +                                bFillGrid);
      if (debug)
      {
          fprintf(debug, "nsearch = %d\n", nsearch);
         count_nb(cr,nsb,&(top->blocks[ebCGS]),nns,fr->nlr,
         &(top->idef),opts->ngener);
       */
 -    if (fr->ns.dump_nl > 0)
 +    if (fr->ns->dump_nl > 0)
      {
 -        dump_nblist(fp, cr, fr, fr->ns.dump_nl);
 +        dump_nblist(fp, cr, fr, fr->ns->dump_nl);
      }
  }
  
 -static void reduce_thread_forces(int n, rvec *f,
 -                                 tensor vir_q, tensor vir_lj,
 -                                 real *Vcorr_q, real *Vcorr_lj,
 -                                 real *dvdl_q, real *dvdl_lj,
 -                                 int nthreads, f_thread_t *f_t)
 +static void reduce_thread_energies(tensor vir_q, tensor vir_lj,
 +                                   real *Vcorr_q, real *Vcorr_lj,
 +                                   real *dvdl_q, real *dvdl_lj,
 +                                   int nthreads,
 +                                   ewald_corr_thread_t *ewc_t)
  {
 -    int t, i;
 -    int nthreads_loop gmx_unused;
 +    int t;
  
 -    // cppcheck-suppress unreadVariable
 -    nthreads_loop = gmx_omp_nthreads_get(emntBonded);
 -    /* This reduction can run over any number of threads */
 -#pragma omp parallel for num_threads(nthreads_loop) private(t) schedule(static)
 -    for (i = 0; i < n; i++)
 -    {
 -        for (t = 1; t < nthreads; t++)
 -        {
 -            rvec_inc(f[i], f_t[t].f[i]);
 -        }
 -    }
      for (t = 1; t < nthreads; t++)
      {
 -        *Vcorr_q  += f_t[t].Vcorr_q;
 -        *Vcorr_lj += f_t[t].Vcorr_lj;
 -        *dvdl_q   += f_t[t].dvdl[efptCOUL];
 -        *dvdl_lj  += f_t[t].dvdl[efptVDW];
 -        m_add(vir_q, f_t[t].vir_q, vir_q);
 -        m_add(vir_lj, f_t[t].vir_lj, vir_lj);
 +        *Vcorr_q  += ewc_t[t].Vcorr_q;
 +        *Vcorr_lj += ewc_t[t].Vcorr_lj;
 +        *dvdl_q   += ewc_t[t].dvdl[efptCOUL];
 +        *dvdl_lj  += ewc_t[t].dvdl[efptVDW];
 +        m_add(vir_q, ewc_t[t].vir_q, vir_q);
 +        m_add(vir_lj, ewc_t[t].vir_lj, vir_lj);
      }
  }
  
@@@ -133,6 -149,7 +133,6 @@@ void do_force_lowlevel(t_forcerec *fr
                         t_mdatoms  *md,
                         rvec       x[],      history_t  *hist,
                         rvec       f[],
 -                       rvec       f_longrange[],
                         gmx_enerdata_t *enerd,
                         t_fcdata   *fcd,
                         gmx_localtop_t *top,
      t_pbc       pbc;
      real        dvdl_dum[efptNR], dvdl_nb[efptNR];
  
 -#ifdef GMX_MPI
 +#if GMX_MPI
      double  t0 = 0.0, t1, t2, t3; /* time measurement for coarse load balancing */
  #endif
  
          box_size[i] = box[i][i];
      }
  
 -    debug_gmx();
 -
      /* do QMMM first if requested */
      if (fr->bQMMM)
      {
  
      /* Call the short range functions all in one go. */
  
 -#ifdef GMX_MPI
 +#if GMX_MPI
      /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/
  #define TAKETIME FALSE
      if (TAKETIME)
  
          if (bBornRadii)
          {
 -            calc_gb_rad(cr, fr, ir, top, x, &(fr->gblist), born, md, nrnb);
 +            calc_gb_rad(cr, fr, ir, top, x, fr->gblist, born, md, nrnb);
          }
  
          wallcycle_sub_stop(wcycle, ewcsNONBONDED);
          {
              donb_flags |= GMX_NONBONDED_DO_POTENTIAL;
          }
 -        if (flags & GMX_FORCE_DO_LR)
 -        {
 -            donb_flags |= GMX_NONBONDED_DO_LR;
 -        }
  
          wallcycle_sub_start(wcycle, ewcsNONBONDED);
 -        do_nonbonded(fr, x, f, f_longrange, md, excl,
 +        do_nonbonded(fr, x, f, md, excl,
                       &enerd->grpp, nrnb,
                       lambda, dvdl_nb, -1, -1, donb_flags);
  
                      lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]);
                  }
                  reset_foreign_enerdata(enerd);
 -                do_nonbonded(fr, x, f, f_longrange, md, excl,
 +                do_nonbonded(fr, x, f, md, excl,
                               &(enerd->foreign_grpp), nrnb,
                               lam_i, dvdl_dum, -1, -1,
                               (donb_flags & ~GMX_NONBONDED_DO_FORCE) | GMX_NONBONDED_DO_FOREIGNLAMBDA);
          wallcycle_sub_stop(wcycle, ewcsLISTED);
      }
  
 -#ifdef GMX_MPI
 +#if GMX_MPI
      if (TAKETIME)
      {
          t1          = MPI_Wtime();
          enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL];
      }
  
 -    debug_gmx();
 -
 -
      if (debug)
      {
          pr_rvecs(debug, 0, "fshift after SR", fr->fshift, SHIFTS);
          /* Since all atoms are in the rectangular or triclinic unit-cell,
           * only single box vector shifts (2 in x) are required.
           */
 -        set_pbc_dd(&pbc, fr->ePBC, cr->dd, TRUE, box);
 +        set_pbc_dd(&pbc, fr->ePBC, DOMAINDECOMP(cr) ? cr->dd->nc : nullptr,
 +                   TRUE, box);
      }
 -    debug_gmx();
  
      do_force_listed(wcycle, box, ir->fepvals, cr->ms,
                      idef, (const rvec *) x, hist, f, fr,
                      gmx_fatal(FARGS, "TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions");
                  }
  
 -                nthreads = gmx_omp_nthreads_get(emntBonded);
 +                nthreads = fr->nthread_ewc;
  #pragma omp parallel for num_threads(nthreads) schedule(static)
                  for (t = 0; t < nthreads; t++)
                  {
 -                    int     i;
 -                    rvec   *fnv;
 -                    tensor *vir_q, *vir_lj;
 -                    real   *Vcorrt_q, *Vcorrt_lj, *dvdlt_q, *dvdlt_lj;
 -                    if (t == 0)
 +                    try
                      {
 -                        fnv       = fr->f_novirsum;
 -                        vir_q     = &fr->vir_el_recip;
 -                        vir_lj    = &fr->vir_lj_recip;
 -                        Vcorrt_q  = &Vcorr_q;
 -                        Vcorrt_lj = &Vcorr_lj;
 -                        dvdlt_q   = &dvdl_long_range_correction_q;
 -                        dvdlt_lj  = &dvdl_long_range_correction_lj;
 -                    }
 -                    else
 -                    {
 -                        fnv       = fr->f_t[t].f;
 -                        vir_q     = &fr->f_t[t].vir_q;
 -                        vir_lj    = &fr->f_t[t].vir_lj;
 -                        Vcorrt_q  = &fr->f_t[t].Vcorr_q;
 -                        Vcorrt_lj = &fr->f_t[t].Vcorr_lj;
 -                        dvdlt_q   = &fr->f_t[t].dvdl[efptCOUL];
 -                        dvdlt_lj  = &fr->f_t[t].dvdl[efptVDW];
 -                        for (i = 0; i < fr->natoms_force; i++)
 +                        tensor *vir_q, *vir_lj;
 +                        real   *Vcorrt_q, *Vcorrt_lj, *dvdlt_q, *dvdlt_lj;
 +                        if (t == 0)
 +                        {
 +                            vir_q     = &fr->vir_el_recip;
 +                            vir_lj    = &fr->vir_lj_recip;
 +                            Vcorrt_q  = &Vcorr_q;
 +                            Vcorrt_lj = &Vcorr_lj;
 +                            dvdlt_q   = &dvdl_long_range_correction_q;
 +                            dvdlt_lj  = &dvdl_long_range_correction_lj;
 +                        }
 +                        else
                          {
 -                            clear_rvec(fnv[i]);
 +                            vir_q     = &fr->ewc_t[t].vir_q;
 +                            vir_lj    = &fr->ewc_t[t].vir_lj;
 +                            Vcorrt_q  = &fr->ewc_t[t].Vcorr_q;
 +                            Vcorrt_lj = &fr->ewc_t[t].Vcorr_lj;
 +                            dvdlt_q   = &fr->ewc_t[t].dvdl[efptCOUL];
 +                            dvdlt_lj  = &fr->ewc_t[t].dvdl[efptVDW];
 +                            clear_mat(*vir_q);
 +                            clear_mat(*vir_lj);
                          }
 -                        clear_mat(*vir_q);
 -                        clear_mat(*vir_lj);
 +                        *dvdlt_q  = 0;
 +                        *dvdlt_lj = 0;
 +
 +                        /* Threading is only supported with the Verlet cut-off
 +                         * scheme and then only single particle forces (no
 +                         * exclusion forces) are calculated, so we can store
 +                         * the forces in the normal, single fr->f_novirsum array.
 +                         */
-                         ewald_LRcorrection(fr->excl_load[t], fr->excl_load[t+1],
-                                            cr, t, fr,
++                        ewald_LRcorrection(md->homenr, cr, nthreads, t, fr,
 +                                           md->chargeA, md->chargeB,
 +                                           md->sqrt_c6A, md->sqrt_c6B,
 +                                           md->sigmaA, md->sigmaB,
 +                                           md->sigma3A, md->sigma3B,
 +                                           md->nChargePerturbed || md->nTypePerturbed,
 +                                           ir->cutoff_scheme != ecutsVERLET,
 +                                           excl, x, bSB ? boxs : box, mu_tot,
 +                                           ir->ewald_geometry,
 +                                           ir->epsilon_surface,
 +                                           fr->f_novirsum, *vir_q, *vir_lj,
 +                                           Vcorrt_q, Vcorrt_lj,
 +                                           lambda[efptCOUL], lambda[efptVDW],
 +                                           dvdlt_q, dvdlt_lj);
                      }
 -                    *dvdlt_q  = 0;
 -                    *dvdlt_lj = 0;
 -
 -                    ewald_LRcorrection(md->homenr, cr, nthreads, t, fr,
 -                                       md->chargeA, md->chargeB,
 -                                       md->sqrt_c6A, md->sqrt_c6B,
 -                                       md->sigmaA, md->sigmaB,
 -                                       md->sigma3A, md->sigma3B,
 -                                       md->nChargePerturbed || md->nTypePerturbed,
 -                                       ir->cutoff_scheme != ecutsVERLET,
 -                                       excl, x, bSB ? boxs : box, mu_tot,
 -                                       ir->ewald_geometry,
 -                                       ir->epsilon_surface,
 -                                       fnv, *vir_q, *vir_lj,
 -                                       Vcorrt_q, Vcorrt_lj,
 -                                       lambda[efptCOUL], lambda[efptVDW],
 -                                       dvdlt_q, dvdlt_lj);
 +                    GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
                  }
                  if (nthreads > 1)
                  {
 -                    reduce_thread_forces(fr->natoms_force, fr->f_novirsum,
 -                                         fr->vir_el_recip, fr->vir_lj_recip,
 -                                         &Vcorr_q, &Vcorr_lj,
 -                                         &dvdl_long_range_correction_q,
 -                                         &dvdl_long_range_correction_lj,
 -                                         nthreads, fr->f_t);
 +                    reduce_thread_energies(fr->vir_el_recip, fr->vir_lj_recip,
 +                                           &Vcorr_q, &Vcorr_lj,
 +                                           &dvdl_long_range_correction_q,
 +                                           &dvdl_long_range_correction_lj,
 +                                           nthreads, fr->ewc_t);
                  }
                  wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION);
              }
      }
      else
      {
 -        /* Is there a reaction-field exclusion correction needed? */
 -        if (EEL_RF(fr->eeltype) && eelRF_NEC != fr->eeltype)
 +        /* Is there a reaction-field exclusion correction needed?
 +         * With the Verlet scheme, exclusion forces are calculated
 +         * in the non-bonded kernel.
 +         */
 +        if (ir->cutoff_scheme != ecutsVERLET && EEL_RF(fr->eeltype))
          {
 -            /* With the Verlet scheme, exclusion forces are calculated
 -             * in the non-bonded kernel.
 -             */
 -            if (ir->cutoff_scheme != ecutsVERLET)
 -            {
 -                real dvdl_rf_excl      = 0;
 -                enerd->term[F_RF_EXCL] =
 -                    RF_excl_correction(fr, graph, md, excl, x, f,
 -                                       fr->fshift, &pbc, lambda[efptCOUL], &dvdl_rf_excl);
 +            real dvdl_rf_excl      = 0;
 +            enerd->term[F_RF_EXCL] =
 +                RF_excl_correction(fr, graph, md, excl, x, f,
 +                                   fr->fshift, &pbc, lambda[efptCOUL], &dvdl_rf_excl);
  
 -                enerd->dvdl_lin[efptCOUL] += dvdl_rf_excl;
 -            }
 +            enerd->dvdl_lin[efptCOUL] += dvdl_rf_excl;
          }
      }
      where();
 -    debug_gmx();
  
      if (debug)
      {
          print_nrnb(debug, nrnb);
      }
 -    debug_gmx();
  
 -#ifdef GMX_MPI
 +#if GMX_MPI
      if (TAKETIME)
      {
          t2 = MPI_Wtime();
@@@ -723,6 -753,8 +722,6 @@@ void sum_epot(gmx_grppairener_t *grpp, 
      epot[F_LJ]       = sum_v(grpp->nener, grpp->ener[egLJSR]);
      epot[F_LJ14]     = sum_v(grpp->nener, grpp->ener[egLJ14]);
      epot[F_COUL14]   = sum_v(grpp->nener, grpp->ener[egCOUL14]);
 -    epot[F_COUL_LR]  = sum_v(grpp->nener, grpp->ener[egCOULLR]);
 -    epot[F_LJ_LR]    = sum_v(grpp->nener, grpp->ener[egLJLR]);
      /* We have already added 1-2,1-3, and 1-4 terms to F_GBPOL */
      epot[F_GBPOL]   += sum_v(grpp->nener, grpp->ener[egGB]);
  
   * and has been added earlier
   */
      epot[F_BHAM]     = sum_v(grpp->nener, grpp->ener[egBHAMSR]);
 -    epot[F_BHAM_LR]  = sum_v(grpp->nener, grpp->ener[egBHAMLR]);
  
      epot[F_EPOT] = 0;
      for (i = 0; (i < F_EPOT); i++)
@@@ -798,6 -831,10 +797,6 @@@ void sum_dhdl(gmx_enerdata_t *enerd, re
       * For the constraints this is not exact, but we have no other option
       * without literally changing the lengths and reevaluating the energies at each step.
       * (try to remedy this post 4.6 - MRS)
 -     * For the non-bonded LR term we assume that the soft-core (if present)
 -     * no longer affects the energy beyond the short-range cut-off,
 -     * which is a very good approximation (except for exotic settings).
 -     * (investigate how to overcome this post 4.6 - MRS)
       */
      if (fepvals->separate_dvdl[efptBONDED])
      {
@@@ -858,16 -895,26 +857,16 @@@ void reset_foreign_enerdata(gmx_enerdat
      }
  }
  
 -void reset_enerdata(t_forcerec *fr, gmx_bool bNS,
 -                    gmx_enerdata_t *enerd,
 -                    gmx_bool bMaster)
 +void reset_enerdata(gmx_enerdata_t *enerd)
  {
 -    gmx_bool bKeepLR;
      int      i, j;
  
 -    /* First reset all energy components, except for the long range terms
 -     * on the master at non neighbor search steps, since the long range
 -     * terms have already been summed at the last neighbor search step.
 -     */
 -    bKeepLR = (fr->bTwinRange && !bNS);
 +    /* First reset all energy components. */
      for (i = 0; (i < egNR); i++)
      {
 -        if (!(bKeepLR && bMaster && (i == egCOULLR || i == egLJLR)))
 +        for (j = 0; (j < enerd->grpp.nener); j++)
          {
 -            for (j = 0; (j < enerd->grpp.nener); j++)
 -            {
 -                enerd->grpp.ener[i][j] = 0.0;
 -            }
 +            enerd->grpp.ener[i][j] = 0.0;
          }
      }
      for (i = 0; i < efptNR; i++)
      {
          enerd->term[i] = 0.0;
      }
 -    /* Initialize the dVdlambda term with the long range contribution */
 -    /* Initialize the dvdl term with the long range contribution */
      enerd->term[F_DVDL]            = 0.0;
      enerd->term[F_DVDL_COUL]       = 0.0;
      enerd->term[F_DVDL_VDW]        = 0.0;
index 703a1cd471ad50196031a097fa5ad0d9349ed62e,e63381eaf1ab89ea0ca5bb96640300b630ca2673..6b5a102f83a2f9adc68960e8b672e1c253384ca1
   */
  #include "gmxpre.h"
  
 +#include "forcerec.h"
 +
  #include "config.h"
  
  #include <assert.h>
 -#include <math.h>
  #include <stdlib.h>
  #include <string.h>
  
 +#include <cmath>
 +
  #include <algorithm>
  
 +#include "gromacs/commandline/filenm.h"
  #include "gromacs/domdec/domdec.h"
 +#include "gromacs/domdec/domdec_struct.h"
  #include "gromacs/ewald/ewald.h"
 -#include "gromacs/fileio/filenm.h"
 -#include "gromacs/gmxlib/gpu_utils/gpu_utils.h"
 -#include "gromacs/legacyheaders/copyrite.h"
 -#include "gromacs/legacyheaders/force.h"
 -#include "gromacs/legacyheaders/gmx_detect_hardware.h"
 -#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
 -#include "gromacs/legacyheaders/inputrec.h"
 -#include "gromacs/legacyheaders/macros.h"
 -#include "gromacs/legacyheaders/md_logging.h"
 -#include "gromacs/legacyheaders/md_support.h"
 -#include "gromacs/legacyheaders/names.h"
 -#include "gromacs/legacyheaders/network.h"
 -#include "gromacs/legacyheaders/nonbonded.h"
 -#include "gromacs/legacyheaders/ns.h"
 -#include "gromacs/legacyheaders/qmmm.h"
 -#include "gromacs/legacyheaders/tables.h"
 -#include "gromacs/legacyheaders/txtdump.h"
 -#include "gromacs/legacyheaders/typedefs.h"
 -#include "gromacs/legacyheaders/types/commrec.h"
 +#include "gromacs/fileio/filetypes.h"
 +#include "gromacs/gmxlib/md_logging.h"
 +#include "gromacs/gmxlib/network.h"
 +#include "gromacs/gmxlib/nonbonded/nonbonded.h"
 +#include "gromacs/gpu_utils/gpu_utils.h"
 +#include "gromacs/hardware/detecthardware.h"
  #include "gromacs/listed-forces/manage-threading.h"
 +#include "gromacs/listed-forces/pairs.h"
  #include "gromacs/math/calculate-ewald-splitting-coefficient.h"
 +#include "gromacs/math/functions.h"
  #include "gromacs/math/units.h"
  #include "gromacs/math/utilities.h"
  #include "gromacs/math/vec.h"
 +#include "gromacs/mdlib/force.h"
  #include "gromacs/mdlib/forcerec-threading.h"
 +#include "gromacs/mdlib/gmx_omp_nthreads.h"
 +#include "gromacs/mdlib/md_support.h"
  #include "gromacs/mdlib/nb_verlet.h"
  #include "gromacs/mdlib/nbnxn_atomdata.h"
  #include "gromacs/mdlib/nbnxn_gpu_data_mgmt.h"
  #include "gromacs/mdlib/nbnxn_search.h"
  #include "gromacs/mdlib/nbnxn_simd.h"
 +#include "gromacs/mdlib/nbnxn_util.h"
 +#include "gromacs/mdlib/ns.h"
 +#include "gromacs/mdlib/qmmm.h"
 +#include "gromacs/mdlib/sim_util.h"
 +#include "gromacs/mdtypes/commrec.h"
 +#include "gromacs/mdtypes/fcdata.h"
 +#include "gromacs/mdtypes/group.h"
 +#include "gromacs/mdtypes/inputrec.h"
 +#include "gromacs/mdtypes/md_enums.h"
  #include "gromacs/pbcutil/ishift.h"
  #include "gromacs/pbcutil/pbc.h"
  #include "gromacs/simd/simd.h"
 +#include "gromacs/tables/forcetable.h"
  #include "gromacs/topology/mtop_util.h"
 +#include "gromacs/trajectory/trajectoryframe.h"
 +#include "gromacs/utility/cstringutil.h"
  #include "gromacs/utility/exceptions.h"
  #include "gromacs/utility/fatalerror.h"
 +#include "gromacs/utility/gmxassert.h"
 +#include "gromacs/utility/pleasecite.h"
  #include "gromacs/utility/smalloc.h"
  #include "gromacs/utility/stringutil.h"
  
  #include "nbnxn_gpu_jit_support.h"
  
 +const char *egrp_nm[egNR+1] = {
 +    "Coul-SR", "LJ-SR", "Buck-SR",
 +    "Coul-14", "LJ-14", NULL
 +};
 +
  t_forcerec *mk_forcerec(void)
  {
      t_forcerec *fr;
@@@ -180,6 -164,7 +180,6 @@@ static real *make_ljpme_c6grid(const gm
      int        i, j, k, atnr;
      real       c6, c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
      real      *grid;
 -    const real oneOverSix = 1.0 / 6.0;
  
      /* For LJ-PME simulations, we correct the energies with the reciprocal space
       * inside of the cut-off. To do this the non-bonded kernels need to have
              c12i = idef->iparams[i*(atnr+1)].lj.c12;
              c6j  = idef->iparams[j*(atnr+1)].lj.c6;
              c12j = idef->iparams[j*(atnr+1)].lj.c12;
 -            c6   = sqrt(c6i * c6j);
 +            c6   = std::sqrt(c6i * c6j);
              if (fr->ljpme_combination_rule == eljpmeLB
                  && !gmx_numzero(c6) && !gmx_numzero(c12i) && !gmx_numzero(c12j))
              {
 -                sigmai = pow(c12i / c6i, oneOverSix);
 -                sigmaj = pow(c12j / c6j, oneOverSix);
 +                sigmai = gmx::sixthroot(c12i / c6i);
 +                sigmaj = gmx::sixthroot(c12j / c6j);
                  epsi   = c6i * c6i / c12i;
                  epsj   = c6j * c6j / c12j;
 -                c6     = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 6);
 +                c6     = std::sqrt(epsi * epsj) * gmx::power6(0.5*(sigmai+sigmaj));
              }
              /* Store the elements at the same relative positions as C6 in nbfp in order
               * to simplify access in the kernels
@@@ -221,6 -206,7 +221,6 @@@ static real *mk_nbfp_combination_rule(c
      int        i, j, atnr;
      real       c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
      real       c6, c12;
 -    const real oneOverSix = 1.0 / 6.0;
  
      atnr = idef->atnr;
      snew(nbfp, 2*atnr*atnr);
              c12i = idef->iparams[i*(atnr+1)].lj.c12;
              c6j  = idef->iparams[j*(atnr+1)].lj.c6;
              c12j = idef->iparams[j*(atnr+1)].lj.c12;
 -            c6   = sqrt(c6i  * c6j);
 -            c12  = sqrt(c12i * c12j);
 +            c6   = std::sqrt(c6i  * c6j);
 +            c12  = std::sqrt(c12i * c12j);
              if (comb_rule == eCOMB_ARITHMETIC
                  && !gmx_numzero(c6) && !gmx_numzero(c12))
              {
 -                sigmai = pow(c12i / c6i, oneOverSix);
 -                sigmaj = pow(c12j / c6j, oneOverSix);
 +                sigmai = gmx::sixthroot(c12i / c6i);
 +                sigmaj = gmx::sixthroot(c12j / c6j);
                  epsi   = c6i * c6i / c12i;
                  epsj   = c6j * c6j / c12j;
 -                c6     = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 6);
 -                c12    = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 12);
 +                c6     = std::sqrt(epsi * epsj) * gmx::power6(0.5*(sigmai+sigmaj));
 +                c12    = std::sqrt(epsi * epsj) * gmx::power12(0.5*(sigmai+sigmaj));
              }
              C6(nbfp, atnr, i, j)   = c6*6.0;
              C12(nbfp, atnr, i, j)  = c12*12.0;
@@@ -1288,8 -1274,9 +1288,8 @@@ static void set_bham_b_max(FILE *fplog
      }
  }
  
 -static void make_nbf_tables(FILE *fp, const output_env_t oenv,
 +static void make_nbf_tables(FILE *fp,
                              t_forcerec *fr, real rtab,
 -                            const t_commrec *cr,
                              const char *tabfn, char *eg1, char *eg2,
                              t_nblists *nbl)
  {
          sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "_%s_%s.%s",
                  eg1, eg2, ftp2ext(efXVG));
      }
 -    nbl->table_elec_vdw = make_tables(fp, oenv, fr, MASTER(cr), buf, rtab, 0);
 +    nbl->table_elec_vdw = make_tables(fp, fr, buf, rtab, 0);
      /* Copy the contents of the table to separate coulomb and LJ tables too,
       * to improve cache performance.
       */
       * the table data to be aligned to 16-byte. The pointers could be freed
       * but currently aren't.
       */
 -    nbl->table_elec.interaction   = GMX_TABLE_INTERACTION_ELEC;
 -    nbl->table_elec.format        = nbl->table_elec_vdw.format;
 -    nbl->table_elec.r             = nbl->table_elec_vdw.r;
 -    nbl->table_elec.n             = nbl->table_elec_vdw.n;
 -    nbl->table_elec.scale         = nbl->table_elec_vdw.scale;
 -    nbl->table_elec.scale_exp     = nbl->table_elec_vdw.scale_exp;
 -    nbl->table_elec.formatsize    = nbl->table_elec_vdw.formatsize;
 -    nbl->table_elec.ninteractions = 1;
 -    nbl->table_elec.stride        = nbl->table_elec.formatsize * nbl->table_elec.ninteractions;
 -    snew_aligned(nbl->table_elec.data, nbl->table_elec.stride*(nbl->table_elec.n+1), 32);
 -
 -    nbl->table_vdw.interaction   = GMX_TABLE_INTERACTION_VDWREP_VDWDISP;
 -    nbl->table_vdw.format        = nbl->table_elec_vdw.format;
 -    nbl->table_vdw.r             = nbl->table_elec_vdw.r;
 -    nbl->table_vdw.n             = nbl->table_elec_vdw.n;
 -    nbl->table_vdw.scale         = nbl->table_elec_vdw.scale;
 -    nbl->table_vdw.scale_exp     = nbl->table_elec_vdw.scale_exp;
 -    nbl->table_vdw.formatsize    = nbl->table_elec_vdw.formatsize;
 -    nbl->table_vdw.ninteractions = 2;
 -    nbl->table_vdw.stride        = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions;
 -    snew_aligned(nbl->table_vdw.data, nbl->table_vdw.stride*(nbl->table_vdw.n+1), 32);
 -
 -    for (i = 0; i <= nbl->table_elec_vdw.n; i++)
 +    snew(nbl->table_elec, 1);
 +    nbl->table_elec->interaction   = GMX_TABLE_INTERACTION_ELEC;
 +    nbl->table_elec->format        = nbl->table_elec_vdw->format;
 +    nbl->table_elec->r             = nbl->table_elec_vdw->r;
 +    nbl->table_elec->n             = nbl->table_elec_vdw->n;
 +    nbl->table_elec->scale         = nbl->table_elec_vdw->scale;
 +    nbl->table_elec->formatsize    = nbl->table_elec_vdw->formatsize;
 +    nbl->table_elec->ninteractions = 1;
 +    nbl->table_elec->stride        = nbl->table_elec->formatsize * nbl->table_elec->ninteractions;
 +    snew_aligned(nbl->table_elec->data, nbl->table_elec->stride*(nbl->table_elec->n+1), 32);
 +
 +    snew(nbl->table_vdw, 1);
 +    nbl->table_vdw->interaction   = GMX_TABLE_INTERACTION_VDWREP_VDWDISP;
 +    nbl->table_vdw->format        = nbl->table_elec_vdw->format;
 +    nbl->table_vdw->r             = nbl->table_elec_vdw->r;
 +    nbl->table_vdw->n             = nbl->table_elec_vdw->n;
 +    nbl->table_vdw->scale         = nbl->table_elec_vdw->scale;
 +    nbl->table_vdw->formatsize    = nbl->table_elec_vdw->formatsize;
 +    nbl->table_vdw->ninteractions = 2;
 +    nbl->table_vdw->stride        = nbl->table_vdw->formatsize * nbl->table_vdw->ninteractions;
 +    snew_aligned(nbl->table_vdw->data, nbl->table_vdw->stride*(nbl->table_vdw->n+1), 32);
 +
 +    for (i = 0; i <= nbl->table_elec_vdw->n; i++)
      {
          for (j = 0; j < 4; j++)
          {
 -            nbl->table_elec.data[4*i+j] = nbl->table_elec_vdw.data[12*i+j];
 +            nbl->table_elec->data[4*i+j] = nbl->table_elec_vdw->data[12*i+j];
          }
          for (j = 0; j < 8; j++)
          {
 -            nbl->table_vdw.data[8*i+j] = nbl->table_elec_vdw.data[12*i+4+j];
 +            nbl->table_vdw->data[8*i+j] = nbl->table_elec_vdw->data[12*i+4+j];
          }
      }
  }
@@@ -1500,6 -1487,11 +1500,6 @@@ void forcerec_set_ranges(t_forcerec *fr
      if (fr->natoms_force_constr > fr->nalloc_force)
      {
          fr->nalloc_force = over_alloc_dd(fr->natoms_force_constr);
 -
 -        if (fr->bTwinRange)
 -        {
 -            srenew(fr->f_twin, fr->nalloc_force);
 -        }
      }
  
      if (fr->bF_NoVirSum)
@@@ -1527,6 -1519,37 +1527,6 @@@ static real cutoff_inf(real cutoff
      return cutoff;
  }
  
 -static void make_adress_tf_tables(FILE *fp, const output_env_t oenv,
 -                                  t_forcerec *fr, const t_inputrec *ir,
 -                                  const char *tabfn, const gmx_mtop_t *mtop,
 -                                  matrix     box)
 -{
 -    char buf[STRLEN];
 -    int  i, j;
 -
 -    if (tabfn == NULL)
 -    {
 -        gmx_fatal(FARGS, "No thermoforce table file given. Use -tabletf to specify a file\n");
 -        return;
 -    }
 -
 -    snew(fr->atf_tabs, ir->adress->n_tf_grps);
 -
 -    sprintf(buf, "%s", tabfn);
 -    for (i = 0; i < ir->adress->n_tf_grps; i++)
 -    {
 -        j = ir->adress->tf_table_index[i]; /* get energy group index */
 -        sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "tf_%s.%s",
 -                *(mtop->groups.grpname[mtop->groups.grps[egcENER].nm_ind[j]]), ftp2ext(efXVG));
 -        if (fp)
 -        {
 -            fprintf(fp, "loading tf table for energygrp index %d from %s\n", ir->adress->tf_table_index[i], buf);
 -        }
 -        fr->atf_tabs[i] = make_atf_table(fp, oenv, fr, buf, box);
 -    }
 -
 -}
 -
  gmx_bool can_use_allvsall(const t_inputrec *ir, gmx_bool bPrintNote, t_commrec *cr, FILE *fp)
  {
      gmx_bool bAllvsAll;
@@@ -1620,7 -1643,7 +1620,7 @@@ static void pick_nbnxn_kernel_cpu(cons
      *kernel_type = nbnxnk4x4_PlainC;
      *ewald_excl  = ewaldexclTable;
  
 -#ifdef GMX_NBNXN_SIMD
 +#if GMX_SIMD
      {
  #ifdef GMX_NBNXN_SIMD_4XN
          *kernel_type = nbnxnk4xN_SIMD_4xN;
           */
          *kernel_type = nbnxnk4xN_SIMD_4xN;
  
 -#ifndef GMX_SIMD_HAVE_FMA
 +#if !GMX_SIMD_HAVE_FMA
          if (EEL_PME_EWALD(ir->coulombtype) ||
              EVDW_PME(ir->vdwtype))
          {
           * In single precision, this is faster on Bulldozer.
           */
  #if GMX_SIMD_REAL_WIDTH >= 8 || \
 -        (GMX_SIMD_REAL_WIDTH >= 4 && defined GMX_SIMD_HAVE_FMA && !defined GMX_DOUBLE) || \
 -        defined GMX_SIMD_IBM_QPX
 +        (GMX_SIMD_REAL_WIDTH >= 4 && GMX_SIMD_HAVE_FMA && !GMX_DOUBLE) || GMX_SIMD_IBM_QPX
          *ewald_excl = ewaldexclAnalytical;
  #endif
          if (getenv("GMX_NBNXN_EWALD_TABLE") != NULL)
          }
  
      }
 -#endif /* GMX_NBNXN_SIMD */
 +#endif // GMX_SIMD
  }
  
  
@@@ -1717,11 -1741,23 +1717,11 @@@ const char *lookup_nbnxn_kernel_name(in
              break;
          case nbnxnk4xN_SIMD_4xN:
          case nbnxnk4xN_SIMD_2xNN:
 -#ifdef GMX_NBNXN_SIMD
 -#if defined GMX_SIMD_X86_SSE2
 -            returnvalue = "SSE2";
 -#elif defined GMX_SIMD_X86_SSE4_1
 -            returnvalue = "SSE4.1";
 -#elif defined GMX_SIMD_X86_AVX_128_FMA
 -            returnvalue = "AVX_128_FMA";
 -#elif defined GMX_SIMD_X86_AVX_256
 -            returnvalue = "AVX_256";
 -#elif defined GMX_SIMD_X86_AVX2_256
 -            returnvalue = "AVX2_256";
 -#else
 +#if GMX_SIMD
              returnvalue = "SIMD";
 -#endif
 -#else  /* GMX_NBNXN_SIMD */
 +#else  // GMX_SIMD
              returnvalue = "not available";
 -#endif /* GMX_NBNXN_SIMD */
 +#endif // GMX_SIMD
              break;
          case nbnxnk8x8x8_GPU: returnvalue    = "GPU"; break;
          case nbnxnk8x8x8_PlainC: returnvalue = "plain C"; break;
@@@ -1781,8 -1817,8 +1781,8 @@@ static void pick_nbnxn_kernel(FIL
      {
          fprintf(fp, "\nUsing %s %dx%d non-bonded kernels\n\n",
                  lookup_nbnxn_kernel_name(*kernel_type),
 -                nbnxn_kernel_to_ci_size(*kernel_type),
 -                nbnxn_kernel_to_cj_size(*kernel_type));
 +                nbnxn_kernel_to_cluster_i_size(*kernel_type),
 +                nbnxn_kernel_to_cluster_j_size(*kernel_type));
  
          if (nbnxnk4x4_PlainC == *kernel_type ||
              nbnxnk8x8x8_PlainC == *kernel_type)
@@@ -1901,7 -1937,7 +1901,7 @@@ static void init_ewald_f_table(interact
      sfree_aligned(ic->tabq_vdw_F);
      sfree_aligned(ic->tabq_vdw_V);
  
 -    if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
 +    if (EEL_PME_EWALD(ic->eeltype))
      {
          /* Create the original table data in FDV0 */
          snew_aligned(ic->tabq_coul_FDV0, ic->tabq_size*4, 32);
@@@ -1925,7 -1961,7 +1925,7 @@@ void init_interaction_const_tables(FIL
                                     interaction_const_t *ic,
                                     real                 rtab)
  {
 -    if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype) || EVDW_PME(ic->vdwtype))
 +    if (EEL_PME_EWALD(ic->eeltype) || EVDW_PME(ic->vdwtype))
      {
          init_ewald_f_table(ic, rtab);
  
@@@ -1956,9 -1992,9 +1956,9 @@@ static void force_switch_constants(rea
       * force/p   = r^-(p+1) + c2*r^2 + c3*r^3
       * potential = r^-p + c2/3*r^3 + c3/4*r^4 + cpot
       */
 -    sc->c2   =  ((p + 1)*rsw - (p + 4)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 2));
 -    sc->c3   = -((p + 1)*rsw - (p + 3)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 3));
 -    sc->cpot = -pow(rc, -p) + p*sc->c2/3*pow(rc - rsw, 3) + p*sc->c3/4*pow(rc - rsw, 4);
 +    sc->c2   =  ((p + 1)*rsw - (p + 4)*rc)/(pow(rc, p + 2)*gmx::square(rc - rsw));
 +    sc->c3   = -((p + 1)*rsw - (p + 3)*rc)/(pow(rc, p + 2)*gmx::power3(rc - rsw));
 +    sc->cpot = -pow(rc, -p) + p*sc->c2/3*gmx::power3(rc - rsw) + p*sc->c3/4*gmx::power4(rc - rsw);
  }
  
  static void potential_switch_constants(real rsw, real rc,
       * force      = force*dsw - potential*sw
       * potential *= sw
       */
 -    sc->c3 = -10*pow(rc - rsw, -3);
 -    sc->c4 =  15*pow(rc - rsw, -4);
 -    sc->c5 =  -6*pow(rc - rsw, -5);
 +    sc->c3 = -10/gmx::power3(rc - rsw);
 +    sc->c4 =  15/gmx::power4(rc - rsw);
 +    sc->c5 =  -6/gmx::power5(rc - rsw);
  }
  
  /*! \brief Construct interaction constants
@@@ -1989,6 -2025,8 +1989,6 @@@ init_interaction_const(FIL
                         const t_forcerec           *fr)
  {
      interaction_const_t *ic;
 -    const real           minusSix          = -6.0;
 -    const real           minusTwelve       = -12.0;
  
      snew(ic, 1);
  
      snew_aligned(ic->tabq_coul_V, 16, 32);
  
      ic->rlist           = fr->rlist;
 -    ic->rlistlong       = fr->rlistlong;
  
      /* Lennard-Jones */
      ic->vdwtype         = fr->vdwtype;
      {
          case eintmodPOTSHIFT:
              /* Only shift the potential, don't touch the force */
 -            ic->dispersion_shift.cpot = -pow(ic->rvdw, minusSix);
 -            ic->repulsion_shift.cpot  = -pow(ic->rvdw, minusTwelve);
 +            ic->dispersion_shift.cpot = -1.0/gmx::power6(ic->rvdw);
 +            ic->repulsion_shift.cpot  = -1.0/gmx::power12(ic->rvdw);
              if (EVDW_PME(ic->vdwtype))
              {
                  real crc2;
  
 -                crc2            = sqr(ic->ewaldcoeff_lj*ic->rvdw);
 -                ic->sh_lj_ewald = (exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)*pow(ic->rvdw, minusSix);
 +                crc2            = gmx::square(ic->ewaldcoeff_lj*ic->rvdw);
 +                ic->sh_lj_ewald = (std::exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)/gmx::power6(ic->rvdw);
              }
              break;
          case eintmodFORCESWITCH:
  
      if (fr->coulomb_modifier == eintmodPOTSHIFT)
      {
 -        ic->sh_ewald = gmx_erfc(ic->ewaldcoeff_q*ic->rcoulomb);
 +        ic->sh_ewald = std::erfc(ic->ewaldcoeff_q*ic->rcoulomb);
      }
      else
      {
@@@ -2202,10 -2241,7 +2202,10 @@@ static void init_nb_verlet(FIL
  
              bSimpleList = nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type);
  
 -            if (bSimpleList && (fr->vdwtype == evdwCUT && (fr->vdw_modifier == eintmodNONE || fr->vdw_modifier == eintmodPOTSHIFT)))
 +            if (fr->vdwtype == evdwCUT &&
 +                (fr->vdw_modifier == eintmodNONE ||
 +                 fr->vdw_modifier == eintmodPOTSHIFT) &&
 +                getenv("GMX_NO_LJ_COMB_RULE") == NULL)
              {
                  /* Plain LJ cut-off: we can optimize with combination rules */
                  enbnxninitcombrule = enbnxninitcombruleDETECT;
      {
          /* init the NxN GPU data; the last argument tells whether we'll have
           * both local and non-local NB calculation on GPU */
 -        nbnxn_gpu_init(fp, &nbv->gpu_nbv,
 +        nbnxn_gpu_init(&nbv->gpu_nbv,
                         &fr->hwinfo->gpu_info,
                         fr->gpu_opt,
                         fr->ic,
           * texture objects are used), but as this is initialization code, there
           * is no point in complicating things.
           */
 -#ifdef GMX_THREAD_MPI
 +#if GMX_THREAD_MPI
          if (PAR(cr))
          {
              gmx_barrier(cr);
              char *end;
  
              nbv->min_ci_balanced = strtol(env, &end, 10);
 -            if (!end || (*end != 0) || nbv->min_ci_balanced <= 0)
 +            if (!end || (*end != 0) || nbv->min_ci_balanced < 0)
              {
 -                gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, positive integer required", env);
 +                gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, non-negative integer required", env);
              }
  
              if (debug)
@@@ -2314,6 -2350,7 +2314,6 @@@ gmx_bool usingGpu(nonbonded_verlet_t *n
  }
  
  void init_forcerec(FILE              *fp,
 -                   const output_env_t oenv,
                     t_forcerec        *fr,
                     t_fcdata          *fcd,
                     const t_inputrec  *ir,
                     const t_commrec   *cr,
                     matrix             box,
                     const char        *tabfn,
 -                   const char        *tabafn,
                     const char        *tabpfn,
                     const t_filenm    *tabbfnm,
                     const char        *nbpu_opt,
      double         dbl;
      const t_block *cgs;
      gmx_bool       bGenericKernelOnly;
 -    gmx_bool       bMakeTables, bMakeSeparate14Table, bSomeNormalNbListsAreInUse;
 +    gmx_bool       needGroupSchemeTables, bSomeNormalNbListsAreInUse;
      gmx_bool       bFEP_NonBonded;
      int           *nm_ind, egp_flags;
  
          fr->n_tpi = 0;
      }
  
 -    /* Copy AdResS parameters */
 -    if (ir->bAdress)
 +    if (ir->coulombtype == eelRF_NEC_UNSUPPORTED)
      {
 -        fr->adress_type           = ir->adress->type;
 -        fr->adress_const_wf       = ir->adress->const_wf;
 -        fr->adress_ex_width       = ir->adress->ex_width;
 -        fr->adress_hy_width       = ir->adress->hy_width;
 -        fr->adress_icor           = ir->adress->icor;
 -        fr->adress_site           = ir->adress->site;
 -        fr->adress_ex_forcecap    = ir->adress->ex_forcecap;
 -        fr->adress_do_hybridpairs = ir->adress->do_hybridpairs;
 -
 -
 -        snew(fr->adress_group_explicit, ir->adress->n_energy_grps);
 -        for (i = 0; i < ir->adress->n_energy_grps; i++)
 -        {
 -            fr->adress_group_explicit[i] = ir->adress->group_explicit[i];
 -        }
 +        gmx_fatal(FARGS, "%s electrostatics is no longer supported",
 +                  eel_names[ir->coulombtype]);
 +    }
  
 -        fr->n_adress_tf_grps = ir->adress->n_tf_grps;
 -        snew(fr->adress_tf_table_index, fr->n_adress_tf_grps);
 -        for (i = 0; i < fr->n_adress_tf_grps; i++)
 -        {
 -            fr->adress_tf_table_index[i] = ir->adress->tf_table_index[i];
 -        }
 -        copy_rvec(ir->adress->refs, fr->adress_refs);
 +    if (ir->bAdress)
 +    {
 +        gmx_fatal(FARGS, "AdResS simulations are no longer supported");
      }
 -    else
 +    if (ir->useTwinRange)
      {
 -        fr->adress_type           = eAdressOff;
 -        fr->adress_do_hybridpairs = FALSE;
 +        gmx_fatal(FARGS, "Twin-range simulations are no longer supported");
      }
 -
      /* Copy the user determined parameters */
      fr->userint1  = ir->userint1;
      fr->userint2  = ir->userint2;
      if (ir->fepvals->bScCoul)
      {
          fr->sc_alphacoul  = ir->fepvals->sc_alpha;
 -        fr->sc_sigma6_min = pow(ir->fepvals->sc_sigma_min, 6);
 +        fr->sc_sigma6_min = gmx::power6(ir->fepvals->sc_sigma_min);
      }
      else
      {
      }
      fr->sc_power      = ir->fepvals->sc_power;
      fr->sc_r_power    = ir->fepvals->sc_r_power;
 -    fr->sc_sigma6_def = pow(ir->fepvals->sc_sigma, 6);
 +    fr->sc_sigma6_def = gmx::power6(ir->fepvals->sc_sigma);
  
      env = getenv("GMX_SCSIGMA_MIN");
      if (env != NULL)
      {
          dbl = 0;
          sscanf(env, "%20lf", &dbl);
 -        fr->sc_sigma6_min = pow(dbl, 6);
 +        fr->sc_sigma6_min = gmx::power6(dbl);
          if (fp)
          {
              fprintf(fp, "Setting the minimum soft core sigma to %g nm\n", dbl);
      copy_rvec(ir->posres_com, fr->posres_com);
      copy_rvec(ir->posres_comB, fr->posres_comB);
      fr->rlist                    = cutoff_inf(ir->rlist);
 -    fr->rlistlong                = cutoff_inf(ir->rlistlong);
      fr->eeltype                  = ir->coulombtype;
      fr->vdwtype                  = ir->vdwtype;
      fr->ljpme_combination_rule   = ir->ljpme_combination_rule;
  
          case eelRF:
          case eelGRF:
 -        case eelRF_NEC:
              fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
              break;
  
      fr->rcoulomb         = cutoff_inf(ir->rcoulomb);
      fr->rcoulomb_switch  = ir->rcoulomb_switch;
  
 -    fr->bTwinRange = fr->rlistlong > fr->rlist;
 -    fr->bEwald     = (EEL_PME(fr->eeltype) || fr->eeltype == eelEWALD);
 +    fr->bEwald     = EEL_PME_EWALD(fr->eeltype);
  
      fr->reppow     = mtop->ffparams.reppow;
  
  
          if (fp)
          {
 -            fprintf(fp, "Table routines are used for coulomb: %s\n", bool_names[fr->bcoultab]);
 -            fprintf(fp, "Table routines are used for vdw:     %s\n", bool_names[fr->bvdwtab ]);
 +            fprintf(fp, "Table routines are used for coulomb: %s\n",
 +                    gmx::boolToString(fr->bcoultab));
 +            fprintf(fp, "Table routines are used for vdw:     %s\n",
 +                    gmx::boolToString(fr->bvdwtab));
          }
  
          if (fr->bvdwtab == TRUE)
      fr->bF_NoVirSum = (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype) ||
                         gmx_mtop_ftype_count(mtop, F_POSRES) > 0 ||
                         gmx_mtop_ftype_count(mtop, F_FBPOSRES) > 0 ||
 -                       IR_ELEC_FIELD(*ir) ||
 -                       (fr->adress_icor != eAdressICOff)
 +                       inputrecElecField(ir)
                         );
  
      if (fr->cutoff_scheme == ecutsGROUP &&
      }
  
      fr->eDispCorr = ir->eDispCorr;
 +    fr->numAtomsForDispersionCorrection = mtop->natoms;
      if (ir->eDispCorr != edispcNO)
      {
          set_avcsixtwelve(fp, fr, mtop);
      /* Generate the GB table if needed */
      if (fr->bGB)
      {
 -#ifdef GMX_DOUBLE
 +#if GMX_DOUBLE
          fr->gbtabscale = 2000;
  #else
          fr->gbtabscale = 500;
  #endif
  
          fr->gbtabr = 100;
 -        fr->gbtab  = make_gb_table(oenv, fr);
 +        fr->gbtab  = make_gb_table(fr);
  
          init_gb(&fr->born, fr, ir, mtop, ir->gb_algorithm);
  
      /*This now calculates sum for q and c6*/
      set_chargesum(fp, fr, mtop);
  
 -    /* if we are using LR electrostatics, and they are tabulated,
 -     * the tables will contain modified coulomb interactions.
 -     * Since we want to use the non-shifted ones for 1-4
 -     * coulombic interactions, we must have an extra set of tables.
 -     */
 -
 -    /* Construct tables.
 -     * A little unnecessary to make both vdw and coul tables sometimes,
 -     * but what the heck... */
 -
 -    bMakeTables = fr->bcoultab || fr->bvdwtab || fr->bEwald ||
 -        (ir->eDispCorr != edispcNO && ir_vdw_switched(ir));
 -
 -    bMakeSeparate14Table = ((!bMakeTables || fr->eeltype != eelCUT || fr->vdwtype != evdwCUT ||
 -                             fr->coulomb_modifier != eintmodNONE ||
 -                             fr->vdw_modifier != eintmodNONE ||
 -                             fr->bBHAM || fr->bEwald) &&
 -                            (gmx_mtop_ftype_count(mtop, F_LJ14) > 0 ||
 -                             gmx_mtop_ftype_count(mtop, F_LJC14_Q) > 0 ||
 -                             gmx_mtop_ftype_count(mtop, F_LJC_PAIRS_NB) > 0));
 +    /* Construct tables for the group scheme. A little unnecessary to
 +     * make both vdw and coul tables sometimes, but what the
 +     * heck. Note that both cutoff schemes construct Ewald tables in
 +     * init_interaction_const_tables. */
 +    needGroupSchemeTables = (ir->cutoff_scheme == ecutsGROUP &&
 +                             (fr->bcoultab || fr->bvdwtab));
  
      negp_pp   = ir->opts.ngener - ir->nwall;
      negptable = 0;
 -    if (!bMakeTables)
 +    if (!needGroupSchemeTables)
      {
          bSomeNormalNbListsAreInUse = TRUE;
          fr->nnblists               = 1;
      }
      else
      {
 -        bSomeNormalNbListsAreInUse = (ir->eDispCorr != edispcNO);
 +        bSomeNormalNbListsAreInUse = FALSE;
          for (egi = 0; egi < negp_pp; egi++)
          {
              for (egj = egi; egj < negp_pp; egj++)
          }
      }
  
 -    if (ir->adress)
 -    {
 -        fr->nnblists *= 2;
 -    }
 -
      snew(fr->nblists, fr->nnblists);
  
      /* This code automatically gives table length tabext without cut-off's,
       * in that case grompp should already have checked that we do not need
       * normal tables and we only generate tables for 1-4 interactions.
       */
 -    rtab = ir->rlistlong + ir->tabext;
 +    rtab = ir->rlist + ir->tabext;
  
 -    if (bMakeTables)
 +    if (needGroupSchemeTables)
      {
          /* make tables for ordinary interactions */
          if (bSomeNormalNbListsAreInUse)
          {
 -            make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
 -            if (ir->adress)
 -            {
 -                make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[fr->nnblists/2]);
 -            }
 -            if (!bMakeSeparate14Table)
 -            {
 -                fr->tab14 = fr->nblists[0].table_elec_vdw;
 -            }
 +            make_nbf_tables(fp, fr, rtab, tabfn, NULL, NULL, &fr->nblists[0]);
              m = 1;
          }
          else
                              fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = m;
                          }
                          /* Read the table file with the two energy groups names appended */
 -                        make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
 +                        make_nbf_tables(fp, fr, rtab, tabfn,
                                          *mtop->groups.grpname[nm_ind[egi]],
                                          *mtop->groups.grpname[nm_ind[egj]],
                                          &fr->nblists[m]);
 -                        if (ir->adress)
 -                        {
 -                            make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
 -                                            *mtop->groups.grpname[nm_ind[egi]],
 -                                            *mtop->groups.grpname[nm_ind[egj]],
 -                                            &fr->nblists[fr->nnblists/2+m]);
 -                        }
                          m++;
                      }
                      else if (fr->nnblists > 1)
              }
          }
      }
 -    else if ((fr->eDispCorr != edispcNO) &&
 -             ((fr->vdw_modifier == eintmodPOTSWITCH) ||
 -              (fr->vdw_modifier == eintmodFORCESWITCH) ||
 -              (fr->vdw_modifier == eintmodPOTSHIFT)))
 -    {
 -        /* Tables might not be used for the potential modifier interactions per se, but
 -         * we still need them to evaluate switch/shift dispersion corrections in this case.
 -         */
 -        make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
 -    }
  
 -    if (bMakeSeparate14Table)
 +    /* Tables might not be used for the potential modifier
 +     * interactions per se, but we still need them to evaluate
 +     * switch/shift dispersion corrections in this case. */
 +    if (fr->eDispCorr != edispcNO)
      {
 -        /* generate extra tables with plain Coulomb for 1-4 interactions only */
 -        fr->tab14 = make_tables(fp, oenv, fr, MASTER(cr), tabpfn, rtab,
 -                                GMX_MAKETABLES_14ONLY);
 +        fr->dispersionCorrectionTable = makeDispersionCorrectionTable(fp, fr, rtab, tabfn);
      }
  
 -    /* Read AdResS Thermo Force table if needed */
 -    if (fr->adress_icor == eAdressICThermoForce)
 +    /* We want to use unmodified tables for 1-4 coulombic
 +     * interactions, so we must in general have an extra set of
 +     * tables. */
 +    if (gmx_mtop_ftype_count(mtop, F_LJ14) > 0 ||
 +        gmx_mtop_ftype_count(mtop, F_LJC14_Q) > 0 ||
 +        gmx_mtop_ftype_count(mtop, F_LJC_PAIRS_NB) > 0)
      {
 -        /* old todo replace */
 -
 -        if (ir->adress->n_tf_grps > 0)
 -        {
 -            make_adress_tf_tables(fp, oenv, fr, ir, tabfn, mtop, box);
 -
 -        }
 -        else
 -        {
 -            /* load the default table */
 -            snew(fr->atf_tabs, 1);
 -            fr->atf_tabs[DEFAULT_TF_TABLE] = make_atf_table(fp, oenv, fr, tabafn, box);
 -        }
 +        fr->pairsTable = make_tables(fp, fr, tabpfn, rtab,
 +                                     GMX_MAKETABLES_14ONLY);
      }
  
      /* Wall stuff */
      fr->nwall = ir->nwall;
      if (ir->nwall && ir->wall_type == ewtTABLE)
      {
 -        make_wall_tables(fp, oenv, ir, tabfn, &mtop->groups, fr);
 +        make_wall_tables(fp, ir, tabfn, &mtop->groups, fr);
      }
  
      if (fcd && tabbfnm)
      fr->timesteps = 0;
  
      /* Initialize neighbor search */
 -    init_ns(fp, cr, &fr->ns, fr, mtop);
 +    snew(fr->ns, 1);
 +    init_ns(fp, cr, fr->ns, fr, mtop);
  
      if (cr->duty & DUTY_PP)
      {
          gmx_nonbonded_setup(fr, bGenericKernelOnly);
 -        /*
 -           if (ir->bAdress)
 -            {
 -                gmx_setup_adress_kernels(fp,bGenericKernelOnly);
 -            }
 -         */
      }
  
      /* Initialize the thread working data for bonded interactions */
 -    init_bonded_threading(fp, fr, mtop->groups.grps[egcENER].nr);
 +    init_bonded_threading(fp, mtop->groups.grps[egcENER].nr,
 +                          &fr->bonded_threading);
 +
 +    fr->nthread_ewc = gmx_omp_nthreads_get(emntBonded);
 +    snew(fr->ewc_t, fr->nthread_ewc);
-     snew(fr->excl_load, fr->nthread_ewc + 1);
  
      /* fr->ic is used both by verlet and group kernels (to some extent) now */
      init_interaction_const(fp, &fr->ic, fr);
  
      if (fr->cutoff_scheme == ecutsVERLET)
      {
 -        if (ir->rcoulomb != ir->rvdw)
 +        // We checked the cut-offs in grompp, but double-check here.
 +        // We have PME+LJcutoff kernels for rcoulomb>rvdw.
 +        if (EEL_PME_EWALD(ir->coulombtype) && ir->vdwtype == eelCUT)
          {
 -            gmx_fatal(FARGS, "With Verlet lists rcoulomb and rvdw should be identical");
 +            GMX_RELEASE_ASSERT(ir->rcoulomb >= ir->rvdw, "With Verlet lists and PME we should have rcoulomb>=rvdw");
 +        }
 +        else
 +        {
 +            GMX_RELEASE_ASSERT(ir->rcoulomb == ir->rvdw, "With Verlet lists and no PME rcoulomb and rvdw should be identical");
          }
  
          init_nb_verlet(fp, &fr->nbv, bFEP_NonBonded, ir, fr, cr, nbpu_opt);
  
  #define pr_real(fp, r) fprintf(fp, "%s: %e\n",#r, r)
  #define pr_int(fp, i)  fprintf((fp), "%s: %d\n",#i, i)
 -#define pr_bool(fp, b) fprintf((fp), "%s: %s\n",#b, bool_names[b])
 +#define pr_bool(fp, b) fprintf((fp), "%s: %s\n",#b, gmx::boolToString(b))
  
  void pr_forcerec(FILE *fp, t_forcerec *fr)
  {
      pr_real(fp, fr->rcoulomb);
      pr_real(fp, fr->fudgeQQ);
      pr_bool(fp, fr->bGrid);
 -    pr_bool(fp, fr->bTwinRange);
      /*pr_int(fp,fr->cg0);
         pr_int(fp,fr->hcg);*/
      for (i = 0; i < fr->nnblists; i++)
      {
 -        pr_int(fp, fr->nblists[i].table_elec_vdw.n);
 +        pr_int(fp, fr->nblists[i].table_elec_vdw->n);
      }
      pr_real(fp, fr->rcoulomb_switch);
      pr_real(fp, fr->rcoulomb);
      fflush(fp);
  }
  
- void forcerec_set_excl_load(t_forcerec           *fr,
-                             const gmx_localtop_t *top)
- {
-     const int *ind, *a;
-     int        t, i, j, ntot, n, ntarget;
-     ind = top->excls.index;
-     a   = top->excls.a;
-     ntot = 0;
-     for (i = 0; i < top->excls.nr; i++)
-     {
-         for (j = ind[i]; j < ind[i+1]; j++)
-         {
-             if (a[j] > i)
-             {
-                 ntot++;
-             }
-         }
-     }
-     fr->excl_load[0] = 0;
-     n                = 0;
-     i                = 0;
-     for (t = 1; t <= fr->nthread_ewc; t++)
-     {
-         ntarget = (ntot*t)/fr->nthread_ewc;
-         while (i < top->excls.nr && n < ntarget)
-         {
-             for (j = ind[i]; j < ind[i+1]; j++)
-             {
-                 if (a[j] > i)
-                 {
-                     n++;
-                 }
-             }
-             i++;
-         }
-         fr->excl_load[t] = i;
-     }
- }
  /* Frees GPU memory and destroys the GPU context.
   *
   * Note that this function needs to be called even if GPUs are not used
@@@ -3300,20 -3361,14 +3257,20 @@@ void free_gpu_resources(const t_forcere
      {
          /* free nbnxn data in GPU memory */
          nbnxn_gpu_free(fr->nbv->gpu_nbv);
 +        /* stop the GPU profiler (only CUDA) */
 +        stopGpuProfiler();
  
          /* With tMPI we need to wait for all ranks to finish deallocation before
 -         * destroying the context in free_gpu() as some ranks may be sharing
 +         * destroying the CUDA context in free_gpu() as some tMPI ranks may be sharing
           * GPU and context.
 +         *
 +         * This is not a concern in OpenCL where we use one context per rank which
 +         * is freed in nbnxn_gpu_free().
 +         *
           * Note: as only PP ranks need to free GPU resources, so it is safe to
           * not call the barrier on PME ranks.
           */
 -#ifdef GMX_THREAD_MPI
 +#if GMX_THREAD_MPI
          if (PAR(cr))
          {
              gmx_barrier(cr);
index 3890e24e3734a167e3e6c9efab91a204945dcbc1,fb60991a55779433b1f6ec5e80b4f7acc684d67d..591e20a7dc88cbfa338f971b35b97ed014b2a718
   * To help us fund GROMACS development, we humbly ask that you cite
   * the research papers on the package. Check out http://www.gromacs.org.
   */
 +/*! \internal \file
 + *
 + * \brief This file defines integrators for energy minimization
 + *
 + * \author Berk Hess <hess@kth.se>
 + * \author Erik Lindahl <erik@kth.se>
 + * \ingroup module_mdlib
 + */
  #include "gmxpre.h"
  
 +#include "minimize.h"
 +
  #include "config.h"
  
 -#include <math.h>
 -#include <string.h>
 -#include <time.h>
 +#include <cmath>
 +#include <cstring>
 +#include <ctime>
  
  #include <algorithm>
 +#include <vector>
  
 +#include "gromacs/commandline/filenm.h"
  #include "gromacs/domdec/domdec.h"
 +#include "gromacs/domdec/domdec_struct.h"
  #include "gromacs/ewald/pme.h"
  #include "gromacs/fileio/confio.h"
  #include "gromacs/fileio/mtxio.h"
 -#include "gromacs/fileio/trajectory_writing.h"
 +#include "gromacs/gmxlib/md_logging.h"
 +#include "gromacs/gmxlib/network.h"
 +#include "gromacs/gmxlib/nrnb.h"
  #include "gromacs/imd/imd.h"
 -#include "gromacs/legacyheaders/constr.h"
 -#include "gromacs/legacyheaders/force.h"
 -#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
 -#include "gromacs/legacyheaders/macros.h"
 -#include "gromacs/legacyheaders/md_logging.h"
 -#include "gromacs/legacyheaders/md_support.h"
 -#include "gromacs/legacyheaders/mdatoms.h"
 -#include "gromacs/legacyheaders/mdebin.h"
 -#include "gromacs/legacyheaders/mdrun.h"
 -#include "gromacs/legacyheaders/names.h"
 -#include "gromacs/legacyheaders/network.h"
 -#include "gromacs/legacyheaders/nrnb.h"
 -#include "gromacs/legacyheaders/ns.h"
 -#include "gromacs/legacyheaders/sim_util.h"
 -#include "gromacs/legacyheaders/tgroup.h"
 -#include "gromacs/legacyheaders/txtdump.h"
 -#include "gromacs/legacyheaders/typedefs.h"
 -#include "gromacs/legacyheaders/update.h"
 -#include "gromacs/legacyheaders/vsite.h"
 -#include "gromacs/legacyheaders/types/commrec.h"
  #include "gromacs/linearalgebra/sparsematrix.h"
  #include "gromacs/listed-forces/manage-threading.h"
 +#include "gromacs/math/functions.h"
  #include "gromacs/math/vec.h"
 +#include "gromacs/mdlib/constr.h"
 +#include "gromacs/mdlib/force.h"
 +#include "gromacs/mdlib/forcerec.h"
 +#include "gromacs/mdlib/gmx_omp_nthreads.h"
 +#include "gromacs/mdlib/md_support.h"
 +#include "gromacs/mdlib/mdatoms.h"
 +#include "gromacs/mdlib/mdebin.h"
 +#include "gromacs/mdlib/mdrun.h"
 +#include "gromacs/mdlib/ns.h"
 +#include "gromacs/mdlib/shellfc.h"
 +#include "gromacs/mdlib/sim_util.h"
 +#include "gromacs/mdlib/tgroup.h"
 +#include "gromacs/mdlib/trajectory_writing.h"
 +#include "gromacs/mdlib/update.h"
 +#include "gromacs/mdlib/vsite.h"
 +#include "gromacs/mdtypes/commrec.h"
 +#include "gromacs/mdtypes/inputrec.h"
 +#include "gromacs/mdtypes/md_enums.h"
  #include "gromacs/pbcutil/mshift.h"
  #include "gromacs/pbcutil/pbc.h"
  #include "gromacs/timing/wallcycle.h"
  #include "gromacs/timing/walltime_accounting.h"
  #include "gromacs/topology/mtop_util.h"
  #include "gromacs/utility/cstringutil.h"
 +#include "gromacs/utility/exceptions.h"
  #include "gromacs/utility/fatalerror.h"
  #include "gromacs/utility/smalloc.h"
  
 +//! Utility structure for manipulating states during EM
  typedef struct {
 +    //! Copy of the global state
      t_state  s;
 +    //! Force array
      rvec    *f;
 +    //! Potential energy
      real     epot;
 +    //! Norm of the force
      real     fnorm;
 +    //! Maximum force
      real     fmax;
 +    //! Index of the atom with the maximum force
      int      a_fmax;
  } em_state_t;
  
 +//! Initialize an em_state_t structure and return a pointer to it
  static em_state_t *init_em_state()
  {
      em_state_t *ems;
      return ems;
  }
  
 +//! Print the EM starting conditions
  static void print_em_start(FILE                     *fplog,
                             t_commrec                *cr,
                             gmx_walltime_accounting_t walltime_accounting,
      wallcycle_start(wcycle, ewcRUN);
      print_start(fplog, cr, walltime_accounting, name);
  }
 +
 +//! Stop counting time for EM
  static void em_time_end(gmx_walltime_accounting_t walltime_accounting,
                          gmx_wallcycle_t           wcycle)
  {
      walltime_accounting_end(walltime_accounting);
  }
  
 +//! Print a log file and console header
  static void sp_header(FILE *out, const char *minimizer, real ftol, int nsteps)
  {
      fprintf(out, "\n");
      fprintf(out, "   Number of steps    = %12d\n", nsteps);
  }
  
 +//! Print warning message
  static void warn_step(FILE *fp, real ftol, gmx_bool bLastStep, gmx_bool bConstrain)
  {
      char buffer[2048];
      fputs(wrap_lines(buffer, 78, 0, FALSE), fp);
  }
  
 -
 -
 +//! Print message about convergence of the EM
  static void print_converged(FILE *fp, const char *alg, real ftol,
                              gmx_int64_t count, gmx_bool bDone, gmx_int64_t nsteps,
                              real epot, real fmax, int nfmax, real fnorm)
                  alg, ftol, gmx_step_str(count, buf));
      }
  
 -#ifdef GMX_DOUBLE
 +#if GMX_DOUBLE
      fprintf(fp, "Potential Energy  = %21.14e\n", epot);
      fprintf(fp, "Maximum force     = %21.14e on atom %d\n", fmax, nfmax+1);
      fprintf(fp, "Norm of force     = %21.14e\n", fnorm);
  #endif
  }
  
 +//! Compute the norm and max of the force array in parallel
  static void get_f_norm_max(t_commrec *cr,
                             t_grpopts *opts, t_mdatoms *mdatoms, rvec *f,
                             real *fnorm, real *fmax, int *a_fmax)
              {
                  if (!opts->nFreeze[gf][m])
                  {
 -                    fam += sqr(f[i][m]);
 +                    fam += gmx::square(f[i][m]);
                  }
              }
              fnorm2 += fam;
      }
  }
  
 +//! Compute the force norm and the maximum force for an EM state
  static void get_state_f_norm_max(t_commrec *cr,
                                   t_grpopts *opts, t_mdatoms *mdatoms,
                                   em_state_t *ems)
      get_f_norm_max(cr, opts, mdatoms, ems->f, &ems->fnorm, &ems->fmax, &ems->a_fmax);
  }
  
 +//! Initialize the energy minimization
  void init_em(FILE *fplog, const char *title,
               t_commrec *cr, t_inputrec *ir,
               t_state *state_global, gmx_mtop_t *top_global,
               em_state_t *ems, gmx_localtop_t **top,
 -             rvec **f, rvec **f_global,
 +             rvec **f,
               t_nrnb *nrnb, rvec mu_tot,
               t_forcerec *fr, gmx_enerdata_t **enerd,
               t_graph **graph, t_mdatoms *mdatoms, gmx_global_stat_t *gstat,
          dd_partition_system(fplog, ir->init_step, cr, TRUE, 1,
                              state_global, top_global, ir,
                              &ems->s, &ems->f, mdatoms, *top,
 -                            fr, vsite, NULL, constr,
 +                            fr, vsite, constr,
                              nrnb, NULL, FALSE);
          dd_store_state(cr->dd, &ems->s);
  
 -        if (ir->nstfout)
 -        {
 -            snew(*f_global, top_global->natoms);
 -        }
 -        else
 -        {
 -            *f_global = NULL;
 -        }
          *graph = NULL;
      }
      else
  
          /* Just copy the state */
          ems->s = *state_global;
 -        snew(ems->s.x, ems->s.nalloc);
 -        snew(ems->f, ems->s.nalloc);
 +        /* We need to allocate one element extra, since we might use
 +         * (unaligned) 4-wide SIMD loads to access rvec entries.
 +         */
 +        snew(ems->s.x, ems->s.nalloc + 1);
 +        snew(ems->f, ems->s.nalloc+1);
 +        snew(ems->s.v, ems->s.nalloc+1);
          for (i = 0; i < state_global->natoms; i++)
          {
              copy_rvec(state_global->x[i], ems->s.x[i]);
          }
          copy_mat(state_global->box, ems->s.box);
  
 -        *top      = gmx_mtop_generate_local_top(top_global, ir);
 -        *f_global = *f;
 +        *top      = gmx_mtop_generate_local_top(top_global, ir->efep != efepNO);
  
-         forcerec_set_excl_load(fr, *top);
          setup_bonded_threading(fr, &(*top)->idef);
  
          if (ir->ePBC != epbcNONE && !fr->bMolPBC)
      calc_shifts(ems->s.box, fr->shift_vec);
  }
  
 +//! Finalize the minimization
  static void finish_em(t_commrec *cr, gmx_mdoutf_t outf,
                        gmx_walltime_accounting_t walltime_accounting,
                        gmx_wallcycle_t wcycle)
      em_time_end(walltime_accounting, wcycle);
  }
  
 +//! Swap two different EM states during minimization
  static void swap_em_state(em_state_t *ems1, em_state_t *ems2)
  {
      em_state_t tmp;
      *ems2 = tmp;
  }
  
 +//! Copy coordinates from an EM state to a "normal" state structure
  static void copy_em_coords(em_state_t *ems, t_state *state)
  {
      int i;
      }
  }
  
 +//! Save the EM trajectory
  static void write_em_traj(FILE *fplog, t_commrec *cr,
                            gmx_mdoutf_t outf,
                            gmx_bool bX, gmx_bool bF, const char *confout,
                            gmx_mtop_t *top_global,
                            t_inputrec *ir, gmx_int64_t step,
                            em_state_t *state,
 -                          t_state *state_global, rvec *f_global)
 +                          t_state *state_global)
  {
      int      mdof_flags;
      gmx_bool bIMDout = FALSE;
      if ((bX || bF || bIMDout || confout != NULL) && !DOMAINDECOMP(cr))
      {
          copy_em_coords(state, state_global);
 -        f_global = state->f;
      }
  
      mdof_flags = 0;
  
      mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags,
                                       top_global, step, (double)step,
 -                                     &state->s, state_global, state->f, f_global);
 +                                     &state->s, state_global, state->f);
  
      if (confout != NULL && MASTER(cr))
      {
      }
  }
  
 -static void do_em_step(t_commrec *cr, t_inputrec *ir, t_mdatoms *md,
 +//! \brief Do one minimization step
 +//
 +// \returns true when the step succeeded, false when a constraint error occurred
 +static bool do_em_step(t_commrec *cr, t_inputrec *ir, t_mdatoms *md,
                         gmx_bool bMolPBC,
                         em_state_t *ems1, real a, rvec *f, em_state_t *ems2,
                         gmx_constr_t constr, gmx_localtop_t *top,
      real     dvdl_constr;
      int      nthreads gmx_unused;
  
 +    bool     validStep = true;
 +
      s1 = &ems1->s;
      s2 = &ems2->s;
  
      if (s2->nalloc != s1->nalloc)
      {
          s2->nalloc = s1->nalloc;
 -        srenew(s2->x, s1->nalloc);
 +        /* We need to allocate one element extra, since we might use
 +         * (unaligned) 4-wide SIMD loads to access rvec entries.
 +         */
 +        srenew(s2->x, s1->nalloc + 1);
          srenew(ems2->f,  s1->nalloc);
          if (s2->flags & (1<<estCGP))
          {
 -            srenew(s2->cg_p,  s1->nalloc);
 +            srenew(s2->cg_p,  s1->nalloc + 1);
          }
      }
  
  #pragma omp for schedule(static) nowait
          for (i = start; i < end; i++)
          {
 -            if (md->cFREEZE)
 +            try
              {
 -                gf = md->cFREEZE[i];
 -            }
 -            for (m = 0; m < DIM; m++)
 -            {
 -                if (ir->opts.nFreeze[gf][m])
 +                if (md->cFREEZE)
                  {
 -                    x2[i][m] = x1[i][m];
 +                    gf = md->cFREEZE[i];
                  }
 -                else
 +                for (m = 0; m < DIM; m++)
                  {
 -                    x2[i][m] = x1[i][m] + a*f[i][m];
 +                    if (ir->opts.nFreeze[gf][m])
 +                    {
 +                        x2[i][m] = x1[i][m];
 +                    }
 +                    else
 +                    {
 +                        x2[i][m] = x1[i][m] + a*f[i][m];
 +                    }
                  }
              }
 +            GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
          }
  
          if (s2->flags & (1<<estCGP))
  #pragma omp for schedule(static) nowait
              for (i = start; i < end; i++)
              {
 +                // Trivial OpenMP block that does not throw
                  copy_rvec(x1[i], x2[i]);
              }
          }
              {
  #pragma omp barrier
                  s2->cg_gl_nalloc = s1->cg_gl_nalloc;
 -                srenew(s2->cg_gl, s2->cg_gl_nalloc);
 +                try
 +                {
 +                    /* We need to allocate one element extra, since we might use
 +                     * (unaligned) 4-wide SIMD loads to access rvec entries.
 +                     */
 +                    srenew(s2->cg_gl, s2->cg_gl_nalloc + 1);
 +                }
 +                GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
  #pragma omp barrier
              }
              s2->ncg_gl = s1->ncg_gl;
      {
          wallcycle_start(wcycle, ewcCONSTR);
          dvdl_constr = 0;
 -        constrain(NULL, TRUE, TRUE, constr, &top->idef,
 -                  ir, cr, count, 0, 1.0, md,
 -                  s1->x, s2->x, NULL, bMolPBC, s2->box,
 -                  s2->lambda[efptBONDED], &dvdl_constr,
 -                  NULL, NULL, nrnb, econqCoord);
 +        validStep   =
 +            constrain(NULL, TRUE, TRUE, constr, &top->idef,
 +                      ir, cr, count, 0, 1.0, md,
 +                      s1->x, s2->x, NULL, bMolPBC, s2->box,
 +                      s2->lambda[efptBONDED], &dvdl_constr,
 +                      NULL, NULL, nrnb, econqCoord);
          wallcycle_stop(wcycle, ewcCONSTR);
 +
 +        // We should move this check to the different minimizers
 +        if (!validStep && ir->eI != eiSteep)
 +        {
 +            gmx_fatal(FARGS, "The coordinates could not be constrained. Minimizer '%s' can not handle constraint failures, use minimizer '%s' before using '%s'.",
 +                      EI(ir->eI), EI(eiSteep), EI(ir->eI));
 +        }
      }
 +
 +    return validStep;
  }
  
 +//! Prepare EM for using domain decomposition parallelization
  static void em_dd_partition_system(FILE *fplog, int step, t_commrec *cr,
                                     gmx_mtop_t *top_global, t_inputrec *ir,
                                     em_state_t *ems, gmx_localtop_t *top,
      dd_partition_system(fplog, step, cr, FALSE, 1,
                          NULL, top_global, ir,
                          &ems->s, &ems->f,
 -                        mdatoms, top, fr, vsite, NULL, constr,
 +                        mdatoms, top, fr, vsite, constr,
                          nrnb, wcycle, FALSE);
      dd_store_state(cr->dd, &ems->s);
  }
  
 +//! Do one energy evaluation
  static void evaluate_energy(FILE *fplog, t_commrec *cr,
                              gmx_mtop_t *top_global,
                              em_state_t *ems, gmx_localtop_t *top,
               ems->s.lambda, graph, fr, vsite, mu_tot, t, NULL, NULL, TRUE,
               GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES |
               GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY |
 -             (bNS ? GMX_FORCE_NS | GMX_FORCE_DO_LR : 0));
 +             (bNS ? GMX_FORCE_NS : 0));
  
      /* Clear the unused shake virial and pressure */
      clear_mat(shake_vir);
      {
          wallcycle_start(wcycle, ewcMoveE);
  
 -        global_stat(fplog, gstat, cr, enerd, force_vir, shake_vir, mu_tot,
 +        global_stat(gstat, cr, enerd, force_vir, shake_vir, mu_tot,
                      inputrec, NULL, NULL, NULL, 1, &terminate,
 -                    top_global, &ems->s, FALSE,
 +                    NULL, FALSE,
                      CGLO_ENERGY |
                      CGLO_PRESSURE |
                      CGLO_CONSTRAINT);
      }
  
      /* Calculate long range corrections to pressure and energy */
 -    calc_dispcorr(inputrec, fr, top_global->natoms, ems->s.box, ems->s.lambda[efptVDW],
 +    calc_dispcorr(inputrec, fr, ems->s.box, ems->s.lambda[efptVDW],
                    pres, force_vir, &prescorr, &enercorr, &dvdlcorr);
      enerd->term[F_DISPCORR] = enercorr;
      enerd->term[F_EPOT]    += enercorr;
      }
  }
  
 +//! Parallel utility summing energies and forces
  static double reorder_partsum(t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms,
 -                              gmx_mtop_t *mtop,
 +                              gmx_mtop_t *top_global,
                                em_state_t *s_min, em_state_t *s_b)
  {
      rvec          *fm, *fb, *fmg;
       * This conflicts with the spirit of domain decomposition,
       * but to fully optimize this a much more complicated algorithm is required.
       */
 -    snew(fmg, mtop->natoms);
 +    snew(fmg, top_global->natoms);
  
      ncg   = s_min->s.ncg_gl;
      cg_gl = s_min->s.cg_gl;
              i++;
          }
      }
 -    gmx_sum(mtop->natoms*3, fmg[0], cr);
 +    gmx_sum(top_global->natoms*3, fmg[0], cr);
  
      /* Now we will determine the part of the sum for the cgs in state s_b */
      ncg         = s_b->s.ncg_gl;
      partsum     = 0;
      i           = 0;
      gf          = 0;
 -    grpnrFREEZE = mtop->groups.grpnr[egcFREEZE];
 +    grpnrFREEZE = top_global->groups.grpnr[egcFREEZE];
      for (c = 0; c < ncg; c++)
      {
          cg = cg_gl[c];
      return partsum;
  }
  
 +//! Compute and return beta (presumably the Polak-Ribiere coefficient, given the name): a summed force term divided by the squared force norm of s_min
  static real pr_beta(t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms,
 -                    gmx_mtop_t *mtop,
 +                    gmx_mtop_t *top_global,
                      em_state_t *s_min, em_state_t *s_b)
  {
      rvec  *fm, *fb;
      else
      {
          /* We need to reorder cgs while summing */
 -        sum = reorder_partsum(cr, opts, mdatoms, mtop, s_min, s_b);
 +        sum = reorder_partsum(cr, opts, mdatoms, top_global, s_min, s_b);
      }
      if (PAR(cr))
      {
          gmx_sumd(1, &sum, cr);
      }
  
 -    return sum/sqr(s_min->fnorm);
 +    return sum/gmx::square(s_min->fnorm);
  }
  
 +namespace gmx
 +{
 +
 +/*! \brief Do conjugate gradients minimization
 +    \copydoc integrator_t (FILE *fplog, t_commrec *cr,
 +                           int nfile, const t_filenm fnm[],
 +                           const gmx_output_env_t *oenv, gmx_bool bVerbose,
 +                           int nstglobalcomm,
 +                           gmx_vsite_t *vsite, gmx_constr_t constr,
 +                           int stepout,
 +                           t_inputrec *inputrec,
 +                           gmx_mtop_t *top_global, t_fcdata *fcd,
 +                           t_state *state_global,
 +                           t_mdatoms *mdatoms,
 +                           t_nrnb *nrnb, gmx_wallcycle_t wcycle,
 +                           gmx_edsam_t ed,
 +                           t_forcerec *fr,
 +                           int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
 +                           gmx_membed_t gmx_unused *membed,
 +                           real cpt_period, real max_hours,
 +                           int imdport,
 +                           unsigned long Flags,
 +                           gmx_walltime_accounting_t walltime_accounting)
 + */
  double do_cg(FILE *fplog, t_commrec *cr,
               int nfile, const t_filenm fnm[],
 -             const output_env_t gmx_unused oenv, gmx_bool bVerbose, gmx_bool gmx_unused bCompact,
 +             const gmx_output_env_t gmx_unused *oenv, gmx_bool bVerbose,
               int gmx_unused nstglobalcomm,
               gmx_vsite_t *vsite, gmx_constr_t constr,
               int gmx_unused stepout,
               gmx_edsam_t gmx_unused ed,
               t_forcerec *fr,
               int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed,
 -             gmx_membed_t gmx_unused membed,
 +             gmx_membed_t gmx_unused *membed,
               real gmx_unused cpt_period, real gmx_unused max_hours,
               int imdport,
               unsigned long gmx_unused Flags,
      rvec             *f;
      gmx_global_stat_t gstat;
      t_graph          *graph;
 -    rvec             *f_global, *p, *sf;
 +    rvec             *p, *sf;
      double            gpa, gpb, gpc, tmp, minstep;
      real              fnormn;
      real              stepsize;
  
      /* Init em and store the local state in s_min */
      init_em(fplog, CG, cr, inputrec,
 -            state_global, top_global, s_min, &top, &f, &f_global,
 +            state_global, top_global, s_min, &top, &f,
              nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr,
              nfile, fnm, &outf, &mdebin, imdport, Flags, wcycle);
  
                     mdatoms->tmass, enerd, &s_min->s, inputrec->fepvals, inputrec->expandedvals, s_min->s.box,
                     NULL, NULL, vir, pres, NULL, mu_tot, constr);
  
 -        print_ebin_header(fplog, step, step, s_min->s.lambda[efptFEP]);
 +        print_ebin_header(fplog, step, step);
          print_ebin(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step, step, eprNORMAL,
 -                   TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
 +                   mdebin, fcd, &(top_global->groups), &(inputrec->opts));
      }
      where();
  
       * we either converge or reach the max number of steps.
       */
      converged = FALSE;
 -    for (step = 0; (number_steps < 0 || (number_steps >= 0 && step <= number_steps)) && !converged; step++)
 +    for (step = 0; (number_steps < 0 || step <= number_steps) && !converged; step++)
      {
  
          /* start taking steps in a new direction
  
          write_em_traj(fplog, cr, outf, do_x, do_f, NULL,
                        top_global, inputrec, step,
 -                      s_min, state_global, f_global);
 +                      s_min, state_global);
  
          /* Take a step downhill.
           * In theory, we should minimize the function along this direction.
                  fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
                          step, s_min->epot, s_min->fnorm/sqrtNumAtoms,
                          s_min->fmax, s_min->a_fmax+1);
 +                fflush(stderr);
              }
              /* Store the new (lower) energies */
              upd_mdebin(mdebin, FALSE, FALSE, (double)step,
  
              if (do_log)
              {
 -                print_ebin_header(fplog, step, step, s_min->s.lambda[efptFEP]);
 +                print_ebin_header(fplog, step, step);
              }
              print_ebin(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE,
                         do_log ? fplog : NULL, step, step, eprNORMAL,
 -                       TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
 +                       mdebin, fcd, &(top_global->groups), &(inputrec->opts));
          }
  
          /* Send energies and positions to the IMD client if bIMD is TRUE. */
           */
          converged = converged || (s_min->fmax < inputrec->em_tol);
  
 -    } /* End of the loop */
 +    }   /* End of the loop */
  
      /* IMD cleanup, if bIMD is TRUE. */
      IMD_finalize(inputrec->bIMD, inputrec->imd);
          if (!do_log)
          {
              /* Write final value to log since we didn't do anything the last step */
 -            print_ebin_header(fplog, step, step, s_min->s.lambda[efptFEP]);
 +            print_ebin_header(fplog, step, step);
          }
          if (!do_ene || !do_log)
          {
              /* Write final energy file entries */
              print_ebin(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE,
                         !do_log ? fplog : NULL, step, step, eprNORMAL,
 -                       TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
 +                       mdebin, fcd, &(top_global->groups), &(inputrec->opts));
          }
      }
  
  
      write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm),
                    top_global, inputrec, step,
 -                  s_min, state_global, f_global);
 +                  s_min, state_global);
  
  
      if (MASTER(cr))
      walltime_accounting_set_nsteps_done(walltime_accounting, step);
  
      return 0;
 -} /* That's all folks */
 -
 -
 +}   /* That's all folks */
 +
 +
 +/*! \brief Do L-BFGS (limited-memory quasi-Newton) minimization
 +    \copydoc integrator_t (FILE *fplog, t_commrec *cr,
 +                           int nfile, const t_filenm fnm[],
 +                           const gmx_output_env_t *oenv, gmx_bool bVerbose,
 +                           int nstglobalcomm,
 +                           gmx_vsite_t *vsite, gmx_constr_t constr,
 +                           int stepout,
 +                           t_inputrec *inputrec,
 +                           gmx_mtop_t *top_global, t_fcdata *fcd,
 +                           t_state *state_global,
 +                           t_mdatoms *mdatoms,
 +                           t_nrnb *nrnb, gmx_wallcycle_t wcycle,
 +                           gmx_edsam_t ed,
 +                           t_forcerec *fr,
 +                           int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
 +                           gmx_membed_t gmx_unused *membed,
 +                           real cpt_period, real max_hours,
 +                           int imdport,
 +                           unsigned long Flags,
 +                           gmx_walltime_accounting_t walltime_accounting)
 + */
  double do_lbfgs(FILE *fplog, t_commrec *cr,
                  int nfile, const t_filenm fnm[],
 -                const output_env_t gmx_unused oenv, gmx_bool bVerbose, gmx_bool gmx_unused bCompact,
 +                const gmx_output_env_t gmx_unused *oenv, gmx_bool bVerbose,
                  int gmx_unused nstglobalcomm,
                  gmx_vsite_t *vsite, gmx_constr_t constr,
                  int gmx_unused stepout,
                  t_inputrec *inputrec,
                  gmx_mtop_t *top_global, t_fcdata *fcd,
 -                t_state *state,
 +                t_state *state_global,
                  t_mdatoms *mdatoms,
                  t_nrnb *nrnb, gmx_wallcycle_t wcycle,
                  gmx_edsam_t gmx_unused ed,
                  t_forcerec *fr,
                  int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed,
 -                gmx_membed_t gmx_unused membed,
 +                gmx_membed_t gmx_unused *membed,
                  real gmx_unused cpt_period, real gmx_unused max_hours,
                  int imdport,
                  unsigned long gmx_unused Flags,
      rvec              *f;
      gmx_global_stat_t  gstat;
      t_graph           *graph;
 -    rvec              *f_global;
      int                ncorr, nmaxcorr, point, cp, neval, nminstep;
      double             stepsize, step_taken, gpa, gpb, gpc, tmp, minstep;
      real              *rho, *alpha, *ff, *xx, *p, *s, *lastx, *lastf, **dx, **dg;
          gmx_fatal(FARGS, "The combination of constraints and L-BFGS minimization is not implemented. Either do not use constraints, or use another minimizer (e.g. steepest descent).");
      }
  
 -    n        = 3*state->natoms;
 +    n        = 3*state_global->natoms;
      nmaxcorr = inputrec->nbfgscorr;
  
      /* Allocate memory */
  
      /* Init em */
      init_em(fplog, LBFGS, cr, inputrec,
 -            state, top_global, &ems, &top, &f, &f_global,
 +            state_global, top_global, &ems, &top, &f,
              nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr,
              nfile, fnm, &outf, &mdebin, imdport, Flags, wcycle);
      /* Do_lbfgs is not completely updated like do_steep and do_cg,
      sfree(ems.s.x);
      sfree(ems.f);
  
 -    xx = (real *)state->x;
 +    xx = (real *)state_global->x;
      ff = (real *)f;
  
      start = 0;
  
      if (vsite)
      {
 -        construct_vsites(vsite, state->x, 1, NULL,
 +        construct_vsites(vsite, state_global->x, 1, NULL,
                           top->idef.iparams, top->idef.il,
 -                         fr->ePBC, fr->bMolPBC, cr, state->box);
 +                         fr->ePBC, fr->bMolPBC, cr, state_global->box);
      }
  
      /* Call the force routine and some auxiliary (neighboursearching etc.) */
       * We do not unshift, so molecules are always whole
       */
      neval++;
 -    ems.s.x = state->x;
 +    ems.s.x = state_global->x;
      ems.f   = f;
      evaluate_energy(fplog, cr,
                      top_global, &ems, top,
      {
          /* Copy stuff to the energy bin for easy printing etc. */
          upd_mdebin(mdebin, FALSE, FALSE, (double)step,
 -                   mdatoms->tmass, enerd, state, inputrec->fepvals, inputrec->expandedvals, state->box,
 +                   mdatoms->tmass, enerd, state_global, inputrec->fepvals, inputrec->expandedvals, state_global->box,
                     NULL, NULL, vir, pres, NULL, mu_tot, constr);
  
 -        print_ebin_header(fplog, step, step, state->lambda[efptFEP]);
 +        print_ebin_header(fplog, step, step);
          print_ebin(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step, step, eprNORMAL,
 -                   TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
 +                   mdebin, fcd, &(top_global->groups), &(inputrec->opts));
      }
      where();
  
  
      if (MASTER(cr))
      {
 -        double sqrtNumAtoms = sqrt(static_cast<double>(state->natoms));
 +        double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
          fprintf(stderr, "Using %d BFGS correction steps.\n\n", nmaxcorr);
          fprintf(stderr, "   F-max             = %12.5e on atom %d\n", fmax, nfmax+1);
          fprintf(stderr, "   F-Norm            = %12.5e\n", fnorm/sqrtNumAtoms);
  
      /* Set the gradient from the force */
      converged = FALSE;
 -    for (step = 0; (number_steps < 0 || (number_steps >= 0 && step <= number_steps)) && !converged; step++)
 +    for (step = 0; (number_steps < 0 || step <= number_steps) && !converged; step++)
      {
  
          /* Write coordinates if necessary */
          }
  
          mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags,
 -                                         top_global, step, (real)step, state, state, f, f);
 +                                         top_global, step, (real)step, state_global, state_global, f);
  
          /* Do the linesearching in the direction dx[point][0..(n-1)] */
  
          {
              if (bVerbose)
              {
 -                double sqrtNumAtoms = sqrt(static_cast<double>(state->natoms));
 +                double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
                  fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
                          step, Epot, fnorm/sqrtNumAtoms, fmax, nfmax+1);
 +                fflush(stderr);
              }
              /* Store the new (lower) energies */
              upd_mdebin(mdebin, FALSE, FALSE, (double)step,
 -                       mdatoms->tmass, enerd, state, inputrec->fepvals, inputrec->expandedvals, state->box,
 +                       mdatoms->tmass, enerd, state_global, inputrec->fepvals, inputrec->expandedvals, state_global->box,
                         NULL, NULL, vir, pres, NULL, mu_tot, constr);
              do_log = do_per_step(step, inputrec->nstlog);
              do_ene = do_per_step(step, inputrec->nstenergy);
              if (do_log)
              {
 -                print_ebin_header(fplog, step, step, state->lambda[efptFEP]);
 +                print_ebin_header(fplog, step, step);
              }
              print_ebin(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE,
                         do_log ? fplog : NULL, step, step, eprNORMAL,
 -                       TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
 +                       mdebin, fcd, &(top_global->groups), &(inputrec->opts));
          }
  
          /* Send x and E to IMD client, if bIMD is TRUE. */
 -        if (do_IMD(inputrec->bIMD, step, cr, TRUE, state->box, state->x, inputrec, 0, wcycle) && MASTER(cr))
 +        if (do_IMD(inputrec->bIMD, step, cr, TRUE, state_global->box, state_global->x, inputrec, 0, wcycle) && MASTER(cr))
          {
              IMD_send_positions(inputrec->imd);
          }
           */
          converged = converged || (fmax < inputrec->em_tol);
  
 -    } /* End of the loop */
 +    }   /* End of the loop */
  
      /* IMD cleanup, if bIMD is TRUE. */
      IMD_finalize(inputrec->bIMD, inputrec->imd);
       */
      if (!do_log) /* Write final value to log since we didn't do anything the last step */
      {
 -        print_ebin_header(fplog, step, step, state->lambda[efptFEP]);
 +        print_ebin_header(fplog, step, step);
      }
      if (!do_ene || !do_log) /* Write final energy file entries */
      {
          print_ebin(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE,
                     !do_log ? fplog : NULL, step, step, eprNORMAL,
 -                   TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
 +                   mdebin, fcd, &(top_global->groups), &(inputrec->opts));
      }
  
      /* Print some stuff... */
      do_f = !do_per_step(step, inputrec->nstfout);
      write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm),
                    top_global, inputrec, step,
 -                  &ems, state, f);
 +                  &ems, state_global);
  
      if (MASTER(cr))
      {
 -        double sqrtNumAtoms = sqrt(static_cast<double>(state->natoms));
 +        double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
          print_converged(stderr, LBFGS, inputrec->em_tol, step, converged,
                          number_steps, Epot, fmax, nfmax, fnorm/sqrtNumAtoms);
          print_converged(fplog, LBFGS, inputrec->em_tol, step, converged,
      walltime_accounting_set_nsteps_done(walltime_accounting, step);
  
      return 0;
 -} /* That's all folks */
 -
 -
 +}   /* That's all folks */
 +
 +/*! \brief Do steepest descents minimization
 +    \copydoc integrator_t (FILE *fplog, t_commrec *cr,
 +                           int nfile, const t_filenm fnm[],
 +                           const gmx_output_env_t *oenv, gmx_bool bVerbose,
 +                           int nstglobalcomm,
 +                           gmx_vsite_t *vsite, gmx_constr_t constr,
 +                           int stepout,
 +                           t_inputrec *inputrec,
 +                           gmx_mtop_t *top_global, t_fcdata *fcd,
 +                           t_state *state_global,
 +                           t_mdatoms *mdatoms,
 +                           t_nrnb *nrnb, gmx_wallcycle_t wcycle,
 +                           gmx_edsam_t ed,
 +                           t_forcerec *fr,
 +                           int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
 +                           gmx_membed_t gmx_unused *membed,
 +                           real cpt_period, real max_hours,
 +                           int imdport,
 +                           unsigned long Flags,
 +                           gmx_walltime_accounting_t walltime_accounting)
 + */
  double do_steep(FILE *fplog, t_commrec *cr,
                  int nfile, const t_filenm fnm[],
 -                const output_env_t gmx_unused oenv, gmx_bool bVerbose, gmx_bool gmx_unused bCompact,
 +                const gmx_output_env_t gmx_unused *oenv, gmx_bool bVerbose,
                  int gmx_unused nstglobalcomm,
                  gmx_vsite_t *vsite, gmx_constr_t constr,
                  int gmx_unused stepout,
                  gmx_edsam_t gmx_unused  ed,
                  t_forcerec *fr,
                  int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed,
 -                gmx_membed_t gmx_unused membed,
 +                gmx_membed_t gmx_unused *membed,
                  real gmx_unused cpt_period, real gmx_unused max_hours,
                  int imdport,
                  unsigned long gmx_unused Flags,
  {
      const char       *SD = "Steepest Descents";
      em_state_t       *s_min, *s_try;
 -    rvec             *f_global;
      gmx_localtop_t   *top;
      gmx_enerdata_t   *enerd;
      rvec             *f;
  
      /* Init em and store the local state in s_try */
      init_em(fplog, SD, cr, inputrec,
 -            state_global, top_global, s_try, &top, &f, &f_global,
 +            state_global, top_global, s_try, &top, &f,
              nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr,
              nfile, fnm, &outf, &mdebin, imdport, Flags, wcycle);
  
          bAbort = (nsteps >= 0) && (count == nsteps);
  
          /* set new coordinates, except for first step */
 +        bool validStep = true;
          if (count > 0)
          {
 -            do_em_step(cr, inputrec, mdatoms, fr->bMolPBC,
 -                       s_min, stepsize, s_min->f, s_try,
 -                       constr, top, nrnb, wcycle, count);
 +            validStep =
 +                do_em_step(cr, inputrec, mdatoms, fr->bMolPBC,
 +                           s_min, stepsize, s_min->f, s_try,
 +                           constr, top, nrnb, wcycle, count);
          }
  
 -        evaluate_energy(fplog, cr,
 -                        top_global, s_try, top,
 -                        inputrec, nrnb, wcycle, gstat,
 -                        vsite, constr, fcd, graph, mdatoms, fr,
 -                        mu_tot, enerd, vir, pres, count, count == 0);
 +        if (validStep)
 +        {
 +            evaluate_energy(fplog, cr,
 +                            top_global, s_try, top,
 +                            inputrec, nrnb, wcycle, gstat,
 +                            vsite, constr, fcd, graph, mdatoms, fr,
 +                            mu_tot, enerd, vir, pres, count, count == 0);
 +        }
 +        else
 +        {
 +            // Signal constraint error during stepping with energy=inf
 +            s_try->epot = std::numeric_limits<real>::infinity();
 +        }
  
          if (MASTER(cr))
          {
 -            print_ebin_header(fplog, count, count, s_try->s.lambda[efptFEP]);
 +            print_ebin_header(fplog, count, count);
          }
  
          if (count == 0)
                  fprintf(stderr, "Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",
                          count, ustep, s_try->epot, s_try->fmax, s_try->a_fmax+1,
                          ( (count == 0) || (s_try->epot < s_min->epot) ) ? '\n' : '\r');
 +                fflush(stderr);
              }
  
              if ( (count == 0) || (s_try->epot < s_min->epot) )
                  print_ebin(mdoutf_get_fp_ene(outf), TRUE,
                             do_per_step(steps_accepted, inputrec->nstdisreout),
                             do_per_step(steps_accepted, inputrec->nstorireout),
 -                           fplog, count, count, eprNORMAL, TRUE,
 +                           fplog, count, count, eprNORMAL,
                             mdebin, fcd, &(top_global->groups), &(inputrec->opts));
                  fflush(fplog);
              }
              do_f = do_per_step(steps_accepted, inputrec->nstfout);
              write_em_traj(fplog, cr, outf, do_x, do_f, NULL,
                            top_global, inputrec, count,
 -                          s_min, state_global, f_global);
 +                          s_min, state_global);
          }
          else
          {
          stepsize = ustep/s_min->fmax;
  
          /* Check if stepsize is too small, with 1 nm as a characteristic length */
 -#ifdef GMX_DOUBLE
 +#if GMX_DOUBLE
          if (count == nsteps || ustep < 1e-12)
  #else
          if (count == nsteps || ustep < 1e-6)
          }
  
          count++;
 -    } /* End of the loop  */
 +    }   /* End of the loop  */
  
      /* IMD cleanup, if bIMD is TRUE. */
      IMD_finalize(inputrec->bIMD, inputrec->imd);
      }
      write_em_traj(fplog, cr, outf, TRUE, inputrec->nstfout, ftp2fn(efSTO, nfile, fnm),
                    top_global, inputrec, count,
 -                  s_min, state_global, f_global);
 +                  s_min, state_global);
  
      if (MASTER(cr))
      {
      walltime_accounting_set_nsteps_done(walltime_accounting, count);
  
      return 0;
 -} /* That's all folks */
 -
 -
 +}   /* That's all folks */
 +
 +/*! \brief Do normal modes analysis
 +    \copydoc integrator_t (FILE *fplog, t_commrec *cr,
 +                           int nfile, const t_filenm fnm[],
 +                           const gmx_output_env_t *oenv, gmx_bool bVerbose,
 +                           int nstglobalcomm,
 +                           gmx_vsite_t *vsite, gmx_constr_t constr,
 +                           int stepout,
 +                           t_inputrec *inputrec,
 +                           gmx_mtop_t *top_global, t_fcdata *fcd,
 +                           t_state *state_global,
 +                           t_mdatoms *mdatoms,
 +                           t_nrnb *nrnb, gmx_wallcycle_t wcycle,
 +                           gmx_edsam_t ed,
 +                           t_forcerec *fr,
 +                           int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
 +                           real cpt_period, real max_hours,
 +                           int imdport,
 +                           unsigned long Flags,
 +                           gmx_walltime_accounting_t walltime_accounting)
 + */
  double do_nm(FILE *fplog, t_commrec *cr,
               int nfile, const t_filenm fnm[],
 -             const output_env_t gmx_unused oenv, gmx_bool bVerbose, gmx_bool gmx_unused  bCompact,
 +             const gmx_output_env_t gmx_unused *oenv, gmx_bool bVerbose,
               int gmx_unused nstglobalcomm,
               gmx_vsite_t *vsite, gmx_constr_t constr,
               int gmx_unused stepout,
               gmx_edsam_t  gmx_unused ed,
               t_forcerec *fr,
               int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed,
 -             gmx_membed_t gmx_unused membed,
 +             gmx_membed_t gmx_unused *membed,
               real gmx_unused cpt_period, real gmx_unused max_hours,
               int imdport,
               unsigned long gmx_unused Flags,
  {
      const char          *NM = "Normal Mode Analysis";
      gmx_mdoutf_t         outf;
 -    int                  natoms, atom, d;
      int                  nnodes, node;
 -    rvec                *f_global;
      gmx_localtop_t      *top;
      gmx_enerdata_t      *enerd;
      rvec                *f;
      rvec                 mu_tot;
      rvec                *fneg, *dfdx;
      gmx_bool             bSparse; /* use sparse matrix storage format */
 -    size_t               sz = 0;
 +    size_t               sz;
      gmx_sparsematrix_t * sparse_matrix           = NULL;
      real           *     full_matrix             = NULL;
      em_state_t       *   state_work;
  
      /* added with respect to mdrun */
 -    int        i, j, k, row, col;
 -    real       der_range = 10.0*sqrt(GMX_REAL_EPS);
 -    real       x_min;
 -    bool       bIsMaster = MASTER(cr);
 +    int                       row, col;
 +    real                      der_range = 10.0*sqrt(GMX_REAL_EPS);
 +    real                      x_min;
 +    bool                      bIsMaster = MASTER(cr);
  
      if (constr != NULL)
      {
      /* Init em and store the local state in state_minimum */
      init_em(fplog, NM, cr, inputrec,
              state_global, top_global, state_work, &top,
 -            &f, &f_global,
 +            &f,
              nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr,
              nfile, fnm, &outf, NULL, imdport, Flags, wcycle);
  
 -    natoms = top_global->natoms;
 -    snew(fneg, natoms);
 -    snew(dfdx, natoms);
 +    gmx_shellfc_t *shellfc = init_shell_flexcon(stdout,
 +                                                top_global,
 +                                                n_flexible_constraints(constr),
 +                                                inputrec->nstcalcenergy,
 +                                                DOMAINDECOMP(cr));
  
 -#ifndef GMX_DOUBLE
 +    if (shellfc)
 +    {
 +        make_local_shells(cr, mdatoms, shellfc);
 +    }
 +    std::vector<size_t> atom_index = get_atom_index(top_global);
 +    snew(fneg, atom_index.size());
 +    snew(dfdx, atom_index.size());
 +
 +#if !GMX_DOUBLE
      if (bIsMaster)
      {
          fprintf(stderr,
          md_print_info(cr, fplog, "Non-cutoff electrostatics used, forcing full Hessian format.\n");
          bSparse = FALSE;
      }
 -    else if (top_global->natoms < 1000)
 +    else if (atom_index.size() < 1000)
      {
 -        md_print_info(cr, fplog, "Small system size (N=%d), using full Hessian format.\n", top_global->natoms);
 +        md_print_info(cr, fplog, "Small system size (N=%d), using full Hessian format.\n", atom_index.size());
          bSparse = FALSE;
      }
      else
          bSparse = TRUE;
      }
  
 -    if (bIsMaster)
 -    {
 -        sz = DIM*top_global->natoms;
 +    /* Number of dimensions, based on real atoms, that is not vsites or shell */
 +    sz = DIM*atom_index.size();
  
 -        fprintf(stderr, "Allocating Hessian memory...\n\n");
 +    fprintf(stderr, "Allocating Hessian memory...\n\n");
  
 -        if (bSparse)
 -        {
 -            sparse_matrix = gmx_sparsematrix_init(sz);
 -            sparse_matrix->compressed_symmetric = TRUE;
 -        }
 -        else
 -        {
 -            snew(full_matrix, sz*sz);
 -        }
 +    if (bSparse)
 +    {
 +        sparse_matrix = gmx_sparsematrix_init(sz);
 +        sparse_matrix->compressed_symmetric = TRUE;
 +    }
 +    else
 +    {
 +        snew(full_matrix, sz*sz);
      }
  
      init_nrnb(nrnb);
      print_em_start(fplog, cr, walltime_accounting, wcycle, NM);
  
      /* fudge nr of steps to nr of atoms */
 -    inputrec->nsteps = natoms*2;
 +    inputrec->nsteps = atom_index.size()*2;
  
      if (bIsMaster)
      {
       ************************************************************/
  
      /* Steps are divided one by one over the nodes */
 -    for (atom = cr->nodeid; atom < natoms; atom += nnodes)
 +    bool bNS = true;
 +    for (unsigned int aid = cr->nodeid; aid < atom_index.size(); aid += nnodes)
      {
 -
 -        for (d = 0; d < DIM; d++)
 +        size_t atom = atom_index[aid];
 +        for (size_t d = 0; d < DIM; d++)
          {
 -            x_min = state_work->s.x[atom][d];
 +            gmx_bool    bBornRadii  = FALSE;
 +            gmx_int64_t step        = 0;
 +            int         force_flags = GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES;
 +            double      t           = 0;
  
 -            state_work->s.x[atom][d] = x_min - der_range;
 -
 -            /* Make evaluate_energy do a single node force calculation */
 -            cr->nnodes = 1;
 -            evaluate_energy(fplog, cr,
 -                            top_global, state_work, top,
 -                            inputrec, nrnb, wcycle, gstat,
 -                            vsite, constr, fcd, graph, mdatoms, fr,
 -                            mu_tot, enerd, vir, pres, atom*2, FALSE);
 +            x_min = state_work->s.x[atom][d];
  
 -            for (i = 0; i < natoms; i++)
 +            for (unsigned int dx = 0; (dx < 2); dx++)
              {
 -                copy_rvec(state_work->f[i], fneg[i]);
 -            }
 +                if (dx == 0)
 +                {
 +                    state_work->s.x[atom][d] = x_min - der_range;
 +                }
 +                else
 +                {
 +                    state_work->s.x[atom][d] = x_min + der_range;
 +                }
  
 -            state_work->s.x[atom][d] = x_min + der_range;
 +                /* Make evaluate_energy do a single node force calculation */
 +                cr->nnodes = 1;
 +                if (shellfc)
 +                {
 +                    /* Now is the time to relax the shells */
 +                    (void) relax_shell_flexcon(fplog, cr, bVerbose, step,
 +                                               inputrec, bNS, force_flags,
 +                                               top,
 +                                               constr, enerd, fcd,
 +                                               &state_work->s, state_work->f, vir, mdatoms,
 +                                               nrnb, wcycle, graph, &top_global->groups,
 +                                               shellfc, fr, bBornRadii, t, mu_tot,
 +                                               vsite, NULL);
 +                    bNS = false;
 +                    step++;
 +                }
 +                else
 +                {
 +                    evaluate_energy(fplog, cr,
 +                                    top_global, state_work, top,
 +                                    inputrec, nrnb, wcycle, gstat,
 +                                    vsite, constr, fcd, graph, mdatoms, fr,
 +                                    mu_tot, enerd, vir, pres, atom*2+dx, FALSE);
 +                }
  
 -            evaluate_energy(fplog, cr,
 -                            top_global, state_work, top,
 -                            inputrec, nrnb, wcycle, gstat,
 -                            vsite, constr, fcd, graph, mdatoms, fr,
 -                            mu_tot, enerd, vir, pres, atom*2+1, FALSE);
 -            cr->nnodes = nnodes;
 +                cr->nnodes = nnodes;
 +
 +                if (dx == 0)
 +                {
 +                    for (size_t i = 0; i < atom_index.size(); i++)
 +                    {
 +                        copy_rvec(state_work->f[atom_index[i]], fneg[i]);
 +                    }
 +                }
 +            }
  
              /* x is restored to original */
              state_work->s.x[atom][d] = x_min;
  
 -            for (j = 0; j < natoms; j++)
 +            for (size_t j = 0; j < atom_index.size(); j++)
              {
 -                for (k = 0; (k < DIM); k++)
 +                for (size_t k = 0; (k < DIM); k++)
                  {
                      dfdx[j][k] =
 -                        -(state_work->f[j][k] - fneg[j][k])/(2*der_range);
 +                        -(state_work->f[atom_index[j]][k] - fneg[j][k])/(2*der_range);
                  }
              }
  
              if (!bIsMaster)
              {
 -#ifdef GMX_MPI
 -#ifdef GMX_DOUBLE
 -#define mpi_type MPI_DOUBLE
 -#else
 -#define mpi_type MPI_FLOAT
 -#endif
 -                MPI_Send(dfdx[0], natoms*DIM, mpi_type, MASTERNODE(cr), cr->nodeid,
 -                         cr->mpi_comm_mygroup);
 +#if GMX_MPI
 +#define mpi_type GMX_MPI_REAL
 +                MPI_Send(dfdx[0], atom_index.size()*DIM, mpi_type, MASTER(cr),
 +                         cr->nodeid, cr->mpi_comm_mygroup);
  #endif
              }
              else
              {
 -                for (node = 0; (node < nnodes && atom+node < natoms); node++)
 +                for (node = 0; (node < nnodes && atom+node < atom_index.size()); node++)
                  {
                      if (node > 0)
                      {
 -#ifdef GMX_MPI
 +#if GMX_MPI
                          MPI_Status stat;
 -                        MPI_Recv(dfdx[0], natoms*DIM, mpi_type, node, node,
 +                        MPI_Recv(dfdx[0], atom_index.size()*DIM, mpi_type, node, node,
                                   cr->mpi_comm_mygroup, &stat);
  #undef mpi_type
  #endif
  
                      row = (atom + node)*DIM + d;
  
 -                    for (j = 0; j < natoms; j++)
 +                    for (size_t j = 0; j < atom_index.size(); j++)
                      {
 -                        for (k = 0; k < DIM; k++)
 +                        for (size_t k = 0; k < DIM; k++)
                          {
                              col = j*DIM + k;
  
          if (bIsMaster && bVerbose)
          {
              fprintf(stderr, "\rFinished step %d out of %d",
 -                    std::min(atom+nnodes, natoms), natoms);
 +                    static_cast<int>(std::min(atom+nnodes, atom_index.size())),
 +                    static_cast<int>(atom_index.size()));
              fflush(stderr);
          }
      }
  
      finish_em(cr, outf, walltime_accounting, wcycle);
  
 -    walltime_accounting_set_nsteps_done(walltime_accounting, natoms*2);
 +    walltime_accounting_set_nsteps_done(walltime_accounting, atom_index.size()*2);
  
      return 0;
  }
 +
 +} // namespace gmx
index 0a65e04d0b396f68d1ab5fbb81558fe750e8ce76,f6f86195667019bebd416e1026c1704468b988e9..cc01f266c007e30ea5691bdc99942026f6c9950c
   */
  #include "gmxpre.h"
  
 -#include "gromacs/legacyheaders/update.h"
 +#include "update.h"
  
  #include <math.h>
  #include <stdio.h>
  
  #include <algorithm>
  
 +#include "gromacs/domdec/domdec_struct.h"
  #include "gromacs/fileio/confio.h"
 -#include "gromacs/legacyheaders/constr.h"
 -#include "gromacs/legacyheaders/disre.h"
 -#include "gromacs/legacyheaders/force.h"
 -#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
 -#include "gromacs/legacyheaders/macros.h"
 -#include "gromacs/legacyheaders/mdrun.h"
 -#include "gromacs/legacyheaders/names.h"
 -#include "gromacs/legacyheaders/nrnb.h"
 -#include "gromacs/legacyheaders/orires.h"
 -#include "gromacs/legacyheaders/tgroup.h"
 -#include "gromacs/legacyheaders/txtdump.h"
 -#include "gromacs/legacyheaders/typedefs.h"
 -#include "gromacs/legacyheaders/types/commrec.h"
 +#include "gromacs/gmxlib/network.h"
 +#include "gromacs/gmxlib/nrnb.h"
 +#include "gromacs/listed-forces/disre.h"
 +#include "gromacs/listed-forces/orires.h"
 +#include "gromacs/math/functions.h"
 +#include "gromacs/math/invertmatrix.h"
  #include "gromacs/math/units.h"
  #include "gromacs/math/vec.h"
 +#include "gromacs/math/vecdump.h"
 +#include "gromacs/mdlib/constr.h"
 +#include "gromacs/mdlib/force.h"
 +#include "gromacs/mdlib/gmx_omp_nthreads.h"
 +#include "gromacs/mdlib/mdrun.h"
 +#include "gromacs/mdlib/sim_util.h"
 +#include "gromacs/mdlib/tgroup.h"
 +#include "gromacs/mdtypes/commrec.h"
 +#include "gromacs/mdtypes/group.h"
 +#include "gromacs/mdtypes/inputrec.h"
 +#include "gromacs/mdtypes/md_enums.h"
 +#include "gromacs/pbcutil/boxutilities.h"
  #include "gromacs/pbcutil/mshift.h"
  #include "gromacs/pbcutil/pbc.h"
  #include "gromacs/pulling/pull.h"
 -#include "gromacs/random/random.h"
 +#include "gromacs/random/tabulatednormaldistribution.h"
 +#include "gromacs/random/threefry.h"
  #include "gromacs/timing/wallcycle.h"
 +#include "gromacs/utility/exceptions.h"
 +#include "gromacs/utility/fatalerror.h"
  #include "gromacs/utility/futil.h"
 +#include "gromacs/utility/gmxassert.h"
  #include "gromacs/utility/gmxomp.h"
  #include "gromacs/utility/smalloc.h"
  
  /*#define STARTFROMDT2*/
  
 /* Per-temperature-coupling-group constant for the stochastic dynamics
  * update; do_update_sd1 indexes an array of these by the atom's T-coupling
  * group. */
 typedef struct {
 -    double gdt;
 -    double eph;
 -    double emh;
     /* Factor the old velocity is multiplied by in the SD update.
      * init_stochd sets it to exp(-delta_t/tau_t), or to 1 for groups
      * with tau_t <= 0 (no friction and noise on that group). */
     double em;
 -    double b;
 -    double c;
 -    double d;
 } gmx_sd_const_t;
  
 /* Per-temperature-coupling-group noise amplitude for the stochastic
  * dynamics update. */
 typedef struct {
     /* Velocity noise amplitude sqrt(kT*(1 - em*em)) with kT = BOLTZ*ref_t,
      * as filled in by update_temperature_constants. The mass is not
      * included: do_update_sd1 multiplies by sqrt(invmass) per atom. */
     real V;
 -    real X;
 -    real Yv;
 -    real Yx;
 } gmx_sd_sigma_t;
  
  typedef struct {
      /* SD stuff */
      gmx_sd_const_t *sdc;
      gmx_sd_sigma_t *sdsig;
 -    rvec           *sd_V;
 -    int             sd_V_nalloc;
      /* andersen temperature control stuff */
      gmx_bool       *randomize_group;
      real           *boltzfac;
  } gmx_stochd_t;
  
 -typedef struct gmx_update
 +struct gmx_update_t
  {
      gmx_stochd_t *sd;
      /* xprime for constraint algorithms */
      /* Variables for the deform algorithm */
      gmx_int64_t     deformref_step;
      matrix          deformref_box;
 -} t_gmx_update;
 +};
  
  
- static void do_update_md(int start, int nrend, double dt,
+ static void do_update_md(int start, int nrend,
+                          double dt, int nstpcouple,
                           t_grp_tcstat *tcstat,
                           double nh_vxi[],
                           gmx_bool bNEMD, t_grp_acc *gstat, rvec accel[],
                  if ((ptype[n] != eptVSite) && (ptype[n] != eptShell) && !nFreeze[gf][d])
                  {
                      vnrel = (lg*vrel[d] + dt*(imass*f[n][d] - 0.5*vxi*vrel[d]
-                                               - iprod(M[d], vrel)))/(1 + 0.5*vxi*dt);
+                                               - nstpcouple*iprod(M[d], vrel)))/(1 + 0.5*vxi*dt);
                      /* do not scale the mean velocities u */
                      vn             = gstat[ga].u[d] + accel[ga][d]*dt + vnrel;
                      v[n][d]        = vn;
@@@ -272,7 -274,7 +273,7 @@@ static void do_update_vv_vel(int start
      {
          g        = 0.25*dt*veta*alpha;
          mv1      = exp(-g);
 -        mv2      = series_sinhx(g);
 +        mv2      = gmx::series_sinhx(g);
      }
      else
      {
@@@ -320,7 -322,7 +321,7 @@@ static void do_update_vv_pos(int start
      {
          g        = 0.5*dt*veta;
          mr1      = exp(g);
 -        mr2      = series_sinhx(g);
 +        mr2      = gmx::series_sinhx(g);
      }
      else
      {
      }
  } /* do_update_vv_pos */
  
- static void do_update_visc(int start, int nrend, double dt,
+ static void do_update_visc(int start, int nrend,
+                            double dt, int nstpcouple,
                             t_grp_tcstat *tcstat,
                             double nh_vxi[],
                             real invmass[],
                  if ((ptype[n] != eptVSite) && (ptype[n] != eptShell))
                  {
                      vn  = (lg*vrel[d] + dt*(imass*f[n][d] - 0.5*vxi*vrel[d]
-                                             - iprod(M[d], vrel)))/(1 + 0.5*vxi*dt);
+                                             - nstpcouple*iprod(M[d], vrel)))/(1 + 0.5*vxi*dt);
                      if (d == XX)
                      {
                          vn += vc + dt*cosz*cos_accel;
      }
  }
  
 -static gmx_stochd_t *init_stochd(t_inputrec *ir)
 +static gmx_stochd_t *init_stochd(const t_inputrec *ir)
  {
      gmx_stochd_t   *sd;
 -    gmx_sd_const_t *sdc;
 -    int             ngtc, n;
 -    real            y;
  
      snew(sd, 1);
  
 -    ngtc = ir->opts.ngtc;
 +    const t_grpopts *opts = &ir->opts;
 +    int              ngtc = opts->ngtc;
  
      if (ir->eI == eiBD)
      {
          snew(sd->sdc, ngtc);
          snew(sd->sdsig, ngtc);
  
 -        sdc = sd->sdc;
 -        for (n = 0; n < ngtc; n++)
 +        gmx_sd_const_t *sdc = sd->sdc;
 +
 +        for (int gt = 0; gt < ngtc; gt++)
          {
 -            if (ir->opts.tau_t[n] > 0)
 +            if (opts->tau_t[gt] > 0)
              {
 -                sdc[n].gdt = ir->delta_t/ir->opts.tau_t[n];
 -                sdc[n].eph = exp(sdc[n].gdt/2);
 -                sdc[n].emh = exp(-sdc[n].gdt/2);
 -                sdc[n].em  = exp(-sdc[n].gdt);
 +                sdc[gt].em  = exp(-ir->delta_t/opts->tau_t[gt]);
              }
              else
              {
                  /* No friction and noise on this group */
 -                sdc[n].gdt = 0;
 -                sdc[n].eph = 1;
 -                sdc[n].emh = 1;
 -                sdc[n].em  = 1;
 -            }
 -            if (sdc[n].gdt >= 0.05)
 -            {
 -                sdc[n].b = sdc[n].gdt*(sdc[n].eph*sdc[n].eph - 1)
 -                    - 4*(sdc[n].eph - 1)*(sdc[n].eph - 1);
 -                sdc[n].c = sdc[n].gdt - 3 + 4*sdc[n].emh - sdc[n].em;
 -                sdc[n].d = 2 - sdc[n].eph - sdc[n].emh;
 -            }
 -            else
 -            {
 -                y = sdc[n].gdt/2;
 -                /* Seventh order expansions for small y */
 -                sdc[n].b = y*y*y*y*(1/3.0+y*(1/3.0+y*(17/90.0+y*7/9.0)));
 -                sdc[n].c = y*y*y*(2/3.0+y*(-1/2.0+y*(7/30.0+y*(-1/12.0+y*31/1260.0))));
 -                sdc[n].d = y*y*(-1+y*y*(-1/12.0-y*y/360.0));
 -            }
 -            if (debug)
 -            {
 -                fprintf(debug, "SD const tc-grp %d: b %g  c %g  d %g\n",
 -                        n, sdc[n].b, sdc[n].c, sdc[n].d);
 +                sdc[gt].em  = 1;
              }
          }
      }
      else if (ETC_ANDERSEN(ir->etc))
      {
 -        int        ngtc;
 -        t_grpopts *opts;
 -        real       reft;
 -
 -        opts = &ir->opts;
 -        ngtc = opts->ngtc;
 -
          snew(sd->randomize_group, ngtc);
          snew(sd->boltzfac, ngtc);
  
          /* for now, assume that all groups, if randomized, are randomized at the same rate, i.e. tau_t is the same. */
          /* since constraint groups don't necessarily match up with temperature groups! This is checked in readir.c */
  
 -        for (n = 0; n < ngtc; n++)
 +        for (int gt = 0; gt < ngtc; gt++)
          {
 -            reft = std::max<real>(0, opts->ref_t[n]);
 -            if ((opts->tau_t[n] > 0) && (reft > 0))  /* tau_t or ref_t = 0 means that no randomization is done */
 +            real reft = std::max<real>(0, opts->ref_t[gt]);
 +            if ((opts->tau_t[gt] > 0) && (reft > 0))  /* tau_t or ref_t = 0 means that no randomization is done */
              {
 -                sd->randomize_group[n] = TRUE;
 -                sd->boltzfac[n]        = BOLTZ*opts->ref_t[n];
 +                sd->randomize_group[gt] = TRUE;
 +                sd->boltzfac[gt]        = BOLTZ*opts->ref_t[gt];
              }
              else
              {
 -                sd->randomize_group[n] = FALSE;
 +                sd->randomize_group[gt] = FALSE;
              }
          }
      }
 +
      return sd;
  }
  
 -gmx_update_t init_update(t_inputrec *ir)
 +void update_temperature_constants(gmx_update_t *upd, const t_inputrec *ir)
 +{   /* Recompute the per-temperature-coupling-group noise amplitudes kept in
 +     * upd->sd from the reference temperatures ir->opts.ref_t.
 +     *
 +     * upd  update data; upd->sd is written for the BD and SD1 integrators
 +     * ir   input record supplying eI, bd_fric, delta_t and opts (ngtc, ref_t)
 +     *
 +     * For any integrator other than eiBD or eiSD1 nothing is modified.
 +     * NOTE(review): upd->sd and its bd_rf / sdc / sdsig arrays are
 +     * dereferenced unchecked here; presumably init_stochd has allocated
 +     * them for these integrators -- confirm against the caller.
 +     */
 +    if (ir->eI == eiBD)
 +    {
 +        if (ir->bd_fric != 0)
 +        {   /* Explicit friction coefficient: sqrt(2*kT/(bd_fric*delta_t)) */
 +            for (int gt = 0; gt < ir->opts.ngtc; gt++)
 +            {
 +                upd->sd->bd_rf[gt] = std::sqrt(2.0*BOLTZ*ir->opts.ref_t[gt]/(ir->bd_fric*ir->delta_t));
 +            }
 +        }
 +        else
 +        {   /* bd_fric == 0: only sqrt(2*kT) is stored per group */
 +            for (int gt = 0; gt < ir->opts.ngtc; gt++)
 +            {
 +                upd->sd->bd_rf[gt] = std::sqrt(2.0*BOLTZ*ir->opts.ref_t[gt]);
 +            }
 +        }
 +    }
 +    if (ir->eI == eiSD1)
 +    {   /* Reads the damping factor sdc[gt].em, so that must be set before */
 +        for (int gt = 0; gt < ir->opts.ngtc; gt++)
 +        {
 +            real kT = BOLTZ*ir->opts.ref_t[gt];
 +            /* The mass is accounted for later, since this differs per atom */
 +            upd->sd->sdsig[gt].V  = std::sqrt(kT*(1 - upd->sd->sdc[gt].em*upd->sd->sdc[gt].em));
 +        }
 +    }
 +}
 +
 +gmx_update_t *init_update(const t_inputrec *ir)
  {
 -    t_gmx_update *upd;
 +    gmx_update_t *upd;
  
      snew(upd, 1);
  
          upd->sd    = init_stochd(ir);
      }
  
 +    update_temperature_constants(upd, ir);
 +
      upd->xp        = NULL;
      upd->xp_nalloc = 0;
  
      return upd;
  }
  
 +void update_realloc(gmx_update_t *upd, int state_nalloc)
 +{   /* Make sure the upd->xp buffer has room for at least state_nalloc
 +     * entries. The buffer only ever grows: when state_nalloc does not
 +     * exceed the recorded upd->xp_nalloc, nothing is changed.
 +     *
 +     * upd           update data owning xp and xp_nalloc; must be non-NULL
 +     * state_nalloc  requested number of entries
 +     */
 +    GMX_ASSERT(upd, "upd must be allocated before its fields can be reallocated");
 +    if (state_nalloc > upd->xp_nalloc)
 +    {
 +        upd->xp_nalloc = state_nalloc;
 +        /* We need to allocate one element extra, since we might use
 +         * (unaligned) 4-wide SIMD loads to access rvec entries. */
 +        srenew(upd->xp, upd->xp_nalloc + 1);
 +    }
 +}
 +
  static void do_update_sd1(gmx_stochd_t *sd,
                            int start, int nrend, double dt,
                            rvec accel[], ivec nFreeze[],
                            unsigned short cFREEZE[], unsigned short cACC[],
                            unsigned short cTC[],
                            rvec x[], rvec xprime[], rvec v[], rvec f[],
 -                          int ngtc, real ref_t[],
                            gmx_bool bDoConstr,
                            gmx_bool bFirstHalfConstr,
                            gmx_int64_t step, int seed, int* gatindex)
  {
      gmx_sd_const_t *sdc;
      gmx_sd_sigma_t *sig;
 -    real            kT;
      int             gf = 0, ga = 0, gt = 0;
      real            ism;
      int             n, d;
  
 +    // Even 0 bits internal counter gives 2x64 ints (more than enough for three table lookups)
 +    gmx::ThreeFry2x64<0> rng(seed, gmx::RandomDomain::UpdateCoordinates);
 +    gmx::TabulatedNormalDistribution<real, 14> dist;
 +
      sdc = sd->sdc;
      sig = sd->sdsig;
  
 -    for (n = 0; n < ngtc; n++)
 -    {
 -        kT = BOLTZ*ref_t[n];
 -        /* The mass is encounted for later, since this differs per atom */
 -        sig[n].V  = sqrt(kT*(1 - sdc[n].em*sdc[n].em));
 -    }
 -
      if (!bDoConstr)
      {
          for (n = start; n < nrend; n++)
          {
 -            real rnd[3];
              int  ng = gatindex ? gatindex[n] : n;
  
 -            ism = sqrt(invmass[n]);
 +            rng.restart(step, ng);
 +            dist.reset();
 +
 +            ism = std::sqrt(invmass[n]);
 +
              if (cFREEZE)
              {
                  gf  = cFREEZE[n];
                  gt  = cTC[n];
              }
  
 -            gmx_rng_cycle_3gaussian_table(step, ng, seed, RND_SEED_UPDATE, rnd);
 -
              for (d = 0; d < DIM; d++)
              {
                  if ((ptype[n] != eptVSite) && (ptype[n] != eptShell) && !nFreeze[gf][d])
                  {
                      real sd_V, vn;
  
 -                    sd_V         = ism*sig[gt].V*rnd[d];
 +                    sd_V         = ism*sig[gt].V*dist(rng);
                      vn           = v[n][d] + (invmass[n]*f[n][d] + accel[ga][d])*dt;
                      v[n][d]      = vn*sdc[gt].em + sd_V;
                      /* Here we include half of the friction+noise
              /* Update friction and noise only */
              for (n = start; n < nrend; n++)
              {
 -                real rnd[3];
                  int  ng = gatindex ? gatindex[n] : n;
  
 -                ism = sqrt(invmass[n]);
 +                rng.restart(step, ng);
 +                dist.reset();
 +
 +                ism = std::sqrt(invmass[n]);
 +
                  if (cFREEZE)
                  {
                      gf  = cFREEZE[n];
                      gt  = cTC[n];
                  }
  
 -                gmx_rng_cycle_3gaussian_table(step, ng, seed, RND_SEED_UPDATE, rnd);
 -
                  for (d = 0; d < DIM; d++)
                  {
                      if ((ptype[n] != eptVSite) && (ptype[n] != eptShell) && !nFreeze[gf][d])
                      {
                          real sd_V, vn;
  
 -                        sd_V         = ism*sig[gt].V*rnd[d];
 +                        sd_V         = ism*sig[gt].V*dist(rng);
                          vn           = v[n][d];
                          v[n][d]      = vn*sdc[gt].em + sd_V;
                          /* Add the friction and noise contribution only */
      }
  }
  
 -static void check_sd2_work_data_allocation(gmx_stochd_t *sd, int nrend)
 -{
 -    if (nrend > sd->sd_V_nalloc)
 -    {
 -        sd->sd_V_nalloc = over_alloc_dd(nrend);
 -        srenew(sd->sd_V, sd->sd_V_nalloc);
 -    }
 -}
 -
 -static void do_update_sd2_Tconsts(gmx_stochd_t *sd,
 -                                  int           ngtc,
 -                                  const real    tau_t[],
 -                                  const real    ref_t[])
 -{
 -    /* This is separated from the update below, because it is single threaded */
 -    gmx_sd_const_t *sdc;
 -    gmx_sd_sigma_t *sig;
 -    int             gt;
 -    real            kT;
 -
 -    sdc = sd->sdc;
 -    sig = sd->sdsig;
 -
 -    for (gt = 0; gt < ngtc; gt++)
 -    {
 -        kT = BOLTZ*ref_t[gt];
 -        /* The mass is encounted for later, since this differs per atom */
 -        sig[gt].V  = sqrt(kT*(1-sdc[gt].em));
 -        sig[gt].X  = sqrt(kT*sqr(tau_t[gt])*sdc[gt].c);
 -        sig[gt].Yv = sqrt(kT*sdc[gt].b/sdc[gt].c);
 -        sig[gt].Yx = sqrt(kT*sqr(tau_t[gt])*sdc[gt].b/(1-sdc[gt].em));
 -    }
 -}
 -
 -static void do_update_sd2(gmx_stochd_t *sd,
 -                          gmx_bool bInitStep,
 -                          int start, int nrend,
 -                          rvec accel[], ivec nFreeze[],
 -                          real invmass[], unsigned short ptype[],
 -                          unsigned short cFREEZE[], unsigned short cACC[],
 -                          unsigned short cTC[],
 -                          rvec x[], rvec xprime[], rvec v[], rvec f[],
 -                          rvec sd_X[],
 -                          const real tau_t[],
 -                          gmx_bool bFirstHalf, gmx_int64_t step, int seed,
 -                          int* gatindex)
 -{
 -    gmx_sd_const_t *sdc;
 -    gmx_sd_sigma_t *sig;
 -    /* The random part of the velocity update, generated in the first
 -     * half of the update, needs to be remembered for the second half.
 -     */
 -    rvec  *sd_V;
 -    int    gf = 0, ga = 0, gt = 0;
 -    real   vn = 0, Vmh, Xmh;
 -    real   ism;
 -    int    n, d, ng;
 -
 -    sdc  = sd->sdc;
 -    sig  = sd->sdsig;
 -    sd_V = sd->sd_V;
 -
 -    for (n = start; n < nrend; n++)
 -    {
 -        real rnd[6], rndi[3];
 -        ng  = gatindex ? gatindex[n] : n;
 -        ism = sqrt(invmass[n]);
 -        if (cFREEZE)
 -        {
 -            gf  = cFREEZE[n];
 -        }
 -        if (cACC)
 -        {
 -            ga  = cACC[n];
 -        }
 -        if (cTC)
 -        {
 -            gt  = cTC[n];
 -        }
 -
 -        gmx_rng_cycle_6gaussian_table(step*2+(bFirstHalf ? 1 : 2), ng, seed, RND_SEED_UPDATE, rnd);
 -        if (bInitStep)
 -        {
 -            gmx_rng_cycle_3gaussian_table(step*2, ng, seed, RND_SEED_UPDATE, rndi);
 -        }
 -        for (d = 0; d < DIM; d++)
 -        {
 -            if (bFirstHalf)
 -            {
 -                vn             = v[n][d];
 -            }
 -            if ((ptype[n] != eptVSite) && (ptype[n] != eptShell) && !nFreeze[gf][d])
 -            {
 -                if (bFirstHalf)
 -                {
 -                    if (bInitStep)
 -                    {
 -                        sd_X[n][d] = ism*sig[gt].X*rndi[d];
 -                    }
 -                    Vmh = sd_X[n][d]*sdc[gt].d/(tau_t[gt]*sdc[gt].c)
 -                        + ism*sig[gt].Yv*rnd[d*2];
 -                    sd_V[n][d] = ism*sig[gt].V*rnd[d*2+1];
 -
 -                    v[n][d] = vn*sdc[gt].em
 -                        + (invmass[n]*f[n][d] + accel[ga][d])*tau_t[gt]*(1 - sdc[gt].em)
 -                        + sd_V[n][d] - sdc[gt].em*Vmh;
 -
 -                    xprime[n][d] = x[n][d] + v[n][d]*tau_t[gt]*(sdc[gt].eph - sdc[gt].emh);
 -                }
 -                else
 -                {
 -                    /* Correct the velocities for the constraints.
 -                     * This operation introduces some inaccuracy,
 -                     * since the velocity is determined from differences in coordinates.
 -                     */
 -                    v[n][d] =
 -                        (xprime[n][d] - x[n][d])/(tau_t[gt]*(sdc[gt].eph - sdc[gt].emh));
 -
 -                    Xmh = sd_V[n][d]*tau_t[gt]*sdc[gt].d/(sdc[gt].em-1)
 -                        + ism*sig[gt].Yx*rnd[d*2];
 -                    sd_X[n][d] = ism*sig[gt].X*rnd[d*2+1];
 -
 -                    xprime[n][d] += sd_X[n][d] - Xmh;
 -
 -                }
 -            }
 -            else
 -            {
 -                if (bFirstHalf)
 -                {
 -                    v[n][d]        = 0.0;
 -                    xprime[n][d]   = x[n][d];
 -                }
 -            }
 -        }
 -    }
 -}
 -
 -static void do_update_bd_Tconsts(double dt, real friction_coefficient,
 -                                 int ngtc, const real ref_t[],
 -                                 real *rf)
 -{
 -    /* This is separated from the update below, because it is single threaded */
 -    int gt;
 -
 -    if (friction_coefficient != 0)
 -    {
 -        for (gt = 0; gt < ngtc; gt++)
 -        {
 -            rf[gt] = sqrt(2.0*BOLTZ*ref_t[gt]/(friction_coefficient*dt));
 -        }
 -    }
 -    else
 -    {
 -        for (gt = 0; gt < ngtc; gt++)
 -        {
 -            rf[gt] = sqrt(2.0*BOLTZ*ref_t[gt]);
 -        }
 -    }
 -}
 -
  static void do_update_bd(int start, int nrend, double dt,
                           ivec nFreeze[],
                           real invmass[], unsigned short ptype[],
      real   vn;
      real   invfr = 0;
      int    n, d;
 +    // Use 1 bit of internal counters to give us 2*2 64-bits values per stream
 +    // Each 64-bit value is enough for 4 normal distribution table numbers.
 +    gmx::ThreeFry2x64<0> rng(seed, gmx::RandomDomain::UpdateCoordinates);
 +    gmx::TabulatedNormalDistribution<real, 14> dist;
  
      if (friction_coefficient != 0)
      {
  
      for (n = start; (n < nrend); n++)
      {
 -        real rnd[3];
          int  ng  = gatindex ? gatindex[n] : n;
  
 +        rng.restart(step, ng);
 +        dist.reset();
 +
          if (cFREEZE)
          {
              gf = cFREEZE[n];
          {
              gt = cTC[n];
          }
 -        gmx_rng_cycle_3gaussian_table(step, ng, seed, RND_SEED_UPDATE, rnd);
          for (d = 0; (d < DIM); d++)
          {
              if ((ptype[n] != eptVSite) && (ptype[n] != eptShell) && !nFreeze[gf][d])
              {
                  if (friction_coefficient != 0)
                  {
 -                    vn = invfr*f[n][d] + rf[gt]*rnd[d];
 +                    vn = invfr*f[n][d] + rf[gt]*dist(rng);
                  }
                  else
                  {
                      /* NOTE: invmass = 2/(mass*friction_constant*dt) */
                      vn = 0.5*invmass[n]*f[n][d]*dt
 -                        + sqrt(0.5*invmass[n])*rf[gt]*rnd[d];
 +                        + std::sqrt(0.5*invmass[n])*rf[gt]*dist(rng);
                  }
  
                  v[n][d]      = vn;
@@@ -840,9 -991,6 +842,9 @@@ static void calc_ke_part_normal(rvec v[
  #pragma omp parallel for num_threads(nthread) schedule(static)
      for (thread = 0; thread < nthread; thread++)
      {
 +        // This OpenMP loop only iterates over arrays and does not call any
 +        // functions or allocate memory. It should not be able to throw, so for
 +        // now we do not need a try/catch wrapper.
          int     start_t, end_t, n;
          int     ga, gt;
          rvec    v_corrt;
@@@ -1040,7 -1188,7 +1042,7 @@@ void update_ekinstate(ekinstate_t *ekin
  }
  
  void restore_ekinstate_from_state(t_commrec *cr,
 -                                  gmx_ekindata_t *ekind, ekinstate_t *ekinstate)
 +                                  gmx_ekindata_t *ekind, const ekinstate_t *ekinstate)
  {
      int i, n;
  
      }
  }
  
 -void set_deform_reference_box(gmx_update_t upd, gmx_int64_t step, matrix box)
 +void set_deform_reference_box(gmx_update_t *upd, gmx_int64_t step, matrix box)
  {
      upd->deformref_step = step;
      copy_mat(box, upd->deformref_box);
  }
  
 -static void deform(gmx_update_t upd,
 +static void deform(gmx_update_t *upd,
                     int start, int homenr, rvec x[], matrix box,
                     const t_inputrec *ir, gmx_int64_t step)
  {
              }
          }
      }
 -    m_inv_ur0(box, invbox);
 +    gmx::invertBoxMatrix(box, invbox);
      copy_mat(bnew, box);
      mmul_ur0(box, invbox, mu);
  
@@@ -1161,7 -1309,7 +1163,7 @@@ void update_tcouple(gmx_int64_t       s
  
      /* if using vv with trotter decomposition methods, we do this elsewhere in the code */
      if (inputrec->etc != etcNO &&
 -        !(IR_NVT_TROTTER(inputrec) || IR_NPT_TROTTER(inputrec) || IR_NPH_TROTTER(inputrec)))
 +        !(inputrecNvtTrotter(inputrec) || inputrecNptTrotter(inputrec) || inputrecNphTrotter(inputrec)))
      {
          /* We should only couple after a step where energies were determined (for leapfrog versions)
             or the step energies are determined, for velocity verlet versions */
@@@ -1228,7 -1376,7 +1230,7 @@@ void update_pcouple(FILE             *f
      int        i;
  
      /* if using Trotter pressure, we do this in coupling.c, so we leave it false. */
 -    if (inputrec->epc != epcNO && (!(IR_NPT_TROTTER(inputrec) || IR_NPH_TROTTER(inputrec))))
 +    if (inputrec->epc != epcNO && (!(inputrecNptTrotter(inputrec) || inputrecNphTrotter(inputrec))))
      {
          /* We should only couple after a step where energies were determined */
          bPCouple = (inputrec->nstpcouple == 1 ||
      }
  }
  
 -static rvec *get_xprime(const t_state *state, gmx_update_t upd)
 -{
 -    if (state->nalloc > upd->xp_nalloc)
 -    {
 -        upd->xp_nalloc = state->nalloc;
 -        srenew(upd->xp, upd->xp_nalloc);
 -    }
 -
 -    return upd->xp;
 -}
 -
 -static void combine_forces(gmx_update_t upd,
 -                           int nstcalclr,
 -                           gmx_constr_t constr,
 -                           t_inputrec *ir, t_mdatoms *md, t_idef *idef,
 -                           t_commrec *cr,
 -                           gmx_int64_t step,
 -                           t_state *state, gmx_bool bMolPBC,
 -                           int start, int nrend,
 -                           rvec f[], rvec f_lr[],
 -                           tensor *vir_lr_constr,
 -                           t_nrnb *nrnb)
 -{
 -    int  i, d;
 -
 -    /* f contains the short-range forces + the long range forces
 -     * which are stored separately in f_lr.
 -     */
 -
 -    if (constr != NULL && vir_lr_constr != NULL &&
 -        !(ir->eConstrAlg == econtSHAKE && ir->epc == epcNO))
 -    {
 -        /* We need to constrain the LR forces separately,
 -         * because due to the different pre-factor for the SR and LR
 -         * forces in the update algorithm, we have to correct
 -         * the constraint virial for the nstcalclr-1 extra f_lr.
 -         * Constrain only the additional LR part of the force.
 -         */
 -        /* MRS -- need to make sure this works with trotter integration -- the constraint calls may not be right.*/
 -        rvec *xp;
 -        real  fac;
 -        int   gf = 0;
 -
 -        xp  = get_xprime(state, upd);
 -
 -        fac = (nstcalclr - 1)*ir->delta_t*ir->delta_t;
 -
 -        for (i = 0; i < md->homenr; i++)
 -        {
 -            if (md->cFREEZE != NULL)
 -            {
 -                gf = md->cFREEZE[i];
 -            }
 -            for (d = 0; d < DIM; d++)
 -            {
 -                if ((md->ptype[i] != eptVSite) &&
 -                    (md->ptype[i] != eptShell) &&
 -                    !ir->opts.nFreeze[gf][d])
 -                {
 -                    xp[i][d] = state->x[i][d] + fac*f_lr[i][d]*md->invmass[i];
 -                }
 -                else
 -                {
 -                    xp[i][d] = state->x[i][d];
 -                }
 -            }
 -        }
 -        constrain(NULL, FALSE, FALSE, constr, idef, ir, cr, step, 0, 1.0, md,
 -                  state->x, xp, xp, bMolPBC, state->box, state->lambda[efptBONDED], NULL,
 -                  NULL, vir_lr_constr, nrnb, econqForce);
 -    }
 -
 -    /* Add nstcalclr-1 times the LR force to the sum of both forces
 -     * and store the result in forces_lr.
 -     */
 -    for (i = start; i < nrend; i++)
 -    {
 -        for (d = 0; d < DIM; d++)
 -        {
 -            f_lr[i][d] = f[i][d] + (nstcalclr - 1)*f_lr[i][d];
 -        }
 -    }
 -}
 -
  void update_constraints(FILE             *fplog,
                          gmx_int64_t       step,
                          real             *dvdlambda, /* the contribution to be added to the bonded interactions */
                          t_commrec        *cr,
                          t_nrnb           *nrnb,
                          gmx_wallcycle_t   wcycle,
 -                        gmx_update_t      upd,
 +                        gmx_update_t     *upd,
                          gmx_constr_t      constr,
                          gmx_bool          bFirstHalf,
                          gmx_bool          bCalcVir)
  {
      gmx_bool             bLastStep, bLog = FALSE, bEner = FALSE, bDoConstr = FALSE;
      double               dt;
 -    int                  start, homenr, nrend, i, m;
 +    int                  start, homenr, nrend, i;
      tensor               vir_con;
 -    rvec                *xprime = NULL;
      int                  nth, th;
  
      if (constr)
          /* clear out constraints before applying */
          clear_mat(vir_part);
  
 -        xprime = get_xprime(state, upd);
 -
          bLastStep = (step == inputrec->init_step+inputrec->nsteps);
          bLog      = (do_per_step(step, inputrec->nstlog) || bLastStep || (step < 0));
          bEner     = (do_per_step(step, inputrec->nstenergy) || bLastStep);
 -        /* Constrain the coordinates xprime */
 +        /* Constrain the coordinates upd->xp */
          wallcycle_start(wcycle, ewcCONSTR);
          if (EI_VV(inputrec->eI) && bFirstHalf)
          {
          {
              constrain(NULL, bLog, bEner, constr, idef,
                        inputrec, cr, step, 1, 1.0, md,
 -                      state->x, xprime, NULL,
 +                      state->x, upd->xp, NULL,
                        bMolPBC, state->box,
                        state->lambda[efptBONDED], dvdlambda,
                        state->v, bCalcVir ? &vir_con : NULL, nrnb, econqCoord);
          where();
  
          dump_it_all(fplog, "After Shake",
 -                    state->natoms, state->x, xprime, state->v, force);
 +                    state->natoms, state->x, upd->xp, state->v, force);
  
          if (bCalcVir)
          {
 -            if (inputrec->eI == eiSD2)
 -            {
 -                /* A correction factor eph is needed for the SD constraint force */
 -                /* Here we can, unfortunately, not have proper corrections
 -                 * for different friction constants, so we use the first one.
 -                 */
 -                for (i = 0; i < DIM; i++)
 -                {
 -                    for (m = 0; m < DIM; m++)
 -                    {
 -                        vir_part[i][m] += upd->sd->sdc[0].eph*vir_con[i][m];
 -                    }
 -                }
 -            }
 -            else
 -            {
 -                m_add(vir_part, vir_con, vir_part);
 -            }
 +            m_add(vir_part, vir_con, vir_part);
              if (debug)
              {
                  pr_rvecs(debug, 0, "constraint virial", vir_part, DIM);
      if (inputrec->eI == eiSD1 && bDoConstr && !bFirstHalf)
      {
          wallcycle_start(wcycle, ewcUPDATE);
 -        xprime = get_xprime(state, upd);
  
          nth = gmx_omp_nthreads_get(emntUpdate);
  
  #pragma omp parallel for num_threads(nth) schedule(static)
 -
          for (th = 0; th < nth; th++)
          {
 -            int start_th, end_th;
 +            try
 +            {
 +                int start_th, end_th;
  
 -            start_th = start + ((nrend-start)* th   )/nth;
 -            end_th   = start + ((nrend-start)*(th+1))/nth;
 +                start_th = start + ((nrend-start)* th   )/nth;
 +                end_th   = start + ((nrend-start)*(th+1))/nth;
  
 -            /* The second part of the SD integration */
 -            do_update_sd1(upd->sd,
 -                          start_th, end_th, dt,
 -                          inputrec->opts.acc, inputrec->opts.nFreeze,
 -                          md->invmass, md->ptype,
 -                          md->cFREEZE, md->cACC, md->cTC,
 -                          state->x, xprime, state->v, force,
 -                          inputrec->opts.ngtc, inputrec->opts.ref_t,
 -                          bDoConstr, FALSE,
 -                          step, inputrec->ld_seed,
 -                          DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL);
 +                /* The second part of the SD integration */
 +                do_update_sd1(upd->sd,
 +                              start_th, end_th, dt,
 +                              inputrec->opts.acc, inputrec->opts.nFreeze,
 +                              md->invmass, md->ptype,
 +                              md->cFREEZE, md->cACC, md->cTC,
 +                              state->x, upd->xp, state->v, force,
 +                              bDoConstr, FALSE,
 +                              step, inputrec->ld_seed,
 +                              DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL);
 +            }
 +            GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
          }
          inc_nrnb(nrnb, eNR_UPDATE, homenr);
          wallcycle_stop(wcycle, ewcUPDATE);
  
          if (bDoConstr)
          {
 -            /* Constrain the coordinates xprime for half a time step */
 +            /* Constrain the coordinates upd->xp for half a time step */
              wallcycle_start(wcycle, ewcCONSTR);
  
              constrain(NULL, bLog, bEner, constr, idef,
                        inputrec, cr, step, 1, 0.5, md,
 -                      state->x, xprime, NULL,
 +                      state->x, upd->xp, NULL,
                        bMolPBC, state->box,
                        state->lambda[efptBONDED], dvdlambda,
                        state->v, NULL, nrnb, econqCoord);
          }
      }
  
 -    if ((inputrec->eI == eiSD2) && !(bFirstHalf))
 -    {
 -        wallcycle_start(wcycle, ewcUPDATE);
 -        xprime = get_xprime(state, upd);
 -
 -        nth = gmx_omp_nthreads_get(emntUpdate);
 -
 -#pragma omp parallel for num_threads(nth) schedule(static)
 -        for (th = 0; th < nth; th++)
 -        {
 -            int start_th, end_th;
 -
 -            start_th = start + ((nrend-start)* th   )/nth;
 -            end_th   = start + ((nrend-start)*(th+1))/nth;
 -
 -            /* The second part of the SD integration */
 -            do_update_sd2(upd->sd,
 -                          FALSE, start_th, end_th,
 -                          inputrec->opts.acc, inputrec->opts.nFreeze,
 -                          md->invmass, md->ptype,
 -                          md->cFREEZE, md->cACC, md->cTC,
 -                          state->x, xprime, state->v, force, state->sd_X,
 -                          inputrec->opts.tau_t,
 -                          FALSE, step, inputrec->ld_seed,
 -                          DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL);
 -        }
 -        inc_nrnb(nrnb, eNR_UPDATE, homenr);
 -        wallcycle_stop(wcycle, ewcUPDATE);
 -
 -        if (bDoConstr)
 -        {
 -            /* Constrain the coordinates xprime */
 -            wallcycle_start(wcycle, ewcCONSTR);
 -            constrain(NULL, bLog, bEner, constr, idef,
 -                      inputrec, cr, step, 1, 1.0, md,
 -                      state->x, xprime, NULL,
 -                      bMolPBC, state->box,
 -                      state->lambda[efptBONDED], dvdlambda,
 -                      NULL, NULL, nrnb, econqCoord);
 -            wallcycle_stop(wcycle, ewcCONSTR);
 -        }
 -    }
 -
 -
      /* We must always unshift after updating coordinates; if we did not shake
         x was shifted in do_force */
  
  #pragma omp parallel for num_threads(nth) schedule(static)
              for (i = start; i < nrend; i++)
              {
 +                // Trivial statement, does not throw
                  copy_rvec(upd->xp[i], state->x[i]);
              }
          }
@@@ -1512,7 -1806,7 +1514,7 @@@ void update_box(FILE             *fplog
                  rvec              force[],   /* forces on home particles */
                  matrix            pcoupl_mu,
                  t_nrnb           *nrnb,
 -                gmx_update_t      upd)
 +                gmx_update_t     *upd)
  {
      double               dt;
      int                  start, homenr, i, n, m;
              break;
      }
  
 -    if (DEFORM(*inputrec))
 +    if (inputrecDeform(inputrec))
      {
          deform(upd, start, homenr, state->x, state->box, inputrec, step);
      }
@@@ -1604,18 -1898,27 +1606,18 @@@ void update_coords(FILE             *fp
                     t_inputrec       *inputrec,  /* input record and box stuff */
                     t_mdatoms        *md,
                     t_state          *state,
 -                   gmx_bool          bMolPBC,
                     rvec             *f,    /* forces on home particles */
 -                   gmx_bool          bDoLR,
 -                   rvec             *f_lr,
 -                   tensor           *vir_lr_constr,
                     t_fcdata         *fcd,
                     gmx_ekindata_t   *ekind,
                     matrix            M,
 -                   gmx_update_t      upd,
 -                   gmx_bool          bInitStep,
 +                   gmx_update_t     *upd,
                     int               UpdatePart,
                     t_commrec        *cr, /* these shouldn't be here -- need to think about it */
 -                   t_nrnb           *nrnb,
 -                   gmx_constr_t      constr,
 -                   t_idef           *idef)
 +                   gmx_constr_t      constr)
  {
      gmx_bool          bNH, bPR, bDoConstr = FALSE;
      double            dt, alpha;
 -    rvec             *force;
      int               start, homenr, nrend;
 -    rvec             *xprime;
      int               nth, th;
  
      bDoConstr = (NULL != constr);
      homenr = md->homenr;
      nrend  = start+homenr;
  
 -    xprime = get_xprime(state, upd);
 -
      dt   = inputrec->delta_t;
  
      /* We need to update the NMR restraint history when time averaging is used */
      bNH = inputrec->etc == etcNOSEHOOVER;
      bPR = ((inputrec->epc == epcPARRINELLORAHMAN) || (inputrec->epc == epcMTTK));
  
 -    if (bDoLR && inputrec->nstcalclr > 1 && !EI_VV(inputrec->eI))  /* get this working with VV? */
 -    {
 -        /* Store the total force + nstcalclr-1 times the LR force
 -         * in forces_lr, so it can be used in a normal update algorithm
 -         * to produce twin time stepping.
 -         */
 -        /* is this correct in the new construction? MRS */
 -        combine_forces(upd,
 -                       inputrec->nstcalclr, constr, inputrec, md, idef, cr,
 -                       step, state, bMolPBC,
 -                       start, nrend, f, f_lr, vir_lr_constr, nrnb);
 -        force = f_lr;
 -    }
 -    else
 -    {
 -        force = f;
 -    }
 -
      /* ############# START The update of velocities and positions ######### */
      where();
      dump_it_all(fplog, "Before update",
 -                state->natoms, state->x, xprime, state->v, force);
 -
 -    if (inputrec->eI == eiSD2)
 -    {
 -        check_sd2_work_data_allocation(upd->sd, nrend);
 -
 -        do_update_sd2_Tconsts(upd->sd,
 -                              inputrec->opts.ngtc,
 -                              inputrec->opts.tau_t,
 -                              inputrec->opts.ref_t);
 -    }
 -    if (inputrec->eI == eiBD)
 -    {
 -        do_update_bd_Tconsts(dt, inputrec->bd_fric,
 -                             inputrec->opts.ngtc, inputrec->opts.ref_t,
 -                             upd->sd->bd_rf);
 -    }
 +                state->natoms, state->x, upd->xp, state->v, f);
  
      nth = gmx_omp_nthreads_get(emntUpdate);
  
  #pragma omp parallel for num_threads(nth) schedule(static) private(alpha)
      for (th = 0; th < nth; th++)
      {
 -        int start_th, end_th;
 +        try
 +        {
 +            int start_th, end_th;
  
 -        start_th = start + ((nrend-start)* th   )/nth;
 -        end_th   = start + ((nrend-start)*(th+1))/nth;
 +            start_th = start + ((nrend-start)* th   )/nth;
 +            end_th   = start + ((nrend-start)*(th+1))/nth;
  
 -        switch (inputrec->eI)
 -        {
 -            case (eiMD):
 -                if (ekind->cosacc.cos_accel == 0)
 -                {
 -                    do_update_md(start_th, end_th,
 -                                 dt, inputrec->nstpcouple,
 -                                 ekind->tcstat, state->nosehoover_vxi,
 -                                 ekind->bNEMD, ekind->grpstat, inputrec->opts.acc,
 -                                 inputrec->opts.nFreeze,
 -                                 md->invmass, md->ptype,
 -                                 md->cFREEZE, md->cACC, md->cTC,
 -                                 state->x, xprime, state->v, force, M,
 -                                 bNH, bPR);
 -                }
 -                else
 -                {
 -                    do_update_visc(start_th, end_th,
 -                                   dt, inputrec->nstpcouple,
 -                                   ekind->tcstat, state->nosehoover_vxi,
 -                                   md->invmass, md->ptype,
 -                                   md->cTC, state->x, xprime, state->v, force, M,
 -                                   state->box,
 -                                   ekind->cosacc.cos_accel,
 -                                   ekind->cosacc.vcos,
 -                                   bNH, bPR);
 -                }
 -                break;
 -            case (eiSD1):
 -                /* With constraints, the SD1 update is done in 2 parts */
 -                do_update_sd1(upd->sd,
 -                              start_th, end_th, dt,
 -                              inputrec->opts.acc, inputrec->opts.nFreeze,
 -                              md->invmass, md->ptype,
 -                              md->cFREEZE, md->cACC, md->cTC,
 -                              state->x, xprime, state->v, force,
 -                              inputrec->opts.ngtc, inputrec->opts.ref_t,
 -                              bDoConstr, TRUE,
 -                              step, inputrec->ld_seed, DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL);
 -                break;
 -            case (eiSD2):
 -                /* The SD2 update is always done in 2 parts,
 -                 * because an extra constraint step is needed
 -                 */
 -                do_update_sd2(upd->sd,
 -                              bInitStep, start_th, end_th,
 -                              inputrec->opts.acc, inputrec->opts.nFreeze,
 -                              md->invmass, md->ptype,
 -                              md->cFREEZE, md->cACC, md->cTC,
 -                              state->x, xprime, state->v, force, state->sd_X,
 -                              inputrec->opts.tau_t,
 -                              TRUE, step, inputrec->ld_seed,
 -                              DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL);
 -                break;
 -            case (eiBD):
 -                do_update_bd(start_th, end_th, dt,
 -                             inputrec->opts.nFreeze, md->invmass, md->ptype,
 -                             md->cFREEZE, md->cTC,
 -                             state->x, xprime, state->v, force,
 -                             inputrec->bd_fric,
 -                             upd->sd->bd_rf,
 -                             step, inputrec->ld_seed, DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL);
 -                break;
 -            case (eiVV):
 -            case (eiVVAK):
 -                alpha = 1.0 + DIM/((double)inputrec->opts.nrdf[0]); /* assuming barostat coupled to group 0. */
 -                switch (UpdatePart)
 -                {
 -                    case etrtVELOCITY1:
 -                    case etrtVELOCITY2:
 -                        do_update_vv_vel(start_th, end_th, dt,
 -                                         inputrec->opts.acc, inputrec->opts.nFreeze,
 -                                         md->invmass, md->ptype,
 -                                         md->cFREEZE, md->cACC,
 -                                         state->v, force,
 -                                         (bNH || bPR), state->veta, alpha);
 -                        break;
 -                    case etrtPOSITION:
 -                        do_update_vv_pos(start_th, end_th, dt,
 -                                         inputrec->opts.nFreeze,
 -                                         md->ptype, md->cFREEZE,
 -                                         state->x, xprime, state->v,
 -                                         (bNH || bPR), state->veta);
 -                        break;
 -                }
 -                break;
 -            default:
 -                gmx_fatal(FARGS, "Don't know how to update coordinates");
 -                break;
 +            switch (inputrec->eI)
 +            {
 +                case (eiMD):
 +                    if (ekind->cosacc.cos_accel == 0)
 +                    {
-                         do_update_md(start_th, end_th, dt,
++                        do_update_md(start_th, end_th,
++                                     dt, inputrec->nstpcouple,
 +                                     ekind->tcstat, state->nosehoover_vxi,
 +                                     ekind->bNEMD, ekind->grpstat, inputrec->opts.acc,
 +                                     inputrec->opts.nFreeze,
 +                                     md->invmass, md->ptype,
 +                                     md->cFREEZE, md->cACC, md->cTC,
 +                                     state->x, upd->xp, state->v, f, M,
 +                                     bNH, bPR);
 +                    }
 +                    else
 +                    {
-                         do_update_visc(start_th, end_th, dt,
++                        do_update_visc(start_th, end_th,
++                                       dt, inputrec->nstpcouple,
 +                                       ekind->tcstat, state->nosehoover_vxi,
 +                                       md->invmass, md->ptype,
 +                                       md->cTC, state->x, upd->xp, state->v, f, M,
 +                                       state->box,
 +                                       ekind->cosacc.cos_accel,
 +                                       ekind->cosacc.vcos,
 +                                       bNH, bPR);
 +                    }
 +                    break;
 +                case (eiSD1):
 +                    /* With constraints, the SD1 update is done in 2 parts */
 +                    do_update_sd1(upd->sd,
 +                                  start_th, end_th, dt,
 +                                  inputrec->opts.acc, inputrec->opts.nFreeze,
 +                                  md->invmass, md->ptype,
 +                                  md->cFREEZE, md->cACC, md->cTC,
 +                                  state->x, upd->xp, state->v, f,
 +                                  bDoConstr, TRUE,
 +                                  step, inputrec->ld_seed, DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL);
 +                    break;
 +                case (eiBD):
 +                    do_update_bd(start_th, end_th, dt,
 +                                 inputrec->opts.nFreeze, md->invmass, md->ptype,
 +                                 md->cFREEZE, md->cTC,
 +                                 state->x, upd->xp, state->v, f,
 +                                 inputrec->bd_fric,
 +                                 upd->sd->bd_rf,
 +                                 step, inputrec->ld_seed, DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL);
 +                    break;
 +                case (eiVV):
 +                case (eiVVAK):
 +                    alpha = 1.0 + DIM/((double)inputrec->opts.nrdf[0]); /* assuming barostat coupled to group 0. */
 +                    switch (UpdatePart)
 +                    {
 +                        case etrtVELOCITY1:
 +                        case etrtVELOCITY2:
 +                            do_update_vv_vel(start_th, end_th, dt,
 +                                             inputrec->opts.acc, inputrec->opts.nFreeze,
 +                                             md->invmass, md->ptype,
 +                                             md->cFREEZE, md->cACC,
 +                                             state->v, f,
 +                                             (bNH || bPR), state->veta, alpha);
 +                            break;
 +                        case etrtPOSITION:
 +                            do_update_vv_pos(start_th, end_th, dt,
 +                                             inputrec->opts.nFreeze,
 +                                             md->ptype, md->cFREEZE,
 +                                             state->x, upd->xp, state->v,
 +                                             (bNH || bPR), state->veta);
 +                            break;
 +                    }
 +                    break;
 +                default:
 +                    gmx_fatal(FARGS, "Don't know how to update coordinates");
 +                    break;
 +            }
          }
 +        GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
      }
  
  }
@@@ -1799,7 -2151,7 +1803,7 @@@ void correct_ekin(FILE *log, int start
  }
  
  extern gmx_bool update_randomize_velocities(t_inputrec *ir, gmx_int64_t step, const t_commrec *cr,
 -                                            t_mdatoms *md, t_state *state, gmx_update_t upd, gmx_constr_t constr)
 +                                            t_mdatoms *md, t_state *state, gmx_update_t *upd, gmx_constr_t constr)
  {
  
      real rate = (ir->delta_t)/ir->opts.tau_t[0];
index a2aa4f8d747cebce5b599234e970f6a16d2411a7,0000000000000000000000000000000000000000..1ba04de5238db7940e6865baa5d6a92a2e1851d4
mode 100644,000000..100644
--- /dev/null
@@@ -1,443 -1,0 +1,441 @@@
-     /* Ewald charge correction load distribution over the threads */
-     int                 *excl_load;
 +/*
 + * This file is part of the GROMACS molecular simulation package.
 + *
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team.
 + * Copyright (c) 2013,2014,2015,2016, by the GROMACS development team, led by
 + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 + * and including many others, as listed in the AUTHORS file in the
 + * top-level source directory and at http://www.gromacs.org.
 + *
 + * GROMACS is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU Lesser General Public License
 + * as published by the Free Software Foundation; either version 2.1
 + * of the License, or (at your option) any later version.
 + *
 + * GROMACS is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 + * Lesser General Public License for more details.
 + *
 + * You should have received a copy of the GNU Lesser General Public
 + * License along with GROMACS; if not, see
 + * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 + *
 + * If you want to redistribute modifications to GROMACS, please
 + * consider that scientific software is very special. Version
 + * control is crucial - bugs must be traceable. We will be happy to
 + * consider code for inclusion in the official distribution, but
 + * derived work must not be called official GROMACS. Details are found
 + * in the README & COPYING files - if they are missing, get the
 + * official version at http://www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the research papers on the package. Check out http://www.gromacs.org.
 + */
 +#ifndef GMX_MDTYPES_TYPES_FORCEREC_H
 +#define GMX_MDTYPES_TYPES_FORCEREC_H
 +
 +#include "gromacs/math/vectypes.h"
 +#include "gromacs/mdtypes/interaction_const.h"
 +#include "gromacs/mdtypes/md_enums.h"
 +#include "gromacs/topology/idef.h"
 +#include "gromacs/utility/basedefinitions.h"
 +#include "gromacs/utility/real.h"
 +
 +#ifdef __cplusplus
 +extern "C" {
 +#endif
 +#if 0
 +} /* fixes auto-indentation problems */
 +#endif
 +
 +/* Abstract types that are defined only in the routines that use them. */
 +struct gmx_genborn_t;
 +struct gmx_ns_t;
 +struct gmx_pme_t;
 +struct nonbonded_verlet_t;
 +struct bonded_threading_t;
 +struct t_forcetable;
 +struct t_nblist;
 +struct t_nblists;
 +struct t_QMMMrec;
 +struct gmx_hw_info_t;
 +struct gmx_gpu_opt_t;
 +
 +/* macros for the cginfo data in forcerec
 + *
 + * Since the tpx format supports at most 256 energy groups, we do the same here.
 + * Note that we thus have bits 8-14 still unused.
 + *
 + * The maximum cg size in cginfo is 63
 + * because we only have space for 6 bits in cginfo,
 + * this cg size entry is actually only read with domain decomposition.
 + * But there is a smaller limit due to the t_excl data structure
 + * which is defined in nblist.h.
 + */
 +#define SET_CGINFO_GID(cgi, gid)     (cgi) = (((cgi)  &  ~255) | (gid))
 +#define GET_CGINFO_GID(cgi)        ( (cgi)            &   255)
 +#define SET_CGINFO_FEP(cgi)          (cgi) =  ((cgi)  |  (1<<15))
 +#define GET_CGINFO_FEP(cgi)        ( (cgi)            &  (1<<15))
 +#define SET_CGINFO_EXCL_INTRA(cgi)   (cgi) =  ((cgi)  |  (1<<16))
 +#define GET_CGINFO_EXCL_INTRA(cgi) ( (cgi)            &  (1<<16))
 +#define SET_CGINFO_EXCL_INTER(cgi)   (cgi) =  ((cgi)  |  (1<<17))
 +#define GET_CGINFO_EXCL_INTER(cgi) ( (cgi)            &  (1<<17))
 +#define SET_CGINFO_SOLOPT(cgi, opt)  (cgi) = (((cgi)  & ~(3<<18)) | ((opt)<<18))
 +#define GET_CGINFO_SOLOPT(cgi)     (((cgi)>>18)       &   3)
 +#define SET_CGINFO_CONSTR(cgi)       (cgi) =  ((cgi)  |  (1<<20))
 +#define GET_CGINFO_CONSTR(cgi)     ( (cgi)            &  (1<<20))
 +#define SET_CGINFO_SETTLE(cgi)       (cgi) =  ((cgi)  |  (1<<21))
 +#define GET_CGINFO_SETTLE(cgi)     ( (cgi)            &  (1<<21))
 +/* This bit is only used with bBondComm in the domain decomposition */
 +#define SET_CGINFO_BOND_INTER(cgi)   (cgi) =  ((cgi)  |  (1<<22))
 +#define GET_CGINFO_BOND_INTER(cgi) ( (cgi)            &  (1<<22))
 +#define SET_CGINFO_HAS_VDW(cgi)      (cgi) =  ((cgi)  |  (1<<23))
 +#define GET_CGINFO_HAS_VDW(cgi)    ( (cgi)            &  (1<<23))
 +#define SET_CGINFO_HAS_Q(cgi)        (cgi) =  ((cgi)  |  (1<<24))
 +#define GET_CGINFO_HAS_Q(cgi)      ( (cgi)            &  (1<<24))
 +#define SET_CGINFO_NATOMS(cgi, opt)  (cgi) = (((cgi)  & ~(63<<25)) | ((opt)<<25))
 +#define GET_CGINFO_NATOMS(cgi)     (((cgi)>>25)       &   63)
 +
 +
 +/* Value to be used in mdrun for an infinite cut-off.
 + * Since we need to compare with the cut-off squared,
 + * this value should be slightly smaller than sqrt(GMX_FLOAT_MAX).
 + */
 +#define GMX_CUTOFF_INF 1E+18
 +
 +/* enums for the neighborlist type */
 +enum {
 +    enbvdwNONE, enbvdwLJ, enbvdwBHAM, enbvdwTAB, enbvdwNR
 +};
 +/* OOR is "one over r" -- standard coul */
 +enum {
 +    enbcoulNONE, enbcoulOOR, enbcoulRF, enbcoulTAB, enbcoulGB, enbcoulFEWALD, enbcoulNR
 +};
 +
 +enum {
 +    egCOULSR, egLJSR, egBHAMSR,
 +    egCOUL14, egLJ14, egGB, egNR
 +};
 +extern const char *egrp_nm[egNR+1];
 +
 +typedef struct gmx_grppairener_t {
 +    int   nener;      /* The number of energy group pairs     */
 +    real *ener[egNR]; /* Energy terms for each pair of groups */
 +} gmx_grppairener_t;
 +
 +typedef struct gmx_enerdata_t {
 +    real              term[F_NRE];         /* The energies for all different interaction types */
 +    gmx_grppairener_t grpp;
 +    double            dvdl_lin[efptNR];    /* Contributions to dvdl with linear lam-dependence */
 +    double            dvdl_nonlin[efptNR]; /* Idem, but non-linear dependence                  */
 +    int               n_lambda;
 +    int               fep_state;           /*current fep state -- just for printing */
 +    double           *enerpart_lambda;     /* Partial energy for lambda and flambda[] */
 +    real              foreign_term[F_NRE]; /* alternate array for storing foreign lambda energies */
 +    gmx_grppairener_t foreign_grpp;        /* alternate array for storing foreign lambda energies */
 +} gmx_enerdata_t;
 +/* The idea is that dvdl terms with linear lambda dependence will be added
 + * automatically to enerpart_lambda. Terms with non-linear lambda dependence
 + * should explicitly determine the energies at foreign lambda points
 + * when n_lambda > 0.
 + */
 +
 +typedef struct {
 +    int  cg_start;
 +    int  cg_end;
 +    int  cg_mod;
 +    int *cginfo;
 +} cginfo_mb_t;
 +
 +
 +/* Forward declaration of type for managing Ewald tables */
 +struct gmx_ewald_tab_t;
 +
 +typedef struct ewald_corr_thread_t ewald_corr_thread_t;
 +
 +typedef struct t_forcerec {
 +    interaction_const_t *ic;
 +
 +    /* Domain Decomposition */
 +    gmx_bool bDomDec;
 +
 +    /* PBC stuff */
 +    int                         ePBC;
 +    gmx_bool                    bMolPBC;
 +    int                         rc_scaling;
 +    rvec                        posres_com;
 +    rvec                        posres_comB;
 +
 +    const struct gmx_hw_info_t *hwinfo;
 +    const struct gmx_gpu_opt_t *gpu_opt;
 +    gmx_bool                    use_simd_kernels;
 +
 +    /* Interactions calculated in kernels. In many cases this is similar to
 +     * the electrostatics settings in the inputrecord, but the difference is that
 +     * these variables always specify the actual interaction in the kernel - if
 +     * we are tabulating reaction-field the inputrec will say reaction-field, but
 +     * the kernel interaction will say cubic-spline-table. To be safe we also
 +     * have a kernel-specific setting for the modifiers - if the interaction is
 +     * tabulated we already included the inputrec modification there, so the kernel
 +     * modification setting will say 'none' in that case.
 +     */
 +    int nbkernel_elec_interaction;
 +    int nbkernel_vdw_interaction;
 +    int nbkernel_elec_modifier;
 +    int nbkernel_vdw_modifier;
 +
 +    /* Use special N*N kernels? */
 +    gmx_bool bAllvsAll;
 +    /* Private work data */
 +    void    *AllvsAll_work;
 +    void    *AllvsAll_workgb;
 +
 +    /* Cut-Off stuff.
 +     * Infinite cut-off's will be GMX_CUTOFF_INF (unlike in t_inputrec: 0).
 +     */
 +    real rlist;
 +
 +    /* Dielectric constant resp. multiplication factor for charges */
 +    real zsquare, temp;
 +    real epsilon_r, epsilon_rf, epsfac;
 +
 +    /* Constants for reaction fields */
 +    real kappa, k_rf, c_rf;
 +
 +    /* Charge sum and dipole for topology A/B ([0]/[1]) for Ewald corrections */
 +    double qsum[2];
 +    double q2sum[2];
 +    double c6sum[2];
 +    rvec   mu_tot[2];
 +
 +    /* Dispersion correction stuff */
 +    int                  eDispCorr;
 +    int                  numAtomsForDispersionCorrection;
 +    struct t_forcetable *dispersionCorrectionTable;
 +
 +    /* The shift of the shift or user potentials */
 +    real enershiftsix;
 +    real enershifttwelve;
 +    /* Integrated differences for energy and virial with cut-off functions */
 +    real enerdiffsix;
 +    real enerdifftwelve;
 +    real virdiffsix;
 +    real virdifftwelve;
 +    /* Constant for long range dispersion correction (average dispersion)
 +     * for topology A/B ([0]/[1]) */
 +    real avcsix[2];
 +    /* Constant for long range repulsion term. Relative difference of about
 +     * 0.1 percent with 0.8 nm cutoffs. But hey, it's cheap anyway...
 +     */
 +    real avctwelve[2];
 +
 +    /* Fudge factors */
 +    real fudgeQQ;
 +
 +    /* Table stuff */
 +    gmx_bool             bcoultab;
 +    gmx_bool             bvdwtab;
 +    /* The normal tables are in the nblists struct(s) below */
 +
 +    struct t_forcetable *pairsTable; /* for 1-4 interactions, [pairs] and [pairs_nb] */
 +
 +    /* PPPM & Shifting stuff */
 +    int   coulomb_modifier;
 +    real  rcoulomb_switch, rcoulomb;
 +    real *phi;
 +
 +    /* VdW stuff */
 +    int    vdw_modifier;
 +    double reppow;
 +    real   rvdw_switch, rvdw;
 +    real   bham_b_max;
 +
 +    /* Free energy */
 +    int      efep;
 +    real     sc_alphavdw;
 +    real     sc_alphacoul;
 +    int      sc_power;
 +    real     sc_r_power;
 +    real     sc_sigma6_def;
 +    real     sc_sigma6_min;
 +
 +    /* NS Stuff */
 +    int  eeltype;
 +    int  vdwtype;
 +    int  cg0, hcg;
 +    /* solvent_opt contains the enum for the most common solvent
 +     * in the system, which will be optimized.
 +     * It can be set to esolNO to disable all water optimization */
 +    int          solvent_opt;
 +    int          nWatMol;
 +    gmx_bool     bGrid;
 +    gmx_bool     bExcl_IntraCGAll_InterCGNone;
 +    cginfo_mb_t *cginfo_mb;
 +    int         *cginfo;
 +    rvec        *cg_cm;
 +    int          cg_nalloc;
 +    rvec        *shift_vec;
 +
 +    /* The neighborlists including tables */
 +    int                        nnblists;
 +    int                       *gid2nblists;
 +    struct t_nblists          *nblists;
 +
 +    int                        cutoff_scheme; /* group- or Verlet-style cutoff */
 +    gmx_bool                   bNonbonded;    /* true if nonbonded calculations are *not* turned off */
 +    struct nonbonded_verlet_t *nbv;
 +
 +    /* The wall tables (if used) */
 +    int                    nwall;
 +    struct t_forcetable ***wall_tab;
 +
 +    /* The number of charge groups participating in do_force_lowlevel */
 +    int ncg_force;
 +    /* The number of atoms participating in do_force_lowlevel */
 +    int natoms_force;
 +    /* The number of atoms participating in force and constraints */
 +    int natoms_force_constr;
 +    /* The allocation size of vectors of size natoms_force */
 +    int nalloc_force;
 +
 +    /* Forces that should not enter into the virial summation:
 +     * PPPM/PME/Ewald/posres
 +     */
 +    gmx_bool bF_NoVirSum;
 +    int      f_novirsum_n;
 +    int      f_novirsum_nalloc;
 +    rvec    *f_novirsum_alloc;
 +    /* Pointer that points to f_novirsum_alloc when pressure is calculated,
 +     * points to the normal force vectors when pressure is not requested.
 +     */
 +    rvec *f_novirsum;
 +
 +    /* Long-range forces and virial for PPPM/PME/Ewald */
 +    struct gmx_pme_t *pmedata;
 +    int               ljpme_combination_rule;
 +    tensor            vir_el_recip;
 +    tensor            vir_lj_recip;
 +
 +    /* PME/Ewald stuff */
 +    gmx_bool                bEwald;
 +    real                    ewaldcoeff_q;
 +    real                    ewaldcoeff_lj;
 +    struct gmx_ewald_tab_t *ewald_table;
 +
 +    /* Virial Stuff */
 +    rvec *fshift;
 +    rvec  vir_diag_posres;
 +    dvec  vir_wall_z;
 +
 +    /* Non bonded Parameter lists */
 +    int      ntype; /* Number of atom types */
 +    gmx_bool bBHAM;
 +    real    *nbfp;
 +    real    *ljpme_c6grid; /* C6-values used on grid in LJPME */
 +
 +    /* Energy group pair flags */
 +    int *egp_flags;
 +
 +    /* Shell molecular dynamics flexible constraints */
 +    real fc_stepsize;
 +
 +    /* Generalized born implicit solvent */
 +    gmx_bool              bGB;
 +    /* Generalized born stuff */
 +    real                  gb_epsilon_solvent;
 +    /* Table data for GB */
 +    struct t_forcetable  *gbtab;
 +    /* VdW radius for each atomtype (dim is thus ntype) */
 +    real                 *atype_radius;
 +    /* Effective radius (derived from effective volume) for each type */
 +    real                 *atype_vol;
 +    /* Implicit solvent - surface tension for each atomtype */
 +    real                 *atype_surftens;
 +    /* Implicit solvent - radius for GB calculation */
 +    real                 *atype_gb_radius;
 +    /* Implicit solvent - overlap for HCT model */
 +    real                 *atype_S_hct;
 +    /* Generalized born interaction data */
 +    struct gmx_genborn_t *born;
 +
 +    /* Table scale for GB */
 +    real gbtabscale;
 +    /* Table range for GB */
 +    real gbtabr;
 +    /* GB neighborlists (the sr list will contain for each atom all other atoms
 +     * (for use in the SA calculation) and the lr list will contain
 +     * for each atom all atoms 1-4 or greater (for use in the GB calculation)
 +     */
 +    struct t_nblist *gblist_sr;
 +    struct t_nblist *gblist_lr;
 +    struct t_nblist *gblist;
 +
 +    /* Inverse square root of the Born radii for implicit solvent */
 +    real *invsqrta;
 +    /* Derivatives of the potential with respect to the Born radii */
 +    real *dvda;
 +    /* Derivatives of the Born radii with respect to coordinates */
 +    real *dadx;
 +    real *dadx_rawptr;
 +    int   nalloc_dadx; /* Allocated size of dadx */
 +
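 +    /* If > 0, signals Test Particle Insertion;
 +     * the value is the number of atoms of the molecule to insert.
 +     * Only the energy difference due to the addition of the last molecule
 +     * should be calculated.
 +     * NOTE(review): declared gmx_bool below although it is described as a
 +     * count; an integer type would state the intent -- confirm before changing.
 +     */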
 +    gmx_bool n_tpi;
 +
 +    /* Neighbor searching stuff */
 +    struct gmx_ns_t *ns;
 +
 +    /* QMMM stuff */
 +    gmx_bool          bQMMM;
 +    struct t_QMMMrec *qr;
 +
 +    /* QM-MM neighborlists */
 +    struct t_nblist        *QMMMlist;
 +
 +    /* Limit for printing large forces, negative is don't print */
 +    real print_force;
 +
 +    /* coarse load balancing time measurement */
 +    double t_fnbf;
 +    double t_wait;
 +    int    timesteps;
 +
 +    /* User determined parameters, copied from the inputrec */
 +    int  userint1;
 +    int  userint2;
 +    int  userint3;
 +    int  userint4;
 +    real userreal1;
 +    real userreal2;
 +    real userreal3;
 +    real userreal4;
 +
 +    /* Pointer to struct for managing threading of bonded force calculation */
 +    struct bonded_threading_t *bonded_threading;
 +
 +    /* Ewald correction thread local virial and energy data */
 +    int                  nthread_ewc;
 +    ewald_corr_thread_t *ewc_t;
 +} t_forcerec;
 +
 +/* Important: Starting with Gromacs-4.6, the values of c6 and c12 in the nbfp array have
 + * been scaled by 6.0 or 12.0 to save flops in the kernels. We have corrected this everywhere
 + * in the code, but beware if you are using these macros externally.
 + */
 +#define C6(nbfp, ntp, ai, aj)     (nbfp)[2*((ntp)*(ai)+(aj))]
 +#define C12(nbfp, ntp, ai, aj)    (nbfp)[2*((ntp)*(ai)+(aj))+1]
 +#define BHAMC(nbfp, ntp, ai, aj)  (nbfp)[3*((ntp)*(ai)+(aj))]
 +#define BHAMA(nbfp, ntp, ai, aj)  (nbfp)[3*((ntp)*(ai)+(aj))+1]
 +#define BHAMB(nbfp, ntp, ai, aj)  (nbfp)[3*((ntp)*(ai)+(aj))+2]
 +
 +#ifdef __cplusplus
 +}
 +#endif
 +#endif
index 2a49aba4a37b3d20df5bb7b49f472fbf1f96000c,afa3d62a75a4e225841c05283444184851e8fa20..06c25f321404e012b870cfff79c5044308431b7e
   */
  #include "gmxpre.h"
  
 +#include "md.h"
 +
  #include "config.h"
  
  #include <math.h>
  #include <stdio.h>
  #include <stdlib.h>
  
 +#include <algorithm>
 +
  #include "thread_mpi/threads.h"
  
 +#include "gromacs/commandline/filenm.h"
  #include "gromacs/domdec/domdec.h"
  #include "gromacs/domdec/domdec_network.h"
 -#include "gromacs/ewald/pme-load-balancing.h"
 +#include "gromacs/domdec/domdec_struct.h"
  #include "gromacs/ewald/pme.h"
 -#include "gromacs/fileio/filenm.h"
 -#include "gromacs/fileio/mdoutf.h"
 -#include "gromacs/fileio/trajectory_writing.h"
 -#include "gromacs/fileio/trx.h"
 +#include "gromacs/ewald/pme-load-balancing.h"
  #include "gromacs/fileio/trxio.h"
 +#include "gromacs/gmxlib/md_logging.h"
 +#include "gromacs/gmxlib/network.h"
 +#include "gromacs/gmxlib/nrnb.h"
 +#include "gromacs/gpu_utils/gpu_utils.h"
  #include "gromacs/imd/imd.h"
 -#include "gromacs/legacyheaders/constr.h"
 -#include "gromacs/legacyheaders/ebin.h"
 -#include "gromacs/legacyheaders/force.h"
 -#include "gromacs/legacyheaders/md_logging.h"
 -#include "gromacs/legacyheaders/md_support.h"
 -#include "gromacs/legacyheaders/mdatoms.h"
 -#include "gromacs/legacyheaders/mdebin.h"
 -#include "gromacs/legacyheaders/mdrun.h"
 -#include "gromacs/legacyheaders/network.h"
 -#include "gromacs/legacyheaders/nrnb.h"
 -#include "gromacs/legacyheaders/ns.h"
 -#include "gromacs/legacyheaders/shellfc.h"
 -#include "gromacs/legacyheaders/sighandler.h"
 -#include "gromacs/legacyheaders/sim_util.h"
 -#include "gromacs/legacyheaders/tgroup.h"
 -#include "gromacs/legacyheaders/typedefs.h"
 -#include "gromacs/legacyheaders/update.h"
 -#include "gromacs/legacyheaders/vcm.h"
 -#include "gromacs/legacyheaders/vsite.h"
 -#include "gromacs/legacyheaders/types/commrec.h"
 -#include "gromacs/legacyheaders/types/constr.h"
 -#include "gromacs/legacyheaders/types/enums.h"
 -#include "gromacs/legacyheaders/types/fcdata.h"
 -#include "gromacs/legacyheaders/types/force_flags.h"
 -#include "gromacs/legacyheaders/types/forcerec.h"
 -#include "gromacs/legacyheaders/types/group.h"
 -#include "gromacs/legacyheaders/types/inputrec.h"
 -#include "gromacs/legacyheaders/types/interaction_const.h"
 -#include "gromacs/legacyheaders/types/mdatom.h"
 -#include "gromacs/legacyheaders/types/membedt.h"
 -#include "gromacs/legacyheaders/types/nrnb.h"
 -#include "gromacs/legacyheaders/types/oenv.h"
 -#include "gromacs/legacyheaders/types/shellfc.h"
 -#include "gromacs/legacyheaders/types/state.h"
  #include "gromacs/listed-forces/manage-threading.h"
 +#include "gromacs/math/functions.h"
  #include "gromacs/math/utilities.h"
  #include "gromacs/math/vec.h"
  #include "gromacs/math/vectypes.h"
  #include "gromacs/mdlib/compute_io.h"
 -#include "gromacs/mdlib/mdrun_signalling.h"
 +#include "gromacs/mdlib/constr.h"
 +#include "gromacs/mdlib/ebin.h"
 +#include "gromacs/mdlib/force.h"
 +#include "gromacs/mdlib/force_flags.h"
 +#include "gromacs/mdlib/forcerec.h"
 +#include "gromacs/mdlib/md_support.h"
 +#include "gromacs/mdlib/mdatoms.h"
 +#include "gromacs/mdlib/mdebin.h"
 +#include "gromacs/mdlib/mdoutf.h"
 +#include "gromacs/mdlib/mdrun.h"
  #include "gromacs/mdlib/nb_verlet.h"
  #include "gromacs/mdlib/nbnxn_gpu_data_mgmt.h"
 +#include "gromacs/mdlib/ns.h"
 +#include "gromacs/mdlib/shellfc.h"
 +#include "gromacs/mdlib/sighandler.h"
 +#include "gromacs/mdlib/sim_util.h"
 +#include "gromacs/mdlib/simulationsignal.h"
 +#include "gromacs/mdlib/tgroup.h"
 +#include "gromacs/mdlib/trajectory_writing.h"
 +#include "gromacs/mdlib/update.h"
 +#include "gromacs/mdlib/vcm.h"
 +#include "gromacs/mdlib/vsite.h"
 +#include "gromacs/mdtypes/commrec.h"
 +#include "gromacs/mdtypes/df_history.h"
 +#include "gromacs/mdtypes/energyhistory.h"
 +#include "gromacs/mdtypes/fcdata.h"
 +#include "gromacs/mdtypes/forcerec.h"
 +#include "gromacs/mdtypes/group.h"
 +#include "gromacs/mdtypes/inputrec.h"
 +#include "gromacs/mdtypes/interaction_const.h"
 +#include "gromacs/mdtypes/md_enums.h"
 +#include "gromacs/mdtypes/mdatom.h"
 +#include "gromacs/mdtypes/state.h"
  #include "gromacs/pbcutil/mshift.h"
  #include "gromacs/pbcutil/pbc.h"
  #include "gromacs/pulling/pull.h"
  #include "gromacs/topology/idef.h"
  #include "gromacs/topology/mtop_util.h"
  #include "gromacs/topology/topology.h"
 +#include "gromacs/trajectory/trajectoryframe.h"
  #include "gromacs/utility/basedefinitions.h"
  #include "gromacs/utility/cstringutil.h"
  #include "gromacs/utility/fatalerror.h"
  #include "corewrap.h"
  #endif
  
 +using gmx::SimulationSignaller;
 +
 +/*! \brief Check whether bonded interactions are missing, if appropriate
 + *
 + * When \p *shouldCheckNumberOfBondedInteractions is set, compares
 + * \p totalNumberOfBondedInteractions with cr->dd->nbonded_global and,
 + * on a mismatch, calls dd_print_missing_interactions() to report it.
 + * When the flag is not set, the function does nothing.
 + *
 + * \param[in]    fplog                                  Log file pointer
 + * \param[in]    cr                                     Communication object
 + * \param[in]    totalNumberOfBondedInteractions        Result of the global reduction over the number of bonds treated in each domain
 + * \param[in]    top_global                             Global topology for the error message
 + * \param[in]    top_local                              Local topology for the error message
 + * \param[in]    state                                  Global state for the error message
 + * \param[inout] shouldCheckNumberOfBondedInteractions  Whether we should do the check.
 + *
 + * \return Nothing, except that shouldCheckNumberOfBondedInteractions
 + * is always set to false after exit.
 + */
 +static void checkNumberOfBondedInteractions(FILE *fplog, t_commrec *cr, int totalNumberOfBondedInteractions,
 +                                            gmx_mtop_t *top_global, gmx_localtop_t *top_local, t_state *state,
 +                                            bool *shouldCheckNumberOfBondedInteractions)
 +{
 +    if (*shouldCheckNumberOfBondedInteractions)
 +    {
 +        if (totalNumberOfBondedInteractions != cr->dd->nbonded_global)
 +        {
 +            dd_print_missing_interactions(fplog, cr, totalNumberOfBondedInteractions, top_global, top_local, state); // Does not return
 +        }
 +        *shouldCheckNumberOfBondedInteractions = false;
 +    }
 +}
 +
  static void reset_all_counters(FILE *fplog, t_commrec *cr,
                                 gmx_int64_t step,
                                 gmx_int64_t *step_rel, t_inputrec *ir,
      if (use_GPU(nbv))
      {
          nbnxn_gpu_reset_timings(nbv);
 +        resetGpuProfiler();
      }
  
      wallcycle_stop(wcycle, ewcRUN);
      print_date_and_time(fplog, cr->nodeid, "Restarted time", gmx_gettime());
  }
  
 -double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[],
 -             const output_env_t oenv, gmx_bool bVerbose, gmx_bool bCompact,
 -             int nstglobalcomm,
 -             gmx_vsite_t *vsite, gmx_constr_t constr,
 -             int stepout, t_inputrec *ir,
 -             gmx_mtop_t *top_global,
 -             t_fcdata *fcd,
 -             t_state *state_global,
 -             t_mdatoms *mdatoms,
 -             t_nrnb *nrnb, gmx_wallcycle_t wcycle,
 -             gmx_edsam_t ed, t_forcerec *fr,
 -             int repl_ex_nst, int repl_ex_nex, int repl_ex_seed, gmx_membed_t membed,
 -             real cpt_period, real max_hours,
 -             int imdport,
 -             unsigned long Flags,
 -             gmx_walltime_accounting_t walltime_accounting)
 +/*! \libinternal
 +    \copydoc integrator_t (FILE *fplog, t_commrec *cr,
 +                           int nfile, const t_filenm fnm[],
 +                           const gmx_output_env_t *oenv, gmx_bool bVerbose,
 +                           int nstglobalcomm,
 +                           gmx_vsite_t *vsite, gmx_constr_t constr,
 +                           int stepout,
 +                           t_inputrec *inputrec,
 +                           gmx_mtop_t *top_global, t_fcdata *fcd,
 +                           t_state *state_global,
 +                           t_mdatoms *mdatoms,
 +                           t_nrnb *nrnb, gmx_wallcycle_t wcycle,
 +                           gmx_edsam_t ed,
 +                           t_forcerec *fr,
 +                           int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
 +                           real cpt_period, real max_hours,
 +                           int imdport,
 +                           unsigned long Flags,
 +                           gmx_walltime_accounting_t walltime_accounting)
 + */
 +double gmx::do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[],
 +                  const gmx_output_env_t *oenv, gmx_bool bVerbose,
 +                  int nstglobalcomm,
 +                  gmx_vsite_t *vsite, gmx_constr_t constr,
 +                  int stepout, t_inputrec *ir,
 +                  gmx_mtop_t *top_global,
 +                  t_fcdata *fcd,
 +                  t_state *state_global,
 +                  t_mdatoms *mdatoms,
 +                  t_nrnb *nrnb, gmx_wallcycle_t wcycle,
 +                  gmx_edsam_t ed, t_forcerec *fr,
 +                  int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
 +                  gmx_membed_t *membed,
 +                  real cpt_period, real max_hours,
 +                  int imdport,
 +                  unsigned long Flags,
 +                  gmx_walltime_accounting_t walltime_accounting)
  {
      gmx_mdoutf_t    outf = NULL;
      gmx_int64_t     step, step_rel;
      double          elapsed_time;
      double          t, t0, lam0[efptNR];
      gmx_bool        bGStatEveryStep, bGStat, bCalcVir, bCalcEnerStep, bCalcEner;
 -    gmx_bool        bNS, bNStList, bSimAnn, bStopCM, bRerunMD, bNotLastFrame = FALSE,
 -                    bFirstStep, bStateFromCP, bStateFromTPX, bInitStep, bLastStep,
 -                    bBornRadii, bStartingFromCpt;
 +    gmx_bool        bNS, bNStList, bSimAnn, bStopCM, bRerunMD,
 +                    bFirstStep, startingFromCheckpoint, bInitStep, bLastStep = FALSE,
 +                    bBornRadii, bUsingEnsembleRestraints;
      gmx_bool          bDoDHDL = FALSE, bDoFEP = FALSE, bDoExpanded = FALSE;
      gmx_bool          do_ene, do_log, do_verbose, bRerunWarnNoV = TRUE,
                        bForceUpdate = FALSE, bCPT;
      gmx_localtop_t   *top;
      t_mdebin         *mdebin   = NULL;
      t_state          *state    = NULL;
 -    rvec             *f_global = NULL;
      gmx_enerdata_t   *enerd;
      rvec             *f = NULL;
      gmx_global_stat_t gstat;
 -    gmx_update_t      upd   = NULL;
 +    gmx_update_t     *upd   = NULL;
      t_graph          *graph = NULL;
 -    gmx_signalling_t  gs;
      gmx_groups_t     *groups;
      gmx_ekindata_t   *ekind;
 -    gmx_shellfc_t     shellfc;
 -    int               count, nconverged = 0;
 -    double            tcount                 = 0;
 -    gmx_bool          bConverged             = TRUE, bSumEkinhOld, bDoReplEx, bExchanged, bNeedRepartition;
 +    gmx_shellfc_t    *shellfc;
 +    gmx_bool          bSumEkinhOld, bDoReplEx, bExchanged, bNeedRepartition;
      gmx_bool          bResetCountersHalfMaxH = FALSE;
 -    gmx_bool          bVV, bTemp, bPres, bTrotter;
 -    gmx_bool          bUpdateDoLR;
 +    gmx_bool          bTemp, bPres, bTrotter;
      real              dvdl_constr;
      rvec             *cbuf        = NULL;
      int               cbuf_nalloc = 0;
      int             **trotter_seq;
      char              sbuf[STEPSTRSIZE], sbuf2[STEPSTRSIZE];
      int               handled_stop_condition = gmx_stop_cond_none; /* compare to get_stop_condition*/
 -    gmx_int64_t       multisim_nsteps        = -1;                 /* number of steps to do  before first multisim
 -                                                                          simulation stops. If equal to zero, don't
 -                                                                          communicate any more between multisims.*/
 +
 +
      /* PME load balancing data for GPU kernels */
      pme_load_balancing_t *pme_loadbal      = NULL;
      gmx_bool              bPMETune         = FALSE;
      /* Temporary addition for FAHCORE checkpointing */
      int chkpt_ret;
  #endif
 +    /* Domain decomposition could incorrectly miss a bonded
 +       interaction, but checking for that requires a global
 +       communication stage, which does not otherwise happen in DD
 +       code. So we do that alongside the first global energy reduction
 +       after a new DD is made. These variables handle whether the
 +       check happens, and the result it returns. */
 +    bool              shouldCheckNumberOfBondedInteractions = false;
 +    int               totalNumberOfBondedInteractions       = -1;
 +
 +    SimulationSignals signals;
 +    // Most global communication stages don't propagate mdrun
 +    // signals, and will use this object to achieve that.
 +    SimulationSignaller nullSignaller(nullptr, nullptr, false, false);
  
      /* Check for special mdrun options */
      bRerunMD = (Flags & MD_RERUN);
      /* md-vv uses averaged full step velocities for T-control
         md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control)
         md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */
 -    bVV      = EI_VV(ir->eI);
 -    bTrotter = (bVV && (IR_NPT_TROTTER(ir) || IR_NPH_TROTTER(ir) || IR_NVT_TROTTER(ir)));
 +    bTrotter = (EI_VV(ir->eI) && (inputrecNptTrotter(ir) || inputrecNphTrotter(ir) || inputrecNvtTrotter(ir)));
  
      if (bRerunMD)
      {
          nstglobalcomm     = 1;
      }
  
 -    check_ir_old_tpx_versions(cr, fplog, ir, top_global);
 -
      nstglobalcomm   = check_nstglobalcomm(fplog, cr, nstglobalcomm, ir);
      bGStatEveryStep = (nstglobalcomm == 1);
  
      }
      groups = &top_global->groups;
  
 +    if (ir->eSwapCoords != eswapNO)
 +    {
 +        /* Initialize ion swapping code */
 +        init_swapcoords(fplog, bVerbose, ir, opt2fn_master("-swap", nfile, fnm, cr),
 +                        top_global, state_global->x, state_global->box, &state_global->swapstate, cr, oenv, Flags);
 +    }
 +
      /* Initial values */
      init_md(fplog, cr, ir, oenv, &t, &t0, state_global->lambda,
              &(state_global->fep_state), lam0,
      ekind->cosacc.cos_accel = ir->cos_accel;
  
      gstat = global_stat_init(ir);
 -    debug_gmx();
  
      /* Check for polarizable models and flexible constraints */
      shellfc = init_shell_flexcon(fplog,
                                   top_global, n_flexible_constraints(constr),
 -                                 (ir->bContinuation ||
 -                                  (DOMAINDECOMP(cr) && !MASTER(cr))) ?
 -                                 NULL : state_global->x);
 +                                 ir->nstcalcenergy, DOMAINDECOMP(cr));
 +
      if (shellfc && ir->nstcalcenergy != 1)
      {
          gmx_fatal(FARGS, "You have nstcalcenergy set to a value (%d) that is different from 1.\nThis is not supported in combinations with shell particles.\nPlease make a new tpr file.", ir->nstcalcenergy);
      {
          gmx_fatal(FARGS, "Shell particles are not implemented with domain decomposition, use a single rank");
      }
 -    if (shellfc && ir->eI == eiNM)
 -    {
 -        /* Currently shells don't work with Normal Modes */
 -        gmx_fatal(FARGS, "Normal Mode analysis is not supported with shells.\nIf you'd like to help with adding support, we have an open discussion at http://redmine.gromacs.org/issues/879\n");
 -    }
  
 -    if (vsite && ir->eI == eiNM)
 -    {
 -        /* Currently virtual sites don't work with Normal Modes */
 -        gmx_fatal(FARGS, "Normal Mode analysis is not supported with virtual sites.\nIf you'd like to help with adding support, we have an open discussion at http://redmine.gromacs.org/issues/879\n");
 -    }
 -
 -    if (DEFORM(*ir))
 +    if (inputrecDeform(ir))
      {
          tMPI_Thread_mutex_lock(&deform_init_box_mutex);
          set_deform_reference_box(upd,
  
          snew(state, 1);
          dd_init_local_state(cr->dd, state_global, state);
 -
 -        if (DDMASTER(cr->dd) && ir->nstfout)
 -        {
 -            snew(f_global, state_global->natoms);
 -        }
      }
      else
      {
 -        top = gmx_mtop_generate_local_top(top_global, ir);
 +        top = gmx_mtop_generate_local_top(top_global, ir->efep != efepNO);
  
-         forcerec_set_excl_load(fr, top);
          state    = serial_init_local_state(state_global);
 -        f_global = f;
  
          atoms2md(top_global, ir, 0, NULL, top_global->natoms, mdatoms);
  
          }
  
          setup_bonded_threading(fr, &top->idef);
 +
 +        update_realloc(upd, state->nalloc);
      }
  
      /* Set up interactive MD (IMD) */
          dd_partition_system(fplog, ir->init_step, cr, TRUE, 1,
                              state_global, top_global, ir,
                              state, &f, mdatoms, top, fr,
 -                            vsite, shellfc, constr,
 +                            vsite, constr,
                              nrnb, NULL, FALSE);
 -
 +        shouldCheckNumberOfBondedInteractions = true;
 +        update_realloc(upd, state->nalloc);
      }
  
      update_mdatoms(mdatoms, state->lambda[efptMASS]);
  
 -    if (opt2bSet("-cpi", nfile, fnm))
 -    {
 -        bStateFromCP = gmx_fexist_master(opt2fn_master("-cpi", nfile, fnm, cr), cr);
 -    }
 -    else
 -    {
 -        bStateFromCP = FALSE;
 -    }
 +    startingFromCheckpoint = Flags & MD_STARTFROMCPT;
  
      if (ir->bExpanded)
      {
 -        init_expanded_ensemble(bStateFromCP, ir, &state->dfhist);
 +        init_expanded_ensemble(startingFromCheckpoint, ir, &state->dfhist);
      }
  
      if (MASTER(cr))
      {
 -        if (bStateFromCP)
 +        if (startingFromCheckpoint)
          {
              /* Update mdebin with energy history if appending to output files */
              if (Flags & MD_APPENDFILES)
              {
 -                restore_energyhistory_from_state(mdebin, &state_global->enerhist);
 +                restore_energyhistory_from_state(mdebin, state_global->enerhist);
              }
              else
              {
                  /* We might have read an energy history from checkpoint,
                   * free the allocated memory and reset the counts.
                   */
 -                done_energyhistory(&state_global->enerhist);
 -                init_energyhistory(&state_global->enerhist);
 +                done_energyhistory(state_global->enerhist);
 +                init_energyhistory(state_global->enerhist);
              }
          }
          /* Set the initial energy history in state by updating once */
 -        update_energyhistory(&state_global->enerhist, mdebin);
 +        update_energyhistory(state_global->enerhist, mdebin);
      }
  
      /* Initialize constraints */
          }
      }
  
 -    debug_gmx();
 -
 -    if (IR_TWINRANGE(*ir) && repl_ex_nst % ir->nstcalclr != 0)
 -    {
 -        /* We should exchange at nstcalclr steps to get correct integration */
 -        gmx_fatal(FARGS, "The replica exchange period (%d) is not divisible by nstcalclr (%d)", repl_ex_nst, ir->nstcalclr);
 -    }
 -
      if (ir->efep != efepNO)
      {
          /* Set free energy calculation frequency as the greatest common
 -         * denominator of nstdhdl and repl_ex_nst.
 -         * Check for nstcalclr with twin-range, since we need the long-range
 -         * contribution to the free-energy at the correct (nstcalclr) steps.
 -         */
 +         * denominator of nstdhdl and repl_ex_nst. */
          nstfep = ir->fepvals->nstdhdl;
          if (ir->bExpanded)
          {
 -            if (IR_TWINRANGE(*ir) &&
 -                ir->expandedvals->nstexpanded % ir->nstcalclr != 0)
 -            {
 -                gmx_fatal(FARGS, "nstexpanded should be divisible by nstcalclr");
 -            }
              nstfep = gmx_greatest_common_divisor(ir->expandedvals->nstexpanded, nstfep);
          }
          if (repl_ex_nst > 0)
          {
              nstfep = gmx_greatest_common_divisor(repl_ex_nst, nstfep);
          }
 -        /* We checked divisibility of repl_ex_nst and nstcalclr above */
 -        if (IR_TWINRANGE(*ir) && nstfep % ir->nstcalclr != 0)
 -        {
 -            gmx_incons("nstfep not divisible by nstcalclr");
 -        }
      }
  
      /* Be REALLY careful about what flags you set here. You CANNOT assume
       */
      bStopCM = (ir->comm_mode != ecmNO && !ir->bContinuation);
  
 +    if (Flags & MD_READ_EKIN)
 +    {
 +        restore_ekinstate_from_state(cr, ekind, &state_global->ekinstate);
 +    }
 +
      cglo_flags = (CGLO_TEMPERATURE | CGLO_GSTAT
                    | (bStopCM ? CGLO_STOPCM : 0)
 -                  | (bVV ? CGLO_PRESSURE : 0)
 -                  | (bVV ? CGLO_CONSTRAINT : 0)
 -                  | (bRerunMD ? CGLO_RERUNMD : 0)
 +                  | (EI_VV(ir->eI) ? CGLO_PRESSURE : 0)
 +                  | (EI_VV(ir->eI) ? CGLO_CONSTRAINT : 0)
                    | ((Flags & MD_READ_EKIN) ? CGLO_READEKIN : 0));
  
      bSumEkinhOld = FALSE;
 -    compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
 +    compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, vcm,
                      NULL, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
 -                    constr, NULL, FALSE, state->box,
 -                    top_global, &bSumEkinhOld, cglo_flags);
 +                    constr, &nullSignaller, state->box,
 +                    &totalNumberOfBondedInteractions, &bSumEkinhOld, cglo_flags
 +                    | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS : 0));
 +    checkNumberOfBondedInteractions(fplog, cr, totalNumberOfBondedInteractions,
 +                                    top_global, top, state,
 +                                    &shouldCheckNumberOfBondedInteractions);
      if (ir->eI == eiVVAK)
      {
          /* a second call to get the half step temperature initialized as well */
             kinetic energy calculation.  This minimized excess variables, but
             perhaps loses some logic?*/
  
 -        compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
 +        compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, vcm,
                          NULL, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
 -                        constr, NULL, FALSE, state->box,
 -                        top_global, &bSumEkinhOld,
 +                        constr, &nullSignaller, state->box,
 +                        NULL, &bSumEkinhOld,
                          cglo_flags &~(CGLO_STOPCM | CGLO_PRESSURE));
      }
  
          {
              fprintf(fplog,
                      "RMS relative constraint deviation after constraining: %.2e\n",
 -                    constr_rmsd(constr, FALSE));
 +                    constr_rmsd(constr));
          }
          if (EI_STATE_VELOCITY(ir->eI))
          {
      }
  #endif
  
 -    debug_gmx();
      /***********************************************************
       *
       *             Loop over MD steps
          rerun_fr.natoms = 0;
          if (MASTER(cr))
          {
 -            bNotLastFrame = read_first_frame(oenv, &status,
 -                                             opt2fn("-rerun", nfile, fnm),
 -                                             &rerun_fr, TRX_NEED_X | TRX_READ_V);
 +            bLastStep = !read_first_frame(oenv, &status,
 +                                          opt2fn("-rerun", nfile, fnm),
 +                                          &rerun_fr, TRX_NEED_X | TRX_READ_V);
              if (rerun_fr.natoms != top_global->natoms)
              {
                  gmx_fatal(FARGS,
                  {
                      gmx_fatal(FARGS, "Rerun trajectory frame step %d time %f does not contain a box, while pbc is used", rerun_fr.step, rerun_fr.time);
                  }
 -                if (max_cutoff2(ir->ePBC, rerun_fr.box) < sqr(fr->rlistlong))
 +                if (max_cutoff2(ir->ePBC, rerun_fr.box) < gmx::square(fr->rlist))
                  {
                      gmx_fatal(FARGS, "Rerun trajectory frame step %d time %f has too small box dimensions", rerun_fr.step, rerun_fr.time);
                  }
  
          if (PAR(cr))
          {
 -            rerun_parallel_comm(cr, &rerun_fr, &bNotLastFrame);
 +            rerun_parallel_comm(cr, &rerun_fr, &bLastStep);
          }
  
          if (ir->ePBC != epbcNONE)
      /* loop over MD steps or if rerunMD to end of input trajectory */
      bFirstStep = TRUE;
      /* Skip the first Nose-Hoover integration when we get the state from tpx */
 -    bStateFromTPX    = !bStateFromCP;
 -    bInitStep        = bFirstStep && (bStateFromTPX || bVV);
 -    bStartingFromCpt = (Flags & MD_STARTFROMCPT) && bInitStep;
 +    bInitStep        = !startingFromCheckpoint || EI_VV(ir->eI);
      bSumEkinhOld     = FALSE;
      bExchanged       = FALSE;
      bNeedRepartition = FALSE;
 +    // TODO This implementation of ensemble orientation restraints is nasty because
 +    // a user can't just do multi-sim with single-sim orientation restraints.
 +    bUsingEnsembleRestraints = (fcd->disres.nsystems > 1) || (cr->ms && fcd->orires.nr);
  
 -    init_global_signals(&gs, cr, ir, repl_ex_nst);
 +    {
 +        // Replica exchange and ensemble restraints need all
 +        // simulations to remain synchronized, so they need
 +        // checkpoints and stop conditions to act on the same step, so
 +        // the propagation of such signals must take place between
 +        // simulations, not just within simulations.
 +        bool checkpointIsLocal    = (repl_ex_nst <= 0) && !bUsingEnsembleRestraints;
 +        bool stopConditionIsLocal = (repl_ex_nst <= 0) && !bUsingEnsembleRestraints;
 +        bool resetCountersIsLocal = true;
 +        signals[eglsCHKPT]         = SimulationSignal(checkpointIsLocal);
 +        signals[eglsSTOPCOND]      = SimulationSignal(stopConditionIsLocal);
 +        signals[eglsRESETCOUNTERS] = SimulationSignal(resetCountersIsLocal);
 +    }
  
      step     = ir->init_step;
      step_rel = 0;
  
 -    if (MULTISIM(cr) && (repl_ex_nst <= 0 ))
 +    // TODO extract this to new multi-simulation module
 +    if (MASTER(cr) && MULTISIM(cr) && (repl_ex_nst <= 0 ))
      {
 -        /* check how many steps are left in other sims */
 -        multisim_nsteps = get_multisim_nsteps(cr, ir->nsteps);
 -    }
 -    if (MULTISIM(cr) && max_hours > 0)
 -    {
 -        gmx_fatal(FARGS, "The combination of mdrun -maxh and mdrun -multi is not supported. Please use the nsteps .mdp field.");
 +        if (!multisim_int_all_are_equal(cr->ms, ir->nsteps))
 +        {
 +            md_print_info(cr, fplog,
 +                          "Note: The number of steps is not consistent across multi simulations,\n"
 +                          "but we are proceeding anyway!\n");
 +        }
 +        if (!multisim_int_all_are_equal(cr->ms, ir->init_step))
 +        {
 +            md_print_info(cr, fplog,
 +                          "Note: The initial step is not consistent across multi simulations,\n"
 +                          "but we are proceeding anyway!\n");
 +        }
      }
  
      /* and stop now if we should */
 -    bLastStep = (bRerunMD || (ir->nsteps >= 0 && step_rel > ir->nsteps) ||
 -                 ((multisim_nsteps >= 0) && (step_rel >= multisim_nsteps )));
 -    while (!bLastStep || (bRerunMD && bNotLastFrame))
 +    bLastStep = (bLastStep || (ir->nsteps >= 0 && step_rel > ir->nsteps));
 +    while (!bLastStep)
      {
  
          /* Determine if this is a neighbor search step */
              pme_loadbal_do(pme_loadbal, cr,
                             (bVerbose && MASTER(cr)) ? stderr : NULL,
                             fplog,
 -                           ir, fr, state, wcycle,
 +                           ir, fr, state,
 +                           wcycle,
                             step, step_rel,
                             &bPMETunePrinting);
          }
              t         = t0 + step*ir->delta_t;
          }
  
 +        // TODO Refactor this, so that nstfep does not need a default value of zero
          if (ir->efep != efepNO || ir->bSimTemp)
          {
              /* find and set the current lambdas.  If rerunning, we either read in a state, or a lambda value,
              bDoDHDL      = do_per_step(step, ir->fepvals->nstdhdl);
              bDoFEP       = ((ir->efep != efepNO) && do_per_step(step, nstfep));
              bDoExpanded  = (do_per_step(step, ir->expandedvals->nstexpanded)
 -                            && (ir->bExpanded) && (step > 0) && (!bStartingFromCpt));
 +                            && (ir->bExpanded) && (step > 0) && (!startingFromCheckpoint));
          }
  
          bDoReplEx = ((repl_ex_nst > 0) && (step > 0) && !bLastStep &&
  
          if (bSimAnn)
          {
 -            update_annealing_target_temp(&(ir->opts), t);
 +            update_annealing_target_temp(ir, t, upd);
          }
  
          if (bRerunMD)
          {
              /* for rerun MD always do Neighbour Searching */
              bNS      = (bFirstStep || ir->nstlist != 0);
 -            bNStList = bNS;
          }
          else
          {
 -            /* Determine whether or not to do Neighbour Searching and LR */
 +            /* Determine whether or not to do Neighbour Searching */
              bNS = (bFirstStep || bNStList || bExchanged || bNeedRepartition);
          }
  
 -        /* check whether we should stop because another simulation has
 -           stopped. */
 -        if (MULTISIM(cr))
 -        {
 -            if ( (multisim_nsteps >= 0) &&  (step_rel >= multisim_nsteps)  &&
 -                 (multisim_nsteps != ir->nsteps) )
 -            {
 -                if (bNS)
 -                {
 -                    if (MASTER(cr))
 -                    {
 -                        fprintf(stderr,
 -                                "Stopping simulation %d because another one has finished\n",
 -                                cr->ms->sim);
 -                    }
 -                    bLastStep         = TRUE;
 -                    gs.sig[eglsCHKPT] = 1;
 -                }
 -            }
 -        }
 -
          /* < 0 means stop at next step, > 0 means stop at next NS step */
 -        if ( (gs.set[eglsSTOPCOND] < 0) ||
 -             ( (gs.set[eglsSTOPCOND] > 0) && (bNStList || ir->nstlist == 0) ) )
 +        if ( (signals[eglsSTOPCOND].set < 0) ||
 +             ( (signals[eglsSTOPCOND].set > 0 ) && ( bNS || ir->nstlist == 0)))
          {
              bLastStep = TRUE;
          }
           * Note that the || bLastStep can result in non-exact continuation
           * beyond the last step. But we don't consider that to be an issue.
           */
 -        do_log     = do_per_step(step, ir->nstlog) || (bFirstStep && !bStateFromCP) || bLastStep;
 +        do_log     = do_per_step(step, ir->nstlog) || (bFirstStep && !startingFromCheckpoint) || bLastStep || bRerunMD;
          do_verbose = bVerbose &&
 -            (step % stepout == 0 || bFirstStep || bLastStep);
 +            (step % stepout == 0 || bFirstStep || bLastStep || bRerunMD);
  
          if (bNS && !(bFirstStep && ir->bContinuation && !bRerunMD))
          {
              {
                  bMasterState = FALSE;
                  /* Correct the new box if it is too skewed */
 -                if (DYNAMIC_BOX(*ir))
 +                if (inputrecDynamicBox(ir))
                  {
                      if (correct_box(fplog, step, state->box, graph))
                      {
                                      bMasterState, nstglobalcomm,
                                      state_global, top_global, ir,
                                      state, &f, mdatoms, top, fr,
 -                                    vsite, shellfc, constr,
 +                                    vsite, constr,
                                      nrnb, wcycle,
                                      do_verbose && !bPMETunePrinting);
 +                shouldCheckNumberOfBondedInteractions = true;
 +                update_realloc(upd, state->nalloc);
              }
          }
  
          if (MASTER(cr) && do_log)
          {
 -            print_ebin_header(fplog, step, t, state->lambda[efptFEP]); /* can we improve the information printed here? */
 +            print_ebin_header(fplog, step, t); /* can we improve the information printed here? */
          }
  
          if (ir->efep != efepNO)
              /* We need the kinetic energy at minus the half step for determining
               * the full step kinetic energy and possibly for T-coupling.*/
              /* This may not be quite working correctly yet . . . . */
 -            compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
 +            compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, vcm,
                              wcycle, enerd, NULL, NULL, NULL, NULL, mu_tot,
 -                            constr, NULL, FALSE, state->box,
 -                            top_global, &bSumEkinhOld,
 -                            CGLO_RERUNMD | CGLO_GSTAT | CGLO_TEMPERATURE);
 +                            constr, &nullSignaller, state->box,
 +                            &totalNumberOfBondedInteractions, &bSumEkinhOld,
 +                            CGLO_GSTAT | CGLO_TEMPERATURE | CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS);
 +            checkNumberOfBondedInteractions(fplog, cr, totalNumberOfBondedInteractions,
 +                                            top_global, top, state,
 +                                            &shouldCheckNumberOfBondedInteractions);
          }
          clear_mat(force_vir);
  
           * or at the last step (but not when we do not want confout),
           * but never at the first step or with rerun.
           */
 -        bCPT = (((gs.set[eglsCHKPT] && (bNS || ir->nstlist == 0)) ||
 +        bCPT = (((signals[eglsCHKPT].set && (bNS || ir->nstlist == 0)) ||
                   (bLastStep && (Flags & MD_CONFOUT))) &&
                  step > ir->init_step && !bRerunMD);
          if (bCPT)
          {
 -            gs.set[eglsCHKPT] = 0;
 +            signals[eglsCHKPT].set = 0;
          }
  
          /* Determine the energy and pressure:
          }
          bCalcEner = bCalcEnerStep;
  
 -        do_ene = (do_per_step(step, ir->nstenergy) || bLastStep);
 +        do_ene = (do_per_step(step, ir->nstenergy) || bLastStep || bRerunMD);
  
          if (do_ene || do_log || bDoReplEx)
          {
          /* Do we need global communication ? */
          bGStat = (bCalcVir || bCalcEner || bStopCM ||
                    do_per_step(step, nstglobalcomm) ||
 -                  (bVV && IR_NVT_TROTTER(ir) && do_per_step(step-1, nstglobalcomm)));
 -
 -        /* these CGLO_ options remain the same throughout the iteration */
 -        cglo_flags = ((bRerunMD ? CGLO_RERUNMD : 0) |
 -                      (bGStat ? CGLO_GSTAT : 0)
 -                      );
 +                  (EI_VV(ir->eI) && inputrecNvtTrotter(ir) && do_per_step(step-1, nstglobalcomm)));
  
          force_flags = (GMX_FORCE_STATECHANGED |
 -                       ((DYNAMIC_BOX(*ir) || bRerunMD) ? GMX_FORCE_DYNAMICBOX : 0) |
 +                       ((inputrecDynamicBox(ir) || bRerunMD) ? GMX_FORCE_DYNAMICBOX : 0) |
                         GMX_FORCE_ALLFORCES |
 -                       GMX_FORCE_SEPLRF |
                         (bCalcVir ? GMX_FORCE_VIRIAL : 0) |
                         (bCalcEner ? GMX_FORCE_ENERGY : 0) |
                         (bDoFEP ? GMX_FORCE_DHDL : 0)
                         );
  
 -        if (fr->bTwinRange)
 -        {
 -            if (do_per_step(step, ir->nstcalclr))
 -            {
 -                force_flags |= GMX_FORCE_DO_LR;
 -            }
 -        }
 -
          if (shellfc)
          {
              /* Now is the time to relax the shells */
 -            count = relax_shell_flexcon(fplog, cr, bVerbose, step,
 -                                        ir, bNS, force_flags,
 -                                        top,
 -                                        constr, enerd, fcd,
 -                                        state, f, force_vir, mdatoms,
 -                                        nrnb, wcycle, graph, groups,
 -                                        shellfc, fr, bBornRadii, t, mu_tot,
 -                                        &bConverged, vsite,
 -                                        mdoutf_get_fp_field(outf));
 -            tcount += count;
 -
 -            if (bConverged)
 -            {
 -                nconverged++;
 -            }
 +            relax_shell_flexcon(fplog, cr, bVerbose, step,
 +                                ir, bNS, force_flags, top,
 +                                constr, enerd, fcd,
 +                                state, f, force_vir, mdatoms,
 +                                nrnb, wcycle, graph, groups,
 +                                shellfc, fr, bBornRadii, t, mu_tot,
 +                                vsite, mdoutf_get_fp_field(outf));
          }
          else
          {
                       (bNS ? GMX_FORCE_NS : 0) | force_flags);
          }
  
 -        if (bVV && !bStartingFromCpt && !bRerunMD)
 +        if (EI_VV(ir->eI) && !startingFromCheckpoint && !bRerunMD)
          /*  ############### START FIRST UPDATE HALF-STEP FOR VV METHODS############### */
          {
              rvec *vbuf = NULL;
                  trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ1);
              }
  
 -            /* If we are using twin-range interactions where the long-range component
 -             * is only evaluated every nstcalclr>1 steps, we should do a special update
 -             * step to combine the long-range forces on these steps.
 -             * For nstcalclr=1 this is not done, since the forces would have been added
 -             * directly to the short-range forces already.
 -             *
 -             * TODO Remove various aspects of VV+twin-range in master
 -             * branch, because VV integrators did not ever support
 -             * twin-range multiple time stepping with constraints.
 -             */
 -            bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr));
 -
 -            update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC,
 -                          f, bUpdateDoLR, fr->f_twin, bCalcVir ? &fr->vir_twin_constr : NULL, fcd,
 -                          ekind, M, upd, bInitStep, etrtVELOCITY1,
 -                          cr, nrnb, constr, &top->idef);
 +            update_coords(fplog, step, ir, mdatoms, state, f, fcd,
 +                          ekind, M, upd, etrtVELOCITY1,
 +                          cr, constr);
  
              if (!bRerunMD || rerun_fr.bV || bForceUpdate)         /* Why is rerun_fr.bV here?  Unclear. */
              {
                                     cr, nrnb, wcycle, upd, constr,
                                     TRUE, bCalcVir);
                  wallcycle_start(wcycle, ewcUPDATE);
 -                if (bCalcVir && bUpdateDoLR && ir->nstcalclr > 1)
 -                {
 -                    /* Correct the virial for multiple time stepping */
 -                    m_sub(shake_vir, fr->vir_twin_constr, shake_vir);
 -                }
              }
              else if (graph)
              {
               * Think about ways around this in the future?
               * For now, keep this choice in comments.
               */
 -            /*bPres = (ir->eI==eiVV || IR_NPT_TROTTER(ir)); */
 -            /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && IR_NPT_TROTTER(ir)));*/
 +            /*bPres = (ir->eI==eiVV || inputrecNptTrotter(ir)); */
 +            /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && inputrecNptTrotter(ir)));*/
              bPres = TRUE;
              bTemp = ((ir->eI == eiVV && (!bInitStep)) || (ir->eI == eiVVAK));
              if (bCalcEner && ir->eI == eiVVAK)
              if (bGStat || do_per_step(step-1, nstglobalcomm))
              {
                  wallcycle_stop(wcycle, ewcUPDATE);
 -                compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
 +                compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, vcm,
                                  wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
 -                                constr, NULL, FALSE, state->box,
 -                                top_global, &bSumEkinhOld,
 -                                cglo_flags
 +                                constr, &nullSignaller, state->box,
 +                                &totalNumberOfBondedInteractions, &bSumEkinhOld,
 +                                (bGStat ? CGLO_GSTAT : 0)
                                  | CGLO_ENERGY
                                  | (bTemp ? CGLO_TEMPERATURE : 0)
                                  | (bPres ? CGLO_PRESSURE : 0)
                                  | (bPres ? CGLO_CONSTRAINT : 0)
                                  | (bStopCM ? CGLO_STOPCM : 0)
 +                                | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS : 0)
                                  | CGLO_SCALEEKIN
                                  );
                  /* explanation of above:
                     time step kinetic energy for the pressure (always true now, since we want accurate statistics).
                     b) If we are using EkinAveEkin for the kinetic energy for the temperature control, we still feed in
                     EkinAveVel because it's needed for the pressure */
 +                checkNumberOfBondedInteractions(fplog, cr, totalNumberOfBondedInteractions,
 +                                                top_global, top, state,
 +                                                &shouldCheckNumberOfBondedInteractions);
                  wallcycle_start(wcycle, ewcUPDATE);
              }
              /* temperature scaling and pressure scaling to produce the extended variables at t+dt */
                  {
                      m_add(force_vir, shake_vir, total_vir);     /* we need the un-dispersion corrected total vir here */
                      trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ2);
 -                }
 -                else
 -                {
 -                    if (bExchanged)
 +
 +                    copy_mat(shake_vir, state->svir_prev);
 +                    copy_mat(force_vir, state->fvir_prev);
 +                    if (inputrecNvtTrotter(ir) && ir->eI == eiVV)
                      {
 -                        wallcycle_stop(wcycle, ewcUPDATE);
 -                        /* We need the kinetic energy at minus the half step for determining
 -                         * the full step kinetic energy and possibly for T-coupling.*/
 -                        /* This may not be quite working correctly yet . . . . */
 -                        compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
 -                                        wcycle, enerd, NULL, NULL, NULL, NULL, mu_tot,
 -                                        constr, NULL, FALSE, state->box,
 -                                        top_global, &bSumEkinhOld,
 -                                        CGLO_RERUNMD | CGLO_GSTAT | CGLO_TEMPERATURE);
 -                        wallcycle_start(wcycle, ewcUPDATE);
 +                        /* update temperature and kinetic energy now that step is over - this is the v(t+dt) point */
 +                        enerd->term[F_TEMP] = sum_ekin(&(ir->opts), ekind, NULL, (ir->eI == eiVV), FALSE);
 +                        enerd->term[F_EKIN] = trace(ekind->ekin);
                      }
                  }
 -            }
 -            if (bTrotter && !bInitStep)
 -            {
 -                copy_mat(shake_vir, state->svir_prev);
 -                copy_mat(force_vir, state->fvir_prev);
 -                if (IR_NVT_TROTTER(ir) && ir->eI == eiVV)
 +                else if (bExchanged)
                  {
 -                    /* update temperature and kinetic energy now that step is over - this is the v(t+dt) point */
 -                    enerd->term[F_TEMP] = sum_ekin(&(ir->opts), ekind, NULL, (ir->eI == eiVV), FALSE);
 -                    enerd->term[F_EKIN] = trace(ekind->ekin);
 +                    wallcycle_stop(wcycle, ewcUPDATE);
 +                    /* We need the kinetic energy at minus the half step for determining
 +                     * the full step kinetic energy and possibly for T-coupling.*/
 +                    /* This may not be quite working correctly yet . . . . */
 +                    compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, vcm,
 +                                    wcycle, enerd, NULL, NULL, NULL, NULL, mu_tot,
 +                                    constr, &nullSignaller, state->box,
 +                                    NULL, &bSumEkinhOld,
 +                                    CGLO_GSTAT | CGLO_TEMPERATURE);
 +                    wallcycle_start(wcycle, ewcUPDATE);
                  }
              }
              /* if it's the initial step, we performed this first step just to get the constraint virial */
          }
  
          /* compute the conserved quantity */
 -        if (bVV)
 +        if (EI_VV(ir->eI))
          {
              saved_conserved_quantity = compute_conserved_from_auxiliary(ir, state, &MassQ);
              if (ir->eI == eiVV)
           */
          do_md_trajectory_writing(fplog, cr, nfile, fnm, step, step_rel, t,
                                   ir, state, state_global, top_global, fr,
 -                                 outf, mdebin, ekind, f, f_global,
 +                                 outf, mdebin, ekind, f,
                                   &nchkpt,
                                   bCPT, bRerunMD, bLastStep, (Flags & MD_CONFOUT),
                                   bSumEkinhOld);
          bIMDstep = do_IMD(ir->bIMD, step, cr, bNS, state->box, state->x, ir, t, wcycle);
  
          /* kludge -- virial is lost with restart for MTTK NPT control. Must reload (saved earlier). */
 -        if (bStartingFromCpt && bTrotter)
 +        if (startingFromCheckpoint && bTrotter)
          {
              copy_mat(state->svir_prev, shake_vir);
              copy_mat(state->fvir_prev, force_vir);
  
          /* Check whether everything is still allright */
          if (((int)gmx_get_stop_condition() > handled_stop_condition)
 -#ifdef GMX_THREAD_MPI
 +#if GMX_THREAD_MPI
              && MASTER(cr)
  #endif
              )
          {
 -            /* this is just make gs.sig compatible with the hack
 -               of sending signals around by MPI_Reduce with together with
 +            int nsteps_stop = -1;
 +
 +            /* this just makes signals[].sig compatible with the hack
 +               of sending signals around by MPI_Reduce together with
                 other floats */
              if (gmx_get_stop_condition() == gmx_stop_cond_next_ns)
              {
 -                gs.sig[eglsSTOPCOND] = 1;
 +                signals[eglsSTOPCOND].sig = 1;
 +                nsteps_stop               = std::max(ir->nstlist, 2*nstglobalcomm);
              }
              if (gmx_get_stop_condition() == gmx_stop_cond_next)
              {
 -                gs.sig[eglsSTOPCOND] = -1;
 +                signals[eglsSTOPCOND].sig = -1;
 +                nsteps_stop               = nstglobalcomm + 1;
              }
 -            /* < 0 means stop at next step, > 0 means stop at next NS step */
              if (fplog)
              {
                  fprintf(fplog,
 -                        "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
 -                        gmx_get_signal_name(),
 -                        gs.sig[eglsSTOPCOND] == 1 ? "NS " : "");
 +                        "\n\nReceived the %s signal, stopping within %d steps\n\n",
 +                        gmx_get_signal_name(), nsteps_stop);
                  fflush(fplog);
              }
              fprintf(stderr,
 -                    "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
 -                    gmx_get_signal_name(),
 -                    gs.sig[eglsSTOPCOND] == 1 ? "NS " : "");
 +                    "\n\nReceived the %s signal, stopping within %d steps\n\n",
 +                    gmx_get_signal_name(), nsteps_stop);
              fflush(stderr);
              handled_stop_condition = (int)gmx_get_stop_condition();
          }
          else if (MASTER(cr) && (bNS || ir->nstlist <= 0) &&
                   (max_hours > 0 && elapsed_time > max_hours*60.0*60.0*0.99) &&
 -                 gs.sig[eglsSTOPCOND] == 0 && gs.set[eglsSTOPCOND] == 0)
 +                 signals[eglsSTOPCOND].sig == 0 && signals[eglsSTOPCOND].set == 0)
          {
              /* Signal to terminate the run */
 -            gs.sig[eglsSTOPCOND] = 1;
 +            signals[eglsSTOPCOND].sig = 1;
              if (fplog)
              {
                  fprintf(fplog, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n", gmx_step_str(step, sbuf), max_hours*0.99);
              elapsed_time > max_hours*60.0*60.0*0.495)
          {
              /* Set flag that will communicate the signal to all ranks in the simulation */
 -            gs.sig[eglsRESETCOUNTERS] = 1;
 +            signals[eglsRESETCOUNTERS].sig = 1;
          }
  
          /* In parallel we only have to check for checkpointing in steps
                             cpt_period >= 0 &&
                             (cpt_period == 0 ||
                              elapsed_time >= nchkpt*cpt_period*60.0)) &&
 -            gs.set[eglsCHKPT] == 0)
 +            signals[eglsCHKPT].set == 0)
          {
 -            gs.sig[eglsCHKPT] = 1;
 +            signals[eglsCHKPT].sig = 1;
          }
  
 +        /* #########   START SECOND UPDATE STEP ################# */
 +
          /* at the start of step, randomize or scale the velocities (if vv). Restriction of Andersen is controlled
             in preprocessing */
  
                                     TRUE, bCalcVir);
              }
          }
 -        /* #########   START SECOND UPDATE STEP ################# */
          /* Box is changed in update() when we do pressure coupling,
           * but we should still use the old box for energy corrections and when
           * writing it to the energy file, so it matches the trajectory files for
                  update_pcouple(fplog, step, ir, state, pcoupl_mu, M, bInitStep);
              }
  
 -            if (bVV)
 +            if (EI_VV(ir->eI))
              {
 -                bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr));
 -
                  /* velocity half-step update */
 -                update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC, f,
 -                              bUpdateDoLR, fr->f_twin, bCalcVir ? &fr->vir_twin_constr : NULL, fcd,
 -                              ekind, M, upd, FALSE, etrtVELOCITY2,
 -                              cr, nrnb, constr, &top->idef);
 +                update_coords(fplog, step, ir, mdatoms, state, f, fcd,
 +                              ekind, M, upd, etrtVELOCITY2,
 +                              cr, constr);
              }
  
              /* Above, initialize just copies ekinh into ekin,
                  }
                  copy_rvecn(state->x, cbuf, 0, state->natoms);
              }
 -            bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr));
  
 -            update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC, f,
 -                          bUpdateDoLR, fr->f_twin, bCalcVir ? &fr->vir_twin_constr : NULL, fcd,
 -                          ekind, M, upd, bInitStep, etrtPOSITION, cr, nrnb, constr, &top->idef);
 +            update_coords(fplog, step, ir, mdatoms, state, f, fcd,
 +                          ekind, M, upd, etrtPOSITION, cr, constr);
              wallcycle_stop(wcycle, ewcUPDATE);
  
              update_constraints(fplog, step, &dvdl_constr, ir, mdatoms, state,
                                 cr, nrnb, wcycle, upd, constr,
                                 FALSE, bCalcVir);
  
 -            if (bCalcVir && bUpdateDoLR && ir->nstcalclr > 1)
 -            {
 -                /* Correct the virial for multiple time stepping */
 -                m_sub(shake_vir, fr->vir_twin_constr, shake_vir);
 -            }
 -
              if (ir->eI == eiVVAK)
              {
                  /* erase F_EKIN and F_TEMP here? */
                  /* just compute the kinetic energy at the half step to perform a trotter step */
 -                compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
 +                compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, vcm,
                                  wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
 -                                constr, NULL, FALSE, lastbox,
 -                                top_global, &bSumEkinhOld,
 -                                cglo_flags | CGLO_TEMPERATURE
 +                                constr, &nullSignaller, lastbox,
 +                                NULL, &bSumEkinhOld,
 +                                (bGStat ? CGLO_GSTAT : 0) | CGLO_TEMPERATURE
                                  );
                  wallcycle_start(wcycle, ewcUPDATE);
                  trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ4);
                  /* now we know the scaling, we can compute the positions again */
                  copy_rvecn(cbuf, state->x, 0, state->natoms);
  
 -                bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr));
 -
 -                update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC, f,
 -                              bUpdateDoLR, fr->f_twin, bCalcVir ? &fr->vir_twin_constr : NULL, fcd,
 -                              ekind, M, upd, bInitStep, etrtPOSITION, cr, nrnb, constr, &top->idef);
 +                update_coords(fplog, step, ir, mdatoms, state, f, fcd,
 +                              ekind, M, upd, etrtPOSITION, cr, constr);
                  wallcycle_stop(wcycle, ewcUPDATE);
  
                  /* do we need an extra constraint here? just need to copy out of state->v to upd->xp? */
                                     cr, nrnb, wcycle, upd, NULL,
                                     FALSE, bCalcVir);
              }
 -            if (bVV)
 +            if (EI_VV(ir->eI))
              {
                  /* this factor of 2 correction is necessary
                     because half of the constraint force is removed
           * non-communication steps, but we need to calculate
           * the kinetic energy one step before communication.
           */
 -        if (bGStat || (!EI_VV(ir->eI) && do_per_step(step+1, nstglobalcomm)))
 -        {
 -            compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
 -                            wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
 -                            constr, &gs,
 -                            (step_rel % gs.nstms == 0) &&
 -                            (multisim_nsteps < 0 || (step_rel < multisim_nsteps)),
 -                            lastbox,
 -                            top_global, &bSumEkinhOld,
 -                            cglo_flags
 -                            | (!EI_VV(ir->eI) || bRerunMD ? CGLO_ENERGY : 0)
 -                            | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0)
 -                            | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0)
 -                            | (!EI_VV(ir->eI) || bRerunMD ? CGLO_PRESSURE : 0)
 -                            | CGLO_CONSTRAINT
 -                            );
 +        {
 +            // Organize to do inter-simulation signalling on steps if
 +            // and when algorithms require it.
 +            bool doInterSimSignal = (!bFirstStep && bDoReplEx) || bUsingEnsembleRestraints;
 +
 +            if (bGStat || (!EI_VV(ir->eI) && do_per_step(step+1, nstglobalcomm)) || doInterSimSignal)
 +            {
 +                // Since we're already communicating at this step, we
 +                // can propagate intra-simulation signals. Note that
 +                // check_nstglobalcomm has the responsibility for
 +                // choosing the value of nstglobalcomm that is one way
 +                // bGStat becomes true, so we can't get into a
 +                // situation where e.g. checkpointing can't be
 +                // signalled.
 +                bool                doIntraSimSignal = true;
 +                SimulationSignaller signaller(&signals, cr, doInterSimSignal, doIntraSimSignal);
 +
 +                compute_globals(fplog, gstat, cr, ir, fr, ekind, state, mdatoms, nrnb, vcm,
 +                                wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
 +                                constr, &signaller,
 +                                lastbox,
 +                                &totalNumberOfBondedInteractions, &bSumEkinhOld,
 +                                (bGStat ? CGLO_GSTAT : 0)
 +                                | (!EI_VV(ir->eI) || bRerunMD ? CGLO_ENERGY : 0)
 +                                | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0)
 +                                | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0)
 +                                | (!EI_VV(ir->eI) || bRerunMD ? CGLO_PRESSURE : 0)
 +                                | CGLO_CONSTRAINT
 +                                | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS : 0)
 +                                );
 +                checkNumberOfBondedInteractions(fplog, cr, totalNumberOfBondedInteractions,
 +                                                top_global, top, state,
 +                                                &shouldCheckNumberOfBondedInteractions);
 +            }
          }
  
          /* #############  END CALC EKIN AND PRESSURE ################# */
             but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could
             generate the new shake_vir, but test the veta value for convergence.  This will take some thought. */
  
 -        if (ir->efep != efepNO && (!bVV || bRerunMD))
 +        if (ir->efep != efepNO && (!EI_VV(ir->eI) || bRerunMD))
          {
              /* Sum up the foreign energy and dhdl terms for md and sd.
                 Currently done every step so that dhdl is correct in the .edr */
          }
          enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN];
  
 -        if (bVV)
 +        if (EI_VV(ir->eI))
          {
              enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity;
          }
  
              print_ebin(mdoutf_get_fp_ene(outf), do_ene, do_dr, do_or, do_log ? fplog : NULL,
                         step, t,
 -                       eprNORMAL, bCompact, mdebin, fcd, groups, &(ir->opts));
 +                       eprNORMAL, mdebin, fcd, groups, &(ir->opts));
  
              if (ir->bPull)
              {
              dd_partition_system(fplog, step, cr, TRUE, 1,
                                  state_global, top_global, ir,
                                  state, &f, mdatoms, top, fr,
 -                                vsite, shellfc, constr,
 +                                vsite, constr,
                                  nrnb, wcycle, FALSE);
 +            shouldCheckNumberOfBondedInteractions = true;
 +            update_realloc(upd, state->nalloc);
          }
  
 -        bFirstStep       = FALSE;
 -        bInitStep        = FALSE;
 -        bStartingFromCpt = FALSE;
 +        bFirstStep             = FALSE;
 +        bInitStep              = FALSE;
 +        startingFromCheckpoint = FALSE;
  
          /* #######  SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */
          /* With all integrators, except VV, we need to retain the pressure
              if (MASTER(cr))
              {
                  /* read next frame from input trajectory */
 -                bNotLastFrame = read_next_frame(oenv, status, &rerun_fr);
 +                bLastStep = !read_next_frame(oenv, status, &rerun_fr);
              }
  
              if (PAR(cr))
              {
 -                rerun_parallel_comm(cr, &rerun_fr, &bNotLastFrame);
 +                rerun_parallel_comm(cr, &rerun_fr, &bLastStep);
              }
          }
  
          /* If it is time to reset counters, set a flag that remains
             true until counters actually get reset */
          if (step_rel == wcycle_get_reset_counters(wcycle) ||
 -            gs.set[eglsRESETCOUNTERS] != 0)
 +            signals[eglsRESETCOUNTERS].set != 0)
          {
              if (pme_loadbal_is_active(pme_loadbal))
              {
              /* Correct max_hours for the elapsed time */
              max_hours                -= elapsed_time/(60.0*60.0);
              /* If mdrun -maxh -resethway was active, it can only trigger once */
 -            bResetCountersHalfMaxH    = FALSE; /* TODO move this to where gs.sig[eglsRESETCOUNTERS] is set */
 +            bResetCountersHalfMaxH    = FALSE; /* TODO move this to where signals[eglsRESETCOUNTERS].sig is set */
              /* Reset can only happen once, so clear the triggering flag. */
 -            gs.set[eglsRESETCOUNTERS] = 0;
 +            signals[eglsRESETCOUNTERS].set = 0;
          }
  
          /* If bIMD is TRUE, the master updates the IMD energy record and sends positions to VMD client */
  
      }
      /* End of main MD loop */
 -    debug_gmx();
  
      /* Closing TNG files can include compressing data. Therefore it is good to do that
       * before stopping the time measurements. */
          if (ir->nstcalcenergy > 0 && !bRerunMD)
          {
              print_ebin(mdoutf_get_fp_ene(outf), FALSE, FALSE, FALSE, fplog, step, t,
 -                       eprAVER, FALSE, mdebin, fcd, groups, &(ir->opts));
 +                       eprAVER, mdebin, fcd, groups, &(ir->opts));
          }
      }
  
      done_mdoutf(outf);
 -    debug_gmx();
  
      if (bPMETune)
      {
          pme_loadbal_done(pme_loadbal, cr, fplog, use_GPU(fr->nbv));
      }
  
 -    if (shellfc && fplog)
 -    {
 -        fprintf(fplog, "Fraction of iterations that converged:           %.2f %%\n",
 -                (nconverged*100.0)/step_rel);
 -        fprintf(fplog, "Average number of force evaluations per MD step: %.2f\n\n",
 -                tcount/step_rel);
 -    }
 +    done_shellfc(fplog, shellfc, step_rel);
  
      if (repl_ex_nst > 0 && MASTER(cr))
      {
          print_replica_exchange_statistics(fplog, repl_ex);
      }
  
 +    // Clean up swapcoords
 +    if (ir->eSwapCoords != eswapNO)
 +    {
 +        finish_swapcoords(ir->swap);
 +    }
 +
      /* IMD cleanup, if bIMD is TRUE. */
      IMD_finalize(ir->bIMD, ir->imd);