Merge release-5-0 into master
authorRoland Schulz <roland@utk.edu>
Fri, 5 Dec 2014 23:31:17 +0000 (18:31 -0500)
committerRoland Schulz <roland@utk.edu>
Sun, 7 Dec 2014 00:08:19 +0000 (19:08 -0500)
Conflicts (trivial):
src/gromacs/gmxlib/gmx_thread_affinity.c
src/gromacs/mdlib/clincs.cpp

Manual changes:
Removed config.h because not necessary anymore:
src/gromacs/mdlib/nbnxn_search_simd_4xn.h
src/gromacs/mdlib/nbnxn_search.c
Removed config.h supression:
docs/doxygen/suppressions.txt
uncrustify:
src/gromacs/mdlib/clincs.cpp

Change-Id: I00064120f12f609fabecac9b3c823d33c015e59c

1  2 
docs/doxygen/suppressions.txt
src/gromacs/domdec/domdec.cpp
src/gromacs/gmxlib/gmx_thread_affinity.c
src/gromacs/gmxpreprocess/readir.c
src/gromacs/gmxpreprocess/readpull.c
src/gromacs/mdlib/clincs.cpp
src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_common.h
src/gromacs/mdlib/nbnxn_pairlist.h
src/gromacs/mdlib/nbnxn_search.c
src/gromacs/mdlib/nbnxn_search_simd_4xn.h

index d1337b46dab06f6afe85173a414a23a45efa0ee5,592505a9621365d18c49fe914f035d8ae78291a4..71d9f29d23e6ac0dedbf67ee83ac07c9ed0f61ae
@@@ -1,31 -1,21 +1,30 @@@
 -# These look like bugs in Doxygen 1.8.5
 -src/gromacs/gmxlib/gmx_cpuid.c: warning: duplicate declarations for a member 'gmx_cpuid_vendor'
 -src/gromacs/gmxlib/gmx_cpuid.c: warning: duplicate declarations for a member 'gmx_cpuid_x86_smt'
 -src/gromacs/gmxlib/gmx_cpuid.c: warning: duplicate declarations for a member 'gmx_cpuid_simd_suggest'
 -
  # The script is currently a bit too eager
  share/template/template.cpp: error: source file documentation appears outside full documentation
 +# The parser in the script is not clever enough
 +src/gromacs/version.h: warning: includes local file as <gromacs/version.h>
 +
 +# These are OK
 +src/gromacs/math/vec.h: warning: installed header includes non-installed "config.h"
 +src/gromacs/linearalgebra/eigensolver.c: warning: should include "config.h"
 +src/gromacs/linearalgebra/gmx_arpack.c: warning: should include "config.h"
 +src/gromacs/linearalgebra/gmx_blas/*: warning: does not include "gmxpre.h" first
 +src/gromacs/linearalgebra/gmx_blas/*: warning: should include "config.h"
 +src/gromacs/linearalgebra/gmx_lapack/*: warning: does not include "gmxpre.h" first
 +src/gromacs/linearalgebra/gmx_lapack/*: warning: should include "config.h"
 +src/gromacs/utility/baseversion-gen.c: warning: does not include "gmxpre.h" first
  
  # This module name doesn't really fall into any currently used pattern; needs some thought
  : error: no matching directory for module: module_mdrun_integration_tests
  
  # These would be nice to fix, but can wait for later
 -*: warning: includes local file as <config.h>
  src/gromacs/gmxlib/nonbonded/nb_kernel_*/*: warning: included file "gromacs/simd/math_x86_*.h" is not documented as exposed outside its module
- src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_inner.h: warning: should include "config.h"
 +src/gromacs/gmxlib/nonbonded/nb_kernel_*/*: warning: includes "config.h" unnecessarily
 +src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.c: warning: includes "config.h" unnecessarily
 +src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.c: warning: includes "config.h" unnecessarily
 +src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.c: warning: includes "config.h" unnecessarily
 +src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_common.h: warning: should include "config.h"
 +src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.c: warning: includes "config.h" unnecessarily
  
  # These are specific to Folding@Home, and easiest to suppress here
  *: warning: includes non-local file as "corewrap.h"
 -src/config.h.cmakein: warning: includes non-local file as "swindirect.h"
 -
 -# These are limitations in the current script
 -src/gromacs/utility/gmx_header_config.h: warning: includes non-local file as "gmx_header_config_gen.h"
 +src/gmxpre.h: warning: includes non-local file as "swindirect.h"
index ba974d348f92b7afff6aa1703fc7e63de343fcb0,bf53531323672cbabd24c2af99727812b05b5dd2..54fb8f7c35120334dcf036902d8ee1d5cd5872c4
   * the research papers on the package. Check out http://www.gromacs.org.
   */
  
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
  
 -#include <stdio.h>
 -#include <time.h>
 +#include "domdec.h"
 +
 +#include "config.h"
 +
 +#include <assert.h>
 +#include <limits.h>
  #include <math.h>
 -#include <string.h>
 +#include <stdio.h>
  #include <stdlib.h>
 -#include <assert.h>
 +#include <string.h>
  
 -#include "typedefs.h"
 -#include "gromacs/utility/smalloc.h"
 -#include "gmx_fatal.h"
 -#include "gmx_fatal_collective.h"
 -#include "vec.h"
 -#include "domdec.h"
 -#include "domdec_network.h"
 -#include "nrnb.h"
 -#include "pbc.h"
 -#include "chargegroup.h"
 -#include "constr.h"
 -#include "mdatoms.h"
 -#include "names.h"
 -#include "force.h"
 -#include "pme.h"
 -#include "mdrun.h"
 -#include "nsgrid.h"
 -#include "shellfc.h"
 -#include "mtop_util.h"
 -#include "gmx_ga2la.h"
 -#include "macros.h"
 -#include "nbnxn_search.h"
 -#include "bondf.h"
 -#include "gmx_omp_nthreads.h"
 -#include "gpu_utils.h"
 -
 -#include "gromacs/fileio/futil.h"
 +#include <algorithm>
 +
 +#include "gromacs/domdec/domdec_network.h"
 +#include "gromacs/ewald/pme.h"
  #include "gromacs/fileio/gmxfio.h"
  #include "gromacs/fileio/pdbio.h"
 +#include "gromacs/imd/imd.h"
 +#include "gromacs/legacyheaders/chargegroup.h"
 +#include "gromacs/legacyheaders/constr.h"
 +#include "gromacs/legacyheaders/force.h"
 +#include "gromacs/legacyheaders/genborn.h"
 +#include "gromacs/legacyheaders/gmx_ga2la.h"
 +#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
 +#include "gromacs/legacyheaders/gpu_utils.h"
 +#include "gromacs/legacyheaders/mdatoms.h"
 +#include "gromacs/legacyheaders/mdrun.h"
 +#include "gromacs/legacyheaders/names.h"
 +#include "gromacs/legacyheaders/network.h"
 +#include "gromacs/legacyheaders/nrnb.h"
 +#include "gromacs/legacyheaders/nsgrid.h"
 +#include "gromacs/legacyheaders/shellfc.h"
 +#include "gromacs/legacyheaders/typedefs.h"
 +#include "gromacs/legacyheaders/vsite.h"
 +#include "gromacs/legacyheaders/types/commrec.h"
 +#include "gromacs/legacyheaders/types/constr.h"
 +#include "gromacs/legacyheaders/types/enums.h"
 +#include "gromacs/legacyheaders/types/forcerec.h"
 +#include "gromacs/legacyheaders/types/hw_info.h"
 +#include "gromacs/legacyheaders/types/ifunc.h"
 +#include "gromacs/legacyheaders/types/inputrec.h"
 +#include "gromacs/legacyheaders/types/mdatom.h"
 +#include "gromacs/legacyheaders/types/nrnb.h"
 +#include "gromacs/legacyheaders/types/ns.h"
 +#include "gromacs/legacyheaders/types/nsgrid.h"
 +#include "gromacs/legacyheaders/types/shellfc.h"
 +#include "gromacs/legacyheaders/types/simple.h"
 +#include "gromacs/legacyheaders/types/state.h"
 +#include "gromacs/listed-forces/manage-threading.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/math/vectypes.h"
 +#include "gromacs/mdlib/nb_verlet.h"
 +#include "gromacs/mdlib/nbnxn_search.h"
 +#include "gromacs/pbcutil/ishift.h"
 +#include "gromacs/pbcutil/pbc.h"
 +#include "gromacs/pulling/pull.h"
 +#include "gromacs/pulling/pull_rotation.h"
 +#include "gromacs/swap/swapcoords.h"
  #include "gromacs/timing/wallcycle.h"
 +#include "gromacs/topology/block.h"
 +#include "gromacs/topology/idef.h"
 +#include "gromacs/topology/mtop_util.h"
 +#include "gromacs/topology/topology.h"
 +#include "gromacs/utility/basedefinitions.h"
 +#include "gromacs/utility/basenetwork.h"
 +#include "gromacs/utility/cstringutil.h"
 +#include "gromacs/utility/fatalerror.h"
  #include "gromacs/utility/gmxmpi.h"
 -#include "gromacs/swap/swapcoords.h"
  #include "gromacs/utility/qsort_threadsafe.h"
 -#include "gromacs/pulling/pull.h"
 -#include "gromacs/pulling/pull_rotation.h"
 -#include "gromacs/imd/imd.h"
 +#include "gromacs/utility/real.h"
 +#include "gromacs/utility/smalloc.h"
 +
 +#include "domdec_constraints.h"
 +#include "domdec_internal.h"
 +#include "domdec_vsite.h"
  
  #define DDRANK(dd, rank)    (rank)
  #define DDMASTERRANK(dd)   (dd->masterrank)
@@@ -798,7 -768,7 +798,7 @@@ void dd_move_f(gmx_domdec_t *dd, rvec f
      rvec                  *buf, *sbuf;
      ivec                   vis;
      int                    is;
 -    gmx_bool               bPBC, bScrew;
 +    gmx_bool               bShiftForcesNeedPbc, bScrew;
  
      comm = dd->comm;
  
  
      buf = comm->vbuf.v;
  
 -    n       = 0;
      nzone   = comm->zones.n/2;
      nat_tot = dd->nat_tot;
      for (d = dd->ndim-1; d >= 0; d--)
      {
 -        bPBC   = (dd->ci[dd->dim[d]] == 0);
 -        bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
 +        /* Only forces in domains near the PBC boundaries need to
 +           consider PBC in the treatment of fshift */
 +        bShiftForcesNeedPbc   = (dd->ci[dd->dim[d]] == 0);
 +        bScrew                = (bShiftForcesNeedPbc && dd->bScrewPBC && dd->dim[d] == XX);
          if (fshift == NULL && !bScrew)
          {
 -            bPBC = FALSE;
 +            bShiftForcesNeedPbc = FALSE;
          }
          /* Determine which shift vector we need */
          clear_ivec(vis);
              index = ind->index;
              /* Add the received forces */
              n = 0;
 -            if (!bPBC)
 +            if (!bShiftForcesNeedPbc)
              {
                  for (i = 0; i < ind->nsend[nzone]; i++)
                  {
              }
              else if (!bScrew)
              {
 +                /* fshift should always be defined if this function is
 +                 * called when bShiftForcesNeedPbc is true */
 +                assert(NULL != fshift);
                  for (i = 0; i < ind->nsend[nzone]; i++)
                  {
                      at0 = cgindex[index[i]];
@@@ -990,6 -956,7 +990,6 @@@ void dd_atom_sum_real(gmx_domdec_t *dd
  
      buf = &comm->vbuf.v[0][0];
  
 -    n       = 0;
      nzone   = comm->zones.n/2;
      nat_tot = dd->nat_tot;
      for (d = dd->ndim-1; d >= 0; d--)
@@@ -1095,7 -1062,7 +1095,7 @@@ static void dd_sendrecv_ddzone(const gm
  static void dd_move_cellx(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
                            rvec cell_ns_x0, rvec cell_ns_x1)
  {
 -    int                d, d1, dim, dim1, pos, buf_size, i, j, k, p, npulse, npulse_min;
 +    int                d, d1, dim, pos, buf_size, i, j, p, npulse, npulse_min;
      gmx_ddzone_t      *zp;
      gmx_ddzone_t       buf_s[DDZONECOMM_MAXZONE];
      gmx_ddzone_t       buf_r[DDZONECOMM_MAXZONE];
          if (bPBC)
          {
              /* Take the minimum to avoid double communication */
 -            npulse_min = min(npulse, dd->nc[dim]-1-npulse);
 +            npulse_min = std::min(npulse, dd->nc[dim]-1-npulse);
          }
          else
          {
              {
                  for (d1 = d; d1 < dd->ndim-1; d1++)
                  {
 -                    extr_s[d1][0] = max(extr_s[d1][0], extr_r[d1][0]);
 -                    extr_s[d1][1] = min(extr_s[d1][1], extr_r[d1][1]);
 -                    extr_s[d1][2] = min(extr_s[d1][2], extr_r[d1][2]);
 +                    extr_s[d1][0] = std::max(extr_s[d1][0], extr_r[d1][0]);
 +                    extr_s[d1][1] = std::min(extr_s[d1][1], extr_r[d1][1]);
 +                    extr_s[d1][2] = std::min(extr_s[d1][2], extr_r[d1][2]);
                  }
              }
          }
                  {
                      if (bUse)
                      {
 -                        buf_e[i].min0 = min(buf_e[i].min0, buf_r[i].min0);
 -                        buf_e[i].max1 = max(buf_e[i].max1, buf_r[i].max1);
 -                        buf_e[i].min1 = min(buf_e[i].min1, buf_r[i].min1);
 +                        buf_e[i].min0 = std::min(buf_e[i].min0, buf_r[i].min0);
 +                        buf_e[i].max1 = std::max(buf_e[i].max1, buf_r[i].max1);
 +                        buf_e[i].min1 = std::min(buf_e[i].min1, buf_r[i].min1);
                      }
  
                      if (dd->ndim == 3 && d == 0 && i == buf_size - 1)
                      }
                      if (bUse && dh[d1] >= 0)
                      {
 -                        buf_e[i].mch0 = max(buf_e[i].mch0, buf_r[i].mch0-dh[d1]);
 -                        buf_e[i].mch1 = max(buf_e[i].mch1, buf_r[i].mch1-dh[d1]);
 +                        buf_e[i].mch0 = std::max(buf_e[i].mch0, buf_r[i].mch0-dh[d1]);
 +                        buf_e[i].mch1 = std::max(buf_e[i].mch1, buf_r[i].mch1-dh[d1]);
                      }
                  }
                  /* Copy the received buffer to the send buffer,
  
                  for (d1 = d; d1 < dd->ndim-1; d1++)
                  {
 -                    extr_s[d1][1] = min(extr_s[d1][1], buf_e[pos].min0);
 -                    extr_s[d1][0] = max(extr_s[d1][0], buf_e[pos].max1);
 -                    extr_s[d1][2] = min(extr_s[d1][2], buf_e[pos].min1);
 +                    extr_s[d1][1] = std::min(extr_s[d1][1], buf_e[pos].min0);
 +                    extr_s[d1][0] = std::max(extr_s[d1][0], buf_e[pos].max1);
 +                    extr_s[d1][2] = std::min(extr_s[d1][2], buf_e[pos].min1);
                      pos++;
                  }
  
              {
                  print_ddzone(debug, 1, i, 0, &comm->zone_d1[i]);
              }
 -            cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d1[i].min0);
 -            cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d1[i].max1);
 +            cell_ns_x0[dim] = std::min(cell_ns_x0[dim], comm->zone_d1[i].min0);
 +            cell_ns_x1[dim] = std::max(cell_ns_x1[dim], comm->zone_d1[i].max1);
          }
      }
      if (dd->ndim >= 3)
                  {
                      print_ddzone(debug, 2, i, j, &comm->zone_d2[i][j]);
                  }
 -                cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d2[i][j].min0);
 -                cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d2[i][j].max1);
 +                cell_ns_x0[dim] = std::min(cell_ns_x0[dim], comm->zone_d2[i][j].min0);
 +                cell_ns_x1[dim] = std::max(cell_ns_x1[dim], comm->zone_d2[i][j].max1);
              }
          }
      }
@@@ -1560,6 -1527,10 +1560,6 @@@ static void dd_collect_vec_gatherv(gmx_
  void dd_collect_vec(gmx_domdec_t *dd,
                      t_state *state_local, rvec *lv, rvec *v)
  {
 -    gmx_domdec_master_t *ma;
 -    int                  n, i, c, a, nalloc = 0;
 -    rvec                *buf = NULL;
 -
      dd_collect_cg(dd, state_local);
  
      if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
@@@ -1781,7 -1752,7 +1781,7 @@@ static void dd_distribute_vec_scatterv(
  {
      gmx_domdec_master_t *ma;
      int                 *scounts = NULL, *disps = NULL;
 -    int                  n, i, c, a, nalloc = 0;
 +    int                  n, i, c, a;
      rvec                *buf = NULL;
  
      if (DDMASTER(dd))
@@@ -1975,7 -1946,7 +1975,7 @@@ static void write_dd_grid_pdb(const cha
          snew(grid_r, 2*dd->nnodes);
      }
  
 -    dd_gather(dd, 2*sizeof(rvec), grid_s[0], DDMASTER(dd) ? grid_r[0] : NULL);
 +    dd_gather(dd, 2*sizeof(rvec), grid_s, DDMASTER(dd) ? grid_r : NULL);
  
      if (DDMASTER(dd))
      {
@@@ -2097,7 -2068,7 +2097,7 @@@ void write_dd_pdb(const char *fn, gmx_i
      gmx_fio_fclose(out);
  }
  
 -real dd_cutoff_mbody(gmx_domdec_t *dd)
 +real dd_cutoff_multibody(const gmx_domdec_t *dd)
  {
      gmx_domdec_comm_t *comm;
      int                di;
              r = comm->cellsize_min[dd->dim[0]];
              for (di = 1; di < dd->ndim; di++)
              {
 -                r = min(r, comm->cellsize_min[dd->dim[di]]);
 +                r = std::min(r, comm->cellsize_min[dd->dim[di]]);
              }
              if (comm->bBondComm)
              {
 -                r = max(r, comm->cutoff_mbody);
 +                r = std::max(r, comm->cutoff_mbody);
              }
              else
              {
 -                r = min(r, comm->cutoff);
 +                r = std::min(r, comm->cutoff);
              }
          }
      }
      return r;
  }
  
 -real dd_cutoff_twobody(gmx_domdec_t *dd)
 +real dd_cutoff_twobody(const gmx_domdec_t *dd)
  {
      real r_mb;
  
 -    r_mb = dd_cutoff_mbody(dd);
 +    r_mb = dd_cutoff_multibody(dd);
  
 -    return max(dd->comm->cutoff, r_mb);
 +    return std::max(dd->comm->cutoff, r_mb);
  }
  
  
@@@ -2202,7 -2173,7 +2202,7 @@@ static int *dd_pmenodes(t_commrec *cr
  static int gmx_ddcoord2pmeindex(t_commrec *cr, int x, int y, int z)
  {
      gmx_domdec_t *dd;
 -    ivec          coords, coords_pme, nc;
 +    ivec          coords;
      int           slab;
  
      dd = cr->dd;
@@@ -2271,6 -2242,7 +2271,6 @@@ static int dd_simnode2pmenode(t_commre
  {
      gmx_domdec_t      *dd;
      gmx_domdec_comm_t *comm;
 -    ivec               coord, coord_pme;
      int                i;
      int                pmenode = -1;
  
      if (comm->bCartesianPP_PME)
      {
  #ifdef GMX_MPI
 +        ivec coord, coord_pme;
          MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
          if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
          {
@@@ -2343,6 -2314,22 +2343,6 @@@ void get_pme_nnodes(const gmx_domdec_t 
      }
  }
  
 -gmx_bool gmx_pmeonlynode(t_commrec *cr, int sim_nodeid)
 -{
 -    gmx_bool bPMEOnlyNode;
 -
 -    if (DOMAINDECOMP(cr))
 -    {
 -        bPMEOnlyNode = (dd_simnode2pmenode(cr, sim_nodeid) == -1);
 -    }
 -    else
 -    {
 -        bPMEOnlyNode = FALSE;
 -    }
 -
 -    return bPMEOnlyNode;
 -}
 -
  void get_pme_ddnodes(t_commrec *cr, int pmenodeid,
                       int *nmy_ddnodes, int **my_ddnodes, int *node_peer)
  {
  static gmx_bool receive_vir_ener(t_commrec *cr)
  {
      gmx_domdec_comm_t *comm;
 -    int                pmenode, coords[DIM], rank;
 +    int                pmenode;
      gmx_bool           bReceive;
  
      bReceive = TRUE;
          {
              pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid);
  #ifdef GMX_MPI
 +            ivec coords;
              MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
              coords[comm->cartpmedim]++;
              if (coords[comm->cartpmedim] < cr->dd->nc[comm->cartpmedim])
              {
 +                int rank;
                  MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
                  if (dd_simnode2pmenode(cr, rank) == pmenode)
                  {
@@@ -2527,8 -2512,12 +2527,8 @@@ static void make_dd_indices(gmx_domdec_
  {
      int          nzone, zone, zone1, cg0, cg1, cg1_p1, cg, cg_gl, a, a_gl;
      int         *zone2cg, *zone_ncg1, *index_gl, *gatindex;
 -    gmx_ga2la_t *ga2la;
 -    char        *bLocalCG;
      gmx_bool     bCGs;
  
 -    bLocalCG = dd->comm->bLocalCG;
 -
      if (dd->nat_tot > dd->gatindex_nalloc)
      {
          dd->gatindex_nalloc = over_alloc_dd(dd->nat_tot);
  static int check_bLocalCG(gmx_domdec_t *dd, int ncg_sys, const char *bLocalCG,
                            const char *where)
  {
 -    int ncg, i, ngl, nerr;
 +    int i, ngl, nerr;
  
      nerr = 0;
      if (bLocalCG == NULL)
@@@ -2753,12 -2742,12 +2753,12 @@@ static real cellsize_min_dlb(gmx_domdec
          /* The cut-off might have changed, e.g. by PME load balacning,
           * from the value used to set comm->cellsize_min, so check it.
           */
 -        cellsize_min = max(cellsize_min, comm->cutoff/comm->cd[dim_ind].np_dlb);
 +        cellsize_min = std::max(cellsize_min, comm->cutoff/comm->cd[dim_ind].np_dlb);
  
          if (comm->bPMELoadBalDLBLimits)
          {
              /* Check for the cut-off limit set by the PME load balancing */
 -            cellsize_min = max(cellsize_min, comm->PMELoadBal_max_cutoff/comm->cd[dim_ind].np_dlb);
 +            cellsize_min = std::max(cellsize_min, comm->PMELoadBal_max_cutoff/comm->cd[dim_ind].np_dlb);
          }
      }
  
@@@ -2781,10 -2770,10 +2781,10 @@@ static real grid_jump_limit(gmx_domdec_
      {
          if (comm->bPMELoadBalDLBLimits)
          {
 -            cutoff = max(cutoff, comm->PMELoadBal_max_cutoff);
 +            cutoff = std::max(cutoff, comm->PMELoadBal_max_cutoff);
          }
 -        grid_jump_limit = max(grid_jump_limit,
 -                              cutoff/comm->cd[dim_ind].np);
 +        grid_jump_limit = std::max(grid_jump_limit,
 +                                   cutoff/comm->cd[dim_ind].np);
      }
  
      return grid_jump_limit;
@@@ -2971,8 -2960,8 +2971,8 @@@ static void init_ddpme(gmx_domdec_t *dd
              {
                  slab = pmeindex % ddpme->nslab;
              }
 -            ddpme->pp_min[slab] = min(ddpme->pp_min[slab], xyz[dimind]);
 -            ddpme->pp_max[slab] = max(ddpme->pp_max[slab], xyz[dimind]);
 +            ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
 +            ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
          }
      }
  
@@@ -3173,7 -3162,7 +3173,7 @@@ static void set_dd_cell_sizes_slb(gmx_d
                  {
                      npulse[d]++;
                  }
 -                cellsize_min[d] = min(cellsize_min[d], cellsize);
 +                cellsize_min[d] = std::min(cellsize_min[d], cellsize);
              }
              if (setmode == setcellsizeslbLOCAL)
              {
@@@ -3443,7 -3432,7 +3443,7 @@@ static void set_dd_cell_sizes_dlb_root(
                                         gmx_bool bUniform, gmx_int64_t step)
  {
      gmx_domdec_comm_t *comm;
 -    int                ncd, d1, i, j, pos;
 +    int                ncd, d1, i, pos;
      real              *cell_size;
      real               load_aver, load_i, imbalance, change, change_max, sc;
      real               cellsize_limit_f, dist_min_f, dist_min_f_hard, space;
              imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
              /* Determine the change of the cell size using underrelaxation */
              change     = -relax*imbalance;
 -            change_max = max(change_max, max(change, -change));
 +            change_max = std::max(change_max, std::max(change, -change));
          }
          /* Limit the amount of scaling.
           * We need to use the same rescaling for all cells in one row,
@@@ -3629,7 -3618,7 +3629,7 @@@ static void distribute_dd_cell_sizes_dl
                                           gmx_ddbox_t *ddbox)
  {
      gmx_domdec_comm_t *comm;
 -    int                d1, dim1, pos;
 +    int                d1, pos;
  
      comm = dd->comm;
  
@@@ -3917,13 -3906,13 +3917,13 @@@ static void distribute_cg(FILE *fplog, 
  {
      gmx_domdec_master_t *ma;
      int                **tmp_ind = NULL, *tmp_nalloc = NULL;
 -    int                  i, icg, j, k, k0, k1, d, npbcdim;
 +    int                  i, icg, j, k, k0, k1, d;
      matrix               tcm;
 -    rvec                 box_size, cg_cm;
 +    rvec                 cg_cm;
      ivec                 ind;
      real                 nrcg, inv_ncg, pos_d;
      atom_id             *cgindex;
 -    gmx_bool             bUnbounded, bScrew;
 +    gmx_bool             bScrew;
  
      ma = dd->ma;
  
@@@ -4131,9 -4120,9 +4131,9 @@@ static void get_cg_distribution(FILE *f
      }
  
      dd_scatterv(dd,
 -                DDMASTER(dd) ? ma->ibuf : NULL,
 -                DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
 -                DDMASTER(dd) ? ma->cg : NULL,
 +                bMaster ? ma->ibuf : NULL,
 +                bMaster ? ma->ibuf+dd->nnodes : NULL,
 +                bMaster ? ma->cg : NULL,
                  dd->ncg_home*sizeof(int), dd->index_gl);
  
      /* Determine the home charge group sizes */
@@@ -4468,8 -4457,8 +4468,8 @@@ static void calc_cg_move(FILE *fplog, g
                           int *move)
  {
      int      npbcdim;
 -    int      c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
 -    int      mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
 +    int      cg, k, k0, k1, d, dim, d2;
 +    int      mc, nrcg;
      int      flag;
      gmx_bool bScrew;
      ivec     dev;
@@@ -4645,15 -4634,17 +4645,15 @@@ static void dd_redistribute_cg(FILE *fp
      int               *move;
      int                npbcdim;
      int                ncg[DIM*2], nat[DIM*2];
 -    int                c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
 -    int                mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
 +    int                c, i, cg, k, d, dim, dim2, dir, d2, d3;
 +    int                mc, cdd, nrcg, ncg_recv, nvs, nvr, nvec, vec;
      int                sbuf[2], rbuf[2];
      int                home_pos_cg, home_pos_at, buf_pos;
      int                flag;
      gmx_bool           bV = FALSE, bSDX = FALSE, bCGP = FALSE;
 -    gmx_bool           bScrew;
 -    ivec               dev;
 -    real               inv_ncg, pos_d;
 +    real               pos_d;
      matrix             tcm;
 -    rvec              *cg_cm = NULL, cell_x0, cell_x1, limitd, limit0, limit1, cm_new;
 +    rvec              *cg_cm = NULL, cell_x0, cell_x1, limitd, limit0, limit1;
      atom_id           *cgindex;
      cginfo_mb_t       *cginfo_mb;
      gmx_domdec_comm_t *comm;
      {
          dim      = dd->dim[d];
          ncg_recv = 0;
 -        nat_recv = 0;
          nvr      = 0;
          for (dir = 0; dir < (dd->nc[dim] == 2 ? 1 : 2); dir++)
          {
                               comm->cgcm_state[cdd], nvs,
                               comm->vbuf.v+nvr, i);
              ncg_recv += rbuf[0];
 -            nat_recv += rbuf[1];
              nvr      += i;
          }
  
@@@ -5247,7 -5240,7 +5247,7 @@@ static void get_load_distribution(gmx_d
      gmx_domdec_comm_t *comm;
      gmx_domdec_load_t *load;
      gmx_domdec_root_t *root = NULL;
 -    int                d, dim, cid, i, pos;
 +    int                d, dim, i, pos;
      float              cell_frac = 0, sbuf[DD_NLOAD_MAX];
      gmx_bool           bSepPME;
  
                  for (i = 0; i < dd->nc[dim]; i++)
                  {
                      load->sum += load->load[pos++];
 -                    load->max  = max(load->max, load->load[pos]);
 +                    load->max  = std::max(load->max, load->load[pos]);
                      pos++;
                      if (dd->bGridJump)
                      {
                              /* This direction could not be load balanced properly,
                               * therefore we need to use the maximum iso the average load.
                               */
 -                            load->sum_m = max(load->sum_m, load->load[pos]);
 +                            load->sum_m = std::max(load->sum_m, load->load[pos]);
                          }
                          else
                          {
                              load->sum_m += load->load[pos];
                          }
                          pos++;
 -                        load->cvol_min = min(load->cvol_min, load->load[pos]);
 +                        load->cvol_min = std::min(load->cvol_min, load->load[pos]);
                          pos++;
                          if (d < dd->ndim-1)
                          {
                      }
                      if (bSepPME)
                      {
 -                        load->mdf = max(load->mdf, load->load[pos]);
 +                        load->mdf = std::max(load->mdf, load->load[pos]);
                          pos++;
 -                        load->pme = max(load->pme, load->load[pos]);
 +                        load->pme = std::max(load->pme, load->load[pos]);
                          pos++;
                      }
                  }
@@@ -5769,6 -5762,7 +5769,6 @@@ static void make_load_communicators(gmx
  
  void setup_dd_grid(FILE *fplog, gmx_domdec_t *dd)
  {
 -    gmx_bool                bZYX;
      int                     d, dim, i, j, m;
      ivec                    tmp, s;
      int                     nzone, nzonep;
  static void make_pp_communicator(FILE *fplog, t_commrec *cr, int gmx_unused reorder)
  {
      gmx_domdec_t      *dd;
 +    dd   = cr->dd;
 +
 +#ifdef GMX_MPI
      gmx_domdec_comm_t *comm;
 -    int                i, rank, *buf;
 +    int                rank, *buf;
      ivec               periods;
 -#ifdef GMX_MPI
      MPI_Comm           comm_cart;
 -#endif
  
 -    dd   = cr->dd;
      comm = dd->comm;
  
 -#ifdef GMX_MPI
      if (comm->bCartesianPP)
      {
          /* Set up cartesian communication for the particle-particle part */
                      dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
          }
  
 -        for (i = 0; i < DIM; i++)
 +        for (int i = 0; i < DIM; i++)
          {
              periods[i] = TRUE;
          }
          /* Determine the master coordinates and rank.
           * The DD master should be the same node as the master of this sim.
           */
 -        for (i = 0; i < dd->nnodes; i++)
 +        for (int i = 0; i < dd->nnodes; i++)
          {
              if (comm->ddindex2simnodeid[i] == 0)
              {
      }
  }
  
 -static void receive_ddindex2simnodeid(t_commrec *cr)
 +static void receive_ddindex2simnodeid(t_commrec gmx_unused *cr)
  {
 +#ifdef GMX_MPI
      gmx_domdec_t      *dd;
 -
      gmx_domdec_comm_t *comm;
 -    int               *buf;
  
      dd   = cr->dd;
      comm = dd->comm;
  
 -#ifdef GMX_MPI
      if (!comm->bCartesianPP_PME && comm->bCartesianPP)
      {
 +        int *buf;
          snew(comm->ddindex2simnodeid, dd->nnodes);
          snew(buf, dd->nnodes);
          if (cr->duty & DUTY_PP)
          {
              buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
          }
 -#ifdef GMX_MPI
          /* Communicate the ddindex to simulation nodeid index */
          MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
                        cr->mpi_comm_mysim);
 -#endif
          sfree(buf);
      }
  #endif
@@@ -6110,8 -6108,9 +6110,8 @@@ static void split_communicator(FILE *fp
  {
      gmx_domdec_t      *dd;
      gmx_domdec_comm_t *comm;
 -    int                i, rank;
 +    int                i;
      gmx_bool           bDiv[DIM];
 -    ivec               periods;
  #ifdef GMX_MPI
      MPI_Comm           comm_cart;
  #endif
  #ifdef GMX_MPI
      if (comm->bCartesianPP_PME)
      {
 +        int  rank;
 +        ivec periods;
 +
          if (fplog)
          {
              fprintf(fplog, "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n", comm->ntot[XX], comm->ntot[YY], comm->ntot[ZZ]);
          }
          MPI_Cart_create(cr->mpi_comm_mysim, DIM, comm->ntot, periods, reorder,
                          &comm_cart);
 -
          MPI_Comm_rank(comm_cart, &rank);
          if (MASTERNODE(cr) && rank != 0)
          {
@@@ -6356,7 -6353,7 +6356,7 @@@ static real *get_slb_frac(FILE *fplog, 
          for (i = 0; i < nc; i++)
          {
              dbl = 0;
 -            sscanf(size_string, "%lf%n", &dbl, &n);
 +            sscanf(size_string, "%20lf%n", &dbl, &n);
              if (dbl == 0)
              {
                  gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
@@@ -6419,7 -6416,7 +6419,7 @@@ static int dd_getenv(FILE *fplog, cons
      val = getenv(env_var);
      if (val)
      {
 -        if (sscanf(val, "%d", &nst) <= 0)
 +        if (sscanf(val, "%20d", &nst) <= 0)
          {
              nst = 1;
          }
@@@ -6480,7 -6477,7 +6480,7 @@@ static real average_cellsize_min(gmx_do
      {
          d = dd->dim[di];
          /* Check using the initial average cell size */
 -        r = min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
 +        r = std::min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
      }
  
      return r;
@@@ -6490,6 -6487,7 +6490,6 @@@ static int check_dlb_support(FILE *fplo
                               const char *dlb_opt, gmx_bool bRecordLoad,
                               unsigned long Flags, t_inputrec *ir)
  {
 -    gmx_domdec_t *dd;
      int           eDLB = -1;
      char          buf[STRLEN];
  
@@@ -6631,10 -6629,10 +6631,10 @@@ gmx_domdec_t *init_domain_decomposition
      gmx_domdec_t      *dd;
      gmx_domdec_comm_t *comm;
      int                recload;
 -    int                d, i, j;
      real               r_2b, r_mb, r_bonded = -1, r_bonded_limit = -1, limit, acs;
      gmx_bool           bC;
      char               buf[STRLEN];
 +    const real         tenPercentMargin = 1.1;
  
      if (fplog)
      {
       * within nstlist steps. Since boundaries are allowed to displace by half
       * a cell size, DD cells should be at least the size of the list buffer.
       */
 -    comm->cellsize_limit = max(comm->cellsize_limit,
 -                               ir->rlistlong - max(ir->rvdw, ir->rcoulomb));
 +    comm->cellsize_limit = std::max(comm->cellsize_limit,
 +                                    ir->rlistlong - std::max(ir->rvdw, ir->rcoulomb));
  
      if (comm->bInterCGBondeds)
      {
              }
              else
              {
 -                comm->cutoff = max(comm->cutoff, comm->cutoff_mbody);
 +                comm->cutoff = std::max(comm->cutoff, comm->cutoff_mbody);
              }
              r_bonded_limit = comm->cutoff_mbody;
          }
               */
              if (Flags & MD_DDBONDCOMM)
              {
 -                if (max(r_2b, r_mb) > comm->cutoff)
 +                if (std::max(r_2b, r_mb) > comm->cutoff)
                  {
 -                    r_bonded        = max(r_2b, r_mb);
 -                    r_bonded_limit  = 1.1*r_bonded;
 +                    r_bonded        = std::max(r_2b, r_mb);
 +                    r_bonded_limit  = tenPercentMargin*r_bonded;
                      comm->bBondComm = TRUE;
                  }
                  else
                  {
                      r_bonded       = r_mb;
 -                    r_bonded_limit = min(1.1*r_bonded, comm->cutoff);
 +                    r_bonded_limit = std::min(tenPercentMargin*r_bonded, comm->cutoff);
                  }
                  /* We determine cutoff_mbody later */
              }
                  /* No special bonded communication,
                   * simply increase the DD cut-off.
                   */
 -                r_bonded_limit     = 1.1*max(r_2b, r_mb);
 +                r_bonded_limit     = tenPercentMargin*std::max(r_2b, r_mb);
                  comm->cutoff_mbody = r_bonded_limit;
 -                comm->cutoff       = max(comm->cutoff, comm->cutoff_mbody);
 +                comm->cutoff       = std::max(comm->cutoff, comm->cutoff_mbody);
              }
          }
          if (fplog)
                      "Minimum cell size due to bonded interactions: %.3f nm\n",
                      r_bonded_limit);
          }
 -        comm->cellsize_limit = max(comm->cellsize_limit, r_bonded_limit);
 +        comm->cellsize_limit = std::max(comm->cellsize_limit, r_bonded_limit);
      }
  
      if (dd->bInterCGcons && rconstr <= 0)
                  "User supplied maximum distance required for P-LINCS: %.3f nm\n",
                  rconstr);
      }
 -    comm->cellsize_limit = max(comm->cellsize_limit, rconstr);
 +    comm->cellsize_limit = std::max(comm->cellsize_limit, rconstr);
  
      comm->cgs_gl = gmx_mtop_global_cgs(mtop);
  
              if (comm->eDLB != edlbNO)
              {
                  /* Check if this does not limit the scaling */
 -                comm->cutoff_mbody = min(comm->cutoff_mbody, dlb_scale*acs);
 +                comm->cutoff_mbody = std::min(comm->cutoff_mbody, dlb_scale*acs);
              }
              if (!comm->bBondComm)
              {
                  /* Without bBondComm do not go beyond the n.b. cut-off */
 -                comm->cutoff_mbody = min(comm->cutoff_mbody, comm->cutoff);
 +                comm->cutoff_mbody = std::min(comm->cutoff_mbody, comm->cutoff);
                  if (comm->cellsize_limit >= comm->cutoff)
                  {
                      /* We don't loose a lot of efficieny
                  }
              }
              /* Check if we did not end up below our original limit */
 -            comm->cutoff_mbody = max(comm->cutoff_mbody, r_bonded_limit);
 +            comm->cutoff_mbody = std::max(comm->cutoff_mbody, r_bonded_limit);
  
              if (comm->cutoff_mbody > comm->cellsize_limit)
              {
@@@ -7093,7 -7091,7 +7093,7 @@@ static void turn_on_dlb(FILE *fplog, t_
      cellsize_min = comm->cellsize_min[dd->dim[0]];
      for (d = 1; d < dd->ndim; d++)
      {
 -        cellsize_min = min(cellsize_min, comm->cellsize_min[dd->dim[d]]);
 +        cellsize_min = std::min(cellsize_min, comm->cellsize_min[dd->dim[d]]);
      }
  
      if (cellsize_min < comm->cellsize_limit*1.05)
@@@ -7158,6 -7156,8 +7158,6 @@@ void dd_init_bondeds(FILE *fplog
                       t_inputrec *ir, gmx_bool bBCheck, cginfo_mb_t *cginfo_mb)
  {
      gmx_domdec_comm_t *comm;
 -    gmx_bool           bBondComm;
 -    int                d;
  
      dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
  
@@@ -7268,7 -7268,7 +7268,7 @@@ static void print_dd_settings(FILE *fpl
              limit = dd->comm->cellsize_min[XX];
              for (d = 1; d < DIM; d++)
              {
 -                limit = min(limit, dd->comm->cellsize_min[d]);
 +                limit = std::min(limit, dd->comm->cellsize_min[d]);
              }
          }
  
          {
              fprintf(fplog, "%40s  %-7s %6.3f nm\n",
                      "two-body bonded interactions", "(-rdd)",
 -                    max(comm->cutoff, comm->cutoff_mbody));
 +                    std::max(comm->cutoff, comm->cutoff_mbody));
              fprintf(fplog, "%40s  %-7s %6.3f nm\n",
                      "multi-body bonded interactions", "(-rdd)",
 -                    (comm->bBondComm || dd->bGridJump) ? comm->cutoff_mbody : min(comm->cutoff, limit));
 +                    (comm->bBondComm || dd->bGridJump) ? comm->cutoff_mbody : std::min(comm->cutoff, limit));
          }
          if (dd->vsite_comm)
          {
@@@ -7314,7 -7314,7 +7314,7 @@@ static void set_cell_limits_dlb(gmx_dom
  
      /* Determine the maximum number of comm. pulses in one dimension */
  
 -    comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
 +    comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
  
      /* Determine the maximum required number of grid pulses */
      if (comm->cellsize_limit >= comm->cutoff)
      else
      {
          /* There is no cell size limit */
 -        npulse = max(dd->nc[XX]-1, max(dd->nc[YY]-1, dd->nc[ZZ]-1));
 +        npulse = std::max(dd->nc[XX]-1, std::max(dd->nc[YY]-1, dd->nc[ZZ]-1));
      }
  
      if (!bNoCutOff && npulse > 1)
              dim      = dd->dim[d];
              npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
                               /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
 -            npulse_d_max = max(npulse_d_max, npulse_d);
 +            npulse_d_max = std::max(npulse_d_max, npulse_d);
          }
 -        npulse = min(npulse, npulse_d_max);
 +        npulse = std::min(npulse, npulse_d_max);
      }
  
      /* This env var can override npulse */
      comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
      for (d = 0; d < dd->ndim; d++)
      {
 -        comm->cd[d].np_dlb    = min(npulse, dd->nc[dd->dim[d]]-1);
 +        comm->cd[d].np_dlb    = std::min(npulse, dd->nc[dd->dim[d]]-1);
          comm->cd[d].np_nalloc = comm->cd[d].np_dlb;
          snew(comm->cd[d].ind, comm->cd[d].np_nalloc);
 -        comm->maxpulse = max(comm->maxpulse, comm->cd[d].np_dlb);
 +        comm->maxpulse = std::max(comm->maxpulse, comm->cd[d].np_dlb);
          if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
          {
              comm->bVacDLBNoLimit = FALSE;
      /* cellsize_limit is set for LINCS in init_domain_decomposition */
      if (!comm->bVacDLBNoLimit)
      {
 -        comm->cellsize_limit = max(comm->cellsize_limit,
 -                                   comm->cutoff/comm->maxpulse);
 +        comm->cellsize_limit = std::max(comm->cellsize_limit,
 +                                        comm->cutoff/comm->maxpulse);
      }
 -    comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
 +    comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
      /* Set the minimum cell size for each DD dimension */
      for (d = 0; d < dd->ndim; d++)
      {
      }
      if (comm->cutoff_mbody <= 0)
      {
 -        comm->cutoff_mbody = min(comm->cutoff, comm->cellsize_limit);
 +        comm->cutoff_mbody = std::min(comm->cutoff, comm->cellsize_limit);
      }
      if (comm->bDynLoadBal)
      {
@@@ -7487,7 -7487,7 +7487,7 @@@ void set_dd_parameters(FILE *fplog, gmx
      }
      natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
  
 -    dd->ga2la = ga2la_init(natoms_tot, vol_frac*natoms_tot);
 +    dd->ga2la = ga2la_init(natoms_tot, static_cast<int>(vol_frac*natoms_tot));
  }
  
  static gmx_bool test_dd_cutoff(t_commrec *cr,
@@@ -7765,11 -7765,11 +7765,11 @@@ set_dd_corners(const gmx_domdec_t *dd
          c->c[1][1] = comm->cell_x0[dim1];
          if (dd->bGridJump)
          {
 -            c->c[1][1] = max(comm->cell_x0[dim1], comm->zone_d1[1].mch0);
 +            c->c[1][1] = std::max(comm->cell_x0[dim1], comm->zone_d1[1].mch0);
              if (bDistMB)
              {
                  /* For the multi-body distance we need the maximum */
 -                c->bc[1] = max(comm->cell_x0[dim1], comm->zone_d1[1].p1_0);
 +                c->bc[1] = std::max(comm->cell_x0[dim1], comm->zone_d1[1].p1_0);
              }
          }
          /* Set the upper-right corner for rounding */
                          if (j >= 4)
                          {
                              c->c[2][j-4] =
 -                                max(c->c[2][j-4],
 -                                    comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
 +                                std::max(c->c[2][j-4],
 +                                         comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
                          }
                      }
                  }
                      {
                          for (j = 0; j < 2; j++)
                          {
 -                            c->bc[2] = max(c->bc[2], comm->zone_d2[i][j].p1_0);
 +                            c->bc[2] = std::max(c->bc[2], comm->zone_d2[i][j].p1_0);
                          }
                      }
                  }
              c->cr1[3] = comm->cell_x1[dim1];
              if (dd->bGridJump)
              {
 -                c->cr1[0] = max(comm->cell_x1[dim1], comm->zone_d1[1].mch1);
 +                c->cr1[0] = std::max(comm->cell_x1[dim1], comm->zone_d1[1].mch1);
                  if (bDistMB)
                  {
                      /* For the multi-body distance we need the maximum */
 -                    c->bcr1 = max(comm->cell_x1[dim1], comm->zone_d1[1].p1_1);
 +                    c->bcr1 = std::max(comm->cell_x1[dim1], comm->zone_d1[1].p1_1);
                  }
              }
          }
@@@ -8103,7 -8103,7 +8103,7 @@@ static void setup_dd_communication(gmx_
  {
      int                    dim_ind, dim, dim0, dim1, dim2, dimd, p, nat_tot;
      int                    nzone, nzone_send, zone, zonei, cg0, cg1;
 -    int                    c, i, j, cg, cg_gl, nrcg;
 +    int                    c, i, cg, cg_gl, nrcg;
      int                   *zone_cg_range, pos_cg, *index_gl, *cgindex, *recv_i;
      gmx_domdec_comm_t     *comm;
      gmx_domdec_zones_t    *zones;
      gmx_domdec_ind_t      *ind;
      cginfo_mb_t           *cginfo_mb;
      gmx_bool               bBondComm, bDist2B, bDistMB, bDistBonded;
 -    real                   r_mb, r_comm2, r_scomm2, r_bcomm2, r_0, r_1, r2inc, inv_ncg;
 +    real                   r_comm2, r_bcomm2;
      dd_corners_t           corners;
      ivec                   tric_dist;
      rvec                  *cg_cm, *normal, *v_d, *v_0 = NULL, *v_1 = NULL, *recv_vr;
  
      for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
      {
 -        dim = dd->dim[dim_ind];
 -
          /* Check if we need to use triclinic distances */
          tric_dist[dim_ind] = 0;
          for (i = 0; i <= dim_ind; i++)
                      /* The rvec buffer is also required for atom buffers
                       * of size nrecv in dd_move_x and dd_move_f.
                       */
 -                    i = max(cd->ind[0].nrecv[nzone+1], ind->nrecv[nzone+1]);
 +                    i = std::max(cd->ind[0].nrecv[nzone+1], ind->nrecv[nzone+1]);
                      vec_rvec_check_alloc(&comm->vbuf2, i);
                  }
              }
@@@ -8578,9 -8580,10 +8578,9 @@@ static void set_zones_size(gmx_domdec_
      gmx_domdec_comm_t  *comm;
      gmx_domdec_zones_t *zones;
      gmx_bool            bDistMB;
 -    int                 z, zi, zj0, zj1, d, dim;
 +    int                 z, zi, d, dim;
      real                rcs, rcmbs;
      int                 i, j;
 -    real                size_j, add_tric;
      real                vol;
  
      comm = dd->comm;
                                   * With multiple pulses this will lead
                                   * to a larger zone then strictly necessary.
                                   */
 -                                zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
 -                                                             zones->size[zi].x1[dim]+rcmbs);
 +                                zones->size[z].x1[dim] = std::max(zones->size[z].x1[dim],
 +                                                                  zones->size[zi].x1[dim]+rcmbs);
                              }
                          }
                      }
                  {
                      if (zones->shift[z][dim] > 0)
                      {
 -                        zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
 -                                                     zones->size[zi].x1[dim]+rcs);
 +                        zones->size[z].x1[dim] = std::max(zones->size[z].x1[dim],
 +                                                          zones->size[zi].x1[dim]+rcs);
                      }
                  }
              }
              {
                  corner[ZZ] = zones->size[z].x1[ZZ];
              }
-             if (dd->ndim == 1 && box[ZZ][YY] != 0)
-             {
-                 /* With 1D domain decomposition the cg's are not in
-                  * the triclinic box, but triclinic x-y and rectangular y-z.
-                  * Shift y back, so it will later end up at 0.
-                  */
-                 corner[YY] -= corner[ZZ]*box[ZZ][YY]/box[ZZ][ZZ];
-             }
              /* Apply the triclinic couplings */
              assert(ddbox->npbcdim <= DIM);
              for (i = YY; i < ddbox->npbcdim; i++)
              {
                  for (j = XX; j < i; j++)
                  {
-                     corner[j] += corner[i]*box[i][j]/box[i][i];
+                     /* With 1D domain decomposition the cg's are not in
+                      * a triclinic box, but triclinic x-y and rectangular y/x-z.
+                      * So we should ignore the coupling for the non
+                      * domain-decomposed dimension of the pair x and y.
+                      */
+                     if (!(dd->ndim == 1 && ((dd->dim[0] == XX && j == YY) ||
+                                             (dd->dim[0] == YY && j == XX))))
+                     {
+                         corner[j] += corner[i]*box[i][j]/box[i][i];
+                     }
                  }
              }
              if (c == 0)
              {
                  for (i = 0; i < DIM; i++)
                  {
 -                    corner_min[i] = min(corner_min[i], corner[i]);
 -                    corner_max[i] = max(corner_max[i], corner[i]);
 +                    corner_min[i] = std::min(corner_min[i], corner[i]);
 +                    corner_max[i] = std::max(corner_max[i], corner[i]);
                  }
              }
          }
@@@ -8934,7 -8938,8 +8935,7 @@@ static int dd_sort_order(gmx_domdec_t *
  {
      gmx_domdec_sort_t *sort;
      gmx_cgsort_t      *cgsort, *sort_i;
 -    int                ncg_new, nsort2, nsort_new, i, *a, moved, *ibuf;
 -    int                sort_last, sort_skip;
 +    int                ncg_new, nsort2, nsort_new, i, *a, moved;
  
      sort = dd->comm->sort;
  
@@@ -9045,7 -9050,7 +9046,7 @@@ static void dd_sort_state(gmx_domdec_t 
                            int ncg_home_old)
  {
      gmx_domdec_sort_t *sort;
 -    gmx_cgsort_t      *cgsort, *sort_i;
 +    gmx_cgsort_t      *cgsort;
      int               *cgindex;
      int                ncg_new, i, *ibuf, cgsize;
      rvec              *vbuf;
@@@ -9302,7 -9307,7 +9303,7 @@@ void dd_partition_system(FIL
      t_block           *cgs_gl;
      gmx_int64_t        step_pcoupl;
      rvec               cell_ns_x0, cell_ns_x1;
 -    int                i, j, n, ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum;
 +    int                i, n, ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum;
      gmx_bool           bBoxChanged, bNStGlobalComm, bDoDLB, bCheckDLB, bTurnOnDLB, bLogLoad;
      gmx_bool           bRedist, bSortCG, bResortAll;
      ivec               ncells_old = {0, 0, 0}, ncells_new = {0, 0, 0}, np;
               * so the extra communication cost is negligible.
               */
              const int nddp_chk_dlb = 100;
 -
              bCheckDLB = (comm->n_load_collect == 0 ||
                           comm->n_load_have % nddp_chk_dlb == nddp_chk_dlb - 1);
          }
index 75e277999be1fcef991d5d14a30d2456a8449214,78695c64d21f81a4427ad126fb8c71dfb9e1ac33..e4cc7386e1324c5e825e251ec8f25460f38cb1f8
   * To help us fund GROMACS development, we humbly ask that you cite
   * the research papers on the package. Check out http://www.gromacs.org.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "gromacs/legacyheaders/gmx_thread_affinity.h"
 +
 +#include "config.h"
 +
 +#include <assert.h>
 +#include <errno.h>
 +#include <stdio.h>
 +#include <string.h>
 +
  #ifdef HAVE_SCHED_AFFINITY
 -#  ifndef _GNU_SOURCE
 -#    define _GNU_SOURCE 1
 -#  endif
  #  include <sched.h>
  #  include <sys/syscall.h>
  #endif
 -#include <string.h>
 -#include <errno.h>
 -#include <assert.h>
 -#include <stdio.h>
  
  #include "thread_mpi/threads.h"
  
 -#include "typedefs.h"
 -#include "types/commrec.h"
 -#include "types/hw_info.h"
 -#include "copyrite.h"
 -#include "gmx_cpuid.h"
 -#include "gmx_omp_nthreads.h"
 -#include "md_logging.h"
 -#include "gmx_thread_affinity.h"
 -
 -#include "gmx_fatal.h"
 +#include "gromacs/legacyheaders/copyrite.h"
 +#include "gromacs/legacyheaders/gmx_cpuid.h"
 +#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
 +#include "gromacs/legacyheaders/md_logging.h"
 +#include "gromacs/legacyheaders/typedefs.h"
 +#include "gromacs/legacyheaders/types/commrec.h"
 +#include "gromacs/legacyheaders/types/hw_info.h"
 +#include "gromacs/utility/basenetwork.h"
 +#include "gromacs/utility/cstringutil.h"
 +#include "gromacs/utility/fatalerror.h"
  #include "gromacs/utility/gmxomp.h"
 +#include "gromacs/utility/smalloc.h"
  
  static int
  get_thread_affinity_layout(FILE *fplog,
@@@ -373,53 -371,21 +373,56 @@@ gmx_set_thread_affinity(FIL
   * Note that this will only work on Linux as we use a GNU feature.
   */
  void
 -gmx_check_thread_affinity_set(FILE            gmx_unused *fplog,
 -                              const t_commrec gmx_unused *cr,
 -                              gmx_hw_opt_t    gmx_unused *hw_opt,
 -                              int             gmx_unused  nthreads_hw_avail,
 -                              gmx_bool        gmx_unused  bAfterOpenmpInit)
 +gmx_check_thread_affinity_set(FILE            *fplog,
 +                              const t_commrec *cr,
 +                              gmx_hw_opt_t    *hw_opt,
 +                              int  gmx_unused  nthreads_hw_avail,
 +                              gmx_bool         bAfterOpenmpInit)
  {
  #ifdef HAVE_SCHED_AFFINITY
      cpu_set_t mask_current;
      int       i, ret, cpu_count, cpu_set;
      gmx_bool  bAllSet;
 +#endif
+ #ifdef GMX_LIB_MPI
+     gmx_bool  bAllSet_All;
+ #endif
  
      assert(hw_opt);
 +    if (!bAfterOpenmpInit)
 +    {
 +        /* Check for externally set OpenMP affinity and turn off internal
 +         * pinning if any is found. We need to do this check early to tell
 +         * thread-MPI whether it should do pinning when spawning threads.
 +         * TODO: the above no longer holds, we should move these checks later
 +         */
 +        if (hw_opt->thread_affinity != threadaffOFF)
 +        {
 +            char *message;
 +            if (!gmx_omp_check_thread_affinity(&message))
 +            {
 +                /* TODO: with -pin auto we should only warn when using all cores */
 +                md_print_warn(cr, fplog, "%s", message);
 +                sfree(message);
 +                hw_opt->thread_affinity = threadaffOFF;
 +            }
 +        }
 +
 +        /* With thread-MPI this is needed as pinning might get turned off,
 +         * which needs to be known before starting thread-MPI.
 +         * With thread-MPI hw_opt is processed here on the master rank
 +         * and passed to the other ranks later, so we only do this on master.
 +         */
 +        if (!SIMMASTER(cr))
 +        {
 +            return;
 +        }
 +#ifndef GMX_THREAD_MPI
 +        return;
 +#endif
 +    }
 +
 +#ifdef HAVE_SCHED_GETAFFINITY
      if (hw_opt->thread_affinity == threadaffOFF)
      {
          /* internal affinity setting is off, don't bother checking process affinity */
          bAllSet = bAllSet && (CPU_ISSET(i, &mask_current) != 0);
      }
  
+ #ifdef GMX_LIB_MPI
+     MPI_Allreduce(&bAllSet, &bAllSet_All, 1, MPI_INT, MPI_LAND, MPI_COMM_WORLD);
+     bAllSet = bAllSet_All;
+ #endif
      if (!bAllSet)
      {
          if (hw_opt->thread_affinity == threadaffAUTO)
index 8f55b6c6609358dcc0d19c872e9da8e9fdb006d3,3319882e7fd9a83f913fe853fd79047e49687f8e..9a9cc3ed3c23c06b56ea454806cf1dad7218aab2
   * To help us fund GROMACS development, we humbly ask that you cite
   * the research papers on the package. Check out http://www.gromacs.org.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "readir.h"
  
  #include <ctype.h>
 -#include <stdlib.h>
  #include <limits.h>
 -#include "sysstuff.h"
 -#include "gromacs/utility/smalloc.h"
 -#include "typedefs.h"
 -#include "physics.h"
 -#include "names.h"
 -#include "gmx_fatal.h"
 -#include "macros.h"
 -#include "index.h"
 -#include "symtab.h"
 +#include <stdlib.h>
 +
 +#include "gromacs/gmxpreprocess/calc_verletbuf.h"
 +#include "gromacs/gmxpreprocess/toputil.h"
 +#include "gromacs/legacyheaders/chargegroup.h"
 +#include "gromacs/legacyheaders/inputrec.h"
 +#include "gromacs/legacyheaders/macros.h"
 +#include "gromacs/legacyheaders/names.h"
 +#include "gromacs/legacyheaders/network.h"
 +#include "gromacs/legacyheaders/readinp.h"
 +#include "gromacs/legacyheaders/typedefs.h"
 +#include "gromacs/legacyheaders/warninp.h"
 +#include "gromacs/math/units.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/pbcutil/pbc.h"
 +#include "gromacs/topology/block.h"
 +#include "gromacs/topology/index.h"
 +#include "gromacs/topology/mtop_util.h"
 +#include "gromacs/topology/symtab.h"
  #include "gromacs/utility/cstringutil.h"
 -#include "readinp.h"
 -#include "warninp.h"
 -#include "readir.h"
 -#include "toputil.h"
 -#include "index.h"
 -#include "network.h"
 -#include "vec.h"
 -#include "pbc.h"
 -#include "mtop_util.h"
 -#include "chargegroup.h"
 -#include "inputrec.h"
 -#include "calc_verletbuf.h"
 +#include "gromacs/utility/fatalerror.h"
 +#include "gromacs/utility/smalloc.h"
  
  #define MAXPTR 254
  #define NOGID  255
@@@ -455,7 -456,7 +455,7 @@@ void check_ir(const char *mdparin, t_in
          warning_error(wi, "nstcalclr must be a positive number (divisor of nstcalclr), or -1 to follow nstlist.");
      }
  
-     if (EEL_PME(ir->coulombtype) && ir->rcoulomb > ir->rvdw && ir->nstcalclr > 1)
+     if (EEL_PME(ir->coulombtype) && ir->rcoulomb > ir->rlist && ir->nstcalclr > 1)
      {
          warning_error(wi, "When used with PME, the long-range component of twin-range interactions must be updated every step (nstcalclr)");
      }
          sprintf(err_buf, "wall-ewald-zfac should be >= 2");
          CHECK(ir->wall_ewald_zfac < 2);
      }
+     if ((ir->ewald_geometry == eewg3DC) && (ir->ePBC != epbcXY) &&
+         EEL_FULL(ir->coulombtype))
+     {
+         sprintf(warn_buf, "With %s and ewald_geometry = %s you should use pbc = %s",
+                 eel_names[ir->coulombtype], eewg_names[eewg3DC], epbc_names[epbcXY]);
+         warning(wi, warn_buf);
+     }
+     if ((ir->epsilon_surface != 0) && EEL_FULL(ir->coulombtype))
+     {
+         if (ir->cutoff_scheme == ecutsVERLET)
+         {
+             sprintf(warn_buf, "Since molecules/charge groups are broken using the Verlet scheme, you can not use a dipole correction to the %s electrostatics.",
+                     eel_names[ir->coulombtype]);
+             warning(wi, warn_buf);
+         }
+         else
+         {
+             sprintf(warn_buf, "Dipole corrections to %s electrostatics only work if all charge groups that can cross PBC boundaries are dipoles. If this is not the case set epsilon_surface to 0",
+                     eel_names[ir->coulombtype]);
+             warning_note(wi, warn_buf);
+         }
+     }
  
      if (ir_vdw_switched(ir))
      {
@@@ -1425,7 -1448,7 +1447,7 @@@ int str_nelem(const char *str, int maxp
      int   np = 0;
      char *copy0, *copy;
  
 -    copy0 = strdup(str);
 +    copy0 = gmx_strdup(str);
      copy  = copy0;
      ltrim(copy);
      while (*copy != '\0')
@@@ -1680,7 -1703,7 +1702,7 @@@ static void do_wall_params(t_inputrec *
          }
          for (i = 0; i < ir->nwall; i++)
          {
 -            opts->wall_atomtype[i] = strdup(names[i]);
 +            opts->wall_atomtype[i] = gmx_strdup(names[i]);
          }
  
          if (ir->wall_type == ewt93 || ir->wall_type == ewt104)
@@@ -2343,7 -2366,7 +2365,7 @@@ void get_ir(const char *mdparin, const 
      {
          if (ir->efep != efepNO)
          {
 -            opts->couple_moltype = strdup(is->couple_moltype);
 +            opts->couple_moltype = gmx_strdup(is->couple_moltype);
              if (opts->couple_lam0 == opts->couple_lam1)
              {
                  warning(wi, "The lambda=0 and lambda=1 states for coupling are identical");
          {
              do_simtemp_params(ir);
          }
+         /* Because sc-coul (=FALSE by default) only acts on the lambda state
+          * setup and not on the old way of specifying the free-energy setup,
+          * we should check for using soft-core when not needed, since that
+          * can complicate the sampling significantly.
+          * Note that we only check for the automated coupling setup.
+          * If the (advanced) user does FEP through manual topology changes,
+          * this check will not be triggered.
+          */
+         if (ir->efep != efepNO && ir->fepvals->n_lambda == 0 &&
+             ir->fepvals->sc_alpha != 0 &&
+             ((opts->couple_lam0 == ecouplamVDW  && opts->couple_lam0 == ecouplamVDWQ) ||
+              (opts->couple_lam1 == ecouplamVDWQ && opts->couple_lam1 == ecouplamVDW)))
+         {
+             warning(wi, "You are using soft-core interactions while the Van der Waals interactions are not decoupled (note that the sc-coul option is only active when using lambda states). Although this will not lead to errors, you will need much more sampling than without soft-core interactions. Consider using sc-alpha=0.");
+         }
      }
      else
      {
@@@ -2935,7 -2974,7 +2973,7 @@@ static void decode_cos(char *s, t_cosin
      double  a, phi;
      int     i;
  
 -    t = strdup(s);
 +    t = gmx_strdup(s);
      trim(t);
  
      cosine->n   = 0;
@@@ -3116,7 -3155,7 +3154,7 @@@ static void make_swap_groups
  
  void make_IMD_group(t_IMD *IMDgroup, char *IMDgname, t_blocka *grps, char **gnames)
  {
 -    int      ig = -1, i;
 +    int      ig, i;
  
  
      ig            = search_string(IMDgname, grps->nr, gnames);
@@@ -3823,15 -3862,7 +3861,15 @@@ static gmx_bool absolute_reference(t_in
                          case efbposresSPHERE:
                              AbsRef[XX] = AbsRef[YY] = AbsRef[ZZ] = 1;
                              break;
 +                        case efbposresCYLINDERX:
 +                            AbsRef[YY] = AbsRef[ZZ] = 1;
 +                            break;
 +                        case efbposresCYLINDERY:
 +                            AbsRef[XX] = AbsRef[ZZ] = 1;
 +                            break;
                          case efbposresCYLINDER:
 +                        /* efbposres is a synonym for efbposresCYLINDERZ for backwards compatibility */
 +                        case efbposresCYLINDERZ:
                              AbsRef[XX] = AbsRef[YY] = 1;
                              break;
                          case efbposresX: /* d=XX */
index 2e78022c5b1d32b148f31348616f69ab83af7e1a,4ae7480f0f32d3bb46c33ee5747e14bcbc81b1a1..e181244bf516fb04d18096bced06e5bf4dfdb890
   * To help us fund GROMACS development, we humbly ask that you cite
   * the research papers on the package. Check out http://www.gromacs.org.
   */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
  
 -#include <string.h>
  #include <stdlib.h>
 +#include <string.h>
  
 +#include "gromacs/gmxpreprocess/readir.h"
 +#include "gromacs/legacyheaders/macros.h"
 +#include "gromacs/legacyheaders/mdatoms.h"
 +#include "gromacs/legacyheaders/names.h"
 +#include "gromacs/legacyheaders/readinp.h"
 +#include "gromacs/legacyheaders/typedefs.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/pbcutil/pbc.h"
 +#include "gromacs/pulling/pull.h"
  #include "gromacs/utility/cstringutil.h"
 -#include "sysstuff.h"
 -#include "princ.h"
 -#include "gromacs/fileio/futil.h"
 -#include "vec.h"
 +#include "gromacs/utility/fatalerror.h"
 +#include "gromacs/utility/futil.h"
  #include "gromacs/utility/smalloc.h"
 -#include "typedefs.h"
 -#include "names.h"
 -#include "gmx_fatal.h"
 -#include "macros.h"
 -#include "index.h"
 -#include "symtab.h"
 -#include "readinp.h"
 -#include "readir.h"
 -#include "mdatoms.h"
 -#include "pbc.h"
 -#include "gromacs/pulling/pull.h"
  
  
  static char pulldim[STRLEN];
@@@ -204,7 -210,8 +204,8 @@@ char **read_pullparams(int *ninp_p, t_i
          nscan = sscanf(groups, "%d %d %d", &pcrd->group[0], &pcrd->group[1], &idum);
          if (nscan != 2)
          {
-             fprintf(stderr, "ERROR: %s should have %d components\n", buf, 2);
+             fprintf(stderr, "ERROR: %s should contain %d pull group indices\n",
+                     buf, 2);
              nerror++;
          }
          sprintf(buf, "pull-coord%d-origin", i);
@@@ -248,7 -255,7 +249,7 @@@ void make_pull_groups(t_pull *pull
  
          if (strcmp(pgnames[g], "") == 0)
          {
-             gmx_fatal(FARGS, "Group pull_group%d required by grompp was undefined.", g);
+             gmx_fatal(FARGS, "Pull option pull_group%d required by grompp has not been set.", g);
          }
  
          ig        = search_string(pgnames[g], grps->nr, gnames);
index 57ff2d7b8fb86f5924a748bdba5f6f0a6d92725b,783ad72c8530158783b5eab8f7f3d647489c9f75..aa4e633f77e0729350f3b990760068ca7f8bafa8
   * the research papers on the package. Check out http://www.gromacs.org.
   */
  /* This file is completely threadsafe - keep it that way! */
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
  
  #include <math.h>
 -#include "main.h"
 -#include "constr.h"
 -#include "copyrite.h"
 -#include "physics.h"
 -#include "vec.h"
 -#include "pbc.h"
 -#include "gromacs/utility/smalloc.h"
 -#include "mdrun.h"
 -#include "nrnb.h"
 -#include "domdec.h"
 -#include "mtop_util.h"
 -#include "gmx_omp_nthreads.h"
 +#include <stdlib.h>
  
 +#include "gromacs/domdec/domdec.h"
  #include "gromacs/fileio/gmxfio.h"
 -#include "gmx_fatal.h"
 +#include "gromacs/legacyheaders/constr.h"
 +#include "gromacs/legacyheaders/copyrite.h"
 +#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
 +#include "gromacs/legacyheaders/mdrun.h"
 +#include "gromacs/legacyheaders/nrnb.h"
 +#include "gromacs/legacyheaders/types/commrec.h"
 +#include "gromacs/math/units.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/mdlib/bitmask.h"
 +#include "gromacs/pbcutil/pbc.h"
 +#include "gromacs/topology/block.h"
 +#include "gromacs/topology/mtop_util.h"
 +#include "gromacs/utility/fatalerror.h"
  #include "gromacs/utility/gmxomp.h"
 +#include "gromacs/utility/smalloc.h"
  
  typedef struct {
      int    b0;         /* first constraint for this thread */
@@@ -67,6 -66,7 +67,7 @@@
      int   *ind_r;      /* constraint index for updating atom data */
      int    ind_nalloc; /* allocation size of ind and ind_r */
      tensor vir_r_m_dr; /* temporary variable for virial calculation */
+     real   dhdlambda;  /* temporary variable for lambda derivative */
  } lincs_thread_t;
  
  typedef struct gmx_lincsdata {
@@@ -97,7 -97,7 +98,7 @@@
      real           *bllen;        /* the reference bond length */
      int             nth;          /* The number of threads doing LINCS */
      lincs_thread_t *th;           /* LINCS thread division */
 -    unsigned       *atf;          /* atom flags for thread parallelization */
 +    gmx_bitmask_t  *atf;          /* atom flags for thread parallelization */
      int             atf_nalloc;   /* allocation size of atf */
      /* arrays for temporary storage in the LINCS algorithm */
      rvec           *tmpv;
@@@ -360,11 -360,11 +361,11 @@@ static void lincs_update_atoms(struct g
  static void do_lincsp(rvec *x, rvec *f, rvec *fp, t_pbc *pbc,
                        struct gmx_lincsdata *lincsd, int th,
                        real *invmass,
-                       int econq, real *dvdlambda,
+                       int econq, gmx_bool bCalcDHDL,
                        gmx_bool bCalcVir, tensor rmdf)
  {
      int      b0, b1, b, i, j, k, n;
 -    real     tmp0, tmp1, tmp2, im1, im2, mvb, rlen, len, wfac, lam;
 +    real     tmp0, tmp1, tmp2, mvb;
      rvec     dx;
      int     *bla, *blnr, *blbnb;
      rvec    *r;
      lincs_update_atoms(lincsd, th, 1.0, sol, r,
                         (econq != econqForce) ? invmass : NULL, fp);
  
-     if (dvdlambda != NULL)
+     if (bCalcDHDL)
      {
- #pragma omp barrier
+         real dhdlambda;
+         dhdlambda = 0;
          for (b = b0; b < b1; b++)
          {
-             *dvdlambda -= sol[b]*lincsd->ddist[b];
+             dhdlambda -= sol[b]*lincsd->ddist[b];
          }
-         /* 10 ncons flops */
+         lincsd->th[th].dhdlambda = dhdlambda;
      }
  
      if (bCalcVir)
@@@ -499,13 -502,13 +503,13 @@@ static void do_lincs(rvec *x, rvec *xp
                       struct gmx_lincsdata *lincsd, int th,
                       real *invmass,
                       t_commrec *cr,
-                      gmx_bool bCalcLambda,
+                      gmx_bool bCalcDHDL,
                       real wangle, int *warn,
                       real invdt, rvec *v,
                       gmx_bool bCalcVir, tensor vir_r_m_dr)
  {
      int      b0, b1, b, i, j, k, n, iter;
 -    real     tmp0, tmp1, tmp2, im1, im2, mvb, rlen, len, len2, dlen2, wfac;
 +    real     tmp0, tmp1, tmp2, mvb, rlen, len, len2, dlen2, wfac;
      rvec     dx;
      int     *bla, *blnr, *blbnb;
      rvec    *r;
              dlen2 = 2*len2 - norm2(dx);
              if (dlen2 < wfac*len2 && (nlocat == NULL || nlocat[b]))
              {
 +                /* not race free - see detailed comment in caller */
                  *warn = b;
              }
              if (dlen2 > 0)
          /* 16 ncons flops */
      }
  
-     if (nlocat != NULL && bCalcLambda)
+     if (nlocat != NULL && (bCalcDHDL || bCalcVir))
      {
-         /* In lincs_update_atoms thread might cross-read mlambda */
+         /* In lincs_update_atoms threads might cross-read mlambda */
  #pragma omp barrier
  
          /* Only account for local atoms */
          }
      }
  
+     if (bCalcDHDL)
+     {
+         real dhdl;
+         dhdl = 0;
+         for (b = b0; b < b1; b++)
+         {
+             /* Note that this this is dhdl*dt^2, the dt^2 factor is corrected
+              * later after the contributions are reduced over the threads.
+              */
+             dhdl -= lincsd->mlambda[b]*lincsd->ddist[b];
+         }
+         lincsd->th[th].dhdlambda = dhdl;
+     }
      if (bCalcVir)
      {
          /* Constraint virial */
@@@ -785,7 -802,7 +804,7 @@@ void set_lincs_matrix(struct gmx_lincsd
                              li->triangle[li->ntriangle] = i;
                              li->tri_bits[li->ntriangle] = 0;
                              li->ntriangle++;
 -                            if (li->blnr[i+1] - li->blnr[i] > sizeof(li->tri_bits[0])*8 - 1)
 +                            if (li->blnr[i+1] - li->blnr[i] > static_cast<int>(sizeof(li->tri_bits[0])*8 - 1))
                              {
                                  gmx_fatal(FARGS, "A constraint is connected to %d constraints, this is more than the %d allowed for constraints participating in triangles",
                                            li->blnr[i+1] - li->blnr[i],
@@@ -1017,7 -1034,7 +1036,7 @@@ static void lincs_thread_setup(struct g
  {
      lincs_thread_t *li_m;
      int             th;
 -    unsigned       *atf;
 +    gmx_bitmask_t  *atf;
      int             a;
  
      if (natoms > li->atf_nalloc)
      /* Clear the atom flags */
      for (a = 0; a < natoms; a++)
      {
 -        atf[a] = 0;
 +        bitmask_clear(&atf[a]);
 +    }
 +
 +    if (li->nth > BITMASK_SIZE)
 +    {
 +        gmx_fatal(FARGS, "More than %d threads is not supported for LINCS.", BITMASK_SIZE);
      }
  
      for (th = 0; th < li->nth; th++)
          li_th->b0 = (li->nc* th   )/li->nth;
          li_th->b1 = (li->nc*(th+1))/li->nth;
  
 -        if (th < sizeof(*atf)*8)
 +        /* For each atom set a flag for constraints from each */
 +        for (b = li_th->b0; b < li_th->b1; b++)
          {
 -            /* For each atom set a flag for constraints from each */
 -            for (b = li_th->b0; b < li_th->b1; b++)
 -            {
 -                atf[li->bla[b*2]  ] |= (1U<<th);
 -                atf[li->bla[b*2+1]] |= (1U<<th);
 -            }
 +            bitmask_set_bit(&atf[li->bla[b*2]], th);
 +            bitmask_set_bit(&atf[li->bla[b*2+1]], th);
          }
      }
  
      for (th = 0; th < li->nth; th++)
      {
          lincs_thread_t *li_th;
 -        unsigned        mask;
 +        gmx_bitmask_t   mask;
          int             b;
  
          li_th = &li->th[th];
              srenew(li_th->ind_r, li_th->ind_nalloc);
          }
  
 -        if (th < sizeof(*atf)*8)
 -        {
 -            mask = (1U<<th) - 1U;
 +        bitmask_init_low_bits(&mask, th);
  
 -            li_th->nind   = 0;
 -            li_th->nind_r = 0;
 -            for (b = li_th->b0; b < li_th->b1; b++)
 +        li_th->nind   = 0;
 +        li_th->nind_r = 0;
 +        for (b = li_th->b0; b < li_th->b1; b++)
 +        {
 +            /* We let the constraint with the lowest thread index
 +             * operate on atoms with constraints from multiple threads.
 +             */
 +            if (bitmask_is_disjoint(atf[li->bla[b*2]], mask) &&
 +                bitmask_is_disjoint(atf[li->bla[b*2+1]], mask))
              {
 -                /* We let the constraint with the lowest thread index
 -                 * operate on atoms with constraints from multiple threads.
 -                 */
 -                if (((atf[li->bla[b*2]]   & mask) == 0) &&
 -                    ((atf[li->bla[b*2+1]] & mask) == 0))
 -                {
 -                    /* Add the constraint to the local atom update index */
 -                    li_th->ind[li_th->nind++] = b;
 -                }
 -                else
 -                {
 -                    /* Add the constraint to the rest block */
 -                    li_th->ind_r[li_th->nind_r++] = b;
 -                }
 +                /* Add the constraint to the local atom update index */
 +                li_th->ind[li_th->nind++] = b;
              }
 -        }
 -        else
 -        {
 -            /* We are out of bits, assign all constraints to rest */
 -            for (b = li_th->b0; b < li_th->b1; b++)
 +            else
              {
 +                /* Add the constraint to the rest block */
                  li_th->ind_r[li_th->nind_r++] = b;
              }
          }
@@@ -1145,6 -1171,7 +1164,6 @@@ void set_lincs(t_idef *idef, t_mdatoms 
      int          i, k, ncc_alloc, ni, con, nconnect, concon;
      int          type, a1, a2;
      real         lenA = 0, lenB;
 -    gmx_bool     bLocal;
  
      li->nc  = 0;
      li->ncc = 0;
      li->blnr[con] = nconnect;
      for (i = 0; i < ni; i++)
      {
 -        bLocal = TRUE;
          type   = iatom[3*i];
          a1     = iatom[3*i+1];
          a2     = iatom[3*i+2];
@@@ -1462,14 -1490,21 +1481,21 @@@ gmx_bool constrain_lincs(FILE *fplog, g
                           t_nrnb *nrnb,
                           int maxwarn, int *warncount)
  {
+     gmx_bool  bCalcDHDL;
      char      buf[STRLEN], buf2[22], buf3[STRLEN];
 -    int       i, warn, p_imax, error;
 +    int       i, warn, p_imax;
      real      ncons_loc, p_ssd, p_max = 0;
      rvec      dx;
      gmx_bool  bOK;
  
      bOK = TRUE;
  
+     /* This boolean should be set by a flag passed to this routine.
+      * We can also easily check if any constraint length is changed,
+      * if not dH/dlambda=0 and we can also set the boolean to FALSE.
+      */
+     bCalcDHDL = (ir->efep != efepNO && dvdlambda != NULL);
      if (lincsd->nc == 0 && cr->dd == NULL)
      {
          if (bLog || bEner)
  
      if (econq == econqCoord)
      {
+         /* We can't use bCalcDHDL here, since NULL can be passed for dvdlambda
+          * also with efep!=fepNO.
+          */
          if (ir->efep != efepNO)
          {
              if (md->nMassPerturbed && lincsd->matlam != md->lambda)
  
              do_lincs(x, xprime, box, pbc, lincsd, th,
                       md->invmass, cr,
-                      bCalcVir || (ir->efep != efepNO),
+                      bCalcDHDL,
                       ir->LincsWarnAngle, &warn,
                       invdt, v, bCalcVir,
                       th == 0 ? vir_r_m_dr : lincsd->th[th].vir_r_m_dr);
          }
  
-         if (ir->efep != efepNO)
-         {
-             real dt_2, dvdl = 0;
-             /* TODO This should probably use invdt, so that sd integrator scaling works properly */
-             dt_2 = 1.0/(ir->delta_t*ir->delta_t);
-             for (i = 0; (i < lincsd->nc); i++)
-             {
-                 dvdl -= lincsd->mlambda[i]*dt_2*lincsd->ddist[i];
-             }
-             *dvdlambda += dvdl;
-         }
          if (bLog && fplog && lincsd->nc > 0)
          {
              fprintf(fplog, "   Rel. Constraint Deviation:  RMS         MAX     between atoms\n");
              int th = gmx_omp_get_thread_num();
  
              do_lincsp(x, xprime, min_proj, pbc, lincsd, th,
-                       md->invmass, econq, ir->efep != efepNO ? dvdlambda : NULL,
+                       md->invmass, econq, bCalcDHDL,
                        bCalcVir, th == 0 ? vir_r_m_dr : lincsd->th[th].vir_r_m_dr);
          }
      }
  
+     if (bCalcDHDL)
+     {
+         /* Reduce the dH/dlambda contributions over the threads */
+         real dhdlambda;
+         int  th;
+         dhdlambda = 0;
+         for (th = 0; th < lincsd->nth; th++)
+         {
+             dhdlambda += lincsd->th[th].dhdlambda;
+         }
+         if (econqCoord)
+         {
+             /* dhdlambda contains dH/dlambda*dt^2, correct for this */
++            /* TODO This should probably use invdt, so that sd integrator scaling works properly */
+             dhdlambda /= ir->delta_t*ir->delta_t;
+         }
+         *dvdlambda += dhdlambda;
+     }
      if (bCalcVir && lincsd->nth > 1)
      {
          for (i = 1; i < lincsd->nth; i++)
index 146c95855be950c76df0f85aa910b34dd6994de0,7b4bc4f524eac0e8bfa8e03b1b35208291004842..70d6edff67ce522c8c678f1f7a8cab28bd5163e7
   * To help us fund GROMACS development, we humbly ask that you cite
   * the research papers on the package. Check out http://www.gromacs.org.
   */
 +#include "gromacs/mdlib/nbnxn_consts.h"
 +#include "gromacs/pbcutil/ishift.h"
  #include "gromacs/simd/simd.h"
  #include "gromacs/simd/simd_math.h"
  #include "gromacs/simd/vector_operations.h"
 -#include "../../nbnxn_consts.h"
  #ifdef CALC_COUL_EWALD
  #include "gromacs/math/utilities.h"
  #endif
  
 +#include "config.h"
 +
  #ifndef GMX_SIMD_J_UNROLL_SIZE
  #error "Need to define GMX_SIMD_J_UNROLL_SIZE before including the 4xn kernel common header file"
  #endif
  #define STRIDE     (UNROLLI)
  #endif
  
 -#include "../nbnxn_kernel_simd_utils.h"
 +#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_simd_utils.h"
  
  static gmx_inline void gmx_simdcall
- gmx_load_simd_4xn_interactions(int gmx_unused             excl,
+ gmx_load_simd_4xn_interactions(int                        excl,
                                 gmx_exclfilter gmx_unused  filter_S0,
                                 gmx_exclfilter gmx_unused  filter_S1,
                                 gmx_exclfilter gmx_unused  filter_S2,
                                 gmx_exclfilter gmx_unused  filter_S3,
-                                const char gmx_unused     *interaction_mask_indices,
                                 real gmx_unused           *simd_interaction_array,
                                 gmx_simd_bool_t           *interact_S0,
                                 gmx_simd_bool_t           *interact_S1,
      *interact_S1  = gmx_checkbitmask_pb(mask_pr_S, filter_S1);
      *interact_S2  = gmx_checkbitmask_pb(mask_pr_S, filter_S2);
      *interact_S3  = gmx_checkbitmask_pb(mask_pr_S, filter_S3);
- #endif
- #ifdef GMX_SIMD_IBM_QPX
+ #elif defined GMX_SIMD_IBM_QPX
      const int size = GMX_SIMD_REAL_WIDTH * sizeof(real);
-     *interact_S0  = gmx_load_interaction_mask_pb(size*interaction_mask_indices[0], simd_interaction_array);
-     *interact_S1  = gmx_load_interaction_mask_pb(size*interaction_mask_indices[1], simd_interaction_array);
-     *interact_S2  = gmx_load_interaction_mask_pb(size*interaction_mask_indices[2], simd_interaction_array);
-     *interact_S3  = gmx_load_interaction_mask_pb(size*interaction_mask_indices[3], simd_interaction_array);
+     *interact_S0  = gmx_load_interaction_mask_pb(size*((excl >> (0 * UNROLLJ)) & 0xF), simd_interaction_array);
+     *interact_S1  = gmx_load_interaction_mask_pb(size*((excl >> (1 * UNROLLJ)) & 0xF), simd_interaction_array);
+     *interact_S2  = gmx_load_interaction_mask_pb(size*((excl >> (2 * UNROLLJ)) & 0xF), simd_interaction_array);
+     *interact_S3  = gmx_load_interaction_mask_pb(size*((excl >> (3 * UNROLLJ)) & 0xF), simd_interaction_array);
+ #else
+ #error "Need implementation of gmx_load_simd_4xn_interactions"
  #endif
  }
  
index 4558266bc39a2c899a9ad0c763ee9edea58510b8,f89a418ce38db08c05f548d3337aca3b3bf6f052..09d429a27d79ce456d69d0933ff5a038f3ec941d
  #ifndef _nbnxn_pairlist_h
  #define _nbnxn_pairlist_h
  
 -#include "nblist.h"
 +#include <stddef.h>
 +
 +#include "thread_mpi/atomic.h"
 +
 +#include "gromacs/legacyheaders/types/nblist.h"
 +#include "gromacs/math/vectypes.h"
 +#include "gromacs/mdlib/bitmask.h"
 +#include "gromacs/utility/basedefinitions.h"
 +#include "gromacs/utility/real.h"
  
  #ifdef __cplusplus
  extern "C" {
@@@ -83,8 -75,6 +83,6 @@@ typedef void nbnxn_free_t (void *ptr)
  typedef struct {
      int          cj;    /* The j-cluster                    */
      unsigned int excl;  /* The exclusion (interaction) bits */
-     /* Indices into the arrays of SIMD interaction masks. */
-     char         interaction_mask_indices[4];
  } nbnxn_cj_t;
  
  /* In nbnxn_ci_t the integer shift contains the shift in the lower 7 bits.
@@@ -132,7 -122,7 +130,7 @@@ typedef struct 
                              */
  } nbnxn_excl_t;
  
 -typedef struct {
 +typedef struct nbnxn_pairlist_t {
      gmx_cache_protect_t cp0;
  
      nbnxn_alloc_t      *alloc;
@@@ -208,16 -198,21 +206,16 @@@ typedef struct 
  #define NBNXN_BUFFERFLAG_SIZE  16
  #endif
  
 -/* We currently store the reduction flags as bits in an unsigned int.
 - * In most cases this limits the number of flags to 32.
 - * The reduction will automatically disable the flagging and do a full
 - * reduction when the flags won't fit, but this will lead to very slow
 - * reduction. As we anyhow don't expect reasonable performance with
 - * more than 32 threads, we put in this hard limit.
 - * You can increase this number, but the reduction will be very slow.
 +/* We store the reduction flags as gmx_bitmask_t.
 + * This limits the number of flags to BITMASK_SIZE.
   */
 -#define NBNXN_BUFFERFLAG_MAX_THREADS  32
 +#define NBNXN_BUFFERFLAG_MAX_THREADS  (BITMASK_SIZE)
  
  /* Flags for telling if threads write to force output buffers */
  typedef struct {
 -    int           nflag;       /* The number of flag blocks                         */
 -    unsigned int *flag;        /* Bit i is set when thread i writes to a cell-block */
 -    int           flag_nalloc; /* Allocation size of cxy_flag                       */
 +    int               nflag;       /* The number of flag blocks                         */
 +    gmx_bitmask_t    *flag;        /* Bit i is set when thread i writes to a cell-block */
 +    int               flag_nalloc; /* Allocation size of cxy_flag                       */
  } nbnxn_buffer_flags_t;
  
  /* LJ combination rules: geometric, Lorentz-Berthelot, none */
@@@ -225,7 -220,10 +223,7 @@@ enum 
      ljcrGEOM, ljcrLB, ljcrNONE, ljcrNR
  };
  
 -/* TODO: Remove need for forward declare */
 -struct tMPI_Atomic;
 -
 -typedef struct {
 +typedef struct nbnxn_atomdata_t {
      nbnxn_alloc_t           *alloc;
      nbnxn_free_t            *free;
      int                      ntype;           /* The number of different atom types                 */
      gmx_bool                 bUseBufferFlags;        /* Use the flags or operate on all atoms     */
      nbnxn_buffer_flags_t     buffer_flags;           /* Flags for buffer zeroing+reduc.  */
      gmx_bool                 bUseTreeReduce;         /* Use tree for force reduction */
 -    struct tMPI_Atomic      *syncStep;               /* Synchronization step for tree reduce */
 +    tMPI_Atomic_t           *syncStep;               /* Synchronization step for tree reduce */
  } nbnxn_atomdata_t;
  
  #ifdef __cplusplus
index 856ae32d0f0d8c00a97f736ab14d5d66b64026d1,d201407bfda9af23daef7e4e4dffea23951f2f0c..f7c27be2f28fd8f310acd44252ceb5ba9527a79b
   * the research papers on the package. Check out http://www.gromacs.org.
   */
  
 -#ifdef HAVE_CONFIG_H
 -#include <config.h>
 -#endif
 +#include "gmxpre.h"
 +
 +#include "nbnxn_search.h"
  
- #include "config.h"
 +#include <assert.h>
  #include <math.h>
  #include <string.h>
 -#include <assert.h>
  
 -#include "sysstuff.h"
 -#include "gromacs/utility/smalloc.h"
 -#include "types/commrec.h"
 -#include "macros.h"
 +#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
 +#include "gromacs/legacyheaders/macros.h"
 +#include "gromacs/legacyheaders/nrnb.h"
 +#include "gromacs/legacyheaders/ns.h"
 +#include "gromacs/legacyheaders/types/commrec.h"
  #include "gromacs/math/utilities.h"
 -#include "vec.h"
 -#include "pbc.h"
 -#include "nbnxn_consts.h"
 +#include "gromacs/math/vec.h"
 +#include "gromacs/mdlib/nb_verlet.h"
 +#include "gromacs/mdlib/nbnxn_atomdata.h"
 +#include "gromacs/mdlib/nbnxn_consts.h"
 +#include "gromacs/pbcutil/ishift.h"
 +#include "gromacs/pbcutil/pbc.h"
 +#include "gromacs/utility/smalloc.h"
 +
  /* nbnxn_internal.h included gromacs/simd/macros.h */
 -#include "nbnxn_internal.h"
 -#ifdef GMX_NBNXN_SIMD
 +#include "gromacs/mdlib/nbnxn_internal.h"
 +#ifdef GMX_SIMD
  #include "gromacs/simd/vector_operations.h"
  #endif
 -#include "nbnxn_atomdata.h"
 -#include "nbnxn_search.h"
 -#include "gmx_omp_nthreads.h"
 -#include "nrnb.h"
 -#include "ns.h"
 -
 -#include "gromacs/fileio/gmxfio.h"
  
  #ifdef NBNXN_SEARCH_BB_SIMD4
  /* Always use 4-wide SIMD for bounding box calculations */
@@@ -1382,13 -1381,14 +1380,13 @@@ static void sort_columns_supersub(cons
                                    int cxy_start, int cxy_end,
                                    int *sort_work)
  {
 -    int  cxy;
 -    int  cx, cy, cz = -1, c = -1, ncz;
 -    int  na, ash, na_c, ind, a;
 -    int  subdiv_z, sub_z, na_z, ash_z;
 -    int  subdiv_y, sub_y, na_y, ash_y;
 -    int  subdiv_x, sub_x, na_x, ash_x;
 +    int        cxy;
 +    int        cx, cy, cz = -1, c = -1, ncz;
 +    int        na, ash, na_c, ind, a;
 +    int        subdiv_z, sub_z, na_z, ash_z;
 +    int        subdiv_y, sub_y, na_y, ash_y;
 +    int        subdiv_x, sub_x, na_x, ash_x;
  
 -    /* cppcheck-suppress unassignedVariable */
      nbnxn_bb_t bb_work_array[2], *bb_work_aligned;
  
      bb_work_aligned = (nbnxn_bb_t *)(((size_t)(bb_work_array+1)) & (~((size_t)15)));
@@@ -1766,7 -1766,7 +1764,7 @@@ static void init_buffer_flags(nbnxn_buf
      }
      for (b = 0; b < flags->nflag; b++)
      {
 -        flags->flag[b] = 0;
 +        bitmask_clear(&(flags->flag[b]));
      }
  }
  
@@@ -2943,10 -2943,10 +2941,10 @@@ static void make_cluster_list_simple(co
  }
  
  #ifdef GMX_NBNXN_SIMD_4XN
 -#include "nbnxn_search_simd_4xn.h"
 +#include "gromacs/mdlib/nbnxn_search_simd_4xn.h"
  #endif
  #ifdef GMX_NBNXN_SIMD_2XNN
 -#include "nbnxn_search_simd_2xnn.h"
 +#include "gromacs/mdlib/nbnxn_search_simd_2xnn.h"
  #endif
  
  /* Plain C or SIMD4 code for making a pair list of super-cell sci vs scj.
@@@ -3258,12 -3258,6 +3256,6 @@@ static void set_ci_top_excls(const nbnx
                          inner_e = ge - (se << na_cj_2log);
  
                          nbl->cj[found].excl &= ~(1U<<((inner_i<<na_cj_2log) + inner_e));
- /* The next code line is usually not needed. We do not want to version
-  * away the above line, because there is logic that relies on being
-  * able to detect easily whether any exclusions exist. */
- #if (defined GMX_SIMD_IBM_QPX)
-                         nbl->cj[found].interaction_mask_indices[inner_i] &= ~(1U << inner_e);
- #endif
                      }
                  }
              }
@@@ -4845,7 -4839,7 +4837,7 @@@ static void nbnxn_make_pairlist_part(co
      int               ndistc;
      int               ncpcheck;
      int               gridi_flag_shift = 0, gridj_flag_shift = 0;
 -    unsigned int     *gridj_flag       = NULL;
 +    gmx_bitmask_t    *gridj_flag       = NULL;
      int               ncj_old_i, ncj_old_j;
  
      nbs_cycle_start(&work->cc[enbsCCsearch]);
                                          cbl = nbl->cj[nbl->ncj-1].cj >> gridj_flag_shift;
                                          for (cb = cbf; cb <= cbl; cb++)
                                          {
 -                                            gridj_flag[cb] = 1U<<th;
 +                                            bitmask_init_bit(&gridj_flag[cb], th);
                                          }
                                      }
                                  }
  
          if (bFBufferFlag && nbl->ncj > ncj_old_i)
          {
 -            work->buffer_flags.flag[(gridi->cell0+ci)>>gridi_flag_shift] = 1U<<th;
 +            bitmask_init_bit(&(work->buffer_flags.flag[(gridi->cell0+ci)>>gridi_flag_shift]), th);
          }
      }
  
@@@ -5450,8 -5444,8 +5442,8 @@@ static void reduce_buffer_flags(const n
                                  int                         nsrc,
                                  const nbnxn_buffer_flags_t *dest)
  {
 -    int                 s, b;
 -    const unsigned int *flag;
 +    int            s, b;
 +    gmx_bitmask_t *flag;
  
      for (s = 0; s < nsrc; s++)
      {
  
          for (b = 0; b < dest->nflag; b++)
          {
 -            dest->flag[b] |= flag[b];
 +            bitmask_union(&(dest->flag[b]), flag[b]);
          }
      }
  }
  
  static void print_reduction_cost(const nbnxn_buffer_flags_t *flags, int nout)
  {
 -    int nelem, nkeep, ncopy, nred, b, c, out;
 +    int           nelem, nkeep, ncopy, nred, b, c, out;
 +    gmx_bitmask_t mask_0;
  
      nelem = 0;
      nkeep = 0;
      ncopy = 0;
      nred  = 0;
 +    bitmask_init_bit(&mask_0, 0);
      for (b = 0; b < flags->nflag; b++)
      {
 -        if (flags->flag[b] == 1)
 +        if (bitmask_is_equal(flags->flag[b], mask_0))
          {
              /* Only flag 0 is set, no copy of reduction required */
              nelem++;
              nkeep++;
          }
 -        else if (flags->flag[b] > 0)
 +        else if (!bitmask_is_zero(flags->flag[b]))
          {
              c = 0;
              for (out = 0; out < nout; out++)
              {
 -                if (flags->flag[b] & (1U<<out))
 +                if (bitmask_is_set(flags->flag[b], out))
                  {
                      c++;
                  }
index 33a307b58456ab8080b37e6c0309915d0f148217,8a328404badb64065b5791bef66e029bb0de6868..4e1c15a0ee65c8b19e4bc59d12345db7971d06f4
@@@ -33,8 -33,7 +33,6 @@@
   * the research papers on the package. Check out http://www.gromacs.org.
   */
  
- #include "config.h"
--
  #if GMX_SIMD_REAL_WIDTH >= NBNXN_CPU_CLUSTER_I_SIZE
  #define STRIDE_S  (GMX_SIMD_REAL_WIDTH)
  #else
@@@ -111,7 -110,6 +109,7 @@@ make_cluster_list_simd_4xn(const nbnxn_
      float                              d2;
      int                                xind_f, xind_l, cj;
  
 +    /* cppcheck-suppress selfAssignment . selfAssignment for width 4.*/
      cjf = CI_TO_CJ_SIMD_4XN(cjf);
      cjl = CI_TO_CJ_SIMD_4XN(cjl+1) - 1;
  
              /* Store cj and the interaction mask */
              nbl->cj[nbl->ncj].cj   = CI_TO_CJ_SIMD_4XN(gridj->cell0) + cj;
              nbl->cj[nbl->ncj].excl = get_imask_simd_4xn(remove_sub_diag, ci, cj);
- #ifdef GMX_SIMD_IBM_QPX
-             nbl->cj[nbl->ncj].interaction_mask_indices[0] = (nbl->cj[nbl->ncj].excl & 0x000F) >> (0 * 4);
-             nbl->cj[nbl->ncj].interaction_mask_indices[1] = (nbl->cj[nbl->ncj].excl & 0x00F0) >> (1 * 4);
-             nbl->cj[nbl->ncj].interaction_mask_indices[2] = (nbl->cj[nbl->ncj].excl & 0x0F00) >> (2 * 4);
-             nbl->cj[nbl->ncj].interaction_mask_indices[3] = (nbl->cj[nbl->ncj].excl & 0xF000) >> (3 * 4);
- #endif
              nbl->ncj++;
          }
          /* Increase the closing index in i super-cell list */