Merge release-5-0 into master

author Roland Schulz <roland@utk.edu>

Fri, 5 Dec 2014 23:31:17 +0000 (18:31 -0500)

committer Roland Schulz <roland@utk.edu>

Sun, 7 Dec 2014 00:08:19 +0000 (19:08 -0500)
author Roland Schulz <roland@utk.edu>
Fri, 5 Dec 2014 23:31:17 +0000 (18:31 -0500)
committer Roland Schulz <roland@utk.edu>
Sun, 7 Dec 2014 00:08:19 +0000 (19:08 -0500)
diff --combined docs/doxygen/suppressions.txt

index d1337b46dab06f6afe85173a414a23a45efa0ee5,592505a9621365d18c49fe914f035d8ae78291a4..71d9f29d23e6ac0dedbf67ee83ac07c9ed0f61ae
--- 1/docs/doxygen/suppressions.txt
--- 2/docs/doxygen/suppressions.txt
+++ b/docs/doxygen/suppressions.txt
@@@ -1,31 -1,21 +1,30 @@@
- -# These look like bugs in Doxygen 1.8.5
- -src/gromacs/gmxlib/gmx_cpuid.c: warning: duplicate declarations for a member 'gmx_cpuid_vendor'
- -src/gromacs/gmxlib/gmx_cpuid.c: warning: duplicate declarations for a member 'gmx_cpuid_x86_smt'
- -src/gromacs/gmxlib/gmx_cpuid.c: warning: duplicate declarations for a member 'gmx_cpuid_simd_suggest'
- -
   # The script is currently a bit too eager
   share/template/template.cpp: error: source file documentation appears outside full documentation
+ +# The parser in the script is not clever enough
+ +src/gromacs/version.h: warning: includes local file as <gromacs/version.h>
+ +
+ +# These are OK
+ +src/gromacs/math/vec.h: warning: installed header includes non-installed "config.h"
+ +src/gromacs/linearalgebra/eigensolver.c: warning: should include "config.h"
+ +src/gromacs/linearalgebra/gmx_arpack.c: warning: should include "config.h"
+ +src/gromacs/linearalgebra/gmx_blas/*: warning: does not include "gmxpre.h" first
+ +src/gromacs/linearalgebra/gmx_blas/*: warning: should include "config.h"
+ +src/gromacs/linearalgebra/gmx_lapack/*: warning: does not include "gmxpre.h" first
+ +src/gromacs/linearalgebra/gmx_lapack/*: warning: should include "config.h"
+ +src/gromacs/utility/baseversion-gen.c: warning: does not include "gmxpre.h" first
   
   # This module name doesn't really fall into any currently used pattern; needs some thought
   : error: no matching directory for module: module_mdrun_integration_tests
   
   # These would be nice to fix, but can wait for later
- -*: warning: includes local file as <config.h>
   src/gromacs/gmxlib/nonbonded/nb_kernel_*/*: warning: included file "gromacs/simd/math_x86_*.h" is not documented as exposed outside its module
- src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_inner.h: warning: should include "config.h"
+ +src/gromacs/gmxlib/nonbonded/nb_kernel_*/*: warning: includes "config.h" unnecessarily
+ +src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.c: warning: includes "config.h" unnecessarily
+ +src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.c: warning: includes "config.h" unnecessarily
+ +src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.c: warning: includes "config.h" unnecessarily
+ +src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_common.h: warning: should include "config.h"
+ +src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.c: warning: includes "config.h" unnecessarily
   
   # These are specific to Folding@Home, and easiest to suppress here
   *: warning: includes non-local file as "corewrap.h"
- -src/config.h.cmakein: warning: includes non-local file as "swindirect.h"
- -
- -# These are limitations in the current script
- -src/gromacs/utility/gmx_header_config.h: warning: includes non-local file as "gmx_header_config_gen.h"
+ +src/gmxpre.h: warning: includes non-local file as "swindirect.h"
diff --combined src/gromacs/domdec/domdec.cpp

index ba974d348f92b7afff6aa1703fc7e63de343fcb0,bf53531323672cbabd24c2af99727812b05b5dd2..54fb8f7c35120334dcf036902d8ee1d5cd5872c4
--- 1/src/gromacs/domdec/domdec.cpp
--- 2/src/gromacs/mdlib/domdec.c
+++ b/src/gromacs/domdec/domdec.cpp
@@@ -33,83 -33,53 +33,83 @@@
    * the research papers on the package. Check out http://www.gromacs.org.
    */
   
- -#ifdef HAVE_CONFIG_H
- -#include <config.h>
- -#endif
+ +#include "gmxpre.h"
   
- -#include <stdio.h>
- -#include <time.h>
+ +#include "domdec.h"
+ +
+ +#include "config.h"
+ +
+ +#include <assert.h>
+ +#include <limits.h>
   #include <math.h>
- -#include <string.h>
+ +#include <stdio.h>
   #include <stdlib.h>
- -#include <assert.h>
+ +#include <string.h>
   
- -#include "typedefs.h"
- -#include "gromacs/utility/smalloc.h"
- -#include "gmx_fatal.h"
- -#include "gmx_fatal_collective.h"
- -#include "vec.h"
- -#include "domdec.h"
- -#include "domdec_network.h"
- -#include "nrnb.h"
- -#include "pbc.h"
- -#include "chargegroup.h"
- -#include "constr.h"
- -#include "mdatoms.h"
- -#include "names.h"
- -#include "force.h"
- -#include "pme.h"
- -#include "mdrun.h"
- -#include "nsgrid.h"
- -#include "shellfc.h"
- -#include "mtop_util.h"
- -#include "gmx_ga2la.h"
- -#include "macros.h"
- -#include "nbnxn_search.h"
- -#include "bondf.h"
- -#include "gmx_omp_nthreads.h"
- -#include "gpu_utils.h"
- -
- -#include "gromacs/fileio/futil.h"
+ +#include <algorithm>
+ +
+ +#include "gromacs/domdec/domdec_network.h"
+ +#include "gromacs/ewald/pme.h"
   #include "gromacs/fileio/gmxfio.h"
   #include "gromacs/fileio/pdbio.h"
+ +#include "gromacs/imd/imd.h"
+ +#include "gromacs/legacyheaders/chargegroup.h"
+ +#include "gromacs/legacyheaders/constr.h"
+ +#include "gromacs/legacyheaders/force.h"
+ +#include "gromacs/legacyheaders/genborn.h"
+ +#include "gromacs/legacyheaders/gmx_ga2la.h"
+ +#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
+ +#include "gromacs/legacyheaders/gpu_utils.h"
+ +#include "gromacs/legacyheaders/mdatoms.h"
+ +#include "gromacs/legacyheaders/mdrun.h"
+ +#include "gromacs/legacyheaders/names.h"
+ +#include "gromacs/legacyheaders/network.h"
+ +#include "gromacs/legacyheaders/nrnb.h"
+ +#include "gromacs/legacyheaders/nsgrid.h"
+ +#include "gromacs/legacyheaders/shellfc.h"
+ +#include "gromacs/legacyheaders/typedefs.h"
+ +#include "gromacs/legacyheaders/vsite.h"
+ +#include "gromacs/legacyheaders/types/commrec.h"
+ +#include "gromacs/legacyheaders/types/constr.h"
+ +#include "gromacs/legacyheaders/types/enums.h"
+ +#include "gromacs/legacyheaders/types/forcerec.h"
+ +#include "gromacs/legacyheaders/types/hw_info.h"
+ +#include "gromacs/legacyheaders/types/ifunc.h"
+ +#include "gromacs/legacyheaders/types/inputrec.h"
+ +#include "gromacs/legacyheaders/types/mdatom.h"
+ +#include "gromacs/legacyheaders/types/nrnb.h"
+ +#include "gromacs/legacyheaders/types/ns.h"
+ +#include "gromacs/legacyheaders/types/nsgrid.h"
+ +#include "gromacs/legacyheaders/types/shellfc.h"
+ +#include "gromacs/legacyheaders/types/simple.h"
+ +#include "gromacs/legacyheaders/types/state.h"
+ +#include "gromacs/listed-forces/manage-threading.h"
+ +#include "gromacs/math/vec.h"
+ +#include "gromacs/math/vectypes.h"
+ +#include "gromacs/mdlib/nb_verlet.h"
+ +#include "gromacs/mdlib/nbnxn_search.h"
+ +#include "gromacs/pbcutil/ishift.h"
+ +#include "gromacs/pbcutil/pbc.h"
+ +#include "gromacs/pulling/pull.h"
+ +#include "gromacs/pulling/pull_rotation.h"
+ +#include "gromacs/swap/swapcoords.h"
   #include "gromacs/timing/wallcycle.h"
+ +#include "gromacs/topology/block.h"
+ +#include "gromacs/topology/idef.h"
+ +#include "gromacs/topology/mtop_util.h"
+ +#include "gromacs/topology/topology.h"
+ +#include "gromacs/utility/basedefinitions.h"
+ +#include "gromacs/utility/basenetwork.h"
+ +#include "gromacs/utility/cstringutil.h"
+ +#include "gromacs/utility/fatalerror.h"
   #include "gromacs/utility/gmxmpi.h"
- -#include "gromacs/swap/swapcoords.h"
   #include "gromacs/utility/qsort_threadsafe.h"
- -#include "gromacs/pulling/pull.h"
- -#include "gromacs/pulling/pull_rotation.h"
- -#include "gromacs/imd/imd.h"
+ +#include "gromacs/utility/real.h"
+ +#include "gromacs/utility/smalloc.h"
+ +
+ +#include "domdec_constraints.h"
+ +#include "domdec_internal.h"
+ +#include "domdec_vsite.h"
   
   #define DDRANK(dd, rank)    (rank)
   #define DDMASTERRANK(dd)   (dd->masterrank)
@@@ -798,7 -768,7 +798,7 @@@ void dd_move_f(gmx_domdec_t *dd, rvec f
       rvec                  *buf, *sbuf;
       ivec                   vis;
       int                    is;
- -    gmx_bool               bPBC, bScrew;
+ +    gmx_bool               bShiftForcesNeedPbc, bScrew;
   
       comm = dd->comm;
   
@@@ -806,17 -776,16 +806,17 @@@
   
       buf = comm->vbuf.v;
   
- -    n       = 0;
       nzone   = comm->zones.n/2;
       nat_tot = dd->nat_tot;
       for (d = dd->ndim-1; d >= 0; d--)
       {
- -        bPBC   = (dd->ci[dd->dim[d]] == 0);
- -        bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
+ +        /* Only forces in domains near the PBC boundaries need to
+ +           consider PBC in the treatment of fshift */
+ +        bShiftForcesNeedPbc   = (dd->ci[dd->dim[d]] == 0);
+ +        bScrew                = (bShiftForcesNeedPbc && dd->bScrewPBC && dd->dim[d] == XX);
           if (fshift == NULL && !bScrew)
           {
- -            bPBC = FALSE;
+ +            bShiftForcesNeedPbc = FALSE;
           }
           /* Determine which shift vector we need */
           clear_ivec(vis);
@@@ -852,7 -821,7 +852,7 @@@
               index = ind->index;
               /* Add the received forces */
               n = 0;
- -            if (!bPBC)
+ +            if (!bShiftForcesNeedPbc)
               {
                   for (i = 0; i < ind->nsend[nzone]; i++)
                   {
@@@ -867,9 -836,6 +867,9 @@@
               }
               else if (!bScrew)
               {
+ +                /* fshift should always be defined if this function is
+ +                 * called when bShiftForcesNeedPbc is true */
+ +                assert(NULL != fshift);
                   for (i = 0; i < ind->nsend[nzone]; i++)
                   {
                       at0 = cgindex[index[i]];
@@@ -990,6 -956,7 +990,6 @@@ void dd_atom_sum_real(gmx_domdec_t *dd
   
       buf = &comm->vbuf.v[0][0];
   
- -    n       = 0;
       nzone   = comm->zones.n/2;
       nat_tot = dd->nat_tot;
       for (d = dd->ndim-1; d >= 0; d--)
@@@ -1095,7 -1062,7 +1095,7 @@@ static void dd_sendrecv_ddzone(const gm
   static void dd_move_cellx(gmx_domdec_t *dd, gmx_ddbox_t *ddbox,
                             rvec cell_ns_x0, rvec cell_ns_x1)
   {
- -    int                d, d1, dim, dim1, pos, buf_size, i, j, k, p, npulse, npulse_min;
+ +    int                d, d1, dim, pos, buf_size, i, j, p, npulse, npulse_min;
       gmx_ddzone_t      *zp;
       gmx_ddzone_t       buf_s[DDZONECOMM_MAXZONE];
       gmx_ddzone_t       buf_r[DDZONECOMM_MAXZONE];
@@@ -1167,7 -1134,7 +1167,7 @@@
           if (bPBC)
           {
               /* Take the minimum to avoid double communication */
- -            npulse_min = min(npulse, dd->nc[dim]-1-npulse);
+ +            npulse_min = std::min(npulse, dd->nc[dim]-1-npulse);
           }
           else
           {
@@@ -1191,9 -1158,9 +1191,9 @@@
               {
                   for (d1 = d; d1 < dd->ndim-1; d1++)
                   {
- -                    extr_s[d1][0] = max(extr_s[d1][0], extr_r[d1][0]);
- -                    extr_s[d1][1] = min(extr_s[d1][1], extr_r[d1][1]);
- -                    extr_s[d1][2] = min(extr_s[d1][2], extr_r[d1][2]);
+ +                    extr_s[d1][0] = std::max(extr_s[d1][0], extr_r[d1][0]);
+ +                    extr_s[d1][1] = std::min(extr_s[d1][1], extr_r[d1][1]);
+ +                    extr_s[d1][2] = std::min(extr_s[d1][2], extr_r[d1][2]);
                   }
               }
           }
@@@ -1254,9 -1221,9 +1254,9 @@@
                   {
                       if (bUse)
                       {
- -                        buf_e[i].min0 = min(buf_e[i].min0, buf_r[i].min0);
- -                        buf_e[i].max1 = max(buf_e[i].max1, buf_r[i].max1);
- -                        buf_e[i].min1 = min(buf_e[i].min1, buf_r[i].min1);
+ +                        buf_e[i].min0 = std::min(buf_e[i].min0, buf_r[i].min0);
+ +                        buf_e[i].max1 = std::max(buf_e[i].max1, buf_r[i].max1);
+ +                        buf_e[i].min1 = std::min(buf_e[i].min1, buf_r[i].min1);
                       }
   
                       if (dd->ndim == 3 && d == 0 && i == buf_size - 1)
@@@ -1269,8 -1236,8 +1269,8 @@@
                       }
                       if (bUse && dh[d1] >= 0)
                       {
- -                        buf_e[i].mch0 = max(buf_e[i].mch0, buf_r[i].mch0-dh[d1]);
- -                        buf_e[i].mch1 = max(buf_e[i].mch1, buf_r[i].mch1-dh[d1]);
+ +                        buf_e[i].mch0 = std::max(buf_e[i].mch0, buf_r[i].mch0-dh[d1]);
+ +                        buf_e[i].mch1 = std::max(buf_e[i].mch1, buf_r[i].mch1-dh[d1]);
                       }
                   }
                   /* Copy the received buffer to the send buffer,
@@@ -1286,9 -1253,9 +1286,9 @@@
   
                   for (d1 = d; d1 < dd->ndim-1; d1++)
                   {
- -                    extr_s[d1][1] = min(extr_s[d1][1], buf_e[pos].min0);
- -                    extr_s[d1][0] = max(extr_s[d1][0], buf_e[pos].max1);
- -                    extr_s[d1][2] = min(extr_s[d1][2], buf_e[pos].min1);
+ +                    extr_s[d1][1] = std::min(extr_s[d1][1], buf_e[pos].min0);
+ +                    extr_s[d1][0] = std::max(extr_s[d1][0], buf_e[pos].max1);
+ +                    extr_s[d1][2] = std::min(extr_s[d1][2], buf_e[pos].min1);
                       pos++;
                   }
   
@@@ -1318,8 -1285,8 +1318,8 @@@
               {
                   print_ddzone(debug, 1, i, 0, &comm->zone_d1[i]);
               }
- -            cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d1[i].min0);
- -            cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d1[i].max1);
+ +            cell_ns_x0[dim] = std::min(cell_ns_x0[dim], comm->zone_d1[i].min0);
+ +            cell_ns_x1[dim] = std::max(cell_ns_x1[dim], comm->zone_d1[i].max1);
           }
       }
       if (dd->ndim >= 3)
@@@ -1333,8 -1300,8 +1333,8 @@@
                   {
                       print_ddzone(debug, 2, i, j, &comm->zone_d2[i][j]);
                   }
- -                cell_ns_x0[dim] = min(cell_ns_x0[dim], comm->zone_d2[i][j].min0);
- -                cell_ns_x1[dim] = max(cell_ns_x1[dim], comm->zone_d2[i][j].max1);
+ +                cell_ns_x0[dim] = std::min(cell_ns_x0[dim], comm->zone_d2[i][j].min0);
+ +                cell_ns_x1[dim] = std::max(cell_ns_x1[dim], comm->zone_d2[i][j].max1);
               }
           }
       }
@@@ -1560,6 -1527,10 +1560,6 @@@ static void dd_collect_vec_gatherv(gmx_
   void dd_collect_vec(gmx_domdec_t *dd,
                       t_state *state_local, rvec *lv, rvec *v)
   {
- -    gmx_domdec_master_t *ma;
- -    int                  n, i, c, a, nalloc = 0;
- -    rvec                *buf = NULL;
- -
       dd_collect_cg(dd, state_local);
   
       if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
@@@ -1781,7 -1752,7 +1781,7 @@@ static void dd_distribute_vec_scatterv(
   {
       gmx_domdec_master_t *ma;
       int                 *scounts = NULL, *disps = NULL;
- -    int                  n, i, c, a, nalloc = 0;
+ +    int                  n, i, c, a;
       rvec                *buf = NULL;
   
       if (DDMASTER(dd))
@@@ -1975,7 -1946,7 +1975,7 @@@ static void write_dd_grid_pdb(const cha
           snew(grid_r, 2*dd->nnodes);
       }
   
- -    dd_gather(dd, 2*sizeof(rvec), grid_s[0], DDMASTER(dd) ? grid_r[0] : NULL);
+ +    dd_gather(dd, 2*sizeof(rvec), grid_s, DDMASTER(dd) ? grid_r : NULL);
   
       if (DDMASTER(dd))
       {
@@@ -2097,7 -2068,7 +2097,7 @@@ void write_dd_pdb(const char *fn, gmx_i
       gmx_fio_fclose(out);
   }
   
- -real dd_cutoff_mbody(gmx_domdec_t *dd)
+ +real dd_cutoff_multibody(const gmx_domdec_t *dd)
   {
       gmx_domdec_comm_t *comm;
       int                di;
@@@ -2118,15 -2089,15 +2118,15 @@@
               r = comm->cellsize_min[dd->dim[0]];
               for (di = 1; di < dd->ndim; di++)
               {
- -                r = min(r, comm->cellsize_min[dd->dim[di]]);
+ +                r = std::min(r, comm->cellsize_min[dd->dim[di]]);
               }
               if (comm->bBondComm)
               {
- -                r = max(r, comm->cutoff_mbody);
+ +                r = std::max(r, comm->cutoff_mbody);
               }
               else
               {
- -                r = min(r, comm->cutoff);
+ +                r = std::min(r, comm->cutoff);
               }
           }
       }
@@@ -2134,13 -2105,13 +2134,13 @@@
       return r;
   }
   
- -real dd_cutoff_twobody(gmx_domdec_t *dd)
+ +real dd_cutoff_twobody(const gmx_domdec_t *dd)
   {
       real r_mb;
   
- -    r_mb = dd_cutoff_mbody(dd);
+ +    r_mb = dd_cutoff_multibody(dd);
   
- -    return max(dd->comm->cutoff, r_mb);
+ +    return std::max(dd->comm->cutoff, r_mb);
   }
   
   
@@@ -2202,7 -2173,7 +2202,7 @@@ static int *dd_pmenodes(t_commrec *cr
   static int gmx_ddcoord2pmeindex(t_commrec *cr, int x, int y, int z)
   {
       gmx_domdec_t *dd;
- -    ivec          coords, coords_pme, nc;
+ +    ivec          coords;
       int           slab;
   
       dd = cr->dd;
@@@ -2271,6 -2242,7 +2271,6 @@@ static int dd_simnode2pmenode(t_commre
   {
       gmx_domdec_t      *dd;
       gmx_domdec_comm_t *comm;
- -    ivec               coord, coord_pme;
       int                i;
       int                pmenode = -1;
   
@@@ -2281,7 -2253,6 +2281,7 @@@
       if (comm->bCartesianPP_PME)
       {
   #ifdef GMX_MPI
+ +        ivec coord, coord_pme;
           MPI_Cart_coords(cr->mpi_comm_mysim, sim_nodeid, DIM, coord);
           if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
           {
@@@ -2343,6 -2314,22 +2343,6 @@@ void get_pme_nnodes(const gmx_domdec_t 
       }
   }
   
- -gmx_bool gmx_pmeonlynode(t_commrec *cr, int sim_nodeid)
- -{
- -    gmx_bool bPMEOnlyNode;
- -
- -    if (DOMAINDECOMP(cr))
- -    {
- -        bPMEOnlyNode = (dd_simnode2pmenode(cr, sim_nodeid) == -1);
- -    }
- -    else
- -    {
- -        bPMEOnlyNode = FALSE;
- -    }
- -
- -    return bPMEOnlyNode;
- -}
- -
   void get_pme_ddnodes(t_commrec *cr, int pmenodeid,
                        int *nmy_ddnodes, int **my_ddnodes, int *node_peer)
   {
@@@ -2403,7 -2390,7 +2403,7 @@@
   static gmx_bool receive_vir_ener(t_commrec *cr)
   {
       gmx_domdec_comm_t *comm;
- -    int                pmenode, coords[DIM], rank;
+ +    int                pmenode;
       gmx_bool           bReceive;
   
       bReceive = TRUE;
@@@ -2414,12 -2401,10 +2414,12 @@@
           {
               pmenode = dd_simnode2pmenode(cr, cr->sim_nodeid);
   #ifdef GMX_MPI
+ +            ivec coords;
               MPI_Cart_coords(cr->mpi_comm_mysim, cr->sim_nodeid, DIM, coords);
               coords[comm->cartpmedim]++;
               if (coords[comm->cartpmedim] < cr->dd->nc[comm->cartpmedim])
               {
+ +                int rank;
                   MPI_Cart_rank(cr->mpi_comm_mysim, coords, &rank);
                   if (dd_simnode2pmenode(cr, rank) == pmenode)
                   {
@@@ -2527,8 -2512,12 +2527,8 @@@ static void make_dd_indices(gmx_domdec_
   {
       int          nzone, zone, zone1, cg0, cg1, cg1_p1, cg, cg_gl, a, a_gl;
       int         *zone2cg, *zone_ncg1, *index_gl, *gatindex;
- -    gmx_ga2la_t *ga2la;
- -    char        *bLocalCG;
       gmx_bool     bCGs;
   
- -    bLocalCG = dd->comm->bLocalCG;
- -
       if (dd->nat_tot > dd->gatindex_nalloc)
       {
           dd->gatindex_nalloc = over_alloc_dd(dd->nat_tot);
@@@ -2593,7 -2582,7 +2593,7 @@@
   static int check_bLocalCG(gmx_domdec_t *dd, int ncg_sys, const char *bLocalCG,
                             const char *where)
   {
- -    int ncg, i, ngl, nerr;
+ +    int i, ngl, nerr;
   
       nerr = 0;
       if (bLocalCG == NULL)
@@@ -2753,12 -2742,12 +2753,12 @@@ static real cellsize_min_dlb(gmx_domdec
           /* The cut-off might have changed, e.g. by PME load balacning,
            * from the value used to set comm->cellsize_min, so check it.
            */
- -        cellsize_min = max(cellsize_min, comm->cutoff/comm->cd[dim_ind].np_dlb);
+ +        cellsize_min = std::max(cellsize_min, comm->cutoff/comm->cd[dim_ind].np_dlb);
   
           if (comm->bPMELoadBalDLBLimits)
           {
               /* Check for the cut-off limit set by the PME load balancing */
- -            cellsize_min = max(cellsize_min, comm->PMELoadBal_max_cutoff/comm->cd[dim_ind].np_dlb);
+ +            cellsize_min = std::max(cellsize_min, comm->PMELoadBal_max_cutoff/comm->cd[dim_ind].np_dlb);
           }
       }
   
@@@ -2781,10 -2770,10 +2781,10 @@@ static real grid_jump_limit(gmx_domdec_
       {
           if (comm->bPMELoadBalDLBLimits)
           {
- -            cutoff = max(cutoff, comm->PMELoadBal_max_cutoff);
+ +            cutoff = std::max(cutoff, comm->PMELoadBal_max_cutoff);
           }
- -        grid_jump_limit = max(grid_jump_limit,
- -                              cutoff/comm->cd[dim_ind].np);
+ +        grid_jump_limit = std::max(grid_jump_limit,
+ +                                   cutoff/comm->cd[dim_ind].np);
       }
   
       return grid_jump_limit;
@@@ -2971,8 -2960,8 +2971,8 @@@ static void init_ddpme(gmx_domdec_t *dd
               {
                   slab = pmeindex % ddpme->nslab;
               }
- -            ddpme->pp_min[slab] = min(ddpme->pp_min[slab], xyz[dimind]);
- -            ddpme->pp_max[slab] = max(ddpme->pp_max[slab], xyz[dimind]);
+ +            ddpme->pp_min[slab] = std::min(ddpme->pp_min[slab], xyz[dimind]);
+ +            ddpme->pp_max[slab] = std::max(ddpme->pp_max[slab], xyz[dimind]);
           }
       }
   
@@@ -3173,7 -3162,7 +3173,7 @@@ static void set_dd_cell_sizes_slb(gmx_d
                   {
                       npulse[d]++;
                   }
- -                cellsize_min[d] = min(cellsize_min[d], cellsize);
+ +                cellsize_min[d] = std::min(cellsize_min[d], cellsize);
               }
               if (setmode == setcellsizeslbLOCAL)
               {
@@@ -3443,7 -3432,7 +3443,7 @@@ static void set_dd_cell_sizes_dlb_root(
                                          gmx_bool bUniform, gmx_int64_t step)
   {
       gmx_domdec_comm_t *comm;
- -    int                ncd, d1, i, j, pos;
+ +    int                ncd, d1, i, pos;
       real              *cell_size;
       real               load_aver, load_i, imbalance, change, change_max, sc;
       real               cellsize_limit_f, dist_min_f, dist_min_f_hard, space;
@@@ -3486,7 -3475,7 +3486,7 @@@
               imbalance = (load_i - load_aver)/(load_aver > 0 ? load_aver : 1);
               /* Determine the change of the cell size using underrelaxation */
               change     = -relax*imbalance;
- -            change_max = max(change_max, max(change, -change));
+ +            change_max = std::max(change_max, std::max(change, -change));
           }
           /* Limit the amount of scaling.
            * We need to use the same rescaling for all cells in one row,
@@@ -3629,7 -3618,7 +3629,7 @@@ static void distribute_dd_cell_sizes_dl
                                            gmx_ddbox_t *ddbox)
   {
       gmx_domdec_comm_t *comm;
- -    int                d1, dim1, pos;
+ +    int                d1, pos;
   
       comm = dd->comm;
   
@@@ -3917,13 -3906,13 +3917,13 @@@ static void distribute_cg(FILE *fplog, 
   {
       gmx_domdec_master_t *ma;
       int                **tmp_ind = NULL, *tmp_nalloc = NULL;
- -    int                  i, icg, j, k, k0, k1, d, npbcdim;
+ +    int                  i, icg, j, k, k0, k1, d;
       matrix               tcm;
- -    rvec                 box_size, cg_cm;
+ +    rvec                 cg_cm;
       ivec                 ind;
       real                 nrcg, inv_ncg, pos_d;
       atom_id             *cgindex;
- -    gmx_bool             bUnbounded, bScrew;
+ +    gmx_bool             bScrew;
   
       ma = dd->ma;
   
@@@ -4131,9 -4120,9 +4131,9 @@@ static void get_cg_distribution(FILE *f
       }
   
       dd_scatterv(dd,
- -                DDMASTER(dd) ? ma->ibuf : NULL,
- -                DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
- -                DDMASTER(dd) ? ma->cg : NULL,
+ +                bMaster ? ma->ibuf : NULL,
+ +                bMaster ? ma->ibuf+dd->nnodes : NULL,
+ +                bMaster ? ma->cg : NULL,
                   dd->ncg_home*sizeof(int), dd->index_gl);
   
       /* Determine the home charge group sizes */
@@@ -4468,8 -4457,8 +4468,8 @@@ static void calc_cg_move(FILE *fplog, g
                            int *move)
   {
       int      npbcdim;
- -    int      c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
- -    int      mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
+ +    int      cg, k, k0, k1, d, dim, d2;
+ +    int      mc, nrcg;
       int      flag;
       gmx_bool bScrew;
       ivec     dev;
@@@ -4645,15 -4634,17 +4645,15 @@@ static void dd_redistribute_cg(FILE *fp
       int               *move;
       int                npbcdim;
       int                ncg[DIM*2], nat[DIM*2];
- -    int                c, i, cg, k, k0, k1, d, dim, dim2, dir, d2, d3, d4, cell_d;
- -    int                mc, cdd, nrcg, ncg_recv, nat_recv, nvs, nvr, nvec, vec;
+ +    int                c, i, cg, k, d, dim, dim2, dir, d2, d3;
+ +    int                mc, cdd, nrcg, ncg_recv, nvs, nvr, nvec, vec;
       int                sbuf[2], rbuf[2];
       int                home_pos_cg, home_pos_at, buf_pos;
       int                flag;
       gmx_bool           bV = FALSE, bSDX = FALSE, bCGP = FALSE;
- -    gmx_bool           bScrew;
- -    ivec               dev;
- -    real               inv_ncg, pos_d;
+ +    real               pos_d;
       matrix             tcm;
- -    rvec              *cg_cm = NULL, cell_x0, cell_x1, limitd, limit0, limit1, cm_new;
+ +    rvec              *cg_cm = NULL, cell_x0, cell_x1, limitd, limit0, limit1;
       atom_id           *cgindex;
       cginfo_mb_t       *cginfo_mb;
       gmx_domdec_comm_t *comm;
@@@ -4910,6 -4901,7 +4910,6 @@@
       {
           dim      = dd->dim[d];
           ncg_recv = 0;
- -        nat_recv = 0;
           nvr      = 0;
           for (dir = 0; dir < (dd->nc[dim] == 2 ? 1 : 2); dir++)
           {
@@@ -4944,6 -4936,7 +4944,6 @@@
                                comm->cgcm_state[cdd], nvs,
                                comm->vbuf.v+nvr, i);
               ncg_recv += rbuf[0];
- -            nat_recv += rbuf[1];
               nvr      += i;
           }
   
@@@ -5247,7 -5240,7 +5247,7 @@@ static void get_load_distribution(gmx_d
       gmx_domdec_comm_t *comm;
       gmx_domdec_load_t *load;
       gmx_domdec_root_t *root = NULL;
- -    int                d, dim, cid, i, pos;
+ +    int                d, dim, i, pos;
       float              cell_frac = 0, sbuf[DD_NLOAD_MAX];
       gmx_bool           bSepPME;
   
@@@ -5343,7 -5336,7 +5343,7 @@@
                   for (i = 0; i < dd->nc[dim]; i++)
                   {
                       load->sum += load->load[pos++];
- -                    load->max  = max(load->max, load->load[pos]);
+ +                    load->max  = std::max(load->max, load->load[pos]);
                       pos++;
                       if (dd->bGridJump)
                       {
@@@ -5352,14 -5345,14 +5352,14 @@@
                               /* This direction could not be load balanced properly,
                                * therefore we need to use the maximum iso the average load.
                                */
- -                            load->sum_m = max(load->sum_m, load->load[pos]);
+ +                            load->sum_m = std::max(load->sum_m, load->load[pos]);
                           }
                           else
                           {
                               load->sum_m += load->load[pos];
                           }
                           pos++;
- -                        load->cvol_min = min(load->cvol_min, load->load[pos]);
+ +                        load->cvol_min = std::min(load->cvol_min, load->load[pos]);
                           pos++;
                           if (d < dd->ndim-1)
                           {
@@@ -5373,9 -5366,9 +5373,9 @@@
                       }
                       if (bSepPME)
                       {
- -                        load->mdf = max(load->mdf, load->load[pos]);
+ +                        load->mdf = std::max(load->mdf, load->load[pos]);
                           pos++;
- -                        load->pme = max(load->pme, load->load[pos]);
+ +                        load->pme = std::max(load->pme, load->load[pos]);
                           pos++;
                       }
                   }
@@@ -5769,6 -5762,7 +5769,6 @@@ static void make_load_communicators(gmx
   
   void setup_dd_grid(FILE *fplog, gmx_domdec_t *dd)
   {
- -    gmx_bool                bZYX;
       int                     d, dim, i, j, m;
       ivec                    tmp, s;
       int                     nzone, nzonep;
@@@ -5927,16 -5921,17 +5927,16 @@@
   static void make_pp_communicator(FILE *fplog, t_commrec *cr, int gmx_unused reorder)
   {
       gmx_domdec_t      *dd;
+ +    dd   = cr->dd;
+ +
+ +#ifdef GMX_MPI
       gmx_domdec_comm_t *comm;
- -    int                i, rank, *buf;
+ +    int                rank, *buf;
       ivec               periods;
- -#ifdef GMX_MPI
       MPI_Comm           comm_cart;
- -#endif
   
- -    dd   = cr->dd;
       comm = dd->comm;
   
- -#ifdef GMX_MPI
       if (comm->bCartesianPP)
       {
           /* Set up cartesian communication for the particle-particle part */
@@@ -5946,7 -5941,7 +5946,7 @@@
                       dd->nc[XX], dd->nc[YY], dd->nc[ZZ]);
           }
   
- -        for (i = 0; i < DIM; i++)
+ +        for (int i = 0; i < DIM; i++)
           {
               periods[i] = TRUE;
           }
@@@ -6010,7 -6005,7 +6010,7 @@@
           /* Determine the master coordinates and rank.
            * The DD master should be the same node as the master of this sim.
            */
- -        for (i = 0; i < dd->nnodes; i++)
+ +        for (int i = 0; i < dd->nnodes; i++)
           {
               if (comm->ddindex2simnodeid[i] == 0)
               {
@@@ -6048,27 -6043,30 +6048,27 @@@
       }
   }
   
- -static void receive_ddindex2simnodeid(t_commrec *cr)
+ +static void receive_ddindex2simnodeid(t_commrec gmx_unused *cr)
   {
+ +#ifdef GMX_MPI
       gmx_domdec_t      *dd;
- -
       gmx_domdec_comm_t *comm;
- -    int               *buf;
   
       dd   = cr->dd;
       comm = dd->comm;
   
- -#ifdef GMX_MPI
       if (!comm->bCartesianPP_PME && comm->bCartesianPP)
       {
+ +        int *buf;
           snew(comm->ddindex2simnodeid, dd->nnodes);
           snew(buf, dd->nnodes);
           if (cr->duty & DUTY_PP)
           {
               buf[dd_index(dd->nc, dd->ci)] = cr->sim_nodeid;
           }
- -#ifdef GMX_MPI
           /* Communicate the ddindex to simulation nodeid index */
           MPI_Allreduce(buf, comm->ddindex2simnodeid, dd->nnodes, MPI_INT, MPI_SUM,
                         cr->mpi_comm_mysim);
- -#endif
           sfree(buf);
       }
   #endif
@@@ -6110,8 -6108,9 +6110,8 @@@ static void split_communicator(FILE *fp
   {
       gmx_domdec_t      *dd;
       gmx_domdec_comm_t *comm;
- -    int                i, rank;
+ +    int                i;
       gmx_bool           bDiv[DIM];
- -    ivec               periods;
   #ifdef GMX_MPI
       MPI_Comm           comm_cart;
   #endif
@@@ -6159,9 -6158,6 +6159,9 @@@
   #ifdef GMX_MPI
       if (comm->bCartesianPP_PME)
       {
+ +        int  rank;
+ +        ivec periods;
+ +
           if (fplog)
           {
               fprintf(fplog, "Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n", comm->ntot[XX], comm->ntot[YY], comm->ntot[ZZ]);
@@@ -6173,6 -6169,7 +6173,6 @@@
           }
           MPI_Cart_create(cr->mpi_comm_mysim, DIM, comm->ntot, periods, reorder,
                           &comm_cart);
- -
           MPI_Comm_rank(comm_cart, &rank);
           if (MASTERNODE(cr) && rank != 0)
           {
@@@ -6356,7 -6353,7 +6356,7 @@@ static real *get_slb_frac(FILE *fplog, 
           for (i = 0; i < nc; i++)
           {
               dbl = 0;
- -            sscanf(size_string, "%lf%n", &dbl, &n);
+ +            sscanf(size_string, "%20lf%n", &dbl, &n);
               if (dbl == 0)
               {
                   gmx_fatal(FARGS, "Incorrect or not enough DD cell size entries for direction %s: '%s'", dir, size_string);
@@@ -6419,7 -6416,7 +6419,7 @@@ static int dd_getenv(FILE *fplog, cons
       val = getenv(env_var);
       if (val)
       {
- -        if (sscanf(val, "%d", &nst) <= 0)
+ +        if (sscanf(val, "%20d", &nst) <= 0)
           {
               nst = 1;
           }
@@@ -6480,7 -6477,7 +6480,7 @@@ static real average_cellsize_min(gmx_do
       {
           d = dd->dim[di];
           /* Check using the initial average cell size */
- -        r = min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
+ +        r = std::min(r, ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
       }
   
       return r;
@@@ -6490,6 -6487,7 +6490,6 @@@ static int check_dlb_support(FILE *fplo
                                const char *dlb_opt, gmx_bool bRecordLoad,
                                unsigned long Flags, t_inputrec *ir)
   {
- -    gmx_domdec_t *dd;
       int           eDLB = -1;
       char          buf[STRLEN];
   
@@@ -6631,10 -6629,10 +6631,10 @@@ gmx_domdec_t *init_domain_decomposition
       gmx_domdec_t      *dd;
       gmx_domdec_comm_t *comm;
       int                recload;
- -    int                d, i, j;
       real               r_2b, r_mb, r_bonded = -1, r_bonded_limit = -1, limit, acs;
       gmx_bool           bC;
       char               buf[STRLEN];
+ +    const real         tenPercentMargin = 1.1;
   
       if (fplog)
       {
@@@ -6760,8 -6758,8 +6760,8 @@@
        * within nstlist steps. Since boundaries are allowed to displace by half
        * a cell size, DD cells should be at least the size of the list buffer.
        */
- -    comm->cellsize_limit = max(comm->cellsize_limit,
- -                               ir->rlistlong - max(ir->rvdw, ir->rcoulomb));
+ +    comm->cellsize_limit = std::max(comm->cellsize_limit,
+ +                                    ir->rlistlong - std::max(ir->rvdw, ir->rcoulomb));
   
       if (comm->bInterCGBondeds)
       {
@@@ -6774,7 -6772,7 +6774,7 @@@
               }
               else
               {
- -                comm->cutoff = max(comm->cutoff, comm->cutoff_mbody);
+ +                comm->cutoff = std::max(comm->cutoff, comm->cutoff_mbody);
               }
               r_bonded_limit = comm->cutoff_mbody;
           }
@@@ -6800,16 -6798,16 +6800,16 @@@
                */
               if (Flags & MD_DDBONDCOMM)
               {
- -                if (max(r_2b, r_mb) > comm->cutoff)
+ +                if (std::max(r_2b, r_mb) > comm->cutoff)
                   {
- -                    r_bonded        = max(r_2b, r_mb);
- -                    r_bonded_limit  = 1.1*r_bonded;
+ +                    r_bonded        = std::max(r_2b, r_mb);
+ +                    r_bonded_limit  = tenPercentMargin*r_bonded;
                       comm->bBondComm = TRUE;
                   }
                   else
                   {
                       r_bonded       = r_mb;
- -                    r_bonded_limit = min(1.1*r_bonded, comm->cutoff);
+ +                    r_bonded_limit = std::min(tenPercentMargin*r_bonded, comm->cutoff);
                   }
                   /* We determine cutoff_mbody later */
               }
@@@ -6818,9 -6816,9 +6818,9 @@@
                   /* No special bonded communication,
                    * simply increase the DD cut-off.
                    */
- -                r_bonded_limit     = 1.1*max(r_2b, r_mb);
+ +                r_bonded_limit     = tenPercentMargin*std::max(r_2b, r_mb);
                   comm->cutoff_mbody = r_bonded_limit;
- -                comm->cutoff       = max(comm->cutoff, comm->cutoff_mbody);
+ +                comm->cutoff       = std::max(comm->cutoff, comm->cutoff_mbody);
               }
           }
           if (fplog)
@@@ -6829,7 -6827,7 +6829,7 @@@
                       "Minimum cell size due to bonded interactions: %.3f nm\n",
                       r_bonded_limit);
           }
- -        comm->cellsize_limit = max(comm->cellsize_limit, r_bonded_limit);
+ +        comm->cellsize_limit = std::max(comm->cellsize_limit, r_bonded_limit);
       }
   
       if (dd->bInterCGcons && rconstr <= 0)
@@@ -6857,7 -6855,7 +6857,7 @@@
                   "User supplied maximum distance required for P-LINCS: %.3f nm\n",
                   rconstr);
       }
- -    comm->cellsize_limit = max(comm->cellsize_limit, rconstr);
+ +    comm->cellsize_limit = std::max(comm->cellsize_limit, rconstr);
   
       comm->cgs_gl = gmx_mtop_global_cgs(mtop);
   
@@@ -7013,12 -7011,12 +7013,12 @@@
               if (comm->eDLB != edlbNO)
               {
                   /* Check if this does not limit the scaling */
- -                comm->cutoff_mbody = min(comm->cutoff_mbody, dlb_scale*acs);
+ +                comm->cutoff_mbody = std::min(comm->cutoff_mbody, dlb_scale*acs);
               }
               if (!comm->bBondComm)
               {
                   /* Without bBondComm do not go beyond the n.b. cut-off */
- -                comm->cutoff_mbody = min(comm->cutoff_mbody, comm->cutoff);
+ +                comm->cutoff_mbody = std::min(comm->cutoff_mbody, comm->cutoff);
                   if (comm->cellsize_limit >= comm->cutoff)
                   {
                       /* We don't loose a lot of efficieny
@@@ -7030,7 -7028,7 +7030,7 @@@
                   }
               }
               /* Check if we did not end up below our original limit */
- -            comm->cutoff_mbody = max(comm->cutoff_mbody, r_bonded_limit);
+ +            comm->cutoff_mbody = std::max(comm->cutoff_mbody, r_bonded_limit);
   
               if (comm->cutoff_mbody > comm->cellsize_limit)
               {
@@@ -7093,7 -7091,7 +7093,7 @@@ static void turn_on_dlb(FILE *fplog, t_
       cellsize_min = comm->cellsize_min[dd->dim[0]];
       for (d = 1; d < dd->ndim; d++)
       {
- -        cellsize_min = min(cellsize_min, comm->cellsize_min[dd->dim[d]]);
+ +        cellsize_min = std::min(cellsize_min, comm->cellsize_min[dd->dim[d]]);
       }
   
       if (cellsize_min < comm->cellsize_limit*1.05)
@@@ -7158,6 -7156,8 +7158,6 @@@ void dd_init_bondeds(FILE *fplog
                        t_inputrec *ir, gmx_bool bBCheck, cginfo_mb_t *cginfo_mb)
   {
       gmx_domdec_comm_t *comm;
- -    gmx_bool           bBondComm;
- -    int                d;
   
       dd_make_reverse_top(fplog, dd, mtop, vsite, ir, bBCheck);
   
@@@ -7268,7 -7268,7 +7268,7 @@@ static void print_dd_settings(FILE *fpl
               limit = dd->comm->cellsize_min[XX];
               for (d = 1; d < DIM; d++)
               {
- -                limit = min(limit, dd->comm->cellsize_min[d]);
+ +                limit = std::min(limit, dd->comm->cellsize_min[d]);
               }
           }
   
@@@ -7276,10 -7276,10 +7276,10 @@@
           {
               fprintf(fplog, "%40s  %-7s %6.3f nm\n",
                       "two-body bonded interactions", "(-rdd)",
- -                    max(comm->cutoff, comm->cutoff_mbody));
+ +                    std::max(comm->cutoff, comm->cutoff_mbody));
               fprintf(fplog, "%40s  %-7s %6.3f nm\n",
                       "multi-body bonded interactions", "(-rdd)",
- -                    (comm->bBondComm || dd->bGridJump) ? comm->cutoff_mbody : min(comm->cutoff, limit));
+ +                    (comm->bBondComm || dd->bGridJump) ? comm->cutoff_mbody : std::min(comm->cutoff, limit));
           }
           if (dd->vsite_comm)
           {
@@@ -7314,7 -7314,7 +7314,7 @@@ static void set_cell_limits_dlb(gmx_dom
   
       /* Determine the maximum number of comm. pulses in one dimension */
   
- -    comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
+ +    comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
   
       /* Determine the maximum required number of grid pulses */
       if (comm->cellsize_limit >= comm->cutoff)
@@@ -7335,7 -7335,7 +7335,7 @@@
       else
       {
           /* There is no cell size limit */
- -        npulse = max(dd->nc[XX]-1, max(dd->nc[YY]-1, dd->nc[ZZ]-1));
+ +        npulse = std::max(dd->nc[XX]-1, std::max(dd->nc[YY]-1, dd->nc[ZZ]-1));
       }
   
       if (!bNoCutOff && npulse > 1)
@@@ -7347,9 -7347,9 +7347,9 @@@
               dim      = dd->dim[d];
               npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
                                /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
- -            npulse_d_max = max(npulse_d_max, npulse_d);
+ +            npulse_d_max = std::max(npulse_d_max, npulse_d);
           }
- -        npulse = min(npulse, npulse_d_max);
+ +        npulse = std::min(npulse, npulse_d_max);
       }
   
       /* This env var can override npulse */
@@@ -7363,10 -7363,10 +7363,10 @@@
       comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
       for (d = 0; d < dd->ndim; d++)
       {
- -        comm->cd[d].np_dlb    = min(npulse, dd->nc[dd->dim[d]]-1);
+ +        comm->cd[d].np_dlb    = std::min(npulse, dd->nc[dd->dim[d]]-1);
           comm->cd[d].np_nalloc = comm->cd[d].np_dlb;
           snew(comm->cd[d].ind, comm->cd[d].np_nalloc);
- -        comm->maxpulse = max(comm->maxpulse, comm->cd[d].np_dlb);
+ +        comm->maxpulse = std::max(comm->maxpulse, comm->cd[d].np_dlb);
           if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
           {
               comm->bVacDLBNoLimit = FALSE;
@@@ -7376,10 -7376,10 +7376,10 @@@
       /* cellsize_limit is set for LINCS in init_domain_decomposition */
       if (!comm->bVacDLBNoLimit)
       {
- -        comm->cellsize_limit = max(comm->cellsize_limit,
- -                                   comm->cutoff/comm->maxpulse);
+ +        comm->cellsize_limit = std::max(comm->cellsize_limit,
+ +                                        comm->cutoff/comm->maxpulse);
       }
- -    comm->cellsize_limit = max(comm->cellsize_limit, comm->cutoff_mbody);
+ +    comm->cellsize_limit = std::max(comm->cellsize_limit, comm->cutoff_mbody);
       /* Set the minimum cell size for each DD dimension */
       for (d = 0; d < dd->ndim; d++)
       {
@@@ -7396,7 -7396,7 +7396,7 @@@
       }
       if (comm->cutoff_mbody <= 0)
       {
- -        comm->cutoff_mbody = min(comm->cutoff, comm->cellsize_limit);
+ +        comm->cutoff_mbody = std::min(comm->cutoff, comm->cellsize_limit);
       }
       if (comm->bDynLoadBal)
       {
@@@ -7487,7 -7487,7 +7487,7 @@@ void set_dd_parameters(FILE *fplog, gmx
       }
       natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
   
- -    dd->ga2la = ga2la_init(natoms_tot, vol_frac*natoms_tot);
+ +    dd->ga2la = ga2la_init(natoms_tot, static_cast<int>(vol_frac*natoms_tot));
   }
   
   static gmx_bool test_dd_cutoff(t_commrec *cr,
@@@ -7765,11 -7765,11 +7765,11 @@@ set_dd_corners(const gmx_domdec_t *dd
           c->c[1][1] = comm->cell_x0[dim1];
           if (dd->bGridJump)
           {
- -            c->c[1][1] = max(comm->cell_x0[dim1], comm->zone_d1[1].mch0);
+ +            c->c[1][1] = std::max(comm->cell_x0[dim1], comm->zone_d1[1].mch0);
               if (bDistMB)
               {
                   /* For the multi-body distance we need the maximum */
- -                c->bc[1] = max(comm->cell_x0[dim1], comm->zone_d1[1].p1_0);
+ +                c->bc[1] = std::max(comm->cell_x0[dim1], comm->zone_d1[1].p1_0);
               }
           }
           /* Set the upper-right corner for rounding */
@@@ -7792,8 -7792,8 +7792,8 @@@
                           if (j >= 4)
                           {
                               c->c[2][j-4] =
- -                                max(c->c[2][j-4],
- -                                    comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
+ +                                std::max(c->c[2][j-4],
+ +                                         comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
                           }
                       }
                   }
@@@ -7805,7 -7805,7 +7805,7 @@@
                       {
                           for (j = 0; j < 2; j++)
                           {
- -                            c->bc[2] = max(c->bc[2], comm->zone_d2[i][j].p1_0);
+ +                            c->bc[2] = std::max(c->bc[2], comm->zone_d2[i][j].p1_0);
                           }
                       }
                   }
@@@ -7819,11 -7819,11 +7819,11 @@@
               c->cr1[3] = comm->cell_x1[dim1];
               if (dd->bGridJump)
               {
- -                c->cr1[0] = max(comm->cell_x1[dim1], comm->zone_d1[1].mch1);
+ +                c->cr1[0] = std::max(comm->cell_x1[dim1], comm->zone_d1[1].mch1);
                   if (bDistMB)
                   {
                       /* For the multi-body distance we need the maximum */
- -                    c->bcr1 = max(comm->cell_x1[dim1], comm->zone_d1[1].p1_1);
+ +                    c->bcr1 = std::max(comm->cell_x1[dim1], comm->zone_d1[1].p1_1);
                   }
               }
           }
@@@ -8103,7 -8103,7 +8103,7 @@@ static void setup_dd_communication(gmx_
   {
       int                    dim_ind, dim, dim0, dim1, dim2, dimd, p, nat_tot;
       int                    nzone, nzone_send, zone, zonei, cg0, cg1;
- -    int                    c, i, j, cg, cg_gl, nrcg;
+ +    int                    c, i, cg, cg_gl, nrcg;
       int                   *zone_cg_range, pos_cg, *index_gl, *cgindex, *recv_i;
       gmx_domdec_comm_t     *comm;
       gmx_domdec_zones_t    *zones;
@@@ -8111,7 -8111,7 +8111,7 @@@
       gmx_domdec_ind_t      *ind;
       cginfo_mb_t           *cginfo_mb;
       gmx_bool               bBondComm, bDist2B, bDistMB, bDistBonded;
- -    real                   r_mb, r_comm2, r_scomm2, r_bcomm2, r_0, r_1, r2inc, inv_ncg;
+ +    real                   r_comm2, r_bcomm2;
       dd_corners_t           corners;
       ivec                   tric_dist;
       rvec                  *cg_cm, *normal, *v_d, *v_0 = NULL, *v_1 = NULL, *recv_vr;
@@@ -8142,6 -8142,8 +8142,6 @@@
   
       for (dim_ind = 0; dim_ind < dd->ndim; dim_ind++)
       {
- -        dim = dd->dim[dim_ind];
- -
           /* Check if we need to use triclinic distances */
           tric_dist[dim_ind] = 0;
           for (i = 0; i <= dim_ind; i++)
@@@ -8433,7 -8435,7 +8433,7 @@@
                       /* The rvec buffer is also required for atom buffers
                        * of size nrecv in dd_move_x and dd_move_f.
                        */
- -                    i = max(cd->ind[0].nrecv[nzone+1], ind->nrecv[nzone+1]);
+ +                    i = std::max(cd->ind[0].nrecv[nzone+1], ind->nrecv[nzone+1]);
                       vec_rvec_check_alloc(&comm->vbuf2, i);
                   }
               }
@@@ -8578,9 -8580,10 +8578,9 @@@ static void set_zones_size(gmx_domdec_
       gmx_domdec_comm_t  *comm;
       gmx_domdec_zones_t *zones;
       gmx_bool            bDistMB;
- -    int                 z, zi, zj0, zj1, d, dim;
+ +    int                 z, zi, d, dim;
       real                rcs, rcmbs;
       int                 i, j;
- -    real                size_j, add_tric;
       real                vol;
   
       comm = dd->comm;
@@@ -8678,8 -8681,8 +8678,8 @@@
                                    * With multiple pulses this will lead
                                    * to a larger zone then strictly necessary.
                                    */
- -                                zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
- -                                                             zones->size[zi].x1[dim]+rcmbs);
+ +                                zones->size[z].x1[dim] = std::max(zones->size[z].x1[dim],
+ +                                                                  zones->size[zi].x1[dim]+rcmbs);
                               }
                           }
                       }
@@@ -8698,8 -8701,8 +8698,8 @@@
                   {
                       if (zones->shift[z][dim] > 0)
                       {
- -                        zones->size[z].x1[dim] = max(zones->size[z].x1[dim],
- -                                                     zones->size[zi].x1[dim]+rcs);
+ +                        zones->size[z].x1[dim] = std::max(zones->size[z].x1[dim],
+ +                                                          zones->size[zi].x1[dim]+rcs);
                       }
                   }
               }
@@@ -8737,21 -8740,22 +8737,22 @@@
               {
                   corner[ZZ] = zones->size[z].x1[ZZ];
               }
-             if (dd->ndim == 1 && box[ZZ][YY] != 0)
-             {
-                 /* With 1D domain decomposition the cg's are not in
-                  * the triclinic box, but triclinic x-y and rectangular y-z.
-                  * Shift y back, so it will later end up at 0.
-                  */
-                 corner[YY] -= corner[ZZ]*box[ZZ][YY]/box[ZZ][ZZ];
-             }
               /* Apply the triclinic couplings */
               assert(ddbox->npbcdim <= DIM);
               for (i = YY; i < ddbox->npbcdim; i++)
               {
                   for (j = XX; j < i; j++)
                   {
-                     corner[j] += corner[i]*box[i][j]/box[i][i];
+                     /* With 1D domain decomposition the cg's are not in
+                      * a triclinic box, but triclinic x-y and rectangular y/x-z.
+                      * So we should ignore the coupling for the non
+                      * domain-decomposed dimension of the pair x and y.
+                      */
+                     if (!(dd->ndim == 1 && ((dd->dim[0] == XX && j == YY) ||
+                                             (dd->dim[0] == YY && j == XX))))
+                     {
+                         corner[j] += corner[i]*box[i][j]/box[i][i];
+                     }
                   }
               }
               if (c == 0)
@@@ -8763,8 -8767,8 +8764,8 @@@
               {
                   for (i = 0; i < DIM; i++)
                   {
- -                    corner_min[i] = min(corner_min[i], corner[i]);
- -                    corner_max[i] = max(corner_max[i], corner[i]);
+ +                    corner_min[i] = std::min(corner_min[i], corner[i]);
+ +                    corner_max[i] = std::max(corner_max[i], corner[i]);
                   }
               }
           }
@@@ -8934,7 -8938,8 +8935,7 @@@ static int dd_sort_order(gmx_domdec_t *
   {
       gmx_domdec_sort_t *sort;
       gmx_cgsort_t      *cgsort, *sort_i;
- -    int                ncg_new, nsort2, nsort_new, i, *a, moved, *ibuf;
- -    int                sort_last, sort_skip;
+ +    int                ncg_new, nsort2, nsort_new, i, *a, moved;
   
       sort = dd->comm->sort;
   
@@@ -9045,7 -9050,7 +9046,7 @@@ static void dd_sort_state(gmx_domdec_t 
                             int ncg_home_old)
   {
       gmx_domdec_sort_t *sort;
- -    gmx_cgsort_t      *cgsort, *sort_i;
+ +    gmx_cgsort_t      *cgsort;
       int               *cgindex;
       int                ncg_new, i, *ibuf, cgsize;
       rvec              *vbuf;
@@@ -9302,7 -9307,7 +9303,7 @@@ void dd_partition_system(FIL
       t_block           *cgs_gl;
       gmx_int64_t        step_pcoupl;
       rvec               cell_ns_x0, cell_ns_x1;
- -    int                i, j, n, ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum;
+ +    int                i, n, ncgindex_set, ncg_home_old = -1, ncg_moved, nat_f_novirsum;
       gmx_bool           bBoxChanged, bNStGlobalComm, bDoDLB, bCheckDLB, bTurnOnDLB, bLogLoad;
       gmx_bool           bRedist, bSortCG, bResortAll;
       ivec               ncells_old = {0, 0, 0}, ncells_new = {0, 0, 0}, np;
@@@ -9371,6 -9376,7 +9372,6 @@@
                * so the extra communication cost is negligible.
                */
               const int nddp_chk_dlb = 100;
- -
               bCheckDLB = (comm->n_load_collect == 0 ||
                            comm->n_load_have % nddp_chk_dlb == nddp_chk_dlb - 1);
           }
diff --combined src/gromacs/gmxlib/gmx_thread_affinity.c

index 75e277999be1fcef991d5d14a30d2456a8449214,78695c64d21f81a4427ad126fb8c71dfb9e1ac33..e4cc7386e1324c5e825e251ec8f25460f38cb1f8
--- 1/src/gromacs/gmxlib/gmx_thread_affinity.c
--- 2/src/gromacs/gmxlib/gmx_thread_affinity.c
+++ b/src/gromacs/gmxlib/gmx_thread_affinity.c
@@@ -32,36 -32,34 +32,36 @@@
    * To help us fund GROMACS development, we humbly ask that you cite
    * the research papers on the package. Check out http://www.gromacs.org.
    */
- -#ifdef HAVE_CONFIG_H
- -#include <config.h>
- -#endif
+ +#include "gmxpre.h"
+ +
+ +#include "gromacs/legacyheaders/gmx_thread_affinity.h"
+ +
+ +#include "config.h"
+ +
+ +#include <assert.h>
+ +#include <errno.h>
+ +#include <stdio.h>
+ +#include <string.h>
+ +
   #ifdef HAVE_SCHED_AFFINITY
- -#  ifndef _GNU_SOURCE
- -#    define _GNU_SOURCE 1
- -#  endif
   #  include <sched.h>
   #  include <sys/syscall.h>
   #endif
- -#include <string.h>
- -#include <errno.h>
- -#include <assert.h>
- -#include <stdio.h>
   
   #include "thread_mpi/threads.h"
   
- -#include "typedefs.h"
- -#include "types/commrec.h"
- -#include "types/hw_info.h"
- -#include "copyrite.h"
- -#include "gmx_cpuid.h"
- -#include "gmx_omp_nthreads.h"
- -#include "md_logging.h"
- -#include "gmx_thread_affinity.h"
- -
- -#include "gmx_fatal.h"
+ +#include "gromacs/legacyheaders/copyrite.h"
+ +#include "gromacs/legacyheaders/gmx_cpuid.h"
+ +#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
+ +#include "gromacs/legacyheaders/md_logging.h"
+ +#include "gromacs/legacyheaders/typedefs.h"
+ +#include "gromacs/legacyheaders/types/commrec.h"
+ +#include "gromacs/legacyheaders/types/hw_info.h"
+ +#include "gromacs/utility/basenetwork.h"
+ +#include "gromacs/utility/cstringutil.h"
+ +#include "gromacs/utility/fatalerror.h"
   #include "gromacs/utility/gmxomp.h"
+ +#include "gromacs/utility/smalloc.h"
   
   static int
   get_thread_affinity_layout(FILE *fplog,
@@@ -373,53 -371,21 +373,56 @@@ gmx_set_thread_affinity(FIL
    * Note that this will only work on Linux as we use a GNU feature.
    */
   void
- -gmx_check_thread_affinity_set(FILE            gmx_unused *fplog,
- -                              const t_commrec gmx_unused *cr,
- -                              gmx_hw_opt_t    gmx_unused *hw_opt,
- -                              int             gmx_unused  nthreads_hw_avail,
- -                              gmx_bool        gmx_unused  bAfterOpenmpInit)
+ +gmx_check_thread_affinity_set(FILE            *fplog,
+ +                              const t_commrec *cr,
+ +                              gmx_hw_opt_t    *hw_opt,
+ +                              int  gmx_unused  nthreads_hw_avail,
+ +                              gmx_bool         bAfterOpenmpInit)
   {
   #ifdef HAVE_SCHED_AFFINITY
       cpu_set_t mask_current;
       int       i, ret, cpu_count, cpu_set;
       gmx_bool  bAllSet;
+ +#endif
+ #ifdef GMX_LIB_MPI
+     gmx_bool  bAllSet_All;
+ #endif
   
       assert(hw_opt);
+ +    if (!bAfterOpenmpInit)
+ +    {
+ +        /* Check for externally set OpenMP affinity and turn off internal
+ +         * pinning if any is found. We need to do this check early to tell
+ +         * thread-MPI whether it should do pinning when spawning threads.
+ +         * TODO: the above no longer holds, we should move these checks later
+ +         */
+ +        if (hw_opt->thread_affinity != threadaffOFF)
+ +        {
+ +            char *message;
+ +            if (!gmx_omp_check_thread_affinity(&message))
+ +            {
+ +                /* TODO: with -pin auto we should only warn when using all cores */
+ +                md_print_warn(cr, fplog, "%s", message);
+ +                sfree(message);
+ +                hw_opt->thread_affinity = threadaffOFF;
+ +            }
+ +        }
+ +
+ +        /* With thread-MPI this is needed as pinning might get turned off,
+ +         * which needs to be known before starting thread-MPI.
+ +         * With thread-MPI hw_opt is processed here on the master rank
+ +         * and passed to the other ranks later, so we only do this on master.
+ +         */
+ +        if (!SIMMASTER(cr))
+ +        {
+ +            return;
+ +        }
+ +#ifndef GMX_THREAD_MPI
+ +        return;
+ +#endif
+ +    }
+ +
+ +#ifdef HAVE_SCHED_GETAFFINITY
       if (hw_opt->thread_affinity == threadaffOFF)
       {
           /* internal affinity setting is off, don't bother checking process affinity */
@@@ -458,6 -424,11 +461,11 @@@
           bAllSet = bAllSet && (CPU_ISSET(i, &mask_current) != 0);
       }
   
+ #ifdef GMX_LIB_MPI
+     MPI_Allreduce(&bAllSet, &bAllSet_All, 1, MPI_INT, MPI_LAND, MPI_COMM_WORLD);
+     bAllSet = bAllSet_All;
+ #endif
+ 
       if (!bAllSet)
       {
           if (hw_opt->thread_affinity == threadaffAUTO)
diff --combined src/gromacs/gmxpreprocess/readir.c

index 8f55b6c6609358dcc0d19c872e9da8e9fdb006d3,3319882e7fd9a83f913fe853fd79047e49687f8e..9a9cc3ed3c23c06b56ea454806cf1dad7218aab2
--- 1/src/gromacs/gmxpreprocess/readir.c
--- 2/src/gromacs/gmxpreprocess/readir.c
+++ b/src/gromacs/gmxpreprocess/readir.c
@@@ -34,34 -34,35 +34,34 @@@
    * To help us fund GROMACS development, we humbly ask that you cite
    * the research papers on the package. Check out http://www.gromacs.org.
    */
- -#ifdef HAVE_CONFIG_H
- -#include <config.h>
- -#endif
+ +#include "gmxpre.h"
+ +
+ +#include "readir.h"
   
   #include <ctype.h>
- -#include <stdlib.h>
   #include <limits.h>
- -#include "sysstuff.h"
- -#include "gromacs/utility/smalloc.h"
- -#include "typedefs.h"
- -#include "physics.h"
- -#include "names.h"
- -#include "gmx_fatal.h"
- -#include "macros.h"
- -#include "index.h"
- -#include "symtab.h"
+ +#include <stdlib.h>
+ +
+ +#include "gromacs/gmxpreprocess/calc_verletbuf.h"
+ +#include "gromacs/gmxpreprocess/toputil.h"
+ +#include "gromacs/legacyheaders/chargegroup.h"
+ +#include "gromacs/legacyheaders/inputrec.h"
+ +#include "gromacs/legacyheaders/macros.h"
+ +#include "gromacs/legacyheaders/names.h"
+ +#include "gromacs/legacyheaders/network.h"
+ +#include "gromacs/legacyheaders/readinp.h"
+ +#include "gromacs/legacyheaders/typedefs.h"
+ +#include "gromacs/legacyheaders/warninp.h"
+ +#include "gromacs/math/units.h"
+ +#include "gromacs/math/vec.h"
+ +#include "gromacs/pbcutil/pbc.h"
+ +#include "gromacs/topology/block.h"
+ +#include "gromacs/topology/index.h"
+ +#include "gromacs/topology/mtop_util.h"
+ +#include "gromacs/topology/symtab.h"
   #include "gromacs/utility/cstringutil.h"
- -#include "readinp.h"
- -#include "warninp.h"
- -#include "readir.h"
- -#include "toputil.h"
- -#include "index.h"
- -#include "network.h"
- -#include "vec.h"
- -#include "pbc.h"
- -#include "mtop_util.h"
- -#include "chargegroup.h"
- -#include "inputrec.h"
- -#include "calc_verletbuf.h"
+ +#include "gromacs/utility/fatalerror.h"
+ +#include "gromacs/utility/smalloc.h"
   
   #define MAXPTR 254
   #define NOGID  255
@@@ -455,7 -456,7 +455,7 @@@ void check_ir(const char *mdparin, t_in
           warning_error(wi, "nstcalclr must be a positive number (divisor of nstcalclr), or -1 to follow nstlist.");
       }
   
-     if (EEL_PME(ir->coulombtype) && ir->rcoulomb > ir->rvdw && ir->nstcalclr > 1)
+     if (EEL_PME(ir->coulombtype) && ir->rcoulomb > ir->rlist && ir->nstcalclr > 1)
       {
           warning_error(wi, "When used with PME, the long-range component of twin-range interactions must be updated every step (nstcalclr)");
       }
@@@ -1225,6 -1226,28 +1225,28 @@@
           sprintf(err_buf, "wall-ewald-zfac should be >= 2");
           CHECK(ir->wall_ewald_zfac < 2);
       }
+     if ((ir->ewald_geometry == eewg3DC) && (ir->ePBC != epbcXY) &&
+         EEL_FULL(ir->coulombtype))
+     {
+         sprintf(warn_buf, "With %s and ewald_geometry = %s you should use pbc = %s",
+                 eel_names[ir->coulombtype], eewg_names[eewg3DC], epbc_names[epbcXY]);
+         warning(wi, warn_buf);
+     }
+     if ((ir->epsilon_surface != 0) && EEL_FULL(ir->coulombtype))
+     {
+         if (ir->cutoff_scheme == ecutsVERLET)
+         {
+             sprintf(warn_buf, "Since molecules/charge groups are broken using the Verlet scheme, you can not use a dipole correction to the %s electrostatics.",
+                     eel_names[ir->coulombtype]);
+             warning(wi, warn_buf);
+         }
+         else
+         {
+             sprintf(warn_buf, "Dipole corrections to %s electrostatics only work if all charge groups that can cross PBC boundaries are dipoles. If this is not the case set epsilon_surface to 0",
+                     eel_names[ir->coulombtype]);
+             warning_note(wi, warn_buf);
+         }
+     }
   
       if (ir_vdw_switched(ir))
       {
@@@ -1425,7 -1448,7 +1447,7 @@@ int str_nelem(const char *str, int maxp
       int   np = 0;
       char *copy0, *copy;
   
- -    copy0 = strdup(str);
+ +    copy0 = gmx_strdup(str);
       copy  = copy0;
       ltrim(copy);
       while (*copy != '\0')
@@@ -1680,7 -1703,7 +1702,7 @@@ static void do_wall_params(t_inputrec *
           }
           for (i = 0; i < ir->nwall; i++)
           {
- -            opts->wall_atomtype[i] = strdup(names[i]);
+ +            opts->wall_atomtype[i] = gmx_strdup(names[i]);
           }
   
           if (ir->wall_type == ewt93 || ir->wall_type == ewt104)
@@@ -2343,7 -2366,7 +2365,7 @@@ void get_ir(const char *mdparin, const 
       {
           if (ir->efep != efepNO)
           {
- -            opts->couple_moltype = strdup(is->couple_moltype);
+ +            opts->couple_moltype = gmx_strdup(is->couple_moltype);
               if (opts->couple_lam0 == opts->couple_lam1)
               {
                   warning(wi, "The lambda=0 and lambda=1 states for coupling are identical");
@@@ -2399,6 -2422,22 +2421,22 @@@
           {
               do_simtemp_params(ir);
           }
+ 
+         /* Because sc-coul (=FALSE by default) only acts on the lambda state
+          * setup and not on the old way of specifying the free-energy setup,
+          * we should check for using soft-core when not needed, since that
+          * can complicate the sampling significantly.
+          * Note that we only check for the automated coupling setup.
+          * If the (advanced) user does FEP through manual topology changes,
+          * this check will not be triggered.
+          */
+         if (ir->efep != efepNO && ir->fepvals->n_lambda == 0 &&
+             ir->fepvals->sc_alpha != 0 &&
+             ((opts->couple_lam0 == ecouplamVDW  && opts->couple_lam0 == ecouplamVDWQ) ||
+              (opts->couple_lam1 == ecouplamVDWQ && opts->couple_lam1 == ecouplamVDW)))
+         {
+             warning(wi, "You are using soft-core interactions while the Van der Waals interactions are not decoupled (note that the sc-coul option is only active when using lambda states). Although this will not lead to errors, you will need much more sampling than without soft-core interactions. Consider using sc-alpha=0.");
+         }
       }
       else
       {
@@@ -2935,7 -2974,7 +2973,7 @@@ static void decode_cos(char *s, t_cosin
       double  a, phi;
       int     i;
   
- -    t = strdup(s);
+ +    t = gmx_strdup(s);
       trim(t);
   
       cosine->n   = 0;
@@@ -3116,7 -3155,7 +3154,7 @@@ static void make_swap_groups
   
   void make_IMD_group(t_IMD *IMDgroup, char *IMDgname, t_blocka *grps, char **gnames)
   {
- -    int      ig = -1, i;
+ +    int      ig, i;
   
   
       ig            = search_string(IMDgname, grps->nr, gnames);
@@@ -3823,15 -3862,7 +3861,15 @@@ static gmx_bool absolute_reference(t_in
                           case efbposresSPHERE:
                               AbsRef[XX] = AbsRef[YY] = AbsRef[ZZ] = 1;
                               break;
+ +                        case efbposresCYLINDERX:
+ +                            AbsRef[YY] = AbsRef[ZZ] = 1;
+ +                            break;
+ +                        case efbposresCYLINDERY:
+ +                            AbsRef[XX] = AbsRef[ZZ] = 1;
+ +                            break;
                           case efbposresCYLINDER:
+ +                        /* efbposres is a synonym for efbposresCYLINDERZ for backwards compatibility */
+ +                        case efbposresCYLINDERZ:
                               AbsRef[XX] = AbsRef[YY] = 1;
                               break;
                           case efbposresX: /* d=XX */
diff --combined src/gromacs/gmxpreprocess/readpull.c

index 2e78022c5b1d32b148f31348616f69ab83af7e1a,4ae7480f0f32d3bb46c33ee5747e14bcbc81b1a1..e181244bf516fb04d18096bced06e5bf4dfdb890
--- 1/src/gromacs/gmxpreprocess/readpull.c
--- 2/src/gromacs/gmxpreprocess/readpull.c
+++ b/src/gromacs/gmxpreprocess/readpull.c
@@@ -34,24 -34,30 +34,24 @@@
    * To help us fund GROMACS development, we humbly ask that you cite
    * the research papers on the package. Check out http://www.gromacs.org.
    */
- -#ifdef HAVE_CONFIG_H
- -#include <config.h>
- -#endif
+ +#include "gmxpre.h"
   
- -#include <string.h>
   #include <stdlib.h>
+ +#include <string.h>
   
+ +#include "gromacs/gmxpreprocess/readir.h"
+ +#include "gromacs/legacyheaders/macros.h"
+ +#include "gromacs/legacyheaders/mdatoms.h"
+ +#include "gromacs/legacyheaders/names.h"
+ +#include "gromacs/legacyheaders/readinp.h"
+ +#include "gromacs/legacyheaders/typedefs.h"
+ +#include "gromacs/math/vec.h"
+ +#include "gromacs/pbcutil/pbc.h"
+ +#include "gromacs/pulling/pull.h"
   #include "gromacs/utility/cstringutil.h"
- -#include "sysstuff.h"
- -#include "princ.h"
- -#include "gromacs/fileio/futil.h"
- -#include "vec.h"
+ +#include "gromacs/utility/fatalerror.h"
+ +#include "gromacs/utility/futil.h"
   #include "gromacs/utility/smalloc.h"
- -#include "typedefs.h"
- -#include "names.h"
- -#include "gmx_fatal.h"
- -#include "macros.h"
- -#include "index.h"
- -#include "symtab.h"
- -#include "readinp.h"
- -#include "readir.h"
- -#include "mdatoms.h"
- -#include "pbc.h"
- -#include "gromacs/pulling/pull.h"
   
   
   static char pulldim[STRLEN];
@@@ -204,7 -210,8 +204,8 @@@ char **read_pullparams(int *ninp_p, t_i
           nscan = sscanf(groups, "%d %d %d", &pcrd->group[0], &pcrd->group[1], &idum);
           if (nscan != 2)
           {
-             fprintf(stderr, "ERROR: %s should have %d components\n", buf, 2);
+             fprintf(stderr, "ERROR: %s should contain %d pull group indices\n",
+                     buf, 2);
               nerror++;
           }
           sprintf(buf, "pull-coord%d-origin", i);
@@@ -248,7 -255,7 +249,7 @@@ void make_pull_groups(t_pull *pull
   
           if (strcmp(pgnames[g], "") == 0)
           {
-             gmx_fatal(FARGS, "Group pull_group%d required by grompp was undefined.", g);
+             gmx_fatal(FARGS, "Pull option pull_group%d required by grompp has not been set.", g);
           }
   
           ig        = search_string(pgnames[g], grps->nr, gnames);
diff --combined src/gromacs/mdlib/clincs.cpp

index 57ff2d7b8fb86f5924a748bdba5f6f0a6d92725b,783ad72c8530158783b5eab8f7f3d647489c9f75..aa4e633f77e0729350f3b990760068ca7f8bafa8
--- 1/src/gromacs/mdlib/clincs.cpp
--- 2/src/gromacs/mdlib/clincs.c
+++ b/src/gromacs/mdlib/clincs.cpp
@@@ -35,28 -35,27 +35,28 @@@
    * the research papers on the package. Check out http://www.gromacs.org.
    */
   /* This file is completely threadsafe - keep it that way! */
- -#ifdef HAVE_CONFIG_H
- -#include <config.h>
- -#endif
+ +#include "gmxpre.h"
   
   #include <math.h>
- -#include "main.h"
- -#include "constr.h"
- -#include "copyrite.h"
- -#include "physics.h"
- -#include "vec.h"
- -#include "pbc.h"
- -#include "gromacs/utility/smalloc.h"
- -#include "mdrun.h"
- -#include "nrnb.h"
- -#include "domdec.h"
- -#include "mtop_util.h"
- -#include "gmx_omp_nthreads.h"
+ +#include <stdlib.h>
   
+ +#include "gromacs/domdec/domdec.h"
   #include "gromacs/fileio/gmxfio.h"
- -#include "gmx_fatal.h"
+ +#include "gromacs/legacyheaders/constr.h"
+ +#include "gromacs/legacyheaders/copyrite.h"
+ +#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
+ +#include "gromacs/legacyheaders/mdrun.h"
+ +#include "gromacs/legacyheaders/nrnb.h"
+ +#include "gromacs/legacyheaders/types/commrec.h"
+ +#include "gromacs/math/units.h"
+ +#include "gromacs/math/vec.h"
+ +#include "gromacs/mdlib/bitmask.h"
+ +#include "gromacs/pbcutil/pbc.h"
+ +#include "gromacs/topology/block.h"
+ +#include "gromacs/topology/mtop_util.h"
+ +#include "gromacs/utility/fatalerror.h"
   #include "gromacs/utility/gmxomp.h"
+ +#include "gromacs/utility/smalloc.h"
   
   typedef struct {
       int    b0;         /* first constraint for this thread */
@@@ -67,6 -66,7 +67,7 @@@
       int   *ind_r;      /* constraint index for updating atom data */
       int    ind_nalloc; /* allocation size of ind and ind_r */
       tensor vir_r_m_dr; /* temporary variable for virial calculation */
+     real   dhdlambda;  /* temporary variable for lambda derivative */
   } lincs_thread_t;
   
   typedef struct gmx_lincsdata {
@@@ -97,7 -97,7 +98,7 @@@
       real           *bllen;        /* the reference bond length */
       int             nth;          /* The number of threads doing LINCS */
       lincs_thread_t *th;           /* LINCS thread division */
- -    unsigned       *atf;          /* atom flags for thread parallelization */
+ +    gmx_bitmask_t  *atf;          /* atom flags for thread parallelization */
       int             atf_nalloc;   /* allocation size of atf */
       /* arrays for temporary storage in the LINCS algorithm */
       rvec           *tmpv;
@@@ -360,11 -360,11 +361,11 @@@ static void lincs_update_atoms(struct g
   static void do_lincsp(rvec *x, rvec *f, rvec *fp, t_pbc *pbc,
                         struct gmx_lincsdata *lincsd, int th,
                         real *invmass,
-                       int econq, real *dvdlambda,
+                       int econq, gmx_bool bCalcDHDL,
                         gmx_bool bCalcVir, tensor rmdf)
   {
       int      b0, b1, b, i, j, k, n;
- -    real     tmp0, tmp1, tmp2, im1, im2, mvb, rlen, len, wfac, lam;
+ +    real     tmp0, tmp1, tmp2, mvb;
       rvec     dx;
       int     *bla, *blnr, *blbnb;
       rvec    *r;
@@@ -463,14 -463,17 +464,17 @@@
       lincs_update_atoms(lincsd, th, 1.0, sol, r,
                          (econq != econqForce) ? invmass : NULL, fp);
   
-     if (dvdlambda != NULL)
+     if (bCalcDHDL)
       {
- #pragma omp barrier
+         real dhdlambda;
+ 
+         dhdlambda = 0;
           for (b = b0; b < b1; b++)
           {
-             *dvdlambda -= sol[b]*lincsd->ddist[b];
+             dhdlambda -= sol[b]*lincsd->ddist[b];
           }
-         /* 10 ncons flops */
+ 
+         lincsd->th[th].dhdlambda = dhdlambda;
       }
   
       if (bCalcVir)
@@@ -499,13 -502,13 +503,13 @@@ static void do_lincs(rvec *x, rvec *xp
                        struct gmx_lincsdata *lincsd, int th,
                        real *invmass,
                        t_commrec *cr,
-                      gmx_bool bCalcLambda,
+                      gmx_bool bCalcDHDL,
                        real wangle, int *warn,
                        real invdt, rvec *v,
                        gmx_bool bCalcVir, tensor vir_r_m_dr)
   {
       int      b0, b1, b, i, j, k, n, iter;
- -    real     tmp0, tmp1, tmp2, im1, im2, mvb, rlen, len, len2, dlen2, wfac;
+ +    real     tmp0, tmp1, tmp2, mvb, rlen, len, len2, dlen2, wfac;
       rvec     dx;
       int     *bla, *blnr, *blbnb;
       rvec    *r;
@@@ -648,7 -651,6 +652,7 @@@
               dlen2 = 2*len2 - norm2(dx);
               if (dlen2 < wfac*len2 && (nlocat == NULL || nlocat[b]))
               {
+ +                /* not race free - see detailed comment in caller */
                   *warn = b;
               }
               if (dlen2 > 0)
@@@ -685,9 -687,9 +689,9 @@@
           /* 16 ncons flops */
       }
   
-     if (nlocat != NULL && bCalcLambda)
+     if (nlocat != NULL && (bCalcDHDL || bCalcVir))
       {
-         /* In lincs_update_atoms thread might cross-read mlambda */
+         /* In lincs_update_atoms threads might cross-read mlambda */
   #pragma omp barrier
   
           /* Only account for local atoms */
@@@ -697,6 -699,21 +701,21 @@@
           }
       }
   
+     if (bCalcDHDL)
+     {
+         real dhdl;
+ 
+         dhdl = 0;
+         for (b = b0; b < b1; b++)
+         {
+             /* Note that this this is dhdl*dt^2, the dt^2 factor is corrected
+              * later after the contributions are reduced over the threads.
+              */
+             dhdl -= lincsd->mlambda[b]*lincsd->ddist[b];
+         }
+         lincsd->th[th].dhdlambda = dhdl;
+     }
+ 
       if (bCalcVir)
       {
           /* Constraint virial */
@@@ -785,7 -802,7 +804,7 @@@ void set_lincs_matrix(struct gmx_lincsd
                               li->triangle[li->ntriangle] = i;
                               li->tri_bits[li->ntriangle] = 0;
                               li->ntriangle++;
- -                            if (li->blnr[i+1] - li->blnr[i] > sizeof(li->tri_bits[0])*8 - 1)
+ +                            if (li->blnr[i+1] - li->blnr[i] > static_cast<int>(sizeof(li->tri_bits[0])*8 - 1))
                               {
                                   gmx_fatal(FARGS, "A constraint is connected to %d constraints, this is more than the %d allowed for constraints participating in triangles",
                                             li->blnr[i+1] - li->blnr[i],
@@@ -1017,7 -1034,7 +1036,7 @@@ static void lincs_thread_setup(struct g
   {
       lincs_thread_t *li_m;
       int             th;
- -    unsigned       *atf;
+ +    gmx_bitmask_t  *atf;
       int             a;
   
       if (natoms > li->atf_nalloc)
@@@ -1030,12 -1047,7 +1049,12 @@@
       /* Clear the atom flags */
       for (a = 0; a < natoms; a++)
       {
- -        atf[a] = 0;
+ +        bitmask_clear(&atf[a]);
+ +    }
+ +
+ +    if (li->nth > BITMASK_SIZE)
+ +    {
+ +        gmx_fatal(FARGS, "More than %d threads is not supported for LINCS.", BITMASK_SIZE);
       }
   
       for (th = 0; th < li->nth; th++)
@@@ -1049,11 -1061,14 +1068,11 @@@
           li_th->b0 = (li->nc* th   )/li->nth;
           li_th->b1 = (li->nc*(th+1))/li->nth;
   
- -        if (th < sizeof(*atf)*8)
+ +        /* For each atom set a flag for constraints from each */
+ +        for (b = li_th->b0; b < li_th->b1; b++)
           {
- -            /* For each atom set a flag for constraints from each */
- -            for (b = li_th->b0; b < li_th->b1; b++)
- -            {
- -                atf[li->bla[b*2]  ] |= (1U<<th);
- -                atf[li->bla[b*2+1]] |= (1U<<th);
- -            }
+ +            bitmask_set_bit(&atf[li->bla[b*2]], th);
+ +            bitmask_set_bit(&atf[li->bla[b*2+1]], th);
           }
       }
   
@@@ -1061,7 -1076,7 +1080,7 @@@
       for (th = 0; th < li->nth; th++)
       {
           lincs_thread_t *li_th;
- -        unsigned        mask;
+ +        gmx_bitmask_t   mask;
           int             b;
   
           li_th = &li->th[th];
@@@ -1073,24 -1088,35 +1092,24 @@@
               srenew(li_th->ind_r, li_th->ind_nalloc);
           }
   
- -        if (th < sizeof(*atf)*8)
- -        {
- -            mask = (1U<<th) - 1U;
+ +        bitmask_init_low_bits(&mask, th);
   
- -            li_th->nind   = 0;
- -            li_th->nind_r = 0;
- -            for (b = li_th->b0; b < li_th->b1; b++)
+ +        li_th->nind   = 0;
+ +        li_th->nind_r = 0;
+ +        for (b = li_th->b0; b < li_th->b1; b++)
+ +        {
+ +            /* We let the constraint with the lowest thread index
+ +             * operate on atoms with constraints from multiple threads.
+ +             */
+ +            if (bitmask_is_disjoint(atf[li->bla[b*2]], mask) &&
+ +                bitmask_is_disjoint(atf[li->bla[b*2+1]], mask))
               {
- -                /* We let the constraint with the lowest thread index
- -                 * operate on atoms with constraints from multiple threads.
- -                 */
- -                if (((atf[li->bla[b*2]]   & mask) == 0) &&
- -                    ((atf[li->bla[b*2+1]] & mask) == 0))
- -                {
- -                    /* Add the constraint to the local atom update index */
- -                    li_th->ind[li_th->nind++] = b;
- -                }
- -                else
- -                {
- -                    /* Add the constraint to the rest block */
- -                    li_th->ind_r[li_th->nind_r++] = b;
- -                }
+ +                /* Add the constraint to the local atom update index */
+ +                li_th->ind[li_th->nind++] = b;
               }
- -        }
- -        else
- -        {
- -            /* We are out of bits, assign all constraints to rest */
- -            for (b = li_th->b0; b < li_th->b1; b++)
+ +            else
               {
+ +                /* Add the constraint to the rest block */
                   li_th->ind_r[li_th->nind_r++] = b;
               }
           }
@@@ -1145,6 -1171,7 +1164,6 @@@ void set_lincs(t_idef *idef, t_mdatoms 
       int          i, k, ncc_alloc, ni, con, nconnect, concon;
       int          type, a1, a2;
       real         lenA = 0, lenB;
- -    gmx_bool     bLocal;
   
       li->nc  = 0;
       li->ncc = 0;
@@@ -1233,6 -1260,7 +1252,6 @@@
       li->blnr[con] = nconnect;
       for (i = 0; i < ni; i++)
       {
- -        bLocal = TRUE;
           type   = iatom[3*i];
           a1     = iatom[3*i+1];
           a2     = iatom[3*i+2];
@@@ -1462,14 -1490,21 +1481,21 @@@ gmx_bool constrain_lincs(FILE *fplog, g
                            t_nrnb *nrnb,
                            int maxwarn, int *warncount)
   {
+     gmx_bool  bCalcDHDL;
       char      buf[STRLEN], buf2[22], buf3[STRLEN];
- -    int       i, warn, p_imax, error;
+ +    int       i, warn, p_imax;
       real      ncons_loc, p_ssd, p_max = 0;
       rvec      dx;
       gmx_bool  bOK;
   
       bOK = TRUE;
   
+     /* This boolean should be set by a flag passed to this routine.
+      * We can also easily check if any constraint length is changed,
+      * if not dH/dlambda=0 and we can also set the boolean to FALSE.
+      */
+     bCalcDHDL = (ir->efep != efepNO && dvdlambda != NULL);
+ 
       if (lincsd->nc == 0 && cr->dd == NULL)
       {
           if (bLog || bEner)
@@@ -1491,6 -1526,9 +1517,9 @@@
   
       if (econq == econqCoord)
       {
+         /* We can't use bCalcDHDL here, since NULL can be passed for dvdlambda
+          * also with efep!=fepNO.
+          */
           if (ir->efep != efepNO)
           {
               if (md->nMassPerturbed && lincsd->matlam != md->lambda)
@@@ -1553,25 -1591,12 +1582,12 @@@
   
               do_lincs(x, xprime, box, pbc, lincsd, th,
                        md->invmass, cr,
-                      bCalcVir || (ir->efep != efepNO),
+                      bCalcDHDL,
                        ir->LincsWarnAngle, &warn,
                        invdt, v, bCalcVir,
                        th == 0 ? vir_r_m_dr : lincsd->th[th].vir_r_m_dr);
           }
   
-         if (ir->efep != efepNO)
-         {
-             real dt_2, dvdl = 0;
- 
-             /* TODO This should probably use invdt, so that sd integrator scaling works properly */
-             dt_2 = 1.0/(ir->delta_t*ir->delta_t);
-             for (i = 0; (i < lincsd->nc); i++)
-             {
-                 dvdl -= lincsd->mlambda[i]*dt_2*lincsd->ddist[i];
-             }
-             *dvdlambda += dvdl;
-         }
- 
           if (bLog && fplog && lincsd->nc > 0)
           {
               fprintf(fplog, "   Rel. Constraint Deviation:  RMS         MAX     between atoms\n");
@@@ -1664,11 -1689,30 +1680,31 @@@
               int th = gmx_omp_get_thread_num();
   
               do_lincsp(x, xprime, min_proj, pbc, lincsd, th,
-                       md->invmass, econq, ir->efep != efepNO ? dvdlambda : NULL,
+                       md->invmass, econq, bCalcDHDL,
                         bCalcVir, th == 0 ? vir_r_m_dr : lincsd->th[th].vir_r_m_dr);
           }
       }
   
+     if (bCalcDHDL)
+     {
+         /* Reduce the dH/dlambda contributions over the threads */
+         real dhdlambda;
+         int  th;
+ 
+         dhdlambda = 0;
+         for (th = 0; th < lincsd->nth; th++)
+         {
+             dhdlambda += lincsd->th[th].dhdlambda;
+         }
+         if (econqCoord)
+         {
+             /* dhdlambda contains dH/dlambda*dt^2, correct for this */
++            /* TODO This should probably use invdt, so that sd integrator scaling works properly */
+             dhdlambda /= ir->delta_t*ir->delta_t;
+         }
+         *dvdlambda += dhdlambda;
+     }
+ 
       if (bCalcVir && lincsd->nth > 1)
       {
           for (i = 1; i < lincsd->nth; i++)
diff --combined src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_common.h

index 146c95855be950c76df0f85aa910b34dd6994de0,7b4bc4f524eac0e8bfa8e03b1b35208291004842..70d6edff67ce522c8c678f1f7a8cab28bd5163e7
--- 1/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_common.h
--- 2/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_common.h
+++ b/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_common.h
@@@ -32,17 -32,14 +32,17 @@@
    * To help us fund GROMACS development, we humbly ask that you cite
    * the research papers on the package. Check out http://www.gromacs.org.
    */
+ +#include "gromacs/mdlib/nbnxn_consts.h"
+ +#include "gromacs/pbcutil/ishift.h"
   #include "gromacs/simd/simd.h"
   #include "gromacs/simd/simd_math.h"
   #include "gromacs/simd/vector_operations.h"
- -#include "../../nbnxn_consts.h"
   #ifdef CALC_COUL_EWALD
   #include "gromacs/math/utilities.h"
   #endif
   
+ +#include "config.h"
+ +
   #ifndef GMX_SIMD_J_UNROLL_SIZE
   #error "Need to define GMX_SIMD_J_UNROLL_SIZE before including the 4xn kernel common header file"
   #endif
@@@ -57,15 -54,14 +57,14 @@@
   #define STRIDE     (UNROLLI)
   #endif
   
- -#include "../nbnxn_kernel_simd_utils.h"
+ +#include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_simd_utils.h"
   
   static gmx_inline void gmx_simdcall
- gmx_load_simd_4xn_interactions(int gmx_unused             excl,
+ gmx_load_simd_4xn_interactions(int                        excl,
                                  gmx_exclfilter gmx_unused  filter_S0,
                                  gmx_exclfilter gmx_unused  filter_S1,
                                  gmx_exclfilter gmx_unused  filter_S2,
                                  gmx_exclfilter gmx_unused  filter_S3,
-                                const char gmx_unused     *interaction_mask_indices,
                                  real gmx_unused           *simd_interaction_array,
                                  gmx_simd_bool_t           *interact_S0,
                                  gmx_simd_bool_t           *interact_S1,
@@@ -79,13 -75,14 +78,14 @@@
       *interact_S1  = gmx_checkbitmask_pb(mask_pr_S, filter_S1);
       *interact_S2  = gmx_checkbitmask_pb(mask_pr_S, filter_S2);
       *interact_S3  = gmx_checkbitmask_pb(mask_pr_S, filter_S3);
- #endif
- #ifdef GMX_SIMD_IBM_QPX
+ #elif defined GMX_SIMD_IBM_QPX
       const int size = GMX_SIMD_REAL_WIDTH * sizeof(real);
-     *interact_S0  = gmx_load_interaction_mask_pb(size*interaction_mask_indices[0], simd_interaction_array);
-     *interact_S1  = gmx_load_interaction_mask_pb(size*interaction_mask_indices[1], simd_interaction_array);
-     *interact_S2  = gmx_load_interaction_mask_pb(size*interaction_mask_indices[2], simd_interaction_array);
-     *interact_S3  = gmx_load_interaction_mask_pb(size*interaction_mask_indices[3], simd_interaction_array);
+     *interact_S0  = gmx_load_interaction_mask_pb(size*((excl >> (0 * UNROLLJ)) & 0xF), simd_interaction_array);
+     *interact_S1  = gmx_load_interaction_mask_pb(size*((excl >> (1 * UNROLLJ)) & 0xF), simd_interaction_array);
+     *interact_S2  = gmx_load_interaction_mask_pb(size*((excl >> (2 * UNROLLJ)) & 0xF), simd_interaction_array);
+     *interact_S3  = gmx_load_interaction_mask_pb(size*((excl >> (3 * UNROLLJ)) & 0xF), simd_interaction_array);
+ #else
+ #error "Need implementation of gmx_load_simd_4xn_interactions"
   #endif
   }
   
diff --combined src/gromacs/mdlib/nbnxn_pairlist.h

index 4558266bc39a2c899a9ad0c763ee9edea58510b8,f89a418ce38db08c05f548d3337aca3b3bf6f052..09d429a27d79ce456d69d0933ff5a038f3ec941d
--- 1/src/gromacs/mdlib/nbnxn_pairlist.h
--- 2/src/gromacs/legacyheaders/types/nbnxn_pairlist.h
+++ b/src/gromacs/mdlib/nbnxn_pairlist.h
@@@ -36,15 -36,7 +36,15 @@@
   #ifndef _nbnxn_pairlist_h
   #define _nbnxn_pairlist_h
   
- -#include "nblist.h"
+ +#include <stddef.h>
+ +
+ +#include "thread_mpi/atomic.h"
+ +
+ +#include "gromacs/legacyheaders/types/nblist.h"
+ +#include "gromacs/math/vectypes.h"
+ +#include "gromacs/mdlib/bitmask.h"
+ +#include "gromacs/utility/basedefinitions.h"
+ +#include "gromacs/utility/real.h"
   
   #ifdef __cplusplus
   extern "C" {
@@@ -83,8 -75,6 +83,6 @@@ typedef void nbnxn_free_t (void *ptr)
   typedef struct {
       int          cj;    /* The j-cluster                    */
       unsigned int excl;  /* The exclusion (interaction) bits */
-     /* Indices into the arrays of SIMD interaction masks. */
-     char         interaction_mask_indices[4];
   } nbnxn_cj_t;
   
   /* In nbnxn_ci_t the integer shift contains the shift in the lower 7 bits.
@@@ -132,7 -122,7 +130,7 @@@ typedef struct 
                               */
   } nbnxn_excl_t;
   
- -typedef struct {
+ +typedef struct nbnxn_pairlist_t {
       gmx_cache_protect_t cp0;
   
       nbnxn_alloc_t      *alloc;
@@@ -208,16 -198,21 +206,16 @@@ typedef struct 
   #define NBNXN_BUFFERFLAG_SIZE  16
   #endif
   
- -/* We currently store the reduction flags as bits in an unsigned int.
- - * In most cases this limits the number of flags to 32.
- - * The reduction will automatically disable the flagging and do a full
- - * reduction when the flags won't fit, but this will lead to very slow
- - * reduction. As we anyhow don't expect reasonable performance with
- - * more than 32 threads, we put in this hard limit.
- - * You can increase this number, but the reduction will be very slow.
+ +/* We store the reduction flags as gmx_bitmask_t.
+ + * This limits the number of flags to BITMASK_SIZE.
    */
- -#define NBNXN_BUFFERFLAG_MAX_THREADS  32
+ +#define NBNXN_BUFFERFLAG_MAX_THREADS  (BITMASK_SIZE)
   
   /* Flags for telling if threads write to force output buffers */
   typedef struct {
- -    int           nflag;       /* The number of flag blocks                         */
- -    unsigned int *flag;        /* Bit i is set when thread i writes to a cell-block */
- -    int           flag_nalloc; /* Allocation size of cxy_flag                       */
+ +    int               nflag;       /* The number of flag blocks                         */
+ +    gmx_bitmask_t    *flag;        /* Bit i is set when thread i writes to a cell-block */
+ +    int               flag_nalloc; /* Allocation size of cxy_flag                       */
   } nbnxn_buffer_flags_t;
   
   /* LJ combination rules: geometric, Lorentz-Berthelot, none */
@@@ -225,7 -220,10 +223,7 @@@ enum 
       ljcrGEOM, ljcrLB, ljcrNONE, ljcrNR
   };
   
- -/* TODO: Remove need for forward declare */
- -struct tMPI_Atomic;
- -
- -typedef struct {
+ +typedef struct nbnxn_atomdata_t {
       nbnxn_alloc_t           *alloc;
       nbnxn_free_t            *free;
       int                      ntype;           /* The number of different atom types                 */
@@@ -269,7 -267,7 +267,7 @@@
       gmx_bool                 bUseBufferFlags;        /* Use the flags or operate on all atoms     */
       nbnxn_buffer_flags_t     buffer_flags;           /* Flags for buffer zeroing+reduc.  */
       gmx_bool                 bUseTreeReduce;         /* Use tree for force reduction */
- -    struct tMPI_Atomic      *syncStep;               /* Synchronization step for tree reduce */
+ +    tMPI_Atomic_t           *syncStep;               /* Synchronization step for tree reduce */
   } nbnxn_atomdata_t;
   
   #ifdef __cplusplus
diff --combined src/gromacs/mdlib/nbnxn_search.c

index 856ae32d0f0d8c00a97f736ab14d5d66b64026d1,d201407bfda9af23daef7e4e4dffea23951f2f0c..f7c27be2f28fd8f310acd44252ceb5ba9527a79b
--- 1/src/gromacs/mdlib/nbnxn_search.c
--- 2/src/gromacs/mdlib/nbnxn_search.c
+++ b/src/gromacs/mdlib/nbnxn_search.c
@@@ -33,35 -33,34 +33,33 @@@
    * the research papers on the package. Check out http://www.gromacs.org.
    */
   
- -#ifdef HAVE_CONFIG_H
- -#include <config.h>
- -#endif
+ +#include "gmxpre.h"
+ +
+ +#include "nbnxn_search.h"
   
- #include "config.h"
- 
+ +#include <assert.h>
   #include <math.h>
   #include <string.h>
- -#include <assert.h>
   
- -#include "sysstuff.h"
- -#include "gromacs/utility/smalloc.h"
- -#include "types/commrec.h"
- -#include "macros.h"
+ +#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
+ +#include "gromacs/legacyheaders/macros.h"
+ +#include "gromacs/legacyheaders/nrnb.h"
+ +#include "gromacs/legacyheaders/ns.h"
+ +#include "gromacs/legacyheaders/types/commrec.h"
   #include "gromacs/math/utilities.h"
- -#include "vec.h"
- -#include "pbc.h"
- -#include "nbnxn_consts.h"
+ +#include "gromacs/math/vec.h"
+ +#include "gromacs/mdlib/nb_verlet.h"
+ +#include "gromacs/mdlib/nbnxn_atomdata.h"
+ +#include "gromacs/mdlib/nbnxn_consts.h"
+ +#include "gromacs/pbcutil/ishift.h"
+ +#include "gromacs/pbcutil/pbc.h"
+ +#include "gromacs/utility/smalloc.h"
+ +
   /* nbnxn_internal.h included gromacs/simd/macros.h */
- -#include "nbnxn_internal.h"
- -#ifdef GMX_NBNXN_SIMD
+ +#include "gromacs/mdlib/nbnxn_internal.h"
+ +#ifdef GMX_SIMD
   #include "gromacs/simd/vector_operations.h"
   #endif
- -#include "nbnxn_atomdata.h"
- -#include "nbnxn_search.h"
- -#include "gmx_omp_nthreads.h"
- -#include "nrnb.h"
- -#include "ns.h"
- -
- -#include "gromacs/fileio/gmxfio.h"
   
   #ifdef NBNXN_SEARCH_BB_SIMD4
   /* Always use 4-wide SIMD for bounding box calculations */
@@@ -1382,13 -1381,14 +1380,13 @@@ static void sort_columns_supersub(cons
                                     int cxy_start, int cxy_end,
                                     int *sort_work)
   {
- -    int  cxy;
- -    int  cx, cy, cz = -1, c = -1, ncz;
- -    int  na, ash, na_c, ind, a;
- -    int  subdiv_z, sub_z, na_z, ash_z;
- -    int  subdiv_y, sub_y, na_y, ash_y;
- -    int  subdiv_x, sub_x, na_x, ash_x;
+ +    int        cxy;
+ +    int        cx, cy, cz = -1, c = -1, ncz;
+ +    int        na, ash, na_c, ind, a;
+ +    int        subdiv_z, sub_z, na_z, ash_z;
+ +    int        subdiv_y, sub_y, na_y, ash_y;
+ +    int        subdiv_x, sub_x, na_x, ash_x;
   
- -    /* cppcheck-suppress unassignedVariable */
       nbnxn_bb_t bb_work_array[2], *bb_work_aligned;
   
       bb_work_aligned = (nbnxn_bb_t *)(((size_t)(bb_work_array+1)) & (~((size_t)15)));
@@@ -1766,7 -1766,7 +1764,7 @@@ static void init_buffer_flags(nbnxn_buf
       }
       for (b = 0; b < flags->nflag; b++)
       {
- -        flags->flag[b] = 0;
+ +        bitmask_clear(&(flags->flag[b]));
       }
   }
   
@@@ -2943,10 -2943,10 +2941,10 @@@ static void make_cluster_list_simple(co
   }
   
   #ifdef GMX_NBNXN_SIMD_4XN
- -#include "nbnxn_search_simd_4xn.h"
+ +#include "gromacs/mdlib/nbnxn_search_simd_4xn.h"
   #endif
   #ifdef GMX_NBNXN_SIMD_2XNN
- -#include "nbnxn_search_simd_2xnn.h"
+ +#include "gromacs/mdlib/nbnxn_search_simd_2xnn.h"
   #endif
   
   /* Plain C or SIMD4 code for making a pair list of super-cell sci vs scj.
@@@ -3258,12 -3258,6 +3256,6 @@@ static void set_ci_top_excls(const nbnx
                           inner_e = ge - (se << na_cj_2log);
   
                           nbl->cj[found].excl &= ~(1U<<((inner_i<<na_cj_2log) + inner_e));
- /* The next code line is usually not needed. We do not want to version
-  * away the above line, because there is logic that relies on being
-  * able to detect easily whether any exclusions exist. */
- #if (defined GMX_SIMD_IBM_QPX)
-                         nbl->cj[found].interaction_mask_indices[inner_i] &= ~(1U << inner_e);
- #endif
                       }
                   }
               }
@@@ -4845,7 -4839,7 +4837,7 @@@ static void nbnxn_make_pairlist_part(co
       int               ndistc;
       int               ncpcheck;
       int               gridi_flag_shift = 0, gridj_flag_shift = 0;
- -    unsigned int     *gridj_flag       = NULL;
+ +    gmx_bitmask_t    *gridj_flag       = NULL;
       int               ncj_old_i, ncj_old_j;
   
       nbs_cycle_start(&work->cc[enbsCCsearch]);
@@@ -5351,7 -5345,7 +5343,7 @@@
                                           cbl = nbl->cj[nbl->ncj-1].cj >> gridj_flag_shift;
                                           for (cb = cbf; cb <= cbl; cb++)
                                           {
- -                                            gridj_flag[cb] = 1U<<th;
+ +                                            bitmask_init_bit(&gridj_flag[cb], th);
                                           }
                                       }
                                   }
@@@ -5416,7 -5410,7 +5408,7 @@@
   
           if (bFBufferFlag && nbl->ncj > ncj_old_i)
           {
- -            work->buffer_flags.flag[(gridi->cell0+ci)>>gridi_flag_shift] = 1U<<th;
+ +            bitmask_init_bit(&(work->buffer_flags.flag[(gridi->cell0+ci)>>gridi_flag_shift]), th);
           }
       }
   
@@@ -5450,8 -5444,8 +5442,8 @@@ static void reduce_buffer_flags(const n
                                   int                         nsrc,
                                   const nbnxn_buffer_flags_t *dest)
   {
- -    int                 s, b;
- -    const unsigned int *flag;
+ +    int            s, b;
+ +    gmx_bitmask_t *flag;
   
       for (s = 0; s < nsrc; s++)
       {
@@@ -5459,35 -5453,33 +5451,35 @@@
   
           for (b = 0; b < dest->nflag; b++)
           {
- -            dest->flag[b] |= flag[b];
+ +            bitmask_union(&(dest->flag[b]), flag[b]);
           }
       }
   }
   
   static void print_reduction_cost(const nbnxn_buffer_flags_t *flags, int nout)
   {
- -    int nelem, nkeep, ncopy, nred, b, c, out;
+ +    int           nelem, nkeep, ncopy, nred, b, c, out;
+ +    gmx_bitmask_t mask_0;
   
       nelem = 0;
       nkeep = 0;
       ncopy = 0;
       nred  = 0;
+ +    bitmask_init_bit(&mask_0, 0);
       for (b = 0; b < flags->nflag; b++)
       {
- -        if (flags->flag[b] == 1)
+ +        if (bitmask_is_equal(flags->flag[b], mask_0))
           {
               /* Only flag 0 is set, no copy of reduction required */
               nelem++;
               nkeep++;
           }
- -        else if (flags->flag[b] > 0)
+ +        else if (!bitmask_is_zero(flags->flag[b]))
           {
               c = 0;
               for (out = 0; out < nout; out++)
               {
- -                if (flags->flag[b] & (1U<<out))
+ +                if (bitmask_is_set(flags->flag[b], out))
                   {
                       c++;
                   }
diff --combined src/gromacs/mdlib/nbnxn_search_simd_4xn.h

index 33a307b58456ab8080b37e6c0309915d0f148217,8a328404badb64065b5791bef66e029bb0de6868..4e1c15a0ee65c8b19e4bc59d12345db7971d06f4
--- 1/src/gromacs/mdlib/nbnxn_search_simd_4xn.h
--- 2/src/gromacs/mdlib/nbnxn_search_simd_4xn.h
+++ b/src/gromacs/mdlib/nbnxn_search_simd_4xn.h
@@@ -33,8 -33,7 +33,6 @@@
    * the research papers on the package. Check out http://www.gromacs.org.
    */
   
- #include "config.h"
--
   #if GMX_SIMD_REAL_WIDTH >= NBNXN_CPU_CLUSTER_I_SIZE
   #define STRIDE_S  (GMX_SIMD_REAL_WIDTH)
   #else
@@@ -111,7 -110,6 +109,7 @@@ make_cluster_list_simd_4xn(const nbnxn_
       float                              d2;
       int                                xind_f, xind_l, cj;
   
+ +    /* cppcheck-suppress selfAssignment . selfAssignment for width 4.*/
       cjf = CI_TO_CJ_SIMD_4XN(cjf);
       cjl = CI_TO_CJ_SIMD_4XN(cjl+1) - 1;
   
@@@ -265,12 -263,6 +263,6 @@@
               /* Store cj and the interaction mask */
               nbl->cj[nbl->ncj].cj   = CI_TO_CJ_SIMD_4XN(gridj->cell0) + cj;
               nbl->cj[nbl->ncj].excl = get_imask_simd_4xn(remove_sub_diag, ci, cj);
- #ifdef GMX_SIMD_IBM_QPX
-             nbl->cj[nbl->ncj].interaction_mask_indices[0] = (nbl->cj[nbl->ncj].excl & 0x000F) >> (0 * 4);
-             nbl->cj[nbl->ncj].interaction_mask_indices[1] = (nbl->cj[nbl->ncj].excl & 0x00F0) >> (1 * 4);
-             nbl->cj[nbl->ncj].interaction_mask_indices[2] = (nbl->cj[nbl->ncj].excl & 0x0F00) >> (2 * 4);
-             nbl->cj[nbl->ncj].interaction_mask_indices[3] = (nbl->cj[nbl->ncj].excl & 0xF000) >> (3 * 4);
- #endif
               nbl->ncj++;
           }
           /* Increase the closing index in i super-cell list */
author	Roland Schulz <roland@utk.edu>
	Fri, 5 Dec 2014 23:31:17 +0000 (18:31 -0500)
committer	Roland Schulz <roland@utk.edu>
	Sun, 7 Dec 2014 00:08:19 +0000 (19:08 -0500)
		1	2
docs/doxygen/suppressions.txt	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/domdec/domdec.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/gmxlib/gmx_thread_affinity.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/gmxpreprocess/readir.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/gmxpreprocess/readpull.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/mdlib/clincs.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_common.h	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/mdlib/nbnxn_pairlist.h	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/mdlib/nbnxn_search.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/mdlib/nbnxn_search_simd_4xn.h	patch \|	diff1 \|	diff2 \|	blob \| history