From: Roland Schulz <roland@utk.edu>
Date: Sun, 11 Dec 2011 20:17:43 +0000 (-0500)
Subject: Merge gromacs-4-6 into master
X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=commitdiff_plain;h=6d7620f75d5b647770cd36740816008c82956ca2;p=alexxy%2Fgromacs.git

Merge gromacs-4-6 into master

Conflicts:
	CMakeLists.txt
	README
	bootstrap
	share/template/.gitignore
	src/gmxlib/.gitignore
	src/gmxlib/CMakeLists.txt
	src/kernel/.gitignore
	src/ngmx/.gitignore
	src/tools/.gitignore
	src/tools/CMakeLists.txt
	src/tools/gmx_dos.c
	src/tools/gmx_energy.c
	src/gromacs/CMakeLists.txt

Change-Id: I18a484ef470a8af9fc0eb80d086eb80f45de2d2c
---

6d7620f75d5b647770cd36740816008c82956ca2
diff --cc src/gromacs/CMakeLists.txt
index f87ebf868b,0000000000..b8310c0761
mode 100644,000000..100644
--- a/src/gromacs/CMakeLists.txt
+++ b/src/gromacs/CMakeLists.txt
@@@ -1,86 -1,0 +1,86 @@@
 +set(LIBGROMACS_SOURCES) # reset the source list; list(APPEND LIBGROMACS_SOURCES ...) further down fills it
 +
 +add_subdirectory(legacyheaders)
 +add_subdirectory(gmxlib)
 +add_subdirectory(mdlib)
 +add_subdirectory(gmxpreprocess)
 +add_subdirectory(analysisdata)
 +add_subdirectory(fatalerror)
 +add_subdirectory(options)
 +add_subdirectory(selection)
 +add_subdirectory(trajectoryanalysis)
 +add_subdirectory(utility)
 +
 +file(GLOB LIBGROMACS_HEADERS *.h) # evaluated at configure time only: re-run cmake after adding a header in this directory
 +install(FILES ${LIBGROMACS_HEADERS} DESTINATION ${INCL_INSTALL_DIR}/gromacs
 +        COMPONENT development)
 +
 +# Assembly kernels are configured only when this is not an OpenMM build.
 +if(NOT GMX_OPENMM)
 +  if(GMX_ASM_USEASM-NASM)
 +    enable_language(ASM-NASM)
 +    # With NASM every source gets an explicit build rule (needed on Windows).
 +    foreach(SRC ${GMX_SSEKERNEL_ASM_SRC})
 +      get_filename_component(FILE_BASE ${SRC} NAME_WE)
 +      set(OBJ ${CMAKE_CURRENT_BINARY_DIR}/${FILE_BASE}${CMAKE_C_OUTPUT_EXTENSION})
 +
 +      add_custom_command(OUTPUT ${OBJ}
 +                         MAIN_DEPENDENCY ${SRC}
 +                         COMMAND ${CMAKE_ASM-NASM_COMPILER} -f ${CMAKE_ASM-NASM_OBJECT_FORMAT} -o ${OBJ} ${SRC})
 +
 +      list(APPEND ALL_ASM_OBJS ${OBJ})
 +    endforeach()
 +    set(GMX_SSEKERNEL_ASM_SRC ${ALL_ASM_OBJS}) # from here on the list names the NASM-built objects
 +  else()
 +    enable_language(ASM-ATT)
 +    set(CMAKE_ASM-ATT_COMPILER ${CMAKE_C_COMPILER}) # AT&T-syntax sources are fed through the C compiler
 +    if(GMX_IA32_ASM)
 +      set_source_files_properties(${GMX_SSEKERNEL_ASM_SRC} PROPERTIES COMPILE_FLAGS "-c -m32")
 +    else()
 +      set_source_files_properties(${GMX_SSEKERNEL_ASM_SRC} PROPERTIES COMPILE_FLAGS "-c -m64")
 +    endif()
 +  endif()
 +endif()
 +
 +list(APPEND LIBGROMACS_SOURCES ${GMXLIB_SOURCES} ${GMX_SSEKERNEL_ASM_SRC} ${MDLIB_SOURCES})
 +
 +# Custom target that re-runs the version generator script on every build
 +# (and so refreshes version.c); it is only set up when USE_VERSION_H is on.
 +if (USE_VERSION_H)
 +    add_custom_target(gmx_version ALL
 +            COMMAND ${CMAKE_COMMAND} 
-                 -D Git_EXECUTABLE="${Git_EXECUTABLE}"
-                 -D Git_VERSION="${Git_VERSION}"
++                -D GIT_EXECUTABLE="${GIT_EXECUTABLE}"
++                -D GIT_VERSION="${GIT_VERSION}"
 +                -D PROJECT_VERSION="${PROJECT_VERSION}"
 +                -D PROJECT_SOURCE_DIR="${PROJECT_SOURCE_DIR}"
 +                -D VERSION_C_CMAKEIN="${CMAKE_CURRENT_SOURCE_DIR}/version.c.cmakein"
 +                -D VERSION_C_OUT="${CMAKE_CURRENT_BINARY_DIR}/version.c"
 +                -P ${CMAKE_SOURCE_DIR}/cmake/gmxGenerateVersionInfo.cmake 
 +            WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/src/gmxlib # NOTE(review): this file lives in src/gromacs - confirm src/gmxlib still exists after the merge
 +            DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/version.c.cmakein
 +            COMMENT "Generating version information")
 +    list(APPEND LIBGROMACS_SOURCES ${CMAKE_CURRENT_BINARY_DIR}/version.c) # written to VERSION_C_OUT by the gmx_version target
 +    set_source_files_properties(${CMAKE_CURRENT_BINARY_DIR}/version.c 
 +                                PROPERTIES GENERATED true)
 +endif (USE_VERSION_H)
 +
 +add_library(libgromacs ${LIBGROMACS_SOURCES})
 +if (USE_VERSION_H)
 +    add_dependencies(libgromacs gmx_version) # build the gmx_version target before compiling the library
 +endif (USE_VERSION_H)
 +target_link_libraries(libgromacs
 +                      ${GMX_EXTRA_LIBRARIES} ${FFT_LIBRARIES} ${XML_LIBRARIES}
 +                      ${THREAD_LIB})
 +set_target_properties(libgromacs PROPERTIES
 +                      OUTPUT_NAME "gromacs${GMX_LIBS_SUFFIX}" # the target is called libgromacs, the file name base is gromacs<suffix>
 +                      SOVERSION ${SOVERSION}
 +                      INSTALL_NAME_DIR "${LIB_INSTALL_DIR}")
 +
 +install(TARGETS libgromacs DESTINATION ${LIB_INSTALL_DIR} COMPONENT libraries)
 +
 +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/libgromacs.pc.cmakein
 +               ${CMAKE_CURRENT_BINARY_DIR}/libgromacs.pc @ONLY) # @ONLY: only @VAR@ references in the template are substituted
 +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libgromacs.pc
 +        DESTINATION ${LIB_INSTALL_DIR}/pkgconfig
 +        RENAME "libgromacs${GMX_LIBS_SUFFIX}.pc"
 +        COMPONENT development)
diff --cc src/gromacs/gmxlib/names.c
index a49e360836,0000000000..470a0c17c9
mode 100644,000000..100644
--- a/src/gromacs/gmxlib/names.c
+++ b/src/gromacs/gmxlib/names.c
@@@ -1,219 -1,0 +1,219 @@@
 +/*
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROningen Mixture of Alchemy and Childrens' Stories
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include "typedefs.h"
 +#include "names.h"
 +
 +/* note: these arrays should correspond to enums in include/types/enums.h */
 +
 +const char *epbc_names[epbcNR+1]=
 +{
 +  "xyz", "no", "xy", "screw", NULL
 +};
 +
 +const char *ens_names[ensNR+1]=
 +{
 +  "Grid","Simple", NULL
 +};
 +
 +const char *ei_names[eiNR+1]=
 +{
 +  "md", "steep", "cg", "bd", "sd", "nm", "l-bfgs", "tpi", "tpic", "sd1", "md-vv", "md-vv-avek",NULL 
 +};
 +
 +const char *bool_names[BOOL_NR+1]=
 +{
 +  "FALSE","TRUE", NULL
 +};
 +
 +const char *yesno_names[BOOL_NR+1]=
 +{
 +  "no","yes", NULL
 +};
 +
 +const char *ptype_str[eptNR+1] = {
 +  "Atom", "Nucleus", "Shell", "Bond", "VSite", NULL
 +};
 +
 +const char *eel_names[eelNR+1] = {
 +  "Cut-off", "Reaction-Field", "Generalized-Reaction-Field",
 +  "PME", "Ewald", "PPPM", "Poisson", "Switch", "Shift", "User", 
 +  "Generalized-Born", "Reaction-Field-nec", "Encad-shift", 
 +  "PME-User", "PME-Switch", "PME-User-Switch", 
 +  "Reaction-Field-zero", NULL
 +};
 +
 +const char *eewg_names[eewgNR+1] = {
 +  "3d", "3dc", NULL
 +};
 +
 +const char *evdw_names[evdwNR+1] = {
 +  "Cut-off", "Switch", "Shift", "User", "Encad-shift", NULL
 +};
 +
 +const char *econstr_names[econtNR+1] = {
 +  "Lincs", "Shake", NULL
 +};
 +
 +const char *egrp_nm[egNR+1] = { 
 +  "Coul-SR","LJ-SR","Buck-SR", "Coul-LR", "LJ-LR", "Buck-LR",
 +  "Coul-14", "LJ-14", NULL
 +};
 +
 +const char *etcoupl_names[etcNR+1] = {
 +  "No", "Berendsen", "Nose-Hoover", "yes", "Andersen", "Andersen-interval", "V-rescale", NULL
 +}; /* entry 3 ("yes") is an alias for "Berendsen"; the trailing NULL terminates the list */
 +
 +const char *epcoupl_names[epcNR+1] = {
 +  "No", "Berendsen", "Parrinello-Rahman", "Isotropic", "MTTK", NULL
 +}; /* entry 3 ("Isotropic") is an alias for "Berendsen"; the trailing NULL terminates the list */
 +
 +const char *epcoupltype_names[epctNR+1] = {
 +  "Isotropic", "Semiisotropic", "Anisotropic", "Surface-Tension", NULL
 +};
 +
 +const char *erefscaling_names[erscNR+1] = {
 +  "No", "All", "COM", NULL
 +};
 +
 +const char *edisre_names[edrNR+1] = {
 +  "No", "Simple", "Ensemble", NULL
 +};
 +
 +const char *edisreweighting_names[edrwNR+1] = {
 +  "Conservative", "Equal", NULL
 +};
 +
 +const char *enbf_names[eNBF_NR+1] = {
 +  "", "LJ", "Buckingham", NULL
 +};
 +
 +const char *ecomb_names[eCOMB_NR+1] = {
 +  "", "Geometric", "Arithmetic", "GeomSigEps", NULL
 +};
 +
 +const char *gtypes[egcNR+1] = {
 +  "T-Coupling", "Energy Mon.", "Acceleration", "Freeze",
 +  "User1", "User2", "VCM", "XTC", "Or. Res. Fit", "QMMM", NULL
 +};
 +
 +const char *efep_names[efepNR+1] = {
 +  "no", "yes", NULL
 +};
 +
 +const char *separate_dhdl_file_names[sepdhdlfileNR+1] = {
 +  "yes", "no", NULL
 +};
 +
 +const char *dhdl_derivatives_names[dhdlderivativesNR+1] = {
 +  "yes", "no", NULL
 +};
 +
 +const char *esol_names[esolNR+1] = {
 +  "No", "SPC", "TIP4p", NULL
 +};
 +
 +const char *enlist_names[enlistNR+1] = {
 +  "Atom-Atom", "SPC-Atom", "SPC-SPC", "TIP4p-Atom", "TIP4p-TIP4p", "CG-CG", NULL
 +};
 +
 +const char *edispc_names[edispcNR+1] = {
 +  "No", "EnerPres", "Ener", "AllEnerPres", "AllEner", NULL
 +};
 +
 +const char *ecm_names[ecmNR+1] = { 
 +  "Linear", "Angular", "None", NULL 
 +};
 +
 +const char *eann_names[eannNR+1] = {
 +  "No", "Single", "Periodic", NULL
 +};
 +
 +const char *eis_names[eisNR+1] = {
 +	"No", "GBSA", NULL
 +};
 +
 +const char *egb_names[egbNR+1] = {
 +  "Still", "HCT", "OBC", NULL
 +};
 +
 +const char *esa_names[esaNR+1] = {
 +  "Ace-approximation", "None", "Still", NULL
 +};
 +
 +const char *ewt_names[ewtNR+1] = {
 +  "9-3", "10-4", "table", "12-6", NULL
 +};
 +
 +const char *epull_names[epullNR+1] = { 
-   "no", "umbrella", "constraint", "constant_force", NULL
++  "no", "umbrella", "constraint", "constant-force", NULL
 +};
 +
 +const char *epullg_names[epullgNR+1] = { 
-   "distance", "direction", "cylinder", "position", "direction_periodic", NULL
++  "distance", "direction", "cylinder", "position", "direction-periodic", NULL
 +};
 +
 +const char *erotg_names[erotgNR+1] = { 
 +  "iso", "iso-pf", "pm", "pm-pf", "rm", "rm-pf", "rm2", "rm2-pf", "flex", "flex-t", "flex2", "flex2-t", NULL
 +};
 +
 +const char *erotg_fitnames[erotgFitNR+1] = { 
 +  "rmsd", "norm", "potential", NULL
 +};
 +
 +const char *eQMmethod_names[eQMmethodNR+1] = {
 +  "AM1", "PM3", "RHF",
 +  "UHF", "DFT", "B3LYP", "MP2", "CASSCF","B3LYPLAN",
 +  "DIRECT", NULL
 +};
 +
 +const char *eQMbasis_names[eQMbasisNR+1] = {
 +  "STO3G", "STO-3G", "3-21G",
 +  "3-21G*", "3-21+G*", "6-21G",
 +  "6-31G", "6-31G*", "6-31+G*",
 +  "6-311G", NULL
 +};
 +
 +const char *eQMMMscheme_names[eQMMMschemeNR+1] = {
 +  "normal", "ONIOM", NULL
 +};
 +
 +const char *eMultentOpt_names[eMultentOptNR+1] = {
 +  "multiple_entries", "no", "use_last", NULL
 +};
 +
diff --cc src/gromacs/gmxlib/network.c
index fedcd8dcba,0000000000..12514a652b
mode 100644,000000..100644
--- a/src/gromacs/gmxlib/network.c
+++ b/src/gromacs/gmxlib/network.c
@@@ -1,638 -1,0 +1,638 @@@
 +/*
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROningen Mixture of Alchemy and Childrens' Stories
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <string.h>
 +#include "gmx_fatal.h"
 +#include "main.h"
 +#include "smalloc.h"
 +#include "network.h"
 +#include "copyrite.h"
 +#include "statutil.h"
 +#include "ctype.h"
 +#include "macros.h"
 +
 +#ifdef GMX_LIB_MPI
 +#include <mpi.h>
 +#endif
 +
 +#ifdef GMX_THREADS
 +#include "tmpi.h"
 +#endif
 +
 +
 +/* The source code in this file should be thread-safe. 
 +      Please keep it that way. */
 +
 +gmx_bool gmx_mpi_initialized(void) /* returns the flag reported by MPI_Initialized(), or 0 in a build without GMX_MPI */
 +{
 +  int n; /* filled by MPI_Initialized(); not used when GMX_MPI is undefined */
 +#ifndef GMX_MPI
 +  return 0;
 +#else
 +  MPI_Initialized(&n);
 +  
 +  return n;
 +#endif
 +}
 +
 +int gmx_setup(int *argc,char **argv,int *nnodes) /* returns this process' rank in MPI_COMM_WORLD and stores the process count in *nnodes */
 +{
 +#ifndef GMX_MPI
 +  gmx_call("gmx_setup"); /* no MPI in this build: pass our own name to gmx_call() and return rank 0 */
 +  return 0;
 +#else
 +  char   buf[256];                /* NOTE(review): not referenced in this function */
 +  int    resultlen;               /* actual length of node name      */
 +  int    i,flag;                  /* NOTE(review): neither is referenced in this function */
 +  int  mpi_num_nodes;
 +  int  mpi_my_rank;
 +  char mpi_hostname[MPI_MAX_PROCESSOR_NAME];
 +
 +  /* Call the MPI routines; MPI_Init itself is only called for a real MPI library (GMX_LIB_MPI) */
 +#ifdef GMX_LIB_MPI
 +#ifdef GMX_FAHCORE
 +  (void) fah_MPI_Init(argc,&argv);
 +#else
 +  (void) MPI_Init(argc,&argv);
 +#endif
 +#endif
 +  (void) MPI_Comm_size( MPI_COMM_WORLD, &mpi_num_nodes );
 +  (void) MPI_Comm_rank( MPI_COMM_WORLD, &mpi_my_rank );
 +  (void) MPI_Get_processor_name( mpi_hostname, &resultlen );
 + 
 +#ifdef GMX_LIB_MPI 
 +  fprintf(stderr,"NNODES=%d, MYRANK=%d, HOSTNAME=%s\n",
 +	  mpi_num_nodes,mpi_my_rank,mpi_hostname);
 +#endif
 +  
 +  *nnodes=mpi_num_nodes;
 +  
 +  return mpi_my_rank;
 +#endif
 +}
 +
 +int  gmx_node_num(void) /* size of MPI_COMM_WORLD, or 1 in a build without GMX_MPI */
 +{
 +#ifndef GMX_MPI
 +  return 1;
 +#else
 +  int i; /* receives the communicator size */
 +  (void) MPI_Comm_size(MPI_COMM_WORLD, &i);
 +  return i;
 +#endif
 +}
 +
 +int gmx_node_rank(void) /* rank of this process in MPI_COMM_WORLD, or 0 in a build without GMX_MPI */
 +{
 +#ifndef GMX_MPI
 +  return 0;
 +#else
 +  int i; /* receives the rank */
 +  (void) MPI_Comm_rank(MPI_COMM_WORLD, &i);
 +  return i;
 +#endif
 +}
 +
 +void gmx_setup_nodecomm(FILE *fplog,t_commrec *cr) /* fills cr->nc with intra-/inter-node communicators and decides whether two-step summing is used */
 +{
 +  gmx_nodecomm_t *nc;
 +  int  n,rank,resultlen,hostnum,i,j,ng,ni;
 +#ifdef GMX_MPI
 +  char mpi_hostname[MPI_MAX_PROCESSOR_NAME],num[MPI_MAX_PROCESSOR_NAME];
 +#endif
 +
 +  /* Many MPI implementations do not optimize MPI_Allreduce
 +   * (and probably also other global communication calls)
 +   * for multi-core nodes connected by a network.
 +   * We can optimize such communication by using one MPI call
 +   * within each node and one between the nodes.
 +   * For MVAPICH2 and Intel MPI this reduces the time for
 +   * the global_stat communication by 25%
 +   * for 2x2-core 3 GHz Woodcrest connected by mixed DDR/SDR Infiniband.
 +   * B. Hess, November 2007
 +   */
 +
 +  nc = &cr->nc;
 +
 +  nc->bUse = FALSE; /* stays FALSE with GMX_THREADS, without GMX_MPI, or when GMX_NO_NODECOMM is set */
 +#ifndef GMX_THREADS
 +  if (getenv("GMX_NO_NODECOMM") == NULL) {
 +#ifdef GMX_MPI
 +    MPI_Comm_size(cr->mpi_comm_mygroup,&n);
 +    MPI_Comm_rank(cr->mpi_comm_mygroup,&rank);
 +    MPI_Get_processor_name(mpi_hostname,&resultlen);
 +    /* This procedure can only differentiate nodes with host names
 +     * that end on unique numbers.
 +     */
 +    i = 0;
 +    j = 0;
 +    /* Only parse the host name up to the first dot */
 +    while(i < resultlen && mpi_hostname[i] != '.') {
 +      if (isdigit((unsigned char)mpi_hostname[i])) { /* cast: isdigit() on a negative plain char is undefined */
 +	num[j++] = mpi_hostname[i];
 +      }
 +      i++;
 +    }
 +    num[j] = '\0';
 +    if (j == 0) {
 +      hostnum = 0; /* no digits in the host name: every such host ends up in group 0 */
 +    } else {
 +      /* Use only the last 9 decimals, so we don't overflow an int */
 +      hostnum = strtol(num + max(0,j-9), NULL, 10); 
 +    }
 +
 +    if (debug) {
 +      fprintf(debug,
 +	      "In gmx_setup_nodecomm: splitting communicator of size %d\n",
 +	      n);
 +      fprintf(debug,"In gmx_setup_nodecomm: hostname '%s', hostnum %d\n",
 +	      mpi_hostname,hostnum);
 +    }
 +
 +    /* The intra-node communicator, split on node number */
 +    MPI_Comm_split(cr->mpi_comm_mygroup,hostnum,rank,&nc->comm_intra);
 +    MPI_Comm_rank(nc->comm_intra,&nc->rank_intra);
 +    if (debug) {
 +      fprintf(debug,"In gmx_setup_nodecomm: node rank %d rank_intra %d\n",
 +	      rank,nc->rank_intra);
 +    }
 +    /* The inter-node communicator, split on rank_intra.
 +     * We actually only need the one for rank=0,
 +     * but it is easier to create them all.
 +     */
 +    MPI_Comm_split(cr->mpi_comm_mygroup,nc->rank_intra,rank,&nc->comm_inter);
 +    /* Check if this really created two step communication */
 +    MPI_Comm_size(nc->comm_inter,&ng);
 +    MPI_Comm_size(nc->comm_intra,&ni);
 +    if (debug) {
 +      fprintf(debug,"In gmx_setup_nodecomm: groups %d, my group size %d\n",
 +	      ng,ni);
 +    }
 +    if ((ng > 1 && ng < n) || (ni > 1 && ni < n)) {
 +      nc->bUse = TRUE;
 +      if (fplog)
 +	fprintf(fplog,"Using two step summing over %d groups of on average %.1f processes\n\n",ng,(real)n/(real)ng);
 +      if (nc->rank_intra > 0) /* only the intra-node roots keep the inter-node communicator */
 +	MPI_Comm_free(&nc->comm_inter);
 +    } else {
 +      /* One group or all processes in a separate group, use normal summing */
 +      MPI_Comm_free(&nc->comm_inter);
 +      MPI_Comm_free(&nc->comm_intra);
 +    }
 +#endif
 +  }
 +#endif
 +}
 +
 +void gmx_barrier(const t_commrec *cr) /* MPI_Barrier over cr->mpi_comm_mygroup */
 +{
 +#ifndef GMX_MPI
 +  gmx_call("gmx_barrier"); /* no MPI in this build: pass our own name to gmx_call() */
 +#else
 +  MPI_Barrier(cr->mpi_comm_mygroup);
 +#endif
 +}
 +
 +void gmx_abort(int noderank,int nnodes,int errorno) /* prints a halting message and terminates the program */
 +{
 +#ifndef GMX_MPI
 +  gmx_call("gmx_abort"); /* no MPI in this build: pass our own name to gmx_call() */
 +#else
 +#ifdef GMX_THREADS
 +  fprintf(stderr,"Halting program %s\n",ShortProgram());
 +  thanx(stderr);
 +  exit(1); /* note: this branch ignores errorno, the exit status is always 1 */
 +#else
 +  if (nnodes > 1)
 +  {
 +      fprintf(stderr,"Halting parallel program %s on CPU %d out of %d\n",
 +              ShortProgram(),noderank,nnodes);
 +  }
 +  else
 +  {
 +      fprintf(stderr,"Halting program %s\n",ShortProgram());
 +  }
 +
 +  thanx(stderr);
 +  MPI_Abort(MPI_COMM_WORLD,errorno); /* errorno is handed to MPI_Abort as the error code */
 +  exit(1); /* fallback in case MPI_Abort returns */
 +#endif
 +#endif
 +}
 +
 +void gmx_bcast(int nbytes,void *b,const t_commrec *cr) /* broadcast nbytes bytes at b from MASTERRANK(cr) over cr->mpi_comm_mygroup */
 +{
 +#ifndef GMX_MPI
 +  gmx_call("gmx_bcast"); /* was "gmx_bast": pass the routine's real name, as the other functions here do */
 +#else
 +  MPI_Bcast(b,nbytes,MPI_BYTE,MASTERRANK(cr),cr->mpi_comm_mygroup);
 +#endif
 +}
 +
 +void gmx_bcast_sim(int nbytes,void *b,const t_commrec *cr) /* broadcast nbytes bytes at b from MASTERRANK(cr) over cr->mpi_comm_mysim */
 +{
 +#ifndef GMX_MPI
 +  gmx_call("gmx_bcast_sim"); /* was "gmx_bast": pass the routine's real name, as the other functions here do */
 +#else
 +  MPI_Bcast(b,nbytes,MPI_BYTE,MASTERRANK(cr),cr->mpi_comm_mysim);
 +#endif
 +}
 +
 +void gmx_sumd(int nr,double r[],const t_commrec *cr)
 +{
 +#ifndef GMX_MPI
 +    gmx_call("gmx_sumd");
 +#else
 +#if defined(MPI_IN_PLACE_EXISTS) || defined(GMX_THREADS)
 +    if (cr->nc.bUse) {
 +        if (cr->nc.rank_intra == 0)
 +        {
 +            /* Use two step summing. */
 +            MPI_Reduce(MPI_IN_PLACE,r,nr,MPI_DOUBLE,MPI_SUM,0,
 +                       cr->nc.comm_intra);
 +            /* Sum the roots of the internal (intra) buffers. */
 +            MPI_Allreduce(MPI_IN_PLACE,r,nr,MPI_DOUBLE,MPI_SUM,
 +                          cr->nc.comm_inter);
 +        }
 +        else
 +        {
 +            /* This is here because of the silly MPI specification
 +                that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
 +            MPI_Reduce(r,NULL,nr,MPI_DOUBLE,MPI_SUM,0,cr->nc.comm_intra);
 +        }
 +        MPI_Bcast(r,nr,MPI_DOUBLE,0,cr->nc.comm_intra);
 +    } 
 +    else 
 +    {
 +        MPI_Allreduce(MPI_IN_PLACE,r,nr,MPI_DOUBLE,MPI_SUM, 
 +                      cr->mpi_comm_mygroup);
 +    }
 +#else
 +    int i;
 +
 +    if (nr > cr->mpb->dbuf_alloc) {
 +        cr->mpb->dbuf_alloc = nr;
 +        srenew(cr->mpb->dbuf,cr->mpb->dbuf_alloc);
 +    }
 +    if (cr->nc.bUse) {
 +        /* Use two step summing */
 +        MPI_Allreduce(r,cr->mpb->dbuf,nr,MPI_DOUBLE,MPI_SUM,cr->nc.comm_intra);
 +        if (cr->nc.rank_intra == 0) {
 +            /* Sum with the buffers reversed */
 +            MPI_Allreduce(cr->mpb->dbuf,r,nr,MPI_DOUBLE,MPI_SUM, 
 +                          cr->nc.comm_inter);
 +        }
 +        MPI_Bcast(r,nr,MPI_DOUBLE,0,cr->nc.comm_intra);
 +    } else {
 +        MPI_Allreduce(r,cr->mpb->dbuf,nr,MPI_DOUBLE,MPI_SUM,
 +                      cr->mpi_comm_mygroup);
 +        for(i=0; i<nr; i++)
 +            r[i] = cr->mpb->dbuf[i];
 +    }
 +#endif
 +#endif
 +}
 +
 +void gmx_sumf(int nr,float r[],const t_commrec *cr)
 +{
 +#ifndef GMX_MPI
 +    gmx_call("gmx_sumf");
 +#else
 +#if defined(MPI_IN_PLACE_EXISTS) || defined(GMX_THREADS)
 +    if (cr->nc.bUse) {
 +        /* Use two step summing.  */
 +        if (cr->nc.rank_intra == 0)
 +        {
 +            MPI_Reduce(MPI_IN_PLACE,r,nr,MPI_FLOAT,MPI_SUM,0,
 +                       cr->nc.comm_intra);
 +            /* Sum the roots of the internal (intra) buffers */
 +            MPI_Allreduce(MPI_IN_PLACE,r,nr,MPI_FLOAT,MPI_SUM,
 +                          cr->nc.comm_inter);
 +        }
 +        else
 +        {
 +            /* This is here because of the silly MPI specification
 +                that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
 +            MPI_Reduce(r,NULL,nr,MPI_FLOAT,MPI_SUM,0,cr->nc.comm_intra);
 +        }
 +        MPI_Bcast(r,nr,MPI_FLOAT,0,cr->nc.comm_intra);
 +    } 
 +    else 
 +    {
 +        MPI_Allreduce(MPI_IN_PLACE,r,nr,MPI_FLOAT,MPI_SUM,cr->mpi_comm_mygroup);
 +    }
 +#else
 +    int i;
 +
 +    if (nr > cr->mpb->fbuf_alloc) {
 +        cr->mpb->fbuf_alloc = nr;
 +        srenew(cr->mpb->fbuf,cr->mpb->fbuf_alloc);
 +    }
 +    if (cr->nc.bUse) {
 +        /* Use two step summing */
 +        MPI_Allreduce(r,cr->mpb->fbuf,nr,MPI_FLOAT,MPI_SUM,cr->nc.comm_intra);
 +        if (cr->nc.rank_intra == 0) {
 +            /* Sum with the buffers reversed */
 +            MPI_Allreduce(cr->mpb->fbuf,r,nr,MPI_FLOAT,MPI_SUM, 
 +                          cr->nc.comm_inter);
 +        }
 +        MPI_Bcast(r,nr,MPI_FLOAT,0,cr->nc.comm_intra);
 +    } else {
 +        MPI_Allreduce(r,cr->mpb->fbuf,nr,MPI_FLOAT,MPI_SUM,
 +                      cr->mpi_comm_mygroup);
 +        for(i=0; i<nr; i++)
 +            r[i] = cr->mpb->fbuf[i];
 +    }
 +#endif
 +#endif
 +}
 +
 +void gmx_sumi(int nr,int r[],const t_commrec *cr) /* element-wise sum of r[0..nr-1] over cr->mpi_comm_mygroup; result replaces r */
 +{
 +#ifndef GMX_MPI
 +    gmx_call("gmx_sumi");
 +#else
 +#if defined(MPI_IN_PLACE_EXISTS) || defined(GMX_THREADS)
 +    if (cr->nc.bUse) {
 +        /* Use two step summing */
 +        if (cr->nc.rank_intra == 0) 
 +        {
 +            MPI_Reduce(MPI_IN_PLACE,r,nr,MPI_INT,MPI_SUM,0,cr->nc.comm_intra);
 +            /* Sum the roots of the internal (intra) buffers */
 +            MPI_Allreduce(MPI_IN_PLACE,r,nr,MPI_INT,MPI_SUM,cr->nc.comm_inter);
 +        }
 +        else
 +        {
 +            /* This is here because of the silly MPI specification
 +                that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
 +            MPI_Reduce(r,NULL,nr,MPI_INT,MPI_SUM,0,cr->nc.comm_intra);
 +        }
 +        MPI_Bcast(r,nr,MPI_INT,0,cr->nc.comm_intra); /* hand the total from the intra root back to its group */
 +    } 
 +    else 
 +    {
 +        MPI_Allreduce(MPI_IN_PLACE,r,nr,MPI_INT,MPI_SUM,cr->mpi_comm_mygroup);
 +    }
 +#else
 +    int i;
 +
 +    if (nr > cr->mpb->ibuf_alloc) { /* grow the scratch buffer cr->mpb->ibuf to hold nr ints */
 +        cr->mpb->ibuf_alloc = nr;
 +        srenew(cr->mpb->ibuf,cr->mpb->ibuf_alloc);
 +    }
 +    if (cr->nc.bUse) {
 +        /* Use two step summing */
 +        MPI_Allreduce(r,cr->mpb->ibuf,nr,MPI_INT,MPI_SUM,cr->nc.comm_intra);
 +        if (cr->nc.rank_intra == 0) {
 +            /* Sum with the buffers reversed */
 +            MPI_Allreduce(cr->mpb->ibuf,r,nr,MPI_INT,MPI_SUM,cr->nc.comm_inter);
 +        }
 +        MPI_Bcast(r,nr,MPI_INT,0,cr->nc.comm_intra);
 +    } else {
 +        MPI_Allreduce(r,cr->mpb->ibuf,nr,MPI_INT,MPI_SUM,cr->mpi_comm_mygroup);
 +        for(i=0; i<nr; i++) /* copy the summed values back into the caller's array */
 +            r[i] = cr->mpb->ibuf[i];
 +    }
 +#endif
 +#endif
 +}
 +
 +void gmx_sumli(int nr,gmx_large_int_t r[],const t_commrec *cr) /* element-wise sum of r[0..nr-1] over cr->mpi_comm_mygroup; result replaces r */
 +{
 +#ifndef GMX_MPI
 +    gmx_call("gmx_sumli");
 +#else
 +#if defined(MPI_IN_PLACE_EXISTS) || defined(GMX_THREADS)
 +    if (cr->nc.bUse) {
 +        /* Use two step summing */
 +        if (cr->nc.rank_intra == 0) 
 +        {
 +            MPI_Reduce(MPI_IN_PLACE,r,nr,GMX_MPI_LARGE_INT,MPI_SUM,0,
 +                       cr->nc.comm_intra);
 +            /* Sum the roots of the internal (intra) buffers */
 +            MPI_Allreduce(MPI_IN_PLACE,r,nr,GMX_MPI_LARGE_INT,MPI_SUM,
 +                          cr->nc.comm_inter);
 +        }
 +        else
 +        {
 +            /* This is here because of the silly MPI specification
 +                that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
 +            MPI_Reduce(r,NULL,nr,GMX_MPI_LARGE_INT,MPI_SUM,0,cr->nc.comm_intra);
 +        }
 +        MPI_Bcast(r,nr,GMX_MPI_LARGE_INT,0,cr->nc.comm_intra); /* hand the total from the intra root back to its group */
 +    } 
 +    else 
 +    {
 +        MPI_Allreduce(MPI_IN_PLACE,r,nr,GMX_MPI_LARGE_INT,MPI_SUM,cr->mpi_comm_mygroup);
 +    }
 +#else
 +    int i;
 +
-     if (nr > cr->mpb->ibuf_alloc) {
-         cr->mpb->ibuf_alloc = nr;
-         srenew(cr->mpb->ibuf,cr->mpb->ibuf_alloc);
++    if (nr > cr->mpb->libuf_alloc) {
++        cr->mpb->libuf_alloc = nr;
++        srenew(cr->mpb->libuf,cr->mpb->libuf_alloc);
 +    }
 +    if (cr->nc.bUse) {
 +        /* Use two step summing */
-         MPI_Allreduce(r,cr->mpb->ibuf,nr,GMX_MPI_LARGE_INT,MPI_SUM,
++        MPI_Allreduce(r,cr->mpb->libuf,nr,GMX_MPI_LARGE_INT,MPI_SUM,
 +                      cr->nc.comm_intra);
 +        if (cr->nc.rank_intra == 0) {
 +            /* Sum with the buffers reversed */
-             MPI_Allreduce(cr->mpb->ibuf,r,nr,GMX_MPI_LARGE_INT,MPI_SUM,
++            MPI_Allreduce(cr->mpb->libuf,r,nr,GMX_MPI_LARGE_INT,MPI_SUM,
 +                          cr->nc.comm_inter);
 +        }
 +        MPI_Bcast(r,nr,GMX_MPI_LARGE_INT,0,cr->nc.comm_intra);
 +    } else {
-         MPI_Allreduce(r,cr->mpb->ibuf,nr,GMX_MPI_LARGE_INT,MPI_SUM,
++        MPI_Allreduce(r,cr->mpb->libuf,nr,GMX_MPI_LARGE_INT,MPI_SUM,
 +                      cr->mpi_comm_mygroup);
 +        for(i=0; i<nr; i++) /* copy the summed values back into the caller's array */
-             r[i] = cr->mpb->ibuf[i];
++            r[i] = cr->mpb->libuf[i];
 +    }
 +#endif
 +#endif
 +}
 +
 +
 +
 +#ifdef GMX_MPI
 +void gmx_sumd_comm(int nr,double r[],MPI_Comm mpi_comm)
 +{
 +#if defined(MPI_IN_PLACE_EXISTS) || defined(GMX_THREADS)
 +    MPI_Allreduce(MPI_IN_PLACE,r,nr,MPI_DOUBLE,MPI_SUM,mpi_comm);
 +#else
 +    /* this function is only used in code that is not performance critical,
 +       (during setup, when comm_rec is not the appropriate communication  
 +       structure), so this isn't as bad as it looks. */
 +    double *buf;
 +    int i;
 +
 +    snew(buf, nr);
 +    MPI_Allreduce(r,buf,nr,MPI_DOUBLE,MPI_SUM,mpi_comm);
 +    for(i=0; i<nr; i++)
 +        r[i] = buf[i];
 +    sfree(buf);
 +#endif
 +}
 +#endif
 +
 +#ifdef GMX_MPI
 +void gmx_sumf_comm(int nr,float r[],MPI_Comm mpi_comm)
 +{
 +#if defined(MPI_IN_PLACE_EXISTS) || defined(GMX_THREADS)
 +    MPI_Allreduce(MPI_IN_PLACE,r,nr,MPI_FLOAT,MPI_SUM,mpi_comm);
 +#else
 +    /* this function is only used in code that is not performance critical,
 +       (during setup, when comm_rec is not the appropriate communication  
 +       structure), so this isn't as bad as it looks. */
 +    float *buf;
 +    int i;
 +
 +    snew(buf, nr);
 +    MPI_Allreduce(r,buf,nr,MPI_FLOAT,MPI_SUM,mpi_comm);
 +    for(i=0; i<nr; i++)
 +        r[i] = buf[i];
 +    sfree(buf);
 +#endif
 +}
 +#endif
 +
 +void gmx_sumd_sim(int nr,double r[],const gmx_multisim_t *ms)
 +{
 +#ifndef GMX_MPI
 +  gmx_call("gmx_sumd_sim");
 +#else
 +  gmx_sumd_comm(nr,r,ms->mpi_comm_masters);
 +#endif
 +}
 +
 +void gmx_sumf_sim(int nr,float r[],const gmx_multisim_t *ms)
 +{
 +#ifndef GMX_MPI
 +  gmx_call("gmx_sumf_sim");
 +#else
 +  gmx_sumf_comm(nr,r,ms->mpi_comm_masters);
 +#endif
 +}
 +
 +void gmx_sumi_sim(int nr,int r[], const gmx_multisim_t *ms)
 +{
 +#ifndef GMX_MPI
 +    gmx_call("gmx_sumi_sim");
 +#else
 +#if defined(MPI_IN_PLACE_EXISTS) || defined(GMX_THREADS)
 +    MPI_Allreduce(MPI_IN_PLACE,r,nr,MPI_INT,MPI_SUM,ms->mpi_comm_masters);
 +#else
 +    /* this is thread-unsafe, but it will do for now: */
 +    int i;
 +
 +    if (nr > ms->mpb->ibuf_alloc) {
 +        ms->mpb->ibuf_alloc = nr;
 +        srenew(ms->mpb->ibuf,ms->mpb->ibuf_alloc);
 +    }
 +    MPI_Allreduce(r,ms->mpb->ibuf,nr,MPI_INT,MPI_SUM,ms->mpi_comm_masters);
 +    for(i=0; i<nr; i++)
 +        r[i] = ms->mpb->ibuf[i];
 +#endif
 +#endif
 +}
 +
 +void gmx_sumli_sim(int nr,gmx_large_int_t r[], const gmx_multisim_t *ms)
 +{
 +#ifndef GMX_MPI
 +    gmx_call("gmx_sumli_sim");
 +#else
 +#if defined(MPI_IN_PLACE_EXISTS) || defined(GMX_THREADS)
 +    MPI_Allreduce(MPI_IN_PLACE,r,nr,GMX_MPI_LARGE_INT,MPI_SUM,
 +                  ms->mpi_comm_masters);
 +#else
 +    /* this is thread-unsafe, but it will do for now: */
 +    int i;
 +
-     if (nr > ms->mpb->ibuf_alloc) {
-         ms->mpb->ibuf_alloc = nr;
-         srenew(ms->mpb->ibuf,ms->mpb->ibuf_alloc);
++    if (nr > ms->mpb->libuf_alloc) {
++        ms->mpb->libuf_alloc = nr;
++        srenew(ms->mpb->libuf,ms->mpb->libuf_alloc);
 +    }
-     MPI_Allreduce(r,ms->mpb->ibuf,nr,GMX_MPI_LARGE_INT,MPI_SUM,
++    MPI_Allreduce(r,ms->mpb->libuf,nr,GMX_MPI_LARGE_INT,MPI_SUM,
 +                  ms->mpi_comm_masters);
 +    for(i=0; i<nr; i++)
-         r[i] = ms->mpb->ibuf[i];
++        r[i] = ms->mpb->libuf[i];
 +#endif
 +#endif
 +}
 +
 +
 +void gmx_finalize(void) /* barrier on MPI_COMM_WORLD followed by MPI_Finalize(); returns early if MPI is already finalized */
 +{
 +#ifndef GMX_MPI
 +  gmx_call("gmx_finalize"); /* no MPI in this build: pass our own name to gmx_call() */
 +#else
 +  int ret; /* return code of MPI_Finalize, only reported to the debug file */
 +
 +  /* just as a check; we don't want to finalize twice */
 +  int finalized;
 +  MPI_Finalized(&finalized);
 +  if (finalized)
 +      return;
 +
 +  /* We sync the processes here to try to avoid problems
 +   * with buggy MPI implementations that could cause
 +   * unfinished processes to terminate.
 +   */
 +  MPI_Barrier(MPI_COMM_WORLD);
 +
 +  /* (disabled code: this function has no cr argument, so the communicators below are not freed here)
 +  if (DOMAINDECOMP(cr)) {
 +    if (cr->npmenodes > 0 || cr->dd->bCartesian) 
 +      MPI_Comm_free(&cr->mpi_comm_mygroup);
 +    if (cr->dd->bCartesian)
 +      MPI_Comm_free(&cr->mpi_comm_mysim);
 +  }
 +  */
 +
 +  /* Apparently certain mpich implementations cause problems
 +   * with MPI_Finalize. In that case comment out MPI_Finalize.
 +   */
 +  if (debug)
 +    fprintf(debug,"Will call MPI_Finalize now\n");
 +
 +  ret = MPI_Finalize();
 +  if (debug)
 +    fprintf(debug,"Return code from MPI_Finalize = %d\n",ret);
 +#endif
 +}
 +
diff --cc src/gromacs/gmxlib/txtdump.c
index dc3b884e90,0000000000..33c98378c7
mode 100644,000000..100644
--- a/src/gromacs/gmxlib/txtdump.c
+++ b/src/gromacs/gmxlib/txtdump.c
@@@ -1,1501 -1,0 +1,1501 @@@
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROningen Mixture of Alchemy and Childrens' Stories
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +/* This file is completely threadsafe - please keep it that way! */
 +#ifdef GMX_THREADS
 +#include <thread_mpi.h>
 +#endif
 +
 +
 +#include <stdio.h>
 +#include "smalloc.h"
 +#include "typedefs.h"
 +#include "names.h"
 +#include "txtdump.h"
 +#include "string2.h"
 +#include "vec.h"
 +
 +
 +/* Write n blanks to fp (none when n <= 0) and return n unchanged. */
 +int pr_indent(FILE *fp,int n)
 +{
 +  int left;
 +  for (left=n; left>0; left--) (void) fprintf(fp," ");
 +  return n;
 +}
 +
 +/* Return 1 if p is non-NULL; otherwise print "<title>: not available" and return 0. */
 +int available(FILE *fp,void *p,int indent,const char *title)
 +{
 +  if (p != NULL) return 1;
 +  if (indent > 0)
 +    pr_indent(fp,indent);
 +  (void) fprintf(fp,"%s: not available\n",title);
 +  return 0;
 +}
 +
 +int pr_title(FILE *fp,int indent,const char *title)
 +{
 +  pr_indent(fp,indent);         /* leading blanks for this level */
 +  fprintf(fp,"%s:\n",title);    /* heading line "title:" */
 +  return indent+INDENT;         /* indent to use for the entries below the heading */
 +}
 +
 +int pr_title_n(FILE *fp,int indent,const char *title,int n)
 +{
 +  pr_indent(fp,indent);              /* leading blanks for this level */
 +  fprintf(fp,"%s (%d):\n",title,n);  /* heading line "title (n):" */
 +  return indent+INDENT;              /* indent to use for the entries below the heading */
 +}
 +
 +int pr_title_nxn(FILE *fp,int indent,const char *title,int n1,int n2)
 +{
 +  pr_indent(fp,indent);                       /* leading blanks for this level */
 +  fprintf(fp,"%s (%dx%d):\n",title,n1,n2);    /* heading line "title (n1xn2):" */
 +  return indent+INDENT;                       /* indent to use for the entries below the heading */
 +}
 +
 +/* Print the n ints of vec one per line as "title[i]=value"; the index is -1 if !bShowNumbers. */
 +void pr_ivec(FILE *fp,int indent,const char *title,int vec[],int n, gmx_bool bShowNumbers)
 +{
 +  int k,sub;
 +
 +  if (!available(fp,vec,indent,title))
 +    return;
 +
 +  sub = pr_title_n(fp,indent,title,n);
 +  for (k=0; k<n; k++) {
 +    pr_indent(fp,sub);
 +    fprintf(fp,"%s[%d]=%d\n",title,bShowNumbers ? k : -1,vec[k]);
 +  }
 +}
 +
 +/* Like pr_ivec, but a run of 3 or more values that each exceed their predecessor
 + * by one is collapsed into a single "title[i,...,j] = {first,...,last}" line. */
 +void pr_ivec_block(FILE *fp,int indent,const char *title,int vec[],int n, gmx_bool bShowNumbers)
 +{
 +    int first,end;
 +
 +    if (!available(fp,vec,indent,title))
 +    {
 +        return;
 +    }
 +
 +    indent = pr_title_n(fp,indent,title,n);
 +    for (first = 0; first < n; )
 +    {
 +        /* Locate the (exclusive) end of the consecutive run that starts at first */
 +        for (end = first+1; end < n && vec[end] == vec[end-1]+1; end++)
 +        {
 +        }
 +        if (end - first >= 3)
 +        {
 +            pr_indent(fp,indent);
 +            fprintf(fp,"%s[%d,...,%d] = {%d,...,%d}\n",
 +                    title,
 +                    bShowNumbers ? first : -1,
 +                    bShowNumbers ? end-1 : -1,
 +                    vec[first],vec[end-1]);
 +            first = end;
 +        }
 +        else
 +        {
 +            /* Run shorter than 3: print every element on its own line */
 +            for ( ; first < end; first++)
 +            {
 +                pr_indent(fp,indent);
 +                fprintf(fp,"%s[%d]=%d\n",title,bShowNumbers ? first : -1,vec[first]);
 +            }
 +        }
 +    }
 +}
 +
 +/* Print the n booleans of vec one per line as "title[i]=<BOOL(vec[i])>". */
 +void pr_bvec(FILE *fp,int indent,const char *title,gmx_bool vec[],int n, gmx_bool bShowNumbers)
 +{
 +  int k;
 +
 +  if (!available(fp,vec,indent,title))
 +    return;
 +
 +  indent = pr_title_n(fp,indent,title,n);
 +  for (k=0; k<n; k++) {
 +    pr_indent(fp,indent);
 +    fprintf(fp,"%s[%d]=%s\n",title,bShowNumbers ? k : -1,
 +            BOOL(vec[k]));
 +  }
 +}
 +
 +/* Print n ivec entries one per line as "title[i]={c0, c1, ...}" with DIM components each. */
 +void pr_ivecs(FILE *fp,int indent,const char *title,ivec vec[],int n, gmx_bool bShowNumbers)
 +{
 +  int row,d;
 +
 +  if (!available(fp,vec,indent,title))
 +    return;
 +  indent = pr_title_nxn(fp,indent,title,n,DIM);
 +  for (row=0; row<n; row++) {
 +    pr_indent(fp,indent);
 +    fprintf(fp,"%s[%d]={",title,bShowNumbers ? row : -1);
 +    for (d=0; d<DIM; d++) {
 +      /* the separator precedes every component except the first */
 +      if (d > 0)
 +        fprintf(fp,", ");
 +      fprintf(fp,"%d",vec[row][d]);
 +    }
 +    fprintf(fp,"}\n");
 +  }
 +}
 +
 +/* Print the n reals of vec one per line as "title[i]=value" using %12.5e. */
 +void pr_rvec(FILE *fp,int indent,const char *title,real vec[],int n, gmx_bool bShowNumbers)
 +{
 +  int k;
 +
 +  if (!available(fp,vec,indent,title))
 +    return;
 +
 +  indent = pr_title_n(fp,indent,title,n);
 +  for (k=0; k<n; k++) {
 +    (void) pr_indent(fp,indent);
 +    (void) fprintf(fp,"%s[%d]=%12.5e\n",title,bShowNumbers ? k : -1,vec[k]);
 +  }
 +}
 +
 +/* Print the n doubles of vec one per line as "title[i]=value" using %12.5e. */
 +void pr_dvec(FILE *fp,int indent,const char *title,double vec[],int n, gmx_bool bShowNumbers)
 +{
 +  int k;
 +
 +  if (!available(fp,vec,indent,title))
 +    return;
 +
 +  indent = pr_title_n(fp,indent,title,n);
 +  for (k=0; k<n; k++) {
 +    (void) pr_indent(fp,indent);
 +    (void) fprintf(fp,"%s[%d]=%12.5e\n",title,bShowNumbers ? k : -1,vec[k]);
 +  }
 +}
 +
 +
 +/*
 +void pr_mat(FILE *fp,int indent,char *title,matrix m)
 +{
 +  int i,j;
 +  
 +  if (available(fp,m,indent,title)) {  
 +    indent=pr_title_n(fp,indent,title,n);
 +    for(i=0; i<n; i++) {
 +      pr_indent(fp,indent);
 +      fprintf(fp,"%s[%d]=%12.5e %12.5e %12.5e\n",
 +	      title,bShowNumbers?i:-1,m[i][XX],m[i][YY],m[i][ZZ]);
 +    }
 +  }
 +}
 +*/
 +
 +/* Print n rvec entries as "title[    i]={...} len=<norm(vec[i])>", one per line. */
 +void pr_rvecs_len(FILE *fp,int indent,const char *title,rvec vec[],int n)
 +{
 +  int row,d;
 +  if (!available(fp,vec,indent,title))
 +    return;
 +  indent = pr_title_nxn(fp,indent,title,n,DIM);
 +  for (row=0; row<n; row++) {
 +    pr_indent(fp,indent);
 +    fprintf(fp,"%s[%5d]={",title,row);
 +    for (d=0; d<DIM; d++) {
 +      if (d > 0)
 +        fprintf(fp,", ");
 +      fprintf(fp,"%12.5e",vec[row][d]);
 +    }
 +    fprintf(fp,"} len=%12.5e\n",norm(vec[row]));
 +  }
 +}
 +
 +/* Print n rvec entries as "title[    i]={...}", one per line. Components are
 + * written with %15.8e when the LONGFORMAT environment variable is set and
 + * with %12.5e otherwise.
 + */
 +void pr_rvecs(FILE *fp,int indent,const char *title,rvec vec[],int n)
 +{
 +  const char *format;
 +  int row,d;
 +
 +  /* The environment is consulted on every call, also when vec is NULL */
 +  format = (getenv("LONGFORMAT") != NULL) ? "%15.8e" : "%12.5e";
 +
 +  if (!available(fp,vec,indent,title))
 +    return;
 +  indent = pr_title_nxn(fp,indent,title,n,DIM);
 +  for (row=0; row<n; row++) {
 +    pr_indent(fp,indent);
 +    fprintf(fp,"%s[%5d]={",title,row);
 +    for (d=0; d<DIM; d++) {
 +      if (d > 0)
 +        fprintf(fp,", ");
 +      fprintf(fp,format,vec[row][d]);
 +    }
 +    fprintf(fp,"}\n");
 +  }
 +}
 +
 +
 +/* Print "title:" and a tab followed by all n reals on one line (%10g each). */
 +void pr_reals(FILE *fp,int indent,const char *title,real *vec,int n)
 +{
 +  int k;
 +  if (!available(fp,vec,indent,title))
 +    return;
 +  pr_indent(fp,indent);
 +  fprintf(fp,"%s:\t",title);
 +  for (k=0; k<n; k++)
 +    fprintf(fp,"  %10g",vec[k]);
 +  fprintf(fp,"\n");
 +}
 +
 +/* Print "title:" and a tab followed by all n doubles on one line (%10g each). */
 +void pr_doubles(FILE *fp,int indent,const char *title,double *vec,int n)
 +{
 +  int k;
 +  if (!available(fp,vec,indent,title))
 +    return;
 +  pr_indent(fp,indent);
 +  fprintf(fp,"%s:\t",title);
 +  for (k=0; k<n; k++)
 +    fprintf(fp,"  %10g",vec[k]);
 +  fprintf(fp,"\n");
 +}
 +
 +static void pr_int(FILE *fp,int indent,const char *title,int i)
 +{
 +  (void) pr_indent(fp,indent);                 /* leading blanks */
 +  (void) fprintf(fp,"%-20s = %d\n",title,i);   /* name left-aligned in 20 columns */
 +}
 +
 +static void pr_gmx_large_int(FILE *fp,int indent,const char *title,gmx_large_int_t i)
 +{
 +  char stepbuf[STEPSTRSIZE];   /* scratch buffer handed to gmx_step_str */
 +
 +  (void) pr_indent(fp,indent);
 +  (void) fprintf(fp,"%-20s = %s\n",title,gmx_step_str(i,stepbuf));
 +}
 +
 +static void pr_real(FILE *fp,int indent,const char *title,real r)
 +{
 +  (void) pr_indent(fp,indent);                 /* leading blanks */
 +  (void) fprintf(fp,"%-20s = %g\n",title,r);   /* name left-aligned in 20 columns */
 +}
 +
 +static void pr_double(FILE *fp,int indent,const char *title,double d)
 +{
 +  (void) pr_indent(fp,indent);                 /* leading blanks */
 +  (void) fprintf(fp,"%-20s = %g\n",title,d);   /* name left-aligned in 20 columns */
 +}
 +
 +static void pr_str(FILE *fp,int indent,const char *title,const char *s)
 +{
 +  (void) pr_indent(fp,indent);                 /* leading blanks */
 +  (void) fprintf(fp,"%-20s = %s\n",title,s);   /* name left-aligned in 20 columns */
 +}
 +
 +void pr_qm_opts(FILE *fp,int indent,const char *title,t_grpopts *opts)
 +{
 +  /* Dump the per-QM-group arrays of opts (ngQM entries each) to fp.
 +   * The "title:" heading is written without indentation. */
 +  fprintf(fp,"%s:\n",title);
 +  
 +  pr_int(fp,indent,"ngQM",opts->ngQM);
 +  if (opts->ngQM > 0) {
 +    pr_ivec(fp,indent,"QMmethod",opts->QMmethod,opts->ngQM,FALSE);
 +    pr_ivec(fp,indent,"QMbasis",opts->QMbasis,opts->ngQM,FALSE);
 +    pr_ivec(fp,indent,"QMcharge",opts->QMcharge,opts->ngQM,FALSE);
 +    pr_ivec(fp,indent,"QMmult",opts->QMmult,opts->ngQM,FALSE);
 +    pr_bvec(fp,indent,"bSH",opts->bSH,opts->ngQM,FALSE);
 +    pr_ivec(fp,indent,"CASorbitals",opts->CASorbitals,opts->ngQM,FALSE);
 +    pr_ivec(fp,indent,"CASelectrons",opts->CASelectrons,opts->ngQM,FALSE);
 +    pr_rvec(fp,indent,"SAon",opts->SAon,opts->ngQM,FALSE);
 +    pr_rvec(fp,indent,"SAoff",opts->SAoff,opts->ngQM,FALSE); /* was a second copy of the SAon line */
 +    pr_ivec(fp,indent,"SAsteps",opts->SAsteps,opts->ngQM,FALSE);
 +    pr_bvec(fp,indent,"bOPT",opts->bOPT,opts->ngQM,FALSE);
 +    pr_bvec(fp,indent,"bTS",opts->bTS,opts->ngQM,FALSE);
 +  }
 +}
 +
 +static void pr_grp_opts(FILE *out,int indent,const char *title,t_grpopts *opts,
 +			gmx_bool bMDPformat)
 +{
 +  int i,m,j;
 +  /* Dump the group options in opts to out; the "title:" line is skipped when bMDPformat is set */
 +  if (!bMDPformat)
 +    fprintf(out,"%s:\n",title);
 +  
 +  pr_indent(out,indent);
 +  fprintf(out,"nrdf%s",bMDPformat ? " = " : ":");
 +  for(i=0; (i<opts->ngtc); i++)
 +    fprintf(out,"  %10g",opts->nrdf[i]);
 +  fprintf(out,"\n");
 +  
 +  pr_indent(out,indent);
-   fprintf(out,"ref_t%s",bMDPformat ? " = " : ":");
++  fprintf(out,"ref-t%s",bMDPformat ? " = " : ":");
 +  for(i=0; (i<opts->ngtc); i++)
 +    fprintf(out,"  %10g",opts->ref_t[i]);
 +  fprintf(out,"\n");
 +
 +  pr_indent(out,indent);
-   fprintf(out,"tau_t%s",bMDPformat ? " = " : ":");
++  fprintf(out,"tau-t%s",bMDPformat ? " = " : ":");
 +  for(i=0; (i<opts->ngtc); i++)
 +    fprintf(out,"  %10g",opts->tau_t[i]);
 +  fprintf(out,"\n");  
 +  
 +  /* Simulated annealing info; note that these lines are written without pr_indent, unlike the entries above */
 +  fprintf(out,"anneal%s",bMDPformat ? " = " : ":");
 +  for(i=0; (i<opts->ngtc); i++)
 +    fprintf(out,"  %10s",EANNEAL(opts->annealing[i]));
 +  fprintf(out,"\n");  
 + 
-   fprintf(out,"ann_npoints%s",bMDPformat ? " = " : ":");
++  fprintf(out,"ann-npoints%s",bMDPformat ? " = " : ":");
 +  for(i=0; (i<opts->ngtc); i++)
 +    fprintf(out,"  %10d",opts->anneal_npoints[i]);
 +  fprintf(out,"\n");  
 + 
 +  for(i=0; (i<opts->ngtc); i++) {
 +    if(opts->anneal_npoints[i]>0) {
 +      fprintf(out,"ann. times [%d]:\t",i);
 +      for(j=0; (j<opts->anneal_npoints[i]); j++)
 +	fprintf(out,"  %10.1f",opts->anneal_time[i][j]);
 +      fprintf(out,"\n");  
 +      fprintf(out,"ann. temps [%d]:\t",i);
 +      for(j=0; (j<opts->anneal_npoints[i]); j++)
 +	fprintf(out,"  %10.1f",opts->anneal_temp[i][j]);
 +      fprintf(out,"\n");  
 +    }
 +  }
 +  
 +  pr_indent(out,indent);
 +  fprintf(out,"acc:\t");
 +  for(i=0; (i<opts->ngacc); i++)
 +    for(m=0; (m<DIM); m++)
 +      fprintf(out,"  %10g",opts->acc[i][m]);
 +  fprintf(out,"\n");
 +
 +  pr_indent(out,indent);
 +  fprintf(out,"nfreeze:");
 +  for(i=0; (i<opts->ngfrz); i++)
 +    for(m=0; (m<DIM); m++)
 +      fprintf(out,"  %10s",opts->nFreeze[i][m] ? "Y" : "N");
 +  fprintf(out,"\n");
 +
 +  /* egp_flags is read as an ngener x ngener table: one output line per row i */
 +  for(i=0; (i<opts->ngener); i++) {
 +    pr_indent(out,indent);
-     fprintf(out,"energygrp_flags[%3d]:",i);
++    fprintf(out,"energygrp-flags[%3d]:",i);
 +    for(m=0; (m<opts->ngener); m++)
 +      fprintf(out," %d",opts->egp_flags[opts->ngener*i+m]);
 +    fprintf(out,"\n");
 +  }
 +
 +  fflush(out);
 +}
 +
 +/* MDP format: one line with the xx yy zz xy xz yz elements; otherwise all DIM rows via pr_rvecs. */
 +static void pr_matrix(FILE *fp,int indent,const char *title,rvec *m,gmx_bool bMDPformat)
 +{
 +  if (!bMDPformat) {
 +    pr_rvecs(fp,indent,title,m,DIM);
 +    return;
 +  }
 +  fprintf(fp,"%-10s    = %g %g %g %g %g %g\n",title,m[XX][XX],m[YY][YY],m[ZZ][ZZ],m[XX][YY],m[XX][ZZ],m[YY][ZZ]);
 +}
 +
 +/* MDP format prints only "title = n"; the full dump also lists a[] and phi[] when n > 0. */
 +static void pr_cosine(FILE *fp,int indent,const char *title,t_cosines *cos,
 +                      gmx_bool bMDPformat)
 +{
 +  int k;
 +
 +  if (bMDPformat) {
 +    fprintf(fp,"%s = %d\n",title,cos->n);
 +    return;
 +  }
 +  indent = pr_title(fp,indent,title);
 +  pr_indent(fp,indent);
 +  fprintf(fp,"n = %d\n",cos->n);
 +  if (cos->n <= 0)
 +    return;
 +  pr_indent(fp,indent+2);
 +  fprintf(fp,"a =");
 +  for (k=0; k<cos->n; k++)
 +    fprintf(fp," %e",cos->a[k]);
 +  fprintf(fp,"\n");
 +  pr_indent(fp,indent+2);
 +  fprintf(fp,"phi =");
 +  for (k=0; k<cos->n; k++)
 +    fprintf(fp," %e",cos->phi[k]);
 +  fprintf(fp,"\n");
 +}
 +
 +#define PS(t,s) pr_str(fp,indent,t,s) /* string entry; expects fp and indent in the caller's scope */
 +#define PI(t,s) pr_int(fp,indent,t,s) /* int entry; expects fp and indent in the caller's scope */
 +#define PSTEP(t,s) pr_gmx_large_int(fp,indent,t,s) /* gmx_large_int_t entry; same scope requirement */
 +#define PR(t,s) pr_real(fp,indent,t,s) /* real entry; same scope requirement */
 +#define PD(t,s) pr_double(fp,indent,t,s) /* double entry; same scope requirement */
 +
 +static void pr_pullgrp(FILE *fp,int indent,int g,t_pullgrp *pg)
 +{
 +  pr_indent(fp,indent);
-   fprintf(fp,"pull_group %d:\n",g);
++  fprintf(fp,"pull-group %d:\n",g);
 +  indent += 2; /* the fields of the group are printed two columns below its heading */
 +  pr_ivec_block(fp,indent,"atom",pg->ind,pg->nat,TRUE); /* consecutive atom indices are collapsed */
 +  pr_rvec(fp,indent,"weight",pg->weight,pg->nweight,TRUE);
 +  PI("pbcatom",pg->pbcatom);
 +  pr_rvec(fp,indent,"vec",pg->vec,DIM,TRUE);
 +  pr_rvec(fp,indent,"init",pg->init,DIM,TRUE);
 +  PR("rate",pg->rate);
 +  PR("k",pg->k);
 +  PR("kB",pg->kB);
 +}
 +
 +static void pr_pull(FILE *fp,int indent,t_pull *pull)
 +{
 +  int g;
 +  /* Print the global pull settings, then every group in pull->grp via pr_pullgrp */
-   PS("pull_geometry",EPULLGEOM(pull->eGeom));
-   pr_ivec(fp,indent,"pull_dim",pull->dim,DIM,TRUE);
-   PR("pull_r1",pull->cyl_r1);
-   PR("pull_r0",pull->cyl_r0);
-   PR("pull_constr_tol",pull->constr_tol);
-   PI("pull_nstxout",pull->nstxout);
-   PI("pull_nstfout",pull->nstfout);
-   PI("pull_ngrp",pull->ngrp);
++  PS("pull-geometry",EPULLGEOM(pull->eGeom));
++  pr_ivec(fp,indent,"pull-dim",pull->dim,DIM,TRUE);
++  PR("pull-r1",pull->cyl_r1);
++  PR("pull-r0",pull->cyl_r0);
++  PR("pull-constr-tol",pull->constr_tol);
++  PI("pull-nstxout",pull->nstxout);
++  PI("pull-nstfout",pull->nstfout);
++  PI("pull-ngrp",pull->ngrp);
 +  for(g=0; g<pull->ngrp+1; g++) /* ngrp+1 entries: indices 0 through ngrp inclusive */
 +    pr_pullgrp(fp,indent,g,&pull->grp[g]);
 +}
 +
 +static void pr_rotgrp(FILE *fp,int indent,int g,t_rotgrp *rotg)
 +{
 +  (void) pr_indent(fp,indent);
 +  (void) fprintf(fp,"rotation_group %d:\n",g);
 +  indent += 2; /* the fields of the group are printed two columns below its heading */
 +  pr_str(fp,indent,"type",EROTGEOM(rotg->eType));
 +  pr_str(fp,indent,"massw",BOOL(rotg->bMassW));
 +  pr_ivec_block(fp,indent,"atom",rotg->ind,rotg->nat,TRUE);
 +  pr_rvecs(fp,indent,"x_ref",rotg->x_ref,rotg->nat);
 +  pr_rvec(fp,indent,"vec",rotg->vec,DIM,TRUE);
 +  pr_rvec(fp,indent,"pivot",rotg->pivot,DIM,TRUE);
 +  pr_real(fp,indent,"rate",rotg->rate);
 +  pr_real(fp,indent,"k",rotg->k);
 +  pr_real(fp,indent,"slab_dist",rotg->slab_dist);
 +  pr_real(fp,indent,"min_gaussian",rotg->min_gaussian);
 +  pr_real(fp,indent,"epsilon",rotg->eps);
 +  pr_str(fp,indent,"fit_method",EROTFIT(rotg->eFittype));
 +  pr_int(fp,indent,"potfitangle_nstep",rotg->PotAngle_nstep);
 +  pr_real(fp,indent,"potfitangle_step",rotg->PotAngle_step);
 +}
 +
 +/* Print the rot_nstrout, rot_nstsout and rot_ngrp entries, then each rotation group. */
 +static void pr_rot(FILE *fp,int indent,t_rot *rot)
 +{
 +  int grp;
 +  pr_int(fp,indent,"rot_nstrout",rot->nstrout);
 +  pr_int(fp,indent,"rot_nstsout",rot->nstsout);
 +  pr_int(fp,indent,"rot_ngrp",rot->ngrp);
 +  for (grp=0; grp<rot->ngrp; grp++)
 +    pr_rotgrp(fp,indent,grp,&rot->grp[grp]);
 +}
 +
 +void pr_inputrec(FILE *fp,int indent,const char *title,t_inputrec *ir,
 +                 gmx_bool bMDPformat)
 +{
 +  const char *infbuf="inf";
 +  int  i;
 +  /* Dump every field of ir as a name/value entry; the title heading (and its extra indent) is skipped when bMDPformat */
 +  if (available(fp,ir,indent,title)) {
 +    if (!bMDPformat)
 +      indent=pr_title(fp,indent,title);
 +    PS("integrator",EI(ir->eI));
 +    PSTEP("nsteps",ir->nsteps);
-     PSTEP("init_step",ir->init_step);
-     PS("ns_type",ENS(ir->ns_type));
++    PSTEP("init-step",ir->init_step);
++    PS("ns-type",ENS(ir->ns_type));
 +    PI("nstlist",ir->nstlist);
 +    PI("ndelta",ir->ndelta);
 +    PI("nstcomm",ir->nstcomm);
-     PS("comm_mode",ECOM(ir->comm_mode));
++    PS("comm-mode",ECOM(ir->comm_mode));
 +    PI("nstlog",ir->nstlog);
 +    PI("nstxout",ir->nstxout);
 +    PI("nstvout",ir->nstvout);
 +    PI("nstfout",ir->nstfout);
 +    PI("nstcalcenergy",ir->nstcalcenergy);
 +    PI("nstenergy",ir->nstenergy);
 +    PI("nstxtcout",ir->nstxtcout);
-     PR("init_t",ir->init_t);
-     PR("delta_t",ir->delta_t);
++    PR("init-t",ir->init_t);
++    PR("delta-t",ir->delta_t);
 +    
 +    PR("xtcprec",ir->xtcprec);
 +    PI("nkx",ir->nkx);
 +    PI("nky",ir->nky);
 +    PI("nkz",ir->nkz);
-     PI("pme_order",ir->pme_order);
-     PR("ewald_rtol",ir->ewald_rtol);
-     PR("ewald_geometry",ir->ewald_geometry);
-     PR("epsilon_surface",ir->epsilon_surface);
-     PS("optimize_fft",BOOL(ir->bOptFFT));
++    PI("pme-order",ir->pme_order);
++    PR("ewald-rtol",ir->ewald_rtol);
++    PR("ewald-geometry",ir->ewald_geometry);
++    PR("epsilon-surface",ir->epsilon_surface);
++    PS("optimize-fft",BOOL(ir->bOptFFT));
 +    PS("ePBC",EPBC(ir->ePBC));
 +    PS("bPeriodicMols",BOOL(ir->bPeriodicMols));
 +    PS("bContinuation",BOOL(ir->bContinuation));
 +    PS("bShakeSOR",BOOL(ir->bShakeSOR));
 +    PS("etc",ETCOUPLTYPE(ir->etc));
 +    PI("nsttcouple",ir->nsttcouple);
 +    PS("epc",EPCOUPLTYPE(ir->epc));
 +    PS("epctype",EPCOUPLTYPETYPE(ir->epct));
 +    PI("nstpcouple",ir->nstpcouple);
-     PR("tau_p",ir->tau_p);
-     pr_matrix(fp,indent,"ref_p",ir->ref_p,bMDPformat);
++    PR("tau-p",ir->tau_p);
++    pr_matrix(fp,indent,"ref-p",ir->ref_p,bMDPformat);
 +    pr_matrix(fp,indent,"compress",ir->compress,bMDPformat);
-     PS("refcoord_scaling",EREFSCALINGTYPE(ir->refcoord_scaling));
++    PS("refcoord-scaling",EREFSCALINGTYPE(ir->refcoord_scaling));
 +    if (bMDPformat)
-       fprintf(fp,"posres_com  = %g %g %g\n",ir->posres_com[XX],
++      fprintf(fp,"posres-com  = %g %g %g\n",ir->posres_com[XX],
 +	      ir->posres_com[YY],ir->posres_com[ZZ]);
 +    else
-       pr_rvec(fp,indent,"posres_com",ir->posres_com,DIM,TRUE);
++      pr_rvec(fp,indent,"posres-com",ir->posres_com,DIM,TRUE);
 +    if (bMDPformat)
-       fprintf(fp,"posres_comB = %g %g %g\n",ir->posres_comB[XX],
++      fprintf(fp,"posres-comB = %g %g %g\n",ir->posres_comB[XX],
 +	      ir->posres_comB[YY],ir->posres_comB[ZZ]);
 +    else
-       pr_rvec(fp,indent,"posres_comB",ir->posres_comB,DIM,TRUE);
-     PI("andersen_seed",ir->andersen_seed);
++      pr_rvec(fp,indent,"posres-comB",ir->posres_comB,DIM,TRUE);
++    PI("andersen-seed",ir->andersen_seed);
 +    PR("rlist",ir->rlist);
 +    PR("rlistlong",ir->rlistlong);
 +    PR("rtpi",ir->rtpi);
 +    PS("coulombtype",EELTYPE(ir->coulombtype));
-     PR("rcoulomb_switch",ir->rcoulomb_switch);
++    PR("rcoulomb-switch",ir->rcoulomb_switch);
 +    PR("rcoulomb",ir->rcoulomb);
 +    PS("vdwtype",EVDWTYPE(ir->vdwtype));
-     PR("rvdw_switch",ir->rvdw_switch);
++    PR("rvdw-switch",ir->rvdw_switch);
 +    PR("rvdw",ir->rvdw);
 +    if (ir->epsilon_r != 0)
-       PR("epsilon_r",ir->epsilon_r);
++      PR("epsilon-r",ir->epsilon_r);
 +    else
-       PS("epsilon_r",infbuf);
++      PS("epsilon-r",infbuf);
 +    if (ir->epsilon_rf != 0)
-       PR("epsilon_rf",ir->epsilon_rf);
++      PR("epsilon-rf",ir->epsilon_rf);
 +    else
-       PS("epsilon_rf",infbuf);
++      PS("epsilon-rf",infbuf);
 +    PR("tabext",ir->tabext);
-     PS("implicit_solvent",EIMPLICITSOL(ir->implicit_solvent));
-     PS("gb_algorithm",EGBALGORITHM(ir->gb_algorithm));
-     PR("gb_epsilon_solvent",ir->gb_epsilon_solvent);
++    PS("implicit-solvent",EIMPLICITSOL(ir->implicit_solvent));
++    PS("gb-algorithm",EGBALGORITHM(ir->gb_algorithm));
++    PR("gb-epsilon-solvent",ir->gb_epsilon_solvent);
 +    PI("nstgbradii",ir->nstgbradii);
 +    PR("rgbradii",ir->rgbradii);
-     PR("gb_saltconc",ir->gb_saltconc);
-     PR("gb_obc_alpha",ir->gb_obc_alpha);
-     PR("gb_obc_beta",ir->gb_obc_beta);
-     PR("gb_obc_gamma",ir->gb_obc_gamma);
-     PR("gb_dielectric_offset",ir->gb_dielectric_offset);
-     PS("sa_algorithm",ESAALGORITHM(ir->gb_algorithm));
-     PR("sa_surface_tension",ir->sa_surface_tension);
++    PR("gb-saltconc",ir->gb_saltconc);
++    PR("gb-obc-alpha",ir->gb_obc_alpha);
++    PR("gb-obc-beta",ir->gb_obc_beta);
++    PR("gb-obc-gamma",ir->gb_obc_gamma);
++    PR("gb-dielectric-offset",ir->gb_dielectric_offset);
++    PS("sa-algorithm",ESAALGORITHM(ir->sa_algorithm)); /* was ir->gb_algorithm, which printed the GB setting instead */
++    PR("sa-surface-tension",ir->sa_surface_tension);
 +	  
 +    PS("DispCorr",EDISPCORR(ir->eDispCorr));
-     PS("free_energy",EFEPTYPE(ir->efep));
-     PR("init_lambda",ir->init_lambda);
-     PR("delta_lambda",ir->delta_lambda);
++    PS("free-energy",EFEPTYPE(ir->efep));
++    PR("init-lambda",ir->init_lambda);
++    PR("delta-lambda",ir->delta_lambda);
 +    if (!bMDPformat)
 +    {
-         PI("n_foreign_lambda",ir->n_flambda);
++        PI("n-foreign-lambda",ir->n_flambda);
 +    }
 +    if (ir->n_flambda > 0)
 +    {
 +        pr_indent(fp,indent);
-         fprintf(fp,"foreign_lambda%s",bMDPformat ? " = " : ":");
++        fprintf(fp,"foreign-lambda%s",bMDPformat ? " = " : ":");
 +        for(i=0; i<ir->n_flambda; i++)
 +        {
 +            fprintf(fp,"  %10g",ir->flambda[i]);
 +        }
 +        fprintf(fp,"\n");
 +    }
-     PR("sc_alpha",ir->sc_alpha);
-     PI("sc_power",ir->sc_power);
-     PR("sc_sigma",ir->sc_sigma);
-     PR("sc_sigma_min",ir->sc_sigma_min);
++    PR("sc-alpha",ir->sc_alpha);
++    PI("sc-power",ir->sc_power);
++    PR("sc-sigma",ir->sc_sigma);
++    PR("sc-sigma-min",ir->sc_sigma_min);
 +    PI("nstdhdl", ir->nstdhdl);
-     PS("separate_dhdl_file", SEPDHDLFILETYPE(ir->separate_dhdl_file));
-     PS("dhdl_derivatives", DHDLDERIVATIVESTYPE(ir->dhdl_derivatives));
-     PI("dh_hist_size", ir->dh_hist_size);
-     PD("dh_hist_spacing", ir->dh_hist_spacing);
++    PS("separate-dhdl-file", SEPDHDLFILETYPE(ir->separate_dhdl_file));
++    PS("dhdl-derivatives", DHDLDERIVATIVESTYPE(ir->dhdl_derivatives));
++    PI("dh-hist-size", ir->dh_hist_size);
++    PD("dh-hist-spacing", ir->dh_hist_spacing);
 +
 +    PI("nwall",ir->nwall);
-     PS("wall_type",EWALLTYPE(ir->wall_type));
-     PI("wall_atomtype[0]",ir->wall_atomtype[0]);
-     PI("wall_atomtype[1]",ir->wall_atomtype[1]);
-     PR("wall_density[0]",ir->wall_density[0]);
-     PR("wall_density[1]",ir->wall_density[1]);
-     PR("wall_ewald_zfac",ir->wall_ewald_zfac);
++    PS("wall-type",EWALLTYPE(ir->wall_type));
++    PI("wall-atomtype[0]",ir->wall_atomtype[0]);
++    PI("wall-atomtype[1]",ir->wall_atomtype[1]);
++    PR("wall-density[0]",ir->wall_density[0]);
++    PR("wall-density[1]",ir->wall_density[1]);
++    PR("wall-ewald-zfac",ir->wall_ewald_zfac);
 +
 +    PS("pull",EPULLTYPE(ir->ePull));
 +    if (ir->ePull != epullNO)
 +      pr_pull(fp,indent,ir->pull);
 +    
 +    PS("rotation",BOOL(ir->bRot));
 +    if (ir->bRot)
 +      pr_rot(fp,indent,ir->rot);
 +
 +    PS("disre",EDISRETYPE(ir->eDisre));
-     PS("disre_weighting",EDISREWEIGHTING(ir->eDisreWeighting));
-     PS("disre_mixed",BOOL(ir->bDisreMixed));
-     PR("dr_fc",ir->dr_fc);
-     PR("dr_tau",ir->dr_tau);
++    PS("disre-weighting",EDISREWEIGHTING(ir->eDisreWeighting));
++    PS("disre-mixed",BOOL(ir->bDisreMixed));
++    PR("dr-fc",ir->dr_fc);
++    PR("dr-tau",ir->dr_tau);
 +    PR("nstdisreout",ir->nstdisreout);
-     PR("orires_fc",ir->orires_fc);
-     PR("orires_tau",ir->orires_tau);
++    PR("orires-fc",ir->orires_fc);
++    PR("orires-tau",ir->orires_tau);
 +    PR("nstorireout",ir->nstorireout);
 +
 +    PR("dihre-fc",ir->dihre_fc);
 +    
-     PR("em_stepsize",ir->em_stepsize);
-     PR("em_tol",ir->em_tol);
++    PR("em-stepsize",ir->em_stepsize);
++    PR("em-tol",ir->em_tol);
 +    PI("niter",ir->niter);
-     PR("fc_stepsize",ir->fc_stepsize);
++    PR("fc-stepsize",ir->fc_stepsize);
 +    PI("nstcgsteep",ir->nstcgsteep);
 +    PI("nbfgscorr",ir->nbfgscorr);
 +
 +    PS("ConstAlg",ECONSTRTYPE(ir->eConstrAlg));
-     PR("shake_tol",ir->shake_tol);
-     PI("lincs_order",ir->nProjOrder);
-     PR("lincs_warnangle",ir->LincsWarnAngle);
-     PI("lincs_iter",ir->nLincsIter);
-     PR("bd_fric",ir->bd_fric);
-     PI("ld_seed",ir->ld_seed);
-     PR("cos_accel",ir->cos_accel);
++    PR("shake-tol",ir->shake_tol);
++    PI("lincs-order",ir->nProjOrder);
++    PR("lincs-warnangle",ir->LincsWarnAngle);
++    PI("lincs-iter",ir->nLincsIter);
++    PR("bd-fric",ir->bd_fric);
++    PI("ld-seed",ir->ld_seed);
++    PR("cos-accel",ir->cos_accel);
 +    pr_matrix(fp,indent,"deform",ir->deform,bMDPformat);
 +    PI("userint1",ir->userint1);
 +    PI("userint2",ir->userint2);
 +    PI("userint3",ir->userint3);
 +    PI("userint4",ir->userint4);
 +    PR("userreal1",ir->userreal1);
 +    PR("userreal2",ir->userreal2);
 +    PR("userreal3",ir->userreal3);
 +    PR("userreal4",ir->userreal4);
 +    pr_grp_opts(fp,indent,"grpopts",&(ir->opts),bMDPformat);
 +    pr_cosine(fp,indent,"efield-x",&(ir->ex[XX]),bMDPformat);
 +    pr_cosine(fp,indent,"efield-xt",&(ir->et[XX]),bMDPformat);
 +    pr_cosine(fp,indent,"efield-y",&(ir->ex[YY]),bMDPformat);
 +    pr_cosine(fp,indent,"efield-yt",&(ir->et[YY]),bMDPformat);
 +    pr_cosine(fp,indent,"efield-z",&(ir->ex[ZZ]),bMDPformat);
 +    pr_cosine(fp,indent,"efield-zt",&(ir->et[ZZ]),bMDPformat);
 +    PS("bQMMM",BOOL(ir->bQMMM));
 +    PI("QMconstraints",ir->QMconstraints);
 +    PI("QMMMscheme",ir->QMMMscheme);
 +    PR("scalefactor",ir->scalefactor);
-     pr_qm_opts(fp,indent,"qm_opts",&(ir->opts));
++    pr_qm_opts(fp,indent,"qm-opts",&(ir->opts));
 +  }
 +}
 +#undef PS
 +#undef PR
 +#undef PI
 +
 +static void pr_harm(FILE *fp,t_iparams *iparams,const char *r,const char *kr)
 +{
 +  /* r and kr are the label prefixes; the A-state pair is printed first, then the B-state pair */
 +  fprintf(fp,"%sA=%12.5e, %sA=%12.5e, %sB=%12.5e, %sB=%12.5e\n",
 +          r,iparams->harmonic.rA,kr,iparams->harmonic.krA,r,iparams->harmonic.rB,kr,iparams->harmonic.krB);
 +}
 +
 +void pr_iparams(FILE *fp,t_functype ftype,t_iparams *iparams)
 +{
 +  int i;
 +  real VA[4],VB[4],*rbcA,*rbcB;
 +
 +  switch (ftype) {
 +  case F_ANGLES:
 +  case F_G96ANGLES:
 +    pr_harm(fp,iparams,"th","ct");
 +    break;
 +  case F_CROSS_BOND_BONDS:
 +    fprintf(fp,"r1e=%15.8e, r2e=%15.8e, krr=%15.8e\n",
 +	    iparams->cross_bb.r1e,iparams->cross_bb.r2e,
 +	    iparams->cross_bb.krr);
 +    break;
 +  case F_CROSS_BOND_ANGLES:
 +    fprintf(fp,"r1e=%15.8e, r1e=%15.8e, r3e=%15.8e, krt=%15.8e\n",
 +	    iparams->cross_ba.r1e,iparams->cross_ba.r2e,
 +	    iparams->cross_ba.r3e,iparams->cross_ba.krt);
 +    break;
 +  case F_UREY_BRADLEY:
 +    fprintf(fp,"theta=%15.8e, ktheta=%15.8e, r13=%15.8e, kUB=%15.8e\n",
 +	    iparams->u_b.theta,iparams->u_b.ktheta,iparams->u_b.r13,iparams->u_b.kUB);
 +    break;
 +  case F_QUARTIC_ANGLES:
 +    fprintf(fp,"theta=%15.8e",iparams->qangle.theta);
 +    for(i=0; i<5; i++)
 +      fprintf(fp,", c%c=%15.8e",'0'+i,iparams->qangle.c[i]);
 +    fprintf(fp,"\n");
 +    break;
 +  case F_BHAM:
 +    fprintf(fp,"a=%15.8e, b=%15.8e, c=%15.8e\n",
 +	    iparams->bham.a,iparams->bham.b,iparams->bham.c);
 +    break;
 +  case F_BONDS:
 +  case F_G96BONDS:
 +  case F_HARMONIC:
 +    pr_harm(fp,iparams,"b0","cb");
 +    break;
 +  case F_IDIHS:
 +    pr_harm(fp,iparams,"xi","cx");
 +    break;
 +  case F_MORSE:
 +    fprintf(fp,"b0=%15.8e, cb=%15.8e, beta=%15.8e\n",
 +	    iparams->morse.b0,iparams->morse.cb,iparams->morse.beta);
 +    break;
 +  case F_CUBICBONDS:
 +    fprintf(fp,"b0=%15.8e, kb=%15.8e, kcub=%15.8e\n",
 +	    iparams->cubic.b0,iparams->cubic.kb,iparams->cubic.kcub);
 +    break;
 +  case F_CONNBONDS:
 +    fprintf(fp,"\n");
 +    break;
 +  case F_FENEBONDS:
 +    fprintf(fp,"bm=%15.8e, kb=%15.8e\n",iparams->fene.bm,iparams->fene.kb);
 +    break;
 +  case F_RESTRBONDS:
 +      fprintf(fp,"lowA=%15.8e, up1A=%15.8e, up2A=%15.8e, kA=%15.8e, lowB=%15.8e, up1B=%15.8e, up2B=%15.8e, kB=%15.8e,\n",
 +              iparams->restraint.lowA,iparams->restraint.up1A,
 +              iparams->restraint.up2A,iparams->restraint.kA,
 +              iparams->restraint.lowB,iparams->restraint.up1B,
 +              iparams->restraint.up2B,iparams->restraint.kB);
 +      break;
 +  case F_TABBONDS:
 +  case F_TABBONDSNC:
 +  case F_TABANGLES:
 +  case F_TABDIHS:
 +    fprintf(fp,"tab=%d, kA=%15.8e, kB=%15.8e\n",
 +	    iparams->tab.table,iparams->tab.kA,iparams->tab.kB);
 +    break;
 +  case F_POLARIZATION:
 +    fprintf(fp,"alpha=%15.8e\n",iparams->polarize.alpha);
 +    break;
 +  case F_THOLE_POL:
 +    fprintf(fp,"a=%15.8e, alpha1=%15.8e, alpha2=%15.8e, rfac=%15.8e\n",
 +	    iparams->thole.a,iparams->thole.alpha1,iparams->thole.alpha2,
 +	    iparams->thole.rfac);
 +    break;
 +  case F_WATER_POL:
 +    fprintf(fp,"al_x=%15.8e, al_y=%15.8e, al_z=%15.8e, rOH=%9.6f, rHH=%9.6f, rOD=%9.6f\n",
 +	    iparams->wpol.al_x,iparams->wpol.al_y,iparams->wpol.al_z,
 +	    iparams->wpol.rOH,iparams->wpol.rHH,iparams->wpol.rOD);
 +    break;
 +  case F_LJ:
 +    fprintf(fp,"c6=%15.8e, c12=%15.8e\n",iparams->lj.c6,iparams->lj.c12);
 +    break;
 +  case F_LJ14:
 +    fprintf(fp,"c6A=%15.8e, c12A=%15.8e, c6B=%15.8e, c12B=%15.8e\n",
 +	    iparams->lj14.c6A,iparams->lj14.c12A,
 +	    iparams->lj14.c6B,iparams->lj14.c12B);
 +    break;
 +  case F_LJC14_Q:
 +    fprintf(fp,"fqq=%15.8e, qi=%15.8e, qj=%15.8e, c6=%15.8e, c12=%15.8e\n",
 +	    iparams->ljc14.fqq,
 +	    iparams->ljc14.qi,iparams->ljc14.qj,
 +	    iparams->ljc14.c6,iparams->ljc14.c12);
 +    break;
 +  case F_LJC_PAIRS_NB:
 +    fprintf(fp,"qi=%15.8e, qj=%15.8e, c6=%15.8e, c12=%15.8e\n",
 +	    iparams->ljcnb.qi,iparams->ljcnb.qj,
 +	    iparams->ljcnb.c6,iparams->ljcnb.c12);
 +    break;
 +  case F_PDIHS:
 +  case F_PIDIHS:
 +  case F_ANGRES:
 +  case F_ANGRESZ:
 +    fprintf(fp,"phiA=%15.8e, cpA=%15.8e, phiB=%15.8e, cpB=%15.8e, mult=%d\n",
 +	    iparams->pdihs.phiA,iparams->pdihs.cpA,
 +	    iparams->pdihs.phiB,iparams->pdihs.cpB,
 +	    iparams->pdihs.mult);
 +    break;
 +  case F_DISRES:
 +    fprintf(fp,"label=%4d, type=%1d, low=%15.8e, up1=%15.8e, up2=%15.8e, fac=%15.8e)\n",
 +	    iparams->disres.label,iparams->disres.type,
 +	    iparams->disres.low,iparams->disres.up1,
 +	    iparams->disres.up2,iparams->disres.kfac);
 +    break;
 +  case F_ORIRES:
 +    fprintf(fp,"ex=%4d, label=%d, power=%4d, c=%15.8e, obs=%15.8e, kfac=%15.8e)\n",
 +	    iparams->orires.ex,iparams->orires.label,iparams->orires.power,
 +	    iparams->orires.c,iparams->orires.obs,iparams->orires.kfac);
 +    break;
 +  case F_DIHRES:
 +    fprintf(fp,"label=%d, power=%4d phi=%15.8e, dphi=%15.8e, kfac=%15.8e)\n",
 +	    iparams->dihres.label,iparams->dihres.power,
 +	    iparams->dihres.phi,iparams->dihres.dphi,iparams->dihres.kfac);
 +    break;
 +  case F_POSRES:
 +    fprintf(fp,"pos0A=(%15.8e,%15.8e,%15.8e), fcA=(%15.8e,%15.8e,%15.8e), pos0B=(%15.8e,%15.8e,%15.8e), fcB=(%15.8e,%15.8e,%15.8e)\n",
 +	    iparams->posres.pos0A[XX],iparams->posres.pos0A[YY],
 +	    iparams->posres.pos0A[ZZ],iparams->posres.fcA[XX],
 +	    iparams->posres.fcA[YY],iparams->posres.fcA[ZZ],
 +	    iparams->posres.pos0B[XX],iparams->posres.pos0B[YY],
 +	    iparams->posres.pos0B[ZZ],iparams->posres.fcB[XX],
 +	    iparams->posres.fcB[YY],iparams->posres.fcB[ZZ]);
 +    break;
 +  case F_RBDIHS:
 +    for (i=0; i<NR_RBDIHS; i++) 
 +      fprintf(fp,"%srbcA[%d]=%15.8e",i==0?"":", ",i,iparams->rbdihs.rbcA[i]);
 +    fprintf(fp,"\n");
 +    for (i=0; i<NR_RBDIHS; i++) 
 +      fprintf(fp,"%srbcB[%d]=%15.8e",i==0?"":", ",i,iparams->rbdihs.rbcB[i]);
 +    fprintf(fp,"\n");
 +    break;
 +  case F_FOURDIHS:
 +    /* Use the OPLS -> Ryckaert-Bellemans formula backwards to get the
 +     * OPLS potential constants back.
 +     */
 +    rbcA = iparams->rbdihs.rbcA;
 +    rbcB = iparams->rbdihs.rbcB;
 +
 +    VA[3] = -0.25*rbcA[4];
 +    VA[2] = -0.5*rbcA[3];
 +    VA[1] = 4.0*VA[3]-rbcA[2];
 +    VA[0] = 3.0*VA[2]-2.0*rbcA[1];
 +
 +    VB[3] = -0.25*rbcB[4];
 +    VB[2] = -0.5*rbcB[3];
 +    VB[1] = 4.0*VB[3]-rbcB[2];
 +    VB[0] = 3.0*VB[2]-2.0*rbcB[1];
 +
 +    for (i=0; i<NR_FOURDIHS; i++) 
 +      fprintf(fp,"%sFourA[%d]=%15.8e",i==0?"":", ",i,VA[i]);
 +    fprintf(fp,"\n");
 +    for (i=0; i<NR_FOURDIHS; i++) 
 +      fprintf(fp,"%sFourB[%d]=%15.8e",i==0?"":", ",i,VB[i]);
 +    fprintf(fp,"\n");
 +    break;
 +   
 +  case F_CONSTR:
 +  case F_CONSTRNC:
 +    fprintf(fp,"dA=%15.8e, dB=%15.8e\n",iparams->constr.dA,iparams->constr.dB);
 +    break;
 +  case F_SETTLE:
 +    fprintf(fp,"doh=%15.8e, dhh=%15.8e\n",iparams->settle.doh,
 +	    iparams->settle.dhh);
 +    break;
 +  case F_VSITE2:
 +    fprintf(fp,"a=%15.8e\n",iparams->vsite.a);
 +    break;
 +  case F_VSITE3:
 +  case F_VSITE3FD:
 +  case F_VSITE3FAD:
 +    fprintf(fp,"a=%15.8e, b=%15.8e\n",iparams->vsite.a,iparams->vsite.b);
 +    break;
 +  case F_VSITE3OUT:
 +  case F_VSITE4FD:
 +  case F_VSITE4FDN:
 +    fprintf(fp,"a=%15.8e, b=%15.8e, c=%15.8e\n",
 +	    iparams->vsite.a,iparams->vsite.b,iparams->vsite.c);
 +    break;
 +  case F_VSITEN:
 +    fprintf(fp,"n=%2d, a=%15.8e\n",iparams->vsiten.n,iparams->vsiten.a);
 +    break;
 +  case F_GB12:
 +  case F_GB13:
 +  case F_GB14:
 +    fprintf(fp, "sar=%15.8e, st=%15.8e, pi=%15.8e, gbr=%15.8e, bmlt=%15.8e\n",iparams->gb.sar,iparams->gb.st,iparams->gb.pi,iparams->gb.gbr,iparams->gb.bmlt);
 +    break;		  
 +  case F_CMAP:
 +    fprintf(fp, "cmapA=%1d, cmapB=%1d\n",iparams->cmap.cmapA, iparams->cmap.cmapB);
 +    break;		  
 +  default:
 +    gmx_fatal(FARGS,"unknown function type %d (%s) in %s line %d",
 +	      ftype,interaction_function[ftype].name,__FILE__,__LINE__);
 +  }
 +}
 +
 +/* Print the interaction list ilist to fp under the heading title.
 + *
 + * Nothing is printed for an empty list; what happens for a NULL list is left
 + * to available(). Each entry of ilist->iatoms consists of a parameter type
 + * index followed by the atom indices of that interaction; functype maps the
 + * type index to the function type, whose nratoms field gives the number of
 + * atoms that follow. With bShowNumbers false the entry number and the type
 + * index are printed as -1.
 + * When DEBUG is defined the raw iatoms array is dumped one element per line.
 + */
 +void pr_ilist(FILE *fp,int indent,const char *title,
 +              t_functype *functype,t_ilist *ilist, gmx_bool bShowNumbers)
 +{
 +    int     pos,entry,a,type,ftype,natoms;
 +    t_iatom *cursor;
 +
 +    if (!available(fp,ilist,indent,title) || ilist->nr <= 0)
 +    {
 +        return;
 +    }
 +
 +    indent = pr_title(fp,indent,title);
 +    pr_indent(fp,indent);
 +    fprintf(fp,"nr: %d\n",ilist->nr);
 +    pr_indent(fp,indent);
 +    fprintf(fp,"iatoms:\n");
 +
 +    cursor = ilist->iatoms;
 +    entry  = 0;
 +    pos    = 0;
 +    while (pos < ilist->nr)
 +    {
 +#ifndef DEBUG
 +        pr_indent(fp,indent+INDENT);
 +        /* The first element of an entry is the parameter type index */
 +        type   = *cursor++;
 +        ftype  = functype[type];
 +        natoms = interaction_function[ftype].nratoms;
 +        fprintf(fp,"%d type=%d (%s)",
 +                bShowNumbers ? entry : -1,
 +                bShowNumbers ? type : -1,
 +                interaction_function[ftype].name);
 +        entry++;
 +        for (a = 0; a < natoms; a++)
 +        {
 +            fprintf(fp," %u",*cursor++);
 +        }
 +        fprintf(fp,"\n");
 +        /* Skip the type index plus the atoms just printed */
 +        pos += 1+natoms;
 +#else
 +        fprintf(fp,"%5d%5d\n",pos,cursor[pos]);
 +        pos++;
 +#endif
 +    }
 +}
 +
 +/* Print the CMAP grids stored in cmap_grid to fp under the heading title.
 + *
 + * For each of the ngrid grids a column header and a "grid[n]={" line are
 + * written, followed by one line per grid point holding the four values
 + * stored for that point (the header labels them V, dVdx, dVdy and d2dV).
 + * Whenever a new row of grid_spacing points starts, the running angle is
 + * printed on a line of its own; it starts at -180 and advances by
 + * 360/grid_spacing. With bShowNumbers false the grid number is shown as -1.
 + * indent is only passed on to available().
 + */
 +static void pr_cmap(FILE *fp, int indent, const char *title,
 +                    gmx_cmap_t *cmap_grid, gmx_bool bShowNumbers)
 +{
 +    int  i,j,nelem,spacing;
 +    real dx,idx;
 +
 +    /* Test the pointer before reading through it; the spacing used to be
 +     * read ahead of this check.
 +     */
 +    if (!available(fp,cmap_grid,indent,title))
 +    {
 +        return;
 +    }
 +
 +    spacing = cmap_grid->grid_spacing;
 +    nelem   = spacing*spacing;
 +    /* With a zero spacing nelem is 0 and dx is never used, so avoid the
 +     * division by zero instead of relying on floating-point behaviour.
 +     */
 +    dx      = (spacing != 0) ? 360.0/spacing : 0.0;
 +
 +    fprintf(fp,"%s\n",title);
 +
 +    for(i=0; i<cmap_grid->ngrid; i++)
 +    {
 +        idx = -180.0;
 +        fprintf(fp,"%8s %8s %8s %8s\n","V","dVdx","dVdy","d2dV");
 +
 +        fprintf(fp,"grid[%3d]={\n",bShowNumbers?i:-1);
 +
 +        for(j=0; j<nelem; j++)
 +        {
 +            /* Start of a new row of the grid: print the current angle */
 +            if ((j%spacing) == 0)
 +            {
 +                fprintf(fp,"%8.1f\n",idx);
 +                idx += dx;
 +            }
 +
 +            fprintf(fp,"%8.3f ",cmap_grid->cmapdata[i].cmap[j*4]);
 +            fprintf(fp,"%8.3f ",cmap_grid->cmapdata[i].cmap[j*4+1]);
 +            fprintf(fp,"%8.3f ",cmap_grid->cmapdata[i].cmap[j*4+2]);
 +            fprintf(fp,"%8.3f\n",cmap_grid->cmapdata[i].cmap[j*4+3]);
 +        }
 +        fprintf(fp,"\n");
 +    }
 +}
 +
 +/* Print the force-field parameters in ffparams to fp under the heading title.
 + *
 + * Output consists of atnr, ntypes, one line per parameter type (function
 + * type name followed by the parameters printed by pr_iparams), reppow,
 + * fudgeQQ and finally the CMAP grids. With bShowNumbers false the type
 + * index is printed as -1.
 + */
 +void pr_ffparams(FILE *fp,int indent,const char *title,
 +                 gmx_ffparams_t *ffparams,
 +                 gmx_bool bShowNumbers)
 +{
 +    int t;
 +
 +    indent = pr_title(fp,indent,title);
 +
 +    pr_indent(fp,indent);
 +    fprintf(fp,"atnr=%d\n",ffparams->atnr);
 +    pr_indent(fp,indent);
 +    fprintf(fp,"ntypes=%d\n",ffparams->ntypes);
 +
 +    for (t = 0; t < ffparams->ntypes; t++)
 +    {
 +        pr_indent(fp,indent+INDENT);
 +        fprintf(fp,"functype[%d]=%s, ",
 +                bShowNumbers ? t : -1,
 +                interaction_function[ffparams->functype[t]].name);
 +        pr_iparams(fp,ffparams->functype[t],&ffparams->iparams[t]);
 +    }
 +
 +    pr_double(fp,indent,"reppow",ffparams->reppow);
 +    pr_real(fp,indent,"fudgeQQ",ffparams->fudgeQQ);
 +    pr_cmap(fp,indent,"cmap",&ffparams->cmap_grid,bShowNumbers);
 +}
 +
 +/* Print the interaction definitions in idef to fp under the heading title.
 + *
 + * Writes atnr, ntypes, one line per parameter type (function type name plus
 + * the parameters printed by pr_iparams), fudgeQQ and then the interaction
 + * list of every function type 0..F_NRE-1 via pr_ilist. With bShowNumbers
 + * false the indices are printed as -1.
 + */
 +void pr_idef(FILE *fp,int indent,const char *title,t_idef *idef, gmx_bool bShowNumbers)
 +{
 +    int t,ftype;
 +
 +    if (!available(fp,idef,indent,title))
 +    {
 +        return;
 +    }
 +
 +    indent = pr_title(fp,indent,title);
 +    pr_indent(fp,indent);
 +    fprintf(fp,"atnr=%d\n",idef->atnr);
 +    pr_indent(fp,indent);
 +    fprintf(fp,"ntypes=%d\n",idef->ntypes);
 +
 +    for (t = 0; t < idef->ntypes; t++)
 +    {
 +        pr_indent(fp,indent+INDENT);
 +        fprintf(fp,"functype[%d]=%s, ",
 +                bShowNumbers ? t : -1,
 +                interaction_function[idef->functype[t]].name);
 +        pr_iparams(fp,idef->functype[t],&idef->iparams[t]);
 +    }
 +    pr_real(fp,indent,"fudgeQQ",idef->fudgeQQ);
 +
 +    for (ftype = 0; ftype < F_NRE; ftype++)
 +    {
 +        pr_ilist(fp,indent,interaction_function[ftype].longname,
 +                 idef->functype,&idef->il[ftype],bShowNumbers);
 +    }
 +}
 +
 +/* Print the title and the nr field of block.
 + *
 + * Returns the indent value handed back by pr_title, or the unchanged indent
 + * when available() rejects the block.
 + */
 +static int pr_block_title(FILE *fp,int indent,const char *title,t_block *block)
 +{
 +    if (!available(fp,block,indent,title))
 +    {
 +        return indent;
 +    }
 +
 +    indent = pr_title(fp,indent,title);
 +    pr_indent(fp,indent);
 +    fprintf(fp,"nr=%d\n",block->nr);
 +
 +    return indent;
 +}
 +
 +/* Print the title plus the nr and nra fields of block.
 + *
 + * Returns the indent value handed back by pr_title, or the unchanged indent
 + * when available() rejects the block.
 + */
 +static int pr_blocka_title(FILE *fp,int indent,const char *title,t_blocka *block)
 +{
 +    if (!available(fp,block,indent,title))
 +    {
 +        return indent;
 +    }
 +
 +    indent = pr_title(fp,indent,title);
 +    pr_indent(fp,indent);
 +    fprintf(fp,"nr=%d\n",block->nr);
 +    pr_indent(fp,indent);
 +    fprintf(fp,"nra=%d\n",block->nra);
 +
 +    return indent;
 +}
 +
 +/* Dump the raw index array of block: the title block followed by one line
 + * for each of the nr+1 entries of block->index. With bShowNumbers false the
 + * array position is printed as -1.
 + */
 +static void low_pr_block(FILE *fp,int indent,const char *title,t_block *block, gmx_bool bShowNumbers)
 +{
 +    int n;
 +
 +    if (!available(fp,block,indent,title))
 +    {
 +        return;
 +    }
 +
 +    indent = pr_block_title(fp,indent,title,block);
 +    /* index has nr+1 entries, hence the inclusive upper bound */
 +    for (n = 0; n <= block->nr; n++)
 +    {
 +        pr_indent(fp,indent+INDENT);
 +        fprintf(fp,"%s->index[%d]=%u\n",
 +                title,bShowNumbers ? n : -1,block->index[n]);
 +    }
 +}
 +
 +/* Dump the raw tables of block: the title block, one line for each of the
 + * nr+1 entries of block->index and one line for each of the nra entries of
 + * block->a. With bShowNumbers false the array positions are printed as -1.
 + */
 +static void low_pr_blocka(FILE *fp,int indent,const char *title,t_blocka *block, gmx_bool bShowNumbers)
 +{
 +    int n;
 +
 +    if (!available(fp,block,indent,title))
 +    {
 +        return;
 +    }
 +
 +    indent = pr_blocka_title(fp,indent,title,block);
 +
 +    /* index has nr+1 entries, hence the inclusive upper bound */
 +    for (n = 0; n <= block->nr; n++)
 +    {
 +        pr_indent(fp,indent+INDENT);
 +        fprintf(fp,"%s->index[%d]=%u\n",
 +                title,bShowNumbers ? n : -1,block->index[n]);
 +    }
 +
 +    for (n = 0; n < block->nra; n++)
 +    {
 +        pr_indent(fp,indent+INDENT);
 +        fprintf(fp,"%s->a[%d]=%u\n",
 +                title,bShowNumbers ? n : -1,block->a[n]);
 +    }
 +}
 +
 +/* Print block to fp as a list of index ranges, one line per block entry.
 + *
 + * Entry i covers block->index[i] .. block->index[i+1]-1 and is printed as
 + * "title[i]={first..last}", or "title[i]={}" when the range is empty.
 + * If block->index[0] is not 0 only a diagnostic line is printed.
 + * With bShowNumbers false every number, including the entry number of an
 + * empty range, is printed as -1 so that dumps stay comparable.
 + */
 +void pr_block(FILE *fp,int indent,const char *title,t_block *block,gmx_bool bShowNumbers)
 +{
 +    int i,start,end;
 +
 +    if (!available(fp,block,indent,title))
 +    {
 +        return;
 +    }
 +
 +    indent = pr_block_title(fp,indent,title,block);
 +    start  = 0;
 +    if (block->index[start] != 0)
 +    {
 +        fprintf(fp,"block->index[%d] should be 0\n",start);
 +        return;
 +    }
 +
 +    for (i=0; i<block->nr; i++)
 +    {
 +        end = block->index[i+1];
 +        pr_indent(fp,indent);
 +        if (end <= start)
 +        {
 +            /* Honour bShowNumbers here as well; this branch used to print
 +             * the real entry number unconditionally.
 +             */
 +            fprintf(fp,"%s[%d]={}\n",title,bShowNumbers?i:-1);
 +        }
 +        else
 +        {
 +            fprintf(fp,"%s[%d]={%d..%d}\n",
 +                    title,bShowNumbers?i:-1,
 +                    bShowNumbers?start:-1,bShowNumbers?end-1:-1);
 +        }
 +        start = end;
 +    }
 +}
 +
 +/* Print block to fp, listing for every entry the elements of block->a that
 + * belong to it.
 + *
 + * Entry i owns a[index[i]] .. a[index[i+1]-1]. Each entry is printed as
 + * "title[i][first..last]={...}" (or "title[i]={}" when it is empty), with
 + * the element list wrapped onto a new indented line once the running line
 + * length exceeds USE_WIDTH.
 + * If index[0] is not 0, or the last index does not equal nra, the tables
 + * are reported as inconsistent and dumped in full by low_pr_blocka.
 + * With bShowNumbers false every index, including the entry number of an
 + * empty entry, is printed as -1 so that dumps stay comparable.
 + */
 +void pr_blocka(FILE *fp,int indent,const char *title,t_blocka *block,gmx_bool bShowNumbers)
 +{
 +    int      i,j,size,start,end;
 +    gmx_bool bIndexOk;
 +
 +    if (!available(fp,block,indent,title))
 +    {
 +        return;
 +    }
 +
 +    indent   = pr_blocka_title(fp,indent,title,block);
 +    start    = 0;
 +    end      = start;
 +    bIndexOk = (block->index[start] == 0);
 +    if (!bIndexOk)
 +    {
 +        fprintf(fp,"block->index[%d] should be 0\n",start);
 +    }
 +    else
 +    {
 +        for (i=0; i<block->nr; i++)
 +        {
 +            end  = block->index[i+1];
 +            /* size tracks the number of characters on the current line */
 +            size = pr_indent(fp,indent);
 +            if (end <= start)
 +            {
 +                /* Honour bShowNumbers here as well; this branch used to
 +                 * print the real entry number unconditionally.
 +                 */
 +                size += fprintf(fp,"%s[%d]={",title,bShowNumbers?i:-1);
 +            }
 +            else
 +            {
 +                size += fprintf(fp,"%s[%d][%d..%d]={",
 +                                title,bShowNumbers?i:-1,
 +                                bShowNumbers?start:-1,bShowNumbers?end-1:-1);
 +            }
 +            for (j=start; j<end; j++)
 +            {
 +                if (j > start)
 +                {
 +                    size += fprintf(fp,", ");
 +                }
 +                if (size > (USE_WIDTH))
 +                {
 +                    fprintf(fp,"\n");
 +                    size = pr_indent(fp,indent+INDENT);
 +                }
 +                size += fprintf(fp,"%u",block->a[j]);
 +            }
 +            fprintf(fp,"}\n");
 +            start = end;
 +        }
 +    }
 +
 +    if ((end != block->nra) || !bIndexOk)
 +    {
 +        pr_indent(fp,indent);
 +        fprintf(fp,"tables inconsistent, dumping complete tables:\n");
 +        low_pr_blocka(fp,indent,title,block,bShowNumbers);
 +    }
 +}
 +
 +/* Print the n names referenced by nm, one per line, as title[i]={name="..."}.
 + * Each nm[i] is a pointer to the actual string. With bShowNumbers false the
 + * index is printed as -1.
 + */
 +static void pr_strings(FILE *fp,int indent,const char *title,char ***nm,int n, gmx_bool bShowNumbers)
 +{
 +    int k;
 +
 +    if (!available(fp,nm,indent,title))
 +    {
 +        return;
 +    }
 +
 +    indent = pr_title_n(fp,indent,title,n);
 +    for (k = 0; k < n; k++)
 +    {
 +        pr_indent(fp,indent);
 +        fprintf(fp,"%s[%d]={name=\"%s\"}\n",
 +                title,bShowNumbers ? k : -1,*nm[k]);
 +    }
 +}
 +
 +/* Print n pairs of names, one pair per line, as
 + * title[i]={name="...",nameB="..."}. nm and nmB are indexed in parallel and
 + * each element points to the actual string; only nm is checked with
 + * available(). With bShowNumbers false the index is printed as -1.
 + */
 +static void pr_strings2(FILE *fp,int indent,const char *title,
 +                        char ***nm,char ***nmB,int n, gmx_bool bShowNumbers)
 +{
 +    int k;
 +
 +    if (!available(fp,nm,indent,title))
 +    {
 +        return;
 +    }
 +
 +    indent = pr_title_n(fp,indent,title,n);
 +    for (k = 0; k < n; k++)
 +    {
 +        pr_indent(fp,indent);
 +        fprintf(fp,"%s[%d]={name=\"%s\",nameB=\"%s\"}\n",
 +                title,bShowNumbers ? k : -1,*nm[k],*nmB[k]);
 +    }
 +}
 +
 +/* Print the n entries of resinfo, one per line, with name, nr and ic.
 + * An ic of '\0' is shown as a blank. With bShowNumbers false the index is
 + * printed as -1.
 + */
 +static void pr_resinfo(FILE *fp,int indent,const char *title,t_resinfo *resinfo,int n, gmx_bool bShowNumbers)
 +{
 +    int       r;
 +    t_resinfo *ri;
 +
 +    if (!available(fp,resinfo,indent,title))
 +    {
 +        return;
 +    }
 +
 +    indent = pr_title_n(fp,indent,title,n);
 +    for (r = 0; r < n; r++)
 +    {
 +        ri = &resinfo[r];
 +        pr_indent(fp,indent);
 +        fprintf(fp,"%s[%d]={name=\"%s\", nr=%d, ic='%c'}\n",
 +                title,bShowNumbers ? r : -1,
 +                *(ri->name),ri->nr,
 +                (ri->ic == '\0') ? ' ' : ri->ic);
 +    }
 +}
 +
 +/* Print the n entries of the atom array, one line per atom, with the type,
 + * typeB, ptype (as text from ptype_str), m, q, mB, qB, resind and atomnumber
 + * fields. The atom index is always printed.
 + */
 +static void pr_atom(FILE *fp,int indent,const char *title,t_atom *atom,int n)
 +{
 +    int    a;
 +    t_atom *at;
 +
 +    if (!available(fp,atom,indent,title))
 +    {
 +        return;
 +    }
 +
 +    indent = pr_title_n(fp,indent,title,n);
 +    for (a = 0; a < n; a++)
 +    {
 +        at = &atom[a];
 +        pr_indent(fp,indent);
 +        fprintf(fp,"%s[%6d]={type=%3d, typeB=%3d, ptype=%8s, m=%12.5e, "
 +                "q=%12.5e, mB=%12.5e, qB=%12.5e, resind=%5d, atomnumber=%3d}\n",
 +                title,a,at->type,at->typeB,ptype_str[at->ptype],
 +                at->m,at->q,at->mB,at->qB,
 +                at->resind,at->atomnumber);
 +    }
 +}
 +
 +/* Print, for each of the egcNR group types, the number of groups and their
 + * names. grps[t].nm_ind[] holds indices into grpname, whose elements point
 + * to the actual strings. The indent and bShowNumbers arguments are not used.
 + */
 +static void pr_grps(FILE *fp,int indent,const char *title,t_grps grps[],
 +                    char **grpname[], gmx_bool bShowNumbers)
 +{
 +    int gtype,k;
 +
 +    for (gtype = 0; gtype < egcNR; gtype++)
 +    {
 +        fprintf(fp,"%s[%-12s] nr=%d, name=[",title,gtypes[gtype],grps[gtype].nr);
 +        for (k = 0; k < grps[gtype].nr; k++)
 +        {
 +            fprintf(fp," %s",*(grpname[grps[gtype].nm_ind[k]]));
 +        }
 +        fprintf(fp,"]\n");
 +    }
 +}
 +
 +/* Print the group bookkeeping in groups to fp.
 + *
 + * Output: the per-type group lists (pr_grps), the group names (pr_strings),
 + * a "groups" row with the group type names, an "allocated" row with
 + * ngrpnr[] for every group type, and then one "groupnr[i]" row per atom up
 + * to the largest ngrpnr[] value. A group type without a grpnr array is
 + * shown as 0; if no type has any entries a single "groupnr[*]" row of
 + * zeros is printed. The title argument is not used.
 + *
 + * All rows go to fp; the "groups" and "allocated" rows used to be written
 + * with printf and therefore ended up on stdout.
 + */
 +static void pr_groups(FILE *fp,int indent,const char *title,
 +                      gmx_groups_t *groups,
 +                      gmx_bool bShowNumbers)
 +{
 +    int nat_max,i,g;
 +
 +    pr_grps(fp,indent,"grp",groups->grps,groups->grpname,bShowNumbers);
 +    pr_strings(fp,indent,"grpname",groups->grpname,groups->ngrpname,bShowNumbers);
 +
 +    (void) pr_indent(fp,indent);
 +    fprintf(fp,"groups          ");
 +    for(g=0; g<egcNR; g++)
 +    {
 +        fprintf(fp," %5.5s",gtypes[g]);
 +    }
 +    fprintf(fp,"\n");
 +
 +    (void) pr_indent(fp,indent);
 +    fprintf(fp,"allocated       ");
 +    nat_max = 0;
 +    for(g=0; g<egcNR; g++)
 +    {
 +        fprintf(fp," %5d",groups->ngrpnr[g]);
 +        nat_max = max(nat_max,groups->ngrpnr[g]);
 +    }
 +    fprintf(fp,"\n");
 +
 +    if (nat_max == 0)
 +    {
 +        (void) pr_indent(fp,indent);
 +        fprintf(fp,"groupnr[%5s] =","*");
 +        for(g=0; g<egcNR; g++)
 +        {
 +            fprintf(fp,"  %3d ",0);
 +        }
 +        fprintf(fp,"\n");
 +    }
 +    else
 +    {
 +        for(i=0; i<nat_max; i++)
 +        {
 +            (void) pr_indent(fp,indent);
 +            fprintf(fp,"groupnr[%5d] =",i);
 +            for(g=0; g<egcNR; g++)
 +            {
 +                /* A group type without a grpnr array is printed as 0 */
 +                fprintf(fp,"  %3d ",
 +                        groups->grpnr[g] ? groups->grpnr[g][i] : 0);
 +            }
 +            fprintf(fp,"\n");
 +        }
 +    }
 +}
 +
 +/* Print the contents of atoms to fp: the atom records, the atom names, the
 + * atom type names (A and B) and the residue information, in that order.
 + * bShownumbers is forwarded to the name and residue printers.
 + */
 +void pr_atoms(FILE *fp,int indent,const char *title,t_atoms *atoms, 
 +              gmx_bool bShownumbers)
 +{
 +    if (!available(fp,atoms,indent,title))
 +    {
 +        return;
 +    }
 +
 +    indent = pr_title(fp,indent,title);
 +    pr_atom(fp,indent,"atom",atoms->atom,atoms->nr);
 +    pr_strings(fp,indent,"atom",atoms->atomname,atoms->nr,bShownumbers);
 +    pr_strings2(fp,indent,"type",atoms->atomtype,atoms->atomtypeB,atoms->nr,bShownumbers);
 +    pr_resinfo(fp,indent,"residue",atoms->resinfo,atoms->nres,bShownumbers);
 +}
 +
 +
 +/* Print one line per atom type with the radius, vol, gb_radius, surftens,
 + * atomnumber and S_hct entries of atomtypes. With bShowNumbers false the
 + * type index is printed as -1.
 + */
 +void pr_atomtypes(FILE *fp,int indent,const char *title,t_atomtypes *atomtypes, 
 +                  gmx_bool bShowNumbers)
 +{
 +    int t;
 +
 +    if (!available(fp,atomtypes,indent,title))
 +    {
 +        return;
 +    }
 +
 +    indent = pr_title(fp,indent,title);
 +    for (t = 0; t < atomtypes->nr; t++)
 +    {
 +        pr_indent(fp,indent);
 +        fprintf(fp,
 +                "atomtype[%3d]={radius=%12.5e, volume=%12.5e, gb_radius=%12.5e, surftens=%12.5e, atomnumber=%4d, S_hct=%12.5e)}\n",
 +                bShowNumbers ? t : -1,
 +                atomtypes->radius[t],
 +                atomtypes->vol[t],
 +                atomtypes->gb_radius[t],
 +                atomtypes->surftens[t],
 +                atomtypes->atomnumber[t],
 +                atomtypes->S_hct[t]);
 +    }
 +}
 +
 +/* Print molecule type molt, numbered n in the title: its name, atoms,
 + * charge groups (cgs), exclusions (excls) and the interaction list of every
 + * function type. ffparams supplies the functype table that pr_ilist needs
 + * to interpret the lists.
 + */
 +static void pr_moltype(FILE *fp,int indent,const char *title,
 +                       gmx_moltype_t *molt,int n,
 +                       gmx_ffparams_t *ffparams,
 +                       gmx_bool bShowNumbers)
 +{
 +    int ftype;
 +
 +    indent = pr_title_n(fp,indent,title,n);
 +    pr_indent(fp,indent);
 +    fprintf(fp,"name=\"%s\"\n",*(molt->name));
 +
 +    pr_atoms(fp,indent,"atoms",&molt->atoms,bShowNumbers);
 +    pr_block(fp,indent,"cgs",&molt->cgs,bShowNumbers);
 +    pr_blocka(fp,indent,"excls",&molt->excls,bShowNumbers);
 +
 +    for (ftype = 0; ftype < F_NRE; ftype++)
 +    {
 +        pr_ilist(fp,indent,interaction_function[ftype].longname,
 +                 ffparams->functype,&molt->ilist[ftype],bShowNumbers);
 +    }
 +}
 +
 +/* Print molecule block molb, numbered n in the title: the molecule type
 + * index with its name (looked up in the molt array), the nmol and
 + * natoms_mol counts, and the posres_xA / posres_xB vectors when their
 + * counts are positive. bShowNumbers is not used.
 + */
 +static void pr_molblock(FILE *fp,int indent,const char *title,
 +                        gmx_molblock_t *molb,int n,
 +                        gmx_moltype_t *molt,
 +                        gmx_bool bShowNumbers)
 +{
 +    indent = pr_title_n(fp,indent,title,n);
 +
 +    pr_indent(fp,indent);
 +    fprintf(fp,"%-20s = %d \"%s\"\n",
 +            "moltype",molb->type,*(molt[molb->type].name));
 +
 +    pr_int(fp,indent,"#molecules",molb->nmol);
 +    pr_int(fp,indent,"#atoms_mol",molb->natoms_mol);
 +
 +    pr_int(fp,indent,"#posres_xA",molb->nposres_xA);
 +    if (molb->nposres_xA > 0)
 +    {
 +        pr_rvecs(fp,indent,"posres_xA",molb->posres_xA,molb->nposres_xA);
 +    }
 +
 +    pr_int(fp,indent,"#posres_xB",molb->nposres_xB);
 +    if (molb->nposres_xB > 0)
 +    {
 +        pr_rvecs(fp,indent,"posres_xB",molb->posres_xB,molb->nposres_xB);
 +    }
 +}
 +
 +/* Print the molecular topology mtop to fp: name, atom count, every molecule
 + * block, the force-field parameters, the atom types, every molecule type
 + * and finally the groups.
 + */
 +void pr_mtop(FILE *fp,int indent,const char *title,gmx_mtop_t *mtop,
 +             gmx_bool bShowNumbers)
 +{
 +    int n;
 +
 +    if (!available(fp,mtop,indent,title))
 +    {
 +        return;
 +    }
 +
 +    indent = pr_title(fp,indent,title);
 +    pr_indent(fp,indent);
 +    fprintf(fp,"name=\"%s\"\n",*(mtop->name));
 +    pr_int(fp,indent,"#atoms",mtop->natoms);
 +
 +    for (n = 0; n < mtop->nmolblock; n++)
 +    {
 +        pr_molblock(fp,indent,"molblock",&mtop->molblock[n],n,
 +                    mtop->moltype,bShowNumbers);
 +    }
 +
 +    pr_ffparams(fp,indent,"ffparams",&mtop->ffparams,bShowNumbers);
 +    pr_atomtypes(fp,indent,"atomtypes",&mtop->atomtypes,bShowNumbers);
 +
 +    for (n = 0; n < mtop->nmoltype; n++)
 +    {
 +        pr_moltype(fp,indent,"moltype",&mtop->moltype[n],n,
 +                   &mtop->ffparams,bShowNumbers);
 +    }
 +
 +    pr_groups(fp,indent,"groups",&mtop->groups,bShowNumbers);
 +}
 +
 +/* Print the topology top to fp: name, atoms, atom types, the cgs and mols
 + * blocks, the exclusions and the interaction definitions.
 + */
 +void pr_top(FILE *fp,int indent,const char *title,t_topology *top, gmx_bool bShowNumbers)
 +{
 +    if (!available(fp,top,indent,title))
 +    {
 +        return;
 +    }
 +
 +    indent = pr_title(fp,indent,title);
 +    pr_indent(fp,indent);
 +    fprintf(fp,"name=\"%s\"\n",*(top->name));
 +
 +    pr_atoms(fp,indent,"atoms",&top->atoms,bShowNumbers);
 +    pr_atomtypes(fp,indent,"atomtypes",&top->atomtypes,bShowNumbers);
 +    pr_block(fp,indent,"cgs",&top->cgs,bShowNumbers);
 +    pr_block(fp,indent,"mols",&top->mols,bShowNumbers);
 +    pr_blocka(fp,indent,"excls",&top->excls,bShowNumbers);
 +    pr_idef(fp,indent,"idef",&top->idef,bShowNumbers);
 +}
 +
 +/* Print the header sh to fp: for each of the bIr, bBox, bTop, bX, bV and bF
 + * flags whether it is "present" or "not present", followed by natoms and
 + * lambda.
 + */
 +void pr_header(FILE *fp,int indent,const char *title,t_tpxheader *sh)
 +{
 +    const char *yes = "";
 +    const char *no  = "not ";
 +
 +    if (!available(fp,sh,indent,title))
 +    {
 +        return;
 +    }
 +
 +    indent = pr_title(fp,indent,title);
 +
 +    pr_indent(fp,indent);
 +    fprintf(fp,"bIr    = %spresent\n",sh->bIr ? yes : no);
 +    pr_indent(fp,indent);
 +    fprintf(fp,"bBox   = %spresent\n",sh->bBox ? yes : no);
 +    pr_indent(fp,indent);
 +    fprintf(fp,"bTop   = %spresent\n",sh->bTop ? yes : no);
 +    pr_indent(fp,indent);
 +    fprintf(fp,"bX     = %spresent\n",sh->bX ? yes : no);
 +    pr_indent(fp,indent);
 +    fprintf(fp,"bV     = %spresent\n",sh->bV ? yes : no);
 +    pr_indent(fp,indent);
 +    fprintf(fp,"bF     = %spresent\n",sh->bF ? yes : no);
 +
 +    pr_indent(fp,indent);
 +    fprintf(fp,"natoms = %d\n",sh->natoms);
 +    pr_indent(fp,indent);
 +    fprintf(fp,"lambda = %e\n",sh->lambda);
 +}
 +
 +/* Print the nodeid, nnodes and npmenodes fields of cr to fp under a
 + * "commrec:" heading; the fields are indented two columns further than the
 + * heading. Printing of threadid and nthreads is disabled.
 + */
 +void pr_commrec(FILE *fp,int indent,t_commrec *cr)
 +{
 +    int field_indent = indent + 2;
 +
 +    pr_indent(fp,indent);
 +    fprintf(fp,"commrec:\n");
 +
 +    pr_indent(fp,field_indent);
 +    fprintf(fp,"nodeid    = %d\n",cr->nodeid);
 +    pr_indent(fp,field_indent);
 +    fprintf(fp,"nnodes    = %d\n",cr->nnodes);
 +    pr_indent(fp,field_indent);
 +    fprintf(fp,"npmenodes = %d\n",cr->npmenodes);
 +    /*
 +    pr_indent(fp,field_indent);
 +    fprintf(fp,"threadid  = %d\n",cr->threadid);
 +    pr_indent(fp,field_indent);
 +    fprintf(fp,"nthreads  = %d\n",cr->nthreads);
 +    */
 +}
diff --cc src/gromacs/gmxpreprocess/readir.c
index 3f280a3c22,0000000000..97f2078616
mode 100644,000000..100644
--- a/src/gromacs/gmxpreprocess/readir.c
+++ b/src/gromacs/gmxpreprocess/readir.c
@@@ -1,2571 -1,0 +1,2605 @@@
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <ctype.h>
 +#include <stdlib.h>
 +#include <limits.h>
 +#include "sysstuff.h"
 +#include "smalloc.h"
 +#include "typedefs.h"
 +#include "physics.h"
 +#include "names.h"
 +#include "gmx_fatal.h"
 +#include "macros.h"
 +#include "index.h"
 +#include "symtab.h"
 +#include "string2.h"
 +#include "readinp.h"
 +#include "warninp.h"
 +#include "readir.h" 
 +#include "toputil.h"
 +#include "index.h"
 +#include "network.h"
 +#include "vec.h"
 +#include "pbc.h"
 +#include "mtop_util.h"
 +#include "chargegroup.h"
 +#include "inputrec.h"
 +
 +#define MAXPTR 254
 +#define NOGID  255
 +
 +/* Resource parameters 
 + * Do not change any of these until you read the instruction
 + * in readinp.h. Some cpp's do not take spaces after the backslash
 + * (like the c-shell), which will give you a very weird compiler
 + * message.
 + */
 +
 +static char tcgrps[STRLEN],tau_t[STRLEN],ref_t[STRLEN],
 +  acc[STRLEN],accgrps[STRLEN],freeze[STRLEN],frdim[STRLEN],
 +  energy[STRLEN],user1[STRLEN],user2[STRLEN],vcm[STRLEN],xtc_grps[STRLEN],
 +  couple_moltype[STRLEN],orirefitgrp[STRLEN],egptable[STRLEN],egpexcl[STRLEN],
 +  wall_atomtype[STRLEN],wall_density[STRLEN],deform[STRLEN],QMMM[STRLEN];
 +static char foreign_lambda[STRLEN];
 +static char **pull_grp;
 +static char **rot_grp;
 +static char anneal[STRLEN],anneal_npoints[STRLEN],
 +  anneal_time[STRLEN],anneal_temp[STRLEN];
 +static char QMmethod[STRLEN],QMbasis[STRLEN],QMcharge[STRLEN],QMmult[STRLEN],
 +  bSH[STRLEN],CASorbitals[STRLEN], CASelectrons[STRLEN],SAon[STRLEN],
 +  SAoff[STRLEN],SAsteps[STRLEN],bTS[STRLEN],bOPT[STRLEN]; 
 +static char efield_x[STRLEN],efield_xt[STRLEN],efield_y[STRLEN],
 +  efield_yt[STRLEN],efield_z[STRLEN],efield_zt[STRLEN];
 +
 +enum {
 +    egrptpALL,         /* All particles have to be a member of a group.     */
 +    egrptpALL_GENREST, /* A rest group with name is generated for particles *
 +                        * that are not part of any group.                   */
 +    egrptpPART,        /* As egrptpALL_GENREST, but no name is generated    *
 +                        * for the rest group.                               */
 +    egrptpONE          /* Merge all selected groups into one group,         *
 +                        * make a rest group for the remaining particles.    */
 +};
 +
 +
 +/* Allocate the include and define buffers of opts, STRLEN elements each,
 + * using snew. The ir argument is not touched.
 + */
 +void init_ir(t_inputrec *ir, t_gromppopts *opts)
 +{
 +    snew(opts->include,STRLEN);
 +    snew(opts->define,STRLEN);
 +}
 +
 +/* Report message s through warning_error when condition b is true;
 + * do nothing otherwise. Backend of the CHECK macro used in check_ir.
 + */
 +static void _low_check(gmx_bool b,char *s,warninp_t wi)
 +{
 +    if (!b)
 +    {
 +        return;
 +    }
 +
 +    warning_error(wi,s);
 +}
 +
 +/* Make the step interval *p a multiple of the interval nst.
 + *
 + * If *p is positive and not already a multiple of nst, it is rounded up to
 + * the next multiple and a warning naming both parameters (desc_p and
 + * desc_nst) is issued through wi. A non-positive *p is left alone.
 + *
 + * A non-positive nst is ignored: check_ir only records an error for
 + * nstlist <= 0 and then still calls this function with it, so nst == 0
 + * would otherwise divide by zero in the remainder below, and rounding up to
 + * a multiple of a negative interval has no meaning.
 + */
 +static void check_nst(const char *desc_nst,int nst,
 +                      const char *desc_p,int *p,
 +                      warninp_t wi)
 +{
 +    char buf[STRLEN];
 +
 +    if (nst <= 0)
 +    {
 +        return;
 +    }
 +
 +    if (*p > 0 && *p % nst != 0)
 +    {
 +        /* Round up to the next multiple of nst */
 +        *p = ((*p)/nst + 1)*nst;
 +        sprintf(buf,"%s should be a multiple of %s, changing %s to %d\n",
 +                desc_p,desc_nst,desc_p,*p);
 +        warning(wi,buf);
 +    }
 +}
 +
 +/* Return TRUE when ir uses the eiMD integrator or one matched by EI_VV and
 + * has temperature coupling switched off (etc == etcNO).
 + */
 +static gmx_bool ir_NVE(const t_inputrec *ir)
 +{
 +    gmx_bool bIntegratorOk;
 +
 +    bIntegratorOk = (ir->eI == eiMD || EI_VV(ir->eI));
 +
 +    return (bIntegratorOk && ir->etc == etcNO);
 +}
 +
 +/* Return the largest common divisor of n1 and n2.
 + *
 + * For positive arguments this is the greatest common divisor, computed with
 + * Euclid's algorithm instead of trying every candidate divisor. If either
 + * argument is zero or negative the result is 1, as it was with the
 + * trial-division loop, so callers always get a usable positive interval.
 + */
 +static int lcd(int n1,int n2)
 +{
 +    int rem;
 +
 +    if (n1 <= 0 || n2 <= 0)
 +    {
 +        return 1;
 +    }
 +
 +    /* Euclid: gcd(a,b) == gcd(b, a mod b) until the remainder is zero */
 +    while (n2 != 0)
 +    {
 +        rem = n1 % n2;
 +        n1  = n2;
 +        n2  = rem;
 +    }
 +
 +    return n1;
 +}
 +
 +void check_ir(const char *mdparin,t_inputrec *ir, t_gromppopts *opts,
 +              warninp_t wi)
 +/* Check internal consistency */
 +{
 +    /* Strange macro: first one fills the err_buf, and then one can check 
 +     * the condition, which will print the message and increase the error
 +     * counter.
 +     */
 +#define CHECK(b) _low_check(b,err_buf,wi)
 +    char err_buf[256],warn_buf[STRLEN];
 +    int  ns_type=0;
 +    real dt_pcoupl;
 +
 +  set_warning_line(wi,mdparin,-1);
 +
 +  /* BASIC CUT-OFF STUFF */
 +  if (ir->rlist == 0 ||
 +      !((EEL_MIGHT_BE_ZERO_AT_CUTOFF(ir->coulombtype) && ir->rcoulomb > ir->rlist) ||
 +        (EVDW_MIGHT_BE_ZERO_AT_CUTOFF(ir->vdwtype)    && ir->rvdw     > ir->rlist))) {
 +    /* No switched potential and/or no twin-range:
 +     * we can set the long-range cut-off to the maximum of the other cut-offs.
 +     */
 +    ir->rlistlong = max_cutoff(ir->rlist,max_cutoff(ir->rvdw,ir->rcoulomb));
 +  } else if (ir->rlistlong < 0) {
 +    ir->rlistlong = max_cutoff(ir->rlist,max_cutoff(ir->rvdw,ir->rcoulomb));
 +    sprintf(warn_buf,"rlistlong was not set, setting it to %g (no buffer)",
 +	    ir->rlistlong);
 +    warning(wi,warn_buf);
 +  }
 +  if (ir->rlistlong == 0 && ir->ePBC != epbcNONE) {
 +      warning_error(wi,"Can not have an infinite cut-off with PBC");
 +  }
 +  if (ir->rlistlong > 0 && (ir->rlist == 0 || ir->rlistlong < ir->rlist)) {
 +      warning_error(wi,"rlistlong can not be shorter than rlist");
 +  }
 +  if (IR_TWINRANGE(*ir) && ir->nstlist <= 0) {
 +      warning_error(wi,"Can not have nstlist<=0 with twin-range interactions");
 +  }
 +
 +    /* GENERAL INTEGRATOR STUFF */
 +    if (!(ir->eI == eiMD || EI_VV(ir->eI)))
 +    {
 +        ir->etc = etcNO;
 +    }
 +    if (!EI_DYNAMICS(ir->eI))
 +    {
 +        ir->epc = epcNO;
 +    }
 +    if (EI_DYNAMICS(ir->eI))
 +    {
 +        if (ir->nstcalcenergy < 0)
 +        {
 +            ir->nstcalcenergy = ir_optimal_nstcalcenergy(ir);
 +            if (ir->nstenergy != 0 && ir->nstenergy < ir->nstcalcenergy)
 +            {
 +                /* nstcalcenergy larger than nstener does not make sense.
 +                 * We ideally want nstcalcenergy=nstener.
 +                 */
 +                if (ir->nstlist > 0)
 +                {
 +                    ir->nstcalcenergy = lcd(ir->nstenergy,ir->nstlist);
 +                }
 +                else
 +                {
 +                    ir->nstcalcenergy = ir->nstenergy;
 +                }
 +            }
 +        }
 +        if (ir->epc != epcNO)
 +        {
 +            if (ir->nstpcouple < 0)
 +            {
 +                ir->nstpcouple = ir_optimal_nstpcouple(ir);
 +            }
 +        }
 +        if (IR_TWINRANGE(*ir))
 +        {
 +            check_nst("nstlist",ir->nstlist,
 +                      "nstcalcenergy",&ir->nstcalcenergy,wi);
 +            if (ir->epc != epcNO)
 +            {
 +                check_nst("nstlist",ir->nstlist,
 +                          "nstpcouple",&ir->nstpcouple,wi); 
 +            }
 +        }
 +
 +        if (ir->nstcalcenergy > 1)
 +        {
 +            /* for storing exact averages nstenergy should be
 +             * a multiple of nstcalcenergy
 +             */
 +            check_nst("nstcalcenergy",ir->nstcalcenergy,
 +                      "nstenergy",&ir->nstenergy,wi);
 +            if (ir->efep != efepNO)
 +            {
 +                /* nstdhdl should be a multiple of nstcalcenergy */
 +                check_nst("nstcalcenergy",ir->nstcalcenergy,
 +                          "nstdhdl",&ir->nstdhdl,wi);
 +            }
 +        }
 +    }
 +
 +  /* LD STUFF */
 +  if ((EI_SD(ir->eI) || ir->eI == eiBD) &&
 +      ir->bContinuation && ir->ld_seed != -1) {
 +      warning_note(wi,"You are doing a continuation with SD or BD, make sure that ld_seed is different from the previous run (using ld_seed=-1 will ensure this)");
 +  }
 +
 +  /* TPI STUFF */
 +  if (EI_TPI(ir->eI)) {
 +    sprintf(err_buf,"TPI only works with pbc = %s",epbc_names[epbcXYZ]);
 +    CHECK(ir->ePBC != epbcXYZ);
 +    sprintf(err_buf,"TPI only works with ns = %s",ens_names[ensGRID]);
 +    CHECK(ir->ns_type != ensGRID);
 +    sprintf(err_buf,"with TPI nstlist should be larger than zero");
 +    CHECK(ir->nstlist <= 0);
 +    sprintf(err_buf,"TPI does not work with full electrostatics other than PME");
 +    CHECK(EEL_FULL(ir->coulombtype) && !EEL_PME(ir->coulombtype));
 +  }
 +
 +  /* SHAKE / LINCS */
 +  if ( (opts->nshake > 0) && (opts->bMorse) ) {
 +    sprintf(warn_buf,
 +	    "Using morse bond-potentials while constraining bonds is useless");
 +    warning(wi,warn_buf);
 +  }
 +  
 +  sprintf(err_buf,"shake_tol must be > 0 instead of %g while using shake",
 +	  ir->shake_tol);
 +  CHECK(((ir->shake_tol <= 0.0) && (opts->nshake>0) && 
 +	 (ir->eConstrAlg == econtSHAKE)));
 +     
 +  /* PBC/WALLS */
 +  sprintf(err_buf,"walls only work with pbc=%s",epbc_names[epbcXY]);
 +  CHECK(ir->nwall && ir->ePBC!=epbcXY);
 +
 +  /* VACUUM STUFF */
 +  if (ir->ePBC != epbcXYZ && ir->nwall != 2) {
 +    if (ir->ePBC == epbcNONE) {
 +      if (ir->epc != epcNO) {
 +          warning(wi,"Turning off pressure coupling for vacuum system");
 +          ir->epc = epcNO;
 +      }
 +    } else {
 +      sprintf(err_buf,"Can not have pressure coupling with pbc=%s",
 +	      epbc_names[ir->ePBC]);
 +      CHECK(ir->epc != epcNO);
 +    }
 +    sprintf(err_buf,"Can not have Ewald with pbc=%s",epbc_names[ir->ePBC]);
 +    CHECK(EEL_FULL(ir->coulombtype));
 +    
 +    sprintf(err_buf,"Can not have dispersion correction with pbc=%s",
 +	    epbc_names[ir->ePBC]);
 +    CHECK(ir->eDispCorr != edispcNO);
 +  }
 +
 +  if (ir->rlist == 0.0) {
 +    sprintf(err_buf,"can only have neighborlist cut-off zero (=infinite)\n"
 +	    "with coulombtype = %s or coulombtype = %s\n"
 +	    "without periodic boundary conditions (pbc = %s) and\n"
 +	    "rcoulomb and rvdw set to zero",
 +	    eel_names[eelCUT],eel_names[eelUSER],epbc_names[epbcNONE]);
 +    CHECK(((ir->coulombtype != eelCUT) && (ir->coulombtype != eelUSER)) ||
 +	  (ir->ePBC     != epbcNONE) || 
 +	  (ir->rcoulomb != 0.0)      || (ir->rvdw != 0.0));
 +
 +    if (ir->nstlist < 0) {
 +        warning_error(wi,"Can not have heuristic neighborlist updates without cut-off");
 +    }
 +    if (ir->nstlist > 0) {
 +        warning_note(wi,"Simulating without cut-offs is usually (slightly) faster with nstlist=0, nstype=simple and particle decomposition");
 +    }
 +  }
 +
 +  /* COMM STUFF */
 +  if (ir->nstcomm == 0) {
 +    ir->comm_mode = ecmNO;
 +  }
 +  if (ir->comm_mode != ecmNO) {
 +    if (ir->nstcomm < 0) {
 +        warning(wi,"If you want to remove the rotation around the center of mass, you should set comm_mode = Angular instead of setting nstcomm < 0. nstcomm is modified to its absolute value");
 +      ir->nstcomm = abs(ir->nstcomm);
 +    }
 +    
 +    if (ir->nstcalcenergy > 0 && ir->nstcomm < ir->nstcalcenergy) {
 +        warning_note(wi,"nstcomm < nstcalcenergy defeats the purpose of nstcalcenergy, setting nstcomm to nstcalcenergy");
 +      ir->nstcomm = ir->nstcalcenergy;
 +    }
 +
 +    if (ir->comm_mode == ecmANGULAR) {
 +      sprintf(err_buf,"Can not remove the rotation around the center of mass with periodic molecules");
 +      CHECK(ir->bPeriodicMols);
 +      if (ir->ePBC != epbcNONE)
 +          warning(wi,"Removing the rotation around the center of mass in a periodic system (this is not a problem when you have only one molecule).");
 +    }
 +  }
 +    
 +  if (EI_STATE_VELOCITY(ir->eI) && ir->ePBC == epbcNONE && ir->comm_mode != ecmANGULAR) {
 +      warning_note(wi,"Tumbling and or flying ice-cubes: We are not removing rotation around center of mass in a non-periodic system. You should probably set comm_mode = ANGULAR.");
 +  }
 +  
 +  sprintf(err_buf,"Free-energy not implemented for Ewald and PPPM");
 +  CHECK((ir->coulombtype==eelEWALD || ir->coulombtype==eelPPPM)
 +	&& (ir->efep!=efepNO));
 +  
 +  sprintf(err_buf,"Twin-range neighbour searching (NS) with simple NS"
 +	  " algorithm not implemented");
 +  CHECK(((ir->rcoulomb > ir->rlist) || (ir->rvdw > ir->rlist)) 
 +	&& (ir->ns_type == ensSIMPLE));
 +  
 +    /* TEMPERATURE COUPLING */
 +    if (ir->etc == etcYES)
 +    {
 +        ir->etc = etcBERENDSEN;
 +        warning_note(wi,"Old option for temperature coupling given: "
 +                     "changing \"yes\" to \"Berendsen\"\n");
 +    }
 +  
 +    if (ir->etc == etcNOSEHOOVER)
 +    {
 +        if (ir->opts.nhchainlength < 1) 
 +        {
 +            sprintf(warn_buf,"number of Nose-Hoover chains (currently %d) cannot be less than 1,reset to 1\n",ir->opts.nhchainlength);
 +            ir->opts.nhchainlength =1;
 +            warning(wi,warn_buf);
 +        }
 +        
 +        if (ir->etc==etcNOSEHOOVER && !EI_VV(ir->eI) && ir->opts.nhchainlength > 1)
 +        {
 +            warning_note(wi,"leapfrog does not yet support Nose-Hoover chains, nhchainlength reset to 1");
 +            ir->opts.nhchainlength = 1;
 +        }
 +    }
 +    else
 +    {
 +        ir->opts.nhchainlength = 0;
 +    }
 +
 +    if (ir->etc == etcBERENDSEN)
 +    {
 +        sprintf(warn_buf,"The %s thermostat does not generate the correct kinetic energy distribution. You might want to consider using the %s thermostat.",
 +                ETCOUPLTYPE(ir->etc),ETCOUPLTYPE(etcVRESCALE));
 +        warning_note(wi,warn_buf);
 +    }
 +
 +    if ((ir->etc==etcNOSEHOOVER || ir->etc==etcANDERSEN || ir->etc==etcANDERSENINTERVAL) 
 +        && ir->epc==epcBERENDSEN)
 +    {
 +        sprintf(warn_buf,"Using Berendsen pressure coupling invalidates the "
 +                "true ensemble for the thermostat");
 +        warning(wi,warn_buf);
 +    }
 +
 +    /* PRESSURE COUPLING */
 +    if (ir->epc == epcISOTROPIC)
 +    {
 +        ir->epc = epcBERENDSEN;
 +        warning_note(wi,"Old option for pressure coupling given: "
 +                     "changing \"Isotropic\" to \"Berendsen\"\n"); 
 +    }
 +
 +    if (ir->epc != epcNO)
 +    {
 +        dt_pcoupl = ir->nstpcouple*ir->delta_t;
 +
-         sprintf(err_buf,"tau_p must be > 0 instead of %g\n",ir->tau_p);
++        sprintf(err_buf,"tau-p must be > 0 instead of %g\n",ir->tau_p);
 +        CHECK(ir->tau_p <= 0);
 +        
 +        if (ir->tau_p/dt_pcoupl < pcouple_min_integration_steps(ir->epc))
 +        {
-             sprintf(warn_buf,"For proper integration of the %s barostat, tau_p (%g) should be at least %d times larger than nstpcouple*dt (%g)",
++            sprintf(warn_buf,"For proper integration of the %s barostat, tau-p (%g) should be at least %d times larger than nstpcouple*dt (%g)",
 +                    EPCOUPLTYPE(ir->epc),ir->tau_p,pcouple_min_integration_steps(ir->epc),dt_pcoupl);
 +            warning(wi,warn_buf);
 +        }	
 +        
 +        sprintf(err_buf,"compressibility must be > 0 when using pressure" 
 +                " coupling %s\n",EPCOUPLTYPE(ir->epc));
 +        CHECK(ir->compress[XX][XX] < 0 || ir->compress[YY][YY] < 0 || 
 +              ir->compress[ZZ][ZZ] < 0 || 
 +              (trace(ir->compress) == 0 && ir->compress[YY][XX] <= 0 &&
 +               ir->compress[ZZ][XX] <= 0 && ir->compress[ZZ][YY] <= 0));
 +        
 +        sprintf(err_buf,"pressure coupling with PPPM not implemented, use PME");
 +        CHECK(ir->coulombtype == eelPPPM);
 +        
 +    }
 +    else if (ir->coulombtype == eelPPPM)
 +    {
 +        sprintf(warn_buf,"The pressure with PPPM is incorrect, if you need the pressure use PME");
 +        warning(wi,warn_buf);
 +    }
 +    
 +    if (EI_VV(ir->eI))
 +    {
 +        if (ir->epc > epcNO)
 +        {
 +            if (ir->epc!=epcMTTK)
 +            {
 +                warning_error(wi,"NPT only defined for vv using Martyna-Tuckerman-Tobias-Klein equations");	      
 +            }
 +        }
 +    }
 +
 +  /* ELECTROSTATICS */
 +  /* More checks are in triple check (grompp.c) */
 +    if (ir->coulombtype == eelPPPM)
 +    {
 +        warning_error(wi,"PPPM is not functional in the current version, we plan to implement PPPM through a small modification of the PME code");
 +    }
 +
 +  if (ir->coulombtype == eelSWITCH) {
 +    sprintf(warn_buf,"coulombtype = %s is only for testing purposes and can lead to serious artifacts, advice: use coulombtype = %s",
 +	    eel_names[ir->coulombtype],
 +	    eel_names[eelRF_ZERO]);
 +    warning(wi,warn_buf);
 +  }
 +
 +  if (ir->epsilon_r!=1 && ir->implicit_solvent==eisGBSA) {
-     sprintf(warn_buf,"epsilon_r = %g with GB implicit solvent, will use this value for inner dielectric",ir->epsilon_r);
++    sprintf(warn_buf,"epsilon-r = %g with GB implicit solvent, will use this value for inner dielectric",ir->epsilon_r);
 +    warning_note(wi,warn_buf);
 +  }
 +
 +  if (EEL_RF(ir->coulombtype) && ir->epsilon_rf==1 && ir->epsilon_r!=1) {
-     sprintf(warn_buf,"epsilon_r = %g and epsilon_rf = 1 with reaction field, assuming old format and exchanging epsilon_r and epsilon_rf",ir->epsilon_r);
++    sprintf(warn_buf,"epsilon-r = %g and epsilon-rf = 1 with reaction field, assuming old format and exchanging epsilon-r and epsilon-rf",ir->epsilon_r);
 +    warning(wi,warn_buf);
 +    ir->epsilon_rf = ir->epsilon_r;
 +    ir->epsilon_r  = 1.0;
 +  }
 +
 +  if (getenv("GALACTIC_DYNAMICS") == NULL) {  
-     sprintf(err_buf,"epsilon_r must be >= 0 instead of %g\n",ir->epsilon_r);
++    sprintf(err_buf,"epsilon-r must be >= 0 instead of %g\n",ir->epsilon_r);
 +    CHECK(ir->epsilon_r < 0);
 +  }
 +  
 +  if (EEL_RF(ir->coulombtype)) {
 +    /* reaction field (at the cut-off) */
 +    
 +    if (ir->coulombtype == eelRF_ZERO) {
-        sprintf(err_buf,"With coulombtype = %s, epsilon_rf must be 0",
++       sprintf(err_buf,"With coulombtype = %s, epsilon-rf must be 0",
 +	       eel_names[ir->coulombtype]);
 +      CHECK(ir->epsilon_rf != 0);
 +    }
 +
-     sprintf(err_buf,"epsilon_rf must be >= epsilon_r");
++    sprintf(err_buf,"epsilon-rf must be >= epsilon-r");
 +    CHECK((ir->epsilon_rf < ir->epsilon_r && ir->epsilon_rf != 0) ||
 +	  (ir->epsilon_r == 0));
 +    if (ir->epsilon_rf == ir->epsilon_r) {
-       sprintf(warn_buf,"Using epsilon_rf = epsilon_r with %s does not make sense",
++      sprintf(warn_buf,"Using epsilon-rf = epsilon-r with %s does not make sense",
 +	      eel_names[ir->coulombtype]);
 +      warning(wi,warn_buf);
 +    }
 +  }
 +  /* Allow rlist>rcoulomb for tabulated long range stuff. This just
 +   * means the interaction is zero outside rcoulomb, but it helps to
 +   * provide accurate energy conservation.
 +   */
 +  if (EEL_MIGHT_BE_ZERO_AT_CUTOFF(ir->coulombtype)) {
 +    if (EEL_SWITCHED(ir->coulombtype)) {
 +      sprintf(err_buf,
 +	      "With coulombtype = %s rcoulomb_switch must be < rcoulomb",
 +	      eel_names[ir->coulombtype]);
 +      CHECK(ir->rcoulomb_switch >= ir->rcoulomb);
 +    }
 +  } else if (ir->coulombtype == eelCUT || EEL_RF(ir->coulombtype)) {
 +    sprintf(err_buf,"With coulombtype = %s, rcoulomb must be >= rlist",
 +	    eel_names[ir->coulombtype]);
 +    CHECK(ir->rlist > ir->rcoulomb);
 +  }
 +
 +  if (EEL_FULL(ir->coulombtype)) {
 +    if (ir->coulombtype==eelPMESWITCH || ir->coulombtype==eelPMEUSER ||
 +        ir->coulombtype==eelPMEUSERSWITCH) {
 +      sprintf(err_buf,"With coulombtype = %s, rcoulomb must be <= rlist",
 +	      eel_names[ir->coulombtype]);
 +      CHECK(ir->rcoulomb > ir->rlist);
 +    } else {
 +      if (ir->coulombtype == eelPME) {
 +	sprintf(err_buf,
 +		"With coulombtype = %s, rcoulomb must be equal to rlist\n"
 +		"If you want optimal energy conservation or exact integration use %s",
 +		eel_names[ir->coulombtype],eel_names[eelPMESWITCH]);
 +      } else { 
 +	sprintf(err_buf,
 +		"With coulombtype = %s, rcoulomb must be equal to rlist",
 +		eel_names[ir->coulombtype]);
 +      }
 +      CHECK(ir->rcoulomb != ir->rlist);
 +    }
 +  }
 +
 +  if (EEL_PME(ir->coulombtype)) {
 +    if (ir->pme_order < 3) {
-         warning_error(wi,"pme_order can not be smaller than 3");
++        warning_error(wi,"pme-order can not be smaller than 3");
 +    }
 +  }
 +
 +  if (ir->nwall==2 && EEL_FULL(ir->coulombtype)) {
 +    if (ir->ewald_geometry == eewg3D) {
-       sprintf(warn_buf,"With pbc=%s you should use ewald_geometry=%s",
++      sprintf(warn_buf,"With pbc=%s you should use ewald-geometry=%s",
 +	      epbc_names[ir->ePBC],eewg_names[eewg3DC]);
 +      warning(wi,warn_buf);
 +    }
 +    /* This check avoids extra pbc coding for exclusion corrections */
-     sprintf(err_buf,"wall_ewald_zfac should be >= 2");
++    sprintf(err_buf,"wall-ewald-zfac should be >= 2");
 +    CHECK(ir->wall_ewald_zfac < 2);
 +  }
 +
 +  if (EVDW_SWITCHED(ir->vdwtype)) {
-     sprintf(err_buf,"With vdwtype = %s rvdw_switch must be < rvdw",
++    sprintf(err_buf,"With vdwtype = %s rvdw-switch must be < rvdw",
 +	    evdw_names[ir->vdwtype]);
 +    CHECK(ir->rvdw_switch >= ir->rvdw);
 +  } else if (ir->vdwtype == evdwCUT) {
 +    sprintf(err_buf,"With vdwtype = %s, rvdw must be >= rlist",evdw_names[ir->vdwtype]);
 +    CHECK(ir->rlist > ir->rvdw);
 +  }
 +  if (EEL_IS_ZERO_AT_CUTOFF(ir->coulombtype)
 +      && (ir->rlistlong <= ir->rcoulomb)) {
 +    sprintf(warn_buf,"For energy conservation with switch/shift potentials, %s should be 0.1 to 0.3 nm larger than rcoulomb.",
 +	    IR_TWINRANGE(*ir) ? "rlistlong" : "rlist");
 +    warning_note(wi,warn_buf);
 +  }
 +  if (EVDW_SWITCHED(ir->vdwtype) && (ir->rlistlong <= ir->rvdw)) {
 +    sprintf(warn_buf,"For energy conservation with switch/shift potentials, %s should be 0.1 to 0.3 nm larger than rvdw.",
 +	    IR_TWINRANGE(*ir) ? "rlistlong" : "rlist");
 +    warning_note(wi,warn_buf);
 +  }
 +
 +  if (ir->vdwtype == evdwUSER && ir->eDispCorr != edispcNO) {
 +      warning_note(wi,"You have selected user tables with dispersion correction, the dispersion will be corrected to -C6/r^6 beyond rvdw_switch (the tabulated interaction between rvdw_switch and rvdw will not be double counted). Make sure that you really want dispersion correction to -C6/r^6.");
 +  }
 +
 +  if (ir->nstlist == -1) {
 +    sprintf(err_buf,
 +	    "nstlist=-1 only works with switched or shifted potentials,\n"
 +	    "suggestion: use vdw-type=%s and coulomb-type=%s",
 +	    evdw_names[evdwSHIFT],eel_names[eelPMESWITCH]);
 +    CHECK(!(EEL_MIGHT_BE_ZERO_AT_CUTOFF(ir->coulombtype) &&
 +            EVDW_MIGHT_BE_ZERO_AT_CUTOFF(ir->vdwtype)));
 +
 +    sprintf(err_buf,"With nstlist=-1 rvdw and rcoulomb should be smaller than rlist to account for diffusion and possibly charge-group radii");
 +    CHECK(ir->rvdw >= ir->rlist || ir->rcoulomb >= ir->rlist);
 +  }
 +  sprintf(err_buf,"nstlist can not be smaller than -1");
 +  CHECK(ir->nstlist < -1);
 +
 +  if (ir->eI == eiLBFGS && (ir->coulombtype==eelCUT || ir->vdwtype==evdwCUT)
 +     && ir->rvdw != 0) {
 +    warning(wi,"For efficient BFGS minimization, use switch/shift/pme instead of cut-off.");
 +  }
 +
 +  if (ir->eI == eiLBFGS && ir->nbfgscorr <= 0) {
 +    warning(wi,"Using L-BFGS with nbfgscorr<=0 just gets you steepest descent.");
 +  }
 +
 +  /* FREE ENERGY */
 +  if (ir->efep != efepNO) {
 +    sprintf(err_buf,"The soft-core power is %d and can only be 1 or 2",
 +	    ir->sc_power);
 +    CHECK(ir->sc_alpha!=0 && ir->sc_power!=1 && ir->sc_power!=2);
 +  }
 +
 +    /* ENERGY CONSERVATION */
 +    if (ir_NVE(ir))
 +    {
 +        if (!EVDW_MIGHT_BE_ZERO_AT_CUTOFF(ir->vdwtype) && ir->rvdw > 0)
 +        {
 +            sprintf(warn_buf,"You are using a cut-off for VdW interactions with NVE, for good energy conservation use vdwtype = %s (possibly with DispCorr)",
 +                    evdw_names[evdwSHIFT]);
 +            warning_note(wi,warn_buf);
 +        }
 +        if (!EEL_MIGHT_BE_ZERO_AT_CUTOFF(ir->coulombtype) && ir->rcoulomb > 0)
 +        {
 +            sprintf(warn_buf,"You are using a cut-off for electrostatics with NVE, for good energy conservation use coulombtype = %s or %s",
 +                    eel_names[eelPMESWITCH],eel_names[eelRF_ZERO]);
 +            warning_note(wi,warn_buf);
 +        }
 +    }
 +  
 +  /* IMPLICIT SOLVENT */
 +  if(ir->coulombtype==eelGB_NOTUSED)
 +  {
 +    ir->coulombtype=eelCUT;
 +    ir->implicit_solvent=eisGBSA;
 +    fprintf(stderr,"Note: Old option for generalized born electrostatics given:\n"
 +	    "Changing coulombtype from \"generalized-born\" to \"cut-off\" and instead\n"
- 	    "setting implicit_solvent value to \"GBSA\" in input section.\n");
++            "setting implicit-solvent value to \"GBSA\" in input section.\n");
 +  }
 +
 +  if(ir->sa_algorithm==esaSTILL)
 +  {
 +    sprintf(err_buf,"Still SA algorithm not available yet, use %s or %s instead\n",esa_names[esaAPPROX],esa_names[esaNO]);
 +    CHECK(ir->sa_algorithm == esaSTILL);
 +  }
 +  
 +  if(ir->implicit_solvent==eisGBSA)
 +  {
 +    sprintf(err_buf,"With GBSA implicit solvent, rgbradii must be equal to rlist.");
 +    CHECK(ir->rgbradii != ir->rlist);
 +	  
 +    if(ir->coulombtype!=eelCUT)
 +	  {
 +		  sprintf(err_buf,"With GBSA, coulombtype must be equal to %s\n",eel_names[eelCUT]);
 +		  CHECK(ir->coulombtype!=eelCUT);
 +	  }
 +	  if(ir->vdwtype!=evdwCUT)
 +	  {
 +		  sprintf(err_buf,"With GBSA, vdw-type must be equal to %s\n",evdw_names[evdwCUT]);
 +		  CHECK(ir->vdwtype!=evdwCUT);
 +	  }
 +    if(ir->nstgbradii<1)
 +    {
 +      sprintf(warn_buf,"Using GBSA with nstgbradii<1, setting nstgbradii=1");
 +      warning_note(wi,warn_buf);
 +      ir->nstgbradii=1;
 +    }
 +    if(ir->sa_algorithm==esaNO)
 +    {
 +      sprintf(warn_buf,"No SA (non-polar) calculation requested together with GB. Are you sure this is what you want?\n");
 +      warning_note(wi,warn_buf);
 +    }
 +    if(ir->sa_surface_tension<0 && ir->sa_algorithm!=esaNO)
 +    {
 +      sprintf(warn_buf,"Value of sa_surface_tension is < 0. Changing it to 2.05016 or 2.25936 kJ/nm^2/mol for Still and HCT/OBC respectively\n");
 +      warning_note(wi,warn_buf);
 +      
 +      if(ir->gb_algorithm==egbSTILL)
 +      {
 +        ir->sa_surface_tension = 0.0049 * CAL2JOULE * 100;
 +      }
 +      else
 +      {
 +        ir->sa_surface_tension = 0.0054 * CAL2JOULE * 100;
 +      }
 +    }
 +    if(ir->sa_surface_tension==0 && ir->sa_algorithm!=esaNO)
 +    {
 +      sprintf(err_buf, "Surface tension set to 0 while SA-calculation requested\n");
 +      CHECK(ir->sa_surface_tension==0 && ir->sa_algorithm!=esaNO);
 +    }
 +    
 +  }
 +}
 +
 +/* Count (and optionally collect) the whitespace-separated tokens in str.
 + *
 + * str    : input line; it is not modified, a strdup() copy is tokenized.
 + * maxptr : maximum number of tokens accepted; one more is a fatal error.
 + * ptr    : if non-NULL, ptr[k] is set to the start of token k. The tokens
 + *          point into the duplicated buffer, which is NOT freed here in
 + *          that case. If NULL, only the count is produced and the
 + *          duplicated buffer is freed before returning.
 + *
 + * Returns the number of tokens found.
 + *
 + * NOTE(review): ltrim() is defined elsewhere; this code relies on it
 + * leaving a non-whitespace character (or the terminator) at copy[0].
 + */
 +static int str_nelem(const char *str,int maxptr,char *ptr[])
 +{
 +  int  np=0;
 +  char *copy0,*copy;
 +
 +  copy0=strdup(str);
 +  copy=copy0;
 +  ltrim(copy);
 +  while (*copy != '\0') {
 +    if (np >= maxptr)
 +      gmx_fatal(FARGS,"Too many groups on line: '%s' (max is %d)",
 +                str,maxptr);
 +    if (ptr)
 +      ptr[np]=copy;
 +    np++;
 +    /* Advance to the end of the current token. The cast is required:
 +     * passing a negative plain char to isspace() is undefined behaviour.
 +     */
 +    while ((*copy != '\0') && !isspace((unsigned char)*copy))
 +      copy++;
 +    /* Terminate the token in place and step past the separator */
 +    if (*copy != '\0') {
 +      *copy='\0';
 +      copy++;
 +    }
 +    ltrim(copy);
 +  }
 +  /* Without an output array nobody references the copy, so release it */
 +  if (ptr == NULL)
 +    sfree(copy0);
 +
 +  return np;
 +}
 +
 +/* Tokenize str on whitespace and convert every token to a double.
 + *
 + * On return *n holds the number of tokens and *r points to a freshly
 + * allocated array of *n values, each converted with strtod() (no error
 + * checking on the conversion). The token buffer allocated by str_nelem()
 + * is not released here.
 + */
 +static void parse_n_double(char *str,int *n,double **r)
 +{
 +  char   *field[MAXPTR];
 +  double *out;
 +  int    k,nfield;
 +
 +  nfield = str_nelem(str,MAXPTR,field);
 +  *n     = nfield;
 +
 +  snew(*r,*n);
 +  out = *r;
 +  for(k=0; k<nfield; k++) {
 +    out[k] = strtod(field[k],NULL);
 +  }
 +}
 +
 +/* Fill in the wall parameters from the raw option strings.
 + *
 + * ir            : input record; nwall and wall_type are read, and
 + *                 wall_atomtype[] / wall_density[] are written.
 + * wall_atomtype : whitespace-separated atom type names, one per wall.
 + * wall_density  : whitespace-separated densities, one per wall; only
 + *                 parsed when wall_type is ewt93 or ewt104.
 + * opts          : receives strdup() copies of the atom type names.
 + *
 + * A wrong number of entries, an unparsable density or a density <= 0 is
 + * a fatal error.
 + */
 +static void do_wall_params(t_inputrec *ir,
 +                           char *wall_atomtype, char *wall_density,
 +                           t_gromppopts *opts)
 +{
 +    int  nstr,i;
 +    char *names[MAXPTR];
 +    double dbl;
 +
 +    /* Defaults that remain in effect when no walls are requested */
 +    opts->wall_atomtype[0] = NULL;
 +    opts->wall_atomtype[1] = NULL;
 +
 +    ir->wall_atomtype[0] = -1;
 +    ir->wall_atomtype[1] = -1;
 +    ir->wall_density[0] = 0;
 +    ir->wall_density[1] = 0;
 +  
 +    if (ir->nwall > 0)
 +    {
 +        nstr = str_nelem(wall_atomtype,MAXPTR,names);
 +        if (nstr != ir->nwall)
 +        {
 +            /* Use the dashed option name, consistent with wall-density below */
 +            gmx_fatal(FARGS,"Expected %d elements for wall-atomtype, found %d",
 +                      ir->nwall,nstr);
 +        }
 +        for(i=0; i<ir->nwall; i++)
 +        {
 +            opts->wall_atomtype[i] = strdup(names[i]);
 +        }
 +    
 +        if (ir->wall_type == ewt93 || ir->wall_type == ewt104) {
 +            nstr = str_nelem(wall_density,MAXPTR,names);
 +            if (nstr != ir->nwall)
 +            {
-                 gmx_fatal(FARGS,"Expected %d elements for wall_density, found %d",ir->nwall,nstr);
++                gmx_fatal(FARGS,"Expected %d elements for wall-density, found %d",ir->nwall,nstr);
 +            }
 +            for(i=0; i<ir->nwall; i++)
 +            {
 +                /* Reject tokens that are not numbers; otherwise dbl would be
 +                 * uninitialized (first wall) or stale (later walls).
 +                 */
 +                if (sscanf(names[i],"%lf",&dbl) != 1)
 +                {
 +                    gmx_fatal(FARGS,"Could not read a number for wall-density[%d] from '%s'",i,names[i]);
 +                }
 +                if (dbl <= 0)
 +                {
-                     gmx_fatal(FARGS,"wall_density[%d] = %f\n",i,dbl);
++                    gmx_fatal(FARGS,"wall-density[%d] = %f\n",i,dbl);
 +                }
 +                ir->wall_density[i] = dbl;
 +            }
 +        }
 +    }
 +}
 +
 +/* Append one energy group per wall to the group bookkeeping.
 + *
 + * For each of the nwall walls a name "wall<k>" is put in the symbol
 + * table, appended to groups->grpname, and its index is appended to the
 + * egcENER group list. Does nothing when nwall <= 0.
 + */
 +static void add_wall_energrps(gmx_groups_t *groups,int nwall,t_symtab *symtab)
 +{
 +  t_grps *grps;
 +  char   label[STRLEN];
 +  int    w;
 +
 +  if (nwall <= 0) {
 +    return;
 +  }
 +
 +  /* Make room for nwall extra names and nwall extra energy-group entries */
 +  srenew(groups->grpname,groups->ngrpname+nwall);
 +  grps = &(groups->grps[egcENER]);
 +  srenew(grps->nm_ind,grps->nr+nwall);
 +
 +  for(w=0; w<nwall; w++) {
 +    sprintf(label,"wall%d",w);
 +    groups->grpname[groups->ngrpname] = put_symtab(symtab,label);
 +    /* Record the new name's index, then advance both counters */
 +    grps->nm_ind[grps->nr] = groups->ngrpname;
 +    grps->nr++;
 +    groups->ngrpname++;
 +  }
 +}
 +
 +void get_ir(const char *mdparin,const char *mdparout,
 +            t_inputrec *ir,t_gromppopts *opts,
 +            warninp_t wi)
 +{
 +  char      *dumstr[2];
 +  double    dumdub[2][6];
 +  t_inpfile *inp;
 +  const char *tmp;
 +  int       i,j,m,ninp;
 +  char      warn_buf[STRLEN];
 +  
 +  inp = read_inpfile(mdparin, &ninp, NULL, wi);
 +
 +  snew(dumstr[0],STRLEN);
 +  snew(dumstr[1],STRLEN);
 +
 +  REM_TYPE("title");
 +  REM_TYPE("cpp");
 +  REM_TYPE("domain-decomposition");
 +  REPL_TYPE("unconstrained-start","continuation");
 +  REM_TYPE("dihre-tau");
 +  REM_TYPE("nstdihreout");
 +  REM_TYPE("nstcheckpoint");
 +
 +  CCTYPE ("VARIOUS PREPROCESSING OPTIONS");
 +  CTYPE ("Preprocessor information: use cpp syntax.");
 +  CTYPE ("e.g.: -I/home/joe/doe -I/home/mary/roe");
 +  STYPE ("include",	opts->include,	NULL);
 +  CTYPE ("e.g.: -DPOSRES -DFLEXIBLE (note these variable names are case sensitive)");
 +  STYPE ("define",	opts->define,	NULL);
 +    
 +  CCTYPE ("RUN CONTROL PARAMETERS");
 +  EETYPE("integrator",  ir->eI,         ei_names);
 +  CTYPE ("Start time and timestep in ps");
 +  RTYPE ("tinit",	ir->init_t,	0.0);
 +  RTYPE ("dt",		ir->delta_t,	0.001);
 +  STEPTYPE ("nsteps",   ir->nsteps,     0);
 +  CTYPE ("For exact run continuation or redoing part of a run");
-   STEPTYPE ("init_step",ir->init_step,  0);
++  STEPTYPE ("init-step",ir->init_step,  0);
 +  CTYPE ("Part index is updated automatically on checkpointing (keeps files separate)");
-   ITYPE ("simulation_part", ir->simulation_part, 1);
++  ITYPE ("simulation-part", ir->simulation_part, 1);
 +  CTYPE ("mode for center of mass motion removal");
 +  EETYPE("comm-mode",   ir->comm_mode,  ecm_names);
 +  CTYPE ("number of steps for center of mass motion removal");
 +  ITYPE ("nstcomm",	ir->nstcomm,	10);
 +  CTYPE ("group(s) for center of mass motion removal");
 +  STYPE ("comm-grps",   vcm,            NULL);
 +  
 +  CCTYPE ("LANGEVIN DYNAMICS OPTIONS");
 +  CTYPE ("Friction coefficient (amu/ps) and random seed");
 +  RTYPE ("bd-fric",     ir->bd_fric,    0.0);
 +  ITYPE ("ld-seed",     ir->ld_seed,    1993);
 +  
 +  /* Em stuff */
 +  CCTYPE ("ENERGY MINIMIZATION OPTIONS");
 +  CTYPE ("Force tolerance and initial step-size");
 +  RTYPE ("emtol",       ir->em_tol,     10.0);
 +  RTYPE ("emstep",      ir->em_stepsize,0.01);
-   CTYPE ("Max number of iterations in relax_shells");
++  CTYPE ("Max number of iterations in relax-shells");
 +  ITYPE ("niter",       ir->niter,      20);
 +  CTYPE ("Step size (ps^2) for minimization of flexible constraints");
 +  RTYPE ("fcstep",      ir->fc_stepsize, 0);
 +  CTYPE ("Frequency of steepest descents steps when doing CG");
 +  ITYPE ("nstcgsteep",	ir->nstcgsteep,	1000);
 +  ITYPE ("nbfgscorr",   ir->nbfgscorr,  10); 
 +
 +  CCTYPE ("TEST PARTICLE INSERTION OPTIONS");
 +  RTYPE ("rtpi",	ir->rtpi,	0.05);
 +
 +  /* Output options */
 +  CCTYPE ("OUTPUT CONTROL OPTIONS");
 +  CTYPE ("Output frequency for coords (x), velocities (v) and forces (f)");
 +  ITYPE ("nstxout",	ir->nstxout,	100);
 +  ITYPE ("nstvout",	ir->nstvout,	100);
 +  ITYPE ("nstfout",	ir->nstfout,	0);
 +  ir->nstcheckpoint = 1000;
 +  CTYPE ("Output frequency for energies to log file and energy file");
 +  ITYPE ("nstlog",	ir->nstlog,	100);
 +  ITYPE ("nstcalcenergy",ir->nstcalcenergy,	-1);
 +  ITYPE ("nstenergy",   ir->nstenergy,  100);
 +  CTYPE ("Output frequency and precision for .xtc file");
 +  ITYPE ("nstxtcout",   ir->nstxtcout,  0);
 +  RTYPE ("xtc-precision",ir->xtcprec,   1000.0);
 +  CTYPE ("This selects the subset of atoms for the .xtc file. You can");
 +  CTYPE ("select multiple groups. By default all atoms will be written.");
 +  STYPE ("xtc-grps",    xtc_grps,       NULL);
 +  CTYPE ("Selection of energy groups");
 +  STYPE ("energygrps",  energy,         NULL);
 +
 +  /* Neighbor searching */  
 +  CCTYPE ("NEIGHBORSEARCHING PARAMETERS");
 +  CTYPE ("nblist update frequency");
 +  ITYPE ("nstlist",	ir->nstlist,	10);
 +  CTYPE ("ns algorithm (simple or grid)");
 +  EETYPE("ns-type",     ir->ns_type,    ens_names);
 +  /* set ndelta to the optimal value of 2 */
 +  ir->ndelta = 2;
 +  CTYPE ("Periodic boundary conditions: xyz, no, xy");
 +  EETYPE("pbc",         ir->ePBC,       epbc_names);
-   EETYPE("periodic_molecules", ir->bPeriodicMols, yesno_names);
++  EETYPE("periodic-molecules", ir->bPeriodicMols, yesno_names);
 +  CTYPE ("nblist cut-off");
 +  RTYPE ("rlist",	ir->rlist,	1.0);
 +  CTYPE ("long-range cut-off for switched potentials");
 +  RTYPE ("rlistlong",	ir->rlistlong,	-1);
 +
 +  /* Electrostatics */
 +  CCTYPE ("OPTIONS FOR ELECTROSTATICS AND VDW");
 +  CTYPE ("Method for doing electrostatics");
 +  EETYPE("coulombtype",	ir->coulombtype,    eel_names);
 +  CTYPE ("cut-off lengths");
 +  RTYPE ("rcoulomb-switch",	ir->rcoulomb_switch,	0.0);
 +  RTYPE ("rcoulomb",	ir->rcoulomb,	1.0);
 +  CTYPE ("Relative dielectric constant for the medium and the reaction field");
-   RTYPE ("epsilon_r",   ir->epsilon_r,  1.0);
-   RTYPE ("epsilon_rf",  ir->epsilon_rf, 1.0);
++  RTYPE ("epsilon-r",   ir->epsilon_r,  1.0);
++  RTYPE ("epsilon-rf",  ir->epsilon_rf, 1.0);
 +  CTYPE ("Method for doing Van der Waals");
 +  EETYPE("vdw-type",	ir->vdwtype,    evdw_names);
 +  CTYPE ("cut-off lengths");
 +  RTYPE ("rvdw-switch",	ir->rvdw_switch,	0.0);
 +  RTYPE ("rvdw",	ir->rvdw,	1.0);
 +  CTYPE ("Apply long range dispersion corrections for Energy and Pressure");
 +  EETYPE("DispCorr",    ir->eDispCorr,  edispc_names);
 +  CTYPE ("Extension of the potential lookup tables beyond the cut-off");
 +  RTYPE ("table-extension", ir->tabext, 1.0);
 +  CTYPE ("Seperate tables between energy group pairs");
-   STYPE ("energygrp_table", egptable,   NULL);
++  STYPE ("energygrp-table", egptable,   NULL);
 +  CTYPE ("Spacing for the PME/PPPM FFT grid");
 +  RTYPE ("fourierspacing", opts->fourierspacing,0.12);
 +  CTYPE ("FFT grid size, when a value is 0 fourierspacing will be used");
-   ITYPE ("fourier_nx",  ir->nkx,         0);
-   ITYPE ("fourier_ny",  ir->nky,         0);
-   ITYPE ("fourier_nz",  ir->nkz,         0);
++  ITYPE ("fourier-nx",  ir->nkx,         0);
++  ITYPE ("fourier-ny",  ir->nky,         0);
++  ITYPE ("fourier-nz",  ir->nkz,         0);
 +  CTYPE ("EWALD/PME/PPPM parameters");
-   ITYPE ("pme_order",   ir->pme_order,   4);
-   RTYPE ("ewald_rtol",  ir->ewald_rtol, 0.00001);
-   EETYPE("ewald_geometry", ir->ewald_geometry, eewg_names);
-   RTYPE ("epsilon_surface", ir->epsilon_surface, 0.0);
-   EETYPE("optimize_fft",ir->bOptFFT,  yesno_names);
++  ITYPE ("pme-order",   ir->pme_order,   4);
++  RTYPE ("ewald-rtol",  ir->ewald_rtol, 0.00001);
++  EETYPE("ewald-geometry", ir->ewald_geometry, eewg_names);
++  RTYPE ("epsilon-surface", ir->epsilon_surface, 0.0);
++  EETYPE("optimize-fft",ir->bOptFFT,  yesno_names);
 +
 +  CCTYPE("IMPLICIT SOLVENT ALGORITHM");
-   EETYPE("implicit_solvent", ir->implicit_solvent, eis_names);
++  EETYPE("implicit-solvent", ir->implicit_solvent, eis_names);
 +	
 +  CCTYPE ("GENERALIZED BORN ELECTROSTATICS"); 
 +  CTYPE ("Algorithm for calculating Born radii");
-   EETYPE("gb_algorithm", ir->gb_algorithm, egb_names);
++  EETYPE("gb-algorithm", ir->gb_algorithm, egb_names);
 +  CTYPE ("Frequency of calculating the Born radii inside rlist");
 +  ITYPE ("nstgbradii", ir->nstgbradii, 1);
 +  CTYPE ("Cutoff for Born radii calculation; the contribution from atoms");
 +  CTYPE ("between rlist and rgbradii is updated every nstlist steps");
 +  RTYPE ("rgbradii",  ir->rgbradii, 1.0);
 +  CTYPE ("Dielectric coefficient of the implicit solvent");
-   RTYPE ("gb_epsilon_solvent",ir->gb_epsilon_solvent, 80.0);	
++  RTYPE ("gb-epsilon-solvent",ir->gb_epsilon_solvent, 80.0);
 +  CTYPE ("Salt concentration in M for Generalized Born models");
-   RTYPE ("gb_saltconc",  ir->gb_saltconc, 0.0); 
++  RTYPE ("gb-saltconc",  ir->gb_saltconc, 0.0);
 +  CTYPE ("Scaling factors used in the OBC GB model. Default values are OBC(II)");
-   RTYPE ("gb_obc_alpha", ir->gb_obc_alpha, 1.0);
-   RTYPE ("gb_obc_beta", ir->gb_obc_beta, 0.8);
-   RTYPE ("gb_obc_gamma", ir->gb_obc_gamma, 4.85);	
-   RTYPE ("gb_dielectric_offset", ir->gb_dielectric_offset, 0.009);
-   EETYPE("sa_algorithm", ir->sa_algorithm, esa_names);
++  RTYPE ("gb-obc-alpha", ir->gb_obc_alpha, 1.0);
++  RTYPE ("gb-obc-beta", ir->gb_obc_beta, 0.8);
++  RTYPE ("gb-obc-gamma", ir->gb_obc_gamma, 4.85);
++  RTYPE ("gb-dielectric-offset", ir->gb_dielectric_offset, 0.009);
++  EETYPE("sa-algorithm", ir->sa_algorithm, esa_names);
 +  CTYPE ("Surface tension (kJ/mol/nm^2) for the SA (nonpolar surface) part of GBSA");
 +  CTYPE ("The value -1 will set default value for Still/HCT/OBC GB-models.");
-   RTYPE ("sa_surface_tension", ir->sa_surface_tension, -1);
++  RTYPE ("sa-surface-tension", ir->sa_surface_tension, -1);
 +		 
 +  /* Coupling stuff */
 +  CCTYPE ("OPTIONS FOR WEAK COUPLING ALGORITHMS");
 +  CTYPE ("Temperature coupling");
 +  EETYPE("tcoupl",	ir->etc,        etcoupl_names);
 +  ITYPE ("nsttcouple", ir->nsttcouple,  -1);
 +  ITYPE("nh-chain-length",     ir->opts.nhchainlength, NHCHAINLENGTH);
 +  CTYPE ("Groups to couple separately");
 +  STYPE ("tc-grps",     tcgrps,         NULL);
 +  CTYPE ("Time constant (ps) and reference temperature (K)");
 +  STYPE ("tau-t",	tau_t,		NULL);
 +  STYPE ("ref-t",	ref_t,		NULL);
-   CTYPE ("Pressure coupling");
-   EETYPE("Pcoupl",	ir->epc,        epcoupl_names);
-   EETYPE("Pcoupltype",	ir->epct,       epcoupltype_names);
++  CTYPE ("pressure coupling");
++  EETYPE("pcoupl",	ir->epc,        epcoupl_names);
++  EETYPE("pcoupltype",	ir->epct,       epcoupltype_names);
 +  ITYPE ("nstpcouple", ir->nstpcouple,  -1);
 +  CTYPE ("Time constant (ps), compressibility (1/bar) and reference P (bar)");
 +  RTYPE ("tau-p",	ir->tau_p,	1.0);
 +  STYPE ("compressibility",	dumstr[0],	NULL);
 +  STYPE ("ref-p",       dumstr[1],      NULL);
 +  CTYPE ("Scaling of reference coordinates, No, All or COM");
-   EETYPE ("refcoord_scaling",ir->refcoord_scaling,erefscaling_names);
++  EETYPE ("refcoord-scaling",ir->refcoord_scaling,erefscaling_names);
 +
 +  CTYPE ("Random seed for Andersen thermostat");
-   ITYPE ("andersen_seed", ir->andersen_seed, 815131);
++  ITYPE ("andersen-seed", ir->andersen_seed, 815131);
 +
 +  /* QMMM */
 +  CCTYPE ("OPTIONS FOR QMMM calculations");
 +  EETYPE("QMMM", ir->bQMMM, yesno_names);
 +  CTYPE ("Groups treated Quantum Mechanically");
 +  STYPE ("QMMM-grps",  QMMM,          NULL);
 +  CTYPE ("QM method");
 +  STYPE("QMmethod",     QMmethod, NULL);
 +  CTYPE ("QMMM scheme");
 +  EETYPE("QMMMscheme",  ir->QMMMscheme,    eQMMMscheme_names);
 +  CTYPE ("QM basisset");
 +  STYPE("QMbasis",      QMbasis, NULL);
 +  CTYPE ("QM charge");
 +  STYPE ("QMcharge",    QMcharge,NULL);
 +  CTYPE ("QM multiplicity");
 +  STYPE ("QMmult",      QMmult,NULL);
 +  CTYPE ("Surface Hopping");
 +  STYPE ("SH",          bSH, NULL);
 +  CTYPE ("CAS space options");
 +  STYPE ("CASorbitals",      CASorbitals,   NULL);
 +  STYPE ("CASelectrons",     CASelectrons,  NULL);
 +  STYPE ("SAon", SAon, NULL);
 +  STYPE ("SAoff",SAoff,NULL);
 +  STYPE ("SAsteps",  SAsteps, NULL);
 +  CTYPE ("Scale factor for MM charges");
 +  RTYPE ("MMChargeScaleFactor", ir->scalefactor, 1.0);
 +  CTYPE ("Optimization of QM subsystem");
 +  STYPE ("bOPT",          bOPT, NULL);
 +  STYPE ("bTS",          bTS, NULL);
 +
 +  /* Simulated annealing */
 +  CCTYPE("SIMULATED ANNEALING");
 +  CTYPE ("Type of annealing for each temperature group (no/single/periodic)");
 +  STYPE ("annealing",   anneal,      NULL);
 +  CTYPE ("Number of time points to use for specifying annealing in each group");
-   STYPE ("annealing_npoints", anneal_npoints, NULL);
++  STYPE ("annealing-npoints", anneal_npoints, NULL);
 +  CTYPE ("List of times at the annealing points for each group");
-   STYPE ("annealing_time",       anneal_time,       NULL);
++  STYPE ("annealing-time",       anneal_time,       NULL);
 +  CTYPE ("Temp. at each annealing point, for each group.");
-   STYPE ("annealing_temp",  anneal_temp,  NULL);
++  STYPE ("annealing-temp",  anneal_temp,  NULL);
 +  
 +  /* Startup run */
 +  CCTYPE ("GENERATE VELOCITIES FOR STARTUP RUN");
 +  EETYPE("gen-vel",     opts->bGenVel,  yesno_names);
 +  RTYPE ("gen-temp",    opts->tempi,    300.0);
 +  ITYPE ("gen-seed",    opts->seed,     173529);
 +  
 +  /* Shake stuff */
 +  CCTYPE ("OPTIONS FOR BONDS");
 +  EETYPE("constraints",	opts->nshake,	constraints);
 +  CTYPE ("Type of constraint algorithm");
 +  EETYPE("constraint-algorithm",  ir->eConstrAlg, econstr_names);
 +  CTYPE ("Do not constrain the start configuration");
 +  EETYPE("continuation", ir->bContinuation, yesno_names);
 +  CTYPE ("Use successive overrelaxation to reduce the number of shake iterations");
 +  EETYPE("Shake-SOR", ir->bShakeSOR, yesno_names);
 +  CTYPE ("Relative tolerance of shake");
 +  RTYPE ("shake-tol", ir->shake_tol, 0.0001);
 +  CTYPE ("Highest order in the expansion of the constraint coupling matrix");
 +  ITYPE ("lincs-order", ir->nProjOrder, 4);
 +  CTYPE ("Number of iterations in the final step of LINCS. 1 is fine for");
 +  CTYPE ("normal simulations, but use 2 to conserve energy in NVE runs.");
 +  CTYPE ("For energy minimization with constraints it should be 4 to 8.");
 +  ITYPE ("lincs-iter", ir->nLincsIter, 1);
 +  CTYPE ("Lincs will write a warning to the stderr if in one step a bond"); 
 +  CTYPE ("rotates over more degrees than");
 +  RTYPE ("lincs-warnangle", ir->LincsWarnAngle, 30.0);
 +  CTYPE ("Convert harmonic bonds to morse potentials");
 +  EETYPE("morse",       opts->bMorse,yesno_names);
 +
 +  /* Energy group exclusions */
 +  CCTYPE ("ENERGY GROUP EXCLUSIONS");
 +  CTYPE ("Pairs of energy groups for which all non-bonded interactions are excluded");
-   STYPE ("energygrp_excl", egpexcl,     NULL);
++  STYPE ("energygrp-excl", egpexcl,     NULL);
 +  
 +  /* Walls */
 +  CCTYPE ("WALLS");
 +  CTYPE ("Number of walls, type, atom types, densities and box-z scale factor for Ewald");
 +  ITYPE ("nwall", ir->nwall, 0);
-   EETYPE("wall_type",     ir->wall_type,   ewt_names);
-   RTYPE ("wall_r_linpot", ir->wall_r_linpot, -1);
-   STYPE ("wall_atomtype", wall_atomtype, NULL);
-   STYPE ("wall_density",  wall_density,  NULL);
-   RTYPE ("wall_ewald_zfac", ir->wall_ewald_zfac, 3);
++  EETYPE("wall-type",     ir->wall_type,   ewt_names);
++  RTYPE ("wall-r-linpot", ir->wall_r_linpot, -1);
++  STYPE ("wall-atomtype", wall_atomtype, NULL);
++  STYPE ("wall-density",  wall_density,  NULL);
++  RTYPE ("wall-ewald-zfac", ir->wall_ewald_zfac, 3);
 +  
 +  /* COM pulling */
 +  CCTYPE("COM PULLING");
-   CTYPE("Pull type: no, umbrella, constraint or constant_force");
++  CTYPE("Pull type: no, umbrella, constraint or constant-force");
 +  EETYPE("pull",          ir->ePull, epull_names);
 +  if (ir->ePull != epullNO) {
 +    snew(ir->pull,1);
 +    pull_grp = read_pullparams(&ninp,&inp,ir->pull,&opts->pull_start,wi);
 +  }
 +  
 +  /* Enforced rotation */
 +  CCTYPE("ENFORCED ROTATION");
 +  CTYPE("Enforced rotation: No or Yes");
 +  EETYPE("rotation",       ir->bRot, yesno_names);
 +  if (ir->bRot) {
 +    snew(ir->rot,1);
 +    rot_grp = read_rotparams(&ninp,&inp,ir->rot,wi);
 +  }
 +
 +  /* Refinement */
 +  CCTYPE("NMR refinement stuff");
 +  CTYPE ("Distance restraints type: No, Simple or Ensemble");
 +  EETYPE("disre",       ir->eDisre,     edisre_names);
 +  CTYPE ("Force weighting of pairs in one distance restraint: Conservative or Equal");
 +  EETYPE("disre-weighting", ir->eDisreWeighting, edisreweighting_names);
 +  CTYPE ("Use sqrt of the time averaged times the instantaneous violation");
 +  EETYPE("disre-mixed", ir->bDisreMixed, yesno_names);
 +  RTYPE ("disre-fc",	ir->dr_fc,	1000.0);
 +  RTYPE ("disre-tau",	ir->dr_tau,	0.0);
 +  CTYPE ("Output frequency for pair distances to energy file");
 +  ITYPE ("nstdisreout", ir->nstdisreout, 100);
 +  CTYPE ("Orientation restraints: No or Yes");
 +  EETYPE("orire",       opts->bOrire,   yesno_names);
 +  CTYPE ("Orientation restraints force constant and tau for time averaging");
 +  RTYPE ("orire-fc",	ir->orires_fc,	0.0);
 +  RTYPE ("orire-tau",	ir->orires_tau,	0.0);
 +  STYPE ("orire-fitgrp",orirefitgrp,    NULL);
 +  CTYPE ("Output frequency for trace(SD) and S to energy file");
 +  ITYPE ("nstorireout", ir->nstorireout, 100);
 +  CTYPE ("Dihedral angle restraints: No or Yes");
 +  EETYPE("dihre",       opts->bDihre,   yesno_names);
 +  RTYPE ("dihre-fc",	ir->dihre_fc,	1000.0);
 +
 +  /* Free energy stuff */
 +  CCTYPE ("Free energy control stuff");
 +  EETYPE("free-energy",	ir->efep, efep_names);
 +  RTYPE ("init-lambda",	ir->init_lambda,0.0);
 +  RTYPE ("delta-lambda",ir->delta_lambda,0.0);
-   STYPE ("foreign_lambda", foreign_lambda, NULL);
++  STYPE ("foreign-lambda", foreign_lambda, NULL);
 +  RTYPE ("sc-alpha",ir->sc_alpha,0.0);
 +  ITYPE ("sc-power",ir->sc_power,0);
 +  RTYPE ("sc-sigma",ir->sc_sigma,0.3);
 +  ITYPE ("nstdhdl",     ir->nstdhdl, 10);
 +  EETYPE("separate-dhdl-file", ir->separate_dhdl_file, 
 +                               separate_dhdl_file_names);
 +  EETYPE("dhdl-derivatives", ir->dhdl_derivatives, dhdl_derivatives_names);
-   ITYPE ("dh_hist_size", ir->dh_hist_size, 0);
-   RTYPE ("dh_hist_spacing", ir->dh_hist_spacing, 0.1);
++  ITYPE ("dh-hist-size", ir->dh_hist_size, 0);
++  RTYPE ("dh-hist-spacing", ir->dh_hist_spacing, 0.1);
 +  STYPE ("couple-moltype",  couple_moltype,  NULL);
 +  EETYPE("couple-lambda0", opts->couple_lam0, couple_lam);
 +  EETYPE("couple-lambda1", opts->couple_lam1, couple_lam);
 +  EETYPE("couple-intramol", opts->bCoupleIntra, yesno_names);
 +
 +  /* Non-equilibrium MD stuff */  
 +  CCTYPE("Non-equilibrium MD stuff");
 +  STYPE ("acc-grps",    accgrps,        NULL);
 +  STYPE ("accelerate",  acc,            NULL);
 +  STYPE ("freezegrps",  freeze,         NULL);
 +  STYPE ("freezedim",   frdim,          NULL);
 +  RTYPE ("cos-acceleration", ir->cos_accel, 0);
 +  STYPE ("deform",      deform,         NULL);
 +
 +  /* Electric fields */
 +  CCTYPE("Electric fields");
 +  CTYPE ("Format is number of terms (int) and for all terms an amplitude (real)");
 +  CTYPE ("and a phase angle (real)");
 +  STYPE ("E-x",   	efield_x,	NULL);
 +  STYPE ("E-xt",	efield_xt,	NULL);
 +  STYPE ("E-y",   	efield_y,	NULL);
 +  STYPE ("E-yt",	efield_yt,	NULL);
 +  STYPE ("E-z",   	efield_z,	NULL);
 +  STYPE ("E-zt",	efield_zt,	NULL);
 +  
 +  /* User defined thingies */
 +  CCTYPE ("User defined thingies");
 +  STYPE ("user1-grps",  user1,          NULL);
 +  STYPE ("user2-grps",  user2,          NULL);
 +  ITYPE ("userint1",    ir->userint1,   0);
 +  ITYPE ("userint2",    ir->userint2,   0);
 +  ITYPE ("userint3",    ir->userint3,   0);
 +  ITYPE ("userint4",    ir->userint4,   0);
 +  RTYPE ("userreal1",   ir->userreal1,  0);
 +  RTYPE ("userreal2",   ir->userreal2,  0);
 +  RTYPE ("userreal3",   ir->userreal3,  0);
 +  RTYPE ("userreal4",   ir->userreal4,  0);
 +#undef CTYPE
 +
 +  write_inpfile(mdparout,ninp,inp,FALSE,wi);
 +  for (i=0; (i<ninp); i++) {
 +    sfree(inp[i].name);
 +    sfree(inp[i].value);
 +  }
 +  sfree(inp);
 +
 +  /* Process options if necessary */
 +  for(m=0; m<2; m++) {
 +    for(i=0; i<2*DIM; i++)
 +      dumdub[m][i]=0.0;
 +    if(ir->epc) {
 +      switch (ir->epct) {
 +      case epctISOTROPIC:
 +	if (sscanf(dumstr[m],"%lf",&(dumdub[m][XX]))!=1) {
 +        warning_error(wi,"Pressure coupling not enough values (I need 1)");
 +	}
 +	dumdub[m][YY]=dumdub[m][ZZ]=dumdub[m][XX];
 +	break;
 +      case epctSEMIISOTROPIC:
 +      case epctSURFACETENSION:
 +	if (sscanf(dumstr[m],"%lf%lf",
 +		   &(dumdub[m][XX]),&(dumdub[m][ZZ]))!=2) {
 +        warning_error(wi,"Pressure coupling not enough values (I need 2)");
 +	}
 +	dumdub[m][YY]=dumdub[m][XX];
 +	break;
 +      case epctANISOTROPIC:
 +	if (sscanf(dumstr[m],"%lf%lf%lf%lf%lf%lf",
 +		   &(dumdub[m][XX]),&(dumdub[m][YY]),&(dumdub[m][ZZ]),
 +		   &(dumdub[m][3]),&(dumdub[m][4]),&(dumdub[m][5]))!=6) {
 +        warning_error(wi,"Pressure coupling not enough values (I need 6)");
 +	}
 +	break;
 +      default:
 +	gmx_fatal(FARGS,"Pressure coupling type %s not implemented yet",
 +		    epcoupltype_names[ir->epct]);
 +      }
 +    }
 +  }
 +  clear_mat(ir->ref_p);
 +  clear_mat(ir->compress);
 +  for(i=0; i<DIM; i++) {
 +    ir->ref_p[i][i]    = dumdub[1][i];
 +    ir->compress[i][i] = dumdub[0][i];
 +  }
 +  if (ir->epct == epctANISOTROPIC) {
 +    ir->ref_p[XX][YY] = dumdub[1][3];
 +    ir->ref_p[XX][ZZ] = dumdub[1][4];
 +    ir->ref_p[YY][ZZ] = dumdub[1][5];
 +    if (ir->ref_p[XX][YY]!=0 && ir->ref_p[XX][ZZ]!=0 && ir->ref_p[YY][ZZ]!=0) {
 +      warning(wi,"All off-diagonal reference pressures are non-zero. Are you sure you want to apply a threefold shear stress?\n");
 +    }
 +    ir->compress[XX][YY] = dumdub[0][3];
 +    ir->compress[XX][ZZ] = dumdub[0][4];
 +    ir->compress[YY][ZZ] = dumdub[0][5];
 +    for(i=0; i<DIM; i++) {
 +      for(m=0; m<i; m++) {
 +	ir->ref_p[i][m] = ir->ref_p[m][i];
 +	ir->compress[i][m] = ir->compress[m][i];
 +      }
 +    }
 +  } 
 +  
 +  if (ir->comm_mode == ecmNO)
 +    ir->nstcomm = 0;
 +
 +  opts->couple_moltype = NULL;
 +  if (strlen(couple_moltype) > 0) {
 +    if (ir->efep != efepNO) {
 +      opts->couple_moltype = strdup(couple_moltype);
 +      if (opts->couple_lam0 == opts->couple_lam1)
 +	warning(wi,"The lambda=0 and lambda=1 states for coupling are identical");
 +      if (ir->eI == eiMD && (opts->couple_lam0 == ecouplamNONE ||
 +			     opts->couple_lam1 == ecouplamNONE)) {
 +	warning(wi,"For proper sampling of the (nearly) decoupled state, stochastic dynamics should be used");
 +      }
 +    } else {
-       warning(wi,"Can not couple a molecule with free_energy = no");
++      warning(wi,"Can not couple a molecule with free-energy = no");
 +    }
 +  }
 +
 +  do_wall_params(ir,wall_atomtype,wall_density,opts);
 +  
 +  if (opts->bOrire && str_nelem(orirefitgrp,MAXPTR,NULL)!=1) {
 +      warning_error(wi,"ERROR: Need one orientation restraint fit group\n");
 +  }
 +
 +  clear_mat(ir->deform);
 +  for(i=0; i<6; i++)
 +    dumdub[0][i] = 0;
 +  m = sscanf(deform,"%lf %lf %lf %lf %lf %lf",
 +	     &(dumdub[0][0]),&(dumdub[0][1]),&(dumdub[0][2]),
 +	     &(dumdub[0][3]),&(dumdub[0][4]),&(dumdub[0][5]));
 +  for(i=0; i<3; i++)
 +    ir->deform[i][i] = dumdub[0][i];
 +  ir->deform[YY][XX] = dumdub[0][3];
 +  ir->deform[ZZ][XX] = dumdub[0][4];
 +  ir->deform[ZZ][YY] = dumdub[0][5];
 +  if (ir->epc != epcNO) {
 +    for(i=0; i<3; i++)
 +      for(j=0; j<=i; j++)
 +	if (ir->deform[i][j]!=0 && ir->compress[i][j]!=0) {
 +        warning_error(wi,"A box element has deform set and compressibility > 0");
 +	}
 +    for(i=0; i<3; i++)
 +      for(j=0; j<i; j++)
 +	if (ir->deform[i][j]!=0) {
 +	  for(m=j; m<DIM; m++)
 +	    if (ir->compress[m][j]!=0) {
 +	      sprintf(warn_buf,"An off-diagonal box element has deform set while compressibility > 0 for the same component of another box vector, this might lead to spurious periodicity effects.");
 +	      warning(wi,warn_buf);
 +	    }
 +	}
 +  }
 +
 +  if (ir->efep != efepNO) {
 +    parse_n_double(foreign_lambda,&ir->n_flambda,&ir->flambda);
 +    if (ir->n_flambda > 0 && ir->rlist < max(ir->rvdw,ir->rcoulomb)) {
 +      warning_note(wi,"For foreign lambda free energy differences it is assumed that the soft-core interactions have no effect beyond the neighborlist cut-off");
 +    }
 +  } else {
 +    ir->n_flambda = 0;
 +  }
 +
 +  sfree(dumstr[0]);
 +  sfree(dumstr[1]);
 +}
 +
 +/* Look up the QM method or basis-set name s in the table gn of ng entries.
 + *
 + * The comparison is case-insensitive (gmx_strcasecmp). Returns the index of
 + * the first matching entry. When no entry matches, gmx_fatal is called with
 + * the offending name.
 + */
 +static int search_QMstring(char *s,int ng,const char *gn[])
 +{
 +  int i;
 +
 +  for(i=0; (i<ng); i++)
 +  {
 +    if (gmx_strcasecmp(s,gn[i]) == 0)
 +    {
 +      return i;
 +    }
 +  }
 +
 +  /* The exclamation mark ends the sentence; it used to follow the newline
 +   * and was therefore printed on a line of its own.
 +   */
 +  gmx_fatal(FARGS,"this QM method or basisset (%s) is not implemented!\n",s);
 +
 +  /* Only reached if gmx_fatal returns */
 +  return -1;
 +
 +} /* search_QMstring */
 +
 +
 +/* Case-insensitive lookup of the group name s among the ng names in gn.
 + *
 + * Returns the index of the first matching name. When s does not occur in
 + * gn, gmx_fatal is called with a hint about index files.
 + */
 +int search_string(char *s,int ng,char *gn[])
 +{
 +  int idx = 0;
 +
 +  /* Advance until a name matches or the table is exhausted */
 +  while ((idx < ng) && (gmx_strcasecmp(s,gn[idx]) != 0))
 +  {
 +    idx++;
 +  }
 +
 +  if (idx < ng)
 +  {
 +    return idx;
 +  }
 +
 +  gmx_fatal(FARGS,"Group %s not found in index file.\nGroup names must match either [moleculetype] names\nor custom index group names,in which case you\nmust supply an index file to the '-n' option of grompp.",s);
 +
 +  return -1;
 +}
 +
 +/* Assign every atom a group number for the group type gtype.
 + *
 + * natoms        - number of atoms; atom indices read from block must lie
 + *                 in [0,natoms), otherwise this is a fatal error
 + * groups        - output: groups->grps[gtype] receives the name indices of
 + *                 the groups, groups->grpnr[gtype] the per-atom group number
 + *                 (left NULL when only a single group results)
 + * ng, ptrs      - the ng requested group names
 + * block, gnames - atom lists of the available index groups and their names;
 + *                 each ptrs[i] is looked up in gnames with search_string
 + * gtype         - which entry of groups->grps / groups->grpnr to fill
 + * restnm        - name index stored for a generated rest group
 + * grptp         - egrptpONE merges all listed groups into group 0,
 + *                 egrptpALL makes uncovered atoms a fatal error,
 + *                 egrptpPART leaves uncovered atoms without a named group
 + *                 and only issues a note; every type other than egrptpPART
 + *                 gets a named rest group appended for uncovered atoms
 + * bVerbose      - report the creation of a rest group on stderr
 + * wi            - warning handle used for the egrptpPART note
 + *
 + * An atom that occurs in more than one of the requested groups is a fatal
 + * error.
 + *
 + * Returns TRUE only when grptp is egrptpPART and some atoms were not part
 + * of any requested group.
 + *
 + * NOTE(review): groups are appended at grps->nr, so grps->nr is presumably
 + * zero on entry - confirm against the callers.
 + */
 +static gmx_bool do_numbering(int natoms,gmx_groups_t *groups,int ng,char *ptrs[],
 +                         t_blocka *block,char *gnames[],
 +                         int gtype,int restnm,
 +                         int grptp,gmx_bool bVerbose,
 +                         warninp_t wi)
 +{
 +    unsigned short *cbuf;
 +    t_grps *grps=&(groups->grps[gtype]);
 +    int    i,j,gid,aj,ognr,ntot=0;
 +    const char *title;
 +    gmx_bool   bRest;
 +    char   warn_buf[STRLEN];
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"Starting numbering %d groups of type %d\n",ng,gtype);
 +    }
 +
 +    title = gtypes[gtype];
 +
 +    snew(cbuf,natoms);
 +    /* Mark all id's as not set */
 +    for(i=0; (i<natoms); i++)
 +    {
 +        cbuf[i] = NOGID;
 +    }
 +
 +    snew(grps->nm_ind,ng+1); /* +1 for possible rest group */
 +    for(i=0; (i<ng); i++)
 +    {
 +        /* Lookup the group name in the block structure */
 +        gid = search_string(ptrs[i],block->nr,gnames);
 +        /* With egrptpONE only the first name is registered as a group */
 +        if ((grptp != egrptpONE) || (i == 0))
 +        {
 +            grps->nm_ind[grps->nr++]=gid;
 +        }
 +        if (debug)
 +        {
 +            fprintf(debug,"Found gid %d for group %s\n",gid,ptrs[i]);
 +        }
 +
 +        /* Now go over the atoms in the group */
 +        for(j=block->index[gid]; (j<block->index[gid+1]); j++)
 +        {
 +
 +            aj=block->a[j];
 +
 +            /* Range checking */
 +            if ((aj < 0) || (aj >= natoms))
 +            {
 +                gmx_fatal(FARGS,"Invalid atom number %d in indexfile",aj);
 +            }
 +            /* Lookup up the old group number */
 +            ognr = cbuf[aj];
 +            if (ognr != NOGID)
 +            {
 +                gmx_fatal(FARGS,"Atom %d in multiple %s groups (%d and %d)",
 +                          aj+1,title,ognr+1,i+1);
 +            }
 +            else
 +            {
 +                /* Store the group number in buffer */
 +                if (grptp == egrptpONE)
 +                {
 +                    cbuf[aj] = 0;
 +                }
 +                else
 +                {
 +                    cbuf[aj] = i;
 +                }
 +                ntot++;
 +            }
 +        }
 +    }
 +
 +    /* Now check whether we have done all atoms */
 +    bRest = FALSE;
 +    if (ntot != natoms)
 +    {
 +        if (grptp == egrptpALL)
 +        {
 +            gmx_fatal(FARGS,"%d atoms are not part of any of the %s groups",
 +                      natoms-ntot,title);
 +        }
 +        else if (grptp == egrptpPART)
 +        {
 +            sprintf(warn_buf,"%d atoms are not part of any of the %s groups",
 +                    natoms-ntot,title);
 +            warning_note(wi,warn_buf);
 +        }
 +        /* Assign all atoms currently unassigned to a rest group */
 +        for(j=0; (j<natoms); j++)
 +        {
 +            if (cbuf[j] == NOGID)
 +            {
 +                cbuf[j] = grps->nr;
 +                bRest = TRUE;
 +            }
 +        }
 +        if (grptp != egrptpPART)
 +        {
 +            if (bVerbose)
 +            {
 +                fprintf(stderr,
 +                        "Making dummy/rest group for %s containing %d elements\n",
 +                        title,natoms-ntot);
 +            }
 +            /* Add group name "rest". The loop above already gave all
 +             * unassigned atoms the number grps->nr, so only the name
 +             * index and the group count have to be updated here.
 +             */
 +            grps->nm_ind[grps->nr] = restnm;
 +            grps->nr++;
 +        }
 +    }
 +
 +    if (grps->nr == 1)
 +    {
 +        /* A single group needs no per-atom table */
 +        groups->ngrpnr[gtype] = 0;
 +        groups->grpnr[gtype]  = NULL;
 +    }
 +    else
 +    {
 +        groups->ngrpnr[gtype] = natoms;
 +        snew(groups->grpnr[gtype],natoms);
 +        for(j=0; (j<natoms); j++)
 +        {
 +            groups->grpnr[gtype][j] = cbuf[j];
 +        }
 +    }
 +
 +    sfree(cbuf);
 +
 +    return (bRest && grptp == egrptpPART);
 +}
 +
 +static void calc_nrdf(gmx_mtop_t *mtop,t_inputrec *ir,char **gnames)
 +{
 +  t_grpopts *opts;
 +  gmx_groups_t *groups;
 +  t_pull  *pull;
 +  int     natoms,ai,aj,i,j,d,g,imin,jmin,nc;
 +  t_iatom *ia;
 +  int     *nrdf2,*na_vcm,na_tot;
 +  double  *nrdf_tc,*nrdf_vcm,nrdf_uc,n_sub=0;
 +  gmx_mtop_atomloop_all_t aloop;
 +  t_atom  *atom;
 +  int     mb,mol,ftype,as;
 +  gmx_molblock_t *molb;
 +  gmx_moltype_t *molt;
 +
 +  /* Calculate nrdf. 
 +   * First calc 3xnr-atoms for each group
 +   * then subtract half a degree of freedom for each constraint
 +   *
 +   * Only atoms and nuclei contribute to the degrees of freedom...
 +   *
 +   * The result is stored in ir->opts.nrdf[] for every temperature
 +   * coupling group (negative values are clamped to 0) and printed to
 +   * stderr; gnames is only used to look up group names for messages.
 +   *
 +   * nrdf2[] holds twice the remaining number of degrees of freedom of
 +   * each atom, so half degrees of freedom can be tracked as integers.
 +   * nrdf_tc[] and nrdf_vcm[] accumulate the degrees of freedom per
 +   * temperature coupling group and per COM motion removal group.
 +   */
 +
 +  opts = &ir->opts;
 +  
 +  groups = &mtop->groups;
 +  natoms = mtop->natoms;
 +
 +  /* Allocate one more for a possible rest group */
 +  /* We need to sum degrees of freedom into doubles,
 +   * since floats give too low nrdf's above 3 million atoms.
 +   */
 +  snew(nrdf_tc,groups->grps[egcTC].nr+1);
 +  snew(nrdf_vcm,groups->grps[egcVCM].nr+1);
 +  snew(na_vcm,groups->grps[egcVCM].nr+1);
 +  
 +  for(i=0; i<groups->grps[egcTC].nr; i++)
 +    nrdf_tc[i] = 0;
 +  for(i=0; i<groups->grps[egcVCM].nr+1; i++)
 +    nrdf_vcm[i] = 0;
 +
 +  snew(nrdf2,natoms);
 +  aloop = gmx_mtop_atomloop_all_init(mtop);
 +  while (gmx_mtop_atomloop_all_next(aloop,&i,&atom)) {
 +    nrdf2[i] = 0;
 +    if (atom->ptype == eptAtom || atom->ptype == eptNucleus) {
 +      g = ggrpnr(groups,egcFREEZE,i);
 +      /* Double count nrdf for particle i */
 +      for(d=0; d<DIM; d++) {
 +	if (opts->nFreeze[g][d] == 0) {
 +	  nrdf2[i] += 2;
 +	}
 +      }
 +      nrdf_tc [ggrpnr(groups,egcTC ,i)] += 0.5*nrdf2[i];
 +      nrdf_vcm[ggrpnr(groups,egcVCM,i)] += 0.5*nrdf2[i];
 +    }
 +  }
 +
 +  as = 0;
 +  for(mb=0; mb<mtop->nmolblock; mb++) {
 +    molb = &mtop->molblock[mb];
 +    molt = &mtop->moltype[molb->type];
 +    atom = molt->atoms.atom;
 +    for(mol=0; mol<molb->nmol; mol++) {
 +      for (ftype=F_CONSTR; ftype<=F_CONSTRNC; ftype++) {
 +	ia = molt->ilist[ftype].iatoms;
 +	for(i=0; i<molt->ilist[ftype].nr; ) {
 +	  /* Subtract degrees of freedom for the constraints,
 +	   * if the particles still have degrees of freedom left.
 +	   * If one of the particles is a vsite or a shell, then all
 +	   * constraint motion will go there, but since they do not
 +	   * contribute to the constraints the degrees of freedom do not
 +	   * change.
 +	   *
 +	   * ia[1] and ia[2] are molecule-local atom numbers; as is the
 +	   * global index of the first atom of the current molecule.
 +	   * In nrdf2 units (half degrees of freedom) a constraint takes
 +	   * 1 from each atom, or 2 from one atom when the other atom has
 +	   * nothing left; each amount is capped by what the atom still
 +	   * has.
 +	   */
 +	  ai = as + ia[1];
 +	  aj = as + ia[2];
 +	  if (((atom[ia[1]].ptype == eptNucleus) ||
 +	       (atom[ia[1]].ptype == eptAtom)) &&
 +	      ((atom[ia[2]].ptype == eptNucleus) ||
 +	       (atom[ia[2]].ptype == eptAtom))) {
 +	    if (nrdf2[ai] > 0) 
 +	      jmin = 1;
 +	    else
 +	      jmin = 2;
 +	    if (nrdf2[aj] > 0)
 +	      imin = 1;
 +	    else
 +	      imin = 2;
 +	    imin = min(imin,nrdf2[ai]);
 +	    jmin = min(jmin,nrdf2[aj]);
 +	    nrdf2[ai] -= imin;
 +	    nrdf2[aj] -= jmin;
 +	    nrdf_tc [ggrpnr(groups,egcTC ,ai)] -= 0.5*imin;
 +	    nrdf_tc [ggrpnr(groups,egcTC ,aj)] -= 0.5*jmin;
 +	    nrdf_vcm[ggrpnr(groups,egcVCM,ai)] -= 0.5*imin;
 +	    nrdf_vcm[ggrpnr(groups,egcVCM,aj)] -= 0.5*jmin;
 +	  }
 +	  ia += interaction_function[ftype].nratoms+1;
 +	  i  += interaction_function[ftype].nratoms+1;
 +	}
 +      }
 +      ia = molt->ilist[F_SETTLE].iatoms;
 +      for(i=0; i<molt->ilist[F_SETTLE].nr; ) {
 +	/* Subtract 1 dof from every atom in the SETTLE, i.e. at most 2
 +	 * in nrdf2 units. The loop covers the three consecutive atoms
 +	 * starting at ia[1]; each SETTLE entry takes 2 slots in iatoms.
 +	 */
 +	for(ai=as+ia[1]; ai<as+ia[1]+3; ai++) {
 +	  imin = min(2,nrdf2[ai]);
 +	  nrdf2[ai] -= imin;
 +	  nrdf_tc [ggrpnr(groups,egcTC ,ai)] -= 0.5*imin;
 +	  nrdf_vcm[ggrpnr(groups,egcVCM,ai)] -= 0.5*imin;
 +	}
 +	ia += 2;
 +	i  += 2;
 +      }
 +      as += molt->atoms.nr;
 +    }
 +  }
 +
 +  if (ir->ePull == epullCONSTRAINT) {
 +    /* Correct nrdf for the COM constraints.
 +     * We correct using the TC and VCM group of the first atom
 +     * in the reference and pull group. If atoms in one pull group
 +     * belong to different TC or VCM groups it is anyhow difficult
 +     * to determine the optimal nrdf assignment.
 +     *
 +     * nc is the number of constrained dimensions: the number of
 +     * non-zero pull->dim entries for geometry epullgPOS, otherwise 1.
 +     * imin starts at 2*nc half degrees of freedom per pull group; one
 +     * half is moved to the reference group when that group has atoms
 +     * and its TC group has more than 1 degree of freedom left.
 +     */
 +    pull = ir->pull;
 +    if (pull->eGeom == epullgPOS) {
 +      nc = 0;
 +      for(i=0; i<DIM; i++) {
 +	if (pull->dim[i])
 +	  nc++;
 +      }
 +    } else {
 +      nc = 1;
 +    }
 +    for(i=0; i<pull->ngrp; i++) {
 +      imin = 2*nc;
 +      if (pull->grp[0].nat > 0) {
 +	/* Subtract 1/2 dof from the reference group */
 +	ai = pull->grp[0].ind[0];
 +	if (nrdf_tc[ggrpnr(groups,egcTC,ai)] > 1) {
 +	  nrdf_tc [ggrpnr(groups,egcTC ,ai)] -= 0.5;
 +	  nrdf_vcm[ggrpnr(groups,egcVCM,ai)] -= 0.5;
 +	  imin--;
 +	}
 +      }
 +      /* Subtract 1/2 dof from the pulled group */
 +      ai = pull->grp[1+i].ind[0];
 +      nrdf_tc [ggrpnr(groups,egcTC ,ai)] -= 0.5*imin;
 +      nrdf_vcm[ggrpnr(groups,egcVCM,ai)] -= 0.5*imin;
 +      if (nrdf_tc[ggrpnr(groups,egcTC,ai)] < 0)
 +	gmx_fatal(FARGS,"Center of mass pulling constraints caused the number of degrees of freedom for temperature coupling group %s to be negative",gnames[groups->grps[egcTC].nm_ind[ggrpnr(groups,egcTC,ai)]]);
 +    }
 +  }
 +  
 +  if (ir->nstcomm != 0) {
 +    /* Subtract 3 from the number of degrees of freedom in each vcm group
 +     * when com translation is removed and 6 when rotation is removed
 +     * as well.
 +     * For linear removal the actual number comes from ndof_com(ir).
 +     */
 +    switch (ir->comm_mode) {
 +    case ecmLINEAR:
 +      n_sub = ndof_com(ir);
 +      break;
 +    case ecmANGULAR:
 +      n_sub = 6;
 +      break;
 +    default:
 +      n_sub = 0;
 +      gmx_incons("Checking comm_mode");
 +    }
 +    
 +    for(i=0; i<groups->grps[egcTC].nr; i++) {
 +      /* Count the number of atoms of TC group i for every VCM group */
 +      for(j=0; j<groups->grps[egcVCM].nr+1; j++)
 +	na_vcm[j] = 0;
 +      na_tot = 0;
 +      for(ai=0; ai<natoms; ai++)
 +	if (ggrpnr(groups,egcTC,ai) == i) {
 +	  na_vcm[ggrpnr(groups,egcVCM,ai)]++;
 +	  na_tot++;
 +	}
 +      /* Correct for VCM removal according to the fraction of each VCM
 +       * group present in this TC group.
 +       * nrdf_uc is the uncorrected value; VCM groups with no more than
 +       * n_sub degrees of freedom contribute nothing.
 +       * NOTE(review): na_tot is 0 for a TC group without atoms, which
 +       * would make the division below 0/0 - confirm that such groups
 +       * cannot occur here.
 +       */
 +      nrdf_uc = nrdf_tc[i];
 +      if (debug) {
 +	fprintf(debug,"T-group[%d] nrdf_uc = %g, n_sub = %g\n",
 +		i,nrdf_uc,n_sub);
 +      }
 +      nrdf_tc[i] = 0;
 +      for(j=0; j<groups->grps[egcVCM].nr+1; j++) {
 +	if (nrdf_vcm[j] > n_sub) {
 +	  nrdf_tc[i] += nrdf_uc*((double)na_vcm[j]/(double)na_tot)*
 +	    (nrdf_vcm[j] - n_sub)/nrdf_vcm[j];
 +	}
 +	if (debug) {
 +	  fprintf(debug,"  nrdf_vcm[%d] = %g, nrdf = %g\n",
 +		  j,nrdf_vcm[j],nrdf_tc[i]);
 +	}
 +      }
 +    }
 +  }
 +  for(i=0; (i<groups->grps[egcTC].nr); i++) {
 +    opts->nrdf[i] = nrdf_tc[i];
 +    if (opts->nrdf[i] < 0)
 +      opts->nrdf[i] = 0;
 +    fprintf(stderr,
 +	    "Number of degrees of freedom in T-Coupling group %s is %.2f\n",
 +	    gnames[groups->grps[egcTC].nm_ind[i]],opts->nrdf[i]);
 +  }
 +  
 +  sfree(nrdf2);
 +  sfree(nrdf_tc);
 +  sfree(nrdf_vcm);
 +  sfree(na_vcm);
 +}
 +
 +/* Parse a list of cosine terms from s into cosine.
 + *
 + * The expected layout of s is an integer count n followed by n pairs of
 + * reals (a, phi), all separated by white space:
 + *   n  a_1 phi_1  a_2 phi_2 ... a_n phi_n
 + * The pairs are stored in cosine->a[] and cosine->phi[], the count in
 + * cosine->n.
 + *
 + * An empty string, a missing count or a count <= 0 results in
 + * cosine->n = 0 with both arrays NULL. A count that is followed by fewer
 + * than n complete pairs is a fatal error. bTime is not used here.
 + *
 + * The string is parsed in one forward pass, so no format buffer has to
 + * grow with the number of terms.
 + */
 +static void decode_cos(char *s,t_cosines *cosine,gmx_bool bTime)
 +{
 +  char   *t;
 +  char   *cur,*end;
 +  double a,phi;
 +  int    i,nread;
 +
 +  t=strdup(s);
 +  trim(t);
 +
 +  cosine->n=0;
 +  cosine->a=NULL;
 +  cosine->phi=NULL;
 +  if (strlen(t)) {
 +    /* %n records how many characters the count occupied; it is only
 +     * written when the count itself was read successfully.
 +     */
 +    nread = 0;
 +    sscanf(t,"%d%n",&(cosine->n),&nread);
 +    if (cosine->n <= 0) {
 +      cosine->n=0;
 +    } else {
 +      snew(cosine->a,cosine->n);
 +      snew(cosine->phi,cosine->n);
 +
 +      /* Continue right behind the count; strtod skips white space */
 +      cur = t + nread;
 +      for(i=0; (i<cosine->n); i++) {
 +        a = strtod(cur,&end);
 +        if (end == cur)
 +          gmx_fatal(FARGS,"Invalid input for electric field shift: '%s'",t);
 +        cur = end;
 +        phi = strtod(cur,&end);
 +        if (end == cur)
 +          gmx_fatal(FARGS,"Invalid input for electric field shift: '%s'",t);
 +        cur = end;
 +        cosine->a[i]=a;
 +        cosine->phi[i]=phi;
 +      }
 +    }
 +  }
 +  sfree(t);
 +}
 +
 +/* Set flag for every pair of energy groups listed in val.
 + *
 + * val holds group names that are consumed two at a time; option is the
 + * name of the mdp option and is only used in error messages. For each
 + * pair both names are matched case-insensitively against the energy
 + * groups and flag is OR-ed into ir->opts.egp_flags for both orderings of
 + * the pair. An odd number of names or an unknown name is a fatal error.
 + *
 + * Returns TRUE when at least one pair was flagged.
 + */
 +static gmx_bool do_egp_flag(t_inputrec *ir,gmx_groups_t *groups,
 +			const char *option,const char *val,int flag)
 +{
 +  /* MAXPTR*(MAXPTR+1)/2 pairs would be the theoretical maximum, but that
 +   * is far more than fits in a string of STRLEN characters. The number of
 +   * names that can be parsed is therefore bounded by STRLEN/2.
 +   */
 +#define EGP_MAX (STRLEN/2)
 +  char     *names[EGP_MAX];
 +  char     ***grpname = groups->grpname;
 +  int      nelem,npair,pair,side,g,nener;
 +  int      hit[2];
 +  gmx_bool bAny = FALSE;
 +
 +  nelem = str_nelem(val,EGP_MAX,names);
 +  if (nelem % 2 != 0)
 +    gmx_fatal(FARGS,"The number of groups for %s is odd",option);
 +
 +  nener = groups->grps[egcENER].nr;
 +  npair = nelem/2;
 +  for(pair=0; pair<npair; pair++) {
 +    /* Resolve the first and then the second name of the pair */
 +    for(side=0; side<2; side++) {
 +      char *wanted = names[2*pair+side];
 +
 +      for(g=0; g<nener; g++) {
 +        if (gmx_strcasecmp(wanted,*(grpname[groups->grps[egcENER].nm_ind[g]])) == 0)
 +          break;
 +      }
 +      if (g == nener)
 +        gmx_fatal(FARGS,"%s in %s is not an energy group\n",
 +                  wanted,option);
 +      hit[side] = g;
 +    }
 +    if ((hit[0] < nener) && (hit[1] < nener)) {
 +      /* The flag table is symmetric in the two group indices */
 +      ir->opts.egp_flags[nener*hit[0]+hit[1]] |= flag;
 +      ir->opts.egp_flags[nener*hit[1]+hit[0]] |= flag;
 +      bAny = TRUE;
 +    }
 +  }
 +
 +  return bAny;
 +}
 +
 +void do_index(const char* mdparin, const char *ndx,
 +              gmx_mtop_t *mtop,
 +              gmx_bool bVerbose,
 +              t_inputrec *ir,rvec *v,
 +              warninp_t wi)
 +{
 +  t_blocka *grps;
 +  gmx_groups_t *groups;
 +  int     natoms;
 +  t_symtab *symtab;
 +  t_atoms atoms_all;
 +  char    warnbuf[STRLEN],**gnames;
 +  int     nr,ntcg,ntau_t,nref_t,nacc,nofg,nSA,nSA_points,nSA_time,nSA_temp;
 +  real    tau_min;
 +  int     nstcmin;
 +  int     nacg,nfreeze,nfrdim,nenergy,nvcm,nuser;
 +  char    *ptr1[MAXPTR],*ptr2[MAXPTR],*ptr3[MAXPTR];
 +  int     i,j,k,restnm;
 +  real    SAtime;
 +  gmx_bool    bExcl,bTable,bSetTCpar,bAnneal,bRest;
 +  int     nQMmethod,nQMbasis,nQMcharge,nQMmult,nbSH,nCASorb,nCASelec,
 +    nSAon,nSAoff,nSAsteps,nQMg,nbOPT,nbTS;
 +  char    warn_buf[STRLEN];
 +
 +  if (bVerbose)
 +    fprintf(stderr,"processing index file...\n");
 +  debug_gmx();
 +  if (ndx == NULL) {
 +    snew(grps,1);
 +    snew(grps->index,1);
 +    snew(gnames,1);
 +    atoms_all = gmx_mtop_global_atoms(mtop);
 +    analyse(&atoms_all,grps,&gnames,FALSE,TRUE);
 +    free_t_atoms(&atoms_all,FALSE);
 +  } else {
 +    grps = init_index(ndx,&gnames);
 +  }
 +
 +  groups = &mtop->groups;
 +  natoms = mtop->natoms;
 +  symtab = &mtop->symtab;
 +
 +  snew(groups->grpname,grps->nr+1);
 +  
 +  for(i=0; (i<grps->nr); i++) {
 +    groups->grpname[i] = put_symtab(symtab,gnames[i]);
 +  }
 +  groups->grpname[i] = put_symtab(symtab,"rest");
 +  restnm=i;
 +  srenew(gnames,grps->nr+1);
 +  gnames[restnm] = *(groups->grpname[i]);
 +  groups->ngrpname = grps->nr+1;
 +
 +  set_warning_line(wi,mdparin,-1);
 +
 +  ntau_t = str_nelem(tau_t,MAXPTR,ptr1);
 +  nref_t = str_nelem(ref_t,MAXPTR,ptr2);
 +  ntcg   = str_nelem(tcgrps,MAXPTR,ptr3);
 +  if ((ntau_t != ntcg) || (nref_t != ntcg)) {
-     gmx_fatal(FARGS,"Invalid T coupling input: %d groups, %d ref_t values and "
- 		"%d tau_t values",ntcg,nref_t,ntau_t);
++    gmx_fatal(FARGS,"Invalid T coupling input: %d groups, %d ref-t values and "
++                "%d tau-t values",ntcg,nref_t,ntau_t);
 +  }
 +
 +  bSetTCpar = (ir->etc || EI_SD(ir->eI) || ir->eI==eiBD || EI_TPI(ir->eI));
 +  do_numbering(natoms,groups,ntcg,ptr3,grps,gnames,egcTC,
 +               restnm,bSetTCpar ? egrptpALL : egrptpALL_GENREST,bVerbose,wi);
 +  nr = groups->grps[egcTC].nr;
 +  ir->opts.ngtc = nr;
 +  snew(ir->opts.nrdf,nr);
 +  snew(ir->opts.tau_t,nr);
 +  snew(ir->opts.ref_t,nr);
 +  if (ir->eI==eiBD && ir->bd_fric==0) {
-     fprintf(stderr,"bd_fric=0, so tau_t will be used as the inverse friction constant(s)\n"); 
++    fprintf(stderr,"bd-fric=0, so tau-t will be used as the inverse friction constant(s)\n");
 +  }
 +
 +  if (bSetTCpar)
 +  {
 +      if (nr != nref_t)
 +      {
-           gmx_fatal(FARGS,"Not enough ref_t and tau_t values!");
++          gmx_fatal(FARGS,"Not enough ref-t and tau-t values!");
 +      }
 +      
 +      tau_min = 1e20;
 +      for(i=0; (i<nr); i++)
 +      {
 +          ir->opts.tau_t[i] = strtod(ptr1[i],NULL);
 +          if ((ir->eI == eiBD || ir->eI == eiSD2) && ir->opts.tau_t[i] <= 0)
 +          {
-               sprintf(warn_buf,"With integrator %s tau_t should be larger than 0",ei_names[ir->eI]);
++              sprintf(warn_buf,"With integrator %s tau-t should be larger than 0",ei_names[ir->eI]);
 +              warning_error(wi,warn_buf);
 +          }
 +          if ((ir->etc == etcVRESCALE && ir->opts.tau_t[i] >= 0) || 
 +              (ir->etc != etcVRESCALE && ir->opts.tau_t[i] >  0))
 +          {
 +              tau_min = min(tau_min,ir->opts.tau_t[i]);
 +          }
 +      }
 +      if (ir->etc != etcNO && ir->nsttcouple == -1)
 +      {
 +            ir->nsttcouple = ir_optimal_nsttcouple(ir);
 +      }
 +      if (EI_VV(ir->eI)) 
 +      {
 +          if ((ir->epc==epcMTTK) && (ir->etc>etcNO))
 +          {
 +              int mincouple;
 +              mincouple = ir->nsttcouple;
 +              if (ir->nstpcouple < mincouple)
 +              {
 +                  mincouple = ir->nstpcouple;
 +              }
 +              ir->nstpcouple = mincouple;
 +              ir->nsttcouple = mincouple;
 +              warning_note(wi,"for current Trotter decomposition methods with vv, nsttcouple and nstpcouple must be equal.  Both have been reset to min(nsttcouple,nstpcouple)");
 +          }
 +      }
 +      nstcmin = tcouple_min_integration_steps(ir->etc);
 +      if (nstcmin > 1)
 +      {
 +          if (tau_min/(ir->delta_t*ir->nsttcouple) < nstcmin)
 +          {
-               sprintf(warn_buf,"For proper integration of the %s thermostat, tau_t (%g) should be at least %d times larger than nsttcouple*dt (%g)",
++              sprintf(warn_buf,"For proper integration of the %s thermostat, tau-t (%g) should be at least %d times larger than nsttcouple*dt (%g)",
 +                      ETCOUPLTYPE(ir->etc),
 +                      tau_min,nstcmin,
 +                      ir->nsttcouple*ir->delta_t);
 +              warning(wi,warn_buf);
 +          }
 +      }
 +      for(i=0; (i<nr); i++)
 +      {
 +          ir->opts.ref_t[i] = strtod(ptr2[i],NULL);
 +          if (ir->opts.ref_t[i] < 0)
 +          {
-               gmx_fatal(FARGS,"ref_t for group %d negative",i);
++              gmx_fatal(FARGS,"ref-t for group %d negative",i);
 +          }
 +      }
 +  }
 +    
 +  /* Simulated annealing for each group. There are nr groups */
 +  nSA = str_nelem(anneal,MAXPTR,ptr1);
 +  if (nSA == 1 && (ptr1[0][0]=='n' || ptr1[0][0]=='N'))
 +     nSA = 0;
 +  if(nSA>0 && nSA != nr) 
 +    gmx_fatal(FARGS,"Not enough annealing values: %d (for %d groups)\n",nSA,nr);
 +  else {
 +    snew(ir->opts.annealing,nr);
 +    snew(ir->opts.anneal_npoints,nr);
 +    snew(ir->opts.anneal_time,nr);
 +    snew(ir->opts.anneal_temp,nr);
 +    for(i=0;i<nr;i++) {
 +      ir->opts.annealing[i]=eannNO;
 +      ir->opts.anneal_npoints[i]=0;
 +      ir->opts.anneal_time[i]=NULL;
 +      ir->opts.anneal_temp[i]=NULL;
 +    }
 +    if (nSA > 0) {
 +      bAnneal=FALSE;
 +      for(i=0;i<nr;i++) { 
 +	if(ptr1[i][0]=='n' || ptr1[i][0]=='N') {
 +	  ir->opts.annealing[i]=eannNO;
 +	} else if(ptr1[i][0]=='s'|| ptr1[i][0]=='S') {
 +	  ir->opts.annealing[i]=eannSINGLE;
 +	  bAnneal=TRUE;
 +	} else if(ptr1[i][0]=='p'|| ptr1[i][0]=='P') {
 +	  ir->opts.annealing[i]=eannPERIODIC;
 +	  bAnneal=TRUE;
 +	} 
 +      } 
 +      if(bAnneal) {
 +	/* Read the other fields too */
 +	nSA_points = str_nelem(anneal_npoints,MAXPTR,ptr1);
 +	if(nSA_points!=nSA) 
- 	  gmx_fatal(FARGS,"Found %d annealing_npoints values for %d groups\n",nSA_points,nSA);
++          gmx_fatal(FARGS,"Found %d annealing-npoints values for %d groups\n",nSA_points,nSA);
 +	for(k=0,i=0;i<nr;i++) {
 +	  ir->opts.anneal_npoints[i]=strtol(ptr1[i],NULL,10);
 +	  if(ir->opts.anneal_npoints[i]==1)
 +	    gmx_fatal(FARGS,"Please specify at least a start and an end point for annealing\n");
 +	  snew(ir->opts.anneal_time[i],ir->opts.anneal_npoints[i]);
 +	  snew(ir->opts.anneal_temp[i],ir->opts.anneal_npoints[i]);
 +	  k += ir->opts.anneal_npoints[i];
 +	}
 +
 +	nSA_time = str_nelem(anneal_time,MAXPTR,ptr1);
 +	if(nSA_time!=k) 
- 	  gmx_fatal(FARGS,"Found %d annealing_time values, wanter %d\n",nSA_time,k);
++          gmx_fatal(FARGS,"Found %d annealing-time values, wanter %d\n",nSA_time,k);
 +	nSA_temp = str_nelem(anneal_temp,MAXPTR,ptr2);
 +	if(nSA_temp!=k) 
- 	  gmx_fatal(FARGS,"Found %d annealing_temp values, wanted %d\n",nSA_temp,k);
++          gmx_fatal(FARGS,"Found %d annealing-temp values, wanted %d\n",nSA_temp,k);
 +
 +	for(i=0,k=0;i<nr;i++) {
 +	  
 +	  for(j=0;j<ir->opts.anneal_npoints[i];j++) {
 +	    ir->opts.anneal_time[i][j]=strtod(ptr1[k],NULL);
 +	    ir->opts.anneal_temp[i][j]=strtod(ptr2[k],NULL);
 +	    if(j==0) {
 +	      if(ir->opts.anneal_time[i][0] > (ir->init_t+GMX_REAL_EPS))
 +		gmx_fatal(FARGS,"First time point for annealing > init_t.\n");      
 +	    } else { 
 +	      /* j>0 */
 +	      if(ir->opts.anneal_time[i][j]<ir->opts.anneal_time[i][j-1])
 +		gmx_fatal(FARGS,"Annealing timepoints out of order: t=%f comes after t=%f\n",
 +			    ir->opts.anneal_time[i][j],ir->opts.anneal_time[i][j-1]);
 +	    }
 +	    if(ir->opts.anneal_temp[i][j]<0) 
 +	      gmx_fatal(FARGS,"Found negative temperature in annealing: %f\n",ir->opts.anneal_temp[i][j]);    
 +	    k++;
 +	  }
 +	}
 +	/* Print out some summary information, to make sure we got it right */
 +	for(i=0,k=0;i<nr;i++) {
 +	  if(ir->opts.annealing[i]!=eannNO) {
 +	    j = groups->grps[egcTC].nm_ind[i];
 +	    fprintf(stderr,"Simulated annealing for group %s: %s, %d timepoints\n",
 +		    *(groups->grpname[j]),eann_names[ir->opts.annealing[i]],
 +		    ir->opts.anneal_npoints[i]);
 +	    fprintf(stderr,"Time (ps)   Temperature (K)\n");
 +	    /* All terms except the last one */
 +	    for(j=0;j<(ir->opts.anneal_npoints[i]-1);j++) 
 +		fprintf(stderr,"%9.1f      %5.1f\n",ir->opts.anneal_time[i][j],ir->opts.anneal_temp[i][j]);
 +	    
 +	    /* Finally the last one */
 +	    j = ir->opts.anneal_npoints[i]-1;
 +	    if(ir->opts.annealing[i]==eannSINGLE)
 +	      fprintf(stderr,"%9.1f-     %5.1f\n",ir->opts.anneal_time[i][j],ir->opts.anneal_temp[i][j]);
 +	    else {
 +	      fprintf(stderr,"%9.1f      %5.1f\n",ir->opts.anneal_time[i][j],ir->opts.anneal_temp[i][j]);
 +	      if(fabs(ir->opts.anneal_temp[i][j]-ir->opts.anneal_temp[i][0])>GMX_REAL_EPS)
 +		warning_note(wi,"There is a temperature jump when your annealing loops back.\n");
 +	    }
 +	  }
 +	} 
 +      }
 +    }
 +  }	
 +
 +  if (ir->ePull != epullNO) {
 +    make_pull_groups(ir->pull,pull_grp,grps,gnames);
 +  }
 +  
 +  if (ir->bRot) {
 +    make_rotation_groups(ir->rot,rot_grp,grps,gnames);
 +  }
 +
 +  nacc = str_nelem(acc,MAXPTR,ptr1);
 +  nacg = str_nelem(accgrps,MAXPTR,ptr2);
 +  if (nacg*DIM != nacc)
 +    gmx_fatal(FARGS,"Invalid Acceleration input: %d groups and %d acc. values",
 +		nacg,nacc);
 +  do_numbering(natoms,groups,nacg,ptr2,grps,gnames,egcACC,
 +               restnm,egrptpALL_GENREST,bVerbose,wi);
 +  nr = groups->grps[egcACC].nr;
 +  snew(ir->opts.acc,nr);
 +  ir->opts.ngacc=nr;
 +  
 +  for(i=k=0; (i<nacg); i++)
 +    for(j=0; (j<DIM); j++,k++)
 +      ir->opts.acc[i][j]=strtod(ptr1[k],NULL);
 +  for( ;(i<nr); i++)
 +    for(j=0; (j<DIM); j++)
 +      ir->opts.acc[i][j]=0;
 +  
 +  nfrdim  = str_nelem(frdim,MAXPTR,ptr1);
 +  nfreeze = str_nelem(freeze,MAXPTR,ptr2);
 +  if (nfrdim != DIM*nfreeze)
 +    gmx_fatal(FARGS,"Invalid Freezing input: %d groups and %d freeze values",
 +		nfreeze,nfrdim);
 +  do_numbering(natoms,groups,nfreeze,ptr2,grps,gnames,egcFREEZE,
 +               restnm,egrptpALL_GENREST,bVerbose,wi);
 +  nr = groups->grps[egcFREEZE].nr;
 +  ir->opts.ngfrz=nr;
 +  snew(ir->opts.nFreeze,nr);
 +  for(i=k=0; (i<nfreeze); i++)
 +    for(j=0; (j<DIM); j++,k++) {
 +      ir->opts.nFreeze[i][j]=(gmx_strncasecmp(ptr1[k],"Y",1)==0);
 +      if (!ir->opts.nFreeze[i][j]) {
 +	if (gmx_strncasecmp(ptr1[k],"N",1) != 0) {
 +	  sprintf(warnbuf,"Please use Y(ES) or N(O) for freezedim only "
 +		  "(not %s)", ptr1[k]);
 +	  warning(wi,warn_buf);
 +	}
 +      }
 +    }
 +  for( ; (i<nr); i++)
 +    for(j=0; (j<DIM); j++)
 +      ir->opts.nFreeze[i][j]=0;
 +  
 +  nenergy=str_nelem(energy,MAXPTR,ptr1);
 +  do_numbering(natoms,groups,nenergy,ptr1,grps,gnames,egcENER,
 +               restnm,egrptpALL_GENREST,bVerbose,wi);
 +  add_wall_energrps(groups,ir->nwall,symtab);
 +  ir->opts.ngener = groups->grps[egcENER].nr;
 +  nvcm=str_nelem(vcm,MAXPTR,ptr1);
 +  bRest =
 +    do_numbering(natoms,groups,nvcm,ptr1,grps,gnames,egcVCM,
 +                 restnm,nvcm==0 ? egrptpALL_GENREST : egrptpPART,bVerbose,wi);
 +  if (bRest) {
 +    warning(wi,"Some atoms are not part of any center of mass motion removal group.\n"
 +	    "This may lead to artifacts.\n"
 +	    "In most cases one should use one group for the whole system.");
 +  }
 +
 +  /* Now we have filled the freeze struct, so we can calculate NRDF */ 
 +  calc_nrdf(mtop,ir,gnames);
 +
 +  if (v && NULL) {
 +    real fac,ntot=0;
 +    
 +    /* Must check per group! */
 +    for(i=0; (i<ir->opts.ngtc); i++) 
 +      ntot += ir->opts.nrdf[i];
 +    if (ntot != (DIM*natoms)) {
 +      fac = sqrt(ntot/(DIM*natoms));
 +      if (bVerbose)
 +	fprintf(stderr,"Scaling velocities by a factor of %.3f to account for constraints\n"
 +		"and removal of center of mass motion\n",fac);
 +      for(i=0; (i<natoms); i++)
 +	svmul(fac,v[i],v[i]);
 +    }
 +  }
 +  
 +  nuser=str_nelem(user1,MAXPTR,ptr1);
 +  do_numbering(natoms,groups,nuser,ptr1,grps,gnames,egcUser1,
 +               restnm,egrptpALL_GENREST,bVerbose,wi);
 +  nuser=str_nelem(user2,MAXPTR,ptr1);
 +  do_numbering(natoms,groups,nuser,ptr1,grps,gnames,egcUser2,
 +               restnm,egrptpALL_GENREST,bVerbose,wi);
 +  nuser=str_nelem(xtc_grps,MAXPTR,ptr1);
 +  do_numbering(natoms,groups,nuser,ptr1,grps,gnames,egcXTC,
 +               restnm,egrptpONE,bVerbose,wi);
 +  nofg = str_nelem(orirefitgrp,MAXPTR,ptr1);
 +  do_numbering(natoms,groups,nofg,ptr1,grps,gnames,egcORFIT,
 +               restnm,egrptpALL_GENREST,bVerbose,wi);
 +
 +  /* QMMM input processing */
 +  nQMg          = str_nelem(QMMM,MAXPTR,ptr1);
 +  nQMmethod     = str_nelem(QMmethod,MAXPTR,ptr2);
 +  nQMbasis      = str_nelem(QMbasis,MAXPTR,ptr3);
 +  if((nQMmethod != nQMg)||(nQMbasis != nQMg)){
 +    gmx_fatal(FARGS,"Invalid QMMM input: %d groups %d basissets"
 +	      " and %d methods\n",nQMg,nQMbasis,nQMmethod);
 +  }
 +  /* group rest, if any, is always MM! */
 +  do_numbering(natoms,groups,nQMg,ptr1,grps,gnames,egcQMMM,
 +               restnm,egrptpALL_GENREST,bVerbose,wi);
 +  nr = nQMg; /*atoms->grps[egcQMMM].nr;*/
 +  ir->opts.ngQM = nQMg;
 +  snew(ir->opts.QMmethod,nr);
 +  snew(ir->opts.QMbasis,nr);
 +  for(i=0;i<nr;i++){
 +    /* input consists of strings: RHF CASSCF PM3 .. These need to be
 +     * converted to the corresponding enum in names.c
 +     */
 +    ir->opts.QMmethod[i] = search_QMstring(ptr2[i],eQMmethodNR,
 +                                           eQMmethod_names);
 +    ir->opts.QMbasis[i]  = search_QMstring(ptr3[i],eQMbasisNR,
 +                                           eQMbasis_names);
 +
 +  }
 +  nQMmult   = str_nelem(QMmult,MAXPTR,ptr1);
 +  nQMcharge = str_nelem(QMcharge,MAXPTR,ptr2);
 +  nbSH      = str_nelem(bSH,MAXPTR,ptr3);
 +  snew(ir->opts.QMmult,nr);
 +  snew(ir->opts.QMcharge,nr);
 +  snew(ir->opts.bSH,nr);
 +
 +  for(i=0;i<nr;i++){
 +    ir->opts.QMmult[i]   = strtol(ptr1[i],NULL,10);
 +    ir->opts.QMcharge[i] = strtol(ptr2[i],NULL,10);
 +    ir->opts.bSH[i]      = (gmx_strncasecmp(ptr3[i],"Y",1)==0);
 +  }
 +
 +  nCASelec  = str_nelem(CASelectrons,MAXPTR,ptr1);
 +  nCASorb   = str_nelem(CASorbitals,MAXPTR,ptr2);
 +  snew(ir->opts.CASelectrons,nr);
 +  snew(ir->opts.CASorbitals,nr);
 +  for(i=0;i<nr;i++){
 +    ir->opts.CASelectrons[i]= strtol(ptr1[i],NULL,10);
 +    ir->opts.CASorbitals[i] = strtol(ptr2[i],NULL,10);
 +  }
 +  /* special optimization options */
 +
 +  nbOPT = str_nelem(bOPT,MAXPTR,ptr1);
 +  nbTS = str_nelem(bTS,MAXPTR,ptr2);
 +  snew(ir->opts.bOPT,nr);
 +  snew(ir->opts.bTS,nr);
 +  for(i=0;i<nr;i++){
 +    ir->opts.bOPT[i] = (gmx_strncasecmp(ptr1[i],"Y",1)==0);
 +    ir->opts.bTS[i]  = (gmx_strncasecmp(ptr2[i],"Y",1)==0);
 +  }
 +  nSAon     = str_nelem(SAon,MAXPTR,ptr1);
 +  nSAoff    = str_nelem(SAoff,MAXPTR,ptr2);
 +  nSAsteps  = str_nelem(SAsteps,MAXPTR,ptr3);
 +  snew(ir->opts.SAon,nr);
 +  snew(ir->opts.SAoff,nr);
 +  snew(ir->opts.SAsteps,nr);
 +
 +  for(i=0;i<nr;i++){
 +    ir->opts.SAon[i]    = strtod(ptr1[i],NULL);
 +    ir->opts.SAoff[i]   = strtod(ptr2[i],NULL);
 +    ir->opts.SAsteps[i] = strtol(ptr3[i],NULL,10);
 +  }
 +  /* end of QMMM input */
 +
 +  if (bVerbose)
 +    for(i=0; (i<egcNR); i++) {
 +      fprintf(stderr,"%-16s has %d element(s):",gtypes[i],groups->grps[i].nr); 
 +      for(j=0; (j<groups->grps[i].nr); j++)
 +	fprintf(stderr," %s",*(groups->grpname[groups->grps[i].nm_ind[j]]));
 +      fprintf(stderr,"\n");
 +    }
 +
 +  nr = groups->grps[egcENER].nr;
 +  snew(ir->opts.egp_flags,nr*nr);
 +
-   bExcl = do_egp_flag(ir,groups,"energygrp_excl",egpexcl,EGP_EXCL);
++  bExcl = do_egp_flag(ir,groups,"energygrp-excl",egpexcl,EGP_EXCL);
 +  if (bExcl && EEL_FULL(ir->coulombtype))
 +    warning(wi,"Can not exclude the lattice Coulomb energy between energy groups");
 +
-   bTable = do_egp_flag(ir,groups,"energygrp_table",egptable,EGP_TABLE);
++  bTable = do_egp_flag(ir,groups,"energygrp-table",egptable,EGP_TABLE);
 +  if (bTable && !(ir->vdwtype == evdwUSER) && 
 +      !(ir->coulombtype == eelUSER) && !(ir->coulombtype == eelPMEUSER) &&
 +      !(ir->coulombtype == eelPMEUSERSWITCH))
 +    gmx_fatal(FARGS,"Can only have energy group pair tables in combination with user tables for VdW and/or Coulomb");
 +
 +  decode_cos(efield_x,&(ir->ex[XX]),FALSE);
 +  decode_cos(efield_xt,&(ir->et[XX]),TRUE);
 +  decode_cos(efield_y,&(ir->ex[YY]),FALSE);
 +  decode_cos(efield_yt,&(ir->et[YY]),TRUE);
 +  decode_cos(efield_z,&(ir->ex[ZZ]),FALSE);
 +  decode_cos(efield_zt,&(ir->et[ZZ]),TRUE);
 +  
 +  for(i=0; (i<grps->nr); i++)
 +    sfree(gnames[i]);
 +  sfree(gnames);
 +  done_blocka(grps);
 +  sfree(grps);
 +
 +}
 +
 +
 +
 +/* Abort when successive F_DISRES parameter entries in mtop->ffparams
 + * carry the same restraint label. Each repeat is first reported on
 + * stderr; the fatal error follows once all entries have been scanned.
 + * Returns without doing anything when gmx_mtop_ftype_count() finds no
 + * F_DISRES interactions.
 + */
 +static void check_disre(gmx_mtop_t *mtop)
 +{
 +    gmx_ffparams_t *ffp;
 +    int            t,nrepeat,cur,prev;
 +
 +    if (!(gmx_mtop_ftype_count(mtop,F_DISRES) > 0))
 +    {
 +        return;
 +    }
 +
 +    ffp     = &mtop->ffparams;
 +    nrepeat = 0;
 +    prev    = -1;
 +    for(t=0; t<ffp->ntypes; t++)
 +    {
 +        /* Only distance restraint entries take part in the comparison */
 +        if (ffp->functype[t] != F_DISRES)
 +        {
 +            continue;
 +        }
 +        cur = ffp->iparams[t].disres.label;
 +        if (cur == prev)
 +        {
 +            fprintf(stderr,"Distance restraint index %d occurs twice\n",cur);
 +            nrepeat++;
 +        }
 +        prev = cur;
 +    }
 +
 +    if (nrepeat > 0)
 +    {
 +        gmx_fatal(FARGS,"Found %d double distance restraint indices,\n"
 +                  "probably the parameters for multiple pairs in one restraint "
 +                  "are not identical\n",nrepeat);
 +    }
 +}
 +
++/* Determine per dimension whether the system has an absolute reference.
++ * AbsRef is cleared first. Unless posres_only is set, AbsRef[d] becomes 1
++ * for every d that is not below ndof_com(ir) and for every dimension in
++ * which some freeze group has a non-zero nFreeze entry. After that, each
++ * position restraint with a non-zero fcA component sets AbsRef for that
++ * dimension.
++ * Returns non-zero only when AbsRef is set in all of XX, YY and ZZ.
++ */
- static gmx_bool absolute_reference(t_inputrec *ir,gmx_mtop_t *sys,ivec AbsRef)
++static gmx_bool absolute_reference(t_inputrec *ir,gmx_mtop_t *sys,
++                                   gmx_bool posres_only,
++                                   ivec AbsRef)
 +{
-   int d,g,i;
-   gmx_mtop_ilistloop_t iloop;
-   t_ilist *ilist;
-   int nmol;
-   t_iparams *pr;
- 
-   /* Check the COM */
-   for(d=0; d<DIM; d++) {
-     AbsRef[d] = (d < ndof_com(ir) ? 0 : 1);
-   }
-   /* Check for freeze groups */
-   for(g=0; g<ir->opts.ngfrz; g++) {
-     for(d=0; d<DIM; d++) {
-       if (ir->opts.nFreeze[g][d] != 0) {
- 	AbsRef[d] = 1;
-       }
++    int d,g,i;
++    gmx_mtop_ilistloop_t iloop;
++    t_ilist *ilist;
++    int nmol;
++    t_iparams *pr;
++
++    clear_ivec(AbsRef);
++
++    if (!posres_only)
++    {
++        /* Check the COM */
++        for(d=0; d<DIM; d++)
++        {
++            AbsRef[d] = (d < ndof_com(ir) ? 0 : 1);
++        }
++        /* Check for freeze groups */
++        for(g=0; g<ir->opts.ngfrz; g++)
++        {
++            for(d=0; d<DIM; d++)
++            {
++                if (ir->opts.nFreeze[g][d] != 0)
++                {
++                    AbsRef[d] = 1;
++                }
++            }
++        }
 +    }
-   }
-   /* Check for position restraints */
-   iloop = gmx_mtop_ilistloop_init(sys);
-   while (gmx_mtop_ilistloop_next(iloop,&ilist,&nmol)) {
-     if (nmol > 0) {
-       for(i=0; i<ilist[F_POSRES].nr; i+=2) {
- 	pr = &sys->ffparams.iparams[ilist[F_POSRES].iatoms[i]];
- 	for(d=0; d<DIM; d++) {
- 	  if (pr->posres.fcA[d] != 0) {
- 	    AbsRef[d] = 1;
- 	  }
- 	}
-       }
++
++    /* Check for position restraints */
++    iloop = gmx_mtop_ilistloop_init(sys);
++    while (gmx_mtop_ilistloop_next(iloop,&ilist,&nmol))
++    {
++        /* The restraint scan is skipped for lists with nmol == 0 and once
++         * all three dimensions are already flagged; the loop itself still
++         * runs until gmx_mtop_ilistloop_next() reports the end.
++         */
++        if (nmol > 0 &&
++            (AbsRef[XX] == 0 || AbsRef[YY] == 0 || AbsRef[ZZ] == 0))
++        {
++            /* iatoms is walked in steps of 2; entry i is used as the
++             * index into sys->ffparams.iparams.
++             */
++            for(i=0; i<ilist[F_POSRES].nr; i+=2)
++            {
++                pr = &sys->ffparams.iparams[ilist[F_POSRES].iatoms[i]];
++                for(d=0; d<DIM; d++)
++                {
++                    if (pr->posres.fcA[d] != 0)
++                    {
++                        AbsRef[d] = 1;
++                    }
++                }
++            }
++        }
 +    }
-   }
 +
-   return (AbsRef[XX] != 0 && AbsRef[YY] != 0 && AbsRef[ZZ] != 0);
++    return (AbsRef[XX] != 0 && AbsRef[YY] != 0 && AbsRef[ZZ] != 0);
 +}
 +
 +/* Cross-check the input record ir against the topology sys. Warnings and
 + * notes go through wi, whose position is first set with
 + * set_warning_line(wi,mdparin,-1); some conditions end in gmx_fatal().
 + * Checks, in order: missing center of mass motion removal without an
 + * absolute reference, pressure coupling together with position restraints
 + * while refcoord_scaling is erscNO, the electrostatics choice versus the
 + * presence of charges, generalized reaction field versus temperature
 + * coupling, the accuracy estimate for eiSD1, net acceleration of the
 + * acceleration groups, soft-core versus the repulsion power, pull
 + * settings, and finally distance restraint labels via check_disre().
 + * Side effect: ir->opts.acc may be modified by the net acceleration
 + * correction.
 + */
 +void triple_check(const char *mdparin,t_inputrec *ir,gmx_mtop_t *sys,
 +                  warninp_t wi)
 +{
 +  char err_buf[256];
 +  int  i,m,g,nmol,npct;
 +  gmx_bool bCharge,bAcc;
 +  real gdt_max,*mgrp,mt;
 +  rvec acc;
 +  gmx_mtop_atomloop_block_t aloopb;
 +  gmx_mtop_atomloop_all_t aloop;
 +  t_atom *atom;
 +  ivec AbsRef;
 +  char warn_buf[STRLEN];
 +
 +  set_warning_line(wi,mdparin,-1);
 +
 +  if (EI_DYNAMICS(ir->eI) && !EI_SD(ir->eI) && ir->eI != eiBD &&
 +      ir->comm_mode == ecmNO &&
-       !(absolute_reference(ir,sys,AbsRef) || ir->nsteps <= 10)) {
++      !(absolute_reference(ir,sys,FALSE,AbsRef) || ir->nsteps <= 10)) {
 +    warning(wi,"You are not using center of mass motion removal (mdp option comm-mode), numerical rounding errors can lead to build up of kinetic energy of the center of mass");
 +  }
-   
++
++    /* Check for pressure coupling with absolute position restraints */
++    if (ir->epc != epcNO && ir->refcoord_scaling == erscNO)
++    {
++        /* Called with posres_only TRUE; only AbsRef is used, the return
++         * value is ignored.
++         */
++        absolute_reference(ir,sys,TRUE,AbsRef);
++        {
++            for(m=0; m<DIM; m++)
++            {
++                if (AbsRef[m] && norm2(ir->compress[m]) > 0)
++                {
++                    warning(wi,"You are using pressure coupling with absolute position restraints, this will give artifacts. Use the refcoord_scaling option.");
++                    break;
++                }
++            }
++        }
++    }
++
 +  /* bCharge becomes TRUE when any atom block has a non-zero q or qB */
 +  bCharge = FALSE;
 +  aloopb = gmx_mtop_atomloop_block_init(sys);
 +  while (gmx_mtop_atomloop_block_next(aloopb,&atom,&nmol)) {
 +    if (atom->q != 0 || atom->qB != 0) {
 +      bCharge = TRUE;
 +    }
 +  }
 +  
 +  if (!bCharge) {
 +    if (EEL_FULL(ir->coulombtype)) {
 +      sprintf(err_buf,
 +	      "You are using full electrostatics treatment %s for a system without charges.\n"
 +	      "This costs a lot of performance for just processing zeros, consider using %s instead.\n",
 +	      EELTYPE(ir->coulombtype),EELTYPE(eelCUT));
 +      warning(wi,err_buf);
 +    }
 +  } else {
 +    if (ir->coulombtype == eelCUT && ir->rcoulomb > 0 && !ir->implicit_solvent) {
 +      sprintf(err_buf,
 +	      "You are using a plain Coulomb cut-off, which might produce artifacts.\n"
 +	      "You might want to consider using %s electrostatics.\n",
 +	      EELTYPE(eelPME));
 +      warning_note(wi,err_buf);
 +    }
 +  }
 +
 +  /* Generalized reaction field */  
 +  /* NOTE(review): CHECK is defined outside this view; presumably it
 +   * reports err_buf when its condition holds - confirm.
 +   */
 +  if (ir->opts.ngtc == 0) {
 +    sprintf(err_buf,"No temperature coupling while using coulombtype %s",
 +	    eel_names[eelGRF]);
 +    CHECK(ir->coulombtype == eelGRF);
 +  }
 +  else {
 +    sprintf(err_buf,"When using coulombtype = %s"
 +	    " ref_t for temperature coupling should be > 0",
 +	    eel_names[eelGRF]);
 +    CHECK((ir->coulombtype == eelGRF) && (ir->opts.ref_t[0] <= 0));
 +  }
 +    
 +  if (ir->eI == eiSD1) {
 +    /* gdt_max is the largest delta_t/tau_t over all ngtc groups */
 +    gdt_max = 0;
 +    for(i=0; (i<ir->opts.ngtc); i++)
 +      gdt_max = max(gdt_max,ir->delta_t/ir->opts.tau_t[i]);
 +    if (0.5*gdt_max > 0.0015) {
-       sprintf(warn_buf,"The relative error with integrator %s is 0.5*delta_t/tau_t = %g, you might want to switch to integrator %s\n",
++      sprintf(warn_buf,"The relative error with integrator %s is 0.5*delta-t/tau-t = %g, you might want to switch to integrator %s\n",
 +	      ei_names[ir->eI],0.5*gdt_max,ei_names[eiSD2]);
 +      warning_note(wi,warn_buf);
 +    }
 +  }
 +
 +  /* bAcc: does any acceleration group have a component above 1e-6? */
 +  bAcc = FALSE;
 +  for(i=0; (i<sys->groups.grps[egcACC].nr); i++) {
 +    for(m=0; (m<DIM); m++) {
 +      if (fabs(ir->opts.acc[i][m]) > 1e-6) {
 +	bAcc = TRUE;
 +      }
 +    }
 +  }
 +  if (bAcc) {
 +    clear_rvec(acc);
 +    /* mgrp[g] sums atom->m per acceleration group; this relies on snew()
 +     * handing back zeroed memory - TODO confirm.
 +     */
 +    snew(mgrp,sys->groups.grps[egcACC].nr);
 +    aloop = gmx_mtop_atomloop_all_init(sys);
 +    while (gmx_mtop_atomloop_all_next(aloop,&i,&atom)) {
 +      mgrp[ggrpnr(&sys->groups,egcACC,i)] += atom->m;
 +    }
 +    mt = 0.0;
 +    for(i=0; (i<sys->groups.grps[egcACC].nr); i++) {
 +      for(m=0; (m<DIM); m++)
 +	acc[m] += ir->opts.acc[i][m]*mgrp[i];
 +      mt += mgrp[i];
 +    }
 +    for(m=0; (m<DIM); m++) {
 +      if (fabs(acc[m]) > 1e-6) {
 +	const char *dim[DIM] = { "X", "Y", "Z" };
 +	fprintf(stderr,
 +		"Net Acceleration in %s direction, will %s be corrected\n",
 +		dim[m],ir->nstcomm != 0 ? "" : "not");
 +	/* The correction subtracts acc[m]/mt from every group, and is
 +	 * applied only when nstcomm != 0 and m < ndof_com(ir).
 +	 */
 +	if (ir->nstcomm != 0 && m < ndof_com(ir)) {
 +	  acc[m] /= mt;
 +	  for (i=0; (i<sys->groups.grps[egcACC].nr); i++)
 +	    ir->opts.acc[i][m] -= acc[m];
 +	}
 +      }
 +    }
 +    sfree(mgrp);
 +  }
 +
 +  if (ir->efep != efepNO && ir->sc_alpha != 0 &&
 +      !gmx_within_tol(sys->ffparams.reppow,12.0,10*GMX_DOUBLE_EPS)) {
 +    gmx_fatal(FARGS,"Soft-core interactions are only supported with VdW repulsion power 12");
 +  }
 +
 +  if (ir->ePull != epullNO) {
 +    /* Pull group 0 without atoms is what the warning below calls an
 +     * absolute reference for pulling.
 +     */
 +    if (ir->pull->grp[0].nat == 0) {
-       absolute_reference(ir,sys,AbsRef);
++        absolute_reference(ir,sys,FALSE,AbsRef);
 +      for(m=0; m<DIM; m++) {
 +	if (ir->pull->dim[m] && !AbsRef[m]) {
 +	  warning(wi,"You are using an absolute reference for pulling, but the rest of the system does not have an absolute reference. This will lead to artifacts.");
 +	  break;
 +	}
 +      }
 +    }
 +
 +    if (ir->pull->eGeom == epullgDIRPBC) {
 +      /* Walks the lower triangle (m <= i) of compress and deform */
 +      for(i=0; i<3; i++) {
 +	for(m=0; m<=i; m++) {
 +	  if ((ir->epc != epcNO && ir->compress[i][m] != 0) ||
 +	      ir->deform[i][m] != 0) {
 +	    for(g=1; g<ir->pull->ngrp; g++) {
 +	      if (ir->pull->grp[g].vec[m] != 0) {
 +		gmx_fatal(FARGS,"Can not have dynamic box while using pull geometry '%s' (dim %c)",EPULLGEOM(ir->pull->eGeom),'x'+m);
 +	      }
 +	    }
 +	  }
 +	}
 +      }
 +    }
 +  }
 +
 +  check_disre(sys);
 +}
 +
 +/* Check box, constraint and cut-off settings in ir against box and report
 + * the findings through wi as notes, warnings or errors.
 + * bConstr gates the SHAKE and LINCS specific checks.
 + * Side effect: an ir->LincsWarnAngle above 90 is reset to 90.
 + */
 +void double_check(t_inputrec *ir,matrix box,gmx_bool bConstr,warninp_t wi)
 +{
 +  real min_size;
 +  gmx_bool bTWIN;
 +  char warn_buf[STRLEN];
 +  const char *ptr;
 +  
 +  /* A non-NULL return from check_box() is passed on as an error message */
 +  ptr = check_box(ir->ePBC,box);
 +  if (ptr) {
 +      warning_error(wi,ptr);
 +  }  
 +
 +  if (bConstr && ir->eConstrAlg == econtSHAKE) {
 +    if (ir->shake_tol <= 0.0) {
-       sprintf(warn_buf,"ERROR: shake_tol must be > 0 instead of %g\n",
++      sprintf(warn_buf,"ERROR: shake-tol must be > 0 instead of %g\n",
 +              ir->shake_tol);
 +      warning_error(wi,warn_buf);
 +    }
 +
 +    if (IR_TWINRANGE(*ir) && ir->nstlist > 1) {
 +      sprintf(warn_buf,"With twin-range cut-off's and SHAKE the virial and the pressure are incorrect.");
 +      /* Only a warning without pressure coupling, an error with it */
 +      if (ir->epc == epcNO) {
 +	warning(wi,warn_buf);
 +      } else {
 +          warning_error(wi,warn_buf);
 +      }
 +    }
 +  }
 +
 +  if( (ir->eConstrAlg == econtLINCS) && bConstr) {
 +    /* If we have Lincs constraints: */
 +    if(ir->eI==eiMD && ir->etc==etcNO &&
 +       ir->eConstrAlg==econtLINCS && ir->nLincsIter==1) {
 +      sprintf(warn_buf,"For energy conservation with LINCS, lincs_iter should be 2 or larger.\n");
 +      warning_note(wi,warn_buf);
 +    }
 +    
 +    if ((ir->eI == eiCG || ir->eI == eiLBFGS) && (ir->nProjOrder<8)) {
-       sprintf(warn_buf,"For accurate %s with LINCS constraints, lincs_order should be 8 or more.",ei_names[ir->eI]);
++      sprintf(warn_buf,"For accurate %s with LINCS constraints, lincs-order should be 8 or more.",ei_names[ir->eI]);
 +      warning_note(wi,warn_buf);
 +    }
 +    if (ir->epc==epcMTTK) {
 +        warning_error(wi,"MTTK not compatible with lincs -- use shake instead.");
 +    }
 +  }
 +
 +  if (ir->LincsWarnAngle > 90.0) {
 +    sprintf(warn_buf,"lincs-warnangle can not be larger than 90 degrees, setting it to 90.\n");
 +    warning(wi,warn_buf);
 +    ir->LincsWarnAngle = 90.0;
 +  }
 +
 +  if (ir->ePBC != epbcNONE) {
 +    if (ir->nstlist == 0) {
 +      warning(wi,"With nstlist=0 atoms are only put into the box at step 0, therefore drifting atoms might cause the simulation to crash.");
 +    }
 +    /* bTWIN only selects which option name the grid-search message names */
 +    bTWIN = (ir->rlistlong > ir->rlist);
 +    if (ir->ns_type == ensGRID) {
 +      if (sqr(ir->rlistlong) >= max_cutoff2(ir->ePBC,box)) {
 +          sprintf(warn_buf,"ERROR: The cut-off length is longer than half the shortest box vector or longer than the smallest box diagonal element. Increase the box size or decrease %s.\n",
 +		bTWIN ? (ir->rcoulomb==ir->rlistlong ? "rcoulomb" : "rvdw"):"rlist");
 +          warning_error(wi,warn_buf);
 +      }
 +    } else {
 +      /* Other search types compare against the smallest box diagonal element */
 +      min_size = min(box[XX][XX],min(box[YY][YY],box[ZZ][ZZ]));
 +      if (2*ir->rlistlong >= min_size) {
 +          sprintf(warn_buf,"ERROR: One of the box lengths is smaller than twice the cut-off length. Increase the box size or decrease rlist.");
 +          warning_error(wi,warn_buf);
 +	if (TRICLINIC(box))
 +	  fprintf(stderr,"Grid search might allow larger cut-off's than simple search with triclinic boxes.");
 +      }
 +    }
 +  }
 +}
 +
 +/* Print the two largest charge group radii that calc_chargegroup_radii()
 + * returns for Van der Waals and for Coulomb. Then, only when
 + * ir->rlist > 0, warn through wi if a sum of the two radii exceeds rlist,
 + * or exceeds the margin rlist - rvdw (Van der Waals) or
 + * rlistlong - rcoulomb (Coulomb). The margin messages are issued as
 + * warnings when ir_NVE(ir) holds and as notes otherwise.
 + */
 +void check_chargegroup_radii(const gmx_mtop_t *mtop,const t_inputrec *ir,
 +                             rvec *x,
 +                             warninp_t wi)
 +{
 +    char msg[STRLEN];
 +    real vdw_a,vdw_b,coul_a,coul_b;
 +
 +    calc_chargegroup_radii(mtop,x,&vdw_a,&vdw_b,&coul_a,&coul_b);
 +
 +    if (vdw_a > 0)
 +    {
 +        printf("Largest charge group radii for Van der Waals: %5.3f, %5.3f nm\n",
 +               vdw_a,vdw_b);
 +    }
 +    if (coul_a > 0)
 +    {
 +        printf("Largest charge group radii for Coulomb:       %5.3f, %5.3f nm\n",
 +               coul_a,coul_b);
 +    }
 +
 +    /* Without a positive rlist there is nothing to compare against */
 +    if (!(ir->rlist > 0))
 +    {
 +        return;
 +    }
 +
 +    /* A sum beyond rlist itself gets one plain warning and ends the checks */
 +    if (vdw_a + vdw_b > ir->rlist ||
 +        coul_a + coul_b > ir->rlist)
 +    {
 +        sprintf(msg,"The sum of the two largest charge group radii (%f) is larger than rlist (%f)\n",max(vdw_a+vdw_b,coul_a+coul_b),ir->rlist);
 +        warning(wi,msg);
 +        return;
 +    }
 +
 +    /* The margin checks apply only to interaction types that pass the
 +     * *_IS_ZERO_AT_CUTOFF tests; the original note here says that user
 +     * defined interactions might purposely not be zero at the cut-off.
 +     */
 +    if (EVDW_IS_ZERO_AT_CUTOFF(ir->vdwtype) &&
 +        vdw_a + vdw_b > ir->rlist - ir->rvdw)
 +    {
 +        sprintf(msg,"The sum of the two largest charge group radii (%f) is larger than rlist (%f) - rvdw (%f)\n",
 +                vdw_a+vdw_b,
 +                ir->rlist,ir->rvdw);
 +        if (ir_NVE(ir))
 +        {
 +            warning(wi,msg);
 +        }
 +        else
 +        {
 +            warning_note(wi,msg);
 +        }
 +    }
 +
 +    if (EEL_IS_ZERO_AT_CUTOFF(ir->coulombtype) &&
 +        coul_a + coul_b > ir->rlistlong - ir->rcoulomb)
 +    {
 +        sprintf(msg,"The sum of the two largest charge group radii (%f) is larger than %s (%f) - rcoulomb (%f)\n",
 +                coul_a+coul_b,
 +                ir->rlistlong > ir->rlist ? "rlistlong" : "rlist",
 +                ir->rlistlong,ir->rcoulomb);
 +        if (ir_NVE(ir))
 +        {
 +            warning(wi,msg);
 +        }
 +        else
 +        {
 +            warning_note(wi,msg);
 +        }
 +    }
 +}
diff --cc src/gromacs/mdlib/domdec.c
index ac1ec2ee31,0000000000..ec9775d4fc
mode 100644,000000..100644
--- a/src/gromacs/mdlib/domdec.c
+++ b/src/gromacs/mdlib/domdec.c
@@@ -1,8653 -1,0 +1,8656 @@@
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + * This file is part of Gromacs        Copyright (c) 1991-2008
 + * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
 + *
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the research papers on the package. Check out http://www.gromacs.org
 + * 
 + * And Hey:
 + * Gnomes, ROck Monsters And Chili Sauce
 + */
 +
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <stdio.h>
 +#include <time.h>
 +#include <math.h>
 +#include <string.h>
 +#include <stdlib.h>
 +#include "typedefs.h"
 +#include "smalloc.h"
 +#include "vec.h"
 +#include "domdec.h"
 +#include "domdec_network.h"
 +#include "nrnb.h"
 +#include "pbc.h"
 +#include "chargegroup.h"
 +#include "constr.h"
 +#include "mdatoms.h"
 +#include "names.h"
 +#include "pdbio.h"
 +#include "futil.h"
 +#include "force.h"
 +#include "pme.h"
 +#include "pull.h"
 +#include "pull_rotation.h"
 +#include "gmx_wallcycle.h"
 +#include "mdrun.h"
 +#include "nsgrid.h"
 +#include "shellfc.h"
 +#include "mtop_util.h"
 +#include "gmxfio.h"
 +#include "gmx_ga2la.h"
 +#include "gmx_sort.h"
 +
 +#ifdef GMX_LIB_MPI
 +#include <mpi.h>
 +#endif
 +#ifdef GMX_THREADS
 +#include "tmpi.h"
 +#endif
 +
 +#define DDRANK(dd,rank)    (rank)
 +#define DDMASTERRANK(dd)   (dd->masterrank)
 +
 +typedef struct gmx_domdec_master
 +{
 +    /* The cell boundaries */
 +    real **cell_x;
 +    /* The global charge group division */
 +    int  *ncg;     /* Number of home charge groups for each node */
 +    int  *index;   /* Index of nnodes+1 into cg */
 +    int  *cg;      /* Global charge group index */
 +    int  *nat;     /* Number of home atoms for each node. */
 +    int  *ibuf;    /* Buffer for communication */
 +    rvec *vbuf;    /* Buffer for state scattering and gathering */
 +} gmx_domdec_master_t;
 +
 +typedef struct
 +{
 +    /* The numbers of charge groups to send and receive for each cell
 +     * that requires communication; the last entry contains the total
 +     * number of atoms that need to be communicated.
 +     */
 +    int nsend[DD_MAXIZONE+2];
 +    int nrecv[DD_MAXIZONE+2];
 +    /* The charge groups to send */
 +    int *index;
 +    int nalloc;
 +    /* The atom range for non-in-place communication */
 +    int cell2at0[DD_MAXIZONE];
 +    int cell2at1[DD_MAXIZONE];
 +} gmx_domdec_ind_t;
 +
 +typedef struct
 +{
 +    int  np;                   /* Number of grid pulses in this dimension */
 +    int  np_dlb;               /* For dlb, for use with edlbAUTO          */
 +    gmx_domdec_ind_t *ind;     /* The indices to communicate, size np     */
 +    int  np_nalloc;
 +    gmx_bool bInPlace;             /* Can we communicate in place?            */
 +} gmx_domdec_comm_dim_t;
 +
 +typedef struct
 +{
 +    gmx_bool *bCellMin;    /* Temp. var.: is this cell size at the limit     */
 +    real *cell_f;      /* State var.: cell boundaries, box relative      */
 +    real *old_cell_f;  /* Temp. var.: old cell size                      */
 +    real *cell_f_max0; /* State var.: max lower boundary, incl neighbors */
 +    real *cell_f_min1; /* State var.: min upper boundary, incl neighbors */
 +    real *bound_min;   /* Temp. var.: lower limit for cell boundary      */
 +    real *bound_max;   /* Temp. var.: upper limit for cell boundary      */
 +    gmx_bool bLimited;     /* State var.: is DLB limited in this dim and row */
 +    real *buf_ncd;     /* Temp. var.                                     */
 +} gmx_domdec_root_t;
 +
 +#define DD_NLOAD_MAX 9
 +
 +/* Here floats are accurate enough, since these variables
 + * only influence the load balancing, not the actual MD results.
 + */
 +typedef struct
 +{
 +    int  nload;
 +    float *load;
 +    float sum;
 +    float max;
 +    float sum_m;
 +    float cvol_min;
 +    float mdf;
 +    float pme;
 +    int   flags;
 +} gmx_domdec_load_t;
 +
 +typedef struct
 +{
 +    int  nsc;
 +    int  ind_gl;
 +    int  ind;
 +} gmx_cgsort_t;
 +
 +typedef struct
 +{
 +    gmx_cgsort_t *sort1,*sort2;
 +    int  sort_nalloc;
 +    gmx_cgsort_t *sort_new;
 +    int  sort_new_nalloc;
 +    int  *ibuf;
 +    int  ibuf_nalloc;
 +} gmx_domdec_sort_t;
 +
 +typedef struct
 +{
 +    rvec *v;
 +    int  nalloc;
 +} vec_rvec_t;
 +
 +/* This enum determines the order of the coordinates.
 + * ddnatHOME and ddnatZONE should be first and second,
 + * the others can be ordered as wanted.
 + */
 +enum { ddnatHOME, ddnatZONE, ddnatVSITE, ddnatCON, ddnatNR };
 +
 +/* The DLB settings. edlb_names lists the matching strings in enum order
 + * and is sized by the edlbNR terminator, so the two must be kept in step.
 + */
 +enum { edlbAUTO, edlbNO, edlbYES, edlbNR };
 +const char *edlb_names[edlbNR] = { "auto", "no", "yes" };
 +
 +typedef struct
 +{
 +    int  dim;      /* The dimension                                          */
 +    gmx_bool dim_match;/* Tells if DD and PME dims match                         */
 +    int  nslab;    /* The number of PME slabs in this dimension              */
 +    real *slb_dim_f; /* Cell sizes for determining the PME comm. with SLB    */
 +    int  *pp_min;  /* The minimum pp node location, size nslab               */
 +    int  *pp_max;  /* The maximum pp node location, size nslab               */
 +    int  maxshift; /* The maximum shift for coordinate redistribution in PME */
 +} gmx_ddpme_t;
 +
 +typedef struct
 +{
 +    real min0;    /* The minimum bottom of this zone                        */
 +    real max1;    /* The maximum top of this zone                           */
 +    real mch0;    /* The maximum bottom communication height for this zone  */
 +    real mch1;    /* The maximum top communication height for this zone     */
 +    real p1_0;    /* The bottom value of the first cell in this zone        */
 +    real p1_1;    /* The top value of the first cell in this zone           */
 +} gmx_ddzone_t;
 +
 +/* The communication setup and bookkeeping data of the domain decomposition,
 + * accessed through dd->comm.
 + */
 +typedef struct gmx_domdec_comm
 +{
 +    /* All arrays are indexed with 0 to dd->ndim (not Cartesian indexing),
 +     * unless stated otherwise.
 +     */
 +
 +    /* The number of decomposition dimensions for PME, 0: no PME */
 +    int  npmedecompdim;
 +    /* The number of nodes doing PME (PP/PME or only PME) */
 +    int  npmenodes;
 +    int  npmenodes_x;
 +    int  npmenodes_y;
 +    /* The communication setup including the PME only nodes */
 +    gmx_bool bCartesianPP_PME;
 +    ivec ntot;
 +    int  cartpmedim;
 +    int  *pmenodes;          /* size npmenodes                         */
 +    int  *ddindex2simnodeid; /* size npmenodes, only with bCartesianPP
 +                              * but with bCartesianPP_PME              */
 +    gmx_ddpme_t ddpme[2];
 +    
 +    /* The DD particle-particle nodes only */
 +    gmx_bool bCartesianPP;
 +    int  *ddindex2ddnodeid; /* size npmenode, only with bCartesianPP_PME */
 +    
 +    /* The global charge groups */
 +    t_block cgs_gl;
 +
 +    /* Should we sort the cgs */
 +    int  nstSortCG;
 +    gmx_domdec_sort_t *sort;
 +    
 +    /* Are there bonded and multi-body interactions between charge groups? */
 +    gmx_bool bInterCGBondeds;
 +    gmx_bool bInterCGMultiBody;
 +
 +    /* Data for the optional bonded interaction atom communication range */
 +    gmx_bool bBondComm;
 +    t_blocka *cglink;
 +    char *bLocalCG;
 +
 +    /* The DLB option */
 +    int  eDLB;
 +    /* Are we actually using DLB? */
 +    gmx_bool bDynLoadBal;
 +
 +    /* Cell sizes for static load balancing, first index cartesian */
 +    real **slb_frac;
 +    
 +    /* The width of the communicated boundaries */
 +    real cutoff_mbody;
 +    real cutoff;
 +    /* The minimum cell size (including triclinic correction) */
 +    rvec cellsize_min;
 +    /* For dlb, for use with edlbAUTO */
 +    rvec cellsize_min_dlb;
 +    /* The lower limit for the DD cell size with DLB */
 +    real cellsize_limit;
 +    /* Effectively no NB cut-off limit with DLB for systems without PBC? */
 +    gmx_bool bVacDLBNoLimit;
 +
 +    /* tric_dir is only stored here because dd_get_ns_ranges needs it */
 +    ivec tric_dir;
 +    /* box0 and box_size are required with dim's without pbc and -gcom */
 +    rvec box0;
 +    rvec box_size;
 +    
 +    /* The cell boundaries */
 +    rvec cell_x0;
 +    rvec cell_x1;
 +
 +    /* The old location of the cell boundaries, to check cg displacements */
 +    rvec old_cell_x0;
 +    rvec old_cell_x1;
 +
 +    /* The communication setup and charge group boundaries for the zones */
 +    gmx_domdec_zones_t zones;
 +    
 +    /* The zone limits for DD dimensions 1 and 2 (not 0), determined from
 +     * cell boundaries of neighboring cells for dynamic load balancing.
 +     */
 +    gmx_ddzone_t zone_d1[2];
 +    gmx_ddzone_t zone_d2[2][2];
 +    
 +    /* The coordinate/force communication setup and indices */
 +    gmx_domdec_comm_dim_t cd[DIM];
 +    /* The maximum number of cells to communicate with in one dimension */
 +    int  maxpulse;
 +    
 +    /* Which cg distribution is stored on the master node */
 +    int master_cg_ddp_count;
 +    
 +    /* The number of cg's received from the direct neighbors */
 +    int  zone_ncg1[DD_MAXZONE];
 +    
 +    /* The atom counts, the range for each type t is nat[t-1] <= at < nat[t] */
 +    int  nat[ddnatNR];
 +    
 +    /* Integer communication buffer for general use */
 +    int  *buf_int;
 +    int  nalloc_int;
 +
 +    /* rvec communication buffer for general use */
 +    vec_rvec_t vbuf;
 +    
 +    /* Communication buffers only used with multiple grid pulses */
 +    int  *buf_int2;
 +    int  nalloc_int2;
 +    vec_rvec_t vbuf2;
 +    
 +    /* Communication buffers for local redistribution */
 +    int  **cggl_flag;
 +    int  cggl_flag_nalloc[DIM*2];
 +    rvec **cgcm_state;
 +    int  cgcm_state_nalloc[DIM*2];
 +    
 +    /* Cell sizes for dynamic load balancing */
 +    gmx_domdec_root_t **root;
 +    real *cell_f_row;
 +    real cell_f0[DIM];
 +    real cell_f1[DIM];
 +    real cell_f_max0[DIM];
 +    real cell_f_min1[DIM];
 +    
 +    /* Stuff for load communication */
 +    gmx_bool bRecordLoad;
 +    gmx_domdec_load_t *load;
 +#ifdef GMX_MPI
 +    MPI_Comm *mpi_comm_load;
 +#endif
 +
 +    /* Maximum DLB scaling per load balancing step in percent */
 +    int dlb_scale_lim;
 +
 +    /* Cycle counters */
 +    float cycl[ddCyclNr];
 +    int   cycl_n[ddCyclNr];
 +    float cycl_max[ddCyclNr];
 +    /* Flop counter (0=no,1=yes,2=with (eFlop-1)*5% noise) */
 +    int eFlop;
 +    double flop;
 +    int    flop_n;
 +    /* How often did we have load measurements */
 +    int    n_load_have;
 +    /* How often have we collected the load measurements */
 +    int    n_load_collect;
 +    
 +    /* Statistics */
 +    double sum_nat[ddnatNR-ddnatZONE];
 +    int    ndecomp;
 +    int    nload;
 +    double load_step;
 +    double load_sum;
 +    double load_max;
 +    ivec   load_lim;
 +    double load_mdf;
 +    double load_pme;
 +
 +    /* The last partition step */
 +    gmx_large_int_t globalcomm_step;
 +
 +    /* Debugging */
 +    int  nstDDDump;
 +    int  nstDDDumpGrid;
 +    int  DD_debug;
 +} gmx_domdec_comm_t;
 +
 +/* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
 +#define DD_CGIBS 2
 +
 +/* The flags for the cggl_flag buffer in gmx_domdec_comm_t.
 + * DD_FLAG_NRCG masks the lower 16 bits; the forward and backward flags
 + * for dimension index d use bits 16+2*d and 16+2*d+1 respectively.
 + */
 +#define DD_FLAG_NRCG  65535
 +#define DD_FLAG_FW(d) (1<<(16+(d)*2))
 +#define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
 +
 +/* Zone permutation required to obtain consecutive charge groups
 + * for neighbor searching.
 + */
 +static const int zone_perm[3][4] = { {0,0,0,0},{1,0,0,0},{3,0,1,2} };
 +
 +/* dd_zo and dd_zp3/dd_zp2 are set up such that i zones with non-zero
 + * components see only j zones with that component 0.
 + */
 +
 +/* The DD zone order */
 +static const ivec dd_zo[DD_MAXZONE] =
 +  {{0,0,0},{1,0,0},{1,1,0},{0,1,0},{0,1,1},{0,0,1},{1,0,1},{1,1,1}};
 +
 +/* The 3D setup */
 +#define dd_z3n  8
 +#define dd_zp3n 4
 +static const ivec dd_zp3[dd_zp3n] = {{0,0,8},{1,3,6},{2,5,6},{3,5,7}};
 +
 +/* The 2D setup */
 +#define dd_z2n  4
 +#define dd_zp2n 2
 +static const ivec dd_zp2[dd_zp2n] = {{0,0,4},{1,3,4}};
 +
 +/* The 1D setup */
 +#define dd_z1n  2
 +#define dd_zp1n 1
 +static const ivec dd_zp1[dd_zp1n] = {{0,0,2}};
 +
 +/* Factors used to avoid problems due to rounding issues */
 +#define DD_CELL_MARGIN       1.0001
 +#define DD_CELL_MARGIN2      1.00005
 +/* Factor to account for pressure scaling during nstlist steps */
 +#define DD_PRES_SCALE_MARGIN 1.02
 +
 +/* Allowed performance loss before we DLB or warn */
 +#define DD_PERF_LOSS 0.05
 +
 +/* NOTE(review): presumably the number of reals in a cell-fraction row for
 + * DD dimension index di (nc+1 boundaries plus extra entries) - confirm
 + * against the users of this macro.
 + */
 +#define DD_CELL_F_SIZE(dd,di) ((dd)->nc[(dd)->dim[(di)]]+1+(di)*2+1+(di))
 +
 +/* Use separate MPI send and receive commands
 + * when nnodes <= GMX_DD_NNODES_SENDRECV.
 + * This saves memory (and some copying for small nnodes).
 + * For high parallelization scatter and gather calls are used.
 + */
 +#define GMX_DD_NNODES_SENDRECV 4
 +
 +
 +/*
 +#define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
 +
 +static void index2xyz(ivec nc,int ind,ivec xyz)
 +{
 +  xyz[XX] = ind % nc[XX];
 +  xyz[YY] = (ind / nc[XX]) % nc[YY];
 +  xyz[ZZ] = ind / (nc[YY]*nc[XX]);
 +}
 +*/
 +
 +/* This order is required to minimize the coordinate communication in PME
 + * which uses decomposition in the x direction.
 + */
 +#define dd_index(n,i) ((((i)[XX]*(n)[YY] + (i)[YY])*(n)[ZZ]) + (i)[ZZ])
 +
 +/* Inverse of dd_index: split the linear DD index ind into the cell
 + * coordinates xyz for a grid of nc cells, with z running fastest.
 + */
 +static void ddindex2xyz(ivec nc,int ind,ivec xyz)
 +{
 +    const int ny = nc[YY];
 +    const int nz = nc[ZZ];
 +    /* Index of the (x,y) column that contains ind */
 +    const int column = ind / nz;
 +
 +    xyz[ZZ] = ind % nz;
 +    xyz[YY] = column % ny;
 +    xyz[XX] = ind / (ny*nz);
 +}
 +
 +/* Return the DD node id of the cell with coordinates c.
 + * With a Cartesian PP communicator and no MPI support compiled in,
 + * -1 is returned.
 + */
 +static int ddcoord2ddnodeid(gmx_domdec_t *dd,ivec c)
 +{
 +    gmx_domdec_comm_t *comm = dd->comm;
 +    int cell_index = dd_index(dd->nc,c);
 +    int nodeid = -1;
 +
 +    if (comm->bCartesianPP_PME)
 +    {
 +        /* Look the node id up in the precomputed table */
 +        return comm->ddindex2ddnodeid[cell_index];
 +    }
 +    if (!comm->bCartesianPP)
 +    {
 +        /* Without a Cartesian setup the DD index is the node id */
 +        return cell_index;
 +    }
 +
 +#ifdef GMX_MPI
 +    MPI_Cart_rank(dd->mpi_comm_all,c,&nodeid);
 +#endif
 +
 +    return nodeid;
 +}
 +
 +/* Return whether the DD box can change: either not all dimensions are
 + * bounded or the input record describes a dynamic box.
 + */
 +static gmx_bool dynamic_dd_box(gmx_ddbox_t *ddbox,t_inputrec *ir)
 +{
 +    gmx_bool bUnbounded = (ddbox->nboundeddim < DIM);
 +
 +    if (bUnbounded)
 +    {
 +        return bUnbounded;
 +    }
 +
 +    return ((DYNAMIC_BOX(*ir)) != 0);
 +}
 +
 +/* Return the 1-based global atom number of local atom i.
 + * Without domain decomposition (dd == NULL) this is simply i+1.
 + * A fatal error is issued when i is not below the local atom count.
 + */
 +int ddglatnr(gmx_domdec_t *dd,int i)
 +{
 +    int nat_local;
 +
 +    if (dd == NULL)
 +    {
 +        return i + 1;
 +    }
 +
 +    nat_local = dd->comm->nat[ddnatNR-1];
 +    if (i >= nat_local)
 +    {
 +        gmx_fatal(FARGS,"glatnr called with %d, which is larger than the local number of atoms (%d)",i,nat_local);
 +    }
 +
 +    return dd->gatindex[i] + 1;
 +}
 +
 +/* Return a pointer to the global charge group block stored in dd->comm */
 +t_block *dd_charge_groups_global(gmx_domdec_t *dd)
 +{
 +    gmx_domdec_comm_t *comm = dd->comm;
 +
 +    return &comm->cgs_gl;
 +}
 +
 +/* Put v in the empty state: no storage and zero allocated elements */
 +static void vec_rvec_init(vec_rvec_t *v)
 +{
 +    v->v      = NULL;
 +    v->nalloc = 0;
 +}
 +
 +/* Ensure that v can hold at least n elements, growing it with
 + * over-allocation when the current allocation is too small.
 + */
 +static void vec_rvec_check_alloc(vec_rvec_t *v,int n)
 +{
 +    if (n <= v->nalloc)
 +    {
 +        /* Large enough already */
 +        return;
 +    }
 +
 +    v->nalloc = over_alloc_dd(n);
 +    srenew(v->v,v->nalloc);
 +}
 +
 +/* Store the global charge group indices of the home charge groups in state,
 + * so the state carries the charge group distribution it belongs to.
 + * The state must belong to the current decomposition (equal ddp_count),
 + * otherwise an inconsistency error is raised.
 + */
 +void dd_store_state(gmx_domdec_t *dd,t_state *state)
 +{
 +    int i;
 +    
 +    if (state->ddp_count != dd->ddp_count)
 +    {
 +        gmx_incons("The state does not match the domain decomposition state");
 +    }
 +    
 +    state->ncg_gl = dd->ncg_home;
 +    if (state->ncg_gl > state->cg_gl_nalloc)
 +    {
 +        /* Grow the index buffer of the state */
 +        state->cg_gl_nalloc = over_alloc_dd(state->ncg_gl);
 +        srenew(state->cg_gl,state->cg_gl_nalloc);
 +    }
 +    for(i=0; i<state->ncg_gl; i++)
 +    {
 +        state->cg_gl[i] = dd->index_gl[i];
 +    }
 +    
 +    /* Record for which decomposition count cg_gl is valid */
 +    state->ddp_count_cg_gl = dd->ddp_count;
 +}
 +
 +/* Return a pointer to the zone setup stored in dd->comm */
 +gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
 +{
 +    gmx_domdec_comm_t *comm = dd->comm;
 +
 +    return &comm->zones;
 +}
 +
 +/* Determine the neighbor search ranges for i charge group icg:
 + * the j charge group range [*jcg0,*jcg1) and, for each decomposed
 + * dimension, the shift range shift0..shift1.
 + * A fatal error is issued when icg does not fall in any i-zone.
 + */
 +void dd_get_ns_ranges(gmx_domdec_t *dd,int icg,
 +                      int *jcg0,int *jcg1,ivec shift0,ivec shift1)
 +{
 +    gmx_domdec_zones_t *zones;
 +    int izone,d,dim;
 +
 +    zones = &dd->comm->zones;
 +
 +    /* Find the i-zone that contains icg; stop at nizone so we never
 +     * inspect entries beyond the i-zones that are in use.
 +     */
 +    izone = 0;
 +    while (izone < zones->nizone && icg >= zones->izone[izone].cg1)
 +    {
 +        izone++;
 +    }
 +    
 +    if (izone == 0)
 +    {
 +        /* In the home zone the j range starts at icg itself */
 +        *jcg0 = icg;
 +    }
 +    else if (izone < zones->nizone)
 +    {
 +        *jcg0 = zones->izone[izone].jcg0;
 +    }
 +    else
 +    {
 +        gmx_fatal(FARGS,"DD icg %d out of range: izone (%d) >= nizone (%d)",
 +                  icg,izone,zones->nizone);
 +    }
 +        
 +    *jcg1 = zones->izone[izone].jcg1;
 +    
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        dim = dd->dim[d];
 +        shift0[dim] = zones->izone[izone].shift0[dim];
 +        shift1[dim] = zones->izone[izone].shift1[dim];
 +        if (dd->comm->tric_dir[dim] || (dd->bGridJump && d > 0))
 +        {
 +            /* A conservative approach, this can be optimized */
 +            shift0[dim] -= 1;
 +            shift1[dim] += 1;
 +        }
 +    }
 +}
 +
 +/* Return the upper end of the atom range of type ddnatVSITE */
 +int dd_natoms_vsite(gmx_domdec_t *dd)
 +{
 +    gmx_domdec_comm_t *comm = dd->comm;
 +
 +    return comm->nat[ddnatVSITE];
 +}
 +
 +/* Return the atom index range [*at_start,*at_end) of type ddnatCON */
 +void dd_get_constraint_range(gmx_domdec_t *dd,int *at_start,int *at_end)
 +{
 +    const int *nat = dd->comm->nat;
 +
 +    *at_end   = nat[ddnatCON];
 +    *at_start = nat[ddnatCON-1];
 +}
 +
 +/* Communicate coordinates to the neighboring cells.
 + * For every DD dimension and pulse the atoms of the charge groups listed
 + * in ind->index are packed into the send buffer, sent in the backward
 + * direction, and the received coordinates are stored in x: directly behind
 + * the atoms already present (bInPlace) or copied from vbuf2 into the
 + * cell2at0..cell2at1 ranges.
 + */
 +void dd_move_x(gmx_domdec_t *dd,matrix box,rvec x[])
 +{
 +    int  nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
 +    int  *index,*cgindex;
 +    gmx_domdec_comm_t *comm;
 +    gmx_domdec_comm_dim_t *cd;
 +    gmx_domdec_ind_t *ind;
 +    rvec shift={0,0,0},*buf,*rbuf;
 +    gmx_bool bPBC,bScrew;
 +    
 +    comm = dd->comm;
 +    
 +    cgindex = dd->cgindex;
 +    
 +    buf = comm->vbuf.v;
 +
 +    nzone = 1;
 +    nat_tot = dd->nat_home;
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        /* Only the cell with index 0 along this dimension shifts
 +         * the coordinates it sends.
 +         */
 +        bPBC   = (dd->ci[dd->dim[d]] == 0);
 +        bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
 +        if (bPBC)
 +        {
 +            copy_rvec(box[dd->dim[d]],shift);
 +        }
 +        cd = &comm->cd[d];
 +        for(p=0; p<cd->np; p++)
 +        {
 +            ind = &cd->ind[p];
 +            index = ind->index;
 +            n = 0;
 +            if (!bPBC)
 +            {
 +                /* Plain copy into the send buffer */
 +                for(i=0; i<ind->nsend[nzone]; i++)
 +                {
 +                    at0 = cgindex[index[i]];
 +                    at1 = cgindex[index[i]+1];
 +                    for(j=at0; j<at1; j++)
 +                    {
 +                        copy_rvec(x[j],buf[n]);
 +                        n++;
 +                    }
 +                }
 +            }
 +            else if (!bScrew)
 +            {
 +                for(i=0; i<ind->nsend[nzone]; i++)
 +                {
 +                    at0 = cgindex[index[i]];
 +                    at1 = cgindex[index[i]+1];
 +                    for(j=at0; j<at1; j++)
 +                    {
 +                        /* We need to shift the coordinates */
 +                        rvec_add(x[j],shift,buf[n]);
 +                        n++;
 +                    }
 +                }
 +            }
 +            else
 +            {
 +                for(i=0; i<ind->nsend[nzone]; i++)
 +                {
 +                    at0 = cgindex[index[i]];
 +                    at1 = cgindex[index[i]+1];
 +                    for(j=at0; j<at1; j++)
 +                    {
 +                        /* Shift x */
 +                        buf[n][XX] = x[j][XX] + shift[XX];
 +                        /* Rotate y and z.
 +                         * This operation requires a special shift force
 +                         * treatment, which is performed in calc_vir.
 +                         */
 +                        buf[n][YY] = box[YY][YY] - x[j][YY];
 +                        buf[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
 +                        n++;
 +                    }
 +                }
 +            }
 +            
 +            if (cd->bInPlace)
 +            {
 +                /* Receive directly behind the atoms already in x */
 +                rbuf = x + nat_tot;
 +            }
 +            else
 +            {
 +                rbuf = comm->vbuf2.v;
 +            }
 +            /* Send and receive the coordinates */
 +            dd_sendrecv_rvec(dd, d, dddirBackward,
 +                             buf,  ind->nsend[nzone+1],
 +                             rbuf, ind->nrecv[nzone+1]);
 +            if (!cd->bInPlace)
 +            {
 +                /* Distribute the received coordinates over the zones */
 +                j = 0;
 +                for(zone=0; zone<nzone; zone++)
 +                {
 +                    for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
 +                    {
 +                        copy_rvec(rbuf[j],x[i]);
 +                        j++;
 +                    }
 +                }
 +            }
 +            nat_tot += ind->nrecv[nzone+1];
 +        }
 +        /* The number of zones doubles with every dimension */
 +        nzone += nzone;
 +    }
 +}
 +
 +/* Communicate forces back to the cells that own the atoms.
 + * The dimensions and pulses are traversed in reverse order; for each pulse
 + * the forces on the received atoms are sent in the forward direction and
 + * the forces that come in are added to f for the atoms of the charge groups
 + * in ind->index. When fshift is not NULL, forces arriving at the cell with
 + * index 0 along the dimension are also added to the shift force
 + * fshift[is].
 + */
 +void dd_move_f(gmx_domdec_t *dd,rvec f[],rvec *fshift)
 +{
 +    int  nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
 +    int  *index,*cgindex;
 +    gmx_domdec_comm_t *comm;
 +    gmx_domdec_comm_dim_t *cd;
 +    gmx_domdec_ind_t *ind;
 +    rvec *buf,*sbuf;
 +    ivec vis;
 +    int  is;
 +    gmx_bool bPBC,bScrew;
 +    
 +    comm = dd->comm;
 +    
 +    cgindex = dd->cgindex;
 +
 +    buf = comm->vbuf.v;
 +
 +    n = 0;
 +    nzone = comm->zones.n/2;
 +    nat_tot = dd->nat_tot;
 +    for(d=dd->ndim-1; d>=0; d--)
 +    {
 +        bPBC   = (dd->ci[dd->dim[d]] == 0);
 +        bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
 +        if (fshift == NULL && !bScrew)
 +        {
 +            /* No shift forces requested: use the plain summation path */
 +            bPBC = FALSE;
 +        }
 +        /* Determine which shift vector we need */
 +        clear_ivec(vis);
 +        vis[dd->dim[d]] = 1;
 +        is = IVEC2IS(vis);
 +        
 +        cd = &comm->cd[d];
 +        for(p=cd->np-1; p>=0; p--) {
 +            ind = &cd->ind[p];
 +            nat_tot -= ind->nrecv[nzone+1];
 +            if (cd->bInPlace)
 +            {
 +                /* Send straight from the tail of f */
 +                sbuf = f + nat_tot;
 +            }
 +            else
 +            {
 +                /* Gather the forces of the zones into the send buffer */
 +                sbuf = comm->vbuf2.v;
 +                j = 0;
 +                for(zone=0; zone<nzone; zone++)
 +                {
 +                    for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
 +                    {
 +                        copy_rvec(f[i],sbuf[j]);
 +                        j++;
 +                    }
 +                }
 +            }
 +            /* Communicate the forces */
 +            dd_sendrecv_rvec(dd, d, dddirForward,
 +                             sbuf, ind->nrecv[nzone+1],
 +                             buf,  ind->nsend[nzone+1]);
 +            index = ind->index;
 +            /* Add the received forces */
 +            n = 0;
 +            if (!bPBC)
 +            {
 +                for(i=0; i<ind->nsend[nzone]; i++)
 +                {
 +                    at0 = cgindex[index[i]];
 +                    at1 = cgindex[index[i]+1];
 +                    for(j=at0; j<at1; j++)
 +                    {
 +                        rvec_inc(f[j],buf[n]);
 +                        n++;
 +                    }
 +                } 
 +            }
 +            else if (!bScrew)
 +            {
 +                for(i=0; i<ind->nsend[nzone]; i++)
 +                {
 +                    at0 = cgindex[index[i]];
 +                    at1 = cgindex[index[i]+1];
 +                    for(j=at0; j<at1; j++)
 +                    {
 +                        rvec_inc(f[j],buf[n]);
 +                        /* Add this force to the shift force */
 +                        rvec_inc(fshift[is],buf[n]);
 +                        n++;
 +                    }
 +                }
 +            }
 +            else
 +            {
 +                for(i=0; i<ind->nsend[nzone]; i++)
 +                {
 +                    at0 = cgindex[index[i]];
 +                    at1 = cgindex[index[i]+1];
 +                    for(j=at0; j<at1; j++)
 +                    {
 +                        /* Rotate the force */
 +                        f[j][XX] += buf[n][XX];
 +                        f[j][YY] -= buf[n][YY];
 +                        f[j][ZZ] -= buf[n][ZZ];
 +                        if (fshift)
 +                        {
 +                            /* Add this force to the shift force */
 +                            rvec_inc(fshift[is],buf[n]);
 +                        }
 +                        n++;
 +                    }
 +                }
 +            }
 +        }
 +        /* The number of zones halves with every dimension we undo */
 +        nzone /= 2;
 +    }
 +}
 +
 +/* Spread a per-atom real value v to the neighboring cells.
 + * This follows the same dimension/pulse loop as dd_move_x, but sends
 + * plain reals in the backward direction without any shift. The rvec
 + * buffers vbuf and vbuf2 are reused as flat arrays of reals.
 + */
 +void dd_atom_spread_real(gmx_domdec_t *dd,real v[])
 +{
 +    int  nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
 +    int  *index,*cgindex;
 +    gmx_domdec_comm_t *comm;
 +    gmx_domdec_comm_dim_t *cd;
 +    gmx_domdec_ind_t *ind;
 +    real *buf,*rbuf;
 +    
 +    comm = dd->comm;
 +    
 +    cgindex = dd->cgindex;
 +    
 +    buf = &comm->vbuf.v[0][0];
 +
 +    nzone = 1;
 +    nat_tot = dd->nat_home;
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        cd = &comm->cd[d];
 +        for(p=0; p<cd->np; p++)
 +        {
 +            ind = &cd->ind[p];
 +            index = ind->index;
 +            /* Pack the values of the atoms to send */
 +            n = 0;
 +            for(i=0; i<ind->nsend[nzone]; i++)
 +            {
 +                at0 = cgindex[index[i]];
 +                at1 = cgindex[index[i]+1];
 +                for(j=at0; j<at1; j++)
 +                {
 +                    buf[n] = v[j];
 +                    n++;
 +                }
 +            }
 +            
 +            if (cd->bInPlace)
 +            {
 +                /* Receive directly behind the values already in v */
 +                rbuf = v + nat_tot;
 +            }
 +            else
 +            {
 +                rbuf = &comm->vbuf2.v[0][0];
 +            }
 +            /* Send and receive the values */
 +            dd_sendrecv_real(dd, d, dddirBackward,
 +                             buf,  ind->nsend[nzone+1],
 +                             rbuf, ind->nrecv[nzone+1]);
 +            if (!cd->bInPlace)
 +            {
 +                /* Distribute the received values over the zones */
 +                j = 0;
 +                for(zone=0; zone<nzone; zone++)
 +                {
 +                    for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
 +                    {
 +                        v[i] = rbuf[j];
 +                        j++;
 +                    }
 +                }
 +            }
 +            nat_tot += ind->nrecv[nzone+1];
 +        }
 +        /* The number of zones doubles with every dimension */
 +        nzone += nzone;
 +    }
 +}
 +
 +/* Sum a per-atom real value v over the cells that hold a copy of the atom.
 + * This follows the same reverse dimension/pulse loop as dd_move_f: the
 + * values of received atoms are sent in the forward direction and the
 + * incoming values are added to v. The rvec buffers vbuf and vbuf2 are
 + * reused as flat arrays of reals.
 + */
 +void dd_atom_sum_real(gmx_domdec_t *dd,real v[])
 +{
 +    int  nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
 +    int  *index,*cgindex;
 +    gmx_domdec_comm_t *comm;
 +    gmx_domdec_comm_dim_t *cd;
 +    gmx_domdec_ind_t *ind;
 +    real *buf,*sbuf;
 +    
 +    comm = dd->comm;
 +    
 +    cgindex = dd->cgindex;
 +
 +    buf = &comm->vbuf.v[0][0];
 +
 +    n = 0;
 +    nzone = comm->zones.n/2;
 +    nat_tot = dd->nat_tot;
 +    for(d=dd->ndim-1; d>=0; d--)
 +    {
 +        cd = &comm->cd[d];
 +        for(p=cd->np-1; p>=0; p--) {
 +            ind = &cd->ind[p];
 +            nat_tot -= ind->nrecv[nzone+1];
 +            if (cd->bInPlace)
 +            {
 +                /* Send straight from the tail of v */
 +                sbuf = v + nat_tot;
 +            }
 +            else
 +            {
 +                /* Gather the values of the zones into the send buffer */
 +                sbuf = &comm->vbuf2.v[0][0];
 +                j = 0;
 +                for(zone=0; zone<nzone; zone++)
 +                {
 +                    for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
 +                    {
 +                        sbuf[j] = v[i];
 +                        j++;
 +                    }
 +                }
 +            }
 +            /* Communicate the values */
 +            dd_sendrecv_real(dd, d, dddirForward,
 +                             sbuf, ind->nrecv[nzone+1],
 +                             buf,  ind->nsend[nzone+1]);
 +            index = ind->index;
 +            /* Add the received values */
 +            n = 0;
 +            for(i=0; i<ind->nsend[nzone]; i++)
 +            {
 +                at0 = cgindex[index[i]];
 +                at1 = cgindex[index[i]+1];
 +                for(j=at0; j<at1; j++)
 +                {
 +                    v[j] += buf[n];
 +                    n++;
 +                }
 +            } 
 +        }
 +        /* The number of zones halves with every dimension we undo */
 +        nzone /= 2;
 +    }
 +}
 +
 +/* Print all fields of one zone limit entry to fp on a single line.
 + * d, i and j are only printed as labels (d0, d1, d2) to identify the entry.
 + */
 +static void print_ddzone(FILE *fp,int d,int i,int j,gmx_ddzone_t *zone)
 +{
 +    fprintf(fp,"zone d0 %d d1 %d d2 %d  min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
 +            d,i,j,
 +            zone->min0,zone->max1,
 +            /* The second value must be mch1 to match the "mch1" label */
 +            zone->mch0,zone->mch1,
 +            zone->p1_0,zone->p1_1);
 +}
 +
 +/* Send n_s and receive n_r zone limit entries along DD dimension index
 + * ddimind in the given direction. Each gmx_ddzone_t (6 reals) is packed
 + * into two consecutive rvecs so that dd_sendrecv_rvec can be used.
 + * The local buffers hold at most 5 entries.
 + */
 +static void dd_sendrecv_ddzone(const gmx_domdec_t *dd,
 +                               int ddimind,int direction,
 +                               gmx_ddzone_t *buf_s,int n_s,
 +                               gmx_ddzone_t *buf_r,int n_r)
 +{
 +    rvec vbuf_s[5*2],vbuf_r[5*2];
 +    rvec *pair;
 +    gmx_ddzone_t *zone;
 +    int  iz;
 +
 +    /* Pack: the first rvec takes min0,max1,mch0, the second mch1,p1_0,p1_1 */
 +    for(iz=0; iz<n_s; iz++)
 +    {
 +        zone = &buf_s[iz];
 +        pair = vbuf_s + 2*iz;
 +        pair[0][0] = zone->min0;
 +        pair[0][1] = zone->max1;
 +        pair[0][2] = zone->mch0;
 +        pair[1][0] = zone->mch1;
 +        pair[1][1] = zone->p1_0;
 +        pair[1][2] = zone->p1_1;
 +    }
 +
 +    dd_sendrecv_rvec(dd, ddimind, direction,
 +                     vbuf_s, n_s*2,
 +                     vbuf_r, n_r*2);
 +
 +    /* Unpack in the same layout */
 +    for(iz=0; iz<n_r; iz++)
 +    {
 +        zone = &buf_r[iz];
 +        pair = vbuf_r + 2*iz;
 +        zone->min0 = pair[0][0];
 +        zone->max1 = pair[0][1];
 +        zone->mch0 = pair[0][2];
 +        zone->mch1 = pair[1][0];
 +        zone->p1_0 = pair[1][1];
 +        zone->p1_1 = pair[1][2];
 +    }
 +}
 +
 +/* Exchange cell boundary information between neighboring cells.
 + * The zone limits in comm->zone_d1 and comm->zone_d2 are initialized from
 + * cell_ns_x0/cell_ns_x1, the cell fraction extremes are communicated
 + * forward and the zone limits backward along each DD dimension.
 + * On return cell_ns_x0/cell_ns_x1 are extended with the received zone
 + * limits and comm->cell_f_max0/cell_f_min1 hold the extremes.
 + */
 +static void dd_move_cellx(gmx_domdec_t *dd,gmx_ddbox_t *ddbox,
 +                          rvec cell_ns_x0,rvec cell_ns_x1)
 +{
 +    int  d,d1,dim,dim1,pos,buf_size,i,j,k,p,npulse,npulse_min;
 +    gmx_ddzone_t *zp,buf_s[5],buf_r[5],buf_e[5];
 +    rvec extr_s[2],extr_r[2];
 +    rvec dh;
 +    real dist_d,c=0,det;
 +    gmx_domdec_comm_t *comm;
 +    gmx_bool bPBC,bUse;
 +
 +    comm = dd->comm;
 +
 +    /* Initialize our own zone limits for DD dimensions 1 and 2 */
 +    for(d=1; d<dd->ndim; d++)
 +    {
 +        dim = dd->dim[d];
 +        zp = (d == 1) ? &comm->zone_d1[0] : &comm->zone_d2[0][0];
 +        zp->min0 = cell_ns_x0[dim];
 +        zp->max1 = cell_ns_x1[dim];
 +        zp->mch0 = cell_ns_x0[dim];
 +        zp->mch1 = cell_ns_x1[dim];
 +        zp->p1_0 = cell_ns_x0[dim];
 +        zp->p1_1 = cell_ns_x1[dim];
 +    }
 +    
 +    for(d=dd->ndim-2; d>=0; d--)
 +    {
 +        dim  = dd->dim[d];
 +        bPBC = (dim < ddbox->npbcdim);
 +
 +        /* Use an rvec to store two reals */
 +        extr_s[d][0] = comm->cell_f0[d+1];
 +        extr_s[d][1] = comm->cell_f1[d+1];
 +        extr_s[d][2] = 0;
 +
 +        pos = 0;
 +        /* Store the extremes in the backward sending buffer,
 +         * so they get updated separately from the forward communication.
 +         */
 +        for(d1=d; d1<dd->ndim-1; d1++)
 +        {
 +            /* We invert the order to be able to use the same loop for buf_e */
 +            buf_s[pos].min0 = extr_s[d1][1];
 +            buf_s[pos].max1 = extr_s[d1][0];
 +            buf_s[pos].mch0 = 0;
 +            buf_s[pos].mch1 = 0;
 +            /* Store the cell corner of the dimension we communicate along */
 +            buf_s[pos].p1_0 = comm->cell_x0[dim];
 +            buf_s[pos].p1_1 = 0;
 +            pos++;
 +        }
 +
 +        buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
 +        pos++;
 +
 +        if (dd->ndim == 3 && d == 0)
 +        {
 +            buf_s[pos] = comm->zone_d2[0][1];
 +            pos++;
 +            buf_s[pos] = comm->zone_d1[0];
 +            pos++;
 +        }
 +
 +        /* We only need to communicate the extremes
 +         * in the forward direction
 +         */
 +        npulse = comm->cd[d].np;
 +        if (bPBC)
 +        {
 +            /* Take the minimum to avoid double communication */
 +            npulse_min = min(npulse,dd->nc[dim]-1-npulse);
 +        }
 +        else
 +        {
 +            /* Without PBC we should really not communicate over
 +             * the boundaries, but implementing that complicates
 +             * the communication setup and therefore we simply
 +             * do all communication, but ignore some data.
 +             */
 +            npulse_min = npulse;
 +        }
 +        for(p=0; p<npulse_min; p++)
 +        {
 +            /* Communicate the extremes forward */
 +            bUse = (bPBC || dd->ci[dim] > 0);
 +
 +            dd_sendrecv_rvec(dd, d, dddirForward,
 +                             extr_s+d, dd->ndim-d-1,
 +                             extr_r+d, dd->ndim-d-1);
 +
 +            if (bUse)
 +            {
 +                for(d1=d; d1<dd->ndim-1; d1++)
 +                {
 +                    extr_s[d1][0] = max(extr_s[d1][0],extr_r[d1][0]);
 +                    extr_s[d1][1] = min(extr_s[d1][1],extr_r[d1][1]);
 +                }
 +            }
 +        }
 +
 +        buf_size = pos;
 +        for(p=0; p<npulse; p++)
 +        {
 +            /* Communicate all the zone information backward */
 +            bUse = (bPBC || dd->ci[dim] < dd->nc[dim] - 1);
 +
 +            dd_sendrecv_ddzone(dd, d, dddirBackward,
 +                               buf_s, buf_size,
 +                               buf_r, buf_size);
 +
 +            clear_rvec(dh);
 +            if (p > 0)
 +            {
 +                for(d1=d+1; d1<dd->ndim; d1++)
 +                {
 +                    /* Determine the decrease of maximum required
 +                     * communication height along d1 due to the distance along d,
 +                     * this avoids a lot of useless atom communication.
 +                     */
 +                    dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;
 +
 +                    if (ddbox->tric_dir[dim])
 +                    {
 +                        /* c is the off-diagonal coupling between the cell planes
 +                         * along directions d and d1.
 +                         */
 +                        c = ddbox->v[dim][dd->dim[d1]][dim];
 +                    }
 +                    else
 +                    {
 +                        c = 0;
 +                    }
 +                    det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
 +                    if (det > 0)
 +                    {
 +                        dh[d1] = comm->cutoff - (c*dist_d + sqrt(det))/(1 + c*c);
 +                    }
 +                    else
 +                    {
 +                        /* A negative value signals out of range */
 +                        dh[d1] = -1;
 +                    }
 +                }
 +            }
 +
 +            /* Accumulate the extremes over all pulses */
 +            for(i=0; i<buf_size; i++)
 +            {
 +                if (p == 0)
 +                {
 +                    buf_e[i] = buf_r[i];
 +                }
 +                else
 +                {
 +                    if (bUse)
 +                    {
 +                        buf_e[i].min0 = min(buf_e[i].min0,buf_r[i].min0);
 +                        buf_e[i].max1 = max(buf_e[i].max1,buf_r[i].max1);
 +                    }
 +
 +                    /* The last entry with 3 dimensions and d == 0 is zone_d1,
 +                     * which belongs to DD dimension 1.
 +                     */
 +                    if (dd->ndim == 3 && d == 0 && i == buf_size - 1)
 +                    {
 +                        d1 = 1;
 +                    }
 +                    else
 +                    {
 +                        d1 = d + 1;
 +                    }
 +                    if (bUse && dh[d1] >= 0)
 +                    {
 +                        buf_e[i].mch0 = max(buf_e[i].mch0,buf_r[i].mch0-dh[d1]);
 +                        buf_e[i].mch1 = max(buf_e[i].mch1,buf_r[i].mch1-dh[d1]);
 +                    }
 +                }
 +                /* Copy the received buffer to the send buffer,
 +                 * to pass the data through with the next pulse.
 +                 */
 +                buf_s[i] = buf_r[i];
 +            }
 +            if (((bPBC || dd->ci[dim]+npulse < dd->nc[dim]) && p == npulse-1) ||
 +                (!bPBC && dd->ci[dim]+1+p == dd->nc[dim]-1))
 +            {
 +                /* Store the extremes */ 
 +                pos = 0;
 +
 +                for(d1=d; d1<dd->ndim-1; d1++)
 +                {
 +                    extr_s[d1][1] = min(extr_s[d1][1],buf_e[pos].min0);
 +                    extr_s[d1][0] = max(extr_s[d1][0],buf_e[pos].max1);
 +                    pos++;
 +                }
 +
 +                if (d == 1 || (d == 0 && dd->ndim == 3))
 +                {
 +                    for(i=d; i<2; i++)
 +                    {
 +                        comm->zone_d2[1-d][i] = buf_e[pos];
 +                        pos++;
 +                    }
 +                }
 +                if (d == 0)
 +                {
 +                    comm->zone_d1[1] = buf_e[pos];
 +                    pos++;
 +                }
 +            }
 +        }
 +    }
 +    
 +    /* Extend the neighbor search cell limits with the zone limits */
 +    if (dd->ndim >= 2)
 +    {
 +        dim = dd->dim[1];
 +        for(i=0; i<2; i++)
 +        {
 +            if (debug)
 +            {
 +                print_ddzone(debug,1,i,0,&comm->zone_d1[i]);
 +            }
 +            cell_ns_x0[dim] = min(cell_ns_x0[dim],comm->zone_d1[i].min0);
 +            cell_ns_x1[dim] = max(cell_ns_x1[dim],comm->zone_d1[i].max1);
 +        }
 +    }
 +    if (dd->ndim >= 3)
 +    {
 +        dim = dd->dim[2];
 +        for(i=0; i<2; i++)
 +        {
 +            for(j=0; j<2; j++)
 +            {
 +                if (debug)
 +                {
 +                    print_ddzone(debug,2,i,j,&comm->zone_d2[i][j]);
 +                }
 +                cell_ns_x0[dim] = min(cell_ns_x0[dim],comm->zone_d2[i][j].min0);
 +                cell_ns_x1[dim] = max(cell_ns_x1[dim],comm->zone_d2[i][j].max1);
 +            }
 +        }
 +    }
 +    /* Store the accumulated cell fraction extremes */
 +    for(d=1; d<dd->ndim; d++)
 +    {
 +        comm->cell_f_max0[d] = extr_s[d-1][0];
 +        comm->cell_f_min1[d] = extr_s[d-1][1];
 +        if (debug)
 +        {
 +            fprintf(debug,"Cell fraction d %d, max0 %f, min1 %f\n",
 +                    d,comm->cell_f_max0[d],comm->cell_f_min1[d]);
 +        }
 +    }
 +}
 +
 +/* Collect on the master node the charge group distribution that belongs
 + * to state_local: the per-node charge group and atom counts (ma->ncg,
 + * ma->nat, ma->index) and the global charge group indices (ma->cg).
 + * Nothing is done when the master already holds the distribution for
 + * state_local->ddp_count. The distribution is taken from dd when the state
 + * is in sync with the decomposition, and otherwise from the indices stored
 + * in the state itself; it is an inconsistency when neither is available.
 + */
 +static void dd_collect_cg(gmx_domdec_t *dd,
 +                          t_state *state_local)
 +{
 +    gmx_domdec_master_t *ma=NULL;
 +    int buf2[2],*ibuf,i,ncg_home=0,*cg=NULL,nat_home=0;
 +    t_block *cgs_gl;
 +
 +    if (state_local->ddp_count == dd->comm->master_cg_ddp_count)
 +    {
 +        /* The master has the correct distribution */
 +        return;
 +    }
 +    
 +    if (state_local->ddp_count == dd->ddp_count)
 +    {
 +        /* The state is in sync with the decomposition: use the DD data */
 +        ncg_home = dd->ncg_home;
 +        cg       = dd->index_gl;
 +        nat_home = dd->nat_home;
 +    } 
 +    else if (state_local->ddp_count_cg_gl == state_local->ddp_count)
 +    {
 +        /* The state is out of sync with the decomposition, but it carries
 +         * its own charge group indices; count the atoms from those.
 +         */
 +        cgs_gl = &dd->comm->cgs_gl;
 +
 +        ncg_home = state_local->ncg_gl;
 +        cg       = state_local->cg_gl;
 +        nat_home = 0;
 +        for(i=0; i<ncg_home; i++)
 +        {
 +            nat_home += cgs_gl->index[cg[i]+1] - cgs_gl->index[cg[i]];
 +        }
 +    }
 +    else
 +    {
 +        gmx_incons("Attempted to collect a vector for a state for which the charge group distribution is unknown");
 +    }
 +    
 +    /* Send the counts that belong to the state, not those of dd,
 +     * which differ when the state is out of sync.
 +     */
 +    buf2[0] = ncg_home;
 +    buf2[1] = nat_home;
 +    if (DDMASTER(dd))
 +    {
 +        ma = dd->ma;
 +        ibuf = ma->ibuf;
 +    }
 +    else
 +    {
 +        ibuf = NULL;
 +    }
 +    /* Collect the charge group and atom counts on the master */
 +    dd_gather(dd,2*sizeof(int),buf2,ibuf);
 +    
 +    if (DDMASTER(dd))
 +    {
 +        ma->index[0] = 0;
 +        for(i=0; i<dd->nnodes; i++)
 +        {
 +            ma->ncg[i] = ma->ibuf[2*i];
 +            ma->nat[i] = ma->ibuf[2*i+1];
 +            ma->index[i+1] = ma->index[i] + ma->ncg[i];
 +            
 +        }
 +        /* Make byte counts and indices */
 +        for(i=0; i<dd->nnodes; i++)
 +        {
 +            ma->ibuf[i] = ma->ncg[i]*sizeof(int);
 +            ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
 +        }
 +        if (debug)
 +        {
 +            fprintf(debug,"Initial charge group distribution: ");
 +            for(i=0; i<dd->nnodes; i++)
 +            {
 +                fprintf(debug," %d",ma->ncg[i]);
 +            }
 +            fprintf(debug,"\n");
 +        }
 +    }
 +    
 +    /* Collect the charge group indices on the master */
 +    dd_gatherv(dd,
 +               ncg_home*sizeof(int),cg,
 +               DDMASTER(dd) ? ma->ibuf : NULL,
 +               DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
 +               DDMASTER(dd) ? ma->cg : NULL);
 +    
 +    dd->comm->master_cg_ddp_count = state_local->ddp_count;
 +}
 +
 +/* Collects the distributed vector lv into the global vector v on the master
 + * using one point-to-point message per non-master node.
 + * Non-master nodes send dd->nat_home vectors to the master. The master copies
 + * its own part directly and stores every received part at the global atom
 + * positions given by ma->cg and cgs_gl->index.
 + * The MPI calls are only compiled in when GMX_MPI is defined.
 + */
 +static void dd_collect_vec_sendrecv(gmx_domdec_t *dd,
 +                                    rvec *lv,rvec *v)
 +{
 +    gmx_domdec_master_t *ma;
 +    int  n,i,c,a,nalloc=0;
 +    rvec *buf=NULL;
 +    t_block *cgs_gl;
 +
 +    ma = dd->ma;
 +    
 +    if (!DDMASTER(dd))
 +    {
 +#ifdef GMX_MPI
 +        /* The message tag is the rank of the sender */
 +        MPI_Send(lv,dd->nat_home*sizeof(rvec),MPI_BYTE,DDMASTERRANK(dd),
 +                 dd->rank,dd->mpi_comm_all);
 +#endif
 +    } else {
 +        /* Copy the master coordinates to the global array */
 +        cgs_gl = &dd->comm->cgs_gl;
 +
 +        n = DDMASTERRANK(dd);
 +        a = 0;
 +        for(i=ma->index[n]; i<ma->index[n+1]; i++)
 +        {
 +            for(c=cgs_gl->index[ma->cg[i]]; c<cgs_gl->index[ma->cg[i]+1]; c++)
 +            {
 +                copy_rvec(lv[a++],v[c]);
 +            }
 +        }
 +        
 +        for(n=0; n<dd->nnodes; n++)
 +        {
 +            /* Skip this node itself; its part was copied above */
 +            if (n != dd->rank)
 +            {
 +                /* Grow the receive buffer when node n has more atoms */
 +                if (ma->nat[n] > nalloc)
 +                {
 +                    nalloc = over_alloc_dd(ma->nat[n]);
 +                    srenew(buf,nalloc);
 +                }
 +#ifdef GMX_MPI
 +                MPI_Recv(buf,ma->nat[n]*sizeof(rvec),MPI_BYTE,DDRANK(dd,n),
 +                         n,dd->mpi_comm_all,MPI_STATUS_IGNORE);
 +#endif
 +                /* Unpack: a runs over the received atoms in local order,
 +                 * c over the global atom indices of each charge group.
 +                 */
 +                a = 0;
 +                for(i=ma->index[n]; i<ma->index[n+1]; i++)
 +                {
 +                    for(c=cgs_gl->index[ma->cg[i]]; c<cgs_gl->index[ma->cg[i]+1]; c++)
 +                    {
 +                        copy_rvec(buf[a++],v[c]);
 +                    }
 +                }
 +            }
 +        }
 +        sfree(buf);
 +    }
 +}
 +
 +/* Sets up the per-node byte counts and byte displacements for transferring
 + * rvec data between the master and all nodes.
 + * Both arrays live in the integer buffer of the master: the counts occupy
 + * ma->ibuf[0..nnodes-1], the displacements ma->ibuf[nnodes..2*nnodes-1].
 + * On return *counts and *disps point into that buffer.
 + */
 +static void get_commbuffer_counts(gmx_domdec_t *dd,
 +                                  int **counts,int **disps)
 +{
 +    gmx_domdec_master_t *ma = dd->ma;
 +    int *nbytes,*offset;
 +    int node;
 +
 +    nbytes = ma->ibuf;
 +    offset = ma->ibuf + dd->nnodes;
 +
 +    /* Make the rvec count and displacement arrays */
 +    for(node=0; node<dd->nnodes; node++)
 +    {
 +        nbytes[node] = ma->nat[node]*sizeof(rvec);
 +        if (node == 0)
 +        {
 +            offset[node] = 0;
 +        }
 +        else
 +        {
 +            /* Each node starts where the previous one ends */
 +            offset[node] = offset[node-1] + nbytes[node-1];
 +        }
 +    }
 +
 +    *counts = nbytes;
 +    *disps  = offset;
 +}
 +
 +/* Collects the distributed vector lv into the global vector v on the master
 + * with a single dd_gatherv call.
 + * The master receives all parts back to back in ma->vbuf (in node order) and
 + * then copies them to the global atom positions given by ma->cg and
 + * cgs_gl->index.
 + */
 +static void dd_collect_vec_gatherv(gmx_domdec_t *dd,
 +                                   rvec *lv,rvec *v)
 +{
 +    gmx_domdec_master_t *ma;
 +    int  *rcounts=NULL,*disps=NULL;
 +    int  n,i,c,a;
 +    rvec *buf=NULL;
 +    t_block *cgs_gl;
 +    
 +    ma = dd->ma;
 +    
 +    if (DDMASTER(dd))
 +    {
 +        /* Only the master sets counts, displacements and receive buffer;
 +         * the other nodes pass NULL for these arguments.
 +         */
 +        get_commbuffer_counts(dd,&rcounts,&disps);
 +
 +        buf = ma->vbuf;
 +    }
 +    
 +    dd_gatherv(dd,dd->nat_home*sizeof(rvec),lv,rcounts,disps,buf);
 +
 +    if (DDMASTER(dd))
 +    {
 +        cgs_gl = &dd->comm->cgs_gl;
 +
 +        /* a is not reset per node: buf holds the node parts contiguously */
 +        a = 0;
 +        for(n=0; n<dd->nnodes; n++)
 +        {
 +            for(i=ma->index[n]; i<ma->index[n+1]; i++)
 +            {
 +                for(c=cgs_gl->index[ma->cg[i]]; c<cgs_gl->index[ma->cg[i]+1]; c++)
 +                {
 +                    copy_rvec(buf[a++],v[c]);
 +                }
 +            }
 +        }
 +    }
 +}
 +
 +/* Collects the distributed vector lv of all DD nodes into the global
 + * vector v (filled on the master by the routines called here).
 + * First calls dd_collect_cg for state_local, then uses point-to-point
 + * messages when there are at most GMX_DD_NNODES_SENDRECV nodes and the
 + * gatherv-based collection otherwise.
 + */
 +void dd_collect_vec(gmx_domdec_t *dd,
 +                    t_state *state_local,rvec *lv,rvec *v)
 +{
 +    dd_collect_cg(dd,state_local);
 +
 +    if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
 +    {
 +        dd_collect_vec_sendrecv(dd,lv,v);
 +    }
 +    else
 +    {
 +        dd_collect_vec_gatherv(dd,lv,v);
 +    }
 +}
 +
 +
 +/* Collects the local states of the DD nodes into the global state.
 + * Scalars, matrices and the Nose-Hoover style arrays are copied on the master
 + * from its own local state only. The entries for which EST_DISTR(est) holds
 + * and whose bit is set in state_local->flags are collected from all nodes.
 + */
 +void dd_collect_state(gmx_domdec_t *dd,
 +                      t_state *state_local,t_state *state)
 +{
 +    int est,i,j,nh;
 +
 +    nh = state->nhchainlength;
 +
 +    if (DDMASTER(dd))
 +    {
 +        state->lambda = state_local->lambda;
 +        state->veta = state_local->veta;
 +        state->vol0 = state_local->vol0;
 +        copy_mat(state_local->box,state->box);
 +        copy_mat(state_local->boxv,state->boxv);
 +        copy_mat(state_local->svir_prev,state->svir_prev);
 +        copy_mat(state_local->fvir_prev,state->fvir_prev);
 +        copy_mat(state_local->pres_prev,state->pres_prev);
 +
 +
 +        /* The xi/vxi arrays hold nh entries per coupling group */
 +        for(i=0; i<state_local->ngtc; i++)
 +        {
 +            for(j=0; j<nh; j++) {
 +                state->nosehoover_xi[i*nh+j]        = state_local->nosehoover_xi[i*nh+j];
 +                state->nosehoover_vxi[i*nh+j]       = state_local->nosehoover_vxi[i*nh+j];
 +            }
 +            state->therm_integral[i] = state_local->therm_integral[i];            
 +        }
 +        for(i=0; i<state_local->nnhpres; i++) 
 +        {
 +            for(j=0; j<nh; j++) {
 +                state->nhpres_xi[i*nh+j]        = state_local->nhpres_xi[i*nh+j];
 +                state->nhpres_vxi[i*nh+j]       = state_local->nhpres_vxi[i*nh+j];
 +            }
 +        }
 +    }
 +    /* This loop is executed by all nodes, not only by the master */
 +    for(est=0; est<estNR; est++)
 +    {
-         if (EST_DISTR(est) && state_local->flags & (1<<est))
++        if (EST_DISTR(est) && (state_local->flags & (1<<est)))
 +        {
 +            switch (est) {
 +            case estX:
 +                dd_collect_vec(dd,state_local,state_local->x,state->x);
 +                break;
 +            case estV:
 +                dd_collect_vec(dd,state_local,state_local->v,state->v);
 +                break;
 +            case estSDX:
 +                dd_collect_vec(dd,state_local,state_local->sd_X,state->sd_X);
 +                break;
 +            case estCGP:
 +                dd_collect_vec(dd,state_local,state_local->cg_p,state->cg_p);
 +                break;
 +            case estLD_RNG:
 +                if (state->nrngi == 1)
 +                {
 +                    /* A single generator state: keep the master's copy */
 +                    if (DDMASTER(dd))
 +                    {
 +                        for(i=0; i<state_local->nrng; i++)
 +                        {
 +                            state->ld_rng[i] = state_local->ld_rng[i];
 +                        }
 +                    }
 +                }
 +                else
 +                {
 +                    /* Gather nrng entries from every node */
 +                    dd_gather(dd,state_local->nrng*sizeof(state->ld_rng[0]),
 +                              state_local->ld_rng,state->ld_rng);
 +                }
 +                break;
 +            case estLD_RNGI:
 +                if (state->nrngi == 1)
 +                {
 +                   if (DDMASTER(dd))
 +                    {
 +                        state->ld_rngi[0] = state_local->ld_rngi[0];
 +                    } 
 +                }
 +                else
 +                {
 +                    /* Gather one entry from every node */
 +                    dd_gather(dd,sizeof(state->ld_rngi[0]),
 +                              state_local->ld_rngi,state->ld_rngi);
 +                }
 +                break;
 +            case estDISRE_INITF:
 +            case estDISRE_RM3TAV:
 +            case estORIRE_INITF:
 +            case estORIRE_DTAV:
 +                /* Nothing is collected for these entries */
 +                break;
 +            default:
 +                gmx_incons("Unknown state entry encountered in dd_collect_state");
 +            }
 +        }
 +    }
 +}
 +
 +/* Resizes the per-charge-group arrays of the force record (cg_cm and cginfo)
 + * to over_alloc_dd(nalloc) elements and stores that size in fr->cg_nalloc.
 + */
 +static void dd_realloc_fr_cg(t_forcerec *fr,int nalloc)
 +{
 +    if (debug != NULL)
 +    {
 +        /* Report the old size before it is overwritten below */
 +        fprintf(debug,
 +                "Reallocating forcerec: currently %d, required %d, allocating %d\n",
 +                fr->cg_nalloc,
 +                nalloc,
 +                over_alloc_dd(nalloc));
 +    }
 +
 +    fr->cg_nalloc = over_alloc_dd(nalloc);
 +
 +    srenew(fr->cg_cm,fr->cg_nalloc);
 +    srenew(fr->cginfo,fr->cg_nalloc);
 +}
 +
 +/* Resizes the distributed per-atom arrays of state, and *f when f is not
 + * NULL, to over_alloc_dd(nalloc) elements; the new size is stored in
 + * state->nalloc. Only entries for which EST_DISTR(est) holds and whose bit
 + * is set in state->flags are considered.
 + */
 +static void dd_realloc_state(t_state *state,rvec **f,int nalloc)
 +{
 +    int est;
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"Reallocating state: currently %d, required %d, allocating %d\n",state->nalloc,nalloc,over_alloc_dd(nalloc));
 +    }
 +
 +    state->nalloc = over_alloc_dd(nalloc);
 +    
 +    for(est=0; est<estNR; est++)
 +    {
-         if (EST_DISTR(est) && state->flags & (1<<est))
++        if (EST_DISTR(est) && (state->flags & (1<<est)))
 +        {
 +            switch(est) {
 +            case estX:
 +                srenew(state->x,state->nalloc);
 +                break;
 +            case estV:
 +                srenew(state->v,state->nalloc);
 +                break;
 +            case estSDX:
 +                srenew(state->sd_X,state->nalloc);
 +                break;
 +            case estCGP:
 +                srenew(state->cg_p,state->nalloc);
 +                break;
 +            case estLD_RNG:
 +            case estLD_RNGI:
 +            case estDISRE_INITF:
 +            case estDISRE_RM3TAV:
 +            case estORIRE_INITF:
 +            case estORIRE_DTAV:
 +                /* No reallocation required */
 +                break;
 +            default:
 +                gmx_incons("Unknown state entry encountered in dd_realloc_state");            
 +            }
 +        }
 +    }
 +    
 +    /* The optional force array is kept at the same size as the state */
 +    if (f != NULL)
 +    {
 +        srenew(*f,state->nalloc);
 +    }
 +}
 +
 +/* Distributes the global vector v from the master over the DD nodes into
 + * the local vectors lv using one point-to-point message per non-master node.
 + * The master packs the atoms of each node, in the order given by ma->cg and
 + * cgs->index, into a temporary buffer and sends it; its own part is copied
 + * straight into lv. The other nodes receive dd->nat_home vectors into lv.
 + * The MPI calls are only compiled in when GMX_MPI is defined.
 + */
 +static void dd_distribute_vec_sendrecv(gmx_domdec_t *dd,t_block *cgs,
 +                                       rvec *v,rvec *lv)
 +{
 +    gmx_domdec_master_t *ma;
 +    int  n,i,c,a,nalloc=0;
 +    rvec *buf=NULL;
 +    
 +    if (DDMASTER(dd))
 +    {
 +        ma  = dd->ma;
 +        
 +        for(n=0; n<dd->nnodes; n++)
 +        {
 +            /* The part for this node itself is copied after the loop */
 +            if (n != dd->rank)
 +            {
 +                if (ma->nat[n] > nalloc)
 +                {
 +                    nalloc = over_alloc_dd(ma->nat[n]);
 +                    srenew(buf,nalloc);
 +                }
 +                /* Pack the atoms of node n into the temporary buffer buf */
 +                a = 0;
 +                for(i=ma->index[n]; i<ma->index[n+1]; i++)
 +                {
 +                    for(c=cgs->index[ma->cg[i]]; c<cgs->index[ma->cg[i]+1]; c++)
 +                    {
 +                        copy_rvec(v[c],buf[a++]);
 +                    }
 +                }
 +                /* The packed count must match the atom count on record */
 +                if (a != ma->nat[n])
 +                {
 +                    gmx_fatal(FARGS,"Internal error a (%d) != nat (%d)",
 +                              a,ma->nat[n]);
 +                }
 +                
 +#ifdef GMX_MPI
 +                /* The message tag is the index n of the receiving node */
 +                MPI_Send(buf,ma->nat[n]*sizeof(rvec),MPI_BYTE,
 +                         DDRANK(dd,n),n,dd->mpi_comm_all);
 +#endif
 +            }
 +        }
 +        sfree(buf);
 +        /* Copy the master's own part directly into its local vector */
 +        n = DDMASTERRANK(dd);
 +        a = 0;
 +        for(i=ma->index[n]; i<ma->index[n+1]; i++)
 +        {
 +            for(c=cgs->index[ma->cg[i]]; c<cgs->index[ma->cg[i]+1]; c++)
 +            {
 +                copy_rvec(v[c],lv[a++]);
 +            }
 +        }
 +    }
 +    else
 +    {
 +#ifdef GMX_MPI
 +        /* Any tag is accepted; the source is fixed to the master rank */
 +        MPI_Recv(lv,dd->nat_home*sizeof(rvec),MPI_BYTE,DDMASTERRANK(dd),
 +                 MPI_ANY_TAG,dd->mpi_comm_all,MPI_STATUS_IGNORE);
 +#endif
 +    }
 +}
 +
 +/* Distributes the global vector v from the master over the DD nodes into
 + * the local vectors lv with a single dd_scatterv call.
 + * The master packs all atoms in node order into ma->vbuf, using ma->cg and
 + * cgs->index, and sets up the byte counts and displacements; the other
 + * nodes pass NULL for the send-side arguments.
 + */
 +static void dd_distribute_vec_scatterv(gmx_domdec_t *dd,t_block *cgs,
 +                                       rvec *v,rvec *lv)
 +{
 +    gmx_domdec_master_t *ma;
 +    int  *scounts=NULL,*disps=NULL;
 +    int  n,i,c,a;
 +    rvec *buf=NULL;
 +    
 +    if (DDMASTER(dd))
 +    {
 +        ma  = dd->ma;
 +     
 +        get_commbuffer_counts(dd,&scounts,&disps);
 +
 +        buf = ma->vbuf;
 +        /* a is not reset per node: the node parts are packed contiguously */
 +        a = 0;
 +        for(n=0; n<dd->nnodes; n++)
 +        {
 +            for(i=ma->index[n]; i<ma->index[n+1]; i++)
 +            {
 +                for(c=cgs->index[ma->cg[i]]; c<cgs->index[ma->cg[i]+1]; c++)
 +                {
 +                    copy_rvec(v[c],buf[a++]);
 +                }
 +            }
 +        }
 +    }
 +
 +    dd_scatterv(dd,scounts,disps,buf,dd->nat_home*sizeof(rvec),lv);
 +}
 +
 +/* Distributes the global vector v into the local vectors lv, choosing the
 + * scatterv-based routine above GMX_DD_NNODES_SENDRECV nodes and the
 + * point-to-point routine otherwise.
 + */
 +static void dd_distribute_vec(gmx_domdec_t *dd,t_block *cgs,rvec *v,rvec *lv)
 +{
 +    if (dd->nnodes > GMX_DD_NNODES_SENDRECV)
 +    {
 +        dd_distribute_vec_scatterv(dd,cgs,v,lv);
 +        return;
 +    }
 +
 +    dd_distribute_vec_sendrecv(dd,cgs,v,lv);
 +}
 +
 +/* Distributes the global state from the master over the DD nodes.
 + * The master first copies the scalars, matrices and Nose-Hoover style arrays
 + * into its own local state, which is then broadcast with dd_bcast. After
 + * that the local state (and *f, via dd_realloc_state) is enlarged when
 + * dd->nat_home exceeds state_local->nalloc, and the entries for which
 + * EST_DISTR(i) holds and whose bit is set in state_local->flags are
 + * distributed.
 + */
 +static void dd_distribute_state(gmx_domdec_t *dd,t_block *cgs,
 +                                t_state *state,t_state *state_local,
 +                                rvec **f)
 +{
 +    int  i,j,nh;
 +
 +    nh = state->nhchainlength;
 +
 +    if (DDMASTER(dd))
 +    {
 +        state_local->lambda = state->lambda;
 +        state_local->veta   = state->veta;
 +        state_local->vol0   = state->vol0;
 +        copy_mat(state->box,state_local->box);
 +        copy_mat(state->box_rel,state_local->box_rel);
 +        copy_mat(state->boxv,state_local->boxv);
 +        copy_mat(state->svir_prev,state_local->svir_prev);
 +        copy_mat(state->fvir_prev,state_local->fvir_prev);
 +        /* The xi/vxi arrays hold nh entries per coupling group */
 +        for(i=0; i<state_local->ngtc; i++)
 +        {
 +            for(j=0; j<nh; j++) {
 +                state_local->nosehoover_xi[i*nh+j]        = state->nosehoover_xi[i*nh+j];
 +                state_local->nosehoover_vxi[i*nh+j]       = state->nosehoover_vxi[i*nh+j];
 +            }
 +            state_local->therm_integral[i] = state->therm_integral[i];
 +        }
 +        for(i=0; i<state_local->nnhpres; i++)
 +        {
 +            for(j=0; j<nh; j++) {
 +                state_local->nhpres_xi[i*nh+j]        = state->nhpres_xi[i*nh+j];
 +                state_local->nhpres_vxi[i*nh+j]       = state->nhpres_vxi[i*nh+j];
 +            }
 +        }
 +    }
 +    /* Broadcast what the master just stored in its local state */
 +    dd_bcast(dd,sizeof(real),&state_local->lambda);
 +    dd_bcast(dd,sizeof(real),&state_local->veta);
 +    dd_bcast(dd,sizeof(real),&state_local->vol0);
 +    dd_bcast(dd,sizeof(state_local->box),state_local->box);
 +    dd_bcast(dd,sizeof(state_local->box_rel),state_local->box_rel);
 +    dd_bcast(dd,sizeof(state_local->boxv),state_local->boxv);
 +    dd_bcast(dd,sizeof(state_local->svir_prev),state_local->svir_prev);
 +    dd_bcast(dd,sizeof(state_local->fvir_prev),state_local->fvir_prev);
 +    dd_bcast(dd,((state_local->ngtc*nh)*sizeof(double)),state_local->nosehoover_xi);
 +    dd_bcast(dd,((state_local->ngtc*nh)*sizeof(double)),state_local->nosehoover_vxi);
 +    dd_bcast(dd,state_local->ngtc*sizeof(double),state_local->therm_integral);
 +    dd_bcast(dd,((state_local->nnhpres*nh)*sizeof(double)),state_local->nhpres_xi);
 +    dd_bcast(dd,((state_local->nnhpres*nh)*sizeof(double)),state_local->nhpres_vxi);
 +
 +    /* Make room for the home atoms before the vectors are distributed */
 +    if (dd->nat_home > state_local->nalloc)
 +    {
 +        dd_realloc_state(state_local,f,dd->nat_home);
 +    }
 +    for(i=0; i<estNR; i++)
 +    {
-         if (EST_DISTR(i) && state_local->flags & (1<<i))
++        if (EST_DISTR(i) && (state_local->flags & (1<<i)))
 +        {
 +            switch (i) {
 +            case estX:
 +                dd_distribute_vec(dd,cgs,state->x,state_local->x);
 +                break;
 +            case estV:
 +                dd_distribute_vec(dd,cgs,state->v,state_local->v);
 +                break;
 +            case estSDX:
 +                dd_distribute_vec(dd,cgs,state->sd_X,state_local->sd_X);
 +                break;
 +            case estCGP:
 +                dd_distribute_vec(dd,cgs,state->cg_p,state_local->cg_p);
 +                break;
 +            case estLD_RNG:
 +                /* nrngi == 1: same data for every node, otherwise a
 +                 * separate slice per node.
 +                 */
 +                if (state->nrngi == 1)
 +                {
 +                    dd_bcastc(dd,
 +                              state_local->nrng*sizeof(state_local->ld_rng[0]),
 +                              state->ld_rng,state_local->ld_rng);
 +                }
 +                else
 +                {
 +                    dd_scatter(dd,
 +                               state_local->nrng*sizeof(state_local->ld_rng[0]),
 +                               state->ld_rng,state_local->ld_rng);
 +                }
 +                break;
 +            case estLD_RNGI:
 +                if (state->nrngi == 1)
 +                {
 +                    dd_bcastc(dd,sizeof(state_local->ld_rngi[0]),
 +                              state->ld_rngi,state_local->ld_rngi);
 +                }
 +                else
 +                {
 +                     dd_scatter(dd,sizeof(state_local->ld_rngi[0]),
 +                               state->ld_rngi,state_local->ld_rngi);
 +                }   
 +                break;
 +            case estDISRE_INITF:
 +            case estDISRE_RM3TAV:
 +            case estORIRE_INITF:
 +            case estORIRE_DTAV:
 +                /* Not implemented yet */
 +                break;
 +            default:
 +                gmx_incons("Unknown state entry encountered in dd_distribute_state");
 +            }
 +        }
 +    }
 +}
 +
 +/* Returns the letter 'X', 'Y' or 'Z' for dimension index XX, YY or ZZ.
 + * Any other value is reported through gmx_fatal.
 + */
 +static char dim2char(int dim)
 +{
 +    char letter = '?';
 +
 +    if (dim == XX)
 +    {
 +        letter = 'X';
 +    }
 +    else if (dim == YY)
 +    {
 +        letter = 'Y';
 +    }
 +    else if (dim == ZZ)
 +    {
 +        letter = 'Z';
 +    }
 +    else
 +    {
 +        gmx_fatal(FARGS,"Unknown dim %d",dim);
 +    }
 +
 +    return letter;
 +}
 +
 +/* Writes the DD cell boundaries of all nodes to the file <fn>_<step>.pdb.
 + * Every node contributes its cell_x0/cell_x1 corners through dd_gather; the
 + * master writes 8 corner atoms per cell plus CONECT records for the 12 cell
 + * edges. The last two numeric columns hold 1.0 and the cell volume relative
 + * to the average cell volume (nnodes*cell volume/(product of the box
 + * diagonal)).
 + */
 +static void write_dd_grid_pdb(const char *fn,gmx_large_int_t step,
 +                              gmx_domdec_t *dd,matrix box,gmx_ddbox_t *ddbox)
 +{
 +    rvec grid_s[2],*grid_r=NULL,cx,r;
 +    char fname[STRLEN],format[STRLEN],buf[22];
 +    FILE *out;
 +    int  a,i,d,z,y,x;
 +    matrix tric;
 +    real vol;
 +
 +    copy_rvec(dd->comm->cell_x0,grid_s[0]);
 +    copy_rvec(dd->comm->cell_x1,grid_s[1]);
 +    
 +    if (DDMASTER(dd))
 +    {
 +        snew(grid_r,2*dd->nnodes);
 +    }
 +    
 +    /* grid_r[2*i] and grid_r[2*i+1] receive the two corners of node i */
 +    dd_gather(dd,2*sizeof(rvec),grid_s[0],DDMASTER(dd) ? grid_r[0] : NULL);
 +    
 +    if (DDMASTER(dd))
 +    {
 +        /* Build the matrix applied to the corners below: unit diagonal,
 +         * off-diagonal box ratios only for decomposed dimensions below
 +         * ddbox->npbcdim.
 +         */
 +        for(d=0; d<DIM; d++)
 +        {
 +            for(i=0; i<DIM; i++)
 +            {
 +                if (d == i)
 +                {
 +                    tric[d][i] = 1;
 +                }
 +                else
 +                {
 +                    if (dd->nc[d] > 1 && d < ddbox->npbcdim)
 +                    {
 +                        tric[d][i] = box[i][d]/box[i][i];
 +                    }
 +                    else
 +                    {
 +                        tric[d][i] = 0;
 +                    }
 +                }
 +            }
 +        }
 +        sprintf(fname,"%s_%s.pdb",fn,gmx_step_str(step,buf));
 +        sprintf(format,"%s%s\n",pdbformat,"%6.2f%6.2f");
 +        out = gmx_fio_fopen(fname,"w");
 +        gmx_write_pdb_box(out,dd->bScrewPBC ? epbcSCREW : epbcXYZ,box);
 +        a = 1;
 +        for(i=0; i<dd->nnodes; i++)
 +        {
 +            vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
 +            for(d=0; d<DIM; d++)
 +            {
 +                vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
 +            }
 +            /* Loop over the 8 corners; x runs fastest */
 +            for(z=0; z<2; z++)
 +            {
 +                for(y=0; y<2; y++)
 +                {
 +                    for(x=0; x<2; x++)
 +                    {
 +                        cx[XX] = grid_r[i*2+x][XX];
 +                        cx[YY] = grid_r[i*2+y][YY];
 +                        cx[ZZ] = grid_r[i*2+z][ZZ];
 +                        mvmul(tric,cx,r);
 +                        /* Pass the same argument list as write_dd_pdb does
 +                         * for pdbformat: a character follows the residue
 +                         * number, before the coordinates.
 +                         */
 +                        fprintf(out,format,"ATOM",a++,"CA","GLY",' ',1+i,
 +                                ' ',10*r[XX],10*r[YY],10*r[ZZ],1.0,vol);
 +                    }
 +                }
 +            }
 +            /* Connect corner pairs that differ in dimension d only;
 +             * atom numbers of cell i start at 1 + i*8.
 +             */
 +            for(d=0; d<DIM; d++)
 +            {
 +                for(x=0; x<4; x++)
 +                {
 +                    switch(d)
 +                    {
 +                    case 0: y = 1 + i*8 + 2*x; break;
 +                    case 1: y = 1 + i*8 + 2*x - (x % 2); break;
 +                    case 2: y = 1 + i*8 + x; break;
 +                    }
 +                    fprintf(out,"%6s%5d%5d\n","CONECT",y,y+(1<<d));
 +                }
 +            }
 +        }
 +        gmx_fio_fclose(out);
 +        sfree(grid_r);
 +    }
 +}
 +
 +/* Writes the first natoms local atoms of this node to the file
 + * <fn>_<step>_n<sim_nodeid>.pdb. With natoms == -1 the count
 + * dd->comm->nat[ddnatVSITE] is used.
 + * The last numeric column encodes where the atom comes from: the zone index
 + * for atoms below nat[ddnatZONE], zones.n for atoms below nat[ddnatVSITE]
 + * and zones.n+1 for all further atoms.
 + */
 +void write_dd_pdb(const char *fn,gmx_large_int_t step,const char *title,
 +                  gmx_mtop_t *mtop,t_commrec *cr,
 +                  int natoms,rvec x[],matrix box)
 +{
 +    char fname[STRLEN],format[STRLEN],format4[STRLEN],buf[22];
 +    FILE *out;
 +    int  i,ii,resnr,c;
 +    char *atomname,*resname;
 +    real b;
 +    gmx_domdec_t *dd;
 +    
 +    dd = cr->dd;
 +    if (natoms == -1)
 +    {
 +        natoms = dd->comm->nat[ddnatVSITE];
 +    }
 +    
 +    sprintf(fname,"%s_%s_n%d.pdb",fn,gmx_step_str(step,buf),cr->sim_nodeid);
 +    
 +    /* Two line formats; format4 is used for atom names of 4+ characters */
 +    sprintf(format,"%s%s\n",pdbformat,"%6.2f%6.2f");
 +    sprintf(format4,"%s%s\n",pdbformat4,"%6.2f%6.2f");
 +    
 +    out = gmx_fio_fopen(fname,"w");
 +    
 +    fprintf(out,"TITLE     %s\n",title);
 +    gmx_write_pdb_box(out,dd->bScrewPBC ? epbcSCREW : epbcXYZ,box);
 +    for(i=0; i<natoms; i++)
 +    {
 +        /* ii is the global index of local atom i */
 +        ii = dd->gatindex[i];
 +        gmx_mtop_atominfo_global(mtop,ii,&atomname,&resnr,&resname);
 +        if (i < dd->comm->nat[ddnatZONE])
 +        {
 +            /* Find the zone whose charge group range contains atom i */
 +            c = 0;
 +            while (i >= dd->cgindex[dd->comm->zones.cg_range[c+1]])
 +            {
 +                c++;
 +            }
 +            b = c;
 +        }
 +        else if (i < dd->comm->nat[ddnatVSITE])
 +        {
 +            b = dd->comm->zones.n;
 +        }
 +        else
 +        {
 +            b = dd->comm->zones.n + 1;
 +        }
 +        /* Atom and residue numbers are wrapped to fit their columns;
 +         * coordinates are multiplied by 10 (presumably nm to Angstrom --
 +         * TODO confirm).
 +         */
 +        fprintf(out,strlen(atomname)<4 ? format : format4,
 +                "ATOM",(ii+1)%100000,
 +                atomname,resname,' ',resnr%10000,' ',
 +                10*x[i][XX],10*x[i][YY],10*x[i][ZZ],1.0,b);
 +    }
 +    fprintf(out,"TER\n");
 +    
 +    gmx_fio_fclose(out);
 +}
 +
 +/* Returns the cut-off stored or derived for multi-body interactions.
 + * The result is -1 when comm->bInterCGBondeds is not set and
 + * comm->cutoff_mbody when that is positive. Otherwise it is the smallest
 + * cellsize_min over the decomposed dimensions, raised to at least
 + * cutoff_mbody with bBondComm or limited to at most comm->cutoff without.
 + */
 +real dd_cutoff_mbody(gmx_domdec_t *dd)
 +{
 +    gmx_domdec_comm_t *comm = dd->comm;
 +    real cut;
 +    int  d;
 +
 +    if (!comm->bInterCGBondeds)
 +    {
 +        return -1;
 +    }
 +    if (comm->cutoff_mbody > 0)
 +    {
 +        return comm->cutoff_mbody;
 +    }
 +
 +    /* cutoff_mbody=0 means we do not have DLB */
 +    cut = comm->cellsize_min[dd->dim[0]];
 +    for(d=1; d<dd->ndim; d++)
 +    {
 +        cut = min(cut,comm->cellsize_min[dd->dim[d]]);
 +    }
 +
 +    if (comm->bBondComm)
 +    {
 +        cut = max(cut,comm->cutoff_mbody);
 +    }
 +    else
 +    {
 +        cut = min(cut,comm->cutoff);
 +    }
 +
 +    return cut;
 +}
 +
 +/* Returns the larger of comm->cutoff and the value of dd_cutoff_mbody. */
 +real dd_cutoff_twobody(gmx_domdec_t *dd)
 +{
 +    real cutoff_mb = dd_cutoff_mbody(dd);
 +    real cutoff    = dd->comm->cutoff;
 +
 +    return max(cutoff,cutoff_mb);
 +}
 +
 +
 +/* Copies coord to coord_pme and replaces the component along
 + * comm->cartpmedim: the dd->nc cells along that dimension are mapped
 + * evenly onto the remaining ntot - nc cells that follow them.
 + */
 +static void dd_cart_coord2pmecoord(gmx_domdec_t *dd,ivec coord,ivec coord_pme)
 +{
 +    int pmedim = dd->comm->cartpmedim;
 +    int ncell  = dd->nc[pmedim];
 +    int nextra = dd->comm->ntot[pmedim] - ncell;
 +
 +    copy_ivec(coord,coord_pme);
 +    /* Adding nextra/2 before the division centers the mapping */
 +    coord_pme[pmedim] = ncell + (coord[pmedim]*nextra + nextra/2)/ncell;
 +}
 +
 +/* Maps DD index ddindex (out of ndd) onto a PME index (out of npme),
 + * assuming that the major index of both is x.
 + * Adding npme/2 before the integer division gives an even distribution.
 + */
 +static int low_ddindex2pmeindex(int ndd,int npme,int ddindex)
 +{
 +    int scaled;
 +
 +    scaled = ddindex*npme + npme/2;
 +
 +    return scaled/ndd;
 +}
 +
 +/* PME index for DD index ddindex, with the node counts taken from dd */
 +static int ddindex2pmeindex(const gmx_domdec_t *dd,int ddindex)
 +{
 +    int ndd  = dd->nnodes;
 +    int npme = dd->comm->npmenodes;
 +
 +    return low_ddindex2pmeindex(ndd,npme,ddindex);
 +}
 +
 +/* PME index for DD index ddindex, with the PME node count taken from cr */
 +static int cr_ddindex2pmeindex(const t_commrec *cr,int ddindex)
 +{
 +    int ndd  = cr->dd->nnodes;
 +    int npme = cr->npmenodes;
 +
 +    return low_ddindex2pmeindex(ndd,npme,ddindex);
 +}
 +
 +/* Returns a newly allocated array with cr->npmenodes entries.
 + * An entry is added after each DD index i that is the last one or whose
 + * successor maps to a higher PME index; entry n gets the value i + 1 + n.
 + */
 +static int *dd_pmenodes(t_commrec *cr)
 +{
 +    int *pmenodes;
 +    int nfound,ddnode,pme_this,pme_next;
 +
 +    snew(pmenodes,cr->npmenodes);
 +    nfound = 0;
 +    for(ddnode=0; ddnode<cr->dd->nnodes; ddnode++)
 +    {
 +        pme_this = cr_ddindex2pmeindex(cr,ddnode);
 +        pme_next = cr_ddindex2pmeindex(cr,ddnode+1);
 +        if (ddnode+1 != cr->dd->nnodes && pme_next <= pme_this)
 +        {
 +            /* The next DD index still maps to the same PME index */
 +            continue;
 +        }
 +        if (debug)
 +        {
 +            fprintf(debug,"pmenode[%d] = %d\n",nfound,ddnode+1+nfound);
 +        }
 +        pmenodes[nfound] = ddnode + 1 + nfound;
 +        nfound++;
 +    }
 +
 +    return pmenodes;
 +}
 +
 +/* Returns the PME index for the DD cell with coordinates (x,y,z): the
 + * coordinates are converted to a DD index with dd_index and then mapped
 + * with ddindex2pmeindex.
 + */
 +static int gmx_ddcoord2pmeindex(t_commrec *cr,int x,int y,int z)
 +{
 +    gmx_domdec_t *dd;
 +    ivec coords;
 +    int  slab;
 +    
 +    dd = cr->dd;
 +    /* Disabled alternative, kept for reference:
 +      if (dd->comm->bCartesian) {
 +      gmx_ddindex2xyz(dd->nc,ddindex,coords);
 +      dd_coords2pmecoords(dd,coords,coords_pme);
 +      copy_ivec(dd->ntot,nc);
 +      nc[dd->cartpmedim]         -= dd->nc[dd->cartpmedim];
 +      coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
 +      
 +      slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
 +      } else {
 +      slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
 +      }
 +    */
 +    coords[XX] = x;
 +    coords[YY] = y;
 +    coords[ZZ] = z;
 +    slab = ddindex2pmeindex(dd,dd_index(dd->nc,coords));
 +    
 +    return slab;
 +}
 +
 +/* Returns the simulation node id of the DD cell with coordinates (x,y,z).
 + * With bCartesianPP_PME the id comes from MPI_Cart_rank (and stays -1
 + * without GMX_MPI). Otherwise the DD index is looked up in
 + * ddindex2simnodeid with bCartesianPP, offset by the PME index of the cell
 + * when comm->pmenodes is set, or used as is.
 + */
 +static int ddcoord2simnodeid(t_commrec *cr,int x,int y,int z)
 +{
 +    gmx_domdec_comm_t *comm = cr->dd->comm;
 +    ivec cell;
 +    int  index,simnode;
 +    
 +    cell[XX] = x;
 +    cell[YY] = y;
 +    cell[ZZ] = z;
 +
 +    simnode = -1;
 +    if (comm->bCartesianPP_PME)
 +    {
 +#ifdef GMX_MPI
 +        MPI_Cart_rank(cr->mpi_comm_mysim,cell,&simnode);
 +#endif
 +        return simnode;
 +    }
 +
 +    index = dd_index(cr->dd->nc,cell);
 +    if (comm->bCartesianPP)
 +    {
 +        simnode = comm->ddindex2simnodeid[index];
 +    }
 +    else if (comm->pmenodes)
 +    {
 +        simnode = index + gmx_ddcoord2pmeindex(cr,x,y,z);
 +    }
 +    else
 +    {
 +        simnode = index;
 +    }
 +  
 +    return simnode;
 +}
 +
 +/* Returns the PME node that simulation node sim_nodeid is assigned to,
 + * or -1 when none of the branches below assigns one (gmx_pmeonlynode
 + * treats -1 as "sim_nodeid is a PME-only node").
 + */
 +static int dd_simnode2pmenode(t_commrec *cr,int sim_nodeid)
 +{
 +    gmx_domdec_t *dd;
 +    gmx_domdec_comm_t *comm;
 +    ivec coord,coord_pme;
 +    int  i;
 +    int  pmenode=-1;
 +    
 +    dd = cr->dd;
 +    comm = dd->comm;
 +    
 +    /* This assumes a uniform x domain decomposition grid cell size */
 +    if (comm->bCartesianPP_PME)
 +    {
 +#ifdef GMX_MPI
 +        MPI_Cart_coords(cr->mpi_comm_mysim,sim_nodeid,DIM,coord);
 +        if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
 +        {
 +            /* This is a PP node */
 +            dd_cart_coord2pmecoord(dd,coord,coord_pme);
 +            MPI_Cart_rank(cr->mpi_comm_mysim,coord_pme,&pmenode);
 +        }
 +#endif
 +    }
 +    else if (comm->bCartesianPP)
 +    {
 +        /* Ids below dd->nnodes are used as DD index here;
 +         * the PME node ids start at dd->nnodes.
 +         */
 +        if (sim_nodeid < dd->nnodes)
 +        {
 +            pmenode = dd->nnodes + ddindex2pmeindex(dd,sim_nodeid);
 +        }
 +    }
 +    else
 +    {
 +        /* This assumes DD cells with identical x coordinates
 +         * are numbered sequentially.
 +         */
 +        if (dd->comm->pmenodes == NULL)
 +        {
 +            if (sim_nodeid < dd->nnodes)
 +            {
 +                /* The DD index equals the nodeid */
 +                pmenode = dd->nnodes + ddindex2pmeindex(dd,sim_nodeid);
 +            }
 +        }
 +        else
 +        {
 +            /* Find the first entry of pmenodes that is >= sim_nodeid.
 +             * NOTE(review): the loop has no bound on i; it relies on such
 +             * an entry existing -- confirm for all valid sim_nodeid.
 +             */
 +            i = 0;
 +            while (sim_nodeid > dd->comm->pmenodes[i])
 +            {
 +                i++;
 +            }
 +            /* On equality sim_nodeid is itself in pmenodes: keep -1 */
 +            if (sim_nodeid < dd->comm->pmenodes[i])
 +            {
 +                pmenode = dd->comm->pmenodes[i];
 +            }
 +        }
 +    }
 +    
 +    return pmenode;
 +}
 +
 +/* Returns whether sim_nodeid is a PME-only node: with domain decomposition
 + * this is the case when dd_simnode2pmenode returns -1 for it; without
 + * domain decomposition the answer is always FALSE.
 + */
 +gmx_bool gmx_pmeonlynode(t_commrec *cr,int sim_nodeid)
 +{
 +    if (!DOMAINDECOMP(cr))
 +    {
 +        return FALSE;
 +    }
 +
 +    return (dd_simnode2pmenode(cr,sim_nodeid) == -1);
 +}
 +
 +/* Builds the list of simulation node ids of the DD cells that belong to a
 + * PME node.
 + * *my_ddnodes is newly allocated with ceil(nnodes/npmenodes) entries,
 + * *nmy_ddnodes returns the number of entries filled and *node_peer the
 + * last entry of the list.
 + * With bCartesianPP_PME a cell is selected when its PME coordinates equal
 + * dd->ci (pmenodeid is not used in that case); otherwise when its PME
 + * index equals pmenodeid.
 + */
 +void get_pme_ddnodes(t_commrec *cr,int pmenodeid,
 +                     int *nmy_ddnodes,int **my_ddnodes,int *node_peer)
 +{
 +    gmx_domdec_t *dd;
 +    int x,y,z;
 +    ivec coord,coord_pme;
 +    
 +    dd = cr->dd;
 +    
 +    snew(*my_ddnodes,(dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
 +    
 +    *nmy_ddnodes = 0;
 +    for(x=0; x<dd->nc[XX]; x++)
 +    {
 +        for(y=0; y<dd->nc[YY]; y++)
 +        {
 +            for(z=0; z<dd->nc[ZZ]; z++)
 +            {
 +                if (dd->comm->bCartesianPP_PME)
 +                {
 +                    coord[XX] = x;
 +                    coord[YY] = y;
 +                    coord[ZZ] = z;
 +                    dd_cart_coord2pmecoord(dd,coord,coord_pme);
 +                    if (dd->ci[XX] == coord_pme[XX] &&
 +                        dd->ci[YY] == coord_pme[YY] &&
 +                        dd->ci[ZZ] == coord_pme[ZZ])
 +                        (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr,x,y,z);
 +                }
 +                else
 +                {
 +                    /* The slab corresponds to the nodeid in the PME group */
 +                    if (gmx_ddcoord2pmeindex(cr,x,y,z) == pmenodeid)
 +                    {
 +                        (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr,x,y,z);
 +                    }
 +                }
 +            }
 +        }
 +    }
 +    
 +    /* The last PP-only node is the peer node.
 +     * NOTE(review): when no cell was selected above, *nmy_ddnodes is 0 and
 +     * this reads element -1 -- confirm that cannot happen for valid setups.
 +     */
 +    *node_peer = (*my_ddnodes)[*nmy_ddnodes-1];
 +    
 +    if (debug)
 +    {
 +        fprintf(debug,"Receive coordinates from PP nodes:");
 +        for(x=0; x<*nmy_ddnodes; x++)
 +        {
 +            fprintf(debug," %d",(*my_ddnodes)[x]);
 +        }
 +        fprintf(debug,"\n");
 +    }
 +}
 +
 +/* Returns TRUE unless a following PP node shares the PME node of this node.
 + * When there are at least as many PME nodes as DD nodes the result is
 + * always TRUE. Otherwise FALSE is returned when the next node (the next
 + * Cartesian coordinate along cartpmedim with bCartesianPP_PME, else
 + * sim_nodeid+1) maps to the same PME node, i.e. when this node is not the
 + * last PP node of its PME node.
 + */
 +static gmx_bool receive_vir_ener(t_commrec *cr)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int  pmenode,coords[DIM],rank;
 +    gmx_bool bReceive;
 +    
 +    bReceive = TRUE;
 +    if (cr->npmenodes < cr->dd->nnodes)
 +    {
 +        comm = cr->dd->comm;
 +        if (comm->bCartesianPP_PME)
 +        {
 +            pmenode = dd_simnode2pmenode(cr,cr->sim_nodeid);
 +#ifdef GMX_MPI
 +            /* Look at the neighbor one step further along cartpmedim */
 +            MPI_Cart_coords(cr->mpi_comm_mysim,cr->sim_nodeid,DIM,coords);
 +            coords[comm->cartpmedim]++;
 +            if (coords[comm->cartpmedim] < cr->dd->nc[comm->cartpmedim])
 +            {
 +                MPI_Cart_rank(cr->mpi_comm_mysim,coords,&rank);
 +                if (dd_simnode2pmenode(cr,rank) == pmenode)
 +                {
 +                    /* This is not the last PP node for pmenode */
 +                    bReceive = FALSE;
 +                }
 +            }
 +#endif  
 +        }
 +        else
 +        {
 +            pmenode = dd_simnode2pmenode(cr,cr->sim_nodeid);
 +            /* The bound check keeps sim_nodeid+1 a valid node id */
 +            if (cr->sim_nodeid+1 < cr->nnodes &&
 +                dd_simnode2pmenode(cr,cr->sim_nodeid+1) == pmenode)
 +            {
 +                /* This is not the last PP node for pmenode */
 +                bReceive = FALSE;
 +            }
 +        }
 +    }
 +    
 +    return bReceive;
 +}
 +
 +/* Resets the zone charge group ranges so that only the first range is
 + * populated: cg_range[0] is 0 and cg_range[1..zones.n] all equal
 + * dd->ncg_home.
 + */
 +static void set_zones_ncg_home(gmx_domdec_t *dd)
 +{
 +    gmx_domdec_zones_t *zones = &dd->comm->zones;
 +    int zone;
 +
 +    zones->cg_range[0] = 0;
 +    for(zone=1; zone<=zones->n; zone++)
 +    {
 +        zones->cg_range[zone] = dd->ncg_home;
 +    }
 +}
 +
 +/* Rebuilds the home charge group data of dd from the charge group list
 + * stored in state: copies state->cg_gl to dd->index_gl, fills dd->cgindex
 + * with the running atom count (charge group sizes come from gcgs_index),
 + * sets dd->ncg_home and dd->nat_home and resets the zone ranges.
 + */
 +static void rebuild_cgindex(gmx_domdec_t *dd,int *gcgs_index,t_state *state)
 +{
 +    int natoms,cg,global_cg;
 +    
 +    natoms = 0;
 +    dd->cgindex[0] = natoms;
 +    for(cg=0; cg<state->ncg_gl; cg++)
 +    {
 +        global_cg = state->cg_gl[cg];
 +        dd->index_gl[cg] = global_cg;
 +        natoms += gcgs_index[global_cg+1] - gcgs_index[global_cg];
 +        /* Entry cg+1 is the first atom after charge group cg */
 +        dd->cgindex[cg+1] = natoms;
 +    }
 +    
 +    dd->ncg_home = state->ncg_gl;
 +    dd->nat_home = natoms;
 +
 +    set_zones_ncg_home(dd);
 +}
 +
 +/* Looks up the cginfo value of global charge group cg.
 + * Walks the cginfo_mb array until the entry whose cg_end lies beyond cg
 + * and returns that entry's cginfo element at offset
 + * (cg - cg_start) modulo cg_mod.
 + */
 +static int ddcginfo(const cginfo_mb_t *cginfo_mb,int cg)
 +{
 +    const cginfo_mb_t *entry;
 +
 +    for(entry=cginfo_mb; cg >= entry->cg_end; entry++)
 +    {
 +        /* skip entries that end at or before cg */
 +    }
 +
 +    return entry->cginfo[(cg - entry->cg_start) % entry->cg_mod];
 +}
 +
 +/* For the local charge groups cg0 up to (not including) cg1:
 + * - when fr is not NULL, fills fr->cginfo[cg] from fr->cginfo_mb using
 + *   the global index index_gl[cg];
 + * - when bLocalCG is not NULL, sets bLocalCG[index_gl[cg]] to TRUE.
 + */
 +static void dd_set_cginfo(int *index_gl,int cg0,int cg1,
 +                          t_forcerec *fr,char *bLocalCG)
 +{
 +    int cg;
 +
 +    if (fr != NULL)
 +    {
 +        cg = cg0;
 +        while (cg < cg1)
 +        {
 +            fr->cginfo[cg] = ddcginfo(fr->cginfo_mb,index_gl[cg]);
 +            cg++;
 +        }
 +    }
 +
 +    if (bLocalCG != NULL)
 +    {
 +        cg = cg0;
 +        while (cg < cg1)
 +        {
 +            bLocalCG[index_gl[cg]] = TRUE;
 +            cg++;
 +        }
 +    }
 +}
 +
 +/* Builds the local-to-global atom index (dd->gatindex) and the
 + * global-to-local lookup (dd->ga2la) for all charge groups starting at
 + * local charge group cg_start.
 + * gcgs_index gives, per global charge group, its first global atom index.
 + * dd->gatindex is grown when dd->nat_tot exceeds its allocation.
 + * Calls gmx_incons when the zone ranges do not match dd->ncg_home.
 + */
 +static void make_dd_indices(gmx_domdec_t *dd,int *gcgs_index,int cg_start)
 +{
 +    int nzone,zone,zone1,cg0,cg,cg_gl,a,a_gl;
 +    int *zone2cg,*zone_ncg1,*index_gl,*gatindex;
 +
 +    if (dd->nat_tot > dd->gatindex_nalloc)
 +    {
 +        dd->gatindex_nalloc = over_alloc_dd(dd->nat_tot);
 +        srenew(dd->gatindex,dd->gatindex_nalloc);
 +    }
 +
 +    nzone      = dd->comm->zones.n;
 +    zone2cg    = dd->comm->zones.cg_range;
 +    zone_ncg1  = dd->comm->zone_ncg1;
 +    index_gl   = dd->index_gl;
 +    /* Read after the srenew above, which may have moved the array */
 +    gatindex   = dd->gatindex;
 +
 +    if (zone2cg[1] != dd->ncg_home)
 +    {
 +        gmx_incons("dd->ncg_zone is not up to date");
 +    }
 +
 +    /* Make the local to global and global to local atom index */
 +    a = dd->cgindex[cg_start];
 +    for(zone=0; zone<nzone; zone++)
 +    {
 +        /* Zone 0 starts at cg_start, the other zones at their own start */
 +        cg0 = (zone == 0 ? cg_start : zone2cg[zone]);
 +
 +        for(cg=cg0; cg<zone2cg[zone+1]; cg++)
 +        {
 +            zone1 = zone;
 +            if (cg - cg0 >= zone_ncg1[zone])
 +            {
 +                /* Signal that this cg is from more than one zone away */
 +                zone1 += nzone;
 +            }
 +            cg_gl = index_gl[cg];
 +            for(a_gl=gcgs_index[cg_gl]; a_gl<gcgs_index[cg_gl+1]; a_gl++)
 +            {
 +                gatindex[a] = a_gl;
 +                ga2la_set(dd->ga2la,a_gl,a,zone1);
 +                a++;
 +            }
 +        }
 +    }
 +}
 +
 +/* Consistency check of the bLocalCG flags against the local charge groups.
 + * Reports to stderr every local charge group (0..dd->ncg_tot-1) whose
 + * global index is not flagged, and reports when the number of flagged
 + * entries among the ncg_sys global charge groups differs from dd->ncg_tot.
 + * Returns the number of problems found; 0 when bLocalCG is NULL.
 + * where is a caller-supplied label that is printed in the messages.
 + */
 +static int check_bLocalCG(gmx_domdec_t *dd,int ncg_sys,const char *bLocalCG,
 +                          const char *where)
 +{
 +    int cg,nmarked,nerr;
 +
 +    if (bLocalCG == NULL)
 +    {
 +        return 0;
 +    }
 +
 +    nerr = 0;
 +    for(cg=0; cg<dd->ncg_tot; cg++)
 +    {
 +        if (bLocalCG[dd->index_gl[cg]])
 +        {
 +            continue;
 +        }
 +        fprintf(stderr,
 +                "DD node %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n",dd->rank,where,cg+1,dd->index_gl[cg]+1,dd->ncg_home);
 +        nerr++;
 +    }
 +
 +    /* Count the flags that are set over the whole system */
 +    nmarked = 0;
 +    for(cg=0; cg<ncg_sys; cg++)
 +    {
 +        nmarked += (bLocalCG[cg] ? 1 : 0);
 +    }
 +    if (nmarked != dd->ncg_tot)
 +    {
 +        fprintf(stderr,"DD node %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n",dd->rank,where,nmarked,dd->ncg_tot);
 +        nerr++;
 +    }
 +
 +    return nerr;
 +}
 +
 +/* Debug check of the local/global atom and charge-group indices.
 + * - With dd->comm->DD_debug > 1: reports global atoms that occur more
 + *   than once in dd->gatindex (reported only, not counted as error).
 + * - Checks that every global atom found in dd->ga2la maps to a local
 + *   index below dd->nat_tot whose dd->gatindex entry points back to it;
 + *   these mismatches are counted as errors.
 + * - Reports a mismatch between the number of ga2la entries and
 + *   dd->nat_tot, and local atoms that no ga2la entry points to
 + *   (both reported only, not counted).
 + * - Adds the error count of check_bLocalCG.
 + * Calls gmx_fatal when the error count is above zero.
 + * where is a caller-supplied label that is printed in the messages.
 + */
 +static void check_index_consistency(gmx_domdec_t *dd,
 +                                    int natoms_sys,int ncg_sys,
 +                                    const char *where)
 +{
 +    int  nerr,nfound,a_gl,a,cell;
 +    int  *have;
 +
 +    if (dd->comm->DD_debug > 1)
 +    {
 +        /* have[global atom] stores local index + 1 of its first occurrence */
 +        snew(have,natoms_sys);
 +        for(a=0; a<dd->nat_tot; a++)
 +        {
 +            a_gl = dd->gatindex[a];
 +            if (have[a_gl] <= 0)
 +            {
 +                have[a_gl] = a + 1;
 +            }
 +            else
 +            {
 +                fprintf(stderr,"DD node %d: global atom %d occurs twice: index %d and %d\n",dd->rank,a_gl+1,have[a_gl],a+1);
 +            }
 +        }
 +        sfree(have);
 +    }
 +
 +    /* have[local atom] is set to 1 when a ga2la entry points to it */
 +    snew(have,dd->nat_tot);
 +
 +    nerr   = 0;
 +    nfound = 0;
 +    for(a_gl=0; a_gl<natoms_sys; a_gl++)
 +    {
 +        if (!ga2la_get(dd->ga2la,a_gl,&a,&cell))
 +        {
 +            continue;
 +        }
 +        nfound++;
 +
 +        if (a >= dd->nat_tot)
 +        {
 +            fprintf(stderr,"DD node %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n",dd->rank,a_gl+1,a+1,dd->nat_tot);
 +            nerr++;
 +            continue;
 +        }
 +
 +        have[a] = 1;
 +        if (dd->gatindex[a] != a_gl)
 +        {
 +            fprintf(stderr,"DD node %d: global atom %d marked as local atom %d, which has global atom index %d\n",dd->rank,a_gl+1,a+1,dd->gatindex[a]+1);
 +            nerr++;
 +        }
 +    }
 +    if (nfound != dd->nat_tot)
 +    {
 +        fprintf(stderr,
 +                "DD node %d, %s: %d global atom indices, %d local atoms\n",
 +                dd->rank,where,nfound,dd->nat_tot);
 +    }
 +    for(a=0; a<dd->nat_tot; a++)
 +    {
 +        if (have[a] != 0)
 +        {
 +            continue;
 +        }
 +        fprintf(stderr,
 +                "DD node %d, %s: local atom %d, global %d has no global index\n",
 +                dd->rank,where,a+1,dd->gatindex[a]+1);
 +    }
 +    sfree(have);
 +
 +    nerr += check_bLocalCG(dd,ncg_sys,dd->comm->bLocalCG,where);
 +
 +    if (nerr > 0)
 +    {
 +        gmx_fatal(FARGS,"DD node %d, %s: %d atom/cg index inconsistencies",
 +                  dd->rank,where,nerr);
 +    }
 +}
 +
 +/* Removes local index information from local atom a_start and local
 + * charge group cg_start onwards:
 + * - ga2la entries of atoms a_start..dd->nat_tot-1 (the whole table is
 + *   cleared at once when a_start is 0);
 + * - bLocalCG flags of charge groups cg_start..dd->ncg_tot-1, when the
 + *   flag array exists;
 + * - the local vsite indices and, when dd->constraints is set, the local
 + *   constraint indices.
 + */
 +static void clear_dd_indices(gmx_domdec_t *dd,int cg_start,int a_start)
 +{
 +    int  at,cg;
 +    char *local_flag;
 +
 +    if (a_start != 0)
 +    {
 +        for(at=a_start; at<dd->nat_tot; at++)
 +        {
 +            ga2la_del(dd->ga2la,dd->gatindex[at]);
 +        }
 +    }
 +    else
 +    {
 +        /* Clear the whole list without searching */
 +        ga2la_clear(dd->ga2la);
 +    }
 +
 +    local_flag = dd->comm->bLocalCG;
 +    if (local_flag != NULL)
 +    {
 +        for(cg=cg_start; cg<dd->ncg_tot; cg++)
 +        {
 +            local_flag[dd->index_gl[cg]] = FALSE;
 +        }
 +    }
 +
 +    dd_clear_local_vsite_indices(dd);
 +
 +    if (dd->constraints)
 +    {
 +        dd_clear_local_constraint_indices(dd);
 +    }
 +}
 +
 +/* Returns the lower limit for the distance between staggered cell
 + * boundaries for decomposition dimension index dim_ind.
 + *
 + * The distance between the boundaries of cells at distance
 + * x+-1,y+-1 or y+-1,z+-1 is limited by the cut-off restrictions
 + * and by the fact that cells should not be shifted by more than
 + * half their size, such that cg's only shift by one cell
 + * at redecomposition.
 + *
 + * The result is comm->cellsize_limit and, unless comm->bVacDLBNoLimit
 + * is set, at least comm->cutoff divided by comm->cd[dim_ind].np.
 + */
 +static real grid_jump_limit(gmx_domdec_comm_t *comm,int dim_ind)
 +{
 +    real limit;
 +
 +    limit = comm->cellsize_limit;
 +    if (comm->bVacDLBNoLimit)
 +    {
 +        return limit;
 +    }
 +
 +    limit = max(limit,
 +                comm->cutoff/comm->cd[dim_ind].np);
 +
 +    return limit;
 +}
 +
 +/* Checks for every decomposition dimension index d >= 1 that the
 + * staggered cell boundaries of this node keep at least the distance
 + * grid_jump_limit(comm,d) from the stored neighbor extremes
 + * comm->cell_f_max0[d] and comm->cell_f_min1[d].
 + * The fractional distances are scaled with the box size and, for
 + * triclinic directions, with the skew factor.
 + * Calls gmx_fatal, reporting step, on a violation.
 + */
 +static void check_grid_jump(gmx_large_int_t step,gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
 +{
 +    gmx_domdec_comm_t *comm = dd->comm;
 +    int      d,dim;
 +    real     limit,bfac;
 +    gmx_bool bUpperTooClose,bLowerTooClose;
 +    char     buf[22];
 +
 +    for(d=1; d<dd->ndim; d++)
 +    {
 +        dim   = dd->dim[d];
 +        limit = grid_jump_limit(comm,d);
 +
 +        /* Conversion factor from box fraction to distance */
 +        bfac = ddbox->box_size[dim];
 +        if (ddbox->tric_dir[dim])
 +        {
 +            bfac *= ddbox->skew_fac[dim];
 +        }
 +
 +        bUpperTooClose =
 +            ((comm->cell_f1[d] - comm->cell_f_max0[d])*bfac <  limit);
 +        bLowerTooClose =
 +            ((comm->cell_f0[d] - comm->cell_f_min1[d])*bfac > -limit);
 +
 +        if (bUpperTooClose || bLowerTooClose)
 +        {
 +            gmx_fatal(FARGS,"Step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d\n",
 +                      gmx_step_str(step,buf),
 +                      dim2char(dim),dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
 +        }
 +    }
 +}
 +
 +/* Returns the number of load measurements available:
 + * comm->flop_n when comm->eFlop is set, otherwise the count of the
 + * force cycle counter, comm->cycl_n[ddCyclF].
 + */
 +static int dd_load_count(gmx_domdec_comm_t *comm)
 +{
 +    if (comm->eFlop)
 +    {
 +        return comm->flop_n;
 +    }
 +
 +    return comm->cycl_n[ddCyclF];
 +}
 +
 +/* Returns the force load of this node used for load balancing.
 + * With comm->eFlop set the load is comm->flop; for eFlop > 1 it is
 + * additionally multiplied by a random factor around 1 that grows with
 + * eFlop. Otherwise the load is the force cycle count comm->cycl[ddCyclF],
 + * minus the stored maximum when more than one count was collected.
 + */
 +static float dd_force_load(gmx_domdec_comm_t *comm)
 +{
 +    float load;
 +
 +    if (!comm->eFlop)
 +    {
 +        load = comm->cycl[ddCyclF];
 +        if (comm->cycl_n[ddCyclF] > 1)
 +        {
 +            /* Subtract the maximum of the last n cycle counts
 +             * to get rid of possible high counts due to other sources,
 +             * for instance system activity, that would otherwise
 +             * affect the dynamic load balancing.
 +             */
 +            load -= comm->cycl_max[ddCyclF];
 +        }
 +
 +        return load;
 +    }
 +
 +    load = comm->flop;
 +    if (comm->eFlop > 1)
 +    {
 +        /* Artificial random perturbation of the flop count */
 +        load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
 +    }
 +
 +    return load;
 +}
 +
 +/* Allocates *dim_f with dd->nc[dim]+1 elements and fills it with the
 + * fractional cell boundaries along dim: 0 at index 0, 1 at index nc,
 + * and in between either the running sum of comm->slb_frac[dim] (when
 + * that array is set) or the uniform values i/nc.
 + */
 +static void set_slb_pme_dim_f(gmx_domdec_t *dd,int dim,real **dim_f)
 +{
 +    int  ncell,i;
 +    real *frac,*bound;
 +
 +    ncell = dd->nc[dim];
 +    frac  = dd->comm->slb_frac[dim];
 +
 +    snew(*dim_f,ncell+1);
 +    bound = *dim_f;
 +
 +    bound[0] = 0;
 +    if (frac)
 +    {
 +        /* Static load balancing fractions: accumulate the cell sizes */
 +        for(i=1; i<ncell; i++)
 +        {
 +            bound[i] = bound[i-1] + frac[i-1];
 +        }
 +    }
 +    else
 +    {
 +        /* Uniform grid */
 +        for(i=1; i<ncell; i++)
 +        {
 +            bound[i] = (real)i/(real)ncell;
 +        }
 +    }
 +    bound[ncell] = 1;
 +}
 +
 +/* Initializes the PME decomposition data ddpme for PME decomposition
 + * dimension index dimind: the dimension, whether it matches the DD
 + * dimension with the same index, and the number of slabs.
 + * With more than one slab it also allocates and fills, per slab, the
 + * minimum and maximum PP cell index (pp_min, pp_max) and sets up the
 + * fractional boundaries in ddpme->slb_dim_f.
 + */
 +static void init_ddpme(gmx_domdec_t *dd,gmx_ddpme_t *ddpme,int dimind)
 +{
 +    int  node,slab,pmeindex,nso,loc;
 +    ivec xyz;
 +
 +    ddpme->dim = dimind;
 +    if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
 +    {
 +        ddpme->dim = YY;
 +    }
 +    ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
 +
 +    if (ddpme->dim == 0)
 +    {
 +        ddpme->nslab = dd->comm->npmenodes_x;
 +    }
 +    else
 +    {
 +        ddpme->nslab = dd->comm->npmenodes_y;
 +    }
 +
 +    if (ddpme->nslab <= 1)
 +    {
 +        return;
 +    }
 +
 +    nso = dd->comm->npmenodes/ddpme->nslab;
 +    /* Determine for each PME slab the PP location range for dimension dim */
 +    snew(ddpme->pp_min,ddpme->nslab);
 +    snew(ddpme->pp_max,ddpme->nslab);
 +    for(slab=0; slab<ddpme->nslab; slab++)
 +    {
 +        /* Start from an inverted range, narrowed down below */
 +        ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
 +        ddpme->pp_max[slab] = 0;
 +    }
 +    for(node=0; node<dd->nnodes; node++)
 +    {
 +        ddindex2xyz(dd->nc,node,xyz);
 +        /* For y only use our y/z slab.
 +         * This assumes that the PME x grid size matches the DD grid size.
 +         */
 +        if (dimind != 0 && xyz[XX] != dd->ci[XX])
 +        {
 +            continue;
 +        }
 +
 +        pmeindex = ddindex2pmeindex(dd,node);
 +        slab = (dimind == 0 ? pmeindex/nso : pmeindex % ddpme->nslab);
 +
 +        loc = xyz[dimind];
 +        if (loc < ddpme->pp_min[slab])
 +        {
 +            ddpme->pp_min[slab] = loc;
 +        }
 +        if (loc > ddpme->pp_max[slab])
 +        {
 +            ddpme->pp_max[slab] = loc;
 +        }
 +    }
 +
 +    set_slb_pme_dim_f(dd,ddpme->dim,&ddpme->slb_dim_f);
 +}
 +
 +/* Returns the PME maxshift along x: the maxshift of the first PME
 + * decomposition dimension when that dimension is XX, otherwise 0.
 + */
 +int dd_pme_maxshift_x(gmx_domdec_t *dd)
 +{
 +    gmx_ddpme_t *first = &dd->comm->ddpme[0];
 +
 +    return (first->dim == XX ? first->maxshift : 0);
 +}
 +
 +/* Returns the PME maxshift along y: taken from the first PME
 + * decomposition dimension when that is YY, else from the second one
 + * when there are at least two and the second is YY, otherwise 0.
 + */
 +int dd_pme_maxshift_y(gmx_domdec_t *dd)
 +{
 +    gmx_domdec_comm_t *comm = dd->comm;
 +    int shift = 0;
 +
 +    if (comm->ddpme[0].dim == YY)
 +    {
 +        shift = comm->ddpme[0].maxshift;
 +    }
 +    else if (comm->npmedecompdim >= 2 && comm->ddpme[1].dim == YY)
 +    {
 +        shift = comm->ddpme[1].maxshift;
 +    }
 +
 +    return shift;
 +}
 +
 +static void set_pme_maxshift(gmx_domdec_t *dd,gmx_ddpme_t *ddpme,
 +                             gmx_bool bUniform,gmx_ddbox_t *ddbox,real *cell_f)
 +{   /* Determines ddpme->maxshift, the PME slab communication range
 +     * (in units of slabs) for the PME decomposition described by ddpme.
 +     *
 +     * bUniform  flag that, together with ns == nc, selects the optimal
 +     *           case below
 +     * ddbox     only box_size[ddpme->dim] is read here
 +     * cell_f    cell boundaries along ddpme->dim; they are compared with
 +     *           the slab boundaries s/ns, so they are expected to be
 +     *           fractions of the box. Only read in the general case.
 +     *
 +     * The result is also printed to the debug file when that is open.
 +     */
 +    gmx_domdec_comm_t *comm;
 +    int  nc,ns,s;
 +    int  *xmin,*xmax;
 +    real range,pme_boundary;
 +    int  sh;
 +    
 +    comm = dd->comm;
 +    nc  = dd->nc[ddpme->dim]; /* number of DD cells along the PME dimension */
 +    ns  = ddpme->nslab;       /* number of PME slabs */
 +    
 +    if (!ddpme->dim_match)
 +    {
 +        /* PP decomposition is not along dim: the worst situation */
 +        sh = ns/2;
 +    }
 +    else if (ns <= 3 || (bUniform && ns == nc))
 +    {
 +        /* The optimal situation */
 +        sh = 1;
 +    }
 +    else
 +    {
 +        /* We need to check for all pme nodes which nodes they
 +         * could possibly need to communicate with.
 +         */
 +        xmin = ddpme->pp_min;
 +        xmax = ddpme->pp_max;
 +        /* Allow for atoms to be maximally 2/3 times the cut-off
 +         * out of their DD cell. This is a reasonable balance
 +         * between performance and support for most charge-group/cut-off
 +         * combinations.
 +         */
 +        range  = 2.0/3.0*comm->cutoff/ddbox->box_size[ddpme->dim];
 +        /* Avoid extra communication when we are exactly at a boundary */
 +        range *= 0.999;
 +        
 +        sh = 1; /* start from the minimum; the loops below only increase it */
 +        for(s=0; s<ns; s++)
 +        {
 +            /* PME slab s spreads atoms between box frac. s/ns and (s+1)/ns.
 +             * First the lower slab boundary: increase sh as long as the
 +             * slab sh+1 below s has PP cells whose upper boundary plus
 +             * range passes the lower boundary of slab s. A slab index
 +             * below 0 wraps around by ns, with the boundary shifted by -1.
 +             */
 +            pme_boundary = (real)s/ns;
 +            while (sh+1 < ns &&
 +                   ((s-(sh+1) >= 0 &&
 +                     cell_f[xmax[s-(sh+1)   ]+1]     + range > pme_boundary) ||
 +                    (s-(sh+1) <  0 &&
 +                     cell_f[xmax[s-(sh+1)+ns]+1] - 1 + range > pme_boundary)))
 +            {
 +                sh++;
 +            }
 +            pme_boundary = (real)(s+1)/ns; /* same test at the upper slab boundary, wrapping by +1 */
 +            while (sh+1 < ns &&
 +                   ((s+(sh+1) <  ns &&
 +                     cell_f[xmin[s+(sh+1)   ]  ]     - range < pme_boundary) ||
 +                    (s+(sh+1) >= ns &&
 +                     cell_f[xmin[s+(sh+1)-ns]  ] + 1 - range < pme_boundary)))
 +            {
 +                sh++;
 +            }
 +        }
 +    }
 +    
 +    ddpme->maxshift = sh;
 +    
 +    if (debug)
 +    {
 +        fprintf(debug,"PME slab communication range for dim %d is %d\n",
 +                ddpme->dim,ddpme->maxshift);
 +    }
 +}
 +
 +/* Checks, for every decomposition dimension below ddbox->nboundeddim,
 + * that the box size times the skew factor can hold dd->nc[dim] cells of
 + * size dd->comm->cellsize_limit times DD_CELL_MARGIN.
 + * Calls gmx_fatal when the box is too small.
 + */
 +static void check_box_size(gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
 +{
 +    int d,dim;
 +
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        dim = dd->dim[d];
 +        if (dim >= ddbox->nboundeddim)
 +        {
 +            /* No size check for this dimension */
 +            continue;
 +        }
 +
 +        if (ddbox->box_size[dim]*ddbox->skew_fac[dim] <
 +            dd->nc[dim]*dd->comm->cellsize_limit*DD_CELL_MARGIN)
 +        {
 +            gmx_fatal(FARGS,"The %c-size of the box (%f) times the triclinic skew factor (%f) is smaller than the number of DD cells (%d) times the smallest allowed cell size (%f)\n",
 +                      dim2char(dim),ddbox->box_size[dim],ddbox->skew_fac[dim],
 +                      dd->nc[dim],dd->comm->cellsize_limit);
 +        }
 +    }
 +}
 +
 +static void set_dd_cell_sizes_slb(gmx_domdec_t *dd,gmx_ddbox_t *ddbox,
 +                                  gmx_bool bMaster,ivec npulse)
 +{   /* Sets the cell boundaries for all DIM dimensions, either on a
 +     * uniform grid or using the static load balancing fractions in
 +     * comm->slb_frac, and determines the number of communication pulses.
 +     *
 +     * bMaster  when TRUE all boundaries are stored in dd->ma->cell_x,
 +     *          otherwise only the boundaries of our own cell (index
 +     *          dd->ci[d]) are stored in comm->cell_x0 and comm->cell_x1
 +     * npulse   output: per dimension the number of cells that is needed
 +     *          to cover comm->cutoff, at least 1 and limited by the
 +     *          loop condition npulse < nc-1
 +     *
 +     * Without dynamic load balancing the smallest cell sizes found are
 +     * copied to comm->cellsize_min. Finally the PME maxshift is set for
 +     * each PME decomposition dimension.
 +     */
 +    gmx_domdec_comm_t *comm;
 +    int  d,j;
 +    rvec cellsize_min;
 +    real *cell_x,cell_dx,cellsize;
 +    
 +    comm = dd->comm;
 +    
 +    for(d=0; d<DIM; d++)
 +    {
 +        cellsize_min[d] = ddbox->box_size[d]*ddbox->skew_fac[d];
 +        npulse[d] = 1;
 +        if (dd->nc[d] == 1 || comm->slb_frac[d] == NULL)
 +        {
 +            /* Uniform grid */
 +            cell_dx = ddbox->box_size[d]/dd->nc[d];
 +            if (bMaster)
 +            {
 +                for(j=0; j<dd->nc[d]+1; j++)
 +                {
 +                    dd->ma->cell_x[d][j] = ddbox->box0[d] + j*cell_dx;
 +                }
 +            }
 +            else
 +            {
 +                comm->cell_x0[d] = ddbox->box0[d] + (dd->ci[d]  )*cell_dx;
 +                comm->cell_x1[d] = ddbox->box0[d] + (dd->ci[d]+1)*cell_dx;
 +            }
 +            cellsize = cell_dx*ddbox->skew_fac[d];
 +            while (cellsize*npulse[d] < comm->cutoff && npulse[d] < dd->nc[d]-1)
 +            {
 +                npulse[d]++;
 +            }
 +            cellsize_min[d] = cellsize;
 +        }
 +        else
 +        {
 +            /* Statically load balanced grid */
 +            /* Also when we are not doing a master distribution we determine
 +             * all cell borders in a loop to obtain identical values
 +             * to the master distribution case and to determine npulse.
 +             */
 +            if (bMaster)
 +            {
 +                cell_x = dd->ma->cell_x[d];
 +            }
 +            else
 +            {
 +                snew(cell_x,dd->nc[d]+1); /* temporary, freed below */
 +            }
 +            cell_x[0] = ddbox->box0[d];
 +            for(j=0; j<dd->nc[d]; j++)
 +            {
 +                cell_dx = ddbox->box_size[d]*comm->slb_frac[d][j];
 +                cell_x[j+1] = cell_x[j] + cell_dx;
 +                cellsize = cell_dx*ddbox->skew_fac[d];
 +                while (cellsize*npulse[d] < comm->cutoff &&
 +                       npulse[d] < dd->nc[d]-1)
 +                {
 +                    npulse[d]++;
 +                }
 +                cellsize_min[d] = min(cellsize_min[d],cellsize);
 +            }
 +            if (!bMaster)
 +            {
 +                comm->cell_x0[d] = cell_x[dd->ci[d]];
 +                comm->cell_x1[d] = cell_x[dd->ci[d]+1];
 +                sfree(cell_x);
 +            }
 +        }
 +        /* The following limitation is to avoid that a cell would receive
 +         * some of its own home charge groups back over the periodic boundary.
 +         * Double charge groups cause trouble with the global indices.
 +         *
 +         * NOTE(review): the loops above stop incrementing npulse[d] at
 +         * nc-1, so with nc > 1 the condition npulse[d] >= nc below looks
 +         * like it can never be true - confirm whether this check is
 +         * still effective.
 +         */
 +        if (d < ddbox->npbcdim &&
 +            dd->nc[d] > 1 && npulse[d] >= dd->nc[d])
 +        {
 +            gmx_fatal_collective(FARGS,NULL,dd,
 +                                 "The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
 +                                 dim2char(d),ddbox->box_size[d],ddbox->skew_fac[d],
 +                                 comm->cutoff,
 +                                 dd->nc[d],dd->nc[d],
 +                                 dd->nnodes > dd->nc[d] ? "cells" : "processors");
 +        }
 +    }
 +    
 +    if (!comm->bDynLoadBal)
 +    {
 +        copy_rvec(cellsize_min,comm->cellsize_min);
 +    }
 +   
 +    for(d=0; d<comm->npmedecompdim; d++)
 +    {
 +        set_pme_maxshift(dd,&comm->ddpme[d],
 +                         comm->slb_frac[dd->dim[d]]==NULL,ddbox,
 +                         comm->ddpme[d].slb_dim_f);
 +    }
 +}
 +
 +
 +static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t *dd,
 +                                       int d,int dim,gmx_domdec_root_t *root,
 +                                       gmx_ddbox_t *ddbox,
 +                                       gmx_bool bUniform,gmx_large_int_t step, real cellsize_limit_f, int range[])
 +{   /* Recursive helper for the dynamic load balancing on the root of a
 +     * cell row: turns the requested cell sizes in root->buf_ncd into
 +     * cell boundaries root->cell_f for the cells range[0] up to (not
 +     * including) range[1], while enforcing the size and staggering limits.
 +     *
 +     * d                 decomposition dimension index
 +     * dim               the corresponding Cartesian dimension
 +     * root              the row data; cell_f[range[0]] and
 +     *                   cell_f[range[1]] are taken as fixed region bounds
 +     * bUniform          skips the halfway and bound_min/bound_max limits
 +     * step              only used in the fatal error message
 +     * cellsize_limit_f  the minimum cell size as a fraction
 +     * range             the two cell indices delimiting the region
 +     *
 +     * Sets root->bCellMin for the cells in the range and root->bLimited.
 +     * For d > 0 with bUniform it sets cell_f_max0 and cell_f_min1,
 +     * otherwise it pins boundaries that violate bound_min or bound_max
 +     * and calls itself on the resulting sub-ranges.
 +     * Calls gmx_fatal when, with pbc, the last cell in the range ends up
 +     * too small.
 +     */
 +    gmx_domdec_comm_t *comm;
 +    int  ncd,i,j,nmin,nmin_old;
 +    gmx_bool bLimLo,bLimHi;
 +    real *cell_size;
 +    real fac,halfway,cellsize_limit_f_i,region_size;
 +    gmx_bool bPBC,bLastHi=FALSE;
 +    int nrange[]={range[0],range[1]};
 +
 +    region_size= root->cell_f[range[1]]-root->cell_f[range[0]];  
 +
 +    comm = dd->comm;
 +
 +    ncd = dd->nc[dim];
 +
 +    bPBC = (dim < ddbox->npbcdim);
 +
 +    cell_size = root->buf_ncd;
 +
 +    if (debug) 
 +    {
 +        fprintf(debug,"enforce_limits: %d %d\n",range[0],range[1]);
 +    }
 +
 +    /* First we need to check if the scaling does not make cells
 +     * smaller than the smallest allowed size.
 +     * We need to do this iteratively, since if a cell is too small,
 +     * it needs to be enlarged, which makes all the other cells smaller,
 +     * which could in turn make another cell smaller than allowed.
 +     */
 +    for(i=range[0]; i<range[1]; i++)
 +    {
 +        root->bCellMin[i] = FALSE;
 +    }
 +    nmin = 0;
 +    do
 +    {
 +        nmin_old = nmin;
 +        /* We need the total for normalization */
 +        fac = 0;
 +        for(i=range[0]; i<range[1]; i++)
 +        {
 +            if (root->bCellMin[i] == FALSE)
 +            {
 +                fac += cell_size[i];
 +            }
 +        }
 +        fac = ( region_size - nmin*cellsize_limit_f)/fac; /* subtracting cells already set to cellsize_limit_f */
 +        /* Determine the cell boundaries */
 +        for(i=range[0]; i<range[1]; i++)
 +        {
 +            if (root->bCellMin[i] == FALSE)
 +            {
 +                cell_size[i] *= fac;
 +                if (!bPBC && (i == 0 || i == dd->nc[dim] -1))
 +                {
 +                    cellsize_limit_f_i = 0; /* no minimum for the two outer cells without pbc */
 +                }
 +                else
 +                {
 +                    cellsize_limit_f_i = cellsize_limit_f;
 +                }
 +                if (cell_size[i] < cellsize_limit_f_i)
 +                {
 +                    root->bCellMin[i] = TRUE;
 +                    cell_size[i] = cellsize_limit_f_i;
 +                    nmin++;
 +                }
 +            }
 +            root->cell_f[i+1] = root->cell_f[i] + cell_size[i];
 +        }
 +    }
 +    while (nmin > nmin_old); /* repeat until no new cell hit the minimum */
 +    
 +    i=range[1]-1;
 +    cell_size[i] = root->cell_f[i+1] - root->cell_f[i];
 +    /* For this check we should not use DD_CELL_MARGIN,
 +     * but a slightly smaller factor,
 +     * since rounding could get us below the limit.
 +     */
 +    if (bPBC && cell_size[i] < cellsize_limit_f*DD_CELL_MARGIN2/DD_CELL_MARGIN)
 +    {
 +        char buf[22];
 +        gmx_fatal(FARGS,"Step %s: the dynamic load balancing could not balance dimension %c: box size %f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
 +                  gmx_step_str(step,buf),
 +                  dim2char(dim),ddbox->box_size[dim],ddbox->skew_fac[dim],
 +                  ncd,comm->cellsize_min[dim]);
 +    }
 +    
 +    root->bLimited = (nmin > 0) || (range[0]>0) || (range[1]<ncd);
 +    
 +    if (!bUniform)
 +    {
 +        /* Check if the boundary did not displace more than halfway
 +         * each of the cells it bounds, as this could cause problems,
 +         * especially when the differences between cell sizes are large.
 +         * If changes are applied, they will not make cells smaller
 +         * than the cut-off, as we check all the boundaries which
 +         * might be affected by a change and if the old state was ok,
 +         * the cells will at most be shrunk back to their old size.
 +         */
 +        for(i=range[0]+1; i<range[1]; i++)
 +        {
 +            halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i-1]);
 +            if (root->cell_f[i] < halfway)
 +            {
 +                root->cell_f[i] = halfway;
 +                /* Check if the change also causes shifts of the next boundaries */
 +                for(j=i+1; j<range[1]; j++)
 +                {
 +                    if (root->cell_f[j] < root->cell_f[j-1] + cellsize_limit_f)
 +                        root->cell_f[j] =  root->cell_f[j-1] + cellsize_limit_f;
 +                }
 +            }
 +            halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i+1]);
 +            if (root->cell_f[i] > halfway)
 +            {
 +                root->cell_f[i] = halfway;
 +                /* Check if the change also causes shifts of the previous boundaries */
 +                for(j=i-1; j>=range[0]+1; j--)
 +                {
 +                    if (root->cell_f[j] > root->cell_f[j+1] - cellsize_limit_f)
 +                        root->cell_f[j] = root->cell_f[j+1] - cellsize_limit_f;
 +                }
 +            }
 +        }
 +    }
 +    
 +    /* nrange is defined as [lower, upper) range for new call to enforce_limits */
 +    /* find highest violation of LimLo (a) and the following violation of LimHi (thus the lowest following) (b)
 +     * then call enforce_limits for (oldb,a), (a,b). In the next step: (b,nexta). oldb and nexta can be the boundaries.
 +     * for a and b nrange is used */
 +    if (d > 0)
 +    {
 +        /* Take care of the staggering of the cell boundaries */
 +        if (bUniform)
 +        {
 +            for(i=range[0]; i<range[1]; i++)
 +            {
 +                root->cell_f_max0[i] = root->cell_f[i];
 +                root->cell_f_min1[i] = root->cell_f[i+1];
 +            }
 +        }
 +        else
 +        {
 +            for(i=range[0]+1; i<range[1]; i++)
 +            {
 +                bLimLo = (root->cell_f[i] < root->bound_min[i]);
 +                bLimHi = (root->cell_f[i] > root->bound_max[i]);
 +                if (bLimLo && bLimHi)
 +                {
 +                    /* Both limits violated, try the best we can */
 +                    /* For this case we split the original range (range) in two parts and care about the other limitations in the next iteration. */
 +                    root->cell_f[i] = 0.5*(root->bound_min[i] + root->bound_max[i]);
 +                    nrange[0]=range[0];
 +                    nrange[1]=i;
 +                    dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
 +
 +                    nrange[0]=i;
 +                    nrange[1]=range[1];
 +                    dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
 +
 +                    return;
 +                }
 +                else if (bLimLo)
 +                {
 +                    /* root->cell_f[i] = root->bound_min[i]; */
 +                    nrange[1]=i;  /* only store violation location. There could be a LimLo violation following with an higher index */
 +                    bLastHi=FALSE;
 +                }
 +                else if (bLimHi && !bLastHi)
 +                {
 +                    bLastHi=TRUE;
 +                    if (nrange[1] < range[1])   /* found a LimLo before */
 +                    {
 +                        root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
 +                        dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
 +                        nrange[0]=nrange[1];
 +                    }
 +                    root->cell_f[i] = root->bound_max[i];
 +                    nrange[1]=i; 
 +                    dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
 +                    nrange[0]=i;
 +                    nrange[1]=range[1];
 +                }
 +            }
 +            if (nrange[1] < range[1])   /* found last a LimLo */
 +            {
 +                root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
 +                dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
 +                nrange[0]=nrange[1];
 +                nrange[1]=range[1];
 +                dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
 +            } 
 +            else if (nrange[0] > range[0]) /* found at least one LimHi */
 +            {
 +                dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
 +            }
 +        }
 +    }
 +}
 +
 +
 +static void set_dd_cell_sizes_dlb_root(gmx_domdec_t *dd,
 +                                       int d,int dim,gmx_domdec_root_t *root,
 +                                       gmx_ddbox_t *ddbox,gmx_bool bDynamicBox,
 +                                       gmx_bool bUniform,gmx_large_int_t step)
 +{   /* Determines new relative cell boundaries root->cell_f[0..ncd] for
 +     * the row of ncd = dd->nc[dim] cells managed by this root, for
 +     * decomposition dimension index d.
 +     *
 +     * bUniform     use equal cell sizes 1/ncd instead of the load
 +     * bDynamicBox  for d > 0, scales the minimum staggering distance
 +     *              with DD_PRES_SCALE_MARGIN
 +     * step         only used in messages
 +     *
 +     * The previous boundaries are saved in root->old_cell_f. The requested
 +     * sizes are passed through dd_cell_sizes_dlb_root_enforce_limits.
 +     * After the ncd+1 boundaries, root->cell_f receives the cell_f0 and
 +     * cell_f1 values of the lower dimensions followed by the PME maxshift
 +     * value(s).
 +     *
 +     * NOTE(review): when bUniform is FALSE and dd_load_count() returns 0,
 +     * cell_size (root->buf_ncd) is not written here before use - confirm
 +     * that callers guarantee valid contents in that case.
 +     */
 +    gmx_domdec_comm_t *comm;
 +    int  ncd,d1,i,j,pos;
 +    real *cell_size;
 +    real load_aver,load_i,imbalance,change,change_max,sc;
 +    real cellsize_limit_f,dist_min_f,dist_min_f_hard,space;
 +    real change_limit;
 +    real relax = 0.5;
 +    gmx_bool bPBC;
 +    int range[] = { 0, 0 };
 +
 +    comm = dd->comm;
 +
 +    /* Convert the maximum change from the input percentage to a fraction */
 +    change_limit = comm->dlb_scale_lim*0.01;
 +
 +    ncd = dd->nc[dim];
 +
 +    bPBC = (dim < ddbox->npbcdim);
 +
 +    cell_size = root->buf_ncd;
 +
 +    /* Store the original boundaries */
 +    for(i=0; i<ncd+1; i++)
 +    {
 +        root->old_cell_f[i] = root->cell_f[i];
 +    }
 +    if (bUniform) {
 +        for(i=0; i<ncd; i++)
 +        {
 +            cell_size[i] = 1.0/ncd;
 +        }
 +    }
 +    else if (dd_load_count(comm))
 +    {
 +        load_aver = comm->load[d].sum_m/ncd;
 +        change_max = 0;
 +        for(i=0; i<ncd; i++)
 +        {
 +            /* Determine the relative imbalance of cell i */
 +            load_i = comm->load[d].load[i*comm->load[d].nload+2]; /* element 2 of the record of cell i; presumably its load - confirm the record layout */
 +            imbalance = (load_i - load_aver)/(load_aver>0 ? load_aver : 1);
 +            /* Determine the change of the cell size using underrelaxation */
 +            change = -relax*imbalance;
 +            change_max = max(change_max,max(change,-change));
 +        }
 +        /* Limit the amount of scaling.
 +         * We need to use the same rescaling for all cells in one row,
 +         * otherwise the load balancing might not converge.
 +         */
 +        sc = relax;
 +        if (change_max > change_limit)
 +        {
 +            sc *= change_limit/change_max;
 +        }
 +        for(i=0; i<ncd; i++)
 +        {
 +            /* Determine the relative imbalance of cell i */
 +            load_i = comm->load[d].load[i*comm->load[d].nload+2];
 +            imbalance = (load_i - load_aver)/(load_aver>0 ? load_aver : 1);
 +            /* Determine the change of the cell size using underrelaxation */
 +            change = -sc*imbalance;
 +            cell_size[i] = (root->cell_f[i+1]-root->cell_f[i])*(1 + change);
 +        }
 +    }
 +    
 +    cellsize_limit_f  = comm->cellsize_min[dim]/ddbox->box_size[dim];
 +    cellsize_limit_f *= DD_CELL_MARGIN;
 +    dist_min_f_hard        = grid_jump_limit(comm,d)/ddbox->box_size[dim];
 +    dist_min_f       = dist_min_f_hard * DD_CELL_MARGIN;
 +    if (ddbox->tric_dir[dim])
 +    {
 +        cellsize_limit_f /= ddbox->skew_fac[dim];
 +        dist_min_f       /= ddbox->skew_fac[dim];
 +    }
 +    if (bDynamicBox && d > 0)
 +    {
 +        dist_min_f *= DD_PRES_SCALE_MARGIN;
 +    }
 +    if (d > 0 && !bUniform)
 +    {
 +        /* Make sure that the grid is not shifted too much */
 +        for(i=1; i<ncd; i++) {
 +            if (root->cell_f_min1[i] - root->cell_f_max0[i-1] < 2 * dist_min_f_hard) 
 +            {
 +                gmx_incons("Inconsistent DD boundary staggering limits!");
 +            }
 +            root->bound_min[i] = root->cell_f_max0[i-1] + dist_min_f;
 +            space = root->cell_f[i] - (root->cell_f_max0[i-1] + dist_min_f);
 +            if (space > 0) {
 +                root->bound_min[i] += 0.5*space; /* move the lower bound halfway towards the current boundary */
 +            }
 +            root->bound_max[i] = root->cell_f_min1[i] - dist_min_f;
 +            space = root->cell_f[i] - (root->cell_f_min1[i] - dist_min_f);
 +            if (space < 0) {
 +                root->bound_max[i] += 0.5*space; /* space is negative: move the upper bound halfway down */
 +            }
 +            if (debug)
 +            {
 +                fprintf(debug,
 +                        "dim %d boundary %d %.3f < %.3f < %.3f < %.3f < %.3f\n",
 +                        d,i,
 +                        root->cell_f_max0[i-1] + dist_min_f,
 +                        root->bound_min[i],root->cell_f[i],root->bound_max[i],
 +                        root->cell_f_min1[i] - dist_min_f);
 +            }
 +        }
 +    }
 +    range[1]=ncd; /* enforce the limits over the whole row: cells 0 to ncd-1 */
 +    root->cell_f[0] = 0;
 +    root->cell_f[ncd] = 1;
 +    dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, range);
 +
 +
 +    /* After the checks above, the cells should obey the cut-off
 +     * restrictions, but it does not hurt to check.
 +     */
 +    for(i=0; i<ncd; i++)
 +    {
 +        if (debug)
 +        {
 +            fprintf(debug,"Relative bounds dim %d  cell %d: %f %f\n",
 +                    dim,i,root->cell_f[i],root->cell_f[i+1]);
 +        }
 +
 +        if ((bPBC || (i != 0 && i != dd->nc[dim]-1)) &&
 +            root->cell_f[i+1] - root->cell_f[i] <
 +            cellsize_limit_f/DD_CELL_MARGIN)
 +        {
 +            char buf[22];
 +            fprintf(stderr,
 +                    "\nWARNING step %s: direction %c, cell %d too small: %f\n",
 +                    gmx_step_str(step,buf),dim2char(dim),i,
 +                    (root->cell_f[i+1] - root->cell_f[i])
 +                    *ddbox->box_size[dim]*ddbox->skew_fac[dim]);
 +        }
 +    }
 +    
 +    pos = ncd + 1;
 +    /* Store the cell boundaries of the lower dimensions at the end */
 +    for(d1=0; d1<d; d1++)
 +    {
 +        root->cell_f[pos++] = comm->cell_f0[d1];
 +        root->cell_f[pos++] = comm->cell_f1[d1];
 +    }
 +    
 +    if (d < comm->npmedecompdim)
 +    {
 +        /* The master determines the maximum shift for
 +         * the coordinate communication between separate PME nodes.
 +         */
 +        set_pme_maxshift(dd,&comm->ddpme[d],bUniform,ddbox,root->cell_f);
 +    }
 +    root->cell_f[pos++] = comm->ddpme[0].maxshift; /* integer maxshift values are stored in the real array cell_f */
 +    if (d >= 1)
 +    {
 +        root->cell_f[pos++] = comm->ddpme[1].maxshift;
 +    }
 +}    
 +
 +/* Convert the relative (fractional) cell boundaries stored for DD dimension
 + * index dimind into absolute boundaries along the matching Cartesian
 + * dimension and store them in comm->cell_x0 / comm->cell_x1.
 + */
 +static void relative_to_absolute_cell_bounds(gmx_domdec_t *dd,
 +                                             gmx_ddbox_t *ddbox,int dimind)
 +{
 +    gmx_domdec_comm_t *comm = dd->comm;
 +    int               dim   = dd->dim[dimind];
 +
 +    /* Scale the fractions with the box size along this dimension */
 +    comm->cell_x0[dim] = comm->cell_f0[dimind]*ddbox->box_size[dim];
 +    comm->cell_x1[dim] = comm->cell_f1[dimind]*ddbox->box_size[dim];
 +
 +    if (dim < ddbox->nboundeddim)
 +    {
 +        return;
 +    }
 +
 +    /* For dimensions at or beyond nboundeddim the box origin is added */
 +    comm->cell_x0[dim] += ddbox->box0[dim];
 +    comm->cell_x1[dim] += ddbox->box0[dim];
 +}
 +
 +/* Distribute the relative cell boundaries along DD dimension index d
 + * (Cartesian dimension dim) and convert them, together with those of the
 + * lower DD dimensions, to absolute cell boundaries.
 + *
 + * cell_f_row is the communication buffer. As read below it holds:
 + *   - the dd->nc[dim]+1 boundary fractions of this dimension,
 + *   - a (cell_f0,cell_f1) pair for every lower DD dimension d1 < d,
 + *   - the PME maxshift for ddpme[0] and, when d >= 1, for ddpme[1],
 + *     both stored as reals.
 + * With GMX_MPI the buffer is broadcast from rank 0 of comm->mpi_comm_load[d].
 + */
 +static void distribute_dd_cell_sizes_dlb(gmx_domdec_t *dd,
 +                                         int d,int dim,real *cell_f_row,
 +                                         gmx_ddbox_t *ddbox)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int d1,pos;
 +
 +    comm = dd->comm;
 +
 +#ifdef GMX_MPI
 +    /* Each node would only need to know two fractions,
 +     * but it is probably cheaper to broadcast the whole array.
 +     */
 +    MPI_Bcast(cell_f_row,DD_CELL_F_SIZE(dd,d)*sizeof(real),MPI_BYTE,
 +              0,comm->mpi_comm_load[d]);
 +#endif
 +    /* Copy the fractions for this dimension from the buffer */
 +    comm->cell_f0[d] = cell_f_row[dd->ci[dim]  ];
 +    comm->cell_f1[d] = cell_f_row[dd->ci[dim]+1];
 +    /* The whole array was communicated, so set the buffer position
 +     * to the first entry after the boundaries of this dimension.
 +     */
 +    pos = dd->nc[dim] + 1;
 +    for(d1=0; d1<=d; d1++)
 +    {
 +        if (d1 < d)
 +        {
 +            /* Copy the cell fractions of the lower dimensions */
 +            comm->cell_f0[d1] = cell_f_row[pos++];
 +            comm->cell_f1[d1] = cell_f_row[pos++];
 +        }
 +        relative_to_absolute_cell_bounds(dd,ddbox,d1);
 +    }
 +    /* Convert the communicated shift from float to int,
 +     * adding 0.5 so the truncating cast rounds to nearest.
 +     */
 +    comm->ddpme[0].maxshift = (int)(cell_f_row[pos++] + 0.5);
 +    if (d >= 1)
 +    {
 +        comm->ddpme[1].maxshift = (int)(cell_f_row[pos++] + 0.5);
 +    }
 +}
 +
 +/* Determine and distribute new cell boundaries for all DD dimensions.
 + *
 + * For DD dimension index d only the ranks whose cell index is 0 along all
 + * higher DD dimensions take part. Of those, the rank whose index along
 + * dimension d is 0 as well computes the boundaries with
 + * set_dd_cell_sizes_dlb_root; all participants then call
 + * distribute_dd_cell_sizes_dlb.
 + */
 +static void set_dd_cell_sizes_dlb_change(gmx_domdec_t *dd,
 +                                         gmx_ddbox_t *ddbox,gmx_bool bDynamicBox,
 +                                         gmx_bool bUniform,gmx_large_int_t step)
 +{
 +    gmx_domdec_comm_t *comm = dd->comm;
 +    int      d,dim,dhigh;
 +    gmx_bool bInRow,bIsRoot;
 +    real     *row_buf;
 +
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        dim = dd->dim[d];
 +
 +        /* The index along dimension d itself only affects root status */
 +        bIsRoot = !(dd->ci[dim] > 0);
 +        bInRow  = TRUE;
 +        for(dhigh=d+1; dhigh<dd->ndim; dhigh++)
 +        {
 +            if (dd->ci[dd->dim[dhigh]] > 0)
 +            {
 +                bInRow  = FALSE;
 +                bIsRoot = FALSE;
 +            }
 +        }
 +        if (!bInRow)
 +        {
 +            continue;
 +        }
 +
 +        if (bIsRoot)
 +        {
 +            set_dd_cell_sizes_dlb_root(dd,d,dim,comm->root[d],
 +                                       ddbox,bDynamicBox,bUniform,step);
 +        }
 +        /* The root distributes from its own array, the others receive
 +         * into the shared row buffer.
 +         */
 +        row_buf = bIsRoot ? comm->root[d]->cell_f : comm->cell_f_row;
 +        distribute_dd_cell_sizes_dlb(dd,d,dim,row_buf,ddbox);
 +    }
 +}
 +
 +/* Recompute the absolute cell boundaries from the stored relative ones
 + * for every DD dimension, without changing the relative boundaries.
 + *
 + * This function assumes the box is static and should therefore
 + * not be called when the box has changed since the last
 + * call to dd_partition_system.
 + */
 +static void set_dd_cell_sizes_dlb_nochange(gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
 +{
 +    int dimind = 0;
 +
 +    while (dimind < dd->ndim)
 +    {
 +        relative_to_absolute_cell_bounds(dd,ddbox,dimind);
 +        dimind++;
 +    }
 +}
 +
 +
 +
 +/* Set the cell boundaries when dynamic load balancing is in use.
 + *
 + * With bDoDLB new boundaries are determined and communicated (timed under
 + * ewcDDCOMMBOUND); otherwise, with a dynamic box, only the absolute
 + * boundaries are refreshed from the stored fractions. Dimensions with a
 + * single DD cell always span the whole box.
 + */
 +static void set_dd_cell_sizes_dlb(gmx_domdec_t *dd,
 +                                  gmx_ddbox_t *ddbox,gmx_bool bDynamicBox,
 +                                  gmx_bool bUniform,gmx_bool bDoDLB,gmx_large_int_t step,
 +                                  gmx_wallcycle_t wcycle)
 +{
 +    gmx_domdec_comm_t *comm = dd->comm;
 +    int d;
 +
 +    if (bDoDLB)
 +    {
 +        wallcycle_start(wcycle,ewcDDCOMMBOUND);
 +        set_dd_cell_sizes_dlb_change(dd,ddbox,bDynamicBox,bUniform,step);
 +        wallcycle_stop(wcycle,ewcDDCOMMBOUND);
 +    }
 +    else if (bDynamicBox)
 +    {
 +        set_dd_cell_sizes_dlb_nochange(dd,ddbox);
 +    }
 +
 +    /* Set the dimensions for which no DD is used */
 +    for(d=0; d<DIM; d++)
 +    {
 +        if (dd->nc[d] != 1)
 +        {
 +            continue;
 +        }
 +        comm->cell_x0[d] = 0;
 +        comm->cell_x1[d] = ddbox->box_size[d];
 +        if (d >= ddbox->nboundeddim)
 +        {
 +            /* Shift by the box origin, as for the decomposed dimensions */
 +            comm->cell_x0[d] += ddbox->box0[d];
 +            comm->cell_x1[d] += ddbox->box0[d];
 +        }
 +    }
 +}
 +
 +/* Make sure the per-dimension communication index arrays can hold the
 + * number of pulses given in npulse and set cd->np for every DD dimension.
 + * Newly added entries are initialized to an empty index.
 + */
 +static void realloc_comm_ind(gmx_domdec_t *dd,ivec npulse)
 +{
 +    int dimind,npulse_dim,p;
 +    gmx_domdec_comm_dim_t *cd;
 +
 +    for(dimind=0; dimind<dd->ndim; dimind++)
 +    {
 +        cd         = &dd->comm->cd[dimind];
 +        npulse_dim = npulse[dd->dim[dimind]];
 +        if (npulse_dim <= cd->np_nalloc)
 +        {
 +            /* Enough room already, only update the pulse count */
 +            cd->np = npulse_dim;
 +            continue;
 +        }
 +
 +        if (debug)
 +        {
 +            fprintf(debug,"(Re)allocing cd for %c to %d pulses\n",
 +                    dim2char(dd->dim[dimind]),npulse_dim);
 +        }
 +        /* Only report a growth, not the initial allocation */
 +        if (DDMASTER(dd) && cd->np_nalloc > 0)
 +        {
 +            fprintf(stderr,"\nIncreasing the number of cell to communicate in dimension %c to %d for the first time\n",dim2char(dd->dim[dimind]),npulse_dim);
 +        }
 +        srenew(cd->ind,npulse_dim);
 +        p = cd->np_nalloc;
 +        while (p < npulse_dim)
 +        {
 +            cd->ind[p].index  = NULL;
 +            cd->ind[p].nalloc = 0;
 +            p++;
 +        }
 +        cd->np_nalloc = npulse_dim;
 +        cd->np        = npulse_dim;
 +    }
 +}
 +
 +
 +/* Set the DD cell boundaries for this partitioning step.
 + *
 + * The current boundaries are first saved in old_cell_x0/old_cell_x1.
 + * Without dynamic load balancing the static boundaries are set and the
 + * communication pulse storage is resized; with it, the master checks the
 + * box size and the DLB boundaries are set.
 + */
 +static void set_dd_cell_sizes(gmx_domdec_t *dd,
 +                              gmx_ddbox_t *ddbox,gmx_bool bDynamicBox,
 +                              gmx_bool bUniform,gmx_bool bDoDLB,gmx_large_int_t step,
 +                              gmx_wallcycle_t wcycle)
 +{
 +    gmx_domdec_comm_t *comm = dd->comm;
 +    int  dim;
 +    ivec npulse;
 +
 +    /* Copy the old cell boundaries for the cg displacement check */
 +    copy_rvec(comm->cell_x0,comm->old_cell_x0);
 +    copy_rvec(comm->cell_x1,comm->old_cell_x1);
 +
 +    if (!comm->bDynLoadBal)
 +    {
 +        set_dd_cell_sizes_slb(dd,ddbox,FALSE,npulse);
 +        realloc_comm_ind(dd,npulse);
 +    }
 +    else
 +    {
 +        if (DDMASTER(dd))
 +        {
 +            check_box_size(dd,ddbox);
 +        }
 +        set_dd_cell_sizes_dlb(dd,ddbox,bDynamicBox,bUniform,bDoDLB,step,wcycle);
 +    }
 +
 +    if (!debug)
 +    {
 +        return;
 +    }
 +    for(dim=0; dim<DIM; dim++)
 +    {
 +        fprintf(debug,"cell_x[%d] %f - %f skew_fac %f\n",
 +                dim,comm->cell_x0[dim],comm->cell_x1[dim],ddbox->skew_fac[dim]);
 +    }
 +}
 +
 +/* Check the local cell size against the minimum allowed size and, when
 + * required, communicate the cell boundaries to update cell_ns_x0/x1.
 + *
 + * A cell that is too small with dynamic load balancing is a fatal error.
 + * Outer cells along dimensions at or beyond ddbox->npbcdim are exempt.
 + */
 +static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
 +                                  gmx_ddbox_t *ddbox,
 +                                  rvec cell_ns_x0,rvec cell_ns_x1,
 +                                  gmx_large_int_t step)
 +{
 +    gmx_domdec_comm_t *comm = dd->comm;
 +    int      dimind,dim;
 +    gmx_bool bFreeOuterCell;
 +
 +    for(dimind=0; dimind<dd->ndim; dimind++)
 +    {
 +        dim = dd->dim[dimind];
 +
 +        /* Without PBC we don't have restrictions on the outer cells */
 +        bFreeOuterCell = (dim >= ddbox->npbcdim &&
 +                          (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1));
 +        if (bFreeOuterCell || !comm->bDynLoadBal)
 +        {
 +            continue;
 +        }
 +
 +        if ((comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
 +            comm->cellsize_min[dim])
 +        {
 +            char buf[22];
 +            gmx_fatal(FARGS,"Step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
 +                      gmx_step_str(step,buf),dim2char(dim),
 +                      comm->cell_x1[dim] - comm->cell_x0[dim],
 +                      ddbox->skew_fac[dim],
 +                      comm->cellsize_min[dim],
 +                      dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
 +        }
 +    }
 +
 +    if (!((dd->bGridJump && dd->ndim > 1) || ddbox->nboundeddim < DIM))
 +    {
 +        return;
 +    }
 +
 +    /* Communicate the boundaries and update cell_ns_x0/1 */
 +    dd_move_cellx(dd,ddbox,cell_ns_x0,cell_ns_x1);
 +    if (dd->bGridJump && dd->ndim > 1)
 +    {
 +        check_grid_jump(step,dd,ddbox);
 +    }
 +}
 +
 +/* Fill the three lower off-diagonal elements of tcm, which the callers use
 + * to correct a Cartesian coordinate for the box skew:
 + *   pos_d += x[j]*tcm[j][d]   for j > d.
 + * Elements belonging to a dimension at or beyond npbcdim are set to zero.
 + * Only tcm[YY][XX], tcm[ZZ][XX] and tcm[ZZ][YY] are written.
 + */
 +static void make_tric_corr_matrix(int npbcdim,matrix box,matrix tcm)
 +{
 +    /* Start from no correction and fill in the periodic dimensions */
 +    tcm[YY][XX] = 0;
 +    tcm[ZZ][XX] = 0;
 +    tcm[ZZ][YY] = 0;
 +
 +    if (YY < npbcdim)
 +    {
 +        tcm[YY][XX] = -box[YY][XX]/box[YY][YY];
 +    }
 +    if (ZZ < npbcdim)
 +    {
 +        /* The x-correction for z uses the y-correction set above */
 +        tcm[ZZ][XX] = -(box[ZZ][YY]*tcm[YY][XX] + box[ZZ][XX])/box[ZZ][ZZ];
 +        tcm[ZZ][YY] = -box[ZZ][YY]/box[ZZ][ZZ];
 +    }
 +}
 +
 +/* Verify that box is usable with screw pbc; calls gmx_fatal when one of
 + * the off-diagonal elements box[YY][XX], box[ZZ][XX] or box[ZZ][YY] is
 + * non-zero.
 + */
 +static void check_screw_box(matrix box)
 +{
 +    gmx_bool bOffDiagX;
 +
 +    /* Mathematical limitation */
 +    bOffDiagX = (box[YY][XX] != 0 || box[ZZ][XX] != 0);
 +    if (bOffDiagX)
 +    {
 +        gmx_fatal(FARGS,"With screw pbc the unit cell can not have non-zero off-diagonal x-components");
 +    }
 +
 +    /* Limitation due to the asymmetry of the eighth shell method */
 +    if (box[ZZ][YY] != 0)
 +    {
 +        gmx_fatal(FARGS,"pbc=screw with non-zero box_zy is not supported");
 +    }
 +}
 +
 +/* Assign all charge groups to DD cells, using the data in dd->ma.
 + *
 + * For every charge group in cgs the center of geometry is computed from
 + * pos. Along the periodic dimensions the group (its center and its atoms
 + * in pos) is shifted into the box, applying the screw rotation for x when
 + * dd->bScrewPBC is set. The DD cell index is then looked up in ma->cell_x.
 + *
 + * On return ma->ncg and ma->nat hold the per-node charge group and atom
 + * counts, ma->cg the charge group numbers ordered by node and ma->index
 + * the nnodes+1 offsets into ma->cg. When fplog is not NULL the
 + * distribution is printed to it.
 + */
 +static void distribute_cg(FILE *fplog,gmx_large_int_t step,
 +                          matrix box,ivec tric_dir,t_block *cgs,rvec pos[],
 +                          gmx_domdec_t *dd)
 +{
 +    gmx_domdec_master_t *ma;
 +    int **tmp_ind,*tmp_nalloc;
 +    int  i,icg,j,k,k0,k1,d,nrcg;
 +    matrix tcm;
 +    rvec cg_cm;
 +    ivec ind;
 +    real inv_ncg,pos_d;
 +    atom_id *cgindex;
 +    gmx_bool bScrew;
 +
 +    ma = dd->ma;
 +
 +    /* Temporary per-node lists of charge groups, grown on demand */
 +    snew(tmp_nalloc,dd->nnodes);
 +    snew(tmp_ind,dd->nnodes);
 +    for(i=0; i<dd->nnodes; i++)
 +    {
 +        tmp_nalloc[i] = over_alloc_large(cgs->nr/dd->nnodes+1);
 +        snew(tmp_ind[i],tmp_nalloc[i]);
 +    }
 +
 +    /* Clear the count */
 +    for(i=0; i<dd->nnodes; i++)
 +    {
 +        ma->ncg[i] = 0;
 +        ma->nat[i] = 0;
 +    }
 +
 +    make_tric_corr_matrix(dd->npbcdim,box,tcm);
 +
 +    cgindex = cgs->index;
 +
 +    /* Compute the center of geometry for all charge groups */
 +    for(icg=0; icg<cgs->nr; icg++)
 +    {
 +        k0      = cgindex[icg];
 +        k1      = cgindex[icg+1];
 +        nrcg    = k1 - k0;
 +        if (nrcg == 1)
 +        {
 +            copy_rvec(pos[k0],cg_cm);
 +        }
 +        else
 +        {
 +            inv_ncg = 1.0/nrcg;
 +
 +            clear_rvec(cg_cm);
 +            for(k=k0; (k<k1); k++)
 +            {
 +                rvec_inc(cg_cm,pos[k]);
 +            }
 +            for(d=0; (d<DIM); d++)
 +            {
 +                cg_cm[d] *= inv_ncg;
 +            }
 +        }
 +        /* Put the charge group in the box and determine the cell index.
 +         * Loop from z down to x: a shift along box vector d can change
 +         * the components of the lower dimensions.
 +         */
 +        for(d=DIM-1; d>=0; d--) {
 +            pos_d = cg_cm[d];
 +            if (d < dd->npbcdim)
 +            {
 +                bScrew = (dd->bScrewPBC && d == XX);
 +                if (tric_dir[d] && dd->nc[d] > 1)
 +                {
 +                    /* Use triclinic coordinates for this dimension */
 +                    for(j=d+1; j<DIM; j++)
 +                    {
 +                        pos_d += cg_cm[j]*tcm[j][d];
 +                    }
 +                }
 +                while(pos_d >= box[d][d])
 +                {
 +                    pos_d -= box[d][d];
 +                    rvec_dec(cg_cm,box[d]);
 +                    if (bScrew)
 +                    {
 +                        cg_cm[YY] = box[YY][YY] - cg_cm[YY];
 +                        cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
 +                    }
 +                    for(k=k0; (k<k1); k++)
 +                    {
 +                        rvec_dec(pos[k],box[d]);
 +                        if (bScrew)
 +                        {
 +                            pos[k][YY] = box[YY][YY] - pos[k][YY];
 +                            pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
 +                        }
 +                    }
 +                }
 +                while(pos_d < 0)
 +                {
 +                    pos_d += box[d][d];
 +                    rvec_inc(cg_cm,box[d]);
 +                    if (bScrew)
 +                    {
 +                        cg_cm[YY] = box[YY][YY] - cg_cm[YY];
 +                        cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
 +                    }
 +                    for(k=k0; (k<k1); k++)
 +                    {
 +                        rvec_inc(pos[k],box[d]);
 +                        if (bScrew)
 +                        {
 +                            pos[k][YY] = box[YY][YY] - pos[k][YY];
 +                            pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
 +                        }
 +                    }
 +                }
 +            }
 +            /* This could be done more efficiently */
 +            ind[d] = 0;
 +            while(ind[d]+1 < dd->nc[d] && pos_d >= ma->cell_x[d][ind[d]+1])
 +            {
 +                ind[d]++;
 +            }
 +        }
 +        i = dd_index(dd->nc,ind);
 +        if (ma->ncg[i] == tmp_nalloc[i])
 +        {
 +            tmp_nalloc[i] = over_alloc_large(ma->ncg[i]+1);
 +            srenew(tmp_ind[i],tmp_nalloc[i]);
 +        }
 +        tmp_ind[i][ma->ncg[i]] = icg;
 +        ma->ncg[i]++;
 +        ma->nat[i] += k1 - k0;
 +    }
 +
 +    /* Concatenate the per-node lists into ma->cg and record the offsets */
 +    k1 = 0;
 +    for(i=0; i<dd->nnodes; i++)
 +    {
 +        ma->index[i] = k1;
 +        for(k=0; k<ma->ncg[i]; k++)
 +        {
 +            ma->cg[k1++] = tmp_ind[i][k];
 +        }
 +    }
 +    ma->index[dd->nnodes] = k1;
 +
 +    for(i=0; i<dd->nnodes; i++)
 +    {
 +        sfree(tmp_ind[i]);
 +    }
 +    sfree(tmp_ind);
 +    sfree(tmp_nalloc);
 +
 +    if (fplog)
 +    {
 +        char buf[22];
 +        fprintf(fplog,"Charge group distribution at step %s:",
 +                gmx_step_str(step,buf));
 +        for(i=0; i<dd->nnodes; i++)
 +        {
 +            fprintf(fplog," %d",ma->ncg[i]);
 +        }
 +        fprintf(fplog,"\n");
 +    }
 +}
 +
 +/* Determine the initial charge group distribution on the master and
 + * communicate to every node its home charge groups.
 + *
 + * The master checks the box for screw pbc, sets the static cell sizes and
 + * calls distribute_cg. The per-node counts are scattered first, then the
 + * global charge group indices are scattered into dd->index_gl. Finally
 + * dd->cgindex is built from the charge group sizes in cgs.
 + */
 +static void get_cg_distribution(FILE *fplog,gmx_large_int_t step,gmx_domdec_t *dd,
 +                                t_block *cgs,matrix box,gmx_ddbox_t *ddbox,
 +                                rvec pos[])
 +{
 +    gmx_domdec_master_t *ma=NULL;
 +    ivec npulse;
 +    int  node,icg,cg_gl;
 +    int  *count_buf=NULL;
 +    int  home_count[2] = { 0, 0 };
 +
 +    if (DDMASTER(dd))
 +    {
 +        ma = dd->ma;
 +
 +        if (dd->bScrewPBC)
 +        {
 +            check_screw_box(box);
 +        }
 +
 +        set_dd_cell_sizes_slb(dd,ddbox,TRUE,npulse);
 +
 +        distribute_cg(fplog,step,box,ddbox->tric_dir,cgs,pos,dd);
 +        /* Pack (ncg,nat) pairs, one pair per node */
 +        for(node=0; node<dd->nnodes; node++)
 +        {
 +            ma->ibuf[2*node]   = ma->ncg[node];
 +            ma->ibuf[2*node+1] = ma->nat[node];
 +        }
 +        count_buf = ma->ibuf;
 +    }
 +    dd_scatter(dd,2*sizeof(int),count_buf,home_count);
 +
 +    dd->ncg_home = home_count[0];
 +    dd->nat_home = home_count[1];
 +    dd->ncg_tot  = dd->ncg_home;
 +    dd->nat_tot  = dd->nat_home;
 +    if (dd->ncg_home > dd->cg_nalloc || dd->cg_nalloc == 0)
 +    {
 +        dd->cg_nalloc = over_alloc_dd(dd->ncg_home);
 +        srenew(dd->index_gl,dd->cg_nalloc);
 +        srenew(dd->cgindex,dd->cg_nalloc+1);
 +    }
 +    if (DDMASTER(dd))
 +    {
 +        /* Reuse ibuf: first nnodes byte counts, then nnodes byte offsets */
 +        for(node=0; node<dd->nnodes; node++)
 +        {
 +            ma->ibuf[node]            = ma->ncg[node]*sizeof(int);
 +            ma->ibuf[dd->nnodes+node] = ma->index[node]*sizeof(int);
 +        }
 +    }
 +
 +    dd_scatterv(dd,
 +                DDMASTER(dd) ? ma->ibuf : NULL,
 +                DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
 +                DDMASTER(dd) ? ma->cg : NULL,
 +                dd->ncg_home*sizeof(int),dd->index_gl);
 +
 +    /* Determine the home charge group sizes */
 +    dd->cgindex[0] = 0;
 +    for(icg=0; icg<dd->ncg_home; icg++)
 +    {
 +        cg_gl = dd->index_gl[icg];
 +        dd->cgindex[icg+1] =
 +            dd->cgindex[icg] + cgs->index[cg_gl+1] - cgs->index[cg_gl];
 +    }
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"Home charge groups:\n");
 +        for(icg=0; icg<dd->ncg_home; icg++)
 +        {
 +            fprintf(debug," %d",dd->index_gl[icg]);
 +            /* Start a new line after every tenth entry */
 +            if (icg % 10 == 9)
 +            {
 +                fprintf(debug,"\n");
 +            }
 +        }
 +        fprintf(debug,"\n");
 +    }
 +}
 +
 +/* Process the per-atom vector array src for ncg charge groups.
 + *
 + * Atoms of charge groups with move[icg] == -1 stay home; with bCompact
 + * they are moved to the front of src. Atoms of the other charge groups are
 + * copied into comm->cgcm_state[move[icg]]. Per charge group that buffer
 + * advances by 1 + nvec*nrcg entries, and this call fills the block of
 + * vector number vec (the first entry is presumably the one written by
 + * compact_and_copy_vec_cg).
 + *
 + * Returns the number of home atoms: the compacted count with bCompact,
 + * otherwise the total number of atoms.
 + */
 +static int compact_and_copy_vec_at(int ncg,int *move,
 +                                   int *cgindex,
 +                                   int nvec,int vec,
 +                                   rvec *src,gmx_domdec_comm_t *comm,
 +                                   gmx_bool bCompact)
 +{
 +    int icg,at,at_start,at_end,natoms,dest;
 +    int n_home;
 +    int buf_pos[DIM*2];
 +
 +    for(dest=0; dest<DIM*2; dest++)
 +    {
 +        buf_pos[dest] = 0;
 +    }
 +
 +    n_home   = 0;
 +    at_start = 0;
 +    for(icg=0; icg<ncg; icg++)
 +    {
 +        at_end = cgindex[icg+1];
 +        natoms = at_end - at_start;
 +        dest   = move[icg];
 +        if (dest != -1)
 +        {
 +            /* Copy to the communication buffer:
 +             * skip the leading entry and the blocks of earlier vectors.
 +             */
 +            buf_pos[dest] += 1 + vec*natoms;
 +            for(at=at_start; at<at_end; at++)
 +            {
 +                copy_rvec(src[at],comm->cgcm_state[dest][buf_pos[dest]++]);
 +            }
 +            /* Skip the blocks of the remaining vectors */
 +            buf_pos[dest] += (nvec - vec - 1)*natoms;
 +        }
 +        else if (bCompact)
 +        {
 +            /* Compact the home array in place */
 +            for(at=at_start; at<at_end; at++)
 +            {
 +                copy_rvec(src[at],src[n_home++]);
 +            }
 +        }
 +        if (!bCompact)
 +        {
 +            n_home += natoms;
 +        }
 +        at_start = at_end;
 +    }
 +
 +    return n_home;
 +}
 +
 +/* Process the per-charge-group vector array src for ncg charge groups.
 + *
 + * Entries with move[icg] == -1 stay home; with bCompact they are moved to
 + * the front of src. The other entries are copied into
 + * comm->cgcm_state[move[icg]] at the start of that charge group's slot,
 + * which spans 1 + nrcg*nvec entries.
 + *
 + * Returns the number of home charge groups: the compacted count with
 + * bCompact, otherwise ncg.
 + */
 +static int compact_and_copy_vec_cg(int ncg,int *move,
 +                                   int *cgindex,
 +                                   int nvec,rvec *src,gmx_domdec_comm_t *comm,
 +                                   gmx_bool bCompact)
 +{
 +    int icg,at_start,at_end,dest;
 +    int n_home;
 +    int buf_pos[DIM*2];
 +
 +    for(dest=0; dest<DIM*2; dest++)
 +    {
 +        buf_pos[dest] = 0;
 +    }
 +
 +    n_home   = 0;
 +    at_start = 0;
 +    for(icg=0; icg<ncg; icg++)
 +    {
 +        at_end = cgindex[icg+1];
 +        dest   = move[icg];
 +        if (dest != -1)
 +        {
 +            /* Copy to the communication buffer and advance past the
 +             * room for the atom vectors of this charge group.
 +             */
 +            copy_rvec(src[icg],comm->cgcm_state[dest][buf_pos[dest]]);
 +            buf_pos[dest] += 1 + (at_end - at_start)*nvec;
 +        }
 +        else if (bCompact)
 +        {
 +            /* Compact the home array in place */
 +            copy_rvec(src[icg],src[n_home++]);
 +        }
 +        at_start = at_end;
 +    }
 +
 +    return bCompact ? n_home : ncg;
 +}
 +
 +/* Compact the home index arrays in place, dropping the charge groups that
 + * move (move[cg] != -1).
 + *
 + * For the charge groups that stay, index_gl, cginfo, cgindex and gatindex
 + * are shifted to the front and the local atom index in ga2la is updated.
 + * For the ones that move, the atoms are removed from ga2la and, when
 + * bLocalCG is not NULL, the charge group is marked non-local.
 + *
 + * Returns the number of charge groups that stay home.
 + */
 +static int compact_ind(int ncg,int *move,
 +                       int *index_gl,int *cgindex,
 +                       int *gatindex,
 +                       gmx_ga2la_t ga2la,char *bLocalCG,
 +                       int *cginfo)
 +{
 +    int icg,at,at_end,at_gl;
 +    int n_home_cg,n_home_at;
 +
 +    n_home_cg = 0;
 +    n_home_at = 0;
 +    for(icg=0; icg<ncg; icg++)
 +    {
 +        /* Read the old atom range before cgindex is overwritten below */
 +        at     = cgindex[icg];
 +        at_end = cgindex[icg+1];
 +        if (move[icg] != -1)
 +        {
 +            /* Clear the global indices */
 +            while (at < at_end)
 +            {
 +                ga2la_del(ga2la,gatindex[at]);
 +                at++;
 +            }
 +            if (bLocalCG)
 +            {
 +                bLocalCG[index_gl[icg]] = FALSE;
 +            }
 +            continue;
 +        }
 +
 +        /* Compact the home arrays in place.
 +         * Anything that can be done here avoids access to global arrays.
 +         */
 +        cgindex[n_home_cg] = n_home_at;
 +        while (at < at_end)
 +        {
 +            at_gl = gatindex[at];
 +            gatindex[n_home_at] = at_gl;
 +            /* The cell number stays 0, so we don't need to set it */
 +            ga2la_change_la(ga2la,at_gl,n_home_at);
 +            n_home_at++;
 +            at++;
 +        }
 +        index_gl[n_home_cg] = index_gl[icg];
 +        cginfo[n_home_cg]   = cginfo[icg];
 +        /* The charge group remains local, so bLocalCG does not change */
 +        n_home_cg++;
 +    }
 +    /* Close the last home charge group */
 +    cgindex[n_home_cg] = n_home_at;
 +
 +    return n_home_cg;
 +}
 +
 +/* Non-compacting counterpart of compact_ind: for every charge group that
 + * moves (move[cg] >= 0) remove its atoms from ga2la, mark it non-local in
 + * bLocalCG (when not NULL) and flag it in cell_index. The index arrays
 + * themselves are left in place.
 + */
 +static void clear_and_mark_ind(int ncg,int *move,
 +                               int *index_gl,int *cgindex,int *gatindex,
 +                               gmx_ga2la_t ga2la,char *bLocalCG,
 +                               int *cell_index)
 +{
 +    int icg,at,at_end;
 +
 +    for(icg=0; icg<ncg; icg++)
 +    {
 +        if (move[icg] < 0)
 +        {
 +            /* This charge group stays home */
 +            continue;
 +        }
 +
 +        /* Clear the global indices */
 +        at_end = cgindex[icg+1];
 +        for(at=cgindex[icg]; at<at_end; at++)
 +        {
 +            ga2la_del(ga2la,gatindex[at]);
 +        }
 +        if (bLocalCG)
 +        {
 +            bLocalCG[index_gl[icg]] = FALSE;
 +        }
 +        /* Signal that this cg has moved using the ns cell index.
 +         * Here we set it to -1.
 +         * fill_grid will change it from -1 to 4*grid->ncells.
 +         */
 +        cell_index[icg] = -1;
 +    }
 +}
 +
 +/* Print a report to fplog about charge group cg having moved too far
 + * along dimension dim.
 + *
 + * dir selects the boundary used for the "distance out of cell" line:
 + * the upper boundary cell_x1 for dir == 1, the lower boundary cell_x0
 + * otherwise. limitd and cm_old are only printed when bHaveLimitdAndCMOld
 + * is set. cm_new is the new center and pos_d its position along dim as
 + * computed by the caller. fplog must not be NULL.
 + */
 +static void print_cg_move(FILE *fplog,
 +                          gmx_domdec_t *dd,
 +                          gmx_large_int_t step,int cg,int dim,int dir,
 +                          gmx_bool bHaveLimitdAndCMOld,real limitd,
 +                          rvec cm_old,rvec cm_new,real pos_d)
 +{
 +    gmx_domdec_comm_t *comm;
 +    char buf[22];
 +
 +    comm = dd->comm;
 +
 +    fprintf(fplog,"\nStep %s:\n",gmx_step_str(step,buf));
 +    if (bHaveLimitdAndCMOld)
 +    {
 +        fprintf(fplog,"The charge group starting at atom %d moved more than the distance allowed by the domain decomposition (%f) in direction %c\n",
 +                ddglatnr(dd,dd->cgindex[cg]),limitd,dim2char(dim));
 +    }
 +    else
 +    {
 +        fprintf(fplog,"The charge group starting at atom %d moved more than the distance allowed by the domain decomposition in direction %c\n",
 +                ddglatnr(dd,dd->cgindex[cg]),dim2char(dim));
 +    }
 +    fprintf(fplog,"distance out of cell %f\n",
 +            dir==1 ? pos_d - comm->cell_x1[dim] : pos_d - comm->cell_x0[dim]);
 +    if (bHaveLimitdAndCMOld)
 +    {
 +        fprintf(fplog,"Old coordinates: %8.3f %8.3f %8.3f\n",
 +                cm_old[XX],cm_old[YY],cm_old[ZZ]);
 +    }
 +    fprintf(fplog,"New coordinates: %8.3f %8.3f %8.3f\n",
 +            cm_new[XX],cm_new[YY],cm_new[ZZ]);
 +    fprintf(fplog,"Old cell boundaries in direction %c: %8.3f %8.3f\n",
 +            dim2char(dim),
 +            comm->old_cell_x0[dim],comm->old_cell_x1[dim]);
 +    fprintf(fplog,"New cell boundaries in direction %c: %8.3f %8.3f\n",
 +            dim2char(dim),
 +            comm->cell_x0[dim],comm->cell_x1[dim]);
 +}
 +
 +/* Report a charge group that moved too far and abort.
 + * The report is written to fplog (when not NULL) and then to stderr,
 + * after which gmx_fatal is called.
 + */
 +static void cg_move_error(FILE *fplog,
 +                          gmx_domdec_t *dd,
 +                          gmx_large_int_t step,int cg,int dim,int dir,
 +                          gmx_bool bHaveLimitdAndCMOld,real limitd,
 +                          rvec cm_old,rvec cm_new,real pos_d)
 +{
 +    FILE *out[2];
 +    int  n;
 +
 +    /* Log file first, terminal second */
 +    out[0] = fplog;
 +    out[1] = stderr;
 +    for(n=0; n<2; n++)
 +    {
 +        if (n == 0 && !fplog)
 +        {
 +            continue;
 +        }
 +        print_cg_move(out[n],dd,step,cg,dim,dir,
 +                      bHaveLimitdAndCMOld,limitd,cm_old,cm_new,pos_d);
 +    }
 +    gmx_fatal(FARGS,
 +              "A charge group moved too far between two domain decomposition steps\n"
 +              "This usually means that your system is not well equilibrated");
 +}
 +
 +/* Apply the screw-pbc transformation to the state entries of atom a.
 + *
 + * For every state entry est that satisfies EST_DISTR(est) and is flagged
 + * in state->flags: the y and z position components are mirrored in the
 + * box (box[YY][YY] - y, box[ZZ][ZZ] - z), and the y and z components of
 + * v, sd_X and cg_p are negated. The restraint entries listed below are
 + * left untouched. Any other entry triggers gmx_incons.
 + *
 + * NOTE(review): estLD_RNG and estLD_RNGI are not listed here, while
 + * dd_redistribute_cg lists them inside its EST_DISTR switch; if such an
 + * entry can be flagged in state->flags it reaches the default branch
 + * below - confirm that this is intended.
 + */
 +static void rotate_state_atom(t_state *state,int a)
 +{
 +    int est;
 +
 +    for(est=0; est<estNR; est++)
 +    {
 +        /* Only handle entries selected by EST_DISTR and present in flags */
-         if (EST_DISTR(est) && state->flags & (1<<est)) {
++        if (EST_DISTR(est) && (state->flags & (1<<est))) {
 +            switch (est) {
 +            case estX:
 +                /* Rotate the complete state; for a rectangular box only */
 +                state->x[a][YY] = state->box[YY][YY] - state->x[a][YY];
 +                state->x[a][ZZ] = state->box[ZZ][ZZ] - state->x[a][ZZ];
 +                break;
 +            case estV:
 +                /* Vector quantities only change sign in y and z */
 +                state->v[a][YY] = -state->v[a][YY];
 +                state->v[a][ZZ] = -state->v[a][ZZ];
 +                break;
 +            case estSDX:
 +                state->sd_X[a][YY] = -state->sd_X[a][YY];
 +                state->sd_X[a][ZZ] = -state->sd_X[a][ZZ];
 +                break;
 +            case estCGP:
 +                state->cg_p[a][YY] = -state->cg_p[a][YY];
 +                state->cg_p[a][ZZ] = -state->cg_p[a][ZZ];
 +                break;
 +            case estDISRE_INITF:
 +            case estDISRE_RM3TAV:
 +            case estORIRE_INITF:
 +            case estORIRE_DTAV:
 +                /* These are distances, so not affected by rotation */
 +                break;
 +            default:
 +                gmx_incons("Unknown state entry encountered in rotate_state_atom");            
 +            }
 +        }
 +    }
 +}
 +
 +static int dd_redistribute_cg(FILE *fplog,gmx_large_int_t step,
 +                              gmx_domdec_t *dd,ivec tric_dir,
 +                              t_state *state,rvec **f,
 +                              t_forcerec *fr,t_mdatoms *md,
 +                              gmx_bool bCompact,
 +                              t_nrnb *nrnb)
 +{
 +    int  *move;
 +    int  npbcdim;
 +    int  ncg[DIM*2],nat[DIM*2];
 +    int  c,i,cg,k,k0,k1,d,dim,dim2,dir,d2,d3,d4,cell_d;
 +    int  mc,cdd,nrcg,ncg_recv,nat_recv,nvs,nvr,nvec,vec;
 +    int  sbuf[2],rbuf[2];
 +    int  home_pos_cg,home_pos_at,ncg_stay_home,buf_pos;
 +    int  flag;
 +    gmx_bool bV=FALSE,bSDX=FALSE,bCGP=FALSE;
 +    gmx_bool bScrew;
 +    ivec dev;
 +    real inv_ncg,pos_d;
 +    matrix tcm;
 +    rvec *cg_cm,cell_x0,cell_x1,limitd,limit0,limit1,cm_new;
 +    atom_id *cgindex;
 +    cginfo_mb_t *cginfo_mb;
 +    gmx_domdec_comm_t *comm;
 +    
 +    if (dd->bScrewPBC)
 +    {
 +        check_screw_box(state->box);
 +    }
 +    
 +    comm  = dd->comm;
 +    cg_cm = fr->cg_cm;
 +    
 +    for(i=0; i<estNR; i++)
 +    {
 +        if (EST_DISTR(i))
 +        {
 +            switch (i)
 +            {
 +            case estX:   /* Always present */            break;
 +            case estV:   bV   = (state->flags & (1<<i)); break;
 +            case estSDX: bSDX = (state->flags & (1<<i)); break;
 +            case estCGP: bCGP = (state->flags & (1<<i)); break;
 +            case estLD_RNG:
 +            case estLD_RNGI:
 +            case estDISRE_INITF:
 +            case estDISRE_RM3TAV:
 +            case estORIRE_INITF:
 +            case estORIRE_DTAV:
 +                /* No processing required */
 +                break;
 +            default:
 +            gmx_incons("Unknown state entry encountered in dd_redistribute_cg");
 +            }
 +        }
 +    }
 +    
 +    if (dd->ncg_tot > comm->nalloc_int)
 +    {
 +        comm->nalloc_int = over_alloc_dd(dd->ncg_tot);
 +        srenew(comm->buf_int,comm->nalloc_int);
 +    }
 +    move = comm->buf_int;
 +    
 +    /* Clear the count */
 +    for(c=0; c<dd->ndim*2; c++)
 +    {
 +        ncg[c] = 0;
 +        nat[c] = 0;
 +    }
 +
 +    npbcdim = dd->npbcdim;
 +
 +    for(d=0; (d<DIM); d++)
 +    {
 +        limitd[d] = dd->comm->cellsize_min[d];
 +        if (d >= npbcdim && dd->ci[d] == 0)
 +        {
 +            cell_x0[d] = -GMX_FLOAT_MAX;
 +        }
 +        else
 +        {
 +            cell_x0[d] = comm->cell_x0[d];
 +        }
 +        if (d >= npbcdim && dd->ci[d] == dd->nc[d] - 1)
 +        {
 +            cell_x1[d] = GMX_FLOAT_MAX;
 +        }
 +        else
 +        {
 +            cell_x1[d] = comm->cell_x1[d];
 +        }
 +        if (d < npbcdim)
 +        {
 +            limit0[d] = comm->old_cell_x0[d] - limitd[d];
 +            limit1[d] = comm->old_cell_x1[d] + limitd[d];
 +        }
 +        else
 +        {
 +            /* We check after communication if a charge group moved
 +             * more than one cell. Set the pre-comm check limit to float_max.
 +             */
 +            limit0[d] = -GMX_FLOAT_MAX;
 +            limit1[d] =  GMX_FLOAT_MAX;
 +        }
 +    }
 +    
 +    make_tric_corr_matrix(npbcdim,state->box,tcm);
 +    
 +    cgindex = dd->cgindex;
 +    
 +    /* Compute the center of geometry for all home charge groups
 +     * and put them in the box and determine where they should go.
 +     */
 +    for(cg=0; cg<dd->ncg_home; cg++)
 +    {
 +        k0   = cgindex[cg];
 +        k1   = cgindex[cg+1];
 +        nrcg = k1 - k0;
 +        if (nrcg == 1)
 +        {
 +            copy_rvec(state->x[k0],cm_new);
 +        }
 +        else
 +        {
 +            inv_ncg = 1.0/nrcg;
 +            
 +            clear_rvec(cm_new);
 +            for(k=k0; (k<k1); k++)
 +            {
 +                rvec_inc(cm_new,state->x[k]);
 +            }
 +            for(d=0; (d<DIM); d++)
 +            {
 +                cm_new[d] = inv_ncg*cm_new[d];
 +            }
 +        }
 +        
 +        clear_ivec(dev);
 +        /* Do pbc and check DD cell boundary crossings */
 +        for(d=DIM-1; d>=0; d--)
 +        {
 +            if (dd->nc[d] > 1)
 +            {
 +                bScrew = (dd->bScrewPBC && d == XX);
 +                /* Determine the location of this cg in lattice coordinates */
 +                pos_d = cm_new[d];
 +                if (tric_dir[d])
 +                {
 +                    for(d2=d+1; d2<DIM; d2++)
 +                    {
 +                        pos_d += cm_new[d2]*tcm[d2][d];
 +                    }
 +                }
 +                /* Put the charge group in the triclinic unit-cell */
 +                if (pos_d >= cell_x1[d])
 +                {
 +                    if (pos_d >= limit1[d])
 +                    {
 +                        cg_move_error(fplog,dd,step,cg,d,1,TRUE,limitd[d],
 +                                      cg_cm[cg],cm_new,pos_d);
 +                    }
 +                    dev[d] = 1;
 +                    if (dd->ci[d] == dd->nc[d] - 1)
 +                    {
 +                        rvec_dec(cm_new,state->box[d]);
 +                        if (bScrew)
 +                        {
 +                            cm_new[YY] = state->box[YY][YY] - cm_new[YY];
 +                            cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
 +                        }
 +                        for(k=k0; (k<k1); k++)
 +                        {
 +                            rvec_dec(state->x[k],state->box[d]);
 +                            if (bScrew)
 +                            {
 +                                rotate_state_atom(state,k);
 +                            }
 +                        }
 +                    }
 +                }
 +                else if (pos_d < cell_x0[d])
 +                {
 +                    if (pos_d < limit0[d])
 +                    {
 +                        cg_move_error(fplog,dd,step,cg,d,-1,TRUE,limitd[d],
 +                                      cg_cm[cg],cm_new,pos_d);
 +                    }
 +                    dev[d] = -1;
 +                    if (dd->ci[d] == 0)
 +                    {
 +                        rvec_inc(cm_new,state->box[d]);
 +                        if (bScrew)
 +                        {
 +                            cm_new[YY] = state->box[YY][YY] - cm_new[YY];
 +                            cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
 +                        }
 +                        for(k=k0; (k<k1); k++)
 +                        {
 +                            rvec_inc(state->x[k],state->box[d]);
 +                            if (bScrew)
 +                            {
 +                                rotate_state_atom(state,k);
 +                            }
 +                        }
 +                    }
 +                }
 +            }
 +            else if (d < npbcdim)
 +            {
 +                /* Put the charge group in the rectangular unit-cell */
 +                while (cm_new[d] >= state->box[d][d])
 +                {
 +                    rvec_dec(cm_new,state->box[d]);
 +                    for(k=k0; (k<k1); k++)
 +                    {
 +                        rvec_dec(state->x[k],state->box[d]);
 +                    }
 +                }
 +                while (cm_new[d] < 0)
 +                {
 +                    rvec_inc(cm_new,state->box[d]);
 +                    for(k=k0; (k<k1); k++)
 +                    {
 +                        rvec_inc(state->x[k],state->box[d]);
 +                    }
 +                }
 +            }
 +        }
 +    
 +        copy_rvec(cm_new,cg_cm[cg]);
 +        
 +        /* Determine where this cg should go */
 +        flag = 0;
 +        mc = -1;
 +        for(d=0; d<dd->ndim; d++)
 +        {
 +            dim = dd->dim[d];
 +            if (dev[dim] == 1)
 +            {
 +                flag |= DD_FLAG_FW(d);
 +                if (mc == -1)
 +                {
 +                    mc = d*2;
 +                }
 +            }
 +            else if (dev[dim] == -1)
 +            {
 +                flag |= DD_FLAG_BW(d);
 +                if (mc == -1) {
 +                    if (dd->nc[dim] > 2)
 +                    {
 +                        mc = d*2 + 1;
 +                    }
 +                    else
 +                    {
 +                        mc = d*2;
 +                    }
 +                }
 +            }
 +        }
 +        move[cg] = mc;
 +        if (mc >= 0)
 +        {
 +            if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
 +            {
 +                comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
 +                srenew(comm->cggl_flag[mc],comm->cggl_flag_nalloc[mc]*DD_CGIBS);
 +            }
 +            comm->cggl_flag[mc][ncg[mc]*DD_CGIBS  ] = dd->index_gl[cg];
 +            /* We store the cg size in the lower 16 bits
 +             * and the place where the charge group should go
 +             * in the next 6 bits. This saves some communication volume.
 +             */
 +            comm->cggl_flag[mc][ncg[mc]*DD_CGIBS+1] = nrcg | flag;
 +            ncg[mc] += 1;
 +            nat[mc] += nrcg;
 +        }
 +    }
 +    
 +    inc_nrnb(nrnb,eNR_CGCM,dd->nat_home);
 +    inc_nrnb(nrnb,eNR_RESETX,dd->ncg_home);
 +    
 +    nvec = 1;
 +    if (bV)
 +    {
 +        nvec++;
 +    }
 +    if (bSDX)
 +    {
 +        nvec++;
 +    }
 +    if (bCGP)
 +    {
 +        nvec++;
 +    }
 +    
 +    /* Make sure the communication buffers are large enough */
 +    for(mc=0; mc<dd->ndim*2; mc++)
 +    {
 +        nvr = ncg[mc] + nat[mc]*nvec;
 +        if (nvr > comm->cgcm_state_nalloc[mc])
 +        {
 +            comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr);
 +            srenew(comm->cgcm_state[mc],comm->cgcm_state_nalloc[mc]);
 +        }
 +    }
 +    
 +    /* Recalculating cg_cm might be cheaper than communicating,
 +     * but that could give rise to rounding issues.
 +     */
 +    home_pos_cg =
 +        compact_and_copy_vec_cg(dd->ncg_home,move,cgindex,
 +                                nvec,cg_cm,comm,bCompact);
 +    
 +    vec = 0;
 +    home_pos_at =
 +        compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
 +                                nvec,vec++,state->x,comm,bCompact);
 +    if (bV)
 +    {
 +        compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
 +                                nvec,vec++,state->v,comm,bCompact);
 +    }
 +    if (bSDX)
 +    {
 +        compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
 +                                nvec,vec++,state->sd_X,comm,bCompact);
 +    }
 +    if (bCGP)
 +    {
 +        compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
 +                                nvec,vec++,state->cg_p,comm,bCompact);
 +    }
 +    
 +    if (bCompact)
 +    {
 +        compact_ind(dd->ncg_home,move,
 +                    dd->index_gl,dd->cgindex,dd->gatindex,
 +                    dd->ga2la,comm->bLocalCG,
 +                    fr->cginfo);
 +    }
 +    else
 +    {
 +        clear_and_mark_ind(dd->ncg_home,move,
 +                           dd->index_gl,dd->cgindex,dd->gatindex,
 +                           dd->ga2la,comm->bLocalCG,
 +                           fr->ns.grid->cell_index);
 +    }
 +    
 +    cginfo_mb = fr->cginfo_mb;
 +
 +    ncg_stay_home = home_pos_cg;
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        dim = dd->dim[d];
 +        ncg_recv = 0;
 +        nat_recv = 0;
 +        nvr      = 0;
 +        for(dir=0; dir<(dd->nc[dim]==2 ? 1 : 2); dir++)
 +        {
 +            cdd = d*2 + dir;
 +            /* Communicate the cg and atom counts */
 +            sbuf[0] = ncg[cdd];
 +            sbuf[1] = nat[cdd];
 +            if (debug)
 +            {
 +                fprintf(debug,"Sending ddim %d dir %d: ncg %d nat %d\n",
 +                        d,dir,sbuf[0],sbuf[1]);
 +            }
 +            dd_sendrecv_int(dd, d, dir, sbuf, 2, rbuf, 2);
 +            
 +            if ((ncg_recv+rbuf[0])*DD_CGIBS > comm->nalloc_int)
 +            {
 +                comm->nalloc_int = over_alloc_dd((ncg_recv+rbuf[0])*DD_CGIBS);
 +                srenew(comm->buf_int,comm->nalloc_int);
 +            }
 +            
 +            /* Communicate the charge group indices, sizes and flags */
 +            dd_sendrecv_int(dd, d, dir,
 +                            comm->cggl_flag[cdd], sbuf[0]*DD_CGIBS,
 +                            comm->buf_int+ncg_recv*DD_CGIBS, rbuf[0]*DD_CGIBS);
 +            
 +            nvs = ncg[cdd] + nat[cdd]*nvec;
 +            i   = rbuf[0]  + rbuf[1] *nvec;
 +            vec_rvec_check_alloc(&comm->vbuf,nvr+i);
 +            
 +            /* Communicate cgcm and state */
 +            dd_sendrecv_rvec(dd, d, dir,
 +                             comm->cgcm_state[cdd], nvs,
 +                             comm->vbuf.v+nvr, i);
 +            ncg_recv += rbuf[0];
 +            nat_recv += rbuf[1];
 +            nvr      += i;
 +        }
 +        
 +        /* Process the received charge groups */
 +        buf_pos = 0;
 +        for(cg=0; cg<ncg_recv; cg++)
 +        {
 +            flag = comm->buf_int[cg*DD_CGIBS+1];
 +
 +            if (dim >= npbcdim && dd->nc[dim] > 2)
 +            {
 +                /* No pbc in this dim and more than one domain boundary.
 +                 * We do a separate check if a charge group did not move too far.
 +                 */
 +                if (((flag & DD_FLAG_FW(d)) &&
 +                     comm->vbuf.v[buf_pos][d] > cell_x1[dim]) ||
 +                    ((flag & DD_FLAG_BW(d)) &&
 +                     comm->vbuf.v[buf_pos][d] < cell_x0[dim]))
 +                {
 +                    cg_move_error(fplog,dd,step,cg,d,
 +                                  (flag & DD_FLAG_FW(d)) ? 1 : 0,
 +                                   FALSE,0,
 +                                   comm->vbuf.v[buf_pos],
 +                                   comm->vbuf.v[buf_pos],
 +                                   comm->vbuf.v[buf_pos][d]);
 +                }
 +            }
 +
 +            mc = -1;
 +            if (d < dd->ndim-1)
 +            {
 +                /* Check which direction this cg should go */
 +                for(d2=d+1; (d2<dd->ndim && mc==-1); d2++)
 +                {
 +                    if (dd->bGridJump)
 +                    {
 +                        /* The cell boundaries for dimension d2 are not equal
 +                         * for each cell row of the lower dimension(s),
 +                         * therefore we might need to redetermine where
 +                         * this cg should go.
 +                         */
 +                        dim2 = dd->dim[d2];
 +                        /* If this cg crosses the box boundary in dimension d2
 +                         * we can use the communicated flag, so we do not
 +                         * have to worry about pbc.
 +                         */
 +                        if (!((dd->ci[dim2] == dd->nc[dim2]-1 &&
 +                               (flag & DD_FLAG_FW(d2))) ||
 +                              (dd->ci[dim2] == 0 &&
 +                               (flag & DD_FLAG_BW(d2)))))
 +                        {
 +                            /* Clear the two flags for this dimension */
 +                            flag &= ~(DD_FLAG_FW(d2) | DD_FLAG_BW(d2));
 +                            /* Determine the location of this cg
 +                             * in lattice coordinates
 +                             */
 +                            pos_d = comm->vbuf.v[buf_pos][dim2];
 +                            if (tric_dir[dim2])
 +                            {
 +                                for(d3=dim2+1; d3<DIM; d3++)
 +                                {
 +                                    pos_d +=
 +                                        comm->vbuf.v[buf_pos][d3]*tcm[d3][dim2];
 +                                }
 +                            }
 +                            /* Check if we are not at the box edge.
 +                             * pbc is only handled in the first step above,
 +                             * but this check could move over pbc while
 +                             * the first step did not due to different rounding.
 +                             */
 +                            if (pos_d >= cell_x1[dim2] &&
 +                                dd->ci[dim2] != dd->nc[dim2]-1)
 +                            {
 +                                flag |= DD_FLAG_FW(d2);
 +                            }
 +                            else if (pos_d < cell_x0[dim2] &&
 +                                     dd->ci[dim2] != 0)
 +                            {
 +                                flag |= DD_FLAG_BW(d2);
 +                            }
 +                            comm->buf_int[cg*DD_CGIBS+1] = flag;
 +                        }
 +                    }
 +                    /* Set to which neighboring cell this cg should go */
 +                    if (flag & DD_FLAG_FW(d2))
 +                    {
 +                        mc = d2*2;
 +                    }
 +                    else if (flag & DD_FLAG_BW(d2))
 +                    {
 +                        if (dd->nc[dd->dim[d2]] > 2)
 +                        {
 +                            mc = d2*2+1;
 +                        }
 +                        else
 +                        {
 +                            mc = d2*2;
 +                        }
 +                    }
 +                }
 +            }
 +            
 +            nrcg = flag & DD_FLAG_NRCG;
 +            if (mc == -1)
 +            {
 +                if (home_pos_cg+1 > dd->cg_nalloc)
 +                {
 +                    dd->cg_nalloc = over_alloc_dd(home_pos_cg+1);
 +                    srenew(dd->index_gl,dd->cg_nalloc);
 +                    srenew(dd->cgindex,dd->cg_nalloc+1);
 +                }
 +                /* Set the global charge group index and size */
 +                dd->index_gl[home_pos_cg] = comm->buf_int[cg*DD_CGIBS];
 +                dd->cgindex[home_pos_cg+1] = dd->cgindex[home_pos_cg] + nrcg;
 +                /* Copy the state from the buffer */
 +                if (home_pos_cg >= fr->cg_nalloc)
 +                {
 +                    dd_realloc_fr_cg(fr,home_pos_cg+1);
 +                    cg_cm = fr->cg_cm;
 +                }
 +                copy_rvec(comm->vbuf.v[buf_pos++],cg_cm[home_pos_cg]);
 +                /* Set the cginfo */
 +                fr->cginfo[home_pos_cg] = ddcginfo(cginfo_mb,
 +                                                   dd->index_gl[home_pos_cg]);
 +                if (comm->bLocalCG)
 +                {
 +                    comm->bLocalCG[dd->index_gl[home_pos_cg]] = TRUE;
 +                }
 +
 +                if (home_pos_at+nrcg > state->nalloc)
 +                {
 +                    dd_realloc_state(state,f,home_pos_at+nrcg);
 +                }
 +                for(i=0; i<nrcg; i++)
 +                {
 +                    copy_rvec(comm->vbuf.v[buf_pos++],
 +                              state->x[home_pos_at+i]);
 +                }
 +                if (bV)
 +                {
 +                    for(i=0; i<nrcg; i++)
 +                    {
 +                        copy_rvec(comm->vbuf.v[buf_pos++],
 +                                  state->v[home_pos_at+i]);
 +                    }
 +                }
 +                if (bSDX)
 +                {
 +                    for(i=0; i<nrcg; i++)
 +                    {
 +                        copy_rvec(comm->vbuf.v[buf_pos++],
 +                                  state->sd_X[home_pos_at+i]);
 +                    }
 +                }
 +                if (bCGP)
 +                {
 +                    for(i=0; i<nrcg; i++)
 +                    {
 +                        copy_rvec(comm->vbuf.v[buf_pos++],
 +                                  state->cg_p[home_pos_at+i]);
 +                    }
 +                }
 +                home_pos_cg += 1;
 +                home_pos_at += nrcg;
 +            }
 +            else
 +            {
 +                /* Reallocate the buffers if necessary  */
 +                if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
 +                {
 +                    comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
 +                    srenew(comm->cggl_flag[mc],comm->cggl_flag_nalloc[mc]*DD_CGIBS);
 +                }
 +                nvr = ncg[mc] + nat[mc]*nvec;
 +                if (nvr + 1 + nrcg*nvec > comm->cgcm_state_nalloc[mc])
 +                {
 +                    comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr + 1 + nrcg*nvec);
 +                    srenew(comm->cgcm_state[mc],comm->cgcm_state_nalloc[mc]);
 +                }
 +                /* Copy from the receive to the send buffers */
 +                memcpy(comm->cggl_flag[mc] + ncg[mc]*DD_CGIBS,
 +                       comm->buf_int + cg*DD_CGIBS,
 +                       DD_CGIBS*sizeof(int));
 +                memcpy(comm->cgcm_state[mc][nvr],
 +                       comm->vbuf.v[buf_pos],
 +                       (1+nrcg*nvec)*sizeof(rvec));
 +                buf_pos += 1 + nrcg*nvec;
 +                ncg[mc] += 1;
 +                nat[mc] += nrcg;
 +            }
 +        }
 +    }
 +    
 +    /* With sorting (!bCompact) the indices are now only partially up to date
 +     * and ncg_home and nat_home are not the real count, since there are
 +     * "holes" in the arrays for the charge groups that moved to neighbors.
 +     */
 +    dd->ncg_home = home_pos_cg;
 +    dd->nat_home = home_pos_at;
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"Finished repartitioning\n");
 +    }
 +
 +    return ncg_stay_home;
 +}
 +
 +/* Record one cycle measurement for counter ddCycl: add it to the running
 + * total, increase the sample count and keep track of the largest value.
 + */
 +void dd_cycles_add(gmx_domdec_t *dd,float cycles,int ddCycl)
 +{
 +    gmx_domdec_comm_t *comm = dd->comm;
 +
 +    comm->cycl[ddCycl]   += cycles;
 +    comm->cycl_n[ddCycl] += 1;
 +    if (comm->cycl_max[ddCycl] < cycles)
 +    {
 +        comm->cycl_max[ddCycl] = cycles;
 +    }
 +}
 +
 +/* Return a flop estimate for the force calculation: the sum over the
 + * force-related entries of nrnb of count times cost_nrnb(), with reduced
 + * weights for the plain non-bonded kernels.
 + */
 +static double force_flop_count(t_nrnb *nrnb)
 +{
 +    const char *kernel_name;
 +    double      weight;
 +    double      total = 0;
 +    int         ind;
 +
 +    for(ind=eNR_NBKERNEL010; ind<eNR_NBKERNEL_FREE_ENERGY; ind++)
 +    {
 +        /* To get closer to the real timings, we halve the count
 +         * for the normal loops and halve it again for water loops.
 +         */
 +        kernel_name = nrnb_str(ind);
 +        weight = (strstr(kernel_name,"W3") != NULL ||
 +                  strstr(kernel_name,"W4") != NULL) ? 0.25 : 0.50;
 +        total += nrnb->n[ind]*weight*cost_nrnb(ind);
 +    }
 +
 +    for(ind=eNR_NBKERNEL_FREE_ENERGY; ind<=eNR_NB14; ind++)
 +    {
 +        kernel_name = nrnb_str(ind);
 +        /* NOTE(review): in this range only entries whose name contains
 +         * "W3" or "W4" are counted; confirm that this filter is intended.
 +         */
 +        if (strstr(kernel_name,"W3") == NULL &&
 +            strstr(kernel_name,"W4") == NULL)
 +        {
 +            continue;
 +        }
 +        total += nrnb->n[ind]*cost_nrnb(ind);
 +    }
 +
 +    /* These entries are counted at their full cost */
 +    for(ind=eNR_BONDS; ind<=eNR_WALLS; ind++)
 +    {
 +        total += nrnb->n[ind]*cost_nrnb(ind);
 +    }
 +
 +    return total;
 +}
 +
 +/* When flop counting is active (comm->eFlop), subtract the current force
 + * flop count from comm->flop; dd_force_flop_stop adds the later count back,
 + * so that the difference is what accumulates.
 + */
 +void dd_force_flop_start(gmx_domdec_t *dd,t_nrnb *nrnb)
 +{
 +    gmx_domdec_comm_t *comm = dd->comm;
 +
 +    if (!comm->eFlop)
 +    {
 +        return;
 +    }
 +    comm->flop -= force_flop_count(nrnb);
 +}
 +/* When flop counting is active (comm->eFlop), add the current force flop
 + * count to comm->flop and count one more flop sample.
 + */
 +void dd_force_flop_stop(gmx_domdec_t *dd,t_nrnb *nrnb)
 +{
 +    gmx_domdec_comm_t *comm = dd->comm;
 +
 +    if (!comm->eFlop)
 +    {
 +        return;
 +    }
 +    comm->flop += force_flop_count(nrnb);
 +    comm->flop_n++;
 +}
 +
 +/* Reset all ddCyclNr cycle counters (total, sample count and maximum)
 + * as well as the flop total and flop sample count.
 + */
 +static void clear_dd_cycle_counts(gmx_domdec_t *dd)
 +{
 +    gmx_domdec_comm_t *comm = dd->comm;
 +    int                c;
 +
 +    comm->flop_n = 0;
 +    comm->flop   = 0;
 +
 +    for(c=ddCyclNr-1; c>=0; c--)
 +    {
 +        comm->cycl_max[c] = 0;
 +        comm->cycl_n[c]   = 0;
 +        comm->cycl[c]     = 0;
 +    }
 +}
 +
 +/* Gather the load measurements of the cells, one decomposition dimension
 + * at a time, starting from the last dimension index and ending at 0.
 + *
 + * For each dimension index d, the participating ranks pack their values
 + * into sbuf and gather them at rank 0 of comm->mpi_comm_load[d]. The row
 + * root reduces the gathered entries into comm->load[d], which is in turn
 + * what gets packed when dimension index d-1 is processed. Finally the
 + * DD master adds the totals in comm->load[0] to the running statistics
 + * stored in comm.
 + *
 + * The whole function is timed under the ewcDDCOMMLOAD wallcycle counter.
 + */
 +static void get_load_distribution(gmx_domdec_t *dd,gmx_wallcycle_t wcycle)
 +{
 +    gmx_domdec_comm_t *comm;
 +    gmx_domdec_load_t *load;
 +    gmx_domdec_root_t *root=NULL;
 +    int  d,dim,cid,i,pos;
 +    float cell_frac=0,sbuf[DD_NLOAD_MAX];
 +    gmx_bool bSepPME;
 +    
 +    if (debug)
 +    {
 +        fprintf(debug,"get_load_distribution start\n");
 +    }
 +
 +    wallcycle_start(wcycle,ewcDDCOMMLOAD);
 +    
 +    comm = dd->comm;
 +    
 +    /* The two PME-related entries are only packed with a PME node id set */
 +    bSepPME = (dd->pme_nodeid >= 0);
 +    
 +    for(d=dd->ndim-1; d>=0; d--)
 +    {
 +        dim = dd->dim[d];
 +        /* Check if we participate in the communication in this dimension */
 +        if (d == dd->ndim-1 || 
 +            (dd->ci[dd->dim[d+1]]==0 && dd->ci[dd->dim[dd->ndim-1]]==0))
 +        {
 +            load = &comm->load[d];
 +            if (dd->bGridJump)
 +            {
 +                cell_frac = comm->cell_f1[d] - comm->cell_f0[d];
 +            }
 +            /* Pack sbuf; the order of the entries has to match the order
 +             * in which the root unpacks load->load further down.
 +             */
 +            pos = 0;
 +            if (d == dd->ndim-1)
 +            {
 +                /* Last dimension: the local force load is sent in both
 +                 * the "sum" and the "max" slot (and the "sum_m" slot
 +                 * with bGridJump).
 +                 */
 +                sbuf[pos++] = dd_force_load(comm);
 +                sbuf[pos++] = sbuf[0];
 +                if (dd->bGridJump)
 +                {
 +                    sbuf[pos++] = sbuf[0];
 +                    sbuf[pos++] = cell_frac;
 +                    if (d > 0)
 +                    {
 +                        sbuf[pos++] = comm->cell_f_max0[d];
 +                        sbuf[pos++] = comm->cell_f_min1[d];
 +                    }
 +                }
 +                if (bSepPME)
 +                {
 +                    sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
 +                    sbuf[pos++] = comm->cycl[ddCyclPME];
 +                }
 +            }
 +            else
 +            {
 +                /* Other dimensions: forward the values that were reduced
 +                 * into comm->load[d+1] in the previous iteration.
 +                 */
 +                sbuf[pos++] = comm->load[d+1].sum;
 +                sbuf[pos++] = comm->load[d+1].max;
 +                if (dd->bGridJump)
 +                {
 +                    sbuf[pos++] = comm->load[d+1].sum_m;
 +                    sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
 +                    /* The flags are stored in the float buffer here */
 +                    sbuf[pos++] = comm->load[d+1].flags;
 +                    if (d > 0)
 +                    {
 +                        sbuf[pos++] = comm->cell_f_max0[d];
 +                        sbuf[pos++] = comm->cell_f_min1[d];
 +                    }
 +                }
 +                if (bSepPME)
 +                {
 +                    sbuf[pos++] = comm->load[d+1].mdf;
 +                    sbuf[pos++] = comm->load[d+1].pme;
 +                }
 +            }
 +            load->nload = pos;
 +            /* Communicate a row in DD direction d.
 +             * The communicators are setup such that the root always has rank 0.
 +             */
 +#ifdef GMX_MPI
 +            MPI_Gather(sbuf      ,load->nload*sizeof(float),MPI_BYTE,
 +                       load->load,load->nload*sizeof(float),MPI_BYTE,
 +                       0,comm->mpi_comm_load[d]);
 +#endif
 +            if (dd->ci[dim] == dd->master_ci[dim])
 +            {
 +                /* We are the root, process this row */
 +                /* NOTE(review): root is only assigned with bDynLoadBal set,
 +                 * but it is dereferenced below whenever dd->bGridJump is set;
 +                 * presumably bGridJump implies bDynLoadBal - confirm.
 +                 */
 +                if (comm->bDynLoadBal)
 +                {
 +                    root = comm->root[d];
 +                }
 +                load->sum = 0;
 +                load->max = 0;
 +                load->sum_m = 0;
 +                load->cvol_min = 1;
 +                load->flags = 0;
 +                load->mdf = 0;
 +                load->pme = 0;
 +                pos = 0;
 +                /* Unpack and reduce the nload entries of each cell in the row */
 +                for(i=0; i<dd->nc[dim]; i++)
 +                {
 +                    load->sum += load->load[pos++];
 +                    load->max = max(load->max,load->load[pos]);
 +                    pos++;
 +                    if (dd->bGridJump)
 +                    {
 +                        if (root->bLimited)
 +                        {
 +                            /* This direction could not be load balanced properly,
 +                             * therefore we need to use the maximum instead of
 +                             * the average load.
 +                             */
 +                            load->sum_m = max(load->sum_m,load->load[pos]);
 +                        }
 +                        else
 +                        {
 +                            load->sum_m += load->load[pos];
 +                        }
 +                        pos++;
 +                        load->cvol_min = min(load->cvol_min,load->load[pos]);
 +                        pos++;
 +                        if (d < dd->ndim-1)
 +                        {
 +                            /* Convert the flags, sent as a float, back to int */
 +                            load->flags = (int)(load->load[pos++] + 0.5);
 +                        }
 +                        if (d > 0)
 +                        {
 +                            root->cell_f_max0[i] = load->load[pos++];
 +                            root->cell_f_min1[i] = load->load[pos++];
 +                        }
 +                    }
 +                    if (bSepPME)
 +                    {
 +                        load->mdf = max(load->mdf,load->load[pos]);
 +                        pos++;
 +                        load->pme = max(load->pme,load->load[pos]);
 +                        pos++;
 +                    }
 +                }
 +                if (comm->bDynLoadBal && root->bLimited)
 +                {
 +                    /* sum_m holds the row maximum in this case; it is scaled
 +                     * by the number of cells in the row and bit d is set to
 +                     * mark this dimension index as limited.
 +                     */
 +                    load->sum_m *= dd->nc[dim];
 +                    load->flags |= (1<<d);
 +                }
 +            }
 +        }
 +    }
 +
 +    if (DDMASTER(dd))
 +    {
 +        /* Accumulate the totals that print_dd_load_av reports */
 +        comm->nload      += dd_load_count(comm);
 +        comm->load_step  += comm->cycl[ddCyclStep];
 +        comm->load_sum   += comm->load[0].sum;
 +        comm->load_max   += comm->load[0].max;
 +        if (comm->bDynLoadBal)
 +        {
 +            /* Count, per dimension index, how often its limited bit was set */
 +            for(d=0; d<dd->ndim; d++)
 +            {
 +                if (comm->load[0].flags & (1<<d))
 +                {
 +                    comm->load_lim[d]++;
 +                }
 +            }
 +        }
 +        if (bSepPME)
 +        {
 +            comm->load_mdf += comm->load[0].mdf;
 +            comm->load_pme += comm->load[0].pme;
 +        }
 +    }
 +
 +    wallcycle_stop(wcycle,ewcDDCOMMLOAD);
 +    
 +    if (debug)
 +    {
 +        fprintf(debug,"get_load_distribution finished\n");
 +    }
 +}
 +
 +/* Return the relative performance loss on the total run time
 + * due to the force calculation load imbalance, or 0 when no load
 + * samples have been accumulated yet.
 + */
 +static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
 +{
 +    gmx_domdec_comm_t *comm = dd->comm;
 +
 +    if (!(comm->nload > 0))
 +    {
 +        return 0;
 +    }
 +
 +    return (comm->load_max*dd->nnodes - comm->load_sum)/
 +           (comm->load_step*dd->nnodes);
 +}
 +
 +/* On the DD master, and only when load samples were accumulated
 + * (comm->nload > 0), print the accumulated load balancing statistics.
 + * Every message is written to both fplog and stderr. NOTEs are added
 + * when the force imbalance loss or the PP/PME imbalance loss reaches
 + * DD_PERF_LOSS.
 + */
 +static void print_dd_load_av(FILE *fplog,gmx_domdec_t *dd)
 +{
 +    char  buf[STRLEN];
 +    int   npp,npme,nnodes,d,limp;
 +    float imbal,pme_f_ratio,lossf,lossp=0;
 +    gmx_bool  bLim;
 +    gmx_domdec_comm_t *comm;
 +
 +    comm = dd->comm;
 +    if (DDMASTER(dd) && comm->nload > 0)
 +    {
 +        npp    = dd->nnodes;
 +        npme   = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
 +        nnodes = npp + npme;
 +        /* Ratio of the maximum load to the average load, minus one */
 +        imbal = comm->load_max*npp/comm->load_sum - 1;
 +        lossf = dd_force_imb_perf_loss(dd);
 +        sprintf(buf," Average load imbalance: %.1f %%\n",imbal*100);
 +        fprintf(fplog,"%s",buf);
 +        fprintf(stderr,"\n");
 +        fprintf(stderr,"%s",buf);
 +        sprintf(buf," Part of the total run time spent waiting due to load imbalance: %.1f %%\n",lossf*100);
 +        fprintf(fplog,"%s",buf);
 +        fprintf(stderr,"%s",buf);
 +        /* bLim is set when some dimension was limited at least 50% */
 +        bLim = FALSE;
 +        if (comm->bDynLoadBal)
 +        {
 +            sprintf(buf," Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
 +            for(d=0; d<dd->ndim; d++)
 +            {
 +                /* Integer percentage of the load samples in which
 +                 * this dimension was counted as limited.
 +                 */
 +                limp = (200*comm->load_lim[d]+1)/(2*comm->nload);
 +                sprintf(buf+strlen(buf)," %c %d %%",dim2char(dd->dim[d]),limp);
 +                if (limp >= 50)
 +                {
 +                    bLim = TRUE;
 +                }
 +            }
 +            sprintf(buf+strlen(buf),"\n");
 +            fprintf(fplog,"%s",buf);
 +            fprintf(stderr,"%s",buf);
 +        }
 +        if (npme > 0)
 +        {
 +            pme_f_ratio = comm->load_pme/comm->load_mdf;
 +            /* lossp is negative when the PME load is the smaller one.
 +             * It is weighted by the fraction of PME nodes when it is
 +             * not positive and by the fraction of PP nodes otherwise
 +             * (presumably the fraction of nodes that is waiting).
 +             */
 +            lossp = (comm->load_pme -comm->load_mdf)/comm->load_step;
 +            if (lossp <= 0)
 +            {
 +                lossp *= (float)npme/(float)nnodes;
 +            }
 +            else
 +            {
 +                lossp *= (float)npp/(float)nnodes;
 +            }
 +            sprintf(buf," Average PME mesh/force load: %5.3f\n",pme_f_ratio);
 +            fprintf(fplog,"%s",buf);
 +            fprintf(stderr,"%s",buf);
 +            sprintf(buf," Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n",fabs(lossp)*100);
 +            fprintf(fplog,"%s",buf);
 +            fprintf(stderr,"%s",buf);
 +        }
 +        fprintf(fplog,"\n");
 +        fprintf(stderr,"\n");
 +        
 +        if (lossf >= DD_PERF_LOSS)
 +        {
 +            sprintf(buf,
 +                    "NOTE: %.1f %% performance was lost due to load imbalance\n"
 +                    "      in the domain decomposition.\n",lossf*100);
 +            /* Append a suggestion that matches the current DLB state */
 +            if (!comm->bDynLoadBal)
 +            {
 +                sprintf(buf+strlen(buf),"      You might want to use dynamic load balancing (option -dlb.)\n");
 +            }
 +            else if (bLim)
 +            {
 +                sprintf(buf+strlen(buf),"      You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
 +            }
 +            fprintf(fplog,"%s\n",buf);
 +            fprintf(stderr,"%s\n",buf);
 +        }
 +        if (npme > 0 && fabs(lossp) >= DD_PERF_LOSS)
 +        {
 +            /* The sign of lossp selects the wording of the advice */
 +            sprintf(buf,
 +                    "NOTE: %.1f %% performance was lost because the PME nodes\n"
 +                    "      had %s work to do than the PP nodes.\n"
 +                    "      You might want to %s the number of PME nodes\n"
 +                    "      or %s the cut-off and the grid spacing.\n",
 +                    fabs(lossp*100),
 +                    (lossp < 0) ? "less"     : "more",
 +                    (lossp < 0) ? "decrease" : "increase",
 +                    (lossp < 0) ? "decrease" : "increase");
 +            fprintf(fplog,"%s\n",buf);
 +            fprintf(stderr,"%s\n",buf);
 +        }
 +    }
 +}
 +
 +/* Return the cvol_min value of the top-level load entry multiplied by
 + * the number of DD nodes; it is printed as "vol min/aver".
 + */
 +static float dd_vol_min(gmx_domdec_t *dd)
 +{
 +    const gmx_domdec_load_t *load0 = &dd->comm->load[0];
 +
 +    return load0->cvol_min*dd->nnodes;
 +}
 +
 +/* Return the flags of the top-level load entry.
 + *
 + * The value is a bitmask, not a boolean: bit d is set when load balancing
 + * was limited for DD dimension index d, and callers in this file test
 + * individual bits with (flags & (1<<d)). The return type is therefore int,
 + * so that the mask can never be reduced to a single truth value.
 + * A non-zero result still means "limited in at least one dimension".
 + */
 +static int dd_load_flags(gmx_domdec_t *dd)
 +{
 +    return dd->comm->load[0].flags;
 +}
 +
 +/* Return the force load imbalance of the top-level load entry:
 + * the maximum load times the number of DD nodes divided by the
 + * summed load, minus one.
 + */
 +static float dd_f_imbal(gmx_domdec_t *dd)
 +{
 +    const gmx_domdec_load_t *load0 = &dd->comm->load[0];
 +
 +    return load0->max*dd->nnodes/load0->sum - 1;
 +}
 +
 +/* Return the ratio of the pme and mdf values of the top-level load
 + * entry; it is printed as the "pme mesh/force" ratio.
 + */
 +static float dd_pme_f_ratio(gmx_domdec_t *dd)
 +{
 +    const gmx_domdec_load_t *load0 = &dd->comm->load[0];
 +
 +    return load0->pme/load0->mdf;
 +}
 +
 +/* Write the load balancing status for this step to fplog: first the
 + * dimensions that are limited by the minimum cell size (if any),
 + * then a single "DD  step" summary line followed by an empty line.
 + */
 +static void dd_print_load(FILE *fplog,gmx_domdec_t *dd,gmx_large_int_t step)
 +{
 +    char step_str[22];
 +    int  dim_ind;
 +    int  limit_flags = dd_load_flags(dd);
 +
 +    if (limit_flags != 0)
 +    {
 +        fprintf(fplog,
 +                "DD  load balancing is limited by minimum cell size in dimension");
 +        /* List the character of every dimension whose bit is set */
 +        dim_ind = 0;
 +        while (dim_ind < dd->ndim)
 +        {
 +            if ((limit_flags & (1<<dim_ind)) != 0)
 +            {
 +                fprintf(fplog," %c",dim2char(dd->dim[dim_ind]));
 +            }
 +            dim_ind++;
 +        }
 +        fprintf(fplog,"\n");
 +    }
 +
 +    fprintf(fplog,"DD  step %s",gmx_step_str(step,step_str));
 +    if (dd->comm->bDynLoadBal)
 +    {
 +        /* A '!' after the volume marks that balancing was limited */
 +        fprintf(fplog,"  vol min/aver %5.3f%c",
 +                dd_vol_min(dd),(limit_flags != 0) ? '!' : ' ');
 +    }
 +    fprintf(fplog," load imb.: force %4.1f%%",dd_f_imbal(dd)*100);
 +    if (dd->comm->cycl_n[ddCyclPME] != 0)
 +    {
 +        fprintf(fplog,"  pme mesh/force %5.3f",dd_pme_f_ratio(dd));
 +    }
 +    fprintf(fplog,"\n\n");
 +}
 +
 +/* Print a compact one-line load summary to stderr, without a newline:
 + * the minimum volume (with dynamic load balancing only), the force
 + * imbalance and, when PME cycles were counted, the pme/force ratio.
 + */
 +static void dd_print_load_verbose(gmx_domdec_t *dd)
 +{
 +    gmx_domdec_comm_t *comm = dd->comm;
 +    char               limit_mark;
 +
 +    if (comm->bDynLoadBal)
 +    {
 +        /* A '!' marks that the load balancing was limited */
 +        limit_mark = dd_load_flags(dd) ? '!' : ' ';
 +        fprintf(stderr,"vol %4.2f%c ",dd_vol_min(dd),limit_mark);
 +    }
 +    fprintf(stderr,"imb F %2d%% ",(int)(dd_f_imbal(dd)*100+0.5));
 +    if (comm->cycl_n[ddCyclPME] != 0)
 +    {
 +        fprintf(stderr,"pme/F %4.2f ",dd_pme_f_ratio(dd));
 +    }
 +}
 +
 +#ifdef GMX_MPI
 +/* Create the load communicator for DD dimension index dim_ind for the row
 + * of cells that share the coordinates in loc and differ only along
 + * dd->dim[dim_ind].
 + *
 + * The ranks of the row are computed with dd_index() for every cell index
 + * along that dimension. MPI_Comm_create() is called on dd->mpi_comm_all
 + * by every caller, with the row group when this rank is in the row and
 + * with MPI_GROUP_EMPTY otherwise. Ranks in the row store the communicator
 + * in dd->comm->mpi_comm_load[dim_ind] and allocate the buffers they need:
 + * root data or cell_f_row when eDLB is not edlbNO, and the gather buffer
 + * load[dim_ind].load on the row root.
 + */
 +static void make_load_communicator(gmx_domdec_t *dd,MPI_Group g_all,
 +                                   int dim_ind,ivec loc)
 +{
-     MPI_Group g_row;
++    MPI_Group g_row = MPI_GROUP_EMPTY;
 +    MPI_Comm  c_row;
 +    int  dim,i,*rank;
 +    ivec loc_c;
 +    gmx_domdec_root_t *root;
++    gmx_bool bPartOfGroup = FALSE;
 +    
 +    dim = dd->dim[dim_ind];
 +    copy_ivec(loc,loc_c);
 +    snew(rank,dd->nc[dim]);
 +    /* Collect the ranks of all cells in this row */
 +    for(i=0; i<dd->nc[dim]; i++)
 +    {
 +        loc_c[dim] = i;
 +        rank[i] = dd_index(dd->nc,loc_c);
++        if (rank[i] == dd->rank)
++        {
++            /* This process is part of the group */
++            bPartOfGroup = TRUE;
++        }
++    }
++    if (bPartOfGroup)
++    {
++        MPI_Group_incl(g_all,dd->nc[dim],rank,&g_row);
 +    }
-     /* Here we create a new group, that does not necessarily
-      * include our process. But MPI_Comm_create needs to be
-      * called by all the processes in the original communicator.
-      * Calling MPI_Group_free afterwards gives errors, so I assume
-      * also the group is needed by all processes. (B. Hess)
-      */
-     MPI_Group_incl(g_all,dd->nc[dim],rank,&g_row);
 +    /* NOTE(review): g_row is not released with MPI_Group_free in this
 +     * function; confirm whether the group can be freed after this call.
 +     */
 +    MPI_Comm_create(dd->mpi_comm_all,g_row,&c_row);
-     if (c_row != MPI_COMM_NULL)
++    if (bPartOfGroup)
 +    {
-         /* This process is part of the group */
 +        dd->comm->mpi_comm_load[dim_ind] = c_row;
 +        if (dd->comm->eDLB != edlbNO)
 +        {
 +            if (dd->ci[dim] == dd->master_ci[dim])
 +            {
 +                /* This is the root process of this row */
 +                snew(dd->comm->root[dim_ind],1);
 +                root = dd->comm->root[dim_ind];
 +                snew(root->cell_f,DD_CELL_F_SIZE(dd,dim_ind));
 +                snew(root->old_cell_f,dd->nc[dim]+1);
 +                snew(root->bCellMin,dd->nc[dim]);
 +                if (dim_ind > 0)
 +                {
 +                    snew(root->cell_f_max0,dd->nc[dim]);
 +                    snew(root->cell_f_min1,dd->nc[dim]);
 +                    snew(root->bound_min,dd->nc[dim]);
 +                    snew(root->bound_max,dd->nc[dim]);
 +                }
 +                snew(root->buf_ncd,dd->nc[dim]);
 +            }
 +            else
 +            {
 +                /* This is not a root process, we only need to receive cell_f */
 +                snew(dd->comm->cell_f_row,DD_CELL_F_SIZE(dd,dim_ind));
 +            }
 +        }
 +        if (dd->ci[dim] == dd->master_ci[dim])
 +        {
 +            /* Gather buffer: DD_NLOAD_MAX entries for each cell in the row */
 +            snew(dd->comm->load[dim_ind].load,dd->nc[dim]*DD_NLOAD_MAX);
 +        }
 +    }
 +    sfree(rank);
 +}
 +#endif
 +
 +/* Create the load communicators for all DD dimensions.
 + *
 + * One communicator is made for the first dimension, one per cell along
 + * the first dimension for the second, and one per cell pair along the
 + * first two dimensions for the third. Does nothing without GMX_MPI.
 + */
 +static void make_load_communicators(gmx_domdec_t *dd)
 +{
 +#ifdef GMX_MPI
 +    MPI_Group all_group;
 +    int  first_dim,second_dim,ic0,ic1;
 +    ivec cell;
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"Making load communicators\n");
 +    }
 +
 +    MPI_Comm_group(dd->mpi_comm_all,&all_group);
 +
 +    snew(dd->comm->load,dd->ndim);
 +    snew(dd->comm->mpi_comm_load,dd->ndim);
 +
 +    /* First dimension: a single row */
 +    clear_ivec(cell);
 +    make_load_communicator(dd,all_group,0,cell);
 +
 +    /* Second dimension: one row for each cell along the first dimension */
 +    if (dd->ndim > 1)
 +    {
 +        first_dim = dd->dim[0];
 +        for(ic0=0; ic0<dd->nc[first_dim]; ic0++)
 +        {
 +            cell[first_dim] = ic0;
 +            make_load_communicator(dd,all_group,1,cell);
 +        }
 +    }
 +
 +    /* Third dimension: one row for each cell pair in the first two */
 +    if (dd->ndim > 2)
 +    {
 +        first_dim  = dd->dim[0];
 +        second_dim = dd->dim[1];
 +        for(ic0=0; ic0<dd->nc[first_dim]; ic0++)
 +        {
 +            cell[first_dim] = ic0;
 +            for(ic1=0; ic1<dd->nc[second_dim]; ic1++)
 +            {
 +                cell[second_dim] = ic1;
 +                make_load_communicator(dd,all_group,2,cell);
 +            }
 +        }
 +    }
 +
 +    MPI_Group_free(&all_group);
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"Finished making load communicators\n");
 +    }
 +#endif
 +}
 +
 +/* Set up the domain decomposition grid data for this rank.
 + *
 + * Determines the neighbor ranks along each decomposition dimension,
 + * reports the grid, fills the zone shifts and the i-zone j-ranges with
 + * their allowed shift ranges in dd->comm->zones, and finally allocates
 + * the DLB root array and creates the load communicators when required.
 + *
 + * fplog  log file, may be NULL
 + * dd     domain decomposition data; dd->ndim, dd->dim, dd->nc and dd->ci
 + *        are read here
 + */
 +void setup_dd_grid(FILE *fplog,gmx_domdec_t *dd)
 +{
 +    int  d,dim,i,j,m;
 +    ivec tmp;
 +    int  nzone,nzonep;
 +    ivec dd_zp[DD_MAXIZONE];
 +    gmx_domdec_zones_t *zones;
 +    gmx_domdec_ns_ranges_t *izone;
 +
 +    /* Neighbor 0 is one cell up, neighbor 1 one cell down,
 +     * both with periodic wrap-around.
 +     */
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        dim = dd->dim[d];
 +        copy_ivec(dd->ci,tmp);
 +        tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
 +        dd->neighbor[d][0] = ddcoord2ddnodeid(dd,tmp);
 +        copy_ivec(dd->ci,tmp);
 +        tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
 +        dd->neighbor[d][1] = ddcoord2ddnodeid(dd,tmp);
 +        if (debug)
 +        {
 +            fprintf(debug,"DD rank %d neighbor ranks in dir %d are + %d - %d\n",
 +                    dd->rank,dim,
 +                    dd->neighbor[d][0],
 +                    dd->neighbor[d][1]);
 +        }
 +    }
 +
 +    if (DDMASTER(dd))
 +    {
 +        fprintf(stderr,"Making %dD domain decomposition %d x %d x %d\n",
 +                dd->ndim,dd->nc[XX],dd->nc[YY],dd->nc[ZZ]);
 +    }
 +    if (fplog)
 +    {
 +        fprintf(fplog,"\nMaking %dD domain decomposition grid %d x %d x %d, home cell index %d %d %d\n\n",
 +                dd->ndim,
 +                dd->nc[XX],dd->nc[YY],dd->nc[ZZ],
 +                dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
 +    }
 +
 +    /* Select the zone count and zone pair table for this dimensionality */
 +    switch (dd->ndim)
 +    {
 +    case 3:
 +        nzone  = dd_z3n;
 +        nzonep = dd_zp3n;
 +        for(i=0; i<nzonep; i++)
 +        {
 +            copy_ivec(dd_zp3[i],dd_zp[i]);
 +        }
 +        break;
 +    case 2:
 +        nzone  = dd_z2n;
 +        nzonep = dd_zp2n;
 +        for(i=0; i<nzonep; i++)
 +        {
 +            copy_ivec(dd_zp2[i],dd_zp[i]);
 +        }
 +        break;
 +    case 1:
 +        nzone  = dd_z1n;
 +        nzonep = dd_zp1n;
 +        for(i=0; i<nzonep; i++)
 +        {
 +            copy_ivec(dd_zp1[i],dd_zp[i]);
 +        }
 +        break;
 +    default:
 +        gmx_fatal(FARGS,"Can only do 1, 2 or 3D domain decomposition");
 +        /* Assignments kept so nzone/nzonep are set on every path */
 +        nzone = 0;
 +        nzonep = 0;
 +    }
 +
 +    zones = &dd->comm->zones;
 +
 +    /* Map the generic zone offsets onto the decomposed dimensions */
 +    for(i=0; i<nzone; i++)
 +    {
 +        m = 0;
 +        clear_ivec(zones->shift[i]);
 +        for(d=0; d<dd->ndim; d++)
 +        {
 +            zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
 +        }
 +    }
 +
 +    zones->n = nzone;
 +    zones->nizone = nzonep;
 +    for(i=0; i<zones->nizone; i++)
 +    {
 +        /* The first entry of each zone pair must be the i-zone index */
 +        if (dd_zp[i][0] != i)
 +        {
 +            gmx_fatal(FARGS,"Internal inconsistency in the dd grid setup");
 +        }
 +        izone = &zones->izone[i];
 +        izone->j0 = dd_zp[i][1];
 +        izone->j1 = dd_zp[i][2];
 +        for(dim=0; dim<DIM; dim++)
 +        {
 +            if (dd->nc[dim] == 1)
 +            {
 +                /* All shifts should be allowed */
 +                izone->shift0[dim] = -1;
 +                izone->shift1[dim] = 1;
 +            }
 +            else
 +            {
 +                int shift_diff;
 +
 +                /* Assume the shifts are not more than 1 cell.
 +                 * Start from an empty range and widen it to cover the
 +                 * shift differences of all j-zones of this i-zone.
 +                 */
 +                izone->shift0[dim] = 1;
 +                izone->shift1[dim] = -1;
 +                for(j=izone->j0; j<izone->j1; j++)
 +                {
 +                    shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
 +                    if (shift_diff < izone->shift0[dim])
 +                    {
 +                        izone->shift0[dim] = shift_diff;
 +                    }
 +                    if (shift_diff > izone->shift1[dim])
 +                    {
 +                        izone->shift1[dim] = shift_diff;
 +                    }
 +                }
 +            }
 +        }
 +    }
 +
 +    if (dd->comm->eDLB != edlbNO)
 +    {
 +        snew(dd->comm->root,dd->ndim);
 +    }
 +
 +    if (dd->comm->bRecordLoad)
 +    {
 +        make_load_communicators(dd);
 +    }
 +}
 +
 +/* Set up the particle-particle communicator and the DD rank data.
 + *
 + * With comm->bCartesianPP a Cartesian communicator is created and
 + * replaces cr->mpi_comm_mygroup. dd->mpi_comm_all and dd->rank are then
 + * taken from cr->mpi_comm_mygroup. Depending on the branch taken, the
 + * DD cell coordinates, the master coordinates/rank and the ddindex
 + * lookup tables are filled as well. Without GMX_MPI only the
 + * log/debug output at the end is produced.
 + *
 + * fplog    log file, may be NULL
 + * cr       communication record, read and updated
 + * reorder  passed on as the reorder argument of MPI_Cart_create
 + */
 +static void make_pp_communicator(FILE *fplog,t_commrec *cr,int reorder)
 +{
 +    gmx_domdec_t *dd;
 +    gmx_domdec_comm_t *comm;
 +    int  i,rank,*buf;
 +    ivec periods;
 +#ifdef GMX_MPI
 +    MPI_Comm comm_cart;
 +#endif
 +    
 +    dd = cr->dd;
 +    comm = dd->comm;
 +    
 +#ifdef GMX_MPI
 +    if (comm->bCartesianPP)
 +    {
 +        /* Set up cartesian communication for the particle-particle part */
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Will use a Cartesian communicator: %d x %d x %d\n",
 +                    dd->nc[XX],dd->nc[YY],dd->nc[ZZ]);
 +        }
 +        
 +        /* All dimensions are made periodic in the Cartesian topology */
 +        for(i=0; i<DIM; i++)
 +        {
 +            periods[i] = TRUE;
 +        }
 +        MPI_Cart_create(cr->mpi_comm_mygroup,DIM,dd->nc,periods,reorder,
 +                        &comm_cart);
 +        /* We overwrite the old communicator with the new cartesian one */
 +        cr->mpi_comm_mygroup = comm_cart;
 +    }
 +    
 +    dd->mpi_comm_all = cr->mpi_comm_mygroup;
 +    MPI_Comm_rank(dd->mpi_comm_all,&dd->rank);
 +    
 +    if (comm->bCartesianPP_PME)
 +    {
 +        /* Since we want to use the original cartesian setup for sim,
 +         * and not the one after split, we need to make an index.
 +         */
 +        snew(comm->ddindex2ddnodeid,dd->nnodes);
 +        comm->ddindex2ddnodeid[dd_index(dd->nc,dd->ci)] = dd->rank;
 +        gmx_sumi(dd->nnodes,comm->ddindex2ddnodeid,cr);
 +        /* Get the rank of the DD master,
 +         * above we made sure that the master node is a PP node.
 +         */
 +        if (MASTER(cr))
 +        {
 +            rank = dd->rank;
 +        }
 +        else
 +        {
 +            rank = 0;
 +        }
 +        /* Only the master contributes a non-zero value to the sum */
 +        MPI_Allreduce(&rank,&dd->masterrank,1,MPI_INT,MPI_SUM,dd->mpi_comm_all);
 +    }
 +    else if (comm->bCartesianPP)
 +    {
 +        if (cr->npmenodes == 0)
 +        {
 +            /* The PP communicator is also
 +             * the communicator for this simulation
 +             */
 +            cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
 +        }
 +        cr->nodeid = dd->rank;
 +        
 +        MPI_Cart_coords(dd->mpi_comm_all,dd->rank,DIM,dd->ci);
 +        
 +        /* We need to make an index to go from the coordinates
 +         * to the nodeid of this simulation.
 +         */
 +        snew(comm->ddindex2simnodeid,dd->nnodes);
 +        /* NOTE(review): the sum reduction below presumes that snew
 +         * zero-initializes buf - confirm.
 +         */
 +        snew(buf,dd->nnodes);
 +        if (cr->duty & DUTY_PP)
 +        {
 +            buf[dd_index(dd->nc,dd->ci)] = cr->sim_nodeid;
 +        }
 +        /* Communicate the ddindex to simulation nodeid index */
 +        MPI_Allreduce(buf,comm->ddindex2simnodeid,dd->nnodes,MPI_INT,MPI_SUM,
 +                      cr->mpi_comm_mysim);
 +        sfree(buf);
 +        
 +        /* Determine the master coordinates and rank.
 +         * The DD master should be the same node as the master of this sim.
 +         */
 +        for(i=0; i<dd->nnodes; i++)
 +        {
 +            if (comm->ddindex2simnodeid[i] == 0)
 +            {
 +                ddindex2xyz(dd->nc,i,dd->master_ci);
 +                MPI_Cart_rank(dd->mpi_comm_all,dd->master_ci,&dd->masterrank);
 +            }
 +        }
 +        if (debug)
 +        {
 +            fprintf(debug,"The master rank is %d\n",dd->masterrank);
 +        }
 +    }
 +    else
 +    {
 +        /* No Cartesian communicators */
 +        /* We use the rank in dd->comm->all as DD index */
 +        ddindex2xyz(dd->nc,dd->rank,dd->ci);
 +        /* The simulation master nodeid is 0, so the DD master rank is also 0 */
 +        dd->masterrank = 0;
 +        clear_ivec(dd->master_ci);
 +    }
 +#endif
 +  
 +    if (fplog)
 +    {
 +        fprintf(fplog,
 +                "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
 +                dd->rank,dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
 +    }
 +    if (debug)
 +    {
 +        fprintf(debug,
 +                "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
 +                dd->rank,dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
 +    }
 +}
 +
 +/* Take part in the reduction that builds comm->ddindex2simnodeid.
 + *
 + * Only acts when a Cartesian PP communicator is used without a Cartesian
 + * PP-PME setup. The table is allocated and filled by a sum reduction over
 + * cr->mpi_comm_mysim, in which a rank with PP duty contributes its own
 + * simulation node id. Does nothing without GMX_MPI.
 + *
 + * NOTE(review): the sum reduction presumes that snew zero-initializes
 + * buf - confirm.
 + */
 +static void receive_ddindex2simnodeid(t_commrec *cr)
 +{
 +    gmx_domdec_t *dd;
 +    gmx_domdec_comm_t *comm;
 +    int  *buf;
 +
 +    dd = cr->dd;
 +    comm = dd->comm;
 +
 +#ifdef GMX_MPI
 +    if (!comm->bCartesianPP_PME && comm->bCartesianPP)
 +    {
 +        snew(comm->ddindex2simnodeid,dd->nnodes);
 +        snew(buf,dd->nnodes);
 +        if (cr->duty & DUTY_PP)
 +        {
 +            buf[dd_index(dd->nc,dd->ci)] = cr->sim_nodeid;
 +        }
 +        /* Communicate the ddindex to simulation nodeid index */
 +        MPI_Allreduce(buf,comm->ddindex2simnodeid,dd->nnodes,MPI_INT,MPI_SUM,
 +                      cr->mpi_comm_mysim);
 +        sfree(buf);
 +    }
 +#endif
 +}
 +
 +/* Allocate and return the master data structure.
 + *
 + * The per-node arrays are sized by dd->nnodes, the charge group list by
 + * ncg and the cell boundary arrays by dd->nc[]+1 for each dimension.
 + * The vector buffer of natoms elements is only allocated when
 + * dd->nnodes exceeds GMX_DD_NNODES_SENDRECV; otherwise it is NULL.
 + */
 +static gmx_domdec_master_t *init_gmx_domdec_master_t(gmx_domdec_t *dd,
 +                                                     int ncg,int natoms)
 +{
 +    gmx_domdec_master_t *master;
 +    int nnodes,dim;
 +
 +    nnodes = dd->nnodes;
 +
 +    snew(master,1);
 +
 +    /* Per-node bookkeeping */
 +    snew(master->ncg,nnodes);
 +    snew(master->index,nnodes+1);
 +    snew(master->cg,ncg);
 +    snew(master->nat,nnodes);
 +    snew(master->ibuf,nnodes*2);
 +
 +    /* Cell boundaries along each dimension */
 +    snew(master->cell_x,DIM);
 +    for(dim=0; dim<DIM; dim++)
 +    {
 +        snew(master->cell_x[dim],dd->nc[dim]+1);
 +    }
 +
 +    master->vbuf = NULL;
 +    if (nnodes > GMX_DD_NNODES_SENDRECV)
 +    {
 +        snew(master->vbuf,natoms);
 +    }
 +
 +    return master;
 +}
 +
 +/* Assign the PP or PME-only duty to this rank and split the simulation
 + * communicator cr->mpi_comm_mysim into cr->mpi_comm_mygroup accordingly.
 + *
 + * With comm->bCartesianPP and a PME node count that matches the DD grid,
 + * a combined PP+PME Cartesian communicator is created, which replaces
 + * cr->mpi_comm_mysim; comm->bCartesianPP_PME is set in that case.
 + * Otherwise the duty follows from dd_simnode2pmenode().
 + *
 + * fplog          log file, may be NULL
 + * cr             communication record, read and updated
 + * dd_node_order  one of the ddno* node order values
 + * reorder        passed on as the reorder argument of MPI_Cart_create
 + */
 +static void split_communicator(FILE *fplog,t_commrec *cr,int dd_node_order,
 +                               int reorder)
 +{
 +    gmx_domdec_t *dd;
 +    gmx_domdec_comm_t *comm;
 +    int  i,rank;
 +    gmx_bool bDiv[DIM];
 +    ivec periods;
 +#ifdef GMX_MPI
 +    MPI_Comm comm_cart;
 +#endif
 +    
 +    dd = cr->dd;
 +    comm = dd->comm;
 +    
 +    if (comm->bCartesianPP)
 +    {
 +        /* Only y and z are checked; bDiv[XX] is never set nor read */
 +        for(i=1; i<DIM; i++)
 +        {
 +            bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
 +        }
 +        if (bDiv[YY] || bDiv[ZZ])
 +        {
 +            comm->bCartesianPP_PME = TRUE;
 +            /* If we have 2D PME decomposition, which is always in x+y,
 +             * we stack the PME only nodes in z.
 +             * Otherwise we choose the direction that provides the thinnest slab
 +             * of PME only nodes as this will have the least effect
 +             * on the PP communication.
 +             * But for the PME communication the opposite might be better.
 +             */
 +            if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
 +                             !bDiv[YY] ||
 +                             dd->nc[YY] > dd->nc[ZZ]))
 +            {
 +                comm->cartpmedim = ZZ;
 +            }
 +            else
 +            {
 +                comm->cartpmedim = YY;
 +            }
 +            /* Extend the total grid with the slab of PME only nodes */
 +            comm->ntot[comm->cartpmedim]
 +                += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
 +        }
 +        else if (fplog)
 +        {
 +            fprintf(fplog,"#pmenodes (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n",cr->npmenodes,dd->nc[XX],dd->nc[YY],dd->nc[XX],dd->nc[ZZ]);
 +            fprintf(fplog,
 +                    "Will not use a Cartesian communicator for PP <-> PME\n\n");
 +        }
 +    }
 +    
 +#ifdef GMX_MPI
 +    if (comm->bCartesianPP_PME)
 +    {
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n",comm->ntot[XX],comm->ntot[YY],comm->ntot[ZZ]);
 +        }
 +        
 +        for(i=0; i<DIM; i++)
 +        {
 +            periods[i] = TRUE;
 +        }
 +        MPI_Cart_create(cr->mpi_comm_mysim,DIM,comm->ntot,periods,reorder,
 +                        &comm_cart);
 +        
 +        MPI_Comm_rank(comm_cart,&rank);
 +        if (MASTERNODE(cr) && rank != 0)
 +        {
 +            gmx_fatal(FARGS,"MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
 +        }
 +        
 +        /* With this assignment we lose the link to the original communicator
 +         * which will usually be MPI_COMM_WORLD, unless we have multisim.
 +         */
 +        cr->mpi_comm_mysim = comm_cart;
 +        cr->sim_nodeid = rank;
 +        
 +        MPI_Cart_coords(cr->mpi_comm_mysim,cr->sim_nodeid,DIM,dd->ci);
 +        
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Cartesian nodeid %d, coordinates %d %d %d\n\n",
 +                    cr->sim_nodeid,dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
 +        }
 +        
 +        /* Ranks inside the DD grid along cartpmedim do PP,
 +         * ranks beyond it do PME.
 +         */
 +        if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
 +        {
 +            cr->duty = DUTY_PP;
 +        }
 +        if (cr->npmenodes == 0 ||
 +            dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
 +        {
 +            cr->duty = DUTY_PME;
 +        }
 +        
 +        /* Split the sim communicator into PP and PME only nodes */
 +        MPI_Comm_split(cr->mpi_comm_mysim,
 +                       cr->duty,
 +                       dd_index(comm->ntot,dd->ci),
 +                       &cr->mpi_comm_mygroup);
 +    }
 +    else
 +    {
 +        switch (dd_node_order)
 +        {
 +        case ddnoPP_PME:
 +            if (fplog)
 +            {
 +                fprintf(fplog,"Order of the nodes: PP first, PME last\n");
 +            }
 +            break;
 +        case ddnoINTERLEAVE:
 +            /* Interleave the PP-only and PME-only nodes,
 +             * as on clusters with dual-core machines this will double
 +             * the communication bandwidth of the PME processes
 +             * and thus speed up the PP <-> PME and inter PME communication.
 +             */
 +            if (fplog)
 +            {
 +                fprintf(fplog,"Interleaving PP and PME nodes\n");
 +            }
 +            comm->pmenodes = dd_pmenodes(cr);
 +            break;
 +        case ddnoCARTESIAN:
 +            break;
 +        default:
 +            gmx_fatal(FARGS,"Unknown dd_node_order=%d",dd_node_order);
 +        }
 +    
 +        /* A return value of -1 marks this rank as PME only */
 +        if (dd_simnode2pmenode(cr,cr->sim_nodeid) == -1)
 +        {
 +            cr->duty = DUTY_PME;
 +        }
 +        else
 +        {
 +            cr->duty = DUTY_PP;
 +        }
 +        
 +        /* Split the sim communicator into PP and PME only nodes */
 +        MPI_Comm_split(cr->mpi_comm_mysim,
 +                       cr->duty,
 +                       cr->nodeid,
 +                       &cr->mpi_comm_mygroup);
 +        MPI_Comm_rank(cr->mpi_comm_mygroup,&cr->nodeid);
 +    }
 +#endif
 +
 +    if (fplog)
 +    {
 +        fprintf(fplog,"This is a %s only node\n\n",
 +                (cr->duty & DUTY_PP) ? "particle-particle" : "PME-mesh");
 +    }
 +}
 +
 +/* Create all communicators needed for domain decomposition.
 + *
 + * Splits off PME only nodes when cr->npmenodes > 0, sets up the PP
 + * communicator on ranks with PP duty, determines the PME partner of
 + * ranks without PME duty and allocates the master data on the DD master.
 + *
 + * fplog          log file, may be NULL
 + * cr             communication record, read and updated
 + * dd_node_order  one of the ddno* node order values
 + */
 +void make_dd_communicators(FILE *fplog,t_commrec *cr,int dd_node_order)
 +{
 +    gmx_domdec_t      *dd   = cr->dd;
 +    gmx_domdec_comm_t *comm = dd->comm;
 +    int bReorder;
 +
 +    /* Start from the PP grid; PME only nodes may be added later */
 +    copy_ivec(dd->nc,comm->ntot);
 +
 +    comm->bCartesianPP_PME = FALSE;
 +    comm->bCartesianPP     = (dd_node_order == ddnoCARTESIAN);
 +
 +    /* Reordering of the MPI ranks is on by default and can be turned off
 +     * through the environment. Real reordering is only supported on
 +     * very few architectures, Blue Gene is one of them.
 +     */
 +    bReorder = (getenv("GMX_NO_CART_REORDER") == NULL);
 +
 +    if (cr->npmenodes > 0)
 +    {
 +        /* Separate PP and PME nodes */
 +        split_communicator(fplog,cr,dd_node_order,bReorder);
 +        if (comm->bCartesianPP_PME)
 +        {
 +            /* The nodes were (possibly) reordered already,
 +             * make_pp_communicator should not do it again.
 +             */
 +            bReorder = FALSE;
 +        }
 +    }
 +    else
 +    {
 +        /* Every node does both PP and PME */
 +#ifdef GMX_MPI
 +        /* No separate group communicator is required */
 +        cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
 +#endif
 +    }
 +
 +    if (cr->duty & DUTY_PP)
 +    {
 +        /* Copy or make a new PP communicator */
 +        make_pp_communicator(fplog,cr,bReorder);
 +    }
 +    else
 +    {
 +        receive_ddindex2simnodeid(cr);
 +    }
 +
 +    if (cr->duty & DUTY_PME)
 +    {
 +        dd->pme_nodeid = -1;
 +    }
 +    else
 +    {
 +        /* Set up the communication to our PME node */
 +        dd->pme_nodeid = dd_simnode2pmenode(cr,cr->sim_nodeid);
 +        dd->pme_receive_vir_ener = receive_vir_ener(cr);
 +        if (debug)
 +        {
 +            fprintf(debug,"My pme_nodeid %d receive ener %d\n",
 +                    dd->pme_nodeid,dd->pme_receive_vir_ener);
 +        }
 +    }
 +
 +    if (DDMASTER(dd))
 +    {
 +        dd->ma = init_gmx_domdec_master_t(dd,
 +                                          comm->cgs_gl.nr,
 +                                          comm->cgs_gl.index[comm->cgs_gl.nr]);
 +    }
 +}
 +
 +/* Parse the user supplied relative cell sizes for static load balancing.
 + *
 + * fplog        log file, may be NULL
 + * dir          name of the direction, only used in messages
 + * nc           number of cells in this direction
 + * size_string  whitespace separated relative sizes, may be NULL
 + *
 + * Returns a newly allocated array of nc fractions that sum to 1,
 + * or NULL when nc <= 1 or size_string is NULL.
 + * Issues a fatal error when fewer than nc entries can be read or when
 + * an entry is not a positive number.
 + */
 +static real *get_slb_frac(FILE *fplog,const char *dir,int nc,const char *size_string)
 +{
 +    real *slb_frac,tot;
 +    int  i,n;
 +    double dbl;
 +
 +    slb_frac = NULL;
 +    if (nc > 1 && size_string != NULL)
 +    {
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Using static load balancing for the %s direction\n",
 +                    dir);
 +        }
 +        snew(slb_frac,nc);
 +        tot = 0;
 +        for (i=0; i<nc; i++)
 +        {
 +            dbl = 0;
 +            n   = 0;
 +            /* %n is only stored after a successful conversion, so check
 +             * the return value. Non-positive sizes are rejected as they
 +             * would give negative fractions or a zero total.
 +             */
 +            if (sscanf(size_string,"%lf%n",&dbl,&n) < 1 || dbl <= 0)
 +            {
 +                gmx_fatal(FARGS,"Incorrect or not enough DD cell size entries for direction %s: '%s'",dir,size_string);
 +            }
 +            slb_frac[i] = dbl;
 +            size_string += n;
 +            tot += slb_frac[i];
 +        }
 +        /* Normalize */
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Relative cell sizes:");
 +        }
 +        for (i=0; i<nc; i++)
 +        {
 +            slb_frac[i] /= tot;
 +            if (fplog)
 +            {
 +                fprintf(fplog," %5.3f",slb_frac[i]);
 +            }
 +        }
 +        if (fplog)
 +        {
 +            fprintf(fplog,"\n");
 +        }
 +    }
 +
 +    return slb_frac;
 +}
 +
 +/* Return the number of bonded interactions in the topology that
 + * involve more than two atoms, summed over all molecules.
 + */
 +static int multi_body_bondeds_count(gmx_mtop_t *mtop)
 +{
 +    gmx_mtop_ilistloop_t iloop;
 +    t_ilist *ilists;
 +    int count,nmol,ftype;
 +
 +    count = 0;
 +    for(iloop = gmx_mtop_ilistloop_init(mtop);
 +        gmx_mtop_ilistloop_next(iloop,&ilists,&nmol);
 +        )
 +    {
 +        for(ftype=0; ftype<F_NRE; ftype++)
 +        {
 +            /* Skip non-bonded types and plain two-atom bondeds */
 +            if (!(interaction_function[ftype].flags & IF_BOND) ||
 +                NRAL(ftype) <= 2)
 +            {
 +                continue;
 +            }
 +            /* Each list entry takes one type index plus NRAL atom indices */
 +            count += nmol*ilists[ftype].nr/(1 + NRAL(ftype));
 +        }
 +    }
 +
 +    return count;
 +}
 +
 +/* Read an integer setting from the environment.
 + *
 + * Returns def when env_var is not set. When it is set, returns its
 + * integer value, or 1 when the value can not be parsed as an integer,
 + * and reports the value used to fplog (when fplog is not NULL).
 + */
 +static int dd_nst_env(FILE *fplog,const char *env_var,int def)
 +{
 +    char *text;
 +    int  value;
 +
 +    text = getenv(env_var);
 +    if (text == NULL)
 +    {
 +        return def;
 +    }
 +
 +    value = def;
 +    if (sscanf(text,"%d",&value) <= 0)
 +    {
 +        /* Set, but not a number: treat it as a plain "on" switch */
 +        value = 1;
 +    }
 +    if (fplog != NULL)
 +    {
 +        fprintf(fplog,"Found env.var. %s = %s, using value %d\n",
 +                env_var,text,value);
 +    }
 +
 +    return value;
 +}
 +
 +/* Print warn_string, surrounded by newlines, to stderr on the master
 + * and to the log file on every rank that has one.
 + */
 +static void dd_warning(t_commrec *cr,FILE *fplog,const char *warn_string)
 +{
 +    const char *fmt = "\n%s\n";
 +
 +    if (MASTER(cr))
 +    {
 +        fprintf(stderr,fmt,warn_string);
 +    }
 +    if (fplog != NULL)
 +    {
 +        fprintf(fplog,fmt,warn_string);
 +    }
 +}
 +
 +/* Check the input settings against the restrictions of domain
 + * decomposition. Unsupported combinations give a fatal error;
 + * angular COM removal with periodic boundaries only gives a warning.
 + */
 +static void check_dd_restrictions(t_commrec *cr,gmx_domdec_t *dd,
 +                                  t_inputrec *ir,FILE *fplog)
 +{
 +    gmx_bool bOnlyDecompX;
 +
 +    /* Screw pbc requires decomposition along x and along x only */
 +    bOnlyDecompX = (dd->nc[XX] != 1 && dd->nc[YY] <= 1 && dd->nc[ZZ] <= 1);
 +    if (ir->ePBC == epbcSCREW && !bOnlyDecompX)
 +    {
 +        gmx_fatal(FARGS,"With pbc=%s can only do domain decomposition in the x-direction",epbc_names[ir->ePBC]);
 +    }
 +
 +    if (ir->ns_type == ensSIMPLE)
 +    {
 +        gmx_fatal(FARGS,"Domain decomposition does not support simple neighbor searching, use grid searching or use particle decomposition");
 +    }
 +
 +    if (ir->nstlist == 0)
 +    {
 +        gmx_fatal(FARGS,"Domain decomposition does not work with nstlist=0");
 +    }
 +
 +    if (ir->ePBC != epbcNONE && ir->comm_mode == ecmANGULAR)
 +    {
 +        dd_warning(cr,fplog,"comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
 +    }
 +}
 +
 +/* Return the smallest average cell size over the decomposed dimensions,
 + * using the skew corrected box size divided by the number of cells.
 + * The result never exceeds the box size along x, which is the start value.
 + */
 +static real average_cellsize_min(gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
 +{
 +    real size_min;
 +    int  idim,dim;
 +
 +    size_min = ddbox->box_size[XX];
 +    for(idim=0; idim<dd->ndim; idim++)
 +    {
 +        dim = dd->dim[idim];
 +        /* Average size of the cells along this decomposed dimension */
 +        size_min = min(size_min,
 +                       ddbox->box_size[dim]*ddbox->skew_fac[dim]/dd->nc[dim]);
 +    }
 +
 +    return size_min;
 +}
 +
 +/* Determine which dynamic load balancing (DLB) mode can be used.
 + *
 + * fplog        log file, may be NULL
 + * cr           communication record, used for issuing warnings
 + * dlb_opt      user option; the first character selects auto ('a'),
 + *              no ('n') or yes ('y')
 + * bRecordLoad  whether load recording is available
 + * Flags        run flags; MD_RERUN and MD_REPRODUCIBLE are checked
 + * ir           input record, used for the integrator type
 + *
 + * Returns one of the edlb* values. DLB is turned off for reruns,
 + * for non-dynamical integrators and without load recording. With
 + * reproducibility requested, auto is turned into no, whereas an
 + * explicit yes is kept with a warning.
 + */
 +static int check_dlb_support(FILE *fplog,t_commrec *cr,
 +                             const char *dlb_opt,gmx_bool bRecordLoad,
 +                             unsigned long Flags,t_inputrec *ir)
 +{
 +    int  eDLB=-1;
 +    char buf[STRLEN];
 +
 +    switch (dlb_opt[0])
 +    {
 +    case 'a': eDLB = edlbAUTO; break;
 +    case 'n': eDLB = edlbNO;   break;
 +    case 'y': eDLB = edlbYES;  break;
 +    default: gmx_incons("Unknown dlb_opt");
 +    }
 +
 +    if (Flags & MD_RERUN)
 +    {
 +        return edlbNO;
 +    }
 +
 +    if (!EI_DYNAMICS(ir->eI))
 +    {
 +        /* Only tell the user when DLB was requested explicitly */
 +        if (eDLB == edlbYES)
 +        {
 +            sprintf(buf,"NOTE: dynamic load balancing is only supported with dynamics, not with integrator '%s'\n",EI(ir->eI));
 +            dd_warning(cr,fplog,buf);
 +        }
 +
 +        return edlbNO;
 +    }
 +
 +    if (!bRecordLoad)
 +    {
 +        dd_warning(cr,fplog,"NOTE: Cycle counting is not supported on this architecture, will not use dynamic load balancing\n");
 +
 +        return edlbNO;
 +    }
 +
 +    if (Flags & MD_REPRODUCIBLE)
 +    {
 +        switch (eDLB)
 +        {
 +        case edlbNO:
 +            break;
 +        case edlbAUTO:
 +            dd_warning(cr,fplog,"NOTE: reproducibility requested, will not use dynamic load balancing\n");
 +            eDLB = edlbNO;
 +            break;
 +        case edlbYES:
 +            dd_warning(cr,fplog,"WARNING: reproducibility requested with dynamic load balancing, the simulation will NOT be binary reproducible\n");
 +            break;
 +        default:
 +            gmx_fatal(FARGS,"Death horror: undefined case (%d) for load balancing choice",eDLB);
 +            break;
 +        }
 +    }
 +
 +    return eDLB;
 +}
 +
 +/* Fill dd->dim and dd->ndim with the dimensions that have more than
 + * one cell. The order is x,y,z, or z,y,x when the environment variable
 + * GMX_DD_ORDER_ZYX is set.
 + */
 +static void set_dd_dim(FILE *fplog,gmx_domdec_t *dd)
 +{
 +    gmx_bool bOrderZYX;
 +    int  i,dim;
 +
 +    dd->ndim = 0;
 +
 +    bOrderZYX = (getenv("GMX_DD_ORDER_ZYX") != NULL);
 +    if (bOrderZYX && fplog)
 +    {
 +        fprintf(fplog,"Using domain decomposition order z, y, x\n");
 +    }
 +
 +    for(i=0; i<DIM; i++)
 +    {
 +        /* Walk the dimensions backwards for z,y,x order */
 +        dim = bOrderZYX ? DIM-1-i : i;
 +        if (dd->nc[dim] > 1)
 +        {
 +            dd->dim[dd->ndim++] = dim;
 +        }
 +    }
 +}
 +
 +/* Allocate a gmx_domdec_comm_t and set its buffers, counters and
 + * load statistics to their empty/zero start values.
 + */
 +static gmx_domdec_comm_t *init_dd_comm()
 +{
 +    gmx_domdec_comm_t *c;
 +    int  k;
 +
 +    snew(c,1);
 +
 +    /* Communication buffers, one per direction of each dimension */
 +    snew(c->cggl_flag,DIM*2);
 +    snew(c->cgcm_state,DIM*2);
 +    for(k=0; k<DIM*2; k++)
 +    {
 +        c->cggl_flag_nalloc[k]  = 0;
 +        c->cgcm_state_nalloc[k] = 0;
 +    }
 +
 +    c->buf_int    = NULL;
 +    c->nalloc_int = 0;
 +
 +    vec_rvec_init(&c->vbuf);
 +
 +    /* Load collection bookkeeping */
 +    c->n_load_have    = 0;
 +    c->n_load_collect = 0;
 +
 +    /* Statistics accumulated over the run */
 +    for(k=0; k<ddnatNR-ddnatZONE; k++)
 +    {
 +        c->sum_nat[k] = 0;
 +    }
 +    c->ndecomp   = 0;
 +    c->nload     = 0;
 +    c->load_step = 0;
 +    c->load_sum  = 0;
 +    c->load_max  = 0;
 +    clear_ivec(c->load_lim);
 +    c->load_mdf  = 0;
 +    c->load_pme  = 0;
 +
 +    return c;
 +}
 +
 +gmx_domdec_t *init_domain_decomposition(FILE *fplog,t_commrec *cr,
 +                                        unsigned long Flags,
 +                                        ivec nc,
 +                                        real comm_distance_min,real rconstr,
 +                                        const char *dlb_opt,real dlb_scale,
 +                                        const char *sizex,const char *sizey,const char *sizez,
 +                                        gmx_mtop_t *mtop,t_inputrec *ir,
 +                                        matrix box,rvec *x,
 +                                        gmx_ddbox_t *ddbox,
 +                                        int *npme_x,int *npme_y)
 +{
 +    gmx_domdec_t *dd;
 +    gmx_domdec_comm_t *comm;
 +    int  recload;
 +    int  d,i,j;
 +    real r_2b,r_mb,r_bonded=-1,r_bonded_limit=-1,limit,acs;
 +    gmx_bool bC;
 +    char buf[STRLEN];
 +    
 +    if (fplog)
 +    {
 +        fprintf(fplog,
 +                "\nInitializing Domain Decomposition on %d nodes\n",cr->nnodes);
 +    }
 +    
 +    snew(dd,1);
 +
 +    dd->comm = init_dd_comm();
 +    comm = dd->comm;
 +    snew(comm->cggl_flag,DIM*2);
 +    snew(comm->cgcm_state,DIM*2);
 +
 +    dd->npbcdim   = ePBC2npbcdim(ir->ePBC);
 +    dd->bScrewPBC = (ir->ePBC == epbcSCREW);
 +    
 +    dd->bSendRecv2      = dd_nst_env(fplog,"GMX_DD_SENDRECV2",0);
 +    comm->dlb_scale_lim = dd_nst_env(fplog,"GMX_DLB_MAX",10);
 +    comm->eFlop         = dd_nst_env(fplog,"GMX_DLB_FLOP",0);
 +    recload             = dd_nst_env(fplog,"GMX_DD_LOAD",1);
 +    comm->nstSortCG     = dd_nst_env(fplog,"GMX_DD_SORT",1);
 +    comm->nstDDDump     = dd_nst_env(fplog,"GMX_DD_DUMP",0);
 +    comm->nstDDDumpGrid = dd_nst_env(fplog,"GMX_DD_DUMP_GRID",0);
 +    comm->DD_debug      = dd_nst_env(fplog,"GMX_DD_DEBUG",0);
 +
 +    dd->pme_recv_f_alloc = 0;
 +    dd->pme_recv_f_buf = NULL;
 +
 +    if (dd->bSendRecv2 && fplog)
 +    {
 +        fprintf(fplog,"Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n");
 +    }
 +    if (comm->eFlop)
 +    {
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Will load balance based on FLOP count\n");
 +        }
 +        if (comm->eFlop > 1)
 +        {
 +            srand(1+cr->nodeid);
 +        }
 +        comm->bRecordLoad = TRUE;
 +    }
 +    else
 +    {
 +        comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
 +                             
 +    }
 +    
 +    comm->eDLB = check_dlb_support(fplog,cr,dlb_opt,comm->bRecordLoad,Flags,ir);
 +    
 +    comm->bDynLoadBal = (comm->eDLB == edlbYES);
 +    if (fplog)
 +    {
 +        fprintf(fplog,"Dynamic load balancing: %s\n",edlb_names[comm->eDLB]);
 +    }
 +    dd->bGridJump = comm->bDynLoadBal;
 +    
 +    if (comm->nstSortCG)
 +    {
 +        if (fplog)
 +        {
 +            if (comm->nstSortCG == 1)
 +            {
 +                fprintf(fplog,"Will sort the charge groups at every domain (re)decomposition\n");
 +            }
 +            else
 +            {
 +                fprintf(fplog,"Will sort the charge groups every %d steps\n",
 +                        comm->nstSortCG);
 +            }
 +        }
 +        snew(comm->sort,1);
 +    }
 +    else
 +    {
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Will not sort the charge groups\n");
 +        }
 +    }
 +    
 +    comm->bInterCGBondeds = (ncg_mtop(mtop) > mtop->mols.nr);
 +    if (comm->bInterCGBondeds)
 +    {
 +        comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
 +    }
 +    else
 +    {
 +        comm->bInterCGMultiBody = FALSE;
 +    }
 +    
 +    dd->bInterCGcons = inter_charge_group_constraints(mtop);
 +
 +    if (ir->rlistlong == 0)
 +    {
 +        /* Set the cut-off to some very large value,
 +         * so we don't need if statements everywhere in the code.
 +         * We use sqrt, since the cut-off is squared in some places.
 +         */
 +        comm->cutoff   = GMX_CUTOFF_INF;
 +    }
 +    else
 +    {
 +        comm->cutoff   = ir->rlistlong;
 +    }
 +    comm->cutoff_mbody = 0;
 +    
 +    comm->cellsize_limit = 0;
 +    comm->bBondComm = FALSE;
 +
 +    if (comm->bInterCGBondeds)
 +    {
 +        if (comm_distance_min > 0)
 +        {
 +            comm->cutoff_mbody = comm_distance_min;
 +            if (Flags & MD_DDBONDCOMM)
 +            {
 +                comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
 +            }
 +            else
 +            {
 +                comm->cutoff = max(comm->cutoff,comm->cutoff_mbody);
 +            }
 +            r_bonded_limit = comm->cutoff_mbody;
 +        }
 +        else if (ir->bPeriodicMols)
 +        {
 +            /* Can not easily determine the required cut-off */
 +            dd_warning(cr,fplog,"NOTE: Periodic molecules: can not easily determine the required minimum bonded cut-off, using half the non-bonded cut-off\n");
 +            comm->cutoff_mbody = comm->cutoff/2;
 +            r_bonded_limit = comm->cutoff_mbody;
 +        }
 +        else
 +        {
 +            if (MASTER(cr))
 +            {
 +                dd_bonded_cg_distance(fplog,dd,mtop,ir,x,box,
 +                                      Flags & MD_DDBONDCHECK,&r_2b,&r_mb);
 +            }
 +            gmx_bcast(sizeof(r_2b),&r_2b,cr);
 +            gmx_bcast(sizeof(r_mb),&r_mb,cr);
 +
 +            /* We use an initial margin of 10% for the minimum cell size,
 +             * except when we are just below the non-bonded cut-off.
 +             */
 +            if (Flags & MD_DDBONDCOMM)
 +            {
 +                if (max(r_2b,r_mb) > comm->cutoff)
 +                {
 +                    r_bonded       = max(r_2b,r_mb);
 +                    r_bonded_limit = 1.1*r_bonded;
 +                    comm->bBondComm = TRUE;
 +                }
 +                else
 +                {
 +                    r_bonded       = r_mb;
 +                    r_bonded_limit = min(1.1*r_bonded,comm->cutoff);
 +                }
 +                /* We determine cutoff_mbody later */
 +            }
 +            else
 +            {
 +                /* No special bonded communication,
 +                 * simply increase the DD cut-off.
 +                 */
 +                r_bonded_limit     = 1.1*max(r_2b,r_mb);
 +                comm->cutoff_mbody = r_bonded_limit;
 +                comm->cutoff       = max(comm->cutoff,comm->cutoff_mbody);
 +            }
 +        }
 +        comm->cellsize_limit = max(comm->cellsize_limit,r_bonded_limit);
 +        if (fplog)
 +        {
 +            fprintf(fplog,
 +                    "Minimum cell size due to bonded interactions: %.3f nm\n",
 +                    comm->cellsize_limit);
 +        }
 +    }
 +
 +    if (dd->bInterCGcons && rconstr <= 0)
 +    {
 +        /* There is a cell size limit due to the constraints (P-LINCS) */
 +        rconstr = constr_r_max(fplog,mtop,ir);
 +        if (fplog)
 +        {
 +            fprintf(fplog,
 +                    "Estimated maximum distance required for P-LINCS: %.3f nm\n",
 +                    rconstr);
 +            if (rconstr > comm->cellsize_limit)
 +            {
 +                fprintf(fplog,"This distance will limit the DD cell size, you can override this with -rcon\n");
 +            }
 +        }
 +    }
 +    else if (rconstr > 0 && fplog)
 +    {
 +        /* Here we do not check for dd->bInterCGcons,
 +         * because one can also set a cell size limit for virtual sites only
 +         * and at this point we don't know yet if there are intercg v-sites.
 +         */
 +        fprintf(fplog,
 +                "User supplied maximum distance required for P-LINCS: %.3f nm\n",
 +                rconstr);
 +    }
 +    comm->cellsize_limit = max(comm->cellsize_limit,rconstr);
 +
 +    comm->cgs_gl = gmx_mtop_global_cgs(mtop);
 +
 +    if (nc[XX] > 0)
 +    {
 +        copy_ivec(nc,dd->nc);
 +        set_dd_dim(fplog,dd);
 +        set_ddbox_cr(cr,&dd->nc,ir,box,&comm->cgs_gl,x,ddbox);
 +
 +        if (cr->npmenodes == -1)
 +        {
 +            cr->npmenodes = 0;
 +        }
 +        acs = average_cellsize_min(dd,ddbox);
 +        if (acs < comm->cellsize_limit)
 +        {
 +            if (fplog)
 +            {
 +                fprintf(fplog,"ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n",acs,comm->cellsize_limit);
 +            }
 +            gmx_fatal_collective(FARGS,cr,NULL,
 +                                 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
 +                                 acs,comm->cellsize_limit);
 +        }
 +    }
 +    else
 +    {
 +        set_ddbox_cr(cr,NULL,ir,box,&comm->cgs_gl,x,ddbox);
 +
 +        /* We need to choose the optimal DD grid and possibly PME nodes */
 +        limit = dd_choose_grid(fplog,cr,dd,ir,mtop,box,ddbox,
 +                               comm->eDLB!=edlbNO,dlb_scale,
 +                               comm->cellsize_limit,comm->cutoff,
 +                               comm->bInterCGBondeds,comm->bInterCGMultiBody);
 +        
 +        if (dd->nc[XX] == 0)
 +        {
 +            bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
 +            sprintf(buf,"Change the number of nodes or mdrun option %s%s%s",
 +                    !bC ? "-rdd" : "-rcon",
 +                    comm->eDLB!=edlbNO ? " or -dds" : "",
 +                    bC ? " or your LINCS settings" : "");
 +
 +            gmx_fatal_collective(FARGS,cr,NULL,
 +                                 "There is no domain decomposition for %d nodes that is compatible with the given box and a minimum cell size of %g nm\n"
 +                                 "%s\n"
 +                                 "Look in the log file for details on the domain decomposition",
 +                                 cr->nnodes-cr->npmenodes,limit,buf);
 +        }
 +        set_dd_dim(fplog,dd);
 +    }
 +
 +    if (fplog)
 +    {
 +        fprintf(fplog,
 +                "Domain decomposition grid %d x %d x %d, separate PME nodes %d\n",
 +                dd->nc[XX],dd->nc[YY],dd->nc[ZZ],cr->npmenodes);
 +    }
 +    
 +    dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
 +    if (cr->nnodes - dd->nnodes != cr->npmenodes)
 +    {
 +        gmx_fatal_collective(FARGS,cr,NULL,
 +                             "The size of the domain decomposition grid (%d) does not match the number of nodes (%d). The total number of nodes is %d",
 +                             dd->nnodes,cr->nnodes - cr->npmenodes,cr->nnodes);
 +    }
 +    if (cr->npmenodes > dd->nnodes)
 +    {
 +        gmx_fatal_collective(FARGS,cr,NULL,
-                              "The number of separate PME node (%d) is larger than the number of PP nodes (%d), this is not supported.",cr->npmenodes,dd->nnodes);
++                             "The number of separate PME nodes (%d) is larger than the number of PP nodes (%d), this is not supported.",cr->npmenodes,dd->nnodes);
 +    }
 +    if (cr->npmenodes > 0)
 +    {
 +        comm->npmenodes = cr->npmenodes;
 +    }
 +    else
 +    {
 +        comm->npmenodes = dd->nnodes;
 +    }
 +
 +    if (EEL_PME(ir->coulombtype))
 +    {
 +        /* The following choices should match those
 +         * in comm_cost_est in domdec_setup.c.
 +         * Note that here the checks have to take into account
 +         * that the decomposition might occur in a different order than xyz
 +         * (for instance through the env.var. GMX_DD_ORDER_ZYX),
 +         * in which case they will not match those in comm_cost_est,
 +         * but since that is mainly for testing purposes that's fine.
 +         */
 +        if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
 +            comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
 +            getenv("GMX_PMEONEDD") == NULL)
 +        {
 +            comm->npmedecompdim = 2;
 +            comm->npmenodes_x   = dd->nc[XX];
 +            comm->npmenodes_y   = comm->npmenodes/comm->npmenodes_x;
 +        }
 +        else
 +        {
 +            /* In case nc is 1 in both x and y we could still choose to
 +             * decompose pme in y instead of x, but we use x for simplicity.
 +             */
 +            comm->npmedecompdim = 1;
 +            if (dd->dim[0] == YY)
 +            {
 +                comm->npmenodes_x = 1;
 +                comm->npmenodes_y = comm->npmenodes;
 +            }
 +            else
 +            {
 +                comm->npmenodes_x = comm->npmenodes;
 +                comm->npmenodes_y = 1;
 +            }
 +        }    
 +        if (fplog)
 +        {
 +            fprintf(fplog,"PME domain decomposition: %d x %d x %d\n",
 +                    comm->npmenodes_x,comm->npmenodes_y,1);
 +        }
 +    }
 +    else
 +    {
 +        comm->npmedecompdim = 0;
 +        comm->npmenodes_x   = 0;
 +        comm->npmenodes_y   = 0;
 +    }
 +    
 +    /* Technically we don't need both of these,
 +     * but it simplifies code not having to recalculate it.
 +     */
 +    *npme_x = comm->npmenodes_x;
 +    *npme_y = comm->npmenodes_y;
 +        
 +    snew(comm->slb_frac,DIM);
 +    if (comm->eDLB == edlbNO)
 +    {
 +        comm->slb_frac[XX] = get_slb_frac(fplog,"x",dd->nc[XX],sizex);
 +        comm->slb_frac[YY] = get_slb_frac(fplog,"y",dd->nc[YY],sizey);
 +        comm->slb_frac[ZZ] = get_slb_frac(fplog,"z",dd->nc[ZZ],sizez);
 +    }
 +
 +    if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
 +    {
 +        if (comm->bBondComm || comm->eDLB != edlbNO)
 +        {
 +            /* Set the bonded communication distance to halfway
 +             * the minimum and the maximum,
 +             * since the extra communication cost is nearly zero.
 +             */
 +            acs = average_cellsize_min(dd,ddbox);
 +            comm->cutoff_mbody = 0.5*(r_bonded + acs);
 +            if (comm->eDLB != edlbNO)
 +            {
 +                /* Check if this does not limit the scaling */
 +                comm->cutoff_mbody = min(comm->cutoff_mbody,dlb_scale*acs);
 +            }
 +            if (!comm->bBondComm)
 +            {
 +                /* Without bBondComm do not go beyond the n.b. cut-off */
 +                comm->cutoff_mbody = min(comm->cutoff_mbody,comm->cutoff);
 +                if (comm->cellsize_limit >= comm->cutoff)
 +                {
 +                    /* We don't lose a lot of efficiency
 +                     * when increasing it to the n.b. cut-off.
 +                     * It can even be slightly faster, because we need
 +                     * less checks for the communication setup.
 +                     */
 +                    comm->cutoff_mbody = comm->cutoff;
 +                }
 +            }
 +            /* Check if we did not end up below our original limit */
 +            comm->cutoff_mbody = max(comm->cutoff_mbody,r_bonded_limit);
 +
 +            if (comm->cutoff_mbody > comm->cellsize_limit)
 +            {
 +                comm->cellsize_limit = comm->cutoff_mbody;
 +            }
 +        }
 +        /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
 +    }
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"Bonded atom communication beyond the cut-off: %d\n"
 +                "cellsize limit %f\n",
 +                comm->bBondComm,comm->cellsize_limit);
 +    }
 +    
 +    if (MASTER(cr))
 +    {
 +        check_dd_restrictions(cr,dd,ir,fplog);
 +    }
 +
 +    comm->globalcomm_step = INT_MIN;
 +    dd->ddp_count = 0;
 +
 +    clear_dd_cycle_counts(dd);
 +
 +    return dd;
 +}
 +
 +/* Activate the dynamic load balancing limits: for every decomposed
 + * dimension the number of communication pulses and the minimum cell
 + * size are overwritten with their *_dlb counterparts.
 + */
 +static void set_dlb_limits(gmx_domdec_t *dd)
 +{
 +    gmx_domdec_comm_t *comm = dd->comm;
 +    int               dim_ind,dim;
 +
 +    for(dim_ind=0; dim_ind<dd->ndim; dim_ind++)
 +    {
 +        /* Cartesian dimension belonging to this DD dimension index */
 +        dim = dd->dim[dim_ind];
 +
 +        comm->cd[dim_ind].np    = comm->cd[dim_ind].np_dlb;
 +        comm->cellsize_min[dim] = comm->cellsize_min_dlb[dim];
 +    }
 +}
 +
 +
 +/* Attempt to switch on dynamic load balancing (DLB) at the given step.
 + *
 + * The current force load imbalance loss is written to fplog (when set).
 + * When the smallest minimum cell size over the decomposed dimensions is
 + * below 1.05 times the cell size limit, DLB is not turned on and
 + * comm->eDLB is set to edlbNO. Otherwise the DLB flags and limits are
 + * activated and, where a root structure is present for a dimension,
 + * the cell boundary fractions are initialised for a uniform grid.
 + */
 +static void turn_on_dlb(FILE *fplog,t_commrec *cr,gmx_large_int_t step)
 +{
 +    gmx_domdec_t      *dd   = cr->dd;
 +    gmx_domdec_comm_t *comm = dd->comm;
 +    real smallest_cell;
 +    int  dim_ind,ncell,c;
 +    char stepbuf[STRLEN];
 +
 +    if (fplog != NULL)
 +    {
 +        fprintf(fplog,"At step %s the performance loss due to force load imbalance is %.1f %%\n",gmx_step_str(step,stepbuf),dd_force_imb_perf_loss(dd)*100);
 +    }
 +
 +    /* Find the smallest minimum cell size over all decomposed dimensions */
 +    smallest_cell = comm->cellsize_min[dd->dim[0]];
 +    for(dim_ind=1; dim_ind<dd->ndim; dim_ind++)
 +    {
 +        smallest_cell = min(smallest_cell,comm->cellsize_min[dd->dim[dim_ind]]);
 +    }
 +
 +    if (smallest_cell < comm->cellsize_limit*1.05)
 +    {
 +        dd_warning(cr,fplog,"NOTE: the minimum cell size is smaller than 1.05 times the cell size limit, will not turn on dynamic load balancing\n");
 +
 +        /* DLB goes from "auto" to "no" */
 +        comm->eDLB = edlbNO;
 +
 +        return;
 +    }
 +
 +    dd_warning(cr,fplog,"NOTE: Turning on dynamic load balancing\n");
 +    comm->bDynLoadBal = TRUE;
 +    dd->bGridJump     = TRUE;
 +
 +    set_dlb_limits(dd);
 +
 +    /* The grid is still completely uniform, so the required cell size
 +     * info can be filled in locally and need not be communicated.
 +     */
 +    for(dim_ind=0; dim_ind<dd->ndim; dim_ind++)
 +    {
 +        if (comm->root[dim_ind] == NULL)
 +        {
 +            /* No root data for this dimension, nothing to set */
 +            continue;
 +        }
 +
 +        comm->load[dim_ind].sum_m = comm->load[dim_ind].sum;
 +
 +        ncell = dd->nc[dd->dim[dim_ind]];
 +        for(c=0; c<ncell; c++)
 +        {
 +            comm->root[dim_ind]->cell_f[c] = c/(real)ncell;
 +            if (dim_ind > 0)
 +            {
 +                comm->root[dim_ind]->cell_f_max0[c] =  c   /(real)ncell;
 +                comm->root[dim_ind]->cell_f_min1[c] = (c+1)/(real)ncell;
 +            }
 +        }
 +        /* Close the last cell at the upper boundary */
 +        comm->root[dim_ind]->cell_f[ncell] = 1.0;
 +    }
 +}
 +
 +/* Allocate a flag array with one entry per charge group in mtop
 + * (count taken from ncg_mtop) and return it with every entry FALSE.
 + */
 +static char *init_bLocalCG(gmx_mtop_t *mtop)
 +{
 +    char *flags;
 +    int  nflags,i;
 +
 +    nflags = ncg_mtop(mtop);
 +    snew(flags,nflags);
 +
 +    i = 0;
 +    while (i < nflags)
 +    {
 +        flags[i++] = FALSE;
 +    }
 +
 +    return flags;
 +}
 +
 +/* Initialise the bonded-interaction data for domain decomposition.
 + *
 + * Builds the reverse topology through dd_make_reverse_top and then,
 + * depending on comm->bBondComm, either sets up the charge group links
 + * and the local charge group flags used to communicate atoms beyond
 + * the cut-off, or sets both pointers to NULL.
 + *
 + * fplog, vsite, constr, ir and bBCheck are only forwarded to
 + * dd_make_reverse_top; cginfo_mb is only forwarded to
 + * make_charge_group_links.
 + */
 +void dd_init_bondeds(FILE *fplog,
 +                     gmx_domdec_t *dd,gmx_mtop_t *mtop,
 +                     gmx_vsite_t *vsite,gmx_constr_t constr,
 +                     t_inputrec *ir,gmx_bool bBCheck,cginfo_mb_t *cginfo_mb)
 +{
 +    gmx_domdec_comm_t *comm;
 +
 +    dd_make_reverse_top(fplog,dd,mtop,vsite,constr,ir,bBCheck);
 +
 +    comm = dd->comm;
 +
 +    if (comm->bBondComm)
 +    {
 +        /* Communicate atoms beyond the cut-off for bonded interactions */
 +        comm->cglink = make_charge_group_links(mtop,dd,cginfo_mb);
 +
 +        comm->bLocalCG = init_bLocalCG(mtop);
 +    }
 +    else
 +    {
 +        /* Only communicate atoms based on cut-off */
 +        comm->cglink   = NULL;
 +        comm->bLocalCG = NULL;
 +    }
 +}
 +
 +/* Write a summary of the domain decomposition settings to fplog.
 + *
 + * Does nothing when fplog is NULL.
 + *
 + * bDynLoadBal selects what is printed: when TRUE, the maximum pulse
 + * counts (np_dlb), the cell size limit and the allowed shrink per
 + * dimension; when FALSE, the initial pulse counts and cell sizes as
 + * obtained after calling set_dd_cell_sizes_slb.
 + * dlb_scale is only echoed (mdrun option -dds).
 + * When inter charge group bondeds, v-site communication or constraint
 + * communication is present, the maximum allowed interaction distances
 + * are listed as well. The log file is flushed at the end.
 + */
 +static void print_dd_settings(FILE *fplog,gmx_domdec_t *dd,
 +                              t_inputrec *ir,
 +                              gmx_bool bDynLoadBal,real dlb_scale,
 +                              gmx_ddbox_t *ddbox)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int  d;
 +    ivec np;
 +    real limit,shrink;
 +    char buf[64];
 +
 +    if (fplog == NULL)
 +    {
 +        return;
 +    }
 +
 +    comm = dd->comm;
 +
 +    if (bDynLoadBal)
 +    {
 +        fprintf(fplog,"The maximum number of communication pulses is:");
 +        for(d=0; d<dd->ndim; d++)
 +        {
 +            fprintf(fplog," %c %d",dim2char(dd->dim[d]),comm->cd[d].np_dlb);
 +        }
 +        fprintf(fplog,"\n");
 +        fprintf(fplog,"The minimum size for domain decomposition cells is %.3f nm\n",comm->cellsize_limit);
 +        fprintf(fplog,"The requested allowed shrink of DD cells (option -dds) is: %.2f\n",dlb_scale);
 +        fprintf(fplog,"The allowed shrink of domain decomposition cells is:");
 +        for(d=0; d<DIM; d++)
 +        {
 +            /* Only dimensions that are actually decomposed are reported */
 +            if (dd->nc[d] > 1)
 +            {
 +                if (d >= ddbox->npbcdim && dd->nc[d] == 2)
 +                {
 +                    /* Two cells along a dimension at or beyond npbcdim:
 +                     * the shrink is reported as 0.
 +                     */
 +                    shrink = 0;
 +                }
 +                else
 +                {
 +                    /* Ratio of the minimum DLB cell size to the
 +                     * uniform cell size (box_size*skew_fac/nc).
 +                     */
 +                    shrink =
 +                        comm->cellsize_min_dlb[d]/
 +                        (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
 +                }
 +                fprintf(fplog," %c %.2f",dim2char(d),shrink);
 +            }
 +        }
 +        fprintf(fplog,"\n");
 +    }
 +    else
 +    {
 +        /* np is read directly below, so this call is expected to fill it.
 +         * NOTE(review): presumably it also updates comm->cellsize_min,
 +         * which is printed afterwards - confirm in set_dd_cell_sizes_slb.
 +         */
 +        set_dd_cell_sizes_slb(dd,ddbox,FALSE,np);
 +        fprintf(fplog,"The initial number of communication pulses is:");
 +        for(d=0; d<dd->ndim; d++)
 +        {
 +            fprintf(fplog," %c %d",dim2char(dd->dim[d]),np[dd->dim[d]]);
 +        }
 +        fprintf(fplog,"\n");
 +        fprintf(fplog,"The initial domain decomposition cell size is:");
 +        for(d=0; d<DIM; d++) {
 +            if (dd->nc[d] > 1)
 +            {
 +                fprintf(fplog," %c %.2f nm",
 +                        dim2char(d),dd->comm->cellsize_min[d]);
 +            }
 +        }
 +        fprintf(fplog,"\n\n");
 +    }
 +    
 +    if (comm->bInterCGBondeds || dd->vsite_comm || dd->constraint_comm)
 +    {
 +        fprintf(fplog,"The maximum allowed distance for charge groups involved in interactions is:\n");
 +        fprintf(fplog,"%40s  %-7s %6.3f nm\n",
 +                "non-bonded interactions","",comm->cutoff);
 +
 +        /* Determine the distance limit reported for the entries below:
 +         * the cell size limit with DLB, otherwise the smallest
 +         * cellsize_min over all three dimensions.
 +         */
 +        if (bDynLoadBal)
 +        {
 +            limit = dd->comm->cellsize_limit;
 +        }
 +        else
 +        {
 +            if (dynamic_dd_box(ddbox,ir))
 +            {
 +                fprintf(fplog,"(the following are initial values, they could change due to box deformation)\n");
 +            }
 +            limit = dd->comm->cellsize_min[XX];
 +            for(d=1; d<DIM; d++)
 +            {
 +                limit = min(limit,dd->comm->cellsize_min[d]);
 +            }
 +        }
 +
 +        if (comm->bInterCGBondeds)
 +        {
 +            fprintf(fplog,"%40s  %-7s %6.3f nm\n",
 +                    "two-body bonded interactions","(-rdd)",
 +                    max(comm->cutoff,comm->cutoff_mbody));
 +            /* Note that this tests dd->bGridJump, not the bDynLoadBal
 +             * argument of this function.
 +             */
 +            fprintf(fplog,"%40s  %-7s %6.3f nm\n",
 +                    "multi-body bonded interactions","(-rdd)",
 +                    (comm->bBondComm || dd->bGridJump) ? comm->cutoff_mbody : min(comm->cutoff,limit));
 +        }
 +        if (dd->vsite_comm)
 +        {
 +            fprintf(fplog,"%40s  %-7s %6.3f nm\n",
 +                    "virtual site constructions","(-rcon)",limit);
 +        }
 +        if (dd->constraint_comm)
 +        {
 +            /* The label is built in buf first so it can be printed
 +             * right-aligned through the shared %40s format.
 +             */
 +            sprintf(buf,"atoms separated by up to %d constraints",
 +                    1+ir->nProjOrder);
 +            fprintf(fplog,"%40s  %-7s %6.3f nm\n",
 +                    buf,"(-rcon)",limit);
 +        }
 +        fprintf(fplog,"\n");
 +    }
 +    
 +    fflush(fplog);
 +}
 +
 +/* Finalise the domain decomposition communication parameters.
 + *
 + * In order, this function:
 + *  - initialises the PME decomposition data (init_ddpme) when PME
 + *    electrostatics is used; otherwise sets comm->npmenodes to 0 and
 + *    calls gmx_fatal_collective when dd->pme_nodeid >= 0;
 + *  - sets fr->bMolPBC;
 + *  - unless comm->eDLB is edlbNO, determines the maximum number of
 + *    communication pulses per DD dimension (np_dlb), allocates the
 + *    per-pulse index arrays and sets the DLB minimum cell sizes;
 + *  - prints the settings through print_dd_settings (twice for
 + *    edlbAUTO: current settings and the settings with DLB on);
 + *  - allocates dd->ga2la through ga2la_init.
 + *
 + * fplog may be NULL: it is tested here before direct use, and
 + * print_dd_settings returns early for a NULL log file.
 + * dlb_scale is used to reduce the pulse count and is passed on to
 + * print_dd_settings.
 + */
 +void set_dd_parameters(FILE *fplog,gmx_domdec_t *dd,real dlb_scale,
 +                       t_inputrec *ir,t_forcerec *fr,
 +                       gmx_ddbox_t *ddbox)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int  d,dim,npulse,npulse_d_max,npulse_d;
 +    gmx_bool bNoCutOff;
 +    int  natoms_tot;
 +    real vol_frac;
 +
 +    comm = dd->comm;
 +
 +    /* A value of 0 for rvdw or rcoulomb is treated as "no cut-off" */
 +    bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
 +
 +    if (EEL_PME(ir->coulombtype))
 +    {
 +        init_ddpme(dd,&comm->ddpme[0],0);
 +        if (comm->npmedecompdim >= 2)
 +        {
 +            init_ddpme(dd,&comm->ddpme[1],1);
 +        }
 +    }
 +    else
 +    {
 +        comm->npmenodes = 0;
 +        if (dd->pme_nodeid >= 0)
 +        {
 +            gmx_fatal_collective(FARGS,NULL,dd,
 +                                 "Can not have separate PME nodes without PME electrostatics");
 +        }
 +    }
 +    
 +    /* If each molecule is a single charge group
 +     * or we use domain decomposition for each periodic dimension,
 +     * we do not need to take pbc into account for the bonded interactions.
 +     */
 +    if (fr->ePBC == epbcNONE || !comm->bInterCGBondeds ||
 +        (dd->nc[XX]>1 && dd->nc[YY]>1 && (dd->nc[ZZ]>1 || fr->ePBC==epbcXY)))
 +    {
 +        fr->bMolPBC = FALSE;
 +    }
 +    else
 +    {
 +        fr->bMolPBC = TRUE;
 +    }
 +        
 +    if (debug)
 +    {
 +        fprintf(debug,"The DD cut-off is %f\n",comm->cutoff);
 +    }
 +    if (comm->eDLB != edlbNO)
 +    {
 +        /* Determine the maximum number of comm. pulses in one dimension */
 +        
 +        comm->cellsize_limit = max(comm->cellsize_limit,comm->cutoff_mbody);
 +        
 +        /* Determine the maximum required number of grid pulses */
 +        if (comm->cellsize_limit >= comm->cutoff)
 +        {
 +            /* Only a single pulse is required */
 +            npulse = 1;
 +        }
 +        else if (!bNoCutOff && comm->cellsize_limit > 0)
 +        {
 +            /* We round down slightly here to avoid overhead due to the latency
 +             * of extra communication calls when the cut-off
 +             * would be only slightly longer than the cell size.
 +             * Later cellsize_limit is redetermined,
 +             * so we can not miss interactions due to this rounding.
 +             */
 +            npulse = (int)(0.96 + comm->cutoff/comm->cellsize_limit);
 +        }
 +        else
 +        {
 +            /* There is no cell size limit */
 +            npulse = max(dd->nc[XX]-1,max(dd->nc[YY]-1,dd->nc[ZZ]-1));
 +        }
 +
 +        if (!bNoCutOff && npulse > 1)
 +        {
 +            /* See if we can do with less pulses, based on dlb_scale */
 +            npulse_d_max = 0;
 +            for(d=0; d<dd->ndim; d++)
 +            {
 +                dim = dd->dim[d];
 +                /* Pulses needed when the uniform cell size along dim
 +                 * is scaled by dlb_scale.
 +                 */
 +                npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
 +                                 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
 +                npulse_d_max = max(npulse_d_max,npulse_d);
 +            }
 +            npulse = min(npulse,npulse_d_max);
 +        }
 +        
 +        /* This env var can override npulse */
 +        d = dd_nst_env(fplog,"GMX_DD_NPULSE",0);
 +        if (d > 0)
 +        {
 +            npulse = d;
 +        }
 +
 +        comm->maxpulse = 1;
 +        /* Starts TRUE only without PBC; cleared below as soon as one
 +         * dimension uses fewer than nc-1 pulses.
 +         */
 +        comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
 +        for(d=0; d<dd->ndim; d++)
 +        {
 +            /* The pulse count is capped at nc-1 for each dimension */
 +            comm->cd[d].np_dlb = min(npulse,dd->nc[dd->dim[d]]-1);
 +            comm->cd[d].np_nalloc = comm->cd[d].np_dlb;
 +            snew(comm->cd[d].ind,comm->cd[d].np_nalloc);
 +            comm->maxpulse = max(comm->maxpulse,comm->cd[d].np_dlb);
 +            if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
 +            {
 +                comm->bVacDLBNoLimit = FALSE;
 +            }
 +        }
 +        
 +        /* cellsize_limit is set for LINCS in init_domain_decomposition */
 +        if (!comm->bVacDLBNoLimit)
 +        {
 +            comm->cellsize_limit = max(comm->cellsize_limit,
 +                                       comm->cutoff/comm->maxpulse);
 +        }
 +        comm->cellsize_limit = max(comm->cellsize_limit,comm->cutoff_mbody);
 +        /* Set the minimum cell size for each DD dimension */
 +        for(d=0; d<dd->ndim; d++)
 +        {
 +            if (comm->bVacDLBNoLimit ||
 +                comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
 +            {
 +                comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
 +            }
 +            else
 +            {
 +                /* np_dlb pulses must together span the cut-off */
 +                comm->cellsize_min_dlb[dd->dim[d]] =
 +                    comm->cutoff/comm->cd[d].np_dlb;
 +            }
 +        }
 +        /* No multi-body cut-off was set before: derive it here */
 +        if (comm->cutoff_mbody <= 0)
 +        {
 +            comm->cutoff_mbody = min(comm->cutoff,comm->cellsize_limit);
 +        }
 +        if (comm->bDynLoadBal)
 +        {
 +            set_dlb_limits(dd);
 +        }
 +    }
 +    
 +    print_dd_settings(fplog,dd,ir,comm->bDynLoadBal,dlb_scale,ddbox);
 +    if (comm->eDLB == edlbAUTO)
 +    {
 +        if (fplog)
 +        {
 +            fprintf(fplog,"When dynamic load balancing gets turned on, these settings will change to:\n");
 +        }
 +        print_dd_settings(fplog,dd,ir,TRUE,dlb_scale,ddbox);
 +    }
 +
 +    /* vol_frac scales the total atom count passed to ga2la_init below */
 +    if (ir->ePBC == epbcNONE)
 +    {
 +        vol_frac = 1 - 1/(double)dd->nnodes;
 +    }
 +    else
 +    {
 +        vol_frac =
 +            (1 + comm_box_frac(dd->nc,comm->cutoff,ddbox))/(double)dd->nnodes;
 +    }
 +    if (debug)
 +    {
 +        fprintf(debug,"Volume fraction for all DD zones: %f\n",vol_frac);
 +    }
 +    /* The last index entry of the global charge group block
 +     * is used as the total number of atoms.
 +     */
 +    natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
 +   
 +    dd->ga2la = ga2la_init(natoms_tot,vol_frac*natoms_tot);
 +}
 +
 +/* Merge the charge groups received in communication pulse `pulse`
 + * into the local per-cell charge group arrays.
 + *
 + * Works in two passes:
 + *  1. From the last cell down to the first, the entries already stored
 + *     in index_gl, cg_cm, cgindex and cginfo are moved up by the number
 + *     of charge groups that will be inserted before them, and the send
 + *     indices stored in cd->ind[1..pulse] are corrected for that shift.
 + *  2. From the first cell up, the received global indices (recv_i) and
 + *     vectors (recv_vr) are copied in behind each cell's existing
 + *     entries; cginfo is looked up with ddcginfo and cgindex is extended
 + *     with the atom count from GET_CGINFO_NATOMS.
 + *
 + * ncg_cell[ncell+cell+1] is updated to the new end of every cell.
 + * ind->nrecv[ncell] is used as the total number of received charge
 + * groups over all cells.
 + */
 +static void merge_cg_buffers(int ncell,
 +                             gmx_domdec_comm_dim_t *cd, int pulse,
 +                             int  *ncg_cell,
 +                             int  *index_gl, int  *recv_i,
 +                             rvec *cg_cm,    rvec *recv_vr,
 +                             int *cgindex,
 +                             cginfo_mb_t *cginfo_mb,int *cginfo)
 +{
 +    gmx_domdec_ind_t *ind,*ind_p;
 +    int p,cell,c,cg,cg0,cg1,cg_gl,nat;
 +    int shift,shift_at;
 +    
 +    ind = &cd->ind[pulse];
 +    
 +    /* First correct the already stored data */
 +    shift = ind->nrecv[ncell];
 +    for(cell=ncell-1; cell>=0; cell--)
 +    {
 +        /* After this, shift is the number of charge groups received
 +         * for the cells before this one.
 +         */
 +        shift -= ind->nrecv[cell];
 +        if (shift > 0)
 +        {
 +            /* Move the cg's present from previous grid pulses */
 +            cg0 = ncg_cell[ncell+cell];
 +            cg1 = ncg_cell[ncell+cell+1];
 +            cgindex[cg1+shift] = cgindex[cg1];
 +            /* Copy from high to low index, as source and destination
 +             * ranges can overlap.
 +             */
 +            for(cg=cg1-1; cg>=cg0; cg--)
 +            {
 +                index_gl[cg+shift] = index_gl[cg];
 +                copy_rvec(cg_cm[cg],cg_cm[cg+shift]);
 +                cgindex[cg+shift] = cgindex[cg];
 +                cginfo[cg+shift] = cginfo[cg];
 +            }
 +            /* Correct the already stored send indices for the shift */
 +            /* NOTE(review): the loop starts at pulse 1, so the indices of
 +             * pulse 0 are never shifted; presumably those refer to charge
 +             * groups that do not move here - confirm.
 +             */
 +            for(p=1; p<=pulse; p++)
 +            {
 +                ind_p = &cd->ind[p];
 +                /* Find the range of send entries belonging to this cell */
 +                cg0 = 0;
 +                for(c=0; c<cell; c++)
 +                {
 +                    cg0 += ind_p->nsend[c];
 +                }
 +                cg1 = cg0 + ind_p->nsend[cell];
 +                for(cg=cg0; cg<cg1; cg++)
 +                {
 +                    ind_p->index[cg] += shift;
 +                }
 +            }
 +        }
 +    }
 +
 +    /* Merge in the communicated buffers */
 +    shift = 0;
 +    shift_at = 0;
 +    cg0 = 0;
 +    for(cell=0; cell<ncell; cell++)
 +    {
 +        /* First free slot behind the (already shifted) entries of this cell */
 +        cg1 = ncg_cell[ncell+cell+1] + shift;
 +        if (shift_at > 0)
 +        {
 +            /* Correct the old cg indices */
 +            for(cg=ncg_cell[ncell+cell]; cg<cg1; cg++)
 +            {
 +                cgindex[cg+1] += shift_at;
 +            }
 +        }
 +        for(cg=0; cg<ind->nrecv[cell]; cg++)
 +        {
 +            /* Copy this charge group from the buffer */
 +            index_gl[cg1] = recv_i[cg0];
 +            copy_rvec(recv_vr[cg0],cg_cm[cg1]);
 +            /* Add it to the cgindex */
 +            cg_gl = index_gl[cg1];
 +            cginfo[cg1] = ddcginfo(cginfo_mb,cg_gl);
 +            nat = GET_CGINFO_NATOMS(cginfo[cg1]);
 +            cgindex[cg1+1] = cgindex[cg1] + nat;
 +            cg0++;
 +            cg1++;
 +            /* Atoms inserted so far; offsets the cgindex of later cells */
 +            shift_at += nat;
 +        }
 +        shift += ind->nrecv[cell];
 +        ncg_cell[ncell+cell+1] = cg1;
 +    }
 +}
 +
 +/* Record, for every zone and every communication pulse of cd, the atom
 + * block boundaries (cell2at0/cell2at1) looked up in cgindex, so that
 + * communication buffers can be copied easily. The charge group counter
 + * starts at cg0 and advances by nrecv per zone and pulse.
 + */
 +static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
 +                               int nzone,int cg0,const int *cgindex)
 +{
 +    gmx_domdec_ind_t *ind;
 +    int cg_cur,z,pulse;
 +
 +    cg_cur = cg0;
 +    for(z=0; z<nzone; z++)
 +    {
 +        for(pulse=0; pulse<cd->np; pulse++)
 +        {
 +            ind = &cd->ind[pulse];
 +
 +            ind->cell2at0[z] = cgindex[cg_cur];
 +            cg_cur          += ind->nrecv[z];
 +            ind->cell2at1[z] = cgindex[cg_cur];
 +        }
 +    }
 +}
 +
 +/* Return TRUE when at least one charge group linked to global charge
 + * group cg_gl (entries link->index[cg_gl] up to link->index[cg_gl+1]
 + * of link->a) is not flagged in bLocalCG, FALSE otherwise.
 + */
 +static gmx_bool missing_link(t_blocka *link,int cg_gl,char *bLocalCG)
 +{
 +    int  k,k_end;
 +
 +    k_end = link->index[cg_gl+1];
 +    for(k=link->index[cg_gl]; k<k_end; k++)
 +    {
 +        if (!bLocalCG[link->a[k]])
 +        {
 +            /* One missing link decides the result */
 +            return TRUE;
 +        }
 +    }
 +
 +    return FALSE;
 +}
 +
 +static void setup_dd_communication(gmx_domdec_t *dd,
 +                                   matrix box,gmx_ddbox_t *ddbox,t_forcerec *fr)
 +{
 +    int dim_ind,dim,dim0,dim1=-1,dim2=-1,dimd,p,nat_tot;
 +    int nzone,nzone_send,zone,zonei,cg0,cg1;
 +    int c,i,j,cg,cg_gl,nrcg;
 +    int *zone_cg_range,pos_cg,*index_gl,*cgindex,*recv_i;
 +    gmx_domdec_comm_t *comm;
 +    gmx_domdec_zones_t *zones;
 +    gmx_domdec_comm_dim_t *cd;
 +    gmx_domdec_ind_t *ind;
 +    cginfo_mb_t *cginfo_mb;
 +    gmx_bool bBondComm,bDist2B,bDistMB,bDistMB_pulse,bDistBonded,bScrew;
 +    real r_mb,r_comm2,r_scomm2,r_bcomm2,r,r_0,r_1,r2,rb2,r2inc,inv_ncg,tric_sh;
 +    rvec rb,rn;
 +    real corner[DIM][4],corner_round_0=0,corner_round_1[4];
 +    real bcorner[DIM],bcorner_round_1=0;
 +    ivec tric_dist;
 +    rvec *cg_cm,*normal,*v_d,*v_0=NULL,*v_1=NULL,*recv_vr;
 +    real skew_fac2_d,skew_fac_01;
 +    rvec sf2_round;
 +    int  nsend,nat;
 +    
 +    if (debug)
 +    {
 +        fprintf(debug,"Setting up DD communication\n");
 +    }
 +    
 +    comm  = dd->comm;
 +    cg_cm = fr->cg_cm;
 +
 +    for(dim_ind=0; dim_ind<dd->ndim; dim_ind++)
 +    {
 +        dim = dd->dim[dim_ind];
 +
 +        /* Check if we need to use triclinic distances */
 +        tric_dist[dim_ind] = 0;
 +        for(i=0; i<=dim_ind; i++)
 +        {
 +            if (ddbox->tric_dir[dd->dim[i]])
 +            {
 +                tric_dist[dim_ind] = 1;
 +            }
 +        }
 +    }
 +
 +    bBondComm = comm->bBondComm;
 +
 +    /* Do we need to determine extra distances for multi-body bondeds? */
 +    bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
 +    
 +    /* Do we need to determine extra distances for only two-body bondeds? */
 +    bDist2B = (bBondComm && !bDistMB);
 +
 +    r_comm2  = sqr(comm->cutoff);
 +    r_bcomm2 = sqr(comm->cutoff_mbody);
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"bBondComm %d, r_bc %f\n",bBondComm,sqrt(r_bcomm2));
 +    }
 +
 +    zones = &comm->zones;
 +    
 +    dim0 = dd->dim[0];
 +    /* The first dimension is equal for all cells */
 +    corner[0][0] = comm->cell_x0[dim0];
 +    if (bDistMB)
 +    {
 +        bcorner[0] = corner[0][0];
 +    }
 +    if (dd->ndim >= 2)
 +    {
 +        dim1 = dd->dim[1];
 +        /* This cell row is only seen from the first row */
 +        corner[1][0] = comm->cell_x0[dim1];
 +        /* All rows can see this row */
 +        corner[1][1] = comm->cell_x0[dim1];
 +        if (dd->bGridJump)
 +        {
 +            corner[1][1] = max(comm->cell_x0[dim1],comm->zone_d1[1].mch0);
 +            if (bDistMB)
 +            {
 +                /* For the multi-body distance we need the maximum */
 +                bcorner[1] = max(comm->cell_x0[dim1],comm->zone_d1[1].p1_0);
 +            }
 +        }
 +        /* Set the upper-right corner for rounding */
 +        corner_round_0 = comm->cell_x1[dim0];
 +        
 +        if (dd->ndim >= 3)
 +        {
 +            dim2 = dd->dim[2];
 +            for(j=0; j<4; j++)
 +            {
 +                corner[2][j] = comm->cell_x0[dim2];
 +            }
 +            if (dd->bGridJump)
 +            {
 +                /* Use the maximum of the i-cells that see a j-cell */
 +                for(i=0; i<zones->nizone; i++)
 +                {
 +                    for(j=zones->izone[i].j0; j<zones->izone[i].j1; j++)
 +                    {
 +                        if (j >= 4)
 +                        {
 +                            corner[2][j-4] =
 +                                max(corner[2][j-4],
 +                                    comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
 +                        }
 +                    }
 +                }
 +                if (bDistMB)
 +                {
 +                    /* For the multi-body distance we need the maximum */
 +                    bcorner[2] = comm->cell_x0[dim2];
 +                    for(i=0; i<2; i++)
 +                    {
 +                        for(j=0; j<2; j++)
 +                        {
 +                            bcorner[2] = max(bcorner[2],
 +                                             comm->zone_d2[i][j].p1_0);
 +                        }
 +                    }
 +                }
 +            }
 +            
 +            /* Set the upper-right corner for rounding */
 +            /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
 +             * Only cell (0,0,0) can see cell 7 (1,1,1)
 +             */
 +            corner_round_1[0] = comm->cell_x1[dim1];
 +            corner_round_1[3] = comm->cell_x1[dim1];
 +            if (dd->bGridJump)
 +            {
 +                corner_round_1[0] = max(comm->cell_x1[dim1],
 +                                        comm->zone_d1[1].mch1);
 +                if (bDistMB)
 +                {
 +                    /* For the multi-body distance we need the maximum */
 +                    bcorner_round_1 = max(comm->cell_x1[dim1],
 +                                          comm->zone_d1[1].p1_1);
 +                }
 +            }
 +        }
 +    }
 +    
 +    /* Triclinic stuff */
 +    normal = ddbox->normal;
 +    skew_fac_01 = 0;
 +    if (dd->ndim >= 2)
 +    {
 +        v_0 = ddbox->v[dim0];
 +        if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
 +        {
 +            /* Determine the coupling coefficient for the distances
 +             * to the cell planes along dim0 and dim1 through dim2.
 +             * This is required for correct rounding.
 +             */
 +            skew_fac_01 =
 +                ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
 +            if (debug)
 +            {
 +                fprintf(debug,"\nskew_fac_01 %f\n",skew_fac_01);
 +            }
 +        }
 +    }
 +    if (dd->ndim >= 3)
 +    {
 +        v_1 = ddbox->v[dim1];
 +    }
 +    
 +    zone_cg_range = zones->cg_range;
 +    index_gl = dd->index_gl;
 +    cgindex  = dd->cgindex;
 +    cginfo_mb = fr->cginfo_mb;
 +    
 +    zone_cg_range[0]   = 0;
 +    zone_cg_range[1]   = dd->ncg_home;
 +    comm->zone_ncg1[0] = dd->ncg_home;
 +    pos_cg             = dd->ncg_home;
 +    
 +    nat_tot = dd->nat_home;
 +    nzone = 1;
 +    for(dim_ind=0; dim_ind<dd->ndim; dim_ind++)
 +    {
 +        dim = dd->dim[dim_ind];
 +        cd = &comm->cd[dim_ind];
 +        
 +        if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
 +        {
 +            /* No pbc in this dimension, the first node should not comm. */
 +            nzone_send = 0;
 +        }
 +        else
 +        {
 +            nzone_send = nzone;
 +        }
 +
 +        bScrew = (dd->bScrewPBC && dim == XX);
 +        
 +        v_d = ddbox->v[dim];
 +        skew_fac2_d = sqr(ddbox->skew_fac[dim]);
 +
 +        cd->bInPlace = TRUE;
 +        for(p=0; p<cd->np; p++)
 +        {
 +            /* Only atoms communicated in the first pulse are used
 +             * for multi-body bonded interactions or for bBondComm.
 +             */
 +            bDistBonded   = ((bDistMB || bDist2B) && p == 0);
 +            bDistMB_pulse = (bDistMB && bDistBonded);
 +
 +            ind = &cd->ind[p];
 +            nsend = 0;
 +            nat = 0;
 +            for(zone=0; zone<nzone_send; zone++)
 +            {
 +                if (tric_dist[dim_ind] && dim_ind > 0)
 +                {
 +                    /* Determine slightly more optimized skew_fac's
 +                     * for rounding.
 +                     * This reduces the number of communicated atoms
 +                     * by about 10% for 3D DD of rhombic dodecahedra.
 +                     */
 +                    for(dimd=0; dimd<dim; dimd++)
 +                    {
 +                        sf2_round[dimd] = 1;
 +                        if (ddbox->tric_dir[dimd])
 +                        {
 +                            for(i=dd->dim[dimd]+1; i<DIM; i++)
 +                            {
 +                                /* If we are shifted in dimension i
 +                                 * and the cell plane is tilted forward
 +                                 * in dimension i, skip this coupling.
 +                                 */
 +                                if (!(zones->shift[nzone+zone][i] &&
 +                                      ddbox->v[dimd][i][dimd] >= 0))
 +                                {
 +                                    sf2_round[dimd] +=
 +                                        sqr(ddbox->v[dimd][i][dimd]);
 +                                }
 +                            }
 +                            sf2_round[dimd] = 1/sf2_round[dimd];
 +                        }
 +                    }
 +                }
 +
 +                zonei = zone_perm[dim_ind][zone];
 +                if (p == 0)
 +                {
 +                    /* Here we permutate the zones to obtain a convenient order
 +                     * for neighbor searching
 +                     */
 +                    cg0 = zone_cg_range[zonei];
 +                    cg1 = zone_cg_range[zonei+1];
 +                }
 +                else
 +                {
 +                    /* Look only at the cg's received in the previous grid pulse
 +                     */
 +                    cg1 = zone_cg_range[nzone+zone+1];
 +                    cg0 = cg1 - cd->ind[p-1].nrecv[zone];
 +                }
 +                ind->nsend[zone] = 0;
 +                for(cg=cg0; cg<cg1; cg++)
 +                {
 +                    r2  = 0;
 +                    rb2 = 0;
 +                    if (tric_dist[dim_ind] == 0)
 +                    {
 +                        /* Rectangular direction, easy */
 +                        r = cg_cm[cg][dim] - corner[dim_ind][zone];
 +                        if (r > 0)
 +                        {
 +                            r2 += r*r;
 +                        }
 +                        if (bDistMB_pulse)
 +                        {
 +                            r = cg_cm[cg][dim] - bcorner[dim_ind];
 +                            if (r > 0)
 +                            {
 +                                rb2 += r*r;
 +                            }
 +                        }
 +                        /* Rounding gives at most a 16% reduction
 +                         * in communicated atoms
 +                         */
 +                        if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
 +                        {
 +                            r = cg_cm[cg][dim0] - corner_round_0;
 +                            /* This is the first dimension, so always r >= 0 */
 +                            r2 += r*r;
 +                            if (bDistMB_pulse)
 +                            {
 +                                rb2 += r*r;
 +                            }
 +                        }
 +                        if (dim_ind == 2 && (zonei == 2 || zonei == 3))
 +                        {
 +                            r = cg_cm[cg][dim1] - corner_round_1[zone];
 +                            if (r > 0)
 +                            {
 +                                r2 += r*r;
 +                            }
 +                            if (bDistMB_pulse)
 +                            {
 +                                r = cg_cm[cg][dim1] - bcorner_round_1;
 +                                if (r > 0)
 +                                {
 +                                    rb2 += r*r;
 +                                }
 +                            }
 +                        }
 +                    }
 +                    else
 +                    {
 +                        /* Triclinic direction, more complicated */
 +                        clear_rvec(rn);
 +                        clear_rvec(rb);
 +                        /* Rounding, conservative as the skew_fac multiplication
 +                         * will slightly underestimate the distance.
 +                         */
 +                        if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
 +                        {
 +                            rn[dim0] = cg_cm[cg][dim0] - corner_round_0;
 +                            for(i=dim0+1; i<DIM; i++)
 +                            {
 +                                rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
 +                            }
 +                            r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
 +                            if (bDistMB_pulse)
 +                            {
 +                                rb[dim0] = rn[dim0];
 +                                rb2 = r2;
 +                            }
 +                            /* Take care that the cell planes along dim0 might not
 +                             * be orthogonal to those along dim1 and dim2.
 +                             */
 +                            for(i=1; i<=dim_ind; i++)
 +                            {
 +                                dimd = dd->dim[i];
 +                                if (normal[dim0][dimd] > 0)
 +                                {
 +                                    rn[dimd] -= rn[dim0]*normal[dim0][dimd];
 +                                    if (bDistMB_pulse)
 +                                    {
 +                                        rb[dimd] -= rb[dim0]*normal[dim0][dimd];
 +                                    }
 +                                }
 +                            }
 +                        }
 +                        if (dim_ind == 2 && (zonei == 2 || zonei == 3))
 +                        {
 +                            rn[dim1] += cg_cm[cg][dim1] - corner_round_1[zone];
 +                            tric_sh = 0;
 +                            for(i=dim1+1; i<DIM; i++)
 +                            {
 +                                tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
 +                            }
 +                            rn[dim1] += tric_sh;
 +                            if (rn[dim1] > 0)
 +                            {
 +                                r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
 +                                /* Take care of coupling of the distances
 +                                 * to the planes along dim0 and dim1 through dim2.
 +                                 */
 +                                r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
 +                                /* Take care that the cell planes along dim1
 +                                 * might not be orthogonal to that along dim2.
 +                                 */
 +                                if (normal[dim1][dim2] > 0)
 +                                {
 +                                    rn[dim2] -= rn[dim1]*normal[dim1][dim2];
 +                                }
 +                            }
 +                            if (bDistMB_pulse)
 +                            {
 +                                rb[dim1] +=
 +                                    cg_cm[cg][dim1] - bcorner_round_1 + tric_sh;
 +                                if (rb[dim1] > 0)
 +                                {
 +                                    rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
 +                                    /* Take care of coupling of the distances
 +                                     * to the planes along dim0 and dim1 through dim2.
 +                                     */
 +                                    rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
 +                                    /* Take care that the cell planes along dim1
 +                                     * might not be orthogonal to that along dim2.
 +                                     */
 +                                    if (normal[dim1][dim2] > 0)
 +                                    {
 +                                        rb[dim2] -= rb[dim1]*normal[dim1][dim2];
 +                                    }
 +                                }
 +                            }
 +                        }
 +                        /* The distance along the communication direction */
 +                        rn[dim] += cg_cm[cg][dim] - corner[dim_ind][zone];
 +                        tric_sh = 0;
 +                        for(i=dim+1; i<DIM; i++)
 +                        {
 +                            tric_sh -= cg_cm[cg][i]*v_d[i][dim];
 +                        }
 +                        rn[dim] += tric_sh;
 +                        if (rn[dim] > 0)
 +                        {
 +                            r2 += rn[dim]*rn[dim]*skew_fac2_d;
 +                            /* Take care of coupling of the distances
 +                             * to the planes along dim0 and dim1 through dim2.
 +                             */
 +                            if (dim_ind == 1 && zonei == 1)
 +                            {
 +                                r2 -= rn[dim0]*rn[dim]*skew_fac_01;
 +                            }
 +                        }
 +                        if (bDistMB_pulse)
 +                        {
 +                            clear_rvec(rb);
 +                            rb[dim] += cg_cm[cg][dim] - bcorner[dim_ind] + tric_sh;
 +                            if (rb[dim] > 0)
 +                            {
 +                                rb2 += rb[dim]*rb[dim]*skew_fac2_d;
 +                                /* Take care of coupling of the distances
 +                                 * to the planes along dim0 and dim1 through dim2.
 +                                 */
 +                                if (dim_ind == 1 && zonei == 1)
 +                                {
 +                                    rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
 +                                }
 +                            }
 +                        }
 +                    }
 +                    
 +                    if (r2 < r_comm2 ||
 +                        (bDistBonded &&
 +                         ((bDistMB && rb2 < r_bcomm2) ||
 +                          (bDist2B && r2  < r_bcomm2)) &&
 +                         (!bBondComm ||
 +                          (GET_CGINFO_BOND_INTER(fr->cginfo[cg]) &&
 +                           missing_link(comm->cglink,index_gl[cg],
 +                                        comm->bLocalCG)))))
 +                    {
 +                        /* Make an index to the local charge groups */
 +                        if (nsend+1 > ind->nalloc)
 +                        {
 +                            ind->nalloc = over_alloc_large(nsend+1);
 +                            srenew(ind->index,ind->nalloc);
 +                        }
 +                        if (nsend+1 > comm->nalloc_int)
 +                        {
 +                            comm->nalloc_int = over_alloc_large(nsend+1);
 +                            srenew(comm->buf_int,comm->nalloc_int);
 +                        }
 +                        ind->index[nsend] = cg;
 +                        comm->buf_int[nsend] = index_gl[cg];
 +                        ind->nsend[zone]++;
 +                        vec_rvec_check_alloc(&comm->vbuf,nsend+1);
 +
 +                        if (dd->ci[dim] == 0)
 +                        {
 +                            /* Correct cg_cm for pbc */
 +                            rvec_add(cg_cm[cg],box[dim],comm->vbuf.v[nsend]);
 +                            if (bScrew)
 +                            {
 +                                comm->vbuf.v[nsend][YY] =
 +                                    box[YY][YY]-comm->vbuf.v[nsend][YY];
 +                                comm->vbuf.v[nsend][ZZ] =
 +                                    box[ZZ][ZZ]-comm->vbuf.v[nsend][ZZ];
 +                            }
 +                        }
 +                        else
 +                        {
 +                            copy_rvec(cg_cm[cg],comm->vbuf.v[nsend]);
 +                        }
 +                        nsend++;
 +                        nat += cgindex[cg+1] - cgindex[cg];
 +                    }
 +                }
 +            }
 +            /* Clear the counts in case we do not have pbc */
 +            for(zone=nzone_send; zone<nzone; zone++)
 +            {
 +                ind->nsend[zone] = 0;
 +            }
 +            ind->nsend[nzone]   = nsend;
 +            ind->nsend[nzone+1] = nat;
 +            /* Communicate the number of cg's and atoms to receive */
 +            dd_sendrecv_int(dd, dim_ind, dddirBackward,
 +                            ind->nsend, nzone+2,
 +                            ind->nrecv, nzone+2);
 +            
 +            /* The rvec buffer is also required for atom buffers of size nsend
 +             * in dd_move_x and dd_move_f.
 +             */
 +            vec_rvec_check_alloc(&comm->vbuf,ind->nsend[nzone+1]);
 +
 +            if (p > 0)
 +            {
 +                /* We can receive in place if only the last zone is not empty */
 +                for(zone=0; zone<nzone-1; zone++)
 +                {
 +                    if (ind->nrecv[zone] > 0)
 +                    {
 +                        cd->bInPlace = FALSE;
 +                    }
 +                }
 +                if (!cd->bInPlace)
 +                {
 +                    /* The int buffer is only required here for the cg indices */
 +                    if (ind->nrecv[nzone] > comm->nalloc_int2)
 +                    {
 +                        comm->nalloc_int2 = over_alloc_dd(ind->nrecv[nzone]);
 +                        srenew(comm->buf_int2,comm->nalloc_int2);
 +                    }
 +                    /* The rvec buffer is also required for atom buffers
 +                     * of size nrecv in dd_move_x and dd_move_f.
 +                     */
 +                    i = max(cd->ind[0].nrecv[nzone+1],ind->nrecv[nzone+1]);
 +                    vec_rvec_check_alloc(&comm->vbuf2,i);
 +                }
 +            }
 +            
 +            /* Make space for the global cg indices */
 +            if (pos_cg + ind->nrecv[nzone] > dd->cg_nalloc
 +                || dd->cg_nalloc == 0)
 +            {
 +                dd->cg_nalloc = over_alloc_dd(pos_cg + ind->nrecv[nzone]);
 +                srenew(index_gl,dd->cg_nalloc);
 +                srenew(cgindex,dd->cg_nalloc+1);
 +            }
 +            /* Communicate the global cg indices */
 +            if (cd->bInPlace)
 +            {
 +                recv_i = index_gl + pos_cg;
 +            }
 +            else
 +            {
 +                recv_i = comm->buf_int2;
 +            }
 +            dd_sendrecv_int(dd, dim_ind, dddirBackward,
 +                            comm->buf_int, nsend,
 +                            recv_i,        ind->nrecv[nzone]);
 +
 +            /* Make space for cg_cm */
 +            if (pos_cg + ind->nrecv[nzone] > fr->cg_nalloc)
 +            {
 +                dd_realloc_fr_cg(fr,pos_cg + ind->nrecv[nzone]);
 +                cg_cm = fr->cg_cm;
 +            }
 +            /* Communicate cg_cm */
 +            if (cd->bInPlace)
 +            {
 +                recv_vr = cg_cm + pos_cg;
 +            }
 +            else
 +            {
 +                recv_vr = comm->vbuf2.v;
 +            }
 +            dd_sendrecv_rvec(dd, dim_ind, dddirBackward,
 +                             comm->vbuf.v, nsend,
 +                             recv_vr,      ind->nrecv[nzone]);
 +            
 +            /* Make the charge group index */
 +            if (cd->bInPlace)
 +            {
 +                zone = (p == 0 ? 0 : nzone - 1);
 +                while (zone < nzone)
 +                {
 +                    for(cg=0; cg<ind->nrecv[zone]; cg++)
 +                    {
 +                        cg_gl = index_gl[pos_cg];
 +                        fr->cginfo[pos_cg] = ddcginfo(cginfo_mb,cg_gl);
 +                        nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
 +                        cgindex[pos_cg+1] = cgindex[pos_cg] + nrcg;
 +                        if (bBondComm)
 +                        {
 +                            /* Update the charge group presence,
 +                             * so we can use it in the next pass of the loop.
 +                             */
 +                            comm->bLocalCG[cg_gl] = TRUE;
 +                        }
 +                        pos_cg++;
 +                    }
 +                    if (p == 0)
 +                    {
 +                        comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
 +                    }
 +                    zone++;
 +                    zone_cg_range[nzone+zone] = pos_cg;
 +                }
 +            }
 +            else
 +            {
 +                /* This part of the code is never executed with bBondComm. */
 +                merge_cg_buffers(nzone,cd,p,zone_cg_range,
 +                                 index_gl,recv_i,cg_cm,recv_vr,
 +                                 cgindex,fr->cginfo_mb,fr->cginfo);
 +                pos_cg += ind->nrecv[nzone];
 +            }
 +            nat_tot += ind->nrecv[nzone+1];
 +        }
 +        if (!cd->bInPlace)
 +        {
 +            /* Store the atom block for easy copying of communication buffers */
 +            make_cell2at_index(cd,nzone,zone_cg_range[nzone],cgindex);
 +        }
 +        nzone += nzone;
 +    }
 +    dd->index_gl = index_gl;
 +    dd->cgindex  = cgindex;
 +    
 +    dd->ncg_tot = zone_cg_range[zones->n];
 +    dd->nat_tot = nat_tot;
 +    comm->nat[ddnatHOME] = dd->nat_home;
 +    for(i=ddnatZONE; i<ddnatNR; i++)
 +    {
 +        comm->nat[i] = dd->nat_tot;
 +    }
 +
 +    if (!bBondComm)
 +    {
 +        /* We don't need to update cginfo, since that was already done above.
 +         * So we pass NULL for the forcerec.
 +         */
 +        dd_set_cginfo(dd->index_gl,dd->ncg_home,dd->ncg_tot,
 +                      NULL,comm->bLocalCG);
 +    }
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"Finished setting up DD communication, zones:");
 +        for(c=0; c<zones->n; c++)
 +        {
 +            fprintf(debug," %d",zones->cg_range[c+1]-zones->cg_range[c]);
 +        }
 +        fprintf(debug,"\n");
 +    }
 +}
 +
 +static void set_cg_boundaries(gmx_domdec_zones_t *zones)
 +{   /* Fill the charge group bounds of every i-zone from zones->cg_range. */
 +    int c; /* i-zone index */
 +    /* j0/j1 are zone indices; cg_range[z] is the first cg of zone z. */
 +    for(c=0; c<zones->nizone; c++)
 +    {
 +        zones->izone[c].cg1  = zones->cg_range[c+1]; /* end of zone c */
 +        zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
 +        zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
 +    }
 +}
 +
 +static int comp_cgsort(const void *a,const void *b)
 +{   /* qsort comparator for gmx_cgsort_t: key 1 is nsc, key 2 is ind_gl. */
 +    int comp; /* <0, 0 or >0, obtained by subtracting the keys */
 +    /* NOTE(review): subtraction assumes the key difference fits in an int. */
 +    gmx_cgsort_t *cga,*cgb;
 +    cga = (gmx_cgsort_t *)a;
 +    cgb = (gmx_cgsort_t *)b;
 +    
 +    comp = cga->nsc - cgb->nsc;
 +    if (comp == 0)
 +    {   /* Same nsc: fall back to the global index as tie-breaker */
 +        comp = cga->ind_gl - cgb->ind_gl;
 +    }
 +    
 +    return comp;
 +}
 +
 +static void order_int_cg(int n,gmx_cgsort_t *sort,
 +                         int *a,int *buf)
 +{   /* Permute a in place: new a[i] = old a[sort[i].ind] for 0 <= i < n. */
 +    int i;
 +    /* buf is scratch space; its first n entries are overwritten. */
 +    /* Order the data */
 +    for(i=0; i<n; i++)
 +    {
 +        buf[i] = a[sort[i].ind];
 +    }
 +    
 +    /* Copy back to the original array */
 +    for(i=0; i<n; i++)
 +    {
 +        a[i] = buf[i];
 +    }
 +}
 +
 +static void order_vec_cg(int n,gmx_cgsort_t *sort,
 +                         rvec *v,rvec *buf)
 +{   /* Permute v in place: new v[i] = old v[sort[i].ind] for 0 <= i < n. */
 +    int i;
 +    /* buf is scratch space; its first n rvecs are overwritten. */
 +    /* Order the data */
 +    for(i=0; i<n; i++)
 +    {
 +        copy_rvec(v[sort[i].ind],buf[i]);
 +    }
 +    
 +    /* Copy back to the original array */
 +    for(i=0; i<n; i++)
 +    {
 +        copy_rvec(buf[i],v[i]);
 +    }
 +}
 +
 +static void order_vec_atom(int ncg,int *cgindex,gmx_cgsort_t *sort,
 +                           rvec *v,rvec *buf)
 +{   /* Reorder the per-atom array v so its charge groups follow sort[]. */
 +    int a,atot,cg,cg0,cg1,i;
 +    /* cgindex is read with the pre-sort cg numbering (sort[cg].ind). */
 +    /* Order the data */
 +    a = 0;
 +    for(cg=0; cg<ncg; cg++)
 +    {
 +        cg0 = cgindex[sort[cg].ind];   /* first atom of this cg */
 +        cg1 = cgindex[sort[cg].ind+1]; /* one past its last atom */
 +        for(i=cg0; i<cg1; i++)
 +        {
 +            copy_rvec(v[i],buf[a]);
 +            a++;
 +        }
 +    }
 +    atot = a; /* number of atoms gathered from the first ncg sorted cgs */
 +    
 +    /* Copy back to the original array */
 +    for(a=0; a<atot; a++)
 +    {
 +        copy_rvec(buf[a],v[a]);
 +    }
 +}
 +
 +static void ordered_sort(int nsort2,gmx_cgsort_t *sort2,
 +                         int nsort_new,gmx_cgsort_t *sort_new,
 +                         gmx_cgsort_t *sort1)
 +{   /* Merge sorted sort2 with (here sorted) sort_new into sort1. */
 +    int i1,i2,i_new; /* cursors into sort1, sort2 and sort_new */
 +    /* sort1 receives nsort2+nsort_new entries; sort_new is sorted in place. */
 +    /* The new indices are not very ordered, so we qsort them */
 +    qsort_threadsafe(sort_new,nsort_new,sizeof(sort_new[0]),comp_cgsort);
 +    
 +    /* sort2 is already ordered, so now we can merge the two arrays */
 +    i1 = 0;
 +    i2 = 0;
 +    i_new = 0;
 +    while(i2 < nsort2 || i_new < nsort_new)
 +    {
 +        if (i2 == nsort2)
 +        {   /* sort2 exhausted: drain sort_new */
 +            sort1[i1++] = sort_new[i_new++];
 +        }
 +        else if (i_new == nsort_new)
 +        {   /* sort_new exhausted: drain sort2 */
 +            sort1[i1++] = sort2[i2++];
 +        }
 +        else if (sort2[i2].nsc < sort_new[i_new].nsc ||
 +                 (sort2[i2].nsc == sort_new[i_new].nsc &&
 +                  sort2[i2].ind_gl < sort_new[i_new].ind_gl))
 +        {   /* sort2 entry is strictly smaller on (nsc, ind_gl) */
 +            sort1[i1++] = sort2[i2++];
 +        }
 +        else
 +        {   /* sort_new entry is smaller, or both keys are equal */
 +            sort1[i1++] = sort_new[i_new++];
 +        }
 +    }
 +}
 +
 +static void dd_sort_state(gmx_domdec_t *dd,int ePBC,
 +                          rvec *cgcm,t_forcerec *fr,t_state *state,
 +                          int ncg_home_old)
 +{   /* Sort the home charge groups on ns grid cell, then on global index. */
 +    gmx_domdec_sort_t *sort;
 +    gmx_cgsort_t *cgsort,*sort_i;
 +    int  ncg_new,nsort2,nsort_new,i,cell_index,*ibuf,cgsize;
 +    rvec *vbuf;
 +    /* ncg_home_old >= 0 selects the merge path below, < 0 a full qsort. */
 +    sort = dd->comm->sort;
 +    /* Grow sort1 and sort2 so each can hold all home charge groups. */
 +    if (dd->ncg_home > sort->sort_nalloc)
 +    {
 +        sort->sort_nalloc = over_alloc_dd(dd->ncg_home);
 +        srenew(sort->sort1,sort->sort_nalloc);
 +        srenew(sort->sort2,sort->sort_nalloc);
 +    }
 +    /* NOTE(review): ePBC is not referenced anywhere in this function. */
 +    if (ncg_home_old >= 0)
 +    {
 +        /* The charge groups that remained in the same ns grid cell
 +         * are completely ordered. So we can sort efficiently by sorting
 +         * the charge groups that did move into the stationary list.
 +         */
 +        ncg_new = 0;
 +        nsort2 = 0;
 +        nsort_new = 0;
 +        for(i=0; i<dd->ncg_home; i++)
 +        {
 +            /* Check if this cg did not move to another node */
 +            cell_index = fr->ns.grid->cell_index[i];
 +            if (cell_index !=  4*fr->ns.grid->ncells)
 +            {
 +                if (i >= ncg_home_old || cell_index != sort->sort1[i].nsc)
 +                {
 +                    /* This cg is new on this node or moved ns grid cell */
 +                    if (nsort_new >= sort->sort_new_nalloc)
 +                    {
 +                        sort->sort_new_nalloc = over_alloc_dd(nsort_new+1);
 +                        srenew(sort->sort_new,sort->sort_new_nalloc);
 +                    }
 +                    sort_i = &(sort->sort_new[nsort_new++]);
 +                }
 +                else
 +                {
 +                    /* This cg did not move */
 +                    sort_i = &(sort->sort2[nsort2++]);
 +                }
 +                /* Sort on the ns grid cell indices
 +                 * and the global topology index
 +                 */
 +                sort_i->nsc    = cell_index;
 +                sort_i->ind_gl = dd->index_gl[i];
 +                sort_i->ind    = i; /* position before sorting */
 +                ncg_new++;
 +            }
 +        }
 +        if (debug)
 +        {
 +            fprintf(debug,"ordered sort cgs: stationary %d moved %d\n",
 +                    nsort2,nsort_new);
 +        }
 +        /* Sort efficiently */
 +        ordered_sort(nsort2,sort->sort2,nsort_new,sort->sort_new,sort->sort1);
 +    }
 +    else
 +    {
 +        cgsort = sort->sort1;
 +        ncg_new = 0;
 +        for(i=0; i<dd->ncg_home; i++)
 +        {
 +            /* Sort on the ns grid cell indices
 +             * and the global topology index
 +             */
 +            cgsort[i].nsc    = fr->ns.grid->cell_index[i];
 +            cgsort[i].ind_gl = dd->index_gl[i];
 +            cgsort[i].ind    = i;
 +            if (cgsort[i].nsc != 4*fr->ns.grid->ncells)
 +            {   /* Count only the cgs that stay on this node */
 +                ncg_new++;
 +            }
 +        }
 +        if (debug)
 +        {
 +            fprintf(debug,"qsort cgs: %d new home %d\n",dd->ncg_home,ncg_new);
 +        }
 +        /* Determine the order of the charge groups using qsort */
 +        qsort_threadsafe(cgsort,dd->ncg_home,sizeof(cgsort[0]),comp_cgsort);
 +    }
 +    cgsort = sort->sort1; /* both paths leave the sorted order in sort1 */
 +    
 +    /* We alloc with the old size, since cgindex is still old */
 +    vec_rvec_check_alloc(&dd->comm->vbuf,dd->cgindex[dd->ncg_home]);
 +    vbuf = dd->comm->vbuf.v;
 +    
 +    /* Remove the charge groups which are no longer at home here */
 +    dd->ncg_home = ncg_new;
 +    /* From here on only the first ncg_new entries of cgsort are used. */
 +    /* Reorder the state */
 +    for(i=0; i<estNR; i++)
 +    {
-         if (EST_DISTR(i) && state->flags & (1<<i))
++        if (EST_DISTR(i) && (state->flags & (1<<i)))
 +        {
 +            switch (i)
 +            {
 +            case estX:
 +                order_vec_atom(dd->ncg_home,dd->cgindex,cgsort,state->x,vbuf);
 +                break;
 +            case estV:
 +                order_vec_atom(dd->ncg_home,dd->cgindex,cgsort,state->v,vbuf);
 +                break;
 +            case estSDX:
 +                order_vec_atom(dd->ncg_home,dd->cgindex,cgsort,state->sd_X,vbuf);
 +                break;
 +            case estCGP:
 +                order_vec_atom(dd->ncg_home,dd->cgindex,cgsort,state->cg_p,vbuf);
 +                break;
 +            case estLD_RNG:
 +            case estLD_RNGI:
 +            case estDISRE_INITF:
 +            case estDISRE_RM3TAV:
 +            case estORIRE_INITF:
 +            case estORIRE_DTAV:
 +                /* No ordering required */
 +                break;
 +            default:
 +                gmx_incons("Unknown state entry encountered in dd_sort_state");
 +                break;
 +            }
 +        }
 +    }
 +    /* Reorder cgcm */
 +    order_vec_cg(dd->ncg_home,cgsort,cgcm,vbuf);
 +    /* ibuf needs ncg_home+1 entries for the rebuilt cgindex below. */
 +    if (dd->ncg_home+1 > sort->ibuf_nalloc)
 +    {
 +        sort->ibuf_nalloc = over_alloc_dd(dd->ncg_home+1);
 +        srenew(sort->ibuf,sort->ibuf_nalloc);
 +    }
 +    ibuf = sort->ibuf;
 +    /* Reorder the global cg index */
 +    order_int_cg(dd->ncg_home,cgsort,dd->index_gl,ibuf);
 +    /* Reorder the cginfo */
 +    order_int_cg(dd->ncg_home,cgsort,fr->cginfo,ibuf);
 +    /* Rebuild the local cg index */
 +    ibuf[0] = 0;
 +    for(i=0; i<dd->ncg_home; i++)
 +    {   /* Sizes come from the old cgindex, which is still intact here */
 +        cgsize = dd->cgindex[cgsort[i].ind+1] - dd->cgindex[cgsort[i].ind];
 +        ibuf[i+1] = ibuf[i] + cgsize;
 +    }
 +    for(i=0; i<dd->ncg_home+1; i++)
 +    {
 +        dd->cgindex[i] = ibuf[i];
 +    }
 +    /* Set the home atom number */
 +    dd->nat_home = dd->cgindex[dd->ncg_home];
 +    
 +    /* Copy the sorted ns cell indices back to the ns grid struct */
 +    for(i=0; i<dd->ncg_home; i++)
 +    {
 +        fr->ns.grid->cell_index[i] = cgsort[i].nsc;
 +    }
 +    fr->ns.grid->nr = dd->ncg_home;
 +}
 +
 +static void add_dd_statistics(gmx_domdec_t *dd)
 +{   /* Accumulate this decomposition's atom counts into the run totals. */
 +    gmx_domdec_comm_t *comm;
 +    int ddnat;
 +    
 +    comm = dd->comm;
 +    /* Each category adds the increase of comm->nat over the previous entry. */
 +    for(ddnat=ddnatZONE; ddnat<ddnatNR; ddnat++)
 +    {
 +        comm->sum_nat[ddnat-ddnatZONE] +=
 +            comm->nat[ddnat] - comm->nat[ddnat-1];
 +    }
 +    comm->ndecomp++; /* number of calls accumulated so far */
 +}
 +
 +void reset_dd_statistics_counters(gmx_domdec_t *dd)
 +{   /* Zero the accumulated DD atom-count and load statistics in dd->comm. */
 +    gmx_domdec_comm_t *comm;
 +    int ddnat;
 +    
 +    comm = dd->comm;
 +
 +    /* Reset all the statistics and counters for total run counting */
 +    for(ddnat=ddnatZONE; ddnat<ddnatNR; ddnat++)
 +    {
 +        comm->sum_nat[ddnat-ddnatZONE] = 0;
 +    }
 +    comm->ndecomp = 0;
 +    comm->nload = 0;
 +    comm->load_step = 0;
 +    comm->load_sum = 0;
 +    comm->load_max = 0;
 +    clear_ivec(comm->load_lim);
 +    comm->load_mdf = 0;
 +    comm->load_pme = 0;
 +}
 +
 +void print_dd_statistics(t_commrec *cr,t_inputrec *ir,FILE *fplog)
 +{   /* Write the average communicated atom counts per category to fplog. */
 +    gmx_domdec_comm_t *comm;
 +    int ddnat;
 +    double av;
 +    /* cr->dd is dereferenced unconditionally. */
 +    comm = cr->dd->comm;
 +    /* Runs even when fplog is NULL; presumably a sum over ranks - confirm. */
 +    gmx_sumd(ddnatNR-ddnatZONE,comm->sum_nat,cr);
 +    
 +    if (fplog == NULL)
 +    {
 +        return;
 +    }
 +    
 +    fprintf(fplog,"\n    D O M A I N   D E C O M P O S I T I O N   S T A T I S T I C S\n\n");
 +    /* NOTE(review): comm->ndecomp == 0 is not guarded below - confirm. */
 +    for(ddnat=ddnatZONE; ddnat<ddnatNR; ddnat++)
 +    {
 +        av = comm->sum_nat[ddnat-ddnatZONE]/comm->ndecomp;
 +        switch(ddnat)
 +        {
 +        case ddnatZONE:
 +            fprintf(fplog,
 +                    " av. #atoms communicated per step for force:  %d x %.1f\n",
 +                    2,av);
 +            break;
 +        case ddnatVSITE:
 +            if (cr->dd->vsite_comm)
 +            {
 +                fprintf(fplog,
 +                        " av. #atoms communicated per step for vsites: %d x %.1f\n",
 +                        (EEL_PME(ir->coulombtype) || ir->coulombtype==eelEWALD) ? 3 : 2,
 +                        av);
 +            }
 +            break;
 +        case ddnatCON:
 +            if (cr->dd->constraint_comm)
 +            {
 +                fprintf(fplog,
 +                        " av. #atoms communicated per step for LINCS:  %d x %.1f\n",
 +                        1 + ir->nLincsIter,av);
 +            }
 +            break;
 +        default:
 +            gmx_incons(" Unknown type for DD statistics");
 +        }
 +    }
 +    fprintf(fplog,"\n");
 +    /* The load summary is printed only for dynamics integrators. */
 +    if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
 +    {
 +        print_dd_load_av(fplog,cr->dd);
 +    }
 +}
 +
 +/* (Re)partition the system over the domain decomposition cells.
 + *
 + * In order, this routine:
 + *  - decides whether the box changed and whether dynamic load balancing
 + *    (DLB) should be done or turned on this step, collecting and printing
 + *    the load distribution when needed;
 + *  - sets up the home charge groups along one of three paths:
 + *    distribution from the master state (bMasterState), rebuilding the
 + *    indices when state_local->ddp_count differs from dd->ddp_count,
 + *    or redistribution of the charge groups of the current local state;
 + *  - sets the cell sizes, optionally sorts the home charge groups,
 + *    sets up the communication and builds the local indices;
 + *  - extracts the local topology, sets up vsite/constraint atom
 + *    communication and updates mdatoms, shells, GB, PME charges,
 + *    constraints, pull and rotation groups;
 + *  - updates the statistics and the partitioning counters.
 + */
 +void dd_partition_system(FILE            *fplog,
 +                         gmx_large_int_t      step,
 +                         t_commrec       *cr,
 +                         gmx_bool            bMasterState,
 +                         int             nstglobalcomm,
 +                         t_state         *state_global,
 +                         gmx_mtop_t      *top_global,
 +                         t_inputrec      *ir,
 +                         t_state         *state_local,
 +                         rvec            **f,
 +                         t_mdatoms       *mdatoms,
 +                         gmx_localtop_t  *top_local,
 +                         t_forcerec      *fr,
 +                         gmx_vsite_t     *vsite,
 +                         gmx_shellfc_t   shellfc,
 +                         gmx_constr_t    constr,
 +                         t_nrnb          *nrnb,
 +                         gmx_wallcycle_t wcycle,
 +                         gmx_bool            bVerbose)
 +{
 +    gmx_domdec_t *dd;
 +    gmx_domdec_comm_t *comm;
 +    gmx_ddbox_t ddbox={0};
 +    t_block *cgs_gl;
 +    gmx_large_int_t step_pcoupl;
 +    rvec cell_ns_x0,cell_ns_x1;
 +    int  i,j,n,cg0=0,ncg_home_old=-1,nat_f_novirsum;
 +    gmx_bool bBoxChanged,bNStGlobalComm,bDoDLB,bCheckDLB,bTurnOnDLB,bLogLoad;
 +    gmx_bool bRedist,bSortCG,bResortAll;
 +    ivec ncells_old,np;
 +    real grid_density;
 +    char sbuf[22];
 +	
 +    dd = cr->dd;
 +    comm = dd->comm;
 +
 +    bBoxChanged = (bMasterState || DEFORM(*ir));
 +    if (ir->epc != epcNO)
 +    {
 +        /* With nstpcouple > 1 pressure coupling happens
 +         * one step after calculating the pressure.
 +         * Box scaling happens at the end of the MD step,
 +         * after the DD partitioning.
 +         * We therefore have to do DLB in the first partitioning
 +         * after an MD step where P-coupling occurred.
 +         * We need to determine the last step in which p-coupling occurred.
 +         * MRS -- need to validate this for vv?
 +         */
 +        n = ir->nstpcouple;
 +        if (n == 1)
 +        {
 +            step_pcoupl = step - 1;
 +        }
 +        else
 +        {
 +            step_pcoupl = ((step - 1)/n)*n + 1;
 +        }
 +        if (step_pcoupl >= comm->globalcomm_step)
 +        {
 +            bBoxChanged = TRUE;
 +        }
 +    }
 +
 +    /* TRUE when at least nstglobalcomm steps have passed since the last
 +     * stored global communication step.
 +     */
 +    bNStGlobalComm = (step >= comm->globalcomm_step + nstglobalcomm);
 +
 +    if (!comm->bDynLoadBal)
 +    {
 +        bDoDLB = FALSE;
 +    }
 +    else
 +    {
 +        /* Should we do dynamic load balancing this step?
 +         * Since it requires (possibly expensive) global communication,
 +         * we might want to do DLB less frequently.
 +         */
 +        if (bBoxChanged || ir->epc != epcNO)
 +        {
 +            bDoDLB = bBoxChanged;
 +        }
 +        else
 +        {
 +            bDoDLB = bNStGlobalComm;
 +        }
 +    }
 +
 +    /* Check if we have recorded loads on the nodes */
 +    if (comm->bRecordLoad && dd_load_count(comm))
 +    {
 +        if (comm->eDLB == edlbAUTO && !comm->bDynLoadBal)
 +        {
 +            /* Check if we should use DLB at the second partitioning
 +             * and every 100 partitionings,
 +             * so the extra communication cost is negligible.
 +             */
 +            n = max(100,nstglobalcomm);
 +            bCheckDLB = (comm->n_load_collect == 0 ||
 +                         comm->n_load_have % n == n-1);
 +        }
 +        else
 +        {
 +            bCheckDLB = FALSE;
 +        }
 +        
 +        /* Print load every nstlog, first and last step to the log file */
 +        bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
 +                    comm->n_load_collect == 0 ||
-                     (step + ir->nstlist > ir->init_step + ir->nsteps));
++                    (ir->nsteps >= 0 &&
++                     (step + ir->nstlist > ir->init_step + ir->nsteps)));
 +
 +        /* Avoid extra communication due to verbose screen output
 +         * when nstglobalcomm is set.
 +         */
 +        if (bDoDLB || bLogLoad || bCheckDLB ||
 +            (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
 +        {
 +            get_load_distribution(dd,wcycle);
 +            if (DDMASTER(dd))
 +            {
 +                if (bLogLoad)
 +                {
 +                    dd_print_load(fplog,dd,step-1);
 +                }
 +                if (bVerbose)
 +                {
 +                    dd_print_load_verbose(dd);
 +                }
 +            }
 +            comm->n_load_collect++;
 +
 +            if (bCheckDLB) {
 +                /* Since the timings are node dependent, the master decides */
 +                if (DDMASTER(dd))
 +                {
 +                    bTurnOnDLB =
 +                        (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS);
 +                    if (debug)
 +                    {
 +                        fprintf(debug,"step %s, imb loss %f\n",
 +                                gmx_step_str(step,sbuf),
 +                                dd_force_imb_perf_loss(dd));
 +                    }
 +                }
 +                /* bTurnOnDLB is only set on the master above;
 +                 * the other nodes get its value from this broadcast.
 +                 */
 +                dd_bcast(dd,sizeof(bTurnOnDLB),&bTurnOnDLB);
 +                if (bTurnOnDLB)
 +                {
 +                    turn_on_dlb(fplog,cr,step);
 +                    bDoDLB = TRUE;
 +                }
 +            }
 +        }
 +        comm->n_load_have++;
 +    }
 +
 +    cgs_gl = &comm->cgs_gl;
 +
 +    bRedist = FALSE;
 +    if (bMasterState)
 +    {
 +        /* Clear the old state */
 +        clear_dd_indices(dd,0,0);
 +
 +        set_ddbox(dd,bMasterState,cr,ir,state_global->box,
 +                  TRUE,cgs_gl,state_global->x,&ddbox);
 +    
 +        get_cg_distribution(fplog,step,dd,cgs_gl,
 +                            state_global->box,&ddbox,state_global->x);
 +        
 +        dd_distribute_state(dd,cgs_gl,
 +                            state_global,state_local,f);
 +        
 +        dd_make_local_cgs(dd,&top_local->cgs);
 +        
 +        if (dd->ncg_home > fr->cg_nalloc)
 +        {
 +            dd_realloc_fr_cg(fr,dd->ncg_home);
 +        }
 +        calc_cgcm(fplog,0,dd->ncg_home,
 +                  &top_local->cgs,state_local->x,fr->cg_cm);
 +        
 +        inc_nrnb(nrnb,eNR_CGCM,dd->nat_home);
 +        
 +        dd_set_cginfo(dd->index_gl,0,dd->ncg_home,fr,comm->bLocalCG);
 +
 +        cg0 = 0;
 +    }
 +    else if (state_local->ddp_count != dd->ddp_count)
 +    {
 +        /* The local state does not match the current partitioning count */
 +        if (state_local->ddp_count > dd->ddp_count)
 +        {
 +            gmx_fatal(FARGS,"Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)",state_local->ddp_count,dd->ddp_count);
 +        }
 +        
 +        if (state_local->ddp_count_cg_gl != state_local->ddp_count)
 +        {
 +            gmx_fatal(FARGS,"Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)",state_local->ddp_count_cg_gl,state_local->ddp_count);
 +        }
 +        
 +        /* Clear the old state */
 +        clear_dd_indices(dd,0,0);
 +        
 +        /* Build the new indices */
 +        rebuild_cgindex(dd,cgs_gl->index,state_local);
 +        make_dd_indices(dd,cgs_gl->index,0);
 +        
 +        /* Redetermine the cg COMs */
 +        calc_cgcm(fplog,0,dd->ncg_home,
 +                  &top_local->cgs,state_local->x,fr->cg_cm);
 +        
 +        inc_nrnb(nrnb,eNR_CGCM,dd->nat_home);
 +
 +        dd_set_cginfo(dd->index_gl,0,dd->ncg_home,fr,comm->bLocalCG);
 +
 +        set_ddbox(dd,bMasterState,cr,ir,state_local->box,
 +                  TRUE,&top_local->cgs,state_local->x,&ddbox);
 +
 +        bRedist = comm->bDynLoadBal;
 +    }
 +    else
 +    {
 +        /* We have the full state, only redistribute the cgs */
 +
 +        /* Clear the non-home indices */
 +        clear_dd_indices(dd,dd->ncg_home,dd->nat_home);
 +
 +        /* Avoid global communication for dim's without pbc and -gcom */
 +        if (!bNStGlobalComm)
 +        {
 +            copy_rvec(comm->box0    ,ddbox.box0    );
 +            copy_rvec(comm->box_size,ddbox.box_size);
 +        }
 +        set_ddbox(dd,bMasterState,cr,ir,state_local->box,
 +                  bNStGlobalComm,&top_local->cgs,state_local->x,&ddbox);
 +
 +        bBoxChanged = TRUE;
 +        bRedist = TRUE;
 +    }
 +    /* For dim's without pbc and -gcom */
 +    copy_rvec(ddbox.box0    ,comm->box0    );
 +    copy_rvec(ddbox.box_size,comm->box_size);
 +    
 +    set_dd_cell_sizes(dd,&ddbox,dynamic_dd_box(&ddbox,ir),bMasterState,bDoDLB,
 +                      step,wcycle);
 +    
 +    if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
 +    {
 +        write_dd_grid_pdb("dd_grid",step,dd,state_local->box,&ddbox);
 +    }
 +    
 +    /* Check if we should sort the charge groups */
 +    if (comm->nstSortCG > 0)
 +    {
 +        bSortCG = (bMasterState ||
 +                   (bRedist && (step % comm->nstSortCG == 0)));
 +    }
 +    else
 +    {
 +        bSortCG = FALSE;
 +    }
 +
 +    /* Store the home cg count from before the redistribution;
 +     * it is passed to dd_sort_state below when not resorting everything.
 +     */
 +    ncg_home_old = dd->ncg_home;
 +
 +    if (bRedist)
 +    {
 +        cg0 = dd_redistribute_cg(fplog,step,dd,ddbox.tric_dir,
 +                                 state_local,f,fr,mdatoms,
 +                                 !bSortCG,nrnb);
 +    }
 +    
 +    get_nsgrid_boundaries(fr->ns.grid,dd,
 +                          state_local->box,&ddbox,&comm->cell_x0,&comm->cell_x1,
 +                          dd->ncg_home,fr->cg_cm,
 +                          cell_ns_x0,cell_ns_x1,&grid_density);
 +
 +    if (bBoxChanged)
 +    {
 +        comm_dd_ns_cell_sizes(dd,&ddbox,cell_ns_x0,cell_ns_x1,step);
 +    }
 +
 +    /* Store the old ns grid dimensions, used below to decide on bResortAll */
 +    copy_ivec(fr->ns.grid->n,ncells_old);
 +    grid_first(fplog,fr->ns.grid,dd,&ddbox,fr->ePBC,
 +               state_local->box,cell_ns_x0,cell_ns_x1,
 +               fr->rlistlong,grid_density);
 +    /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
 +    copy_ivec(ddbox.tric_dir,comm->tric_dir);
 +
 +    if (bSortCG)
 +    {
 +        /* Sort the state on charge group position.
 +         * This enables exact restarts from this step.
 +         * It also improves performance by about 15% with larger numbers
 +         * of atoms per node.
 +         */
 +        
 +        /* Fill the ns grid with the home cell,
 +         * so we can sort with the indices.
 +         */
 +        set_zones_ncg_home(dd);
 +        fill_grid(fplog,&comm->zones,fr->ns.grid,dd->ncg_home,
 +                  0,dd->ncg_home,fr->cg_cm);
 +        
 +        /* Check if we can use the old order and ns grid cell indices
 +         * of the charge groups to sort the charge groups efficiently.
 +         */
 +        bResortAll = (bMasterState ||
 +                      fr->ns.grid->n[XX] != ncells_old[XX] ||
 +                      fr->ns.grid->n[YY] != ncells_old[YY] ||
 +                      fr->ns.grid->n[ZZ] != ncells_old[ZZ]);
 +
 +        if (debug)
 +        {
 +            fprintf(debug,"Step %s, sorting the %d home charge groups\n",
 +                    gmx_step_str(step,sbuf),dd->ncg_home);
 +        }
 +        dd_sort_state(dd,ir->ePBC,fr->cg_cm,fr,state_local,
 +                      bResortAll ? -1 : ncg_home_old);
 +        /* Rebuild all the indices */
 +        cg0 = 0;
 +        ga2la_clear(dd->ga2la);
 +    }
 +    
 +    /* Set up the communication and communicate the coordinates */
 +    setup_dd_communication(dd,state_local->box,&ddbox,fr);
 +    
 +    /* Set the indices */
 +    make_dd_indices(dd,cgs_gl->index,cg0);
 +
 +    /* Set the charge group boundaries for neighbor searching */
 +    set_cg_boundaries(&comm->zones);
 +    
 +    /*
 +    write_dd_pdb("dd_home",step,"dump",top_global,cr,
 +                 -1,state_local->x,state_local->box);
 +    */
 +    
 +    /* Extract a local topology from the global topology */
 +    for(i=0; i<dd->ndim; i++)
 +    {
 +        np[dd->dim[i]] = comm->cd[i].np;
 +    }
 +    dd_make_local_top(fplog,dd,&comm->zones,dd->npbcdim,state_local->box,
 +                      comm->cellsize_min,np,
 +                      fr,vsite,top_global,top_local);
 +    
 +    /* Set up the special atom communication */
 +    n = comm->nat[ddnatZONE];
 +    for(i=ddnatZONE+1; i<ddnatNR; i++)
 +    {
 +        switch(i)
 +        {
 +        case ddnatVSITE:
 +            if (vsite && vsite->n_intercg_vsite)
 +            {
 +                n = dd_make_local_vsites(dd,n,top_local->idef.il);
 +            }
 +            break;
 +        case ddnatCON:
 +            if (dd->bInterCGcons)
 +            {
 +                /* Only for inter-cg constraints we need special code */
 +                n = dd_make_local_constraints(dd,n,top_global,
 +                                              constr,ir->nProjOrder,
 +                                              &top_local->idef.il[F_CONSTR]);
 +            }
 +            break;
 +        default:
 +            gmx_incons("Unknown special atom type setup");
 +        }
 +        /* n carries over between types, so nat[i] includes all previous types */
 +        comm->nat[i] = n;
 +    }
 +    
 +    /* Make space for the extra coordinates for virtual site
 +     * or constraint communication.
 +     */
 +    state_local->natoms = comm->nat[ddnatNR-1];
 +    if (state_local->natoms > state_local->nalloc)
 +    {
 +        dd_realloc_state(state_local,f,state_local->natoms);
 +    }
 +
 +    if (fr->bF_NoVirSum)
 +    {
 +        if (vsite && vsite->n_intercg_vsite)
 +        {
 +            nat_f_novirsum = comm->nat[ddnatVSITE];
 +        }
 +        else
 +        {
 +            if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
 +            {
 +                nat_f_novirsum = dd->nat_tot;
 +            }
 +            else
 +            {
 +                nat_f_novirsum = dd->nat_home;
 +            }
 +        }
 +    }
 +    else
 +    {
 +        nat_f_novirsum = 0;
 +    }
 +
 +    /* Set the number of atoms required for the force calculation.
 +     * Forces need to be constrained when using a twin-range setup
 +     * or with energy minimization. For simple simulations we could
 +     * avoid some allocation, zeroing and copying, but this is
 +     * probably not worth the complications and checking.
 +     */
 +    forcerec_set_ranges(fr,dd->ncg_home,dd->ncg_tot,
 +                        dd->nat_tot,comm->nat[ddnatCON],nat_f_novirsum);
 +
 +    /* We make all the mdatoms up to nat_tot_con.
 +     * We could save some work by only setting invmass
 +     * between nat_tot and nat_tot_con.
 +     */
 +    /* This call also sets the new number of home particles to dd->nat_home */
 +    atoms2md(top_global,ir,
 +             comm->nat[ddnatCON],dd->gatindex,0,dd->nat_home,mdatoms);
 +
 +    /* Now we have the charges we can sort the FE interactions */
 +    dd_sort_local_top(dd,mdatoms,top_local);
 +
 +    if (shellfc)
 +    {
 +        /* Make the local shell stuff, currently no communication is done */
 +        make_local_shells(cr,mdatoms,shellfc);
 +    }
 +    
 +	if (ir->implicit_solvent)
 +    {
 +        make_local_gb(cr,fr->born,ir->gb_algorithm);
 +    }
 +	
 +    if (!(cr->duty & DUTY_PME))
 +    {
 +        /* Send the charges to our PME only node */
 +        gmx_pme_send_q(cr,mdatoms->nChargePerturbed,
 +                       mdatoms->chargeA,mdatoms->chargeB,
 +                       dd_pme_maxshift_x(dd),dd_pme_maxshift_y(dd));
 +    }
 +    
 +    if (constr)
 +    {
 +        set_constraints(constr,top_local,ir,mdatoms,cr);
 +    }
 +    
 +    if (ir->ePull != epullNO)
 +    {
 +        /* Update the local pull groups */
 +        dd_make_local_pull_groups(dd,ir->pull,mdatoms);
 +    }
 +    
 +    if (ir->bRot)
 +    {
 +        /* Update the local rotation groups */
 +        dd_make_local_rotation_groups(dd,ir->rot);
 +    }
 +
 +
 +    add_dd_statistics(dd);
 +    
 +    /* Make sure we only count the cycles for this DD partitioning */
 +    clear_dd_cycle_counts(dd);
 +    
 +    /* Because the order of the atoms might have changed since
 +     * the last vsite construction, we need to communicate the constructing
 +     * atom coordinates again (for spreading the forces this MD step).
 +     */
 +    dd_move_x_vsites(dd,state_local->box,state_local->x);
 +    
 +    if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
 +    {
 +        dd_move_x(dd,state_local->box,state_local->x);
 +        write_dd_pdb("dd_dump",step,"dump",top_global,cr,
 +                     -1,state_local->x,state_local->box);
 +    }
 +
 +    if (bNStGlobalComm)
 +    {
 +        /* Store the global communication step */
 +        comm->globalcomm_step = step;
 +    }
 +    
 +    /* Increase the DD partitioning counter */
 +    dd->ddp_count++;
 +    /* The state currently matches this DD partitioning count, store it */
 +    state_local->ddp_count = dd->ddp_count;
 +    if (bMasterState)
 +    {
 +        /* The DD master node knows the complete cg distribution,
 +         * store the count so we can possibly skip the cg info communication.
 +         */
 +        comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
 +    }
 +
 +    if (comm->DD_debug > 0)
 +    {
 +        /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
 +        check_index_consistency(dd,top_global->natoms,ncg_mtop(top_global),
 +                                "after partitioning");
 +    }
 +}
diff --cc src/gromacs/mdlib/edsam.c
index 9a08468f0c,0000000000..e7e168aebf
mode 100644,000000..100644
--- a/src/gromacs/mdlib/edsam.c
+++ b/src/gromacs/mdlib/edsam.c
@@@ -1,2577 -1,0 +1,2575 @@@
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <stdio.h>
 +#include <time.h>
 +#include "typedefs.h"
 +#include "string2.h"
 +#include "smalloc.h"
 +#include "names.h"
 +#include "confio.h"
 +#include "mvdata.h"
 +#include "txtdump.h"
 +#include "vec.h"
 +#include "time.h"
 +#include "nrnb.h"
 +#include "mshift.h"
 +#include "mdrun.h"
 +#include "update.h"
 +#include "physics.h"
 +#include "nrjac.h"
 +#include "mtop_util.h"
 +#include "edsam.h"
 +#include "gmxfio.h"
 +#include "groupcoord.h"
 +
 +
 +/* We use the same defines as in mvdata.c here:
 + *  block_bc  broadcasts the single object d,
 + *  nblock_bc broadcasts the nr elements of array d,
 + *  snew_bc   allocates d with nr elements on all nodes except the master.
 + * Note that snew_bc expands to a braced block, not a single expression.
 + */
 +#define  block_bc(cr,   d) gmx_bcast(     sizeof(d),     &(d),(cr))
 +#define nblock_bc(cr,nr,d) gmx_bcast((nr)*sizeof((d)[0]), (d),(cr))
 +#define   snew_bc(cr,d,nr) { if (!MASTER(cr)) snew((d),(nr)); }
 +
 +
 +/* enum to identify the type of ED: none, normal ED, flooding;
 + * eEDnr is the number of types */
 +enum {eEDnone, eEDedsam, eEDflood, eEDnr};
 +
 +/* enum to identify operations on reference, average, origin, target structures;
 + * eedNR is the number of structure kinds */
 +enum {eedREF, eedAV, eedORI, eedTAR, eedNR};
 +
 +
 +/* One set of eigenvectors together with its per-eigenvector working data */
 +typedef struct
 +{
 +    int    neig;     /* nr of eigenvectors             */
 +    int   *ieig;     /* index nrs of eigenvectors      */
 +    real  *stpsz;    /* stepsizes (per eigenvector)    */
 +    rvec  **vec;     /* eigenvector components         */
 +    real  *xproj;    /* instantaneous x projections    */
 +    real  *fproj;    /* instantaneous f projections    */
 +    real  radius;    /* instantaneous radius           */
 +    real  *refproj;  /* starting or target projections */
 +    /* When using flooding as harmonic restraint: The current reference projection
 +     * is at each step calculated from the initial refproj0 and the slope. */
 +    real  *refproj0,*refprojslope;
 +} t_eigvec;
 +
 +
 +/* The eigenvector sets of an ED dataset, one set per kind of treatment */
 +typedef struct
 +{
 +    t_eigvec      mon;            /* only monitored, no constraints       */
 +    t_eigvec      linfix;         /* fixed linear constraints             */
 +    t_eigvec      linacc;         /* acceptance linear constraints        */
 +    t_eigvec      radfix;         /* fixed radial constraints (exp)       */
 +    t_eigvec      radacc;         /* acceptance radial constraints (exp)  */
 +    t_eigvec      radcon;         /* acceptance rad. contraction constr.  */
 +} t_edvecs;
 +
 +
 +/* Parameters and working data for flooding.
 + * NOTE(review): the meaning of the uncommented scalar fields is not
 + * established by this part of the file; document them from the code
 + * that reads and writes them.
 + */
 +typedef struct
 +{
 +    real deltaF0;
 +    gmx_bool bHarmonic;           /* Use flooding for harmonic restraint on
 +                                     the eigenvector                          */
 +    gmx_bool bConstForce;         /* Do not calculate a flooding potential,
 +                                     instead flood with a constant force      */
 +    real tau;
 +    real deltaF;
 +    real Efl;
 +    real kT;
 +    real Vfl;
 +    real dt;
 +    real constEfl;
 +    real alpha2;
 +    int flood_id;
 +    rvec *forces_cartesian;
 +    t_eigvec vecs;         /* use flooding for these */
 +} t_edflood;
 +
 +
 +/* This type is for the average, reference, target, and origin structure.
 + * It holds the collective set of nr atoms as well as the subset of
 + * nr_loc atoms that is present on the local node. */
 +typedef struct gmx_edx
 +{
 +    int           nr;             /* number of atoms this structure contains  */
 +    int           nr_loc;         /* number of atoms on local node            */
 +    int           *anrs;          /* atom index numbers                       */
 +    int           *anrs_loc;      /* local atom index numbers                 */
 +    int           nalloc_loc;     /* allocation size of anrs_loc              */
 +    int           *c_ind;         /* at which position of the whole anrs
 +                                   * array is a local atom?, i.e.
 +                                   * c_ind[0...nr_loc-1] gives the atom index
 +                                   * with respect to the collective
 +                                   * anrs[0...nr-1] array                     */
 +    rvec          *x;             /* positions for this structure             */
 +    rvec          *x_old;         /* used to keep track of the shift vectors
 +                                     such that the ED molecule can always be
 +                                     made whole in the parallel case          */
 +    real          *m;             /* masses                                   */
 +    real          mtot;           /* total mass (only used in sref)           */
 +    real          *sqrtm;         /* sqrt of the masses used for mass-
 +                                   * weighting of analysis (only used in sav) */
 +} t_gmx_edx;
 +
 +
 +/* One ED dataset: its settings, structures, eigenvectors, flooding
 + * parameters and buffers. Datasets are linked through next_edi. */
 +typedef struct edpar
 +{
 +    int            nini;           /* total Nr of atoms                    */
 +    gmx_bool       fitmas;         /* true if trans fit with cm            */
 +    gmx_bool       pcamas;         /* true if mass-weighted PCA            */
 +    int            presteps;       /* number of steps to run without any
 +                                    *    perturbations ... just monitoring */
 +    int            outfrq;         /* freq (in steps) of writing to edo    */
 +    int            maxedsteps;     /* max nr of steps per cycle            */
 +
 +    /* all gmx_edx datasets are copied to all nodes in the parallel case   */
 +    struct gmx_edx sref;           /* reference positions, to these fitting
 +                                    * will be done                         */
 +    gmx_bool       bRefEqAv;       /* If true, reference & average indices
 +                                    * are the same. Used for optimization  */
 +    struct gmx_edx sav;            /* average positions                    */
 +    struct gmx_edx star;           /* target positions                     */
 +    struct gmx_edx sori;           /* origin positions                     */
 +
 +    t_edvecs       vecs;           /* eigenvectors                         */
 +    real           slope;          /* minimal slope in acceptance radexp   */
 +
 +    gmx_bool       bNeedDoEdsam;   /* if any of the options mon, linfix, ...
 +                                    * is used (i.e. apart from flooding)   */
 +    t_edflood      flood;          /* parameters especially for flooding   */
 +    struct t_ed_buffer *buf;       /* handle to local buffers              */
 +    struct edpar   *next_edi;      /* Pointer to another ed dataset        */
 +} t_edpar;
 +
 +
 +/* Top-level ED sampling data: the ED type, the input/output file names,
 + * the output stream and the ED dataset(s). */
 +typedef struct gmx_edsam
 +{
 +    int           eEDtype;        /* Type of ED: see enums above          */
 +    const char    *edinam;        /* name of ED sampling input file       */
 +    const char    *edonam;        /*                     output           */
 +    FILE          *edo;           /* output file pointer                  */
 +    t_edpar       *edpar;         /* ED dataset; further datasets are
 +                                   * reached through edpar->next_edi      */
 +    gmx_bool      bFirst;         /* NOTE(review): presumably marks the
 +                                   * first call -- confirm at the users   */
 +    gmx_bool      bStartFromCpt;  /* NOTE(review): presumably set when
 +                                   * starting from a checkpoint -- confirm */
 +} t_gmx_edsam;
 +
 +
 +/* Buffers that are reached through the do_edsam member of t_ed_buffer:
 + * collective positions with their shifts, and transformation data kept
 + * from earlier steps. */
 +struct t_do_edsam
 +{
 +    matrix old_rotmat;
 +    real oldrad;
 +    rvec old_transvec,older_transvec,transvec_compact;
 +    rvec *xcoll;         /* Positions from all nodes, this is the
 +                            collective set we work on.
 +                            These are the positions of atoms with
 +                            average structure indices */
 +    rvec *xc_ref;        /* same but with reference structure indices */
 +    ivec *shifts_xcoll;        /* Shifts for xcoll  */
 +    ivec *extra_shifts_xcoll;  /* xcoll shift changes since last NS step */
 +    ivec *shifts_xc_ref;       /* Shifts for xc_ref */
 +    ivec *extra_shifts_xc_ref; /* xc_ref shift changes since last NS step */
 +    gmx_bool bUpdateShifts;    /* TRUE in NS steps to indicate that the
 +                                  ED shifts for this ED dataset need to
 +                                  be updated */
 +};
 +
 +
 +/* Definition of the ED buffer structure: one pointer per buffer struct.
 + * t_edpar refers to this structure through its buf member. */
 +struct t_ed_buffer
 +{
 +    struct t_fit_to_ref *           fit_to_ref;
 +    struct t_do_edfit *             do_edfit;
 +    struct t_do_edsam *             do_edsam;
 +    struct t_do_radcon *            do_radcon;
 +};
 +
 +
 +/* Function declarations */
 +static void fit_to_reference(rvec *xcoll,rvec transvec,matrix rotmat,t_edpar *edi);
 +
 +static void translate_and_rotate(rvec *x,int nat,rvec transvec,matrix rotmat);
 +/* End function declarations */
 +
 +
 +/* Return the projection of xcoll on the single eigenvector vec, weighting
 + * the contribution of each atom with edi->sav.sqrtm.
 + * The average positions are NOT subtracted here; ed_apply_constraints does
 + * that before calling projectx.
 + * Used by: do_linfix, do_linacc, do_radfix, do_radacc, do_radcon
 + */
 +static real projectx(t_edpar *edi, rvec *xcoll, rvec *vec)
 +{
 +    real sum=0.0;
 +    int  atom;
 +
 +
 +    atom = 0;
 +    while (atom < edi->sav.nr)
 +    {
 +        sum += edi->sav.sqrtm[atom]*iprod(vec[atom], xcoll[atom]);
 +        atom++;
 +    }
 +
 +    return sum;
 +}
 +
 +
 +/* Specialized projection: the result is stored in vec->refproj and the
 + * distance between refproj and xproj in vec->radius.
 + * -> used for radacc, radfix, radcon  and center of flooding potential
 + * The average positions are subtracted from x for the projection and
 + * added back afterwards. */
 +static void rad_project(t_edpar *edi, rvec *x, t_eigvec *vec, t_commrec *cr)
 +{
 +    real sumsq=0.0;
 +    int  atom,ev;
 +
 +    /* Remove the average positions */
 +    for (atom = 0; atom < edi->sav.nr; atom++)
 +    {
 +        rvec_dec(x[atom], edi->sav.x[atom]);
 +    }
 +
 +    /* Project on every eigenvector and accumulate the squared distance */
 +    ev = 0;
 +    while (ev < vec->neig)
 +    {
 +        vec->refproj[ev] = projectx(edi,x,vec->vec[ev]);
 +        sumsq += pow((vec->refproj[ev]-vec->xproj[ev]),2);
 +        ev++;
 +    }
 +    vec->radius=sqrt(sumsq);
 +
 +    /* Restore the average positions */
 +    for (atom = 0; atom < edi->sav.nr; atom++)
 +    {
 +        rvec_inc(x[atom], edi->sav.x[atom]);
 +    }
 +}
 +
 +
 +/* Project x on all eigenvectors of vec and store the results in vec->xproj.
 + * The average positions are subtracted before the projection and added
 + * back afterwards, which retains the original vector. The weighting with
 + * edi->sav.sqrtm is done in projectx. */
 +static void project_to_eigvectors(rvec       *x,    /* The positions to project to an eigenvector */
 +                                  t_eigvec   *vec,  /* The eigenvectors */
 +                                  t_edpar    *edi)
 +{
 +    int  atom,ev;
 +
 +
 +    /* Nothing to do without eigenvectors */
 +    if (vec->neig == 0)
 +    {
 +        return;
 +    }
 +
 +    /* Remove the average positions */
 +    for (atom=0; atom<edi->sav.nr; atom++)
 +    {
 +        rvec_dec(x[atom], edi->sav.x[atom]);
 +    }
 +
 +    ev = 0;
 +    while (ev < vec->neig)
 +    {
 +        vec->xproj[ev] = projectx(edi, x, vec->vec[ev]);
 +        ev++;
 +    }
 +
 +    /* Restore the average positions */
 +    for (atom=0; atom<edi->sav.nr; atom++)
 +    {
 +        rvec_inc(x[atom], edi->sav.x[atom]);
 +    }
 +}
 +
 +
 +/* Project vector x onto all eigenvector sets in edi->vecs, in the order
 + * mon, linfix, linacc, radfix, radacc, radcon */
 +static void project(rvec      *x,     /* positions to project */
 +                    t_edpar   *edi)   /* edi data set */
 +{
 +    t_eigvec *set[6];
 +    int      s;
 +
 +
 +    set[0] = &edi->vecs.mon;
 +    set[1] = &edi->vecs.linfix;
 +    set[2] = &edi->vecs.linacc;
 +    set[3] = &edi->vecs.radfix;
 +    set[4] = &edi->vecs.radacc;
 +    set[5] = &edi->vecs.radcon;
 +
 +    /* The average position is subtracted again for every set. This is
 +     * not much extra work, since these sets are rarely used simultaneously */
 +    for (s=0; s<6; s++)
 +    {
 +        project_to_eigvectors(x, set[s], edi);
 +    }
 +}
 +
 +
 +/* Return the distance between the reference projections (refproj) and
 + * the instantaneous projections (xproj) of the eigenvector set vec */
 +static real calc_radius(t_eigvec *vec)
 +{
 +    real sumsq=0.0;
 +    int  ev;
 +
 +
 +    ev = 0;
 +    while (ev < vec->neig)
 +    {
 +        sumsq += pow((vec->refproj[ev]-vec->xproj[ev]),2);
 +        ev++;
 +    }
 +
 +    sumsq = sqrt(sumsq);
 +
 +    return sumsq;
 +}
 +
 +
 +/* Debug helper */
 +#ifdef DEBUGHELPERS
 +/* Write the collective positions of edi with their shifts and extra shifts
 + * to the text file xcolldump_step<step>.txt, one line per atom.
 + * Only the master node writes; all other nodes return immediately.
 + *
 + * edi  - ED dataset, edi->sav supplies the atom count and atom numbers
 + * buf  - buffers holding xcoll, shifts_xcoll and extra_shifts_xcoll
 + * cr   - communication record, used for the master check
 + * step - step number, used in the file name
 + */
 +static void dump_xcoll(t_edpar *edi, struct t_do_edsam *buf, t_commrec *cr,
 +                       int step)
 +{
 +    int i;
 +    FILE *fp;
 +    char fn[STRLEN];
 +    rvec *xcoll;
 +    ivec *shifts, *eshifts;
 +
 +
 +    if (!MASTER(cr))
 +        return;
 +
 +    xcoll   = buf->xcoll;
 +    shifts  = buf->shifts_xcoll;
 +    eshifts = buf->extra_shifts_xcoll;
 +
 +    sprintf(fn, "xcolldump_step%d.txt", step);
 +    /* Use ffopen, as dump_edi does, instead of a bare fopen whose result
 +     * was passed unchecked to fprintf and fclose: a file that cannot be
 +     * opened must not lead to writing to a NULL stream.
 +     */
 +    fp = ffopen(fn, "w");
 +
 +    for (i=0; i<edi->sav.nr; i++)
 +        fprintf(fp, "%d %9.5f %9.5f %9.5f   %d %d %d   %d %d %d\n",
 +                edi->sav.anrs[i]+1,
 +                xcoll[i][XX]  , xcoll[i][YY]  , xcoll[i][ZZ],
 +                shifts[i][XX] , shifts[i][YY] , shifts[i][ZZ],
 +                eshifts[i][XX], eshifts[i][YY], eshifts[i][ZZ]);
 +
 +    ffclose(fp);
 +}
 +
 +
 +/* Debug helper */
 +static void dump_edi_positions(FILE *out, struct gmx_edx *s, const char name[])
 +{
 +    int      iatom;
 +    gmx_bool bHaveSqrtm;
 +
 +
 +    /* Header with the group name and the number of positions */
 +    fprintf(out, "#%s positions:\n%d\n", name, s->nr);
 +
 +    if (s->nr != 0)
 +    {
 +        /* The sqrt(m) column is only written if that array exists */
 +        bHaveSqrtm = (s->sqrtm != NULL);
 +
 +        fprintf(out, "#index, x, y, z");
 +        if (bHaveSqrtm)
 +        {
 +            fprintf(out, ", sqrt(m)");
 +        }
 +
 +        /* Each record starts with the newline that ends the previous one */
 +        iatom = 0;
 +        while (iatom < s->nr)
 +        {
 +            fprintf(out, "\n%6d  %11.6f %11.6f %11.6f",s->anrs[iatom], s->x[iatom][XX], s->x[iatom][YY], s->x[iatom][ZZ]);
 +            if (bHaveSqrtm)
 +            {
 +                fprintf(out,"%9.3f",s->sqrtm[iatom]);
 +            }
 +            iatom++;
 +        }
 +        fprintf(out, "\n");
 +    }
 +}
 +
 +
 +/* Debug helper */
 +static void dump_edi_eigenvecs(FILE *out, t_eigvec *ev,
 +                               const char name[], int length)
 +{
 +    int ieig, icomp;
 +
 +
 +    /* Group header: name and number of eigenvectors */
 +    fprintf(out, "#%s eigenvectors:\n%d\n", name, ev->neig);
 +
 +    ieig = 0;
 +    while (ieig < ev->neig)
 +    {
 +        /* Per-eigenvector scalars first ... */
 +        fprintf(out, "EV %4d\ncomponents %d\nstepsize %f\nxproj %f\nfproj %f\nrefproj %f\nradius %f\nComponents:\n",
 +                ev->ieig[ieig], length, ev->stpsz[ieig], ev->xproj[ieig], ev->fproj[ieig], ev->refproj[ieig], ev->radius);
 +
 +        /* ... followed by its 'length' three-dimensional components */
 +        icomp = 0;
 +        while (icomp < length)
 +        {
 +            fprintf(out, "%11.6f %11.6f %11.6f\n", ev->vec[ieig][icomp][XX], ev->vec[ieig][icomp][YY], ev->vec[ieig][icomp][ZZ]);
 +            icomp++;
 +        }
 +        ieig++;
 +    }
 +}
 +
 +
 +/* Debug helper */
 +static void dump_edi(t_edpar *edpars, t_commrec *cr, int nr_edi)
 +{
 +    FILE           *out;
 +    char           fn[STRLEN];
 +    int            k;
 +    struct gmx_edx *pos[4];
 +    const char     *pos_name[4] = { "REFERENCE", "AVERAGE", "TARGET", "ORIGIN" };
 +    t_eigvec       *evs[7];
 +    const char     *evs_name[7] = { "MONITORED", "LINFIX", "LINACC", "RADFIX",
 +                                    "RADACC", "RADCON", "FLOODING" };
 +
 +
 +    /* Position sets, in output order */
 +    pos[0] = &edpars->sref;
 +    pos[1] = &edpars->sav;
 +    pos[2] = &edpars->star;
 +    pos[3] = &edpars->sori;
 +
 +    /* Eigenvector groups, in output order; the flooding vectors come last */
 +    evs[0] = &edpars->vecs.mon;
 +    evs[1] = &edpars->vecs.linfix;
 +    evs[2] = &edpars->vecs.linacc;
 +    evs[3] = &edpars->vecs.radfix;
 +    evs[4] = &edpars->vecs.radacc;
 +    evs[5] = &edpars->vecs.radcon;
 +    evs[6] = &edpars->flood.vecs;
 +
 +    /* One dump file per node and per ED data set */
 +    sprintf(fn, "EDdump_node%d_edi%d", cr->nodeid, nr_edi);
 +    out = ffopen(fn, "w");
 +
 +    /* Scalar parameters */
 +    fprintf(out,"#NINI\n %d\n#FITMAS\n %d\n#ANALYSIS_MAS\n %d\n",
 +            edpars->nini,edpars->fitmas,edpars->pcamas);
 +    fprintf(out,"#OUTFRQ\n %d\n#MAXLEN\n %d\n#SLOPECRIT\n %f\n",
 +            edpars->outfrq,edpars->maxedsteps,edpars->slope);
 +    fprintf(out,"#PRESTEPS\n %d\n#DELTA_F0\n %f\n#TAU\n %f\n#EFL_NULL\n %f\n#ALPHA2\n %f\n",
 +            edpars->presteps,edpars->flood.deltaF0,edpars->flood.tau,
 +            edpars->flood.constEfl,edpars->flood.alpha2);
 +
 +    /* Reference, average, target and origin positions */
 +    for (k = 0; k < 4; k++)
 +    {
 +        dump_edi_positions(out, pos[k], pos_name[k]);
 +    }
 +
 +    /* All eigenvector groups; every vector has edpars->sav.nr components */
 +    for (k = 0; k < 7; k++)
 +    {
 +        dump_edi_eigenvecs(out, evs[k], evs_name[k], edpars->sav.nr);
 +    }
 +
 +    /* Addresses of the ED-local buffers */
 +    fprintf(out, "buf->do_edfit         =%p\n", (void*)edpars->buf->do_edfit  );
 +    fprintf(out, "buf->do_edsam         =%p\n", (void*)edpars->buf->do_edsam  );
 +    fprintf(out, "buf->do_radcon        =%p\n", (void*)edpars->buf->do_radcon );
 +
 +    ffclose(out);
 +}
 +
 +
 +/* Debug helper */
 +static void dump_rotmat(FILE* out,matrix rotmat)
 +{
 +    const int row[3] = { XX, YY, ZZ };
 +    int       k;
 +
 +
 +    /* One output line per matrix row, in the order XX, YY, ZZ */
 +    for (k = 0; k < 3; k++)
 +    {
 +        fprintf(out,"ROTMAT: %12.8f %12.8f %12.8f\n",rotmat[row[k]][XX],rotmat[row[k]][YY],rotmat[row[k]][ZZ]);
 +    }
 +}
 +
 +
 +/* Debug helper */
 +static void dump_rvec(FILE *out, int dim, rvec *x)
 +{
 +    int n;
 +
 +
 +    /* Print the first 'dim' vectors of x, each preceded by its index */
 +    n = 0;
 +    while (n < dim)
 +    {
 +        fprintf(out,"%4d   %f %f %f\n",n,x[n][XX],x[n][YY],x[n][ZZ]);
 +        n++;
 +    }
 +}
 +
 +
 +/* Debug helper */
 +static void dump_mat(FILE* out, int dim, double** mat)
 +{
 +    int row, col;
 +
 +
 +    /* Print a dim x dim matrix, one row per output line */
 +    fprintf(out,"MATRIX:\n");
 +    row = 0;
 +    while (row < dim)
 +    {
 +        col = 0;
 +        while (col < dim)
 +        {
 +            fprintf(out,"%f ",mat[row][col]);
 +            col++;
 +        }
 +        fprintf(out,"\n");
 +        row++;
 +    }
 +}
 +#endif
 +
 +
 +/* Scratch matrices kept between calls of do_edfit(). do_edfit() allocates
 + * both as 2*DIM rows of 2*DIM doubles on its first call and clears them at
 + * the start of every call. */
 +struct t_do_edfit {
 +    double **omega;  /* symmetric matrix built from U, first argument to jacobi() */
 +    double **om;     /* fourth argument to jacobi(); its columns are read back
 +                      * afterwards (presumably the eigenvectors - confirm against jacobi()) */
 +};
 +
 +/* Compute a rotation matrix R from the two position sets xp and x (natoms
 + * entries each). Only the correlation of the positions enters; no masses are
 + * used in this routine and no translation is applied here. edi is only used
 + * to reach the persistent scratch buffer edi->buf->do_edfit.
 + *
 + * natoms  number of positions in xp and x
 + * xp      first position set (enters U as the row index)
 + * x       second position set (enters U as the column index)
 + * R       output: the resulting 3x3 matrix
 + * edi     ED data set owning the scratch buffer
 + */
 +static void do_edfit(int natoms,rvec *xp,rvec *x,matrix R,t_edpar *edi)
 +{
 +    /* this is a copy of do_fit with some modifications */
 +    int    c,r,n,j,i,irot;
 +    double d[6],xnr,xpc;
 +    matrix vh,vk,u;
 +    int    index;
 +    real   max_d;
 +
 +    struct t_do_edfit *loc;
 +    gmx_bool bFirst;
 +
 +    /* The scratch buffer is created on the first call and reused afterwards */
 +    if(edi->buf->do_edfit != NULL)
 +        bFirst = FALSE;
 +    else
 +    {
 +        bFirst = TRUE;
 +        snew(edi->buf->do_edfit,1);
 +    }
 +    loc = edi->buf->do_edfit;
 +
 +    /* First call only: allocate the two (2*DIM) x (2*DIM) matrices */
 +    if (bFirst)
 +    {
 +        snew(loc->omega,2*DIM);
 +        snew(loc->om,2*DIM);
 +        for(i=0; i<2*DIM; i++)
 +        {
 +            snew(loc->omega[i],2*DIM);
 +            snew(loc->om[i],2*DIM);
 +        }
 +    }
 +
 +    /* Start every call from cleared d, omega and om */
 +    for(i=0;(i<6);i++)
 +    {
 +        d[i]=0;
 +        for(j=0;(j<6);j++)
 +        {
 +            loc->omega[i][j]=0;
 +            loc->om[i][j]=0;
 +        }
 +    }
 +
 +    /* calculate the matrix U */
 +    /* u[c][r] = sum over atoms of xp[n][c]*x[n][r] */
 +    clear_mat(u);
 +    for(n=0;(n<natoms);n++)
 +    {
 +        for(c=0; (c<DIM); c++)
 +        {
 +            xpc=xp[n][c];
 +            for(r=0; (r<DIM); r++)
 +            {
 +                xnr=x[n][r];
 +                u[c][r]+=xnr*xpc;
 +            }
 +        }
 +    }
 +
 +    /* construct loc->omega */
 +    /* loc->omega is symmetric -> loc->omega==loc->omega' */
 +    /* Only the off-diagonal 3x3 blocks are non-zero: rows 3..5 / columns 0..2
 +     * hold u, and the mirrored entries hold the same values */
 +    for(r=0;(r<6);r++)
 +        for(c=0;(c<=r);c++)
 +            if ((r>=3) && (c<3))
 +            {
 +                loc->omega[r][c]=u[r-3][c];
 +                loc->omega[c][r]=u[r-3][c];
 +            }
 +            else
 +            {
 +                loc->omega[r][c]=0;
 +                loc->omega[c][r]=0;
 +            }
 +
 +    /* determine h and k */
 +    /* NOTE(review): dump_mat() is defined under DEBUGHELPERS in this file, while
 +     * this block is guarded by DEBUG - confirm both are defined together */
 +#ifdef DEBUG
 +    {
 +        int i;
 +        dump_mat(stderr,2*DIM,loc->omega);
 +        for (i=0; i<6; i++)
 +            fprintf(stderr,"d[%d] = %f\n",i,d[i]);
 +    }
 +#endif
 +    /* Decompose omega: d and loc->om are filled by jacobi() (presumably with
 +     * eigenvalues and eigenvectors), irot is set by jacobi() as well */
 +    jacobi(loc->omega,6,d,loc->om,&irot);
 +
 +    if (irot==0)
 +        fprintf(stderr,"IROT=0\n");
 +
 +    index=0; /* For the compiler only */
 +
 +    /* Pick the three largest entries of d, one per pass. A picked entry is
 +     * overwritten with -10000 so that it cannot be picked again. */
 +    for(j=0;(j<3);j++)
 +    {
 +        max_d=-1000;
 +        for(i=0;(i<6);i++)
 +            if (d[i]>max_d)
 +            {
 +                max_d=d[i];
 +                index=i;
 +            }
 +        d[index]=-10000;
 +        /* Upper half of column 'index' goes to vh, lower half to vk,
 +         * both scaled by sqrt(2) */
 +        for(i=0;(i<3);i++)
 +        {
 +            vh[j][i]=M_SQRT2*loc->om[i][index];
 +            vk[j][i]=M_SQRT2*loc->om[i+DIM][index];
 +        }
 +    }
 +
 +    /* determine R */
 +    for(c=0;(c<3);c++)
 +        for(r=0;(r<3);r++)
 +            R[c][r]=vk[0][r]*vh[0][c]+
 +            vk[1][r]*vh[1][c]+
 +            vk[2][r]*vh[2][c];
 +    /* A negative determinant is corrected by recomputing R with the sign of
 +     * the third term flipped */
 +    if (det(R) < 0)
 +        for(c=0;(c<3);c++)
 +            for(r=0;(r<3);r++)
 +                R[c][r]=vk[0][r]*vh[0][c]+
 +                vk[1][r]*vh[1][c]-
 +                vk[2][r]*vh[2][c];
 +}
 +
 +
 +/* Undo a fit on the nat positions in xcoll: first apply the transposed
 + * rotation matrix, then translate by the negated translation vector. */
 +static void rmfit(int nat, rvec *xcoll, rvec transvec, matrix rotmat)
 +{
 +    matrix inv_rot;
 +    rvec   back_shift;
 +
 +
 +    /* The shift that takes the translation out again */
 +    back_shift[XX] = -transvec[XX];
 +    back_shift[YY] = -transvec[YY];
 +    back_shift[ZZ] = -transvec[ZZ];
 +
 +    /* The inverse rotation is described by the transposed rotation matrix */
 +    transpose(rotmat,inv_rot);
 +
 +    /* Rotation is removed first, translation second */
 +    rotate_x(xcoll, nat, inv_rot);
 +    translate_x(xcoll, nat, back_shift);
 +}
 +
 +
 +/**********************************************************************************
 + ******************** FLOODING ****************************************************
 + **********************************************************************************
 +
 +The flooding ability was added later to edsam. Much of the edsam functionality could be reused for that purpose.
 +The flooding covariance matrix, i.e. the selected eigenvectors and their corresponding eigenvalues are
 +read as 7th Component Group. The eigenvalues are coded into the stepsize parameter (as used by -linfix or -linacc).
 +
 +do_md calls the function init_edsam right at the beginning, which reads the edi file, saves all the necessary information in
 +the edi structure and calls init_flood to initialise some extra fields in the edi->flood structure.
 +
 +Since the flooding acts on forces, do_flood is called from the function force() (force.c), while the other
 +edsam functionality is hooked into md via the update() (update.c) function, acting as a constraint on positions.
 +
 +do_flood makes a copy of the positions,
 +fits them, projects them, and computes the flooding energy and the flooding forces. The forces are computed in the
 +space of the eigenvectors and are then blown up to the full cartesian space and rotated back to remove the
 +fit. Then do_flood adds these forces to the forcefield-forces
 +(given as parameter) and updates the adaptive flooding parameters Efl and deltaF.
 +
 +To center the flooding potential at a different location one can use the -ori option in make_edi. The ori
 +structure is projected to the system of eigenvectors and then this position in the subspace is used as
 +center of the flooding potential.   If the option is not used, the center will be zero in the subspace,
 +i.e. the average structure as given in the make_edi file.
 +
 +To use the flooding potential as restraint, make_edi has the option -restrain, which leads to inverted
 +signs of alpha2 and Efl, such that the sign in the exponential of Vfl is not inverted but the sign of
 +Vfl is inverted. Vfl = Efl * exp (- .../Efl/alpha2*x^2...) With tau>0 the negative Efl will grow slowly
 +so that the restraint is switched off slowly. Once Efl==0 is reached while inverted flooding is ON, no
 + further adaption is applied; Efl will stay constant at zero.
 +
 +To use restraints with harmonic potentials switch -restrain and -harmonic. Then the eigenvalues are
 +used as spring constants for the harmonic potential.
 +Note that eq3 in the flooding paper (J. Comp. Chem. 2006, 27, 1693-1702) defines the parameter lambda \
 +as the inverse of the spring constant, whereas the implementation uses lambda as the spring constant.
 +
 +To use more than one flooding matrix just concatenate several .edi files (cat flood1.edi flood2.edi > flood_all.edi);
 +the routine read_edi_file reads all of these flooding files.
 +The structure t_edi is now organized as a list of t_edis and the function do_flood cycles through the list,
 +calling the do_single_flood() routine for every single entry. Since all state variables are kept within their own
 +edi, there is no interdependence whatsoever. The forces are added together.
 +
 +  To write energies into the .edr file, call the function
 +        get_flood_enx_names(char**, int *nnames) to get the Header (Vfl1 Vfl2... Vfln)
 +and call
 +        get_flood_energies(real Vfl[],int nnames);
 +
 +  TODO:
 +- one could program the whole thing such that Efl, Vfl and deltaF are written to the .edr file. -- I don't know how to do that yet.
 +
 +  Maybe one should give a range of atoms for which to remove motion, so that motion is removed with
 +  two edsam files from two peptide chains
 +*/
 +
 +/* Write one flooding record for time step 'step' to fp: the flooding id with
 + * Efl, Vfl and deltaF, optionally the current reference projections, and the
 + * forces along the flooding eigenvectors. */
 +static void write_edo_flood(t_edpar *edi, FILE *fp, gmx_large_int_t step)
 +{
 +    t_eigvec *ev;
 +    char     stepstr[22];
 +    int      k;
 +    gmx_bool bMovingRef;
 +
 +
 +    ev = &edi->flood.vecs;
 +
 +    fprintf(fp,"%d.th FL: %s %12.5e %12.5e %12.5e\n",
 +            edi->flood.flood_id, gmx_step_str(step,stepstr),
 +            edi->flood.Efl, edi->flood.Vfl, edi->flood.deltaF);
 +
 +
 +    /* With flooding used as harmonic restraint the references may change with
 +     * time. If at least one reference has a non-zero slope, all current
 +     * reference projections are written. */
 +    if (edi->flood.bHarmonic)
 +    {
 +        bMovingRef = FALSE;
 +        for (k = 0; k < ev->neig && !bMovingRef; k++)
 +        {
 +            bMovingRef = (ev->refprojslope[k] != 0.0);
 +        }
 +
 +        if (bMovingRef)
 +        {
 +            fprintf(fp, "Ref. projs.: ");
 +            k = 0;
 +            while (k < ev->neig)
 +            {
 +                fprintf(fp, "%12.5e ", ev->refproj[k]);
 +                k++;
 +            }
 +            fprintf(fp, "\n");
 +        }
 +    }
 +
 +    /* Forces in the subspace of the flooding eigenvectors */
 +    fprintf(fp,"FL_FORCES: ");
 +    k = 0;
 +    while (k < ev->neig)
 +    {
 +        fprintf(fp," %12.5e",ev->fproj[k]);
 +        k++;
 +    }
 +    fprintf(fp,"\n");
 +}
 +
 +
 +/* From flood.xproj compute the Vfl(x) at this point */
 +static real flood_energy(t_edpar *edi, gmx_large_int_t step)
 +{
 +    /* Flooding energy Vfl at the current projections flood.vecs.xproj.
 +     *
 +     * Gaussian form:
 +     *   Vfl = Efl * exp( - kT/(2 Efl alpha2) * sum_i { stpsz_i * c_i^2 } )
 +     * Harmonic form (bHarmonic):
 +     *   Vfl = - Efl * 1/2 * sum_i { stpsz_i * c_i^2 }
 +     *
 +     * c_i is the difference between projection and reference projection for
 +     * eigenvector i; stpsz_i is the per-eigenvector value delivered by
 +     * make_edi (reciprocal eigenvalue).
 +     */
 +    t_eigvec *ev;
 +    real     wsum;
 +    real     Vfl;
 +    int      k;
 +
 +
 +    ev = &edi->flood.vecs;
 +
 +    /* For harmonic restraints the reference is recomputed on every call as
 +     * refproj0 + step*refprojslope, which realizes a restraint towards a
 +     * moving reference. With a zero slope the reference does not change. */
 +    if (edi->flood.bHarmonic)
 +    {
 +        k = 0;
 +        while (k < ev->neig)
 +        {
 +            ev->refproj[k] = ev->refproj0[k] + step * ev->refprojslope[k];
 +            k++;
 +        }
 +    }
 +
 +    /* Weighted sum of squared deviations */
 +    wsum = 0.0;
 +    k    = 0;
 +    while (k < ev->neig)
 +    {
 +        wsum += ev->stpsz[k]*(ev->xproj[k]-ev->refproj[k])*(ev->xproj[k]-ev->refproj[k]);
 +        k++;
 +    }
 +
 +    if (edi->flood.bHarmonic)
 +    {
 +        /* minus sign because Efl is negative, if restrain is on. */
 +        Vfl = -0.5*edi->flood.Efl*wsum;
 +    }
 +    else if (edi->flood.Efl!=0)
 +    {
 +        /* The Gauss function */
 +        Vfl = edi->flood.Efl*exp(-edi->flood.kT/2/edi->flood.Efl/edi->flood.alpha2*wsum);
 +    }
 +    else
 +    {
 +        /* Efl of zero: no flooding energy (and no division by zero) */
 +        Vfl = 0;
 +    }
 +
 +    return Vfl;
 +}
 +
 +
 +/* From the position and from Vfl compute forces in subspace -> store in edi->vec.flood.fproj */
 +static void flood_forces(t_edpar *edi)
 +{
 +    /* Forces in the subspace of the flooding eigenvectors, written to
 +     * edi->flood.vecs.fproj.
 +     *   harmonic:  F_i = Efl * stpsz_i * c_i
 +     *   otherwise: F_i = kT/Efl/alpha2 * Vfl * stpsz_i * c_i   (0 if Efl is 0)
 +     * with c_i = xproj_i - refproj_i */
 +
 +    t_eigvec *ev;
 +    int      k;
 +    real     energy=edi->flood.Vfl;
 +
 +
 +    ev = &edi->flood.vecs;
 +
 +    for (k = 0; k < ev->neig; k++)
 +    {
 +        if (edi->flood.bHarmonic)
 +        {
 +            ev->fproj[k] = edi->flood.Efl* ev->stpsz[k]*(ev->xproj[k]-ev->refproj[k]);
 +        }
 +        else if (edi->flood.Efl!=0)
 +        {
 +            ev->fproj[k] = edi->flood.kT/edi->flood.Efl/edi->flood.alpha2*energy*ev->stpsz[k]*(ev->xproj[k]-ev->refproj[k]);
 +        }
 +        else
 +        {
 +            /* if Efl is zero the forces are zero */
 +            ev->fproj[k] = 0;
 +        }
 +    }
 +}
 +
 +
 +/* Raise forces from subspace into cartesian space */
 +static void flood_blowup(t_edpar *edi, rvec *forces_cart)
 +{
 +    /* Lift the subspace forces edi->flood.vecs.fproj to cartesian space for
 +     * the edi->sav.nr_loc local atoms. The force on an atom is the sum over
 +     * all flooding eigenvectors of (subspace force) * (eigenvector component
 +     * of that atom). Components outside the subspace would only contribute
 +     * zero-valued summands, so they are simply left out.
 +     *
 +     * The result is kept in the separate array forces_cart (rather than being
 +     * added to the forcefield forces right away) to have the flooding forces
 +     * accessible for diagnostics.
 +     */
 +    t_eigvec *ev;
 +    int      iloc, ieig;
 +    rvec     contrib;
 +
 +
 +    ev = &edi->flood.vecs;
 +
 +    for (iloc = 0; iloc < edi->sav.nr_loc; iloc++)
 +    {
 +        /* Start this atom from zero ... */
 +        clear_rvec(forces_cart[iloc]);
 +
 +        /* ... and add the contribution of every eigenvector. The eigenvector
 +         * component is looked up through the collective index c_ind. */
 +        for (ieig = 0; ieig < ev->neig; ieig++)
 +        {
 +            svmul(ev->fproj[ieig],ev->vec[ieig][edi->sav.c_ind[iloc]],contrib);
 +            rvec_inc(forces_cart[iloc],contrib);
 +        }
 +    }
 +}
 +
 +
 +/* Update the values of Efl, deltaF depending on tau and Vfl */
 +static void update_adaption(t_edpar *edi)
 +{
 +    /* Update Efl and deltaF following the rules given in
 +     * 'predicting unimolecular chemical reactions: chemical flooding' M Mueller et al,
 +     * J. chem Phys. */
 +
 +    /* No adaption unless |tau| exceeds the threshold */
 +    if (!(edi->flood.tau > 0.00000001 || edi->flood.tau < -0.00000001))
 +    {
 +        return;
 +    }
 +
 +    edi->flood.Efl += edi->flood.dt/edi->flood.tau*(edi->flood.deltaF0-edi->flood.deltaF);
 +
 +    /* check if restrain (inverted flooding) -> don't let EFL become positive */
 +    if (edi->flood.alpha2<0)
 +    {
 +        if (edi->flood.Efl>-0.00000001)
 +        {
 +            edi->flood.Efl = 0;
 +        }
 +    }
 +
 +    edi->flood.deltaF = (1-edi->flood.dt/edi->flood.tau)*edi->flood.deltaF+edi->flood.dt/edi->flood.tau*edi->flood.Vfl;
 +}
 +
 +
 +/* Apply the flooding potential of one ED data set: collect the positions,
 + * fit them to the reference, project onto the flooding eigenvectors, compute
 + * energy and forces (unless constant-force flooding is used), and add the
 + * back-rotated cartesian forces to 'force'.
 + *
 + * edo    output file, written by the master every edi->outfrq steps
 + * x      positions on the local node
 + * force  forces to which the flooding forces of the local atoms are added
 + * edi    the ED/flooding data set to process
 + * step   current step, passed on to flood_energy and the output
 + * box    box, passed on to communicate_group_positions
 + * cr     communication record
 + */
 +static void do_single_flood(
 +        FILE *edo,
 +        rvec x[],
 +        rvec force[],
 +        t_edpar *edi,
 +        gmx_large_int_t step,
 +        matrix box,
 +        t_commrec *cr)
 +{
 +    int i;
 +    matrix  rotmat;         /* rotation matrix */
 +    matrix  tmat;           /* inverse rotation */
 +    rvec    transvec;       /* translation vector */
 +    struct t_do_edsam *buf;
 +
 +
 +    buf=edi->buf->do_edsam;
 +
 +    /* Broadcast the positions of the AVERAGE structure such that they are known on
 +     * every processor. Each node contributes its local positions x and stores them in
 +     * the collective ED array buf->xcoll */
 +    communicate_group_positions(cr, buf->xcoll, buf->shifts_xcoll, buf->extra_shifts_xcoll, buf->bUpdateShifts, x,
 +                    edi->sav.nr, edi->sav.nr_loc, edi->sav.anrs_loc, edi->sav.c_ind, edi->sav.x_old, box);
 +
 +    /* Only assemble the REFERENCE positions if their indices differ from the average ones */
 +    if (!edi->bRefEqAv)
 +        communicate_group_positions(cr, buf->xc_ref, buf->shifts_xc_ref, buf->extra_shifts_xc_ref, buf->bUpdateShifts, x,
 +                edi->sref.nr, edi->sref.nr_loc, edi->sref.anrs_loc, edi->sref.c_ind, edi->sref.x_old, box);
 +
 +    /* If bUpdateShifts was TRUE, the shifts are expected to have been updated by
 +     * the communicate_group_positions call(s) above (the original note named
 +     * get_positions). We do not need to update the shifts until the next NS step */
 +    buf->bUpdateShifts = FALSE;
 +
 +    /* Now all nodes have all of the ED/flooding positions in buf->xcoll,
 +     * as well as the indices in edi->sav.anrs */
 +
 +    /* Fit the reference indices to the reference structure */
 +    if (edi->bRefEqAv)
 +        fit_to_reference(buf->xcoll , transvec, rotmat, edi);
 +    else
 +        fit_to_reference(buf->xc_ref, transvec, rotmat, edi);
 +
 +    /* Now apply the translation and rotation to the ED structure */
 +    translate_and_rotate(buf->xcoll, edi->sav.nr, transvec, rotmat);
 +
 +    /* Project fitted structure onto subspace -> store in edi->flood.vecs.xproj */
 +    project_to_eigvectors(buf->xcoll,&edi->flood.vecs,edi);
 +
 +    /* With constant-force flooding, fproj was set once in init_flood and is
 +     * not recomputed here */
 +    if (FALSE == edi->flood.bConstForce)
 +    {
 +        /* Compute Vfl(x) from flood.xproj */
 +        edi->flood.Vfl = flood_energy(edi, step);
 +
 +        update_adaption(edi);
 +
 +        /* Compute the flooding forces */
 +        flood_forces(edi);
 +    }
 +
 +    /* Translate the subspace forces into cartesian forces */
 +    flood_blowup(edi, edi->flood.forces_cartesian);
 +
 +    /* Rotate forces back so that they correspond to the given structure and not to the fitted one */
 +    /* Each node rotates back its local forces */
 +    transpose(rotmat,tmat);
 +    rotate_x(edi->flood.forces_cartesian, edi->sav.nr_loc, tmat);
 +
 +    /* Finally add forces to the main force variable */
 +    for (i=0; i<edi->sav.nr_loc; i++)
 +        rvec_inc(force[edi->sav.anrs_loc[i]],edi->flood.forces_cartesian[i]);
 +
 +    /* Output is written by the master process */
 +    if (do_per_step(step,edi->outfrq) && MASTER(cr))
 +        write_edo_flood(edi,edo,step);
 +}
 +
 +
 +/* Main flooding routine, called from do_force */
 +extern void do_flood(
 +        FILE            *log,    /* md.log file */
 +        t_commrec       *cr,     /* Communication record */
 +        rvec            x[],     /* Positions on the local processor */
 +        rvec            force[], /* forcefield forces, to these the flooding forces are added */
 +        gmx_edsam_t     ed,      /* ed data structure contains all ED and flooding datasets */
 +        matrix          box,     /* the box */
 +        gmx_large_int_t step)    /* The relative time step since ir->init_step is already subtracted */
 +{
 +    t_edpar *cur;
 +
 +
 +    /* Nothing to do unless flooding is switched on */
 +    if (ed->eEDtype == eEDflood)
 +    {
 +        /* Walk the list of data sets; only those that actually carry
 +         * flooding eigenvectors are processed */
 +        for (cur = ed->edpar; cur != NULL; cur = cur->next_edi)
 +        {
 +            if (cur->flood.vecs.neig != 0)
 +            {
 +                do_single_flood(ed->edo,x,force,cur,step,box,cr);
 +            }
 +        }
 +    }
 +}
 +
 +
 +/* Called by init_edi, configure some flooding related variables and structures,
 + * print headers to output files */
 +static void init_flood(t_edpar *edi, gmx_edsam_t ed, real dt, t_commrec *cr)
 +{
 +    int k;
 +
 +
 +    /* Start values of the flooding state */
 +    edi->flood.dt  = dt;
 +    edi->flood.Vfl = 0;
 +    edi->flood.Efl = edi->flood.constEfl;
 +
 +    /* A data set without flooding vectors needs nothing else */
 +    if (!edi->flood.vecs.neig)
 +    {
 +        return;
 +    }
 +
 +    /* If in any of the datasets we find a flooding vector, flooding is turned on */
 +    ed->eEDtype = eEDflood;
 +
 +    fprintf(stderr,"ED: Flooding of matrix %d is switched on.\n", edi->flood.flood_id);
 +
 +    if (edi->flood.bConstForce)
 +    {
 +        /* For constant force flooding, stpsz carries the fproj values.
 +         * They are copied to flood.vecs.fproj here; in const force
 +         * flooding, fproj is never changed afterwards. */
 +        k = 0;
 +        while (k < edi->flood.vecs.neig)
 +        {
 +            edi->flood.vecs.fproj[k] = edi->flood.vecs.stpsz[k];
 +
 +            fprintf(stderr, "ED: applying on eigenvector %d a constant force of %g\n",
 +                    edi->flood.vecs.ieig[k], edi->flood.vecs.fproj[k]);
 +            k++;
 +        }
 +    }
 +
 +    /* Header lines of the flooding output */
 +    fprintf(ed->edo,"FL_HEADER: Flooding of matrix %d is switched on! The flooding output will have the following format:\n",
 +            edi->flood.flood_id);
 +    fprintf(ed->edo,"FL_HEADER: Step     Efl          Vfl       deltaF\n");
 +}
 +
 +
 +#ifdef DEBUGHELPERS
 +/*********** Energy book keeping ******/
 +/* Build one energy name "Vfl_<n>" (n counted from 1) per entry of the list
 + * starting at edi. Each name is a strdup() copy stored in names[n-1]; the
 + * number of names is returned in *nnames.
 + *
 + * NOTE(review): 'names' is passed by value. If srenew() reassigns its first
 + * argument (as a realloc-style macro would), the caller never sees the
 + * resized array and may be left with a stale pointer. Fixing that needs a
 + * char*** parameter, i.e. an interface change - confirm against the callers. */
 +static void get_flood_enx_names(t_edpar *edi, char** names, int *nnames)  /* get header of energies */
 +{
 +    t_edpar *actual;
 +    int count;
 +    char buf[STRLEN];
 +    actual=edi;
 +    count = 1;
 +    while (actual)
 +    {
 +        /* Make room for 'count' entries, then fill in the newest one */
 +        srenew(names,count);
 +        sprintf(buf,"Vfl_%d",count);
 +        names[count-1]=strdup(buf);
 +        actual=actual->next_edi;
 +        count++;
 +    }
 +    /* count is one past the number of entries visited */
 +    *nnames=count-1;
 +}
 +
 +
 +/* Copy the flooding energy Vfl of every entry of the list starting at edi
 + * into Vfl[0..nnames-1].
 + *
 + * edi     first entry of the list of ED/flooding data sets
 + * Vfl     output array, must provide room for nnames entries
 + * nnames  expected number of list entries
 + *
 + * Stops with a fatal error if the list does not have exactly nnames entries.
 + * The length is checked before every store, so Vfl is never written beyond
 + * its nnames entries. */
 +static void get_flood_energies(t_edpar *edi, real Vfl[],int nnames)
 +{
 +    t_edpar *actual;
 +    int count;
 +
 +
 +    count = 0;
 +    for (actual=edi; actual != NULL; actual=actual->next_edi)
 +    {
 +        /* More list entries than the caller expects: refuse before writing */
 +        if (count >= nnames)
 +            gmx_fatal(FARGS,"Number of energies is not consistent with t_edi structure");
 +
 +        Vfl[count]=actual->flood.Vfl;
 +        count++;
 +    }
 +
 +    /* Fewer list entries than the caller expects */
 +    if (nnames!=count)
 +        gmx_fatal(FARGS,"Number of energies is not consistent with t_edi structure");
 +}
 +/************* END of FLOODING IMPLEMENTATION ****************************/
 +#endif
 +
 +
 +/* Allocate the ED data structure. On the master node the names of the .edi
 + * input and .edo output files are looked up and the output file is opened,
 + * in append mode if MD_APPENDFILES is set in Flags. */
 +gmx_edsam_t ed_open(int nfile,const t_filenm fnm[],unsigned long Flags,t_commrec *cr)
 +{
 +    gmx_edsam_t ed;
 +
 +
 +    snew(ed, 1);
 +
 +    /* Start out as plain ED sampling (this switch might later be upgraded to eEDflood) */
 +    ed->eEDtype = eEDedsam;
 +
 +    /* File handling is done by the master only */
 +    if (!MASTER(cr))
 +    {
 +        return ed;
 +    }
 +
 +    /* Name of the .edi input file */
 +    ed->edinam=ftp2fn(efEDI,nfile,fnm);
 +
 +    fprintf(stderr,"ED sampling will be performed!\n");
 +
 +    /* Name of the .edo output file, which is opened right away */
 +    ed->edonam = ftp2fn(efEDO,nfile,fnm);
 +    ed->edo    = gmx_fio_fopen(ed->edonam,(Flags & MD_APPENDFILES)? "a+" : "w+");
 +
 +    ed->bStartFromCpt = Flags & MD_STARTFROMCPT;
 +
 +    return ed;
 +}
 +
 +
 +/* Broadcasts the structure data */
 +static void bc_ed_positions(t_commrec *cr, struct gmx_edx *s, int stype)
 +{
 +    gmx_bool bIsRef, bIsAv;
 +
 +
 +    bIsRef = (stype == eedREF);
 +    bIsAv  = (stype == eedAV);
 +
 +    /* Index numbers and positions are needed for every kind of structure */
 +    snew_bc(cr, s->anrs, s->nr   );
 +    snew_bc(cr, s->x   , s->nr   );
 +    nblock_bc(cr, s->nr, s->anrs );
 +    nblock_bc(cr, s->nr, s->x    );
 +
 +    /* Average and reference structures additionally get an array for the
 +     * collective indices and the old positions */
 +    if (bIsAv || bIsRef)
 +    {
 +        /* Collective indices */
 +        snew(s->c_ind    , s->nr   );
 +        /* s->anrs_loc has nothing allocated yet: local atom indices get
 +         * assigned in dd_make_local_group_indices, which also allocates */
 +        s->nalloc_loc = 0;
 +        /* Old positions: used to keep track of shift changes, so that the
 +         * ED molecule can always be made whole */
 +        snew_bc(cr, s->x_old, s->nr);
 +        nblock_bc(cr, s->nr, s->x_old);
 +    }
 +
 +    /* Reference structure: masses (for mass-weighted fitting) */
 +    if (bIsRef)
 +    {
 +        snew_bc(cr, s->m, s->nr);
 +        nblock_bc(cr, s->nr, s->m);
 +    }
 +
 +    /* Average structure: sqrt(m) and m, which might be needed for mass-weighting */
 +    if (bIsAv)
 +    {
 +        snew_bc(cr, s->sqrtm, s->nr);
 +        nblock_bc(cr, s->nr, s->sqrtm);
 +        snew_bc(cr, s->m, s->nr);
 +        nblock_bc(cr, s->nr, s->m);
 +    }
 +}
 +
 +
 +/* Broadcasts the eigenvector data */
 +/* Broadcast one group of eigenvectors.
 + *
 + * cr         communication record
 + * ev         the eigenvector group; ev->neig must already be valid here,
 + *            since it is used as the array length throughout
 + * length     number of components per eigenvector
 + * bHarmonic  if set, refproj0 and refprojslope are broadcast as well
 + */
 +static void bc_ed_vecs(t_commrec *cr, t_eigvec *ev, int length, gmx_bool bHarmonic)
 +{
 +    int i;
 +
 +    /* Per-eigenvector scalar arrays, ev->neig entries each */
 +    snew_bc(cr, ev->ieig   , ev->neig);  /* index numbers of eigenvector  */
 +    snew_bc(cr, ev->stpsz  , ev->neig);  /* stepsizes per eigenvector     */
 +    snew_bc(cr, ev->xproj  , ev->neig);  /* instantaneous x projection    */
 +    snew_bc(cr, ev->fproj  , ev->neig);  /* instantaneous f projection    */
 +    snew_bc(cr, ev->refproj, ev->neig);  /* starting or target projection */
 +
 +    nblock_bc(cr, ev->neig, ev->ieig   );
 +    nblock_bc(cr, ev->neig, ev->stpsz  );
 +    nblock_bc(cr, ev->neig, ev->xproj  );
 +    nblock_bc(cr, ev->neig, ev->fproj  );
 +    nblock_bc(cr, ev->neig, ev->refproj);
 +
 +    /* The vectors themselves: ev->neig vectors with 'length' components each */
 +    snew_bc(cr, ev->vec, ev->neig);      /* Eigenvector components        */
 +    for (i=0; i<ev->neig; i++)
 +    {
 +        snew_bc(cr, ev->vec[i], length);
 +        nblock_bc(cr, length, ev->vec[i]);
 +    }
 +
 +    /* For harmonic restraints the reference projections can change with time */
 +    if (bHarmonic)
 +    {
 +        snew_bc(cr, ev->refproj0    , ev->neig);
 +        snew_bc(cr, ev->refprojslope, ev->neig);
 +        nblock_bc(cr, ev->neig, ev->refproj0    );
 +        nblock_bc(cr, ev->neig, ev->refprojslope);
 +    }
 +}
 +
 +
 +/* Broadcasts the ED / flooding data to other nodes
 + * and allocates memory where needed */
 +/* cr       communication record
 + * ed       ED data structure whose list of data sets is transferred
 + * numedis  number of ED data sets to transfer
 + */
 +static void broadcast_ed_data(t_commrec *cr, gmx_edsam_t ed, int numedis)
 +{
 +    int     nr;
 +    t_edpar *edi;
 +
 +
 +    /* Master lets the other nodes know if its ED only or also flooding */
 +    gmx_bcast(sizeof(ed->eEDtype), &(ed->eEDtype), cr);
 +
 +    snew_bc(cr, ed->edpar,1);
 +    /* Now transfer the ED data set(s) */
 +    edi = ed->edpar;
 +    for (nr=0; nr<numedis; nr++)
 +    {
 +        /* Broadcast a single ED data set */
 +        /* This transfers the structure itself, pointer members included; the
 +         * arrays behind those pointers are handled by the calls below */
 +        block_bc(cr, *edi);
 +
 +        /* Broadcast positions */
 +        bc_ed_positions(cr, &(edi->sref), eedREF); /* reference positions (masses m are broadcast as well) */
 +        bc_ed_positions(cr, &(edi->sav ), eedAV ); /* average positions (sqrt(m) and m are broadcast as well) */
 +        bc_ed_positions(cr, &(edi->star), eedTAR); /* target positions                                */
 +        bc_ed_positions(cr, &(edi->sori), eedORI); /* origin positions                                */
 +
 +        /* Broadcast eigenvectors */
 +        bc_ed_vecs(cr, &edi->vecs.mon   , edi->sav.nr, FALSE);
 +        bc_ed_vecs(cr, &edi->vecs.linfix, edi->sav.nr, FALSE);
 +        bc_ed_vecs(cr, &edi->vecs.linacc, edi->sav.nr, FALSE);
 +        bc_ed_vecs(cr, &edi->vecs.radfix, edi->sav.nr, FALSE);
 +        bc_ed_vecs(cr, &edi->vecs.radacc, edi->sav.nr, FALSE);
 +        bc_ed_vecs(cr, &edi->vecs.radcon, edi->sav.nr, FALSE);
 +        /* Broadcast flooding eigenvectors and, if needed, values for the moving reference */
 +        bc_ed_vecs(cr, &edi->flood.vecs,  edi->sav.nr, edi->flood.bHarmonic);
 +
 +        /* Set the pointer to the next ED dataset */
 +        /* NOTE(review): if numedis exceeds the list length, edi is not advanced
 +         * and the last data set would be processed again - confirm the caller
 +         * passes the exact count */
 +        if (edi->next_edi)
 +        {
 +          snew_bc(cr, edi->next_edi, 1);
 +          edi = edi->next_edi;
 +        }
 +    }
 +}
 +
 +
 +/* init-routine called for every *.edi-cycle, initialises t_edpar structure */
 +static void init_edi(gmx_mtop_t *mtop,t_inputrec *ir,
 +                     t_commrec *cr,gmx_edsam_t ed,t_edpar *edi)
 +{
 +    int    iref, iav;
 +    real   totalmass = 0.0;
 +    rvec   com;
 +    t_atom *atom;
 +
 +    /* NOTE init_edi runs on the master process only. The data sets
 +     * initialized here are transmitted to the other nodes afterwards
 +     * in broadcast_ed_data */
 +
 +    /* do_edsam has work to do if any of these groups contains eigenvectors */
 +    edi->bNeedDoEdsam = (edi->vecs.mon.neig    || edi->vecs.linfix.neig ||
 +                         edi->vecs.linacc.neig || edi->vecs.radfix.neig ||
 +                         edi->vecs.radacc.neig || edi->vecs.radcon.neig);
 +
 +    /* Masses of the reference structure: the real atom masses for a
 +     * mass-weighted fit, 1.0 otherwise */
 +    snew(edi->sref.m, edi->sref.nr);
 +    iref = 0;
 +    while (iref < edi->sref.nr)
 +    {
 +        if (!edi->fitmas)
 +        {
 +            edi->sref.m[iref] = 1.0;
 +        }
 +        else
 +        {
 +            gmx_mtop_atomnr_to_atom(mtop,edi->sref.anrs[iref],&atom);
 +            edi->sref.m[iref] = atom->m;
 +        }
 +
 +        /* Every mass has to be > 0. Bad things will happen otherwise. */
 +        if (edi->sref.m[iref] <= 0.0)
 +        {
 +            gmx_fatal(FARGS, "Reference structure atom %d (sam.edi index %d) has a mass of %g.\n"
 +                             "For a mass-weighted fit, all reference structure atoms need to have a mass >0.\n"
 +                             "Either make the covariance analysis non-mass-weighted, or exclude massless\n"
 +                             "atoms from the reference structure by creating a proper index group.\n",
 +                      iref, edi->sref.anrs[iref]+1, edi->sref.m[iref]);
 +        }
 +
 +        totalmass += edi->sref.m[iref];
 +        iref++;
 +    }
 +    edi->sref.mtot = totalmass;
 +
 +    /* Average structure: m is always the atom mass (it is needed if forces
 +     * have to be evaluated in do_edsam); sqrt(m) is only a real square root
 +     * with mass-weighted analysis and 1.0 otherwise */
 +    snew(edi->sav.sqrtm, edi->sav.nr );
 +    snew(edi->sav.m    , edi->sav.nr );
 +    iav = 0;
 +    while (iav < edi->sav.nr)
 +    {
 +        gmx_mtop_atomnr_to_atom(mtop,edi->sav.anrs[iav],&atom);
 +        edi->sav.m[iav] = atom->m;
 +        if (!edi->pcamas)
 +        {
 +            edi->sav.sqrtm[iav] = 1.0;
 +        }
 +        else
 +        {
 +            edi->sav.sqrtm[iav] = sqrt(atom->m);
 +        }
 +
 +        /* Every sqrt(m) has to be > 0. Bad things will happen otherwise. */
 +        if (edi->sav.sqrtm[iav] <= 0.0)
 +        {
 +            gmx_fatal(FARGS, "Average structure atom %d (sam.edi index %d) has a mass of %g.\n"
 +                             "For ED with mass-weighting, all average structure atoms need to have a mass >0.\n"
 +                             "Either make the covariance analysis non-mass-weighted, or exclude massless\n"
 +                             "atoms from the average structure by creating a proper index group.\n",
 +                      iav, edi->sav.anrs[iav]+1, atom->m);
 +        }
 +        iav++;
 +    }
 +
 +    /* Move the reference structure to the origin: get its center and
 +     * translate by the negated center */
 +    get_center(edi->sref.x, edi->sref.m, edi->sref.nr, com);
 +    com[ZZ] = -com[ZZ];
 +    com[YY] = -com[YY];
 +    com[XX] = -com[XX];
 +    translate_x(edi->sref.x, edi->sref.nr, com);
 +
 +    /* The ED buffer starts out zero-initialized */
 +    snew(edi->buf, 1);
 +}
 +
 +
 +static void check(const char *line, const char *label)
 +{
 +    if (!strstr(line,label))
 +        gmx_fatal(FARGS,"Could not find input parameter %s at expected position in edsam input-file (.edi)\nline read instead is %s",label,line);
 +}
 +
 +
 +/* Read a labelled integer from the .edi file.
 + * The entry consists of two lines: a line that must contain 'label',
 + * followed by a line holding the integer value.
 + * Aborts with a fatal error if the file ends prematurely, if the label is
 + * not found, or if the value line does not start with an integer (previously
 + * an uninitialised value was returned in these cases).
 + * Returns the integer that was read. */
 +static int read_checked_edint(FILE *file,const char *label)
 +{
 +    char line[STRLEN+1];
 +    int idum;
 +
 +
 +    /* Label line */
 +    if (fgets2 (line,STRLEN,file) == NULL)
 +        gmx_fatal(FARGS,"Unexpected end of edsam input-file (.edi) while looking for input parameter %s",label);
 +    check(line,label);
 +
 +    /* Value line */
 +    if (fgets2 (line,STRLEN,file) == NULL)
 +        gmx_fatal(FARGS,"Unexpected end of edsam input-file (.edi) while reading the value of input parameter %s",label);
 +    if (sscanf (line,"%d",&idum) != 1)
 +        gmx_fatal(FARGS,"Could not read an integer value for input parameter %s in edsam input-file (.edi)\nline read instead is %s",label,line);
 +
 +    return idum;
 +}
 +
 +
 +/* Read an integer entry from the .edi file without checking its label.
 + * The first line read (the label/comment line) is discarded, the second
 + * line must hold the integer value.
 + * If either line cannot be read because the end of the file was reached,
 + * *bEOF is set to TRUE and -1 is returned; otherwise *bEOF is set to FALSE
 + * and the value is returned.
 + * Aborts with a fatal error if the value line does not start with an
 + * integer (previously an uninitialised value was returned in that case). */
 +static int read_edint(FILE *file,gmx_bool *bEOF)
 +{
 +    char line[STRLEN+1];
 +    int idum;
 +    char *eof;
 +
 +
 +    /* Skip the label line */
 +    eof=fgets2 (line,STRLEN,file);
 +    if (eof==NULL)
 +    {
 +        *bEOF = TRUE;
 +        return -1;
 +    }
 +    /* Read the value line */
 +    eof=fgets2 (line,STRLEN,file);
 +    if (eof==NULL)
 +    {
 +        *bEOF = TRUE;
 +        return -1;
 +    }
 +    if (sscanf (line,"%d",&idum) != 1)
 +        gmx_fatal(FARGS,"Expected an integer value in edsam input-file (.edi)\nline read instead is %s",line);
 +    *bEOF = FALSE;
 +    return idum;
 +}
 +
 +
 +/* Read a labelled floating point value from the .edi file.
 + * The entry consists of two lines: a line that must contain 'label',
 + * followed by a line holding the value.
 + * Aborts with a fatal error if the file ends prematurely, if the label is
 + * not found, or if the value line does not start with a number (previously
 + * an uninitialised value was returned in these cases).
 + * Returns the value converted to 'real'. */
 +static real read_checked_edreal(FILE *file,const char *label)
 +{
 +    char line[STRLEN+1];
 +    double rdum;
 +
 +
 +    /* Label line */
 +    if (fgets2 (line,STRLEN,file) == NULL)
 +        gmx_fatal(FARGS,"Unexpected end of edsam input-file (.edi) while looking for input parameter %s",label);
 +    check(line,label);
 +
 +    /* Value line */
 +    if (fgets2 (line,STRLEN,file) == NULL)
 +        gmx_fatal(FARGS,"Unexpected end of edsam input-file (.edi) while reading the value of input parameter %s",label);
 +    if (sscanf (line,"%lf",&rdum) != 1)
 +        gmx_fatal(FARGS,"Could not read a real value for input parameter %s in edsam input-file (.edi)\nline read instead is %s",label,line);
 +
 +    return (real) rdum; /* always read as double and convert to single */
 +}
 +
 +
 +/* Read 'number' position lines from the .edi file.
 + * Every line holds a 1-based atom number followed by the x, y and z
 + * coordinates. The atom numbers are stored 0-based in anrs[], the
 + * coordinates in x[].
 + * Aborts with a fatal error if the file ends prematurely or a line does not
 + * contain all four values (previously uninitialised coordinates and an atom
 + * index of -1 were silently stored in that case). */
 +static void read_edx(FILE *file,int number,int *anrs,rvec *x)
 +{
 +    int i,j;
 +    char line[STRLEN+1];
 +    double d[3];
 +
 +
 +    for(i=0; i<number; i++)
 +    {
 +        if (fgets2 (line,STRLEN,file) == NULL)
 +            gmx_fatal(FARGS,"Unexpected end of edsam input-file (.edi) while reading position %d of %d",i+1,number);
 +        if (sscanf (line,"%d%lf%lf%lf",&anrs[i],&d[0],&d[1],&d[2]) != 4)
 +            gmx_fatal(FARGS,"Expected 4 values (<atom nr> <x> <y> <z>) for position %d in edsam input-file (.edi)\nline read instead is %s",i+1,line);
 +        anrs[i]--; /* we are reading FORTRAN indices */
 +        for(j=0; j<3; j++)
 +            x[i][j]=d[j]; /* always read as double and convert to single */
 +    }
 +}
 +
 +
 +/* Read the 'nr' components of one eigenvector from the .edi file.
 + * Every line holds the x, y and z component belonging to one atom; the
 + * values are read as double and stored in vec[] as 'real'.
 + * Aborts with a fatal error if the file ends prematurely or a line does not
 + * contain three values (previously uninitialised values were silently
 + * stored in that case). */
 +static void scan_edvec(FILE *in,int nr,rvec *vec)
 +{
 +    char line[STRLEN+1];
 +    int i;
 +    double x,y,z;
 +
 +
 +    for(i=0; (i < nr); i++)
 +    {
 +        if (fgets2 (line,STRLEN,in) == NULL)
 +            gmx_fatal(FARGS,"Unexpected end of edsam input-file (.edi) while reading eigenvector component %d of %d",i+1,nr);
 +        if (sscanf (line,"%le%le%le",&x,&y,&z) != 3)
 +            gmx_fatal(FARGS,"Expected 3 values for eigenvector component %d in edsam input-file (.edi)\nline read instead is %s",i+1,line);
 +        vec[i][XX]=x;
 +        vec[i][YY]=y;
 +        vec[i][ZZ]=z;
 +    }
 +}
 +
 +
 +/* Read one group of eigenvectors from the .edi file into 'tvec'.
 + *
 + * The group starts with a "NUMBER OF EIGENVECTORS" entry. If that number is
 + * positive, one header line per eigenvector follows, and after all header
 + * lines the eigenvectors themselves, each with 'nr' components.
 + *
 + * in              the open .edi file
 + * nr              number of components (atoms) of each eigenvector
 + * tvec            the eigenvector group that gets allocated and filled
 + * bReadRefproj    if TRUE, a header line may hold 2 - 4 values
 + *                 (<nr> <spring const> <refproj> <refproj-slope>) and the
 + *                 refproj0 / refprojslope arrays are allocated as well;
 + *                 if FALSE, a header line must hold exactly 2 values
 + *                 (<nr> <stpsz>)
 + * bHaveReference  set to TRUE as soon as one header line provides a
 + *                 reference projection; it is never reset to FALSE here
 + *
 + * NOTE(review): the return values of fgets2 are not checked in this routine. */
 +static void read_edvec(FILE *in,int nr,t_eigvec *tvec,gmx_bool bReadRefproj, gmx_bool *bHaveReference)
 +{
 +    int i,idum,nscan;
 +    double rdum,refproj_dum=0.0,refprojslope_dum=0.0;
 +    char line[STRLEN+1];
 +
 +
 +    tvec->neig=read_checked_edint(in,"NUMBER OF EIGENVECTORS");
 +    if (tvec->neig >0)
 +    {
 +        /* One entry per eigenvector in each of these arrays */
 +        snew(tvec->ieig   ,tvec->neig);
 +        snew(tvec->stpsz  ,tvec->neig);
 +        snew(tvec->vec    ,tvec->neig);
 +        snew(tvec->xproj  ,tvec->neig);
 +        snew(tvec->fproj  ,tvec->neig);
 +        snew(tvec->refproj,tvec->neig);
 +        if (bReadRefproj)
 +        {
 +            /* These two arrays only exist when reference projections are read */
 +            snew(tvec->refproj0    ,tvec->neig);
 +            snew(tvec->refprojslope,tvec->neig);
 +        }
 +
 +        /* Read the header line of every eigenvector */
 +        for(i=0; (i < tvec->neig); i++)
 +        {
 +            fgets2 (line,STRLEN,in);
 +            if (bReadRefproj) /* ONLY when using flooding as harmonic restraint */
 +            {
 +                nscan = sscanf(line,"%d%lf%lf%lf",&idum,&rdum,&refproj_dum,&refprojslope_dum);
 +                /* Zero out values which were not scanned */
 +                switch(nscan)
 +                {
 +                    case 4:
 +                        /* Every 4 values read, including reference position */
 +                        *bHaveReference = TRUE;
 +                        break;
 +                    case 3:
 +                        /* A reference position is provided */
 +                        *bHaveReference = TRUE;
 +                        /* No value for slope, set to 0 */
 +                        refprojslope_dum = 0.0;
 +                        break;
 +                    case 2:
 +                        /* No values for reference projection and slope, set to 0 */
 +                        refproj_dum      = 0.0;
 +                        refprojslope_dum = 0.0;
 +                        break;
 +                    default:
 +                        gmx_fatal(FARGS,"Expected 2 - 4 (not %d) values for flooding vec: <nr> <spring const> <refproj> <refproj-slope>\n", nscan);
 +                        break;
 +                }
 +                /* The reference projection is stored twice: in refproj and in refproj0 */
 +                tvec->refproj[i]=refproj_dum;
 +                tvec->refproj0[i]=refproj_dum;
 +                tvec->refprojslope[i]=refprojslope_dum;
 +            }
 +            else /* Normal flooding */
 +            {
 +                nscan = sscanf(line,"%d%lf",&idum,&rdum);
 +                if (nscan != 2)
 +                    gmx_fatal(FARGS,"Expected 2 values for flooding vec: <nr> <stpsz>\n");
 +            }
 +            /* The first value is the eigenvector number; the second value is
 +             * stored as stpsz in both cases */
 +            tvec->ieig[i]=idum;
 +            tvec->stpsz[i]=rdum;
 +        } /* end of loop over eigenvectors */
 +
 +        /* Now read the components of all eigenvectors */
 +        for(i=0; (i < tvec->neig); i++)
 +        {
 +            snew(tvec->vec[i],nr);
 +            scan_edvec(in,nr,tvec->vec[i]);
 +        }
 +    }
 +}
 +
 +
 +/* Read the six eigenvector groups used for ED sampling (mon, linfix, linacc,
 + * radfix, radacc, radcon) in the order in which they appear in the .edi file.
 + * The flooding vectors are not read here; they need an extra read_edvec call.
 + * No reference projections are read for these groups, so the flag that
 + * read_edvec wants to report them through is only a dummy. */
 +static void read_edvecs(FILE *in,int nr,t_edvecs *vecs)
 +{
 +    t_eigvec *group[6];
 +    gmx_bool bRefDummy = FALSE;
 +    int      g;
 +
 +
 +    group[0] = &vecs->mon;
 +    group[1] = &vecs->linfix;
 +    group[2] = &vecs->linacc;
 +    group[3] = &vecs->radfix;
 +    group[4] = &vecs->radacc;
 +    group[5] = &vecs->radcon;
 +
 +    for (g = 0; g < 6; g++)
 +    {
 +        read_edvec(in, nr, group[g], FALSE, &bRefDummy);
 +    }
 +}
 +
 +
 +/* Tell whether the reference and the average structure consist of exactly
 + * the same atom indices in the same order. A note is printed to stderr if
 + * they do. Returns TRUE if the index lists are identical, FALSE otherwise. */
 +static gmx_bool check_if_same(struct gmx_edx sref, struct gmx_edx sav)
 +{
 +    int      k;
 +    gmx_bool bSame;
 +
 +
 +    /* Structures with a different number of atoms can never be identical */
 +    bSame = (sref.nr == sav.nr);
 +
 +    /* With equal atom counts, every single index has to agree as well */
 +    for (k = 0; bSame && (k < sav.nr); k++)
 +    {
 +        bSame = (sref.anrs[k] == sav.anrs[k]);
 +    }
 +
 +    if (bSame)
 +    {
 +        fprintf(stderr, "ED: Note: Reference and average structure are composed of the same atom indices.\n");
 +    }
 +
 +    return bSame ? TRUE : FALSE;
 +}
 +
 +
 +/* Read a single ED dataset from the open .edi file 'in' into 'edi'.
 + *
 + * in          the open .edi file
 + * ed          only used for the file name (ed->edinam) in error messages
 + * edi         the dataset structure that is filled
 + * nr_mdatoms  number of atoms of the MD system; the dataset must match it
 + * edi_nr      number of this dataset, stored as the flooding id
 + * cr          used to decide (PAR(cr)) whether the x_old arrays are allocated
 + *
 + * Returns 0 if the end of the file is reached while reading the magic number,
 + * i.e. there is no further dataset, and 1 after a dataset has been read.
 + * A wrong magic number or a wrong number of atoms is a fatal error. */
 +static int read_edi(FILE* in, gmx_edsam_t ed,t_edpar *edi,int nr_mdatoms, int edi_nr, t_commrec *cr)
 +{
 +    int readmagic;
 +    const int magic=670;
 +    gmx_bool bEOF;
 +
 +    /* Was a specific reference point for the flooding/umbrella potential provided in the edi file? */
 +    gmx_bool bHaveReference = FALSE;
 +
 +
 +    /* the edi file is not free format, so expect problems if the input is corrupt. */
 +
 +    /* check the magic number */
 +    readmagic=read_edint(in,&bEOF);
 +    /* Check whether we have reached the end of the input file */
 +    if (bEOF)
 +        return 0;
 +
 +    /* Besides the current magic number, 669 is accepted as well;
 +     * 666, 667 and 668 get a dedicated error message */
 +    if (readmagic != magic)
 +    {
 +        if (readmagic==666 || readmagic==667 || readmagic==668)
 +            gmx_fatal(FARGS,"Wrong magic number: Use newest version of make_edi to produce edi file");
-         else if (readmagic == 669)
-             ;
-         else
++        else if (readmagic != 669)
 +            gmx_fatal(FARGS,"Wrong magic number %d in %s",readmagic,ed->edinam);
 +    }
 +
 +    /* check the number of atoms */
 +    /* NOTE(review): bEOF is not inspected after this read; at the end of the
 +     * file read_edint returns -1, which then fails the comparison below */
 +    edi->nini=read_edint(in,&bEOF);
 +    if (edi->nini != nr_mdatoms)
 +        gmx_fatal(FARGS,"Nr of atoms in %s (%d) does not match nr of md atoms (%d)",
 +                ed->edinam,edi->nini,nr_mdatoms);
 +
 +    /* Done checking. For the rest we blindly trust the input */
 +    edi->fitmas          = read_checked_edint(in,"FITMAS");
 +    edi->pcamas          = read_checked_edint(in,"ANALYSIS_MAS");
 +    edi->outfrq          = read_checked_edint(in,"OUTFRQ");
 +    edi->maxedsteps      = read_checked_edint(in,"MAXLEN");
 +    edi->slope           = read_checked_edreal(in,"SLOPECRIT");
 +
 +    edi->presteps        = read_checked_edint(in,"PRESTEPS");
 +    edi->flood.deltaF0   = read_checked_edreal(in,"DELTA_F0");
 +    edi->flood.deltaF    = read_checked_edreal(in,"INIT_DELTA_F");
 +    edi->flood.tau       = read_checked_edreal(in,"TAU");
 +    edi->flood.constEfl  = read_checked_edreal(in,"EFL_NULL");
 +    edi->flood.alpha2    = read_checked_edreal(in,"ALPHA2");
 +    edi->flood.kT        = read_checked_edreal(in,"KT");
 +    edi->flood.bHarmonic = read_checked_edint(in,"HARMONIC");
 +    /* The CONST_FORCE_FLOODING entry is only read for magic numbers above 669 */
 +    if (readmagic > 669)
 +        edi->flood.bConstForce = read_checked_edint(in,"CONST_FORCE_FLOODING");
 +    else
 +        edi->flood.bConstForce = FALSE;
 +    edi->flood.flood_id  = edi_nr;
 +    edi->sref.nr         = read_checked_edint(in,"NREF");
 +
 +    /* allocate space for reference positions and read them */
 +    snew(edi->sref.anrs,edi->sref.nr);
 +    snew(edi->sref.x   ,edi->sref.nr);
 +    if (PAR(cr))
 +        snew(edi->sref.x_old,edi->sref.nr);
 +    edi->sref.sqrtm    =NULL;
 +    read_edx(in,edi->sref.nr,edi->sref.anrs,edi->sref.x);
 +
 +    /* average positions. they define which atoms will be used for ED sampling */
 +    edi->sav.nr=read_checked_edint(in,"NAV");
 +    snew(edi->sav.anrs,edi->sav.nr);
 +    snew(edi->sav.x   ,edi->sav.nr);
 +    if (PAR(cr))
 +        snew(edi->sav.x_old,edi->sav.nr);
 +    read_edx(in,edi->sav.nr,edi->sav.anrs,edi->sav.x);
 +
 +    /* Check if the same atom indices are used for reference and average positions */
 +    edi->bRefEqAv = check_if_same(edi->sref, edi->sav);
 +
 +    /* eigenvectors */
 +    read_edvecs(in,edi->sav.nr,&edi->vecs);
 +    /* The flooding vectors are the only group that may carry reference projections */
 +    read_edvec(in,edi->sav.nr,&edi->flood.vecs,edi->flood.bHarmonic, &bHaveReference);
 +
 +    /* target positions */
 +    edi->star.nr=read_edint(in,&bEOF);
 +    if (edi->star.nr > 0)
 +    {
 +        snew(edi->star.anrs,edi->star.nr);
 +        snew(edi->star.x   ,edi->star.nr);
 +        edi->star.sqrtm    =NULL;
 +        read_edx(in,edi->star.nr,edi->star.anrs,edi->star.x);
 +    }
 +
 +    /* positions defining origin of expansion circle */
 +    edi->sori.nr=read_edint(in,&bEOF);
 +    if (edi->sori.nr > 0)
 +    {
 +    	if (bHaveReference)
 +    	{
 +    		/* Both an -ori structure and a at least one manual reference point have been
 +    		 * specified. That's ambiguous and probably not intentional. */
 +    		gmx_fatal(FARGS, "ED: An origin structure has been provided and a at least one (moving) reference\n"
 +    		                 "    point was manually specified in the edi file. That is ambiguous. Aborting.\n");
 +    	}
 +        snew(edi->sori.anrs,edi->sori.nr);
 +        snew(edi->sori.x   ,edi->sori.nr);
 +        edi->sori.sqrtm    =NULL;
 +        read_edx(in,edi->sori.nr,edi->sori.anrs,edi->sori.x);
 +    }
 +
 +    /* all done */
 +    return 1;
 +}
 +
 +
 +
 +/* Read in the edi input file. Note that it may contain several ED data sets which were
 + * achieved by concatenating multiple edi files. The standard case would be a single ED
 + * data set, though.
 + *
 + * ed          provides the name of the .edi file (ed->edinam)
 + * edi         the first, already allocated, dataset; further datasets are allocated
 + *             here and chained to it through the next_edi pointers
 + * nr_mdatoms  number of atoms of the MD system, every dataset has to match it
 + * cr          passed on to read_edi
 + *
 + * It is a fatal error if the file does not contain at least one complete dataset. */
 +static void read_edi_file(gmx_edsam_t ed, t_edpar *edi, int nr_mdatoms, t_commrec *cr)
 +{
 +    FILE    *in;
 +    t_edpar *curr_edi,*last_edi;
 +    t_edpar *edi_read;
 +    int     edi_nr = 0;
 +
 +
 +    /* This routine is executed on the master only */
 +
 +    /* Open the .edi parameter input file */
 +    in = gmx_fio_fopen(ed->edinam,"r");
 +    fprintf(stderr, "ED: Reading edi file %s\n", ed->edinam);
 +
 +    /* Now read a sequence of ED input parameter sets from the edi file */
 +    curr_edi=edi;
 +    last_edi=edi;
 +    while( read_edi(in, ed, curr_edi, nr_mdatoms, edi_nr, cr) )
 +    {
 +        edi_nr++;
 +        /* Make sure that the number of atoms in each dataset is the same as in the tpr file.
 +         * The dataset that was just read is curr_edi, not the head of the list. */
 +        if (curr_edi->nini != nr_mdatoms)
 +            gmx_fatal(FARGS,"edi file %s (dataset #%d) was made for %d atoms, but the simulation contains %d atoms.",
 +                    ed->edinam, edi_nr, curr_edi->nini, nr_mdatoms);
 +        /* Since we arrived within this while loop we know that there is still another data set to be read in */
 +        /* We need to allocate space for the data: */
 +        snew(edi_read,1);
 +        /* Point the 'next_edi' entry to the next edi: */
 +        curr_edi->next_edi=edi_read;
 +        /* Keep the curr_edi pointer for the case that the next dataset is empty: */
 +        last_edi = curr_edi;
 +        /* Let's prepare to read in the next edi data set: */
 +        curr_edi = edi_read;
 +    }
 +    if (edi_nr == 0)
 +        gmx_fatal(FARGS, "No complete ED data set found in edi file %s.", ed->edinam);
 +
 +    /* Terminate the edi dataset list with a NULL pointer. The structure that was
 +     * allocated for the dataset after the last one stayed empty (read_edi hit the
 +     * end of the file before storing anything in it), so release it again. */
 +    last_edi->next_edi = NULL;
 +    sfree(curr_edi);
 +
 +    fprintf(stderr, "ED: Found %d ED dataset%s.\n", edi_nr, edi_nr>1? "s" : "");
 +
 +    /* Close the .edi file again */
 +    gmx_fio_fclose(in);
 +}
 +
 +
 +/* Work buffer of fit_to_reference */
 +struct t_fit_to_ref
 +{
 +    /* Working copy of the positions that are fitted */
 +    rvec *xcopy;
 +};
 +
 +/* Determine the translation and rotation that fit the positions 'xcoll' onto
 + * the reference positions of 'edi'. The positions themselves are left
 + * untouched; only 'transvec' and 'rotmat' are returned.
 + * The center of the reference structure has already been moved to the origin
 + * by init_edi.
 + *
 + * xcoll     the positions to be fitted (edi->sref.nr entries are read)
 + * transvec  output: the translation vector (minus the center of the positions)
 + * rotmat    output: the rotation matrix as determined by do_edfit
 + * edi       provides the reference structure and the work buffer */
 +static void fit_to_reference(rvec      *xcoll,    /* The positions to be fitted */
 +                             rvec      transvec,  /* The translation vector */
 +                             matrix    rotmat,    /* The rotation matrix */
 +                             t_edpar   *edi)      /* Just needed for do_edfit */
 +{
 +    struct t_fit_to_ref *work;
 +    rvec                center;
 +    int                 k;
 +
 +
 +    /* The work buffer is created on the first call for each edi dataset */
 +    if (edi->buf->fit_to_ref == NULL)
 +    {
 +        snew(edi->buf->fit_to_ref, 1);
 +        snew(edi->buf->fit_to_ref->xcopy, edi->sref.nr);
 +    }
 +    work = edi->buf->fit_to_ref;
 +
 +    /* All manipulations are done on a private copy of the positions */
 +    for (k = 0; k < edi->sref.nr; k++)
 +    {
 +        copy_rvec(xcoll[k], work->xcopy[k]);
 +    }
 +
 +    /* The translation is minus the center of the copied positions */
 +    get_center(work->xcopy, edi->sref.m, edi->sref.nr, center);
 +    transvec[XX] = -center[XX];
 +    transvec[YY] = -center[YY];
 +    transvec[ZZ] = -center[ZZ];
 +
 +    /* Move the copy to the origin ... */
 +    translate_x(work->xcopy, edi->sref.nr, transvec);
 +
 +    /* ... and get the rotation matrix from the fit to the reference */
 +    do_edfit(edi->sref.nr, edi->sref.x, work->xcopy, rotmat, edi);
 +}
 +
 +
 +/* Apply a translation followed by a rotation to 'nat' positions in place. */
 +static void translate_and_rotate(rvec *x,         /* The positions to be translated and rotated */
 +                                 int nat,         /* How many positions are there? */
 +                                 rvec transvec,   /* The translation vector */
 +                                 matrix rotmat)   /* The rotation matrix */
 +{
 +    /* First shift all positions by transvec ... */
 +    translate_x(x, nat, transvec);
 +
 +    /* ... then rotate the shifted positions with rotmat */
 +    rotate_x(x, nat, rotmat);
 +}
 +
 +
 +/* Return the root mean square deviation between the positions 'x' and the
 + * positions of the structure 's', taken over the s->nr atoms of 's'.
 + * The positions have to be fitted to the structure before this routine is
 + * called. */
 +static real rmsd_from_structure(rvec           *x,  /* The positions under consideration */
 +                                struct gmx_edx *s)  /* The structure from which the rmsd shall be computed */
 +{
 +    real  dev2 = 0.0;
 +    int   k;
 +
 +
 +    /* Sum up the squared distances ... */
 +    for (k = 0; k < s->nr; k++)
 +    {
 +        dev2 += distance2(s->x[k], x[k]);
 +    }
 +
 +    /* ... and turn the sum into the root of the mean */
 +    dev2 /= (real) s->nr;
 +    dev2 = sqrt(dev2);
 +
 +    return dev2;
 +}
 +
 +
 +/* Build the node-local atom index lists of all ED datasets for the domain
 + * decomposition 'dd' and flag that the ED shift vectors have to be updated.
 + * Nothing is done if ED is switched off (eEDnone). */
 +void dd_make_local_ed_indices(gmx_domdec_t *dd, struct gmx_edsam *ed)
 +{
 +    t_edpar *edi;
 +
 +
 +    if (ed->eEDtype == eEDnone)
 +    {
 +        return;
 +    }
 +
 +    /* Walk through the list of ED datasets (usually there is just one dataset, though) */
 +    for (edi = ed->edpar; edi != NULL; edi = edi->next_edi)
 +    {
 +        /* The reference structure (used for fitting) only needs local indices
 +         * of its own if its atoms differ from those of the average structure */
 +        if (!edi->bRefEqAv)
 +        {
 +            dd_make_local_group_indices(dd->ga2la, edi->sref.nr, edi->sref.anrs,
 +                    &edi->sref.nr_loc, &edi->sref.anrs_loc, &edi->sref.nalloc_loc, edi->sref.c_ind);
 +        }
 +
 +        /* The average structure: these are the atoms ED is performed on */
 +        dd_make_local_group_indices(dd->ga2la, edi->sav.nr, edi->sav.anrs,
 +                &edi->sav.nr_loc, &edi->sav.anrs_loc, &edi->sav.nalloc_loc, edi->sav.c_ind);
 +
 +        /* We obviously are in a NS step, so the ED shift vectors of this dataset
 +         * have to be updated at the next call to communicate_group_positions */
 +        edi->buf->do_edsam->bUpdateShifts = TRUE;
 +    }
 +}
 +
 +
 +/* Subtract the box shift 'is' (in units of box vectors) from the position 'x'
 + * and store the result in 'xu'. For a triclinic box the off-diagonal box
 + * elements are taken into account, otherwise only the diagonal is used. */
 +static inline void ed_unshift_single_coord(matrix box, const rvec x, const ivec is, rvec xu)
 +{
 +    const int nx = is[XX];
 +    const int ny = is[YY];
 +    const int nz = is[ZZ];
 +
 +
 +    if (!TRICLINIC(box))
 +    {
 +        /* Rectangular box: every dimension is independent */
 +        xu[XX] = x[XX]-nx*box[XX][XX];
 +        xu[YY] = x[YY]-ny*box[YY][YY];
 +        xu[ZZ] = x[ZZ]-nz*box[ZZ][ZZ];
 +    }
 +    else
 +    {
 +        /* Triclinic box: the y and z box vectors also have lower components */
 +        xu[XX] = x[XX]-nx*box[XX][XX]-ny*box[YY][XX]-nz*box[ZZ][XX];
 +        xu[YY] = x[YY]-ny*box[YY][YY]-nz*box[ZZ][YY];
 +        xu[ZZ] = x[ZZ]-nz*box[ZZ][ZZ];
 +    }
 +}
 +
 +
 +/* Apply the linfix constraints to the positions 'xcoll' in place.
 + * For every linfix eigenvector the projection of the positions is moved to
 + * refproj + step*stpsz by adding a multiple of the eigenvector.
 + * 'cr' is not used.
 + * NOTE(review): the correction is divided by sav.sqrtm[] taken at the
 + * eigenvector index, not at an atom index - confirm that this is intended. */
 +static void do_linfix(rvec *xcoll, t_edpar *edi, int step, t_commrec *cr)
 +{
 +    t_eigvec *lf = &edi->vecs.linfix;
 +    int      iev, iat;
 +    real     proj, corr;
 +    rvec     dx;
 +
 +
 +    for (iev = 0; iev < lf->neig; iev++)
 +    {
 +        /* Current projection onto this eigenvector */
 +        proj = projectx(edi, xcoll, lf->vec[iev]);
 +
 +        /* Difference between the target value and the current projection */
 +        corr = lf->refproj[iev] + step*lf->stpsz[iev] - proj;
 +        corr /= edi->sav.sqrtm[iev];
 +
 +        /* Shift every atom along its eigenvector component */
 +        for (iat = 0; iat < edi->sav.nr; iat++)
 +        {
 +            svmul(corr, lf->vec[iev][iat], dx);
 +            rvec_inc(xcoll[iat], dx);
 +        }
 +    }
 +}
 +
 +
 +/* Apply the linacc constraints to the positions 'xcoll' in place.
 + * For every linacc eigenvector the projection may only move in the direction
 + * given by the sign of stpsz: a step in the opposite direction is undone by
 + * moving the projection back to refproj. Afterwards refproj is set to the
 + * (possibly corrected) projection. 'cr' is not used.
 + * NOTE(review): the correction is divided by sav.sqrtm[] taken at the
 + * eigenvector index, not at an atom index - confirm that this is intended. */
 +static void do_linacc(rvec *xcoll, t_edpar *edi, t_commrec *cr)
 +{
 +    t_eigvec *la = &edi->vecs.linacc;
 +    int      iev, iat;
 +    real     proj, corr;
 +    rvec     dx;
 +
 +
 +    for (iev = 0; iev < la->neig; iev++)
 +    {
 +        /* Current projection onto this eigenvector */
 +        proj = projectx(edi, xcoll, la->vec[iev]);
 +
 +        /* A correction is only needed if the projection moved against the
 +         * direction of acceptance */
 +        if (   ((la->stpsz[iev] > 0.0) && ((proj-la->refproj[iev]) < 0.0))
 +            || ((la->stpsz[iev] < 0.0) && ((proj-la->refproj[iev]) > 0.0)))
 +        {
 +            corr = la->refproj[iev] - proj;
 +        }
 +        else
 +        {
 +            corr = 0.0;
 +        }
 +
 +        /* Shift every atom along its eigenvector component */
 +        corr /= edi->sav.sqrtm[iev];
 +        for (iat = 0; iat < edi->sav.nr; iat++)
 +        {
 +            svmul(corr, la->vec[iev][iat], dx);
 +            rvec_inc(xcoll[iat], dx);
 +        }
 +
 +        /* The new positions act as reference from now on */
 +        la->refproj[iev] = proj + corr;
 +    }
 +}
 +
 +
 +/* Apply the radfix constraint to the positions 'xcoll' in place.
 + * The distance of the projections from the reference projections is set to
 + * the stored radius plus stpsz[0], and the stored radius is increased by
 + * stpsz[0]. Nothing is done if there are no radfix eigenvectors.
 + * 'step' and 'cr' are not used.
 + * NOTE(review): there is no protection against a current distance of zero,
 + * and sav.sqrtm[] is indexed with the eigenvector index - confirm both. */
 +static void do_radfix(rvec *xcoll, t_edpar *edi, int step, t_commrec *cr)
 +{
 +    t_eigvec *rf = &edi->vecs.radfix;
 +    int      iev, iat;
 +    real     *dproj, dist=0.0, scale;
 +    rvec     dx;
 +
 +
 +    if (rf->neig == 0)
 +        return;
 +
 +    snew(dproj, rf->neig);
 +
 +    /* Projections onto all radfix vectors and the distance from the reference */
 +    for (iev = 0; iev < rf->neig; iev++)
 +    {
 +        dproj[iev] = projectx(edi, xcoll, rf->vec[iev]);
 +        dist += pow(dproj[iev] - rf->refproj[iev], 2);
 +    }
 +    dist = sqrt(dist);
 +
 +    /* Relative change needed to reach the enlarged radius */
 +    scale = (rf->stpsz[0]+rf->radius)/dist - 1.0;
 +    rf->radius += rf->stpsz[0];
 +
 +    /* Correct the positions along every radfix vector */
 +    for (iev = 0; iev < rf->neig; iev++)
 +    {
 +        dproj[iev] -= rf->refproj[iev];
 +        dproj[iev] /= edi->sav.sqrtm[iev];
 +        dproj[iev] *= scale;
 +
 +        for (iat = 0; iat < edi->sav.nr; iat++)
 +        {
 +            svmul(dproj[iev], rf->vec[iev][iat], dx);
 +            rvec_inc(xcoll[iat], dx);
 +        }
 +    }
 +
 +    sfree(dproj);
 +}
 +
 +
 +/* Apply the radacc constraint to the positions 'xcoll' in place.
 + * If the distance of the projections from the reference projections dropped
 + * below the stored radius, the positions are corrected such that the stored
 + * radius is restored; otherwise the current distance becomes the new radius
 + * and the positions stay as they are (the scaling factor is zero).
 + * Nothing is done if there are no radacc eigenvectors. 'cr' is not used.
 + * NOTE(review): sav.sqrtm[] is indexed with the eigenvector index - confirm. */
 +static void do_radacc(rvec *xcoll, t_edpar *edi, t_commrec *cr)
 +{
 +    t_eigvec *ra = &edi->vecs.radacc;
 +    int      iev, iat;
 +    real     *dproj, dist=0.0, scale=0.0;
 +    rvec     dx;
 +
 +
 +    if (ra->neig == 0)
 +        return;
 +
 +    snew(dproj,ra->neig);
 +
 +    /* Projections onto all radacc vectors and the distance from the reference */
 +    for (iev = 0; iev < ra->neig; iev++)
 +    {
 +        dproj[iev] = projectx(edi, xcoll, ra->vec[iev]);
 +        dist += pow(dproj[iev] - ra->refproj[iev], 2);
 +    }
 +    dist = sqrt(dist);
 +
 +    if (dist < ra->radius)
 +    {
 +        /* The radius decreased: scale back to the accepted radius */
 +        scale = ra->radius/dist - 1.0;
 +        dist  = ra->radius;
 +    }
 +    else
 +    {
 +        /* Accept the current distance as the new radius */
 +        ra->radius = dist;
 +    }
 +
 +    /* Correct the positions along every radacc vector */
 +    for (iev = 0; iev < ra->neig; iev++)
 +    {
 +        dproj[iev] -= ra->refproj[iev];
 +        dproj[iev] /= edi->sav.sqrtm[iev];
 +        dproj[iev] *= scale;
 +
 +        for (iat = 0; iat < edi->sav.nr; iat++)
 +        {
 +            svmul(dproj[iev], ra->vec[iev][iat], dx);
 +            rvec_inc(xcoll[iat], dx);
 +        }
 +    }
 +
 +    sfree(dproj);
 +}
 +
 +
 +/* Work buffer of do_radcon */
 +struct t_do_radcon
 +{
 +    /* One projection value per radcon eigenvector */
 +    real *proj;
 +};
 +
 +/* Apply the radcon constraint to the positions 'xcoll' in place.
 + * If the distance of the projections from the reference projections grew
 + * beyond the stored radius, the positions are corrected such that the stored
 + * radius is restored; otherwise the current distance becomes the new radius.
 + * The projections are kept in a work buffer that lives in edi->buf.
 + * 'cr' is not used.
 + * NOTE(review): sav.sqrtm[] is indexed with the eigenvector index - confirm. */
 +static void do_radcon(rvec *xcoll, t_edpar *edi, t_commrec *cr)
 +{
 +    int  i,j;
 +    real rad=0.0, ratio=0.0;
 +    struct t_do_radcon *loc;
 +    gmx_bool bFirst;
 +    rvec vec_dum;
 +
 +
 +    /* The work buffer structure is allocated on the first call; this happens
 +     * even if there are no radcon vectors at all */
 +    if(edi->buf->do_radcon != NULL)
 +    {
 +        bFirst = FALSE;
 +        loc    = edi->buf->do_radcon;
 +    }
 +    else
 +    {
 +        bFirst = TRUE;
 +        snew(edi->buf->do_radcon, 1);
 +    }
 +    loc = edi->buf->do_radcon;
 +
 +    if (edi->vecs.radcon.neig == 0)
 +        return;
 +
 +    /* The projection array itself is only allocated on the first call that
 +     * gets here */
 +    if (bFirst)
 +        snew(loc->proj, edi->vecs.radcon.neig);
 +
 +    /* loop over radcon vectors */
 +    for (i=0; i<edi->vecs.radcon.neig; i++)
 +    {
 +        /* calculate the projections, radius */
 +        loc->proj[i] = projectx(edi, xcoll, edi->vecs.radcon.vec[i]);
 +        rad += pow(loc->proj[i] - edi->vecs.radcon.refproj[i], 2);
 +    }
 +    rad = sqrt(rad);
 +    /* only correct when radius increased */
 +    if (rad > edi->vecs.radcon.radius)
 +    {
 +        ratio = edi->vecs.radcon.radius/rad - 1.0;
 +
 +        /* loop over radcon vectors */
 +        for (i=0; i<edi->vecs.radcon.neig; i++)
 +        {
 +            /* apply the correction */
 +            loc->proj[i] -= edi->vecs.radcon.refproj[i];
 +            loc->proj[i] /= edi->sav.sqrtm[i];
 +            loc->proj[i] *= ratio;
 +
 +            for (j=0; j<edi->sav.nr; j++)
 +            {
 +                svmul(loc->proj[i], edi->vecs.radcon.vec[i][j], vec_dum);
 +                rvec_inc(xcoll[j], vec_dum);
 +            }
 +        }
 +    }
 +    else
 +        edi->vecs.radcon.radius = rad;
 +
 +    /* After a correction the projections in the work buffer are recomputed
 +     * from the corrected positions.
 +     * NOTE(review): the radius computed here only ends up in the local
 +     * variable 'rad', which is not used afterwards - confirm whether this
 +     * block is still needed. */
 +    if (rad != edi->vecs.radcon.radius)
 +    {
 +        rad = 0.0;
 +        for (i=0; i<edi->vecs.radcon.neig; i++)
 +        {
 +            /* calculate the projections, radius */
 +            loc->proj[i] = projectx(edi, xcoll, edi->vecs.radcon.vec[i]);
 +            rad += pow(loc->proj[i] - edi->vecs.radcon.refproj[i], 2);
 +        }
 +        rad = sqrt(rad);
 +    }
 +}
 +
 +
 +/* Apply all ED constraints of the dataset 'edi' to the positions 'xcoll'.
 + * The constraints act on the deviation from the average structure, so the
 + * average positions are subtracted first and added back at the end.
 + * The two fixed-increment constraints (linfix, radfix) are skipped for a
 + * negative step number.
 + * NOTE(review): 'step' is handed to do_linfix / do_radfix, which take an
 + * int - confirm that the step number cannot exceed the int range. */
 +static void ed_apply_constraints(rvec *xcoll, t_edpar *edi, gmx_large_int_t step, t_commrec *cr)
 +{
 +    const gmx_bool bDoFixed = (step >= 0);
 +    int            iat;
 +
 +
 +    /* Work relative to the average structure */
 +    for (iat = 0; iat < edi->sav.nr; iat++)
 +    {
 +        rvec_dec(xcoll[iat], edi->sav.x[iat]);
 +    }
 +
 +    /* The constraints are applied in a fixed order */
 +    if (bDoFixed)
 +    {
 +        do_linfix(xcoll, edi, step, cr);
 +    }
 +    do_linacc(xcoll, edi, cr);
 +    if (bDoFixed)
 +    {
 +        do_radfix(xcoll, edi, step, cr);
 +    }
 +    do_radacc(xcoll, edi, cr);
 +    do_radcon(xcoll, edi, cr);
 +
 +    /* Back to absolute positions */
 +    for (iat = 0; iat < edi->sav.nr; iat++)
 +    {
 +        rvec_inc(xcoll[iat], edi->sav.x[iat]);
 +    }
 +}
 +
 +
 +/* Print one line with the title of an eigenvector group followed by the
 + * number and the current projection of each of its eigenvectors. */
 +static void write_edo_projections(FILE *out, const char *title, const t_eigvec *ev)
 +{
 +    int k;
 +
 +
 +    fprintf(out,"%s",title);
 +    for (k = 0; k < ev->neig; k++)
 +    {
 +        fprintf(out," %d: %12.5e ",ev->ieig[k],ev->xproj[k]);
 +    }
 +    fprintf(out,"\n");
 +}
 +
 +
 +/* Write the projections onto the eigenvectors of dataset number 'nr_edi' to
 + * the ED output file ed->edo. Nothing is written if the dataset does not
 + * need ED sampling at all. A step of -1 marks the initial projections, for
 + * any other step the step number, the dataset number and the rmsd are
 + * written in front of the projections. Empty eigenvector groups are skipped;
 + * for the three radius groups the current radius is written as well. */
 +static void write_edo(int nr_edi, t_edpar *edi, gmx_edsam_t ed, gmx_large_int_t step,real rmsd)
 +{
 +    char buf[22];
 +
 +
 +    if (!edi->bNeedDoEdsam)
 +    {
 +        return;
 +    }
 +
 +    /* Header */
 +    if (step != -1)
 +    {
 +        fprintf(ed->edo,"Step %s, ED #%d  ", gmx_step_str(step, buf), nr_edi);
 +        fprintf(ed->edo,"  RMSD %f nm\n",rmsd);
 +    }
 +    else
 +    {
 +        fprintf(ed->edo, "Initial projections:\n");
 +    }
 +
 +    /* One line of projections per non-empty eigenvector group */
 +    if (edi->vecs.mon.neig)
 +    {
 +        write_edo_projections(ed->edo,"  Monitor eigenvectors",&edi->vecs.mon);
 +    }
 +    if (edi->vecs.linfix.neig)
 +    {
 +        write_edo_projections(ed->edo,"  Linfix  eigenvectors",&edi->vecs.linfix);
 +    }
 +    if (edi->vecs.linacc.neig)
 +    {
 +        write_edo_projections(ed->edo,"  Linacc  eigenvectors",&edi->vecs.linacc);
 +    }
 +    if (edi->vecs.radfix.neig)
 +    {
 +        write_edo_projections(ed->edo,"  Radfix  eigenvectors",&edi->vecs.radfix);
 +        fprintf(ed->edo,"  fixed increment radius = %f\n", calc_radius(&edi->vecs.radfix));
 +    }
 +    if (edi->vecs.radacc.neig)
 +    {
 +        write_edo_projections(ed->edo,"  Radacc  eigenvectors",&edi->vecs.radacc);
 +        fprintf(ed->edo,"  acceptance radius      = %f\n", calc_radius(&edi->vecs.radacc));
 +    }
 +    if (edi->vecs.radcon.neig)
 +    {
 +        write_edo_projections(ed->edo,"  Radcon  eigenvectors",&edi->vecs.radcon);
 +        fprintf(ed->edo,"  contracting radius     = %f\n", calc_radius(&edi->vecs.radcon));
 +    }
 +}
 +
 +/* Tell whether any ED constraint is switched on for the dataset 'edi'.
 + * For the ED types eEDedsam and eEDflood the result is non-zero if at least
 + * one of the linfix, linacc, radfix, radacc or radcon groups contains
 + * eigenvectors; for every other type 0 is returned. */
 +static int ed_constraints(gmx_bool edtype, t_edpar *edi)
 +{
 +    if (edtype != eEDedsam && edtype != eEDflood)
 +    {
 +        return 0;
 +    }
 +
 +    return (edi->vecs.linfix.neig || edi->vecs.linacc.neig ||
 +            edi->vecs.radfix.neig || edi->vecs.radacc.neig ||
 +            edi->vecs.radcon.neig);
 +}
 +
 +
 +/* For flooding / umbrella sampling simulations: store the current reference
 + * projection 'refproj' of every flooding vector in its fixed counterpart
 + * 'refproj0'. */
 +static void copyEvecReference(t_eigvec* floodvecs)
 +{
 +    int iev;
 +
 +
 +    for (iev = 0; iev < floodvecs->neig; iev++)
 +    {
 +        floodvecs->refproj0[iev] = floodvecs->refproj[iev];
 +    }
 +}
 +
 +
 +void init_edsam(gmx_mtop_t  *mtop,   /* global topology                    */
 +                t_inputrec  *ir,     /* input record                       */
 +                t_commrec   *cr,     /* communication record               */
 +                gmx_edsam_t ed,      /* contains all ED data               */
 +                rvec        x[],     /* positions of the whole MD system   */
 +                matrix      box)     /* the box                            */
 +{
 +    t_edpar *edi = NULL;    /* points to a single edi data set */
 +    int     numedis=0;      /* keep track of the number of ED data sets in edi file */
 +    int     i,nr_edi;
 +    rvec    *x_pbc  = NULL; /* positions of the whole MD system with pbc removed  */
 +    rvec    *xfit   = NULL; /* the positions which will be fitted to the reference structure  */
 +    rvec    *xstart = NULL; /* the positions which are subject to ED sampling */
 +    rvec    fit_transvec;   /* translation ... */
 +    matrix  fit_rotmat;     /* ... and rotation from fit to reference structure */
 +
 +
 +    if (!DOMAINDECOMP(cr) && PAR(cr) && MASTER(cr))
 +        gmx_fatal(FARGS, "Please switch on domain decomposition to use essential dynamics in parallel.");
 +
 +    if (MASTER(cr))
 +        fprintf(stderr, "ED: Initializing essential dynamics constraints.\n");
 +
 +    /* Needed for initializing radacc radius in do_edsam */
 +    ed->bFirst = 1;
 +
 +    /* The input file is read by the master and the edi structures are
 +     * initialized here. Input is stored in ed->edpar. Then the edi
 +     * structures are transferred to the other nodes */
 +    if (MASTER(cr))
 +    {
 +        snew(ed->edpar,1);
 +        /* Read the whole edi file at once: */
 +        read_edi_file(ed,ed->edpar,mtop->natoms,cr);
 +
 +        /* Initialization for every ED/flooding dataset. Flooding uses one edi dataset per
 +         * flooding vector, Essential dynamics can be applied to more than one structure
 +         * as well, but will be done in the order given in the edi file, so
 +         * expect different results for different order of edi file concatenation! */
 +        edi=ed->edpar;
 +        while(edi != NULL)
 +        {
 +            init_edi(mtop,ir,cr,ed,edi);
 +
 +            /* Init flooding parameters if needed */
 +            init_flood(edi,ed,ir->delta_t,cr);
 +
 +            edi=edi->next_edi;
 +            numedis++;
 +        }
 +    }
 +
 +    /* The master does the work here. The other nodes get the positions
 +     * not before dd_partition_system which is called after init_edsam */
 +    if (MASTER(cr))
 +    {
 +        /* Remove pbc, make molecule whole.
 +         * When ir->bContinuation=TRUE this has already been done, but ok.
 +         */
 +        snew(x_pbc,mtop->natoms);
 +        m_rveccopy(mtop->natoms,x,x_pbc);
 +        do_pbc_first_mtop(NULL,ir->ePBC,box,mtop,x_pbc);
 +
 +        /* Reset pointer to first ED data set which contains the actual ED data */
 +        edi=ed->edpar;
 +
 +        /* Loop over all ED/flooding data sets (usually only one, though) */
 +        for (nr_edi = 1; nr_edi <= numedis; nr_edi++)
 +        {
 +            /* We use srenew to allocate memory since the size of the buffers
 +             * is likely to change with every ED dataset */
 +            srenew(xfit  , edi->sref.nr );
 +            srenew(xstart, edi->sav.nr  );
 +
 +            /* Extract the positions of the atoms to which will be fitted */
 +            for (i=0; i < edi->sref.nr; i++)
 +            {
 +                copy_rvec(x_pbc[edi->sref.anrs[i]], xfit[i]);
 +
 +                /* Save the sref positions such that in the next time step the molecule can
 +                 * be made whole again (in the parallel case) */
 +                if (PAR(cr))
 +                    copy_rvec(xfit[i], edi->sref.x_old[i]);
 +            }
 +
 +            /* Extract the positions of the atoms subject to ED sampling */
 +            for (i=0; i < edi->sav.nr; i++)
 +            {
 +                copy_rvec(x_pbc[edi->sav.anrs[i]], xstart[i]);
 +
 +                /* Save the sav positions such that in the next time step the molecule can
 +                 * be made whole again (in the parallel case) */
 +                if (PAR(cr))
 +                    copy_rvec(xstart[i], edi->sav.x_old[i]);
 +            }
 +
 +            /* Make the fit to the REFERENCE structure, get translation and rotation */
 +            fit_to_reference(xfit, fit_transvec, fit_rotmat, edi);
 +
 +            /* Output how well we fit to the reference at the start */
 +            translate_and_rotate(xfit, edi->sref.nr, fit_transvec, fit_rotmat);
 +            fprintf(stderr, "ED: Initial RMSD from reference after fit = %f nm (dataset #%d)\n",
 +                    rmsd_from_structure(xfit, &edi->sref), nr_edi);
 +
 +            /* Now apply the translation and rotation to the atoms on which ED sampling will be performed */
 +            translate_and_rotate(xstart, edi->sav.nr, fit_transvec, fit_rotmat);
 +
 +            /* calculate initial projections */
 +            project(xstart, edi);
 +
 +            /* process target structure, if required */
 +            if (edi->star.nr > 0)
 +            {
 +                fprintf(stderr, "ED: Fitting target structure to reference structure\n");
 +                /* get translation & rotation for fit of target structure to reference structure */
 +                fit_to_reference(edi->star.x, fit_transvec, fit_rotmat, edi);
 +                /* do the fit */
 +                translate_and_rotate(edi->star.x, edi->sav.nr, fit_transvec, fit_rotmat);
 +                rad_project(edi, edi->star.x, &edi->vecs.radcon, cr);
 +            } else
 +                rad_project(edi, xstart, &edi->vecs.radcon, cr);
 +
 +            /* process structure that will serve as origin of expansion circle */
 +            if ( (eEDflood == ed->eEDtype) && (FALSE == edi->flood.bConstForce) )
 +                fprintf(stderr, "ED: Setting center of flooding potential (0 = average structure)\n");
 +            if (edi->sori.nr > 0)
 +            {
 +                fprintf(stderr, "ED: Fitting origin structure to reference structure\n");
 +                /* fit this structure to reference structure */
 +                fit_to_reference(edi->sori.x, fit_transvec, fit_rotmat, edi);
 +                /* do the fit */
 +                translate_and_rotate(edi->sori.x, edi->sav.nr, fit_transvec, fit_rotmat);
 +                rad_project(edi, edi->sori.x, &edi->vecs.radacc, cr);
 +                rad_project(edi, edi->sori.x, &edi->vecs.radfix, cr);
 +                if ( (eEDflood == ed->eEDtype) && (FALSE == edi->flood.bConstForce) )
 +                {
 +                    fprintf(stderr, "ED: The ORIGIN structure will define the flooding potential center.\n");
 +                    /* Set center of flooding potential to the ORIGIN structure */
 +                    rad_project(edi, edi->sori.x, &edi->flood.vecs, cr);
 +                    /* We already know that no (moving) reference position was provided,
 +                     * therefore we can overwrite refproj[0]*/
 +                    copyEvecReference(&edi->flood.vecs);
 +                }
 +            }
 +            else /* No origin structure given */
 +            {
 +                rad_project(edi, xstart, &edi->vecs.radacc, cr);
 +                rad_project(edi, xstart, &edi->vecs.radfix, cr);
 +                if ( (eEDflood == ed->eEDtype) && (FALSE == edi->flood.bConstForce) )
 +                {
 +                    if (edi->flood.bHarmonic)
 +                    {
 +                        fprintf(stderr, "ED: A (possibly changing) ref. projection will define the flooding potential center.\n");
 +                        for (i=0; i<edi->flood.vecs.neig; i++)
 +                            edi->flood.vecs.refproj[i] = edi->flood.vecs.refproj0[i];
 +                    }
 +                    else
 +                    {
 +                        fprintf(stderr, "ED: The AVERAGE structure will define the flooding potential center.\n");
 +                        /* Set center of flooding potential to the center of the covariance matrix,
 +                         * i.e. the average structure, i.e. zero in the projected system */
 +                        for (i=0; i<edi->flood.vecs.neig; i++)
 +                            edi->flood.vecs.refproj[i] = 0.0;
 +                    }
 +                }
 +            }
 +            /* For convenience, output the center of the flooding potential for the eigenvectors */
 +            if ( (eEDflood == ed->eEDtype) && (FALSE == edi->flood.bConstForce) )
 +            {
 +                for (i=0; i<edi->flood.vecs.neig; i++)
 +                {
 +                    fprintf(stdout, "ED: EV %d flooding potential center: %11.4e", i, edi->flood.vecs.refproj[i]);
 +                    if (edi->flood.bHarmonic)
 +                        fprintf(stdout, " (adding %11.4e/timestep)", edi->flood.vecs.refprojslope[i]);
 +                    fprintf(stdout, "\n");
 +                }
 +            }
 +
 +            /* set starting projections for linsam */
 +            rad_project(edi, xstart, &edi->vecs.linacc, cr);
 +            rad_project(edi, xstart, &edi->vecs.linfix, cr);
 +
 +            /* Output to file, set the step to -1 so that write_edo knows it was called from init_edsam */
 +            if (ed->edo && !(ed->bStartFromCpt))
 +                write_edo(nr_edi, edi, ed, -1, 0);
 +
 +            /* Prepare for the next edi data set: */
 +            edi=edi->next_edi;
 +        }
 +        /* Cleaning up on the master node: */
 +        sfree(x_pbc);
 +        sfree(xfit);
 +        sfree(xstart);
 +
 +    } /* end of MASTER only section */
 +
 +    if (PAR(cr))
 +    {
 +        /* First let everybody know how many ED data sets to expect */
 +        gmx_bcast(sizeof(numedis), &numedis, cr);
 +        /* Broadcast the essential dynamics / flooding data to all nodes */
 +        broadcast_ed_data(cr, ed, numedis);
 +    }
 +    else
 +    {
 +        /* In the single-CPU case, point the local atom numbers pointers to the global
 +         * one, so that we can use the same notation in serial and parallel case: */
 +
 +        /* Loop over all ED data sets (usually only one, though) */
 +        edi=ed->edpar;
 +        for (nr_edi = 1; nr_edi <= numedis; nr_edi++)
 +        {
 +            edi->sref.anrs_loc = edi->sref.anrs;
 +            edi->sav.anrs_loc  = edi->sav.anrs;
 +            edi->star.anrs_loc = edi->star.anrs;
 +            edi->sori.anrs_loc = edi->sori.anrs;
 +            /* For the same reason as above, make a dummy c_ind array: */
 +            snew(edi->sav.c_ind, edi->sav.nr);
 +            /* Initialize the array */
 +            for (i=0; i<edi->sav.nr; i++)
 +                edi->sav.c_ind[i] = i;
 +            /* In the general case we will need a different-sized array for the reference indices: */
 +            if (!edi->bRefEqAv)
 +            {
 +                snew(edi->sref.c_ind, edi->sref.nr);
 +                for (i=0; i<edi->sref.nr; i++)
 +                    edi->sref.c_ind[i] = i;
 +            }
 +            /* Point to the very same array in case of other structures: */
 +            edi->star.c_ind = edi->sav.c_ind;
 +            edi->sori.c_ind = edi->sav.c_ind;
 +            /* In the serial case, the local number of atoms is the global one: */
 +            edi->sref.nr_loc = edi->sref.nr;
 +            edi->sav.nr_loc  = edi->sav.nr;
 +            edi->star.nr_loc = edi->star.nr;
 +            edi->sori.nr_loc = edi->sori.nr;
 +
 +            /* An on we go to the next edi dataset */
 +            edi=edi->next_edi;
 +        }
 +    }
 +
 +    /* Allocate space for ED buffer variables */
 +    /* Again, loop over ED data sets */
 +    edi=ed->edpar;
 +    for (nr_edi = 1; nr_edi <= numedis; nr_edi++)
 +    {
 +        /* Allocate space for ED buffer */
 +        snew(edi->buf, 1);
 +        snew(edi->buf->do_edsam, 1);
 +
 +        /* Space for collective ED buffer variables */
 +
 +        /* Collective positions of atoms with the average indices */
 +        snew(edi->buf->do_edsam->xcoll                  , edi->sav.nr);
 +        snew(edi->buf->do_edsam->shifts_xcoll           , edi->sav.nr); /* buffer for xcoll shifts */
 +        snew(edi->buf->do_edsam->extra_shifts_xcoll     , edi->sav.nr);
 +        /* Collective positions of atoms with the reference indices */
 +        if (!edi->bRefEqAv)
 +        {
 +            snew(edi->buf->do_edsam->xc_ref             , edi->sref.nr);
 +            snew(edi->buf->do_edsam->shifts_xc_ref      , edi->sref.nr); /* To store the shifts in */
 +            snew(edi->buf->do_edsam->extra_shifts_xc_ref, edi->sref.nr);
 +        }
 +
 +        /* Get memory for flooding forces */
 +        snew(edi->flood.forces_cartesian                , edi->sav.nr);
 +
 +#ifdef DUMPEDI
 +        /* Dump it all into one file per process */
 +        dump_edi(edi, cr, nr_edi);
 +#endif
 +
 +        /* An on we go to the next edi dataset */
 +        edi=edi->next_edi;
 +    }
 +
 +    /* Flush the edo file so that the user can check some things
 +     * when the simulation has started */
 +    if (ed->edo)
 +        fflush(ed->edo);
 +}
 +
 +/* Apply essential dynamics (ED) sampling for the current step.
 + *
 + * Returns at once if ed->eEDtype is eEDnone. Otherwise every dataset in the
 + * linked list starting at ed->edpar that has bNeedDoEdsam set is processed:
 + *  - the ED atom positions are assembled in buf->xcoll (plus buf->xc_ref
 + *    if the reference indices differ from the average ones),
 + *  - they are fitted to the reference structure,
 + *  - the radacc/radfix reference projections are refreshed when due,
 + *  - the ED constraints are applied once step >= edi->presteps,
 + *  - on output steps the projections are computed and the master writes
 + *    to the .edo file unless output is suppressed,
 + *  - unless only monitoring, the fit is removed and the corrected positions
 + *    of the local ED atoms are written back to xs; if v is not NULL the
 + *    velocities receive the correction dx/delta_t.
 + *
 + * The md argument is not referenced in this function.
 + * ed->bFirst is set to FALSE before returning (but not on the eEDnone
 + * early return).
 + */
 +void do_edsam(t_inputrec  *ir,
 +              gmx_large_int_t step,
 +              t_mdatoms   *md,
 +              t_commrec   *cr,
 +              rvec        xs[],   /* The local current positions on this processor */
 +              rvec        v[],    /* The velocities, may be NULL */
 +              matrix      box,
 +              gmx_edsam_t ed)
 +{
 +    int     i,edinr,iupdate=500; /* iupdate is handed to do_per_step() for the radacc update below */
 +    matrix  rotmat;         /* rotation matrix */
 +    rvec    transvec;       /* translation vector */
 +    rvec    dv,dx,x_unsh;   /* tmp vectors for velocity, distance, unshifted x coordinate */
 +    real    dt_1;           /* 1/dt */
 +    struct t_do_edsam *buf;
 +    t_edpar *edi;
 +    real    rmsdev=-1;      /* RMSD from reference structure prior to applying the constraints */
 +    gmx_bool bSuppress=FALSE; /* Suppress writing to the .edo file on the master? */
 +
 +
 +    /* Check if ED sampling has to be performed */
 +    if ( ed->eEDtype==eEDnone )
 +        return;
 +
 +    /* Suppress output on first call of do_edsam if
 +     * two-step sd2 integrator is used */
 +    if ( (ir->eI==eiSD2) && (v != NULL) )
 +        bSuppress = TRUE;
 +
 +    dt_1 = 1.0/ir->delta_t;
 +
 +    /* Loop over all ED datasets (usually one) */
 +    edi  = ed->edpar;
 +    edinr = 0;
 +    while (edi != NULL)
 +    {
 +        edinr++;
 +        if (edi->bNeedDoEdsam)
 +        {
 +
 +            buf=edi->buf->do_edsam;
 +
 +            if (ed->bFirst)
 +                /* initialise radacc radius for slope criterion */
 +                buf->oldrad=calc_radius(&edi->vecs.radacc);
 +
 +            /* Copy the positions into buf->xc* arrays and after ED
 +             * feed back corrections to the official positions */
 +
 +            /* Broadcast the ED positions such that every node has all of them
 +             * Every node contributes its local positions xs and stores it in
 +             * the collective buf->xcoll array. Note that for edinr > 1
 +             * xs could already have been modified by an earlier ED */
 +
 +            communicate_group_positions(cr, buf->xcoll, buf->shifts_xcoll, buf->extra_shifts_xcoll, buf->bUpdateShifts, xs,
 +                    edi->sav.nr, edi->sav.nr_loc, edi->sav.anrs_loc, edi->sav.c_ind, edi->sav.x_old,  box);
 +
 +#ifdef DEBUG_ED
 +            dump_xcoll(edi, buf, cr, step);
 +#endif
 +            /* Only assemble reference positions if their indices differ from the average ones */
 +            if (!edi->bRefEqAv)
 +                communicate_group_positions(cr, buf->xc_ref, buf->shifts_xc_ref, buf->extra_shifts_xc_ref, buf->bUpdateShifts, xs,
 +                        edi->sref.nr, edi->sref.nr_loc, edi->sref.anrs_loc, edi->sref.c_ind, edi->sref.x_old, box);
 +
 +            /* If bUpdateShifts was TRUE then the shifts have just been updated in get_positions.
 +             * We do not need to update the shifts until the next NS step */
 +            buf->bUpdateShifts = FALSE;
 +
 +            /* Now all nodes have all of the ED positions in buf->xcoll,
 +             * as well as the indices in edi->sav.anrs */
 +
 +            /* Fit the reference indices to the reference structure */
 +            if (edi->bRefEqAv)
 +                fit_to_reference(buf->xcoll , transvec, rotmat, edi);
 +            else
 +                fit_to_reference(buf->xc_ref, transvec, rotmat, edi);
 +
 +            /* Now apply the translation and rotation to the ED structure */
 +            translate_and_rotate(buf->xcoll, edi->sav.nr, transvec, rotmat);
 +
 +            /* Find out how well we fit to the reference (just for output steps) */
 +            if (do_per_step(step,edi->outfrq) && MASTER(cr))
 +            {
 +                if (edi->bRefEqAv)
 +                {
 +                    /* Indices of reference and average structures are identical,
 +                     * thus we can calculate the rmsd to SREF using xcoll */
 +                    rmsdev = rmsd_from_structure(buf->xcoll,&edi->sref);
 +                }
 +                else
 +                {
 +                    /* We have to translate & rotate the reference atoms first */
 +                    translate_and_rotate(buf->xc_ref, edi->sref.nr, transvec, rotmat);
 +                    rmsdev = rmsd_from_structure(buf->xc_ref,&edi->sref);
 +                }
 +            }
 +
 +            /* update radsam references, when required */
 +            if (do_per_step(step,edi->maxedsteps) && step >= edi->presteps)
 +            {
 +                project(buf->xcoll, edi);
 +                rad_project(edi, buf->xcoll, &edi->vecs.radacc, cr);
 +                rad_project(edi, buf->xcoll, &edi->vecs.radfix, cr);
 +                buf->oldrad=-1.e5;
 +            }
 +
 +            /* update radacc references, when required */
 +            if (do_per_step(step,iupdate) && step >= edi->presteps)
 +            {
 +                edi->vecs.radacc.radius = calc_radius(&edi->vecs.radacc);
 +                if (edi->vecs.radacc.radius - buf->oldrad < edi->slope)
 +                {
 +                    project(buf->xcoll, edi);
 +                    rad_project(edi, buf->xcoll, &edi->vecs.radacc, cr);
 +                    buf->oldrad = 0.0;
 +                } else
 +                    buf->oldrad = edi->vecs.radacc.radius;
 +            }
 +
 +            /* apply the constraints */
 +            if (step >= edi->presteps && ed_constraints(ed->eEDtype, edi))
 +            {
 +                /* ED constraints should be applied already in the first MD step
 +                 * (which is step 0), therefore we pass step+1 to the routine */
 +                ed_apply_constraints(buf->xcoll, edi, step+1 - ir->init_step, cr);
 +            }
 +
 +            /* write to edo, when required */
 +            if (do_per_step(step,edi->outfrq))
 +            {
 +                project(buf->xcoll, edi);
 +                if (MASTER(cr) && !bSuppress)
 +                    write_edo(edinr, edi, ed, step, rmsdev);
 +            }
 +
 +            /* Copy back the positions unless monitoring only */
 +            if (ed_constraints(ed->eEDtype, edi))
 +            {
 +                /* remove fitting */
 +                rmfit(edi->sav.nr, buf->xcoll, transvec, rotmat);
 +
 +                /* Copy the ED corrected positions into the coordinate array */
 +                /* Each node copies its local part. In the serial case, edi->sav.nr_loc
 +                 * is the total number of ED atoms */
 +                for (i=0; i<edi->sav.nr_loc; i++)
 +                {
 +                    /* Unshift local ED coordinate and store in x_unsh */
 +                    ed_unshift_single_coord(box, buf->xcoll[edi->sav.c_ind[i]],
 +                                            buf->shifts_xcoll[edi->sav.c_ind[i]], x_unsh);
 +
 +                    /* dx is the ED correction to the positions: */
 +                    rvec_sub(x_unsh, xs[edi->sav.anrs_loc[i]], dx);
 +
 +                    if (v != NULL)
 +                    {
 +                        /* dv is the ED correction to the velocity: */
 +                        svmul(dt_1, dx, dv);
 +                        /* apply the velocity correction: */
 +                        rvec_inc(v[edi->sav.anrs_loc[i]], dv);
 +                    }
 +                    /* Finally apply the position correction due to ED: */
 +                    copy_rvec(x_unsh, xs[edi->sav.anrs_loc[i]]);
 +                }
 +            }
 +        } /* END of if (edi->bNeedDoEdsam) */
 +
 +        /* Prepare for the next ED dataset */
 +        edi = edi->next_edi;
 +
 +    } /* END of loop over ED datasets */
 +
 +    ed->bFirst = FALSE;
 +}
diff --cc src/gromacs/mdlib/mdebin.c
index 1d7ca41abd,0000000000..204a1305c9
mode 100644,000000..100644
--- a/src/gromacs/mdlib/mdebin.c
+++ b/src/gromacs/mdlib/mdebin.c
@@@ -1,1254 -1,0 +1,1236 @@@
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <string.h>
 +#include <float.h>
 +#include "typedefs.h"
 +#include "string2.h"
 +#include "mdebin.h"
 +#include "smalloc.h"
 +#include "physics.h"
 +#include "enxio.h"
 +#include "vec.h"
 +#include "disre.h"
 +#include "main.h"
 +#include "network.h"
 +#include "names.h"
 +#include "orires.h"
 +#include "constr.h"
 +#include "mtop_util.h"
 +#include "xvgr.h"
 +#include "gmxfio.h"
 +
 +#include "mdebin_bar.h"
 +
 +/* Name tables for the energy terms registered in init_mdebin(): each array
 + * is passed to get_ebin_space() as the list of entry names (constraint rmsd,
 + * box dimensions, volume, density, pV, enthalpy and box velocities).
 + */
 +static const char *conrmsd_nm[] = { "Constr. rmsd", "Constr.2 rmsd" };
 +
 +static const char *boxs_nm[] = { "Box-X", "Box-Y", "Box-Z" };
 +
 +static const char *tricl_boxs_nm[] = { 
 +    "Box-XX", "Box-YY", "Box-ZZ",
 +    "Box-YX", "Box-ZX", "Box-ZY" 
 +};
 +
 +static const char *vol_nm[] = { "Volume" };
 +
 +static const char *dens_nm[] = {"Density" };
 +
 +static const char *pv_nm[] = {"pV" };
 +
 +static const char *enthalpy_nm[] = {"Enthalpy" };
 +
 +static const char *boxvel_nm[] = {
 +    "Box-Vel-XX", "Box-Vel-YY", "Box-Vel-ZZ",
 +    "Box-Vel-YX", "Box-Vel-ZX", "Box-Vel-ZY"
 +};
 +/* Entry counts for the two box-name tables; asize() is defined outside this
 + * file chunk and presumably yields the number of array elements. */
 +#define NBOXS asize(boxs_nm)
 +#define NTRICLBOXS asize(tricl_boxs_nm)
 +
- static gmx_bool bTricl,bDynBox;
- static int  f_nre=0,epc,etc,nCrmsd;
- 
- 
- 
- 
 +
 +t_mdebin *init_mdebin(ener_file_t fp_ene,
 +                      const gmx_mtop_t *mtop,
 +                      const t_inputrec *ir,
 +                      FILE *fp_dhdl)
 +{
 +    const char *ener_nm[F_NRE];
 +    static const char *vir_nm[] = {
 +        "Vir-XX", "Vir-XY", "Vir-XZ",
 +        "Vir-YX", "Vir-YY", "Vir-YZ",
 +        "Vir-ZX", "Vir-ZY", "Vir-ZZ"
 +    };
 +    static const char *sv_nm[] = {
 +        "ShakeVir-XX", "ShakeVir-XY", "ShakeVir-XZ",
 +        "ShakeVir-YX", "ShakeVir-YY", "ShakeVir-YZ",
 +        "ShakeVir-ZX", "ShakeVir-ZY", "ShakeVir-ZZ"
 +    };
 +    static const char *fv_nm[] = {
 +        "ForceVir-XX", "ForceVir-XY", "ForceVir-XZ",
 +        "ForceVir-YX", "ForceVir-YY", "ForceVir-YZ",
 +        "ForceVir-ZX", "ForceVir-ZY", "ForceVir-ZZ"
 +    };
 +    static const char *pres_nm[] = {
 +        "Pres-XX","Pres-XY","Pres-XZ",
 +        "Pres-YX","Pres-YY","Pres-YZ",
 +        "Pres-ZX","Pres-ZY","Pres-ZZ"
 +    };
 +    static const char *surft_nm[] = {
 +        "#Surf*SurfTen"
 +    };
 +    static const char *mu_nm[] = {
 +        "Mu-X", "Mu-Y", "Mu-Z"
 +    };
 +    static const char *vcos_nm[] = {
 +        "2CosZ*Vel-X"
 +    };
 +    static const char *visc_nm[] = {
 +        "1/Viscosity"
 +    };
 +    static const char *baro_nm[] = {
 +        "Barostat"
 +    };
 +
 +    char     **grpnms;
 +    const gmx_groups_t *groups;
 +    char     **gnm;
 +    char     buf[256];
 +    const char     *bufi;
 +    t_mdebin *md;
 +    int      i,j,ni,nj,n,nh,k,kk,ncon,nset;
 +    gmx_bool     bBHAM,bNoseHoover,b14;
 +
 +    snew(md,1);
 +
 +    if (EI_DYNAMICS(ir->eI))
 +    {
 +        md->delta_t = ir->delta_t;
 +    }
 +    else
 +    {
 +        md->delta_t = 0;
 +    }
 +
 +    groups = &mtop->groups;
 +
 +    bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
 +    b14   = (gmx_mtop_ftype_count(mtop,F_LJ14) > 0 ||
 +             gmx_mtop_ftype_count(mtop,F_LJC14_Q) > 0);
 +
 +    ncon = gmx_mtop_ftype_count(mtop,F_CONSTR);
 +    nset = gmx_mtop_ftype_count(mtop,F_SETTLE);
 +    md->bConstr    = (ncon > 0 || nset > 0);
 +    md->bConstrVir = FALSE;
 +    if (md->bConstr) {
 +        if (ncon > 0 && ir->eConstrAlg == econtLINCS) {
 +            if (ir->eI == eiSD2)
 +                md->nCrmsd = 2;
 +            else
 +                md->nCrmsd = 1;
 +        }
 +        md->bConstrVir = (getenv("GMX_CONSTRAINTVIR") != NULL);
 +    } else {
 +        md->nCrmsd = 0;
 +    }
 +
 +    /* Energy monitoring */
 +    for(i=0;i<egNR;i++)
 +    {
 +        md->bEInd[i]=FALSE;
 +    }
 +
 +#ifndef GMX_OPENMM
 +    for(i=0; i<F_NRE; i++)
 +    {
 +        md->bEner[i] = FALSE;
 +        if (i == F_LJ)
 +            md->bEner[i] = !bBHAM;
 +        else if (i == F_BHAM)
 +            md->bEner[i] = bBHAM;
 +        else if (i == F_EQM)
 +            md->bEner[i] = ir->bQMMM;
 +        else if (i == F_COUL_LR)
 +            md->bEner[i] = (ir->rcoulomb > ir->rlist);
 +        else if (i == F_LJ_LR)
 +            md->bEner[i] = (!bBHAM && ir->rvdw > ir->rlist);
 +        else if (i == F_BHAM_LR)
 +            md->bEner[i] = (bBHAM && ir->rvdw > ir->rlist);
 +        else if (i == F_RF_EXCL)
 +            md->bEner[i] = (EEL_RF(ir->coulombtype) && ir->coulombtype != eelRF_NEC);
 +        else if (i == F_COUL_RECIP)
 +            md->bEner[i] = EEL_FULL(ir->coulombtype);
 +        else if (i == F_LJ14)
 +            md->bEner[i] = b14;
 +        else if (i == F_COUL14)
 +            md->bEner[i] = b14;
 +        else if (i == F_LJC14_Q || i == F_LJC_PAIRS_NB)
 +            md->bEner[i] = FALSE;
 +        else if ((i == F_DVDL) || (i == F_DKDL))
 +            md->bEner[i] = (ir->efep != efepNO);
 +        else if (i == F_DHDL_CON)
 +            md->bEner[i] = (ir->efep != efepNO && md->bConstr);
 +        else if ((interaction_function[i].flags & IF_VSITE) ||
 +                 (i == F_CONSTR) || (i == F_CONSTRNC) || (i == F_SETTLE))
 +            md->bEner[i] = FALSE;
 +        else if ((i == F_COUL_SR) || (i == F_EPOT) || (i == F_PRES)  || (i==F_EQM))
 +            md->bEner[i] = TRUE;
 +        else if ((i == F_GBPOL) && ir->implicit_solvent==eisGBSA)
 +            md->bEner[i] = TRUE;
 +        else if ((i == F_NPSOLVATION) && ir->implicit_solvent==eisGBSA && (ir->sa_algorithm != esaNO))
 +            md->bEner[i] = TRUE;
 +        else if ((i == F_GB12) || (i == F_GB13) || (i == F_GB14))
 +            md->bEner[i] = FALSE;
 +        else if ((i == F_ETOT) || (i == F_EKIN) || (i == F_TEMP))
 +            md->bEner[i] = EI_DYNAMICS(ir->eI);
 +        else if (i==F_VTEMP) 
 +            md->bEner[i] =  (EI_DYNAMICS(ir->eI) && getenv("GMX_VIRIAL_TEMPERATURE"));
 +        else if (i == F_DISPCORR || i == F_PDISPCORR)
 +            md->bEner[i] = (ir->eDispCorr != edispcNO);
 +        else if (i == F_DISRESVIOL)
 +            md->bEner[i] = (gmx_mtop_ftype_count(mtop,F_DISRES) > 0);
 +        else if (i == F_ORIRESDEV)
 +            md->bEner[i] = (gmx_mtop_ftype_count(mtop,F_ORIRES) > 0);
 +        else if (i == F_CONNBONDS)
 +            md->bEner[i] = FALSE;
 +        else if (i == F_COM_PULL)
 +            md->bEner[i] = (ir->ePull == epullUMBRELLA || ir->ePull == epullCONST_F || ir->bRot);
 +        else if (i == F_ECONSERVED)
 +            md->bEner[i] = ((ir->etc == etcNOSEHOOVER || ir->etc == etcVRESCALE) &&
 +                            (ir->epc == epcNO || ir->epc==epcMTTK));
 +        else
 +            md->bEner[i] = (gmx_mtop_ftype_count(mtop,i) > 0);
 +    }
 +#else
 +    /* OpenMM always produces only the following 4 energy terms */
 +    md->bEner[F_EPOT] = TRUE;
 +    md->bEner[F_EKIN] = TRUE;
 +    md->bEner[F_ETOT] = TRUE;
 +    md->bEner[F_TEMP] = TRUE;
 +#endif
 +
 +    md->f_nre=0;
 +    for(i=0; i<F_NRE; i++)
 +    {
 +        if (md->bEner[i])
 +        {
 +            /* FIXME: The constness should not be cast away */
 +            /*ener_nm[f_nre]=(char *)interaction_function[i].longname;*/
 +            ener_nm[md->f_nre]=interaction_function[i].longname;
 +            md->f_nre++;
 +        }
 +    }
 +
 +    md->epc = ir->epc;
-     for (i=0;i<DIM;i++) 
-     {
-         for (j=0;j<DIM;j++) 
-         {
-             md->ref_p[i][j] = ir->ref_p[i][j];
-         }
-     }
++    md->bDiagPres = !TRICLINIC(ir->ref_p);
++    md->ref_p = (ir->ref_p[XX][XX]+ir->ref_p[YY][YY]+ir->ref_p[ZZ][ZZ])/DIM;
 +    md->bTricl = TRICLINIC(ir->compress) || TRICLINIC(ir->deform);
 +    md->bDynBox = DYNAMIC_BOX(*ir);
 +    md->etc = ir->etc;
 +    md->bNHC_trotter = IR_NVT_TROTTER(ir);
 +    md->bMTTK = IR_NPT_TROTTER(ir);
 +
 +    md->ebin  = mk_ebin();
 +    /* Pass NULL for unit to let get_ebin_space determine the units
 +     * for interaction_function[i].longname
 +     */
 +    md->ie    = get_ebin_space(md->ebin,md->f_nre,ener_nm,NULL);
 +    if (md->nCrmsd)
 +    {
 +        /* This should be called directly after the call for md->ie,
 +         * such that md->iconrmsd follows directly in the list.
 +         */
 +        md->iconrmsd = get_ebin_space(md->ebin,md->nCrmsd,conrmsd_nm,"");
 +    }
 +    if (md->bDynBox)
 +    {
 +        md->ib    = get_ebin_space(md->ebin, 
 +                                   md->bTricl ? NTRICLBOXS : NBOXS, 
 +                                   md->bTricl ? tricl_boxs_nm : boxs_nm,
 +                                   unit_length);
 +        md->ivol  = get_ebin_space(md->ebin, 1, vol_nm,  unit_volume);
 +        md->idens = get_ebin_space(md->ebin, 1, dens_nm, unit_density_SI);
-         md->ipv   = get_ebin_space(md->ebin, 1, pv_nm,   unit_energy);
-         md->ienthalpy = get_ebin_space(md->ebin, 1, enthalpy_nm,   unit_energy);
++        if (md->bDiagPres)
++        {
++            md->ipv   = get_ebin_space(md->ebin, 1, pv_nm,   unit_energy);
++            md->ienthalpy = get_ebin_space(md->ebin, 1, enthalpy_nm,   unit_energy);
++        }
 +    }
 +    if (md->bConstrVir)
 +    {
 +        md->isvir = get_ebin_space(md->ebin,asize(sv_nm),sv_nm,unit_energy);
 +        md->ifvir = get_ebin_space(md->ebin,asize(fv_nm),fv_nm,unit_energy);
 +    }
 +    md->ivir   = get_ebin_space(md->ebin,asize(vir_nm),vir_nm,unit_energy);
 +    md->ipres  = get_ebin_space(md->ebin,asize(pres_nm),pres_nm,unit_pres_bar);
 +    md->isurft = get_ebin_space(md->ebin,asize(surft_nm),surft_nm,
 +                                unit_surft_bar);
 +    if (md->epc == epcPARRINELLORAHMAN || md->epc == epcMTTK)
 +    {
 +        md->ipc = get_ebin_space(md->ebin,md->bTricl ? 6 : 3,
 +                                 boxvel_nm,unit_vel);
 +    }
 +    md->imu    = get_ebin_space(md->ebin,asize(mu_nm),mu_nm,unit_dipole_D);
 +    if (ir->cos_accel != 0)
 +    {
 +        md->ivcos = get_ebin_space(md->ebin,asize(vcos_nm),vcos_nm,unit_vel);
 +        md->ivisc = get_ebin_space(md->ebin,asize(visc_nm),visc_nm,
 +                                   unit_invvisc_SI);
 +    }
 +
 +    /* Energy monitoring */
 +    for(i=0;i<egNR;i++)
 +    {
 +        md->bEInd[i] = FALSE;
 +    }
 +    md->bEInd[egCOULSR] = TRUE;
 +    md->bEInd[egLJSR  ] = TRUE;
 +
 +    if (ir->rcoulomb > ir->rlist)
 +    {
 +        md->bEInd[egCOULLR] = TRUE;
 +    }
 +    if (!bBHAM)
 +    {
 +        if (ir->rvdw > ir->rlist)
 +        {
 +            md->bEInd[egLJLR]   = TRUE;
 +        }
 +    }
 +    else
 +    {
 +        md->bEInd[egLJSR]   = FALSE;
 +        md->bEInd[egBHAMSR] = TRUE;
 +        if (ir->rvdw > ir->rlist)
 +        {
 +            md->bEInd[egBHAMLR]   = TRUE;
 +        }
 +    }
 +    if (b14)
 +    {
 +        md->bEInd[egLJ14] = TRUE;
 +        md->bEInd[egCOUL14] = TRUE;
 +    }
 +    md->nEc=0;
 +    for(i=0; (i<egNR); i++)
 +    {
 +        if (md->bEInd[i])
 +        {
 +            md->nEc++;
 +        }
 +    }
 +
 +    n=groups->grps[egcENER].nr;
 +    md->nEg=n;
 +    md->nE=(n*(n+1))/2;
 +    snew(md->igrp,md->nE);
 +    if (md->nE > 1)
 +    {
 +        n=0;
 +        snew(gnm,md->nEc);
 +        for(k=0; (k<md->nEc); k++)
 +        {
 +            snew(gnm[k],STRLEN);
 +        }
 +        for(i=0; (i<groups->grps[egcENER].nr); i++)
 +        {
 +            ni=groups->grps[egcENER].nm_ind[i];
 +            for(j=i; (j<groups->grps[egcENER].nr); j++)
 +            {
 +                nj=groups->grps[egcENER].nm_ind[j];
 +                for(k=kk=0; (k<egNR); k++)
 +                {
 +                    if (md->bEInd[k])
 +                    {
 +                        sprintf(gnm[kk],"%s:%s-%s",egrp_nm[k],
 +                                *(groups->grpname[ni]),*(groups->grpname[nj]));
 +                        kk++;
 +                    }
 +                }
 +                md->igrp[n]=get_ebin_space(md->ebin,md->nEc,
 +                                           (const char **)gnm,unit_energy);
 +                n++;
 +            }
 +        }
 +        for(k=0; (k<md->nEc); k++)
 +        {
 +            sfree(gnm[k]);
 +        }
 +        sfree(gnm);
 +
 +        if (n != md->nE)
 +        {
 +            gmx_incons("Number of energy terms wrong");
 +        }
 +    }
 +
 +    md->nTC=groups->grps[egcTC].nr;
 +    md->nNHC = ir->opts.nhchainlength; /* shorthand for number of NH chains */ 
 +    if (md->bMTTK)
 +    {
 +        md->nTCP = 1;  /* assume only one possible coupling system for barostat 
 +                          for now */
 +    } 
 +    else 
 +    {
 +        md->nTCP = 0;
 +    }
 +
 +    if (md->etc == etcNOSEHOOVER) {
 +        if (md->bNHC_trotter) { 
 +            md->mde_n = 2*md->nNHC*md->nTC;
 +        }
 +        else 
 +        {
 +            md->mde_n = 2*md->nTC;
 +        }
 +        if (md->epc == epcMTTK)
 +        {
 +            md->mdeb_n = 2*md->nNHC*md->nTCP;
 +        }
 +    } else { 
 +        md->mde_n = md->nTC;
 +        md->mdeb_n = 0;
 +    }
 +
 +    snew(md->tmp_r,md->mde_n);
 +    snew(md->tmp_v,md->mde_n);
 +    snew(md->grpnms,md->mde_n);
 +    grpnms = md->grpnms;
 +
 +    for(i=0; (i<md->nTC); i++)
 +    {
 +        ni=groups->grps[egcTC].nm_ind[i];
 +        sprintf(buf,"T-%s",*(groups->grpname[ni]));
 +        grpnms[i]=strdup(buf);
 +    }
 +    md->itemp=get_ebin_space(md->ebin,md->nTC,(const char **)grpnms,
 +                             unit_temp_K);
 +
 +    bNoseHoover = (getenv("GMX_NOSEHOOVER_CHAINS") != NULL); /* whether to print Nose-Hoover chains */
 +
 +    if (md->etc == etcNOSEHOOVER)
 +    {
 +        if (bNoseHoover) 
 +        {
 +            if (md->bNHC_trotter) 
 +            {
 +                for(i=0; (i<md->nTC); i++) 
 +                {
 +                    ni=groups->grps[egcTC].nm_ind[i];
 +                    bufi = *(groups->grpname[ni]);
 +                    for(j=0; (j<md->nNHC); j++) 
 +                    {
 +                        sprintf(buf,"Xi-%d-%s",j,bufi);
 +                        grpnms[2*(i*md->nNHC+j)]=strdup(buf);
 +                        sprintf(buf,"vXi-%d-%s",j,bufi);
 +                        grpnms[2*(i*md->nNHC+j)+1]=strdup(buf);
 +                    }
 +                }
 +                md->itc=get_ebin_space(md->ebin,md->mde_n,
 +                                       (const char **)grpnms,unit_invtime);
 +                if (md->bMTTK) 
 +                {
 +                    for(i=0; (i<md->nTCP); i++) 
 +                    {
 +                        bufi = baro_nm[0];  /* All barostat DOF's together for now. */
 +                        for(j=0; (j<md->nNHC); j++) 
 +                        {
 +                            sprintf(buf,"Xi-%d-%s",j,bufi);
 +                            grpnms[2*(i*md->nNHC+j)]=strdup(buf);
 +                            sprintf(buf,"vXi-%d-%s",j,bufi);
 +                            grpnms[2*(i*md->nNHC+j)+1]=strdup(buf);
 +                        }
 +                    }
 +                    md->itcb=get_ebin_space(md->ebin,md->mdeb_n,
 +                                            (const char **)grpnms,unit_invtime);
 +                }
 +            } 
 +            else
 +            {
 +                for(i=0; (i<md->nTC); i++) 
 +                {
 +                    ni=groups->grps[egcTC].nm_ind[i];
 +                    bufi = *(groups->grpname[ni]);
 +                    sprintf(buf,"Xi-%s",bufi);
 +                    grpnms[2*i]=strdup(buf);
 +                    sprintf(buf,"vXi-%s",bufi);
 +                    grpnms[2*i+1]=strdup(buf);
 +                }
 +                md->itc=get_ebin_space(md->ebin,md->mde_n,
 +                                       (const char **)grpnms,unit_invtime);
 +            }
 +        }
 +    }
 +    else if (md->etc == etcBERENDSEN || md->etc == etcYES || 
 +             md->etc == etcVRESCALE)
 +    {
 +        for(i=0; (i<md->nTC); i++)
 +        {
 +            ni=groups->grps[egcTC].nm_ind[i];
 +            sprintf(buf,"Lamb-%s",*(groups->grpname[ni]));
 +            grpnms[i]=strdup(buf);
 +        }
 +        md->itc=get_ebin_space(md->ebin,md->mde_n,(const char **)grpnms,"");
 +    }
 +
 +    sfree(grpnms);
 +
 +
 +    md->nU=groups->grps[egcACC].nr;
 +    if (md->nU > 1)
 +    {
 +        snew(grpnms,3*md->nU);
 +        for(i=0; (i<md->nU); i++)
 +        {
 +            ni=groups->grps[egcACC].nm_ind[i];
 +            sprintf(buf,"Ux-%s",*(groups->grpname[ni]));
 +            grpnms[3*i+XX]=strdup(buf);
 +            sprintf(buf,"Uy-%s",*(groups->grpname[ni]));
 +            grpnms[3*i+YY]=strdup(buf);
 +            sprintf(buf,"Uz-%s",*(groups->grpname[ni]));
 +            grpnms[3*i+ZZ]=strdup(buf);
 +        }
 +        md->iu=get_ebin_space(md->ebin,3*md->nU,(const char **)grpnms,unit_vel);
 +        sfree(grpnms);
 +    }
 +
 +    if ( fp_ene )
 +    {
 +        do_enxnms(fp_ene,&md->ebin->nener,&md->ebin->enm);
 +    }
 +
 +    md->print_grpnms=NULL;
 +
 +    /* check whether we're going to write dh histograms */
 +    md->dhc=NULL; 
 +    if (ir->separate_dhdl_file == sepdhdlfileNO )
 +    {
 +        int i;
 +        snew(md->dhc, 1);
 +
 +        mde_delta_h_coll_init(md->dhc, ir);
 +        md->fp_dhdl = NULL;
 +    }
 +    else
 +    {
 +        md->fp_dhdl = fp_dhdl;
 +    }
 +    md->dhdl_derivatives = (ir->dhdl_derivatives==dhdlderivativesYES);
 +    return md;
 +}
 +
 +/* Create the dH/dlambda xvg file and write its header.
 + *
 + * The file is opened with gmx_fio_fopen in "w+" mode and receives a title,
 + * axis labels and a subtitle holding the first reference temperature
 + * (ir->opts.ref_t[0]) and, when ir->delta_lambda is zero, ir->init_lambda.
 + * When ir->n_flambda > 0 a legend is written as well: an optional
 + * dH/dlambda entry (only when ir->dhdl_derivatives is dhdlderivativesYES)
 + * followed by one Delta H entry per value in ir->flambda.
 + *
 + * Returns the open file pointer obtained from gmx_fio_fopen.
 + */
 +FILE *open_dhdl(const char *filename,const t_inputrec *ir,
 +                const output_env_t oenv)
 +{
 +    const char *dhdl_str   = "dH/d\\lambda";
 +    const char *deltah_str = "\\DeltaH";
 +    const char *lambda_str = "\\lambda";
 +    char       plot_title[STRLEN],x_label[STRLEN],y_label[STRLEN];
 +    char       text[STRLEN];
 +    gmx_bool   bDerivatives;
 +    FILE       *out;
 +
 +    bDerivatives = (ir->dhdl_derivatives == dhdlderivativesYES);
 +
 +    /* Title and axis labels */
 +    sprintf(x_label,"%s (%s)","Time",unit_time);
 +    if (ir->n_flambda != 0)
 +    {
 +        sprintf(plot_title,"%s, %s",dhdl_str,deltah_str);
 +        sprintf(y_label,"(%s)",unit_energy);
 +    }
 +    else
 +    {
 +        sprintf(plot_title,"%s",dhdl_str);
 +        sprintf(y_label,"%s (%s %s)",
 +                dhdl_str,unit_energy,"[\\lambda]\\S-1\\N");
 +    }
 +
 +    out = gmx_fio_fopen(filename,"w+");
 +    xvgr_header(out,plot_title,x_label,y_label,exvggtXNY,oenv);
 +
 +    /* Subtitle: lambda is only mentioned when delta_lambda is zero */
 +    if (ir->delta_lambda != 0)
 +    {
 +        sprintf(text,"T = %g (K)",
 +                ir->opts.ref_t[0]);
 +    }
 +    else
 +    {
 +        sprintf(text,"T = %g (K), %s = %g",
 +                ir->opts.ref_t[0],lambda_str,ir->init_lambda);
 +    }
 +    xvgr_subtitle(out,text,oenv);
 +
 +    if (ir->n_flambda > 0)
 +    {
 +        char **legend;
 +        int  nlegend,nfilled,ifl,k;
 +
 +        /* g_bar has to determine the lambda values used in this simulation
 +         * from this xvg legend, so the entry format must not change.
 +         */
 +        nlegend = ir->n_flambda;
 +        if (bDerivatives)
 +        {
 +            nlegend++;
 +        }
 +        snew(legend,nlegend);
 +
 +        nfilled = 0;
 +        if (bDerivatives)
 +        {
 +            sprintf(text,"%s %s %g",dhdl_str,lambda_str,ir->init_lambda);
 +            legend[nfilled] = gmx_strdup(text);
 +            nfilled++;
 +        }
 +        for(ifl=0; ifl<ir->n_flambda; ifl++)
 +        {
 +            sprintf(text,"%s %s %g",deltah_str,lambda_str,ir->flambda[ifl]);
 +            legend[nfilled] = gmx_strdup(text);
 +            nfilled++;
 +        }
 +        xvgr_legend(out,nlegend,(const char**)legend,oenv);
 +
 +        /* The legend strings are only needed for the call above */
 +        for(k=0; k<nlegend; k++)
 +        {
 +            sfree(legend[k]);
 +        }
 +        sfree(legend);
 +    }
 +
 +    return out;
 +}
 +
 +/* Pack the entries of e[] whose md->bEner flag is set into ecpy[],
 + * preserving their order. All F_NRE slots of e[] are inspected; the number
 + * of packed values must equal md->f_nre, otherwise gmx_incons is called.
 + */
 +static void copy_energy(t_mdebin *md, real e[],real ecpy[])
 +{
 +    int src,ncopied;
 +
 +    ncopied = 0;
 +    for(src=0; src<F_NRE; src++)
 +    {
 +        if (md->bEner[src])
 +        {
 +            ecpy[ncopied] = e[src];
 +            ncopied++;
 +        }
 +    }
 +
 +    if (ncopied != md->f_nre)
 +    {
 +        gmx_incons("Number of energy terms wrong");
 +    }
 +}
 +
 +/* Feed the values of one step into the energy bin md->ebin and, when
 + * write_dhdl is set, emit the free-energy output for that step.
 + *
 + * Each quantity is handed to add_ebin at the index that was stored in md
 + * for it, with bSum passed through unchanged; afterwards
 + * ebin_increase_count(md->ebin,bSum) is called once.
 + *
 + * md         energy bookkeeping structure; its ebin member is updated
 + * write_dhdl when set, write a line to md->fp_dhdl (if non-NULL) and/or add
 + *            the values to the md->dhc collection (if non-NULL)
 + * bSum       forwarded to add_ebin and ebin_increase_count
 + *            (NOTE(review): presumably selects whether the step counts for
 + *            the running sums - confirm in ebin.c)
 + * time       time written in the first column of the dhdl file
 + * tmass      used for the density: (tmass*AMU)/(vol*NANO^3)
 + * enerd      source of the energy terms, group-pair energies and
 + *            enerpart_lambda values
 + * state      source of boxv and the Nose-Hoover xi/vxi variables and lambda
 + * box        the box to report; deliberately not state->box, see below
 + * svir,fvir  only stored when md->bConstrVir is set
 + * vir,pres   always stored (9 values each)
 + * ekind      may be NULL; the temperature-coupling, cos-acceleration and
 + *            per-group u entries are only stored when it is non-NULL
 + * mu_tot     stored as 3 values at md->imu
 + * constr     passed to constr_rmsd when md->nCrmsd is non-zero
 + */
 +void upd_mdebin(t_mdebin *md, gmx_bool write_dhdl,
 +                gmx_bool bSum,
 +                double time,
 +                real tmass,
 +                gmx_enerdata_t *enerd,
 +                t_state *state,
 +                matrix  box,
 +                tensor svir,
 +                tensor fvir,
 +                tensor vir,
 +                tensor pres,
 +                gmx_ekindata_t *ekind,
 +                rvec mu_tot,
 +                gmx_constr_t constr)
 +{
 +    int    i,j,k,kk,m,n,gid;
 +    real   crmsd[2],tmp6[6];
 +    real   bs[NTRICLBOXS],vol,dens,pv,enthalpy;
 +    real   eee[egNR];
 +    real   ecopy[F_NRE];
 +    real   tmp;
 +    gmx_bool   bNoseHoover;
 +
 +    /* Do NOT use the box in the state variable, but the separate box provided
 +     * as an argument. This is because we sometimes need to write the box from
 +     * the last timestep to match the trajectory frames.
 +     */
 +    copy_energy(md, enerd->term,ecopy);
 +    add_ebin(md->ebin,md->ie,md->f_nre,ecopy,bSum);
 +    if (md->nCrmsd)
 +    {
 +        /* One or two constraint RMSD values; the second one is requested
 +         * from constr_rmsd with its flag argument set to TRUE.
 +         * Note that these are added with FALSE instead of bSum.
 +         */
 +        crmsd[0] = constr_rmsd(constr,FALSE);
 +        if (md->nCrmsd > 1)
 +        {
 +            crmsd[1] = constr_rmsd(constr,TRUE);
 +        }
 +        add_ebin(md->ebin,md->iconrmsd,md->nCrmsd,crmsd,FALSE);
 +    }
 +    if (md->bDynBox)
 +    {
 +        int nboxs;
 +        if(md->bTricl)
 +        {
 +            /* Three diagonal and three off-diagonal box elements */
 +            bs[0] = box[XX][XX];
 +            bs[1] = box[YY][YY];
 +            bs[2] = box[ZZ][ZZ];
 +            bs[3] = box[YY][XX];
 +            bs[4] = box[ZZ][XX];
 +            bs[5] = box[ZZ][YY];
 +            nboxs=NTRICLBOXS;
 +        }
 +        else
 +        {
 +            bs[0] = box[XX][XX];
 +            bs[1] = box[YY][YY];
 +            bs[2] = box[ZZ][ZZ];
 +            nboxs=NBOXS;
 +        }
 +        /* Volume as the product of the diagonal box elements */
 +        vol  = box[XX][XX]*box[YY][YY]*box[ZZ][ZZ];
 +        dens = (tmass*AMU)/(vol*NANO*NANO*NANO);
 +
-         /* This is pV (in kJ/mol).  The pressure is the reference pressure,
-            not the instantaneous pressure */  
-         pv = 0;
-         for (i=0;i<DIM;i++) 
-         {
-             for (j=0;j<DIM;j++) 
-             {
-                 if (i>j) 
-                 {
-                     pv += box[i][j]*md->ref_p[i][j]/PRESFAC;
-                 } 
-                 else 
-                 {
-                     pv += box[j][i]*md->ref_p[j][i]/PRESFAC;
-                 }
-             }
-         }
- 
 +        add_ebin(md->ebin,md->ib   ,nboxs,bs   ,bSum);
 +        add_ebin(md->ebin,md->ivol ,1    ,&vol ,bSum);
 +        add_ebin(md->ebin,md->idens,1    ,&dens,bSum);
-         add_ebin(md->ebin,md->ipv  ,1    ,&pv  ,bSum);
-         enthalpy = pv + enerd->term[F_ETOT];
-         add_ebin(md->ebin,md->ienthalpy  ,1    ,&enthalpy  ,bSum);
++
++        if (md->bDiagPres)
++        {
++            /* This is pV (in kJ/mol).  The pressure is the reference pressure,
++               not the instantaneous pressure */  
++            pv = vol*md->ref_p/PRESFAC;
++
++            add_ebin(md->ebin,md->ipv  ,1    ,&pv  ,bSum);
++            enthalpy = pv + enerd->term[F_ETOT];
++            add_ebin(md->ebin,md->ienthalpy  ,1    ,&enthalpy  ,bSum);
++        }
 +    }
 +    if (md->bConstrVir)
 +    {
 +        add_ebin(md->ebin,md->isvir,9,svir[0],bSum);
 +        add_ebin(md->ebin,md->ifvir,9,fvir[0],bSum);
 +    }
 +    add_ebin(md->ebin,md->ivir,9,vir[0],bSum);
 +    add_ebin(md->ebin,md->ipres,9,pres[0],bSum);
 +    /* Entry md->isurft: (Pzz - (Pxx+Pyy)/2) * box_zz */
 +    tmp = (pres[ZZ][ZZ]-(pres[XX][XX]+pres[YY][YY])*0.5)*box[ZZ][ZZ];
 +    add_ebin(md->ebin,md->isurft,1,&tmp,bSum);
 +    if (md->epc == epcPARRINELLORAHMAN || md->epc == epcMTTK)
 +    {
 +        /* Box velocities, in the same element order as the box above;
 +         * only the first three are stored when md->bTricl is not set.
 +         */
 +        tmp6[0] = state->boxv[XX][XX];
 +        tmp6[1] = state->boxv[YY][YY];
 +        tmp6[2] = state->boxv[ZZ][ZZ];
 +        tmp6[3] = state->boxv[YY][XX];
 +        tmp6[4] = state->boxv[ZZ][XX];
 +        tmp6[5] = state->boxv[ZZ][YY];
 +        add_ebin(md->ebin,md->ipc,md->bTricl ? 6 : 3,tmp6,bSum);
 +    }
 +    add_ebin(md->ebin,md->imu,3,mu_tot,bSum);
 +    if (ekind && ekind->cosacc.cos_accel != 0)
 +    {
 +        /* vol and dens are recomputed here because the md->bDynBox branch
 +         * above may not have been taken.
 +         */
 +        vol  = box[XX][XX]*box[YY][YY]*box[ZZ][ZZ];
 +        dens = (tmass*AMU)/(vol*NANO*NANO*NANO);
 +        add_ebin(md->ebin,md->ivcos,1,&(ekind->cosacc.vcos),bSum);
 +        /* 1/viscosity, unit 1/(kg m^-1 s^-1) */
 +        tmp = 1/(ekind->cosacc.cos_accel/(ekind->cosacc.vcos*PICO)
 +                 *dens*vol*sqr(box[ZZ][ZZ]*NANO/(2*M_PI)));
 +        add_ebin(md->ebin,md->ivisc,1,&tmp,bSum);    
 +    }
 +    if (md->nE > 1)
 +    {
 +        /* Group-pair energies: for every pair (i,j) with j >= i, collect
 +         * the terms flagged in md->bEInd and store them at md->igrp[n].
 +         */
 +        n=0;
 +        for(i=0; (i<md->nEg); i++)
 +        {
 +            for(j=i; (j<md->nEg); j++)
 +            {
 +                gid=GID(i,j,md->nEg);
 +                for(k=kk=0; (k<egNR); k++)
 +                {
 +                    if (md->bEInd[k])
 +                    {
 +                        eee[kk++] = enerd->grpp.ener[k][gid];
 +                    }
 +                }
 +                add_ebin(md->ebin,md->igrp[n],md->nEc,eee,bSum);
 +                n++;
 +            }
 +        }
 +    }
 +
 +    if (ekind)
 +    {
 +        /* Temperature of every temperature-coupling group */
 +        for(i=0; (i<md->nTC); i++)
 +        {
 +            md->tmp_r[i] = ekind->tcstat[i].T;
 +        }
 +        add_ebin(md->ebin,md->itemp,md->nTC,md->tmp_r,bSum);
 +
 +        /* whether to print Nose-Hoover chains: */
 +        bNoseHoover = (getenv("GMX_NOSEHOOVER_CHAINS") != NULL); 
 +
 +        if (md->etc == etcNOSEHOOVER)
 +        {
 +            if (bNoseHoover) 
 +            {
 +                if (md->bNHC_trotter)
 +                {
 +                    /* xi and vxi are interleaved in tmp_r: index 2*k holds
 +                     * xi and 2*k+1 holds vxi for chain element k.
 +                     */
 +                    for(i=0; (i<md->nTC); i++) 
 +                    {
 +                        for (j=0;j<md->nNHC;j++) 
 +                        {
 +                            k = i*md->nNHC+j;
 +                            md->tmp_r[2*k] = state->nosehoover_xi[k];
 +                            md->tmp_r[2*k+1] = state->nosehoover_vxi[k];
 +                        }
 +                    }
 +                    add_ebin(md->ebin,md->itc,md->mde_n,md->tmp_r,bSum);      
 +
 +                    if (md->bMTTK) {
 +                        /* Same layout for the barostat chain variables;
 +                         * tmp_r is reused after the add_ebin call above.
 +                         */
 +                        for(i=0; (i<md->nTCP); i++) 
 +                        {
 +                            for (j=0;j<md->nNHC;j++) 
 +                            {
 +                                k = i*md->nNHC+j;
 +                                md->tmp_r[2*k] = state->nhpres_xi[k];
 +                                md->tmp_r[2*k+1] = state->nhpres_vxi[k];
 +                            }
 +                        }
 +                        add_ebin(md->ebin,md->itcb,md->mdeb_n,md->tmp_r,bSum);      
 +                    }
 +
 +                } 
 +                else 
 +                {
 +                    for(i=0; (i<md->nTC); i++)
 +                    {
 +                        md->tmp_r[2*i] = state->nosehoover_xi[i];
 +                        md->tmp_r[2*i+1] = state->nosehoover_vxi[i];
 +                    }
 +                    add_ebin(md->ebin,md->itc,md->mde_n,md->tmp_r,bSum);
 +                }
 +            }
 +        }
 +        else if (md->etc == etcBERENDSEN || md->etc == etcYES || 
 +                 md->etc == etcVRESCALE)
 +        {
 +            /* One lambda value per temperature-coupling group */
 +            for(i=0; (i<md->nTC); i++)
 +            {
 +                md->tmp_r[i] = ekind->tcstat[i].lambda;
 +            }
 +            add_ebin(md->ebin,md->itc,md->nTC,md->tmp_r,bSum);
 +        }
 +    }
 +
 +    if (ekind && md->nU > 1)
 +    {
 +        /* Per-group u vectors from ekind->grpstat, three reals per group */
 +        for(i=0; (i<md->nU); i++)
 +        {
 +            copy_rvec(ekind->grpstat[i].u,md->tmp_v[i]);
 +        }
 +        add_ebin(md->ebin,md->iu,3*md->nU,md->tmp_v[0],bSum);
 +    }
 +
 +    ebin_increase_count(md->ebin,bSum);
 +
 +    /* BAR + thermodynamic integration values */
 +    if (write_dhdl)
 +    {
 +        if (md->fp_dhdl)
 +        {
 +            /* One text line: time, optional dH/dlambda, then the energy
 +             * differences of entries 1..n_lambda-1 relative to entry 0.
 +             */
 +            fprintf(md->fp_dhdl,"%.4f", time);
 +
 +            if (md->dhdl_derivatives)
 +            {
 +                fprintf(md->fp_dhdl," %g", enerd->term[F_DVDL]+ 
 +                                           enerd->term[F_DKDL]+
 +                                           enerd->term[F_DHDL_CON]);
 +            }
 +            for(i=1; i<enerd->n_lambda; i++)
 +            {
 +                fprintf(md->fp_dhdl," %g",
 +                        enerd->enerpart_lambda[i]-enerd->enerpart_lambda[0]);
 +            }
 +            fprintf(md->fp_dhdl,"\n");
 +        }
 +        /* and the binary BAR output */
 +        if (md->dhc)
 +        {
 +            mde_delta_h_coll_add_dh(md->dhc, 
 +                                    enerd->term[F_DVDL]+ enerd->term[F_DKDL]+
 +                                    enerd->term[F_DHDL_CON],
 +                                    enerd->enerpart_lambda, time, 
 +                                    state->lambda);
 +        }
 +    }
 +}
 +
 +/* Advance the counters of md->ebin for a step on which no energies are
 + * added: forwards to ebin_increase_count with its flag set to FALSE.
 + */
 +void upd_mdebin_step(t_mdebin *md)
 +{
 +    ebin_increase_count(md->ebin, FALSE);
 +}
 +
 +/* Write the character c to log n times; nothing is written for n <= 0 */
 +static void npr(FILE *log,int n,char c)
 +{
 +    int written;
 +
 +    for(written=0; written<n; written++)
 +    {
 +        fprintf(log,"%c",c);
 +    }
 +}
 +
 +/* Print a three-line banner around the title s to log, followed by a line
 + * reporting md->ebin->nsteps_sim steps and md->ebin->nsum_sim frames.
 + * The rows of '#' above and below the title are as long as the title.
 + */
 +static void pprint(FILE *log,const char *s,t_mdebin *md)
 +{
 +    const char fill='#';
 +    char       steps_str[22],frames_str[22];
 +    int        width;
 +
 +    width = strlen(s);
 +
 +    /* Top row */
 +    fprintf(log,"\t<======  ");
 +    npr(log,width,fill);
 +    fprintf(log,"  ==>\n");
 +
 +    /* Title row */
 +    fprintf(log,"\t<====  %s  ====>\n",s);
 +
 +    /* Bottom row */
 +    fprintf(log,"\t<==  ");
 +    npr(log,width,fill);
 +    fprintf(log,"  ======>\n\n");
 +
 +    fprintf(log,"\tStatistics over %s steps using %s frames\n",
 +            gmx_step_str(md->ebin->nsteps_sim,steps_str),
 +            gmx_step_str(md->ebin->nsum_sim,frames_str));
 +    fprintf(log,"\n");
 +}
 +
 +/* Print a two-line table to log: the column titles "Step", "Time" and
 + * "Lambda", and below them the values of steps, time and lamb.
 + */
 +void print_ebin_header(FILE *log,gmx_large_int_t steps,double time,real lamb)
 +{
 +    char       step_buf[22];
 +    const char *step_str;
 +
 +    /* The step count is converted to text by gmx_step_str */
 +    step_str = gmx_step_str(steps,step_buf);
 +
 +    fprintf(log,"   %12s   %12s   %12s\n"
 +            "   %12s   %12.5f   %12.5f\n\n",
 +            "Step","Time","Lambda",step_str,time,lamb);
 +}
 +
 +/* Write the contents of the energy bin to the energy file and/or the log.
 + *
 + * mode selects what happens first:
 + *   eprNORMAL  build an energy frame from md->ebin plus optional restraint
 + *              and free-energy blocks and write it to fp_ene with do_enx
 + *   eprAVER    print the "A V E R A G E S" banner to log (if log != NULL)
 + *   eprRMS     print the "R M S - F L U C T U A T I O N S" banner to log
 + *   other      gmx_fatal
 + * After that, when log is non-NULL, the energies (and, unless bCompact is
 + * set, the box, virial, pressure, dipole, group-pair energy, group
 + * temperature and group velocity tables) are printed to log with pr_ebin.
 + *
 + * fp_ene    energy file, only used in eprNORMAL mode
 + * bEne      when not set, the frame is written with zero energy terms
 + * bDR       when set, include the distance restraint block
 + * bOR       when set (and fcd->orires.nr > 0), include the orientation
 + *           restraint blocks
 + * log       log file, may be NULL
 + * step,time stored in the energy frame
 + * bCompact  when set, only the energies are printed to log
 + * md        energy bookkeeping; md->print_grpnms is filled on first use
 + * fcd       restraint data; dereferenced in eprNORMAL mode only
 + * groups    source of the group names
 + * opts      source of the annealing flags and reference temperatures
 + */
 +void print_ebin(ener_file_t fp_ene,gmx_bool bEne,gmx_bool bDR,gmx_bool bOR,
 +                FILE *log,
 +                gmx_large_int_t step,double time,
 +                int mode,gmx_bool bCompact,
 +                t_mdebin *md,t_fcdata *fcd,
 +                gmx_groups_t *groups,t_grpopts *opts)
 +{
 +    /*static char **grpnms=NULL;*/
 +    char        buf[246];
 +    int         i,j,n,ni,nj,ndr,nor,b;
 +    int         ndisre=0;
 +    real        *disre_rm3tav, *disre_rt;
 +
 +    /* these are for the old-style blocks (1 subblock, only reals), because
 +       there can be only one per ID for these */
 +    int         nr[enxNR];
 +    int         id[enxNR];
 +    real        *block[enxNR];
 +
 +    /* temporary arrays for the lambda values to write out */
 +    double      enxlambda_data[2]; 
 +
 +    t_enxframe  fr;
 +
 +    switch (mode)
 +    {
 +        case eprNORMAL:
 +            init_enxframe(&fr);
 +            fr.t            = time;
 +            fr.step         = step;
 +            fr.nsteps       = md->ebin->nsteps;
 +            fr.dt           = md->delta_t;
 +            fr.nsum         = md->ebin->nsum;
 +            fr.nre          = (bEne) ? md->ebin->nener : 0;
 +            fr.ener         = md->ebin->e;
 +            ndisre          = bDR ? fcd->disres.npair : 0;
 +            disre_rm3tav    = fcd->disres.rm3tav;
 +            disre_rt        = fcd->disres.rt;
 +            /* Optional additional old-style (real-only) blocks. */
 +            for(i=0; i<enxNR; i++)
 +            {
 +                nr[i] = 0;
 +            }
 +            if (fcd->orires.nr > 0 && bOR)
 +            {
 +                diagonalize_orires_tensors(&(fcd->orires));
 +                nr[enxOR]     = fcd->orires.nr;
 +                block[enxOR]  = fcd->orires.otav;
 +                id[enxOR]     = enxOR;
 +                /* The second block is left empty when oinsl and otav
 +                 * point to the same data.
 +                 */
 +                nr[enxORI]    = (fcd->orires.oinsl != fcd->orires.otav) ? 
 +                          fcd->orires.nr : 0;
 +                block[enxORI] = fcd->orires.oinsl;
 +                id[enxORI]    = enxORI;
 +                nr[enxORT]    = fcd->orires.nex*12;
 +                block[enxORT] = fcd->orires.eig;
 +                id[enxORT]    = enxORT;
 +            }        
 +
 +            /* whether we are going to write anything out: */
 +            if (fr.nre || ndisre || nr[enxOR] || nr[enxORI])
 +            {
 +
 +                /* the old-style blocks go first */
 +                /* nblock becomes the highest used block index plus one, so
 +                 * unused blocks below that index are written with nr = 0.
 +                 * NOTE(review): id[] and block[] are only assigned in the
 +                 * orientation restraint branch above - confirm that no
 +                 * other path can make nblock non-zero here.
 +                 */
 +                fr.nblock = 0;
 +                for(i=0; i<enxNR; i++)
 +                {
 +                    if (nr[i] > 0)
 +                    {
 +                        fr.nblock = i + 1;
 +                    }
 +                }
 +                add_blocks_enxframe(&fr, fr.nblock);
 +                for(b=0;b<fr.nblock;b++)
 +                {
 +                    add_subblocks_enxblock(&(fr.block[b]), 1);
 +                    fr.block[b].id=id[b]; 
 +                    fr.block[b].sub[0].nr = nr[b];
 +#ifndef GMX_DOUBLE
 +                    fr.block[b].sub[0].type = xdr_datatype_float;
 +                    fr.block[b].sub[0].fval = block[b];
 +#else
 +                    fr.block[b].sub[0].type = xdr_datatype_double;
 +                    fr.block[b].sub[0].dval = block[b];
 +#endif
 +                }
 +
 +                /* check for disre block & fill it. */
 +                if (ndisre>0)
 +                {
 +                    /* Appended after the old-style blocks, with two
 +                     * subblocks of ndisre values each: rt and rm3tav.
 +                     */
 +                    int db = fr.nblock;
 +                    fr.nblock+=1;
 +                    add_blocks_enxframe(&fr, fr.nblock);
 +
 +                    add_subblocks_enxblock(&(fr.block[db]), 2);
 +                    fr.block[db].id=enxDISRE;
 +                    fr.block[db].sub[0].nr=ndisre;
 +                    fr.block[db].sub[1].nr=ndisre;
 +#ifndef GMX_DOUBLE
 +                    fr.block[db].sub[0].type=xdr_datatype_float;
 +                    fr.block[db].sub[1].type=xdr_datatype_float;
 +                    fr.block[db].sub[0].fval=disre_rt;
 +                    fr.block[db].sub[1].fval=disre_rm3tav;
 +#else
 +                    fr.block[db].sub[0].type=xdr_datatype_double;
 +                    fr.block[db].sub[1].type=xdr_datatype_double;
 +                    fr.block[db].sub[0].dval=disre_rt;
 +                    fr.block[db].sub[1].dval=disre_rm3tav;
 +#endif
 +                }
 +                /* here we can put new-style blocks */
 +
 +                /* Free energy perturbation blocks */
 +                if (md->dhc)
 +                {
 +                    mde_delta_h_coll_handle_block(md->dhc, &fr, fr.nblock);
 +                }
 +
 +                /* do the actual I/O */
 +                do_enx(fp_ene,&fr);
 +                gmx_fio_check_file_position(enx_file_pointer(fp_ene));
 +                if (fr.nre)
 +                {
 +                    /* We have stored the sums, so reset the sum history */
 +                    reset_ebin_sums(md->ebin);
 +                }
 +
 +                /* we can now free & reset the data in the blocks */
 +                if (md->dhc)
 +                    mde_delta_h_coll_reset(md->dhc);
 +            }
 +            free_enxframe(&fr);
 +            break;
 +        case eprAVER:
 +            if (log)
 +            {
 +                pprint(log,"A V E R A G E S",md);
 +            }
 +            break;
 +        case eprRMS:
 +            if (log)
 +            {
 +                pprint(log,"R M S - F L U C T U A T I O N S",md);
 +            }
 +            break;
 +        default:
 +            gmx_fatal(FARGS,"Invalid print mode (%d)",mode);
 +    }
 +
 +    if (log)
 +    {
 +        /* Report the current reference temperature of every group for
 +         * which annealing is not eannNO.
 +         */
 +        for(i=0;i<opts->ngtc;i++)
 +        {
 +            if(opts->annealing[i]!=eannNO)
 +            {
 +                fprintf(log,"Current ref_t for group %s: %8.1f\n",
 +                        *(groups->grpname[groups->grps[egcTC].nm_ind[i]]),
 +                        opts->ref_t[i]);
 +            }
 +        }
 +        if (mode==eprNORMAL && fcd->orires.nr>0)
 +        {
 +            print_orires_log(log,&(fcd->orires));
 +        }
 +        fprintf(log,"   Energies (%s)\n",unit_energy);
 +        pr_ebin(log,md->ebin,md->ie,md->f_nre+md->nCrmsd,5,mode,TRUE);  
 +        fprintf(log,"\n");
 +
 +        if (!bCompact)
 +        {
 +            if (md->bDynBox)
 +            {
 +                pr_ebin(log,md->ebin,md->ib, md->bTricl ? NTRICLBOXS : NBOXS,5,
 +                        mode,TRUE);      
 +                fprintf(log,"\n");
 +            }
 +            if (md->bConstrVir)
 +            {
 +                fprintf(log,"   Constraint Virial (%s)\n",unit_energy);
 +                pr_ebin(log,md->ebin,md->isvir,9,3,mode,FALSE);  
 +                fprintf(log,"\n");
 +                fprintf(log,"   Force Virial (%s)\n",unit_energy);
 +                pr_ebin(log,md->ebin,md->ifvir,9,3,mode,FALSE);  
 +                fprintf(log,"\n");
 +            }
 +            fprintf(log,"   Total Virial (%s)\n",unit_energy);
 +            pr_ebin(log,md->ebin,md->ivir,9,3,mode,FALSE);   
 +            fprintf(log,"\n");
 +            fprintf(log,"   Pressure (%s)\n",unit_pres_bar);
 +            pr_ebin(log,md->ebin,md->ipres,9,3,mode,FALSE);  
 +            fprintf(log,"\n");
 +            fprintf(log,"   Total Dipole (%s)\n",unit_dipole_D);
 +            pr_ebin(log,md->ebin,md->imu,3,3,mode,FALSE);    
 +            fprintf(log,"\n");
 +
 +            if (md->nE > 1)
 +            {
 +                /* The "A-B" row labels are built once and kept in
 +                 * md->print_grpnms for later calls.
 +                 */
 +                if (md->print_grpnms==NULL)
 +                {
 +                    snew(md->print_grpnms,md->nE);
 +                    n=0;
 +                    for(i=0; (i<md->nEg); i++)
 +                    {
 +                        ni=groups->grps[egcENER].nm_ind[i];
 +                        for(j=i; (j<md->nEg); j++)
 +                        {
 +                            nj=groups->grps[egcENER].nm_ind[j];
 +                            sprintf(buf,"%s-%s",*(groups->grpname[ni]),
 +                                    *(groups->grpname[nj]));
 +                            md->print_grpnms[n++]=strdup(buf);
 +                        }
 +                    }
 +                }
 +                /* Table header: one column per term flagged in bEInd */
 +                sprintf(buf,"Epot (%s)",unit_energy);
 +                fprintf(log,"%15s   ",buf);
 +                for(i=0; (i<egNR); i++)
 +                {
 +                    if (md->bEInd[i])
 +                    {
 +                        fprintf(log,"%12s   ",egrp_nm[i]);
 +                    }
 +                }
 +                fprintf(log,"\n");
 +                for(i=0; (i<md->nE); i++)
 +                {
 +                    fprintf(log,"%15s",md->print_grpnms[i]);
 +                    pr_ebin(log,md->ebin,md->igrp[i],md->nEc,md->nEc,mode,
 +                            FALSE);
 +                }
 +                fprintf(log,"\n");
 +            }
 +            if (md->nTC > 1)
 +            {
 +                pr_ebin(log,md->ebin,md->itemp,md->nTC,4,mode,TRUE);
 +                fprintf(log,"\n");
 +            }
 +            if (md->nU > 1)
 +            {
 +                fprintf(log,"%15s   %12s   %12s   %12s\n",
 +                        "Group","Ux","Uy","Uz");
 +                for(i=0; (i<md->nU); i++)
 +                {
 +                    ni=groups->grps[egcACC].nm_ind[i];
 +                    fprintf(log,"%15s",*groups->grpname[ni]);
 +                    pr_ebin(log,md->ebin,md->iu+3*i,3,3,mode,FALSE);
 +                }
 +                fprintf(log,"\n");
 +            }
 +        }
 +    }
 +
 +}
 +
 +/* Copy the counters and accumulated energies of mdebin->ebin into enerhist.
 + *
 + * The four counters and nener are always copied. The eav/esum values are
 + * only copied when ebin->nsum > 0 and the e_sim esum values only when
 + * ebin->nsum_sim > 0; the destination arrays are allocated on first use.
 + * When mdebin->dhc is set, the delta-H collection is stored as well.
 + */
 +void update_energyhistory(energyhistory_t * enerhist,t_mdebin * mdebin)
 +{
 +    int term,nterms;
 +
 +    enerhist->nsteps     = mdebin->ebin->nsteps;
 +    enerhist->nsum       = mdebin->ebin->nsum;
 +    enerhist->nsteps_sim = mdebin->ebin->nsteps_sim;
 +    enerhist->nsum_sim   = mdebin->ebin->nsum_sim;
 +    enerhist->nener      = mdebin->ebin->nener;
 +
 +    nterms = enerhist->nener;
 +
 +    if (mdebin->ebin->nsum > 0)
 +    {
 +        /* Averages and sums share one allocation check */
 +        if (enerhist->ener_ave == NULL)
 +        {
 +            snew(enerhist->ener_ave,enerhist->nener);
 +            snew(enerhist->ener_sum,enerhist->nener);
 +        }
 +
 +        for(term=0; term<nterms; term++)
 +        {
 +            enerhist->ener_ave[term] = mdebin->ebin->e[term].eav;
 +            enerhist->ener_sum[term] = mdebin->ebin->e[term].esum;
 +        }
 +    }
 +
 +    if (mdebin->ebin->nsum_sim > 0)
 +    {
 +        if (enerhist->ener_sum_sim == NULL)
 +        {
 +            snew(enerhist->ener_sum_sim,enerhist->nener);
 +        }
 +
 +        for(term=0; term<nterms; term++)
 +        {
 +            enerhist->ener_sum_sim[term] = mdebin->ebin->e_sim[term].esum;
 +        }
 +    }
 +
 +    if (mdebin->dhc)
 +    {
 +        mde_delta_h_coll_update_energyhistory(mdebin->dhc, enerhist);
 +    }
 +}
 +
 +/* Load the counters and accumulated energies of enerhist into mdebin->ebin.
 + *
 + * When the history holds sums (nsum > 0 or nsum_sim > 0) its number of
 + * energy terms must match mdebin->ebin->nener, otherwise gmx_fatal is
 + * called. Values for which the history holds no sums are set to zero.
 + * When mdebin->dhc is set, the delta-H collection is restored as well.
 + */
 +void restore_energyhistory_from_state(t_mdebin * mdebin,
 +                                      energyhistory_t * enerhist)
 +{
 +    int      term;
 +    gmx_bool bHaveSum,bHaveSumSim;
 +
 +    bHaveSum    = (enerhist->nsum > 0);
 +    bHaveSumSim = (enerhist->nsum_sim > 0);
 +
 +    if (bHaveSum || bHaveSumSim)
 +    {
 +        if (mdebin->ebin->nener != enerhist->nener)
 +        {
 +            gmx_fatal(FARGS,"Mismatch between number of energies in run input (%d) and checkpoint file (%d).",
 +                      mdebin->ebin->nener,enerhist->nener);
 +        }
 +    }
 +
 +    mdebin->ebin->nsteps     = enerhist->nsteps;
 +    mdebin->ebin->nsum       = enerhist->nsum;
 +    mdebin->ebin->nsteps_sim = enerhist->nsteps_sim;
 +    mdebin->ebin->nsum_sim   = enerhist->nsum_sim;
 +
 +    for(term=0; term<mdebin->ebin->nener; term++)
 +    {
 +        /* The history arrays are only read when they hold sums */
 +        mdebin->ebin->e[term].eav  =
 +                  (bHaveSum ? enerhist->ener_ave[term] : 0);
 +        mdebin->ebin->e[term].esum =
 +                  (bHaveSum ? enerhist->ener_sum[term] : 0);
 +        mdebin->ebin->e_sim[term].esum =
 +                  (bHaveSumSim ? enerhist->ener_sum_sim[term] : 0);
 +    }
 +
 +    if (mdebin->dhc)
 +    {
 +        mde_delta_h_coll_restore_energyhistory(mdebin->dhc, enerhist);
 +    }
 +}
diff --cc src/gromacs/mdlib/ns.c
index 7c1a2449f7,0000000000..2c97fdd8e6
mode 100644,000000..100644
--- a/src/gromacs/mdlib/ns.c
+++ b/src/gromacs/mdlib/ns.c
@@@ -1,2789 -1,0 +1,2794 @@@
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#ifdef GMX_THREAD_SHM_FDECOMP
 +#include <pthread.h> 
 +#endif
 +
 +#include <math.h>
 +#include <string.h>
 +#include "sysstuff.h"
 +#include "smalloc.h"
 +#include "macros.h"
 +#include "maths.h"
 +#include "vec.h"
 +#include "network.h"
 +#include "nsgrid.h"
 +#include "force.h"
 +#include "nonbonded.h"
 +#include "ns.h"
 +#include "pbc.h"
 +#include "names.h"
 +#include "gmx_fatal.h"
 +#include "nrnb.h"
 +#include "txtdump.h"
 +#include "mtop_util.h"
 +
 +#include "domdec.h"
 +
 +
 +/* Bit i of e[j] set means the pair (i,j) is flagged as excluded.
 + *    E X C L U S I O N   H A N D L I N G
 + */
 +/* DEBUG builds use functions that carry the macro names, so call sites compile in both builds. */
 +#ifdef DEBUG
 +static void SETEXCL(t_excl e[],atom_id i,atom_id j)
 +{   e[j] = e[j] | (1<<i); }
 +static void RMEXCL(t_excl e[],atom_id i,atom_id j) 
 +{ e[j]=e[j] & ~(1<<i); }
 +static gmx_bool ISEXCL(t_excl e[],atom_id i,atom_id j) 
 +{ return (gmx_bool)(e[j] & (1<<i)); }
 +static gmx_bool NOTEXCL(t_excl e[],atom_id i,atom_id j)
 +{  return !(ISEXCL(e,i,j)); }
 +#else
 +#define SETEXCL(e,i,j) (e)[((atom_id) (j))] |= (1<<((atom_id) (i)))
 +#define RMEXCL(e,i,j)  (e)[((atom_id) (j))] &= (~(1<<((atom_id) (i))))
 +#define ISEXCL(e,i,j)  (gmx_bool) ((e)[((atom_id) (j))] & (1<<((atom_id) (i))))
 +#define NOTEXCL(e,i,j) !(ISEXCL(e,i,j))
 +#endif
 +
 +/************************************************
 + *
 + *  U T I L I T I E S    F O R    N S
 + *
 + ************************************************/
 +/* Resize the per-i-entry arrays of nl to the capacity currently stored in nl->maxnri. */
 +static void reallocate_nblist(t_nblist *nl)
 +{
 +    if (gmx_debug_at)
 +    {
 +        fprintf(debug,"reallocating neigborlist il_code=%d, maxnri=%d\n",
 +                nl->il_code,nl->maxnri); 
 +    }
 +    srenew(nl->iinr,   nl->maxnri);
 +    if (nl->enlist == enlistCG_CG)
 +    {
 +        srenew(nl->iinr_end,nl->maxnri); /* only resized for charge-group - charge-group lists */
 +    }
 +    srenew(nl->gid,    nl->maxnri);
 +    srenew(nl->shift,  nl->maxnri);
 +    srenew(nl->jindex, nl->maxnri+1); /* one extra slot: jindex[k+1] closes entry k */
 +}
 +
 +/* ivdw/icoul are used to determine the type of interaction, so we
 + * can set an innerloop index here. The obvious choice for this would have
 + * been the vdwtype/coultype values in the forcerecord, but unfortunately 
 + * those types are braindead - for instance both Buckingham and normal 
 + * Lennard-Jones use the same value (evdwCUT), and a separate boolean variable
 + * to determine which interaction is used. There is further no special value
 + * for 'no interaction'. For backward compatibility with old TPR files we won't
 + * change this in the 3.x series, so when calling this routine you should use:
 + *
 + * icoul=0 no coulomb interaction
 + * icoul=1 cutoff standard coulomb
 + * icoul=2 reaction-field coulomb
 + * icoul=3 tabulated coulomb (init_neighbor_list also passes 4 when fr->bGB is set)
 + *
 + * ivdw=0 no vdw interaction
 + * ivdw=1 standard L-J interaction
 + * ivdw=2 Buckingham
 + * ivdw=3 tabulated vdw.
 + *
 + * Kind of ugly, but it works.
 + */
 +static void init_nblist(t_nblist *nl_sr,t_nblist *nl_lr,
 +                        int maxsr,int maxlr,
 +                        int ivdw, int icoul, 
 +                        gmx_bool bfree, int enlist)
 +{
 +    t_nblist *nl;
 +    int      homenr;
 +    int      i,nn;
 +    /* Kernel table, read as inloop[4*icoul + ivdw] for the non-free-energy lists only */
 +    int inloop[20] =
 +    { 
 +        eNR_NBKERNEL_NONE,
 +        eNR_NBKERNEL010,
 +        eNR_NBKERNEL020,
 +        eNR_NBKERNEL030,
 +        eNR_NBKERNEL100,
 +        eNR_NBKERNEL110,
 +        eNR_NBKERNEL120,
 +        eNR_NBKERNEL130,
 +        eNR_NBKERNEL200,
 +        eNR_NBKERNEL210,
 +        eNR_NBKERNEL220,
 +        eNR_NBKERNEL230,
 +        eNR_NBKERNEL300,
 +        eNR_NBKERNEL310,
 +        eNR_NBKERNEL320,
 +        eNR_NBKERNEL330,
 +        eNR_NBKERNEL400,
 +        eNR_NBKERNEL410,
 +        eNR_NBKERNEL_NONE,
 +        eNR_NBKERNEL430
 +    };
 +    /* Pass 0 sets up nl_sr sized from maxsr, pass 1 sets up nl_lr sized from maxlr */
 +    for(i=0; (i<2); i++)
 +    {
 +        nl     = (i == 0) ? nl_sr : nl_lr;
 +        homenr = (i == 0) ? maxsr : maxlr;
 +
 +        if (nl == NULL)
 +        {
 +            continue; /* the caller passed no list for this range */
 +        }
 +        
 +        /* Set coul/vdw in neighborlist, and for the normal loops we determine
 +         * an index of which one to call.
 +         */
 +        nl->ivdw  = ivdw;
 +        nl->icoul = icoul;
 +        nl->free_energy = bfree;
 +    
 +        if (bfree)
 +        {
 +            nl->enlist  = enlistATOM_ATOM; /* the enlist argument is ignored for free-energy lists */
 +            nl->il_code = eNR_NBKERNEL_FREE_ENERGY;
 +        }
 +        else
 +        {
 +            nl->enlist = enlist;
 +
 +            nn = inloop[4*icoul + ivdw];
 +            
 +            /* solvent loops follow directly after the corresponding
 +            * ordinary loops, in the order:
 +            *
 +            * SPC, SPC-SPC, TIP4p, TIP4p-TIP4p
 +            *   
 +            */
 +            switch (enlist) {
 +            case enlistATOM_ATOM:
 +            case enlistCG_CG:
 +                break;
 +            case enlistSPC_ATOM:     nn += 1; break;
 +            case enlistSPC_SPC:      nn += 2; break;
 +            case enlistTIP4P_ATOM:   nn += 3; break;
 +            case enlistTIP4P_TIP4P:  nn += 4; break;
 +            }
 +            
 +            nl->il_code = nn;
 +        }
 +
 +        if (debug)
 +            fprintf(debug,"Initiating neighbourlist type %d for %s interactions,\nwith %d SR, %d LR atoms.\n",
 +                    nl->il_code,ENLISTTYPE(enlist),maxsr,maxlr);
 +        
 +        /* maxnri is influenced by the number of shifts (maximum is 8)
 +         * and the number of energy groups.
 +         * If it is not enough, nl memory will be reallocated during the run.
 +         * 4 seems to be a reasonable factor, which only causes reallocation
 +         * during runs with tiny and many energygroups.
 +         */
 +        nl->maxnri      = homenr*4;
 +        nl->maxnrj      = 0;
 +        nl->maxlen      = 0;
 +        nl->nri         = -1; /* -1 means no i-entry has been opened yet */
 +        nl->nrj         = 0;
 +        nl->iinr        = NULL;
 +        nl->gid         = NULL;
 +        nl->shift       = NULL;
 +        nl->jindex      = NULL;
 +        reallocate_nblist(nl); /* allocates the arrays cleared above to maxnri entries */
 +        nl->jindex[0] = 0;
 +#ifdef GMX_THREAD_SHM_FDECOMP
 +        nl->counter = 0;
 +        snew(nl->mtx,1);
 +        pthread_mutex_init(nl->mtx,NULL);
 +#endif
 +    }
 +}
 +/* Pick the icoul/ivdw selectors and list types from fr, then initialize every neighbor list in fr. */
 +void init_neighbor_list(FILE *log,t_forcerec *fr,int homenr)
 +{
 +   /* Make maxlr tunable! (does not seem to be a big difference though) 
 +    * This parameter determines the number of i particles in a long range 
 +    * neighbourlist. Too few means many function calls, too many means
 +    * cache thrashing.
 +    */
 +   int maxsr,maxsr_wat,maxlr,maxlr_wat;
 +   int icoul,icoulf,ivdw;
 +   int solvent;
 +   int enlist_def,enlist_w,enlist_ww;
 +   int i;
 +   t_nblists *nbl;
 +
 +   /* maxsr     = homenr-fr->nWatMol*3; */
 +   maxsr     = homenr;
 +
 +   if (maxsr < 0)
 +   {
 +     gmx_fatal(FARGS,"%s, %d: Negative number of short range atoms.\n"
 +		 "Call your Gromacs dealer for assistance.",__FILE__,__LINE__);
 +   }
 +   /* This is just for initial allocation, so we do not reallocate
 +    * all the nlist arrays many times in a row.
 +    * The numbers seem very accurate, but they are uncritical.
 +    */
 +   maxsr_wat = min(fr->nWatMol,(homenr+2)/3); 
 +   if (fr->bTwinRange) 
 +   {
 +       maxlr     = 50;
 +       maxlr_wat = min(maxsr_wat,maxlr);
 +   }
 +   else
 +   {
 +     maxlr = maxlr_wat = 0;
 +   }  
 +
 +   /* Determine the values for icoul/ivdw. */
 +   /* Start with GB */
 +   if(fr->bGB)
 +   {
 +       icoul=4;
 +   }
 +   else if (fr->bcoultab)
 +   {
 +       icoul = 3;
 +   }
 +   else if (EEL_RF(fr->eeltype))
 +   {
 +       icoul = 2;
 +   }
 +   else 
 +   {
 +       icoul = 1;
 +   }
 +   
 +   if (fr->bvdwtab)
 +   {
 +       ivdw = 3;
 +   }
 +   else if (fr->bBHAM)
 +   {
 +       ivdw = 2;
 +   }
 +   else 
 +   {
 +       ivdw = 1;
 +   }
 +
 +   fr->ns.bCGlist = (getenv("GMX_NBLISTCG") != 0); /* enabled whenever the variable is set, whatever its value */
 +   if (!fr->ns.bCGlist)
 +   {
 +       enlist_def = enlistATOM_ATOM;
 +   }
 +   else
 +   {
 +       enlist_def = enlistCG_CG;
 +       if (log != NULL)
 +       {
 +           fprintf(log,"\nUsing charge-group - charge-group neighbor lists and kernels\n\n");
 +       }
-        if (!fr->bExcl_IntraCGAll_InterCGNone)
-        {
-            gmx_fatal(FARGS,"The charge-group - charge-group force loops only support systems with all intra-cg interactions excluded and no inter-cg exclusions, this is not the case for this system.");
-        }
 +   }
 +   
 +   if (fr->solvent_opt == esolTIP4P) {
 +       enlist_w  = enlistTIP4P_ATOM;
 +       enlist_ww = enlistTIP4P_TIP4P;
 +   } else {
 +       enlist_w  = enlistSPC_ATOM;
 +       enlist_ww = enlistSPC_SPC;
 +   }
 +
 +   for(i=0; i<fr->nnblists; i++) 
 +   {
 +       nbl = &(fr->nblists[i]);
 +       init_nblist(&nbl->nlist_sr[eNL_VDWQQ],&nbl->nlist_lr[eNL_VDWQQ],
 +                   maxsr,maxlr,ivdw,icoul,FALSE,enlist_def);
 +       init_nblist(&nbl->nlist_sr[eNL_VDW],&nbl->nlist_lr[eNL_VDW],
 +                   maxsr,maxlr,ivdw,0,FALSE,enlist_def);
 +       init_nblist(&nbl->nlist_sr[eNL_QQ],&nbl->nlist_lr[eNL_QQ],
 +                   maxsr,maxlr,0,icoul,FALSE,enlist_def);
 +       init_nblist(&nbl->nlist_sr[eNL_VDWQQ_WATER],&nbl->nlist_lr[eNL_VDWQQ_WATER],
 +                   maxsr_wat,maxlr_wat,ivdw,icoul, FALSE,enlist_w);
 +       init_nblist(&nbl->nlist_sr[eNL_QQ_WATER],&nbl->nlist_lr[eNL_QQ_WATER],
 +                   maxsr_wat,maxlr_wat,0,icoul, FALSE,enlist_w);
 +       init_nblist(&nbl->nlist_sr[eNL_VDWQQ_WATERWATER],&nbl->nlist_lr[eNL_VDWQQ_WATERWATER],
 +                   maxsr_wat,maxlr_wat,ivdw,icoul, FALSE,enlist_ww);
 +       init_nblist(&nbl->nlist_sr[eNL_QQ_WATERWATER],&nbl->nlist_lr[eNL_QQ_WATERWATER],
 +                   maxsr_wat,maxlr_wat,0,icoul, FALSE,enlist_ww);
 +       
 +       if (fr->efep != efepNO) 
 +       {
 +           if (fr->bEwald)
 +           {
 +               icoulf = 5;
 +           }
 +           else
 +           {
 +               icoulf = icoul;
 +           }
 +           /* NOTE(review): icoulf=5 would index past inloop[20], but init_nblist skips that lookup when bfree is TRUE */
 +           init_nblist(&nbl->nlist_sr[eNL_VDWQQ_FREE],&nbl->nlist_lr[eNL_VDWQQ_FREE],
 +                       maxsr,maxlr,ivdw,icoulf,TRUE,enlistATOM_ATOM);
 +           init_nblist(&nbl->nlist_sr[eNL_VDW_FREE],&nbl->nlist_lr[eNL_VDW_FREE],
 +                       maxsr,maxlr,ivdw,0,TRUE,enlistATOM_ATOM);
 +           init_nblist(&nbl->nlist_sr[eNL_QQ_FREE],&nbl->nlist_lr[eNL_QQ_FREE],
 +                       maxsr,maxlr,0,icoulf,TRUE,enlistATOM_ATOM);
 +       }  
 +   }
 +   /* QMMM MM list */
 +   if (fr->bQMMM && fr->qr->QMMMscheme != eQMMMschemeoniom)
 +   {
 +       init_nblist(&fr->QMMMlist,NULL,
 +                   maxsr,maxlr,0,icoul,FALSE,enlistATOM_ATOM);
 +   }
 +
 +   fr->ns.nblist_initialized=TRUE;
 +}
 +/* Put nl back in the empty state: nri=-1, no j-entries, and jindex[0]=0 when jindex is allocated. */
 +static void reset_nblist(t_nblist *nl)
 +{
 +     nl->nri       = -1;
 +     nl->nrj       = 0;
 +     nl->maxlen    = 0;
 +     if (nl->jindex)
 +     {
 +         nl->jindex[0] = 0;
 +     }
 +}
 +/* With bLR reset only long-range list [nls][eNL]; else reset all short-range lists (and QMMMlist if fr->bQMMM). */
 +static void reset_neighbor_list(t_forcerec *fr,gmx_bool bLR,int nls,int eNL)
 +{
 +    int n,i;
 +  
 +    if (bLR) 
 +    {
 +        reset_nblist(&(fr->nblists[nls].nlist_lr[eNL]));
 +    }
 +    else 
 +    {
 +        for(n=0; n<fr->nnblists; n++) /* nls and eNL are not used on this path */
 +        {
 +            for(i=0; i<eNL_NR; i++)
 +            {
 +                reset_nblist(&(fr->nblists[n].nlist_sr[i]));
 +            }
 +        }
 +        if (fr->bQMMM)
 +        { 
 +            /* only reset the short-range nblist */
 +            reset_nblist(&(fr->QMMMlist));
 +        }
 +    }
 +}
 +
 +
 +
 +/* Make the current i-entry of nlist describe (i_atom, shift, gid), opening a new entry when needed. */
 +static inline void new_i_nblist(t_nblist *nlist,
 +                                gmx_bool bLR,atom_id i_atom,int shift,int gid)
 +{
 +    int    i,k,nri,nshift; /* i, k and nshift are not used below; neither is bLR */
 +    
 +    nri = nlist->nri;
 +    
 +    /* Check whether we have to increase the i counter */
 +    if ((nri == -1) ||
 +        (nlist->iinr[nri]  != i_atom) || 
 +        (nlist->shift[nri] != shift) || 
 +        (nlist->gid[nri]   != gid))
 +    {
 +        /* This is something else. Now see if any entries have 
 +         * been added in the list of the previous atom.
 +         */
 +        if ((nri == -1) ||
 +            ((nlist->jindex[nri+1] > nlist->jindex[nri]) && 
 +             (nlist->gid[nri] != -1)))
 +        {
 +            /* If so increase the counter */
 +            nlist->nri++;
 +            nri++;
 +            if (nlist->nri >= nlist->maxnri)
 +            {
 +                nlist->maxnri += over_alloc_large(nlist->nri);
 +                reallocate_nblist(nlist);
 +            }
 +        } /* otherwise the current entry has no j-entries (or gid -1) and is overwritten in place */
 +        /* Set the number of neighbours and the atom number */
 +        nlist->jindex[nri+1] = nlist->jindex[nri];
 +        nlist->iinr[nri]     = i_atom;
 +        nlist->gid[nri]      = gid;
 +        nlist->shift[nri]    = shift;
 +    }
 +}
 +/* Finish the current i-entry: store where its j-range ends and track the longest entry in maxlen. */
 +static inline void close_i_nblist(t_nblist *nlist) 
 +{
 +    int nri = nlist->nri;
 +    int len;
 +    
 +    if (nri >= 0) /* nothing to close while no i-entry has been opened (nri == -1) */
 +    {
 +        nlist->jindex[nri+1] = nlist->nrj;
 +        
 +        len=nlist->nrj -  nlist->jindex[nri];
 +        
 +        /* nlist length for water i molecules is treated statically 
 +         * in the innerloops 
 +         */
 +        if (len > nlist->maxlen)
 +        {
 +            nlist->maxlen = len;
 +        }
 +    }
 +}
 +/* Convert nlist->nri from the index of the last i-entry into an entry count (0 if there are no j-entries). */
 +static inline void close_nblist(t_nblist *nlist)
 +{
 +    /* Only close this nblist when it has been initialized.
 +     * Avoid the creation of i-lists with no j-particles.
 +     */
 +    if (nlist->nrj == 0)
 +    {
 +        /* Some assembly kernels do not support empty lists,
 +         * make sure here that we don't generate any empty lists.
 +         * With the current ns code this branch is taken in two cases:
 +         * No i-particles at all: nri=-1 here
 +         * There are i-particles, but no j-particles; nri=0 here
 +         */
 +        nlist->nri = 0;
 +    }
 +    else
 +    {
 +        /* Close list number nri by incrementing the count */
 +        nlist->nri++;
 +    }
 +}
 +/* Close QMMMlist (short-range pass only), or long-range list [nls][eNL], or every short-range list. */
 +static inline void close_neighbor_list(t_forcerec *fr,gmx_bool bLR,int nls,int eNL, 
 +                                       gmx_bool bMakeQMMMnblist)
 +{
 +    int n,i;
 +    
 +    if (bMakeQMMMnblist) {
 +        if (!bLR) /* a QMMM call with bLR set closes nothing */
 +        {
 +            close_nblist(&(fr->QMMMlist));
 +        }
 +    }
 +    else 
 +    {
 +        if (bLR)
 +        {
 +            close_nblist(&(fr->nblists[nls].nlist_lr[eNL]));
 +        }
 +        else
 +        { 
 +            for(n=0; n<fr->nnblists; n++)
 +            {
 +                for(i=0; (i<eNL_NR); i++)
 +                {
 +                    close_nblist(&(fr->nblists[n].nlist_sr[i]));
 +                }
 +            }
 +        }
 +    }
 +}
 +/* Append j_atom to nlist->jjnr, growing the array first when full; bLR only labels the debug output. */
 +static inline void add_j_to_nblist(t_nblist *nlist,atom_id j_atom,gmx_bool bLR)
 +{
 +    int nrj=nlist->nrj;
 +    
 +    if (nlist->nrj >= nlist->maxnrj)
 +    {
 +        nlist->maxnrj = over_alloc_small(nlist->nrj + 1);
 +        if (gmx_debug_at)
 +            fprintf(debug,"Increasing %s nblist %s j size to %d\n",
 +                    bLR ? "LR" : "SR",nrnb_str(nlist->il_code),nlist->maxnrj);
 +        
 +        srenew(nlist->jjnr,nlist->maxnrj);
 +    }
 +
 +    nlist->jjnr[nrj] = j_atom;
 +    nlist->nrj ++;
 +}
 +/* Append j charge group [j_start,j_end) and its per-atom exclusion masks to a cg-cg list. */
 +static inline void add_j_to_nblist_cg(t_nblist *nlist,
 +                                      atom_id j_start,int j_end,
-                                       t_excl *bexcl,gmx_bool bLR)
++                                      t_excl *bexcl,gmx_bool i_is_j,
++                                      gmx_bool bLR)
 +{
 +    int nrj=nlist->nrj;
 +    int j;
 +
 +    if (nlist->nrj >= nlist->maxnrj)
 +    {
 +        nlist->maxnrj = over_alloc_small(nlist->nrj + 1);
 +        if (gmx_debug_at)
 +            fprintf(debug,"Increasing %s nblist %s j size to %d\n",
 +                    bLR ? "LR" : "SR",nrnb_str(nlist->il_code),nlist->maxnrj);
 +        
 +        srenew(nlist->jjnr    ,nlist->maxnrj);
 +        srenew(nlist->jjnr_end,nlist->maxnrj);
 +        srenew(nlist->excl    ,nlist->maxnrj*MAX_CGCGSIZE); /* MAX_CGCGSIZE mask slots per j-entry */
 +    }
 +
 +    nlist->jjnr[nrj]     = j_start;
 +    nlist->jjnr_end[nrj] = j_end;
 +
 +    if (j_end - j_start > MAX_CGCGSIZE)
 +    {
 +        gmx_fatal(FARGS,"The charge-group - charge-group neighborlist do not support charge groups larger than %d, found a charge group of size %d",MAX_CGCGSIZE,j_end-j_start);
 +    }
 +
 +    /* Set the exclusions */
 +    for(j=j_start; j<j_end; j++)
 +    {
 +        nlist->excl[nrj*MAX_CGCGSIZE + j - j_start] = bexcl[j];
 +    }
++    if (i_is_j)
++    {
++        /* Avoid double counting of intra-cg interactions */
++        for(j=1; j<j_end-j_start; j++)
++        {
++            nlist->excl[nrj*MAX_CGCGSIZE + j] |= ((t_excl)1<<j) - 1; /* shift in t_excl: an int shift overflows at j==31 */
++        }
++    }
 +
 +    nlist->nrj ++;
 +}
 +/* Function type for list-filling routines; put_in_list_at below is declared with this parameter list. */
 +typedef void
 +put_in_list_t(gmx_bool              bHaveVdW[],
 +              int               ngid,
 +              t_mdatoms *       md,
 +              int               icg,
 +              int               jgid,
 +              int               nj,
 +              atom_id           jjcg[],
 +              atom_id           index[],
 +              t_excl            bExcl[],
 +              int               shift,
 +              t_forcerec *      fr,
 +              gmx_bool              bLR,
 +              gmx_bool              bDoVdW,
 +              gmx_bool              bDoCoul);
 +
 +static void 
 +put_in_list_at(gmx_bool              bHaveVdW[],
 +               int               ngid,
 +               t_mdatoms *       md,
 +               int               icg,
 +               int               jgid,
 +               int               nj,
 +               atom_id           jjcg[],
 +               atom_id           index[],
 +               t_excl            bExcl[],
 +               int               shift,
 +               t_forcerec *      fr,
 +               gmx_bool              bLR,
 +               gmx_bool              bDoVdW,
 +               gmx_bool              bDoCoul)
 +{
 +    /* The a[] index has been removed,
 +     * to put it back in i_atom should be a[i0] and jj should be a[jj].
 +     */
 +    t_nblist *   vdwc;
 +    t_nblist *   vdw;
 +    t_nblist *   coul;
 +    t_nblist *   vdwc_free  = NULL;
 +    t_nblist *   vdw_free   = NULL;
 +    t_nblist *   coul_free  = NULL;
 +    t_nblist *   vdwc_ww    = NULL;
 +    t_nblist *   coul_ww    = NULL;
 +    
 +    int 	    i,j,jcg,igid,gid,nbl_ind,ind_ij;
 +    atom_id   jj,jj0,jj1,i_atom;
 +    int       i0,nicg,len;
 +    
 +    int       *cginfo;
 +    int       *type,*typeB;
 +    real      *charge,*chargeB;
 +    real      qi,qiB,qq,rlj;
 +    gmx_bool      bFreeEnergy,bFree,bFreeJ,bNotEx,*bPert;
 +    gmx_bool      bDoVdW_i,bDoCoul_i,bDoCoul_i_sol;
 +    int       iwater,jwater;
 +    t_nblist  *nlist;
 +    
 +    /* Copy some pointers */
 +    cginfo  = fr->cginfo;
 +    charge  = md->chargeA;
 +    chargeB = md->chargeB;
 +    type    = md->typeA;
 +    typeB   = md->typeB;
 +    bPert   = md->bPerturbed;
 +    
 +    /* Get atom range */
 +    i0     = index[icg];
 +    nicg   = index[icg+1]-i0;
 +    
 +    /* Get the i charge group info */
 +    igid   = GET_CGINFO_GID(cginfo[icg]);
 +    iwater = GET_CGINFO_SOLOPT(cginfo[icg]);
 +    
 +    bFreeEnergy = FALSE;
 +    if (md->nPerturbed) 
 +    {
 +        /* Check if any of the particles involved are perturbed. 
 +         * If not we can do the cheaper normal put_in_list
 +         * and use more solvent optimization.
 +         */
 +        for(i=0; i<nicg; i++)
 +        {
 +            bFreeEnergy |= bPert[i0+i];
 +        }
 +        /* Loop over the j charge groups */
 +        for(j=0; (j<nj && !bFreeEnergy); j++) 
 +        {
 +            jcg = jjcg[j];
 +            jj0 = index[jcg];
 +            jj1 = index[jcg+1];
 +            /* Finally loop over the atoms in the j-charge group */	
 +            for(jj=jj0; jj<jj1; jj++)
 +            {
 +                bFreeEnergy |= bPert[jj];
 +            }
 +        }
 +    }
 +    
 +    /* Unpack pointers to neighbourlist structs */
 +    if (fr->nnblists == 1)
 +    {
 +        nbl_ind = 0;
 +    }
 +    else
 +    {
 +        nbl_ind = fr->gid2nblists[GID(igid,jgid,ngid)];
 +    }
 +    if (bLR)
 +    {
 +        nlist = fr->nblists[nbl_ind].nlist_lr;
 +    }
 +    else
 +    {
 +        nlist = fr->nblists[nbl_ind].nlist_sr;
 +    }
 +    
 +    if (iwater != esolNO)
 +    {
 +        vdwc = &nlist[eNL_VDWQQ_WATER];
 +        vdw  = &nlist[eNL_VDW];
 +        coul = &nlist[eNL_QQ_WATER];
 +#ifndef DISABLE_WATERWATER_NLIST
 +        vdwc_ww = &nlist[eNL_VDWQQ_WATERWATER];
 +        coul_ww = &nlist[eNL_QQ_WATERWATER];
 +#endif
 +    } 
 +    else 
 +    {
 +        vdwc = &nlist[eNL_VDWQQ];
 +        vdw  = &nlist[eNL_VDW];
 +        coul = &nlist[eNL_QQ];
 +    }
 +    
 +    if (!bFreeEnergy) 
 +    {
 +        if (iwater != esolNO) 
 +        {
 +            /* Loop over the atoms in the i charge group */    
 +            i_atom  = i0;
 +            gid     = GID(igid,jgid,ngid);
 +            /* Create new i_atom for each energy group */
 +            if (bDoCoul && bDoVdW)
 +            {
 +                new_i_nblist(vdwc,bLR,i_atom,shift,gid);
 +#ifndef DISABLE_WATERWATER_NLIST
 +                new_i_nblist(vdwc_ww,bLR,i_atom,shift,gid);
 +#endif
 +            }
 +            if (bDoVdW)
 +            {
 +                new_i_nblist(vdw,bLR,i_atom,shift,gid);
 +            }
 +            if (bDoCoul) 
 +            {
 +                new_i_nblist(coul,bLR,i_atom,shift,gid);
 +#ifndef DISABLE_WATERWATER_NLIST
 +                new_i_nblist(coul_ww,bLR,i_atom,shift,gid);
 +#endif
 +            }      
 +	  /* Loop over the j charge groups */
 +            for(j=0; (j<nj); j++) 
 +            {
 +                jcg=jjcg[j];
 +                
 +                if (jcg == icg)
 +                {
 +                    continue;
 +                }
 +                
 +                jj0 = index[jcg];
 +                jwater = GET_CGINFO_SOLOPT(cginfo[jcg]);
 +                
 +                if (iwater == esolSPC && jwater == esolSPC)
 +                {
 +                    /* Interaction between two SPC molecules */
 +                    if (!bDoCoul)
 +                    {
 +                        /* VdW only - only first atoms in each water interact */
 +                        add_j_to_nblist(vdw,jj0,bLR);
 +                    }
 +                    else 
 +                    {
 +#ifdef DISABLE_WATERWATER_NLIST	
 +                        /* Add entries for the three atoms - only do VdW if we need to */
 +                        if (!bDoVdW)
 +                        {
 +                            add_j_to_nblist(coul,jj0,bLR);
 +                        }
 +                        else
 +                        {
 +                            add_j_to_nblist(vdwc,jj0,bLR);
 +                        }
 +                        add_j_to_nblist(coul,jj0+1,bLR);
 +                        add_j_to_nblist(coul,jj0+2,bLR);	    
 +#else
 +                        /* One entry for the entire water-water interaction */
 +                        if (!bDoVdW)
 +                        {
 +                            add_j_to_nblist(coul_ww,jj0,bLR);
 +                        }
 +                        else
 +                        {
 +                            add_j_to_nblist(vdwc_ww,jj0,bLR);
 +                        }
 +#endif
 +                    }  
 +                } 
 +                else if (iwater == esolTIP4P && jwater == esolTIP4P) 
 +                {
 +                    /* Interaction between two TIP4p molecules */
 +                    if (!bDoCoul)
 +                    {
 +                        /* VdW only - only first atoms in each water interact */
 +                        add_j_to_nblist(vdw,jj0,bLR);
 +                    }
 +                    else 
 +                    {
 +#ifdef DISABLE_WATERWATER_NLIST	
 +                        /* Add entries for the four atoms - only do VdW if we need to */
 +                        if (bDoVdW)
 +                        {
 +                            add_j_to_nblist(vdw,jj0,bLR);
 +                        }
 +                        add_j_to_nblist(coul,jj0+1,bLR);
 +                        add_j_to_nblist(coul,jj0+2,bLR);	    
 +                        add_j_to_nblist(coul,jj0+3,bLR);	    
 +#else
 +                        /* One entry for the entire water-water interaction */
 +                        if (!bDoVdW)
 +                        {
 +                            add_j_to_nblist(coul_ww,jj0,bLR);
 +                        }
 +                        else
 +                        {
 +                            add_j_to_nblist(vdwc_ww,jj0,bLR);
 +                        }
 +#endif
 +                    }  					
 +                }
 +                else 
 +                {
 +                    /* j charge group is not water, but i is.
 +                     * Add entries to the water-other_atom lists; the geometry of the water
 +                     * molecule doesn't matter - that is taken care of in the nonbonded kernel,
 +                     * so we don't care if it is SPC or TIP4P...
 +                     */
 +                    
 +                    jj1 = index[jcg+1];
 +                    
 +                    if (!bDoVdW) 
 +                    {
 +                        for(jj=jj0; (jj<jj1); jj++) 
 +                        {
 +                            if (charge[jj] != 0)
 +                            {
 +                                add_j_to_nblist(coul,jj,bLR);
 +                            }
 +                        }
 +                    }
 +                    else if (!bDoCoul)
 +                    {
 +                        for(jj=jj0; (jj<jj1); jj++)
 +                        {
 +                            if (bHaveVdW[type[jj]])
 +                            {
 +                                add_j_to_nblist(vdw,jj,bLR);
 +                            }
 +                        }
 +                    }
 +                    else 
 +                    {
 +                        /* _charge_ _groups_ interact with both coulomb and LJ */
 +                        /* Check which atoms we should add to the lists!       */
 +                        for(jj=jj0; (jj<jj1); jj++) 
 +                        {
 +                            if (bHaveVdW[type[jj]]) 
 +                            {
 +                                if (charge[jj] != 0)
 +                                {
 +                                    add_j_to_nblist(vdwc,jj,bLR);
 +                                }
 +                                else
 +                                {
 +                                    add_j_to_nblist(vdw,jj,bLR);
 +                                }
 +                            }
 +                            else if (charge[jj] != 0)
 +                            {
 +                                add_j_to_nblist(coul,jj,bLR);
 +                            }
 +                        }
 +                    }
 +                }
 +            }
 +            close_i_nblist(vdw); 
 +            close_i_nblist(coul); 
 +            close_i_nblist(vdwc);  
 +#ifndef DISABLE_WATERWATER_NLIST
 +            close_i_nblist(coul_ww);
 +            close_i_nblist(vdwc_ww); 
 +#endif
 +        } 
 +        else
 +        { 
 +            /* no solvent as i charge group */
 +            /* Loop over the atoms in the i charge group */    
 +            for(i=0; i<nicg; i++) 
 +            {
 +                i_atom  = i0+i;
 +                gid     = GID(igid,jgid,ngid);
 +                qi      = charge[i_atom];
 +                
 +                /* Create new i_atom for each energy group */
 +                if (bDoVdW && bDoCoul)
 +                {
 +                    new_i_nblist(vdwc,bLR,i_atom,shift,gid);
 +                }
 +                if (bDoVdW)
 +                {
 +                    new_i_nblist(vdw,bLR,i_atom,shift,gid);
 +                }
 +                if (bDoCoul)
 +                {
 +                    new_i_nblist(coul,bLR,i_atom,shift,gid);
 +                }
 +                bDoVdW_i  = (bDoVdW  && bHaveVdW[type[i_atom]]);
 +                bDoCoul_i = (bDoCoul && qi!=0);
 +                
 +                if (bDoVdW_i || bDoCoul_i) 
 +                {
 +                    /* Loop over the j charge groups */
 +                    for(j=0; (j<nj); j++) 
 +                    {
 +                        jcg=jjcg[j];
 +                        
 +                        /* Check for large charge groups */
 +                        if (jcg == icg)
 +                        {
 +                            jj0 = i0 + i + 1;
 +                        }
 +                        else
 +                        {
 +                            jj0 = index[jcg];
 +                        }
 +                        
 +                        jj1=index[jcg+1];
 +                        /* Finally loop over the atoms in the j-charge group */	
 +                        for(jj=jj0; jj<jj1; jj++) 
 +                        {
 +                            bNotEx = NOTEXCL(bExcl,i,jj);
 +                            
 +                            if (bNotEx) 
 +                            {
 +                                if (!bDoVdW_i) 
 +                                { 
 +                                    if (charge[jj] != 0)
 +                                    {
 +                                        add_j_to_nblist(coul,jj,bLR);
 +                                    }
 +                                }
 +                                else if (!bDoCoul_i) 
 +                                {
 +                                    if (bHaveVdW[type[jj]])
 +                                    {
 +                                        add_j_to_nblist(vdw,jj,bLR);
 +                                    }
 +                                }
 +                                else 
 +                                {
 +                                    if (bHaveVdW[type[jj]]) 
 +                                    {
 +                                        if (charge[jj] != 0)
 +                                        {
 +                                            add_j_to_nblist(vdwc,jj,bLR);
 +                                        }
 +                                        else
 +                                        {
 +                                            add_j_to_nblist(vdw,jj,bLR);
 +                                        }
 +                                    } 
 +                                    else if (charge[jj] != 0)
 +                                    {
 +                                        add_j_to_nblist(coul,jj,bLR);
 +                                    }
 +                                }
 +                            }
 +                        }
 +                    }
 +                }
 +                close_i_nblist(vdw);
 +                close_i_nblist(coul);
 +                close_i_nblist(vdwc);
 +            }
 +        }
 +    }
 +    else
 +    {
 +        /* we are doing free energy */
 +        vdwc_free = &nlist[eNL_VDWQQ_FREE];
 +        vdw_free  = &nlist[eNL_VDW_FREE];
 +        coul_free = &nlist[eNL_QQ_FREE];
 +        /* Loop over the atoms in the i charge group */    
 +        for(i=0; i<nicg; i++) 
 +        {
 +            i_atom  = i0+i;
 +            gid     = GID(igid,jgid,ngid);
 +            qi      = charge[i_atom];
 +            qiB     = chargeB[i_atom];
 +            
 +            /* Create new i_atom for each energy group */
 +            if (bDoVdW && bDoCoul) 
 +                new_i_nblist(vdwc,bLR,i_atom,shift,gid);
 +            if (bDoVdW)   
 +                new_i_nblist(vdw,bLR,i_atom,shift,gid);
 +            if (bDoCoul) 
 +                new_i_nblist(coul,bLR,i_atom,shift,gid);
 +            
 +            new_i_nblist(vdw_free,bLR,i_atom,shift,gid);
 +            new_i_nblist(coul_free,bLR,i_atom,shift,gid);
 +            new_i_nblist(vdwc_free,bLR,i_atom,shift,gid);
 +            
 +            bDoVdW_i  = (bDoVdW  &&
 +                         (bHaveVdW[type[i_atom]] || bHaveVdW[typeB[i_atom]]));
 +            bDoCoul_i = (bDoCoul && (qi!=0 || qiB!=0));
 +            /* For TIP4P the first atom does not have a charge,
 +             * but the last three do. So we should still put an atom
 +             * without LJ but with charge in the water-atom neighborlist
 +             * for a TIP4p i charge group.
 +             * For SPC type water the first atom has LJ and charge,
 +             * so there is no such problem.
 +             */
 +            if (iwater == esolNO)
 +            {
 +                bDoCoul_i_sol = bDoCoul_i;
 +            }
 +            else
 +            {
 +                bDoCoul_i_sol = bDoCoul;
 +            }
 +            
 +            if (bDoVdW_i || bDoCoul_i_sol) 
 +            {
 +                /* Loop over the j charge groups */
 +                for(j=0; (j<nj); j++)
 +                {
 +                    jcg=jjcg[j];
 +                    
 +                    /* Check for large charge groups */
 +                    if (jcg == icg)
 +                    {
 +                        jj0 = i0 + i + 1;
 +                    }
 +                    else
 +                    {
 +                        jj0 = index[jcg];
 +                    }
 +                    
 +                    jj1=index[jcg+1];
 +                    /* Finally loop over the atoms in the j-charge group */	
 +                    bFree = bPert[i_atom];
 +                    for(jj=jj0; (jj<jj1); jj++) 
 +                    {
 +                        bFreeJ = bFree || bPert[jj];
 +                        /* Complicated if, because the water H's should also
 +                         * see perturbed j-particles
 +                         */
 +                        if (iwater==esolNO || i==0 || bFreeJ) 
 +                        {
 +                            bNotEx = NOTEXCL(bExcl,i,jj);
 +                            
 +                            if (bNotEx) 
 +                            {
 +                                if (bFreeJ)
 +                                {
 +                                    if (!bDoVdW_i) 
 +                                    {
 +                                        if (charge[jj]!=0 || chargeB[jj]!=0)
 +                                        {
 +                                            add_j_to_nblist(coul_free,jj,bLR);
 +                                        }
 +                                    }
 +                                    else if (!bDoCoul_i) 
 +                                    {
 +                                        if (bHaveVdW[type[jj]] || bHaveVdW[typeB[jj]])
 +                                        {
 +                                            add_j_to_nblist(vdw_free,jj,bLR);
 +                                        }
 +                                    }
 +                                    else 
 +                                    {
 +                                        if (bHaveVdW[type[jj]] || bHaveVdW[typeB[jj]]) 
 +                                        {
 +                                            if (charge[jj]!=0 || chargeB[jj]!=0)
 +                                            {
 +                                                add_j_to_nblist(vdwc_free,jj,bLR);
 +                                            }
 +                                            else
 +                                            {
 +                                                add_j_to_nblist(vdw_free,jj,bLR);
 +                                            }
 +                                        }
 +                                        else if (charge[jj]!=0 || chargeB[jj]!=0)
 +                                            add_j_to_nblist(coul_free,jj,bLR);
 +                                    }
 +                                }
 +                                else if (!bDoVdW_i) 
 +                                { 
 +                                    /* This is done whether or not bWater is set */
 +                                    if (charge[jj] != 0)
 +                                    {
 +                                        add_j_to_nblist(coul,jj,bLR);
 +                                    }
 +                                }
 +                                else if (!bDoCoul_i_sol) 
 +                                { 
 +                                    if (bHaveVdW[type[jj]])
 +                                    {
 +                                        add_j_to_nblist(vdw,jj,bLR);
 +                                    }
 +                                }
 +                                else 
 +                                {
 +                                    if (bHaveVdW[type[jj]]) 
 +                                    {
 +                                        if (charge[jj] != 0)
 +                                        {
 +                                            add_j_to_nblist(vdwc,jj,bLR);
 +                                        }
 +                                        else
 +                                        {
 +                                            add_j_to_nblist(vdw,jj,bLR);
 +                                        }
 +                                    } 
 +                                    else if (charge[jj] != 0)
 +                                    {
 +                                        add_j_to_nblist(coul,jj,bLR);
 +                                    }
 +                                }
 +                            }
 +                        }
 +                    }
 +                }
 +            }
 +            close_i_nblist(vdw);
 +            close_i_nblist(coul);
 +            close_i_nblist(vdwc);
 +            close_i_nblist(vdw_free);
 +            close_i_nblist(coul_free);
 +            close_i_nblist(vdwc_free);
 +        }
 +    }
 +}
 +
 +/* Fill the QM/MM Coulomb neighbour list (fr->QMMMlist) for i charge group icg.
 + * Each atom of icg opens its own i-entry; every non-excluded atom of the
 + * j charge groups jjcg[0..nj-1], except icg itself, is appended to it.
 + * bHaveVdW, md, bDoVdW and bDoCoul are not used in this function.
 + */
 +static void 
 +put_in_list_qmmm(gmx_bool              bHaveVdW[],
 +                 int               ngid,
 +                 t_mdatoms *       md,
 +                 int               icg,
 +                 int               jgid,
 +                 int               nj,
 +                 atom_id           jjcg[],
 +                 atom_id           index[],
 +                 t_excl            bExcl[],
 +                 int               shift,
 +                 t_forcerec *      fr,
 +                 gmx_bool              bLR,
 +                 gmx_bool              bDoVdW,
 +                 gmx_bool              bDoCoul)
 +{
 +    t_nblist *qmmm_list = &fr->QMMMlist;
 +    int       first_i   = index[icg];
 +    int       n_i       = index[icg+1] - first_i;
 +    int       igid      = GET_CGINFO_GID(fr->cginfo[icg]);
 +    int       ii,jc,jcg,gid;
 +    atom_id   ja,ja_end,i_atom;
 +
 +    for (ii = 0; ii < n_i; ii++)
 +    {
 +        /* One i-entry per atom of the i charge group */
 +        i_atom = first_i + ii;
 +        gid    = GID(igid,jgid,ngid);
 +        new_i_nblist(qmmm_list,bLR,i_atom,shift,gid);
 +
 +        for (jc = 0; jc < nj; jc++)
 +        {
 +            jcg = jjcg[jc];
 +            if (jcg == icg)
 +            {
 +                /* Charge groups cannot have QM and MM atoms simultaneously */
 +                continue;
 +            }
 +
 +            ja_end = index[jcg+1];
 +            for (ja = index[jcg]; ja < ja_end; ja++)
 +            {
 +                /* The exclusion mask is indexed by the i atom's offset in icg */
 +                if (NOTEXCL(bExcl,ii,ja))
 +                {
 +                    add_j_to_nblist(qmmm_list,ja,bLR);
 +                }
 +            }
 +        }
 +        close_i_nblist(qmmm_list);
 +    }
 +}
 +
 +/* Charge-group based list construction: opens a single i-entry for the whole
 + * i charge group icg in the eNL_VDWQQ list (long-range when bLR is set,
 + * short-range otherwise) and appends every j charge group in jjcg[0..nj-1].
 + * bHaveVdW, md, bDoVdW and bDoCoul are not used in this function.
 + */
 +static void 
 +put_in_list_cg(gmx_bool              bHaveVdW[],
 +               int               ngid,
 +               t_mdatoms *       md,
 +               int               icg,
 +               int               jgid,
 +               int               nj,
 +               atom_id           jjcg[],
 +               atom_id           index[],
 +               t_excl            bExcl[],
 +               int               shift,
 +               t_forcerec *      fr,
 +               gmx_bool              bLR,
 +               gmx_bool              bDoVdW,
 +               gmx_bool              bDoCoul)
 +{
 +    int          cginfo;
 +    int          igid,gid,nbl_ind;
 +    t_nblist *   vdwc;
 +    int          j,jcg;
 +
 +    cginfo = fr->cginfo[icg];
 +
 +    igid = GET_CGINFO_GID(cginfo);
 +    gid  = GID(igid,jgid,ngid);
 +
 +    /* Unpack pointers to neighbourlist structs */
 +    if (fr->nnblists == 1)
 +    {
 +        nbl_ind = 0;
 +    }
 +    else
 +    {
 +        nbl_ind = fr->gid2nblists[gid];
 +    }
 +    if (bLR)
 +    {
 +        vdwc = &fr->nblists[nbl_ind].nlist_lr[eNL_VDWQQ];
 +    }
 +    else
 +    {
 +        vdwc = &fr->nblists[nbl_ind].nlist_sr[eNL_VDWQQ];
 +    }
 +
 +    /* Make a new neighbor list for charge group icg.
 +     * Currently simply one neighbor list is made with LJ and Coulomb.
 +     * If required, zero interactions could be removed here
 +     * or in the force loop.
 +     */
 +    new_i_nblist(vdwc,bLR,index[icg],shift,gid);
 +    /* NOTE(review): iinr_end presumably holds one past the last i atom of the
 +     * entry opened above - confirm against the t_nblist definition.
 +     */
 +    vdwc->iinr_end[vdwc->nri] = index[icg+1];
 +
 +    for(j=0; (j<nj); j++) 
 +    {
 +        jcg = jjcg[j];
 +        /* Skip the icg-icg pairs if all self interactions are excluded */
 +        if (!(jcg == icg && GET_CGINFO_EXCL_INTRA(cginfo)))
 +        {
 +            /* Here we add the j charge group jcg to the list,
 +             * exclusions are also added to the list.
 +             * The merged call additionally passes whether this is the
 +             * self pair (icg==jcg).
 +             */
-             add_j_to_nblist_cg(vdwc,index[jcg],index[jcg+1],bExcl,bLR);
++            add_j_to_nblist_cg(vdwc,index[jcg],index[jcg+1],bExcl,icg==jcg,bLR);
 +        }
 +    }
 +
 +    close_i_nblist(vdwc);  
 +}
 +
 +/* Set (b true) or remove (b false) the exclusion flags in bexcl for all
 + * exclusion partners excl->a[k] of the atoms in [start,end).
 + * The i index handed to SETEXCL/RMEXCL is the atom's offset from start.
 + */
 +static void setexcl(atom_id start,atom_id end,t_blocka *excl,gmx_bool b,
 +                    t_excl bexcl[])
 +{
 +    atom_id ai,e;
 +
 +    if (!b)
 +    {
 +        /* Clearing pass */
 +        for(ai=start; ai<end; ai++)
 +        {
 +            for(e=excl->index[ai]; e<excl->index[ai+1]; e++)
 +            {
 +                RMEXCL(bexcl,ai-start,excl->a[e]);
 +            }
 +        }
 +        return;
 +    }
 +
 +    /* Setting pass */
 +    for(ai=start; ai<end; ai++)
 +    {
 +        for(e=excl->index[ai]; e<excl->index[ai+1]; e++)
 +        {
 +            SETEXCL(bexcl,ai-start,excl->a[e]);
 +        }
 +    }
 +}
 +
 +/* Return the number of j charge groups that the simple search examines for
 + * i charge group icg when there are cgtot charge groups in total.
 + * The result is either cgtot/2 or cgtot/2 + 1:
 + *  - cgtot odd:              always cgtot/2 + 1
 + *  - cgtot a multiple of 4:  the extra one goes to even icg in the lower
 + *                            half (icg < cgtot/2) and to odd icg in the
 + *                            upper half
 + *  - otherwise (cgtot/2 odd): the extra one goes to even icg
 + */
 +int calc_naaj(int icg,int cgtot)
 +{
 +    int naaj,half,bExtra;
 +
 +    half = cgtot/2;
 +
 +    if ((cgtot % 2) == 1)
 +    {
 +        /* Odd number of charge groups, easy */
 +        bExtra = 1;
 +    }
 +    else if ((cgtot % 4) == 0)
 +    {
 +        /* Multiple of four is hard: the parity rule flips between halves */
 +        if (icg < half)
 +        {
 +            bExtra = ((icg % 2) == 0);
 +        }
 +        else
 +        {
 +            bExtra = ((icg % 2) == 1);
 +        }
 +    }
 +    else
 +    {
 +        /* cgtot/2 = odd */
 +        bExtra = ((icg % 2) == 0);
 +    }
 +
 +    naaj = bExtra ? 1+half : half;
 +
 +#ifdef DEBUG
 +    /* There is no log file handle in this function; use the debug stream */
 +    if (debug)
 +    {
 +        fprintf(debug,"naaj=%d\n",naaj);
 +    }
 +#endif
 +
 +    return naaj;
 +}
 +
 +/************************************************
 + *
 + *  S I M P L E      C O R E     S T U F F
 + *
 + ************************************************/
 +
 +/* Squared distance between xi and a periodic image of xj in a triclinic box.
 + * The image is chosen by rounding the box-relative difference along z, then
 + * y, then x; the index of the resulting integer shift triple (XYZ2IS) is
 + * written to *shift.
 + *
 + * This code assumes that the cut-off is smaller than
 + * a half times the smallest diagonal element of the box.
 + */
 +static real calc_image_tric(rvec xi,rvec xj,matrix box,
 +                            rvec b_inv,int *shift)
 +{
 +    const real round_off=2.5;
 +    real delx,dely,delz;
 +    real dist2;
 +    int  nx,ny,nz;
 +
 +    /* Difference vector */
 +    delx = xj[XX] - xi[XX];
 +    dely = xj[YY] - xi[YY];
 +    delz = xj[ZZ] - xi[ZZ];
 +
 +    /* Nearest-integer via truncation: add 2.5, truncate, subtract 2.
 +     * A z shift also moves y and x, a y shift also moves x.
 +     */
 +    nz    = (int)(delz*b_inv[ZZ] + round_off) - 2;
 +    delz -= nz*box[ZZ][ZZ];
 +    dely -= nz*box[ZZ][YY];
 +    delx -= nz*box[ZZ][XX];
 +
 +    ny    = (int)(dely*b_inv[YY] + round_off) - 2;
 +    dely -= ny*box[YY][YY];
 +    delx -= ny*box[YY][XX];
 +
 +    nx    = (int)(delx*b_inv[XX]+round_off) - 2;
 +    delx -= nx*box[XX][XX];
 +
 +    /* Distance squared */
 +    dist2 = (delx*delx) + (dely*dely) + (delz*delz);
 +
 +    *shift = XYZ2IS(nx,ny,nz);
 +
 +    return dist2;
 +}
 +
 +/* Squared distance between xi and a periodic image of xj in a rectangular
 + * box with edge lengths box_size (b_inv holds the values the differences are
 + * scaled with). The index of the integer shift triple (XYZ2IS) is written
 + * to *shift.
 + */
 +static real calc_image_rect(rvec xi,rvec xj,rvec box_size,
 +                            rvec b_inv,int *shift)
 +{
 +    const real round_off=1.5;
 +    real delx,dely,delz;
 +    real imx,imy,imz;
 +    real dist2;
 +    int  nx,ny,nz;
 +
 +    /* Difference vector */
 +    delx = xj[XX] - xi[XX];
 +    dely = xj[YY] - xi[YY];
 +    delz = xj[ZZ] - xi[ZZ];
 +
 +    /* Nearest-integer via truncation: add 1.5, truncate, subtract 1 */
 +    nx = (int)(delx*b_inv[XX] + round_off) - 1;
 +    ny = (int)(dely*b_inv[YY] + round_off) - 1;
 +    nz = (int)(delz*b_inv[ZZ] + round_off) - 1;
 +
 +    /* Difference vector corrected for the translation (sign is irrelevant
 +     * because only the square is used)
 +     */
 +    imx = nx*box_size[XX] - delx;
 +    imy = ny*box_size[YY] - dely;
 +    imz = nz*box_size[ZZ] - delz;
 +
 +    /* Distance squared */
 +    dist2 = (imx*imx) + (imy*imy) + (imz*imz);
 +
 +    *shift = XYZ2IS(nx,ny,nz);
 +
 +    return dist2;
 +}
 +
 +/* Append charge group cg_j (containing nrj atoms) to the buffer nsbuf.
 + * When the buffered atom count would exceed MAX_CG, the buffer contents are
 + * first handed to put_in_list (short-range, VdW and Coulomb both enabled)
 + * and the buffer is emptied.
 + */
 +static void add_simple(t_ns_buf *nsbuf,int nrj,atom_id cg_j,
 +                       gmx_bool bHaveVdW[],int ngid,t_mdatoms *md,
 +                       int icg,int jgid,t_block *cgs,t_excl bexcl[],
 +                       int shift,t_forcerec *fr,put_in_list_t *put_in_list)
 +{
 +    gmx_bool bOverflow;
 +
 +    bOverflow = (nsbuf->nj + nrj > MAX_CG);
 +    if (bOverflow)
 +    {
 +        /* Flush what has been collected so far */
 +        put_in_list(bHaveVdW,ngid,md,icg,jgid,nsbuf->ncg,nsbuf->jcg,
 +                    cgs->index,bexcl,shift,fr,FALSE,TRUE,TRUE);
 +        /* Reset buffer contents */
 +        nsbuf->nj  = 0;
 +        nsbuf->ncg = 0;
 +    }
 +
 +    nsbuf->jcg[nsbuf->ncg] = cg_j;
 +    nsbuf->ncg++;
 +    nsbuf->nj += nrj;
 +}
 +
 +/* Inner loop of the simple search for a triclinic box.
 + * For each of the njcg candidate charge groups in jcg[], the image distance
 + * to charge group icg is computed from the positions in x[]; candidates
 + * closer than rcut2 (a squared distance) whose energy group is not flagged
 + * EGP_EXCL in i_egp_flags are buffered, per j energy group and shift, through
 + * add_simple.
 + */
 +static void ns_inner_tric(rvec x[],int icg,int *i_egp_flags,
 +                          int njcg,atom_id jcg[],
 +                          matrix box,rvec b_inv,real rcut2,
 +                          t_block *cgs,t_ns_buf **ns_buf,
 +                          gmx_bool bHaveVdW[],int ngid,t_mdatoms *md,
 +                          t_excl bexcl[],t_forcerec *fr,
 +                          put_in_list_t *put_in_list)
 +{
 +    int      *cginfo  = fr->cginfo;
 +    atom_id  *cgindex = cgs->index;
 +    int      shift    = CENTRAL;
 +    int      j,nrj,jgid;
 +    atom_id  cg_j;
 +
 +    for(j=0; (j<njcg); j++)
 +    {
 +        cg_j = jcg[j];
 +        if (!(calc_image_tric(x[icg],x[cg_j],box,b_inv,&shift) < rcut2))
 +        {
 +            /* Out of range */
 +            continue;
 +        }
 +
 +        jgid = GET_CGINFO_GID(cginfo[cg_j]);
 +        if (i_egp_flags[jgid] & EGP_EXCL)
 +        {
 +            /* This energy group pair is excluded */
 +            continue;
 +        }
 +
 +        /* Number of atoms in the j charge group */
 +        nrj = cgindex[cg_j+1]-cgindex[cg_j];
 +        add_simple(&ns_buf[jgid][shift],nrj,cg_j,
 +                   bHaveVdW,ngid,md,icg,jgid,cgs,bexcl,shift,fr,
 +                   put_in_list);
 +    }
 +}
 +
 +/* Inner loop of the simple search for a rectangular box or no box at all.
 + * With bBox set, image distances and shifts come from calc_image_rect.
 + * Without a box the plain distance2() is used, the shift is always CENTRAL,
 + * and rcut2 == 0 accepts every candidate.
 + * Accepted candidates whose energy group is not flagged EGP_EXCL in
 + * i_egp_flags are buffered through add_simple.
 + */
 +static void ns_inner_rect(rvec x[],int icg,int *i_egp_flags,
 +                          int njcg,atom_id jcg[],
 +                          gmx_bool bBox,rvec box_size,rvec b_inv,real rcut2,
 +                          t_block *cgs,t_ns_buf **ns_buf,
 +                          gmx_bool bHaveVdW[],int ngid,t_mdatoms *md,
 +                          t_excl bexcl[],t_forcerec *fr,
 +                          put_in_list_t *put_in_list)
 +{
 +    int      *cginfo  = fr->cginfo;
 +    atom_id  *cgindex = cgs->index;
 +    int      shift;
 +    int      j,nrj,jgid;
 +    atom_id  cg_j;
 +
 +    if (bBox)
 +    {
 +        shift = CENTRAL;
 +        for(j=0; (j<njcg); j++)
 +        {
 +            cg_j = jcg[j];
 +            if (!(calc_image_rect(x[icg],x[cg_j],box_size,b_inv,&shift) < rcut2))
 +            {
 +                continue;
 +            }
 +            jgid = GET_CGINFO_GID(cginfo[cg_j]);
 +            if (i_egp_flags[jgid] & EGP_EXCL)
 +            {
 +                continue;
 +            }
 +            nrj = cgindex[cg_j+1]-cgindex[cg_j];
 +            add_simple(&ns_buf[jgid][shift],nrj,cg_j,
 +                       bHaveVdW,ngid,md,icg,jgid,cgs,bexcl,shift,fr,
 +                       put_in_list);
 +        }
 +    }
 +    else
 +    {
 +        for(j=0; (j<njcg); j++)
 +        {
 +            cg_j = jcg[j];
 +            if (!((rcut2 == 0) || (distance2(x[icg],x[cg_j]) < rcut2)))
 +            {
 +                continue;
 +            }
 +            jgid = GET_CGINFO_GID(cginfo[cg_j]);
 +            if (i_egp_flags[jgid] & EGP_EXCL)
 +            {
 +                continue;
 +            }
 +            nrj = cgindex[cg_j+1]-cgindex[cg_j];
 +            add_simple(&ns_buf[jgid][CENTRAL],nrj,cg_j,
 +                       bHaveVdW,ngid,md,icg,jgid,cgs,bexcl,CENTRAL,fr,
 +                       put_in_list);
 +        }
 +    }
 +}
 +
 +/* ns_simple_core needs to be adapted for QMMM still 2005 */
 +
 +/* Simple (non-grid) neighbour search over the charge groups
 + * fr->cg0 <= icg < fr->hcg.
 + * For every i charge group its exclusions are set in bexcl, calc_naaj()
 + * candidates starting at aaj[icg] are tested against fr->rlist, the
 + * per-energy-group / per-shift buffers in ns_buf are flushed through
 + * put_in_list, and the exclusions are removed again.
 + * Returns the summed number of candidates handed to the inner search.
 + */
 +static int ns_simple_core(t_forcerec *fr,
 +                          gmx_localtop_t *top,
 +                          t_mdatoms *md,
 +                          matrix box,rvec box_size,
 +                          t_excl bexcl[],atom_id *aaj,
 +                          int ngid,t_ns_buf **ns_buf,
 +                          put_in_list_t *put_in_list,gmx_bool bHaveVdW[])
 +{
 +    int      naaj,k;
 +    real     rlist2;
 +    int      nsearch,icg,igid,nn;
 +    int      *cginfo;
 +    t_ns_buf *nsbuf;
 +    t_block  *cgs=&(top->cgs);
 +    t_blocka *excl=&(top->excls);
 +    rvec     b_inv;
 +    int      m;
 +    gmx_bool     bBox,bTriclinic;
 +    int      *i_egp_flags;
 +    
 +    rlist2 = sqr(fr->rlist);
 +    
 +    /* b_inv is only filled in, and only read by the inner loops, with PBC */
 +    bBox       = (fr->ePBC != epbcNONE);
 +    bTriclinic = FALSE;
 +    if (bBox)
 +    {
 +        for(m=0; (m<DIM); m++)
 +        {
 +            b_inv[m] = divide_err(1.0,box_size[m]);
 +        }
 +        bTriclinic = TRICLINIC(box);
 +    }
 +    
 +    cginfo = fr->cginfo;
 +    
 +    nsearch=0;
 +    for (icg=fr->cg0; (icg<fr->hcg); icg++)
 +    {
 +        igid = GET_CGINFO_GID(cginfo[icg]);
 +        i_egp_flags = fr->egp_flags + ngid*igid;
 +        /* Mark the exclusions of the i charge group for the list builders */
 +        setexcl(cgs->index[icg],cgs->index[icg+1],excl,TRUE,bexcl);
 +        
 +        naaj=calc_naaj(icg,cgs->nr);
 +        if (bTriclinic)
 +        {
 +            ns_inner_tric(fr->cg_cm,icg,i_egp_flags,naaj,&(aaj[icg]),
 +                          box,b_inv,rlist2,cgs,ns_buf,
 +                          bHaveVdW,ngid,md,bexcl,fr,put_in_list);
 +        }
 +        else
 +        {
 +            ns_inner_rect(fr->cg_cm,icg,i_egp_flags,naaj,&(aaj[icg]),
 +                          bBox,box_size,b_inv,rlist2,cgs,ns_buf,
 +                          bHaveVdW,ngid,md,bexcl,fr,put_in_list);
 +        }
 +        nsearch += naaj;
 +        
 +        /* Flush every non-empty buffer while the exclusions are still set */
 +        for(nn=0; (nn<ngid); nn++)
 +        {
 +            for(k=0; (k<SHIFTS); k++)
 +            {
 +                nsbuf = &(ns_buf[nn][k]);
 +                if (nsbuf->ncg > 0)
 +                {
 +                    put_in_list(bHaveVdW,ngid,md,icg,nn,nsbuf->ncg,nsbuf->jcg,
 +                                cgs->index,bexcl,k,fr,FALSE,TRUE,TRUE);
 +                    nsbuf->ncg=nsbuf->nj=0;
 +                }
 +            }
 +        }
 +        /* Remove the exclusions again so bexcl is clean for the next icg */
 +        setexcl(cgs->index[icg],cgs->index[icg+1],excl,FALSE,bexcl);
 +    }
 +    close_neighbor_list(fr,FALSE,-1,-1,FALSE);
 +    
 +    return nsearch;
 +}
 +
 +/************************************************
 + *
 + *    N S 5     G R I D     S T U F F
 + *
 + ************************************************/
 +
 +/* Determine the range of grid cells [*dx0,*dx1] along one dimension whose
 + * nearest edge lies within squared distance rc2 of coordinate x, and store
 + * the squared edge distance of every cell in that range in dcx2[].
 + * xgi is the cell index of x; Nx the number of cells; gridx the cell size.
 + * When xgi falls outside [0,Nx) the range starts out empty (*dx1 = *dx0-1)
 + * and is only extended into the grid from the nearest side.
 + */
 +static inline void get_dx(int Nx,real gridx,real rc2,int xgi,real x,
 +                          int *dx0,int *dx1,real *dcx2)
 +{
 +    real edge,edge2;
 +    int  down,up;
 +    
 +    /* Initial range and the first cells to probe downwards and upwards */
 +    if (xgi < 0)
 +    {
 +        *dx0 = 0;
 +        *dx1 = -1;
 +        down = -1;
 +        up   = 0;
 +    }
 +    else if (xgi >= Nx)
 +    {
 +        *dx0 = Nx;
 +        *dx1 = Nx-1;
 +        down = Nx-1;
 +        up   = Nx;
 +    }
 +    else
 +    {
 +        /* x lies inside its own cell: zero distance */
 +        dcx2[xgi] = 0;
 +        *dx0 = xgi;
 +        *dx1 = xgi;
 +        down = xgi-1;
 +        up   = xgi+1;
 +    }
 +    
 +    /* Extend downwards: distance to the upper edge of cell 'down' */
 +    while (down >= 0)
 +    {
 +        edge  = (down+1)*gridx-x;
 +        edge2 = edge*edge;
 +        if (edge2 >= rc2)
 +        {
 +            break;
 +        }
 +        *dx0       = down;
 +        dcx2[down] = edge2;
 +        down--;
 +    }
 +    /* Extend upwards: distance to the lower edge of cell 'up' */
 +    while (up < Nx)
 +    {
 +        edge  = up*gridx-x;
 +        edge2 = edge*edge;
 +        if (edge2 >= rc2)
 +        {
 +            break;
 +        }
 +        *dx1     = up;
 +        dcx2[up] = edge2;
 +        up++;
 +    }
 +}
 +
 +/* Variant of get_dx that additionally takes ncpddc, shift_min and shift_max
 + * and uses them to limit the searchable cell range [g_min,g_max].
 + * On return [*g0,*g1] is the range of cells whose nearest edge is within
 + * squared distance rc2 of x, and dcx2[] holds the squared edge distances of
 + * the cells in that range. An empty result is encoded as *g1 == *g0-1.
 + * NOTE(review): ncpddc presumably is the cell index separating the two grid
 + * parts used with domain decomposition - confirm against the grid code.
 + */
 +static inline void get_dx_dd(int Nx,real gridx,real rc2,int xgi,real x,
 +                             int ncpddc,int shift_min,int shift_max,
 +                             int *g0,int *g1,real *dcx2)
 +{
 +    real dcx,tmp;
 +    int  g_min,g_max,shift_home;
 +    
 +    if (xgi < 0)
 +    {
 +        /* x is below the grid: empty range anchored at the lower edge */
 +        g_min = 0;
 +        g_max = Nx - 1;
 +        *g0   = 0;
 +        *g1   = -1;
 +    }
 +    else if (xgi >= Nx)
 +    {
 +        /* x is above the grid: empty range anchored at the upper edge */
 +        g_min = 0;
 +        g_max = Nx - 1;
 +        *g0   = Nx;
 +        *g1   = Nx - 1;
 +    }
 +    else
 +    {
 +        if (ncpddc == 0)
 +        {
 +            /* No split: the whole grid may be searched */
 +            g_min = 0;
 +            g_max = Nx - 1;
 +        }
 +        else
 +        {
 +            /* shift_home is 0 for cells below ncpddc and -1 otherwise */
 +            if (xgi < ncpddc)
 +            {
 +                shift_home = 0;
 +            }
 +            else
 +            {
 +                shift_home = -1;
 +            }
 +            /* Each bound stays in [0,ncpddc) when its shift equals
 +             * shift_home, otherwise it moves to [ncpddc,Nx).
 +             */
 +            g_min = (shift_min == shift_home ? 0          : ncpddc);
 +            g_max = (shift_max == shift_home ? ncpddc - 1 : Nx - 1);
 +        }
 +        if (shift_min > 0)
 +        {
 +            /* Start with an empty range at the lower bound */
 +            *g0 = g_min;
 +            *g1 = g_min - 1;
 +        }
 +        else if (shift_max < 0)
 +        {
 +            /* Start with an empty range at the upper bound */
 +            *g0 = g_max + 1;
 +            *g1 = g_max;
 +        }
 +        else
 +        {
 +            /* Start from the cell containing x, at zero distance */
 +            *g0 = xgi;
 +            *g1 = xgi;
 +            dcx2[xgi] = 0;
 +        }
 +    }
 +    
 +    while (*g0 > g_min)
 +    {
 +        /* Check one grid cell down */
 +        dcx = ((*g0 - 1) + 1)*gridx - x;
 +        tmp = dcx*dcx;
 +        if (tmp >= rc2)
 +        {
 +            break;
 +        }
 +        (*g0)--;
 +        dcx2[*g0] = tmp;
 +    }
 +    
 +    while (*g1 < g_max)
 +    {
 +        /* Check one grid cell up */
 +        dcx = (*g1 + 1)*gridx - x;
 +        tmp = dcx*dcx;
 +        if (tmp >= rc2)
 +        {
 +            break;
 +        }
 +        (*g1)++;
 +        dcx2[*g1] = tmp;
 +    }
 +}
 +
 +
 +/* Squared-distance helper macros.
 + * calc_dx2:     squared 3D distance between the point (XI,YI,ZI) and vector y
 + * calc_cyl_dx2: the same using only the XX and YY components
 + * Every argument is parenthesised so that expressions such as p+1 can be
 + * passed safely; arguments are evaluated more than once, so they must not
 + * have side effects.
 + */
 +#define sqr(x) ((x)*(x))
 +#define calc_dx2(XI,YI,ZI,y) (sqr((XI)-(y)[XX]) + sqr((YI)-(y)[YY]) + sqr((ZI)-(y)[ZZ]))
 +#define calc_cyl_dx2(XI,YI,y) (sqr((XI)-(y)[XX]) + sqr((YI)-(y)[YY]))
 +/****************************************************
 + *
 + *    F A S T   N E I G H B O R  S E A R C H I N G
 + *
 + *    Optimized neighboursearching routine using grid 
 + *    at least 1x1x1, see GROMACS manual
 + *
 + ****************************************************/
 +
 +/* Long-range list handling during grid search.
 + * First, every long-range neighbour list that is nearly full (fewer than 32
 + * free i-entries) - or every list when bEvaluateNow is set - is closed,
 + * evaluated with do_nonbonded and reset.
 + * Then, unless bEvaluateNow is set, the nlr charge groups in lr[] are put in
 + * the long-range list for i charge group icg through put_in_list.
 + */
 +static void do_longrange(t_commrec *cr,gmx_localtop_t *top,t_forcerec *fr,
 +                         int ngid,t_mdatoms *md,int icg,
 +                         int jgid,int nlr,
 +                         atom_id lr[],t_excl bexcl[],int shift,
 +                         rvec x[],rvec box_size,t_nrnb *nrnb,
 +                         real lambda,real *dvdlambda,
 +                         gmx_grppairener_t *grppener,
 +                         gmx_bool bDoVdW,gmx_bool bDoCoul,
 +                         gmx_bool bEvaluateNow,put_in_list_t *put_in_list,
 +                         gmx_bool bHaveVdW[],
 +                         gmx_bool bDoForces,rvec *f)
 +{
 +    int       ilist,itype;
 +    t_nblist *lr_list;
 +
 +    for(ilist=0; ilist<fr->nnblists; ilist++)
 +    {
 +        for(itype=0; (itype<eNL_NR); itype++)
 +        {
 +            lr_list = &fr->nblists[ilist].nlist_lr[itype];
 +            if (!bEvaluateNow && !(lr_list->nri > lr_list->maxnri-32))
 +            {
 +                /* Still room in this list and no evaluation requested */
 +                continue;
 +            }
 +
 +            close_neighbor_list(fr,TRUE,ilist,itype,FALSE);
 +            /* Evaluate the energies and forces */
 +            do_nonbonded(cr,fr,x,f,md,NULL,
 +                         grppener->ener[fr->bBHAM ? egBHAMLR : egLJLR],
 +                         grppener->ener[egCOULLR],
 +                         grppener->ener[egGB],box_size,
 +                         nrnb,lambda,dvdlambda,ilist,itype,
 +                         GMX_DONB_LR | GMX_DONB_FORCES);
 +
 +            reset_neighbor_list(fr,TRUE,ilist,itype);
 +        }
 +    }
 +
 +    if (bEvaluateNow)
 +    {
 +        return;
 +    }
 +
 +    /* Put the long range particles in a list */
 +    /* do_longrange is never called for QMMM  */
 +    put_in_list(bHaveVdW,ngid,md,icg,jgid,nlr,lr,top->cgs.index,
 +                bexcl,shift,fr,TRUE,bDoVdW,bDoCoul);
 +}
 +
 +/* Compute the squared cut-offs used by the grid search.
 + *  *rs2    = rlist^2
 + *  *rvdw2  = squared VdW list cut-off
 + *  *rcoul2 = squared Coulomb list cut-off
 + *  *rm2    = min(*rvdw2,*rcoul2),  *rl2 = max(*rvdw2,*rcoul2)
 + * Without long-range searching or twin-range, both interaction cut-offs
 + * equal *rs2.
 + */
 +static void get_cutoff2(t_forcerec *fr,gmx_bool bDoLongRange,
 +                        real *rvdw2,real *rcoul2,
 +                        real *rs2,real *rm2,real *rl2)
 +{
 +    gmx_bool bVdwLong,bCoulLong;
 +
 +    *rs2 = sqr(fr->rlist);
 +
 +    if (!(bDoLongRange && fr->bTwinRange))
 +    {
 +        /* Workaround for a gcc -O3 or -ffast-math problem */
 +        *rvdw2  = *rs2;
 +        *rcoul2 = *rs2;
 +    }
 +    else
 +    {
 +        /* The VdW and elec. LR cut-off's could be different,
 +         * so we can not simply set them to rlistlong.
 +         */
 +        bVdwLong  = (EVDW_MIGHT_BE_ZERO_AT_CUTOFF(fr->vdwtype) &&
 +                     fr->rvdw > fr->rlist);
 +        bCoulLong = (EEL_MIGHT_BE_ZERO_AT_CUTOFF(fr->eeltype) &&
 +                     fr->rcoulomb > fr->rlist);
 +
 +        if (bVdwLong)
 +        {
 +            *rvdw2 = sqr(fr->rlistlong);
 +        }
 +        else
 +        {
 +            *rvdw2 = sqr(fr->rvdw);
 +        }
 +
 +        if (bCoulLong)
 +        {
 +            *rcoul2 = sqr(fr->rlistlong);
 +        }
 +        else
 +        {
 +            *rcoul2 = sqr(fr->rcoulomb);
 +        }
 +    }
 +
 +    *rm2 = min(*rvdw2,*rcoul2);
 +    *rl2 = max(*rvdw2,*rcoul2);
 +}
 +
 +/* Allocate the per-energy-group buffers and counters of the grid search in
 + * ns. Short-range buffers and all three counters are always allocated; the
 + * long-range buffers only when the corresponding cut-off from get_cutoff2
 + * (called with long-range enabled) exceeds the next shorter one.
 + * Each buffer holds MAX_CG entries.
 + */
 +static void init_nsgrid_lists(t_forcerec *fr,int ngid,gmx_ns_t *ns)
 +{
 +    real rvdw2,rcoul2,rs2,rm2,rl2;
 +    int  g;
 +    int  bNeedLJC,bNeedOne;
 +
 +    get_cutoff2(fr,TRUE,&rvdw2,&rcoul2,&rs2,&rm2,&rl2);
 +
 +    /* Long range VdW and Coul buffers */
 +    bNeedLJC = (rm2 > rs2);
 +    /* Long range VdW or Coul only buffers */
 +    bNeedOne = (rl2 > rm2);
 +
 +    /* Short range buffers */
 +    snew(ns->nl_sr,ngid);
 +    /* Counters */
 +    snew(ns->nsr,ngid);
 +    snew(ns->nlr_ljc,ngid);
 +    snew(ns->nlr_one,ngid);
 +
 +    if (bNeedLJC)
 +    {
 +        snew(ns->nl_lr_ljc,ngid);
 +    }
 +    if (bNeedOne)
 +    {
 +        snew(ns->nl_lr_one,ngid);
 +    }
 +
 +    for(g=0; (g<ngid); g++)
 +    {
 +        snew(ns->nl_sr[g],MAX_CG);
 +        if (bNeedLJC)
 +        {
 +            snew(ns->nl_lr_ljc[g],MAX_CG);
 +        }
 +        if (bNeedOne)
 +        {
 +            snew(ns->nl_lr_one[g],MAX_CG);
 +        }
 +    }
 +
 +    if (debug)
 +    {
 +        fprintf(debug,
 +                "ns5_core: rs2 = %g, rm2 = %g, rl2 = %g (nm^2)\n",
 +                rs2,rm2,rl2);
 +    }
 +}
 +
 +static int nsgrid_core(FILE *log,t_commrec *cr,t_forcerec *fr,
 +                       matrix box,rvec box_size,int ngid,
 +                       gmx_localtop_t *top,
 +                       t_grid *grid,rvec x[],
 +                       t_excl bexcl[],gmx_bool *bExcludeAlleg,
 +                       t_nrnb *nrnb,t_mdatoms *md,
 +                       real lambda,real *dvdlambda,
 +                       gmx_grppairener_t *grppener,
 +                       put_in_list_t *put_in_list,
 +                       gmx_bool bHaveVdW[],
 +                       gmx_bool bDoLongRange,gmx_bool bDoForces,rvec *f,
 +                       gmx_bool bMakeQMMMnblist)
 +{
 +    gmx_ns_t *ns;
 +    atom_id **nl_lr_ljc,**nl_lr_one,**nl_sr;
 +    int     *nlr_ljc,*nlr_one,*nsr;
 +    gmx_domdec_t *dd=NULL;
 +    t_block *cgs=&(top->cgs);
 +    int     *cginfo=fr->cginfo;
 +    /* atom_id *i_atoms,*cgsindex=cgs->index; */
 +    ivec    sh0,sh1,shp;
 +    int     cell_x,cell_y,cell_z;
 +    int     d,tx,ty,tz,dx,dy,dz,cj;
 +#ifdef ALLOW_OFFDIAG_LT_HALFDIAG
 +    int     zsh_ty,zsh_tx,ysh_tx;
 +#endif
 +    int     dx0,dx1,dy0,dy1,dz0,dz1;
 +    int     Nx,Ny,Nz,shift=-1,j,nrj,nns,nn=-1;
 +    real    gridx,gridy,gridz,grid_x,grid_y,grid_z;
 +    real    *dcx2,*dcy2,*dcz2;
 +    int     zgi,ygi,xgi;
 +    int     cg0,cg1,icg=-1,cgsnr,i0,igid,nri,naaj,max_jcg;
 +    int     jcg0,jcg1,jjcg,cgj0,jgid;
 +    int     *grida,*gridnra,*gridind;
 +    gmx_bool    rvdw_lt_rcoul,rcoul_lt_rvdw;
 +    rvec    xi,*cgcm,grid_offset;
 +    real    r2,rs2,rvdw2,rcoul2,rm2,rl2,XI,YI,ZI,dcx,dcy,dcz,tmp1,tmp2;
 +    int     *i_egp_flags;
 +    gmx_bool    bDomDec,bTriclinicX,bTriclinicY;
 +    ivec    ncpddc;
 +    
 +    ns = &fr->ns;
 +    
 +    bDomDec = DOMAINDECOMP(cr);
 +    if (bDomDec)
 +    {
 +        dd = cr->dd;
 +    }
 +    
 +    bTriclinicX = ((YY < grid->npbcdim &&
 +                    (!bDomDec || dd->nc[YY]==1) && box[YY][XX] != 0) ||
 +                   (ZZ < grid->npbcdim &&
 +                    (!bDomDec || dd->nc[ZZ]==1) && box[ZZ][XX] != 0));
 +    bTriclinicY =  (ZZ < grid->npbcdim &&
 +                    (!bDomDec || dd->nc[ZZ]==1) && box[ZZ][YY] != 0);
 +    
 +    cgsnr    = cgs->nr;
 +
 +    get_cutoff2(fr,bDoLongRange,&rvdw2,&rcoul2,&rs2,&rm2,&rl2);
 +
 +    rvdw_lt_rcoul = (rvdw2 >= rcoul2);
 +    rcoul_lt_rvdw = (rcoul2 >= rvdw2);
 +    
 +    if (bMakeQMMMnblist)
 +    {
 +        rm2 = rl2;
 +        rs2 = rl2;
 +    }
 +
 +    nl_sr     = ns->nl_sr;
 +    nsr       = ns->nsr;
 +    nl_lr_ljc = ns->nl_lr_ljc;
 +    nl_lr_one = ns->nl_lr_one;
 +    nlr_ljc   = ns->nlr_ljc;
 +    nlr_one   = ns->nlr_one;
 +    
 +    /* Unpack arrays */
 +    cgcm    = fr->cg_cm;
 +    Nx      = grid->n[XX];
 +    Ny      = grid->n[YY];
 +    Nz      = grid->n[ZZ];
 +    grida   = grid->a;
 +    gridind = grid->index;
 +    gridnra = grid->nra;
 +    nns     = 0;
 +    
 +    gridx      = grid->cell_size[XX];
 +    gridy      = grid->cell_size[YY];
 +    gridz      = grid->cell_size[ZZ];
 +    grid_x     = 1/gridx;
 +    grid_y     = 1/gridy;
 +    grid_z     = 1/gridz;
 +    copy_rvec(grid->cell_offset,grid_offset);
 +    copy_ivec(grid->ncpddc,ncpddc);
 +    dcx2       = grid->dcx2;
 +    dcy2       = grid->dcy2;
 +    dcz2       = grid->dcz2;
 +    
 +#ifdef ALLOW_OFFDIAG_LT_HALFDIAG
 +    zsh_ty = floor(-box[ZZ][YY]/box[YY][YY]+0.5);
 +    zsh_tx = floor(-box[ZZ][XX]/box[XX][XX]+0.5);
 +    ysh_tx = floor(-box[YY][XX]/box[XX][XX]+0.5);
 +    if (zsh_tx!=0 && ysh_tx!=0)
 +    {
 +        /* This could happen due to rounding, when both ratios are 0.5 */
 +        ysh_tx = 0;
 +    }
 +#endif
 +    
 +    debug_gmx();
 +
 +    if (fr->n_tpi)
 +    {
 +        /* We only want a list for the test particle */
 +        cg0 = cgsnr - 1;
 +    }
 +    else
 +    {
 +        cg0 = grid->icg0;
 +    }
 +    cg1 = grid->icg1;
 +
 +    /* Set the shift range */
 +    for(d=0; d<DIM; d++)
 +    {
 +        sh0[d] = -1;
 +        sh1[d] = 1;
 +        /* Check if we need periodicity shifts.
 +         * Without PBC or with domain decomposition we don't need them.
 +         */
 +        if (d >= ePBC2npbcdim(fr->ePBC) || (bDomDec && dd->nc[d] > 1))
 +        {
 +            shp[d] = 0;
 +        }
 +        else
 +        {
 +            if (d == XX &&
 +                box[XX][XX] - fabs(box[YY][XX]) - fabs(box[ZZ][XX]) < sqrt(rl2))
 +            {
 +                shp[d] = 2;
 +            }
 +            else
 +            {
 +                shp[d] = 1;
 +            }
 +        }
 +    }
 +    
 +    /* Loop over charge groups */
 +    for(icg=cg0; (icg < cg1); icg++)
 +    {
 +        igid = GET_CGINFO_GID(cginfo[icg]);
 +        /* Skip this charge group if all energy groups are excluded! */
 +        if (bExcludeAlleg[igid])
 +        {
 +            continue;
 +        }
 +        
 +        i0   = cgs->index[icg];
 +        
 +        if (bMakeQMMMnblist)
 +        { 
 +            /* Skip this charge group if it is not a QM atom while making a
 +             * QM/MM neighbourlist
 +             */
 +            if (md->bQM[i0]==FALSE)
 +            {
 +                continue; /* MM particle, go to next particle */ 
 +            }
 +            
 +            /* Compute the number of charge groups that fall within the control
 +             * of this one (icg)
 +             */
 +            naaj    = calc_naaj(icg,cgsnr);
 +            jcg0    = icg;
 +            jcg1    = icg + naaj;
 +            max_jcg = cgsnr;       
 +        } 
 +        else
 +        { 
 +            /* make a normal neighbourlist */
 +            
 +            if (bDomDec)
 +            {
 +                /* Get the j charge-group and dd cell shift ranges */
 +                dd_get_ns_ranges(cr->dd,icg,&jcg0,&jcg1,sh0,sh1);
 +                max_jcg = 0;
 +            }
 +            else
 +            {
 +                /* Compute the number of charge groups that fall within the control
 +                 * of this one (icg)
 +                 */
 +                naaj = calc_naaj(icg,cgsnr);
 +                jcg0 = icg;
 +                jcg1 = icg + naaj;
 +                
 +                if (fr->n_tpi)
 +                {
 +                    /* The i-particle is awlways the test particle,
 +                     * so we want all j-particles
 +                     */
 +                    max_jcg = cgsnr - 1;
 +                }
 +                else
 +                {
 +                    max_jcg  = jcg1 - cgsnr;
 +                }
 +            }
 +        }
 +        
 +        i_egp_flags = fr->egp_flags + igid*ngid;
 +        
 +        /* Set the exclusions for the atoms in charge group icg using a bitmask */
 +        setexcl(i0,cgs->index[icg+1],&top->excls,TRUE,bexcl);
 +        
 +        ci2xyz(grid,icg,&cell_x,&cell_y,&cell_z);
 +        
 +        /* Changed iicg to icg, DvdS 990115 
 +         * (but see consistency check above, DvdS 990330) 
 +         */
 +#ifdef NS5DB
 +        fprintf(log,"icg=%5d, naaj=%5d, cell %d %d %d\n",
 +                icg,naaj,cell_x,cell_y,cell_z);
 +#endif
 +        /* Loop over shift vectors in three dimensions */
 +        for (tz=-shp[ZZ]; tz<=shp[ZZ]; tz++)
 +        {
 +            ZI = cgcm[icg][ZZ]+tz*box[ZZ][ZZ];
 +            /* Calculate range of cells in Z direction that have the shift tz */
 +            zgi = cell_z + tz*Nz;
 +#define FAST_DD_NS
 +#ifndef FAST_DD_NS
 +            get_dx(Nz,gridz,rl2,zgi,ZI,&dz0,&dz1,dcz2);
 +#else
 +            get_dx_dd(Nz,gridz,rl2,zgi,ZI-grid_offset[ZZ],
 +                      ncpddc[ZZ],sh0[ZZ],sh1[ZZ],&dz0,&dz1,dcz2);
 +#endif
 +            if (dz0 > dz1)
 +            {
 +                continue;
 +            }
 +            for (ty=-shp[YY]; ty<=shp[YY]; ty++)
 +            {
 +                YI = cgcm[icg][YY]+ty*box[YY][YY]+tz*box[ZZ][YY];
 +                /* Calculate range of cells in Y direction that have the shift ty */
 +                if (bTriclinicY)
 +                {
 +                    ygi = (int)(Ny + (YI - grid_offset[YY])*grid_y) - Ny;
 +                }
 +                else
 +                {
 +                    ygi = cell_y + ty*Ny;
 +                }
 +#ifndef FAST_DD_NS
 +                get_dx(Ny,gridy,rl2,ygi,YI,&dy0,&dy1,dcy2);
 +#else
 +                get_dx_dd(Ny,gridy,rl2,ygi,YI-grid_offset[YY],
 +                          ncpddc[YY],sh0[YY],sh1[YY],&dy0,&dy1,dcy2);
 +#endif
 +                if (dy0 > dy1)
 +                {
 +                    continue;
 +                }
 +                for (tx=-shp[XX]; tx<=shp[XX]; tx++)
 +                {
 +                    XI = cgcm[icg][XX]+tx*box[XX][XX]+ty*box[YY][XX]+tz*box[ZZ][XX];
 +                    /* Calculate range of cells in X direction that have the shift tx */
 +                    if (bTriclinicX)
 +                    {
 +                        xgi = (int)(Nx + (XI - grid_offset[XX])*grid_x) - Nx;
 +                    }
 +                    else
 +                    {
 +                        xgi = cell_x + tx*Nx;
 +                    }
 +#ifndef FAST_DD_NS
 +                    get_dx(Nx,gridx,rl2,xgi*Nx,XI,&dx0,&dx1,dcx2);
 +#else
 +                    get_dx_dd(Nx,gridx,rl2,xgi,XI-grid_offset[XX],
 +                              ncpddc[XX],sh0[XX],sh1[XX],&dx0,&dx1,dcx2);
 +#endif
 +                    if (dx0 > dx1)
 +                    {
 +                        continue;
 +                    }
 +                    /* Get shift vector */	  
 +                    shift=XYZ2IS(tx,ty,tz);
 +#ifdef NS5DB
 +                    range_check(shift,0,SHIFTS);
 +#endif
 +                    for(nn=0; (nn<ngid); nn++)
 +                    {
 +                        nsr[nn]      = 0;
 +                        nlr_ljc[nn]  = 0;
 +                        nlr_one[nn] = 0;
 +                    }
 +#ifdef NS5DB
 +                    fprintf(log,"shift: %2d, dx0,1: %2d,%2d, dy0,1: %2d,%2d, dz0,1: %2d,%2d\n",
 +                            shift,dx0,dx1,dy0,dy1,dz0,dz1);
 +                    fprintf(log,"cgcm: %8.3f  %8.3f  %8.3f\n",cgcm[icg][XX],
 +                            cgcm[icg][YY],cgcm[icg][ZZ]);
 +                    fprintf(log,"xi:   %8.3f  %8.3f  %8.3f\n",XI,YI,ZI);
 +#endif
 +                    for (dx=dx0; (dx<=dx1); dx++)
 +                    {
 +                        tmp1 = rl2 - dcx2[dx];
 +                        for (dy=dy0; (dy<=dy1); dy++)
 +                        {
 +                            tmp2 = tmp1 - dcy2[dy];
 +                            if (tmp2 > 0)
 +                            {
 +                                for (dz=dz0; (dz<=dz1); dz++) {
 +                                    if (tmp2 > dcz2[dz]) {
 +                                        /* Find grid-cell cj in which possible neighbours are */
 +                                        cj   = xyz2ci(Ny,Nz,dx,dy,dz);
 +                                        
 +                                        /* Check out how many cgs (nrj) there in this cell */
 +                                        nrj  = gridnra[cj];
 +                                        
 +                                        /* Find the offset in the cg list */
 +                                        cgj0 = gridind[cj];
 +                                        
 +                                        /* Check if all j's are out of range so we
 +                                         * can skip the whole cell.
 +                                         * Should save some time, especially with DD.
 +                                         */
 +                                        if (nrj == 0 ||
 +                                            (grida[cgj0] >= max_jcg &&
 +                                             (grida[cgj0] >= jcg1 || grida[cgj0+nrj-1] < jcg0)))
 +                                        {
 +                                            continue;
 +                                        }
 +                                        
 +                                        /* Loop over cgs */
 +                                        for (j=0; (j<nrj); j++)
 +                                        {
 +                                            jjcg = grida[cgj0+j];
 +                                            
 +                                            /* check whether this guy is in range! */
 +                                            if ((jjcg >= jcg0 && jjcg < jcg1) ||
 +                                                (jjcg < max_jcg))
 +                                            {
 +                                                r2=calc_dx2(XI,YI,ZI,cgcm[jjcg]);
 +                                                if (r2 < rl2) {
 +                                                    /* jgid = gid[cgsatoms[cgsindex[jjcg]]]; */
 +                                                    jgid = GET_CGINFO_GID(cginfo[jjcg]);
 +                                                    /* check energy group exclusions */
 +                                                    if (!(i_egp_flags[jgid] & EGP_EXCL))
 +                                                    {
 +                                                        if (r2 < rs2)
 +                                                        {
 +                                                            if (nsr[jgid] >= MAX_CG)
 +                                                            {
 +                                                                put_in_list(bHaveVdW,ngid,md,icg,jgid,
 +                                                                            nsr[jgid],nl_sr[jgid],
 +                                                                            cgs->index,/* cgsatoms, */ bexcl,
 +                                                                            shift,fr,FALSE,TRUE,TRUE);
 +                                                                nsr[jgid]=0;
 +                                                            }
 +                                                            nl_sr[jgid][nsr[jgid]++]=jjcg;
 +                                                        } 
 +                                                        else if (r2 < rm2)
 +                                                        {
 +                                                            if (nlr_ljc[jgid] >= MAX_CG)
 +                                                            {
 +                                                                do_longrange(cr,top,fr,ngid,md,icg,jgid,
 +                                                                             nlr_ljc[jgid],
 +                                                                             nl_lr_ljc[jgid],bexcl,shift,x,
 +                                                                             box_size,nrnb,
 +                                                                             lambda,dvdlambda,
 +                                                                             grppener,
 +                                                                             TRUE,TRUE,FALSE,
 +                                                                             put_in_list,
 +                                                                             bHaveVdW,
 +                                                                             bDoForces,f);
 +                                                                nlr_ljc[jgid]=0;
 +                                                            }
 +                                                            nl_lr_ljc[jgid][nlr_ljc[jgid]++]=jjcg;
 +                                                        }
 +                                                        else
 +                                                        {
 +                                                            if (nlr_one[jgid] >= MAX_CG) {
 +                                                                do_longrange(cr,top,fr,ngid,md,icg,jgid,
 +                                                                             nlr_one[jgid],
 +                                                                             nl_lr_one[jgid],bexcl,shift,x,
 +                                                                             box_size,nrnb,
 +                                                                             lambda,dvdlambda,
 +                                                                             grppener,
 +                                                                             rvdw_lt_rcoul,rcoul_lt_rvdw,FALSE,
 +                                                                             put_in_list,
 +                                                                             bHaveVdW,
 +                                                                             bDoForces,f);
 +                                                                nlr_one[jgid]=0;
 +                                                            }
 +                                                            nl_lr_one[jgid][nlr_one[jgid]++]=jjcg;
 +                                                        }
 +                                                    }
 +                                                }
 +                                                nns++;
 +                                            }
 +                                        }
 +                                    }
 +                                }
 +                            }
 +                        }
 +                    }
 +                    /* CHECK whether there is anything left in the buffers */
 +                    for(nn=0; (nn<ngid); nn++)
 +                    {
 +                        if (nsr[nn] > 0)
 +                        {
 +                            put_in_list(bHaveVdW,ngid,md,icg,nn,nsr[nn],nl_sr[nn],
 +                                        cgs->index, /* cgsatoms, */ bexcl,
 +                                        shift,fr,FALSE,TRUE,TRUE);
 +                        }
 +                        
 +                        if (nlr_ljc[nn] > 0)
 +                        {
 +                            do_longrange(cr,top,fr,ngid,md,icg,nn,nlr_ljc[nn],
 +                                         nl_lr_ljc[nn],bexcl,shift,x,box_size,nrnb,
 +                                         lambda,dvdlambda,grppener,TRUE,TRUE,FALSE,
 +                                         put_in_list,bHaveVdW,bDoForces,f);
 +                        }
 +                        
 +                        if (nlr_one[nn] > 0)
 +                        {
 +                            do_longrange(cr,top,fr,ngid,md,icg,nn,nlr_one[nn],
 +                                         nl_lr_one[nn],bexcl,shift,x,box_size,nrnb,
 +                                         lambda,dvdlambda,grppener,
 +                                         rvdw_lt_rcoul,rcoul_lt_rvdw,FALSE,
 +                                         put_in_list,bHaveVdW,bDoForces,f);
 +                        }
 +                    }
 +                }
 +            }
 +        }
 +        /* setexcl(nri,i_atoms,&top->atoms.excl,FALSE,bexcl); */
 +        setexcl(cgs->index[icg],cgs->index[icg+1],&top->excls,FALSE,bexcl);
 +    }
 +    /* Perform any left over force calculations */
 +    for (nn=0; (nn<ngid); nn++)
 +    {
 +        if (rm2 > rs2)
 +        {
 +            do_longrange(cr,top,fr,0,md,icg,nn,nlr_ljc[nn],
 +                         nl_lr_ljc[nn],bexcl,shift,x,box_size,nrnb,
 +                         lambda,dvdlambda,grppener,
 +                         TRUE,TRUE,TRUE,put_in_list,bHaveVdW,bDoForces,f);
 +        }
 +        if (rl2 > rm2) {
 +            do_longrange(cr,top,fr,0,md,icg,nn,nlr_one[nn],
 +                         nl_lr_one[nn],bexcl,shift,x,box_size,nrnb,
 +                         lambda,dvdlambda,grppener,
 +                         rvdw_lt_rcoul,rcoul_lt_rvdw,
 +                         TRUE,put_in_list,bHaveVdW,bDoForces,f);
 +        }
 +    }
 +    debug_gmx();
 +    
 +    /* Close off short range neighbourlists */
 +    close_neighbor_list(fr,FALSE,-1,-1,bMakeQMMMnblist);
 +    
 +    return nns;
 +}
 +
 +/* Make sure the exclusion mask buffer ns->bexcl can hold natoms entries.
 + *
 + * When the current allocation (ns->nra_alloc) is too small, the buffer is
 + * resized to over_alloc_dd(natoms) elements and every element of the
 + * resized buffer is set to zero. When the buffer is already large enough
 + * nothing is changed.
 + */
 +void ns_realloc_natoms(gmx_ns_t *ns,int natoms)
 +{
 +    int a;
 +
 +    if (natoms <= ns->nra_alloc)
 +    {
 +        /* Enough room already, leave the buffer untouched */
 +        return;
 +    }
 +
 +    ns->nra_alloc = over_alloc_dd(natoms);
 +    srenew(ns->bexcl,ns->nra_alloc);
 +
 +    /* Clear the complete buffer, not only the newly added part */
 +    a = 0;
 +    while (a < ns->nra_alloc)
 +    {
 +        ns->bexcl[a] = 0;
 +        a++;
 +    }
 +}
 +
 +/* Initialize the neighbour search data structure ns.
 + *
 + * The following steps are performed, in this order:
 + *  - the largest charge group over all molecule types in mtop is
 + *    determined; a fatal error is raised when it has more atoms than
 + *    there are bits in t_excl, as exclusions are stored in bits;
 + *  - ns->bExcludeAlleg[i] is set to TRUE only when energy group i has
 + *    the EGP_EXCL flag set in fr->egp_flags for every energy group j;
 + *  - with fr->bGrid the grid and the grid search lists are set up,
 + *    otherwise the buffers for the simple search are allocated;
 + *  - ns->bHaveVdW[i] is set when atom type i has a non-zero Van der Waals
 + *    parameter with at least one atom type;
 + *  - the exclusion buffer is reset and, without domain decomposition,
 + *    allocated for mtop->natoms atoms;
 + *  - ns->dump_nl is read from the environment variable GMX_DUMP_NL
 + *    (0 when the variable is not set).
 + *
 + * fplog may be NULL, in which case nothing is written for GMX_DUMP_NL.
 + * box is currently not used by this routine.
 + */
 +void init_ns(FILE *fplog,const t_commrec *cr,
 +             gmx_ns_t *ns,t_forcerec *fr,
 +             const gmx_mtop_t *mtop,
 +             matrix box)
 +{
 +    int  mt,icg,nr_in_cg,maxcg,i,j,jcg,ngid,ncg;
 +    t_block *cgs;
 +
 +    /* Compute largest charge groups size (# atoms) */
 +    nr_in_cg=1;
 +    for(mt=0; mt<mtop->nmoltype; mt++) {
 +        cgs = &mtop->moltype[mt].cgs;
 +        for (icg=0; (icg < cgs->nr); icg++)
 +        {
 +            nr_in_cg=max(nr_in_cg,(int)(cgs->index[icg+1]-cgs->index[icg]));
 +        }
 +    }
 +
 +    /* Verify whether largest charge group is <= max cg.
 +     * This is determined by the type of the local exclusion type
 +     * Exclusions are stored in bits. (If the type is not large
 +     * enough, enlarge it, unsigned char -> unsigned short -> unsigned long)
 +     */
 +    maxcg = sizeof(t_excl)*8;
 +    if (nr_in_cg > maxcg)
 +    {
 +        gmx_fatal(FARGS,"Max #atoms in a charge group: %d > %d\n",
 +                  nr_in_cg,maxcg);
 +    }
 +
 +    /* An energy group can be skipped completely during the search
 +     * when its interactions with all energy groups are excluded.
 +     */
 +    ngid = mtop->groups.grps[egcENER].nr;
 +    snew(ns->bExcludeAlleg,ngid);
 +    for(i=0; i<ngid; i++) {
 +        ns->bExcludeAlleg[i] = TRUE;
 +        for(j=0; j<ngid; j++)
 +        {
 +            if (!(fr->egp_flags[i*ngid+j] & EGP_EXCL))
 +            {
 +                ns->bExcludeAlleg[i] = FALSE;
 +            }
 +        }
 +    }
 +
 +    if (fr->bGrid) {
 +        /* Grid search */
 +        ns->grid = init_grid(fplog,fr);
 +        init_nsgrid_lists(fr,ngid,ns);
 +    }
 +    else
 +    {
 +        /* Simple search */
 +        snew(ns->ns_buf,ngid);
 +        for(i=0; (i<ngid); i++)
 +        {
 +            snew(ns->ns_buf[i],SHIFTS);
 +        }
 +        ncg = ncg_mtop(mtop);
 +        /* The charge group indices are stored twice in a row */
 +        snew(ns->simple_aaj,2*ncg);
 +        for(jcg=0; (jcg<ncg); jcg++)
 +        {
 +            ns->simple_aaj[jcg]     = jcg;
 +            ns->simple_aaj[jcg+ncg] = jcg;
 +        }
 +    }
 +
 +    /* Create array that determines whether or not atoms have VdW.
 +     * NOTE(review): the loop ORs into the freshly allocated array, so it
 +     * presumably relies on snew returning zeroed memory - confirm.
 +     */
 +    snew(ns->bHaveVdW,fr->ntype);
 +    for(i=0; (i<fr->ntype); i++)
 +    {
 +        for(j=0; (j<fr->ntype); j++)
 +        {
 +            ns->bHaveVdW[i] = (ns->bHaveVdW[i] ||
 +                               (fr->bBHAM ?
 +                                ((BHAMA(fr->nbfp,fr->ntype,i,j) != 0) ||
 +                                 (BHAMB(fr->nbfp,fr->ntype,i,j) != 0) ||
 +                                 (BHAMC(fr->nbfp,fr->ntype,i,j) != 0)) :
 +                                ((C6(fr->nbfp,fr->ntype,i,j) != 0) ||
 +                                 (C12(fr->nbfp,fr->ntype,i,j) != 0))));
 +        }
 +    }
 +    if (debug)
 +    {
 +        pr_bvec(debug,0,"bHaveVdW",ns->bHaveVdW,fr->ntype,TRUE);
 +    }
 +
 +    ns->nra_alloc = 0;
 +    ns->bexcl = NULL;
 +    if (!DOMAINDECOMP(cr))
 +    {
 +        /* This could be reduced with particle decomposition */
 +        ns_realloc_natoms(ns,mtop->natoms);
 +    }
 +
 +    ns->nblist_initialized=FALSE;
 +
 +    /* nbr list debug dump */
 +    {
 +        char *ptr=getenv("GMX_DUMP_NL");
 +        if (ptr)
 +        {
 +            ns->dump_nl=strtol(ptr,NULL,10);
 +            if (fplog)
 +            {
 +                /* Terminate the line, so the next log entry starts
 +                 * on a line of its own.
 +                 */
 +                fprintf(fplog, "GMX_DUMP_NL = %d\n", ns->dump_nl);
 +            }
 +        }
 +        else
 +        {
 +            ns->dump_nl=0;
 +        }
 +    }
 +}
 +
 +			 
 +/* Top level neighbour search driver.
 + *
 + * Checks the box size against the long-range list cut-off, resets the
 + * neighbour lists, (re)fills the search grid when grid search is used and
 + * bFillGrid is set, and then runs the actual search: nsgrid_core with
 + * fr->bGrid, ns_simple_core otherwise. With grid search and fr->bQMMM set
 + * (and a QMMM scheme other than ONIOM) a second nsgrid_core pass builds
 + * the QMMM neighbourlist.
 + *
 + * log         file passed on to the grid routines
 + * fr          force record, holds the search data in fr->ns
 + * x, box      passed on to the search core routines
 + * top         local topology, supplies the charge groups
 + * groups      supplies the number of energy groups
 + * cr          communication record, selects domain/particle decomposition
 + * nrnb        flop accounting, eNR_NS is incremented with the search count
 + * md, lambda, dvdlambda, grppener, bDoLongRange, bDoForces, f
 + *             passed on unchanged to nsgrid_core
 + * bFillGrid   when TRUE (and grid search is used) the grid is refilled
 + *
 + * Returns the sum of the counts returned by the search core routines.
 + * A fatal error is raised when the box has become too small for
 + * fr->rlistlong.
 + */
 +int search_neighbours(FILE *log,t_forcerec *fr,
 +                      rvec x[],matrix box,
 +                      gmx_localtop_t *top,
 +                      gmx_groups_t *groups,
 +                      t_commrec *cr,
 +                      t_nrnb *nrnb,t_mdatoms *md,
 +                      real lambda,real *dvdlambda,
 +                      gmx_grppairener_t *grppener,
 +                      gmx_bool bFillGrid,
 +                      gmx_bool bDoLongRange,
 +                      gmx_bool bDoForces,rvec *f)
 +{
 +    t_block  *cgs=&(top->cgs);
 +    rvec     box_size,grid_x0,grid_x1;
 +    int      m,ngid;
 +    real     min_size,grid_dens;
 +    int      nsearch;
 +    gmx_bool     bGrid;
 +    int      start,end;
 +    gmx_ns_t *ns;
 +    t_grid   *grid;
 +    gmx_domdec_zones_t *dd_zones;
 +    put_in_list_t *put_in_list;
 +
 +    ns = &fr->ns;
 +
 +    /* Set some local variables */
 +    bGrid = fr->bGrid;
 +    ngid = groups->grps[egcENER].nr;
 +
 +    /* Only the diagonal of the box is stored in box_size */
 +    for(m=0; (m<DIM); m++)
 +    {
 +        box_size[m] = box[m][m];
 +    }
 +
 +    if (fr->ePBC != epbcNONE)
 +    {
 +        if (sqr(fr->rlistlong) >= max_cutoff2(fr->ePBC,box))
 +        {
 +            gmx_fatal(FARGS,"One of the box vectors has become shorter than twice the cut-off length or box_yy-|box_zy| or box_zz has become smaller than the cut-off.");
 +        }
 +        if (!bGrid)
 +        {
 +            /* The simple search has a stricter box size requirement */
 +            min_size = min(box_size[XX],min(box_size[YY],box_size[ZZ]));
 +            if (2*fr->rlistlong >= min_size)
 +            {
 +                gmx_fatal(FARGS,"One of the box diagonal elements has become smaller than twice the cut-off length.");
 +            }
 +        }
 +    }
 +
 +    if (DOMAINDECOMP(cr))
 +    {
 +        /* Make sure the exclusion buffer covers all atoms in the
 +         * charge groups of the local topology.
 +         */
 +        ns_realloc_natoms(ns,cgs->index[cgs->nr]);
 +    }
 +    debug_gmx();
 +
 +    /* Reset the neighbourlists */
 +    reset_neighbor_list(fr,FALSE,-1,-1);
 +
 +    if (bGrid && bFillGrid)
 +    {
 +
 +        grid = ns->grid;
 +        if (DOMAINDECOMP(cr))
 +        {
 +            dd_zones = domdec_zones(cr->dd);
 +        }
 +        else
 +        {
 +            dd_zones = NULL;
 +
 +            get_nsgrid_boundaries(grid,NULL,box,NULL,NULL,NULL,
 +                                  cgs->nr,fr->cg_cm,grid_x0,grid_x1,&grid_dens);
 +
 +            grid_first(log,grid,NULL,NULL,fr->ePBC,box,grid_x0,grid_x1,
 +                       fr->rlistlong,grid_dens);
 +        }
 +        debug_gmx();
 +
 +        /* Don't know why this all is... (DvdS 3/99) */
 +#ifndef SEGV
 +        start = 0;
 +        end   = cgs->nr;
 +#else
 +        start = fr->cg0;
 +        end   = (cgs->nr+1)/2;
 +#endif
 +
 +        if (DOMAINDECOMP(cr))
 +        {
 +            end = cgs->nr;
 +            fill_grid(log,dd_zones,grid,end,-1,end,fr->cg_cm);
 +            grid->icg0 = 0;
 +            grid->icg1 = dd_zones->izone[dd_zones->nizone-1].cg1;
 +        }
 +        else
 +        {
 +            fill_grid(log,NULL,grid,cgs->nr,fr->cg0,fr->hcg,fr->cg_cm);
 +            grid->icg0 = fr->cg0;
 +            grid->icg1 = fr->hcg;
 +            debug_gmx();
 +
 +            if (PARTDECOMP(cr))
 +            {
 +                mv_grid(cr,grid);
 +            }
 +            debug_gmx();
 +        }
 +
 +        calc_elemnr(log,grid,start,end,cgs->nr);
 +        calc_ptrs(grid);
 +        grid_last(log,grid,start,end,cgs->nr);
 +
 +        if (gmx_debug_at)
 +        {
 +            check_grid(debug,grid);
 +            print_grid(debug,grid);
 +        }
 +    }
 +    else if (fr->n_tpi)
 +    {
 +        /* Set the grid cell index for the test particle only.
 +         * The cell to cg index is not corrected, but that does not matter.
 +         */
 +        fill_grid(log,NULL,ns->grid,fr->hcg,fr->hcg-1,fr->hcg,fr->cg_cm);
 +    }
 +    debug_gmx();
 +
 +    /* Select the routine that stores the pairs in the neighbour lists */
 +    if (!fr->ns.bCGlist)
 +    {
 +        put_in_list = put_in_list_at;
 +    }
 +    else
 +    {
 +        put_in_list = put_in_list_cg;
 +    }
 +
 +    /* Do the core! */
 +    if (bGrid)
 +    {
 +        grid = ns->grid;
 +        nsearch = nsgrid_core(log,cr,fr,box,box_size,ngid,top,
 +                              grid,x,ns->bexcl,ns->bExcludeAlleg,
 +                              nrnb,md,lambda,dvdlambda,grppener,
 +                              put_in_list,ns->bHaveVdW,
 +                              bDoLongRange,bDoForces,f,
 +                              FALSE);
 +
 +        /* neighbour searching without QMMM! QM atoms have zero charge in
 +         * the classical calculation. The charge-charge interaction
 +         * between QM and MM atoms is handled in the QMMM core calculation
 +         * (see QMMM.c). The VDW however, we'd like to compute classically
 +         * and the QM MM atom pairs have just been put in the
 +         * corresponding neighbourlists. in case of QMMM we still need to
 +         * fill a special QMMM neighbourlist that contains all neighbours
 +         * of the QM atoms. If bQMMM is true, this list will now be made:
 +         */
 +        if (fr->bQMMM && fr->qr->QMMMscheme!=eQMMMschemeoniom)
 +        {
 +            nsearch += nsgrid_core(log,cr,fr,box,box_size,ngid,top,
 +                                   grid,x,ns->bexcl,ns->bExcludeAlleg,
 +                                   nrnb,md,lambda,dvdlambda,grppener,
 +                                   put_in_list_qmmm,ns->bHaveVdW,
 +                                   bDoLongRange,bDoForces,f,
 +                                   TRUE);
 +        }
 +    }
 +    else
 +    {
 +        nsearch = ns_simple_core(fr,top,md,box,box_size,
 +                                 ns->bexcl,ns->simple_aaj,
 +                                 ngid,ns->ns_buf,put_in_list,ns->bHaveVdW);
 +    }
 +    debug_gmx();
 +
 +#ifdef DEBUG
 +    pr_nsblock(log);
 +#endif
 +
 +    inc_nrnb(nrnb,eNR_NS,nsearch);
 +    /* inc_nrnb(nrnb,eNR_LR,fr->nlr); */
 +
 +    return nsearch;
 +}
 +
 +/* Count the atoms of charge groups fr->cg0 up to (not including) fr->hcg
 + * that are further than half the neighbour search buffer away from the
 + * reference position of their charge group.
 + *
 + * The buffer is the difference between ir->rlist and the largest of
 + * ir->rcoulomb and ir->rvdw; a fatal error is raised when it is negative.
 + * Without dynamics or without a dynamic box the reference position is
 + * fr->cg_cm[cg]. Otherwise rlist is multiplied by the smallest diagonal
 + * element of scale_tot and the reference position is cg_cm scaled with
 + * that factor (isotropic scaling) or multiplied with the transpose of
 + * scale_tot (anisotropic scaling).
 + *
 + * Returns the number of atoms beyond the buffer.
 + */
 +int natoms_beyond_ns_buffer(t_inputrec *ir,t_forcerec *fr,t_block *cgs,
 +                            matrix scale_tot,rvec *x)
 +{
 +    int      icg,icg_end,at,at_end,d,e;
 +    int      n_outside;
 +    real     r_inter,half_buf2,min_scale;
 +    rvec     *center,scaled_center;
 +    real     *ref;
 +    gmx_bool bScaled,bUniform;
 +
 +    r_inter = max(ir->rcoulomb,ir->rvdw);
 +    if (ir->rlist < r_inter)
 +    {
 +        gmx_fatal(FARGS,"The neighbor search buffer has negative size: %f nm",
 +                  ir->rlist - r_inter);
 +    }
 +
 +    /* Box scaling only plays a role with dynamics and a dynamic box */
 +    bScaled  = (EI_DYNAMICS(ir->eI) && DYNAMIC_BOX(*ir));
 +    bUniform = TRUE;
 +
 +    if (!bScaled)
 +    {
 +        half_buf2 = sqr(0.5*(ir->rlist - r_inter));
 +    }
 +    else
 +    {
 +        /* With anisotropic scaling, the original spherical ns volumes become
 +         * ellipsoids. To avoid costly transformations we use the minimum
 +         * eigenvalue of the scaling matrix for determining the buffer size.
 +         * Since the lower half is 0, the eigenvalues are the diagonal elements.
 +         */
 +        min_scale = scale_tot[0][0];
 +        for(d=1; d<DIM; d++)
 +        {
 +            min_scale = min(min_scale,scale_tot[d][d]);
 +            /* Unequal diagonal elements mean anisotropic scaling */
 +            if (scale_tot[d][d] != scale_tot[d-1][d-1])
 +            {
 +                bUniform = FALSE;
 +            }
 +            /* As does any non-zero element below the diagonal */
 +            for(e=0; e<d; e++)
 +            {
 +                if (scale_tot[d][e] != 0)
 +                {
 +                    bUniform = FALSE;
 +                }
 +            }
 +        }
 +        half_buf2 = sqr(0.5*(min_scale*ir->rlist - r_inter));
 +    }
 +
 +    center    = fr->cg_cm;
 +    icg_end   = fr->hcg;
 +    n_outside = 0;
 +    for(icg=fr->cg0; icg<icg_end; icg++)
 +    {
 +        /* Determine the reference position for this charge group */
 +        if (!bScaled)
 +        {
 +            ref = center[icg];
 +        }
 +        else
 +        {
 +            if (bUniform)
 +            {
 +                svmul(min_scale,center[icg],scaled_center);
 +            }
 +            else
 +            {
 +                /* Since scale_tot contains the transpose of the scaling
 +                 * matrix, we need to multiply with the transpose.
 +                 */
 +                tmvmul_ur0(scale_tot,center[icg],scaled_center);
 +            }
 +            ref = scaled_center;
 +        }
 +
 +        at_end = cgs->index[icg+1];
 +        for(at=cgs->index[icg]; at<at_end; at++)
 +        {
 +            if (distance2(ref,x[at]) > half_buf2)
 +            {
 +                n_outside++;
 +            }
 +        }
 +    }
 +
 +    return n_outside;
 +}
diff --cc src/gromacs/selection/nbsearch.cpp
index 6ac4756542,0000000000..716087b720
mode 100644,000000..100644
--- a/src/gromacs/selection/nbsearch.cpp
+++ b/src/gromacs/selection/nbsearch.cpp
@@@ -1,788 -1,0 +1,788 @@@
 +/*
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2009, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + */
 +/*! \page nbsearch Neighborhood search routines
 + *
 + * Functions to find particles within a neighborhood of a set of particles
 + * are defined in nbsearch.h.
 + * The usage is simple: a data structure is allocated with
 + * gmx_ana_nbsearch_create(), and the box shape and reference positions for a
 + * frame are set using gmx_ana_nbsearch_init() or gmx_ana_nbsearch_pos_init().
 + * Searches can then be performed with gmx_ana_nbsearch_is_within() and
 + * gmx_ana_nbsearch_mindist(), or with versions that take the \c gmx_ana_pos_t
 + * data structure.
 + * When the data structure is no longer required, it can be freed with
 + * gmx_ana_nbsearch_free().
 + *
 + * \internal
 + *
 + * \todo
 + * The grid implementation could still be optimized in several different ways:
 + *   - Triclinic grid cells are not the most efficient shape, but make PBC
 + *     handling easier.
 + *   - Precalculating the required PBC shift for a pair of cells outside the
 + *     inner loop. After this is done, it should be quite straightforward to
 + *     move to rectangular cells.
 + *   - Pruning grid cells from the search list if they are completely outside
 + *     the sphere that is being considered.
 + *   - A better heuristic could be added for falling back to simple loops for a
 + *     small number of reference particles.
 + *   - A better heuristic for selecting the grid size.
 + *   - A multi-level grid implementation could be used to be able to use small
 + *     grids for short cutoffs with very inhomogeneous particle distributions
 + *     without a memory cost.
 + */
 +/*! \internal \file
 + * \brief
 + * Implements functions in nbsearch.h.
 + *
 + * \author Teemu Murtola <teemu.murtola@cbr.su.se>
 + * \ingroup module_selection
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <math.h>
 +
 +#include <smalloc.h>
 +#include <typedefs.h>
 +#include <pbc.h>
 +#include <vec.h>
 +
 +#include "gromacs/selection/nbsearch.h"
 +#include "gromacs/selection/position.h"
 +
 +/*! \internal \brief
 + * Data structure for neighborhood searches.
 + */
 +struct gmx_ana_nbsearch_t
 +{
 +    /** The cutoff. */
 +    real           cutoff;
 +    /** The cutoff squared. */
 +    real           cutoff2;
 +    /** Maximum number of reference points. */
 +    int            maxnref;
 +
 +    /** Number of reference points for the current frame. */
 +    int            nref;
 +    /** Reference point positions. */
 +    rvec          *xref;
 +    /** Reference position ids (NULL if not available). */
 +    int           *refid;
 +    /** PBC data. */
 +    t_pbc         *pbc;
 +
 +    /** Number of excluded reference positions for current test particle. */
 +    int            nexcl;
 +    /** Exclusions for current test particle. */
 +    int           *excl;
 +
 +    /** Whether to try grid searching. */
 +    gmx_bool           bTryGrid;
 +    /** Whether grid searching is actually used for the current positions. */
 +    gmx_bool           bGrid;
 +    /** Array allocated for storing in-unit-cell reference positions. */
 +    rvec          *xref_alloc;
 +    /** FALSE if the box is rectangular. */
 +    gmx_bool           bTric;
 +    /** Box vectors of a single grid cell. */
 +    matrix         cellbox;
 +    /** The reciprocal cell vectors as columns; the inverse of \p cellbox. */
 +    matrix         recipcell;
 +    /** Number of cells along each dimension. */
 +    ivec           ncelldim;
 +    /** Total number of cells. */
 +    int            ncells;
 +    /** Number of reference positions in each cell. */
 +    int           *ncatoms;
 +    /** List of reference positions in each cell. */
 +    atom_id      **catom;
 +    /** Allocation counts for each \p catom[i]. */
 +    int           *catom_nalloc;
 +    /** Allocation count for the per-cell arrays. */
 +    int            cells_nalloc;
 +    /** Number of neighboring cells to consider. */
 +    int            ngridnb;
 +    /** Offsets of the neighboring cells to consider. */
 +    ivec          *gnboffs;
 +    /** Allocation count for \p gnboffs. */
 +    int            gnboffs_nalloc;
 +
 +    /** Stores test position during a pair loop. */
 +    rvec           xtest;
 +    /** Stores the previous returned position during a pair loop. */
 +    int            previ;
 +    /** Stores the current exclusion index during loops. */
 +    int            exclind;
 +    /** Stores the test particle cell index during loops. */
 +    ivec           testcell;
 +    /** Stores the current cell neighbor index during pair loops. */
 +    int            prevnbi;
 +    /** Stores the index within the current cell during pair loops. */
 +    int            prevcai;
 +};
 +
 +/*!
 + * \param[in]  cutoff Cutoff distance for the search
 + *   (<=0 stands for no cutoff).
 + * \param[in]  maxn   Maximum number of reference particles.
 + * \returns  Created neighborhood search data structure.
 + */
 +gmx_ana_nbsearch_t *
 +gmx_ana_nbsearch_create(real cutoff, int maxn)
 +{
 +    gmx_ana_nbsearch_t *d;
 +
 +    snew(d, 1);
 +    d->bTryGrid = TRUE;
 +    if (cutoff <= 0)
 +    {
-         cutoff = HUGE_VAL;
++        cutoff = GMX_REAL_MAX;
 +        d->bTryGrid = FALSE;
 +    }
 +    d->cutoff = cutoff;
 +    d->cutoff2 = sqr(cutoff);
 +    d->maxnref = maxn;
 +
 +    d->xref = NULL;
 +    d->nexcl = 0;
 +    d->exclind = 0;
 +
 +    d->xref_alloc = NULL;
 +    d->ncells = 0;
 +    d->ncatoms = NULL;
 +    d->catom = NULL;
 +    d->catom_nalloc = 0;
 +    d->cells_nalloc = 0;
 +
 +    d->ngridnb = 0;
 +    d->gnboffs = NULL;
 +    d->gnboffs_nalloc = 0;
 +
 +    return d;
 +}
 +
 +/*!
 + * \param     d Data structure to free.
 + *
 + * After the call, the pointer \p d is no longer valid.
 + */
 +void
 +gmx_ana_nbsearch_free(gmx_ana_nbsearch_t *d)
 +{
 +    sfree(d->xref_alloc);
 +    sfree(d->ncatoms);
 +    if (d->catom)
 +    {
 +        int ci;
 +        /* catom has cells_nalloc entries; ncells changes per frame and may not match. */
 +        for (ci = 0; ci < d->cells_nalloc; ++ci)
 +        {
 +            sfree(d->catom[ci]);
 +        }
 +        sfree(d->catom);
 +    }
 +    sfree(d->catom_nalloc);
 +    sfree(d->gnboffs);
 +    sfree(d);
 +}
 +
 +/*! \brief
 + * Calculates offsets to neighboring grid cells that should be considered.
 + *
 + * \param[in,out] d    Grid information.
 + * \param[in]     pbc  Information about the box.
 + */
 +static void
 +grid_init_cell_nblist(gmx_ana_nbsearch_t *d, t_pbc *pbc)
 +{
 +    int   maxx, maxy, maxz;
 +    int   x, y, z, i;
 +    real  rvnorm;
 +
 +    /* Find the extent of the sphere in triclinic coordinates */
 +    maxz = (int)(d->cutoff * d->recipcell[ZZ][ZZ]) + 1;
 +    rvnorm = sqrt(sqr(d->recipcell[YY][YY]) + sqr(d->recipcell[ZZ][YY]));
 +    maxy = (int)(d->cutoff * rvnorm) + 1;
 +    rvnorm = sqrt(sqr(d->recipcell[XX][XX]) + sqr(d->recipcell[YY][XX])
 +                  + sqr(d->recipcell[ZZ][XX]));
 +    maxx = (int)(d->cutoff * rvnorm) + 1;
 +
 +    /* Calculate the number of cells and reallocate if necessary */
 +    d->ngridnb = (2 * maxx + 1) * (2 * maxy + 1) * (2 * maxz + 1);
 +    if (d->gnboffs_nalloc < d->ngridnb)
 +    {
 +        d->gnboffs_nalloc = d->ngridnb;
 +        srenew(d->gnboffs, d->gnboffs_nalloc);
 +    }
 +
 +    /* Store the whole cube */
 +    /* TODO: Prune off corners that are not needed */
 +    i = 0;
 +    for (x = -maxx; x <= maxx; ++x)
 +    {
 +        for (y = -maxy; y <= maxy; ++y)
 +        {
 +            for (z = -maxz; z <= maxz; ++z)
 +            {
 +                d->gnboffs[i][XX] = x;
 +                d->gnboffs[i][YY] = y;
 +                d->gnboffs[i][ZZ] = z;
 +                ++i;
 +            }
 +        }
 +    }
 +}
 +
 +/*! \brief
 + * Determines a suitable grid size.
 + *
 + * \param[in,out] d    Grid information.
 + * \param[in]     pbc  Information about the box.
 + * \returns  FALSE if grid search is not suitable.
 + */
 +static gmx_bool
 +grid_setup_cells(gmx_ana_nbsearch_t *d, t_pbc *pbc)
 +{
 +    real targetsize;
 +    int  dd;
 +
 +#ifdef HAVE_CBRT
 +    targetsize = cbrt(pbc->box[XX][XX] * pbc->box[YY][YY] * pbc->box[ZZ][ZZ]
 +                      * 10 / d->nref);
 +#else
 +    targetsize = pow(pbc->box[XX][XX] * pbc->box[YY][YY] * pbc->box[ZZ][ZZ]
 +                      * 10 / d->nref, 1./3.);
 +#endif
 +
 +    d->ncells = 1;
 +    for (dd = 0; dd < DIM; ++dd)
 +    {
 +        d->ncelldim[dd] = (int)(pbc->box[dd][dd] / targetsize);
 +        d->ncells *= d->ncelldim[dd];
 +        if (d->ncelldim[dd] < 3)
 +        {
 +            return FALSE;
 +        }
 +    }
 +    /* Reallocate if necessary */
 +    if (d->cells_nalloc < d->ncells)
 +    {
 +        int  i;
 +
 +        srenew(d->ncatoms, d->ncells);
 +        srenew(d->catom, d->ncells);
 +        srenew(d->catom_nalloc, d->ncells);
 +        for (i = d->cells_nalloc; i < d->ncells; ++i)
 +        {
 +            d->catom[i] = NULL;
 +            d->catom_nalloc[i] = 0;
 +        }
 +        d->cells_nalloc = d->ncells;
 +    }
 +    return TRUE;
 +}
 +
 +/*! \brief
 + * Sets up a search grid for a given box.
 + *
 + * \param[in,out] d    Grid information.
 + * \param[in]     pbc  Information about the box.
 + * \returns  FALSE if grid search is not suitable.
 + */
 +static gmx_bool
 +grid_set_box(gmx_ana_nbsearch_t *d, t_pbc *pbc)
 +{
 +    int dd;
 +
 +    /* TODO: This check could be improved. */
 +    if (0.5*pbc->max_cutoff2 < d->cutoff2)
 +    {
 +        return FALSE;
 +    }
 +
 +    if (!grid_setup_cells(d, pbc))
 +    {
 +        return FALSE;
 +    }
 +
 +    d->bTric = TRICLINIC(pbc->box);
 +    if (d->bTric)
 +    {
 +        for (dd = 0; dd < DIM; ++dd)
 +        {
 +            svmul(1.0 / d->ncelldim[dd], pbc->box[dd], d->cellbox[dd]);
 +        }
 +        m_inv_ur0(d->cellbox, d->recipcell);
 +    }
 +    else
 +    {
 +        for (dd = 0; dd < DIM; ++dd)
 +        {
 +            d->cellbox[dd][dd] = pbc->box[dd][dd] / d->ncelldim[dd];
 +            d->recipcell[dd][dd] = 1 / d->cellbox[dd][dd];
 +        }
 +    }
 +    grid_init_cell_nblist(d, pbc);
 +    return TRUE;
 +}
 +
 +/*! \brief
 + * Maps a point into a grid cell.
 + *
 + * \param[in]  d    Grid information.
 + * \param[in]  x    Point to map.
 + * \param[out] cell Indices of the grid cell in which \p x lies.
 + *
 + * \p x should be in the triclinic unit cell.
 + */
 +static void
 +grid_map_onto(gmx_ana_nbsearch_t *d, const rvec x, ivec cell)
 +{
 +    int dd;
 +
 +    if (d->bTric)
 +    {
 +        rvec tx;
 +
 +        tmvmul_ur0(d->recipcell, x, tx);
 +        for (dd = 0; dd < DIM; ++dd)
 +        {
 +            cell[dd] = (int)tx[dd];
 +        }
 +    }
 +    else
 +    {
 +        for (dd = 0; dd < DIM; ++dd)
 +        {
 +            cell[dd] = (int)(x[dd] * d->recipcell[dd][dd]);
 +        }
 +    }
 +}
 +
 +/*! \brief
 + * Calculates linear index of a grid cell.
 + *
 + * \param[in]  d    Grid information.
 + * \param[in]  cell Cell indices.
 + * \returns    Linear index of \p cell.
 + */
 +static int
 +grid_index(gmx_ana_nbsearch_t *d, const ivec cell)
 +{
 +    return cell[XX] + cell[YY] * d->ncelldim[XX]
 +        + cell[ZZ] * d->ncelldim[XX] * d->ncelldim[YY];
 +}
 +
 +/*! \brief
 + * Clears all grid cells.
 + *
 + * \param[in,out] d    Grid information.
 + */
 +static void
 +grid_clear_cells(gmx_ana_nbsearch_t *d)
 +{
 +    int  ci;
 +
 +    for (ci = 0; ci < d->ncells; ++ci)
 +    {
 +        d->ncatoms[ci] = 0;
 +    }
 +}
 +
 +/*! \brief
 + * Adds an index into a grid cell.
 + *
 + * \param[in,out] d    Grid information.
 + * \param[in]     cell Cell into which \p i should be added.
 + * \param[in]     i    Index to add.
 + */
 +static void
 +grid_add_to_cell(gmx_ana_nbsearch_t *d, const ivec cell, int i)
 +{
 +    int ci = grid_index(d, cell);
 +
 +    if (d->ncatoms[ci] == d->catom_nalloc[ci])
 +    {
 +        d->catom_nalloc[ci] += 10;
 +        srenew(d->catom[ci], d->catom_nalloc[ci]);
 +    }
 +    d->catom[ci][d->ncatoms[ci]++] = i;
 +}
 +
 +/*!
 + * \param[in,out] d   Neighborhood search data structure.
 + * \param[in]     pbc PBC information for the frame.
 + * \param[in]     n   Number of reference positions for the frame.
 + * \param[in]     x   \p n reference positions for the frame.
 + *
 + * Initializes the data structure \p d such that it can be used to search
 + * for the neighbors of \p x.
 + */
 +void
 +gmx_ana_nbsearch_init(gmx_ana_nbsearch_t *d, t_pbc *pbc, int n, const rvec x[])
 +{
 +    d->pbc  = pbc;
 +    d->nref = n;
 +    if (!pbc)
 +    {
 +        d->bGrid = FALSE;
 +    }
 +    else if (d->bTryGrid)
 +    {
 +        d->bGrid = grid_set_box(d, pbc);
 +    }
 +    if (d->bGrid)
 +    {
 +        int  i;
 +
 +        if (!d->xref_alloc)
 +        {
 +            snew(d->xref_alloc, d->maxnref);
 +        }
 +        d->xref = d->xref_alloc;
 +        grid_clear_cells(d);
 +
 +        for (i = 0; i < n; ++i)
 +        {
 +            copy_rvec(x[i], d->xref[i]);
 +        }
 +        put_atoms_in_triclinic_unitcell(ecenterTRIC, pbc->box, n, d->xref);
 +        for (i = 0; i < n; ++i)
 +        {
 +            ivec refcell;
 +
 +            grid_map_onto(d, d->xref[i], refcell);
 +            grid_add_to_cell(d, refcell, i);
 +        }
 +    }
 +    else
 +    {
 +        // Won't be modified in this case, but when a grid is used,
 +        // xref _is_ modified, so it can't be const.
 +        d->xref = const_cast<rvec *>(x);
 +    }
 +    d->refid = NULL;
 +}
 +
 +/*!
 + * \param[in,out] d   Neighborhood search data structure.
 + * \param[in]     pbc PBC information for the frame.
 + * \param[in]     p   Reference positions for the frame.
 + *
 + * A convenience wrapper for gmx_ana_nbsearch_init().
 + */
 +void
 +gmx_ana_nbsearch_pos_init(gmx_ana_nbsearch_t *d, t_pbc *pbc, const gmx_ana_pos_t *p)
 +{
 +    gmx_ana_nbsearch_init(d, pbc, p->nr, p->x);
 +    d->refid = (p->nr < d->maxnref ? p->m.refid : NULL);
 +}
 +
 +/*!
 + * \param[in,out] d     Neighborhood search data structure.
 + * \param[in]     nexcl Number of reference positions to exclude from next
 + *      search.
 + * \param[in]     excl  Indices of reference positions to exclude.
 + *
 + * The set exclusions remain in effect until the next call of this function.
 + */
 +void
 +gmx_ana_nbsearch_set_excl(gmx_ana_nbsearch_t *d, int nexcl, int excl[])
 +{
 +
 +    d->nexcl = nexcl;
 +    d->excl = excl;
 +}
 +
 +/*! \brief
 + * Checks whether reference point \p j is excluded, advancing \c d->exclind through the ascending \c d->excl list.
 + */
 +static gmx_bool
 +is_excluded(gmx_ana_nbsearch_t *d, int j)
 +{
 +    if (d->exclind < d->nexcl)
 +    {
 +        if (d->refid)
 +        {
 +            while (d->exclind < d->nexcl && d->refid[j] > d->excl[d->exclind])
 +            {
 +                ++d->exclind;
 +            }
 +            if (d->exclind < d->nexcl && d->refid[j] == d->excl[d->exclind])
 +            {
 +                ++d->exclind;
 +                return TRUE;
 +            }
 +        }
 +        else
 +        {
 +            while (d->bGrid && d->exclind < d->nexcl && d->excl[d->exclind] < j)
 +            {
 +                ++d->exclind;
 +            }
 +            if (d->exclind < d->nexcl && d->excl[d->exclind] == j) /* loop above may reach nexcl */
 +            {
 +                ++d->exclind;
 +                return TRUE;
 +            }
 +        }
 +    }
 +    return FALSE;
 +}
 +
 +/*! \brief
 + * Initializes a grid search to find reference positions neighboring \p x.
 + */
 +static void
 +grid_search_start(gmx_ana_nbsearch_t *d, const rvec x)
 +{
 +    copy_rvec(x, d->xtest);
 +    if (d->bGrid)
 +    {
 +        put_atoms_in_triclinic_unitcell(ecenterTRIC, d->pbc->box, 1, &d->xtest);
 +        grid_map_onto(d, d->xtest, d->testcell);
 +        d->prevnbi = 0;
 +        d->prevcai = -1;
 +    }
 +    else
 +    {
 +        d->previ = -1;
 +    }
 +    d->exclind = 0;
 +}
 +
 +/*! \brief
 + * Does a grid search.
 + */
 +static gmx_bool
 +grid_search(gmx_ana_nbsearch_t *d,
 +            gmx_bool (*action)(gmx_ana_nbsearch_t *d, int i, real r2))
 +{
 +    int  i;
 +    rvec dx;
 +    real r2;
 +
 +    if (d->bGrid)
 +    {
 +        int  nbi, ci, cai;
 +
 +        nbi = d->prevnbi;
 +        cai = d->prevcai + 1;
 +
 +        for ( ; nbi < d->ngridnb; ++nbi)
 +        {
 +            ivec cell;
 +
 +            ivec_add(d->testcell, d->gnboffs[nbi], cell);
 +            /* TODO: Support for 2D and screw PBC */
 +            cell[XX] = (cell[XX] + d->ncelldim[XX]) % d->ncelldim[XX];
 +            cell[YY] = (cell[YY] + d->ncelldim[YY]) % d->ncelldim[YY];
 +            cell[ZZ] = (cell[ZZ] + d->ncelldim[ZZ]) % d->ncelldim[ZZ];
 +            ci = grid_index(d, cell);
 +            /* TODO: Calculate the required PBC shift outside the inner loop */
 +            for ( ; cai < d->ncatoms[ci]; ++cai)
 +            {
 +                i = d->catom[ci][cai];
 +                if (is_excluded(d, i))
 +                {
 +                    continue;
 +                }
 +                pbc_dx_aiuc(d->pbc, d->xtest, d->xref[i], dx);
 +                r2 = norm2(dx);
 +                if (r2 <= d->cutoff2)
 +                {
 +                    if (action(d, i, r2))
 +                    {
 +                        d->prevnbi = nbi;
 +                        d->prevcai = cai;
 +                        d->previ   = i;
 +                        return TRUE;
 +                    }
 +                }
 +            }
 +            d->exclind = 0;
 +            cai = 0;
 +        }
 +    }
 +    else
 +    {
 +        i = d->previ + 1;
 +        for ( ; i < d->nref; ++i)
 +        {
 +            if (is_excluded(d, i))
 +            {
 +                continue;
 +            }
 +            if (d->pbc)
 +            {
 +                pbc_dx(d->pbc, d->xtest, d->xref[i], dx);
 +            }
 +            else
 +            {
 +                rvec_sub(d->xtest, d->xref[i], dx);
 +            }
 +            r2 = norm2(dx);
 +            if (r2 <= d->cutoff2)
 +            {
 +                if (action(d, i, r2))
 +                {
 +                    d->previ = i;
 +                    return TRUE;
 +                }
 +            }
 +        }
 +    }
 +    return FALSE;
 +}
 +
 +/*! \brief
 + * Helper function to use with grid_search() to find the next neighbor.
 + *
 + * Simply breaks the loop on the first found neighbor.
 + */
 +static gmx_bool
 +within_action(gmx_ana_nbsearch_t *d, int i, real r2)
 +{
 +    return TRUE;
 +}
 +
 +/*! \brief
 + * Helper function to use with grid_search() to find the minimum distance.
 + */
 +static gmx_bool
 +mindist_action(gmx_ana_nbsearch_t *d, int i, real r2)
 +{
 +    d->cutoff2 = r2;
 +    return FALSE;
 +}
 +
 +/*!
 + * \param[in] d   Neighborhood search data structure.
 + * \param[in] x   Test position.
 + * \returns   TRUE if \p x is within the cutoff of any reference position,
 + *   FALSE otherwise.
 + */
 +gmx_bool
 +gmx_ana_nbsearch_is_within(gmx_ana_nbsearch_t *d, const rvec x)
 +{
 +    grid_search_start(d, x);
 +    return grid_search(d, &within_action);
 +}
 +
 +/*!
 + * \param[in] d   Neighborhood search data structure.
 + * \param[in] p   Test positions.
 + * \param[in] i   Use the i'th position in \p p for testing.
 + * \returns   TRUE if the test position is within the cutoff of any reference
 + *   position, FALSE otherwise.
 + */
 +gmx_bool
 +gmx_ana_nbsearch_pos_is_within(gmx_ana_nbsearch_t *d, const gmx_ana_pos_t *p, int i)
 +{
 +    return gmx_ana_nbsearch_is_within(d, p->x[i]);
 +}
 +
 +/*!
 + * \param[in] d   Neighborhood search data structure.
 + * \param[in] x   Test position.
 + * \returns   The distance to the nearest reference position, or the cutoff
 + *   value if there are no reference positions within the cutoff.
 + */
 +real
 +gmx_ana_nbsearch_mindist(gmx_ana_nbsearch_t *d, const rvec x)
 +{
 +    real mind;
 +
 +    grid_search_start(d, x);
 +    grid_search(d, &mindist_action);
 +    mind = sqrt(d->cutoff2);
 +    d->cutoff2 = sqr(d->cutoff);
 +    return mind;
 +}
 +
 +/*!
 + * \param[in] d   Neighborhood search data structure.
 + * \param[in] p   Test positions.
 + * \param[in] i   Use the i'th position in \p p for testing.
 + * \returns   The distance to the nearest reference position, or the cutoff
 + *   value if there are no reference positions within the cutoff.
 + */
 +real
 +gmx_ana_nbsearch_pos_mindist(gmx_ana_nbsearch_t *d, const gmx_ana_pos_t *p, int i)
 +{
 +    return gmx_ana_nbsearch_mindist(d, p->x[i]);
 +}
 +
 +/*!
 + * \param[in]  d   Neighborhood search data structure.
 + * \param[in]  x   Test position.
 + * \param[out] jp  Index of the reference position in the first pair.
 + * \returns    TRUE if there are positions within the cutoff.
 + */
 +gmx_bool
 +gmx_ana_nbsearch_first_within(gmx_ana_nbsearch_t *d, const rvec x, int *jp)
 +{
 +    grid_search_start(d, x);
 +    return gmx_ana_nbsearch_next_within(d, jp);
 +}
 +
 +/*!
 + * \param[in]  d   Neighborhood search data structure.
 + * \param[in]  p   Test positions.
 + * \param[in]  i   Use the i'th position in \p p.
 + * \param[out] jp  Index of the reference position in the first pair.
 + * \returns    TRUE if there are positions within the cutoff.
 + */
 +gmx_bool
 +gmx_ana_nbsearch_pos_first_within(gmx_ana_nbsearch_t *d, const gmx_ana_pos_t *p,
 +                                  int i, int *jp)
 +{
 +    return gmx_ana_nbsearch_first_within(d, p->x[i], jp);
 +}
 +
 +/*!
 + * \param[in]  d   Neighborhood search data structure.
 + * \param[out] jp  Index of the reference position in the next pair.
 + * \returns    TRUE if there are positions within the cutoff.
 + */
 +gmx_bool
 +gmx_ana_nbsearch_next_within(gmx_ana_nbsearch_t *d, int *jp)
 +{
 +    if (grid_search(d, &within_action))
 +    {
 +        *jp = d->previ;
 +        return TRUE;
 +    }
 +    *jp = -1;
 +    return FALSE;
 +}
diff --cc src/gromacs/selection/params.cpp
index 2f660887a1,0000000000..e7612979b9
mode 100644,000000..100644
--- a/src/gromacs/selection/params.cpp
+++ b/src/gromacs/selection/params.cpp
@@@ -1,1285 -1,0 +1,1289 @@@
 +/*
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2009, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + */
 +/*! \internal \file
 + * \brief
 + * Implements functions in selparam.h.
 + *
 + * \author Teemu Murtola <teemu.murtola@cbr.su.se>
 + * \ingroup module_selection
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <smalloc.h>
 +#include <string2.h>
 +#include <vec.h>
 +
 +#include "gromacs/fatalerror/errorcodes.h"
 +#include "gromacs/fatalerror/messagestringcollector.h"
 +#include "gromacs/selection/position.h"
 +#include "gromacs/selection/selmethod.h"
 +#include "gromacs/selection/selparam.h"
 +#include "gromacs/utility/format.h"
 +
 +#include "parsetree.h"
 +#include "position.h"
 +#include "scanner.h"
 +#include "selelem.h"
 +
 +template <typename T>
 +static T min(T a, T b)
 +{
 +    return (a < b) ? a : b;
 +}
 +
 +template <typename T>
 +static T max(T a, T b)
 +{
 +    return (a > b) ? a : b;
 +}
 +
 +/*!
 + * \param[in] name   Name of the parameter to search.
 + * \param[in] nparam Number of parameters in the \p param array.
 + * \param[in] param  Parameter array to search.
 + * \returns   Pointer to the parameter in the \p param
 + *   or NULL if no parameter with name \p name was found.
 + *
 + * The comparison is case-sensitive.
 + */
 +gmx_ana_selparam_t *
 +gmx_ana_selparam_find(const char *name, int nparam, gmx_ana_selparam_t *param)
 +{
 +    int                i;
 +
 +    if (nparam == 0)
 +    {
 +        return NULL;
 +    }
 +    /* Find the first non-null parameter */
 +    i = 0;
 +    while (i < nparam && param[i].name == NULL)
 +    {
 +        ++i;
 +    }
 +    /* Process the special case of a NULL parameter */
 +    if (name == NULL)
 +    {
 +        return (i == 0) ? NULL : &param[i-1];
 +    }
 +    for ( ; i < nparam; ++i)
 +    {
 +        if (!strcmp(param[i].name, name))
 +        {
 +            return &param[i];
 +        }
 +        /* Check for 'no' prefix on boolean (NO_VALUE) parameters */
 +        if (param[i].val.type == NO_VALUE
 +            && strlen(name) > 2 && name[0] == 'n' && name[1] == 'o'
 +            && !strcmp(param[i].name, name+2))
 +        {
 +            return &param[i];
 +        }
 +    }
 +    return NULL;
 +}
 +
 +/*! \brief
 + * Does a type conversion on a \c t_selexpr_value.
 + *
 + * \param[in,out] value    Value to convert.
 + * \param[in]     type     Type to convert to.
 + * \param[in]     scanner  Scanner data structure.
 + * \returns       0 on success, a non-zero value on error.
 + */
 +static int
 +convert_value(t_selexpr_value *value, e_selvalue_t type, void *scanner)
 +{
 +    if (value->type == type || type == NO_VALUE)
 +    {
 +        return 0;
 +    }
 +    if (value->bExpr)
 +    {
 +        /* Conversion from atom selection to position using default
 +         * reference positions. */
 +        if (value->type == GROUP_VALUE && type == POS_VALUE)
 +        {
 +            value->u.expr =
 +                _gmx_sel_init_position(value->u.expr, NULL, scanner);
 +            if (value->u.expr == NULL)
 +            {
 +                return -1;
 +            }
 +            value->type = type;
 +            return 0;
 +        }
 +        return -1;
 +    }
 +    else
 +    {
 +        /* Integers to floating point are easy */
 +        if (value->type == INT_VALUE && type == REAL_VALUE)
 +        {
-             value->u.r.r1 = (real)value->u.i.i1;
-             value->u.r.r2 = (real)value->u.i.i2;
++            real r1 = (real)value->u.i.i1;
++            real r2 = (real)value->u.i.i2;
++            value->u.r.r1 = r1;
++            value->u.r.r2 = r2;
 +            value->type = type;
 +            return 0;
 +        }
 +        /* Reals that are integer-valued can also be converted */
 +        if (value->type == REAL_VALUE && type == INT_VALUE
 +            && gmx_within_tol(value->u.r.r1, (int)value->u.r.r1, GMX_REAL_EPS)
 +            && gmx_within_tol(value->u.r.r2, (int)value->u.r.r2, GMX_REAL_EPS))
 +        {
-             value->u.i.i1 = (int)value->u.r.r1;
-             value->u.i.i2 = (int)value->u.r.r2;
++            int i1 = (int)value->u.r.r1;
++            int i2 = (int)value->u.r.r2;
++            value->u.i.i1 = i1;
++            value->u.i.i2 = i2;
 +            value->type = type;
 +            return 0;
 +        }
 +    }
 +    return -1;
 +}
 +
 +/*! \brief
 + * Does a type conversion on a list of values.
 + *
 + * \param[in,out] values   Values to convert.
 + * \param[in]     type     Type to convert to.
 + * \param[in]     scanner  Scanner data structure.
 + * \returns       0 on success, a non-zero value on error.
 + */
 +static int
 +convert_values(t_selexpr_value *values, e_selvalue_t type, void *scanner)
 +{
 +    t_selexpr_value *value;
 +    int              rc, rc1;
 +
 +    rc = 0;
 +    value = values;
 +    while (value)
 +    {
 +        rc1 = convert_value(value, type, scanner);
 +        if (rc1 != 0 && rc == 0)
 +        {
 +            rc = rc1;
 +        }
 +        value = value->next;
 +    }
 +    /* FIXME: More informative error messages */
 +    return rc;
 +}
 +
 +/*! \brief
 + * Adds a child element for a parameter, keeping the parameter order.
 + *
 + * \param[in,out] root  Root element to which the child is added.
 + * \param[in]     child Child to add.
 + * \param[in]     param Parameter for which this child is a value.
 + *
 + * Puts \p child in the child list of \p root such that the list remains
 + * in the same order as the corresponding parameters.
 + */
 +static void
 +place_child(t_selelem *root, t_selelem *child, gmx_ana_selparam_t *param)
 +{
 +    gmx_ana_selparam_t *ps;
 +    int                 n;
 +
 +    ps = root->u.expr.method->param;
 +    n  = param - ps;
 +    /* Put the child element in the correct place */
 +    if (!root->child || n < root->child->u.param - ps)
 +    {
 +        child->next = root->child;
 +        root->child = child;
 +    }
 +    else
 +    {
 +        t_selelem *prev;
 +        /* Advance past children whose parameter index is not larger than n. */
 +        prev = root->child;
 +        while (prev->next && prev->next->u.param - ps <= n)
 +        {
 +            prev = prev->next;
 +        }
 +        child->next = prev->next;
 +        prev->next  = child;
 +    }
 +}
 +
 +/*! \brief
 + * Comparison function for sorting integer ranges.
 + *
 + * \param[in] a Pointer to the first range.
 + * \param[in] b Pointer to the second range.
 + * \returns   -1, 0, or 1 depending on the relative order of \p a and \p b.
 + *
 + * The ranges are primarily sorted based on their starting point, and
 + * secondarily based on length (longer ranges come first).
 + */
 +static int
 +cmp_int_range(const void *a, const void *b)
 +{
 +    if (((int *)a)[0] < ((int *)b)[0])
 +    {
 +        return -1;
 +    }
 +    if (((int *)a)[0] > ((int *)b)[0])
 +    {
 +        return 1;
 +    }
 +    if (((int *)a)[1] > ((int *)b)[1])
 +    {
 +        return -1;
 +    }
 +    return (((int *)a)[1] < ((int *)b)[1]) ? 1 : 0; /* shorter range sorts after longer */
 +}
 +
 +/*! \brief
 + * Comparison function for sorting real ranges.
 + *
 + * \param[in] a Pointer to the first range.
 + * \param[in] b Pointer to the second range.
 + * \returns   -1, 0, or 1 depending on the relative order of \p a and \p b.
 + *
 + * The ranges are primarily sorted based on their starting point, and
 + * secondarily based on length (longer ranges come first).
 + */
 +static int
 +cmp_real_range(const void *a, const void *b)
 +{
 +    if (((real *)a)[0] < ((real *)b)[0])
 +    {
 +        return -1;
 +    }
 +    if (((real *)a)[0] > ((real *)b)[0])
 +    {
 +        return 1;
 +    }
 +    if (((real *)a)[1] > ((real *)b)[1])
 +    {
 +        return -1;
 +    }
 +    return (((real *)a)[1] < ((real *)b)[1]) ? 1 : 0; /* shorter range sorts after longer */
 +}
 +
 +/*! \brief
 + * Parses the values for a parameter that takes integer or real ranges.
 + *
 + * \param[in] nval   Number of values in \p values.
 + * \param[in] values Pointer to the list of values.
 + * \param     param  Parameter to parse.
 + * \param[in] scanner Scanner data structure.
 + * \returns   TRUE if the values were parsed successfully, FALSE otherwise.
 + *
 + * Each input range is first put into increasing order.  The ranges are then
 + * sorted and overlapping ranges are merged; for integer ranges, directly
 + * adjacent ranges (e.g., 1-3 and 4-6) are merged as well.
 + * The result is stored as consecutive (start, end) pairs.
 + * The temporary range buffer is released on every error path.
 + */
 +static gmx_bool
 +parse_values_range(int nval, t_selexpr_value *values, gmx_ana_selparam_t *param,
 +                   void *scanner)
 +{
 +    t_selexpr_value    *value;
 +    int                *idata;
 +    real               *rdata;
 +    int                 i, j, n;
 +
 +    param->flags &= ~SPAR_DYNAMIC;
 +    if (param->val.type != INT_VALUE && param->val.type != REAL_VALUE)
 +    {
 +        GMX_ERROR_NORET(gmx::eeInternalError, "Invalid range parameter type");
 +        return FALSE;
 +    }
 +    /* Only the buffer that matches the parameter type is allocated; the
 +     * other one stays NULL.  Each value can produce at most one range,
 +     * i.e., two numbers. */
 +    idata = NULL;
 +    rdata = NULL;
 +    if (param->val.type == INT_VALUE)
 +    {
 +        snew(idata, nval*2);
 +    }
 +    else
 +    {
 +        snew(rdata, nval*2);
 +    }
 +    value = values;
 +    i = 0;
 +    while (value)
 +    {
 +        if (value->bExpr)
 +        {
 +            _gmx_selparser_error(scanner, "expressions not supported within range parameters");
 +            sfree(idata);
 +            sfree(rdata);
 +            return FALSE;
 +        }
 +        if (value->type != param->val.type)
 +        {
 +            GMX_ERROR_NORET(gmx::eeInternalError, "Invalid range value type");
 +            sfree(idata);
 +            sfree(rdata);
 +            return FALSE;
 +        }
 +        if (param->val.type == INT_VALUE)
 +        {
 +            /* Make sure the input range is in increasing order */
 +            if (value->u.i.i1 > value->u.i.i2)
 +            {
 +                int tmp       = value->u.i.i1;
 +                value->u.i.i1 = value->u.i.i2;
 +                value->u.i.i2 = tmp;
 +            }
 +            /* Check if the new range overlaps or extends the previous one */
 +            if (i > 0 && value->u.i.i1 <= idata[i-1]+1 && value->u.i.i2 >= idata[i-2]-1)
 +            {
 +                idata[i-2] = min(idata[i-2], value->u.i.i1);
 +                idata[i-1] = max(idata[i-1], value->u.i.i2);
 +            }
 +            else
 +            {
 +                idata[i++] = value->u.i.i1;
 +                idata[i++] = value->u.i.i2;
 +            }
 +        }
 +        else
 +        {
 +            /* Make sure the input range is in increasing order */
 +            if (value->u.r.r1 > value->u.r.r2)
 +            {
 +                real tmp      = value->u.r.r1;
 +                value->u.r.r1 = value->u.r.r2;
 +                value->u.r.r2 = tmp;
 +            }
 +            /* Check if the new range overlaps or extends the previous one */
 +            if (i > 0 && value->u.r.r1 <= rdata[i-1] && value->u.r.r2 >= rdata[i-2])
 +            {
 +                rdata[i-2] = min(rdata[i-2], value->u.r.r1);
 +                rdata[i-1] = max(rdata[i-1], value->u.r.r2);
 +            }
 +            else
 +            {
 +                rdata[i++] = value->u.r.r1;
 +                rdata[i++] = value->u.r.r2;
 +            }
 +        }
 +        value = value->next;
 +    }
 +    n = i/2;
 +    /* j is the number of stored numbers after merging.  The merge loops
 +     * below start from the second range and hence assume that at least one
 +     * range is present; with no ranges at all, nothing is stored. */
 +    j = 0;
 +    /* Sort the ranges and merge consecutive ones */
 +    if (n > 0 && param->val.type == INT_VALUE)
 +    {
 +        qsort(idata, n, 2*sizeof(int), &cmp_int_range);
 +        for (i = j = 2; i < 2*n; i += 2)
 +        {
 +            /* Integer ranges are also merged when they are adjacent. */
 +            if (idata[j-1]+1 >= idata[i])
 +            {
 +                if (idata[i+1] > idata[j-1])
 +                {
 +                    idata[j-1] = idata[i+1];
 +                }
 +            }
 +            else
 +            {
 +                idata[j]   = idata[i];
 +                idata[j+1] = idata[i+1];
 +                j += 2;
 +            }
 +        }
 +    }
 +    else if (n > 0)
 +    {
 +        qsort(rdata, n, 2*sizeof(real), &cmp_real_range);
 +        for (i = j = 2; i < 2*n; i += 2)
 +        {
 +            /* Real ranges are merged only if they actually overlap or
 +             * touch; there is no notion of adjacency as for integers. */
 +            if (rdata[j-1] >= rdata[i])
 +            {
 +                if (rdata[i+1] > rdata[j-1])
 +                {
 +                    rdata[j-1] = rdata[i+1];
 +                }
 +            }
 +            else
 +            {
 +                rdata[j]   = rdata[i];
 +                rdata[j+1] = rdata[i+1];
 +                j += 2;
 +            }
 +        }
 +    }
 +    n = j/2;
 +    /* Store the values */
 +    if (param->flags & SPAR_VARNUM)
 +    {
 +        /* The (shrunk) buffer is handed over to the parameter value. */
 +        param->val.nr  = n;
 +        if (param->val.type == INT_VALUE)
 +        {
 +            srenew(idata, j);
 +            _gmx_selvalue_setstore_alloc(&param->val, idata, j);
 +        }
 +        else
 +        {
 +            srenew(rdata, j);
 +            _gmx_selvalue_setstore_alloc(&param->val, rdata, j);
 +        }
 +    }
 +    else
 +    {
 +        /* Fixed-count parameter: the ranges are copied into the existing
 +         * storage and the temporary buffer is released. */
 +        if (n != param->val.nr)
 +        {
 +            _gmx_selparser_error(scanner, "the value should consist of exactly one range");
 +            sfree(idata);
 +            sfree(rdata);
 +            return FALSE;
 +        }
 +        if (param->val.type == INT_VALUE)
 +        {
 +            memcpy(param->val.u.i, idata, 2*n*sizeof(int));
 +            sfree(idata);
 +        }
 +        else
 +        {
 +            memcpy(param->val.u.r, rdata, 2*n*sizeof(real));
 +            sfree(rdata);
 +        }
 +    }
 +    if (param->nvalptr)
 +    {
 +        *param->nvalptr = param->val.nr;
 +    }
 +    param->nvalptr = NULL;
 +
 +    return TRUE;
 +}
 +
 +/*! \brief
 + * Parses a list of constant values for a variable-count parameter.
 + *
 + * \param[in] nval   Number of values in \p values.
 + * \param[in] values Pointer to the list of values.
 + * \param     param  Parameter to parse.
 + * \param     root   Selection element to which child expressions are added.
 + * \param[in] scanner Scanner data structure.
 + * \returns   TRUE if the values were parsed successfully, FALSE otherwise.
 + *
 + * An integer range is expanded into the sequence of numbers from its first
 + * to its second value, each stored as a separate value.
 + * For string parameters, a dummy constant child element that refers to the
 + * stored strings is added to \p root.
 + */
 +static gmx_bool
 +parse_values_varnum(int nval, t_selexpr_value *values,
 +                    gmx_ana_selparam_t *param, t_selelem *root, void *scanner)
 +{
 +    t_selexpr_value    *cur;
 +    int                 count, k;
 +
 +    param->flags &= ~SPAR_DYNAMIC;
 +    /* Each integer range contributes all of its members, so the number of
 +     * values to reserve grows by the extent of every range. */
 +    if (param->val.type == INT_VALUE)
 +    {
 +        for (cur = values; cur != NULL; cur = cur->next)
 +        {
 +            if (cur->type == INT_VALUE && !cur->bExpr)
 +            {
 +                nval += abs(cur->u.i.i2 - cur->u.i.i1);
 +            }
 +        }
 +    }
 +
 +    /* Only these four value types are implemented */
 +    if (!(param->val.type == INT_VALUE || param->val.type == REAL_VALUE
 +          || param->val.type == STR_VALUE || param->val.type == POS_VALUE))
 +    {
 +        GMX_ERROR_NORET(gmx::eeInternalError,
 +                        "Variable-count value type not implemented");
 +        return FALSE;
 +    }
 +
 +    /* Make room for the values */
 +    if (param->val.type != POS_VALUE)
 +    {
 +        _gmx_selvalue_reserve(&param->val, nval);
 +    }
 +    else
 +    {
 +        gmx_ana_pos_reserve(param->val.u.p, nval, 0);
 +        gmx_ana_pos_set_nr(param->val.u.p, nval);
 +        gmx_ana_indexmap_init(&param->val.u.p->m, NULL, NULL, INDEX_UNKNOWN);
 +    }
 +
 +    count = 0;
 +    for (cur = values; cur != NULL; cur = cur->next)
 +    {
 +        if (cur->bExpr)
 +        {
 +            _gmx_selparser_error(scanner, "expressions not supported within value lists");
 +            return FALSE;
 +        }
 +        if (cur->type != param->val.type)
 +        {
 +            GMX_ERROR_NORET(gmx::eeInternalError, "Invalid value type");
 +            return FALSE;
 +        }
 +        switch (param->val.type)
 +        {
 +            case INT_VALUE:
 +                /* Expand the range in the direction it was given */
 +                if (cur->u.i.i1 > cur->u.i.i2)
 +                {
 +                    for (k = cur->u.i.i1; k >= cur->u.i.i2; --k)
 +                    {
 +                        param->val.u.i[count] = k;
 +                        ++count;
 +                    }
 +                }
 +                else
 +                {
 +                    for (k = cur->u.i.i1; k <= cur->u.i.i2; ++k)
 +                    {
 +                        param->val.u.i[count] = k;
 +                        ++count;
 +                    }
 +                }
 +                break;
 +            case REAL_VALUE:
 +                if (cur->u.r.r1 != cur->u.r.r2)
 +                {
 +                    _gmx_selparser_error(scanner, "real ranges not supported");
 +                    return FALSE;
 +                }
 +                param->val.u.r[count] = cur->u.r.r1;
 +                ++count;
 +                break;
 +            case STR_VALUE:
 +                param->val.u.s[count] = strdup(cur->u.s);
 +                ++count;
 +                break;
 +            case POS_VALUE:
 +                copy_rvec(cur->u.x, param->val.u.p->x[count]);
 +                ++count;
 +                break;
 +            default: /* Should not be reached */
 +                GMX_ERROR_NORET(gmx::eeInternalError, "Invalid value type");
 +                return FALSE;
 +        }
 +    }
 +    /* Publish the final count and detach the count pointer */
 +    param->val.nr = count;
 +    if (param->nvalptr != NULL)
 +    {
 +        *param->nvalptr = param->val.nr;
 +    }
 +    param->nvalptr = NULL;
 +    if (param->val.type != STR_VALUE)
 +    {
 +        return TRUE;
 +    }
 +
 +    /* String values get a dummy child element that refers to them.
 +     * This element is responsible for freeing the values, but carries no
 +     * other function. */
 +    {
 +        t_selelem *holder = _gmx_selelem_create(SEL_CONST);
 +
 +        _gmx_selelem_set_vtype(holder, STR_VALUE);
 +        holder->name   = param->name;
 +        holder->flags &= ~SEL_ALLOCVAL;
 +        holder->flags |= SEL_FLAGSSET | SEL_VARNUMVAL | SEL_ALLOCDATA;
 +        holder->v.nr   = param->val.nr;
 +        _gmx_selvalue_setstore(&holder->v, param->val.u.s);
 +        /* The element is not group-valued, so its u union is otherwise
 +         * unused; the parameter is stored there because place_child()
 +         * reads it from that location (ugly, but it works). */
 +        holder->u.param = param;
 +        place_child(root, holder, param);
 +    }
 +
 +    return TRUE;
 +}
 +
 +/*! \brief
 + * Attaches an expression to a selection element as the value of a parameter.
 + *
 + * \param[in,out] root  Root element to which the subexpression is added.
 + * \param[in]     param Parameter for which this expression is a value.
 + * \param[in]     expr  Expression to add.
 + * \param[in]     scanner Scanner data structure.
 + * \returns       The created child element, or NULL on error.
 + *
 + * Unless \p expr already is a \ref SEL_SUBEXPRREF (in which case it is used
 + * directly), a new \ref SEL_SUBEXPRREF element is created to wrap it.
 + * The resulting element is inserted into the child list of \p root with
 + * place_child(), and has \ref SEL_ALLOCVAL cleared.
 + * On failure, a wrapper element created by this function is freed.
 + */
 +static t_selelem *
 +add_child(t_selelem *root, gmx_ana_selparam_t *param, t_selelem *expr,
 +          void *scanner)
 +{
 +    t_selelem          *ref;
 +    gmx_bool            bValid;
 +
 +    if (!(root->type == SEL_EXPRESSION || root->type == SEL_MODIFIER))
 +    {
 +        GMX_ERROR_NORET(gmx::eeInternalError,
 +                        "Unsupported root element for selection parameter parser");
 +        return NULL;
 +    }
 +    /* Wrap the expression into a reference element unless it is one */
 +    ref = expr;
 +    if (expr->type != SEL_SUBEXPRREF)
 +    {
 +        ref = _gmx_selelem_create(SEL_SUBEXPRREF);
 +        if (ref == NULL)
 +        {
 +            return NULL;
 +        }
 +        _gmx_selelem_set_vtype(ref, expr->v.type);
 +        ref->child = expr;
 +    }
 +    /* Associate the element with the parameter */
 +    ref->flags  &= ~SEL_ALLOCVAL;
 +    ref->u.param = param;
 +    /* Run the checks in order; the first failure stops the sequence */
 +    bValid = TRUE;
 +    if (ref->v.type != param->val.type)
 +    {
 +        _gmx_selparser_error(scanner, "invalid expression value");
 +        bValid = FALSE;
 +    }
 +    else if (_gmx_selelem_update_flags(ref, scanner) != 0)
 +    {
 +        bValid = FALSE;
 +    }
 +    else if ((ref->flags & SEL_DYNAMIC) && !(param->flags & SPAR_DYNAMIC))
 +    {
 +        _gmx_selparser_error(scanner, "dynamic values not supported");
 +        bValid = FALSE;
 +    }
 +    if (!bValid)
 +    {
 +        /* Only free what was created here; expr itself is left alone */
 +        if (ref != expr)
 +        {
 +            _gmx_selelem_free(ref);
 +        }
 +        return NULL;
 +    }
 +    /* A static value makes the parameter static as well */
 +    if ((ref->flags & SEL_DYNAMIC) == 0)
 +    {
 +        param->flags &= ~SPAR_DYNAMIC;
 +    }
 +    /* Insert into the child list at the position of the parameter */
 +    place_child(root, ref, param);
 +    return ref;
 +}
 +
 +/*! \brief
 + * Parses an expression value for a parameter that takes a variable number of values.
 + *
 + * \param[in] nval   Number of values in \p values.
 + * \param[in] values Pointer to the list of values.
 + * \param     param  Parameter to parse.
 + * \param     root   Selection element to which child expressions are added.
 + * \param[in] scanner Scanner data structure.
 + * \returns   TRUE if the values were parsed successfully, FALSE otherwise.
 + *
 + * \p values must consist of exactly one expression value.
 + * The expression is added as a child of \p root and removed from the value.
 + * For position and group expressions, the parameter gets exactly one value
 + * that is stored directly in the parameter storage.
 + * For other expressions, the value count is set to -1 and the remaining
 + * initialization is left to the compilation stage.
 + */
 +static gmx_bool
 +parse_values_varnum_expr(int nval, t_selexpr_value *values,
 +                         gmx_ana_selparam_t *param, t_selelem *root,
 +                         void *scanner)
 +{
 +    t_selexpr_value    *value;
 +    t_selelem          *child;
 +
 +    if (nval != 1 || !values->bExpr)
 +    {
 +        GMX_ERROR_NORET(gmx::eeInternalError, "Invalid expression value");
 +        return FALSE;
 +    }
 +
 +    value = values;
 +    child = add_child(root, param, value->u.expr, scanner);
 +    /* The expression is cleared from the value whether or not it was
 +     * successfully added. */
 +    value->u.expr = NULL;
 +    if (!child)
 +    {
 +        return FALSE;
 +    }
 +
 +    /* Process single-valued expressions */
 +    /* TODO: We should also handle SEL_SINGLEVAL expressions here */
 +    if (child->v.type == POS_VALUE || child->v.type == GROUP_VALUE)
 +    {
 +        /* Set the value storage */
 +        _gmx_selvalue_setstore(&child->v, param->val.u.ptr);
 +        param->val.nr = 1;
 +        if (param->nvalptr)
 +        {
 +            *param->nvalptr = param->val.nr;
 +        }
 +        param->nvalptr = NULL;
 +        return TRUE;
 +    }
 +
 +    if (!(child->flags & SEL_VARNUMVAL))
 +    {
 +        _gmx_selparser_error(scanner, "invalid expression value");
 +        return FALSE;
 +    }
 +
 +    child->flags   |= SEL_ALLOCVAL;
 +    param->val.nr   = -1;
 +    /* The count pointer is optional, so it must be checked before use.
 +     * It is not cleared here, unlike in the single-valued case above. */
 +    if (param->nvalptr)
 +    {
 +        *param->nvalptr = param->val.nr;
 +    }
 +    /* Rest of the initialization is done during compilation in
 +     * init_method(). */
 +
 +    return TRUE;
 +}
 +
 +/*! \brief
 + * Makes an expression element store its result inside a parameter value.
 + *
 + * \param[in,out] sel   Selection element that evaluates the value.
 + * \param[in]     param Parameter to receive the value.
 + * \param[in]     i     Index of the value within \p param that \p sel
 + *   evaluates.
 + * \param[in]     scanner Scanner data structure.
 + * \returns   TRUE on success, FALSE if \p sel cannot provide the value.
 + *
 + * Points the value storage of \p sel at element \p i of the storage of
 + * \p param, and marks \p sel as holding one value.
 + * Only group-valued or single-valued expressions are accepted.
 + * This function is used internally by parse_values_std().
 + */
 +static gmx_bool
 +set_expr_value_store(t_selelem *sel, gmx_ana_selparam_t *param, int i,
 +                     void *scanner)
 +{
 +    if (!(sel->v.type == GROUP_VALUE || (sel->flags & SEL_SINGLEVAL)))
 +    {
 +        _gmx_selparser_error(scanner, "invalid expression value");
 +        return FALSE;
 +    }
 +    switch (sel->v.type)
 +    {
 +        case INT_VALUE:
 +            sel->v.u.i = param->val.u.i + i;
 +            break;
 +        case REAL_VALUE:
 +            sel->v.u.r = param->val.u.r + i;
 +            break;
 +        case STR_VALUE:
 +            sel->v.u.s = param->val.u.s + i;
 +            break;
 +        case POS_VALUE:
 +            sel->v.u.p = param->val.u.p + i;
 +            break;
 +        case GROUP_VALUE:
 +            sel->v.u.g = param->val.u.g + i;
 +            break;
 +        default: /* Error */
 +            GMX_ERROR_NORET(gmx::eeInternalError, "Invalid value type");
 +            return FALSE;
 +    }
 +    /* One value, stored in memory that the element did not allocate */
 +    sel->v.nalloc = -1;
 +    sel->v.nr     = 1;
 +    return TRUE;
 +}
 +
 +/*! \brief
 + * Parses the values for a parameter that takes a constant number of values.
 + *
 + * \param[in] nval   Number of values in \p values.
 + * \param[in] values Pointer to the list of values (may be NULL if no values
 + *   were given, in which case an error is reported).
 + * \param     param  Parameter to parse.
 + * \param     root   Selection element to which child expressions are added.
 + * \param[in] scanner Scanner data structure.
 + * \returns   TRUE if the values were parsed successfully, FALSE otherwise.
 + *
 + * For integer ranges, the sequence of numbers from the first to second value
 + * is stored, each as a separate value.
 + * Expression values are added as children of \p root and set up to store
 + * their result directly into the parameter storage.
 + */
 +static gmx_bool
 +parse_values_std(int nval, t_selexpr_value *values, gmx_ana_selparam_t *param,
 +                 t_selelem *root, void *scanner)
 +{
 +    t_selexpr_value   *value;
 +    t_selelem         *child;
 +    int                i, j;
 +    gmx_bool               bDynamic;
 +
 +    /* Handle atom-valued parameters */
 +    if (param->flags & SPAR_ATOMVAL)
 +    {
 +        if (nval > 1)
 +        {
 +            _gmx_selparser_error(scanner, "more than one value not supported");
 +            return FALSE;
 +        }
 +        value = values;
 +        /* The value list can be empty; that case falls through to the
 +         * normal handling below, which reports the missing value. */
 +        if (value && value->bExpr)
 +        {
 +            child = add_child(root, param, value->u.expr, scanner);
 +            value->u.expr = NULL;
 +            if (!child)
 +            {
 +                return FALSE;
 +            }
 +            child->flags |= SEL_ALLOCVAL;
 +            if (child->v.type != GROUP_VALUE && (child->flags & SEL_ATOMVAL))
 +            {
 +                /* Rest of the initialization is done during compilation in
 +                 * init_method(). */
 +                /* TODO: Positions are not correctly handled */
 +                param->val.nr = -1;
 +                if (param->nvalptr)
 +                {
 +                    *param->nvalptr = -1;
 +                }
 +                return TRUE;
 +            }
 +            /* The expression gives a single value, so the parameter is
 +             * treated as a normal single-valued one from here on. */
 +            param->flags  &= ~SPAR_ATOMVAL;
 +            param->val.nr  = 1;
 +            if (param->nvalptr)
 +            {
 +                *param->nvalptr = 1;
 +            }
 +            param->nvalptr = NULL;
 +            if (param->val.type == INT_VALUE || param->val.type == REAL_VALUE
 +                || param->val.type == STR_VALUE)
 +            {
 +                _gmx_selvalue_reserve(&param->val, 1);
 +            }
 +            return set_expr_value_store(child, param, 0, scanner);
 +        }
 +        /* If we reach here, proceed with normal parameter handling */
 +        param->val.nr = 1;
 +        if (param->val.type == INT_VALUE || param->val.type == REAL_VALUE
 +            || param->val.type == STR_VALUE)
 +        {
 +            _gmx_selvalue_reserve(&param->val, 1);
 +        }
 +        param->flags &= ~SPAR_ATOMVAL;
 +        param->flags &= ~SPAR_DYNAMIC;
 +    }
 +
 +    value = values;
 +    i = 0;
 +    bDynamic = FALSE;
 +    while (value && i < param->val.nr)
 +    {
 +        if (value->type != param->val.type)
 +        {
 +            _gmx_selparser_error(scanner, "incorrect value skipped");
 +            value = value->next;
 +            continue;
 +        }
 +        if (value->bExpr)
 +        {
 +            child = add_child(root, param, value->u.expr, scanner);
 +            /* Clear the expression from the value once it is stored */
 +            value->u.expr = NULL;
 +            /* Check that the expression is valid */
 +            if (!child)
 +            {
 +                return FALSE;
 +            }
 +            if (!set_expr_value_store(child, param, i, scanner))
 +            {
 +                return FALSE;
 +            }
 +            if (child->flags & SEL_DYNAMIC)
 +            {
 +                bDynamic = TRUE;
 +            }
 +        }
 +        else
 +        {
 +            /* Value is not an expression */
 +            switch (value->type)
 +            {
 +                case INT_VALUE:
 +                    /* Expand the range in the direction it was given, but
 +                     * never store more values than the parameter takes. */
 +                    if (value->u.i.i1 <= value->u.i.i2)
 +                    {
 +                        for (j = value->u.i.i1; j <= value->u.i.i2 && i < param->val.nr; ++j)
 +                        {
 +                            param->val.u.i[i++] = j;
 +                        }
 +                        if (j != value->u.i.i2 + 1)
 +                        {
 +                            _gmx_selparser_error(scanner, "extra values skipped");
 +                        }
 +                    }
 +                    else
 +                    {
 +                        for (j = value->u.i.i1; j >= value->u.i.i2 && i < param->val.nr; --j)
 +                        {
 +                            param->val.u.i[i++] = j;
 +                        }
 +                        if (j != value->u.i.i2 - 1)
 +                        {
 +                            _gmx_selparser_error(scanner, "extra values skipped");
 +                        }
 +                    }
 +                    /* Compensate for the common increment after the
 +                     * switch, which would otherwise count the last stored
 +                     * integer twice. */
 +                    --i;
 +                    break;
 +                case REAL_VALUE:
 +                    if (value->u.r.r1 != value->u.r.r2)
 +                    {
 +                        _gmx_selparser_error(scanner, "real ranges not supported");
 +                        return FALSE;
 +                    }
 +                    param->val.u.r[i] = value->u.r.r1;
 +                    break;
 +                case STR_VALUE:
 +                    param->val.u.s[i] = strdup(value->u.s);
 +                    break;
 +                case POS_VALUE:
 +                    gmx_ana_pos_init_const(&param->val.u.p[i], value->u.x);
 +                    break;
 +                case NO_VALUE:
 +                case GROUP_VALUE:
 +                    GMX_ERROR_NORET(gmx::eeInternalError,
 +                                    "Invalid non-expression value");
 +                    return FALSE;
 +            }
 +        }
 +        ++i;
 +        value = value->next;
 +    }
 +    /* The loop ends either when the values run out or when the parameter is
 +     * full; both leftover values and missing values are errors. */
 +    if (value != NULL)
 +    {
 +        _gmx_selparser_error(scanner, "extra values'");
 +        return FALSE;
 +    }
 +    if (i < param->val.nr)
 +    {
 +        _gmx_selparser_error(scanner, "not enough values");
 +        return FALSE;
 +    }
 +    if (!bDynamic)
 +    {
 +        param->flags &= ~SPAR_DYNAMIC;
 +    }
 +    if (param->nvalptr)
 +    {
 +        *param->nvalptr = param->val.nr;
 +    }
 +    param->nvalptr = NULL;
 +
 +    return TRUE;
 +}
 +
 +/*! \brief
 + * Parses the values for a boolean parameter.
 + *
 + * \param[in] name   Name by which the parameter was given (may be NULL if
 + *   the parameter was given without a name).
 + * \param[in] nval   Number of values in \p values.
 + * \param[in] values Pointer to the list of values.
 + * \param     param  Parameter to parse.
 + * \param[in] scanner Scanner data structure.
 + * \returns   TRUE if the values were parsed successfully, FALSE otherwise.
 + *
 + * The parameter is set to FALSE if it was given with a 'no' prefix (in
 + * which case no value is allowed) or if the given value is zero, and to
 + * TRUE otherwise.
 + */
 +static gmx_bool
 +parse_values_bool(const char *name, int nval, t_selexpr_value *values,
 +                  gmx_ana_selparam_t *param, void *scanner)
 +{
 +    gmx_bool bSetNo;
 +    int  len;
 +
 +    if (param->val.type != NO_VALUE)
 +    {
 +        GMX_ERROR_NORET(gmx::eeInternalError, "Invalid boolean parameter");
 +        return FALSE;
 +    }
 +    if (nval > 1 || (values && values->type != INT_VALUE))
 +    {
 +        _gmx_selparser_error(scanner, "parameter takes only a yes/no/on/off/0/1 value");
 +        return FALSE;
 +    }
 +
 +    bSetNo = FALSE;
 +    /* Check if the parameter name is given with a 'no' prefix.
 +     * Without a name, there cannot be a prefix either. */
 +    if (name != NULL && param->name != NULL)
 +    {
 +        len = strlen(name);
 +        if (len > 2 && name[0] == 'n' && name[1] == 'o'
 +            && strncmp(name+2, param->name, len-2) == 0)
 +        {
 +            bSetNo = TRUE;
 +        }
 +    }
 +    if (bSetNo && nval > 0)
 +    {
 +        _gmx_selparser_error(scanner, "parameter 'no%s' should not have a value",
 +                             param->name);
 +        return FALSE;
 +    }
 +    /* An explicit zero value also turns the parameter off */
 +    if (values && values->u.i.i1 == 0)
 +    {
 +        bSetNo = TRUE;
 +    }
 +
 +    *param->val.u.b = bSetNo ? FALSE : TRUE;
 +    return TRUE;
 +}
 +
 +/*! \brief
 + * Parses the value for an enumeration parameter.
 + *
 + * \param[in] nval   Number of values in \p values.
 + * \param[in] values Pointer to the list of values.
 + * \param     param  Parameter to parse.
 + * \param[in] scanner Scanner data structure.
 + * \returns   TRUE if the values were parsed successfully, FALSE otherwise.
 + *
 + * The alternatives are the strings in the parameter storage from index 1 up
 + * to a terminating NULL.  The given string is compared against the start of
 + * each alternative, and must match exactly one of them.
 + * The matching alternative is then stored at index 0.
 + */
 +static gmx_bool
 +parse_values_enum(int nval, t_selexpr_value *values, gmx_ana_selparam_t *param,
 +                  void *scanner)
 +{
 +    int  idx, found, nchars;
 +
 +    if (nval != 1)
 +    {
 +        _gmx_selparser_error(scanner, "a single value is required");
 +        return FALSE;
 +    }
 +    if (!(values->type == STR_VALUE && param->val.type == STR_VALUE))
 +    {
 +        GMX_ERROR_NORET(gmx::eeInternalError, "Invalid enum parameter");
 +        return FALSE;
 +    }
 +    if (values->bExpr)
 +    {
 +        _gmx_selparser_error(scanner, "expression value for enumerated parameter not supported");
 +        return FALSE;
 +    }
 +
 +    nchars = strlen(values->u.s);
 +    found  = 0;
 +    for (idx = 1; param->val.u.s[idx] != NULL; ++idx)
 +    {
 +        if (strncmp(values->u.s, param->val.u.s[idx], nchars) != 0)
 +        {
 +            continue;
 +        }
 +        /* A second matching alternative makes the value ambiguous */
 +        if (found > 0)
 +        {
 +            _gmx_selparser_error(scanner, "ambiguous value");
 +            return FALSE;
 +        }
 +        found = idx;
 +    }
 +    if (found == 0)
 +    {
 +        _gmx_selparser_error(scanner, "invalid value");
 +        return FALSE;
 +    }
 +    param->val.u.s[0] = param->val.u.s[found];
 +    return TRUE;
 +}
 +
 +/*! \brief
 + * Turns constant expression values into plain values.
 + *
 + * \param[in,out] values First element in the value list to process.
 + *
 + * Every value that is a \ref SEL_CONST expression of a non-group type is
 + * replaced by the first value of that expression, and the expression
 + * element is freed.  Other values are left untouched.
 + */
 +static void
 +convert_const_values(t_selexpr_value *values)
 +{
 +    t_selexpr_value *cur;
 +
 +    for (cur = values; cur != NULL; cur = cur->next)
 +    {
 +        t_selelem *constExpr;
 +
 +        if (!cur->bExpr)
 +        {
 +            continue;
 +        }
 +        /* The expression pointer shares the union with the plain values,
 +         * so it has to be saved before any of them is written. */
 +        constExpr = cur->u.expr;
 +        if (constExpr->v.type == GROUP_VALUE || constExpr->type != SEL_CONST)
 +        {
 +            continue;
 +        }
 +        cur->bExpr = FALSE;
 +        switch (constExpr->v.type)
 +        {
 +            case INT_VALUE:
 +                cur->u.i.i1 = cur->u.i.i2 = constExpr->v.u.i[0];
 +                break;
 +            case REAL_VALUE:
 +                cur->u.r.r1 = cur->u.r.r2 = constExpr->v.u.r[0];
 +                break;
 +            case STR_VALUE:
 +                /* NOTE(review): only the pointer is copied here; whether it
 +                 * stays valid after the element is freed below depends on
 +                 * _gmx_selelem_free() - confirm. */
 +                cur->u.s = constExpr->v.u.s[0];
 +                break;
 +            case POS_VALUE:
 +                copy_rvec(constExpr->v.u.p->x[0], cur->u.x);
 +                break;
 +            default:
 +                GMX_ERROR_NORET(gmx::eeInternalError,
 +                                "Unsupported value type");
 +                break;
 +        }
 +        _gmx_selelem_free(constExpr);
 +    }
 +}
 +
 +/*!
 + * \param     pparams List of parameters from the selection parser.
 + * \param[in] nparam  Number of parameters in \p params.
 + * \param     params  Array of parameters to parse.
 + * \param     root    Selection element to which child expressions are added.
 + * \param[in] scanner Scanner data structure.
 + * \returns   TRUE if the parameters were parsed successfully, FALSE otherwise.
 + *
 + * Initializes the \p params array based on the parameters in \p pparams.
 + * See the documentation of \c gmx_ana_selparam_t for different options
 + * available for parsing.
 + *
 + * Parameters in \p pparams that have no name are matched by position to the
 + * unnamed parameters at the beginning of \p params; named ones are looked
 + * up with gmx_ana_selparam_find().
 + *
 + * The list \p pparams and any associated values are freed after the parameters
 + * have been processed, regardless of whether there was an error or not.
 + */
 +gmx_bool
 +_gmx_sel_parse_params(t_selexpr_param *pparams, int nparam, gmx_ana_selparam_t *params,
 +                      t_selelem *root, void *scanner)
 +{
 +    gmx::MessageStringCollector *errors = _gmx_sel_lexer_error_reporter(scanner);
 +    t_selexpr_param    *pparam;
 +    gmx_ana_selparam_t *oparam;
 +    gmx_bool                bOk, rc;
 +    int                 i;
 +
 +    /* Check that the value pointers of SPAR_VARNUM parameters are NULL and
 +     * that they are not NULL for other parameters */
 +    bOk = TRUE;
 +    for (i = 0; i < nparam; ++i)
 +    {
 +        std::string contextStr = gmx::formatString("In parameter '%s'", params[i].name);
 +        gmx::MessageStringContext  context(errors, contextStr);
 +        if (params[i].val.type != POS_VALUE && (params[i].flags & (SPAR_VARNUM | SPAR_ATOMVAL)))
 +        {
 +            /* A non-NULL pointer is reported, but does not make the
 +             * parsing fail. */
 +            if (params[i].val.u.ptr != NULL)
 +            {
 +                _gmx_selparser_error(scanner, "value pointer is not NULL "
 +                                     "although it should be for SPAR_VARNUM "
 +                                     "and SPAR_ATOMVAL parameters");
 +            }
 +            if ((params[i].flags & SPAR_VARNUM)
 +                && (params[i].flags & SPAR_DYNAMIC) && !params[i].nvalptr)
 +            {
 +                _gmx_selparser_error(scanner, "nvalptr is NULL but both "
 +                                     "SPAR_VARNUM and SPAR_DYNAMIC are specified");
 +                bOk = FALSE;
 +            }
 +        }
 +        else
 +        {
 +            if (params[i].val.u.ptr == NULL)
 +            {
 +                _gmx_selparser_error(scanner, "value pointer is NULL");
 +                bOk = FALSE;
 +            }
 +        }
 +    }
 +    if (!bOk)
 +    {
 +        _gmx_selexpr_free_params(pparams);
 +        return FALSE;
 +    }
 +    /* Parse the parameters */
 +    pparam = pparams;
 +    /* i is the index of the next positional (unnamed) parameter; it becomes
 +     * -1 once a named parameter has been seen, after which unnamed ones are
 +     * no longer accepted. */
 +    i      = 0;
 +    while (pparam)
 +    {
 +        std::string contextStr;
 +        /* Find the parameter and make some checks */
 +        if (pparam->name != NULL)
 +        {
 +            contextStr = gmx::formatString("In parameter '%s'", pparam->name);
 +            i = -1;
 +            oparam = gmx_ana_selparam_find(pparam->name, nparam, params);
 +        }
 +        else if (i >= 0)
 +        {
 +            contextStr = gmx::formatString("In value %d", i + 1);
 +            /* The position must both exist in the parameter array and
 +             * refer to an unnamed parameter. */
 +            if (i >= nparam || params[i].name != NULL)
 +            {
 +                oparam = NULL;
 +                _gmx_selparser_error(scanner, "too many NULL parameters provided");
 +                bOk = FALSE;
 +                pparam = pparam->next;
 +                continue;
 +            }
 +            oparam = &params[i];
 +            ++i;
 +        }
 +        else
 +        {
 +            _gmx_selparser_error(scanner, "all NULL parameters should appear in the beginning of the list");
 +            bOk = FALSE;
 +            pparam = pparam->next;
 +            continue;
 +        }
 +        gmx::MessageStringContext  context(errors, contextStr);
 +        if (!oparam)
 +        {
 +            _gmx_selparser_error(scanner, "unknown parameter skipped");
 +            bOk = FALSE;
 +            goto next_param;
 +        }
 +        if (oparam->flags & SPAR_SET)
 +        {
 +            _gmx_selparser_error(scanner, "parameter set multiple times, extra values skipped");
 +            bOk = FALSE;
 +            goto next_param;
 +        }
 +        oparam->flags |= SPAR_SET;
 +        /* Process the values for the parameter */
 +        convert_const_values(pparam->value);
 +        if (convert_values(pparam->value, oparam->val.type, scanner) != 0)
 +        {
 +            _gmx_selparser_error(scanner, "invalid value");
 +            bOk = FALSE;
 +            goto next_param;
 +        }
 +        /* Select the parser based on the value type and the flags */
 +        if (oparam->val.type == NO_VALUE)
 +        {
 +            rc = parse_values_bool(pparam->name, pparam->nval, pparam->value, oparam, scanner);
 +        }
 +        else if (oparam->flags & SPAR_RANGES)
 +        {
 +            rc = parse_values_range(pparam->nval, pparam->value, oparam, scanner);
 +        }
 +        else if (oparam->flags & SPAR_VARNUM)
 +        {
 +            if (pparam->nval == 1 && pparam->value->bExpr)
 +            {
 +                rc = parse_values_varnum_expr(pparam->nval, pparam->value, oparam, root, scanner);
 +            }
 +            else
 +            {
 +                rc = parse_values_varnum(pparam->nval, pparam->value, oparam, root, scanner);
 +            }
 +        }
 +        else if (oparam->flags & SPAR_ENUMVAL)
 +        {
 +            rc = parse_values_enum(pparam->nval, pparam->value, oparam, scanner);
 +        }
 +        else
 +        {
 +            rc = parse_values_std(pparam->nval, pparam->value, oparam, root, scanner);
 +        }
 +        if (!rc)
 +        {
 +            bOk = FALSE;
 +        }
 +        /* Advance to the next parameter */
 +next_param:
 +        pparam = pparam->next;
 +    }
 +    /* Check that all required parameters are present */
 +    for (i = 0; i < nparam; ++i)
 +    {
 +        if (!(params[i].flags & SPAR_OPTIONAL) && !(params[i].flags & SPAR_SET))
 +        {
 +            _gmx_selparser_error(scanner, "required parameter '%s' not specified", params[i].name);
 +            bOk = FALSE;
 +        }
 +    }
 +
 +    _gmx_selexpr_free_params(pparams);
 +    return bOk;
 +}
diff --cc src/gromacs/selection/sm_insolidangle.cpp
index 8321e77ad0,0000000000..75402d8261
mode 100644,000000..100644
--- a/src/gromacs/selection/sm_insolidangle.cpp
+++ b/src/gromacs/selection/sm_insolidangle.cpp
@@@ -1,969 -1,0 +1,966 @@@
 +/*
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2009, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + */
 +/*! \page page_module_selection_insolidangle Selection method: insolidangle
 + *
 + * This method selects a subset of particles that are located in a solid
 + * angle defined by a center and a set of points.
 + * The solid angle is constructed as a union of small cones whose axis
 + * goes through the center and a point.
 + * So there's such a cone for each position, and a
 + * point is in the solid angle if it lies within any of these cones.
 + * The width of the cones can be adjusted.
 + *
 + * \internal
 + *
 + * The method is implemented by partitioning the surface of the unit sphere
 + * into bins using the polar coordinates \f$(\theta, \phi)\f$.
 + * The partitioning is always uniform in the zenith angle \f$\theta\f$,
 + * while the partitioning in the azimuthal angle \f$\phi\f$ varies.
 + * For each reference point, the unit vector from the center to the point
 + * is constructed, and it is stored in all the bins that overlap with the
 + * cone defined by the point.
 + * Bins that are completely covered by a single cone are marked as such.
 + * Checking whether a point is in the solid angle is then straightforward
 + * with this data structure: one finds the bin that corresponds to the point,
 + * and checks whether the bin is completely covered. If it is not, one
 + * additionally needs to check whether it is within the specified cutoff of
 + * any of the stored points.
 + *
 + * The above construction gives quite a lot of flexibility for constructing
 + * the bins without modifying the rest of the code.
 + * The current (quite inefficient) implementation is discussed below, but
 + * it should be optimized to get the most out of the code.
 + *
 + * The current way of constructing the bins constructs the boundaries
 + * statically: the bin size in the zenith direction is set to approximately
 + * half the angle cutoff, and the bins in the azimuthal direction have
 + * sizes such that the shortest edge of the bin is approximately equal to
 + * half the angle cutoff (for the regions close to the poles, a single bin
 + * is used).
 + * Each reference point is then added to the bins as follows:
 + *  -# Find the zenith angle range that is spanned by the cone centered at the
 + *     point (this is simple addition/subtraction).
 + *  -# Calculate the maximal span of the cone in the azimuthal direction using
 + *     the formula
 + *     \f[\sin \Delta \phi_{max} = \frac{\sin \alpha}{\sin \theta}\f]
 + *     (a sine formula in spherical coordinates),
 + *     where \f$\alpha\f$ is the width of the cone and \f$\theta\f$ is the
 + *     zenith angle of the cone center.
 + *     Similarly, the zenith angle at which this extent is achieved is
 + *     calculated using
 + *     \f[\cos \theta_{max} = \frac{\cos \theta}{\cos \alpha}\f]
 + *     (Pythagoras's theorem in spherical coordinates).
 + *  -# For each zenith angle bin that is at least partially covered by the
 + *     cone, calculate the span of the cone at the edges using
 + *     \f[\sin^2 \frac{\Delta \phi}{2} = \frac{\sin^2 \frac{\alpha}{2} - \sin^2 \frac{\theta - \theta'}{2}}{\sin \theta \sin \theta'}\f]
 + *     (distance in spherical geometry),
 + *     where \f$\theta'\f$ is the zenith angle of the bin edge.
 + *  -# Using the values calculated above, loop through the azimuthal bins that
 + *     are partially or completely covered by the cone and update them.
 + *
 + * The total solid angle (for covered fraction calculations) is estimated by
 + * taking the total area of completely covered bins plus
 + * half the area of partially covered bins.
 + * The second one is an approximation, but should give reasonable estimates
 + * for the averages as well as in cases where the bin size is small.
 + */
 +/*! \internal \file
 + * \brief
 + * Implements the \ref sm_insolidangle "insolidangle" selection method.
 + *
 + * \todo
 + * The implementation could be optimized quite a bit.
 + *
 + * \todo
 + * Move the covered fraction stuff somewhere else and make it more generic
 + * (along the lines it is handled in selection.h and trajana.h in the old C
 + * API).
 + *
 + * \author Teemu Murtola <teemu.murtola@cbr.su.se>
 + * \ingroup module_selection
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <math.h>
 +
 +#include <macros.h>
 +#include <maths.h>
 +#include <pbc.h>
 +#include <physics.h>
 +#include <smalloc.h>
 +#include <vec.h>
 +
 +// FIXME: Should really be in the beginning, but causes compilation errors
 +#include <algorithm>
 +
 +#include "gromacs/fatalerror/exceptions.h"
 +#include "gromacs/selection/indexutil.h"
 +#include "gromacs/selection/position.h"
 +#include "gromacs/selection/selection.h"
 +#include "gromacs/selection/selmethod.h"
 +
 +#include "selelem.h"
 +
 +using std::min;
 +using std::max;
 +
 +/*! \internal \brief
 + * Internal data structure for the \p insolidangle selection method.
 + *
 + * One entry of the edge array of a \c t_partition: the azimuthal angle at
 + * which a bin starts, together with the index of that bin in the
 + * \c t_methoddata_insolidangle::bin array.
 + * The terminating entry of the array only supplies the right edge of the
 + * last bin; clear_surface_points() sets its \p bin to -1.
 + *
 + * \see \c t_partition
 + */
 +typedef struct
 +{
 +    /** Left edge of the partition (azimuthal angle, radians). */
 +    real                left;
 +    /** Bin index corresponding to this partition. */
 +    int                 bin;
 +} t_partition_item;
 +
 +/*! \internal \brief
 + * Internal data structure for the \p insolidangle selection method.
 + *
 + * Describes the surface partitioning within one slice along the zenith angle.
 + * The slice from azimuthal angle \p p[i].left to \p p[i+1].left belongs to
 + * bin \p p[i].bin.
 + * The edges are kept in increasing order so that find_partition_bin() can
 + * locate a bin with a binary search.
 + */
 +typedef struct
 +{
 +    /** Number of partition items (\p p contains \p n+1 items). */
 +    int                 n;
 +    /** Array of partition edges and corresponding bins. */
 +    t_partition_item   *p;
 +} t_partition;
 +
 +/*! \internal \brief
 + * Internal data structure for the \p insolidangle selection method.
 + *
 + * Contains the reference points that partially cover a certain region on the
 + * surface of the unit sphere.
 + * If \p n is -1, the whole region described by the bin is covered.
 + * In that state add_surface_point() ignores further points for the bin and
 + * is_surface_covered() accepts any test point that falls into it.
 + */
 +typedef struct
 +{
 +    /** Number of points in the array \p x, -1 if whole bin covered. */
 +    int   n;
 +    /** Number of elements allocated for \p x. */
 +    int   n_alloc;
 +    /** Array of points that partially cover the bin. */
 +    rvec *x;
 +} t_spheresurfacebin;
 +
 +/*! \internal \brief
 + * Data structure for the \p insolidangle selection method.
 + *
 + * All angle values are in the units of radians once init_insolidangle() has
 + * run; before that, \p angcut holds the user-supplied value in degrees.
 + */
 +typedef struct
 +{
 +    /** Center of the solid angle. */
 +    gmx_ana_pos_t       center;
 +    /** Positions that span the solid angle. */
 +    gmx_ana_pos_t       span;
 +    /** Cutoff angle (degrees as parsed, radians after init_insolidangle()). */
 +    real                angcut;
 +    /** Estimate of the covered fraction (negative if not yet calculated for the frame). */
 +    real                cfrac;
 +
 +    /** Cutoff for sph_distc(), i.e., for minus the cosine (equals -cos(angcut)). */
 +    real                distccut;
 +    /** Bin size to be used as the target bin size when constructing the bins. */
 +    real                targetbinsize;
 +
 +    /** Number of bins in the \p tbin array. */
 +    int                 ntbins;
 +    /** Size of one bin in the zenith angle direction. */
 +    real                tbinsize;
 +    /** Array of zenith angle slices. */
 +    t_partition        *tbin;
 +    /** Number of elements allocated for the \p bin array. */
 +    int                 maxbins;
 +    /** Number of elements used in the \p bin array. */
 +    int                 nbins;
 +    /** Array of individual bins. */
 +    t_spheresurfacebin *bin;
 +} t_methoddata_insolidangle;
 +
 +/** Allocates data for the \p insolidangle selection method. */
 +static void *
 +init_data_insolidangle(int npar, gmx_ana_selparam_t *param);
 +/** Initializes the \p insolidangle selection method. */
 +static void
 +init_insolidangle(t_topology *top, int npar, gmx_ana_selparam_t *param, void *data);
- /** Sets the COM/COG data for the \p insolidangle selection method. */
- static void
- set_comg_insolidangle(gmx_ana_pos_t *pos, void *data);
 +/** Frees the data allocated for the \p insolidangle selection method. */
 +static void
 +free_data_insolidangle(void *data);
 +/** Initializes the evaluation of the \p insolidangle selection method for a frame. */
 +static void
 +init_frame_insolidangle(t_topology *top, t_trxframe *fr, t_pbc *pbc, void *data);
 +/** Internal helper function for evaluate_insolidangle(). */
 +static gmx_bool
 +accept_insolidangle(rvec x, t_pbc *pbc, void *data);
 +/** Evaluates the \p insolidangle selection method. */
 +static void
 +evaluate_insolidangle(t_topology *top, t_trxframe *fr, t_pbc *pbc,
 +                      gmx_ana_pos_t *pos, gmx_ana_selvalue_t *out, void *data);
 +
 +/** Calculates the distance between unit vectors. */
 +static real
 +sph_distc(rvec x1, rvec x2);
 +/** Does a binary search on a \p t_partition to find a bin for a value. */
 +static int
 +find_partition_bin(t_partition *p, real value);
 +/** Finds a bin that corresponds to a location on the unit sphere surface. */
 +static int
 +find_surface_bin(t_methoddata_insolidangle *surf, rvec x);
 +/** Clears/initializes the bins on the unit sphere surface. */
 +static void
 +clear_surface_points(t_methoddata_insolidangle *surf);
 +/** Frees memory allocated for storing the reference points in the surface bins. */
 +static void
 +free_surface_points(t_methoddata_insolidangle *surf);
 +/** Adds a reference point to a given bin. */
 +static void
 +add_surface_point(t_methoddata_insolidangle *surf, int tbin, int pbin, rvec x);
 +/** Marks a bin as completely covered. */
 +static void
 +mark_surface_covered(t_methoddata_insolidangle *surf, int tbin, int pbin);
 +/** Helper function for store_surface_point() to update a single zenith angle bin. */
 +static void
 +update_surface_bin(t_methoddata_insolidangle *surf, int tbin,
 +                   real phi, real pdelta1, real pdelta2, real pdeltamax,
 +                   rvec x);
 +/** Adds a single reference point and updates the surface bins. */
 +static void
 +store_surface_point(t_methoddata_insolidangle *surf, rvec x);
 +/** Optimizes the surface bins for faster searching. */
 +static void
 +optimize_surface_points(t_methoddata_insolidangle *surf);
 +/** Estimates the area covered by the reference cones. */
 +static real
 +estimate_covered_fraction(t_methoddata_insolidangle *surf);
 +/** Checks whether a point lies within a solid angle. */
 +static gmx_bool
 +is_surface_covered(t_methoddata_insolidangle *surf, rvec x);
 +
 +/** Parameters for the \p insolidangle selection method.
 + *
 + * The order of the entries matters: init_data_insolidangle() binds the
 + * storage for the values by index (0 = center, 1 = span, 2 = cutoff).
 + */
 +static gmx_ana_selparam_t smparams_insolidangle[] = {
 +    {"center", {POS_VALUE,   1, {NULL}}, NULL, SPAR_DYNAMIC},
 +    {"span",   {POS_VALUE,  -1, {NULL}}, NULL, SPAR_DYNAMIC | SPAR_VARNUM},
 +    {"cutoff", {REAL_VALUE,  1, {NULL}}, NULL, SPAR_OPTIONAL},
 +};
 +
 +/** Help text for the \p insolidangle selection method.
 + *
 + * Each array element is one fragment of the help text; every fragment must
 + * be followed by a comma, because adjacent string literals without one are
 + * silently concatenated by the compiler into a single element.
 + */
 +static const char *help_insolidangle[] = {
 +    "SELECTING ATOMS IN A SOLID ANGLE[PAR]",
 +
 +    "[TT]insolidangle center POS span POS_EXPR [cutoff REAL][tt][PAR]",
 +
 +    "This keyword selects atoms that are within [TT]REAL[tt] degrees",
 +    "(default=5) of any position in [TT]POS_EXPR[tt] as seen from [TT]POS[tt]",
 +    "(a position expression that evaluates to a single position), i.e., atoms",
 +    "in the solid angle spanned by the positions in [TT]POS_EXPR[tt] and",
 +    "centered at [TT]POS[tt].[PAR]",
 +
 +    "Technically, the solid angle is constructed as a union of small cones",
 +    "whose tip is at [TT]POS[tt] and the axis goes through a point in",
 +    "[TT]POS_EXPR[tt]. There is such a cone for each position in",
 +    "[TT]POS_EXPR[tt], and a point is in the solid angle if it lies within any",
 +    "of these cones. The cutoff determines the width of the cones.",
 +};
 +
 +/** \internal Selection method data for the \p insolidangle method.
 + *
 + * The initializer is positional; the meaning of each slot (including the
 + * ones left as NULL) is defined by \c gmx_ana_selmethod_t, which is declared
 + * outside this file.
 + */
 +gmx_ana_selmethod_t sm_insolidangle = {
 +    "insolidangle", GROUP_VALUE, SMETH_DYNAMIC,
 +    /* Parameter count and parameter table */
 +    asize(smparams_insolidangle), smparams_insolidangle,
 +    &init_data_insolidangle,
 +    NULL,
 +    &init_insolidangle,
 +    NULL,
 +    &free_data_insolidangle,
 +    &init_frame_insolidangle,
 +    NULL,
 +    &evaluate_insolidangle,
 +    /* Syntax line followed by the help text table */
 +    {"insolidangle center POS span POS_EXPR [cutoff REAL]",
 +     asize(help_insolidangle), help_insolidangle},
 +};
 +
 +/*!
 + * \param[in]     npar  Not used (should be 3).
 + * \param[in,out] param Method parameters (should point to
 + *   \ref smparams_insolidangle).
 + * \returns Pointer to the allocated data (\ref t_methoddata_insolidangle).
 + *
 + * Allocates a \ref t_methoddata_insolidangle structure and binds the value
 + * storage of the parameters to its members:
 + *  - \p center is stored in t_methoddata_insolidangle::center.
 + *  - \p span   is stored in t_methoddata_insolidangle::span.
 + *  - \p cutoff is stored in t_methoddata_insolidangle::angcut, which is
 + *    preset to 5 (degrees) so that omitting the parameter gives the default.
 + */
 +static void *
 +init_data_insolidangle(int npar, gmx_ana_selparam_t *param)
 +{
 +    t_methoddata_insolidangle *data;
 +
 +    snew(data, 1);
 +
 +    /* Bind the parameter values to the freshly allocated structure */
 +    param[2].val.u.r = &data->angcut;
 +    param[1].val.u.p = &data->span;
 +    param[0].val.u.p = &data->center;
 +
 +    /* Default cutoff; still in degrees at this point */
 +    data->angcut = 5.0;
 +
 +    return data;
 +}
 +
 +/*!
 + * \param   top  Not used.
 + * \param   npar Not used.
 + * \param   param Not used.
 + * \param   data Pointer to \ref t_methoddata_insolidangle to initialize.
 + *
 + * Converts t_methoddata_insolidangle::angcut to radians, derives the bin
 + * sizes from it and allocates memory for the bins used during the
 + * evaluation.
 + *
 + * Throws gmx::InvalidInputError if the angle cutoff is not positive.
 + */
 +static void
 +init_insolidangle(t_topology *top, int npar, gmx_ana_selparam_t *param, void *data)
 +{
 +    t_methoddata_insolidangle *surf = (t_methoddata_insolidangle *)data;
 +    int                        i, c;
 +
 +    if (surf->angcut <= 0)
 +    {
 +        GMX_THROW(gmx::InvalidInputError("Angle cutoff should be > 0"));
 +    }
 +
 +    surf->angcut *= DEG2RAD;
 +
 +    /* sph_distc() returns minus the cosine, hence the sign */
 +    surf->distccut = -cos(surf->angcut);
 +    surf->targetbinsize = surf->angcut / 2;
 +    surf->ntbins = (int) (M_PI / surf->targetbinsize);
 +    /* For very large cutoffs the target bin size exceeds pi and the division
 +     * above truncates to zero; always keep at least one zenith slice so that
 +     * tbinsize below is finite and the bin lookups stay within the arrays. */
 +    if (surf->ntbins < 1)
 +    {
 +        surf->ntbins = 1;
 +    }
 +    surf->tbinsize = (180.0 / surf->ntbins)*DEG2RAD;
 +
 +    snew(surf->tbin, (int)(M_PI/surf->tbinsize) + 1);
 +    surf->maxbins = 0;
 +    for (i = 0; i < surf->ntbins; ++i)
 +    {
 +        /* Upper bound for the number of azimuthal bins in slice i (uses the
 +         * longer edge of the slice); c+1 items are needed to store the
 +         * closing right edge as well. */
 +        c = max(sin(surf->tbinsize*i), sin(surf->tbinsize*(i+1)))
 +              * M_2PI / surf->targetbinsize + 1;
 +        snew(surf->tbin[i].p, c+1);
 +        surf->maxbins += c;
 +    }
 +    surf->nbins = 0;
 +    snew(surf->bin, surf->maxbins);
 +}
 +
 +/*!
 + * \param data Data to free (should point to a \ref t_methoddata_insolidangle).
 + *
 + * Releases the internal bin structure: the edge arrays of all zenith
 + * slices, the slice array itself, the reference points stored in the bins
 + * and finally the bin array.
 + */
 +static void
 +free_data_insolidangle(void *data)
 +{
 +    t_methoddata_insolidangle *d = (t_methoddata_insolidangle *)data;
 +    int                        i;
 +
 +    /* The slices only exist if the method was initialized */
 +    if (d->tbin)
 +    {
 +        i = 0;
 +        while (i < d->ntbins)
 +        {
 +            sfree(d->tbin[i].p);
 +            ++i;
 +        }
 +        sfree(d->tbin);
 +    }
 +
 +    /* Per-bin point arrays must go before the array that owns them */
 +    free_surface_points(d);
 +    sfree(d->bin);
 +}
 +
 +/*!
 + * \param[in]  top  Not used.
 + * \param[in]  fr   Current frame.
 + * \param[in]  pbc  PBC structure.
 + * \param      data Should point to a \ref t_methoddata_insolidangle.
 + *
 + * Rebuilds the lookup structure for the frame: every spanning position is
 + * turned into a unit vector as seen from the center and stored into the
 + * surface bins, which makes later point-in-solid-angle queries fast.
 + */
 +static void
 +init_frame_insolidangle(t_topology *top, t_trxframe *fr, t_pbc *pbc, void *data)
 +{
 +    t_methoddata_insolidangle *surf = (t_methoddata_insolidangle *)data;
 +    int                        ref;
 +
 +    /* Discard the points of the previous frame and reset the bin layout */
 +    free_surface_points(surf);
 +    clear_surface_points(surf);
 +
 +    ref = 0;
 +    while (ref < surf->span.nr)
 +    {
 +        rvec dir;
 +
 +        /* Vector from the center to the reference position */
 +        if (!pbc)
 +        {
 +            rvec_sub(surf->span.x[ref], surf->center.x[0], dir);
 +        }
 +        else
 +        {
 +            pbc_dx(pbc, surf->span.x[ref], surf->center.x[0], dir);
 +        }
 +        unitv(dir, dir);
 +        store_surface_point(surf, dir);
 +        ++ref;
 +    }
 +
 +    optimize_surface_points(surf);
 +    /* Negative value: the covered fraction is recalculated on demand */
 +    surf->cfrac = -1;
 +}
 +
 +/*!
 + * \param[in] x    Test point.
 + * \param[in] pbc  PBC data (if NULL, no PBC are used).
 + * \param[in] data Pointer to a \c t_methoddata_insolidangle data structure.
 + * \returns   TRUE if \p x is within the solid angle, FALSE otherwise.
 + *
 + * The test point is converted into a unit vector as seen from the center
 + * and looked up in the surface bins prepared by init_frame_insolidangle().
 + */
 +static gmx_bool
 +accept_insolidangle(rvec x, t_pbc *pbc, void *data)
 +{
 +    t_methoddata_insolidangle *surf = (t_methoddata_insolidangle *)data;
 +    rvec                       dir;
 +
 +    if (!pbc)
 +    {
 +        rvec_sub(x, surf->center.x[0], dir);
 +    }
 +    else
 +    {
 +        pbc_dx(pbc, x, surf->center.x[0], dir);
 +    }
 +    unitv(dir, dir);
 +
 +    return is_surface_covered(surf, dir);
 +}
 +
 +/*!
 + * See sel_updatefunc() for description of the parameters.
 + * \p data should point to a \c t_methoddata_insolidangle.
 + *
 + * Calculates which positions in \p pos are within the solid angle spanned by
 + * \c t_methoddata_insolidangle::span and centered at
 + * \c t_methoddata_insolidangle::center, and stores the result in \p out->u.g.
 + */
 +static void
 +evaluate_insolidangle(t_topology *top, t_trxframe *fr, t_pbc *pbc,
 +                      gmx_ana_pos_t *pos, gmx_ana_selvalue_t *out, void *data)
 +{
 +    int b;
 +
 +    /* Start from an empty group and append every accepted position */
 +    out->u.g->isize = 0;
 +    for (b = 0; b < pos->nr; ++b)
 +    {
 +        if (accept_insolidangle(pos->x[b], pbc, data))
 +        {
 +            gmx_ana_pos_append(NULL, out->u.g, pos, b, 0);
 +        }
 +    }
 +}
 +
 +/*!
 + * \param[in] sel Selection element to query.
 + * \returns   TRUE if the covered fraction can be estimated for \p sel with
 + *   _gmx_selelem_estimate_coverfrac(), FALSE otherwise.
 + *
 + * The estimate is refused for OR expressions, for elements that have more
 + * than one \p insolidangle child, and for elements that combine an
 + * \p insolidangle child with another dynamic method.
 + * Non-expression children are checked recursively.
 + */
 +gmx_bool
 +_gmx_selelem_can_estimate_cover(t_selelem *sel)
 +{
 +    t_selelem   *child;
 +    gmx_bool         bFound;
 +    gmx_bool         bDynFound;
 +
 +    if (sel->type == SEL_BOOLEAN && sel->u.boolt == BOOL_OR)
 +    {
 +        return FALSE;
 +    }
 +    bFound    = FALSE;
 +    bDynFound = FALSE;
 +    child     = sel->child;
 +    while (child)
 +    {
 +        if (child->type == SEL_EXPRESSION)
 +        {
 +            /* The method pointer is checked before every dereference; the
 +             * method is identified by the address of its name string. */
 +            if (child->u.expr.method
 +                && child->u.expr.method->name == sm_insolidangle.name)
 +            {
 +                if (bFound || bDynFound)
 +                {
 +                    return FALSE;
 +                }
 +                bFound = TRUE;
 +            }
 +            else if (child->u.expr.method
 +                     && (child->u.expr.method->flags & SMETH_DYNAMIC))
 +            {
 +                if (bFound)
 +                {
 +                    return FALSE;
 +                }
 +                bDynFound = TRUE;
 +            }
 +        }
 +        else if (!_gmx_selelem_can_estimate_cover(child))
 +        {
 +            return FALSE;
 +        }
 +        child = child->next;
 +    }
 +    return TRUE;
 +}
 +
 +/*!
 + * \param[in] sel Selection for which the fraction should be calculated.
 + * \returns Fraction of angles covered by the selection (between zero and one).
 + *
 + * The return value is undefined if _gmx_selelem_can_estimate_cover() returns
 + * FALSE.
 + * Should be called after gmx_ana_evaluate_selections() has been called for the
 + * frame.
 + *
 + * For an \p insolidangle expression, the fraction is calculated on first use
 + * and cached in t_methoddata_insolidangle::cfrac.
 + * For a NOT expression, the complement of the child's fraction is returned.
 + * For anything else, the first child fraction below one is returned, and
 + * 1.0 if there is none.
 + */
 +real
 +_gmx_selelem_estimate_coverfrac(t_selelem *sel)
 +{
 +    t_selelem   *child;
 +    real         cfrac;
 +
 +    /* Check the method pointer before dereferencing it, as
 +     * _gmx_selelem_can_estimate_cover() does. */
 +    if (sel->type == SEL_EXPRESSION && sel->u.expr.method
 +        && sel->u.expr.method->name == sm_insolidangle.name)
 +    {
 +        t_methoddata_insolidangle *d = (t_methoddata_insolidangle *)sel->u.expr.mdata;
 +        /* A negative value marks a fraction not yet calculated for the frame */
 +        if (d->cfrac < 0)
 +        {
 +            d->cfrac = estimate_covered_fraction(d);
 +        }
 +        return d->cfrac;
 +    }
 +    if (sel->type == SEL_BOOLEAN && sel->u.boolt == BOOL_NOT)
 +    {
 +        cfrac = _gmx_selelem_estimate_coverfrac(sel->child);
 +        if (cfrac < 1.0)
 +        {
 +            return 1 - cfrac;
 +        }
 +        return 1;
 +    }
 +
 +    /* Here, we assume that the selection is simple enough */
 +    child = sel->child;
 +    while (child)
 +    {
 +        cfrac = _gmx_selelem_estimate_coverfrac(child);
 +        if (cfrac < 1.0)
 +        {
 +            return cfrac;
 +        }
 +        child = child->next;
 +    }
 +    return 1.0;
 +}
 +
 +/*!
 + * \param[in] x1  Unit vector 1.
 + * \param[in] x2  Unit vector 2.
 + * \returns   Minus the dot product of \p x1 and \p x2.
 + *
 + * Serves as the distance measure between two directions when testing
 + * whether \p x2 lies within the cone centered at \p x1.
 + * The cosine of the angle is used for efficiency; its sign is flipped so
 + * that, like a normal distance, larger values mean directions that are
 + * further apart.
 + */
 +static real
 +sph_distc(rvec x1, rvec x2)
 +{
 +    const real cosine = iprod(x1, x2);
 +
 +    return -cosine;
 +}
 +
 +/*!
 + * \param[in] p     Partition to search.
 + * \param[in] value Value to search for.
 + * \returns   The partition index in \p p that contains \p value.
 + *
 + * If \p value is outside the range of \p p, the first/last index is returned.
 + * Otherwise, the return value \c i satisfies \c p->p[i].left<=value and
 + * \c p->p[i+1].left>value
 + */
 +static int
 +find_partition_bin(t_partition *p, real value)
 +{
 +    /* Bisection: the answer always stays within [lo, hi) */
 +    int lo = 0;
 +    int hi = p->n;
 +
 +    while (hi - lo > 1)
 +    {
 +        const int mid = lo + (hi - lo) / 2;
 +
 +        if (p->p[mid].left <= value)
 +        {
 +            lo = mid;
 +        }
 +        else
 +        {
 +            hi = mid;
 +        }
 +    }
 +    return lo;
 +}
 +
 +/*!
 + * \param[in] surf  Surface data structure to search.
 + * \param[in] x     Unit vector to find.
 + * \returns   The bin index that contains \p x.
 + *
 + * The return value is an index to the \p surf->bin array.
 + */
 +static int
 +find_surface_bin(t_methoddata_insolidangle *surf, rvec x)
 +{
 +    real costheta, theta, phi;
 +    int  tbin, pbin;
 +
 +    /* Normalization can leave the z component marginally outside [-1, 1];
 +     * acos() would then return NaN, which must not be turned into an array
 +     * index below. */
 +    costheta = x[ZZ];
 +    if (costheta > 1)
 +    {
 +        costheta = 1;
 +    }
 +    else if (costheta < -1)
 +    {
 +        costheta = -1;
 +    }
 +    theta = acos(costheta);
 +    phi = atan2(x[YY], x[XX]);
 +    tbin = floor(theta / surf->tbinsize);
 +    /* theta == pi would otherwise fall just past the last slice */
 +    if (tbin >= surf->ntbins)
 +    {
 +        tbin = surf->ntbins - 1;
 +    }
 +    pbin = find_partition_bin(&surf->tbin[tbin], phi);
 +    return surf->tbin[tbin].p[pbin].bin;
 +}
 +
 +/*!
 + * \param[in,out] surf Surface data structure.
 + *
 + * Empties all bins and lays out the azimuthal bin edges of every zenith
 + * slice again, numbering the bins consecutively over all slices.
 + */
 +static void
 +clear_surface_points(t_methoddata_insolidangle *surf)
 +{
 +    int slice, edge, nphi;
 +
 +    surf->nbins = 0;
 +    for (slice = 0; slice < surf->ntbins; ++slice)
 +    {
 +        t_partition *part = &surf->tbin[slice];
 +
 +        /* Number of azimuthal bins, based on the shorter edge of the slice */
 +        nphi = min(sin(surf->tbinsize*slice), sin(surf->tbinsize*(slice+1)))
 +              * M_2PI / surf->targetbinsize + 1;
 +        if (nphi <= 0)
 +        {
 +            nphi = 1;
 +        }
 +        part->n = nphi;
 +
 +        for (edge = 0; edge < nphi; ++edge)
 +        {
 +            t_partition_item *item = &part->p[edge];
 +
 +            item->left = -M_PI + edge*M_2PI/nphi - 0.0001;
 +            item->bin = surf->nbins;
 +            surf->bin[surf->nbins].n = 0;
 +            surf->nbins++;
 +        }
 +
 +        /* Closing item: right edge of the last bin, no bin of its own */
 +        part->p[nphi].left = M_PI + 0.0001;
 +        part->p[nphi].bin = -1;
 +    }
 +}
 +
 +/*!
 + * \param[in,out] surf Surface data structure.
 + *
 + * Releases the point array of every bin currently in use and resets the
 + * allocation bookkeeping of those bins.
 + */
 +static void
 +free_surface_points(t_methoddata_insolidangle *surf)
 +{
 +    int i = 0;
 +
 +    while (i < surf->nbins)
 +    {
 +        if (surf->bin[i].x)
 +        {
 +            sfree(surf->bin[i].x);
 +        }
 +        surf->bin[i].x = NULL;
 +        surf->bin[i].n_alloc = 0;
 +        ++i;
 +    }
 +}
 +
 +/*!
 + * \param[in,out] surf Surface data structure.
 + * \param[in]     tbin Bin number in the zenith angle direction.
 + * \param[in]     pbin Bin number in the azimuthal angle direction.
 + * \param[in]     x    Point to store.
 + *
 + * Appends \p x to the list of points that partially cover the bin.
 + * Nothing is stored if the bin is already marked as completely covered.
 + */
 +static void
 +add_surface_point(t_methoddata_insolidangle *surf, int tbin, int pbin, rvec x)
 +{
 +    int bin;
 +
 +    bin = surf->tbin[tbin].p[pbin].bin;
 +    /* Only bins that are not yet completely covered collect points */
 +    if (surf->bin[bin].n != -1)
 +    {
 +        /* Grow the storage in steps of ten points when it is full */
 +        if (surf->bin[bin].n_alloc == surf->bin[bin].n)
 +        {
 +            surf->bin[bin].n_alloc += 10;
 +            srenew(surf->bin[bin].x, surf->bin[bin].n_alloc);
 +        }
 +        copy_rvec(x, surf->bin[bin].x[surf->bin[bin].n]);
 +        surf->bin[bin].n++;
 +    }
 +}
 +
 +/*!
 + * \param[in,out] surf Surface data structure.
 + * \param[in]     tbin Bin number in the zenith angle direction.
 + * \param[in]     pbin Bin number in the azimuthal angle direction.
 + *
 + * Flags the bin as completely covered by setting its point count to -1.
 + */
 +static void
 +mark_surface_covered(t_methoddata_insolidangle *surf, int tbin, int pbin)
 +{
 +    surf->bin[surf->tbin[tbin].p[pbin].bin].n = -1;
 +}
 +
 +/*!
 + * \param[in,out] surf      Surface data structure.
 + * \param[in]     tbin      Bin number in the zenith angle direction.
 + * \param[in]     phi       Azimuthal angle of \p x.
 + * \param[in]     pdelta1   Width of the cone at the lower edge of \p tbin.
 + * \param[in]     pdelta2   Width of the cone at the upper edge of \p tbin.
 + * \param[in]     pdeltamax Max. width of the cone inside \p tbin.
 + * \param[in]     x         Point to store (should have unit length).
 + *
 + * All widths are half-widths in the azimuthal direction (radians).
 + * Every azimuthal bin of the slice that the cone may touch is visited:
 + * bins that lie within the cone at both edges of the slice are marked as
 + * completely covered, and \p x is added to the other ones.
 + * If the check for complete coverage is in doubt, the point is added,
 + * which only costs an explicit distance test later on.
 + */
 +static void
 +update_surface_bin(t_methoddata_insolidangle *surf, int tbin,
 +                   real phi, real pdelta1, real pdelta2, real pdeltamax,
 +                   rvec x)
 +{
 +    real pdelta, phi1, phi2;
 +    int  pbin1, pbin2, pbin;
 +
 +    /* Find the edges of the bins affected */
 +    pdelta = max(max(pdelta1, pdelta2), pdeltamax);
 +    if (!(pdelta < M_PI))
 +    {
 +        /* The cone spans the full circle somewhere in this slice.  Wrapping
 +         * phi-pdelta and phi+pdelta would map both edges to the same
 +         * azimuth and only a single bin would be visited, so select the
 +         * whole slice explicitly.  The negated comparison also sends a NaN
 +         * width here, which errs on the side of visiting too many bins. */
 +        pbin1 = 0;
 +        pbin2 = surf->tbin[tbin].n - 1;
 +    }
 +    else
 +    {
 +        phi1 = phi - pdelta;
 +        if (phi1 < -M_PI)
 +        {
 +            phi1 += M_2PI;
 +        }
 +        phi2 = phi + pdelta;
 +        if (phi2 > M_PI)
 +        {
 +            phi2 -= M_2PI;
 +        }
 +        pbin1 = find_partition_bin(&surf->tbin[tbin], phi1);
 +        pbin2 = find_partition_bin(&surf->tbin[tbin], phi2);
 +    }
 +    /* Find the edges of completely covered region */
 +    pdelta = min(pdelta1, pdelta2);
 +    if (pdelta >= M_PI)
 +    {
 +        /* Full circle covered at both edges of the slice: use limits that
 +         * enclose every bin, also after the wrap-around shift below */
 +        phi1 = -M_2PI;
 +        phi2 = 2*M_2PI;
 +    }
 +    else
 +    {
 +        phi1 = phi - pdelta;
 +        if (phi1 < -M_PI)
 +        {
 +            phi1 += M_2PI;
 +        }
 +        phi2 = phi + pdelta;
 +    }
 +    /* Loop over all affected bins */
 +    pbin = pbin1;
 +    do
 +    {
 +        /* Wrap bin around if end reached */
 +        if (pbin == surf->tbin[tbin].n)
 +        {
 +            pbin = 0;
 +            phi1 -= M_2PI;
 +            phi2 -= M_2PI;
 +        }
 +        /* Check if bin is completely covered and update */
 +        if (surf->tbin[tbin].p[pbin].left >= phi1
 +            && surf->tbin[tbin].p[pbin+1].left <= phi2)
 +        {
 +            mark_surface_covered(surf, tbin, pbin);
 +        }
 +        else
 +        {
 +            add_surface_point(surf, tbin, pbin, x);
 +        }
 +    }
 +    while (pbin++ != pbin2); /* Loop including pbin2 */
 +}
 +
 +/*!
 + * \param[in,out] surf Surface data structure.
 + * \param[in]     x    Point to store (should have unit length).
 + *
 + * Finds all the bins covered by the cone centered at \p x and calls
 + * update_surface_bin() to update them.
 + *
 + * The zenith slices touched by the cone are walked from the lowest zenith
 + * angle upwards.  For each slice, the azimuthal half-width of the cone is
 + * evaluated at the lower and upper edge of the slice; the maximal
 + * half-width is passed along only for the slice in which it is reached.
 + */
 +static void
 +store_surface_point(t_methoddata_insolidangle *surf, rvec x)
 +{
 +    real costheta, theta, phi;
 +    real pdeltamax, tmax;
 +    real theta1, theta2, pdelta1, pdelta2;
 +    int  tbin;
 +
 +    /* Guard acos() against a z component that rounding has pushed
 +     * marginally outside [-1, 1] */
 +    costheta = x[ZZ];
 +    if (costheta > 1)
 +    {
 +        costheta = 1;
 +    }
 +    else if (costheta < -1)
 +    {
 +        costheta = -1;
 +    }
 +    theta = acos(costheta);
 +    phi = atan2(x[YY], x[XX]);
 +    /* Find the maximum extent in the phi direction */
 +    if (theta <= surf->angcut)
 +    {
 +        /* The cone contains the pole at theta = 0 */
 +        pdeltamax = M_PI;
 +        tmax = 0;
 +    }
 +    else if (theta >= M_PI - surf->angcut)
 +    {
 +        /* The cone contains the pole at theta = pi */
 +        pdeltamax = M_PI;
 +        tmax = M_PI;
 +    }
 +    else
 +    {
 +        pdeltamax = asin(sin(surf->angcut) / sin(theta));
 +        tmax = acos(cos(theta) / cos(surf->angcut));
 +    }
 +    /* Find the first affected bin */
 +    tbin = max(floor((theta - surf->angcut) / surf->tbinsize), 0.0);
 +    theta1 = tbin * surf->tbinsize;
 +    if (theta1 < theta - surf->angcut)
 +    {
 +        pdelta1 = 0;
 +    }
 +    else
 +    {
 +        pdelta1 = M_PI;
 +    }
 +    /* Loop through all affected bins */
 +    while (tbin < ceil((theta + surf->angcut) / surf->tbinsize)
 +           && tbin < surf->ntbins)
 +    {
 +        /* Calculate the next boundaries */
 +        theta2 = (tbin+1) * surf->tbinsize;
 +        if (theta2 > theta + surf->angcut)
 +        {
 +            /* The circle at theta2 is completely outside the cone */
 +            pdelta2 = 0;
 +        }
 +        else if (theta2 <= -(theta - surf->angcut)
 +                 || theta2 >= M_2PI - (theta + surf->angcut)
 +                 || tbin == surf->ntbins - 1)
 +        {
 +            /* The circle at theta2 is completely inside the cone (the cone
 +             * contains a pole and reaches past theta2 on the far side), or
 +             * this is the last slice, which ends at the pole.  The general
 +             * formula below must not be used here: its asin() argument
 +             * would exceed one and the result would be NaN. */
 +            pdelta2 = M_PI;
 +        }
 +        else
 +        {
 +            pdelta2 = 2*asin(sqrt(
 +                    (sqr(sin(surf->angcut/2)) - sqr(sin((theta2-theta)/2))) /
 +                    (sin(theta) * sin(theta2))));
 +        }
 +        /* Update the bin */
 +        if (tmax >= theta1 && tmax <= theta2)
 +        {
 +            update_surface_bin(surf, tbin, phi, pdelta1, pdelta2, pdeltamax, x);
 +        }
 +        else
 +        {
 +            update_surface_bin(surf, tbin, phi, pdelta1, pdelta2, 0, x);
 +        }
 +        /* Next bin */
 +        theta1 = theta2;
 +        pdelta1 = pdelta2;
 +        ++tbin;
 +    }
 +}
 +
 +/*!
 + * \param[in,out] surf Surface data structure.
 + *
 + * Currently, this function does nothing.
 + * It is called by init_frame_insolidangle() after all the reference points
 + * of the frame have been stored, and serves as a placeholder for a future
 + * post-processing step on the bins.
 + */
 +static void
 +optimize_surface_points(t_methoddata_insolidangle *surf)
 +{
 +    /* TODO: Implement */
 +}
 +
 +/*!
 + * \param[in] surf Surface data structure.
 + * \returns   An estimate for the area covered by the reference points.
 + *
 + * The area of each bin is the product of its extent in the cosine of the
 + * zenith angle and its extent in the azimuthal angle.
 + * Completely covered bins count in full, bins that hold at least one
 + * reference point count as half covered, and the sum is divided by the
 + * area of the whole unit sphere.
 + */
 +static real
 +estimate_covered_fraction(t_methoddata_insolidangle *surf)
 +{
 +    real covered = 0.0;
 +
 +    for (int t = 0; t < surf->ntbins; ++t)
 +    {
 +        t_partition *slice = &surf->tbin[t];
 +        const real   tfrac = cos(t * surf->tbinsize) - cos((t+1) * surf->tbinsize);
 +
 +        for (int p = 0; p < slice->n; ++p)
 +        {
 +            const real pfrac = slice->p[p+1].left - slice->p[p].left;
 +            const int  npts  = surf->bin[slice->p[p].bin].n;
 +
 +            if (npts == -1)
 +            {
 +                /* Bin completely covered */
 +                covered += tfrac * pfrac;
 +            }
 +            else if (npts > 0)
 +            {
 +                /* Bin partially covered; half the area is a rough estimate */
 +                covered += tfrac * pfrac / 2;
 +            }
 +        }
 +    }
 +    return covered / (4*M_PI);
 +}
 +
 +/*!
 + * \param[in] surf  Surface data structure to search.
 + * \param[in] x     Unit vector to check.
 + * \returns   TRUE if \p x is within the solid angle, FALSE otherwise.
 + *
 + * Looks up the bin of \p x; a completely covered bin accepts the point
 + * immediately, otherwise \p x is compared against every reference point
 + * stored in the bin.
 + */
 +static gmx_bool
 +is_surface_covered(t_methoddata_insolidangle *surf, rvec x)
 +{
 +    t_spheresurfacebin *sbin = &surf->bin[find_surface_bin(surf, x)];
 +    int                 i;
 +
 +    if (sbin->n == -1)
 +    {
 +        /* Whole bin lies inside the solid angle */
 +        return TRUE;
 +    }
 +    for (i = 0; i < sbin->n; ++i)
 +    {
 +        /* Inside the cone of a stored point? */
 +        if (sph_distc(x, sbin->x[i]) < surf->distccut)
 +        {
 +            return TRUE;
 +        }
 +    }
 +    return FALSE;
 +}
diff --cc src/programs/mdrun/mdrun.c
index 377bc8fcca,0000000000..6a4f8906ba
mode 100644,000000..100644
--- a/src/programs/mdrun/mdrun.c
+++ b/src/programs/mdrun/mdrun.c
@@@ -1,696 -1,0 +1,701 @@@
 +/*  -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include "typedefs.h"
 +#include "macros.h"
 +#include "copyrite.h"
 +#include "main.h"
 +#include "statutil.h"
 +#include "smalloc.h"
 +#include "futil.h"
 +#include "smalloc.h"
 +#include "edsam.h"
 +#include "mdrun.h"
 +#include "xmdrun.h"
 +#include "checkpoint.h"
 +#ifdef GMX_THREADS
 +#include "thread_mpi.h"
 +#endif
 +
 +/* afm stuff */
 +#include "pull.h"
 +
 +/* Entry point of mdrun: parses the command line, resolves the -multi/-multidir
 + * and checkpoint (-cpi) options into output file names and MD_* flags, opens
 + * the log file unless output is appended, and hands control to mdrunner().
 + * Returns the value returned by mdrunner(). */
 +int main(int argc,char *argv[])
 +{
 +  const char *desc[] = {
 + #ifdef GMX_OPENMM
 +    "This is an experimental release of GROMACS for accelerated",
 +	"Molecular Dynamics simulations on GPU processors. Support is provided",
 +	"by the OpenMM library (https://simtk.org/home/openmm).[PAR]",
 +	"*Warning*[BR]",
 +	"This release is targeted at developers and advanced users and",
 +	"care should be taken before production use. The following should be",
 +	"noted before using the program:[PAR]",
 +	" * The current release runs only on modern nVidia GPU hardware with CUDA support.",
 +	"Make sure that the necessary CUDA drivers and libraries for your operating system",
 +	"are already installed. The CUDA SDK also should be installed in order to compile",
 +	"the program from source (http://www.nvidia.com/object/cuda_home.html).[PAR]",
 +	" * Multiple GPU cards are not supported.[PAR]",
 +	" * Only a small subset of the GROMACS features and options are supported on the GPUs.",
 +	"See below for a detailed list.[PAR]",
 +	" * Consumer level GPU cards are known to often have problems with faulty memory.",
 +	"It is recommended that a full memory check of the cards is done at least once",
 +	"(for example, using the memtest=full option).",
 +	"A partial memory check (for example, memtest=15) before and",
 +	"after the simulation run would help spot",
 +	"problems resulting from processor overheating.[PAR]",
 +	" * The maximum size of the simulated systems depends on the available",
 +	"GPU memory,for example, a GTX280 with 1GB memory has been tested with systems",
 +	"of up to about 100,000 atoms.[PAR]",
 +	" * In order to take a full advantage of the GPU platform features, many algorithms",
 +	"have been implemented in a very different way than they are on the CPUs.",
 +	"Therefore numercal correspondence between properties of the state of",
 +	"simulated systems should not be expected. Moreover, the values will likely vary",
 +	"when simulations are done on different GPU hardware.[PAR]",
 +	" * Frequent retrieval of system state information such as",
 +	"trajectory coordinates and energies can greatly influence the performance",
 +	"of the program due to slow CPU<->GPU memory transfer speed.[PAR]",
 +	" * MD algorithms are complex, and although the Gromacs code is highly tuned for them,",
 +	"they often do not translate very well onto the streaming architetures.",
 +	"Realistic expectations about the achievable speed-up from test with GTX280:",
 +	"For small protein systems in implicit solvent using all-vs-all kernels the acceleration",
 +	"can be as high as 20 times, but in most other setups involving cutoffs and PME the",
 +	"acceleration is usually only ~4 times relative to a 3GHz CPU.[PAR]",
 +	"Supported features:[PAR]",
 +	" * Integrators: md/md-vv/md-vv-avek, sd/sd1 and bd.\n",
 +	" * Long-range interactions (option coulombtype): Reaction-Field, Ewald, PME, and cut-off (for Implicit Solvent only)\n",
 +	" * Temperature control: Supported only with the md/md-vv/md-vv-avek, sd/sd1 and bd integrators.\n",
 +	" * Pressure control: Supported.\n",
 +	" * Implicit solvent: Supported.\n",
 +	"A detailed description can be found on the GROMACS website:\n",
 +	"http://www.gromacs.org/gpu[PAR]",
 +/* From the original mdrun documentaion */
 +    "The [TT]mdrun[tt] program reads the run input file ([TT]-s[tt])",
 +    "and distributes the topology over nodes if needed.",
 +    "[TT]mdrun[tt] produces at least four output files.",
 +    "A single log file ([TT]-g[tt]) is written, unless the option",
 +    "[TT]-seppot[tt] is used, in which case each node writes a log file.",
 +    "The trajectory file ([TT]-o[tt]), contains coordinates, velocities and",
 +    "optionally forces.",
 +    "The structure file ([TT]-c[tt]) contains the coordinates and",
 +    "velocities of the last step.",
 +    "The energy file ([TT]-e[tt]) contains energies, the temperature,",
 +    "pressure, etc, a lot of these things are also printed in the log file.",
 +    "Optionally coordinates can be written to a compressed trajectory file",
 +    "([TT]-x[tt]).[PAR]",
 +/* openmm specific information */
 +	"Usage with OpenMM:[BR]",
 +	"[TT]mdrun -device \"OpenMM:platform=Cuda,memtest=15,deviceid=0,force-device=no\"[tt][PAR]",
 +	"Options:[PAR]",
 +	"      [TT]platform[tt] = Cuda\t\t:\tThe only available value. OpenCL support will be available in future.\n",
 +	"      [TT]memtest[tt] = 15\t\t:\tRun a partial, random GPU memory test for the given amount of seconds. A full test",
 +	"(recommended!) can be run with \"memtest=full\". Memory testing can be disabled with \"memtest=off\".\n",
 +	"      [TT]deviceid[tt] = 0\t\t:\tSpecify the target device when multiple cards are present.",
 +	"Only one card can be used at any given time though.\n",
 +	"      [TT]force-device[tt] = no\t\t:\tIf set to \"yes\" [TT]mdrun[tt]  will be forced to execute on",
 +	"hardware that is not officially supported. GPU acceleration can also be achieved on older",
 +	"but Cuda capable cards, although the simulation might be too slow, and the memory limits too strict.",
 +#else
 +    "The [TT]mdrun[tt] program is the main computational chemistry engine",
 +    "within GROMACS. Obviously, it performs Molecular Dynamics simulations,",
 +    "but it can also perform Stochastic Dynamics, Energy Minimization,",
 +    "test particle insertion or (re)calculation of energies.",
 +    "Normal mode analysis is another option. In this case [TT]mdrun[tt]",
 +    "builds a Hessian matrix from single conformation.",
 +    "For usual Normal Modes-like calculations, make sure that",
 +    "the structure provided is properly energy-minimized.",
 +    "The generated matrix can be diagonalized by [TT]g_nmeig[tt].[PAR]",
 +    "The [TT]mdrun[tt] program reads the run input file ([TT]-s[tt])",
 +    "and distributes the topology over nodes if needed.",
 +    "[TT]mdrun[tt] produces at least four output files.",
 +    "A single log file ([TT]-g[tt]) is written, unless the option",
 +    "[TT]-seppot[tt] is used, in which case each node writes a log file.",
 +    "The trajectory file ([TT]-o[tt]), contains coordinates, velocities and",
 +    "optionally forces.",
 +    "The structure file ([TT]-c[tt]) contains the coordinates and",
 +    "velocities of the last step.",
 +    "The energy file ([TT]-e[tt]) contains energies, the temperature,",
 +    "pressure, etc, a lot of these things are also printed in the log file.",
 +    "Optionally coordinates can be written to a compressed trajectory file",
 +    "([TT]-x[tt]).[PAR]",
 +    "The option [TT]-dhdl[tt] is only used when free energy calculation is",
 +    "turned on.[PAR]",
 +    "When [TT]mdrun[tt] is started using MPI with more than 1 node, parallelization",
 +    "is used. By default domain decomposition is used, unless the [TT]-pd[tt]",
 +    "option is set, which selects particle decomposition.[PAR]",
 +    "With domain decomposition, the spatial decomposition can be set",
 +    "with option [TT]-dd[tt]. By default [TT]mdrun[tt] selects a good decomposition.",
 +    "The user only needs to change this when the system is very inhomogeneous.",
 +    "Dynamic load balancing is set with the option [TT]-dlb[tt],",
 +    "which can give a significant performance improvement,",
 +    "especially for inhomogeneous systems. The only disadvantage of",
 +    "dynamic load balancing is that runs are no longer binary reproducible,",
 +    "but in most cases this is not important.",
 +    "By default the dynamic load balancing is automatically turned on",
 +    "when the measured performance loss due to load imbalance is 5% or more.",
 +    "At low parallelization these are the only important options",
 +    "for domain decomposition.",
 +    "At high parallelization the options in the next two sections",
 +    "could be important for increasing the performace.",
 +    "[PAR]",
 +    "When PME is used with domain decomposition, separate nodes can",
 +    "be assigned to do only the PME mesh calculation;",
 +    "this is computationally more efficient starting at about 12 nodes.",
 +    "The number of PME nodes is set with option [TT]-npme[tt],",
 +    "this can not be more than half of the nodes.",
 +    "By default [TT]mdrun[tt] makes a guess for the number of PME",
 +    "nodes when the number of nodes is larger than 11 or performance wise",
 +    "not compatible with the PME grid x dimension.",
 +    "But the user should optimize npme. Performance statistics on this issue",
 +    "are written at the end of the log file.",
 +    "For good load balancing at high parallelization, the PME grid x and y",
 +    "dimensions should be divisible by the number of PME nodes",
 +    "(the simulation will run correctly also when this is not the case).",
 +    "[PAR]",
 +    "This section lists all options that affect the domain decomposition.",
 +    "[PAR]",
 +    "Option [TT]-rdd[tt] can be used to set the required maximum distance",
 +    "for inter charge-group bonded interactions.",
 +    "Communication for two-body bonded interactions below the non-bonded",
 +    "cut-off distance always comes for free with the non-bonded communication.",
 +    "Atoms beyond the non-bonded cut-off are only communicated when they have",
 +    "missing bonded interactions; this means that the extra cost is minor",
 +    "and nearly indepedent of the value of [TT]-rdd[tt].",
 +    "With dynamic load balancing option [TT]-rdd[tt] also sets",
 +    "the lower limit for the domain decomposition cell sizes.",
 +    "By default [TT]-rdd[tt] is determined by [TT]mdrun[tt] based on",
 +    "the initial coordinates. The chosen value will be a balance",
 +    "between interaction range and communication cost.",
 +    "[PAR]",
 +    "When inter charge-group bonded interactions are beyond",
 +    "the bonded cut-off distance, [TT]mdrun[tt] terminates with an error message.",
 +    "For pair interactions and tabulated bonds",
 +    "that do not generate exclusions, this check can be turned off",
 +    "with the option [TT]-noddcheck[tt].",
 +    "[PAR]",
 +    "When constraints are present, option [TT]-rcon[tt] influences",
 +    "the cell size limit as well.",
 +    "Atoms connected by NC constraints, where NC is the LINCS order plus 1,",
 +    "should not be beyond the smallest cell size. A error message is",
 +    "generated when this happens and the user should change the decomposition",
 +    "or decrease the LINCS order and increase the number of LINCS iterations.",
 +    "By default [TT]mdrun[tt] estimates the minimum cell size required for P-LINCS",
 +    "in a conservative fashion. For high parallelization it can be useful",
 +    "to set the distance required for P-LINCS with the option [TT]-rcon[tt].",
 +    "[PAR]",
 +    "The [TT]-dds[tt] option sets the minimum allowed x, y and/or z scaling",
 +    "of the cells with dynamic load balancing. [TT]mdrun[tt] will ensure that",
 +    "the cells can scale down by at least this factor. This option is used",
 +    "for the automated spatial decomposition (when not using [TT]-dd[tt])",
 +    "as well as for determining the number of grid pulses, which in turn",
 +    "sets the minimum allowed cell size. Under certain circumstances",
 +    "the value of [TT]-dds[tt] might need to be adjusted to account for",
 +    "high or low spatial inhomogeneity of the system.",
 +    "[PAR]",
 +    "The option [TT]-gcom[tt] can be used to only do global communication",
 +    "every n steps.",
 +    "This can improve performance for highly parallel simulations",
 +    "where this global communication step becomes the bottleneck.",
 +    "For a global thermostat and/or barostat the temperature",
 +    "and/or pressure will also only be updated every [TT]-gcom[tt] steps.",
 +    "By default it is set to the minimum of nstcalcenergy and nstlist.[PAR]",
 +    "With [TT]-rerun[tt] an input trajectory can be given for which ",
 +    "forces and energies will be (re)calculated. Neighbor searching will be",
 +    "performed for every frame, unless [TT]nstlist[tt] is zero",
 +    "(see the [TT].mdp[tt] file).[PAR]",
 +    "ED (essential dynamics) sampling is switched on by using the [TT]-ei[tt]",
 +    "flag followed by an [TT].edi[tt] file.",
 +    "The [TT].edi[tt] file can be produced using options in the essdyn",
 +    "menu of the WHAT IF program. [TT]mdrun[tt] produces a [TT].edo[tt] file that",
 +    "contains projections of positions, velocities and forces onto selected",
 +    "eigenvectors.[PAR]",
 +    "When user-defined potential functions have been selected in the",
 +    "[TT].mdp[tt] file the [TT]-table[tt] option is used to pass [TT]mdrun[tt]",
 +    "a formatted table with potential functions. The file is read from",
 +    "either the current directory or from the [TT]GMXLIB[tt] directory.",
 +    "A number of pre-formatted tables are presented in the [TT]GMXLIB[tt] dir,",
 +    "for 6-8, 6-9, 6-10, 6-11, 6-12 Lennard-Jones potentials with",
 +    "normal Coulomb.",
 +    "When pair interactions are present, a separate table for pair interaction",
 +    "functions is read using the [TT]-tablep[tt] option.[PAR]",
 +    "When tabulated bonded functions are present in the topology,",
 +    "interaction functions are read using the [TT]-tableb[tt] option.",
 +    "For each different tabulated interaction type the table file name is",
 +    "modified in a different way: before the file extension an underscore is",
 +    "appended, then a 'b' for bonds, an 'a' for angles or a 'd' for dihedrals",
 +    "and finally the table number of the interaction type.[PAR]",
 +    "The options [TT]-px[tt] and [TT]-pf[tt] are used for writing pull COM",
 +    "coordinates and forces when pulling is selected",
 +    "in the [TT].mdp[tt] file.[PAR]",
 +    "With [TT]-multi[tt] or [TT]-multidir[tt], multiple systems can be ",
 +    "simulated in parallel.",
 +    "As many input files/directories are required as the number of systems. ",
 +    "The [TT]-multidir[tt] option takes a list of directories (one for each ",
 +    "system) and runs in each of them, using the input/output file names, ",
 +    "such as specified by e.g. the [TT]-s[tt] option, relative to these ",
 +    "directories.",
 +    "With [TT]-multi[tt], the system number is appended to the run input ",
 +    "and each output filename, for instance [TT]topol.tpr[tt] becomes",
 +    "[TT]topol0.tpr[tt], [TT]topol1.tpr[tt] etc.",
 +    "The number of nodes per system is the total number of nodes",
 +    "divided by the number of systems.",
 +    "One use of this option is for NMR refinement: when distance",
 +    "or orientation restraints are present these can be ensemble averaged",
 +    "over all the systems.[PAR]",
 +    "With [TT]-replex[tt] replica exchange is attempted every given number",
 +    "of steps. The number of replicas is set with the [TT]-multi[tt] or ",
 +    "[TT]-multidir[tt] option, described above.",
 +    "All run input files should use a different coupling temperature,",
 +    "the order of the files is not important. The random seed is set with",
 +    "[TT]-reseed[tt]. The velocities are scaled and neighbor searching",
 +    "is performed after every exchange.[PAR]",
 +    "Finally some experimental algorithms can be tested when the",
 +    "appropriate options have been given. Currently under",
 +    "investigation are: polarizability and X-ray bombardments.",
 +    "[PAR]",
 +    "The option [TT]-membed[tt] does what used to be g_membed, i.e. embed",
 +    "a protein into a membrane. The data file should contain the options",
 +    "that where passed to g_membed before. The [TT]-mn[tt] and [TT]-mp[tt]",
 +    "both apply to this as well.",
 +    "[PAR]",
 +    "The option [TT]-pforce[tt] is useful when you suspect a simulation",
 +    "crashes due to too large forces. With this option coordinates and",
 +    "forces of atoms with a force larger than a certain value will",
 +    "be printed to stderr.",
 +    "[PAR]",
 +    "Checkpoints containing the complete state of the system are written",
 +    "at regular intervals (option [TT]-cpt[tt]) to the file [TT]-cpo[tt],",
 +    "unless option [TT]-cpt[tt] is set to -1.",
 +    "The previous checkpoint is backed up to [TT]state_prev.cpt[tt] to",
 +    "make sure that a recent state of the system is always available,",
 +    "even when the simulation is terminated while writing a checkpoint.",
 +    "With [TT]-cpnum[tt] all checkpoint files are kept and appended",
 +    "with the step number.",
 +    "A simulation can be continued by reading the full state from file",
 +    "with option [TT]-cpi[tt]. This option is intelligent in the way that",
 +    "if no checkpoint file is found, Gromacs just assumes a normal run and",
 +    "starts from the first step of the [TT].tpr[tt] file. By default the output",
 +    "will be appending to the existing output files. The checkpoint file",
 +    "contains checksums of all output files, such that you will never",
 +    "loose data when some output files are modified, corrupt or removed.",
 +    "There are three scenarios with [TT]-cpi[tt]:[PAR]",
 +    "[TT]*[tt] no files with matching names are present: new output files are written[PAR]",
 +    "[TT]*[tt] all files are present with names and checksums matching those stored",
 +    "in the checkpoint file: files are appended[PAR]",
 +    "[TT]*[tt] otherwise no files are modified and a fatal error is generated[PAR]",
 +    "With [TT]-noappend[tt] new output files are opened and the simulation",
 +    "part number is added to all output file names.",
 +    "Note that in all cases the checkpoint file itself is not renamed",
 +    "and will be overwritten, unless its name does not match",
 +    "the [TT]-cpo[tt] option.",
 +    "[PAR]",
 +    "With checkpointing the output is appended to previously written",
 +    "output files, unless [TT]-noappend[tt] is used or none of the previous",
 +    "output files are present (except for the checkpoint file).",
 +    "The integrity of the files to be appended is verified using checksums",
 +    "which are stored in the checkpoint file. This ensures that output can",
 +    "not be mixed up or corrupted due to file appending. When only some",
 +    "of the previous output files are present, a fatal error is generated",
 +    "and no old output files are modified and no new output files are opened.",
 +    "The result with appending will be the same as from a single run.",
 +    "The contents will be binary identical, unless you use a different number",
 +    "of nodes or dynamic load balancing or the FFT library uses optimizations",
 +    "through timing.",
 +    "[PAR]",
 +    "With option [TT]-maxh[tt] a simulation is terminated and a checkpoint",
 +    "file is written at the first neighbor search step where the run time",
 +    "exceeds [TT]-maxh[tt]*0.99 hours.",
 +    "[PAR]",
 +    "When [TT]mdrun[tt] receives a TERM signal, it will set nsteps to the current",
 +    "step plus one. When [TT]mdrun[tt] receives an INT signal (e.g. when ctrl+C is",
 +    "pressed), it will stop after the next neighbor search step ",
 +    "(with nstlist=0 at the next step).",
 +    "In both cases all the usual output will be written to file.",
 +    "When running with MPI, a signal to one of the [TT]mdrun[tt] processes",
 +    "is sufficient, this signal should not be sent to mpirun or",
 +    "the [TT]mdrun[tt] process that is the parent of the others.",
 +    "[PAR]",
 +    "When [TT]mdrun[tt] is started with MPI, it does not run niced by default."
 +#endif
 +  };
 +  t_commrec    *cr;
 +  t_filenm fnm[] = {
 +    { efTPX, NULL,      NULL,       ffREAD },
 +    { efTRN, "-o",      NULL,       ffWRITE },
 +    { efXTC, "-x",      NULL,       ffOPTWR },
 +    { efCPT, "-cpi",    NULL,       ffOPTRD },
 +    { efCPT, "-cpo",    NULL,       ffOPTWR },
 +    { efSTO, "-c",      "confout",  ffWRITE },
 +    { efEDR, "-e",      "ener",     ffWRITE },
 +    { efLOG, "-g",      "md",       ffWRITE },
 +    { efXVG, "-dhdl",   "dhdl",     ffOPTWR },
 +    { efXVG, "-field",  "field",    ffOPTWR },
 +    { efXVG, "-table",  "table",    ffOPTRD },
 +    { efXVG, "-tablep", "tablep",   ffOPTRD },
 +    { efXVG, "-tableb", "table",    ffOPTRD },
 +    { efTRX, "-rerun",  "rerun",    ffOPTRD },
 +    { efXVG, "-tpi",    "tpi",      ffOPTWR },
 +    { efXVG, "-tpid",   "tpidist",  ffOPTWR },
 +    { efEDI, "-ei",     "sam",      ffOPTRD },
 +    { efEDO, "-eo",     "sam",      ffOPTWR },
 +    { efGCT, "-j",      "wham",     ffOPTRD },
 +    { efGCT, "-jo",     "bam",      ffOPTWR },
 +    { efXVG, "-ffout",  "gct",      ffOPTWR },
 +    { efXVG, "-devout", "deviatie", ffOPTWR },
 +    { efXVG, "-runav",  "runaver",  ffOPTWR },
 +    { efXVG, "-px",     "pullx",    ffOPTWR },
 +    { efXVG, "-pf",     "pullf",    ffOPTWR },
 +    { efXVG, "-ro",     "rotation", ffOPTWR },
 +    { efLOG, "-ra",     "rotangles",ffOPTWR },
 +    { efLOG, "-rs",     "rotslabs", ffOPTWR },
 +    { efLOG, "-rt",     "rottorque",ffOPTWR },
 +    { efMTX, "-mtx",    "nm",       ffOPTWR },
 +    { efNDX, "-dn",     "dipole",   ffOPTWR },
 +    { efDAT, "-membed", "membed",   ffOPTRD },
 +    { efTOP, "-mp",     "membed",   ffOPTRD },
 +    { efNDX, "-mn",     "membed",   ffOPTRD },
 +    { efRND, "-multidir",NULL,      ffOPTRDMULT}
 +  };
 +#define NFILE asize(fnm)
 +
 +  /* Command line options ! */
 +  gmx_bool bCart        = FALSE;
 +  gmx_bool bPPPME       = FALSE;
 +  gmx_bool bPartDec     = FALSE;
 +  gmx_bool bDDBondCheck = TRUE;
 +  gmx_bool bDDBondComm  = TRUE;
 +  gmx_bool bVerbose     = FALSE;
 +  gmx_bool bCompact     = TRUE;
 +  gmx_bool bSepPot      = FALSE;
 +  gmx_bool bRerunVSite  = FALSE;
 +  gmx_bool bIonize      = FALSE;
 +  gmx_bool bConfout     = TRUE;
 +  gmx_bool bReproducible = FALSE;
 +
 +  int  npme=-1;
 +  int  nmultisim=0;
 +  int  nstglobalcomm=-1;
 +  int  repl_ex_nst=0;
 +  int  repl_ex_seed=-1;
 +  int  nstepout=100;
 +  int  nthreads=0; /* set to determine # of threads automatically */
 +  int  resetstep=-1;
 +
 +  rvec realddxyz={0,0,0};
 +  const char *ddno_opt[ddnoNR+1] =
 +    { NULL, "interleave", "pp_pme", "cartesian", NULL };
 +    const char *dddlb_opt[] =
 +    { NULL, "auto", "no", "yes", NULL };
 +  real rdd=0.0,rconstr=0.0,dlb_scale=0.8,pforce=-1;
 +  char *ddcsx=NULL,*ddcsy=NULL,*ddcsz=NULL;
 +  real cpt_period=15.0,max_hours=-1;
 +  gmx_bool bAppendFiles=TRUE;
 +  gmx_bool bKeepAndNumCPT=FALSE;
 +  gmx_bool bResetCountersHalfWay=FALSE;
 +  output_env_t oenv=NULL;
 +  const char *deviceOptions = "";
 +
 +  t_pargs pa[] = {
 +    { "-pd",      FALSE, etBOOL,{&bPartDec},
 +      "Use particle decompostion" },
 +    { "-dd",      FALSE, etRVEC,{&realddxyz},
 +      "Domain decomposition grid, 0 is optimize" },
 +#ifdef GMX_THREADS
 +    { "-nt",      FALSE, etINT, {&nthreads},
 +      "Number of threads to start (0 is guess)" },
 +#endif
 +    { "-npme",    FALSE, etINT, {&npme},
 +      "Number of separate nodes to be used for PME, -1 is guess" },
 +    { "-ddorder", FALSE, etENUM, {ddno_opt},
 +      "DD node order" },
 +    { "-ddcheck", FALSE, etBOOL, {&bDDBondCheck},
 +      "Check for all bonded interactions with DD" },
 +    { "-ddbondcomm", FALSE, etBOOL, {&bDDBondComm},
 +      "HIDDENUse special bonded atom communication when [TT]-rdd[tt] > cut-off" },
 +    { "-rdd",     FALSE, etREAL, {&rdd},
 +      "The maximum distance for bonded interactions with DD (nm), 0 is determine from initial coordinates" },
 +    { "-rcon",    FALSE, etREAL, {&rconstr},
 +      "Maximum distance for P-LINCS (nm), 0 is estimate" },
 +    { "-dlb",     FALSE, etENUM, {dddlb_opt},
 +      "Dynamic load balancing (with DD)" },
 +    { "-dds",     FALSE, etREAL, {&dlb_scale},
 +      "Minimum allowed dlb scaling of the DD cell size" },
 +    { "-ddcsx",   FALSE, etSTR, {&ddcsx},
 +      "HIDDENThe DD cell sizes in x" },
 +    { "-ddcsy",   FALSE, etSTR, {&ddcsy},
 +      "HIDDENThe DD cell sizes in y" },
 +    { "-ddcsz",   FALSE, etSTR, {&ddcsz},
 +      "HIDDENThe DD cell sizes in z" },
 +    { "-gcom",    FALSE, etINT,{&nstglobalcomm},
 +      "Global communication frequency" },
 +    { "-v",       FALSE, etBOOL,{&bVerbose},
 +      "Be loud and noisy" },
 +    { "-compact", FALSE, etBOOL,{&bCompact},
 +      "Write a compact log file" },
 +    { "-seppot",  FALSE, etBOOL, {&bSepPot},
 +      "Write separate V and dVdl terms for each interaction type and node to the log file(s)" },
 +    { "-pforce",  FALSE, etREAL, {&pforce},
 +      "Print all forces larger than this (kJ/mol nm)" },
 +    { "-reprod",  FALSE, etBOOL,{&bReproducible},
 +      "Try to avoid optimizations that affect binary reproducibility" },
 +    { "-cpt",     FALSE, etREAL, {&cpt_period},
 +      "Checkpoint interval (minutes)" },
 +    { "-cpnum",   FALSE, etBOOL, {&bKeepAndNumCPT},
 +      "Keep and number checkpoint files" },
 +    { "-append",  FALSE, etBOOL, {&bAppendFiles},
 +      "Append to previous output files when continuing from checkpoint instead of adding the simulation part number to all file names" },
 +    { "-maxh",   FALSE, etREAL, {&max_hours},
 +      "Terminate after 0.99 times this time (hours)" },
 +    { "-multi",   FALSE, etINT,{&nmultisim},
 +      "Do multiple simulations in parallel" },
 +    { "-replex",  FALSE, etINT, {&repl_ex_nst},
 +      "Attempt replica exchange every # steps" },
 +    { "-reseed",  FALSE, etINT, {&repl_ex_seed},
 +      "Seed for replica exchange, -1 is generate a seed" },
 +    { "-rerunvsite", FALSE, etBOOL, {&bRerunVSite},
 +      "HIDDENRecalculate virtual site coordinates with [TT]-rerun[tt]" },
 +    { "-ionize",  FALSE, etBOOL,{&bIonize},
 +      "Do a simulation including the effect of an X-Ray bombardment on your system" },
 +    { "-confout", FALSE, etBOOL, {&bConfout},
 +      "HIDDENWrite the last configuration with [TT]-c[tt] and force checkpointing at the last step" },
 +    { "-stepout", FALSE, etINT, {&nstepout},
 +      "HIDDENFrequency of writing the remaining runtime" },
 +    { "-resetstep", FALSE, etINT, {&resetstep},
 +      "HIDDENReset cycle counters after these many time steps" },
 +    { "-resethway", FALSE, etBOOL, {&bResetCountersHalfWay},
 +      "HIDDENReset the cycle counters after half the number of steps or halfway [TT]-maxh[tt]" }
 +#ifdef GMX_OPENMM
 +    ,
 +    { "-device",  FALSE, etSTR, {&deviceOptions},
 +      "Device option string" }
 +#endif
 +  };
 +  unsigned long Flags, PCA_Flags;
 +  ivec     ddxyz;
 +  int      dd_node_order;
 +  gmx_bool     bAddPart;
 +  FILE     *fplog;
 +  int      sim_part,sim_part_fn;
 +  const char *part_suffix=".part";
 +  char     suffix[STRLEN];
 +  int      rc;
 +  char **multidir=NULL;
 +
 +  cr = init_par(&argc,&argv);
 +
 +  if (MASTER(cr))
 +    CopyRight(stderr, argv[0]);
 +
 +  PCA_Flags = (PCA_KEEP_ARGS | PCA_NOEXIT_ON_ARGS | PCA_CAN_SET_DEFFNM
 +	       | (MASTER(cr) ? 0 : PCA_QUIET));
 +
 +
 +  /* Comment this in to do fexist calls only on master
 +   * works not with rerun or tables at the moment
 +   * also comment out the version of init_forcerec in md.c 
 +   * with NULL instead of opt2fn
 +   */
 +  /*
 +     if (!MASTER(cr))
 +     {
 +     PCA_Flags |= PCA_NOT_READ_NODE;
 +     }
 +     */
 +
 +  parse_common_args(&argc,argv,PCA_Flags, NFILE,fnm,asize(pa),pa,
 +                    asize(desc),desc,0,NULL, &oenv);
 +
 +
 +  /* we set these early because they might be used in init_multisystem() 
 +     Note that there is the potential for npme>nnodes until the number of
 +     threads is set later on, if there's thread parallelization. That shouldn't
 +     lead to problems. */ 
 +  dd_node_order = nenum(ddno_opt);
 +  cr->npmenodes = npme;
 +
 +#ifndef GMX_THREADS
 +  nthreads=1;
 +#endif
 +
 +  /* now check the -multi and -multidir option */
 +  if (opt2bSet("-multidir", NFILE, fnm))
 +  {
 +      if (nmultisim > 0)
 +      {
 +          gmx_fatal(FARGS, "mdrun -multi and -multidir options are mutually exclusive.");
 +      }
 +      nmultisim = opt2fns(&multidir, "-multidir", NFILE, fnm);
 +  }
 +
 +  if (repl_ex_nst != 0 && nmultisim < 2)
 +      gmx_fatal(FARGS,"Need at least two replicas for replica exchange (option -multi)");
 +
 +  if (nmultisim > 1) {
 +#ifndef GMX_THREADS
 +    gmx_bool bParFn = (multidir == NULL);
 +    init_multisystem(cr, nmultisim, multidir, NFILE, fnm, bParFn);
 +#else
 +    gmx_fatal(FARGS,"mdrun -multi is not supported with the thread library.Please compile GROMACS with MPI support");
 +#endif
 +  }
 +
 +  /* Default: add a part suffix iff not appending; the -cpi check may change it */
 +  bAddPart = !bAppendFiles;
 +
 +  /* Check if there is ANY checkpoint file available */
 +  sim_part    = 1;
 +  sim_part_fn = sim_part;
 +  if (opt2bSet("-cpi",NFILE,fnm))
 +  {
 +      if (bSepPot && bAppendFiles)
 +      {
 +          gmx_fatal(FARGS,"Output file appending is not supported with -seppot");
 +      }
 +
 +      bAppendFiles =
 +                read_checkpoint_simulation_part(opt2fn_master("-cpi", NFILE,
 +                                                              fnm,cr),
 +                                                &sim_part_fn,NULL,cr,
 +                                                bAppendFiles,NFILE,fnm,
 +                                                part_suffix,&bAddPart);
 +      if (sim_part_fn==0 && MASTER(cr))
 +      {
 +          fprintf(stdout,"No previous checkpoint file present, assuming this is a new run.\n");
 +      }
 +      else
 +      {
 +          sim_part = sim_part_fn + 1;
 +      }
++
++      if (MULTISIM(cr) && MASTER(cr))
++      {
++          check_multi_int(stdout,cr->ms,sim_part,"simulation part");
++      }
 +  } 
 +  else
 +  {
 +      bAppendFiles = FALSE;
 +  }
 +
 +  if (!bAppendFiles)
 +  {
 +      sim_part_fn = sim_part;
 +  }
 +
 +  if (bAddPart)
 +  {
 +      /* Rename all output files (except checkpoint files) */
 +      /* create new part name first (zero-filled) */
 +      sprintf(suffix,"%s%04d",part_suffix,sim_part_fn);
 +
 +      add_suffix_to_output_names(fnm,NFILE,suffix);
 +      if (MASTER(cr))
 +      {
 +          fprintf(stdout,"Checkpoint file is from part %d, new output files will be suffixed '%s'.\n",sim_part-1,suffix);
 +      }
 +  }
 +
 +  Flags = opt2bSet("-rerun",NFILE,fnm) ? MD_RERUN : 0;
 +  Flags = Flags | (bSepPot       ? MD_SEPPOT       : 0);
 +  Flags = Flags | (bIonize       ? MD_IONIZE       : 0);
 +  Flags = Flags | (bPartDec      ? MD_PARTDEC      : 0);
 +  Flags = Flags | (bDDBondCheck  ? MD_DDBONDCHECK  : 0);
 +  Flags = Flags | (bDDBondComm   ? MD_DDBONDCOMM   : 0);
 +  Flags = Flags | (bConfout      ? MD_CONFOUT      : 0);
 +  Flags = Flags | (bRerunVSite   ? MD_RERUN_VSITE  : 0);
 +  Flags = Flags | (bReproducible ? MD_REPRODUCIBLE : 0);
 +  Flags = Flags | (bAppendFiles  ? MD_APPENDFILES  : 0); 
 +  Flags = Flags | (bKeepAndNumCPT ? MD_KEEPANDNUMCPT : 0); 
 +  Flags = Flags | (sim_part>1    ? MD_STARTFROMCPT : 0); 
 +  Flags = Flags | (bResetCountersHalfWay ? MD_RESETCOUNTERSHALFWAY : 0);
 +
 +
 +  /* We postpone opening the log file if we are appending, so we can 
 +     first truncate the old log file and append to the correct position 
 +     there instead.  */
 +  if ((MASTER(cr) || bSepPot) && !bAppendFiles) 
 +  {
 +      gmx_log_open(ftp2fn(efLOG,NFILE,fnm),cr,!bSepPot,Flags,&fplog);
 +      CopyRight(fplog,argv[0]);
 +      please_cite(fplog,"Hess2008b");
 +      please_cite(fplog,"Spoel2005a");
 +      please_cite(fplog,"Lindahl2001a");
 +      please_cite(fplog,"Berendsen95a");
 +  }
 +  else if (!MASTER(cr) && bSepPot)
 +  {
 +      gmx_log_open(ftp2fn(efLOG,NFILE,fnm),cr,!bSepPot,Flags,&fplog);
 +  }
 +  else
 +  {
 +      fplog = NULL;
 +  }
 +
 +  /* Convert the real-valued -dd grid to integers: add 0.5, then truncate */
 +  ddxyz[XX] = (int)(realddxyz[XX] + 0.5);
 +  ddxyz[YY] = (int)(realddxyz[YY] + 0.5);
 +  ddxyz[ZZ] = (int)(realddxyz[ZZ] + 0.5);
 +
 +  rc = mdrunner(nthreads, fplog,cr,NFILE,fnm,oenv,bVerbose,bCompact,
 +                nstglobalcomm, ddxyz,dd_node_order,rdd,rconstr,
 +                dddlb_opt[0],dlb_scale,ddcsx,ddcsy,ddcsz,
 +                nstepout,resetstep,nmultisim,repl_ex_nst,repl_ex_seed,
 +                pforce, cpt_period,max_hours,deviceOptions,Flags);
 +
 +  if (gmx_parallel_env_initialized())
 +      gmx_finalize();
 +
 +  if (MULTIMASTER(cr)) {
 +      thanx(stderr);
 +  }
 +
 +  /* Log file has to be closed in mdrunner if we are appending to it 
 +     (fplog not set here) */
 +  if (MASTER(cr) && !bAppendFiles) 
 +  {
 +      gmx_log_close(fplog);
 +  }
 +
 +  return rc;
 +}
 +
diff --cc src/tools/CMakeLists.txt
index 84fa98ed9c,81d4f63449..99a46d5a9b
--- a/src/tools/CMakeLists.txt
+++ b/src/tools/CMakeLists.txt
@@@ -19,7 -20,7 +19,7 @@@ add_library(gmxan
              gmx_polystat.c  gmx_potential.c gmx_rama.c      
              gmx_rdf.c       gmx_rms.c       gmx_rmsf.c      
              gmx_rotacf.c    gmx_saltbr.c    gmx_sas.c              
-             gmx_rmsdist.c	gmx_rotmat.c
 -            gmx_select.c    gmx_rmsdist.c   gmx_rotmat.c
++            gmx_rmsdist.c   gmx_rotmat.c
              gmx_sgangle.c   gmx_sorient.c   gmx_spol.c      gmx_tcaf.c      
              gmx_traj.c      gmx_velacc.c    gmx_helixorient.c 
              gmx_clustsize.c gmx_mdmat.c     gmx_wham.c      
@@@ -28,7 -29,9 +28,9 @@@
              gmx_editconf.c  gmx_genbox.c    gmx_genion.c    gmx_genconf.c   
              gmx_genpr.c     gmx_eneconv.c   gmx_vanhove.c   gmx_wheel.c     
              addconf.c       calcpot.c       edittop.c       gmx_bar.c
-             gmx_pme_error.c gmx_options.c	
 -            gmx_membed.c    gmx_pme_error.c gmx_options.c   gmx_dos.c
++            gmx_pme_error.c gmx_options.c   gmx_dos.c
+             gmx_hydorder.c  gmx_densorder.c powerspect.c    dens_filter.c
+             binsearch.c
              )
  
  
@@@ -48,10 -51,11 +50,11 @@@ set(GMX_TOOLS_PROGRAM
      g_dyndom g_enemat g_energy g_lie g_filter g_gyrate
      g_h2order g_hbond g_helix g_mindist g_msd g_morph g_nmeig
      g_nmens g_order g_kinetics g_polystat g_potential g_rama g_rdf g_rms
 -    g_rmsf g_rotacf g_saltbr g_sas g_select g_sgangle g_sham g_sorient
 +    g_rmsf g_rotacf g_saltbr g_sas g_sgangle g_sham g_sorient
      g_spol g_spatial g_tcaf g_traj g_tune_pme g_vanhove
      g_velacc g_clustsize g_mdmat g_wham g_sigeps g_bar
 -    g_membed g_pme_error g_rmsdist g_rotmat g_options
 +    g_pme_error g_rmsdist g_rotmat g_options
+     g_dos    g_hydorder  g_densorder
      )
  
  set(GMX_TOOLS_PROGRAMS_NOT_FOR_INSTALLATION