Merge release-4-6 into master
author    Roland Schulz <roland@utk.edu>
Sun, 30 Sep 2012 23:24:42 +0000 (19:24 -0400)
committer Roland Schulz <roland@utk.edu>
Mon, 1 Oct 2012 18:20:39 +0000 (14:20 -0400)
Conflicts:
    cmake/ThreadMPI.cmake (resolved)
    src/tools/CMakeLists.txt (resolved)
    src/gmxlib/CMakeLists.txt
    src/kernel/CMakeLists.txt
    src/mdlib/CMakeLists.txt
The last three were applied to src/gromacs/CMakeLists.txt and
src/programs/mdrun/CMakeLists.txt.

The release-4-6 version being merged is not the most recent. The merge is
being done in several steps.

Change-Id: Ia9ec3c132a6306701866867aa1a4314541203002

42 files changed:
CMakeLists.txt
cmake/ThreadMPI.cmake
cmake/gmxDetectAcceleration.cmake
src/gromacs/CMakeLists.txt
src/gromacs/gmxlib/gmx_fatal.c
src/gromacs/gmxlib/main.c
src/gromacs/gmxlib/thread_mpi/CMakeLists.txt
src/gromacs/gmxlib/thread_mpi/impl.h
src/gromacs/gmxlib/thread_mpi/profile.c
src/gromacs/gmxlib/thread_mpi/profile.h
src/gromacs/gmxlib/thread_mpi/pthreads.c
src/gromacs/gmxlib/thread_mpi/reduce.c
src/gromacs/gmxlib/thread_mpi/scan.c
src/gromacs/gmxpreprocess/gen_ad.c
src/gromacs/gmxpreprocess/pdb2top.c
src/gromacs/gmxpreprocess/readadress.c
src/gromacs/gmxpreprocess/readir.c
src/gromacs/gmxpreprocess/readpull.c
src/gromacs/gmxpreprocess/resall.c
src/gromacs/gmxpreprocess/vsite_parm.c
src/gromacs/legacyheaders/edsam.h
src/gromacs/legacyheaders/gen_ad.h
src/gromacs/legacyheaders/hackblock.h
src/gromacs/legacyheaders/thread_mpi/mpi_bindings.h
src/gromacs/legacyheaders/thread_mpi/tmpi.h
src/gromacs/mdlib/domdec.c
src/gromacs/mdlib/edsam.c
src/gromacs/mdlib/groupcoord.c
src/gromacs/mdlib/minimize.c
src/gromacs/mdlib/pme.c
src/gromacs/mdlib/sim_util.c
src/gromacs/mdlib/tpi.c
src/programs/g_x2top/g_x2top.c
src/programs/gmxcheck/gmxcheck.c
src/programs/grompp/grompp.c
src/programs/mdrun/CMakeLists.txt
src/programs/mdrun/md.c
src/tools/CMakeLists.txt
src/tools/gmx_cluster.c
src/tools/gmx_covar.c
src/tools/gmx_trjconv.c
src/tools/make_edi.c

diff --cc CMakeLists.txt
Simple merge
diff --cc cmake/ThreadMPI.cmake
index 3b99974a722e6349b114ee3f7c87608f3185bb18,68a44f38a95423a992a4b8770971020d93a52912..51fb69668367faa5aaccbc715425948e00617dd1
@@@ -23,45 -23,11 +23,46 @@@ ENDMACRO(TEST_TMPI_ATOMICS VARIABLE
  
  MACRO(TMPI_MAKE_CXX_LIB)
      set(TMPI_CXX_LIB 1)
 -    # the C++ library
 -    set(THREAD_MPI_CXX_SRC
 -        thread_mpi/system_error.cpp )
  ENDMACRO(TMPI_MAKE_CXX_LIB)
  
-              thread_mpi/numa_malloc.c   thread_mpi/once.c )
-     endif ()
 +MACRO(TMPI_GET_SOURCE_LIST SRC_VARIABLE)
 +    foreach (_option IN ITEMS ${ARGN})
 +        if (_option STREQUAL "CXX")
 +            set(TMPI_CXX_LIB 1)
 +        elseif (_option STREQUAL "NOMPI")
 +            set(TMPI_NO_MPI_LIB 1)
 +        else ()
 +            message(FATAL_ERROR "Unknown thread_mpi option '${_option}'")
 +        endif ()
 +    endforeach ()
 +    set(${SRC_VARIABLE}
 +        thread_mpi/errhandler.c
 +        thread_mpi/tmpi_malloc.c)
 +    if (THREAD_PTHREADS)
 +        list(APPEND ${SRC_VARIABLE} thread_mpi/pthreads.c)
 +    elseif (THREAD_WINDOWS)
 +        list(APPEND ${SRC_VARIABLE} thread_mpi/winthreads.c)
 +    endif (THREAD_PTHREADS)
 +    if (TMPI_CXX_LIB)
 +        list(APPEND ${SRC_VARIABLE} thread_mpi/system_error.cpp)
 +    endif (TMPI_CXX_LIB)
 +    if (NOT TMPI_NO_MPI_LIB)
 +        list(APPEND ${SRC_VARIABLE}
 +             thread_mpi/alltoall.c      thread_mpi/p2p_protocol.c
 +             thread_mpi/barrier.c       thread_mpi/p2p_send_recv.c
 +             thread_mpi/bcast.c         thread_mpi/p2p_wait.c
 +             thread_mpi/collective.c    thread_mpi/profile.c
 +             thread_mpi/comm.c          thread_mpi/reduce.c
 +             thread_mpi/event.c         thread_mpi/reduce_fast.c
 +             thread_mpi/gather.c        thread_mpi/scatter.c
 +             thread_mpi/group.c         thread_mpi/tmpi_init.c
 +             thread_mpi/topology.c      thread_mpi/list.c
 +             thread_mpi/type.c          thread_mpi/lock.c
++             thread_mpi/numa_malloc.c   thread_mpi/once.c
++             thread_mpi/scan.c)
++    endif()
 +ENDMACRO(TMPI_GET_SOURCE_LIST)
 +
  include(FindThreads)
  if (CMAKE_USE_PTHREADS_INIT)
      check_include_files(pthread.h    HAVE_PTHREAD_H)
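
For context, a minimal sketch of how the new TMPI_GET_SOURCE_LIST macro could be
invoked from a consuming CMakeLists.txt. This is an illustration only, not part
of the diff; the variable THREAD_MPI_SRC and the use of GMXLIB_SOURCES are
assumptions.

    # Hypothetical caller: collect the thread_mpi sources, including the C++
    # shim selected by the CXX option, and append them to the library sources.
    TMPI_GET_SOURCE_LIST(THREAD_MPI_SRC CXX)
    list(APPEND GMXLIB_SOURCES ${THREAD_MPI_SRC})

Passing NOMPI instead sets TMPI_NO_MPI_LIB, which omits the MPI-like
communication sources (collectives, point-to-point, etc.) from the list.
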
Simple merge
diff --cc src/gromacs/CMakeLists.txt
index 2f473f7f41ebfc5cb5b34ad3da04a3f445c2f2f3,0000000000000000000000000000000000000000..5f211fa6553de7744f969955e759a48af0abbed7
mode 100644,000000..100644
--- /dev/null
@@@ -1,61 -1,0 +1,62 @@@
-                       ${THREAD_LIB})
 +set(LIBGROMACS_SOURCES)
 +
 +add_subdirectory(legacyheaders)
 +add_subdirectory(gmxlib)
 +add_subdirectory(mdlib)
 +add_subdirectory(gmxpreprocess)
 +add_subdirectory(analysisdata)
 +add_subdirectory(commandline)
 +add_subdirectory(linearalgebra)
 +add_subdirectory(onlinehelp)
 +add_subdirectory(options)
 +add_subdirectory(selection)
 +add_subdirectory(trajectoryanalysis)
 +add_subdirectory(utility)
 +
 +file(GLOB LIBGROMACS_HEADERS *.h)
 +install(FILES ${LIBGROMACS_HEADERS} DESTINATION ${INCL_INSTALL_DIR}/gromacs
 +        COMPONENT development)
 +
 +list(APPEND LIBGROMACS_SOURCES ${GMXLIB_SOURCES} ${MDLIB_SOURCES})
 +
 +# add target that generates version.c every time a make is run
 +# only do this if we generate the version
 +if (USE_VERSION_H)
 +    add_custom_target(gmx_version ALL
 +            COMMAND ${CMAKE_COMMAND} 
 +                -D GIT_EXECUTABLE="${GIT_EXECUTABLE}"
 +                -D GIT_VERSION="${GIT_VERSION}"
 +                -D PROJECT_VERSION="${PROJECT_VERSION}"
 +                -D PROJECT_SOURCE_DIR="${PROJECT_SOURCE_DIR}"
 +                -D VERSION_C_CMAKEIN="${CMAKE_CURRENT_SOURCE_DIR}/version.c.cmakein"
 +                -D VERSION_C_OUT="${CMAKE_CURRENT_BINARY_DIR}/version.c"
 +                -P ${CMAKE_SOURCE_DIR}/cmake/gmxGenerateVersionInfo.cmake 
 +            WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/src/gmxlib 
 +            DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/version.c.cmakein
 +            COMMENT "Generating version information")
 +    list(APPEND LIBGROMACS_SOURCES ${CMAKE_CURRENT_BINARY_DIR}/version.c) # auto-generated
 +    set_source_files_properties(${CMAKE_CURRENT_BINARY_DIR}/version.c 
 +                                PROPERTIES GENERATED true)
 +endif (USE_VERSION_H)
 +
 +add_library(libgromacs ${LIBGROMACS_SOURCES})
 +if (USE_VERSION_H)
 +    add_dependencies(libgromacs gmx_version)
 +endif (USE_VERSION_H)
 +target_link_libraries(libgromacs
 +                      ${GMX_EXTRA_LIBRARIES} ${FFT_LIBRARIES} ${XML_LIBRARIES}
-                       INSTALL_NAME_DIR "${LIB_INSTALL_DIR}")
++                      ${THREAD_LIB} ${OpenMP_SHARED_LINKER_FLAGS})
 +set_target_properties(libgromacs PROPERTIES
 +                      OUTPUT_NAME "gromacs${GMX_LIBS_SUFFIX}"
 +                      SOVERSION ${SOVERSION}
++                      INSTALL_NAME_DIR "${LIB_INSTALL_DIR}"
++                      COMPILE_FLAGS "${OpenMP_C_FLAGS}")
 +
 +install(TARGETS libgromacs DESTINATION ${LIB_INSTALL_DIR} COMPONENT libraries)
 +
 +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/libgromacs.pc.cmakein
 +               ${CMAKE_CURRENT_BINARY_DIR}/libgromacs.pc @ONLY)
 +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libgromacs.pc
 +        DESTINATION ${LIB_INSTALL_DIR}/pkgconfig
 +        RENAME "libgromacs${GMX_LIBS_SUFFIX}.pc"
 +        COMPONENT development)
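
For illustration, a minimal sketch of how a program built elsewhere in the tree
(e.g. under src/programs/) could consume the libgromacs target defined above.
The executable and source names are hypothetical, not taken from this change.

    # Hypothetical consumer CMakeLists.txt: build a tool against libgromacs.
    add_executable(gmx_tool gmx_tool.c)          # names are illustrative only
    target_link_libraries(gmx_tool libgromacs)   # links the target added above
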
Simple merge
diff --cc src/gromacs/gmxlib/main.c
index 06a5a8e5ccd6187a66272d935c0f6cb63cc3feba,0000000000000000000000000000000000000000..fd2a1f5077d278801bad4534cad0db9b8e1b374b
mode 100644,000000..100644
--- /dev/null
@@@ -1,560 -1,0 +1,560 @@@
-     argv = *argv_ptr;
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROningen Mixture of Alchemy and Childrens' Stories
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +#include "gromacs/utility/gmx_header_config.h"
 +
 +#include <stdio.h>
 +#include <stdlib.h>
 +#include <string.h>
 +#include <limits.h>
 +#include <time.h>
 +
 +#ifdef HAVE_SYS_TIME_H
 +#include <sys/time.h>
 +#endif
 +
 +#include "smalloc.h"
 +#include "gmx_fatal.h"
 +#include "network.h"
 +#include "main.h"
 +#include "macros.h"
 +#include "futil.h"
 +#include "filenm.h"
 +#include "mdrun.h"
 +#include "gmxfio.h"
 +#include "string2.h"
 +
 +#ifdef GMX_THREAD_MPI
 +#include "thread_mpi.h"
 +#endif
 +
 +/* The source code in this file should be thread-safe. 
 +         Please keep it that way. */
 +
 +
 +#ifdef HAVE_UNISTD_H
 +#include <unistd.h>
 +#endif
 +
 +#ifdef GMX_NATIVE_WINDOWS
 +#include <process.h>
 +#endif
 +
 +
 +/* Portable version of ctime_r implemented in src/gmxlib/string2.c, but we do not want it declared in public installed headers */
 +char *
 +gmx_ctime_r(const time_t *clock,char *buf, int n);
 +
 +
 +#define BUFSIZE       1024
 +
 +
 +static void par_fn(char *base,int ftp,const t_commrec *cr,
 +                 gmx_bool bAppendSimId,gmx_bool bAppendNodeId,
 +                 char buf[],int bufsize)
 +{
 +  int n;
 +  
 +  if((size_t)bufsize<(strlen(base)+10))
 +     gmx_mem("Character buffer too small!");
 +
 +  /* Copy to buf, and strip extension */
 +  strcpy(buf,base);
 +  buf[strlen(base) - strlen(ftp2ext(fn2ftp(base))) - 1] = '\0';
 +
 +  if (bAppendSimId) {
 +    sprintf(buf+strlen(buf),"%d",cr->ms->sim);
 +  }
 +  if (bAppendNodeId) {
 +    strcat(buf,"_node");
 +    sprintf(buf+strlen(buf),"%d",cr->nodeid);
 +  }
 +  strcat(buf,".");
 +  
 +  /* Add extension again */
 +  strcat(buf,(ftp == efTPX) ? "tpr" : (ftp == efEDR) ? "edr" : ftp2ext(ftp));
 +  if (cr->nodeid == 0) {
 +    printf("node %d par_fn '%s'\n",cr->nodeid,buf);
 +    if (fn2ftp(buf) == efLOG) {
 +      printf("log\n");
 +    }
 +  }
 +}
 +
 +void check_multi_int(FILE *log,const gmx_multisim_t *ms,int val,
 +                     const char *name)
 +{
 +  int  *ibuf,p;
 +  gmx_bool bCompatible;
 +
 +  if (NULL != log)
 +      fprintf(log,"Multi-checking %s ... ",name);
 +  
 +  if (ms == NULL)
 +    gmx_fatal(FARGS,
 +            "check_multi_int called with a NULL communication pointer");
 +
 +  snew(ibuf,ms->nsim);
 +  ibuf[ms->sim] = val;
 +  gmx_sumi_sim(ms->nsim,ibuf,ms);
 +  
 +  bCompatible = TRUE;
 +  for(p=1; p<ms->nsim; p++)
 +    bCompatible = bCompatible && (ibuf[p-1] == ibuf[p]);
 +  
 +  if (bCompatible) 
 +  {
 +      if (NULL != log)
 +          fprintf(log,"OK\n");
 +  }
 +  else 
 +  {
 +      if (NULL != log)
 +      {
 +          fprintf(log,"\n%s is not equal for all subsystems\n",name);
 +          for(p=0; p<ms->nsim; p++)
 +              fprintf(log,"  subsystem %d: %d\n",p,ibuf[p]);
 +      }
 +      gmx_fatal(FARGS,"The %d subsystems are not compatible\n",ms->nsim);
 +  }
 +  
 +  sfree(ibuf);
 +}
 +
 +void check_multi_large_int(FILE *log,const gmx_multisim_t *ms,
 +                           gmx_large_int_t val, const char *name)
 +{
 +  gmx_large_int_t  *ibuf;
 +  int p;
 +  gmx_bool bCompatible;
 +
 +  if (NULL != log)
 +      fprintf(log,"Multi-checking %s ... ",name);
 +  
 +  if (ms == NULL)
 +    gmx_fatal(FARGS,
 +            "check_multi_int called with a NULL communication pointer");
 +
 +  snew(ibuf,ms->nsim);
 +  ibuf[ms->sim] = val;
 +  gmx_sumli_sim(ms->nsim,ibuf,ms);
 +  
 +  bCompatible = TRUE;
 +  for(p=1; p<ms->nsim; p++)
 +    bCompatible = bCompatible && (ibuf[p-1] == ibuf[p]);
 +  
 +  if (bCompatible) 
 +  {
 +      if (NULL != log)
 +          fprintf(log,"OK\n");
 +  }
 +  else 
 +  {
 +      if (NULL != log)
 +      {
 +          fprintf(log,"\n%s is not equal for all subsystems\n",name);
 +          for(p=0; p<ms->nsim; p++)
 +          {
 +              char strbuf[255];
 +              /* first make the format string */
 +              snprintf(strbuf, 255, "  subsystem %%d: %s\n", 
 +                       gmx_large_int_pfmt);
 +              fprintf(log,strbuf,p,ibuf[p]);
 +          }
 +      }
 +      gmx_fatal(FARGS,"The %d subsystems are not compatible\n",ms->nsim);
 +  }
 +  
 +  sfree(ibuf);
 +}
 +
 +
 +void gmx_log_open(const char *lognm,const t_commrec *cr,gmx_bool bMasterOnly, 
 +                   unsigned long Flags, FILE** fplog)
 +{
 +    int  len,testlen,pid;
 +    char buf[256],host[256];
 +    time_t t;
 +    char timebuf[STRLEN];
 +    FILE *fp=*fplog;
 +    char *tmpnm;
 +
 +    gmx_bool bAppend = Flags & MD_APPENDFILES;        
 +  
 +    debug_gmx();
 +  
 +    /* Communicate the filename for logfile */
 +    if (cr->nnodes > 1 && !bMasterOnly
 +#ifdef GMX_THREAD_MPI
 +        /* With thread MPI the non-master log files are opened later
 +         * when the files names are already known on all nodes.
 +         */
 +        && FALSE
 +#endif
 +        )
 +    {
 +        if (MASTER(cr))
 +        {
 +            len = strlen(lognm) + 1;
 +        }
 +        gmx_bcast(sizeof(len),&len,cr);
 +        if (!MASTER(cr))
 +        {
 +            snew(tmpnm,len+8);
 +        }
 +        else
 +        {
 +            tmpnm=gmx_strdup(lognm);
 +        }
 +        gmx_bcast(len*sizeof(*tmpnm),tmpnm,cr);
 +    }
 +    else
 +    {
 +        tmpnm=gmx_strdup(lognm);
 +    }
 +  
 +    debug_gmx();
 +
 +    if (!bMasterOnly && !MASTER(cr))
 +    {
 +        /* Since log always ends with '.log' let's use this info */
 +        par_fn(tmpnm,efLOG,cr,FALSE,!bMasterOnly,buf,255);
 +        fp = gmx_fio_fopen(buf, bAppend ? "a+" : "w+" );
 +    }
 +    else if (!bAppend)
 +    {
 +        fp = gmx_fio_fopen(tmpnm, bAppend ? "a+" : "w+" );
 +    }
 +
 +    sfree(tmpnm);
 +
 +    gmx_fatal_set_log_file(fp);
 +  
 +    /* Get some machine parameters */
 +#ifdef HAVE_UNISTD_H
 +    if (gethostname(host,255) != 0)
 +    {
 +        sprintf(host,"unknown");
 +    }
 +#else
 +    sprintf(host,"unknown");
 +#endif  
 +
 +    time(&t);
 +
 +#ifndef NO_GETPID
 +#   ifdef GMX_NATIVE_WINDOWS
 +    pid = _getpid();
 +#   else
 +    pid = getpid();
 +#   endif
 +#else
 +      pid = 0;
 +#endif
 +
 +    if (bAppend)
 +    {
 +        fprintf(fp,
 +                "\n"
 +                "\n"
 +                "-----------------------------------------------------------\n"
 +                "Restarting from checkpoint, appending to previous log file.\n"
 +                "\n"
 +            );
 +    }
 +      
 +    gmx_ctime_r(&t,timebuf,STRLEN);
 +
 +    fprintf(fp,
 +            "Log file opened on %s"
 +            "Host: %s  pid: %d  nodeid: %d  nnodes:  %d\n",
 +            timebuf,host,pid,cr->nodeid,cr->nnodes);
 +    fprintf(fp,
 +            "Built %s by %s\n"
 +            "Build os/architecture: %s\n"
 +            "Build CPU Vendor: %s  Brand: %s\n"
 +            "Build CPU Family: %d  Model: %d  Stepping: %d\n"
 +            "Build CPU Features: %s\n"
 +            "Compiler: %s\n"
 +            "CFLAGS: %s\n\n",
 +            BUILD_TIME,BUILD_USER,BUILD_HOST,
 +            BUILD_CPU_VENDOR,BUILD_CPU_BRAND,
 +            BUILD_CPU_FAMILY,BUILD_CPU_MODEL,BUILD_CPU_STEPPING,
 +            BUILD_CPU_FEATURES,BUILD_COMPILER,BUILD_CFLAGS);
 +
 +    fflush(fp);
 +    debug_gmx();
 +
 +    *fplog = fp;
 +}
 +
 +void gmx_log_close(FILE *fp)
 +{
 +  if (fp) {
 +    gmx_fatal_set_log_file(NULL);
 +    gmx_fio_fclose(fp);
 +  }
 +}
 +
 +static void comm_args(const t_commrec *cr,int *argc,char ***argv)
 +{
 +  int i,len;
 +  
 +  if (PAR(cr))
 +    gmx_bcast(sizeof(*argc),argc,cr);
 +  
 +  if (!MASTER(cr))
 +    snew(*argv,*argc+1);
 +  fprintf(stderr,"NODEID=%d argc=%d\n",cr->nodeid,*argc);
 +  for(i=0; (i<*argc); i++) {
 +    if (MASTER(cr))
 +      len = strlen((*argv)[i])+1;
 +    gmx_bcast(sizeof(len),&len,cr);
 +    if (!MASTER(cr))
 +      snew((*argv)[i],len);
 +    /*gmx_bcast(len*sizeof((*argv)[i][0]),(*argv)[i],cr);*/
 +    gmx_bcast(len*sizeof(char),(*argv)[i],cr);
 +  }
 +  debug_gmx();
 +}
 +
 +void init_multisystem(t_commrec *cr,int nsim, char **multidirs,
 +                      int nfile, const t_filenm fnm[],gmx_bool bParFn)
 +{
 +    gmx_multisim_t *ms;
 +    int  nnodes,nnodpersim,sim,i,ftp;
 +    char buf[256];
 +#ifdef GMX_MPI
 +    MPI_Group mpi_group_world;
 +#endif  
 +    int *rank;
 +
 +#ifndef GMX_MPI
 +    if (nsim > 1)
 +    {
 +        gmx_fatal(FARGS,"This binary is compiled without MPI support, can not do multiple simulations.");
 +    }
 +#endif
 +
 +    nnodes  = cr->nnodes;
 +    if (nnodes % nsim != 0)
 +    {
 +        gmx_fatal(FARGS,"The number of nodes (%d) is not a multiple of the number of simulations (%d)",nnodes,nsim);
 +    }
 +
 +    nnodpersim = nnodes/nsim;
 +    sim = cr->nodeid/nnodpersim;
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"We have %d simulations, %d nodes per simulation, local simulation is %d\n",nsim,nnodpersim,sim);
 +    }
 +
 +    snew(ms,1);
 +    cr->ms = ms;
 +    ms->nsim = nsim;
 +    ms->sim  = sim;
 +#ifdef GMX_MPI
 +    /* Create a communicator for the master nodes */
 +    snew(rank,ms->nsim);
 +    for(i=0; i<ms->nsim; i++)
 +    {
 +        rank[i] = i*nnodpersim;
 +    }
 +    MPI_Comm_group(MPI_COMM_WORLD,&mpi_group_world);
 +    MPI_Group_incl(mpi_group_world,nsim,rank,&ms->mpi_group_masters);
 +    sfree(rank);
 +    MPI_Comm_create(MPI_COMM_WORLD,ms->mpi_group_masters,
 +                    &ms->mpi_comm_masters);
 +
 +#if !defined(GMX_THREAD_MPI) && !defined(MPI_IN_PLACE_EXISTS)
 +    /* initialize the MPI_IN_PLACE replacement buffers */
 +    snew(ms->mpb, 1);
 +    ms->mpb->ibuf=NULL;
 +    ms->mpb->libuf=NULL;
 +    ms->mpb->fbuf=NULL;
 +    ms->mpb->dbuf=NULL;
 +    ms->mpb->ibuf_alloc=0;
 +    ms->mpb->libuf_alloc=0;
 +    ms->mpb->fbuf_alloc=0;
 +    ms->mpb->dbuf_alloc=0;
 +#endif
 +
 +#endif
 +
 +    /* Reduce the intra-simulation communication */
 +    cr->sim_nodeid = cr->nodeid % nnodpersim;
 +    cr->nnodes = nnodpersim;
 +#ifdef GMX_MPI
 +    MPI_Comm_split(MPI_COMM_WORLD,sim,cr->sim_nodeid,&cr->mpi_comm_mysim);
 +    cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
 +    cr->nodeid = cr->sim_nodeid;
 +#endif
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"This is simulation %d",cr->ms->sim);
 +        if (PAR(cr))
 +        {
 +            fprintf(debug,", local number of nodes %d, local nodeid %d",
 +                    cr->nnodes,cr->sim_nodeid);
 +        }
 +        fprintf(debug,"\n\n");
 +    }
 +
 +    if (multidirs)
 +    {
 +        int ret;
 +        if (debug)
 +        {
 +            fprintf(debug,"Changing to directory %s\n",multidirs[cr->ms->sim]);
 +        }
 +        gmx_chdir(multidirs[cr->ms->sim]);
 +    }
 +    else if (bParFn)
 +    {
 +        /* Patch output and tpx, cpt and rerun input file names */
 +        for(i=0; (i<nfile); i++)
 +        {
 +            /* Because of possible multiple extensions per type we must look 
 +             * at the actual file name 
 +             */
 +            if (is_output(&fnm[i]) ||
 +                fnm[i].ftp == efTPX || fnm[i].ftp == efCPT ||
 +                strcmp(fnm[i].opt,"-rerun") == 0)
 +            {
 +                ftp = fn2ftp(fnm[i].fns[0]);
 +                par_fn(fnm[i].fns[0],ftp,cr,TRUE,FALSE,buf,255);
 +                sfree(fnm[i].fns[0]);
 +                fnm[i].fns[0] = gmx_strdup(buf);
 +            }
 +        }
 +    }
 +}
 +
 +t_commrec *init_par(int *argc,char ***argv_ptr)
 +{
 +    t_commrec *cr;
 +    char      **argv;
 +    int       i;
 +    gmx_bool      pe=FALSE;
 +
 +    snew(cr,1);
 +
++    argv = argv_ptr ? *argv_ptr : NULL;
 +
 +#if defined GMX_MPI && !defined GMX_THREAD_MPI
 +    cr->sim_nodeid = gmx_setup(argc,argv,&cr->nnodes);
 +
 +    if (!PAR(cr) && (cr->sim_nodeid != 0))
 +    {
 +        gmx_comm("(!PAR(cr) && (cr->sim_nodeid != 0))");
 +    }
 +
 +    cr->mpi_comm_mysim   = MPI_COMM_WORLD;
 +    cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
 +#else
 +    /* These should never be accessed */
 +    cr->mpi_comm_mysim   = NULL;
 +    cr->mpi_comm_mygroup = NULL;
 +    cr->nnodes           = 1;
 +    cr->sim_nodeid       = 0;
 +#endif
 +
 +    cr->nodeid = cr->sim_nodeid;
 +
 +    cr->duty = (DUTY_PP | DUTY_PME);
 +
 +    /* Communicate arguments if parallel */
 +#ifndef GMX_THREAD_MPI
 +    if (PAR(cr))
 +    {
 +        comm_args(cr,argc,argv_ptr);
 +    }
 +#endif /* GMX_THREAD_MPI */
 +
 +#ifdef GMX_MPI
 +#if !defined(GMX_THREAD_MPI) && !defined(MPI_IN_PLACE_EXISTS)
 +  /* initialize the MPI_IN_PLACE replacement buffers */
 +  snew(cr->mpb, 1);
 +  cr->mpb->ibuf=NULL;
 +  cr->mpb->libuf=NULL;
 +  cr->mpb->fbuf=NULL;
 +  cr->mpb->dbuf=NULL;
 +  cr->mpb->ibuf_alloc=0;
 +  cr->mpb->libuf_alloc=0;
 +  cr->mpb->fbuf_alloc=0;
 +  cr->mpb->dbuf_alloc=0;
 +#endif
 +#endif
 +
 +    return cr;
 +}
 +
 +t_commrec *init_par_threads(const t_commrec *cro)
 +{
 +#ifdef GMX_THREAD_MPI
 +    int initialized;
 +    t_commrec *cr;
 +
 +    /* make a thread-specific commrec */
 +    snew(cr,1);
 +    /* now copy the whole thing, so settings like the number of PME nodes
 +       get propagated. */
 +    *cr=*cro;
 +
 +    /* and we start setting our own thread-specific values for things */
 +    MPI_Initialized(&initialized);
 +    if (!initialized)
 +    {
 +        gmx_comm("Initializing threads without comm");
 +    }
 +    /* once threads will be used together with MPI, we'll
 +       fill the cr structure with distinct data here. This might even work: */
 +    cr->sim_nodeid = gmx_setup(0,NULL, &cr->nnodes);
 +
 +    cr->mpi_comm_mysim = MPI_COMM_WORLD;
 +    cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
 +    cr->nodeid = cr->sim_nodeid;
 +    cr->duty = (DUTY_PP | DUTY_PME);
 +
 +    return cr;
 +#else
 +    return NULL;
 +#endif
 +}
Simple merge
index 0000000000000000000000000000000000000000,f222bf9d54ae69cedb16df2be6edb0ffa983def6..f222bf9d54ae69cedb16df2be6edb0ffa983def6
mode 000000,100644..100644
--- /dev/null
Simple merge
diff --cc src/gromacs/gmxpreprocess/pdb2top.c
index ed59e5e7328920c86f8a608311301790c9d4922c,0000000000000000000000000000000000000000..4e924417bfc8bd985b24d391321fe28796181320
mode 100644,000000..100644
--- /dev/null
@@@ -1,1579 -1,0 +1,1577 @@@
-     check_restp_type("all dihedrals",r0->bAlldih,r1->bAlldih);
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <stdio.h>
 +#include <math.h>
 +#include <ctype.h>
 +
 +#include "vec.h"
 +#include "copyrite.h"
 +#include "smalloc.h"
 +#include "macros.h"
 +#include "symtab.h"
 +#include "futil.h"
 +#include "statutil.h"
 +#include "gmx_fatal.h"
 +#include "pdb2top.h"
 +#include "gpp_nextnb.h"
 +#include "topdirs.h"
 +#include "toputil.h"
 +#include "h_db.h"
 +#include "pgutil.h"
 +#include "resall.h"
 +#include "topio.h"
 +#include "string2.h"
 +#include "physics.h"
 +#include "pdbio.h"
 +#include "gen_ad.h"
 +#include "filenm.h"
 +#include "index.h"
 +#include "gen_vsite.h"
 +#include "add_par.h"
 +#include "toputil.h"
 +#include "fflibutil.h"
 +#include "strdb.h"
 +
 +/* this must correspond to enum in pdb2top.h */
 +const char *hh[ehisNR]   = { "HISD", "HISE", "HISH", "HIS1" };
 +
 +static int missing_atoms(t_restp *rp, int resind,t_atoms *at, int i0, int i)
 +{
 +    int  j,k,nmiss;
 +    char *name;
 +    gmx_bool bFound, bRet;
 +    
 +    nmiss = 0;
 +    for (j=0; j<rp->natom; j++)
 +    {
 +        name=*(rp->atomname[j]);
 +        bFound=FALSE;
 +        for (k=i0; k<i; k++) 
 +        {
 +            bFound = (bFound || !gmx_strcasecmp(*(at->atomname[k]),name));
 +        }
 +        if (!bFound)
 +        {
 +            nmiss++;
 +            fprintf(stderr,"\nWARNING: "
 +                    "atom %s is missing in residue %s %d in the pdb file\n",
 +                    name,*(at->resinfo[resind].name),at->resinfo[resind].nr);
 +            if (name[0]=='H' || name[0]=='h')
 +            {
 +                fprintf(stderr,"         You might need to add atom %s to the hydrogen database of building block %s\n"
 +                        "         in the file %s.hdb (see the manual)\n",
 +                        name,*(at->resinfo[resind].rtp),rp->filebase);
 +            }
 +            fprintf(stderr,"\n");
 +        }
 +    }
 +  
 +    return nmiss;
 +}
 +
 +gmx_bool is_int(double x)
 +{
 +  const double tol = 1e-4;
 +  int   ix;
 +  
 +  if (x < 0)
 +    x=-x;
 +  ix=gmx_nint(x);
 +  
 +  return (fabs(x-ix) < tol);
 +}
 +
 +static void swap_strings(char **s,int i,int j)
 +{
 +    char *tmp;
 +
 +    tmp  = s[i];
 +    s[i] = s[j];
 +    s[j] = tmp;
 +}
 +
 +void
 +choose_ff(const char *ffsel,
 +          char *forcefield, int ff_maxlen,
 +          char *ffdir, int ffdir_maxlen)
 +{
 +    int  nff;
 +    char **ffdirs,**ffs,**ffs_dir,*ptr;
 +    int  i,j,sel,cwdsel,nfound;
 +    char buf[STRLEN],**desc;
 +    FILE *fp;
 +    char *pret;
 +    
 +    nff = fflib_search_file_in_dirend(fflib_forcefield_itp(),
 +                                      fflib_forcefield_dir_ext(),
 +                                      &ffdirs);
 +
 +    if (nff == 0)
 +    {
 +        gmx_fatal(FARGS,"No force fields found (files with name '%s' in subdirectories ending on '%s')",
 +                  fflib_forcefield_itp(),fflib_forcefield_dir_ext());
 +    }
 +
 +    /* Replace with unix path separators */
 +    if(DIR_SEPARATOR!='/')
 +    {
 +        for(i=0;i<nff;i++)
 +        {
 +            while( (ptr=strchr(ffdirs[i],DIR_SEPARATOR))!=NULL )
 +            {
 +                *ptr='/';
 +            }
 +        }
 +    }
 +    
 +    /* Store the force field names in ffs */
 +    snew(ffs,nff);
 +    snew(ffs_dir,nff);
 +    for(i=0; i<nff; i++)
 +    {
 +        /* Remove the path from the ffdir name - use our unix standard here! */
 +        ptr = strrchr(ffdirs[i],'/');
 +        if (ptr == NULL)
 +        {
 +            ffs[i] = strdup(ffdirs[i]);
 +            ffs_dir[i] = low_gmxlibfn(ffdirs[i],FALSE,FALSE);
 +            if (ffs_dir[i] == NULL)
 +            {
 +                gmx_fatal(FARGS,"Can no longer find file '%s'",ffdirs[i]);
 +            }
 +        }
 +        else
 +        {
 +            ffs[i] = strdup(ptr+1);
 +            ffs_dir[i] = strdup(ffdirs[i]);
 +        }
 +        ffs_dir[i][strlen(ffs_dir[i])-strlen(ffs[i])-1] = '\0';
 +        /* Remove the extension from the ffdir name */
 +        ffs[i][strlen(ffs[i])-strlen(fflib_forcefield_dir_ext())] = '\0';
 +    }
 +
 +    if (ffsel != NULL)
 +    {
 +        sel     = -1;
 +        cwdsel  = -1;
 +        nfound  = 0;
 +        for(i=0; i<nff; i++)
 +        {
 +            if ( strcmp(ffs[i],ffsel)==0 )
 +            {
 +                /* Matching ff name */
 +                sel = i;
 +                nfound++;
 +                
 +                if( strncmp(ffs_dir[i],".",1)==0 )
 +                {
 +                    cwdsel = i;
 +                }
 +            }
 +        }
 +        
 +        if(cwdsel != -1)
 +        {
 +            sel = cwdsel;
 +        }
 +        
 +        if(nfound>1)
 +        {
 +            if(cwdsel!=-1)
 +            {
 +                fprintf(stderr,
 +                        "Force field '%s' occurs in %d places. pdb2gmx is using the one in the\n"
 +                        "current directory. Use interactive selection (not the -ff option) if\n"
 +                        "you would prefer a different one.\n",ffsel,nfound);
 +            }
 +            else
 +            {
 +                gmx_fatal(FARGS,
 +                          "Force field '%s' occurs in %d places, but not in the current directory.\n"
 +                          "Run without the -ff switch and select the force field interactively.",ffsel,nfound);
 +            }
 +        }
 +        else if (nfound==0)
 +        {
 +            gmx_fatal(FARGS,"Could not find force field '%s' in current directory, install tree or GMXDATA path.",ffsel);
 +        }
 +    }
 +    else if (nff > 1)
 +    {
 +        snew(desc,nff);
 +        for(i=0; (i<nff); i++)
 +        {
 +            sprintf(buf,"%s%c%s%s%c%s",
 +                    ffs_dir[i],DIR_SEPARATOR,
 +                    ffs[i],fflib_forcefield_dir_ext(),DIR_SEPARATOR,
 +                    fflib_forcefield_doc());
 +            if (gmx_fexist(buf))
 +            {
 +                /* We don't use fflib_open, because we don't want printf's */
 +                fp = ffopen(buf,"r");
 +                snew(desc[i],STRLEN);
 +                get_a_line(fp,desc[i],STRLEN);
 +                ffclose(fp);
 +            }
 +            else
 +            {
 +                desc[i] = strdup(ffs[i]);
 +            }
 +        }
 +        /* Order force fields from the same dir alphabetically
 +         * and put deprecated force fields at the end.
 +         */
 +        for(i=0; (i<nff); i++)
 +        {
 +            for(j=i+1; (j<nff); j++)
 +            {
 +                if (strcmp(ffs_dir[i],ffs_dir[j]) == 0 &&
 +                    ((desc[i][0] == '[' && desc[j][0] != '[') ||
 +                     ((desc[i][0] == '[' || desc[j][0] != '[') &&
 +                      gmx_strcasecmp(desc[i],desc[j]) > 0)))
 +                {
 +                    swap_strings(ffdirs,i,j);
 +                    swap_strings(ffs   ,i,j);
 +                    swap_strings(desc  ,i,j);
 +                }
 +            }
 +        }
 +
 +        printf("\nSelect the Force Field:\n");
 +        for(i=0; (i<nff); i++)
 +        {
 +            if (i == 0 || strcmp(ffs_dir[i-1],ffs_dir[i]) != 0)
 +            {
 +                if( strcmp(ffs_dir[i],".")==0 )
 +                {
 +                    printf("From current directory:\n");
 +                }
 +                else
 +                {
 +                    printf("From '%s':\n",ffs_dir[i]);
 +                }
 +            }
 +            printf("%2d: %s\n",i+1,desc[i]);
 +            sfree(desc[i]);
 +        }
 +        sfree(desc);
 +
 +        do
 +        {
 +            pret = fgets(buf,STRLEN,stdin);
 +            
 +            if (pret != NULL)
 +            {
 +                sscanf(buf,"%d",&sel);
 +                sel--;
 +            }
 +        }
 +        while ( pret==NULL || (sel < 0) || (sel >= nff));
 +
 +        /* Check for a current limitation of the fflib code.
 +         * It will always read from the first ff directory in the list.
 +         * This check assumes that the order of ffs matches the order
 +         * in which fflib_open searches ff library files.
 +         */
 +        for(i=0; i<sel; i++)
 +        {
 +            if (strcmp(ffs[i],ffs[sel]) == 0)
 +            {
 +                gmx_fatal(FARGS,"Can only select the first of multiple force field entries with directory name '%s%s' in the list. If you want to use the next entry, run pdb2gmx in a different directory or rename or move the force field directory present in the current working directory.",
 +                          ffs[sel],fflib_forcefield_dir_ext());
 +            }
 +        }
 +    }
 +    else
 +    {
 +        sel = 0;
 +    }
 +
 +    if (strlen(ffs[sel]) >= (size_t)ff_maxlen)
 +    {
 +        gmx_fatal(FARGS,"Length of force field name (%d) >= maxlen (%d)",
 +                  strlen(ffs[sel]),ff_maxlen);
 +    }
 +    strcpy(forcefield,ffs[sel]);
 +
 +    if (strlen(ffdirs[sel]) >= (size_t)ffdir_maxlen)
 +    {
 +        gmx_fatal(FARGS,"Length of force field dir (%d) >= maxlen (%d)",
 +                  strlen(ffdirs[sel]),ffdir_maxlen);
 +    }
 +    strcpy(ffdir,ffdirs[sel]);
 +
 +    for(i=0; (i<nff); i++)
 +    {
 +        sfree(ffdirs[i]);
 +        sfree(ffs[i]);
 +        sfree(ffs_dir[i]);
 +    }
 +    sfree(ffdirs);
 +    sfree(ffs);
 +    sfree(ffs_dir);
 +}
 +
 +void choose_watermodel(const char *wmsel,const char *ffdir,
 +                       char **watermodel)
 +{
 +    const char *fn_watermodels="watermodels.dat";
 +    char fn_list[STRLEN];
 +    FILE *fp;
 +    char buf[STRLEN];
 +    int  nwm,sel,i;
 +    char **model;
 +    char *pret;
 +
 +    if (strcmp(wmsel,"none") == 0)
 +    {
 +        *watermodel = NULL;
 +        
 +        return;
 +    }
 +    else if (strcmp(wmsel,"select") != 0)
 +    {
 +        *watermodel = strdup(wmsel);
 +
 +        return;
 +    }
 +
 +    sprintf(fn_list,"%s%c%s",ffdir,DIR_SEPARATOR,fn_watermodels);
 +    
 +    if (!fflib_fexist(fn_list))
 +    {
 +        fprintf(stderr,"No file '%s' found, will not include a water model\n",
 +                fn_watermodels);
 +        *watermodel = NULL;
 +        
 +        return;
 +    }
 +
 +    fp = fflib_open(fn_list);
 +    printf("\nSelect the Water Model:\n");
 +    nwm = 0;
 +    model = NULL;
 +    while (get_a_line(fp,buf,STRLEN))
 +    {
 +        srenew(model,nwm+1);
 +        snew(model[nwm],STRLEN);
 +        sscanf(buf,"%s%n",model[nwm],&i);
 +        if (i > 0)
 +        {
 +            ltrim(buf+i);
 +            fprintf(stderr,"%2d: %s\n",nwm+1,buf+i);
 +            nwm++;
 +        }
 +        else
 +        {
 +            sfree(model[nwm]);
 +        }
 +    }
 +    ffclose(fp);
 +    fprintf(stderr,"%2d: %s\n",nwm+1,"None");
 +
 +    do
 +    {
 +        pret = fgets(buf,STRLEN,stdin);
 +        
 +        if (pret != NULL)
 +        {
 +            sscanf(buf,"%d",&sel);
 +            sel--;
 +        }
 +    }
 +    while (pret == NULL || sel < 0 || sel > nwm);
 +
 +    if (sel == nwm)
 +    {
 +        *watermodel = NULL;
 +    }
 +    else
 +    {
 +        *watermodel = strdup(model[sel]);
 +    }
 +
 +    for(i=0; i<nwm; i++)
 +    {
 +        sfree(model[i]);
 +    }
 +    sfree(model);
 +}
 +
 +static int name2type(t_atoms *at, int **cgnr, gpp_atomtype_t atype, 
 +                     t_restp restp[], gmx_residuetype_t rt)
 +{
 +  int     i,j,prevresind,resind,i0,prevcg,cg,curcg;
 +  char    *name;
 +  gmx_bool    bProt, bNterm;
 +  double  qt;
 +  int     nmissat;
 +    
 +  nmissat = 0;
 +
 +  resind=-1;
 +  bProt=FALSE;
 +  bNterm=FALSE;
 +  i0=0;
 +  snew(*cgnr,at->nr);
 +  qt=0;
 +  prevcg=NOTSET;
 +  curcg=0;
 +  cg=-1;
 +  j=NOTSET;
 +  
 +  for(i=0; (i<at->nr); i++) {
 +    prevresind=resind;
 +    if (at->atom[i].resind != resind) {
 +      resind = at->atom[i].resind;
 +      bProt = gmx_residuetype_is_protein(rt,*(at->resinfo[resind].name));
 +      bNterm=bProt && (resind == 0);
 +      if (resind > 0) {
 +          nmissat += missing_atoms(&restp[prevresind],prevresind,at,i0,i);
 +      }
 +      i0=i;
 +    }
 +    if (at->atom[i].m == 0) {
 +      if (debug)
 +      fprintf(debug,"atom %d%s: curcg=%d, prevcg=%d, cg=%d\n",
 +              i+1,*(at->atomname[i]),curcg,prevcg,
 +              j==NOTSET ? NOTSET : restp[resind].cgnr[j]);
 +      qt=0;
 +      prevcg=cg;
 +      name=*(at->atomname[i]);
 +      j=search_jtype(&restp[resind],name,bNterm);
 +      at->atom[i].type = restp[resind].atom[j].type;
 +      at->atom[i].q    = restp[resind].atom[j].q;
 +      at->atom[i].m    = get_atomtype_massA(restp[resind].atom[j].type,
 +                                          atype);
 +      cg = restp[resind].cgnr[j];
 +      /* A charge group number -1 signals a separate charge group
 +       * for this atom.
 +       */
 +      if ( (cg == -1) || (cg != prevcg) || (resind != prevresind) ) {
 +          curcg++;
 +      }
 +    } else {
 +      if (debug)
 +      fprintf(debug,"atom %d%s: curcg=%d, qt=%g, is_int=%d\n",
 +              i+1,*(at->atomname[i]),curcg,qt,is_int(qt));
 +      cg=-1;
 +      if (is_int(qt)) {
 +      qt=0;
 +      curcg++;
 +      }
 +      qt+=at->atom[i].q;
 +    }
 +    (*cgnr)[i]=curcg;
 +    at->atom[i].typeB = at->atom[i].type;
 +    at->atom[i].qB    = at->atom[i].q;
 +    at->atom[i].mB    = at->atom[i].m;
 +  }
 +  nmissat += missing_atoms(&restp[resind],resind,at,i0,i);
 +
 +  return nmissat;
 +}
 +
 +static void print_top_heavy_H(FILE *out, real mHmult)
 +{
 +  if (mHmult == 2.0) 
 +    fprintf(out,"; Using deuterium instead of hydrogen\n\n");
 +  else if (mHmult == 4.0)
 +    fprintf(out,"#define HEAVY_H\n\n");
 +  else if (mHmult != 1.0)
 +    fprintf(stderr,"WARNING: unsupported proton mass multiplier (%g) "
 +          "in pdb2top\n",mHmult);
 +}
 +
 +void print_top_comment(FILE *out,
 +                       const char *filename,
 +                       const char *generator,
 +                       const char *ffdir,
 +                       gmx_bool bITP)
 +{
 +  char tmp[256]; 
 +  char ffdir_parent[STRLEN];
 +  char *p;
 +        
 +  nice_header(out,filename);
 +  fprintf(out,";\tThis is a %s topology file\n;\n",bITP ? "include" : "standalone");
 +  fprintf(out,";\tIt was generated using program:\n;\t%s\n;\n",
 +          (NULL == generator) ? "unknown" : generator);
 +  fprintf(out,";\tCommand line was:\n;\t%s\n;\n",command_line());
 +
 +  if(strchr(ffdir,'/')==NULL)
 +  {
 +      fprintf(out,";\tForce field was read from the standard Gromacs share directory.\n;\n\n");
 +  }
 +  else if(ffdir[0]=='.')
 +  {
 +      fprintf(out,";\tForce field was read from current directory or a relative path - path added.\n;\n\n");
 +  }
 +  else
 +  {
 +      strncpy(ffdir_parent,ffdir,STRLEN-1);
 +      ffdir_parent[STRLEN-1]='\0'; /*make sure it is 0-terminated even for long string*/
 +      p=strrchr(ffdir_parent,'/');
 +
 +      *p='\0';
 +      
 +      fprintf(out,
 +              ";\tForce field data was read from:\n"
 +              ";\t%s\n"
 +              ";\n"
 +              ";\tNote:\n"
 +              ";\tThis might be a non-standard force field location. When you use this topology, the\n"
 +              ";\tforce field must either be present in the current directory, or the location\n"
 +              ";\tspecified in the GMXLIB path variable or with the 'include' mdp file option.\n;\n\n",
 +              ffdir_parent);
 +  }
 +}
 +
 +void print_top_header(FILE *out,const char *filename, 
 +                      const char *title,gmx_bool bITP,const char *ffdir,real mHmult)
 +{
 +    const char *p;
 +    
 +    print_top_comment(out,filename,title,ffdir,bITP);
 +    
 +    print_top_heavy_H(out, mHmult);
 +    fprintf(out,"; Include forcefield parameters\n");
 +
 +    p=strrchr(ffdir,'/');        
 +    p = (ffdir[0]=='.' || p==NULL) ? ffdir : p+1;
 +
 +    fprintf(out,"#include \"%s/%s\"\n\n",p,fflib_forcefield_itp());
 +}
 +
 +static void print_top_posre(FILE *out,const char *pr)
 +{
 +  fprintf(out,"; Include Position restraint file\n");
 +  fprintf(out,"#ifdef POSRES\n");
 +  fprintf(out,"#include \"%s\"\n",pr);
 +  fprintf(out,"#endif\n\n");
 +}
 +  
 +static void print_top_water(FILE *out,const char *ffdir,const char *water)
 +{
 +  const char *p;
 +  char  buf[STRLEN];
 +    
 +  fprintf(out,"; Include water topology\n");
 +
 +  p=strrchr(ffdir,'/');        
 +  p = (ffdir[0]=='.' || p==NULL) ? ffdir : p+1;
 +  fprintf(out,"#include \"%s/%s.itp\"\n",p,water);
 +  
 +  fprintf(out,"\n");
 +  fprintf(out,"#ifdef POSRES_WATER\n");
 +  fprintf(out,"; Position restraint for each water oxygen\n");
 +  fprintf(out,"[ position_restraints ]\n");
 +  fprintf(out,";%3s %5s %9s %10s %10s\n","i","funct","fcx","fcy","fcz");
 +  fprintf(out,"%4d %4d %10g %10g %10g\n",1,1,1000.0,1000.0,1000.0);
 +  fprintf(out,"#endif\n");
 +  fprintf(out,"\n");
 +
 +  sprintf(buf,"%s/ions.itp",p);
 +
 +  if (fflib_fexist(buf))
 +  {
 +    fprintf(out,"; Include topology for ions\n");
 +    fprintf(out,"#include \"%s\"\n",buf);
 +    fprintf(out,"\n");
 +  }
 +}
 +
 +static void print_top_system(FILE *out, const char *title)
 +{
 +  fprintf(out,"[ %s ]\n",dir2str(d_system));
 +  fprintf(out,"; Name\n");
 +  fprintf(out,"%s\n\n",title[0]?title:"Protein");
 +}
 +
 +void print_top_mols(FILE *out,
 +                    const char *title, const char *ffdir, const char *water,
 +                    int nincl, char **incls, int nmol, t_mols *mols)
 +{
 +  int  i;
 +  char *incl;
 +
 +  if (nincl>0) {
 +    fprintf(out,"; Include chain topologies\n");
 +    for (i=0; (i<nincl); i++) {
 +        incl = strrchr(incls[i],DIR_SEPARATOR);
 +        if (incl == NULL) {
 +            incl = incls[i];
 +        } else {
 +            /* Remove the path from the include name */
 +            incl = incl + 1;
 +        }
 +      fprintf(out,"#include \"%s\"\n",incl);
 +    }
 +    fprintf(out,"\n");
 +  }
 +
 +    if (water)
 +    {
 +      print_top_water(out,ffdir,water);
 +    }
 +    print_top_system(out, title);
 +  
 +  if (nmol) {
 +    fprintf(out,"[ %s ]\n",dir2str(d_molecules));
 +    fprintf(out,"; %-15s %5s\n","Compound","#mols");
 +    for (i=0; (i<nmol); i++)
 +      fprintf(out,"%-15s %5d\n",mols[i].name,mols[i].nr);
 +  }
 +}
 +
 +void write_top(FILE *out, char *pr,char *molname,
 +               t_atoms *at,gmx_bool bRTPresname,
 +               int bts[],t_params plist[],t_excls excls[],
 +               gpp_atomtype_t atype,int *cgnr, int nrexcl)
 +     /* NOTE: nrexcl is not the size of *excl! */
 +{
 +  if (at && atype && cgnr) {
 +    fprintf(out,"[ %s ]\n",dir2str(d_moleculetype));
 +    fprintf(out,"; %-15s %5s\n","Name","nrexcl");
 +    fprintf(out,"%-15s %5d\n\n",molname?molname:"Protein",nrexcl);
 +    
 +    print_atoms(out, atype, at, cgnr, bRTPresname);
 +    print_bondeds(out,at->nr,d_bonds,      F_BONDS,    bts[ebtsBONDS], plist);
 +    print_bondeds(out,at->nr,d_constraints,F_CONSTR,   0,              plist);
 +    print_bondeds(out,at->nr,d_constraints,F_CONSTRNC, 0,              plist);
 +    print_bondeds(out,at->nr,d_pairs,      F_LJ14,     0,              plist);
 +    print_excl(out,at->nr,excls);
 +    print_bondeds(out,at->nr,d_angles,     F_ANGLES,   bts[ebtsANGLES],plist);
 +    print_bondeds(out,at->nr,d_dihedrals,  F_PDIHS,    bts[ebtsPDIHS], plist);
 +    print_bondeds(out,at->nr,d_dihedrals,  F_IDIHS,    bts[ebtsIDIHS], plist);
 +    print_bondeds(out,at->nr,d_cmap,       F_CMAP,     bts[ebtsCMAP],  plist);
 +    print_bondeds(out,at->nr,d_polarization,F_POLARIZATION,   0,       plist);
 +    print_bondeds(out,at->nr,d_thole_polarization,F_THOLE_POL,0,       plist);
 +    print_bondeds(out,at->nr,d_vsites2,    F_VSITE2,   0,              plist);
 +    print_bondeds(out,at->nr,d_vsites3,    F_VSITE3,   0,              plist);
 +    print_bondeds(out,at->nr,d_vsites3,    F_VSITE3FD, 0,              plist);
 +    print_bondeds(out,at->nr,d_vsites3,    F_VSITE3FAD,0,              plist);
 +    print_bondeds(out,at->nr,d_vsites3,    F_VSITE3OUT,0,              plist);
 +    print_bondeds(out,at->nr,d_vsites4,    F_VSITE4FD, 0,              plist);
 +    print_bondeds(out,at->nr,d_vsites4,    F_VSITE4FDN, 0,             plist);
 +    
 +    if (pr)
 +      print_top_posre(out,pr);
 +  }
 +}
 +
 +static atom_id search_res_atom(const char *type,int resind,
 +                   t_atoms *atoms,
 +                             const char *bondtype,gmx_bool bAllowMissing)
 +{
 +  int i;
 +
 +  for(i=0; (i<atoms->nr); i++)
 +  {
 +    if (atoms->atom[i].resind == resind)
 +    {
 +      return search_atom(type,i,atoms,bondtype,bAllowMissing);
 +    }
 +  }
 +  
 +  return NO_ATID;
 +}
 +
 +static void do_ssbonds(t_params *ps,t_atoms *atoms,
 +                     int nssbonds,t_ssbond *ssbonds,gmx_bool bAllowMissing)
 +{
 +  int     i,ri,rj;
 +  atom_id ai,aj;
 +  
 +  for(i=0; (i<nssbonds); i++) {
 +    ri = ssbonds[i].res1;
 +    rj = ssbonds[i].res2;
 +    ai = search_res_atom(ssbonds[i].a1,ri,atoms,
 +                       "special bond",bAllowMissing);
 +    aj = search_res_atom(ssbonds[i].a2,rj,atoms,
 +                       "special bond",bAllowMissing);
 +    if ((ai == NO_ATID) || (aj == NO_ATID))
 +      gmx_fatal(FARGS,"Trying to make impossible special bond (%s-%s)!",
 +                ssbonds[i].a1,ssbonds[i].a2);
 +    add_param(ps,ai,aj,NULL,NULL);
 +  }
 +}
 +
 +static void at2bonds(t_params *psb, t_hackblock *hb,
 +                     t_atoms *atoms,
 +                     rvec x[],
 +                     real long_bond_dist, real short_bond_dist,
 +                     gmx_bool bAllowMissing)
 +{
 +  int     resind,i,j,k;
 +  atom_id ai,aj;
 +  real    dist2, long_bond_dist2, short_bond_dist2;
 +  const char *ptr;
 +
 +  long_bond_dist2  = sqr(long_bond_dist);
 +  short_bond_dist2 = sqr(short_bond_dist);
 +
 +  if (debug)
 +    ptr = "bond";
 +  else
 +    ptr = "check";
 +
 +  fprintf(stderr,"Making bonds...\n");
 +  i=0;
 +  for(resind=0; (resind < atoms->nres) && (i<atoms->nr); resind++) {
 +    /* add bonds from list of bonded interactions */
 +    for(j=0; j < hb[resind].rb[ebtsBONDS].nb; j++) {
 +      /* Unfortunately we can not issue errors or warnings
 +       * for missing atoms in bonds, as the hydrogens and terminal atoms
 +       * have not been added yet.
 +       */
 +      ai=search_atom(hb[resind].rb[ebtsBONDS].b[j].AI,i,atoms,
 +                   ptr,TRUE);
 +      aj=search_atom(hb[resind].rb[ebtsBONDS].b[j].AJ,i,atoms,
 +                   ptr,TRUE);
 +      if (ai != NO_ATID && aj != NO_ATID) {
 +          dist2 = distance2(x[ai],x[aj]);
 +          if (dist2 > long_bond_dist2 )
 +          {
 +              fprintf(stderr,"Warning: Long Bond (%d-%d = %g nm)\n",
 +                      ai+1,aj+1,sqrt(dist2));
 +          }
 +          else if (dist2 < short_bond_dist2 )
 +          {
 +              fprintf(stderr,"Warning: Short Bond (%d-%d = %g nm)\n",
 +                      ai+1,aj+1,sqrt(dist2));
 +          }
 +          add_param(psb,ai,aj,NULL,hb[resind].rb[ebtsBONDS].b[j].s);
 +      }
 +    }
 +    /* add bonds from list of hacks (each added atom gets a bond) */
 +    while( (i<atoms->nr) && (atoms->atom[i].resind == resind) ) {
 +      for(j=0; j < hb[resind].nhack; j++)
 +      if ( ( hb[resind].hack[j].tp > 0 ||
 +             hb[resind].hack[j].oname==NULL ) &&
 +           strcmp(hb[resind].hack[j].AI,*(atoms->atomname[i])) == 0 ) {
 +        switch(hb[resind].hack[j].tp) {
 +        case 9:          /* COOH terminus */
 +          add_param(psb,i,i+1,NULL,NULL);     /* C-O  */
 +          add_param(psb,i,i+2,NULL,NULL);     /* C-OA */
 +          add_param(psb,i+2,i+3,NULL,NULL);   /* OA-H */
 +          break;
 +        default:
 +          for(k=0; (k<hb[resind].hack[j].nr); k++)
 +            add_param(psb,i,i+k+1,NULL,NULL);
 +        }
 +      }
 +      i++;
 +    }
 +    /* we're now at the start of the next residue */
 +  }
 +}
 +
 +static int pcompar(const void *a, const void *b)
 +{
 +  t_param *pa,*pb;
 +  int     d;
 +  pa=(t_param *)a;
 +  pb=(t_param *)b;
 +  
 +  d = pa->AI - pb->AI;
 +  if (d == 0) 
 +    d = pa->AJ - pb->AJ;
 +  if (d == 0)
 +    return strlen(pb->s) - strlen(pa->s);
 +  else
 +    return d;
 +}
 +
 +static void clean_bonds(t_params *ps)
 +{
 +  int     i,j;
 +  atom_id a;
 +  
 +  if (ps->nr > 0) {
 +    /* swap atomnumbers in bond if first larger than second: */
 +    for(i=0; (i<ps->nr); i++)
 +      if ( ps->param[i].AJ < ps->param[i].AI ) {
 +      a = ps->param[i].AI;
 +      ps->param[i].AI = ps->param[i].AJ;
 +      ps->param[i].AJ = a;
 +      }
 +    
 +    /* Sort bonds */
 +    qsort(ps->param,ps->nr,(size_t)sizeof(ps->param[0]),pcompar);
 +    
 +    /* remove doubles, keep the first one always. */
 +    j = 1;
 +    for(i=1; (i<ps->nr); i++) {
 +      if ((ps->param[i].AI != ps->param[j-1].AI) ||
 +        (ps->param[i].AJ != ps->param[j-1].AJ) ) {
 +        if (j != i) {
 +          cp_param(&(ps->param[j]),&(ps->param[i]));
 +        }
 +      j++;
 +      } 
 +    }
 +    fprintf(stderr,"Number of bonds was %d, now %d\n",ps->nr,j);
 +    ps->nr=j;
 +  }
 +  else
 +    fprintf(stderr,"No bonds\n");
 +}
 +
 +void print_sums(t_atoms *atoms, gmx_bool bSystem)
 +{
 +  double m,qtot;
 +  int    i;
 +  const char *where;
 +  
 +  if (bSystem)
 +    where=" in system";
 +  else
 +    where="";
 +  
 +  m=0;
 +  qtot=0;
 +  for(i=0; (i<atoms->nr); i++) {
 +    m+=atoms->atom[i].m;
 +    qtot+=atoms->atom[i].q;
 +  }
 +  
 +  fprintf(stderr,"Total mass%s %.3f a.m.u.\n",where,m);
 +  fprintf(stderr,"Total charge%s %.3f e\n",where,qtot);
 +}
 +
 +static void check_restp_type(const char *name,int t1,int t2)
 +{
 +    if (t1 != t2)
 +    {
 +        gmx_fatal(FARGS,"Residues in one molecule have a different '%s' type: %d and %d",name,t1,t2);
 +    }
 +}
 +
 +static void check_restp_types(t_restp *r0,t_restp *r1)
 +{
 +    int i;
 +
-     check_restp_type("HH14",r0->HH14,r1->HH14);
-     check_restp_type("remove dihedrals",r0->bRemoveDih,r1->bRemoveDih);
++    check_restp_type("all dihedrals",r0->bKeepAllGeneratedDihedrals,r1->bKeepAllGeneratedDihedrals);
 +    check_restp_type("nrexcl",r0->nrexcl,r1->nrexcl);
-   gen_pad(&nnb,atoms,restp[0].nrexcl,restp[0].HH14,
-           plist,excls,hb,restp[0].bAlldih,restp[0].bRemoveDih,
-           bAllowMissing);
++    check_restp_type("HH14",r0->bGenerateHH14Interactions,r1->bGenerateHH14Interactions);
++    check_restp_type("remove dihedrals",r0->bRemoveDihedralIfWithImproper,r1->bRemoveDihedralIfWithImproper);
 +
 +    for(i=0; i<ebtsNR; i++)
 +    {
 +        check_restp_type(btsNames[i],r0->rb[i].type,r1->rb[i].type);
 +    }
 +}
 +
 +void add_atom_to_restp(t_restp *restp,int resnr,int at_start,const t_hack *hack)
 +{
 +    char buf[STRLEN];
 +    int  k;
 +    const char *Hnum="123456";
 +
 +    /*if (debug)
 +    {
 +        fprintf(debug,"adding atom(s) %s to atom %s in res %d%s in rtp\n",
 +                hack->nname,
 +                *restp->atomname[at_start], resnr, restp->resname);
 +                }*/
 +    strcpy(buf, hack->nname);
 +    buf[strlen(buf)+1]='\0';
 +    if ( hack->nr > 1 )
 +    {
 +        buf[strlen(buf)]='-';
 +    }
 +    /* make space */
 +    restp->natom += hack->nr;
 +    srenew(restp->atom,     restp->natom);
 +    srenew(restp->atomname, restp->natom);
 +    srenew(restp->cgnr,     restp->natom);
 +    /* shift the rest */
 +    for(k=restp->natom-1; k > at_start+hack->nr; k--)
 +    {
 +        restp->atom[k] =
 +            restp->atom    [k - hack->nr];
 +        restp->atomname[k] =
 +            restp->atomname[k - hack->nr];
 +        restp->cgnr[k] =
 +            restp->cgnr    [k - hack->nr];
 +    }
 +    /* now add them */
 +    for(k=0; k < hack->nr; k++)
 +    {
 +        /* set counter in atomname */
 +        if ( hack->nr > 1 )
 +        {
 +            buf[strlen(buf)-1] = Hnum[k];
 +        }
 +        snew( restp->atomname[at_start+1+k], 1);
 +        restp->atom     [at_start+1+k] = *hack->atom;
 +        *restp->atomname[at_start+1+k] = strdup(buf);
 +        if ( hack->cgnr != NOTSET )
 +        {
 +            restp->cgnr   [at_start+1+k] = hack->cgnr;
 +        }
 +        else
 +        {
 +            restp->cgnr   [at_start+1+k] = restp->cgnr[at_start];
 +        }
 +    }
 +}
 +
 +void get_hackblocks_rtp(t_hackblock **hb, t_restp **restp, 
 +                             int nrtp, t_restp rtp[],
 +                             int nres, t_resinfo *resinfo, 
 +                             int nterpairs,
 +                             t_hackblock **ntdb, t_hackblock **ctdb,
 +                             int *rn, int *rc)
 +{
 +  int i, j, k, l;
 +  char *key;
 +  t_restp *res;
 +  char buf[STRLEN];
 +  const char *Hnum="123456";
 +  int tern,terc;
 +  gmx_bool bN,bC,bRM;
 +
 +  snew(*hb,nres);
 +  snew(*restp,nres);
 +  /* first the termini */
 +  for(i=0; i<nterpairs; i++) {
 +      if (rn[i] >= 0 && ntdb[i] != NULL) {
 +          copy_t_hackblock(ntdb[i], &(*hb)[rn[i]]);
 +      }
 +      if (rc[i] >= 0 && ctdb[i] != NULL) {
 +          merge_t_hackblock(ctdb[i], &(*hb)[rc[i]]);
 +      }
 +  }  
 +
 +  /* then the whole rtp */
 +  for(i=0; i < nres; i++) {
 +    /* Here we allow a mismatch of one character when looking for the rtp entry.
 +     * For such a mismatch there should be only one mismatching name.
 +     * This is mainly useful for small molecules such as ions.
 +     * Note that this will usually not work for protein, DNA and RNA,
 +     * since there the residue names should be listed in residuetypes.dat
 +     * and an error will have been generated earlier in the process.
 +     */
 +    key = *resinfo[i].rtp;
 +    snew(resinfo[i].rtp,1);
 +    *resinfo[i].rtp = search_rtp(key,nrtp,rtp);
 +    res = get_restp(*resinfo[i].rtp,nrtp,rtp);
 +    copy_t_restp(res, &(*restp)[i]);
 +
 +    /* Check that we do not have different bonded types in one molecule */
 +    check_restp_types(&(*restp)[0],&(*restp)[i]);
 +
 +    tern = -1;
 +    for(j=0; j<nterpairs && tern==-1; j++) {
 +        if (i == rn[j]) {
 +            tern = j;
 +        }
 +    }
 +    terc = -1;
 +    for(j=0; j<nterpairs && terc == -1; j++) {
 +        if (i == rc[j]) {
 +            terc = j;
 +        }
 +    }
 +    bRM = merge_t_bondeds(res->rb, (*hb)[i].rb,tern>=0,terc>=0);
 +
 +    if (bRM && ((tern >= 0 && ntdb[tern] == NULL) ||
 +                (terc >= 0 && ctdb[terc] == NULL))) {
 +        gmx_fatal(FARGS,"There is a dangling bond at at least one of the terminal ends and the force field does not provide terminal entries or files. Fix your terminal residues so that they match the residue database (.rtp) entries, or provide terminal database entries (.tdb).");
 +    }
 +    if (bRM && ((tern >= 0 && ntdb[tern]->nhack == 0) ||
 +                (terc >= 0 && ctdb[terc]->nhack == 0))) {
 +        gmx_fatal(FARGS,"There is a dangling bond at at least one of the terminal ends. Fix your coordinate file, add a new terminal database entry (.tdb), or select the proper existing terminal entry.");
 +    }
 +  }
 +  
 +  /* now perform t_hack's on t_restp's,
 +     i.e. add's and deletes from termini database will be 
 +     added to/removed from residue topology 
 +     we'll do this on one big dirty loop, so it won't make easy reading! */
 +    for(i=0; i < nres; i++)
 +    {
 +        for(j=0; j < (*hb)[i].nhack; j++)
 +        {
 +            if ( (*hb)[i].hack[j].nr )
 +            {
 +                /* find atom in restp */
 +                for(l=0; l < (*restp)[i].natom; l++)
 +                    if ( ( (*hb)[i].hack[j].oname==NULL && 
 +                           strcmp((*hb)[i].hack[j].AI, *(*restp)[i].atomname[l])==0 ) ||
 +                         ( (*hb)[i].hack[j].oname!=NULL &&
 +                           strcmp((*hb)[i].hack[j].oname,*(*restp)[i].atomname[l])==0 ) )
 +                        break;
 +                if (l == (*restp)[i].natom)
 +                {
 +                    /* If we are doing an atom rename only, we don't need
 +                     * to generate a fatal error if the old name is not found
 +                     * in the rtp.
 +                     */
 +                    /* Deleting can happen also only on the input atoms,
 +                     * not necessarily always on the rtp entry.
 +                     */
 +                    if (!((*hb)[i].hack[j].oname != NULL &&
 +                          (*hb)[i].hack[j].nname != NULL) &&
 +                        !((*hb)[i].hack[j].oname != NULL &&
 +                          (*hb)[i].hack[j].nname == NULL))
 +                    {
 +                        gmx_fatal(FARGS,
 +                                  "atom %s not found in building block %d%s "
 +                                  "while combining tdb and rtp",
 +                                  (*hb)[i].hack[j].oname!=NULL ? 
 +                                  (*hb)[i].hack[j].oname : (*hb)[i].hack[j].AI, 
 +                                  i+1,*resinfo[i].rtp);
 +                    }
 +                }
 +                else
 +                {
 +                    if ( (*hb)[i].hack[j].oname==NULL ) {
 +                        /* we're adding: */
 +                        add_atom_to_restp(&(*restp)[i],resinfo[i].nr,l,
 +                                          &(*hb)[i].hack[j]);
 +                    }
 +                    else
 +                    {
 +                        /* oname != NULL */
 +                        if ( (*hb)[i].hack[j].nname==NULL ) {
 +                            /* we're deleting */
 +                            if (debug) 
 +                                fprintf(debug, "deleting atom %s from res %d%s in rtp\n",
 +                                        *(*restp)[i].atomname[l], 
 +                                        i+1,(*restp)[i].resname);
 +                            /* shift the rest */
 +                            (*restp)[i].natom--;
 +                            for(k=l; k < (*restp)[i].natom; k++) {
 +                                (*restp)[i].atom    [k] = (*restp)[i].atom    [k+1];
 +                                (*restp)[i].atomname[k] = (*restp)[i].atomname[k+1];
 +                                (*restp)[i].cgnr    [k] = (*restp)[i].cgnr    [k+1];
 +                            }
 +                            /* give back space */
 +                            srenew((*restp)[i].atom,     (*restp)[i].natom);
 +                            srenew((*restp)[i].atomname, (*restp)[i].natom);
 +                            srenew((*restp)[i].cgnr,     (*restp)[i].natom);
 +                        } else { /* nname != NULL */
 +                            /* we're replacing */
 +                            if (debug) 
 +                                fprintf(debug, "replacing atom %s by %s in res %d%s in rtp\n",
 +                                        *(*restp)[i].atomname[l], (*hb)[i].hack[j].nname, 
 +                                        i+1,(*restp)[i].resname);
 +                            snew( (*restp)[i].atomname[l], 1);
 +                            (*restp)[i].atom[l]      =       *(*hb)[i].hack[j].atom;
 +                            *(*restp)[i].atomname[l] = strdup((*hb)[i].hack[j].nname);
 +                            if ( (*hb)[i].hack[j].cgnr != NOTSET )
 +                                (*restp)[i].cgnr   [l] = (*hb)[i].hack[j].cgnr;
 +                        }
 +                    }
 +                }
 +            }
 +        }
 +    }
 +}
 +
 +static gmx_bool atomname_cmp_nr(const char *anm,t_hack *hack,int *nr)
 +{
 +
 +    if (hack->nr == 1)
 +    {
 +        *nr = 0;
 +        
 +        return (gmx_strcasecmp(anm,hack->nname) == 0);
 +    }
 +    else
 +    {
 +        if (isdigit(anm[strlen(anm)-1]))
 +        {
 +            *nr = anm[strlen(anm)-1] - '0';
 +        }
 +        else
 +        {
 +            *nr = 0;
 +        }
 +        if (*nr <= 0 || *nr > hack->nr)
 +        {
 +            return FALSE;
 +        }
 +        else
 +        {
 +            return (strlen(anm) == strlen(hack->nname) + 1 &&
 +                    gmx_strncasecmp(anm,hack->nname,strlen(hack->nname)) == 0);
 +        }
 +    }
 +}
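 +/* Illustrative example of the numbered-suffix matching above (values are
 + * hypothetical): for a hack entry with nname "HB" and nr = 3,
 + *   anm "HB2" -> trailing digit 2 is in 1..3 and "HB2" is "HB" plus one
 + *                digit, so *nr is set to 2 and TRUE is returned;
 + *   anm "HB4" -> digit 4 exceeds hack->nr, FALSE;
 + *   anm "HB"  -> no trailing digit, *nr = 0, FALSE.
 + * When hack->nr == 1 only an exact case-insensitive match counts. */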
 +
 +static gmx_bool match_atomnames_with_rtp_atom(t_atoms *pdba,rvec *x,int atind,
 +                                          t_restp *rptr,t_hackblock *hbr,
 +                                          gmx_bool bVerbose)
 +{
 +    int  resnr;
 +    int  i,j,k;
 +    char *oldnm,*newnm;
 +    int  anmnr;
 +    char *start_at,buf[STRLEN];
 +    int  start_nr;
 +    gmx_bool bReplaceReplace,bFoundInAdd;
 +    gmx_bool bDeleted;
 +
 +    oldnm = *pdba->atomname[atind];
 +    resnr = pdba->resinfo[pdba->atom[atind].resind].nr;
 +
 +    bDeleted = FALSE;
 +    for(j=0; j<hbr->nhack; j++)
 +    {
 +        if (hbr->hack[j].oname != NULL && hbr->hack[j].nname != NULL &&
 +            gmx_strcasecmp(oldnm,hbr->hack[j].oname) == 0)
 +        {
 +            /* This is a replace entry. */
 +            /* Check if we are not replacing a replaced atom. */
 +            bReplaceReplace = FALSE;
 +            for(k=0; k<hbr->nhack; k++) {
 +                if (k != j &&
 +                    hbr->hack[k].oname != NULL && hbr->hack[k].nname != NULL &&
 +                    gmx_strcasecmp(hbr->hack[k].nname,hbr->hack[j].oname) == 0)
 +                {
 +                    /* The replace in hack[j] replaces an atom that
 +                     * was already replaced in hack[k], we do not want
 +                     * second or higher level replaces at this stage.
 +                     */
 +                    bReplaceReplace = TRUE;
 +                }
 +            }
 +            if (bReplaceReplace)
 +            {
 +                /* Skip this replace. */
 +                continue;
 +            }
 +
 +            /* This atom still has the old name, rename it */
 +            newnm = hbr->hack[j].nname;
 +            for(k=0; k<rptr->natom; k++)
 +            {
 +                if (gmx_strcasecmp(newnm,*rptr->atomname[k]) == 0)
 +                {
 +                    break;
 +                }
 +            }
 +            if (k == rptr->natom)
 +            {
 +                /* The new name is not present in the rtp.
 +                 * We need to apply the replace also to the rtp entry.
 +                 */
 +                
 +                /* We need to find the add hack that can add this atom
 +                 * to find out after which atom it should be added.
 +                 */
 +                bFoundInAdd = FALSE;
 +                for(k=0; k<hbr->nhack; k++)
 +                {
 +                    if (hbr->hack[k].oname == NULL &&
 +                        hbr->hack[k].nname != NULL &&
 +                        atomname_cmp_nr(newnm,&hbr->hack[k],&anmnr))
 +                    {
 +                        if (anmnr <= 1)
 +                        {
 +                            start_at = hbr->hack[k].a[0];
 +                        }
 +                        else
 +                        {
 +                            sprintf(buf,"%s%d",hbr->hack[k].nname,anmnr-1);
 +                            start_at = buf;
 +                        }
 +                        for(start_nr=0; start_nr<rptr->natom; start_nr++)
 +                        {
 +                            if (gmx_strcasecmp(start_at,(*rptr->atomname[start_nr])) == 0)
 +                            {
 +                                break;
 +                            }
 +                        }
 +                        if (start_nr == rptr->natom)
 +                        {
 +                            gmx_fatal(FARGS,"Could not find atom '%s' in residue building block '%s' to add atom '%s' to",
 +                                      start_at,rptr->resname,newnm);
 +                        }
 +                        /* We can add the atom after atom start_nr */
 +                        add_atom_to_restp(rptr,resnr,start_nr,
 +                                          &hbr->hack[j]);
 +                        
 +                        bFoundInAdd = TRUE;
 +                    }
 +                }
 +
 +                if (!bFoundInAdd)
 +                {
 +                    gmx_fatal(FARGS,"Could not find an 'add' entry for atom named '%s' corresponding to the 'replace' entry from atom name '%s' to '%s' for tdb or hdb database of residue type '%s'",
 +                              newnm,
 +                              hbr->hack[j].oname,hbr->hack[j].nname,
 +                              rptr->resname);
 +                }
 +            }
 +                
 +            if (bVerbose)
 +            {
 +                printf("Renaming atom '%s' in residue '%s' %d to '%s'\n",
 +                       oldnm,rptr->resname,resnr,newnm);
 +            }
 +            /* Rename the atom in pdba */
 +            snew(pdba->atomname[atind],1);
 +            *pdba->atomname[atind] = strdup(newnm);
 +        }
 +        else if (hbr->hack[j].oname != NULL && hbr->hack[j].nname == NULL &&
 +                 gmx_strcasecmp(oldnm,hbr->hack[j].oname) == 0)
 +        {
 +            /* This is a delete entry, check if this atom is present
 +             * in the rtp entry of this residue.
 +             */
 +            for(k=0; k<rptr->natom; k++)
 +            {
 +                if (gmx_strcasecmp(oldnm,*rptr->atomname[k]) == 0)
 +                {
 +                    break;
 +                }
 +            }
 +            if (k == rptr->natom)
 +            {
 +                /* This atom is not present in the rtp entry,
 +                 * delete it now from the input pdba.
 +                 */
 +                if (bVerbose)
 +                {
 +                    printf("Deleting atom '%s' in residue '%s' %d\n",
 +                           oldnm,rptr->resname,resnr);
 +                }
 +                /* We should free the atom name,
 +                 * but it might be used multiple times in the symtab.
 +                 * sfree(pdba->atomname[atind]);
 +                 */
 +                for(k=atind+1; k<pdba->nr; k++)
 +                {
 +                    pdba->atom[k-1]     = pdba->atom[k];
 +                    pdba->atomname[k-1] = pdba->atomname[k];
 +                    copy_rvec(x[k],x[k-1]);
 +                }
 +                pdba->nr--;
 +                bDeleted = TRUE;
 +            }
 +        }
 +    }
 +
 +    return bDeleted;
 +}
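 +/* Sketch of the hack handling above, with hypothetical names: a replace
 + * entry (oname "HN", nname "H") renames the pdba atom; if "H" is not in the
 + * rtp entry, the matching add entry is looked up with atomname_cmp_nr() so
 + * the atom can be inserted into the rtp entry at the right position.  A
 + * delete entry (nname == NULL) removes the atom from pdba only when the rtp
 + * entry does not contain it, shifting the remaining atoms and coordinates. */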
 +    
 +void match_atomnames_with_rtp(t_restp restp[],t_hackblock hb[],
 +                              t_atoms *pdba,rvec *x,
 +                              gmx_bool bVerbose)
 +{
 +    int  i,j,k;
 +    char *oldnm,*newnm;
 +    int  resnr;
 +    t_restp *rptr;
 +    t_hackblock *hbr;
 +    int  anmnr;
 +    char *start_at,buf[STRLEN];
 +    int  start_nr;
 +    gmx_bool bFoundInAdd;
 +    
 +    for(i=0; i<pdba->nr; i++)
 +    {
 +        oldnm = *pdba->atomname[i];
 +        resnr = pdba->resinfo[pdba->atom[i].resind].nr;
 +        rptr  = &restp[pdba->atom[i].resind];
 +        for(j=0; (j<rptr->natom); j++)
 +        {
 +            if (gmx_strcasecmp(oldnm,*(rptr->atomname[j])) == 0)
 +            {
 +                break;
 +            }
 +        }
 +        if (j == rptr->natom)
 +        {
 +            /* Not found yet, check if we have to rename this atom */
 +            if (match_atomnames_with_rtp_atom(pdba,x,i,
 +                                              rptr,&(hb[pdba->atom[i].resind]),
 +                                              bVerbose))
 +            {
 +                /* We deleted this atom, decrease the atom counter by 1. */
 +                i--;
 +            }
 +        }
 +    }
 +}
 +
 +#define NUM_CMAP_ATOMS 5
 +static void gen_cmap(t_params *psb, t_restp *restp, t_atoms *atoms, gmx_residuetype_t rt)
 +{
 +    int residx,i,j,k;
 +    const char *ptr;
 +    t_resinfo *resinfo = atoms->resinfo;
 +    int nres = atoms->nres;
 +    gmx_bool bAddCMAP;
 +    atom_id cmap_atomid[NUM_CMAP_ATOMS];
 +    int cmap_chainnum=-1, this_residue_index;
 +
 +      if (debug)
 +              ptr = "cmap";
 +      else
 +              ptr = "check";
 +      
 +      fprintf(stderr,"Making cmap torsions...");
 +      i=0;
 +      /* End loop at nres-1, since the very last residue does not have a +N atom, and
 +       * therefore we get a valgrind invalid 4 byte read error with atom am */
 +      for(residx=0; residx<nres-1; residx++)
 +      {
 +              /* Add CMAP terms from the list of CMAP interactions */
 +              for(j=0;j<restp[residx].rb[ebtsCMAP].nb; j++)
 +              {
 +            bAddCMAP = TRUE;
 +            /* Loop over atoms in a candidate CMAP interaction and
 +             * check that they exist, are from the same chain and are
 +             * from residues labelled as protein. */
 +            for(k = 0; k < NUM_CMAP_ATOMS && bAddCMAP; k++)
 +            {
 +                cmap_atomid[k] = search_atom(restp[residx].rb[ebtsCMAP].b[j].a[k],
 +                                             i,atoms,ptr,TRUE);
 +                bAddCMAP = bAddCMAP && (cmap_atomid[k] != NO_ATID);
 +                if (!bAddCMAP)
 +                {
 +                    /* This break is necessary, because cmap_atomid[k]
 +                     * == NO_ATID cannot be safely used as an index
 +                     * into the atom array. */
 +                    break;
 +                }
 +                this_residue_index = atoms->atom[cmap_atomid[k]].resind;
 +                if (0 == k)
 +                {
 +                    cmap_chainnum = resinfo[this_residue_index].chainnum;
 +                }
 +                else
 +                {
 +                    /* Does the residue for this atom have the same
 +                     * chain number as the residues for previous
 +                     * atoms? */
 +                    bAddCMAP = bAddCMAP &&
 +                        cmap_chainnum == resinfo[this_residue_index].chainnum;
 +                }
 +                bAddCMAP = bAddCMAP && gmx_residuetype_is_protein(rt,*(resinfo[this_residue_index].name));
 +            }
 +
 +            if(bAddCMAP)
 +            {
 +                add_cmap_param(psb,cmap_atomid[0],cmap_atomid[1],cmap_atomid[2],cmap_atomid[3],cmap_atomid[4],restp[residx].rb[ebtsCMAP].b[j].s);
 +                      }
 +              }
 +              
 +              if(residx<nres-1)
 +              {
 +                      while(atoms->atom[i].resind<residx+1)
 +                      {
 +                              i++;
 +                      }
 +              }
 +      }
 +      
 +      /* Start the next residue */
 +}
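 +/* Note on gen_cmap() above: each cmap entry of a residue names
 + * NUM_CMAP_ATOMS (5) atoms; search_atom() resolves them starting from atom
 + * index i of the current residue, and the term is only added when all five
 + * atoms are found, lie in the same chain and belong to protein residues.
 + * In typical protein force fields these are the phi/psi backbone atoms
 + * (for example -C, N, CA, C, +N), but the names come from the cmap entries
 + * of the force field itself. */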
 +
 +static void 
 +scrub_charge_groups(int *cgnr, int natoms)
 +{
 +      int i;
 +      
 +      for(i=0;i<natoms;i++)
 +      {
 +              cgnr[i]=i+1;
 +      }
 +}
 +
 +
 +void pdb2top(FILE *top_file, char *posre_fn, char *molname,
 +             t_atoms *atoms, rvec **x, gpp_atomtype_t atype, t_symtab *tab,
 +             int nrtp, t_restp rtp[],
 +             t_restp *restp, t_hackblock *hb,
 +             int nterpairs,t_hackblock **ntdb, t_hackblock **ctdb,
 +             gmx_bool bAllowMissing,
 +             gmx_bool bVsites, gmx_bool bVsiteAromatics,
 +             const char *ff, const char *ffdir,
 +             real mHmult,
 +             int nssbonds, t_ssbond *ssbonds,
 +             real long_bond_dist, real short_bond_dist,
 +             gmx_bool bDeuterate, gmx_bool bChargeGroups, gmx_bool bCmap,
 +             gmx_bool bRenumRes,gmx_bool bRTPresname)
 +{
 +    /*
 +  t_hackblock *hb;
 +  t_restp  *restp;
 +    */
 +  t_params plist[F_NRE];
 +  t_excls  *excls;
 +  t_nextnb nnb;
 +  int      *cgnr;
 +  int      *vsite_type;
 +  int      i,nmissat;
 +  int      bts[ebtsNR];
 +  gmx_residuetype_t rt;
 +  
 +  init_plist(plist);
 +  gmx_residuetype_init(&rt);
 +
 +  if (debug) {
 +    print_resall(debug, atoms->nres, restp, atype);
 +    dump_hb(debug, atoms->nres, hb);
 +  }
 +  
 +  /* Make bonds */
 +  at2bonds(&(plist[F_BONDS]), hb, 
 +           atoms, *x,
 +           long_bond_dist, short_bond_dist, bAllowMissing);
 +  
 +  /* specbonds: disulphide bonds & heme-his */
 +  do_ssbonds(&(plist[F_BONDS]),
 +           atoms, nssbonds, ssbonds,
 +           bAllowMissing);
 +  
 +  nmissat = name2type(atoms, &cgnr, atype, restp, rt);
 +  if (nmissat) {
 +    if (bAllowMissing)
 +      fprintf(stderr,"There were %d missing atoms in molecule %s\n",
 +            nmissat,molname);
 +    else
 +      gmx_fatal(FARGS,"There were %d missing atoms in molecule %s, if you want to use this incomplete topology anyhow, use the option -missing",
 +                nmissat,molname);
 +  }
 +  
 +  /* Cleanup bonds (sort and rm doubles) */ 
 +  clean_bonds(&(plist[F_BONDS]));
 +  
 +  snew(vsite_type,atoms->nr);
 +  for(i=0; i<atoms->nr; i++)
 +    vsite_type[i]=NOTSET;
 +  if (bVsites) {
 +    /* determine which atoms will be vsites and add dummy masses 
 +       also renumber atom numbers in plist[0..F_NRE]! */
 +    do_vsites(nrtp, rtp, atype, atoms, tab, x, plist, 
 +              &vsite_type, &cgnr, mHmult, bVsiteAromatics, ffdir);
 +  }
 +  
 +  /* Make Angles and Dihedrals */
 +  fprintf(stderr,"Generating angles, dihedrals and pairs...\n");
 +  snew(excls,atoms->nr);
 +  init_nnb(&nnb,atoms->nr,4);
 +  gen_nnb(&nnb,plist);
 +  print_nnb(&nnb,"NNB");
++  gen_pad(&nnb,atoms,restp,plist,excls,hb,bAllowMissing);
 +  done_nnb(&nnb);
 +  
 +    /* Make CMAP */
 +    if (TRUE == bCmap)
 +    {
 +        gen_cmap(&(plist[F_CMAP]), restp, atoms, rt);
 +        if (plist[F_CMAP].nr > 0)
 +        {
 +            fprintf(stderr, "There are %4d cmap torsion pairs\n",
 +                    plist[F_CMAP].nr);
 +        }
 +    }
 +
 +  /* set mass of all remaining hydrogen atoms */
 +  if (mHmult != 1.0)
 +    do_h_mass(&(plist[F_BONDS]),vsite_type,atoms,mHmult,bDeuterate);
 +  sfree(vsite_type);
 +  
 +  /* Cleanup bonds (sort and rm doubles) */ 
 +  /* clean_bonds(&(plist[F_BONDS]));*/
 +   
 +  fprintf(stderr,
 +        "There are %4d dihedrals, %4d impropers, %4d angles\n"
 +        "          %4d pairs,     %4d bonds and  %4d virtual sites\n",
 +        plist[F_PDIHS].nr, plist[F_IDIHS].nr, plist[F_ANGLES].nr,
 +        plist[F_LJ14].nr, plist[F_BONDS].nr,
 +        plist[F_VSITE2].nr +
 +        plist[F_VSITE3].nr +
 +        plist[F_VSITE3FD].nr +
 +        plist[F_VSITE3FAD].nr +
 +        plist[F_VSITE3OUT].nr +
 +      plist[F_VSITE4FD].nr +
 +      plist[F_VSITE4FDN].nr );
 +  
 +  print_sums(atoms, FALSE);
 +  
 +  if (FALSE == bChargeGroups)
 +  {
 +        scrub_charge_groups(cgnr, atoms->nr);
 +  }
 +
 +    if (bRenumRes)
 +    {
 +        for(i=0; i<atoms->nres; i++) 
 +        {
 +            atoms->resinfo[i].nr = i + 1;
 +            atoms->resinfo[i].ic = ' ';
 +        }
 +    }
 +      
 +  if (top_file) {
 +    fprintf(stderr,"Writing topology\n");
 +    /* We can copy the bonded types from the first restp,
 +     * since the types have to be identical for all residues in one molecule.
 +     */
 +    for(i=0; i<ebtsNR; i++) {
 +        bts[i] = restp[0].rb[i].type;
 +    }
 +    write_top(top_file, posre_fn, molname,
 +              atoms, bRTPresname, 
 +              bts, plist, excls, atype, cgnr, restp[0].nrexcl);
 +  }
 +  
 +  /* cleaning up */
 +  free_t_hackblock(atoms->nres, &hb);
 +  free_t_restp(atoms->nres, &restp);
 +  gmx_residuetype_destroy(rt);
 +    
 +  /* we should clean up hb and restp here, but that is a *L*O*T* of work! */
 +  sfree(cgnr);
 +  for (i=0; i<F_NRE; i++)
 +    sfree(plist[i].param);
 +  for (i=0; i<atoms->nr; i++)
 +    sfree(excls[i].e);
 +  sfree(excls);
 +}
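 +/* Overview of the pdb2top() flow above (summarising the calls, not adding
 + * behaviour): bonds are generated from coordinates (at2bonds), special
 + * bonds such as disulphides are added (do_ssbonds), atoms are typed and
 + * charge groups assigned (name2type), duplicate bonds are removed
 + * (clean_bonds), optional virtual sites are constructed (do_vsites),
 + * angles/dihedrals/pairs come from gen_pad(), optional CMAP terms from
 + * gen_cmap(), hydrogen masses are adjusted if mHmult != 1, and finally
 + * write_top() emits the topology. */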
index 08191a9bb863de55c499cbfa9dd862d43a769189,0000000000000000000000000000000000000000..d4833666ad3d847296e6d450d712eea4864329c2
mode 100644,000000..100644
--- /dev/null
@@@ -1,3246 -1,0 +1,3246 @@@
-    if (ir->bAdress && (EEL_PME(ir->coulombtype))){
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <ctype.h>
 +#include <stdlib.h>
 +#include <limits.h>
 +#include "sysstuff.h"
 +#include "smalloc.h"
 +#include "typedefs.h"
 +#include "physics.h"
 +#include "names.h"
 +#include "gmx_fatal.h"
 +#include "macros.h"
 +#include "index.h"
 +#include "symtab.h"
 +#include "string2.h"
 +#include "readinp.h"
 +#include "warninp.h"
 +#include "readir.h" 
 +#include "toputil.h"
 +#include "index.h"
 +#include "network.h"
 +#include "vec.h"
 +#include "pbc.h"
 +#include "mtop_util.h"
 +#include "chargegroup.h"
 +#include "inputrec.h"
 +
 +#define MAXPTR 254
 +#define NOGID  255
 +#define MAXLAMBDAS 1024
 +
 +/* Resource parameters 
 + * Do not change any of these until you read the instruction
 + * in readinp.h. Some cpp's do not take spaces after the backslash
 + * (like the c-shell), which will give you a very weird compiler
 + * message.
 + */
 +
 +static char tcgrps[STRLEN],tau_t[STRLEN],ref_t[STRLEN],
 +  acc[STRLEN],accgrps[STRLEN],freeze[STRLEN],frdim[STRLEN],
 +  energy[STRLEN],user1[STRLEN],user2[STRLEN],vcm[STRLEN],xtc_grps[STRLEN],
 +  couple_moltype[STRLEN],orirefitgrp[STRLEN],egptable[STRLEN],egpexcl[STRLEN],
 +  wall_atomtype[STRLEN],wall_density[STRLEN],deform[STRLEN],QMMM[STRLEN];
 +static char fep_lambda[efptNR][STRLEN];
 +static char lambda_weights[STRLEN];
 +static char **pull_grp;
 +static char **rot_grp;
 +static char anneal[STRLEN],anneal_npoints[STRLEN],
 +  anneal_time[STRLEN],anneal_temp[STRLEN];
 +static char QMmethod[STRLEN],QMbasis[STRLEN],QMcharge[STRLEN],QMmult[STRLEN],
 +  bSH[STRLEN],CASorbitals[STRLEN], CASelectrons[STRLEN],SAon[STRLEN],
 +  SAoff[STRLEN],SAsteps[STRLEN],bTS[STRLEN],bOPT[STRLEN]; 
 +static char efield_x[STRLEN],efield_xt[STRLEN],efield_y[STRLEN],
 +  efield_yt[STRLEN],efield_z[STRLEN],efield_zt[STRLEN];
 +
 +enum {
 +    egrptpALL,         /* All particles have to be a member of a group.     */
 +    egrptpALL_GENREST, /* A rest group with name is generated for particles *
 +                        * that are not part of any group.                   */
 +    egrptpPART,        /* As egrptpALL_GENREST, but no name is generated    *
 +                        * for the rest group.                               */
 +    egrptpONE          /* Merge all selected groups into one group,         *
 +                        * make a rest group for the remaining particles.    */
 +};
 +
 +
 +void init_ir(t_inputrec *ir, t_gromppopts *opts)
 +{
 +  snew(opts->include,STRLEN); 
 +  snew(opts->define,STRLEN);
 +  snew(ir->fepvals,1);
 +  snew(ir->expandedvals,1);
 +  snew(ir->simtempvals,1);
 +}
 +
 +static void GetSimTemps(int ntemps, t_simtemp *simtemp, double *temperature_lambdas)
 +{
 +
 +    int i;
 +
 +    for (i=0;i<ntemps;i++)
 +    {
 +        /* simple linear scaling -- allows more control */
 +        if (simtemp->eSimTempScale == esimtempLINEAR)
 +        {
 +            simtemp->temperatures[i] = simtemp->simtemp_low + (simtemp->simtemp_high-simtemp->simtemp_low)*temperature_lambdas[i];
 +        }
 +        else if (simtemp->eSimTempScale == esimtempGEOMETRIC)  /* should give roughly equal acceptance for constant heat capacity . . . */
 +        {
 +            simtemp->temperatures[i] = simtemp->simtemp_low * pow(simtemp->simtemp_high/simtemp->simtemp_low,(1.0*i)/(ntemps-1));
 +        }
 +        else if (simtemp->eSimTempScale == esimtempEXPONENTIAL)
 +        {
 +            simtemp->temperatures[i] = simtemp->simtemp_low + (simtemp->simtemp_high-simtemp->simtemp_low)*((exp(temperature_lambdas[i])-1)/(exp(1.0)-1));
 +        }
 +        else
 +        {
 +            char errorstr[128];
 +            sprintf(errorstr,"eSimTempScale=%d not defined",simtemp->eSimTempScale);
 +            gmx_fatal(FARGS,errorstr);
 +        }
 +    }
 +}
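 +/* Worked example for GetSimTemps() (illustrative numbers): with
 + * simtemp_low = 300 K, simtemp_high = 400 K, 5 states and
 + * temperature-lambdas 0, 0.25, 0.5, 0.75, 1:
 + *   linear:    300.0, 325.0, 350.0, 375.0, 400.0 K
 + *   geometric: 300.0, 322.4, 346.4, 372.2, 400.0 K
 + * (geometric spacing uses a constant ratio (400/300)^(1/4) between
 + * neighbouring states and ignores the lambda values themselves). */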
 +
 +
 +
 +static void _low_check(gmx_bool b,char *s,warninp_t wi)
 +{
 +    if (b)
 +    {
 +        warning_error(wi,s);
 +    }
 +}
 +
 +static void check_nst(const char *desc_nst,int nst,
 +                      const char *desc_p,int *p,
 +                      warninp_t wi)
 +{
 +    char buf[STRLEN];
 +
 +    if (*p > 0 && *p % nst != 0)
 +    {
 +        /* Round up to the next multiple of nst */
 +        *p = ((*p)/nst + 1)*nst;
 +        sprintf(buf,"%s should be a multiple of %s, changing %s to %d\n",
 +              desc_p,desc_nst,desc_p,*p);
 +        warning(wi,buf);
 +    }
 +}
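 +/* Example of the rounding in check_nst() above: with nst = 10 and *p = 25,
 + * *p is rounded up to the next multiple (30) and a warning is issued;
 + * values of *p that are zero, negative or already multiples of nst are left
 + * unchanged. */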
 +
 +static gmx_bool ir_NVE(const t_inputrec *ir)
 +{
 +    return ((ir->eI == eiMD || EI_VV(ir->eI)) && ir->etc == etcNO);
 +}
 +
 +static int lcd(int n1,int n2)
 +{
 +    int d,i;
 +    
 +    d = 1;
 +    for(i=2; (i<=n1 && i<=n2); i++)
 +    {
 +        if (n1 % i == 0 && n2 % i == 0)
 +        {
 +            d = i;
 +        }
 +    }
 +    
 +    return d;
 +}
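 +/* lcd() above returns the largest common divisor of n1 and n2 by exhaustive
 + * search, e.g. lcd(12,18) == 6 and lcd(5,7) == 1; it is used below to pick
 + * an nstcalcenergy that divides both nstenergy and nstlist. */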
 +
 +void check_ir(const char *mdparin,t_inputrec *ir, t_gromppopts *opts,
 +              warninp_t wi)
 +/* Check internal consistency */
 +{
 +    /* Strange macro: first one fills the err_buf, and then one can check 
 +     * the condition, which will print the message and increase the error
 +     * counter.
 +     */
 +#define CHECK(b) _low_check(b,err_buf,wi)
 +    char err_buf[256],warn_buf[STRLEN];
 +    int i,j;
 +    int  ns_type=0;
 +    real dt_coupl=0;
 +    real dt_pcoupl;
 +    int  nstcmin;
 +    t_lambda *fep = ir->fepvals;
 +    t_expanded *expand = ir->expandedvals;
 +
 +  set_warning_line(wi,mdparin,-1);
 +
 +  /* BASIC CUT-OFF STUFF */
 +  if (ir->rcoulomb < 0)
 +  {
 +      warning_error(wi,"rcoulomb should be >= 0");
 +  }
 +  if (ir->rvdw < 0)
 +  {
 +      warning_error(wi,"rvdw should be >= 0");
 +  }
 +  if (ir->rlist < 0)
 +  {
 +      warning_error(wi,"rlist should be >= 0");
 +  }
 +  if (ir->rlist == 0 ||
 +      !((EEL_MIGHT_BE_ZERO_AT_CUTOFF(ir->coulombtype) && ir->rcoulomb > ir->rlist) ||
 +        (EVDW_MIGHT_BE_ZERO_AT_CUTOFF(ir->vdwtype)    && ir->rvdw     > ir->rlist))) {
 +    /* No switched potential and/or no twin-range:
 +     * we can set the long-range cut-off to the maximum of the other cut-offs.
 +     */
 +    ir->rlistlong = max_cutoff(ir->rlist,max_cutoff(ir->rvdw,ir->rcoulomb));
 +  } else if (ir->rlistlong < 0) {
 +    ir->rlistlong = max_cutoff(ir->rlist,max_cutoff(ir->rvdw,ir->rcoulomb));
 +    sprintf(warn_buf,"rlistlong was not set, setting it to %g (no buffer)",
 +          ir->rlistlong);
 +    warning(wi,warn_buf);
 +  }
 +  if (ir->rlistlong == 0 && ir->ePBC != epbcNONE) {
 +      warning_error(wi,"Can not have an infinite cut-off with PBC");
 +  }
 +  if (ir->rlistlong > 0 && (ir->rlist == 0 || ir->rlistlong < ir->rlist)) {
 +      warning_error(wi,"rlistlong can not be shorter than rlist");
 +  }
 +  if (IR_TWINRANGE(*ir) && ir->nstlist <= 0) {
 +      warning_error(wi,"Can not have nstlist<=0 with twin-range interactions");
 +  }
 +
 +    /* GENERAL INTEGRATOR STUFF */
 +    if (!(ir->eI == eiMD || EI_VV(ir->eI)))
 +    {
 +        ir->etc = etcNO;
 +    }
 +    if (ir->eI == eiVVAK) {
 +        sprintf(warn_buf,"Integrator method %s is implemented primarily for validation purposes; for molecular dynamics, you should probably be using %s or %s",ei_names[eiVVAK],ei_names[eiMD],ei_names[eiVV]);
 +        warning_note(wi,warn_buf);
 +    }
 +    if (!EI_DYNAMICS(ir->eI))
 +    {
 +        ir->epc = epcNO;
 +    }
 +    if (EI_DYNAMICS(ir->eI))
 +    {
 +        if (ir->nstcalcenergy < 0)
 +        {
 +            ir->nstcalcenergy = ir_optimal_nstcalcenergy(ir);
 +            if (ir->nstenergy != 0 && ir->nstenergy < ir->nstcalcenergy)
 +            {
 +                /* nstcalcenergy larger than nstenergy does not make sense.
 +                 * We ideally want nstcalcenergy=nstenergy.
 +                 */
 +                if (ir->nstlist > 0)
 +                {
 +                    ir->nstcalcenergy = lcd(ir->nstenergy,ir->nstlist);
 +                }
 +                else
 +                {
 +                    ir->nstcalcenergy = ir->nstenergy;
 +                }
 +            }
 +        }
 +        if (ir->epc != epcNO)
 +        {
 +            if (ir->nstpcouple < 0)
 +            {
 +                ir->nstpcouple = ir_optimal_nstpcouple(ir);
 +            }
 +        }
 +        if (IR_TWINRANGE(*ir))
 +        {
 +            check_nst("nstlist",ir->nstlist,
 +                      "nstcalcenergy",&ir->nstcalcenergy,wi);
 +            if (ir->epc != epcNO)
 +            {
 +                check_nst("nstlist",ir->nstlist,
 +                          "nstpcouple",&ir->nstpcouple,wi); 
 +            }
 +        }
 +
 +        if (ir->nstcalcenergy > 1)
 +        {
 +            /* for storing exact averages nstenergy should be
 +             * a multiple of nstcalcenergy
 +             */
 +            check_nst("nstcalcenergy",ir->nstcalcenergy,
 +                      "nstenergy",&ir->nstenergy,wi);
 +            if (ir->efep != efepNO)
 +            {
 +                /* nstdhdl should be a multiple of nstcalcenergy */
 +                check_nst("nstcalcenergy",ir->nstcalcenergy,
 +                          "nstdhdl",&ir->fepvals->nstdhdl,wi);
 +            }
 +        }
 +    }
 +
 +  /* LD STUFF */
 +  if ((EI_SD(ir->eI) || ir->eI == eiBD) &&
 +      ir->bContinuation && ir->ld_seed != -1) {
 +      warning_note(wi,"You are doing a continuation with SD or BD, make sure that ld_seed is different from the previous run (using ld_seed=-1 will ensure this)");
 +  }
 +
 +  /* TPI STUFF */
 +  if (EI_TPI(ir->eI)) {
 +    sprintf(err_buf,"TPI only works with pbc = %s",epbc_names[epbcXYZ]);
 +    CHECK(ir->ePBC != epbcXYZ);
 +    sprintf(err_buf,"TPI only works with ns = %s",ens_names[ensGRID]);
 +    CHECK(ir->ns_type != ensGRID);
 +    sprintf(err_buf,"with TPI nstlist should be larger than zero");
 +    CHECK(ir->nstlist <= 0);
 +    sprintf(err_buf,"TPI does not work with full electrostatics other than PME");
 +    CHECK(EEL_FULL(ir->coulombtype) && !EEL_PME(ir->coulombtype));
 +  }
 +
 +  /* SHAKE / LINCS */
 +  if ( (opts->nshake > 0) && (opts->bMorse) ) {
 +      sprintf(warn_buf,
 +              "Using morse bond-potentials while constraining bonds is useless");
 +      warning(wi,warn_buf);
 +  }
 +
 +  if ((EI_SD(ir->eI) || ir->eI == eiBD) &&
 +      ir->bContinuation && ir->ld_seed != -1) {
 +      warning_note(wi,"You are doing a continuation with SD or BD, make sure that ld_seed is different from the previous run (using ld_seed=-1 will ensure this)");
 +  }
 +  /* verify simulated tempering options */
 +
 +  if (ir->bSimTemp) {
 +      gmx_bool bAllTempZero = TRUE;
 +      for (i=0;i<fep->n_lambda;i++)
 +      {
 +          sprintf(err_buf,"Entry %d for %s must be between 0 and 1, instead is %g",i,efpt_names[efptTEMPERATURE],fep->all_lambda[efptTEMPERATURE][i]);
 +          CHECK((fep->all_lambda[efptTEMPERATURE][i] < 0) || (fep->all_lambda[efptTEMPERATURE][i] > 1));
 +          if (fep->all_lambda[efptTEMPERATURE][i] > 0)
 +          {
 +              bAllTempZero = FALSE;
 +          }
 +      }
 +      sprintf(err_buf,"if simulated tempering is on, temperature-lambdas may not be all zero");
 +      CHECK(bAllTempZero==TRUE);
 +
 +      sprintf(err_buf,"Simulated tempering is currently only compatible with md-vv");
 +      CHECK(ir->eI != eiVV);
 +
 +      /* check compatibility of the temperature coupling with simulated tempering */
 +
 +      if (ir->etc == etcNOSEHOOVER) {
 +          sprintf(warn_buf,"Nose-Hoover based temperature control such as [%s] may not be entirely consistent with simulated tempering",etcoupl_names[ir->etc]);
 +          warning_note(wi,warn_buf);
 +      }
 +
 +      /* check that the temperatures make sense */
 +
 +      sprintf(err_buf,"Higher simulated tempering temperature (%g) must be >= than the simulated tempering lower temperature (%g)",ir->simtempvals->simtemp_high,ir->simtempvals->simtemp_low);
 +      CHECK(ir->simtempvals->simtemp_high <= ir->simtempvals->simtemp_low);
 +
 +      sprintf(err_buf,"Higher simulated tempering temperature (%g) must be >= zero",ir->simtempvals->simtemp_high);
 +      CHECK(ir->simtempvals->simtemp_high <= 0);
 +
 +      sprintf(err_buf,"Lower simulated tempering temperature (%g) must be >= zero",ir->simtempvals->simtemp_low);
 +      CHECK(ir->simtempvals->simtemp_low <= 0);
 +  }
 +
 +  /* verify free energy options */
 +
 +  if (ir->efep != efepNO) {
 +      fep = ir->fepvals;
 +      sprintf(err_buf,"The soft-core power is %d and can only be 1 or 2",
 +              fep->sc_power);
 +      CHECK(fep->sc_alpha!=0 && fep->sc_power!=1 && fep->sc_power!=2);
 +
 +      sprintf(err_buf,"The soft-core sc-r-power is %d and can only be 6 or 48",
 +              (int)fep->sc_r_power);
 +      CHECK(fep->sc_alpha!=0 && fep->sc_r_power!=6.0 && fep->sc_r_power!=48.0);
 +
 +      /* check validity of options */
 +      if (fep->n_lambda > 0 && ir->rlist < max(ir->rvdw,ir->rcoulomb))
 +      {
 +          sprintf(warn_buf,
 +                  "For foreign lambda free energy differences it is assumed that the soft-core interactions have no effect beyond the neighborlist cut-off");
 +          warning(wi,warn_buf);
 +      }
 +
 +      sprintf(err_buf,"Can't use postive delta-lambda (%g) if initial state/lambda does not start at zero",fep->delta_lambda);
 +      CHECK(fep->delta_lambda > 0 && ((fep->init_fep_state !=0) ||  (fep->init_lambda !=0)));
 +
 +      sprintf(err_buf,"Can't use postive delta-lambda (%g) with expanded ensemble simulations",fep->delta_lambda);
 +      CHECK(fep->delta_lambda > 0 && (ir->efep == efepEXPANDED));
 +
 +      sprintf(err_buf,"Free-energy not implemented for Ewald");
 +      CHECK(ir->coulombtype==eelEWALD);
 +
 +      /* check validity of lambda inputs */
 +      sprintf(err_buf,"initial thermodynamic state %d does not exist, only goes to %d",fep->init_fep_state,fep->n_lambda);
 +      CHECK((fep->init_fep_state > fep->n_lambda));
 +
 +      for (j=0;j<efptNR;j++)
 +      {
 +          for (i=0;i<fep->n_lambda;i++)
 +          {
 +              sprintf(err_buf,"Entry %d for %s must be between 0 and 1, instead is %g",i,efpt_names[j],fep->all_lambda[j][i]);
 +              CHECK((fep->all_lambda[j][i] < 0) || (fep->all_lambda[j][i] > 1));
 +          }
 +      }
 +
 +      if ((fep->sc_alpha>0) && (!fep->bScCoul))
 +      {
 +          for (i=0;i<fep->n_lambda;i++)
 +          {
 +              sprintf(err_buf,"For state %d, vdw-lambdas (%f) is changing with vdw softcore, while coul-lambdas (%f) is nonzero without coulomb softcore: this will lead to crashes, and is not supported.",i,fep->all_lambda[efptVDW][i],
 +                      fep->all_lambda[efptCOUL][i]);
 +              CHECK((fep->sc_alpha>0) &&
 +                    (((fep->all_lambda[efptCOUL][i] > 0.0) &&
 +                      (fep->all_lambda[efptCOUL][i] < 1.0)) &&
 +                     ((fep->all_lambda[efptVDW][i] > 0.0) &&
 +                      (fep->all_lambda[efptVDW][i] < 1.0))));
 +          }
 +      }
 +
 +      if ((fep->bScCoul) && (EEL_PME(ir->coulombtype)))
 +      {
 +          sprintf(warn_buf,"With coulomb soft core, the reciprocal space calculation will not necessarily cancel.  It may be necessary to decrease the reciprocal space energy, and increase the cutoff radius to get sufficiently close matches to energies with free energy turned off.");
 +          warning(wi, warn_buf);
 +      }
 +
 +      /*  Free Energy Checks -- In an ideal world, slow growth and FEP would
 +          be treated differently, but that's the next step */
 +
 +      for (i=0;i<efptNR;i++) {
 +          for (j=0;j<fep->n_lambda;j++) {
 +              sprintf(err_buf,"%s[%d] must be between 0 and 1",efpt_names[i],j);
 +              CHECK((fep->all_lambda[i][j] < 0) || (fep->all_lambda[i][j] > 1));
 +          }
 +      }
 +  }
 +
 +  if ((ir->bSimTemp) || (ir->efep == efepEXPANDED)) {
 +      fep = ir->fepvals;
 +      expand = ir->expandedvals;
 +
 +      /* checking equilibration of weights inputs for validity */
 +
 +      sprintf(err_buf,"weight-equil-number-all-lambda (%d) is ignored if lmc-weights-equil is not equal to %s",
 +              expand->equil_n_at_lam,elmceq_names[elmceqNUMATLAM]);
 +      CHECK((expand->equil_n_at_lam>0) && (expand->elmceq!=elmceqNUMATLAM));
 +
 +      sprintf(err_buf,"weight-equil-number-samples (%d) is ignored if lmc-weights-equil is not equal to %s",
 +              expand->equil_samples,elmceq_names[elmceqSAMPLES]);
 +      CHECK((expand->equil_samples>0) && (expand->elmceq!=elmceqSAMPLES));
 +
 +      sprintf(err_buf,"weight-equil-number-steps (%d) is ignored if lmc-weights-equil is not equal to %s",
 +              expand->equil_steps,elmceq_names[elmceqSTEPS]);
 +      CHECK((expand->equil_steps>0) && (expand->elmceq!=elmceqSTEPS));
 +
 +      sprintf(err_buf,"weight-equil-wl-delta (%d) is ignored if lmc-weights-equil is not equal to %s",
 +              expand->equil_samples,elmceq_names[elmceqWLDELTA]);
 +      CHECK((expand->equil_wl_delta>0) && (expand->elmceq!=elmceqWLDELTA));
 +
 +      sprintf(err_buf,"weight-equil-count-ratio (%f) is ignored if lmc-weights-equil is not equal to %s",
 +              expand->equil_ratio,elmceq_names[elmceqRATIO]);
 +      CHECK((expand->equil_ratio>0) && (expand->elmceq!=elmceqRATIO));
 +
 +      sprintf(err_buf,"weight-equil-number-all-lambda (%d) must be a positive integer if lmc-weights-equil=%s",
 +              expand->equil_n_at_lam,elmceq_names[elmceqNUMATLAM]);
 +      CHECK((expand->equil_n_at_lam<=0) && (expand->elmceq==elmceqNUMATLAM));
 +
 +      sprintf(err_buf,"weight-equil-number-samples (%d) must be a positive integer if lmc-weights-equil=%s",
 +              expand->equil_samples,elmceq_names[elmceqSAMPLES]);
 +      CHECK((expand->equil_samples<=0) && (expand->elmceq==elmceqSAMPLES));
 +
 +      sprintf(err_buf,"weight-equil-number-steps (%d) must be a positive integer if lmc-weights-equil=%s",
 +              expand->equil_steps,elmceq_names[elmceqSTEPS]);
 +      CHECK((expand->equil_steps<=0) && (expand->elmceq==elmceqSTEPS));
 +
 +      sprintf(err_buf,"weight-equil-wl-delta (%f) must be > 0 if lmc-weights-equil=%s",
 +              expand->equil_wl_delta,elmceq_names[elmceqWLDELTA]);
 +      CHECK((expand->equil_wl_delta<=0) && (expand->elmceq==elmceqWLDELTA));
 +
 +      sprintf(err_buf,"weight-equil-count-ratio (%f) must be > 0 if lmc-weights-equil=%s",
 +              expand->equil_ratio,elmceq_names[elmceqRATIO]);
 +      CHECK((expand->equil_ratio<=0) && (expand->elmceq==elmceqRATIO));
 +
 +      sprintf(err_buf,"lmc-weights-equil=%s only possible when lmc-stats = %s or lmc-stats %s",
 +              elmceq_names[elmceqWLDELTA],elamstats_names[elamstatsWL],elamstats_names[elamstatsWWL]);
 +      CHECK((expand->elmceq==elmceqWLDELTA) && (!EWL(expand->elamstats)));
 +
 +      sprintf(err_buf,"lmc-repeats (%d) must be greater than 0",expand->lmc_repeats);
 +      CHECK((expand->lmc_repeats <= 0));
 +      sprintf(err_buf,"minimum-var-min (%d) must be greater than 0",expand->minvarmin);
 +      CHECK((expand->minvarmin <= 0));
 +      sprintf(err_buf,"weight-c-range (%d) must be greater or equal to 0",expand->c_range);
 +      CHECK((expand->c_range < 0));
 +      sprintf(err_buf,"init-lambda-state (%d) must be zero if lmc-forced-nstart (%d)> 0 and lmc-move != 'no'",
 +              fep->init_fep_state, expand->lmc_forced_nstart);
 +      CHECK((fep->init_fep_state!=0) && (expand->lmc_forced_nstart>0) && (expand->elmcmove!=elmcmoveNO));
 +      sprintf(err_buf,"lmc-forced-nstart (%d) must not be negative",expand->lmc_forced_nstart);
 +      CHECK((expand->lmc_forced_nstart < 0));
 +      sprintf(err_buf,"init-lambda-state (%d) must be in the interval [0,number of lambdas)",fep->init_fep_state);
 +      CHECK((fep->init_fep_state < 0) || (fep->init_fep_state >= fep->n_lambda));
 +
 +      sprintf(err_buf,"init-wl-delta (%f) must be greater than or equal to 0",expand->init_wl_delta);
 +      CHECK((expand->init_wl_delta < 0));
 +      sprintf(err_buf,"wl-ratio (%f) must be between 0 and 1",expand->wl_ratio);
 +      CHECK((expand->wl_ratio <= 0) || (expand->wl_ratio >= 1));
 +      sprintf(err_buf,"wl-scale (%f) must be between 0 and 1",expand->wl_scale);
 +      CHECK((expand->wl_scale <= 0) || (expand->wl_scale >= 1));
 +
 +      /* if there is no temperature control, we need to specify an MC temperature */
 +      sprintf(err_buf,"If there is no temperature control, and lmc-mcmove!= 'no',mc_temperature must be set to a positive number");
 +      if (expand->nstTij > 0)
 +      {
 +          sprintf(err_buf,"nst-transition-matrix (%d) must be an integer multiple of nstlog (%d)",
 +                  expand->nstTij,ir->nstlog);
 +          CHECK((mod(expand->nstTij,ir->nstlog)!=0));
 +      }
 +  }
 +
 +  /* PBC/WALLS */
 +  sprintf(err_buf,"walls only work with pbc=%s",epbc_names[epbcXY]);
 +  CHECK(ir->nwall && ir->ePBC!=epbcXY);
 +
 +  /* VACUUM STUFF */
 +  if (ir->ePBC != epbcXYZ && ir->nwall != 2) {
 +    if (ir->ePBC == epbcNONE) {
 +      if (ir->epc != epcNO) {
 +          warning(wi,"Turning off pressure coupling for vacuum system");
 +          ir->epc = epcNO;
 +      }
 +    } else {
 +      sprintf(err_buf,"Can not have pressure coupling with pbc=%s",
 +            epbc_names[ir->ePBC]);
 +      CHECK(ir->epc != epcNO);
 +    }
 +    sprintf(err_buf,"Can not have Ewald with pbc=%s",epbc_names[ir->ePBC]);
 +    CHECK(EEL_FULL(ir->coulombtype));
 +
 +    sprintf(err_buf,"Can not have dispersion correction with pbc=%s",
 +          epbc_names[ir->ePBC]);
 +    CHECK(ir->eDispCorr != edispcNO);
 +  }
 +
 +  if (ir->rlist == 0.0) {
 +    sprintf(err_buf,"can only have neighborlist cut-off zero (=infinite)\n"
 +          "with coulombtype = %s or coulombtype = %s\n"
 +          "without periodic boundary conditions (pbc = %s) and\n"
 +          "rcoulomb and rvdw set to zero",
 +          eel_names[eelCUT],eel_names[eelUSER],epbc_names[epbcNONE]);
 +    CHECK(((ir->coulombtype != eelCUT) && (ir->coulombtype != eelUSER)) ||
 +        (ir->ePBC     != epbcNONE) ||
 +        (ir->rcoulomb != 0.0)      || (ir->rvdw != 0.0));
 +
 +    if (ir->nstlist < 0) {
 +        warning_error(wi,"Can not have heuristic neighborlist updates without cut-off");
 +    }
 +    if (ir->nstlist > 0) {
 +        warning_note(wi,"Simulating without cut-offs is usually (slightly) faster with nstlist=0, nstype=simple and particle decomposition");
 +    }
 +  }
 +
 +  /* COMM STUFF */
 +  if (ir->nstcomm == 0) {
 +    ir->comm_mode = ecmNO;
 +  }
 +  if (ir->comm_mode != ecmNO) {
 +    if (ir->nstcomm < 0) {
 +        warning(wi,"If you want to remove the rotation around the center of mass, you should set comm_mode = Angular instead of setting nstcomm < 0. nstcomm is modified to its absolute value");
 +      ir->nstcomm = abs(ir->nstcomm);
 +    }
 +
 +    if (ir->nstcalcenergy > 0 && ir->nstcomm < ir->nstcalcenergy) {
 +        warning_note(wi,"nstcomm < nstcalcenergy defeats the purpose of nstcalcenergy, setting nstcomm to nstcalcenergy");
 +        ir->nstcomm = ir->nstcalcenergy;
 +    }
 +
 +    if (ir->comm_mode == ecmANGULAR) {
 +      sprintf(err_buf,"Can not remove the rotation around the center of mass with periodic molecules");
 +      CHECK(ir->bPeriodicMols);
 +      if (ir->ePBC != epbcNONE)
 +          warning(wi,"Removing the rotation around the center of mass in a periodic system (this is not a problem when you have only one molecule).");
 +    }
 +  }
 +
 +  if (EI_STATE_VELOCITY(ir->eI) && ir->ePBC == epbcNONE && ir->comm_mode != ecmANGULAR) {
 +      warning_note(wi,"Tumbling and or flying ice-cubes: We are not removing rotation around center of mass in a non-periodic system. You should probably set comm_mode = ANGULAR.");
 +  }
 +  
 +  sprintf(err_buf,"Twin-range neighbour searching (NS) with simple NS"
 +        " algorithm not implemented");
 +  CHECK(((ir->rcoulomb > ir->rlist) || (ir->rvdw > ir->rlist))
 +      && (ir->ns_type == ensSIMPLE));
 +
 +  /* TEMPERATURE COUPLING */
 +  if (ir->etc == etcYES)
 +    {
 +        ir->etc = etcBERENDSEN;
 +        warning_note(wi,"Old option for temperature coupling given: "
 +                     "changing \"yes\" to \"Berendsen\"\n");
 +    }
 +
 +    if ((ir->etc == etcNOSEHOOVER) || (ir->epc == epcMTTK))
 +    {
 +        if (ir->opts.nhchainlength < 1)
 +        {
 +            sprintf(warn_buf,"number of Nose-Hoover chains (currently %d) cannot be less than 1,reset to 1\n",ir->opts.nhchainlength);
 +            ir->opts.nhchainlength =1;
 +            warning(wi,warn_buf);
 +        }
 +        
 +        if (ir->etc==etcNOSEHOOVER && !EI_VV(ir->eI) && ir->opts.nhchainlength > 1)
 +        {
 +            warning_note(wi,"leapfrog does not yet support Nose-Hoover chains, nhchainlength reset to 1");
 +            ir->opts.nhchainlength = 1;
 +        }
 +    }
 +    else
 +    {
 +        ir->opts.nhchainlength = 0;
 +    }
 +
 +    if (ir->eI == eiVVAK) {
 +        sprintf(err_buf,"%s implemented primarily for validation, and requires nsttcouple = 1 and nstpcouple = 1.",
 +                ei_names[eiVVAK]);
 +        CHECK((ir->nsttcouple != 1) || (ir->nstpcouple != 1));
 +    }
 +
 +    if (ETC_ANDERSEN(ir->etc))
 +    {
 +        sprintf(err_buf,"%s temperature control not supported for integrator %s.",etcoupl_names[ir->etc],ei_names[ir->eI]);
 +        CHECK(!(EI_VV(ir->eI)));
 +
 +        for (i=0;i<ir->opts.ngtc;i++)
 +        {
 +            sprintf(err_buf,"all tau_t must currently be equal using Andersen temperature control, violated for group %d",i);
 +            CHECK(ir->opts.tau_t[0] != ir->opts.tau_t[i]);
 +            sprintf(err_buf,"all tau_t must be postive using Andersen temperature control, tau_t[%d]=%10.6f",
 +                    i,ir->opts.tau_t[i]);
 +            CHECK(ir->opts.tau_t[i]<0);
 +        }
 +        if (ir->nstcomm > 0 && (ir->etc == etcANDERSEN)) {
 +            sprintf(warn_buf,"Center of mass removal not necessary for %s.  All velocities of coupled groups are rerandomized periodically, so flying ice cube errors will not occur.",etcoupl_names[ir->etc]);
 +            warning_note(wi,warn_buf);
 +        }
 +
 +        sprintf(err_buf,"nstcomm must be 1, not %d for %s, as velocities of atoms in coupled groups are randomized every time step",ir->nstcomm,etcoupl_names[ir->etc]);
 +        CHECK(ir->nstcomm > 1 && (ir->etc == etcANDERSEN));
 +
 +        for (i=0;i<ir->opts.ngtc;i++)
 +        {
 +            int nsteps = (int)(ir->opts.tau_t[i]/ir->delta_t);
 +            sprintf(err_buf,"tau_t/delta_t for group %d for temperature control method %s must be a multiple of nstcomm (%d), as velocities of atoms in coupled groups are randomized every time step. The input tau_t (%8.3f) leads to %d steps per randomization",i,etcoupl_names[ir->etc],ir->nstcomm,ir->opts.tau_t[i],nsteps);
 +            CHECK((nsteps % ir->nstcomm) && (ir->etc == etcANDERSENMASSIVE));
 +        }
 +    }
 +    if (ir->etc == etcBERENDSEN)
 +    {
 +        sprintf(warn_buf,"The %s thermostat does not generate the correct kinetic energy distribution. You might want to consider using the %s thermostat.",
 +                ETCOUPLTYPE(ir->etc),ETCOUPLTYPE(etcVRESCALE));
 +        warning_note(wi,warn_buf);
 +    }
 +
 +    if ((ir->etc==etcNOSEHOOVER || ETC_ANDERSEN(ir->etc))
 +        && ir->epc==epcBERENDSEN)
 +    {
 +        sprintf(warn_buf,"Using Berendsen pressure coupling invalidates the "
 +                "true ensemble for the thermostat");
 +        warning(wi,warn_buf);
 +    }
 +
 +    /* PRESSURE COUPLING */
 +    if (ir->epc == epcISOTROPIC)
 +    {
 +        ir->epc = epcBERENDSEN;
 +        warning_note(wi,"Old option for pressure coupling given: "
 +                     "changing \"Isotropic\" to \"Berendsen\"\n"); 
 +    }
 +
 +    if (ir->epc != epcNO)
 +    {
 +        dt_pcoupl = ir->nstpcouple*ir->delta_t;
 +
 +        sprintf(err_buf,"tau-p must be > 0 instead of %g\n",ir->tau_p);
 +        CHECK(ir->tau_p <= 0);
 +
 +        if (ir->tau_p/dt_pcoupl < pcouple_min_integration_steps(ir->epc))
 +        {
 +            sprintf(warn_buf,"For proper integration of the %s barostat, tau-p (%g) should be at least %d times larger than nstpcouple*dt (%g)",
 +                    EPCOUPLTYPE(ir->epc),ir->tau_p,pcouple_min_integration_steps(ir->epc),dt_pcoupl);
 +            warning(wi,warn_buf);
 +        }
 +
 +        sprintf(err_buf,"compressibility must be > 0 when using pressure"
 +                " coupling %s\n",EPCOUPLTYPE(ir->epc));
 +        CHECK(ir->compress[XX][XX] < 0 || ir->compress[YY][YY] < 0 ||
 +              ir->compress[ZZ][ZZ] < 0 ||
 +              (trace(ir->compress) == 0 && ir->compress[YY][XX] <= 0 &&
 +               ir->compress[ZZ][XX] <= 0 && ir->compress[ZZ][YY] <= 0));
 +        
 +        if (epcPARRINELLORAHMAN == ir->epct && opts->bGenVel)
 +        {
 +            sprintf(warn_buf,
 +                    "You are generating velocities so I am assuming you "
 +                    "are equilibrating a system. You are using "
 +                    "Parrinello-Rahman pressure coupling, but this can be "
 +                    "unstable for equilibration. If your system crashes, try "
 +                    "equilibrating first with Berendsen pressure coupling. If "
 +                    "you are not equilibrating the system, you can probably "
 +                    "ignore this warning.");
 +            warning(wi,warn_buf);
 +        }
 +    }
 +
 +    if (EI_VV(ir->eI))
 +    {
 +        if (ir->epc > epcNO)
 +        {
 +            if ((ir->epc!=epcBERENDSEN) && (ir->epc!=epcMTTK))
 +            {
 +                warning_error(wi,"for md-vv and md-vv-avek, can only use Berendsen and Martyna-Tuckerman-Tobias-Klein (MTTK) equations for pressure control; MTTK is equivalent to Parrinello-Rahman.");
 +            }
 +        }
 +    }
 +
 +  /* ELECTROSTATICS */
 +  /* More checks are in triple check (grompp.c) */
 +
 +  if (ir->coulombtype == eelSWITCH) {
 +    sprintf(warn_buf,"coulombtype = %s is only for testing purposes and can lead to serious artifacts, advice: use coulombtype = %s",
 +          eel_names[ir->coulombtype],
 +          eel_names[eelRF_ZERO]);
 +    warning(wi,warn_buf);
 +  }
 +
 +  if (ir->epsilon_r!=1 && ir->implicit_solvent==eisGBSA) {
 +    sprintf(warn_buf,"epsilon-r = %g with GB implicit solvent, will use this value for inner dielectric",ir->epsilon_r);
 +    warning_note(wi,warn_buf);
 +  }
 +
 +  if (EEL_RF(ir->coulombtype) && ir->epsilon_rf==1 && ir->epsilon_r!=1) {
 +    sprintf(warn_buf,"epsilon-r = %g and epsilon-rf = 1 with reaction field, assuming old format and exchanging epsilon-r and epsilon-rf",ir->epsilon_r);
 +    warning(wi,warn_buf);
 +    ir->epsilon_rf = ir->epsilon_r;
 +    ir->epsilon_r  = 1.0;
 +  }
 +
 +  if (getenv("GALACTIC_DYNAMICS") == NULL) {  
 +    sprintf(err_buf,"epsilon-r must be >= 0 instead of %g\n",ir->epsilon_r);
 +    CHECK(ir->epsilon_r < 0);
 +  }
 +  
 +  if (EEL_RF(ir->coulombtype)) {
 +    /* reaction field (at the cut-off) */
 +    
 +    if (ir->coulombtype == eelRF_ZERO) {
 +       sprintf(err_buf,"With coulombtype = %s, epsilon-rf must be 0",
 +             eel_names[ir->coulombtype]);
 +      CHECK(ir->epsilon_rf != 0);
 +    }
 +
 +    sprintf(err_buf,"epsilon-rf must be >= epsilon-r");
 +    CHECK((ir->epsilon_rf < ir->epsilon_r && ir->epsilon_rf != 0) ||
 +        (ir->epsilon_r == 0));
 +    if (ir->epsilon_rf == ir->epsilon_r) {
 +      sprintf(warn_buf,"Using epsilon-rf = epsilon-r with %s does not make sense",
 +            eel_names[ir->coulombtype]);
 +      warning(wi,warn_buf);
 +    }
 +  }
 +  /* Allow rlist>rcoulomb for tabulated long range stuff. This just
 +   * means the interaction is zero outside rcoulomb, but it helps to
 +   * provide accurate energy conservation.
 +   */
 +  if (EEL_MIGHT_BE_ZERO_AT_CUTOFF(ir->coulombtype)) {
 +    if (EEL_SWITCHED(ir->coulombtype)) {
 +      sprintf(err_buf,
 +            "With coulombtype = %s rcoulomb_switch must be < rcoulomb",
 +            eel_names[ir->coulombtype]);
 +      CHECK(ir->rcoulomb_switch >= ir->rcoulomb);
 +    }
 +  } else if (ir->coulombtype == eelCUT || EEL_RF(ir->coulombtype)) {
 +    sprintf(err_buf,"With coulombtype = %s, rcoulomb must be >= rlist",
 +          eel_names[ir->coulombtype]);
 +    CHECK(ir->rlist > ir->rcoulomb);
 +  }
 +
 +  if (EEL_FULL(ir->coulombtype)) {
 +    if (ir->coulombtype==eelPMESWITCH || ir->coulombtype==eelPMEUSER ||
 +        ir->coulombtype==eelPMEUSERSWITCH) {
 +      sprintf(err_buf,"With coulombtype = %s, rcoulomb must be <= rlist",
 +            eel_names[ir->coulombtype]);
 +      CHECK(ir->rcoulomb > ir->rlist);
 +    } else {
 +      if (ir->coulombtype == eelPME || ir->coulombtype == eelP3M_AD) {
 +      sprintf(err_buf,
 +              "With coulombtype = %s, rcoulomb must be equal to rlist\n"
 +              "If you want optimal energy conservation or exact integration use %s",
 +              eel_names[ir->coulombtype],eel_names[eelPMESWITCH]);
 +      } else { 
 +      sprintf(err_buf,
 +              "With coulombtype = %s, rcoulomb must be equal to rlist",
 +              eel_names[ir->coulombtype]);
 +      }
 +      CHECK(ir->rcoulomb != ir->rlist);
 +    }
 +  }
 +
 +  if (EEL_PME(ir->coulombtype)) {
 +    if (ir->pme_order < 3) {
 +        warning_error(wi,"pme-order can not be smaller than 3");
 +    }
 +  }
 +
 +  if (ir->nwall==2 && EEL_FULL(ir->coulombtype)) {
 +    if (ir->ewald_geometry == eewg3D) {
 +      sprintf(warn_buf,"With pbc=%s you should use ewald-geometry=%s",
 +            epbc_names[ir->ePBC],eewg_names[eewg3DC]);
 +      warning(wi,warn_buf);
 +    }
 +    /* This check avoids extra pbc coding for exclusion corrections */
 +    sprintf(err_buf,"wall-ewald-zfac should be >= 2");
 +    CHECK(ir->wall_ewald_zfac < 2);
 +  }
 +
 +  if (EVDW_SWITCHED(ir->vdwtype)) {
 +    sprintf(err_buf,"With vdwtype = %s rvdw-switch must be < rvdw",
 +          evdw_names[ir->vdwtype]);
 +    CHECK(ir->rvdw_switch >= ir->rvdw);
 +  } else if (ir->vdwtype == evdwCUT) {
 +    sprintf(err_buf,"With vdwtype = %s, rvdw must be >= rlist",evdw_names[ir->vdwtype]);
 +    CHECK(ir->rlist > ir->rvdw);
 +  }
 +  if (EEL_IS_ZERO_AT_CUTOFF(ir->coulombtype)
 +      && (ir->rlistlong <= ir->rcoulomb)) {
 +    sprintf(warn_buf,"For energy conservation with switch/shift potentials, %s should be 0.1 to 0.3 nm larger than rcoulomb.",
 +          IR_TWINRANGE(*ir) ? "rlistlong" : "rlist");
 +    warning_note(wi,warn_buf);
 +  }
 +  if (EVDW_SWITCHED(ir->vdwtype) && (ir->rlistlong <= ir->rvdw)) {
 +    sprintf(warn_buf,"For energy conservation with switch/shift potentials, %s should be 0.1 to 0.3 nm larger than rvdw.",
 +          IR_TWINRANGE(*ir) ? "rlistlong" : "rlist");
 +    warning_note(wi,warn_buf);
 +  }
 +
 +  if (ir->vdwtype == evdwUSER && ir->eDispCorr != edispcNO) {
 +      warning_note(wi,"You have selected user tables with dispersion correction, the dispersion will be corrected to -C6/r^6 beyond rvdw_switch (the tabulated interaction between rvdw_switch and rvdw will not be double counted). Make sure that you really want dispersion correction to -C6/r^6.");
 +  }
 +
 +  if (ir->nstlist == -1) {
 +    sprintf(err_buf,
 +          "nstlist=-1 only works with switched or shifted potentials,\n"
 +          "suggestion: use vdw-type=%s and coulomb-type=%s",
 +          evdw_names[evdwSHIFT],eel_names[eelPMESWITCH]);
 +    CHECK(!(EEL_MIGHT_BE_ZERO_AT_CUTOFF(ir->coulombtype) &&
 +            EVDW_MIGHT_BE_ZERO_AT_CUTOFF(ir->vdwtype)));
 +
 +    sprintf(err_buf,"With nstlist=-1 rvdw and rcoulomb should be smaller than rlist to account for diffusion and possibly charge-group radii");
 +    CHECK(ir->rvdw >= ir->rlist || ir->rcoulomb >= ir->rlist);
 +  }
 +  sprintf(err_buf,"nstlist can not be smaller than -1");
 +  CHECK(ir->nstlist < -1);
 +
 +  if (ir->eI == eiLBFGS && (ir->coulombtype==eelCUT || ir->vdwtype==evdwCUT)
 +     && ir->rvdw != 0) {
 +    warning(wi,"For efficient BFGS minimization, use switch/shift/pme instead of cut-off.");
 +  }
 +
 +  if (ir->eI == eiLBFGS && ir->nbfgscorr <= 0) {
 +    warning(wi,"Using L-BFGS with nbfgscorr<=0 just gets you steepest descent.");
 +  }
 +
 +  /* ENERGY CONSERVATION */
 +  if (ir_NVE(ir))
 +  {
 +      if (!EVDW_MIGHT_BE_ZERO_AT_CUTOFF(ir->vdwtype) && ir->rvdw > 0)
 +      {
 +          sprintf(warn_buf,"You are using a cut-off for VdW interactions with NVE, for good energy conservation use vdwtype = %s (possibly with DispCorr)",
 +                  evdw_names[evdwSHIFT]);
 +          warning_note(wi,warn_buf);
 +      }
 +      if (!EEL_MIGHT_BE_ZERO_AT_CUTOFF(ir->coulombtype) && ir->rcoulomb > 0)
 +      {
 +          sprintf(warn_buf,"You are using a cut-off for electrostatics with NVE, for good energy conservation use coulombtype = %s or %s",
 +                  eel_names[eelPMESWITCH],eel_names[eelRF_ZERO]);
 +          warning_note(wi,warn_buf);
 +      }
 +  }
 +
 +  /* IMPLICIT SOLVENT */
 +  if(ir->coulombtype==eelGB_NOTUSED)
 +  {
 +    ir->coulombtype=eelCUT;
 +    ir->implicit_solvent=eisGBSA;
 +    fprintf(stderr,"Note: Old option for generalized born electrostatics given:\n"
 +          "Changing coulombtype from \"generalized-born\" to \"cut-off\" and instead\n"
 +            "setting implicit-solvent value to \"GBSA\" in input section.\n");
 +  }
 +
 +  if(ir->sa_algorithm==esaSTILL)
 +  {
 +    sprintf(err_buf,"Still SA algorithm not available yet, use %s or %s instead\n",esa_names[esaAPPROX],esa_names[esaNO]);
 +    CHECK(ir->sa_algorithm == esaSTILL);
 +  }
 +  
 +  if(ir->implicit_solvent==eisGBSA)
 +  {
 +    sprintf(err_buf,"With GBSA implicit solvent, rgbradii must be equal to rlist.");
 +    CHECK(ir->rgbradii != ir->rlist);
 +        
 +    if(ir->coulombtype!=eelCUT)
 +        {
 +                sprintf(err_buf,"With GBSA, coulombtype must be equal to %s\n",eel_names[eelCUT]);
 +                CHECK(ir->coulombtype!=eelCUT);
 +        }
 +        if(ir->vdwtype!=evdwCUT)
 +        {
 +                sprintf(err_buf,"With GBSA, vdw-type must be equal to %s\n",evdw_names[evdwCUT]);
 +                CHECK(ir->vdwtype!=evdwCUT);
 +        }
 +    if(ir->nstgbradii<1)
 +    {
 +      sprintf(warn_buf,"Using GBSA with nstgbradii<1, setting nstgbradii=1");
 +      warning_note(wi,warn_buf);
 +      ir->nstgbradii=1;
 +    }
 +    if(ir->sa_algorithm==esaNO)
 +    {
 +      sprintf(warn_buf,"No SA (non-polar) calculation requested together with GB. Are you sure this is what you want?\n");
 +      warning_note(wi,warn_buf);
 +    }
 +    if(ir->sa_surface_tension<0 && ir->sa_algorithm!=esaNO)
 +    {
 +      sprintf(warn_buf,"Value of sa_surface_tension is < 0. Changing it to 2.05016 or 2.25936 kJ/nm^2/mol for Still and HCT/OBC respectively\n");
 +      warning_note(wi,warn_buf);
 +      
 +      if(ir->gb_algorithm==egbSTILL)
 +      {
 +        ir->sa_surface_tension = 0.0049 * CAL2JOULE * 100;
 +      }
 +      else
 +      {
 +        ir->sa_surface_tension = 0.0054 * CAL2JOULE * 100;
 +      }
 +    }
 +    if(ir->sa_surface_tension==0 && ir->sa_algorithm!=esaNO)
 +    {
 +      sprintf(err_buf, "Surface tension set to 0 while SA-calculation requested\n");
 +      CHECK(ir->sa_surface_tension==0 && ir->sa_algorithm!=esaNO);
 +    }
 +    
 +  }
 +
 +  if (ir->bAdress && !EI_SD(ir->eI)){
 +       warning_error(wi,"AdresS simulation supports only stochastic dynamics");
 +  }
 +  if (ir->bAdress && ir->epc != epcNO){
 +       warning_error(wi,"AdresS simulation does not support pressure coupling");
 +  }
-    }
++  if (ir->bAdress && (EEL_FULL(ir->coulombtype))){
 +       warning_error(wi,"AdresS simulation does not support long-range electrostatics");
++  }
 +
 +}
 +
 +/* count the number of text elements separated by whitespace in a string.
 +    str = the input string
 +    maxptr = the maximum number of allowed elements
 +    ptr = the output array of pointers to the first character of each element
 +    returns: the number of elements. */
 +int str_nelem(const char *str,int maxptr,char *ptr[])
 +{
 +  int  np=0;
 +  char *copy0,*copy;
 +  
 +  copy0=strdup(str); 
 +  copy=copy0;
 +  ltrim(copy);
 +  while (*copy != '\0') {
 +    if (np >= maxptr)
 +      gmx_fatal(FARGS,"Too many groups on line: '%s' (max is %d)",
 +                str,maxptr);
 +    if (ptr) 
 +      ptr[np]=copy;
 +    np++;
 +    while ((*copy != '\0') && !isspace(*copy))
 +      copy++;
 +    if (*copy != '\0') {
 +      *copy='\0';
 +      copy++;
 +    }
 +    ltrim(copy);
 +  }
 +  if (ptr == NULL)
 +    sfree(copy0);
 +
 +  return np;
 +}
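 +
 +/* Illustrative usage sketch for str_nelem ("Protein" and "SOL" are just
 + * example group names):
 + *
 + *   char *ptr[MAXPTR];
 + *   int  n = str_nelem("Protein  SOL", MAXPTR, ptr);
 + *   // n == 2, ptr[0] points to "Protein", ptr[1] points to "SOL"
 + *
 + * The pointers in ptr[] refer into an internal strdup'd copy of str, which is
 + * only freed when ptr == NULL is passed.
 + */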
 +
 +/* interpret a number of doubles from a string and put them in an array,
 +   after allocating space for them.
 +   str = the input string
 +   n = output: the number of doubles read
 +   r = output: the newly allocated array of doubles. */
 +static void parse_n_real(char *str,int *n,real **r)
 +{
 +  char *ptr[MAXPTR];
 +  int  i;
 +
 +  *n = str_nelem(str,MAXPTR,ptr);
 +
 +  snew(*r,*n);
 +  for(i=0; i<*n; i++) {
 +    (*r)[i] = strtod(ptr[i],NULL);
 +  }
 +}
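 +
 +/* Illustrative usage sketch for parse_n_real (values are made up):
 + *
 + *   int   n;
 + *   real *lam;
 + *   parse_n_real("0.0 0.5 1.0", &n, &lam);
 + *   // n == 3, lam[0..2] == {0.0, 0.5, 1.0}; lam is allocated here with snew()
 + */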
 +
 +static void do_fep_params(t_inputrec *ir, char fep_lambda[][STRLEN],char weights[STRLEN]) {
 +
 +    int i,j,max_n_lambda,nweights,nfep[efptNR];
 +    t_lambda *fep = ir->fepvals;
 +    t_expanded *expand = ir->expandedvals;
 +    real **count_fep_lambdas;
 +    gmx_bool bOneLambda = TRUE;
 +
 +    snew(count_fep_lambdas,efptNR);
 +
 +    /* FEP input processing */
 +    /* first, identify the number of lambda values for each type.
 +       All that are nonzero must have the same number */
 +
 +    for (i=0;i<efptNR;i++)
 +    {
 +        parse_n_real(fep_lambda[i],&(nfep[i]),&(count_fep_lambdas[i]));
 +    }
 +
 +    /* now, determine the number of components.  All must be either zero, or equal. */
 +
 +    max_n_lambda = 0;
 +    for (i=0;i<efptNR;i++)
 +    {
 +        if (nfep[i] > max_n_lambda) {
 +            max_n_lambda = nfep[i];  /* here's a nonzero one.  All of them
 +                                        must have the same number if it's not zero. */
 +            break;
 +        }
 +    }
 +
 +    for (i=0;i<efptNR;i++)
 +    {
 +        if (nfep[i] == 0)
 +        {
 +            ir->fepvals->separate_dvdl[i] = FALSE;
 +        }
 +        else if (nfep[i] == max_n_lambda)
 +        {
 +            if (i!=efptTEMPERATURE)  /* we treat this differently -- not really a reason to compute the derivative with
 +                                        respect to the temperature currently */
 +            {
 +                ir->fepvals->separate_dvdl[i] = TRUE;
 +            }
 +        }
 +        else
 +        {
 +            gmx_fatal(FARGS,"Number of lambdas (%d) for FEP type %s not equal to number of other types (%d)",
 +                      nfep[i],efpt_names[i],max_n_lambda);
 +        }
 +    }
 +    /* we don't print out dhdl if the temperature is changing, since we can't correctly define dhdl in this case */
 +    ir->fepvals->separate_dvdl[efptTEMPERATURE] = FALSE;
 +
 +    /* the number of lambdas is the number we've read in, which is either zero
 +       or the same for all */
 +    fep->n_lambda = max_n_lambda;
 +
 +    /* allocate space for the array of lambda values */
 +    snew(fep->all_lambda,efptNR);
 +    /* if init_lambda is defined, we need to set lambda */
 +    if ((fep->init_lambda > 0) && (fep->n_lambda == 0))
 +    {
 +        ir->fepvals->separate_dvdl[efptFEP] = TRUE;
 +    }
 +    /* otherwise allocate the space for all of the lambdas, and transfer the data */
 +    for (i=0;i<efptNR;i++)
 +    {
 +        snew(fep->all_lambda[i],fep->n_lambda);
 +        if (nfep[i] > 0)  /* if it's zero, then the count_fep_lambda arrays
 +                             are zero */
 +        {
 +            for (j=0;j<fep->n_lambda;j++)
 +            {
 +                fep->all_lambda[i][j] = (double)count_fep_lambdas[i][j];
 +            }
 +            sfree(count_fep_lambdas[i]);
 +        }
 +    }
 +    sfree(count_fep_lambdas);
 +
 +    /* the number of fep-lambdas is either zero or the full number. If zero, we'll need to define fep-lambdas
 +       for internal bookkeeping -- for now, from init_lambda */
 +
 +    if ((nfep[efptFEP] == 0) && (fep->init_lambda >= 0) && (fep->init_lambda <= 1))
 +    {
 +        for (i=0;i<fep->n_lambda;i++)
 +        {
 +            fep->all_lambda[efptFEP][i] = fep->init_lambda;
 +        }
 +    }
 +
 +    /* check to see if only a single component lambda is defined, and soft core is defined.
 +       In this case, turn on coulomb soft core */
 +
 +    if (max_n_lambda == 0)
 +    {
 +        bOneLambda = TRUE;
 +    }
 +    else
 +    {
 +        for (i=0;i<efptNR;i++)
 +        {
 +            if ((nfep[i] != 0) && (i!=efptFEP))
 +            {
 +                bOneLambda = FALSE;
 +            }
 +        }
 +    }
 +    if ((bOneLambda) && (fep->sc_alpha > 0))
 +    {
 +        fep->bScCoul = TRUE;
 +    }
 +
 +    /* Fill in the others with the efptFEP if they are not explicitly
 +       specified (i.e. nfep[i] == 0).  This means if fep is not defined,
 +       they are all zero. */
 +
 +    for (i=0;i<efptNR;i++)
 +    {
 +        if ((nfep[i] == 0) && (i!=efptFEP))
 +        {
 +            for (j=0;j<fep->n_lambda;j++)
 +            {
 +                fep->all_lambda[i][j] = fep->all_lambda[efptFEP][j];
 +            }
 +        }
 +    }
 +
 +
 +    /* with sc_r_power = 48, sc_alpha has to be on a much smaller scale than usual */
 +    if (fep->sc_r_power == 48)
 +    {
 +        if (fep->sc_alpha > 0.1)
 +        {
 +            gmx_fatal(FARGS,"sc_alpha (%f) for sc_r_power = 48 should usually be between 0.001 and 0.004", fep->sc_alpha);
 +        }
 +    }
 +
 +    expand = ir->expandedvals;
 +    /* now read in the weights */
 +    parse_n_real(weights,&nweights,&(expand->init_lambda_weights));
 +    if (nweights == 0)
 +    {
 +        expand->bInit_weights = FALSE;
 +        snew(expand->init_lambda_weights,fep->n_lambda); /* initialize to zero */
 +    }
 +    else if (nweights != fep->n_lambda)
 +    {
 +        gmx_fatal(FARGS,"Number of weights (%d) is not equal to number of lambda values (%d)",
 +                  nweights,fep->n_lambda);
 +    }
 +    else
 +    {
 +        expand->bInit_weights = TRUE;
 +    }
 +    if ((expand->nstexpanded < 0) && (ir->efep != efepNO)) {
 +        expand->nstexpanded = fep->nstdhdl;
 +        /* if you don't specify nstexpanded when doing expanded ensemble free energy calcs, it is set to nstdhdl */
 +    }
 +    if ((expand->nstexpanded < 0) && ir->bSimTemp) {
 +        expand->nstexpanded = ir->nstlist;
 +        /* if you don't specify nstexpanded when doing expanded ensemble simulated tempering, it is set to nstlist*/
 +    }
 +}
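 +
 +/* Worked example of the lambda bookkeeping above (hypothetical mdp input):
 + *
 + *   fep-lambdas  = 0.0 0.5 1.0
 + *   coul-lambdas =
 + *
 + * gives n_lambda = 3 and separate_dvdl[efptFEP] = TRUE; since coul-lambdas is
 + * empty, all_lambda[efptCOUL][j] is copied from all_lambda[efptFEP][j], so the
 + * coulomb lambdas effectively become 0.0 0.5 1.0 as well.
 + */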
 +
 +
 +static void do_simtemp_params(t_inputrec *ir) {
 +
 +    snew(ir->simtempvals->temperatures,ir->fepvals->n_lambda);
 +    GetSimTemps(ir->fepvals->n_lambda,ir->simtempvals,ir->fepvals->all_lambda[efptTEMPERATURE]);
 +
 +    return;
 +}
 +
 +static void do_wall_params(t_inputrec *ir,
 +                           char *wall_atomtype, char *wall_density,
 +                           t_gromppopts *opts)
 +{
 +    int  nstr,i;
 +    char *names[MAXPTR];
 +    double dbl;
 +
 +    opts->wall_atomtype[0] = NULL;
 +    opts->wall_atomtype[1] = NULL;
 +
 +    ir->wall_atomtype[0] = -1;
 +    ir->wall_atomtype[1] = -1;
 +    ir->wall_density[0] = 0;
 +    ir->wall_density[1] = 0;
 +  
 +    if (ir->nwall > 0)
 +    {
 +        nstr = str_nelem(wall_atomtype,MAXPTR,names);
 +        if (nstr != ir->nwall)
 +        {
 +            gmx_fatal(FARGS,"Expected %d elements for wall_atomtype, found %d",
 +                      ir->nwall,nstr);
 +        }
 +        for(i=0; i<ir->nwall; i++)
 +        {
 +            opts->wall_atomtype[i] = strdup(names[i]);
 +        }
 +    
 +        if (ir->wall_type == ewt93 || ir->wall_type == ewt104) {
 +            nstr = str_nelem(wall_density,MAXPTR,names);
 +            if (nstr != ir->nwall)
 +            {
 +                gmx_fatal(FARGS,"Expected %d elements for wall-density, found %d",ir->nwall,nstr);
 +            }
 +            for(i=0; i<ir->nwall; i++)
 +            {
 +                sscanf(names[i],"%lf",&dbl);
 +                if (dbl <= 0)
 +                {
 +                    gmx_fatal(FARGS,"wall-density[%d] = %f\n",i,dbl);
 +                }
 +                ir->wall_density[i] = dbl;
 +            }
 +        }
 +    }
 +}
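 +
 +/* Illustrative wall input for do_wall_params (CWALL is a hypothetical atom type):
 + *
 + *   nwall          = 2
 + *   wall-atomtype  = CWALL CWALL
 + *   wall-density   = 30 30
 + *
 + * i.e. one entry per wall; the densities must be > 0 and are only read for the
 + * 9-3 and 10-4 wall types.
 + */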
 +
 +static void add_wall_energrps(gmx_groups_t *groups,int nwall,t_symtab *symtab)
 +{
 +  int  i;
 +  t_grps *grps;
 +  char str[STRLEN];
 +  
 +  if (nwall > 0) {
 +    srenew(groups->grpname,groups->ngrpname+nwall);
 +    grps = &(groups->grps[egcENER]);
 +    srenew(grps->nm_ind,grps->nr+nwall);
 +    for(i=0; i<nwall; i++) {
 +      sprintf(str,"wall%d",i);
 +      groups->grpname[groups->ngrpname] = put_symtab(symtab,str);
 +      grps->nm_ind[grps->nr++] = groups->ngrpname++;
 +    }
 +  }
 +}
 +
 +void read_expandedparams(int *ninp_p,t_inpfile **inp_p,
 +                         t_expanded *expand,warninp_t wi)
 +{
 +  int  ninp,nerror=0;
 +  t_inpfile *inp;
 +
 +  ninp   = *ninp_p;
 +  inp    = *inp_p;
 +
 +  /* read expanded ensemble parameters */
 +  CCTYPE ("expanded ensemble variables");
 +  ITYPE ("nstexpanded",expand->nstexpanded,-1);
 +  EETYPE("lmc-stats", expand->elamstats, elamstats_names);
 +  EETYPE("lmc-move", expand->elmcmove, elmcmove_names);
 +  EETYPE("lmc-weights-equil",expand->elmceq,elmceq_names);
 +  ITYPE ("weight-equil-number-all-lambda",expand->equil_n_at_lam,-1);
 +  ITYPE ("weight-equil-number-samples",expand->equil_samples,-1);
 +  ITYPE ("weight-equil-number-steps",expand->equil_steps,-1);
 +  RTYPE ("weight-equil-wl-delta",expand->equil_wl_delta,-1);
 +  RTYPE ("weight-equil-count-ratio",expand->equil_ratio,-1);
 +  CCTYPE("Seed for Monte Carlo in lambda space");
 +  ITYPE ("lmc-seed",expand->lmc_seed,-1);
 +  RTYPE ("mc-temperature",expand->mc_temp,-1);
 +  ITYPE ("lmc-repeats",expand->lmc_repeats,1);
 +  ITYPE ("lmc-gibbsdelta",expand->gibbsdeltalam,-1);
 +  ITYPE ("lmc-forced-nstart",expand->lmc_forced_nstart,0);
 +  EETYPE("symmetrized-transition-matrix", expand->bSymmetrizedTMatrix, yesno_names);
 +  ITYPE("nst-transition-matrix", expand->nstTij, -1);
 +  ITYPE ("mininum-var-min",expand->minvarmin, 100); /*default is reasonable */
 +  ITYPE ("weight-c-range",expand->c_range, 0); /* default is just C=0 */
 +  RTYPE ("wl-scale",expand->wl_scale,0.8);
 +  RTYPE ("wl-ratio",expand->wl_ratio,0.8);
 +  RTYPE ("init-wl-delta",expand->init_wl_delta,1.0);
 +  EETYPE("wl-oneovert",expand->bWLoneovert,yesno_names);
 +
 +  *ninp_p   = ninp;
 +  *inp_p    = inp;
 +
 +  return;
 +}
 +
 +void get_ir(const char *mdparin,const char *mdparout,
 +            t_inputrec *ir,t_gromppopts *opts,
 +            warninp_t wi)
 +{
 +  char      *dumstr[2];
 +  double    dumdub[2][6];
 +  t_inpfile *inp;
 +  const char *tmp;
 +  int       i,j,m,ninp;
 +  char      warn_buf[STRLEN];
 +  t_lambda  *fep = ir->fepvals;
 +  t_expanded *expand = ir->expandedvals;
 +
 +  inp = read_inpfile(mdparin, &ninp, NULL, wi);
 +
 +  snew(dumstr[0],STRLEN);
 +  snew(dumstr[1],STRLEN);
 +
 +  /* remove the following deprecated commands */
 +  REM_TYPE("title");
 +  REM_TYPE("cpp");
 +  REM_TYPE("domain-decomposition");
 +  REM_TYPE("andersen-seed");
 +  REM_TYPE("dihre");
 +  REM_TYPE("dihre-fc");
 +  REM_TYPE("dihre-tau");
 +  REM_TYPE("nstdihreout");
 +  REM_TYPE("nstcheckpoint");
 +
 +  /* replace the following commands with the clearer new versions */
 +  REPL_TYPE("unconstrained-start","continuation");
 +  REPL_TYPE("foreign-lambda","fep-lambdas");
 +
 +  CCTYPE ("VARIOUS PREPROCESSING OPTIONS");
 +  CTYPE ("Preprocessor information: use cpp syntax.");
 +  CTYPE ("e.g.: -I/home/joe/doe -I/home/mary/roe");
 +  STYPE ("include",   opts->include,  NULL);
 +  CTYPE ("e.g.: -DPOSRES -DFLEXIBLE (note these variable names are case sensitive)");
 +  STYPE ("define",    opts->define,   NULL);
 +    
 +  CCTYPE ("RUN CONTROL PARAMETERS");
 +  EETYPE("integrator",  ir->eI,         ei_names);
 +  CTYPE ("Start time and timestep in ps");
 +  RTYPE ("tinit",     ir->init_t,     0.0);
 +  RTYPE ("dt",                ir->delta_t,    0.001);
 +  STEPTYPE ("nsteps",   ir->nsteps,     0);
 +  CTYPE ("For exact run continuation or redoing part of a run");
 +  STEPTYPE ("init-step",ir->init_step,  0);
 +  CTYPE ("Part index is updated automatically on checkpointing (keeps files separate)");
 +  ITYPE ("simulation-part", ir->simulation_part, 1);
 +  CTYPE ("mode for center of mass motion removal");
 +  EETYPE("comm-mode",   ir->comm_mode,  ecm_names);
 +  CTYPE ("number of steps for center of mass motion removal");
 +  ITYPE ("nstcomm",   ir->nstcomm,    10);
 +  CTYPE ("group(s) for center of mass motion removal");
 +  STYPE ("comm-grps",   vcm,            NULL);
 +  
 +  CCTYPE ("LANGEVIN DYNAMICS OPTIONS");
 +  CTYPE ("Friction coefficient (amu/ps) and random seed");
 +  RTYPE ("bd-fric",     ir->bd_fric,    0.0);
 +  ITYPE ("ld-seed",     ir->ld_seed,    1993);
 +  
 +  /* Em stuff */
 +  CCTYPE ("ENERGY MINIMIZATION OPTIONS");
 +  CTYPE ("Force tolerance and initial step-size");
 +  RTYPE ("emtol",       ir->em_tol,     10.0);
 +  RTYPE ("emstep",      ir->em_stepsize,0.01);
 +  CTYPE ("Max number of iterations in relax-shells");
 +  ITYPE ("niter",       ir->niter,      20);
 +  CTYPE ("Step size (ps^2) for minimization of flexible constraints");
 +  RTYPE ("fcstep",      ir->fc_stepsize, 0);
 +  CTYPE ("Frequency of steepest descents steps when doing CG");
 +  ITYPE ("nstcgsteep",        ir->nstcgsteep, 1000);
 +  ITYPE ("nbfgscorr",   ir->nbfgscorr,  10); 
 +
 +  CCTYPE ("TEST PARTICLE INSERTION OPTIONS");
 +  RTYPE ("rtpi",      ir->rtpi,       0.05);
 +
 +  /* Output options */
 +  CCTYPE ("OUTPUT CONTROL OPTIONS");
 +  CTYPE ("Output frequency for coords (x), velocities (v) and forces (f)");
 +  ITYPE ("nstxout",   ir->nstxout,    0);
 +  ITYPE ("nstvout",   ir->nstvout,    0);
 +  ITYPE ("nstfout",   ir->nstfout,    0);
 +  ir->nstcheckpoint = 1000;
 +  CTYPE ("Output frequency for energies to log file and energy file");
 +  ITYPE ("nstlog",    ir->nstlog,     1000);
 +  ITYPE ("nstcalcenergy",ir->nstcalcenergy,   -1);
 +  ITYPE ("nstenergy",   ir->nstenergy,  100);
 +  CTYPE ("Output frequency and precision for .xtc file");
 +  ITYPE ("nstxtcout",   ir->nstxtcout,  0);
 +  RTYPE ("xtc-precision",ir->xtcprec,   1000.0);
 +  CTYPE ("This selects the subset of atoms for the .xtc file. You can");
 +  CTYPE ("select multiple groups. By default all atoms will be written.");
 +  STYPE ("xtc-grps",    xtc_grps,       NULL);
 +  CTYPE ("Selection of energy groups");
 +  STYPE ("energygrps",  energy,         NULL);
 +
 +  /* Neighbor searching */  
 +  CCTYPE ("NEIGHBORSEARCHING PARAMETERS");
 +  CTYPE ("nblist update frequency");
 +  ITYPE ("nstlist",   ir->nstlist,    10);
 +  CTYPE ("ns algorithm (simple or grid)");
 +  EETYPE("ns-type",     ir->ns_type,    ens_names);
 +  /* set ndelta to the optimal value of 2 */
 +  ir->ndelta = 2;
 +  CTYPE ("Periodic boundary conditions: xyz, no, xy");
 +  EETYPE("pbc",         ir->ePBC,       epbc_names);
 +  EETYPE("periodic-molecules", ir->bPeriodicMols, yesno_names);
 +  CTYPE ("nblist cut-off");
 +  RTYPE ("rlist",     ir->rlist,      -1);
 +  CTYPE ("long-range cut-off for switched potentials");
 +  RTYPE ("rlistlong", ir->rlistlong,  -1);
 +
 +  /* Electrostatics */
 +  CCTYPE ("OPTIONS FOR ELECTROSTATICS AND VDW");
 +  CTYPE ("Method for doing electrostatics");
 +  EETYPE("coulombtype",       ir->coulombtype,    eel_names);
 +  CTYPE ("cut-off lengths");
 +  RTYPE ("rcoulomb-switch",   ir->rcoulomb_switch,    0.0);
 +  RTYPE ("rcoulomb",  ir->rcoulomb,   -1);
 +  CTYPE ("Relative dielectric constant for the medium and the reaction field");
 +  RTYPE ("epsilon-r",   ir->epsilon_r,  1.0);
 +  RTYPE ("epsilon-rf",  ir->epsilon_rf, 0.0);
 +  CTYPE ("Method for doing Van der Waals");
 +  EETYPE("vdw-type",  ir->vdwtype,    evdw_names);
 +  CTYPE ("cut-off lengths");
 +  RTYPE ("rvdw-switch",       ir->rvdw_switch,        0.0);
 +  RTYPE ("rvdw",      ir->rvdw,       -1);
 +  CTYPE ("Apply long range dispersion corrections for Energy and Pressure");
 +  EETYPE("DispCorr",    ir->eDispCorr,  edispc_names);
 +  CTYPE ("Extension of the potential lookup tables beyond the cut-off");
 +  RTYPE ("table-extension", ir->tabext, 1.0);
 +  CTYPE ("Separate tables between energy group pairs");
 +  STYPE ("energygrp-table", egptable,   NULL);
 +  CTYPE ("Spacing for the PME/PPPM FFT grid");
 +  RTYPE ("fourierspacing", opts->fourierspacing,0.12);
 +  CTYPE ("FFT grid size, when a value is 0 fourierspacing will be used");
 +  ITYPE ("fourier-nx",  ir->nkx,         0);
 +  ITYPE ("fourier-ny",  ir->nky,         0);
 +  ITYPE ("fourier-nz",  ir->nkz,         0);
 +  CTYPE ("EWALD/PME/PPPM parameters");
 +  ITYPE ("pme-order",   ir->pme_order,   4);
 +  RTYPE ("ewald-rtol",  ir->ewald_rtol, 0.00001);
 +  EETYPE("ewald-geometry", ir->ewald_geometry, eewg_names);
 +  RTYPE ("epsilon-surface", ir->epsilon_surface, 0.0);
 +  EETYPE("optimize-fft",ir->bOptFFT,  yesno_names);
 +
 +  CCTYPE("IMPLICIT SOLVENT ALGORITHM");
 +  EETYPE("implicit-solvent", ir->implicit_solvent, eis_names);
 +      
 +  CCTYPE ("GENERALIZED BORN ELECTROSTATICS"); 
 +  CTYPE ("Algorithm for calculating Born radii");
 +  EETYPE("gb-algorithm", ir->gb_algorithm, egb_names);
 +  CTYPE ("Frequency of calculating the Born radii inside rlist");
 +  ITYPE ("nstgbradii", ir->nstgbradii, 1);
 +  CTYPE ("Cutoff for Born radii calculation; the contribution from atoms");
 +  CTYPE ("between rlist and rgbradii is updated every nstlist steps");
 +  RTYPE ("rgbradii",  ir->rgbradii, 1.0);
 +  CTYPE ("Dielectric coefficient of the implicit solvent");
 +  RTYPE ("gb-epsilon-solvent",ir->gb_epsilon_solvent, 80.0);
 +  CTYPE ("Salt concentration in M for Generalized Born models");
 +  RTYPE ("gb-saltconc",  ir->gb_saltconc, 0.0);
 +  CTYPE ("Scaling factors used in the OBC GB model. Default values are OBC(II)");
 +  RTYPE ("gb-obc-alpha", ir->gb_obc_alpha, 1.0);
 +  RTYPE ("gb-obc-beta", ir->gb_obc_beta, 0.8);
 +  RTYPE ("gb-obc-gamma", ir->gb_obc_gamma, 4.85);
 +  RTYPE ("gb-dielectric-offset", ir->gb_dielectric_offset, 0.009);
 +  EETYPE("sa-algorithm", ir->sa_algorithm, esa_names);
 +  CTYPE ("Surface tension (kJ/mol/nm^2) for the SA (nonpolar surface) part of GBSA");
 +  CTYPE ("The value -1 will set default value for Still/HCT/OBC GB-models.");
 +  RTYPE ("sa-surface-tension", ir->sa_surface_tension, -1);
 +               
 +  /* Coupling stuff */
 +  CCTYPE ("OPTIONS FOR WEAK COUPLING ALGORITHMS");
 +  CTYPE ("Temperature coupling");
 +  EETYPE("tcoupl",    ir->etc,        etcoupl_names);
 +  ITYPE ("nsttcouple", ir->nsttcouple,  -1);
 +  ITYPE("nh-chain-length",     ir->opts.nhchainlength, NHCHAINLENGTH);
 +  EETYPE("print-nose-hoover-chain-variables", ir->bPrintNHChains, yesno_names);
 +  CTYPE ("Groups to couple separately");
 +  STYPE ("tc-grps",     tcgrps,         NULL);
 +  CTYPE ("Time constant (ps) and reference temperature (K)");
 +  STYPE ("tau-t",     tau_t,          NULL);
 +  STYPE ("ref-t",     ref_t,          NULL);
 +  CTYPE ("pressure coupling");
 +  EETYPE("pcoupl",    ir->epc,        epcoupl_names);
 +  EETYPE("pcoupltype",        ir->epct,       epcoupltype_names);
 +  ITYPE ("nstpcouple", ir->nstpcouple,  -1);
 +  CTYPE ("Time constant (ps), compressibility (1/bar) and reference P (bar)");
 +  RTYPE ("tau-p",     ir->tau_p,      1.0);
 +  STYPE ("compressibility",   dumstr[0],      NULL);
 +  STYPE ("ref-p",       dumstr[1],      NULL);
 +  CTYPE ("Scaling of reference coordinates, No, All or COM");
 +  EETYPE ("refcoord-scaling",ir->refcoord_scaling,erefscaling_names);
 +
 +  /* QMMM */
 +  CCTYPE ("OPTIONS FOR QMMM calculations");
 +  EETYPE("QMMM", ir->bQMMM, yesno_names);
 +  CTYPE ("Groups treated Quantum Mechanically");
 +  STYPE ("QMMM-grps",  QMMM,          NULL);
 +  CTYPE ("QM method");
 +  STYPE("QMmethod",     QMmethod, NULL);
 +  CTYPE ("QMMM scheme");
 +  EETYPE("QMMMscheme",  ir->QMMMscheme,    eQMMMscheme_names);
 +  CTYPE ("QM basisset");
 +  STYPE("QMbasis",      QMbasis, NULL);
 +  CTYPE ("QM charge");
 +  STYPE ("QMcharge",    QMcharge,NULL);
 +  CTYPE ("QM multiplicity");
 +  STYPE ("QMmult",      QMmult,NULL);
 +  CTYPE ("Surface Hopping");
 +  STYPE ("SH",          bSH, NULL);
 +  CTYPE ("CAS space options");
 +  STYPE ("CASorbitals",      CASorbitals,   NULL);
 +  STYPE ("CASelectrons",     CASelectrons,  NULL);
 +  STYPE ("SAon", SAon, NULL);
 +  STYPE ("SAoff",SAoff,NULL);
 +  STYPE ("SAsteps",  SAsteps, NULL);
 +  CTYPE ("Scale factor for MM charges");
 +  RTYPE ("MMChargeScaleFactor", ir->scalefactor, 1.0);
 +  CTYPE ("Optimization of QM subsystem");
 +  STYPE ("bOPT",          bOPT, NULL);
 +  STYPE ("bTS",          bTS, NULL);
 +
 +  /* Simulated annealing */
 +  CCTYPE("SIMULATED ANNEALING");
 +  CTYPE ("Type of annealing for each temperature group (no/single/periodic)");
 +  STYPE ("annealing",   anneal,      NULL);
 +  CTYPE ("Number of time points to use for specifying annealing in each group");
 +  STYPE ("annealing-npoints", anneal_npoints, NULL);
 +  CTYPE ("List of times at the annealing points for each group");
 +  STYPE ("annealing-time",       anneal_time,       NULL);
 +  CTYPE ("Temp. at each annealing point, for each group.");
 +  STYPE ("annealing-temp",  anneal_temp,  NULL);
 +  
 +  /* Startup run */
 +  CCTYPE ("GENERATE VELOCITIES FOR STARTUP RUN");
 +  EETYPE("gen-vel",     opts->bGenVel,  yesno_names);
 +  RTYPE ("gen-temp",    opts->tempi,    300.0);
 +  ITYPE ("gen-seed",    opts->seed,     173529);
 +  
 +  /* Shake stuff */
 +  CCTYPE ("OPTIONS FOR BONDS");
 +  EETYPE("constraints",       opts->nshake,   constraints);
 +  CTYPE ("Type of constraint algorithm");
 +  EETYPE("constraint-algorithm",  ir->eConstrAlg, econstr_names);
 +  CTYPE ("Do not constrain the start configuration");
 +  EETYPE("continuation", ir->bContinuation, yesno_names);
 +  CTYPE ("Use successive overrelaxation to reduce the number of shake iterations");
 +  EETYPE("Shake-SOR", ir->bShakeSOR, yesno_names);
 +  CTYPE ("Relative tolerance of shake");
 +  RTYPE ("shake-tol", ir->shake_tol, 0.0001);
 +  CTYPE ("Highest order in the expansion of the constraint coupling matrix");
 +  ITYPE ("lincs-order", ir->nProjOrder, 4);
 +  CTYPE ("Number of iterations in the final step of LINCS. 1 is fine for");
 +  CTYPE ("normal simulations, but use 2 to conserve energy in NVE runs.");
 +  CTYPE ("For energy minimization with constraints it should be 4 to 8.");
 +  ITYPE ("lincs-iter", ir->nLincsIter, 1);
 +  CTYPE ("Lincs will write a warning to the stderr if in one step a bond"); 
 +  CTYPE ("rotates over more degrees than");
 +  RTYPE ("lincs-warnangle", ir->LincsWarnAngle, 30.0);
 +  CTYPE ("Convert harmonic bonds to morse potentials");
 +  EETYPE("morse",       opts->bMorse,yesno_names);
 +
 +  /* Energy group exclusions */
 +  CCTYPE ("ENERGY GROUP EXCLUSIONS");
 +  CTYPE ("Pairs of energy groups for which all non-bonded interactions are excluded");
 +  STYPE ("energygrp-excl", egpexcl,     NULL);
 +  
 +  /* Walls */
 +  CCTYPE ("WALLS");
 +  CTYPE ("Number of walls, type, atom types, densities and box-z scale factor for Ewald");
 +  ITYPE ("nwall", ir->nwall, 0);
 +  EETYPE("wall-type",     ir->wall_type,   ewt_names);
 +  RTYPE ("wall-r-linpot", ir->wall_r_linpot, -1);
 +  STYPE ("wall-atomtype", wall_atomtype, NULL);
 +  STYPE ("wall-density",  wall_density,  NULL);
 +  RTYPE ("wall-ewald-zfac", ir->wall_ewald_zfac, 3);
 +  
 +  /* COM pulling */
 +  CCTYPE("COM PULLING");
 +  CTYPE("Pull type: no, umbrella, constraint or constant-force");
 +  EETYPE("pull",          ir->ePull, epull_names);
 +  if (ir->ePull != epullNO) {
 +    snew(ir->pull,1);
 +    pull_grp = read_pullparams(&ninp,&inp,ir->pull,&opts->pull_start,wi);
 +  }
 +  
 +  /* Enforced rotation */
 +  CCTYPE("ENFORCED ROTATION");
 +  CTYPE("Enforced rotation: No or Yes");
 +  EETYPE("rotation",       ir->bRot, yesno_names);
 +  if (ir->bRot) {
 +    snew(ir->rot,1);
 +    rot_grp = read_rotparams(&ninp,&inp,ir->rot,wi);
 +  }
 +
 +  /* Refinement */
 +  CCTYPE("NMR refinement stuff");
 +  CTYPE ("Distance restraints type: No, Simple or Ensemble");
 +  EETYPE("disre",       ir->eDisre,     edisre_names);
 +  CTYPE ("Force weighting of pairs in one distance restraint: Conservative or Equal");
 +  EETYPE("disre-weighting", ir->eDisreWeighting, edisreweighting_names);
 +  CTYPE ("Use sqrt of the time averaged times the instantaneous violation");
 +  EETYPE("disre-mixed", ir->bDisreMixed, yesno_names);
 +  RTYPE ("disre-fc",  ir->dr_fc,      1000.0);
 +  RTYPE ("disre-tau", ir->dr_tau,     0.0);
 +  CTYPE ("Output frequency for pair distances to energy file");
 +  ITYPE ("nstdisreout", ir->nstdisreout, 100);
 +  CTYPE ("Orientation restraints: No or Yes");
 +  EETYPE("orire",       opts->bOrire,   yesno_names);
 +  CTYPE ("Orientation restraints force constant and tau for time averaging");
 +  RTYPE ("orire-fc",  ir->orires_fc,  0.0);
 +  RTYPE ("orire-tau", ir->orires_tau, 0.0);
 +  STYPE ("orire-fitgrp",orirefitgrp,    NULL);
 +  CTYPE ("Output frequency for trace(SD) and S to energy file");
 +  ITYPE ("nstorireout", ir->nstorireout, 100);
 +
 +  /* free energy variables */
 +  CCTYPE ("Free energy variables");
 +  EETYPE("free-energy", ir->efep, efep_names);
 +  STYPE ("couple-moltype",  couple_moltype,  NULL);
 +  EETYPE("couple-lambda0", opts->couple_lam0, couple_lam);
 +  EETYPE("couple-lambda1", opts->couple_lam1, couple_lam);
 +  EETYPE("couple-intramol", opts->bCoupleIntra, yesno_names);
 +
 +  RTYPE ("init-lambda", fep->init_lambda,-1); /* start with -1 so
 +                                                 we can recognize if
 +                                                 it was not entered */
 +  ITYPE ("init-lambda-state", fep->init_fep_state,0);
 +  RTYPE ("delta-lambda",fep->delta_lambda,0.0);
 +  ITYPE ("nstdhdl",fep->nstdhdl, 10);
 +  STYPE ("fep-lambdas", fep_lambda[efptFEP], NULL);
 +  STYPE ("mass-lambdas", fep_lambda[efptMASS], NULL);
 +  STYPE ("coul-lambdas", fep_lambda[efptCOUL], NULL);
 +  STYPE ("vdw-lambdas", fep_lambda[efptVDW], NULL);
 +  STYPE ("bonded-lambdas", fep_lambda[efptBONDED], NULL);
 +  STYPE ("restraint-lambdas", fep_lambda[efptRESTRAINT], NULL);
 +  STYPE ("temperature-lambdas", fep_lambda[efptTEMPERATURE], NULL);
 +  STYPE ("init-lambda-weights",lambda_weights,NULL);
 +  EETYPE("dhdl-print-energy", fep->bPrintEnergy, yesno_names);
 +  RTYPE ("sc-alpha",fep->sc_alpha,0.0);
 +  ITYPE ("sc-power",fep->sc_power,1);
 +  RTYPE ("sc-r-power",fep->sc_r_power,6.0);
 +  RTYPE ("sc-sigma",fep->sc_sigma,0.3);
 +  EETYPE("sc-coul",fep->bScCoul,yesno_names);
 +  ITYPE ("dh_hist_size", fep->dh_hist_size, 0);
 +  RTYPE ("dh_hist_spacing", fep->dh_hist_spacing, 0.1);
 +  EETYPE("separate-dhdl-file", fep->separate_dhdl_file,
 +                               separate_dhdl_file_names);
 +  EETYPE("dhdl-derivatives", fep->dhdl_derivatives, dhdl_derivatives_names);
 +  ITYPE ("dh_hist_size", fep->dh_hist_size, 0);
 +  RTYPE ("dh_hist_spacing", fep->dh_hist_spacing, 0.1);
 +
 +  /* Non-equilibrium MD stuff */  
 +  CCTYPE("Non-equilibrium MD stuff");
 +  STYPE ("acc-grps",    accgrps,        NULL);
 +  STYPE ("accelerate",  acc,            NULL);
 +  STYPE ("freezegrps",  freeze,         NULL);
 +  STYPE ("freezedim",   frdim,          NULL);
 +  RTYPE ("cos-acceleration", ir->cos_accel, 0);
 +  STYPE ("deform",      deform,         NULL);
 +
 +  /* simulated tempering variables */
 +  CCTYPE("simulated tempering variables");
 +  EETYPE("simulated-tempering",ir->bSimTemp,yesno_names);
 +  EETYPE("simulated-tempering-scaling",ir->simtempvals->eSimTempScale,esimtemp_names);
 +  RTYPE("sim-temp-low",ir->simtempvals->simtemp_low,300.0);
 +  RTYPE("sim-temp-high",ir->simtempvals->simtemp_high,300.0);
 +
 +  /* expanded ensemble variables */
 +  if (ir->efep==efepEXPANDED || ir->bSimTemp)
 +  {
 +      read_expandedparams(&ninp,&inp,expand,wi);
 +  }
 +
 +  /* Electric fields */
 +  CCTYPE("Electric fields");
 +  CTYPE ("Format is number of terms (int) and for all terms an amplitude (real)");
 +  CTYPE ("and a phase angle (real)");
 +  STYPE ("E-x",       efield_x,       NULL);
 +  STYPE ("E-xt",      efield_xt,      NULL);
 +  STYPE ("E-y",       efield_y,       NULL);
 +  STYPE ("E-yt",      efield_yt,      NULL);
 +  STYPE ("E-z",       efield_z,       NULL);
 +  STYPE ("E-zt",      efield_zt,      NULL);
 +  
 +  /* AdResS defined thingies */
 +  CCTYPE ("AdResS parameters");
 +  EETYPE("adress",       ir->bAdress, yesno_names);
 +  if (ir->bAdress) {
 +    snew(ir->adress,1);
 +    read_adressparams(&ninp,&inp,ir->adress,wi);
 +  }
 +
 +  /* User defined thingies */
 +  CCTYPE ("User defined thingies");
 +  STYPE ("user1-grps",  user1,          NULL);
 +  STYPE ("user2-grps",  user2,          NULL);
 +  ITYPE ("userint1",    ir->userint1,   0);
 +  ITYPE ("userint2",    ir->userint2,   0);
 +  ITYPE ("userint3",    ir->userint3,   0);
 +  ITYPE ("userint4",    ir->userint4,   0);
 +  RTYPE ("userreal1",   ir->userreal1,  0);
 +  RTYPE ("userreal2",   ir->userreal2,  0);
 +  RTYPE ("userreal3",   ir->userreal3,  0);
 +  RTYPE ("userreal4",   ir->userreal4,  0);
 +#undef CTYPE
 +
 +  write_inpfile(mdparout,ninp,inp,FALSE,wi);
 +  for (i=0; (i<ninp); i++) {
 +    sfree(inp[i].name);
 +    sfree(inp[i].value);
 +  }
 +  sfree(inp);
 +
 +  /* Process options if necessary */
 +  for(m=0; m<2; m++) {
 +    for(i=0; i<2*DIM; i++)
 +      dumdub[m][i]=0.0;
 +    if(ir->epc) {
 +      switch (ir->epct) {
 +      case epctISOTROPIC:
 +      if (sscanf(dumstr[m],"%lf",&(dumdub[m][XX]))!=1) {
 +        warning_error(wi,"Pressure coupling not enough values (I need 1)");
 +      }
 +      dumdub[m][YY]=dumdub[m][ZZ]=dumdub[m][XX];
 +      break;
 +      case epctSEMIISOTROPIC:
 +      case epctSURFACETENSION:
 +      if (sscanf(dumstr[m],"%lf%lf",
 +                 &(dumdub[m][XX]),&(dumdub[m][ZZ]))!=2) {
 +        warning_error(wi,"Pressure coupling not enough values (I need 2)");
 +      }
 +      dumdub[m][YY]=dumdub[m][XX];
 +      break;
 +      case epctANISOTROPIC:
 +      if (sscanf(dumstr[m],"%lf%lf%lf%lf%lf%lf",
 +                 &(dumdub[m][XX]),&(dumdub[m][YY]),&(dumdub[m][ZZ]),
 +                 &(dumdub[m][3]),&(dumdub[m][4]),&(dumdub[m][5]))!=6) {
 +        warning_error(wi,"Pressure coupling not enough values (I need 6)");
 +      }
 +      break;
 +      default:
 +      gmx_fatal(FARGS,"Pressure coupling type %s not implemented yet",
 +                  epcoupltype_names[ir->epct]);
 +      }
 +    }
 +  }
 +  clear_mat(ir->ref_p);
 +  clear_mat(ir->compress);
 +  for(i=0; i<DIM; i++) {
 +    ir->ref_p[i][i]    = dumdub[1][i];
 +    ir->compress[i][i] = dumdub[0][i];
 +  }
 +  if (ir->epct == epctANISOTROPIC) {
 +    ir->ref_p[XX][YY] = dumdub[1][3];
 +    ir->ref_p[XX][ZZ] = dumdub[1][4];
 +    ir->ref_p[YY][ZZ] = dumdub[1][5];
 +    if (ir->ref_p[XX][YY]!=0 && ir->ref_p[XX][ZZ]!=0 && ir->ref_p[YY][ZZ]!=0) {
 +      warning(wi,"All off-diagonal reference pressures are non-zero. Are you sure you want to apply a threefold shear stress?\n");
 +    }
 +    ir->compress[XX][YY] = dumdub[0][3];
 +    ir->compress[XX][ZZ] = dumdub[0][4];
 +    ir->compress[YY][ZZ] = dumdub[0][5];
 +    for(i=0; i<DIM; i++) {
 +      for(m=0; m<i; m++) {
 +      ir->ref_p[i][m] = ir->ref_p[m][i];
 +      ir->compress[i][m] = ir->compress[m][i];
 +      }
 +    }
 +  } 
 +  
 +  if (ir->comm_mode == ecmNO)
 +    ir->nstcomm = 0;
 +
 +  opts->couple_moltype = NULL;
 +  if (strlen(couple_moltype) > 0) 
 +  {
 +      if (ir->efep != efepNO) 
 +      {
 +          opts->couple_moltype = strdup(couple_moltype);
 +          if (opts->couple_lam0 == opts->couple_lam1)
 +          {
 +              warning(wi,"The lambda=0 and lambda=1 states for coupling are identical");
 +          }
 +          if (ir->eI == eiMD && (opts->couple_lam0 == ecouplamNONE ||
 +                                 opts->couple_lam1 == ecouplamNONE)) 
 +          {
 +              warning(wi,"For proper sampling of the (nearly) decoupled state, stochastic dynamics should be used");
 +          }
 +      }
 +      else
 +      {
 +          warning(wi,"Can not couple a molecule with free_energy = no");
 +      }
 +  }
 +  /* FREE ENERGY AND EXPANDED ENSEMBLE OPTIONS */
 +  if (ir->efep != efepNO) {
 +      if (fep->delta_lambda > 0) {
 +          ir->efep = efepSLOWGROWTH;
 +      }
 +  }
 +
 +  if (ir->bSimTemp) {
 +      fep->bPrintEnergy = TRUE;
 +      /* always print out the energy to dhdl if we are doing expanded ensemble, since we need the total energy
 +         if the temperature is changing. */
 +  }
 +
 +  if ((ir->efep != efepNO) || ir->bSimTemp)
 +  {
 +      ir->bExpanded = FALSE;
 +      if ((ir->efep == efepEXPANDED) || ir->bSimTemp)
 +      {
 +          ir->bExpanded = TRUE;
 +      }
 +      do_fep_params(ir,fep_lambda,lambda_weights);
 +      if (ir->bSimTemp) { /* done after fep params */
 +          do_simtemp_params(ir);
 +      }
 +  }
 +  else
 +  {
 +      ir->fepvals->n_lambda = 0;
 +  }
 +
 +  /* WALL PARAMETERS */
 +
 +  do_wall_params(ir,wall_atomtype,wall_density,opts);
 +
 +  /* ORIENTATION RESTRAINT PARAMETERS */
 +  
 +  if (opts->bOrire && str_nelem(orirefitgrp,MAXPTR,NULL)!=1) {
 +      warning_error(wi,"ERROR: Need one orientation restraint fit group\n");
 +  }
 +
 +  /* DEFORMATION PARAMETERS */
 +
 +  clear_mat(ir->deform);
 +  for(i=0; i<6; i++)
 +  {
 +      dumdub[0][i] = 0;
 +  }
 +  m = sscanf(deform,"%lf %lf %lf %lf %lf %lf",
 +           &(dumdub[0][0]),&(dumdub[0][1]),&(dumdub[0][2]),
 +           &(dumdub[0][3]),&(dumdub[0][4]),&(dumdub[0][5]));
 +  for(i=0; i<3; i++)
 +  {
 +      ir->deform[i][i] = dumdub[0][i];
 +  }
 +  ir->deform[YY][XX] = dumdub[0][3];
 +  ir->deform[ZZ][XX] = dumdub[0][4];
 +  ir->deform[ZZ][YY] = dumdub[0][5];
 +  if (ir->epc != epcNO) {
 +    for(i=0; i<3; i++)
 +      for(j=0; j<=i; j++)
 +      if (ir->deform[i][j]!=0 && ir->compress[i][j]!=0) {
 +        warning_error(wi,"A box element has deform set and compressibility > 0");
 +      }
 +    for(i=0; i<3; i++)
 +      for(j=0; j<i; j++)
 +      if (ir->deform[i][j]!=0) {
 +        for(m=j; m<DIM; m++)
 +          if (ir->compress[m][j]!=0) {
 +            sprintf(warn_buf,"An off-diagonal box element has deform set while compressibility > 0 for the same component of another box vector, this might lead to spurious periodicity effects.");
 +            warning(wi,warn_buf);
 +          }
 +      }
 +  }
 +
 +  sfree(dumstr[0]);
 +  sfree(dumstr[1]);
 +}
 +
 +static int search_QMstring(char *s,int ng,const char *gn[])
 +{
 +  /* same as normal search_string, but this one searches QM strings */
 +  int i;
 +
 +  for(i=0; (i<ng); i++)
 +    if (gmx_strcasecmp(s,gn[i]) == 0)
 +      return i;
 +
 +  gmx_fatal(FARGS,"this QM method or basisset (%s) is not implemented!\n",s);
 +
 +  return -1;
 +
 +} /* search_QMstring */
 +
 +
 +int search_string(char *s,int ng,char *gn[])
 +{
 +  int i;
 +  
 +  for(i=0; (i<ng); i++)
 +  {
 +    if (gmx_strcasecmp(s,gn[i]) == 0)
 +    {
 +      return i;
 +    }
 +  }
 +    
 +  gmx_fatal(FARGS,
 +            "Group %s referenced in the .mdp file was not found in the index file.\n"
 +            "Group names must match either [moleculetype] names or custom index group\n"
 +            "names, in which case you must supply an index file to the '-n' option\n"
 +            "of grompp.",
 +            s);
 +  
 +  return -1;
 +}
 +
 +static gmx_bool do_numbering(int natoms,gmx_groups_t *groups,int ng,char *ptrs[],
 +                         t_blocka *block,char *gnames[],
 +                         int gtype,int restnm,
 +                         int grptp,gmx_bool bVerbose,
 +                         warninp_t wi)
 +{
 +    unsigned short *cbuf;
 +    t_grps *grps=&(groups->grps[gtype]);
 +    int    i,j,gid,aj,ognr,ntot=0;
 +    const char *title;
 +    gmx_bool   bRest;
 +    char   warn_buf[STRLEN];
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"Starting numbering %d groups of type %d\n",ng,gtype);
 +    }
 +  
 +    title = gtypes[gtype];
 +    
 +    snew(cbuf,natoms);
 +    /* Mark all id's as not set */
 +    for(i=0; (i<natoms); i++)
 +    {
 +        cbuf[i] = NOGID;
 +    }
 +  
 +    snew(grps->nm_ind,ng+1); /* +1 for possible rest group */
 +    for(i=0; (i<ng); i++)
 +    {
 +        /* Lookup the group name in the block structure */
 +        gid = search_string(ptrs[i],block->nr,gnames);
 +        if ((grptp != egrptpONE) || (i == 0))
 +        {
 +            grps->nm_ind[grps->nr++]=gid;
 +        }
 +        if (debug) 
 +        {
 +            fprintf(debug,"Found gid %d for group %s\n",gid,ptrs[i]);
 +        }
 +    
 +        /* Now go over the atoms in the group */
 +        for(j=block->index[gid]; (j<block->index[gid+1]); j++)
 +        {
 +
 +            aj=block->a[j];
 +      
 +            /* Range checking */
 +            if ((aj < 0) || (aj >= natoms)) 
 +            {
 +                gmx_fatal(FARGS,"Invalid atom number %d in indexfile",aj);
 +            }
 +            /* Look up the old group number */
 +            ognr = cbuf[aj];
 +            if (ognr != NOGID)
 +            {
 +                gmx_fatal(FARGS,"Atom %d in multiple %s groups (%d and %d)",
 +                          aj+1,title,ognr+1,i+1);
 +            }
 +            else
 +            {
 +                /* Store the group number in buffer */
 +                if (grptp == egrptpONE)
 +                {
 +                    cbuf[aj] = 0;
 +                }
 +                else
 +                {
 +                    cbuf[aj] = i;
 +                }
 +                ntot++;
 +            }
 +        }
 +    }
 +    
 +    /* Now check whether we have done all atoms */
 +    bRest = FALSE;
 +    if (ntot != natoms)
 +    {
 +        if (grptp == egrptpALL)
 +        {
 +            gmx_fatal(FARGS,"%d atoms are not part of any of the %s groups",
 +                      natoms-ntot,title);
 +        }
 +        else if (grptp == egrptpPART)
 +        {
 +            sprintf(warn_buf,"%d atoms are not part of any of the %s groups",
 +                    natoms-ntot,title);
 +            warning_note(wi,warn_buf);
 +        }
 +        /* Assign all atoms currently unassigned to a rest group */
 +        for(j=0; (j<natoms); j++)
 +        {
 +            if (cbuf[j] == NOGID)
 +            {
 +                cbuf[j] = grps->nr;
 +                bRest = TRUE;
 +            }
 +        }
 +        if (grptp != egrptpPART)
 +        {
 +            if (bVerbose)
 +            {
 +                fprintf(stderr,
 +                        "Making dummy/rest group for %s containing %d elements\n",
 +                        title,natoms-ntot);
 +            }
 +            /* Add group name "rest" */ 
 +            grps->nm_ind[grps->nr] = restnm;
 +            
 +            /* Assign the rest name to all atoms not currently assigned to a group */
 +            for(j=0; (j<natoms); j++)
 +            {
 +                if (cbuf[j] == NOGID)
 +                {
 +                    cbuf[j] = grps->nr;
 +                }
 +            }
 +            grps->nr++;
 +        }
 +    }
 +    
 +    if (grps->nr == 1 && (ntot == 0 || ntot == natoms))
 +    {
 +        /* All atoms are part of one (or no) group, no index required */
 +        groups->ngrpnr[gtype] = 0;
 +        groups->grpnr[gtype]  = NULL;
 +    }
 +    else
 +    {
 +        groups->ngrpnr[gtype] = natoms;
 +        snew(groups->grpnr[gtype],natoms);
 +        for(j=0; (j<natoms); j++)
 +        {
 +            groups->grpnr[gtype][j] = cbuf[j];
 +        }
 +    }
 +    
 +    sfree(cbuf);
 +
 +    return (bRest && grptp == egrptpPART);
 +}
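 +
 +/* Sketch of the rest-group behaviour above (hypothetical index groups): with
 + * ng == 1 and ptrs[0] == "Protein" while the system also contains solvent,
 + * grptp == egrptpALL_GENREST puts all remaining atoms into an automatically
 + * generated "rest" group, whereas egrptpALL would call gmx_fatal instead.
 + */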
 +
 +static void calc_nrdf(gmx_mtop_t *mtop,t_inputrec *ir,char **gnames)
 +{
 +  t_grpopts *opts;
 +  gmx_groups_t *groups;
 +  t_pull  *pull;
 +  int     natoms,ai,aj,i,j,d,g,imin,jmin,nc;
 +  t_iatom *ia;
 +  int     *nrdf2,*na_vcm,na_tot;
 +  double  *nrdf_tc,*nrdf_vcm,nrdf_uc,n_sub=0;
 +  gmx_mtop_atomloop_all_t aloop;
 +  t_atom  *atom;
 +  int     mb,mol,ftype,as;
 +  gmx_molblock_t *molb;
 +  gmx_moltype_t *molt;
 +
 +  /* Calculate nrdf. 
 +   * First calc 3xnr-atoms for each group
 +   * then subtract half a degree of freedom for each constraint
 +   *
 +   * Only atoms and nuclei contribute to the degrees of freedom...
 +   */
 +
 +  opts = &ir->opts;
 +  
 +  groups = &mtop->groups;
 +  natoms = mtop->natoms;
 +
 +  /* Allocate one more for a possible rest group */
 +  /* We need to sum degrees of freedom into doubles,
 +   * since floats give too low nrdf's above 3 million atoms.
 +   */
 +  snew(nrdf_tc,groups->grps[egcTC].nr+1);
 +  snew(nrdf_vcm,groups->grps[egcVCM].nr+1);
 +  snew(na_vcm,groups->grps[egcVCM].nr+1);
 +  
 +  for(i=0; i<groups->grps[egcTC].nr; i++)
 +    nrdf_tc[i] = 0;
 +  for(i=0; i<groups->grps[egcVCM].nr+1; i++)
 +    nrdf_vcm[i] = 0;
 +
 +  snew(nrdf2,natoms);
 +  aloop = gmx_mtop_atomloop_all_init(mtop);
 +  while (gmx_mtop_atomloop_all_next(aloop,&i,&atom)) {
 +    nrdf2[i] = 0;
 +    if (atom->ptype == eptAtom || atom->ptype == eptNucleus) {
 +      g = ggrpnr(groups,egcFREEZE,i);
 +      /* Double count nrdf for particle i */
 +      for(d=0; d<DIM; d++) {
 +      if (opts->nFreeze[g][d] == 0) {
 +        nrdf2[i] += 2;
 +      }
 +      }
 +      nrdf_tc [ggrpnr(groups,egcTC ,i)] += 0.5*nrdf2[i];
 +      nrdf_vcm[ggrpnr(groups,egcVCM,i)] += 0.5*nrdf2[i];
 +    }
 +  }
 +
 +  as = 0;
 +  for(mb=0; mb<mtop->nmolblock; mb++) {
 +    molb = &mtop->molblock[mb];
 +    molt = &mtop->moltype[molb->type];
 +    atom = molt->atoms.atom;
 +    for(mol=0; mol<molb->nmol; mol++) {
 +      for (ftype=F_CONSTR; ftype<=F_CONSTRNC; ftype++) {
 +      ia = molt->ilist[ftype].iatoms;
 +      for(i=0; i<molt->ilist[ftype].nr; ) {
 +        /* Subtract degrees of freedom for the constraints,
 +         * if the particles still have degrees of freedom left.
 +         * If one of the particles is a vsite or a shell, then all
 +         * constraint motion will go there, but since they do not
 +         * contribute to the constraints the degrees of freedom do not
 +         * change.
 +         */
 +        ai = as + ia[1];
 +        aj = as + ia[2];
 +        if (((atom[ia[1]].ptype == eptNucleus) ||
 +             (atom[ia[1]].ptype == eptAtom)) &&
 +            ((atom[ia[2]].ptype == eptNucleus) ||
 +             (atom[ia[2]].ptype == eptAtom))) {
 +          if (nrdf2[ai] > 0) 
 +            jmin = 1;
 +          else
 +            jmin = 2;
 +          if (nrdf2[aj] > 0)
 +            imin = 1;
 +          else
 +            imin = 2;
 +          imin = min(imin,nrdf2[ai]);
 +          jmin = min(jmin,nrdf2[aj]);
 +          nrdf2[ai] -= imin;
 +          nrdf2[aj] -= jmin;
 +          nrdf_tc [ggrpnr(groups,egcTC ,ai)] -= 0.5*imin;
 +          nrdf_tc [ggrpnr(groups,egcTC ,aj)] -= 0.5*jmin;
 +          nrdf_vcm[ggrpnr(groups,egcVCM,ai)] -= 0.5*imin;
 +          nrdf_vcm[ggrpnr(groups,egcVCM,aj)] -= 0.5*jmin;
 +        }
 +        ia += interaction_function[ftype].nratoms+1;
 +        i  += interaction_function[ftype].nratoms+1;
 +      }
 +      }
 +      ia = molt->ilist[F_SETTLE].iatoms;
 +      for(i=0; i<molt->ilist[F_SETTLE].nr; ) {
 +      /* Subtract 1 dof from every atom in the SETTLE */
 +      for(j=0; j<3; j++) {
 +      ai = as + ia[1+j];
 +        imin = min(2,nrdf2[ai]);
 +        nrdf2[ai] -= imin;
 +        nrdf_tc [ggrpnr(groups,egcTC ,ai)] -= 0.5*imin;
 +        nrdf_vcm[ggrpnr(groups,egcVCM,ai)] -= 0.5*imin;
 +      }
 +      ia += 4;
 +      i  += 4;
 +      }
 +      as += molt->atoms.nr;
 +    }
 +  }
 +
 +  if (ir->ePull == epullCONSTRAINT) {
 +    /* Correct nrdf for the COM constraints.
 +     * We correct using the TC and VCM group of the first atom
 +     * in the reference and pull group. If atoms in one pull group
 +     * belong to different TC or VCM groups it is anyhow difficult
 +     * to determine the optimal nrdf assignment.
 +     */
 +    pull = ir->pull;
 +    if (pull->eGeom == epullgPOS) {
 +      nc = 0;
 +      for(i=0; i<DIM; i++) {
 +      if (pull->dim[i])
 +        nc++;
 +      }
 +    } else {
 +      nc = 1;
 +    }
 +    for(i=0; i<pull->ngrp; i++) {
 +      imin = 2*nc;
 +      if (pull->grp[0].nat > 0) {
 +      /* Subtract 1/2 dof from the reference group */
 +      ai = pull->grp[0].ind[0];
 +      if (nrdf_tc[ggrpnr(groups,egcTC,ai)] > 1) {
 +        nrdf_tc [ggrpnr(groups,egcTC ,ai)] -= 0.5;
 +        nrdf_vcm[ggrpnr(groups,egcVCM,ai)] -= 0.5;
 +        imin--;
 +      }
 +      }
 +      /* Subtract 1/2 dof from the pulled group */
 +      ai = pull->grp[1+i].ind[0];
 +      nrdf_tc [ggrpnr(groups,egcTC ,ai)] -= 0.5*imin;
 +      nrdf_vcm[ggrpnr(groups,egcVCM,ai)] -= 0.5*imin;
 +      if (nrdf_tc[ggrpnr(groups,egcTC,ai)] < 0)
 +      gmx_fatal(FARGS,"Center of mass pulling constraints caused the number of degrees of freedom for temperature coupling group %s to be negative",gnames[groups->grps[egcTC].nm_ind[ggrpnr(groups,egcTC,ai)]]);
 +    }
 +  }
 +  
 +  if (ir->nstcomm != 0) {
 +    /* Subtract 3 from the number of degrees of freedom in each vcm group
 +     * when com translation is removed and 6 when rotation is removed
 +     * as well.
 +     */
 +    switch (ir->comm_mode) {
 +    case ecmLINEAR:
 +      n_sub = ndof_com(ir);
 +      break;
 +    case ecmANGULAR:
 +      n_sub = 6;
 +      break;
 +    default:
 +      n_sub = 0;
 +      gmx_incons("Checking comm_mode");
 +    }
 +    
 +    for(i=0; i<groups->grps[egcTC].nr; i++) {
 +      /* Count the number of atoms of TC group i for every VCM group */
 +      for(j=0; j<groups->grps[egcVCM].nr+1; j++)
 +      na_vcm[j] = 0;
 +      na_tot = 0;
 +      for(ai=0; ai<natoms; ai++)
 +      if (ggrpnr(groups,egcTC,ai) == i) {
 +        na_vcm[ggrpnr(groups,egcVCM,ai)]++;
 +        na_tot++;
 +      }
 +      /* Correct for VCM removal according to the fraction of each VCM
 +       * group present in this TC group.
 +       */
 +      nrdf_uc = nrdf_tc[i];
 +      if (debug) {
 +      fprintf(debug,"T-group[%d] nrdf_uc = %g, n_sub = %g\n",
 +              i,nrdf_uc,n_sub);
 +      }
 +      nrdf_tc[i] = 0;
 +      for(j=0; j<groups->grps[egcVCM].nr+1; j++) {
 +      if (nrdf_vcm[j] > n_sub) {
 +        nrdf_tc[i] += nrdf_uc*((double)na_vcm[j]/(double)na_tot)*
 +          (nrdf_vcm[j] - n_sub)/nrdf_vcm[j];
 +      }
 +      if (debug) {
 +        fprintf(debug,"  nrdf_vcm[%d] = %g, nrdf = %g\n",
 +                j,nrdf_vcm[j],nrdf_tc[i]);
 +      }
 +      }
 +    }
 +  }
 +  for(i=0; (i<groups->grps[egcTC].nr); i++) {
 +    opts->nrdf[i] = nrdf_tc[i];
 +    if (opts->nrdf[i] < 0)
 +      opts->nrdf[i] = 0;
 +    fprintf(stderr,
 +          "Number of degrees of freedom in T-Coupling group %s is %.2f\n",
 +          gnames[groups->grps[egcTC].nm_ind[i]],opts->nrdf[i]);
 +  }
 +  
 +  sfree(nrdf2);
 +  sfree(nrdf_tc);
 +  sfree(nrdf_vcm);
 +  sfree(na_vcm);
 +}
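 +
 +/* Worked example of the accounting above (illustrative): a SETTLE water
 + * contributes 3 atoms * 3 = 9 degrees of freedom; the SETTLE loop subtracts
 + * min(2,nrdf2[ai]) from the doubled count for each of its 3 atoms, i.e. 3
 + * constrained degrees of freedom, leaving 6 per water before the COM-motion
 + * removal correction is applied.
 + */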
 +
 +static void decode_cos(char *s,t_cosines *cosine,gmx_bool bTime)
 +{
 +  char   *t;
 +  char   format[STRLEN],f1[STRLEN];
 +  double a,phi;
 +  int    i;
 +  
 +  t=strdup(s);
 +  trim(t);
 +  
 +  cosine->n=0;
 +  cosine->a=NULL;
 +  cosine->phi=NULL;
 +  if (strlen(t)) {
 +    sscanf(t,"%d",&(cosine->n));
 +    if (cosine->n <= 0) {
 +      cosine->n=0;
 +    } else {
 +      snew(cosine->a,cosine->n);
 +      snew(cosine->phi,cosine->n);
 +      
 +      sprintf(format,"%%*d");
 +      for(i=0; (i<cosine->n); i++) {
 +      strcpy(f1,format);
 +      strcat(f1,"%lf%lf");
 +      if (sscanf(t,f1,&a,&phi) < 2)
 +        gmx_fatal(FARGS,"Invalid input for electric field shift: '%s'",t);
 +      cosine->a[i]=a;
 +      cosine->phi[i]=phi;
 +      strcat(format,"%*lf%*lf");
 +      }
 +    }
 +  }
 +  sfree(t);
 +}
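 +
 +/* Illustrative input for decode_cos (hypothetical mdp line):
 + *
 + *   E-x = 1 0.5 0
 + *
 + * i.e. one cosine term with amplitude 0.5 and phase angle 0, giving
 + * cosine->n == 1, cosine->a[0] == 0.5, cosine->phi[0] == 0.
 + */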
 +
 +static gmx_bool do_egp_flag(t_inputrec *ir,gmx_groups_t *groups,
 +                      const char *option,const char *val,int flag)
 +{
 +  /* The maximum number of energy group pairs would be MAXPTR*(MAXPTR+1)/2.
 +   * But since this is much larger than STRLEN, such a line can not be parsed.
 +   * The real maximum is the number of names that fit in a string: STRLEN/2.
 +   */
 +#define EGP_MAX (STRLEN/2)
 +  int  nelem,i,j,k,nr;
 +  char *names[EGP_MAX];
 +  char ***gnames;
 +  gmx_bool bSet;
 +
 +  gnames = groups->grpname;
 +
 +  nelem = str_nelem(val,EGP_MAX,names);
 +  if (nelem % 2 != 0)
 +    gmx_fatal(FARGS,"The number of groups for %s is odd",option);
 +  nr = groups->grps[egcENER].nr;
 +  bSet = FALSE;
 +  for(i=0; i<nelem/2; i++) {
 +    j = 0;
 +    while ((j < nr) &&
 +         gmx_strcasecmp(names[2*i],*(gnames[groups->grps[egcENER].nm_ind[j]])))
 +      j++;
 +    if (j == nr)
 +      gmx_fatal(FARGS,"%s in %s is not an energy group\n",
 +                names[2*i],option);
 +    k = 0;
 +    while ((k < nr) &&
 +         gmx_strcasecmp(names[2*i+1],*(gnames[groups->grps[egcENER].nm_ind[k]])))
 +      k++;
 +    if (k==nr)
 +      gmx_fatal(FARGS,"%s in %s is not an energy group\n",
 +            names[2*i+1],option);
 +    if ((j < nr) && (k < nr)) {
 +      ir->opts.egp_flags[nr*j+k] |= flag;
 +      ir->opts.egp_flags[nr*k+j] |= flag;
 +      bSet = TRUE;
 +    }
 +  }
 +
 +  return bSet;
 +}
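 +
 +/* Illustrative input for do_egp_flag (hypothetical group names):
 + *
 + *   energygrp-excl = Protein Protein SOL SOL
 + *
 + * is parsed as the pairs (Protein,Protein) and (SOL,SOL); for each pair the
 + * given flag is set symmetrically in ir->opts.egp_flags.
 + */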
 +
 +void do_index(const char* mdparin, const char *ndx,
 +              gmx_mtop_t *mtop,
 +              gmx_bool bVerbose,
 +              t_inputrec *ir,rvec *v,
 +              warninp_t wi)
 +{
 +  t_blocka *grps;
 +  gmx_groups_t *groups;
 +  int     natoms;
 +  t_symtab *symtab;
 +  t_atoms atoms_all;
 +  char    warnbuf[STRLEN],**gnames;
 +  int     nr,ntcg,ntau_t,nref_t,nacc,nofg,nSA,nSA_points,nSA_time,nSA_temp;
 +  real    tau_min;
 +  int     nstcmin;
 +  int     nacg,nfreeze,nfrdim,nenergy,nvcm,nuser;
 +  char    *ptr1[MAXPTR],*ptr2[MAXPTR],*ptr3[MAXPTR];
 +  int     i,j,k,restnm;
 +  real    SAtime;
 +  gmx_bool    bExcl,bTable,bSetTCpar,bAnneal,bRest;
 +  int     nQMmethod,nQMbasis,nQMcharge,nQMmult,nbSH,nCASorb,nCASelec,
 +    nSAon,nSAoff,nSAsteps,nQMg,nbOPT,nbTS;
 +  char    warn_buf[STRLEN];
 +
 +  if (bVerbose)
 +    fprintf(stderr,"processing index file...\n");
 +  debug_gmx();
 +  if (ndx == NULL) {
 +    snew(grps,1);
 +    snew(grps->index,1);
 +    snew(gnames,1);
 +    atoms_all = gmx_mtop_global_atoms(mtop);
 +    analyse(&atoms_all,grps,&gnames,FALSE,TRUE);
 +    free_t_atoms(&atoms_all,FALSE);
 +  } else {
 +    grps = init_index(ndx,&gnames);
 +  }
 +
 +  groups = &mtop->groups;
 +  natoms = mtop->natoms;
 +  symtab = &mtop->symtab;
 +
 +  snew(groups->grpname,grps->nr+1);
 +  
 +  for(i=0; (i<grps->nr); i++) {
 +    groups->grpname[i] = put_symtab(symtab,gnames[i]);
 +  }
 +  groups->grpname[i] = put_symtab(symtab,"rest");
 +  restnm=i;
 +  srenew(gnames,grps->nr+1);
 +  gnames[restnm] = *(groups->grpname[i]);
 +  groups->ngrpname = grps->nr+1;
 +
 +  set_warning_line(wi,mdparin,-1);
 +
 +  ntau_t = str_nelem(tau_t,MAXPTR,ptr1);
 +  nref_t = str_nelem(ref_t,MAXPTR,ptr2);
 +  ntcg   = str_nelem(tcgrps,MAXPTR,ptr3);
 +  if ((ntau_t != ntcg) || (nref_t != ntcg)) {
 +    gmx_fatal(FARGS,"Invalid T coupling input: %d groups, %d ref-t values and "
 +                "%d tau-t values",ntcg,nref_t,ntau_t);
 +  }
 +
 +  bSetTCpar = (ir->etc || EI_SD(ir->eI) || ir->eI==eiBD || EI_TPI(ir->eI));
 +  do_numbering(natoms,groups,ntcg,ptr3,grps,gnames,egcTC,
 +               restnm,bSetTCpar ? egrptpALL : egrptpALL_GENREST,bVerbose,wi);
 +  nr = groups->grps[egcTC].nr;
 +  ir->opts.ngtc = nr;
 +  snew(ir->opts.nrdf,nr);
 +  snew(ir->opts.tau_t,nr);
 +  snew(ir->opts.ref_t,nr);
 +  if (ir->eI==eiBD && ir->bd_fric==0) {
 +    fprintf(stderr,"bd-fric=0, so tau-t will be used as the inverse friction constant(s)\n");
 +  }
 +
 +  if (bSetTCpar)
 +  {
 +      if (nr != nref_t)
 +      {
 +          gmx_fatal(FARGS,"Not enough ref-t and tau-t values!");
 +      }
 +      
 +      tau_min = 1e20;
 +      for(i=0; (i<nr); i++)
 +      {
 +          ir->opts.tau_t[i] = strtod(ptr1[i],NULL);
 +          if ((ir->eI == eiBD || ir->eI == eiSD2) && ir->opts.tau_t[i] <= 0)
 +          {
 +              sprintf(warn_buf,"With integrator %s tau-t should be larger than 0",ei_names[ir->eI]);
 +              warning_error(wi,warn_buf);
 +          }
 +          if ((ir->etc == etcVRESCALE && ir->opts.tau_t[i] >= 0) || 
 +              (ir->etc != etcVRESCALE && ir->opts.tau_t[i] >  0))
 +          {
 +              tau_min = min(tau_min,ir->opts.tau_t[i]);
 +          }
 +      }
 +      if (ir->etc != etcNO && ir->nsttcouple == -1)
 +      {
 +            ir->nsttcouple = ir_optimal_nsttcouple(ir);
 +      }
 +
 +      if (EI_VV(ir->eI)) 
 +      {
 +          if ((ir->etc==etcNOSEHOOVER) && (ir->epc==epcBERENDSEN)) {
 +              gmx_fatal(FARGS,"Cannot do Nose-Hoover temperature with Berendsen pressure control with md-vv; use either vrescale temperature with berendsen pressure or Nose-Hoover temperature with MTTK pressure");
 +          }
 +          if ((ir->epc==epcMTTK) && (ir->etc>etcNO))
 +          {
 +              int mincouple;
 +              mincouple = ir->nsttcouple;
 +              if (ir->nstpcouple < mincouple)
 +              {
 +                  mincouple = ir->nstpcouple;
 +              }
 +              ir->nstpcouple = mincouple;
 +              ir->nsttcouple = mincouple;
 +              sprintf(warn_buf,"For the current Trotter decomposition methods with md-vv, nsttcouple and nstpcouple must be equal. Both have been reset to min(nsttcouple,nstpcouple) = %d",mincouple);
 +              warning_note(wi,warn_buf);
 +          }
 +      }
 +      /* velocity verlet with averaged kinetic energy KE = 0.5*(v(t+1/2) - v(t-1/2)) is implemented
 +         primarily for testing purposes, and does not work with temperature coupling other than 1 */
 +
 +      if (ETC_ANDERSEN(ir->etc)) {
 +          if (ir->nsttcouple != 1) {
 +              ir->nsttcouple = 1;
 +              sprintf(warn_buf,"Andersen temperature control methods assume nsttcouple = 1; there is no need for nsttcouple > 1, since no global parameters are computed. nsttcouple has been reset to 1");
 +              warning_note(wi,warn_buf);
 +          }
 +      }
 +      nstcmin = tcouple_min_integration_steps(ir->etc);
 +      if (nstcmin > 1)
 +      {
 +          if (tau_min/(ir->delta_t*ir->nsttcouple) < nstcmin)
 +          {
 +              sprintf(warn_buf,"For proper integration of the %s thermostat, tau-t (%g) should be at least %d times larger than nsttcouple*dt (%g)",
 +                      ETCOUPLTYPE(ir->etc),
 +                      tau_min,nstcmin,
 +                      ir->nsttcouple*ir->delta_t);
 +              warning(wi,warn_buf);
 +          }
 +      }
 +      for(i=0; (i<nr); i++)
 +      {
 +          ir->opts.ref_t[i] = strtod(ptr2[i],NULL);
 +          if (ir->opts.ref_t[i] < 0)
 +          {
 +              gmx_fatal(FARGS,"ref-t for group %d negative",i);
 +          }
 +      }
 +      /* set the lambda mc temperature to the md integrator temperature (which should be defined
 +         if we are in this conditional) if mc_temp is negative */
 +      if (ir->expandedvals->mc_temp < 0)
 +      {
 +          ir->expandedvals->mc_temp = ir->opts.ref_t[0];  /*for now, set to the first reft */
 +      }
 +  }
 +
 +  /* Simulated annealing for each group. There are nr groups */
 +  nSA = str_nelem(anneal,MAXPTR,ptr1);
 +  if (nSA == 1 && (ptr1[0][0]=='n' || ptr1[0][0]=='N'))
 +     nSA = 0;
 +  if(nSA>0 && nSA != nr) 
 +    gmx_fatal(FARGS,"Not enough annealing values: %d (for %d groups)\n",nSA,nr);
 +  else {
 +    snew(ir->opts.annealing,nr);
 +    snew(ir->opts.anneal_npoints,nr);
 +    snew(ir->opts.anneal_time,nr);
 +    snew(ir->opts.anneal_temp,nr);
 +    for(i=0;i<nr;i++) {
 +      ir->opts.annealing[i]=eannNO;
 +      ir->opts.anneal_npoints[i]=0;
 +      ir->opts.anneal_time[i]=NULL;
 +      ir->opts.anneal_temp[i]=NULL;
 +    }
 +    if (nSA > 0) {
 +      bAnneal=FALSE;
 +      for(i=0;i<nr;i++) { 
 +      if(ptr1[i][0]=='n' || ptr1[i][0]=='N') {
 +        ir->opts.annealing[i]=eannNO;
 +      } else if(ptr1[i][0]=='s'|| ptr1[i][0]=='S') {
 +        ir->opts.annealing[i]=eannSINGLE;
 +        bAnneal=TRUE;
 +      } else if(ptr1[i][0]=='p'|| ptr1[i][0]=='P') {
 +        ir->opts.annealing[i]=eannPERIODIC;
 +        bAnneal=TRUE;
 +      } 
 +      } 
 +      if(bAnneal) {
 +      /* Read the other fields too */
 +      nSA_points = str_nelem(anneal_npoints,MAXPTR,ptr1);
 +      if(nSA_points!=nSA) 
 +          gmx_fatal(FARGS,"Found %d annealing-npoints values for %d groups\n",nSA_points,nSA);
 +      for(k=0,i=0;i<nr;i++) {
 +        ir->opts.anneal_npoints[i]=strtol(ptr1[i],NULL,10);
 +        if(ir->opts.anneal_npoints[i]==1)
 +          gmx_fatal(FARGS,"Please specify at least a start and an end point for annealing\n");
 +        snew(ir->opts.anneal_time[i],ir->opts.anneal_npoints[i]);
 +        snew(ir->opts.anneal_temp[i],ir->opts.anneal_npoints[i]);
 +        k += ir->opts.anneal_npoints[i];
 +      }
 +
 +      nSA_time = str_nelem(anneal_time,MAXPTR,ptr1);
 +      if(nSA_time!=k) 
 +          gmx_fatal(FARGS,"Found %d annealing-time values, wanted %d\n",nSA_time,k);
 +      nSA_temp = str_nelem(anneal_temp,MAXPTR,ptr2);
 +      if(nSA_temp!=k) 
 +          gmx_fatal(FARGS,"Found %d annealing-temp values, wanted %d\n",nSA_temp,k);
 +
 +      for(i=0,k=0;i<nr;i++) {
 +        
 +        for(j=0;j<ir->opts.anneal_npoints[i];j++) {
 +          ir->opts.anneal_time[i][j]=strtod(ptr1[k],NULL);
 +          ir->opts.anneal_temp[i][j]=strtod(ptr2[k],NULL);
 +          if(j==0) {
 +            if(ir->opts.anneal_time[i][0] > (ir->init_t+GMX_REAL_EPS))
 +              gmx_fatal(FARGS,"First time point for annealing > init_t.\n");      
 +          } else { 
 +            /* j>0 */
 +            if(ir->opts.anneal_time[i][j]<ir->opts.anneal_time[i][j-1])
 +              gmx_fatal(FARGS,"Annealing timepoints out of order: t=%f comes after t=%f\n",
 +                          ir->opts.anneal_time[i][j],ir->opts.anneal_time[i][j-1]);
 +          }
 +          if(ir->opts.anneal_temp[i][j]<0) 
 +            gmx_fatal(FARGS,"Found negative temperature in annealing: %f\n",ir->opts.anneal_temp[i][j]);    
 +          k++;
 +        }
 +      }
 +      /* Print out some summary information, to make sure we got it right */
 +      for(i=0,k=0;i<nr;i++) {
 +        if(ir->opts.annealing[i]!=eannNO) {
 +          j = groups->grps[egcTC].nm_ind[i];
 +          fprintf(stderr,"Simulated annealing for group %s: %s, %d timepoints\n",
 +                  *(groups->grpname[j]),eann_names[ir->opts.annealing[i]],
 +                  ir->opts.anneal_npoints[i]);
 +          fprintf(stderr,"Time (ps)   Temperature (K)\n");
 +          /* All terms except the last one */
 +          for(j=0;j<(ir->opts.anneal_npoints[i]-1);j++) 
 +              fprintf(stderr,"%9.1f      %5.1f\n",ir->opts.anneal_time[i][j],ir->opts.anneal_temp[i][j]);
 +          
 +          /* Finally the last one */
 +          j = ir->opts.anneal_npoints[i]-1;
 +          if(ir->opts.annealing[i]==eannSINGLE)
 +            fprintf(stderr,"%9.1f-     %5.1f\n",ir->opts.anneal_time[i][j],ir->opts.anneal_temp[i][j]);
 +          else {
 +            fprintf(stderr,"%9.1f      %5.1f\n",ir->opts.anneal_time[i][j],ir->opts.anneal_temp[i][j]);
 +            if(fabs(ir->opts.anneal_temp[i][j]-ir->opts.anneal_temp[i][0])>GMX_REAL_EPS)
 +              warning_note(wi,"There is a temperature jump when your annealing loops back.\n");
 +          }
 +        }
 +      } 
 +      }
 +    }
 +  }   
 +
 +  if (ir->ePull != epullNO) {
 +    make_pull_groups(ir->pull,pull_grp,grps,gnames);
 +  }
 +  
 +  if (ir->bRot) {
 +    make_rotation_groups(ir->rot,rot_grp,grps,gnames);
 +  }
 +
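 +  /* Acceleration groups: DIM (x,y,z) acceleration values are required per
 +   * group; groups not listed get zero acceleration. */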
 +  nacc = str_nelem(acc,MAXPTR,ptr1);
 +  nacg = str_nelem(accgrps,MAXPTR,ptr2);
 +  if (nacg*DIM != nacc)
 +    gmx_fatal(FARGS,"Invalid Acceleration input: %d groups and %d acc. values",
 +              nacg,nacc);
 +  do_numbering(natoms,groups,nacg,ptr2,grps,gnames,egcACC,
 +               restnm,egrptpALL_GENREST,bVerbose,wi);
 +  nr = groups->grps[egcACC].nr;
 +  snew(ir->opts.acc,nr);
 +  ir->opts.ngacc=nr;
 +  
 +  for(i=k=0; (i<nacg); i++)
 +    for(j=0; (j<DIM); j++,k++)
 +      ir->opts.acc[i][j]=strtod(ptr1[k],NULL);
 +  for( ;(i<nr); i++)
 +    for(j=0; (j<DIM); j++)
 +      ir->opts.acc[i][j]=0;
 +  
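 +  /* Freeze groups: DIM Y/N flags are required per group, one for each
 +   * dimension; groups not listed are not frozen. */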
 +  nfrdim  = str_nelem(frdim,MAXPTR,ptr1);
 +  nfreeze = str_nelem(freeze,MAXPTR,ptr2);
 +  if (nfrdim != DIM*nfreeze)
 +    gmx_fatal(FARGS,"Invalid Freezing input: %d groups and %d freeze values",
 +              nfreeze,nfrdim);
 +  do_numbering(natoms,groups,nfreeze,ptr2,grps,gnames,egcFREEZE,
 +               restnm,egrptpALL_GENREST,bVerbose,wi);
 +  nr = groups->grps[egcFREEZE].nr;
 +  ir->opts.ngfrz=nr;
 +  snew(ir->opts.nFreeze,nr);
 +  for(i=k=0; (i<nfreeze); i++)
 +    for(j=0; (j<DIM); j++,k++) {
 +      ir->opts.nFreeze[i][j]=(gmx_strncasecmp(ptr1[k],"Y",1)==0);
 +      if (!ir->opts.nFreeze[i][j]) {
 +      if (gmx_strncasecmp(ptr1[k],"N",1) != 0) {
 +        sprintf(warn_buf,"Please use Y(ES) or N(O) for freezedim only "
 +                "(not %s)", ptr1[k]);
 +        warning(wi,warn_buf);
 +      }
 +      }
 +    }
 +  for( ; (i<nr); i++)
 +    for(j=0; (j<DIM); j++)
 +      ir->opts.nFreeze[i][j]=0;
 +  
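 +  /* Energy groups (with wall energy groups appended) and center of mass
 +   * motion removal groups. */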
 +  nenergy=str_nelem(energy,MAXPTR,ptr1);
 +  do_numbering(natoms,groups,nenergy,ptr1,grps,gnames,egcENER,
 +               restnm,egrptpALL_GENREST,bVerbose,wi);
 +  add_wall_energrps(groups,ir->nwall,symtab);
 +  ir->opts.ngener = groups->grps[egcENER].nr;
 +  nvcm=str_nelem(vcm,MAXPTR,ptr1);
 +  bRest =
 +    do_numbering(natoms,groups,nvcm,ptr1,grps,gnames,egcVCM,
 +                 restnm,nvcm==0 ? egrptpALL_GENREST : egrptpPART,bVerbose,wi);
 +  if (bRest) {
 +    warning(wi,"Some atoms are not part of any center of mass motion removal group.\n"
 +          "This may lead to artifacts.\n"
 +          "In most cases one should use one group for the whole system.");
 +  }
 +
 +  /* Now we have filled the freeze struct, so we can calculate NRDF */ 
 +  calc_nrdf(mtop,ir,gnames);
 +
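 +  /* Note: the condition below is always false, so this rescaling of the
 +   * velocities for the reduced number of degrees of freedom is
 +   * effectively disabled. */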
 +  if (v && NULL) {
 +    real fac,ntot=0;
 +    
 +    /* Must check per group! */
 +    for(i=0; (i<ir->opts.ngtc); i++) 
 +      ntot += ir->opts.nrdf[i];
 +    if (ntot != (DIM*natoms)) {
 +      fac = sqrt(ntot/(DIM*natoms));
 +      if (bVerbose)
 +      fprintf(stderr,"Scaling velocities by a factor of %.3f to account for constraints\n"
 +              "and removal of center of mass motion\n",fac);
 +      for(i=0; (i<natoms); i++)
 +      svmul(fac,v[i],v[i]);
 +    }
 +  }
 +  
 +  nuser=str_nelem(user1,MAXPTR,ptr1);
 +  do_numbering(natoms,groups,nuser,ptr1,grps,gnames,egcUser1,
 +               restnm,egrptpALL_GENREST,bVerbose,wi);
 +  nuser=str_nelem(user2,MAXPTR,ptr1);
 +  do_numbering(natoms,groups,nuser,ptr1,grps,gnames,egcUser2,
 +               restnm,egrptpALL_GENREST,bVerbose,wi);
 +  nuser=str_nelem(xtc_grps,MAXPTR,ptr1);
 +  do_numbering(natoms,groups,nuser,ptr1,grps,gnames,egcXTC,
 +               restnm,egrptpONE,bVerbose,wi);
 +  nofg = str_nelem(orirefitgrp,MAXPTR,ptr1);
 +  do_numbering(natoms,groups,nofg,ptr1,grps,gnames,egcORFIT,
 +               restnm,egrptpALL_GENREST,bVerbose,wi);
 +
 +  /* QMMM input processing */
 +  nQMg          = str_nelem(QMMM,MAXPTR,ptr1);
 +  nQMmethod     = str_nelem(QMmethod,MAXPTR,ptr2);
 +  nQMbasis      = str_nelem(QMbasis,MAXPTR,ptr3);
 +  if((nQMmethod != nQMg)||(nQMbasis != nQMg)){
 +    gmx_fatal(FARGS,"Invalid QMMM input: %d groups %d basissets"
 +            " and %d methods\n",nQMg,nQMbasis,nQMmethod);
 +  }
 +  /* group rest, if any, is always MM! */
 +  do_numbering(natoms,groups,nQMg,ptr1,grps,gnames,egcQMMM,
 +               restnm,egrptpALL_GENREST,bVerbose,wi);
 +  nr = nQMg; /*atoms->grps[egcQMMM].nr;*/
 +  ir->opts.ngQM = nQMg;
 +  snew(ir->opts.QMmethod,nr);
 +  snew(ir->opts.QMbasis,nr);
 +  for(i=0;i<nr;i++){
 +    /* input consists of strings: RHF CASSCF PM3 .. These need to be
 +     * converted to the corresponding enum in names.c
 +     */
 +    ir->opts.QMmethod[i] = search_QMstring(ptr2[i],eQMmethodNR,
 +                                           eQMmethod_names);
 +    ir->opts.QMbasis[i]  = search_QMstring(ptr3[i],eQMbasisNR,
 +                                           eQMbasis_names);
 +
 +  }
 +  nQMmult   = str_nelem(QMmult,MAXPTR,ptr1);
 +  nQMcharge = str_nelem(QMcharge,MAXPTR,ptr2);
 +  nbSH      = str_nelem(bSH,MAXPTR,ptr3);
 +  snew(ir->opts.QMmult,nr);
 +  snew(ir->opts.QMcharge,nr);
 +  snew(ir->opts.bSH,nr);
 +
 +  for(i=0;i<nr;i++){
 +    ir->opts.QMmult[i]   = strtol(ptr1[i],NULL,10);
 +    ir->opts.QMcharge[i] = strtol(ptr2[i],NULL,10);
 +    ir->opts.bSH[i]      = (gmx_strncasecmp(ptr3[i],"Y",1)==0);
 +  }
 +
 +  nCASelec  = str_nelem(CASelectrons,MAXPTR,ptr1);
 +  nCASorb   = str_nelem(CASorbitals,MAXPTR,ptr2);
 +  snew(ir->opts.CASelectrons,nr);
 +  snew(ir->opts.CASorbitals,nr);
 +  for(i=0;i<nr;i++){
 +    ir->opts.CASelectrons[i]= strtol(ptr1[i],NULL,10);
 +    ir->opts.CASorbitals[i] = strtol(ptr2[i],NULL,10);
 +  }
 +  /* special optimization options */
 +
 +  nbOPT = str_nelem(bOPT,MAXPTR,ptr1);
 +  nbTS = str_nelem(bTS,MAXPTR,ptr2);
 +  snew(ir->opts.bOPT,nr);
 +  snew(ir->opts.bTS,nr);
 +  for(i=0;i<nr;i++){
 +    ir->opts.bOPT[i] = (gmx_strncasecmp(ptr1[i],"Y",1)==0);
 +    ir->opts.bTS[i]  = (gmx_strncasecmp(ptr2[i],"Y",1)==0);
 +  }
 +  nSAon     = str_nelem(SAon,MAXPTR,ptr1);
 +  nSAoff    = str_nelem(SAoff,MAXPTR,ptr2);
 +  nSAsteps  = str_nelem(SAsteps,MAXPTR,ptr3);
 +  snew(ir->opts.SAon,nr);
 +  snew(ir->opts.SAoff,nr);
 +  snew(ir->opts.SAsteps,nr);
 +
 +  for(i=0;i<nr;i++){
 +    ir->opts.SAon[i]    = strtod(ptr1[i],NULL);
 +    ir->opts.SAoff[i]   = strtod(ptr2[i],NULL);
 +    ir->opts.SAsteps[i] = strtol(ptr3[i],NULL,10);
 +  }
 +  /* end of QMMM input */
 +
 +  if (bVerbose)
 +    for(i=0; (i<egcNR); i++) {
 +      fprintf(stderr,"%-16s has %d element(s):",gtypes[i],groups->grps[i].nr); 
 +      for(j=0; (j<groups->grps[i].nr); j++)
 +      fprintf(stderr," %s",*(groups->grpname[groups->grps[i].nm_ind[j]]));
 +      fprintf(stderr,"\n");
 +    }
 +
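 +  /* Energy group pair flags (exclusions and pair tables) are stored in an
 +   * ngener*ngener matrix and set symmetrically. */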
 +  nr = groups->grps[egcENER].nr;
 +  snew(ir->opts.egp_flags,nr*nr);
 +
 +  bExcl = do_egp_flag(ir,groups,"energygrp-excl",egpexcl,EGP_EXCL);
 +  if (bExcl && EEL_FULL(ir->coulombtype))
 +    warning(wi,"Can not exclude the lattice Coulomb energy between energy groups");
 +
 +  bTable = do_egp_flag(ir,groups,"energygrp-table",egptable,EGP_TABLE);
 +  if (bTable && !(ir->vdwtype == evdwUSER) && 
 +      !(ir->coulombtype == eelUSER) && !(ir->coulombtype == eelPMEUSER) &&
 +      !(ir->coulombtype == eelPMEUSERSWITCH))
 +    gmx_fatal(FARGS,"Can only have energy group pair tables in combination with user tables for VdW and/or Coulomb");
 +
 +  decode_cos(efield_x,&(ir->ex[XX]),FALSE);
 +  decode_cos(efield_xt,&(ir->et[XX]),TRUE);
 +  decode_cos(efield_y,&(ir->ex[YY]),FALSE);
 +  decode_cos(efield_yt,&(ir->et[YY]),TRUE);
 +  decode_cos(efield_z,&(ir->ex[ZZ]),FALSE);
 +  decode_cos(efield_zt,&(ir->et[ZZ]),TRUE);
 +
 +  if (ir->bAdress)
 +    do_adress_index(ir->adress,groups,gnames,&(ir->opts),wi);
 +
 +  for(i=0; (i<grps->nr); i++)
 +    sfree(gnames[i]);
 +  sfree(gnames);
 +  done_blocka(grps);
 +  sfree(grps);
 +
 +}
 +
 +
 +
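 +/* Check for duplicate distance restraint labels; duplicates usually mean
 + * that the parameters of multiple pairs within one restraint are not
 + * identical. */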
 +static void check_disre(gmx_mtop_t *mtop)
 +{
 +  gmx_ffparams_t *ffparams;
 +  t_functype *functype;
 +  t_iparams  *ip;
 +  int i,ndouble,ftype;
 +  int label,old_label;
 +  
 +  if (gmx_mtop_ftype_count(mtop,F_DISRES) > 0) {
 +    ffparams  = &mtop->ffparams;
 +    functype  = ffparams->functype;
 +    ip        = ffparams->iparams;
 +    ndouble   = 0;
 +    old_label = -1;
 +    for(i=0; i<ffparams->ntypes; i++) {
 +      ftype = functype[i];
 +      if (ftype == F_DISRES) {
 +      label = ip[i].disres.label;
 +      if (label == old_label) {
 +        fprintf(stderr,"Distance restraint index %d occurs twice\n",label);
 +        ndouble++;
 +      }
 +      old_label = label;
 +      }
 +    }
 +    if (ndouble>0)
 +      gmx_fatal(FARGS,"Found %d double distance restraint indices,\n"
 +              "probably the parameters for multiple pairs in one restraint "
 +              "are not identical\n",ndouble);
 +  }
 +}
 +
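 +/* Determine for each dimension whether the system has an absolute
 + * reference: no COM motion removal in that dimension, frozen atoms, or
 + * (flat-bottom) position restraints acting on it. */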
 +static gmx_bool absolute_reference(t_inputrec *ir,gmx_mtop_t *sys,
 +                                   gmx_bool posres_only,
 +                                   ivec AbsRef)
 +{
 +    int d,g,i;
 +    gmx_mtop_ilistloop_t iloop;
 +    t_ilist *ilist;
 +    int nmol;
 +    t_iparams *pr;
 +
 +    clear_ivec(AbsRef);
 +
 +    if (!posres_only)
 +    {
 +        /* Check the COM */
 +        for(d=0; d<DIM; d++)
 +        {
 +            AbsRef[d] = (d < ndof_com(ir) ? 0 : 1);
 +        }
 +        /* Check for freeze groups */
 +        for(g=0; g<ir->opts.ngfrz; g++)
 +        {
 +            for(d=0; d<DIM; d++)
 +            {
 +                if (ir->opts.nFreeze[g][d] != 0)
 +                {
 +                    AbsRef[d] = 1;
 +                }
 +            }
 +        }
 +    }
 +
 +    /* Check for position restraints */
 +    iloop = gmx_mtop_ilistloop_init(sys);
 +    while (gmx_mtop_ilistloop_next(iloop,&ilist,&nmol))
 +    {
 +        if (nmol > 0 &&
 +            (AbsRef[XX] == 0 || AbsRef[YY] == 0 || AbsRef[ZZ] == 0))
 +        {
 +            for(i=0; i<ilist[F_POSRES].nr; i+=2)
 +            {
 +                pr = &sys->ffparams.iparams[ilist[F_POSRES].iatoms[i]];
 +                for(d=0; d<DIM; d++)
 +                {
 +                    if (pr->posres.fcA[d] != 0)
 +                    {
 +                        AbsRef[d] = 1;
 +                    }
 +                }
 +            }
 +            for(i=0; i<ilist[F_FBPOSRES].nr; i+=2)
 +            {
 +                /* Check for flat-bottom posres */
 +                pr = &sys->ffparams.iparams[ilist[F_FBPOSRES].iatoms[i]];
 +                if (pr->fbposres.k != 0)
 +                {
 +                    switch(pr->fbposres.geom)
 +                    {
 +                    case efbposresSPHERE:
 +                        AbsRef[XX] = AbsRef[YY] = AbsRef[ZZ] = 1;
 +                        break;
 +                    case efbposresCYLINDER:
 +                        AbsRef[XX] = AbsRef[YY] = 1;
 +                        break;
 +                    case efbposresX: /* d=XX */
 +                    case efbposresY: /* d=YY */
 +                    case efbposresZ: /* d=ZZ */
 +                        d = pr->fbposres.geom - efbposresX;
 +                        AbsRef[d] = 1;
 +                        break;
 +                    default:
 +                        gmx_fatal(FARGS," Invalid geometry for flat-bottom position restraint.\n"
 +                                  "Expected nr between 1 and %d. Found %d\n", efbposresNR-1,
 +                                  pr->fbposres.geom);
 +                    }
 +                }
 +            }
 +        }
 +    }
 +
 +    return (AbsRef[XX] != 0 && AbsRef[YY] != 0 && AbsRef[ZZ] != 0);
 +}
 +
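 +/* Checks that need both the input record and the topology: COM motion
 + * removal, charges versus electrostatics, constraints, accelerations,
 + * soft-core interactions and pulling. */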
 +void triple_check(const char *mdparin,t_inputrec *ir,gmx_mtop_t *sys,
 +                  warninp_t wi)
 +{
 +  char err_buf[256];
 +  int  i,m,g,nmol,npct;
 +  gmx_bool bCharge,bAcc;
 +  real gdt_max,*mgrp,mt;
 +  rvec acc;
 +  gmx_mtop_atomloop_block_t aloopb;
 +  gmx_mtop_atomloop_all_t aloop;
 +  t_atom *atom;
 +  ivec AbsRef;
 +  char warn_buf[STRLEN];
 +
 +  set_warning_line(wi,mdparin,-1);
 +
 +  if (EI_DYNAMICS(ir->eI) && !EI_SD(ir->eI) && ir->eI != eiBD &&
 +      ir->comm_mode == ecmNO &&
 +      !(absolute_reference(ir,sys,FALSE,AbsRef) || ir->nsteps <= 10)) {
 +    warning(wi,"You are not using center of mass motion removal (mdp option comm-mode), numerical rounding errors can lead to build up of kinetic energy of the center of mass");
 +  }
 +
 +    /* Check for pressure coupling with absolute position restraints */
 +    if (ir->epc != epcNO && ir->refcoord_scaling == erscNO)
 +    {
 +        absolute_reference(ir,sys,TRUE,AbsRef);
 +        {
 +            for(m=0; m<DIM; m++)
 +            {
 +                if (AbsRef[m] && norm2(ir->compress[m]) > 0)
 +                {
 +                    warning(wi,"You are using pressure coupling with absolute position restraints, this will give artifacts. Use the refcoord_scaling option.");
 +                    break;
 +                }
 +            }
 +        }
 +    }
 +
 +  bCharge = FALSE;
 +  aloopb = gmx_mtop_atomloop_block_init(sys);
 +  while (gmx_mtop_atomloop_block_next(aloopb,&atom,&nmol)) {
 +    if (atom->q != 0 || atom->qB != 0) {
 +      bCharge = TRUE;
 +    }
 +  }
 +  
 +  if (!bCharge) {
 +    if (EEL_FULL(ir->coulombtype)) {
 +      sprintf(err_buf,
 +            "You are using full electrostatics treatment %s for a system without charges.\n"
 +            "This costs a lot of performance for just processing zeros; consider using %s instead.\n",
 +            EELTYPE(ir->coulombtype),EELTYPE(eelCUT));
 +      warning(wi,err_buf);
 +    }
 +  } else {
 +    if (ir->coulombtype == eelCUT && ir->rcoulomb > 0 && !ir->implicit_solvent) {
 +      sprintf(err_buf,
 +            "You are using a plain Coulomb cut-off, which might produce artifacts.\n"
 +            "You might want to consider using %s electrostatics.\n",
 +            EELTYPE(eelPME));
 +      warning_note(wi,err_buf);
 +    }
 +  }
 +
 +  /* Generalized reaction field */  
 +  if (ir->opts.ngtc == 0) {
 +    sprintf(err_buf,"No temperature coupling while using coulombtype %s",
 +          eel_names[eelGRF]);
 +    CHECK(ir->coulombtype == eelGRF);
 +  }
 +  else {
 +    sprintf(err_buf,"When using coulombtype = %s"
 +          " ref-t for temperature coupling should be > 0",
 +          eel_names[eelGRF]);
 +    CHECK((ir->coulombtype == eelGRF) && (ir->opts.ref_t[0] <= 0));
 +  }
 +
 +    if (ir->eI == eiSD1 &&
 +        (gmx_mtop_ftype_count(sys,F_CONSTR) > 0 ||
 +         gmx_mtop_ftype_count(sys,F_SETTLE) > 0))
 +    {
 +        sprintf(warn_buf,"With constraints integrator %s is less accurate, consider using %s instead",ei_names[ir->eI],ei_names[eiSD2]);
 +        warning_note(wi,warn_buf);
 +    }
 +    
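 +  /* Check whether the acceleration groups produce a net acceleration of the
 +   * whole system; if COM motion removal is active, the accelerations are
 +   * corrected for it. */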
 +  bAcc = FALSE;
 +  for(i=0; (i<sys->groups.grps[egcACC].nr); i++) {
 +    for(m=0; (m<DIM); m++) {
 +      if (fabs(ir->opts.acc[i][m]) > 1e-6) {
 +      bAcc = TRUE;
 +      }
 +    }
 +  }
 +  if (bAcc) {
 +    clear_rvec(acc);
 +    snew(mgrp,sys->groups.grps[egcACC].nr);
 +    aloop = gmx_mtop_atomloop_all_init(sys);
 +    while (gmx_mtop_atomloop_all_next(aloop,&i,&atom)) {
 +      mgrp[ggrpnr(&sys->groups,egcACC,i)] += atom->m;
 +    }
 +    mt = 0.0;
 +    for(i=0; (i<sys->groups.grps[egcACC].nr); i++) {
 +      for(m=0; (m<DIM); m++)
 +      acc[m] += ir->opts.acc[i][m]*mgrp[i];
 +      mt += mgrp[i];
 +    }
 +    for(m=0; (m<DIM); m++) {
 +      if (fabs(acc[m]) > 1e-6) {
 +      const char *dim[DIM] = { "X", "Y", "Z" };
 +      fprintf(stderr,
 +              "Net Acceleration in %s direction, will %s be corrected\n",
 +              dim[m],ir->nstcomm != 0 ? "" : "not");
 +      if (ir->nstcomm != 0 && m < ndof_com(ir)) {
 +        acc[m] /= mt;
 +        for (i=0; (i<sys->groups.grps[egcACC].nr); i++)
 +          ir->opts.acc[i][m] -= acc[m];
 +      }
 +      }
 +    }
 +    sfree(mgrp);
 +  }
 +
 +  if (ir->efep != efepNO && ir->fepvals->sc_alpha != 0 &&
 +      !gmx_within_tol(sys->ffparams.reppow,12.0,10*GMX_DOUBLE_EPS)) {
 +    gmx_fatal(FARGS,"Soft-core interactions are only supported with VdW repulsion power 12");
 +  }
 +
 +  if (ir->ePull != epullNO) {
 +    if (ir->pull->grp[0].nat == 0) {
 +        absolute_reference(ir,sys,FALSE,AbsRef);
 +      for(m=0; m<DIM; m++) {
 +      if (ir->pull->dim[m] && !AbsRef[m]) {
 +        warning(wi,"You are using an absolute reference for pulling, but the rest of the system does not have an absolute reference. This will lead to artifacts.");
 +        break;
 +      }
 +      }
 +    }
 +
 +    if (ir->pull->eGeom == epullgDIRPBC) {
 +      for(i=0; i<3; i++) {
 +      for(m=0; m<=i; m++) {
 +        if ((ir->epc != epcNO && ir->compress[i][m] != 0) ||
 +            ir->deform[i][m] != 0) {
 +          for(g=1; g<ir->pull->ngrp; g++) {
 +            if (ir->pull->grp[g].vec[m] != 0) {
 +              gmx_fatal(FARGS,"Cannot have a dynamic box while using pull geometry '%s' (dim %c)",EPULLGEOM(ir->pull->eGeom),'x'+m);
 +            }
 +          }
 +        }
 +      }
 +      }
 +    }
 +  }
 +
 +  check_disre(sys);
 +}
 +
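 +/* Final checks that need the box: box shape, constraint algorithm settings,
 + * and cut-off lengths versus the box dimensions. */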
 +void double_check(t_inputrec *ir,matrix box,gmx_bool bConstr,warninp_t wi)
 +{
 +  real min_size;
 +  gmx_bool bTWIN;
 +  char warn_buf[STRLEN];
 +  const char *ptr;
 +  
 +  ptr = check_box(ir->ePBC,box);
 +  if (ptr) {
 +      warning_error(wi,ptr);
 +  }  
 +
 +  if (bConstr && ir->eConstrAlg == econtSHAKE) {
 +    if (ir->shake_tol <= 0.0) {
 +      sprintf(warn_buf,"ERROR: shake-tol must be > 0 instead of %g\n",
 +              ir->shake_tol);
 +      warning_error(wi,warn_buf);
 +    }
 +
 +    if (IR_TWINRANGE(*ir) && ir->nstlist > 1) {
 +      sprintf(warn_buf,"With twin-range cut-off's and SHAKE the virial and the pressure are incorrect.");
 +      if (ir->epc == epcNO) {
 +      warning(wi,warn_buf);
 +      } else {
 +          warning_error(wi,warn_buf);
 +      }
 +    }
 +  }
 +
 +  if( (ir->eConstrAlg == econtLINCS) && bConstr) {
 +    /* If we have Lincs constraints: */
 +    if(ir->eI==eiMD && ir->etc==etcNO &&
 +       ir->eConstrAlg==econtLINCS && ir->nLincsIter==1) {
 +      sprintf(warn_buf,"For energy conservation with LINCS, lincs_iter should be 2 or larger.\n");
 +      warning_note(wi,warn_buf);
 +    }
 +    
 +    if ((ir->eI == eiCG || ir->eI == eiLBFGS) && (ir->nProjOrder<8)) {
 +      sprintf(warn_buf,"For accurate %s with LINCS constraints, lincs-order should be 8 or more.",ei_names[ir->eI]);
 +      warning_note(wi,warn_buf);
 +    }
 +    if (ir->epc==epcMTTK) {
 +        warning_error(wi,"MTTK not compatible with lincs -- use shake instead.");
 +    }
 +  }
 +
 +  if (ir->LincsWarnAngle > 90.0) {
 +    sprintf(warn_buf,"lincs-warnangle can not be larger than 90 degrees, setting it to 90.\n");
 +    warning(wi,warn_buf);
 +    ir->LincsWarnAngle = 90.0;
 +  }
 +
 +  if (ir->ePBC != epbcNONE) {
 +    if (ir->nstlist == 0) {
 +      warning(wi,"With nstlist=0 atoms are only put into the box at step 0, therefore drifting atoms might cause the simulation to crash.");
 +    }
 +    bTWIN = (ir->rlistlong > ir->rlist);
 +    if (ir->ns_type == ensGRID) {
 +      if (sqr(ir->rlistlong) >= max_cutoff2(ir->ePBC,box)) {
 +          sprintf(warn_buf,"ERROR: The cut-off length is longer than half the shortest box vector or longer than the smallest box diagonal element. Increase the box size or decrease %s.\n",
 +              bTWIN ? (ir->rcoulomb==ir->rlistlong ? "rcoulomb" : "rvdw"):"rlist");
 +          warning_error(wi,warn_buf);
 +      }
 +    } else {
 +      min_size = min(box[XX][XX],min(box[YY][YY],box[ZZ][ZZ]));
 +      if (2*ir->rlistlong >= min_size) {
 +          sprintf(warn_buf,"ERROR: One of the box lengths is smaller than twice the cut-off length. Increase the box size or decrease rlist.");
 +          warning_error(wi,warn_buf);
 +      if (TRICLINIC(box))
 +        fprintf(stderr,"Grid search might allow larger cut-off's than simple search with triclinic boxes.");
 +      }
 +    }
 +  }
 +}
 +
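 +/* Compare the largest charge group radii with the neighbour list cut-off
 + * settings and warn when the sum of the two largest radii does not fit
 + * within them. */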
 +void check_chargegroup_radii(const gmx_mtop_t *mtop,const t_inputrec *ir,
 +                             rvec *x,
 +                             warninp_t wi)
 +{
 +    real rvdw1,rvdw2,rcoul1,rcoul2;
 +    char warn_buf[STRLEN];
 +
 +    calc_chargegroup_radii(mtop,x,&rvdw1,&rvdw2,&rcoul1,&rcoul2);
 +
 +    if (rvdw1 > 0)
 +    {
 +        printf("Largest charge group radii for Van der Waals: %5.3f, %5.3f nm\n",
 +               rvdw1,rvdw2);
 +    }
 +    if (rcoul1 > 0)
 +    {
 +        printf("Largest charge group radii for Coulomb:       %5.3f, %5.3f nm\n",
 +               rcoul1,rcoul2);
 +    }
 +
 +    if (ir->rlist > 0)
 +    {
 +        if (rvdw1  + rvdw2  > ir->rlist ||
 +            rcoul1 + rcoul2 > ir->rlist)
 +        {
 +            sprintf(warn_buf,"The sum of the two largest charge group radii (%f) is larger than rlist (%f)\n",max(rvdw1+rvdw2,rcoul1+rcoul2),ir->rlist);
 +            warning(wi,warn_buf);
 +        }
 +        else
 +        {
 +            /* Here we do not use the zero at cut-off macro,
 +             * since user defined interactions might purposely
 +             * not be zero at the cut-off.
 +             */
 +            if (EVDW_IS_ZERO_AT_CUTOFF(ir->vdwtype) &&
 +                rvdw1 + rvdw2 > ir->rlist - ir->rvdw)
 +            {
 +                sprintf(warn_buf,"The sum of the two largest charge group radii (%f) is larger than rlist (%f) - rvdw (%f)\n",
 +                        rvdw1+rvdw2,
 +                        ir->rlist,ir->rvdw);
 +                if (ir_NVE(ir))
 +                {
 +                    warning(wi,warn_buf);
 +                }
 +                else
 +                {
 +                    warning_note(wi,warn_buf);
 +                }
 +            }
 +            if (EEL_IS_ZERO_AT_CUTOFF(ir->coulombtype) &&
 +                rcoul1 + rcoul2 > ir->rlistlong - ir->rcoulomb)
 +            {
 +                sprintf(warn_buf,"The sum of the two largest charge group radii (%f) is larger than %s (%f) - rcoulomb (%f)\n",
 +                        rcoul1+rcoul2,
 +                        ir->rlistlong > ir->rlist ? "rlistlong" : "rlist",
 +                        ir->rlistlong,ir->rcoulomb);
 +                if (ir_NVE(ir))
 +                {
 +                    warning(wi,warn_buf);
 +                }
 +                else
 +                {
 +                    warning_note(wi,warn_buf);
 +                }
 +            }
 +        }
 +    }
 +}
Simple merge
Simple merge
index 339d3e9204aa0e3205c2373d1ec454a5def02f63,0000000000000000000000000000000000000000..b18aa95356c43c162df83171d2a076629fad185f
mode 100644,000000..100644
--- /dev/null
@@@ -1,1312 -1,0 +1,1310 @@@
-       memcpy(&(ps->param[kept_i]),
-            &(ps->param[i]),(size_t)sizeof(ps->param[0]));
 +/*
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 + *
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <stdio.h>
 +#include <math.h>
++#include <assert.h>
 +#include <string.h>
 +#include "vsite_parm.h"
 +#include "smalloc.h"
 +#include "resall.h"
 +#include "add_par.h"
 +#include "vec.h"
 +#include "toputil.h"
 +#include "physics.h"
 +#include "index.h"
 +#include "names.h"
 +#include "gmx_fatal.h"
 +#include "string2.h"
 +#include "physics.h"
 +#include "macros.h"
 +
 +typedef struct {
 +  t_iatom a[4];
 +  real    c;
 +} t_mybonded;
 +
 +typedef struct {
 +  int     ftype;
 +  t_param *param;
 +} vsitebondparam_t;
 +
 +typedef struct {
 +  int              nr;
 +  int              ftype;
 +  vsitebondparam_t *vsbp;
 +} at2vsitebond_t;
 +
 +typedef struct {
 +  int nr;
 +  int *aj;
 +} at2vsitecon_t;
 +
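 +/* Return the number of atoms to check for this interaction type when
 + * collecting virtual site geometry (bonds, constraints, angle-type and
 + * improper dihedral interactions); 0 means the type is ignored. */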
 +static int vsite_bond_nrcheck(int ftype)
 +{
 +  int nrcheck;
 +  
 +  if ((interaction_function[ftype].flags & (IF_BTYPE | IF_CONSTRAINT | IF_ATYPE)) || (ftype == F_IDIHS))
 +    nrcheck = NRAL(ftype);
 +  else
 +    nrcheck = 0;
 +  
 +  return nrcheck;
 +}
 +
 +static void enter_bonded(int nratoms, int *nrbonded, t_mybonded **bondeds, 
 +                       t_param *param)
 +{
 +  int j;
 +
 +  srenew(*bondeds, *nrbonded+1);
 +  
 +  /* copy atom numbers */
 +  for(j=0; j<nratoms; j++)
 +    (*bondeds)[*nrbonded].a[j] = param->a[j];
 +  /* copy parameter */
 +  (*bondeds)[*nrbonded].c = param->C0;
 +  
 +  (*nrbonded)++;
 +}
 +
 +static void get_bondeds(int nrat, t_iatom atoms[],
 +                      at2vsitebond_t *at2vb, t_params plist[],
 +                      int *nrbond, t_mybonded **bonds,
 +                      int *nrang,  t_mybonded **angles,
 +                      int *nridih, t_mybonded **idihs )
 +{
 +  int     k,i,ftype,nrcheck;
 +  t_param *param;
 +  
 +  for(k=0; k<nrat; k++) {
 +    for(i=0; i<at2vb[atoms[k]].nr; i++) {
 +      ftype = at2vb[atoms[k]].vsbp[i].ftype;
 +      param = at2vb[atoms[k]].vsbp[i].param;
 +      nrcheck = vsite_bond_nrcheck(ftype);
 +      /* abuse nrcheck to see if we're adding bond, angle or idih */
 +      switch (nrcheck) {
 +      case 2: enter_bonded(nrcheck,nrbond,bonds, param); break;
 +      case 3: enter_bonded(nrcheck,nrang, angles,param); break;
 +      case 4: enter_bonded(nrcheck,nridih,idihs, param); break;
 +      }
 +    }
 +  }
 +}
 +
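 +/* Build, for each atom involved in a virtual site construction, a list of
 + * the bonded interactions it takes part in, so that parameter lookups do
 + * not have to scan all interactions. */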
 +static at2vsitebond_t *make_at2vsitebond(int natoms,t_params plist[])
 +{
 +  gmx_bool *bVSI;
 +  int  ftype,i,j,nrcheck,nr;
 +  t_iatom *aa;
 +  at2vsitebond_t *at2vb;
 +
 +  snew(at2vb,natoms);
 +
 +  snew(bVSI,natoms);
 +  for(ftype=0; (ftype<F_NRE); ftype++) {
 +    if ((interaction_function[ftype].flags & IF_VSITE) && ftype != F_VSITEN) {
 +      for(i=0; (i<plist[ftype].nr); i++) {
 +      for(j=0; j<NRAL(ftype); j++)
 +        bVSI[plist[ftype].param[i].a[j]] = TRUE;
 +      }
 +    }
 +  }
 +  
 +  for(ftype=0; (ftype<F_NRE); ftype++) {
 +    nrcheck = vsite_bond_nrcheck(ftype);
 +    if (nrcheck > 0) {
 +      for(i=0; (i<plist[ftype].nr); i++) {
 +      aa = plist[ftype].param[i].a;
 +      for(j=0; j<nrcheck; j++) {
 +        if (bVSI[aa[j]]) {
 +          nr = at2vb[aa[j]].nr;
 +          if (nr % 10 == 0)
 +            srenew(at2vb[aa[j]].vsbp,nr+10);
 +          at2vb[aa[j]].vsbp[nr].ftype = ftype;
 +          at2vb[aa[j]].vsbp[nr].param = &plist[ftype].param[i];
 +          at2vb[aa[j]].nr++;
 +        }
 +      }
 +      }
 +    }
 +  }
 +  sfree(bVSI);
 +
 +  return at2vb;
 +}
 +
 +static void done_at2vsitebond(int natoms,at2vsitebond_t *at2vb)
 +{
 +  int i;
 +
 +  for(i=0; i<natoms; i++)
 +    if (at2vb[i].nr)
 +      sfree(at2vb[i].vsbp);
 +  sfree(at2vb);
 +}
 +
 +static at2vsitecon_t *make_at2vsitecon(int natoms,t_params plist[])
 +{
 +  gmx_bool *bVSI;
 +  int  ftype,i,j,ai,aj,nr;
 +  at2vsitecon_t *at2vc;
 +
 +  snew(at2vc,natoms);
 +
 +  snew(bVSI,natoms);
 +  for(ftype=0; (ftype<F_NRE); ftype++) {
 +    if ((interaction_function[ftype].flags & IF_VSITE) && ftype != F_VSITEN) {
 +      for(i=0; (i<plist[ftype].nr); i++) {
 +      for(j=0; j<NRAL(ftype); j++)
 +        bVSI[plist[ftype].param[i].a[j]] = TRUE;
 +      }
 +    }
 +  }
 +  
 +  for(ftype=0; (ftype<F_NRE); ftype++) {
 +    if (interaction_function[ftype].flags & IF_CONSTRAINT) {
 +      for(i=0; (i<plist[ftype].nr); i++) {
 +      ai = plist[ftype].param[i].AI;
 +      aj = plist[ftype].param[i].AJ;
 +      if (bVSI[ai] && bVSI[aj]) {
 +        /* Store forward direction */
 +        nr = at2vc[ai].nr;
 +        if (nr % 10 == 0)
 +          srenew(at2vc[ai].aj,nr+10);
 +        at2vc[ai].aj[nr] = aj;
 +        at2vc[ai].nr++;
 +        /* Store backward direction */
 +        nr = at2vc[aj].nr;
 +        if (nr % 10 == 0)
 +          srenew(at2vc[aj].aj,nr+10);
 +        at2vc[aj].aj[nr] = ai;
 +        at2vc[aj].nr++;
 +      }
 +      }
 +    }
 +  }
 +  sfree(bVSI);
 +
 +  return at2vc;
 +}
 +
 +static void done_at2vsitecon(int natoms,at2vsitecon_t *at2vc)
 +{
 +  int i;
 +
 +  for(i=0; i<natoms; i++)
 +    if (at2vc[i].nr)
 +      sfree(at2vc[i].aj);
 +  sfree(at2vc);
 +}
 +
 +/* for debug */
 +static void print_bad(FILE *fp, 
 +                    int nrbond, t_mybonded *bonds,
 +                    int nrang,  t_mybonded *angles,
 +                    int nridih, t_mybonded *idihs )
 +{
 +  int i;
 +  
 +  if (nrbond) {
 +    fprintf(fp,"bonds:");
 +    for(i=0; i<nrbond; i++)
 +      fprintf(fp," %u-%u (%g)", 
 +            bonds[i].AI+1, bonds[i].AJ+1, bonds[i].c);
 +    fprintf(fp,"\n");
 +  }
 +  if (nrang) {
 +    fprintf(fp,"angles:");
 +    for(i=0; i<nrang; i++)
 +      fprintf(fp," %u-%u-%u (%g)", 
 +            angles[i].AI+1, angles[i].AJ+1, 
 +            angles[i].AK+1, angles[i].c);
 +    fprintf(fp,"\n");
 +  }
 +  if (nridih) {
 +    fprintf(fp,"idihs:");
 +    for(i=0; i<nridih; i++)
 +      fprintf(fp," %u-%u-%u-%u (%g)", 
 +            idihs[i].AI+1, idihs[i].AJ+1, 
 +            idihs[i].AK+1, idihs[i].AL+1, idihs[i].c);
 +    fprintf(fp,"\n");
 +  }
 +}
 +
 +static void print_param(FILE *fp, int ftype, int i, t_param *param)
 +{
 +  static int pass = 0;
 +  static int prev_ftype= NOTSET;
 +  static int prev_i    = NOTSET;
 +  int j;
 +  
 +  if ( (ftype!=prev_ftype) || (i!=prev_i) ) {
 +    pass = 0;
 +    prev_ftype= ftype;
 +    prev_i    = i;
 +  }
 +  fprintf(fp,"(%d) plist[%s].param[%d]",
 +        pass,interaction_function[ftype].name,i);
 +  for(j=0; j<NRFP(ftype); j++)
 +    fprintf(fp,".c[%d]=%g ",j,param->c[j]);
 +  fprintf(fp,"\n");
 +  pass++;
 +}
 +
 +static real get_bond_length(int nrbond, t_mybonded bonds[], 
 +                          t_iatom ai, t_iatom aj)
 +{
 +  int  i;
 +  real bondlen;
 +  
 +  bondlen=NOTSET;
 +  for (i=0; i < nrbond && (bondlen==NOTSET); i++) {
 +    /* check both ways */
 +    if ( ( (ai == bonds[i].AI) && (aj == bonds[i].AJ) ) || 
 +       ( (ai == bonds[i].AJ) && (aj == bonds[i].AI) ) )
 +      bondlen = bonds[i].c; /* note: bonds[i].c might be NOTSET */
 +  }
 +  return bondlen;
 +}
 +
 +static real get_angle(int nrang, t_mybonded angles[], 
 +                    t_iatom ai, t_iatom aj, t_iatom ak)
 +{
 +  int  i;
 +  real angle;
 +  
 +  angle=NOTSET;
 +  for (i=0; i < nrang && (angle==NOTSET); i++) {
 +    /* check both ways */
 +    if ( ( (ai==angles[i].AI) && (aj==angles[i].AJ) && (ak==angles[i].AK) ) || 
 +       ( (ai==angles[i].AK) && (aj==angles[i].AJ) && (ak==angles[i].AI) ) )
 +      angle = DEG2RAD*angles[i].c;
 +  }
 +  return angle;
 +}
 +
 +static char *get_atomtype_name_AB(t_atom *atom,gpp_atomtype_t atype)
 +{
 +    char *name;
 +
 +    name = get_atomtype_name(atom->type,atype);
 +
 +    /* When using the decoupling option, atom types are changed
 +     * to decoupled for the non-bonded interactions, but the virtual
 +     * sites constructions should be based on the "normal" interactions.
 +     * So we return the state B atom type name if the state A atom
 +     * type is the decoupled one. We should actually check for the atom
 +     * type number, but that's not passed here. So we check for
 +     * the decoupled atom type name. This should not cause trouble
 +     * as this code is only used for topologies with v-sites without
 +     * parameters generated by pdb2gmx.
 +     */
 +    if (strcmp(name,"decoupled") == 0)
 +    {
 +        name = get_atomtype_name(atom->typeB,atype);
 +    }
 +
 +    return name;
 +}
 +
 +static gmx_bool calc_vsite3_param(gpp_atomtype_t atype,
 +                            t_param *param, t_atoms *at,
 +                            int nrbond, t_mybonded *bonds,
 +                            int nrang,  t_mybonded *angles )
 +{
 +  /* i = virtual site          |    ,k
 +   * j = 1st bonded heavy atom | i-j
 +   * k,l = 2nd bonded atoms    |    `l
 +   */
 +  
 +  gmx_bool bXH3,bError;
 +  real bjk,bjl,a=-1,b=-1;
 +  /* check if this is part of a NH3 , NH2-umbrella or CH3 group,
 +   * i.e. if atom k and l are dummy masses (MNH* or MCH3*) */
 +  if (debug) {
 +    int i;
 +    for (i=0; i<4; i++)
 +      fprintf(debug,"atom %u type %s ",
 +            param->a[i]+1,
 +            get_atomtype_name_AB(&at->atom[param->a[i]],atype));
 +    fprintf(debug,"\n");
 +  }
 +  bXH3 = 
 +    ( (gmx_strncasecmp(get_atomtype_name_AB(&at->atom[param->AK],atype),"MNH",3)==0) &&
 +      (gmx_strncasecmp(get_atomtype_name_AB(&at->atom[param->AL],atype),"MNH",3)==0) ) ||
 +    ( (gmx_strncasecmp(get_atomtype_name_AB(&at->atom[param->AK],atype),"MCH3",4)==0) &&
 +      (gmx_strncasecmp(get_atomtype_name_AB(&at->atom[param->AL],atype),"MCH3",4)==0) );
 +  
 +  bjk = get_bond_length(nrbond, bonds, param->AJ, param->AK);
 +  bjl = get_bond_length(nrbond, bonds, param->AJ, param->AL);
 +  bError = (bjk==NOTSET) || (bjl==NOTSET);
 +  if (bXH3) {
 +    /* now we get some XH2/XH3 group specific construction */
 +    /* note: we call the heavy atom 'C' and the X atom 'N' */
 +    real bMM,bCM,bCN,bNH,aCNH,dH,rH,dM,rM;
 +    int aN;
 +    
 +    /* check if bonds from heavy atom (j) to dummy masses (k,l) are equal: */
 +    bError = bError || (bjk!=bjl);
 +    
 +    /* the X atom (C or N) in the XH2/XH3 group is the first after the masses: */
 +    aN = max(param->AK,param->AL)+1;
 +    
 +    /* get common bonds */
 +    bMM = get_bond_length(nrbond, bonds, param->AK, param->AL);
 +    bCM = bjk;
 +    bCN = get_bond_length(nrbond, bonds, param->AJ, aN);
 +    bError = bError || (bMM==NOTSET) || (bCN==NOTSET);
 +    
 +    /* calculate common things */
 +    rM  = 0.5*bMM;
 +    dM  = sqrt( sqr(bCM) - sqr(rM) );
 +    
 +    /* are we dealing with the X atom? */
 +    if ( param->AI == aN ) {
 +      /* this is trivial */
 +      a = b = 0.5 * bCN/dM;
 +      
 +    } else {
 +      /* get other bondlengths and angles: */
 +      bNH = get_bond_length(nrbond, bonds, aN, param->AI);
 +      aCNH= get_angle      (nrang, angles, param->AJ, aN, param->AI);
 +      bError = bError || (bNH==NOTSET) || (aCNH==NOTSET);
 +      
 +      /* calculate */
 +      dH  = bCN - bNH * cos(aCNH);
 +      rH  = bNH * sin(aCNH);
 +      
 +      a = 0.5 * ( dH/dM + rH/rM );
 +      b = 0.5 * ( dH/dM - rH/rM );
 +    }
 +  } else
 +    gmx_fatal(FARGS,"calc_vsite3_param not implemented for the general case "
 +              "(atom %d)",param->AI+1);
 +  
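 +  /* C0 and C1 are the a and b coefficients of the 3-particle vsite
 +   * construction x_site = x_j + a*(x_k - x_j) + b*(x_l - x_j). */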
 +  param->C0 = a;
 +  param->C1 = b;
 +  
 +  if (debug)
 +    fprintf(debug,"params for vsite3 %u: %g %g\n",
 +          param->AI+1,param->C0,param->C1);
 +  
 +  return bError;
 +}
 +
 +static gmx_bool calc_vsite3fd_param(t_param *param,
 +                              int nrbond, t_mybonded *bonds,
 +                              int nrang,  t_mybonded *angles)
 +{
 +  /* i = virtual site          |    ,k
 +   * j = 1st bonded heavy atom | i-j
 +   * k,l = 2nd bonded atoms    |    `l
 +   */
 +
 +  gmx_bool bError;
 +  real bij,bjk,bjl,aijk,aijl,rk,rl;
 +  
 +  bij = get_bond_length(nrbond, bonds, param->AI, param->AJ);
 +  bjk = get_bond_length(nrbond, bonds, param->AJ, param->AK);
 +  bjl = get_bond_length(nrbond, bonds, param->AJ, param->AL);
 +  aijk= get_angle      (nrang, angles, param->AI, param->AJ, param->AK);
 +  aijl= get_angle      (nrang, angles, param->AI, param->AJ, param->AL);
 +  bError = (bij==NOTSET) || (bjk==NOTSET) || (bjl==NOTSET) || 
 +    (aijk==NOTSET) || (aijl==NOTSET);
 +  
 +  rk = bjk * sin(aijk);
 +  rl = bjl * sin(aijl);
 +  param->C0 = rk / (rk + rl);
 +  param->C1 = -bij; /* 'bond'-length for fixed distance vsite */
 +  
 +  if (debug)
 +    fprintf(debug,"params for vsite3fd %u: %g %g\n",
 +          param->AI+1,param->C0,param->C1);
 +  return bError;
 +}
 +
 +static gmx_bool calc_vsite3fad_param(t_param *param,
 +                               int nrbond, t_mybonded *bonds,
 +                               int nrang,  t_mybonded *angles)
 +{
 +  /* i = virtual site          |
 +   * j = 1st bonded heavy atom | i-j
 +   * k = 2nd bonded heavy atom |    `k-l
 +   * l = 3rd bonded heavy atom |
 +   */
 +
 +  gmx_bool bSwapParity,bError;
 +  real bij,aijk;
 +  
 +  bSwapParity = ( param->C1 == -1 );
 +  
 +  bij  = get_bond_length(nrbond, bonds, param->AI, param->AJ);
 +  aijk = get_angle      (nrang, angles, param->AI, param->AJ, param->AK);
 +  bError = (bij==NOTSET) || (aijk==NOTSET);
 +  
 +  param->C1 = bij;          /* 'bond'-length for fixed distance vsite */
 +  param->C0 = RAD2DEG*aijk; /* 'bond'-angle for fixed angle vsite */
 +  
 +  if (bSwapParity)
 +    param->C0 = 360 - param->C0;
 +  
 +  if (debug)
 +    fprintf(debug,"params for vsite3fad %u: %g %g\n",
 +          param->AI+1,param->C0,param->C1);
 +  return bError;
 +}
 +
 +static gmx_bool calc_vsite3out_param(gpp_atomtype_t atype,
 +                               t_param *param, t_atoms *at,
 +                               int nrbond, t_mybonded *bonds,
 +                               int nrang,  t_mybonded *angles)
 +{
 +  /* i = virtual site          |    ,k
 +   * j = 1st bonded heavy atom | i-j
 +   * k,l = 2nd bonded atoms    |    `l
 +   * NOTE: i is out of the j-k-l plane!
 +   */
 +  
 +  gmx_bool bXH3,bError,bSwapParity;
 +  real bij,bjk,bjl,aijk,aijl,akjl,pijk,pijl,a,b,c;
 +  
 +  /* check if this is part of a NH2-umbrella, NH3 or CH3 group,
 +   * i.e. if atom k and l are dummy masses (MNH* or MCH3*) */
 +  if (debug) {
 +    int i;
 +    for (i=0; i<4; i++)
 +      fprintf(debug,"atom %u type %s ",
 +            param->a[i]+1,get_atomtype_name_AB(&at->atom[param->a[i]],atype));
 +    fprintf(debug,"\n");
 +  }
 +  bXH3 = 
 +    ( (gmx_strncasecmp(get_atomtype_name_AB(&at->atom[param->AK],atype),"MNH",3)==0) &&
 +      (gmx_strncasecmp(get_atomtype_name_AB(&at->atom[param->AL],atype),"MNH",3)==0) ) ||
 +    ( (gmx_strncasecmp(get_atomtype_name_AB(&at->atom[param->AK],atype),"MCH3",4)==0) &&
 +      (gmx_strncasecmp(get_atomtype_name_AB(&at->atom[param->AL],atype),"MCH3",4)==0) );
 +  
 +  /* check if construction parity must be swapped */  
 +  bSwapParity = ( param->C1 == -1 );
 +  
 +  bjk = get_bond_length(nrbond, bonds, param->AJ, param->AK);
 +  bjl = get_bond_length(nrbond, bonds, param->AJ, param->AL);
 +  bError = (bjk==NOTSET) || (bjl==NOTSET);
 +  if (bXH3) {
 +    /* now we get some XH3 group specific construction */
 +    /* note: we call the heavy atom 'C' and the X atom 'N' */
 +    real bMM,bCM,bCN,bNH,aCNH,dH,rH,rHx,rHy,dM,rM;
 +    int aN;
 +    
 +    /* check if bonds from heavy atom (j) to dummy masses (k,l) are equal: */
 +    bError = bError || (bjk!=bjl);
 +    
 +    /* the X atom (C or N) in the XH3 group is the first after the masses: */
 +    aN = max(param->AK,param->AL)+1;
 +    
 +    /* get all bondlengths and angles: */
 +    bMM = get_bond_length(nrbond, bonds, param->AK, param->AL);
 +    bCM = bjk;
 +    bCN = get_bond_length(nrbond, bonds, param->AJ, aN);
 +    bNH = get_bond_length(nrbond, bonds, aN, param->AI);
 +    aCNH= get_angle      (nrang, angles, param->AJ, aN, param->AI);
 +    bError = bError || 
 +      (bMM==NOTSET) || (bCN==NOTSET) || (bNH==NOTSET) || (aCNH==NOTSET);
 +    
 +    /* calculate */
 +    dH  = bCN - bNH * cos(aCNH);
 +    rH  = bNH * sin(aCNH);
 +    /* we assume the H's are symmetrically distributed */
 +    rHx = rH*cos(DEG2RAD*30);
 +    rHy = rH*sin(DEG2RAD*30);
 +    rM  = 0.5*bMM;
 +    dM  = sqrt( sqr(bCM) - sqr(rM) );
 +    a   = 0.5*( (dH/dM) - (rHy/rM) );
 +    b   = 0.5*( (dH/dM) + (rHy/rM) );
 +    c   = rHx / (2*dM*rM);
 +    
 +  } else {
 +    /* this is the general construction */
 +    
 +    bij = get_bond_length(nrbond, bonds, param->AI, param->AJ);
 +    aijk= get_angle      (nrang, angles, param->AI, param->AJ, param->AK);
 +    aijl= get_angle      (nrang, angles, param->AI, param->AJ, param->AL);
 +    akjl= get_angle      (nrang, angles, param->AK, param->AJ, param->AL);
 +    bError = bError || 
 +      (bij==NOTSET) || (aijk==NOTSET) || (aijl==NOTSET) || (akjl==NOTSET);
 +  
 +    pijk = cos(aijk)*bij;
 +    pijl = cos(aijl)*bij;
 +    a = ( pijk + (pijk*cos(akjl)-pijl) * cos(akjl) / sqr(sin(akjl)) ) / bjk;
 +    b = ( pijl + (pijl*cos(akjl)-pijk) * cos(akjl) / sqr(sin(akjl)) ) / bjl;
 +    c = - sqrt( sqr(bij) - 
 +              ( sqr(pijk) - 2*pijk*pijl*cos(akjl) + sqr(pijl) ) 
 +              / sqr(sin(akjl)) )
 +      / ( bjk*bjl*sin(akjl) );
 +  }
 +  
 +  param->C0 = a;
 +  param->C1 = b;
 +  if (bSwapParity)
 +    param->C2 = -c;
 +  else
 +    param->C2 =  c;
 +  if (debug)
 +    fprintf(debug,"params for vsite3out %u: %g %g %g\n",
 +          param->AI+1,param->C0,param->C1,param->C2);
 +  return bError;
 +}
 +
 +static gmx_bool calc_vsite4fd_param(t_param *param,
 +                              int nrbond, t_mybonded *bonds,
 +                              int nrang,  t_mybonded *angles)
 +{
 +  /* i = virtual site          |    ,k
 +   * j = 1st bonded heavy atom | i-j-m
 +   * k,l,m = 2nd bonded atoms  |    `l
 +   */
 +  
 +  gmx_bool bError;
 +  real bij,bjk,bjl,bjm,aijk,aijl,aijm,akjm,akjl;
 +  real pk,pl,pm,cosakl,cosakm,sinakl,sinakm,cl,cm;
 +  
 +  bij = get_bond_length(nrbond, bonds, param->AI, param->AJ);
 +  bjk = get_bond_length(nrbond, bonds, param->AJ, param->AK);
 +  bjl = get_bond_length(nrbond, bonds, param->AJ, param->AL);
 +  bjm = get_bond_length(nrbond, bonds, param->AJ, param->AM);
 +  aijk= get_angle      (nrang, angles, param->AI, param->AJ, param->AK);
 +  aijl= get_angle      (nrang, angles, param->AI, param->AJ, param->AL);
 +  aijm= get_angle      (nrang, angles, param->AI, param->AJ, param->AM);
 +  akjm= get_angle      (nrang, angles, param->AK, param->AJ, param->AM);
 +  akjl= get_angle      (nrang, angles, param->AK, param->AJ, param->AL);
 +  bError = (bij==NOTSET) || (bjk==NOTSET) || (bjl==NOTSET) || (bjm==NOTSET) ||
 +    (aijk==NOTSET) || (aijl==NOTSET) || (aijm==NOTSET) || (akjm==NOTSET) || 
 +    (akjl==NOTSET);
 +  
 +  if (!bError) {
 +    pk = bjk*sin(aijk);
 +    pl = bjl*sin(aijl);
 +    pm = bjm*sin(aijm);
 +    cosakl = (cos(akjl) - cos(aijk)*cos(aijl)) / (sin(aijk)*sin(aijl));
 +    cosakm = (cos(akjm) - cos(aijk)*cos(aijm)) / (sin(aijk)*sin(aijm));
 +    if ( cosakl < -1 || cosakl > 1 || cosakm < -1 || cosakm > 1 ) {
 +      fprintf(stderr,"virtual site %d: angle ijk = %f, angle ijl = %f, angle ijm = %f\n",
 +            param->AI+1,RAD2DEG*aijk,RAD2DEG*aijl,RAD2DEG*aijm);
 +      gmx_fatal(FARGS,"invalid construction in calc_vsite4fd for atom %d: "
 +                "cosakl=%f, cosakm=%f\n",param->AI+1,cosakl,cosakm);
 +    }
 +    sinakl = sqrt(1-sqr(cosakl));
 +    sinakm = sqrt(1-sqr(cosakm));
 +    
 +    /* note: there is a '+' because of the way the sines are calculated */
 +    cl = -pk / ( pl*cosakl - pk + pl*sinakl*(pm*cosakm-pk)/(pm*sinakm) );
 +    cm = -pk / ( pm*cosakm - pk + pm*sinakm*(pl*cosakl-pk)/(pl*sinakl) );
 +    
 +    param->C0 = cl;
 +    param->C1 = cm;
 +    param->C2 = -bij;
 +    if (debug)
 +      fprintf(debug,"params for vsite4fd %u: %g %g %g\n",
 +            param->AI+1,param->C0,param->C1,param->C2);
 +  }
 +  
 +  return bError;
 +}
 +
 +
 +static gmx_bool 
 +calc_vsite4fdn_param(t_param *param,
 +                     int nrbond, t_mybonded *bonds,
 +                     int nrang,  t_mybonded *angles)
 +{
 +    /* i = virtual site          |    ,k
 +    * j = 1st bonded heavy atom | i-j-m
 +    * k,l,m = 2nd bonded atoms  |    `l
 +    */
 +    
 +    gmx_bool bError;
 +    real bij,bjk,bjl,bjm,aijk,aijl,aijm;
 +    real pk,pl,pm,a,b;
 +    
 +    bij = get_bond_length(nrbond, bonds, param->AI, param->AJ);
 +    bjk = get_bond_length(nrbond, bonds, param->AJ, param->AK);
 +    bjl = get_bond_length(nrbond, bonds, param->AJ, param->AL);
 +    bjm = get_bond_length(nrbond, bonds, param->AJ, param->AM);
 +    aijk= get_angle      (nrang, angles, param->AI, param->AJ, param->AK);
 +    aijl= get_angle      (nrang, angles, param->AI, param->AJ, param->AL);
 +    aijm= get_angle      (nrang, angles, param->AI, param->AJ, param->AM);
 +
 +    bError = (bij==NOTSET) || (bjk==NOTSET) || (bjl==NOTSET) || (bjm==NOTSET) ||
 +        (aijk==NOTSET) || (aijl==NOTSET) || (aijm==NOTSET);
 +    
 +    if (!bError) {
 +        
 +        /* Calculate component of bond j-k along the direction i-j */
 +        pk = -bjk*cos(aijk);
 +
 +        /* Calculate component of bond j-l along the direction i-j */
 +        pl = -bjl*cos(aijl);
 +
 +        /* Calculate component of bond j-m along the direction i-j */
 +        pm = -bjm*cos(aijm);
 +        
 +        if(fabs(pl)<1000*GMX_REAL_MIN || fabs(pm)<1000*GMX_REAL_MIN)
 +        {
 +            fprintf(stderr,"virtual site %d: angle ijk = %f, angle ijl = %f, angle ijm = %f\n",
 +                    param->AI+1,RAD2DEG*aijk,RAD2DEG*aijl,RAD2DEG*aijm);
 +            gmx_fatal(FARGS,"invalid construction in calc_vsite4fdn for atom %d: "
 +                      "pl=%f, pm=%f\n",param->AI+1,pl,pm);
 +        }
 +        
 +        a = pk/pl;
 +        b = pk/pm;
 +          
 +        param->C0 = a;
 +        param->C1 = b;
 +        param->C2 = bij;
 +        
 +        if (debug)
 +            fprintf(debug,"params for vsite4fdn %u: %g %g %g\n",
 +                    param->AI+1,param->C0,param->C1,param->C2);
 +    }
 +    
 +    return bError;
 +}
 +
 +
 +
 +int set_vsites(gmx_bool bVerbose, t_atoms *atoms, gpp_atomtype_t atype,
 +              t_params plist[])
 +{
 +  int i,j,ftype;
 +  int nvsite,nrbond,nrang,nridih,nrset;
 +  gmx_bool bFirst,bSet,bERROR;
 +  at2vsitebond_t *at2vb;
 +  t_mybonded *bonds;
 +  t_mybonded *angles;
 +  t_mybonded *idihs;
 +  
 +  bFirst = TRUE;
 +  bERROR = TRUE;
 +  nvsite=0;
 +  if (debug)
 +    fprintf(debug, "\nCalculating parameters for virtual sites\n");
 +
 +  /* Make a reverse list to avoid ninteractions^2 operations */
 +  at2vb = make_at2vsitebond(atoms->nr,plist);
 +
 +  for(ftype=0; (ftype<F_NRE); ftype++)
 +    if ((interaction_function[ftype].flags & IF_VSITE) && ftype != F_VSITEN) {
 +      nrset=0;
 +      nvsite+=plist[ftype].nr;
 +      for(i=0; (i<plist[ftype].nr); i++) {
 +      /* check if all parameters are set */
 +      bSet=TRUE;
 +      for(j=0; j<NRFP(ftype) && bSet; j++)
 +        bSet = plist[ftype].param[i].c[j]!=NOTSET;
 +
 +      if (debug) {
 +        fprintf(debug,"bSet=%s ",bool_names[bSet]);
 +        print_param(debug,ftype,i,&plist[ftype].param[i]);
 +      }
 +      if (!bSet) {
 +        if (bVerbose && bFirst) {
 +          fprintf(stderr,"Calculating parameters for virtual sites\n");
 +          bFirst=FALSE;
 +        }
 +        
 +        nrbond=nrang=nridih=0;
 +        bonds = NULL;
 +        angles= NULL;
 +        idihs = NULL;
 +        nrset++;
 +        /* now set the vsite parameters: */
 +        get_bondeds(NRAL(ftype), plist[ftype].param[i].a, at2vb, plist, 
 +                    &nrbond, &bonds, &nrang,  &angles, &nridih, &idihs);
 +        if (debug) {
 +          fprintf(debug, "Found %d bonds, %d angles and %d idihs "
 +                  "for virtual site %u (%s)\n",nrbond,nrang,nridih,
 +                  plist[ftype].param[i].AI+1,
 +                  interaction_function[ftype].longname);
 +          print_bad(debug, nrbond, bonds, nrang, angles, nridih, idihs);
 +        } /* debug */
 +        switch(ftype) {
 +        case F_VSITE3: 
 +          bERROR = 
 +            calc_vsite3_param(atype, &(plist[ftype].param[i]), atoms,
 +                              nrbond, bonds, nrang, angles);
 +          break;
 +        case F_VSITE3FD:
 +          bERROR = 
 +            calc_vsite3fd_param(&(plist[ftype].param[i]),
 +                                nrbond, bonds, nrang, angles);
 +          break;
 +        case F_VSITE3FAD:
 +          bERROR = 
 +            calc_vsite3fad_param(&(plist[ftype].param[i]),
 +                                 nrbond, bonds, nrang, angles);
 +          break;
 +        case F_VSITE3OUT:
 +          bERROR = 
 +            calc_vsite3out_param(atype, &(plist[ftype].param[i]), atoms,
 +                                 nrbond, bonds, nrang, angles);
 +          break;
 +        case F_VSITE4FD:
 +          bERROR = 
 +            calc_vsite4fd_param(&(plist[ftype].param[i]), 
 +                                nrbond, bonds, nrang, angles);
 +          break;
 +        case F_VSITE4FDN:
 +          bERROR = 
 +            calc_vsite4fdn_param(&(plist[ftype].param[i]), 
 +                               nrbond, bonds, nrang, angles);
 +          break;
 +        default:
 +          gmx_fatal(FARGS,"Automatic parameter generation not supported "
 +                      "for %s atom %d",
 +                      interaction_function[ftype].longname,
 +                      plist[ftype].param[i].AI+1);
 +        } /* switch */
 +        if (bERROR)
 +          gmx_fatal(FARGS,"Automatic parameter generation not supported "
 +                      "for %s atom %d for this bonding configuration",
 +                      interaction_function[ftype].longname,
 +                      plist[ftype].param[i].AI+1);
 +        sfree(bonds);
 +        sfree(angles);
 +        sfree(idihs);
 +      } /* if bSet */
 +      } /* for i */
 +      if (debug && plist[ftype].nr)
 +      fprintf(stderr,"Calculated parameters for %d out of %d %s atoms\n",
 +              nrset,plist[ftype].nr,interaction_function[ftype].longname);
 +    } /* if IF_VSITE */
 +
 +  done_at2vsitebond(atoms->nr,at2vb);
 +  
 +  return nvsite;
 +}
 +
 +void set_vsites_ptype(gmx_bool bVerbose, gmx_moltype_t *molt)
 +{
 +  int ftype,i;
 +  int nra,nrd;
 +  t_ilist *il;
 +  t_iatom *ia,avsite;
 +  
 +  if (bVerbose)
 +    fprintf(stderr,"Setting particle type to V for virtual sites\n");
 +  if (debug)
 +    fprintf(stderr,"checking %d functypes\n",F_NRE);
 +  for(ftype=0; ftype<F_NRE; ftype++) {
 +    il = &molt->ilist[ftype];
 +    if (interaction_function[ftype].flags & IF_VSITE) {
 +      nra    = interaction_function[ftype].nratoms;
 +      nrd    = il->nr;
 +      ia     = il->iatoms;
 +      
 +      if (debug && nrd)
 +      fprintf(stderr,"doing %d %s virtual sites\n",
 +              (nrd / (nra+1)),interaction_function[ftype].longname);
 +      
 +      for(i=0; (i<nrd); ) {
 +      /* The virtual site */
 +      avsite = ia[1];
 +      molt->atoms.atom[avsite].ptype = eptVSite;
 +      
 +      i  += nra+1;
 +      ia += nra+1;
 +      }
 +    }
 +  }
 +  
 +}
 +
 +typedef struct { 
 +  int ftype,parnr;
 +} t_pindex;
 +
 +static void check_vsite_constraints(t_params *plist, 
 +                                  int cftype, int vsite_type[])
 +{
 +  int      i,k,n;
 +  atom_id  atom;
 +  t_params *ps;
 +  
 +  n=0;
 +  ps = &(plist[cftype]);
 +  for(i=0; (i<ps->nr); i++)
 +    for(k=0; k<2; k++) {
 +      atom = ps->param[i].a[k];
 +      if (vsite_type[atom]!=NOTSET) {
 +      fprintf(stderr,"ERROR: Cannot have constraint (%u-%u) with virtual site (%u)\n",
 +              ps->param[i].AI+1, ps->param[i].AJ+1, atom+1);
 +      n++;
 +      }
 +    }
 +  if (n)
 +    gmx_fatal(FARGS,"There were %d virtual sites involved in constraints",n);
 +}
 +
 +static void clean_vsite_bonds(t_params *plist, t_pindex pindex[], 
 +                          int cftype, int vsite_type[])
 +{
 +  int      ftype,i,j,parnr,k,l,m,n,nvsite,nOut,kept_i,vsnral,vsitetype;
 +  int      nconverted,nremoved;
 +  atom_id  atom,oatom,constr,at1,at2;
 +  atom_id  vsiteatoms[MAXATOMLIST];
 +  gmx_bool     bKeep,bRemove,bUsed,bPresent,bThisFD,bThisOUT,bAllFD,bFirstTwo;
 +  t_params *ps;
 +
 +  if (cftype == F_CONNBONDS)
 +    return;
 +  
 +  ps = &(plist[cftype]);
 +  vsnral=0;
 +  kept_i=0;
 +  nconverted=0;
 +  nremoved=0;
 +  nOut=0;
 +  for(i=0; (i<ps->nr); i++) { /* for all bonds in the plist */
 +    bKeep=FALSE;
 +    bRemove=FALSE;
 +    bAllFD=TRUE;
 +    /* check if all virtual sites are constructed from the same atoms */
 +    nvsite=0;
 +    if(debug) 
 +      fprintf(debug,"constr %u %u:",ps->param[i].AI+1,ps->param[i].AJ+1);
 +    for(k=0; (k<2) && !bKeep && !bRemove; k++) { 
 +      /* for all atoms in the bond */
 +      atom = ps->param[i].a[k];
 +      if (vsite_type[atom]!=NOTSET) {
 +      if(debug) {
 +        fprintf(debug," d%d[%d: %d %d %d]",k,atom+1,
 +                plist[pindex[atom].ftype].param[pindex[atom].parnr].AJ+1,
 +                plist[pindex[atom].ftype].param[pindex[atom].parnr].AK+1,
 +                plist[pindex[atom].ftype].param[pindex[atom].parnr].AL+1);
 +      }
 +      nvsite++;
 +      bThisFD = ( (pindex[atom].ftype == F_VSITE3FD ) ||
 +                (pindex[atom].ftype == F_VSITE3FAD) ||
 +                (pindex[atom].ftype == F_VSITE4FD ) ||
 +                (pindex[atom].ftype == F_VSITE4FDN ) );
 +      bThisOUT= ( (pindex[atom].ftype == F_VSITE3OUT) &&
 +                  (interaction_function[cftype].flags & IF_CONSTRAINT) );
 +      bAllFD = bAllFD && bThisFD;
 +      if (bThisFD || bThisOUT) {
 +        if(debug)fprintf(debug," %s",bThisOUT?"out":"fd");
 +        oatom = ps->param[i].a[1-k]; /* the other atom */
 +        if ( vsite_type[oatom]==NOTSET &&
 +             oatom==plist[pindex[atom].ftype].param[pindex[atom].parnr].AJ ){
 +          /* if the other atom isn't a vsite, and it is AI */
 +          bRemove=TRUE;
 +          if (bThisOUT)
 +            nOut++;
 +          if(debug)fprintf(debug," D-AI");
 +        }
 +      }
 +      if (!bRemove) {
 +        if (nvsite==1) {
 +          /* if this is the first vsite we encounter then
 +             store construction atoms */
 +          vsnral=NRAL(pindex[atom].ftype)-1;
 +          for(m=0; (m<vsnral); m++)
 +            vsiteatoms[m]=
 +              plist[pindex[atom].ftype].param[pindex[atom].parnr].a[m+1];
 +        } else {
 +          /* if it is not the first then
 +             check if this vsite is constructed from the same atoms */
 +          if (vsnral == NRAL(pindex[atom].ftype)-1 )
 +            for(m=0; (m<vsnral) && !bKeep; m++) {
 +              bPresent=FALSE;
 +              constr=
 +                plist[pindex[atom].ftype].param[pindex[atom].parnr].a[m+1];
 +              for(n=0; (n<vsnral) && !bPresent; n++)
 +                if (constr == vsiteatoms[n])
 +                  bPresent=TRUE;
 +              if (!bPresent) {
 +                bKeep=TRUE;
 +                if(debug)fprintf(debug," !present");
 +              }
 +            }
 +          else {
 +            bKeep=TRUE;
 +            if(debug)fprintf(debug," !same#at");
 +          }
 +        }
 +      }
 +      }
 +    }
 +    
 +    if (bRemove) 
 +      bKeep=FALSE;
 +    else {
 +      /* if we have no virtual sites in this bond, keep it */
 +      if (nvsite==0) {
 +      if (debug)fprintf(debug," no vsite");
 +      bKeep=TRUE;
 +      }
 +    
 +      /* check if all non-vsite atoms are used in construction: */
 +      bFirstTwo=TRUE;
 +      for(k=0; (k<2) && !bKeep; k++) { /* for all atoms in the bond */
 +      atom = ps->param[i].a[k];
 +      if (vsite_type[atom]==NOTSET) {
 +        bUsed=FALSE;
 +        for(m=0; (m<vsnral) && !bUsed; m++)
 +          if (atom == vsiteatoms[m]) {
 +            bUsed=TRUE;
 +            bFirstTwo = bFirstTwo && m<2;
 +          }
 +        if (!bUsed) {
 +          bKeep=TRUE;
 +          if(debug)fprintf(debug," !used");
 +        }
 +      }
 +      }
 +      
 +      if ( ! ( bAllFD && bFirstTwo ) )
 +      /* check if all constructing atoms are constrained together */
 +      for (m=0; m<vsnral && !bKeep; m++) { /* all constr. atoms */
 +        at1 = vsiteatoms[m];
 +        at2 = vsiteatoms[(m+1) % vsnral];
 +        bPresent=FALSE;
 +        for (ftype=0; ftype<F_NRE; ftype++)
 +          if ( interaction_function[ftype].flags & IF_CONSTRAINT )
 +            for (j=0; (j<plist[ftype].nr) && !bPresent; j++)
 +              /* all constraints until one matches */
 +              bPresent = ( ( (plist[ftype].param[j].AI == at1) &&
 +                             (plist[ftype].param[j].AJ == at2) ) || 
 +                           ( (plist[ftype].param[j].AI == at2) &&
 +                             (plist[ftype].param[j].AJ == at1) ) );
 +        if (!bPresent) {
 +          bKeep=TRUE;
 +          if(debug)fprintf(debug," !bonded");
 +        }
 +      }
 +    }
 +    
 +    if ( bKeep ) {
 +      if(debug)fprintf(debug," keeping");
 +      /* now copy the bond to the new array */
-       memcpy(&(plist[F_CONNBONDS].param[plist[F_CONNBONDS].nr]),
-            &(ps->param[i]),(size_t)sizeof(plist[F_CONNBONDS].param[0]));
++      ps->param[kept_i] = ps->param[i];
 +      kept_i++;
 +    } else if (IS_CHEMBOND(cftype)) {
 +      srenew(plist[F_CONNBONDS].param,plist[F_CONNBONDS].nr+1);
-       /* now copy the angle to the new array */
-       memcpy(&(ps->param[kept_i]),
-            &(ps->param[i]),(size_t)sizeof(ps->param[0]));
-       kept_i++;
++      plist[F_CONNBONDS].param[plist[F_CONNBONDS].nr] = ps->param[i];
 +      plist[F_CONNBONDS].nr++;
 +      nconverted++;
 +    } else
 +      nremoved++;
 +    if(debug)fprintf(debug,"\n");
 +  }
 +  
 +  if (nremoved)
 +    fprintf(stderr,"Removed   %4d %15ss with virtual sites, %5d left\n",
 +          nremoved, interaction_function[cftype].longname, kept_i);
 +  if (nconverted)
 +    fprintf(stderr,"Converted %4d %15ss with virtual sites to connections, %5d left\n",
 +          nconverted, interaction_function[cftype].longname, kept_i);
 +  if (nOut)
 +    fprintf(stderr,"Warning: removed %d %ss with vsite with %s construction\n"
 +          "         This vsite construction does not guarantee constant "
 +          "bond-length\n"
 +          "         If the constructions were generated by pdb2gmx ignore "
 +          "this warning\n",
 +          nOut, interaction_function[cftype].longname, 
 +          interaction_function[F_VSITE3OUT].longname );
 +  ps->nr=kept_i;
 +}
 +
 +static void clean_vsite_angles(t_params *plist, t_pindex pindex[], 
 +                             int cftype, int vsite_type[],
 +                             at2vsitecon_t *at2vc)
 +{
 +  int      i,j,parnr,k,l,m,n,nvsite,kept_i,vsnral,vsitetype;
 +  atom_id  atom,constr,at1,at2;
 +  atom_id  vsiteatoms[MAXATOMLIST];
 +  gmx_bool     bKeep,bUsed,bPresent,bAll3FAD,bFirstTwo;
 +  t_params *ps;
 +  
 +  ps = &(plist[cftype]);
 +  vsnral=0;
 +  kept_i=0;
 +  for(i=0; (i<ps->nr); i++) { /* for all angles in the plist */
 +    bKeep=FALSE;
 +    bAll3FAD=TRUE;
 +    /* check if all virtual sites are constructed from the same atoms */
 +    nvsite=0;
 +    for(k=0; (k<3) && !bKeep; k++) { /* for all atoms in the angle */
 +      atom = ps->param[i].a[k];
 +      if (vsite_type[atom]!=NOTSET) {
 +      nvsite++;
 +      bAll3FAD = bAll3FAD && (pindex[atom].ftype == F_VSITE3FAD);
 +      if (nvsite==1) {
 +        /* store construction atoms of first vsite */
 +        vsnral=NRAL(pindex[atom].ftype)-1;
 +        for(m=0; (m<vsnral); m++)
 +          vsiteatoms[m]=
 +            plist[pindex[atom].ftype].param[pindex[atom].parnr].a[m+1];
 +      } else 
 +        /* check if this vsite is constructed from the same atoms */
 +        if (vsnral == NRAL(pindex[atom].ftype)-1 )
 +          for(m=0; (m<vsnral) && !bKeep; m++) {
 +            bPresent=FALSE;
 +            constr=
 +              plist[pindex[atom].ftype].param[pindex[atom].parnr].a[m+1];
 +            for(n=0; (n<vsnral) && !bPresent; n++)
 +              if (constr == vsiteatoms[n])
 +                bPresent=TRUE;
 +            if (!bPresent)
 +              bKeep=TRUE;
 +          }
 +        else
 +          bKeep=TRUE;
 +      }
 +    }
 +    
 +    /* keep all angles with no virtual sites in them or 
 +       with virtual sites with more than 3 constr. atoms */
 +    if ( nvsite == 0 && vsnral > 3 )
 +      bKeep=TRUE;
 +    
 +    /* check if all non-vsite atoms are used in construction: */
 +    bFirstTwo=TRUE;
 +    for(k=0; (k<3) && !bKeep; k++) { /* for all atoms in the angle */
 +      atom = ps->param[i].a[k];
 +      if (vsite_type[atom]==NOTSET) {
 +      bUsed=FALSE;
 +      for(m=0; (m<vsnral) && !bUsed; m++)
 +        if (atom == vsiteatoms[m]) {
 +          bUsed=TRUE;
 +          bFirstTwo = bFirstTwo && m<2;
 +        }
 +      if (!bUsed)
 +        bKeep=TRUE;
 +      }
 +    }
 +    
 +    if ( ! ( bAll3FAD && bFirstTwo ) )
 +      /* check if all constructing atoms are constrained together */
 +      for (m=0; m<vsnral && !bKeep; m++) { /* all constr. atoms */
 +      at1 = vsiteatoms[m];
 +      at2 = vsiteatoms[(m+1) % vsnral];
 +      bPresent=FALSE;
 +      for(j=0; j<at2vc[at1].nr; j++) {
 +        if (at2vc[at1].aj[j] == at2)
 +          bPresent = TRUE;
 +      }
 +      if (!bPresent)
 +        bKeep=TRUE;
 +      }
 +    
 +    if ( bKeep ) {
-   atom_id  vsiteatoms[3];
++        /* now copy the angle to the new array */
++        ps->param[kept_i] = ps->param[i];
++        kept_i++;
 +    }
 +  }
 +  
 +  if (ps->nr != kept_i)
 +    fprintf(stderr,"Removed   %4d %15ss with virtual sites, %5d left\n",
 +          ps->nr-kept_i, interaction_function[cftype].longname, kept_i);
 +  ps->nr=kept_i;
 +}
 +
 +static void clean_vsite_dihs(t_params *plist, t_pindex pindex[], 
 +                         int cftype, int vsite_type[])
 +{
 +  int      ftype,i,parnr,k,l,m,n,nvsite,kept_i,vsnral;
 +  atom_id  atom,constr;
-       memcpy(&(ps->param[kept_i]),
-            &(ps->param[i]),(size_t)sizeof(ps->param[0]));
-       kept_i++;
++  atom_id  vsiteatoms[4];
 +  gmx_bool     bKeep,bUsed,bPresent;
 +  t_params *ps;
 +  
 +  ps = &(plist[cftype]);
 +  
 +  vsnral=0;
 +  kept_i=0;
 +  for(i=0; (i<ps->nr); i++) { /* for all dihedrals in the plist */
 +    bKeep=FALSE;
 +    /* check if all virtual sites are constructed from the same atoms */
 +    nvsite=0;
 +    for(k=0; (k<4) && !bKeep; k++) { /* for all atoms in the dihedral */
 +      atom = ps->param[i].a[k];
 +      if (vsite_type[atom]!=NOTSET) {
 +      nvsite++;
 +      if (nvsite==1) {
 +        /* store construction atoms of first vsite */
 +        vsnral=NRAL(pindex[atom].ftype)-1;
++        assert(vsnral<=4);
 +        for(m=0; (m<vsnral); m++)
 +          vsiteatoms[m]=
 +            plist[pindex[atom].ftype].param[pindex[atom].parnr].a[m+1];
 +        if (debug) {
 +          fprintf(debug,"dih w. vsite: %u %u %u %u\n",
 +                  ps->param[i].AI+1,ps->param[i].AJ+1,
 +                  ps->param[i].AK+1,ps->param[i].AL+1);
 +          fprintf(debug,"vsite %u from: %u %u %u\n",
 +                  atom+1,vsiteatoms[0]+1,vsiteatoms[1]+1,vsiteatoms[2]+1);
 +        }
 +      } else 
 +        /* check if this vsite is constructed from the same atoms */
 +        if (vsnral == NRAL(pindex[atom].ftype)-1 )
 +          for(m=0; (m<vsnral) && !bKeep; m++) {
 +            bPresent=FALSE;
 +            constr=
 +              plist[pindex[atom].ftype].param[pindex[atom].parnr].a[m+1];
 +            for(n=0; (n<vsnral) && !bPresent; n++)
 +              if (constr == vsiteatoms[n])
 +                bPresent=TRUE;
 +            if (!bPresent)
 +              bKeep=TRUE;
 +          }
 +      }
 +    }
 +    
 +    /* keep all dihedrals with no virtual sites in them */
 +    if (nvsite==0)
 +      bKeep=TRUE;
 +    
 +    /* check if all atoms in dihedral are either virtual sites, or used in 
 +       construction of virtual sites. If so, keep it, if not throw away: */
 +    for(k=0; (k<4) && !bKeep; k++) { /* for all atoms in the dihedral */
 +      atom = ps->param[i].a[k];
 +      if (vsite_type[atom]==NOTSET) {
 +      bUsed=FALSE;
 +      for(m=0; (m<vsnral) && !bUsed; m++)
 +        if (atom == vsiteatoms[m])
 +          bUsed=TRUE;
 +      if (!bUsed) {
 +        bKeep=TRUE;
 +        if (debug) fprintf(debug,"unused atom in dih: %u\n",atom+1);
 +      }
 +      }
 +    }
 +      
 +    if ( bKeep ) {
++        ps->param[kept_i] = ps->param[i];
++        kept_i++;
 +    }
 +  }
 +
 +  if (ps->nr != kept_i)
 +    fprintf(stderr,"Removed   %4d %15ss with virtual sites, %5d left\n", 
 +          ps->nr-kept_i, interaction_function[cftype].longname, kept_i);
 +  ps->nr=kept_i;
 +}
 +
 +void clean_vsite_bondeds(t_params *plist, int natoms, gmx_bool bRmVSiteBds)
 +{
 +  int i,k,nvsite,ftype,vsite,parnr;
 +  int *vsite_type;
 +  t_pindex *pindex;
 +  at2vsitecon_t *at2vc;
 +
 +  pindex=0; /* avoid warnings */
 +  /* make vsite_type array */
 +  snew(vsite_type,natoms);
 +  for(i=0; i<natoms; i++)
 +    vsite_type[i]=NOTSET;
 +  nvsite=0;
 +  for(ftype=0; ftype<F_NRE; ftype++)
 +    if (interaction_function[ftype].flags & IF_VSITE) {
 +      nvsite+=plist[ftype].nr;
 +      i = 0;
 +      while (i < plist[ftype].nr) {
 +      vsite = plist[ftype].param[i].AI;
 +      if ( vsite_type[vsite] == NOTSET)
 +        vsite_type[vsite] = ftype;
 +      else
 +        gmx_fatal(FARGS,"multiple vsite constructions for atom %d",vsite+1);
 +      if (ftype == F_VSITEN) {
 +        while (i < plist[ftype].nr && plist[ftype].param[i].AI == vsite)
 +          i++;
 +      } else {
 +        i++;
 +      }
 +      }
 +    }
 +  
 +  /* the rest only if we have virtual sites: */
 +  if (nvsite) {
 +    fprintf(stderr,"Cleaning up constraints %swith virtual sites\n",
 +          bRmVSiteBds?"and constant bonded interactions ":"");
 +
 +    /* Make a reverse list to avoid ninteractions^2 operations */
 +    at2vc = make_at2vsitecon(natoms,plist);
 +
 +    snew(pindex,natoms);
 +    for(ftype=0; ftype<F_NRE; ftype++) {
 +      if ((interaction_function[ftype].flags & IF_VSITE) &&
 +        ftype != F_VSITEN) {
 +      for (parnr=0; (parnr<plist[ftype].nr); parnr++) {
 +        k=plist[ftype].param[parnr].AI;
 +        pindex[k].ftype=ftype;
 +        pindex[k].parnr=parnr;
 +      }
 +      }
 +    }
 +
 +    if (debug)
 +      for(i=0; i<natoms; i++)
 +      fprintf(debug,"atom %d vsite_type %s\n",i, 
 +              vsite_type[i]==NOTSET ? "NOTSET" : 
 +              interaction_function[vsite_type[i]].name);
 +    
 +    /* remove things with vsite atoms */
 +    for(ftype=0; ftype<F_NRE; ftype++)
 +      if ( ( ( interaction_function[ftype].flags & IF_BOND ) && bRmVSiteBds ) ||
 +         ( interaction_function[ftype].flags & IF_CONSTRAINT ) ) {
 +      if (interaction_function[ftype].flags & (IF_BTYPE | IF_CONSTRAINT) )
 +        clean_vsite_bonds (plist, pindex, ftype, vsite_type);
 +      else if (interaction_function[ftype].flags & IF_ATYPE)
 +        clean_vsite_angles(plist, pindex, ftype, vsite_type, at2vc);
 +      else if ( (ftype==F_PDIHS) || (ftype==F_IDIHS) )
 +        clean_vsite_dihs  (plist, pindex, ftype, vsite_type);
 +      }
 +    /* check if we have constraints left with virtual sites in them */
 +    for(ftype=0; ftype<F_NRE; ftype++)
 +      if (interaction_function[ftype].flags & IF_CONSTRAINT)
 +      check_vsite_constraints(plist, ftype, vsite_type);
 +
 +    done_at2vsitecon(natoms,at2vc);
 +  }
 +  sfree(pindex);
 +  sfree(vsite_type);
 +}
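Note on the cleanup pass above: the vsite_type[] and pindex[] arrays exist to avoid an O(ninteractions^2) search. Each virtual-site atom is mapped once to the interaction entry that constructs it, so the later bond/angle/dihedral cleanup loops can look that entry up in constant time. The standalone sketch below illustrates the same reverse-index pattern; the types and names (Param, PIndex, build_reverse_index) are hypothetical simplifications for illustration, not the GROMACS data structures.

#include <stdio.h>

#define NOTSET -1

/* Hypothetical, simplified interaction record: ai is the constructed
 * (virtual) site, like param[i].AI in the code above. */
typedef struct { int ai; } Param;

/* Reverse-lookup entry: which function type and which parameter index
 * construct a given atom (NOTSET if the atom is not a virtual site). */
typedef struct { int ftype, parnr; } PIndex;

static void build_reverse_index(int natoms, int ftype,
                                const Param *plist, int nr, PIndex *pindex)
{
    int i;

    for (i = 0; i < natoms; i++) {
        pindex[i].ftype = NOTSET;
        pindex[i].parnr = NOTSET;
    }
    /* One pass over the interaction list instead of one pass per atom */
    for (i = 0; i < nr; i++) {
        pindex[plist[i].ai].ftype = ftype;
        pindex[plist[i].ai].parnr = i;
    }
}

int main(void)
{
    Param  plist[] = { {3}, {7} };   /* atoms 3 and 7 are virtual sites */
    PIndex pindex[10];

    build_reverse_index(10, 1, plist, 2, pindex);
    printf("atom 7 -> ftype %d, parnr %d\n", pindex[7].ftype, pindex[7].parnr);
    return 0;
}

With such a map, deciding whether an atom in a bond is a virtual site, and fetching the atoms that construct it, is a single array lookup, which is what the clean_vsite_bonds/angles/dihs routines above rely on.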
Simple merge
Simple merge
Simple merge
index 97f66bde0722c0974d9f66476e13472ccc9c7308,0000000000000000000000000000000000000000..2b563d902a5171adfec6bc6e8bd8d7ccfcea7ae6
mode 100644,000000..100644
--- /dev/null
@@@ -1,8653 -1,0 +1,8653 @@@
-         fprintf(fplog,"The charge group starting at atom %d moved more than the distance allowed by the domain decomposition (%f) in direction %c\n",
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + * This file is part of Gromacs        Copyright (c) 1991-2008
 + * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
 + *
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the research papers on the package. Check out http://www.gromacs.org
 + * 
 + * And Hey:
 + * Gnomes, ROck Monsters And Chili Sauce
 + */
 +
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <stdio.h>
 +#include <time.h>
 +#include <math.h>
 +#include <string.h>
 +#include <stdlib.h>
 +#include "typedefs.h"
 +#include "smalloc.h"
 +#include "vec.h"
 +#include "domdec.h"
 +#include "domdec_network.h"
 +#include "nrnb.h"
 +#include "pbc.h"
 +#include "chargegroup.h"
 +#include "constr.h"
 +#include "mdatoms.h"
 +#include "names.h"
 +#include "pdbio.h"
 +#include "futil.h"
 +#include "force.h"
 +#include "pme.h"
 +#include "pull.h"
 +#include "pull_rotation.h"
 +#include "gmx_wallcycle.h"
 +#include "mdrun.h"
 +#include "nsgrid.h"
 +#include "shellfc.h"
 +#include "mtop_util.h"
 +#include "gmxfio.h"
 +#include "gmx_ga2la.h"
 +#include "gmx_sort.h"
 +#include "macros.h"
 +
 +#ifdef GMX_LIB_MPI
 +#include <mpi.h>
 +#endif
 +#ifdef GMX_THREAD_MPI
 +#include "tmpi.h"
 +#endif
 +
 +#define DDRANK(dd,rank)    (rank)
 +#define DDMASTERRANK(dd)   (dd->masterrank)
 +
 +typedef struct gmx_domdec_master
 +{
 +    /* The cell boundaries */
 +    real **cell_x;
 +    /* The global charge group division */
 +    int  *ncg;     /* Number of home charge groups for each node */
 +    int  *index;   /* Index of nnodes+1 into cg */
 +    int  *cg;      /* Global charge group index */
 +    int  *nat;     /* Number of home atoms for each node. */
 +    int  *ibuf;    /* Buffer for communication */
 +    rvec *vbuf;    /* Buffer for state scattering and gathering */
 +} gmx_domdec_master_t;
 +
 +typedef struct
 +{
 +    /* The numbers of charge groups to send and receive for each cell
 +     * that requires communication, the last entry contains the total
 +     * number of atoms that needs to be communicated.
 +     */
 +    int nsend[DD_MAXIZONE+2];
 +    int nrecv[DD_MAXIZONE+2];
 +    /* The charge groups to send */
 +    int *index;
 +    int nalloc;
 +    /* The atom range for non-in-place communication */
 +    int cell2at0[DD_MAXIZONE];
 +    int cell2at1[DD_MAXIZONE];
 +} gmx_domdec_ind_t;
 +
 +typedef struct
 +{
 +    int  np;                   /* Number of grid pulses in this dimension */
 +    int  np_dlb;               /* For dlb, for use with edlbAUTO          */
 +    gmx_domdec_ind_t *ind;     /* The indices to communicate, size np     */
 +    int  np_nalloc;
 +    gmx_bool bInPlace;             /* Can we communicate in place?            */
 +} gmx_domdec_comm_dim_t;
 +
 +typedef struct
 +{
 +    gmx_bool *bCellMin;    /* Temp. var.: is this cell size at the limit     */
 +    real *cell_f;      /* State var.: cell boundaries, box relative      */
 +    real *old_cell_f;  /* Temp. var.: old cell size                      */
 +    real *cell_f_max0; /* State var.: max lower boundary, incl neighbors */
 +    real *cell_f_min1; /* State var.: min upper boundary, incl neighbors */
 +    real *bound_min;   /* Temp. var.: lower limit for cell boundary      */
 +    real *bound_max;   /* Temp. var.: upper limit for cell boundary      */
 +    gmx_bool bLimited;     /* State var.: is DLB limited in this dim and row */
 +    real *buf_ncd;     /* Temp. var.                                     */
 +} gmx_domdec_root_t;
 +
 +#define DD_NLOAD_MAX 9
 +
 +/* Here floats are accurate enough, since these variables
 + * only influence the load balancing, not the actual MD results.
 + */
 +typedef struct
 +{
 +    int  nload;
 +    float *load;
 +    float sum;
 +    float max;
 +    float sum_m;
 +    float cvol_min;
 +    float mdf;
 +    float pme;
 +    int   flags;
 +} gmx_domdec_load_t;
 +
 +typedef struct
 +{
 +    int  nsc;
 +    int  ind_gl;
 +    int  ind;
 +} gmx_cgsort_t;
 +
 +typedef struct
 +{
 +    gmx_cgsort_t *sort1,*sort2;
 +    int  sort_nalloc;
 +    gmx_cgsort_t *sort_new;
 +    int  sort_new_nalloc;
 +    int  *ibuf;
 +    int  ibuf_nalloc;
 +} gmx_domdec_sort_t;
 +
 +typedef struct
 +{
 +    rvec *v;
 +    int  nalloc;
 +} vec_rvec_t;
 +
 +/* This enum determines the order of the coordinates.
 + * ddnatHOME and ddnatZONE should be first and second,
 + * the others can be ordered as wanted.
 + */
 +enum { ddnatHOME, ddnatZONE, ddnatVSITE, ddnatCON, ddnatNR };
 +
 +enum { edlbAUTO, edlbNO, edlbYES, edlbNR };
 +const char *edlb_names[edlbNR] = { "auto", "no", "yes" };
 +
 +typedef struct
 +{
 +    int  dim;      /* The dimension                                          */
 +    gmx_bool dim_match;/* Tells if DD and PME dims match                         */
 +    int  nslab;    /* The number of PME slabs in this dimension              */
 +    real *slb_dim_f; /* Cell sizes for determining the PME comm. with SLB    */
 +    int  *pp_min;  /* The minimum pp node location, size nslab               */
 +    int  *pp_max;  /* The maximum pp node location,size nslab                */
 +    int  maxshift; /* The maximum shift for coordinate redistribution in PME */
 +} gmx_ddpme_t;
 +
 +typedef struct
 +{
 +    real min0;    /* The minimum bottom of this zone                        */
 +    real max1;    /* The maximum top of this zone                           */
 +    real mch0;    /* The maximum bottom communication height for this zone  */
 +    real mch1;    /* The maximum top communication height for this zone     */
 +    real p1_0;    /* The bottom value of the first cell in this zone        */
 +    real p1_1;    /* The top value of the first cell in this zone           */
 +} gmx_ddzone_t;
 +
 +typedef struct gmx_domdec_comm
 +{
 +    /* All arrays are indexed with 0 to dd->ndim (not Cartesian indexing),
 +     * unless stated otherwise.
 +     */
 +
 +    /* The number of decomposition dimensions for PME, 0: no PME */
 +    int  npmedecompdim;
 +    /* The number of nodes doing PME (PP/PME or only PME) */
 +    int  npmenodes;
 +    int  npmenodes_x;
 +    int  npmenodes_y;
 +    /* The communication setup including the PME only nodes */
 +    gmx_bool bCartesianPP_PME;
 +    ivec ntot;
 +    int  cartpmedim;
 +    int  *pmenodes;          /* size npmenodes                         */
 +    int  *ddindex2simnodeid; /* size npmenodes, only with bCartesianPP
 +                              * but with bCartesianPP_PME              */
 +    gmx_ddpme_t ddpme[2];
 +    
 +    /* The DD particle-particle nodes only */
 +    gmx_bool bCartesianPP;
 +    int  *ddindex2ddnodeid; /* size npmenode, only with bCartesianPP_PME */
 +    
 +    /* The global charge groups */
 +    t_block cgs_gl;
 +
 +    /* Should we sort the cgs */
 +    int  nstSortCG;
 +    gmx_domdec_sort_t *sort;
 +    
 +    /* Are there bonded and multi-body interactions between charge groups? */
 +    gmx_bool bInterCGBondeds;
 +    gmx_bool bInterCGMultiBody;
 +
 +    /* Data for the optional bonded interaction atom communication range */
 +    gmx_bool bBondComm;
 +    t_blocka *cglink;
 +    char *bLocalCG;
 +
 +    /* The DLB option */
 +    int  eDLB;
 +    /* Are we actually using DLB? */
 +    gmx_bool bDynLoadBal;
 +
 +    /* Cell sizes for static load balancing, first index cartesian */
 +    real **slb_frac;
 +    
 +    /* The width of the communicated boundaries */
 +    real cutoff_mbody;
 +    real cutoff;
 +    /* The minimum cell size (including triclinic correction) */
 +    rvec cellsize_min;
 +    /* For dlb, for use with edlbAUTO */
 +    rvec cellsize_min_dlb;
 +    /* The lower limit for the DD cell size with DLB */
 +    real cellsize_limit;
 +    /* Effectively no NB cut-off limit with DLB for systems without PBC? */
 +    gmx_bool bVacDLBNoLimit;
 +
 +    /* tric_dir is only stored here because dd_get_ns_ranges needs it */
 +    ivec tric_dir;
 +    /* box0 and box_size are required with dim's without pbc and -gcom */
 +    rvec box0;
 +    rvec box_size;
 +    
 +    /* The cell boundaries */
 +    rvec cell_x0;
 +    rvec cell_x1;
 +
 +    /* The old location of the cell boundaries, to check cg displacements */
 +    rvec old_cell_x0;
 +    rvec old_cell_x1;
 +
 +    /* The communication setup and charge group boundaries for the zones */
 +    gmx_domdec_zones_t zones;
 +    
 +    /* The zone limits for DD dimensions 1 and 2 (not 0), determined from
 +     * cell boundaries of neighboring cells for dynamic load balancing.
 +     */
 +    gmx_ddzone_t zone_d1[2];
 +    gmx_ddzone_t zone_d2[2][2];
 +    
 +    /* The coordinate/force communication setup and indices */
 +    gmx_domdec_comm_dim_t cd[DIM];
 +    /* The maximum number of cells to communicate with in one dimension */
 +    int  maxpulse;
 +    
 +    /* Which cg distribution is stored on the master node */
 +    int master_cg_ddp_count;
 +    
 +    /* The number of cg's received from the direct neighbors */
 +    int  zone_ncg1[DD_MAXZONE];
 +    
 +    /* The atom counts, the range for each type t is nat[t-1] <= at < nat[t] */
 +    int  nat[ddnatNR];
 +    
 +    /* Communication buffer for general use */
 +    int  *buf_int;
 +    int  nalloc_int;
 +
 +     /* Communication buffer for general use */
 +    vec_rvec_t vbuf;
 +    
 +    /* Communication buffers only used with multiple grid pulses */
 +    int  *buf_int2;
 +    int  nalloc_int2;
 +    vec_rvec_t vbuf2;
 +    
 +    /* Communication buffers for local redistribution */
 +    int  **cggl_flag;
 +    int  cggl_flag_nalloc[DIM*2];
 +    rvec **cgcm_state;
 +    int  cgcm_state_nalloc[DIM*2];
 +    
 +    /* Cell sizes for dynamic load balancing */
 +    gmx_domdec_root_t **root;
 +    real *cell_f_row;
 +    real cell_f0[DIM];
 +    real cell_f1[DIM];
 +    real cell_f_max0[DIM];
 +    real cell_f_min1[DIM];
 +    
 +    /* Stuff for load communication */
 +    gmx_bool bRecordLoad;
 +    gmx_domdec_load_t *load;
 +#ifdef GMX_MPI
 +    MPI_Comm *mpi_comm_load;
 +#endif
 +
 +    /* Maximum DLB scaling per load balancing step in percent */
 +    int dlb_scale_lim;
 +
 +    /* Cycle counters */
 +    float cycl[ddCyclNr];
 +    int   cycl_n[ddCyclNr];
 +    float cycl_max[ddCyclNr];
 +    /* Flop counter (0=no,1=yes,2=with (eFlop-1)*5% noise */
 +    int eFlop;
 +    double flop;
 +    int    flop_n;
 +    /* How often have we had load measurements */
 +    int    n_load_have;
 +    /* How often have we collected the load measurements */
 +    int    n_load_collect;
 +    
 +    /* Statistics */
 +    double sum_nat[ddnatNR-ddnatZONE];
 +    int    ndecomp;
 +    int    nload;
 +    double load_step;
 +    double load_sum;
 +    double load_max;
 +    ivec   load_lim;
 +    double load_mdf;
 +    double load_pme;
 +
 +    /* The last partition step */
 +    gmx_large_int_t globalcomm_step;
 +
 +    /* Debugging */
 +    int  nstDDDump;
 +    int  nstDDDumpGrid;
 +    int  DD_debug;
 +} gmx_domdec_comm_t;
 +
 +/* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
 +#define DD_CGIBS 2
 +
 +/* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
 +#define DD_FLAG_NRCG  65535
 +#define DD_FLAG_FW(d) (1<<(16+(d)*2))
 +#define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
 +
 +/* Zone permutation required to obtain consecutive charge groups
 + * for neighbor searching.
 + */
 +static const int zone_perm[3][4] = { {0,0,0,0},{1,0,0,0},{3,0,1,2} };
 +
 +/* dd_zo and dd_zp3/dd_zp2 are set up such that i zones with non-zero
 + * components see only j zones with that component 0.
 + */
 +
 +/* The DD zone order */
 +static const ivec dd_zo[DD_MAXZONE] =
 +  {{0,0,0},{1,0,0},{1,1,0},{0,1,0},{0,1,1},{0,0,1},{1,0,1},{1,1,1}};
 +
 +/* The 3D setup */
 +#define dd_z3n  8
 +#define dd_zp3n 4
 +static const ivec dd_zp3[dd_zp3n] = {{0,0,8},{1,3,6},{2,5,6},{3,5,7}};
 +
 +/* The 2D setup */
 +#define dd_z2n  4
 +#define dd_zp2n 2
 +static const ivec dd_zp2[dd_zp2n] = {{0,0,4},{1,3,4}};
 +
 +/* The 1D setup */
 +#define dd_z1n  2
 +#define dd_zp1n 1
 +static const ivec dd_zp1[dd_zp1n] = {{0,0,2}};
 +
 +/* Factors used to avoid problems due to rounding issues */
 +#define DD_CELL_MARGIN       1.0001
 +#define DD_CELL_MARGIN2      1.00005
 +/* Factor to account for pressure scaling during nstlist steps */
 +#define DD_PRES_SCALE_MARGIN 1.02
 +
 +/* Allowed performance loss before we DLB or warn */
 +#define DD_PERF_LOSS 0.05
 +
 +#define DD_CELL_F_SIZE(dd,di) ((dd)->nc[(dd)->dim[(di)]]+1+(di)*2+1+(di))
 +
 +/* Use separate MPI send and receive commands
 + * when nnodes <= GMX_DD_NNODES_SENDRECV.
 + * This saves memory (and some copying for small nnodes).
 + * For high parallelization scatter and gather calls are used.
 + */
 +#define GMX_DD_NNODES_SENDRECV 4
 +
 +
 +/*
 +#define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
 +
 +static void index2xyz(ivec nc,int ind,ivec xyz)
 +{
 +  xyz[XX] = ind % nc[XX];
 +  xyz[YY] = (ind / nc[XX]) % nc[YY];
 +  xyz[ZZ] = ind / (nc[YY]*nc[XX]);
 +}
 +*/
 +
 +/* This order is required to minimize the coordinate communication in PME
 + * which uses decomposition in the x direction.
 + */
 +#define dd_index(n,i) ((((i)[XX]*(n)[YY] + (i)[YY])*(n)[ZZ]) + (i)[ZZ])
 +
 +static void ddindex2xyz(ivec nc,int ind,ivec xyz)
 +{
 +    xyz[XX] = ind / (nc[YY]*nc[ZZ]);
 +    xyz[YY] = (ind / nc[ZZ]) % nc[YY];
 +    xyz[ZZ] = ind % nc[ZZ];
 +}
 +
 +static int ddcoord2ddnodeid(gmx_domdec_t *dd,ivec c)
 +{
 +    int ddindex;
 +    int ddnodeid=-1;
 +    
 +    ddindex = dd_index(dd->nc,c);
 +    if (dd->comm->bCartesianPP_PME)
 +    {
 +        ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
 +    }
 +    else if (dd->comm->bCartesianPP)
 +    {
 +#ifdef GMX_MPI
 +        MPI_Cart_rank(dd->mpi_comm_all,c,&ddnodeid);
 +#endif
 +    }
 +    else
 +    {
 +        ddnodeid = ddindex;
 +    }
 +    
 +    return ddnodeid;
 +}
 +
 +static gmx_bool dynamic_dd_box(gmx_ddbox_t *ddbox,t_inputrec *ir)
 +{
 +    return (ddbox->nboundeddim < DIM || DYNAMIC_BOX(*ir));
 +}
 +
 +int ddglatnr(gmx_domdec_t *dd,int i)
 +{
 +    int atnr;
 +    
 +    if (dd == NULL)
 +    {
 +        atnr = i + 1;
 +    }
 +    else
 +    {
 +        if (i >= dd->comm->nat[ddnatNR-1])
 +        {
 +            gmx_fatal(FARGS,"glatnr called with %d, which is larger than the local number of atoms (%d)",i,dd->comm->nat[ddnatNR-1]);
 +        }
 +        atnr = dd->gatindex[i] + 1;
 +    }
 +    
 +    return atnr;
 +}
 +
 +t_block *dd_charge_groups_global(gmx_domdec_t *dd)
 +{
 +    return &dd->comm->cgs_gl;
 +}
 +
 +static void vec_rvec_init(vec_rvec_t *v)
 +{
 +    v->nalloc = 0;
 +    v->v      = NULL;
 +}
 +
 +static void vec_rvec_check_alloc(vec_rvec_t *v,int n)
 +{
 +    if (n > v->nalloc)
 +    {
 +        v->nalloc = over_alloc_dd(n);
 +        srenew(v->v,v->nalloc);
 +    }
 +}
 +
 +void dd_store_state(gmx_domdec_t *dd,t_state *state)
 +{
 +    int i;
 +    
 +    if (state->ddp_count != dd->ddp_count)
 +    {
 +        gmx_incons("The state does not the domain decomposition state");
 +    }
 +    
 +    state->ncg_gl = dd->ncg_home;
 +    if (state->ncg_gl > state->cg_gl_nalloc)
 +    {
 +        state->cg_gl_nalloc = over_alloc_dd(state->ncg_gl);
 +        srenew(state->cg_gl,state->cg_gl_nalloc);
 +    }
 +    for(i=0; i<state->ncg_gl; i++)
 +    {
 +        state->cg_gl[i] = dd->index_gl[i];
 +    }
 +    
 +    state->ddp_count_cg_gl = dd->ddp_count;
 +}
 +
 +gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
 +{
 +    return &dd->comm->zones;
 +}
 +
 +void dd_get_ns_ranges(gmx_domdec_t *dd,int icg,
 +                      int *jcg0,int *jcg1,ivec shift0,ivec shift1)
 +{
 +    gmx_domdec_zones_t *zones;
 +    int izone,d,dim;
 +
 +    zones = &dd->comm->zones;
 +
 +    izone = 0;
 +    while (icg >= zones->izone[izone].cg1)
 +    {
 +        izone++;
 +    }
 +    
 +    if (izone == 0)
 +    {
 +        *jcg0 = icg;
 +    }
 +    else if (izone < zones->nizone)
 +    {
 +        *jcg0 = zones->izone[izone].jcg0;
 +    }
 +    else
 +    {
 +        gmx_fatal(FARGS,"DD icg %d out of range: izone (%d) >= nizone (%d)",
 +                  icg,izone,zones->nizone);
 +    }
 +        
 +    *jcg1 = zones->izone[izone].jcg1;
 +    
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        dim = dd->dim[d];
 +        shift0[dim] = zones->izone[izone].shift0[dim];
 +        shift1[dim] = zones->izone[izone].shift1[dim];
 +        if (dd->comm->tric_dir[dim] || (dd->bGridJump && d > 0))
 +        {
 +            /* A conservative approach, this can be optimized */
 +            shift0[dim] -= 1;
 +            shift1[dim] += 1;
 +        }
 +    }
 +}
 +
 +int dd_natoms_vsite(gmx_domdec_t *dd)
 +{
 +    return dd->comm->nat[ddnatVSITE];
 +}
 +
 +void dd_get_constraint_range(gmx_domdec_t *dd,int *at_start,int *at_end)
 +{
 +    *at_start = dd->comm->nat[ddnatCON-1];
 +    *at_end   = dd->comm->nat[ddnatCON];
 +}
 +
 +void dd_move_x(gmx_domdec_t *dd,matrix box,rvec x[])
 +{
 +    int  nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
 +    int  *index,*cgindex;
 +    gmx_domdec_comm_t *comm;
 +    gmx_domdec_comm_dim_t *cd;
 +    gmx_domdec_ind_t *ind;
 +    rvec shift={0,0,0},*buf,*rbuf;
 +    gmx_bool bPBC,bScrew;
 +    
 +    comm = dd->comm;
 +    
 +    cgindex = dd->cgindex;
 +    
 +    buf = comm->vbuf.v;
 +
 +    nzone = 1;
 +    nat_tot = dd->nat_home;
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        bPBC   = (dd->ci[dd->dim[d]] == 0);
 +        bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
 +        if (bPBC)
 +        {
 +            copy_rvec(box[dd->dim[d]],shift);
 +        }
 +        cd = &comm->cd[d];
 +        for(p=0; p<cd->np; p++)
 +        {
 +            ind = &cd->ind[p];
 +            index = ind->index;
 +            n = 0;
 +            if (!bPBC)
 +            {
 +                for(i=0; i<ind->nsend[nzone]; i++)
 +                {
 +                    at0 = cgindex[index[i]];
 +                    at1 = cgindex[index[i]+1];
 +                    for(j=at0; j<at1; j++)
 +                    {
 +                        copy_rvec(x[j],buf[n]);
 +                        n++;
 +                    }
 +                }
 +            }
 +            else if (!bScrew)
 +            {
 +                for(i=0; i<ind->nsend[nzone]; i++)
 +                {
 +                    at0 = cgindex[index[i]];
 +                    at1 = cgindex[index[i]+1];
 +                    for(j=at0; j<at1; j++)
 +                    {
 +                        /* We need to shift the coordinates */
 +                        rvec_add(x[j],shift,buf[n]);
 +                        n++;
 +                    }
 +                }
 +            }
 +            else
 +            {
 +                for(i=0; i<ind->nsend[nzone]; i++)
 +                {
 +                    at0 = cgindex[index[i]];
 +                    at1 = cgindex[index[i]+1];
 +                    for(j=at0; j<at1; j++)
 +                    {
 +                        /* Shift x */
 +                        buf[n][XX] = x[j][XX] + shift[XX];
 +                        /* Rotate y and z.
 +                         * This operation requires a special shift force
 +                         * treatment, which is performed in calc_vir.
 +                         */
 +                        buf[n][YY] = box[YY][YY] - x[j][YY];
 +                        buf[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
 +                        n++;
 +                    }
 +                }
 +            }
 +            
 +            if (cd->bInPlace)
 +            {
 +                rbuf = x + nat_tot;
 +            }
 +            else
 +            {
 +                rbuf = comm->vbuf2.v;
 +            }
 +            /* Send and receive the coordinates */
 +            dd_sendrecv_rvec(dd, d, dddirBackward,
 +                             buf,  ind->nsend[nzone+1],
 +                             rbuf, ind->nrecv[nzone+1]);
 +            if (!cd->bInPlace)
 +            {
 +                j = 0;
 +                for(zone=0; zone<nzone; zone++)
 +                {
 +                    for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
 +                    {
 +                        copy_rvec(rbuf[j],x[i]);
 +                        j++;
 +                    }
 +                }
 +            }
 +            nat_tot += ind->nrecv[nzone+1];
 +        }
 +        nzone += nzone;
 +    }
 +}
 +
 +void dd_move_f(gmx_domdec_t *dd,rvec f[],rvec *fshift)
 +{
 +    int  nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
 +    int  *index,*cgindex;
 +    gmx_domdec_comm_t *comm;
 +    gmx_domdec_comm_dim_t *cd;
 +    gmx_domdec_ind_t *ind;
 +    rvec *buf,*sbuf;
 +    ivec vis;
 +    int  is;
 +    gmx_bool bPBC,bScrew;
 +    
 +    comm = dd->comm;
 +    
 +    cgindex = dd->cgindex;
 +
 +    buf = comm->vbuf.v;
 +
 +    n = 0;
 +    nzone = comm->zones.n/2;
 +    nat_tot = dd->nat_tot;
 +    for(d=dd->ndim-1; d>=0; d--)
 +    {
 +        bPBC   = (dd->ci[dd->dim[d]] == 0);
 +        bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
 +        if (fshift == NULL && !bScrew)
 +        {
 +            bPBC = FALSE;
 +        }
 +        /* Determine which shift vector we need */
 +        clear_ivec(vis);
 +        vis[dd->dim[d]] = 1;
 +        is = IVEC2IS(vis);
 +        
 +        cd = &comm->cd[d];
 +        for(p=cd->np-1; p>=0; p--) {
 +            ind = &cd->ind[p];
 +            nat_tot -= ind->nrecv[nzone+1];
 +            if (cd->bInPlace)
 +            {
 +                sbuf = f + nat_tot;
 +            }
 +            else
 +            {
 +                sbuf = comm->vbuf2.v;
 +                j = 0;
 +                for(zone=0; zone<nzone; zone++)
 +                {
 +                    for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
 +                    {
 +                        copy_rvec(f[i],sbuf[j]);
 +                        j++;
 +                    }
 +                }
 +            }
 +            /* Communicate the forces */
 +            dd_sendrecv_rvec(dd, d, dddirForward,
 +                             sbuf, ind->nrecv[nzone+1],
 +                             buf,  ind->nsend[nzone+1]);
 +            index = ind->index;
 +            /* Add the received forces */
 +            n = 0;
 +            if (!bPBC)
 +            {
 +                for(i=0; i<ind->nsend[nzone]; i++)
 +                {
 +                    at0 = cgindex[index[i]];
 +                    at1 = cgindex[index[i]+1];
 +                    for(j=at0; j<at1; j++)
 +                    {
 +                        rvec_inc(f[j],buf[n]);
 +                        n++;
 +                    }
 +                } 
 +            }
 +            else if (!bScrew)
 +            {
 +                for(i=0; i<ind->nsend[nzone]; i++)
 +                {
 +                    at0 = cgindex[index[i]];
 +                    at1 = cgindex[index[i]+1];
 +                    for(j=at0; j<at1; j++)
 +                    {
 +                        rvec_inc(f[j],buf[n]);
 +                        /* Add this force to the shift force */
 +                        rvec_inc(fshift[is],buf[n]);
 +                        n++;
 +                    }
 +                }
 +            }
 +            else
 +            {
 +                for(i=0; i<ind->nsend[nzone]; i++)
 +                {
 +                    at0 = cgindex[index[i]];
 +                    at1 = cgindex[index[i]+1];
 +                    for(j=at0; j<at1; j++)
 +                    {
 +                        /* Rotate the force */
 +                        f[j][XX] += buf[n][XX];
 +                        f[j][YY] -= buf[n][YY];
 +                        f[j][ZZ] -= buf[n][ZZ];
 +                        if (fshift)
 +                        {
 +                            /* Add this force to the shift force */
 +                            rvec_inc(fshift[is],buf[n]);
 +                        }
 +                        n++;
 +                    }
 +                }
 +            }
 +        }
 +        nzone /= 2;
 +    }
 +}
 +
 +void dd_atom_spread_real(gmx_domdec_t *dd,real v[])
 +{
 +    int  nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
 +    int  *index,*cgindex;
 +    gmx_domdec_comm_t *comm;
 +    gmx_domdec_comm_dim_t *cd;
 +    gmx_domdec_ind_t *ind;
 +    real *buf,*rbuf;
 +    
 +    comm = dd->comm;
 +    
 +    cgindex = dd->cgindex;
 +    
 +    buf = &comm->vbuf.v[0][0];
 +
 +    nzone = 1;
 +    nat_tot = dd->nat_home;
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        cd = &comm->cd[d];
 +        for(p=0; p<cd->np; p++)
 +        {
 +            ind = &cd->ind[p];
 +            index = ind->index;
 +            n = 0;
 +            for(i=0; i<ind->nsend[nzone]; i++)
 +            {
 +                at0 = cgindex[index[i]];
 +                at1 = cgindex[index[i]+1];
 +                for(j=at0; j<at1; j++)
 +                {
 +                    buf[n] = v[j];
 +                    n++;
 +                }
 +            }
 +            
 +            if (cd->bInPlace)
 +            {
 +                rbuf = v + nat_tot;
 +            }
 +            else
 +            {
 +                rbuf = &comm->vbuf2.v[0][0];
 +            }
 +            /* Send and receive the coordinates */
 +            dd_sendrecv_real(dd, d, dddirBackward,
 +                             buf,  ind->nsend[nzone+1],
 +                             rbuf, ind->nrecv[nzone+1]);
 +            if (!cd->bInPlace)
 +            {
 +                j = 0;
 +                for(zone=0; zone<nzone; zone++)
 +                {
 +                    for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
 +                    {
 +                        v[i] = rbuf[j];
 +                        j++;
 +                    }
 +                }
 +            }
 +            nat_tot += ind->nrecv[nzone+1];
 +        }
 +        nzone += nzone;
 +    }
 +}
 +
 +void dd_atom_sum_real(gmx_domdec_t *dd,real v[])
 +{
 +    int  nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
 +    int  *index,*cgindex;
 +    gmx_domdec_comm_t *comm;
 +    gmx_domdec_comm_dim_t *cd;
 +    gmx_domdec_ind_t *ind;
 +    real *buf,*sbuf;
 +    
 +    comm = dd->comm;
 +    
 +    cgindex = dd->cgindex;
 +
 +    buf = &comm->vbuf.v[0][0];
 +
 +    n = 0;
 +    nzone = comm->zones.n/2;
 +    nat_tot = dd->nat_tot;
 +    for(d=dd->ndim-1; d>=0; d--)
 +    {
 +        cd = &comm->cd[d];
 +        for(p=cd->np-1; p>=0; p--) {
 +            ind = &cd->ind[p];
 +            nat_tot -= ind->nrecv[nzone+1];
 +            if (cd->bInPlace)
 +            {
 +                sbuf = v + nat_tot;
 +            }
 +            else
 +            {
 +                sbuf = &comm->vbuf2.v[0][0];
 +                j = 0;
 +                for(zone=0; zone<nzone; zone++)
 +                {
 +                    for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
 +                    {
 +                        sbuf[j] = v[i];
 +                        j++;
 +                    }
 +                }
 +            }
 +            /* Communicate the forces */
 +            dd_sendrecv_real(dd, d, dddirForward,
 +                             sbuf, ind->nrecv[nzone+1],
 +                             buf,  ind->nsend[nzone+1]);
 +            index = ind->index;
 +            /* Add the received forces */
 +            n = 0;
 +            for(i=0; i<ind->nsend[nzone]; i++)
 +            {
 +                at0 = cgindex[index[i]];
 +                at1 = cgindex[index[i]+1];
 +                for(j=at0; j<at1; j++)
 +                {
 +                    v[j] += buf[n];
 +                    n++;
 +                }
 +            } 
 +        }
 +        nzone /= 2;
 +    }
 +}
 +
 +static void print_ddzone(FILE *fp,int d,int i,int j,gmx_ddzone_t *zone)
 +{
 +    fprintf(fp,"zone d0 %d d1 %d d2 %d  min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
 +            d,i,j,
 +            zone->min0,zone->max1,
 +            zone->mch0,zone->mch1,
 +            zone->p1_0,zone->p1_1);
 +}
 +
 +static void dd_sendrecv_ddzone(const gmx_domdec_t *dd,
 +                               int ddimind,int direction,
 +                               gmx_ddzone_t *buf_s,int n_s,
 +                               gmx_ddzone_t *buf_r,int n_r)
 +{
 +    rvec vbuf_s[5*2],vbuf_r[5*2];
 +    int i;
 +
 +    for(i=0; i<n_s; i++)
 +    {
 +        vbuf_s[i*2  ][0] = buf_s[i].min0;
 +        vbuf_s[i*2  ][1] = buf_s[i].max1;
 +        vbuf_s[i*2  ][2] = buf_s[i].mch0;
 +        vbuf_s[i*2+1][0] = buf_s[i].mch1;
 +        vbuf_s[i*2+1][1] = buf_s[i].p1_0;
 +        vbuf_s[i*2+1][2] = buf_s[i].p1_1;
 +    }
 +
 +    dd_sendrecv_rvec(dd, ddimind, direction,
 +                     vbuf_s, n_s*2,
 +                     vbuf_r, n_r*2);
 +
 +    for(i=0; i<n_r; i++)
 +    {
 +        buf_r[i].min0 = vbuf_r[i*2  ][0];
 +        buf_r[i].max1 = vbuf_r[i*2  ][1];
 +        buf_r[i].mch0 = vbuf_r[i*2  ][2];
 +        buf_r[i].mch1 = vbuf_r[i*2+1][0];
 +        buf_r[i].p1_0 = vbuf_r[i*2+1][1];
 +        buf_r[i].p1_1 = vbuf_r[i*2+1][2];
 +    }
 +}
 +
 +static void dd_move_cellx(gmx_domdec_t *dd,gmx_ddbox_t *ddbox,
 +                          rvec cell_ns_x0,rvec cell_ns_x1)
 +{
 +    int  d,d1,dim,dim1,pos,buf_size,i,j,k,p,npulse,npulse_min;
 +    gmx_ddzone_t *zp,buf_s[5],buf_r[5],buf_e[5];
 +    rvec extr_s[2],extr_r[2];
 +    rvec dh;
 +    real dist_d,c=0,det;
 +    gmx_domdec_comm_t *comm;
 +    gmx_bool bPBC,bUse;
 +
 +    comm = dd->comm;
 +
 +    for(d=1; d<dd->ndim; d++)
 +    {
 +        dim = dd->dim[d];
 +        zp = (d == 1) ? &comm->zone_d1[0] : &comm->zone_d2[0][0];
 +        zp->min0 = cell_ns_x0[dim];
 +        zp->max1 = cell_ns_x1[dim];
 +        zp->mch0 = cell_ns_x0[dim];
 +        zp->mch1 = cell_ns_x1[dim];
 +        zp->p1_0 = cell_ns_x0[dim];
 +        zp->p1_1 = cell_ns_x1[dim];
 +    }
 +    
 +    for(d=dd->ndim-2; d>=0; d--)
 +    {
 +        dim  = dd->dim[d];
 +        bPBC = (dim < ddbox->npbcdim);
 +
 +        /* Use an rvec to store two reals */
 +        extr_s[d][0] = comm->cell_f0[d+1];
 +        extr_s[d][1] = comm->cell_f1[d+1];
 +        extr_s[d][2] = 0;
 +
 +        pos = 0;
 +        /* Store the extremes in the backward sending buffer,
 +         * so they get updated separately from the forward communication.
 +         */
 +        for(d1=d; d1<dd->ndim-1; d1++)
 +        {
 +            /* We invert the order to be able to use the same loop for buf_e */
 +            buf_s[pos].min0 = extr_s[d1][1];
 +            buf_s[pos].max1 = extr_s[d1][0];
 +            buf_s[pos].mch0 = 0;
 +            buf_s[pos].mch1 = 0;
 +            /* Store the cell corner of the dimension we communicate along */
 +            buf_s[pos].p1_0 = comm->cell_x0[dim];
 +            buf_s[pos].p1_1 = 0;
 +            pos++;
 +        }
 +
 +        buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
 +        pos++;
 +
 +        if (dd->ndim == 3 && d == 0)
 +        {
 +            buf_s[pos] = comm->zone_d2[0][1];
 +            pos++;
 +            buf_s[pos] = comm->zone_d1[0];
 +            pos++;
 +        }
 +
 +        /* We only need to communicate the extremes
 +         * in the forward direction
 +         */
 +        npulse = comm->cd[d].np;
 +        if (bPBC)
 +        {
 +            /* Take the minimum to avoid double communication */
 +            npulse_min = min(npulse,dd->nc[dim]-1-npulse);
 +        }
 +        else
 +        {
 +            /* Without PBC we should really not communicate over
 +             * the boundaries, but implementing that complicates
 +             * the communication setup and therefore we simply
 +             * do all communication, but ignore some data.
 +             */
 +            npulse_min = npulse;
 +        }
 +        for(p=0; p<npulse_min; p++)
 +        {
 +            /* Communicate the extremes forward */
 +            bUse = (bPBC || dd->ci[dim] > 0);
 +
 +            dd_sendrecv_rvec(dd, d, dddirForward,
 +                             extr_s+d, dd->ndim-d-1,
 +                             extr_r+d, dd->ndim-d-1);
 +
 +            if (bUse)
 +            {
 +                for(d1=d; d1<dd->ndim-1; d1++)
 +                {
 +                    extr_s[d1][0] = max(extr_s[d1][0],extr_r[d1][0]);
 +                    extr_s[d1][1] = min(extr_s[d1][1],extr_r[d1][1]);
 +                }
 +            }
 +        }
 +
 +        buf_size = pos;
 +        for(p=0; p<npulse; p++)
 +        {
 +            /* Communicate all the zone information backward */
 +            bUse = (bPBC || dd->ci[dim] < dd->nc[dim] - 1);
 +
 +            dd_sendrecv_ddzone(dd, d, dddirBackward,
 +                               buf_s, buf_size,
 +                               buf_r, buf_size);
 +
 +            clear_rvec(dh);
 +            if (p > 0)
 +            {
 +                for(d1=d+1; d1<dd->ndim; d1++)
 +                {
 +                    /* Determine the decrease of maximum required
 +                     * communication height along d1 due to the distance along d,
 +                     * this avoids a lot of useless atom communication.
 +                     */
 +                    dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;
 +
 +                    if (ddbox->tric_dir[dim])
 +                    {
 +                        /* c is the off-diagonal coupling between the cell planes
 +                         * along directions d and d1.
 +                         */
 +                        c = ddbox->v[dim][dd->dim[d1]][dim];
 +                    }
 +                    else
 +                    {
 +                        c = 0;
 +                    }
 +                    det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
 +                    if (det > 0)
 +                    {
 +                        dh[d1] = comm->cutoff - (c*dist_d + sqrt(det))/(1 + c*c);
 +                    }
 +                    else
 +                    {
 +                        /* A negative value signals out of range */
 +                        dh[d1] = -1;
 +                    }
 +                }
 +            }
 +
 +            /* Accumulate the extremes over all pulses */
 +            for(i=0; i<buf_size; i++)
 +            {
 +                if (p == 0)
 +                {
 +                    buf_e[i] = buf_r[i];
 +                }
 +                else
 +                {
 +                    if (bUse)
 +                    {
 +                        buf_e[i].min0 = min(buf_e[i].min0,buf_r[i].min0);
 +                        buf_e[i].max1 = max(buf_e[i].max1,buf_r[i].max1);
 +                    }
 +
 +                    if (dd->ndim == 3 && d == 0 && i == buf_size - 1)
 +                    {
 +                        d1 = 1;
 +                    }
 +                    else
 +                    {
 +                        d1 = d + 1;
 +                    }
 +                    if (bUse && dh[d1] >= 0)
 +                    {
 +                        buf_e[i].mch0 = max(buf_e[i].mch0,buf_r[i].mch0-dh[d1]);
 +                        buf_e[i].mch1 = max(buf_e[i].mch1,buf_r[i].mch1-dh[d1]);
 +                    }
 +                }
 +                /* Copy the received buffer to the send buffer,
 +                 * to pass the data through with the next pulse.
 +                 */
 +                buf_s[i] = buf_r[i];
 +            }
 +            if (((bPBC || dd->ci[dim]+npulse < dd->nc[dim]) && p == npulse-1) ||
 +                (!bPBC && dd->ci[dim]+1+p == dd->nc[dim]-1))
 +            {
 +                /* Store the extremes */ 
 +                pos = 0;
 +
 +                for(d1=d; d1<dd->ndim-1; d1++)
 +                {
 +                    extr_s[d1][1] = min(extr_s[d1][1],buf_e[pos].min0);
 +                    extr_s[d1][0] = max(extr_s[d1][0],buf_e[pos].max1);
 +                    pos++;
 +                }
 +
 +                if (d == 1 || (d == 0 && dd->ndim == 3))
 +                {
 +                    for(i=d; i<2; i++)
 +                    {
 +                        comm->zone_d2[1-d][i] = buf_e[pos];
 +                        pos++;
 +                    }
 +                }
 +                if (d == 0)
 +                {
 +                    comm->zone_d1[1] = buf_e[pos];
 +                    pos++;
 +                }
 +            }
 +        }
 +    }
 +    
 +    if (dd->ndim >= 2)
 +    {
 +        dim = dd->dim[1];
 +        for(i=0; i<2; i++)
 +        {
 +            if (debug)
 +            {
 +                print_ddzone(debug,1,i,0,&comm->zone_d1[i]);
 +            }
 +            cell_ns_x0[dim] = min(cell_ns_x0[dim],comm->zone_d1[i].min0);
 +            cell_ns_x1[dim] = max(cell_ns_x1[dim],comm->zone_d1[i].max1);
 +        }
 +    }
 +    if (dd->ndim >= 3)
 +    {
 +        dim = dd->dim[2];
 +        for(i=0; i<2; i++)
 +        {
 +            for(j=0; j<2; j++)
 +            {
 +                if (debug)
 +                {
 +                    print_ddzone(debug,2,i,j,&comm->zone_d2[i][j]);
 +                }
 +                cell_ns_x0[dim] = min(cell_ns_x0[dim],comm->zone_d2[i][j].min0);
 +                cell_ns_x1[dim] = max(cell_ns_x1[dim],comm->zone_d2[i][j].max1);
 +            }
 +        }
 +    }
 +    for(d=1; d<dd->ndim; d++)
 +    {
 +        comm->cell_f_max0[d] = extr_s[d-1][0];
 +        comm->cell_f_min1[d] = extr_s[d-1][1];
 +        if (debug)
 +        {
 +            fprintf(debug,"Cell fraction d %d, max0 %f, min1 %f\n",
 +                    d,comm->cell_f_max0[d],comm->cell_f_min1[d]);
 +        }
 +    }
 +}
 +
 +static void dd_collect_cg(gmx_domdec_t *dd,
 +                          t_state *state_local)
 +{
 +    gmx_domdec_master_t *ma=NULL;
 +    int buf2[2],*ibuf,i,ncg_home=0,*cg=NULL,nat_home=0;
 +    t_block *cgs_gl;
 +
 +    if (state_local->ddp_count == dd->comm->master_cg_ddp_count)
 +    {
 +        /* The master has the correct distribution */
 +        return;
 +    }
 +    
 +    if (state_local->ddp_count == dd->ddp_count)
 +    {
 +        ncg_home = dd->ncg_home;
 +        cg       = dd->index_gl;
 +        nat_home = dd->nat_home;
 +    } 
 +    else if (state_local->ddp_count_cg_gl == state_local->ddp_count)
 +    {
 +        cgs_gl = &dd->comm->cgs_gl;
 +
 +        ncg_home = state_local->ncg_gl;
 +        cg       = state_local->cg_gl;
 +        nat_home = 0;
 +        for(i=0; i<ncg_home; i++)
 +        {
 +            nat_home += cgs_gl->index[cg[i]+1] - cgs_gl->index[cg[i]];
 +        }
 +    }
 +    else
 +    {
 +        gmx_incons("Attempted to collect a vector for a state for which the charge group distribution is unknown");
 +    }
 +    
 +    buf2[0] = dd->ncg_home;
 +    buf2[1] = dd->nat_home;
 +    if (DDMASTER(dd))
 +    {
 +        ma = dd->ma;
 +        ibuf = ma->ibuf;
 +    }
 +    else
 +    {
 +        ibuf = NULL;
 +    }
 +    /* Collect the charge group and atom counts on the master */
 +    dd_gather(dd,2*sizeof(int),buf2,ibuf);
 +    
 +    if (DDMASTER(dd))
 +    {
 +        ma->index[0] = 0;
 +        for(i=0; i<dd->nnodes; i++)
 +        {
 +            ma->ncg[i] = ma->ibuf[2*i];
 +            ma->nat[i] = ma->ibuf[2*i+1];
 +            ma->index[i+1] = ma->index[i] + ma->ncg[i];
 +            
 +        }
 +        /* Make byte counts and indices */
 +        for(i=0; i<dd->nnodes; i++)
 +        {
 +            ma->ibuf[i] = ma->ncg[i]*sizeof(int);
 +            ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
 +        }
 +        if (debug)
 +        {
 +            fprintf(debug,"Initial charge group distribution: ");
 +            for(i=0; i<dd->nnodes; i++)
 +                fprintf(debug," %d",ma->ncg[i]);
 +            fprintf(debug,"\n");
 +        }
 +    }
 +    
 +    /* Collect the charge group indices on the master */
 +    dd_gatherv(dd,
 +               dd->ncg_home*sizeof(int),dd->index_gl,
 +               DDMASTER(dd) ? ma->ibuf : NULL,
 +               DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
 +               DDMASTER(dd) ? ma->cg : NULL);
 +    
 +    dd->comm->master_cg_ddp_count = state_local->ddp_count;
 +}
 +
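 +/* Collect the distributed rvec array lv into the global array v on the
 + * master using point-to-point MPI_Send/MPI_Recv: non-master nodes send
 + * their home atoms, the master copies its own data and unpacks each node's
 + * buffer into global atom order via the charge group indices.
 + */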
 +static void dd_collect_vec_sendrecv(gmx_domdec_t *dd,
 +                                    rvec *lv,rvec *v)
 +{
 +    gmx_domdec_master_t *ma;
 +    int  n,i,c,a,nalloc=0;
 +    rvec *buf=NULL;
 +    t_block *cgs_gl;
 +
 +    ma = dd->ma;
 +    
 +    if (!DDMASTER(dd))
 +    {
 +#ifdef GMX_MPI
 +        MPI_Send(lv,dd->nat_home*sizeof(rvec),MPI_BYTE,DDMASTERRANK(dd),
 +                 dd->rank,dd->mpi_comm_all);
 +#endif
 +    } else {
 +        /* Copy the master coordinates to the global array */
 +        cgs_gl = &dd->comm->cgs_gl;
 +
 +        n = DDMASTERRANK(dd);
 +        a = 0;
 +        for(i=ma->index[n]; i<ma->index[n+1]; i++)
 +        {
 +            for(c=cgs_gl->index[ma->cg[i]]; c<cgs_gl->index[ma->cg[i]+1]; c++)
 +            {
 +                copy_rvec(lv[a++],v[c]);
 +            }
 +        }
 +        
 +        for(n=0; n<dd->nnodes; n++)
 +        {
 +            if (n != dd->rank)
 +            {
 +                if (ma->nat[n] > nalloc)
 +                {
 +                    nalloc = over_alloc_dd(ma->nat[n]);
 +                    srenew(buf,nalloc);
 +                }
 +#ifdef GMX_MPI
 +                MPI_Recv(buf,ma->nat[n]*sizeof(rvec),MPI_BYTE,DDRANK(dd,n),
 +                         n,dd->mpi_comm_all,MPI_STATUS_IGNORE);
 +#endif
 +                a = 0;
 +                for(i=ma->index[n]; i<ma->index[n+1]; i++)
 +                {
 +                    for(c=cgs_gl->index[ma->cg[i]]; c<cgs_gl->index[ma->cg[i]+1]; c++)
 +                    {
 +                        copy_rvec(buf[a++],v[c]);
 +                    }
 +                }
 +            }
 +        }
 +        sfree(buf);
 +    }
 +}
 +
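 +/* Fill the master's integer buffer with the byte counts and displacements
 + * (one pair per DD node) needed for gatherv/scatterv of rvec data.
 + */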
 +static void get_commbuffer_counts(gmx_domdec_t *dd,
 +                                  int **counts,int **disps)
 +{
 +    gmx_domdec_master_t *ma;
 +    int n;
 +
 +    ma = dd->ma;
 +    
 +    /* Make the rvec count and displacement arrays */
 +    *counts  = ma->ibuf;
 +    *disps   = ma->ibuf + dd->nnodes;
 +    for(n=0; n<dd->nnodes; n++)
 +    {
 +        (*counts)[n] = ma->nat[n]*sizeof(rvec);
 +        (*disps)[n]  = (n == 0 ? 0 : (*disps)[n-1] + (*counts)[n-1]);
 +    }
 +}
 +
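 +/* Collect the distributed rvec array lv onto the master with a single
 + * dd_gatherv call; the gathered buffer is then reordered into the global
 + * array v using the charge group indices.
 + */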
 +static void dd_collect_vec_gatherv(gmx_domdec_t *dd,
 +                                   rvec *lv,rvec *v)
 +{
 +    gmx_domdec_master_t *ma;
 +    int  *rcounts=NULL,*disps=NULL;
 +    int  n,i,c,a;
 +    rvec *buf=NULL;
 +    t_block *cgs_gl;
 +    
 +    ma = dd->ma;
 +    
 +    if (DDMASTER(dd))
 +    {
 +        get_commbuffer_counts(dd,&rcounts,&disps);
 +
 +        buf = ma->vbuf;
 +    }
 +    
 +    dd_gatherv(dd,dd->nat_home*sizeof(rvec),lv,rcounts,disps,buf);
 +
 +    if (DDMASTER(dd))
 +    {
 +        cgs_gl = &dd->comm->cgs_gl;
 +
 +        a = 0;
 +        for(n=0; n<dd->nnodes; n++)
 +        {
 +            for(i=ma->index[n]; i<ma->index[n+1]; i++)
 +            {
 +                for(c=cgs_gl->index[ma->cg[i]]; c<cgs_gl->index[ma->cg[i]+1]; c++)
 +                {
 +                    copy_rvec(buf[a++],v[c]);
 +                }
 +            }
 +        }
 +    }
 +}
 +
 +void dd_collect_vec(gmx_domdec_t *dd,
 +                    t_state *state_local,rvec *lv,rvec *v)
 +{
 +    gmx_domdec_master_t *ma;
 +    int  n,i,c,a,nalloc=0;
 +    rvec *buf=NULL;
 +    
 +    dd_collect_cg(dd,state_local);
 +
 +    if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
 +    {
 +        dd_collect_vec_sendrecv(dd,lv,v);
 +    }
 +    else
 +    {
 +        dd_collect_vec_gatherv(dd,lv,v);
 +    }
 +}
 +
 +
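 +/* Assemble the complete t_state on the master: scalar and per-group entries
 + * are copied directly, while the distributed arrays (x, v, sd_X, cg_p) are
 + * collected with dd_collect_vec and the RNG state is gathered when it is
 + * kept per node.
 + */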
 +void dd_collect_state(gmx_domdec_t *dd,
 +                      t_state *state_local,t_state *state)
 +{
 +    int est,i,j,nh;
 +
 +    nh = state->nhchainlength;
 +
 +    if (DDMASTER(dd))
 +    {
 +        for (i=0;i<efptNR;i++) {
 +            state->lambda[i] = state_local->lambda[i];
 +        }
 +        state->fep_state = state_local->fep_state;
 +        state->veta = state_local->veta;
 +        state->vol0 = state_local->vol0;
 +        copy_mat(state_local->box,state->box);
 +        copy_mat(state_local->boxv,state->boxv);
 +        copy_mat(state_local->svir_prev,state->svir_prev);
 +        copy_mat(state_local->fvir_prev,state->fvir_prev);
 +        copy_mat(state_local->pres_prev,state->pres_prev);
 +
 +
 +        for(i=0; i<state_local->ngtc; i++)
 +        {
 +            for(j=0; j<nh; j++) {
 +                state->nosehoover_xi[i*nh+j]        = state_local->nosehoover_xi[i*nh+j];
 +                state->nosehoover_vxi[i*nh+j]       = state_local->nosehoover_vxi[i*nh+j];
 +            }
 +            state->therm_integral[i] = state_local->therm_integral[i];            
 +        }
 +        for(i=0; i<state_local->nnhpres; i++) 
 +        {
 +            for(j=0; j<nh; j++) {
 +                state->nhpres_xi[i*nh+j]        = state_local->nhpres_xi[i*nh+j];
 +                state->nhpres_vxi[i*nh+j]       = state_local->nhpres_vxi[i*nh+j];
 +            }
 +        }
 +    }
 +    for(est=0; est<estNR; est++)
 +    {
 +        if (EST_DISTR(est) && (state_local->flags & (1<<est)))
 +        {
 +            switch (est) {
 +            case estX:
 +                dd_collect_vec(dd,state_local,state_local->x,state->x);
 +                break;
 +            case estV:
 +                dd_collect_vec(dd,state_local,state_local->v,state->v);
 +                break;
 +            case estSDX:
 +                dd_collect_vec(dd,state_local,state_local->sd_X,state->sd_X);
 +                break;
 +            case estCGP:
 +                dd_collect_vec(dd,state_local,state_local->cg_p,state->cg_p);
 +                break;
 +            case estLD_RNG:
 +                if (state->nrngi == 1)
 +                {
 +                    if (DDMASTER(dd))
 +                    {
 +                        for(i=0; i<state_local->nrng; i++)
 +                        {
 +                            state->ld_rng[i] = state_local->ld_rng[i];
 +                        }
 +                    }
 +                }
 +                else
 +                {
 +                    dd_gather(dd,state_local->nrng*sizeof(state->ld_rng[0]),
 +                              state_local->ld_rng,state->ld_rng);
 +                }
 +                break;
 +            case estLD_RNGI:
 +                if (state->nrngi == 1)
 +                {
 +                    if (DDMASTER(dd))
 +                    {
 +                        state->ld_rngi[0] = state_local->ld_rngi[0];
 +                    } 
 +                }
 +                else
 +                {
 +                    dd_gather(dd,sizeof(state->ld_rngi[0]),
 +                              state_local->ld_rngi,state->ld_rngi);
 +                }
 +                break;
 +            case estDISRE_INITF:
 +            case estDISRE_RM3TAV:
 +            case estORIRE_INITF:
 +            case estORIRE_DTAV:
 +                break;
 +            default:
 +                gmx_incons("Unknown state entry encountered in dd_collect_state");
 +            }
 +        }
 +    }
 +}
 +
 +static void dd_realloc_fr_cg(t_forcerec *fr,int nalloc)
 +{
 +    if (debug)
 +    {
 +        fprintf(debug,"Reallocating forcerec: currently %d, required %d, allocating %d\n",fr->cg_nalloc,nalloc,over_alloc_dd(nalloc));
 +    }
 +    fr->cg_nalloc = over_alloc_dd(nalloc);
 +    srenew(fr->cg_cm,fr->cg_nalloc);
 +    srenew(fr->cginfo,fr->cg_nalloc);
 +}
 +
 +static void dd_realloc_state(t_state *state,rvec **f,int nalloc)
 +{
 +    int est;
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"Reallocating state: currently %d, required %d, allocating %d\n",state->nalloc,nalloc,over_alloc_dd(nalloc));
 +    }
 +
 +    state->nalloc = over_alloc_dd(nalloc);
 +    
 +    for(est=0; est<estNR; est++)
 +    {
 +        if (EST_DISTR(est) && (state->flags & (1<<est)))
 +        {
 +            switch(est) {
 +            case estX:
 +                srenew(state->x,state->nalloc);
 +                break;
 +            case estV:
 +                srenew(state->v,state->nalloc);
 +                break;
 +            case estSDX:
 +                srenew(state->sd_X,state->nalloc);
 +                break;
 +            case estCGP:
 +                srenew(state->cg_p,state->nalloc);
 +                break;
 +            case estLD_RNG:
 +            case estLD_RNGI:
 +            case estDISRE_INITF:
 +            case estDISRE_RM3TAV:
 +            case estORIRE_INITF:
 +            case estORIRE_DTAV:
 +                /* No reallocation required */
 +                break;
 +            default:
 +                gmx_incons("Unknown state entry encountered in dd_realloc_state");            
 +            }
 +        }
 +    }
 +    
 +    if (f != NULL)
 +    {
 +        srenew(*f,state->nalloc);
 +    }
 +}
 +
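 +/* Distribute the global rvec array v from the master to the local arrays lv
 + * of all nodes using point-to-point sends: for each node the master packs
 + * that node's charge groups into a buffer in home-atom order and sends it.
 + */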
 +static void dd_distribute_vec_sendrecv(gmx_domdec_t *dd,t_block *cgs,
 +                                       rvec *v,rvec *lv)
 +{
 +    gmx_domdec_master_t *ma;
 +    int  n,i,c,a,nalloc=0;
 +    rvec *buf=NULL;
 +    
 +    if (DDMASTER(dd))
 +    {
 +        ma  = dd->ma;
 +        
 +        for(n=0; n<dd->nnodes; n++)
 +        {
 +            if (n != dd->rank)
 +            {
 +                if (ma->nat[n] > nalloc)
 +                {
 +                    nalloc = over_alloc_dd(ma->nat[n]);
 +                    srenew(buf,nalloc);
 +                }
 +                /* Use lv as a temporary buffer */
 +                a = 0;
 +                for(i=ma->index[n]; i<ma->index[n+1]; i++)
 +                {
 +                    for(c=cgs->index[ma->cg[i]]; c<cgs->index[ma->cg[i]+1]; c++)
 +                    {
 +                        copy_rvec(v[c],buf[a++]);
 +                    }
 +                }
 +                if (a != ma->nat[n])
 +                {
 +                    gmx_fatal(FARGS,"Internal error a (%d) != nat (%d)",
 +                              a,ma->nat[n]);
 +                }
 +                
 +#ifdef GMX_MPI
 +                MPI_Send(buf,ma->nat[n]*sizeof(rvec),MPI_BYTE,
 +                         DDRANK(dd,n),n,dd->mpi_comm_all);
 +#endif
 +            }
 +        }
 +        sfree(buf);
 +        n = DDMASTERRANK(dd);
 +        a = 0;
 +        for(i=ma->index[n]; i<ma->index[n+1]; i++)
 +        {
 +            for(c=cgs->index[ma->cg[i]]; c<cgs->index[ma->cg[i]+1]; c++)
 +            {
 +                copy_rvec(v[c],lv[a++]);
 +            }
 +        }
 +    }
 +    else
 +    {
 +#ifdef GMX_MPI
 +        MPI_Recv(lv,dd->nat_home*sizeof(rvec),MPI_BYTE,DDMASTERRANK(dd),
 +                 MPI_ANY_TAG,dd->mpi_comm_all,MPI_STATUS_IGNORE);
 +#endif
 +    }
 +}
 +
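 +/* Distribute the global rvec array v with a single dd_scatterv call;
 + * the master first packs v into a send buffer ordered per receiving node.
 + */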
 +static void dd_distribute_vec_scatterv(gmx_domdec_t *dd,t_block *cgs,
 +                                       rvec *v,rvec *lv)
 +{
 +    gmx_domdec_master_t *ma;
 +    int  *scounts=NULL,*disps=NULL;
 +    int  n,i,c,a,nalloc=0;
 +    rvec *buf=NULL;
 +    
 +    if (DDMASTER(dd))
 +    {
 +        ma  = dd->ma;
 +     
 +        get_commbuffer_counts(dd,&scounts,&disps);
 +
 +        buf = ma->vbuf;
 +        a = 0;
 +        for(n=0; n<dd->nnodes; n++)
 +        {
 +            for(i=ma->index[n]; i<ma->index[n+1]; i++)
 +            {
 +                for(c=cgs->index[ma->cg[i]]; c<cgs->index[ma->cg[i]+1]; c++)
 +                {
 +                    copy_rvec(v[c],buf[a++]);
 +                }
 +            }
 +        }
 +    }
 +
 +    dd_scatterv(dd,scounts,disps,buf,dd->nat_home*sizeof(rvec),lv);
 +}
 +
 +static void dd_distribute_vec(gmx_domdec_t *dd,t_block *cgs,rvec *v,rvec *lv)
 +{
 +    if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
 +    {
 +        dd_distribute_vec_sendrecv(dd,cgs,v,lv);
 +    }
 +    else
 +    {
 +        dd_distribute_vec_scatterv(dd,cgs,v,lv);
 +    }
 +}
 +
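 +/* Broadcast the global scalar state entries to all nodes and distribute the
 + * per-atom arrays over the domains; the local state arrays are reallocated
 + * first when the number of home atoms exceeds the current allocation.
 + */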
 +static void dd_distribute_state(gmx_domdec_t *dd,t_block *cgs,
 +                                t_state *state,t_state *state_local,
 +                                rvec **f)
 +{
 +    int  i,j,nh;
 +
 +    nh = state->nhchainlength;
 +
 +    if (DDMASTER(dd))
 +    {
 +        for(i=0;i<efptNR;i++)
 +        {
 +            state_local->lambda[i] = state->lambda[i];
 +        }
 +        state_local->fep_state = state->fep_state;
 +        state_local->veta   = state->veta;
 +        state_local->vol0   = state->vol0;
 +        copy_mat(state->box,state_local->box);
 +        copy_mat(state->box_rel,state_local->box_rel);
 +        copy_mat(state->boxv,state_local->boxv);
 +        copy_mat(state->svir_prev,state_local->svir_prev);
 +        copy_mat(state->fvir_prev,state_local->fvir_prev);
 +        for(i=0; i<state_local->ngtc; i++)
 +        {
 +            for(j=0; j<nh; j++) {
 +                state_local->nosehoover_xi[i*nh+j]        = state->nosehoover_xi[i*nh+j];
 +                state_local->nosehoover_vxi[i*nh+j]       = state->nosehoover_vxi[i*nh+j];
 +            }
 +            state_local->therm_integral[i] = state->therm_integral[i];
 +        }
 +        for(i=0; i<state_local->nnhpres; i++)
 +        {
 +            for(j=0; j<nh; j++) {
 +                state_local->nhpres_xi[i*nh+j]        = state->nhpres_xi[i*nh+j];
 +                state_local->nhpres_vxi[i*nh+j]       = state->nhpres_vxi[i*nh+j];
 +            }
 +        }
 +    }
 +    dd_bcast(dd,((efptNR)*sizeof(real)),state_local->lambda);
 +    dd_bcast(dd,sizeof(int),&state_local->fep_state);
 +    dd_bcast(dd,sizeof(real),&state_local->veta);
 +    dd_bcast(dd,sizeof(real),&state_local->vol0);
 +    dd_bcast(dd,sizeof(state_local->box),state_local->box);
 +    dd_bcast(dd,sizeof(state_local->box_rel),state_local->box_rel);
 +    dd_bcast(dd,sizeof(state_local->boxv),state_local->boxv);
 +    dd_bcast(dd,sizeof(state_local->svir_prev),state_local->svir_prev);
 +    dd_bcast(dd,sizeof(state_local->fvir_prev),state_local->fvir_prev);
 +    dd_bcast(dd,((state_local->ngtc*nh)*sizeof(double)),state_local->nosehoover_xi);
 +    dd_bcast(dd,((state_local->ngtc*nh)*sizeof(double)),state_local->nosehoover_vxi);
 +    dd_bcast(dd,state_local->ngtc*sizeof(double),state_local->therm_integral);
 +    dd_bcast(dd,((state_local->nnhpres*nh)*sizeof(double)),state_local->nhpres_xi);
 +    dd_bcast(dd,((state_local->nnhpres*nh)*sizeof(double)),state_local->nhpres_vxi);
 +
 +    if (dd->nat_home > state_local->nalloc)
 +    {
 +        dd_realloc_state(state_local,f,dd->nat_home);
 +    }
 +    for(i=0; i<estNR; i++)
 +    {
 +        if (EST_DISTR(i) && (state_local->flags & (1<<i)))
 +        {
 +            switch (i) {
 +            case estX:
 +                dd_distribute_vec(dd,cgs,state->x,state_local->x);
 +                break;
 +            case estV:
 +                dd_distribute_vec(dd,cgs,state->v,state_local->v);
 +                break;
 +            case estSDX:
 +                dd_distribute_vec(dd,cgs,state->sd_X,state_local->sd_X);
 +                break;
 +            case estCGP:
 +                dd_distribute_vec(dd,cgs,state->cg_p,state_local->cg_p);
 +                break;
 +            case estLD_RNG:
 +                if (state->nrngi == 1)
 +                {
 +                    dd_bcastc(dd,
 +                              state_local->nrng*sizeof(state_local->ld_rng[0]),
 +                              state->ld_rng,state_local->ld_rng);
 +                }
 +                else
 +                {
 +                    dd_scatter(dd,
 +                               state_local->nrng*sizeof(state_local->ld_rng[0]),
 +                               state->ld_rng,state_local->ld_rng);
 +                }
 +                break;
 +            case estLD_RNGI:
 +                if (state->nrngi == 1)
 +                {
 +                    dd_bcastc(dd,sizeof(state_local->ld_rngi[0]),
 +                              state->ld_rngi,state_local->ld_rngi);
 +                }
 +                else
 +                {
 +                    dd_scatter(dd,sizeof(state_local->ld_rngi[0]),
 +                               state->ld_rngi,state_local->ld_rngi);
 +                }   
 +                break;
 +            case estDISRE_INITF:
 +            case estDISRE_RM3TAV:
 +            case estORIRE_INITF:
 +            case estORIRE_DTAV:
 +                /* Not implemented yet */
 +                break;
 +            default:
 +                gmx_incons("Unknown state entry encountered in dd_distribute_state");
 +            }
 +        }
 +    }
 +}
 +
 +static char dim2char(int dim)
 +{
 +    char c='?';
 +    
 +    switch (dim)
 +    {
 +    case XX: c = 'X'; break;
 +    case YY: c = 'Y'; break;
 +    case ZZ: c = 'Z'; break;
 +    default: gmx_fatal(FARGS,"Unknown dim %d",dim);
 +    }
 +    
 +    return c;
 +}
 +
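 +/* Write the domain decomposition cell boundaries of all nodes to a PDB file:
 + * each cell is represented by its 8 corners (written as CA atoms) connected
 + * by CONECT records, with the relative cell volume in the B-factor field.
 + */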
 +static void write_dd_grid_pdb(const char *fn,gmx_large_int_t step,
 +                              gmx_domdec_t *dd,matrix box,gmx_ddbox_t *ddbox)
 +{
 +    rvec grid_s[2],*grid_r=NULL,cx,r;
 +    char fname[STRLEN],format[STRLEN],buf[22];
 +    FILE *out;
 +    int  a,i,d,z,y,x;
 +    matrix tric;
 +    real vol;
 +
 +    copy_rvec(dd->comm->cell_x0,grid_s[0]);
 +    copy_rvec(dd->comm->cell_x1,grid_s[1]);
 +    
 +    if (DDMASTER(dd))
 +    {
 +        snew(grid_r,2*dd->nnodes);
 +    }
 +    
 +    dd_gather(dd,2*sizeof(rvec),grid_s[0],DDMASTER(dd) ? grid_r[0] : NULL);
 +    
 +    if (DDMASTER(dd))
 +    {
 +        for(d=0; d<DIM; d++)
 +        {
 +            for(i=0; i<DIM; i++)
 +            {
 +                if (d == i)
 +                {
 +                    tric[d][i] = 1;
 +                }
 +                else
 +                {
 +                    if (d < ddbox->npbcdim && dd->nc[d] > 1)
 +                    {
 +                        tric[d][i] = box[i][d]/box[i][i];
 +                    }
 +                    else
 +                    {
 +                        tric[d][i] = 0;
 +                    }
 +                }
 +            }
 +        }
 +        sprintf(fname,"%s_%s.pdb",fn,gmx_step_str(step,buf));
 +        sprintf(format,"%s%s\n",get_pdbformat(),"%6.2f%6.2f");
 +        out = gmx_fio_fopen(fname,"w");
 +        gmx_write_pdb_box(out,dd->bScrewPBC ? epbcSCREW : epbcXYZ,box);
 +        a = 1;
 +        for(i=0; i<dd->nnodes; i++)
 +        {
 +            vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
 +            for(d=0; d<DIM; d++)
 +            {
 +                vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
 +            }
 +            for(z=0; z<2; z++)
 +            {
 +                for(y=0; y<2; y++)
 +                {
 +                    for(x=0; x<2; x++)
 +                    {
 +                        cx[XX] = grid_r[i*2+x][XX];
 +                        cx[YY] = grid_r[i*2+y][YY];
 +                        cx[ZZ] = grid_r[i*2+z][ZZ];
 +                        mvmul(tric,cx,r);
 +                        fprintf(out,format,"ATOM",a++,"CA","GLY",' ',1+i,
 +                                10*r[XX],10*r[YY],10*r[ZZ],1.0,vol);
 +                    }
 +                }
 +            }
 +            for(d=0; d<DIM; d++)
 +            {
 +                for(x=0; x<4; x++)
 +                {
 +                    switch(d)
 +                    {
 +                    case 0: y = 1 + i*8 + 2*x; break;
 +                    case 1: y = 1 + i*8 + 2*x - (x % 2); break;
 +                    case 2: y = 1 + i*8 + x; break;
 +                    }
 +                    fprintf(out,"%6s%5d%5d\n","CONECT",y,y+(1<<d));
 +                }
 +            }
 +        }
 +        gmx_fio_fclose(out);
 +        sfree(grid_r);
 +    }
 +}
 +
 +void write_dd_pdb(const char *fn,gmx_large_int_t step,const char *title,
 +                  gmx_mtop_t *mtop,t_commrec *cr,
 +                  int natoms,rvec x[],matrix box)
 +{
 +    char fname[STRLEN],format[STRLEN],format4[STRLEN],buf[22];
 +    FILE *out;
 +    int  i,ii,resnr,c;
 +    char *atomname,*resname;
 +    real b;
 +    gmx_domdec_t *dd;
 +    
 +    dd = cr->dd;
 +    if (natoms == -1)
 +    {
 +        natoms = dd->comm->nat[ddnatVSITE];
 +    }
 +    
 +    sprintf(fname,"%s_%s_n%d.pdb",fn,gmx_step_str(step,buf),cr->sim_nodeid);
 +    
 +    sprintf(format,"%s%s\n",get_pdbformat(),"%6.2f%6.2f");
 +    sprintf(format4,"%s%s\n",get_pdbformat4(),"%6.2f%6.2f");
 +    
 +    out = gmx_fio_fopen(fname,"w");
 +    
 +    fprintf(out,"TITLE     %s\n",title);
 +    gmx_write_pdb_box(out,dd->bScrewPBC ? epbcSCREW : epbcXYZ,box);
 +    for(i=0; i<natoms; i++)
 +    {
 +        ii = dd->gatindex[i];
 +        gmx_mtop_atominfo_global(mtop,ii,&atomname,&resnr,&resname);
 +        if (i < dd->comm->nat[ddnatZONE])
 +        {
 +            c = 0;
 +            while (i >= dd->cgindex[dd->comm->zones.cg_range[c+1]])
 +            {
 +                c++;
 +            }
 +            b = c;
 +        }
 +        else if (i < dd->comm->nat[ddnatVSITE])
 +        {
 +            b = dd->comm->zones.n;
 +        }
 +        else
 +        {
 +            b = dd->comm->zones.n + 1;
 +        }
 +        fprintf(out,strlen(atomname)<4 ? format : format4,
 +                "ATOM",(ii+1)%100000,
 +                atomname,resname,' ',resnr%10000,' ',
 +                10*x[i][XX],10*x[i][YY],10*x[i][ZZ],1.0,b);
 +    }
 +    fprintf(out,"TER\n");
 +    
 +    gmx_fio_fclose(out);
 +}
 +
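 +/* Return the cut-off distance currently guaranteed for multi-body bonded
 + * interactions, or -1 when there are no inter-charge-group bondeds.
 + */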
 +real dd_cutoff_mbody(gmx_domdec_t *dd)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int  di;
 +    real r;
 +
 +    comm = dd->comm;
 +
 +    r = -1;
 +    if (comm->bInterCGBondeds)
 +    {
 +        if (comm->cutoff_mbody > 0)
 +        {
 +            r = comm->cutoff_mbody;
 +        }
 +        else
 +        {
 +            /* cutoff_mbody=0 means we do not have DLB */
 +            r = comm->cellsize_min[dd->dim[0]];
 +            for(di=1; di<dd->ndim; di++)
 +            {
 +                r = min(r,comm->cellsize_min[dd->dim[di]]);
 +            }
 +            if (comm->bBondComm)
 +            {
 +                r = max(r,comm->cutoff_mbody);
 +            }
 +            else
 +            {
 +                r = min(r,comm->cutoff);
 +            }
 +        }
 +    }
 +
 +    return r;
 +}
 +
 +real dd_cutoff_twobody(gmx_domdec_t *dd)
 +{
 +    real r_mb;
 +
 +    r_mb = dd_cutoff_mbody(dd);
 +
 +    return max(dd->comm->cutoff,r_mb);
 +}
 +
 +
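 +/* Map the Cartesian coordinates of a PP cell to the coordinates of the
 + * PME-only node it communicates with, along the PP/PME Cartesian dimension.
 + */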
 +static void dd_cart_coord2pmecoord(gmx_domdec_t *dd,ivec coord,ivec coord_pme)
 +{
 +    int nc,ntot;
 +    
 +    nc   = dd->nc[dd->comm->cartpmedim];
 +    ntot = dd->comm->ntot[dd->comm->cartpmedim];
 +    copy_ivec(coord,coord_pme);
 +    coord_pme[dd->comm->cartpmedim] =
 +        nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
 +}
 +
 +static int low_ddindex2pmeindex(int ndd,int npme,int ddindex)
 +{
 +    /* Here we assign a PME node to communicate with this DD node
 +     * by assuming that the major index of both is x.
 +     * We add cr->npmenodes/2 to obtain an even distribution.
 +     */
 +    return (ddindex*npme + npme/2)/ndd;
 +}
 +
 +static int ddindex2pmeindex(const gmx_domdec_t *dd,int ddindex)
 +{
 +    return low_ddindex2pmeindex(dd->nnodes,dd->comm->npmenodes,ddindex);
 +}
 +
 +static int cr_ddindex2pmeindex(const t_commrec *cr,int ddindex)
 +{
 +    return low_ddindex2pmeindex(cr->dd->nnodes,cr->npmenodes,ddindex);
 +}
 +
 +static int *dd_pmenodes(t_commrec *cr)
 +{
 +    int *pmenodes;
 +    int n,i,p0,p1;
 +    
 +    snew(pmenodes,cr->npmenodes);
 +    n = 0;
 +    for(i=0; i<cr->dd->nnodes; i++) {
 +        p0 = cr_ddindex2pmeindex(cr,i);
 +        p1 = cr_ddindex2pmeindex(cr,i+1);
 +        if (i+1 == cr->dd->nnodes || p1 > p0) {
 +            if (debug)
 +                fprintf(debug,"pmenode[%d] = %d\n",n,i+1+n);
 +            pmenodes[n] = i + 1 + n;
 +            n++;
 +        }
 +    }
 +
 +    return pmenodes;
 +}
 +
 +static int gmx_ddcoord2pmeindex(t_commrec *cr,int x,int y,int z)
 +{
 +    gmx_domdec_t *dd;
 +    ivec coords,coords_pme,nc;
 +    int  slab;
 +    
 +    dd = cr->dd;
 +    /*
 +      if (dd->comm->bCartesian) {
 +      gmx_ddindex2xyz(dd->nc,ddindex,coords);
 +      dd_coords2pmecoords(dd,coords,coords_pme);
 +      copy_ivec(dd->ntot,nc);
 +      nc[dd->cartpmedim]         -= dd->nc[dd->cartpmedim];
 +      coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
 +      
 +      slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
 +      } else {
 +      slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
 +      }
 +    */
 +    coords[XX] = x;
 +    coords[YY] = y;
 +    coords[ZZ] = z;
 +    slab = ddindex2pmeindex(dd,dd_index(dd->nc,coords));
 +    
 +    return slab;
 +}
 +
 +static int ddcoord2simnodeid(t_commrec *cr,int x,int y,int z)
 +{
 +    gmx_domdec_comm_t *comm;
 +    ivec coords;
 +    int  ddindex,nodeid=-1;
 +    
 +    comm = cr->dd->comm;
 +    
 +    coords[XX] = x;
 +    coords[YY] = y;
 +    coords[ZZ] = z;
 +    if (comm->bCartesianPP_PME)
 +    {
 +#ifdef GMX_MPI
 +        MPI_Cart_rank(cr->mpi_comm_mysim,coords,&nodeid);
 +#endif
 +    }
 +    else
 +    {
 +        ddindex = dd_index(cr->dd->nc,coords);
 +        if (comm->bCartesianPP)
 +        {
 +            nodeid = comm->ddindex2simnodeid[ddindex];
 +        }
 +        else
 +        {
 +            if (comm->pmenodes)
 +            {
 +                nodeid = ddindex + gmx_ddcoord2pmeindex(cr,x,y,z);
 +            }
 +            else
 +            {
 +                nodeid = ddindex;
 +            }
 +        }
 +    }
 +  
 +    return nodeid;
 +}
 +
 +static int dd_simnode2pmenode(t_commrec *cr,int sim_nodeid)
 +{
 +    gmx_domdec_t *dd;
 +    gmx_domdec_comm_t *comm;
 +    ivec coord,coord_pme;
 +    int  i;
 +    int  pmenode=-1;
 +    
 +    dd = cr->dd;
 +    comm = dd->comm;
 +    
 +    /* This assumes a uniform x domain decomposition grid cell size */
 +    if (comm->bCartesianPP_PME)
 +    {
 +#ifdef GMX_MPI
 +        MPI_Cart_coords(cr->mpi_comm_mysim,sim_nodeid,DIM,coord);
 +        if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
 +        {
 +            /* This is a PP node */
 +            dd_cart_coord2pmecoord(dd,coord,coord_pme);
 +            MPI_Cart_rank(cr->mpi_comm_mysim,coord_pme,&pmenode);
 +        }
 +#endif
 +    }
 +    else if (comm->bCartesianPP)
 +    {
 +        if (sim_nodeid < dd->nnodes)
 +        {
 +            pmenode = dd->nnodes + ddindex2pmeindex(dd,sim_nodeid);
 +        }
 +    }
 +    else
 +    {
 +        /* This assumes DD cells with identical x coordinates
 +         * are numbered sequentially.
 +         */
 +        if (dd->comm->pmenodes == NULL)
 +        {
 +            if (sim_nodeid < dd->nnodes)
 +            {
 +                /* The DD index equals the nodeid */
 +                pmenode = dd->nnodes + ddindex2pmeindex(dd,sim_nodeid);
 +            }
 +        }
 +        else
 +        {
 +            i = 0;
 +            while (sim_nodeid > dd->comm->pmenodes[i])
 +            {
 +                i++;
 +            }
 +            if (sim_nodeid < dd->comm->pmenodes[i])
 +            {
 +                pmenode = dd->comm->pmenodes[i];
 +            }
 +        }
 +    }
 +    
 +    return pmenode;
 +}
 +
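 +/* Return TRUE when sim_nodeid is a PME-only node, i.e. when it has no
 + * PME node of its own to send to; always FALSE without domain decomposition.
 + */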
 +gmx_bool gmx_pmeonlynode(t_commrec *cr,int sim_nodeid)
 +{
 +    gmx_bool bPMEOnlyNode;
 +    
 +    if (DOMAINDECOMP(cr))
 +    {
 +        bPMEOnlyNode = (dd_simnode2pmenode(cr,sim_nodeid) == -1);
 +    }
 +    else
 +    {
 +        bPMEOnlyNode = FALSE;
 +    }
 +    
 +    return bPMEOnlyNode;
 +}
 +
 +void get_pme_ddnodes(t_commrec *cr,int pmenodeid,
 +                     int *nmy_ddnodes,int **my_ddnodes,int *node_peer)
 +{
 +    gmx_domdec_t *dd;
 +    int x,y,z;
 +    ivec coord,coord_pme;
 +    
 +    dd = cr->dd;
 +    
 +    snew(*my_ddnodes,(dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
 +    
 +    *nmy_ddnodes = 0;
 +    for(x=0; x<dd->nc[XX]; x++)
 +    {
 +        for(y=0; y<dd->nc[YY]; y++)
 +        {
 +            for(z=0; z<dd->nc[ZZ]; z++)
 +            {
 +                if (dd->comm->bCartesianPP_PME)
 +                {
 +                    coord[XX] = x;
 +                    coord[YY] = y;
 +                    coord[ZZ] = z;
 +                    dd_cart_coord2pmecoord(dd,coord,coord_pme);
 +                    if (dd->ci[XX] == coord_pme[XX] &&
 +                        dd->ci[YY] == coord_pme[YY] &&
 +                        dd->ci[ZZ] == coord_pme[ZZ])
 +                        (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr,x,y,z);
 +                }
 +                else
 +                {
 +                    /* The slab corresponds to the nodeid in the PME group */
 +                    if (gmx_ddcoord2pmeindex(cr,x,y,z) == pmenodeid)
 +                    {
 +                        (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr,x,y,z);
 +                    }
 +                }
 +            }
 +        }
 +    }
 +    
 +    /* The last PP-only node is the peer node */
 +    *node_peer = (*my_ddnodes)[*nmy_ddnodes-1];
 +    
 +    if (debug)
 +    {
 +        fprintf(debug,"Receive coordinates from PP nodes:");
 +        for(x=0; x<*nmy_ddnodes; x++)
 +        {
 +            fprintf(debug," %d",(*my_ddnodes)[x]);
 +        }
 +        fprintf(debug,"\n");
 +    }
 +}
 +
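 +/* Determine whether this PP node should receive the virial and energy
 + * contributions from its PME-only node; only the last PP node in the group
 + * sharing a PME node does the receiving.
 + */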
 +static gmx_bool receive_vir_ener(t_commrec *cr)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int  pmenode,coords[DIM],rank;
 +    gmx_bool bReceive;
 +    
 +    bReceive = TRUE;
 +    if (cr->npmenodes < cr->dd->nnodes)
 +    {
 +        comm = cr->dd->comm;
 +        if (comm->bCartesianPP_PME)
 +        {
 +            pmenode = dd_simnode2pmenode(cr,cr->sim_nodeid);
 +#ifdef GMX_MPI
 +            MPI_Cart_coords(cr->mpi_comm_mysim,cr->sim_nodeid,DIM,coords);
 +            coords[comm->cartpmedim]++;
 +            if (coords[comm->cartpmedim] < cr->dd->nc[comm->cartpmedim])
 +            {
 +                MPI_Cart_rank(cr->mpi_comm_mysim,coords,&rank);
 +                if (dd_simnode2pmenode(cr,rank) == pmenode)
 +                {
 +                    /* This is not the last PP node for pmenode */
 +                    bReceive = FALSE;
 +                }
 +            }
 +#endif  
 +        }
 +        else
 +        {
 +            pmenode = dd_simnode2pmenode(cr,cr->sim_nodeid);
 +            if (cr->sim_nodeid+1 < cr->nnodes &&
 +                dd_simnode2pmenode(cr,cr->sim_nodeid+1) == pmenode)
 +            {
 +                /* This is not the last PP node for pmenode */
 +                bReceive = FALSE;
 +            }
 +        }
 +    }
 +    
 +    return bReceive;
 +}
 +
 +static void set_zones_ncg_home(gmx_domdec_t *dd)
 +{
 +    gmx_domdec_zones_t *zones;
 +    int i;
 +
 +    zones = &dd->comm->zones;
 +
 +    zones->cg_range[0] = 0;
 +    for(i=1; i<zones->n+1; i++)
 +    {
 +        zones->cg_range[i] = dd->ncg_home;
 +    }
 +}
 +
 +static void rebuild_cgindex(gmx_domdec_t *dd,int *gcgs_index,t_state *state)
 +{
 +    int nat,i,*ind,*dd_cg_gl,*cgindex,cg_gl;
 +    
 +    ind = state->cg_gl;
 +    dd_cg_gl = dd->index_gl;
 +    cgindex  = dd->cgindex;
 +    nat = 0;
 +    cgindex[0] = nat;
 +    for(i=0; i<state->ncg_gl; i++)
 +    {
 +        cgindex[i] = nat;
 +        cg_gl = ind[i];
 +        dd_cg_gl[i] = cg_gl;
 +        nat += gcgs_index[cg_gl+1] - gcgs_index[cg_gl];
 +    }
 +    cgindex[i] = nat;
 +    
 +    dd->ncg_home = state->ncg_gl;
 +    dd->nat_home = nat;
 +
 +    set_zones_ncg_home(dd);
 +}
 +
 +static int ddcginfo(const cginfo_mb_t *cginfo_mb,int cg)
 +{
 +    while (cg >= cginfo_mb->cg_end)
 +    {
 +        cginfo_mb++;
 +    }
 +
 +    return cginfo_mb->cginfo[(cg - cginfo_mb->cg_start) % cginfo_mb->cg_mod];
 +}
 +
 +static void dd_set_cginfo(int *index_gl,int cg0,int cg1,
 +                          t_forcerec *fr,char *bLocalCG)
 +{
 +    cginfo_mb_t *cginfo_mb;
 +    int *cginfo;
 +    int cg;
 +
 +    if (fr != NULL)
 +    {
 +        cginfo_mb = fr->cginfo_mb;
 +        cginfo    = fr->cginfo;
 +
 +        for(cg=cg0; cg<cg1; cg++)
 +        {
 +            cginfo[cg] = ddcginfo(cginfo_mb,index_gl[cg]);
 +        }
 +    }
 +
 +    if (bLocalCG != NULL)
 +    {
 +        for(cg=cg0; cg<cg1; cg++)
 +        {
 +            bLocalCG[index_gl[cg]] = TRUE;
 +        }
 +    }
 +}
 +
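 +/* (Re)build the local-to-global atom index (gatindex) and the global-to-local
 + * lookup (ga2la) for all zones, starting from charge group cg_start.
 + */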
 +static void make_dd_indices(gmx_domdec_t *dd,int *gcgs_index,int cg_start)
 +{
 +    int nzone,zone,zone1,cg0,cg,cg_gl,a,a_gl;
 +    int *zone2cg,*zone_ncg1,*index_gl,*gatindex;
 +    gmx_ga2la_t *ga2la;
 +    char *bLocalCG;
 +
 +    bLocalCG = dd->comm->bLocalCG;
 +
 +    if (dd->nat_tot > dd->gatindex_nalloc)
 +    {
 +        dd->gatindex_nalloc = over_alloc_dd(dd->nat_tot);
 +        srenew(dd->gatindex,dd->gatindex_nalloc);
 +    }
 +
 +    nzone      = dd->comm->zones.n;
 +    zone2cg    = dd->comm->zones.cg_range;
 +    zone_ncg1  = dd->comm->zone_ncg1;
 +    index_gl   = dd->index_gl;
 +    gatindex   = dd->gatindex;
 +
 +    if (zone2cg[1] != dd->ncg_home)
 +    {
 +        gmx_incons("dd->ncg_zone is not up to date");
 +    }
 +    
 +    /* Make the local to global and global to local atom index */
 +    a = dd->cgindex[cg_start];
 +    for(zone=0; zone<nzone; zone++)
 +    {
 +        if (zone == 0)
 +        {
 +            cg0 = cg_start;
 +        }
 +        else
 +        {
 +            cg0 = zone2cg[zone];
 +        }
 +        for(cg=cg0; cg<zone2cg[zone+1]; cg++)
 +        {
 +            zone1 = zone;
 +            if (cg - cg0 >= zone_ncg1[zone])
 +            {
 +                /* Signal that this cg is from more than one zone away */
 +                zone1 += nzone;
 +            }
 +            cg_gl = index_gl[cg];
 +            for(a_gl=gcgs_index[cg_gl]; a_gl<gcgs_index[cg_gl+1]; a_gl++)
 +            {
 +                gatindex[a] = a_gl;
 +                ga2la_set(dd->ga2la,a_gl,a,zone1);
 +                a++;
 +            }
 +        }
 +    }
 +}
 +
 +static int check_bLocalCG(gmx_domdec_t *dd,int ncg_sys,const char *bLocalCG,
 +                          const char *where)
 +{
 +    int ncg,i,ngl,nerr;
 +
 +    nerr = 0;
 +    if (bLocalCG == NULL)
 +    {
 +        return nerr;
 +    }
 +    for(i=0; i<dd->ncg_tot; i++)
 +    {
 +        if (!bLocalCG[dd->index_gl[i]])
 +        {
 +            fprintf(stderr,
 +                    "DD node %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n",dd->rank,where,i+1,dd->index_gl[i]+1,dd->ncg_home);
 +            nerr++;
 +        }
 +    }
 +    ngl = 0;
 +    for(i=0; i<ncg_sys; i++)
 +    {
 +        if (bLocalCG[i])
 +        {
 +            ngl++;
 +        }
 +    }
 +    if (ngl != dd->ncg_tot)
 +    {
 +        fprintf(stderr,"DD node %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n",dd->rank,where,ngl,dd->ncg_tot);
 +        nerr++;
 +    }
 +
 +    return nerr;
 +}
 +
 +static void check_index_consistency(gmx_domdec_t *dd,
 +                                    int natoms_sys,int ncg_sys,
 +                                    const char *where)
 +{
 +    int  nerr,ngl,i,a,cell;
 +    int  *have;
 +
 +    nerr = 0;
 +
 +    if (dd->comm->DD_debug > 1)
 +    {
 +        snew(have,natoms_sys);
 +        for(a=0; a<dd->nat_tot; a++)
 +        {
 +            if (have[dd->gatindex[a]] > 0)
 +            {
 +                fprintf(stderr,"DD node %d: global atom %d occurs twice: index %d and %d\n",dd->rank,dd->gatindex[a]+1,have[dd->gatindex[a]],a+1);
 +            }
 +            else
 +            {
 +                have[dd->gatindex[a]] = a + 1;
 +            }
 +        }
 +        sfree(have);
 +    }
 +
 +    snew(have,dd->nat_tot);
 +
 +    ngl  = 0;
 +    for(i=0; i<natoms_sys; i++)
 +    {
 +        if (ga2la_get(dd->ga2la,i,&a,&cell))
 +        {
 +            if (a >= dd->nat_tot)
 +            {
 +                fprintf(stderr,"DD node %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n",dd->rank,i+1,a+1,dd->nat_tot);
 +                nerr++;
 +            }
 +            else
 +            {
 +                have[a] = 1;
 +                if (dd->gatindex[a] != i)
 +                {
 +                    fprintf(stderr,"DD node %d: global atom %d marked as local atom %d, which has global atom index %d\n",dd->rank,i+1,a+1,dd->gatindex[a]+1);
 +                    nerr++;
 +                }
 +            }
 +            ngl++;
 +        }
 +    }
 +    if (ngl != dd->nat_tot)
 +    {
 +        fprintf(stderr,
 +                "DD node %d, %s: %d global atom indices, %d local atoms\n",
 +                dd->rank,where,ngl,dd->nat_tot);
 +    }
 +    for(a=0; a<dd->nat_tot; a++)
 +    {
 +        if (have[a] == 0)
 +        {
 +            fprintf(stderr,
 +                    "DD node %d, %s: local atom %d, global %d has no global index\n",
 +                    dd->rank,where,a+1,dd->gatindex[a]+1);
 +        }
 +    }
 +    sfree(have);
 +
 +    nerr += check_bLocalCG(dd,ncg_sys,dd->comm->bLocalCG,where);
 +
 +    if (nerr > 0) {
 +        gmx_fatal(FARGS,"DD node %d, %s: %d atom/cg index inconsistencies",
 +                  dd->rank,where,nerr);
 +    }
 +}
 +
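 +/* Clear the global-to-local index entries and the local charge group flags
 + * from atom a_start / charge group cg_start onwards, as well as the local
 + * vsite and constraint indices.
 + */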
 +static void clear_dd_indices(gmx_domdec_t *dd,int cg_start,int a_start)
 +{
 +    int  i;
 +    char *bLocalCG;
 +
 +    if (a_start == 0)
 +    {
 +        /* Clear the whole list without searching */
 +        ga2la_clear(dd->ga2la);
 +    }
 +    else
 +    {
 +        for(i=a_start; i<dd->nat_tot; i++)
 +        {
 +            ga2la_del(dd->ga2la,dd->gatindex[i]);
 +        }
 +    }
 +
 +    bLocalCG = dd->comm->bLocalCG;
 +    if (bLocalCG)
 +    {
 +        for(i=cg_start; i<dd->ncg_tot; i++)
 +        {
 +            bLocalCG[dd->index_gl[i]] = FALSE;
 +        }
 +    }
 +
 +    dd_clear_local_vsite_indices(dd);
 +    
 +    if (dd->constraints)
 +    {
 +        dd_clear_local_constraint_indices(dd);
 +    }
 +}
 +
 +static real grid_jump_limit(gmx_domdec_comm_t *comm,int dim_ind)
 +{
 +    real grid_jump_limit;
 +
 +    /* The distance between the boundaries of cells at distance
 +     * x+-1,y+-1 or y+-1,z+-1 is limited by the cut-off restrictions
 +     * and by the fact that cells should not be shifted by more than
 +     * half their size, such that cg's only shift by one cell
 +     * at redecomposition.
 +     */
 +    grid_jump_limit = comm->cellsize_limit;
 +    if (!comm->bVacDLBNoLimit)
 +    {
 +        grid_jump_limit = max(grid_jump_limit,
 +                              comm->cutoff/comm->cd[dim_ind].np);
 +    }
 +
 +    return grid_jump_limit;
 +}
 +
 +static void check_grid_jump(gmx_large_int_t step,gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int  d,dim;
 +    real limit,bfac;
 +    
 +    comm = dd->comm;
 +    
 +    for(d=1; d<dd->ndim; d++)
 +    {
 +        dim = dd->dim[d];
 +        limit = grid_jump_limit(comm,d);
 +        bfac = ddbox->box_size[dim];
 +        if (ddbox->tric_dir[dim])
 +        {
 +            bfac *= ddbox->skew_fac[dim];
 +        }
 +        if ((comm->cell_f1[d] - comm->cell_f_max0[d])*bfac <  limit ||
 +            (comm->cell_f0[d] - comm->cell_f_min1[d])*bfac > -limit)
 +        {
 +            char buf[22];
 +            gmx_fatal(FARGS,"Step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d\n",
 +                      gmx_step_str(step,buf),
 +                      dim2char(dim),dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
 +        }
 +    }
 +}
 +
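 +/* Return the number of load samples collected since the last balancing:
 + * flop counts when flop counting is used, force cycle counts otherwise.
 + */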
 +static int dd_load_count(gmx_domdec_comm_t *comm)
 +{
 +    return (comm->eFlop ? comm->flop_n : comm->cycl_n[ddCyclF]);
 +}
 +
 +static float dd_force_load(gmx_domdec_comm_t *comm)
 +{
 +    float load;
 +    
 +    if (comm->eFlop)
 +    {
 +        load = comm->flop;
 +        if (comm->eFlop > 1)
 +        {
 +            load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
 +        }
 +    } 
 +    else
 +    {
 +        load = comm->cycl[ddCyclF];
 +        if (comm->cycl_n[ddCyclF] > 1)
 +        {
 +            /* Subtract the maximum of the last n cycle counts
 +             * to get rid of possible high counts due to other sources,
 +             * for instance system activity, that would otherwise
 +             * affect the dynamic load balancing.
 +             */
 +            load -= comm->cycl_max[ddCyclF];
 +        }
 +    }
 +    
 +    return load;
 +}
 +
 +static void set_slb_pme_dim_f(gmx_domdec_t *dd,int dim,real **dim_f)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int i;
 +    
 +    comm = dd->comm;
 +    
 +    snew(*dim_f,dd->nc[dim]+1);
 +    (*dim_f)[0] = 0;
 +    for(i=1; i<dd->nc[dim]; i++)
 +    {
 +        if (comm->slb_frac[dim])
 +        {
 +            (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
 +        }
 +        else
 +        {
 +            (*dim_f)[i] = (real)i/(real)dd->nc[dim];
 +        }
 +    }
 +    (*dim_f)[dd->nc[dim]] = 1;
 +}
 +
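 +/* Set up the PME slab decomposition for dimension index dimind: determine
 + * the slab dimension, the number of slabs, and for each slab the range of
 + * PP cell indices it covers along that dimension.
 + */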
 +static void init_ddpme(gmx_domdec_t *dd,gmx_ddpme_t *ddpme,int dimind)
 +{
 +    int        pmeindex,slab,nso,i;
 +    ivec xyz;
 +    
 +    if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
 +    {
 +        ddpme->dim = YY;
 +    }
 +    else
 +    {
 +        ddpme->dim = dimind;
 +    }
 +    ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
 +    
 +    ddpme->nslab = (ddpme->dim == 0 ?
 +                    dd->comm->npmenodes_x :
 +                    dd->comm->npmenodes_y);
 +
 +    if (ddpme->nslab <= 1)
 +    {
 +        return;
 +    }
 +
 +    nso = dd->comm->npmenodes/ddpme->nslab;
 +    /* Determine for each PME slab the PP location range for dimension dim */
 +    snew(ddpme->pp_min,ddpme->nslab);
 +    snew(ddpme->pp_max,ddpme->nslab);
 +    for(slab=0; slab<ddpme->nslab; slab++) {
 +        ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
 +        ddpme->pp_max[slab] = 0;
 +    }
 +    for(i=0; i<dd->nnodes; i++) {
 +        ddindex2xyz(dd->nc,i,xyz);
 +        /* For y only use our y/z slab.
 +         * This assumes that the PME x grid size matches the DD grid size.
 +         */
 +        if (dimind == 0 || xyz[XX] == dd->ci[XX]) {
 +            pmeindex = ddindex2pmeindex(dd,i);
 +            if (dimind == 0) {
 +                slab = pmeindex/nso;
 +            } else {
 +                slab = pmeindex % ddpme->nslab;
 +            }
 +            ddpme->pp_min[slab] = min(ddpme->pp_min[slab],xyz[dimind]);
 +            ddpme->pp_max[slab] = max(ddpme->pp_max[slab],xyz[dimind]);
 +        }
 +    }
 +
 +    set_slb_pme_dim_f(dd,ddpme->dim,&ddpme->slb_dim_f);
 +}
 +
 +int dd_pme_maxshift_x(gmx_domdec_t *dd)
 +{
 +    if (dd->comm->ddpme[0].dim == XX)
 +    {
 +        return dd->comm->ddpme[0].maxshift;
 +    }
 +    else
 +    {
 +        return 0;
 +    }
 +}
 +
 +int dd_pme_maxshift_y(gmx_domdec_t *dd)
 +{
 +    if (dd->comm->ddpme[0].dim == YY)
 +    {
 +        return dd->comm->ddpme[0].maxshift;
 +    }
 +    else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
 +    {
 +        return dd->comm->ddpme[1].maxshift;
 +    }
 +    else
 +    {
 +        return 0;
 +    }
 +}
 +
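 +/* Determine ddpme->maxshift for this PME dimension: the number of extra
 + * PME slabs that might have to be communicated with, given the current
 + * (possibly non-uniform) cell boundaries along dim.
 + */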
 +static void set_pme_maxshift(gmx_domdec_t *dd,gmx_ddpme_t *ddpme,
 +                             gmx_bool bUniform,gmx_ddbox_t *ddbox,real *cell_f)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int  nc,ns,s;
 +    int  *xmin,*xmax;
 +    real range,pme_boundary;
 +    int  sh;
 +    
 +    comm = dd->comm;
 +    nc  = dd->nc[ddpme->dim];
 +    ns  = ddpme->nslab;
 +    
 +    if (!ddpme->dim_match)
 +    {
 +        /* PP decomposition is not along dim: the worst situation */
 +        sh = ns/2;
 +    }
 +    else if (ns <= 3 || (bUniform && ns == nc))
 +    {
 +        /* The optimal situation */
 +        sh = 1;
 +    }
 +    else
 +    {
 +        /* We need to check for all pme nodes which nodes they
 +         * could possibly need to communicate with.
 +         */
 +        xmin = ddpme->pp_min;
 +        xmax = ddpme->pp_max;
 +        /* Allow for atoms to be maximally 2/3 times the cut-off
 +         * out of their DD cell. This is a reasonable balance
 +         * between performance and support for most charge-group/cut-off
 +         * combinations.
 +         */
 +        range  = 2.0/3.0*comm->cutoff/ddbox->box_size[ddpme->dim];
 +        /* Avoid extra communication when we are exactly at a boundary */
 +        range *= 0.999;
 +        
 +        sh = 1;
 +        for(s=0; s<ns; s++)
 +        {
 +            /* PME slab s spreads atoms between box frac. s/ns and (s+1)/ns */
 +            pme_boundary = (real)s/ns;
 +            while (sh+1 < ns &&
 +                   ((s-(sh+1) >= 0 &&
 +                     cell_f[xmax[s-(sh+1)   ]+1]     + range > pme_boundary) ||
 +                    (s-(sh+1) <  0 &&
 +                     cell_f[xmax[s-(sh+1)+ns]+1] - 1 + range > pme_boundary)))
 +            {
 +                sh++;
 +            }
 +            pme_boundary = (real)(s+1)/ns;
 +            while (sh+1 < ns &&
 +                   ((s+(sh+1) <  ns &&
 +                     cell_f[xmin[s+(sh+1)   ]  ]     - range < pme_boundary) ||
 +                    (s+(sh+1) >= ns &&
 +                     cell_f[xmin[s+(sh+1)-ns]  ] + 1 - range < pme_boundary)))
 +            {
 +                sh++;
 +            }
 +        }
 +    }
 +    
 +    ddpme->maxshift = sh;
 +    
 +    if (debug)
 +    {
 +        fprintf(debug,"PME slab communication range for dim %d is %d\n",
 +                ddpme->dim,ddpme->maxshift);
 +    }
 +}
 +
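 +/* Check that each decomposed box dimension is large enough to accommodate
 + * the requested number of cells at the minimum allowed cell size.
 + */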
 +static void check_box_size(gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
 +{
 +    int d,dim;
 +    
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        dim = dd->dim[d];
 +        if (dim < ddbox->nboundeddim &&
 +            ddbox->box_size[dim]*ddbox->skew_fac[dim] <
 +            dd->nc[dim]*dd->comm->cellsize_limit*DD_CELL_MARGIN)
 +        {
 +            gmx_fatal(FARGS,"The %c-size of the box (%f) times the triclinic skew factor (%f) is smaller than the number of DD cells (%d) times the smallest allowed cell size (%f)\n",
 +                      dim2char(dim),ddbox->box_size[dim],ddbox->skew_fac[dim],
 +                      dd->nc[dim],dd->comm->cellsize_limit);
 +        }
 +    }
 +}
 +
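 +/* Set the cell boundaries for static (non-dynamic) load balancing, either
 + * uniform or according to the user-supplied fractions in slb_frac, and
 + * determine the number of communication pulses npulse per dimension.
 + */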
 +static void set_dd_cell_sizes_slb(gmx_domdec_t *dd,gmx_ddbox_t *ddbox,
 +                                  gmx_bool bMaster,ivec npulse)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int  d,j;
 +    rvec cellsize_min;
 +    real *cell_x,cell_dx,cellsize;
 +    
 +    comm = dd->comm;
 +    
 +    for(d=0; d<DIM; d++)
 +    {
 +        cellsize_min[d] = ddbox->box_size[d]*ddbox->skew_fac[d];
 +        npulse[d] = 1;
 +        if (dd->nc[d] == 1 || comm->slb_frac[d] == NULL)
 +        {
 +            /* Uniform grid */
 +            cell_dx = ddbox->box_size[d]/dd->nc[d];
 +            if (bMaster)
 +            {
 +                for(j=0; j<dd->nc[d]+1; j++)
 +                {
 +                    dd->ma->cell_x[d][j] = ddbox->box0[d] + j*cell_dx;
 +                }
 +            }
 +            else
 +            {
 +                comm->cell_x0[d] = ddbox->box0[d] + (dd->ci[d]  )*cell_dx;
 +                comm->cell_x1[d] = ddbox->box0[d] + (dd->ci[d]+1)*cell_dx;
 +            }
 +            cellsize = cell_dx*ddbox->skew_fac[d];
 +            while (cellsize*npulse[d] < comm->cutoff && npulse[d] < dd->nc[d]-1)
 +            {
 +                npulse[d]++;
 +            }
 +            cellsize_min[d] = cellsize;
 +        }
 +        else
 +        {
 +            /* Statically load balanced grid */
 +            /* Also when we are not doing a master distribution we determine
 +             * all cell borders in a loop to obtain identical values
 +             * to the master distribution case and to determine npulse.
 +             */
 +            if (bMaster)
 +            {
 +                cell_x = dd->ma->cell_x[d];
 +            }
 +            else
 +            {
 +                snew(cell_x,dd->nc[d]+1);
 +            }
 +            cell_x[0] = ddbox->box0[d];
 +            for(j=0; j<dd->nc[d]; j++)
 +            {
 +                cell_dx = ddbox->box_size[d]*comm->slb_frac[d][j];
 +                cell_x[j+1] = cell_x[j] + cell_dx;
 +                cellsize = cell_dx*ddbox->skew_fac[d];
 +                while (cellsize*npulse[d] < comm->cutoff &&
 +                       npulse[d] < dd->nc[d]-1)
 +                {
 +                    npulse[d]++;
 +                }
 +                cellsize_min[d] = min(cellsize_min[d],cellsize);
 +            }
 +            if (!bMaster)
 +            {
 +                comm->cell_x0[d] = cell_x[dd->ci[d]];
 +                comm->cell_x1[d] = cell_x[dd->ci[d]+1];
 +                sfree(cell_x);
 +            }
 +        }
 +        /* The following limitation avoids a cell receiving some of its own
 +         * home charge groups back over the periodic boundary.
 +         * Duplicate charge groups cause trouble with the global indices.
 +         */
 +        if (d < ddbox->npbcdim &&
 +            dd->nc[d] > 1 && npulse[d] >= dd->nc[d])
 +        {
 +            gmx_fatal_collective(FARGS,NULL,dd,
 +                                 "The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
 +                                 dim2char(d),ddbox->box_size[d],ddbox->skew_fac[d],
 +                                 comm->cutoff,
 +                                 dd->nc[d],dd->nc[d],
 +                                 dd->nnodes > dd->nc[d] ? "cells" : "processors");
 +        }
 +    }
 +    
 +    if (!comm->bDynLoadBal)
 +    {
 +        copy_rvec(cellsize_min,comm->cellsize_min);
 +    }
 +   
 +    for(d=0; d<comm->npmedecompdim; d++)
 +    {
 +        set_pme_maxshift(dd,&comm->ddpme[d],
 +                         comm->slb_frac[dd->dim[d]]==NULL,ddbox,
 +                         comm->ddpme[d].slb_dim_f);
 +    }
 +}
 +
 +
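 +/* Enforce the minimum cell size within the boundary range [range[0],range[1])
 + * on the DLB root: iteratively rescale the remaining cells when some hit the
 + * limit, then recursively fix boundaries that violate the staggering limits.
 + */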
 +static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t *dd,
 +                                       int d,int dim,gmx_domdec_root_t *root,
 +                                       gmx_ddbox_t *ddbox,
 +                                       gmx_bool bUniform,gmx_large_int_t step, real cellsize_limit_f, int range[])
 +{
 +    gmx_domdec_comm_t *comm;
 +    int  ncd,i,j,nmin,nmin_old;
 +    gmx_bool bLimLo,bLimHi;
 +    real *cell_size;
 +    real fac,halfway,cellsize_limit_f_i,region_size;
 +    gmx_bool bPBC,bLastHi=FALSE;
 +    int nrange[]={range[0],range[1]};
 +
 +    region_size= root->cell_f[range[1]]-root->cell_f[range[0]];  
 +
 +    comm = dd->comm;
 +
 +    ncd = dd->nc[dim];
 +
 +    bPBC = (dim < ddbox->npbcdim);
 +
 +    cell_size = root->buf_ncd;
 +
 +    if (debug) 
 +    {
 +        fprintf(debug,"enforce_limits: %d %d\n",range[0],range[1]);
 +    }
 +
 +    /* First we need to check that the scaling does not make cells
 +     * smaller than the smallest allowed size.
 +     * We need to do this iteratively, since if a cell is too small,
 +     * it needs to be enlarged, which makes all the other cells smaller,
 +     * which could in turn make another cell smaller than allowed.
 +     */
 +    for(i=range[0]; i<range[1]; i++)
 +    {
 +        root->bCellMin[i] = FALSE;
 +    }
 +    nmin = 0;
 +    do
 +    {
 +        nmin_old = nmin;
 +        /* We need the total for normalization */
 +        fac = 0;
 +        for(i=range[0]; i<range[1]; i++)
 +        {
 +            if (root->bCellMin[i] == FALSE)
 +            {
 +                fac += cell_size[i];
 +            }
 +        }
 +        fac = (region_size - nmin*cellsize_limit_f)/fac; /* subtracting cells already set to cellsize_limit_f */
 +        /* Determine the cell boundaries */
 +        for(i=range[0]; i<range[1]; i++)
 +        {
 +            if (root->bCellMin[i] == FALSE)
 +            {
 +                cell_size[i] *= fac;
 +                if (!bPBC && (i == 0 || i == dd->nc[dim] -1))
 +                {
 +                    cellsize_limit_f_i = 0;
 +                }
 +                else
 +                {
 +                    cellsize_limit_f_i = cellsize_limit_f;
 +                }
 +                if (cell_size[i] < cellsize_limit_f_i)
 +                {
 +                    root->bCellMin[i] = TRUE;
 +                    cell_size[i] = cellsize_limit_f_i;
 +                    nmin++;
 +                }
 +            }
 +            root->cell_f[i+1] = root->cell_f[i] + cell_size[i];
 +        }
 +    }
 +    while (nmin > nmin_old);
 +    
 +    i=range[1]-1;
 +    cell_size[i] = root->cell_f[i+1] - root->cell_f[i];
 +    /* For this check we should not use DD_CELL_MARGIN,
 +     * but a slightly smaller factor,
 +     * since rounding could get us below the limit.
 +     */
 +    if (bPBC && cell_size[i] < cellsize_limit_f*DD_CELL_MARGIN2/DD_CELL_MARGIN)
 +    {
 +        char buf[22];
 +        gmx_fatal(FARGS,"Step %s: the dynamic load balancing could not balance dimension %c: box size %f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
 +                  gmx_step_str(step,buf),
 +                  dim2char(dim),ddbox->box_size[dim],ddbox->skew_fac[dim],
 +                  ncd,comm->cellsize_min[dim]);
 +    }
 +    
 +    root->bLimited = (nmin > 0) || (range[0]>0) || (range[1]<ncd);
 +    
 +    if (!bUniform)
 +    {
 +        /* Check that the boundary did not displace by more than halfway
 +         * across either of the cells it bounds, as this could cause problems,
 +         * especially when the differences between cell sizes are large.
 +         * If changes are applied, they will not make cells smaller
 +         * than the cut-off, as we check all the boundaries which
 +         * might be affected by a change and if the old state was ok,
 +         * the cells will at most be shrunk back to their old size.
 +         */
 +        for(i=range[0]+1; i<range[1]; i++)
 +        {
 +            halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i-1]);
 +            if (root->cell_f[i] < halfway)
 +            {
 +                root->cell_f[i] = halfway;
 +                /* Check if the change also causes shifts of the next boundaries */
 +                for(j=i+1; j<range[1]; j++)
 +                {
 +                    if (root->cell_f[j] < root->cell_f[j-1] + cellsize_limit_f)
 +                        root->cell_f[j] =  root->cell_f[j-1] + cellsize_limit_f;
 +                }
 +            }
 +            halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i+1]);
 +            if (root->cell_f[i] > halfway)
 +            {
 +                root->cell_f[i] = halfway;
 +                /* Check if the change also causes shifts of the next boundaries */
 +                for(j=i-1; j>=range[0]+1; j--)
 +                {
 +                    if (root->cell_f[j] > root->cell_f[j+1] - cellsize_limit_f)
 +                        root->cell_f[j] = root->cell_f[j+1] - cellsize_limit_f;
 +                }
 +            }
 +        }
 +    }
 +    
 +    /* nrange is defined as [lower, upper) range for new call to enforce_limits */
 +    /* Find the highest violation of LimLo (a) and the lowest following violation of LimHi (b),
 +     * then call enforce_limits for (oldb,a) and (a,b). In the next step: (b,nexta);
 +     * oldb and nexta can be the boundaries. For a and b, nrange is used. */
 +    if (d > 0)
 +    {
 +        /* Take care of the staggering of the cell boundaries */
 +        if (bUniform)
 +        {
 +            for(i=range[0]; i<range[1]; i++)
 +            {
 +                root->cell_f_max0[i] = root->cell_f[i];
 +                root->cell_f_min1[i] = root->cell_f[i+1];
 +            }
 +        }
 +        else
 +        {
 +            for(i=range[0]+1; i<range[1]; i++)
 +            {
 +                bLimLo = (root->cell_f[i] < root->bound_min[i]);
 +                bLimHi = (root->cell_f[i] > root->bound_max[i]);
 +                if (bLimLo && bLimHi)
 +                {
 +                    /* Both limits violated, try the best we can */
 +                    /* For this case we split the original range (range) into two parts and deal with the other limitations in the next iteration. */
 +                    root->cell_f[i] = 0.5*(root->bound_min[i] + root->bound_max[i]);
 +                    nrange[0]=range[0];
 +                    nrange[1]=i;
 +                    dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
 +
 +                    nrange[0]=i;
 +                    nrange[1]=range[1];
 +                    dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
 +
 +                    return;
 +                }
 +                else if (bLimLo)
 +                {
 +                    /* root->cell_f[i] = root->bound_min[i]; */
 +                    nrange[1]=i;  /* only store the violation location; there could be a following LimLo violation with a higher index */
 +                    bLastHi=FALSE;
 +                }
 +                else if (bLimHi && !bLastHi)
 +                {
 +                    bLastHi=TRUE;
 +                    if (nrange[1] < range[1])   /* found a LimLo before */
 +                    {
 +                        root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
 +                        dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
 +                        nrange[0]=nrange[1];
 +                    }
 +                    root->cell_f[i] = root->bound_max[i];
 +                    nrange[1]=i; 
 +                    dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
 +                    nrange[0]=i;
 +                    nrange[1]=range[1];
 +                }
 +            }
 +            if (nrange[1] < range[1])   /* the last violation found was a LimLo */
 +            {
 +                root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
 +                dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
 +                nrange[0]=nrange[1];
 +                nrange[1]=range[1];
 +                dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
 +            } 
 +            else if (nrange[0] > range[0]) /* found at least one LimHi */
 +            {
 +                dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
 +            }
 +        }
 +    }
 +}
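/* Editor's illustrative sketch, not part of the merge: the renormalization
 * loop of dd_cell_sizes_dlb_root_enforce_limits above, reduced to its
 * essentials. Cells that fall below the minimum are pinned to `limit` and
 * the remaining budget (total - nmin*limit) is redistributed proportionally
 * over the unpinned cells until no new cell drops below the limit. The
 * names clamp_cells and pinned are hypothetical, chosen for this example.
 */
#include <stdbool.h>
#include <stdio.h>

static void clamp_cells(double cell[], bool pinned[], int n,
                        double total, double limit)
{
    int    i, nmin = 0, nmin_old;
    double fac;

    for (i = 0; i < n; i++)
    {
        pinned[i] = false;
    }
    do
    {
        nmin_old = nmin;
        /* Sum of the fractions that are still free to be rescaled */
        fac = 0;
        for (i = 0; i < n; i++)
        {
            if (!pinned[i])
            {
                fac += cell[i];
            }
        }
        /* Budget left for the unpinned cells, as in the loop above */
        fac = (total - nmin*limit)/fac;
        for (i = 0; i < n; i++)
        {
            if (!pinned[i])
            {
                cell[i] *= fac;
                if (cell[i] < limit)
                {
                    pinned[i] = true;
                    cell[i]   = limit;
                    nmin++;
                }
            }
        }
    }
    while (nmin > nmin_old);
}

int main(void)
{
    double cell[4] = { 0.05, 0.40, 0.15, 0.40 };
    bool   pinned[4];
    int    i;

    clamp_cells(cell, pinned, 4, 1.0, 0.10);
    for (i = 0; i < 4; i++)
    {
        printf("%.3f ", cell[i]); /* prints 0.100 0.379 0.142 0.379 */
    }
    printf("\n");
    return 0;
}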
 +
 +
 +static void set_dd_cell_sizes_dlb_root(gmx_domdec_t *dd,
 +                                       int d,int dim,gmx_domdec_root_t *root,
 +                                       gmx_ddbox_t *ddbox,gmx_bool bDynamicBox,
 +                                       gmx_bool bUniform,gmx_large_int_t step)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int  ncd,d1,i,j,pos;
 +    real *cell_size;
 +    real load_aver,load_i,imbalance,change,change_max,sc;
 +    real cellsize_limit_f,dist_min_f,dist_min_f_hard,space;
 +    real change_limit;
 +    real relax = 0.5;
 +    gmx_bool bPBC;
 +    int range[] = { 0, 0 };
 +
 +    comm = dd->comm;
 +
 +    /* Convert the maximum change from the input percentage to a fraction */
 +    change_limit = comm->dlb_scale_lim*0.01;
 +
 +    ncd = dd->nc[dim];
 +
 +    bPBC = (dim < ddbox->npbcdim);
 +
 +    cell_size = root->buf_ncd;
 +
 +    /* Store the original boundaries */
 +    for(i=0; i<ncd+1; i++)
 +    {
 +        root->old_cell_f[i] = root->cell_f[i];
 +    }
 +    if (bUniform) {
 +        for(i=0; i<ncd; i++)
 +        {
 +            cell_size[i] = 1.0/ncd;
 +        }
 +    }
 +    else if (dd_load_count(comm))
 +    {
 +        load_aver = comm->load[d].sum_m/ncd;
 +        change_max = 0;
 +        for(i=0; i<ncd; i++)
 +        {
 +            /* Determine the relative imbalance of cell i */
 +            load_i = comm->load[d].load[i*comm->load[d].nload+2];
 +            imbalance = (load_i - load_aver)/(load_aver>0 ? load_aver : 1);
 +            /* Determine the change of the cell size using underrelaxation */
 +            change = -relax*imbalance;
 +            change_max = max(change_max,max(change,-change));
 +        }
 +        /* Limit the amount of scaling.
 +         * We need to use the same rescaling for all cells in one row,
 +         * otherwise the load balancing might not converge.
 +         */
 +        sc = relax;
 +        if (change_max > change_limit)
 +        {
 +            sc *= change_limit/change_max;
 +        }
 +        for(i=0; i<ncd; i++)
 +        {
 +            /* Determine the relative imbalance of cell i */
 +            load_i = comm->load[d].load[i*comm->load[d].nload+2];
 +            imbalance = (load_i - load_aver)/(load_aver>0 ? load_aver : 1);
 +            /* Determine the change of the cell size using underrelaxation */
 +            change = -sc*imbalance;
 +            cell_size[i] = (root->cell_f[i+1]-root->cell_f[i])*(1 + change);
 +        }
 +    }
 +    
 +    cellsize_limit_f  = comm->cellsize_min[dim]/ddbox->box_size[dim];
 +    cellsize_limit_f *= DD_CELL_MARGIN;
 +    dist_min_f_hard        = grid_jump_limit(comm,d)/ddbox->box_size[dim];
 +    dist_min_f       = dist_min_f_hard * DD_CELL_MARGIN;
 +    if (ddbox->tric_dir[dim])
 +    {
 +        cellsize_limit_f /= ddbox->skew_fac[dim];
 +        dist_min_f       /= ddbox->skew_fac[dim];
 +    }
 +    if (bDynamicBox && d > 0)
 +    {
 +        dist_min_f *= DD_PRES_SCALE_MARGIN;
 +    }
 +    if (d > 0 && !bUniform)
 +    {
 +        /* Make sure that the grid is not shifted too much */
 +        for(i=1; i<ncd; i++) {
 +            if (root->cell_f_min1[i] - root->cell_f_max0[i-1] < 2 * dist_min_f_hard) 
 +            {
 +                gmx_incons("Inconsistent DD boundary staggering limits!");
 +            }
 +            root->bound_min[i] = root->cell_f_max0[i-1] + dist_min_f;
 +            space = root->cell_f[i] - (root->cell_f_max0[i-1] + dist_min_f);
 +            if (space > 0) {
 +                root->bound_min[i] += 0.5*space;
 +            }
 +            root->bound_max[i] = root->cell_f_min1[i] - dist_min_f;
 +            space = root->cell_f[i] - (root->cell_f_min1[i] - dist_min_f);
 +            if (space < 0) {
 +                root->bound_max[i] += 0.5*space;
 +            }
 +            if (debug)
 +            {
 +                fprintf(debug,
 +                        "dim %d boundary %d %.3f < %.3f < %.3f < %.3f < %.3f\n",
 +                        d,i,
 +                        root->cell_f_max0[i-1] + dist_min_f,
 +                        root->bound_min[i],root->cell_f[i],root->bound_max[i],
 +                        root->cell_f_min1[i] - dist_min_f);
 +            }
 +        }
 +    }
 +    range[1]=ncd;
 +    root->cell_f[0] = 0;
 +    root->cell_f[ncd] = 1;
 +    dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, range);
 +
 +
 +    /* After the checks above, the cells should obey the cut-off
 +     * restrictions, but it does not hurt to check.
 +     */
 +    for(i=0; i<ncd; i++)
 +    {
 +        if (debug)
 +        {
 +            fprintf(debug,"Relative bounds dim %d  cell %d: %f %f\n",
 +                    dim,i,root->cell_f[i],root->cell_f[i+1]);
 +        }
 +
 +        if ((bPBC || (i != 0 && i != dd->nc[dim]-1)) &&
 +            root->cell_f[i+1] - root->cell_f[i] <
 +            cellsize_limit_f/DD_CELL_MARGIN)
 +        {
 +            char buf[22];
 +            fprintf(stderr,
 +                    "\nWARNING step %s: direction %c, cell %d too small: %f\n",
 +                    gmx_step_str(step,buf),dim2char(dim),i,
 +                    (root->cell_f[i+1] - root->cell_f[i])
 +                    *ddbox->box_size[dim]*ddbox->skew_fac[dim]);
 +        }
 +    }
 +    
 +    pos = ncd + 1;
 +    /* Store the cell boundaries of the lower dimensions at the end */
 +    for(d1=0; d1<d; d1++)
 +    {
 +        root->cell_f[pos++] = comm->cell_f0[d1];
 +        root->cell_f[pos++] = comm->cell_f1[d1];
 +    }
 +    
 +    if (d < comm->npmedecompdim)
 +    {
 +        /* The master determines the maximum shift for
 +         * the coordinate communication between separate PME nodes.
 +         */
 +        set_pme_maxshift(dd,&comm->ddpme[d],bUniform,ddbox,root->cell_f);
 +    }
 +    root->cell_f[pos++] = comm->ddpme[0].maxshift;
 +    if (d >= 1)
 +    {
 +        root->cell_f[pos++] = comm->ddpme[1].maxshift;
 +    }
 +}    
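/* Editor's illustrative sketch, not part of the merge: the underrelaxation
 * step of set_dd_cell_sizes_dlb_root above, in isolation. Each cell's
 * relative imbalance is turned into a size change of -relax*imbalance, and
 * if the largest |change| exceeds the user limit, one common scale factor
 * is applied to the whole row so that the balancing still converges. The
 * helper name rebalance_row is hypothetical.
 */
static void rebalance_row(const double load[], double cell_size[], int n,
                          double relax, double change_limit)
{
    double load_aver = 0, change_max = 0, sc, imbalance, change;
    int    i;

    for (i = 0; i < n; i++)
    {
        load_aver += load[i];
    }
    load_aver /= n;

    for (i = 0; i < n; i++)
    {
        imbalance = (load[i] - load_aver)/(load_aver > 0 ? load_aver : 1);
        change    = -relax*imbalance;
        if ( change > change_max) { change_max =  change; }
        if (-change > change_max) { change_max = -change; }
    }
    /* One common rescaling for the whole row, as in the code above */
    sc = relax;
    if (change_max > change_limit)
    {
        sc *= change_limit/change_max;
    }
    for (i = 0; i < n; i++)
    {
        imbalance     = (load[i] - load_aver)/(load_aver > 0 ? load_aver : 1);
        cell_size[i] *= 1 - sc*imbalance;
    }
}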
 +
 +static void relative_to_absolute_cell_bounds(gmx_domdec_t *dd,
 +                                             gmx_ddbox_t *ddbox,int dimind)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int dim;
 +
 +    comm = dd->comm;
 +
 +    /* Set the cell dimensions */
 +    dim = dd->dim[dimind];
 +    comm->cell_x0[dim] = comm->cell_f0[dimind]*ddbox->box_size[dim];
 +    comm->cell_x1[dim] = comm->cell_f1[dimind]*ddbox->box_size[dim];
 +    if (dim >= ddbox->nboundeddim)
 +    {
 +        comm->cell_x0[dim] += ddbox->box0[dim];
 +        comm->cell_x1[dim] += ddbox->box0[dim];
 +    }
 +}
 +
 +static void distribute_dd_cell_sizes_dlb(gmx_domdec_t *dd,
 +                                         int d,int dim,real *cell_f_row,
 +                                         gmx_ddbox_t *ddbox)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int d1,dim1,pos;
 +
 +    comm = dd->comm;
 +
 +#ifdef GMX_MPI
 +    /* Each node would only need to know two fractions,
 +     * but it is probably cheaper to broadcast the whole array.
 +     */
 +    MPI_Bcast(cell_f_row,DD_CELL_F_SIZE(dd,d)*sizeof(real),MPI_BYTE,
 +              0,comm->mpi_comm_load[d]);
 +#endif
 +    /* Copy the fractions for this dimension from the buffer */
 +    comm->cell_f0[d] = cell_f_row[dd->ci[dim]  ];
 +    comm->cell_f1[d] = cell_f_row[dd->ci[dim]+1];
 +    /* The whole array was communicated, so set the buffer position */
 +    pos = dd->nc[dim] + 1;
 +    for(d1=0; d1<=d; d1++)
 +    {
 +        if (d1 < d)
 +        {
 +            /* Copy the cell fractions of the lower dimensions */
 +            comm->cell_f0[d1] = cell_f_row[pos++];
 +            comm->cell_f1[d1] = cell_f_row[pos++];
 +        }
 +        relative_to_absolute_cell_bounds(dd,ddbox,d1);
 +    }
 +    /* Convert the communicated shift from float to int */
 +    comm->ddpme[0].maxshift = (int)(cell_f_row[pos++] + 0.5);
 +    if (d >= 1)
 +    {
 +        comm->ddpme[1].maxshift = (int)(cell_f_row[pos++] + 0.5);
 +    }
 +}
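/* Editor's note, not part of the merge: layout of the cell_f_row buffer that
 * set_dd_cell_sizes_dlb_root fills and distribute_dd_cell_sizes_dlb above
 * unpacks, for decomposition dimension d with nc cells in the row:
 *
 *   index 0 .. nc            the nc+1 relative cell boundaries
 *   index nc+1 .. nc+2*d     cell_f0/cell_f1 pairs of the lower dimensions
 *   index nc+1+2*d           maxshift of PME dimension 0 (stored as real)
 *   index nc+2+2*d           maxshift of PME dimension 1, only when d >= 1
 *
 * The hypothetical helper below gives the slot of the cell_f0 entry of a
 * lower dimension d1 < d, matching the pos++ bookkeeping above.
 */
static int cell_f_row_lower_dim_slot(int nc, int d1)
{
    return nc + 1 + 2*d1;
}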
 +
 +static void set_dd_cell_sizes_dlb_change(gmx_domdec_t *dd,
 +                                         gmx_ddbox_t *ddbox,gmx_bool bDynamicBox,
 +                                         gmx_bool bUniform,gmx_large_int_t step)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int d,dim,d1;
 +    gmx_bool bRowMember,bRowRoot;
 +    real *cell_f_row;
 +    
 +    comm = dd->comm;
 +
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        dim = dd->dim[d];
 +        bRowMember = TRUE;
 +        bRowRoot = TRUE;
 +        for(d1=d; d1<dd->ndim; d1++)
 +        {
 +            if (dd->ci[dd->dim[d1]] > 0)
 +            {
 +                if (d1 > d)
 +                {
 +                    bRowMember = FALSE;
 +                }
 +                bRowRoot = FALSE;
 +            }
 +        }
 +        if (bRowMember)
 +        {
 +            if (bRowRoot)
 +            {
 +                set_dd_cell_sizes_dlb_root(dd,d,dim,comm->root[d],
 +                                           ddbox,bDynamicBox,bUniform,step);
 +                cell_f_row = comm->root[d]->cell_f;
 +            }
 +            else
 +            {
 +                cell_f_row = comm->cell_f_row;
 +            }
 +            distribute_dd_cell_sizes_dlb(dd,d,dim,cell_f_row,ddbox);
 +        }
 +    }
 +}    
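/* Editor's illustrative sketch, not part of the merge: the row-membership
 * test inside set_dd_cell_sizes_dlb_change above. A rank takes part in the
 * row communication of dimension d when its cell coordinates in all higher
 * decomposition dimensions are zero, and it is the row root when its
 * coordinate in dimension d is zero as well. Here ci[] is assumed to be
 * already ordered by decomposition dimension; the helper names are
 * hypothetical.
 */
#include <stdbool.h>

static bool is_row_member(const int ci[], int ndim, int d)
{
    int d1;

    for (d1 = d + 1; d1 < ndim; d1++)
    {
        if (ci[d1] > 0)
        {
            return false;
        }
    }
    return true;
}

static bool is_row_root(const int ci[], int ndim, int d)
{
    return ci[d] == 0 && is_row_member(ci, ndim, d);
}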
 +
 +static void set_dd_cell_sizes_dlb_nochange(gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
 +{
 +    int d;
 +
 +    /* This function assumes the box is static and should therefore
 +     * not be called when the box has changed since the last
 +     * call to dd_partition_system.
 +     */
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        relative_to_absolute_cell_bounds(dd,ddbox,d); 
 +    }
 +}
 +
 +
 +
 +static void set_dd_cell_sizes_dlb(gmx_domdec_t *dd,
 +                                  gmx_ddbox_t *ddbox,gmx_bool bDynamicBox,
 +                                  gmx_bool bUniform,gmx_bool bDoDLB,gmx_large_int_t step,
 +                                  gmx_wallcycle_t wcycle)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int dim;
 +
 +    comm = dd->comm;
 +    
 +    if (bDoDLB)
 +    {
 +        wallcycle_start(wcycle,ewcDDCOMMBOUND);
 +        set_dd_cell_sizes_dlb_change(dd,ddbox,bDynamicBox,bUniform,step);
 +        wallcycle_stop(wcycle,ewcDDCOMMBOUND);
 +    }
 +    else if (bDynamicBox)
 +    {
 +        set_dd_cell_sizes_dlb_nochange(dd,ddbox);
 +    }
 +    
 +    /* Set the dimensions for which no DD is used */
 +    for(dim=0; dim<DIM; dim++) {
 +        if (dd->nc[dim] == 1) {
 +            comm->cell_x0[dim] = 0;
 +            comm->cell_x1[dim] = ddbox->box_size[dim];
 +            if (dim >= ddbox->nboundeddim)
 +            {
 +                comm->cell_x0[dim] += ddbox->box0[dim];
 +                comm->cell_x1[dim] += ddbox->box0[dim];
 +            }
 +        }
 +    }
 +}
 +
 +static void realloc_comm_ind(gmx_domdec_t *dd,ivec npulse)
 +{
 +    int d,np,i;
 +    gmx_domdec_comm_dim_t *cd;
 +    
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        cd = &dd->comm->cd[d];
 +        np = npulse[dd->dim[d]];
 +        if (np > cd->np_nalloc)
 +        {
 +            if (debug)
 +            {
 +                fprintf(debug,"(Re)allocing cd for %c to %d pulses\n",
 +                        dim2char(dd->dim[d]),np);
 +            }
 +            if (DDMASTER(dd) && cd->np_nalloc > 0)
 +            {
 +                fprintf(stderr,"\nIncreasing the number of cell to communicate in dimension %c to %d for the first time\n",dim2char(dd->dim[d]),np);
 +            }
 +            srenew(cd->ind,np);
 +            for(i=cd->np_nalloc; i<np; i++)
 +            {
 +                cd->ind[i].index  = NULL;
 +                cd->ind[i].nalloc = 0;
 +            }
 +            cd->np_nalloc = np;
 +        }
 +        cd->np = np;
 +    }
 +}
 +
 +
 +static void set_dd_cell_sizes(gmx_domdec_t *dd,
 +                              gmx_ddbox_t *ddbox,gmx_bool bDynamicBox,
 +                              gmx_bool bUniform,gmx_bool bDoDLB,gmx_large_int_t step,
 +                              gmx_wallcycle_t wcycle)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int  d;
 +    ivec npulse;
 +    
 +    comm = dd->comm;
 +
 +    /* Copy the old cell boundaries for the cg displacement check */
 +    copy_rvec(comm->cell_x0,comm->old_cell_x0);
 +    copy_rvec(comm->cell_x1,comm->old_cell_x1);
 +    
 +    if (comm->bDynLoadBal)
 +    {
 +        if (DDMASTER(dd))
 +        {
 +            check_box_size(dd,ddbox);
 +        }
 +        set_dd_cell_sizes_dlb(dd,ddbox,bDynamicBox,bUniform,bDoDLB,step,wcycle);
 +    }
 +    else
 +    {
 +        set_dd_cell_sizes_slb(dd,ddbox,FALSE,npulse);
 +        realloc_comm_ind(dd,npulse);
 +    }
 +    
 +    if (debug)
 +    {
 +        for(d=0; d<DIM; d++)
 +        {
 +            fprintf(debug,"cell_x[%d] %f - %f skew_fac %f\n",
 +                    d,comm->cell_x0[d],comm->cell_x1[d],ddbox->skew_fac[d]);
 +        }
 +    }
 +}
 +
 +static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
 +                                  gmx_ddbox_t *ddbox,
 +                                  rvec cell_ns_x0,rvec cell_ns_x1,
 +                                  gmx_large_int_t step)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int dim_ind,dim;
 +    
 +    comm = dd->comm;
 +
 +    for(dim_ind=0; dim_ind<dd->ndim; dim_ind++)
 +    {
 +        dim = dd->dim[dim_ind];
 +        
 +        /* Without PBC we don't have restrictions on the outer cells */
 +        if (!(dim >= ddbox->npbcdim && 
 +              (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
 +            comm->bDynLoadBal &&
 +            (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
 +            comm->cellsize_min[dim])
 +        {
 +            char buf[22];
 +            gmx_fatal(FARGS,"Step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
 +                      gmx_step_str(step,buf),dim2char(dim),
 +                      comm->cell_x1[dim] - comm->cell_x0[dim],
 +                      ddbox->skew_fac[dim],
 +                      dd->comm->cellsize_min[dim],
 +                      dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
 +        }
 +    }
 +    
 +    if ((dd->bGridJump && dd->ndim > 1) || ddbox->nboundeddim < DIM)
 +    {
 +        /* Communicate the boundaries and update cell_ns_x0/1 */
 +        dd_move_cellx(dd,ddbox,cell_ns_x0,cell_ns_x1);
 +        if (dd->bGridJump && dd->ndim > 1)
 +        {
 +            check_grid_jump(step,dd,ddbox);
 +        }
 +    }
 +}
 +
 +static void make_tric_corr_matrix(int npbcdim,matrix box,matrix tcm)
 +{
 +    if (YY < npbcdim)
 +    {
 +        tcm[YY][XX] = -box[YY][XX]/box[YY][YY];
 +    }
 +    else
 +    {
 +        tcm[YY][XX] = 0;
 +    }
 +    if (ZZ < npbcdim)
 +    {
 +        tcm[ZZ][XX] = -(box[ZZ][YY]*tcm[YY][XX] + box[ZZ][XX])/box[ZZ][ZZ];
 +        tcm[ZZ][YY] = -box[ZZ][YY]/box[ZZ][ZZ];
 +    }
 +    else
 +    {
 +        tcm[ZZ][XX] = 0;
 +        tcm[ZZ][YY] = 0;
 +    }
 +}
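/* Editor's illustrative sketch, not part of the merge: how the correction
 * matrix built above is used further down (pos_d += cg_cm[j]*tcm[j][d]).
 * For a triclinic box, the position along dimension d is corrected by the
 * contributions of the higher dimensions that tilt into d, so that it can
 * be compared against the flat cell boundaries. The helper name
 * to_lattice_coord is hypothetical; plain double[3] replaces rvec.
 */
static double to_lattice_coord(const double x[3], double tcm[3][3], int d)
{
    double pos_d = x[d];
    int    j;

    for (j = d + 1; j < 3; j++)
    {
        pos_d += x[j]*tcm[j][d];
    }
    return pos_d;
}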
 +
 +static void check_screw_box(matrix box)
 +{
 +    /* Mathematical limitation */
 +    if (box[YY][XX] != 0 || box[ZZ][XX] != 0)
 +    {
 +        gmx_fatal(FARGS,"With screw pbc the unit cell can not have non-zero off-diagonal x-components");
 +    }
 +    
 +    /* Limitation due to the asymmetry of the eighth shell method */
 +    if (box[ZZ][YY] != 0)
 +    {
 +        gmx_fatal(FARGS,"pbc=screw with non-zero box_zy is not supported");
 +    }
 +}
 +
 +static void distribute_cg(FILE *fplog,gmx_large_int_t step,
 +                          matrix box,ivec tric_dir,t_block *cgs,rvec pos[],
 +                          gmx_domdec_t *dd)
 +{
 +    gmx_domdec_master_t *ma;
 +    int **tmp_ind=NULL,*tmp_nalloc=NULL;
 +    int  i,icg,j,k,k0,k1,d,npbcdim;
 +    matrix tcm;
 +    rvec box_size,cg_cm;
 +    ivec ind;
 +    real nrcg,inv_ncg,pos_d;
 +    atom_id *cgindex;
 +    gmx_bool bUnbounded,bScrew;
 +
 +    ma = dd->ma;
 +    
 +    if (tmp_ind == NULL)
 +    {
 +        snew(tmp_nalloc,dd->nnodes);
 +        snew(tmp_ind,dd->nnodes);
 +        for(i=0; i<dd->nnodes; i++)
 +        {
 +            tmp_nalloc[i] = over_alloc_large(cgs->nr/dd->nnodes+1);
 +            snew(tmp_ind[i],tmp_nalloc[i]);
 +        }
 +    }
 +    
 +    /* Clear the count */
 +    for(i=0; i<dd->nnodes; i++)
 +    {
 +        ma->ncg[i] = 0;
 +        ma->nat[i] = 0;
 +    }
 +    
 +    make_tric_corr_matrix(dd->npbcdim,box,tcm);
 +    
 +    cgindex = cgs->index;
 +    
 +    /* Compute the center of geometry for all charge groups */
 +    for(icg=0; icg<cgs->nr; icg++)
 +    {
 +        k0      = cgindex[icg];
 +        k1      = cgindex[icg+1];
 +        nrcg    = k1 - k0;
 +        if (nrcg == 1)
 +        {
 +            copy_rvec(pos[k0],cg_cm);
 +        }
 +        else
 +        {
 +            inv_ncg = 1.0/nrcg;
 +            
 +            clear_rvec(cg_cm);
 +            for(k=k0; (k<k1); k++)
 +            {
 +                rvec_inc(cg_cm,pos[k]);
 +            }
 +            for(d=0; (d<DIM); d++)
 +            {
 +                cg_cm[d] *= inv_ncg;
 +            }
 +        }
 +        /* Put the charge group in the box and determine the cell index */
 +        for(d=DIM-1; d>=0; d--) {
 +            pos_d = cg_cm[d];
 +            if (d < dd->npbcdim)
 +            {
 +                bScrew = (dd->bScrewPBC && d == XX);
 +                if (tric_dir[d] && dd->nc[d] > 1)
 +                {
 +                    /* Use triclinic coordinates for this dimension */
 +                    for(j=d+1; j<DIM; j++)
 +                    {
 +                        pos_d += cg_cm[j]*tcm[j][d];
 +                    }
 +                }
 +                while(pos_d >= box[d][d])
 +                {
 +                    pos_d -= box[d][d];
 +                    rvec_dec(cg_cm,box[d]);
 +                    if (bScrew)
 +                    {
 +                        cg_cm[YY] = box[YY][YY] - cg_cm[YY];
 +                        cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
 +                    }
 +                    for(k=k0; (k<k1); k++)
 +                    {
 +                        rvec_dec(pos[k],box[d]);
 +                        if (bScrew)
 +                        {
 +                            pos[k][YY] = box[YY][YY] - pos[k][YY];
 +                            pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
 +                        }
 +                    }
 +                }
 +                while(pos_d < 0)
 +                {
 +                    pos_d += box[d][d];
 +                    rvec_inc(cg_cm,box[d]);
 +                    if (bScrew)
 +                    {
 +                        cg_cm[YY] = box[YY][YY] - cg_cm[YY];
 +                        cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
 +                    }
 +                    for(k=k0; (k<k1); k++)
 +                    {
 +                        rvec_inc(pos[k],box[d]);
 +                        if (bScrew) {
 +                            pos[k][YY] = box[YY][YY] - pos[k][YY];
 +                            pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
 +                        }
 +                    }
 +                }
 +            }
 +            /* This could be done more efficiently */
 +            ind[d] = 0;
 +            while(ind[d]+1 < dd->nc[d] && pos_d >= ma->cell_x[d][ind[d]+1])
 +            {
 +                ind[d]++;
 +            }
 +        }
 +        i = dd_index(dd->nc,ind);
 +        if (ma->ncg[i] == tmp_nalloc[i])
 +        {
 +            tmp_nalloc[i] = over_alloc_large(ma->ncg[i]+1);
 +            srenew(tmp_ind[i],tmp_nalloc[i]);
 +        }
 +        tmp_ind[i][ma->ncg[i]] = icg;
 +        ma->ncg[i]++;
 +        ma->nat[i] += cgindex[icg+1] - cgindex[icg];
 +    }
 +    
 +    k1 = 0;
 +    for(i=0; i<dd->nnodes; i++)
 +    {
 +        ma->index[i] = k1;
 +        for(k=0; k<ma->ncg[i]; k++)
 +        {
 +            ma->cg[k1++] = tmp_ind[i][k];
 +        }
 +    }
 +    ma->index[dd->nnodes] = k1;
 +    
 +    for(i=0; i<dd->nnodes; i++)
 +    {
 +        sfree(tmp_ind[i]);
 +    }
 +    sfree(tmp_ind);
 +    sfree(tmp_nalloc);
 +    
 +    if (fplog)
 +    {
 +        char buf[22];
 +        fprintf(fplog,"Charge group distribution at step %s:",
 +                gmx_step_str(step,buf));
 +        for(i=0; i<dd->nnodes; i++)
 +        {
 +            fprintf(fplog," %d",ma->ncg[i]);
 +        }
 +        fprintf(fplog,"\n");
 +    }
 +}
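/* Editor's illustrative sketch, not part of the merge: the per-dimension
 * cell search used by distribute_cg above ("This could be done more
 * efficiently"), reduced to one dimension. boundaries[] holds the nc+1
 * cell edges along that dimension; the helper name find_cell is
 * hypothetical.
 */
static int find_cell(double pos, const double boundaries[], int nc)
{
    int ind = 0;

    while (ind + 1 < nc && pos >= boundaries[ind + 1])
    {
        ind++;
    }
    return ind;
}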
 +
 +static void get_cg_distribution(FILE *fplog,gmx_large_int_t step,gmx_domdec_t *dd,
 +                                t_block *cgs,matrix box,gmx_ddbox_t *ddbox,
 +                                rvec pos[])
 +{
 +    gmx_domdec_master_t *ma=NULL;
 +    ivec npulse;
 +    int  i,cg_gl;
 +    int  *ibuf,buf2[2] = { 0, 0 };
 +    gmx_bool bMaster = DDMASTER(dd);
 +    if (bMaster)
 +    {
 +        ma = dd->ma;
 +        
 +        if (dd->bScrewPBC)
 +        {
 +            check_screw_box(box);
 +        }
 +    
 +        set_dd_cell_sizes_slb(dd,ddbox,TRUE,npulse);
 +    
 +        distribute_cg(fplog,step,box,ddbox->tric_dir,cgs,pos,dd);
 +        for(i=0; i<dd->nnodes; i++)
 +        {
 +            ma->ibuf[2*i]   = ma->ncg[i];
 +            ma->ibuf[2*i+1] = ma->nat[i];
 +        }
 +        ibuf = ma->ibuf;
 +    }
 +    else
 +    {
 +        ibuf = NULL;
 +    }
 +    dd_scatter(dd,2*sizeof(int),ibuf,buf2);
 +    
 +    dd->ncg_home = buf2[0];
 +    dd->nat_home = buf2[1];
 +    dd->ncg_tot  = dd->ncg_home;
 +    dd->nat_tot  = dd->nat_home;
 +    if (dd->ncg_home > dd->cg_nalloc || dd->cg_nalloc == 0)
 +    {
 +        dd->cg_nalloc = over_alloc_dd(dd->ncg_home);
 +        srenew(dd->index_gl,dd->cg_nalloc);
 +        srenew(dd->cgindex,dd->cg_nalloc+1);
 +    }
 +    if (bMaster)
 +    {
 +        for(i=0; i<dd->nnodes; i++)
 +        {
 +            ma->ibuf[i] = ma->ncg[i]*sizeof(int);
 +            ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
 +        }
 +    }
 +    
 +    dd_scatterv(dd,
 +                DDMASTER(dd) ? ma->ibuf : NULL,
 +                DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
 +                DDMASTER(dd) ? ma->cg : NULL,
 +                dd->ncg_home*sizeof(int),dd->index_gl);
 +    
 +    /* Determine the home charge group sizes */
 +    dd->cgindex[0] = 0;
 +    for(i=0; i<dd->ncg_home; i++)
 +    {
 +        cg_gl = dd->index_gl[i];
 +        dd->cgindex[i+1] =
 +            dd->cgindex[i] + cgs->index[cg_gl+1] - cgs->index[cg_gl];
 +    }
 +    
 +    if (debug)
 +    {
 +        fprintf(debug,"Home charge groups:\n");
 +        for(i=0; i<dd->ncg_home; i++)
 +        {
 +            fprintf(debug," %d",dd->index_gl[i]);
 +            if (i % 10 == 9) 
 +                fprintf(debug,"\n");
 +        }
 +        fprintf(debug,"\n");
 +    }
 +}
 +
 +static int compact_and_copy_vec_at(int ncg,int *move,
 +                                   int *cgindex,
 +                                   int nvec,int vec,
 +                                   rvec *src,gmx_domdec_comm_t *comm,
 +                                   gmx_bool bCompact)
 +{
 +    int m,icg,i,i0,i1,nrcg;
 +    int home_pos;
 +    int pos_vec[DIM*2];
 +    
 +    home_pos = 0;
 +
 +    for(m=0; m<DIM*2; m++)
 +    {
 +        pos_vec[m] = 0;
 +    }
 +    
 +    i0 = 0;
 +    for(icg=0; icg<ncg; icg++)
 +    {
 +        i1 = cgindex[icg+1];
 +        m = move[icg];
 +        if (m == -1)
 +        {
 +            if (bCompact)
 +            {
 +                /* Compact the home array in place */
 +                for(i=i0; i<i1; i++)
 +                {
 +                    copy_rvec(src[i],src[home_pos++]);
 +                }
 +            }
 +        }
 +        else
 +        {
 +            /* Copy to the communication buffer */
 +            nrcg = i1 - i0;
 +            pos_vec[m] += 1 + vec*nrcg;
 +            for(i=i0; i<i1; i++)
 +            {
 +                copy_rvec(src[i],comm->cgcm_state[m][pos_vec[m]++]);
 +            }
 +            pos_vec[m] += (nvec - vec - 1)*nrcg;
 +        }
 +        if (!bCompact)
 +        {
 +            home_pos += i1 - i0;
 +        }
 +        i0 = i1;
 +    }
 +    
 +    return home_pos;
 +}
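/* Editor's note, not part of the merge: layout of one charge group in the
 * cgcm_state send buffer that compact_and_copy_vec_cg/_at fill. A group of
 * nrcg atoms with nvec state vectors occupies 1 + nvec*nrcg rvec slots:
 *
 *   [cg_cm][vec 0: atom 0..nrcg-1][vec 1: atom 0..nrcg-1]...[vec nvec-1: ...]
 *
 * which is why the loop above advances pos_vec[m] by 1 + vec*nrcg before
 * copying and by (nvec - vec - 1)*nrcg afterwards. The hypothetical helper
 * below gives the offset of vector vec, atom a within such a group.
 */
static int cg_buffer_offset(int nrcg, int vec, int a)
{
    return 1 + vec*nrcg + a;
}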
 +
 +static int compact_and_copy_vec_cg(int ncg,int *move,
 +                                   int *cgindex,
 +                                   int nvec,rvec *src,gmx_domdec_comm_t *comm,
 +                                   gmx_bool bCompact)
 +{
 +    int m,icg,i0,i1,nrcg;
 +    int home_pos;
 +    int pos_vec[DIM*2];
 +    
 +    home_pos = 0;
 +    
 +    for(m=0; m<DIM*2; m++)
 +    {
 +        pos_vec[m] = 0;
 +    }
 +    
 +    i0 = 0;
 +    for(icg=0; icg<ncg; icg++)
 +    {
 +        i1 = cgindex[icg+1];
 +        m = move[icg];
 +        if (m == -1)
 +        {
 +            if (bCompact)
 +            {
 +                /* Compact the home array in place */
 +                copy_rvec(src[icg],src[home_pos++]);
 +            }
 +        }
 +        else
 +        {
 +            nrcg = i1 - i0;
 +            /* Copy to the communication buffer */
 +            copy_rvec(src[icg],comm->cgcm_state[m][pos_vec[m]]);
 +            pos_vec[m] += 1 + nrcg*nvec;
 +        }
 +        i0 = i1;
 +    }
 +    if (!bCompact)
 +    {
 +        home_pos = ncg;
 +    }
 +    
 +    return home_pos;
 +}
 +
 +static int compact_ind(int ncg,int *move,
 +                       int *index_gl,int *cgindex,
 +                       int *gatindex,
 +                       gmx_ga2la_t ga2la,char *bLocalCG,
 +                       int *cginfo)
 +{
 +    int cg,nat,a0,a1,a,a_gl;
 +    int home_pos;
 +
 +    home_pos = 0;
 +    nat = 0;
 +    for(cg=0; cg<ncg; cg++)
 +    {
 +        a0 = cgindex[cg];
 +        a1 = cgindex[cg+1];
 +        if (move[cg] == -1)
 +        {
 +            /* Compact the home arrays in place.
 +             * Anything that can be done here avoids access to global arrays.
 +             */
 +            cgindex[home_pos] = nat;
 +            for(a=a0; a<a1; a++)
 +            {
 +                a_gl = gatindex[a];
 +                gatindex[nat] = a_gl;
 +                /* The cell number stays 0, so we don't need to set it */
 +                ga2la_change_la(ga2la,a_gl,nat);
 +                nat++;
 +            }
 +            index_gl[home_pos] = index_gl[cg];
 +            cginfo[home_pos]   = cginfo[cg];
 +            /* The charge group remains local, so bLocalCG does not change */
 +            home_pos++;
 +        }
 +        else
 +        {
 +            /* Clear the global indices */
 +            for(a=a0; a<a1; a++)
 +            {
 +                ga2la_del(ga2la,gatindex[a]);
 +            }
 +            if (bLocalCG)
 +            {
 +                bLocalCG[index_gl[cg]] = FALSE;
 +            }
 +        }
 +    }
 +    cgindex[home_pos] = nat;
 +    
 +    return home_pos;
 +}
 +
 +static void clear_and_mark_ind(int ncg,int *move,
 +                               int *index_gl,int *cgindex,int *gatindex,
 +                               gmx_ga2la_t ga2la,char *bLocalCG,
 +                               int *cell_index)
 +{
 +    int cg,a0,a1,a;
 +    
 +    for(cg=0; cg<ncg; cg++)
 +    {
 +        if (move[cg] >= 0)
 +        {
 +            a0 = cgindex[cg];
 +            a1 = cgindex[cg+1];
 +            /* Clear the global indices */
 +            for(a=a0; a<a1; a++)
 +            {
 +                ga2la_del(ga2la,gatindex[a]);
 +            }
 +            if (bLocalCG)
 +            {
 +                bLocalCG[index_gl[cg]] = FALSE;
 +            }
 +            /* Signal that this cg has moved using the ns cell index.
 +             * Here we set it to -1.
 +             * fill_grid will change it from -1 to 4*grid->ncells.
 +             */
 +            cell_index[cg] = -1;
 +        }
 +    }
 +}
 +
 +static void print_cg_move(FILE *fplog,
 +                          gmx_domdec_t *dd,
 +                          gmx_large_int_t step,int cg,int dim,int dir,
 +                          gmx_bool bHaveLimitdAndCMOld,real limitd,
 +                          rvec cm_old,rvec cm_new,real pos_d)
 +{
 +    gmx_domdec_comm_t *comm;
 +    char buf[22];
 +
 +    comm = dd->comm;
 +
 +    fprintf(fplog,"\nStep %s:\n",gmx_step_str(step,buf));
 +    if (bHaveLimitdAndCMOld)
 +    {
-                  * We to a separate check if a charge did not move too far.
++        fprintf(fplog,"The charge group starting at atom %d moved more than the distance allowed by the domain decomposition (%f) in direction %c\n",
 +                ddglatnr(dd,dd->cgindex[cg]),limitd,dim2char(dim));
 +    }
 +    else
 +    {
 +        fprintf(fplog,"The charge group starting at atom %d moved than the distance allowed by the domain decomposition in direction %c\n",
 +                ddglatnr(dd,dd->cgindex[cg]),dim2char(dim));
 +    }
 +    fprintf(fplog,"distance out of cell %f\n",
 +            dir==1 ? pos_d - comm->cell_x1[dim] : pos_d - comm->cell_x0[dim]);
 +    if (bHaveLimitdAndCMOld)
 +    {
 +        fprintf(fplog,"Old coordinates: %8.3f %8.3f %8.3f\n",
 +                cm_old[XX],cm_old[YY],cm_old[ZZ]);
 +    }
 +    fprintf(fplog,"New coordinates: %8.3f %8.3f %8.3f\n",
 +            cm_new[XX],cm_new[YY],cm_new[ZZ]);
 +    fprintf(fplog,"Old cell boundaries in direction %c: %8.3f %8.3f\n",
 +            dim2char(dim),
 +            comm->old_cell_x0[dim],comm->old_cell_x1[dim]);
 +    fprintf(fplog,"New cell boundaries in direction %c: %8.3f %8.3f\n",
 +            dim2char(dim),
 +            comm->cell_x0[dim],comm->cell_x1[dim]);
 +}
 +
 +static void cg_move_error(FILE *fplog,
 +                          gmx_domdec_t *dd,
 +                          gmx_large_int_t step,int cg,int dim,int dir,
 +                          gmx_bool bHaveLimitdAndCMOld,real limitd,
 +                          rvec cm_old,rvec cm_new,real pos_d)
 +{
 +    if (fplog)
 +    {
 +        print_cg_move(fplog, dd,step,cg,dim,dir,
 +                      bHaveLimitdAndCMOld,limitd,cm_old,cm_new,pos_d);
 +    }
 +    print_cg_move(stderr,dd,step,cg,dim,dir,
 +                  bHaveLimitdAndCMOld,limitd,cm_old,cm_new,pos_d);
 +    gmx_fatal(FARGS,
 +              "A charge group moved too far between two domain decomposition steps\n"
 +              "This usually means that your system is not well equilibrated");
 +}
 +
 +static void rotate_state_atom(t_state *state,int a)
 +{
 +    int est;
 +
 +    for(est=0; est<estNR; est++)
 +    {
 +        if (EST_DISTR(est) && (state->flags & (1<<est))) {
 +            switch (est) {
 +            case estX:
 +                /* Rotate the complete state; for a rectangular box only */
 +                state->x[a][YY] = state->box[YY][YY] - state->x[a][YY];
 +                state->x[a][ZZ] = state->box[ZZ][ZZ] - state->x[a][ZZ];
 +                break;
 +            case estV:
 +                state->v[a][YY] = -state->v[a][YY];
 +                state->v[a][ZZ] = -state->v[a][ZZ];
 +                break;
 +            case estSDX:
 +                state->sd_X[a][YY] = -state->sd_X[a][YY];
 +                state->sd_X[a][ZZ] = -state->sd_X[a][ZZ];
 +                break;
 +            case estCGP:
 +                state->cg_p[a][YY] = -state->cg_p[a][YY];
 +                state->cg_p[a][ZZ] = -state->cg_p[a][ZZ];
 +                break;
 +            case estDISRE_INITF:
 +            case estDISRE_RM3TAV:
 +            case estORIRE_INITF:
 +            case estORIRE_DTAV:
 +                /* These are distances, so not affected by rotation */
 +                break;
 +            default:
 +                gmx_incons("Unknown state entry encountered in rotate_state_atom");            
 +            }
 +        }
 +    }
 +}
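/* Editor's illustrative sketch, not part of the merge: the screw-pbc
 * operation in rotate_state_atom above is a 180 degree rotation about the
 * x axis of a rectangular box. Position-like quantities are reflected
 * within the box, while direction-like quantities (velocities, etc.) just
 * change sign in y and z. Helper names are hypothetical; double[3]
 * replaces rvec.
 */
static void screw_rotate_position(double r[3], double box_yy, double box_zz)
{
    r[1] = box_yy - r[1];
    r[2] = box_zz - r[2];
}

static void screw_rotate_direction(double v[3])
{
    v[1] = -v[1];
    v[2] = -v[2];
}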
 +
 +static int dd_redistribute_cg(FILE *fplog,gmx_large_int_t step,
 +                              gmx_domdec_t *dd,ivec tric_dir,
 +                              t_state *state,rvec **f,
 +                              t_forcerec *fr,t_mdatoms *md,
 +                              gmx_bool bCompact,
 +                              t_nrnb *nrnb)
 +{
 +    int  *move;
 +    int  npbcdim;
 +    int  ncg[DIM*2],nat[DIM*2];
 +    int  c,i,cg,k,k0,k1,d,dim,dim2,dir,d2,d3,d4,cell_d;
 +    int  mc,cdd,nrcg,ncg_recv,nat_recv,nvs,nvr,nvec,vec;
 +    int  sbuf[2],rbuf[2];
 +    int  home_pos_cg,home_pos_at,ncg_stay_home,buf_pos;
 +    int  flag;
 +    gmx_bool bV=FALSE,bSDX=FALSE,bCGP=FALSE;
 +    gmx_bool bScrew;
 +    ivec dev;
 +    real inv_ncg,pos_d;
 +    matrix tcm;
 +    rvec *cg_cm,cell_x0,cell_x1,limitd,limit0,limit1,cm_new;
 +    atom_id *cgindex;
 +    cginfo_mb_t *cginfo_mb;
 +    gmx_domdec_comm_t *comm;
 +    
 +    if (dd->bScrewPBC)
 +    {
 +        check_screw_box(state->box);
 +    }
 +    
 +    comm  = dd->comm;
 +    cg_cm = fr->cg_cm;
 +    
 +    for(i=0; i<estNR; i++)
 +    {
 +        if (EST_DISTR(i))
 +        {
 +            switch (i)
 +            {
 +            case estX:   /* Always present */            break;
 +            case estV:   bV   = (state->flags & (1<<i)); break;
 +            case estSDX: bSDX = (state->flags & (1<<i)); break;
 +            case estCGP: bCGP = (state->flags & (1<<i)); break;
 +            case estLD_RNG:
 +            case estLD_RNGI:
 +            case estDISRE_INITF:
 +            case estDISRE_RM3TAV:
 +            case estORIRE_INITF:
 +            case estORIRE_DTAV:
 +                /* No processing required */
 +                break;
 +            default:
 +            gmx_incons("Unknown state entry encountered in dd_redistribute_cg");
 +            }
 +        }
 +    }
 +    
 +    if (dd->ncg_tot > comm->nalloc_int)
 +    {
 +        comm->nalloc_int = over_alloc_dd(dd->ncg_tot);
 +        srenew(comm->buf_int,comm->nalloc_int);
 +    }
 +    move = comm->buf_int;
 +    
 +    /* Clear the count */
 +    for(c=0; c<dd->ndim*2; c++)
 +    {
 +        ncg[c] = 0;
 +        nat[c] = 0;
 +    }
 +
 +    npbcdim = dd->npbcdim;
 +
 +    for(d=0; (d<DIM); d++)
 +    {
 +        limitd[d] = dd->comm->cellsize_min[d];
 +        if (d >= npbcdim && dd->ci[d] == 0)
 +        {
 +            cell_x0[d] = -GMX_FLOAT_MAX;
 +        }
 +        else
 +        {
 +            cell_x0[d] = comm->cell_x0[d];
 +        }
 +        if (d >= npbcdim && dd->ci[d] == dd->nc[d] - 1)
 +        {
 +            cell_x1[d] = GMX_FLOAT_MAX;
 +        }
 +        else
 +        {
 +            cell_x1[d] = comm->cell_x1[d];
 +        }
 +        if (d < npbcdim)
 +        {
 +            limit0[d] = comm->old_cell_x0[d] - limitd[d];
 +            limit1[d] = comm->old_cell_x1[d] + limitd[d];
 +        }
 +        else
 +        {
 +            /* We check after communication if a charge group moved
 +             * more than one cell. Set the pre-comm check limit to float_max.
 +             */
 +            limit0[d] = -GMX_FLOAT_MAX;
 +            limit1[d] =  GMX_FLOAT_MAX;
 +        }
 +    }
 +    
 +    make_tric_corr_matrix(npbcdim,state->box,tcm);
 +    
 +    cgindex = dd->cgindex;
 +    
 +    /* Compute the center of geometry for all home charge groups
 +     * and put them in the box and determine where they should go.
 +     */
 +    for(cg=0; cg<dd->ncg_home; cg++)
 +    {
 +        k0   = cgindex[cg];
 +        k1   = cgindex[cg+1];
 +        nrcg = k1 - k0;
 +        if (nrcg == 1)
 +        {
 +            copy_rvec(state->x[k0],cm_new);
 +        }
 +        else
 +        {
 +            inv_ncg = 1.0/nrcg;
 +            
 +            clear_rvec(cm_new);
 +            for(k=k0; (k<k1); k++)
 +            {
 +                rvec_inc(cm_new,state->x[k]);
 +            }
 +            for(d=0; (d<DIM); d++)
 +            {
 +                cm_new[d] = inv_ncg*cm_new[d];
 +            }
 +        }
 +        
 +        clear_ivec(dev);
 +        /* Do pbc and check DD cell boundary crossings */
 +        for(d=DIM-1; d>=0; d--)
 +        {
 +            if (dd->nc[d] > 1)
 +            {
 +                bScrew = (dd->bScrewPBC && d == XX);
 +                /* Determine the location of this cg in lattice coordinates */
 +                pos_d = cm_new[d];
 +                if (tric_dir[d])
 +                {
 +                    for(d2=d+1; d2<DIM; d2++)
 +                    {
 +                        pos_d += cm_new[d2]*tcm[d2][d];
 +                    }
 +                }
 +                /* Put the charge group in the triclinic unit-cell */
 +                if (pos_d >= cell_x1[d])
 +                {
 +                    if (pos_d >= limit1[d])
 +                    {
 +                        cg_move_error(fplog,dd,step,cg,d,1,TRUE,limitd[d],
 +                                      cg_cm[cg],cm_new,pos_d);
 +                    }
 +                    dev[d] = 1;
 +                    if (dd->ci[d] == dd->nc[d] - 1)
 +                    {
 +                        rvec_dec(cm_new,state->box[d]);
 +                        if (bScrew)
 +                        {
 +                            cm_new[YY] = state->box[YY][YY] - cm_new[YY];
 +                            cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
 +                        }
 +                        for(k=k0; (k<k1); k++)
 +                        {
 +                            rvec_dec(state->x[k],state->box[d]);
 +                            if (bScrew)
 +                            {
 +                                rotate_state_atom(state,k);
 +                            }
 +                        }
 +                    }
 +                }
 +                else if (pos_d < cell_x0[d])
 +                {
 +                    if (pos_d < limit0[d])
 +                    {
 +                        cg_move_error(fplog,dd,step,cg,d,-1,TRUE,limitd[d],
 +                                      cg_cm[cg],cm_new,pos_d);
 +                    }
 +                    dev[d] = -1;
 +                    if (dd->ci[d] == 0)
 +                    {
 +                        rvec_inc(cm_new,state->box[d]);
 +                        if (bScrew)
 +                        {
 +                            cm_new[YY] = state->box[YY][YY] - cm_new[YY];
 +                            cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
 +                        }
 +                        for(k=k0; (k<k1); k++)
 +                        {
 +                            rvec_inc(state->x[k],state->box[d]);
 +                            if (bScrew)
 +                            {
 +                                rotate_state_atom(state,k);
 +                            }
 +                        }
 +                    }
 +                }
 +            }
 +            else if (d < npbcdim)
 +            {
 +                /* Put the charge group in the rectangular unit-cell */
 +                while (cm_new[d] >= state->box[d][d])
 +                {
 +                    rvec_dec(cm_new,state->box[d]);
 +                    for(k=k0; (k<k1); k++)
 +                    {
 +                        rvec_dec(state->x[k],state->box[d]);
 +                    }
 +                }
 +                while (cm_new[d] < 0)
 +                {
 +                    rvec_inc(cm_new,state->box[d]);
 +                    for(k=k0; (k<k1); k++)
 +                    {
 +                        rvec_inc(state->x[k],state->box[d]);
 +                    }
 +                }
 +            }
 +        }
 +    
 +        copy_rvec(cm_new,cg_cm[cg]);
 +        
 +        /* Determine where this cg should go */
 +        flag = 0;
 +        mc = -1;
 +        for(d=0; d<dd->ndim; d++)
 +        {
 +            dim = dd->dim[d];
 +            if (dev[dim] == 1)
 +            {
 +                flag |= DD_FLAG_FW(d);
 +                if (mc == -1)
 +                {
 +                    mc = d*2;
 +                }
 +            }
 +            else if (dev[dim] == -1)
 +            {
 +                flag |= DD_FLAG_BW(d);
 +                if (mc == -1) {
 +                    if (dd->nc[dim] > 2)
 +                    {
 +                        mc = d*2 + 1;
 +                    }
 +                    else
 +                    {
 +                        mc = d*2;
 +                    }
 +                }
 +            }
 +        }
 +        move[cg] = mc;
 +        if (mc >= 0)
 +        {
 +            if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
 +            {
 +                comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
 +                srenew(comm->cggl_flag[mc],comm->cggl_flag_nalloc[mc]*DD_CGIBS);
 +            }
 +            comm->cggl_flag[mc][ncg[mc]*DD_CGIBS  ] = dd->index_gl[cg];
 +            /* We store the cg size in the lower 16 bits
 +             * and the place where the charge group should go
 +             * in the next 6 bits. This saves some communication volume.
 +             */
 +            comm->cggl_flag[mc][ncg[mc]*DD_CGIBS+1] = nrcg | flag;
 +            ncg[mc] += 1;
 +            nat[mc] += nrcg;
 +        }
 +    }
 +    
 +    inc_nrnb(nrnb,eNR_CGCM,dd->nat_home);
 +    inc_nrnb(nrnb,eNR_RESETX,dd->ncg_home);
 +    
 +    nvec = 1;
 +    if (bV)
 +    {
 +        nvec++;
 +    }
 +    if (bSDX)
 +    {
 +        nvec++;
 +    }
 +    if (bCGP)
 +    {
 +        nvec++;
 +    }
 +    
 +    /* Make sure the communication buffers are large enough */
 +    for(mc=0; mc<dd->ndim*2; mc++)
 +    {
 +        nvr = ncg[mc] + nat[mc]*nvec;
 +        if (nvr > comm->cgcm_state_nalloc[mc])
 +        {
 +            comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr);
 +            srenew(comm->cgcm_state[mc],comm->cgcm_state_nalloc[mc]);
 +        }
 +    }
 +    
 +    /* Recalculating cg_cm might be cheaper than communicating,
 +     * but that could give rise to rounding issues.
 +     */
 +    home_pos_cg =
 +        compact_and_copy_vec_cg(dd->ncg_home,move,cgindex,
 +                                nvec,cg_cm,comm,bCompact);
 +    
 +    vec = 0;
 +    home_pos_at =
 +        compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
 +                                nvec,vec++,state->x,comm,bCompact);
 +    if (bV)
 +    {
 +        compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
 +                                nvec,vec++,state->v,comm,bCompact);
 +    }
 +    if (bSDX)
 +    {
 +        compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
 +                                nvec,vec++,state->sd_X,comm,bCompact);
 +    }
 +    if (bCGP)
 +    {
 +        compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
 +                                nvec,vec++,state->cg_p,comm,bCompact);
 +    }
 +    
 +    if (bCompact)
 +    {
 +        compact_ind(dd->ncg_home,move,
 +                    dd->index_gl,dd->cgindex,dd->gatindex,
 +                    dd->ga2la,comm->bLocalCG,
 +                    fr->cginfo);
 +    }
 +    else
 +    {
 +        clear_and_mark_ind(dd->ncg_home,move,
 +                           dd->index_gl,dd->cgindex,dd->gatindex,
 +                           dd->ga2la,comm->bLocalCG,
 +                           fr->ns.grid->cell_index);
 +    }
 +    
 +    cginfo_mb = fr->cginfo_mb;
 +
 +    ncg_stay_home = home_pos_cg;
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        dim = dd->dim[d];
 +        ncg_recv = 0;
 +        nat_recv = 0;
 +        nvr      = 0;
 +        for(dir=0; dir<(dd->nc[dim]==2 ? 1 : 2); dir++)
 +        {
 +            cdd = d*2 + dir;
 +            /* Communicate the cg and atom counts */
 +            sbuf[0] = ncg[cdd];
 +            sbuf[1] = nat[cdd];
 +            if (debug)
 +            {
 +                fprintf(debug,"Sending ddim %d dir %d: ncg %d nat %d\n",
 +                        d,dir,sbuf[0],sbuf[1]);
 +            }
 +            dd_sendrecv_int(dd, d, dir, sbuf, 2, rbuf, 2);
 +            
 +            if ((ncg_recv+rbuf[0])*DD_CGIBS > comm->nalloc_int)
 +            {
 +                comm->nalloc_int = over_alloc_dd((ncg_recv+rbuf[0])*DD_CGIBS);
 +                srenew(comm->buf_int,comm->nalloc_int);
 +            }
 +            
 +            /* Communicate the charge group indices, sizes and flags */
 +            dd_sendrecv_int(dd, d, dir,
 +                            comm->cggl_flag[cdd], sbuf[0]*DD_CGIBS,
 +                            comm->buf_int+ncg_recv*DD_CGIBS, rbuf[0]*DD_CGIBS);
 +            
 +            nvs = ncg[cdd] + nat[cdd]*nvec;
 +            i   = rbuf[0]  + rbuf[1] *nvec;
 +            vec_rvec_check_alloc(&comm->vbuf,nvr+i);
 +            
 +            /* Communicate cgcm and state */
 +            dd_sendrecv_rvec(dd, d, dir,
 +                             comm->cgcm_state[cdd], nvs,
 +                             comm->vbuf.v+nvr, i);
 +            ncg_recv += rbuf[0];
 +            nat_recv += rbuf[1];
 +            nvr      += i;
 +        }
 +        
 +        /* Process the received charge groups */
 +        buf_pos = 0;
 +        for(cg=0; cg<ncg_recv; cg++)
 +        {
 +            flag = comm->buf_int[cg*DD_CGIBS+1];
 +
 +            if (dim >= npbcdim && dd->nc[dim] > 2)
 +            {
 +                /* No pbc in this dim and more than one domain boundary.
-                      comm->vbuf.v[buf_pos][d] > cell_x1[dim]) ||
++                 * We do a separate check if a charge group didn't move too far.
 +                 */
 +                if (((flag & DD_FLAG_FW(d)) &&
-                      comm->vbuf.v[buf_pos][d] < cell_x0[dim]))
++                     comm->vbuf.v[buf_pos][dim] > cell_x1[dim]) ||
 +                    ((flag & DD_FLAG_BW(d)) &&
-                     cg_move_error(fplog,dd,step,cg,d,
++                     comm->vbuf.v[buf_pos][dim] < cell_x0[dim]))
 +                {
-                                    comm->vbuf.v[buf_pos][d]);
++                    cg_move_error(fplog,dd,step,cg,dim,
 +                                  (flag & DD_FLAG_FW(d)) ? 1 : 0,
 +                                   FALSE,0,
 +                                   comm->vbuf.v[buf_pos],
 +                                   comm->vbuf.v[buf_pos],
++                                   comm->vbuf.v[buf_pos][dim]);
 +                }
 +            }
 +
 +            mc = -1;
 +            if (d < dd->ndim-1)
 +            {
 +                /* Check which direction this cg should go */
 +                for(d2=d+1; (d2<dd->ndim && mc==-1); d2++)
 +                {
 +                    if (dd->bGridJump)
 +                    {
 +                        /* The cell boundaries for dimension d2 are not equal
 +                         * for each cell row of the lower dimension(s),
 +                         * therefore we might need to redetermine where
 +                         * this cg should go.
 +                         */
 +                        dim2 = dd->dim[d2];
 +                        /* If this cg crosses the box boundary in dimension d2
 +                         * we can use the communicated flag, so we do not
 +                         * have to worry about pbc.
 +                         */
 +                        if (!((dd->ci[dim2] == dd->nc[dim2]-1 &&
 +                               (flag & DD_FLAG_FW(d2))) ||
 +                              (dd->ci[dim2] == 0 &&
 +                               (flag & DD_FLAG_BW(d2)))))
 +                        {
 +                            /* Clear the two flags for this dimension */
 +                            flag &= ~(DD_FLAG_FW(d2) | DD_FLAG_BW(d2));
 +                            /* Determine the location of this cg
 +                             * in lattice coordinates
 +                             */
 +                            pos_d = comm->vbuf.v[buf_pos][dim2];
 +                            if (tric_dir[dim2])
 +                            {
 +                                for(d3=dim2+1; d3<DIM; d3++)
 +                                {
 +                                    pos_d +=
 +                                        comm->vbuf.v[buf_pos][d3]*tcm[d3][dim2];
 +                                }
 +                            }
 +                            /* Check that we are not at the box edge.
 +                             * pbc is only handled in the first step above,
 +                             * but due to different rounding this check
 +                             * could move the cg over pbc where the first
 +                             * step did not.
 +                             */
 +                            if (pos_d >= cell_x1[dim2] &&
 +                                dd->ci[dim2] != dd->nc[dim2]-1)
 +                            {
 +                                flag |= DD_FLAG_FW(d2);
 +                            }
 +                            else if (pos_d < cell_x0[dim2] &&
 +                                     dd->ci[dim2] != 0)
 +                            {
 +                                flag |= DD_FLAG_BW(d2);
 +                            }
 +                            comm->buf_int[cg*DD_CGIBS+1] = flag;
 +                        }
 +                    }
 +                    /* Set to which neighboring cell this cg should go */
 +                    if (flag & DD_FLAG_FW(d2))
 +                    {
 +                        mc = d2*2;
 +                    }
 +                    else if (flag & DD_FLAG_BW(d2))
 +                    {
 +                        if (dd->nc[dd->dim[d2]] > 2)
 +                        {
 +                            mc = d2*2+1;
 +                        }
 +                        else
 +                        {
 +                            mc = d2*2;
 +                        }
 +                    }
 +                }
 +            }
 +            
 +            nrcg = flag & DD_FLAG_NRCG;
 +            if (mc == -1)
 +            {
 +                if (home_pos_cg+1 > dd->cg_nalloc)
 +                {
 +                    dd->cg_nalloc = over_alloc_dd(home_pos_cg+1);
 +                    srenew(dd->index_gl,dd->cg_nalloc);
 +                    srenew(dd->cgindex,dd->cg_nalloc+1);
 +                }
 +                /* Set the global charge group index and size */
 +                dd->index_gl[home_pos_cg] = comm->buf_int[cg*DD_CGIBS];
 +                dd->cgindex[home_pos_cg+1] = dd->cgindex[home_pos_cg] + nrcg;
 +                /* Copy the state from the buffer */
 +                if (home_pos_cg >= fr->cg_nalloc)
 +                {
 +                    dd_realloc_fr_cg(fr,home_pos_cg+1);
 +                    cg_cm = fr->cg_cm;
 +                }
 +                copy_rvec(comm->vbuf.v[buf_pos++],cg_cm[home_pos_cg]);
 +                /* Set the cginfo */
 +                fr->cginfo[home_pos_cg] = ddcginfo(cginfo_mb,
 +                                                   dd->index_gl[home_pos_cg]);
 +                if (comm->bLocalCG)
 +                {
 +                    comm->bLocalCG[dd->index_gl[home_pos_cg]] = TRUE;
 +                }
 +
 +                if (home_pos_at+nrcg > state->nalloc)
 +                {
 +                    dd_realloc_state(state,f,home_pos_at+nrcg);
 +                }
 +                for(i=0; i<nrcg; i++)
 +                {
 +                    copy_rvec(comm->vbuf.v[buf_pos++],
 +                              state->x[home_pos_at+i]);
 +                }
 +                if (bV)
 +                {
 +                    for(i=0; i<nrcg; i++)
 +                    {
 +                        copy_rvec(comm->vbuf.v[buf_pos++],
 +                                  state->v[home_pos_at+i]);
 +                    }
 +                }
 +                if (bSDX)
 +                {
 +                    for(i=0; i<nrcg; i++)
 +                    {
 +                        copy_rvec(comm->vbuf.v[buf_pos++],
 +                                  state->sd_X[home_pos_at+i]);
 +                    }
 +                }
 +                if (bCGP)
 +                {
 +                    for(i=0; i<nrcg; i++)
 +                    {
 +                        copy_rvec(comm->vbuf.v[buf_pos++],
 +                                  state->cg_p[home_pos_at+i]);
 +                    }
 +                }
 +                home_pos_cg += 1;
 +                home_pos_at += nrcg;
 +            }
 +            else
 +            {
 +                /* Reallocate the buffers if necessary  */
 +                if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
 +                {
 +                    comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
 +                    srenew(comm->cggl_flag[mc],comm->cggl_flag_nalloc[mc]*DD_CGIBS);
 +                }
 +                nvr = ncg[mc] + nat[mc]*nvec;
 +                if (nvr + 1 + nrcg*nvec > comm->cgcm_state_nalloc[mc])
 +                {
 +                    comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr + 1 + nrcg*nvec);
 +                    srenew(comm->cgcm_state[mc],comm->cgcm_state_nalloc[mc]);
 +                }
 +                /* Copy from the receive to the send buffers */
 +                memcpy(comm->cggl_flag[mc] + ncg[mc]*DD_CGIBS,
 +                       comm->buf_int + cg*DD_CGIBS,
 +                       DD_CGIBS*sizeof(int));
 +                memcpy(comm->cgcm_state[mc][nvr],
 +                       comm->vbuf.v[buf_pos],
 +                       (1+nrcg*nvec)*sizeof(rvec));
 +                buf_pos += 1 + nrcg*nvec;
 +                ncg[mc] += 1;
 +                nat[mc] += nrcg;
 +            }
 +        }
 +    }
 +    
 +    /* With sorting (!bCompact) the indices are now only partially up to date
 +     * and ncg_home and nat_home are not the real count, since there are
 +     * "holes" in the arrays for the charge groups that moved to neighbors.
 +     */
 +    dd->ncg_home = home_pos_cg;
 +    dd->nat_home = home_pos_at;
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"Finished repartitioning\n");
 +    }
 +
 +    return ncg_stay_home;
 +}
 +
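 +/* Accumulate a cycle count into counter ddCycl of the DD communication
 + * struct and keep track of the largest single contribution.
 + */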
 +void dd_cycles_add(gmx_domdec_t *dd,float cycles,int ddCycl)
 +{
 +    dd->comm->cycl[ddCycl] += cycles;
 +    dd->comm->cycl_n[ddCycl]++;
 +    if (cycles > dd->comm->cycl_max[ddCycl])
 +    {
 +        dd->comm->cycl_max[ddCycl] = cycles;
 +    }
 +}
 +
 +static double force_flop_count(t_nrnb *nrnb)
 +{
 +    int i;
 +    double sum;
 +    const char *name;
 +
 +    sum = 0;
 +    for(i=eNR_NBKERNEL010; i<eNR_NBKERNEL_FREE_ENERGY; i++)
 +    {
 +        /* To get closer to the real timings, we halve the count
 +         * for the normal loops and halve it again for water loops.
 +         */
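 +        /* Illustrative example: a normal kernel executed n times with
 +         * cost c contributes 0.50*n*c to the estimate, a W3/W4 water
 +         * kernel only 0.25*n*c.
 +         */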
 +        name = nrnb_str(i);
 +        if (strstr(name,"W3") != NULL || strstr(name,"W4") != NULL)
 +        {
 +            sum += nrnb->n[i]*0.25*cost_nrnb(i);
 +        }
 +        else
 +        {
 +            sum += nrnb->n[i]*0.50*cost_nrnb(i);
 +        }
 +    }
 +    for(i=eNR_NBKERNEL_FREE_ENERGY; i<=eNR_NB14; i++)
 +    {
 +        name = nrnb_str(i);
 +        if (strstr(name,"W3") != NULL || strstr(name,"W4") != NULL)
 +        {
 +            sum += nrnb->n[i]*cost_nrnb(i);
 +        }
 +    }
 +    for(i=eNR_BONDS; i<=eNR_WALLS; i++)
 +    {
 +        sum += nrnb->n[i]*cost_nrnb(i);
 +    }
 +
 +    return sum;
 +}
 +
 +void dd_force_flop_start(gmx_domdec_t *dd,t_nrnb *nrnb)
 +{
 +    if (dd->comm->eFlop)
 +    {
 +        dd->comm->flop -= force_flop_count(nrnb);
 +    }
 +}
 +void dd_force_flop_stop(gmx_domdec_t *dd,t_nrnb *nrnb)
 +{
 +    if (dd->comm->eFlop)
 +    {
 +        dd->comm->flop += force_flop_count(nrnb);
 +        dd->comm->flop_n++;
 +    }
 +}  
 +
 +static void clear_dd_cycle_counts(gmx_domdec_t *dd)
 +{
 +    int i;
 +    
 +    for(i=0; i<ddCyclNr; i++)
 +    {
 +        dd->comm->cycl[i] = 0;
 +        dd->comm->cycl_n[i] = 0;
 +        dd->comm->cycl_max[i] = 0;
 +    }
 +    dd->comm->flop = 0;
 +    dd->comm->flop_n = 0;
 +}
 +
 +static void get_load_distribution(gmx_domdec_t *dd,gmx_wallcycle_t wcycle)
 +{
 +    gmx_domdec_comm_t *comm;
 +    gmx_domdec_load_t *load;
 +    gmx_domdec_root_t *root=NULL;
 +    int  d,dim,cid,i,pos;
 +    float cell_frac=0,sbuf[DD_NLOAD_MAX];
 +    gmx_bool bSepPME;
 +    
 +    if (debug)
 +    {
 +        fprintf(debug,"get_load_distribution start\n");
 +    }
 +
 +    wallcycle_start(wcycle,ewcDDCOMMLOAD);
 +    
 +    comm = dd->comm;
 +    
 +    bSepPME = (dd->pme_nodeid >= 0);
 +    
 +    for(d=dd->ndim-1; d>=0; d--)
 +    {
 +        dim = dd->dim[d];
 +        /* Check if we participate in the communication in this dimension */
 +        if (d == dd->ndim-1 || 
 +            (dd->ci[dd->dim[d+1]]==0 && dd->ci[dd->dim[dd->ndim-1]]==0))
 +        {
 +            load = &comm->load[d];
 +            if (dd->bGridJump)
 +            {
 +                cell_frac = comm->cell_f1[d] - comm->cell_f0[d];
 +            }
 +            pos = 0;
 +            if (d == dd->ndim-1)
 +            {
 +                sbuf[pos++] = dd_force_load(comm);
 +                sbuf[pos++] = sbuf[0];
 +                if (dd->bGridJump)
 +                {
 +                    sbuf[pos++] = sbuf[0];
 +                    sbuf[pos++] = cell_frac;
 +                    if (d > 0)
 +                    {
 +                        sbuf[pos++] = comm->cell_f_max0[d];
 +                        sbuf[pos++] = comm->cell_f_min1[d];
 +                    }
 +                }
 +                if (bSepPME)
 +                {
 +                    sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
 +                    sbuf[pos++] = comm->cycl[ddCyclPME];
 +                }
 +            }
 +            else
 +            {
 +                sbuf[pos++] = comm->load[d+1].sum;
 +                sbuf[pos++] = comm->load[d+1].max;
 +                if (dd->bGridJump)
 +                {
 +                    sbuf[pos++] = comm->load[d+1].sum_m;
 +                    sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
 +                    sbuf[pos++] = comm->load[d+1].flags;
 +                    if (d > 0)
 +                    {
 +                        sbuf[pos++] = comm->cell_f_max0[d];
 +                        sbuf[pos++] = comm->cell_f_min1[d];
 +                    }
 +                }
 +                if (bSepPME)
 +                {
 +                    sbuf[pos++] = comm->load[d+1].mdf;
 +                    sbuf[pos++] = comm->load[d+1].pme;
 +                }
 +            }
 +            load->nload = pos;
 +            /* Communicate a row in DD direction d.
 +             * The communicators are setup such that the root always has rank 0.
 +             */
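 +            /* The buffer filled above always starts with the load sum and
 +             * maximum; with dynamic load balancing the cell-volume and
 +             * boundary information follows, and with separate PME nodes the
 +             * PP and PME cycle counts come last.
 +             */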
 +#ifdef GMX_MPI
 +            MPI_Gather(sbuf      ,load->nload*sizeof(float),MPI_BYTE,
 +                       load->load,load->nload*sizeof(float),MPI_BYTE,
 +                       0,comm->mpi_comm_load[d]);
 +#endif
 +            if (dd->ci[dim] == dd->master_ci[dim])
 +            {
 +                /* We are the root, process this row */
 +                if (comm->bDynLoadBal)
 +                {
 +                    root = comm->root[d];
 +                }
 +                load->sum = 0;
 +                load->max = 0;
 +                load->sum_m = 0;
 +                load->cvol_min = 1;
 +                load->flags = 0;
 +                load->mdf = 0;
 +                load->pme = 0;
 +                pos = 0;
 +                for(i=0; i<dd->nc[dim]; i++)
 +                {
 +                    load->sum += load->load[pos++];
 +                    load->max = max(load->max,load->load[pos]);
 +                    pos++;
 +                    if (dd->bGridJump)
 +                    {
 +                        if (root->bLimited)
 +                        {
 +                            /* This direction could not be load balanced properly,
 +                             * therefore we need to use the maximum instead of the average load.
 +                             */
 +                            load->sum_m = max(load->sum_m,load->load[pos]);
 +                        }
 +                        else
 +                        {
 +                            load->sum_m += load->load[pos];
 +                        }
 +                        pos++;
 +                        load->cvol_min = min(load->cvol_min,load->load[pos]);
 +                        pos++;
 +                        if (d < dd->ndim-1)
 +                        {
 +                            load->flags = (int)(load->load[pos++] + 0.5);
 +                        }
 +                        if (d > 0)
 +                        {
 +                            root->cell_f_max0[i] = load->load[pos++];
 +                            root->cell_f_min1[i] = load->load[pos++];
 +                        }
 +                    }
 +                    if (bSepPME)
 +                    {
 +                        load->mdf = max(load->mdf,load->load[pos]);
 +                        pos++;
 +                        load->pme = max(load->pme,load->load[pos]);
 +                        pos++;
 +                    }
 +                }
 +                if (comm->bDynLoadBal && root->bLimited)
 +                {
 +                    load->sum_m *= dd->nc[dim];
 +                    load->flags |= (1<<d);
 +                }
 +            }
 +        }
 +    }
 +
 +    if (DDMASTER(dd))
 +    {
 +        comm->nload      += dd_load_count(comm);
 +        comm->load_step  += comm->cycl[ddCyclStep];
 +        comm->load_sum   += comm->load[0].sum;
 +        comm->load_max   += comm->load[0].max;
 +        if (comm->bDynLoadBal)
 +        {
 +            for(d=0; d<dd->ndim; d++)
 +            {
 +                if (comm->load[0].flags & (1<<d))
 +                {
 +                    comm->load_lim[d]++;
 +                }
 +            }
 +        }
 +        if (bSepPME)
 +        {
 +            comm->load_mdf += comm->load[0].mdf;
 +            comm->load_pme += comm->load[0].pme;
 +        }
 +    }
 +
 +    wallcycle_stop(wcycle,ewcDDCOMMLOAD);
 +    
 +    if (debug)
 +    {
 +        fprintf(debug,"get_load_distribution finished\n");
 +    }
 +}
 +
 +static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
 +{
 +    /* Return the relative performance loss on the total run time
 +     * due to the force calculation load imbalance.
 +     */
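 +    /* In formula form: loss = (max_load - average_load)/total_step_time,
 +     * i.e. (load_max*nnodes - load_sum)/(load_step*nnodes) as computed below.
 +     */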
 +    if (dd->comm->nload > 0)
 +    {
 +        return
 +            (dd->comm->load_max*dd->nnodes - dd->comm->load_sum)/
 +            (dd->comm->load_step*dd->nnodes);
 +    }
 +    else
 +    {
 +        return 0;
 +    }
 +}
 +
 +static void print_dd_load_av(FILE *fplog,gmx_domdec_t *dd)
 +{
 +    char  buf[STRLEN];
 +    int   npp,npme,nnodes,d,limp;
 +    float imbal,pme_f_ratio,lossf,lossp=0;
 +    gmx_bool  bLim;
 +    gmx_domdec_comm_t *comm;
 +
 +    comm = dd->comm;
 +    if (DDMASTER(dd) && comm->nload > 0)
 +    {
 +        npp    = dd->nnodes;
 +        npme   = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
 +        nnodes = npp + npme;
 +        imbal = comm->load_max*npp/comm->load_sum - 1;
 +        lossf = dd_force_imb_perf_loss(dd);
 +        sprintf(buf," Average load imbalance: %.1f %%\n",imbal*100);
 +        fprintf(fplog,"%s",buf);
 +        fprintf(stderr,"\n");
 +        fprintf(stderr,"%s",buf);
 +        sprintf(buf," Part of the total run time spent waiting due to load imbalance: %.1f %%\n",lossf*100);
 +        fprintf(fplog,"%s",buf);
 +        fprintf(stderr,"%s",buf);
 +        bLim = FALSE;
 +        if (comm->bDynLoadBal)
 +        {
 +            sprintf(buf," Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
 +            for(d=0; d<dd->ndim; d++)
 +            {
 +                limp = (200*comm->load_lim[d]+1)/(2*comm->nload);
 +                sprintf(buf+strlen(buf)," %c %d %%",dim2char(dd->dim[d]),limp);
 +                if (limp >= 50)
 +                {
 +                    bLim = TRUE;
 +                }
 +            }
 +            sprintf(buf+strlen(buf),"\n");
 +            fprintf(fplog,"%s",buf);
 +            fprintf(stderr,"%s",buf);
 +        }
 +        if (npme > 0)
 +        {
 +            pme_f_ratio = comm->load_pme/comm->load_mdf;
 +            lossp = (comm->load_pme -comm->load_mdf)/comm->load_step;
 +            if (lossp <= 0)
 +            {
 +                lossp *= (float)npme/(float)nnodes;
 +            }
 +            else
 +            {
 +                lossp *= (float)npp/(float)nnodes;
 +            }
 +            sprintf(buf," Average PME mesh/force load: %5.3f\n",pme_f_ratio);
 +            fprintf(fplog,"%s",buf);
 +            fprintf(stderr,"%s",buf);
 +            sprintf(buf," Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n",fabs(lossp)*100);
 +            fprintf(fplog,"%s",buf);
 +            fprintf(stderr,"%s",buf);
 +        }
 +        fprintf(fplog,"\n");
 +        fprintf(stderr,"\n");
 +        
 +        if (lossf >= DD_PERF_LOSS)
 +        {
 +            sprintf(buf,
 +                    "NOTE: %.1f %% performance was lost due to load imbalance\n"
 +                    "      in the domain decomposition.\n",lossf*100);
 +            if (!comm->bDynLoadBal)
 +            {
 +                sprintf(buf+strlen(buf),"      You might want to use dynamic load balancing (option -dlb).\n");
 +            }
 +            else if (bLim)
 +            {
 +                sprintf(buf+strlen(buf),"      You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
 +            }
 +            fprintf(fplog,"%s\n",buf);
 +            fprintf(stderr,"%s\n",buf);
 +        }
 +        if (npme > 0 && fabs(lossp) >= DD_PERF_LOSS)
 +        {
 +            sprintf(buf,
 +                    "NOTE: %.1f %% performance was lost because the PME nodes\n"
 +                    "      had %s work to do than the PP nodes.\n"
 +                    "      You might want to %s the number of PME nodes\n"
 +                    "      or %s the cut-off and the grid spacing.\n",
 +                    fabs(lossp*100),
 +                    (lossp < 0) ? "less"     : "more",
 +                    (lossp < 0) ? "decrease" : "increase",
 +                    (lossp < 0) ? "decrease" : "increase");
 +            fprintf(fplog,"%s\n",buf);
 +            fprintf(stderr,"%s\n",buf);
 +        }
 +    }
 +}
 +
 +static float dd_vol_min(gmx_domdec_t *dd)
 +{
 +    return dd->comm->load[0].cvol_min*dd->nnodes;
 +}
 +
 +static gmx_bool dd_load_flags(gmx_domdec_t *dd)
 +{
 +    return dd->comm->load[0].flags;
 +}
 +
 +static float dd_f_imbal(gmx_domdec_t *dd)
 +{
 +    return dd->comm->load[0].max*dd->nnodes/dd->comm->load[0].sum - 1;
 +}
 +
 +static float dd_pme_f_ratio(gmx_domdec_t *dd)
 +{
 +    return dd->comm->load[0].pme/dd->comm->load[0].mdf;
 +}
 +
 +static void dd_print_load(FILE *fplog,gmx_domdec_t *dd,gmx_large_int_t step)
 +{
 +    int flags,d;
 +    char buf[22];
 +    
 +    flags = dd_load_flags(dd);
 +    if (flags)
 +    {
 +        fprintf(fplog,
 +                "DD  load balancing is limited by minimum cell size in dimension");
 +        for(d=0; d<dd->ndim; d++)
 +        {
 +            if (flags & (1<<d))
 +            {
 +                fprintf(fplog," %c",dim2char(dd->dim[d]));
 +            }
 +        }
 +        fprintf(fplog,"\n");
 +    }
 +    fprintf(fplog,"DD  step %s",gmx_step_str(step,buf));
 +    if (dd->comm->bDynLoadBal)
 +    {
 +        fprintf(fplog,"  vol min/aver %5.3f%c",
 +                dd_vol_min(dd),flags ? '!' : ' ');
 +    }
 +    fprintf(fplog," load imb.: force %4.1f%%",dd_f_imbal(dd)*100);
 +    if (dd->comm->cycl_n[ddCyclPME])
 +    {
 +        fprintf(fplog,"  pme mesh/force %5.3f",dd_pme_f_ratio(dd));
 +    }
 +    fprintf(fplog,"\n\n");
 +}
 +
 +static void dd_print_load_verbose(gmx_domdec_t *dd)
 +{
 +    if (dd->comm->bDynLoadBal)
 +    {
 +        fprintf(stderr,"vol %4.2f%c ",
 +                dd_vol_min(dd),dd_load_flags(dd) ? '!' : ' ');
 +    }
 +    fprintf(stderr,"imb F %2d%% ",(int)(dd_f_imbal(dd)*100+0.5));
 +    if (dd->comm->cycl_n[ddCyclPME])
 +    {
 +        fprintf(stderr,"pme/F %4.2f ",dd_pme_f_ratio(dd));
 +    }
 +}
 +
 +#ifdef GMX_MPI
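 +/* Create the communicator for the row of DD cells along dimension dim_ind
 + * that contains the cell at coordinates loc; ranks outside this row pass
 + * MPI_UNDEFINED to MPI_Comm_split and get no communicator. The row root
 + * also allocates the cell boundary buffers used for dynamic load balancing.
 + */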
 +static void make_load_communicator(gmx_domdec_t *dd, int dim_ind,ivec loc)
 +{
 +    MPI_Comm  c_row;
 +    int  dim, i, rank;
 +    ivec loc_c;
 +    gmx_domdec_root_t *root;
 +    gmx_bool bPartOfGroup = FALSE;
 +    
 +    dim = dd->dim[dim_ind];
 +    copy_ivec(loc,loc_c);
 +    for(i=0; i<dd->nc[dim]; i++)
 +    {
 +        loc_c[dim] = i;
 +        rank = dd_index(dd->nc,loc_c);
 +        if (rank == dd->rank)
 +        {
 +            /* This process is part of the group */
 +            bPartOfGroup = TRUE;
 +        }
 +    }
 +    MPI_Comm_split(dd->mpi_comm_all, bPartOfGroup?0:MPI_UNDEFINED, dd->rank,
 +                   &c_row);
 +    if (bPartOfGroup)
 +    {
 +        dd->comm->mpi_comm_load[dim_ind] = c_row;
 +        if (dd->comm->eDLB != edlbNO)
 +        {
 +            if (dd->ci[dim] == dd->master_ci[dim])
 +            {
 +                /* This is the root process of this row */
 +                snew(dd->comm->root[dim_ind],1);
 +                root = dd->comm->root[dim_ind];
 +                snew(root->cell_f,DD_CELL_F_SIZE(dd,dim_ind));
 +                snew(root->old_cell_f,dd->nc[dim]+1);
 +                snew(root->bCellMin,dd->nc[dim]);
 +                if (dim_ind > 0)
 +                {
 +                    snew(root->cell_f_max0,dd->nc[dim]);
 +                    snew(root->cell_f_min1,dd->nc[dim]);
 +                    snew(root->bound_min,dd->nc[dim]);
 +                    snew(root->bound_max,dd->nc[dim]);
 +                }
 +                snew(root->buf_ncd,dd->nc[dim]);
 +            }
 +            else
 +            {
 +                /* This is not a root process, we only need to receive cell_f */
 +                snew(dd->comm->cell_f_row,DD_CELL_F_SIZE(dd,dim_ind));
 +            }
 +        }
 +        if (dd->ci[dim] == dd->master_ci[dim])
 +        {
 +            snew(dd->comm->load[dim_ind].load,dd->nc[dim]*DD_NLOAD_MAX);
 +        }
 +    }
 +}
 +#endif
 +
 +static void make_load_communicators(gmx_domdec_t *dd)
 +{
 +#ifdef GMX_MPI
 +    int  dim0,dim1,i,j;
 +    ivec loc;
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"Making load communicators\n");
 +    }
 +
 +    snew(dd->comm->load,dd->ndim);
 +    snew(dd->comm->mpi_comm_load,dd->ndim);
 +
 +    clear_ivec(loc);
 +    make_load_communicator(dd,0,loc);
 +    if (dd->ndim > 1)
 +    {
 +        dim0 = dd->dim[0];
 +        for(i=0; i<dd->nc[dim0]; i++)
 +        {
 +            loc[dim0] = i;
 +            make_load_communicator(dd,1,loc);
 +        }
 +    }
 +    if (dd->ndim > 2)
 +    {
 +        dim0 = dd->dim[0];
 +        for(i=0; i<dd->nc[dim0]; i++)
 +        {
 +            loc[dim0] = i;
 +            dim1 = dd->dim[1];
 +            for(j=0; j<dd->nc[dim1]; j++)
 +            {
 +                loc[dim1] = j;
 +                make_load_communicator(dd,2,loc);
 +            }
 +        }
 +    }
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"Finished making load communicators\n");
 +    }
 +#endif
 +}
 +
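 +/* Determine the neighbor ranks in each decomposed dimension and set up the
 + * zone shifts and interaction-zone (izone) ranges used during neighbor
 + * searching; also create the load communicators when load recording is on.
 + */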
 +void setup_dd_grid(FILE *fplog,gmx_domdec_t *dd)
 +{
 +    gmx_bool bZYX;
 +    int  d,dim,i,j,m;
 +    ivec tmp,s;
 +    int  nzone,nzonep;
 +    ivec dd_zp[DD_MAXIZONE];
 +    gmx_domdec_zones_t *zones;
 +    gmx_domdec_ns_ranges_t *izone;
 +    
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        dim = dd->dim[d];
 +        copy_ivec(dd->ci,tmp);
 +        tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
 +        dd->neighbor[d][0] = ddcoord2ddnodeid(dd,tmp);
 +        copy_ivec(dd->ci,tmp);
 +        tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
 +        dd->neighbor[d][1] = ddcoord2ddnodeid(dd,tmp);
 +        if (debug)
 +        {
 +            fprintf(debug,"DD rank %d neighbor ranks in dir %d are + %d - %d\n",
 +                    dd->rank,dim,
 +                    dd->neighbor[d][0],
 +                    dd->neighbor[d][1]);
 +        }
 +    }
 +    
 +    if (DDMASTER(dd))
 +    {
 +        fprintf(stderr,"Making %dD domain decomposition %d x %d x %d\n",
 +          dd->ndim,dd->nc[XX],dd->nc[YY],dd->nc[ZZ]);
 +    }
 +    if (fplog)
 +    {
 +        fprintf(fplog,"\nMaking %dD domain decomposition grid %d x %d x %d, home cell index %d %d %d\n\n",
 +                dd->ndim,
 +                dd->nc[XX],dd->nc[YY],dd->nc[ZZ],
 +                dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
 +    }
 +    switch (dd->ndim)
 +    {
 +    case 3:
 +        nzone  = dd_z3n;
 +        nzonep = dd_zp3n;
 +        for(i=0; i<nzonep; i++)
 +        {
 +            copy_ivec(dd_zp3[i],dd_zp[i]);
 +        }
 +        break;
 +    case 2:
 +        nzone  = dd_z2n;
 +        nzonep = dd_zp2n;
 +        for(i=0; i<nzonep; i++)
 +        {
 +            copy_ivec(dd_zp2[i],dd_zp[i]);
 +        }
 +        break;
 +    case 1:
 +        nzone  = dd_z1n;
 +        nzonep = dd_zp1n;
 +        for(i=0; i<nzonep; i++)
 +        {
 +            copy_ivec(dd_zp1[i],dd_zp[i]);
 +        }
 +        break;
 +    default:
 +        gmx_fatal(FARGS,"Can only do 1, 2 or 3D domain decomposition");
 +        nzone = 0;
 +        nzonep = 0;
 +    }
 +
 +    zones = &dd->comm->zones;
 +
 +    for(i=0; i<nzone; i++)
 +    {
 +        m = 0;
 +        clear_ivec(zones->shift[i]);
 +        for(d=0; d<dd->ndim; d++)
 +        {
 +            zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
 +        }
 +    }
 +    
 +    zones->n = nzone;
 +    for(i=0; i<nzone; i++)
 +    {
 +        for(d=0; d<DIM; d++)
 +        {
 +            s[d] = dd->ci[d] - zones->shift[i][d];
 +            if (s[d] < 0)
 +            {
 +                s[d] += dd->nc[d];
 +            }
 +            else if (s[d] >= dd->nc[d])
 +            {
 +                s[d] -= dd->nc[d];
 +            }
 +        }
 +    }
 +    zones->nizone = nzonep;
 +    for(i=0; i<zones->nizone; i++)
 +    {
 +        if (dd_zp[i][0] != i)
 +        {
 +            gmx_fatal(FARGS,"Internal inconsistency in the dd grid setup");
 +        }
 +        izone = &zones->izone[i];
 +        izone->j0 = dd_zp[i][1];
 +        izone->j1 = dd_zp[i][2];
 +        for(dim=0; dim<DIM; dim++)
 +        {
 +            if (dd->nc[dim] == 1)
 +            {
 +                /* All shifts should be allowed */
 +                izone->shift0[dim] = -1;
 +                izone->shift1[dim] = 1;
 +            }
 +            else
 +            {
 +                /*
 +                  izone->shift0[d] = 0;
 +                  izone->shift1[d] = 0;
 +                  for(j=izone->j0; j<izone->j1; j++) {
 +                  if (dd->shift[j][d] > dd->shift[i][d])
 +                  izone->shift0[d] = -1;
 +                  if (dd->shift[j][d] < dd->shift[i][d])
 +                  izone->shift1[d] = 1;
 +                  }
 +                */
 +                
 +                int shift_diff;
 +                
 +                /* Assume the shifts are not more than 1 cell */
 +                izone->shift0[dim] = 1;
 +                izone->shift1[dim] = -1;
 +                for(j=izone->j0; j<izone->j1; j++)
 +                {
 +                    shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
 +                    if (shift_diff < izone->shift0[dim])
 +                    {
 +                        izone->shift0[dim] = shift_diff;
 +                    }
 +                    if (shift_diff > izone->shift1[dim])
 +                    {
 +                        izone->shift1[dim] = shift_diff;
 +                    }
 +                }
 +            }
 +        }
 +    }
 +    
 +    if (dd->comm->eDLB != edlbNO)
 +    {
 +        snew(dd->comm->root,dd->ndim);
 +    }
 +    
 +    if (dd->comm->bRecordLoad)
 +    {
 +        make_load_communicators(dd);
 +    }
 +}
 +
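 +/* Set up the communicator for the particle-particle ranks, optionally as a
 + * Cartesian communicator, and determine the DD master rank and the
 + * decomposition coordinates of this rank.
 + */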
 +static void make_pp_communicator(FILE *fplog,t_commrec *cr,int reorder)
 +{
 +    gmx_domdec_t *dd;
 +    gmx_domdec_comm_t *comm;
 +    int  i,rank,*buf;
 +    ivec periods;
 +#ifdef GMX_MPI
 +    MPI_Comm comm_cart;
 +#endif
 +    
 +    dd = cr->dd;
 +    comm = dd->comm;
 +    
 +#ifdef GMX_MPI
 +    if (comm->bCartesianPP)
 +    {
 +        /* Set up cartesian communication for the particle-particle part */
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Will use a Cartesian communicator: %d x %d x %d\n",
 +                    dd->nc[XX],dd->nc[YY],dd->nc[ZZ]);
 +        }
 +        
 +        for(i=0; i<DIM; i++)
 +        {
 +            periods[i] = TRUE;
 +        }
 +        MPI_Cart_create(cr->mpi_comm_mygroup,DIM,dd->nc,periods,reorder,
 +                        &comm_cart);
 +        /* We overwrite the old communicator with the new cartesian one */
 +        cr->mpi_comm_mygroup = comm_cart;
 +    }
 +    
 +    dd->mpi_comm_all = cr->mpi_comm_mygroup;
 +    MPI_Comm_rank(dd->mpi_comm_all,&dd->rank);
 +    
 +    if (comm->bCartesianPP_PME)
 +    {
 +        /* Since we want to use the original Cartesian setup for the simulation,
 +         * and not the one after the split, we need to make an index.
 +         */
 +        snew(comm->ddindex2ddnodeid,dd->nnodes);
 +        comm->ddindex2ddnodeid[dd_index(dd->nc,dd->ci)] = dd->rank;
 +        gmx_sumi(dd->nnodes,comm->ddindex2ddnodeid,cr);
 +        /* Get the rank of the DD master,
 +         * above we made sure that the master node is a PP node.
 +         */
 +        if (MASTER(cr))
 +        {
 +            rank = dd->rank;
 +        }
 +        else
 +        {
 +            rank = 0;
 +        }
 +        MPI_Allreduce(&rank,&dd->masterrank,1,MPI_INT,MPI_SUM,dd->mpi_comm_all);
 +    }
 +    else if (comm->bCartesianPP)
 +    {
 +        if (cr->npmenodes == 0)
 +        {
 +            /* The PP communicator is also
 +             * the communicator for this simulation
 +             */
 +            cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
 +        }
 +        cr->nodeid = dd->rank;
 +        
 +        MPI_Cart_coords(dd->mpi_comm_all,dd->rank,DIM,dd->ci);
 +        
 +        /* We need to make an index to go from the coordinates
 +         * to the nodeid of this simulation.
 +         */
 +        snew(comm->ddindex2simnodeid,dd->nnodes);
 +        snew(buf,dd->nnodes);
 +        if (cr->duty & DUTY_PP)
 +        {
 +            buf[dd_index(dd->nc,dd->ci)] = cr->sim_nodeid;
 +        }
 +        /* Communicate the ddindex to simulation nodeid index */
 +        MPI_Allreduce(buf,comm->ddindex2simnodeid,dd->nnodes,MPI_INT,MPI_SUM,
 +                      cr->mpi_comm_mysim);
 +        sfree(buf);
 +        
 +        /* Determine the master coordinates and rank.
 +         * The DD master should be the same node as the master of this sim.
 +         */
 +        for(i=0; i<dd->nnodes; i++)
 +        {
 +            if (comm->ddindex2simnodeid[i] == 0)
 +            {
 +                ddindex2xyz(dd->nc,i,dd->master_ci);
 +                MPI_Cart_rank(dd->mpi_comm_all,dd->master_ci,&dd->masterrank);
 +            }
 +        }
 +        if (debug)
 +        {
 +            fprintf(debug,"The master rank is %d\n",dd->masterrank);
 +        }
 +    }
 +    else
 +    {
 +        /* No Cartesian communicators */
 +        /* We use the rank in dd->comm->all as DD index */
 +        ddindex2xyz(dd->nc,dd->rank,dd->ci);
 +        /* The simulation master nodeid is 0, so the DD master rank is also 0 */
 +        dd->masterrank = 0;
 +        clear_ivec(dd->master_ci);
 +    }
 +#endif
 +  
 +    if (fplog)
 +    {
 +        fprintf(fplog,
 +                "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
 +                dd->rank,dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
 +    }
 +    if (debug)
 +    {
 +        fprintf(debug,
 +                "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
 +                dd->rank,dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
 +    }
 +}
 +
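 +/* PME-only ranks do not call make_pp_communicator, but with a Cartesian PP
 + * setup they still have to participate in the collective that builds the
 + * ddindex to simulation nodeid table.
 + */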
 +static void receive_ddindex2simnodeid(t_commrec *cr)
 +{
 +    gmx_domdec_t *dd;
 +    
 +    gmx_domdec_comm_t *comm;
 +    int  *buf;
 +    
 +    dd = cr->dd;
 +    comm = dd->comm;
 +    
 +#ifdef GMX_MPI
 +    if (!comm->bCartesianPP_PME && comm->bCartesianPP)
 +    {
 +        snew(comm->ddindex2simnodeid,dd->nnodes);
 +        snew(buf,dd->nnodes);
 +        if (cr->duty & DUTY_PP)
 +        {
 +            buf[dd_index(dd->nc,dd->ci)] = cr->sim_nodeid;
 +        }
 +#ifdef GMX_MPI
 +        /* Communicate the ddindex to simulation nodeid index */
 +        MPI_Allreduce(buf,comm->ddindex2simnodeid,dd->nnodes,MPI_INT,MPI_SUM,
 +                      cr->mpi_comm_mysim);
 +#endif
 +        sfree(buf);
 +    }
 +#endif
 +}
 +
 +static gmx_domdec_master_t *init_gmx_domdec_master_t(gmx_domdec_t *dd,
 +                                                     int ncg,int natoms)
 +{
 +    gmx_domdec_master_t *ma;
 +    int i;
 +
 +    snew(ma,1);
 +    
 +    snew(ma->ncg,dd->nnodes);
 +    snew(ma->index,dd->nnodes+1);
 +    snew(ma->cg,ncg);
 +    snew(ma->nat,dd->nnodes);
 +    snew(ma->ibuf,dd->nnodes*2);
 +    snew(ma->cell_x,DIM);
 +    for(i=0; i<DIM; i++)
 +    {
 +        snew(ma->cell_x[i],dd->nc[i]+1);
 +    }
 +
 +    if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
 +    {
 +        ma->vbuf = NULL;
 +    }
 +    else
 +    {
 +        snew(ma->vbuf,natoms);
 +    }
 +
 +    return ma;
 +}
 +
 +static void split_communicator(FILE *fplog,t_commrec *cr,int dd_node_order,
 +                               int reorder)
 +{
 +    gmx_domdec_t *dd;
 +    gmx_domdec_comm_t *comm;
 +    int  i,rank;
 +    gmx_bool bDiv[DIM];
 +    ivec periods;
 +#ifdef GMX_MPI
 +    MPI_Comm comm_cart;
 +#endif
 +    
 +    dd = cr->dd;
 +    comm = dd->comm;
 +    
 +    if (comm->bCartesianPP)
 +    {
 +        for(i=1; i<DIM; i++)
 +        {
 +            bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
 +        }
 +        if (bDiv[YY] || bDiv[ZZ])
 +        {
 +            comm->bCartesianPP_PME = TRUE;
 +            /* If we have 2D PME decomposition, which is always in x+y,
 +             * we stack the PME only nodes in z.
 +             * Otherwise we choose the direction that provides the thinnest slab
 +             * of PME only nodes as this will have the least effect
 +             * on the PP communication.
 +             * But for the PME communication the opposite might be better.
 +             */
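 +            /* Illustrative example: with a 4x4x2 PP grid (32 PP nodes) and
 +             * 8 PME-only nodes, the PME count does not divide evenly over z,
 +             * so the PME nodes are stacked in y and the Cartesian grid
 +             * becomes 4x5x2.
 +             */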
 +            if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
 +                             !bDiv[YY] ||
 +                             dd->nc[YY] > dd->nc[ZZ]))
 +            {
 +                comm->cartpmedim = ZZ;
 +            }
 +            else
 +            {
 +                comm->cartpmedim = YY;
 +            }
 +            comm->ntot[comm->cartpmedim]
 +                += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
 +        }
 +        else if (fplog)
 +        {
 +            fprintf(fplog,"#pmenodes (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n",cr->npmenodes,dd->nc[XX],dd->nc[YY],dd->nc[XX],dd->nc[ZZ]);
 +            fprintf(fplog,
 +                    "Will not use a Cartesian communicator for PP <-> PME\n\n");
 +        }
 +    }
 +    
 +#ifdef GMX_MPI
 +    if (comm->bCartesianPP_PME)
 +    {
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n",comm->ntot[XX],comm->ntot[YY],comm->ntot[ZZ]);
 +        }
 +        
 +        for(i=0; i<DIM; i++)
 +        {
 +            periods[i] = TRUE;
 +        }
 +        MPI_Cart_create(cr->mpi_comm_mysim,DIM,comm->ntot,periods,reorder,
 +                        &comm_cart);
 +        
 +        MPI_Comm_rank(comm_cart,&rank);
 +        if (MASTERNODE(cr) && rank != 0)
 +        {
 +            gmx_fatal(FARGS,"MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
 +        }
 +        
 +        /* With this assignment we lose the link to the original communicator,
 +         * which will usually be MPI_COMM_WORLD, unless we have multisim.
 +         */
 +        cr->mpi_comm_mysim = comm_cart;
 +        cr->sim_nodeid = rank;
 +        
 +        MPI_Cart_coords(cr->mpi_comm_mysim,cr->sim_nodeid,DIM,dd->ci);
 +        
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Cartesian nodeid %d, coordinates %d %d %d\n\n",
 +                    cr->sim_nodeid,dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
 +        }
 +        
 +        if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
 +        {
 +            cr->duty = DUTY_PP;
 +        }
 +        if (cr->npmenodes == 0 ||
 +            dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
 +        {
 +            cr->duty = DUTY_PME;
 +        }
 +        
 +        /* Split the sim communicator into PP and PME only nodes */
 +        MPI_Comm_split(cr->mpi_comm_mysim,
 +                       cr->duty,
 +                       dd_index(comm->ntot,dd->ci),
 +                       &cr->mpi_comm_mygroup);
 +    }
 +    else
 +    {
 +        switch (dd_node_order)
 +        {
 +        case ddnoPP_PME:
 +            if (fplog)
 +            {
 +                fprintf(fplog,"Order of the nodes: PP first, PME last\n");
 +            }
 +            break;
 +        case ddnoINTERLEAVE:
 +            /* Interleave the PP-only and PME-only nodes,
 +             * as on clusters with dual-core machines this will double
 +             * the communication bandwidth of the PME processes
 +             * and thus speed up the PP <-> PME and inter PME communication.
 +             */
 +            if (fplog)
 +            {
 +                fprintf(fplog,"Interleaving PP and PME nodes\n");
 +            }
 +            comm->pmenodes = dd_pmenodes(cr);
 +            break;
 +        case ddnoCARTESIAN:
 +            break;
 +        default:
 +            gmx_fatal(FARGS,"Unknown dd_node_order=%d",dd_node_order);
 +        }
 +    
 +        if (dd_simnode2pmenode(cr,cr->sim_nodeid) == -1)
 +        {
 +            cr->duty = DUTY_PME;
 +        }
 +        else
 +        {
 +            cr->duty = DUTY_PP;
 +        }
 +        
 +        /* Split the sim communicator into PP and PME only nodes */
 +        MPI_Comm_split(cr->mpi_comm_mysim,
 +                       cr->duty,
 +                       cr->nodeid,
 +                       &cr->mpi_comm_mygroup);
 +        MPI_Comm_rank(cr->mpi_comm_mygroup,&cr->nodeid);
 +    }
 +#endif
 +
 +    if (fplog)
 +    {
 +        fprintf(fplog,"This is a %s only node\n\n",
 +                (cr->duty & DUTY_PP) ? "particle-particle" : "PME-mesh");
 +    }
 +}
 +
 +void make_dd_communicators(FILE *fplog,t_commrec *cr,int dd_node_order)
 +{
 +    gmx_domdec_t *dd;
 +    gmx_domdec_comm_t *comm;
 +    int CartReorder;
 +    
 +    dd = cr->dd;
 +    comm = dd->comm;
 +    
 +    copy_ivec(dd->nc,comm->ntot);
 +    
 +    comm->bCartesianPP = (dd_node_order == ddnoCARTESIAN);
 +    comm->bCartesianPP_PME = FALSE;
 +    
 +    /* Reorder the nodes by default. This might change the MPI ranks.
 +     * Real reordering is only supported on very few architectures,
 +     * Blue Gene is one of them.
 +     */
 +    CartReorder = (getenv("GMX_NO_CART_REORDER") == NULL);
 +    
 +    if (cr->npmenodes > 0)
 +    {
 +        /* Split the communicator into a PP and PME part */
 +        split_communicator(fplog,cr,dd_node_order,CartReorder);
 +        if (comm->bCartesianPP_PME)
 +        {
 +            /* We (possibly) reordered the nodes in split_communicator,
 +             * so it is no longer required in make_pp_communicator.
 +             */
 +            CartReorder = FALSE;
 +        }
 +    }
 +    else
 +    {
 +        /* All nodes do PP and PME */
 +#ifdef GMX_MPI    
 +        /* We do not require separate communicators */
 +        cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
 +#endif
 +    }
 +    
 +    if (cr->duty & DUTY_PP)
 +    {
 +        /* Copy or make a new PP communicator */
 +        make_pp_communicator(fplog,cr,CartReorder);
 +    }
 +    else
 +    {
 +        receive_ddindex2simnodeid(cr);
 +    }
 +    
 +    if (!(cr->duty & DUTY_PME))
 +    {
 +        /* Set up the communication to our PME node */
 +        dd->pme_nodeid = dd_simnode2pmenode(cr,cr->sim_nodeid);
 +        dd->pme_receive_vir_ener = receive_vir_ener(cr);
 +        if (debug)
 +        {
 +            fprintf(debug,"My pme_nodeid %d receive ener %d\n",
 +                    dd->pme_nodeid,dd->pme_receive_vir_ener);
 +        }
 +    }
 +    else
 +    {
 +        dd->pme_nodeid = -1;
 +    }
 +
 +    if (DDMASTER(dd))
 +    {
 +        dd->ma = init_gmx_domdec_master_t(dd,
 +                                          comm->cgs_gl.nr,
 +                                          comm->cgs_gl.index[comm->cgs_gl.nr]);
 +    }
 +}
 +
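 +/* Parse a string of relative cell sizes for static load balancing along
 + * direction dir and return the normalized fractions (illustrative: with
 + * nc=3 and the string "1 2 1" the result is 0.25 0.50 0.25).
 + */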
 +static real *get_slb_frac(FILE *fplog,const char *dir,int nc,const char *size_string)
 +{
 +    real *slb_frac,tot;
 +    int  i,n;
 +    double dbl;
 +    
 +    slb_frac = NULL;
 +    if (nc > 1 && size_string != NULL)
 +    {
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Using static load balancing for the %s direction\n",
 +                    dir);
 +        }
 +        snew(slb_frac,nc);
 +        tot = 0;
 +        for (i=0; i<nc; i++)
 +        {
 +            dbl = 0;
 +            sscanf(size_string,"%lf%n",&dbl,&n);
 +            if (dbl == 0)
 +            {
 +                gmx_fatal(FARGS,"Incorrect or not enough DD cell size entries for direction %s: '%s'",dir,size_string);
 +            }
 +            slb_frac[i] = dbl;
 +            size_string += n;
 +            tot += slb_frac[i];
 +        }
 +        /* Normalize */
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Relative cell sizes:");
 +        }
 +        for (i=0; i<nc; i++)
 +        {
 +            slb_frac[i] /= tot;
 +            if (fplog)
 +            {
 +                fprintf(fplog," %5.3f",slb_frac[i]);
 +            }
 +        }
 +        if (fplog)
 +        {
 +            fprintf(fplog,"\n");
 +        }
 +    }
 +    
 +    return slb_frac;
 +}
 +
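 +/* Count the bonded interactions in the topology that involve more than two
 + * atoms; the caller uses this to decide whether multi-body bonded
 + * communication is needed.
 + */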
 +static int multi_body_bondeds_count(gmx_mtop_t *mtop)
 +{
 +    int n,nmol,ftype;
 +    gmx_mtop_ilistloop_t iloop;
 +    t_ilist *il;
 +    
 +    n = 0;
 +    iloop = gmx_mtop_ilistloop_init(mtop);
 +    while (gmx_mtop_ilistloop_next(iloop,&il,&nmol))
 +    {
 +        for(ftype=0; ftype<F_NRE; ftype++)
 +        {
 +            if ((interaction_function[ftype].flags & IF_BOND) &&
 +                NRAL(ftype) >  2)
 +            {
 +                n += nmol*il[ftype].nr/(1 + NRAL(ftype));
 +            }
 +        }
 +    }
 +
 +    return n;
 +}
 +
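 +/* Return the integer value of environment variable env_var, def when it is
 + * not set, and 1 when it is set but does not parse as an integer.
 + */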
 +static int dd_nst_env(FILE *fplog,const char *env_var,int def)
 +{
 +    char *val;
 +    int  nst;
 +    
 +    nst = def;
 +    val = getenv(env_var);
 +    if (val)
 +    {
 +        if (sscanf(val,"%d",&nst) <= 0)
 +        {
 +            nst = 1;
 +        }
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Found env.var. %s = %s, using value %d\n",
 +                    env_var,val,nst);
 +        }
 +    }
 +    
 +    return nst;
 +}
 +
 +static void dd_warning(t_commrec *cr,FILE *fplog,const char *warn_string)
 +{
 +    if (MASTER(cr))
 +    {
 +        fprintf(stderr,"\n%s\n",warn_string);
 +    }
 +    if (fplog)
 +    {
 +        fprintf(fplog,"\n%s\n",warn_string);
 +    }
 +}
 +
 +static void check_dd_restrictions(t_commrec *cr,gmx_domdec_t *dd,
 +                                  t_inputrec *ir,FILE *fplog)
 +{
 +    if (ir->ePBC == epbcSCREW &&
 +        (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
 +    {
 +        gmx_fatal(FARGS,"With pbc=%s can only do domain decomposition in the x-direction",epbc_names[ir->ePBC]);
 +    }
 +
 +    if (ir->ns_type == ensSIMPLE)
 +    {
 +        gmx_fatal(FARGS,"Domain decomposition does not support simple neighbor searching, use grid searching or use particle decomposition");
 +    }
 +
 +    if (ir->nstlist == 0)
 +    {
 +        gmx_fatal(FARGS,"Domain decomposition does not work with nstlist=0");
 +    }
 +
 +    if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
 +    {
 +        dd_warning(cr,fplog,"comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
 +    }
 +}
 +
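 +/* Return the smallest initial average cell size over the decomposed
 + * dimensions, corrected for box skew.
 + */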
 +static real average_cellsize_min(gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
 +{
 +    int  di,d;
 +    real r;
 +
 +    r = ddbox->box_size[XX];
 +    for(di=0; di<dd->ndim; di++)
 +    {
 +        d = dd->dim[di];
 +        /* Check using the initial average cell size */
 +        r = min(r,ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
 +    }
 +
 +    return r;
 +}
 +
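 +/* Determine which dynamic load balancing setting can be used for this run;
 + * "auto" or "yes" requests may be downgraded, e.g. for reruns, non-dynamical
 + * integrators, or when cycle counting is not available.
 + */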
 +static int check_dlb_support(FILE *fplog,t_commrec *cr,
 +                             const char *dlb_opt,gmx_bool bRecordLoad,
 +                             unsigned long Flags,t_inputrec *ir)
 +{
 +    gmx_domdec_t *dd;
 +    int  eDLB=-1;
 +    char buf[STRLEN];
 +
 +    switch (dlb_opt[0])
 +    {
 +    case 'a': eDLB = edlbAUTO; break;
 +    case 'n': eDLB = edlbNO;   break;
 +    case 'y': eDLB = edlbYES;  break;
 +    default: gmx_incons("Unknown dlb_opt");
 +    }
 +
 +    if (Flags & MD_RERUN)
 +    {
 +        return edlbNO;
 +    }
 +
 +    if (!EI_DYNAMICS(ir->eI))
 +    {
 +        if (eDLB == edlbYES)
 +        {
 +            sprintf(buf,"NOTE: dynamic load balancing is only supported with dynamics, not with integrator '%s'\n",EI(ir->eI));
 +            dd_warning(cr,fplog,buf);
 +        }
 +            
 +        return edlbNO;
 +    }
 +
 +    if (!bRecordLoad)
 +    {
 +        dd_warning(cr,fplog,"NOTE: Cycle counting is not supported on this architecture, will not use dynamic load balancing\n");
 +
 +        return edlbNO;
 +    }
 +
 +    if (Flags & MD_REPRODUCIBLE)
 +    {
 +        switch (eDLB)
 +        {
 +        case edlbNO:
 +            break;
 +        case edlbAUTO:
 +            dd_warning(cr,fplog,"NOTE: reproducibility requested, will not use dynamic load balancing\n");
 +            eDLB = edlbNO;
 +            break;
 +        case edlbYES:
 +            dd_warning(cr,fplog,"WARNING: reproducibility requested with dynamic load balancing, the simulation will NOT be binary reproducible\n");
 +            break;
 +        default:
 +            gmx_fatal(FARGS,"Death horror: undefined case (%d) for load balancing choice",eDLB);
 +            break;
 +        }
 +    }
 +
 +    return eDLB;
 +}
 +
 +static void set_dd_dim(FILE *fplog,gmx_domdec_t *dd)
 +{
 +    int dim;
 +
 +    dd->ndim = 0;
 +    if (getenv("GMX_DD_ORDER_ZYX") != NULL)
 +    {
 +        /* Decomposition order z,y,x */
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Using domain decomposition order z, y, x\n");
 +        }
 +        for(dim=DIM-1; dim>=0; dim--)
 +        {
 +            if (dd->nc[dim] > 1)
 +            {
 +                dd->dim[dd->ndim++] = dim;
 +            }
 +        }
 +    }
 +    else
 +    {
 +        /* Decomposition order x,y,z */
 +        for(dim=0; dim<DIM; dim++)
 +        {
 +            if (dd->nc[dim] > 1)
 +            {
 +                dd->dim[dd->ndim++] = dim;
 +            }
 +        }
 +    }
 +}
 +
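 +/* Allocate the domain decomposition communication struct and initialize
 + * its counters and buffers to zero/empty.
 + */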
 +static gmx_domdec_comm_t *init_dd_comm()
 +{
 +    gmx_domdec_comm_t *comm;
 +    int  i;
 +
 +    snew(comm,1);
 +    snew(comm->cggl_flag,DIM*2);
 +    snew(comm->cgcm_state,DIM*2);
 +    for(i=0; i<DIM*2; i++)
 +    {
 +        comm->cggl_flag_nalloc[i]  = 0;
 +        comm->cgcm_state_nalloc[i] = 0;
 +    }
 +    
 +    comm->nalloc_int = 0;
 +    comm->buf_int    = NULL;
 +
 +    vec_rvec_init(&comm->vbuf);
 +
 +    comm->n_load_have    = 0;
 +    comm->n_load_collect = 0;
 +
 +    for(i=0; i<ddnatNR-ddnatZONE; i++)
 +    {
 +        comm->sum_nat[i] = 0;
 +    }
 +    comm->ndecomp = 0;
 +    comm->nload   = 0;
 +    comm->load_step = 0;
 +    comm->load_sum  = 0;
 +    comm->load_max  = 0;
 +    clear_ivec(comm->load_lim);
 +    comm->load_mdf  = 0;
 +    comm->load_pme  = 0;
 +
 +    return comm;
 +}
 +
 +gmx_domdec_t *init_domain_decomposition(FILE *fplog,t_commrec *cr,
 +                                        unsigned long Flags,
 +                                        ivec nc,
 +                                        real comm_distance_min,real rconstr,
 +                                        const char *dlb_opt,real dlb_scale,
 +                                        const char *sizex,const char *sizey,const char *sizez,
 +                                        gmx_mtop_t *mtop,t_inputrec *ir,
 +                                        matrix box,rvec *x,
 +                                        gmx_ddbox_t *ddbox,
 +                                        int *npme_x,int *npme_y)
 +{
 +    gmx_domdec_t *dd;
 +    gmx_domdec_comm_t *comm;
 +    int  recload;
 +    int  d,i,j;
 +    real r_2b,r_mb,r_bonded=-1,r_bonded_limit=-1,limit,acs;
 +    gmx_bool bC;
 +    char buf[STRLEN];
 +    
 +    if (fplog)
 +    {
 +        fprintf(fplog,
 +                "\nInitializing Domain Decomposition on %d nodes\n",cr->nnodes);
 +    }
 +    
 +    snew(dd,1);
 +
 +    dd->comm = init_dd_comm();
 +    comm = dd->comm;
 +
 +    dd->npbcdim   = ePBC2npbcdim(ir->ePBC);
 +    dd->bScrewPBC = (ir->ePBC == epbcSCREW);
 +    
 +    dd->bSendRecv2      = dd_nst_env(fplog,"GMX_DD_SENDRECV2",0);
 +    comm->dlb_scale_lim = dd_nst_env(fplog,"GMX_DLB_MAX",10);
 +    comm->eFlop         = dd_nst_env(fplog,"GMX_DLB_FLOP",0);
 +    recload             = dd_nst_env(fplog,"GMX_DD_LOAD",1);
 +    comm->nstSortCG     = dd_nst_env(fplog,"GMX_DD_SORT",1);
 +    comm->nstDDDump     = dd_nst_env(fplog,"GMX_DD_DUMP",0);
 +    comm->nstDDDumpGrid = dd_nst_env(fplog,"GMX_DD_DUMP_GRID",0);
 +    comm->DD_debug      = dd_nst_env(fplog,"GMX_DD_DEBUG",0);
 +
 +    dd->pme_recv_f_alloc = 0;
 +    dd->pme_recv_f_buf = NULL;
 +
 +    if (dd->bSendRecv2 && fplog)
 +    {
 +        fprintf(fplog,"Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n");
 +    }
 +    if (comm->eFlop)
 +    {
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Will load balance based on FLOP count\n");
 +        }
 +        if (comm->eFlop > 1)
 +        {
 +            srand(1+cr->nodeid);
 +        }
 +        comm->bRecordLoad = TRUE;
 +    }
 +    else
 +    {
 +        comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
 +    }
 +    
 +    comm->eDLB = check_dlb_support(fplog,cr,dlb_opt,comm->bRecordLoad,Flags,ir);
 +    
 +    comm->bDynLoadBal = (comm->eDLB == edlbYES);
 +    if (fplog)
 +    {
 +        fprintf(fplog,"Dynamic load balancing: %s\n",edlb_names[comm->eDLB]);
 +    }
 +    dd->bGridJump = comm->bDynLoadBal;
 +    
 +    if (comm->nstSortCG)
 +    {
 +        if (fplog)
 +        {
 +            if (comm->nstSortCG == 1)
 +            {
 +                fprintf(fplog,"Will sort the charge groups at every domain (re)decomposition\n");
 +            }
 +            else
 +            {
 +                fprintf(fplog,"Will sort the charge groups every %d steps\n",
 +                        comm->nstSortCG);
 +            }
 +        }
 +        snew(comm->sort,1);
 +    }
 +    else
 +    {
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Will not sort the charge groups\n");
 +        }
 +    }
 +    
 +    comm->bInterCGBondeds = (ncg_mtop(mtop) > mtop->mols.nr);
 +    if (comm->bInterCGBondeds)
 +    {
 +        comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
 +    }
 +    else
 +    {
 +        comm->bInterCGMultiBody = FALSE;
 +    }
 +    
 +    dd->bInterCGcons = inter_charge_group_constraints(mtop);
 +
 +    if (ir->rlistlong == 0)
 +    {
 +        /* Set the cut-off to some very large value,
 +         * so we don't need if statements everywhere in the code.
 +         * We use sqrt, since the cut-off is squared in some places.
 +         */
 +        comm->cutoff   = GMX_CUTOFF_INF;
 +    }
 +    else
 +    {
 +        comm->cutoff   = ir->rlistlong;
 +    }
 +    comm->cutoff_mbody = 0;
 +    
 +    comm->cellsize_limit = 0;
 +    comm->bBondComm = FALSE;
 +
 +    if (comm->bInterCGBondeds)
 +    {
 +        if (comm_distance_min > 0)
 +        {
 +            comm->cutoff_mbody = comm_distance_min;
 +            if (Flags & MD_DDBONDCOMM)
 +            {
 +                comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
 +            }
 +            else
 +            {
 +                comm->cutoff = max(comm->cutoff,comm->cutoff_mbody);
 +            }
 +            r_bonded_limit = comm->cutoff_mbody;
 +        }
 +        else if (ir->bPeriodicMols)
 +        {
 +            /* Cannot easily determine the required cut-off */
 +            dd_warning(cr,fplog,"NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.\n");
 +            comm->cutoff_mbody = comm->cutoff/2;
 +            r_bonded_limit = comm->cutoff_mbody;
 +        }
 +        else
 +        {
 +            if (MASTER(cr))
 +            {
 +                dd_bonded_cg_distance(fplog,dd,mtop,ir,x,box,
 +                                      Flags & MD_DDBONDCHECK,&r_2b,&r_mb);
 +            }
 +            gmx_bcast(sizeof(r_2b),&r_2b,cr);
 +            gmx_bcast(sizeof(r_mb),&r_mb,cr);
 +
 +            /* We use an initial margin of 10% for the minimum cell size,
 +             * except when we are just below the non-bonded cut-off.
 +             */
 +            if (Flags & MD_DDBONDCOMM)
 +            {
 +                if (max(r_2b,r_mb) > comm->cutoff)
 +                {
 +                    r_bonded       = max(r_2b,r_mb);
 +                    r_bonded_limit = 1.1*r_bonded;
 +                    comm->bBondComm = TRUE;
 +                }
 +                else
 +                {
 +                    r_bonded       = r_mb;
 +                    r_bonded_limit = min(1.1*r_bonded,comm->cutoff);
 +                }
 +                /* We determine cutoff_mbody later */
 +            }
 +            else
 +            {
 +                /* No special bonded communication,
 +                 * simply increase the DD cut-off.
 +                 */
 +                r_bonded_limit     = 1.1*max(r_2b,r_mb);
 +                comm->cutoff_mbody = r_bonded_limit;
 +                comm->cutoff       = max(comm->cutoff,comm->cutoff_mbody);
 +            }
 +        }
 +        comm->cellsize_limit = max(comm->cellsize_limit,r_bonded_limit);
 +        if (fplog)
 +        {
 +            fprintf(fplog,
 +                    "Minimum cell size due to bonded interactions: %.3f nm\n",
 +                    comm->cellsize_limit);
 +        }
 +    }
 +
 +    if (dd->bInterCGcons && rconstr <= 0)
 +    {
 +        /* There is a cell size limit due to the constraints (P-LINCS) */
 +        rconstr = constr_r_max(fplog,mtop,ir);
 +        if (fplog)
 +        {
 +            fprintf(fplog,
 +                    "Estimated maximum distance required for P-LINCS: %.3f nm\n",
 +                    rconstr);
 +            if (rconstr > comm->cellsize_limit)
 +            {
 +                fprintf(fplog,"This distance will limit the DD cell size, you can override this with -rcon\n");
 +            }
 +        }
 +    }
 +    else if (rconstr > 0 && fplog)
 +    {
 +        /* Here we do not check for dd->bInterCGcons,
 +         * because one can also set a cell size limit for virtual sites only
 +         * and at this point we don't know yet if there are intercg v-sites.
 +         */
 +        fprintf(fplog,
 +                "User supplied maximum distance required for P-LINCS: %.3f nm\n",
 +                rconstr);
 +    }
 +    comm->cellsize_limit = max(comm->cellsize_limit,rconstr);
 +
 +    comm->cgs_gl = gmx_mtop_global_cgs(mtop);
 +
 +    if (nc[XX] > 0)
 +    {
 +        copy_ivec(nc,dd->nc);
 +        set_dd_dim(fplog,dd);
 +        set_ddbox_cr(cr,&dd->nc,ir,box,&comm->cgs_gl,x,ddbox);
 +
 +        if (cr->npmenodes == -1)
 +        {
 +            cr->npmenodes = 0;
 +        }
 +        acs = average_cellsize_min(dd,ddbox);
 +        if (acs < comm->cellsize_limit)
 +        {
 +            if (fplog)
 +            {
 +                fprintf(fplog,"ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n",acs,comm->cellsize_limit);
 +            }
 +            gmx_fatal_collective(FARGS,cr,NULL,
 +                                 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
 +                                 acs,comm->cellsize_limit);
 +        }
 +    }
 +    else
 +    {
 +        set_ddbox_cr(cr,NULL,ir,box,&comm->cgs_gl,x,ddbox);
 +
 +        /* We need to choose the optimal DD grid and possibly PME nodes */
 +        limit = dd_choose_grid(fplog,cr,dd,ir,mtop,box,ddbox,
 +                               comm->eDLB!=edlbNO,dlb_scale,
 +                               comm->cellsize_limit,comm->cutoff,
 +                               comm->bInterCGBondeds,comm->bInterCGMultiBody);
 +        
 +        if (dd->nc[XX] == 0)
 +        {
 +            bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
 +            sprintf(buf,"Change the number of nodes or mdrun option %s%s%s",
 +                    !bC ? "-rdd" : "-rcon",
 +                    comm->eDLB!=edlbNO ? " or -dds" : "",
 +                    bC ? " or your LINCS settings" : "");
 +
 +            gmx_fatal_collective(FARGS,cr,NULL,
 +                                 "There is no domain decomposition for %d nodes that is compatible with the given box and a minimum cell size of %g nm\n"
 +                                 "%s\n"
 +                                 "Look in the log file for details on the domain decomposition",
 +                                 cr->nnodes-cr->npmenodes,limit,buf);
 +        }
 +        set_dd_dim(fplog,dd);
 +    }
 +
 +    if (fplog)
 +    {
 +        fprintf(fplog,
 +                "Domain decomposition grid %d x %d x %d, separate PME nodes %d\n",
 +                dd->nc[XX],dd->nc[YY],dd->nc[ZZ],cr->npmenodes);
 +    }
 +    
 +    dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
 +    if (cr->nnodes - dd->nnodes != cr->npmenodes)
 +    {
 +        gmx_fatal_collective(FARGS,cr,NULL,
 +                             "The size of the domain decomposition grid (%d) does not match the number of nodes (%d). The total number of nodes is %d",
 +                             dd->nnodes,cr->nnodes - cr->npmenodes,cr->nnodes);
 +    }
 +    if (cr->npmenodes > dd->nnodes)
 +    {
 +        gmx_fatal_collective(FARGS,cr,NULL,
 +                             "The number of separate PME nodes (%d) is larger than the number of PP nodes (%d), this is not supported.",cr->npmenodes,dd->nnodes);
 +    }
 +    if (cr->npmenodes > 0)
 +    {
 +        comm->npmenodes = cr->npmenodes;
 +    }
 +    else
 +    {
 +        comm->npmenodes = dd->nnodes;
 +    }
 +
 +    if (EEL_PME(ir->coulombtype))
 +    {
 +        /* The following choices should match those
 +         * in comm_cost_est in domdec_setup.c.
 +         * Note that here the checks have to take into account
 +         * that the decomposition might occur in a different order than xyz
 +         * (for instance through the env.var. GMX_DD_ORDER_ZYX),
 +         * in which case they will not match those in comm_cost_est,
 +         * but since that is mainly for testing purposes that's fine.
 +         */
 +        if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
 +            comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
 +            getenv("GMX_PMEONEDD") == NULL)
 +        {
 +            comm->npmedecompdim = 2;
 +            comm->npmenodes_x   = dd->nc[XX];
 +            comm->npmenodes_y   = comm->npmenodes/comm->npmenodes_x;
 +        }
 +        else
 +        {
 +            /* In case nc is 1 in both x and y we could still choose to
 +             * decompose pme in y instead of x, but we use x for simplicity.
 +             */
 +            comm->npmedecompdim = 1;
 +            if (dd->dim[0] == YY)
 +            {
 +                comm->npmenodes_x = 1;
 +                comm->npmenodes_y = comm->npmenodes;
 +            }
 +            else
 +            {
 +                comm->npmenodes_x = comm->npmenodes;
 +                comm->npmenodes_y = 1;
 +            }
 +        }    
 +        if (fplog)
 +        {
 +            fprintf(fplog,"PME domain decomposition: %d x %d x %d\n",
 +                    comm->npmenodes_x,comm->npmenodes_y,1);
 +        }
 +    }
 +    else
 +    {
 +        comm->npmedecompdim = 0;
 +        comm->npmenodes_x   = 0;
 +        comm->npmenodes_y   = 0;
 +    }
 +    
 +    /* Technically we don't need both of these,
 +     * but keeping both avoids having to recalculate them.
 +     */
 +    *npme_x = comm->npmenodes_x;
 +    *npme_y = comm->npmenodes_y;
 +        
 +    snew(comm->slb_frac,DIM);
 +    if (comm->eDLB == edlbNO)
 +    {
 +        comm->slb_frac[XX] = get_slb_frac(fplog,"x",dd->nc[XX],sizex);
 +        comm->slb_frac[YY] = get_slb_frac(fplog,"y",dd->nc[YY],sizey);
 +        comm->slb_frac[ZZ] = get_slb_frac(fplog,"z",dd->nc[ZZ],sizez);
 +    }
 +
 +    if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
 +    {
 +        if (comm->bBondComm || comm->eDLB != edlbNO)
 +        {
 +            /* Set the bonded communication distance to halfway
 +             * the minimum and the maximum,
 +             * since the extra communication cost is nearly zero.
 +             */
 +            acs = average_cellsize_min(dd,ddbox);
 +            comm->cutoff_mbody = 0.5*(r_bonded + acs);
 +            if (comm->eDLB != edlbNO)
 +            {
 +                /* Check if this does not limit the scaling */
 +                comm->cutoff_mbody = min(comm->cutoff_mbody,dlb_scale*acs);
 +            }
 +            if (!comm->bBondComm)
 +            {
 +                /* Without bBondComm do not go beyond the n.b. cut-off */
 +                comm->cutoff_mbody = min(comm->cutoff_mbody,comm->cutoff);
 +                if (comm->cellsize_limit >= comm->cutoff)
 +                {
 +                    /* We don't lose a lot of efficiency
 +                     * when increasing it to the n.b. cut-off.
 +                     * It can even be slightly faster, because we need
 +                     * fewer checks for the communication setup.
 +                     */
 +                    comm->cutoff_mbody = comm->cutoff;
 +                }
 +            }
 +            /* Check if we did not end up below our original limit */
 +            comm->cutoff_mbody = max(comm->cutoff_mbody,r_bonded_limit);
 +
 +            if (comm->cutoff_mbody > comm->cellsize_limit)
 +            {
 +                comm->cellsize_limit = comm->cutoff_mbody;
 +            }
 +        }
 +        /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
 +    }
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"Bonded atom communication beyond the cut-off: %d\n"
 +                "cellsize limit %f\n",
 +                comm->bBondComm,comm->cellsize_limit);
 +    }
 +    
 +    if (MASTER(cr))
 +    {
 +        check_dd_restrictions(cr,dd,ir,fplog);
 +    }
 +
 +    comm->globalcomm_step = INT_MIN;
 +    dd->ddp_count = 0;
 +
 +    clear_dd_cycle_counts(dd);
 +
 +    return dd;
 +}
 +
 +static void set_dlb_limits(gmx_domdec_t *dd)
 +{
 +    int d;
 +
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        dd->comm->cd[d].np = dd->comm->cd[d].np_dlb;
 +        dd->comm->cellsize_min[dd->dim[d]] =
 +            dd->comm->cellsize_min_dlb[dd->dim[d]];
 +    }
 +}
 +
 +
 +static void turn_on_dlb(FILE *fplog,t_commrec *cr,gmx_large_int_t step)
 +{
 +    gmx_domdec_t *dd;
 +    gmx_domdec_comm_t *comm;
 +    real cellsize_min;
 +    int  d,nc,i;
 +    char buf[STRLEN];
 +    
 +    dd = cr->dd;
 +    comm = dd->comm;
 +    
 +    if (fplog)
 +    {
 +        fprintf(fplog,"At step %s the performance loss due to force load imbalance is %.1f %%\n",gmx_step_str(step,buf),dd_force_imb_perf_loss(dd)*100);
 +    }
 +
 +    cellsize_min = comm->cellsize_min[dd->dim[0]];
 +    for(d=1; d<dd->ndim; d++)
 +    {
 +        cellsize_min = min(cellsize_min,comm->cellsize_min[dd->dim[d]]);
 +    }
 +
 +    if (cellsize_min < comm->cellsize_limit*1.05)
 +    {
 +        dd_warning(cr,fplog,"NOTE: the minimum cell size is smaller than 1.05 times the cell size limit, will not turn on dynamic load balancing\n");
 +
 +        /* Change DLB from "auto" to "no". */
 +        comm->eDLB = edlbNO;
 +
 +        return;
 +    }
 +
 +    dd_warning(cr,fplog,"NOTE: Turning on dynamic load balancing\n");
 +    comm->bDynLoadBal = TRUE;
 +    dd->bGridJump = TRUE;
 +    
 +    set_dlb_limits(dd);
 +
 +    /* We can set the required cell size info here,
 +     * so we do not need to communicate this.
 +     * The grid is completely uniform.
 +     */
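 +    /* Illustrative example (editor's note, not part of the original code):
 +     * for nc = 4 the loop below sets cell_f = {0.00, 0.25, 0.50, 0.75} and
 +     * cell_f[nc] = 1.0, i.e. equally spaced cell boundaries in fractional
 +     * coordinates.
 +     */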
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        if (comm->root[d])
 +        {
 +            comm->load[d].sum_m = comm->load[d].sum;
 +
 +            nc = dd->nc[dd->dim[d]];
 +            for(i=0; i<nc; i++)
 +            {
 +                comm->root[d]->cell_f[i]    = i/(real)nc;
 +                if (d > 0)
 +                {
 +                    comm->root[d]->cell_f_max0[i] =  i   /(real)nc;
 +                    comm->root[d]->cell_f_min1[i] = (i+1)/(real)nc;
 +                }
 +            }
 +            comm->root[d]->cell_f[nc] = 1.0;
 +        }
 +    }
 +}
 +
 +static char *init_bLocalCG(gmx_mtop_t *mtop)
 +{
 +    int  ncg,cg;
 +    char *bLocalCG;
 +    
 +    ncg = ncg_mtop(mtop);
 +    snew(bLocalCG,ncg);
 +    for(cg=0; cg<ncg; cg++)
 +    {
 +        bLocalCG[cg] = FALSE;
 +    }
 +
 +    return bLocalCG;
 +}
 +
 +void dd_init_bondeds(FILE *fplog,
 +                     gmx_domdec_t *dd,gmx_mtop_t *mtop,
 +                     gmx_vsite_t *vsite,gmx_constr_t constr,
 +                     t_inputrec *ir,gmx_bool bBCheck,cginfo_mb_t *cginfo_mb)
 +{
 +    gmx_domdec_comm_t *comm;
 +    gmx_bool bBondComm;
 +    int  d;
 +
 +    dd_make_reverse_top(fplog,dd,mtop,vsite,constr,ir,bBCheck);
 +
 +    comm = dd->comm;
 +
 +    if (comm->bBondComm)
 +    {
 +        /* Communicate atoms beyond the cut-off for bonded interactions */
 +        comm = dd->comm;
 +
 +        comm->cglink = make_charge_group_links(mtop,dd,cginfo_mb);
 +
 +        comm->bLocalCG = init_bLocalCG(mtop);
 +    }
 +    else
 +    {
 +        /* Only communicate atoms based on cut-off */
 +        comm->cglink   = NULL;
 +        comm->bLocalCG = NULL;
 +    }
 +}
 +
 +static void print_dd_settings(FILE *fplog,gmx_domdec_t *dd,
 +                              t_inputrec *ir,
 +                              gmx_bool bDynLoadBal,real dlb_scale,
 +                              gmx_ddbox_t *ddbox)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int  d;
 +    ivec np;
 +    real limit,shrink;
 +    char buf[64];
 +
 +    if (fplog == NULL)
 +    {
 +        return;
 +    }
 +
 +    comm = dd->comm;
 +
 +    if (bDynLoadBal)
 +    {
 +        fprintf(fplog,"The maximum number of communication pulses is:");
 +        for(d=0; d<dd->ndim; d++)
 +        {
 +            fprintf(fplog," %c %d",dim2char(dd->dim[d]),comm->cd[d].np_dlb);
 +        }
 +        fprintf(fplog,"\n");
 +        fprintf(fplog,"The minimum size for domain decomposition cells is %.3f nm\n",comm->cellsize_limit);
 +        fprintf(fplog,"The requested allowed shrink of DD cells (option -dds) is: %.2f\n",dlb_scale);
 +        fprintf(fplog,"The allowed shrink of domain decomposition cells is:");
 +        for(d=0; d<DIM; d++)
 +        {
 +            if (dd->nc[d] > 1)
 +            {
 +                if (d >= ddbox->npbcdim && dd->nc[d] == 2)
 +                {
 +                    shrink = 0;
 +                }
 +                else
 +                {
 +                    shrink =
 +                        comm->cellsize_min_dlb[d]/
 +                        (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
 +                }
 +                fprintf(fplog," %c %.2f",dim2char(d),shrink);
 +            }
 +        }
 +        fprintf(fplog,"\n");
 +    }
 +    else
 +    {
 +        set_dd_cell_sizes_slb(dd,ddbox,FALSE,np);
 +        fprintf(fplog,"The initial number of communication pulses is:");
 +        for(d=0; d<dd->ndim; d++)
 +        {
 +            fprintf(fplog," %c %d",dim2char(dd->dim[d]),np[dd->dim[d]]);
 +        }
 +        fprintf(fplog,"\n");
 +        fprintf(fplog,"The initial domain decomposition cell size is:");
 +        for(d=0; d<DIM; d++)
 +        {
 +            if (dd->nc[d] > 1)
 +            {
 +                fprintf(fplog," %c %.2f nm",
 +                        dim2char(d),dd->comm->cellsize_min[d]);
 +            }
 +        }
 +        fprintf(fplog,"\n\n");
 +    }
 +    
 +    if (comm->bInterCGBondeds || dd->vsite_comm || dd->constraint_comm)
 +    {
 +        fprintf(fplog,"The maximum allowed distance for charge groups involved in interactions is:\n");
 +        fprintf(fplog,"%40s  %-7s %6.3f nm\n",
 +                "non-bonded interactions","",comm->cutoff);
 +
 +        if (bDynLoadBal)
 +        {
 +            limit = dd->comm->cellsize_limit;
 +        }
 +        else
 +        {
 +            if (dynamic_dd_box(ddbox,ir))
 +            {
 +                fprintf(fplog,"(the following are initial values, they could change due to box deformation)\n");
 +            }
 +            limit = dd->comm->cellsize_min[XX];
 +            for(d=1; d<DIM; d++)
 +            {
 +                limit = min(limit,dd->comm->cellsize_min[d]);
 +            }
 +        }
 +
 +        if (comm->bInterCGBondeds)
 +        {
 +            fprintf(fplog,"%40s  %-7s %6.3f nm\n",
 +                    "two-body bonded interactions","(-rdd)",
 +                    max(comm->cutoff,comm->cutoff_mbody));
 +            fprintf(fplog,"%40s  %-7s %6.3f nm\n",
 +                    "multi-body bonded interactions","(-rdd)",
 +                    (comm->bBondComm || dd->bGridJump) ? comm->cutoff_mbody : min(comm->cutoff,limit));
 +        }
 +        if (dd->vsite_comm)
 +        {
 +            fprintf(fplog,"%40s  %-7s %6.3f nm\n",
 +                    "virtual site constructions","(-rcon)",limit);
 +        }
 +        if (dd->constraint_comm)
 +        {
 +            sprintf(buf,"atoms separated by up to %d constraints",
 +                    1+ir->nProjOrder);
 +            fprintf(fplog,"%40s  %-7s %6.3f nm\n",
 +                    buf,"(-rcon)",limit);
 +        }
 +        fprintf(fplog,"\n");
 +    }
 +    
 +    fflush(fplog);
 +}
 +
 +void set_dd_parameters(FILE *fplog,gmx_domdec_t *dd,real dlb_scale,
 +                       t_inputrec *ir,t_forcerec *fr,
 +                       gmx_ddbox_t *ddbox)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int  d,dim,npulse,npulse_d_max,npulse_d;
 +    gmx_bool bNoCutOff;
 +    int  natoms_tot;
 +    real vol_frac;
 +
 +    comm = dd->comm;
 +
 +    bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
 +
 +    if (EEL_PME(ir->coulombtype))
 +    {
 +        init_ddpme(dd,&comm->ddpme[0],0);
 +        if (comm->npmedecompdim >= 2)
 +        {
 +            init_ddpme(dd,&comm->ddpme[1],1);
 +        }
 +    }
 +    else
 +    {
 +        comm->npmenodes = 0;
 +        if (dd->pme_nodeid >= 0)
 +        {
 +            gmx_fatal_collective(FARGS,NULL,dd,
 +                                 "Can not have separate PME nodes without PME electrostatics");
 +        }
 +    }
 +    
 +    /* If each molecule is a single charge group
 +     * or we use domain decomposition for each periodic dimension,
 +     * we do not need to take pbc into account for the bonded interactions.
 +     */
 +    if (fr->ePBC == epbcNONE || !comm->bInterCGBondeds ||
 +        (dd->nc[XX]>1 && dd->nc[YY]>1 && (dd->nc[ZZ]>1 || fr->ePBC==epbcXY)))
 +    {
 +        fr->bMolPBC = FALSE;
 +    }
 +    else
 +    {
 +        fr->bMolPBC = TRUE;
 +    }
 +        
 +    if (debug)
 +    {
 +        fprintf(debug,"The DD cut-off is %f\n",comm->cutoff);
 +    }
 +    if (comm->eDLB != edlbNO)
 +    {
 +        /* Determine the maximum number of comm. pulses in one dimension */
 +        
 +        comm->cellsize_limit = max(comm->cellsize_limit,comm->cutoff_mbody);
 +        
 +        /* Determine the maximum required number of grid pulses */
 +        if (comm->cellsize_limit >= comm->cutoff)
 +        {
 +            /* Only a single pulse is required */
 +            npulse = 1;
 +        }
 +        else if (!bNoCutOff && comm->cellsize_limit > 0)
 +        {
 +            /* We round down slightly here to avoid overhead due to the latency
 +             * of extra communication calls when the cut-off
 +             * would be only slightly longer than the cell size.
 +             * Later cellsize_limit is redetermined,
 +             * so we can not miss interactions due to this rounding.
 +             */
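 +            /* Illustrative example (editor's note, not part of the original
 +             * code): with cutoff = 1.01 nm and cellsize_limit = 0.5 nm the
 +             * ratio is 2.02, and 0.96 + 2.02 = 2.98 truncates to npulse = 2,
 +             * avoiding a third pulse for a cut-off only marginally longer
 +             * than two cell sizes.
 +             */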
 +            npulse = (int)(0.96 + comm->cutoff/comm->cellsize_limit);
 +        }
 +        else
 +        {
 +            /* There is no cell size limit */
 +            npulse = max(dd->nc[XX]-1,max(dd->nc[YY]-1,dd->nc[ZZ]-1));
 +        }
 +
 +        if (!bNoCutOff && npulse > 1)
 +        {
 +            /* See if we can do with fewer pulses, based on dlb_scale */
 +            npulse_d_max = 0;
 +            for(d=0; d<dd->ndim; d++)
 +            {
 +                dim = dd->dim[d];
 +                npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
 +                                 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
 +                npulse_d_max = max(npulse_d_max,npulse_d);
 +            }
 +            npulse = min(npulse,npulse_d_max);
 +        }
 +        
 +        /* This env var can override npulse */
 +        d = dd_nst_env(fplog,"GMX_DD_NPULSE",0);
 +        if (d > 0)
 +        {
 +            npulse = d;
 +        }
 +
 +        comm->maxpulse = 1;
 +        comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
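 +        /* Editor's note (not part of the original code): bVacDLBNoLimit
 +         * records whether, in a system without pbc, every dimension can use
 +         * up to nc-1 pulses; the loop below clears it if any dimension is
 +         * limited to fewer pulses, in which case the cut-off imposes a
 +         * minimum cell size further down.
 +         */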
 +        for(d=0; d<dd->ndim; d++)
 +        {
 +            comm->cd[d].np_dlb = min(npulse,dd->nc[dd->dim[d]]-1);
 +            comm->cd[d].np_nalloc = comm->cd[d].np_dlb;
 +            snew(comm->cd[d].ind,comm->cd[d].np_nalloc);
 +            comm->maxpulse = max(comm->maxpulse,comm->cd[d].np_dlb);
 +            if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
 +            {
 +                comm->bVacDLBNoLimit = FALSE;
 +            }
 +        }
 +        
 +        /* cellsize_limit is set for LINCS in init_domain_decomposition */
 +        if (!comm->bVacDLBNoLimit)
 +        {
 +            comm->cellsize_limit = max(comm->cellsize_limit,
 +                                       comm->cutoff/comm->maxpulse);
 +        }
 +        comm->cellsize_limit = max(comm->cellsize_limit,comm->cutoff_mbody);
 +        /* Set the minimum cell size for each DD dimension */
 +        for(d=0; d<dd->ndim; d++)
 +        {
 +            if (comm->bVacDLBNoLimit ||
 +                comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
 +            {
 +                comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
 +            }
 +            else
 +            {
 +                comm->cellsize_min_dlb[dd->dim[d]] =
 +                    comm->cutoff/comm->cd[d].np_dlb;
 +            }
 +        }
 +        if (comm->cutoff_mbody <= 0)
 +        {
 +            comm->cutoff_mbody = min(comm->cutoff,comm->cellsize_limit);
 +        }
 +        if (comm->bDynLoadBal)
 +        {
 +            set_dlb_limits(dd);
 +        }
 +    }
 +    
 +    print_dd_settings(fplog,dd,ir,comm->bDynLoadBal,dlb_scale,ddbox);
 +    if (comm->eDLB == edlbAUTO)
 +    {
 +        if (fplog)
 +        {
 +            fprintf(fplog,"When dynamic load balancing gets turned on, these settings will change to:\n");
 +        }
 +        print_dd_settings(fplog,dd,ir,TRUE,dlb_scale,ddbox);
 +    }
 +
 +    if (ir->ePBC == epbcNONE)
 +    {
 +        vol_frac = 1 - 1/(double)dd->nnodes;
 +    }
 +    else
 +    {
 +        vol_frac =
 +            (1 + comm_box_frac(dd->nc,comm->cutoff,ddbox))/(double)dd->nnodes;
 +    }
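 +    /* Illustrative example (editor's note, not part of the original code):
 +     * with 8 DD nodes and no pbc the estimate above is
 +     * vol_frac = 1 - 1/8 = 0.875, and the ga2la lookup table below is sized
 +     * for that fraction of the global atom count.
 +     */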
 +    if (debug)
 +    {
 +        fprintf(debug,"Volume fraction for all DD zones: %f\n",vol_frac);
 +    }
 +    natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
 +   
 +    dd->ga2la = ga2la_init(natoms_tot,vol_frac*natoms_tot);
 +}
 +
 +static void merge_cg_buffers(int ncell,
 +                             gmx_domdec_comm_dim_t *cd, int pulse,
 +                             int  *ncg_cell,
 +                             int  *index_gl, int  *recv_i,
 +                             rvec *cg_cm,    rvec *recv_vr,
 +                             int *cgindex,
 +                             cginfo_mb_t *cginfo_mb,int *cginfo)
 +{
 +    gmx_domdec_ind_t *ind,*ind_p;
 +    int p,cell,c,cg,cg0,cg1,cg_gl,nat;
 +    int shift,shift_at;
 +    
 +    ind = &cd->ind[pulse];
 +    
 +    /* First correct the already stored data */
 +    shift = ind->nrecv[ncell];
 +    for(cell=ncell-1; cell>=0; cell--)
 +    {
 +        shift -= ind->nrecv[cell];
 +        if (shift > 0)
 +        {
 +            /* Move the cg's already received in previous grid pulses */
 +            cg0 = ncg_cell[ncell+cell];
 +            cg1 = ncg_cell[ncell+cell+1];
 +            cgindex[cg1+shift] = cgindex[cg1];
 +            for(cg=cg1-1; cg>=cg0; cg--)
 +            {
 +                index_gl[cg+shift] = index_gl[cg];
 +                copy_rvec(cg_cm[cg],cg_cm[cg+shift]);
 +                cgindex[cg+shift] = cgindex[cg];
 +                cginfo[cg+shift] = cginfo[cg];
 +            }
 +            /* Correct the already stored send indices for the shift */
 +            for(p=1; p<=pulse; p++)
 +            {
 +                ind_p = &cd->ind[p];
 +                cg0 = 0;
 +                for(c=0; c<cell; c++)
 +                {
 +                    cg0 += ind_p->nsend[c];
 +                }
 +                cg1 = cg0 + ind_p->nsend[cell];
 +                for(cg=cg0; cg<cg1; cg++)
 +                {
 +                    ind_p->index[cg] += shift;
 +                }
 +            }
 +        }
 +    }
 +
 +    /* Merge in the communicated buffers */
 +    shift = 0;
 +    shift_at = 0;
 +    cg0 = 0;
 +    for(cell=0; cell<ncell; cell++)
 +    {
 +        cg1 = ncg_cell[ncell+cell+1] + shift;
 +        if (shift_at > 0)
 +        {
 +            /* Correct the old cg indices */
 +            for(cg=ncg_cell[ncell+cell]; cg<cg1; cg++)
 +            {
 +                cgindex[cg+1] += shift_at;
 +            }
 +        }
 +        for(cg=0; cg<ind->nrecv[cell]; cg++)
 +        {
 +            /* Copy this charge group from the buffer */
 +            index_gl[cg1] = recv_i[cg0];
 +            copy_rvec(recv_vr[cg0],cg_cm[cg1]);
 +            /* Add it to the cgindex */
 +            cg_gl = index_gl[cg1];
 +            cginfo[cg1] = ddcginfo(cginfo_mb,cg_gl);
 +            nat = GET_CGINFO_NATOMS(cginfo[cg1]);
 +            cgindex[cg1+1] = cgindex[cg1] + nat;
 +            cg0++;
 +            cg1++;
 +            shift_at += nat;
 +        }
 +        shift += ind->nrecv[cell];
 +        ncg_cell[ncell+cell+1] = cg1;
 +    }
 +}
 +
 +static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
 +                               int nzone,int cg0,const int *cgindex)
 +{
 +    int cg,zone,p;
 +    
 +    /* Store the atom block boundaries for easy copying of communication buffers
 +     */
 +    cg = cg0;
 +    for(zone=0; zone<nzone; zone++)
 +    {
 +        for(p=0; p<cd->np; p++)
 +        {
 +            cd->ind[p].cell2at0[zone] = cgindex[cg];
 +            cg += cd->ind[p].nrecv[zone];
 +            cd->ind[p].cell2at1[zone] = cgindex[cg];
 +        }
 +    }
 +}
 +
 +static gmx_bool missing_link(t_blocka *link,int cg_gl,char *bLocalCG)
 +{
 +    int  i;
 +    gmx_bool bMiss;
 +
 +    bMiss = FALSE;
 +    for(i=link->index[cg_gl]; i<link->index[cg_gl+1]; i++)
 +    {
 +        if (!bLocalCG[link->a[i]])
 +        {
 +            bMiss = TRUE;
 +        }
 +    }
 +
 +    return bMiss;
 +}
 +
 +static void setup_dd_communication(gmx_domdec_t *dd,
 +                                   matrix box,gmx_ddbox_t *ddbox,t_forcerec *fr)
 +{
 +    int dim_ind,dim,dim0,dim1=-1,dim2=-1,dimd,p,nat_tot;
 +    int nzone,nzone_send,zone,zonei,cg0,cg1;
 +    int c,i,j,cg,cg_gl,nrcg;
 +    int *zone_cg_range,pos_cg,*index_gl,*cgindex,*recv_i;
 +    gmx_domdec_comm_t *comm;
 +    gmx_domdec_zones_t *zones;
 +    gmx_domdec_comm_dim_t *cd;
 +    gmx_domdec_ind_t *ind;
 +    cginfo_mb_t *cginfo_mb;
 +    gmx_bool bBondComm,bDist2B,bDistMB,bDistMB_pulse,bDistBonded,bScrew;
 +    real r_mb,r_comm2,r_scomm2,r_bcomm2,r,r_0,r_1,r2,rb2,r2inc,inv_ncg,tric_sh;
 +    rvec rb,rn;
 +    real corner[DIM][4],corner_round_0=0,corner_round_1[4];
 +    real bcorner[DIM],bcorner_round_1=0;
 +    ivec tric_dist;
 +    rvec *cg_cm,*normal,*v_d,*v_0=NULL,*v_1=NULL,*recv_vr;
 +    real skew_fac2_d,skew_fac_01;
 +    rvec sf2_round;
 +    int  nsend,nat;
 +    
 +    if (debug)
 +    {
 +        fprintf(debug,"Setting up DD communication\n");
 +    }
 +    
 +    comm  = dd->comm;
 +    cg_cm = fr->cg_cm;
 +
 +    for(dim_ind=0; dim_ind<dd->ndim; dim_ind++)
 +    {
 +        dim = dd->dim[dim_ind];
 +
 +        /* Check if we need to use triclinic distances */
 +        tric_dist[dim_ind] = 0;
 +        for(i=0; i<=dim_ind; i++)
 +        {
 +            if (ddbox->tric_dir[dd->dim[i]])
 +            {
 +                tric_dist[dim_ind] = 1;
 +            }
 +        }
 +    }
 +
 +    bBondComm = comm->bBondComm;
 +
 +    /* Do we need to determine extra distances for multi-body bondeds? */
 +    bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
 +    
 +    /* Do we need to determine extra distances for only two-body bondeds? */
 +    bDist2B = (bBondComm && !bDistMB);
 +
 +    r_comm2  = sqr(comm->cutoff);
 +    r_bcomm2 = sqr(comm->cutoff_mbody);
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"bBondComm %d, r_bc %f\n",bBondComm,sqrt(r_bcomm2));
 +    }
 +
 +    zones = &comm->zones;
 +    
 +    dim0 = dd->dim[0];
 +    /* The first dimension is equal for all cells */
 +    corner[0][0] = comm->cell_x0[dim0];
 +    if (bDistMB)
 +    {
 +        bcorner[0] = corner[0][0];
 +    }
 +    if (dd->ndim >= 2)
 +    {
 +        dim1 = dd->dim[1];
 +        /* This cell row is only seen from the first row */
 +        corner[1][0] = comm->cell_x0[dim1];
 +        /* All rows can see this row */
 +        corner[1][1] = comm->cell_x0[dim1];
 +        if (dd->bGridJump)
 +        {
 +            corner[1][1] = max(comm->cell_x0[dim1],comm->zone_d1[1].mch0);
 +            if (bDistMB)
 +            {
 +                /* For the multi-body distance we need the maximum */
 +                bcorner[1] = max(comm->cell_x0[dim1],comm->zone_d1[1].p1_0);
 +            }
 +        }
 +        /* Set the upper-right corner for rounding */
 +        corner_round_0 = comm->cell_x1[dim0];
 +        
 +        if (dd->ndim >= 3)
 +        {
 +            dim2 = dd->dim[2];
 +            for(j=0; j<4; j++)
 +            {
 +                corner[2][j] = comm->cell_x0[dim2];
 +            }
 +            if (dd->bGridJump)
 +            {
 +                /* Use the maximum of the i-cells that see a j-cell */
 +                for(i=0; i<zones->nizone; i++)
 +                {
 +                    for(j=zones->izone[i].j0; j<zones->izone[i].j1; j++)
 +                    {
 +                        if (j >= 4)
 +                        {
 +                            corner[2][j-4] =
 +                                max(corner[2][j-4],
 +                                    comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
 +                        }
 +                    }
 +                }
 +                if (bDistMB)
 +                {
 +                    /* For the multi-body distance we need the maximum */
 +                    bcorner[2] = comm->cell_x0[dim2];
 +                    for(i=0; i<2; i++)
 +                    {
 +                        for(j=0; j<2; j++)
 +                        {
 +                            bcorner[2] = max(bcorner[2],
 +                                             comm->zone_d2[i][j].p1_0);
 +                        }
 +                    }
 +                }
 +            }
 +            
 +            /* Set the upper-right corner for rounding */
 +            /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
 +             * Only cell (0,0,0) can see cell 7 (1,1,1)
 +             */
 +            corner_round_1[0] = comm->cell_x1[dim1];
 +            corner_round_1[3] = comm->cell_x1[dim1];
 +            if (dd->bGridJump)
 +            {
 +                corner_round_1[0] = max(comm->cell_x1[dim1],
 +                                        comm->zone_d1[1].mch1);
 +                if (bDistMB)
 +                {
 +                    /* For the multi-body distance we need the maximum */
 +                    bcorner_round_1 = max(comm->cell_x1[dim1],
 +                                          comm->zone_d1[1].p1_1);
 +                }
 +            }
 +        }
 +    }
 +    
 +    /* Triclinic stuff */
 +    normal = ddbox->normal;
 +    skew_fac_01 = 0;
 +    if (dd->ndim >= 2)
 +    {
 +        v_0 = ddbox->v[dim0];
 +        if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
 +        {
 +            /* Determine the coupling coefficient for the distances
 +             * to the cell planes along dim0 and dim1 through dim2.
 +             * This is required for correct rounding.
 +             */
 +            skew_fac_01 =
 +                ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
 +            if (debug)
 +            {
 +                fprintf(debug,"\nskew_fac_01 %f\n",skew_fac_01);
 +            }
 +        }
 +    }
 +    if (dd->ndim >= 3)
 +    {
 +        v_1 = ddbox->v[dim1];
 +    }
 +    
 +    zone_cg_range = zones->cg_range;
 +    index_gl = dd->index_gl;
 +    cgindex  = dd->cgindex;
 +    cginfo_mb = fr->cginfo_mb;
 +    
 +    zone_cg_range[0]   = 0;
 +    zone_cg_range[1]   = dd->ncg_home;
 +    comm->zone_ncg1[0] = dd->ncg_home;
 +    pos_cg             = dd->ncg_home;
 +    
 +    nat_tot = dd->nat_home;
 +    nzone = 1;
 +    for(dim_ind=0; dim_ind<dd->ndim; dim_ind++)
 +    {
 +        dim = dd->dim[dim_ind];
 +        cd = &comm->cd[dim_ind];
 +        
 +        if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
 +        {
 +            /* No pbc in this dimension, the first node should not comm. */
 +            nzone_send = 0;
 +        }
 +        else
 +        {
 +            nzone_send = nzone;
 +        }
 +
 +        bScrew = (dd->bScrewPBC && dim == XX);
 +        
 +        v_d = ddbox->v[dim];
 +        skew_fac2_d = sqr(ddbox->skew_fac[dim]);
 +
 +        cd->bInPlace = TRUE;
 +        for(p=0; p<cd->np; p++)
 +        {
 +            /* Only atoms communicated in the first pulse are used
 +             * for multi-body bonded interactions or for bBondComm.
 +             */
 +            bDistBonded   = ((bDistMB || bDist2B) && p == 0);
 +            bDistMB_pulse = (bDistMB && bDistBonded);
 +
 +            ind = &cd->ind[p];
 +            nsend = 0;
 +            nat = 0;
 +            for(zone=0; zone<nzone_send; zone++)
 +            {
 +                if (tric_dist[dim_ind] && dim_ind > 0)
 +                {
 +                    /* Determine slightly more optimized skew_fac's
 +                     * for rounding.
 +                     * This reduces the number of communicated atoms
 +                     * by about 10% for 3D DD of rhombic dodecahedra.
 +                     */
 +                    for(dimd=0; dimd<dim; dimd++)
 +                    {
 +                        sf2_round[dimd] = 1;
 +                        if (ddbox->tric_dir[dimd])
 +                        {
 +                            for(i=dd->dim[dimd]+1; i<DIM; i++)
 +                            {
 +                                /* If we are shifted in dimension i
 +                                 * and the cell plane is tilted forward
 +                                 * in dimension i, skip this coupling.
 +                                 */
 +                                if (!(zones->shift[nzone+zone][i] &&
 +                                      ddbox->v[dimd][i][dimd] >= 0))
 +                                {
 +                                    sf2_round[dimd] +=
 +                                        sqr(ddbox->v[dimd][i][dimd]);
 +                                }
 +                            }
 +                            sf2_round[dimd] = 1/sf2_round[dimd];
 +                        }
 +                    }
 +                }
 +
 +                zonei = zone_perm[dim_ind][zone];
 +                if (p == 0)
 +                {
 +                    /* Here we permute the zones to obtain a convenient order
 +                     * for neighbor searching
 +                     */
 +                    cg0 = zone_cg_range[zonei];
 +                    cg1 = zone_cg_range[zonei+1];
 +                }
 +                else
 +                {
 +                    /* Look only at the cg's received in the previous grid pulse
 +                     */
 +                    cg1 = zone_cg_range[nzone+zone+1];
 +                    cg0 = cg1 - cd->ind[p-1].nrecv[zone];
 +                }
 +                ind->nsend[zone] = 0;
 +                for(cg=cg0; cg<cg1; cg++)
 +                {
 +                    r2  = 0;
 +                    rb2 = 0;
 +                    if (tric_dist[dim_ind] == 0)
 +                    {
 +                        /* Rectangular direction, easy */
 +                        r = cg_cm[cg][dim] - corner[dim_ind][zone];
 +                        if (r > 0)
 +                        {
 +                            r2 += r*r;
 +                        }
 +                        if (bDistMB_pulse)
 +                        {
 +                            r = cg_cm[cg][dim] - bcorner[dim_ind];
 +                            if (r > 0)
 +                            {
 +                                rb2 += r*r;
 +                            }
 +                        }
 +                        /* Rounding gives at most a 16% reduction
 +                         * in communicated atoms
 +                         */
 +                        if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
 +                        {
 +                            r = cg_cm[cg][dim0] - corner_round_0;
 +                            /* This is the first dimension, so always r >= 0 */
 +                            r2 += r*r;
 +                            if (bDistMB_pulse)
 +                            {
 +                                rb2 += r*r;
 +                            }
 +                        }
 +                        if (dim_ind == 2 && (zonei == 2 || zonei == 3))
 +                        {
 +                            r = cg_cm[cg][dim1] - corner_round_1[zone];
 +                            if (r > 0)
 +                            {
 +                                r2 += r*r;
 +                            }
 +                            if (bDistMB_pulse)
 +                            {
 +                                r = cg_cm[cg][dim1] - bcorner_round_1;
 +                                if (r > 0)
 +                                {
 +                                    rb2 += r*r;
 +                                }
 +                            }
 +                        }
 +                    }
 +                    else
 +                    {
 +                        /* Triclinic direction, more complicated */
 +                        clear_rvec(rn);
 +                        clear_rvec(rb);
 +                        /* The rounding is conservative, as the skew_fac
 +                         * multiplication slightly underestimates the distance.
 +                         */
 +                        if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
 +                        {
 +                            rn[dim0] = cg_cm[cg][dim0] - corner_round_0;
 +                            for(i=dim0+1; i<DIM; i++)
 +                            {
 +                                rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
 +                            }
 +                            r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
 +                            if (bDistMB_pulse)
 +                            {
 +                                rb[dim0] = rn[dim0];
 +                                rb2 = r2;
 +                            }
 +                            /* Take care that the cell planes along dim0 might not
 +                             * be orthogonal to those along dim1 and dim2.
 +                             */
 +                            for(i=1; i<=dim_ind; i++)
 +                            {
 +                                dimd = dd->dim[i];
 +                                if (normal[dim0][dimd] > 0)
 +                                {
 +                                    rn[dimd] -= rn[dim0]*normal[dim0][dimd];
 +                                    if (bDistMB_pulse)
 +                                    {
 +                                        rb[dimd] -= rb[dim0]*normal[dim0][dimd];
 +                                    }
 +                                }
 +                            }
 +                        }
 +                        if (dim_ind == 2 && (zonei == 2 || zonei == 3))
 +                        {
 +                            rn[dim1] += cg_cm[cg][dim1] - corner_round_1[zone];
 +                            tric_sh = 0;
 +                            for(i=dim1+1; i<DIM; i++)
 +                            {
 +                                tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
 +                            }
 +                            rn[dim1] += tric_sh;
 +                            if (rn[dim1] > 0)
 +                            {
 +                                r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
 +                                /* Take care of coupling of the distances
 +                                 * to the planes along dim0 and dim1 through dim2.
 +                                 */
 +                                r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
 +                                /* Take care that the cell planes along dim1
 +                                 * might not be orthogonal to that along dim2.
 +                                 */
 +                                if (normal[dim1][dim2] > 0)
 +                                {
 +                                    rn[dim2] -= rn[dim1]*normal[dim1][dim2];
 +                                }
 +                            }
 +                            if (bDistMB_pulse)
 +                            {
 +                                rb[dim1] +=
 +                                    cg_cm[cg][dim1] - bcorner_round_1 + tric_sh;
 +                                if (rb[dim1] > 0)
 +                                {
 +                                    rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
 +                                    /* Take care of coupling of the distances
 +                                     * to the planes along dim0 and dim1 through dim2.
 +                                     */
 +                                    rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
 +                                    /* Take care that the cell planes along dim1
 +                                     * might not be orthogonal to that along dim2.
 +                                     */
 +                                    if (normal[dim1][dim2] > 0)
 +                                    {
 +                                        rb[dim2] -= rb[dim1]*normal[dim1][dim2];
 +                                    }
 +                                }
 +                            }
 +                        }
 +                        /* The distance along the communication direction */
 +                        rn[dim] += cg_cm[cg][dim] - corner[dim_ind][zone];
 +                        tric_sh = 0;
 +                        for(i=dim+1; i<DIM; i++)
 +                        {
 +                            tric_sh -= cg_cm[cg][i]*v_d[i][dim];
 +                        }
 +                        rn[dim] += tric_sh;
 +                        if (rn[dim] > 0)
 +                        {
 +                            r2 += rn[dim]*rn[dim]*skew_fac2_d;
 +                            /* Take care of coupling of the distances
 +                             * to the planes along dim0 and dim1 through dim2.
 +                             */
 +                            if (dim_ind == 1 && zonei == 1)
 +                            {
 +                                r2 -= rn[dim0]*rn[dim]*skew_fac_01;
 +                            }
 +                        }
 +                        if (bDistMB_pulse)
 +                        {
 +                            clear_rvec(rb);
 +                            rb[dim] += cg_cm[cg][dim] - bcorner[dim_ind] + tric_sh;
 +                            if (rb[dim] > 0)
 +                            {
 +                                rb2 += rb[dim]*rb[dim]*skew_fac2_d;
 +                                /* Take care of coupling of the distances
 +                                 * to the planes along dim0 and dim1 through dim2.
 +                                 */
 +                                if (dim_ind == 1 && zonei == 1)
 +                                {
 +                                    rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
 +                                }
 +                            }
 +                        }
 +                    }
 +                    
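 +                    /* Editor's note (not part of the original code): a charge
 +                     * group is sent if it lies within the non-bonded cut-off,
 +                     * or, in the first pulse only, within the bonded cut-off
 +                     * and, when bBondComm is used, only if it has bonded
 +                     * interactions outside its charge group with a partner
 +                     * that is not yet present on this node.
 +                     */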
 +                    if (r2 < r_comm2 ||
 +                        (bDistBonded &&
 +                         ((bDistMB && rb2 < r_bcomm2) ||
 +                          (bDist2B && r2  < r_bcomm2)) &&
 +                         (!bBondComm ||
 +                          (GET_CGINFO_BOND_INTER(fr->cginfo[cg]) &&
 +                           missing_link(comm->cglink,index_gl[cg],
 +                                        comm->bLocalCG)))))
 +                    {
 +                        /* Make an index to the local charge groups */
 +                        if (nsend+1 > ind->nalloc)
 +                        {
 +                            ind->nalloc = over_alloc_large(nsend+1);
 +                            srenew(ind->index,ind->nalloc);
 +                        }
 +                        if (nsend+1 > comm->nalloc_int)
 +                        {
 +                            comm->nalloc_int = over_alloc_large(nsend+1);
 +                            srenew(comm->buf_int,comm->nalloc_int);
 +                        }
 +                        ind->index[nsend] = cg;
 +                        comm->buf_int[nsend] = index_gl[cg];
 +                        ind->nsend[zone]++;
 +                        vec_rvec_check_alloc(&comm->vbuf,nsend+1);
 +
 +                        if (dd->ci[dim] == 0)
 +                        {
 +                            /* Correct cg_cm for pbc */
 +                            rvec_add(cg_cm[cg],box[dim],comm->vbuf.v[nsend]);
 +                            if (bScrew)
 +                            {
 +                                comm->vbuf.v[nsend][YY] =
 +                                    box[YY][YY]-comm->vbuf.v[nsend][YY];
 +                                comm->vbuf.v[nsend][ZZ] =
 +                                    box[ZZ][ZZ]-comm->vbuf.v[nsend][ZZ];
 +                            }
 +                        }
 +                        else
 +                        {
 +                            copy_rvec(cg_cm[cg],comm->vbuf.v[nsend]);
 +                        }
 +                        nsend++;
 +                        nat += cgindex[cg+1] - cgindex[cg];
 +                    }
 +                }
 +            }
 +            /* Clear the counts in case we do not have pbc */
 +            for(zone=nzone_send; zone<nzone; zone++)
 +            {
 +                ind->nsend[zone] = 0;
 +            }
 +            ind->nsend[nzone]   = nsend;
 +            ind->nsend[nzone+1] = nat;
 +            /* Communicate the number of cg's and atoms to receive */
 +            dd_sendrecv_int(dd, dim_ind, dddirBackward,
 +                            ind->nsend, nzone+2,
 +                            ind->nrecv, nzone+2);
 +            
 +            /* The rvec buffer is also required for atom buffers of size nsend
 +             * in dd_move_x and dd_move_f.
 +             */
 +            vec_rvec_check_alloc(&comm->vbuf,ind->nsend[nzone+1]);
 +
 +            if (p > 0)
 +            {
 +                /* We can receive in place if only the last zone is not empty */
 +                for(zone=0; zone<nzone-1; zone++)
 +                {
 +                    if (ind->nrecv[zone] > 0)
 +                    {
 +                        cd->bInPlace = FALSE;
 +                    }
 +                }
 +                if (!cd->bInPlace)
 +                {
 +                    /* The int buffer is only required here for the cg indices */
 +                    if (ind->nrecv[nzone] > comm->nalloc_int2)
 +                    {
 +                        comm->nalloc_int2 = over_alloc_dd(ind->nrecv[nzone]);
 +                        srenew(comm->buf_int2,comm->nalloc_int2);
 +                    }
 +                    /* The rvec buffer is also required for atom buffers
 +                     * of size nrecv in dd_move_x and dd_move_f.
 +                     */
 +                    i = max(cd->ind[0].nrecv[nzone+1],ind->nrecv[nzone+1]);
 +                    vec_rvec_check_alloc(&comm->vbuf2,i);
 +                }
 +            }
 +            
 +            /* Make space for the global cg indices */
 +            if (pos_cg + ind->nrecv[nzone] > dd->cg_nalloc
 +                || dd->cg_nalloc == 0)
 +            {
 +                dd->cg_nalloc = over_alloc_dd(pos_cg + ind->nrecv[nzone]);
 +                srenew(index_gl,dd->cg_nalloc);
 +                srenew(cgindex,dd->cg_nalloc+1);
 +            }
 +            /* Communicate the global cg indices */
 +            if (cd->bInPlace)
 +            {
 +                recv_i = index_gl + pos_cg;
 +            }
 +            else
 +            {
 +                recv_i = comm->buf_int2;
 +            }
 +            dd_sendrecv_int(dd, dim_ind, dddirBackward,
 +                            comm->buf_int, nsend,
 +                            recv_i,        ind->nrecv[nzone]);
 +
 +            /* Make space for cg_cm */
 +            if (pos_cg + ind->nrecv[nzone] > fr->cg_nalloc)
 +            {
 +                dd_realloc_fr_cg(fr,pos_cg + ind->nrecv[nzone]);
 +                cg_cm = fr->cg_cm;
 +            }
 +            /* Communicate cg_cm */
 +            if (cd->bInPlace)
 +            {
 +                recv_vr = cg_cm + pos_cg;
 +            }
 +            else
 +            {
 +                recv_vr = comm->vbuf2.v;
 +            }
 +            dd_sendrecv_rvec(dd, dim_ind, dddirBackward,
 +                             comm->vbuf.v, nsend,
 +                             recv_vr,      ind->nrecv[nzone]);
 +            
 +            /* Make the charge group index */
 +            if (cd->bInPlace)
 +            {
 +                zone = (p == 0 ? 0 : nzone - 1);
 +                while (zone < nzone)
 +                {
 +                    for(cg=0; cg<ind->nrecv[zone]; cg++)
 +                    {
 +                        cg_gl = index_gl[pos_cg];
 +                        fr->cginfo[pos_cg] = ddcginfo(cginfo_mb,cg_gl);
 +                        nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
 +                        cgindex[pos_cg+1] = cgindex[pos_cg] + nrcg;
 +                        if (bBondComm)
 +                        {
 +                            /* Update the charge group presence,
 +                             * so we can use it in the next pass of the loop.
 +                             */
 +                            comm->bLocalCG[cg_gl] = TRUE;
 +                        }
 +                        pos_cg++;
 +                    }
 +                    if (p == 0)
 +                    {
 +                        comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
 +                    }
 +                    zone++;
 +                    zone_cg_range[nzone+zone] = pos_cg;
 +                }
 +            }
 +            else
 +            {
 +                /* This part of the code is never executed with bBondComm. */
 +                merge_cg_buffers(nzone,cd,p,zone_cg_range,
 +                                 index_gl,recv_i,cg_cm,recv_vr,
 +                                 cgindex,fr->cginfo_mb,fr->cginfo);
 +                pos_cg += ind->nrecv[nzone];
 +            }
 +            nat_tot += ind->nrecv[nzone+1];
 +        }
 +        if (!cd->bInPlace)
 +        {
 +            /* Store the atom block for easy copying of communication buffers */
 +            make_cell2at_index(cd,nzone,zone_cg_range[nzone],cgindex);
 +        }
 +        nzone += nzone;
 +    }
 +    dd->index_gl = index_gl;
 +    dd->cgindex  = cgindex;
 +    
 +    dd->ncg_tot = zone_cg_range[zones->n];
 +    dd->nat_tot = nat_tot;
 +    comm->nat[ddnatHOME] = dd->nat_home;
 +    for(i=ddnatZONE; i<ddnatNR; i++)
 +    {
 +        comm->nat[i] = dd->nat_tot;
 +    }
 +
 +    if (!bBondComm)
 +    {
 +        /* We don't need to update cginfo, since that was already done above.
 +         * So we pass NULL for the forcerec.
 +         */
 +        dd_set_cginfo(dd->index_gl,dd->ncg_home,dd->ncg_tot,
 +                      NULL,comm->bLocalCG);
 +    }
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"Finished setting up DD communication, zones:");
 +        for(c=0; c<zones->n; c++)
 +        {
 +            fprintf(debug," %d",zones->cg_range[c+1]-zones->cg_range[c]);
 +        }
 +        fprintf(debug,"\n");
 +    }
 +}
 +
 +static void set_cg_boundaries(gmx_domdec_zones_t *zones)
 +{
 +    int c;
 +    
 +    for(c=0; c<zones->nizone; c++)
 +    {
 +        zones->izone[c].cg1  = zones->cg_range[c+1];
 +        zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
 +        zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
 +    }
 +}
 +
 +static int comp_cgsort(const void *a,const void *b)
 +{
 +    int comp;
 +    
 +    gmx_cgsort_t *cga,*cgb;
 +    cga = (gmx_cgsort_t *)a;
 +    cgb = (gmx_cgsort_t *)b;
 +    
 +    comp = cga->nsc - cgb->nsc;
 +    if (comp == 0)
 +    {
 +        comp = cga->ind_gl - cgb->ind_gl;
 +    }
 +    
 +    return comp;
 +}
 +
 +static void order_int_cg(int n,gmx_cgsort_t *sort,
 +                         int *a,int *buf)
 +{
 +    int i;
 +    
 +    /* Order the data */
 +    for(i=0; i<n; i++)
 +    {
 +        buf[i] = a[sort[i].ind];
 +    }
 +    
 +    /* Copy back to the original array */
 +    for(i=0; i<n; i++)
 +    {
 +        a[i] = buf[i];
 +    }
 +}
 +
 +static void order_vec_cg(int n,gmx_cgsort_t *sort,
 +                         rvec *v,rvec *buf)
 +{
 +    int i;
 +    
 +    /* Order the data */
 +    for(i=0; i<n; i++)
 +    {
 +        copy_rvec(v[sort[i].ind],buf[i]);
 +    }
 +    
 +    /* Copy back to the original array */
 +    for(i=0; i<n; i++)
 +    {
 +        copy_rvec(buf[i],v[i]);
 +    }
 +}
 +
 +static void order_vec_atom(int ncg,int *cgindex,gmx_cgsort_t *sort,
 +                           rvec *v,rvec *buf)
 +{
 +    int a,atot,cg,cg0,cg1,i;
 +    
 +    /* Order the data */
 +    a = 0;
 +    for(cg=0; cg<ncg; cg++)
 +    {
 +        cg0 = cgindex[sort[cg].ind];
 +        cg1 = cgindex[sort[cg].ind+1];
 +        for(i=cg0; i<cg1; i++)
 +        {
 +            copy_rvec(v[i],buf[a]);
 +            a++;
 +        }
 +    }
 +    atot = a;
 +    
 +    /* Copy back to the original array */
 +    for(a=0; a<atot; a++)
 +    {
 +        copy_rvec(buf[a],v[a]);
 +    }
 +}
 +
 +static void ordered_sort(int nsort2,gmx_cgsort_t *sort2,
 +                         int nsort_new,gmx_cgsort_t *sort_new,
 +                         gmx_cgsort_t *sort1)
 +{
 +    int i1,i2,i_new;
 +    
 +    /* The new indices are not very ordered, so we qsort them */
 +    qsort_threadsafe(sort_new,nsort_new,sizeof(sort_new[0]),comp_cgsort);
 +    
 +    /* sort2 is already ordered, so now we can merge the two arrays */
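 +    /* Illustrative example (editor's note, not part of the original code):
 +     * merging sort2 with nsc values {0,2,5} and sort_new with {1,5} yields
 +     * sort1 = {0,1,2,5,5}; ties in nsc are broken on the global index ind_gl.
 +     */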
 +    i1 = 0;
 +    i2 = 0;
 +    i_new = 0;
 +    while(i2 < nsort2 || i_new < nsort_new)
 +    {
 +        if (i2 == nsort2)
 +        {
 +            sort1[i1++] = sort_new[i_new++];
 +        }
 +        else if (i_new == nsort_new)
 +        {
 +            sort1[i1++] = sort2[i2++];
 +        }
 +        else if (sort2[i2].nsc < sort_new[i_new].nsc ||
 +                 (sort2[i2].nsc == sort_new[i_new].nsc &&
 +                  sort2[i2].ind_gl < sort_new[i_new].ind_gl))
 +        {
 +            sort1[i1++] = sort2[i2++];
 +        }
 +        else
 +        {
 +            sort1[i1++] = sort_new[i_new++];
 +        }
 +    }
 +}
 +
 +static void dd_sort_state(gmx_domdec_t *dd,int ePBC,
 +                          rvec *cgcm,t_forcerec *fr,t_state *state,
 +                          int ncg_home_old)
 +{
 +    gmx_domdec_sort_t *sort;
 +    gmx_cgsort_t *cgsort,*sort_i;
 +    int  ncg_new,nsort2,nsort_new,i,cell_index,*ibuf,cgsize;
 +    rvec *vbuf;
 +    
 +    sort = dd->comm->sort;
 +    
 +    if (dd->ncg_home > sort->sort_nalloc)
 +    {
 +        sort->sort_nalloc = over_alloc_dd(dd->ncg_home);
 +        srenew(sort->sort1,sort->sort_nalloc);
 +        srenew(sort->sort2,sort->sort_nalloc);
 +    }
 +    
 +    if (ncg_home_old >= 0)
 +    {
 +        /* The charge groups that remained in the same ns grid cell
 +         * are already completely ordered. So we can sort efficiently
 +         * by only sorting the charge groups that moved and merging
 +         * them into the stationary list.
 +         */
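 +        /* Editor's note (not part of the original code): stationary cg's are
 +         * collected in sort2 (already in sorted order), moved or new cg's in
 +         * sort_new; ordered_sort() below qsorts sort_new and merges the two
 +         * into sort1.
 +         */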
 +        ncg_new = 0;
 +        nsort2 = 0;
 +        nsort_new = 0;
 +        for(i=0; i<dd->ncg_home; i++)
 +        {
 +            /* Check if this cg did not move to another node */
 +            cell_index = fr->ns.grid->cell_index[i];
 +            if (cell_index !=  4*fr->ns.grid->ncells)
 +            {
 +                if (i >= ncg_home_old || cell_index != sort->sort1[i].nsc)
 +                {
 +                    /* This cg is new on this node or has moved to another ns grid cell */
 +                    if (nsort_new >= sort->sort_new_nalloc)
 +                    {
 +                        sort->sort_new_nalloc = over_alloc_dd(nsort_new+1);
 +                        srenew(sort->sort_new,sort->sort_new_nalloc);
 +                    }
 +                    sort_i = &(sort->sort_new[nsort_new++]);
 +                }
 +                else
 +                {
 +                    /* This cg did not move */
 +                    sort_i = &(sort->sort2[nsort2++]);
 +                }
 +                /* Sort on the ns grid cell indices
 +                 * and the global topology index
 +                 */
 +                sort_i->nsc    = cell_index;
 +                sort_i->ind_gl = dd->index_gl[i];
 +                sort_i->ind    = i;
 +                ncg_new++;
 +            }
 +        }
 +        if (debug)
 +        {
 +            fprintf(debug,"ordered sort cgs: stationary %d moved %d\n",
 +                    nsort2,nsort_new);
 +        }
 +        /* Sort efficiently */
 +        ordered_sort(nsort2,sort->sort2,nsort_new,sort->sort_new,sort->sort1);
 +    }
 +    else
 +    {
 +        cgsort = sort->sort1;
 +        ncg_new = 0;
 +        for(i=0; i<dd->ncg_home; i++)
 +        {
 +            /* Sort on the ns grid cell indices
 +             * and the global topology index
 +             */
 +            cgsort[i].nsc    = fr->ns.grid->cell_index[i];
 +            cgsort[i].ind_gl = dd->index_gl[i];
 +            cgsort[i].ind    = i;
 +            if (cgsort[i].nsc != 4*fr->ns.grid->ncells)
 +            {
 +                ncg_new++;
 +            }
 +        }
 +        if (debug)
 +        {
 +            fprintf(debug,"qsort cgs: %d new home %d\n",dd->ncg_home,ncg_new);
 +        }
 +        /* Determine the order of the charge groups using qsort */
 +        qsort_threadsafe(cgsort,dd->ncg_home,sizeof(cgsort[0]),comp_cgsort);
 +    }
 +    cgsort = sort->sort1;
 +    
 +    /* We alloc with the old size, since cgindex is still old */
 +    vec_rvec_check_alloc(&dd->comm->vbuf,dd->cgindex[dd->ncg_home]);
 +    vbuf = dd->comm->vbuf.v;
 +    
 +    /* Remove the charge groups which are no longer at home here */
 +    dd->ncg_home = ncg_new;
 +    
 +    /* Reorder the state */
 +    for(i=0; i<estNR; i++)
 +    {
 +        if (EST_DISTR(i) && (state->flags & (1<<i)))
 +        {
 +            switch (i)
 +            {
 +            case estX:
 +                order_vec_atom(dd->ncg_home,dd->cgindex,cgsort,state->x,vbuf);
 +                break;
 +            case estV:
 +                order_vec_atom(dd->ncg_home,dd->cgindex,cgsort,state->v,vbuf);
 +                break;
 +            case estSDX:
 +                order_vec_atom(dd->ncg_home,dd->cgindex,cgsort,state->sd_X,vbuf);
 +                break;
 +            case estCGP:
 +                order_vec_atom(dd->ncg_home,dd->cgindex,cgsort,state->cg_p,vbuf);
 +                break;
 +            case estLD_RNG:
 +            case estLD_RNGI:
 +            case estDISRE_INITF:
 +            case estDISRE_RM3TAV:
 +            case estORIRE_INITF:
 +            case estORIRE_DTAV:
 +                /* No ordering required */
 +                break;
 +            default:
 +                gmx_incons("Unknown state entry encountered in dd_sort_state");
 +                break;
 +            }
 +        }
 +    }
 +    /* Reorder cgcm */
 +    order_vec_cg(dd->ncg_home,cgsort,cgcm,vbuf);
 +    
 +    if (dd->ncg_home+1 > sort->ibuf_nalloc)
 +    {
 +        sort->ibuf_nalloc = over_alloc_dd(dd->ncg_home+1);
 +        srenew(sort->ibuf,sort->ibuf_nalloc);
 +    }
 +    ibuf = sort->ibuf;
 +    /* Reorder the global cg index */
 +    order_int_cg(dd->ncg_home,cgsort,dd->index_gl,ibuf);
 +    /* Reorder the cginfo */
 +    order_int_cg(dd->ncg_home,cgsort,fr->cginfo,ibuf);
 +    /* Rebuild the local cg index */
 +    ibuf[0] = 0;
 +    for(i=0; i<dd->ncg_home; i++)
 +    {
 +        cgsize = dd->cgindex[cgsort[i].ind+1] - dd->cgindex[cgsort[i].ind];
 +        ibuf[i+1] = ibuf[i] + cgsize;
 +    }
 +    for(i=0; i<dd->ncg_home+1; i++)
 +    {
 +        dd->cgindex[i] = ibuf[i];
 +    }
 +    /* Set the home atom number */
 +    dd->nat_home = dd->cgindex[dd->ncg_home];
 +    
 +    /* Copy the sorted ns cell indices back to the ns grid struct */
 +    for(i=0; i<dd->ncg_home; i++)
 +    {
 +        fr->ns.grid->cell_index[i] = cgsort[i].nsc;
 +    }
 +    fr->ns.grid->nr = dd->ncg_home;
 +}
 +
 +static void add_dd_statistics(gmx_domdec_t *dd)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int ddnat;
 +    
 +    comm = dd->comm;
 +    
 +    for(ddnat=ddnatZONE; ddnat<ddnatNR; ddnat++)
 +    {
 +        comm->sum_nat[ddnat-ddnatZONE] +=
 +            comm->nat[ddnat] - comm->nat[ddnat-1];
 +    }
 +    comm->ndecomp++;
 +}
 +
 +void reset_dd_statistics_counters(gmx_domdec_t *dd)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int ddnat;
 +    
 +    comm = dd->comm;
 +
 +    /* Reset all the statistics and counters for total run counting */
 +    for(ddnat=ddnatZONE; ddnat<ddnatNR; ddnat++)
 +    {
 +        comm->sum_nat[ddnat-ddnatZONE] = 0;
 +    }
 +    comm->ndecomp = 0;
 +    comm->nload = 0;
 +    comm->load_step = 0;
 +    comm->load_sum = 0;
 +    comm->load_max = 0;
 +    clear_ivec(comm->load_lim);
 +    comm->load_mdf = 0;
 +    comm->load_pme = 0;
 +}
 +
 +void print_dd_statistics(t_commrec *cr,t_inputrec *ir,FILE *fplog)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int ddnat;
 +    double av;
 +   
 +    comm = cr->dd->comm;
 +    
 +    gmx_sumd(ddnatNR-ddnatZONE,comm->sum_nat,cr);
 +    
 +    if (fplog == NULL)
 +    {
 +        return;
 +    }
 +    
 +    fprintf(fplog,"\n    D O M A I N   D E C O M P O S I T I O N   S T A T I S T I C S\n\n");
 +            
 +    for(ddnat=ddnatZONE; ddnat<ddnatNR; ddnat++)
 +    {
 +        av = comm->sum_nat[ddnat-ddnatZONE]/comm->ndecomp;
 +        switch(ddnat)
 +        {
 +        case ddnatZONE:
 +            fprintf(fplog,
 +                    " av. #atoms communicated per step for force:  %d x %.1f\n",
 +                    2,av);
 +            break;
 +        case ddnatVSITE:
 +            if (cr->dd->vsite_comm)
 +            {
 +                fprintf(fplog,
 +                        " av. #atoms communicated per step for vsites: %d x %.1f\n",
 +                        (EEL_PME(ir->coulombtype) || ir->coulombtype==eelEWALD) ? 3 : 2,
 +                        av);
 +            }
 +            break;
 +        case ddnatCON:
 +            if (cr->dd->constraint_comm)
 +            {
 +                fprintf(fplog,
 +                        " av. #atoms communicated per step for LINCS:  %d x %.1f\n",
 +                        1 + ir->nLincsIter,av);
 +            }
 +            break;
 +        default:
 +            gmx_incons(" Unknown type for DD statistics");
 +        }
 +    }
 +    fprintf(fplog,"\n");
 +    
 +    if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
 +    {
 +        print_dd_load_av(fplog,cr->dd);
 +    }
 +}
 +
 +void dd_partition_system(FILE            *fplog,
 +                         gmx_large_int_t      step,
 +                         t_commrec       *cr,
 +                         gmx_bool            bMasterState,
 +                         int             nstglobalcomm,
 +                         t_state         *state_global,
 +                         gmx_mtop_t      *top_global,
 +                         t_inputrec      *ir,
 +                         t_state         *state_local,
 +                         rvec            **f,
 +                         t_mdatoms       *mdatoms,
 +                         gmx_localtop_t  *top_local,
 +                         t_forcerec      *fr,
 +                         gmx_vsite_t     *vsite,
 +                         gmx_shellfc_t   shellfc,
 +                         gmx_constr_t    constr,
 +                         t_nrnb          *nrnb,
 +                         gmx_wallcycle_t wcycle,
 +                         gmx_bool            bVerbose)
 +{
 +    gmx_domdec_t *dd;
 +    gmx_domdec_comm_t *comm;
 +    gmx_ddbox_t ddbox={0};
 +    t_block *cgs_gl;
 +    gmx_large_int_t step_pcoupl;
 +    rvec cell_ns_x0,cell_ns_x1;
 +    int  i,j,n,cg0=0,ncg_home_old=-1,nat_f_novirsum;
 +    gmx_bool bBoxChanged,bNStGlobalComm,bDoDLB,bCheckDLB,bTurnOnDLB,bLogLoad;
 +    gmx_bool bRedist,bSortCG,bResortAll;
 +    ivec ncells_old,np;
 +    real grid_density;
 +    char sbuf[22];
 +      
 +    dd = cr->dd;
 +    comm = dd->comm;
 +
 +    bBoxChanged = (bMasterState || DEFORM(*ir));
 +    if (ir->epc != epcNO)
 +    {
 +        /* With nstpcouple > 1 pressure coupling happens
 +         * one step after calculating the pressure.
 +         * Box scaling happens at the end of the MD step,
 +         * after the DD partitioning.
 +         * We therefore have to do DLB in the first partitioning
 +         * after an MD step where P-coupling occurred.
 +         * We need to determine the last step in which p-coupling occurred.
 +         * MRS -- need to validate this for vv?
 +         */
 +        n = ir->nstpcouple;
 +        if (n == 1)
 +        {
 +            step_pcoupl = step - 1;
 +        }
 +        else
 +        {
 +            step_pcoupl = ((step - 1)/n)*n + 1;
 +        }
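 +        /* Worked example of the formula above: with nstpcouple = 5 and
 +         * step = 23, step_pcoupl = ((23-1)/5)*5 + 1 = 21. */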
 +        if (step_pcoupl >= comm->globalcomm_step)
 +        {
 +            bBoxChanged = TRUE;
 +        }
 +    }
 +
 +    bNStGlobalComm = (step >= comm->globalcomm_step + nstglobalcomm);
 +
 +    if (!comm->bDynLoadBal)
 +    {
 +        bDoDLB = FALSE;
 +    }
 +    else
 +    {
 +        /* Should we do dynamic load balancing this step?
 +         * Since it requires (possibly expensive) global communication,
 +         * we might want to do DLB less frequently.
 +         */
 +        if (bBoxChanged || ir->epc != epcNO)
 +        {
 +            bDoDLB = bBoxChanged;
 +        }
 +        else
 +        {
 +            bDoDLB = bNStGlobalComm;
 +        }
 +    }
 +
 +    /* Check if we have recorded loads on the nodes */
 +    if (comm->bRecordLoad && dd_load_count(comm))
 +    {
 +        if (comm->eDLB == edlbAUTO && !comm->bDynLoadBal)
 +        {
 +            /* Check if we should use DLB at the second partitioning
 +             * and every 100 partitionings,
 +             * so the extra communication cost is negligible.
 +             */
 +            n = max(100,nstglobalcomm);
 +            bCheckDLB = (comm->n_load_collect == 0 ||
 +                         comm->n_load_have % n == n-1);
 +        }
 +        else
 +        {
 +            bCheckDLB = FALSE;
 +        }
 +        
 +        /* Print the load to the log file at every nstlog step, and at the first and last step */
 +        bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
 +                    comm->n_load_collect == 0 ||
 +                    (ir->nsteps >= 0 &&
 +                     (step + ir->nstlist > ir->init_step + ir->nsteps)));
 +
 +        /* Avoid extra communication due to verbose screen output
 +         * when nstglobalcomm is set.
 +         */
 +        if (bDoDLB || bLogLoad || bCheckDLB ||
 +            (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
 +        {
 +            get_load_distribution(dd,wcycle);
 +            if (DDMASTER(dd))
 +            {
 +                if (bLogLoad)
 +                {
 +                    dd_print_load(fplog,dd,step-1);
 +                }
 +                if (bVerbose)
 +                {
 +                    dd_print_load_verbose(dd);
 +                }
 +            }
 +            comm->n_load_collect++;
 +
 +            if (bCheckDLB)
 +            {
 +                /* Since the timings are node dependent, the master decides */
 +                if (DDMASTER(dd))
 +                {
 +                    bTurnOnDLB =
 +                        (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS);
 +                    if (debug)
 +                    {
 +                        fprintf(debug,"step %s, imb loss %f\n",
 +                                gmx_step_str(step,sbuf),
 +                                dd_force_imb_perf_loss(dd));
 +                    }
 +                }
 +                dd_bcast(dd,sizeof(bTurnOnDLB),&bTurnOnDLB);
 +                if (bTurnOnDLB)
 +                {
 +                    turn_on_dlb(fplog,cr,step);
 +                    bDoDLB = TRUE;
 +                }
 +            }
 +        }
 +        comm->n_load_have++;
 +    }
 +
 +    cgs_gl = &comm->cgs_gl;
 +
 +    bRedist = FALSE;
 +    if (bMasterState)
 +    {
 +        /* Clear the old state */
 +        clear_dd_indices(dd,0,0);
 +
 +        set_ddbox(dd,bMasterState,cr,ir,state_global->box,
 +                  TRUE,cgs_gl,state_global->x,&ddbox);
 +    
 +        get_cg_distribution(fplog,step,dd,cgs_gl,
 +                            state_global->box,&ddbox,state_global->x);
 +        
 +        dd_distribute_state(dd,cgs_gl,
 +                            state_global,state_local,f);
 +        
 +        dd_make_local_cgs(dd,&top_local->cgs);
 +        
 +        if (dd->ncg_home > fr->cg_nalloc)
 +        {
 +            dd_realloc_fr_cg(fr,dd->ncg_home);
 +        }
 +        calc_cgcm(fplog,0,dd->ncg_home,
 +                  &top_local->cgs,state_local->x,fr->cg_cm);
 +        
 +        inc_nrnb(nrnb,eNR_CGCM,dd->nat_home);
 +        
 +        dd_set_cginfo(dd->index_gl,0,dd->ncg_home,fr,comm->bLocalCG);
 +
 +        cg0 = 0;
 +    }
 +    else if (state_local->ddp_count != dd->ddp_count)
 +    {
 +        if (state_local->ddp_count > dd->ddp_count)
 +        {
 +            gmx_fatal(FARGS,"Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)",state_local->ddp_count,dd->ddp_count);
 +        }
 +        
 +        if (state_local->ddp_count_cg_gl != state_local->ddp_count)
 +        {
 +            gmx_fatal(FARGS,"Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)",state_local->ddp_count_cg_gl,state_local->ddp_count);
 +        }
 +        
 +        /* Clear the old state */
 +        clear_dd_indices(dd,0,0);
 +        
 +        /* Build the new indices */
 +        rebuild_cgindex(dd,cgs_gl->index,state_local);
 +        make_dd_indices(dd,cgs_gl->index,0);
 +        
 +        /* Redetermine the cg COMs */
 +        calc_cgcm(fplog,0,dd->ncg_home,
 +                  &top_local->cgs,state_local->x,fr->cg_cm);
 +        
 +        inc_nrnb(nrnb,eNR_CGCM,dd->nat_home);
 +
 +        dd_set_cginfo(dd->index_gl,0,dd->ncg_home,fr,comm->bLocalCG);
 +
 +        set_ddbox(dd,bMasterState,cr,ir,state_local->box,
 +                  TRUE,&top_local->cgs,state_local->x,&ddbox);
 +
 +        bRedist = comm->bDynLoadBal;
 +    }
 +    else
 +    {
 +        /* We have the full state, only redistribute the cgs */
 +
 +        /* Clear the non-home indices */
 +        clear_dd_indices(dd,dd->ncg_home,dd->nat_home);
 +
 +        /* Avoid global communication for dimensions without pbc and -gcom */
 +        if (!bNStGlobalComm)
 +        {
 +            copy_rvec(comm->box0    ,ddbox.box0    );
 +            copy_rvec(comm->box_size,ddbox.box_size);
 +        }
 +        set_ddbox(dd,bMasterState,cr,ir,state_local->box,
 +                  bNStGlobalComm,&top_local->cgs,state_local->x,&ddbox);
 +
 +        bBoxChanged = TRUE;
 +        bRedist = TRUE;
 +    }
 +    /* For dimensions without pbc and -gcom */
 +    copy_rvec(ddbox.box0    ,comm->box0    );
 +    copy_rvec(ddbox.box_size,comm->box_size);
 +    
 +    set_dd_cell_sizes(dd,&ddbox,dynamic_dd_box(&ddbox,ir),bMasterState,bDoDLB,
 +                      step,wcycle);
 +    
 +    if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
 +    {
 +        write_dd_grid_pdb("dd_grid",step,dd,state_local->box,&ddbox);
 +    }
 +    
 +    /* Check if we should sort the charge groups */
 +    if (comm->nstSortCG > 0)
 +    {
 +        bSortCG = (bMasterState ||
 +                   (bRedist && (step % comm->nstSortCG == 0)));
 +    }
 +    else
 +    {
 +        bSortCG = FALSE;
 +    }
 +
 +    ncg_home_old = dd->ncg_home;
 +
 +    if (bRedist)
 +    {
 +        cg0 = dd_redistribute_cg(fplog,step,dd,ddbox.tric_dir,
 +                                 state_local,f,fr,mdatoms,
 +                                 !bSortCG,nrnb);
 +    }
 +    
 +    get_nsgrid_boundaries(fr->ns.grid,dd,
 +                          state_local->box,&ddbox,&comm->cell_x0,&comm->cell_x1,
 +                          dd->ncg_home,fr->cg_cm,
 +                          cell_ns_x0,cell_ns_x1,&grid_density);
 +
 +    if (bBoxChanged)
 +    {
 +        comm_dd_ns_cell_sizes(dd,&ddbox,cell_ns_x0,cell_ns_x1,step);
 +    }
 +
 +    copy_ivec(fr->ns.grid->n,ncells_old);
 +    grid_first(fplog,fr->ns.grid,dd,&ddbox,fr->ePBC,
 +               state_local->box,cell_ns_x0,cell_ns_x1,
 +               fr->rlistlong,grid_density);
 +    /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
 +    copy_ivec(ddbox.tric_dir,comm->tric_dir);
 +
 +    if (bSortCG)
 +    {
 +        /* Sort the state on charge group position.
 +         * This enables exact restarts from this step.
 +         * It also improves performance by about 15% with larger numbers
 +         * of atoms per node.
 +         */
 +        
 +        /* Fill the ns grid with the home cell,
 +         * so we can sort with the indices.
 +         */
 +        set_zones_ncg_home(dd);
 +        fill_grid(fplog,&comm->zones,fr->ns.grid,dd->ncg_home,
 +                  0,dd->ncg_home,fr->cg_cm);
 +        
 +        /* Check if we can use the old order and ns grid cell indices
 +         * of the charge groups to sort the charge groups efficiently.
 +         */
 +        bResortAll = (bMasterState ||
 +                      fr->ns.grid->n[XX] != ncells_old[XX] ||
 +                      fr->ns.grid->n[YY] != ncells_old[YY] ||
 +                      fr->ns.grid->n[ZZ] != ncells_old[ZZ]);
 +
 +        if (debug)
 +        {
 +            fprintf(debug,"Step %s, sorting the %d home charge groups\n",
 +                    gmx_step_str(step,sbuf),dd->ncg_home);
 +        }
 +        dd_sort_state(dd,ir->ePBC,fr->cg_cm,fr,state_local,
 +                      bResortAll ? -1 : ncg_home_old);
 +        /* Rebuild all the indices */
 +        cg0 = 0;
 +        ga2la_clear(dd->ga2la);
 +    }
 +    
 +    /* Set up the communication and communicate the coordinates */
 +    setup_dd_communication(dd,state_local->box,&ddbox,fr);
 +    
 +    /* Set the indices */
 +    make_dd_indices(dd,cgs_gl->index,cg0);
 +
 +    /* Set the charge group boundaries for neighbor searching */
 +    set_cg_boundaries(&comm->zones);
 +    
 +    /*
 +    write_dd_pdb("dd_home",step,"dump",top_global,cr,
 +                 -1,state_local->x,state_local->box);
 +    */
 +    
 +    /* Extract a local topology from the global topology */
 +    for(i=0; i<dd->ndim; i++)
 +    {
 +        np[dd->dim[i]] = comm->cd[i].np;
 +    }
 +    dd_make_local_top(fplog,dd,&comm->zones,dd->npbcdim,state_local->box,
 +                      comm->cellsize_min,np,
 +                      fr,vsite,top_global,top_local);
 +    
 +    /* Set up the special atom communication */
 +    n = comm->nat[ddnatZONE];
 +    for(i=ddnatZONE+1; i<ddnatNR; i++)
 +    {
 +        switch(i)
 +        {
 +        case ddnatVSITE:
 +            if (vsite && vsite->n_intercg_vsite)
 +            {
 +                n = dd_make_local_vsites(dd,n,top_local->idef.il);
 +            }
 +            break;
 +        case ddnatCON:
 +            if (dd->bInterCGcons)
 +            {
 +                /* Only for inter-cg constraints do we need special code */
 +                n = dd_make_local_constraints(dd,n,top_global,
 +                                              constr,ir->nProjOrder,
 +                                              &top_local->idef.il[F_CONSTR]);
 +            }
 +            break;
 +        default:
 +            gmx_incons("Unknown special atom type setup");
 +        }
 +        comm->nat[i] = n;
 +    }
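 +    /* comm->nat[] is cumulative, so comm->nat[ddnatNR-1] is the total number
 +     * of locally needed atoms including vsite/constraint communication; it is
 +     * used below to size the local state. */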
 +    
 +    /* Make space for the extra coordinates for virtual site
 +     * or constraint communication.
 +     */
 +    state_local->natoms = comm->nat[ddnatNR-1];
 +    if (state_local->natoms > state_local->nalloc)
 +    {
 +        dd_realloc_state(state_local,f,state_local->natoms);
 +    }
 +
 +    if (fr->bF_NoVirSum)
 +    {
 +        if (vsite && vsite->n_intercg_vsite)
 +        {
 +            nat_f_novirsum = comm->nat[ddnatVSITE];
 +        }
 +        else
 +        {
 +            if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
 +            {
 +                nat_f_novirsum = dd->nat_tot;
 +            }
 +            else
 +            {
 +                nat_f_novirsum = dd->nat_home;
 +            }
 +        }
 +    }
 +    else
 +    {
 +        nat_f_novirsum = 0;
 +    }
 +
 +    /* Set the number of atoms required for the force calculation.
 +     * Forces need to be constrained when using a twin-range setup
 +     * or with energy minimization. For simple simulations we could
 +     * avoid some allocation, zeroing and copying, but this is
 +     * probably not worth the complications and checking.
 +     */
 +    forcerec_set_ranges(fr,dd->ncg_home,dd->ncg_tot,
 +                        dd->nat_tot,comm->nat[ddnatCON],nat_f_novirsum);
 +
 +    /* We make all the mdatoms up to nat_tot_con.
 +     * We could save some work by only setting invmass
 +     * between nat_tot and nat_tot_con.
 +     */
 +    /* This call also sets the new number of home particles to dd->nat_home */
 +    atoms2md(top_global,ir,
 +             comm->nat[ddnatCON],dd->gatindex,0,dd->nat_home,mdatoms);
 +
 +    /* Now that we have the charges we can sort the FE interactions */
 +    dd_sort_local_top(dd,mdatoms,top_local);
 +
 +    if (shellfc)
 +    {
 +        /* Make the local shell stuff, currently no communication is done */
 +        make_local_shells(cr,mdatoms,shellfc);
 +    }
 +    
 +    if (ir->implicit_solvent)
 +    {
 +        make_local_gb(cr,fr->born,ir->gb_algorithm);
 +    }
 +    
 +    if (!(cr->duty & DUTY_PME))
 +    {
 +        /* Send the charges to our PME only node */
 +        gmx_pme_send_q(cr,mdatoms->nChargePerturbed,
 +                       mdatoms->chargeA,mdatoms->chargeB,
 +                       dd_pme_maxshift_x(dd),dd_pme_maxshift_y(dd));
 +    }
 +    
 +    if (constr)
 +    {
 +        set_constraints(constr,top_local,ir,mdatoms,cr);
 +    }
 +    
 +    if (ir->ePull != epullNO)
 +    {
 +        /* Update the local pull groups */
 +        dd_make_local_pull_groups(dd,ir->pull,mdatoms);
 +    }
 +    
 +    if (ir->bRot)
 +    {
 +        /* Update the local rotation groups */
 +        dd_make_local_rotation_groups(dd,ir->rot);
 +    }
 +
 +
 +    add_dd_statistics(dd);
 +    
 +    /* Make sure we only count the cycles for this DD partitioning */
 +    clear_dd_cycle_counts(dd);
 +    
 +    /* Because the order of the atoms might have changed since
 +     * the last vsite construction, we need to communicate the constructing
 +     * atom coordinates again (for spreading the forces this MD step).
 +     */
 +    dd_move_x_vsites(dd,state_local->box,state_local->x);
 +    
 +    if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
 +    {
 +        dd_move_x(dd,state_local->box,state_local->x);
 +        write_dd_pdb("dd_dump",step,"dump",top_global,cr,
 +                     -1,state_local->x,state_local->box);
 +    }
 +
 +    if (bNStGlobalComm)
 +    {
 +        /* Store the global communication step */
 +        comm->globalcomm_step = step;
 +    }
 +    
 +    /* Increase the DD partitioning counter */
 +    dd->ddp_count++;
 +    /* The state currently matches this DD partitioning count, store it */
 +    state_local->ddp_count = dd->ddp_count;
 +    if (bMasterState)
 +    {
 +        /* The DD master node knows the complete cg distribution,
 +         * store the count so we can possibly skip the cg info communication.
 +         */
 +        comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
 +    }
 +
 +    if (comm->DD_debug > 0)
 +    {
 +        /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
 +        check_index_consistency(dd,top_global->natoms,ncg_mtop(top_global),
 +                                "after partitioning");
 +    }
 +}
index 5238a20c3ce54b32ca1d6bedd2a3e6012df9b882,0000000000000000000000000000000000000000..e88d92485134fb9fe553c2e163951cf79eb7deaa
mode 100644,000000..100644
--- /dev/null
@@@ -1,2575 -1,0 +1,2609 @@@
-         t_commrec *cr)
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <stdio.h>
 +#include <time.h>
 +#include "typedefs.h"
 +#include "string2.h"
 +#include "smalloc.h"
 +#include "names.h"
 +#include "confio.h"
 +#include "mvdata.h"
 +#include "txtdump.h"
 +#include "vec.h"
 +#include <time.h>
 +#include "nrnb.h"
 +#include "mshift.h"
 +#include "mdrun.h"
 +#include "update.h"
 +#include "physics.h"
 +#include "nrjac.h"
 +#include "mtop_util.h"
 +#include "edsam.h"
 +#include "gmxfio.h"
 +#include "groupcoord.h"
 +
 +
 +/* We use the same defines as in mvdata.c here */
 +#define  block_bc(cr,   d) gmx_bcast(     sizeof(d),     &(d),(cr))
 +#define nblock_bc(cr,nr,d) gmx_bcast((nr)*sizeof((d)[0]), (d),(cr))
 +#define   snew_bc(cr,d,nr) { if (!MASTER(cr)) snew((d),(nr)); }
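 +/* block_bc broadcasts a single variable, nblock_bc broadcasts an array of nr
 + * elements, and snew_bc allocates on the non-master nodes only, since the
 + * master already owns the data that is about to be broadcast. */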
 +
 +
 +/* enum to identify the type of ED: none, normal ED, flooding */
 +enum {eEDnone, eEDedsam, eEDflood, eEDnr};
 +
 +/* enum to identify operations on reference, average, origin, target structures */
 +enum {eedREF, eedAV, eedORI, eedTAR, eedNR};
 +
 +
 +typedef struct
 +{
 +    int    neig;     /* nr of eigenvectors             */
 +    int   *ieig;     /* index nrs of eigenvectors      */
 +    real  *stpsz;    /* stepsizes (per eigenvector)    */
 +    rvec  **vec;     /* eigenvector components         */
 +    real  *xproj;    /* instantaneous x projections    */
 +    real  *fproj;    /* instantaneous f projections    */
 +    real  radius;    /* instantaneous radius           */
 +    real  *refproj;  /* starting or target projections */
 +    /* When using flooding as harmonic restraint: The current reference projection
 +     * is at each step calculated from the initial refproj0 and the slope. */
 +    real  *refproj0,*refprojslope;
 +} t_eigvec;
 +
 +
 +typedef struct
 +{
 +    t_eigvec      mon;            /* only monitored, no constraints       */
 +    t_eigvec      linfix;         /* fixed linear constraints             */
 +    t_eigvec      linacc;         /* acceptance linear constraints        */
 +    t_eigvec      radfix;         /* fixed radial constraints (exp)       */
 +    t_eigvec      radacc;         /* acceptance radial constraints (exp)  */
 +    t_eigvec      radcon;         /* acceptance rad. contraction constr.  */
 +} t_edvecs;
 +
 +
 +typedef struct
 +{
 +    real deltaF0;
 +    gmx_bool bHarmonic;           /* Use flooding for harmonic restraint on
 +                                     the eigenvector                          */
 +    gmx_bool bConstForce;         /* Do not calculate a flooding potential,
 +                                     instead flood with a constant force      */
 +    real tau;
 +    real deltaF;
 +    real Efl;
 +    real kT;
 +    real Vfl;
 +    real dt;
 +    real constEfl;
 +    real alpha2;
 +    int flood_id;
 +    rvec *forces_cartesian;
 +    t_eigvec vecs;         /* use flooding for these */
 +} t_edflood;
 +
 +
 +/* This type is for the average, reference, target, and origin structure    */
 +typedef struct gmx_edx
 +{
 +    int           nr;             /* number of atoms this structure contains  */
 +    int           nr_loc;         /* number of atoms on local node            */
 +    int           *anrs;          /* atom index numbers                       */
 +    int           *anrs_loc;      /* local atom index numbers                 */
 +    int           nalloc_loc;     /* allocation size of anrs_loc              */
 +    int           *c_ind;         /* at which position of the whole anrs
 +                                   * array is a local atom?, i.e.
 +                                   * c_ind[0...nr_loc-1] gives the atom index
 +                                   * with respect to the collective
 +                                   * anrs[0...nr-1] array                     */
 +    rvec          *x;             /* positions for this structure             */
 +    rvec          *x_old;         /* used to keep track of the shift vectors
 +                                     such that the ED molecule can always be
 +                                     made whole in the parallel case          */
 +    real          *m;             /* masses                                   */
 +    real          mtot;           /* total mass (only used in sref)           */
 +    real          *sqrtm;         /* sqrt of the masses used for mass-
 +                                   * weighting of analysis (only used in sav) */
 +} t_gmx_edx;
 +
 +
 +typedef struct edpar
 +{
 +    int            nini;           /* total Nr of atoms                    */
 +    gmx_bool       fitmas;         /* true if trans fit with cm            */
 +    gmx_bool       pcamas;         /* true if mass-weighted PCA            */
 +    int            presteps;       /* number of steps to run without any
 +                                    *    perturbations ... just monitoring */
 +    int            outfrq;         /* freq (in steps) of writing to edo    */
 +    int            maxedsteps;     /* max nr of steps per cycle            */
 +
 +    /* all gmx_edx datasets are copied to all nodes in the parallel case   */
 +    struct gmx_edx sref;           /* reference positions, to these fitting
 +                                    * will be done                         */
 +    gmx_bool       bRefEqAv;       /* If true, reference & average indices
 +                                    * are the same. Used for optimization  */
 +    struct gmx_edx sav;            /* average positions                    */
 +    struct gmx_edx star;           /* target positions                     */
 +    struct gmx_edx sori;           /* origin positions                     */
 +
 +    t_edvecs       vecs;           /* eigenvectors                         */
 +    real           slope;          /* minimal slope in acceptance radexp   */
 +
 +    gmx_bool       bNeedDoEdsam;   /* if any of the options mon, linfix, ...
 +                                    * is used (i.e. apart from flooding)   */
 +    t_edflood      flood;          /* parameters especially for flooding   */
 +    struct t_ed_buffer *buf;       /* handle to local buffers              */
 +    struct edpar   *next_edi;      /* Pointer to another ed dataset        */
 +} t_edpar;
 +
 +
 +typedef struct gmx_edsam
 +{
 +    int           eEDtype;        /* Type of ED: see enums above          */
 +    const char    *edinam;        /* name of ED sampling input file       */
 +    const char    *edonam;        /*                     output           */
 +    FILE          *edo;           /* output file pointer                  */
 +    t_edpar       *edpar;
 +    gmx_bool      bFirst;
 +    gmx_bool      bStartFromCpt;
 +} t_gmx_edsam;
 +
 +
 +struct t_do_edsam
 +{
 +    matrix old_rotmat;
 +    real oldrad;
 +    rvec old_transvec,older_transvec,transvec_compact;
 +    rvec *xcoll;         /* Positions from all nodes, this is the
 +                            collective set we work on.
 +                            These are the positions of atoms with
 +                            average structure indices */
 +    rvec *xc_ref;        /* same but with reference structure indices */
 +    ivec *shifts_xcoll;        /* Shifts for xcoll  */
 +    ivec *extra_shifts_xcoll;  /* xcoll shift changes since last NS step */
 +    ivec *shifts_xc_ref;       /* Shifts for xc_ref */
 +    ivec *extra_shifts_xc_ref; /* xc_ref shift changes since last NS step */
 +    gmx_bool bUpdateShifts;    /* TRUE in NS steps to indicate that the
 +                                  ED shifts for this ED dataset need to
 +                                  be updated */
 +};
 +
 +
 +/* definition of ED buffer structure */
 +struct t_ed_buffer
 +{
 +    struct t_fit_to_ref *           fit_to_ref;
 +    struct t_do_edfit *             do_edfit;
 +    struct t_do_edsam *             do_edsam;
 +    struct t_do_radcon *            do_radcon;
 +};
 +
 +
 +/* Function declarations */
 +static void fit_to_reference(rvec *xcoll,rvec transvec,matrix rotmat,t_edpar *edi);
 +
 +static void translate_and_rotate(rvec *x,int nat,rvec transvec,matrix rotmat);
 +/* End function declarations */
 +
 +
 +/* Does not subtract average positions; the projection onto a single eigenvector is returned.
 + * used by: do_linfix, do_linacc, do_radfix, do_radacc, do_radcon
 + * Average position is subtracted in ed_apply_constraints prior to calling projectx
 + */
 +static real projectx(t_edpar *edi, rvec *xcoll, rvec *vec)
 +{
 +    int  i;
 +    real proj=0.0;
 +
 +
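 +    /* The projection computed below is the mass-weighted inner product
 +     * proj = Sum_i sqrt(m_i) * (vec[i] . xcoll[i]) */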
 +    for (i=0; i<edi->sav.nr; i++)
 +        proj += edi->sav.sqrtm[i]*iprod(vec[i], xcoll[i]);
 +
 +    return proj;
 +}
 +
 +
 +/* Specialized: projection is stored in vec->refproj
 + * -> used for radacc, radfix, radcon and the center of the flooding potential.
 + * Subtracts average positions, projects vector x */
 +static void rad_project(t_edpar *edi, rvec *x, t_eigvec *vec, t_commrec *cr)
 +{
 +    int i;
 +    real rad=0.0;
 +
 +    /* Subtract average positions */
 +    for (i = 0; i < edi->sav.nr; i++)
 +        rvec_dec(x[i], edi->sav.x[i]);
 +
 +    for (i = 0; i < vec->neig; i++)
 +    {
 +        vec->refproj[i] = projectx(edi,x,vec->vec[i]);
 +        rad += pow((vec->refproj[i]-vec->xproj[i]),2);
 +    }
 +    vec->radius=sqrt(rad);
 +
 +    /* Add average positions */
 +    for (i = 0; i < edi->sav.nr; i++)
 +        rvec_inc(x[i], edi->sav.x[i]);
 +}
 +
 +
 +/* Project vector x, subtract average positions prior to projection and add
 + * them afterwards to retain the unchanged vector. Store in xproj. Mass-weighting
 + * is applied. */
 +static void project_to_eigvectors(rvec       *x,    /* The positions to project to an eigenvector */
 +                                  t_eigvec   *vec,  /* The eigenvectors */
 +                                  t_edpar    *edi)
 +{
 +    int  i;
 +
 +
 +    if (!vec->neig) return;
 +
 +    /* Subtract average positions */
 +    for (i=0; i<edi->sav.nr; i++)
 +        rvec_dec(x[i], edi->sav.x[i]);
 +
 +    for (i=0; i<vec->neig; i++)
 +        vec->xproj[i] = projectx(edi, x, vec->vec[i]);
 +
 +    /* Add average positions */
 +    for (i=0; i<edi->sav.nr; i++)
 +        rvec_inc(x[i], edi->sav.x[i]);
 +}
 +
 +
 +/* Project vector x onto all edi->vecs (mon, linfix,...) */
 +static void project(rvec      *x,     /* positions to project */
 +                    t_edpar   *edi)   /* edi data set */
 +{
 +    /* It is no extra work to subtract the average position in every
 +     * subroutine again, because these routines are rarely used simultaneously */
 +    project_to_eigvectors(x, &edi->vecs.mon   , edi);
 +    project_to_eigvectors(x, &edi->vecs.linfix, edi);
 +    project_to_eigvectors(x, &edi->vecs.linacc, edi);
 +    project_to_eigvectors(x, &edi->vecs.radfix, edi);
 +    project_to_eigvectors(x, &edi->vecs.radacc, edi);
 +    project_to_eigvectors(x, &edi->vecs.radcon, edi);
 +}
 +
 +
 +static real calc_radius(t_eigvec *vec)
 +{
 +    int i;
 +    real rad=0.0;
 +
 +
 +    for (i=0; i<vec->neig; i++)
 +        rad += pow((vec->refproj[i]-vec->xproj[i]),2);
 +
 +    return rad=sqrt(rad);
 +}
 +
 +
 +/* Debug helper */
 +#ifdef DEBUGHELPERS
 +static void dump_xcoll(t_edpar *edi, struct t_do_edsam *buf, t_commrec *cr,
 +                       int step)
 +{
 +    int i;
 +    FILE *fp;
 +    char fn[STRLEN];
 +    rvec *xcoll;
 +    ivec *shifts, *eshifts;
 +
 +
 +    if (!MASTER(cr))
 +        return;
 +
 +    xcoll   = buf->xcoll;
 +    shifts  = buf->shifts_xcoll;
 +    eshifts = buf->extra_shifts_xcoll;
 +
 +    sprintf(fn, "xcolldump_step%d.txt", step);
 +    fp = fopen(fn, "w");
 +
 +    for (i=0; i<edi->sav.nr; i++)
 +        fprintf(fp, "%d %9.5f %9.5f %9.5f   %d %d %d   %d %d %d\n",
 +                edi->sav.anrs[i]+1,
 +                xcoll[i][XX]  , xcoll[i][YY]  , xcoll[i][ZZ],
 +                shifts[i][XX] , shifts[i][YY] , shifts[i][ZZ],
 +                eshifts[i][XX], eshifts[i][YY], eshifts[i][ZZ]);
 +
 +    fclose(fp);
 +}
 +
 +
 +/* Debug helper */
 +static void dump_edi_positions(FILE *out, struct gmx_edx *s, const char name[])
 +{
 +    int i;
 +
 +
 +    fprintf(out, "#%s positions:\n%d\n", name, s->nr);
 +    if (s->nr == 0)
 +        return;
 +
 +    fprintf(out, "#index, x, y, z");
 +    if (s->sqrtm)
 +        fprintf(out, ", sqrt(m)");
 +    for (i=0; i<s->nr; i++)
 +    {
 +        fprintf(out, "\n%6d  %11.6f %11.6f %11.6f",s->anrs[i], s->x[i][XX], s->x[i][YY], s->x[i][ZZ]);
 +        if (s->sqrtm)
 +            fprintf(out,"%9.3f",s->sqrtm[i]);
 +    }
 +    fprintf(out, "\n");
 +}
 +
 +
 +/* Debug helper */
 +static void dump_edi_eigenvecs(FILE *out, t_eigvec *ev,
 +                               const char name[], int length)
 +{
 +    int i,j;
 +
 +
 +    fprintf(out, "#%s eigenvectors:\n%d\n", name, ev->neig);
 +    /* Dump the data for every eigenvector: */
 +    for (i=0; i<ev->neig; i++)
 +    {
 +        fprintf(out, "EV %4d\ncomponents %d\nstepsize %f\nxproj %f\nfproj %f\nrefproj %f\nradius %f\nComponents:\n",
 +                ev->ieig[i], length, ev->stpsz[i], ev->xproj[i], ev->fproj[i], ev->refproj[i], ev->radius);
 +        for (j=0; j<length; j++)
 +            fprintf(out, "%11.6f %11.6f %11.6f\n", ev->vec[i][j][XX], ev->vec[i][j][YY], ev->vec[i][j][ZZ]);
 +    }
 +}
 +
 +
 +/* Debug helper */
 +static void dump_edi(t_edpar *edpars, t_commrec *cr, int nr_edi)
 +{
 +    FILE  *out;
 +    char  fn[STRLEN];
 +
 +
 +    sprintf(fn, "EDdump_node%d_edi%d", cr->nodeid, nr_edi);
 +    out = ffopen(fn, "w");
 +
 +    fprintf(out,"#NINI\n %d\n#FITMAS\n %d\n#ANALYSIS_MAS\n %d\n",
 +            edpars->nini,edpars->fitmas,edpars->pcamas);
 +    fprintf(out,"#OUTFRQ\n %d\n#MAXLEN\n %d\n#SLOPECRIT\n %f\n",
 +            edpars->outfrq,edpars->maxedsteps,edpars->slope);
 +    fprintf(out,"#PRESTEPS\n %d\n#DELTA_F0\n %f\n#TAU\n %f\n#EFL_NULL\n %f\n#ALPHA2\n %f\n",
 +            edpars->presteps,edpars->flood.deltaF0,edpars->flood.tau,
 +            edpars->flood.constEfl,edpars->flood.alpha2);
 +
 +    /* Dump reference, average, target, origin positions */
 +    dump_edi_positions(out, &edpars->sref, "REFERENCE");
 +    dump_edi_positions(out, &edpars->sav , "AVERAGE"  );
 +    dump_edi_positions(out, &edpars->star, "TARGET"   );
 +    dump_edi_positions(out, &edpars->sori, "ORIGIN"   );
 +
 +    /* Dump eigenvectors */
 +    dump_edi_eigenvecs(out, &edpars->vecs.mon   , "MONITORED", edpars->sav.nr);
 +    dump_edi_eigenvecs(out, &edpars->vecs.linfix, "LINFIX"   , edpars->sav.nr);
 +    dump_edi_eigenvecs(out, &edpars->vecs.linacc, "LINACC"   , edpars->sav.nr);
 +    dump_edi_eigenvecs(out, &edpars->vecs.radfix, "RADFIX"   , edpars->sav.nr);
 +    dump_edi_eigenvecs(out, &edpars->vecs.radacc, "RADACC"   , edpars->sav.nr);
 +    dump_edi_eigenvecs(out, &edpars->vecs.radcon, "RADCON"   , edpars->sav.nr);
 +
 +    /* Dump flooding eigenvectors */
 +    dump_edi_eigenvecs(out, &edpars->flood.vecs, "FLOODING"  , edpars->sav.nr);
 +
 +    /* Dump ed local buffer */
 +    fprintf(out, "buf->do_edfit         =%p\n", (void*)edpars->buf->do_edfit  );
 +    fprintf(out, "buf->do_edsam         =%p\n", (void*)edpars->buf->do_edsam  );
 +    fprintf(out, "buf->do_radcon        =%p\n", (void*)edpars->buf->do_radcon );
 +
 +    ffclose(out);
 +}
 +
 +
 +/* Debug helper */
 +static void dump_rotmat(FILE* out,matrix rotmat)
 +{
 +    fprintf(out,"ROTMAT: %12.8f %12.8f %12.8f\n",rotmat[XX][XX],rotmat[XX][YY],rotmat[XX][ZZ]);
 +    fprintf(out,"ROTMAT: %12.8f %12.8f %12.8f\n",rotmat[YY][XX],rotmat[YY][YY],rotmat[YY][ZZ]);
 +    fprintf(out,"ROTMAT: %12.8f %12.8f %12.8f\n",rotmat[ZZ][XX],rotmat[ZZ][YY],rotmat[ZZ][ZZ]);
 +}
 +
 +
 +/* Debug helper */
 +static void dump_rvec(FILE *out, int dim, rvec *x)
 +{
 +    int i;
 +
 +
 +    for (i=0; i<dim; i++)
 +        fprintf(out,"%4d   %f %f %f\n",i,x[i][XX],x[i][YY],x[i][ZZ]);
 +}
 +
 +
 +/* Debug helper */
 +static void dump_mat(FILE* out, int dim, double** mat)
 +{
 +    int i,j;
 +
 +
 +    fprintf(out,"MATRIX:\n");
 +    for (i=0;i<dim;i++)
 +    {
 +        for (j=0;j<dim;j++)
 +            fprintf(out,"%f ",mat[i][j]);
 +        fprintf(out,"\n");
 +    }
 +}
 +#endif
 +
 +
 +struct t_do_edfit {
 +    double **omega;
 +    double **om;
 +};
 +
 +static void do_edfit(int natoms,rvec *xp,rvec *x,matrix R,t_edpar *edi)
 +{
 +    /* this is a copy of do_fit with some modifications */
 +    int    c,r,n,j,i,irot;
 +    double d[6],xnr,xpc;
 +    matrix vh,vk,u;
 +    int    index;
 +    real   max_d;
 +
 +    struct t_do_edfit *loc;
 +    gmx_bool bFirst;
 +
 +    if(edi->buf->do_edfit != NULL)
 +        bFirst = FALSE;
 +    else
 +    {
 +        bFirst = TRUE;
 +        snew(edi->buf->do_edfit,1);
 +    }
 +    loc = edi->buf->do_edfit;
 +
 +    if (bFirst)
 +    {
 +        snew(loc->omega,2*DIM);
 +        snew(loc->om,2*DIM);
 +        for(i=0; i<2*DIM; i++)
 +        {
 +            snew(loc->omega[i],2*DIM);
 +            snew(loc->om[i],2*DIM);
 +        }
 +    }
 +
 +    for(i=0;(i<6);i++)
 +    {
 +        d[i]=0;
 +        for(j=0;(j<6);j++)
 +        {
 +            loc->omega[i][j]=0;
 +            loc->om[i][j]=0;
 +        }
 +    }
 +
 +    /* calculate the matrix U */
 +    clear_mat(u);
 +    for(n=0;(n<natoms);n++)
 +    {
 +        for(c=0; (c<DIM); c++)
 +        {
 +            xpc=xp[n][c];
 +            for(r=0; (r<DIM); r++)
 +            {
 +                xnr=x[n][r];
 +                u[c][r]+=xnr*xpc;
 +            }
 +        }
 +    }
 +
 +    /* construct loc->omega */
 +    /* loc->omega is symmetric -> loc->omega==loc->omega' */
 +    for(r=0;(r<6);r++)
 +        for(c=0;(c<=r);c++)
 +            if ((r>=3) && (c<3))
 +            {
 +                loc->omega[r][c]=u[r-3][c];
 +                loc->omega[c][r]=u[r-3][c];
 +            }
 +            else
 +            {
 +                loc->omega[r][c]=0;
 +                loc->omega[c][r]=0;
 +            }
 +
 +    /* determine h and k */
 +#ifdef DEBUG
 +    {
 +        int i;
 +        dump_mat(stderr,2*DIM,loc->omega);
 +        for (i=0; i<6; i++)
 +            fprintf(stderr,"d[%d] = %f\n",i,d[i]);
 +    }
 +#endif
 +    jacobi(loc->omega,6,d,loc->om,&irot);
 +
 +    if (irot==0)
 +        fprintf(stderr,"IROT=0\n");
 +
 +    index=0; /* For the compiler only */
 +
 +    for(j=0;(j<3);j++)
 +    {
 +        max_d=-1000;
 +        for(i=0;(i<6);i++)
 +            if (d[i]>max_d)
 +            {
 +                max_d=d[i];
 +                index=i;
 +            }
 +        d[index]=-10000;
 +        for(i=0;(i<3);i++)
 +        {
 +            vh[j][i]=M_SQRT2*loc->om[i][index];
 +            vk[j][i]=M_SQRT2*loc->om[i+DIM][index];
 +        }
 +    }
 +
 +    /* determine R */
 +    for(c=0;(c<3);c++)
 +        for(r=0;(r<3);r++)
 +            R[c][r]=vk[0][r]*vh[0][c]+
 +            vk[1][r]*vh[1][c]+
 +            vk[2][r]*vh[2][c];
 +    if (det(R) < 0)
 +        for(c=0;(c<3);c++)
 +            for(r=0;(r<3);r++)
 +                R[c][r]=vk[0][r]*vh[0][c]+
 +                vk[1][r]*vh[1][c]-
 +                vk[2][r]*vh[2][c];
 +}
 +
 +
 +static void rmfit(int nat, rvec *xcoll, rvec transvec, matrix rotmat)
 +{
 +    rvec vec;
 +    matrix tmat;
 +
 +
 +    /* Remove rotation.
 +     * The inverse rotation is described by the transposed rotation matrix */
 +    transpose(rotmat,tmat);
 +    rotate_x(xcoll, nat, tmat);
 +
 +    /* Remove translation */
 +    vec[XX]=-transvec[XX];
 +    vec[YY]=-transvec[YY];
 +    vec[ZZ]=-transvec[ZZ];
 +    translate_x(xcoll, nat, vec);
 +}
 +
 +
 +/**********************************************************************************
 + ******************** FLOODING ****************************************************
 + **********************************************************************************
 +
 +The flooding ability was added later to edsam. Much of the edsam functionality could be reused for that purpose.
 +The flooding covariance matrix, i.e. the selected eigenvectors and their corresponding eigenvalues, is
 +read as the 7th Component Group. The eigenvalues are coded into the stepsize parameter (as used by -linfix or -linacc).
 +
 +Right at the beginning, do_md calls the function init_edsam, which reads the edi file, saves all the necessary information in
 +the edi structure and calls init_flood to initialise some extra fields in the edi->flood structure.
 +
 +Since the flooding acts on forces, do_flood is called from the function force() (force.c), while the other
 +edsam functionality is hooked into md via the update() (update.c) function, acting as a constraint on positions.
 +
 +do_flood makes a copy of the positions,
 +fits them, projects them, and computes the flooding energy and the flooding forces. The forces are computed in the
 +space of the eigenvectors and are then blown up to the full cartesian space and rotated back to remove the
 +fit. Then do_flood adds these forces to the forcefield forces
 +(given as a parameter) and updates the adaptive flooding parameters Efl and deltaF.
 +
 +To center the flooding potential at a different location one can use the -ori option in make_edi. The ori
 +structure is projected to the system of eigenvectors and then this position in the subspace is used as
 +center of the flooding potential.   If the option is not used, the center will be zero in the subspace,
 +i.e. the average structure as given in the make_edi file.
 +
 +To use the flooding potential as a restraint, make_edi has the option -restrain, which leads to inverted
 +signs of alpha2 and Efl, such that the sign in the exponential of Vfl is not inverted but the sign of
 +Vfl is inverted. Vfl = Efl * exp (- .../Efl/alpha2*x^2...) With tau>0 the negative Efl will grow slowly,
 +so that the restraint is switched off slowly. Once Efl==0 is reached with inverted flooding on,
 +no further adaptation is applied and Efl stays constant at zero.
 +
 +To use restraints with harmonic potentials switch -restrain and -harmonic. Then the eigenvalues are
 +used as spring constants for the harmonic potential.
 +Note that eq3 in the flooding paper (J. Comp. Chem. 2006, 27, 1693-1702) defines the parameter lambda
 +as the inverse of the spring constant, whereas the implementation uses lambda as the spring constant.
 +
 +To use more than one flooding matrix just concatenate several .edi files (cat flood1.edi flood2.edi > flood_all.edi);
 +the routine read_edi_file reads all of these flooding files.
 +The structure t_edi is now organized as a list of t_edis, and the function do_flood cycles through the list,
 +calling the do_single_flood() routine for every single entry. Since all state variables are kept in their own
 +edi, there is no interdependence whatsoever. The forces are added together.
 +
 +  To write energies into the .edr file, call the function
 +        get_flood_enx_names(char**, int *nnames) to get the Header (Vfl1 Vfl2... Vfln)
 +and call
 +        get_flood_energies(real Vfl[],int nnames);
 +
 +  TODO:
 +- one could program the whole thing such that Efl, Vfl and deltaF are written to the .edr file. -- I don't know how to do that yet.
 +
 +  Maybe one should give a range of atoms for which to remove motion, so that motion is removed with
 +  two edsam files from two peptide chains
 +*/
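 +
 +/* Per-step flooding sequence, as implemented in do_single_flood() below:
 + * assemble the collective positions, fit them to the reference, project them
 + * onto the flooding eigenvectors, compute Vfl (flood_energy), update Efl and
 + * deltaF (update_adaption), compute the subspace forces (flood_forces), lift
 + * them to cartesian space (flood_blowup), rotate them back and add them to
 + * the force array. */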
 +
 +static void write_edo_flood(t_edpar *edi, FILE *fp, gmx_large_int_t step)
 +{
 +    int i;
 +    char buf[22];
 +    gmx_bool bOutputRef=FALSE;
 +
 +
 +    fprintf(fp,"%d.th FL: %s %12.5e %12.5e %12.5e\n",
 +            edi->flood.flood_id, gmx_step_str(step,buf),
 +            edi->flood.Efl, edi->flood.Vfl, edi->flood.deltaF);
 +
 +
 +    /* Check whether any of the references changes with time (this can happen
 +     * in case flooding is used as harmonic restraint). If so, output all the
 +     * current reference projections. */
 +    if (edi->flood.bHarmonic)
 +    {
 +        for (i = 0; i < edi->flood.vecs.neig; i++)
 +        {
 +            if (edi->flood.vecs.refprojslope[i] != 0.0)
 +                bOutputRef=TRUE;
 +        }
 +        if (bOutputRef)
 +        {
 +            fprintf(fp, "Ref. projs.: ");
 +            for (i = 0; i < edi->flood.vecs.neig; i++)
 +            {
 +                fprintf(fp, "%12.5e ", edi->flood.vecs.refproj[i]);
 +            }
 +            fprintf(fp, "\n");
 +        }
 +    }
 +    fprintf(fp,"FL_FORCES: ");
 +
 +    for (i=0; i<edi->flood.vecs.neig; i++)
 +        fprintf(fp," %12.5e",edi->flood.vecs.fproj[i]);
 +
 +    fprintf(fp,"\n");
 +}
 +
 +
 +/* From flood.xproj compute the Vfl(x) at this point */
 +static real flood_energy(t_edpar *edi, gmx_large_int_t step)
 +{
 +    /* compute flooding energy Vfl
 +     Vfl = Efl * exp( - \frac {kT} {2Efl alpha^2} * sum_i { \lambda_i c_i^2 } )
 +     \lambda_i is the reciprocal eigenvalue 1/\sigma_i
 +         it is already computed by make_edi and stored in stpsz[i]
 +     bHarmonic:
 +       Vfl = - Efl * 1/2(sum _i {\frac 1{\lambda_i} c_i^2})
 +     */
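 +    /* In terms of the code below: sum = Sum_i stpsz[i]*(xproj[i]-refproj[i])^2,
 +     * giving Vfl = Efl*exp(-kT*sum/(2*Efl*alpha2)) in the Gaussian case and
 +     * Vfl = -Efl*sum/2 in the harmonic case. */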
 +    real sum;
 +    real Vfl;
 +    int i;
 +
 +
 +    /* Each time this routine is called (i.e. each time step), we add a small
 +     * value to the reference projection. This way a harmonic restraint towards
 +     * a moving reference is realized. If no value for the additive constant
 +     * is provided in the edi file, the reference will not change. */
 +    if (edi->flood.bHarmonic)
 +    {
 +        for (i=0; i<edi->flood.vecs.neig; i++)
 +        {
 +            edi->flood.vecs.refproj[i] = edi->flood.vecs.refproj0[i] + step * edi->flood.vecs.refprojslope[i];
 +        }
 +    }
 +
 +    sum=0.0;
 +    /* Compute sum which will be the exponent of the exponential */
 +    for (i=0; i<edi->flood.vecs.neig; i++)
 +    {
 +        /* stpsz stores the reciprocal eigenvalue 1/sigma_i */
 +        sum += edi->flood.vecs.stpsz[i]*(edi->flood.vecs.xproj[i]-edi->flood.vecs.refproj[i])*(edi->flood.vecs.xproj[i]-edi->flood.vecs.refproj[i]);
 +    }
 +
 +    /* Compute the Gauss function*/
 +    if (edi->flood.bHarmonic)
 +    {
 +        Vfl = -0.5*edi->flood.Efl*sum;  /* minus sign because Efl is negative, if restrain is on. */
 +    }
 +    else
 +    {
 +        Vfl = edi->flood.Efl!=0 ? edi->flood.Efl*exp(-edi->flood.kT/2/edi->flood.Efl/edi->flood.alpha2*sum) :0;
 +    }
 +
 +    return Vfl;
 +}
 +
 +
 +/* From the position and from Vfl compute forces in subspace -> store in edi->vec.flood.fproj */
 +static void flood_forces(t_edpar *edi)
 +{
 +    /* compute the forces in the subspace of the flooding eigenvectors
 +     * by the formula F_i = V_{fl}(c) * \frac{kT}{E_{fl} \alpha^2} \lambda_i (c_i - c_i^{ref})
 +     * (in the harmonic case F_i = E_{fl} \lambda_i (c_i - c_i^{ref}), see below) */
 +
 +    int i;
 +    real energy=edi->flood.Vfl;
 +
 +
 +    if (edi->flood.bHarmonic)
 +        for (i=0; i<edi->flood.vecs.neig; i++)
 +        {
 +            edi->flood.vecs.fproj[i] = edi->flood.Efl* edi->flood.vecs.stpsz[i]*(edi->flood.vecs.xproj[i]-edi->flood.vecs.refproj[i]);
 +        }
 +    else
 +        for (i=0; i<edi->flood.vecs.neig; i++)
 +        {
 +            /* if Efl is zero the forces are zero if not use the formula */
 +            edi->flood.vecs.fproj[i] = edi->flood.Efl!=0 ? edi->flood.kT/edi->flood.Efl/edi->flood.alpha2*energy*edi->flood.vecs.stpsz[i]*(edi->flood.vecs.xproj[i]-edi->flood.vecs.refproj[i]) : 0;
 +        }
 +}
 +
 +
 +/* Raise forces from subspace into cartesian space */
 +static void flood_blowup(t_edpar *edi, rvec *forces_cart)
 +{
 +    /* This function lifts the forces from the subspace to cartesian space:
 +     all components not contained in the subspace are assumed to be zero, and a
 +     coordinate transformation from eigenvector space to cartesian vectors is performed.
 +     The nonexistent components don't have to be set to zero explicitly; they would only
 +     appear as zero-valued summands, so we simply skip computing that part of the sum.
 +
 +     For every atom we add the contributions to this atom from all the different eigenvectors.
 +
 +     NOTE: one could add directly to the forcefield forces, which would mean we wouldn't have
 +     to clear the forces_cart field prior to the computation, but we compute the forces
 +     separately to have them accessible for diagnostics.
 +     */
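 +    /* For each local atom j this computes
 +     * forces_cart[j] = Sum_eig fproj[eig] * vec[eig][c_ind[j]] */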
 +    int  j,eig;
 +    rvec dum;
 +    real *forces_sub;
 +
 +
 +    forces_sub = edi->flood.vecs.fproj;
 +
 +
 +    /* Calculate the cartesian forces for the local atoms */
 +
 +    /* Clear forces first */
 +    for (j=0; j<edi->sav.nr_loc; j++)
 +        clear_rvec(forces_cart[j]);
 +
 +    /* Now compute atomwise */
 +    for (j=0; j<edi->sav.nr_loc; j++)
 +    {
 +        /* Compute forces_cart[j] for local atom j */
 +        for (eig=0; eig<edi->flood.vecs.neig; eig++)
 +        {
 +            /* Force vector is force * eigenvector (compute only atom j) */
 +            svmul(forces_sub[eig],edi->flood.vecs.vec[eig][edi->sav.c_ind[j]],dum);
 +            /* Add this vector to the cartesian forces */
 +            rvec_inc(forces_cart[j],dum);
 +        }
 +    }
 +}
 +
 +
 +/* Update the values of Efl, deltaF depending on tau and Vfl */
 +static void update_adaption(t_edpar *edi)
 +{
 +    /* This function updates the parameters Efl and deltaF according to the rules given in
 +     * 'Predicting unimolecular chemical reactions: chemical flooding', M. Mueller et al.,
 +     * J. Chem. Phys. */
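 +    /* In effect, deltaF is an exponential moving average of Vfl with time
 +     * constant tau, and Efl is driven towards the point where deltaF matches
 +     * the target deltaF0. */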
 +
 +    if ((edi->flood.tau < 0 ? -edi->flood.tau : edi->flood.tau ) > 0.00000001)
 +    {
 +        edi->flood.Efl = edi->flood.Efl+edi->flood.dt/edi->flood.tau*(edi->flood.deltaF0-edi->flood.deltaF);
 +        /* check if restraining (inverted flooding) -> don't let Efl become positive */
 +        if (edi->flood.alpha2<0 && edi->flood.Efl>-0.00000001)
 +            edi->flood.Efl = 0;
 +
 +        edi->flood.deltaF = (1-edi->flood.dt/edi->flood.tau)*edi->flood.deltaF+edi->flood.dt/edi->flood.tau*edi->flood.Vfl;
 +    }
 +}
 +
 +
 +static void do_single_flood(
 +        FILE *edo,
 +        rvec x[],
 +        rvec force[],
 +        t_edpar *edi,
 +        gmx_large_int_t step,
 +        matrix box,
-     communicate_group_positions(cr, buf->xcoll, buf->shifts_xcoll, buf->extra_shifts_xcoll, buf->bUpdateShifts, x,
++        t_commrec *cr,
++        gmx_bool bNS)       /* Are we in a neighbor searching step? */
 +{
 +    int i;
 +    matrix  rotmat;         /* rotation matrix */
 +    matrix  tmat;           /* inverse rotation */
 +    rvec    transvec;       /* translation vector */
 +    struct t_do_edsam *buf;
 +
 +
 +    buf=edi->buf->do_edsam;
 +
++
 +    /* Broadcast the positions of the AVERAGE structure such that they are known on
 +     * every processor. Each node contributes its local positions x and stores them in
 +     * the collective ED array buf->xcoll */
-         communicate_group_positions(cr, buf->xc_ref, buf->shifts_xc_ref, buf->extra_shifts_xc_ref, buf->bUpdateShifts, x,
++    communicate_group_positions(cr, buf->xcoll, buf->shifts_xcoll, buf->extra_shifts_xcoll, bNS, x,
 +                    edi->sav.nr, edi->sav.nr_loc, edi->sav.anrs_loc, edi->sav.c_ind, edi->sav.x_old, box);
 +
 +    /* Only assemble REFERENCE positions if their indices differ from the average ones */
 +    if (!edi->bRefEqAv)
-         gmx_large_int_t step)    /* The relative time step since ir->init_step is already subtracted */
++        communicate_group_positions(cr, buf->xc_ref, buf->shifts_xc_ref, buf->extra_shifts_xc_ref, bNS, x,
 +                edi->sref.nr, edi->sref.nr_loc, edi->sref.anrs_loc, edi->sref.c_ind, edi->sref.x_old, box);
 +
 +    /* If bUpdateShifts was TRUE, the shifts have just been updated in communicate_group_positions.
 +     * We do not need to update the shifts until the next NS step */
 +    buf->bUpdateShifts = FALSE;
 +
 +    /* Now all nodes have all of the ED/flooding positions in edi->sav->xcoll,
 +     * as well as the indices in edi->sav.anrs */
 +
 +    /* Fit the reference indices to the reference structure */
 +    if (edi->bRefEqAv)
 +        fit_to_reference(buf->xcoll , transvec, rotmat, edi);
 +    else
 +        fit_to_reference(buf->xc_ref, transvec, rotmat, edi);
 +
 +    /* Now apply the translation and rotation to the ED structure */
 +    translate_and_rotate(buf->xcoll, edi->sav.nr, transvec, rotmat);
 +
 +    /* Project fitted structure onto subspace -> store in edi->flood.vecs.xproj */
 +    project_to_eigvectors(buf->xcoll,&edi->flood.vecs,edi);
 +
 +    if (FALSE == edi->flood.bConstForce)
 +    {
 +        /* Compute Vfl(x) from flood.xproj */
 +        edi->flood.Vfl = flood_energy(edi, step);
 +
 +        update_adaption(edi);
 +
 +        /* Compute the flooding forces */
 +        flood_forces(edi);
 +    }
 +
 +    /* Translate them into cartesian forces */
 +    flood_blowup(edi, edi->flood.forces_cartesian);
 +
 +    /* Rotate forces back so that they correspond to the given structure and not to the fitted one */
 +    /* Each node rotates back its local forces */
 +    transpose(rotmat,tmat);
 +    rotate_x(edi->flood.forces_cartesian, edi->sav.nr_loc, tmat);
 +
 +    /* Finally add forces to the main force variable */
 +    for (i=0; i<edi->sav.nr_loc; i++)
 +        rvec_inc(force[edi->sav.anrs_loc[i]],edi->flood.forces_cartesian[i]);
 +
 +    /* Output is written by the master process */
 +    if (do_per_step(step,edi->outfrq) && MASTER(cr))
 +        write_edo_flood(edi,edo,step);
 +}
 +
 +
 +/* Main flooding routine, called from do_force */
 +extern void do_flood(
 +        FILE            *log,    /* md.log file */
 +        t_commrec       *cr,     /* Communication record */
 +        rvec            x[],     /* Positions on the local processor */
 +        rvec            force[], /* forcefield forces, to these the flooding forces are added */
 +        gmx_edsam_t     ed,      /* ed data structure contains all ED and flooding datasets */
 +        matrix          box,     /* the box */
-             do_single_flood(ed->edo,x,force,edi,step,box,cr);
++        gmx_large_int_t step,    /* The relative time step since ir->init_step is already subtracted */
++        gmx_bool        bNS)     /* Are we in a neighbor searching step? */
 +{
 +    t_edpar *edi;
 +
 +
 +    if (ed->eEDtype != eEDflood)
 +        return;
 +
 +    edi = ed->edpar;
 +    while (edi)
 +    {
 +        /* Call flooding for one matrix */
 +        if (edi->flood.vecs.neig)
-     if (PAR(cr))
-         snew(edi->sref.x_old,edi->sref.nr);
++            do_single_flood(ed->edo,x,force,edi,step,box,cr,bNS);
 +        edi = edi->next_edi;
 +    }
 +}
 +
 +
 +/* Called by init_edi, configure some flooding related variables and structures,
 + * print headers to output files */
 +static void init_flood(t_edpar *edi, gmx_edsam_t ed, real dt, t_commrec *cr)
 +{
 +    int i;
 +
 +
 +    edi->flood.Efl = edi->flood.constEfl;
 +    edi->flood.Vfl = 0;
 +    edi->flood.dt  = dt;
 +
 +    if (edi->flood.vecs.neig)
 +    {
 +        /* If in any of the datasets we find a flooding vector, flooding is turned on */
 +        ed->eEDtype = eEDflood;
 +
 +        fprintf(stderr,"ED: Flooding of matrix %d is switched on.\n", edi->flood.flood_id);
 +
 +        if (edi->flood.bConstForce)
 +        {
 +            /* We have used stpsz as a vehicle to carry the fproj values for constant
 +             * force flooding. Now we copy that to flood.vecs.fproj. Note that
 +             * in const force flooding, fproj is never changed. */
 +            for (i=0; i<edi->flood.vecs.neig; i++)
 +            {
 +                edi->flood.vecs.fproj[i] = edi->flood.vecs.stpsz[i];
 +
 +                fprintf(stderr, "ED: applying on eigenvector %d a constant force of %g\n",
 +                        edi->flood.vecs.ieig[i], edi->flood.vecs.fproj[i]);
 +            }
 +        }
 +        fprintf(ed->edo,"FL_HEADER: Flooding of matrix %d is switched on! The flooding output will have the following format:\n",
 +                edi->flood.flood_id);
 +        fprintf(ed->edo,"FL_HEADER: Step     Efl          Vfl       deltaF\n");
 +    }
 +}
 +
 +
 +#ifdef DEBUGHELPERS
 +/*********** Energy book keeping ******/
 +static void get_flood_enx_names(t_edpar *edi, char** names, int *nnames)  /* get header of energies */
 +{
 +    t_edpar *actual;
 +    int count;
 +    char buf[STRLEN];
 +    actual=edi;
 +    count = 1;
 +    while (actual)
 +    {
 +        srenew(names,count);
 +        sprintf(buf,"Vfl_%d",count);
 +        names[count-1]=strdup(buf);
 +        actual=actual->next_edi;
 +        count++;
 +    }
 +    *nnames=count-1;
 +}
 +
 +
 +static void get_flood_energies(t_edpar *edi, real Vfl[],int nnames)
 +{
 +    /*fl has to be big enough to capture nnames-many entries*/
 +    t_edpar *actual;
 +    int count;
 +
 +
 +    actual=edi;
 +    count = 1;
 +    while (actual)
 +    {
 +        Vfl[count-1]=actual->flood.Vfl;
 +        actual=actual->next_edi;
 +        count++;
 +    }
 +    if (nnames!=count-1)
 +        gmx_fatal(FARGS,"Number of energies is not consistent with t_edi structure");
 +}
 +/************* END of FLOODING IMPLEMENTATION ****************************/
 +#endif
 +
 +
 +gmx_edsam_t ed_open(int nfile,const t_filenm fnm[],unsigned long Flags,t_commrec *cr)
 +{
 +    gmx_edsam_t ed;
 +
 +
 +    /* Allocate space for the ED data structure */
 +    snew(ed, 1);
 +
 +    /* We want to perform ED (this switch might later be upgraded to eEDflood) */
 +    ed->eEDtype = eEDedsam;
 +
 +    if (MASTER(cr))
 +    {
 +        /* Open .edi input file: */
 +        ed->edinam=ftp2fn(efEDI,nfile,fnm);
 +        /* The master opens the .edo output file */
 +        fprintf(stderr,"ED sampling will be performed!\n");
 +        ed->edonam = ftp2fn(efEDO,nfile,fnm);
 +        ed->edo    = gmx_fio_fopen(ed->edonam,(Flags & MD_APPENDFILES)? "a+" : "w+");
 +        ed->bStartFromCpt = Flags & MD_STARTFROMCPT;
 +    }
 +    return ed;
 +}
 +
 +
 +/* Broadcasts the structure data */
 +static void bc_ed_positions(t_commrec *cr, struct gmx_edx *s, int stype)
 +{
 +    snew_bc(cr, s->anrs, s->nr   );    /* Index numbers     */
 +    snew_bc(cr, s->x   , s->nr   );    /* Positions         */
 +    nblock_bc(cr, s->nr, s->anrs );
 +    nblock_bc(cr, s->nr, s->x    );
 +
 +    /* For the average & reference structures we need an array for the collective indices,
 +     * and we need to broadcast the masses as well */
 +    if (stype == eedAV || stype == eedREF)
 +    {
 +        /* We need these additional variables in the parallel case: */
 +        snew(s->c_ind    , s->nr   );   /* Collective indices */
 +        /* Local atom indices get assigned in dd_make_local_group_indices.
 +         * There, memory is also allocated */
 +        s->nalloc_loc = 0;              /* allocation size of s->anrs_loc */
 +        snew_bc(cr, s->x_old, s->nr);   /* To be able to always make the ED molecule whole, ...        */
 +        nblock_bc(cr, s->nr, s->x_old); /* ... keep track of shift changes with the help of old coords */
 +    }
 +
 +    /* broadcast masses for the reference structure (for mass-weighted fitting) */
 +    if (stype == eedREF)
 +    {
 +        snew_bc(cr, s->m, s->nr);
 +        nblock_bc(cr, s->nr, s->m);
 +    }
 +
 +    /* For the average structure we might need the masses for mass-weighting */
 +    if (stype == eedAV)
 +    {
 +        snew_bc(cr, s->sqrtm, s->nr);
 +        nblock_bc(cr, s->nr, s->sqrtm);
 +        snew_bc(cr, s->m, s->nr);
 +        nblock_bc(cr, s->nr, s->m);
 +    }
 +}
 +
 +
 +/* Broadcasts the eigenvector data */
 +static void bc_ed_vecs(t_commrec *cr, t_eigvec *ev, int length, gmx_bool bHarmonic)
 +{
 +    int i;
 +
 +    snew_bc(cr, ev->ieig   , ev->neig);  /* index numbers of eigenvector  */
 +    snew_bc(cr, ev->stpsz  , ev->neig);  /* stepsizes per eigenvector     */
 +    snew_bc(cr, ev->xproj  , ev->neig);  /* instantaneous x projection    */
 +    snew_bc(cr, ev->fproj  , ev->neig);  /* instantaneous f projection    */
 +    snew_bc(cr, ev->refproj, ev->neig);  /* starting or target projection */
 +
 +    nblock_bc(cr, ev->neig, ev->ieig   );
 +    nblock_bc(cr, ev->neig, ev->stpsz  );
 +    nblock_bc(cr, ev->neig, ev->xproj  );
 +    nblock_bc(cr, ev->neig, ev->fproj  );
 +    nblock_bc(cr, ev->neig, ev->refproj);
 +
 +    snew_bc(cr, ev->vec, ev->neig);      /* Eigenvector components        */
 +    for (i=0; i<ev->neig; i++)
 +    {
 +        snew_bc(cr, ev->vec[i], length);
 +        nblock_bc(cr, length, ev->vec[i]);
 +    }
 +
 +    /* For harmonic restraints the reference projections can change with time */
 +    if (bHarmonic)
 +    {
 +        snew_bc(cr, ev->refproj0    , ev->neig);
 +        snew_bc(cr, ev->refprojslope, ev->neig);
 +        nblock_bc(cr, ev->neig, ev->refproj0    );
 +        nblock_bc(cr, ev->neig, ev->refprojslope);
 +    }
 +}
 +
 +
 +/* Broadcasts the ED / flooding data to other nodes
 + * and allocates memory where needed */
 +static void broadcast_ed_data(t_commrec *cr, gmx_edsam_t ed, int numedis)
 +{
 +    int     nr;
 +    t_edpar *edi;
 +
 +
 +    /* Master lets the other nodes know if it's ED only or also flooding */
 +    gmx_bcast(sizeof(ed->eEDtype), &(ed->eEDtype), cr);
 +
 +    snew_bc(cr, ed->edpar,1);
 +    /* Now transfer the ED data set(s) */
 +    edi = ed->edpar;
 +    for (nr=0; nr<numedis; nr++)
 +    {
 +        /* Broadcast a single ED data set */
 +        block_bc(cr, *edi);
 +
 +        /* Broadcast positions */
 +        bc_ed_positions(cr, &(edi->sref), eedREF); /* reference positions (don't broadcast masses)    */
 +        bc_ed_positions(cr, &(edi->sav ), eedAV ); /* average positions (do broadcast masses as well) */
 +        bc_ed_positions(cr, &(edi->star), eedTAR); /* target positions                                */
 +        bc_ed_positions(cr, &(edi->sori), eedORI); /* origin positions                                */
 +
 +        /* Broadcast eigenvectors */
 +        bc_ed_vecs(cr, &edi->vecs.mon   , edi->sav.nr, FALSE);
 +        bc_ed_vecs(cr, &edi->vecs.linfix, edi->sav.nr, FALSE);
 +        bc_ed_vecs(cr, &edi->vecs.linacc, edi->sav.nr, FALSE);
 +        bc_ed_vecs(cr, &edi->vecs.radfix, edi->sav.nr, FALSE);
 +        bc_ed_vecs(cr, &edi->vecs.radacc, edi->sav.nr, FALSE);
 +        bc_ed_vecs(cr, &edi->vecs.radcon, edi->sav.nr, FALSE);
 +        /* Broadcast flooding eigenvectors and, if needed, values for the moving reference */
 +        bc_ed_vecs(cr, &edi->flood.vecs,  edi->sav.nr, edi->flood.bHarmonic);
 +
 +        /* Set the pointer to the next ED dataset */
 +        if (edi->next_edi)
 +        {
 +          snew_bc(cr, edi->next_edi, 1);
 +          edi = edi->next_edi;
 +        }
 +    }
 +}
 +
 +
 +/* init-routine called for every *.edi-cycle, initialises t_edpar structure */
 +static void init_edi(gmx_mtop_t *mtop,t_inputrec *ir,
 +                     t_commrec *cr,gmx_edsam_t ed,t_edpar *edi)
 +{
 +    int  i;
 +    real totalmass = 0.0;
 +    rvec com;
 +    t_atom *atom;
 +
 +    /* NOTE: init_edi is executed on the master process only.
 +     * The initialized data sets are then transmitted to the
 +     * other nodes in broadcast_ed_data */
 +
 +    edi->bNeedDoEdsam = edi->vecs.mon.neig
 +                     || edi->vecs.linfix.neig
 +                     || edi->vecs.linacc.neig
 +                     || edi->vecs.radfix.neig
 +                     || edi->vecs.radacc.neig
 +                     || edi->vecs.radcon.neig;
 +
 +    /* evaluate masses (reference structure) */
 +    snew(edi->sref.m, edi->sref.nr);
 +    for (i = 0; i < edi->sref.nr; i++)
 +    {
 +        if (edi->fitmas)
 +        {
 +            gmx_mtop_atomnr_to_atom(mtop,edi->sref.anrs[i],&atom);
 +            edi->sref.m[i] = atom->m;
 +        }
 +        else
 +        {
 +            edi->sref.m[i] = 1.0;
 +        }
 +
 +        /* Check that every m > 0. Bad things will happen otherwise. */
 +        if (edi->sref.m[i] <= 0.0)
 +        {
 +            gmx_fatal(FARGS, "Reference structure atom %d (sam.edi index %d) has a mass of %g.\n"
 +                             "For a mass-weighted fit, all reference structure atoms need to have a mass >0.\n"
 +                             "Either make the covariance analysis non-mass-weighted, or exclude massless\n"
 +                             "atoms from the reference structure by creating a proper index group.\n",
 +                      i, edi->sref.anrs[i]+1, edi->sref.m[i]);
 +        }
 +
 +        totalmass += edi->sref.m[i];
 +    }
 +    edi->sref.mtot = totalmass;
 +
 +    /* Masses m and sqrt(m) for the average structure. Note that m
 +     * is needed if forces have to be evaluated in do_edsam */
 +    snew(edi->sav.sqrtm, edi->sav.nr );
 +    snew(edi->sav.m    , edi->sav.nr );
 +    for (i = 0; i < edi->sav.nr; i++)
 +    {
 +        gmx_mtop_atomnr_to_atom(mtop,edi->sav.anrs[i],&atom);
 +        edi->sav.m[i] = atom->m;
 +        if (edi->pcamas)
 +        {
 +            edi->sav.sqrtm[i] = sqrt(atom->m);
 +        }
 +        else
 +        {
 +            edi->sav.sqrtm[i] = 1.0;
 +        }
 +
 +        /* Check that every m > 0. Bad things will happen otherwise. */
 +        if (edi->sav.sqrtm[i] <= 0.0)
 +        {
 +            gmx_fatal(FARGS, "Average structure atom %d (sam.edi index %d) has a mass of %g.\n"
 +                             "For ED with mass-weighting, all average structure atoms need to have a mass >0.\n"
 +                             "Either make the covariance analysis non-mass-weighted, or exclude massless\n"
 +                             "atoms from the average structure by creating a proper index group.\n",
 +                      i, edi->sav.anrs[i]+1, atom->m);
 +        }
 +    }
 +
 +    /* put reference structure in origin */
 +    get_center(edi->sref.x, edi->sref.m, edi->sref.nr, com);
 +    com[XX] = -com[XX];
 +    com[YY] = -com[YY];
 +    com[ZZ] = -com[ZZ];
 +    translate_x(edi->sref.x, edi->sref.nr, com);
 +
 +    /* Init ED buffer */
 +    snew(edi->buf, 1);
 +}
 +
 +
 +static void check(const char *line, const char *label)
 +{
 +    if (!strstr(line,label))
 +        gmx_fatal(FARGS,"Could not find input parameter %s at expected position in edsam input-file (.edi)\nline read instead is %s",label,line);
 +}
 +
 +
 +static int read_checked_edint(FILE *file,const char *label)
 +{
 +    char line[STRLEN+1];
 +    int idum;
 +
 +
 +    fgets2 (line,STRLEN,file);
 +    check(line,label);
 +    fgets2 (line,STRLEN,file);
 +    sscanf (line,"%d",&idum);
 +    return idum;
 +}
 +
 +
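 +/* Reads a label line plus a value line from the .edi file and returns the integer
 + * parsed from the value line. Unlike read_checked_edint, the label is not verified.
 + * Sets *bEOF and returns -1 if the end of the file is reached. */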
 +static int read_edint(FILE *file,gmx_bool *bEOF)
 +{
 +    char line[STRLEN+1];
 +    int idum;
 +    char *eof;
 +
 +
 +    eof=fgets2 (line,STRLEN,file);
 +    if (eof==NULL)
 +    {
 +        *bEOF = TRUE;
 +        return -1;
 +    }
 +    eof=fgets2 (line,STRLEN,file);
 +    if (eof==NULL)
 +    {
 +        *bEOF = TRUE;
 +        return -1;
 +    }
 +    sscanf (line,"%d",&idum);
 +    *bEOF = FALSE;
 +    return idum;
 +}
 +
 +
 +static real read_checked_edreal(FILE *file,const char *label)
 +{
 +    char line[STRLEN+1];
 +    double rdum;
 +
 +
 +    fgets2 (line,STRLEN,file);
 +    check(line,label);
 +    fgets2 (line,STRLEN,file);
 +    sscanf (line,"%lf",&rdum);
 +    return (real) rdum; /* always read as double and convert to single */
 +}
 +
 +
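 +/* Reads 'number' lines of the form "index x y z"; the 1-based indices from the
 + * .edi file are converted to 0-based C indices */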
 +static void read_edx(FILE *file,int number,int *anrs,rvec *x)
 +{
 +    int i,j;
 +    char line[STRLEN+1];
 +    double d[3];
 +
 +
 +    for(i=0; i<number; i++)
 +    {
 +        fgets2 (line,STRLEN,file);
 +        sscanf (line,"%d%lf%lf%lf",&anrs[i],&d[0],&d[1],&d[2]);
 +        anrs[i]--; /* we are reading FORTRAN indices */
 +        for(j=0; j<3; j++)
 +            x[i][j]=d[j]; /* always read as double and convert to single */
 +    }
 +}
 +
 +
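 +/* Reads nr eigenvector components (one x y z triple per line) from the .edi file */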
 +static void scan_edvec(FILE *in,int nr,rvec *vec)
 +{
 +    char line[STRLEN+1];
 +    int i;
 +    double x,y,z;
 +
 +
 +    for(i=0; (i < nr); i++)
 +    {
 +        fgets2 (line,STRLEN,in);
 +        sscanf (line,"%le%le%le",&x,&y,&z);
 +        vec[i][XX]=x;
 +        vec[i][YY]=y;
 +        vec[i][ZZ]=z;
 +    }
 +}
 +
 +
 +static void read_edvec(FILE *in,int nr,t_eigvec *tvec,gmx_bool bReadRefproj, gmx_bool *bHaveReference)
 +{
 +    int i,idum,nscan;
 +    double rdum,refproj_dum=0.0,refprojslope_dum=0.0;
 +    char line[STRLEN+1];
 +
 +
 +    tvec->neig=read_checked_edint(in,"NUMBER OF EIGENVECTORS");
 +    if (tvec->neig >0)
 +    {
 +        snew(tvec->ieig   ,tvec->neig);
 +        snew(tvec->stpsz  ,tvec->neig);
 +        snew(tvec->vec    ,tvec->neig);
 +        snew(tvec->xproj  ,tvec->neig);
 +        snew(tvec->fproj  ,tvec->neig);
 +        snew(tvec->refproj,tvec->neig);
 +        if (bReadRefproj)
 +        {
 +            snew(tvec->refproj0    ,tvec->neig);
 +            snew(tvec->refprojslope,tvec->neig);
 +        }
 +
 +        for(i=0; (i < tvec->neig); i++)
 +        {
 +            fgets2 (line,STRLEN,in);
 +            if (bReadRefproj) /* ONLY when using flooding as harmonic restraint */
 +            {
 +                nscan = sscanf(line,"%d%lf%lf%lf",&idum,&rdum,&refproj_dum,&refprojslope_dum);
 +                /* Zero out values which were not scanned */
 +                switch(nscan)
 +                {
 +                    case 4:
 +                        /* Every 4 values read, including reference position */
 +                        *bHaveReference = TRUE;
 +                        break;
 +                    case 3:
 +                        /* A reference position is provided */
 +                        *bHaveReference = TRUE;
 +                        /* No value for slope, set to 0 */
 +                        refprojslope_dum = 0.0;
 +                        break;
 +                    case 2:
 +                        /* No values for reference projection and slope, set to 0 */
 +                        refproj_dum      = 0.0;
 +                        refprojslope_dum = 0.0;
 +                        break;
 +                    default:
 +                        gmx_fatal(FARGS,"Expected 2 - 4 (not %d) values for flooding vec: <nr> <spring const> <refproj> <refproj-slope>\n", nscan);
 +                        break;
 +                }
 +                tvec->refproj[i]=refproj_dum;
 +                tvec->refproj0[i]=refproj_dum;
 +                tvec->refprojslope[i]=refprojslope_dum;
 +            }
 +            else /* Normal flooding */
 +            {
 +                nscan = sscanf(line,"%d%lf",&idum,&rdum);
 +                if (nscan != 2)
 +                    gmx_fatal(FARGS,"Expected 2 values for flooding vec: <nr> <stpsz>\n");
 +            }
 +            tvec->ieig[i]=idum;
 +            tvec->stpsz[i]=rdum;
 +        } /* end of loop over eigenvectors */
 +
 +        for(i=0; (i < tvec->neig); i++)
 +        {
 +            snew(tvec->vec[i],nr);
 +            scan_edvec(in,nr,tvec->vec[i]);
 +        }
 +    }
 +}
 +
 +
 +/* Calls read_edvec for the vector groups; only for flooding is there an extra call */
 +static void read_edvecs(FILE *in,int nr,t_edvecs *vecs)
 +{
 +      gmx_bool bHaveReference = FALSE;
 +
 +
 +    read_edvec(in, nr, &vecs->mon   , FALSE, &bHaveReference);
 +    read_edvec(in, nr, &vecs->linfix, FALSE, &bHaveReference);
 +    read_edvec(in, nr, &vecs->linacc, FALSE, &bHaveReference);
 +    read_edvec(in, nr, &vecs->radfix, FALSE, &bHaveReference);
 +    read_edvec(in, nr, &vecs->radacc, FALSE, &bHaveReference);
 +    read_edvec(in, nr, &vecs->radcon, FALSE, &bHaveReference);
 +}
 +
 +
 +/* Check if the same atom indices are used for reference and average positions */
 +static gmx_bool check_if_same(struct gmx_edx sref, struct gmx_edx sav)
 +{
 +    int i;
 +
 +
 +    /* If the number of atoms differs between the two structures,
 +     * they cannot be identical */
 +    if (sref.nr != sav.nr)
 +        return FALSE;
 +
 +    /* Now that we know that both structures have the same number of atoms,
 +     * check if also the indices are identical */
 +    for (i=0; i < sav.nr; i++)
 +    {
 +        if (sref.anrs[i] != sav.anrs[i])
 +            return FALSE;
 +    }
 +    fprintf(stderr, "ED: Note: Reference and average structure are composed of the same atom indices.\n");
 +
 +    return TRUE;
 +}
 +
 +
 +static int read_edi(FILE* in, gmx_edsam_t ed,t_edpar *edi,int nr_mdatoms, int edi_nr, t_commrec *cr)
 +{
 +    int readmagic;
 +    const int magic=670;
 +    gmx_bool bEOF;
 +
 +    /* Was a specific reference point for the flooding/umbrella potential provided in the edi file? */
 +    gmx_bool bHaveReference = FALSE;
 +
 +
 +    /* the edi file is not free format, so expect problems if the input is corrupt. */
 +
 +    /* check the magic number */
 +    readmagic=read_edint(in,&bEOF);
 +    /* Check whether we have reached the end of the input file */
 +    if (bEOF)
 +        return 0;
 +
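 +    /* Magic 670 is the current format (it has the CONST_FORCE_FLOODING flag),
 +     * 669 is still accepted but lacks that flag, older formats are rejected. */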
 +    if (readmagic != magic)
 +    {
 +        if (readmagic==666 || readmagic==667 || readmagic==668)
 +            gmx_fatal(FARGS,"Wrong magic number: Use newest version of make_edi to produce edi file");
 +        else if (readmagic != 669)
 +            gmx_fatal(FARGS,"Wrong magic number %d in %s",readmagic,ed->edinam);
 +    }
 +
 +    /* check the number of atoms */
 +    edi->nini=read_edint(in,&bEOF);
 +    if (edi->nini != nr_mdatoms)
 +        gmx_fatal(FARGS,"Nr of atoms in %s (%d) does not match nr of md atoms (%d)",
 +                ed->edinam,edi->nini,nr_mdatoms);
 +
 +    /* Done checking. For the rest we blindly trust the input */
 +    edi->fitmas          = read_checked_edint(in,"FITMAS");
 +    edi->pcamas          = read_checked_edint(in,"ANALYSIS_MAS");
 +    edi->outfrq          = read_checked_edint(in,"OUTFRQ");
 +    edi->maxedsteps      = read_checked_edint(in,"MAXLEN");
 +    edi->slope           = read_checked_edreal(in,"SLOPECRIT");
 +
 +    edi->presteps        = read_checked_edint(in,"PRESTEPS");
 +    edi->flood.deltaF0   = read_checked_edreal(in,"DELTA_F0");
 +    edi->flood.deltaF    = read_checked_edreal(in,"INIT_DELTA_F");
 +    edi->flood.tau       = read_checked_edreal(in,"TAU");
 +    edi->flood.constEfl  = read_checked_edreal(in,"EFL_NULL");
 +    edi->flood.alpha2    = read_checked_edreal(in,"ALPHA2");
 +    edi->flood.kT        = read_checked_edreal(in,"KT");
 +    edi->flood.bHarmonic = read_checked_edint(in,"HARMONIC");
 +    if (readmagic > 669)
 +        edi->flood.bConstForce = read_checked_edint(in,"CONST_FORCE_FLOODING");
 +    else
 +        edi->flood.bConstForce = FALSE;
 +    edi->flood.flood_id  = edi_nr;
 +    edi->sref.nr         = read_checked_edint(in,"NREF");
 +
 +    /* allocate space for reference positions and read them */
 +    snew(edi->sref.anrs,edi->sref.nr);
 +    snew(edi->sref.x   ,edi->sref.nr);
-     if (PAR(cr))
-         snew(edi->sav.x_old,edi->sav.nr);
++    snew(edi->sref.x_old,edi->sref.nr);
 +    edi->sref.sqrtm    =NULL;
 +    read_edx(in,edi->sref.nr,edi->sref.anrs,edi->sref.x);
 +
 +    /* average positions. they define which atoms will be used for ED sampling */
 +    edi->sav.nr=read_checked_edint(in,"NAV");
 +    snew(edi->sav.anrs,edi->sav.nr);
 +    snew(edi->sav.x   ,edi->sav.nr);
-       int i;
++    snew(edi->sav.x_old,edi->sav.nr);
 +    read_edx(in,edi->sav.nr,edi->sav.anrs,edi->sav.x);
 +
 +    /* Check if the same atom indices are used for reference and average positions */
 +    edi->bRefEqAv = check_if_same(edi->sref, edi->sav);
 +
 +    /* eigenvectors */
 +    read_edvecs(in,edi->sav.nr,&edi->vecs);
 +    read_edvec(in,edi->sav.nr,&edi->flood.vecs,edi->flood.bHarmonic, &bHaveReference);
 +
 +    /* target positions */
 +    edi->star.nr=read_edint(in,&bEOF);
 +    if (edi->star.nr > 0)
 +    {
 +        snew(edi->star.anrs,edi->star.nr);
 +        snew(edi->star.x   ,edi->star.nr);
 +        edi->star.sqrtm    =NULL;
 +        read_edx(in,edi->star.nr,edi->star.anrs,edi->star.x);
 +    }
 +
 +    /* positions defining origin of expansion circle */
 +    edi->sori.nr=read_edint(in,&bEOF);
 +    if (edi->sori.nr > 0)
 +    {
 +        if (bHaveReference)
 +        {
 +            /* Both an -ori structure and at least one manual reference point have been
 +             * specified. That's ambiguous and probably not intentional. */
 +            gmx_fatal(FARGS, "ED: An origin structure has been provided and at least one (moving) reference\n"
 +                             "    point was manually specified in the edi file. That is ambiguous. Aborting.\n");
 +        }
 +        snew(edi->sori.anrs,edi->sori.nr);
 +        snew(edi->sori.x   ,edi->sori.nr);
 +        edi->sori.sqrtm    =NULL;
 +        read_edx(in,edi->sori.nr,edi->sori.anrs,edi->sori.x);
 +    }
 +
 +    /* all done */
 +    return 1;
 +}
 +
 +
 +
 +/* Read in the edi input file. Note that it may contain several ED data sets, which can
 + * be created by concatenating multiple edi files. The standard case would be a single ED
 + * data set, though. */
 +static void read_edi_file(gmx_edsam_t ed, t_edpar *edi, int nr_mdatoms, t_commrec *cr)
 +{
 +    FILE    *in;
 +    t_edpar *curr_edi,*last_edi;
 +    t_edpar *edi_read;
 +    int     edi_nr = 0;
 +
 +
 +    /* This routine is executed on the master only */
 +
 +    /* Open the .edi parameter input file */
 +    in = gmx_fio_fopen(ed->edinam,"r");
 +    fprintf(stderr, "ED: Reading edi file %s\n", ed->edinam);
 +
 +    /* Now read a sequence of ED input parameter sets from the edi file */
 +    curr_edi=edi;
 +    last_edi=edi;
 +    while( read_edi(in, ed, curr_edi, nr_mdatoms, edi_nr, cr) )
 +    {
 +        edi_nr++;
 +        /* Make sure that the number of atoms in each dataset is the same as in the tpr file */
 +        if (edi->nini != nr_mdatoms)
 +            gmx_fatal(FARGS,"edi file %s (dataset #%d) was made for %d atoms, but the simulation contains %d atoms.",
 +                    ed->edinam, edi_nr, edi->nini, nr_mdatoms);
 +        /* Since we arrived within this while loop we know that there is still another data set to be read in */
 +        /* We need to allocate space for the data: */
 +        snew(edi_read,1);
 +        /* Point the 'next_edi' entry to the next edi: */
 +        curr_edi->next_edi=edi_read;
 +        /* Keep the curr_edi pointer for the case that the next dataset is empty: */
 +        last_edi = curr_edi;
 +        /* Let's prepare to read in the next edi data set: */
 +        curr_edi = edi_read;
 +    }
 +    if (edi_nr == 0)
 +        gmx_fatal(FARGS, "No complete ED data set found in edi file %s.", ed->edinam);
 +
 +    /* Terminate the edi dataset list with a NULL pointer: */
 +    last_edi->next_edi = NULL;
 +
 +    fprintf(stderr, "ED: Found %d ED dataset%s.\n", edi_nr, edi_nr>1? "s" : "");
 +
 +    /* Close the .edi file again */
 +    gmx_fio_fclose(in);
 +}
 +
 +
 +struct t_fit_to_ref {
 +    rvec *xcopy;       /* Working copy of the positions in fit_to_reference */
 +};
 +
 +/* Fit the current positions to the reference positions
 + * Do not actually do the fit, just return rotation and translation.
 + * Note that the COM of the reference structure was already put into
 + * the origin by init_edi. */
 +static void fit_to_reference(rvec      *xcoll,    /* The positions to be fitted */
 +                             rvec      transvec,  /* The translation vector */
 +                             matrix    rotmat,    /* The rotation matrix */
 +                             t_edpar   *edi)      /* Just needed for do_edfit */
 +{
 +    rvec com;          /* center of mass */
 +    int  i;
 +    struct t_fit_to_ref *loc;
 +
 +
 +    /* Allocate memory the first time this routine is called for each edi dataset */
 +    if (NULL == edi->buf->fit_to_ref)
 +    {
 +        snew(edi->buf->fit_to_ref, 1);
 +        snew(edi->buf->fit_to_ref->xcopy, edi->sref.nr);
 +    }
 +    loc = edi->buf->fit_to_ref;
 +
 +    /* We do not touch the original positions but work on a copy. */
 +    for (i=0; i<edi->sref.nr; i++)
 +        copy_rvec(xcoll[i], loc->xcopy[i]);
 +
 +    /* Calculate the center of mass */
 +    get_center(loc->xcopy, edi->sref.m, edi->sref.nr, com);
 +
 +    transvec[XX] = -com[XX];
 +    transvec[YY] = -com[YY];
 +    transvec[ZZ] = -com[ZZ];
 +
 +    /* Subtract the center of mass from the copy */
 +    translate_x(loc->xcopy, edi->sref.nr, transvec);
 +
 +    /* Determine the rotation matrix */
 +    do_edfit(edi->sref.nr, edi->sref.x, loc->xcopy, rotmat, edi);
 +}
 +
 +
 +static void translate_and_rotate(rvec *x,         /* The positions to be translated and rotated */
 +                                 int nat,         /* How many positions are there? */
 +                                 rvec transvec,   /* The translation vector */
 +                                 matrix rotmat)   /* The rotation matrix */
 +{
 +    /* Translation */
 +    translate_x(x, nat, transvec);
 +
 +    /* Rotation */
 +    rotate_x(x, nat, rotmat);
 +}
 +
 +
 +/* Gets the rms deviation of the positions to the structure s */
 +/* fit_to_structure has to be called before calling this routine! */
 +static real rmsd_from_structure(rvec           *x,  /* The positions under consideration */
 +                                struct gmx_edx *s)  /* The structure from which the rmsd shall be computed */
 +{
 +    real  rmsd=0.0;
 +    int   i;
 +
 +
 +    for (i=0; i < s->nr; i++)
 +        rmsd += distance2(s->x[i], x[i]);
 +
 +    rmsd /= (real) s->nr;
 +    rmsd = sqrt(rmsd);
 +
 +    return rmsd;
 +}
 +
 +
 +void dd_make_local_ed_indices(gmx_domdec_t *dd, struct gmx_edsam *ed)
 +{
 +    t_edpar *edi;
 +
 +
 +    if (ed->eEDtype != eEDnone)
 +    {
 +        /* Loop over ED datasets (usually there is just one dataset, though) */
 +        edi=ed->edpar;
 +        while (edi)
 +        {
 +            /* Local atoms of the reference structure (for fitting), need only be assembled
 +             * if their indices differ from the average ones */
 +            if (!edi->bRefEqAv)
 +                dd_make_local_group_indices(dd->ga2la, edi->sref.nr, edi->sref.anrs,
 +                        &edi->sref.nr_loc, &edi->sref.anrs_loc, &edi->sref.nalloc_loc, edi->sref.c_ind);
 +
 +            /* Local atoms of the average structure (on these ED will be performed) */
 +            dd_make_local_group_indices(dd->ga2la, edi->sav.nr, edi->sav.anrs,
 +                    &edi->sav.nr_loc, &edi->sav.anrs_loc, &edi->sav.nalloc_loc, edi->sav.c_ind);
 +
 +            /* Indicate that the ED shift vectors for this structure need to be updated
 +             * at the next call to communicate_group_positions, since obviously we are in a NS step */
 +            edi->buf->do_edsam->bUpdateShifts = TRUE;
 +
 +            /* Set the pointer to the next ED dataset (if any) */
 +            edi=edi->next_edi;
 +        }
 +    }
 +}
 +
 +
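 +/* Removes the periodic shift given by the shift indices is[] from position x and
 + * stores the unshifted result in xu. For triclinic boxes the y and z shifts also
 + * contribute to the x (and y) components. */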
 +static inline void ed_unshift_single_coord(matrix box, const rvec x, const ivec is, rvec xu)
 +{
 +    int tx,ty,tz;
 +
 +
 +    tx=is[XX];
 +    ty=is[YY];
 +    tz=is[ZZ];
 +
 +    if(TRICLINIC(box))
 +    {
 +        xu[XX] = x[XX]-tx*box[XX][XX]-ty*box[YY][XX]-tz*box[ZZ][XX];
 +        xu[YY] = x[YY]-ty*box[YY][YY]-tz*box[ZZ][YY];
 +        xu[ZZ] = x[ZZ]-tz*box[ZZ][ZZ];
 +    } else
 +    {
 +        xu[XX] = x[XX]-tx*box[XX][XX];
 +        xu[YY] = x[YY]-ty*box[YY][YY];
 +        xu[ZZ] = x[ZZ]-tz*box[ZZ][ZZ];
 +    }
 +}
 +
 +
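 +/* Fixed-step linear sampling: shifts the positions along each linfix eigenvector so
 + * that the projection follows refproj + step*stpsz */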
 +static void do_linfix(rvec *xcoll, t_edpar *edi, int step, t_commrec *cr)
 +{
 +    int  i, j;
 +    real proj, add;
 +    rvec vec_dum;
 +
 +
 +    /* loop over linfix vectors */
 +    for (i=0; i<edi->vecs.linfix.neig; i++)
 +    {
 +        /* calculate the projection */
 +        proj = projectx(edi, xcoll, edi->vecs.linfix.vec[i]);
 +
 +        /* calculate the correction */
 +        add = edi->vecs.linfix.refproj[i] + step*edi->vecs.linfix.stpsz[i] - proj;
 +
 +        /* apply the correction */
 +        add /= edi->sav.sqrtm[i];
 +        for (j=0; j<edi->sav.nr; j++)
 +        {
 +            svmul(add, edi->vecs.linfix.vec[i][j], vec_dum);
 +            rvec_inc(xcoll[j], vec_dum);
 +        }
 +    }
 +}
 +
 +
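 +/* Acceptance linear sampling: corrects the projection only when it moves against the
 + * direction given by the sign of stpsz; the new projection then becomes the reference */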
 +static void do_linacc(rvec *xcoll, t_edpar *edi, t_commrec *cr)
 +{
 +    int  i, j;
 +    real proj, add;
 +    rvec vec_dum;
 +
 +
 +    /* loop over linacc vectors */
 +    for (i=0; i<edi->vecs.linacc.neig; i++)
 +    {
 +        /* calculate the projection */
 +        proj=projectx(edi, xcoll, edi->vecs.linacc.vec[i]);
 +
 +        /* calculate the correction */
 +        add = 0.0;
 +        if (edi->vecs.linacc.stpsz[i] > 0.0)
 +        {
 +            if ((proj-edi->vecs.linacc.refproj[i]) < 0.0)
 +                add = edi->vecs.linacc.refproj[i] - proj;
 +        }
 +        if (edi->vecs.linacc.stpsz[i] < 0.0)
 +        {
 +            if ((proj-edi->vecs.linacc.refproj[i]) > 0.0)
 +                add = edi->vecs.linacc.refproj[i] - proj;
 +        }
 +
 +        /* apply the correction */
 +        add /= edi->sav.sqrtm[i];
 +        for (j=0; j<edi->sav.nr; j++)
 +        {
 +            svmul(add, edi->vecs.linacc.vec[i][j], vec_dum);
 +            rvec_inc(xcoll[j], vec_dum);
 +        }
 +
 +        /* new positions will act as reference */
 +        edi->vecs.linacc.refproj[i] = proj + add;
 +    }
 +}
 +
 +
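 +/* Fixed-step radius expansion: increases the radius by stpsz[0] each call and scales
 + * the projections along the radfix eigenvectors onto the new radius */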
 +static void do_radfix(rvec *xcoll, t_edpar *edi, int step, t_commrec *cr)
 +{
 +    int  i,j;
 +    real *proj, rad=0.0, ratio;
 +    rvec vec_dum;
 +
 +
 +    if (edi->vecs.radfix.neig == 0)
 +        return;
 +
 +    snew(proj, edi->vecs.radfix.neig);
 +
 +    /* loop over radfix vectors */
 +    for (i=0; i<edi->vecs.radfix.neig; i++)
 +    {
 +        /* calculate the projections, radius */
 +        proj[i] = projectx(edi, xcoll, edi->vecs.radfix.vec[i]);
 +        rad += pow(proj[i] - edi->vecs.radfix.refproj[i], 2);
 +    }
 +
 +    rad   = sqrt(rad);
 +    ratio = (edi->vecs.radfix.stpsz[0]+edi->vecs.radfix.radius)/rad - 1.0;
 +    edi->vecs.radfix.radius += edi->vecs.radfix.stpsz[0];
 +
 +    /* loop over radfix vectors */
 +    for (i=0; i<edi->vecs.radfix.neig; i++)
 +    {
 +        proj[i] -= edi->vecs.radfix.refproj[i];
 +
 +        /* apply the correction */
 +        proj[i] /= edi->sav.sqrtm[i];
 +        proj[i] *= ratio;
 +        for (j=0; j<edi->sav.nr; j++) {
 +            svmul(proj[i], edi->vecs.radfix.vec[i][j], vec_dum);
 +            rvec_inc(xcoll[j], vec_dum);
 +        }
 +    }
 +
 +    sfree(proj);
 +}
 +
 +
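 +/* Radius acceptance: if the radius has shrunk below the stored acceptance radius, the
 + * positions are scaled back out to it; otherwise the stored radius is updated */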
 +static void do_radacc(rvec *xcoll, t_edpar *edi, t_commrec *cr)
 +{
 +    int  i,j;
 +    real *proj, rad=0.0, ratio=0.0;
 +    rvec vec_dum;
 +
 +
 +    if (edi->vecs.radacc.neig == 0)
 +        return;
 +
 +    snew(proj,edi->vecs.radacc.neig);
 +
 +    /* loop over radacc vectors */
 +    for (i=0; i<edi->vecs.radacc.neig; i++)
 +    {
 +        /* calculate the projections, radius */
 +        proj[i] = projectx(edi, xcoll, edi->vecs.radacc.vec[i]);
 +        rad += pow(proj[i] - edi->vecs.radacc.refproj[i], 2);
 +    }
 +    rad = sqrt(rad);
 +
 +    /* only correct when radius decreased */
 +    if (rad < edi->vecs.radacc.radius)
 +    {
 +        ratio = edi->vecs.radacc.radius/rad - 1.0;
 +        rad   = edi->vecs.radacc.radius;
 +    }
 +    else
 +        edi->vecs.radacc.radius = rad;
 +
 +    /* loop over radacc vectors */
 +    for (i=0; i<edi->vecs.radacc.neig; i++)
 +    {
 +        proj[i] -= edi->vecs.radacc.refproj[i];
 +
 +        /* apply the correction */
 +        proj[i] /= edi->sav.sqrtm[i];
 +        proj[i] *= ratio;
 +        for (j=0; j<edi->sav.nr; j++)
 +        {
 +            svmul(proj[i], edi->vecs.radacc.vec[i][j], vec_dum);
 +            rvec_inc(xcoll[j], vec_dum);
 +        }
 +    }
 +    sfree(proj);
 +}
 +
 +
 +struct t_do_radcon {
 +    real *proj;
 +};
 +
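 +/* Radius contraction: if the radius has grown beyond the stored contraction radius,
 + * the positions are scaled back onto it; otherwise the stored radius is updated */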
 +static void do_radcon(rvec *xcoll, t_edpar *edi, t_commrec *cr)
 +{
 +    int  i,j;
 +    real rad=0.0, ratio=0.0;
 +    struct t_do_radcon *loc;
 +    gmx_bool bFirst;
 +    rvec vec_dum;
 +
 +
 +    if(edi->buf->do_radcon != NULL)
 +    {
 +        bFirst = FALSE;
 +        loc    = edi->buf->do_radcon;
 +    }
 +    else
 +    {
 +        bFirst = TRUE;
 +        snew(edi->buf->do_radcon, 1);
 +    }
 +    loc = edi->buf->do_radcon;
 +
 +    if (edi->vecs.radcon.neig == 0)
 +        return;
 +
 +    if (bFirst)
 +        snew(loc->proj, edi->vecs.radcon.neig);
 +
 +    /* loop over radcon vectors */
 +    for (i=0; i<edi->vecs.radcon.neig; i++)
 +    {
 +        /* calculate the projections, radius */
 +        loc->proj[i] = projectx(edi, xcoll, edi->vecs.radcon.vec[i]);
 +        rad += pow(loc->proj[i] - edi->vecs.radcon.refproj[i], 2);
 +    }
 +    rad = sqrt(rad);
 +    /* only correct when radius increased */
 +    if (rad > edi->vecs.radcon.radius)
 +    {
 +        ratio = edi->vecs.radcon.radius/rad - 1.0;
 +
 +        /* loop over radcon vectors */
 +        for (i=0; i<edi->vecs.radcon.neig; i++)
 +        {
 +            /* apply the correction */
 +            loc->proj[i] -= edi->vecs.radcon.refproj[i];
 +            loc->proj[i] /= edi->sav.sqrtm[i];
 +            loc->proj[i] *= ratio;
 +
 +            for (j=0; j<edi->sav.nr; j++)
 +            {
 +                svmul(loc->proj[i], edi->vecs.radcon.vec[i][j], vec_dum);
 +                rvec_inc(xcoll[j], vec_dum);
 +            }
 +        }
 +    }
 +    else
 +        edi->vecs.radcon.radius = rad;
 +
 +    if (rad != edi->vecs.radcon.radius)
 +    {
 +        rad = 0.0;
 +        for (i=0; i<edi->vecs.radcon.neig; i++)
 +        {
 +            /* calculate the projections, radius */
 +            loc->proj[i] = projectx(edi, xcoll, edi->vecs.radcon.vec[i]);
 +            rad += pow(loc->proj[i] - edi->vecs.radcon.refproj[i], 2);
 +        }
 +        rad = sqrt(rad);
 +    }
 +}
 +
 +
 +static void ed_apply_constraints(rvec *xcoll, t_edpar *edi, gmx_large_int_t step, t_commrec *cr)
 +{
 +    int i;
 +
 +
 +    /* subtract the average positions */
 +    for (i=0; i<edi->sav.nr; i++)
 +        rvec_dec(xcoll[i], edi->sav.x[i]);
 +
 +    /* apply the constraints */
 +    if (step >= 0)
 +        do_linfix(xcoll, edi, step, cr);
 +    do_linacc(xcoll, edi, cr);
 +    if (step >= 0)
 +        do_radfix(xcoll, edi, step, cr);
 +    do_radacc(xcoll, edi, cr);
 +    do_radcon(xcoll, edi, cr);
 +
 +    /* add back the average positions */
 +    for (i=0; i<edi->sav.nr; i++)
 +        rvec_inc(xcoll[i], edi->sav.x[i]);
 +}
 +
 +
 +/* Write out the projections onto the eigenvectors */
 +static void write_edo(int nr_edi, t_edpar *edi, gmx_edsam_t ed, gmx_large_int_t step,real rmsd)
 +{
 +    int i;
 +    char buf[22];
 +
 +
 +    if (edi->bNeedDoEdsam)
 +    {
 +        if (step == -1)
 +            fprintf(ed->edo, "Initial projections:\n");
 +        else
 +        {
 +            fprintf(ed->edo,"Step %s, ED #%d  ", gmx_step_str(step, buf), nr_edi);
 +            fprintf(ed->edo,"  RMSD %f nm\n",rmsd);
 +        }
 +
 +        if (edi->vecs.mon.neig)
 +        {
 +            fprintf(ed->edo,"  Monitor eigenvectors");
 +            for (i=0; i<edi->vecs.mon.neig; i++)
 +                fprintf(ed->edo," %d: %12.5e ",edi->vecs.mon.ieig[i],edi->vecs.mon.xproj[i]);
 +            fprintf(ed->edo,"\n");
 +        }
 +        if (edi->vecs.linfix.neig)
 +        {
 +            fprintf(ed->edo,"  Linfix  eigenvectors");
 +            for (i=0; i<edi->vecs.linfix.neig; i++)
 +                fprintf(ed->edo," %d: %12.5e ",edi->vecs.linfix.ieig[i],edi->vecs.linfix.xproj[i]);
 +            fprintf(ed->edo,"\n");
 +        }
 +        if (edi->vecs.linacc.neig)
 +        {
 +            fprintf(ed->edo,"  Linacc  eigenvectors");
 +            for (i=0; i<edi->vecs.linacc.neig; i++)
 +                fprintf(ed->edo," %d: %12.5e ",edi->vecs.linacc.ieig[i],edi->vecs.linacc.xproj[i]);
 +            fprintf(ed->edo,"\n");
 +        }
 +        if (edi->vecs.radfix.neig)
 +        {
 +            fprintf(ed->edo,"  Radfix  eigenvectors");
 +            for (i=0; i<edi->vecs.radfix.neig; i++)
 +                fprintf(ed->edo," %d: %12.5e ",edi->vecs.radfix.ieig[i],edi->vecs.radfix.xproj[i]);
 +            fprintf(ed->edo,"\n");
 +            fprintf(ed->edo,"  fixed increment radius = %f\n", calc_radius(&edi->vecs.radfix));
 +        }
 +        if (edi->vecs.radacc.neig)
 +        {
 +            fprintf(ed->edo,"  Radacc  eigenvectors");
 +            for (i=0; i<edi->vecs.radacc.neig; i++)
 +                fprintf(ed->edo," %d: %12.5e ",edi->vecs.radacc.ieig[i],edi->vecs.radacc.xproj[i]);
 +            fprintf(ed->edo,"\n");
 +            fprintf(ed->edo,"  acceptance radius      = %f\n", calc_radius(&edi->vecs.radacc));
 +        }
 +        if (edi->vecs.radcon.neig)
 +        {
 +            fprintf(ed->edo,"  Radcon  eigenvectors");
 +            for (i=0; i<edi->vecs.radcon.neig; i++)
 +                fprintf(ed->edo," %d: %12.5e ",edi->vecs.radcon.ieig[i],edi->vecs.radcon.xproj[i]);
 +            fprintf(ed->edo,"\n");
 +            fprintf(ed->edo,"  contracting radius     = %f\n", calc_radius(&edi->vecs.radcon));
 +        }
 +    }
 +}
 +
 +/* Returns whether any constraints are switched on */
 +static int ed_constraints(gmx_bool edtype, t_edpar *edi)
 +{
 +    if (edtype == eEDedsam || edtype == eEDflood)
 +    {
 +        return (edi->vecs.linfix.neig || edi->vecs.linacc.neig ||
 +                edi->vecs.radfix.neig || edi->vecs.radacc.neig ||
 +                edi->vecs.radcon.neig);
 +    }
 +    return 0;
 +}
 +
 +
 +/* Copies reference projection 'refproj' to fixed 'refproj0' variable for flooding/
 + * umbrella sampling simulations. */
 +static void copyEvecReference(t_eigvec* floodvecs)
 +{
-       for (i=0; i<floodvecs->neig; i++)
-       {
-               floodvecs->refproj0[i] = floodvecs->refproj[i];
-       }
++    int i;
++
 +
++    if (NULL==floodvecs->refproj0)
++        snew(floodvecs->refproj0, floodvecs->neig);
 +
-     int     i,nr_edi;
++    for (i=0; i<floodvecs->neig; i++)
++    {
++        floodvecs->refproj0[i] = floodvecs->refproj[i];
++    }
 +}
 +
 +
 +void init_edsam(gmx_mtop_t  *mtop,   /* global topology                    */
 +                t_inputrec  *ir,     /* input record                       */
 +                t_commrec   *cr,     /* communication record               */
 +                gmx_edsam_t ed,      /* contains all ED data               */
 +                rvec        x[],     /* positions of the whole MD system   */
 +                matrix      box)     /* the box                            */
 +{
 +    t_edpar *edi = NULL;    /* points to a single edi data set */
 +    int     numedis=0;      /* keep track of the number of ED data sets in edi file */
-                 /* Save the sref positions such that in the next time step the molecule can
-                  * be made whole again (in the parallel case) */
-                 if (PAR(cr))
-                     copy_rvec(xfit[i], edi->sref.x_old[i]);
++    int     i,nr_edi,avindex;
 +    rvec    *x_pbc  = NULL; /* positions of the whole MD system with pbc removed  */
 +    rvec    *xfit   = NULL; /* the positions which will be fitted to the reference structure  */
 +    rvec    *xstart = NULL; /* the positions which are subject to ED sampling */
 +    rvec    fit_transvec;   /* translation ... */
 +    matrix  fit_rotmat;     /* ... and rotation from fit to reference structure */
 +
 +
 +    if (!DOMAINDECOMP(cr) && PAR(cr) && MASTER(cr))
 +        gmx_fatal(FARGS, "Please switch on domain decomposition to use essential dynamics in parallel.");
 +
 +    if (MASTER(cr))
 +        fprintf(stderr, "ED: Initializing essential dynamics constraints.\n");
 +
 +    /* Needed for initializing radacc radius in do_edsam */
 +    ed->bFirst = 1;
 +
 +    /* The input file is read by the master and the edi structures are
 +     * initialized here. Input is stored in ed->edpar. Then the edi
 +     * structures are transferred to the other nodes */
 +    if (MASTER(cr))
 +    {
 +        snew(ed->edpar,1);
 +        /* Read the whole edi file at once: */
 +        read_edi_file(ed,ed->edpar,mtop->natoms,cr);
 +
 +        /* Initialization for every ED/flooding dataset. Flooding uses one edi dataset per
 +         * flooding vector, Essential dynamics can be applied to more than one structure
 +         * as well, but will be done in the order given in the edi file, so
 +         * expect different results for different order of edi file concatenation! */
 +        edi=ed->edpar;
 +        while(edi != NULL)
 +        {
 +            init_edi(mtop,ir,cr,ed,edi);
 +
 +            /* Init flooding parameters if needed */
 +            init_flood(edi,ed,ir->delta_t,cr);
 +
 +            edi=edi->next_edi;
 +            numedis++;
 +        }
 +    }
 +
 +    /* The master does the work here. The other nodes do not get the positions
 +     * before dd_partition_system, which is called after init_edsam */
 +    if (MASTER(cr))
 +    {
 +        /* Remove pbc, make molecule whole.
 +         * When ir->bContinuation=TRUE this has already been done, but ok.
 +         */
 +        snew(x_pbc,mtop->natoms);
 +        m_rveccopy(mtop->natoms,x,x_pbc);
 +        do_pbc_first_mtop(NULL,ir->ePBC,box,mtop,x_pbc);
 +
 +        /* Reset pointer to first ED data set which contains the actual ED data */
 +        edi=ed->edpar;
 +
 +        /* Loop over all ED/flooding data sets (usually only one, though) */
 +        for (nr_edi = 1; nr_edi <= numedis; nr_edi++)
 +        {
 +            /* We use srenew to allocate memory since the size of the buffers
 +             * is likely to change with every ED dataset */
 +            srenew(xfit  , edi->sref.nr );
 +            srenew(xstart, edi->sav.nr  );
 +
 +            /* Extract the positions of the atoms that will be fitted to the reference structure */
 +            for (i=0; i < edi->sref.nr; i++)
 +            {
 +                copy_rvec(x_pbc[edi->sref.anrs[i]], xfit[i]);
 +
-                 /* Save the sav positions such that in the next time step the molecule can
-                  * be made whole again (in the parallel case) */
-                 if (PAR(cr))
-                     copy_rvec(xstart[i], edi->sav.x_old[i]);
++                /* Save the sref positions such that in the next time step we can make the ED group whole
++                 * in case any of the atoms do not have the correct PBC representation */
++                copy_rvec(xfit[i], edi->sref.x_old[i]);
 +            }
 +
 +            /* Extract the positions of the atoms subject to ED sampling */
 +            for (i=0; i < edi->sav.nr; i++)
 +            {
 +                copy_rvec(x_pbc[edi->sav.anrs[i]], xstart[i]);
 +
-                 translate_and_rotate(edi->star.x, edi->sav.nr, fit_transvec, fit_rotmat);
-                 rad_project(edi, edi->star.x, &edi->vecs.radcon, cr);
++                /* Save the sav positions such that in the next time step we can make the ED group whole
++                 * in case any of the atoms do not have the correct PBC representation */
++                copy_rvec(xstart[i], edi->sav.x_old[i]);
 +            }
 +
 +            /* Make the fit to the REFERENCE structure, get translation and rotation */
 +            fit_to_reference(xfit, fit_transvec, fit_rotmat, edi);
 +
 +            /* Output how well we fit to the reference at the start */
 +            translate_and_rotate(xfit, edi->sref.nr, fit_transvec, fit_rotmat);
 +            fprintf(stderr, "ED: Initial RMSD from reference after fit = %f nm (dataset #%d)\n",
 +                    rmsd_from_structure(xfit, &edi->sref), nr_edi);
 +
 +            /* Now apply the translation and rotation to the atoms on which ED sampling will be performed */
 +            translate_and_rotate(xstart, edi->sav.nr, fit_transvec, fit_rotmat);
 +
 +            /* calculate initial projections */
 +            project(xstart, edi);
 +
++            /* For the target and origin structure both a reference (fit) and an
++             * average structure can be provided in make_edi. If both structures
++             * are the same, make_edi only stores one of them in the .edi file.
++             * If they differ, first the fit and then the average structure is stored
++             * in star (or sor), thus the number of entries in star/sor is
++             * (n_fit + n_av) with n_fit the size of the fitting group and n_av
++             * the size of the average group. */
++
 +            /* process target structure, if required */
 +            if (edi->star.nr > 0)
 +            {
 +                fprintf(stderr, "ED: Fitting target structure to reference structure\n");
++
 +                /* get translation & rotation for fit of target structure to reference structure */
 +                fit_to_reference(edi->star.x, fit_transvec, fit_rotmat, edi);
 +                /* do the fit */
-                 translate_and_rotate(edi->sori.x, edi->sav.nr, fit_transvec, fit_rotmat);
-                 rad_project(edi, edi->sori.x, &edi->vecs.radacc, cr);
-                 rad_project(edi, edi->sori.x, &edi->vecs.radfix, cr);
++                translate_and_rotate(edi->star.x, edi->star.nr, fit_transvec, fit_rotmat);
++                if (edi->star.nr == edi->sav.nr)
++                {
++                    avindex = 0;
++                }
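 +/* Makes sure the expected parameter label was found in the line just read */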
++                else /* edi->star.nr = edi->sref.nr + edi->sav.nr */
++                {
++                    /* The last sav.nr indices of the target structure correspond to
++                     * the average structure, which must be projected */
++                    avindex = edi->star.nr - edi->sav.nr;
++                }
++                rad_project(edi, &edi->star.x[avindex], &edi->vecs.radcon, cr);
 +            } else
 +                rad_project(edi, xstart, &edi->vecs.radcon, cr);
 +
 +            /* process structure that will serve as origin of expansion circle */
 +            if ( (eEDflood == ed->eEDtype) && (FALSE == edi->flood.bConstForce) )
 +                fprintf(stderr, "ED: Setting center of flooding potential (0 = average structure)\n");
++
 +            if (edi->sori.nr > 0)
 +            {
 +                fprintf(stderr, "ED: Fitting origin structure to reference structure\n");
++
 +                /* fit this structure to reference structure */
 +                fit_to_reference(edi->sori.x, fit_transvec, fit_rotmat, edi);
 +                /* do the fit */
-                     rad_project(edi, edi->sori.x, &edi->flood.vecs, cr);
++                translate_and_rotate(edi->sori.x, edi->sori.nr, fit_transvec, fit_rotmat);
++                if (edi->sori.nr == edi->sav.nr)
++                {
++                    avindex = 0;
++                }
++                else /* edi->sori.nr = edi->sref.nr + edi->sav.nr */
++                {
++                    /* For the projection, we need the last sav.nr indices of sori */
++                    avindex = edi->sori.nr - edi->sav.nr;
++                }
++
++                rad_project(edi, &edi->sori.x[avindex], &edi->vecs.radacc, cr);
++                rad_project(edi, &edi->sori.x[avindex], &edi->vecs.radfix, cr);
 +                if ( (eEDflood == ed->eEDtype) && (FALSE == edi->flood.bConstForce) )
 +                {
 +                    fprintf(stderr, "ED: The ORIGIN structure will define the flooding potential center.\n");
 +                    /* Set center of flooding potential to the ORIGIN structure */
-             communicate_group_positions(cr, buf->xcoll, buf->shifts_xcoll, buf->extra_shifts_xcoll, buf->bUpdateShifts, xs,
++                    rad_project(edi, &edi->sori.x[avindex], &edi->flood.vecs, cr);
 +                    /* We already know that no (moving) reference position was provided,
 +                     * therefore we can overwrite refproj[0]*/
 +                    copyEvecReference(&edi->flood.vecs);
 +                }
 +            }
 +            else /* No origin structure given */
 +            {
 +                rad_project(edi, xstart, &edi->vecs.radacc, cr);
 +                rad_project(edi, xstart, &edi->vecs.radfix, cr);
 +                if ( (eEDflood == ed->eEDtype) && (FALSE == edi->flood.bConstForce) )
 +                {
 +                    if (edi->flood.bHarmonic)
 +                    {
 +                        fprintf(stderr, "ED: A (possibly changing) ref. projection will define the flooding potential center.\n");
 +                        for (i=0; i<edi->flood.vecs.neig; i++)
 +                            edi->flood.vecs.refproj[i] = edi->flood.vecs.refproj0[i];
 +                    }
 +                    else
 +                    {
 +                        fprintf(stderr, "ED: The AVERAGE structure will define the flooding potential center.\n");
 +                        /* Set center of flooding potential to the center of the covariance matrix,
 +                         * i.e. the average structure, i.e. zero in the projected system */
 +                        for (i=0; i<edi->flood.vecs.neig; i++)
 +                            edi->flood.vecs.refproj[i] = 0.0;
 +                    }
 +                }
 +            }
 +            /* For convenience, output the center of the flooding potential for the eigenvectors */
 +            if ( (eEDflood == ed->eEDtype) && (FALSE == edi->flood.bConstForce) )
 +            {
 +                for (i=0; i<edi->flood.vecs.neig; i++)
 +                {
 +                    fprintf(stdout, "ED: EV %d flooding potential center: %11.4e", i, edi->flood.vecs.refproj[i]);
 +                    if (edi->flood.bHarmonic)
 +                        fprintf(stdout, " (adding %11.4e/timestep)", edi->flood.vecs.refprojslope[i]);
 +                    fprintf(stdout, "\n");
 +                }
 +            }
 +
 +            /* set starting projections for linsam */
 +            rad_project(edi, xstart, &edi->vecs.linacc, cr);
 +            rad_project(edi, xstart, &edi->vecs.linfix, cr);
 +
 +            /* Output to file, set the step to -1 so that write_edo knows it was called from init_edsam */
 +            if (ed->edo && !(ed->bStartFromCpt))
 +                write_edo(nr_edi, edi, ed, -1, 0);
 +
 +            /* Prepare for the next edi data set: */
 +            edi=edi->next_edi;
 +        }
 +        /* Cleaning up on the master node: */
 +        sfree(x_pbc);
 +        sfree(xfit);
 +        sfree(xstart);
 +
 +    } /* end of MASTER only section */
 +
 +    if (PAR(cr))
 +    {
 +        /* First let everybody know how many ED data sets to expect */
 +        gmx_bcast(sizeof(numedis), &numedis, cr);
 +        /* Broadcast the essential dynamics / flooding data to all nodes */
 +        broadcast_ed_data(cr, ed, numedis);
 +    }
 +    else
 +    {
 +        /* In the single-CPU case, point the local atom number pointers to the global
 +         * ones, so that we can use the same notation in the serial and parallel cases: */
 +
 +        /* Loop over all ED data sets (usually only one, though) */
 +        edi=ed->edpar;
 +        for (nr_edi = 1; nr_edi <= numedis; nr_edi++)
 +        {
 +            edi->sref.anrs_loc = edi->sref.anrs;
 +            edi->sav.anrs_loc  = edi->sav.anrs;
 +            edi->star.anrs_loc = edi->star.anrs;
 +            edi->sori.anrs_loc = edi->sori.anrs;
 +            /* For the same reason as above, make a dummy c_ind array: */
 +            snew(edi->sav.c_ind, edi->sav.nr);
 +            /* Initialize the array */
 +            for (i=0; i<edi->sav.nr; i++)
 +                edi->sav.c_ind[i] = i;
 +            /* In the general case we will need a different-sized array for the reference indices: */
 +            if (!edi->bRefEqAv)
 +            {
 +                snew(edi->sref.c_ind, edi->sref.nr);
 +                for (i=0; i<edi->sref.nr; i++)
 +                    edi->sref.c_ind[i] = i;
 +            }
 +            /* Point to the very same array in case of other structures: */
 +            edi->star.c_ind = edi->sav.c_ind;
 +            edi->sori.c_ind = edi->sav.c_ind;
 +            /* In the serial case, the local number of atoms is the global one: */
 +            edi->sref.nr_loc = edi->sref.nr;
 +            edi->sav.nr_loc  = edi->sav.nr;
 +            edi->star.nr_loc = edi->star.nr;
 +            edi->sori.nr_loc = edi->sori.nr;
 +
 +            /* And on we go to the next edi dataset */
 +            edi=edi->next_edi;
 +        }
 +    }
 +
 +    /* Allocate space for ED buffer variables */
 +    /* Again, loop over ED data sets */
 +    edi=ed->edpar;
 +    for (nr_edi = 1; nr_edi <= numedis; nr_edi++)
 +    {
 +        /* Allocate space for ED buffer */
 +        snew(edi->buf, 1);
 +        snew(edi->buf->do_edsam, 1);
 +
 +        /* Space for collective ED buffer variables */
 +
 +        /* Collective positions of atoms with the average indices */
 +        snew(edi->buf->do_edsam->xcoll                  , edi->sav.nr);
 +        snew(edi->buf->do_edsam->shifts_xcoll           , edi->sav.nr); /* buffer for xcoll shifts */
 +        snew(edi->buf->do_edsam->extra_shifts_xcoll     , edi->sav.nr);
 +        /* Collective positions of atoms with the reference indices */
 +        if (!edi->bRefEqAv)
 +        {
 +            snew(edi->buf->do_edsam->xc_ref             , edi->sref.nr);
 +            snew(edi->buf->do_edsam->shifts_xc_ref      , edi->sref.nr); /* To store the shifts in */
 +            snew(edi->buf->do_edsam->extra_shifts_xc_ref, edi->sref.nr);
 +        }
 +
 +        /* Get memory for flooding forces */
 +        snew(edi->flood.forces_cartesian                , edi->sav.nr);
 +
 +#ifdef DUMPEDI
 +        /* Dump it all into one file per process */
 +        dump_edi(edi, cr, nr_edi);
 +#endif
 +
 +        /* And on we go to the next edi dataset */
 +        edi=edi->next_edi;
 +    }
 +
 +    /* Flush the edo file so that the user can check some things
 +     * when the simulation has started */
 +    if (ed->edo)
 +        fflush(ed->edo);
 +}
 +
 +
 +void do_edsam(t_inputrec  *ir,
 +              gmx_large_int_t step,
 +              t_mdatoms   *md,
 +              t_commrec   *cr,
 +              rvec        xs[],   /* The local current positions on this processor */
 +              rvec        v[],    /* The velocities */
 +              matrix      box,
 +              gmx_edsam_t ed)
 +{
 +    int     i,edinr,iupdate=500;
 +    matrix  rotmat;         /* rotation matrix */
 +    rvec    transvec;       /* translation vector */
 +    rvec    dv,dx,x_unsh;   /* tmp vectors for velocity, distance, unshifted x coordinate */
 +    real    dt_1;           /* 1/dt */
 +    struct t_do_edsam *buf;
 +    t_edpar *edi;
 +    real    rmsdev=-1;      /* RMSD from reference structure prior to applying the constraints */
 +    gmx_bool bSuppress=FALSE; /* Write .edo file on master? */
 +
 +
 +    /* Check if ED sampling has to be performed */
 +    if ( ed->eEDtype==eEDnone )
 +        return;
 +
 +    /* Suppress output on first call of do_edsam if
 +     * two-step sd2 integrator is used */
 +    if ( (ir->eI==eiSD2) && (v != NULL) )
 +        bSuppress = TRUE;
 +
 +    dt_1 = 1.0/ir->delta_t;
 +
 +    /* Loop over all ED datasets (usually one) */
 +    edi  = ed->edpar;
 +    edinr = 0;
 +    while (edi != NULL)
 +    {
 +        edinr++;
 +        if (edi->bNeedDoEdsam)
 +        {
 +
 +            buf=edi->buf->do_edsam;
 +
 +            if (ed->bFirst)
 +                /* initialise radacc radius for slope criterion */
 +                buf->oldrad=calc_radius(&edi->vecs.radacc);
 +
 +            /* Copy the positions into buf->xc* arrays and after ED
 +             * feed back corrections to the official positions */
 +
 +            /* Broadcast the ED positions such that every node has all of them.
 +             * Every node contributes its local positions xs and stores them in
 +             * the collective buf->xcoll array. Note that for edinr > 1,
 +             * xs could already have been modified by an earlier ED dataset */
 +
-                 communicate_group_positions(cr, buf->xc_ref, buf->shifts_xc_ref, buf->extra_shifts_xc_ref, buf->bUpdateShifts, xs,
++            communicate_group_positions(cr, buf->xcoll, buf->shifts_xcoll, buf->extra_shifts_xcoll, PAR(cr) ? buf->bUpdateShifts : TRUE, xs,
 +                    edi->sav.nr, edi->sav.nr_loc, edi->sav.anrs_loc, edi->sav.c_ind, edi->sav.x_old,  box);
 +
 +#ifdef DEBUG_ED
 +            dump_xcoll(edi, buf, cr, step);
 +#endif
 +            /* Only assemble the reference positions if their indices differ from the average ones */
 +            if (!edi->bRefEqAv)
-             /* If bUpdateShifts was TRUE then the shifts have just been updated in get_positions.
-              * We do not need to uptdate the shifts until the next NS step */
++                communicate_group_positions(cr, buf->xc_ref, buf->shifts_xc_ref, buf->extra_shifts_xc_ref, PAR(cr) ? buf->bUpdateShifts : TRUE, xs,
 +                        edi->sref.nr, edi->sref.nr_loc, edi->sref.anrs_loc, edi->sref.c_ind, edi->sref.x_old, box);
 +
++            /* If bUpdateShifts was TRUE then the shifts have just been updated in communicate_group_positions.
++             * We do not need to update the shifts until the next NS step. Note that dd_make_local_ed_indices
++             * sets bUpdateShifts=TRUE in the parallel case. */
 +            buf->bUpdateShifts = FALSE;
 +
 +            /* Now all nodes have all of the ED positions in edi->sav->xcoll,
 +             * as well as the indices in edi->sav.anrs */
 +
 +            /* Fit the reference indices to the reference structure */
 +            if (edi->bRefEqAv)
 +                fit_to_reference(buf->xcoll , transvec, rotmat, edi);
 +            else
 +                fit_to_reference(buf->xc_ref, transvec, rotmat, edi);
 +
 +            /* Now apply the translation and rotation to the ED structure */
 +            translate_and_rotate(buf->xcoll, edi->sav.nr, transvec, rotmat);
 +
 +            /* Find out how well we fit to the reference (just for output steps) */
 +            if (do_per_step(step,edi->outfrq) && MASTER(cr))
 +            {
 +                if (edi->bRefEqAv)
 +                {
 +                    /* Indices of reference and average structures are identical,
 +                     * thus we can calculate the rmsd to SREF using xcoll */
 +                    rmsdev = rmsd_from_structure(buf->xcoll,&edi->sref);
 +                }
 +                else
 +                {
 +                    /* We have to translate & rotate the reference atoms first */
 +                    translate_and_rotate(buf->xc_ref, edi->sref.nr, transvec, rotmat);
 +                    rmsdev = rmsd_from_structure(buf->xc_ref,&edi->sref);
 +                }
 +            }
 +
 +            /* update radsam references, when required */
 +            if (do_per_step(step,edi->maxedsteps) && step >= edi->presteps)
 +            {
 +                project(buf->xcoll, edi);
 +                rad_project(edi, buf->xcoll, &edi->vecs.radacc, cr);
 +                rad_project(edi, buf->xcoll, &edi->vecs.radfix, cr);
 +                buf->oldrad=-1.e5;
 +            }
 +
 +            /* update radacc references, when required */
 +            if (do_per_step(step,iupdate) && step >= edi->presteps)
 +            {
 +                edi->vecs.radacc.radius = calc_radius(&edi->vecs.radacc);
 +                if (edi->vecs.radacc.radius - buf->oldrad < edi->slope)
 +                {
 +                    project(buf->xcoll, edi);
 +                    rad_project(edi, buf->xcoll, &edi->vecs.radacc, cr);
 +                    buf->oldrad = 0.0;
 +                } else
 +                    buf->oldrad = edi->vecs.radacc.radius;
 +            }
 +
 +            /* apply the constraints */
 +            if (step >= edi->presteps && ed_constraints(ed->eEDtype, edi))
 +            {
 +                /* ED constraints should be applied already in the first MD step
 +                 * (which is step 0), therefore we pass step+1 to the routine */
 +                ed_apply_constraints(buf->xcoll, edi, step+1 - ir->init_step, cr);
 +            }
 +
 +            /* write to edo, when required */
 +            if (do_per_step(step,edi->outfrq))
 +            {
 +                project(buf->xcoll, edi);
 +                if (MASTER(cr) && !bSuppress)
 +                    write_edo(edinr, edi, ed, step, rmsdev);
 +            }
 +
 +            /* Copy back the positions unless monitoring only */
 +            if (ed_constraints(ed->eEDtype, edi))
 +            {
 +                /* remove fitting */
 +                rmfit(edi->sav.nr, buf->xcoll, transvec, rotmat);
 +
 +                /* Copy the ED corrected positions into the coordinate array */
 +                /* Each node copies its local part. In the serial case, nat_loc is the
 +                 * total number of ED atoms */
 +                for (i=0; i<edi->sav.nr_loc; i++)
 +                {
 +                    /* Unshift local ED coordinate and store in x_unsh */
 +                    ed_unshift_single_coord(box, buf->xcoll[edi->sav.c_ind[i]],
 +                                            buf->shifts_xcoll[edi->sav.c_ind[i]], x_unsh);
 +
 +                    /* dx is the ED correction to the positions: */
 +                    rvec_sub(x_unsh, xs[edi->sav.anrs_loc[i]], dx);
 +
 +                    if (v != NULL)
 +                    {
 +                        /* dv is the ED correction to the velocity: */
 +                        svmul(dt_1, dx, dv);
 +                        /* apply the velocity correction: */
 +                        rvec_inc(v[edi->sav.anrs_loc[i]], dv);
 +                    }
 +                    /* Finally apply the position correction due to ED: */
 +                    copy_rvec(x_unsh, xs[edi->sav.anrs_loc[i]]);
 +                }
 +            }
 +        } /* END of if (edi->bNeedDoEdsam) */
 +
 +        /* Prepare for the next ED dataset */
 +        edi = edi->next_edi;
 +
 +    } /* END of loop over ED datasets */
 +
 +    ed->bFirst = FALSE;
 +}
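
For illustration, the per-atom correction applied in the loop above reduces to dx = x_ED - x_MD, dv = dx/dt, v += dv, x = x_ED. A minimal standalone sketch of that arithmetic (not part of the patch; plain double[3] arrays stand in for rvec and the time step value is only an assumed example):

    /* Standalone sketch of the ED position/velocity correction:
     * dx = x_ED - x_MD, dv = dx/dt, v += dv, then x = x_ED. */
    #include <stdio.h>

    int main(void)
    {
        double x_md[3] = {1.00, 2.00, 3.00};   /* current MD position           */
        double x_ed[3] = {1.05, 2.00, 2.95};   /* ED-corrected (unshifted) pos. */
        double v[3]    = {0.10, 0.00, -0.10};  /* current velocity              */
        double dt      = 0.002;                /* example time step             */
        double dt_1    = 1.0/dt;
        int    d;

        for (d = 0; d < 3; d++)
        {
            double dx = x_ed[d] - x_md[d];     /* ED correction to the position */
            v[d]   += dt_1*dx;                 /* corresponding velocity change */
            x_md[d] = x_ed[d];                 /* apply the position correction */
        }
        printf("x = %g %g %g  v = %g %g %g\n",
               x_md[0], x_md[1], x_md[2], v[0], v[1], v[2]);
        return 0;
    }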
index 3ee2e408613c5220d7bb6b1a2aa6da80f708baa0,0000000000000000000000000000000000000000..73094bb353f38f19d449698d30f0d31779b7d5e6
mode 100644,000000..100644
--- /dev/null
@@@ -1,386 -1,0 +1,385 @@@
-         const gmx_bool bNS,           /* IN:  NS step, the shifts have changed */
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 4.5
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2008, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 + 
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * Groningen Machine for Chemical Simulation
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +
 +#include "groupcoord.h"
 +#include "network.h"
 +#include "pbc.h"
 +#include "vec.h"
 +#include "smalloc.h"
 +#include "gmx_ga2la.h"
 +
 +#define MIN(a,b) (((a)<(b))?(a):(b))
 +
 +
 +
 +/* Select the indices of the group's atoms which are local and store them in 
 + * anrs_loc[0..nr_loc]. The indices are saved in coll_ind[] for later reduction
 + * in communicate_group_positions()
 + */
 +extern void dd_make_local_group_indices(
 +        gmx_ga2la_t   ga2la,
 +        const int     nr,          /* IN:  Total number of atoms in the group */
 +        int           anrs[],      /* IN:  Global atom numbers of the group's atoms */
 +        int           *nr_loc,     /* OUT: Number of group atoms found locally */
 +        int           *anrs_loc[], /* OUT: Local atom numbers of the group  */
 +        int           *nalloc_loc, /* IN+OUT: Allocation size of anrs_loc */
 +        int           coll_ind[])  /* OUT (opt): Where is this position found in the collective array? */
 +{
 +    int  i,ii;
 +    int  localnr;       
 +
 +    
 +    /* Loop over all the atom indices of the group to check
 +     * which ones are on the local node */
 +    localnr = 0;
 +    for(i=0; i<nr; i++)
 +    {
 +        if (ga2la_get_home(ga2la,anrs[i],&ii))
 +        {
 +            /* The atom with this index is a home atom */
 +            if (localnr >= *nalloc_loc) /* Check whether memory suffices */
 +            {
 +                *nalloc_loc = over_alloc_dd(localnr+1);
 +                /* We never need more memory than the number of atoms in the group */
 +                *nalloc_loc = MIN(*nalloc_loc, nr);
 +                srenew(*anrs_loc,*nalloc_loc);
 +            }
 +            /* Save the atom's index in the local atom numbers array */
 +            (*anrs_loc)[localnr] = ii;
 +
 +            if (coll_ind != NULL)
 +            {
 +                /* Keep track of where this local atom belongs in the collective index array.
 +                 * This is needed when reducing the local arrays to a collective/global array
 +                 * in communicate_group_positions */
 +                coll_ind[localnr] = i;
 +            }
 +
 +            /* add one to the local atom count */
 +            localnr++;
 +        }
 +    }
 + 
 +    /* Return the number of local atoms that were found */
 +    *nr_loc = localnr;
 +}
 +
 +
 +static void get_shifts_group(
 +        int    npbcdim, 
 +        matrix box,
 +        rvec   *xcoll,     /* IN:  Collective set of positions [0..nr] */
 +        int    nr,         /* IN:  Total number of atoms in the group */
 +        rvec   *xcoll_old, /* IN:  Positions from the last time step [0...nr] */
 +        ivec   *shifts)    /* OUT: Shifts for xcoll */
 +{
 +    int  i,m,d;
 +    rvec dx;
 +
 +
 +    /* Get the shifts such that each atom is within closest
 +     * distance to its position at the last NS time step after shifting.
 +     * If we start with a whole group, and always keep track of 
 +     * shift changes, the group will stay whole this way */
 +    for (i=0; i < nr; i++)
 +        clear_ivec(shifts[i]);
 +
 +    for (i=0; i<nr; i++)
 +    {
 +        /* The distance this atom moved since the last time step */
 +        /* If this is more than just a bit, it has changed its home pbc box */
 +        rvec_sub(xcoll[i],xcoll_old[i],dx);
 +
 +        for(m=npbcdim-1; m>=0; m--)
 +        {
 +            while (dx[m] < -0.5*box[m][m])
 +            {
 +                for(d=0; d<DIM; d++)
 +                    dx[d] += box[m][d];
 +                shifts[i][m]++;
 +            }
 +            while (dx[m] >= 0.5*box[m][m])
 +            {
 +                for(d=0; d<DIM; d++)
 +                    dx[d] -= box[m][d];
 +                shifts[i][m]--;
 +            }
 +        }
 +    }
 +}
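
For a rectangular box, the per-dimension logic above amounts to counting half-box crossings of the displacement since the last NS step. A minimal standalone sketch (not part of the patch; the box length and displacement are made-up example values):

    /* Standalone sketch of the per-dimension shift update used above for a
     * rectangular box of length L: increment the shift while the displacement
     * is below -L/2, decrement while it is >= L/2. */
    #include <stdio.h>

    static int shift_for_displacement(double dx, double L)
    {
        int shift = 0;

        while (dx <  -0.5*L) { dx += L; shift++; }
        while (dx >=  0.5*L) { dx -= L; shift--; }

        return shift;
    }

    int main(void)
    {
        /* An atom that moved +2.6 in a box of length 2.0 has wrapped once: shift = -1 */
        printf("shift = %d\n", shift_for_displacement(2.6, 2.0));
        return 0;
    }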
 +
 +
 +static void shift_positions_group(
 +        matrix box, 
 +        rvec   x[],      /* The positions [0..nr] */ 
 +        ivec   *is,      /* The shifts [0..nr] */ 
 +        int    nr)       /* The number of positions and shifts */
 +{
 +    int      i,tx,ty,tz;
 +
 +
 +    /* Loop over the group's atoms */
 +    if(TRICLINIC(box)) 
 +    {
 +        for (i=0; i < nr; i++)
 +        {
 +            tx=is[i][XX];
 +            ty=is[i][YY];
 +            tz=is[i][ZZ];
 +
 +            x[i][XX]=x[i][XX]+tx*box[XX][XX]+ty*box[YY][XX]+tz*box[ZZ][XX];
 +            x[i][YY]=x[i][YY]+ty*box[YY][YY]+tz*box[ZZ][YY];
 +            x[i][ZZ]=x[i][ZZ]+tz*box[ZZ][ZZ];
 +        }
 +    } else
 +    {
 +        for (i=0; i < nr; i++)
 +        {
 +            tx=is[i][XX];
 +            ty=is[i][YY];
 +            tz=is[i][ZZ];
 +
 +            x[i][XX]=x[i][XX]+tx*box[XX][XX];
 +            x[i][YY]=x[i][YY]+ty*box[YY][YY];
 +            x[i][ZZ]=x[i][ZZ]+tz*box[ZZ][ZZ];
 +        }
 +    }    
 +}
 +
 +
 +/* Assemble the positions of the group such that every node has all of them. 
 + * The atom indices are retrieved from anrs_loc[0..nr_loc] 
 + * Note that coll_ind[i] = i is needed in the serial case */
 +extern void communicate_group_positions(
 +        t_commrec  *cr, 
 +        rvec       *xcoll,        /* OUT: Collective array of positions */
 +        ivec       *shifts,       /* IN+OUT: Collective array of shifts for xcoll */
 +        ivec       *extra_shifts, /* BUF: Extra shifts since last time step */
-         /* To make the group whole, start with a whole group and each
-          * step move the assembled positions at closest distance to the positions 
-          * from the last step. First shift the positions with the saved shift 
-          * vectors (these are 0 when this routine is called for the first time!) */
-         shift_positions_group(box, xcoll, shifts, nr);
-         
-         /* Now check if some shifts changed since the last step.
-          * This only needs to be done when the shifts are expected to have changed,
-          * i.e. after neighboursearching */
-         if (bNS) 
++        const gmx_bool bNS,       /* IN:  NS step, the shifts have changed */
 +        rvec       *x_loc,        /* IN:  Local positions on this node */ 
 +        const int  nr,            /* IN:  Total number of atoms in the group */
 +        const int  nr_loc,        /* IN:  Local number of atoms in the group */
 +        int        *anrs_loc,     /* IN:  Local atom numbers */
 +        int        *coll_ind,     /* IN:  Collective index */
 +        rvec       *xcoll_old,    /* IN+OUT: Positions from the last time step, used to make group whole */
 +        matrix     box)
 +{
 +    int i;
 +
 +
 +    /* Zero out the groups' global position array */
 +    clear_rvecs(nr, xcoll);
 +
 +    /* Put the local positions that this node has into the right place of 
 +     * the collective array. Note that in the serial case, coll_ind[i] = i */
 +    for (i=0; i<nr_loc; i++)
 +        copy_rvec(x_loc[anrs_loc[i]], xcoll[coll_ind[i]]);
 +
 +    if (PAR(cr))
 +    {
 +        /* Add the arrays from all nodes together */
 +        gmx_sum(nr*3, xcoll[0], cr);
++    }
++    /* To make the group whole, start with a whole group and each
++     * step move the assembled positions at closest distance to the positions
++     * from the last step. First shift the positions with the saved shift
++     * vectors (these are 0 when this routine is called for the first time!) */
++    shift_positions_group(box, xcoll, shifts, nr);
++
++    /* Now check if some shifts changed since the last step.
++     * This only needs to be done when the shifts are expected to have changed,
++     * i.e. after neighboursearching */
++    if (bNS)
++    {
++        get_shifts_group(3, box, xcoll, nr, xcoll_old, extra_shifts);
++
++        /* Shift with the additional shifts such that we get a whole group now */
++        shift_positions_group(box, xcoll, extra_shifts, nr);
 +
-             get_shifts_group(3, box, xcoll, nr, xcoll_old, extra_shifts);
-             
-             /* Shift with the additional shifts such that we get a whole group now */
-             shift_positions_group(box, xcoll, extra_shifts, nr);
-             
-             /* Add the shift vectors together for the next time step */
-             for (i=0; i<nr; i++)
-             {
-                 shifts[i][XX] += extra_shifts[i][XX];
-                 shifts[i][YY] += extra_shifts[i][YY];
-                 shifts[i][ZZ] += extra_shifts[i][ZZ];
-             }
-             
-             /* Store current correctly-shifted positions for comparison in the next NS time step */
-             for (i=0; i<nr; i++)
-                 copy_rvec(xcoll[i],xcoll_old[i]);   
++        /* Add the shift vectors together for the next time step */
++        for (i=0; i<nr; i++)
 +        {
++            shifts[i][XX] += extra_shifts[i][XX];
++            shifts[i][YY] += extra_shifts[i][YY];
++            shifts[i][ZZ] += extra_shifts[i][ZZ];
 +        }
++
++        /* Store current correctly-shifted positions for comparison in the next NS time step */
++        for (i=0; i<nr; i++)
++            copy_rvec(xcoll[i],xcoll_old[i]);
 +    }
 +}
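
For illustration, the bookkeeping above can be reduced to one dimension: apply the saved shift, detect any additional half-box crossing relative to the stored reference, fold it into the saved shift, and store the now-whole position as the new reference. A minimal standalone sketch (not part of the patch; box length, positions, and the assumption that every step is an NS step are made up for the example):

    /* Standalone 1-D sketch of keeping a position "whole" across PBC wraps by
     * accumulating shifts between NS steps. */
    #include <stdio.h>

    int main(void)
    {
        double L       = 2.0;   /* box length                                 */
        double x_old   = 0.9;   /* whole position stored at the last NS step  */
        int    shift   = 0;     /* accumulated shift                          */
        double x_in[4] = {1.1, -0.8, -0.6, -0.4};  /* wrapped input positions */
        int    step;

        for (step = 0; step < 4; step++)
        {
            double x     = x_in[step] + shift*L;   /* apply the saved shift   */
            double dx    = x - x_old;
            int    extra = 0;

            /* treat every step as an NS step in this toy example */
            while (dx <  -0.5*L) { dx += L; extra++; }
            while (dx >=  0.5*L) { dx -= L; extra--; }

            x     += extra*L;                      /* make the position whole */
            shift += extra;                        /* remember for next steps */
            x_old  = x;                            /* new reference           */
            printf("step %d: x = %4.1f shift = %d\n", step, x, shift);
        }
        return 0;
    }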
 +
 +
 +/* Determine the (weighted) sum vector from positions x */
 +extern double get_sum_of_positions(rvec x[], real weight[], const int nat, dvec dsumvec)
 +{
 +    int i;
 +    rvec x_weighted;
 +    double weight_sum = 0.0;
 +
 +
 +    /* Zero out the center */
 +    clear_dvec(dsumvec);
 +
 +    /* Loop over all atoms and add their weighted position vectors */
 +    if (weight != NULL)
 +    {
 +        for (i=0; i<nat; i++)
 +        {
 +            weight_sum += weight[i];
 +            svmul(weight[i], x[i], x_weighted);
 +            dsumvec[XX] += x_weighted[XX];
 +            dsumvec[YY] += x_weighted[YY];
 +            dsumvec[ZZ] += x_weighted[ZZ];
 +        }
 +    }
 +    else
 +    {
 +        for (i=0; i<nat; i++)
 +        {
 +            dsumvec[XX] += x[i][XX];
 +            dsumvec[YY] += x[i][YY];
 +            dsumvec[ZZ] += x[i][ZZ];
 +        }
 +    }
 +    return weight_sum;
 +}
 +
 +
 +/* Determine center of structure from collective positions x */
 +extern void get_center(rvec x[], real weight[], const int nr, rvec rcenter)
 +{
 +    dvec   dcenter;
 +    double weight_sum, denom;
 +
 +    
 +    weight_sum = get_sum_of_positions(x, weight, nr, dcenter);
 +    
 +    if (weight != NULL)
 +        denom = weight_sum; /* Divide by the sum of weight */
 +    else
 +        denom = nr;        /* Divide by the number of atoms */
 +        
 +    dsvmul(1.0/denom, dcenter, dcenter);
 +    
 +    rcenter[XX] = dcenter[XX];
 +    rcenter[YY] = dcenter[YY];
 +    rcenter[ZZ] = dcenter[ZZ];
 +}
 +
 +
 +/* Get the center from local positions that already have the correct
 + * PBC representation */
 +extern void get_center_comm(
 +        t_commrec *cr,
 +        rvec x_loc[],       /* Local positions */
 +        real weight_loc[],  /* Local masses or other weights */
 +        int nr_loc,         /* Local number of atoms */
 +        int nr_group,       /* Total number of atoms of the group */ 
 +        rvec center)        /* Weighted center */
 +{
 +    double weight_sum, denom;
 +    dvec   dsumvec;
 +    double buf[4];    
 +    
 +    
 +    weight_sum = get_sum_of_positions(x_loc, weight_loc, nr_loc, dsumvec);
 +    
 +    /* Add the local contributions from all nodes. Put the sum vector and the 
 +     * weight in a buffer array so that we need only a single communication
 +     * call. */
 +    if (PAR(cr))
 +    {
 +        buf[0] = dsumvec[XX];
 +        buf[1] = dsumvec[YY];
 +        buf[2] = dsumvec[ZZ];
 +        buf[3] = weight_sum;
 +        
 +        /* Communicate buffer */
 +        gmx_sumd(4, buf, cr);
 +        
 +        dsumvec[XX] = buf[0];
 +        dsumvec[YY] = buf[1];
 +        dsumvec[ZZ] = buf[2];
 +        weight_sum  = buf[3];
 +    }
 +    
 +    if (weight_loc != NULL)
 +        denom = 1.0/weight_sum; /* Divide by the sum of weights, e.g. to get the center of mass */
 +    else
 +        denom = 1.0/nr_group;   /* Divide by the number of atoms to get the geometrical center */
 +        
 +    center[XX] = dsumvec[XX]*denom;
 +    center[YY] = dsumvec[YY]*denom;
 +    center[ZZ] = dsumvec[ZZ]*denom;
 +}
 +
 +
 +/* Translate x with transvec */
 +extern void translate_x(rvec x[], const int nr, const rvec transvec)
 +{
 +    int i;
 +    
 +    
 +    for (i=0; i<nr; i++)
 +        rvec_inc(x[i], transvec);
 +}
 +
 +
 +extern void rotate_x(rvec x[], const int nr, matrix rmat)
 +{
 +    int i,j,k;
 +    rvec x_old;
 +
 +    
 +    /* Apply the rotation matrix */
 +    for (i=0; i<nr; i++)
 +    {
 +        for (j=0; j<3; j++)
 +            x_old[j] = x[i][j];
 +        for (j=0; j<3; j++)
 +        {
 +            x[i][j] = 0;
 +            for (k=0; k<3; k++)
 +                x[i][j] += rmat[k][j]*x_old[k];
 +        }
 +    }
 +}
 +
index 781629b4f9ed5fc9dcabc257e026277e3431546b,0000000000000000000000000000000000000000..159ad9280ec01925c66c60d22b9a8b08bf4333e6
mode 100644,000000..100644
--- /dev/null
@@@ -1,2486 -1,0 +1,2497 @@@
-         fprintf(fp,"\nReached the maximum number of steps before reaching Fmax < %g\n",ftol);
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <string.h>
 +#include <time.h>
 +#include <math.h>
 +#include "sysstuff.h"
 +#include "string2.h"
 +#include "network.h"
 +#include "confio.h"
 +#include "copyrite.h"
 +#include "smalloc.h"
 +#include "nrnb.h"
 +#include "main.h"
 +#include "force.h"
 +#include "macros.h"
 +#include "random.h"
 +#include "names.h"
 +#include "gmx_fatal.h"
 +#include "txtdump.h"
 +#include "typedefs.h"
 +#include "update.h"
 +#include "constr.h"
 +#include "vec.h"
 +#include "statutil.h"
 +#include "tgroup.h"
 +#include "mdebin.h"
 +#include "vsite.h"
 +#include "force.h"
 +#include "mdrun.h"
 +#include "domdec.h"
 +#include "partdec.h"
 +#include "trnio.h"
 +#include "mdatoms.h"
 +#include "ns.h"
 +#include "gmx_wallcycle.h"
 +#include "mtop_util.h"
 +#include "gmxfio.h"
 +#include "pme.h"
 +
 +#include "gromacs/linearalgebra/mtxio.h"
 +#include "gromacs/linearalgebra/sparsematrix.h"
 +
 +typedef struct {
 +  t_state s;
 +  rvec    *f;
 +  real    epot;
 +  real    fnorm;
 +  real    fmax;
 +  int     a_fmax;
 +} em_state_t;
 +
 +static em_state_t *init_em_state()
 +{
 +  em_state_t *ems;
 +
 +  snew(ems,1);
 +
 +  /* does this need to be here?  Should the array be declared differently (statically) in the state definition? */
 +  snew(ems->s.lambda,efptNR);
 +
 +  return ems;
 +}
 +
 +static void print_em_start(FILE *fplog,t_commrec *cr,gmx_runtime_t *runtime,
 +                           gmx_wallcycle_t wcycle,
 +                           const char *name)
 +{
 +    char buf[STRLEN];
 +
 +    runtime_start(runtime);
 +
 +    sprintf(buf,"Started %s",name);
 +    print_date_and_time(fplog,cr->nodeid,buf,NULL);
 +
 +    wallcycle_start(wcycle,ewcRUN);
 +}
 +static void em_time_end(FILE *fplog,t_commrec *cr,gmx_runtime_t *runtime,
 +                        gmx_wallcycle_t wcycle)
 +{
 +    wallcycle_stop(wcycle,ewcRUN);
 +
 +    runtime_end(runtime);
 +}
 +
 +static void sp_header(FILE *out,const char *minimizer,real ftol,int nsteps)
 +{
 +    fprintf(out,"\n");
 +    fprintf(out,"%s:\n",minimizer);
 +    fprintf(out,"   Tolerance (Fmax)   = %12.5e\n",ftol);
 +    fprintf(out,"   Number of steps    = %12d\n",nsteps);
 +}
 +
 +static void warn_step(FILE *fp,real ftol,gmx_bool bLastStep,gmx_bool bConstrain)
 +{
++    char buffer[2048];
 +    if (bLastStep)
 +    {
-         fprintf(fp,"\nStepsize too small, or no change in energy.\n"
-                 "Converged to machine precision,\n"
-                 "but not to the requested precision Fmax < %g\n",
-                 ftol);
-         if (sizeof(real)<sizeof(double))
-         {
-             fprintf(fp,"\nDouble precision normally gives you higher accuracy.\n");
-         }
-         if (bConstrain)
-         {
-             fprintf(fp,"You might need to increase your constraint accuracy, or turn\n"
-                     "off constraints alltogether (set constraints = none in mdp file)\n");
-         }
++        sprintf(buffer,
++                "\nEnergy minimization reached the maximum number"
++                "of steps before the forces reached the requested"
++                "precision Fmax < %g.\n",ftol);
 +    }
 +    else
 +    {
++        sprintf(buffer,
++                "\nEnergy minimization has stopped, but the forces have"
++                "not converged to the requested precision Fmax < %g (which"
++                "may not be possible for your system). It stopped"
++                "because the algorithm tried to make a new step whose size"
++                "was too small, or there was no change in the energy since"
++                "last step. Either way, we regard the minimization as"
++                "converged to within the available machine precision,"
++                "given your starting configuration and EM parameters.\n%s%s",
++                ftol,
++                sizeof(real)<sizeof(double) ?
++                "\nDouble precision normally gives you higher accuracy, but"
++                "this is often not needed for preparing to run molecular"
++                "dynamics.\n" :
++                "",
++                bConstrain ?
++                "You might need to increase your constraint accuracy, or turn\n"
++                "off constraints altogether (set constraints = none in mdp file)\n" :
++                "");
 +    }
++    fputs(wrap_lines(buffer, 78, 0, FALSE), fp);
 +}
 +
 +
 +
 +static void print_converged(FILE *fp,const char *alg,real ftol,
 +                          gmx_large_int_t count,gmx_bool bDone,gmx_large_int_t nsteps,
 +                          real epot,real fmax, int nfmax, real fnorm)
 +{
 +  char buf[STEPSTRSIZE];
 +
 +  if (bDone)
 +    fprintf(fp,"\n%s converged to Fmax < %g in %s steps\n",
 +          alg,ftol,gmx_step_str(count,buf));
 +  else if(count<nsteps)
 +    fprintf(fp,"\n%s converged to machine precision in %s steps,\n"
 +               "but did not reach the requested Fmax < %g.\n",
 +          alg,gmx_step_str(count,buf),ftol);
 +  else
 +    fprintf(fp,"\n%s did not converge to Fmax < %g in %s steps.\n",
 +          alg,ftol,gmx_step_str(count,buf));
 +
 +#ifdef GMX_DOUBLE
 +  fprintf(fp,"Potential Energy  = %21.14e\n",epot);
 +  fprintf(fp,"Maximum force     = %21.14e on atom %d\n",fmax,nfmax+1);
 +  fprintf(fp,"Norm of force     = %21.14e\n",fnorm);
 +#else
 +  fprintf(fp,"Potential Energy  = %14.7e\n",epot);
 +  fprintf(fp,"Maximum force     = %14.7e on atom %d\n",fmax,nfmax+1);
 +  fprintf(fp,"Norm of force     = %14.7e\n",fnorm);
 +#endif
 +}
 +
 +static void get_f_norm_max(t_commrec *cr,
 +                         t_grpopts *opts,t_mdatoms *mdatoms,rvec *f,
 +                         real *fnorm,real *fmax,int *a_fmax)
 +{
 +  double fnorm2,*sum;
 +  real fmax2,fmax2_0,fam;
 +  int  la_max,a_max,start,end,i,m,gf;
 +
 +  /* This routine finds the largest force and returns it.
 +   * On parallel machines the global max is taken.
 +   */
 +  fnorm2 = 0;
 +  fmax2 = 0;
 +  la_max = -1;
 +  gf = 0;
 +  start = mdatoms->start;
 +  end   = mdatoms->homenr + start;
 +  if (mdatoms->cFREEZE) {
 +    for(i=start; i<end; i++) {
 +      gf = mdatoms->cFREEZE[i];
 +      fam = 0;
 +      for(m=0; m<DIM; m++)
 +      if (!opts->nFreeze[gf][m])
 +        fam += sqr(f[i][m]);
 +      fnorm2 += fam;
 +      if (fam > fmax2) {
 +      fmax2  = fam;
 +      la_max = i;
 +      }
 +    }
 +  } else {
 +    for(i=start; i<end; i++) {
 +      fam = norm2(f[i]);
 +      fnorm2 += fam;
 +      if (fam > fmax2) {
 +      fmax2  = fam;
 +      la_max = i;
 +      }
 +    }
 +  }
 +
 +  if (la_max >= 0 && DOMAINDECOMP(cr)) {
 +    a_max = cr->dd->gatindex[la_max];
 +  } else {
 +    a_max = la_max;
 +  }
 +  if (PAR(cr)) {
 +    snew(sum,2*cr->nnodes+1);
 +    sum[2*cr->nodeid]   = fmax2;
 +    sum[2*cr->nodeid+1] = a_max;
 +    sum[2*cr->nnodes]   = fnorm2;
 +    gmx_sumd(2*cr->nnodes+1,sum,cr);
 +    fnorm2 = sum[2*cr->nnodes];
 +    /* Determine the global maximum */
 +    for(i=0; i<cr->nnodes; i++) {
 +      if (sum[2*i] > fmax2) {
 +      fmax2 = sum[2*i];
 +      a_max = (int)(sum[2*i+1] + 0.5);
 +      }
 +    }
 +    sfree(sum);
 +  }
 +
 +  if (fnorm)
 +    *fnorm = sqrt(fnorm2);
 +  if (fmax)
 +    *fmax  = sqrt(fmax2);
 +  if (a_fmax)
 +    *a_fmax = a_max;
 +}
 +
 +static void get_state_f_norm_max(t_commrec *cr,
 +                         t_grpopts *opts,t_mdatoms *mdatoms,
 +                         em_state_t *ems)
 +{
 +  get_f_norm_max(cr,opts,mdatoms,ems->f,&ems->fnorm,&ems->fmax,&ems->a_fmax);
 +}
 +
 +void init_em(FILE *fplog,const char *title,
 +             t_commrec *cr,t_inputrec *ir,
 +             t_state *state_global,gmx_mtop_t *top_global,
 +             em_state_t *ems,gmx_localtop_t **top,
 +             rvec **f,rvec **f_global,
 +             t_nrnb *nrnb,rvec mu_tot,
 +             t_forcerec *fr,gmx_enerdata_t **enerd,
 +             t_graph **graph,t_mdatoms *mdatoms,gmx_global_stat_t *gstat,
 +             gmx_vsite_t *vsite,gmx_constr_t constr,
 +             int nfile,const t_filenm fnm[],
 +             gmx_mdoutf_t **outf,t_mdebin **mdebin)
 +{
 +    int  start,homenr,i;
 +    real dvdlambda;
 +
 +    if (fplog)
 +    {
 +        fprintf(fplog,"Initiating %s\n",title);
 +    }
 +
 +    state_global->ngtc = 0;
 +
 +    /* Initialize lambda variables */
 +    initialize_lambdas(fplog,ir,&(state_global->fep_state),state_global->lambda,NULL);
 +
 +    init_nrnb(nrnb);
 +
 +    if (DOMAINDECOMP(cr))
 +    {
 +        *top = dd_init_local_top(top_global);
 +
 +        dd_init_local_state(cr->dd,state_global,&ems->s);
 +
 +        *f = NULL;
 +
 +        /* Distribute the charge groups over the nodes from the master node */
 +        dd_partition_system(fplog,ir->init_step,cr,TRUE,1,
 +                            state_global,top_global,ir,
 +                            &ems->s,&ems->f,mdatoms,*top,
 +                            fr,vsite,NULL,constr,
 +                            nrnb,NULL,FALSE);
 +        dd_store_state(cr->dd,&ems->s);
 +
 +        if (ir->nstfout)
 +        {
 +            snew(*f_global,top_global->natoms);
 +        }
 +        else
 +        {
 +            *f_global = NULL;
 +        }
 +        *graph = NULL;
 +    }
 +    else
 +    {
 +        snew(*f,top_global->natoms);
 +
 +        /* Just copy the state */
 +        ems->s = *state_global;
 +        snew(ems->s.x,ems->s.nalloc);
 +        snew(ems->f,ems->s.nalloc);
 +        for(i=0; i<state_global->natoms; i++)
 +        {
 +            copy_rvec(state_global->x[i],ems->s.x[i]);
 +        }
 +        copy_mat(state_global->box,ems->s.box);
 +
 +        if (PAR(cr) && ir->eI != eiNM)
 +        {
 +            /* Initialize the particle decomposition and split the topology */
 +            *top = split_system(fplog,top_global,ir,cr);
 +
 +            pd_cg_range(cr,&fr->cg0,&fr->hcg);
 +        }
 +        else
 +        {
 +            *top = gmx_mtop_generate_local_top(top_global,ir);
 +        }
 +        *f_global = *f;
 +
 +        if (ir->ePBC != epbcNONE && !ir->bPeriodicMols)
 +        {
 +            *graph = mk_graph(fplog,&((*top)->idef),0,top_global->natoms,FALSE,FALSE);
 +        }
 +        else
 +        {
 +            *graph = NULL;
 +        }
 +
 +        if (PARTDECOMP(cr))
 +        {
 +            pd_at_range(cr,&start,&homenr);
 +            homenr -= start;
 +        }
 +        else
 +        {
 +            start  = 0;
 +            homenr = top_global->natoms;
 +        }
 +        atoms2md(top_global,ir,0,NULL,start,homenr,mdatoms);
 +        update_mdatoms(mdatoms,state_global->lambda[efptFEP]);
 +
 +        if (vsite)
 +        {
 +            set_vsite_top(vsite,*top,mdatoms,cr);
 +        }
 +    }
 +
 +    if (constr)
 +    {
 +        if (ir->eConstrAlg == econtSHAKE &&
 +            gmx_mtop_ftype_count(top_global,F_CONSTR) > 0)
 +        {
 +            gmx_fatal(FARGS,"Can not do energy minimization with %s, use %s\n",
 +                      econstr_names[econtSHAKE],econstr_names[econtLINCS]);
 +        }
 +
 +        if (!DOMAINDECOMP(cr))
 +        {
 +            set_constraints(constr,*top,ir,mdatoms,cr);
 +        }
 +
 +        if (!ir->bContinuation)
 +        {
 +            /* Constrain the starting coordinates */
 +            dvdlambda=0;
 +            constrain(PAR(cr) ? NULL : fplog,TRUE,TRUE,constr,&(*top)->idef,
 +                      ir,NULL,cr,-1,0,mdatoms,
 +                      ems->s.x,ems->s.x,NULL,ems->s.box,
 +                      ems->s.lambda[efptFEP],&dvdlambda,
 +                      NULL,NULL,nrnb,econqCoord,FALSE,0,0);
 +        }
 +    }
 +
 +    if (PAR(cr))
 +    {
 +        *gstat = global_stat_init(ir);
 +    }
 +
 +    *outf = init_mdoutf(nfile,fnm,0,cr,ir,NULL);
 +
 +    snew(*enerd,1);
 +    init_enerdata(top_global->groups.grps[egcENER].nr,ir->fepvals->n_lambda,
 +                  *enerd);
 +
 +    if (mdebin != NULL)
 +    {
 +        /* Init bin for energy stuff */
 +        *mdebin = init_mdebin((*outf)->fp_ene,top_global,ir,NULL);
 +    }
 +
 +    clear_rvec(mu_tot);
 +    calc_shifts(ems->s.box,fr->shift_vec);
 +}
 +
 +static void finish_em(FILE *fplog,t_commrec *cr,gmx_mdoutf_t *outf,
 +                      gmx_runtime_t *runtime,gmx_wallcycle_t wcycle)
 +{
 +  if (!(cr->duty & DUTY_PME)) {
 +    /* Tell the PME only node to finish */
 +    gmx_pme_finish(cr);
 +  }
 +
 +  done_mdoutf(outf);
 +
 +  em_time_end(fplog,cr,runtime,wcycle);
 +}
 +
 +static void swap_em_state(em_state_t *ems1,em_state_t *ems2)
 +{
 +  em_state_t tmp;
 +
 +  tmp   = *ems1;
 +  *ems1 = *ems2;
 +  *ems2 = tmp;
 +}
 +
 +static void copy_em_coords(em_state_t *ems,t_state *state)
 +{
 +    int i;
 +
 +    for(i=0; (i<state->natoms); i++)
 +    {
 +        copy_rvec(ems->s.x[i],state->x[i]);
 +    }
 +}
 +
 +static void write_em_traj(FILE *fplog,t_commrec *cr,
 +                          gmx_mdoutf_t *outf,
 +                          gmx_bool bX,gmx_bool bF,const char *confout,
 +                          gmx_mtop_t *top_global,
 +                          t_inputrec *ir,gmx_large_int_t step,
 +                          em_state_t *state,
 +                          t_state *state_global,rvec *f_global)
 +{
 +    int mdof_flags;
 +
 +    if ((bX || bF || confout != NULL) && !DOMAINDECOMP(cr))
 +    {
 +        copy_em_coords(state,state_global);
 +        f_global = state->f;
 +    }
 +
 +    mdof_flags = 0;
 +    if (bX) { mdof_flags |= MDOF_X; }
 +    if (bF) { mdof_flags |= MDOF_F; }
 +    write_traj(fplog,cr,outf,mdof_flags,
 +               top_global,step,(double)step,
 +               &state->s,state_global,state->f,f_global,NULL,NULL);
 +
 +    if (confout != NULL && MASTER(cr))
 +    {
 +        if (ir->ePBC != epbcNONE && !ir->bPeriodicMols && DOMAINDECOMP(cr))
 +        {
 +            /* Make molecules whole only for confout writing */
 +            do_pbc_mtop(fplog,ir->ePBC,state_global->box,top_global,
 +                        state_global->x);
 +        }
 +
 +        write_sto_conf_mtop(confout,
 +                            *top_global->name,top_global,
 +                            state_global->x,NULL,ir->ePBC,state_global->box);
 +    }
 +}
 +
 +static void do_em_step(t_commrec *cr,t_inputrec *ir,t_mdatoms *md,
 +                     em_state_t *ems1,real a,rvec *f,em_state_t *ems2,
 +                     gmx_constr_t constr,gmx_localtop_t *top,
 +                     t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +                     gmx_large_int_t count)
 +
 +{
 +  t_state *s1,*s2;
 +  int  start,end,gf,i,m;
 +  rvec *x1,*x2;
 +  real dvdlambda;
 +
 +  s1 = &ems1->s;
 +  s2 = &ems2->s;
 +
 +  if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
 +    gmx_incons("state mismatch in do_em_step");
 +
 +  s2->flags = s1->flags;
 +
 +  if (s2->nalloc != s1->nalloc) {
 +    s2->nalloc = s1->nalloc;
 +    srenew(s2->x,s1->nalloc);
 +    srenew(ems2->f,  s1->nalloc);
 +    if (s2->flags & (1<<estCGP))
 +      srenew(s2->cg_p,  s1->nalloc);
 +  }
 +
 +  s2->natoms = s1->natoms;
 +  /* Copy free energy state -> is this necessary? */
 +  for (i=0;i<efptNR;i++)
 +  {
 +      s2->lambda[i] = s1->lambda[i];
 +  }
 +  copy_mat(s1->box,s2->box);
 +
 +  start = md->start;
 +  end   = md->start + md->homenr;
 +
 +  x1 = s1->x;
 +  x2 = s2->x;
 +  gf = 0;
 +  for(i=start; i<end; i++) {
 +    if (md->cFREEZE)
 +      gf = md->cFREEZE[i];
 +    for(m=0; m<DIM; m++) {
 +      if (ir->opts.nFreeze[gf][m])
 +      x2[i][m] = x1[i][m];
 +      else
 +      x2[i][m] = x1[i][m] + a*f[i][m];
 +    }
 +  }
 +
 +  if (s2->flags & (1<<estCGP)) {
 +    /* Copy the CG p vector */
 +    x1 = s1->cg_p;
 +    x2 = s2->cg_p;
 +    for(i=start; i<end; i++)
 +      copy_rvec(x1[i],x2[i]);
 +  }
 +
 +  if (DOMAINDECOMP(cr)) {
 +    s2->ddp_count = s1->ddp_count;
 +    if (s2->cg_gl_nalloc < s1->cg_gl_nalloc) {
 +      s2->cg_gl_nalloc = s1->cg_gl_nalloc;
 +      srenew(s2->cg_gl,s2->cg_gl_nalloc);
 +    }
 +    s2->ncg_gl = s1->ncg_gl;
 +    for(i=0; i<s2->ncg_gl; i++)
 +      s2->cg_gl[i] = s1->cg_gl[i];
 +    s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
 +  }
 +
 +  if (constr) {
 +    wallcycle_start(wcycle,ewcCONSTR);
 +    dvdlambda = 0;
 +    constrain(NULL,TRUE,TRUE,constr,&top->idef,
 +              ir,NULL,cr,count,0,md,
 +              s1->x,s2->x,NULL,s2->box,s2->lambda[efptBONDED],
 +              &dvdlambda,NULL,NULL,nrnb,econqCoord,FALSE,0,0);
 +    wallcycle_stop(wcycle,ewcCONSTR);
 +  }
 +}
 +
 +static void em_dd_partition_system(FILE *fplog,int step,t_commrec *cr,
 +                                   gmx_mtop_t *top_global,t_inputrec *ir,
 +                                   em_state_t *ems,gmx_localtop_t *top,
 +                                   t_mdatoms *mdatoms,t_forcerec *fr,
 +                                   gmx_vsite_t *vsite,gmx_constr_t constr,
 +                                   t_nrnb *nrnb,gmx_wallcycle_t wcycle)
 +{
 +    /* Repartition the domain decomposition */
 +    wallcycle_start(wcycle,ewcDOMDEC);
 +    dd_partition_system(fplog,step,cr,FALSE,1,
 +                        NULL,top_global,ir,
 +                        &ems->s,&ems->f,
 +                        mdatoms,top,fr,vsite,NULL,constr,
 +                        nrnb,wcycle,FALSE);
 +    dd_store_state(cr->dd,&ems->s);
 +    wallcycle_stop(wcycle,ewcDOMDEC);
 +}
 +
 +static void evaluate_energy(FILE *fplog,gmx_bool bVerbose,t_commrec *cr,
 +                            t_state *state_global,gmx_mtop_t *top_global,
 +                            em_state_t *ems,gmx_localtop_t *top,
 +                            t_inputrec *inputrec,
 +                            t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +                            gmx_global_stat_t gstat,
 +                            gmx_vsite_t *vsite,gmx_constr_t constr,
 +                            t_fcdata *fcd,
 +                            t_graph *graph,t_mdatoms *mdatoms,
 +                            t_forcerec *fr,rvec mu_tot,
 +                            gmx_enerdata_t *enerd,tensor vir,tensor pres,
 +                            gmx_large_int_t count,gmx_bool bFirst)
 +{
 +  real t;
 +  gmx_bool bNS;
 +  int  nabnsb;
 +  tensor force_vir,shake_vir,ekin;
 +  real dvdlambda,prescorr,enercorr,dvdlcorr;
 +  real terminate=0;
 +
 +  /* Set the time to the initial time, the time does not change during EM */
 +  t = inputrec->init_t;
 +
 +  if (bFirst ||
 +      (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count)) {
 +    /* This is the first state or an old state used before the last ns */
 +    bNS = TRUE;
 +  } else {
 +    bNS = FALSE;
 +    if (inputrec->nstlist > 0) {
 +      bNS = TRUE;
 +    } else if (inputrec->nstlist == -1) {
 +      nabnsb = natoms_beyond_ns_buffer(inputrec,fr,&top->cgs,NULL,ems->s.x);
 +      if (PAR(cr))
 +      gmx_sumi(1,&nabnsb,cr);
 +      bNS = (nabnsb > 0);
 +    }
 +  }
 +
 +  if (vsite)
 +    construct_vsites(fplog,vsite,ems->s.x,nrnb,1,NULL,
 +                   top->idef.iparams,top->idef.il,
 +                   fr->ePBC,fr->bMolPBC,graph,cr,ems->s.box);
 +
 +  if (DOMAINDECOMP(cr)) {
 +    if (bNS) {
 +      /* Repartition the domain decomposition */
 +      em_dd_partition_system(fplog,count,cr,top_global,inputrec,
 +                           ems,top,mdatoms,fr,vsite,constr,
 +                           nrnb,wcycle);
 +    }
 +  }
 +
 +    /* Calc force & energy on new trial position  */
 +    /* do_force always puts the charge groups in the box and shifts again
 +     * We do not unshift, so molecules are always whole in congrad.c
 +     */
 +    do_force(fplog,cr,inputrec,
 +             count,nrnb,wcycle,top,top_global,&top_global->groups,
 +             ems->s.box,ems->s.x,&ems->s.hist,
 +             ems->f,force_vir,mdatoms,enerd,fcd,
 +             ems->s.lambda,graph,fr,vsite,mu_tot,t,NULL,NULL,TRUE,
 +             GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES | GMX_FORCE_VIRIAL |
 +             (bNS ? GMX_FORCE_NS | GMX_FORCE_DOLR : 0));
 +
 +    /* Clear the unused shake virial and pressure */
 +    clear_mat(shake_vir);
 +    clear_mat(pres);
 +
 +    /* Communicate stuff when parallel */
 +    if (PAR(cr) && inputrec->eI != eiNM)
 +    {
 +        wallcycle_start(wcycle,ewcMoveE);
 +
 +        global_stat(fplog,gstat,cr,enerd,force_vir,shake_vir,mu_tot,
 +                    inputrec,NULL,NULL,NULL,1,&terminate,
 +                    top_global,&ems->s,FALSE,
 +                    CGLO_ENERGY |
 +                    CGLO_PRESSURE |
 +                    CGLO_CONSTRAINT |
 +                    CGLO_FIRSTITERATE);
 +
 +        wallcycle_stop(wcycle,ewcMoveE);
 +    }
 +
 +    /* Calculate long range corrections to pressure and energy */
 +    calc_dispcorr(fplog,inputrec,fr,count,top_global->natoms,ems->s.box,ems->s.lambda[efptVDW],
 +                  pres,force_vir,&prescorr,&enercorr,&dvdlcorr);
 +    enerd->term[F_DISPCORR] = enercorr;
 +    enerd->term[F_EPOT] += enercorr;
 +    enerd->term[F_PRES] += prescorr;
 +    enerd->term[F_DVDL] += dvdlcorr;
 +
 +  ems->epot = enerd->term[F_EPOT];
 +
 +  if (constr) {
 +    /* Project out the constraint components of the force */
 +    wallcycle_start(wcycle,ewcCONSTR);
 +    dvdlambda = 0;
 +    constrain(NULL,FALSE,FALSE,constr,&top->idef,
 +              inputrec,NULL,cr,count,0,mdatoms,
 +              ems->s.x,ems->f,ems->f,ems->s.box,ems->s.lambda[efptBONDED],&dvdlambda,
 +              NULL,&shake_vir,nrnb,econqForceDispl,FALSE,0,0);
 +    if (fr->bSepDVDL && fplog)
 +      fprintf(fplog,sepdvdlformat,"Constraints",t,dvdlambda);
 +    enerd->term[F_DVDL_BONDED] += dvdlambda;
 +    m_add(force_vir,shake_vir,vir);
 +    wallcycle_stop(wcycle,ewcCONSTR);
 +  } else {
 +    copy_mat(force_vir,vir);
 +  }
 +
 +  clear_mat(ekin);
 +  enerd->term[F_PRES] =
 +    calc_pres(fr->ePBC,inputrec->nwall,ems->s.box,ekin,vir,pres);
 +
 +  sum_dhdl(enerd,ems->s.lambda,inputrec->fepvals);
 +
 +    if (EI_ENERGY_MINIMIZATION(inputrec->eI))
 +    {
 +        get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,ems);
 +    }
 +}
 +
 +static double reorder_partsum(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
 +                            gmx_mtop_t *mtop,
 +                            em_state_t *s_min,em_state_t *s_b)
 +{
 +  rvec *fm,*fb,*fmg;
 +  t_block *cgs_gl;
 +  int ncg,*cg_gl,*index,c,cg,i,a0,a1,a,gf,m;
 +  double partsum;
 +  unsigned char *grpnrFREEZE;
 +
 +  if (debug)
 +    fprintf(debug,"Doing reorder_partsum\n");
 +
 +  fm = s_min->f;
 +  fb = s_b->f;
 +
 +  cgs_gl = dd_charge_groups_global(cr->dd);
 +  index = cgs_gl->index;
 +
 +  /* Collect fm in a global vector fmg.
 +   * This conflicts with the spirit of domain decomposition,
 +   * but to fully optimize this a much more complicated algorithm is required.
 +   */
 +  snew(fmg,mtop->natoms);
 +
 +  ncg   = s_min->s.ncg_gl;
 +  cg_gl = s_min->s.cg_gl;
 +  i = 0;
 +  for(c=0; c<ncg; c++) {
 +    cg = cg_gl[c];
 +    a0 = index[cg];
 +    a1 = index[cg+1];
 +    for(a=a0; a<a1; a++) {
 +      copy_rvec(fm[i],fmg[a]);
 +      i++;
 +    }
 +  }
 +  gmx_sum(mtop->natoms*3,fmg[0],cr);
 +
 +  /* Now we will determine the part of the sum for the cgs in state s_b */
 +  ncg   = s_b->s.ncg_gl;
 +  cg_gl = s_b->s.cg_gl;
 +  partsum = 0;
 +  i = 0;
 +  gf = 0;
 +  grpnrFREEZE = mtop->groups.grpnr[egcFREEZE];
 +  for(c=0; c<ncg; c++) {
 +    cg = cg_gl[c];
 +    a0 = index[cg];
 +    a1 = index[cg+1];
 +    for(a=a0; a<a1; a++) {
 +      if (mdatoms->cFREEZE && grpnrFREEZE) {
 +      gf = grpnrFREEZE[i];
 +      }
 +      for(m=0; m<DIM; m++) {
 +      if (!opts->nFreeze[gf][m]) {
 +        partsum += (fb[i][m] - fmg[a][m])*fb[i][m];
 +      }
 +      }
 +      i++;
 +    }
 +  }
 +
 +  sfree(fmg);
 +
 +  return partsum;
 +}
 +
 +static real pr_beta(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
 +                  gmx_mtop_t *mtop,
 +                  em_state_t *s_min,em_state_t *s_b)
 +{
 +  rvec *fm,*fb;
 +  double sum;
 +  int  gf,i,m;
 +
 +  /* This is just the classical Polak-Ribiere calculation of beta;
 +   * it looks a bit complicated since we take freeze groups into account,
 +   * and might have to sum it in parallel runs.
 +   */
 +
 +  if (!DOMAINDECOMP(cr) ||
 +      (s_min->s.ddp_count == cr->dd->ddp_count &&
 +       s_b->s.ddp_count   == cr->dd->ddp_count)) {
 +    fm = s_min->f;
 +    fb = s_b->f;
 +    sum = 0;
 +    gf = 0;
 +    /* This part of code can be incorrect with DD,
 +     * since the atom ordering in s_b and s_min might differ.
 +     */
 +    for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
 +      if (mdatoms->cFREEZE)
 +      gf = mdatoms->cFREEZE[i];
 +      for(m=0; m<DIM; m++)
 +      if (!opts->nFreeze[gf][m]) {
 +        sum += (fb[i][m] - fm[i][m])*fb[i][m];
 +      }
 +    }
 +  } else {
 +    /* We need to reorder cgs while summing */
 +    sum = reorder_partsum(cr,opts,mdatoms,mtop,s_min,s_b);
 +  }
 +  if (PAR(cr))
 +    gmx_sumd(1,&sum,cr);
 +
 +  return sum/sqr(s_min->fnorm);
 +}
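
The quantity returned above is the Polak-Ribiere coefficient beta = sum_i f_b,i (f_b,i - f_min,i) / |f_min|^2, with frozen dimensions excluded and a parallel sum where needed. A minimal standalone sketch of the same formula (not part of the patch; freeze groups and parallel reduction are omitted, and the force values are made-up examples):

    /* Standalone sketch of the Polak-Ribiere beta:
     * beta = sum_i f_b[i]*(f_b[i] - f_min[i]) / sum_i f_min[i]^2 */
    #include <stdio.h>

    static double pr_beta_simple(const double *f_min, const double *f_b, int n)
    {
        double num = 0.0, den = 0.0;
        int    i;

        for (i = 0; i < n; i++)
        {
            num += (f_b[i] - f_min[i])*f_b[i];
            den += f_min[i]*f_min[i];   /* |f_min|^2, i.e. fnorm^2 of the previous state */
        }
        return num/den;
    }

    int main(void)
    {
        double f_min[3] = {1.0, -2.0, 0.5};   /* forces at the previous minimum */
        double f_b[3]   = {0.8, -1.5, 0.4};   /* forces at the new trial point  */

        printf("beta = %g\n", pr_beta_simple(f_min, f_b, 3));
        return 0;
    }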
 +
 +double do_cg(FILE *fplog,t_commrec *cr,
 +             int nfile,const t_filenm fnm[],
 +             const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
 +             int nstglobalcomm,
 +             gmx_vsite_t *vsite,gmx_constr_t constr,
 +             int stepout,
 +             t_inputrec *inputrec,
 +             gmx_mtop_t *top_global,t_fcdata *fcd,
 +             t_state *state_global,
 +             t_mdatoms *mdatoms,
 +             t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +             gmx_edsam_t ed,
 +             t_forcerec *fr,
 +             int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
 +             gmx_membed_t membed,
 +             real cpt_period,real max_hours,
 +             const char *deviceOptions,
 +             unsigned long Flags,
 +             gmx_runtime_t *runtime)
 +{
 +  const char *CG="Polak-Ribiere Conjugate Gradients";
 +
 +  em_state_t *s_min,*s_a,*s_b,*s_c;
 +  gmx_localtop_t *top;
 +  gmx_enerdata_t *enerd;
 +  rvec   *f;
 +  gmx_global_stat_t gstat;
 +  t_graph    *graph;
 +  rvec   *f_global,*p,*sf,*sfm;
 +  double gpa,gpb,gpc,tmp,sum[2],minstep;
 +  real   fnormn;
 +  real   stepsize;
 +  real   a,b,c,beta=0.0;
 +  real   epot_repl=0;
 +  real   pnorm;
 +  t_mdebin   *mdebin;
 +  gmx_bool   converged,foundlower;
 +  rvec   mu_tot;
 +  gmx_bool   do_log=FALSE,do_ene=FALSE,do_x,do_f;
 +  tensor vir,pres;
 +  int    number_steps,neval=0,nstcg=inputrec->nstcgsteep;
 +  gmx_mdoutf_t *outf;
 +  int    i,m,gf,step,nminstep;
 +  real   terminate=0;
 +
 +  step=0;
 +
 +  s_min = init_em_state();
 +  s_a   = init_em_state();
 +  s_b   = init_em_state();
 +  s_c   = init_em_state();
 +
 +  /* Init em and store the local state in s_min */
 +  init_em(fplog,CG,cr,inputrec,
 +          state_global,top_global,s_min,&top,&f,&f_global,
 +          nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
 +          nfile,fnm,&outf,&mdebin);
 +
 +  /* Print to log file */
 +  print_em_start(fplog,cr,runtime,wcycle,CG);
 +
 +  /* Max number of steps */
 +  number_steps=inputrec->nsteps;
 +
 +  if (MASTER(cr))
 +    sp_header(stderr,CG,inputrec->em_tol,number_steps);
 +  if (fplog)
 +    sp_header(fplog,CG,inputrec->em_tol,number_steps);
 +
 +  /* Call the force routine and some auxiliary (neighboursearching etc.) */
 +  /* do_force always puts the charge groups in the box and shifts again
 +   * We do not unshift, so molecules are always whole in congrad.c
 +   */
 +  evaluate_energy(fplog,bVerbose,cr,
 +                state_global,top_global,s_min,top,
 +                inputrec,nrnb,wcycle,gstat,
 +                vsite,constr,fcd,graph,mdatoms,fr,
 +                mu_tot,enerd,vir,pres,-1,TRUE);
 +  where();
 +
 +  if (MASTER(cr)) {
 +    /* Copy stuff to the energy bin for easy printing etc. */
 +    upd_mdebin(mdebin,FALSE,FALSE,(double)step,
 +               mdatoms->tmass,enerd,&s_min->s,inputrec->fepvals,inputrec->expandedvals,s_min->s.box,
 +               NULL,NULL,vir,pres,NULL,mu_tot,constr);
 +
 +    print_ebin_header(fplog,step,step,s_min->s.lambda[efptFEP]);
 +    print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
 +               TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
 +  }
 +  where();
 +
 +  /* Estimate/guess the initial stepsize */
 +  stepsize = inputrec->em_stepsize/s_min->fnorm;
 +
 +  if (MASTER(cr)) {
 +    fprintf(stderr,"   F-max             = %12.5e on atom %d\n",
 +          s_min->fmax,s_min->a_fmax+1);
 +    fprintf(stderr,"   F-Norm            = %12.5e\n",
 +          s_min->fnorm/sqrt(state_global->natoms));
 +    fprintf(stderr,"\n");
 +    /* and copy to the log file too... */
 +    fprintf(fplog,"   F-max             = %12.5e on atom %d\n",
 +          s_min->fmax,s_min->a_fmax+1);
 +    fprintf(fplog,"   F-Norm            = %12.5e\n",
 +          s_min->fnorm/sqrt(state_global->natoms));
 +    fprintf(fplog,"\n");
 +  }
 +  /* Start the loop over CG steps.
 +   * Each successful step is counted, and we continue until
 +   * we either converge or reach the max number of steps.
 +   */
 +  converged = FALSE;
 +  for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged;step++) {
 +
 +    /* start taking steps in a new direction
 +     * First time we enter the routine, beta=0, and the direction is
 +     * simply the negative gradient.
 +     */
 +
 +    /* Calculate the new direction in p, and the gradient in this direction, gpa */
 +    p  = s_min->s.cg_p;
 +    sf = s_min->f;
 +    gpa = 0;
 +    gf = 0;
 +    for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
 +      if (mdatoms->cFREEZE)
 +      gf = mdatoms->cFREEZE[i];
 +      for(m=0; m<DIM; m++) {
 +      if (!inputrec->opts.nFreeze[gf][m]) {
 +        p[i][m] = sf[i][m] + beta*p[i][m];
 +        gpa -= p[i][m]*sf[i][m];
 +        /* f is negative gradient, thus the sign */
 +      } else {
 +          p[i][m] = 0;
 +      }
 +      }
 +    }
 +
 +    /* Sum the gradient along the line across CPUs */
 +    if (PAR(cr))
 +      gmx_sumd(1,&gpa,cr);
 +
 +    /* Calculate the norm of the search vector */
 +    get_f_norm_max(cr,&(inputrec->opts),mdatoms,p,&pnorm,NULL,NULL);
 +
 +    /* Just in case stepsize reaches zero due to numerical precision... */
 +    if(stepsize<=0)
 +      stepsize = inputrec->em_stepsize/pnorm;
 +
 +    /*
 +     * Double check the value of the derivative in the search direction.
 +     * If it is positive it must be due to the old information in the
 +     * CG formula, so just remove that and start over with beta=0.
 +     * This corresponds to a steepest descent step.
 +     */
 +    if(gpa>0) {
 +      beta = 0;
 +      step--; /* Don't count this step since we are restarting */
 +      continue; /* Go back to the beginning of the big for-loop */
 +    }
 +
 +    /* Calculate the minimum allowed stepsize, below which the average (norm)
 +     * relative change in coordinates would be smaller than the machine precision
 +     */
 +    minstep=0;
 +    for (i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
 +      for(m=0; m<DIM; m++) {
 +      tmp = fabs(s_min->s.x[i][m]);
 +      if(tmp < 1.0)
 +        tmp = 1.0;
 +      tmp = p[i][m]/tmp;
 +      minstep += tmp*tmp;
 +      }
 +    }
 +    /* Add up from all CPUs */
 +    if(PAR(cr))
 +      gmx_sumd(1,&minstep,cr);
 +
 +    minstep = GMX_REAL_EPS/sqrt(minstep/(3*state_global->natoms));
 +
 +    if(stepsize<minstep) {
 +      converged=TRUE;
 +      break;
 +    }
 +
 +    /* Write coordinates if necessary */
 +    do_x = do_per_step(step,inputrec->nstxout);
 +    do_f = do_per_step(step,inputrec->nstfout);
 +
 +    write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
 +                  top_global,inputrec,step,
 +                  s_min,state_global,f_global);
 +
 +    /* Take a step downhill.
 +     * In theory, we should minimize the function along this direction.
 +     * That is quite possible, but it turns out to take 5-10 function evaluations
 +     * for each line. However, we don't really need to find the exact minimum -
 +     * it is much better to start a new CG step in a modified direction as soon
 +     * as we are close to it. This will save a lot of energy evaluations.
 +     *
 +     * In practice, we just try to take a single step.
 +     * If it worked (i.e. lowered the energy), we increase the stepsize but
 +     * the continue straight to the next CG step without trying to find any minimum.
 +     * If it didn't work (higher energy), there must be a minimum somewhere between
 +     * the old position and the new one.
 +     *
 +     * Due to the finite numerical accuracy, it turns out that it is a good idea
 +     * to even accept a SMALL increase in energy, if the derivative is still downhill.
 +     * This leads to lower final energies in the tests I've done. / Erik
 +     */
 +    s_a->epot = s_min->epot;
 +    a = 0.0;
 +    c = a + stepsize; /* reference position along line is zero */
 +
 +    if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count) {
 +      em_dd_partition_system(fplog,step,cr,top_global,inputrec,
 +                           s_min,top,mdatoms,fr,vsite,constr,
 +                           nrnb,wcycle);
 +    }
 +
 +    /* Take a trial step (new coords in s_c) */
 +    do_em_step(cr,inputrec,mdatoms,s_min,c,s_min->s.cg_p,s_c,
 +             constr,top,nrnb,wcycle,-1);
 +
 +    neval++;
 +    /* Calculate energy for the trial step */
 +    evaluate_energy(fplog,bVerbose,cr,
 +                  state_global,top_global,s_c,top,
 +                  inputrec,nrnb,wcycle,gstat,
 +                  vsite,constr,fcd,graph,mdatoms,fr,
 +                  mu_tot,enerd,vir,pres,-1,FALSE);
 +
 +    /* Calc derivative along line */
 +    p  = s_c->s.cg_p;
 +    sf = s_c->f;
 +    gpc=0;
 +    for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
 +      for(m=0; m<DIM; m++)
 +        gpc -= p[i][m]*sf[i][m];  /* f is negative gradient, thus the sign */
 +    }
 +    /* Sum the gradient along the line across CPUs */
 +    if (PAR(cr))
 +      gmx_sumd(1,&gpc,cr);
 +
 +    /* This is the max amount of increase in energy we tolerate */
 +    tmp=sqrt(GMX_REAL_EPS)*fabs(s_a->epot);
 +
 +    /* Accept the step if the energy is lower, or if it is not significantly higher
 +     * and the line derivative is still negative.
 +     */
 +    if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp))) {
 +      foundlower = TRUE;
 +      /* Great, we found a better energy. Increase step for next iteration
 +       * if we are still going down, decrease it otherwise
 +       */
 +      if(gpc<0)
 +      stepsize *= 1.618034;  /* The golden section */
 +      else
 +      stepsize *= 0.618034;  /* 1/golden section */
 +    } else {
 +      /* New energy is the same or higher. We will have to do some work
 +       * to find a smaller value in the interval. Take a smaller step next time!
 +       */
 +      foundlower = FALSE;
 +      stepsize *= 0.618034;
 +    }
 +
 +
 +
 +
 +    /* OK, if we didn't find a lower value we will have to locate one now - there must
 +     * be one in the interval [a=0,c].
 +     * The same thing is valid here, though: Don't spend dozens of iterations to find
 +     * the line minimum. We try to interpolate based on the derivative at the endpoints,
 +     * and only continue until we find a lower value. In most cases this means 1-2 iterations.
 +     *
 +     * I also have a safeguard for potentially really pathological functions so we never
 +     * take more than 20 steps before we give up ...
 +     *
 +     * If we already found a lower value we just skip this step and continue to the update.
 +     */
 +    if (!foundlower) {
 +      nminstep=0;
 +
 +      do {
 +      /* Select a new trial point.
 +       * If the derivatives at points a & c have different sign we interpolate to zero,
 +       * otherwise just do a bisection.
 +       */
 +      if(gpa<0 && gpc>0)
 +        b = a + gpa*(a-c)/(gpc-gpa);
 +      else
 +        b = 0.5*(a+c);
 +
 +      /* safeguard if interpolation close to machine accuracy causes errors:
 +       * never go outside the interval
 +       */
 +      if(b<=a || b>=c)
 +        b = 0.5*(a+c);
 +
 +      if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
 +        /* Reload the old state */
 +        em_dd_partition_system(fplog,-1,cr,top_global,inputrec,
 +                               s_min,top,mdatoms,fr,vsite,constr,
 +                               nrnb,wcycle);
 +      }
 +
 +      /* Take a trial step to this new point - new coords in s_b */
 +      do_em_step(cr,inputrec,mdatoms,s_min,b,s_min->s.cg_p,s_b,
 +                 constr,top,nrnb,wcycle,-1);
 +
 +      neval++;
 +      /* Calculate energy for the trial step */
 +      evaluate_energy(fplog,bVerbose,cr,
 +                      state_global,top_global,s_b,top,
 +                      inputrec,nrnb,wcycle,gstat,
 +                      vsite,constr,fcd,graph,mdatoms,fr,
 +                      mu_tot,enerd,vir,pres,-1,FALSE);
 +
 +      /* p does not change within a step, but since the domain decomposition
 +       * might change, we have to use cg_p of s_b here.
 +       */
 +      p  = s_b->s.cg_p;
 +      sf = s_b->f;
 +      gpb=0;
 +      for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
 +        for(m=0; m<DIM; m++)
 +            gpb -= p[i][m]*sf[i][m];   /* f is negative gradient, thus the sign */
 +      }
 +      /* Sum the gradient along the line across CPUs */
 +      if (PAR(cr))
 +        gmx_sumd(1,&gpb,cr);
 +
 +      if (debug)
 +        fprintf(debug,"CGE: EpotA %f EpotB %f EpotC %f gpb %f\n",
 +                s_a->epot,s_b->epot,s_c->epot,gpb);
 +
 +      epot_repl = s_b->epot;
 +
 +      /* Keep one of the intervals based on the value of the derivative at the new point */
 +      if (gpb > 0) {
 +        /* Replace c endpoint with b */
 +        swap_em_state(s_b,s_c);
 +        c = b;
 +        gpc = gpb;
 +      } else {
 +        /* Replace a endpoint with b */
 +        swap_em_state(s_b,s_a);
 +        a = b;
 +        gpa = gpb;
 +      }
 +
 +      /*
 +       * Stop search as soon as we find a value smaller than the endpoints.
 +       * Never run more than 20 steps, no matter what.
 +       */
 +      nminstep++;
 +      } while ((epot_repl > s_a->epot || epot_repl > s_c->epot) &&
 +             (nminstep < 20));
 +
 +      if (fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS ||
 +        nminstep >= 20) {
 +      /* OK. We couldn't find a significantly lower energy.
 +       * If beta==0 this was steepest descent, and then we give up.
 +       * If not, set beta=0 and restart with steepest descent before quitting.
 +         */
 +      if (beta == 0.0) {
 +        /* Converged */
 +        converged = TRUE;
 +        break;
 +      } else {
 +        /* Reset memory before giving up */
 +        beta = 0.0;
 +        continue;
 +      }
 +      }
 +
 +      /* Select min energy state of A & C, put the best in B.
 +       */
 +      if (s_c->epot < s_a->epot) {
 +      if (debug)
 +        fprintf(debug,"CGE: C (%f) is lower than A (%f), moving C to B\n",
 +                s_c->epot,s_a->epot);
 +      swap_em_state(s_b,s_c);
 +      gpb = gpc;
 +      b = c;
 +      } else {
 +      if (debug)
 +        fprintf(debug,"CGE: A (%f) is lower than C (%f), moving A to B\n",
 +                s_a->epot,s_c->epot);
 +      swap_em_state(s_b,s_a);
 +      gpb = gpa;
 +      b = a;
 +      }
 +
 +    } else {
 +      if (debug)
 +      fprintf(debug,"CGE: Found a lower energy %f, moving C to B\n",
 +              s_c->epot);
 +      swap_em_state(s_b,s_c);
 +      gpb = gpc;
 +      b = c;
 +    }
 +
 +    /* new search direction */
 +    /* beta = 0 means forget all memory and restart with steepest descents. */
 +    if (nstcg && ((step % nstcg)==0))
 +      beta = 0.0;
 +    else {
 +      /* s_min->fnorm cannot be zero, because then we would have converged
 +       * and broken out.
 +       */
 +
 +      /* Polak-Ribiere update.
 +       * Change to fnorm2/fnorm2_old for Fletcher-Reeves
 +       */
 +      beta = pr_beta(cr,&inputrec->opts,mdatoms,top_global,s_min,s_b);
 +    }
 +    /* Limit beta to prevent oscillations */
 +    if (fabs(beta) > 5.0)
 +      beta = 0.0;
 +
 +
 +    /* update positions */
 +    swap_em_state(s_min,s_b);
 +    gpa = gpb;
 +
 +    /* Print it if necessary */
 +    if (MASTER(cr)) {
 +      if(bVerbose)
 +      fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
 +              step,s_min->epot,s_min->fnorm/sqrt(state_global->natoms),
 +              s_min->fmax,s_min->a_fmax+1);
 +      /* Store the new (lower) energies */
 +      upd_mdebin(mdebin,FALSE,FALSE,(double)step,
 +                 mdatoms->tmass,enerd,&s_min->s,inputrec->fepvals,inputrec->expandedvals,s_min->s.box,
 +                 NULL,NULL,vir,pres,NULL,mu_tot,constr);
 +
 +      do_log = do_per_step(step,inputrec->nstlog);
 +      do_ene = do_per_step(step,inputrec->nstenergy);
 +      if(do_log)
 +          print_ebin_header(fplog,step,step,s_min->s.lambda[efptFEP]);
 +      print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
 +               do_log ? fplog : NULL,step,step,eprNORMAL,
 +               TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
 +    }
 +
 +    /* Stop when the maximum force lies below tolerance.
 +     * If we have reached machine precision, converged is already set to true.
 +     */
 +    converged = converged || (s_min->fmax < inputrec->em_tol);
 +
 +  } /* End of the loop */
 +
 +  if (converged)
 +    step--; /* we never took that last step in this case */
 +
 +    if (s_min->fmax > inputrec->em_tol)
 +    {
 +        if (MASTER(cr))
 +        {
 +            warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
 +            warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
 +        }
 +        converged = FALSE;
 +    }
 +
 +  if (MASTER(cr)) {
 +    /* If we printed energy and/or logfile last step (which was the last step)
 +     * we don't have to do it again, but otherwise print the final values.
 +     */
 +    if(!do_log) {
 +      /* Write final value to log since we didn't do anything the last step */
 +      print_ebin_header(fplog,step,step,s_min->s.lambda[efptFEP]);
 +    }
 +    if (!do_ene || !do_log) {
 +      /* Write final energy file entries */
 +      print_ebin(outf->fp_ene,!do_ene,FALSE,FALSE,
 +               !do_log ? fplog : NULL,step,step,eprNORMAL,
 +               TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
 +    }
 +  }
 +
 +  /* Print some stuff... */
 +  if (MASTER(cr))
 +    fprintf(stderr,"\nwriting lowest energy coordinates.\n");
 +
 +  /* IMPORTANT!
 +   * For accurate normal mode calculation it is imperative that we
 +   * store the last conformation into the full precision binary trajectory.
 +   *
 +   * However, we should only do it if we did NOT already write this step
 +   * above (which we did if do_x or do_f was true).
 +   */
 +  do_x = !do_per_step(step,inputrec->nstxout);
 +  do_f = (inputrec->nstfout > 0 && !do_per_step(step,inputrec->nstfout));
 +
 +  write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
 +                top_global,inputrec,step,
 +                s_min,state_global,f_global);
 +
 +  fnormn = s_min->fnorm/sqrt(state_global->natoms);
 +
 +  if (MASTER(cr)) {
 +    print_converged(stderr,CG,inputrec->em_tol,step,converged,number_steps,
 +                  s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
 +    print_converged(fplog,CG,inputrec->em_tol,step,converged,number_steps,
 +                  s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
 +
 +    fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
 +  }
 +
 +  finish_em(fplog,cr,outf,runtime,wcycle);
 +
 +  /* To print the actual number of steps we needed somewhere */
 +  runtime->nsteps_done = step;
 +
 +  return 0;
 +} /* That's all folks */
 +
 +
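The conjugate-gradient direction update above calls pr_beta(), defined elsewhere in this file. As a reference for the formula the comment alludes to, here is a reduced stand-alone sketch (assumed name cg_beta_pr, flat double arrays instead of GROMACS rvec/mdatoms structures) of the Polak-Ribiere coefficient and the Fletcher-Reeves variant mentioned in the comment:

    #include <stddef.h>

    /* Polak-Ribiere: beta = g_new.(g_new - g_old) / (g_old.g_old), written here
     * in terms of forces f = -g, which leaves the dot products unchanged.
     * Fletcher-Reeves would instead be f_new.f_new / f_old.f_old
     * (the fnorm2/fnorm2_old the comment refers to).
     */
    static double cg_beta_pr(const double *f_old, const double *f_new, size_t n)
    {
        double num = 0.0, den = 0.0;
        size_t i;

        for (i = 0; i < n; i++)
        {
            num += f_new[i]*(f_new[i] - f_old[i]);
            den += f_old[i]*f_old[i];
        }
        return (den > 0.0) ? num/den : 0.0; /* beta = 0 falls back to steepest descent */
    }

In the loop above, |beta| > 5.0 (and every nstcg-th step) likewise resets beta to zero, which restarts the search along the steepest-descent direction.
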
 +double do_lbfgs(FILE *fplog,t_commrec *cr,
 +                int nfile,const t_filenm fnm[],
 +                const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
 +                int nstglobalcomm,
 +                gmx_vsite_t *vsite,gmx_constr_t constr,
 +                int stepout,
 +                t_inputrec *inputrec,
 +                gmx_mtop_t *top_global,t_fcdata *fcd,
 +                t_state *state,
 +                t_mdatoms *mdatoms,
 +                t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +                gmx_edsam_t ed,
 +                t_forcerec *fr,
 +                int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
 +                gmx_membed_t membed,
 +                real cpt_period,real max_hours,
 +                const char *deviceOptions,
 +                unsigned long Flags,
 +                gmx_runtime_t *runtime)
 +{
 +  static const char *LBFGS="Low-Memory BFGS Minimizer";
 +  em_state_t ems;
 +  gmx_localtop_t *top;
 +  gmx_enerdata_t *enerd;
 +  rvec   *f;
 +  gmx_global_stat_t gstat;
 +  t_graph    *graph;
 +  rvec   *f_global;
 +  int    ncorr,nmaxcorr,point,cp,neval,nminstep;
 +  double stepsize,gpa,gpb,gpc,tmp,minstep;
 +  real   *rho,*alpha,*ff,*xx,*p,*s,*lastx,*lastf,**dx,**dg;
 +  real   *xa,*xb,*xc,*fa,*fb,*fc,*xtmp,*ftmp;
 +  real   a,b,c,maxdelta,delta;
 +  real   diag,Epot0,Epot,EpotA,EpotB,EpotC;
 +  real   dgdx,dgdg,sq,yr,beta;
 +  t_mdebin   *mdebin;
 +  gmx_bool   converged,first;
 +  rvec   mu_tot;
 +  real   fnorm,fmax;
 +  gmx_bool   do_log,do_ene,do_x,do_f,foundlower,*frozen;
 +  tensor vir,pres;
 +  int    start,end,number_steps;
 +  gmx_mdoutf_t *outf;
 +  int    i,k,m,n,nfmax,gf,step;
 +  int    mdof_flags;
 +  /* not used */
 +  real   terminate;
 +
 +  if (PAR(cr))
 +    gmx_fatal(FARGS,"Cannot do parallel L-BFGS Minimization - yet.\n");
 +
 +  n = 3*state->natoms;
 +  nmaxcorr = inputrec->nbfgscorr;
 +
 +  /* Allocate memory */
 +  /* Use pointers to real so we don't have to loop over both atoms and
 +   * dimensions all the time...
 +   * x/f are allocated as rvec *, so make new x0/f0 pointers-to-real
 +   * that point to the same memory.
 +   */
 +  snew(xa,n);
 +  snew(xb,n);
 +  snew(xc,n);
 +  snew(fa,n);
 +  snew(fb,n);
 +  snew(fc,n);
 +  snew(frozen,n);
 +
 +  snew(p,n);
 +  snew(lastx,n);
 +  snew(lastf,n);
 +  snew(rho,nmaxcorr);
 +  snew(alpha,nmaxcorr);
 +
 +  snew(dx,nmaxcorr);
 +  for(i=0;i<nmaxcorr;i++)
 +    snew(dx[i],n);
 +
 +  snew(dg,nmaxcorr);
 +  for(i=0;i<nmaxcorr;i++)
 +    snew(dg[i],n);
 +
 +  step = 0;
 +  neval = 0;
 +
 +  /* Init em */
 +  init_em(fplog,LBFGS,cr,inputrec,
 +          state,top_global,&ems,&top,&f,&f_global,
 +          nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
 +          nfile,fnm,&outf,&mdebin);
 +  /* Do_lbfgs is not completely updated like do_steep and do_cg,
 +   * so we free some memory again.
 +   */
 +  sfree(ems.s.x);
 +  sfree(ems.f);
 +
 +  xx = (real *)state->x;
 +  ff = (real *)f;
 +
 +  start = mdatoms->start;
 +  end   = mdatoms->homenr + start;
 +
 +  /* Print to log file */
 +  print_em_start(fplog,cr,runtime,wcycle,LBFGS);
 +
 +  do_log = do_ene = do_x = do_f = TRUE;
 +
 +  /* Max number of steps */
 +  number_steps=inputrec->nsteps;
 +
 +  /* Create a 3*natoms index to tell whether each degree of freedom is frozen */
 +  gf = 0;
 +  for(i=start; i<end; i++) {
 +    if (mdatoms->cFREEZE)
 +      gf = mdatoms->cFREEZE[i];
 +     for(m=0; m<DIM; m++)
 +       frozen[3*i+m]=inputrec->opts.nFreeze[gf][m];
 +  }
 +  if (MASTER(cr))
 +    sp_header(stderr,LBFGS,inputrec->em_tol,number_steps);
 +  if (fplog)
 +    sp_header(fplog,LBFGS,inputrec->em_tol,number_steps);
 +
 +  if (vsite)
 +    construct_vsites(fplog,vsite,state->x,nrnb,1,NULL,
 +                   top->idef.iparams,top->idef.il,
 +                   fr->ePBC,fr->bMolPBC,graph,cr,state->box);
 +
 +  /* Call the force routine and some auxiliary (neighboursearching etc.) */
 +  /* do_force always puts the charge groups in the box and shifts again
 +   * We do not unshift, so molecules are always whole
 +   */
 +  neval++;
 +  ems.s.x = state->x;
 +  ems.f = f;
 +  evaluate_energy(fplog,bVerbose,cr,
 +                state,top_global,&ems,top,
 +                inputrec,nrnb,wcycle,gstat,
 +                vsite,constr,fcd,graph,mdatoms,fr,
 +                mu_tot,enerd,vir,pres,-1,TRUE);
 +  where();
 +
 +  if (MASTER(cr)) {
 +    /* Copy stuff to the energy bin for easy printing etc. */
 +    upd_mdebin(mdebin,FALSE,FALSE,(double)step,
 +               mdatoms->tmass,enerd,state,inputrec->fepvals,inputrec->expandedvals,state->box,
 +               NULL,NULL,vir,pres,NULL,mu_tot,constr);
 +
 +    print_ebin_header(fplog,step,step,state->lambda[efptFEP]);
 +    print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
 +               TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
 +  }
 +  where();
 +
 +  /* This is the starting energy */
 +  Epot = enerd->term[F_EPOT];
 +
 +  fnorm = ems.fnorm;
 +  fmax  = ems.fmax;
 +  nfmax = ems.a_fmax;
 +
 +  /* Set the initial step.
 +   * Since it will be multiplied by the non-normalized search direction
 +   * vector (force vector the first time), we scale it by the
 +   * norm of the force.
 +   */
 +
 +  if (MASTER(cr)) {
 +    fprintf(stderr,"Using %d BFGS correction steps.\n\n",nmaxcorr);
 +    fprintf(stderr,"   F-max             = %12.5e on atom %d\n",fmax,nfmax+1);
 +    fprintf(stderr,"   F-Norm            = %12.5e\n",fnorm/sqrt(state->natoms));
 +    fprintf(stderr,"\n");
 +    /* and copy to the log file too... */
 +    fprintf(fplog,"Using %d BFGS correction steps.\n\n",nmaxcorr);
 +    fprintf(fplog,"   F-max             = %12.5e on atom %d\n",fmax,nfmax+1);
 +    fprintf(fplog,"   F-Norm            = %12.5e\n",fnorm/sqrt(state->natoms));
 +    fprintf(fplog,"\n");
 +  }
 +
 +  point=0;
 +  for(i=0;i<n;i++)
 +    if(!frozen[i])
 +      dx[point][i] = ff[i];  /* Initial search direction */
 +    else
 +      dx[point][i] = 0;
 +
 +  stepsize = 1.0/fnorm;
 +  converged = FALSE;
 +
 +  /* Start the loop over BFGS steps.
 +   * Each successful step is counted, and we continue until
 +   * we either converge or reach the max number of steps.
 +   */
 +
 +  ncorr=0;
 +
 +  /* Set the gradient from the force */
 +  converged = FALSE;
 +  for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged; step++) {
 +
 +    /* Write coordinates if necessary */
 +    do_x = do_per_step(step,inputrec->nstxout);
 +    do_f = do_per_step(step,inputrec->nstfout);
 +
 +    mdof_flags = 0;
 +    if (do_x)
 +    {
 +        mdof_flags |= MDOF_X;
 +    }
 +
 +    if (do_f)
 +    {
 +        mdof_flags |= MDOF_F;
 +    }
 +
 +    write_traj(fplog,cr,outf,mdof_flags,
 +               top_global,step,(real)step,state,state,f,f,NULL,NULL);
 +
 +    /* Do the linesearching in the direction dx[point][0..(n-1)] */
 +
 +    /* pointer to current direction - point=0 first time here */
 +    s=dx[point];
 +
 +    /* calculate line gradient */
 +    for(gpa=0,i=0;i<n;i++)
 +      gpa-=s[i]*ff[i];
 +
 +    /* Calculate the minimum allowed stepsize, below which the average (norm)
 +     * relative change in coordinates would be smaller than the precision
 +     */
 +    for(minstep=0,i=0;i<n;i++) {
 +      tmp=fabs(xx[i]);
 +      if(tmp<1.0)
 +      tmp=1.0;
 +      tmp = s[i]/tmp;
 +      minstep += tmp*tmp;
 +    }
 +    minstep = GMX_REAL_EPS/sqrt(minstep/n);
 +
 +    if(stepsize<minstep) {
 +      converged=TRUE;
 +      break;
 +    }
 +
 +    /* Store old forces and coordinates */
 +    for(i=0;i<n;i++) {
 +      lastx[i]=xx[i];
 +      lastf[i]=ff[i];
 +    }
 +    Epot0=Epot;
 +
 +    first=TRUE;
 +
 +    for(i=0;i<n;i++)
 +      xa[i]=xx[i];
 +
 +    /* Take a step downhill.
 +     * In theory, we should minimize the function along this direction.
 +     * That is quite possible, but it turns out to take 5-10 function evaluations
 +     * for each line. However, we don't really need to find the exact minimum -
 +     * it is much better to start a new BFGS step in a modified direction as soon
 +     * as we are close to it. This will save a lot of energy evaluations.
 +     *
 +     * In practice, we just try to take a single step.
 +     * If it worked (i.e. lowered the energy), we increase the stepsize but
 +     * then continue straight to the next BFGS step without trying to find any minimum.
 +     * If it didn't work (higher energy), there must be a minimum somewhere between
 +     * the old position and the new one.
 +     *
 +     * Due to the finite numerical accuracy, it turns out that it is a good idea
 +     * to even accept a SMALL increase in energy, if the derivative is still downhill.
 +     * This leads to lower final energies in the tests I've done. / Erik
 +     */
 +    foundlower=FALSE;
 +    EpotA = Epot0;
 +    a = 0.0;
 +    c = a + stepsize; /* reference position along line is zero */
 +
 +    /* Check stepsize first. We do not allow displacements
 +     * larger than emstep.
 +     */
 +    do {
 +      c = a + stepsize;
 +      maxdelta=0;
 +      for(i=0;i<n;i++) {
 +      delta=c*s[i];
 +      if(delta>maxdelta)
 +        maxdelta=delta;
 +      }
 +      if(maxdelta>inputrec->em_stepsize)
 +      stepsize*=0.1;
 +    } while(maxdelta>inputrec->em_stepsize);
 +
 +    /* Take a trial step */
 +    for (i=0; i<n; i++)
 +      xc[i] = lastx[i] + c*s[i];
 +
 +    neval++;
 +    /* Calculate energy for the trial step */
 +    ems.s.x = (rvec *)xc;
 +    ems.f   = (rvec *)fc;
 +    evaluate_energy(fplog,bVerbose,cr,
 +                  state,top_global,&ems,top,
 +                  inputrec,nrnb,wcycle,gstat,
 +                  vsite,constr,fcd,graph,mdatoms,fr,
 +                  mu_tot,enerd,vir,pres,step,FALSE);
 +    EpotC = ems.epot;
 +
 +    /* Calc derivative along line */
 +    for(gpc=0,i=0; i<n; i++) {
 +      gpc -= s[i]*fc[i];   /* f is negative gradient, thus the sign */
 +    }
 +    /* Sum the gradient along the line across CPUs */
 +    if (PAR(cr))
 +      gmx_sumd(1,&gpc,cr);
 +
 +    /* This is the max amount of increase in energy we tolerate */
 +    tmp=sqrt(GMX_REAL_EPS)*fabs(EpotA);
 +
 +    /* Accept the step if the energy is lower, or if it is not significantly higher
 +     * and the line derivative is still negative.
 +     */
 +    if(EpotC<EpotA || (gpc<0 && EpotC<(EpotA+tmp))) {
 +      foundlower = TRUE;
 +      /* Great, we found a better energy. Increase step for next iteration
 +       * if we are still going down, decrease it otherwise
 +       */
 +      if(gpc<0)
 +      stepsize *= 1.618034;  /* The golden section */
 +      else
 +      stepsize *= 0.618034;  /* 1/golden section */
 +    } else {
 +      /* New energy is the same or higher. We will have to do some work
 +       * to find a smaller value in the interval. Take smaller step next time!
 +       */
 +      foundlower = FALSE;
 +      stepsize *= 0.618034;
 +    }
 +
 +    /* OK, if we didn't find a lower value we will have to locate one now - there must
 +     * be one in the interval [a=0,c].
 +     * The same thing is valid here, though: Don't spend dozens of iterations to find
 +     * the line minimum. We try to interpolate based on the derivative at the endpoints,
 +     * and only continue until we find a lower value. In most cases this means 1-2 iterations.
 +     *
 +     * I also have a safeguard for potentially really pathological functions so we never
 +     * take more than 20 steps before we give up ...
 +     *
 +     * If we already found a lower value we just skip this step and continue to the update.
 +     */
 +
 +    if(!foundlower) {
 +
 +      nminstep=0;
 +      do {
 +      /* Select a new trial point.
 +       * If the derivatives at points a & c have different sign we interpolate to zero,
 +       * otherwise just do a bisection.
 +       */
 +
 +      if(gpa<0 && gpc>0)
 +        b = a + gpa*(a-c)/(gpc-gpa);
 +      else
 +        b = 0.5*(a+c);
 +
 +      /* safeguard if interpolation close to machine accuracy causes errors:
 +       * never go outside the interval
 +       */
 +      if(b<=a || b>=c)
 +        b = 0.5*(a+c);
 +
 +      /* Take a trial step */
 +      for (i=0; i<n; i++)
 +        xb[i] = lastx[i] + b*s[i];
 +
 +      neval++;
 +      /* Calculate energy for the trial step */
 +      ems.s.x = (rvec *)xb;
 +      ems.f   = (rvec *)fb;
 +      evaluate_energy(fplog,bVerbose,cr,
 +                      state,top_global,&ems,top,
 +                      inputrec,nrnb,wcycle,gstat,
 +                      vsite,constr,fcd,graph,mdatoms,fr,
 +                      mu_tot,enerd,vir,pres,step,FALSE);
 +      EpotB = ems.epot;
 +
 +      fnorm = ems.fnorm;
 +
 +      for(gpb=0,i=0; i<n; i++)
 +        gpb -= s[i]*fb[i];   /* f is negative gradient, thus the sign */
 +
 +      /* Sum the gradient along the line across CPUs */
 +      if (PAR(cr))
 +        gmx_sumd(1,&gpb,cr);
 +
 +      /* Keep one of the intervals based on the value of the derivative at the new point */
 +      if(gpb>0) {
 +        /* Replace c endpoint with b */
 +        EpotC = EpotB;
 +        c = b;
 +        gpc = gpb;
 +        /* swap coord pointers b/c */
 +        xtmp = xb;
 +        ftmp = fb;
 +        xb = xc;
 +        fb = fc;
 +        xc = xtmp;
 +        fc = ftmp;
 +      } else {
 +        /* Replace a endpoint with b */
 +        EpotA = EpotB;
 +        a = b;
 +        gpa = gpb;
 +        /* swap coord pointers a/b */
 +        xtmp = xb;
 +        ftmp = fb;
 +        xb = xa;
 +        fb = fa;
 +        xa = xtmp;
 +        fa = ftmp;
 +      }
 +
 +      /*
 +       * Stop search as soon as we find a value smaller than the endpoints,
 +       * or if the tolerance is below machine precision.
 +       * Never run more than 20 steps, no matter what.
 +       */
 +      nminstep++;
 +      } while((EpotB>EpotA || EpotB>EpotC) && (nminstep<20));
 +
 +      if(fabs(EpotB-Epot0)<GMX_REAL_EPS || nminstep>=20) {
 +      /* OK. We couldn't find a significantly lower energy.
 +       * If ncorr==0 this was steepest descent, and then we give up.
 +       * If not, reset memory to restart as steepest descent before quitting.
 +         */
 +      if(ncorr==0) {
 +      /* Converged */
 +        converged=TRUE;
 +        break;
 +      } else {
 +        /* Reset memory */
 +        ncorr=0;
 +        /* Search in gradient direction */
 +        for(i=0;i<n;i++)
 +          dx[point][i]=ff[i];
 +        /* Reset stepsize */
 +        stepsize = 1.0/fnorm;
 +        continue;
 +      }
 +      }
 +
 +      /* Select min energy state of A & C, put the best in xx/ff/Epot
 +       */
 +      if(EpotC<EpotA) {
 +      Epot = EpotC;
 +      /* Use state C */
 +      for(i=0;i<n;i++) {
 +        xx[i]=xc[i];
 +        ff[i]=fc[i];
 +      }
 +      stepsize=c;
 +      } else {
 +      Epot = EpotA;
 +      /* Use state A */
 +      for(i=0;i<n;i++) {
 +        xx[i]=xa[i];
 +        ff[i]=fa[i];
 +      }
 +      stepsize=a;
 +      }
 +
 +    } else {
 +      /* found lower */
 +      Epot = EpotC;
 +      /* Use state C */
 +      for(i=0;i<n;i++) {
 +      xx[i]=xc[i];
 +      ff[i]=fc[i];
 +      }
 +      stepsize=c;
 +    }
 +
 +    /* Update the memory information, and calculate a new
 +     * approximation of the inverse hessian
 +     */
 +
 +    /* Have new data in Epot, xx, ff */
 +    if(ncorr<nmaxcorr)
 +      ncorr++;
 +
 +    for(i=0;i<n;i++) {
 +      dg[point][i]=lastf[i]-ff[i];
 +      dx[point][i]*=stepsize;
 +    }
 +
 +    dgdg=0;
 +    dgdx=0;
 +    for(i=0;i<n;i++) {
 +      dgdg+=dg[point][i]*dg[point][i];
 +      dgdx+=dg[point][i]*dx[point][i];
 +    }
 +
 +    diag=dgdx/dgdg;
 +
 +    rho[point]=1.0/dgdx;
 +    point++;
 +
 +    if(point>=nmaxcorr)
 +      point=0;
 +
 +    /* Update */
 +    for(i=0;i<n;i++)
 +      p[i]=ff[i];
 +
 +    cp=point;
 +
 +    /* Recursive update. First go back over the memory points */
 +    for(k=0;k<ncorr;k++) {
 +      cp--;
 +      if(cp<0)
 +      cp=ncorr-1;
 +
 +      sq=0;
 +      for(i=0;i<n;i++)
 +      sq+=dx[cp][i]*p[i];
 +
 +      alpha[cp]=rho[cp]*sq;
 +
 +      for(i=0;i<n;i++)
 +      p[i] -= alpha[cp]*dg[cp][i];
 +    }
 +
 +    for(i=0;i<n;i++)
 +      p[i] *= diag;
 +
 +    /* And then go forward again */
 +    for(k=0;k<ncorr;k++) {
 +      yr = 0;
 +      for(i=0;i<n;i++)
 +      yr += p[i]*dg[cp][i];
 +
 +      beta = rho[cp]*yr;
 +      beta = alpha[cp]-beta;
 +
 +      for(i=0;i<n;i++)
 +      p[i] += beta*dx[cp][i];
 +
 +      cp++;
 +      if(cp>=ncorr)
 +      cp=0;
 +    }
 +
 +    for(i=0;i<n;i++)
 +      if(!frozen[i])
 +      dx[point][i] = p[i];
 +      else
 +      dx[point][i] = 0;
 +
 +    stepsize=1.0;
 +
 +    /* Test whether the convergence criterion is met */
 +    get_f_norm_max(cr,&(inputrec->opts),mdatoms,f,&fnorm,&fmax,&nfmax);
 +
 +    /* Print it if necessary */
 +    if (MASTER(cr)) {
 +      if(bVerbose)
 +      fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
 +              step,Epot,fnorm/sqrt(state->natoms),fmax,nfmax+1);
 +      /* Store the new (lower) energies */
 +      upd_mdebin(mdebin,FALSE,FALSE,(double)step,
 +                 mdatoms->tmass,enerd,state,inputrec->fepvals,inputrec->expandedvals,state->box,
 +                 NULL,NULL,vir,pres,NULL,mu_tot,constr);
 +      do_log = do_per_step(step,inputrec->nstlog);
 +      do_ene = do_per_step(step,inputrec->nstenergy);
 +      if(do_log)
 +          print_ebin_header(fplog,step,step,state->lambda[efptFEP]);
 +      print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
 +               do_log ? fplog : NULL,step,step,eprNORMAL,
 +               TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
 +    }
 +
 +    /* Stop when the maximum force lies below tolerance.
 +     * If we have reached machine precision, converged is already set to true.
 +     */
 +
 +    converged = converged || (fmax < inputrec->em_tol);
 +
 +  } /* End of the loop */
 +
 +  if(converged)
 +    step--; /* we never took that last step in this case */
 +
 +    if(fmax>inputrec->em_tol)
 +    {
 +        if (MASTER(cr))
 +        {
 +            warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
 +            warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
 +        }
 +        converged = FALSE;
 +    }
 +
 +  /* If we printed energy and/or logfile last step (which was the last step)
 +   * we don't have to do it again, but otherwise print the final values.
 +   */
 +  if(!do_log) /* Write final value to log since we didn't do anything the last step */
 +    print_ebin_header(fplog,step,step,state->lambda[efptFEP]);
 +  if(!do_ene || !do_log) /* Write final energy file entries */
 +    print_ebin(outf->fp_ene,!do_ene,FALSE,FALSE,
 +             !do_log ? fplog : NULL,step,step,eprNORMAL,
 +             TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
 +
 +  /* Print some stuff... */
 +  if (MASTER(cr))
 +    fprintf(stderr,"\nwriting lowest energy coordinates.\n");
 +
 +  /* IMPORTANT!
 +   * For accurate normal mode calculation it is imperative that we
 +   * store the last conformation into the full precision binary trajectory.
 +   *
 +   * However, we should only do it if we did NOT already write this step
 +   * above (which we did if do_x or do_f was true).
 +   */
 +  do_x = !do_per_step(step,inputrec->nstxout);
 +  do_f = !do_per_step(step,inputrec->nstfout);
 +  write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
 +                top_global,inputrec,step,
 +                &ems,state,f);
 +
 +  if (MASTER(cr)) {
 +    print_converged(stderr,LBFGS,inputrec->em_tol,step,converged,
 +                  number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
 +    print_converged(fplog,LBFGS,inputrec->em_tol,step,converged,
 +                  number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
 +
 +    fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
 +  }
 +
 +  finish_em(fplog,cr,outf,runtime,wcycle);
 +
 +  /* To print the actual number of steps we needed somewhere */
 +  runtime->nsteps_done = step;
 +
 +  return 0;
 +} /* That's all folks */
 +
 +
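The "recursive update" in do_lbfgs above is the standard L-BFGS two-loop recursion. The following stand-alone sketch (assumed name lbfgs_direction, flat double arrays, no frozen-atom mask) shows the same backward and forward passes in one place; dx and dg hold the stored position and gradient differences, rho their inverse dot products, and diag the scalar initial-Hessian guess computed above as dgdx/dgdg:

    #include <stddef.h>

    /* Turn the current force ff (negative gradient) into the quasi-Newton
     * search direction p using the last ncorr correction pairs.
     */
    static void lbfgs_direction(double **dx, double **dg, const double *rho,
                                double diag, const double *ff, double *p,
                                double *alpha, int ncorr, int point, size_t n)
    {
        int    k, cp;
        size_t i;
        double sq, yr, beta;

        for (i = 0; i < n; i++)
            p[i] = ff[i];                 /* start from the force (negative gradient) */

        cp = point;
        for (k = 0; k < ncorr; k++)       /* backward pass over the stored pairs */
        {
            cp = (cp - 1 + ncorr) % ncorr;
            sq = 0.0;
            for (i = 0; i < n; i++)
                sq += dx[cp][i]*p[i];
            alpha[cp] = rho[cp]*sq;
            for (i = 0; i < n; i++)
                p[i] -= alpha[cp]*dg[cp][i];
        }

        for (i = 0; i < n; i++)
            p[i] *= diag;                 /* initial Hessian estimate, diag = s.y/y.y */

        for (k = 0; k < ncorr; k++)       /* forward pass, starting at the oldest pair */
        {
            yr = 0.0;
            for (i = 0; i < n; i++)
                yr += p[i]*dg[cp][i];
            beta = alpha[cp] - rho[cp]*yr;
            for (i = 0; i < n; i++)
                p[i] += beta*dx[cp][i];
            cp = (cp + 1) % ncorr;
        }
    }

The resulting p is then copied into dx[point] (with frozen degrees of freedom zeroed) and used with a unit initial stepsize, as in the loop above.
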
 +double do_steep(FILE *fplog,t_commrec *cr,
 +                int nfile, const t_filenm fnm[],
 +                const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
 +                int nstglobalcomm,
 +                gmx_vsite_t *vsite,gmx_constr_t constr,
 +                int stepout,
 +                t_inputrec *inputrec,
 +                gmx_mtop_t *top_global,t_fcdata *fcd,
 +                t_state *state_global,
 +                t_mdatoms *mdatoms,
 +                t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +                gmx_edsam_t ed,
 +                t_forcerec *fr,
 +                int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
 +                gmx_membed_t membed,
 +                real cpt_period,real max_hours,
 +                const char *deviceOptions,
 +                unsigned long Flags,
 +                gmx_runtime_t *runtime)
 +{
 +  const char *SD="Steepest Descents";
 +  em_state_t *s_min,*s_try;
 +  rvec       *f_global;
 +  gmx_localtop_t *top;
 +  gmx_enerdata_t *enerd;
 +  rvec   *f;
 +  gmx_global_stat_t gstat;
 +  t_graph    *graph;
 +  real   stepsize,constepsize;
 +  real   ustep,dvdlambda,fnormn;
 +  gmx_mdoutf_t *outf;
 +  t_mdebin   *mdebin;
 +  gmx_bool   bDone,bAbort,do_x,do_f;
 +  tensor vir,pres;
 +  rvec   mu_tot;
 +  int    nsteps;
 +  int    count=0;
 +  int    steps_accepted=0;
 +  /* not used */
 +  real   terminate=0;
 +
 +  s_min = init_em_state();
 +  s_try = init_em_state();
 +
 +  /* Init em and store the local state in s_try */
 +  init_em(fplog,SD,cr,inputrec,
 +          state_global,top_global,s_try,&top,&f,&f_global,
 +          nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
 +          nfile,fnm,&outf,&mdebin);
 +
 +  /* Print to log file  */
 +  print_em_start(fplog,cr,runtime,wcycle,SD);
 +
 +  /* Set variables for stepsize (in nm). This is the largest
 +   * step that we are going to make in any direction.
 +   */
 +  ustep = inputrec->em_stepsize;
 +  stepsize = 0;
 +
 +  /* Max number of steps  */
 +  nsteps = inputrec->nsteps;
 +
 +  if (MASTER(cr))
 +    /* Print to the screen  */
 +    sp_header(stderr,SD,inputrec->em_tol,nsteps);
 +  if (fplog)
 +    sp_header(fplog,SD,inputrec->em_tol,nsteps);
 +
 +  /**** HERE STARTS THE LOOP ****
 +   * count is the counter for the number of steps
 +   * bDone will be TRUE when the minimization has converged
 +   * bAbort will be TRUE when nsteps steps have been performed or when
 +   * the stepsize becomes smaller than is reasonable for machine precision
 +   */
 +  count  = 0;
 +  bDone  = FALSE;
 +  bAbort = FALSE;
 +  while( !bDone && !bAbort ) {
 +    bAbort = (nsteps >= 0) && (count == nsteps);
 +
 +    /* set new coordinates, except for first step */
 +    if (count > 0) {
 +      do_em_step(cr,inputrec,mdatoms,s_min,stepsize,s_min->f,s_try,
 +               constr,top,nrnb,wcycle,count);
 +    }
 +
 +    evaluate_energy(fplog,bVerbose,cr,
 +                  state_global,top_global,s_try,top,
 +                  inputrec,nrnb,wcycle,gstat,
 +                  vsite,constr,fcd,graph,mdatoms,fr,
 +                  mu_tot,enerd,vir,pres,count,count==0);
 +
 +    if (MASTER(cr))
 +      print_ebin_header(fplog,count,count,s_try->s.lambda[efptFEP]);
 +
 +    if (count == 0)
 +      s_min->epot = s_try->epot + 1;
 +
 +    /* Print it if necessary  */
 +    if (MASTER(cr)) {
 +      if (bVerbose) {
 +      fprintf(stderr,"Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",
 +              count,ustep,s_try->epot,s_try->fmax,s_try->a_fmax+1,
 +              (s_try->epot < s_min->epot) ? '\n' : '\r');
 +      }
 +
 +      if (s_try->epot < s_min->epot) {
 +      /* Store the new (lower) energies  */
 +      upd_mdebin(mdebin,FALSE,FALSE,(double)count,
 +                 mdatoms->tmass,enerd,&s_try->s,inputrec->fepvals,inputrec->expandedvals,
 +                   s_try->s.box, NULL,NULL,vir,pres,NULL,mu_tot,constr);
 +      print_ebin(outf->fp_ene,TRUE,
 +                 do_per_step(steps_accepted,inputrec->nstdisreout),
 +                 do_per_step(steps_accepted,inputrec->nstorireout),
 +                 fplog,count,count,eprNORMAL,TRUE,
 +                 mdebin,fcd,&(top_global->groups),&(inputrec->opts));
 +      fflush(fplog);
 +      }
 +    }
 +
 +    /* Now if the new energy is smaller than the previous...
 +     * or if this is the first step!
 +     * or if we did random steps!
 +     */
 +
 +    if ( (count==0) || (s_try->epot < s_min->epot) ) {
 +      steps_accepted++;
 +
 +      /* Test whether the convergence criterion is met...  */
 +      bDone = (s_try->fmax < inputrec->em_tol);
 +
 +      /* Copy the arrays for force, positions and energy  */
 +      /* The 'Min' array always holds the coords and forces of the minimal
 +       * sampled energy  */
 +      swap_em_state(s_min,s_try);
 +      if (count > 0)
 +      ustep *= 1.2;
 +
 +      /* Write to trn, if necessary */
 +      do_x = do_per_step(steps_accepted,inputrec->nstxout);
 +      do_f = do_per_step(steps_accepted,inputrec->nstfout);
 +      write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
 +                    top_global,inputrec,count,
 +                    s_min,state_global,f_global);
 +    }
 +    else {
 +      /* If energy is not smaller make the step smaller...  */
 +      ustep *= 0.5;
 +
 +      if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
 +      /* Reload the old state */
 +      em_dd_partition_system(fplog,count,cr,top_global,inputrec,
 +                             s_min,top,mdatoms,fr,vsite,constr,
 +                             nrnb,wcycle);
 +      }
 +    }
 +
 +    /* Determine new step  */
 +    stepsize = ustep/s_min->fmax;
 +
 +    /* Check if stepsize is too small, with 1 nm as a characteristic length */
 +#ifdef GMX_DOUBLE
 +        if (count == nsteps || ustep < 1e-12)
 +#else
 +        if (count == nsteps || ustep < 1e-6)
 +#endif
 +        {
 +            if (MASTER(cr))
 +            {
 +                warn_step(stderr,inputrec->em_tol,count==nsteps,constr!=NULL);
 +                warn_step(fplog ,inputrec->em_tol,count==nsteps,constr!=NULL);
 +            }
 +            bAbort=TRUE;
 +        }
 +
 +    count++;
 +  } /* End of the loop  */
 +
 +    /* Print some stuff...  */
 +  if (MASTER(cr))
 +    fprintf(stderr,"\nwriting lowest energy coordinates.\n");
 +  write_em_traj(fplog,cr,outf,TRUE,inputrec->nstfout,ftp2fn(efSTO,nfile,fnm),
 +              top_global,inputrec,count,
 +              s_min,state_global,f_global);
 +
 +  fnormn = s_min->fnorm/sqrt(state_global->natoms);
 +
 +  if (MASTER(cr)) {
 +    print_converged(stderr,SD,inputrec->em_tol,count,bDone,nsteps,
 +                  s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
 +    print_converged(fplog,SD,inputrec->em_tol,count,bDone,nsteps,
 +                  s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
 +  }
 +
 +  finish_em(fplog,cr,outf,runtime,wcycle);
 +
 +  /* To print the actual number of steps we needed somewhere */
 +  inputrec->nsteps=count;
 +
 +  runtime->nsteps_done = count;
 +
 +  return 0;
 +} /* That's all folks */
 +
 +
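For reference, the core of one steepest-descent trial in do_steep above reduces to scaling the forces of the current minimum so that the largest displacement equals ustep; a minimal sketch (assumed name sd_trial_coords, flat double arrays instead of rvec) is:

    #include <stddef.h>

    /* Generate trial coordinates: the atom component with the largest force
     * (fmax) moves by exactly ustep nm, everything else proportionally less.
     */
    static void sd_trial_coords(const double *x_min, const double *f_min,
                                double fmax, double ustep,
                                double *x_try, size_t n)
    {
        size_t i;
        double stepsize = ustep/fmax;

        for (i = 0; i < n; i++)
        {
            x_try[i] = x_min[i] + stepsize*f_min[i];
        }
    }

After the energy evaluation, ustep is multiplied by 1.2 when the trial energy is lower (the step is accepted) and by 0.5 otherwise, matching the accept/reject branches above.
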
 +double do_nm(FILE *fplog,t_commrec *cr,
 +             int nfile,const t_filenm fnm[],
 +             const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
 +             int nstglobalcomm,
 +             gmx_vsite_t *vsite,gmx_constr_t constr,
 +             int stepout,
 +             t_inputrec *inputrec,
 +             gmx_mtop_t *top_global,t_fcdata *fcd,
 +             t_state *state_global,
 +             t_mdatoms *mdatoms,
 +             t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +             gmx_edsam_t ed,
 +             t_forcerec *fr,
 +             int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
 +             gmx_membed_t membed,
 +             real cpt_period,real max_hours,
 +             const char *deviceOptions,
 +             unsigned long Flags,
 +             gmx_runtime_t *runtime)
 +{
 +    const char *NM = "Normal Mode Analysis";
 +    gmx_mdoutf_t *outf;
 +    int        natoms,atom,d;
 +    int        nnodes,node;
 +    rvec       *f_global;
 +    gmx_localtop_t *top;
 +    gmx_enerdata_t *enerd;
 +    rvec       *f;
 +    gmx_global_stat_t gstat;
 +    t_graph    *graph;
 +    real       t,t0,lambda,lam0;
 +    gmx_bool       bNS;
 +    tensor     vir,pres;
 +    rvec       mu_tot;
 +    rvec       *fneg,*dfdx;
 +    gmx_bool       bSparse; /* use sparse matrix storage format */
 +    size_t     sz;
 +    gmx_sparsematrix_t * sparse_matrix = NULL;
 +    real *     full_matrix             = NULL;
 +    em_state_t *   state_work;
 +
 +    /* added with respect to mdrun */
 +    int        i,j,k,row,col;
 +    real       der_range=10.0*sqrt(GMX_REAL_EPS);
 +    real       x_min;
 +    real       fnorm,fmax;
 +
 +    if (constr != NULL)
 +    {
 +        gmx_fatal(FARGS,"Constraints present with Normal Mode Analysis, this combination is not supported");
 +    }
 +
 +    state_work = init_em_state();
 +
 +    /* Init em and store the local state in state_work */
 +    init_em(fplog,NM,cr,inputrec,
 +            state_global,top_global,state_work,&top,
 +            &f,&f_global,
 +            nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
 +            nfile,fnm,&outf,NULL);
 +
 +    natoms = top_global->natoms;
 +    snew(fneg,natoms);
 +    snew(dfdx,natoms);
 +
 +#ifndef GMX_DOUBLE
 +    if (MASTER(cr))
 +    {
 +        fprintf(stderr,
 +                "NOTE: This version of Gromacs has been compiled in single precision,\n"
 +                "      which MIGHT not be accurate enough for normal mode analysis.\n"
 +                "      Gromacs now uses sparse matrix storage, so the memory requirements\n"
 +                "      are fairly modest even if you recompile in double precision.\n\n");
 +    }
 +#endif
 +
 +    /* Check if we can/should use sparse storage format.
 +     *
 +     * Sparse format is only useful when the Hessian itself is sparse, which it
 +     * will be when we use a cutoff.
 +     * For small systems (n<1000) it is easier to always use full matrix format, though.
 +     */
 +    if(EEL_FULL(fr->eeltype) || fr->rlist==0.0)
 +    {
 +        fprintf(stderr,"Non-cutoff electrostatics used, forcing full Hessian format.\n");
 +        bSparse = FALSE;
 +    }
 +    else if(top_global->natoms < 1000)
 +    {
 +        fprintf(stderr,"Small system size (N=%d), using full Hessian format.\n",top_global->natoms);
 +        bSparse = FALSE;
 +    }
 +    else
 +    {
 +        fprintf(stderr,"Using compressed symmetric sparse Hessian format.\n");
 +        bSparse = TRUE;
 +    }
 +
 +    sz = DIM*top_global->natoms;
 +
 +    fprintf(stderr,"Allocating Hessian memory...\n\n");
 +
 +    if(bSparse)
 +    {
 +        sparse_matrix=gmx_sparsematrix_init(sz);
 +        sparse_matrix->compressed_symmetric = TRUE;
 +    }
 +    else
 +    {
 +        snew(full_matrix,sz*sz);
 +    }
 +
 +    /* Initial values */
 +    t0           = inputrec->init_t;
 +    lam0         = inputrec->fepvals->init_lambda;
 +    t            = t0;
 +    lambda       = lam0;
 +
 +    init_nrnb(nrnb);
 +
 +    where();
 +
 +    /* Write start time and temperature */
 +    print_em_start(fplog,cr,runtime,wcycle,NM);
 +
 +    /* fudge nr of steps to nr of atoms */
 +    inputrec->nsteps = natoms*2;
 +
 +    if (MASTER(cr))
 +    {
 +        fprintf(stderr,"starting normal mode calculation '%s'\n%d steps.\n\n",
 +                *(top_global->name),(int)inputrec->nsteps);
 +    }
 +
 +    nnodes = cr->nnodes;
 +
 +    /* Make evaluate_energy do a single node force calculation */
 +    cr->nnodes = 1;
 +    evaluate_energy(fplog,bVerbose,cr,
 +                    state_global,top_global,state_work,top,
 +                    inputrec,nrnb,wcycle,gstat,
 +                    vsite,constr,fcd,graph,mdatoms,fr,
 +                    mu_tot,enerd,vir,pres,-1,TRUE);
 +    cr->nnodes = nnodes;
 +
 +    /* if forces are not small, warn user */
 +    get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,state_work);
 +
 +    if (MASTER(cr))
 +    {
 +        fprintf(stderr,"Maximum force:%12.5e\n",state_work->fmax);
 +        if (state_work->fmax > 1.0e-3)
 +        {
 +            fprintf(stderr,"Maximum force probably not small enough to");
 +            fprintf(stderr," ensure that you are in an \nenergy well. ");
 +            fprintf(stderr,"Be aware that negative eigenvalues may occur");
 +            fprintf(stderr," when the\nresulting matrix is diagonalized.\n");
 +        }
 +    }
 +
 +    /***********************************************************
 +     *
 +     *      Loop over all pairs in matrix
 +     *
 +     *      do_force called twice. Once with positive and
 +     *      once with negative displacement
 +     *
 +     ************************************************************/
 +
 +    /* Steps are divided one by one over the nodes */
 +    for(atom=cr->nodeid; atom<natoms; atom+=nnodes)
 +    {
 +
 +        for (d=0; d<DIM; d++)
 +        {
 +            x_min = state_work->s.x[atom][d];
 +
 +            state_work->s.x[atom][d] = x_min - der_range;
 +
 +            /* Make evaluate_energy do a single node force calculation */
 +            cr->nnodes = 1;
 +            evaluate_energy(fplog,bVerbose,cr,
 +                            state_global,top_global,state_work,top,
 +                            inputrec,nrnb,wcycle,gstat,
 +                            vsite,constr,fcd,graph,mdatoms,fr,
 +                            mu_tot,enerd,vir,pres,atom*2,FALSE);
 +
 +            for(i=0; i<natoms; i++)
 +            {
 +                copy_rvec(state_work->f[i], fneg[i]);
 +            }
 +
 +            state_work->s.x[atom][d] = x_min + der_range;
 +
 +            evaluate_energy(fplog,bVerbose,cr,
 +                            state_global,top_global,state_work,top,
 +                            inputrec,nrnb,wcycle,gstat,
 +                            vsite,constr,fcd,graph,mdatoms,fr,
 +                            mu_tot,enerd,vir,pres,atom*2+1,FALSE);
 +            cr->nnodes = nnodes;
 +
 +            /* x is restored to original */
 +            state_work->s.x[atom][d] = x_min;
 +
 +            for(j=0; j<natoms; j++)
 +            {
 +                for (k=0; (k<DIM); k++)
 +                {
 +                    dfdx[j][k] =
 +                        -(state_work->f[j][k] - fneg[j][k])/(2*der_range);
 +                }
 +            }
 +
 +            if (!MASTER(cr))
 +            {
 +#ifdef GMX_MPI
 +#ifdef GMX_DOUBLE
 +#define mpi_type MPI_DOUBLE
 +#else
 +#define mpi_type MPI_FLOAT
 +#endif
 +                MPI_Send(dfdx[0],natoms*DIM,mpi_type,MASTERNODE(cr),cr->nodeid,
 +                         cr->mpi_comm_mygroup);
 +#endif
 +            }
 +            else
 +            {
 +                for(node=0; (node<nnodes && atom+node<natoms); node++)
 +                {
 +                    if (node > 0)
 +                    {
 +#ifdef GMX_MPI
 +                        MPI_Status stat;
 +                        MPI_Recv(dfdx[0],natoms*DIM,mpi_type,node,node,
 +                                 cr->mpi_comm_mygroup,&stat);
 +#undef mpi_type
 +#endif
 +                    }
 +
 +                    row = (atom + node)*DIM + d;
 +
 +                    for(j=0; j<natoms; j++)
 +                    {
 +                        for(k=0; k<DIM; k++)
 +                        {
 +                            col = j*DIM + k;
 +
 +                            if (bSparse)
 +                            {
 +                                if (col >= row && dfdx[j][k] != 0.0)
 +                                {
 +                                    gmx_sparsematrix_increment_value(sparse_matrix,
 +                                                                     row,col,dfdx[j][k]);
 +                                }
 +                            }
 +                            else
 +                            {
 +                                full_matrix[row*sz+col] = dfdx[j][k];
 +                            }
 +                        }
 +                    }
 +                }
 +            }
 +
 +            if (bVerbose && fplog)
 +            {
 +                fflush(fplog);
 +            }
 +        }
 +        /* write progress */
 +        if (MASTER(cr) && bVerbose)
 +        {
 +            fprintf(stderr,"\rFinished step %d out of %d",
 +                    min(atom+nnodes,natoms),natoms);
 +            fflush(stderr);
 +        }
 +    }
 +
 +    if (MASTER(cr))
 +    {
 +        fprintf(stderr,"\n\nWriting Hessian...\n");
 +        gmx_mtxio_write(ftp2fn(efMTX,nfile,fnm),sz,sz,full_matrix,sparse_matrix);
 +    }
 +
 +    finish_em(fplog,cr,outf,runtime,wcycle);
 +
 +    runtime->nsteps_done = natoms*2;
 +
 +    return 0;
 +}
 +
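The inner loop of do_nm above fills one Hessian row block per displaced degree of freedom using central differences of the forces. A minimal sketch of the element it stores (assumed name hessian_element, with h standing for der_range) is:

    /* One Hessian element by central differences: H[row][col] is the second
     * derivative of the potential, and since F = -dV/dx it becomes
     *   H[row][col] = -(F_col(x_row + h) - F_col(x_row - h)) / (2*h).
     */
    static double hessian_element(double f_plus, double f_minus, double h)
    {
        return -(f_plus - f_minus)/(2.0*h);
    }
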
index 8837bb5edad325f5139efcdd5d8ad11b751f7450,0000000000000000000000000000000000000000..e87728e9570dff6fbd52dc9aea4fd9834b0b0a80
mode 100644,000000..100644
--- /dev/null
@@@ -1,4364 -1,0 +1,4368 @@@
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +/* IMPORTANT FOR DEVELOPERS:
 + *
 + * Triclinic pme stuff isn't entirely trivial, and we've experienced
 + * some bugs during development (many of them due to me). To avoid
 + * this in the future, please check the following things if you make
 + * changes in this file:
 + *
 + * 1. You should obtain identical (at least to the PME precision)
 + *    energies, forces, and virial for
 + *    a rectangular box and a triclinic one where the z (or y) axis is
 + *    tilted a whole box side. For instance you could use these boxes:
 + *
 + *    rectangular       triclinic
 + *     2  0  0           2  0  0
 + *     0  2  0           0  2  0
 + *     0  0  6           2  2  6
 + *
 + * 2. You should check the energy conservation in a triclinic box.
 + *
 + * It might seem like overkill, but better safe than sorry.
 + * /Erik 001109
 + */
 +
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#ifdef GMX_LIB_MPI
 +#include <mpi.h>
 +#endif
 +#ifdef GMX_THREAD_MPI
 +#include "tmpi.h"
 +#endif
 +
 +#include <stdio.h>
 +#include <string.h>
 +#include <math.h>
 +#include <assert.h>
 +#include "typedefs.h"
 +#include "txtdump.h"
 +#include "vec.h"
 +#include "gmxcomplex.h"
 +#include "smalloc.h"
 +#include "futil.h"
 +#include "coulomb.h"
 +#include "gmx_fatal.h"
 +#include "pme.h"
 +#include "network.h"
 +#include "physics.h"
 +#include "nrnb.h"
 +#include "copyrite.h"
 +#include "gmx_wallcycle.h"
 +#include "gmx_parallel_3dfft.h"
 +#include "pdbio.h"
 +#include "gmx_cyclecounter.h"
 +#include "macros.h"
 +
 +/* Single precision, with SSE2 or higher available */
 +#if defined(GMX_X86_SSE2) && !defined(GMX_DOUBLE)
 +
 +#include "gmx_x86_sse2.h"
 +#include "gmx_math_x86_sse2_single.h"
 +
 +#define PME_SSE
 +/* Some old AMD processors could have problems with unaligned loads+stores */
 +#ifndef GMX_FAHCORE
 +#define PME_SSE_UNALIGNED
 +#endif
 +#endif
 +
 +#define DFT_TOL 1e-7
 +/* #define PRT_FORCE */
 +/* conditions for on the fly time-measurement */
 +/* #define TAKETIME (step > 1 && timesteps < 10) */
 +#define TAKETIME FALSE
 +
 +/* #define PME_TIME_THREADS */
 +
 +#ifdef GMX_DOUBLE
 +#define mpi_type MPI_DOUBLE
 +#else
 +#define mpi_type MPI_FLOAT
 +#endif
 +
 +/* GMX_CACHE_SEP should be a multiple of 16 to preserve alignment */
 +#define GMX_CACHE_SEP 64
 +
 +/* We only define a maximum to be able to use local arrays without allocation.
 + * An order larger than 12 should never be needed, even for test cases.
 + * If needed it can be changed here.
 + */
 +#define PME_ORDER_MAX 12
 +
 +/* Internal datastructures */
 +typedef struct {
 +    int send_index0;
 +    int send_nindex;
 +    int recv_index0;
 +    int recv_nindex;
 +} pme_grid_comm_t;
 +
 +typedef struct {
 +#ifdef GMX_MPI
 +    MPI_Comm mpi_comm;
 +#endif
 +    int  nnodes,nodeid;
 +    int  *s2g0;
 +    int  *s2g1;
 +    int  noverlap_nodes;
 +    int  *send_id,*recv_id;
 +    pme_grid_comm_t *comm_data;
 +    real *sendbuf;
 +    real *recvbuf;
 +} pme_overlap_t;
 +
 +typedef struct {
 +    int *n;     /* Cumulative counts of the number of particles per thread */
 +    int nalloc; /* Allocation size of i */
 +    int *i;     /* Particle indices ordered on thread index (n) */
 +} thread_plist_t;
 +
 +typedef struct {
 +    int  n;
 +    int  *ind;
 +    splinevec theta;
 +    splinevec dtheta;
 +} splinedata_t;
 +
 +typedef struct {
 +    int  dimind;            /* The index of the dimension, 0=x, 1=y */
 +    int  nslab;
 +    int  nodeid;
 +#ifdef GMX_MPI
 +    MPI_Comm mpi_comm;
 +#endif
 +
 +    int  *node_dest;        /* The nodes to send x and q to with DD */
 +    int  *node_src;         /* The nodes to receive x and q from with DD */
 +    int  *buf_index;        /* Index for commnode into the buffers */
 +
 +    int  maxshift;
 +
 +    int  npd;
 +    int  pd_nalloc;
 +    int  *pd;
 +    int  *count;            /* The number of atoms to send to each node */
 +    int  **count_thread;
 +    int  *rcount;           /* The number of atoms to receive */
 +
 +    int  n;
 +    int  nalloc;
 +    rvec *x;
 +    real *q;
 +    rvec *f;
 +    gmx_bool bSpread;       /* These coordinates are used for spreading */
 +    int  pme_order;
 +    ivec *idx;
 +    rvec *fractx;            /* Fractional coordinate relative to the
 +                              * lower cell boundary
 +                              */
 +    int  nthread;
 +    int  *thread_idx;        /* Which thread should spread which charge */
 +    thread_plist_t *thread_plist;
 +    splinedata_t *spline;
 +} pme_atomcomm_t;
 +
 +#define FLBS  3
 +#define FLBSZ 4
 +
 +typedef struct {
 +    ivec ci;     /* The spatial location of this grid       */
 +    ivec n;      /* The size of *grid, including order-1    */
 +    ivec offset; /* The grid offset from the full node grid */
 +    int  order;  /* PME spreading order                     */
 +    real *grid;  /* The grid local thread, size n           */
 +} pmegrid_t;
 +
 +typedef struct {
 +    pmegrid_t grid;     /* The full node grid (non thread-local)            */
 +    int  nthread;       /* The number of threads operating on this grid     */
 +    ivec nc;            /* The local spatial decomposition over the threads */
 +    pmegrid_t *grid_th; /* Array of grids for each thread                   */
 +    int  **g2t;         /* The grid to thread index                         */
 +    ivec nthread_comm;  /* The number of threads to communicate with        */
 +} pmegrids_t;
 +
 +
 +typedef struct {
 +#ifdef PME_SSE
 +    /* Masks for SSE aligned spreading and gathering */
 +    __m128 mask_SSE0[6],mask_SSE1[6];
 +#else
 +    int dummy; /* C89 requires that struct has at least one member */
 +#endif
 +} pme_spline_work_t;
 +
 +typedef struct {
 +    /* work data for solve_pme */
 +    int      nalloc;
 +    real *   mhx;
 +    real *   mhy;
 +    real *   mhz;
 +    real *   m2;
 +    real *   denom;
 +    real *   tmp1_alloc;
 +    real *   tmp1;
 +    real *   eterm;
 +    real *   m2inv;
 +
 +    real     energy;
 +    matrix   vir;
 +} pme_work_t;
 +
 +typedef struct gmx_pme {
 +    int  ndecompdim;         /* The number of decomposition dimensions */
 +    int  nodeid;             /* Our nodeid in mpi->mpi_comm */
 +    int  nodeid_major;
 +    int  nodeid_minor;
 +    int  nnodes;             /* The number of nodes doing PME */
 +    int  nnodes_major;
 +    int  nnodes_minor;
 +
 +    MPI_Comm mpi_comm;
 +    MPI_Comm mpi_comm_d[2];  /* Indexed on dimension, 0=x, 1=y */
 +#ifdef GMX_MPI
 +    MPI_Datatype  rvec_mpi;  /* the pme vector's MPI type */
 +#endif
 +
 +    int  nthread;            /* The number of threads doing PME */
 +
 +    gmx_bool bPPnode;        /* Node also does particle-particle forces */
 +    gmx_bool bFEP;           /* Compute Free energy contribution */
 +    int nkx,nky,nkz;         /* Grid dimensions */
 +    gmx_bool bP3M;           /* Do P3M: optimize the influence function */
 +    int pme_order;
 +    real epsilon_r;
 +
 +    pmegrids_t pmegridA;  /* Grids on which we do spreading/interpolation, includes overlap */
 +    pmegrids_t pmegridB;
 +    /* The PME charge spreading grid sizes/strides, includes pme_order-1 */
 +    int     pmegrid_nx,pmegrid_ny,pmegrid_nz;
 +    /* pmegrid_nz might be larger than strictly necessary to ensure
 +     * memory alignment, pmegrid_nz_base gives the real base size.
 +     */
 +    int     pmegrid_nz_base;
 +    /* The local PME grid starting indices */
 +    int     pmegrid_start_ix,pmegrid_start_iy,pmegrid_start_iz;
 +
 +    /* Work data for spreading and gathering */
 +    pme_spline_work_t *spline_work;
 +
 +    real *fftgridA;             /* Grids for FFT. With 1D FFT decomposition this can be a pointer */
 +    real *fftgridB;             /* inside the interpolation grid, but separate for 2D PME decomp. */
 +    int   fftgrid_nx,fftgrid_ny,fftgrid_nz;
 +
 +    t_complex *cfftgridA;             /* Grids for complex FFT data */
 +    t_complex *cfftgridB;
 +    int   cfftgrid_nx,cfftgrid_ny,cfftgrid_nz;
 +
 +    gmx_parallel_3dfft_t  pfft_setupA;
 +    gmx_parallel_3dfft_t  pfft_setupB;
 +
 +    int  *nnx,*nny,*nnz;
 +    real *fshx,*fshy,*fshz;
 +
 +    pme_atomcomm_t atc[2];  /* Indexed on decomposition index */
 +    matrix    recipbox;
 +    splinevec bsp_mod;
 +
 +    pme_overlap_t overlap[2]; /* Indexed on dimension, 0=x, 1=y */
 +
 +    pme_atomcomm_t atc_energy; /* Only for gmx_pme_calc_energy */
 +
 +    rvec *bufv;             /* Communication buffer */
 +    real *bufr;             /* Communication buffer */
 +    int  buf_nalloc;        /* The communication buffer size */
 +
 +    /* thread local work data for solve_pme */
 +    pme_work_t *work;
 +
 +    /* Work data for PME_redist */
 +    gmx_bool redist_init;
 +    int *    scounts;
 +    int *    rcounts;
 +    int *    sdispls;
 +    int *    rdispls;
 +    int *    sidx;
 +    int *    idxa;
 +    real *   redist_buf;
 +    int      redist_buf_nalloc;
 +
 +    /* Work data for sum_qgrid */
 +    real *   sum_qgrid_tmp;
 +    real *   sum_qgrid_dd_tmp;
 +} t_gmx_pme;
 +
 +
 +static void calc_interpolation_idx(gmx_pme_t pme,pme_atomcomm_t *atc,
 +                                   int start,int end,int thread)
 +{
 +    int  i;
 +    int  *idxptr,tix,tiy,tiz;
 +    real *xptr,*fptr,tx,ty,tz;
 +    real rxx,ryx,ryy,rzx,rzy,rzz;
 +    int  nx,ny,nz;
 +    int  start_ix,start_iy,start_iz;
 +    int  *g2tx,*g2ty,*g2tz;
 +    gmx_bool bThreads;
 +    int  *thread_idx=NULL;
 +    thread_plist_t *tpl=NULL;
 +    int  *tpl_n=NULL;
 +    int  thread_i;
 +
 +    nx  = pme->nkx;
 +    ny  = pme->nky;
 +    nz  = pme->nkz;
 +
 +    start_ix = pme->pmegrid_start_ix;
 +    start_iy = pme->pmegrid_start_iy;
 +    start_iz = pme->pmegrid_start_iz;
 +
 +    rxx = pme->recipbox[XX][XX];
 +    ryx = pme->recipbox[YY][XX];
 +    ryy = pme->recipbox[YY][YY];
 +    rzx = pme->recipbox[ZZ][XX];
 +    rzy = pme->recipbox[ZZ][YY];
 +    rzz = pme->recipbox[ZZ][ZZ];
 +
 +    g2tx = pme->pmegridA.g2t[XX];
 +    g2ty = pme->pmegridA.g2t[YY];
 +    g2tz = pme->pmegridA.g2t[ZZ];
 +
 +    bThreads = (atc->nthread > 1);
 +    if (bThreads)
 +    {
 +        thread_idx = atc->thread_idx;
 +
 +        tpl   = &atc->thread_plist[thread];
 +        tpl_n = tpl->n;
 +        for(i=0; i<atc->nthread; i++)
 +        {
 +            tpl_n[i] = 0;
 +        }
 +    }
 +
 +    for(i=start; i<end; i++) {
 +        xptr   = atc->x[i];
 +        idxptr = atc->idx[i];
 +        fptr   = atc->fractx[i];
 +
 +        /* Fractional coordinates along box vectors, add 2.0 to make 100% sure we are positive for triclinic boxes */
 +        tx = nx * ( xptr[XX] * rxx + xptr[YY] * ryx + xptr[ZZ] * rzx + 2.0 );
 +        ty = ny * (                  xptr[YY] * ryy + xptr[ZZ] * rzy + 2.0 );
 +        tz = nz * (                                   xptr[ZZ] * rzz + 2.0 );
 +
 +        tix = (int)(tx);
 +        tiy = (int)(ty);
 +        tiz = (int)(tz);
 +
 +        /* Because decomposition only occurs in x and y,
 +         * we never have a fraction correction in z.
 +         */
 +        fptr[XX] = tx - tix + pme->fshx[tix];
 +        fptr[YY] = ty - tiy + pme->fshy[tiy];
 +        fptr[ZZ] = tz - tiz;
 +
 +        idxptr[XX] = pme->nnx[tix];
 +        idxptr[YY] = pme->nny[tiy];
 +        idxptr[ZZ] = pme->nnz[tiz];
 +
 +#ifdef DEBUG
 +        range_check(idxptr[XX],0,pme->pmegrid_nx);
 +        range_check(idxptr[YY],0,pme->pmegrid_ny);
 +        range_check(idxptr[ZZ],0,pme->pmegrid_nz);
 +#endif
 +
 +        if (bThreads)
 +        {
 +            thread_i = g2tx[idxptr[XX]] + g2ty[idxptr[YY]] + g2tz[idxptr[ZZ]];
 +            thread_idx[i] = thread_i;
 +            tpl_n[thread_i]++;
 +        }
 +    }
 +
 +    if (bThreads)
 +    {
 +        /* Make a list of particle indices sorted on thread */
 +
 +        /* Get the cumulative count */
 +        for(i=1; i<atc->nthread; i++)
 +        {
 +            tpl_n[i] += tpl_n[i-1];
 +        }
 +        /* The current implementation distributes particles equally
 +         * over the threads, so we could actually allocate for that
 +         * in pme_realloc_atomcomm_things.
 +         */
 +        if (tpl_n[atc->nthread-1] > tpl->nalloc)
 +        {
 +            tpl->nalloc = over_alloc_large(tpl_n[atc->nthread-1]);
 +            srenew(tpl->i,tpl->nalloc);
 +        }
 +        /* Set tpl_n to the cumulative start */
 +        for(i=atc->nthread-1; i>=1; i--)
 +        {
 +            tpl_n[i] = tpl_n[i-1];
 +        }
 +        tpl_n[0] = 0;
 +
 +        /* Fill our thread local array with indices sorted on thread */
 +        for(i=start; i<end; i++)
 +        {
 +            tpl->i[tpl_n[atc->thread_idx[i]]++] = i;
 +        }
 +        /* Now tpl_n contains the cumulative count again */
 +    }
 +}
 +
 +static void make_thread_local_ind(pme_atomcomm_t *atc,
 +                                  int thread,splinedata_t *spline)
 +{
 +    int  n,t,i,start,end;
 +    thread_plist_t *tpl;
 +
 +    /* Combine the indices made by each thread into one index */
 +
 +    n = 0;
 +    start = 0;
 +    for(t=0; t<atc->nthread; t++)
 +    {
 +        tpl = &atc->thread_plist[t];
 +        /* Copy our part (start - end) from the list of thread t */
 +        if (thread > 0)
 +        {
 +            start = tpl->n[thread-1];
 +        }
 +        end = tpl->n[thread];
 +        for(i=start; i<end; i++)
 +        {
 +            spline->ind[n++] = tpl->i[i];
 +        }
 +    }
 +
 +    spline->n = n;
 +}
 +
 +
 +static void pme_calc_pidx(int start, int end,
 +                          matrix recipbox, rvec x[],
 +                          pme_atomcomm_t *atc, int *count)
 +{
 +    int  nslab,i;
 +    int  si;
 +    real *xptr,s;
 +    real rxx,ryx,rzx,ryy,rzy;
 +    int *pd;
 +
 +    /* Calculate the PME task index (pidx) for each particle.
 +     * Here we always assign equally sized slabs to each node
 +     * for load balancing reasons (the PME grid spacing is not used).
 +     */
 +
 +    nslab = atc->nslab;
 +    pd    = atc->pd;
 +
 +    /* Reset the count */
 +    for(i=0; i<nslab; i++)
 +    {
 +        count[i] = 0;
 +    }
 +
 +    if (atc->dimind == 0)
 +    {
 +        rxx = recipbox[XX][XX];
 +        ryx = recipbox[YY][XX];
 +        rzx = recipbox[ZZ][XX];
 +        /* Calculate the node index in x-dimension */
 +        for(i=start; i<end; i++)
 +        {
 +            xptr   = x[i];
 +            /* Fractional coordinates along box vectors */
 +            s = nslab*(xptr[XX]*rxx + xptr[YY]*ryx + xptr[ZZ]*rzx);
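 +            /* Adding 2*nslab before truncation keeps the argument non-negative
 +             * for coordinates slightly outside the box, so % nslab yields a
 +             * valid slab index.
 +             */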
 +            si = (int)(s + 2*nslab) % nslab;
 +            pd[i] = si;
 +            count[si]++;
 +        }
 +    }
 +    else
 +    {
 +        ryy = recipbox[YY][YY];
 +        rzy = recipbox[ZZ][YY];
 +        /* Calculate the node index in y-dimension */
 +        for(i=start; i<end; i++)
 +        {
 +            xptr   = x[i];
 +            /* Fractional coordinates along box vectors */
 +            s = nslab*(xptr[YY]*ryy + xptr[ZZ]*rzy);
 +            si = (int)(s + 2*nslab) % nslab;
 +            pd[i] = si;
 +            count[si]++;
 +        }
 +    }
 +}
 +
 +static void pme_calc_pidx_wrapper(int natoms, matrix recipbox, rvec x[],
 +                                  pme_atomcomm_t *atc)
 +{
 +    int nthread,thread,slab;
 +
 +    nthread = atc->nthread;
 +
 +#pragma omp parallel for num_threads(nthread) schedule(static)
 +    for(thread=0; thread<nthread; thread++)
 +    {
 +        pme_calc_pidx(natoms* thread   /nthread,
 +                      natoms*(thread+1)/nthread,
 +                      recipbox,x,atc,atc->count_thread[thread]);
 +    }
 +    /* Non-parallel reduction, since nslab is small */
 +
 +    for(thread=1; thread<nthread; thread++)
 +    {
 +        for(slab=0; slab<atc->nslab; slab++)
 +        {
 +            atc->count_thread[0][slab] += atc->count_thread[thread][slab];
 +        }
 +    }
 +}
 +
 +static void pme_realloc_splinedata(splinedata_t *spline, pme_atomcomm_t *atc)
 +{
 +    int i,d;
 +
 +    srenew(spline->ind,atc->nalloc);
 +    /* Initialize the index to identity so it works without threads */
 +    for(i=0; i<atc->nalloc; i++)
 +    {
 +        spline->ind[i] = i;
 +    }
 +
 +    for(d=0;d<DIM;d++)
 +    {
 +        srenew(spline->theta[d] ,atc->pme_order*atc->nalloc);
 +        srenew(spline->dtheta[d],atc->pme_order*atc->nalloc);
 +    }
 +}
 +
 +static void pme_realloc_atomcomm_things(pme_atomcomm_t *atc)
 +{
 +    int nalloc_old,i,j,nalloc_tpl;
 +
 +    /* We must avoid a NULL pointer for atc->x to prevent
 +     * possible fatal errors in MPI routines.
 +     */
 +    if (atc->n > atc->nalloc || atc->nalloc == 0)
 +    {
 +        nalloc_old = atc->nalloc;
 +        atc->nalloc = over_alloc_dd(max(atc->n,1));
 +
 +        if (atc->nslab > 1) {
 +            srenew(atc->x,atc->nalloc);
 +            srenew(atc->q,atc->nalloc);
 +            srenew(atc->f,atc->nalloc);
 +            for(i=nalloc_old; i<atc->nalloc; i++)
 +            {
 +                clear_rvec(atc->f[i]);
 +            }
 +        }
 +        if (atc->bSpread) {
 +            srenew(atc->fractx,atc->nalloc);
 +            srenew(atc->idx   ,atc->nalloc);
 +
 +            if (atc->nthread > 1)
 +            {
 +                srenew(atc->thread_idx,atc->nalloc);
 +            }
 +
 +            for(i=0; i<atc->nthread; i++)
 +            {
 +                pme_realloc_splinedata(&atc->spline[i],atc);
 +            }
 +        }
 +    }
 +}
 +
 +static void pmeredist_pd(gmx_pme_t pme, gmx_bool forw,
 +                         int n, gmx_bool bXF, rvec *x_f, real *charge,
 +                         pme_atomcomm_t *atc)
 +/* Redistribute particle data for PME calculation */
 +/* domain decomposition by x coordinate           */
 +{
 +    int *idxa;
 +    int i, ii;
 +
 +    if(FALSE == pme->redist_init) {
 +        snew(pme->scounts,atc->nslab);
 +        snew(pme->rcounts,atc->nslab);
 +        snew(pme->sdispls,atc->nslab);
 +        snew(pme->rdispls,atc->nslab);
 +        snew(pme->sidx,atc->nslab);
 +        pme->redist_init = TRUE;
 +    }
 +    if (n > pme->redist_buf_nalloc) {
 +        pme->redist_buf_nalloc = over_alloc_dd(n);
 +        srenew(pme->redist_buf,pme->redist_buf_nalloc*DIM);
 +    }
 +
 +    pme->idxa = atc->pd;
 +
 +#ifdef GMX_MPI
 +    if (forw && bXF) {
 +        /* forward, redistribution from pp to pme */
 +
 +        /* Calculate send counts and exchange them with other nodes */
 +        for(i=0; (i<atc->nslab); i++) pme->scounts[i]=0;
 +        for(i=0; (i<n); i++) pme->scounts[pme->idxa[i]]++;
 +        MPI_Alltoall( pme->scounts, 1, MPI_INT, pme->rcounts, 1, MPI_INT, atc->mpi_comm);
 +
 +        /* Calculate send and receive displacements and index into send
 +           buffer */
 +        pme->sdispls[0]=0;
 +        pme->rdispls[0]=0;
 +        pme->sidx[0]=0;
 +        for(i=1; i<atc->nslab; i++) {
 +            pme->sdispls[i]=pme->sdispls[i-1]+pme->scounts[i-1];
 +            pme->rdispls[i]=pme->rdispls[i-1]+pme->rcounts[i-1];
 +            pme->sidx[i]=pme->sdispls[i];
 +        }
 +        /* Total # of particles to be received */
 +        atc->n = pme->rdispls[atc->nslab-1] + pme->rcounts[atc->nslab-1];
 +
 +        pme_realloc_atomcomm_things(atc);
 +
 +        /* Copy particle coordinates into send buffer and exchange */
 +        for(i=0; (i<n); i++) {
 +            ii=DIM*pme->sidx[pme->idxa[i]];
 +            pme->sidx[pme->idxa[i]]++;
 +            pme->redist_buf[ii+XX]=x_f[i][XX];
 +            pme->redist_buf[ii+YY]=x_f[i][YY];
 +            pme->redist_buf[ii+ZZ]=x_f[i][ZZ];
 +        }
 +        MPI_Alltoallv(pme->redist_buf, pme->scounts, pme->sdispls,
 +                      pme->rvec_mpi, atc->x, pme->rcounts, pme->rdispls,
 +                      pme->rvec_mpi, atc->mpi_comm);
 +    }
 +    if (forw) {
 +        /* Copy charge into send buffer and exchange */
 +        for(i=0; i<atc->nslab; i++) pme->sidx[i]=pme->sdispls[i];
 +        for(i=0; (i<n); i++) {
 +            ii=pme->sidx[pme->idxa[i]];
 +            pme->sidx[pme->idxa[i]]++;
 +            pme->redist_buf[ii]=charge[i];
 +        }
 +        MPI_Alltoallv(pme->redist_buf, pme->scounts, pme->sdispls, mpi_type,
 +                      atc->q, pme->rcounts, pme->rdispls, mpi_type,
 +                      atc->mpi_comm);
 +    }
 +    else { /* backward, redistribution from pme to pp */
 +        MPI_Alltoallv(atc->f, pme->rcounts, pme->rdispls, pme->rvec_mpi,
 +                      pme->redist_buf, pme->scounts, pme->sdispls,
 +                      pme->rvec_mpi, atc->mpi_comm);
 +
 +        /* Copy data from receive buffer */
 +        for(i=0; i<atc->nslab; i++)
 +            pme->sidx[i] = pme->sdispls[i];
 +        for(i=0; (i<n); i++) {
 +            ii = DIM*pme->sidx[pme->idxa[i]];
 +            x_f[i][XX] += pme->redist_buf[ii+XX];
 +            x_f[i][YY] += pme->redist_buf[ii+YY];
 +            x_f[i][ZZ] += pme->redist_buf[ii+ZZ];
 +            pme->sidx[pme->idxa[i]]++;
 +        }
 +    }
 +#endif
 +}
 +
 +static void pme_dd_sendrecv(pme_atomcomm_t *atc,
 +                            gmx_bool bBackward,int shift,
 +                            void *buf_s,int nbyte_s,
 +                            void *buf_r,int nbyte_r)
 +{
 +#ifdef GMX_MPI
 +    int dest,src;
 +    MPI_Status stat;
 +
 +    if (bBackward == FALSE) {
 +        dest = atc->node_dest[shift];
 +        src  = atc->node_src[shift];
 +    } else {
 +        dest = atc->node_src[shift];
 +        src  = atc->node_dest[shift];
 +    }
 +
 +    if (nbyte_s > 0 && nbyte_r > 0) {
 +        MPI_Sendrecv(buf_s,nbyte_s,MPI_BYTE,
 +                     dest,shift,
 +                     buf_r,nbyte_r,MPI_BYTE,
 +                     src,shift,
 +                     atc->mpi_comm,&stat);
 +    } else if (nbyte_s > 0) {
 +        MPI_Send(buf_s,nbyte_s,MPI_BYTE,
 +                 dest,shift,
 +                 atc->mpi_comm);
 +    } else if (nbyte_r > 0) {
 +        MPI_Recv(buf_r,nbyte_r,MPI_BYTE,
 +                 src,shift,
 +                 atc->mpi_comm,&stat);
 +    }
 +#endif
 +}
 +
 +static void dd_pmeredist_x_q(gmx_pme_t pme,
 +                             int n, gmx_bool bX, rvec *x, real *charge,
 +                             pme_atomcomm_t *atc)
 +{
 +    int *commnode,*buf_index;
 +    int nnodes_comm,i,nsend,local_pos,buf_pos,node,scount,rcount;
 +
 +    commnode  = atc->node_dest;
 +    buf_index = atc->buf_index;
 +
 +    nnodes_comm = min(2*atc->maxshift,atc->nslab-1);
 +
 +    nsend = 0;
 +    for(i=0; i<nnodes_comm; i++) {
 +        buf_index[commnode[i]] = nsend;
 +        nsend += atc->count[commnode[i]];
 +    }
 +    if (bX) {
 +        if (atc->count[atc->nodeid] + nsend != n)
 +            gmx_fatal(FARGS,"%d particles communicated to PME node %d are more than 2/3 times the cut-off out of the domain decomposition cell of their charge group in dimension %c.\n"
 +                      "This usually means that your system is not well equilibrated.",
 +                      n - (atc->count[atc->nodeid] + nsend),
 +                      pme->nodeid,'x'+atc->dimind);
 +
 +        if (nsend > pme->buf_nalloc) {
 +            pme->buf_nalloc = over_alloc_dd(nsend);
 +            srenew(pme->bufv,pme->buf_nalloc);
 +            srenew(pme->bufr,pme->buf_nalloc);
 +        }
 +
 +        atc->n = atc->count[atc->nodeid];
 +        for(i=0; i<nnodes_comm; i++) {
 +            scount = atc->count[commnode[i]];
 +            /* Communicate the count */
 +            if (debug)
 +                fprintf(debug,"dimind %d PME node %d send to node %d: %d\n",
 +                        atc->dimind,atc->nodeid,commnode[i],scount);
 +            pme_dd_sendrecv(atc,FALSE,i,
 +                            &scount,sizeof(int),
 +                            &atc->rcount[i],sizeof(int));
 +            atc->n += atc->rcount[i];
 +        }
 +
 +        pme_realloc_atomcomm_things(atc);
 +    }
 +
 +    local_pos = 0;
 +    for(i=0; i<n; i++) {
 +        node = atc->pd[i];
 +        if (node == atc->nodeid) {
 +            /* Copy direct to the receive buffer */
 +            if (bX) {
 +                copy_rvec(x[i],atc->x[local_pos]);
 +            }
 +            atc->q[local_pos] = charge[i];
 +            local_pos++;
 +        } else {
 +            /* Copy to the send buffer */
 +            if (bX) {
 +                copy_rvec(x[i],pme->bufv[buf_index[node]]);
 +            }
 +            pme->bufr[buf_index[node]] = charge[i];
 +            buf_index[node]++;
 +        }
 +    }
 +
 +    buf_pos = 0;
 +    for(i=0; i<nnodes_comm; i++) {
 +        scount = atc->count[commnode[i]];
 +        rcount = atc->rcount[i];
 +        if (scount > 0 || rcount > 0) {
 +            if (bX) {
 +                /* Communicate the coordinates */
 +                pme_dd_sendrecv(atc,FALSE,i,
 +                                pme->bufv[buf_pos],scount*sizeof(rvec),
 +                                atc->x[local_pos],rcount*sizeof(rvec));
 +            }
 +            /* Communicate the charges */
 +            pme_dd_sendrecv(atc,FALSE,i,
 +                            pme->bufr+buf_pos,scount*sizeof(real),
 +                            atc->q+local_pos,rcount*sizeof(real));
 +            buf_pos   += scount;
 +            local_pos += atc->rcount[i];
 +        }
 +    }
 +}
 +
 +static void dd_pmeredist_f(gmx_pme_t pme, pme_atomcomm_t *atc,
 +                           int n, rvec *f,
 +                           gmx_bool bAddF)
 +{
 +    int *commnode,*buf_index;
 +    int nnodes_comm,local_pos,buf_pos,i,scount,rcount,node;
 +
 +    commnode  = atc->node_dest;
 +    buf_index = atc->buf_index;
 +
 +    nnodes_comm = min(2*atc->maxshift,atc->nslab-1);
 +
 +    local_pos = atc->count[atc->nodeid];
 +    buf_pos = 0;
 +    for(i=0; i<nnodes_comm; i++) {
 +        scount = atc->rcount[i];
 +        rcount = atc->count[commnode[i]];
 +        if (scount > 0 || rcount > 0) {
 +            /* Communicate the forces */
 +            pme_dd_sendrecv(atc,TRUE,i,
 +                            atc->f[local_pos],scount*sizeof(rvec),
 +                            pme->bufv[buf_pos],rcount*sizeof(rvec));
 +            local_pos += scount;
 +        }
 +        buf_index[commnode[i]] = buf_pos;
 +        buf_pos   += rcount;
 +    }
 +
 +    local_pos = 0;
 +    if (bAddF)
 +    {
 +        for(i=0; i<n; i++)
 +        {
 +            node = atc->pd[i];
 +            if (node == atc->nodeid)
 +            {
 +                /* Add from the local force array */
 +                rvec_inc(f[i],atc->f[local_pos]);
 +                local_pos++;
 +            }
 +            else
 +            {
 +                /* Add from the receive buffer */
 +                rvec_inc(f[i],pme->bufv[buf_index[node]]);
 +                buf_index[node]++;
 +            }
 +        }
 +    }
 +    else
 +    {
 +        for(i=0; i<n; i++)
 +        {
 +            node = atc->pd[i];
 +            if (node == atc->nodeid)
 +            {
 +                /* Copy from the local force array */
 +                copy_rvec(atc->f[local_pos],f[i]);
 +                local_pos++;
 +            }
 +            else
 +            {
 +                /* Copy from the receive buffer */
 +                copy_rvec(pme->bufv[buf_index[node]],f[i]);
 +                buf_index[node]++;
 +            }
 +        }
 +    }
 +}
 +
 +#ifdef GMX_MPI
 +static void
 +gmx_sum_qgrid_dd(gmx_pme_t pme, real *grid, int direction)
 +{
 +    pme_overlap_t *overlap;
 +    int send_index0,send_nindex;
 +    int recv_index0,recv_nindex;
 +    MPI_Status stat;
 +    int i,j,k,ix,iy,iz,icnt;
 +    int ipulse,send_id,recv_id,datasize;
 +    real *p;
 +    real *sendptr,*recvptr;
 +
 +    /* Start with minor-rank communication. This is a bit of a pain since it is not contiguous */
 +    overlap = &pme->overlap[1];
 +
 +    for(ipulse=0;ipulse<overlap->noverlap_nodes;ipulse++)
 +    {
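 +        /* Each pulse exchanges one overlapping grid region with another PME
 +         * node; for the backward direction the send/receive ids and index
 +         * ranges are swapped, so the same pulses are reused in reverse.
 +         */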
 +        /* Since we have already (un)wrapped the overlap in the z-dimension,
 +         * we only have to communicate 0 to nkz (not pmegrid_nz).
 +         */
 +        if (direction==GMX_SUM_QGRID_FORWARD)
 +        {
 +            send_id = overlap->send_id[ipulse];
 +            recv_id = overlap->recv_id[ipulse];
 +            send_index0   = overlap->comm_data[ipulse].send_index0;
 +            send_nindex   = overlap->comm_data[ipulse].send_nindex;
 +            recv_index0   = overlap->comm_data[ipulse].recv_index0;
 +            recv_nindex   = overlap->comm_data[ipulse].recv_nindex;
 +        }
 +        else
 +        {
 +            send_id = overlap->recv_id[ipulse];
 +            recv_id = overlap->send_id[ipulse];
 +            send_index0   = overlap->comm_data[ipulse].recv_index0;
 +            send_nindex   = overlap->comm_data[ipulse].recv_nindex;
 +            recv_index0   = overlap->comm_data[ipulse].send_index0;
 +            recv_nindex   = overlap->comm_data[ipulse].send_nindex;
 +        }
 +
 +        /* Copy data to contiguous send buffer */
 +        if (debug)
 +        {
 +            fprintf(debug,"PME send node %d %d -> %d grid start %d Communicating %d to %d\n",
 +                    pme->nodeid,overlap->nodeid,send_id,
 +                    pme->pmegrid_start_iy,
 +                    send_index0-pme->pmegrid_start_iy,
 +                    send_index0-pme->pmegrid_start_iy+send_nindex);
 +        }
 +        icnt = 0;
 +        for(i=0;i<pme->pmegrid_nx;i++)
 +        {
 +            ix = i;
 +            for(j=0;j<send_nindex;j++)
 +            {
 +                iy = j + send_index0 - pme->pmegrid_start_iy;
 +                for(k=0;k<pme->nkz;k++)
 +                {
 +                    iz = k;
 +                    overlap->sendbuf[icnt++] = grid[ix*(pme->pmegrid_ny*pme->pmegrid_nz)+iy*(pme->pmegrid_nz)+iz];
 +                }
 +            }
 +        }
 +
 +        datasize      = pme->pmegrid_nx * pme->nkz;
 +
 +        MPI_Sendrecv(overlap->sendbuf,send_nindex*datasize,GMX_MPI_REAL,
 +                     send_id,ipulse,
 +                     overlap->recvbuf,recv_nindex*datasize,GMX_MPI_REAL,
 +                     recv_id,ipulse,
 +                     overlap->mpi_comm,&stat);
 +
 +        /* Get data from contiguous recv buffer */
 +        if (debug)
 +        {
 +            fprintf(debug,"PME recv node %d %d <- %d grid start %d Communicating %d to %d\n",
 +                    pme->nodeid,overlap->nodeid,recv_id,
 +                    pme->pmegrid_start_iy,
 +                    recv_index0-pme->pmegrid_start_iy,
 +                    recv_index0-pme->pmegrid_start_iy+recv_nindex);
 +        }
 +        icnt = 0;
 +        for(i=0;i<pme->pmegrid_nx;i++)
 +        {
 +            ix = i;
 +            for(j=0;j<recv_nindex;j++)
 +            {
 +                iy = j + recv_index0 - pme->pmegrid_start_iy;
 +                for(k=0;k<pme->nkz;k++)
 +                {
 +                    iz = k;
 +                    if(direction==GMX_SUM_QGRID_FORWARD)
 +                    {
 +                        grid[ix*(pme->pmegrid_ny*pme->pmegrid_nz)+iy*(pme->pmegrid_nz)+iz] += overlap->recvbuf[icnt++];
 +                    }
 +                    else
 +                    {
 +                        grid[ix*(pme->pmegrid_ny*pme->pmegrid_nz)+iy*(pme->pmegrid_nz)+iz]  = overlap->recvbuf[icnt++];
 +                    }
 +                }
 +            }
 +        }
 +    }
 +
 +    /* Major dimension is easier, no copying required,
 +     * but we might have to sum to separate array.
 +     * Since we don't copy, we have to communicate up to pmegrid_nz,
 +     * not nkz as for the minor direction.
 +     */
 +    overlap = &pme->overlap[0];
 +
 +    for(ipulse=0;ipulse<overlap->noverlap_nodes;ipulse++)
 +    {
 +        if(direction==GMX_SUM_QGRID_FORWARD)
 +        {
 +            send_id = overlap->send_id[ipulse];
 +            recv_id = overlap->recv_id[ipulse];
 +            send_index0   = overlap->comm_data[ipulse].send_index0;
 +            send_nindex   = overlap->comm_data[ipulse].send_nindex;
 +            recv_index0   = overlap->comm_data[ipulse].recv_index0;
 +            recv_nindex   = overlap->comm_data[ipulse].recv_nindex;
 +            recvptr   = overlap->recvbuf;
 +        }
 +        else
 +        {
 +            send_id = overlap->recv_id[ipulse];
 +            recv_id = overlap->send_id[ipulse];
 +            send_index0   = overlap->comm_data[ipulse].recv_index0;
 +            send_nindex   = overlap->comm_data[ipulse].recv_nindex;
 +            recv_index0   = overlap->comm_data[ipulse].send_index0;
 +            recv_nindex   = overlap->comm_data[ipulse].send_nindex;
 +            recvptr   = grid + (recv_index0-pme->pmegrid_start_ix)*(pme->pmegrid_ny*pme->pmegrid_nz);
 +        }
 +
 +        sendptr       = grid + (send_index0-pme->pmegrid_start_ix)*(pme->pmegrid_ny*pme->pmegrid_nz);
 +        datasize      = pme->pmegrid_ny * pme->pmegrid_nz;
 +
 +        if (debug)
 +        {
 +            fprintf(debug,"PME send node %d %d -> %d grid start %d Communicating %d to %d\n",
 +                    pme->nodeid,overlap->nodeid,send_id,
 +                    pme->pmegrid_start_ix,
 +                    send_index0-pme->pmegrid_start_ix,
 +                    send_index0-pme->pmegrid_start_ix+send_nindex);
 +            fprintf(debug,"PME recv node %d %d <- %d grid start %d Communicating %d to %d\n",
 +                    pme->nodeid,overlap->nodeid,recv_id,
 +                    pme->pmegrid_start_ix,
 +                    recv_index0-pme->pmegrid_start_ix,
 +                    recv_index0-pme->pmegrid_start_ix+recv_nindex);
 +        }
 +
 +        MPI_Sendrecv(sendptr,send_nindex*datasize,GMX_MPI_REAL,
 +                     send_id,ipulse,
 +                     recvptr,recv_nindex*datasize,GMX_MPI_REAL,
 +                     recv_id,ipulse,
 +                     overlap->mpi_comm,&stat);
 +
 +        /* ADD data from contiguous recv buffer */
 +        if(direction==GMX_SUM_QGRID_FORWARD)
 +        {
 +            p = grid + (recv_index0-pme->pmegrid_start_ix)*(pme->pmegrid_ny*pme->pmegrid_nz);
 +            for(i=0;i<recv_nindex*datasize;i++)
 +            {
 +                p[i] += overlap->recvbuf[i];
 +            }
 +        }
 +    }
 +}
 +#endif
 +
 +
 +static int
 +copy_pmegrid_to_fftgrid(gmx_pme_t pme, real *pmegrid, real *fftgrid)
 +{
 +    ivec    local_fft_ndata,local_fft_offset,local_fft_size;
 +    ivec    local_pme_size;
 +    int     i,ix,iy,iz;
 +    int     pmeidx,fftidx;
 +
 +    /* Dimensions should be identical for A/B grid, so we just use A here */
 +    gmx_parallel_3dfft_real_limits(pme->pfft_setupA,
 +                                   local_fft_ndata,
 +                                   local_fft_offset,
 +                                   local_fft_size);
 +
 +    local_pme_size[0] = pme->pmegrid_nx;
 +    local_pme_size[1] = pme->pmegrid_ny;
 +    local_pme_size[2] = pme->pmegrid_nz;
 +
 +    /* The fftgrid is always 'justified' to the lower-left corner of the PME grid:
 +     * the offset is identical, and the PME grid always has more data (due to overlap).
 +     */
 +    {
 +#ifdef DEBUG_PME
 +        FILE *fp,*fp2;
 +        char fn[STRLEN],format[STRLEN];
 +        real val;
 +        sprintf(fn,"pmegrid%d.pdb",pme->nodeid);
 +        fp = ffopen(fn,"w");
 +        sprintf(fn,"pmegrid%d.txt",pme->nodeid);
 +        fp2 = ffopen(fn,"w");
 +        sprintf(format,"%s%s\n",pdbformat,"%6.2f%6.2f");
 +#endif
 +
 +    for(ix=0;ix<local_fft_ndata[XX];ix++)
 +    {
 +        for(iy=0;iy<local_fft_ndata[YY];iy++)
 +        {
 +            for(iz=0;iz<local_fft_ndata[ZZ];iz++)
 +            {
 +                pmeidx = ix*(local_pme_size[YY]*local_pme_size[ZZ])+iy*(local_pme_size[ZZ])+iz;
 +                fftidx = ix*(local_fft_size[YY]*local_fft_size[ZZ])+iy*(local_fft_size[ZZ])+iz;
 +                fftgrid[fftidx] = pmegrid[pmeidx];
 +#ifdef DEBUG_PME
 +                val = 100*pmegrid[pmeidx];
 +                if (pmegrid[pmeidx] != 0)
 +                    fprintf(fp,format,"ATOM",pmeidx,"CA","GLY",' ',pmeidx,' ',
 +                            5.0*ix,5.0*iy,5.0*iz,1.0,val);
 +                if (pmegrid[pmeidx] != 0)
 +                    fprintf(fp2,"%-12s  %5d  %5d  %5d  %12.5e\n",
 +                            "qgrid",
 +                            pme->pmegrid_start_ix + ix,
 +                            pme->pmegrid_start_iy + iy,
 +                            pme->pmegrid_start_iz + iz,
 +                            pmegrid[pmeidx]);
 +#endif
 +            }
 +        }
 +    }
 +#ifdef DEBUG_PME
 +    ffclose(fp);
 +    ffclose(fp2);
 +#endif
 +    }
 +    return 0;
 +}
 +
 +
 +static gmx_cycles_t omp_cyc_start()
 +{
 +    return gmx_cycles_read();
 +}
 +
 +static gmx_cycles_t omp_cyc_end(gmx_cycles_t c)
 +{
 +    return gmx_cycles_read() - c;
 +}
 +
 +
 +static int
 +copy_fftgrid_to_pmegrid(gmx_pme_t pme, const real *fftgrid, real *pmegrid,
 +                        int nthread,int thread)
 +{
 +    ivec    local_fft_ndata,local_fft_offset,local_fft_size;
 +    ivec    local_pme_size;
 +    int     ixy0,ixy1,ixy,ix,iy,iz;
 +    int     pmeidx,fftidx;
 +#ifdef PME_TIME_THREADS
 +    gmx_cycles_t c1;
 +    static double cs1=0;
 +    static int cnt=0;
 +#endif
 +
 +#ifdef PME_TIME_THREADS
 +    c1 = omp_cyc_start();
 +#endif
 +    /* Dimensions should be identical for A/B grid, so we just use A here */
 +    gmx_parallel_3dfft_real_limits(pme->pfft_setupA,
 +                                   local_fft_ndata,
 +                                   local_fft_offset,
 +                                   local_fft_size);
 +
 +    local_pme_size[0] = pme->pmegrid_nx;
 +    local_pme_size[1] = pme->pmegrid_ny;
 +    local_pme_size[2] = pme->pmegrid_nz;
 +
 +    /* The fftgrid is always 'justified' to the lower-left corner of the PME grid:
 +     * the offset is identical, and the PME grid always has more data (due to overlap).
 +     */
 +    ixy0 = ((thread  )*local_fft_ndata[XX]*local_fft_ndata[YY])/nthread;
 +    ixy1 = ((thread+1)*local_fft_ndata[XX]*local_fft_ndata[YY])/nthread;
 +
 +    for(ixy=ixy0;ixy<ixy1;ixy++)
 +    {
 +        ix = ixy/local_fft_ndata[YY];
 +        iy = ixy - ix*local_fft_ndata[YY];
 +
 +        pmeidx = (ix*local_pme_size[YY] + iy)*local_pme_size[ZZ];
 +        fftidx = (ix*local_fft_size[YY] + iy)*local_fft_size[ZZ];
 +        for(iz=0;iz<local_fft_ndata[ZZ];iz++)
 +        {
 +            pmegrid[pmeidx+iz] = fftgrid[fftidx+iz];
 +        }
 +    }
 +
 +#ifdef PME_TIME_THREADS
 +    c1 = omp_cyc_end(c1);
 +    cs1 += (double)c1;
 +    cnt++;
 +    if (cnt % 20 == 0)
 +    {
 +        printf("copy %.2f\n",cs1*1e-9);
 +    }
 +#endif
 +
 +    return 0;
 +}
 +
 +
 +static void
 +wrap_periodic_pmegrid(gmx_pme_t pme, real *pmegrid)
 +{
 +    int     nx,ny,nz,pnx,pny,pnz,ny_x,overlap,ix,iy,iz;
 +
 +    nx = pme->nkx;
 +    ny = pme->nky;
 +    nz = pme->nkz;
 +
 +    pnx = pme->pmegrid_nx;
 +    pny = pme->pmegrid_ny;
 +    pnz = pme->pmegrid_nz;
 +
 +    overlap = pme->pme_order - 1;
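 +    /* A pme_order-point B-spline spreads each charge over pme_order grid
 +     * points per dimension, so at most pme_order-1 of them wrap around the
 +     * periodic boundary and must be folded back into the grid.
 +     */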
 +
 +    /* Add periodic overlap in z */
 +    for(ix=0; ix<pme->pmegrid_nx; ix++)
 +    {
 +        for(iy=0; iy<pme->pmegrid_ny; iy++)
 +        {
 +            for(iz=0; iz<overlap; iz++)
 +            {
 +                pmegrid[(ix*pny+iy)*pnz+iz] +=
 +                    pmegrid[(ix*pny+iy)*pnz+nz+iz];
 +            }
 +        }
 +    }
 +
 +    if (pme->nnodes_minor == 1)
 +    {
 +       for(ix=0; ix<pme->pmegrid_nx; ix++)
 +       {
 +           for(iy=0; iy<overlap; iy++)
 +           {
 +               for(iz=0; iz<nz; iz++)
 +               {
 +                   pmegrid[(ix*pny+iy)*pnz+iz] +=
 +                       pmegrid[(ix*pny+ny+iy)*pnz+iz];
 +               }
 +           }
 +       }
 +    }
 +
 +    if (pme->nnodes_major == 1)
 +    {
 +        ny_x = (pme->nnodes_minor == 1 ? ny : pme->pmegrid_ny);
 +
 +        for(ix=0; ix<overlap; ix++)
 +        {
 +            for(iy=0; iy<ny_x; iy++)
 +            {
 +                for(iz=0; iz<nz; iz++)
 +                {
 +                    pmegrid[(ix*pny+iy)*pnz+iz] +=
 +                        pmegrid[((nx+ix)*pny+iy)*pnz+iz];
 +                }
 +            }
 +        }
 +    }
 +}
 +
 +
 +static void
 +unwrap_periodic_pmegrid(gmx_pme_t pme, real *pmegrid)
 +{
 +    int     nx,ny,nz,pnx,pny,pnz,ny_x,overlap,ix;
 +
 +    nx = pme->nkx;
 +    ny = pme->nky;
 +    nz = pme->nkz;
 +
 +    pnx = pme->pmegrid_nx;
 +    pny = pme->pmegrid_ny;
 +    pnz = pme->pmegrid_nz;
 +
 +    overlap = pme->pme_order - 1;
 +
 +    if (pme->nnodes_major == 1)
 +    {
 +        ny_x = (pme->nnodes_minor == 1 ? ny : pme->pmegrid_ny);
 +
 +        for(ix=0; ix<overlap; ix++)
 +        {
 +            int iy,iz;
 +
 +            for(iy=0; iy<ny_x; iy++)
 +            {
 +                for(iz=0; iz<nz; iz++)
 +                {
 +                    pmegrid[((nx+ix)*pny+iy)*pnz+iz] =
 +                        pmegrid[(ix*pny+iy)*pnz+iz];
 +                }
 +            }
 +        }
 +    }
 +
 +    if (pme->nnodes_minor == 1)
 +    {
 +#pragma omp parallel for num_threads(pme->nthread) schedule(static)
 +       for(ix=0; ix<pme->pmegrid_nx; ix++)
 +       {
 +           int iy,iz;
 +
 +           for(iy=0; iy<overlap; iy++)
 +           {
 +               for(iz=0; iz<nz; iz++)
 +               {
 +                   pmegrid[(ix*pny+ny+iy)*pnz+iz] =
 +                       pmegrid[(ix*pny+iy)*pnz+iz];
 +               }
 +           }
 +       }
 +    }
 +
 +    /* Copy periodic overlap in z */
 +#pragma omp parallel for num_threads(pme->nthread) schedule(static)
 +    for(ix=0; ix<pme->pmegrid_nx; ix++)
 +    {
 +        int iy,iz;
 +
 +        for(iy=0; iy<pme->pmegrid_ny; iy++)
 +        {
 +            for(iz=0; iz<overlap; iz++)
 +            {
 +                pmegrid[(ix*pny+iy)*pnz+nz+iz] =
 +                    pmegrid[(ix*pny+iy)*pnz+iz];
 +            }
 +        }
 +    }
 +}
 +
 +static void clear_grid(int nx,int ny,int nz,real *grid,
 +                       ivec fs,int *flag,
 +                       int fx,int fy,int fz,
 +                       int order)
 +{
 +    int nc,ncz;
 +    int fsx,fsy,fsz,gx,gy,gz,g0x,g0y,x,y,z;
 +    int flind;
 +
 +    nc  = 2 + (order - 2)/FLBS;
 +    ncz = 2 + (order - 2)/FLBSZ;
 +
 +    for(fsx=fx; fsx<fx+nc; fsx++)
 +    {
 +        for(fsy=fy; fsy<fy+nc; fsy++)
 +        {
 +            for(fsz=fz; fsz<fz+ncz; fsz++)
 +            {
 +                flind = (fsx*fs[YY] + fsy)*fs[ZZ] + fsz;
 +                if (flag[flind] == 0)
 +                {
 +                    gx = fsx*FLBS;
 +                    gy = fsy*FLBS;
 +                    gz = fsz*FLBSZ;
 +                    g0x = (gx*ny + gy)*nz + gz;
 +                    for(x=0; x<FLBS; x++)
 +                    {
 +                        g0y = g0x;
 +                        for(y=0; y<FLBS; y++)
 +                        {
 +                            for(z=0; z<FLBSZ; z++)
 +                            {
 +                                grid[g0y+z] = 0;
 +                            }
 +                            g0y += nz;
 +                        }
 +                        g0x += ny*nz;
 +                    }
 +
 +                    flag[flind] = 1;
 +                }
 +            }
 +        }
 +    }
 +}
 +
 +/* This has to be a macro to enable full compiler optimization with xlC (and probably others too) */
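 +/* DO_BSPLINE(order) spreads the charge qn onto an order x order x order block
 +   of the grid starting at (i0,j0,k0), using the separable products of the
 +   1D spline weights thx, thy and thz. */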
 +#define DO_BSPLINE(order)                            \
 +for(ithx=0; (ithx<order); ithx++)                    \
 +{                                                    \
 +    index_x = (i0+ithx)*pny*pnz;                     \
 +    valx    = qn*thx[ithx];                          \
 +                                                     \
 +    for(ithy=0; (ithy<order); ithy++)                \
 +    {                                                \
 +        valxy    = valx*thy[ithy];                   \
 +        index_xy = index_x+(j0+ithy)*pnz;            \
 +                                                     \
 +        for(ithz=0; (ithz<order); ithz++)            \
 +        {                                            \
 +            index_xyz        = index_xy+(k0+ithz);   \
 +            grid[index_xyz] += valxy*thz[ithz];      \
 +        }                                            \
 +    }                                                \
 +}
 +
 +
 +static void spread_q_bsplines_thread(pmegrid_t *pmegrid,
 +                                     pme_atomcomm_t *atc, splinedata_t *spline,
 +                                     pme_spline_work_t *work)
 +{
 +
 +    /* spread charges from home atoms to local grid */
 +    real     *grid;
 +    pme_overlap_t *ol;
 +    int      b,i,nn,n,ithx,ithy,ithz,i0,j0,k0;
 +    int *    idxptr;
 +    int      order,norder,index_x,index_xy,index_xyz;
 +    real     valx,valxy,qn;
 +    real     *thx,*thy,*thz;
 +    int      localsize, bndsize;
 +    int      pnx,pny,pnz,ndatatot;
 +    int      offx,offy,offz;
 +
 +    pnx = pmegrid->n[XX];
 +    pny = pmegrid->n[YY];
 +    pnz = pmegrid->n[ZZ];
 +
 +    offx = pmegrid->offset[XX];
 +    offy = pmegrid->offset[YY];
 +    offz = pmegrid->offset[ZZ];
 +
 +    ndatatot = pnx*pny*pnz;
 +    grid = pmegrid->grid;
 +    for(i=0;i<ndatatot;i++)
 +    {
 +        grid[i] = 0;
 +    }
 +
 +    order = pmegrid->order;
 +
 +    for(nn=0; nn<spline->n; nn++)
 +    {
 +        n  = spline->ind[nn];
 +        qn = atc->q[n];
 +
 +        if (qn != 0)
 +        {
 +            idxptr = atc->idx[n];
 +            norder = nn*order;
 +
 +            i0   = idxptr[XX] - offx;
 +            j0   = idxptr[YY] - offy;
 +            k0   = idxptr[ZZ] - offz;
 +
 +            thx = spline->theta[XX] + norder;
 +            thy = spline->theta[YY] + norder;
 +            thz = spline->theta[ZZ] + norder;
 +
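 +            /* Orders 4 and 5 use dedicated SSE kernels from pme_sse_single.h
 +             * when PME_SSE is defined; other orders (and builds without SSE)
 +             * fall back to the generic DO_BSPLINE macro.
 +             */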
 +            switch (order) {
 +            case 4:
 +#ifdef PME_SSE
 +#ifdef PME_SSE_UNALIGNED
 +#define PME_SPREAD_SSE_ORDER4
 +#else
 +#define PME_SPREAD_SSE_ALIGNED
 +#define PME_ORDER 4
 +#endif
 +#include "pme_sse_single.h"
 +#else
 +                DO_BSPLINE(4);
 +#endif
 +                break;
 +            case 5:
 +#ifdef PME_SSE
 +#define PME_SPREAD_SSE_ALIGNED
 +#define PME_ORDER 5
 +#include "pme_sse_single.h"
 +#else
 +                DO_BSPLINE(5);
 +#endif
 +                break;
 +            default:
 +                DO_BSPLINE(order);
 +                break;
 +            }
 +        }
 +    }
 +}
 +
 +static void set_grid_alignment(int *pmegrid_nz,int pme_order)
 +{
 +#ifdef PME_SSE
 +    if (pme_order == 5
 +#ifndef PME_SSE_UNALIGNED
 +        || pme_order == 4
 +#endif
 +        )
 +    {
 +        /* Round nz up to a multiple of 4 to ensure alignment */
 +        *pmegrid_nz = ((*pmegrid_nz + 3) & ~3);
 +    }
 +#endif
 +}
 +
 +static void set_gridsize_alignment(int *gridsize,int pme_order)
 +{
 +#ifdef PME_SSE
 +#ifndef PME_SSE_UNALIGNED
 +    if (pme_order == 4)
 +    {
 +        /* Add extra elements to ensure that aligned operations do not go
 +         * beyond the allocated grid size.
 +         * Note that for pme_order=5, the pme grid z-size alignment
 +         * ensures that we will not go beyond the grid size.
 +         */
 +         *gridsize += 4;
 +    }
 +#endif
 +#endif
 +}
 +
 +static void pmegrid_init(pmegrid_t *grid,
 +                         int cx, int cy, int cz,
 +                         int x0, int y0, int z0,
 +                         int x1, int y1, int z1,
 +                         gmx_bool set_alignment,
 +                         int pme_order,
 +                         real *ptr)
 +{
 +    int nz,gridsize;
 +
 +    grid->ci[XX] = cx;
 +    grid->ci[YY] = cy;
 +    grid->ci[ZZ] = cz;
 +    grid->offset[XX] = x0;
 +    grid->offset[YY] = y0;
 +    grid->offset[ZZ] = z0;
 +    grid->n[XX]      = x1 - x0 + pme_order - 1;
 +    grid->n[YY]      = y1 - y0 + pme_order - 1;
 +    grid->n[ZZ]      = z1 - z0 + pme_order - 1;
 +
 +    nz = grid->n[ZZ];
 +    set_grid_alignment(&nz,pme_order);
 +    if (set_alignment)
 +    {
 +        grid->n[ZZ] = nz;
 +    }
 +    else if (nz != grid->n[ZZ])
 +    {
 +        gmx_incons("pmegrid_init call with an unaligned z size");
 +    }
 +
 +    grid->order = pme_order;
 +    if (ptr == NULL)
 +    {
 +        gridsize = grid->n[XX]*grid->n[YY]*grid->n[ZZ];
 +        set_gridsize_alignment(&gridsize,pme_order);
 +        snew_aligned(grid->grid,gridsize,16);
 +    }
 +    else
 +    {
 +        grid->grid = ptr;
 +    }
 +}
 +
 +static int div_round_up(int enumerator,int denominator)
 +{
 +    return (enumerator + denominator - 1)/denominator;
 +}
 +
 +static void make_subgrid_division(const ivec n,int ovl,int nthread,
 +                                  ivec nsub)
 +{
 +    int gsize_opt,gsize;
 +    int nsx,nsy,nsz;
 +    char *env;
 +
 +    gsize_opt = -1;
 +    for(nsx=1; nsx<=nthread; nsx++)
 +    {
 +        if (nthread % nsx == 0)
 +        {
 +            for(nsy=1; nsy<=nthread; nsy++)
 +            {
 +                if (nsx*nsy <= nthread && nthread % (nsx*nsy) == 0)
 +                {
 +                    nsz = nthread/(nsx*nsy);
 +
 +                    /* Determine the number of grid points per thread */
 +                    gsize =
 +                        (div_round_up(n[XX],nsx) + ovl)*
 +                        (div_round_up(n[YY],nsy) + ovl)*
 +                        (div_round_up(n[ZZ],nsz) + ovl);
 +
 +                    /* Minimize the number of grid points per thread
 +                     * and, secondarily, the number of cuts in minor dimensions.
 +                     */
 +                    if (gsize_opt == -1 ||
 +                        gsize < gsize_opt ||
 +                        (gsize == gsize_opt &&
 +                         (nsz < nsub[ZZ] || (nsz == nsub[ZZ] && nsy < nsub[YY]))))
 +                    {
 +                        nsub[XX] = nsx;
 +                        nsub[YY] = nsy;
 +                        nsub[ZZ] = nsz;
 +                        gsize_opt = gsize;
 +                    }
 +                }
 +            }
 +        }
 +    }
 +
 +    env = getenv("GMX_PME_THREAD_DIVISION");
 +    if (env != NULL)
 +    {
 +        sscanf(env,"%d %d %d",&nsub[XX],&nsub[YY],&nsub[ZZ]);
 +    }
 +
 +    if (nsub[XX]*nsub[YY]*nsub[ZZ] != nthread)
 +    {
 +        gmx_fatal(FARGS,"PME grid thread division (%d x %d x %d) does not match the total number of threads (%d)",nsub[XX],nsub[YY],nsub[ZZ],nthread);
 +    }
 +}
 +
 +static void pmegrids_init(pmegrids_t *grids,
 +                          int nx,int ny,int nz,int nz_base,
 +                          int pme_order,
 +                          int nthread,
 +                          int overlap_x,
 +                          int overlap_y)
 +{
 +    ivec n,n_base,g0,g1;
 +    int t,x,y,z,d,i,tfac;
 +    int max_comm_lines;
 +
 +    n[XX] = nx - (pme_order - 1);
 +    n[YY] = ny - (pme_order - 1);
 +    n[ZZ] = nz - (pme_order - 1);
 +
 +    copy_ivec(n,n_base);
 +    n_base[ZZ] = nz_base;
 +
 +    pmegrid_init(&grids->grid,0,0,0,0,0,0,n[XX],n[YY],n[ZZ],FALSE,pme_order,
 +                 NULL);
 +
 +    grids->nthread = nthread;
 +
 +    make_subgrid_division(n_base,pme_order-1,grids->nthread,grids->nc);
 +
 +    if (grids->nthread > 1)
 +    {
 +        ivec nst;
 +        int gridsize;
 +        real *grid_all;
 +
 +        for(d=0; d<DIM; d++)
 +        {
 +            nst[d] = div_round_up(n[d],grids->nc[d]) + pme_order - 1;
 +        }
 +        set_grid_alignment(&nst[ZZ],pme_order);
 +
 +        if (debug)
 +        {
 +            fprintf(debug,"pmegrid thread local division: %d x %d x %d\n",
 +                    grids->nc[XX],grids->nc[YY],grids->nc[ZZ]);
 +            fprintf(debug,"pmegrid %d %d %d max thread pmegrid %d %d %d\n",
 +                    nx,ny,nz,
 +                    nst[XX],nst[YY],nst[ZZ]);
 +        }
 +
 +        snew(grids->grid_th,grids->nthread);
 +        t = 0;
 +        gridsize = nst[XX]*nst[YY]*nst[ZZ];
 +        set_gridsize_alignment(&gridsize,pme_order);
 +        snew_aligned(grid_all,
 +                     grids->nthread*gridsize+(grids->nthread+1)*GMX_CACHE_SEP,
 +                     16);
 +
 +        for(x=0; x<grids->nc[XX]; x++)
 +        {
 +            for(y=0; y<grids->nc[YY]; y++)
 +            {
 +                for(z=0; z<grids->nc[ZZ]; z++)
 +                {
 +                    pmegrid_init(&grids->grid_th[t],
 +                                 x,y,z,
 +                                 (n[XX]*(x  ))/grids->nc[XX],
 +                                 (n[YY]*(y  ))/grids->nc[YY],
 +                                 (n[ZZ]*(z  ))/grids->nc[ZZ],
 +                                 (n[XX]*(x+1))/grids->nc[XX],
 +                                 (n[YY]*(y+1))/grids->nc[YY],
 +                                 (n[ZZ]*(z+1))/grids->nc[ZZ],
 +                                 TRUE,
 +                                 pme_order,
 +                                 grid_all+GMX_CACHE_SEP+t*(gridsize+GMX_CACHE_SEP));
 +                    t++;
 +                }
 +            }
 +        }
 +    }
 +
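 +    /* Build per-dimension lookup tables: g2t[d][i] is the contribution of
 +     * grid line i in dimension d to the flat thread index, so summing the
 +     * three table entries (as done in calc_interpolation_idx) gives the
 +     * thread that owns a grid point.
 +     */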
 +    snew(grids->g2t,DIM);
 +    tfac = 1;
 +    for(d=DIM-1; d>=0; d--)
 +    {
 +        snew(grids->g2t[d],n[d]);
 +        t = 0;
 +        for(i=0; i<n[d]; i++)
 +        {
 +            /* The second check should match the parameters
 +             * of the pmegrid_init call above.
 +             */
 +            while (t + 1 < grids->nc[d] && i >= (n[d]*(t+1))/grids->nc[d])
 +            {
 +                t++;
 +            }
 +            grids->g2t[d][i] = t*tfac;
 +        }
 +
 +        tfac *= grids->nc[d];
 +
 +        switch (d)
 +        {
 +        case XX: max_comm_lines = overlap_x;     break;
 +        case YY: max_comm_lines = overlap_y;     break;
 +        case ZZ: max_comm_lines = pme_order - 1; break;
 +        }
 +        grids->nthread_comm[d] = 0;
 +        while ((n[d]*grids->nthread_comm[d])/grids->nc[d] < max_comm_lines)
 +        {
 +            grids->nthread_comm[d]++;
 +        }
 +        if (debug != NULL)
 +        {
 +            fprintf(debug,"pmegrid thread grid communication range in %c: %d\n",
 +                    'x'+d,grids->nthread_comm[d]);
 +        }
 +        /* It should be possible to make grids->nthread_comm[d]==grids->nc[d]
 +         * work, but this is not a problematic restriction.
 +         */
 +        if (grids->nc[d] > 1 && grids->nthread_comm[d] > grids->nc[d])
 +        {
 +            gmx_fatal(FARGS,"Too many threads for PME (%d) compared to the number of grid lines, reduce the number of threads doing PME",grids->nthread);
 +        }
 +    }
 +}
 +
 +
 +static void pmegrids_destroy(pmegrids_t *grids)
 +{
 +    int t;
 +
 +    if (grids->grid.grid != NULL)
 +    {
 +        sfree(grids->grid.grid);
 +
 +        if (grids->nthread > 0)
 +        {
 +            for(t=0; t<grids->nthread; t++)
 +            {
 +                sfree(grids->grid_th[t].grid);
 +            }
 +            sfree(grids->grid_th);
 +        }
 +    }
 +}
 +
 +
 +static void realloc_work(pme_work_t *work,int nkx)
 +{
 +    if (nkx > work->nalloc)
 +    {
 +        work->nalloc = nkx;
 +        srenew(work->mhx  ,work->nalloc);
 +        srenew(work->mhy  ,work->nalloc);
 +        srenew(work->mhz  ,work->nalloc);
 +        srenew(work->m2   ,work->nalloc);
 +        /* Allocate an aligned pointer for SSE operations, including 3 extra
 +         * elements at the end since SSE operates on 4 elements at a time.
 +         */
 +        sfree_aligned(work->denom);
 +        sfree_aligned(work->tmp1);
 +        sfree_aligned(work->eterm);
 +        snew_aligned(work->denom,work->nalloc+3,16);
 +        snew_aligned(work->tmp1 ,work->nalloc+3,16);
 +        snew_aligned(work->eterm,work->nalloc+3,16);
 +        srenew(work->m2inv,work->nalloc);
 +    }
 +}
 +
 +
 +static void free_work(pme_work_t *work)
 +{
 +    sfree(work->mhx);
 +    sfree(work->mhy);
 +    sfree(work->mhz);
 +    sfree(work->m2);
 +    sfree_aligned(work->denom);
 +    sfree_aligned(work->tmp1);
 +    sfree_aligned(work->eterm);
 +    sfree(work->m2inv);
 +}
 +
 +
 +#ifdef PME_SSE
 +    /* Calculate exponentials through SSE in float precision */
 +inline static void calc_exponentials(int start, int end, real f, real *d_aligned, real *r_aligned, real *e_aligned)
 +{
 +    {
 +        const __m128 two = _mm_set_ps(2.0f,2.0f,2.0f,2.0f);
 +        __m128 f_sse;
 +        __m128 lu;
 +        __m128 tmp_d1,d_inv,tmp_r,tmp_e;
 +        int kx;
 +        f_sse = _mm_load1_ps(&f);
 +        for(kx=0; kx<end; kx+=4)
 +        {
 +            tmp_d1   = _mm_load_ps(d_aligned+kx);
 +            lu       = _mm_rcp_ps(tmp_d1);
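 +            /* One Newton-Raphson step refines the approximate reciprocal
 +             * from _mm_rcp_ps: d_inv = lu*(2 - lu*d).
 +             */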
 +            d_inv    = _mm_mul_ps(lu,_mm_sub_ps(two,_mm_mul_ps(lu,tmp_d1)));
 +            tmp_r    = _mm_load_ps(r_aligned+kx);
 +            tmp_r    = gmx_mm_exp_ps(tmp_r);
 +            tmp_e    = _mm_mul_ps(f_sse,d_inv);
 +            tmp_e    = _mm_mul_ps(tmp_e,tmp_r);
 +            _mm_store_ps(e_aligned+kx,tmp_e);
 +        }
 +    }
 +}
 +#else
 +inline static void calc_exponentials(int start, int end, real f, real *d, real *r, real *e)
 +{
 +    int kx;
 +    for(kx=start; kx<end; kx++)
 +    {
 +        d[kx] = 1.0/d[kx];
 +    }
 +    for(kx=start; kx<end; kx++)
 +    {
 +        r[kx] = exp(r[kx]);
 +    }
 +    for(kx=start; kx<end; kx++)
 +    {
 +        e[kx] = f*r[kx]*d[kx];
 +    }
 +}
 +#endif
 +
 +
 +static int solve_pme_yzx(gmx_pme_t pme,t_complex *grid,
 +                         real ewaldcoeff,real vol,
 +                         gmx_bool bEnerVir,
 +                         int nthread,int thread)
 +{
 +    /* do recip sum over local cells in grid */
 +    /* y major, z middle, x minor or continuous */
 +    t_complex *p0;
 +    int     kx,ky,kz,maxkx,maxky,maxkz;
 +    int     nx,ny,nz,iyz0,iyz1,iyz,iy,iz,kxstart,kxend;
 +    real    mx,my,mz;
 +    real    factor=M_PI*M_PI/(ewaldcoeff*ewaldcoeff);
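 +    /* factor = pi^2/ewaldcoeff^2; below, exp(-factor*m^2) gives the Gaussian
 +     * damping of the reciprocal-space Ewald sum.
 +     */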
 +    real    ets2,struct2,vfactor,ets2vf;
 +    real    d1,d2,energy=0;
 +    real    by,bz;
 +    real    virxx=0,virxy=0,virxz=0,viryy=0,viryz=0,virzz=0;
 +    real    rxx,ryx,ryy,rzx,rzy,rzz;
 +    pme_work_t *work;
 +    real    *mhx,*mhy,*mhz,*m2,*denom,*tmp1,*eterm,*m2inv;
 +    real    mhxk,mhyk,mhzk,m2k;
 +    real    corner_fac;
 +    ivec    complex_order;
 +    ivec    local_ndata,local_offset,local_size;
 +    real    elfac;
 +
 +    elfac = ONE_4PI_EPS0/pme->epsilon_r;
 +
 +    nx = pme->nkx;
 +    ny = pme->nky;
 +    nz = pme->nkz;
 +
 +    /* Dimensions should be identical for A/B grid, so we just use A here */
 +    gmx_parallel_3dfft_complex_limits(pme->pfft_setupA,
 +                                      complex_order,
 +                                      local_ndata,
 +                                      local_offset,
 +                                      local_size);
 +
 +    rxx = pme->recipbox[XX][XX];
 +    ryx = pme->recipbox[YY][XX];
 +    ryy = pme->recipbox[YY][YY];
 +    rzx = pme->recipbox[ZZ][XX];
 +    rzy = pme->recipbox[ZZ][YY];
 +    rzz = pme->recipbox[ZZ][ZZ];
 +
 +    maxkx = (nx+1)/2;
 +    maxky = (ny+1)/2;
 +    maxkz = nz/2+1;
 +
 +    work = &pme->work[thread];
 +    mhx   = work->mhx;
 +    mhy   = work->mhy;
 +    mhz   = work->mhz;
 +    m2    = work->m2;
 +    denom = work->denom;
 +    tmp1  = work->tmp1;
 +    eterm = work->eterm;
 +    m2inv = work->m2inv;
 +
 +    iyz0 = local_ndata[YY]*local_ndata[ZZ]* thread   /nthread;
 +    iyz1 = local_ndata[YY]*local_ndata[ZZ]*(thread+1)/nthread;
 +
 +    for(iyz=iyz0; iyz<iyz1; iyz++)
 +    {
 +        iy = iyz/local_ndata[ZZ];
 +        iz = iyz - iy*local_ndata[ZZ];
 +
 +        ky = iy + local_offset[YY];
 +
 +        if (ky < maxky)
 +        {
 +            my = ky;
 +        }
 +        else
 +        {
 +            my = (ky - ny);
 +        }
 +
 +        by = M_PI*vol*pme->bsp_mod[YY][ky];
 +
 +        kz = iz + local_offset[ZZ];
 +
 +        mz = kz;
 +
 +        bz = pme->bsp_mod[ZZ][kz];
 +
 +        /* 0.5 correction for corner points */
 +        corner_fac = 1;
 +        if (kz == 0 || kz == (nz+1)/2)
 +        {
 +            corner_fac = 0.5;
 +        }
 +
 +        p0 = grid + iy*local_size[ZZ]*local_size[XX] + iz*local_size[XX];
 +
 +        /* We should skip the k-space point (0,0,0) */
 +        if (local_offset[XX] > 0 || ky > 0 || kz > 0)
 +        {
 +            kxstart = local_offset[XX];
 +        }
 +        else
 +        {
 +            kxstart = local_offset[XX] + 1;
 +            p0++;
 +        }
 +        kxend = local_offset[XX] + local_ndata[XX];
 +
 +        if (bEnerVir)
 +        {
 +            /* More expensive inner loop, especially because of the storage
 +             * of the mh elements in arrays.
 +             * Because x is the minor grid index, all mh elements
 +             * depend on kx for triclinic unit cells.
 +             */
 +
 +                /* Two explicit loops to avoid a conditional inside the loop */
 +            for(kx=kxstart; kx<maxkx; kx++)
 +            {
 +                mx = kx;
 +
 +                mhxk      = mx * rxx;
 +                mhyk      = mx * ryx + my * ryy;
 +                mhzk      = mx * rzx + my * rzy + mz * rzz;
 +                m2k       = mhxk*mhxk + mhyk*mhyk + mhzk*mhzk;
 +                mhx[kx]   = mhxk;
 +                mhy[kx]   = mhyk;
 +                mhz[kx]   = mhzk;
 +                m2[kx]    = m2k;
 +                denom[kx] = m2k*bz*by*pme->bsp_mod[XX][kx];
 +                tmp1[kx]  = -factor*m2k;
 +            }
 +
 +            for(kx=maxkx; kx<kxend; kx++)
 +            {
 +                mx = (kx - nx);
 +
 +                mhxk      = mx * rxx;
 +                mhyk      = mx * ryx + my * ryy;
 +                mhzk      = mx * rzx + my * rzy + mz * rzz;
 +                m2k       = mhxk*mhxk + mhyk*mhyk + mhzk*mhzk;
 +                mhx[kx]   = mhxk;
 +                mhy[kx]   = mhyk;
 +                mhz[kx]   = mhzk;
 +                m2[kx]    = m2k;
 +                denom[kx] = m2k*bz*by*pme->bsp_mod[XX][kx];
 +                tmp1[kx]  = -factor*m2k;
 +            }
 +
 +            for(kx=kxstart; kx<kxend; kx++)
 +            {
 +                m2inv[kx] = 1.0/m2[kx];
 +            }
 +
 +            calc_exponentials(kxstart,kxend,elfac,denom,tmp1,eterm);
 +
 +            for(kx=kxstart; kx<kxend; kx++,p0++)
 +            {
 +                d1      = p0->re;
 +                d2      = p0->im;
 +
 +                p0->re  = d1*eterm[kx];
 +                p0->im  = d2*eterm[kx];
 +
 +                struct2 = 2.0*(d1*d1+d2*d2);
 +
 +                tmp1[kx] = eterm[kx]*struct2;
 +            }
 +
 +            for(kx=kxstart; kx<kxend; kx++)
 +            {
 +                ets2     = corner_fac*tmp1[kx];
 +                vfactor  = (factor*m2[kx] + 1.0)*2.0*m2inv[kx];
 +                energy  += ets2;
 +
 +                ets2vf   = ets2*vfactor;
 +                virxx   += ets2vf*mhx[kx]*mhx[kx] - ets2;
 +                virxy   += ets2vf*mhx[kx]*mhy[kx];
 +                virxz   += ets2vf*mhx[kx]*mhz[kx];
 +                viryy   += ets2vf*mhy[kx]*mhy[kx] - ets2;
 +                viryz   += ets2vf*mhy[kx]*mhz[kx];
 +                virzz   += ets2vf*mhz[kx]*mhz[kx] - ets2;
 +            }
 +        }
 +        else
 +        {
 +            /* We don't need to calculate the energy and the virial.
 +             * In this case the triclinic overhead is small.
 +             */
 +
 +            /* Two explicit loops to avoid a conditional inside the loop */
 +
 +            for(kx=kxstart; kx<maxkx; kx++)
 +            {
 +                mx = kx;
 +
 +                mhxk      = mx * rxx;
 +                mhyk      = mx * ryx + my * ryy;
 +                mhzk      = mx * rzx + my * rzy + mz * rzz;
 +                m2k       = mhxk*mhxk + mhyk*mhyk + mhzk*mhzk;
 +                denom[kx] = m2k*bz*by*pme->bsp_mod[XX][kx];
 +                tmp1[kx]  = -factor*m2k;
 +            }
 +
 +            for(kx=maxkx; kx<kxend; kx++)
 +            {
 +                mx = (kx - nx);
 +
 +                mhxk      = mx * rxx;
 +                mhyk      = mx * ryx + my * ryy;
 +                mhzk      = mx * rzx + my * rzy + mz * rzz;
 +                m2k       = mhxk*mhxk + mhyk*mhyk + mhzk*mhzk;
 +                denom[kx] = m2k*bz*by*pme->bsp_mod[XX][kx];
 +                tmp1[kx]  = -factor*m2k;
 +            }
 +
 +            calc_exponentials(kxstart,kxend,elfac,denom,tmp1,eterm);
 +
 +            for(kx=kxstart; kx<kxend; kx++,p0++)
 +            {
 +                d1      = p0->re;
 +                d2      = p0->im;
 +
 +                p0->re  = d1*eterm[kx];
 +                p0->im  = d2*eterm[kx];
 +            }
 +        }
 +    }
 +
 +    if (bEnerVir)
 +    {
 +        /* Update virial with local values.
 +         * The virial is symmetric by definition.
 +         * this virial seems ok for isotropic scaling, but I'm
 +         * experiencing problems on semiisotropic membranes.
 +         * IS THAT COMMENT STILL VALID??? (DvdS, 2001/02/07).
 +         */
 +        work->vir[XX][XX] = 0.25*virxx;
 +        work->vir[YY][YY] = 0.25*viryy;
 +        work->vir[ZZ][ZZ] = 0.25*virzz;
 +        work->vir[XX][YY] = work->vir[YY][XX] = 0.25*virxy;
 +        work->vir[XX][ZZ] = work->vir[ZZ][XX] = 0.25*virxz;
 +        work->vir[YY][ZZ] = work->vir[ZZ][YY] = 0.25*viryz;
 +
 +        /* This energy should be corrected for a charged system */
 +        work->energy = 0.5*energy;
 +    }
 +
 +    /* Return the loop count */
 +    return local_ndata[YY]*local_ndata[XX];
 +}
 +
 +static void get_pme_ener_vir(const gmx_pme_t pme,int nthread,
 +                             real *mesh_energy,matrix vir)
 +{
 +    /* This function sums output over threads
 +     * and should therefore only be called after thread synchronization.
 +     */
 +    int thread;
 +
 +    *mesh_energy = pme->work[0].energy;
 +    copy_mat(pme->work[0].vir,vir);
 +
 +    for(thread=1; thread<nthread; thread++)
 +    {
 +        *mesh_energy += pme->work[thread].energy;
 +        m_add(vir,pme->work[thread].vir,vir);
 +    }
 +}
 +
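 +/* DO_FSPLINE(order) accumulates, for one particle, the gradient of the
 +   interpolated potential over an order x order x order grid block: spline
 +   weights (thx,thy,thz) and their derivatives (dthx,dthy,dthz) are combined
 +   into the force components fx, fy and fz. */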
 +#define DO_FSPLINE(order)                      \
 +for(ithx=0; (ithx<order); ithx++)              \
 +{                                              \
 +    index_x = (i0+ithx)*pny*pnz;               \
 +    tx      = thx[ithx];                       \
 +    dx      = dthx[ithx];                      \
 +                                               \
 +    for(ithy=0; (ithy<order); ithy++)          \
 +    {                                          \
 +        index_xy = index_x+(j0+ithy)*pnz;      \
 +        ty       = thy[ithy];                  \
 +        dy       = dthy[ithy];                 \
 +        fxy1     = fz1 = 0;                    \
 +                                               \
 +        for(ithz=0; (ithz<order); ithz++)      \
 +        {                                      \
 +            gval  = grid[index_xy+(k0+ithz)];  \
 +            fxy1 += thz[ithz]*gval;            \
 +            fz1  += dthz[ithz]*gval;           \
 +        }                                      \
 +        fx += dx*ty*fxy1;                      \
 +        fy += tx*dy*fxy1;                      \
 +        fz += tx*ty*fz1;                       \
 +    }                                          \
 +}
 +
 +
 +static void gather_f_bsplines(gmx_pme_t pme,real *grid,
 +                              gmx_bool bClearF,pme_atomcomm_t *atc,
 +                              splinedata_t *spline,
 +                              real scale)
 +{
 +    /* sum forces for local particles */
 +    int     nn,n,ithx,ithy,ithz,i0,j0,k0;
 +    int     index_x,index_xy;
 +    int     nx,ny,nz,pnx,pny,pnz;
 +    int *   idxptr;
 +    real    tx,ty,dx,dy,qn;
 +    real    fx,fy,fz,gval;
 +    real    fxy1,fz1;
 +    real    *thx,*thy,*thz,*dthx,*dthy,*dthz;
 +    int     norder;
 +    real    rxx,ryx,ryy,rzx,rzy,rzz;
 +    int     order;
 +
 +    pme_spline_work_t *work;
 +
 +    work = pme->spline_work;
 +
 +    order = pme->pme_order;
 +    thx   = spline->theta[XX];
 +    thy   = spline->theta[YY];
 +    thz   = spline->theta[ZZ];
 +    dthx  = spline->dtheta[XX];
 +    dthy  = spline->dtheta[YY];
 +    dthz  = spline->dtheta[ZZ];
 +    nx    = pme->nkx;
 +    ny    = pme->nky;
 +    nz    = pme->nkz;
 +    pnx   = pme->pmegrid_nx;
 +    pny   = pme->pmegrid_ny;
 +    pnz   = pme->pmegrid_nz;
 +
 +    rxx   = pme->recipbox[XX][XX];
 +    ryx   = pme->recipbox[YY][XX];
 +    ryy   = pme->recipbox[YY][YY];
 +    rzx   = pme->recipbox[ZZ][XX];
 +    rzy   = pme->recipbox[ZZ][YY];
 +    rzz   = pme->recipbox[ZZ][ZZ];
 +
 +    for(nn=0; nn<spline->n; nn++)
 +    {
 +        n  = spline->ind[nn];
 +        qn = scale*atc->q[n];
 +
 +        if (bClearF)
 +        {
 +            atc->f[n][XX] = 0;
 +            atc->f[n][YY] = 0;
 +            atc->f[n][ZZ] = 0;
 +        }
 +        if (qn != 0)
 +        {
 +            fx     = 0;
 +            fy     = 0;
 +            fz     = 0;
 +            idxptr = atc->idx[n];
 +            norder = nn*order;
 +
 +            i0   = idxptr[XX];
 +            j0   = idxptr[YY];
 +            k0   = idxptr[ZZ];
 +
 +            /* Pointer arithmetic alert, next six statements */
 +            thx  = spline->theta[XX] + norder;
 +            thy  = spline->theta[YY] + norder;
 +            thz  = spline->theta[ZZ] + norder;
 +            dthx = spline->dtheta[XX] + norder;
 +            dthy = spline->dtheta[YY] + norder;
 +            dthz = spline->dtheta[ZZ] + norder;
 +
 +            switch (order) {
 +            case 4:
 +#ifdef PME_SSE
 +#ifdef PME_SSE_UNALIGNED
 +#define PME_GATHER_F_SSE_ORDER4
 +#else
 +#define PME_GATHER_F_SSE_ALIGNED
 +#define PME_ORDER 4
 +#endif
 +#include "pme_sse_single.h"
 +#else
 +                DO_FSPLINE(4);
 +#endif
 +                break;
 +            case 5:
 +#ifdef PME_SSE
 +#define PME_GATHER_F_SSE_ALIGNED
 +#define PME_ORDER 5
 +#include "pme_sse_single.h"
 +#else
 +                DO_FSPLINE(5);
 +#endif
 +                break;
 +            default:
 +                DO_FSPLINE(order);
 +                break;
 +            }
 +
 +            atc->f[n][XX] += -qn*( fx*nx*rxx );
 +            atc->f[n][YY] += -qn*( fx*nx*ryx + fy*ny*ryy );
 +            atc->f[n][ZZ] += -qn*( fx*nx*rzx + fy*ny*rzy + fz*nz*rzz );
 +        }
 +    }
 +    /* Since the energy and not the forces are interpolated
 +     * the net force might not be exactly zero.
 +     * This can be solved by also interpolating F, but
 +     * that comes at a cost.
 +     * A better hack is to remove the net force every
 +     * step, but that must be done at a higher level
 +     * since this routine doesn't see all atoms if running
 +     * in parallel. It is not clear how important this is.  EL 990726
 +     */
 +}
 +
 +
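 +/* Interpolate the potential from the grid back onto the atoms and
 + * return the sum of charge times interpolated potential; used by
 + * gmx_pme_calc_energy() for single-node, energy-only evaluations.
 + */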
 +static real gather_energy_bsplines(gmx_pme_t pme,real *grid,
 +                                   pme_atomcomm_t *atc)
 +{
 +    splinedata_t *spline;
 +    int     n,ithx,ithy,ithz,i0,j0,k0;
 +    int     index_x,index_xy;
 +    int *   idxptr;
 +    real    energy,pot,tx,ty,qn,gval;
 +    real    *thx,*thy,*thz;
 +    int     norder;
 +    int     order;
 +
 +    spline = &atc->spline[0];
 +
 +    order = pme->pme_order;
 +
 +    energy = 0;
 +    for(n=0; (n<atc->n); n++) {
 +        qn      = atc->q[n];
 +
 +        if (qn != 0) {
 +            idxptr = atc->idx[n];
 +            norder = n*order;
 +
 +            i0   = idxptr[XX];
 +            j0   = idxptr[YY];
 +            k0   = idxptr[ZZ];
 +
 +            /* Pointer arithmetic alert, next three statements */
 +            thx  = spline->theta[XX] + norder;
 +            thy  = spline->theta[YY] + norder;
 +            thz  = spline->theta[ZZ] + norder;
 +
 +            pot = 0;
 +            for(ithx=0; (ithx<order); ithx++)
 +            {
 +                index_x = (i0+ithx)*pme->pmegrid_ny*pme->pmegrid_nz;
 +                tx      = thx[ithx];
 +
 +                for(ithy=0; (ithy<order); ithy++)
 +                {
 +                    index_xy = index_x+(j0+ithy)*pme->pmegrid_nz;
 +                    ty       = thy[ithy];
 +
 +                    for(ithz=0; (ithz<order); ithz++)
 +                    {
 +                        gval  = grid[index_xy+(k0+ithz)];
 +                        pot  += tx*ty*thz[ithz]*gval;
 +                    }
 +
 +                }
 +            }
 +
 +            energy += pot*qn;
 +        }
 +    }
 +
 +    return energy;
 +}
 +
 +/* Macro to force loop unrolling by fixing order.
 + * This gives a significant performance gain.
 + */
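 +/* CALC_SPLINE computes, per dimension, the order-n B-spline weights
 + * theta and derivatives dtheta for one atom, using in essence the
 + * recursion M_n(u) = (u*M_{n-1}(u) + (n-u)*M_{n-1}(u-1))/(n-1),
 + * raising the order by one per pass; the derivatives are differences
 + * of the order n-1 weights (dM_n/du = M_{n-1}(u) - M_{n-1}(u-1)),
 + * taken before the final order-raising pass.
 + */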
 +#define CALC_SPLINE(order)                     \
 +{                                              \
 +    int j,k,l;                                 \
 +    real dr,div;                               \
 +    real data[PME_ORDER_MAX];                  \
 +    real ddata[PME_ORDER_MAX];                 \
 +                                               \
 +    for(j=0; (j<DIM); j++)                     \
 +    {                                          \
 +        dr  = xptr[j];                         \
 +                                               \
 +        /* dr is relative offset from lower cell limit */ \
 +        data[order-1] = 0;                     \
 +        data[1] = dr;                          \
 +        data[0] = 1 - dr;                      \
 +                                               \
 +        for(k=3; (k<order); k++)               \
 +        {                                      \
 +            div = 1.0/(k - 1.0);               \
 +            data[k-1] = div*dr*data[k-2];      \
 +            for(l=1; (l<(k-1)); l++)           \
 +            {                                  \
 +                data[k-l-1] = div*((dr+l)*data[k-l-2]+(k-l-dr)* \
 +                                   data[k-l-1]);                \
 +            }                                  \
 +            data[0] = div*(1-dr)*data[0];      \
 +        }                                      \
 +        /* differentiate */                    \
 +        ddata[0] = -data[0];                   \
 +        for(k=1; (k<order); k++)               \
 +        {                                      \
 +            ddata[k] = data[k-1] - data[k];    \
 +        }                                      \
 +                                               \
 +        div = 1.0/(order - 1);                 \
 +        data[order-1] = div*dr*data[order-2];  \
 +        for(l=1; (l<(order-1)); l++)           \
 +        {                                      \
 +            data[order-l-1] = div*((dr+l)*data[order-l-2]+    \
 +                               (order-l-dr)*data[order-l-1]); \
 +        }                                      \
 +        data[0] = div*(1 - dr)*data[0];        \
 +                                               \
 +        for(k=0; k<order; k++)                 \
 +        {                                      \
 +            theta[j][i*order+k]  = data[k];    \
 +            dtheta[j][i*order+k] = ddata[k];   \
 +        }                                      \
 +    }                                          \
 +}
 +
 +void make_bsplines(splinevec theta,splinevec dtheta,int order,
 +                   rvec fractx[],int nr,int ind[],real charge[],
 +                   gmx_bool bFreeEnergy)
 +{
 +    /* construct splines for local atoms */
 +    int  i,ii;
 +    real *xptr;
 +
 +    for(i=0; i<nr; i++)
 +    {
 +        /* With free energy we do not use the charge check.
 +         * In most cases this will be more efficient than calling make_bsplines
 +         * twice, since usually more than half the particles have charges.
 +         */
 +        ii = ind[i];
 +        if (bFreeEnergy || charge[ii] != 0.0) {
 +            xptr = fractx[ii];
 +            switch(order) {
 +            case 4:  CALC_SPLINE(4);     break;
 +            case 5:  CALC_SPLINE(5);     break;
 +            default: CALC_SPLINE(order); break;
 +            }
 +        }
 +    }
 +}
 +
 +
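 +/* make_dft_mod computes mod[i] = |sum_j data[j] exp(2 pi I i j/ndata)|^2,
 + * the squared modulus of the discrete Fourier transform of the B-spline
 + * values; entries that are essentially zero are replaced by the average
 + * of their neighbours, as the moduli are later used as divisors in the
 + * reciprocal-space sum.
 + */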
 +void make_dft_mod(real *mod,real *data,int ndata)
 +{
 +  int i,j;
 +  real sc,ss,arg;
 +
 +  for(i=0;i<ndata;i++) {
 +    sc=ss=0;
 +    for(j=0;j<ndata;j++) {
 +      arg=(2.0*M_PI*i*j)/ndata;
 +      sc+=data[j]*cos(arg);
 +      ss+=data[j]*sin(arg);
 +    }
 +    mod[i]=sc*sc+ss*ss;
 +  }
 +  for(i=0;i<ndata;i++)
 +    if(mod[i]<1e-7)
 +      mod[i]=(mod[i-1]+mod[i+1])*0.5;
 +}
 +
 +
 +static void make_bspline_moduli(splinevec bsp_mod,
 +                                int nx,int ny,int nz,int order)
 +{
 +  int nmax=max(nx,max(ny,nz));
 +  real *data,*ddata,*bsp_data;
 +  int i,k,l;
 +  real div;
 +
 +  snew(data,order);
 +  snew(ddata,order);
 +  snew(bsp_data,nmax);
 +
 +  data[order-1]=0;
 +  data[1]=0;
 +  data[0]=1;
 +
 +  for(k=3;k<order;k++) {
 +    div=1.0/(k-1.0);
 +    data[k-1]=0;
 +    for(l=1;l<(k-1);l++)
 +      data[k-l-1]=div*(l*data[k-l-2]+(k-l)*data[k-l-1]);
 +    data[0]=div*data[0];
 +  }
 +  /* differentiate */
 +  ddata[0]=-data[0];
 +  for(k=1;k<order;k++)
 +    ddata[k]=data[k-1]-data[k];
 +  div=1.0/(order-1);
 +  data[order-1]=0;
 +  for(l=1;l<(order-1);l++)
 +    data[order-l-1]=div*(l*data[order-l-2]+(order-l)*data[order-l-1]);
 +  data[0]=div*data[0];
 +
 +  for(i=0;i<nmax;i++)
 +    bsp_data[i]=0;
 +  for(i=1;i<=order;i++)
 +    bsp_data[i]=data[i-1];
 +
 +  make_dft_mod(bsp_mod[XX],bsp_data,nx);
 +  make_dft_mod(bsp_mod[YY],bsp_data,ny);
 +  make_dft_mod(bsp_mod[ZZ],bsp_data,nz);
 +
 +  sfree(data);
 +  sfree(ddata);
 +  sfree(bsp_data);
 +}
 +
 +
 +/* Return the P3M optimal influence function */
 +static double do_p3m_influence(double z, int order)
 +{
 +    double z2,z4;
 +
 +    z2 = z*z;
 +    z4 = z2*z2;
 +
 +    /* The formula and most constants can be found in:
 +     * Ballenegger et al., JCTC 8, 936 (2012)
 +     */
 +    switch(order)
 +    {
 +    case 2:
 +        return 1.0 - 2.0*z2/3.0;
 +        break;
 +    case 3:
 +        return 1.0 - z2 + 2.0*z4/15.0;
 +        break;
 +    case 4:
 +        return 1.0 - 4.0*z2/3.0 + 2.0*z4/5.0 + 4.0*z2*z4/315.0;
 +        break;
 +    case 5:
 +        return 1.0 - 5.0*z2/3.0 + 7.0*z4/9.0 - 17.0*z2*z4/189.0 + 2.0*z4*z4/2835.0;
 +        break;
 +    case 6:
 +        return 1.0 - 2.0*z2 + 19.0*z4/15.0 - 256.0*z2*z4/945.0 + 62.0*z4*z4/4725.0 + 4.0*z2*z4*z4/155925.0;
 +        break;
 +    case 7:
 +        return 1.0 - 7.0*z2/3.0 + 28.0*z4/15.0 - 16.0*z2*z4/27.0 + 26.0*z4*z4/405.0 - 2.0*z2*z4*z4/1485.0 + 4.0*z4*z4*z4/6081075.0;
 +    case 8:
 +        return 1.0 - 8.0*z2/3.0 + 116.0*z4/45.0 - 344.0*z2*z4/315.0 + 914.0*z4*z4/4725.0 - 248.0*z4*z4*z2/22275.0 + 21844.0*z4*z4*z4/212837625.0 - 8.0*z4*z4*z4*z2/638512875.0;
 +        break;
 +    }
 +
 +    return 0.0;
 +}
 +
 +/* Calculate the P3M B-spline moduli for one dimension */
 +static void make_p3m_bspline_moduli_dim(real *bsp_mod,int n,int order)
 +{
 +    double zarg,zai,sinzai,infl;
 +    int    maxk,i;
 +
 +    if (order > 8)
 +    {
 +        gmx_fatal(FARGS,"The current P3M code only supports orders up to 8");
 +    }
 +
 +    zarg = M_PI/n;
 +
 +    maxk = (n + 1)/2;
 +
 +    for(i=-maxk; i<0; i++)
 +    {
 +        zai    = zarg*i;
 +        sinzai = sin(zai);
 +        infl   = do_p3m_influence(sinzai,order);
 +        bsp_mod[n+i] = infl*infl*pow(sinzai/zai,-2.0*order);
 +    }
 +    bsp_mod[0] = 1.0;
 +    for(i=1; i<maxk; i++)
 +    {
 +        zai    = zarg*i;
 +        sinzai = sin(zai);
 +        infl   = do_p3m_influence(sinzai,order);
 +        bsp_mod[i] = infl*infl*pow(sinzai/zai,-2.0*order);
 +    }
 +}
 +
 +/* Calculate the P3M B-spline moduli */
 +static void make_p3m_bspline_moduli(splinevec bsp_mod,
 +                                    int nx,int ny,int nz,int order)
 +{
 +    make_p3m_bspline_moduli_dim(bsp_mod[XX],nx,order);
 +    make_p3m_bspline_moduli_dim(bsp_mod[YY],ny,order);
 +    make_p3m_bspline_moduli_dim(bsp_mod[ZZ],nz,order);
 +}
 +
 +
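 +/* Fill node_dest/node_src with the coordinate redistribution partners,
 + * alternating one slab forward and one slab backward at increasing
 + * distance, so that at most nslab-1 send/receive pairs are set up.
 + */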
 +static void setup_coordinate_communication(pme_atomcomm_t *atc)
 +{
 +  int nslab,n,i;
 +  int fw,bw;
 +
 +  nslab = atc->nslab;
 +
 +  n = 0;
 +  for(i=1; i<=nslab/2; i++) {
 +    fw = (atc->nodeid + i) % nslab;
 +    bw = (atc->nodeid - i + nslab) % nslab;
 +    if (n < nslab - 1) {
 +      atc->node_dest[n] = fw;
 +      atc->node_src[n]  = bw;
 +      n++;
 +    }
 +    if (n < nslab - 1) {
 +      atc->node_dest[n] = bw;
 +      atc->node_src[n]  = fw;
 +      n++;
 +    }
 +  }
 +}
 +
 +int gmx_pme_destroy(FILE *log,gmx_pme_t *pmedata)
 +{
 +    int thread;
 +
 +    if(NULL != log)
 +    {
 +        fprintf(log,"Destroying PME data structures.\n");
 +    }
 +
 +    sfree((*pmedata)->nnx);
 +    sfree((*pmedata)->nny);
 +    sfree((*pmedata)->nnz);
 +
 +    pmegrids_destroy(&(*pmedata)->pmegridA);
 +
 +    sfree((*pmedata)->fftgridA);
 +    sfree((*pmedata)->cfftgridA);
 +    gmx_parallel_3dfft_destroy((*pmedata)->pfft_setupA);
 +
 +    if ((*pmedata)->pmegridB.grid.grid != NULL)
 +    {
 +        pmegrids_destroy(&(*pmedata)->pmegridB);
 +        sfree((*pmedata)->fftgridB);
 +        sfree((*pmedata)->cfftgridB);
 +        gmx_parallel_3dfft_destroy((*pmedata)->pfft_setupB);
 +    }
 +    for(thread=0; thread<(*pmedata)->nthread; thread++)
 +    {
 +        free_work(&(*pmedata)->work[thread]);
 +    }
 +    sfree((*pmedata)->work);
 +
 +    sfree(*pmedata);
 +    *pmedata = NULL;
 +
 +  return 0;
 +}
 +
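 +/* Round n up to the nearest multiple of f */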
 +static int mult_up(int n,int f)
 +{
 +    return ((n + f - 1)/f)*f;
 +}
 +
 +
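 +/* Rough estimate of the PME FFT/solve load imbalance: each decomposed
 + * grid dimension is rounded up (mult_up) to what the busiest rank
 + * handles and compared with the total grid volume; gmx_pme_init below
 + * prints a note when this estimate reaches 1.2.
 + */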
 +static double pme_load_imbalance(gmx_pme_t pme)
 +{
 +    int    nma,nmi;
 +    double n1,n2,n3;
 +
 +    nma = pme->nnodes_major;
 +    nmi = pme->nnodes_minor;
 +
 +    n1 = mult_up(pme->nkx,nma)*mult_up(pme->nky,nmi)*pme->nkz;
 +    n2 = mult_up(pme->nkx,nma)*mult_up(pme->nkz,nmi)*pme->nky;
 +    n3 = mult_up(pme->nky,nma)*mult_up(pme->nkz,nmi)*pme->nkx;
 +
 +    /* pme_solve is roughly double the cost of an fft */
 +
 +    return (n1 + n2 + 3*n3)/(double)(6*pme->nkx*pme->nky*pme->nkz);
 +}
 +
 +static void init_atomcomm(gmx_pme_t pme,pme_atomcomm_t *atc, t_commrec *cr,
 +                          int dimind,gmx_bool bSpread)
 +{
 +    int nk,k,s,thread;
 +
 +    atc->dimind = dimind;
 +    atc->nslab  = 1;
 +    atc->nodeid = 0;
 +    atc->pd_nalloc = 0;
 +#ifdef GMX_MPI
 +    if (pme->nnodes > 1)
 +    {
 +        atc->mpi_comm = pme->mpi_comm_d[dimind];
 +        MPI_Comm_size(atc->mpi_comm,&atc->nslab);
 +        MPI_Comm_rank(atc->mpi_comm,&atc->nodeid);
 +    }
 +    if (debug)
 +    {
 +        fprintf(debug,"For PME atom communication in dimind %d: nslab %d rank %d\n",atc->dimind,atc->nslab,atc->nodeid);
 +    }
 +#endif
 +
 +    atc->bSpread   = bSpread;
 +    atc->pme_order = pme->pme_order;
 +
 +    if (atc->nslab > 1)
 +    {
 +        /* These three allocations are not required for particle decomp. */
 +        snew(atc->node_dest,atc->nslab);
 +        snew(atc->node_src,atc->nslab);
 +        setup_coordinate_communication(atc);
 +
 +        snew(atc->count_thread,pme->nthread);
 +        for(thread=0; thread<pme->nthread; thread++)
 +        {
 +            snew(atc->count_thread[thread],atc->nslab);
 +        }
 +        atc->count = atc->count_thread[0];
 +        snew(atc->rcount,atc->nslab);
 +        snew(atc->buf_index,atc->nslab);
 +    }
 +
 +    atc->nthread = pme->nthread;
 +    if (atc->nthread > 1)
 +    {
 +        snew(atc->thread_plist,atc->nthread);
 +    }
 +    snew(atc->spline,atc->nthread);
 +    for(thread=0; thread<atc->nthread; thread++)
 +    {
 +        if (atc->nthread > 1)
 +        {
 +            snew(atc->thread_plist[thread].n,atc->nthread+2*GMX_CACHE_SEP);
 +            atc->thread_plist[thread].n += GMX_CACHE_SEP;
 +        }
 +    }
 +}
 +
 +static void
 +init_overlap_comm(pme_overlap_t *  ol,
 +                  int              norder,
 +#ifdef GMX_MPI
 +                  MPI_Comm         comm,
 +#endif
 +                  int              nnodes,
 +                  int              nodeid,
 +                  int              ndata,
 +                  int              commplainsize)
 +{
 +    int lbnd,rbnd,maxlr,b,i;
 +    int exten;
 +    int nn,nk;
 +    pme_grid_comm_t *pgc;
 +    gmx_bool bCont;
 +    int fft_start,fft_end,send_index1,recv_index1;
 +
 +#ifdef GMX_MPI
 +    ol->mpi_comm = comm;
 +#endif
 +
 +    ol->nnodes = nnodes;
 +    ol->nodeid = nodeid;
 +
 +    /* Linear translation of the PME grid won't affect reciprocal space
 +     * calculations, so to optimize we only interpolate "upwards",
 +     * which also means we only have to consider overlap in one direction.
 +     * I.e., particles on this node might also be spread to grid indices
 +     * that belong to higher nodes (modulo nnodes)
 +     */
 +
 +    snew(ol->s2g0,ol->nnodes+1);
 +    snew(ol->s2g1,ol->nnodes);
 +    if (debug) { fprintf(debug,"PME slab boundaries:"); }
 +    for(i=0; i<nnodes; i++)
 +    {
 +        /* s2g0 is the local interpolation grid start.
 +         * s2g1 is the local interpolation grid end.
 +         * Because grid overlap communication only goes forward,
 +         * the grid slabs for the FFTs should be rounded down.
 +         */
 +        ol->s2g0[i] = ( i   *ndata + 0       )/nnodes;
 +        ol->s2g1[i] = ((i+1)*ndata + nnodes-1)/nnodes + norder - 1;
 +
 +        if (debug)
 +        {
 +            fprintf(debug,"  %3d %3d",ol->s2g0[i],ol->s2g1[i]);
 +        }
 +    }
 +    ol->s2g0[nnodes] = ndata;
 +    if (debug) { fprintf(debug,"\n"); }
 +
 +    /* Determine with how many nodes we need to communicate the grid overlap */
 +    b = 0;
 +    do
 +    {
 +        b++;
 +        bCont = FALSE;
 +        for(i=0; i<nnodes; i++)
 +        {
 +            if ((i+b <  nnodes && ol->s2g1[i] > ol->s2g0[i+b]) ||
 +                (i+b >= nnodes && ol->s2g1[i] > ol->s2g0[i+b-nnodes] + ndata))
 +            {
 +                bCont = TRUE;
 +            }
 +        }
 +    }
 +    while (bCont && b < nnodes);
 +    ol->noverlap_nodes = b - 1;
 +
 +    snew(ol->send_id,ol->noverlap_nodes);
 +    snew(ol->recv_id,ol->noverlap_nodes);
 +    for(b=0; b<ol->noverlap_nodes; b++)
 +    {
 +        ol->send_id[b] = (ol->nodeid + (b + 1)) % ol->nnodes;
 +        ol->recv_id[b] = (ol->nodeid - (b + 1) + ol->nnodes) % ol->nnodes;
 +    }
 +    snew(ol->comm_data, ol->noverlap_nodes);
 +
 +    for(b=0; b<ol->noverlap_nodes; b++)
 +    {
 +        pgc = &ol->comm_data[b];
 +        /* Send */
 +        fft_start        = ol->s2g0[ol->send_id[b]];
 +        fft_end          = ol->s2g0[ol->send_id[b]+1];
 +        if (ol->send_id[b] < nodeid)
 +        {
 +            fft_start += ndata;
 +            fft_end   += ndata;
 +        }
 +        send_index1      = ol->s2g1[nodeid];
 +        send_index1      = min(send_index1,fft_end);
 +        pgc->send_index0 = fft_start;
 +        pgc->send_nindex = max(0,send_index1 - pgc->send_index0);
 +
 +        /* We always start receiving to the first index of our slab */
 +        fft_start        = ol->s2g0[ol->nodeid];
 +        fft_end          = ol->s2g0[ol->nodeid+1];
 +        recv_index1      = ol->s2g1[ol->recv_id[b]];
 +        if (ol->recv_id[b] > nodeid)
 +        {
 +            recv_index1 -= ndata;
 +        }
 +        recv_index1      = min(recv_index1,fft_end);
 +        pgc->recv_index0 = fft_start;
 +        pgc->recv_nindex = max(0,recv_index1 - pgc->recv_index0);
 +    }
 +
 +    /* For a non-divisible grid we need pme_order instead of pme_order-1 */
 +    snew(ol->sendbuf,norder*commplainsize);
 +    snew(ol->recvbuf,norder*commplainsize);
 +}
 +
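 +/* Build the spreading lookup tables: the arrays have 5*n entries so
 + * that shifted global grid indices stay in range, gtl maps such an
 + * index to the local grid index, and fsh holds a fraction shift of
 + * -1, 0 or +1 that compensates when an index is moved back inside the
 + * local grid.
 + */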
 +static void
 +make_gridindex5_to_localindex(int n,int local_start,int local_range,
 +                              int **global_to_local,
 +                              real **fraction_shift)
 +{
 +    int i;
 +    int * gtl;
 +    real * fsh;
 +
 +    snew(gtl,5*n);
 +    snew(fsh,5*n);
 +    for(i=0; (i<5*n); i++)
 +    {
 +        /* Determine the global to local grid index */
 +        gtl[i] = (i - local_start + n) % n;
 +        /* For coordinates that fall within the local grid the fraction
 +         * is correct; we don't need to shift it.
 +         */
 +        fsh[i] = 0;
 +        if (local_range < n)
 +        {
 +            /* Due to rounding issues i could be 1 beyond the lower or
 +             * upper boundary of the local grid. Correct the index for this.
 +             * If we shift the index, we need to shift the fraction by
 +             * the same amount in the other direction to not affect
 +             * the weights.
 +             * Note that due to this shifting the weights at the end of
 +             * the spline might change, but that will only involve values
 +             * between zero and values close to the precision of a real,
 +             * which is anyhow the accuracy of the whole mesh calculation.
 +             */
 +            /* With local_range=0 we should not change i=local_start */
 +            if (i % n != local_start)
 +            {
 +                if (gtl[i] == n-1)
 +                {
 +                    gtl[i] = 0;
 +                    fsh[i] = -1;
 +                }
 +                else if (gtl[i] == local_range)
 +                {
 +                    gtl[i] = local_range - 1;
 +                    fsh[i] = 1;
 +                }
 +            }
 +        }
 +    }
 +
 +    *global_to_local = gtl;
 +    *fraction_shift  = fsh;
 +}
 +
 +static pme_spline_work_t *make_pme_spline_work(int order)
 +{
 +    pme_spline_work_t *work;
 +
 +#ifdef PME_SSE
 +    float  tmp[8];
 +    __m128 zero_SSE;
 +    int    of,i;
 +
 +    snew_aligned(work,1,16);
 +
 +    zero_SSE = _mm_setzero_ps();
 +
 +    /* Generate bit masks to mask out the unused grid entries,
 +     * as we only operate on 'order' of the 8 grid entries that are
 +     * loaded into 2 SSE float registers.
 +     */
 +    for(of=0; of<8-(order-1); of++)
 +    {
 +        for(i=0; i<8; i++)
 +        {
 +            tmp[i] = (i >= of && i < of+order ? 1 : 0);
 +        }
 +        work->mask_SSE0[of] = _mm_loadu_ps(tmp);
 +        work->mask_SSE1[of] = _mm_loadu_ps(tmp+4);
 +        work->mask_SSE0[of] = _mm_cmpgt_ps(work->mask_SSE0[of],zero_SSE);
 +        work->mask_SSE1[of] = _mm_cmpgt_ps(work->mask_SSE1[of],zero_SSE);
 +    }
 +#else
 +    work = NULL;
 +#endif
 +
 +    return work;
 +}
 +
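 +/* If the grid size in a decomposed dimension is not divisible by the
 + * number of PME nodes, round it up to the next multiple (e.g. nk=50 on
 + * 8 nodes becomes 56); a fatal error is given if that would grow the
 + * grid by 50% or more.
 + */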
 +static void
 +gmx_pme_check_grid_restrictions(FILE *fplog,char dim,int nnodes,int *nk)
 +{
 +    int nk_new;
 +
 +    if (*nk % nnodes != 0)
 +    {
 +        nk_new = nnodes*(*nk/nnodes + 1);
 +
 +        if (2*nk_new >= 3*(*nk))
 +        {
 +            gmx_fatal(FARGS,"The PME grid size in dim %c (%d) is not divisible by the number of nodes doing PME in dim %c (%d). The grid size would have to be increased by more than 50%% to make the grid divisible. Change the total number of nodes or the number of domain decomposition cells in x or the PME grid %c dimension (and the cut-off).",
 +                      dim,*nk,dim,nnodes,dim);
 +        }
 +
 +        if (fplog != NULL)
 +        {
 +            fprintf(fplog,"\nNOTE: The PME grid size in dim %c (%d) is not divisible by the number of nodes doing PME in dim %c (%d). Increasing the PME grid size in dim %c to %d. This will increase the accuracy and will not decrease the performance significantly on this number of nodes. For optimal performance change the total number of nodes or the number of domain decomposition cells in x or the PME grid %c dimension (and the cut-off).\n\n",
 +                    dim,*nk,dim,nnodes,dim,nk_new,dim);
 +        }
 +
 +        *nk = nk_new;
 +    }
 +}
 +
 +int gmx_pme_init(gmx_pme_t *         pmedata,
 +                 t_commrec *         cr,
 +                 int                 nnodes_major,
 +                 int                 nnodes_minor,
 +                 t_inputrec *        ir,
 +                 int                 homenr,
 +                 gmx_bool            bFreeEnergy,
 +                 gmx_bool            bReproducible,
 +                 int                 nthread)
 +{
 +    gmx_pme_t pme=NULL;
 +
 +    pme_atomcomm_t *atc;
 +    ivec ndata;
 +
 +    if (debug)
 +        fprintf(debug,"Creating PME data structures.\n");
 +    snew(pme,1);
 +
 +    pme->redist_init         = FALSE;
 +    pme->sum_qgrid_tmp       = NULL;
 +    pme->sum_qgrid_dd_tmp    = NULL;
 +    pme->buf_nalloc          = 0;
 +    pme->redist_buf_nalloc   = 0;
 +
 +    pme->nnodes              = 1;
 +    pme->bPPnode             = TRUE;
 +
 +    pme->nnodes_major        = nnodes_major;
 +    pme->nnodes_minor        = nnodes_minor;
 +
 +#ifdef GMX_MPI
 +    if (nnodes_major*nnodes_minor > 1)
 +    {
 +        pme->mpi_comm = cr->mpi_comm_mygroup;
 +
 +        MPI_Comm_rank(pme->mpi_comm,&pme->nodeid);
 +        MPI_Comm_size(pme->mpi_comm,&pme->nnodes);
 +        if (pme->nnodes != nnodes_major*nnodes_minor)
 +        {
 +            gmx_incons("PME node count mismatch");
 +        }
 +    }
 +    else
 +    {
 +        pme->mpi_comm = MPI_COMM_NULL;
 +    }
 +#endif
 +
 +    if (pme->nnodes == 1)
 +    {
++#ifdef GMX_MPI
++        pme->mpi_comm_d[0] = MPI_COMM_NULL;
++        pme->mpi_comm_d[1] = MPI_COMM_NULL;
++#endif
 +        pme->ndecompdim = 0;
 +        pme->nodeid_major = 0;
 +        pme->nodeid_minor = 0;
 +    }
 +    else
 +    {
 +        if (nnodes_minor == 1)
 +        {
 +#ifdef GMX_MPI
 +            pme->mpi_comm_d[0] = pme->mpi_comm;
 +            pme->mpi_comm_d[1] = MPI_COMM_NULL;
 +#endif
 +            pme->ndecompdim = 1;
 +            pme->nodeid_major = pme->nodeid;
 +            pme->nodeid_minor = 0;
 +
 +        }
 +        else if (nnodes_major == 1)
 +        {
 +#ifdef GMX_MPI
 +            pme->mpi_comm_d[0] = MPI_COMM_NULL;
 +            pme->mpi_comm_d[1] = pme->mpi_comm;
 +#endif
 +            pme->ndecompdim = 1;
 +            pme->nodeid_major = 0;
 +            pme->nodeid_minor = pme->nodeid;
 +        }
 +        else
 +        {
 +            if (pme->nnodes % nnodes_major != 0)
 +            {
 +                gmx_incons("For 2D PME decomposition, #PME nodes must be divisible by the number of nodes in the major dimension");
 +            }
 +            pme->ndecompdim = 2;
 +
 +#ifdef GMX_MPI
 +            MPI_Comm_split(pme->mpi_comm,pme->nodeid % nnodes_minor,
 +                           pme->nodeid,&pme->mpi_comm_d[0]);  /* My communicator along major dimension */
 +            MPI_Comm_split(pme->mpi_comm,pme->nodeid/nnodes_minor,
 +                           pme->nodeid,&pme->mpi_comm_d[1]);  /* My communicator along minor dimension */
 +
 +            MPI_Comm_rank(pme->mpi_comm_d[0],&pme->nodeid_major);
 +            MPI_Comm_size(pme->mpi_comm_d[0],&pme->nnodes_major);
 +            MPI_Comm_rank(pme->mpi_comm_d[1],&pme->nodeid_minor);
 +            MPI_Comm_size(pme->mpi_comm_d[1],&pme->nnodes_minor);
 +#endif
 +        }
 +        pme->bPPnode = (cr->duty & DUTY_PP);
 +    }
 +
 +    pme->nthread = nthread;
 +
 +    if (ir->ePBC == epbcSCREW)
 +    {
 +        gmx_fatal(FARGS,"pme does not (yet) work with pbc = screw");
 +    }
 +
 +    pme->bFEP        = ((ir->efep != efepNO) && bFreeEnergy);
 +    pme->nkx         = ir->nkx;
 +    pme->nky         = ir->nky;
 +    pme->nkz         = ir->nkz;
 +    pme->bP3M        = (ir->coulombtype == eelP3M_AD || getenv("GMX_PME_P3M") != NULL);
 +    pme->pme_order   = ir->pme_order;
 +    pme->epsilon_r   = ir->epsilon_r;
 +
 +    if (pme->pme_order > PME_ORDER_MAX)
 +    {
 +        gmx_fatal(FARGS,"pme_order (%d) is larger than the maximum allowed value (%d). Modify and recompile the code if you really need such a high order.",
 +                  pme->pme_order,PME_ORDER_MAX);
 +    }
 +
 +    /* Currently pme.c supports only the fft5d FFT code.
 +     * Therefore the grid always needs to be divisible by nnodes.
 +     * When the old 1D code is also supported again, change this check.
 +     *
 +     * This check should be done before calling gmx_pme_init
 +     * and fplog should be passed instead of stderr.
 +     *
 +    if (pme->ndecompdim >= 2)
 +    */
 +    if (pme->ndecompdim >= 1)
 +    {
 +        /*
 +        gmx_pme_check_grid_restrictions(pme->nodeid==0 ? stderr : NULL,
 +                                        'x',nnodes_major,&pme->nkx);
 +        gmx_pme_check_grid_restrictions(pme->nodeid==0 ? stderr : NULL,
 +                                        'y',nnodes_minor,&pme->nky);
 +        */
 +    }
 +
 +    if (pme->nkx <= pme->pme_order*(pme->nnodes_major > 1 ? 2 : 1) ||
 +        pme->nky <= pme->pme_order*(pme->nnodes_minor > 1 ? 2 : 1) ||
 +        pme->nkz <= pme->pme_order)
 +    {
 +        gmx_fatal(FARGS,"The pme grid dimensions need to be larger than pme_order (%d) and in parallel larger than 2*pme_order for x and/or y",pme->pme_order);
 +    }
 +
 +    if (pme->nnodes > 1) {
 +        double imbal;
 +
 +#ifdef GMX_MPI
 +        MPI_Type_contiguous(DIM, mpi_type, &(pme->rvec_mpi));
 +        MPI_Type_commit(&(pme->rvec_mpi));
 +#endif
 +
 +        /* Note that the charge spreading and force gathering, which usually
 +         * takes about the same amount of time as FFT+solve_pme,
 +         * is always fully load balanced
 +         * (unless the charge distribution is inhomogeneous).
 +         */
 +
 +        imbal = pme_load_imbalance(pme);
 +        if (imbal >= 1.2 && pme->nodeid_major == 0 && pme->nodeid_minor == 0)
 +        {
 +            fprintf(stderr,
 +                    "\n"
 +                    "NOTE: The load imbalance in PME FFT and solve is %d%%.\n"
 +                    "      For optimal PME load balancing\n"
 +                    "      PME grid_x (%d) and grid_y (%d) should be divisible by #PME_nodes_x (%d)\n"
 +                    "      and PME grid_y (%d) and grid_z (%d) should be divisible by #PME_nodes_y (%d)\n"
 +                    "\n",
 +                    (int)((imbal-1)*100 + 0.5),
 +                    pme->nkx,pme->nky,pme->nnodes_major,
 +                    pme->nky,pme->nkz,pme->nnodes_minor);
 +        }
 +    }
 +
 +    /* For a non-divisible grid we need pme_order instead of pme_order-1 */
 +    /* In sum_qgrid_dd x overlap is copied in place: take padding into account.
 +     * y is always copied through a buffer: we don't need padding in z,
 +     * but we do need the overlap in x because of the communication order.
 +     */
 +    init_overlap_comm(&pme->overlap[0],pme->pme_order,
 +#ifdef GMX_MPI
 +                      pme->mpi_comm_d[0],
 +#endif
 +                      pme->nnodes_major,pme->nodeid_major,
 +                      pme->nkx,
 +                      (div_round_up(pme->nky,pme->nnodes_minor)+pme->pme_order)*(pme->nkz+pme->pme_order-1));
 +
 +    init_overlap_comm(&pme->overlap[1],pme->pme_order,
 +#ifdef GMX_MPI
 +                      pme->mpi_comm_d[1],
 +#endif
 +                      pme->nnodes_minor,pme->nodeid_minor,
 +                      pme->nky,
 +                      (div_round_up(pme->nkx,pme->nnodes_major)+pme->pme_order)*pme->nkz);
 +
 +    /* Check for a limitation of the (current) sum_fftgrid_dd code */
 +    if (pme->nthread > 1 &&
 +        (pme->overlap[0].noverlap_nodes > 1 ||
 +         pme->overlap[1].noverlap_nodes > 1))
 +    {
 +        gmx_fatal(FARGS,"With threads the number of grid lines per node along x and/or y should be pme_order (%d) or more or exactly pme_order-1",pme->pme_order);
 +    }
 +
 +    snew(pme->bsp_mod[XX],pme->nkx);
 +    snew(pme->bsp_mod[YY],pme->nky);
 +    snew(pme->bsp_mod[ZZ],pme->nkz);
 +
 +    /* The required size of the interpolation grid, including overlap.
 +     * The allocated size (pmegrid_n?) might be slightly larger.
 +     */
 +    pme->pmegrid_nx = pme->overlap[0].s2g1[pme->nodeid_major] -
 +                      pme->overlap[0].s2g0[pme->nodeid_major];
 +    pme->pmegrid_ny = pme->overlap[1].s2g1[pme->nodeid_minor] -
 +                      pme->overlap[1].s2g0[pme->nodeid_minor];
 +    pme->pmegrid_nz_base = pme->nkz;
 +    pme->pmegrid_nz = pme->pmegrid_nz_base + pme->pme_order - 1;
 +    set_grid_alignment(&pme->pmegrid_nz,pme->pme_order);
 +
 +    pme->pmegrid_start_ix = pme->overlap[0].s2g0[pme->nodeid_major];
 +    pme->pmegrid_start_iy = pme->overlap[1].s2g0[pme->nodeid_minor];
 +    pme->pmegrid_start_iz = 0;
 +
 +    make_gridindex5_to_localindex(pme->nkx,
 +                                  pme->pmegrid_start_ix,
 +                                  pme->pmegrid_nx - (pme->pme_order-1),
 +                                  &pme->nnx,&pme->fshx);
 +    make_gridindex5_to_localindex(pme->nky,
 +                                  pme->pmegrid_start_iy,
 +                                  pme->pmegrid_ny - (pme->pme_order-1),
 +                                  &pme->nny,&pme->fshy);
 +    make_gridindex5_to_localindex(pme->nkz,
 +                                  pme->pmegrid_start_iz,
 +                                  pme->pmegrid_nz_base,
 +                                  &pme->nnz,&pme->fshz);
 +
 +    pmegrids_init(&pme->pmegridA,
 +                  pme->pmegrid_nx,pme->pmegrid_ny,pme->pmegrid_nz,
 +                  pme->pmegrid_nz_base,
 +                  pme->pme_order,
 +                  pme->nthread,
 +                  pme->overlap[0].s2g1[pme->nodeid_major]-pme->overlap[0].s2g0[pme->nodeid_major+1],
 +                  pme->overlap[1].s2g1[pme->nodeid_minor]-pme->overlap[1].s2g0[pme->nodeid_minor+1]);
 +
 +    pme->spline_work = make_pme_spline_work(pme->pme_order);
 +
 +    ndata[0] = pme->nkx;
 +    ndata[1] = pme->nky;
 +    ndata[2] = pme->nkz;
 +
 +    /* This routine will allocate the grid data to fit the FFTs */
 +    gmx_parallel_3dfft_init(&pme->pfft_setupA,ndata,
 +                            &pme->fftgridA,&pme->cfftgridA,
 +                            pme->mpi_comm_d,
 +                            pme->overlap[0].s2g0,pme->overlap[1].s2g0,
 +                            bReproducible,pme->nthread);
 +
 +    if (bFreeEnergy)
 +    {
 +        pmegrids_init(&pme->pmegridB,
 +                      pme->pmegrid_nx,pme->pmegrid_ny,pme->pmegrid_nz,
 +                      pme->pmegrid_nz_base,
 +                      pme->pme_order,
 +                      pme->nthread,
 +                      pme->nkx % pme->nnodes_major != 0,
 +                      pme->nky % pme->nnodes_minor != 0);
 +
 +        gmx_parallel_3dfft_init(&pme->pfft_setupB,ndata,
 +                                &pme->fftgridB,&pme->cfftgridB,
 +                                pme->mpi_comm_d,
 +                                pme->overlap[0].s2g0,pme->overlap[1].s2g0,
 +                                bReproducible,pme->nthread);
 +    }
 +    else
 +    {
 +        pme->pmegridB.grid.grid = NULL;
 +        pme->fftgridB           = NULL;
 +        pme->cfftgridB          = NULL;
 +    }
 +
 +    if (!pme->bP3M)
 +    {
 +        /* Use plain SPME B-spline interpolation */
 +        make_bspline_moduli(pme->bsp_mod,pme->nkx,pme->nky,pme->nkz,pme->pme_order);
 +    }
 +    else
 +    {
 +        /* Use the P3M grid-optimized influence function */
 +        make_p3m_bspline_moduli(pme->bsp_mod,pme->nkx,pme->nky,pme->nkz,pme->pme_order);
 +    }
 +
 +    /* Use atc[0] for spreading */
 +    init_atomcomm(pme,&pme->atc[0],cr,nnodes_major > 1 ? 0 : 1,TRUE);
 +    if (pme->ndecompdim >= 2)
 +    {
 +        init_atomcomm(pme,&pme->atc[1],cr,1,FALSE);
 +    }
 +
 +    if (pme->nnodes == 1) {
 +        pme->atc[0].n = homenr;
 +        pme_realloc_atomcomm_things(&pme->atc[0]);
 +    }
 +
 +    {
 +        int thread;
 +
 +        /* Use fft5d, order after FFT is y major, z, x minor */
 +
 +        snew(pme->work,pme->nthread);
 +        for(thread=0; thread<pme->nthread; thread++)
 +        {
 +            realloc_work(&pme->work[thread],pme->nkx);
 +        }
 +    }
 +
 +    *pmedata = pme;
 +
 +    return 0;
 +}
 +
 +
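 +/* Copy the non-overlapping interior of one thread's local spreading
 + * grid into the node-level FFT grid; the overlapping borders are added
 + * later by reduce_threadgrid_overlap().
 + */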
 +static void copy_local_grid(gmx_pme_t pme,
 +                            pmegrids_t *pmegrids,int thread,real *fftgrid)
 +{
 +    ivec local_fft_ndata,local_fft_offset,local_fft_size;
 +    int  fft_my,fft_mz;
 +    int  nsx,nsy,nsz;
 +    ivec nf;
 +    int  offx,offy,offz,x,y,z,i0,i0t;
 +    int  d;
 +    pmegrid_t *pmegrid;
 +    real *grid_th;
 +
 +    gmx_parallel_3dfft_real_limits(pme->pfft_setupA,
 +                                   local_fft_ndata,
 +                                   local_fft_offset,
 +                                   local_fft_size);
 +    fft_my = local_fft_size[YY];
 +    fft_mz = local_fft_size[ZZ];
 +
 +    pmegrid = &pmegrids->grid_th[thread];
 +
 +    nsx = pmegrid->n[XX];
 +    nsy = pmegrid->n[YY];
 +    nsz = pmegrid->n[ZZ];
 +
 +    for(d=0; d<DIM; d++)
 +    {
 +        nf[d] = min(pmegrid->n[d] - (pmegrid->order - 1),
 +                    local_fft_ndata[d] - pmegrid->offset[d]);
 +    }
 +
 +    offx = pmegrid->offset[XX];
 +    offy = pmegrid->offset[YY];
 +    offz = pmegrid->offset[ZZ];
 +
 +    /* Directly copy the non-overlapping parts of the local grids.
 +     * This also initializes the full grid.
 +     */
 +    grid_th = pmegrid->grid;
 +    for(x=0; x<nf[XX]; x++)
 +    {
 +        for(y=0; y<nf[YY]; y++)
 +        {
 +            i0  = ((offx + x)*fft_my + (offy + y))*fft_mz + offz;
 +            i0t = (x*nsy + y)*nsz;
 +            for(z=0; z<nf[ZZ]; z++)
 +            {
 +                fftgrid[i0+z] = grid_th[i0t+z];
 +            }
 +        }
 +    }
 +}
 +
 +static void print_sendbuf(gmx_pme_t pme,real *sendbuf)
 +{
 +    ivec local_fft_ndata,local_fft_offset,local_fft_size;
 +    pme_overlap_t *overlap;
 +    int datasize,nind;
 +    int i,x,y,z,n;
 +
 +    gmx_parallel_3dfft_real_limits(pme->pfft_setupA,
 +                                   local_fft_ndata,
 +                                   local_fft_offset,
 +                                   local_fft_size);
 +    /* Major dimension */
 +    overlap = &pme->overlap[0];
 +
 +    nind   = overlap->comm_data[0].send_nindex;
 +
 +    for(y=0; y<local_fft_ndata[YY]; y++) {
 +         printf(" %2d",y);
 +    }
 +    printf("\n");
 +
 +    i = 0;
 +    for(x=0; x<nind; x++) {
 +        for(y=0; y<local_fft_ndata[YY]; y++) {
 +            n = 0;
 +            for(z=0; z<local_fft_ndata[ZZ]; z++) {
 +                if (sendbuf[i] != 0) n++;
 +                i++;
 +            }
 +            printf(" %2d",n);
 +        }
 +        printf("\n");
 +    }
 +}
 +
 +static void
 +reduce_threadgrid_overlap(gmx_pme_t pme,
 +                          const pmegrids_t *pmegrids,int thread,
 +                          real *fftgrid,real *commbuf_x,real *commbuf_y)
 +{
 +    ivec local_fft_ndata,local_fft_offset,local_fft_size;
 +    int  fft_nx,fft_ny,fft_nz;
 +    int  fft_my,fft_mz;
 +    int  buf_my=-1;
 +    int  nsx,nsy,nsz;
 +    ivec ne;
 +    int  offx,offy,offz,x,y,z,i0,i0t;
 +    int  sx,sy,sz,fx,fy,fz,tx1,ty1,tz1,ox,oy,oz;
 +    gmx_bool bClearBufX,bClearBufY,bClearBufXY,bClearBuf;
 +    gmx_bool bCommX,bCommY;
 +    int  d;
 +    int  thread_f;
 +    const pmegrid_t *pmegrid,*pmegrid_g,*pmegrid_f;
 +    const real *grid_th;
 +    real *commbuf=NULL;
 +
 +    gmx_parallel_3dfft_real_limits(pme->pfft_setupA,
 +                                   local_fft_ndata,
 +                                   local_fft_offset,
 +                                   local_fft_size);
 +    fft_nx = local_fft_ndata[XX];
 +    fft_ny = local_fft_ndata[YY];
 +    fft_nz = local_fft_ndata[ZZ];
 +
 +    fft_my = local_fft_size[YY];
 +    fft_mz = local_fft_size[ZZ];
 +
 +    /* This routine is called when all threads have finished spreading.
 +     * Here each thread sums grid contributions calculated by other threads
 +     * to the thread local grid volume.
 +     * To minimize the number of grid copying operations,
 +     * this routine sums immediately from the pmegrid to the fftgrid.
 +     */
 +
 +    /* Determine which part of the full node grid we should operate on,
 +     * this is our thread local part of the full grid.
 +     */
 +    pmegrid = &pmegrids->grid_th[thread];
 +
 +    for(d=0; d<DIM; d++)
 +    {
 +        ne[d] = min(pmegrid->offset[d]+pmegrid->n[d]-(pmegrid->order-1),
 +                    local_fft_ndata[d]);
 +    }
 +
 +    offx = pmegrid->offset[XX];
 +    offy = pmegrid->offset[YY];
 +    offz = pmegrid->offset[ZZ];
 +
 +
 +    bClearBufX  = TRUE;
 +    bClearBufY  = TRUE;
 +    bClearBufXY = TRUE;
 +
 +    /* Now loop over all the thread data blocks that contribute
 +     * to the grid region we (our thread) are operating on.
 +     */
 +    /* Note that fft_nx/ny is equal to the number of grid points
 +     * between the first point of our node grid and the one of the next node.
 +     */
 +    for(sx=0; sx>=-pmegrids->nthread_comm[XX]; sx--)
 +    {
 +        fx = pmegrid->ci[XX] + sx;
 +        ox = 0;
 +        bCommX = FALSE;
 +        if (fx < 0) {
 +            fx += pmegrids->nc[XX];
 +            ox -= fft_nx;
 +            bCommX = (pme->nnodes_major > 1);
 +        }
 +        pmegrid_g = &pmegrids->grid_th[fx*pmegrids->nc[YY]*pmegrids->nc[ZZ]];
 +        ox += pmegrid_g->offset[XX];
 +        if (!bCommX)
 +        {
 +            tx1 = min(ox + pmegrid_g->n[XX],ne[XX]);
 +        }
 +        else
 +        {
 +            tx1 = min(ox + pmegrid_g->n[XX],pme->pme_order);
 +        }
 +
 +        for(sy=0; sy>=-pmegrids->nthread_comm[YY]; sy--)
 +        {
 +            fy = pmegrid->ci[YY] + sy;
 +            oy = 0;
 +            bCommY = FALSE;
 +            if (fy < 0) {
 +                fy += pmegrids->nc[YY];
 +                oy -= fft_ny;
 +                bCommY = (pme->nnodes_minor > 1);
 +            }
 +            pmegrid_g = &pmegrids->grid_th[fy*pmegrids->nc[ZZ]];
 +            oy += pmegrid_g->offset[YY];
 +            if (!bCommY)
 +            {
 +                ty1 = min(oy + pmegrid_g->n[YY],ne[YY]);
 +            }
 +            else
 +            {
 +                ty1 = min(oy + pmegrid_g->n[YY],pme->pme_order);
 +            }
 +
 +            for(sz=0; sz>=-pmegrids->nthread_comm[ZZ]; sz--)
 +            {
 +                fz = pmegrid->ci[ZZ] + sz;
 +                oz = 0;
 +                if (fz < 0)
 +                {
 +                    fz += pmegrids->nc[ZZ];
 +                    oz -= fft_nz;
 +                }
 +                pmegrid_g = &pmegrids->grid_th[fz];
 +                oz += pmegrid_g->offset[ZZ];
 +                tz1 = min(oz + pmegrid_g->n[ZZ],ne[ZZ]);
 +
 +                if (sx == 0 && sy == 0 && sz == 0)
 +                {
 +                    /* We have already added our local contribution
 +                     * before calling this routine, so skip it here.
 +                     */
 +                    continue;
 +                }
 +
 +                thread_f = (fx*pmegrids->nc[YY] + fy)*pmegrids->nc[ZZ] + fz;
 +
 +                pmegrid_f = &pmegrids->grid_th[thread_f];
 +
 +                grid_th = pmegrid_f->grid;
 +
 +                nsx = pmegrid_f->n[XX];
 +                nsy = pmegrid_f->n[YY];
 +                nsz = pmegrid_f->n[ZZ];
 +
 +#ifdef DEBUG_PME_REDUCE
 +                printf("n%d t%d add %d  %2d %2d %2d  %2d %2d %2d  %2d-%2d %2d-%2d, %2d-%2d %2d-%2d, %2d-%2d %2d-%2d\n",
 +                       pme->nodeid,thread,thread_f,
 +                       pme->pmegrid_start_ix,
 +                       pme->pmegrid_start_iy,
 +                       pme->pmegrid_start_iz,
 +                       sx,sy,sz,
 +                       offx-ox,tx1-ox,offx,tx1,
 +                       offy-oy,ty1-oy,offy,ty1,
 +                       offz-oz,tz1-oz,offz,tz1);
 +#endif
 +
 +                if (!(bCommX || bCommY))
 +                {
 +                    /* Copy from the thread local grid to the node grid */
 +                    for(x=offx; x<tx1; x++)
 +                    {
 +                        for(y=offy; y<ty1; y++)
 +                        {
 +                            i0  = (x*fft_my + y)*fft_mz;
 +                            i0t = ((x - ox)*nsy + (y - oy))*nsz - oz;
 +                            for(z=offz; z<tz1; z++)
 +                            {
 +                                fftgrid[i0+z] += grid_th[i0t+z];
 +                            }
 +                        }
 +                    }
 +                }
 +                else
 +                {
 +                    /* The order of this conditional decides
 +                     * where the corner volume gets stored with x+y decomp.
 +                     */
 +                    if (bCommY)
 +                    {
 +                        commbuf = commbuf_y;
 +                        buf_my  = ty1 - offy;
 +                        if (bCommX)
 +                        {
 +                            /* We index commbuf modulo the local grid size */
 +                            commbuf += buf_my*fft_nx*fft_nz;
 +
 +                            bClearBuf  = bClearBufXY;
 +                            bClearBufXY = FALSE;
 +                        }
 +                        else
 +                        {
 +                            bClearBuf  = bClearBufY;
 +                            bClearBufY = FALSE;
 +                        }
 +                    }
 +                    else
 +                    {
 +                        commbuf = commbuf_x;
 +                        buf_my  = fft_ny;
 +                        bClearBuf  = bClearBufX;
 +                        bClearBufX = FALSE;
 +                    }
 +
 +                    /* Copy to the communication buffer */
 +                    for(x=offx; x<tx1; x++)
 +                    {
 +                        for(y=offy; y<ty1; y++)
 +                        {
 +                            i0  = (x*buf_my + y)*fft_nz;
 +                            i0t = ((x - ox)*nsy + (y - oy))*nsz - oz;
 +
 +                            if (bClearBuf)
 +                            {
 +                                /* First access of commbuf, initialize it */
 +                                for(z=offz; z<tz1; z++)
 +                                {
 +                                    commbuf[i0+z]  = grid_th[i0t+z];
 +                                }
 +                            }
 +                            else
 +                            {
 +                                for(z=offz; z<tz1; z++)
 +                                {
 +                                    commbuf[i0+z] += grid_th[i0t+z];
 +                                }
 +                            }
 +                        }
 +                    }
 +                }
 +            }
 +        }
 +    }
 +}
 +
 +
 +static void sum_fftgrid_dd(gmx_pme_t pme,real *fftgrid)
 +{
 +    ivec local_fft_ndata,local_fft_offset,local_fft_size;
 +    pme_overlap_t *overlap;
 +    int  send_nindex;
 +    int  recv_index0,recv_nindex;
 +#ifdef GMX_MPI
 +    MPI_Status stat;
 +#endif
 +    int  ipulse,send_id,recv_id,datasize,gridsize,size_yx;
 +    real *sendptr,*recvptr;
 +    int  x,y,z,indg,indb;
 +
 +    /* Note that this routine is only used for forward communication.
 +     * Since the force gathering, unlike the charge spreading,
 +     * can be trivially parallelized over the particles,
 +     * the backwards process is much simpler and can use the "old"
 +     * communication setup.
 +     */
 +
 +    gmx_parallel_3dfft_real_limits(pme->pfft_setupA,
 +                                   local_fft_ndata,
 +                                   local_fft_offset,
 +                                   local_fft_size);
 +
 +    /* Currently supports only a single communication pulse */
 +
 +/* for(ipulse=0;ipulse<overlap->noverlap_nodes;ipulse++) */
 +    if (pme->nnodes_minor > 1)
 +    {
 +        /* Minor dimension */
 +        overlap = &pme->overlap[1];
 +
 +        if (pme->nnodes_major > 1)
 +        {
 +             size_yx = pme->overlap[0].comm_data[0].send_nindex;
 +        }
 +        else
 +        {
 +            size_yx = 0;
 +        }
 +        datasize = (local_fft_ndata[XX]+size_yx)*local_fft_ndata[ZZ];
 +
 +        ipulse = 0;
 +
 +        send_id = overlap->send_id[ipulse];
 +        recv_id = overlap->recv_id[ipulse];
 +        send_nindex   = overlap->comm_data[ipulse].send_nindex;
 +        /* recv_index0   = overlap->comm_data[ipulse].recv_index0; */
 +        recv_index0 = 0;
 +        recv_nindex   = overlap->comm_data[ipulse].recv_nindex;
 +
 +        sendptr = overlap->sendbuf;
 +        recvptr = overlap->recvbuf;
 +
 +        /*
 +        printf("node %d comm %2d x %2d x %2d\n",pme->nodeid,
 +               local_fft_ndata[XX]+size_yx,send_nindex,local_fft_ndata[ZZ]);
 +        printf("node %d send %f, %f\n",pme->nodeid,
 +               sendptr[0],sendptr[send_nindex*datasize-1]);
 +        */
 +
 +#ifdef GMX_MPI
 +        MPI_Sendrecv(sendptr,send_nindex*datasize,GMX_MPI_REAL,
 +                     send_id,ipulse,
 +                     recvptr,recv_nindex*datasize,GMX_MPI_REAL,
 +                     recv_id,ipulse,
 +                     overlap->mpi_comm,&stat);
 +#endif
 +
 +        for(x=0; x<local_fft_ndata[XX]; x++)
 +        {
 +            for(y=0; y<recv_nindex; y++)
 +            {
 +                indg = (x*local_fft_size[YY] + y)*local_fft_size[ZZ];
 +                indb = (x*recv_nindex        + y)*local_fft_ndata[ZZ];
 +                for(z=0; z<local_fft_ndata[ZZ]; z++)
 +                {
 +                    fftgrid[indg+z] += recvptr[indb+z];
 +                }
 +            }
 +        }
 +        if (pme->nnodes_major > 1)
 +        {
 +            sendptr = pme->overlap[0].sendbuf;
 +            for(x=0; x<size_yx; x++)
 +            {
 +                for(y=0; y<recv_nindex; y++)
 +                {
 +                    indg = (x*local_fft_ndata[YY] + y)*local_fft_ndata[ZZ];
 +                    indb = ((local_fft_ndata[XX] + x)*recv_nindex +y)*local_fft_ndata[ZZ];
 +                    for(z=0; z<local_fft_ndata[ZZ]; z++)
 +                    {
 +                        sendptr[indg+z] += recvptr[indb+z];
 +                    }
 +                }
 +            }
 +        }
 +    }
 +
 +    /* for(ipulse=0;ipulse<overlap->noverlap_nodes;ipulse++) */
 +    if (pme->nnodes_major > 1)
 +    {
 +        /* Major dimension */
 +        overlap = &pme->overlap[0];
 +
 +        datasize = local_fft_ndata[YY]*local_fft_ndata[ZZ];
 +        gridsize = local_fft_size[YY] *local_fft_size[ZZ];
 +
 +        ipulse = 0;
 +
 +        send_id = overlap->send_id[ipulse];
 +        recv_id = overlap->recv_id[ipulse];
 +        send_nindex   = overlap->comm_data[ipulse].send_nindex;
 +        /* recv_index0   = overlap->comm_data[ipulse].recv_index0; */
 +        recv_index0 = 0;
 +        recv_nindex   = overlap->comm_data[ipulse].recv_nindex;
 +
 +        sendptr = overlap->sendbuf;
 +        recvptr = overlap->recvbuf;
 +
 +        if (debug != NULL)
 +        {
 +            fprintf(debug,"PME fftgrid comm %2d x %2d x %2d\n",
 +                   send_nindex,local_fft_ndata[YY],local_fft_ndata[ZZ]);
 +        }
 +
 +#ifdef GMX_MPI
 +        MPI_Sendrecv(sendptr,send_nindex*datasize,GMX_MPI_REAL,
 +                     send_id,ipulse,
 +                     recvptr,recv_nindex*datasize,GMX_MPI_REAL,
 +                     recv_id,ipulse,
 +                     overlap->mpi_comm,&stat);
 +#endif
 +
 +        for(x=0; x<recv_nindex; x++)
 +        {
 +            for(y=0; y<local_fft_ndata[YY]; y++)
 +            {
 +                indg = (x*local_fft_size[YY]  + y)*local_fft_size[ZZ];
 +                indb = (x*local_fft_ndata[YY] + y)*local_fft_ndata[ZZ];
 +                for(z=0; z<local_fft_ndata[ZZ]; z++)
 +                {
 +                    fftgrid[indg+z] += recvptr[indb+z];
 +                }
 +            }
 +        }
 +    }
 +}
 +
 +
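 +/* Top-level spreading driver: optionally computes the interpolation
 + * indices and B-splines for all local atoms, spreads the charges onto
 + * the (per-thread) grids, and with multiple threads reduces the
 + * thread-local grids onto the FFT grid and, in parallel runs,
 + * communicates the node overlap regions via sum_fftgrid_dd().
 + */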
 +static void spread_on_grid(gmx_pme_t pme,
 +                           pme_atomcomm_t *atc,pmegrids_t *grids,
 +                           gmx_bool bCalcSplines,gmx_bool bSpread,
 +                           real *fftgrid)
 +{
 +    int nthread,thread;
 +#ifdef PME_TIME_THREADS
 +    gmx_cycles_t c1,c2,c3,ct1a,ct1b,ct1c;
 +    static double cs1=0,cs2=0,cs3=0;
 +    static double cs1a[6]={0,0,0,0,0,0};
 +    static int cnt=0;
 +#endif
 +
 +    nthread = pme->nthread;
 +    assert(nthread>0);
 +
 +#ifdef PME_TIME_THREADS
 +    c1 = omp_cyc_start();
 +#endif
 +    if (bCalcSplines)
 +    {
 +#pragma omp parallel for num_threads(nthread) schedule(static)
 +        for(thread=0; thread<nthread; thread++)
 +        {
 +            int start,end;
 +
 +            start = atc->n* thread   /nthread;
 +            end   = atc->n*(thread+1)/nthread;
 +
 +            /* Compute fftgrid index for all atoms,
 +             * with help of some extra variables.
 +             */
 +            calc_interpolation_idx(pme,atc,start,end,thread);
 +        }
 +    }
 +#ifdef PME_TIME_THREADS
 +    c1 = omp_cyc_end(c1);
 +    cs1 += (double)c1;
 +#endif
 +
 +#ifdef PME_TIME_THREADS
 +    c2 = omp_cyc_start();
 +#endif
 +#pragma omp parallel for num_threads(nthread) schedule(static)
 +    for(thread=0; thread<nthread; thread++)
 +    {
 +        splinedata_t *spline;
 +        pmegrid_t *grid;
 +
 +        /* make local bsplines  */
 +        if (grids == NULL || grids->nthread == 1)
 +        {
 +            spline = &atc->spline[0];
 +
 +            spline->n = atc->n;
 +
 +            grid = &grids->grid;
 +        }
 +        else
 +        {
 +            spline = &atc->spline[thread];
 +
 +            make_thread_local_ind(atc,thread,spline);
 +
 +            grid = &grids->grid_th[thread];
 +        }
 +
 +        if (bCalcSplines)
 +        {
 +            make_bsplines(spline->theta,spline->dtheta,pme->pme_order,
 +                          atc->fractx,spline->n,spline->ind,atc->q,pme->bFEP);
 +        }
 +
 +        if (bSpread)
 +        {
 +            /* put local atoms on grid. */
 +#ifdef PME_TIME_SPREAD
 +            ct1a = omp_cyc_start();
 +#endif
 +            spread_q_bsplines_thread(grid,atc,spline,pme->spline_work);
 +
 +            if (grids->nthread > 1)
 +            {
 +                copy_local_grid(pme,grids,thread,fftgrid);
 +            }
 +#ifdef PME_TIME_SPREAD
 +            ct1a = omp_cyc_end(ct1a);
 +            cs1a[thread] += (double)ct1a;
 +#endif
 +        }
 +    }
 +#ifdef PME_TIME_THREADS
 +    c2 = omp_cyc_end(c2);
 +    cs2 += (double)c2;
 +#endif
 +
 +    if (bSpread && grids->nthread > 1)
 +    {
 +#ifdef PME_TIME_THREADS
 +        c3 = omp_cyc_start();
 +#endif
 +#pragma omp parallel for num_threads(grids->nthread) schedule(static)
 +        for(thread=0; thread<grids->nthread; thread++)
 +        {
 +            reduce_threadgrid_overlap(pme,grids,thread,
 +                                      fftgrid,
 +                                      pme->overlap[0].sendbuf,
 +                                      pme->overlap[1].sendbuf);
 +#ifdef PRINT_PME_SENDBUF
 +            print_sendbuf(pme,pme->overlap[0].sendbuf);
 +#endif
 +        }
 +#ifdef PME_TIME_THREADS
 +        c3 = omp_cyc_end(c3);
 +        cs3 += (double)c3;
 +#endif
 +
 +        if (pme->nnodes > 1)
 +        {
 +            /* Communicate the overlapping part of the fftgrid */
 +            sum_fftgrid_dd(pme,fftgrid);
 +        }
 +    }
 +
 +#ifdef PME_TIME_THREADS
 +    cnt++;
 +    if (cnt % 20 == 0)
 +    {
 +        printf("idx %.2f spread %.2f red %.2f",
 +               cs1*1e-9,cs2*1e-9,cs3*1e-9);
 +#ifdef PME_TIME_SPREAD
 +        for(thread=0; thread<nthread; thread++)
 +            printf(" %.2f",cs1a[thread]*1e-9);
 +#endif
 +        printf("\n");
 +    }
 +#endif
 +}
 +
 +
 +static void dump_grid(FILE *fp,
 +                      int sx,int sy,int sz,int nx,int ny,int nz,
 +                      int my,int mz,const real *g)
 +{
 +    int x,y,z;
 +
 +    for(x=0; x<nx; x++)
 +    {
 +        for(y=0; y<ny; y++)
 +        {
 +            for(z=0; z<nz; z++)
 +            {
 +                fprintf(fp,"%2d %2d %2d %6.3f\n",
 +                        sx+x,sy+y,sz+z,g[(x*my + y)*mz + z]);
 +            }
 +        }
 +    }
 +}
 +
 +static void dump_local_fftgrid(gmx_pme_t pme,const real *fftgrid)
 +{
 +    ivec local_fft_ndata,local_fft_offset,local_fft_size;
 +
 +    gmx_parallel_3dfft_real_limits(pme->pfft_setupA,
 +                                   local_fft_ndata,
 +                                   local_fft_offset,
 +                                   local_fft_size);
 +
 +    dump_grid(stderr,
 +              pme->pmegrid_start_ix,
 +              pme->pmegrid_start_iy,
 +              pme->pmegrid_start_iz,
 +              pme->pmegrid_nx-pme->pme_order+1,
 +              pme->pmegrid_ny-pme->pme_order+1,
 +              pme->pmegrid_nz-pme->pme_order+1,
 +              local_fft_size[YY],
 +              local_fft_size[ZZ],
 +              fftgrid);
 +}
 +
 +
 +void gmx_pme_calc_energy(gmx_pme_t pme,int n,rvec *x,real *q,real *V)
 +{
 +    pme_atomcomm_t *atc;
 +    pmegrids_t *grid;
 +
 +    if (pme->nnodes > 1)
 +    {
 +        gmx_incons("gmx_pme_calc_energy called in parallel");
 +    }
 +    if (pme->bFEP > 1)
 +    {
 +        gmx_incons("gmx_pme_calc_energy with free energy");
 +    }
 +
 +    atc = &pme->atc_energy;
 +    atc->nthread   = 1;
 +    if (atc->spline == NULL)
 +    {
 +        snew(atc->spline,atc->nthread);
 +    }
 +    atc->nslab     = 1;
 +    atc->bSpread   = TRUE;
 +    atc->pme_order = pme->pme_order;
 +    atc->n         = n;
 +    pme_realloc_atomcomm_things(atc);
 +    atc->x         = x;
 +    atc->q         = q;
 +
 +    /* We only use the A-charges grid */
 +    grid = &pme->pmegridA;
 +
 +    spread_on_grid(pme,atc,NULL,TRUE,FALSE,pme->fftgridA);
 +
 +    *V = gather_energy_bsplines(pme,grid->grid.grid,atc);
 +}
 +
 +
 +static void reset_pmeonly_counters(t_commrec *cr,gmx_wallcycle_t wcycle,
 +        t_nrnb *nrnb,t_inputrec *ir, gmx_large_int_t step_rel)
 +{
 +    /* Reset all the counters related to performance over the run */
 +    wallcycle_stop(wcycle,ewcRUN);
 +    wallcycle_reset_all(wcycle);
 +    init_nrnb(nrnb);
 +    ir->init_step += step_rel;
 +    ir->nsteps    -= step_rel;
 +    wallcycle_start(wcycle,ewcRUN);
 +}
 +
 +
 +int gmx_pmeonly(gmx_pme_t pme,
 +                t_commrec *cr,    t_nrnb *nrnb,
 +                gmx_wallcycle_t wcycle,
 +                real ewaldcoeff,  gmx_bool bGatherOnly,
 +                t_inputrec *ir)
 +{
 +    gmx_pme_pp_t pme_pp;
 +    int  natoms;
 +    matrix box;
 +    rvec *x_pp=NULL,*f_pp=NULL;
 +    real *chargeA=NULL,*chargeB=NULL;
 +    real lambda=0;
 +    int  maxshift_x=0,maxshift_y=0;
 +    real energy,dvdlambda;
 +    matrix vir;
 +    float cycles;
 +    int  count;
 +    gmx_bool bEnerVir;
 +    gmx_large_int_t step,step_rel;
 +
 +
 +    pme_pp = gmx_pme_pp_init(cr);
 +
 +    init_nrnb(nrnb);
 +
 +    count = 0;
 +    do /****** this is a quasi-loop over time steps! */
 +    {
 +        /* Domain decomposition */
 +        natoms = gmx_pme_recv_q_x(pme_pp,
 +                                  &chargeA,&chargeB,box,&x_pp,&f_pp,
 +                                  &maxshift_x,&maxshift_y,
 +                                  &pme->bFEP,&lambda,
 +                                  &bEnerVir,
 +                                  &step);
 +
 +        if (natoms == -1) {
 +            /* We should stop: break out of the loop */
 +            break;
 +        }
 +
 +        step_rel = step - ir->init_step;
 +
 +        if (count == 0)
 +            wallcycle_start(wcycle,ewcRUN);
 +
 +        wallcycle_start(wcycle,ewcPMEMESH);
 +
 +        dvdlambda = 0;
 +        clear_mat(vir);
 +        gmx_pme_do(pme,0,natoms,x_pp,f_pp,chargeA,chargeB,box,
 +                   cr,maxshift_x,maxshift_y,nrnb,wcycle,vir,ewaldcoeff,
 +                   &energy,lambda,&dvdlambda,
 +                   GMX_PME_DO_ALL_F | (bEnerVir ? GMX_PME_CALC_ENER_VIR : 0));
 +
 +        cycles = wallcycle_stop(wcycle,ewcPMEMESH);
 +
 +        gmx_pme_send_force_vir_ener(pme_pp,
 +                                    f_pp,vir,energy,dvdlambda,
 +                                    cycles);
 +
 +        count++;
 +
 +        if (step_rel == wcycle_get_reset_counters(wcycle))
 +        {
 +            /* Reset all the counters related to performance over the run */
 +            reset_pmeonly_counters(cr,wcycle,nrnb,ir,step_rel);
 +            wcycle_set_reset_counters(wcycle, 0);
 +        }
 +
 +    } /***** end of quasi-loop, we stop with the break above */
 +    while (TRUE);
 +
 +    return 0;
 +}
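 +
 +/* Sketch of the matching PP-node side of the loop above (the real calls
 + * appear in do_force() further down); arguments are abbreviated and the
 + * lambda_q / *_recip names are illustrative:
 + */
 +#if 0
 +    gmx_pme_send_x(cr, box, x, nChargePerturbed, lambda_q, bEnerVir, step);
 +    /* ... PP work overlaps with the PME mesh computation ... */
 +    gmx_pme_receive_f(cr, f_recip, vir_recip, &e_recip, &dvdl_recip, &cycles_pme);
 +#endif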
 +
 +int gmx_pme_do(gmx_pme_t pme,
 +               int start,       int homenr,
 +               rvec x[],        rvec f[],
 +               real *chargeA,   real *chargeB,
 +               matrix box, t_commrec *cr,
 +               int  maxshift_x, int maxshift_y,
 +               t_nrnb *nrnb,    gmx_wallcycle_t wcycle,
 +               matrix vir,      real ewaldcoeff,
 +               real *energy,    real lambda,
 +               real *dvdlambda, int flags)
 +{
 +    int     q,d,i,j,ntot,npme;
 +    int     nx,ny,nz;
 +    int     n_d,local_ny;
 +    pme_atomcomm_t *atc=NULL;
 +    pmegrids_t *pmegrid=NULL;
 +    real    *grid=NULL;
 +    real    *ptr;
 +    rvec    *x_d,*f_d;
 +    real    *charge=NULL,*q_d;
 +    real    energy_AB[2];
 +    matrix  vir_AB[2];
 +    gmx_bool bClearF;
 +    gmx_parallel_3dfft_t pfft_setup;
 +    real *  fftgrid;
 +    t_complex * cfftgrid;
 +    int     thread;
 +    const gmx_bool bCalcEnerVir = flags & GMX_PME_CALC_ENER_VIR;
 +    const gmx_bool bCalcF = flags & GMX_PME_CALC_F;
 +
 +    assert(pme->nnodes > 0);
 +    assert(pme->nnodes == 1 || pme->ndecompdim > 0);
 +
 +    if (pme->nnodes > 1) {
 +        atc = &pme->atc[0];
 +        atc->npd = homenr;
 +        if (atc->npd > atc->pd_nalloc) {
 +            atc->pd_nalloc = over_alloc_dd(atc->npd);
 +            srenew(atc->pd,atc->pd_nalloc);
 +        }
 +        atc->maxshift = (atc->dimind==0 ? maxshift_x : maxshift_y);
 +    }
 +    else
 +    {
 +        /* This could be necessary for TPI */
 +        pme->atc[0].n = homenr;
 +    }
 +
 +    for(q=0; q<(pme->bFEP ? 2 : 1); q++) {
 +        if (q == 0) {
 +            pmegrid = &pme->pmegridA;
 +            fftgrid = pme->fftgridA;
 +            cfftgrid = pme->cfftgridA;
 +            pfft_setup = pme->pfft_setupA;
 +            charge = chargeA+start;
 +        } else {
 +            pmegrid = &pme->pmegridB;
 +            fftgrid = pme->fftgridB;
 +            cfftgrid = pme->cfftgridB;
 +            pfft_setup = pme->pfft_setupB;
 +            charge = chargeB+start;
 +        }
 +        grid = pmegrid->grid.grid;
 +        /* Unpack structure */
 +        if (debug) {
 +            fprintf(debug,"PME: nnodes = %d, nodeid = %d\n",
 +                    cr->nnodes,cr->nodeid);
 +            fprintf(debug,"Grid = %p\n",(void*)grid);
 +            if (grid == NULL)
 +                gmx_fatal(FARGS,"No grid!");
 +        }
 +        where();
 +
 +        m_inv_ur0(box,pme->recipbox);
 +
 +        if (pme->nnodes == 1) {
 +            atc = &pme->atc[0];
 +            if (DOMAINDECOMP(cr)) {
 +                atc->n = homenr;
 +                pme_realloc_atomcomm_things(atc);
 +            }
 +            atc->x = x;
 +            atc->q = charge;
 +            atc->f = f;
 +        } else {
 +            wallcycle_start(wcycle,ewcPME_REDISTXF);
 +            for(d=pme->ndecompdim-1; d>=0; d--)
 +            {
 +                if (d == pme->ndecompdim-1)
 +                {
 +                    n_d = homenr;
 +                    x_d = x + start;
 +                    q_d = charge;
 +                }
 +                else
 +                {
 +                    n_d = pme->atc[d+1].n;
 +                    x_d = atc->x;
 +                    q_d = atc->q;
 +                }
 +                atc = &pme->atc[d];
 +                atc->npd = n_d;
 +                if (atc->npd > atc->pd_nalloc) {
 +                    atc->pd_nalloc = over_alloc_dd(atc->npd);
 +                    srenew(atc->pd,atc->pd_nalloc);
 +                }
 +                atc->maxshift = (atc->dimind==0 ? maxshift_x : maxshift_y);
 +                pme_calc_pidx_wrapper(n_d,pme->recipbox,x_d,atc);
 +                where();
 +
 +                /* Redistribute x (only once) and qA or qB */
 +                if (DOMAINDECOMP(cr)) {
 +                    dd_pmeredist_x_q(pme, n_d, q==0, x_d, q_d, atc);
 +                } else {
 +                    pmeredist_pd(pme, TRUE, n_d, q==0, x_d, q_d, atc);
 +                }
 +            }
 +            where();
 +
 +            wallcycle_stop(wcycle,ewcPME_REDISTXF);
 +        }
 +
 +        if (debug)
 +            fprintf(debug,"Node= %6d, pme local particles=%6d\n",
 +                    cr->nodeid,atc->n);
 +
 +        if (flags & GMX_PME_SPREAD_Q)
 +        {
 +            wallcycle_start(wcycle,ewcPME_SPREADGATHER);
 +
 +            /* Spread the charges on a grid */
 +            spread_on_grid(pme,&pme->atc[0],pmegrid,q==0,TRUE,fftgrid);
 +
 +            if (q == 0)
 +            {
 +                inc_nrnb(nrnb,eNR_WEIGHTS,DIM*atc->n);
 +            }
 +            inc_nrnb(nrnb,eNR_SPREADQBSP,
 +                     pme->pme_order*pme->pme_order*pme->pme_order*atc->n);
 +
 +            if (pme->nthread == 1)
 +            {
 +                wrap_periodic_pmegrid(pme,grid);
 +
 +                /* sum contributions to local grid from other nodes */
 +#ifdef GMX_MPI
 +                if (pme->nnodes > 1)
 +                {
 +                    gmx_sum_qgrid_dd(pme,grid,GMX_SUM_QGRID_FORWARD);
 +                    where();
 +                }
 +#endif
 +
 +                copy_pmegrid_to_fftgrid(pme,grid,fftgrid);
 +            }
 +
 +            wallcycle_stop(wcycle,ewcPME_SPREADGATHER);
 +
 +            /*
 +            dump_local_fftgrid(pme,fftgrid);
 +            exit(0);
 +            */
 +        }
 +
 +        /* Here we start a large thread parallel region */
 +#pragma omp parallel for num_threads(pme->nthread) schedule(static)
 +        for(thread=0; thread<pme->nthread; thread++)
 +        {
 +            if (flags & GMX_PME_SOLVE)
 +            {
 +                int loop_count;
 +
 +                /* do 3d-fft */
 +                if (thread == 0)
 +                {
 +                    wallcycle_start(wcycle,ewcPME_FFT);
 +                }
 +                gmx_parallel_3dfft_execute(pfft_setup,GMX_FFT_REAL_TO_COMPLEX,
 +                                           fftgrid,cfftgrid,thread,wcycle);
 +                if (thread == 0)
 +                {
 +                    wallcycle_stop(wcycle,ewcPME_FFT);
 +                }
 +                where();
 +
 +                /* solve in k-space for our local cells */
 +                if (thread == 0)
 +                {
 +                    wallcycle_start(wcycle,ewcPME_SOLVE);
 +                }
 +                loop_count =
 +                    solve_pme_yzx(pme,cfftgrid,ewaldcoeff,
 +                                  box[XX][XX]*box[YY][YY]*box[ZZ][ZZ],
 +                                  bCalcEnerVir,
 +                                  pme->nthread,thread);
 +                if (thread == 0)
 +                {
 +                    wallcycle_stop(wcycle,ewcPME_SOLVE);
 +                    where();
 +                    inc_nrnb(nrnb,eNR_SOLVEPME,loop_count);
 +                }
 +            }
 +
 +            if (bCalcF)
 +            {
 +                /* do 3d-invfft */
 +                if (thread == 0)
 +                {
 +                    where();
 +                    wallcycle_start(wcycle,ewcPME_FFT);
 +                }
 +                gmx_parallel_3dfft_execute(pfft_setup,GMX_FFT_COMPLEX_TO_REAL,
 +                                           cfftgrid,fftgrid,thread,wcycle);
 +                if (thread == 0)
 +                {
 +                    wallcycle_stop(wcycle,ewcPME_FFT);
 +
 +                    where();
 +
 +                    if (pme->nodeid == 0)
 +                    {
 +                        ntot = pme->nkx*pme->nky*pme->nkz;
 +                        npme  = ntot*log((real)ntot)/log(2.0);
 +                        inc_nrnb(nrnb,eNR_FFT,2*npme);
 +                    }
 +
 +                    wallcycle_start(wcycle,ewcPME_SPREADGATHER);
 +                }
 +
 +                copy_fftgrid_to_pmegrid(pme,fftgrid,grid,pme->nthread,thread);
 +            }
 +        }
 +        /* End of thread parallel section.
 +         * With MPI we have to synchronize here before gmx_sum_qgrid_dd.
 +         */
 +
 +        if (bCalcF)
 +        {
 +            /* distribute local grid to all nodes */
 +#ifdef GMX_MPI
 +            if (pme->nnodes > 1) {
 +                gmx_sum_qgrid_dd(pme,grid,GMX_SUM_QGRID_BACKWARD);
 +            }
 +#endif
 +            where();
 +
 +            unwrap_periodic_pmegrid(pme,grid);
 +
 +            /* interpolate forces for our local atoms */
 +
 +            where();
 +
 +            /* If we are running without parallelization,
 +             * atc->f is the actual force array, not a buffer,
 +             * therefore we should not clear it.
 +             */
 +            bClearF = (q == 0 && PAR(cr));
 +#pragma omp parallel for num_threads(pme->nthread) schedule(static)
 +            for(thread=0; thread<pme->nthread; thread++)
 +            {
 +                gather_f_bsplines(pme,grid,bClearF,atc,
 +                                  &atc->spline[thread],
 +                                  pme->bFEP ? (q==0 ? 1.0-lambda : lambda) : 1.0);
 +            }
 +
 +            where();
 +
 +            inc_nrnb(nrnb,eNR_GATHERFBSP,
 +                     pme->pme_order*pme->pme_order*pme->pme_order*pme->atc[0].n);
 +            wallcycle_stop(wcycle,ewcPME_SPREADGATHER);
 +        }
 +
 +        if (bCalcEnerVir)
 +        {
 +            /* This should only be called on the master thread
 +             * and after the threads have synchronized.
 +             */
 +            get_pme_ener_vir(pme,pme->nthread,&energy_AB[q],vir_AB[q]);
 +        }
 +    } /* of q-loop */
 +
 +    if (bCalcF && pme->nnodes > 1) {
 +        wallcycle_start(wcycle,ewcPME_REDISTXF);
 +        for(d=0; d<pme->ndecompdim; d++)
 +        {
 +            atc = &pme->atc[d];
 +            if (d == pme->ndecompdim - 1)
 +            {
 +                n_d = homenr;
 +                f_d = f + start;
 +            }
 +            else
 +            {
 +                n_d = pme->atc[d+1].n;
 +                f_d = pme->atc[d+1].f;
 +            }
 +            if (DOMAINDECOMP(cr)) {
 +                dd_pmeredist_f(pme,atc,n_d,f_d,
 +                               d==pme->ndecompdim-1 && pme->bPPnode);
 +            } else {
 +                pmeredist_pd(pme, FALSE, n_d, TRUE, f_d, NULL, atc);
 +            }
 +        }
 +
 +        wallcycle_stop(wcycle,ewcPME_REDISTXF);
 +    }
 +    where();
 +
 +    if (bCalcEnerVir)
 +    {
 +        if (!pme->bFEP) {
 +            *energy = energy_AB[0];
 +            m_add(vir,vir_AB[0],vir);
 +        } else {
 +            *energy = (1.0-lambda)*energy_AB[0] + lambda*energy_AB[1];
 +            *dvdlambda += energy_AB[1] - energy_AB[0];
 +            for(i=0; i<DIM; i++)
 +            {
 +                for(j=0; j<DIM; j++)
 +                {
 +                    vir[i][j] += (1.0-lambda)*vir_AB[0][i][j] + 
 +                        lambda*vir_AB[1][i][j];
 +                }
 +            }
 +        }
 +    }
 +    else
 +    {
 +        *energy = 0;
 +    }
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"PME mesh energy: %g\n",*energy);
 +    }
 +
 +    return 0;
 +}
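 +
 +/* Summary sketch of the per-charge-set pipeline above (run once for the
 + * A grid and, with free energy, once more for the B grid):
 + *   spread_on_grid -> wrap/sum grid -> 3D FFT -> solve_pme_yzx
 + *   -> inverse 3D FFT -> gather_f_bsplines
 + * after which the A/B results are mixed linearly in lambda, e.g. for the
 + * energy: E = (1 - lambda)*E_A + lambda*E_B.
 + */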
index 49650992d3988f007da69c4198bcb1d4b445d7d7,0000000000000000000000000000000000000000..c83b5529578a787f8f7a03d2c28d05c3a3ac67c5
mode 100644,000000..100644
--- /dev/null
@@@ -1,1654 -1,0 +1,1654 @@@
-         do_flood(fplog,cr,x,f,ed,box,step);
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#ifdef GMX_CRAY_XT3
 +#include<catamount/dclock.h>
 +#endif
 +
 +
 +#include <stdio.h>
 +#include <time.h>
 +#ifdef HAVE_SYS_TIME_H
 +#include <sys/time.h>
 +#endif
 +#include <math.h>
 +#include "typedefs.h"
 +#include "string2.h"
 +#include "gmxfio.h"
 +#include "smalloc.h"
 +#include "names.h"
 +#include "confio.h"
 +#include "mvdata.h"
 +#include "txtdump.h"
 +#include "pbc.h"
 +#include "chargegroup.h"
 +#include "vec.h"
 +#include <time.h>
 +#include "nrnb.h"
 +#include "mshift.h"
 +#include "mdrun.h"
 +#include "update.h"
 +#include "physics.h"
 +#include "main.h"
 +#include "mdatoms.h"
 +#include "force.h"
 +#include "bondf.h"
 +#include "pme.h"
 +#include "disre.h"
 +#include "orires.h"
 +#include "network.h"
 +#include "calcmu.h"
 +#include "constr.h"
 +#include "xvgr.h"
 +#include "trnio.h"
 +#include "xtcio.h"
 +#include "copyrite.h"
 +#include "pull_rotation.h"
 +#include "gmx_random.h"
 +#include "domdec.h"
 +#include "partdec.h"
 +#include "gmx_wallcycle.h"
 +#include "genborn.h"
 +
 +#ifdef GMX_LIB_MPI
 +#include <mpi.h>
 +#endif
 +#ifdef GMX_THREAD_MPI
 +#include "tmpi.h"
 +#endif
 +
 +#include "adress.h"
 +#include "qmmm.h"
 +
 +#if 0
 +typedef struct gmx_timeprint {
 +
 +} t_gmx_timeprint;
 +#endif
 +
 +/* Portable version of ctime_r implemented in src/gmxlib/string2.c, but we do not want it declared in public installed headers */
 +char *
 +gmx_ctime_r(const time_t *clock,char *buf, int n);
 +
 +
 +double
 +gmx_gettime()
 +{
 +#ifdef HAVE_GETTIMEOFDAY
 +      struct timeval t;
 +      double seconds;
 +
 +      gettimeofday(&t,NULL);
 +
 +      seconds = (double) t.tv_sec + 1e-6*(double)t.tv_usec;
 +
 +      return seconds;
 +#else
 +      double  seconds;
 +
 +      seconds = time(NULL);
 +
 +      return seconds;
 +#endif
 +}
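 +
 +/* A minimal timing sketch: gmx_gettime() returns wall-clock seconds as a
 + * double, so the elapsed time of a region is simply the difference of two
 + * calls. The example_ name below is illustrative only.
 + */
 +#if 0
 +static void example_time_region(FILE *out)
 +{
 +    double t0, elapsed;
 +
 +    t0 = gmx_gettime();
 +    /* ... code region to be timed ... */
 +    elapsed = gmx_gettime() - t0;
 +    fprintf(out, "region took %.3f s\n", elapsed);
 +}
 +#endif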
 +
 +
 +#define difftime(end,start) ((double)(end)-(double)(start))
 +
 +void print_time(FILE *out,gmx_runtime_t *runtime,gmx_large_int_t step,
 +                t_inputrec *ir, t_commrec *cr)
 +{
 +    time_t finish;
 +    char   timebuf[STRLEN];
 +    double dt;
 +    char buf[48];
 +
 +#ifndef GMX_THREAD_MPI
 +    if (!PAR(cr))
 +#endif
 +    {
 +        fprintf(out,"\r");
 +    }
 +    fprintf(out,"step %s",gmx_step_str(step,buf));
 +    if ((step >= ir->nstlist))
 +    {
 +        if ((ir->nstlist == 0) || ((step % ir->nstlist) == 0))
 +        {
 +            /* We have done a full cycle, let's update time_per_step */
 +            runtime->last = gmx_gettime();
 +            dt = difftime(runtime->last,runtime->real);
 +            runtime->time_per_step = dt/(step - ir->init_step + 1);
 +        }
 +        dt = (ir->nsteps + ir->init_step - step)*runtime->time_per_step;
 +
 +        if (ir->nsteps >= 0)
 +        {
 +            if (dt >= 300)
 +            {
 +                finish = (time_t) (runtime->last + dt);
 +                gmx_ctime_r(&finish,timebuf,STRLEN);
 +                sprintf(buf,"%s",timebuf);
 +                buf[strlen(buf)-1]='\0';
 +                fprintf(out,", will finish %s",buf);
 +            }
 +            else
 +                fprintf(out,", remaining runtime: %5d s          ",(int)dt);
 +        }
 +        else
 +        {
 +            fprintf(out," performance: %.1f ns/day    ",
 +                    ir->delta_t/1000*24*60*60/runtime->time_per_step);
 +        }
 +    }
 +#ifndef GMX_THREAD_MPI
 +    if (PAR(cr))
 +    {
 +        fprintf(out,"\n");
 +    }
 +#endif
 +
 +    fflush(out);
 +}
 +
 +#ifdef NO_CLOCK
 +#define clock() -1
 +#endif
 +
 +static double set_proctime(gmx_runtime_t *runtime)
 +{
 +    double diff;
 +#ifdef GMX_CRAY_XT3
 +    double prev;
 +
 +    prev = runtime->proc;
 +    runtime->proc = dclock();
 +
 +    diff = runtime->proc - prev;
 +#else
 +    clock_t prev;
 +
 +    prev = runtime->proc;
 +    runtime->proc = clock();
 +
 +    diff = (double)(runtime->proc - prev)/(double)CLOCKS_PER_SEC;
 +#endif
 +    if (diff < 0)
 +    {
 +        /* The counter has probably looped, ignore this data */
 +        diff = 0;
 +    }
 +
 +    return diff;
 +}
 +
 +void runtime_start(gmx_runtime_t *runtime)
 +{
 +    runtime->real = gmx_gettime();
 +    runtime->proc          = 0;
 +    set_proctime(runtime);
 +    runtime->realtime      = 0;
 +    runtime->proctime      = 0;
 +    runtime->last          = 0;
 +    runtime->time_per_step = 0;
 +}
 +
 +void runtime_end(gmx_runtime_t *runtime)
 +{
 +    double now;
 +
 +    now = gmx_gettime();
 +
 +    runtime->proctime += set_proctime(runtime);
 +    runtime->realtime  = now - runtime->real;
 +    runtime->real      = now;
 +}
 +
 +void runtime_upd_proc(gmx_runtime_t *runtime)
 +{
 +    runtime->proctime += set_proctime(runtime);
 +}
 +
 +void print_date_and_time(FILE *fplog,int nodeid,const char *title,
 +                         const gmx_runtime_t *runtime)
 +{
 +    int i;
 +    char timebuf[STRLEN];
 +    char time_string[STRLEN];
 +    time_t tmptime;
 +
 +    if (fplog)
 +    {
 +        if (runtime != NULL)
 +        {
 +            tmptime = (time_t) runtime->real;
 +            gmx_ctime_r(&tmptime,timebuf,STRLEN);
 +        }
 +        else
 +        {
 +            tmptime = (time_t) gmx_gettime();
 +            gmx_ctime_r(&tmptime,timebuf,STRLEN);
 +        }
 +        for(i=0; timebuf[i]>=' '; i++)
 +        {
 +            time_string[i]=timebuf[i];
 +        }
 +        time_string[i]='\0';
 +
 +        fprintf(fplog,"%s on node %d %s\n",title,nodeid,time_string);
 +    }
 +}
 +
 +static void sum_forces(int start,int end,rvec f[],rvec flr[])
 +{
 +  int i;
 +
 +  if (gmx_debug_at) {
 +    pr_rvecs(debug,0,"fsr",f+start,end-start);
 +    pr_rvecs(debug,0,"flr",flr+start,end-start);
 +  }
 +  for(i=start; (i<end); i++)
 +    rvec_inc(f[i],flr[i]);
 +}
 +
 +/*
 + * calc_f_el calculates forces due to an electric field.
 + *
 + * force is kJ mol^-1 nm^-1 = e * kJ mol^-1 nm^-1 / e
 + *
 + * Et[] contains the parameters for the time dependent
 + * part of the field (not yet used).
 + * Ex[] contains the parameters for
 + * the spatial dependent part of the field. You can have cool periodic
 + * fields in principle, but only a constant field is supported
 + * now.
 + * The function should return the energy due to the electric field
 + * (if any) but for now returns 0.
 + *
 + * WARNING:
 + * There can be problems with the virial.
 + * Since the field is not self-consistent this is unavoidable.
 + * For neutral molecules the virial is correct within this approximation.
 + * For neutral systems with many charged molecules the error is small.
 + * But for systems with a net charge or a few charged molecules
 + * the error can be significant when the field is high.
 + * Solution: implement a self-consistent electric field into PME.
 + */
 +static void calc_f_el(FILE *fp,int  start,int homenr,
 +                      real charge[],rvec x[],rvec f[],
 +                      t_cosines Ex[],t_cosines Et[],double t)
 +{
 +    rvec Ext;
 +    real t0;
 +    int  i,m;
 +
 +    for(m=0; (m<DIM); m++)
 +    {
 +        if (Et[m].n > 0)
 +        {
 +            if (Et[m].n == 3)
 +            {
 +                t0 = Et[m].a[1];
 +                Ext[m] = cos(Et[m].a[0]*(t-t0))*exp(-sqr(t-t0)/(2.0*sqr(Et[m].a[2])));
 +            }
 +            else
 +            {
 +                Ext[m] = cos(Et[m].a[0]*t);
 +            }
 +        }
 +        else
 +        {
 +            Ext[m] = 1.0;
 +        }
 +        if (Ex[m].n > 0)
 +        {
 +            /* Convert the field strength from V/nm to MD-units */
 +            Ext[m] *= Ex[m].a[0]*FIELDFAC;
 +            for(i=start; (i<start+homenr); i++)
 +                f[i][m] += charge[i]*Ext[m];
 +        }
 +        else
 +        {
 +            Ext[m] = 0;
 +        }
 +    }
 +    if (fp != NULL)
 +    {
 +        fprintf(fp,"%10g  %10g  %10g  %10g #FIELD\n",t,
 +                Ext[XX]/FIELDFAC,Ext[YY]/FIELDFAC,Ext[ZZ]/FIELDFAC);
 +    }
 +}
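 +
 +/* Worked form of the time envelope used above when Et[m].n == 3, i.e.
 + * a[0] = omega, a[1] = t0 and a[2] = sigma of a Gaussian pulse. The
 + * example_ name is illustrative only.
 + */
 +#if 0
 +static real example_field_envelope(real omega, real t0, real sigma, real t)
 +{
 +    return cos(omega*(t - t0))*exp(-sqr(t - t0)/(2.0*sqr(sigma)));
 +}
 +#endif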
 +
 +static void calc_virial(FILE *fplog,int start,int homenr,rvec x[],rvec f[],
 +                      tensor vir_part,t_graph *graph,matrix box,
 +                      t_nrnb *nrnb,const t_forcerec *fr,int ePBC)
 +{
 +  int i,j;
 +  tensor virtest;
 +
 +  /* The short-range virial from surrounding boxes */
 +  clear_mat(vir_part);
 +  calc_vir(fplog,SHIFTS,fr->shift_vec,fr->fshift,vir_part,ePBC==epbcSCREW,box);
 +  inc_nrnb(nrnb,eNR_VIRIAL,SHIFTS);
 +
 +  /* Calculate partial virial, for local atoms only, based on short range.
 +   * Total virial is computed in global_stat, called from do_md
 +   */
 +  f_calc_vir(fplog,start,start+homenr,x,f,vir_part,graph,box);
 +  inc_nrnb(nrnb,eNR_VIRIAL,homenr);
 +
 +  /* Add position restraint contribution */
 +  for(i=0; i<DIM; i++) {
 +    vir_part[i][i] += fr->vir_diag_posres[i];
 +  }
 +
 +  /* Add wall contribution */
 +  for(i=0; i<DIM; i++) {
 +    vir_part[i][ZZ] += fr->vir_wall_z[i];
 +  }
 +
 +  if (debug)
 +    pr_rvecs(debug,0,"vir_part",vir_part,DIM);
 +}
 +
 +static void print_large_forces(FILE *fp,t_mdatoms *md,t_commrec *cr,
 +                             gmx_large_int_t step,real pforce,rvec *x,rvec *f)
 +{
 +  int  i;
 +  real pf2,fn2;
 +  char buf[STEPSTRSIZE];
 +
 +  pf2 = sqr(pforce);
 +  for(i=md->start; i<md->start+md->homenr; i++) {
 +    fn2 = norm2(f[i]);
 +    /* We also catch NAN, if the compiler does not optimize this away. */
 +    if (fn2 >= pf2 || fn2 != fn2) {
 +      fprintf(fp,"step %s  atom %6d  x %8.3f %8.3f %8.3f  force %12.5e\n",
 +            gmx_step_str(step,buf),
 +            ddglatnr(cr->dd,i),x[i][XX],x[i][YY],x[i][ZZ],sqrt(fn2));
 +    }
 +  }
 +}
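 +
 +/* The fn2 != fn2 test above is the portable NaN check: it is true only
 + * when fn2 is NaN (assuming the compiler does not optimize the comparison
 + * away). With C99 an equivalent, more explicit sketch would be:
 + */
 +#if 0
 +    if (fn2 >= pf2 || isnan(fn2))
 +    {
 +        /* report the large or invalid force for atom i */
 +    }
 +#endif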
 +
 +void do_force(FILE *fplog,t_commrec *cr,
 +              t_inputrec *inputrec,
 +              gmx_large_int_t step,t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +              gmx_localtop_t *top,
 +              gmx_mtop_t *mtop,
 +              gmx_groups_t *groups,
 +              matrix box,rvec x[],history_t *hist,
 +              rvec f[],
 +              tensor vir_force,
 +              t_mdatoms *mdatoms,
 +              gmx_enerdata_t *enerd,t_fcdata *fcd,
 +              real *lambda,t_graph *graph,
 +              t_forcerec *fr,gmx_vsite_t *vsite,rvec mu_tot,
 +              double t,FILE *field,gmx_edsam_t ed,
 +              gmx_bool bBornRadii,
 +              int flags)
 +{
 +    int    cg0,cg1,i,j;
 +    int    start,homenr;
 +    double mu[2*DIM];
 +    gmx_bool   bSepDVDL,bStateChanged,bNS,bFillGrid,bCalcCGCM,bBS;
 +    gmx_bool   bDoLongRange,bDoForces,bSepLRF;
 +    gmx_bool   bDoAdressWF;
 +    matrix boxs;
 +    real   e,v,dvdlambda[efptNR];
 +    real   dvdl_dum,lambda_dum;
 +    t_pbc  pbc;
 +    float  cycles_ppdpme,cycles_pme,cycles_seppme,cycles_force;
 +
 +    start  = mdatoms->start;
 +    homenr = mdatoms->homenr;
 +
 +    bSepDVDL = (fr->bSepDVDL && do_per_step(step,inputrec->nstlog));
 +
 +    clear_mat(vir_force);
 +
 +    if (PARTDECOMP(cr))
 +    {
 +        pd_cg_range(cr,&cg0,&cg1);
 +    }
 +    else
 +    {
 +        cg0 = 0;
 +        if (DOMAINDECOMP(cr))
 +        {
 +            cg1 = cr->dd->ncg_tot;
 +        }
 +        else
 +        {
 +            cg1 = top->cgs.nr;
 +        }
 +        if (fr->n_tpi > 0)
 +        {
 +            cg1--;
 +        }
 +    }
 +
 +    bStateChanged = (flags & GMX_FORCE_STATECHANGED);
 +    bNS           = (flags & GMX_FORCE_NS) && (fr->bAllvsAll==FALSE);
 +    bFillGrid     = (bNS && bStateChanged);
 +    bCalcCGCM     = (bFillGrid && !DOMAINDECOMP(cr));
 +    bDoLongRange  = (fr->bTwinRange && bNS && (flags & GMX_FORCE_DOLR));
 +    bDoForces     = (flags & GMX_FORCE_FORCES);
 +    bSepLRF       = (bDoLongRange && bDoForces && (flags & GMX_FORCE_SEPLRF));
 +    /* should probably move this to the forcerec since it doesn't change */
 +    bDoAdressWF   = ((fr->adress_type!=eAdressOff));
 +
 +    if (bStateChanged)
 +    {
 +        update_forcerec(fplog,fr,box);
 +
 +        /* Calculate total (local) dipole moment in a temporary common array.
 +         * This makes it possible to sum them over nodes faster.
 +         */
 +        calc_mu(start,homenr,
 +                x,mdatoms->chargeA,mdatoms->chargeB,mdatoms->nChargePerturbed,
 +                mu,mu+DIM);
 +    }
 +
 +  if (fr->ePBC != epbcNONE) {
 +    /* Compute shift vectors every step,
 +     * because of pressure coupling or box deformation!
 +     */
 +    if ((flags & GMX_FORCE_DYNAMICBOX) && bStateChanged)
 +      calc_shifts(box,fr->shift_vec);
 +
 +    if (bCalcCGCM) {
 +      put_charge_groups_in_box(fplog,cg0,cg1,fr->ePBC,box,
 +                             &(top->cgs),x,fr->cg_cm);
 +      inc_nrnb(nrnb,eNR_CGCM,homenr);
 +      inc_nrnb(nrnb,eNR_RESETX,cg1-cg0);
 +    }
 +    else if (EI_ENERGY_MINIMIZATION(inputrec->eI) && graph) {
 +      unshift_self(graph,box,x);
 +    }
 +  }
 +  else if (bCalcCGCM) {
 +    calc_cgcm(fplog,cg0,cg1,&(top->cgs),x,fr->cg_cm);
 +    inc_nrnb(nrnb,eNR_CGCM,homenr);
 +  }
 +
 +  if (bCalcCGCM) {
 +    if (PAR(cr)) {
 +      move_cgcm(fplog,cr,fr->cg_cm);
 +    }
 +    if (gmx_debug_at)
 +      pr_rvecs(debug,0,"cgcm",fr->cg_cm,top->cgs.nr);
 +  }
 +
 +#ifdef GMX_MPI
 +  if (!(cr->duty & DUTY_PME)) {
 +    /* Send particle coordinates to the pme nodes.
 +     * Since this is only implemented for domain decomposition
 +     * and domain decomposition does not use the graph,
 +     * we do not need to worry about shifting.
 +     */
 +
 +    wallcycle_start(wcycle,ewcPP_PMESENDX);
 +
 +    bBS = (inputrec->nwall == 2);
 +    if (bBS) {
 +      copy_mat(box,boxs);
 +      svmul(inputrec->wall_ewald_zfac,boxs[ZZ],boxs[ZZ]);
 +    }
 +
 +    gmx_pme_send_x(cr,bBS ? boxs : box,x,
 +                   mdatoms->nChargePerturbed,lambda[efptCOUL],
 +                   ( flags & GMX_FORCE_VIRIAL),step);
 +
 +    wallcycle_stop(wcycle,ewcPP_PMESENDX);
 +  }
 +#endif /* GMX_MPI */
 +
 +    /* Communicate coordinates and sum dipole if necessary */
 +    if (PAR(cr))
 +    {
 +        wallcycle_start(wcycle,ewcMOVEX);
 +        if (DOMAINDECOMP(cr))
 +        {
 +            dd_move_x(cr->dd,box,x);
 +        }
 +        else
 +        {
 +            move_x(fplog,cr,GMX_LEFT,GMX_RIGHT,x,nrnb);
 +        }
 +        /* When we don't need the total dipole we sum it in global_stat */
 +        if (bStateChanged && NEED_MUTOT(*inputrec))
 +        {
 +            gmx_sumd(2*DIM,mu,cr);
 +        }
 +        wallcycle_stop(wcycle,ewcMOVEX);
 +    }
 +    if (bStateChanged)
 +    {
 +
 +        /* update adress weight beforehand */
 +        if(bDoAdressWF)
 +        {
 +            /* need pbc for adress weight calculation with pbc_dx */
 +            set_pbc(&pbc,inputrec->ePBC,box);
 +            if(fr->adress_site == eAdressSITEcog)
 +            {
 +                update_adress_weights_cog(top->idef.iparams,top->idef.il,x,fr,mdatoms,
 +                                          inputrec->ePBC==epbcNONE ? NULL : &pbc);
 +            }
 +            else if (fr->adress_site == eAdressSITEcom)
 +            {
 +                update_adress_weights_com(fplog,cg0,cg1,&(top->cgs),x,fr,mdatoms,
 +                                          inputrec->ePBC==epbcNONE ? NULL : &pbc);
 +            }
 +            else if (fr->adress_site == eAdressSITEatomatom){
 +                update_adress_weights_atom_per_atom(cg0,cg1,&(top->cgs),x,fr,mdatoms,
 +                                          inputrec->ePBC==epbcNONE ? NULL : &pbc);
 +            }
 +            else
 +            {
 +                update_adress_weights_atom(cg0,cg1,&(top->cgs),x,fr,mdatoms,
 +                                           inputrec->ePBC==epbcNONE ? NULL : &pbc);
 +            }
 +        }
 +
 +        for(i=0; i<2; i++)
 +        {
 +            for(j=0;j<DIM;j++)
 +            {
 +                fr->mu_tot[i][j] = mu[i*DIM + j];
 +            }
 +        }
 +    }
 +    if (fr->efep == efepNO)
 +    {
 +        copy_rvec(fr->mu_tot[0],mu_tot);
 +    }
 +    else
 +    {
 +        for(j=0; j<DIM; j++)
 +        {
 +            mu_tot[j] =
 +                (1.0 - lambda[efptCOUL])*fr->mu_tot[0][j] + lambda[efptCOUL]*fr->mu_tot[1][j];
 +        }
 +    }
 +
 +    /* Reset energies */
 +    reset_enerdata(&(inputrec->opts),fr,bNS,enerd,MASTER(cr));
 +    clear_rvecs(SHIFTS,fr->fshift);
 +
 +    if (bNS)
 +    {
 +        wallcycle_start(wcycle,ewcNS);
 +
 +        if (graph && bStateChanged)
 +        {
 +            /* Calculate intramolecular shift vectors to make molecules whole */
 +            mk_mshift(fplog,graph,fr->ePBC,box,x);
 +        }
 +
 +        /* Reset long range forces if necessary */
 +        if (fr->bTwinRange)
 +        {
 +            /* Reset the (long-range) forces if necessary */
 +            clear_rvecs(fr->natoms_force_constr,bSepLRF ? fr->f_twin : f);
 +        }
 +
 +        /* Do the actual neighbour searching and if twin range electrostatics
 +         * also do the calculation of long range forces and energies.
 +         */
 +        for (i=0;i<efptNR;i++) {dvdlambda[i] = 0;}
 +        ns(fplog,fr,x,box,
 +           groups,&(inputrec->opts),top,mdatoms,
 +           cr,nrnb,lambda,dvdlambda,&enerd->grpp,bFillGrid,
 +           bDoLongRange,bDoForces,bSepLRF ? fr->f_twin : f);
 +        if (bSepDVDL)
 +        {
 +            fprintf(fplog,sepdvdlformat,"LR non-bonded",0.0,dvdlambda);
 +        }
 +        enerd->dvdl_lin[efptVDW] += dvdlambda[efptVDW];
 +        enerd->dvdl_lin[efptCOUL] += dvdlambda[efptCOUL];
 +
 +        wallcycle_stop(wcycle,ewcNS);
 +    }
 +
 +    if (inputrec->implicit_solvent && bNS)
 +    {
 +        make_gb_nblist(cr,inputrec->gb_algorithm,inputrec->rlist,
 +                       x,box,fr,&top->idef,graph,fr->born);
 +    }
 +
 +    if (DOMAINDECOMP(cr))
 +    {
 +        if (!(cr->duty & DUTY_PME))
 +        {
 +            wallcycle_start(wcycle,ewcPPDURINGPME);
 +            dd_force_flop_start(cr->dd,nrnb);
 +        }
 +    }
 +
 +    if (inputrec->bRot)
 +    {
 +        /* Enforced rotation has its own cycle counter that starts after the collective
 +         * coordinates have been communicated. It is added to ddCyclF to allow
 +         * for proper load-balancing */
 +        wallcycle_start(wcycle,ewcROT);
 +        do_rotation(cr,inputrec,box,x,t,step,wcycle,bNS);
 +        wallcycle_stop(wcycle,ewcROT);
 +    }
 +
 +    /* Start the force cycle counter.
 +     * This counter is stopped in do_forcelow_level.
 +     * No parallel communication should occur while this counter is running,
 +     * since that will interfere with the dynamic load balancing.
 +     */
 +    wallcycle_start(wcycle,ewcFORCE);
 +
 +    if (bDoForces)
 +    {
 +        /* Reset forces for which the virial is calculated separately:
 +         * PME/Ewald forces if necessary */
 +        if (fr->bF_NoVirSum)
 +        {
 +            if (flags & GMX_FORCE_VIRIAL)
 +            {
 +                fr->f_novirsum = fr->f_novirsum_alloc;
 +                if (fr->bDomDec)
 +                {
 +                    clear_rvecs(fr->f_novirsum_n,fr->f_novirsum);
 +                }
 +                else
 +                {
 +                    clear_rvecs(homenr,fr->f_novirsum+start);
 +                }
 +            }
 +            else
 +            {
 +                /* We are not calculating the pressure so we do not need
 +                 * a separate array for forces that do not contribute
 +                 * to the pressure.
 +                 */
 +                fr->f_novirsum = f;
 +            }
 +        }
 +
 +        if (bSepLRF)
 +        {
 +            /* Add the long range forces to the short range forces */
 +            for(i=0; i<fr->natoms_force_constr; i++)
 +            {
 +                copy_rvec(fr->f_twin[i],f[i]);
 +            }
 +        }
 +        else if (!(fr->bTwinRange && bNS))
 +        {
 +            /* Clear the short-range forces */
 +            clear_rvecs(fr->natoms_force_constr,f);
 +        }
 +
 +        clear_rvec(fr->vir_diag_posres);
 +    }
 +    if (inputrec->ePull == epullCONSTRAINT)
 +    {
 +        clear_pull_forces(inputrec->pull);
 +    }
 +
 +    /* update QMMMrec, if necessary */
 +    if(fr->bQMMM)
 +    {
 +        update_QMMMrec(cr,fr,x,mdatoms,box,top);
 +    }
 +
 +    if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_POSRES].nr > 0)
 +    {
 +        /* Position restraints always require full pbc. Check if we already did it for Adress */
 +        if(!(bStateChanged && bDoAdressWF))
 +        {
 +            set_pbc(&pbc,inputrec->ePBC,box);
 +        }
 +        v = posres(top->idef.il[F_POSRES].nr,top->idef.il[F_POSRES].iatoms,
 +                   top->idef.iparams_posres,
 +                   (const rvec*)x,fr->f_novirsum,fr->vir_diag_posres,
 +                   inputrec->ePBC==epbcNONE ? NULL : &pbc,lambda[efptRESTRAINT],&(dvdlambda[efptRESTRAINT]),
 +                   fr->rc_scaling,fr->ePBC,fr->posres_com,fr->posres_comB);
 +        if (bSepDVDL)
 +        {
 +            fprintf(fplog,sepdvdlformat,
 +                    interaction_function[F_POSRES].longname,v,dvdlambda);
 +        }
 +        enerd->term[F_POSRES] += v;
 +        /* This linear lambda dependence assumption is only correct
 +         * when only k depends on lambda,
 +         * not when the reference position depends on lambda.
 +         * grompp checks for this.  (verify this is still the case?)
 +         */
 +        enerd->dvdl_nonlin[efptRESTRAINT] += dvdlambda[efptRESTRAINT]; /* if just the force constant changes, this is linear,
 +                                                                          but we can't be sure w/o additional checking that is
 +                                                                          hard to do at this level of code. Otherwise,
 +                                                                          the dvdl is not differentiable */
 +        inc_nrnb(nrnb,eNR_POSRES,top->idef.il[F_POSRES].nr/2);
 +        if ((inputrec->fepvals->n_lambda > 0) && (flags & GMX_FORCE_DHDL))
 +        {
 +            for(i=0; i<enerd->n_lambda; i++)
 +            {
 +                lambda_dum = (i==0 ? lambda[efptRESTRAINT] : inputrec->fepvals->all_lambda[efptRESTRAINT][i-1]);
 +                v = posres(top->idef.il[F_POSRES].nr,top->idef.il[F_POSRES].iatoms,
 +                           top->idef.iparams_posres,
 +                           (const rvec*)x,NULL,NULL,
 +                           inputrec->ePBC==epbcNONE ? NULL : &pbc,lambda_dum,&dvdl_dum,
 +                           fr->rc_scaling,fr->ePBC,fr->posres_com,fr->posres_comB);
 +                enerd->enerpart_lambda[i] += v;
 +            }
 +        }
 +   }
 +
 +    if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_FBPOSRES].nr > 0)
 +    {
 +        /* Flat-bottomed position restraints always require full pbc */
 +        if(!(bStateChanged && bDoAdressWF))
 +        {
 +            set_pbc(&pbc,inputrec->ePBC,box);
 +        }
 +        v = fbposres(top->idef.il[F_FBPOSRES].nr,top->idef.il[F_FBPOSRES].iatoms,
 +                     top->idef.iparams_fbposres,
 +                     (const rvec*)x,fr->f_novirsum,fr->vir_diag_posres,
 +                     inputrec->ePBC==epbcNONE ? NULL : &pbc,
 +                     fr->rc_scaling,fr->ePBC,fr->posres_com);
 +        enerd->term[F_FBPOSRES] += v;
 +        inc_nrnb(nrnb,eNR_FBPOSRES,top->idef.il[F_FBPOSRES].nr/2);
 +    }
 +
 +    /* Compute the bonded and non-bonded energies and optionally forces */
 +    do_force_lowlevel(fplog,step,fr,inputrec,&(top->idef),
 +                      cr,nrnb,wcycle,mdatoms,&(inputrec->opts),
 +                      x,hist,f,enerd,fcd,mtop,top,fr->born,
 +                      &(top->atomtypes),bBornRadii,box,
 +                      inputrec->fepvals,lambda,graph,&(top->excls),fr->mu_tot,
 +                      flags,&cycles_pme);
 +
 +    cycles_force = wallcycle_stop(wcycle,ewcFORCE);
 +
 +    if (ed)
 +    {
++        do_flood(fplog,cr,x,f,ed,box,step,bNS);
 +    }
 +
 +    if (DOMAINDECOMP(cr))
 +    {
 +        dd_force_flop_stop(cr->dd,nrnb);
 +        if (wcycle)
 +        {
 +            dd_cycles_add(cr->dd,cycles_force-cycles_pme,ddCyclF);
 +        }
 +    }
 +
 +    if (bDoForces)
 +    {
 +        if (IR_ELEC_FIELD(*inputrec))
 +        {
 +            /* Compute forces due to electric field */
 +            calc_f_el(MASTER(cr) ? field : NULL,
 +                      start,homenr,mdatoms->chargeA,x,fr->f_novirsum,
 +                      inputrec->ex,inputrec->et,t);
 +        }
 +
 +        if (bDoAdressWF && fr->adress_icor == eAdressICThermoForce)
 +        {
 +            /* Compute thermodynamic force in hybrid AdResS region */
 +            adress_thermo_force(start,homenr,&(top->cgs),x,fr->f_novirsum,fr,mdatoms,
 +                                inputrec->ePBC==epbcNONE ? NULL : &pbc);
 +        }
 +
 +        /* Communicate the forces */
 +        if (PAR(cr))
 +        {
 +            wallcycle_start(wcycle,ewcMOVEF);
 +            if (DOMAINDECOMP(cr))
 +            {
 +                dd_move_f(cr->dd,f,fr->fshift);
 +                /* Do we need to communicate the separate force array
 +                 * for terms that do not contribute to the single sum virial?
 +                 * Position restraints and electric fields do not introduce
 +                 * inter-cg forces, only full electrostatics methods do.
 +                 * When we do not calculate the virial, fr->f_novirsum = f,
 +                 * so we have already communicated these forces.
 +                 */
 +                if (EEL_FULL(fr->eeltype) && cr->dd->n_intercg_excl &&
 +                    (flags & GMX_FORCE_VIRIAL))
 +                {
 +                    dd_move_f(cr->dd,fr->f_novirsum,NULL);
 +                }
 +                if (bSepLRF)
 +                {
 +                    /* We should not update the shift forces here,
 +                     * since f_twin is already included in f.
 +                     */
 +                    dd_move_f(cr->dd,fr->f_twin,NULL);
 +                }
 +            }
 +            else
 +            {
 +                pd_move_f(cr,f,nrnb);
 +                if (bSepLRF)
 +                {
 +                    pd_move_f(cr,fr->f_twin,nrnb);
 +                }
 +            }
 +            wallcycle_stop(wcycle,ewcMOVEF);
 +        }
 +
 +        /* If we have NoVirSum forces, but we do not calculate the virial,
 +         * we sum fr->f_novirsum=f later.
 +         */
 +        if (vsite && !(fr->bF_NoVirSum && !(flags & GMX_FORCE_VIRIAL)))
 +        {
 +            wallcycle_start(wcycle,ewcVSITESPREAD);
 +            spread_vsite_f(fplog,vsite,x,f,fr->fshift,FALSE,NULL,nrnb,
 +                           &top->idef,fr->ePBC,fr->bMolPBC,graph,box,cr);
 +            wallcycle_stop(wcycle,ewcVSITESPREAD);
 +
 +            if (bSepLRF)
 +            {
 +                wallcycle_start(wcycle,ewcVSITESPREAD);
 +                spread_vsite_f(fplog,vsite,x,fr->f_twin,NULL,FALSE,NULL,
 +                               nrnb,
 +                               &top->idef,fr->ePBC,fr->bMolPBC,graph,box,cr);
 +                wallcycle_stop(wcycle,ewcVSITESPREAD);
 +            }
 +        }
 +
 +        if (flags & GMX_FORCE_VIRIAL)
 +        {
 +            /* Calculation of the virial must be done after vsites! */
 +            calc_virial(fplog,mdatoms->start,mdatoms->homenr,x,f,
 +                        vir_force,graph,box,nrnb,fr,inputrec->ePBC);
 +        }
 +    }
 +
 +    enerd->term[F_COM_PULL] = 0;
 +    if (inputrec->ePull == epullUMBRELLA || inputrec->ePull == epullCONST_F)
 +    {
 +        /* Calculate the center of mass forces, this requires communication,
 +         * which is why pull_potential is called close to other communication.
 +         * The virial contribution is calculated directly,
 +         * which is why we call pull_potential after calc_virial.
 +         */
 +        set_pbc(&pbc,inputrec->ePBC,box);
 +        dvdlambda[efptRESTRAINT] = 0;
 +        enerd->term[F_COM_PULL] +=
 +            pull_potential(inputrec->ePull,inputrec->pull,mdatoms,&pbc,
 +                           cr,t,lambda[efptRESTRAINT],x,f,vir_force,&(dvdlambda[efptRESTRAINT]));
 +        if (bSepDVDL)
 +        {
 +            fprintf(fplog,sepdvdlformat,"Com pull",enerd->term[F_COM_PULL],dvdlambda[efptRESTRAINT]);
 +        }
 +        enerd->dvdl_lin[efptRESTRAINT] += dvdlambda[efptRESTRAINT];
 +    }
 +
 +    /* Add the forces from enforced rotation potentials (if any) */
 +    if (inputrec->bRot)
 +    {
 +        wallcycle_start(wcycle,ewcROTadd);
 +        enerd->term[F_COM_PULL] += add_rot_forces(inputrec->rot, f, cr,step,t);
 +        wallcycle_stop(wcycle,ewcROTadd);
 +    }
 +
 +    if (PAR(cr) && !(cr->duty & DUTY_PME))
 +    {
 +        cycles_ppdpme = wallcycle_stop(wcycle,ewcPPDURINGPME);
 +        dd_cycles_add(cr->dd,cycles_ppdpme,ddCyclPPduringPME);
 +
 +        /* In case of node-splitting, the PP nodes receive the long-range
 +         * forces, virial and energy from the PME nodes here.
 +         */
 +        wallcycle_start(wcycle,ewcPP_PMEWAITRECVF);
 +        dvdlambda[efptCOUL] = 0;
 +        gmx_pme_receive_f(cr,fr->f_novirsum,fr->vir_el_recip,&e,&dvdlambda[efptCOUL],
 +                          &cycles_seppme);
 +        if (bSepDVDL)
 +        {
 +            fprintf(fplog,sepdvdlformat,"PME mesh",e,dvdlambda[efptCOUL]);
 +        }
 +        enerd->term[F_COUL_RECIP] += e;
 +        enerd->dvdl_lin[efptCOUL] += dvdlambda[efptCOUL];
 +        if (wcycle)
 +        {
 +            dd_cycles_add(cr->dd,cycles_seppme,ddCyclPME);
 +        }
 +        wallcycle_stop(wcycle,ewcPP_PMEWAITRECVF);
 +    }
 +
 +    if (bDoForces && fr->bF_NoVirSum)
 +    {
 +        if (vsite)
 +        {
 +            /* Spread the mesh force on virtual sites to the other particles...
 +             * This is parallelized. MPI communication is performed
 +             * if the constructing atoms aren't local.
 +             */
 +            wallcycle_start(wcycle,ewcVSITESPREAD);
 +            spread_vsite_f(fplog,vsite,x,fr->f_novirsum,NULL,
 +                           (flags & GMX_FORCE_VIRIAL),fr->vir_el_recip,
 +                           nrnb,
 +                           &top->idef,fr->ePBC,fr->bMolPBC,graph,box,cr);
 +            wallcycle_stop(wcycle,ewcVSITESPREAD);
 +        }
 +        if (flags & GMX_FORCE_VIRIAL)
 +        {
 +            /* Now add the forces, this is local */
 +            if (fr->bDomDec)
 +            {
 +                sum_forces(0,fr->f_novirsum_n,f,fr->f_novirsum);
 +            }
 +            else
 +            {
 +                sum_forces(start,start+homenr,f,fr->f_novirsum);
 +            }
 +            if (EEL_FULL(fr->eeltype))
 +            {
 +                /* Add the mesh contribution to the virial */
 +                m_add(vir_force,fr->vir_el_recip,vir_force);
 +            }
 +            if (debug)
 +            {
 +                pr_rvecs(debug,0,"vir_force",vir_force,DIM);
 +            }
 +        }
 +    }
 +
 +    /* Sum the potential energy terms from group contributions */
 +    sum_epot(&(inputrec->opts),enerd);
 +
 +    if (fr->print_force >= 0 && bDoForces)
 +    {
 +        print_large_forces(stderr,mdatoms,cr,step,fr->print_force,x,f);
 +    }
 +}
 +
 +void do_constrain_first(FILE *fplog,gmx_constr_t constr,
 +                        t_inputrec *ir,t_mdatoms *md,
 +                        t_state *state,rvec *f,
 +                        t_graph *graph,t_commrec *cr,t_nrnb *nrnb,
 +                        t_forcerec *fr, gmx_localtop_t *top, tensor shake_vir)
 +{
 +    int    i,m,start,end;
 +    gmx_large_int_t step;
 +    real   dt=ir->delta_t;
 +    real   dvdl_dum;
 +    rvec   *savex;
 +
 +    snew(savex,state->natoms);
 +
 +    start = md->start;
 +    end   = md->homenr + start;
 +
 +    if (debug)
 +        fprintf(debug,"vcm: start=%d, homenr=%d, end=%d\n",
 +                start,md->homenr,end);
 +    /* Do a first constrain to reset particles... */
 +    step = ir->init_step;
 +    if (fplog)
 +    {
 +        char buf[STEPSTRSIZE];
 +        fprintf(fplog,"\nConstraining the starting coordinates (step %s)\n",
 +                gmx_step_str(step,buf));
 +    }
 +    dvdl_dum = 0;
 +
 +    /* constrain the current position */
 +    constrain(NULL,TRUE,FALSE,constr,&(top->idef),
 +              ir,NULL,cr,step,0,md,
 +              state->x,state->x,NULL,
 +              state->box,state->lambda[efptBONDED],&dvdl_dum,
 +              NULL,NULL,nrnb,econqCoord,ir->epc==epcMTTK,state->veta,state->veta);
 +    if (EI_VV(ir->eI))
 +    {
 +        /* constrain the initial velocity, and save it */
 +        /* also may be useful if we need the ekin from the halfstep for velocity verlet */
 +        /* might not yet treat veta correctly */
 +        constrain(NULL,TRUE,FALSE,constr,&(top->idef),
 +                  ir,NULL,cr,step,0,md,
 +                  state->x,state->v,state->v,
 +                  state->box,state->lambda[efptBONDED],&dvdl_dum,
 +                  NULL,NULL,nrnb,econqVeloc,ir->epc==epcMTTK,state->veta,state->veta);
 +    }
 +    /* constrain the initial velocities at t-dt/2 */
 +    if (EI_STATE_VELOCITY(ir->eI) && ir->eI!=eiVV)
 +    {
 +        for(i=start; (i<end); i++)
 +        {
 +            for(m=0; (m<DIM); m++)
 +            {
 +                /* Reverse the velocity */
 +                state->v[i][m] = -state->v[i][m];
 +                /* Store the position at t-dt in buf */
 +                savex[i][m] = state->x[i][m] + dt*state->v[i][m];
 +            }
 +        }
 +        /* Shake the positions at t=-dt with the positions at t=0
 +         * as reference coordinates.
 +         */
 +        if (fplog)
 +        {
 +            char buf[STEPSTRSIZE];
 +            fprintf(fplog,"\nConstraining the coordinates at t0-dt (step %s)\n",
 +                    gmx_step_str(step,buf));
 +        }
 +        dvdl_dum = 0;
 +        constrain(NULL,TRUE,FALSE,constr,&(top->idef),
 +                  ir,NULL,cr,step,-1,md,
 +                  state->x,savex,NULL,
 +                  state->box,state->lambda[efptBONDED],&dvdl_dum,
 +                  state->v,NULL,nrnb,econqCoord,ir->epc==epcMTTK,state->veta,state->veta);
 +
 +        for(i=start; i<end; i++) {
 +            for(m=0; m<DIM; m++) {
 +                /* Re-reverse the velocities */
 +                state->v[i][m] = -state->v[i][m];
 +            }
 +        }
 +    }
 +    sfree(savex);
 +}
 +
 +void calc_enervirdiff(FILE *fplog,int eDispCorr,t_forcerec *fr)
 +{
 +  double eners[2],virs[2],enersum,virsum,y0,f,g,h;
 +  double r0,r1,r,rc3,rc9,ea,eb,ec,pa,pb,pc,pd;
 +  double invscale,invscale2,invscale3;
 +  int    ri0,ri1,ri,i,offstart,offset;
 +  real   scale,*vdwtab;
 +
 +  fr->enershiftsix = 0;
 +  fr->enershifttwelve = 0;
 +  fr->enerdiffsix = 0;
 +  fr->enerdifftwelve = 0;
 +  fr->virdiffsix = 0;
 +  fr->virdifftwelve = 0;
 +
 +  if (eDispCorr != edispcNO) {
 +    for(i=0; i<2; i++) {
 +      eners[i] = 0;
 +      virs[i]  = 0;
 +    }
 +    if ((fr->vdwtype == evdwSWITCH) || (fr->vdwtype == evdwSHIFT)) {
 +      if (fr->rvdw_switch == 0)
 +        gmx_fatal(FARGS,
 +                  "With dispersion correction rvdw-switch can not be zero "
 +                  "for vdw-type = %s",evdw_names[fr->vdwtype]);
 +
 +      scale  = fr->nblists[0].tab.scale;
 +      vdwtab = fr->nblists[0].vdwtab;
 +
 +      /* Round the cut-offs to exact table values for precision */
 +      ri0 = floor(fr->rvdw_switch*scale);
 +      ri1 = ceil(fr->rvdw*scale);
 +      r0  = ri0/scale;
 +      r1  = ri1/scale;
 +      rc3 = r0*r0*r0;
 +      rc9  = rc3*rc3*rc3;
 +
 +      if (fr->vdwtype == evdwSHIFT) {
 +        /* Determine the constant energy shift below rvdw_switch */
 +        fr->enershiftsix    = (real)(-1.0/(rc3*rc3)) - vdwtab[8*ri0];
 +        fr->enershifttwelve = (real)( 1.0/(rc9*rc3)) - vdwtab[8*ri0 + 4];
 +      }
 +      /* Add the constant part from 0 to rvdw_switch.
 +       * This integration from 0 to rvdw_switch overcounts the number
 +       * of interactions by 1, as it also counts the self interaction.
 +       * We will correct for this later.
 +       */
 +      eners[0] += 4.0*M_PI*fr->enershiftsix*rc3/3.0;
 +      eners[1] += 4.0*M_PI*fr->enershifttwelve*rc3/3.0;
 +
 +      invscale = 1.0/(scale);
 +      invscale2 = invscale*invscale;
 +      invscale3 = invscale*invscale2;
 +
 +      /* following summation derived from cubic spline definition,
 +      Numerical Recipes in C, second edition, p. 113-116.  Exact
 +      for the cubic spline.  We first calculate the negative of
 +      the energy from rvdw to rvdw_switch, assuming that g(r)=1,
 +      and then add the more standard, abrupt cutoff correction to
 +      that result, yielding the long-range correction for a
 +      switched function.  We perform both the pressure and energy
 +      loops at the same time for simplicity, as the computational
 +      cost is low. */
 +
 +      for (i=0;i<2;i++) {
 +        enersum = 0.0; virsum = 0.0;
 +        if (i==0)
 +          offstart = 0;
 +        else
 +          offstart = 4;
 +      for (ri=ri0; ri<ri1; ri++) {
 +          r = ri*invscale;
 +          ea = invscale3;
 +          eb = 2.0*invscale2*r;
 +          ec = invscale*r*r;
 +
 +          pa = invscale3;
 +          pb = 3.0*invscale2*r;
 +          pc = 3.0*invscale*r*r;
 +          pd = r*r*r;
 +
 +          /* this "8" is from the packing in the vdwtab array - perhaps
 +          should be #define'ed? */
 +          offset = 8*ri + offstart;
 +          y0 = vdwtab[offset];
 +          f = vdwtab[offset+1];
 +          g = vdwtab[offset+2];
 +          h = vdwtab[offset+3];
 +
 +          enersum += y0*(ea/3 + eb/2 + ec) + f*(ea/4 + eb/3 + ec/2)+
 +            g*(ea/5 + eb/4 + ec/3) + h*(ea/6 + eb/5 + ec/4);
 +          virsum  +=  f*(pa/4 + pb/3 + pc/2 + pd) +
 +            2*g*(pa/5 + pb/4 + pc/3 + pd/2) + 3*h*(pa/6 + pb/5 + pc/4 + pd/3);
 +
 +        }
 +        enersum *= 4.0*M_PI;
 +        virsum  *= 4.0*M_PI;
 +        eners[i] -= enersum;
 +        virs[i]  -= virsum;
 +      }
 +
 +      /* now add the correction for rvdw_switch to infinity */
 +      eners[0] += -4.0*M_PI/(3.0*rc3);
 +      eners[1] +=  4.0*M_PI/(9.0*rc9);
 +      virs[0]  +=  8.0*M_PI/rc3;
 +      virs[1]  += -16.0*M_PI/(3.0*rc9);
 +    }
 +    else if ((fr->vdwtype == evdwCUT) || (fr->vdwtype == evdwUSER)) {
 +      if (fr->vdwtype == evdwUSER && fplog)
 +        fprintf(fplog,
 +                "WARNING: using dispersion correction with user tables\n");
 +      rc3  = fr->rvdw*fr->rvdw*fr->rvdw;
 +      rc9  = rc3*rc3*rc3;
 +      eners[0] += -4.0*M_PI/(3.0*rc3);
 +      eners[1] +=  4.0*M_PI/(9.0*rc9);
 +      virs[0]  +=  8.0*M_PI/rc3;
 +      virs[1]  += -16.0*M_PI/(3.0*rc9);
 +    } else {
 +      gmx_fatal(FARGS,
 +              "Dispersion correction is not implemented for vdw-type = %s",
 +              evdw_names[fr->vdwtype]);
 +    }
 +    fr->enerdiffsix    = eners[0];
 +    fr->enerdifftwelve = eners[1];
 +    /* The 0.5 is due to the Gromacs definition of the virial */
 +    fr->virdiffsix     = 0.5*virs[0];
 +    fr->virdifftwelve  = 0.5*virs[1];
 +  }
 +}
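 +
 +/* Standalone sketch of the plain cut-off tail integrals used above
 + * (g(r) = 1 beyond rc; the C6/C12 prefactors are applied later through
 + * avcsix/avctwelve). The example_ name is illustrative only.
 + */
 +#if 0
 +static void example_lj_tail(double rc,
 +                            double *ener6, double *ener12,
 +                            double *vir6,  double *vir12)
 +{
 +    double rc3 = rc*rc*rc;
 +    double rc9 = rc3*rc3*rc3;
 +
 +    *ener6  = -4.0*M_PI/(3.0*rc3);
 +    *ener12 =  4.0*M_PI/(9.0*rc9);
 +    *vir6   =  8.0*M_PI/rc3;
 +    *vir12  = -16.0*M_PI/(3.0*rc9);
 +}
 +#endif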
 +
 +void calc_dispcorr(FILE *fplog,t_inputrec *ir,t_forcerec *fr,
 +                   gmx_large_int_t step,int natoms,
 +                   matrix box,real lambda,tensor pres,tensor virial,
 +                   real *prescorr, real *enercorr, real *dvdlcorr)
 +{
 +    gmx_bool bCorrAll,bCorrPres;
 +    real dvdlambda,invvol,dens,ninter,avcsix,avctwelve,enerdiff,svir=0,spres=0;
 +    int  m;
 +
 +    *prescorr = 0;
 +    *enercorr = 0;
 +    *dvdlcorr = 0;
 +
 +    clear_mat(virial);
 +    clear_mat(pres);
 +
 +    if (ir->eDispCorr != edispcNO) {
 +        bCorrAll  = (ir->eDispCorr == edispcAllEner ||
 +                     ir->eDispCorr == edispcAllEnerPres);
 +        bCorrPres = (ir->eDispCorr == edispcEnerPres ||
 +                     ir->eDispCorr == edispcAllEnerPres);
 +
 +        invvol = 1/det(box);
 +        if (fr->n_tpi)
 +        {
 +            /* Only correct for the interactions with the inserted molecule */
 +            dens = (natoms - fr->n_tpi)*invvol;
 +            ninter = fr->n_tpi;
 +        }
 +        else
 +        {
 +            dens = natoms*invvol;
 +            ninter = 0.5*natoms;
 +        }
 +
 +        if (ir->efep == efepNO)
 +        {
 +            avcsix    = fr->avcsix[0];
 +            avctwelve = fr->avctwelve[0];
 +        }
 +        else
 +        {
 +            avcsix    = (1 - lambda)*fr->avcsix[0]    + lambda*fr->avcsix[1];
 +            avctwelve = (1 - lambda)*fr->avctwelve[0] + lambda*fr->avctwelve[1];
 +        }
 +
 +        enerdiff = ninter*(dens*fr->enerdiffsix - fr->enershiftsix);
 +        *enercorr += avcsix*enerdiff;
 +        dvdlambda = 0.0;
 +        if (ir->efep != efepNO)
 +        {
 +            dvdlambda += (fr->avcsix[1] - fr->avcsix[0])*enerdiff;
 +        }
 +        if (bCorrAll)
 +        {
 +            enerdiff = ninter*(dens*fr->enerdifftwelve - fr->enershifttwelve);
 +            *enercorr += avctwelve*enerdiff;
 +            if (fr->efep != efepNO)
 +            {
 +                dvdlambda += (fr->avctwelve[1] - fr->avctwelve[0])*enerdiff;
 +            }
 +        }
 +
 +        if (bCorrPres)
 +        {
 +            svir = ninter*dens*avcsix*fr->virdiffsix/3.0;
 +            if (ir->eDispCorr == edispcAllEnerPres)
 +            {
 +                svir += ninter*dens*avctwelve*fr->virdifftwelve/3.0;
 +            }
 +            /* The factor 2 is because of the Gromacs virial definition */
 +            spres = -2.0*invvol*svir*PRESFAC;
 +
 +            for(m=0; m<DIM; m++) {
 +                virial[m][m] += svir;
 +                pres[m][m] += spres;
 +            }
 +            *prescorr += spres;
 +        }
 +
 +        /* Can't currently control when it prints; for now, just print when debugging */
 +        if (debug)
 +        {
 +            if (bCorrAll) {
 +                fprintf(debug,"Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
 +                        avcsix,avctwelve);
 +            }
 +            if (bCorrPres)
 +            {
 +                fprintf(debug,
 +                        "Long Range LJ corr.: Epot %10g, Pres: %10g, Vir: %10g\n",
 +                        *enercorr,spres,svir);
 +            }
 +            else
 +            {
 +                fprintf(debug,"Long Range LJ corr.: Epot %10g\n",*enercorr);
 +            }
 +        }
 +
 +        if (fr->bSepDVDL && do_per_step(step,ir->nstlog))
 +        {
 +            fprintf(fplog,sepdvdlformat,"Dispersion correction",
 +                    *enercorr,dvdlambda);
 +        }
 +        if (fr->efep != efepNO)
 +        {
 +            *dvdlcorr += dvdlambda;
 +        }
 +    }
 +}
 +
 +void do_pbc_first(FILE *fplog,matrix box,t_forcerec *fr,
 +                t_graph *graph,rvec x[])
 +{
 +  if (fplog)
 +    fprintf(fplog,"Removing pbc first time\n");
 +  calc_shifts(box,fr->shift_vec);
 +  if (graph) {
 +    mk_mshift(fplog,graph,fr->ePBC,box,x);
 +    if (gmx_debug_at)
 +      p_graph(debug,"do_pbc_first 1",graph);
 +    shift_self(graph,box,x);
 +    /* By doing an extra mk_mshift the molecules that are broken
 +     * because they were, e.g., imported from other software
 +     * will be made whole again. Such are the healing powers
 +     * of GROMACS.
 +     */
 +    mk_mshift(fplog,graph,fr->ePBC,box,x);
 +    if (gmx_debug_at)
 +      p_graph(debug,"do_pbc_first 2",graph);
 +  }
 +  if (fplog)
 +    fprintf(fplog,"Done rmpbc\n");
 +}
 +
 +static void low_do_pbc_mtop(FILE *fplog,int ePBC,matrix box,
 +                          gmx_mtop_t *mtop,rvec x[],
 +                          gmx_bool bFirst)
 +{
 +  t_graph *graph;
 +  int mb,as,mol;
 +  gmx_molblock_t *molb;
 +
 +  if (bFirst && fplog)
 +    fprintf(fplog,"Removing pbc first time\n");
 +
 +  snew(graph,1);
 +  as = 0;
 +  for(mb=0; mb<mtop->nmolblock; mb++) {
 +    molb = &mtop->molblock[mb];
 +    if (molb->natoms_mol == 1 ||
 +      (!bFirst && mtop->moltype[molb->type].cgs.nr == 1)) {
 +      /* Just one atom or charge group in the molecule, no PBC required */
 +      as += molb->nmol*molb->natoms_mol;
 +    } else {
 +      /* Pass NULL instead of fplog to avoid graph prints for each molecule type */
 +      mk_graph_ilist(NULL,mtop->moltype[molb->type].ilist,
 +                   0,molb->natoms_mol,FALSE,FALSE,graph);
 +
 +      for(mol=0; mol<molb->nmol; mol++) {
 +      mk_mshift(fplog,graph,ePBC,box,x+as);
 +
 +      shift_self(graph,box,x+as);
 +      /* The molecule is whole now.
 +       * We don't need the second mk_mshift call as in do_pbc_first,
 +       * since we no longer need this graph.
 +       */
 +
 +      as += molb->natoms_mol;
 +      }
 +      done_graph(graph);
 +    }
 +  }
 +  sfree(graph);
 +}
 +
 +void do_pbc_first_mtop(FILE *fplog,int ePBC,matrix box,
 +                     gmx_mtop_t *mtop,rvec x[])
 +{
 +  low_do_pbc_mtop(fplog,ePBC,box,mtop,x,TRUE);
 +}
 +
 +void do_pbc_mtop(FILE *fplog,int ePBC,matrix box,
 +               gmx_mtop_t *mtop,rvec x[])
 +{
 +  low_do_pbc_mtop(fplog,ePBC,box,mtop,x,FALSE);
 +}
 +
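 +/* Descriptive note on the function below: it sums the flop and cycle
 + * counters over all nodes and, on the master, prints the flop and timing
 + * statistics to the log file and optionally to stderr.
 + */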
 +void finish_run(FILE *fplog,t_commrec *cr,const char *confout,
 +                t_inputrec *inputrec,
 +                t_nrnb nrnb[],gmx_wallcycle_t wcycle,
 +                gmx_runtime_t *runtime,
 +                gmx_bool bWriteStat)
 +{
 +  int    i,j;
 +  t_nrnb *nrnb_tot=NULL;
 +  real   delta_t;
 +  double nbfs,mflop;
 +  double cycles[ewcNR];
 +
 +  wallcycle_sum(cr,wcycle,cycles);
 +
 +  if (cr->nnodes > 1) {
 +    if (SIMMASTER(cr))
 +      snew(nrnb_tot,1);
 +#ifdef GMX_MPI
 +    MPI_Reduce(nrnb->n,nrnb_tot->n,eNRNB,MPI_DOUBLE,MPI_SUM,
 +               MASTERRANK(cr),cr->mpi_comm_mysim);
 +#endif
 +  } else {
 +    nrnb_tot = nrnb;
 +  }
 +
 +  if (SIMMASTER(cr)) {
 +    print_flop(fplog,nrnb_tot,&nbfs,&mflop);
 +    if (cr->nnodes > 1) {
 +      sfree(nrnb_tot);
 +    }
 +  }
 +
 +  if ((cr->duty & DUTY_PP) && DOMAINDECOMP(cr)) {
 +    print_dd_statistics(cr,inputrec,fplog);
 +  }
 +
 +#ifdef GMX_MPI
 +    if (PARTDECOMP(cr))
 +    {
 +        if (MASTER(cr))
 +        {
 +            t_nrnb     *nrnb_all;
 +            int        s;
 +            MPI_Status stat;
 +
 +            snew(nrnb_all,cr->nnodes);
 +            nrnb_all[0] = *nrnb;
 +            for(s=1; s<cr->nnodes; s++)
 +            {
 +                MPI_Recv(nrnb_all[s].n,eNRNB,MPI_DOUBLE,s,0,
 +                         cr->mpi_comm_mysim,&stat);
 +            }
 +            pr_load(fplog,cr,nrnb_all);
 +            sfree(nrnb_all);
 +        }
 +        else
 +        {
 +            MPI_Send(nrnb->n,eNRNB,MPI_DOUBLE,MASTERRANK(cr),0,
 +                     cr->mpi_comm_mysim);
 +        }
 +    }
 +#endif
 +
 +  if (SIMMASTER(cr)) {
 +    wallcycle_print(fplog,cr->nnodes,cr->npmenodes,runtime->realtime,
 +                    wcycle,cycles);
 +
 +    if (EI_DYNAMICS(inputrec->eI)) {
 +      delta_t = inputrec->delta_t;
 +    } else {
 +      delta_t = 0;
 +    }
 +
 +    if (fplog) {
 +        print_perf(fplog,runtime->proctime,runtime->realtime,
 +                   cr->nnodes-cr->npmenodes,
 +                   runtime->nsteps_done,delta_t,nbfs,mflop);
 +    }
 +    if (bWriteStat) {
 +        print_perf(stderr,runtime->proctime,runtime->realtime,
 +                   cr->nnodes-cr->npmenodes,
 +                   runtime->nsteps_done,delta_t,nbfs,mflop);
 +    }
 +
 +    /*
 +    runtime=inputrec->nsteps*inputrec->delta_t;
 +    if (bWriteStat) {
 +      if (cr->nnodes == 1)
 +      fprintf(stderr,"\n\n");
 +      print_perf(stderr,nodetime,realtime,runtime,&ntot,
 +               cr->nnodes-cr->npmenodes,FALSE);
 +    }
 +    wallcycle_print(fplog,cr->nnodes,cr->npmenodes,realtime,wcycle,cycles);
 +    print_perf(fplog,nodetime,realtime,runtime,&ntot,cr->nnodes-cr->npmenodes,
 +             TRUE);
 +    if (PARTDECOMP(cr))
 +      pr_load(fplog,cr,nrnb_all);
 +    if (cr->nnodes > 1)
 +      sfree(nrnb_all);
 +    */
 +  }
 +}
 +
 +extern void initialize_lambdas(FILE *fplog,t_inputrec *ir,int *fep_state,real *lambda,double *lam0)
 +{
 +    /* this function works, but could probably use a logic rewrite to keep all the different
 +       types of efep straight. */
 +
 +    int i;
 +    t_lambda *fep = ir->fepvals;
 +
 +    if ((ir->efep==efepNO) && (ir->bSimTemp == FALSE)) {
 +        for (i=0;i<efptNR;i++)  {
 +            lambda[i] = 0.0;
 +            if (lam0)
 +            {
 +                lam0[i] = 0.0;
 +            }
 +        }
 +        return;
 +    } else {
 +        *fep_state = fep->init_fep_state; /* this might overwrite the checkpoint
 +                                             if checkpoint is set -- a kludge is in for now
 +                                             to prevent this.*/
 +        for (i=0;i<efptNR;i++)
 +        {
 +            /* overwrite lambda state with init_lambda for now for backwards compatibility */
 +            if (fep->init_lambda>=0) /* if it's -1, it was never initialized */
 +            {
 +                lambda[i] = fep->init_lambda;
 +                if (lam0) {
 +                    lam0[i] = lambda[i];
 +                }
 +            }
 +            else
 +            {
 +                lambda[i] = fep->all_lambda[i][*fep_state];
 +                if (lam0) {
 +                    lam0[i] = lambda[i];
 +                }
 +            }
 +        }
 +        if (ir->bSimTemp) {
 +            /* need to rescale control temperatures to match current state */
 +            for (i=0;i<ir->opts.ngtc;i++) {
 +                if (ir->opts.ref_t[i] > 0) {
 +                    ir->opts.ref_t[i] = ir->simtempvals->temperatures[*fep_state];
 +                }
 +            }
 +        }
 +    }
 +
 +    /* Send to the log the information on the current lambdas */
 +    if (fplog != NULL)
 +    {
 +        fprintf(fplog,"Initial vector of lambda components:[ ");
 +        for (i=0;i<efptNR;i++)
 +        {
 +            fprintf(fplog,"%10.4f ",lambda[i]);
 +        }
 +        fprintf(fplog,"]\n");
 +    }
 +    return;
 +}
 +
 +
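 +/* Descriptive note on the function below: it sets up run-wide state before
 + * the main MD loop: start time, the simulated annealing flag, the lambda
 + * vector, the update and center-of-mass motion removal data, the output
 + * files and the energy bookkeeping structure.
 + */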
 +void init_md(FILE *fplog,
 +             t_commrec *cr,t_inputrec *ir,const output_env_t oenv,
 +             double *t,double *t0,
 +             real *lambda, int *fep_state, double *lam0,
 +             t_nrnb *nrnb,gmx_mtop_t *mtop,
 +             gmx_update_t *upd,
 +             int nfile,const t_filenm fnm[],
 +             gmx_mdoutf_t **outf,t_mdebin **mdebin,
 +             tensor force_vir,tensor shake_vir,rvec mu_tot,
 +             gmx_bool *bSimAnn,t_vcm **vcm, t_state *state, unsigned long Flags)
 +{
 +    int  i,j,n;
 +    real tmpt,mod;
 +
 +    /* Initial values */
 +    *t = *t0       = ir->init_t;
 +
 +    *bSimAnn=FALSE;
 +    for(i=0;i<ir->opts.ngtc;i++)
 +    {
 +        /* set bSimAnn if any group is being annealed */
 +        if(ir->opts.annealing[i]!=eannNO)
 +        {
 +            *bSimAnn = TRUE;
 +        }
 +    }
 +    if (*bSimAnn)
 +    {
 +        update_annealing_target_temp(&(ir->opts),ir->init_t);
 +    }
 +
 +    /* Initialize lambda variables */
 +    initialize_lambdas(fplog,ir,fep_state,lambda,lam0);
 +
 +    if (upd)
 +    {
 +        *upd = init_update(fplog,ir);
 +    }
 +
 +
 +    if (vcm != NULL)
 +    {
 +        *vcm = init_vcm(fplog,&mtop->groups,ir);
 +    }
 +
 +    if (EI_DYNAMICS(ir->eI) && !(Flags & MD_APPENDFILES))
 +    {
 +        if (ir->etc == etcBERENDSEN)
 +        {
 +            please_cite(fplog,"Berendsen84a");
 +        }
 +        if (ir->etc == etcVRESCALE)
 +        {
 +            please_cite(fplog,"Bussi2007a");
 +        }
 +    }
 +
 +    init_nrnb(nrnb);
 +
 +    if (nfile != -1)
 +    {
 +        *outf = init_mdoutf(nfile,fnm,Flags,cr,ir,oenv);
 +
 +        *mdebin = init_mdebin((Flags & MD_APPENDFILES) ? NULL : (*outf)->fp_ene,
 +                              mtop,ir, (*outf)->fp_dhdl);
 +    }
 +
 +    if (ir->bAdress)
 +    {
 +      please_cite(fplog,"Fritsch12");
 +      please_cite(fplog,"Junghans10");
 +    }
 +    /* Initiate variables */
 +    clear_mat(force_vir);
 +    clear_mat(shake_vir);
 +    clear_rvec(mu_tot);
 +
 +    debug_gmx();
 +}
 +
 +
 +
 +
Simple merge
Simple merge
Simple merge
index 963f09f6318ebdf5d0f313f8a2b6ff4d8ea2ba3e,0000000000000000000000000000000000000000..54da13f97cb8effe00679f1509b73b3a8328108a
mode 100644,000000..100644
--- /dev/null
@@@ -1,1680 -1,0 +1,1684 @@@
 +/*  -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.03
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <sys/types.h>
 +#include <math.h>
 +#include <string.h>
 +#include <errno.h>
 +#include <limits.h>
 +
 +#include "sysstuff.h"
 +#include "smalloc.h"
 +#include "macros.h"
 +#include "string2.h"
 +#include "readir.h"
 +#include "toputil.h"
 +#include "topio.h"
 +#include "confio.h"
 +#include "copyrite.h"
 +#include "readir.h"
 +#include "symtab.h"
 +#include "names.h"
 +#include "grompp.h"
 +#include "random.h"
 +#include "vec.h"
 +#include "futil.h"
 +#include "statutil.h"
 +#include "splitter.h"
 +#include "sortwater.h"
 +#include "convparm.h"
 +#include "gmx_fatal.h"
 +#include "warninp.h"
 +#include "index.h"
 +#include "gmxfio.h"
 +#include "trnio.h"
 +#include "tpxio.h"
 +#include "vsite_parm.h"
 +#include "txtdump.h"
 +#include "calcgrid.h"
 +#include "add_par.h"
 +#include "enxio.h"
 +#include "perf_est.h"
 +#include "compute_io.h"
 +#include "gpp_atomtype.h"
 +#include "gpp_tomorse.h"
 +#include "mtop_util.h"
 +#include "genborn.h"
 +
 +static int rm_interactions(int ifunc,int nrmols,t_molinfo mols[])
 +{
 +  int  i,n;
 +  
 +  n=0;
 +  /* For all the molecule types */
 +  for(i=0; i<nrmols; i++) {
 +    n += mols[i].plist[ifunc].nr;
 +    mols[i].plist[ifunc].nr=0;
 +  }
 +  return n;
 +}
 +
 +static int check_atom_names(const char *fn1, const char *fn2, 
 +                          gmx_mtop_t *mtop, t_atoms *at)
 +{
 +  int mb,m,i,j,nmismatch;
 +  t_atoms *tat;
 +#define MAXMISMATCH 20
 +
 +  if (mtop->natoms != at->nr)
 +    gmx_incons("comparing atom names");
 +  
 +  nmismatch=0;
 +  i = 0;
 +  for(mb=0; mb<mtop->nmolblock; mb++) {
 +    tat = &mtop->moltype[mtop->molblock[mb].type].atoms;
 +    for(m=0; m<mtop->molblock[mb].nmol; m++) {
 +      for(j=0; j < tat->nr; j++) {
 +      if (strcmp( *(tat->atomname[j]) , *(at->atomname[i]) ) != 0) {
 +        if (nmismatch < MAXMISMATCH) {
 +          fprintf(stderr,
 +                  "Warning: atom name %d in %s and %s does not match (%s - %s)\n",
 +                  i+1, fn1, fn2, *(tat->atomname[j]), *(at->atomname[i]));
 +        } else if (nmismatch == MAXMISMATCH) {
 +          fprintf(stderr,"(more than %d non-matching atom names)\n",MAXMISMATCH);
 +        }
 +        nmismatch++;
 +      }
 +      i++;
 +      }
 +    }
 +  }
 +
 +  return nmismatch;
 +}
 +
 +static void check_eg_vs_cg(gmx_mtop_t *mtop)
 +{
 +  int astart,mb,m,cg,j,firstj;
 +  unsigned char firsteg,eg;
 +  gmx_moltype_t *molt;
 +  
 +  /* Go through all the charge groups and make sure all their
 +   * atoms are in the same energy group.
 +   */
 +  
 +  astart = 0;
 +  for(mb=0; mb<mtop->nmolblock; mb++) {
 +    molt = &mtop->moltype[mtop->molblock[mb].type];
 +    for(m=0; m<mtop->molblock[mb].nmol; m++) {
 +      for(cg=0; cg<molt->cgs.nr;cg++) {
 +      /* Get the energy group of the first atom in this charge group */
 +      firstj = astart + molt->cgs.index[cg];
 +      firsteg = ggrpnr(&mtop->groups,egcENER,firstj);
 +      for(j=molt->cgs.index[cg]+1;j<molt->cgs.index[cg+1];j++) {
 +        eg = ggrpnr(&mtop->groups,egcENER,astart+j);
 +        if(eg != firsteg) {
 +          gmx_fatal(FARGS,"atoms %d and %d in charge group %d of molecule type '%s' are in different energy groups",
 +                    firstj+1,astart+j+1,cg+1,*molt->name);
 +        }
 +      }
 +      }
 +      astart += molt->atoms.nr;
 +    }
 +  }  
 +}
 +
 +static void check_cg_sizes(const char *topfn,t_block *cgs,warninp_t wi)
 +{
 +    int  maxsize,cg;
 +    char warn_buf[STRLEN];
 +
 +    maxsize = 0;
 +    for(cg=0; cg<cgs->nr; cg++)
 +    {
 +        maxsize = max(maxsize,cgs->index[cg+1]-cgs->index[cg]);
 +    }
 +    
 +    if (maxsize > MAX_CHARGEGROUP_SIZE)
 +    {
 +        gmx_fatal(FARGS,"The largest charge group contains %d atoms. The maximum is %d.",maxsize,MAX_CHARGEGROUP_SIZE);
 +    }
 +    else if (maxsize > 10)
 +    {
 +        set_warning_line(wi,topfn,-1);
 +        sprintf(warn_buf,
 +                "The largest charge group contains %d atoms.\n"
 +                "Since atoms only see each other when the centers of geometry of the charge groups they belong to are within the cut-off distance, too large charge groups can lead to serious cut-off artifacts.\n"
 +                "For efficiency and accuracy, charge groups should consist of a few atoms.\n"
 +                "For all-atom force fields use: CH3, CH2, CH, NH2, NH, OH, CO2, CO, etc.",
 +                maxsize);
 +        warning_note(wi,warn_buf);
 +    }
 +}
 +
 +static void check_bonds_timestep(gmx_mtop_t *mtop,double dt,warninp_t wi)
 +{
 +    /* This check is not intended to ensure accurate integration,
 +     * rather it is to signal mistakes in the mdp settings.
 +     * A common mistake is to forget to turn on constraints
 +     * for MD after energy minimization with flexible bonds.
 +     * This check can also detect too large time steps for flexible water
 +     * models, but such errors will often be masked by the constraints
 +     * mdp options, which turn flexible water into water with bond constraints,
 +     * but without an angle constraint. Unfortunately such incorrect use
 +     * of water models cannot easily be detected without checking
 +     * for specific model names.
 +     *
 +     * The stability limit of leap-frog or velocity verlet is 4.44 steps
 +     * per oscillational period.
 +     * But accurate bond distributions are lost far before that limit.
 +     * To allow relatively common schemes (although not common with Gromacs)
 +     * of dt=1 fs without constraints and dt=2 fs with only H-bond constraints
 +     * we set the note limit to 10.
 +     */
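 +    /* Below, the squared oscillation period of a bond is computed as
 +     * period2 = (2*pi)^2 * mu / fc, with reduced mass mu = m1*m2/(m1+m2),
 +     * and compared against (min_steps * dt)^2.
 +     */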
 +    int       min_steps_warn=5;
 +    int       min_steps_note=10;
 +    t_iparams *ip;
 +    int       molt;
 +    gmx_moltype_t *moltype,*w_moltype;
 +    t_atom    *atom;
 +    t_ilist   *ilist,*ilb,*ilc,*ils;
 +    int       ftype;
 +    int       i,a1,a2,w_a1,w_a2,j;
 +    real      twopi2,limit2,fc,re,m1,m2,period2,w_period2;
 +    gmx_bool  bFound,bWater,bWarn;
 +    char      warn_buf[STRLEN];
 +
 +    ip = mtop->ffparams.iparams;
 +
 +    twopi2 = sqr(2*M_PI);
 +
 +    limit2 = sqr(min_steps_note*dt);
 +
 +    w_a1 = w_a2 = -1;
 +    w_period2 = -1.0;
 +    
 +    w_moltype = NULL;
 +    for(molt=0; molt<mtop->nmoltype; molt++)
 +    {
 +        moltype = &mtop->moltype[molt];
 +        atom  = moltype->atoms.atom;
 +        ilist = moltype->ilist;
 +        ilc = &ilist[F_CONSTR];
 +        ils = &ilist[F_SETTLE];
 +        for(ftype=0; ftype<F_NRE; ftype++)
 +        {
 +            if (!(ftype == F_BONDS || ftype == F_G96BONDS || ftype == F_HARMONIC))
 +            {
 +                continue;
 +            }
 +            
 +            ilb = &ilist[ftype];
 +            for(i=0; i<ilb->nr; i+=3)
 +            {
 +                fc = ip[ilb->iatoms[i]].harmonic.krA;
 +                re = ip[ilb->iatoms[i]].harmonic.rA;
 +                if (ftype == F_G96BONDS)
 +                {
 +                    /* Convert the G96 (quartic) fc to a harmonic fc */
 +                    fc = 2*fc*re;
 +                }
 +                a1 = ilb->iatoms[i+1];
 +                a2 = ilb->iatoms[i+2];
 +                m1 = atom[a1].m;
 +                m2 = atom[a2].m;
 +                if (fc > 0 && m1 > 0 && m2 > 0)
 +                {
 +                    period2 = twopi2*m1*m2/((m1 + m2)*fc);
 +                }
 +                else
 +                {
 +                    period2 = GMX_FLOAT_MAX;
 +                }
 +                if (debug)
 +                {
 +                    fprintf(debug,"fc %g m1 %g m2 %g period %g\n",
 +                            fc,m1,m2,sqrt(period2));
 +                }
 +                if (period2 < limit2)
 +                {
 +                    bFound = FALSE;
 +                    for(j=0; j<ilc->nr; j+=3)
 +                    {
 +                        if ((ilc->iatoms[j+1] == a1 && ilc->iatoms[j+2] == a2) ||
 +                            (ilc->iatoms[j+1] == a2 && ilc->iatoms[j+2] == a1))
 +                            {
 +                                bFound = TRUE;
 +                            }
 +                        }
 +                    for(j=0; j<ils->nr; j+=4)
 +                    {
 +                        if ((a1 == ils->iatoms[j+1] || a1 == ils->iatoms[j+2] || a1 == ils->iatoms[j+3]) &&
 +                            (a2 == ils->iatoms[j+1] || a2 == ils->iatoms[j+2] || a2 == ils->iatoms[j+3]))
 +                        {
 +                            bFound = TRUE;
 +                        }
 +                    }
 +                    if (!bFound &&
 +                        (w_moltype == NULL || period2 < w_period2))
 +                    {
 +                        w_moltype = moltype;
 +                        w_a1      = a1;
 +                        w_a2      = a2;
 +                        w_period2 = period2;
 +                    }
 +                }
 +            }
 +        }
 +    }
 +    
 +    if (w_moltype != NULL)
 +    {
 +        bWarn = (w_period2 < sqr(min_steps_warn*dt));
 +        /* A check that would recognize most water models */
 +        bWater = ((*w_moltype->atoms.atomname[0])[0] == 'O' &&
 +                  w_moltype->atoms.nr <= 5);
 +        sprintf(warn_buf,"The bond in molecule-type %s between atoms %d %s and %d %s has an estimated oscillational period of %.1e ps, which is less than %d times the time step of %.1e ps.\n"
 +                "%s",
 +                *w_moltype->name,
 +                w_a1+1,*w_moltype->atoms.atomname[w_a1],
 +                w_a2+1,*w_moltype->atoms.atomname[w_a2],
 +                sqrt(w_period2),bWarn ? min_steps_warn : min_steps_note,dt,
 +                bWater ?
 +                "Maybe you asked for flexible water." :
 +                "Maybe you forgot to change the constraints mdp option.");
 +        if (bWarn)
 +        {
 +            warning(wi,warn_buf);
 +        }
 +        else
 +        {
 +            warning_note(wi,warn_buf);
 +        }
 +    }
 +}
 +
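 +/* Descriptive note on the function below: it zeroes the velocities of
 + * particles that carry no mass of their own in the integration: shells,
 + * bond-type particles and virtual sites.
 + */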
 +static void check_vel(gmx_mtop_t *mtop,rvec v[])
 +{
 +  gmx_mtop_atomloop_all_t aloop;
 +  t_atom *atom;
 +  int a;
 +
 +  aloop = gmx_mtop_atomloop_all_init(mtop);
 +  while (gmx_mtop_atomloop_all_next(aloop,&a,&atom)) {
 +    if (atom->ptype == eptShell ||
 +      atom->ptype == eptBond  ||
 +      atom->ptype == eptVSite) {
 +      clear_rvec(v[a]);
 +    }
 +  }
 +}
 +
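 +/* Descriptive note on the function below: it returns the total number of
 + * interactions of function type ftype, summed over all molecule blocks.
 + */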
 +static gmx_bool nint_ftype(gmx_mtop_t *mtop,t_molinfo *mi,int ftype)
 +{
 +  int nint,mb;
 +
 +  nint = 0;
 +  for(mb=0; mb<mtop->nmolblock; mb++) {
 +    nint += mtop->molblock[mb].nmol*mi[mtop->molblock[mb].type].plist[ftype].nr;
 +  }
 +
 +  return nint;
 +}
 +
 +/* This routine reorders the molecule type array
 + * in the order of use in the molblocks,
 + * unused molecule types are deleted.
 + */
 +static void renumber_moltypes(gmx_mtop_t *sys,
 +                            int *nmolinfo,t_molinfo **molinfo)
 +{
 +  int *order,norder,i;
 +  int mb,mi;
 +  t_molinfo *minew;
 +
 +  snew(order,*nmolinfo);
 +  norder = 0;
 +  for(mb=0; mb<sys->nmolblock; mb++) {
 +    for(i=0; i<norder; i++) {
 +      if (order[i] == sys->molblock[mb].type) {
 +      break;
 +      }
 +    }
 +    if (i == norder) {
 +      /* This type did not occur yet, add it */
 +      order[norder] = sys->molblock[mb].type;
 +      /* Renumber the moltype in the topology */
 +      norder++;
 +    }
 +    sys->molblock[mb].type = i;
 +  }
 +  
 +  /* We still need to reorder the molinfo structs */
 +  snew(minew,norder);
 +  for(mi=0; mi<*nmolinfo; mi++) {
 +    for(i=0; i<norder; i++) {
 +      if (order[i] == mi) {
 +      break;
 +      }
 +    }
 +    if (i == norder) {
 +      done_mi(&(*molinfo)[mi]);
 +    } else {
 +      minew[i] = (*molinfo)[mi];
 +    }
 +  }
 +  sfree(*molinfo);
 +
 +  *nmolinfo = norder;
 +  *molinfo  = minew;
 +}
 +
 +static void molinfo2mtop(int nmi,t_molinfo *mi,gmx_mtop_t *mtop)
 +{
 +  int m;
 +  gmx_moltype_t *molt;
 +
 +  mtop->nmoltype = nmi;
 +  snew(mtop->moltype,nmi);
 +  for(m=0; m<nmi; m++) {
 +    molt = &mtop->moltype[m];
 +    molt->name  = mi[m].name;
 +    molt->atoms = mi[m].atoms;
 +    /* ilists are copied later */
 +    molt->cgs   = mi[m].cgs;
 +    molt->excls = mi[m].excls;
 +  }
 +}
 +
 +static void
 +new_status(const char *topfile,const char *topppfile,const char *confin,
 +           t_gromppopts *opts,t_inputrec *ir,gmx_bool bZero,
 +           gmx_bool bGenVel,gmx_bool bVerbose,t_state *state,
 +           gpp_atomtype_t atype,gmx_mtop_t *sys,
 +           int *nmi,t_molinfo **mi,t_params plist[],
 +           int *comb,double *reppow,real *fudgeQQ,
 +           gmx_bool bMorse,
 +           warninp_t wi)
 +{
 +  t_molinfo   *molinfo=NULL;
 +  int         nmolblock;
 +  gmx_molblock_t *molblock,*molbs;
 +  t_atoms     *confat;
 +  int         mb,i,nrmols,nmismatch;
 +  char        buf[STRLEN];
 +  gmx_bool        bGB=FALSE;
 +  char        warn_buf[STRLEN];
 +
 +  init_mtop(sys);
 +
 +  /* Set gmx_boolean for GB */
 +  if(ir->implicit_solvent)
 +    bGB=TRUE;
 +  
 +  /* TOPOLOGY processing */
 +  sys->name = do_top(bVerbose,topfile,topppfile,opts,bZero,&(sys->symtab),
 +                     plist,comb,reppow,fudgeQQ,
 +                     atype,&nrmols,&molinfo,ir,
 +                     &nmolblock,&molblock,bGB,
 +                     wi);
 +  
 +  sys->nmolblock = 0;
 +  snew(sys->molblock,nmolblock);
 +  
 +  sys->natoms = 0;
 +  for(mb=0; mb<nmolblock; mb++) {
 +    if (sys->nmolblock > 0 &&
 +      molblock[mb].type == sys->molblock[sys->nmolblock-1].type) {
 +      /* Merge consecutive blocks with the same molecule type */
 +      sys->molblock[sys->nmolblock-1].nmol += molblock[mb].nmol;
 +      sys->natoms += molblock[mb].nmol*sys->molblock[sys->nmolblock-1].natoms_mol;
 +    } else if (molblock[mb].nmol > 0) {
 +      /* Add a new molblock to the topology */
 +      molbs = &sys->molblock[sys->nmolblock];
 +      *molbs = molblock[mb];
 +      molbs->natoms_mol = molinfo[molbs->type].atoms.nr;
 +      molbs->nposres_xA = 0;
 +      molbs->nposres_xB = 0;
 +      sys->natoms += molbs->nmol*molbs->natoms_mol;
 +      sys->nmolblock++;
 +    }
 +  }
 +  if (sys->nmolblock == 0) {
 +    gmx_fatal(FARGS,"No molecules were defined in the system");
 +  }
 +
 +  renumber_moltypes(sys,&nrmols,&molinfo);
 +
 +  if (bMorse)
 +    convert_harmonics(nrmols,molinfo,atype);
 +
 +  if (ir->eDisre == edrNone) {
 +    i = rm_interactions(F_DISRES,nrmols,molinfo);
 +    if (i > 0) {
 +      set_warning_line(wi,"unknown",-1);
 +      sprintf(warn_buf,"disre = no, removed %d distance restraints",i);
 +      warning_note(wi,warn_buf);
 +    }
 +  }
 +  if (opts->bOrire == FALSE) {
 +    i = rm_interactions(F_ORIRES,nrmols,molinfo);
 +    if (i > 0) {
 +      set_warning_line(wi,"unknown",-1);
 +      sprintf(warn_buf,"orire = no, removed %d orientation restraints",i);
 +      warning_note(wi,warn_buf);
 +    }
 +  }
 +  
 +  /* Copy structures from msys to sys */
 +  molinfo2mtop(nrmols,molinfo,sys);
 +
 +  gmx_mtop_finalize(sys);
 + 
 +  /* COORDINATE file processing */
 +  if (bVerbose) 
 +    fprintf(stderr,"processing coordinates...\n");
 +
 +  get_stx_coordnum(confin,&state->natoms);
 +  if (state->natoms != sys->natoms)
 +    gmx_fatal(FARGS,"number of coordinates in coordinate file (%s, %d)\n"
 +              "             does not match topology (%s, %d)",
 +            confin,state->natoms,topfile,sys->natoms);
 +  else {
 +    /* make space for coordinates and velocities */
 +    char title[STRLEN];
 +    snew(confat,1);
 +    init_t_atoms(confat,state->natoms,FALSE);
 +    init_state(state,state->natoms,0,0,0,0);
 +    read_stx_conf(confin,title,confat,state->x,state->v,NULL,state->box);
 +    /* This call fixes the box shape for runs with pressure scaling */
 +    set_box_rel(ir,state);
 +
 +    nmismatch = check_atom_names(topfile, confin, sys, confat);
 +    free_t_atoms(confat,TRUE);
 +    sfree(confat);
 +    
 +    if (nmismatch) {
 +      sprintf(buf,"%d non-matching atom name%s\n"
 +            "atom names from %s will be used\n"
 +            "atom names from %s will be ignored\n",
 +            nmismatch,(nmismatch == 1) ? "" : "s",topfile,confin);
 +      warning(wi,buf);
 +    }    
 +    if (bVerbose) 
 +      fprintf(stderr,"double-checking input for internal consistency...\n");
 +    double_check(ir,state->box,nint_ftype(sys,molinfo,F_CONSTR),wi);
 +  }
 +
 +  if (bGenVel) {
 +    real *mass;
 +    gmx_mtop_atomloop_all_t aloop;
 +    t_atom *atom;
 +
 +    snew(mass,state->natoms);
 +    aloop = gmx_mtop_atomloop_all_init(sys);
 +    while (gmx_mtop_atomloop_all_next(aloop,&i,&atom)) {
 +      mass[i] = atom->m;
 +    }
 +
 +    if (opts->seed == -1) {
 +      opts->seed = make_seed();
 +      fprintf(stderr,"Setting gen_seed to %d\n",opts->seed);
 +    }
 +    maxwell_speed(opts->tempi,opts->seed,sys,state->v);
 +
 +    stop_cm(stdout,state->natoms,mass,state->x,state->v);
 +    sfree(mass);
 +  }
 +
 +  *nmi = nrmols;
 +  *mi  = molinfo;
 +}
 +
 +static void copy_state(const char *slog,t_trxframe *fr,
 +                       gmx_bool bReadVel,t_state *state,
 +                       double *use_time)
 +{
 +    int i;
 +
 +    if (fr->not_ok & FRAME_NOT_OK)
 +    {
 +        gmx_fatal(FARGS,"Can not start from an incomplete frame");
 +    }
 +    if (!fr->bX)
 +    {
 +        gmx_fatal(FARGS,"Did not find a frame with coordinates in file %s",
 +                  slog);
 +    }
 +
 +    for(i=0; i<state->natoms; i++)
 +    {
 +        copy_rvec(fr->x[i],state->x[i]);
 +    }
 +    if (bReadVel)
 +    {
 +        if (!fr->bV)
 +        {
 +            gmx_incons("Trajectory frame unexpectedly does not contain velocities");
 +        }
 +        for(i=0; i<state->natoms; i++)
 +        {
 +            copy_rvec(fr->v[i],state->v[i]);
 +        }
 +    }
 +    if (fr->bBox)
 +    {
 +        copy_mat(fr->box,state->box);
 +    }
 +
 +    *use_time = fr->time;
 +}
 +
 +static void cont_status(const char *slog,const char *ener,
 +                      gmx_bool bNeedVel,gmx_bool bGenVel, real fr_time,
 +                      t_inputrec *ir,t_state *state,
 +                      gmx_mtop_t *sys,
 +                        const output_env_t oenv)
 +     /* If fr_time == -1 read the last frame available which is complete */
 +{
 +    gmx_bool bReadVel;
 +    t_trxframe  fr;
 +    t_trxstatus *fp;
 +    int i;
 +    double use_time;
 +
 +    bReadVel = (bNeedVel && !bGenVel);
 +
 +    fprintf(stderr,
 +            "Reading Coordinates%s and Box size from old trajectory\n",
 +            bReadVel ? ", Velocities" : "");
 +    if (fr_time == -1)
 +    {
 +        fprintf(stderr,"Will read whole trajectory\n");
 +    }
 +    else
 +    {
 +        fprintf(stderr,"Will read till time %g\n",fr_time);
 +    }
 +    if (!bReadVel)
 +    {
 +        if (bGenVel)
 +        {
 +            fprintf(stderr,"Velocities generated: "
 +                    "ignoring velocities in input trajectory\n");
 +        }
 +        read_first_frame(oenv,&fp,slog,&fr,TRX_NEED_X);
 +    }
 +    else
 +    {
 +        read_first_frame(oenv,&fp,slog,&fr,TRX_NEED_X | TRX_NEED_V);
 +        
 +        if (!fr.bV)
 +        {
 +            fprintf(stderr,
 +                    "\n"
 +                    "WARNING: Did not find a frame with velocities in file %s,\n"
 +                    "         all velocities will be set to zero!\n\n",slog);
 +            for(i=0; i<sys->natoms; i++)
 +            {
 +                clear_rvec(state->v[i]);
 +            }
 +            close_trj(fp);
 +            /* Search for a frame without velocities */
 +            bReadVel = FALSE;
 +            read_first_frame(oenv,&fp,slog,&fr,TRX_NEED_X);
 +        }
 +    }
 +
 +    state->natoms = fr.natoms;
 +
 +    if (sys->natoms != state->natoms)
 +    {
 +        gmx_fatal(FARGS,"Number of atoms in Topology "
 +                  "is not the same as in Trajectory");
 +    }
 +    copy_state(slog,&fr,bReadVel,state,&use_time);
 +
 +    /* Find the appropriate frame */
 +    while ((fr_time == -1 || fr.time < fr_time) &&
 +           read_next_frame(oenv,fp,&fr))
 +    {
 +        copy_state(slog,&fr,bReadVel,state,&use_time);
 +    }
 +  
 +    close_trj(fp);
 +
 +    /* Set the relative box lengths for preserving the box shape.
 +     * Note that this call can lead to differences in the last bit
 +     * with respect to using tpbconv to create a [TT].tpx[tt] file.
 +     */
 +    set_box_rel(ir,state);
 +
 +    fprintf(stderr,"Using frame at t = %g ps\n",use_time);
 +    fprintf(stderr,"Starting time for run is %g ps\n",ir->init_t); 
 +  
 +    if ((ir->epc != epcNO  || ir->etc ==etcNOSEHOOVER) && ener)
 +    {
 +        get_enx_state(ener,use_time,&sys->groups,ir,state);
 +        preserve_box_shape(ir,state->box_rel,state->boxv);
 +    }
 +}
 +
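 +/* Descriptive note on the function below: it reads the position-restraint
 + * reference coordinates from fn and stores them per molecule block in mtop
 + * (A or B coordinates depending on bTopB); with rc_scaling == erscCOM or
 + * erscALL it also computes the reference COM or converts the coordinates
 + * to scaled (crystal) coordinates.
 + */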
 +static void read_posres(gmx_mtop_t *mtop,t_molinfo *molinfo,gmx_bool bTopB,
 +                        char *fn,
 +                        int rc_scaling, int ePBC, 
 +                        rvec com,
 +                        warninp_t wi)
 +{
 +  gmx_bool   bFirst = TRUE, *hadAtom;
 +  rvec   *x,*v,*xp;
 +  dvec   sum;
 +  double totmass;
 +  t_atoms dumat;
 +  matrix box,invbox;
 +  int    natoms,npbcdim=0;
 +  char   warn_buf[STRLEN],title[STRLEN];
 +  int    a,i,ai,j,k,mb,nat_molb;
 +  gmx_molblock_t *molb;
 +  t_params *pr,*prfb;
 +  t_atom *atom;
 +
 +  get_stx_coordnum(fn,&natoms);
 +  if (natoms != mtop->natoms) {
 +    sprintf(warn_buf,"The number of atoms in %s (%d) does not match the number of atoms in the topology (%d). Will assume that the first %d atoms in the topology and %s match.",fn,natoms,mtop->natoms,min(mtop->natoms,natoms),fn);
 +    warning(wi,warn_buf);
 +  }
 +  snew(x,natoms);
 +  snew(v,natoms);
 +  init_t_atoms(&dumat,natoms,FALSE);
 +  read_stx_conf(fn,title,&dumat,x,v,NULL,box);
 +  
 +  npbcdim = ePBC2npbcdim(ePBC);
 +  clear_rvec(com);
 +  if (rc_scaling != erscNO) {
 +    copy_mat(box,invbox);
 +    for(j=npbcdim; j<DIM; j++) {
 +      clear_rvec(invbox[j]);
 +      invbox[j][j] = 1;
 +    }
 +    m_inv_ur0(invbox,invbox);
 +  }
 +
 +  /* Copy the reference coordinates to mtop */
 +  clear_dvec(sum);
 +  totmass = 0;
 +  a = 0;
 +  snew(hadAtom,natoms);
 +  for(mb=0; mb<mtop->nmolblock; mb++) {
 +    molb = &mtop->molblock[mb];
 +    nat_molb = molb->nmol*mtop->moltype[molb->type].atoms.nr;
 +    pr = &(molinfo[molb->type].plist[F_POSRES]);
 +    prfb = &(molinfo[molb->type].plist[F_FBPOSRES]);
 +    if (pr->nr > 0 || prfb->nr > 0) {
 +      atom = mtop->moltype[molb->type].atoms.atom;
 +      for(i=0; (i<pr->nr); i++) {
 +      ai=pr->param[i].AI;
 +      if (ai >= natoms) {
 +        gmx_fatal(FARGS,"Position restraint atom index (%d) in moltype '%s' is larger than number of atoms in %s (%d).\n",
 +                  ai+1,*molinfo[molb->type].name,fn,natoms);
 +      }
 +    hadAtom[ai]=TRUE;
 +      if (rc_scaling == erscCOM) {
 +        /* Determine the center of mass of the posres reference coordinates */
 +        for(j=0; j<npbcdim; j++) {
 +          sum[j] += atom[ai].m*x[a+ai][j];
 +        }
 +        totmass  += atom[ai].m;
 +      }
 +      }
 +      /* Same for flat-bottomed posres, but do not count an atom twice for COM */
 +      for(i=0; (i<prfb->nr); i++) {
 +          ai=prfb->param[i].AI;
 +          if (ai >= natoms) {
 +              gmx_fatal(FARGS,"Position restraint atom index (%d) in moltype '%s' is larger than number of atoms in %s (%d).\n",
 +                        ai+1,*molinfo[molb->type].name,fn,natoms);
 +          }
 +          if (rc_scaling == erscCOM && hadAtom[ai] == FALSE) {
 +              /* Determine the center of mass of the posres reference coordinates */
 +              for(j=0; j<npbcdim; j++) {
 +                  sum[j] += atom[ai].m*x[a+ai][j];
 +              }
 +              totmass  += atom[ai].m;
 +          }
 +      }
 +      if (!bTopB) {
 +      molb->nposres_xA = nat_molb;
 +      snew(molb->posres_xA,molb->nposres_xA);
 +      for(i=0; i<nat_molb; i++) {
 +        copy_rvec(x[a+i],molb->posres_xA[i]);
 +      }
 +      } else {
 +      molb->nposres_xB = nat_molb;
 +      snew(molb->posres_xB,molb->nposres_xB);
 +      for(i=0; i<nat_molb; i++) {
 +        copy_rvec(x[a+i],molb->posres_xB[i]);
 +      }
 +      }
 +    }
 +    a += nat_molb;
 +  }
 +  if (rc_scaling == erscCOM) {
 +    if (totmass == 0)
 +      gmx_fatal(FARGS,"The total mass of the position restraint atoms is 0");
 +    for(j=0; j<npbcdim; j++)
 +      com[j] = sum[j]/totmass;
 +    fprintf(stderr,"The center of mass of the position restraint coord's is %6.3f %6.3f %6.3f\n",com[XX],com[YY],com[ZZ]);
 +  }
 +
 +  if (rc_scaling != erscNO) {
 +    for(mb=0; mb<mtop->nmolblock; mb++) {
 +      molb = &mtop->molblock[mb];
 +      nat_molb = molb->nmol*mtop->moltype[molb->type].atoms.nr;
 +      if (molb->nposres_xA > 0 || molb->nposres_xB > 0) {
 +      xp = (!bTopB ? molb->posres_xA : molb->posres_xB);
 +      for(i=0; i<nat_molb; i++) {
 +        for(j=0; j<npbcdim; j++) {
 +          if (rc_scaling == erscALL) {
 +            /* Convert from Cartesian to crystal coordinates */
 +            xp[i][j] *= invbox[j][j];
 +            for(k=j+1; k<npbcdim; k++) {
 +              xp[i][j] += invbox[k][j]*xp[i][k];
 +            }
 +          } else if (rc_scaling == erscCOM) {
 +            /* Subtract the center of mass */
 +            xp[i][j] -= com[j];
 +          }
 +        }
 +      }
 +      }
 +    }
 +
 +    if (rc_scaling == erscCOM) {
 +      /* Convert the COM from Cartesian to crystal coordinates */
 +      for(j=0; j<npbcdim; j++) {
 +      com[j] *= invbox[j][j];
 +      for(k=j+1; k<npbcdim; k++) {
 +        com[j] += invbox[k][j]*com[k];
 +      }
 +      }
 +    }
 +  }
 +  
 +  free_t_atoms(&dumat,TRUE);
 +  sfree(x);
 +  sfree(v);
 +  sfree(hadAtom);
 +}
 +
 +static void gen_posres(gmx_mtop_t *mtop,t_molinfo *mi,
 +                       char *fnA, char *fnB,
 +                       int rc_scaling, int ePBC,
 +                       rvec com, rvec comB,
 +                       warninp_t wi)
 +{
 +  int i,j;
 +
 +  read_posres  (mtop,mi,FALSE,fnA,rc_scaling,ePBC,com,wi);
 +  if (strcmp(fnA,fnB) != 0) {
 +      read_posres(mtop,mi,TRUE ,fnB,rc_scaling,ePBC,comB,wi);
 +  }
 +}
 +
 +static void set_wall_atomtype(gpp_atomtype_t at,t_gromppopts *opts,
 +                              t_inputrec *ir,warninp_t wi)
 +{
 +  int i;
 +  char warn_buf[STRLEN];
 +
 +  if (ir->nwall > 0)
 +  {
 +      fprintf(stderr,"Searching the wall atom type(s)\n");
 +  }
 +  for(i=0; i<ir->nwall; i++)
 +  {
 +      ir->wall_atomtype[i] = get_atomtype_type(opts->wall_atomtype[i],at);
 +      if (ir->wall_atomtype[i] == NOTSET)
 +      {
 +          sprintf(warn_buf,"Specified wall atom type %s is not defined",opts->wall_atomtype[i]);
 +          warning_error(wi,warn_buf);
 +      }
 +  }
 +}
 +
 +static int nrdf_internal(t_atoms *atoms)
 +{
 +  int i,nmass,nrdf;
 +
 +  nmass = 0;
 +  for(i=0; i<atoms->nr; i++) {
 +    /* Vsite ptype might not be set here yet, so also check the mass */
 +    if ((atoms->atom[i].ptype == eptAtom ||
 +       atoms->atom[i].ptype == eptNucleus)
 +      && atoms->atom[i].m > 0) {
 +      nmass++;
 +    }
 +  }
 +  switch (nmass) {
 +  case 0:  nrdf = 0; break;
 +  case 1:  nrdf = 0; break;
 +  case 2:  nrdf = 1; break;
 +  default: nrdf = nmass*3 - 6; break;
 +  }
 +  
 +  return nrdf;
 +}
 +
 +void
 +spline1d( double        dx,
 +               double *      y,
 +               int           n,
 +               double *      u,
 +               double *      y2 )
 +{
 +    int i;
 +    double p,q;
 +      
 +    y2[0] = 0.0;
 +    u[0]  = 0.0;
 +      
 +    for(i=1;i<n-1;i++)
 +    {
 +              p = 0.5*y2[i-1]+2.0;
 +        y2[i] = -0.5/p;
 +        q = (y[i+1]-2.0*y[i]+y[i-1])/dx;
 +              u[i] = (3.0*q/dx-0.5*u[i-1])/p;
 +    }
 +      
 +    y2[n-1] = 0.0;
 +      
 +    for(i=n-2;i>=0;i--)
 +    {
 +        y2[i] = y2[i]*y2[i+1]+u[i];
 +    }
 +}
 +
 +
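 +/* Descriptive note on the function below: it evaluates the cubic spline
 + * defined by ya and its second derivatives y2a (uniform grid starting at
 + * xmin with spacing dx) at x, returning the interpolated value in *y and
 + * its first derivative in *y1.
 + */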
 +void
 +interpolate1d( double     xmin,
 +                        double     dx,
 +                        double *   ya,
 +                        double *   y2a,
 +                        double     x,
 +                        double *   y,
 +                        double *   y1)
 +{
 +    int ix;
 +    double a,b;
 +      
 +    ix = (x-xmin)/dx;
 +      
 +    a = (xmin+(ix+1)*dx-x)/dx;
 +    b = (x-xmin-ix*dx)/dx;
 +      
 +    *y  = a*ya[ix]+b*ya[ix+1]+((a*a*a-a)*y2a[ix]+(b*b*b-b)*y2a[ix+1])*(dx*dx)/6.0;
 +    *y1 = (ya[ix+1]-ya[ix])/dx-(3.0*a*a-1.0)/6.0*dx*y2a[ix]+(3.0*b*b-1.0)/6.0*dx*y2a[ix+1];
 +}
 +
 +
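 +/* Descriptive note on the function below: for every point of each of the nc
 + * CMAP grids it stores the energy value and its first and mixed second
 + * derivatives with respect to phi and psi, obtained from 1D cubic splines
 + * of the periodically extended input grid.
 + */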
 +void
 +setup_cmap (int              grid_spacing,
 +                      int              nc,
 +                      real *           grid ,
 +                      gmx_cmap_t *     cmap_grid)
 +{
 +      double *tmp_u,*tmp_u2,*tmp_yy,*tmp_y1,*tmp_t2,*tmp_grid;
 +      
 +    int    i,j,k,ii,jj,kk,idx;
 +      int    offset;
 +    double dx,xmin,v,v1,v2,v12;
 +    double phi,psi;
 +      
 +      snew(tmp_u,2*grid_spacing);
 +      snew(tmp_u2,2*grid_spacing);
 +      snew(tmp_yy,2*grid_spacing);
 +      snew(tmp_y1,2*grid_spacing);
 +      snew(tmp_t2,2*grid_spacing*2*grid_spacing);
 +      snew(tmp_grid,2*grid_spacing*2*grid_spacing);
 +      
 +    dx = 360.0/grid_spacing;
 +    xmin = -180.0-dx*grid_spacing/2;
 +      
 +      for(kk=0;kk<nc;kk++)
 +      {
 +              /* Compute an offset depending on which cmap we are using 
 +               * Offset will be the map number multiplied by the
 +                 * grid_spacing * grid_spacing * 2
 +               */
 +              offset = kk * grid_spacing * grid_spacing * 2;
 +              
 +              for(i=0;i<2*grid_spacing;i++)
 +              {
 +                      ii=(i+grid_spacing-grid_spacing/2)%grid_spacing;
 +                      
 +                      for(j=0;j<2*grid_spacing;j++)
 +                      {
 +                              jj=(j+grid_spacing-grid_spacing/2)%grid_spacing;
 +                              tmp_grid[i*grid_spacing*2+j] = grid[offset+ii*grid_spacing+jj];
 +                      }
 +              }
 +              
 +              for(i=0;i<2*grid_spacing;i++)
 +              {
 +                      spline1d(dx,&(tmp_grid[2*grid_spacing*i]),2*grid_spacing,tmp_u,&(tmp_t2[2*grid_spacing*i]));
 +              }
 +              
 +              for(i=grid_spacing/2;i<grid_spacing+grid_spacing/2;i++)
 +              {
 +                      ii = i-grid_spacing/2;
 +                      phi = ii*dx-180.0;
 +                      
 +                      for(j=grid_spacing/2;j<grid_spacing+grid_spacing/2;j++)
 +                      {
 +                              jj = j-grid_spacing/2;
 +                              psi = jj*dx-180.0;
 +                              
 +                              for(k=0;k<2*grid_spacing;k++)
 +                              {
 +                                      interpolate1d(xmin,dx,&(tmp_grid[2*grid_spacing*k]),
 +                                                                &(tmp_t2[2*grid_spacing*k]),psi,&tmp_yy[k],&tmp_y1[k]);
 +                              }
 +                              
 +                              spline1d(dx,tmp_yy,2*grid_spacing,tmp_u,tmp_u2);
 +                              interpolate1d(xmin,dx,tmp_yy,tmp_u2,phi,&v,&v1);
 +                              spline1d(dx,tmp_y1,2*grid_spacing,tmp_u,tmp_u2);
 +                              interpolate1d(xmin,dx,tmp_y1,tmp_u2,phi,&v2,&v12);
 +                              
 +                              idx = ii*grid_spacing+jj;
 +                              cmap_grid->cmapdata[kk].cmap[idx*4] = grid[offset+ii*grid_spacing+jj];
 +                              cmap_grid->cmapdata[kk].cmap[idx*4+1] = v1;
 +                              cmap_grid->cmapdata[kk].cmap[idx*4+2] = v2;
 +                              cmap_grid->cmapdata[kk].cmap[idx*4+3] = v12;
 +                      }
 +              }
 +      }
 +}                             
 +                              
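 +/* Descriptive note on the function below: it allocates storage for ngrid
 + * CMAP grids with four numbers (value and three derivatives) per grid point.
 + */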
 +void init_cmap_grid(gmx_cmap_t *cmap_grid, int ngrid, int grid_spacing)
 +{
 +      int i,k,nelem;
 +      
 +      cmap_grid->ngrid        = ngrid;
 +      cmap_grid->grid_spacing = grid_spacing;
 +      nelem                   = cmap_grid->grid_spacing*cmap_grid->grid_spacing;
 +      
 +      snew(cmap_grid->cmapdata,ngrid);
 +      
 +      for(i=0;i<cmap_grid->ngrid;i++)
 +      {
 +              snew(cmap_grid->cmapdata[i].cmap,4*nelem);
 +      }
 +}
 +
 +
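 +/* Descriptive note on the function below: it counts the constraints per
 + * molecule type (each SETTLE counts as three), warns when a molecule type
 + * has more constraints than internal degrees of freedom, and returns the
 + * total number of constraints in the system.
 + */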
 +static int count_constraints(gmx_mtop_t *mtop,t_molinfo *mi,warninp_t wi)
 +{
 +  int count,count_mol,i,mb;
 +  gmx_molblock_t *molb;
 +  t_params *plist;
 +  char buf[STRLEN];
 +
 +  count = 0;
 +  for(mb=0; mb<mtop->nmolblock; mb++) {
 +    count_mol = 0;
 +    molb  = &mtop->molblock[mb];
 +    plist = mi[molb->type].plist;
 +      
 +    for(i=0; i<F_NRE; i++) {
 +      if (i == F_SETTLE)
 +      count_mol += 3*plist[i].nr;
 +      else if (interaction_function[i].flags & IF_CONSTRAINT)
 +      count_mol += plist[i].nr;
 +    }
 +      
 +    if (count_mol > nrdf_internal(&mi[molb->type].atoms)) {
 +      sprintf(buf,
 +            "Molecule type '%s' has %d constraints.\n"
 +            "For stability and efficiency there should not be more constraints than the internal number of degrees of freedom: %d.\n",
 +            *mi[molb->type].name,count_mol,
 +            nrdf_internal(&mi[molb->type].atoms));
 +      warning(wi,buf);
 +    }
 +    count += molb->nmol*count_mol;
 +  }
 +
 +  return count;
 +}
 +
 +static void check_gbsa_params_charged(gmx_mtop_t *sys, gpp_atomtype_t atype)
 +{
 +    int i,nmiss,natoms,mt;
 +    real q;
 +    const t_atoms *atoms;
 +  
 +    nmiss = 0;
 +    for(mt=0;mt<sys->nmoltype;mt++)
 +    {
 +        atoms  = &sys->moltype[mt].atoms;
 +        natoms = atoms->nr;
 +
 +        for(i=0;i<natoms;i++)
 +        {
 +            q = atoms->atom[i].q;
 +            if ((get_atomtype_radius(atoms->atom[i].type,atype)    == 0  ||
 +                 get_atomtype_vol(atoms->atom[i].type,atype)       == 0  ||
 +                 get_atomtype_surftens(atoms->atom[i].type,atype)  == 0  ||
 +                 get_atomtype_gb_radius(atoms->atom[i].type,atype) == 0  ||
 +                 get_atomtype_S_hct(atoms->atom[i].type,atype)     == 0) &&
 +                q != 0)
 +            {
 +                fprintf(stderr,"\nGB parameter(s) zero for atom type '%s' while charge is %g\n",
 +                        get_atomtype_name(atoms->atom[i].type,atype),q);
 +                nmiss++;
 +            }
 +        }
 +    }
 +
 +    if (nmiss > 0)
 +    {
 +        gmx_fatal(FARGS,"Can't do GB electrostatics; the implicit_genborn_params section of the forcefield has parameters with value zero for %d atomtypes that occur as charged atoms.",nmiss);
 +    }
 +}
 +
 +
 +static void check_gbsa_params(t_inputrec *ir,gpp_atomtype_t atype)
 +{
 +    int  nmiss,i;
 +
 +    /* If we are doing GBSA, check that we got the parameters we need
 +     * This check verifies that there are GBSA parameters for all
 +     * atoms in the force field. To work around it for testing purposes,
 +     * comment out the nmiss++ counter temporarily.
 +     */
 +    nmiss = 0;
 +    for(i=0;i<get_atomtype_ntypes(atype);i++)
 +    {
 +        if (get_atomtype_radius(i,atype)    < 0 ||
 +            get_atomtype_vol(i,atype)       < 0 ||
 +            get_atomtype_surftens(i,atype)  < 0 ||
 +            get_atomtype_gb_radius(i,atype) < 0 ||
 +            get_atomtype_S_hct(i,atype)     < 0)
 +        {
 +            fprintf(stderr,"\nGB parameter(s) missing or negative for atom type '%s'\n",
 +                    get_atomtype_name(i,atype));
 +            nmiss++;
 +        }
 +    }
 +    
 +    if (nmiss > 0)
 +    {
 +        gmx_fatal(FARGS,"Can't do GB electrostatics; the implicit_genborn_params section of the forcefield is missing parameters for %d atomtypes or they might be negative.",nmiss);
 +    }
 +  
 +}
 +
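 +/* Descriptive note on the function below: it checks that the atoms of every
 + * SETTLE interaction are within a single charge group, and exits with a
 + * fatal error if they are not.
 + */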
 +static void check_settle(gmx_mtop_t   *sys)
 +{
 +    int i,j,cgj1,nra;
 +    
 +    nra = interaction_function[F_SETTLE].nratoms;
 +    for(i=0; (i<sys->nmoltype); i++) 
 +    {
 +        for(j=0; (j<sys->moltype[i].ilist[F_SETTLE].nr); j+=nra+1)
 +        {
 +            cgj1 = sys->moltype[i].cgs.index[j+1];
 +            if (j+2 >= cgj1)
 +                gmx_fatal(FARGS,"For SETTLE you need to have all atoms involved in one charge group. Please fix your topology.");
 +        }
 +    }
 +}
 +
 +int main (int argc, char *argv[])
 +{
 +  static const char *desc[] = {
 +    "The gromacs preprocessor",
 +    "reads a molecular topology file, checks the validity of the",
 +    "file, and expands the topology from a molecular description to an atomic",
 +    "description. The topology file contains information about",
 +    "molecule types and the number of molecules, the preprocessor",
 +    "copies each molecule as needed. ",
 +    "There is no limitation on the number of molecule types. ",
 +    "Bonds and bond-angles can be converted into constraints, separately",
 +    "for hydrogens and heavy atoms.",
 +    "Then a coordinate file is read and velocities can be generated",
 +    "from a Maxwellian distribution if requested.",
 +    "[TT]grompp[tt] also reads parameters for the [TT]mdrun[tt] ",
 +    "(e.g. number of MD steps, time step, cut-off), and others such as",
 +    "NEMD parameters, which are corrected so that the net acceleration",
 +    "is zero.",
 +    "Eventually a binary file is produced that can serve as the sole input",
 +    "file for the MD program.[PAR]",
 +    
 +    "[TT]grompp[tt] uses the atom names from the topology file. The atom names",
 +    "in the coordinate file (option [TT]-c[tt]) are only read to generate",
 +    "warnings when they do not match the atom names in the topology.",
 +    "Note that the atom names are irrelevant for the simulation as",
 +    "only the atom types are used for generating interaction parameters.[PAR]",
 +
 +    "[TT]grompp[tt] uses a built-in preprocessor to resolve includes, macros, ",
 +    "etc. The preprocessor supports the following keywords:[PAR]",
 +    "#ifdef VARIABLE[BR]",
 +    "#ifndef VARIABLE[BR]",
 +    "#else[BR]",
 +    "#endif[BR]",
 +    "#define VARIABLE[BR]",
 +    "#undef VARIABLE[BR]",
 +    "#include \"filename\"[BR]",
 +    "#include <filename>[PAR]",
 +    "The functioning of these statements in your topology may be modulated by",
 +    "using the following two flags in your [TT].mdp[tt] file:[PAR]",
 +    "[TT]define = -DVARIABLE1 -DVARIABLE2[BR]",
 +    "include = -I/home/john/doe[tt][BR]",
 +    "For further information a C-programming textbook may help you out.",
 +    "Specifying the [TT]-pp[tt] flag will get the pre-processed",
 +    "topology file written out so that you can verify its contents.[PAR]",
 +   
 +    /* cpp has been unnecessary for some time, hasn't it?
 +        "If your system does not have a C-preprocessor, you can still",
 +        "use [TT]grompp[tt], but you do not have access to the features ",
 +        "from the cpp. Command line options to the C-preprocessor can be given",
 +        "in the [TT].mdp[tt] file. See your local manual (man cpp).[PAR]",
 +    */
 +    
 +    "When using position restraints a file with restraint coordinates",
 +    "can be supplied with [TT]-r[tt], otherwise restraining will be done",
 +    "with respect to the conformation from the [TT]-c[tt] option.",
 +    "For free energy calculations the coordinates for the B topology",
 +    "can be supplied with [TT]-rb[tt], otherwise they will be equal to",
 +    "those of the A topology.[PAR]",
 +    
 +    "Starting coordinates can be read from trajectory with [TT]-t[tt].",
 +    "The last frame with coordinates and velocities will be read,",
 +    "unless the [TT]-time[tt] option is used. Only if this information",
 +    "is absent will the coordinates in the [TT]-c[tt] file be used.",
 +    "Note that these velocities will not be used when [TT]gen_vel = yes[tt]",
 +    "in your [TT].mdp[tt] file. An energy file can be supplied with",
 +    "[TT]-e[tt] to read Nose-Hoover and/or Parrinello-Rahman coupling",
 +    "variables.[PAR]",
 +
 +    "[TT]grompp[tt] can be used to restart simulations (preserving",
 +    "continuity) by supplying just a checkpoint file with [TT]-t[tt].",
 +    "However, for simply changing the number of run steps to extend",
 +    "a run, using [TT]tpbconv[tt] is more convenient than [TT]grompp[tt].",
 +    "You then supply the old checkpoint file directly to [TT]mdrun[tt]",
 +    "with [TT]-cpi[tt]. If you wish to change the ensemble or things",
 +    "like output frequency, then supplying the checkpoint file to",
 +    "[TT]grompp[tt] with [TT]-t[tt] along with a new [TT].mdp[tt] file",
 +    "with [TT]-f[tt] is the recommended procedure.[PAR]",
 +
 +    "By default, all bonded interactions which have constant energy due to",
 +    "virtual site constructions will be removed. If this constant energy is",
 +    "not zero, this will result in a shift in the total energy. All bonded",
 +    "interactions can be kept by turning off [TT]-rmvsbds[tt]. Additionally,",
 +    "all constraints for distances which will be constant anyway because",
 +    "of virtual site constructions will be removed. If any constraints remain",
 +    "which involve virtual sites, a fatal error will result.[PAR]"
 +    
 +    "To verify your run input file, please take note of all warnings",
 +    "on the screen, and correct where necessary. Do also look at the contents",
 +    "of the [TT]mdout.mdp[tt] file; this contains comment lines, as well as",
 +    "the input that [TT]grompp[tt] has read. If in doubt, you can start [TT]grompp[tt]",
 +    "with the [TT]-debug[tt] option which will give you more information",
 +    "in a file called [TT]grompp.log[tt] (along with real debug info). You",
 +    "can see the contents of the run input file with the [TT]gmxdump[tt]",
 +    "program. [TT]gmxcheck[tt] can be used to compare the contents of two",
 +    "run input files.[PAR]"
 +
 +    "The [TT]-maxwarn[tt] option can be used to override warnings printed",
 +    "by [TT]grompp[tt] that otherwise halt output. In some cases, warnings are",
 +    "harmless, but usually they are not. The user is advised to carefully",
 +    "interpret the output messages before attempting to bypass them with",
 +    "this option."
 +  };
 +  t_gromppopts *opts;
 +  gmx_mtop_t   *sys;
 +  int          nmi;
 +  t_molinfo    *mi;
 +  gpp_atomtype_t atype;
 +  t_inputrec   *ir;
 +  int          natoms,nvsite,comb,mt;
 +  t_params     *plist;
 +  t_state      state;
 +  matrix       box;
 +  real         max_spacing,fudgeQQ;
 +  double       reppow;
 +  char         fn[STRLEN],fnB[STRLEN];
 +  const char   *mdparin;
 +  int          ntype;
 +  gmx_bool         bNeedVel,bGenVel;
 +  gmx_bool         have_atomnumber;
 +  int            n12,n13,n14;
 +  t_params     *gb_plist = NULL;
 +  gmx_genborn_t *born = NULL;
 +  output_env_t oenv;
 +  gmx_bool         bVerbose = FALSE;
 +  warninp_t    wi;
 +  char         warn_buf[STRLEN];
 +
 +  t_filenm fnm[] = {
 +    { efMDP, NULL,  NULL,        ffREAD  },
 +    { efMDP, "-po", "mdout",     ffWRITE },
 +    { efSTX, "-c",  NULL,        ffREAD  },
 +    { efSTX, "-r",  NULL,        ffOPTRD },
 +    { efSTX, "-rb", NULL,        ffOPTRD },
 +    { efNDX, NULL,  NULL,        ffOPTRD },
 +    { efTOP, NULL,  NULL,        ffREAD  },
 +    { efTOP, "-pp", "processed", ffOPTWR },
 +    { efTPX, "-o",  NULL,        ffWRITE },
 +    { efTRN, "-t",  NULL,        ffOPTRD },
 +    { efEDR, "-e",  NULL,        ffOPTRD },
 +    { efTRN, "-ref","rotref",    ffOPTRW }
 +  };
 +#define NFILE asize(fnm)
 +
 +  /* Command line options */
 +  static gmx_bool bRenum=TRUE;
 +  static gmx_bool bRmVSBds=TRUE,bZero=FALSE;
 +  static int  i,maxwarn=0;
 +  static real fr_time=-1;
 +  t_pargs pa[] = {
 +    { "-v",       FALSE, etBOOL,{&bVerbose},  
 +      "Be loud and noisy" },
 +    { "-time",    FALSE, etREAL, {&fr_time},
 +      "Take frame at or first after this time." },
 +    { "-rmvsbds",FALSE, etBOOL, {&bRmVSBds},
 +      "Remove constant bonded interactions with virtual sites" },
 +    { "-maxwarn", FALSE, etINT,  {&maxwarn},
 +      "Number of allowed warnings during input processing. Not for normal use and may generate unstable systems" },
 +    { "-zero",    FALSE, etBOOL, {&bZero},
 +      "Set parameters for bonded interactions without defaults to zero instead of generating an error" },
 +    { "-renum",   FALSE, etBOOL, {&bRenum},
 +      "Renumber atomtypes and minimize number of atomtypes" }
 +  };
 +  
 +  CopyRight(stderr,argv[0]);
 +  
 +  /* Initiate some variables */
 +  snew(ir,1);
 +  snew(opts,1);
 +  init_ir(ir,opts);
 +  
 +  /* Parse the command line */
 +  parse_common_args(&argc,argv,0,NFILE,fnm,asize(pa),pa,
 +                    asize(desc),desc,0,NULL,&oenv);
 +  
 +  wi = init_warning(TRUE,maxwarn);
 +  
 +  /* PARAMETER file processing */
 +  mdparin = opt2fn("-f",NFILE,fnm);
 +  set_warning_line(wi,mdparin,-1);    
 +  get_ir(mdparin,opt2fn("-po",NFILE,fnm),ir,opts,wi);
 +  
 +  if (bVerbose) 
 +    fprintf(stderr,"checking input for internal consistency...\n");
 +  check_ir(mdparin,ir,opts,wi);
 +
 +  if (ir->ld_seed == -1) {
 +    ir->ld_seed = make_seed();
 +    fprintf(stderr,"Setting the LD random seed to %d\n",ir->ld_seed);
 +  }
 +
 +  if (ir->expandedvals->lmc_seed == -1) {
 +    ir->expandedvals->lmc_seed = make_seed();
 +    fprintf(stderr,"Setting the lambda MC random seed to %d\n",ir->expandedvals->lmc_seed);
 +  }
 +
 +  bNeedVel = EI_STATE_VELOCITY(ir->eI);
 +  bGenVel  = (bNeedVel && opts->bGenVel);
 +
 +  snew(plist,F_NRE);
 +  init_plist(plist);
 +  snew(sys,1);
 +  atype = init_atomtype();
 +  if (debug)
 +    pr_symtab(debug,0,"Just opened",&sys->symtab);
 +    
 +  strcpy(fn,ftp2fn(efTOP,NFILE,fnm));
 +  if (!gmx_fexist(fn)) 
 +    gmx_fatal(FARGS,"%s does not exist",fn);
 +  new_status(fn,opt2fn_null("-pp",NFILE,fnm),opt2fn("-c",NFILE,fnm),
 +           opts,ir,bZero,bGenVel,bVerbose,&state,
 +           atype,sys,&nmi,&mi,plist,&comb,&reppow,&fudgeQQ,
 +           opts->bMorse,
 +           wi);
 +  
 +  if (debug)
 +    pr_symtab(debug,0,"After new_status",&sys->symtab);
 +  
 +  if (count_constraints(sys,mi,wi) && (ir->eConstrAlg == econtSHAKE)) {
 +    if (ir->eI == eiCG || ir->eI == eiLBFGS) {
 +        sprintf(warn_buf,"Can not do %s with %s, use %s",
 +                EI(ir->eI),econstr_names[econtSHAKE],econstr_names[econtLINCS]);
 +        warning_error(wi,warn_buf);
 +    }
 +    if (ir->bPeriodicMols) {
 +        sprintf(warn_buf,"Can not do periodic molecules with %s, use %s",
 +                econstr_names[econtSHAKE],econstr_names[econtLINCS]);
 +        warning_error(wi,warn_buf);
 +    }
 +  }
 +
++  if ( EI_SD (ir->eI) &&  ir->etc != etcNO ) {
++      warning_note(wi,"Temperature coupling is ignored with SD integrators.");
++  }
++
 +  /* If we are doing QM/MM, check that we got the atom numbers */
 +  have_atomnumber = TRUE;
 +  for (i=0; i<get_atomtype_ntypes(atype); i++) {
 +    have_atomnumber = have_atomnumber && (get_atomtype_atomnumber(i,atype) >= 0);
 +  }
 +  if (!have_atomnumber && ir->bQMMM)
 +  {
 +      warning_error(wi,
 +                    "\n"
 +                    "It appears as if you are trying to run a QM/MM calculation, but the force\n"
 +                    "field you are using does not contain atom number fields. This is an\n"
 +                    "optional field (introduced in Gromacs 3.3) for general runs, but mandatory\n"
 +                    "for QM/MM. The good news is that it is easy to add - put the atom number as\n"
 +                    "an integer just before the mass column in ffXXXnb.itp.\n"
 +                    "NB: United atoms have the same atom numbers as normal ones.\n\n"); 
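 +      /* A nonbonded atomtypes line with the atom number column would look
 +       * roughly like "OW  8  15.99940  -0.834  A  <c6>  <c12>" -- hypothetical
 +       * values, the exact columns depend on the force field in use. */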
 +  }
 +
 +  if (ir->bAdress) {
 +    if ((ir->adress->const_wf>1) || (ir->adress->const_wf<0)) {
 +      warning_error(wi,"AdResS contant weighting function should be between 0 and 1\n\n");
 +    }
 +    /** \TODO check size of ex+hy width against box size */
 +  }
 + 
 +  /* Check for errors in the input now, since they might cause problems
 +   * during processing further down.
 +   */
 +  check_warning_error(wi,FARGS);
 +
 +  if (opt2bSet("-r",NFILE,fnm))
 +    sprintf(fn,"%s",opt2fn("-r",NFILE,fnm));
 +  else
 +    sprintf(fn,"%s",opt2fn("-c",NFILE,fnm));
 +  if (opt2bSet("-rb",NFILE,fnm))
 +    sprintf(fnB,"%s",opt2fn("-rb",NFILE,fnm));
 +  else
 +    strcpy(fnB,fn);
 +
 +    if (nint_ftype(sys,mi,F_POSRES) > 0 || nint_ftype(sys,mi,F_FBPOSRES) > 0)
 +    {
 +        if (bVerbose)
 +        {
 +            fprintf(stderr,"Reading position restraint coords from %s",fn);
 +            if (strcmp(fn,fnB) == 0)
 +            {
 +                fprintf(stderr,"\n");
 +            }
 +            else
 +            {
 +                fprintf(stderr," and %s\n",fnB);
 +            }
 +        }
 +        gen_posres(sys,mi,fn,fnB,
 +                   ir->refcoord_scaling,ir->ePBC,
 +                   ir->posres_com,ir->posres_comB,
 +                   wi);
 +    }
 +              
 +  nvsite = 0;
 +  /* set parameters for virtual site construction (not for vsiten) */
 +  for(mt=0; mt<sys->nmoltype; mt++) {
 +    nvsite +=
 +      set_vsites(bVerbose, &sys->moltype[mt].atoms, atype, mi[mt].plist);
 +  }
 +  /* now throw away all obsolete bonds, angles and dihedrals: */
 +  /* note: constraints are ALWAYS removed */
 +  if (nvsite) {
 +    for(mt=0; mt<sys->nmoltype; mt++) {
 +      clean_vsite_bondeds(mi[mt].plist,sys->moltype[mt].atoms.nr,bRmVSBds);
 +    }
 +  }
 +  
 +      /* If we are using CMAP, setup the pre-interpolation grid */
 +      if(plist->ncmap>0)
 +      {
 +              init_cmap_grid(&sys->ffparams.cmap_grid, plist->nc, plist->grid_spacing);
 +              setup_cmap(plist->grid_spacing, plist->nc, plist->cmap,&sys->ffparams.cmap_grid);
 +      }
 +      
 +    set_wall_atomtype(atype,opts,ir,wi);
 +  if (bRenum) {
 +    renum_atype(plist, sys, ir->wall_atomtype, atype, bVerbose);
 +    ntype = get_atomtype_ntypes(atype);
 +  }
 +
 +    if (ir->implicit_solvent != eisNO)
 +    {
 +        /* Now we have renumbered the atom types, we can check the GBSA params */
 +        check_gbsa_params(ir,atype);
 +      
 +      /* Check that all atoms that have charge and/or LJ-parameters also have 
 +       * sensible GB-parameters
 +       */
 +      check_gbsa_params_charged(sys,atype);
 +    }
 +
 +      /* PELA: Copy the atomtype data to the topology atomtype list */
 +      copy_atomtype_atomtypes(atype,&(sys->atomtypes));
 +
 +      if (debug)
 +    pr_symtab(debug,0,"After renum_atype",&sys->symtab);
 +
 +  if (bVerbose) 
 +    fprintf(stderr,"converting bonded parameters...\n");
 +      
 +  ntype = get_atomtype_ntypes(atype);
 +  convert_params(ntype, plist, mi, comb, reppow, fudgeQQ, sys);
 +      
 +  if (debug)
 +    pr_symtab(debug,0,"After convert_params",&sys->symtab);
 +
 +  /* set ptype to VSite for virtual sites */
 +  for(mt=0; mt<sys->nmoltype; mt++) {
 +    set_vsites_ptype(FALSE,&sys->moltype[mt]);
 +  }
 +  if (debug) {
 +    pr_symtab(debug,0,"After virtual sites",&sys->symtab);
 +  }
 +  /* Check velocity for virtual sites and shells */
 +  if (bGenVel) {
 +    check_vel(sys,state.v);
 +  }
 +    
 +  /* check for charge groups in settles */
 +  check_settle(sys);
 +  
 +  /* check masses */
 +  check_mol(sys,wi);
 +  
 +  for(i=0; i<sys->nmoltype; i++) {
 +      check_cg_sizes(ftp2fn(efTOP,NFILE,fnm),&sys->moltype[i].cgs,wi);
 +  }
 +
 +  if (EI_DYNAMICS(ir->eI) && ir->eI != eiBD)
 +  {
 +      check_bonds_timestep(sys,ir->delta_t,wi);
 +  }
 +
 +  if (EI_ENERGY_MINIMIZATION(ir->eI) && 0 == ir->nsteps)
 +  {
 +      warning_note(wi,"Zero-step energy minimization will alter the coordinates before calculating the energy. If you just want the energy of a single point, try zero-step MD (with unconstrained_start = yes). To do multiple single-point energy evaluations of different configurations of the same topology, use mdrun -rerun.");
 +  }
 +
 +  check_warning_error(wi,FARGS);
 +      
 +  if (bVerbose) 
 +    fprintf(stderr,"initialising group options...\n");
 +  do_index(mdparin,ftp2fn_null(efNDX,NFILE,fnm),
 +           sys,bVerbose,ir,
 +           bGenVel ? state.v : NULL,
 +           wi);
 +  
 +  /* Init the temperature coupling state */
 +  init_gtc_state(&state,ir->opts.ngtc,0,ir->opts.nhchainlength); /* need to add nnhpres here? */
 +
 +  if (bVerbose)
 +    fprintf(stderr,"Checking consistency between energy and charge groups...\n");
 +  check_eg_vs_cg(sys);
 +  
 +  if (debug)
 +    pr_symtab(debug,0,"After index",&sys->symtab);
 +  triple_check(mdparin,ir,sys,wi);
 +  close_symtab(&sys->symtab);
 +  if (debug)
 +    pr_symtab(debug,0,"After close",&sys->symtab);
 +
 +  /* make exclusions between QM atoms */
 +  if (ir->bQMMM) {
 +    if (ir->QMMMscheme==eQMMMschemenormal && ir->ns_type == ensSIMPLE ){
 +      gmx_fatal(FARGS,"electrostatic embedding only works with grid neighboursearching, use ns-type=grid instead\n");
 +    }
 +    else {
 +     generate_qmexcl(sys,ir,wi);
 +    }
 +  }
 +
 +  if (ftp2bSet(efTRN,NFILE,fnm)) {
 +    if (bVerbose)
 +      fprintf(stderr,"getting data from old trajectory ...\n");
 +    cont_status(ftp2fn(efTRN,NFILE,fnm),ftp2fn_null(efEDR,NFILE,fnm),
 +              bNeedVel,bGenVel,fr_time,ir,&state,sys,oenv);
 +  }
 +
 +    if (ir->ePBC==epbcXY && ir->nwall!=2)
 +    {
 +        clear_rvec(state.box[ZZ]);
 +    }
 +  
 +    if (ir->rlist > 0)
 +    {
 +        set_warning_line(wi,mdparin,-1);
 +        check_chargegroup_radii(sys,ir,state.x,wi);
 +    }
 +
 +  if (EEL_FULL(ir->coulombtype)) {
 +    /* Calculate the optimal grid dimensions */
 +    copy_mat(state.box,box);
 +    if (ir->ePBC==epbcXY && ir->nwall==2)
 +      svmul(ir->wall_ewald_zfac,box[ZZ],box[ZZ]);
 +    max_spacing = calc_grid(stdout,box,opts->fourierspacing,
 +                            &(ir->nkx),&(ir->nky),&(ir->nkz));
 +  }
 +
 +  if (ir->ePull != epullNO)
 +    set_pull_init(ir,sys,state.x,state.box,oenv,opts->pull_start);
 +  
 +  if (ir->bRot)
 +  {
 +      set_reference_positions(ir->rot,sys,state.x,state.box,
 +                              opt2fn("-ref",NFILE,fnm),opt2bSet("-ref",NFILE,fnm),
 +                              wi);
 +  }
 +
 +  /*  reset_multinr(sys); */
 +  
 +  if (EEL_PME(ir->coulombtype)) {
 +      float ratio = pme_load_estimate(sys,ir,state.box);
 +      fprintf(stderr,"Estimate for the relative computational load of the PME mesh part: %.2f\n",ratio);
 +      /* With free energy we might need to do PME both for the A and B state
 +       * charges. This will double the cost, but the optimal performance will
 +       * then probably be at a slightly larger cut-off and grid spacing.
 +       */
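 +      /* Example: a ratio of 0.60 triggers the note below in a plain run
 +       * (threshold 0.5) but not in a free energy run (threshold 2/3). */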
 +      if ((ir->efep == efepNO && ratio > 1.0/2.0) ||
 +          (ir->efep != efepNO && ratio > 2.0/3.0)) {
 +          warning_note(wi,
 +                       "The optimal PME mesh load for parallel simulations is below 0.5\n"
 +                       "and for highly parallel simulations between 0.25 and 0.33.\n"
 +                       "For higher performance, increase the cut-off and the PME grid spacing.\n");
 +          if (ir->efep != efepNO) {
 +              warning_note(wi,
 +                           "For free energy simulations, the optimal load limit increases from 0.5 to 0.667\n");
 +          }
 +      }
 +  }
 +  
 +  {
 +        char warn_buf[STRLEN];
 +        double cio = compute_io(ir,sys->natoms,&sys->groups,F_NRE,1);
 +        sprintf(warn_buf,"This run will generate roughly %.0f Mb of data",cio);
 +        if (cio > 2000) {
 +            set_warning_line(wi,mdparin,-1);
 +            warning_note(wi,warn_buf);
 +        } else {
 +            printf("%s\n",warn_buf);
 +        }
 +    }
 +      
 +  /* MRS: eventually figure out better logic for initializing the fep
 +   values that makes declaring the lambda and declaring the state not
 +   potentially conflict if not handled correctly. */
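 +  /* Summary of the logic below: a non-negative init_lambda sets every lambda
 +   * component to that value; otherwise each component is taken from
 +   * all_lambda[i] at init_fep_state. */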
 +  if (ir->efep != efepNO)
 +  {
 +      state.fep_state = ir->fepvals->init_fep_state;
 +      for (i=0;i<efptNR;i++)
 +      {
 +          /* init_lambda trumps state definitions*/
 +          if (ir->fepvals->init_lambda >= 0)
 +          {
 +              state.lambda[i] = ir->fepvals->init_lambda;
 +          }
 +          else
 +          {
 +              if (ir->fepvals->all_lambda[i] == NULL)
 +              {
 +                  gmx_fatal(FARGS,"Values of lambda not set for a free energy calculation!");
 +              }
 +              else
 +              {
 +                  state.lambda[i] = ir->fepvals->all_lambda[i][state.fep_state];
 +              }
 +          }
 +      }
 +  }
 +
 +  if (bVerbose) 
 +    fprintf(stderr,"writing run input file...\n");
 +
 +  done_warning(wi,FARGS);
 +
 +  write_tpx_state(ftp2fn(efTPX,NFILE,fnm),ir,&state,sys);
 +  
 +  thanx(stderr);
 +  
 +  return 0;
 +}
index 14d94b50c5de307a541c860c78dc1ae1a9c46b6b,0000000000000000000000000000000000000000..d982743a9971f8efc3d6e5ff081a0cf4286c33e6
mode 100644,000000..100644
--- /dev/null
@@@ -1,51 -1,0 +1,53 @@@
-     target_link_libraries(mdrun ${GMX_EXTRA_LIBRARIES} libgromacs ${GMX_OPENMM_LIBRARIES})
-     set_target_properties(mdrun PROPERTIES OUTPUT_NAME "mdrun${GMX_BINARY_SUFFIX}")
 +include_directories(${CMAKE_SOURCE_DIR}/src/gromacs/gmxpreprocess)
 +
 +set(MDRUN_SOURCES
 +    do_gct.c    gctio.c         genalg.c    ionize.c
 +    md.c        md_openmm.c     mdrun.c     membed.c
 +    repl_ex.c   runner.c        xutils.c)
 +
 +if(GMX_OPENMM) 
 +    add_subdirectory(gmx_gpu_utils)
 +    include_directories(./gmx_gpu_utils ${OpenMM_INCLUDE_DIR})
 +    link_directories(${OpenMM_LIBRARY_DIR}) 
 +    # With this define, no OPENMM_PLUGIN_DIR environment variable is needed
 +    # if the same OpenMM installation is used for running and building.
 +    add_definitions( -DOPENMM_PLUGIN_DIR="${OpenMM_PLUGIN_DIR}" ) 
 +    file(TO_CMAKE_PATH ${OpenMM_PLUGIN_DIR} _path)
 +    add_library(openmm_api_wrapper STATIC openmm_wrapper.cpp)
 +    target_link_libraries(openmm_api_wrapper gmx_gpu_utils ${OpenMM_LIBRARIES})
 +    set(GMX_OPENMM_LIBRARIES openmm_api_wrapper gmx_gpu_utils ${OpenMM_LIBRARIES})   
 +endif(GMX_OPENMM)
 +
 +if(GMX_FAHCORE)
 +    add_library(fahcore ${MDRUN_SOURCES})
 +else(GMX_FAHCORE)
 +    add_executable(mdrun ${MDRUN_SOURCES})
 +    gmx_add_man_page(mdrun)
++    target_link_libraries(mdrun ${GMX_EXTRA_LIBRARIES} libgromacs ${GMX_OPENMM_LIBRARIES}
++        ${OpenMP_LINKER_FLAGS})
++    set_target_properties(mdrun PROPERTIES OUTPUT_NAME "mdrun${GMX_BINARY_SUFFIX}"
++        COMPILE_FLAGS "${OpenMP_C_FLAGS}")
 +    install(TARGETS mdrun DESTINATION ${BIN_INSTALL_DIR} COMPONENT mdrun)
 +
 +    if(GMX_OPENMM AND MSVC)
 +        set_target_properties(mdrun PROPERTIES LINK_FLAGS "/NODEFAULTLIB:LIBCMT")
 +    endif()
 +
 +    # Create the custom install-mdrun target
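 +    # (typically invoked as 'make install-mdrun'; with BUILD_SHARED_LIBS=ON it also
 +    # installs the shared libraries that the mdrun binary needs)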
 +    if (BUILD_SHARED_LIBS)
 +        # If shared libraries are used, we need to install the libraries in
 +        # addition to the mdrun binary.
 +        add_custom_target(install-mdrun
 +            COMMAND ${CMAKE_COMMAND} -DCOMPONENT=libraries
 +                    -P ${CMAKE_BINARY_DIR}/cmake_install.cmake
 +            COMMAND ${CMAKE_COMMAND} -DCOMPONENT=mdrun
 +                    -P ${CMAKE_BINARY_DIR}/cmake_install.cmake
 +            COMMENT "Installing mdrun")
 +    else (BUILD_SHARED_LIBS)
 +        add_custom_target(install-mdrun
 +            COMMAND ${CMAKE_COMMAND} -DCOMPONENT=mdrun
 +                    -P ${CMAKE_BINARY_DIR}/cmake_install.cmake
 +            COMMENT "Installing mdrun")
 +    endif (BUILD_SHARED_LIBS)
 +    add_dependencies(install-mdrun mdrun)
 +endif(GMX_FAHCORE)
index 3786eee68cbd2a8c0e61de4308b740e64b4f0319,0000000000000000000000000000000000000000..535b023658ea7ad53a17a6d21295427a91d7648e
mode 100644,000000..100644
--- /dev/null
@@@ -1,2000 -1,0 +1,2000 @@@
-             bDoAndersenConstr = update_randomize_velocities(ir,step,mdatoms,state,upd,&top->idef,constr);
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include "typedefs.h"
 +#include "smalloc.h"
 +#include "sysstuff.h"
 +#include "vec.h"
 +#include "statutil.h"
 +#include "vcm.h"
 +#include "mdebin.h"
 +#include "nrnb.h"
 +#include "calcmu.h"
 +#include "index.h"
 +#include "vsite.h"
 +#include "update.h"
 +#include "ns.h"
 +#include "trnio.h"
 +#include "xtcio.h"
 +#include "mdrun.h"
 +#include "confio.h"
 +#include "network.h"
 +#include "pull.h"
 +#include "xvgr.h"
 +#include "physics.h"
 +#include "names.h"
 +#include "xmdrun.h"
 +#include "ionize.h"
 +#include "disre.h"
 +#include "orires.h"
 +#include "pme.h"
 +#include "mdatoms.h"
 +#include "repl_ex.h"
 +#include "qmmm.h"
 +#include "domdec.h"
 +#include "partdec.h"
 +#include "topsort.h"
 +#include "coulomb.h"
 +#include "constr.h"
 +#include "shellfc.h"
 +#include "compute_io.h"
 +#include "mvdata.h"
 +#include "checkpoint.h"
 +#include "mtop_util.h"
 +#include "sighandler.h"
 +#include "txtdump.h"
 +#include "string2.h"
 +#include "membed.h"
 +
 +#ifdef GMX_LIB_MPI
 +#include <mpi.h>
 +#endif
 +#ifdef GMX_THREAD_MPI
 +#include "tmpi.h"
 +#endif
 +
 +#ifdef GMX_FAHCORE
 +#include "corewrap.h"
 +#endif
 +
 +double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
 +             const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
 +             int nstglobalcomm,
 +             gmx_vsite_t *vsite,gmx_constr_t constr,
 +             int stepout,t_inputrec *ir,
 +             gmx_mtop_t *top_global,
 +             t_fcdata *fcd,
 +             t_state *state_global,
 +             t_mdatoms *mdatoms,
 +             t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +             gmx_edsam_t ed,t_forcerec *fr,
 +             int repl_ex_nst,int repl_ex_nex,int repl_ex_seed,gmx_membed_t membed,
 +             real cpt_period,real max_hours,
 +             const char *deviceOptions,
 +             unsigned long Flags,
 +             gmx_runtime_t *runtime)
 +{
 +    gmx_mdoutf_t *outf;
 +    gmx_large_int_t step,step_rel;
 +    double     run_time;
 +    double     t,t0,lam0[efptNR];
 +    gmx_bool   bGStatEveryStep,bGStat,bNstEner,bCalcEnerPres,bEnergyHere;
 +    gmx_bool   bNS,bNStList,bSimAnn,bStopCM,bRerunMD,bNotLastFrame=FALSE,
 +               bFirstStep,bStateFromCP,bStateFromTPX,bInitStep,bLastStep,
 +               bBornRadii,bStartingFromCpt;
 +    gmx_bool   bDoDHDL=FALSE,bDoFEP=FALSE,bDoExpanded=FALSE;
 +    gmx_bool   do_ene,do_log,do_verbose,bRerunWarnNoV=TRUE,
 +               bForceUpdate=FALSE,bCPT;
 +    int        mdof_flags;
 +    gmx_bool   bMasterState;
 +    int        force_flags,cglo_flags;
 +    tensor     force_vir,shake_vir,total_vir,tmp_vir,pres;
 +    int        i,m;
 +    t_trxstatus *status;
 +    rvec       mu_tot;
 +    t_vcm      *vcm;
 +    t_state    *bufstate=NULL;   
 +    matrix     *scale_tot,pcoupl_mu,M,ebox;
 +    gmx_nlheur_t nlh;
 +    t_trxframe rerun_fr;
 +    gmx_repl_ex_t repl_ex=NULL;
 +    int        nchkpt=1;
 +    gmx_localtop_t *top;      
 +    t_mdebin *mdebin=NULL;
 +    df_history_t df_history;
 +    t_state    *state=NULL;
 +    rvec       *f_global=NULL;
 +    int        n_xtc=-1;
 +    rvec       *x_xtc=NULL;
 +    gmx_enerdata_t *enerd;
 +    rvec       *f=NULL;
 +    gmx_global_stat_t gstat;
 +    gmx_update_t upd=NULL;
 +    t_graph    *graph=NULL;
 +    globsig_t   gs;
 +    gmx_rng_t mcrng=NULL;
 +    gmx_bool        bFFscan;
 +    gmx_groups_t *groups;
 +    gmx_ekindata_t *ekind, *ekind_save;
 +    gmx_shellfc_t shellfc;
 +    int         count,nconverged=0;
 +    real        timestep=0;
 +    double      tcount=0;
 +    gmx_bool        bIonize=FALSE;
 +    gmx_bool        bTCR=FALSE,bConverged=TRUE,bOK,bSumEkinhOld,bExchanged;
 +    gmx_bool        bAppend;
 +    gmx_bool        bResetCountersHalfMaxH=FALSE;
 +    gmx_bool        bVV,bIterations,bFirstIterate,bTemp,bPres,bTrotter;
 +    real        mu_aver=0,dvdl;
 +    int         a0,a1,gnx=0,ii;
 +    atom_id     *grpindex=NULL;
 +    char        *grpname;
 +    t_coupl_rec *tcr=NULL;
 +    rvec        *xcopy=NULL,*vcopy=NULL,*cbuf=NULL;
 +    matrix      boxcopy={{0}},lastbox;
 +      tensor      tmpvir;
 +      real        fom,oldfom,veta_save,pcurr,scalevir,tracevir;
 +      real        vetanew = 0;
 +    int         lamnew=0;
 +    /* for FEP */
 +    int         fep_state=0;
 +    int         nstfep;
 +    real        rate;
 +    double      cycles;
 +      real        saved_conserved_quantity = 0;
 +    real        last_ekin = 0;
 +      int         iter_i;
 +      t_extmass   MassQ;
 +    int         **trotter_seq; 
 +    char        sbuf[STEPSTRSIZE],sbuf2[STEPSTRSIZE];
 +    int         handled_stop_condition=gmx_stop_cond_none; /* compare to get_stop_condition*/
 +    gmx_iterate_t iterate;
 +    gmx_large_int_t multisim_nsteps=-1; /* number of steps to do  before first multisim 
 +                                          simulation stops. If equal to zero, don't
 +                                          communicate any more between multisims.*/
 +
 +    if(MASTER(cr))
 +    {
 +        fprintf(stderr,
 +                "\n* WARNING * WARNING * WARNING * WARNING * WARNING * WARNING *\n"
 +                "We have just committed the new CPU detection code in this branch,\n"
 +                "and will commit new SSE/AVX kernels in a few days. However, this\n"
 +                "means that currently only the NxN kernels are accelerated!\n"
 +                "In the mean time, you might want to avoid production runs in 4.6.\n\n");
 +    }
 +
 +#ifdef GMX_FAHCORE
 +    /* Temporary addition for FAHCORE checkpointing */
 +    int chkpt_ret;
 +#endif
 +
 +    /* Check for special mdrun options */
 +    bRerunMD = (Flags & MD_RERUN);
 +    bIonize  = (Flags & MD_IONIZE);
 +    bFFscan  = (Flags & MD_FFSCAN);
 +    bAppend  = (Flags & MD_APPENDFILES);
 +    if (Flags & MD_RESETCOUNTERSHALFWAY)
 +    {
 +        if (ir->nsteps > 0)
 +        {
 +            /* Signal to reset the counters half the simulation steps. */
 +            wcycle_set_reset_counters(wcycle,ir->nsteps/2);
 +        }
 +        /* Signal to reset the counters halfway the simulation time. */
 +        bResetCountersHalfMaxH = (max_hours > 0);
 +    }
 +
 +    /* md-vv uses averaged full step velocities for T-control 
 +       md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control)
 +       md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */
 +    bVV = EI_VV(ir->eI);
 +    if (bVV) /* to store the initial velocities while computing virial */
 +    {
 +        snew(cbuf,top_global->natoms);
 +    }
 +    /* all the iterative cases - only if there are constraints */
 +    bIterations = ((IR_NPH_TROTTER(ir) || IR_NPT_TROTTER(ir)) && (constr) && (!bRerunMD));
 +    bTrotter = (bVV && (IR_NPT_TROTTER(ir) || IR_NPH_TROTTER(ir) || IR_NVT_TROTTER(ir)));
 +    
 +    if (bRerunMD)
 +    {
 +        /* Since we don't know if the frames read are related in any way,
 +         * rebuild the neighborlist at every step.
 +         */
 +        ir->nstlist       = 1;
 +        ir->nstcalcenergy = 1;
 +        nstglobalcomm     = 1;
 +    }
 +
 +    check_ir_old_tpx_versions(cr,fplog,ir,top_global);
 +
 +    nstglobalcomm = check_nstglobalcomm(fplog,cr,nstglobalcomm,ir);
 +    bGStatEveryStep = (nstglobalcomm == 1);
 +
 +    if (!bGStatEveryStep && ir->nstlist == -1 && fplog != NULL)
 +    {
 +        fprintf(fplog,
 +                "To reduce the energy communication with nstlist = -1\n"
 +                "the neighbor list validity should not be checked at every step,\n"
 +                "this means that exact integration is not guaranteed.\n"
 +                "The neighbor list validity is checked after:\n"
 +                "  <n.list life time> - 2*std.dev.(n.list life time)  steps.\n"
 +                "In most cases this will result in exact integration.\n"
 +                "This reduces the energy communication by a factor of 2 to 3.\n"
 +                "If you want less energy communication, set nstlist > 3.\n\n");
 +    }
 +
 +    if (bRerunMD || bFFscan)
 +    {
 +        ir->nstxtcout = 0;
 +    }
 +    groups = &top_global->groups;
 +
 +    /* Initial values */
 +    init_md(fplog,cr,ir,oenv,&t,&t0,state_global->lambda,
 +            &(state_global->fep_state),lam0,
 +            nrnb,top_global,&upd,
 +            nfile,fnm,&outf,&mdebin,
 +            force_vir,shake_vir,mu_tot,&bSimAnn,&vcm,state_global,Flags);
 +
 +    clear_mat(total_vir);
 +    clear_mat(pres);
 +    /* Energy terms and groups */
 +    snew(enerd,1);
 +    init_enerdata(top_global->groups.grps[egcENER].nr,ir->fepvals->n_lambda,
 +                  enerd);
 +    if (DOMAINDECOMP(cr))
 +    {
 +        f = NULL;
 +    }
 +    else
 +    {
 +        snew(f,top_global->natoms);
 +    }
 +
 +    /* lambda Monte carlo random number generator  */
 +    if (ir->bExpanded)
 +    {
 +        mcrng = gmx_rng_init(ir->expandedvals->lmc_seed);
 +    }
 +    /* copy the state into df_history */
 +    copy_df_history(&df_history,&state_global->dfhist);
 +
 +    /* Kinetic energy data */
 +    snew(ekind,1);
 +    init_ekindata(fplog,top_global,&(ir->opts),ekind);
 +    /* needed for iteration of constraints */
 +    snew(ekind_save,1);
 +    init_ekindata(fplog,top_global,&(ir->opts),ekind_save);
 +    /* Copy the cos acceleration to the groups struct */    
 +    ekind->cosacc.cos_accel = ir->cos_accel;
 +
 +    gstat = global_stat_init(ir);
 +    debug_gmx();
 +
 +    /* Check for polarizable models and flexible constraints */
 +    shellfc = init_shell_flexcon(fplog,
 +                                 top_global,n_flexible_constraints(constr),
 +                                 (ir->bContinuation || 
 +                                  (DOMAINDECOMP(cr) && !MASTER(cr))) ?
 +                                 NULL : state_global->x);
 +
 +    if (DEFORM(*ir))
 +    {
 +#ifdef GMX_THREAD_MPI
 +        tMPI_Thread_mutex_lock(&deform_init_box_mutex);
 +#endif
 +        set_deform_reference_box(upd,
 +                                 deform_init_init_step_tpx,
 +                                 deform_init_box_tpx);
 +#ifdef GMX_THREAD_MPI
 +        tMPI_Thread_mutex_unlock(&deform_init_box_mutex);
 +#endif
 +    }
 +
 +    {
 +        double io = compute_io(ir,top_global->natoms,groups,mdebin->ebin->nener,1);
 +        if ((io > 2000) && MASTER(cr))
 +            fprintf(stderr,
 +                    "\nWARNING: This run will generate roughly %.0f Mb of data\n\n",
 +                    io);
 +    }
 +
 +    if (DOMAINDECOMP(cr)) {
 +        top = dd_init_local_top(top_global);
 +
 +        snew(state,1);
 +        dd_init_local_state(cr->dd,state_global,state);
 +
 +        if (DDMASTER(cr->dd) && ir->nstfout) {
 +            snew(f_global,state_global->natoms);
 +        }
 +    } else {
 +        if (PAR(cr)) {
 +            /* Initialize the particle decomposition and split the topology */
 +            top = split_system(fplog,top_global,ir,cr);
 +
 +            pd_cg_range(cr,&fr->cg0,&fr->hcg);
 +            pd_at_range(cr,&a0,&a1);
 +        } else {
 +            top = gmx_mtop_generate_local_top(top_global,ir);
 +
 +            a0 = 0;
 +            a1 = top_global->natoms;
 +        }
 +
 +        state = partdec_init_local_state(cr,state_global);
 +        f_global = f;
 +
 +        atoms2md(top_global,ir,0,NULL,a0,a1-a0,mdatoms);
 +
 +        if (vsite) {
 +            set_vsite_top(vsite,top,mdatoms,cr);
 +        }
 +
 +        if (ir->ePBC != epbcNONE && !ir->bPeriodicMols) {
 +            graph = mk_graph(fplog,&(top->idef),0,top_global->natoms,FALSE,FALSE);
 +        }
 +
 +        if (shellfc) {
 +            make_local_shells(cr,mdatoms,shellfc);
 +        }
 +
 +        if (ir->pull && PAR(cr)) {
 +            dd_make_local_pull_groups(NULL,ir->pull,mdatoms);
 +        }
 +    }
 +
 +    if (DOMAINDECOMP(cr))
 +    {
 +        /* Distribute the charge groups over the nodes from the master node */
 +        dd_partition_system(fplog,ir->init_step,cr,TRUE,1,
 +                            state_global,top_global,ir,
 +                            state,&f,mdatoms,top,fr,
 +                            vsite,shellfc,constr,
 +                            nrnb,wcycle,FALSE);
 +    }
 +
 +    update_mdatoms(mdatoms,state->lambda[efptMASS]);
 +
 +    if (opt2bSet("-cpi",nfile,fnm))
 +    {
 +        bStateFromCP = gmx_fexist_master(opt2fn_master("-cpi",nfile,fnm,cr),cr);
 +    }
 +    else
 +    {
 +        bStateFromCP = FALSE;
 +    }
 +
 +    if (MASTER(cr))
 +    {
 +        if (bStateFromCP)
 +        {
 +            /* Update mdebin with energy history if appending to output files */
 +            if ( Flags & MD_APPENDFILES )
 +            {
 +                restore_energyhistory_from_state(mdebin,&state_global->enerhist);
 +            }
 +            else
 +            {
 +                /* We might have read an energy history from checkpoint,
 +                 * free the allocated memory and reset the counts.
 +                 */
 +                done_energyhistory(&state_global->enerhist);
 +                init_energyhistory(&state_global->enerhist);
 +            }
 +        }
 +        /* Set the initial energy history in state by updating once */
 +        update_energyhistory(&state_global->enerhist,mdebin);
 +    } 
 +
 +    if ((state->flags & (1<<estLD_RNG)) && (Flags & MD_READ_RNG)) 
 +    {
 +        /* Set the random state if we read a checkpoint file */
 +        set_stochd_state(upd,state);
 +    }
 +
 +    if (state->flags & (1<<estMC_RNG))
 +    {
 +        set_mc_state(mcrng,state);
 +    }
 +
 +    /* Initialize constraints */
 +    if (constr) {
 +        if (!DOMAINDECOMP(cr))
 +            set_constraints(constr,top,ir,mdatoms,cr);
 +    }
 +
 +    /* Check whether we have to GCT stuff */
 +    bTCR = ftp2bSet(efGCT,nfile,fnm);
 +    if (bTCR) {
 +        if (MASTER(cr)) {
 +            fprintf(stderr,"Will do General Coupling Theory!\n");
 +        }
 +        gnx = top_global->mols.nr;
 +        snew(grpindex,gnx);
 +        for(i=0; (i<gnx); i++) {
 +            grpindex[i] = i;
 +        }
 +    }
 +
 +    if (repl_ex_nst > 0)
 +    {
 +        /* We need to be sure replica exchange can only occur
 +         * when the energies are current */
 +        check_nst_param(fplog,cr,"nstcalcenergy",ir->nstcalcenergy,
 +                        "repl_ex_nst",&repl_ex_nst);
 +        /* This check needs to happen before inter-simulation
 +         * signals are initialized, too */
 +    }
 +    if (repl_ex_nst > 0 && MASTER(cr))
 +    {
 +        repl_ex = init_replica_exchange(fplog,cr->ms,state_global,ir,
 +                                        repl_ex_nst,repl_ex_nex,repl_ex_seed); 
 +    }
 +    if (!ir->bContinuation && !bRerunMD)
 +    {
 +        if (mdatoms->cFREEZE && (state->flags & (1<<estV)))
 +        {
 +            /* Set the velocities of frozen particles to zero */
 +            for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++)
 +            {
 +                for(m=0; m<DIM; m++)
 +                {
 +                    if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m])
 +                    {
 +                        state->v[i][m] = 0;
 +                    }
 +                }
 +            }
 +        }
 +
 +        if (constr)
 +        {
 +            /* Constrain the initial coordinates and velocities */
 +            do_constrain_first(fplog,constr,ir,mdatoms,state,f,
 +                               graph,cr,nrnb,fr,top,shake_vir);
 +        }
 +        if (vsite)
 +        {
 +            /* Construct the virtual sites for the initial configuration */
 +            construct_vsites(fplog,vsite,state->x,nrnb,ir->delta_t,NULL,
 +                             top->idef.iparams,top->idef.il,
 +                             fr->ePBC,fr->bMolPBC,graph,cr,state->box);
 +        }
 +    }
 +
 +    debug_gmx();
 +
 +    /* set the free energy calculation frequency to the minimum of nstdhdl and nstexpanded,
 +       but raised to repl_ex_nst when replica exchange is used and its interval is larger */
 +    nstfep = ir->fepvals->nstdhdl;
 +    if (ir->bExpanded && (nstfep > ir->expandedvals->nstexpanded))
 +    {
 +        nstfep = ir->expandedvals->nstexpanded;
 +    }
 +    if (repl_ex_nst > 0 && repl_ex_nst > nstfep)
 +    {
 +        nstfep = repl_ex_nst;
 +    }
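 +    /* Worked example with illustrative values: nstdhdl=100, nstexpanded=10 and
 +       repl_ex_nst=500 give nstfep=500; without replica exchange nstfep would be 10. */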
 +
 +    /* I'm assuming we need global communication the first time! MRS */
 +    cglo_flags = (CGLO_TEMPERATURE | CGLO_GSTAT
 +                  | ((ir->comm_mode != ecmNO) ? CGLO_STOPCM:0)
 +                  | (bVV ? CGLO_PRESSURE:0)
 +                  | (bVV ? CGLO_CONSTRAINT:0)
 +                  | (bRerunMD ? CGLO_RERUNMD:0)
 +                  | ((Flags & MD_READ_EKIN) ? CGLO_READEKIN:0));
 +    
 +    bSumEkinhOld = FALSE;
 +    compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
 +                    NULL,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
 +                    constr,NULL,FALSE,state->box,
 +                    top_global,&pcurr,top_global->natoms,&bSumEkinhOld,cglo_flags);
 +    if (ir->eI == eiVVAK) {
 +        /* a second call to get the half step temperature initialized as well */ 
 +        /* we do the same call as above, but turn the pressure off -- internally to 
 +           compute_globals, this is recognized as a velocity verlet half-step 
 +           kinetic energy calculation.  This minimizes excess variables, but 
 +           perhaps loses some logic?*/
 +        
 +        compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
 +                        NULL,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
 +                        constr,NULL,FALSE,state->box,
 +                        top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
 +                        cglo_flags &~ (CGLO_STOPCM | CGLO_PRESSURE));
 +    }
 +    
 +    /* Calculate the initial half step temperature, and save the ekinh_old */
 +    if (!(Flags & MD_STARTFROMCPT)) 
 +    {
 +        for(i=0; (i<ir->opts.ngtc); i++) 
 +        {
 +            copy_mat(ekind->tcstat[i].ekinh,ekind->tcstat[i].ekinh_old);
 +        } 
 +    }
 +    if (ir->eI != eiVV) 
 +    {
 +        enerd->term[F_TEMP] *= 2; /* result of averages being done over previous and current step,
 +                                     and there is no previous step */
 +    }
 +    
 +    /* if using an iterative algorithm, we need a working (buffer) copy of the state. */
 +    if (bIterations) 
 +    {
 +            bufstate = init_bufstate(state);
 +    }
 +    if (bFFscan) 
 +    {
 +        snew(xcopy,state->natoms);
 +        snew(vcopy,state->natoms);
 +        copy_rvecn(state->x,xcopy,0,state->natoms);
 +        copy_rvecn(state->v,vcopy,0,state->natoms);
 +        copy_mat(state->box,boxcopy);
 +    } 
 +    
 +    /* need to make an initialization call to get the Trotter variables set, as well as other constants for non-Trotter
 +       temperature control */
 +    trotter_seq = init_npt_vars(ir,state,&MassQ,bTrotter);
 +    
 +    if (MASTER(cr))
 +    {
 +        if (constr && !ir->bContinuation && ir->eConstrAlg == econtLINCS)
 +        {
 +            fprintf(fplog,
 +                    "RMS relative constraint deviation after constraining: %.2e\n",
 +                    constr_rmsd(constr,FALSE));
 +        }
 +        if (EI_STATE_VELOCITY(ir->eI))
 +        {
 +            fprintf(fplog,"Initial temperature: %g K\n",enerd->term[F_TEMP]);
 +        }
 +        if (bRerunMD)
 +        {
 +            fprintf(stderr,"starting md rerun '%s', reading coordinates from"
 +                    " input trajectory '%s'\n\n",
 +                    *(top_global->name),opt2fn("-rerun",nfile,fnm));
 +            if (bVerbose)
 +            {
 +                fprintf(stderr,"Calculated time to finish depends on nsteps from "
 +                        "run input file,\nwhich may not correspond to the time "
 +                        "needed to process input trajectory.\n\n");
 +            }
 +        }
 +        else
 +        {
 +            char tbuf[20];
 +            fprintf(stderr,"starting mdrun '%s'\n",
 +                    *(top_global->name));
 +            if (ir->nsteps >= 0)
 +            {
 +                sprintf(tbuf,"%8.1f",(ir->init_step+ir->nsteps)*ir->delta_t);
 +            }
 +            else
 +            {
 +                sprintf(tbuf,"%s","infinite");
 +            }
 +            if (ir->init_step > 0)
 +            {
 +                fprintf(stderr,"%s steps, %s ps (continuing from step %s, %8.1f ps).\n",
 +                        gmx_step_str(ir->init_step+ir->nsteps,sbuf),tbuf,
 +                        gmx_step_str(ir->init_step,sbuf2),
 +                        ir->init_step*ir->delta_t);
 +            }
 +            else
 +            {
 +                fprintf(stderr,"%s steps, %s ps.\n",
 +                        gmx_step_str(ir->nsteps,sbuf),tbuf);
 +            }
 +        }
 +        fprintf(fplog,"\n");
 +    }
 +
 +    /* Set and write start time */
 +    runtime_start(runtime);
 +    print_date_and_time(fplog,cr->nodeid,"Started mdrun",runtime);
 +    wallcycle_start(wcycle,ewcRUN);
 +    if (fplog)
 +    {
 +        fprintf(fplog,"\n");
 +    }
 +
 +    /* safest point to do file checkpointing is here.  More general point would be immediately before integrator call */
 +#ifdef GMX_FAHCORE
 +    chkpt_ret=fcCheckPointParallel( cr->nodeid,
 +                                    NULL,0);
 +    if ( chkpt_ret == 0 ) 
 +        gmx_fatal( 3,__FILE__,__LINE__, "Checkpoint error on step %d\n", 0 );
 +#endif
 +
 +    debug_gmx();
 +    /***********************************************************
 +     *
 +     *             Loop over MD steps 
 +     *
 +     ************************************************************/
 +
 +    /* if rerunMD then read coordinates and velocities from input trajectory */
 +    if (bRerunMD)
 +    {
 +        if (getenv("GMX_FORCE_UPDATE"))
 +        {
 +            bForceUpdate = TRUE;
 +        }
 +
 +        rerun_fr.natoms = 0;
 +        if (MASTER(cr))
 +        {
 +            bNotLastFrame = read_first_frame(oenv,&status,
 +                                             opt2fn("-rerun",nfile,fnm),
 +                                             &rerun_fr,TRX_NEED_X | TRX_READ_V);
 +            if (rerun_fr.natoms != top_global->natoms)
 +            {
 +                gmx_fatal(FARGS,
 +                          "Number of atoms in trajectory (%d) does not match the "
 +                          "run input file (%d)\n",
 +                          rerun_fr.natoms,top_global->natoms);
 +            }
 +            if (ir->ePBC != epbcNONE)
 +            {
 +                if (!rerun_fr.bBox)
 +                {
 +                    gmx_fatal(FARGS,"Rerun trajectory frame step %d time %f does not contain a box, while pbc is used",rerun_fr.step,rerun_fr.time);
 +                }
 +                if (max_cutoff2(ir->ePBC,rerun_fr.box) < sqr(fr->rlistlong))
 +                {
 +                    gmx_fatal(FARGS,"Rerun trajectory frame step %d time %f has too small box dimensions",rerun_fr.step,rerun_fr.time);
 +                }
 +            }
 +        }
 +
 +        if (PAR(cr))
 +        {
 +            rerun_parallel_comm(cr,&rerun_fr,&bNotLastFrame);
 +        }
 +
 +        if (ir->ePBC != epbcNONE)
 +        {
 +            /* Set the shift vectors.
 +             * Necessary here when we have a static box different from the tpr box.
 +             */
 +            calc_shifts(rerun_fr.box,fr->shift_vec);
 +        }
 +    }
 +
 +    /* loop over MD steps or if rerunMD to end of input trajectory */
 +    bFirstStep = TRUE;
 +    /* Skip the first Nose-Hoover integration when we get the state from tpx */
 +    bStateFromTPX = !bStateFromCP;
 +    bInitStep = bFirstStep && (bStateFromTPX || bVV);
 +    bStartingFromCpt = (Flags & MD_STARTFROMCPT) && bInitStep;
 +    bLastStep    = FALSE;
 +    bSumEkinhOld = FALSE;
 +    bExchanged   = FALSE;
 +
 +    init_global_signals(&gs,cr,ir,repl_ex_nst);
 +
 +    step = ir->init_step;
 +    step_rel = 0;
 +
 +    if (ir->nstlist == -1)
 +    {
 +        init_nlistheuristics(&nlh,bGStatEveryStep,step);
 +    }
 +
 +    if (MULTISIM(cr) && (repl_ex_nst <=0 ))
 +    {
 +        /* check how many steps are left in other sims */
 +        multisim_nsteps=get_multisim_nsteps(cr, ir->nsteps);
 +    }
 +
 +
 +    /* and stop now if we should */
 +    bLastStep = (bRerunMD || (ir->nsteps >= 0 && step_rel > ir->nsteps) ||
 +                 ((multisim_nsteps >= 0) && (step_rel >= multisim_nsteps )));
 +    while (!bLastStep || (bRerunMD && bNotLastFrame)) {
 +
 +        wallcycle_start(wcycle,ewcSTEP);
 +
 +        if (bRerunMD) {
 +            if (rerun_fr.bStep) {
 +                step = rerun_fr.step;
 +                step_rel = step - ir->init_step;
 +            }
 +            if (rerun_fr.bTime) {
 +                t = rerun_fr.time;
 +            }
 +            else
 +            {
 +                t = step;
 +            }
 +        } 
 +        else 
 +        {
 +            bLastStep = (step_rel == ir->nsteps);
 +            t = t0 + step*ir->delta_t;
 +        }
 +
 +        if (ir->efep != efepNO || ir->bSimTemp)
 +        {
 +            /* find and set the current lambdas.  If rerunning, we either read in a state, or a lambda value,
 +               requiring different logic. */
 +            
 +            set_current_lambdas(step,ir->fepvals,bRerunMD,&rerun_fr,state_global,state,lam0);
 +            bDoDHDL = do_per_step(step,ir->fepvals->nstdhdl);
 +            bDoFEP  = (do_per_step(step,nstfep) && (ir->efep != efepNO));
 +            bDoExpanded  = (do_per_step(step,ir->expandedvals->nstexpanded) && (ir->bExpanded) && (step > 0));
 +        }
 +
 +        if (bSimAnn) 
 +        {
 +            update_annealing_target_temp(&(ir->opts),t);
 +        }
 +
 +        if (bRerunMD)
 +        {
 +            if (!(DOMAINDECOMP(cr) && !MASTER(cr)))
 +            {
 +                for(i=0; i<state_global->natoms; i++)
 +                {
 +                    copy_rvec(rerun_fr.x[i],state_global->x[i]);
 +                }
 +                if (rerun_fr.bV)
 +                {
 +                    for(i=0; i<state_global->natoms; i++)
 +                    {
 +                        copy_rvec(rerun_fr.v[i],state_global->v[i]);
 +                    }
 +                }
 +                else
 +                {
 +                    for(i=0; i<state_global->natoms; i++)
 +                    {
 +                        clear_rvec(state_global->v[i]);
 +                    }
 +                    if (bRerunWarnNoV)
 +                    {
 +                        fprintf(stderr,"\nWARNING: Some frames do not contain velocities.\n"
 +                                "         Ekin, temperature and pressure are incorrect,\n"
 +                                "         the virial will be incorrect when constraints are present.\n"
 +                                "\n");
 +                        bRerunWarnNoV = FALSE;
 +                    }
 +                }
 +            }
 +            copy_mat(rerun_fr.box,state_global->box);
 +            copy_mat(state_global->box,state->box);
 +
 +            if (vsite && (Flags & MD_RERUN_VSITE))
 +            {
 +                if (DOMAINDECOMP(cr))
 +                {
 +                    gmx_fatal(FARGS,"Vsite recalculation with -rerun is not implemented for domain decomposition, use particle decomposition");
 +                }
 +                if (graph)
 +                {
 +                    /* Following is necessary because the graph may get out of sync
 +                     * with the coordinates if we only have every N'th coordinate set
 +                     */
 +                    mk_mshift(fplog,graph,fr->ePBC,state->box,state->x);
 +                    shift_self(graph,state->box,state->x);
 +                }
 +                construct_vsites(fplog,vsite,state->x,nrnb,ir->delta_t,state->v,
 +                                 top->idef.iparams,top->idef.il,
 +                                 fr->ePBC,fr->bMolPBC,graph,cr,state->box);
 +                if (graph)
 +                {
 +                    unshift_self(graph,state->box,state->x);
 +                }
 +            }
 +        }
 +
 +        /* Stop Center of Mass motion */
 +        bStopCM = (ir->comm_mode != ecmNO && do_per_step(step,ir->nstcomm));
 +
 +        /* Copy back starting coordinates in case we're doing a forcefield scan */
 +        if (bFFscan)
 +        {
 +            for(ii=0; (ii<state->natoms); ii++)
 +            {
 +                copy_rvec(xcopy[ii],state->x[ii]);
 +                copy_rvec(vcopy[ii],state->v[ii]);
 +            }
 +            copy_mat(boxcopy,state->box);
 +        }
 +
 +        if (bRerunMD)
 +        {
 +            /* for rerun MD always do Neighbour Searching */
 +            bNS = (bFirstStep || ir->nstlist != 0);
 +            bNStList = bNS;
 +        }
 +        else
 +        {
 +            /* Determine whether or not to do Neighbour Searching and LR */
 +            bNStList = (ir->nstlist > 0  && step % ir->nstlist == 0);
 +            
 +            bNS = (bFirstStep || bExchanged || bNStList || bDoFEP ||
 +                   (ir->nstlist == -1 && nlh.nabnsb > 0));
 +
 +            if (bNS && ir->nstlist == -1)
 +            {
 +                set_nlistheuristics(&nlh,bFirstStep || bExchanged || bDoFEP, step);
 +            }
 +        } 
 +
 +        /* check whether we should stop because another simulation has 
 +           stopped. */
 +        if (MULTISIM(cr))
 +        {
 +            if ( (multisim_nsteps >= 0) &&  (step_rel >= multisim_nsteps)  &&  
 +                 (multisim_nsteps != ir->nsteps) )  
 +            {
 +                if (bNS)
 +                {
 +                    if (MASTER(cr))
 +                    {
 +                        fprintf(stderr, 
 +                                "Stopping simulation %d because another one has finished\n",
 +                                cr->ms->sim);
 +                    }
 +                    bLastStep=TRUE;
 +                    gs.sig[eglsCHKPT] = 1;
 +                }
 +            }
 +        }
 +
 +        /* < 0 means stop at next step, > 0 means stop at next NS step */
 +        if ( (gs.set[eglsSTOPCOND] < 0 ) ||
 +             ( (gs.set[eglsSTOPCOND] > 0 ) && ( bNS || ir->nstlist==0)) )
 +        {
 +            bLastStep = TRUE;
 +        }
 +
 +        /* Determine whether or not to update the Born radii if doing GB */
 +        bBornRadii=bFirstStep;
 +        if (ir->implicit_solvent && (step % ir->nstgbradii==0))
 +        {
 +            bBornRadii=TRUE;
 +        }
 +        
 +        do_log = do_per_step(step,ir->nstlog) || bFirstStep || bLastStep;
 +        do_verbose = bVerbose &&
 +                  (step % stepout == 0 || bFirstStep || bLastStep);
 +
 +        if (bNS && !(bFirstStep && ir->bContinuation && !bRerunMD))
 +        {
 +            if (bRerunMD)
 +            {
 +                bMasterState = TRUE;
 +            }
 +            else
 +            {
 +                bMasterState = FALSE;
 +                /* Correct the new box if it is too skewed */
 +                if (DYNAMIC_BOX(*ir))
 +                {
 +                    if (correct_box(fplog,step,state->box,graph))
 +                    {
 +                        bMasterState = TRUE;
 +                    }
 +                }
 +                if (DOMAINDECOMP(cr) && bMasterState)
 +                {
 +                    dd_collect_state(cr->dd,state,state_global);
 +                }
 +            }
 +
 +            if (DOMAINDECOMP(cr))
 +            {
 +                /* Repartition the domain decomposition */
 +                wallcycle_start(wcycle,ewcDOMDEC);
 +                dd_partition_system(fplog,step,cr,
 +                                    bMasterState,nstglobalcomm,
 +                                    state_global,top_global,ir,
 +                                    state,&f,mdatoms,top,fr,
 +                                    vsite,shellfc,constr,
 +                                    nrnb,wcycle,do_verbose);
 +                wallcycle_stop(wcycle,ewcDOMDEC);
 +                /* If using an iterative integrator, reallocate space to match the decomposition */
 +            }
 +        }
 +
 +        if (MASTER(cr) && do_log && !bFFscan)
 +        {
 +            print_ebin_header(fplog,step,t,state->lambda[efptFEP]); /* can we improve the information printed here? */
 +        }
 +
 +        if (ir->efep != efepNO)
 +        {
 +            update_mdatoms(mdatoms,state->lambda[efptMASS]);
 +        }
 +
 +        if (bRerunMD && rerun_fr.bV)
 +        {
 +            
 +            /* We need the kinetic energy at minus the half step for determining
 +             * the full step kinetic energy and possibly for T-coupling.*/
 +            /* This may not be quite working correctly yet . . . . */
 +            compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
 +                            wcycle,enerd,NULL,NULL,NULL,NULL,mu_tot,
 +                            constr,NULL,FALSE,state->box,
 +                            top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
 +                            CGLO_RERUNMD | CGLO_GSTAT | CGLO_TEMPERATURE);
 +        }
 +        clear_mat(force_vir);
 +        
 +        /* Ionize the atoms if necessary */
 +        if (bIonize)
 +        {
 +            ionize(fplog,oenv,mdatoms,top_global,t,ir,state->x,state->v,
 +                   mdatoms->start,mdatoms->start+mdatoms->homenr,state->box,cr);
 +        }
 +        
 +        /* Update force field in ffscan program */
 +        if (bFFscan)
 +        {
 +            if (update_forcefield(fplog,
 +                                  nfile,fnm,fr,
 +                                  mdatoms->nr,state->x,state->box))
 +            {
 +                gmx_finalize_par();
 +
 +                exit(0);
 +            }
 +        }
 +
 +        /* We write a checkpoint at this MD step when:
 +         * either at an NS step when we signalled through gs,
 +         * or at the last step (but not when we do not want confout),
 +         * but never at the first step or with rerun.
 +         */
 +        bCPT = (((gs.set[eglsCHKPT] && (bNS || ir->nstlist == 0)) ||
 +                 (bLastStep && (Flags & MD_CONFOUT))) &&
 +                step > ir->init_step && !bRerunMD);
 +        if (bCPT)
 +        {
 +            gs.set[eglsCHKPT] = 0;
 +        }
 +
 +        /* Determine the energy and pressure:
 +         * at nstcalcenergy steps and at energy output steps (set below).
 +         */
 +
 +        if (EI_VV(ir->eI) && (!bInitStep)) {  /* for vv, the first half actually corresponds to the last step */
 +            bNstEner = do_per_step(step-1,ir->nstcalcenergy);
 +        } else {
 +            bNstEner = do_per_step(step,ir->nstcalcenergy);
 +        }
 +        bCalcEnerPres =
 +            (bNstEner ||
 +             (ir->epc > epcNO && do_per_step(step,ir->nstpcouple)));
 +
 +        /* Do we need global communication ? */
 +        bGStat = (bCalcEnerPres || bStopCM ||
 +                  do_per_step(step,nstglobalcomm) ||
 +                  (ir->nstlist == -1 && !bRerunMD && step >= nlh.step_nscheck));
 +
 +        do_ene = (do_per_step(step,ir->nstenergy) || bLastStep);
 +
 +        if (do_ene || do_log)
 +        {
 +            bCalcEnerPres = TRUE;
 +            bGStat        = TRUE;
 +        }
 +        
 +        /* these CGLO_ options remain the same throughout the iteration */
 +        cglo_flags = ((bRerunMD ? CGLO_RERUNMD : 0) |
 +                      (bGStat ? CGLO_GSTAT : 0)
 +            );
 +        
 +        force_flags = (GMX_FORCE_STATECHANGED |
 +                       ((DYNAMIC_BOX(*ir) || bRerunMD) ? GMX_FORCE_DYNAMICBOX : 0) |
 +                       GMX_FORCE_ALLFORCES |
 +                       (bNStList ? GMX_FORCE_DOLR : 0) |
 +                       GMX_FORCE_SEPLRF |
 +                       (bCalcEnerPres ? GMX_FORCE_VIRIAL : 0) |
 +                       (bDoFEP ? GMX_FORCE_DHDL : 0)
 +            );
 +        
 +        if (shellfc)
 +        {
 +            /* Now is the time to relax the shells */
 +            count=relax_shell_flexcon(fplog,cr,bVerbose,bFFscan ? step+1 : step,
 +                                      ir,bNS,force_flags,
 +                                      bStopCM,top,top_global,
 +                                      constr,enerd,fcd,
 +                                      state,f,force_vir,mdatoms,
 +                                      nrnb,wcycle,graph,groups,
 +                                      shellfc,fr,bBornRadii,t,mu_tot,
 +                                      state->natoms,&bConverged,vsite,
 +                                      outf->fp_field);
 +            tcount+=count;
 +
 +            if (bConverged)
 +            {
 +                nconverged++;
 +            }
 +        }
 +        else
 +        {
 +            /* The coordinates (x) are shifted (to get whole molecules)
 +             * in do_force.
 +             * This is parallelized as well, and does communication too.
 +             * Check comments in sim_util.c
 +             */
 +            do_force(fplog,cr,ir,step,nrnb,wcycle,top,top_global,groups,
 +                     state->box,state->x,&state->hist,
 +                     f,force_vir,mdatoms,enerd,fcd,
 +                     state->lambda,graph,
 +                     fr,vsite,mu_tot,t,outf->fp_field,ed,bBornRadii,
 +                     (bNS ? GMX_FORCE_NS : 0) | force_flags);
 +        }
 +        
 +        if (bTCR)
 +        {
 +            mu_aver = calc_mu_aver(cr,state->x,mdatoms->chargeA,
 +                                   mu_tot,&top_global->mols,mdatoms,gnx,grpindex);
 +        }
 +        
 +        if (bTCR && bFirstStep)
 +        {
 +            tcr=init_coupling(fplog,nfile,fnm,cr,fr,mdatoms,&(top->idef));
 +            fprintf(fplog,"Done init_coupling\n"); 
 +            fflush(fplog);
 +        }
 +        
 +        if (bVV && !bStartingFromCpt && !bRerunMD)
 +        /*  ############### START FIRST UPDATE HALF-STEP FOR VV METHODS############### */
 +        {
 +            if (ir->eI==eiVV && bInitStep) 
 +            {
 +                /* if using velocity verlet with full time step Ekin,
 +                 * take the first half step only to compute the 
 +                 * virial for the first step. From there,
 +                 * revert back to the initial coordinates
 +                 * so that the input is actually the initial step.
 +                 */
 +                copy_rvecn(state->v,cbuf,0,state->natoms); /* should make this better for parallelizing? */
 +            } else {
 +                /* this is for NHC in the Ekin(t+dt/2) version of vv */
 +                trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ1);            
 +            }
 +
 +            update_coords(fplog,step,ir,mdatoms,state,
 +                          f,fr->bTwinRange && bNStList,fr->f_twin,fcd,
 +                          ekind,M,wcycle,upd,bInitStep,etrtVELOCITY1,
 +                          cr,nrnb,constr,&top->idef);
 +            
 +            if (bIterations)
 +            {
 +                gmx_iterate_init(&iterate,bIterations && !bInitStep);
 +            }
 +            /* for iterations, we save these vectors, as we will be self-consistently iterating
 +               the calculations */
 +
 +            /*#### UPDATE EXTENDED VARIABLES IN TROTTER FORMULATION */
 +            
 +            /* save the state */
 +            if (bIterations && iterate.bIterate) { 
 +                copy_coupling_state(state,bufstate,ekind,ekind_save,&(ir->opts));
 +            }
 +            
 +            bFirstIterate = TRUE;
 +            while (bFirstIterate || (bIterations && iterate.bIterate))
 +            {
 +                if (bIterations && iterate.bIterate) 
 +                {
 +                    copy_coupling_state(bufstate,state,ekind_save,ekind,&(ir->opts));
 +                    if (bFirstIterate && bTrotter) 
 +                    {
 +                        /* The first time through, we need a decent first estimate
 +                           of veta(t+dt) to compute the constraints.  Do
 +                           this by computing the box volume part of the
 +                           trotter integration at this time. Nothing else
 +                           should be changed by this routine here.  If
 +                           !(first time), we start with the previous value
 +                           of veta.  */
 +                        
 +                        veta_save = state->veta;
 +                        trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ0);
 +                        vetanew = state->veta;
 +                        state->veta = veta_save;
 +                    } 
 +                } 
 +                
 +                bOK = TRUE;
 +                if ( !bRerunMD || rerun_fr.bV || bForceUpdate) {  /* Why is rerun_fr.bV here?  Unclear. */
 +                    dvdl = 0;
 +                    
 +                    update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,state,graph,f,
 +                                       &top->idef,shake_vir,NULL,
 +                                       cr,nrnb,wcycle,upd,constr,
 +                                       bInitStep,TRUE,bCalcEnerPres,vetanew);
 +                    
 +                    if (!bOK && !bFFscan)
 +                    {
 +                        gmx_fatal(FARGS,"Constraint error: Shake, Lincs or Settle could not solve the constraints");
 +                    }
 +                    
 +                } 
 +                else if (graph)
 +                { /* Need to unshift here if a do_force has been
 +                     called in the previous step */
 +                    unshift_self(graph,state->box,state->x);
 +                }
 +
 +                
 +                /* if VV, compute the pressure and constraints */
 +                /* For VV2, we strictly only need this if using pressure
 +                 * control, but we really would like to have accurate pressures
 +                 * printed out.
 +                 * Think about ways around this in the future?
 +                 * For now, keep this choice in comments.
 +                 */
 +                /* bPres = (ir->eI==eiVV || IR_NPT_TROTTER(ir)); */
 +                /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && IR_NPT_TROTTER(ir)));*/
 +                bPres = TRUE;
 +                bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK));
 +                if (bNstEner && ir->eI==eiVVAK)  /*MRS:  7/9/2010 -- this still doesn't fix it?*/
 +                {
 +                    bSumEkinhOld = TRUE;
 +                }
 +                compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
 +                                wcycle,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
 +                                constr,NULL,FALSE,state->box,
 +                                top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
 +                                cglo_flags 
 +                                | CGLO_ENERGY 
 +                                | (bStopCM ? CGLO_STOPCM : 0)
 +                                | (bTemp ? CGLO_TEMPERATURE:0) 
 +                                | (bPres ? CGLO_PRESSURE : 0) 
 +                                | (bPres ? CGLO_CONSTRAINT : 0)
 +                                | ((bIterations && iterate.bIterate) ? CGLO_ITERATE : 0)  
 +                                | (bFirstIterate ? CGLO_FIRSTITERATE : 0)
 +                                | CGLO_SCALEEKIN 
 +                    );
 +                /* explanation of above:
 +                   a) We compute Ekin at the full time step
 +                   if 1) we are using the AveVel Ekin, and it's not the
 +                   initial step, or 2) we are using AveEkin, but need the full
 +                   time step kinetic energy for the pressure (always true now, since we want accurate statistics).
 +                   b) If we are using EkinAveEkin for the kinetic energy for the temperature control, we still feed in
 +                   EkinAveVel because it's needed for the pressure */
 +                
 +                /* temperature scaling and pressure scaling to produce the extended variables at t+dt */
 +                if (!bInitStep) 
 +                {
 +                    if (bTrotter)
 +                    {
 +                        trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ2);
 +                    } 
 +                    else 
 +                    {
 +                        update_tcouple(fplog,step,ir,state,ekind,wcycle,upd,&MassQ,mdatoms);
 +                    }
 +                }
 +                
 +                if (bIterations &&
 +                    done_iterating(cr,fplog,step,&iterate,bFirstIterate,
 +                                   state->veta,&vetanew)) 
 +                {
 +                    break;
 +                }
 +                bFirstIterate = FALSE;
 +            }
 +
 +            if (bTrotter && !bInitStep) {
 +                enerd->term[F_DVDL_BONDED] += dvdl;        /* only add after iterations */
 +                copy_mat(shake_vir,state->svir_prev);
 +                copy_mat(force_vir,state->fvir_prev);
 +                if (IR_NVT_TROTTER(ir) && ir->eI==eiVV) {
 +                    /* update temperature and kinetic energy now that step is over - this is the v(t+dt) point */
 +                    enerd->term[F_TEMP] = sum_ekin(&(ir->opts),ekind,NULL,(ir->eI==eiVV),FALSE,FALSE);
 +                    enerd->term[F_EKIN] = trace(ekind->ekin);
 +                }
 +            }
 +            /* if it's the initial step, we performed this first step just to get the constraint virial */
 +            if (bInitStep && ir->eI==eiVV) {
 +                copy_rvecn(cbuf,state->v,0,state->natoms);
 +            }
 +            
 +            if (fr->bSepDVDL && fplog && do_log) 
 +            {
 +                fprintf(fplog,sepdvdlformat,"Constraint",0.0,dvdl);
 +            }
 +            enerd->term[F_DVDL_BONDED] += dvdl;
 +        }
 +
 +        /* MRS -- now done iterating -- compute the conserved quantity */
 +        if (bVV) {
 +            saved_conserved_quantity = compute_conserved_from_auxiliary(ir,state,&MassQ);
 +            if (ir->eI==eiVV) 
 +            {
 +                last_ekin = enerd->term[F_EKIN];
 +            }
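 +            /* Leave the dispersion correction out of the conserved quantity unless it contributes to both the energy and the pressure (EnerPres/AllEnerPres) */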
 +            if ((ir->eDispCorr != edispcEnerPres) && (ir->eDispCorr != edispcAllEnerPres)) 
 +            {
 +                saved_conserved_quantity -= enerd->term[F_DISPCORR];
 +            }
 +            /* sum up the foreign energy and dhdl terms for vv.  currently done every step so that dhdl is correct in the .edr */
 +            sum_dhdl(enerd,state->lambda,ir->fepvals);
 +        }
 +        
 +        /* ########  END FIRST UPDATE STEP  ############## */
 +        /* ########  If doing VV, we now have v(dt) ###### */
 +        if (bDoExpanded) {
 +            /* perform extended ensemble sampling in lambda - we don't
 +               actually move to the new state before outputting
 +               statistics, but if performing simulated tempering, we
 +               do update the velocities and the tau_t. */
 +
 +            lamnew = ExpandedEnsembleDynamics(fplog,ir,enerd,state,&MassQ,&df_history,step,mcrng,state->v,mdatoms);
 +        }
 +        /* ################## START TRAJECTORY OUTPUT ################# */
 +        
 +        /* Now we have the energies and forces corresponding to the 
 +         * coordinates at time t. We must output all of this before
 +         * the update.
 +         * for RerunMD t is read from input trajectory
 +         */
 +        mdof_flags = 0;
 +        if (do_per_step(step,ir->nstxout)) { mdof_flags |= MDOF_X; }
 +        if (do_per_step(step,ir->nstvout)) { mdof_flags |= MDOF_V; }
 +        if (do_per_step(step,ir->nstfout)) { mdof_flags |= MDOF_F; }
 +        if (do_per_step(step,ir->nstxtcout)) { mdof_flags |= MDOF_XTC; }
 +        if (bCPT) { mdof_flags |= MDOF_CPT; };
 +
 +#if defined(GMX_FAHCORE) || defined(GMX_WRITELASTSTEP)
 +        if (bLastStep)
 +        {
 +            /* Enforce writing positions and velocities at end of run */
 +            mdof_flags |= (MDOF_X | MDOF_V);
 +        }
 +#endif
 +#ifdef GMX_FAHCORE
 +        if (MASTER(cr))
 +            fcReportProgress( ir->nsteps, step );
 +
 +        /* sync bCPT and fc record-keeping */
 +        if (bCPT && MASTER(cr))
 +            fcRequestCheckPoint();
 +#endif
 +        
 +        if (mdof_flags != 0)
 +        {
 +            wallcycle_start(wcycle,ewcTRAJ);
 +            if (bCPT)
 +            {
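 +                /* Copy the stochastic-dynamics and Monte Carlo RNG states into the state structure so that they are written to the checkpoint */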
 +                if (state->flags & (1<<estLD_RNG))
 +                {
 +                    get_stochd_state(upd,state);
 +                }
 +                if (state->flags  & (1<<estMC_RNG))
 +                {
 +                    get_mc_state(mcrng,state);
 +                }
 +                if (MASTER(cr))
 +                {
 +                    if (bSumEkinhOld)
 +                    {
 +                        state_global->ekinstate.bUpToDate = FALSE;
 +                    }
 +                    else
 +                    {
 +                        update_ekinstate(&state_global->ekinstate,ekind);
 +                        state_global->ekinstate.bUpToDate = TRUE;
 +                    }
 +                    update_energyhistory(&state_global->enerhist,mdebin);
 +                    if (ir->efep!=efepNO || ir->bSimTemp) 
 +                    {
 +                        state_global->fep_state = state->fep_state; /* MRS: seems kludgy. The code should be
 +                                                                       structured so this isn't necessary.
 +                                                                       Note this reassignment is only necessary
 +                                                                       for single threads.*/
 +                        copy_df_history(&state_global->dfhist,&df_history);
 +                    }
 +                }
 +            }
 +            write_traj(fplog,cr,outf,mdof_flags,top_global,
 +                       step,t,state,state_global,f,f_global,&n_xtc,&x_xtc);
 +            if (bCPT)
 +            {
 +                nchkpt++;
 +                bCPT = FALSE;
 +            }
 +            debug_gmx();
 +            if (bLastStep && step_rel == ir->nsteps &&
 +                (Flags & MD_CONFOUT) && MASTER(cr) &&
 +                !bRerunMD && !bFFscan)
 +            {
 +                /* x and v have been collected in write_traj,
 +                 * because a checkpoint file will always be written
 +                 * at the last step.
 +                 */
 +                fprintf(stderr,"\nWriting final coordinates.\n");
 +                if (ir->ePBC != epbcNONE && !ir->bPeriodicMols &&
 +                    DOMAINDECOMP(cr))
 +                {
 +                    /* Make molecules whole only for confout writing */
 +                    do_pbc_mtop(fplog,ir->ePBC,state->box,top_global,state_global->x);
 +                }
 +                write_sto_conf_mtop(ftp2fn(efSTO,nfile,fnm),
 +                                    *top_global->name,top_global,
 +                                    state_global->x,state_global->v,
 +                                    ir->ePBC,state->box);
 +                debug_gmx();
 +            }
 +            wallcycle_stop(wcycle,ewcTRAJ);
 +        }
 +        
 +        /* kludge -- virial is lost with restart for NPT control. Must restart */
 +        if (bStartingFromCpt && bVV) 
 +        {
 +            copy_mat(state->svir_prev,shake_vir);
 +            copy_mat(state->fvir_prev,force_vir);
 +        }
 +        /*  ################## END TRAJECTORY OUTPUT ################ */
 +        
 +        /* Determine the pressure:
 +         * always when we want exact averages in the energy file,
 +         * at ns steps when we have pressure coupling,
 +         * otherwise only at energy output steps (set below).
 +         */
 +
 +        
 +        bNstEner = (bGStatEveryStep || do_per_step(step,ir->nstcalcenergy));
 +        bCalcEnerPres = bNstEner;
 +
 +        /* Do we need global communication ? */
 +        bGStat = (bGStatEveryStep || bStopCM || bNS ||
 +                  (ir->nstlist == -1 && !bRerunMD && step >= nlh.step_nscheck));
 +
 +        do_ene = (do_per_step(step,ir->nstenergy) || bLastStep);
 +
 +        if (do_ene || do_log)
 +        {
 +            bCalcEnerPres = TRUE;
 +            bGStat        = TRUE;
 +        }
 +
 +        /* Determine the wallclock run time up till now */
 +        run_time = gmx_gettime() - (double)runtime->real;
 +        /* Check whether everything is still all right */
 +        if (((int)gmx_get_stop_condition() > handled_stop_condition)
 +#ifdef GMX_THREAD_MPI
 +            && MASTER(cr)
 +#endif
 +            )
 +        {
 +            /* this is just to make gs.sig compatible with the hack
 +               of sending signals around by MPI_Reduce together with
 +               other floats */
 +            if ( gmx_get_stop_condition() == gmx_stop_cond_next_ns )
 +                gs.sig[eglsSTOPCOND]=1;
 +            if ( gmx_get_stop_condition() == gmx_stop_cond_next )
 +                gs.sig[eglsSTOPCOND]=-1;
 +            /* < 0 means stop at next step, > 0 means stop at next NS step */
 +            if (fplog)
 +            {
 +                fprintf(fplog,
 +                        "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
 +                        gmx_get_signal_name(),
 +                        gs.sig[eglsSTOPCOND]==1 ? "NS " : "");
 +                fflush(fplog);
 +            }
 +            fprintf(stderr,
 +                    "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
 +                    gmx_get_signal_name(),
 +                    gs.sig[eglsSTOPCOND]==1 ? "NS " : "");
 +            fflush(stderr);
 +            handled_stop_condition=(int)gmx_get_stop_condition();
 +        }
 +        else if (MASTER(cr) && (bNS || ir->nstlist <= 0) &&
 +                 (max_hours > 0 && run_time > max_hours*60.0*60.0*0.99) &&
 +                 gs.sig[eglsSTOPCOND] == 0 && gs.set[eglsSTOPCOND] == 0)
 +        {
 +            /* Signal to terminate the run */
 +            gs.sig[eglsSTOPCOND] = 1;
 +            if (fplog)
 +            {
 +                fprintf(fplog,"\nStep %s: Run time exceeded %.3f hours, will terminate the run\n",gmx_step_str(step,sbuf),max_hours*0.99);
 +            }
 +            fprintf(stderr, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n",gmx_step_str(step,sbuf),max_hours*0.99);
 +        }
 +
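 +        /* Just before half of the -maxh time limit has passed, request a reset of the performance counters so that the reported timings exclude the start-up phase (half-max_hours counter reset) */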
 +        if (bResetCountersHalfMaxH && MASTER(cr) &&
 +            run_time > max_hours*60.0*60.0*0.495)
 +        {
 +            gs.sig[eglsRESETCOUNTERS] = 1;
 +        }
 +
 +        if (ir->nstlist == -1 && !bRerunMD)
 +        {
 +            /* When bGStatEveryStep=FALSE, global_stat is only called
 +             * when we check the atom displacements, not at NS steps.
 +             * This means that the bonded interaction count check is also not
 +             * performed immediately after NS. Therefore a few MD steps could
 +             * be performed with missing interactions.
 +             * But wrong energies are never written to file,
 +             * since energies are only written after global_stat
 +             * has been called.
 +             */
 +            if (step >= nlh.step_nscheck)
 +            {
 +                nlh.nabnsb = natoms_beyond_ns_buffer(ir,fr,&top->cgs,
 +                                                     nlh.scale_tot,state->x);
 +            }
 +            else
 +            {
 +                /* This is not necessarily true,
 +                 * but step_nscheck is determined quite conservatively.
 +                 */
 +                nlh.nabnsb = 0;
 +            }
 +        }
 +
 +        /* In parallel we only have to check for checkpointing in steps
 +         * where we do global communication,
 +         *  otherwise the other nodes don't know.
 +         */
 +        if (MASTER(cr) && ((bGStat || !PAR(cr)) &&
 +                           cpt_period >= 0 &&
 +                           (cpt_period == 0 || 
 +                            run_time >= nchkpt*cpt_period*60.0)) &&
 +            gs.set[eglsCHKPT] == 0)
 +        {
 +            gs.sig[eglsCHKPT] = 1;
 +        }
 +
 +
 +        /* at the start of step, randomize the velocities */
 +        if (ETC_ANDERSEN(ir->etc) && EI_VV(ir->eI))
 +        {
 +            gmx_bool bDoAndersenConstr;
++            bDoAndersenConstr = (constr && update_randomize_velocities(ir,step,mdatoms,state,upd,&top->idef,constr));
 +            /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */
 +            if (bDoAndersenConstr)
 +            {
 +                update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,state,graph,f,
 +                                   &top->idef,tmp_vir,NULL,
 +                                   cr,nrnb,wcycle,upd,constr,
 +                                   bInitStep,TRUE,FALSE,vetanew);
 +            }
 +        }
 +
 +        if (bIterations)
 +        {
 +            gmx_iterate_init(&iterate,bIterations);
 +        }
 +    
 +        /* for iterations, we save these vectors, as we will be redoing the calculations */
 +        if (bIterations && iterate.bIterate) 
 +        {
 +            copy_coupling_state(state,bufstate,ekind,ekind_save,&(ir->opts));
 +        }
 +        bFirstIterate = TRUE;
 +        while (bFirstIterate || (bIterations && iterate.bIterate))
 +        {
 +            /* We now restore these vectors to redo the calculation with improved extended variables */    
 +            if (bIterations) 
 +            { 
 +                copy_coupling_state(bufstate,state,ekind_save,ekind,&(ir->opts));
 +            }
 +
 +            /* We make the decision to break or not -after- the calculation of Ekin and Pressure,
 +               so scroll down for that logic */
 +            
 +            /* #########   START SECOND UPDATE STEP ################# */
 +            /* Box is changed in update() when we do pressure coupling,
 +             * but we should still use the old box for energy corrections and when
 +             * writing it to the energy file, so it matches the trajectory files for
 +             * the same timestep above. Make a copy in a separate array.
 +             */
 +            copy_mat(state->box,lastbox);
 +
 +            bOK = TRUE;
 +            if (!(bRerunMD && !rerun_fr.bV && !bForceUpdate))
 +            {
 +                wallcycle_start(wcycle,ewcUPDATE);
 +                dvdl = 0;
 +                /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */
 +                if (bTrotter) 
 +                {
 +                    if (bIterations && iterate.bIterate) 
 +                    {
 +                        if (bFirstIterate) 
 +                        {
 +                            scalevir = 1;
 +                        }
 +                        else 
 +                        {
 +                            /* we use a new value of scalevir to converge the iterations faster */
 +                            scalevir = tracevir/trace(shake_vir);
 +                        }
 +                        msmul(shake_vir,scalevir,shake_vir); 
 +                        m_add(force_vir,shake_vir,total_vir);
 +                        clear_mat(shake_vir);
 +                    }
 +                    trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ3);
 +                /* We can only do Berendsen coupling after we have summed
 +                 * the kinetic energy or virial. Since this happens
 +                 * in global_stat after update, we should only do it at
 +                 * step % nstlist = 1 with bGStatEveryStep=FALSE.
 +                 */
 +                }
 +                else 
 +                {
 +                    update_tcouple(fplog,step,ir,state,ekind,wcycle,upd,&MassQ,mdatoms);
 +                    update_pcouple(fplog,step,ir,state,pcoupl_mu,M,wcycle,
 +                                   upd,bInitStep);
 +                }
 +
 +                if (bVV)
 +                {
 +                    /* velocity half-step update */
 +                    update_coords(fplog,step,ir,mdatoms,state,f,
 +                                  fr->bTwinRange && bNStList,fr->f_twin,fcd,
 +                                  ekind,M,wcycle,upd,FALSE,etrtVELOCITY2,
 +                                  cr,nrnb,constr,&top->idef);
 +                }
 +
 +                /* Above, initialize just copies ekinh into ekin,
 +                 * it doesn't copy position (for VV),
 +                 * and entire integrator for MD.
 +                 */
 +                
 +                if (ir->eI==eiVVAK) 
 +                {
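 +                    /* Save the current positions; they are restored below once the Trotter update has determined the scaling */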
 +                    copy_rvecn(state->x,cbuf,0,state->natoms);
 +                }
 +                
 +                update_coords(fplog,step,ir,mdatoms,state,f,fr->bTwinRange && bNStList,fr->f_twin,fcd,
 +                              ekind,M,wcycle,upd,bInitStep,etrtPOSITION,cr,nrnb,constr,&top->idef);
 +                wallcycle_stop(wcycle,ewcUPDATE);
 +
 +                update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,state,graph,f,
 +                                   &top->idef,shake_vir,force_vir,
 +                                   cr,nrnb,wcycle,upd,constr,
 +                                   bInitStep,FALSE,bCalcEnerPres,state->veta);  
 +                
 +                if (ir->eI==eiVVAK)
 +                {
 +                    /* erase F_EKIN and F_TEMP here? */
 +                    /* just compute the kinetic energy at the half step to perform a trotter step */
 +                    compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
 +                                    wcycle,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
 +                                    constr,NULL,FALSE,lastbox,
 +                                    top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
 +                                    cglo_flags | CGLO_TEMPERATURE
 +                        );
 +                    wallcycle_start(wcycle,ewcUPDATE);
 +                    trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ4);            
 +                    /* now we know the scaling, we can compute the positions again */
 +                    copy_rvecn(cbuf,state->x,0,state->natoms);
 +
 +                    update_coords(fplog,step,ir,mdatoms,state,f,fr->bTwinRange && bNStList,fr->f_twin,fcd,
 +                                  ekind,M,wcycle,upd,bInitStep,etrtPOSITION,cr,nrnb,constr,&top->idef);
 +                    wallcycle_stop(wcycle,ewcUPDATE);
 +
 +                    /* do we need an extra constraint here? just need to copy out of state->v to upd->xp? */
 +                    /* are the small terms in the shake_vir here due
 +                     * to numerical errors, or are they important
 +                     * physically? I'm thinking they are just errors, but not completely sure. 
 +                     * For now, will call without actually constraining, constr=NULL*/
 +                    update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,state,graph,f,
 +                                       &top->idef,tmp_vir,force_vir,
 +                                       cr,nrnb,wcycle,upd,NULL,
 +                                       bInitStep,FALSE,bCalcEnerPres,
 +                                       state->veta);  
 +                }
 +                if (!bOK && !bFFscan) 
 +                {
 +                    gmx_fatal(FARGS,"Constraint error: Shake, Lincs or Settle could not solve the constraints");
 +                }
 +                
 +                if (fr->bSepDVDL && fplog && do_log) 
 +                {
 +                    fprintf(fplog,sepdvdlformat,"Constraint dV/dl",0.0,dvdl);
 +                }
 +                enerd->term[F_DVDL_BONDED] += dvdl;
 +            } 
 +            else if (graph) 
 +            {
 +                /* Need to unshift here */
 +                unshift_self(graph,state->box,state->x);
 +            }
 +
 +            if (vsite != NULL) 
 +            {
 +                wallcycle_start(wcycle,ewcVSITECONSTR);
 +                if (graph != NULL) 
 +                {
 +                    shift_self(graph,state->box,state->x);
 +                }
 +                construct_vsites(fplog,vsite,state->x,nrnb,ir->delta_t,state->v,
 +                                 top->idef.iparams,top->idef.il,
 +                                 fr->ePBC,fr->bMolPBC,graph,cr,state->box);
 +                
 +                if (graph != NULL) 
 +                {
 +                    unshift_self(graph,state->box,state->x);
 +                }
 +                wallcycle_stop(wcycle,ewcVSITECONSTR);
 +            }
 +            
 +            /* ############## IF NOT VV, Calculate globals HERE, also iterate constraints ############ */
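 +            /* With nstlist=-1, pass the locally counted number of atoms beyond the NS buffer through the global signalling, so that all nodes agree on when to do neighbour searching */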
 +            if (ir->nstlist == -1 && bFirstIterate)
 +            {
 +                gs.sig[eglsNABNSB] = nlh.nabnsb;
 +            }
 +            bEnergyHere = (!EI_VV(ir->eI) || (EI_VV(ir->eI) && bRerunMD)); /* this is not quite working for vv and rerun! fails for running rerun on multiple threads. This is caught in runner.c. */
 +            compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
 +                            wcycle,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
 +                            constr,
 +                            bFirstIterate ? &gs : NULL, 
 +                            (step_rel % gs.nstms == 0) && 
 +                                (multisim_nsteps<0 || (step_rel<multisim_nsteps)),
 +                            lastbox,
 +                            top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
 +                            cglo_flags 
 +                            | (!EI_VV(ir->eI) ? CGLO_ENERGY : 0) 
 +                            | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0)
 +                            | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0) 
 +                            | (bEnergyHere || bRerunMD ? CGLO_PRESSURE : 0) 
 +                            | (bIterations && iterate.bIterate ? CGLO_ITERATE : 0) 
 +                            | (bFirstIterate ? CGLO_FIRSTITERATE : 0)
 +                            | CGLO_CONSTRAINT
 +                );
 +            if (ir->nstlist == -1 && bFirstIterate)
 +            {
 +                nlh.nabnsb = gs.set[eglsNABNSB];
 +                gs.set[eglsNABNSB] = 0;
 +            }
 +            /* bIterate is set to keep it from eliminating the old kinetic energy (ekin) terms */
 +            /* #############  END CALC EKIN AND PRESSURE ################# */
 +        
 +            /* Note: this is OK, but there are some numerical precision issues with using the convergence of
 +               the virial that should probably be addressed eventually. state->veta has better properties,
 +               but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could
 +               generate the new shake_vir, but test the veta value for convergence.  This will take some thought. */
 +
 +            if (bIterations && 
 +                done_iterating(cr,fplog,step,&iterate,bFirstIterate,
 +                               trace(shake_vir),&tracevir)) 
 +            {
 +                break;
 +            }
 +            bFirstIterate = FALSE;
 +        }
 +
 +        /* only add constraint dvdl after constraints */
 +        enerd->term[F_DVDL_BONDED] += dvdl;
 +        if (!bVV)
 +        {
 +            /* sum up the foreign energy and dhdl terms for md and sd. currently done every step so that dhdl is correct in the .edr */
 +            sum_dhdl(enerd,state->lambda,ir->fepvals);
 +        }
 +        update_box(fplog,step,ir,mdatoms,state,graph,f,
 +                   ir->nstlist==-1 ? &nlh.scale_tot : NULL,pcoupl_mu,nrnb,wcycle,upd,bInitStep,FALSE);
 +        
 +        /* ################# END UPDATE STEP 2 ################# */
 +        /* #### We now have r(t+dt) and v(t+dt/2)  ############# */
 +    
 +        /* The coordinates (x) were unshifted in update */
 +        if (bFFscan && (shellfc==NULL || bConverged))
 +        {
 +            if (print_forcefield(fplog,enerd->term,mdatoms->homenr,
 +                                 f,NULL,xcopy,
 +                                 &(top_global->mols),mdatoms->massT,pres))
 +            {
 +                gmx_finalize_par();
 +
 +                fprintf(stderr,"\n");
 +                exit(0);
 +            }
 +        }
 +        if (!bGStat)
 +        {
 +            /* We will not sum ekinh_old,
 +             * so signal that we still have to do it.
 +             */
 +            bSumEkinhOld = TRUE;
 +        }
 +        
 +        if (bTCR)
 +        {
 +            /* Only do GCT when the relaxation of shells (minimization) has converged,
 +             * otherwise we might be coupling to bogus energies. 
 +             * In parallel we must always do this, because the other sims might
 +             * update the FF.
 +             */
 +
 +            /* Since this is called with the new coordinates state->x, I assume
 +             * we want the new box state->box too. / EL 20040121
 +             */
 +            do_coupling(fplog,oenv,nfile,fnm,tcr,t,step,enerd->term,fr,
 +                        ir,MASTER(cr),
 +                        mdatoms,&(top->idef),mu_aver,
 +                        top_global->mols.nr,cr,
 +                        state->box,total_vir,pres,
 +                        mu_tot,state->x,f,bConverged);
 +            debug_gmx();
 +        }
 +
 +        /* #########  BEGIN PREPARING EDR OUTPUT  ###########  */
 +
 +        /* use the directly determined last velocity, not actually the averaged half steps */
 +        if (bTrotter && ir->eI==eiVV) 
 +        {
 +            enerd->term[F_EKIN] = last_ekin;
 +        }
 +        enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN];
 +        
 +        if (bVV)
 +        {
 +            enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity;
 +        }
 +        else 
 +        {
 +            enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + compute_conserved_from_auxiliary(ir,state,&MassQ);
 +        }
 +        /* Check for excessively large energies */
 +        if (bIonize) 
 +        {
 +#ifdef GMX_DOUBLE
 +            real etot_max = 1e200;
 +#else
 +            real etot_max = 1e30;
 +#endif
 +            if (fabs(enerd->term[F_ETOT]) > etot_max) 
 +            {
 +                fprintf(stderr,"Energy too large (%g), giving up\n",
 +                        enerd->term[F_ETOT]);
 +            }
 +        }
 +        /* #########  END PREPARING EDR OUTPUT  ###########  */
 +        
 +        /* Time for performance */
 +        if (((step % stepout) == 0) || bLastStep) 
 +        {
 +            runtime_upd_proc(runtime);
 +        }
 +        
 +        /* Output stuff */
 +        if (MASTER(cr))
 +        {
 +            gmx_bool do_dr,do_or;
 +            
 +            if (fplog && do_log && bDoExpanded)
 +            {
 +                /* only needed if doing expanded ensemble */
 +                PrintFreeEnergyInfoToFile(fplog,ir->fepvals,ir->expandedvals,ir->bSimTemp?ir->simtempvals:NULL,
 +                                          &df_history,state->fep_state,ir->nstlog,step);
 +            }
 +            if (!(bStartingFromCpt && (EI_VV(ir->eI)))) 
 +            {
 +                if (bNstEner)
 +                {
 +                    upd_mdebin(mdebin,bDoDHDL,TRUE,
 +                               t,mdatoms->tmass,enerd,state,
 +                               ir->fepvals,ir->expandedvals,lastbox,
 +                               shake_vir,force_vir,total_vir,pres,
 +                               ekind,mu_tot,constr);
 +                }
 +                else
 +                {
 +                    upd_mdebin_step(mdebin);
 +                }
 +                
 +                do_dr  = do_per_step(step,ir->nstdisreout);
 +                do_or  = do_per_step(step,ir->nstorireout);
 +                
 +                print_ebin(outf->fp_ene,do_ene,do_dr,do_or,do_log?fplog:NULL,
 +                           step,t,
 +                           eprNORMAL,bCompact,mdebin,fcd,groups,&(ir->opts));
 +            }
 +            if (ir->ePull != epullNO)
 +            {
 +                pull_print_output(ir->pull,step,t);
 +            }
 +            
 +            if (do_per_step(step,ir->nstlog))
 +            {
 +                if(fflush(fplog) != 0)
 +                {
 +                    gmx_fatal(FARGS,"Cannot flush logfile - maybe you are out of disk space?");
 +                }
 +            }
 +        }
 +        if (bDoExpanded)
 +        {
 +            /* Have to do this part after outputting the logfile and the edr file */
 +            state->fep_state = lamnew;
 +            for (i=0;i<efptNR;i++)
 +            {
 +                state->lambda[i] = ir->fepvals->all_lambda[i][lamnew];
 +            }
 +        }
 +        /* Remaining runtime */
 +        if (MULTIMASTER(cr) && (do_verbose || gmx_got_usr_signal() ))
 +        {
 +            if (shellfc) 
 +            {
 +                fprintf(stderr,"\n");
 +            }
 +            print_time(stderr,runtime,step,ir,cr);
 +        }
 +
 +        /* Replica exchange */
 +        bExchanged = FALSE;
 +        if ((repl_ex_nst > 0) && (step > 0) && !bLastStep &&
 +            do_per_step(step,repl_ex_nst)) 
 +        {
 +            bExchanged = replica_exchange(fplog,cr,repl_ex,
 +                                          state_global,enerd,
 +                                          state,step,t);
 +
 +            if (bExchanged && DOMAINDECOMP(cr)) 
 +            {
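 +                /* An accepted exchange replaces the local state, so the system has to be repartitioned over the domains */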
 +                dd_partition_system(fplog,step,cr,TRUE,1,
 +                                    state_global,top_global,ir,
 +                                    state,&f,mdatoms,top,fr,
 +                                    vsite,shellfc,constr,
 +                                    nrnb,wcycle,FALSE);
 +            }
 +        }
 +        
 +        bFirstStep = FALSE;
 +        bInitStep = FALSE;
 +        bStartingFromCpt = FALSE;
 +
 +        /* #######  SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */
 +        /* With all integrators, except VV, we need to retain the pressure
 +         * at the current step for coupling at the next step.
 +         */
 +        if ((state->flags & (1<<estPRES_PREV)) &&
 +            (bGStatEveryStep ||
 +             (ir->nstpcouple > 0 && step % ir->nstpcouple == 0)))
 +        {
 +            /* Store the pressure in t_state for pressure coupling
 +             * at the next MD step.
 +             */
 +            copy_mat(pres,state->pres_prev);
 +        }
 +        
 +        /* #######  END SET VARIABLES FOR NEXT ITERATION ###### */
 +
 +        if ( (membed!=NULL) && (!bLastStep) )
 +        {
 +            rescale_membed(step_rel,membed,state_global->x);
 +        }
 +
 +        if (bRerunMD) 
 +        {
 +            if (MASTER(cr))
 +            {
 +                /* read next frame from input trajectory */
 +                bNotLastFrame = read_next_frame(oenv,status,&rerun_fr);
 +            }
 +
 +            if (PAR(cr))
 +            {
 +                rerun_parallel_comm(cr,&rerun_fr,&bNotLastFrame);
 +            }
 +        }
 +        
 +        if (!bRerunMD || !rerun_fr.bStep)
 +        {
 +            /* increase the MD step number */
 +            step++;
 +            step_rel++;
 +        }
 +        
 +        cycles = wallcycle_stop(wcycle,ewcSTEP);
 +        if (DOMAINDECOMP(cr) && wcycle)
 +        {
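 +            /* Account the cycles spent on this step to the domain decomposition step counter (used e.g. for dynamic load balancing) */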
 +            dd_cycles_add(cr->dd,cycles,ddCyclStep);
 +        }
 +        
 +        if (step_rel == wcycle_get_reset_counters(wcycle) ||
 +            gs.set[eglsRESETCOUNTERS] != 0)
 +        {
 +            /* Reset all the counters related to performance over the run */
 +            reset_all_counters(fplog,cr,step,&step_rel,ir,wcycle,nrnb,runtime);
 +            wcycle_set_reset_counters(wcycle,-1);
 +            /* Correct max_hours for the elapsed time */
 +            max_hours -= run_time/(60.0*60.0);
 +            bResetCountersHalfMaxH = FALSE;
 +            gs.set[eglsRESETCOUNTERS] = 0;
 +        }
 +
 +    }
 +    /* End of main MD loop */
 +    debug_gmx();
 +    
 +    /* Stop the time */
 +    runtime_end(runtime);
 +    
 +    if (bRerunMD && MASTER(cr))
 +    {
 +        close_trj(status);
 +    }
 +    
 +    if (!(cr->duty & DUTY_PME))
 +    {
 +        /* Tell the PME-only node to finish */
 +        gmx_pme_finish(cr);
 +    }
 +    
 +    if (MASTER(cr))
 +    {
 +        if (ir->nstcalcenergy > 0 && !bRerunMD) 
 +        {
 +            print_ebin(outf->fp_ene,FALSE,FALSE,FALSE,fplog,step,t,
 +                       eprAVER,FALSE,mdebin,fcd,groups,&(ir->opts));
 +        }
 +    }
 +
 +    done_mdoutf(outf);
 +
 +    debug_gmx();
 +
 +    if (ir->nstlist == -1 && nlh.nns > 0 && fplog)
 +    {
 +        fprintf(fplog,"Average neighborlist lifetime: %.1f steps, std.dev.: %.1f steps\n",nlh.s1/nlh.nns,sqrt(nlh.s2/nlh.nns - sqr(nlh.s1/nlh.nns)));
 +        fprintf(fplog,"Average number of atoms that crossed the half buffer length: %.1f\n\n",nlh.ab/nlh.nns);
 +    }
 +    
 +    if (shellfc && fplog)
 +    {
 +        fprintf(fplog,"Fraction of iterations that converged:           %.2f %%\n",
 +                (nconverged*100.0)/step_rel);
 +        fprintf(fplog,"Average number of force evaluations per MD step: %.2f\n\n",
 +                tcount/step_rel);
 +    }
 +    
 +    if (repl_ex_nst > 0 && MASTER(cr))
 +    {
 +        print_replica_exchange_statistics(fplog,repl_ex);
 +    }
 +    
 +    runtime->nsteps_done = step_rel;
 +    
 +    return 0;
 +}
index dbf1014ee09c8ea4f43eca2033d31b4519cc7e68,d5833701aa3dc559a7bf80b9128d37c8b86dc0f3..a6162a43418cb81234447ee20f994c336d758292
@@@ -33,8 -35,9 +33,9 @@@ add_library(gmxan
              )
  
  
 -target_link_libraries(gmxana md gmx ${GSL_LIBRARIES})
 +target_link_libraries(gmxana libgromacs ${GSL_LIBRARIES})
- set_target_properties(gmxana PROPERTIES OUTPUT_NAME "gmxana${GMX_LIBS_SUFFIX}" SOVERSION ${SOVERSION} INSTALL_NAME_DIR "${LIB_INSTALL_DIR}")
+ set_target_properties(gmxana PROPERTIES OUTPUT_NAME "gmxana${GMX_LIBS_SUFFIX}" SOVERSION ${SOVERSION} INSTALL_NAME_DIR "${LIB_INSTALL_DIR}"
+     COMPILE_FLAGS "${OpenMP_C_FLAGS}")
  
  # List of programs with single corresponding .c source file,
  # used to create build rules automatically.
Simple merge
Simple merge
Simple merge
Simple merge