Merge remote-tracking branch 'gerrit/release-4-6'
author Roland Schulz <roland@utk.edu>
Sun, 18 Mar 2012 08:27:20 +0000 (04:27 -0400)
committer Roland Schulz <roland@utk.edu>
Sun, 18 Mar 2012 08:49:48 +0000 (04:49 -0400)
Conflicts:
cmake/gmxCFlags.cmake
src/gmxlib/trajana/trajana.c
src/gromacs/gmxlib/futil_test.c
src/gromacs/mdlib/gmx_qhop_db.h
src/gromacs/mdlib/gmx_qhop_db_test.c
src/gromacs/mdlib/gmx_qhop_parm.c
src/gromacs/mdlib/gmx_qhop_parm.h
src/gromacs/mdlib/gmx_qhop_xml.c
src/gromacs/mdlib/gmx_qhop_xml.h
src/mdlib/CMakeLists.txt
src/tools/gmx_membed.c

Modified:
        src/gromacs/mdlib/CMakeLists.txt

Change-Id: I488bad71819d07ccf14172e83b5226a312fa696c

61 files changed:
1  2 
CMakeLists.txt
cmake/gmxCFlags.cmake
src/gromacs/gmxlib/checkpoint.c
src/gromacs/gmxlib/confio.c
src/gromacs/gmxlib/copyrite.c
src/gromacs/gmxlib/enxio.c
src/gromacs/gmxlib/gmxfio.c
src/gromacs/gmxlib/maths.c
src/gromacs/gmxlib/md5.c
src/gromacs/gmxlib/network.c
src/gromacs/gmxlib/nonbonded/nb_kernel_bluegene/nb_kernel_gen_bluegene.h
src/gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel430_sse2_double.c
src/gromacs/gmxlib/pdbio.c
src/gromacs/gmxlib/splitter.c
src/gromacs/gmxlib/statutil.c
src/gromacs/gmxlib/string2.c
src/gromacs/gmxlib/thread_mpi/p2p_send_recv.c
src/gromacs/gmxlib/trnio.c
src/gromacs/gmxlib/vmdio.c
src/gromacs/gmxlib/wman.c
src/gromacs/gmxpreprocess/pdb2top.c
src/gromacs/gmxpreprocess/pgutil.c
src/gromacs/gmxpreprocess/readpull.c
src/gromacs/gmxpreprocess/readrot.c
src/gromacs/legacyheaders/assert.h
src/gromacs/legacyheaders/types/simple.h
src/gromacs/mdlib/CMakeLists.txt
src/gromacs/mdlib/domdec.c
src/gromacs/mdlib/domdec_setup.c
src/gromacs/mdlib/ebin.c
src/gromacs/mdlib/edsam.c
src/gromacs/mdlib/fft5d.c
src/gromacs/mdlib/forcerec.c
src/gromacs/mdlib/minimize.c
src/gromacs/mdlib/pull.c
src/gromacs/mdlib/sim_util.c
src/gromacs/mdlib/stat.c
src/gromacs/mdlib/tables.c
src/gromacs/mdlib/wnblist.c
src/programs/g_x2top/g_x2top.c
src/programs/gmxcheck/gmxcheck.c
src/programs/gmxdump/gmxdump.c
src/programs/mdrun/gmx_gpu_utils/memtestG80_core.cu
src/programs/mdrun/gmx_gpu_utils/memtestG80_core.h
src/programs/mdrun/md.c
src/programs/mdrun/md_openmm.c
src/programs/mdrun/mdrun.c
src/programs/mdrun/openmm_wrapper.cpp
src/programs/mdrun/runner.c
src/programs/mdrun/xmdrun.h
src/programs/mdrun/xutils.c
src/tools/CMakeLists.txt
src/tools/gmx_anaeig.c
src/tools/gmx_bond.c
src/tools/gmx_chi.c
src/tools/gmx_current.c
src/tools/gmx_density.c
src/tools/gmx_editconf.c
src/tools/gmx_membed.c
src/tools/gmx_spatial.c
src/tools/gmx_tune_pme.c

diff --cc CMakeLists.txt
Simple merge
index b3593c521fbf70e40d7de79c57419f79630dc20c,6449944af3e7c12bffa05aa12cc83ece4f6fd3b5..e3120d7b30dc4b95325bfd8b8fb0db929e3738ac
@@@ -46,8 -47,8 +47,9 @@@ MACRO(gmx_c_flags
          if(NOT GMX_OPENMP)
              GMX_TEST_CFLAG(CXXFLAGS_PRAGMA "-Wno-unknown-pragmas" GMXC_CXXFLAGS)
          endif()
-         GMX_TEST_CXXFLAG(CXXFLAGS_WARN "-Wall -Wno-unused-function" GMXC_CXXFLAGS)
-         GMX_TEST_CXXFLAG(CXXFLAGS_WARN "-Wnon-virtual-dtor -Wno-unused-parameter -Wsign-compare" GMXC_CXXFLAGS)
 -        GMX_TEST_CXXFLAG(CXXFLAGS_WARN "-Wall -Wno-unused" GMXC_CXXFLAGS)
 -        GMX_TEST_CXXFLAG(CXXFLAGS_WARN "-Wextra -Wno-missing-field-initializers -Wno-sign-compare" GMXC_CXXFLAGS)
++        GMX_TEST_CXXFLAG(CXXFLAGS_WARN "-Wall -Wno-unused-function -Wno-unused-parameter" GMXC_CXXFLAGS)
++        GMX_TEST_CXXFLAG(CXXFLAGS_WARN "-Wnon-virtual-dtor" GMXC_CXXFLAGS)
++        GMX_TEST_CXXFLAG(CXXFLAGS_WARN "-Wextra -Wno-missing-field-initializers" GMXC_CXXFLAGS)
        # new in gcc 4.5
          GMX_TEST_CXXFLAG(CXXFLAGS_EXCESS_PREC "-fexcess-precision=fast" 
                            GMXC_CXXFLAGS)
          GMX_TEST_CFLAG(CFLAGS_WARN "-Wall -Wno-unused" GMXC_CFLAGS)
      endif()
  
 +    if (CMAKE_C_COMPILER_ID MATCHES "Clang")
 +        if(NOT GMX_OPENMP)
 +            GMX_TEST_CFLAG(CXXFLAGS_PRAGMA "-Wno-unknown-pragmas" GMXC_CXXFLAGS)
 +        endif()
 +        GMX_TEST_CXXFLAG(CXXFLAGS_WARN "-Wall -Wno-unused-function" GMXC_CXXFLAGS)
 +    endif()
 +      
++    if (CMAKE_C_COMPILER_ID MATCHES "Clang")
++        if(NOT GMX_OPENMP)
++            GMX_TEST_CFLAG(CFLAGS_PRAGMA "-Wno-unknown-pragmas" GMXC_CFLAGS)
++        endif()
++        GMX_TEST_CFLAG(CFLAGS_WARN "-Wall -Wno-unused" GMXC_CFLAGS)
++    endif()
++
+     if (CMAKE_C_COMPILER_ID MATCHES "Clang")
+         if(NOT GMX_OPENMP)
+             GMX_TEST_CFLAG(CXXFLAGS_PRAGMA "-Wno-unknown-pragmas" GMXC_CXXFLAGS)
+         endif()
+         GMX_TEST_CXXFLAG(CXXFLAGS_WARN "-Wall -Wno-unused" GMXC_CXXFLAGS)
+     endif()
  
      # now actually set the flags:
      # C
Simple merge
Simple merge
index b8cb89039a177733046f19587aea5271fd0a6d83,0000000000000000000000000000000000000000..828aa3d621fb20e944044a984fd9d7be63f2f905
mode 100644,000000..100644
--- /dev/null
@@@ -1,663 -1,0 +1,663 @@@
- #include "time.h"
 +/*
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROningen Mixture of Alchemy and Childrens' Stories
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#ifdef GMX_THREAD_MPI
 +#include <thread_mpi.h>
 +#endif
 +
 +/* This file is completely threadsafe - keep it that way! */
 +
 +#include <string.h>
 +#include <ctype.h>
 +#include "sysstuff.h"
 +#include "smalloc.h"
 +#include "string2.h"
 +#include "macros.h"
++#include <time.h>
 +#include "random.h"
 +#include "statutil.h"
 +#include "copyrite.h"
 +#include "strdb.h"
 +#include "futil.h"
 +
 +static void pr_two(FILE *out,int c,int i)
 +{
 +  if (i < 10)
 +    fprintf(out,"%c0%1d",c,i);
 +  else
 +    fprintf(out,"%c%2d",c,i);
 +}
 +
 +void pr_difftime(FILE *out,double dt)
 +{
 +  int    ndays,nhours,nmins,nsecs;
 +  gmx_bool   bPrint,bPrinted;
 +
 +  ndays = dt/(24*3600);
 +  dt    = dt-24*3600*ndays;
 +  nhours= dt/3600;
 +  dt    = dt-3600*nhours;
 +  nmins = dt/60;
 +  dt    = dt-nmins*60;
 +  nsecs = dt;
 +  bPrint= (ndays > 0);
 +  bPrinted=bPrint;
 +  if (bPrint) 
 +    fprintf(out,"%d",ndays);
 +  bPrint=bPrint || (nhours > 0);
 +  if (bPrint) {
 +    if (bPrinted)
 +      pr_two(out,'d',nhours);
 +    else 
 +      fprintf(out,"%d",nhours);
 +  }
 +  bPrinted=bPrinted || bPrint;
 +  bPrint=bPrint || (nmins > 0);
 +  if (bPrint) {
 +    if (bPrinted)
 +      pr_two(out,'h',nmins);
 +    else 
 +      fprintf(out,"%d",nmins);
 +  }
 +  bPrinted=bPrinted || bPrint;
 +  if (bPrinted)
 +    pr_two(out,':',nsecs);
 +  else
 +    fprintf(out,"%ds",nsecs);
 +  fprintf(out,"\n");
 +}
 +
 +
 +gmx_bool be_cool(void)
 +{
 +  /* Yes, it is bad to check the environment variable every call,
 +   * but we dont call this routine often, and it avoids using 
 +   * a mutex for locking the variable...
 +   */
 +#ifdef GMX_FAHCORE
 +  /*be uncool*/
 +  return FALSE;
 +#else
 +  return (getenv("GMX_NO_QUOTES") == NULL);
 +#endif
 +}
 +
 +void space(FILE *out, int n)
 +{
 +  fprintf(out,"%*s",n,"");
 +}
 +
 +void f(char *a)
 +{
 +    int i;
 +    int len=strlen(a);
 +    
 +    for(i=0;i<len;i++)
 +        a[i]=~a[i]; 
 +}
 +
 +static void sp_print(FILE *out,const char *s)
 +{
 +  int slen;
 +  
 +  slen=strlen(s);
 +  space(out,(80-slen)/2);
 +  fprintf(out,"%s\n",s);
 +}
 +
 +static void ster_print(FILE *out,const char *s)
 +{
 +  int  slen;
 +  char buf[128];
 +  
 +  snprintf(buf,128,":-)  %s  (-:",s);
 +  slen=strlen(buf);
 +  space(out,(80-slen)/2);
 +  fprintf(out,"%s\n",buf);
 +}
 +
 +
 +static void pukeit(const char *db,const char *defstring, char *retstring, 
 +                 int retsize, int *cqnum)
 +{
 +  FILE *fp;
 +  char **help;
 +  int  i,nhlp;
 +  int  seed;
 + 
 +  if (be_cool() && ((fp = low_libopen(db,FALSE)) != NULL)) {
 +    nhlp=fget_lines(fp,&help);
 +    /* for libraries we can use the low-level close routines */
 +    ffclose(fp);
 +    seed=time(NULL);
 +    *cqnum=nhlp*rando(&seed);
 +    if (strlen(help[*cqnum]) >= STRLEN)
 +      help[*cqnum][STRLEN-1] = '\0';
 +    strncpy(retstring,help[*cqnum],retsize);
 +    f(retstring);
 +    for(i=0; (i<nhlp); i++)
 +      sfree(help[i]);
 +    sfree(help);
 +  }
 +  else 
 +    strncpy(retstring,defstring,retsize);
 +}
 +
 +void bromacs(char *retstring, int retsize)
 +{
 +  int dum;
 +
 +  pukeit("bromacs.dat",
 +       "Groningen Machine for Chemical Simulation",
 +       retstring,retsize,&dum);
 +}
 +
 +void cool_quote(char *retstring, int retsize, int *cqnum)
 +{
 +  char *tmpstr;
 +  char *s,*ptr;
 +  int tmpcq,*p;
 +  
 +  if (cqnum!=NULL)
 +    p = cqnum;
 +  else
 +    p = &tmpcq;
 +  
 +  /* protect audience from explicit lyrics */
 +  snew(tmpstr,retsize+1);
 +  pukeit("gurgle.dat","Thanx for Using GROMACS - Have a Nice Day",
 +       tmpstr,retsize-2,p);
 +
 +  if ((ptr = strchr(tmpstr,'_')) != NULL) {
 +    *ptr='\0';
 +    ptr++;
 +    sprintf(retstring,"\"%s\" %s",tmpstr,ptr);
 +  }
 +  else {
 +    strcpy(retstring,tmpstr);
 +  }
 +  sfree(tmpstr);
 +}
 +
 +void CopyRight(FILE *out,const char *szProgram)
 +{
 +  static const char * CopyrightText[] = {
 +             "Written by Emile Apol, Rossen Apostolov, Herman J.C. Berendsen,",
 +             "Aldert van Buuren, Pär Bjelkmar, Rudi van Drunen, Anton Feenstra, ",
 +             "Gerrit Groenhof, Peter Kasson, Per Larsson, Pieter Meulenhoff, ",
 +             "Teemu Murtola, Szilard Pall, Sander Pronk, Roland Schulz, ",
 +             "Michael Shirts, Alfons Sijbers, Peter Tieleman,\n",
 +             "Berk Hess, David van der Spoel, and Erik Lindahl.\n",
 +             "Copyright (c) 1991-2000, University of Groningen, The Netherlands.",
 +             "Copyright (c) 2001-2010, The GROMACS development team at",
 +             "Uppsala University & The Royal Institute of Technology, Sweden.",
 +             "check out http://www.gromacs.org for more information.\n"
 +  };
 +
 +  static const char * GPLText[] = {
 +              "This program is free software; you can redistribute it and/or",
 +              "modify it under the terms of the GNU General Public License",
 +              "as published by the Free Software Foundation; either version 2",
 +              "of the License, or (at your option) any later version."
 +  };
 +
 +  /* Dont change szProgram arbitrarily - it must be argv[0], i.e. the 
 +   * name of a file. Otherwise, we won't be able to find the library dir.
 +   */
 +#define NCR (int)asize(CopyrightText)
 +#ifdef GMX_FAHCORE
 +#define NGPL 0 /*FAH has an exception permission from GPL to allow digital signatures in Gromacs*/
 +#else
 +#define NGPL (int)asize(GPLText)
 +#endif
 +
 +  char buf[256],tmpstr[1024];
 +  int i;
 +
 +#ifdef GMX_FAHCORE
 +  set_program_name("Gromacs");
 +#else
 +  set_program_name(szProgram);
 +#endif
 +
 +  ster_print(out,"G  R  O  M  A  C  S");
 +  fprintf(out,"\n");
 +  
 +  bromacs(tmpstr,1023);
 +  sp_print(out,tmpstr); 
 +  fprintf(out,"\n");
 +
 +  ster_print(out,GromacsVersion());
 +  fprintf(out,"\n");
 +
 +  /* fprintf(out,"\n");*/
 +
 +  /* sp_print(out,"PLEASE NOTE: THIS IS A BETA VERSION\n");
 +  
 +  fprintf(out,"\n"); */
 +
 +  for(i=0; (i<NCR); i++) 
 +    sp_print(out,CopyrightText[i]);
 +  for(i=0; (i<NGPL); i++)
 +    sp_print(out,GPLText[i]);
 +
 +  fprintf(out,"\n");
 +
 +  snprintf(buf,256,"%s",Program());
 +#ifdef GMX_DOUBLE
 +  strcat(buf," (double precision)");
 +#endif
 +  ster_print(out,buf);
 +  fprintf(out,"\n");
 +}
 +
 +
 +void thanx(FILE *fp)
 +{
 +  char cq[1024];
 +  int  cqnum;
 +
 +  /* protect the audience from suggestive discussions */
 +  cool_quote(cq,1023,&cqnum);
 +  
 +  if (be_cool()) 
 +    fprintf(fp,"\ngcq#%d: %s\n\n",cqnum,cq);
 +  else
 +    fprintf(fp,"\n%s\n\n",cq);
 +}
 +
 +/* One entry of the citation table printed by please_cite(). */
 +typedef struct {
 +  const char *key;      /* identifier compared with please_cite()'s key */
 +  const char *author;   /* author list; line-wrapped before printing */
 +  const char *title;    /* title; line-wrapped before printing */
 +  const char *journal;  /* printed verbatim before the volume */
 +  int volume,year;      /* printed as "<volume> (<year>)" */
 +  const char *pages;    /* printed after "pp. " */
 +} t_citerec;
 +
 +void please_cite(FILE *fp,const char *key)
 +{
 +  static const t_citerec citedb[] = {
 +    { "Allen1987a",
 +      "M. P. Allen and D. J. Tildesley",
 +      "Computer simulation of liquids",
 +      "Oxford Science Publications",
 +      1, 1987, "1" },
 +    { "Berendsen95a",
 +      "H. J. C. Berendsen, D. van der Spoel and R. van Drunen",
 +      "GROMACS: A message-passing parallel molecular dynamics implementation",
 +      "Comp. Phys. Comm.",
 +      91, 1995, "43-56" },
 +    { "Berendsen84a",
 +      "H. J. C. Berendsen, J. P. M. Postma, A. DiNola and J. R. Haak",
 +      "Molecular dynamics with coupling to an external bath",
 +      "J. Chem. Phys.",
 +      81, 1984, "3684-3690" },
 +    { "Ryckaert77a",
 +      "J. P. Ryckaert and G. Ciccotti and H. J. C. Berendsen",
 +      "Numerical Integration of the Cartesian Equations of Motion of a System with Constraints; Molecular Dynamics of n-Alkanes",
 +      "J. Comp. Phys.",
 +      23, 1977, "327-341" },
 +    { "Miyamoto92a",
 +      "S. Miyamoto and P. A. Kollman",
 +      "SETTLE: An Analytical Version of the SHAKE and RATTLE Algorithms for Rigid Water Models",
 +      "J. Comp. Chem.",
 +      13, 1992, "952-962" },
 +    { "Cromer1968a",
 +      "D. T. Cromer & J. B. Mann",
 +      "X-ray scattering factors computed from numerical Hartree-Fock wave functions",
 +      "Acta Cryst. A",
 +      24, 1968, "321" },
 +    { "Barth95a",
 +      "E. Barth and K. Kuczera and B. Leimkuhler and R. D. Skeel",
 +      "Algorithms for Constrained Molecular Dynamics",
 +      "J. Comp. Chem.",
 +      16, 1995, "1192-1209" },
 +    { "Essmann95a",
 +      "U. Essmann, L. Perera, M. L. Berkowitz, T. Darden, H. Lee and L. G. Pedersen ",
 +      "A smooth particle mesh Ewald method",
 +      "J. Chem. Phys.",
 +      103, 1995, "8577-8592" },
 +    { "Torda89a",
 +      "A. E. Torda and R. M. Scheek and W. F. van Gunsteren",
 +      "Time-dependent distance restraints in molecular dynamics simulations",
 +      "Chem. Phys. Lett.",
 +      157, 1989, "289-294" },
 +    { "Tironi95a",
 +      "I. G. Tironi and R. Sperb and P. E. Smith and W. F. van Gunsteren",
 +      "Generalized reaction field method for molecular dynamics simulations",
 +      "J. Chem. Phys",
 +      102, 1995, "5451-5459" },
 +    { "Hess97a",
 +      "B. Hess and H. Bekker and H. J. C. Berendsen and J. G. E. M. Fraaije",
 +      "LINCS: A Linear Constraint Solver for molecular simulations",
 +      "J. Comp. Chem.",
 +      18, 1997, "1463-1472" },
 +    { "Hess2008a",
 +      "B. Hess",
 +      "P-LINCS: A Parallel Linear Constraint Solver for molecular simulation",
 +      "J. Chem. Theory Comput.",
 +      4, 2008, "116-122" },
 +    { "Hess2008b",
 +      "B. Hess and C. Kutzner and D. van der Spoel and E. Lindahl",
 +      "GROMACS 4: Algorithms for highly efficient, load-balanced, and scalable molecular simulation",
 +      "J. Chem. Theory Comput.",
 +      4, 2008, "435-447" },
 +    { "Hub2010",
 +      "J. S. Hub, B. L. de Groot and D. van der Spoel",
 +      "g_wham - A free weighted histogram analysis implementation including robust error and autocorrelation estimates",
 +      "J. Chem. Theory Comput.",
 +      6, 2010, "3713-3720"}, 
 +    { "In-Chul99a",
 +      "Y. In-Chul and M. L. Berkowitz",
 +      "Ewald summation for systems with slab geometry",
 +      "J. Chem. Phys.",
 +      111, 1999, "3155-3162" },
 +    { "DeGroot97a",
 +      "B. L. de Groot and D. M. F. van Aalten and R. M. Scheek and A. Amadei and G. Vriend and H. J. C. Berendsen",
 +      "Prediction of Protein Conformational Freedom From Distance Constrains",
 +      "Proteins",
 +      29, 1997, "240-251" },
 +    { "Spoel98a",
 +      "D. van der Spoel and P. J. van Maaren and H. J. C. Berendsen",
 +      "A systematic study of water models for molecular simulation. Derivation of models optimized for use with a reaction-field.",
 +      "J. Chem. Phys.",
 +      108, 1998, "10220-10230" },
 +    { "Wishart98a",
 +      "D. S. Wishart and A. M. Nip",
 +      "Protein Chemical Shift Analysis: A Practical Guide",
 +      "Biochem. Cell Biol.",
 +      76, 1998, "153-163" },
 +    { "Maiorov95",
 +      "V. N. Maiorov and G. M. Crippen",
 +      "Size-Independent Comparison of Protein Three-Dimensional Structures",
 +      "PROTEINS: Struct. Funct. Gen.",
 +      22, 1995, "273-283" },
 +    { "Feenstra99",
 +      "K. A. Feenstra and B. Hess and H. J. C. Berendsen",
 +      "Improving Efficiency of Large Time-scale Molecular Dynamics Simulations of Hydrogen-rich Systems",
 +      "J. Comput. Chem.",
 +      20, 1999, "786-798" },
 +    { "Timneanu2004a",
 +      "N. Timneanu and C. Caleman and J. Hajdu and D. van der Spoel",
 +      "Auger Electron Cascades in Water and Ice",
 +      "Chem. Phys.",
 +      299, 2004, "277-283" },
 +    { "Pascal2011a",
 +      "T. A. Pascal and S. T. Lin and W. A. Goddard III",
 +      "Thermodynamics of liquids: standard molar entropies and heat capacities of common solvents from 2PT molecular dynamics",
 +      "Phys. Chem. Chem. Phys.",
 +      13, 2011, "169-181" },
 +    { "Caleman2011b",
 +      "C. Caleman and M. Hong and J. S. Hub and L. T. da Costa and P. J. van Maaren and D. van der Spoel",
 +      "Force Field Benchmark 1: Density, Heat of Vaporization, Heat Capacity, Surface Tension and Dielectric Constant of 152 Organic Liquids",
 +      "Submitted",
 +      0, 2011, "" },
 +    { "Lindahl2001a",
 +      "E. Lindahl and B. Hess and D. van der Spoel",
 +      "GROMACS 3.0: A package for molecular simulation and trajectory analysis",
 +      "J. Mol. Mod.",
 +      7, 2001, "306-317" },
 +    { "Wang2001a",
 +      "J. Wang and W. Wang and S. Huo and M. Lee and P. A. Kollman",
 +      "Solvation model based on weighted solvent accessible surface area",
 +      "J. Phys. Chem. B",
 +      105, 2001, "5055-5067" },
 +    { "Eisenberg86a",
 +      "D. Eisenberg and A. D. McLachlan",
 +      "Solvation energy in protein folding and binding",
 +      "Nature",
 +      319, 1986, "199-203" },
 +    { "Eisenhaber95",
 +      "Frank Eisenhaber and Philip Lijnzaad and Patrick Argos and Chris Sander and Michael Scharf",
 +      "The Double Cube Lattice Method: Efficient Approaches to Numerical Integration of Surface Area and Volume and to Dot Surface Contouring of Molecular Assemblies",
 +      "J. Comp. Chem.",
 +      16, 1995, "273-284" },
 +    { "Hess2002",
 +      "B. Hess, H. Saint-Martin and H.J.C. Berendsen",
 +      "Flexible constraints: an adiabatic treatment of quantum degrees of freedom, with application to the flexible and polarizable MCDHO model for water",
 +      "J. Chem. Phys.",
 +      116, 2002, "9602-9610" },
 +    { "Hetenyi2002b",
 +      "Csaba Hetenyi and David van der Spoel",
 +      "Efficient docking of peptides to proteins without prior knowledge of the binding site.",
 +      "Prot. Sci.",
 +      11, 2002, "1729-1737" },
 +    { "Hess2003",
 +      "B. Hess and R.M. Scheek",
 +      "Orientation restraints in molecular dynamics simulations using time and ensemble averaging",
 +      "J. Magn. Res.",
 +      164, 2003, "19-27" },
 +    { "Rappe1991a",
 +      "A. K. Rappe and W. A. Goddard III",
 +      "Charge Equillibration for Molecular Dynamics Simulations",
 +      "J. Phys. Chem.",
 +      95, 1991, "3358-3363" },
 +    { "Mu2005a",
 +      "Y. Mu, P. H. Nguyen and G. Stock",
 +      "Energy landscape of a small peptide revelaed by dihedral angle principal component analysis",
 +      "Prot. Struct. Funct. Bioinf.",
 +      58, 2005, "45-52" },
 +    { "Okabe2001a",
 +      "T. Okabe and M. Kawata and Y. Okamoto and M. Mikami",
 +      "Replica-exchange {M}onte {C}arlo method for the isobaric-isothermal ensemble",
 +      "Chem. Phys. Lett.",
 +      335, 2001, "435-439" },
 +    { "Hukushima96a",
 +      "K. Hukushima and K. Nemoto",
 +      "Exchange Monte Carlo Method and Application to Spin Glass Simulations",
 +      "J. Phys. Soc. Jpn.",
 +      65, 1996, "1604-1608" },
 +    { "Tropp80a",
 +      "J. Tropp",
 +      "Dipolar Relaxation and Nuclear Overhauser effects in nonrigid molecules: The effect of fluctuating internuclear distances",
 +      "J. Chem. Phys.",
 +      72, 1980, "6035-6043" },
 +    { "Bultinck2002a",
 +       "P. Bultinck and W. Langenaeker and P. Lahorte and F. De Proft and P. Geerlings and M. Waroquier and J. P. Tollenaere",
 +      "The electronegativity equalization method I: Parametrization and validation for atomic charge calculations",
 +      "J. Phys. Chem. A",
 +      106, 2002, "7887-7894" },
 +    { "Yang2006b",
 +      "Q. Y. Yang and K. A. Sharp",
 +      "Atomic charge parameters for the finite difference Poisson-Boltzmann method using electronegativity neutralization",
 +      "J. Chem. Theory Comput.",
 +      2, 2006, "1152-1167" },
 +    { "Spoel2005a",
 +      "D. van der Spoel, E. Lindahl, B. Hess, G. Groenhof, A. E. Mark and H. J. C. Berendsen",
 +      "GROMACS: Fast, Flexible and Free",
 +      "J. Comp. Chem.",
 +      26, 2005, "1701-1719" },
 +    { "Spoel2006b",
 +      "D. van der Spoel, P. J. van Maaren, P. Larsson and N. Timneanu",
 +      "Thermodynamics of hydrogen bonding in hydrophilic and hydrophobic media",
 +      "J. Phys. Chem. B",
 +      110, 2006, "4393-4398" },
 +    { "Spoel2006d",
 +      "D. van der Spoel and M. M. Seibert",
 +      "Protein folding kinetics and thermodynamics from atomistic simulations",
 +      "Phys. Rev. Letters",
 +      96, 2006, "238102" },
 +    { "Palmer94a",
 +      "B. J. Palmer",
 +      "Transverse-current autocorrelation-function calculations of the shear viscosity for molecular liquids",
 +      "Phys. Rev. E",
 +      49, 1994, "359-366" },
 +    { "Bussi2007a",
 +      "G. Bussi, D. Donadio and M. Parrinello",
 +      "Canonical sampling through velocity rescaling",
 +      "J. Chem. Phys.",
 +      126, 2007, "014101" },
 +    { "Hub2006",
 +      "J. S. Hub and B. L. de Groot",
 +      "Does CO2 permeate through Aquaporin-1?",
 +      "Biophys. J.",
 +      91, 2006, "842-848" },
 +    { "Hub2008",
 +      "J. S. Hub and B. L. de Groot",
 +      "Mechanism of selectivity in aquaporins and aquaglyceroporins",
 +      "PNAS",
 +      105, 2008, "1198-1203" },
 +    { "Friedrich2009",
 +      "M. S. Friedrichs, P. Eastman, V. Vaidyanathan, M. Houston, S. LeGrand, A. L. Beberg, D. L. Ensign, C. M. Bruns, and V. S. Pande",
 +      "Accelerating Molecular Dynamic Simulation on Graphics Processing Units",
 +      "J. Comp. Chem.",
 +      30, 2009, "864-872" },
 +    { "Engin2010",
 +      "O. Engin, A. Villa, M. Sayar and B. Hess",
 +      "Driving Forces for Adsorption of Amphiphilic Peptides to Air-Water Interface",
 +      "J. Phys. Chem. B",
 +      0, 2010, "???" },
 +    { "Fritsch12",
 +      "S. Fritsch, C. Junghans and K. Kremer",
 +      "Adaptive molecular simulation study on structure formation of toluene around C60 using Gromacs",
 +      "J. Chem. Theo. Comp.",
 +      0, 2012, "doi:10.1021/ct200706f" },
 +    { "Junghans10",
 +      "C. Junghans and S. Poblete",
 +      "A reference implementation of the adaptive resolution scheme in ESPResSo",
 +      "Comp. Phys. Comm.",
 +      181, 2010, "1449" },
 +    { "Wang2010",
 +      "H. Wang, F. Dommert, C.Holm",
 +      "Optimizing working parameters of the smooth particle mesh Ewald algorithm in terms of accuracy and efficiency",
 +      "J. Chem. Phys. B",
 +      133, 2010, "034117" },
 +    { "Kutzner2011",
 +      "C. Kutzner and J. Czub and H. Grubmuller",
 +      "Keep it Flexible: Driving Macromolecular Rotary Motions in Atomistic Simulations with GROMACS",
 +      "J. Chem. Theory Comput.",
 +      7, 2011, "1381-1393" },
 +    { "Hoefling2011",
 +      "M. Hoefling, N. Lima, D. Haenni, C.A.M. Seidel, B. Schuler, H. Grubmuller",
 +      "Structural Heterogeneity and Quantitative FRET Efficiency Distributions of Polyprolines through a Hybrid Atomistic Simulation and Monte Carlo Approach",
 +      "PLoS ONE",
 +      6, 2011, "e19791"
 +    }
 +  };
 +#define NSTR (int)asize(citedb)
 +  
 +  int  j,index;
 +  char *author;
 +  char *title;
 +#define LINE_WIDTH 79
 +  
 +  if (fp == NULL)
 +    return;
 +
 +  for(index=0; (index<NSTR) && (strcmp(citedb[index].key,key) != 0); index++)
 +    ;
 +  
 +  fprintf(fp,"\n++++ PLEASE READ AND CITE THE FOLLOWING REFERENCE ++++\n");
 +  if (index < NSTR) {
 +    /* Insert newlines */
 +    author = wrap_lines(citedb[index].author,LINE_WIDTH,0,FALSE);
 +    title  = wrap_lines(citedb[index].title,LINE_WIDTH,0,FALSE);
 +    fprintf(fp,"%s\n%s\n%s %d (%d) pp. %s\n",
 +          author,title,citedb[index].journal,
 +          citedb[index].volume,citedb[index].year,
 +          citedb[index].pages);
 +    sfree(author);
 +    sfree(title);
 +  }
 +  else {
 +    fprintf(fp,"Entry %s not found in citation database\n",key);
 +  }
 +  fprintf(fp,"-------- -------- --- Thank You --- -------- --------\n\n");
 +  fflush(fp);
 +}
 +
 +#ifdef USE_VERSION_H
 +/* Version information generated at compile time. version.h is expected to
 + * provide _gmx_ver_string, _gmx_full_git_hash and _gmx_central_base_hash,
 + * which the functions below use when USE_VERSION_H is defined.
 + */
 +#include "version.h"
 +#else
 +/* Fall back to statically defined version. VERSION must expand to a string
 + * literal so that it concatenates with the "VERSION " prefix.
 + */
 +static const char _gmx_ver_string[]="VERSION " VERSION;
 +#endif
 +
 +/* Return the version string _gmx_ver_string, which comes either from
 + * version.h or from the static fallback definition in this file.
 + * The routine only returns a pointer to that constant string: it performs
 + * no initialization and takes no lock, and the caller must not modify or
 + * free the result.
 + */
 +const char *GromacsVersion()
 +{
 +  return _gmx_ver_string;
 +}
 +
 +
 +void gmx_print_version_info(FILE *fp)
 +{
 +    fprintf(fp, "Version:          %s\n", _gmx_ver_string);
 +#ifdef USE_VERSION_H
 +    fprintf(fp, "GIT SHA1 hash:    %s\n", _gmx_full_git_hash);
 +    /* Only print out the branch information if present.
 +     * The generating script checks whether the branch point actually
 +     * coincides with the hash reported above, and produces an empty string
 +     * in such cases. */
 +    if (_gmx_central_base_hash[0] != 0)
 +    {
 +        fprintf(fp, "Branched from:    %s\n", _gmx_central_base_hash);
 +    }
 +#endif
 +
 +#ifdef GMX_DOUBLE
 +    fprintf(fp, "Precision:        double\n");
 +#else
 +    fprintf(fp, "Precision:        single\n");
 +#endif
 +
 +#ifdef GMX_THREAD_MPI
 +    fprintf(fp, "Parallellization: thread_mpi\n");
 +#elif defined(GMX_MPI)
 +    fprintf(fp, "Parallellization: MPI\n");
 +#else
 +    fprintf(fp, "Parallellization: none\n");
 +#endif
 +
 +#ifdef GMX_FFT_FFTPACK
 +    fprintf(fp, "FFT Library:      fftpack\n");
 +#elif defined(GMX_FFT_FFTW3)
 +    fprintf(fp, "FFT Library:      fftw3\n");
 +#elif defined(GMX_FFT_MKL)
 +    fprintf(fp, "FFT Library:      MKL\n");
 +#else
 +    fprintf(fp, "FFT Library:      unknown\n");
 +#endif
 +
 +}
index 48553975c9dca943de7e40db02434b1f107a4a24,0000000000000000000000000000000000000000..b6a255b76a8ea6b87e61d402e2e8f8e85eec748c
mode 100644,000000..100644
--- /dev/null
@@@ -1,1157 -1,0 +1,1157 @@@
-             gmx_file("Cannot write energy names to file; maybe you are out of quota?");
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROningen Mixture of Alchemy and Childrens' Stories
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include "futil.h"
 +#include "string2.h"
 +#include "gmx_fatal.h"
 +#include "smalloc.h"
 +#include "gmxfio.h"
 +#include "enxio.h"
 +#include "vec.h"
 +#include "xdrf.h"
 +#include "macros.h"
 +
 +/* The source code in this file should be thread-safe. 
 +         Please keep it that way. */
 +
 +/* This number should be increased whenever the file format changes!
 + * Files reporting a higher version than this are rejected on reading. */
 +static const int enx_version = 5;
 +
 +/* Human-readable names of the energy-file block kinds.
 + * NOTE(review): presumably indexed by the block id enum (enxOR, ...,
 + * enxDISRE, ...) declared in enxio.h, so the order must stay in sync
 + * with that enum - confirm against the header. */
 +const char *enx_block_id_name[] = {
 +    "Averaged orientation restraints",
 +    "Instantaneous orientation restraints",
 +    "Orientation restraint order tensor(s)",
 +    "Distance restraints",
 +    "Free energy data",
 +    "BAR histogram",
 +    "Delta H raw data"
 +};
 +
 +
 +/* Stuff for reading pre 4.1 energy files.
 + * Old files store sums over the whole simulation; this state is what
 + * convert_full_sums() needs to turn them into per-frame sums. */
 +typedef struct {
 +    gmx_bool     bOldFileOpen;   /* Is this an open old file? */
 +    gmx_bool     bReadFirstStep; /* Did we read the first step? */
 +    int      first_step;     /* First step in the energy file */
 +    int      step_prev;      /* Previous step */
 +    int      nsum_prev;      /* Previous step sum length */
 +    t_energy *ener_prev;     /* Previous energy sums */
 +} ener_old_t;
 +
 +/* State of one open energy file (the object behind ener_file_t) */
 +struct ener_file
 +{
 +    ener_old_t eo;   /* bookkeeping for old-format files */
 +    t_fileio *fio;   /* underlying gmxfio handle */
 +    int framenr;     /* number of frames read so far (see do_enx) */
 +    real frametime;  /* time of the last frame read (see do_enx) */
 +};
 +
 +static void enxsubblock_init(t_enxsubblock *sb)
 +{
 +    sb->nr=0;
 +#ifdef GMX_DOUBLE
 +    sb->type=xdr_datatype_double;
 +#else
 +    sb->type=xdr_datatype_float;
 +#endif
 +    sb->fval = NULL;
 +    sb->dval = NULL;
 +    sb->ival = NULL;
 +    sb->lval = NULL;
 +    sb->cval = NULL;
 +    sb->sval = NULL;
 +    sb->fval_alloc = 0;
 +    sb->dval_alloc = 0;
 +    sb->ival_alloc = 0;
 +    sb->lval_alloc = 0;
 +    sb->cval_alloc = 0;
 +    sb->sval_alloc = 0;
 +}
 +
 +static void enxsubblock_free(t_enxsubblock *sb)
 +{
 +    if (sb->fval_alloc)
 +    {
 +        free(sb->fval);
 +        sb->fval_alloc=0;
 +        sb->fval=NULL;
 +    }
 +    if (sb->dval_alloc)
 +    {
 +        free(sb->dval);
 +        sb->dval_alloc=0;
 +        sb->dval=NULL;
 +    }
 +    if (sb->ival_alloc)
 +    {
 +        free(sb->ival);
 +        sb->ival_alloc=0;
 +        sb->ival=NULL;
 +    }
 +    if (sb->lval_alloc)
 +    {
 +        free(sb->lval);
 +        sb->lval_alloc=0;
 +        sb->lval=NULL;
 +    }
 +    if (sb->cval_alloc)
 +    {
 +        free(sb->cval);
 +        sb->cval_alloc=0;
 +        sb->cval=NULL;
 +    }
 +    if (sb->sval_alloc)
 +    {
 +        int i;
 +
 +        for(i=0;i<sb->sval_alloc;i++)
 +        {
 +            if (sb->sval[i])
 +            {
 +                free(sb->sval[i]);
 +            }
 +        }
 +        free(sb->sval);
 +        sb->sval_alloc=0;
 +        sb->sval=NULL;
 +    }
 +}
 +
 +/* Make sure the buffer matching sb->type can hold sb->nr elements.
 + *
 + * Only the buffer of the current type is considered, and it is only
 + * ever grown (never shrunk). Newly added string slots are set to NULL.
 + * An unrecognized type is reported through gmx_incons().
 + */
 +static void enxsubblock_alloc(t_enxsubblock *sb)
 +{
 +    /* allocate the appropriate amount of memory */
 +    switch(sb->type)
 +    {
 +        case xdr_datatype_float:
 +            if (sb->nr > sb->fval_alloc)
 +            {
 +                srenew(sb->fval, sb->nr);
 +                sb->fval_alloc=sb->nr;
 +            }
 +            break;
 +        case xdr_datatype_double:
 +            if (sb->nr > sb->dval_alloc)
 +            {
 +                srenew(sb->dval, sb->nr);
 +                sb->dval_alloc=sb->nr;
 +            }
 +            break;
 +        case xdr_datatype_int:
 +            if (sb->nr > sb->ival_alloc)
 +            {
 +                srenew(sb->ival, sb->nr);
 +                sb->ival_alloc=sb->nr;
 +            }
 +            break;
 +        case xdr_datatype_large_int:
 +            if (sb->nr > sb->lval_alloc)
 +            {
 +                srenew(sb->lval, sb->nr);
 +                sb->lval_alloc=sb->nr;
 +            }
 +            break;
 +        case xdr_datatype_char:
 +            if (sb->nr > sb->cval_alloc)
 +            {
 +                srenew(sb->cval, sb->nr);
 +                sb->cval_alloc=sb->nr;
 +            }
 +            break;
 +        case xdr_datatype_string:
 +            if (sb->nr > sb->sval_alloc)
 +            {
 +                int i;
 +
 +                srenew(sb->sval, sb->nr);
 +                /* only the slots added by this growth are cleared */
 +                for(i=sb->sval_alloc;i<sb->nr;i++)
 +                {
 +                    sb->sval[i]=NULL;
 +                }
 +                sb->sval_alloc=sb->nr;
 +            }
 +            break;
 +        default:
 +            gmx_incons("Unknown block type: this file is corrupted or from the future");
 +    }
 +}
 +
 +static void enxblock_init(t_enxblock *eb)
 +{
 +    eb->id=enxOR;
 +    eb->nsub=0;
 +    eb->sub=NULL;
 +    eb->nsub_alloc=0;
 +}
 +
 +static void enxblock_free(t_enxblock *eb)
 +{
 +    if (eb->nsub_alloc>0)
 +    {
 +        int i;
 +        for(i=0;i<eb->nsub_alloc;i++)
 +        {
 +            enxsubblock_free(&(eb->sub[i]));
 +        }
 +        free(eb->sub);
 +        eb->nsub_alloc=0;
 +        eb->sub=NULL;
 +    }
 +}
 +
 +void init_enxframe(t_enxframe *fr)
 +{
 +    fr->e_alloc=0;
 +    fr->ener=NULL;
 +
 +    /*fr->d_alloc=0;*/
 +    fr->ener=NULL;
 +
 +    /*fr->ndisre=0;*/
 +
 +    fr->nblock=0;
 +    fr->nblock_alloc=0;
 +    fr->block=NULL;
 +}
 +
 +
 +void free_enxframe(t_enxframe *fr)
 +{
 +  int b;
 +
 +  if (fr->e_alloc)
 +  {
 +    sfree(fr->ener);
 +  }
 +  for(b=0; b<fr->nblock_alloc; b++)
 +  {
 +      enxblock_free(&(fr->block[b]));
 +  }
 +  free(fr->block);
 +}
 +
 +void add_blocks_enxframe(t_enxframe *fr, int n)
 +{
 +    fr->nblock=n;
 +    if (n > fr->nblock_alloc)
 +    {
 +        int b;
 +
 +        srenew(fr->block, n);
 +        for(b=fr->nblock_alloc;b<fr->nblock;b++)
 +        {
 +            enxblock_init(&(fr->block[b]));
 +        }
 +        fr->nblock_alloc=n;
 +    }
 +}
 +
 +t_enxblock *find_block_id_enxframe(t_enxframe *ef, int id, t_enxblock *prev)
 +{
 +    gmx_off_t starti=0;
 +    gmx_off_t i;
 +
 +    if (prev)
 +    {
 +        starti=(prev - ef->block) + 1;
 +    }
 +    for(i=starti; i<ef->nblock; i++)
 +    {
 +        if (ef->block[i].id == id)
 +            return &(ef->block[i]);
 +    }
 +    return NULL;
 +}
 +
 +void add_subblocks_enxblock(t_enxblock *eb, int n)
 +{
 +    eb->nsub=n;
 +    if (eb->nsub > eb->nsub_alloc)
 +    {
 +        int b;
 +
 +        srenew(eb->sub, n);
 +        for(b=eb->nsub_alloc; b<n; b++)
 +        {
 +            enxsubblock_init(&(eb->sub[b]));
 +        } 
 +        eb->nsub_alloc=n;
 +    }
 +}
 +
 +static void enx_warning(const char *msg)
 +{
 +    if (getenv("GMX_ENX_NO_FATAL") != NULL)
 +    {
 +        gmx_warning(msg);
 +    }
 +    else
 +    {
 +        gmx_fatal(FARGS,"%s\n%s",
 +                  msg,
 +                  "If you want to use the correct frames before the corrupted frame and avoid this fatal error set the env.var. GMX_ENX_NO_FATAL");
 +    }
 +}
 +
 +/* Read or write the n energy term names (and, for file_version >= 2,
 + * their units) through the XDR stream.
 + *
 + * *nms is allocated here when it is NULL. When reading, strings left
 + * over from a previous call are freed first. For file_version < 2 no
 + * unit is stored in the file and every unit is set to "kJ/mol".
 + * NOTE(review): the error text always says "write", also when the
 + * failing xdr_string call was a read - confirm whether that is intended.
 + */
 +static void edr_strings(XDR *xdr,gmx_bool bRead,int file_version,
 +                        int n,gmx_enxnm_t **nms)
 +{
 +    int  i;
 +    gmx_enxnm_t *nm;
 +
 +    if (*nms == NULL)
 +    {
 +        snew(*nms,n);
 +    }
 +    for(i=0; i<n; i++)
 +    {
 +        nm = &(*nms)[i];
 +        if (bRead)
 +        {
 +            /* drop strings from an earlier read of this name table */
 +            if (nm->name)
 +            {
 +                sfree(nm->name);
 +                nm->name = NULL;
 +            }
 +            if (nm->unit)
 +            {
 +                sfree(nm->unit);
 +                nm->unit = NULL;
 +            }
 +        }
 +        if(!xdr_string(xdr,&(nm->name),STRLEN))
 +        {
-                 gmx_file("Cannot write energy names to file; maybe you are out of quota?");
++            gmx_file("Cannot write energy names to file; maybe you are out of disk space?");
 +        }
 +        if (file_version >= 2)
 +        {
 +            if(!xdr_string(xdr,&(nm->unit),STRLEN))
 +            {
-             gmx_file("Cannot write energy names to file; maybe you are out of quota?");
++                gmx_file("Cannot write energy names to file; maybe you are out of disk space?");
 +            }
 +        }
 +        else
 +        {
 +            /* version 1 files carry no units */
 +            nm->unit = strdup("kJ/mol");
 +        }
 +    }
 +}
 +
 +static void gen_units(int n,char ***units)
 +{
 +    int i;
 +
 +    snew(*units,n);
 +    for(i=0; i<n; i++)
 +    {
 +        (*units)[i] = strdup("kJ/mol");
 +    }
 +}
 +
 +void do_enxnms(ener_file_t ef,int *nre,gmx_enxnm_t **nms)
 +{
 +    int  magic=-55555;
 +    XDR  *xdr;
 +    gmx_bool bRead = gmx_fio_getread(ef->fio);
 +    int  file_version;
 +    int  i;
 +   
 +    gmx_fio_checktype(ef->fio); 
 +
 +    xdr = gmx_fio_getxdr(ef->fio);
 +    
 +    if (!xdr_int(xdr,&magic))
 +    {
 +        if(!bRead)
 +        {
-         gmx_file("Cannot close energy file; it might be corrupt, or maybe you are out of quota?");  
++            gmx_file("Cannot write energy names to file; maybe you are out of disk space?");
 +        }
 +        *nre=0;
 +        return;
 +    }
 +    if (magic > 0)
 +    {
 +        /* Assume this is an old edr format */
 +        file_version = 1;
 +        *nre = magic;
 +        ef->eo.bOldFileOpen = TRUE;
 +        ef->eo.bReadFirstStep = FALSE;
 +        srenew(ef->eo.ener_prev,*nre);
 +    }
 +    else
 +    {
 +        ef->eo.bOldFileOpen=FALSE;
 +
 +        if (magic != -55555)
 +        {
 +            gmx_fatal(FARGS,"Energy names magic number mismatch, this is not a GROMACS edr file");
 +        }
 +        file_version = enx_version;
 +        xdr_int(xdr,&file_version);
 +        if (file_version > enx_version)
 +        {
 +            gmx_fatal(FARGS,"reading tpx file (%s) version %d with version %d program",gmx_fio_getname(ef->fio),file_version,enx_version);
 +        }
 +        xdr_int(xdr,nre);
 +    }
 +    if (file_version != enx_version)
 +    {
 +        fprintf(stderr,"Note: enx file_version %d, software version %d\n",
 +                file_version,enx_version);
 +    }
 +
 +    edr_strings(xdr,bRead,file_version,*nre,nms);
 +}
 +
 +static gmx_bool do_eheader(ener_file_t ef,int *file_version,t_enxframe *fr,
 +                       int nre_test,gmx_bool *bWrongPrecision,gmx_bool *bOK)
 +{
 +    int  magic=-7777777;
 +    real first_real_to_check;
 +    int  b,i,zero=0,dum=0;
 +    gmx_bool bRead = gmx_fio_getread(ef->fio);
 +    int  tempfix_nr=0;
 +    int  ndisre=0;
 +    int  startb=0;
 +#ifndef GMX_DOUBLE
 +    xdr_datatype dtreal=xdr_datatype_float; 
 +#else
 +    xdr_datatype dtreal=xdr_datatype_double; 
 +#endif
 +    
 +    if (nre_test >= 0)
 +    {
 +        *bWrongPrecision = FALSE;
 +    }
 +
 +    *bOK=TRUE;
 +    /* The original energy frame started with a real,
 +     * so we have to use a real for compatibility.
 +     * This is VERY DIRTY code, since do_eheader can be called
 +     * with the wrong precision set and then we could read r > -1e10,
 +     * while actually the intention was r < -1e10.
 +     * When nre_test >= 0, do_eheader should therefore terminate
 +     * before the number of i/o calls starts depending on what has been read
 +     * (which is the case for for instance the block sizes for variable
 +     * number of blocks, where this number is read before).
 +     */
 +    first_real_to_check = -2e10;
 +    if (!gmx_fio_do_real(ef->fio, first_real_to_check))
 +    {
 +        return FALSE;
 +    }
 +    if (first_real_to_check > -1e10)
 +    {
 +        /* Assume we are reading an old format */
 +        *file_version = 1;
 +        fr->t = first_real_to_check;
 +        if (!gmx_fio_do_int(ef->fio, dum))   *bOK = FALSE;
 +        fr->step = dum;
 +    }
 +    else
 +    {
 +        if (!gmx_fio_do_int(ef->fio, magic))       *bOK = FALSE;
 +        if (magic != -7777777)
 +        {
 +            enx_warning("Energy header magic number mismatch, this is not a GROMACS edr file");
 +            *bOK=FALSE;
 +            return FALSE;
 +        }
 +        *file_version = enx_version;
 +        if (!gmx_fio_do_int(ef->fio, *file_version)) *bOK = FALSE;
 +        if (*bOK && *file_version > enx_version)
 +        {
 +            gmx_fatal(FARGS,"reading tpx file (%s) version %d with version %d program",gmx_fio_getname(ef->fio),file_version,enx_version);
 +        }
 +        if (!gmx_fio_do_double(ef->fio, fr->t))       *bOK = FALSE;
 +        if (!gmx_fio_do_gmx_large_int(ef->fio, fr->step)) *bOK = FALSE;
 +        if (!bRead && fr->nsum == 1) {
 +            /* Do not store sums of length 1,
 +             * since this does not add information.
 +             */
 +            if (!gmx_fio_do_int(ef->fio, zero))      *bOK = FALSE;
 +        } else {
 +            if (!gmx_fio_do_int(ef->fio, fr->nsum))  *bOK = FALSE;
 +        }
 +        if (*file_version >= 3)
 +        {
 +            if (!gmx_fio_do_gmx_large_int(ef->fio, fr->nsteps)) *bOK = FALSE;
 +        }
 +        else
 +        {
 +            fr->nsteps = max(1,fr->nsum);
 +        }
 +        if (*file_version >= 5)
 +        {
 +            if (!gmx_fio_do_double(ef->fio, fr->dt)) *bOK = FALSE;
 +        }
 +        else
 +        {
 +            fr->dt = 0;
 +        }
 +    }
 +    if (!gmx_fio_do_int(ef->fio, fr->nre))     *bOK = FALSE;
 +    if (*file_version < 4)
 +    {
 +        if (!gmx_fio_do_int(ef->fio, ndisre))  *bOK = FALSE;
 +    }
 +    else
 +    {
 +        /* now reserved for possible future use */
 +        if (!gmx_fio_do_int(ef->fio, dum))  *bOK = FALSE;
 +    }
 +
 +    if (!gmx_fio_do_int(ef->fio, fr->nblock))  *bOK = FALSE;
 +    if (fr->nblock < 0) *bOK=FALSE;
 +
 +    if (ndisre!=0)
 +    {
 +        if (*file_version >= 4)
 +        {
 +            enx_warning("Distance restraint blocks in old style in new style file");
 +            *bOK=FALSE;
 +            return FALSE;
 +        }
 +        fr->nblock+=1;
 +    }
 +
 +
 +    /* Frames could have nre=0, so we can not rely only on the fr->nre check */
 +    if (bRead && nre_test >= 0 &&
 +        ((fr->nre > 0 && fr->nre != nre_test) ||
 +         fr->nre < 0 || ndisre < 0 || fr->nblock < 0))
 +    {
 +        *bWrongPrecision = TRUE;
 +        return *bOK;
 +    }
 +
 +    /* we now know what these should be, or we've already bailed out because
 +       of wrong precision */
 +    if ( *file_version==1 && (fr->t < 0 || fr->t > 1e20 || fr->step < 0 ) )
 +    {
 +        enx_warning("edr file with negative step number or unreasonable time (and without version number).");
 +        *bOK=FALSE;
 +        return FALSE;
 +    }
 +
 +
 +    if (*bOK && bRead)
 +    {
 +        add_blocks_enxframe(fr, fr->nblock);
 +    }
 +
 +    startb=0;
 +    if (ndisre>0)
 +    {
 +        /* sub[0] is the instantaneous data, sub[1] is time averaged */
 +        add_subblocks_enxblock(&(fr->block[0]), 2);
 +        fr->block[0].id=enxDISRE;
 +        fr->block[0].sub[0].nr=ndisre;
 +        fr->block[0].sub[1].nr=ndisre;
 +        fr->block[0].sub[0].type=dtreal;
 +        fr->block[0].sub[1].type=dtreal;
 +        startb++;
 +    }
 +
 +    /* read block header info */
 +    for(b=startb; b<fr->nblock; b++)
 +    {
 +        if (*file_version<4)
 +        {
 +            /* blocks in old version files always have 1 subblock that 
 +               consists of reals. */
 +            int nrint;
 +
 +            if (bRead)
 +            {
 +                add_subblocks_enxblock(&(fr->block[b]), 1);
 +            }
 +            else
 +            {
 +                if (fr->block[b].nsub != 1)
 +                {
 +                    gmx_incons("Writing an old version .edr file with too many subblocks");
 +                }
 +                if (fr->block[b].sub[0].type != dtreal)
 +                {
 +                    gmx_incons("Writing an old version .edr file the wrong subblock type");
 +                }
 +            }
 +            nrint = fr->block[b].sub[0].nr;
 +            
 +            if (!gmx_fio_do_int(ef->fio, nrint))
 +            {
 +                *bOK = FALSE;
 +            }
 +            fr->block[b].id          = b - startb;
 +            fr->block[b].sub[0].nr   = nrint;
 +            fr->block[b].sub[0].type = dtreal;
 +        }
 +        else
 +        {
 +            int i;
 +            /* in the new version files, the block header only contains
 +               the ID and the number of subblocks */
 +            int nsub=fr->block[b].nsub;
 +            *bOK = *bOK && gmx_fio_do_int(ef->fio, fr->block[b].id);
 +            *bOK = *bOK && gmx_fio_do_int(ef->fio, nsub);
 +
 +            fr->block[b].nsub=nsub;
 +            if (bRead)
 +                add_subblocks_enxblock(&(fr->block[b]), nsub);
 +
 +            /* read/write type & size for each subblock */
 +            for(i=0;i<nsub;i++)
 +            {
 +                t_enxsubblock *sub=&(fr->block[b].sub[i]); /* shortcut */
 +                int typenr=sub->type;
 +
 +                *bOK=*bOK && gmx_fio_do_int(ef->fio, typenr);
 +                *bOK=*bOK && gmx_fio_do_int(ef->fio, sub->nr);
 +
 +                sub->type = (xdr_datatype)typenr;
 +            }
 +        }
 +    }
 +    if (!gmx_fio_do_int(ef->fio, fr->e_size))  *bOK = FALSE;
 +
 +    /* now reserved for possible future use */
 +    if (!gmx_fio_do_int(ef->fio, dum))  *bOK = FALSE;
 +
 +    /* Do a dummy int to keep the format compatible with the old code */
 +    if (!gmx_fio_do_int(ef->fio, dum))         *bOK = FALSE;
 +    
 +    if (*bOK && *file_version == 1 && nre_test < 0)
 +    {
 +#if 0
 +        if (fp >= ener_old_nalloc)
 +        {
 +            gmx_incons("Problem with reading old format energy files");
 +        }
 +#endif
 +        
 +        if (!ef->eo.bReadFirstStep)
 +        {
 +            ef->eo.bReadFirstStep = TRUE;
 +            ef->eo.first_step     = fr->step;
 +            ef->eo.step_prev      = fr->step;
 +            ef->eo.nsum_prev      = 0;
 +        }
 +        
 +        fr->nsum   = fr->step - ef->eo.first_step + 1;
 +        fr->nsteps = fr->step - ef->eo.step_prev;
 +        fr->dt     = 0;
 +    }
 +      
 +    return *bOK;
 +}
 +
 +void free_enxnms(int n,gmx_enxnm_t *nms)
 +{
 +    int i;
 +
 +    for(i=0; i<n; i++)
 +    {
 +        sfree(nms[i].name);
 +        sfree(nms[i].unit);
 +    }
 +
 +    sfree(nms);
 +}
 +
 +/* Close the file behind an energy file handle; a failing close is
 + * reported through gmx_file().
 + * NOTE(review): only the fio is closed here. The ener_file structure
 + * allocated in open_enx() is not freed by this function - confirm who
 + * owns it. */
 +void close_enx(ener_file_t ef)
 +{
 +    if(gmx_fio_close(ef->fio) != 0)
 +    {
-                 gmx_file("Cannot write energy file header; maybe you are out of quota?");
++        gmx_file("Cannot close energy file; it might be corrupt, or maybe you are out of disk space?");
 +    }
 +}
 +
 +/* Return whether file fn is empty, by trying to read a single byte
 + * and checking for end-of-file afterwards. */
 +static gmx_bool empty_file(const char *fn)
 +{
 +    FILE *fp;
 +    char dum;
 +    int  ret;
 +    gmx_bool bEmpty;
 +    
 +    fp = gmx_fio_fopen(fn,"r");
 +    /* the count returned by fread is stored but not used; the EOF flag
 +     * set by the attempted read is what decides */
 +    ret = fread(&dum,sizeof(dum),1,fp);
 +    bEmpty = feof(fp);
 +    gmx_fio_fclose(fp);
 +    
 +    return bEmpty;
 +}
 +
 +
 +ener_file_t open_enx(const char *fn,const char *mode)
 +{
 +    int        nre,i;
 +    gmx_enxnm_t *nms=NULL;
 +    int        file_version=-1;
 +    t_enxframe *fr;
 +    gmx_bool       bWrongPrecision,bOK=TRUE;
 +    struct ener_file *ef;
 +
 +    snew(ef,1);
 +
 +    if (mode[0]=='r') {
 +        ef->fio=gmx_fio_open(fn,mode);
 +        gmx_fio_checktype(ef->fio);
 +        gmx_fio_setprecision(ef->fio,FALSE);
 +        do_enxnms(ef,&nre,&nms);
 +        snew(fr,1);
 +        do_eheader(ef,&file_version,fr,nre,&bWrongPrecision,&bOK);
 +        if(!bOK)
 +        {
 +            gmx_file("Cannot read energy file header. Corrupt file?");
 +        }
 +
 +        /* Now check whether this file is in single precision */
 +        if (!bWrongPrecision &&
 +            ((fr->e_size && (fr->nre == nre) && 
 +              (nre*4*(long int)sizeof(float) == fr->e_size)) ) )
 +        {
 +            fprintf(stderr,"Opened %s as single precision energy file\n",fn);
 +            free_enxnms(nre,nms);
 +        }
 +        else
 +        {
 +            gmx_fio_rewind(ef->fio);
 +            gmx_fio_checktype(ef->fio);
 +            gmx_fio_setprecision(ef->fio,TRUE);
 +            do_enxnms(ef,&nre,&nms);
 +            do_eheader(ef,&file_version,fr,nre,&bWrongPrecision,&bOK);
 +            if(!bOK)
 +            {
-             gmx_file("Cannot write energy file header; maybe you are out of quota?");
++                gmx_file("Cannot write energy file header; maybe you are out of disk space?");
 +            }
 +
 +            if (((fr->e_size && (fr->nre == nre) && 
 +                            (nre*4*(long int)sizeof(double) == fr->e_size)) ))
 +                fprintf(stderr,"Opened %s as double precision energy file\n",
 +                        fn);
 +            else {
 +                if (empty_file(fn))
 +                    gmx_fatal(FARGS,"File %s is empty",fn);
 +                else
 +                    gmx_fatal(FARGS,"Energy file %s not recognized, maybe different CPU?",
 +                              fn);
 +            }
 +            free_enxnms(nre,nms);
 +        }
 +        free_enxframe(fr);
 +        sfree(fr);
 +        gmx_fio_rewind(ef->fio);
 +    }
 +    else 
 +        ef->fio = gmx_fio_open(fn,mode);
 +
 +    ef->framenr=0;
 +    ef->frametime=0;
 +    return ef;
 +}
 +
 +/* Return the gmxfio handle stored in an energy file handle. */
 +t_fileio *enx_file_pointer(const ener_file_t ef)
 +{
 +    return ef->fio;
 +}
 +
 +/* Convert the sums of an old-format frame, which run over the whole
 + * simulation, into sums over the interval since the previous frame.
 + *
 + * ener_old holds the first step, the previous step and the previous
 + * full sums; it is updated for the next call. fr is modified in place.
 + * When the frame has energies but no sums at all, fr->nsum is set to 0.
 + */
 +static void convert_full_sums(ener_old_t *ener_old,t_enxframe *fr)
 +{
 +    int nstep_all;
 +    int ne,ns,i;
 +    double esum_all,eav_all;
 +    
 +    if (fr->nsum > 0)
 +    {
 +        /* count non-zero energies (ne) and non-zero sums (ns) */
 +        ne = 0;
 +        ns = 0;
 +        for(i=0; i<fr->nre; i++)
 +        {
 +            if (fr->ener[i].e    != 0) ne++;
 +            if (fr->ener[i].esum != 0) ns++;
 +        }
 +        if (ne > 0 && ns == 0)
 +        {
 +            /* We do not have all energy sums */
 +            fr->nsum = 0;
 +        }
 +    }
 +    
 +    /* Convert old full simulation sums to sums between energy frames */
 +    nstep_all = fr->step - ener_old->first_step + 1;
 +    if (fr->nsum > 1 && fr->nsum == nstep_all && ener_old->nsum_prev > 0)
 +    {
 +        /* Set the new sum length: the frame step difference */
 +        fr->nsum = fr->step - ener_old->step_prev;
 +        for(i=0; i<fr->nre; i++)
 +        {
 +            esum_all = fr->ener[i].esum;
 +            eav_all  = fr->ener[i].eav;
 +            fr->ener[i].esum = esum_all - ener_old->ener_prev[i].esum;
 +            /* subtract the previous fluctuation sum plus a correction
 +             * term built from the difference between the two averages */
 +            fr->ener[i].eav  = eav_all  - ener_old->ener_prev[i].eav
 +                - dsqr(ener_old->ener_prev[i].esum/(nstep_all - fr->nsum)
 +                       - esum_all/nstep_all)*
 +                (nstep_all - fr->nsum)*nstep_all/(double)fr->nsum;
 +            ener_old->ener_prev[i].esum = esum_all;
 +            ener_old->ener_prev[i].eav  = eav_all;
 +        }
 +        ener_old->nsum_prev = nstep_all;
 +    }
 +    else if (fr->nsum > 0)
 +    {
 +        if (fr->nsum != nstep_all)
 +        {
 +            fprintf(stderr,"\nWARNING: something is wrong with the energy sums, will not use exact averages\n");
 +            ener_old->nsum_prev = 0;
 +        }
 +        else
 +        {
 +            ener_old->nsum_prev = nstep_all;
 +        }
 +        /* Copy all sums to ener_prev */
 +        for(i=0; i<fr->nre; i++)
 +        {
 +            ener_old->ener_prev[i].esum = fr->ener[i].esum;
 +            ener_old->ener_prev[i].eav  = fr->ener[i].eav;
 +        }
 +    }
 +    
 +    ener_old->step_prev = fr->step;
 +}
 +
 +/* Read or write one complete energy frame: header, energy terms and
 + * all blocks. The direction is taken from the fio of ef.
 + *
 + * Returns TRUE on success. Returns FALSE when the header could not be
 + * processed or when reading any part of the frame failed. A failure
 + * while writing ends in gmx_file()/gmx_fatal().
 + * When reading, fr->ener and the subblock buffers are grown as needed
 + * and the frame counter and time stored in ef are updated.
 + */
 +gmx_bool do_enx(ener_file_t ef,t_enxframe *fr)
 +{
 +    int       file_version=-1;
 +    int       i,b;
 +    gmx_bool      bRead,bOK,bOK1,bSane;
 +    real      tmp1,tmp2,rdum;
 +    char      buf[22];
 +    /*int       d_size;*/
 +    
 +    bOK = TRUE;
 +    bRead = gmx_fio_getread(ef->fio);
 +    if (!bRead)
 +    {  
 +        /* size of the energy data; open_enx() compares this value with
 +         * nre*4*sizeof(float or double) to detect the file precision */
 +        fr->e_size = fr->nre*sizeof(fr->ener[0].e)*4;
 +        /*d_size = fr->ndisre*(sizeof(real)*2);*/
 +    }
 +    gmx_fio_checktype(ef->fio);
 +
 +    if (!do_eheader(ef,&file_version,fr,-1,NULL,&bOK))
 +    {
 +        if (bRead)
 +        {
 +            fprintf(stderr,"\rLast energy frame read %d time %8.3f         ",
 +                    ef->framenr-1,ef->frametime);
 +            if (!bOK)
 +            {
 +                fprintf(stderr,
 +                        "\nWARNING: Incomplete energy frame: nr %d time %8.3f\n",
 +                        ef->framenr,fr->t);
 +            }
 +        }
 +        else
 +        {
-             gmx_file("Cannot write energy file; maybe you are out of quota?");
++            gmx_file("Cannot write energy file header; maybe you are out of disk space?");
 +        }
 +        return FALSE;
 +    }
 +    if (bRead)
 +    {
 +        /* progress output gets sparser as the frame number grows */
 +        if ((ef->framenr <   20 || ef->framenr %   10 == 0) &&
 +            (ef->framenr <  200 || ef->framenr %  100 == 0) &&
 +            (ef->framenr < 2000 || ef->framenr % 1000 == 0))
 +        {
 +            fprintf(stderr,"\rReading energy frame %6d time %8.3f         ",
 +                    ef->framenr,fr->t);
 +        }
 +        ef->framenr++;
 +        ef->frametime = fr->t;
 +    }
 +    /* Check sanity of this header */
 +    bSane = fr->nre > 0 ;
 +    for(b=0; b<fr->nblock; b++)
 +    {
 +        bSane = bSane || (fr->block[b].nsub > 0);
 +    }
 +    if (!((fr->step >= 0) && bSane))
 +    {
 +        fprintf(stderr,"\nWARNING: there may be something wrong with energy file %s\n",
 +                gmx_fio_getname(ef->fio));
 +        fprintf(stderr,"Found: step=%s, nre=%d, nblock=%d, time=%g.\n"
 +                "Trying to skip frame expect a crash though\n",
 +                gmx_step_str(fr->step,buf),fr->nre,fr->nblock,fr->t);
 +    }
 +    if (bRead && fr->nre > fr->e_alloc)
 +    {
 +        srenew(fr->ener,fr->nre);
 +        /* clear only the newly added entries */
 +        for(i=fr->e_alloc; (i<fr->nre); i++)
 +        {
 +            fr->ener[i].e    = 0;
 +            fr->ener[i].eav  = 0;
 +            fr->ener[i].esum = 0;
 +        }
 +        fr->e_alloc = fr->nre;
 +    }
 +    
 +    for(i=0; i<fr->nre; i++)
 +    {
 +        bOK = bOK && gmx_fio_do_real(ef->fio, fr->ener[i].e);
 +        
 +        /* Do not store sums of length 1,
 +         * since this does not add information.
 +         */
 +        if (file_version == 1 ||
 +            (bRead && fr->nsum > 0) || fr->nsum > 1)
 +        {
 +            tmp1 = fr->ener[i].eav;
 +            bOK = bOK && gmx_fio_do_real(ef->fio, tmp1);
 +            if (bRead)
 +                fr->ener[i].eav = tmp1;
 +            
 +            /* This is to save only in single precision (unless compiled in DP) */
 +            tmp2 = fr->ener[i].esum;
 +            bOK = bOK && gmx_fio_do_real(ef->fio, tmp2);
 +            if (bRead)
 +                fr->ener[i].esum = tmp2;
 +            
 +            if (file_version == 1)
 +            {
 +                /* Old, unused real */
 +                rdum = 0;
 +                bOK = bOK && gmx_fio_do_real(ef->fio, rdum);
 +            }
 +        }
 +    }
 +    
 +    /* Here we can not check for file_version==1, since one could have
 +     * continued an old format simulation with a new one with mdrun -append.
 +     */
 +    if (bRead && ef->eo.bOldFileOpen)
 +    {
 +        /* Convert old full simulation sums to sums between energy frames */
 +        convert_full_sums(&(ef->eo),fr);
 +    }
 +    /* read the blocks */
 +    for(b=0; b<fr->nblock; b++)
 +    {
 +        /* now read the subblocks. */
 +        int nsub=fr->block[b].nsub; /* shortcut */
 +        int i;
 +
 +        for(i=0;i<nsub;i++)
 +        {
 +            t_enxsubblock *sub=&(fr->block[b].sub[i]); /* shortcut */
 +
 +            if (bRead)
 +            {
 +                /* type and nr were set by do_eheader() */
 +                enxsubblock_alloc(sub);
 +            }
 +
 +            /* read/write data */
 +            bOK1=TRUE;
 +            switch (sub->type)
 +            {
 +                case xdr_datatype_float:
 +                    bOK1=gmx_fio_ndo_float(ef->fio, sub->fval, sub->nr); 
 +                    break;
 +                case xdr_datatype_double:
 +                    bOK1=gmx_fio_ndo_double(ef->fio, sub->dval, sub->nr); 
 +                    break;
 +                case xdr_datatype_int:
 +                    bOK1=gmx_fio_ndo_int(ef->fio, sub->ival, sub->nr);
 +                    break;
 +                case xdr_datatype_large_int:
 +                    bOK1=gmx_fio_ndo_gmx_large_int(ef->fio, sub->lval, sub->nr);
 +                    break;
 +                case xdr_datatype_char:
 +                    bOK1=gmx_fio_ndo_uchar(ef->fio, sub->cval, sub->nr);
 +                    break;
 +                case xdr_datatype_string:
 +                    bOK1=gmx_fio_ndo_string(ef->fio, sub->sval, sub->nr);
 +                    break;
 +                default:
 +                    gmx_incons("Reading unknown block data type: this file is corrupted or from the future");
 +            }
 +            bOK = bOK && bOK1;
 +        }
 +    }
 +    
 +    if(!bRead)
 +    {
 +        /* flush after every written frame */
 +        if( gmx_fio_flush(ef->fio) != 0)
 +        {
++            gmx_file("Cannot write energy file; maybe you are out of disk space?");
 +        }
 +    }
 +    
 +    if (!bOK)
 +    {
 +        if (bRead)
 +        {
 +            fprintf(stderr,"\nLast energy frame read %d",
 +                    ef->framenr-1);
 +            fprintf(stderr,"\nWARNING: Incomplete energy frame: nr %d time %8.3f\n",
 +                    ef->framenr,fr->t);
 +        }
 +        else
 +        {
 +            gmx_fatal(FARGS,"could not write energies");
 +        }
 +        return FALSE; 
 +    }
 +    
 +    return TRUE;
 +}
 +
 +static real find_energy(const char *name, int nre, gmx_enxnm_t *enm,
 +                        t_enxframe *fr)
 +{
 +    int i;
 +    
 +    for(i=0; i<nre; i++)
 +    {
 +        if (strcmp(enm[i].name,name) == 0)
 +        {
 +            return  fr->ener[i].e;
 +        }
 +    }
 +    
 +    gmx_fatal(FARGS,"Could not find energy term named '%s'",name);
 +    
 +    return 0;
 +}
 +
 +
 +/* Restore coupling variables of state from the frame at time t in
 + * energy file fn.
 + *
 + * Frames are read until one with fr->t == t is found; not finding it is
 + * a fatal error. Depending on ir, the following are then looked up by
 + * energy term name: the box velocities (Parrinello-Rahman pressure
 + * coupling), the Nose-Hoover xi/vxi values of each temperature coupling
 + * group and, with the Trotter NPT integrator, those of the barostat.
 + * NOTE(review): the handle returned by open_enx() is closed here but
 + * never freed - confirm who owns it.
 + */
 +void get_enx_state(const char *fn, real t, gmx_groups_t *groups, t_inputrec *ir,
 +                   t_state *state)
 +{
 +  /* Should match the names in mdebin.c */
 +  static const char *boxvel_nm[] = {
 +  "Box-Vel-XX", "Box-Vel-YY", "Box-Vel-ZZ",
 +  "Box-Vel-YX", "Box-Vel-ZX", "Box-Vel-ZY"
 +  };
 +  
 +  static const char *pcouplmu_nm[] = {
 +    "Pcoupl-Mu-XX", "Pcoupl-Mu-YY", "Pcoupl-Mu-ZZ",
 +    "Pcoupl-Mu-YX", "Pcoupl-Mu-ZX", "Pcoupl-Mu-ZY"
 +  };
 +  static const char *baro_nm[] = {
 +    "Barostat"
 +  };
 +
 +
 +  /* matrix row/column of each entry of boxvel_nm, in the same order */
 +  int ind0[] = { XX,YY,ZZ,YY,ZZ,ZZ };
 +  int ind1[] = { XX,YY,ZZ,XX,XX,YY };
 +  int nre,nfr,i,j,ni,npcoupl;
 +  char       buf[STRLEN];
 +  const char *bufi;
 +  gmx_enxnm_t *enm=NULL;
 +  t_enxframe *fr;
 +  ener_file_t in;
 +
 +  in = open_enx(fn,"r");
 +  do_enxnms(in,&nre,&enm);
 +  snew(fr,1);
 +  nfr = 0;
 +  /* read frames until the requested time is found or the file ends */
 +  while ((nfr==0 || fr->t != t) && do_enx(in,fr)) {
 +    nfr++;
 +  }
 +  close_enx(in);
 +  fprintf(stderr,"\n");
 +
 +  if (nfr == 0 || fr->t != t)
 +    gmx_fatal(FARGS,"Could not find frame with time %f in '%s'",t,fn);
 +  
 +  /* 3 diagonal elements, plus 3 off-diagonal ones when triclinic */
 +  npcoupl = TRICLINIC(ir->compress) ? 6 : 3;
 +  if (ir->epc == epcPARRINELLORAHMAN) {
 +    clear_mat(state->boxv);
 +    for(i=0; i<npcoupl; i++) {
 +      state->boxv[ind0[i]][ind1[i]] =
 +      find_energy(boxvel_nm[i],nre,enm,fr);
 +    }
 +    fprintf(stderr,"\nREAD %d BOX VELOCITIES FROM %s\n\n",npcoupl,fn);
 +  }
 +
 +  if (ir->etc == etcNOSEHOOVER) 
 +  {
 +      char cns[20];
 +
 +      /* chain suffix; stays empty unless the Trotter NVT scheme is used */
 +      cns[0] = '\0';
 +
 +      for(i=0; i<state->ngtc; i++) {
 +          ni = groups->grps[egcTC].nm_ind[i];
 +          bufi = *(groups->grpname[ni]);
 +          /* NOTE(review): every pass of this j loop stores into index
 +           * [i], so only the value of the last chain element is kept.
 +           * Presumably the target index should depend on j as well -
 +           * confirm against the layout of nosehoover_xi/vxi. */
 +          for(j=0; (j<state->nhchainlength); j++) 
 +          {
 +              if (IR_NVT_TROTTER(ir))
 +              {
 +                  sprintf(cns,"-%d",j);
 +              }
 +              sprintf(buf,"Xi%s-%s",cns,bufi);
 +              state->nosehoover_xi[i] = find_energy(buf,nre,enm,fr);
 +              sprintf(buf,"vXi%s-%s",cns,bufi);
 +              state->nosehoover_vxi[i] = find_energy(buf,nre,enm,fr);
 +          }
 +
 +      }
 +      fprintf(stderr,"\nREAD %d NOSE-HOOVER Xi chains FROM %s\n\n",state->ngtc,fn);
 +
 +      if (IR_NPT_TROTTER(ir)) 
 +      {
 +          for(i=0; i<state->nnhpres; i++) {
 +              bufi = baro_nm[0]; /* All barostat DOF's together for now */
 +              /* NOTE(review): same [i] indexing inside the j loop as
 +               * for the thermostat chains above - confirm. */
 +              for(j=0; (j<state->nhchainlength); j++) 
 +              {
 +                  sprintf(buf,"Xi-%d-%s",j,bufi); 
 +                  state->nhpres_xi[i] = find_energy(buf,nre,enm,fr);
 +                  sprintf(buf,"vXi-%d-%s",j,bufi);
 +                  state->nhpres_vxi[i] = find_energy(buf,nre,enm,fr);
 +              }
 +          }
 +          fprintf(stderr,"\nREAD %d NOSE-HOOVER BAROSTAT Xi chains FROM %s\n\n",state->nnhpres,fn);
 +      }
 +  } 
 +
 +  free_enxnms(nre,enm);
 +  free_enxframe(fr);
 +  sfree(fr);
 +}
 +
Simple merge
Simple merge
Simple merge
index 0c0eb197e844ba427e47a30d34a7f56d8fef8a47,0000000000000000000000000000000000000000..a264e6913abd594f418560031946a7ccae7fc5be
mode 100644,000000..100644
--- /dev/null
@@@ -1,654 -1,0 +1,654 @@@
- #include "ctype.h"
 +/*
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROningen Mixture of Alchemy and Childrens' Stories
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <string.h>
 +#include "gmx_fatal.h"
 +#include "main.h"
 +#include "smalloc.h"
 +#include "network.h"
 +#include "copyrite.h"
 +#include "statutil.h"
++#include <ctype.h>
 +#include "macros.h"
 +
 +#ifdef GMX_LIB_MPI
 +#include <mpi.h>
 +#endif
 +
 +#ifdef GMX_THREAD_MPI
 +#include "tmpi.h"
 +#endif
 +
 +
 +/* The source code in this file should be thread-safe. 
 +      Please keep it that way. */
 +
 +gmx_bool gmx_mpi_initialized(void)
 +{
 +  int n;
 +#ifndef GMX_MPI
 +  return 0;
 +#else
 +  MPI_Initialized(&n);
 +  
 +  return n;
 +#endif
 +}
 +
 +int gmx_setup(int *argc,char **argv,int *nnodes)
 +{
 +#ifndef GMX_MPI
 +  gmx_call("gmx_setup");
 +  return 0;
 +#else
 +  char   buf[256];
 +  int    resultlen;               /* actual length of node name      */
 +  int    i,flag;
 +  int  mpi_num_nodes;
 +  int  mpi_my_rank;
 +  char mpi_hostname[MPI_MAX_PROCESSOR_NAME];
 +
 +  /* Call the MPI routines */
 +#ifdef GMX_LIB_MPI
 +#ifdef GMX_FAHCORE
 +  (void) fah_MPI_Init(argc,&argv);
 +#else
 +  (void) MPI_Init(argc,&argv);
 +#endif
 +#endif
 +  (void) MPI_Comm_size( MPI_COMM_WORLD, &mpi_num_nodes );
 +  (void) MPI_Comm_rank( MPI_COMM_WORLD, &mpi_my_rank );
 +  (void) MPI_Get_processor_name( mpi_hostname, &resultlen );
 + 
 +#ifdef GMX_LIB_MPI 
 +  fprintf(stderr,"NNODES=%d, MYRANK=%d, HOSTNAME=%s\n",
 +        mpi_num_nodes,mpi_my_rank,mpi_hostname);
 +#endif
 +  
 +  *nnodes=mpi_num_nodes;
 +  
 +  return mpi_my_rank;
 +#endif
 +}
 +
 +int  gmx_node_num(void)
 +{
 +#ifndef GMX_MPI
 +  return 1;
 +#else
 +  int i;
 +  (void) MPI_Comm_size(MPI_COMM_WORLD, &i);
 +  return i;
 +#endif
 +}
 +
 +int gmx_node_rank(void)
 +{
 +#ifndef GMX_MPI
 +  return 0;
 +#else
 +  int i;
 +  (void) MPI_Comm_rank(MPI_COMM_WORLD, &i);
 +  return i;
 +#endif
 +}
 +
 +
 +int gmx_hostname_num()
 +{
 +#ifndef GMX_MPI
 +  return 0;
 +#else
 +  int  resultlen,hostnum,i,j;
 +  char mpi_hostname[MPI_MAX_PROCESSOR_NAME],hostnum_str[MPI_MAX_PROCESSOR_NAME];
 +
 +  MPI_Get_processor_name(mpi_hostname,&resultlen);
 +  /* This procedure can only differentiate nodes with host names
 +   * that end on unique numbers.
 +   */
 +  i = 0;
 +  j = 0;
 +  /* Only parse the host name up to the first dot */
 +  while(i < resultlen && mpi_hostname[i] != '.') {
 +    if (isdigit(mpi_hostname[i])) {
 +      hostnum_str[j++] = mpi_hostname[i];
 +    }
 +    i++;
 +  }
 +  hostnum_str[j] = '\0';
 +  if (j == 0) {
 +    hostnum = 0;
 +  } else {
 +    /* Use only the last 9 decimals, so we don't overflow an int */
 +    hostnum = strtol(hostnum_str + max(0,j-9), NULL, 10);
 +  }
 +
 +  if (debug) {
 +    fprintf(debug,"In gmx_setup_nodecomm: hostname '%s', hostnum %d\n",
 +        mpi_hostname,hostnum);
 +  }
 +  return hostnum;
 +#endif
 +}
 +
 +/* Try to set up two-level (intra-node / inter-node) communicators in cr->nc.
 + *
 + * fplog  log file, may be NULL; receives one line when two-step summing is used
 + * cr     communication record; cr->mpi_comm_mygroup is split and cr->nc filled
 + *
 + * cr->nc.bUse is always reset to FALSE first. It is set to TRUE only in
 + * builds with GMX_MPI and without GMX_THREAD_MPI, when the environment
 + * variable GMX_NO_NODECOMM is unset and the split yields a real two-level
 + * layout (see the ng/ni test below).
 + */
 +void gmx_setup_nodecomm(FILE *fplog,t_commrec *cr)
 +{
 +  gmx_nodecomm_t *nc;
 +  int  n,rank,hostnum,ng,ni;
 +
 +  /* Many MPI implementations do not optimize MPI_Allreduce
 +   * (and probably also other global communication calls)
 +   * for multi-core nodes connected by a network.
 +   * We can optimize such communication by using one MPI call
 +   * within each node and one between the nodes.
 +   * For MVAPICH2 and Intel MPI this reduces the time for
 +   * the global_stat communication by 25%
 +   * for 2x2-core 3 GHz Woodcrest connected by mixed DDR/SDR Infiniband.
 +   * B. Hess, November 2007
 +   */
 +
 +  nc = &cr->nc;
 +
 +  nc->bUse = FALSE;
 +#ifndef GMX_THREAD_MPI
 +  if (getenv("GMX_NO_NODECOMM") == NULL) {
 +#ifdef GMX_MPI
 +    MPI_Comm_size(cr->mpi_comm_mygroup,&n);
 +    MPI_Comm_rank(cr->mpi_comm_mygroup,&rank);
 +
 +    /* Ranks whose host names give the same number end up in one group */
 +    hostnum = gmx_hostname_num();
 +
 +    if (debug) {
 +      fprintf(debug,
 +              "In gmx_setup_nodecomm: splitting communicator of size %d\n",
 +              n);
 +    }
 +
 +
 +    /* The intra-node communicator, split on node number */
 +    MPI_Comm_split(cr->mpi_comm_mygroup,hostnum,rank,&nc->comm_intra);
 +    MPI_Comm_rank(nc->comm_intra,&nc->rank_intra);
 +    if (debug) {
 +      fprintf(debug,"In gmx_setup_nodecomm: node rank %d rank_intra %d\n",
 +            rank,nc->rank_intra);
 +    }
 +    /* The inter-node communicator, split on rank_intra.
 +     * We actually only need the one for rank=0,
 +     * but it is easier to create them all.
 +     */
 +    MPI_Comm_split(cr->mpi_comm_mygroup,nc->rank_intra,rank,&nc->comm_inter);
 +    /* Check if this really created two step communication */
 +    MPI_Comm_size(nc->comm_inter,&ng);
 +    MPI_Comm_size(nc->comm_intra,&ni);
 +    if (debug) {
 +      fprintf(debug,"In gmx_setup_nodecomm: groups %d, my group size %d\n",
 +            ng,ni);
 +    }
 +    /* Two-step summing only pays off when the split is neither trivial
 +     * (a single group) nor degenerate (every process its own group).
 +     */
 +    if ((ng > 1 && ng < n) || (ni > 1 && ni < n)) {
 +      nc->bUse = TRUE;
 +      /* NOTE: the fprintf below is the (unbraced) body of if (fplog) */
 +      if (fplog)
 +      fprintf(fplog,"Using two step summing over %d groups of on average %.1f processes\n\n",ng,(real)n/(real)ng);
 +      /* Only intra-rank 0 keeps comm_inter; the MPI_Comm_free below is
 +       * the (unbraced) body of this if.
 +       */
 +      if (nc->rank_intra > 0)
 +      MPI_Comm_free(&nc->comm_inter);
 +    } else {
 +      /* One group or all processes in a separate group, use normal summing */
 +      MPI_Comm_free(&nc->comm_inter);
 +      MPI_Comm_free(&nc->comm_intra);
 +    }
 +#endif
 +  }
 +#endif
 +}
 +
 +void gmx_barrier(const t_commrec *cr)
 +{
 +#ifndef GMX_MPI
 +  gmx_call("gmx_barrier");
 +#else
 +  MPI_Barrier(cr->mpi_comm_mygroup);
 +#endif
 +}
 +
 +void gmx_abort(int noderank,int nnodes,int errorno)
 +{
 +#ifndef GMX_MPI
 +  gmx_call("gmx_abort");
 +#else
 +#ifdef GMX_THREAD_MPI
 +  fprintf(stderr,"Halting program %s\n",ShortProgram());
 +  thanx(stderr);
 +  exit(1);
 +#else
 +  if (nnodes > 1)
 +  {
 +      fprintf(stderr,"Halting parallel program %s on CPU %d out of %d\n",
 +              ShortProgram(),noderank,nnodes);
 +  }
 +  else
 +  {
 +      fprintf(stderr,"Halting program %s\n",ShortProgram());
 +  }
 +
 +  thanx(stderr);
 +  MPI_Abort(MPI_COMM_WORLD,errorno);
 +  exit(1);
 +#endif
 +#endif
 +}
 +
 +void gmx_bcast(int nbytes,void *b,const t_commrec *cr)
 +{
 +#ifndef GMX_MPI
 +  gmx_call("gmx_bast");
 +#else
 +  MPI_Bcast(b,nbytes,MPI_BYTE,MASTERRANK(cr),cr->mpi_comm_mygroup);
 +#endif
 +}
 +
 +void gmx_bcast_sim(int nbytes,void *b,const t_commrec *cr)
 +{
 +#ifndef GMX_MPI
 +  gmx_call("gmx_bast");
 +#else
 +  MPI_Bcast(b,nbytes,MPI_BYTE,MASTERRANK(cr),cr->mpi_comm_mysim);
 +#endif
 +}
 +
 +/* Sum the nr doubles in r element-wise over the ranks of
 + * cr->mpi_comm_mygroup; on return r holds the totals.
 + *
 + * When cr->nc.bUse is set, the sum is done in two steps: within
 + * cr->nc.comm_intra, then between the intra-rank-0 processes over
 + * cr->nc.comm_inter, followed by a broadcast within comm_intra.
 + * Without in-place reductions the scratch buffer cr->mpb->dbuf is
 + * grown on demand and used as receive buffer.
 + * Without GMX_MPI this only calls gmx_call().
 + */
 +void gmx_sumd(int nr,double r[],const t_commrec *cr)
 +{
 +#ifndef GMX_MPI
 +    gmx_call("gmx_sumd");
 +#else
 +#if defined(MPI_IN_PLACE_EXISTS) || defined(GMX_THREAD_MPI)
 +    if (cr->nc.bUse) {
 +        if (cr->nc.rank_intra == 0)
 +        {
 +            /* Use two step summing. */
 +            MPI_Reduce(MPI_IN_PLACE,r,nr,MPI_DOUBLE,MPI_SUM,0,
 +                       cr->nc.comm_intra);
 +            /* Sum the roots of the internal (intra) buffers. */
 +            MPI_Allreduce(MPI_IN_PLACE,r,nr,MPI_DOUBLE,MPI_SUM,
 +                          cr->nc.comm_inter);
 +        }
 +        else
 +        {
 +            /* This is here because of the silly MPI specification
 +                that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
 +            MPI_Reduce(r,NULL,nr,MPI_DOUBLE,MPI_SUM,0,cr->nc.comm_intra);
 +        }
 +        /* Hand the global totals from intra-rank 0 to the rest of the group */
 +        MPI_Bcast(r,nr,MPI_DOUBLE,0,cr->nc.comm_intra);
 +    } 
 +    else 
 +    {
 +        MPI_Allreduce(MPI_IN_PLACE,r,nr,MPI_DOUBLE,MPI_SUM, 
 +                      cr->mpi_comm_mygroup);
 +    }
 +#else
 +    int i;
 +
 +    /* Grow the scratch receive buffer if this call needs more room */
 +    if (nr > cr->mpb->dbuf_alloc) {
 +        cr->mpb->dbuf_alloc = nr;
 +        srenew(cr->mpb->dbuf,cr->mpb->dbuf_alloc);
 +    }
 +    if (cr->nc.bUse) {
 +        /* Use two step summing */
 +        MPI_Allreduce(r,cr->mpb->dbuf,nr,MPI_DOUBLE,MPI_SUM,cr->nc.comm_intra);
 +        if (cr->nc.rank_intra == 0) {
 +            /* Sum with the buffers reversed */
 +            MPI_Allreduce(cr->mpb->dbuf,r,nr,MPI_DOUBLE,MPI_SUM, 
 +                          cr->nc.comm_inter);
 +        }
 +        MPI_Bcast(r,nr,MPI_DOUBLE,0,cr->nc.comm_intra);
 +    } else {
 +        MPI_Allreduce(r,cr->mpb->dbuf,nr,MPI_DOUBLE,MPI_SUM,
 +                      cr->mpi_comm_mygroup);
 +        /* Copy the totals back into the caller's array */
 +        for(i=0; i<nr; i++)
 +            r[i] = cr->mpb->dbuf[i];
 +    }
 +#endif
 +#endif
 +}
 +
 +/* Sum the nr floats in r element-wise over the ranks of
 + * cr->mpi_comm_mygroup; on return r holds the totals.
 + *
 + * When cr->nc.bUse is set, the sum is done in two steps: within
 + * cr->nc.comm_intra, then between the intra-rank-0 processes over
 + * cr->nc.comm_inter, followed by a broadcast within comm_intra.
 + * Without in-place reductions the scratch buffer cr->mpb->fbuf is
 + * grown on demand and used as receive buffer.
 + * Without GMX_MPI this only calls gmx_call().
 + */
 +void gmx_sumf(int nr,float r[],const t_commrec *cr)
 +{
 +#ifndef GMX_MPI
 +    gmx_call("gmx_sumf");
 +#else
 +#if defined(MPI_IN_PLACE_EXISTS) || defined(GMX_THREAD_MPI)
 +    if (cr->nc.bUse) {
 +        /* Use two step summing.  */
 +        if (cr->nc.rank_intra == 0)
 +        {
 +            MPI_Reduce(MPI_IN_PLACE,r,nr,MPI_FLOAT,MPI_SUM,0,
 +                       cr->nc.comm_intra);
 +            /* Sum the roots of the internal (intra) buffers */
 +            MPI_Allreduce(MPI_IN_PLACE,r,nr,MPI_FLOAT,MPI_SUM,
 +                          cr->nc.comm_inter);
 +        }
 +        else
 +        {
 +            /* This is here because of the silly MPI specification
 +                that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
 +            MPI_Reduce(r,NULL,nr,MPI_FLOAT,MPI_SUM,0,cr->nc.comm_intra);
 +        }
 +        /* Hand the global totals from intra-rank 0 to the rest of the group */
 +        MPI_Bcast(r,nr,MPI_FLOAT,0,cr->nc.comm_intra);
 +    } 
 +    else 
 +    {
 +        MPI_Allreduce(MPI_IN_PLACE,r,nr,MPI_FLOAT,MPI_SUM,cr->mpi_comm_mygroup);
 +    }
 +#else
 +    int i;
 +
 +    /* Grow the scratch receive buffer if this call needs more room */
 +    if (nr > cr->mpb->fbuf_alloc) {
 +        cr->mpb->fbuf_alloc = nr;
 +        srenew(cr->mpb->fbuf,cr->mpb->fbuf_alloc);
 +    }
 +    if (cr->nc.bUse) {
 +        /* Use two step summing */
 +        MPI_Allreduce(r,cr->mpb->fbuf,nr,MPI_FLOAT,MPI_SUM,cr->nc.comm_intra);
 +        if (cr->nc.rank_intra == 0) {
 +            /* Sum with the buffers reversed */
 +            MPI_Allreduce(cr->mpb->fbuf,r,nr,MPI_FLOAT,MPI_SUM, 
 +                          cr->nc.comm_inter);
 +        }
 +        MPI_Bcast(r,nr,MPI_FLOAT,0,cr->nc.comm_intra);
 +    } else {
 +        MPI_Allreduce(r,cr->mpb->fbuf,nr,MPI_FLOAT,MPI_SUM,
 +                      cr->mpi_comm_mygroup);
 +        /* Copy the totals back into the caller's array */
 +        for(i=0; i<nr; i++)
 +            r[i] = cr->mpb->fbuf[i];
 +    }
 +#endif
 +#endif
 +}
 +
 +/* Sum the nr ints in r element-wise over the ranks of
 + * cr->mpi_comm_mygroup; on return r holds the totals.
 + *
 + * When cr->nc.bUse is set, the sum is done in two steps: within
 + * cr->nc.comm_intra, then between the intra-rank-0 processes over
 + * cr->nc.comm_inter, followed by a broadcast within comm_intra.
 + * Without in-place reductions the scratch buffer cr->mpb->ibuf is
 + * grown on demand and used as receive buffer.
 + * Without GMX_MPI this only calls gmx_call().
 + */
 +void gmx_sumi(int nr,int r[],const t_commrec *cr)
 +{
 +#ifndef GMX_MPI
 +    gmx_call("gmx_sumi");
 +#else
 +#if defined(MPI_IN_PLACE_EXISTS) || defined(GMX_THREAD_MPI)
 +    if (cr->nc.bUse) {
 +        /* Use two step summing */
 +        if (cr->nc.rank_intra == 0) 
 +        {
 +            MPI_Reduce(MPI_IN_PLACE,r,nr,MPI_INT,MPI_SUM,0,cr->nc.comm_intra);
 +            /* Sum the roots of the internal (intra) buffers */
 +            MPI_Allreduce(MPI_IN_PLACE,r,nr,MPI_INT,MPI_SUM,cr->nc.comm_inter);
 +        }
 +        else
 +        {
 +            /* This is here because of the silly MPI specification
 +                that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
 +            MPI_Reduce(r,NULL,nr,MPI_INT,MPI_SUM,0,cr->nc.comm_intra);
 +        }
 +        /* Hand the global totals from intra-rank 0 to the rest of the group */
 +        MPI_Bcast(r,nr,MPI_INT,0,cr->nc.comm_intra);
 +    } 
 +    else 
 +    {
 +        MPI_Allreduce(MPI_IN_PLACE,r,nr,MPI_INT,MPI_SUM,cr->mpi_comm_mygroup);
 +    }
 +#else
 +    int i;
 +
 +    /* Grow the scratch receive buffer if this call needs more room */
 +    if (nr > cr->mpb->ibuf_alloc) {
 +        cr->mpb->ibuf_alloc = nr;
 +        srenew(cr->mpb->ibuf,cr->mpb->ibuf_alloc);
 +    }
 +    if (cr->nc.bUse) {
 +        /* Use two step summing */
 +        MPI_Allreduce(r,cr->mpb->ibuf,nr,MPI_INT,MPI_SUM,cr->nc.comm_intra);
 +        if (cr->nc.rank_intra == 0) {
 +            /* Sum with the buffers reversed */
 +            MPI_Allreduce(cr->mpb->ibuf,r,nr,MPI_INT,MPI_SUM,cr->nc.comm_inter);
 +        }
 +        MPI_Bcast(r,nr,MPI_INT,0,cr->nc.comm_intra);
 +    } else {
 +        MPI_Allreduce(r,cr->mpb->ibuf,nr,MPI_INT,MPI_SUM,cr->mpi_comm_mygroup);
 +        /* Copy the totals back into the caller's array */
 +        for(i=0; i<nr; i++)
 +            r[i] = cr->mpb->ibuf[i];
 +    }
 +#endif
 +#endif
 +}
 +
 +/* Sum the nr gmx_large_int_t values in r element-wise over the ranks of
 + * cr->mpi_comm_mygroup; on return r holds the totals.
 + *
 + * When cr->nc.bUse is set, the sum is done in two steps: within
 + * cr->nc.comm_intra, then between the intra-rank-0 processes over
 + * cr->nc.comm_inter, followed by a broadcast within comm_intra.
 + * Without in-place reductions the scratch buffer cr->mpb->libuf is
 + * grown on demand and used as receive buffer.
 + * Without GMX_MPI this only calls gmx_call().
 + */
 +void gmx_sumli(int nr,gmx_large_int_t r[],const t_commrec *cr)
 +{
 +#ifndef GMX_MPI
 +    gmx_call("gmx_sumli");
 +#else
 +#if defined(MPI_IN_PLACE_EXISTS) || defined(GMX_THREAD_MPI)
 +    if (cr->nc.bUse) {
 +        /* Use two step summing */
 +        if (cr->nc.rank_intra == 0) 
 +        {
 +            MPI_Reduce(MPI_IN_PLACE,r,nr,GMX_MPI_LARGE_INT,MPI_SUM,0,
 +                       cr->nc.comm_intra);
 +            /* Sum the roots of the internal (intra) buffers */
 +            MPI_Allreduce(MPI_IN_PLACE,r,nr,GMX_MPI_LARGE_INT,MPI_SUM,
 +                          cr->nc.comm_inter);
 +        }
 +        else
 +        {
 +            /* This is here because of the silly MPI specification
 +                that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
 +            MPI_Reduce(r,NULL,nr,GMX_MPI_LARGE_INT,MPI_SUM,0,cr->nc.comm_intra);
 +        }
 +        /* Hand the global totals from intra-rank 0 to the rest of the group */
 +        MPI_Bcast(r,nr,GMX_MPI_LARGE_INT,0,cr->nc.comm_intra);
 +    } 
 +    else 
 +    {
 +        MPI_Allreduce(MPI_IN_PLACE,r,nr,GMX_MPI_LARGE_INT,MPI_SUM,cr->mpi_comm_mygroup);
 +    }
 +#else
 +    int i;
 +
 +    /* Grow the scratch receive buffer if this call needs more room */
 +    if (nr > cr->mpb->libuf_alloc) {
 +        cr->mpb->libuf_alloc = nr;
 +        srenew(cr->mpb->libuf,cr->mpb->libuf_alloc);
 +    }
 +    if (cr->nc.bUse) {
 +        /* Use two step summing */
 +        MPI_Allreduce(r,cr->mpb->libuf,nr,GMX_MPI_LARGE_INT,MPI_SUM,
 +                      cr->nc.comm_intra);
 +        if (cr->nc.rank_intra == 0) {
 +            /* Sum with the buffers reversed */
 +            MPI_Allreduce(cr->mpb->libuf,r,nr,GMX_MPI_LARGE_INT,MPI_SUM,
 +                          cr->nc.comm_inter);
 +        }
 +        MPI_Bcast(r,nr,GMX_MPI_LARGE_INT,0,cr->nc.comm_intra);
 +    } else {
 +        MPI_Allreduce(r,cr->mpb->libuf,nr,GMX_MPI_LARGE_INT,MPI_SUM,
 +                      cr->mpi_comm_mygroup);
 +        /* Copy the totals back into the caller's array */
 +        for(i=0; i<nr; i++)
 +            r[i] = cr->mpb->libuf[i];
 +    }
 +#endif
 +#endif
 +}
 +
 +
 +
 +#ifdef GMX_MPI
 +void gmx_sumd_comm(int nr,double r[],MPI_Comm mpi_comm)
 +{
 +#if defined(MPI_IN_PLACE_EXISTS) || defined(GMX_THREAD_MPI)
 +    MPI_Allreduce(MPI_IN_PLACE,r,nr,MPI_DOUBLE,MPI_SUM,mpi_comm);
 +#else
 +    /* this function is only used in code that is not performance critical,
 +       (during setup, when comm_rec is not the appropriate communication  
 +       structure), so this isn't as bad as it looks. */
 +    double *buf;
 +    int i;
 +
 +    snew(buf, nr);
 +    MPI_Allreduce(r,buf,nr,MPI_DOUBLE,MPI_SUM,mpi_comm);
 +    for(i=0; i<nr; i++)
 +        r[i] = buf[i];
 +    sfree(buf);
 +#endif
 +}
 +#endif
 +
 +#ifdef GMX_MPI
 +void gmx_sumf_comm(int nr,float r[],MPI_Comm mpi_comm)
 +{
 +#if defined(MPI_IN_PLACE_EXISTS) || defined(GMX_THREAD_MPI)
 +    MPI_Allreduce(MPI_IN_PLACE,r,nr,MPI_FLOAT,MPI_SUM,mpi_comm);
 +#else
 +    /* this function is only used in code that is not performance critical,
 +       (during setup, when comm_rec is not the appropriate communication  
 +       structure), so this isn't as bad as it looks. */
 +    float *buf;
 +    int i;
 +
 +    snew(buf, nr);
 +    MPI_Allreduce(r,buf,nr,MPI_FLOAT,MPI_SUM,mpi_comm);
 +    for(i=0; i<nr; i++)
 +        r[i] = buf[i];
 +    sfree(buf);
 +#endif
 +}
 +#endif
 +
 +void gmx_sumd_sim(int nr,double r[],const gmx_multisim_t *ms)
 +{
 +#ifndef GMX_MPI
 +  gmx_call("gmx_sumd_sim");
 +#else
 +  gmx_sumd_comm(nr,r,ms->mpi_comm_masters);
 +#endif
 +}
 +
 +void gmx_sumf_sim(int nr,float r[],const gmx_multisim_t *ms)
 +{
 +#ifndef GMX_MPI
 +  gmx_call("gmx_sumf_sim");
 +#else
 +  gmx_sumf_comm(nr,r,ms->mpi_comm_masters);
 +#endif
 +}
 +
 +void gmx_sumi_sim(int nr,int r[], const gmx_multisim_t *ms)
 +{
 +#ifndef GMX_MPI
 +    gmx_call("gmx_sumi_sim");
 +#else
 +#if defined(MPI_IN_PLACE_EXISTS) || defined(GMX_THREAD_MPI)
 +    MPI_Allreduce(MPI_IN_PLACE,r,nr,MPI_INT,MPI_SUM,ms->mpi_comm_masters);
 +#else
 +    /* this is thread-unsafe, but it will do for now: */
 +    int i;
 +
 +    if (nr > ms->mpb->ibuf_alloc) {
 +        ms->mpb->ibuf_alloc = nr;
 +        srenew(ms->mpb->ibuf,ms->mpb->ibuf_alloc);
 +    }
 +    MPI_Allreduce(r,ms->mpb->ibuf,nr,MPI_INT,MPI_SUM,ms->mpi_comm_masters);
 +    for(i=0; i<nr; i++)
 +        r[i] = ms->mpb->ibuf[i];
 +#endif
 +#endif
 +}
 +
 +void gmx_sumli_sim(int nr,gmx_large_int_t r[], const gmx_multisim_t *ms)
 +{
 +#ifndef GMX_MPI
 +    gmx_call("gmx_sumli_sim");
 +#else
 +#if defined(MPI_IN_PLACE_EXISTS) || defined(GMX_THREAD_MPI)
 +    MPI_Allreduce(MPI_IN_PLACE,r,nr,GMX_MPI_LARGE_INT,MPI_SUM,
 +                  ms->mpi_comm_masters);
 +#else
 +    /* this is thread-unsafe, but it will do for now: */
 +    int i;
 +
 +    if (nr > ms->mpb->libuf_alloc) {
 +        ms->mpb->libuf_alloc = nr;
 +        srenew(ms->mpb->libuf,ms->mpb->libuf_alloc);
 +    }
 +    MPI_Allreduce(r,ms->mpb->libuf,nr,GMX_MPI_LARGE_INT,MPI_SUM,
 +                  ms->mpi_comm_masters);
 +    for(i=0; i<nr; i++)
 +        r[i] = ms->mpb->libuf[i];
 +#endif
 +#endif
 +}
 +
 +
 +void gmx_finalize(void)
 +{
 +#ifndef GMX_MPI
 +  gmx_call("gmx_finalize");
 +#else
 +  int ret;
 +
 +  /* just as a check; we don't want to finalize twice */
 +  int finalized;
 +  MPI_Finalized(&finalized);
 +  if (finalized)
 +      return;
 +
 +  /* We sync the processes here to try to avoid problems
 +   * with buggy MPI implementations that could cause
 +   * unfinished processes to terminate.
 +   */
 +  MPI_Barrier(MPI_COMM_WORLD);
 +
 +  /*
 +  if (DOMAINDECOMP(cr)) {
 +    if (cr->npmenodes > 0 || cr->dd->bCartesian) 
 +      MPI_Comm_free(&cr->mpi_comm_mygroup);
 +    if (cr->dd->bCartesian)
 +      MPI_Comm_free(&cr->mpi_comm_mysim);
 +  }
 +  */
 +
 +  /* Apparently certain mpich implementations cause problems
 +   * with MPI_Finalize. In that case comment out MPI_Finalize.
 +   */
 +  if (debug)
 +    fprintf(debug,"Will call MPI_Finalize now\n");
 +
 +  ret = MPI_Finalize();
 +  if (debug)
 +    fprintf(debug,"Return code from MPI_Finalize = %d\n",ret);
 +#endif
 +}
 +
index 933249b23869e2a1d57c2cc8e635e16ff47339dd,0000000000000000000000000000000000000000..0c2689dd6ae233a49e0904f06713f8eefdeea465
mode 100644,000000..100644
--- /dev/null
@@@ -1,892 -1,0 +1,892 @@@
-                     while (c && (c[0]!=' ')) c++;
-                     while (c && (c[0]==' ')) c++;
 +/*
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROningen Mixture of Alchemy and Childrens' Stories
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <ctype.h>
 +
 +#include "sysstuff.h"
 +#include "string2.h"
 +#include "vec.h"
 +#include "smalloc.h"
 +#include "typedefs.h"
 +#include "symtab.h"
 +#include "pdbio.h"
 +#include "vec.h"
 +#include "copyrite.h"
 +#include "futil.h"
 +#include "atomprop.h"
 +#include "physics.h"
 +#include "pbc.h"
 +#include "gmxfio.h"
 +
 +/* One connection between two atoms; ai and aj are written (plus one)
 + * as the two fields of a CONECT record by write_pdbfile_indexed().
 + */
 +typedef struct {
 +  int ai,aj;
 +} gmx_conection_t;
 +
 +/* A list of nconect connections.
 + * NOTE(review): bSorted presumably records whether conect[] is sorted;
 + * nothing in the code shown here reads or writes it - confirm.
 + */
 +typedef struct gmx_conect_t {
 +  int  nconect;
 +  gmx_bool bSorted;
 +  gmx_conection_t *conect;
 +} gmx_conect_t;
 +
 +/* Record-name strings, one per record type (epdbNR entries).
 + * line2type() returns the index of the first entry that is a prefix of
 + * the line; write_pdbfile_indexed() prints the entry selected by an
 + * atom's pdbinfo type.
 + */
 +static const char *pdbtp[epdbNR]={
 +  "ATOM  ","HETATM", "ANISOU", "CRYST1",
 +  "COMPND", "MODEL", "ENDMDL", "TER", "HEADER", "TITLE", "REMARK",
 +  "CONECT"
 +};
 +
 +
 +/* File-scope switch for the wide (non-standard) output format, changed
 + * through set_pdb_wide_format(). A static flag is not very good, but it
 + * is only used in gmx_trjconv and gmx_editconf.
 + */
 +static gmx_bool bWideFormat=FALSE;
 +#define REMARK_SIM_BOX "REMARK    THIS IS A SIMULATION BOX"
 +
 +/* Select (bSet TRUE) or deselect the wide output format that
 + * write_pdbfile_indexed() uses for atom records.
 + */
 +void set_pdb_wide_format(gmx_bool bSet)
 +{
 +  bWideFormat = bSet;
 +}
 +
 +static void xlate_atomname_pdb2gmx(char *name)
 +{
 +  int i,length;
 +  char temp;
 +
 +  length=strlen(name);
 +  if (length>3 && isdigit(name[0])) {
 +    temp=name[0]; 
 +    for(i=1; i<length; i++)
 +      name[i-1]=name[i];
 +    name[length-1]=temp;
 +  }
 +}
 +
 +static void xlate_atomname_gmx2pdb(char *name)
 +{
 +      int i,length;
 +      char temp;
 +      
 +      length=strlen(name);
 +      if (length>3 && isdigit(name[length-1])) {
 +              temp=name[length-1]; 
 +              for(i=length-1; i>0; --i)
 +                      name[i]=name[i-1];
 +              name[0]=temp;
 +      }
 +}
 +
 +
 +/* Write the box as a REMARK line followed by a CRYST1 record to out.
 + *
 + * ePBC  periodicity type; -1 means: derive it with guess_ePBC(box).
 + *       Nothing is written for epbcNONE.
 + * box   box matrix; its three rows are taken as the box vectors
 + *
 + * The printed lengths are the vector norms times 10 (presumably nm to
 + * Angstrom - confirm). For epbcSCREW the first length is doubled and
 + * the space group "P 21 1 1" is written instead of "P 1".
 + */
 +void gmx_write_pdb_box(FILE *out,int ePBC,matrix box)
 +{
 +  real alpha,beta,gamma;
 +
 +  if (ePBC == -1)
 +    ePBC = guess_ePBC(box);
 +
 +  if (ePBC == epbcNONE)
 +    return;
 +
 +  /* An angle involving a zero-length box vector is reported as 90 */
 +  if (norm2(box[YY])*norm2(box[ZZ])!=0)
 +    alpha = RAD2DEG*acos(cos_angle_no_table(box[YY],box[ZZ]));
 +  else
 +    alpha = 90;
 +  if (norm2(box[XX])*norm2(box[ZZ])!=0)
 +    beta  = RAD2DEG*acos(cos_angle_no_table(box[XX],box[ZZ]));
 +  else
 +    beta  = 90;
 +  if (norm2(box[XX])*norm2(box[YY])!=0)
 +    gamma = RAD2DEG*acos(cos_angle_no_table(box[XX],box[YY]));
 +  else
 +    gamma = 90;
 +  /* Same text as the REMARK_SIM_BOX macro defined in this file */
 +  fprintf(out,"REMARK    THIS IS A SIMULATION BOX\n");
 +  if (ePBC != epbcSCREW) {
 +    fprintf(out,"CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f %-11s%4d\n",
 +          10*norm(box[XX]),10*norm(box[YY]),10*norm(box[ZZ]),
 +          alpha,beta,gamma,"P 1",1);
 +  } else {
 +    /* Double the a-vector length and write the correct space group */
 +    fprintf(out,"CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f %-11s%4d\n",
 +          20*norm(box[XX]),10*norm(box[YY]),10*norm(box[ZZ]),
 +          alpha,beta,gamma,"P 21 1 1",1);
 +    
 +  }
 +}
 +
 +static void read_cryst1(char *line,int *ePBC,matrix box)
 +{
 +#define SG_SIZE 11
 +  char sa[12],sb[12],sc[12],sg[SG_SIZE+1],ident;
 +  double fa,fb,fc,alpha,beta,gamma,cosa,cosb,cosg,sing;
 +  int  syma,symb,symc;
 +  int  ePBC_file;
 +
 +  sscanf(line,"%*s%s%s%s%lf%lf%lf",sa,sb,sc,&alpha,&beta,&gamma);
 +
 +  ePBC_file = -1;
 +  if (strlen(line) >= 55) {
 +    strncpy(sg,line+55,SG_SIZE);
 +    sg[SG_SIZE] = '\0';
 +    ident = ' ';
 +    syma  = 0;
 +    symb  = 0;
 +    symc  = 0;
 +    sscanf(sg,"%c %d %d %d",&ident,&syma,&symb,&symc);
 +    if (ident == 'P' && syma ==  1 && symb <= 1 && symc <= 1) {
 +      fc = strtod(sc,NULL)*0.1;
 +      ePBC_file = (fc > 0 ? epbcXYZ : epbcXY);
 +    }
 +    if (ident == 'P' && syma == 21 && symb == 1 && symc == 1) {
 +      ePBC_file = epbcSCREW;
 +    }
 +  }
 +  if (ePBC) {
 +    *ePBC = ePBC_file;
 +  }
 +
 +  if (box) {
 +    fa = strtod(sa,NULL)*0.1;
 +    fb = strtod(sb,NULL)*0.1;
 +    fc = strtod(sc,NULL)*0.1;
 +    if (ePBC_file == epbcSCREW) {
 +      fa *= 0.5;
 +    }
 +    clear_mat(box);
 +    box[XX][XX] = fa;
 +    if ((alpha!=90.0) || (beta!=90.0) || (gamma!=90.0)) {
 +      if (alpha != 90.0) {
 +      cosa = cos(alpha*DEG2RAD);
 +      } else {
 +      cosa = 0;
 +      }
 +      if (beta != 90.0) {
 +      cosb = cos(beta*DEG2RAD);
 +      } else {
 +      cosb = 0;
 +      }
 +      if (gamma != 90.0) {
 +      cosg = cos(gamma*DEG2RAD);
 +      sing = sin(gamma*DEG2RAD);
 +      } else {
 +      cosg = 0;
 +      sing = 1;
 +      }
 +      box[YY][XX] = fb*cosg;
 +      box[YY][YY] = fb*sing;
 +      box[ZZ][XX] = fc*cosb;
 +      box[ZZ][YY] = fc*(cosa - cosb*cosg)/sing;
 +      box[ZZ][ZZ] = sqrt(fc*fc
 +                       - box[ZZ][XX]*box[ZZ][XX] - box[ZZ][YY]*box[ZZ][YY]);
 +    } else {
 +      box[YY][YY] = fb;
 +      box[ZZ][ZZ] = fc;
 +    }
 +  }
 +}
 +  
 +void write_pdbfile_indexed(FILE *out,const char *title,
 +                         t_atoms *atoms,rvec x[],
 +                         int ePBC,matrix box,char chainid,
 +                         int model_nr, atom_id nindex, atom_id index[],
 +                         gmx_conect conect, gmx_bool bTerSepChains)
 +{
 +  gmx_conect_t *gc = (gmx_conect_t *)conect;
 +  char resnm[6],nm[6],pdbform[128],pukestring[100];
 +  atom_id i,ii;
 +  int  resind,resnr,type;
 +  unsigned char resic,ch;
 +  real occup,bfac;
 +  gmx_bool bOccup;
 +  int  nlongname=0;
 +  int  chainnum,lastchainnum;
 +  int  lastresind,lastchainresind;
 +  gmx_residuetype_t rt;
 +  const char *p_restype;
 +  const char *p_lastrestype;
 +    
 +  gmx_residuetype_init(&rt);  
 +    
 +  bromacs(pukestring,99);
 +  fprintf(out,"TITLE     %s\n",(title && title[0])?title:pukestring);
 +  if (bWideFormat) {
 +    fprintf(out,"REMARK    This file does not adhere to the PDB standard\n");
 +    fprintf(out,"REMARK    As a result of, some programs may not like it\n");
 +  }
 +  if (box && ( norm2(box[XX]) || norm2(box[YY]) || norm2(box[ZZ]) ) ) {
 +    gmx_write_pdb_box(out,ePBC,box);
 +  }
 +  if (atoms->pdbinfo) {
 +    /* Check whether any occupancies are set, in that case leave it as is,
 +     * otherwise set them all to one
 +     */
 +    bOccup = TRUE;
 +    for (ii=0; (ii<nindex) && bOccup; ii++) {
 +      i      = index[ii];
 +      bOccup = bOccup && (atoms->pdbinfo[i].occup == 0.0);
 +    }
 +  } 
 +  else
 +    bOccup = FALSE;
 +
 +  fprintf(out,"MODEL %8d\n",model_nr>0 ? model_nr : 1);
 +
 +  lastchainresind   = -1;
 +  lastchainnum      = -1;
 +  resind            = -1;
 +  p_restype = NULL;
 +    
 +  for (ii=0; ii<nindex; ii++) {
 +    i=index[ii];
 +    lastresind = resind;
 +    resind = atoms->atom[i].resind;
 +    chainnum = atoms->resinfo[resind].chainnum;
 +    p_lastrestype = p_restype;
 +    gmx_residuetype_get_type(rt,*atoms->resinfo[resind].name,&p_restype);        
 +      
 +    /* Add a TER record if we changed chain, and if either the previous or this chain is protein/DNA/RNA. */
 +    if( bTerSepChains && ii>0 && chainnum != lastchainnum)
 +    {
 +        /* Only add TER if the previous chain contained protein/DNA/RNA. */
 +        if(gmx_residuetype_is_protein(rt,p_lastrestype) || gmx_residuetype_is_dna(rt,p_lastrestype) || gmx_residuetype_is_rna(rt,p_lastrestype))
 +        {
 +            fprintf(out,"TER\n");
 +        }
 +        lastchainnum    = chainnum;
 +      lastchainresind = lastresind;
 +    }
 +      
 +    strncpy(resnm,*atoms->resinfo[resind].name,sizeof(resnm)-1);
 +    strncpy(nm,*atoms->atomname[i],sizeof(nm)-1);
 +    /* rename HG12 to 2HG1, etc. */
 +    xlate_atomname_gmx2pdb(nm);
 +    resnr = atoms->resinfo[resind].nr;
 +    resic = atoms->resinfo[resind].ic;
 +    if (chainid!=' ') 
 +    {
 +      ch = chainid;
 +    }
 +    else
 +    {
 +      ch = atoms->resinfo[resind].chainid;
 +
 +      if (ch == 0) 
 +      {
 +          ch = ' ';
 +      }
 +    }
 +    if (resnr>=10000)
 +      resnr = resnr % 10000;
 +    if (atoms->pdbinfo) {
 +      type  = atoms->pdbinfo[i].type;
 +      occup = bOccup ? 1.0 : atoms->pdbinfo[i].occup;
 +      bfac  = atoms->pdbinfo[i].bfac;
 +    }
 +    else {
 +      type  = 0;
 +      occup = 1.0;
 +      bfac  = 0.0;
 +    }
 +    if (bWideFormat)
 +      strcpy(pdbform,
 +           "%-6s%5u %-4.4s %3.3s %c%4d%c   %10.5f%10.5f%10.5f%8.4f%8.4f    %2s\n");
 +    else {
 +      /* Check whether atomname is an element name */
 +      if ((strlen(nm)<4) && (gmx_strcasecmp(nm,atoms->atom[i].elem) != 0))
 +      strcpy(pdbform,get_pdbformat());
 +      else {
 +      strcpy(pdbform,get_pdbformat4());
 +      if (strlen(nm) > 4) {
 +        int maxwln=20;
 +        if (nlongname < maxwln) {
 +          fprintf(stderr,"WARNING: Writing out atom name (%s) longer than 4 characters to .pdb file\n",nm);
 +        } else if (nlongname == maxwln) {
 +          fprintf(stderr,"WARNING: More than %d long atom names, will not write more warnings\n",maxwln);
 +        }
 +        nlongname++;
 +      }
 +      }
 +      strcat(pdbform,"%6.2f%6.2f          %2s\n");
 +    }
 +    fprintf(out,pdbform,pdbtp[type],(i+1)%100000,nm,resnm,ch,resnr,
 +          (resic == '\0') ? ' ' : resic,
 +          10*x[i][XX],10*x[i][YY],10*x[i][ZZ],occup,bfac,atoms->atom[i].elem);
 +    if (atoms->pdbinfo && atoms->pdbinfo[i].bAnisotropic) {
 +      fprintf(out,"ANISOU%5u  %-4.4s%3.3s %c%4d%c %7d%7d%7d%7d%7d%7d\n",
 +            (i+1)%100000,nm,resnm,ch,resnr,
 +            (resic == '\0') ? ' ' : resic,
 +            atoms->pdbinfo[i].uij[0],atoms->pdbinfo[i].uij[1],
 +            atoms->pdbinfo[i].uij[2],atoms->pdbinfo[i].uij[3],
 +            atoms->pdbinfo[i].uij[4],atoms->pdbinfo[i].uij[5]);
 +    }
 +  }
 + 
 +  fprintf(out,"TER\n");
 +  fprintf(out,"ENDMDL\n");
 +    
 +  if (NULL != gc) {
 +    /* Write conect records */
 +    for(i=0; (i<gc->nconect); i++) {
 +      fprintf(out,"CONECT%5d%5d\n",gc->conect[i].ai+1,gc->conect[i].aj+1);
 +    }
 +  }
 +}
 +
 +void write_pdbfile(FILE *out,const char *title, t_atoms *atoms,rvec x[],
 +                 int ePBC,matrix box,char chainid,int model_nr,gmx_conect conect,gmx_bool bTerSepChains)
 +{
 +  atom_id i,*index;
 +
 +  snew(index,atoms->nr);
 +  for(i=0; i<atoms->nr; i++)
 +    index[i]=i;
 +  write_pdbfile_indexed(out,title,atoms,x,ePBC,box,chainid,model_nr,
 +                      atoms->nr,index,conect,bTerSepChains);
 +  sfree(index);
 +}
 +
 +int line2type(char *line)
 +{
 +  int  k;
 +  char type[8];
 +  
 +  for(k=0; (k<6); k++) 
 +    type[k]=line[k];
 +  type[k]='\0';
 +  
 +  for(k=0; (k<epdbNR); k++)
 +    if (strncmp(type,pdbtp[k],strlen(pdbtp[k])) == 0)
 +      break;
 +  
 +  return k;
 +}
 +
 +/* Parse an ANISOU record and store its six values with the matching atom.
 + *
 + * line   the record text; characters 6-10 are taken as the atom number
 + *        and characters 12-15 as the atom name
 + * natom  number of atoms read so far; atoms natom-1 down to 0 are searched
 + * atoms  atom data; pdbinfo[i].uij and pdbinfo[i].bAnisotropic are set
 + *        for the matching atom i
 + *
 + * A record whose atom is not found is skipped with a message on stderr.
 + * NOTE(review): line is indexed without a length check, so it is
 + * assumed to hold a full-length record - confirm against the caller.
 + */
 +static void read_anisou(char line[],int natom,t_atoms *atoms)
 +{
 +  int  i,j,k,atomnr;
 +  char nc='\0';
 +  char anr[12],anm[12];
 +
 +  /* Skip over type */  
 +  j=6;
 +  for(k=0; (k<5); k++,j++) anr[k]=line[j];
 +  anr[k]=nc;
 +  j++;
 +  for(k=0; (k<4); k++,j++) anm[k]=line[j];
 +  anm[k]=nc;
 +  j++;
 +  
 +  /* Strip off spaces */
 +  trim(anm);
 +  
 +  /* Search backwards for number and name only */
 +  atomnr = strtol(anr, NULL, 10); 
 +  for(i=natom-1; (i>=0); i--)
 +    if ((strcmp(anm,*(atoms->atomname[i])) == 0) && 
 +      (atomnr == atoms->pdbinfo[i].atomnr))
 +      break;
 +  if (i < 0)
 +    fprintf(stderr,"Skipping ANISOU record (atom %s %d not found)\n",
 +          anm,atomnr);
 +  else {
 +    /* The six integer values are read from character 29 of the line on */
 +    if (sscanf(line+29,"%d%d%d%d%d%d",
 +             &atoms->pdbinfo[i].uij[U11],&atoms->pdbinfo[i].uij[U22],
 +             &atoms->pdbinfo[i].uij[U33],&atoms->pdbinfo[i].uij[U12],
 +             &atoms->pdbinfo[i].uij[U13],&atoms->pdbinfo[i].uij[U23])
 +               == 6) {
 +      atoms->pdbinfo[i].bAnisotropic = TRUE;
 +    }
 +    else {
 +      /* Note: the message prints the array index i, not atomnr */
 +      fprintf(stderr,"Invalid ANISOU record for atom %d\n",i);
 +      atoms->pdbinfo[i].bAnisotropic = FALSE;
 +    }     
 +  }
 +}
 +
 +void get_pdb_atomnumber(t_atoms *atoms,gmx_atomprop_t aps)
 +{ /* Deduce the atomic number and element string of every atom from its
 +   * raw PDB atom name (pdbinfo[i].atomnm) by querying the element property
 +   * in aps. Tried in order: the first two characters, then the first
 +   * character (both only when the name does not start with a blank and is
 +   * at most two characters long or has no digit at position 2), and
 +   * finally the first character that is neither a space nor a digit.
 +   * atomnumber stays NOTSET when every query fails.
 +   */
 +  int  i,atomnumber,len;
 +  size_t k;
 +  char anm[6],anm_copy[6],*ptr;
 +  char nc='\0';
 +  real eval;
 +  
 +  if (!atoms->pdbinfo) {
 +    gmx_incons("Trying to deduce atomnumbers when no pdb information is present");
 +  }
 +  for(i=0; (i<atoms->nr); i++) {
 +    strcpy(anm,atoms->pdbinfo[i].atomnm);
 +    strcpy(anm_copy,atoms->pdbinfo[i].atomnm);
 +    len = strlen(anm);
 +    atomnumber = NOTSET;
 +    if ((anm[0] != ' ') && ((len <=2) || ((len > 2) && !isdigit(anm[2])))) {
 +      anm_copy[2] = nc; /* try a two-letter element symbol first */
 +      if (gmx_atomprop_query(aps,epropElement,"???",anm_copy,&eval))
 +      atomnumber = gmx_nint(eval);
 +      else {
 +      anm_copy[1] = nc; /* fall back to the first letter only */
 +      if (gmx_atomprop_query(aps,epropElement,"???",anm_copy,&eval))
 +        atomnumber = gmx_nint(eval);
 +      }
 +    }
 +    if (atomnumber == NOTSET) {
 +      k=0;
 +      while ((k < strlen(anm)) && (isspace(anm[k]) || isdigit(anm[k])))
 +      k++;
 +      anm_copy[0] = anm[k];
 +      anm_copy[1] = nc;
 +      if (gmx_atomprop_query(aps,epropElement,"???",anm_copy,&eval))
 +      atomnumber = gmx_nint(eval);
 +    }
 +    atoms->atom[i].atomnumber = atomnumber;
 +    ptr = gmx_atomprop_element(aps,atomnumber);
 +    /* NOTE(review): strncpy leaves elem unterminated if ptr holds 4 or
 +     * more characters; the size of elem is not visible here - confirm. */
 +    strncpy(atoms->atom[i].elem,ptr==NULL ? "" : ptr,4);
 +    if (debug)
 +      fprintf(debug,"Atomnumber for atom '%s' is %d\n",anm,atomnumber);
 +  }
 +}
 +
 +static int read_atom(t_symtab *symtab,
 +                   char line[],int type,int natom,
 +                   t_atoms *atoms,rvec x[],int chainnum,gmx_bool bChange)
 +{ /* Parse one ATOM/HETATM line into slot natom of atoms and x, and return
 +   * natom+1. Fixed columns read from line (0-based):
 +   *   6-10 serial, 12-15 atom name, 16 altloc, 17-20 residue name,
 +   *   21 chain id, 22-25 residue number, 26 insertion code,
 +   *   30-37 x, 38-45 y, 46-53 z, 54-59 occupancy, 60-66 B-factor.
 +   * Coordinates are multiplied by 0.1 before being stored. gmx_fatal is
 +   * called when natom is not below atoms->nr. When bChange is set the
 +   * atom name is passed through xlate_atomname_pdb2gmx first.
 +   */
 +  t_atom *atomn;
 +  int  j,k;
 +  char nc='\0';
 +  char anr[12],anm[12],anm_copy[12],altloc,resnm[12],rnr[12];
 +  char xc[12],yc[12],zc[12],occup[12],bfac[12];
 +  unsigned char resic;
 +  char chainid;
 +  int  resnr,atomnumber;
 +
 +  if (natom>=atoms->nr)
 +    gmx_fatal(FARGS,"\nFound more atoms (%d) in pdb file than expected (%d)",
 +            natom+1,atoms->nr);
 +
 +  /* Skip over type */  
 +  j=6;
 +  for(k=0; (k<5); k++,j++) anr[k]=line[j];
 +  anr[k]=nc;
 +  trim(anr);
 +  j++;
 +  for(k=0; (k<4); k++,j++) anm[k]=line[j];
 +  anm[k]=nc;
 +  strcpy(anm_copy,anm); /* untrimmed name, stored in pdbinfo below */
 +  atomnumber = NOTSET;
 +  trim(anm);
 +  altloc=line[j];
 +  j++;
 +  for(k=0; (k<4); k++,j++) 
 +    resnm[k]=line[j];
 +  resnm[k]=nc;
 +  trim(resnm);
 +
 +  chainid = line[j];
 +  j++;
 +  
 +  for(k=0; (k<4); k++,j++) {
 +    rnr[k] = line[j];
 +  }
 +  rnr[k] = nc;
 +  trim(rnr);
 +  resnr = strtol(rnr, NULL, 10); 
 +  resic = line[j];
 +  j+=4; /* jump from the insertion code to the start of the x field */
 +
 +  /* X,Y,Z Coordinate */
 +  for(k=0; (k<8); k++,j++) xc[k]=line[j];
 +  xc[k]=nc;
 +  for(k=0; (k<8); k++,j++) yc[k]=line[j];
 +  yc[k]=nc;
 +  for(k=0; (k<8); k++,j++) zc[k]=line[j];
 +  zc[k]=nc;
 +  
 +  /* Weight */
 +  for(k=0; (k<6); k++,j++) occup[k]=line[j];
 +  occup[k]=nc;
 +  
 +  /* B-Factor */
 +  for(k=0; (k<7); k++,j++) bfac[k]=line[j];
 +  bfac[k]=nc;
 +
 +  if (atoms->atom) {
 +    atomn=&(atoms->atom[natom]);
 +    /* A new residue starts at the first atom, or when the residue number,
 +     * insertion code or residue name differs from the previous atom's. */
 +    if ((natom==0) ||
 +      atoms->resinfo[atoms->atom[natom-1].resind].nr != resnr ||
 +      atoms->resinfo[atoms->atom[natom-1].resind].ic != resic ||
 +      (strcmp(*atoms->resinfo[atoms->atom[natom-1].resind].name,resnm) != 0))
 +    {
 +      if (natom == 0) {
 +      atomn->resind = 0;
 +      } else {
 +      atomn->resind = atoms->atom[natom-1].resind + 1;
 +      }
 +      atoms->nres = atomn->resind + 1;
 +      t_atoms_set_resinfo(atoms,natom,symtab,resnm,resnr,resic,chainnum,chainid);
 +    }
 +    else
 +    {
 +      atomn->resind = atoms->atom[natom-1].resind;
 +    }
 +    if (bChange) {
 +      xlate_atomname_pdb2gmx(anm); 
 +    }
 +    atoms->atomname[natom]=put_symtab(symtab,anm);
 +    atomn->m = 0.0;
 +    atomn->q = 0.0;
 +    atomn->atomnumber = atomnumber; /* always NOTSET at this point */
 +    atomn->elem[0] = '\0';
 +  }
 +  x[natom][XX]=strtod(xc,NULL)*0.1;
 +  x[natom][YY]=strtod(yc,NULL)*0.1;
 +  x[natom][ZZ]=strtod(zc,NULL)*0.1;
 +  if (atoms->pdbinfo) {
 +    atoms->pdbinfo[natom].type=type;
 +    atoms->pdbinfo[natom].atomnr=strtol(anr, NULL, 10); 
 +    atoms->pdbinfo[natom].altloc=altloc;
 +    strcpy(atoms->pdbinfo[natom].atomnm,anm_copy);
 +    atoms->pdbinfo[natom].bfac=strtod(bfac,NULL);
 +    atoms->pdbinfo[natom].occup=strtod(occup,NULL);
 +  }
 +  natom++;
 +  
 +  return natom;
 +}
 +
 +gmx_bool is_hydrogen(const char *nm)
 +{
 +  char buf[30];
 +  
 +  strcpy(buf,nm);
 +  trim(buf);
 +  
 +  if (buf[0] == 'H')
 +    return TRUE;
 +  else if ((isdigit(buf[0])) && (buf[1] == 'H'))
 +    return TRUE;
 +  return FALSE;
 +}
 +
 +gmx_bool is_dummymass(const char *nm)
 +{
 +  char buf[30];
 +  
 +  strcpy(buf,nm);
 +  trim(buf);
 +  
 +  if ((buf[0] == 'M') && isdigit(buf[strlen(buf)-1]))
 +    return TRUE;
 +      
 +  return FALSE;
 +}
 +
 +static void gmx_conect_addline(gmx_conect_t *con,char *line)
 +{
 +  int n,ai,aj;
 +  char format[32],form2[32];
 +  
 +  sprintf(form2,"%%*s");
 +  sprintf(format,"%s%%d",form2);
 +  if (sscanf(line,format,&ai) == 1) {
 +    do {
 +      strcat(form2,"%*s");
 +      sprintf(format,"%s%%d",form2);
 +      n = sscanf(line,format,&aj);
 +      if (n == 1) {
 +      srenew(con->conect,++con->nconect);
 +      con->conect[con->nconect-1].ai = ai-1;
 +      con->conect[con->nconect-1].aj = aj-1;
 +      }
 +    } while (n == 1);
 +  }
 +}
 +
 +void gmx_conect_dump(FILE *fp,gmx_conect conect)
 +{ /* Print every stored pair to fp as a CONECT line, converting the
 +   * stored indices back to 1-based numbers.
 +   */
 +  gmx_conect_t *gc = (gmx_conect_t *)conect;
 +  int i;
 +  
 +  for(i=0; (i<gc->nconect); i++)
 +    fprintf(fp,"%6s%5d%5d\n","CONECT",
 +          gc->conect[i].ai+1,gc->conect[i].aj+1);
 +}
 +
 +gmx_conect gmx_conect_init()
 +{ /* Allocate one gmx_conect_t with snew and return it as an opaque
 +   * gmx_conect handle.
 +   */
 +  gmx_conect_t *gc;
 +  
 +  snew(gc,1);
 +  
 +  return (gmx_conect) gc;
 +}
 +
 +void gmx_conect_done(gmx_conect conect)
 +{ /* Release the pair array held by conect.
 +   * NOTE(review): only gc->conect is freed here, not gc itself - confirm
 +   * whether callers are expected to free the handle.
 +   */
 +  gmx_conect_t *gc = (gmx_conect_t *)conect;
 +  
 +  sfree(gc->conect);
 +}
 +
 +gmx_bool gmx_conect_exist(gmx_conect conect,int ai,int aj)
 +{ /* Return TRUE when the pair (ai,aj) is already stored in conect, in
 +   * either order; FALSE otherwise. Linear scan over all stored pairs.
 +   */
 +  gmx_conect_t *gc = (gmx_conect_t *)conect;
 +  int i;
 +  
 +  /* if (!gc->bSorted) 
 +     sort_conect(gc);*/
 +     
 +  for(i=0; (i<gc->nconect); i++) 
 +    if (((gc->conect[i].ai == ai) &&
 +       (gc->conect[i].aj == aj)) ||
 +      ((gc->conect[i].aj == ai) &&
 +       (gc->conect[i].ai == aj)))
 +      return TRUE;
 +  return FALSE;
 +}
 +
 +void gmx_conect_add(gmx_conect conect,int ai,int aj)
 +{ /* Append the pair (ai,aj) to conect unless gmx_conect_exist reports
 +   * that it is already present in either order. The indices are stored
 +   * as given.
 +   */
 +  gmx_conect_t *gc = (gmx_conect_t *)conect;
 +  int i; /* unused in this function */
 +  
 +  /* if (!gc->bSorted) 
 +     sort_conect(gc);*/
 +  
 +  if (!gmx_conect_exist(conect,ai,aj)) {   
 +    srenew(gc->conect,++gc->nconect);
 +    gc->conect[gc->nconect-1].ai = ai;
 +    gc->conect[gc->nconect-1].aj = aj;
 +  }
 +}
 +
 +int read_pdbfile(FILE *in,char *title,int *model_nr,
 +               t_atoms *atoms,rvec x[],int *ePBC,matrix box,gmx_bool bChange,
 +               gmx_conect conect)
 +{
 +    gmx_conect_t *gc = (gmx_conect_t *)conect;
 +    t_symtab symtab;
 +    gmx_bool bCOMPND;
 +    gmx_bool bConnWarn = FALSE;
 +    char line[STRLEN+1];
 +    int  line_type;
 +    char *c,*d;
 +    int  natom,chainnum,nres_ter_prev=0;
 +    char chidmax=' ';
 +    gmx_bool bStop=FALSE;
 +
 +    if (ePBC) 
 +    {
 +        /* Only assume pbc when there is a CRYST1 entry */
 +        *ePBC = epbcNONE;
 +    }
 +    if (box != NULL) 
 +    {
 +        clear_mat(box);
 +    }
 +    
 +    open_symtab(&symtab);
 +
 +    bCOMPND=FALSE;
 +    title[0]='\0';
 +    natom=0;
 +    chainnum=0;
 +    while (!bStop && (fgets2(line,STRLEN,in) != NULL)) 
 +    {
 +        line_type = line2type(line);
 +        
 +        switch(line_type) 
 +        {
 +            case epdbATOM:
 +            case epdbHETATM:
 +                natom = read_atom(&symtab,line,line_type,natom,atoms,x,chainnum,bChange);
 +                break;
 +      
 +            case epdbANISOU:
 +                if (atoms->pdbinfo)
 +                {
 +                    read_anisou(line,natom,atoms);
 +                }
 +                break;
 +                
 +            case epdbCRYST1:
 +                read_cryst1(line,ePBC,box);
 +                break;
 +                
 +            case epdbTITLE:
 +            case epdbHEADER:
 +                if (strlen(line) > 6) 
 +                {
 +                    c=line+6;
 +                    /* skip HEADER or TITLE and spaces */
-                     while (c && (c[0]!=' ')) c++;
-                     while (c && (c[0]==' ')) c++;
++                    while (c[0]!=' ') c++;
++                    while (c[0]==' ') c++;
 +                    /* truncate after title */
 +                    d=strstr(c,"      ");
 +                    if (d) 
 +                    {
 +                        d[0]='\0';
 +                    }
 +                    if (strlen(c)>0)
 +                    {
 +                        strcpy(title,c);
 +                    }
 +                }
 +                break;
 +      
 +            case epdbCOMPND:
 +                if ((!strstr(line,": ")) || (strstr(line+6,"MOLECULE:"))) 
 +                {
 +                    if ( !(c=strstr(line+6,"MOLECULE:")) )
 +                    {
 +                        c=line;
 +                    }
 +                    /* skip 'MOLECULE:' and spaces */
++                    while (c[0]!=' ') c++;
++                    while (c[0]==' ') c++;
 +                    /* truncate after title */
 +                    d=strstr(c,"   ");
 +                    if (d) 
 +                    {
 +                        while ( (d[-1]==';') && d>c)  d--;
 +                        d[0]='\0';
 +                    }
 +                    if (strlen(c) > 0)
 +                    {
 +                        if (bCOMPND) 
 +                        {
 +                            strcat(title,"; ");
 +                            strcat(title,c);
 +                        } 
 +                        else
 +                        {
 +                            strcpy(title,c);
 +                        }
 +                    }
 +                    bCOMPND=TRUE;
 +                }
 +                break;
 +      
 +            case epdbTER:
 +                chainnum++;
 +                break;
 +                
 +            case epdbMODEL:
 +                if(model_nr)
 +                {
 +                    sscanf(line,"%*s%d",model_nr);
 +                }
 +                break;
 +
 +            case epdbENDMDL:
 +                bStop=TRUE;
 +                break;
 +            case epdbCONECT:
 +                if (gc) 
 +                {
 +                    gmx_conect_addline(gc,line);
 +                }
 +                else if (!bConnWarn)
 +                {
 +                    fprintf(stderr,"WARNING: all CONECT records are ignored\n");
 +                    bConnWarn = TRUE;
 +                }
 +                break;
 +                
 +            default:
 +                break;
 +        }
 +    }
 +
 +    free_symtab(&symtab);
 +    return natom;
 +}
 +
 +void get_pdb_coordnum(FILE *in,int *natoms)
 +{ /* Count the ATOM and HETATM lines read from in, stopping at the first
 +   * ENDMDL line or at end of file. The count is stored in *natoms.
 +   */
 +    char line[STRLEN];
 +   
 +    *natoms=0;
 +    while (fgets2(line,STRLEN,in)) 
 +    {
 +        if ( strncmp(line,"ENDMDL",6) == 0 ) 
 +        {
 +            break;
 +        }
 +        if ((strncmp(line,"ATOM  ",6) == 0) || (strncmp(line,"HETATM",6) == 0))
 +        {
 +            (*natoms)++;
 +        }
 +    }
 +}
 +
 +void read_pdb_conf(const char *infile,char *title, 
 +                 t_atoms *atoms,rvec x[],int *ePBC,matrix box,gmx_bool bChange,
 +                 gmx_conect conect)
 +{ /* Open infile for reading, hand it to read_pdbfile (with model_nr
 +   * NULL) and close it again. The atom count returned by read_pdbfile
 +   * is discarded.
 +   */
 +  FILE *in;
 +  
 +  in = gmx_fio_fopen(infile,"r");
 +  read_pdbfile(in,title,NULL,atoms,x,ePBC,box,bChange,conect);
 +  gmx_fio_fclose(in);
 +}
 +
 +gmx_conect gmx_conect_generate(t_topology *top)
 +{ /* Build a new conect list from the topology: for every interaction
 +   * type f for which IS_CHEMBOND(f) holds, the atoms at offsets 1 and 2
 +   * of each entry in top->idef.il[f].iatoms are added as a pair.
 +   * Returns the newly initialised handle.
 +   */
 +  int f,i;
 +  gmx_conect gc;
 +  
 +  /* Fill the conect records */
 +  gc  = gmx_conect_init();
 +
 +  for(f=0; (f<F_NRE); f++) {
 +    if (IS_CHEMBOND(f))
 +      /* each entry occupies nratoms+1 slots of iatoms */
 +      for(i=0; (i<top->idef.il[f].nr); i+=interaction_function[f].nratoms+1) {
 +      gmx_conect_add(gc,top->idef.il[f].iatoms[i+1],
 +                     top->idef.il[f].iatoms[i+2]);
 +    }
 +  }
 +  return gc;
 +}
 +
 +const char* get_pdbformat()
 +{ /* Return the printf format for the leading fields of an atom line
 +   * (record name through z coordinate); here the atom-name field is
 +   * preceded by two blanks and followed directly by the residue name.
 +   */
 +    static const char *pdbformat ="%-6s%5u  %-4.4s%3.3s %c%4d%c   %8.3f%8.3f%8.3f";
 +    return pdbformat;
 +}
 +
 +const char* get_pdbformat4()
 +{ /* Same fields as get_pdbformat, but the atom-name field is preceded
 +   * by one blank and separated from the residue name by one blank.
 +   */
 +    static const char *pdbformat4="%-6s%5u %-4.4s %3.3s %c%4d%c   %8.3f%8.3f%8.3f";
 +    return pdbformat4;
 +}
Simple merge
Simple merge
index 9f064944b9174a3a03e56a68a189487755bee1c5,0000000000000000000000000000000000000000..f12fae5c506c8cb7f9af4dfbbf74fbda4d14f974
mode 100644,000000..100644
--- /dev/null
@@@ -1,605 -1,0 +1,605 @@@
-   char   *user=NULL;
 +/*
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROningen Mixture of Alchemy and Childrens' Stories
 + */
 +/* This file is completely threadsafe - keep it that way! */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#ifdef GMX_CRAY_XT3
 +#undef HAVE_PWD_H
 +#endif
 +
 +#include <stdio.h>
 +#include <ctype.h>
 +#include <stdlib.h>
 +#include <errno.h>
 +#include <sys/types.h>
 +#include <time.h>
 +
 +#ifdef HAVE_SYS_TIME_H
 +#include <sys/time.h>
 +#endif
 +
 +
 +#ifdef HAVE_PWD_H
 +#include <pwd.h>
 +#endif
 +#include <time.h>
 +
 +#include "typedefs.h"
 +#include "smalloc.h"
 +#include "gmx_fatal.h"
 +#include "macros.h"
 +#include "string2.h"
 +#include "futil.h"
 +
 +int continuing(char *s)
 +/* Strip trailing blanks from s (via rtrim) and, if the last remaining
 + * character is CONTINUE, remove that character too.
 + * Returns TRUE if s ended with a CONTINUE, FALSE otherwise.
 + */
 +{
 +  int sl;
 +
 +  rtrim(s);
 +  sl = strlen(s);
 +  if ((sl > 0) && (s[sl-1] == CONTINUE)) {
 +    s[sl-1] = 0;
 +    return TRUE;
 +  }
 +  else
 +    return FALSE;
 +}
 +
 +
 +
 +char *fgets2(char *line, int n, FILE *stream)
 +/* Read one line from stream into line with fgets (buffer size n) and
 + * cut it at the first newline and at the first carriage return, so the
 + * result is zero terminated without line-ending characters.
 + * line should be long enough (>= n).
 + * Returns line, or NULL when fgets returns NULL. gmx_fatal is called
 + * when no newline was read and the stream is not at end of file.
 + */
 +{
 +  char *c;
 +  if (fgets(line,n,stream) == NULL) {
 +    return NULL;
 +  }
 +  if ((c=strchr(line,'\n')) != NULL) {
 +    *c = '\0';
 +  } else {
 +    /* A line not ending in a newline can only occur at the end of a file,
 +     * or because of n being too small.
 +     * Since both cases occur very infrequently, we can check for EOF.
 +     */
 +    if (!gmx_eof(stream)) {
 +      gmx_fatal(FARGS,"An input file contains a line longer than %d characters, while the buffer passed to fgets2 has size %d. The line starts with: '%20.20s'",n,n,line);
 +    }
 +  }
 +  if ((c=strchr(line,'\r')) != NULL) {
 +    *c = '\0';
 +  }
 +
 +  return line;
 +}
 +
 +void strip_comment (char *line)
 +{ /* Truncate line at the first COMMENTSIGN character, if any.
 +   * A NULL line is ignored.
 +   */
 +  char *c;
 +
 +  if (!line)
 +    return;
 +
 +  /* search for a comment mark and replace it by a zero */
 +  if ((c = strchr(line,COMMENTSIGN)) != NULL) 
 +    (*c) = 0;
 +}
 +
 +void upstring (char *str)
 +{ /* Convert str to upper case in place using toupper. */
 +  int i;
 +
 +  for (i=0; (i < (int)strlen(str)); i++) /* strlen is re-evaluated every pass */
 +    str[i] = toupper(str[i]);
 +}
 +
 +void ltrim (char *str)
 +{ /* Remove leading whitespace (as defined by isspace) from str in place
 +   * by shifting the remaining characters to the front.
 +   * A NULL str is ignored.
 +   */
 +  char *tr; /* unused in this function */
 +  int i,c;
 +
 +  if (NULL == str)
 +    return;
 +
 +  c = 0; /* number of leading whitespace characters */
 +  while (('\0' != str[c]) && isspace(str[c]))
 +    c++;
 +  if (c > 0) 
 +    {
 +      for(i=c; ('\0' != str[i]); i++)
 +      str[i-c] = str[i];
 +      str[i-c] = '\0';
 +    }
 +}
 +
 +void rtrim (char *str)
 +{ /* Remove trailing spaces and tabs from str in place.
 +   * A NULL str is ignored. The loop stops at index 1, so the character
 +   * at index 0 is never removed, even if it is a blank.
 +   */
 +  int nul;
 +
 +  if (NULL == str)
 +    return;
 +
 +  nul = strlen(str)-1; /* index of the last character, -1 for "" */
 +  while ((nul > 0) && ((str[nul] == ' ') || (str[nul] == '\t')) ) {
 +    str[nul] = '\0';
 +    nul--;
 +  }
 +}
 +
 +void trim (char *str)
 +{ /* Apply ltrim and then rtrim to str in place. */
 +  ltrim (str);
 +  rtrim (str);
 +}
 +
 +char *
 +gmx_ctime_r(const time_t *clock,char *buf, int n)
 +{ /* Format *clock as text with the platform's reentrant ctime variant
 +   * into a local buffer, then copy at most n-1 characters into buf and
 +   * zero terminate it. Returns buf.
 +   */
 +    char tmpbuf[STRLEN];
 +  
 +#if ((defined WIN32 || defined _WIN32 || defined WIN64 || defined _WIN64) && !defined __CYGWIN__ && !defined __CYGWIN32__)
 +    /* Windows */
 +    ctime_s( tmpbuf, STRLEN, clock );
 +#elif (defined(__sun))
 +    /*Solaris*/
 +    /* NOTE(review): the size passed is n, while tmpbuf holds STRLEN */
 +    ctime_r(clock, tmpbuf, n);
 +#else
 +    ctime_r(clock,tmpbuf);
 +#endif
 +    strncpy(buf,tmpbuf,n-1);
 +    buf[n-1]='\0';
 +    
 +    return buf;
 +}
 +          
 +void nice_header (FILE *out,const char *fn)
 +{
 +  const char *unk = "onbekend";
 +  time_t clock;
-   char   buf[256];
++  const char *user=unk;
 +  int    gh;
 +  uid_t  uid;
++  char   buf[256]="";
 +  char   timebuf[STRLEN];
 +#ifdef HAVE_PWD_H
 +  struct passwd *pw;
 +#endif
 +
 +  /* Print a nice header above the file */
 +  time(&clock);
 +  fprintf (out,"%c\n",COMMENTSIGN);
 +  fprintf (out,"%c\tFile '%s' was generated\n",COMMENTSIGN,fn ? fn : unk);
 +  
 +#ifdef HAVE_PWD_H
 +  uid = getuid();
 +  pw  = getpwuid(uid);
 +  gh  = gethostname(buf,255);
 +  user= pw->pw_name;
 +#else
 +  uid = 0;
 +  gh  = -1;
 +#endif
 +  
 +  gmx_ctime_r(&clock,timebuf,STRLEN);
 +  fprintf (out,"%c\tBy user: %s (%d)\n",COMMENTSIGN,
 +         user ? user : unk,(int) uid);
 +  fprintf(out,"%c\tOn host: %s\n",COMMENTSIGN,(gh == 0) ? buf : unk);
 +
 +  fprintf (out,"%c\tAt date: %s",COMMENTSIGN,timebuf);
 +  fprintf (out,"%c\n",COMMENTSIGN);
 +}
 +
 +int gmx_strcasecmp_min(const char *str1, const char *str2)
 +{ /* Compare two strings ignoring case and skipping every '-' and '_'.
 +   * Returns 0 when they are equal under these rules, otherwise the
 +   * difference between the first pair of differing upper-cased
 +   * characters.
 +   */
 +  char ch1,ch2;
 +  
 +  do
 +    {
 +      do
 +      ch1=toupper(*(str1++));
 +      while ((ch1=='-') || (ch1=='_'));
 +      do 
 +      ch2=toupper(*(str2++));
 +      while ((ch2=='-') || (ch2=='_'));
 +      if (ch1!=ch2) return (ch1-ch2);
 +    }
 +  while (ch1);
 +  return 0; 
 +}
 +
 +int gmx_strncasecmp_min(const char *str1, const char *str2, int n)
 +{ /* Like gmx_strcasecmp_min, but the comparison also stops (returning 0)
 +   * once either pointer has advanced n or more characters from its
 +   * start; skipped '-' and '_' characters count towards that advance.
 +   */
 +  char ch1,ch2;
 +  char *stri1, *stri2;
 +
 +  stri1=(char *)str1; /* remember the starts to measure the advance */
 +  stri2=(char *)str2;  
 +  do
 +    {
 +      do
 +      ch1=toupper(*(str1++));
 +      while ((ch1=='-') || (ch1=='_'));
 +      do 
 +      ch2=toupper(*(str2++));
 +      while ((ch2=='-') || (ch2=='_'));
 +      if (ch1!=ch2) return (ch1-ch2);
 +    }
 +  while (ch1 && (str1-stri1<n) && (str2-stri2<n));
 +  return 0; 
 +}
 +
 +int gmx_strcasecmp(const char *str1, const char *str2)
 +{ /* Case-insensitive string comparison. Returns 0 when the strings are
 +   * equal after upper-casing, otherwise the difference between the first
 +   * pair of differing upper-cased characters.
 +   */
 +  char ch1,ch2;
 +  
 +  do
 +    {
 +      ch1=toupper(*(str1++));
 +      ch2=toupper(*(str2++));
 +      if (ch1!=ch2) return (ch1-ch2);
 +    }
 +  while (ch1);
 +  return 0; 
 +}
 +
 +int gmx_strncasecmp(const char *str1, const char *str2, int n)
 +{ /* Case-insensitive comparison of at most n characters. Returns 0 when
 +   * n is 0 or the compared characters are equal after upper-casing,
 +   * otherwise the difference between the first differing pair.
 +   */
 +  char ch1,ch2;
 + 
 +  if(n==0) 
 +    return 0;
 +
 +  do
 +    {
 +      ch1=toupper(*(str1++));
 +      ch2=toupper(*(str2++));
 +      if (ch1!=ch2) return (ch1-ch2);
 +      n--;
 +    }
 +  while (ch1 && n);
 +  return 0; 
 +}
 +
 +char *gmx_strdup(const char *src)
 +{ /* Return a copy of src in memory allocated with snew. */
 +  char *dest;
 +
 +  snew(dest,strlen(src)+1);
 +  strcpy(dest,src);
 +  
 +  return dest;
 +}
 +
 +char *
 +gmx_strndup(const char *src, int n)
 +{ /* Return a zero-terminated copy of at most the first n characters of
 +   * src, in memory allocated with snew.
 +   */
 +    int   len;
 +    char *dest;
 +
 +    len = strlen(src);
 +    if (len > n) 
 +    {
 +        len = n;
 +    }
 +    snew(dest, len+1);
 +    strncpy(dest, src, len);
 +    dest[len] = 0; /* strncpy does not terminate when src is longer */
 +    return dest;
 +}
 +
 +/*!
 + * \param[in] pattern  Pattern to match against.
 + * \param[in] str      String to match.
 + * \returns   0 on match, GMX_NO_WCMATCH if there is no match.
 + *
 + * Matches \p str against \p pattern, which may contain * and ? wildcards.
 + * A * matches any run of characters (including none) and a ? matches
 + * exactly one character. All other characters are matched literally.
 + * Currently, it is not possible to match literal * or ?.
 + *
 + * After a *, the function calls itself on each suffix of \p str whose
 + * first character equals the next literal pattern character.
 + */
 +int
 +gmx_wcmatch(const char *pattern, const char *str)
 +{
 +    while (*pattern)
 +    {
 +        if (*pattern == '*')
 +        {
 +            /* Skip multiple wildcards in a sequence */
 +            while (*pattern == '*' || *pattern == '?')
 +            {
 +                ++pattern;
 +                /* For ?, we need to check that there are characters left
 +                 * in str. */
 +                if (*pattern == '?')
 +                {
 +                    if (*str == 0)
 +                    {
 +                        return GMX_NO_WCMATCH;
 +                    }
 +                    else
 +                    {
 +                        ++str;
 +                    }
 +                }
 +            }
 +            /* If the pattern ends after the star, we have a match */
 +            if (*pattern == 0)
 +            {
 +                return 0;
 +            }
 +            /* Match the rest against each possible suffix of str */
 +            while (*str)
 +            {
 +                /* Only do the recursive call if the first character
 +                 * matches. We don't have to worry about wildcards here,
 +                 * since we have processed them above. */
 +                if (*pattern == *str)
 +                {
 +                    int rc;
 +                    /* Match the suffix, and return if a match or an error */
 +                    rc = gmx_wcmatch(pattern, str);
 +                    if (rc != GMX_NO_WCMATCH)
 +                    {
 +                        return rc;
 +                    }
 +                }
 +                ++str;
 +            }
 +            /* If no suffix of str matches, we don't have a match */
 +            return GMX_NO_WCMATCH;
 +        }
 +        else if ((*pattern == '?' && *str != 0) || *pattern == *str)
 +        {
 +            /* A single ? or an identical literal consumes one character */
 +            ++str;
 +        }
 +        else
 +        {
 +            return GMX_NO_WCMATCH;
 +        }
 +        ++pattern;
 +    }
 +    /* When the pattern runs out, we have a match if the string has ended. */
 +    return (*str == 0) ? 0 : GMX_NO_WCMATCH;
 +}
 +
 +char *wrap_lines(const char *buf,int line_width, int indent,bool bIndentFirst)
 +{ /* Return a newly allocated (snew) copy of buf in which spaces have
 +   * been turned into newlines so that lines fit in line_width where
 +   * possible, with indent blanks inserted after every line break.
 +   * The first line is only indented when bIndentFirst is set.
 +   * buf itself is not modified.
 +   */
 +  char *b2;
 +  int i,i0,i2,j,b2len,lspace=0,l2space=0;
 +  gmx_bool bFirst,bFitsOnLine;
 +
 +  /* characters are copied from buf to b2 with possible spaces changed
 +   * into newlines and extra space added for indentation.
 +   * i indexes buf (source buffer) and i2 indexes b2 (destination buffer)
 +   * i0 points to the beginning of the current line (in buf, source)
 +   * lspace and l2space point to the last space on the current line
 +   * bFirst is set to prevent indentation of first line
 +   * bFitsOnLine says if the first space occurred before line_width, if 
 +   * that is not the case, we have a word longer than line_width which 
 +   * will also not fit on the next line, so we might as well keep it on 
 +   * the current line (where it also won't fit, but looks better)
 +   */
 +  
 +  b2=NULL;
 +  b2len=strlen(buf)+1+indent; /* grown by indent for every line break below */
 +  snew(b2,b2len);
 +  i0=i2=0;
 +  if (bIndentFirst)
 +    for(i2=0; (i2<indent); i2++)
 +      b2[i2] = ' ';
 +  bFirst=TRUE;
 +  do {
 +    l2space = -1;
 +    /* find the last space before end of line */
 +    for(i=i0; ((i-i0 < line_width) || (l2space==-1)) && (buf[i]); i++) {
 +      b2[i2++] = buf[i];
 +      /* remember the position of a space */
 +      if (buf[i] == ' ') {
 +        lspace = i;
 +      l2space = i2-1;
 +      }
 +      /* if we have a newline before the line is full, reset counters */
 +      if (buf[i]=='\n' && buf[i+1]) { 
 +      i0=i+1;
 +      b2len+=indent;
 +      srenew(b2, b2len);
 +      /* add indentation after the newline */
 +      for(j=0; (j<indent); j++)
 +        b2[i2++]=' ';
 +      }
 +    }
 +    /* If we are at the last newline, copy it */
 +    if (buf[i]=='\n' && !buf[i+1]) {
 +      b2[i2++] = buf[i++];
 +    }
 +    /* if we're not at the end of the string */
 +    if (buf[i]) {
 +      /* check if one word does not fit on the line */
 +      bFitsOnLine = (i-i0 <= line_width);
 +      /* reset line counters to just after the space */
 +      i0 = lspace+1;
 +      i2 = l2space+1;
 +      /* if the words fit on the line, and we're beyond the indentation part */
 +      if ( (bFitsOnLine) && (l2space >= indent) ) {
 +      /* start a new line */
 +      b2[l2space] = '\n';
 +      /* and add indentation */
 +      if (indent) {
 +        if (bFirst) {
 +          /* later lines lose indent columns of usable width */
 +          line_width-=indent;
 +          bFirst=FALSE;
 +        }
 +        b2len+=indent;
 +        srenew(b2, b2len);
 +        for(j=0; (j<indent); j++)
 +          b2[i2++]=' ';
 +        /* no extra spaces after indent; */
 +        while(buf[i0]==' ')
 +          i0++;
 +      }
 +      }
 +    }
 +  } while (buf[i]);
 +  b2[i2] = '\0';
 +  
 +  return b2;
 +}
 +
 +char **split(char sep,const char *str)
 +{ /* Split str at every run of sep characters and return the non-empty
 +   * pieces as a NULL-terminated array of newly allocated (snew) strings.
 +   * Returns NULL when str is NULL.
 +   */
 +  char **ptr = NULL;
 +  int  n,nn,nptr = 0;
 +  
 +  if (str == NULL)
 +    return NULL;
 +  nn = strlen(str);
 +  /* The number of separators bounds the number of pieces from above */
 +  for(n=0; (n<nn); n++)
 +    if (str[n] == sep)
 +      nptr++;
 +  snew(ptr,nptr+2);
 +  nptr = 0;
 +  while (*str != '\0') {
 +    while ((*str != '\0') && (*str == sep))
 +      str++;
 +    if (*str != '\0') {
 +      snew(ptr[nptr],1+strlen(str)); /* sized for the whole remainder */
 +      n = 0;
 +      while ((*str != '\0') && (*str != sep)) {
 +      ptr[nptr][n] = *str;
 +      str++;
 +      n++;
 +      }
 +      ptr[nptr][n] = '\0';
 +      nptr++;
 +    }
 +  }
 +  ptr[nptr] = NULL;
 +  
 +  return ptr;
 +}
 +
 +
 +gmx_large_int_t
 +str_to_large_int_t(const char *str, char **endptr)
 +{ /* Convert the decimal number at the start of str (after optional
 +   * whitespace and an optional '-') to a gmx_large_int_t.
 +   * *endptr is set to the first character after the digits. When str is
 +   * NULL, *endptr is set to NULL and 0 is returned. On overflow, *endptr
 +   * is set to str, errno to ERANGE, and 0 is returned.
 +   * endptr itself is dereferenced unconditionally.
 +   */
 +      int         sign = 1;
 +      gmx_large_int_t  val  = 0;
 +      char        ch;
 +      const char  *p;
 +      
 +      p = str;
 +      if(p==NULL)
 +      {
 +              *endptr=NULL;
 +              return 0;
 +      }
 +      
 +      /* Strip off initial white space */
 +      while(isspace(*p))
 +      {
 +              p++;
 +      }
 +      /* Conform to ISO C99 - return original pointer if string does not contain a number */
 +      if(*str=='\0')
 +      {
 +              *endptr=(char *)str;
 +      }
 +      
 +      if(*p=='-')
 +      {
 +              p++;
 +              sign *= -1;
 +      }
 +      
 +      while( ((ch=*p) != '\0') && isdigit(ch) )
 +      {
 +              /* Important to add sign here, so we don't overflow in final multiplication */
 +              ch = (ch-'0')*sign; 
 +              val = val*10 + ch;
 +              /* the last digit of val must equal the digit just added */
 +              if(ch != val%10) 
 +              {
 +                      /* Some sort of overflow has occurred, set endptr to original string */
 +                      *endptr=(char *)str;
 +                      errno = ERANGE;
 +                      return(0);
 +              }
 +              p++;
 +      }
 +      
 +      *endptr=(char *)p;
 +      
 +      return val;
 +}
 +
 +char *gmx_strsep(char **stringp, const char *delim)
 +{ /* Return the token at *stringp, ending at the first character that
 +   * occurs in delim. That character is overwritten with a zero and
 +   * *stringp is advanced to the character after it. When no delimiter
 +   * is found the rest of the string is the token and *stringp is set to
 +   * NULL. Returns NULL when *stringp is NULL on entry.
 +   */
 +    char *ret;
 +    int len=strlen(delim);
 +    int i,j=0;
 +    int found=0;
 +
 +    if (! *stringp)
 +        return NULL;
 +    ret=*stringp;
 +    do
 +    {
 +        if ( (*stringp)[j] == '\0')
 +        {
 +            found=1;
 +            *stringp=NULL;
 +            break;
 +        }
 +        for (i=0;i<len;i++)
 +        {
 +            if ( (*stringp)[j]==delim[i])
 +            {
 +                (*stringp)[j]='\0';
 +                *stringp=*stringp+j+1;
 +                found=1;
 +                break;
 +            }
 +        }
 +        j++;
 +    } while (!found);
 +
 +    return ret;
 +}
 +
Simple merge
Simple merge
index be89c2e078144d608c0a27a239f1cedb86600069,0000000000000000000000000000000000000000..e26654ca73a13530df4e1e87e31f2455dce22939
mode 100644,000000..100644
--- /dev/null
@@@ -1,1117 -1,0 +1,1332 @@@
- #include "time.h"
 +/*
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROningen Mixture of Alchemy and Childrens' Stories
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include "string2.h"
 +#include "smalloc.h"
 +#include "sysstuff.h"
 +#include "filenm.h"
 +#include "macros.h"
 +#include "replace.h"
 +#include "wman.h"
 +#include "statutil.h"
 +#include "copyrite.h"
 +#include "strdb.h"
-   { "[GRK]", "$\\"   },
-   { "[grk]", "$"     },
-   /* The next two lines used to substitute "|" and "||" to "or", but only
-    * g_angle used that functionality, so that was changed to a textual
-    * "or" there, so that other places could use those symbols to indicate
-    * magnitudes. */
-   { "||",    "\\textbar{}\\textbar"    },
-   { "|",     "\\textbar{}"    }
++#include <time.h>
 +#include "readinp.h"
 +
 +/* The source code in this file should be thread-safe. 
 +         Please keep it that way. */
 +
 +
 +typedef struct {
 +  const char *search,*replace;
 +} t_sandr_const;
 +
 +typedef struct {
 +  char *search,*replace;
 +} t_sandr;
 +
++/* The order of these arrays is significant. Text search and replace
++ * for each element occurs in order, so earlier changes can induce
++ * subsequent changes even though the original text might not appear
++ * to invoke the latter changes. */
++
 +/* Markup-to-LaTeX rules, applied top to bottom by check_tex(). */
 +const t_sandr_const sandrTeX[] = {
 +  { "[TT]", "{\\tt " },
 +  { "[tt]", "}"      },
 +  { "[BB]", "{\\bf " },
 +  { "[bb]", "}"      },
 +  { "[IT]", "{\\em " },
 +  { "[it]", "}"      },
 +  { "[PAR]","\n\n"   },
 +  /* Escaping underscore for LaTeX is no longer necessary, and it breaks
 +   * text searching and the index if you do. */
 +  /*
 +  { "_",    "\\_"    },
 +  */
 +  { "$",    "\\$"    },
 +  { "<=",   "\\ensuremath{\\leq{}}"},
 +  { ">=",   "\\ensuremath{\\geq{}}"},
 +  { "<",    "\\textless{}" },
 +  { ">",    "\\textgreater{}" },
 +  { "^",    "\\^{}"    },
 +  /* The rule above has already turned every "^" into "\^{}", so the
 +   * superscript rules below match on that escaped form. */
 +  { "\\^{}t", "\\ensuremath{^t}" },
 +  { "\\^{}a", "\\ensuremath{^a}" },
 +  { "\\^{}b", "\\ensuremath{^b}" },
 +  { "\\^{}2", "\\ensuremath{^2}" },
 +  { "\\^{}3", "\\ensuremath{^3}" },
 +  { "\\^{}6", "\\ensuremath{^6}" },
 +  { "#",    "\\#"    },
 +  { "[BR]", "\\\\"   },
 +  { "%",    "\\%"    },
 +  { "&",    "\\&"    },
 +  /* The next couple of lines allow true Greek symbols to be written to the 
 +     manual, which makes it look pretty */
++  { "[GRK]", "\\ensuremath{\\" },
++  { "[grk]", "}" },
++  { "[MATH]","\\ensuremath{" },
++  { "[math]","}" },
++  { "[CHEVRON]", "\\ensuremath{<}" },
++  { "[chevron]", "\\ensuremath{>}" },
++  { "[MAG]", "\\ensuremath{|}" },
++  { "[mag]", "\\ensuremath{|}" },
++  { "[INT]","\\ensuremath{\\int" },
++  { "[FROM]","_" },
++  { "[from]","" },
++  { "[TO]", "^" },
++  { "[to]", "" },
++  { "[int]","}" },
++  { "[SUM]","\\ensuremath{\\sum" },
++  { "[sum]","}" },
++  { "[SUB]","\\ensuremath{_{" },
++  { "[sub]","}}" },
++  { "[SQRT]","\\ensuremath{\\sqrt{" },
++  { "[sqrt]","}}" },
++  { "[EXP]","\\ensuremath{\\exp{(" },
++  { "[exp]",")}}" },
++  { "[LN]","\\ensuremath{\\ln{(" },
++  { "[ln]",")}}" },
++  { "[LOG]","\\ensuremath{\\log{(" },
++  { "[log]",")}}" },
++  { "[COS]","\\ensuremath{\\cos{(" },
++  { "[cos]",")}}" },
++  { "[SIN]","\\ensuremath{\\sin{(" },
++  { "[sin]",")}}" },
++  { "[TAN]","\\ensuremath{\\tan{(" },
++  { "[tan]",")}}" },
++  { "[COSH]","\\ensuremath{\\cosh{(" },
++  { "[cosh]",")}}" },
++  { "[SINH]","\\ensuremath{\\sinh{(" },
++  { "[sinh]",")}}" },
++  { "[TANH]","\\ensuremath{\\tanh{(" },
++  { "[tanh]",")}}" }
 +};
 +#define NSRTEX asize(sandrTeX)
 +
 +/* Markup-to-plain-text rules, applied top to bottom by check_tty().
 + * Font markers are dropped and math markers become readable words. */
 +const t_sandr_const sandrTty[] = {
 +  { "[TT]", "" },
 +  { "[tt]", "" },
 +  { "[BB]", "" },
 +  { "[bb]", "" },
 +  { "[IT]", "" },
 +  { "[it]", "" },
++  { "[MATH]","" },
++  { "[math]","" },
++  { "[CHEVRON]","<" },
++  { "[chevron]",">" },
++  { "[MAG]", "|" },
++  { "[mag]", "|" },
++  { "[INT]","integral" },
++  { "[FROM]"," from " },
++  { "[from]","" },
++  { "[TO]", " to " },
++  { "[to]", " of" },
++  { "[int]","" },
++  { "[SUM]","sum" },
++  { "[sum]","" },
++  { "[SUB]","_" },
++  { "[sub]","" },
++  { "[SQRT]","sqrt(" },
++  { "[sqrt]",")" },
++  { "[EXP]","exp(" },
++  { "[exp]",")" },
++  { "[LN]","ln(" },
++  { "[ln]",")" },
++  { "[LOG]","log(" },
++  { "[log]",")" },
++  { "[COS]","cos(" },
++  { "[cos]",")" },
++  { "[SIN]","sin(" },
++  { "[sin]",")" },
++  { "[TAN]","tan(" },
++  { "[tan]",")" },
++  { "[COSH]","cosh(" },
++  { "[cosh]",")" },
++  { "[SINH]","sinh(" },
++  { "[sinh]",")" },
++  { "[TANH]","tanh(" },
++  { "[tanh]",")" },
 +  { "[PAR]","\n\n" },
 +  { "[BR]", "\n"},
 +  { "[GRK]", "" },
 +  { "[grk]", "" }
 +};
 +#define NSRTTY asize(sandrTty)
 +
 +/* Markup-to-wiki rules, applied top to bottom by check_wiki().
 + * "&" is escaped first, so the "&lt;"/"&gt;" produced by later rules are
 + * not escaped a second time.
 + * NOTE(review): "[CHEVRON]"/"[chevron]" emit a raw "<"/">" after the
 + * escaping rules have already run - confirm that is intended. */
 +const t_sandr_const sandrWiki[] = {
 +  { "&",    "&amp;" },
 +  { "<",    "&lt;" },
 +  { ">",    "&gt;" },
 +  { "[TT]", "&lt;code&gt;" },
 +  { "[tt]", "&lt;/code&gt;" },
 +  { "[BB]", "'''" },
 +  { "[bb]", "'''" },
 +  { "[IT]", "''" },
 +  { "[it]", "''" },
++  { "[MATH]","" },
++  { "[math]","" },
++  { "[CHEVRON]","<" },
++  { "[chevron]",">" },
++  { "[MAG]", "|" },
++  { "[mag]", "|" },
++  { "[INT]","integral" },
++  { "[FROM]"," from " },
++  { "[from]","" },
++  { "[TO]", " to " },
++  { "[to]", " of" },
++  { "[int]","" },
++  { "[SUM]","sum" },
++  { "[sum]","" },
++  { "[SUB]","_" },
++  { "[sub]","" },
++  { "[SQRT]","sqrt(" },
++  { "[sqrt]",")", },
++  { "[EXP]","exp(" },
++  { "[exp]",")" },
++  { "[LN]","ln(" },
++  { "[ln]",")" },
++  { "[LOG]","log(" },
++  { "[log]",")" },
++  { "[COS]","cos(" },
++  { "[cos]",")" },
++  { "[SIN]","sin(" },
++  { "[sin]",")" },
++  { "[TAN]","tan(" },
++  { "[tan]",")" },
++  { "[COSH]","cosh(" },
++  { "[cosh]",")" },
++  { "[SINH]","sinh(" },
++  { "[sinh]",")" },
++  { "[TANH]","tanh(" },
++  { "[tanh]",")" },
 +  { "[PAR]","\n\n" },
 +  { "[BR]", "\n" },
 +  /* "[GRK]name[grk]" becomes "&name;" */
 +  { "[GRK]", "&" },
 +  { "[grk]", ";" }
 +};
 +#define NSRWIKI asize(sandrWiki)
 +
 +/* Markup-to-nroff rules, applied top to bottom by check_nroff().
 + * The "<", ">", "^" and "#" rules near the end delete those characters,
 + * including any produced by earlier rules such as "[CHEVRON]". */
 +const t_sandr_const sandrNROFF[] = {
 +  { "[TT]", "\\fB " },
 +  { "[tt]", "\\fR" },
 +  { "[BB]", "\\fB " },
 +  { "[bb]", "\\fR" },
 +  { "[IT]", "\\fI " },
 +  { "[it]", "\\fR" },
++  { "[MATH]","" },
++  { "[math]","" },
++  { "[CHEVRON]","<" },
++  { "[chevron]",">" },
++  { "[MAG]", "|" },
++  { "[mag]", "|" },
++  { "[INT]","integral" },
++  { "[FROM]"," from " },
++  { "[from]","" },
++  { "[TO]", " to " },
++  { "[to]", " of" },
++  { "[int]","" },
++  { "[SUM]","sum" },
++  { "[sum]","" },
++  { "[SUB]","_" },
++  { "[sub]","" },
++  { "[SQRT]","sqrt(" },
++  { "[sqrt]",")", },
++  { "[EXP]","exp(" },
++  { "[exp]",")" },
++  { "[LN]","ln(" },
++  { "[ln]",")" },
++  { "[LOG]","log(" },
++  { "[log]",")" },
++  { "[COS]","cos(" },
++  { "[cos]",")" },
++  { "[SIN]","sin(" },
++  { "[sin]",")" },
++  { "[TAN]","tan(" },
++  { "[tan]",")" },
++  { "[COSH]","cosh(" },
++  { "[cosh]",")" },
++  { "[SINH]","sinh(" },
++  { "[sinh]",")" },
++  { "[TANH]","tanh(" },
++  { "[tanh]",")" },
 +  { "[PAR]","\n\n" },
 +  { "\n ",    "\n" },
 +  { "<",    "" },
 +  { ">",    "" },
 +  { "^",    "" },
 +  { "#",    "" },
 +  { "[BR]", "\n"},
 +  { "-",    "\\-"},
 +  { "[GRK]", "" },
 +  { "[grk]", "" }
 +};
 +#define NSRNROFF asize(sandrNROFF)
 +
 +const t_sandr_const sandrHTML[] = {
 +  { "<",    "&lt;" },
 +  { ">",    "&gt;" },
 +  { "[TT]", "<tt>" },
 +  { "[tt]", "</tt>" },
 +  { "[BB]", "<b>" },
 +  { "[bb]", "</b>" },
 +  { "[IT]", "<it>" },
 +  { "[it]", "</it>" },
++  { "[MATH]","" },
++  { "[math]","" },
++  { "[CHEVRON]","<" },
++  { "[chevron]",">" },
++  { "[MAG]", "|" },
++  { "[mag]", "|" },
++  { "[INT]","integral" },
++  { "[FROM]"," from " },
++  { "[from]","" },
++  { "[TO]", " to " },
++  { "[to]", " of" },
++  { "[int]","" },
++  { "[SUM]","sum" },
++  { "[sum]","" },
++  { "[SUB]","_" },
++  { "[sub]","" },
++  { "[SQRT]","sqrt(" },
++  { "[sqrt]",")", },
++  { "[EXP]","exp(" },
++  { "[exp]",")" },
++  { "[LN]","ln(" },
++  { "[ln]",")" },
++  { "[LOG]","log(" },
++  { "[log]",")" },
++  { "[COS]","cos(" },
++  { "[cos]",")" },
++  { "[SIN]","sin(" },
++  { "[sin]",")" },
++  { "[TAN]","tan(" },
++  { "[tan]",")" },
++  { "[COSH]","cosh(" },
++  { "[cosh]",")" },
++  { "[SINH]","sinh(" },
++  { "[sinh]",")" },
++  { "[TANH]","tanh(" },
++  { "[tanh]",")" },
 +  { "[PAR]","<p>" },
 +  { "[BR]", "<br>" },
 +  { "[GRK]", "&"  },
 +  { "[grk]", ";"  }
 +};
 +#define NSRHTML asize(sandrHTML)
 +
 +/* Markup-to-XML rules, applied top to bottom by check_xml().
 + * NOTE(review): "[CHEVRON]"/"[chevron]" emit a raw "<"/">" after the
 + * escaping rules at the top have already run - confirm that is intended. */
 +const t_sandr_const sandrXML[] = {
 +  { "<",    "&lt;" },
 +  { ">",    "&gt;" },
 +  { "[TT]", "<arg>" },
 +  { "[tt]", "</arg>" },
 +  { "[BB]", "<emp>" },
 +  { "[bb]", "</emp>" },
 +  { "[IT]", "<it>" },
 +  { "[it]", "</it>" },
++  { "[MATH]","" },
++  { "[math]","" },
++  { "[CHEVRON]","<" },
++  { "[chevron]",">" },
++  { "[MAG]", "|" },
++  { "[mag]", "|" },
++  { "[INT]","integral" },
++  { "[FROM]"," from " },
++  { "[from]","" },
++  { "[TO]", " to " },
++  { "[to]", " of" },
++  { "[int]","" },
++  { "[SUM]","sum" },
++  { "[sum]","" },
++  { "[SUB]","_" },
++  { "[sub]","" },
++  { "[SQRT]","sqrt(" },
++  { "[sqrt]",")", },
++  { "[EXP]","exp(" },
++  { "[exp]",")" },
++  { "[LN]","ln(" },
++  { "[ln]",")" },
++  { "[LOG]","log(" },
++  { "[log]",")" },
++  { "[COS]","cos(" },
++  { "[cos]",")" },
++  { "[SIN]","sin(" },
++  { "[sin]",")" },
++  { "[TAN]","tan(" },
++  { "[tan]",")" },
++  { "[COSH]","cosh(" },
++  { "[cosh]",")" },
++  { "[SINH]","sinh(" },
++  { "[sinh]",")" },
++  { "[TANH]","tanh(" },
++  { "[tanh]",")" },
 +  /* A paragraph break closes the current <par> and opens the next one */
 +  { "[PAR]","</par>\n<par>" },
 +  { "[BR]", "<br />" },
 +  { "[GRK]", "" },
 +  { "[grk]", "" }
 +};
 +#define NSRXML asize(sandrXML)
 +
 +static void mynum(char *buf,int n)
 +{
 +  if (n >= 10)
 +    sprintf(buf,"%2d",n);
 +  else
 +    sprintf(buf,"0%1d",n);
 +}
 +
 +static char *mydate(char buf[], int maxsize,gmx_bool bWiki)
 +{
 +  const char *mon[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", 
 +                       "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
 +  const char *day[] = { "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" };
 +  const char *num[] = { "01", "02", "03", "04", "05", "06","07", "08", "09" }; 
 +  time_t now;
 +  struct tm tm;
 +  
 +  time(&now);
 +#if ((defined WIN32 || defined _WIN32 || defined WIN64 || defined _WIN64) && !defined __CYGWIN__ && !defined __CYGWIN32__)
 +  /* Native windows */
 +  localtime_s(&tm,&now);
 +#else
 +  localtime_r(&now,&tm);
 +#endif
 +
 +  /* subtract one from maxsize, so we have room for \0. */
 +  if (bWiki) {
 +    char dd[8],mm[8],ss[8],hh[8],mn[8];
 +    
 +    mynum(dd,tm.tm_mday);
 +    mynum(mm,tm.tm_mon);
 +    mynum(ss,tm.tm_sec);
 +    mynum(hh,tm.tm_hour);
 +    mynum(mn,tm.tm_min);
 +    sprintf(buf,"%4d-%2s-%2sT%2s:%2s:%2sZ",
 +           tm.tm_year+1900,mm,dd,hh,mn,ss);
 +  }
 +  else
 +    sprintf(buf,"%s %d %s %d",day[tm.tm_wday],tm.tm_mday,
 +           mon[tm.tm_mon],tm.tm_year+1900);
 +  
 +  return buf;
 +}
 +
 +/* Data structure for saved HTML links: a cached list of search/replace
 + * rules that html_xref() fills on first use and finish_linkdata() frees. */
 +typedef struct t_linkdata {
 +  int     nsr;  /* number of valid entries in sr */
 +  t_sandr *sr;  /* rule array; NULL until html_xref() has loaded it */
 +} t_linkdata;
 +
 +static t_linkdata *init_linkdata()
 +{
 +  t_linkdata *p;
 +  snew(p,1);
 +  p->sr=NULL;
 +  p->nsr=0;
 +
 +  return p;
 +}
 +
 +static void finish_linkdata(t_linkdata *p)
 +{
 +  int i;
 +  
 +  for(i=0;i<p->nsr;i++) {
 +    sfree(p->sr[i].search);    
 +    sfree(p->sr[i].replace);
 +  }
 +  sfree(p->sr);
 +  sfree(p);
 +}
 +
 +static char *repall(const char *s,int nsr,const t_sandr_const sa[])
 +{
 +  int  i;
 +  char *buf1,*buf2;
 +  
 +  /* Copy input to a non-constant char buffer.
 +   * buf1 is allocated here 
 +   */
 +  buf1=gmx_strdup(s); 
 +  
 +  for(i=0; (i<nsr); i++) {
 +    /* Replace in buffer1, put result in buffer2.
 +     * buf2 is allocated here.
 +     */
 +    buf2=replace(buf1,sa[i].search,sa[i].replace);
 +    sfree(buf1);
 +    buf1=buf2;
 +  }
 +  
 +  return buf1;
 +} 
 +
 +static char *repallww(const char *s,int nsr,const t_sandr sa[])
 +{
 +  int  i;
 +  char *buf1,*buf2;
 +
 +  /* Copy input to a non-constant char buffer.
 +   * buf1 is allocated here 
 +   */
 +  buf1=gmx_strdup(s); 
 +  
 +  for(i=0; (i<nsr); i++) {
 +    /* Replace in buffer1, put result in buffer2.
 +     * buf2 is allocated here.
 +     */
 +    buf2=replaceww(buf1,sa[i].search,sa[i].replace);
 +    sfree(buf1);
 +    buf1=buf2;
 +  }
 +  return buf1;
 +}
 +
 +static char *html_xref(char *s,const char *program, t_linkdata *links,gmx_bool bWiki)
 +{
 +  char   buf[256],**filestr;
 +  int    i,j,n;
 +  
 +  if (links->sr == NULL) {
 +    n=get_file("links.dat",&(filestr));
 +    links->nsr=n;
 +    snew(links->sr,n);
 +    for(i=0,j=0; (i<n); i++) {
 +      if (!program || (gmx_strcasecmp(program,filestr[i])  != 0)) {
 +      links->sr[j].search=gmx_strdup(filestr[i]);
 +      if (bWiki)
 +        sprintf(buf,"[[%s]]",filestr[i]);
 +      else
 +        sprintf(buf,"<a href=\"%s.html\">%s</a>",filestr[i],filestr[i]);
 +      links->sr[j].replace=gmx_strdup(buf);
 +      j++;
 +      }
 +    }
 +    links->nsr=j;
 +    for(i=0;i<n;i++)
 +      sfree(filestr[i]);
 +    sfree(filestr);
 +  }
 +  return repallww(s,links->nsr,links->sr);
 +}
 +
 +char *check_tex(const char *s)
 +{
 +  return repall(s,NSRTEX,sandrTeX);
 +}
 +
 +static char *check_nroff(const char *s)
 +{
 +  return repall(s,NSRNROFF,sandrNROFF);
 +}
 +
 +static char *check_wiki(const char *s,const char *program, t_linkdata *links)
 +{
 +  char *buf;
 +  
 +  buf = repall(s,NSRWIKI,sandrWiki);
 +  buf = html_xref(buf,program,links,TRUE);
 +  
 +  return buf;
 +}
 +
 +static char *check_html(const char *s,const char *program, t_linkdata *links)
 +{
 +  char *buf;
 +  
 +  buf = repall(s,NSRHTML,sandrHTML);
 +  buf = html_xref(buf,program,links,FALSE);
 +  
 +  return buf;
 +}
 +
 +#define NWR(s) check_wiki(s,program,links)
 +#define NSR(s) check_html(s,program,links)
 +  
 +#define FLAG_SET(flag, mask) ((flag & mask) == mask)
 +char *fileopt(unsigned long flag,char buf[],int maxsize)
 +{
 +  char tmp[256];
 +  
 +  if (FLAG_SET(flag, ffRW))
 +    sprintf(tmp,"In/Out");
 +  else if (FLAG_SET(flag, ffREAD))
 +    sprintf(tmp,"Input");
 +  else if (FLAG_SET(flag, ffWRITE))
 +    sprintf(tmp,"Output");
 +  else
 +    sprintf(tmp,"Dunno");
 +
 +  if (FLAG_SET(flag, ffOPT)) {
 +    strcat(tmp,", Opt");
 +    if (FLAG_SET(flag, ffSET)) 
 +      strcat(tmp,"!");
 +    else
 +      strcat(tmp,".");
 +  }
 +  if (FLAG_SET(flag, ffLIB))
 +    strcat(tmp,", Lib.");
 +  if (FLAG_SET(flag, ffMULT))
 +    strcat(tmp,", Mult.");
 +
 +  sprintf(buf,"%s",tmp);
 +  
 +  return buf;
 +}
 +
 +/* Write the LaTeX manual section for one program to out: a \section
 + * heading, the description lines, a tabbing table of file options, a
 + * tabbing table of the other options and an itemized list of known bugs.
 + * All text passes through check_tex(). links is not used here.
 + * NOTE(review): every check_tex() call returns a newly allocated string
 + * that is not freed in this function.
 + */
 +static void write_texman(FILE *out,const char *program,
 +                       int nldesc,const char **desc,
 +                       int nfile,t_filenm *fnm,
 +                       int npargs,t_pargs *pa,
 +                       int nbug,const char **bugs,
 +                       t_linkdata *links)
 +{
 +  int i;
 +  char tmp[256];
 +  
 +  fprintf(out,"\\section{\\normindex{%s}}\\label{%s}\n\n",check_tex(program),check_tex(program));
 +  
 +  if (nldesc > 0)
 +    for(i=0; (i<nldesc); i++) 
 +      fprintf(out,"%s\n",check_tex(desc[i]));
 +
 +  if (nfile > 0) {
 +    fprintf(out,"\\vspace{-2ex}\\begin{tabbing}\n");
 +    fprintf(out,"\n{\\normalsize \\bf Files}\\nopagebreak\\\\\n");
 +    fprintf(out,"{\\tt ~~~~~~~} \\= {\\tt ~~~~~~~~~~~~~~} \\= "
 +          "~~~~~~~~~~~~~~~~~~~~~~ \\= \\nopagebreak\\kill\n");
 +    for(i=0; (i<nfile); i++)
 +      fprintf(out,"\\>{\\tt %s} \\'\\> {\\tt %s} \\' %s \\> "
 +            "\\parbox[t]{0.55\\linewidth}{%s} \\\\\n",
 +            check_tex(fnm[i].opt),check_tex(fnm[i].fns[0]),
 +            check_tex(fileopt(fnm[i].flag,tmp,255)),
 +            check_tex(ftp2desc(fnm[i].ftp)));
 +    fprintf(out,"\\end{tabbing}\\vspace{-4ex}\n");
 +  }
 +  if (npargs > 0) {
 +    fprintf(out,"\\vspace{-2ex}\\begin{tabbing}\n");
 +    fprintf(out,"\n{\\normalsize \\bf Other options}\\nopagebreak\\\\\n");
 +    fprintf(out,"{\\tt ~~~~~~~~~~} \\= vector \\= "
 +          "{\\tt ~~~~~~~} \\= \\nopagebreak\\kill\n");
 +    for(i=0; (i<npargs); i++) {
 +      /* A converted default value of at most 8 characters goes on the same
 +       * row as the option; a longer one is moved to a row of its own. */
 +      if (strlen(check_tex(pa_val(&(pa[i]),tmp,255))) <= 8)
 +      fprintf(out,"\\> {\\tt %s} \\'\\> %s \\'\\> {\\tt %s} \\' "
 +              "\\parbox[t]{0.68\\linewidth}{%s}\\\\\n",
 +              check_tex(pa[i].option),get_arg_desc(pa[i].type),
 +              check_tex(pa_val(&(pa[i]),tmp,255)),
 +              check_tex(pa[i].desc));
 +      else
 +              fprintf(out,"\\> {\\tt %s} \\'\\> %s \\'\\>\\\\\n"
 +              "\\> \\'\\> \\'\\> {\\tt %s} \\' "
 +              "\\parbox[t]{0.7\\linewidth}{%s}\\\\\n",
 +              check_tex(pa[i].option),get_arg_desc(pa[i].type),
 +              check_tex(pa_val(&(pa[i]),tmp,255)),
 +              check_tex(pa[i].desc));
 +    }
 +    fprintf(out,"\\end{tabbing}\\vspace{-4ex}\n");
 +  }
 +  if (nbug > 0) {
 +    fprintf(out,"\n");
 +    fprintf(out,"\\begin{itemize}\n");
 +    for(i=0; (i<nbug); i++)
 +      fprintf(out,"\\item %s\n",check_tex(bugs[i]));
 +    fprintf(out,"\\end{itemize}\n");
 +  }
 +/*   fprintf(out,"\n\\newpage\n"); */
 +}
 +
 +/* Write an nroff man page for one program to out, with the sections
 + * NAME, SYNOPSIS, DESCRIPTION, FILES, OTHER OPTIONS, KNOWN PROBLEMS and
 + * SEE ALSO. Text passes through check_nroff(). links is not used here.
 + * NOTE(review): every check_nroff() call returns a newly allocated string
 + * that is not freed in this function.
 + */
 +static void write_nroffman(FILE *out,
 +                         const char *program,
 +                         int nldesc,const char **desc,
 +                         int nfile,t_filenm *fnm,
 +                         int npargs,t_pargs *pa,
 +                         int nbug,const char **bugs,
 +                         t_linkdata *links)
 +
 +{
 +  int i;
 +  char tmp[256];
 +  
 +  
 +  fprintf(out,".TH %s 1 \"%s\" \"\" \"GROMACS suite, %s\"\n",program,mydate(tmp,255,FALSE),GromacsVersion());
 +  fprintf(out,".SH NAME\n");
 +  fprintf(out,"%s\n",program);
 +  fprintf(out,".B %s\n",GromacsVersion());
 +  
 +  fprintf(out,".SH SYNOPSIS\n");
 +  fprintf(out,"\\f3%s\\fP\n",program);
 +
 +  /* command line arguments */
 +  if (nfile > 0) {
 +    for(i=0; (i<nfile); i++)
 +      fprintf(out,".BI \"%s\" \" %s \"\n",check_nroff(fnm[i].opt),
 +            check_nroff(fnm[i].fns[0]));
 +  }
 +  if (npargs > 0) {
 +    for(i=0; (i<npargs); i++)
 +      /* Booleans are shown as "-[no]name"; option+1 skips the first
 +       * character of the stored option (presumably its leading '-') */
 +      if (pa[i].type == etBOOL)
 +      fprintf(out,".BI \"\\-[no]%s\" \"\"\n",check_nroff(pa[i].option+1));
 +      else
 +      fprintf(out,".BI \"%s\" \" %s \"\n",check_nroff(pa[i].option),
 +              check_nroff(get_arg_desc(pa[i].type)));
 +  }
 +  
 +  /* description */
 +  if (nldesc > 0) {
 +    fprintf(out,".SH DESCRIPTION\n");
 +    for(i=0; (i<nldesc); i++) 
 +      fprintf(out,"\\&%s\n",check_nroff(desc[i]));
 +  }
 +
 +  /* FILES */
 +  if (nfile > 0) {
 +    fprintf(out,".SH FILES\n");
 +    for(i=0; (i<nfile); i++)
 +      fprintf(out,".BI \"%s\" \" %s\" \n.B %s\n %s \n\n",
 +            check_nroff(fnm[i].opt),
 +              check_nroff(fnm[i].fns[0]),
 +              check_nroff(fileopt(fnm[i].flag,tmp,255)),
 +            check_nroff(ftp2desc(fnm[i].ftp)));
 +  }
 +  
 +  /* other options */
 +  /* The section header is written even when there are no options */
 +  fprintf(out,".SH OTHER OPTIONS\n");
 +  if ( npargs > 0 ) {
 +    for(i=0; (i<npargs); i++) {
 +      if (pa[i].type == etBOOL)
 +      fprintf(out,".BI \"\\-[no]%s\"  \"%s\"\n %s\n\n",
 +              check_nroff(pa[i].option+1),
 +              check_nroff(pa_val(&(pa[i]),tmp,255)),
 +                check_nroff(pa[i].desc));
 +      else
 +      fprintf(out,".BI \"%s\"  \" %s\" \" %s\" \n %s\n\n",
 +              check_nroff(pa[i].option),
 +                check_nroff(get_arg_desc(pa[i].type)),
 +              check_nroff(pa_val(&(pa[i]),tmp,255)),
 +                check_nroff(pa[i].desc));
 +    }
 +  }
 +
 +  if (nbug > 0) {
 +    fprintf(out,".SH KNOWN PROBLEMS\n");
 +    for(i=0; (i<nbug); i++)
 +      fprintf(out,"\\- %s\n\n",check_nroff(bugs[i]));
 +  }
 +
 +  fprintf(out,".SH SEE ALSO\n.BR gromacs(7)\n\n");
 +  fprintf(out,"More information about \\fBGROMACS\\fR is available at <\\fIhttp://www.gromacs.org/\\fR>.\n");
 +
 +}
 +
 +char *check_tty(const char *s)
 +{
 +  return repall(s,NSRTTY,sandrTty);
 +}
 +
 +void
 +print_tty_formatted(FILE *out, int nldesc, const char **desc,int indent,
 +                    t_linkdata *links,const char *program,gmx_bool bWiki)
 +{
 +  char *buf;
 +  char *temp;
 +  int buflen,i,j;
 +
 +  buflen = 80*nldesc;
 +  snew(buf,buflen);
 +  for(i=0; (i<nldesc); i++) {
 +    if ((strlen(buf)>0) && 
 +      (buf[strlen(buf)-1] !=' ') && (buf[strlen(buf)-1] !='\n'))
 +      strcat(buf," ");
 +    if (bWiki)
 +      temp=NWR(desc[i]);
 +    else
 +      temp=check_tty(desc[i]);
 +    if (strlen(buf) + strlen(temp) >= (size_t)(buflen-2)) {
 +      buflen += strlen(temp);
 +      srenew(buf,buflen);
 +    }
 +    strcat(buf,temp);
 +    sfree(temp);
 +  }
 +  /* Make lines of at most 79 characters */
 +  temp = wrap_lines(buf,78,indent,FALSE);
 +  fprintf(out,"%s\n",temp);
 +  sfree(temp);
 +  sfree(buf);
 +}
 +
 +static void write_ttyman(FILE *out,
 +                       const char *program,
 +                       int nldesc,const char **desc,
 +                       int nfile,t_filenm *fnm,
 +                       int npargs,t_pargs *pa,
 +                       int nbug,const char **bugs,gmx_bool bHeader,
 +                       t_linkdata *links)
 +{
 +  int i;
 +  char buf[256];
 +  char *tmp;
 +  
 +  if (bHeader) {
 +    fprintf(out,"%s\n\n",check_tty(program));
 +    fprintf(out,"%s\n%s\n",GromacsVersion(),mydate(buf,255,FALSE));
 +  }
 +  if (nldesc > 0) {
 +    fprintf(out,"DESCRIPTION\n-----------\n");
 +    print_tty_formatted(out,nldesc,desc,0,links,program,FALSE);
 +  }
 +  if (nbug > 0) {
 +    fprintf(out,"\n");
 +    fprintf(out,"KNOWN PROBLEMS\n----------\n");
 +    for(i=0; i<nbug; i++) {
 +      snew(tmp,strlen(bugs[i])+3);
 +      strcpy(tmp,"* ");
 +      strcpy(tmp+2,check_tty(bugs[i]));
 +      fprintf(out,"%s\n",wrap_lines(tmp,78,2,FALSE));
 +      sfree(tmp);
 +    }
 +  }
 +  if (nfile > 0) {
 +    fprintf(out,"\n");
 +    pr_fns(out,nfile,fnm);
 +  }
 +  if (npargs > 0) {
 +    print_pargs(out,npargs,pa,FALSE);
 +  }
 +}
 +
 +/* Print the table of file options to out. With bWiki set it is a
 + * preformatted text table with wiki-converted names and descriptions;
 + * otherwise an HTML <TABLE> whose file names link to "<ext>.html".
 + * NOTE(review): the NWR()/NSR() results are newly allocated strings that
 + * are not freed here.
 + */
 +static void pr_html_files(FILE *out,int nfile,t_filenm fnm[],
 +                        const char *program,t_linkdata *links,gmx_bool bWiki)
 +{ 
 +  int  i;
 +  char link[10],tmp[255];
 +  
 +  if (bWiki)
 +    fprintf(out," %-10s %-12s %-12s %-s\n"
 +          " -----------------------------------------------------\n",
 +          "Option","Filename","Type","Description");
 +  else
 +    fprintf(out,
 +          "<TABLE BORDER=1 CELLSPACING=0 CELLPADDING=2>\n"
 +          "<TR>"
 +          "<TH>option</TH>"
 +          "<TH>filename</TH>"
 +          "<TH>type</TH>"
 +          "<TH>description</TH>"
 +          "</TR>\n");
 +  
 +  for(i=0; (i<nfile); i++) {
 +    /* Link target is the file type's extension; "???" falls back to the
 +     * generic "files" page. Assumes the extension fits in link[10]. */
 +    strcpy(link,ftp2ext(fnm[i].ftp));
 +    if (strcmp(link,"???")==0)
 +      strcpy(link,"files");
 +    if (bWiki)
 +      fprintf(out," %-10s %-16s %-12s %-s\n",
 +            fnm[i].opt,
 +            NWR(fnm[i].fns[0]),
 +            fileopt(fnm[i].flag,tmp,255),
 +            NWR(ftp2desc(fnm[i].ftp)));
 +    else
 +      fprintf(out,
 +            "<TR>"
 +            "<TD ALIGN=RIGHT> <b><tt>%s</tt></b> </TD>"
 +            "<TD ALIGN=RIGHT> <tt><a href=\"%s.html\">%12s</a></tt> </TD>"
 +            "<TD> %s </TD>"
 +            "<TD> %s </TD>"
 +            "</TR>\n",
 +            fnm[i].opt,link,fnm[i].fns[0],fileopt(fnm[i].flag,tmp,255),
 +            NSR(ftp2desc(fnm[i].ftp)));
 +  }
 +  if (!bWiki)
 +    fprintf(out,"</TABLE>\n");
 +}
 +
 +static void write_wikiman(FILE *out,
 +                        const char *program,
 +                        int nldesc,const char **desc,
 +                        int nfile,t_filenm *fnm,
 +                        int npargs,t_pargs *pa,
 +                        int nbug,const char **bugs,gmx_bool bHeader,
 +                        t_linkdata *links)
 +{
 +  int i;
 +  char buf[256],link[10];
 +  char *tmp,*tmp2;
 +  fprintf(out,"<page>\n<title>Manual:%s_%s</title>\n",program,
 +        VERSION);
 +  fprintf(out,"<revision>\n");
 +  fprintf(out,"<timestamp>%s</timestamp>\n",mydate(buf,255,TRUE));
 +  fprintf(out,"<text xml:space=\"preserve\">\n");
 +  if (nldesc > 0) {
 +    fprintf(out,"== Description ==\n");
 +    print_tty_formatted(out,nldesc,desc,0,links,program,TRUE);
 +    fprintf(out,"\n");
 +  }
 +  if (nbug > 0) {
 +    fprintf(out,"== Known Problems ==\n");
 +    for(i=0; i<nbug; i++) {
 +      snew(tmp,strlen(bugs[i])+3);
 +      strcpy(tmp,"* ");
 +      strcpy(tmp+2,bugs[i]);
 +      fprintf(out,"%s\n",NWR(tmp));
 +      sfree(tmp);
 +    }
 +  }
 +  if (nfile > 0) {
 +    fprintf(out,"\n== Files ==\n");
 +    pr_html_files(out,nfile,fnm,program,links,TRUE);
 +  }
 +  if (npargs > 0) {
 +    fprintf(out,"\n== Options ==\n");
 +    fprintf(out," %-12s %-6s %-6s  %-s\n",
 +          "Option","Type","Value","Description");
 +    fprintf(out," ------------------------------------------------------\n");
 +    for(i=0; (i<npargs); i++) {
 +      tmp = NWR(pargs_print_line(&pa[i],TRUE));
 +      fprintf(out,"%s",tmp);
 +      sfree(tmp);
 +    }
 +  }
 +  fprintf(out,"[[category:Manual_Pages_%s|%s]]\n",VERSION,program);
 +  fprintf(out,"</text>\n");
 +  fprintf(out,"</revision>\n");
 +  fprintf(out,"</page>\n\n");
 +}
 +
 +static void write_htmlman(FILE *out,
 +                        const char *program,
 +                        int nldesc,const char **desc,
 +                        int nfile,t_filenm *fnm,
 +                        int npargs,t_pargs *pa,
 +                        int nbug,const char **bugs,
 +                        t_linkdata *links)
 +{
 +  int i;
 +  char link[10],tmp[255];
 +  
 +  fprintf(out,"<HTML>\n<HEAD>\n<TITLE>%s</TITLE>\n",program);
 +  fprintf(out,"<LINK rel=stylesheet href=\"style.css\" type=\"text/css\">\n");
 +  fprintf(out,"<BODY text=\"#000000\" bgcolor=\"#FFFFFF\" link=\"#0000FF\" vlink=\"#990000\" alink=\"#FF0000\">\n");
 +  fprintf(out,"<TABLE WIDTH=\"98%%\" NOBORDER >\n<TR><TD WIDTH=400>\n");
 +  fprintf(out,"<TABLE WIDTH=400 NOBORDER>\n<TD WIDTH=116>\n");
 +  fprintf(out,"<a href=\"http://www.gromacs.org/\">"
 +        "<img SRC=\"../images/gmxlogo_small.png\""
 +        "BORDER=0 </a></td>\n");
 +  fprintf(out,"<td ALIGN=LEFT VALIGN=TOP WIDTH=280>"
 +        "<br><h2>%s</h2>",program);
 +  fprintf(out,"<font size=-1><A HREF=\"../online.html\">Main Table of Contents</A></font><br>");
 +  fprintf(out,"<br></td>\n</TABLE></TD><TD WIDTH=\"*\" ALIGN=RIGHT VALIGN=BOTTOM><p><B>%s<br>\n",GromacsVersion());
 +  fprintf(out,"%s</B></td></tr></TABLE>\n<HR>\n",mydate(tmp,255,FALSE));
 +  
 +  if (nldesc > 0) {
 +    fprintf(out,"<H3>Description</H3>\n<p>\n");
 +    for(i=0; (i<nldesc); i++) 
 +      fprintf(out,"%s\n",NSR(desc[i]));
 +  }
 +  if (nfile > 0) {
 +    fprintf(out,"<P>\n");
 +    fprintf(out,"<H3>Files</H3>\n");
 +    pr_html_files(out,nfile,fnm,program,links,FALSE);
 +  }
 +  if (npargs > 0) {
 +    fprintf(out,"<P>\n");
 +    fprintf(out,"<H3>Other options</H3>\n");
 +    fprintf(out,
 +          "<TABLE BORDER=1 CELLSPACING=0 CELLPADDING=2>\n"
 +          "<TR>"
 +          "<TH>option</TH>"
 +          "<TH>type</TH>"
 +          "<TH>default</TH>"
 +          "<TH>description</TH>"
 +          "</TR>\n");
 +    for(i=0; (i<npargs); i++)
 +      fprintf(out,
 +            "<TR>"
 +            "<TD ALIGN=RIGHT> <b><tt>%s%s</tt></b> </TD>"
 +            "<TD ALIGN=RIGHT> %s </TD>"
 +            "<TD ALIGN=RIGHT> <tt>%s</tt> </TD>"
 +            "<TD> %s </TD>"
 +            "</TD>\n",
 +            (pa[i].type == etBOOL)?"-[no]":"-",pa[i].option+1,
 +            get_arg_desc(pa[i].type),pa_val(&(pa[i]),tmp,255),NSR(pa[i].desc));
 +    fprintf(out,"</TABLE>\n");
 +  }
 +  if (nbug > 0) {
 +    fprintf(out,"<P>\n");
 +    fprintf(out,"<H3>Known problems</H3>\n");
 +    fprintf(out,"<UL>\n");
 +    for(i=0; (i<nbug); i++)
 +      fprintf(out,"<LI>%s\n",NSR(bugs[i]));
 +    fprintf(out,"</UL>\n");
 +  }
 +  fprintf(out,"<P>\n");
 +  fprintf(out,"<hr>\n<div ALIGN=RIGHT>\n");
 +  fprintf(out,"<font size=\"-1\"><a href=\"http://www.gromacs.org\">"
 +        "http://www.gromacs.org</a></font><br>\n");
 +  fprintf(out,"<font size=\"-1\"><a href=\"mailto:gromacs@gromacs.org\">"
 +        "gromacs@gromacs.org</a></font><br>\n");
 +  fprintf(out,"</div>\n");
 +  fprintf(out,"</BODY>\n");
 +}
 +
 +char *check_xml(const char *s,const char *program,t_linkdata *links)
 +{
 +  char *buf;
 +  
 +  buf=repall(s,NSRXML,sandrXML);
 +  buf=html_xref(buf,program,links,FALSE);     /* the same in html and xml */
 +  
 +  return buf;
 +}
 +
 +static void write_xmlman(FILE *out,
 +                       const char *program,
 +                       int nldesc,const char **desc,
 +                       int nfile,t_filenm *fnm,
 +                       int npargs,t_pargs *pa,
 +                       int nbug,const char **bugs,
 +                       t_linkdata *links)
 +{
 +  int i;
 +  char link[10],buf[256],opt[10];
 +
 +#define NSR2(s) check_xml(s,program,links)
 +#define FLAG(w,f) (((w) & (f))==(f)) 
 +
 +  fprintf(out,"<gromacs-manual version=\"%s\" date=\"%s\" www=\"http://www.gromacs.org\">\n",GromacsVersion(),mydate(buf,255,FALSE));
 +  /* fprintf(out,"<LINK rel=stylesheet href=\"style.css\" type=\"text/css\">\n"); */
 +
 +  fprintf(out,"<program name=\"%s\">",program);  
 +  if (nldesc > 0) {
 +    fprintf(out,"\n<description>\n<par>\n");
 +    for(i=0; (i<nldesc); i++) 
 +      fprintf(out,"%s\n",NSR2(desc[i]));
 +  }
 +  fprintf(out,"</par>\n</description>\n");
 +
 +  if (nfile > 0) {
 +    fprintf(out,"\n<files>\n");
 +    for(i=0; (i<nfile); i++) {
 +      strcpy(link,ftp2ext(fnm[i].ftp));
 +      if (strcmp(link,"???")==0)
 +      strcpy(link,"files");
 +        if (fnm[i].opt[0]=='-') strcpy(opt,fnm[i].opt+1);
 +      else strcpy(opt,fnm[i].opt);
 +      fprintf(out,
 +            "<file type=\"%s\" typeid=\"%d\">\n"
 +              "\t<flags read=\"%d\" write=\"%d\" optional=\"%d\"/>\n"
 +            "\t<option>%s</option>\n"
 +            "\t<default-name link=\"%s.html\">%s</default-name>\n"
 +            "\t<description>%s</description>\n"
 +            "</file>\n",
 +            ftp2defnm(fnm[i].ftp),    /* from gmxlib/filenm.c */
 +            fnm[i].ftp,
 +            FLAG(fnm[i].flag,ffREAD), FLAG(fnm[i].flag,ffWRITE), FLAG(fnm[i].flag,ffOPT), 
 +            opt,link,fnm[i].fn,/*fileopt(fnm[i].flag),*/
 +            NSR(ftp2desc(fnm[i].ftp)));
 +    }
 +    fprintf(out,"</files>\n");
 +  }
 +
 +  if (npargs > 0) {
 +    fprintf(out,"\n<options>\n");
 +    for(i=0; (i<npargs); i++)
 +      fprintf(out,
 +            "<option type=\"%s\" hidden=\"%d\">\n"
 +            "\t<name >%s</name>\n"
 +            "\t<default-value>%s</default-value>\n"
 +            "\t<description>%s</description>\n"
 +            "</option>\n",
 +            get_arg_desc(pa[i].type), is_hidden(&pa[i]),
 +            pa[i].option+1,                  /* +1 - with no trailing '-' */
 +            pa_val(&(pa[i]),buf,255),pa[i].desc); /*get_argtp()[pa[i].type],*/
 +    fprintf(out,"</options>\n");
 +  }
 +
 +  if (nbug > 0) {
 +    fprintf(out,"\n<bugs>\n");
 +    for(i=0; (i<nbug); i++)
 +      fprintf(out,"\t<bug>%s</bug>\n",NSR(bugs[i]));
 +    fprintf(out,"</bugs>\n");
 +  }
 +  fprintf(out,"\n</program>\n</gromacs-manual>\n");
 +#undef FLAG  
 +}
 +
 +static void pr_opts(FILE *fp, 
 +                  int nfile,  t_filenm *fnm, 
 +                  int npargs, t_pargs pa[], int shell)
 +{
 +  int i;
 +  
 +  switch (shell) {
 +  case eshellCSH:
 +    fprintf(fp," \"c/-/(");
 +    for (i=0; i<nfile; i++)
 +      fprintf(fp," %s",fnm[i].opt+1);
 +    for (i=0; i<npargs; i++)
 +      if ( (pa[i].type==etBOOL) && *(pa[i].u.b) )
 +      fprintf(fp," no%s",pa[i].option+1);
 +      else
 +      fprintf(fp," %s",pa[i].option+1);
 +    fprintf(fp,")/\"");
 +    break;
 +  case eshellBASH:
 +    fprintf(fp,"if (( $COMP_CWORD <= 1 )) || [[ $c == -* ]]; then COMPREPLY=( $(compgen  -W '");
 +    for (i=0; i<nfile; i++)
 +      fprintf(fp," -%s",fnm[i].opt+1);
 +    for (i=0; i<npargs; i++)
 +      if ( (pa[i].type==etBOOL) && *(pa[i].u.b) )
 +      fprintf(fp," -no%s",pa[i].option+1);
 +      else
 +      fprintf(fp," -%s",pa[i].option+1);
 +    fprintf(fp,"' -- $c)); return 0; fi\n");
 +    break;
 +  case eshellZSH:
 +    fprintf(fp," -x 's[-]' -s \"");
 +    for (i=0; i<nfile; i++)
 +      fprintf(fp," %s",fnm[i].opt+1);
 +    for (i=0; i<npargs; i++)
 +      if ( (pa[i].type==etBOOL) && *(pa[i].u.b) )
 +      fprintf(fp," no%s",pa[i].option+1);
 +      else
 +      fprintf(fp," %s",pa[i].option+1);
 +    fprintf(fp,"\" ");
 +    break;
 +  }
 +}
 +
 +/* Write one csh "complete" line for the current program to out.
 + * The line starts with "complete <ShortProgram()>", followed by the output
 + * of pr_enums, pr_fopts and pr_opts (all called with eshellCSH) and a
 + * terminating newline.
 + */
 +static void write_cshcompl(FILE *out,
 +                         int nfile,  t_filenm *fnm,
 +                         int npargs, t_pargs *pa)
 +{
 +  fprintf(out,"complete %s",ShortProgram());
 +  pr_enums(out,npargs,pa,eshellCSH);
 +  pr_fopts(out,nfile,fnm,eshellCSH);
 +  pr_opts(out,nfile,fnm,npargs,pa,eshellCSH);
 +  fprintf(out,"\n");
 +}
 +
 +/* Write one zsh "compctl" line for the current program to out.
 + * The line consists of "compctl ", the output of pr_opts, pr_enums and
 + * pr_fopts (all called with eshellZSH) and finally "-- <ShortProgram()>".
 + */
 +static void write_zshcompl(FILE *out,
 +                         int nfile,  t_filenm *fnm,
 +                         int npargs, t_pargs *pa)
 +{
 +  fprintf(out,"compctl ");
 +
 +  /* start with options, since they are always present */
 +  pr_opts(out,nfile,fnm,npargs,pa,eshellZSH);
 +  pr_enums(out,npargs,pa,eshellZSH);
 +  pr_fopts(out,nfile,fnm,eshellZSH);
 +  fprintf(out,"-- %s\n",ShortProgram());
 +}
 +
 +/* Write a bash completion function for the current program to out.
 + * The emitted script enables extglob, defines _<ShortProgram()>_compl,
 + * which first offers the option names (pr_opts) and then dispatches on the
 + * previous word through a "case" statement filled in by pr_enums and
 + * pr_fopts, and finally registers the function with "complete -F".
 + */
 +static void write_bashcompl(FILE *out,
 +                          int nfile,  t_filenm *fnm,
 +                          int npargs, t_pargs *pa)
 +{
 +  /* Advanced bash completions are handled by shell functions.
 +   * p and c hold the previous and current word on the command line.
 +   * We need to use extended globbing, so write it in each completion file */
 +  fprintf(out,"shopt -s extglob\n");
 +  fprintf(out,"_%s_compl() {\nlocal p c\n",ShortProgram());
 +  fprintf(out,"COMPREPLY=() c=${COMP_WORDS[COMP_CWORD]} p=${COMP_WORDS[COMP_CWORD-1]}\n");
 +  pr_opts(out,nfile,fnm,npargs,pa,eshellBASH);
 +  fprintf(out,"case \"$p\" in\n");
 +  
 +  pr_enums(out,npargs,pa,eshellBASH);
 +  pr_fopts(out,nfile,fnm,eshellBASH);
 +  fprintf(out,"esac }\ncomplete -F _%s_compl %s\n",ShortProgram(),ShortProgram());
 +}
 +
 +static void write_py(FILE *out,const char *program,
 +                   int nldesc,const char **desc,
 +                   int nfile,t_filenm *fnm,
 +                   int npargs,t_pargs *pa,
 +                   int nbug,const char **bugs,
 +                   t_linkdata *links)
 +{
 +  gmx_bool bHidden;
 +  const char *cls = program;
 +  char *tmp;
 +  int  i,j;
 +
 +  /* Header stuff */  
 +  fprintf(out,"#!/usr/bin/python\n\nfrom GmxDialog import *\n\n");
 +  
 +  /* Class definition */
 +  fprintf(out,"class %s:\n",cls);
 +  fprintf(out,"    def __init__(self,tk):\n");
 +  
 +  /* Help text */
 +  fprintf(out,"        %s_help = \"\"\"\n",cls);
 +  fprintf(out,"        DESCRIPTION\n");
 +  print_tty_formatted(out,nldesc,desc,8,links,program,FALSE);
 +  if (nbug > 0) {
 +    fprintf(out,"\n        BUGS and PROBLEMS\n");
 +    for(i=0; i<nbug; i++) {
 +      snew(tmp,strlen(bugs[i])+3);
 +      strcpy(tmp,"* ");
 +      strcpy(tmp+2,check_tty(bugs[i]));
 +      fprintf(out,"%s\n",wrap_lines(tmp,78,10,TRUE));
 +      sfree(tmp);
 +    }
 +  }
 +  fprintf(out,"        \"\"\"\n\n        # Command line options\n");
 +  /* File options */
 +  fprintf(out,"        flags = []\n");
 +  for(i=0; (i<nfile); i++) 
 +    fprintf(out,"        flags.append(pca_file('%s',\"%s\",0,%d))\n",
 +          ftp2ext_generic(fnm[i].ftp),fnm[i].opt ? fnm[i].opt : "k",
 +          is_optional(&(fnm[i])));
 +          
 +          
 +  /* Other options */
 +  for(i=0; (i<npargs); i++) {
 +    switch(pa[i].type) {
 +    case etINT:
 +      fprintf(out,"        flags.append(pca_int(\"%s\",\"%s\",%d,%d))\n",
 +            pa[i].option,pa[i].desc,*pa[i].u.i,is_hidden(&(pa[i])));
 +      break;
 +    case etREAL:
 +    case etTIME:
 +      fprintf(out,"        flags.append(pca_float(\"%s\",\"%s\",%f,%d))\n",
 +            pa[i].option,pa[i].desc,*pa[i].u.r,is_hidden(&(pa[i])));
 +      break;
 +    case etSTR:
 +    case etBOOL:
 +      fprintf(out,"        flags.append(pca_gmx_bool(\"%s\",\"%s\",%d,%d))\n",
 +            pa[i].option,pa[i].desc,*pa[i].u.b,is_hidden(&(pa[i])));
 +      break;
 +    case etRVEC:
 +      fprintf(stderr,"Sorry, no rvecs yet...\n");
 +      break;
 +    case etENUM:
 +      fprintf(out,"        flags.append(pca_enum(\"%s\",\"%s\",\n",
 +            pa[i].option,pa[i].desc);
 +      fprintf(out,"        ['%s'",pa[i].u.c[1]);
 +      for(j=2; (pa[i].u.c[j] != NULL); j++)
 +      fprintf(out,",'%s'",pa[i].u.c[j]);
 +      fprintf(out,"],%d))\n",is_hidden(&(pa[i])));
 +      break;
 +    default:
 +      break;
 +    }
 +  }
 +    
 +  /* Make the dialog box */
 +  fprintf(out,"        gmxd = gmx_dialog(tk,\"%s\",flags,%s_help)\n\n",
 +        cls,cls);
 +        
 +  /* Main loop */
 +  fprintf(out,"#####################################################\n");
 +  fprintf(out,"tk     = Tk()\n");
 +  fprintf(out,"my%s = %s(tk)\n",cls,cls);
 +  fprintf(out,"tk.mainloop()\n");
 +}
 +
 +void write_man(FILE *out,const char *mantp,
 +             const char *program,
 +             int nldesc,const char **desc,
 +             int nfile,t_filenm *fnm,
 +             int npargs,t_pargs *pa,
 +             int nbug,const char **bugs,
 +             gmx_bool bHidden)
 +{
 +  const char *pr;
 +  int     i,npar;
 +  t_pargs *par;
 + 
 +  t_linkdata *links;
 +  
 +  links=init_linkdata();
 +  
 +  /* Don't write hidden options to completions, it just
 +   * makes the options more complicated for normal users
 +   */
 +
 +  if (bHidden) {
 +    npar=npargs;
 +    par=pa;
 +  }
 +  else {
 +    snew(par,npargs);
 +    npar=0;
 +    for(i=0;i<npargs;i++)
 +      if (!is_hidden(&pa[i])) {
 +      par[npar]=pa[i];
 +      npar++;
 +      }
 +  }
 +  
 +  if ((pr=strrchr(program,DIR_SEPARATOR)) == NULL)
 +    pr=program;
 +  else
 +    pr+=1;
 +  if (strcmp(mantp,"tex")==0)
 +    write_texman(out,pr,nldesc,desc,nfile,fnm,npar,par,nbug,bugs,links);
 +  if (strcmp(mantp,"nroff")==0)
 +    write_nroffman(out,pr,nldesc,desc,nfile,fnm,npar,par,nbug,bugs,links);
 +  if (strcmp(mantp,"ascii")==0)
 +    write_ttyman(out,pr,nldesc,desc,nfile,fnm,npar,par,nbug,bugs,TRUE,links);
 +  if (strcmp(mantp,"wiki")==0)
 +    write_wikiman(out,pr,nldesc,desc,nfile,fnm,npar,par,nbug,bugs,TRUE,links);
 +  if (strcmp(mantp,"help")==0)
 +    write_ttyman(out,pr,nldesc,desc,nfile,fnm,npar,par,nbug,bugs,FALSE,links);
 +  if (strcmp(mantp,"html")==0)
 +    write_htmlman(out,pr,nldesc,desc,nfile,fnm,npar,par,nbug,bugs,links);
 +  if (strcmp(mantp,"py")==0)
 +    write_py(out,pr,nldesc,desc,nfile,fnm,npar,par,nbug,bugs,links);
 +  if (strcmp(mantp,"xml")==0)
 +    write_xmlman(out,pr,nldesc,desc,nfile,fnm,npargs,pa,nbug,bugs,links);     
 +  if (strcmp(mantp,"completion-zsh")==0)
 +    write_zshcompl(out,nfile,fnm,npar,par);
 +  if (strcmp(mantp,"completion-bash")==0)
 +    write_bashcompl(out,nfile,fnm,npar,par);
 +  if (strcmp(mantp,"completion-csh")==0)
 +    write_cshcompl(out,nfile,fnm,npar,par);
 +
 +  if (!bHidden)
 +    sfree(par);
 +
 +  finish_linkdata(links);
 +}
 +
 +/* Return the printable name of option type `type`, looked up in a static
 + * table with etNR entries. No range check is made on `type`.
 + */
 +const char *get_arg_desc(int type) {
 +   static const char *argtp[etNR] = {
 +     "int", "step", "real", "time", "string", "bool", "vector", "enum"
 +   };
 +   return argtp[type];
 +}
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
index ed6abec7693c4b72568b300dfa9590f0050bd83d,0000000000000000000000000000000000000000..150cd535464b3e68906485ad22fe2f387fd690cd
mode 100644,000000..100644
--- /dev/null
@@@ -1,7 -1,0 +1,2 @@@
- # Files       called xxx_test.c are test drivers with a main() function for 
- # module xxx.c, so they should not be included in the library
- file(GLOB_RECURSE NOT_MDLIB_SOURCES *_test.c)
- list(REMOVE_ITEM MDLIB_SOURCES ${NOT_MDLIB_SOURCES})
 +file(GLOB MDLIB_SOURCES *.c)
 +set(MDLIB_SOURCES ${MDLIB_SOURCES} PARENT_SCOPE)
index f30c64b9f5a177b3eaff1a92f4fab6538e8608c5,0000000000000000000000000000000000000000..aee11f661341e8aae5557369ba1e69417a75e49b
mode 100644,000000..100644
--- /dev/null
@@@ -1,8657 -1,0 +1,8657 @@@
-                     if (dd->nc[d] > 1 && d < ddbox->npbcdim)
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + * This file is part of Gromacs        Copyright (c) 1991-2008
 + * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
 + *
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the research papers on the package. Check out http://www.gromacs.org
 + * 
 + * And Hey:
 + * Gnomes, ROck Monsters And Chili Sauce
 + */
 +
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <stdio.h>
 +#include <time.h>
 +#include <math.h>
 +#include <string.h>
 +#include <stdlib.h>
 +#include "typedefs.h"
 +#include "smalloc.h"
 +#include "vec.h"
 +#include "domdec.h"
 +#include "domdec_network.h"
 +#include "nrnb.h"
 +#include "pbc.h"
 +#include "chargegroup.h"
 +#include "constr.h"
 +#include "mdatoms.h"
 +#include "names.h"
 +#include "pdbio.h"
 +#include "futil.h"
 +#include "force.h"
 +#include "pme.h"
 +#include "pull.h"
 +#include "pull_rotation.h"
 +#include "gmx_wallcycle.h"
 +#include "mdrun.h"
 +#include "nsgrid.h"
 +#include "shellfc.h"
 +#include "mtop_util.h"
 +#include "gmxfio.h"
 +#include "gmx_ga2la.h"
 +#include "gmx_sort.h"
 +#include "macros.h"
 +
 +#ifdef GMX_LIB_MPI
 +#include <mpi.h>
 +#endif
 +#ifdef GMX_THREAD_MPI
 +#include "tmpi.h"
 +#endif
 +
 +#define DDRANK(dd,rank)    (rank)
 +#define DDMASTERRANK(dd)   (dd->masterrank)
 +
 +/* Cell boundaries and global charge group division, together with
 + * communication buffers.
 + * NOTE(review): presumably only allocated on the master node - confirm.
 + */
 +typedef struct gmx_domdec_master
 +{
 +    /* The cell boundaries */
 +    real **cell_x;
 +    /* The global charge group division */
 +    int  *ncg;     /* Number of home charge groups for each node */
 +    int  *index;   /* Index of nnodes+1 into cg */
 +    int  *cg;      /* Global charge group index */
 +    int  *nat;     /* Number of home atoms for each node. */
 +    int  *ibuf;    /* Buffer for communication */
 +    rvec *vbuf;    /* Buffer for state scattering and gathering */
 +} gmx_domdec_master_t;
 +
 +/* Send/receive bookkeeping for one communication pulse; an array of these
 + * is stored in gmx_domdec_comm_dim_t.ind.
 + */
 +typedef struct
 +{
 +    /* The numbers of charge groups to send and receive for each cell
 +     * that requires communication, the last entry contains the total
 +     * number of atoms that needs to be communicated.
 +     */
 +    int nsend[DD_MAXIZONE+2];
 +    int nrecv[DD_MAXIZONE+2];
 +    /* The charge groups to send */
 +    int *index;
 +    int nalloc;
 +    /* The atom range for non-in-place communication */
 +    int cell2at0[DD_MAXIZONE];
 +    int cell2at1[DD_MAXIZONE];
 +} gmx_domdec_ind_t;
 +
 +/* Communication setup for one decomposition dimension; stored per
 + * dimension in gmx_domdec_comm_t.cd.
 + */
 +typedef struct
 +{
 +    int  np;                   /* Number of grid pulses in this dimension */
 +    int  np_dlb;               /* For dlb, for use with edlbAUTO          */
 +    gmx_domdec_ind_t *ind;     /* The indices to communicate, size np     */
 +    int  np_nalloc;
 +    gmx_bool bInPlace;             /* Can we communicate in place?            */
 +} gmx_domdec_comm_dim_t;
 +
 +/* Cell-boundary data, referenced through gmx_domdec_comm_t.root under
 + * "Cell sizes for dynamic load balancing". Fields are marked below as
 + * state variables or temporary variables.
 + */
 +typedef struct
 +{
 +    gmx_bool *bCellMin;    /* Temp. var.: is this cell size at the limit     */
 +    real *cell_f;      /* State var.: cell boundaries, box relative      */
 +    real *old_cell_f;  /* Temp. var.: old cell size                      */
 +    real *cell_f_max0; /* State var.: max lower boundary, incl neighbors */
 +    real *cell_f_min1; /* State var.: min upper boundary, incl neighbors */
 +    real *bound_min;   /* Temp. var.: lower limit for cell boundary      */
 +    real *bound_max;   /* Temp. var.: upper limit for cell boundary      */
 +    gmx_bool bLimited;     /* State var.: is DLB limited in this dim and row */
 +    real *buf_ncd;     /* Temp. var.                                     */
 +} gmx_domdec_root_t;
 +
 +#define DD_NLOAD_MAX 9
 +
 +/* Load measurement record, referenced through gmx_domdec_comm_t.load.
 + *
 + * Here floats are accurate enough, since these variables
 + * only influence the load balancing, not the actual MD results.
 + */
 +typedef struct
 +{
 +    int  nload;
 +    float *load;
 +    float sum;
 +    float max;
 +    float sum_m;
 +    float cvol_min;
 +    float mdf;
 +    float pme;
 +    int   flags;
 +} gmx_domdec_load_t;
 +
 +/* One sort record; arrays of these are held in gmx_domdec_sort_t.
 + * NOTE(review): the meaning of the three fields is not visible in this
 + * part of the file - confirm against the sorting code before relying on it.
 + */
 +typedef struct
 +{
 +    int  nsc;
 +    int  ind_gl;
 +    int  ind;
 +} gmx_cgsort_t;
 +
 +/* Work arrays for charge group sorting, referenced through
 + * gmx_domdec_comm_t.sort. Each array is paired with an *_nalloc count.
 + */
 +typedef struct
 +{
 +    gmx_cgsort_t *sort1,*sort2;
 +    int  sort_nalloc;
 +    gmx_cgsort_t *sort_new;
 +    int  sort_new_nalloc;
 +    int  *ibuf;
 +    int  ibuf_nalloc;
 +} gmx_domdec_sort_t;
 +
 +/* Growable rvec buffer: v is the storage and nalloc the number of
 + * allocated elements (see vec_rvec_init and vec_rvec_check_alloc).
 + */
 +typedef struct
 +{
 +    rvec *v;
 +    int  nalloc;
 +} vec_rvec_t;
 +
 +/* This enum determines the order of the coordinates.
 + * ddnatHOME and ddnatZONE should be first and second,
 + * the others can be ordered as wanted.
 + */
 +enum { ddnatHOME, ddnatZONE, ddnatVSITE, ddnatCON, ddnatNR };
 +
 +enum { edlbAUTO, edlbNO, edlbYES, edlbNR };
 +const char *edlb_names[edlbNR] = { "auto", "no", "yes" };
 +
 +/* PME decomposition data for one dimension; gmx_domdec_comm_t holds two
 + * of these in ddpme[2].
 + */
 +typedef struct
 +{
 +    int  dim;      /* The dimension                                          */
 +    gmx_bool dim_match;/* Tells if DD and PME dims match                         */
 +    int  nslab;    /* The number of PME slabs in this dimension              */
 +    real *slb_dim_f; /* Cell sizes for determining the PME comm. with SLB    */
 +    int  *pp_min;  /* The minimum pp node location, size nslab               */
 +    int  *pp_max;  /* The maximum pp node location,size nslab                */
 +    int  maxshift; /* The maximum shift for coordinate redistribution in PME */
 +} gmx_ddpme_t;
 +
 +/* Zone limits; used for gmx_domdec_comm_t.zone_d1 and zone_d2 */
 +typedef struct
 +{
 +    real min0;    /* The minimum bottom of this zone                        */
 +    real max1;    /* The maximum top of this zone                           */
 +    real mch0;    /* The maximum bottom communication height for this zone  */
 +    real mch1;    /* The maximum top communication height for this zone     */
 +    real p1_0;    /* The bottom value of the first cell in this zone        */
 +    real p1_1;    /* The top value of the first cell in this zone           */
 +} gmx_ddzone_t;
 +
 +/* Communication setup, buffers, load-balancing state and statistics of
 + * the domain decomposition; reached through dd->comm.
 + */
 +typedef struct gmx_domdec_comm
 +{
 +    /* All arrays are indexed with 0 to dd->ndim (not Cartesian indexing),
 +     * unless stated otherwise.
 +     */
 +
 +    /* The number of decomposition dimensions for PME, 0: no PME */
 +    int  npmedecompdim;
 +    /* The number of nodes doing PME (PP/PME or only PME) */
 +    int  npmenodes;
 +    int  npmenodes_x;
 +    int  npmenodes_y;
 +    /* The communication setup including the PME only nodes */
 +    gmx_bool bCartesianPP_PME;
 +    ivec ntot;
 +    int  cartpmedim;
 +    int  *pmenodes;          /* size npmenodes                         */
 +    int  *ddindex2simnodeid; /* size npmenodes, only with bCartesianPP
 +                              * but with bCartesianPP_PME              */
 +    gmx_ddpme_t ddpme[2];
 +    
 +    /* The DD particle-particle nodes only */
 +    gmx_bool bCartesianPP;
 +    int  *ddindex2ddnodeid; /* size npmenode, only with bCartesianPP_PME */
 +    
 +    /* The global charge groups */
 +    t_block cgs_gl;
 +
 +    /* Should we sort the cgs */
 +    int  nstSortCG;
 +    gmx_domdec_sort_t *sort;
 +    
 +    /* Are there bonded and multi-body interactions between charge groups? */
 +    gmx_bool bInterCGBondeds;
 +    gmx_bool bInterCGMultiBody;
 +
 +    /* Data for the optional bonded interaction atom communication range */
 +    gmx_bool bBondComm;
 +    t_blocka *cglink;
 +    char *bLocalCG;
 +
 +    /* The DLB option */
 +    int  eDLB;
 +    /* Are we actually using DLB? */
 +    gmx_bool bDynLoadBal;
 +
 +    /* Cell sizes for static load balancing, first index cartesian */
 +    real **slb_frac;
 +    
 +    /* The width of the communicated boundaries */
 +    real cutoff_mbody;
 +    real cutoff;
 +    /* The minimum cell size (including triclinic correction) */
 +    rvec cellsize_min;
 +    /* For dlb, for use with edlbAUTO */
 +    rvec cellsize_min_dlb;
 +    /* The lower limit for the DD cell size with DLB */
 +    real cellsize_limit;
 +    /* Effectively no NB cut-off limit with DLB for systems without PBC? */
 +    gmx_bool bVacDLBNoLimit;
 +
 +    /* tric_dir is only stored here because dd_get_ns_ranges needs it */
 +    ivec tric_dir;
 +    /* box0 and box_size are required with dim's without pbc and -gcom */
 +    rvec box0;
 +    rvec box_size;
 +    
 +    /* The cell boundaries */
 +    rvec cell_x0;
 +    rvec cell_x1;
 +
 +    /* The old location of the cell boundaries, to check cg displacements */
 +    rvec old_cell_x0;
 +    rvec old_cell_x1;
 +
 +    /* The communication setup and charge group boundaries for the zones */
 +    gmx_domdec_zones_t zones;
 +    
 +    /* The zone limits for DD dimensions 1 and 2 (not 0), determined from
 +     * cell boundaries of neighboring cells for dynamic load balancing.
 +     */
 +    gmx_ddzone_t zone_d1[2];
 +    gmx_ddzone_t zone_d2[2][2];
 +    
 +    /* The coordinate/force communication setup and indices */
 +    gmx_domdec_comm_dim_t cd[DIM];
 +    /* The maximum number of cells to communicate with in one dimension */
 +    int  maxpulse;
 +    
 +    /* Which cg distribution is stored on the master node */
 +    int master_cg_ddp_count;
 +    
 +    /* The number of cg's received from the direct neighbors */
 +    int  zone_ncg1[DD_MAXZONE];
 +    
 +    /* The atom counts, the range for each type t is nat[t-1] <= at < nat[t] */
 +    int  nat[ddnatNR];
 +    
 +    /* Communication buffer for general use */
 +    int  *buf_int;
 +    int  nalloc_int;
 +
 +     /* Communication buffer for general use */
 +    vec_rvec_t vbuf;
 +    
 +    /* Communication buffers only used with multiple grid pulses */
 +    int  *buf_int2;
 +    int  nalloc_int2;
 +    vec_rvec_t vbuf2;
 +    
 +    /* Communication buffers for local redistribution */
 +    int  **cggl_flag;
 +    int  cggl_flag_nalloc[DIM*2];
 +    rvec **cgcm_state;
 +    int  cgcm_state_nalloc[DIM*2];
 +    
 +    /* Cell sizes for dynamic load balancing */
 +    gmx_domdec_root_t **root;
 +    real *cell_f_row;
 +    real cell_f0[DIM];
 +    real cell_f1[DIM];
 +    real cell_f_max0[DIM];
 +    real cell_f_min1[DIM];
 +    
 +    /* Stuff for load communication */
 +    gmx_bool bRecordLoad;
 +    gmx_domdec_load_t *load;
 +#ifdef GMX_MPI
 +    MPI_Comm *mpi_comm_load;
 +#endif
 +
 +    /* Maximum DLB scaling per load balancing step in percent */
 +    int dlb_scale_lim;
 +
 +    /* Cycle counters */
 +    float cycl[ddCyclNr];
 +    int   cycl_n[ddCyclNr];
 +    float cycl_max[ddCyclNr];
 +    /* Flop counter (0=no,1=yes,2=with (eFlop-1)*5% noise) */
 +    int eFlop;
 +    double flop;
 +    int    flop_n;
 +    /* How often did we have load measurements */
 +    int    n_load_have;
 +    /* How often have we collected the load measurements */
 +    int    n_load_collect;
 +    
 +    /* Statistics */
 +    double sum_nat[ddnatNR-ddnatZONE];
 +    int    ndecomp;
 +    int    nload;
 +    double load_step;
 +    double load_sum;
 +    double load_max;
 +    ivec   load_lim;
 +    double load_mdf;
 +    double load_pme;
 +
 +    /* The last partition step */
 +    gmx_large_int_t globalcomm_step;
 +
 +    /* Debugging */
 +    int  nstDDDump;
 +    int  nstDDDumpGrid;
 +    int  DD_debug;
 +} gmx_domdec_comm_t;
 +
 +/* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
 +#define DD_CGIBS 2
 +
 +/* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
 +#define DD_FLAG_NRCG  65535
 +#define DD_FLAG_FW(d) (1<<(16+(d)*2))
 +#define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
 +
 +/* Zone permutation required to obtain consecutive charge groups
 + * for neighbor searching.
 + */
 +static const int zone_perm[3][4] = { {0,0,0,0},{1,0,0,0},{3,0,1,2} };
 +
 +/* dd_zo and dd_zp3/dd_zp2 are set up such that i zones with non-zero
 + * components see only j zones with that component 0.
 + */
 +
 +/* The DD zone order */
 +static const ivec dd_zo[DD_MAXZONE] =
 +  {{0,0,0},{1,0,0},{1,1,0},{0,1,0},{0,1,1},{0,0,1},{1,0,1},{1,1,1}};
 +
 +/* The 3D setup */
 +#define dd_z3n  8
 +#define dd_zp3n 4
 +static const ivec dd_zp3[dd_zp3n] = {{0,0,8},{1,3,6},{2,5,6},{3,5,7}};
 +
 +/* The 2D setup */
 +#define dd_z2n  4
 +#define dd_zp2n 2
 +static const ivec dd_zp2[dd_zp2n] = {{0,0,4},{1,3,4}};
 +
 +/* The 1D setup */
 +#define dd_z1n  2
 +#define dd_zp1n 1
 +static const ivec dd_zp1[dd_zp1n] = {{0,0,2}};
 +
 +/* Factors used to avoid problems due to rounding issues */
 +#define DD_CELL_MARGIN       1.0001
 +#define DD_CELL_MARGIN2      1.00005
 +/* Factor to account for pressure scaling during nstlist steps */
 +#define DD_PRES_SCALE_MARGIN 1.02
 +
 +/* Allowed performance loss before we DLB or warn */
 +#define DD_PERF_LOSS 0.05
 +
 +#define DD_CELL_F_SIZE(dd,di) ((dd)->nc[(dd)->dim[(di)]]+1+(di)*2+1+(di))
 +
 +/* Use separate MPI send and receive commands
 + * when nnodes <= GMX_DD_NNODES_SENDRECV.
 + * This saves memory (and some copying for small nnodes).
 + * For high parallelization scatter and gather calls are used.
 + */
 +#define GMX_DD_NNODES_SENDRECV 4
 +
 +
 +/*
 +#define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
 +
 +static void index2xyz(ivec nc,int ind,ivec xyz)
 +{
 +  xyz[XX] = ind % nc[XX];
 +  xyz[YY] = (ind / nc[XX]) % nc[YY];
 +  xyz[ZZ] = ind / (nc[YY]*nc[XX]);
 +}
 +*/
 +
 +/* This order is required to minimize the coordinate communication in PME
 + * which uses decomposition in the x direction.
 + */
 +#define dd_index(n,i) ((((i)[XX]*(n)[YY] + (i)[YY])*(n)[ZZ]) + (i)[ZZ])
 +
 +static void ddindex2xyz(ivec nc,int ind,ivec xyz)
 +{
 +    xyz[XX] = ind / (nc[YY]*nc[ZZ]);
 +    xyz[YY] = (ind / nc[ZZ]) % nc[YY];
 +    xyz[ZZ] = ind % nc[ZZ];
 +}
 +
 +static int ddcoord2ddnodeid(gmx_domdec_t *dd,ivec c)
 +{
 +    int ddindex;
 +    int ddnodeid=-1;
 +    
 +    ddindex = dd_index(dd->nc,c);
 +    if (dd->comm->bCartesianPP_PME)
 +    {
 +        ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
 +    }
 +    else if (dd->comm->bCartesianPP)
 +    {
 +#ifdef GMX_MPI
 +        MPI_Cart_rank(dd->mpi_comm_all,c,&ddnodeid);
 +#endif
 +    }
 +    else
 +    {
 +        ddnodeid = ddindex;
 +    }
 +    
 +    return ddnodeid;
 +}
 +
 +/* Return TRUE when fewer than DIM dimensions are bounded
 + * (ddbox->nboundeddim < DIM) or when DYNAMIC_BOX(*ir) holds.
 + */
 +static gmx_bool dynamic_dd_box(gmx_ddbox_t *ddbox,t_inputrec *ir)
 +{
 +    return (ddbox->nboundeddim < DIM || DYNAMIC_BOX(*ir));
 +}
 +
 +int ddglatnr(gmx_domdec_t *dd,int i)
 +{
 +    int atnr;
 +    
 +    if (dd == NULL)
 +    {
 +        atnr = i + 1;
 +    }
 +    else
 +    {
 +        if (i >= dd->comm->nat[ddnatNR-1])
 +        {
 +            gmx_fatal(FARGS,"glatnr called with %d, which is larger than the local number of atoms (%d)",i,dd->comm->nat[ddnatNR-1]);
 +        }
 +        atnr = dd->gatindex[i] + 1;
 +    }
 +    
 +    return atnr;
 +}
 +
 +/* Return a pointer to the global charge group block stored in dd->comm.
 + * The pointer refers to storage inside dd->comm, not to a copy.
 + */
 +t_block *dd_charge_groups_global(gmx_domdec_t *dd)
 +{
 +    return &dd->comm->cgs_gl;
 +}
 +
 +/* Initialise v as an empty buffer: no storage and zero allocated size */
 +static void vec_rvec_init(vec_rvec_t *v)
 +{
 +    v->nalloc = 0;
 +    v->v      = NULL;
 +}
 +
 +/* Make sure v can hold at least n elements. When n exceeds the current
 + * allocation, the buffer is reallocated to over_alloc_dd(n) elements;
 + * it is never shrunk here.
 + */
 +static void vec_rvec_check_alloc(vec_rvec_t *v,int n)
 +{
 +    if (n > v->nalloc)
 +    {
 +        v->nalloc = over_alloc_dd(n);
 +        srenew(v->v,v->nalloc);
 +    }
 +}
 +
 +void dd_store_state(gmx_domdec_t *dd,t_state *state)
 +{
 +    int i;
 +    
 +    if (state->ddp_count != dd->ddp_count)
 +    {
 +        gmx_incons("The state does not the domain decomposition state");
 +    }
 +    
 +    state->ncg_gl = dd->ncg_home;
 +    if (state->ncg_gl > state->cg_gl_nalloc)
 +    {
 +        state->cg_gl_nalloc = over_alloc_dd(state->ncg_gl);
 +        srenew(state->cg_gl,state->cg_gl_nalloc);
 +    }
 +    for(i=0; i<state->ncg_gl; i++)
 +    {
 +        state->cg_gl[i] = dd->index_gl[i];
 +    }
 +    
 +    state->ddp_count_cg_gl = dd->ddp_count;
 +}
 +
 +/* Return a pointer to the zone setup stored in dd->comm.
 + * The pointer refers to storage inside dd->comm, not to a copy.
 + */
 +gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
 +{
 +    return &dd->comm->zones;
 +}
 +
 +void dd_get_ns_ranges(gmx_domdec_t *dd,int icg,
 +                      int *jcg0,int *jcg1,ivec shift0,ivec shift1)
 +{
 +    gmx_domdec_zones_t *zones;
 +    int izone,d,dim;
 +
 +    zones = &dd->comm->zones;
 +
 +    izone = 0;
 +    while (icg >= zones->izone[izone].cg1)
 +    {
 +        izone++;
 +    }
 +    
 +    if (izone == 0)
 +    {
 +        *jcg0 = icg;
 +    }
 +    else if (izone < zones->nizone)
 +    {
 +        *jcg0 = zones->izone[izone].jcg0;
 +    }
 +    else
 +    {
 +        gmx_fatal(FARGS,"DD icg %d out of range: izone (%d) >= nizone (%d)",
 +                  icg,izone,zones->nizone);
 +    }
 +        
 +    *jcg1 = zones->izone[izone].jcg1;
 +    
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        dim = dd->dim[d];
 +        shift0[dim] = zones->izone[izone].shift0[dim];
 +        shift1[dim] = zones->izone[izone].shift1[dim];
 +        if (dd->comm->tric_dir[dim] || (dd->bGridJump && d > 0))
 +        {
 +            /* A conservative approach, this can be optimized */
 +            shift0[dim] -= 1;
 +            shift1[dim] += 1;
 +        }
 +    }
 +}
 +
 +/* Return the ddnatVSITE entry of the atom count array, i.e. the end of
 + * the local atom range that includes the ddnatVSITE atoms.
 + */
 +int dd_natoms_vsite(gmx_domdec_t *dd)
 +{
 +    return dd->comm->nat[ddnatVSITE];
 +}
 +
 +/* Return the local atom range [*at_start,*at_end) of the ddnatCON atoms:
 + * it starts where the preceding atom type ends and stops at nat[ddnatCON].
 + */
 +void dd_get_constraint_range(gmx_domdec_t *dd,int *at_start,int *at_end)
 +{
 +    *at_start = dd->comm->nat[ddnatCON-1];
 +    *at_end   = dd->comm->nat[ddnatCON];
 +}
 +
 +/* Communicate coordinates to the neighboring cells.
 + *
 + * For every decomposition dimension and every pulse, the coordinates of
 + * the charge groups listed in the send index are packed into the send
 + * buffer, exchanged with dd_sendrecv_rvec (dddirBackward), and the received
 + * coordinates are stored in x behind the atoms that are already present.
 + *
 + * dd  - the domain decomposition data
 + * box - the box; used to shift coordinates sent from a cell with index 0
 + *       in the dimension being communicated
 + * x   - the coordinates; home atoms first, received atoms are added
 + */
 +void dd_move_x(gmx_domdec_t *dd,matrix box,rvec x[])
 +{
 +    int  nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
 +    int  *index,*cgindex;
 +    gmx_domdec_comm_t *comm;
 +    gmx_domdec_comm_dim_t *cd;
 +    gmx_domdec_ind_t *ind;
 +    rvec shift={0,0,0},*buf,*rbuf;
 +    gmx_bool bPBC,bScrew;
 +    
 +    comm = dd->comm;
 +    
 +    cgindex = dd->cgindex;
 +    
 +    buf = comm->vbuf.v;
 +
 +    /* nzone doubles after each dimension; nat_tot counts the atoms
 +     * present in x so far.
 +     */
 +    nzone = 1;
 +    nat_tot = dd->nat_home;
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        /* Coordinates sent from the cell with index 0 get a box shift */
 +        bPBC   = (dd->ci[dd->dim[d]] == 0);
 +        bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
 +        if (bPBC)
 +        {
 +            copy_rvec(box[dd->dim[d]],shift);
 +        }
 +        cd = &comm->cd[d];
 +        for(p=0; p<cd->np; p++)
 +        {
 +            ind = &cd->ind[p];
 +            index = ind->index;
 +            /* n counts the atoms packed into the send buffer */
 +            n = 0;
 +            if (!bPBC)
 +            {
 +                /* No shift: plain copy */
 +                for(i=0; i<ind->nsend[nzone]; i++)
 +                {
 +                    at0 = cgindex[index[i]];
 +                    at1 = cgindex[index[i]+1];
 +                    for(j=at0; j<at1; j++)
 +                    {
 +                        copy_rvec(x[j],buf[n]);
 +                        n++;
 +                    }
 +                }
 +            }
 +            else if (!bScrew)
 +            {
 +                for(i=0; i<ind->nsend[nzone]; i++)
 +                {
 +                    at0 = cgindex[index[i]];
 +                    at1 = cgindex[index[i]+1];
 +                    for(j=at0; j<at1; j++)
 +                    {
 +                        /* We need to shift the coordinates */
 +                        rvec_add(x[j],shift,buf[n]);
 +                        n++;
 +                    }
 +                }
 +            }
 +            else
 +            {
 +                for(i=0; i<ind->nsend[nzone]; i++)
 +                {
 +                    at0 = cgindex[index[i]];
 +                    at1 = cgindex[index[i]+1];
 +                    for(j=at0; j<at1; j++)
 +                    {
 +                        /* Shift x */
 +                        buf[n][XX] = x[j][XX] + shift[XX];
 +                        /* Rotate y and z.
 +                         * This operation requires a special shift force
 +                         * treatment, which is performed in calc_vir.
 +                         */
 +                        buf[n][YY] = box[YY][YY] - x[j][YY];
 +                        buf[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
 +                        n++;
 +                    }
 +                }
 +            }
 +            
 +            /* Receive directly behind the atoms already in x when
 +             * possible, otherwise into the second buffer.
 +             */
 +            if (cd->bInPlace)
 +            {
 +                rbuf = x + nat_tot;
 +            }
 +            else
 +            {
 +                rbuf = comm->vbuf2.v;
 +            }
 +            /* Send and receive the coordinates */
 +            dd_sendrecv_rvec(dd, d, dddirBackward,
 +                             buf,  ind->nsend[nzone+1],
 +                             rbuf, ind->nrecv[nzone+1]);
 +            if (!cd->bInPlace)
 +            {
 +                /* Copy the received coordinates to their atom ranges */
 +                j = 0;
 +                for(zone=0; zone<nzone; zone++)
 +                {
 +                    for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
 +                    {
 +                        copy_rvec(rbuf[j],x[i]);
 +                        j++;
 +                    }
 +                }
 +            }
 +            nat_tot += ind->nrecv[nzone+1];
 +        }
 +        nzone += nzone;
 +    }
 +}
 +
 +/* Communicate forces back to the cells that sent the coordinates.
 + *
 + * The dimensions and pulses are traversed in the reverse order of
 + * dd_move_x. For each pulse the forces on the received atoms are sent with
 + * dd_sendrecv_rvec (dddirForward) and the forces that come back are added
 + * to f for the atoms listed in the send index.
 + *
 + * dd     - the domain decomposition data
 + * f      - the forces; updated in place
 + * fshift - the shift forces, may be NULL; when given, forces received in a
 + *          cell with index 0 in the communicated dimension are also added
 + *          to the shift force of that dimension
 + */
 +void dd_move_f(gmx_domdec_t *dd,rvec f[],rvec *fshift)
 +{
 +    int  nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
 +    int  *index,*cgindex;
 +    gmx_domdec_comm_t *comm;
 +    gmx_domdec_comm_dim_t *cd;
 +    gmx_domdec_ind_t *ind;
 +    rvec *buf,*sbuf;
 +    ivec vis;
 +    int  is;
 +    gmx_bool bPBC,bScrew;
 +    
 +    comm = dd->comm;
 +    
 +    cgindex = dd->cgindex;
 +
 +    buf = comm->vbuf.v;
 +
 +    n = 0;
 +    /* Start from the last dimension: nzone is halved after each dimension
 +     * and nat_tot is reduced by the received atom count of each pulse.
 +     */
 +    nzone = comm->zones.n/2;
 +    nat_tot = dd->nat_tot;
 +    for(d=dd->ndim-1; d>=0; d--)
 +    {
 +        bPBC   = (dd->ci[dd->dim[d]] == 0);
 +        bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
 +        /* Without shift forces and without screw pbc the plain
 +         * summation branch suffices.
 +         */
 +        if (fshift == NULL && !bScrew)
 +        {
 +            bPBC = FALSE;
 +        }
 +        /* Determine which shift vector we need */
 +        clear_ivec(vis);
 +        vis[dd->dim[d]] = 1;
 +        is = IVEC2IS(vis);
 +        
 +        cd = &comm->cd[d];
 +        for(p=cd->np-1; p>=0; p--) {
 +            ind = &cd->ind[p];
 +            nat_tot -= ind->nrecv[nzone+1];
 +            if (cd->bInPlace)
 +            {
 +                /* Send straight from the force array */
 +                sbuf = f + nat_tot;
 +            }
 +            else
 +            {
 +                /* Gather the forces to send from their atom ranges */
 +                sbuf = comm->vbuf2.v;
 +                j = 0;
 +                for(zone=0; zone<nzone; zone++)
 +                {
 +                    for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
 +                    {
 +                        copy_rvec(f[i],sbuf[j]);
 +                        j++;
 +                    }
 +                }
 +            }
 +            /* Communicate the forces */
 +            dd_sendrecv_rvec(dd, d, dddirForward,
 +                             sbuf, ind->nrecv[nzone+1],
 +                             buf,  ind->nsend[nzone+1]);
 +            index = ind->index;
 +            /* Add the received forces */
 +            n = 0;
 +            if (!bPBC)
 +            {
 +                for(i=0; i<ind->nsend[nzone]; i++)
 +                {
 +                    at0 = cgindex[index[i]];
 +                    at1 = cgindex[index[i]+1];
 +                    for(j=at0; j<at1; j++)
 +                    {
 +                        rvec_inc(f[j],buf[n]);
 +                        n++;
 +                    }
 +                } 
 +            }
 +            else if (!bScrew)
 +            {
 +                for(i=0; i<ind->nsend[nzone]; i++)
 +                {
 +                    at0 = cgindex[index[i]];
 +                    at1 = cgindex[index[i]+1];
 +                    for(j=at0; j<at1; j++)
 +                    {
 +                        rvec_inc(f[j],buf[n]);
 +                        /* Add this force to the shift force */
 +                        rvec_inc(fshift[is],buf[n]);
 +                        n++;
 +                    }
 +                }
 +            }
 +            else
 +            {
 +                for(i=0; i<ind->nsend[nzone]; i++)
 +                {
 +                    at0 = cgindex[index[i]];
 +                    at1 = cgindex[index[i]+1];
 +                    for(j=at0; j<at1; j++)
 +                    {
 +                        /* Rotate the force */
 +                        f[j][XX] += buf[n][XX];
 +                        f[j][YY] -= buf[n][YY];
 +                        f[j][ZZ] -= buf[n][ZZ];
 +                        if (fshift)
 +                        {
 +                            /* Add this force to the shift force */
 +                            rvec_inc(fshift[is],buf[n]);
 +                        }
 +                        n++;
 +                    }
 +                }
 +            }
 +        }
 +        nzone /= 2;
 +    }
 +}
 +
 +void dd_atom_spread_real(gmx_domdec_t *dd,real v[])
 +{
 +    int  nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
 +    int  *index,*cgindex;
 +    gmx_domdec_comm_t *comm;
 +    gmx_domdec_comm_dim_t *cd;
 +    gmx_domdec_ind_t *ind;
 +    real *buf,*rbuf;
 +    
 +    comm = dd->comm;
 +    
 +    cgindex = dd->cgindex;
 +    
 +    buf = &comm->vbuf.v[0][0];
 +
 +    nzone = 1;
 +    nat_tot = dd->nat_home;
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        cd = &comm->cd[d];
 +        for(p=0; p<cd->np; p++)
 +        {
 +            ind = &cd->ind[p];
 +            index = ind->index;
 +            n = 0;
 +            for(i=0; i<ind->nsend[nzone]; i++)
 +            {
 +                at0 = cgindex[index[i]];
 +                at1 = cgindex[index[i]+1];
 +                for(j=at0; j<at1; j++)
 +                {
 +                    buf[n] = v[j];
 +                    n++;
 +                }
 +            }
 +            
 +            if (cd->bInPlace)
 +            {
 +                rbuf = v + nat_tot;
 +            }
 +            else
 +            {
 +                rbuf = &comm->vbuf2.v[0][0];
 +            }
 +            /* Send and receive the coordinates */
 +            dd_sendrecv_real(dd, d, dddirBackward,
 +                             buf,  ind->nsend[nzone+1],
 +                             rbuf, ind->nrecv[nzone+1]);
 +            if (!cd->bInPlace)
 +            {
 +                j = 0;
 +                for(zone=0; zone<nzone; zone++)
 +                {
 +                    for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
 +                    {
 +                        v[i] = rbuf[j];
 +                        j++;
 +                    }
 +                }
 +            }
 +            nat_tot += ind->nrecv[nzone+1];
 +        }
 +        nzone += nzone;
 +    }
 +}
 +
 +void dd_atom_sum_real(gmx_domdec_t *dd,real v[])
 +{
 +    int  nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
 +    int  *index,*cgindex;
 +    gmx_domdec_comm_t *comm;
 +    gmx_domdec_comm_dim_t *cd;
 +    gmx_domdec_ind_t *ind;
 +    real *buf,*sbuf;
 +    
 +    comm = dd->comm;
 +    
 +    cgindex = dd->cgindex;
 +
 +    buf = &comm->vbuf.v[0][0];
 +
 +    n = 0;
 +    nzone = comm->zones.n/2;
 +    nat_tot = dd->nat_tot;
 +    for(d=dd->ndim-1; d>=0; d--)
 +    {
 +        cd = &comm->cd[d];
 +        for(p=cd->np-1; p>=0; p--) {
 +            ind = &cd->ind[p];
 +            nat_tot -= ind->nrecv[nzone+1];
 +            if (cd->bInPlace)
 +            {
 +                sbuf = v + nat_tot;
 +            }
 +            else
 +            {
 +                sbuf = &comm->vbuf2.v[0][0];
 +                j = 0;
 +                for(zone=0; zone<nzone; zone++)
 +                {
 +                    for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
 +                    {
 +                        sbuf[j] = v[i];
 +                        j++;
 +                    }
 +                }
 +            }
 +            /* Communicate the forces */
 +            dd_sendrecv_real(dd, d, dddirForward,
 +                             sbuf, ind->nrecv[nzone+1],
 +                             buf,  ind->nsend[nzone+1]);
 +            index = ind->index;
 +            /* Add the received forces */
 +            n = 0;
 +            for(i=0; i<ind->nsend[nzone]; i++)
 +            {
 +                at0 = cgindex[index[i]];
 +                at1 = cgindex[index[i]+1];
 +                for(j=at0; j<at1; j++)
 +                {
 +                    v[j] += buf[n];
 +                    n++;
 +                }
 +            } 
 +        }
 +        nzone /= 2;
 +    }
 +}
 +
 +static void print_ddzone(FILE *fp,int d,int i,int j,gmx_ddzone_t *zone)
 +{
 +    fprintf(fp,"zone d0 %d d1 %d d2 %d  min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
 +            d,i,j,
 +            zone->min0,zone->max1,
 +            zone->mch0,zone->mch0,
 +            zone->p1_0,zone->p1_1);
 +}
 +
 +static void dd_sendrecv_ddzone(const gmx_domdec_t *dd,
 +                               int ddimind,int direction,
 +                               gmx_ddzone_t *buf_s,int n_s,
 +                               gmx_ddzone_t *buf_r,int n_r)
 +{
 +    rvec vbuf_s[5*2],vbuf_r[5*2];
 +    int i;
 +
 +    for(i=0; i<n_s; i++)
 +    {
 +        vbuf_s[i*2  ][0] = buf_s[i].min0;
 +        vbuf_s[i*2  ][1] = buf_s[i].max1;
 +        vbuf_s[i*2  ][2] = buf_s[i].mch0;
 +        vbuf_s[i*2+1][0] = buf_s[i].mch1;
 +        vbuf_s[i*2+1][1] = buf_s[i].p1_0;
 +        vbuf_s[i*2+1][2] = buf_s[i].p1_1;
 +    }
 +
 +    dd_sendrecv_rvec(dd, ddimind, direction,
 +                     vbuf_s, n_s*2,
 +                     vbuf_r, n_r*2);
 +
 +    for(i=0; i<n_r; i++)
 +    {
 +        buf_r[i].min0 = vbuf_r[i*2  ][0];
 +        buf_r[i].max1 = vbuf_r[i*2  ][1];
 +        buf_r[i].mch0 = vbuf_r[i*2  ][2];
 +        buf_r[i].mch1 = vbuf_r[i*2+1][0];
 +        buf_r[i].p1_0 = vbuf_r[i*2+1][1];
 +        buf_r[i].p1_1 = vbuf_r[i*2+1][2];
 +    }
 +}
 +
 +static void dd_move_cellx(gmx_domdec_t *dd,gmx_ddbox_t *ddbox,
 +                          rvec cell_ns_x0,rvec cell_ns_x1)
 +{
 +    int  d,d1,dim,dim1,pos,buf_size,i,j,k,p,npulse,npulse_min;
 +    gmx_ddzone_t *zp,buf_s[5],buf_r[5],buf_e[5];
 +    rvec extr_s[2],extr_r[2];
 +    rvec dh;
 +    real dist_d,c=0,det;
 +    gmx_domdec_comm_t *comm;
 +    gmx_bool bPBC,bUse;
 +
 +    comm = dd->comm;
 +
 +    for(d=1; d<dd->ndim; d++)
 +    {
 +        dim = dd->dim[d];
 +        zp = (d == 1) ? &comm->zone_d1[0] : &comm->zone_d2[0][0];
 +        zp->min0 = cell_ns_x0[dim];
 +        zp->max1 = cell_ns_x1[dim];
 +        zp->mch0 = cell_ns_x0[dim];
 +        zp->mch1 = cell_ns_x1[dim];
 +        zp->p1_0 = cell_ns_x0[dim];
 +        zp->p1_1 = cell_ns_x1[dim];
 +    }
 +    
 +    for(d=dd->ndim-2; d>=0; d--)
 +    {
 +        dim  = dd->dim[d];
 +        bPBC = (dim < ddbox->npbcdim);
 +
 +        /* Use an rvec to store two reals */
 +        extr_s[d][0] = comm->cell_f0[d+1];
 +        extr_s[d][1] = comm->cell_f1[d+1];
 +        extr_s[d][2] = 0;
 +
 +        pos = 0;
 +        /* Store the extremes in the backward sending buffer,
 +         * so the get updated separately from the forward communication.
 +         */
 +        for(d1=d; d1<dd->ndim-1; d1++)
 +        {
 +            /* We invert the order to be able to use the same loop for buf_e */
 +            buf_s[pos].min0 = extr_s[d1][1];
 +            buf_s[pos].max1 = extr_s[d1][0];
 +            buf_s[pos].mch0 = 0;
 +            buf_s[pos].mch1 = 0;
 +            /* Store the cell corner of the dimension we communicate along */
 +            buf_s[pos].p1_0 = comm->cell_x0[dim];
 +            buf_s[pos].p1_1 = 0;
 +            pos++;
 +        }
 +
 +        buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
 +        pos++;
 +
 +        if (dd->ndim == 3 && d == 0)
 +        {
 +            buf_s[pos] = comm->zone_d2[0][1];
 +            pos++;
 +            buf_s[pos] = comm->zone_d1[0];
 +            pos++;
 +        }
 +
 +        /* We only need to communicate the extremes
 +         * in the forward direction
 +         */
 +        npulse = comm->cd[d].np;
 +        if (bPBC)
 +        {
 +            /* Take the minimum to avoid double communication */
 +            npulse_min = min(npulse,dd->nc[dim]-1-npulse);
 +        }
 +        else
 +        {
 +            /* Without PBC we should really not communicate over
 +             * the boundaries, but implementing that complicates
 +             * the communication setup and therefore we simply
 +             * do all communication, but ignore some data.
 +             */
 +            npulse_min = npulse;
 +        }
 +        for(p=0; p<npulse_min; p++)
 +        {
 +            /* Communicate the extremes forward */
 +            bUse = (bPBC || dd->ci[dim] > 0);
 +
 +            dd_sendrecv_rvec(dd, d, dddirForward,
 +                             extr_s+d, dd->ndim-d-1,
 +                             extr_r+d, dd->ndim-d-1);
 +
 +            if (bUse)
 +            {
 +                for(d1=d; d1<dd->ndim-1; d1++)
 +                {
 +                    extr_s[d1][0] = max(extr_s[d1][0],extr_r[d1][0]);
 +                    extr_s[d1][1] = min(extr_s[d1][1],extr_r[d1][1]);
 +                }
 +            }
 +        }
 +
 +        buf_size = pos;
 +        for(p=0; p<npulse; p++)
 +        {
 +            /* Communicate all the zone information backward */
 +            bUse = (bPBC || dd->ci[dim] < dd->nc[dim] - 1);
 +
 +            dd_sendrecv_ddzone(dd, d, dddirBackward,
 +                               buf_s, buf_size,
 +                               buf_r, buf_size);
 +
 +            clear_rvec(dh);
 +            if (p > 0)
 +            {
 +                for(d1=d+1; d1<dd->ndim; d1++)
 +                {
 +                    /* Determine the decrease of maximum required
 +                     * communication height along d1 due to the distance along d,
 +                     * this avoids a lot of useless atom communication.
 +                     */
 +                    dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;
 +
 +                    if (ddbox->tric_dir[dim])
 +                    {
 +                        /* c is the off-diagonal coupling between the cell planes
 +                         * along directions d and d1.
 +                         */
 +                        c = ddbox->v[dim][dd->dim[d1]][dim];
 +                    }
 +                    else
 +                    {
 +                        c = 0;
 +                    }
 +                    det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
 +                    if (det > 0)
 +                    {
 +                        dh[d1] = comm->cutoff - (c*dist_d + sqrt(det))/(1 + c*c);
 +                    }
 +                    else
 +                    {
 +                        /* A negative value signals out of range */
 +                        dh[d1] = -1;
 +                    }
 +                }
 +            }
 +
 +            /* Accumulate the extremes over all pulses */
 +            for(i=0; i<buf_size; i++)
 +            {
 +                if (p == 0)
 +                {
 +                    buf_e[i] = buf_r[i];
 +                }
 +                else
 +                {
 +                    if (bUse)
 +                    {
 +                        buf_e[i].min0 = min(buf_e[i].min0,buf_r[i].min0);
 +                        buf_e[i].max1 = max(buf_e[i].max1,buf_r[i].max1);
 +                    }
 +
 +                    if (dd->ndim == 3 && d == 0 && i == buf_size - 1)
 +                    {
 +                        d1 = 1;
 +                    }
 +                    else
 +                    {
 +                        d1 = d + 1;
 +                    }
 +                    if (bUse && dh[d1] >= 0)
 +                    {
 +                        buf_e[i].mch0 = max(buf_e[i].mch0,buf_r[i].mch0-dh[d1]);
 +                        buf_e[i].mch1 = max(buf_e[i].mch1,buf_r[i].mch1-dh[d1]);
 +                    }
 +                }
 +                /* Copy the received buffer to the send buffer,
 +                 * to pass the data through with the next pulse.
 +                 */
 +                buf_s[i] = buf_r[i];
 +            }
 +            if (((bPBC || dd->ci[dim]+npulse < dd->nc[dim]) && p == npulse-1) ||
 +                (!bPBC && dd->ci[dim]+1+p == dd->nc[dim]-1))
 +            {
 +                /* Store the extremes */ 
 +                pos = 0;
 +
 +                for(d1=d; d1<dd->ndim-1; d1++)
 +                {
 +                    extr_s[d1][1] = min(extr_s[d1][1],buf_e[pos].min0);
 +                    extr_s[d1][0] = max(extr_s[d1][0],buf_e[pos].max1);
 +                    pos++;
 +                }
 +
 +                if (d == 1 || (d == 0 && dd->ndim == 3))
 +                {
 +                    for(i=d; i<2; i++)
 +                    {
 +                        comm->zone_d2[1-d][i] = buf_e[pos];
 +                        pos++;
 +                    }
 +                }
 +                if (d == 0)
 +                {
 +                    comm->zone_d1[1] = buf_e[pos];
 +                    pos++;
 +                }
 +            }
 +        }
 +    }
 +    
 +    if (dd->ndim >= 2)
 +    {
 +        dim = dd->dim[1];
 +        for(i=0; i<2; i++)
 +        {
 +            if (debug)
 +            {
 +                print_ddzone(debug,1,i,0,&comm->zone_d1[i]);
 +            }
 +            cell_ns_x0[dim] = min(cell_ns_x0[dim],comm->zone_d1[i].min0);
 +            cell_ns_x1[dim] = max(cell_ns_x1[dim],comm->zone_d1[i].max1);
 +        }
 +    }
 +    if (dd->ndim >= 3)
 +    {
 +        dim = dd->dim[2];
 +        for(i=0; i<2; i++)
 +        {
 +            for(j=0; j<2; j++)
 +            {
 +                if (debug)
 +                {
 +                    print_ddzone(debug,2,i,j,&comm->zone_d2[i][j]);
 +                }
 +                cell_ns_x0[dim] = min(cell_ns_x0[dim],comm->zone_d2[i][j].min0);
 +                cell_ns_x1[dim] = max(cell_ns_x1[dim],comm->zone_d2[i][j].max1);
 +            }
 +        }
 +    }
 +    for(d=1; d<dd->ndim; d++)
 +    {
 +        comm->cell_f_max0[d] = extr_s[d-1][0];
 +        comm->cell_f_min1[d] = extr_s[d-1][1];
 +        if (debug)
 +        {
 +            fprintf(debug,"Cell fraction d %d, max0 %f, min1 %f\n",
 +                    d,comm->cell_f_max0[d],comm->cell_f_min1[d]);
 +        }
 +    }
 +}
 +
 +static void dd_collect_cg(gmx_domdec_t *dd,
 +                          t_state *state_local)
 +{
 +    gmx_domdec_master_t *ma=NULL;
 +    int buf2[2],*ibuf,i,ncg_home=0,*cg=NULL,nat_home=0;
 +    t_block *cgs_gl;
 +
 +    if (state_local->ddp_count == dd->comm->master_cg_ddp_count)
 +    {
 +        /* The master has the correct distribution */
 +        return;
 +    }
 +    
 +    if (state_local->ddp_count == dd->ddp_count)
 +    {
 +        ncg_home = dd->ncg_home;
 +        cg       = dd->index_gl;
 +        nat_home = dd->nat_home;
 +    } 
 +    else if (state_local->ddp_count_cg_gl == state_local->ddp_count)
 +    {
 +        cgs_gl = &dd->comm->cgs_gl;
 +
 +        ncg_home = state_local->ncg_gl;
 +        cg       = state_local->cg_gl;
 +        nat_home = 0;
 +        for(i=0; i<ncg_home; i++)
 +        {
 +            nat_home += cgs_gl->index[cg[i]+1] - cgs_gl->index[cg[i]];
 +        }
 +    }
 +    else
 +    {
 +        gmx_incons("Attempted to collect a vector for a state for which the charge group distribution is unknown");
 +    }
 +    
 +    buf2[0] = dd->ncg_home;
 +    buf2[1] = dd->nat_home;
 +    if (DDMASTER(dd))
 +    {
 +        ma = dd->ma;
 +        ibuf = ma->ibuf;
 +    }
 +    else
 +    {
 +        ibuf = NULL;
 +    }
 +    /* Collect the charge group and atom counts on the master */
 +    dd_gather(dd,2*sizeof(int),buf2,ibuf);
 +    
 +    if (DDMASTER(dd))
 +    {
 +        ma->index[0] = 0;
 +        for(i=0; i<dd->nnodes; i++)
 +        {
 +            ma->ncg[i] = ma->ibuf[2*i];
 +            ma->nat[i] = ma->ibuf[2*i+1];
 +            ma->index[i+1] = ma->index[i] + ma->ncg[i];
 +            
 +        }
 +        /* Make byte counts and indices */
 +        for(i=0; i<dd->nnodes; i++)
 +        {
 +            ma->ibuf[i] = ma->ncg[i]*sizeof(int);
 +            ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
 +        }
 +        if (debug)
 +        {
 +            fprintf(debug,"Initial charge group distribution: ");
 +            for(i=0; i<dd->nnodes; i++)
 +                fprintf(debug," %d",ma->ncg[i]);
 +            fprintf(debug,"\n");
 +        }
 +    }
 +    
 +    /* Collect the charge group indices on the master */
 +    dd_gatherv(dd,
 +               dd->ncg_home*sizeof(int),dd->index_gl,
 +               DDMASTER(dd) ? ma->ibuf : NULL,
 +               DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
 +               DDMASTER(dd) ? ma->cg : NULL);
 +    
 +    dd->comm->master_cg_ddp_count = state_local->ddp_count;
 +}
 +
 +static void dd_collect_vec_sendrecv(gmx_domdec_t *dd,
 +                                    rvec *lv,rvec *v)
 +{
 +    gmx_domdec_master_t *ma;
 +    int  n,i,c,a,nalloc=0;
 +    rvec *buf=NULL;
 +    t_block *cgs_gl;
 +
 +    ma = dd->ma;
 +    
 +    if (!DDMASTER(dd))
 +    {
 +#ifdef GMX_MPI
 +        MPI_Send(lv,dd->nat_home*sizeof(rvec),MPI_BYTE,DDMASTERRANK(dd),
 +                 dd->rank,dd->mpi_comm_all);
 +#endif
 +    } else {
 +        /* Copy the master coordinates to the global array */
 +        cgs_gl = &dd->comm->cgs_gl;
 +
 +        n = DDMASTERRANK(dd);
 +        a = 0;
 +        for(i=ma->index[n]; i<ma->index[n+1]; i++)
 +        {
 +            for(c=cgs_gl->index[ma->cg[i]]; c<cgs_gl->index[ma->cg[i]+1]; c++)
 +            {
 +                copy_rvec(lv[a++],v[c]);
 +            }
 +        }
 +        
 +        for(n=0; n<dd->nnodes; n++)
 +        {
 +            if (n != dd->rank)
 +            {
 +                if (ma->nat[n] > nalloc)
 +                {
 +                    nalloc = over_alloc_dd(ma->nat[n]);
 +                    srenew(buf,nalloc);
 +                }
 +#ifdef GMX_MPI
 +                MPI_Recv(buf,ma->nat[n]*sizeof(rvec),MPI_BYTE,DDRANK(dd,n),
 +                         n,dd->mpi_comm_all,MPI_STATUS_IGNORE);
 +#endif
 +                a = 0;
 +                for(i=ma->index[n]; i<ma->index[n+1]; i++)
 +                {
 +                    for(c=cgs_gl->index[ma->cg[i]]; c<cgs_gl->index[ma->cg[i]+1]; c++)
 +                    {
 +                        copy_rvec(buf[a++],v[c]);
 +                    }
 +                }
 +            }
 +        }
 +        sfree(buf);
 +    }
 +}
 +
 +static void get_commbuffer_counts(gmx_domdec_t *dd,
 +                                  int **counts,int **disps)
 +{
 +    gmx_domdec_master_t *ma;
 +    int n;
 +
 +    ma = dd->ma;
 +    
 +    /* Make the rvec count and displacment arrays */
 +    *counts  = ma->ibuf;
 +    *disps   = ma->ibuf + dd->nnodes;
 +    for(n=0; n<dd->nnodes; n++)
 +    {
 +        (*counts)[n] = ma->nat[n]*sizeof(rvec);
 +        (*disps)[n]  = (n == 0 ? 0 : (*disps)[n-1] + (*counts)[n-1]);
 +    }
 +}
 +
 +static void dd_collect_vec_gatherv(gmx_domdec_t *dd,
 +                                   rvec *lv,rvec *v)
 +{
 +    gmx_domdec_master_t *ma;
 +    int  *rcounts=NULL,*disps=NULL;
 +    int  n,i,c,a;
 +    rvec *buf=NULL;
 +    t_block *cgs_gl;
 +    
 +    ma = dd->ma;
 +    
 +    if (DDMASTER(dd))
 +    {
 +        get_commbuffer_counts(dd,&rcounts,&disps);
 +
 +        buf = ma->vbuf;
 +    }
 +    
 +    dd_gatherv(dd,dd->nat_home*sizeof(rvec),lv,rcounts,disps,buf);
 +
 +    if (DDMASTER(dd))
 +    {
 +        cgs_gl = &dd->comm->cgs_gl;
 +
 +        a = 0;
 +        for(n=0; n<dd->nnodes; n++)
 +        {
 +            for(i=ma->index[n]; i<ma->index[n+1]; i++)
 +            {
 +                for(c=cgs_gl->index[ma->cg[i]]; c<cgs_gl->index[ma->cg[i]+1]; c++)
 +                {
 +                    copy_rvec(buf[a++],v[c]);
 +                }
 +            }
 +        }
 +    }
 +}
 +
 +void dd_collect_vec(gmx_domdec_t *dd,
 +                    t_state *state_local,rvec *lv,rvec *v)
 +{
 +    gmx_domdec_master_t *ma;
 +    int  n,i,c,a,nalloc=0;
 +    rvec *buf=NULL;
 +    
 +    dd_collect_cg(dd,state_local);
 +
 +    if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
 +    {
 +        dd_collect_vec_sendrecv(dd,lv,v);
 +    }
 +    else
 +    {
 +        dd_collect_vec_gatherv(dd,lv,v);
 +    }
 +}
 +
 +
 +void dd_collect_state(gmx_domdec_t *dd,
 +                      t_state *state_local,t_state *state)
 +{
 +    int est,i,j,nh;
 +
 +    nh = state->nhchainlength;
 +
 +    if (DDMASTER(dd))
 +    {
 +        state->lambda = state_local->lambda;
 +        state->veta = state_local->veta;
 +        state->vol0 = state_local->vol0;
 +        copy_mat(state_local->box,state->box);
 +        copy_mat(state_local->boxv,state->boxv);
 +        copy_mat(state_local->svir_prev,state->svir_prev);
 +        copy_mat(state_local->fvir_prev,state->fvir_prev);
 +        copy_mat(state_local->pres_prev,state->pres_prev);
 +
 +
 +        for(i=0; i<state_local->ngtc; i++)
 +        {
 +            for(j=0; j<nh; j++) {
 +                state->nosehoover_xi[i*nh+j]        = state_local->nosehoover_xi[i*nh+j];
 +                state->nosehoover_vxi[i*nh+j]       = state_local->nosehoover_vxi[i*nh+j];
 +            }
 +            state->therm_integral[i] = state_local->therm_integral[i];            
 +        }
 +        for(i=0; i<state_local->nnhpres; i++) 
 +        {
 +            for(j=0; j<nh; j++) {
 +                state->nhpres_xi[i*nh+j]        = state_local->nhpres_xi[i*nh+j];
 +                state->nhpres_vxi[i*nh+j]       = state_local->nhpres_vxi[i*nh+j];
 +            }
 +        }
 +    }
 +    for(est=0; est<estNR; est++)
 +    {
 +        if (EST_DISTR(est) && (state_local->flags & (1<<est)))
 +        {
 +            switch (est) {
 +            case estX:
 +                dd_collect_vec(dd,state_local,state_local->x,state->x);
 +                break;
 +            case estV:
 +                dd_collect_vec(dd,state_local,state_local->v,state->v);
 +                break;
 +            case estSDX:
 +                dd_collect_vec(dd,state_local,state_local->sd_X,state->sd_X);
 +                break;
 +            case estCGP:
 +                dd_collect_vec(dd,state_local,state_local->cg_p,state->cg_p);
 +                break;
 +            case estLD_RNG:
 +                if (state->nrngi == 1)
 +                {
 +                    if (DDMASTER(dd))
 +                    {
 +                        for(i=0; i<state_local->nrng; i++)
 +                        {
 +                            state->ld_rng[i] = state_local->ld_rng[i];
 +                        }
 +                    }
 +                }
 +                else
 +                {
 +                    dd_gather(dd,state_local->nrng*sizeof(state->ld_rng[0]),
 +                              state_local->ld_rng,state->ld_rng);
 +                }
 +                break;
 +            case estLD_RNGI:
 +                if (state->nrngi == 1)
 +                {
 +                   if (DDMASTER(dd))
 +                    {
 +                        state->ld_rngi[0] = state_local->ld_rngi[0];
 +                    } 
 +                }
 +                else
 +                {
 +                    dd_gather(dd,sizeof(state->ld_rngi[0]),
 +                              state_local->ld_rngi,state->ld_rngi);
 +                }
 +                break;
 +            case estDISRE_INITF:
 +            case estDISRE_RM3TAV:
 +            case estORIRE_INITF:
 +            case estORIRE_DTAV:
 +                break;
 +            default:
 +                gmx_incons("Unknown state entry encountered in dd_collect_state");
 +            }
 +        }
 +    }
 +}
 +
 +static void dd_realloc_fr_cg(t_forcerec *fr,int nalloc)
 +{
 +    if (debug)
 +    {
 +        fprintf(debug,"Reallocating forcerec: currently %d, required %d, allocating %d\n",fr->cg_nalloc,nalloc,over_alloc_dd(nalloc));
 +    }
 +    fr->cg_nalloc = over_alloc_dd(nalloc);
 +    srenew(fr->cg_cm,fr->cg_nalloc);
 +    srenew(fr->cginfo,fr->cg_nalloc);
 +}
 +
 +static void dd_realloc_state(t_state *state,rvec **f,int nalloc)
 +{
 +    int est;
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"Reallocating state: currently %d, required %d, allocating %d\n",state->nalloc,nalloc,over_alloc_dd(nalloc));
 +    }
 +
 +    state->nalloc = over_alloc_dd(nalloc);
 +    
 +    for(est=0; est<estNR; est++)
 +    {
 +        if (EST_DISTR(est) && (state->flags & (1<<est)))
 +        {
 +            switch(est) {
 +            case estX:
 +                srenew(state->x,state->nalloc);
 +                break;
 +            case estV:
 +                srenew(state->v,state->nalloc);
 +                break;
 +            case estSDX:
 +                srenew(state->sd_X,state->nalloc);
 +                break;
 +            case estCGP:
 +                srenew(state->cg_p,state->nalloc);
 +                break;
 +            case estLD_RNG:
 +            case estLD_RNGI:
 +            case estDISRE_INITF:
 +            case estDISRE_RM3TAV:
 +            case estORIRE_INITF:
 +            case estORIRE_DTAV:
 +                /* No reallocation required */
 +                break;
 +            default:
 +                gmx_incons("Unknown state entry encountered in dd_realloc_state");            
 +            }
 +        }
 +    }
 +    
 +    if (f != NULL)
 +    {
 +        srenew(*f,state->nalloc);
 +    }
 +}
 +
 +static void dd_distribute_vec_sendrecv(gmx_domdec_t *dd,t_block *cgs,
 +                                       rvec *v,rvec *lv)
 +{
 +    gmx_domdec_master_t *ma;
 +    int  n,i,c,a,nalloc=0;
 +    rvec *buf=NULL;
 +    
 +    if (DDMASTER(dd))
 +    {
 +        ma  = dd->ma;
 +        
 +        for(n=0; n<dd->nnodes; n++)
 +        {
 +            if (n != dd->rank)
 +            {
 +                if (ma->nat[n] > nalloc)
 +                {
 +                    nalloc = over_alloc_dd(ma->nat[n]);
 +                    srenew(buf,nalloc);
 +                }
 +                /* Use lv as a temporary buffer */
 +                a = 0;
 +                for(i=ma->index[n]; i<ma->index[n+1]; i++)
 +                {
 +                    for(c=cgs->index[ma->cg[i]]; c<cgs->index[ma->cg[i]+1]; c++)
 +                    {
 +                        copy_rvec(v[c],buf[a++]);
 +                    }
 +                }
 +                if (a != ma->nat[n])
 +                {
 +                    gmx_fatal(FARGS,"Internal error a (%d) != nat (%d)",
 +                              a,ma->nat[n]);
 +                }
 +                
 +#ifdef GMX_MPI
 +                MPI_Send(buf,ma->nat[n]*sizeof(rvec),MPI_BYTE,
 +                         DDRANK(dd,n),n,dd->mpi_comm_all);
 +#endif
 +            }
 +        }
 +        sfree(buf);
 +        n = DDMASTERRANK(dd);
 +        a = 0;
 +        for(i=ma->index[n]; i<ma->index[n+1]; i++)
 +        {
 +            for(c=cgs->index[ma->cg[i]]; c<cgs->index[ma->cg[i]+1]; c++)
 +            {
 +                copy_rvec(v[c],lv[a++]);
 +            }
 +        }
 +    }
 +    else
 +    {
 +#ifdef GMX_MPI
 +        MPI_Recv(lv,dd->nat_home*sizeof(rvec),MPI_BYTE,DDMASTERRANK(dd),
 +                 MPI_ANY_TAG,dd->mpi_comm_all,MPI_STATUS_IGNORE);
 +#endif
 +    }
 +}
 +
 +static void dd_distribute_vec_scatterv(gmx_domdec_t *dd,t_block *cgs,
 +                                       rvec *v,rvec *lv)
 +{
 +    gmx_domdec_master_t *ma;
 +    int  *scounts=NULL,*disps=NULL;
 +    int  n,i,c,a,nalloc=0;
 +    rvec *buf=NULL;
 +    
 +    if (DDMASTER(dd))
 +    {
 +        ma  = dd->ma;
 +     
 +        get_commbuffer_counts(dd,&scounts,&disps);
 +
 +        buf = ma->vbuf;
 +        a = 0;
 +        for(n=0; n<dd->nnodes; n++)
 +        {
 +            for(i=ma->index[n]; i<ma->index[n+1]; i++)
 +            {
 +                for(c=cgs->index[ma->cg[i]]; c<cgs->index[ma->cg[i]+1]; c++)
 +                {
 +                    copy_rvec(v[c],buf[a++]);
 +                }
 +            }
 +        }
 +    }
 +
 +    dd_scatterv(dd,scounts,disps,buf,dd->nat_home*sizeof(rvec),lv);
 +}
 +
 +static void dd_distribute_vec(gmx_domdec_t *dd,t_block *cgs,rvec *v,rvec *lv)
 +{
 +    if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
 +    {
 +        dd_distribute_vec_sendrecv(dd,cgs,v,lv);
 +    }
 +    else
 +    {
 +        dd_distribute_vec_scatterv(dd,cgs,v,lv);
 +    }
 +}
 +
 +static void dd_distribute_state(gmx_domdec_t *dd,t_block *cgs,
 +                                t_state *state,t_state *state_local,
 +                                rvec **f)
 +{
 +    int  i,j,ngtch,ngtcp,nh;
 +
 +    nh = state->nhchainlength;
 +
 +    if (DDMASTER(dd))
 +    {
 +        state_local->lambda = state->lambda;
 +        state_local->veta   = state->veta;
 +        state_local->vol0   = state->vol0;
 +        copy_mat(state->box,state_local->box);
 +        copy_mat(state->box_rel,state_local->box_rel);
 +        copy_mat(state->boxv,state_local->boxv);
 +        copy_mat(state->svir_prev,state_local->svir_prev);
 +        copy_mat(state->fvir_prev,state_local->fvir_prev);
 +        for(i=0; i<state_local->ngtc; i++)
 +        {
 +            for(j=0; j<nh; j++) {
 +                state_local->nosehoover_xi[i*nh+j]        = state->nosehoover_xi[i*nh+j];
 +                state_local->nosehoover_vxi[i*nh+j]       = state->nosehoover_vxi[i*nh+j];
 +            }
 +            state_local->therm_integral[i] = state->therm_integral[i];
 +        }
 +        for(i=0; i<state_local->nnhpres; i++)
 +        {
 +            for(j=0; j<nh; j++) {
 +                state_local->nhpres_xi[i*nh+j]        = state->nhpres_xi[i*nh+j];
 +                state_local->nhpres_vxi[i*nh+j]       = state->nhpres_vxi[i*nh+j];
 +            }
 +        }
 +    }
 +    dd_bcast(dd,sizeof(real),&state_local->lambda);
 +    dd_bcast(dd,sizeof(real),&state_local->veta);
 +    dd_bcast(dd,sizeof(real),&state_local->vol0);
 +    dd_bcast(dd,sizeof(state_local->box),state_local->box);
 +    dd_bcast(dd,sizeof(state_local->box_rel),state_local->box_rel);
 +    dd_bcast(dd,sizeof(state_local->boxv),state_local->boxv);
 +    dd_bcast(dd,sizeof(state_local->svir_prev),state_local->svir_prev);
 +    dd_bcast(dd,sizeof(state_local->fvir_prev),state_local->fvir_prev);
 +    dd_bcast(dd,((state_local->ngtc*nh)*sizeof(double)),state_local->nosehoover_xi);
 +    dd_bcast(dd,((state_local->ngtc*nh)*sizeof(double)),state_local->nosehoover_vxi);
 +    dd_bcast(dd,state_local->ngtc*sizeof(double),state_local->therm_integral);
 +    dd_bcast(dd,((state_local->nnhpres*nh)*sizeof(double)),state_local->nhpres_xi);
 +    dd_bcast(dd,((state_local->nnhpres*nh)*sizeof(double)),state_local->nhpres_vxi);
 +
 +    if (dd->nat_home > state_local->nalloc)
 +    {
 +        dd_realloc_state(state_local,f,dd->nat_home);
 +    }
 +    for(i=0; i<estNR; i++)
 +    {
 +        if (EST_DISTR(i) && (state_local->flags & (1<<i)))
 +        {
 +            switch (i) {
 +            case estX:
 +                dd_distribute_vec(dd,cgs,state->x,state_local->x);
 +                break;
 +            case estV:
 +                dd_distribute_vec(dd,cgs,state->v,state_local->v);
 +                break;
 +            case estSDX:
 +                dd_distribute_vec(dd,cgs,state->sd_X,state_local->sd_X);
 +                break;
 +            case estCGP:
 +                dd_distribute_vec(dd,cgs,state->cg_p,state_local->cg_p);
 +                break;
 +            case estLD_RNG:
 +                if (state->nrngi == 1)
 +                {
 +                    dd_bcastc(dd,
 +                              state_local->nrng*sizeof(state_local->ld_rng[0]),
 +                              state->ld_rng,state_local->ld_rng);
 +                }
 +                else
 +                {
 +                    dd_scatter(dd,
 +                               state_local->nrng*sizeof(state_local->ld_rng[0]),
 +                               state->ld_rng,state_local->ld_rng);
 +                }
 +                break;
 +            case estLD_RNGI:
 +                if (state->nrngi == 1)
 +                {
 +                    dd_bcastc(dd,sizeof(state_local->ld_rngi[0]),
 +                              state->ld_rngi,state_local->ld_rngi);
 +                }
 +                else
 +                {
 +                     dd_scatter(dd,sizeof(state_local->ld_rngi[0]),
 +                               state->ld_rngi,state_local->ld_rngi);
 +                }   
 +                break;
 +            case estDISRE_INITF:
 +            case estDISRE_RM3TAV:
 +            case estORIRE_INITF:
 +            case estORIRE_DTAV:
 +                /* Not implemented yet */
 +                break;
 +            default:
 +                gmx_incons("Unknown state entry encountered in dd_distribute_state");
 +            }
 +        }
 +    }
 +}
 +
 +static char dim2char(int dim)
 +{
 +    char c='?';
 +    
 +    switch (dim)
 +    {
 +    case XX: c = 'X'; break;
 +    case YY: c = 'Y'; break;
 +    case ZZ: c = 'Z'; break;
 +    default: gmx_fatal(FARGS,"Unknown dim %d",dim);
 +    }
 +    
 +    return c;
 +}
 +
 +static void write_dd_grid_pdb(const char *fn,gmx_large_int_t step,
 +                              gmx_domdec_t *dd,matrix box,gmx_ddbox_t *ddbox)
 +{
 +    rvec grid_s[2],*grid_r=NULL,cx,r;
 +    char fname[STRLEN],format[STRLEN],buf[22];
 +    FILE *out;
 +    int  a,i,d,z,y,x;
 +    matrix tric;
 +    real vol;
 +
 +    copy_rvec(dd->comm->cell_x0,grid_s[0]);
 +    copy_rvec(dd->comm->cell_x1,grid_s[1]);
 +    
 +    if (DDMASTER(dd))
 +    {
 +        snew(grid_r,2*dd->nnodes);
 +    }
 +    
 +    dd_gather(dd,2*sizeof(rvec),grid_s[0],DDMASTER(dd) ? grid_r[0] : NULL);
 +    
 +    if (DDMASTER(dd))
 +    {
 +        for(d=0; d<DIM; d++)
 +        {
 +            for(i=0; i<DIM; i++)
 +            {
 +                if (d == i)
 +                {
 +                    tric[d][i] = 1;
 +                }
 +                else
 +                {
-             dd_warning(cr,fplog,"NOTE: Periodic molecules: can not easily determine the required minimum bonded cut-off, using half the non-bonded cut-off\n");
++                    if (d < ddbox->npbcdim && dd->nc[d] > 1)
 +                    {
 +                        tric[d][i] = box[i][d]/box[i][i];
 +                    }
 +                    else
 +                    {
 +                        tric[d][i] = 0;
 +                    }
 +                }
 +            }
 +        }
 +        sprintf(fname,"%s_%s.pdb",fn,gmx_step_str(step,buf));
 +        sprintf(format,"%s%s\n",get_pdbformat(),"%6.2f%6.2f");
 +        out = gmx_fio_fopen(fname,"w");
 +        gmx_write_pdb_box(out,dd->bScrewPBC ? epbcSCREW : epbcXYZ,box);
 +        a = 1;
 +        for(i=0; i<dd->nnodes; i++)
 +        {
 +            vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
 +            for(d=0; d<DIM; d++)
 +            {
 +                vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
 +            }
 +            for(z=0; z<2; z++)
 +            {
 +                for(y=0; y<2; y++)
 +                {
 +                    for(x=0; x<2; x++)
 +                    {
 +                        cx[XX] = grid_r[i*2+x][XX];
 +                        cx[YY] = grid_r[i*2+y][YY];
 +                        cx[ZZ] = grid_r[i*2+z][ZZ];
 +                        mvmul(tric,cx,r);
 +                        fprintf(out,format,"ATOM",a++,"CA","GLY",' ',1+i,
 +                                10*r[XX],10*r[YY],10*r[ZZ],1.0,vol);
 +                    }
 +                }
 +            }
 +            for(d=0; d<DIM; d++)
 +            {
 +                for(x=0; x<4; x++)
 +                {
 +                    switch(d)
 +                    {
 +                    case 0: y = 1 + i*8 + 2*x; break;
 +                    case 1: y = 1 + i*8 + 2*x - (x % 2); break;
 +                    case 2: y = 1 + i*8 + x; break;
 +                    }
 +                    fprintf(out,"%6s%5d%5d\n","CONECT",y,y+(1<<d));
 +                }
 +            }
 +        }
 +        gmx_fio_fclose(out);
 +        sfree(grid_r);
 +    }
 +}
 +
 +void write_dd_pdb(const char *fn,gmx_large_int_t step,const char *title,
 +                  gmx_mtop_t *mtop,t_commrec *cr,
 +                  int natoms,rvec x[],matrix box)
 +{
 +    char fname[STRLEN],format[STRLEN],format4[STRLEN],buf[22];
 +    FILE *out;
 +    int  i,ii,resnr,c;
 +    char *atomname,*resname;
 +    real b;
 +    gmx_domdec_t *dd;
 +    
 +    dd = cr->dd;
 +    if (natoms == -1)
 +    {
 +        natoms = dd->comm->nat[ddnatVSITE];
 +    }
 +    
 +    sprintf(fname,"%s_%s_n%d.pdb",fn,gmx_step_str(step,buf),cr->sim_nodeid);
 +    
 +    sprintf(format,"%s%s\n",get_pdbformat(),"%6.2f%6.2f");
 +    sprintf(format4,"%s%s\n",get_pdbformat4(),"%6.2f%6.2f");
 +    
 +    out = gmx_fio_fopen(fname,"w");
 +    
 +    fprintf(out,"TITLE     %s\n",title);
 +    gmx_write_pdb_box(out,dd->bScrewPBC ? epbcSCREW : epbcXYZ,box);
 +    for(i=0; i<natoms; i++)
 +    {
 +        ii = dd->gatindex[i];
 +        gmx_mtop_atominfo_global(mtop,ii,&atomname,&resnr,&resname);
 +        if (i < dd->comm->nat[ddnatZONE])
 +        {
 +            c = 0;
 +            while (i >= dd->cgindex[dd->comm->zones.cg_range[c+1]])
 +            {
 +                c++;
 +            }
 +            b = c;
 +        }
 +        else if (i < dd->comm->nat[ddnatVSITE])
 +        {
 +            b = dd->comm->zones.n;
 +        }
 +        else
 +        {
 +            b = dd->comm->zones.n + 1;
 +        }
 +        fprintf(out,strlen(atomname)<4 ? format : format4,
 +                "ATOM",(ii+1)%100000,
 +                atomname,resname,' ',resnr%10000,' ',
 +                10*x[i][XX],10*x[i][YY],10*x[i][ZZ],1.0,b);
 +    }
 +    fprintf(out,"TER\n");
 +    
 +    gmx_fio_fclose(out);
 +}
 +
 +real dd_cutoff_mbody(gmx_domdec_t *dd)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int  di;
 +    real r;
 +
 +    comm = dd->comm;
 +
 +    r = -1;
 +    if (comm->bInterCGBondeds)
 +    {
 +        if (comm->cutoff_mbody > 0)
 +        {
 +            r = comm->cutoff_mbody;
 +        }
 +        else
 +        {
 +            /* cutoff_mbody=0 means we do not have DLB */
 +            r = comm->cellsize_min[dd->dim[0]];
 +            for(di=1; di<dd->ndim; di++)
 +            {
 +                r = min(r,comm->cellsize_min[dd->dim[di]]);
 +            }
 +            if (comm->bBondComm)
 +            {
 +                r = max(r,comm->cutoff_mbody);
 +            }
 +            else
 +            {
 +                r = min(r,comm->cutoff);
 +            }
 +        }
 +    }
 +
 +    return r;
 +}
 +
 +real dd_cutoff_twobody(gmx_domdec_t *dd)
 +{
 +    real r_mb;
 +
 +    r_mb = dd_cutoff_mbody(dd);
 +
 +    return max(dd->comm->cutoff,r_mb);
 +}
 +
 +
 +static void dd_cart_coord2pmecoord(gmx_domdec_t *dd,ivec coord,ivec coord_pme)
 +{
 +    int nc,ntot;
 +    
 +    nc   = dd->nc[dd->comm->cartpmedim];
 +    ntot = dd->comm->ntot[dd->comm->cartpmedim];
 +    copy_ivec(coord,coord_pme);
 +    coord_pme[dd->comm->cartpmedim] =
 +        nc + (coord[dd->comm->cartpmedim]*(ntot - nc) + (ntot - nc)/2)/nc;
 +}
 +
 +static int low_ddindex2pmeindex(int ndd,int npme,int ddindex)
 +{
 +    /* Here we assign a PME node to communicate with this DD node
 +     * by assuming that the major index of both is x.
 +     * We add npme/2 to obtain an even distribution.
 +     */
 +    return (ddindex*npme + npme/2)/ndd;
 +}
 +
 +static int ddindex2pmeindex(const gmx_domdec_t *dd,int ddindex)
 +{
 +    return low_ddindex2pmeindex(dd->nnodes,dd->comm->npmenodes,ddindex);
 +}
 +
 +static int cr_ddindex2pmeindex(const t_commrec *cr,int ddindex)
 +{
 +    return low_ddindex2pmeindex(cr->dd->nnodes,cr->npmenodes,ddindex);
 +}
 +
 +static int *dd_pmenodes(t_commrec *cr)
 +{
 +    int *pmenodes;
 +    int n,i,p0,p1;
 +    
 +    snew(pmenodes,cr->npmenodes);
 +    n = 0;
 +    for(i=0; i<cr->dd->nnodes; i++) {
 +        p0 = cr_ddindex2pmeindex(cr,i);
 +        p1 = cr_ddindex2pmeindex(cr,i+1);
 +        if (i+1 == cr->dd->nnodes || p1 > p0) {
 +            if (debug)
 +                fprintf(debug,"pmenode[%d] = %d\n",n,i+1+n);
 +            pmenodes[n] = i + 1 + n;
 +            n++;
 +        }
 +    }
 +
 +    return pmenodes;
 +}
 +
 +static int gmx_ddcoord2pmeindex(t_commrec *cr,int x,int y,int z)
 +{
 +    gmx_domdec_t *dd;
 +    ivec coords,coords_pme,nc;
 +    int  slab;
 +    
 +    dd = cr->dd;
 +    /*
 +      if (dd->comm->bCartesian) {
 +      gmx_ddindex2xyz(dd->nc,ddindex,coords);
 +      dd_coords2pmecoords(dd,coords,coords_pme);
 +      copy_ivec(dd->ntot,nc);
 +      nc[dd->cartpmedim]         -= dd->nc[dd->cartpmedim];
 +      coords_pme[dd->cartpmedim] -= dd->nc[dd->cartpmedim];
 +      
 +      slab = (coords_pme[XX]*nc[YY] + coords_pme[YY])*nc[ZZ] + coords_pme[ZZ];
 +      } else {
 +      slab = (ddindex*cr->npmenodes + cr->npmenodes/2)/dd->nnodes;
 +      }
 +    */
 +    coords[XX] = x;
 +    coords[YY] = y;
 +    coords[ZZ] = z;
 +    slab = ddindex2pmeindex(dd,dd_index(dd->nc,coords));
 +    
 +    return slab;
 +}
 +
 +static int ddcoord2simnodeid(t_commrec *cr,int x,int y,int z)
 +{
 +    gmx_domdec_comm_t *comm;
 +    ivec coords;
 +    int  ddindex,nodeid=-1;
 +    
 +    comm = cr->dd->comm;
 +    
 +    coords[XX] = x;
 +    coords[YY] = y;
 +    coords[ZZ] = z;
 +    if (comm->bCartesianPP_PME)
 +    {
 +#ifdef GMX_MPI
 +        MPI_Cart_rank(cr->mpi_comm_mysim,coords,&nodeid);
 +#endif
 +    }
 +    else
 +    {
 +        ddindex = dd_index(cr->dd->nc,coords);
 +        if (comm->bCartesianPP)
 +        {
 +            nodeid = comm->ddindex2simnodeid[ddindex];
 +        }
 +        else
 +        {
 +            if (comm->pmenodes)
 +            {
 +                nodeid = ddindex + gmx_ddcoord2pmeindex(cr,x,y,z);
 +            }
 +            else
 +            {
 +                nodeid = ddindex;
 +            }
 +        }
 +    }
 +  
 +    return nodeid;
 +}
 +
 +static int dd_simnode2pmenode(t_commrec *cr,int sim_nodeid)
 +{
 +    gmx_domdec_t *dd;
 +    gmx_domdec_comm_t *comm;
 +    ivec coord,coord_pme;
 +    int  i;
 +    int  pmenode=-1;
 +    
 +    dd = cr->dd;
 +    comm = dd->comm;
 +    
 +    /* This assumes a uniform x domain decomposition grid cell size */
 +    if (comm->bCartesianPP_PME)
 +    {
 +#ifdef GMX_MPI
 +        MPI_Cart_coords(cr->mpi_comm_mysim,sim_nodeid,DIM,coord);
 +        if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
 +        {
 +            /* This is a PP node */
 +            dd_cart_coord2pmecoord(dd,coord,coord_pme);
 +            MPI_Cart_rank(cr->mpi_comm_mysim,coord_pme,&pmenode);
 +        }
 +#endif
 +    }
 +    else if (comm->bCartesianPP)
 +    {
 +        if (sim_nodeid < dd->nnodes)
 +        {
 +            pmenode = dd->nnodes + ddindex2pmeindex(dd,sim_nodeid);
 +        }
 +    }
 +    else
 +    {
 +        /* This assumes DD cells with identical x coordinates
 +         * are numbered sequentially.
 +         */
 +        if (dd->comm->pmenodes == NULL)
 +        {
 +            if (sim_nodeid < dd->nnodes)
 +            {
 +                /* The DD index equals the nodeid */
 +                pmenode = dd->nnodes + ddindex2pmeindex(dd,sim_nodeid);
 +            }
 +        }
 +        else
 +        {
 +            i = 0;
 +            while (sim_nodeid > dd->comm->pmenodes[i])
 +            {
 +                i++;
 +            }
 +            if (sim_nodeid < dd->comm->pmenodes[i])
 +            {
 +                pmenode = dd->comm->pmenodes[i];
 +            }
 +        }
 +    }
 +    
 +    return pmenode;
 +}
 +
 +gmx_bool gmx_pmeonlynode(t_commrec *cr,int sim_nodeid)
 +{
 +    gmx_bool bPMEOnlyNode;
 +    
 +    if (DOMAINDECOMP(cr))
 +    {
 +        bPMEOnlyNode = (dd_simnode2pmenode(cr,sim_nodeid) == -1);
 +    }
 +    else
 +    {
 +        bPMEOnlyNode = FALSE;
 +    }
 +    
 +    return bPMEOnlyNode;
 +}
 +
 +void get_pme_ddnodes(t_commrec *cr,int pmenodeid,
 +                     int *nmy_ddnodes,int **my_ddnodes,int *node_peer)
 +{
 +    gmx_domdec_t *dd;
 +    int x,y,z;
 +    ivec coord,coord_pme;
 +    
 +    dd = cr->dd;
 +    
 +    snew(*my_ddnodes,(dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
 +    
 +    *nmy_ddnodes = 0;
 +    for(x=0; x<dd->nc[XX]; x++)
 +    {
 +        for(y=0; y<dd->nc[YY]; y++)
 +        {
 +            for(z=0; z<dd->nc[ZZ]; z++)
 +            {
 +                if (dd->comm->bCartesianPP_PME)
 +                {
 +                    coord[XX] = x;
 +                    coord[YY] = y;
 +                    coord[ZZ] = z;
 +                    dd_cart_coord2pmecoord(dd,coord,coord_pme);
 +                    if (dd->ci[XX] == coord_pme[XX] &&
 +                        dd->ci[YY] == coord_pme[YY] &&
 +                        dd->ci[ZZ] == coord_pme[ZZ])
 +                        (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr,x,y,z);
 +                }
 +                else
 +                {
 +                    /* The slab corresponds to the nodeid in the PME group */
 +                    if (gmx_ddcoord2pmeindex(cr,x,y,z) == pmenodeid)
 +                    {
 +                        (*my_ddnodes)[(*nmy_ddnodes)++] = ddcoord2simnodeid(cr,x,y,z);
 +                    }
 +                }
 +            }
 +        }
 +    }
 +    
 +    /* The last PP-only node is the peer node */
 +    *node_peer = (*my_ddnodes)[*nmy_ddnodes-1];
 +    
 +    if (debug)
 +    {
 +        fprintf(debug,"Receive coordinates from PP nodes:");
 +        for(x=0; x<*nmy_ddnodes; x++)
 +        {
 +            fprintf(debug," %d",(*my_ddnodes)[x]);
 +        }
 +        fprintf(debug,"\n");
 +    }
 +}
 +
 +static gmx_bool receive_vir_ener(t_commrec *cr)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int  pmenode,coords[DIM],rank;
 +    gmx_bool bReceive;
 +    
 +    bReceive = TRUE;
 +    if (cr->npmenodes < cr->dd->nnodes)
 +    {
 +        comm = cr->dd->comm;
 +        if (comm->bCartesianPP_PME)
 +        {
 +            pmenode = dd_simnode2pmenode(cr,cr->sim_nodeid);
 +#ifdef GMX_MPI
 +            MPI_Cart_coords(cr->mpi_comm_mysim,cr->sim_nodeid,DIM,coords);
 +            coords[comm->cartpmedim]++;
 +            if (coords[comm->cartpmedim] < cr->dd->nc[comm->cartpmedim])
 +            {
 +                MPI_Cart_rank(cr->mpi_comm_mysim,coords,&rank);
 +                if (dd_simnode2pmenode(cr,rank) == pmenode)
 +                {
 +                    /* This is not the last PP node for pmenode */
 +                    bReceive = FALSE;
 +                }
 +            }
 +#endif  
 +        }
 +        else
 +        {
 +            pmenode = dd_simnode2pmenode(cr,cr->sim_nodeid);
 +            if (cr->sim_nodeid+1 < cr->nnodes &&
 +                dd_simnode2pmenode(cr,cr->sim_nodeid+1) == pmenode)
 +            {
 +                /* This is not the last PP node for pmenode */
 +                bReceive = FALSE;
 +            }
 +        }
 +    }
 +    
 +    return bReceive;
 +}
 +
 +static void set_zones_ncg_home(gmx_domdec_t *dd)
 +{
 +    gmx_domdec_zones_t *zones;
 +    int i;
 +
 +    zones = &dd->comm->zones;
 +
 +    zones->cg_range[0] = 0;
 +    for(i=1; i<zones->n+1; i++)
 +    {
 +        zones->cg_range[i] = dd->ncg_home;
 +    }
 +}
 +
 +static void rebuild_cgindex(gmx_domdec_t *dd,int *gcgs_index,t_state *state)
 +{
 +    int nat,i,*ind,*dd_cg_gl,*cgindex,cg_gl;
 +    
 +    ind = state->cg_gl;
 +    dd_cg_gl = dd->index_gl;
 +    cgindex  = dd->cgindex;
 +    nat = 0;
 +    cgindex[0] = nat;
 +    for(i=0; i<state->ncg_gl; i++)
 +    {
 +        cgindex[i] = nat;
 +        cg_gl = ind[i];
 +        dd_cg_gl[i] = cg_gl;
 +        nat += gcgs_index[cg_gl+1] - gcgs_index[cg_gl];
 +    }
 +    cgindex[i] = nat;
 +    
 +    dd->ncg_home = state->ncg_gl;
 +    dd->nat_home = nat;
 +
 +    set_zones_ncg_home(dd);
 +}
 +
 +static int ddcginfo(const cginfo_mb_t *cginfo_mb,int cg)
 +{
 +    while (cg >= cginfo_mb->cg_end)
 +    {
 +        cginfo_mb++;
 +    }
 +
 +    return cginfo_mb->cginfo[(cg - cginfo_mb->cg_start) % cginfo_mb->cg_mod];
 +}
 +
 +static void dd_set_cginfo(int *index_gl,int cg0,int cg1,
 +                          t_forcerec *fr,char *bLocalCG)
 +{
 +    cginfo_mb_t *cginfo_mb;
 +    int *cginfo;
 +    int cg;
 +
 +    if (fr != NULL)
 +    {
 +        cginfo_mb = fr->cginfo_mb;
 +        cginfo    = fr->cginfo;
 +
 +        for(cg=cg0; cg<cg1; cg++)
 +        {
 +            cginfo[cg] = ddcginfo(cginfo_mb,index_gl[cg]);
 +        }
 +    }
 +
 +    if (bLocalCG != NULL)
 +    {
 +        for(cg=cg0; cg<cg1; cg++)
 +        {
 +            bLocalCG[index_gl[cg]] = TRUE;
 +        }
 +    }
 +}
 +
 +static void make_dd_indices(gmx_domdec_t *dd,int *gcgs_index,int cg_start)
 +{
 +    int nzone,zone,zone1,cg0,cg,cg_gl,a,a_gl;
 +    int *zone2cg,*zone_ncg1,*index_gl,*gatindex;
 +    gmx_ga2la_t *ga2la;
 +    char *bLocalCG;
 +
 +    bLocalCG = dd->comm->bLocalCG;
 +
 +    if (dd->nat_tot > dd->gatindex_nalloc)
 +    {
 +        dd->gatindex_nalloc = over_alloc_dd(dd->nat_tot);
 +        srenew(dd->gatindex,dd->gatindex_nalloc);
 +    }
 +
 +    nzone      = dd->comm->zones.n;
 +    zone2cg    = dd->comm->zones.cg_range;
 +    zone_ncg1  = dd->comm->zone_ncg1;
 +    index_gl   = dd->index_gl;
 +    gatindex   = dd->gatindex;
 +
 +    if (zone2cg[1] != dd->ncg_home)
 +    {
 +        gmx_incons("dd->ncg_zone is not up to date");
 +    }
 +    
 +    /* Make the local to global and global to local atom index */
 +    a = dd->cgindex[cg_start];
 +    for(zone=0; zone<nzone; zone++)
 +    {
 +        if (zone == 0)
 +        {
 +            cg0 = cg_start;
 +        }
 +        else
 +        {
 +            cg0 = zone2cg[zone];
 +        }
 +        for(cg=cg0; cg<zone2cg[zone+1]; cg++)
 +        {
 +            zone1 = zone;
 +            if (cg - cg0 >= zone_ncg1[zone])
 +            {
 +                /* Signal that this cg is from more than one zone away */
 +                zone1 += nzone;
 +            }
 +            cg_gl = index_gl[cg];
 +            for(a_gl=gcgs_index[cg_gl]; a_gl<gcgs_index[cg_gl+1]; a_gl++)
 +            {
 +                gatindex[a] = a_gl;
 +                ga2la_set(dd->ga2la,a_gl,a,zone1);
 +                a++;
 +            }
 +        }
 +    }
 +}
 +
 +static int check_bLocalCG(gmx_domdec_t *dd,int ncg_sys,const char *bLocalCG,
 +                          const char *where)
 +{
 +    int ncg,i,ngl,nerr;
 +
 +    nerr = 0;
 +    if (bLocalCG == NULL)
 +    {
 +        return nerr;
 +    }
 +    for(i=0; i<dd->ncg_tot; i++)
 +    {
 +        if (!bLocalCG[dd->index_gl[i]])
 +        {
 +            fprintf(stderr,
 +                    "DD node %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n",dd->rank,where,i+1,dd->index_gl[i]+1,dd->ncg_home);
 +            nerr++;
 +        }
 +    }
 +    ngl = 0;
 +    for(i=0; i<ncg_sys; i++)
 +    {
 +        if (bLocalCG[i])
 +        {
 +            ngl++;
 +        }
 +    }
 +    if (ngl != dd->ncg_tot)
 +    {
 +        fprintf(stderr,"DD node %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n",dd->rank,where,ngl,dd->ncg_tot);
 +        nerr++;
 +    }
 +
 +    return nerr;
 +}
 +
 +static void check_index_consistency(gmx_domdec_t *dd,
 +                                    int natoms_sys,int ncg_sys,
 +                                    const char *where)
 +{
 +    int  nerr,ngl,i,a,cell;
 +    int  *have;
 +
 +    nerr = 0;
 +
 +    if (dd->comm->DD_debug > 1)
 +    {
 +        snew(have,natoms_sys);
 +        for(a=0; a<dd->nat_tot; a++)
 +        {
 +            if (have[dd->gatindex[a]] > 0)
 +            {
 +                fprintf(stderr,"DD node %d: global atom %d occurs twice: index %d and %d\n",dd->rank,dd->gatindex[a]+1,have[dd->gatindex[a]],a+1);
 +            }
 +            else
 +            {
 +                have[dd->gatindex[a]] = a + 1;
 +            }
 +        }
 +        sfree(have);
 +    }
 +
 +    snew(have,dd->nat_tot);
 +
 +    ngl  = 0;
 +    for(i=0; i<natoms_sys; i++)
 +    {
 +        if (ga2la_get(dd->ga2la,i,&a,&cell))
 +        {
 +            if (a >= dd->nat_tot)
 +            {
 +                fprintf(stderr,"DD node %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n",dd->rank,i+1,a+1,dd->nat_tot);
 +                nerr++;
 +            }
 +            else
 +            {
 +                have[a] = 1;
 +                if (dd->gatindex[a] != i)
 +                {
 +                    fprintf(stderr,"DD node %d: global atom %d marked as local atom %d, which has global atom index %d\n",dd->rank,i+1,a+1,dd->gatindex[a]+1);
 +                    nerr++;
 +                }
 +            }
 +            ngl++;
 +        }
 +    }
 +    if (ngl != dd->nat_tot)
 +    {
 +        fprintf(stderr,
 +                "DD node %d, %s: %d global atom indices, %d local atoms\n",
 +                dd->rank,where,ngl,dd->nat_tot);
 +    }
 +    for(a=0; a<dd->nat_tot; a++)
 +    {
 +        if (have[a] == 0)
 +        {
 +            fprintf(stderr,
 +                    "DD node %d, %s: local atom %d, global %d has no global index\n",
 +                    dd->rank,where,a+1,dd->gatindex[a]+1);
 +        }
 +    }
 +    sfree(have);
 +
 +    nerr += check_bLocalCG(dd,ncg_sys,dd->comm->bLocalCG,where);
 +
 +    if (nerr > 0) {
 +        gmx_fatal(FARGS,"DD node %d, %s: %d atom/cg index inconsistencies",
 +                  dd->rank,where,nerr);
 +    }
 +}
 +
 +static void clear_dd_indices(gmx_domdec_t *dd,int cg_start,int a_start)
 +{
 +    int  i;
 +    char *bLocalCG;
 +
 +    if (a_start == 0)
 +    {
 +        /* Clear the whole list without searching */
 +        ga2la_clear(dd->ga2la);
 +    }
 +    else
 +    {
 +        for(i=a_start; i<dd->nat_tot; i++)
 +        {
 +            ga2la_del(dd->ga2la,dd->gatindex[i]);
 +        }
 +    }
 +
 +    bLocalCG = dd->comm->bLocalCG;
 +    if (bLocalCG)
 +    {
 +        for(i=cg_start; i<dd->ncg_tot; i++)
 +        {
 +            bLocalCG[dd->index_gl[i]] = FALSE;
 +        }
 +    }
 +
 +    dd_clear_local_vsite_indices(dd);
 +    
 +    if (dd->constraints)
 +    {
 +        dd_clear_local_constraint_indices(dd);
 +    }
 +}
 +
 +static real grid_jump_limit(gmx_domdec_comm_t *comm,int dim_ind)
 +{
 +    real grid_jump_limit;
 +
 +    /* The distance between the boundaries of cells at distance
 +     * x+-1,y+-1 or y+-1,z+-1 is limited by the cut-off restrictions
 +     * and by the fact that cells should not be shifted by more than
 +     * half their size, such that cg's only shift by one cell
 +     * at redecomposition.
 +     */
 +    grid_jump_limit = comm->cellsize_limit;
 +    if (!comm->bVacDLBNoLimit)
 +    {
 +        grid_jump_limit = max(grid_jump_limit,
 +                              comm->cutoff/comm->cd[dim_ind].np);
 +    }
 +
 +    return grid_jump_limit;
 +}
 +
 +static void check_grid_jump(gmx_large_int_t step,gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int  d,dim;
 +    real limit,bfac;
 +    
 +    comm = dd->comm;
 +    
 +    for(d=1; d<dd->ndim; d++)
 +    {
 +        dim = dd->dim[d];
 +        limit = grid_jump_limit(comm,d);
 +        bfac = ddbox->box_size[dim];
 +        if (ddbox->tric_dir[dim])
 +        {
 +            bfac *= ddbox->skew_fac[dim];
 +        }
 +        if ((comm->cell_f1[d] - comm->cell_f_max0[d])*bfac <  limit ||
 +            (comm->cell_f0[d] - comm->cell_f_min1[d])*bfac > -limit)
 +        {
 +            char buf[22];
 +            gmx_fatal(FARGS,"Step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d\n",
 +                      gmx_step_str(step,buf),
 +                      dim2char(dim),dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
 +        }
 +    }
 +}
 +
 +static int dd_load_count(gmx_domdec_comm_t *comm)
 +{
 +    return (comm->eFlop ? comm->flop_n : comm->cycl_n[ddCyclF]);
 +}
 +
 +static float dd_force_load(gmx_domdec_comm_t *comm)
 +{
 +    float load;
 +    
 +    if (comm->eFlop)
 +    {
 +        load = comm->flop;
 +        if (comm->eFlop > 1)
 +        {
 +            load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
 +        }
 +    } 
 +    else
 +    {
 +        load = comm->cycl[ddCyclF];
 +        if (comm->cycl_n[ddCyclF] > 1)
 +        {
 +            /* Subtract the maximum of the last n cycle counts
 +             * to get rid of possible high counts due to other sources,
 +             * for instance system activity, that would otherwise
 +             * affect the dynamic load balancing.
 +             */
 +            load -= comm->cycl_max[ddCyclF];
 +        }
 +    }
 +    
 +    return load;
 +}
 +
 +static void set_slb_pme_dim_f(gmx_domdec_t *dd,int dim,real **dim_f)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int i;
 +    
 +    comm = dd->comm;
 +    
 +    snew(*dim_f,dd->nc[dim]+1);
 +    (*dim_f)[0] = 0;
 +    for(i=1; i<dd->nc[dim]; i++)
 +    {
 +        if (comm->slb_frac[dim])
 +        {
 +            (*dim_f)[i] = (*dim_f)[i-1] + comm->slb_frac[dim][i-1];
 +        }
 +        else
 +        {
 +            (*dim_f)[i] = (real)i/(real)dd->nc[dim];
 +        }
 +    }
 +    (*dim_f)[dd->nc[dim]] = 1;
 +}
 +
 +static void init_ddpme(gmx_domdec_t *dd,gmx_ddpme_t *ddpme,int dimind)
 +{
 +    int        pmeindex,slab,nso,i;
 +    ivec xyz;
 +    
 +    if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
 +    {
 +        ddpme->dim = YY;
 +    }
 +    else
 +    {
 +        ddpme->dim = dimind;
 +    }
 +    ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
 +    
 +    ddpme->nslab = (ddpme->dim == 0 ?
 +                    dd->comm->npmenodes_x :
 +                    dd->comm->npmenodes_y);
 +
 +    if (ddpme->nslab <= 1)
 +    {
 +        return;
 +    }
 +
 +    nso = dd->comm->npmenodes/ddpme->nslab;
 +    /* Determine for each PME slab the PP location range for dimension dim */
 +    snew(ddpme->pp_min,ddpme->nslab);
 +    snew(ddpme->pp_max,ddpme->nslab);
 +    for(slab=0; slab<ddpme->nslab; slab++) {
 +        ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
 +        ddpme->pp_max[slab] = 0;
 +    }
 +    for(i=0; i<dd->nnodes; i++) {
 +        ddindex2xyz(dd->nc,i,xyz);
 +        /* For y only use our y/z slab.
 +         * This assumes that the PME x grid size matches the DD grid size.
 +         */
 +        if (dimind == 0 || xyz[XX] == dd->ci[XX]) {
 +            pmeindex = ddindex2pmeindex(dd,i);
 +            if (dimind == 0) {
 +                slab = pmeindex/nso;
 +            } else {
 +                slab = pmeindex % ddpme->nslab;
 +            }
 +            ddpme->pp_min[slab] = min(ddpme->pp_min[slab],xyz[dimind]);
 +            ddpme->pp_max[slab] = max(ddpme->pp_max[slab],xyz[dimind]);
 +        }
 +    }
 +
 +    set_slb_pme_dim_f(dd,ddpme->dim,&ddpme->slb_dim_f);
 +}
 +
 +int dd_pme_maxshift_x(gmx_domdec_t *dd)
 +{
 +    if (dd->comm->ddpme[0].dim == XX)
 +    {
 +        return dd->comm->ddpme[0].maxshift;
 +    }
 +    else
 +    {
 +        return 0;
 +    }
 +}
 +
 +int dd_pme_maxshift_y(gmx_domdec_t *dd)
 +{
 +    if (dd->comm->ddpme[0].dim == YY)
 +    {
 +        return dd->comm->ddpme[0].maxshift;
 +    }
 +    else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
 +    {
 +        return dd->comm->ddpme[1].maxshift;
 +    }
 +    else
 +    {
 +        return 0;
 +    }
 +}
 +
 +static void set_pme_maxshift(gmx_domdec_t *dd,gmx_ddpme_t *ddpme,
 +                             gmx_bool bUniform,gmx_ddbox_t *ddbox,real *cell_f)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int  nc,ns,s;
 +    int  *xmin,*xmax;
 +    real range,pme_boundary;
 +    int  sh;
 +    
 +    comm = dd->comm;
 +    nc  = dd->nc[ddpme->dim];
 +    ns  = ddpme->nslab;
 +    
 +    if (!ddpme->dim_match)
 +    {
 +        /* PP decomposition is not along dim: the worst situation */
 +        sh = ns/2;
 +    }
 +    else if (ns <= 3 || (bUniform && ns == nc))
 +    {
 +        /* The optimal situation */
 +        sh = 1;
 +    }
 +    else
 +    {
 +        /* We need to check for all pme nodes which nodes they
 +         * could possibly need to communicate with.
 +         */
 +        xmin = ddpme->pp_min;
 +        xmax = ddpme->pp_max;
 +        /* Allow for atoms to be maximally 2/3 times the cut-off
 +         * out of their DD cell. This is a reasonable balance
 +         * between performance and support for most charge-group/cut-off
 +         * combinations.
 +         */
 +        range  = 2.0/3.0*comm->cutoff/ddbox->box_size[ddpme->dim];
 +        /* Avoid extra communication when we are exactly at a boundary */
 +        range *= 0.999;
 +        
 +        sh = 1;
 +        for(s=0; s<ns; s++)
 +        {
 +            /* PME slab s spreads atoms between box frac. s/ns and (s+1)/ns */
 +            pme_boundary = (real)s/ns;
 +            while (sh+1 < ns &&
 +                   ((s-(sh+1) >= 0 &&
 +                     cell_f[xmax[s-(sh+1)   ]+1]     + range > pme_boundary) ||
 +                    (s-(sh+1) <  0 &&
 +                     cell_f[xmax[s-(sh+1)+ns]+1] - 1 + range > pme_boundary)))
 +            {
 +                sh++;
 +            }
 +            pme_boundary = (real)(s+1)/ns;
 +            while (sh+1 < ns &&
 +                   ((s+(sh+1) <  ns &&
 +                     cell_f[xmin[s+(sh+1)   ]  ]     - range < pme_boundary) ||
 +                    (s+(sh+1) >= ns &&
 +                     cell_f[xmin[s+(sh+1)-ns]  ] + 1 - range < pme_boundary)))
 +            {
 +                sh++;
 +            }
 +        }
 +    }
 +    
 +    ddpme->maxshift = sh;
 +    
 +    if (debug)
 +    {
 +        fprintf(debug,"PME slab communication range for dim %d is %d\n",
 +                ddpme->dim,ddpme->maxshift);
 +    }
 +}
 +
 +static void check_box_size(gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
 +{
 +    int d,dim;
 +    
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        dim = dd->dim[d];
 +        if (dim < ddbox->nboundeddim &&
 +            ddbox->box_size[dim]*ddbox->skew_fac[dim] <
 +            dd->nc[dim]*dd->comm->cellsize_limit*DD_CELL_MARGIN)
 +        {
 +            gmx_fatal(FARGS,"The %c-size of the box (%f) times the triclinic skew factor (%f) is smaller than the number of DD cells (%d) times the smallest allowed cell size (%f)\n",
 +                      dim2char(dim),ddbox->box_size[dim],ddbox->skew_fac[dim],
 +                      dd->nc[dim],dd->comm->cellsize_limit);
 +        }
 +    }
 +}
 +
 +static void set_dd_cell_sizes_slb(gmx_domdec_t *dd,gmx_ddbox_t *ddbox,
 +                                  gmx_bool bMaster,ivec npulse)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int  d,j;
 +    rvec cellsize_min;
 +    real *cell_x,cell_dx,cellsize;
 +    
 +    comm = dd->comm;
 +    
 +    for(d=0; d<DIM; d++)
 +    {
 +        cellsize_min[d] = ddbox->box_size[d]*ddbox->skew_fac[d];
 +        npulse[d] = 1;
 +        if (dd->nc[d] == 1 || comm->slb_frac[d] == NULL)
 +        {
 +            /* Uniform grid */
 +            cell_dx = ddbox->box_size[d]/dd->nc[d];
 +            if (bMaster)
 +            {
 +                for(j=0; j<dd->nc[d]+1; j++)
 +                {
 +                    dd->ma->cell_x[d][j] = ddbox->box0[d] + j*cell_dx;
 +                }
 +            }
 +            else
 +            {
 +                comm->cell_x0[d] = ddbox->box0[d] + (dd->ci[d]  )*cell_dx;
 +                comm->cell_x1[d] = ddbox->box0[d] + (dd->ci[d]+1)*cell_dx;
 +            }
 +            cellsize = cell_dx*ddbox->skew_fac[d];
 +            while (cellsize*npulse[d] < comm->cutoff && npulse[d] < dd->nc[d]-1)
 +            {
 +                npulse[d]++;
 +            }
 +            cellsize_min[d] = cellsize;
 +        }
 +        else
 +        {
 +            /* Statically load balanced grid */
 +            /* Also when we are not doing a master distribution we determine
 +             * all cell borders in a loop to obtain identical values
 +             * to the master distribution case and to determine npulse.
 +             */
 +            if (bMaster)
 +            {
 +                cell_x = dd->ma->cell_x[d];
 +            }
 +            else
 +            {
 +                snew(cell_x,dd->nc[d]+1);
 +            }
 +            cell_x[0] = ddbox->box0[d];
 +            for(j=0; j<dd->nc[d]; j++)
 +            {
 +                cell_dx = ddbox->box_size[d]*comm->slb_frac[d][j];
 +                cell_x[j+1] = cell_x[j] + cell_dx;
 +                cellsize = cell_dx*ddbox->skew_fac[d];
 +                while (cellsize*npulse[d] < comm->cutoff &&
 +                       npulse[d] < dd->nc[d]-1)
 +                {
 +                    npulse[d]++;
 +                }
 +                cellsize_min[d] = min(cellsize_min[d],cellsize);
 +            }
 +            if (!bMaster)
 +            {
 +                comm->cell_x0[d] = cell_x[dd->ci[d]];
 +                comm->cell_x1[d] = cell_x[dd->ci[d]+1];
 +                sfree(cell_x);
 +            }
 +        }
 +        /* The following limitation is to avoid that a cell would receive
 +         * some of its own home charge groups back over the periodic boundary.
 +         * Double charge groups cause trouble with the global indices.
 +         */
 +        if (d < ddbox->npbcdim &&
 +            dd->nc[d] > 1 && npulse[d] >= dd->nc[d])
 +        {
 +            gmx_fatal_collective(FARGS,NULL,dd,
 +                                 "The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
 +                                 dim2char(d),ddbox->box_size[d],ddbox->skew_fac[d],
 +                                 comm->cutoff,
 +                                 dd->nc[d],dd->nc[d],
 +                                 dd->nnodes > dd->nc[d] ? "cells" : "processors");
 +        }
 +    }
 +    
 +    if (!comm->bDynLoadBal)
 +    {
 +        copy_rvec(cellsize_min,comm->cellsize_min);
 +    }
 +   
 +    for(d=0; d<comm->npmedecompdim; d++)
 +    {
 +        set_pme_maxshift(dd,&comm->ddpme[d],
 +                         comm->slb_frac[dd->dim[d]]==NULL,ddbox,
 +                         comm->ddpme[d].slb_dim_f);
 +    }
 +}
 +
 +
 +static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t *dd,
 +                                       int d,int dim,gmx_domdec_root_t *root,
 +                                       gmx_ddbox_t *ddbox,
 +                                       gmx_bool bUniform,gmx_large_int_t step, real cellsize_limit_f, int range[])
 +{ /* Turn the trial cell sizes in root->buf_ncd for the cells
 +   * range[0] <= i < range[1] into boundaries in root->cell_f.
 +   * The sizes are rescaled to fill the span between
 +   * root->cell_f[range[0]] and root->cell_f[range[1]], and cells that
 +   * end up below cellsize_limit_f are pinned to that limit.
 +   * With !bUniform a boundary is additionally kept from passing the
 +   * midpoint of either old neighbouring cell, and for d > 0 the
 +   * staggering limits root->bound_min/bound_max are enforced by
 +   * recursive calls on sub-ranges.
 +   * Sets root->bLimited and, for d > 0 with bUniform,
 +   * root->cell_f_max0/cell_f_min1. Calls gmx_fatal when, with PBC,
 +   * the last cell of the range ends up too small.
 +   */
 +    gmx_domdec_comm_t *comm;
 +    int  ncd,i,j,nmin,nmin_old;
 +    gmx_bool bLimLo,bLimHi;
 +    real *cell_size;
 +    real fac,halfway,cellsize_limit_f_i,region_size;
 +    gmx_bool bPBC,bLastHi=FALSE;
 +    int nrange[]={range[0],range[1]}; /* sub-range for the recursive calls */
 +
 +    region_size= root->cell_f[range[1]]-root->cell_f[range[0]];  /* span to divide over the cells */
 +
 +    comm = dd->comm;
 +
 +    ncd = dd->nc[dim];
 +
 +    bPBC = (dim < ddbox->npbcdim);
 +
 +    cell_size = root->buf_ncd;
 +
 +    if (debug) 
 +    {
 +        fprintf(debug,"enforce_limits: %d %d\n",range[0],range[1]);
 +    }
 +
 +    /* First we need to check if the scaling does not make cells
 +     * smaller than the smallest allowed size.
 +     * We need to do this iteratively, since if a cell is too small,
 +     * it needs to be enlarged, which makes all the other cells smaller,
 +     * which could in turn make another cell smaller than allowed.
 +     */
 +    for(i=range[0]; i<range[1]; i++)
 +    {
 +        root->bCellMin[i] = FALSE;
 +    }
 +    nmin = 0;
 +    do
 +    {
 +        nmin_old = nmin;
 +        /* We need the total for normalization */
 +        fac = 0;
 +        for(i=range[0]; i<range[1]; i++)
 +        {
 +            if (root->bCellMin[i] == FALSE)
 +            {
 +                fac += cell_size[i];
 +            }
 +        }
 +        fac = ( region_size - nmin*cellsize_limit_f)/fac; /* subtracting cells already set to cellsize_limit_f */
 +        /* Determine the cell boundaries */
 +        for(i=range[0]; i<range[1]; i++)
 +        {
 +            if (root->bCellMin[i] == FALSE)
 +            {
 +                cell_size[i] *= fac;
 +                if (!bPBC && (i == 0 || i == dd->nc[dim] -1)) /* outer cells without PBC have no lower limit */
 +                {
 +                    cellsize_limit_f_i = 0;
 +                }
 +                else
 +                {
 +                    cellsize_limit_f_i = cellsize_limit_f;
 +                }
 +                if (cell_size[i] < cellsize_limit_f_i)
 +                {
 +                    root->bCellMin[i] = TRUE;
 +                    cell_size[i] = cellsize_limit_f_i;
 +                    nmin++;
 +                }
 +            }
 +            root->cell_f[i+1] = root->cell_f[i] + cell_size[i];
 +        }
 +    }
 +    while (nmin > nmin_old); /* repeat until no further cell gets pinned */
 +    
 +    i=range[1]-1;
 +    cell_size[i] = root->cell_f[i+1] - root->cell_f[i];
 +    /* For this check we should not use DD_CELL_MARGIN,
 +     * but a slightly smaller factor,
 +     * since rounding could get us below the limit.
 +     */
 +    if (bPBC && cell_size[i] < cellsize_limit_f*DD_CELL_MARGIN2/DD_CELL_MARGIN)
 +    {
 +        char buf[22];
 +        gmx_fatal(FARGS,"Step %s: the dynamic load balancing could not balance dimension %c: box size %f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
 +                  gmx_step_str(step,buf),
 +                  dim2char(dim),ddbox->box_size[dim],ddbox->skew_fac[dim],
 +                  ncd,comm->cellsize_min[dim]);
 +    }
 +    
 +    root->bLimited = (nmin > 0) || (range[0]>0) || (range[1]<ncd); /* a cell was pinned or only a sub-range was treated */
 +    
 +    if (!bUniform)
 +    {
 +        /* Check if the boundary did not displace more than halfway
 +         * each of the cells it bounds, as this could cause problems,
 +         * especially when the differences between cell sizes are large.
 +         * If changes are applied, they will not make cells smaller
 +         * than the cut-off, as we check all the boundaries which
 +         * might be affected by a change and if the old state was ok,
 +         * the cells will at most be shrunk back to their old size.
 +         */
 +        for(i=range[0]+1; i<range[1]; i++)
 +        {
 +            halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i-1]);
 +            if (root->cell_f[i] < halfway)
 +            {
 +                root->cell_f[i] = halfway;
 +                /* Check if the change also causes shifts of the next boundaries */
 +                for(j=i+1; j<range[1]; j++)
 +                {
 +                    if (root->cell_f[j] < root->cell_f[j-1] + cellsize_limit_f)
 +                        root->cell_f[j] =  root->cell_f[j-1] + cellsize_limit_f;
 +                }
 +            }
 +            halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i+1]);
 +            if (root->cell_f[i] > halfway)
 +            {
 +                root->cell_f[i] = halfway;
 +                /* Check if the change also causes shifts of the previous boundaries */
 +                for(j=i-1; j>=range[0]+1; j--)
 +                {
 +                    if (root->cell_f[j] > root->cell_f[j+1] - cellsize_limit_f)
 +                        root->cell_f[j] = root->cell_f[j+1] - cellsize_limit_f;
 +                }
 +            }
 +        }
 +    }
 +    
 +    /* nrange is defined as [lower, upper) range for new call to enforce_limits */
 +    /* find highest violation of LimLo (a) and the following violation of LimHi (thus the lowest following) (b)
 +     * then call enforce_limits for (oldb,a), (a,b). In the next step: (b,nexta). oldb and nexta can be the boundaries.
 +     * for a and b nrange is used */
 +    if (d > 0)
 +    {
 +        /* Take care of the staggering of the cell boundaries */
 +        if (bUniform)
 +        {
 +            for(i=range[0]; i<range[1]; i++)
 +            {
 +                root->cell_f_max0[i] = root->cell_f[i];
 +                root->cell_f_min1[i] = root->cell_f[i+1];
 +            }
 +        }
 +        else
 +        {
 +            for(i=range[0]+1; i<range[1]; i++)
 +            {
 +                bLimLo = (root->cell_f[i] < root->bound_min[i]);
 +                bLimHi = (root->cell_f[i] > root->bound_max[i]);
 +                if (bLimLo && bLimHi)
 +                {
 +                    /* Both limits violated, try the best we can */
 +                    /* For this case we split the original range (range) in two parts and care about the other limitations in the next iteration. */
 +                    root->cell_f[i] = 0.5*(root->bound_min[i] + root->bound_max[i]);
 +                    nrange[0]=range[0];
 +                    nrange[1]=i;
 +                    dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
 +
 +                    nrange[0]=i;
 +                    nrange[1]=range[1];
 +                    dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
 +
 +                    return;
 +                }
 +                else if (bLimLo)
 +                {
 +                    /* root->cell_f[i] = root->bound_min[i]; */
 +                    nrange[1]=i;  /* only store violation location. There could be a LimLo violation following with a higher index */
 +                    bLastHi=FALSE;
 +                }
 +                else if (bLimHi && !bLastHi)
 +                {
 +                    bLastHi=TRUE;
 +                    if (nrange[1] < range[1])   /* found a LimLo before */
 +                    {
 +                        root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
 +                        dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
 +                        nrange[0]=nrange[1];
 +                    }
 +                    root->cell_f[i] = root->bound_max[i];
 +                    nrange[1]=i; 
 +                    dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
 +                    nrange[0]=i;
 +                    nrange[1]=range[1];
 +                }
 +            }
 +            if (nrange[1] < range[1])   /* the last violation found was a LimLo */
 +            {
 +                root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
 +                dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
 +                nrange[0]=nrange[1];
 +                nrange[1]=range[1];
 +                dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
 +            } 
 +            else if (nrange[0] > range[0]) /* found at least one LimHi */
 +            {
 +                dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
 +            }
 +        }
 +    }
 +}
 +
 +
 +static void set_dd_cell_sizes_dlb_root(gmx_domdec_t *dd,
 +                                       int d,int dim,gmx_domdec_root_t *root,
 +                                       gmx_ddbox_t *ddbox,gmx_bool bDynamicBox,
 +                                       gmx_bool bUniform,gmx_large_int_t step)
 +{ /* Compute new relative cell boundaries root->cell_f (fractions,
 +   * first boundary 0 and last boundary 1) along dimension dim, which
 +   * has DD dimension index d.
 +   * The trial sizes in root->buf_ncd are uniform with bUniform;
 +   * otherwise, when dd_load_count(comm) is non-zero, they are the
 +   * current sizes scaled by the load imbalance of each cell.
 +   * The limits are then imposed by
 +   * dd_cell_sizes_dlb_root_enforce_limits().
 +   * Behind the ncd+1 boundaries, root->cell_f also receives the cell
 +   * fractions of the lower dimensions and the PME maxshift values,
 +   * in the layout read back by distribute_dd_cell_sizes_dlb().
 +   */
 +    gmx_domdec_comm_t *comm;
 +    int  ncd,d1,i,j,pos;
 +    real *cell_size;
 +    real load_aver,load_i,imbalance,change,change_max,sc;
 +    real cellsize_limit_f,dist_min_f,dist_min_f_hard,space;
 +    real change_limit;
 +    real relax = 0.5;
 +    gmx_bool bPBC;
 +    int range[] = { 0, 0 };
 +
 +    comm = dd->comm;
 +
 +    /* Convert the maximum change from the input percentage to a fraction */
 +    change_limit = comm->dlb_scale_lim*0.01;
 +
 +    ncd = dd->nc[dim];
 +
 +    bPBC = (dim < ddbox->npbcdim);
 +
 +    cell_size = root->buf_ncd;
 +
 +    /* Store the original boundaries */
 +    for(i=0; i<ncd+1; i++)
 +    {
 +        root->old_cell_f[i] = root->cell_f[i];
 +    }
 +    if (bUniform) {
 +        for(i=0; i<ncd; i++)
 +        {
 +            cell_size[i] = 1.0/ncd;
 +        }
 +    }
 +    else if (dd_load_count(comm)) /* without load data cell_size keeps what root->buf_ncd already holds */
 +    {
 +        load_aver = comm->load[d].sum_m/ncd;
 +        change_max = 0;
 +        for(i=0; i<ncd; i++)
 +        {
 +            /* Determine the relative imbalance of cell i,
 +             * using entry 2 of the nload values stored per cell
 +             */
 +            load_i = comm->load[d].load[i*comm->load[d].nload+2];
 +            imbalance = (load_i - load_aver)/(load_aver>0 ? load_aver : 1);
 +            /* Determine the change of the cell size using underrelaxation */
 +            change = -relax*imbalance;
 +            change_max = max(change_max,max(change,-change));
 +        }
 +        /* Limit the amount of scaling.
 +         * We need to use the same rescaling for all cells in one row,
 +         * otherwise the load balancing might not converge.
 +         */
 +        sc = relax;
 +        if (change_max > change_limit)
 +        {
 +            sc *= change_limit/change_max;
 +        }
 +        for(i=0; i<ncd; i++)
 +        {
 +            /* Determine the relative imbalance of cell i */
 +            load_i = comm->load[d].load[i*comm->load[d].nload+2];
 +            imbalance = (load_i - load_aver)/(load_aver>0 ? load_aver : 1);
 +            /* Determine the change of the cell size using the limited
 +             * underrelaxation factor sc
 +             */
 +            change = -sc*imbalance;
 +            cell_size[i] = (root->cell_f[i+1]-root->cell_f[i])*(1 + change);
 +        }
 +    }
 +    
 +    /* Convert the size and distance limits to fractions of the box size */
 +    cellsize_limit_f  = comm->cellsize_min[dim]/ddbox->box_size[dim];
 +    cellsize_limit_f *= DD_CELL_MARGIN;
 +    dist_min_f_hard        = grid_jump_limit(comm,d)/ddbox->box_size[dim];
 +    dist_min_f       = dist_min_f_hard * DD_CELL_MARGIN;
 +    if (ddbox->tric_dir[dim])
 +    {
 +        cellsize_limit_f /= ddbox->skew_fac[dim];
 +        dist_min_f       /= ddbox->skew_fac[dim];
 +    }
 +    if (bDynamicBox && d > 0)
 +    {
 +        dist_min_f *= DD_PRES_SCALE_MARGIN;
 +    }
 +    if (d > 0 && !bUniform)
 +    {
 +        /* Make sure that the grid is not shifted too much */
 +        for(i=1; i<ncd; i++) {
 +            if (root->cell_f_min1[i] - root->cell_f_max0[i-1] < 2 * dist_min_f_hard) 
 +            {
 +                gmx_incons("Inconsistent DD boundary staggering limits!");
 +            }
 +            /* Lower bound: dist_min_f above cell_f_max0[i-1], plus half
 +             * of the remaining space up to the current boundary
 +             */
 +            root->bound_min[i] = root->cell_f_max0[i-1] + dist_min_f;
 +            space = root->cell_f[i] - (root->cell_f_max0[i-1] + dist_min_f);
 +            if (space > 0) {
 +                root->bound_min[i] += 0.5*space;
 +            }
 +            /* Upper bound: dist_min_f below cell_f_min1[i], lowered by
 +             * half the overshoot when the current boundary is beyond it
 +             */
 +            root->bound_max[i] = root->cell_f_min1[i] - dist_min_f;
 +            space = root->cell_f[i] - (root->cell_f_min1[i] - dist_min_f);
 +            if (space < 0) {
 +                root->bound_max[i] += 0.5*space;
 +            }
 +            if (debug)
 +            {
 +                fprintf(debug,
 +                        "dim %d boundary %d %.3f < %.3f < %.3f < %.3f < %.3f\n",
 +                        d,i,
 +                        root->cell_f_max0[i-1] + dist_min_f,
 +                        root->bound_min[i],root->cell_f[i],root->bound_max[i],
 +                        root->cell_f_min1[i] - dist_min_f);
 +            }
 +        }
 +    }
 +    range[1]=ncd; /* enforce the limits over all cells: range is [0,ncd) */
 +    root->cell_f[0] = 0;
 +    root->cell_f[ncd] = 1;
 +    dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, range);
 +
 +
 +    /* After the checks above, the cells should obey the cut-off
 +     * restrictions, but it does not hurt to check.
 +     */
 +    for(i=0; i<ncd; i++)
 +    {
 +        if (debug)
 +        {
 +            fprintf(debug,"Relative bounds dim %d  cell %d: %f %f\n",
 +                    dim,i,root->cell_f[i],root->cell_f[i+1]);
 +        }
 +
 +        if ((bPBC || (i != 0 && i != dd->nc[dim]-1)) &&
 +            root->cell_f[i+1] - root->cell_f[i] <
 +            cellsize_limit_f/DD_CELL_MARGIN)
 +        {
 +            char buf[22];
 +            fprintf(stderr,
 +                    "\nWARNING step %s: direction %c, cell %d too small: %f\n",
 +                    gmx_step_str(step,buf),dim2char(dim),i,
 +                    (root->cell_f[i+1] - root->cell_f[i])
 +                    *ddbox->box_size[dim]*ddbox->skew_fac[dim]);
 +        }
 +    }
 +    
 +    pos = ncd + 1;
 +    /* Store the cell boundaries of the lower dimensions at the end */
 +    for(d1=0; d1<d; d1++)
 +    {
 +        root->cell_f[pos++] = comm->cell_f0[d1];
 +        root->cell_f[pos++] = comm->cell_f1[d1];
 +    }
 +    
 +    if (d < comm->npmedecompdim)
 +    {
 +        /* The master determines the maximum shift for
 +         * the coordinate communication between separate PME nodes.
 +         */
 +        set_pme_maxshift(dd,&comm->ddpme[d],bUniform,ddbox,root->cell_f);
 +    }
 +    root->cell_f[pos++] = comm->ddpme[0].maxshift; /* integer shift stored in the real-valued buffer */
 +    if (d >= 1)
 +    {
 +        root->cell_f[pos++] = comm->ddpme[1].maxshift;
 +    }
 +}    
 +
 +static void relative_to_absolute_cell_bounds(gmx_domdec_t *dd,
 +                                             gmx_ddbox_t *ddbox,int dimind)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int dim;
 +
 +    comm = dd->comm;
 +
 +    /* Set the cell dimensions */
 +    dim = dd->dim[dimind];
 +    comm->cell_x0[dim] = comm->cell_f0[dimind]*ddbox->box_size[dim];
 +    comm->cell_x1[dim] = comm->cell_f1[dimind]*ddbox->box_size[dim];
 +    if (dim >= ddbox->nboundeddim)
 +    {
 +        comm->cell_x0[dim] += ddbox->box0[dim];
 +        comm->cell_x1[dim] += ddbox->box0[dim];
 +    }
 +}
 +
 +static void distribute_dd_cell_sizes_dlb(gmx_domdec_t *dd,
 +                                         int d,int dim,real *cell_f_row,
 +                                         gmx_ddbox_t *ddbox)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int d1,dim1,pos;
 +
 +    comm = dd->comm;
 +
 +#ifdef GMX_MPI
 +    /* Each node would only need to know two fractions,
 +     * but it is probably cheaper to broadcast the whole array.
 +     */
 +    MPI_Bcast(cell_f_row,DD_CELL_F_SIZE(dd,d)*sizeof(real),MPI_BYTE,
 +              0,comm->mpi_comm_load[d]);
 +#endif
 +    /* Copy the fractions for this dimension from the buffer */
 +    comm->cell_f0[d] = cell_f_row[dd->ci[dim]  ];
 +    comm->cell_f1[d] = cell_f_row[dd->ci[dim]+1];
 +    /* The whole array was communicated, so set the buffer position */
 +    pos = dd->nc[dim] + 1;
 +    for(d1=0; d1<=d; d1++)
 +    {
 +        if (d1 < d)
 +        {
 +            /* Copy the cell fractions of the lower dimensions */
 +            comm->cell_f0[d1] = cell_f_row[pos++];
 +            comm->cell_f1[d1] = cell_f_row[pos++];
 +        }
 +        relative_to_absolute_cell_bounds(dd,ddbox,d1);
 +    }
 +    /* Convert the communicated shift from float to int */
 +    comm->ddpme[0].maxshift = (int)(cell_f_row[pos++] + 0.5);
 +    if (d >= 1)
 +    {
 +        comm->ddpme[1].maxshift = (int)(cell_f_row[pos++] + 0.5);
 +    }
 +}
 +
 +static void set_dd_cell_sizes_dlb_change(gmx_domdec_t *dd,
 +                                         gmx_ddbox_t *ddbox,gmx_bool bDynamicBox,
 +                                         gmx_bool bUniform,gmx_large_int_t step)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int d,dim,d1;
 +    gmx_bool bRowMember,bRowRoot;
 +    real *cell_f_row;
 +    
 +    comm = dd->comm;
 +
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        dim = dd->dim[d];
 +        bRowMember = TRUE;
 +        bRowRoot = TRUE;
 +        for(d1=d; d1<dd->ndim; d1++)
 +        {
 +            if (dd->ci[dd->dim[d1]] > 0)
 +            {
 +                if (d1 > d)
 +                {
 +                    bRowMember = FALSE;
 +                }
 +                bRowRoot = FALSE;
 +            }
 +        }
 +        if (bRowMember)
 +        {
 +            if (bRowRoot)
 +            {
 +                set_dd_cell_sizes_dlb_root(dd,d,dim,comm->root[d],
 +                                           ddbox,bDynamicBox,bUniform,step);
 +                cell_f_row = comm->root[d]->cell_f;
 +            }
 +            else
 +            {
 +                cell_f_row = comm->cell_f_row;
 +            }
 +            distribute_dd_cell_sizes_dlb(dd,d,dim,cell_f_row,ddbox);
 +        }
 +    }
 +}    
 +
 +static void set_dd_cell_sizes_dlb_nochange(gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
 +{
 +    int d;
 +
 +    /* This function assumes the box is static and should therefore
 +     * not be called when the box has changed since the last
 +     * call to dd_partition_system.
 +     */
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        relative_to_absolute_cell_bounds(dd,ddbox,d); 
 +    }
 +}
 +
 +
 +
 +static void set_dd_cell_sizes_dlb(gmx_domdec_t *dd,
 +                                  gmx_ddbox_t *ddbox,gmx_bool bDynamicBox,
 +                                  gmx_bool bUniform,gmx_bool bDoDLB,gmx_large_int_t step,
 +                                  gmx_wallcycle_t wcycle)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int dim;
 +
 +    comm = dd->comm;
 +    
 +    if (bDoDLB)
 +    {
 +        wallcycle_start(wcycle,ewcDDCOMMBOUND);
 +        set_dd_cell_sizes_dlb_change(dd,ddbox,bDynamicBox,bUniform,step);
 +        wallcycle_stop(wcycle,ewcDDCOMMBOUND);
 +    }
 +    else if (bDynamicBox)
 +    {
 +        set_dd_cell_sizes_dlb_nochange(dd,ddbox);
 +    }
 +    
 +    /* Set the dimensions for which no DD is used */
 +    for(dim=0; dim<DIM; dim++) {
 +        if (dd->nc[dim] == 1) {
 +            comm->cell_x0[dim] = 0;
 +            comm->cell_x1[dim] = ddbox->box_size[dim];
 +            if (dim >= ddbox->nboundeddim)
 +            {
 +                comm->cell_x0[dim] += ddbox->box0[dim];
 +                comm->cell_x1[dim] += ddbox->box0[dim];
 +            }
 +        }
 +    }
 +}
 +
 +static void realloc_comm_ind(gmx_domdec_t *dd,ivec npulse)
 +{
 +    int d,np,i;
 +    gmx_domdec_comm_dim_t *cd;
 +    
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        cd = &dd->comm->cd[d];
 +        np = npulse[dd->dim[d]];
 +        if (np > cd->np_nalloc)
 +        {
 +            if (debug)
 +            {
 +                fprintf(debug,"(Re)allocing cd for %c to %d pulses\n",
 +                        dim2char(dd->dim[d]),np);
 +            }
 +            if (DDMASTER(dd) && cd->np_nalloc > 0)
 +            {
 +                fprintf(stderr,"\nIncreasing the number of cell to communicate in dimension %c to %d for the first time\n",dim2char(dd->dim[d]),np);
 +            }
 +            srenew(cd->ind,np);
 +            for(i=cd->np_nalloc; i<np; i++)
 +            {
 +                cd->ind[i].index  = NULL;
 +                cd->ind[i].nalloc = 0;
 +            }
 +            cd->np_nalloc = np;
 +        }
 +        cd->np = np;
 +    }
 +}
 +
 +
 +static void set_dd_cell_sizes(gmx_domdec_t *dd,
 +                              gmx_ddbox_t *ddbox,gmx_bool bDynamicBox,
 +                              gmx_bool bUniform,gmx_bool bDoDLB,gmx_large_int_t step,
 +                              gmx_wallcycle_t wcycle)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int  d;
 +    ivec npulse;
 +    
 +    comm = dd->comm;
 +
 +    /* Copy the old cell boundaries for the cg displacement check */
 +    copy_rvec(comm->cell_x0,comm->old_cell_x0);
 +    copy_rvec(comm->cell_x1,comm->old_cell_x1);
 +    
 +    if (comm->bDynLoadBal)
 +    {
 +        if (DDMASTER(dd))
 +        {
 +            check_box_size(dd,ddbox);
 +        }
 +        set_dd_cell_sizes_dlb(dd,ddbox,bDynamicBox,bUniform,bDoDLB,step,wcycle);
 +    }
 +    else
 +    {
 +        set_dd_cell_sizes_slb(dd,ddbox,FALSE,npulse);
 +        realloc_comm_ind(dd,npulse);
 +    }
 +    
 +    if (debug)
 +    {
 +        for(d=0; d<DIM; d++)
 +        {
 +            fprintf(debug,"cell_x[%d] %f - %f skew_fac %f\n",
 +                    d,comm->cell_x0[d],comm->cell_x1[d],ddbox->skew_fac[d]);
 +        }
 +    }
 +}
 +
 +static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
 +                                  gmx_ddbox_t *ddbox,
 +                                  rvec cell_ns_x0,rvec cell_ns_x1,
 +                                  gmx_large_int_t step)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int dim_ind,dim;
 +    
 +    comm = dd->comm;
 +
 +    for(dim_ind=0; dim_ind<dd->ndim; dim_ind++)
 +    {
 +        dim = dd->dim[dim_ind];
 +        
 +        /* Without PBC we don't have restrictions on the outer cells */
 +        if (!(dim >= ddbox->npbcdim && 
 +              (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1)) &&
 +            comm->bDynLoadBal &&
 +            (comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
 +            comm->cellsize_min[dim])
 +        {
 +            char buf[22];
 +            gmx_fatal(FARGS,"Step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
 +                      gmx_step_str(step,buf),dim2char(dim),
 +                      comm->cell_x1[dim] - comm->cell_x0[dim],
 +                      ddbox->skew_fac[dim],
 +                      dd->comm->cellsize_min[dim],
 +                      dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
 +        }
 +    }
 +    
 +    if ((dd->bGridJump && dd->ndim > 1) || ddbox->nboundeddim < DIM)
 +    {
 +        /* Communicate the boundaries and update cell_ns_x0/1 */
 +        dd_move_cellx(dd,ddbox,cell_ns_x0,cell_ns_x1);
 +        if (dd->bGridJump && dd->ndim > 1)
 +        {
 +            check_grid_jump(step,dd,ddbox);
 +        }
 +    }
 +}
 +
 +static void make_tric_corr_matrix(int npbcdim,matrix box,matrix tcm)
 +{
 +    if (YY < npbcdim)
 +    {
 +        tcm[YY][XX] = -box[YY][XX]/box[YY][YY];
 +    }
 +    else
 +    {
 +        tcm[YY][XX] = 0;
 +    }
 +    if (ZZ < npbcdim)
 +    {
 +        tcm[ZZ][XX] = -(box[ZZ][YY]*tcm[YY][XX] + box[ZZ][XX])/box[ZZ][ZZ];
 +        tcm[ZZ][YY] = -box[ZZ][YY]/box[ZZ][ZZ];
 +    }
 +    else
 +    {
 +        tcm[ZZ][XX] = 0;
 +        tcm[ZZ][YY] = 0;
 +    }
 +}
 +
 +static void check_screw_box(matrix box)
 +{
 +    /* Mathematical limitation */
 +    if (box[YY][XX] != 0 || box[ZZ][XX] != 0)
 +    {
 +        gmx_fatal(FARGS,"With screw pbc the unit cell can not have non-zero off-diagonal x-components");
 +    }
 +    
 +    /* Limitation due to the asymmetry of the eighth shell method */
 +    if (box[ZZ][YY] != 0)
 +    {
 +        gmx_fatal(FARGS,"pbc=screw with non-zero box_zy is not supported");
 +    }
 +}
 +
 +static void distribute_cg(FILE *fplog,gmx_large_int_t step,
 +                          matrix box,ivec tric_dir,t_block *cgs,rvec pos[],
 +                          gmx_domdec_t *dd)
 +{ /* Assign every charge group in cgs to a DD cell.
 +   * For each charge group the center of geometry is computed, the
 +   * group (center and atom positions in pos) is shifted into the box
 +   * along the dimensions below dd->npbcdim, and the cell index is
 +   * looked up from the boundaries in dd->ma->cell_x.
 +   * Fills ma->ncg, ma->nat, ma->index and ma->cg, modifies pos in
 +   * place and, when fplog is non-NULL, prints the number of charge
 +   * groups per node.
 +   */
 +    gmx_domdec_master_t *ma;
 +    int **tmp_ind=NULL,*tmp_nalloc=NULL;
 +    int  i,icg,j,k,k0,k1,d,npbcdim;
 +    matrix tcm;
 +    rvec box_size,cg_cm;
 +    ivec ind;
 +    real nrcg,inv_ncg,pos_d;
 +    atom_id *cgindex;
 +    gmx_bool bUnbounded,bScrew;
 +
 +    ma = dd->ma;
 +    
 +    if (tmp_ind == NULL) /* always true here: tmp_ind was set to NULL above */
 +    {
 +        /* Temporary per-node lists of charge group indices, freed below */
 +        snew(tmp_nalloc,dd->nnodes);
 +        snew(tmp_ind,dd->nnodes);
 +        for(i=0; i<dd->nnodes; i++)
 +        {
 +            tmp_nalloc[i] = over_alloc_large(cgs->nr/dd->nnodes+1);
 +            snew(tmp_ind[i],tmp_nalloc[i]);
 +        }
 +    }
 +    
 +    /* Clear the count */
 +    for(i=0; i<dd->nnodes; i++)
 +    {
 +        ma->ncg[i] = 0;
 +        ma->nat[i] = 0;
 +    }
 +    
 +    make_tric_corr_matrix(dd->npbcdim,box,tcm);
 +    
 +    cgindex = cgs->index;
 +    
 +    /* Compute the center of geometry for all charge groups */
 +    for(icg=0; icg<cgs->nr; icg++)
 +    {
 +        k0      = cgindex[icg];
 +        k1      = cgindex[icg+1];
 +        nrcg    = k1 - k0;
 +        if (nrcg == 1)
 +        {
 +            copy_rvec(pos[k0],cg_cm);
 +        }
 +        else
 +        {
 +            inv_ncg = 1.0/nrcg;
 +            
 +            clear_rvec(cg_cm);
 +            for(k=k0; (k<k1); k++)
 +            {
 +                rvec_inc(cg_cm,pos[k]);
 +            }
 +            for(d=0; (d<DIM); d++)
 +            {
 +                cg_cm[d] *= inv_ncg;
 +            }
 +        }
 +        /* Put the charge group in the box and determine the cell index.
 +         * The dimensions are treated from high to low; the triclinic
 +         * correction for d uses the cg_cm components of dimensions > d.
 +         */
 +        for(d=DIM-1; d>=0; d--) {
 +            pos_d = cg_cm[d];
 +            if (d < dd->npbcdim)
 +            {
 +                bScrew = (dd->bScrewPBC && d == XX);
 +                if (tric_dir[d] && dd->nc[d] > 1)
 +                {
 +                    /* Use triclinic coordinates for this dimension */
 +                    for(j=d+1; j<DIM; j++)
 +                    {
 +                        pos_d += cg_cm[j]*tcm[j][d];
 +                    }
 +                }
 +                while(pos_d >= box[d][d])
 +                {
 +                    /* Shift the center and all atoms down by one box vector;
 +                     * with screw pbc y and z are mirrored as well
 +                     */
 +                    pos_d -= box[d][d];
 +                    rvec_dec(cg_cm,box[d]);
 +                    if (bScrew)
 +                    {
 +                        cg_cm[YY] = box[YY][YY] - cg_cm[YY];
 +                        cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
 +                    }
 +                    for(k=k0; (k<k1); k++)
 +                    {
 +                        rvec_dec(pos[k],box[d]);
 +                        if (bScrew)
 +                        {
 +                            pos[k][YY] = box[YY][YY] - pos[k][YY];
 +                            pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
 +                        }
 +                    }
 +                }
 +                while(pos_d < 0)
 +                {
 +                    /* Same as above, shifting up by one box vector */
 +                    pos_d += box[d][d];
 +                    rvec_inc(cg_cm,box[d]);
 +                    if (bScrew)
 +                    {
 +                        cg_cm[YY] = box[YY][YY] - cg_cm[YY];
 +                        cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
 +                    }
 +                    for(k=k0; (k<k1); k++)
 +                    {
 +                        rvec_inc(pos[k],box[d]);
 +                        if (bScrew) {
 +                            pos[k][YY] = box[YY][YY] - pos[k][YY];
 +                            pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
 +                        }
 +                    }
 +                }
 +            }
 +            /* Linear search for the cell that contains pos_d.
 +             * This could be done more efficiently.
 +             */
 +            ind[d] = 0;
 +            while(ind[d]+1 < dd->nc[d] && pos_d >= ma->cell_x[d][ind[d]+1])
 +            {
 +                ind[d]++;
 +            }
 +        }
 +        i = dd_index(dd->nc,ind);
 +        if (ma->ncg[i] == tmp_nalloc[i]) /* list of node i is full: grow it */
 +        {
 +            tmp_nalloc[i] = over_alloc_large(ma->ncg[i]+1);
 +            srenew(tmp_ind[i],tmp_nalloc[i]);
 +        }
 +        tmp_ind[i][ma->ncg[i]] = icg;
 +        ma->ncg[i]++;
 +        ma->nat[i] += cgindex[icg+1] - cgindex[icg];
 +    }
 +    
 +    /* Concatenate the per-node lists into ma->cg; ma->index[i] is the
 +     * start of the list of node i
 +     */
 +    k1 = 0;
 +    for(i=0; i<dd->nnodes; i++)
 +    {
 +        ma->index[i] = k1;
 +        for(k=0; k<ma->ncg[i]; k++)
 +        {
 +            ma->cg[k1++] = tmp_ind[i][k];
 +        }
 +    }
 +    ma->index[dd->nnodes] = k1;
 +    
 +    for(i=0; i<dd->nnodes; i++)
 +    {
 +        sfree(tmp_ind[i]);
 +    }
 +    sfree(tmp_ind);
 +    sfree(tmp_nalloc);
 +    
 +    if (fplog)
 +    {
 +        char buf[22];
 +        fprintf(fplog,"Charge group distribution at step %s:",
 +                gmx_step_str(step,buf));
 +        for(i=0; i<dd->nnodes; i++)
 +        {
 +            fprintf(fplog," %d",ma->ncg[i]);
 +        }
 +        fprintf(fplog,"\n");
 +    }
 +}
 +
 +static void get_cg_distribution(FILE *fplog,gmx_large_int_t step,gmx_domdec_t *dd,
 +                                t_block *cgs,matrix box,gmx_ddbox_t *ddbox,
 +                                rvec pos[])
 +{
 +    gmx_domdec_master_t *ma=NULL;
 +    ivec npulse;
 +    int  i,cg_gl;
 +    int  *ibuf,buf2[2] = { 0, 0 };
 +    
 +    if (DDMASTER(dd))
 +    {
 +        ma = dd->ma;
 +        
 +        if (dd->bScrewPBC)
 +        {
 +            check_screw_box(box);
 +        }
 +    
 +        set_dd_cell_sizes_slb(dd,ddbox,TRUE,npulse);
 +    
 +        distribute_cg(fplog,step,box,ddbox->tric_dir,cgs,pos,dd);
 +        for(i=0; i<dd->nnodes; i++)
 +        {
 +            ma->ibuf[2*i]   = ma->ncg[i];
 +            ma->ibuf[2*i+1] = ma->nat[i];
 +        }
 +        ibuf = ma->ibuf;
 +    }
 +    else
 +    {
 +        ibuf = NULL;
 +    }
 +    dd_scatter(dd,2*sizeof(int),ibuf,buf2);
 +    
 +    dd->ncg_home = buf2[0];
 +    dd->nat_home = buf2[1];
 +    dd->ncg_tot  = dd->ncg_home;
 +    dd->nat_tot  = dd->nat_home;
 +    if (dd->ncg_home > dd->cg_nalloc || dd->cg_nalloc == 0)
 +    {
 +        dd->cg_nalloc = over_alloc_dd(dd->ncg_home);
 +        srenew(dd->index_gl,dd->cg_nalloc);
 +        srenew(dd->cgindex,dd->cg_nalloc+1);
 +    }
 +    if (DDMASTER(dd))
 +    {
 +        for(i=0; i<dd->nnodes; i++)
 +        {
 +            ma->ibuf[i] = ma->ncg[i]*sizeof(int);
 +            ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
 +        }
 +    }
 +    
 +    dd_scatterv(dd,
 +                DDMASTER(dd) ? ma->ibuf : NULL,
 +                DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
 +                DDMASTER(dd) ? ma->cg : NULL,
 +                dd->ncg_home*sizeof(int),dd->index_gl);
 +    
 +    /* Determine the home charge group sizes */
 +    dd->cgindex[0] = 0;
 +    for(i=0; i<dd->ncg_home; i++)
 +    {
 +        cg_gl = dd->index_gl[i];
 +        dd->cgindex[i+1] =
 +            dd->cgindex[i] + cgs->index[cg_gl+1] - cgs->index[cg_gl];
 +    }
 +    
 +    if (debug)
 +    {
 +        fprintf(debug,"Home charge groups:\n");
 +        for(i=0; i<dd->ncg_home; i++)
 +        {
 +            fprintf(debug," %d",dd->index_gl[i]);
 +            if (i % 10 == 9) 
 +                fprintf(debug,"\n");
 +        }
 +        fprintf(debug,"\n");
 +    }
 +}
 +
 +static int compact_and_copy_vec_at(int ncg,int *move,
 +                                   int *cgindex,
 +                                   int nvec,int vec,
 +                                   rvec *src,gmx_domdec_comm_t *comm,
 +                                   gmx_bool bCompact)
 +{
 +    int m,icg,i,i0,i1,nrcg;
 +    int home_pos;
 +    int pos_vec[DIM*2];
 +    
 +    home_pos = 0;
 +
 +    for(m=0; m<DIM*2; m++)
 +    {
 +        pos_vec[m] = 0;
 +    }
 +    
 +    i0 = 0;
 +    for(icg=0; icg<ncg; icg++)
 +    {
 +        i1 = cgindex[icg+1];
 +        m = move[icg];
 +        if (m == -1)
 +        {
 +            if (bCompact)
 +            {
 +                /* Compact the home array in place */
 +                for(i=i0; i<i1; i++)
 +                {
 +                    copy_rvec(src[i],src[home_pos++]);
 +                }
 +            }
 +        }
 +        else
 +        {
 +            /* Copy to the communication buffer */
 +            nrcg = i1 - i0;
 +            pos_vec[m] += 1 + vec*nrcg;
 +            for(i=i0; i<i1; i++)
 +            {
 +                copy_rvec(src[i],comm->cgcm_state[m][pos_vec[m]++]);
 +            }
 +            pos_vec[m] += (nvec - vec - 1)*nrcg;
 +        }
 +        if (!bCompact)
 +        {
 +            home_pos += i1 - i0;
 +        }
 +        i0 = i1;
 +    }
 +    
 +    return home_pos;
 +}
 +
 +static int compact_and_copy_vec_cg(int ncg,int *move,
 +                                   int *cgindex,
 +                                   int nvec,rvec *src,gmx_domdec_comm_t *comm,
 +                                   gmx_bool bCompact)
 +{
 +    int m,icg,i0,i1,nrcg;
 +    int home_pos;
 +    int pos_vec[DIM*2];
 +    
 +    home_pos = 0;
 +    
 +    for(m=0; m<DIM*2; m++)
 +    {
 +        pos_vec[m] = 0;
 +    }
 +    
 +    i0 = 0;
 +    for(icg=0; icg<ncg; icg++)
 +    {
 +        i1 = cgindex[icg+1];
 +        m = move[icg];
 +        if (m == -1)
 +        {
 +            if (bCompact)
 +            {
 +                /* Compact the home array in place */
 +                copy_rvec(src[icg],src[home_pos++]);
 +            }
 +        }
 +        else
 +        {
 +            nrcg = i1 - i0;
 +            /* Copy to the communication buffer */
 +            copy_rvec(src[icg],comm->cgcm_state[m][pos_vec[m]]);
 +            pos_vec[m] += 1 + nrcg*nvec;
 +        }
 +        i0 = i1;
 +    }
 +    if (!bCompact)
 +    {
 +        home_pos = ncg;
 +    }
 +    
 +    return home_pos;
 +}
 +
 +static int compact_ind(int ncg,int *move,
 +                       int *index_gl,int *cgindex,
 +                       int *gatindex,
 +                       gmx_ga2la_t ga2la,char *bLocalCG,
 +                       int *cginfo)
 +{
 +    int cg,nat,a0,a1,a,a_gl;
 +    int home_pos;
 +
 +    home_pos = 0;
 +    nat = 0;
 +    for(cg=0; cg<ncg; cg++)
 +    {
 +        a0 = cgindex[cg];
 +        a1 = cgindex[cg+1];
 +        if (move[cg] == -1)
 +        {
 +            /* Compact the home arrays in place.
 +             * Anything that can be done here avoids access to global arrays.
 +             */
 +            cgindex[home_pos] = nat;
 +            for(a=a0; a<a1; a++)
 +            {
 +                a_gl = gatindex[a];
 +                gatindex[nat] = a_gl;
 +                /* The cell number stays 0, so we don't need to set it */
 +                ga2la_change_la(ga2la,a_gl,nat);
 +                nat++;
 +            }
 +            index_gl[home_pos] = index_gl[cg];
 +            cginfo[home_pos]   = cginfo[cg];
 +            /* The charge group remains local, so bLocalCG does not change */
 +            home_pos++;
 +        }
 +        else
 +        {
 +            /* Clear the global indices */
 +            for(a=a0; a<a1; a++)
 +            {
 +                ga2la_del(ga2la,gatindex[a]);
 +            }
 +            if (bLocalCG)
 +            {
 +                bLocalCG[index_gl[cg]] = FALSE;
 +            }
 +        }
 +    }
 +    cgindex[home_pos] = nat;
 +    
 +    return home_pos;
 +}
 +
 +static void clear_and_mark_ind(int ncg,int *move,
 +                               int *index_gl,int *cgindex,int *gatindex,
 +                               gmx_ga2la_t ga2la,char *bLocalCG,
 +                               int *cell_index)
 +{
 +    int cg,a0,a1,a;
 +    
 +    for(cg=0; cg<ncg; cg++)
 +    {
 +        if (move[cg] >= 0)
 +        {
 +            a0 = cgindex[cg];
 +            a1 = cgindex[cg+1];
 +            /* Clear the global indices */
 +            for(a=a0; a<a1; a++)
 +            {
 +                ga2la_del(ga2la,gatindex[a]);
 +            }
 +            if (bLocalCG)
 +            {
 +                bLocalCG[index_gl[cg]] = FALSE;
 +            }
 +            /* Signal that this cg has moved using the ns cell index.
 +             * Here we set it to -1.
 +             * fill_grid will change it from -1 to 4*grid->ncells.
 +             */
 +            cell_index[cg] = -1;
 +        }
 +    }
 +}
 +
 +static void print_cg_move(FILE *fplog,
 +                          gmx_domdec_t *dd,
 +                          gmx_large_int_t step,int cg,int dim,int dir,
 +                          gmx_bool bHaveLimitdAndCMOld,real limitd,
 +                          rvec cm_old,rvec cm_new,real pos_d)
 +{
 +    gmx_domdec_comm_t *comm;
 +    char buf[22];
 +
 +    comm = dd->comm;
 +
 +    fprintf(fplog,"\nStep %s:\n",gmx_step_str(step,buf));
 +    if (bHaveLimitdAndCMOld)
 +    {
 +        fprintf(fplog,"The charge group starting at atom %d moved than the distance allowed by the domain decomposition (%f) in direction %c\n",
 +                ddglatnr(dd,dd->cgindex[cg]),limitd,dim2char(dim));
 +    }
 +    else
 +    {
 +        fprintf(fplog,"The charge group starting at atom %d moved than the distance allowed by the domain decomposition in direction %c\n",
 +                ddglatnr(dd,dd->cgindex[cg]),dim2char(dim));
 +    }
 +    fprintf(fplog,"distance out of cell %f\n",
 +            dir==1 ? pos_d - comm->cell_x1[dim] : pos_d - comm->cell_x0[dim]);
 +    if (bHaveLimitdAndCMOld)
 +    {
 +        fprintf(fplog,"Old coordinates: %8.3f %8.3f %8.3f\n",
 +                cm_old[XX],cm_old[YY],cm_old[ZZ]);
 +    }
 +    fprintf(fplog,"New coordinates: %8.3f %8.3f %8.3f\n",
 +            cm_new[XX],cm_new[YY],cm_new[ZZ]);
 +    fprintf(fplog,"Old cell boundaries in direction %c: %8.3f %8.3f\n",
 +            dim2char(dim),
 +            comm->old_cell_x0[dim],comm->old_cell_x1[dim]);
 +    fprintf(fplog,"New cell boundaries in direction %c: %8.3f %8.3f\n",
 +            dim2char(dim),
 +            comm->cell_x0[dim],comm->cell_x1[dim]);
 +}
 +
 +static void cg_move_error(FILE *fplog,
 +                          gmx_domdec_t *dd,
 +                          gmx_large_int_t step,int cg,int dim,int dir,
 +                          gmx_bool bHaveLimitdAndCMOld,real limitd,
 +                          rvec cm_old,rvec cm_new,real pos_d)
 +{
 +    if (fplog)
 +    {
 +        print_cg_move(fplog, dd,step,cg,dim,dir,
 +                      bHaveLimitdAndCMOld,limitd,cm_old,cm_new,pos_d);
 +    }
 +    print_cg_move(stderr,dd,step,cg,dim,dir,
 +                  bHaveLimitdAndCMOld,limitd,cm_old,cm_new,pos_d);
 +    gmx_fatal(FARGS,
 +              "A charge group moved too far between two domain decomposition steps\n"
 +              "This usually means that your system is not well equilibrated");
 +}
 +
 +static void rotate_state_atom(t_state *state,int a)
 +{
 +    int est;
 +
 +    for(est=0; est<estNR; est++)
 +    {
 +        if (EST_DISTR(est) && (state->flags & (1<<est))) {
 +            switch (est) {
 +            case estX:
 +                /* Rotate the complete state; for a rectangular box only */
 +                state->x[a][YY] = state->box[YY][YY] - state->x[a][YY];
 +                state->x[a][ZZ] = state->box[ZZ][ZZ] - state->x[a][ZZ];
 +                break;
 +            case estV:
 +                state->v[a][YY] = -state->v[a][YY];
 +                state->v[a][ZZ] = -state->v[a][ZZ];
 +                break;
 +            case estSDX:
 +                state->sd_X[a][YY] = -state->sd_X[a][YY];
 +                state->sd_X[a][ZZ] = -state->sd_X[a][ZZ];
 +                break;
 +            case estCGP:
 +                state->cg_p[a][YY] = -state->cg_p[a][YY];
 +                state->cg_p[a][ZZ] = -state->cg_p[a][ZZ];
 +                break;
 +            case estDISRE_INITF:
 +            case estDISRE_RM3TAV:
 +            case estORIRE_INITF:
 +            case estORIRE_DTAV:
 +                /* These are distances, so not affected by rotation */
 +                break;
 +            default:
 +                gmx_incons("Unknown state entry encountered in rotate_state_atom");            
 +            }
 +        }
 +    }
 +}
 +
 +static int dd_redistribute_cg(FILE *fplog,gmx_large_int_t step,
 +                              gmx_domdec_t *dd,ivec tric_dir,
 +                              t_state *state,rvec **f,
 +                              t_forcerec *fr,t_mdatoms *md,
 +                              gmx_bool bCompact,
 +                              t_nrnb *nrnb)
 +{
 +    int  *move;
 +    int  npbcdim;
 +    int  ncg[DIM*2],nat[DIM*2];
 +    int  c,i,cg,k,k0,k1,d,dim,dim2,dir,d2,d3,d4,cell_d;
 +    int  mc,cdd,nrcg,ncg_recv,nat_recv,nvs,nvr,nvec,vec;
 +    int  sbuf[2],rbuf[2];
 +    int  home_pos_cg,home_pos_at,ncg_stay_home,buf_pos;
 +    int  flag;
 +    gmx_bool bV=FALSE,bSDX=FALSE,bCGP=FALSE;
 +    gmx_bool bScrew;
 +    ivec dev;
 +    real inv_ncg,pos_d;
 +    matrix tcm;
 +    rvec *cg_cm,cell_x0,cell_x1,limitd,limit0,limit1,cm_new;
 +    atom_id *cgindex;
 +    cginfo_mb_t *cginfo_mb;
 +    gmx_domdec_comm_t *comm;
 +    
 +    if (dd->bScrewPBC)
 +    {
 +        check_screw_box(state->box);
 +    }
 +    
 +    comm  = dd->comm;
 +    cg_cm = fr->cg_cm;
 +    
 +    for(i=0; i<estNR; i++)
 +    {
 +        if (EST_DISTR(i))
 +        {
 +            switch (i)
 +            {
 +            case estX:   /* Always present */            break;
 +            case estV:   bV   = (state->flags & (1<<i)); break;
 +            case estSDX: bSDX = (state->flags & (1<<i)); break;
 +            case estCGP: bCGP = (state->flags & (1<<i)); break;
 +            case estLD_RNG:
 +            case estLD_RNGI:
 +            case estDISRE_INITF:
 +            case estDISRE_RM3TAV:
 +            case estORIRE_INITF:
 +            case estORIRE_DTAV:
 +                /* No processing required */
 +                break;
 +            default:
 +            gmx_incons("Unknown state entry encountered in dd_redistribute_cg");
 +            }
 +        }
 +    }
 +    
 +    if (dd->ncg_tot > comm->nalloc_int)
 +    {
 +        comm->nalloc_int = over_alloc_dd(dd->ncg_tot);
 +        srenew(comm->buf_int,comm->nalloc_int);
 +    }
 +    move = comm->buf_int;
 +    
 +    /* Clear the count */
 +    for(c=0; c<dd->ndim*2; c++)
 +    {
 +        ncg[c] = 0;
 +        nat[c] = 0;
 +    }
 +
 +    npbcdim = dd->npbcdim;
 +
 +    for(d=0; (d<DIM); d++)
 +    {
 +        limitd[d] = dd->comm->cellsize_min[d];
 +        if (d >= npbcdim && dd->ci[d] == 0)
 +        {
 +            cell_x0[d] = -GMX_FLOAT_MAX;
 +        }
 +        else
 +        {
 +            cell_x0[d] = comm->cell_x0[d];
 +        }
 +        if (d >= npbcdim && dd->ci[d] == dd->nc[d] - 1)
 +        {
 +            cell_x1[d] = GMX_FLOAT_MAX;
 +        }
 +        else
 +        {
 +            cell_x1[d] = comm->cell_x1[d];
 +        }
 +        if (d < npbcdim)
 +        {
 +            limit0[d] = comm->old_cell_x0[d] - limitd[d];
 +            limit1[d] = comm->old_cell_x1[d] + limitd[d];
 +        }
 +        else
 +        {
 +            /* We check after communication if a charge group moved
 +             * more than one cell. Set the pre-comm check limit to float_max.
 +             */
 +            limit0[d] = -GMX_FLOAT_MAX;
 +            limit1[d] =  GMX_FLOAT_MAX;
 +        }
 +    }
 +    
 +    make_tric_corr_matrix(npbcdim,state->box,tcm);
 +    
 +    cgindex = dd->cgindex;
 +    
 +    /* Compute the center of geometry for all home charge groups
 +     * and put them in the box and determine where they should go.
 +     */
 +    for(cg=0; cg<dd->ncg_home; cg++)
 +    {
 +        k0   = cgindex[cg];
 +        k1   = cgindex[cg+1];
 +        nrcg = k1 - k0;
 +        if (nrcg == 1)
 +        {
 +            copy_rvec(state->x[k0],cm_new);
 +        }
 +        else
 +        {
 +            inv_ncg = 1.0/nrcg;
 +            
 +            clear_rvec(cm_new);
 +            for(k=k0; (k<k1); k++)
 +            {
 +                rvec_inc(cm_new,state->x[k]);
 +            }
 +            for(d=0; (d<DIM); d++)
 +            {
 +                cm_new[d] = inv_ncg*cm_new[d];
 +            }
 +        }
 +        
 +        clear_ivec(dev);
 +        /* Do pbc and check DD cell boundary crossings */
 +        for(d=DIM-1; d>=0; d--)
 +        {
 +            if (dd->nc[d] > 1)
 +            {
 +                bScrew = (dd->bScrewPBC && d == XX);
 +                /* Determine the location of this cg in lattice coordinates */
 +                pos_d = cm_new[d];
 +                if (tric_dir[d])
 +                {
 +                    for(d2=d+1; d2<DIM; d2++)
 +                    {
 +                        pos_d += cm_new[d2]*tcm[d2][d];
 +                    }
 +                }
 +                /* Put the charge group in the triclinic unit-cell */
 +                if (pos_d >= cell_x1[d])
 +                {
 +                    if (pos_d >= limit1[d])
 +                    {
 +                        cg_move_error(fplog,dd,step,cg,d,1,TRUE,limitd[d],
 +                                      cg_cm[cg],cm_new,pos_d);
 +                    }
 +                    dev[d] = 1;
 +                    if (dd->ci[d] == dd->nc[d] - 1)
 +                    {
 +                        rvec_dec(cm_new,state->box[d]);
 +                        if (bScrew)
 +                        {
 +                            cm_new[YY] = state->box[YY][YY] - cm_new[YY];
 +                            cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
 +                        }
 +                        for(k=k0; (k<k1); k++)
 +                        {
 +                            rvec_dec(state->x[k],state->box[d]);
 +                            if (bScrew)
 +                            {
 +                                rotate_state_atom(state,k);
 +                            }
 +                        }
 +                    }
 +                }
 +                else if (pos_d < cell_x0[d])
 +                {
 +                    if (pos_d < limit0[d])
 +                    {
 +                        cg_move_error(fplog,dd,step,cg,d,-1,TRUE,limitd[d],
 +                                      cg_cm[cg],cm_new,pos_d);
 +                    }
 +                    dev[d] = -1;
 +                    if (dd->ci[d] == 0)
 +                    {
 +                        rvec_inc(cm_new,state->box[d]);
 +                        if (bScrew)
 +                        {
 +                            cm_new[YY] = state->box[YY][YY] - cm_new[YY];
 +                            cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
 +                        }
 +                        for(k=k0; (k<k1); k++)
 +                        {
 +                            rvec_inc(state->x[k],state->box[d]);
 +                            if (bScrew)
 +                            {
 +                                rotate_state_atom(state,k);
 +                            }
 +                        }
 +                    }
 +                }
 +            }
 +            else if (d < npbcdim)
 +            {
 +                /* Put the charge group in the rectangular unit-cell */
 +                while (cm_new[d] >= state->box[d][d])
 +                {
 +                    rvec_dec(cm_new,state->box[d]);
 +                    for(k=k0; (k<k1); k++)
 +                    {
 +                        rvec_dec(state->x[k],state->box[d]);
 +                    }
 +                }
 +                while (cm_new[d] < 0)
 +                {
 +                    rvec_inc(cm_new,state->box[d]);
 +                    for(k=k0; (k<k1); k++)
 +                    {
 +                        rvec_inc(state->x[k],state->box[d]);
 +                    }
 +                }
 +            }
 +        }
 +    
 +        copy_rvec(cm_new,cg_cm[cg]);
 +        
 +        /* Determine where this cg should go */
 +        flag = 0;
 +        mc = -1;
 +        for(d=0; d<dd->ndim; d++)
 +        {
 +            dim = dd->dim[d];
 +            if (dev[dim] == 1)
 +            {
 +                flag |= DD_FLAG_FW(d);
 +                if (mc == -1)
 +                {
 +                    mc = d*2;
 +                }
 +            }
 +            else if (dev[dim] == -1)
 +            {
 +                flag |= DD_FLAG_BW(d);
 +                if (mc == -1) {
 +                    if (dd->nc[dim] > 2)
 +                    {
 +                        mc = d*2 + 1;
 +                    }
 +                    else
 +                    {
 +                        mc = d*2;
 +                    }
 +                }
 +            }
 +        }
 +        move[cg] = mc;
 +        if (mc >= 0)
 +        {
 +            if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
 +            {
 +                comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
 +                srenew(comm->cggl_flag[mc],comm->cggl_flag_nalloc[mc]*DD_CGIBS);
 +            }
 +            comm->cggl_flag[mc][ncg[mc]*DD_CGIBS  ] = dd->index_gl[cg];
 +            /* We store the cg size in the lower 16 bits
 +             * and the place where the charge group should go
 +             * in the next 6 bits. This saves some communication volume.
 +             */
 +            comm->cggl_flag[mc][ncg[mc]*DD_CGIBS+1] = nrcg | flag;
 +            ncg[mc] += 1;
 +            nat[mc] += nrcg;
 +        }
 +    }
 +    
 +    inc_nrnb(nrnb,eNR_CGCM,dd->nat_home);
 +    inc_nrnb(nrnb,eNR_RESETX,dd->ncg_home);
 +    
 +    nvec = 1;
 +    if (bV)
 +    {
 +        nvec++;
 +    }
 +    if (bSDX)
 +    {
 +        nvec++;
 +    }
 +    if (bCGP)
 +    {
 +        nvec++;
 +    }
 +    
 +    /* Make sure the communication buffers are large enough */
 +    for(mc=0; mc<dd->ndim*2; mc++)
 +    {
 +        nvr = ncg[mc] + nat[mc]*nvec;
 +        if (nvr > comm->cgcm_state_nalloc[mc])
 +        {
 +            comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr);
 +            srenew(comm->cgcm_state[mc],comm->cgcm_state_nalloc[mc]);
 +        }
 +    }
 +    
 +    /* Recalculating cg_cm might be cheaper than communicating,
 +     * but that could give rise to rounding issues.
 +     */
 +    home_pos_cg =
 +        compact_and_copy_vec_cg(dd->ncg_home,move,cgindex,
 +                                nvec,cg_cm,comm,bCompact);
 +    
 +    vec = 0;
 +    home_pos_at =
 +        compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
 +                                nvec,vec++,state->x,comm,bCompact);
 +    if (bV)
 +    {
 +        compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
 +                                nvec,vec++,state->v,comm,bCompact);
 +    }
 +    if (bSDX)
 +    {
 +        compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
 +                                nvec,vec++,state->sd_X,comm,bCompact);
 +    }
 +    if (bCGP)
 +    {
 +        compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
 +                                nvec,vec++,state->cg_p,comm,bCompact);
 +    }
 +    
 +    if (bCompact)
 +    {
 +        compact_ind(dd->ncg_home,move,
 +                    dd->index_gl,dd->cgindex,dd->gatindex,
 +                    dd->ga2la,comm->bLocalCG,
 +                    fr->cginfo);
 +    }
 +    else
 +    {
 +        clear_and_mark_ind(dd->ncg_home,move,
 +                           dd->index_gl,dd->cgindex,dd->gatindex,
 +                           dd->ga2la,comm->bLocalCG,
 +                           fr->ns.grid->cell_index);
 +    }
 +    
 +    cginfo_mb = fr->cginfo_mb;
 +
 +    ncg_stay_home = home_pos_cg;
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        dim = dd->dim[d];
 +        ncg_recv = 0;
 +        nat_recv = 0;
 +        nvr      = 0;
 +        for(dir=0; dir<(dd->nc[dim]==2 ? 1 : 2); dir++)
 +        {
 +            cdd = d*2 + dir;
 +            /* Communicate the cg and atom counts */
 +            sbuf[0] = ncg[cdd];
 +            sbuf[1] = nat[cdd];
 +            if (debug)
 +            {
 +                fprintf(debug,"Sending ddim %d dir %d: ncg %d nat %d\n",
 +                        d,dir,sbuf[0],sbuf[1]);
 +            }
 +            dd_sendrecv_int(dd, d, dir, sbuf, 2, rbuf, 2);
 +            
 +            if ((ncg_recv+rbuf[0])*DD_CGIBS > comm->nalloc_int)
 +            {
 +                comm->nalloc_int = over_alloc_dd((ncg_recv+rbuf[0])*DD_CGIBS);
 +                srenew(comm->buf_int,comm->nalloc_int);
 +            }
 +            
 +            /* Communicate the charge group indices, sizes and flags */
 +            dd_sendrecv_int(dd, d, dir,
 +                            comm->cggl_flag[cdd], sbuf[0]*DD_CGIBS,
 +                            comm->buf_int+ncg_recv*DD_CGIBS, rbuf[0]*DD_CGIBS);
 +            
 +            nvs = ncg[cdd] + nat[cdd]*nvec;
 +            i   = rbuf[0]  + rbuf[1] *nvec;
 +            vec_rvec_check_alloc(&comm->vbuf,nvr+i);
 +            
 +            /* Communicate cgcm and state */
 +            dd_sendrecv_rvec(dd, d, dir,
 +                             comm->cgcm_state[cdd], nvs,
 +                             comm->vbuf.v+nvr, i);
 +            ncg_recv += rbuf[0];
 +            nat_recv += rbuf[1];
 +            nvr      += i;
 +        }
 +        
 +        /* Process the received charge groups */
 +        buf_pos = 0;
 +        for(cg=0; cg<ncg_recv; cg++)
 +        {
 +            flag = comm->buf_int[cg*DD_CGIBS+1];
 +
 +            if (dim >= npbcdim && dd->nc[dim] > 2)
 +            {
 +                /* No pbc in this dim and more than one domain boundary.
 +                 * We to a separate check if a charge did not move too far.
 +                 */
 +                if (((flag & DD_FLAG_FW(d)) &&
 +                     comm->vbuf.v[buf_pos][d] > cell_x1[dim]) ||
 +                    ((flag & DD_FLAG_BW(d)) &&
 +                     comm->vbuf.v[buf_pos][d] < cell_x0[dim]))
 +                {
 +                    cg_move_error(fplog,dd,step,cg,d,
 +                                  (flag & DD_FLAG_FW(d)) ? 1 : 0,
 +                                   FALSE,0,
 +                                   comm->vbuf.v[buf_pos],
 +                                   comm->vbuf.v[buf_pos],
 +                                   comm->vbuf.v[buf_pos][d]);
 +                }
 +            }
 +
 +            mc = -1;
 +            if (d < dd->ndim-1)
 +            {
 +                /* Check which direction this cg should go */
 +                for(d2=d+1; (d2<dd->ndim && mc==-1); d2++)
 +                {
 +                    if (dd->bGridJump)
 +                    {
 +                        /* The cell boundaries for dimension d2 are not equal
 +                         * for each cell row of the lower dimension(s),
 +                         * therefore we might need to redetermine where
 +                         * this cg should go.
 +                         */
 +                        dim2 = dd->dim[d2];
 +                        /* If this cg crosses the box boundary in dimension d2
 +                         * we can use the communicated flag, so we do not
 +                         * have to worry about pbc.
 +                         */
 +                        if (!((dd->ci[dim2] == dd->nc[dim2]-1 &&
 +                               (flag & DD_FLAG_FW(d2))) ||
 +                              (dd->ci[dim2] == 0 &&
 +                               (flag & DD_FLAG_BW(d2)))))
 +                        {
 +                            /* Clear the two flags for this dimension */
 +                            flag &= ~(DD_FLAG_FW(d2) | DD_FLAG_BW(d2));
 +                            /* Determine the location of this cg
 +                             * in lattice coordinates
 +                             */
 +                            pos_d = comm->vbuf.v[buf_pos][dim2];
 +                            if (tric_dir[dim2])
 +                            {
 +                                for(d3=dim2+1; d3<DIM; d3++)
 +                                {
 +                                    pos_d +=
 +                                        comm->vbuf.v[buf_pos][d3]*tcm[d3][dim2];
 +                                }
 +                            }
 +                            /* Check of we are not at the box edge.
 +                             * pbc is only handled in the first step above,
 +                             * but this check could move over pbc while
 +                             * the first step did not due to different rounding.
 +                             */
 +                            if (pos_d >= cell_x1[dim2] &&
 +                                dd->ci[dim2] != dd->nc[dim2]-1)
 +                            {
 +                                flag |= DD_FLAG_FW(d2);
 +                            }
 +                            else if (pos_d < cell_x0[dim2] &&
 +                                     dd->ci[dim2] != 0)
 +                            {
 +                                flag |= DD_FLAG_BW(d2);
 +                            }
 +                            comm->buf_int[cg*DD_CGIBS+1] = flag;
 +                        }
 +                    }
 +                    /* Set to which neighboring cell this cg should go */
 +                    if (flag & DD_FLAG_FW(d2))
 +                    {
 +                        mc = d2*2;
 +                    }
 +                    else if (flag & DD_FLAG_BW(d2))
 +                    {
 +                        if (dd->nc[dd->dim[d2]] > 2)
 +                        {
 +                            mc = d2*2+1;
 +                        }
 +                        else
 +                        {
 +                            mc = d2*2;
 +                        }
 +                    }
 +                }
 +            }
 +            
 +            nrcg = flag & DD_FLAG_NRCG;
 +            if (mc == -1)
 +            {
 +                if (home_pos_cg+1 > dd->cg_nalloc)
 +                {
 +                    dd->cg_nalloc = over_alloc_dd(home_pos_cg+1);
 +                    srenew(dd->index_gl,dd->cg_nalloc);
 +                    srenew(dd->cgindex,dd->cg_nalloc+1);
 +                }
 +                /* Set the global charge group index and size */
 +                dd->index_gl[home_pos_cg] = comm->buf_int[cg*DD_CGIBS];
 +                dd->cgindex[home_pos_cg+1] = dd->cgindex[home_pos_cg] + nrcg;
 +                /* Copy the state from the buffer */
 +                if (home_pos_cg >= fr->cg_nalloc)
 +                {
 +                    dd_realloc_fr_cg(fr,home_pos_cg+1);
 +                    cg_cm = fr->cg_cm;
 +                }
 +                copy_rvec(comm->vbuf.v[buf_pos++],cg_cm[home_pos_cg]);
 +                /* Set the cginfo */
 +                fr->cginfo[home_pos_cg] = ddcginfo(cginfo_mb,
 +                                                   dd->index_gl[home_pos_cg]);
 +                if (comm->bLocalCG)
 +                {
 +                    comm->bLocalCG[dd->index_gl[home_pos_cg]] = TRUE;
 +                }
 +
 +                if (home_pos_at+nrcg > state->nalloc)
 +                {
 +                    dd_realloc_state(state,f,home_pos_at+nrcg);
 +                }
 +                for(i=0; i<nrcg; i++)
 +                {
 +                    copy_rvec(comm->vbuf.v[buf_pos++],
 +                              state->x[home_pos_at+i]);
 +                }
 +                if (bV)
 +                {
 +                    for(i=0; i<nrcg; i++)
 +                    {
 +                        copy_rvec(comm->vbuf.v[buf_pos++],
 +                                  state->v[home_pos_at+i]);
 +                    }
 +                }
 +                if (bSDX)
 +                {
 +                    for(i=0; i<nrcg; i++)
 +                    {
 +                        copy_rvec(comm->vbuf.v[buf_pos++],
 +                                  state->sd_X[home_pos_at+i]);
 +                    }
 +                }
 +                if (bCGP)
 +                {
 +                    for(i=0; i<nrcg; i++)
 +                    {
 +                        copy_rvec(comm->vbuf.v[buf_pos++],
 +                                  state->cg_p[home_pos_at+i]);
 +                    }
 +                }
 +                home_pos_cg += 1;
 +                home_pos_at += nrcg;
 +            }
 +            else
 +            {
 +                /* Reallocate the buffers if necessary  */
 +                if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
 +                {
 +                    comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
 +                    srenew(comm->cggl_flag[mc],comm->cggl_flag_nalloc[mc]*DD_CGIBS);
 +                }
 +                nvr = ncg[mc] + nat[mc]*nvec;
 +                if (nvr + 1 + nrcg*nvec > comm->cgcm_state_nalloc[mc])
 +                {
 +                    comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr + 1 + nrcg*nvec);
 +                    srenew(comm->cgcm_state[mc],comm->cgcm_state_nalloc[mc]);
 +                }
 +                /* Copy from the receive to the send buffers */
 +                memcpy(comm->cggl_flag[mc] + ncg[mc]*DD_CGIBS,
 +                       comm->buf_int + cg*DD_CGIBS,
 +                       DD_CGIBS*sizeof(int));
 +                memcpy(comm->cgcm_state[mc][nvr],
 +                       comm->vbuf.v[buf_pos],
 +                       (1+nrcg*nvec)*sizeof(rvec));
 +                buf_pos += 1 + nrcg*nvec;
 +                ncg[mc] += 1;
 +                nat[mc] += nrcg;
 +            }
 +        }
 +    }
 +    
 +    /* With sorting (!bCompact) the indices are now only partially up to date
 +     * and ncg_home and nat_home are not the real count, since there are
 +     * "holes" in the arrays for the charge groups that moved to neighbors.
 +     */
 +    dd->ncg_home = home_pos_cg;
 +    dd->nat_home = home_pos_at;
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"Finished repartitioning\n");
 +    }
 +
 +    return ncg_stay_home;
 +}
 +
 +void dd_cycles_add(gmx_domdec_t *dd,float cycles,int ddCycl)
 +{
 +    dd->comm->cycl[ddCycl] += cycles;
 +    dd->comm->cycl_n[ddCycl]++;
 +    if (cycles > dd->comm->cycl_max[ddCycl])
 +    {
 +        dd->comm->cycl_max[ddCycl] = cycles;
 +    }
 +}
 +
 +static double force_flop_count(t_nrnb *nrnb)
 +{
 +    int i;
 +    double sum;
 +    const char *name;
 +
 +    sum = 0;
 +    for(i=eNR_NBKERNEL010; i<eNR_NBKERNEL_FREE_ENERGY; i++)
 +    {
 +        /* To get closer to the real timings, we half the count
 +         * for the normal loops and again half it for water loops.
 +         */
 +        name = nrnb_str(i);
 +        if (strstr(name,"W3") != NULL || strstr(name,"W4") != NULL)
 +        {
 +            sum += nrnb->n[i]*0.25*cost_nrnb(i);
 +        }
 +        else
 +        {
 +            sum += nrnb->n[i]*0.50*cost_nrnb(i);
 +        }
 +    }
 +    for(i=eNR_NBKERNEL_FREE_ENERGY; i<=eNR_NB14; i++)
 +    {
 +        name = nrnb_str(i);
 +        if (strstr(name,"W3") != NULL || strstr(name,"W4") != NULL)
 +        sum += nrnb->n[i]*cost_nrnb(i);
 +    }
 +    for(i=eNR_BONDS; i<=eNR_WALLS; i++)
 +    {
 +        sum += nrnb->n[i]*cost_nrnb(i);
 +    }
 +
 +    return sum;
 +}
 +
 +void dd_force_flop_start(gmx_domdec_t *dd,t_nrnb *nrnb)
 +{
 +    if (dd->comm->eFlop)
 +    {
 +        dd->comm->flop -= force_flop_count(nrnb);
 +    }
 +}
 +void dd_force_flop_stop(gmx_domdec_t *dd,t_nrnb *nrnb)
 +{
 +    if (dd->comm->eFlop)
 +    {
 +        dd->comm->flop += force_flop_count(nrnb);
 +        dd->comm->flop_n++;
 +    }
 +}  
 +
 +static void clear_dd_cycle_counts(gmx_domdec_t *dd)
 +{
 +    int i;
 +    
 +    for(i=0; i<ddCyclNr; i++)
 +    {
 +        dd->comm->cycl[i] = 0;
 +        dd->comm->cycl_n[i] = 0;
 +        dd->comm->cycl_max[i] = 0;
 +    }
 +    dd->comm->flop = 0;
 +    dd->comm->flop_n = 0;
 +}
 +
 +/* Collect the load measurements over the DD grid.
 + *
 + * The DD dimensions are processed from the last one down to the first.
 + * In every dimension each participating rank packs its values into sbuf
 + * and the rows are gathered on rank 0 of comm->mpi_comm_load[d].
 + * The rank whose cell index along that dimension equals the master cell
 + * index reduces the gathered row into comm->load[d]; that result is
 + * the input that is packed for the next (lower) dimension.
 + * Finally the DD master adds the totals in comm->load[0] to the running
 + * accumulators in comm.
 + * The time spent in this routine is counted under ewcDDCOMMLOAD.
 + */
 +static void get_load_distribution(gmx_domdec_t *dd,gmx_wallcycle_t wcycle)
 +{
 +    gmx_domdec_comm_t *comm;
 +    gmx_domdec_load_t *load;
 +    gmx_domdec_root_t *root=NULL;
 +    int  d,dim,cid,i,pos;
 +    float cell_frac=0,sbuf[DD_NLOAD_MAX];
 +    gmx_bool bSepPME;
 +    
 +    if (debug)
 +    {
 +        fprintf(debug,"get_load_distribution start\n");
 +    }
 +
 +    wallcycle_start(wcycle,ewcDDCOMMLOAD);
 +    
 +    comm = dd->comm;
 +    
 +    /* A non-negative pme_nodeid is used as the marker for separate PME nodes */
 +    bSepPME = (dd->pme_nodeid >= 0);
 +    
 +    for(d=dd->ndim-1; d>=0; d--)
 +    {
 +        dim = dd->dim[d];
 +        /* Check if we participate in the communication in this dimension */
 +        if (d == dd->ndim-1 || 
 +            (dd->ci[dd->dim[d+1]]==0 && dd->ci[dd->dim[dd->ndim-1]]==0))
 +        {
 +            load = &comm->load[d];
 +            if (dd->bGridJump)
 +            {
 +                cell_frac = comm->cell_f1[d] - comm->cell_f0[d];
 +            }
 +            /* Pack the send buffer; the unpacking loop further down
 +             * reads the entries back in exactly this order.
 +             */
 +            pos = 0;
 +            if (d == dd->ndim-1)
 +            {
 +                /* Last dimension: send the values of this cell itself.
 +                 * The force load is used for the sum, the maximum and
 +                 * (with bGridJump) the sum_m entry.
 +                 */
 +                sbuf[pos++] = dd_force_load(comm);
 +                sbuf[pos++] = sbuf[0];
 +                if (dd->bGridJump)
 +                {
 +                    sbuf[pos++] = sbuf[0];
 +                    sbuf[pos++] = cell_frac;
 +                    if (d > 0)
 +                    {
 +                        sbuf[pos++] = comm->cell_f_max0[d];
 +                        sbuf[pos++] = comm->cell_f_min1[d];
 +                    }
 +                }
 +                if (bSepPME)
 +                {
 +                    sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
 +                    sbuf[pos++] = comm->cycl[ddCyclPME];
 +                }
 +            }
 +            else
 +            {
 +                /* Other dimensions: pass on the values that were reduced
 +                 * for dimension d+1. Unlike the branch above, this one
 +                 * also sends a flags entry.
 +                 */
 +                sbuf[pos++] = comm->load[d+1].sum;
 +                sbuf[pos++] = comm->load[d+1].max;
 +                if (dd->bGridJump)
 +                {
 +                    sbuf[pos++] = comm->load[d+1].sum_m;
 +                    sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
 +                    sbuf[pos++] = comm->load[d+1].flags;
 +                    if (d > 0)
 +                    {
 +                        sbuf[pos++] = comm->cell_f_max0[d];
 +                        sbuf[pos++] = comm->cell_f_min1[d];
 +                    }
 +                }
 +                if (bSepPME)
 +                {
 +                    sbuf[pos++] = comm->load[d+1].mdf;
 +                    sbuf[pos++] = comm->load[d+1].pme;
 +                }
 +            }
 +            /* The number of floats packed per cell */
 +            load->nload = pos;
 +            /* Communicate a row in DD direction d.
 +             * The communicators are setup such that the root always has rank 0.
 +             */
 +#ifdef GMX_MPI
 +            MPI_Gather(sbuf      ,load->nload*sizeof(float),MPI_BYTE,
 +                       load->load,load->nload*sizeof(float),MPI_BYTE,
 +                       0,comm->mpi_comm_load[d]);
 +#endif
 +            if (dd->ci[dim] == dd->master_ci[dim])
 +            {
 +                /* We are the root, process this row */
 +                if (comm->bDynLoadBal)
 +                {
 +                    root = comm->root[d];
 +                }
 +                load->sum = 0;
 +                load->max = 0;
 +                load->sum_m = 0;
 +                load->cvol_min = 1;
 +                load->flags = 0;
 +                load->mdf = 0;
 +                load->pme = 0;
 +                /* Walk over the gathered entries of all cells in this row */
 +                pos = 0;
 +                for(i=0; i<dd->nc[dim]; i++)
 +                {
 +                    load->sum += load->load[pos++];
 +                    load->max = max(load->max,load->load[pos]);
 +                    pos++;
 +                    if (dd->bGridJump)
 +                    {
 +                        /* NOTE(review): root is only assigned above when
 +                         * comm->bDynLoadBal is set, but it is dereferenced
 +                         * here whenever dd->bGridJump is set; presumably
 +                         * bGridJump implies bDynLoadBal - confirm.
 +                         */
 +                        if (root->bLimited)
 +                        {
 +                            /* This direction could not be load balanced properly,
 +                             * therefore we need to use the maximum iso the average load.
 +                             */
 +                            load->sum_m = max(load->sum_m,load->load[pos]);
 +                        }
 +                        else
 +                        {
 +                            load->sum_m += load->load[pos];
 +                        }
 +                        pos++;
 +                        load->cvol_min = min(load->cvol_min,load->load[pos]);
 +                        pos++;
 +                        if (d < dd->ndim-1)
 +                        {
 +                            /* The flags were sent as a float, round back to int.
 +                             * The value is overwritten for every cell i,
 +                             * so the entry of the last cell in the row is kept.
 +                             */
 +                            load->flags = (int)(load->load[pos++] + 0.5);
 +                        }
 +                        if (d > 0)
 +                        {
 +                            root->cell_f_max0[i] = load->load[pos++];
 +                            root->cell_f_min1[i] = load->load[pos++];
 +                        }
 +                    }
 +                    if (bSepPME)
 +                    {
 +                        load->mdf = max(load->mdf,load->load[pos]);
 +                        pos++;
 +                        load->pme = max(load->pme,load->load[pos]);
 +                        pos++;
 +                    }
 +                }
 +                if (comm->bDynLoadBal && root->bLimited)
 +                {
 +                    /* sum_m holds a maximum in this case, scale it with
 +                     * the number of cells and mark dimension d in the flags.
 +                     */
 +                    load->sum_m *= dd->nc[dim];
 +                    load->flags |= (1<<d);
 +                }
 +            }
 +        }
 +    }
 +
 +    if (DDMASTER(dd))
 +    {
 +        /* Add the values of this measurement to the running totals */
 +        comm->nload      += dd_load_count(comm);
 +        comm->load_step  += comm->cycl[ddCyclStep];
 +        comm->load_sum   += comm->load[0].sum;
 +        comm->load_max   += comm->load[0].max;
 +        if (comm->bDynLoadBal)
 +        {
 +            /* Count, per dimension, how often the flags bit was set */
 +            for(d=0; d<dd->ndim; d++)
 +            {
 +                if (comm->load[0].flags & (1<<d))
 +                {
 +                    comm->load_lim[d]++;
 +                }
 +            }
 +        }
 +        if (bSepPME)
 +        {
 +            comm->load_mdf += comm->load[0].mdf;
 +            comm->load_pme += comm->load[0].pme;
 +        }
 +    }
 +
 +    wallcycle_stop(wcycle,ewcDDCOMMLOAD);
 +    
 +    if (debug)
 +    {
 +        fprintf(debug,"get_load_distribution finished\n");
 +    }
 +}
 +
 +static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
 +{
 +    /* Return the relative performance loss on the total run time
 +     * due to the force calculation load imbalance.
 +     */
 +    if (dd->comm->nload > 0)
 +    {
 +        return
 +            (dd->comm->load_max*dd->nnodes - dd->comm->load_sum)/
 +            (dd->comm->load_step*dd->nnodes);
 +    }
 +    else
 +    {
 +        return 0;
 +    }
 +}
 +
 +/* Print a summary of the accumulated load statistics to fplog and stderr.
 + *
 + * Only the DD master prints, and only when at least one load measurement
 + * was accumulated (comm->nload > 0). The summary consists of the average
 + * load imbalance, the estimated run time lost to it, with dynamic load
 + * balancing the percentage of steps in which the balancing was limited
 + * per dimension and, with separate PME nodes, the PME mesh/force load
 + * ratio. NOTEs with advice are added when a loss is at least DD_PERF_LOSS.
 + *
 + * fplog is written to without a NULL check.
 + */
 +static void print_dd_load_av(FILE *fplog,gmx_domdec_t *dd)
 +{
 +    char  buf[STRLEN];
 +    int   npp,npme,nnodes,d,limp;
 +    float imbal,pme_f_ratio,lossf,lossp=0;
 +    gmx_bool  bLim;
 +    gmx_domdec_comm_t *comm;
 +
 +    comm = dd->comm;
 +    if (DDMASTER(dd) && comm->nload > 0)
 +    {
 +        /* PME nodes are only counted when pme_nodeid is non-negative */
 +        npp    = dd->nnodes;
 +        npme   = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
 +        nnodes = npp + npme;
 +        /* Ratio of the maximum to the average load, minus one */
 +        imbal = comm->load_max*npp/comm->load_sum - 1;
 +        lossf = dd_force_imb_perf_loss(dd);
 +        sprintf(buf," Average load imbalance: %.1f %%\n",imbal*100);
 +        fprintf(fplog,"%s",buf);
 +        fprintf(stderr,"\n");
 +        fprintf(stderr,"%s",buf);
 +        sprintf(buf," Part of the total run time spent waiting due to load imbalance: %.1f %%\n",lossf*100);
 +        fprintf(fplog,"%s",buf);
 +        fprintf(stderr,"%s",buf);
 +        bLim = FALSE;
 +        if (comm->bDynLoadBal)
 +        {
 +            sprintf(buf," Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
 +            for(d=0; d<dd->ndim; d++)
 +            {
 +                /* Percentage of load_lim[d] relative to nload,
 +                 * computed with integer arithmetic.
 +                 */
 +                limp = (200*comm->load_lim[d]+1)/(2*comm->nload);
 +                sprintf(buf+strlen(buf)," %c %d %%",dim2char(dd->dim[d]),limp);
 +                if (limp >= 50)
 +                {
 +                    /* Limited in at least half of the counted steps */
 +                    bLim = TRUE;
 +                }
 +            }
 +            sprintf(buf+strlen(buf),"\n");
 +            fprintf(fplog,"%s",buf);
 +            fprintf(stderr,"%s",buf);
 +        }
 +        if (npme > 0)
 +        {
 +            pme_f_ratio = comm->load_pme/comm->load_mdf;
 +            /* The sign of lossp tells which side had more work:
 +             * it is negative when load_pme is smaller than load_mdf.
 +             * The loss is weighted with the fraction of PME nodes when
 +             * it is not positive and with the fraction of PP nodes otherwise.
 +             */
 +            lossp = (comm->load_pme -comm->load_mdf)/comm->load_step;
 +            if (lossp <= 0)
 +            {
 +                lossp *= (float)npme/(float)nnodes;
 +            }
 +            else
 +            {
 +                lossp *= (float)npp/(float)nnodes;
 +            }
 +            sprintf(buf," Average PME mesh/force load: %5.3f\n",pme_f_ratio);
 +            fprintf(fplog,"%s",buf);
 +            fprintf(stderr,"%s",buf);
 +            sprintf(buf," Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n",fabs(lossp)*100);
 +            fprintf(fplog,"%s",buf);
 +            fprintf(stderr,"%s",buf);
 +        }
 +        fprintf(fplog,"\n");
 +        fprintf(stderr,"\n");
 +        
 +        /* Advice on the force load imbalance */
 +        if (lossf >= DD_PERF_LOSS)
 +        {
 +            sprintf(buf,
 +                    "NOTE: %.1f %% performance was lost due to load imbalance\n"
 +                    "      in the domain decomposition.\n",lossf*100);
 +            if (!comm->bDynLoadBal)
 +            {
 +                sprintf(buf+strlen(buf),"      You might want to use dynamic load balancing (option -dlb.)\n");
 +            }
 +            else if (bLim)
 +            {
 +                sprintf(buf+strlen(buf),"      You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
 +            }
 +            fprintf(fplog,"%s\n",buf);
 +            fprintf(stderr,"%s\n",buf);
 +        }
 +        /* Advice on the PP/PME imbalance; the wording depends on the sign of lossp */
 +        if (npme > 0 && fabs(lossp) >= DD_PERF_LOSS)
 +        {
 +            sprintf(buf,
 +                    "NOTE: %.1f %% performance was lost because the PME nodes\n"
 +                    "      had %s work to do than the PP nodes.\n"
 +                    "      You might want to %s the number of PME nodes\n"
 +                    "      or %s the cut-off and the grid spacing.\n",
 +                    fabs(lossp*100),
 +                    (lossp < 0) ? "less"     : "more",
 +                    (lossp < 0) ? "decrease" : "increase",
 +                    (lossp < 0) ? "decrease" : "increase");
 +            fprintf(fplog,"%s\n",buf);
 +            fprintf(stderr,"%s\n",buf);
 +        }
 +    }
 +}
 +
 +static float dd_vol_min(gmx_domdec_t *dd)
 +{
 +    return dd->comm->load[0].cvol_min*dd->nnodes;
 +}
 +
 +static gmx_bool dd_load_flags(gmx_domdec_t *dd)
 +{
 +    return dd->comm->load[0].flags;
 +}
 +
 +static float dd_f_imbal(gmx_domdec_t *dd)
 +{
 +    return dd->comm->load[0].max*dd->nnodes/dd->comm->load[0].sum - 1;
 +}
 +
 +static float dd_pme_f_ratio(gmx_domdec_t *dd)
 +{
 +    return dd->comm->load[0].pme/dd->comm->load[0].mdf;
 +}
 +
 +static void dd_print_load(FILE *fplog,gmx_domdec_t *dd,gmx_large_int_t step)
 +{
 +    int flags,d;
 +    char buf[22];
 +    
 +    flags = dd_load_flags(dd);
 +    if (flags)
 +    {
 +        fprintf(fplog,
 +                "DD  load balancing is limited by minimum cell size in dimension");
 +        for(d=0; d<dd->ndim; d++)
 +        {
 +            if (flags & (1<<d))
 +            {
 +                fprintf(fplog," %c",dim2char(dd->dim[d]));
 +            }
 +        }
 +        fprintf(fplog,"\n");
 +    }
 +    fprintf(fplog,"DD  step %s",gmx_step_str(step,buf));
 +    if (dd->comm->bDynLoadBal)
 +    {
 +        fprintf(fplog,"  vol min/aver %5.3f%c",
 +                dd_vol_min(dd),flags ? '!' : ' ');
 +    }
 +    fprintf(fplog," load imb.: force %4.1f%%",dd_f_imbal(dd)*100);
 +    if (dd->comm->cycl_n[ddCyclPME])
 +    {
 +        fprintf(fplog,"  pme mesh/force %5.3f",dd_pme_f_ratio(dd));
 +    }
 +    fprintf(fplog,"\n\n");
 +}
 +
 +static void dd_print_load_verbose(gmx_domdec_t *dd)
 +{
 +    if (dd->comm->bDynLoadBal)
 +    {
 +        fprintf(stderr,"vol %4.2f%c ",
 +                dd_vol_min(dd),dd_load_flags(dd) ? '!' : ' ');
 +    }
 +    fprintf(stderr,"imb F %2d%% ",(int)(dd_f_imbal(dd)*100+0.5));
 +    if (dd->comm->cycl_n[ddCyclPME])
 +    {
 +        fprintf(stderr,"pme/F %4.2f ",dd_pme_f_ratio(dd));
 +    }
 +}
 +
 +#ifdef GMX_MPI
 +static void make_load_communicator(gmx_domdec_t *dd,MPI_Group g_all,
 +                                   int dim_ind,ivec loc)
 +{
 +    MPI_Group g_row = MPI_GROUP_EMPTY;
 +    MPI_Comm  c_row;
 +    int  dim,i,*rank;
 +    ivec loc_c;
 +    gmx_domdec_root_t *root;
 +    gmx_bool bPartOfGroup = FALSE;
 +    
 +    dim = dd->dim[dim_ind];
 +    copy_ivec(loc,loc_c);
 +    snew(rank,dd->nc[dim]);
 +    for(i=0; i<dd->nc[dim]; i++)
 +    {
 +        loc_c[dim] = i;
 +        rank[i] = dd_index(dd->nc,loc_c);
 +        if (rank[i] == dd->rank)
 +        {
 +            /* This process is part of the group */
 +            bPartOfGroup = TRUE;
 +        }
 +    }
 +    if (bPartOfGroup)
 +    {
 +        MPI_Group_incl(g_all,dd->nc[dim],rank,&g_row);
 +    }
 +    MPI_Comm_create(dd->mpi_comm_all,g_row,&c_row);
 +    if (bPartOfGroup)
 +    {
 +        dd->comm->mpi_comm_load[dim_ind] = c_row;
 +        if (dd->comm->eDLB != edlbNO)
 +        {
 +            if (dd->ci[dim] == dd->master_ci[dim])
 +            {
 +                /* This is the root process of this row */
 +                snew(dd->comm->root[dim_ind],1);
 +                root = dd->comm->root[dim_ind];
 +                snew(root->cell_f,DD_CELL_F_SIZE(dd,dim_ind));
 +                snew(root->old_cell_f,dd->nc[dim]+1);
 +                snew(root->bCellMin,dd->nc[dim]);
 +                if (dim_ind > 0)
 +                {
 +                    snew(root->cell_f_max0,dd->nc[dim]);
 +                    snew(root->cell_f_min1,dd->nc[dim]);
 +                    snew(root->bound_min,dd->nc[dim]);
 +                    snew(root->bound_max,dd->nc[dim]);
 +                }
 +                snew(root->buf_ncd,dd->nc[dim]);
 +            }
 +            else
 +            {
 +                /* This is not a root process, we only need to receive cell_f */
 +                snew(dd->comm->cell_f_row,DD_CELL_F_SIZE(dd,dim_ind));
 +            }
 +        }
 +        if (dd->ci[dim] == dd->master_ci[dim])
 +        {
 +            snew(dd->comm->load[dim_ind].load,dd->nc[dim]*DD_NLOAD_MAX);
 +        }
 +    }
 +    sfree(rank);
 +}
 +#endif
 +
 +static void make_load_communicators(gmx_domdec_t *dd)
 +{
 +#ifdef GMX_MPI
 +  MPI_Group g_all;
 +  int  dim0,dim1,i,j;
 +  ivec loc;
 +
 +  if (debug)
 +    fprintf(debug,"Making load communicators\n");
 +
 +  MPI_Comm_group(dd->mpi_comm_all,&g_all);
 +  
 +  snew(dd->comm->load,dd->ndim);
 +  snew(dd->comm->mpi_comm_load,dd->ndim);
 +  
 +  clear_ivec(loc);
 +  make_load_communicator(dd,g_all,0,loc);
 +  if (dd->ndim > 1) {
 +    dim0 = dd->dim[0];
 +    for(i=0; i<dd->nc[dim0]; i++) {
 +      loc[dim0] = i;
 +      make_load_communicator(dd,g_all,1,loc);
 +    }
 +  }
 +  if (dd->ndim > 2) {
 +    dim0 = dd->dim[0];
 +    for(i=0; i<dd->nc[dim0]; i++) {
 +      loc[dim0] = i;
 +      dim1 = dd->dim[1];
 +      for(j=0; j<dd->nc[dim1]; j++) {
 +        loc[dim1] = j;
 +        make_load_communicator(dd,g_all,2,loc);
 +      }
 +    }
 +  }
 +
 +  MPI_Group_free(&g_all);
 +
 +  if (debug)
 +    fprintf(debug,"Finished making load communicators\n");
 +#endif
 +}
 +
 +/* Set up the domain decomposition grid data for this rank.
 + *
 + * Determines the forward and backward neighbor rank in each DD dimension,
 + * reports the grid to stderr (DD master only) and to fplog (when not NULL),
 + * fills dd->comm->zones with the zone shifts and the i-zone/j-zone ranges
 + * for a 1, 2 or 3D decomposition, allocates the dd->comm->root array when
 + * dynamic load balancing is not switched off and creates the load
 + * communicators when dd->comm->bRecordLoad is set.
 + * A fatal error is issued when dd->ndim is not 1, 2 or 3.
 + */
 +void setup_dd_grid(FILE *fplog,gmx_domdec_t *dd)
 +{
 +    /* NOTE(review): bZYX is declared but not used in this function */
 +    gmx_bool bZYX;
 +    int  d,dim,i,j,m;
 +    ivec tmp,s;
 +    int  nzone,nzonep;
 +    ivec dd_zp[DD_MAXIZONE];
 +    gmx_domdec_zones_t *zones;
 +    gmx_domdec_ns_ranges_t *izone;
 +    
 +    /* Determine the neighbor ranks, with periodic wrapping of the cell index:
 +     * neighbor[d][0] is one cell up and neighbor[d][1] one cell down.
 +     */
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        dim = dd->dim[d];
 +        copy_ivec(dd->ci,tmp);
 +        tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
 +        dd->neighbor[d][0] = ddcoord2ddnodeid(dd,tmp);
 +        copy_ivec(dd->ci,tmp);
 +        tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
 +        dd->neighbor[d][1] = ddcoord2ddnodeid(dd,tmp);
 +        if (debug)
 +        {
 +            fprintf(debug,"DD rank %d neighbor ranks in dir %d are + %d - %d\n",
 +                    dd->rank,dim,
 +                    dd->neighbor[d][0],
 +                    dd->neighbor[d][1]);
 +        }
 +    }
 +    
 +    if (DDMASTER(dd))
 +    {
 +        fprintf(stderr,"Making %dD domain decomposition %d x %d x %d\n",
 +          dd->ndim,dd->nc[XX],dd->nc[YY],dd->nc[ZZ]);
 +    }
 +    if (fplog)
 +    {
 +        fprintf(fplog,"\nMaking %dD domain decomposition grid %d x %d x %d, home cell index %d %d %d\n\n",
 +                dd->ndim,
 +                dd->nc[XX],dd->nc[YY],dd->nc[ZZ],
 +                dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
 +    }
 +    /* Select the zone count and copy the zone pair table
 +     * that belongs to the dimensionality of the decomposition.
 +     */
 +    switch (dd->ndim)
 +    {
 +    case 3:
 +        nzone  = dd_z3n;
 +        nzonep = dd_zp3n;
 +        for(i=0; i<nzonep; i++)
 +        {
 +            copy_ivec(dd_zp3[i],dd_zp[i]);
 +        }
 +        break;
 +    case 2:
 +        nzone  = dd_z2n;
 +        nzonep = dd_zp2n;
 +        for(i=0; i<nzonep; i++)
 +        {
 +            copy_ivec(dd_zp2[i],dd_zp[i]);
 +        }
 +        break;
 +    case 1:
 +        nzone  = dd_z1n;
 +        nzonep = dd_zp1n;
 +        for(i=0; i<nzonep; i++)
 +        {
 +            copy_ivec(dd_zp1[i],dd_zp[i]);
 +        }
 +        break;
 +    default:
 +        gmx_fatal(FARGS,"Can only do 1, 2 or 3D domain decomposition");
 +        /* NOTE(review): presumably gmx_fatal does not return and these
 +         * assignments only avoid uninitialized variable warnings - confirm.
 +         */
 +        nzone = 0;
 +        nzonep = 0;
 +    }
 +
 +    zones = &dd->comm->zones;
 +
 +    /* Map the zone offsets, which are stored per DD dimension index,
 +     * onto the Cartesian dimensions that are decomposed.
 +     */
 +    for(i=0; i<nzone; i++)
 +    {
 +        m = 0;
 +        clear_ivec(zones->shift[i]);
 +        for(d=0; d<dd->ndim; d++)
 +        {
 +            zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
 +        }
 +    }
 +    
 +    zones->n = nzone;
 +    /* NOTE(review): s is filled with the periodically wrapped cell index
 +     * of each zone, but it is never read afterwards in this function,
 +     * so this loop has no visible effect - confirm whether it can go.
 +     */
 +    for(i=0; i<nzone; i++)
 +    {
 +        for(d=0; d<DIM; d++)
 +        {
 +            s[d] = dd->ci[d] - zones->shift[i][d];
 +            if (s[d] < 0)
 +            {
 +                s[d] += dd->nc[d];
 +            }
 +            else if (s[d] >= dd->nc[d])
 +            {
 +                s[d] -= dd->nc[d];
 +            }
 +        }
 +    }
 +    zones->nizone = nzonep;
 +    for(i=0; i<zones->nizone; i++)
 +    {
 +        /* The first entry of each zone pair row should be the i-zone index */
 +        if (dd_zp[i][0] != i)
 +        {
 +            gmx_fatal(FARGS,"Internal inconsistency in the dd grid setup");
 +        }
 +        izone = &zones->izone[i];
 +        /* The j-zones of i-zone i are the zones j0 up to, but not including, j1 */
 +        izone->j0 = dd_zp[i][1];
 +        izone->j1 = dd_zp[i][2];
 +        for(dim=0; dim<DIM; dim++)
 +        {
 +            if (dd->nc[dim] == 1)
 +            {
 +                /* All shifts should be allowed */
 +                izone->shift0[dim] = -1;
 +                izone->shift1[dim] = 1;
 +            }
 +            else
 +            {
 +                /*
 +                  izone->shift0[d] = 0;
 +                  izone->shift1[d] = 0;
 +                  for(j=izone->j0; j<izone->j1; j++) {
 +                  if (dd->shift[j][d] > dd->shift[i][d])
 +                  izone->shift0[d] = -1;
 +                  if (dd->shift[j][d] < dd->shift[i][d])
 +                  izone->shift1[d] = 1;
 +                  }
 +                */
 +                
 +                int shift_diff;
 +                
 +                /* Assume the shift are not more than 1 cell */
 +                /* Start from an empty range and extend it to the minimum and
 +                 * maximum shift difference between the j-zones and i-zone i.
 +                 */
 +                izone->shift0[dim] = 1;
 +                izone->shift1[dim] = -1;
 +                for(j=izone->j0; j<izone->j1; j++)
 +                {
 +                    shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
 +                    if (shift_diff < izone->shift0[dim])
 +                    {
 +                        izone->shift0[dim] = shift_diff;
 +                    }
 +                    if (shift_diff > izone->shift1[dim])
 +                    {
 +                        izone->shift1[dim] = shift_diff;
 +                    }
 +                }
 +            }
 +        }
 +    }
 +    
 +    /* The root entries themselves are allocated in make_load_communicator */
 +    if (dd->comm->eDLB != edlbNO)
 +    {
 +        snew(dd->comm->root,dd->ndim);
 +    }
 +    
 +    if (dd->comm->bRecordLoad)
 +    {
 +        make_load_communicators(dd);
 +    }
 +}
 +
 +/* Set up the communicator of the particle-particle (PP) nodes.
 + *
 + * With GMX_MPI this optionally replaces cr->mpi_comm_mygroup by
 + * a periodic Cartesian communicator, sets dd->mpi_comm_all and dd->rank
 + * and determines dd->masterrank. Depending on the Cartesian setup
 + * it also builds the ddindex2ddnodeid or ddindex2simnodeid lookup table
 + * and sets the cell coordinates dd->ci and dd->master_ci.
 + * Without GMX_MPI only the report at the end is executed.
 + *
 + * reorder is passed on to MPI_Cart_create.
 + */
 +static void make_pp_communicator(FILE *fplog,t_commrec *cr,int reorder)
 +{
 +    gmx_domdec_t *dd;
 +    gmx_domdec_comm_t *comm;
 +    int  i,rank,*buf;
 +    ivec periods;
 +#ifdef GMX_MPI
 +    MPI_Comm comm_cart;
 +#endif
 +    
 +    dd = cr->dd;
 +    comm = dd->comm;
 +    
 +#ifdef GMX_MPI
 +    if (comm->bCartesianPP)
 +    {
 +        /* Set up cartesian communication for the particle-particle part */
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Will use a Cartesian communicator: %d x %d x %d\n",
 +                    dd->nc[XX],dd->nc[YY],dd->nc[ZZ]);
 +        }
 +        
 +        /* The grid is periodic in all dimensions */
 +        for(i=0; i<DIM; i++)
 +        {
 +            periods[i] = TRUE;
 +        }
 +        MPI_Cart_create(cr->mpi_comm_mygroup,DIM,dd->nc,periods,reorder,
 +                        &comm_cart);
 +        /* We overwrite the old communicator with the new cartesian one */
 +        cr->mpi_comm_mygroup = comm_cart;
 +    }
 +    
 +    dd->mpi_comm_all = cr->mpi_comm_mygroup;
 +    MPI_Comm_rank(dd->mpi_comm_all,&dd->rank);
 +    
 +    if (comm->bCartesianPP_PME)
 +    {
 +        /* Since we want to use the original cartesian setup for sim,
 +         * and not the one after split, we need to make an index.
 +         */
 +        /* Each rank fills in its own entry, the sum over all ranks
 +         * then completes the table (presumably snew zero-initializes
 +         * the array - confirm).
 +         */
 +        snew(comm->ddindex2ddnodeid,dd->nnodes);
 +        comm->ddindex2ddnodeid[dd_index(dd->nc,dd->ci)] = dd->rank;
 +        gmx_sumi(dd->nnodes,comm->ddindex2ddnodeid,cr);
 +        /* Get the rank of the DD master,
 +         * above we made sure that the master node is a PP node.
 +         */
 +        /* Only the master contributes a non-zero value,
 +         * so the sum over all ranks equals the rank of the master.
 +         */
 +        if (MASTER(cr))
 +        {
 +            rank = dd->rank;
 +        }
 +        else
 +        {
 +            rank = 0;
 +        }
 +        MPI_Allreduce(&rank,&dd->masterrank,1,MPI_INT,MPI_SUM,dd->mpi_comm_all);
 +    }
 +    else if (comm->bCartesianPP)
 +    {
 +        if (cr->npmenodes == 0)
 +        {
 +            /* The PP communicator is also
 +             * the communicator for this simulation
 +             */
 +            cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
 +        }
 +        cr->nodeid = dd->rank;
 +        
 +        MPI_Cart_coords(dd->mpi_comm_all,dd->rank,DIM,dd->ci);
 +        
 +        /* We need to make an index to go from the coordinates
 +         * to the nodeid of this simulation.
 +         */
 +        /* NOTE(review): the summation below only gives the intended table
 +         * when the untouched entries of buf are zero; presumably snew
 +         * zero-initializes the array - confirm.
 +         */
 +        snew(comm->ddindex2simnodeid,dd->nnodes);
 +        snew(buf,dd->nnodes);
 +        if (cr->duty & DUTY_PP)
 +        {
 +            buf[dd_index(dd->nc,dd->ci)] = cr->sim_nodeid;
 +        }
 +        /* Communicate the ddindex to simulation nodeid index */
 +        MPI_Allreduce(buf,comm->ddindex2simnodeid,dd->nnodes,MPI_INT,MPI_SUM,
 +                      cr->mpi_comm_mysim);
 +        sfree(buf);
 +        
 +        /* Determine the master coordinates and rank.
 +         * The DD master should be the same node as the master of this sim.
 +         */
 +        /* The cell that maps to simulation nodeid 0 is the master cell */
 +        for(i=0; i<dd->nnodes; i++)
 +        {
 +            if (comm->ddindex2simnodeid[i] == 0)
 +            {
 +                ddindex2xyz(dd->nc,i,dd->master_ci);
 +                MPI_Cart_rank(dd->mpi_comm_all,dd->master_ci,&dd->masterrank);
 +            }
 +        }
 +        if (debug)
 +        {
 +            fprintf(debug,"The master rank is %d\n",dd->masterrank);
 +        }
 +    }
 +    else
 +    {
 +        /* No Cartesian communicators */
 +        /* We use the rank in dd->comm->all as DD index */
 +        ddindex2xyz(dd->nc,dd->rank,dd->ci);
 +        /* The simulation master nodeid is 0, so the DD master rank is also 0 */
 +        dd->masterrank = 0;
 +        clear_ivec(dd->master_ci);
 +    }
 +#endif
 +  
 +    /* Report the rank and cell coordinates to the log and the debug file */
 +    if (fplog)
 +    {
 +        fprintf(fplog,
 +                "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
 +                dd->rank,dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
 +    }
 +    if (debug)
 +    {
 +        fprintf(debug,
 +                "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
 +                dd->rank,dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
 +    }
 +}
 +
 +static void receive_ddindex2simnodeid(t_commrec *cr)
 +{
 +    gmx_domdec_t *dd;
 +    
 +    gmx_domdec_comm_t *comm;
 +    int  *buf;
 +    
 +    dd = cr->dd;
 +    comm = dd->comm;
 +    
 +#ifdef GMX_MPI
 +    if (!comm->bCartesianPP_PME && comm->bCartesianPP)
 +    {
 +        snew(comm->ddindex2simnodeid,dd->nnodes);
 +        snew(buf,dd->nnodes);
 +        if (cr->duty & DUTY_PP)
 +        {
 +            buf[dd_index(dd->nc,dd->ci)] = cr->sim_nodeid;
 +        }
 +#ifdef GMX_MPI
 +        /* Communicate the ddindex to simulation nodeid index */
 +        MPI_Allreduce(buf,comm->ddindex2simnodeid,dd->nnodes,MPI_INT,MPI_SUM,
 +                      cr->mpi_comm_mysim);
 +#endif
 +        sfree(buf);
 +    }
 +#endif
 +}
 +
 +static gmx_domdec_master_t *init_gmx_domdec_master_t(gmx_domdec_t *dd,
 +                                                     int ncg,int natoms)
 +{
 +    gmx_domdec_master_t *ma;
 +    int i;
 +
 +    snew(ma,1);
 +    
 +    snew(ma->ncg,dd->nnodes);
 +    snew(ma->index,dd->nnodes+1);
 +    snew(ma->cg,ncg);
 +    snew(ma->nat,dd->nnodes);
 +    snew(ma->ibuf,dd->nnodes*2);
 +    snew(ma->cell_x,DIM);
 +    for(i=0; i<DIM; i++)
 +    {
 +        snew(ma->cell_x[i],dd->nc[i]+1);
 +    }
 +
 +    if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
 +    {
 +        ma->vbuf = NULL;
 +    }
 +    else
 +    {
 +        snew(ma->vbuf,natoms);
 +    }
 +
 +    return ma;
 +}
 +
 +static void split_communicator(FILE *fplog,t_commrec *cr,int dd_node_order,
 +                               int reorder)
 +{
 +    gmx_domdec_t *dd;
 +    gmx_domdec_comm_t *comm;
 +    int  i,rank;
 +    gmx_bool bDiv[DIM];
 +    ivec periods;
 +#ifdef GMX_MPI
 +    MPI_Comm comm_cart;
 +#endif
 +    
 +    dd = cr->dd;
 +    comm = dd->comm;
 +    
 +    if (comm->bCartesianPP)
 +    {
 +        for(i=1; i<DIM; i++)
 +        {
 +            bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
 +        }
 +        if (bDiv[YY] || bDiv[ZZ])
 +        {
 +            comm->bCartesianPP_PME = TRUE;
 +            /* If we have 2D PME decomposition, which is always in x+y,
 +             * we stack the PME only nodes in z.
 +             * Otherwise we choose the direction that provides the thinnest slab
 +             * of PME only nodes as this will have the least effect
 +             * on the PP communication.
 +             * But for the PME communication the opposite might be better.
 +             */
 +            if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
 +                             !bDiv[YY] ||
 +                             dd->nc[YY] > dd->nc[ZZ]))
 +            {
 +                comm->cartpmedim = ZZ;
 +            }
 +            else
 +            {
 +                comm->cartpmedim = YY;
 +            }
 +            comm->ntot[comm->cartpmedim]
 +                += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes;
 +        }
 +        else if (fplog)
 +        {
 +            fprintf(fplog,"#pmenodes (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n",cr->npmenodes,dd->nc[XX],dd->nc[YY],dd->nc[XX],dd->nc[ZZ]);
 +            fprintf(fplog,
 +                    "Will not use a Cartesian communicator for PP <-> PME\n\n");
 +        }
 +    }
 +    
 +#ifdef GMX_MPI
 +    if (comm->bCartesianPP_PME)
 +    {
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n",comm->ntot[XX],comm->ntot[YY],comm->ntot[ZZ]);
 +        }
 +        
 +        for(i=0; i<DIM; i++)
 +        {
 +            periods[i] = TRUE;
 +        }
 +        MPI_Cart_create(cr->mpi_comm_mysim,DIM,comm->ntot,periods,reorder,
 +                        &comm_cart);
 +        
 +        MPI_Comm_rank(comm_cart,&rank);
 +        if (MASTERNODE(cr) && rank != 0)
 +        {
 +            gmx_fatal(FARGS,"MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
 +        }
 +        
 +        /* With this assigment we loose the link to the original communicator
 +         * which will usually be MPI_COMM_WORLD, unless have multisim.
 +         */
 +        cr->mpi_comm_mysim = comm_cart;
 +        cr->sim_nodeid = rank;
 +        
 +        MPI_Cart_coords(cr->mpi_comm_mysim,cr->sim_nodeid,DIM,dd->ci);
 +        
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Cartesian nodeid %d, coordinates %d %d %d\n\n",
 +                    cr->sim_nodeid,dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
 +        }
 +        
 +        if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim])
 +        {
 +            cr->duty = DUTY_PP;
 +        }
 +        if (cr->npmenodes == 0 ||
 +            dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim])
 +        {
 +            cr->duty = DUTY_PME;
 +        }
 +        
 +        /* Split the sim communicator into PP and PME only nodes */
 +        MPI_Comm_split(cr->mpi_comm_mysim,
 +                       cr->duty,
 +                       dd_index(comm->ntot,dd->ci),
 +                       &cr->mpi_comm_mygroup);
 +    }
 +    else
 +    {
 +        switch (dd_node_order)
 +        {
 +        case ddnoPP_PME:
 +            if (fplog)
 +            {
 +                fprintf(fplog,"Order of the nodes: PP first, PME last\n");
 +            }
 +            break;
 +        case ddnoINTERLEAVE:
 +            /* Interleave the PP-only and PME-only nodes,
 +             * as on clusters with dual-core machines this will double
 +             * the communication bandwidth of the PME processes
 +             * and thus speed up the PP <-> PME and inter PME communication.
 +             */
 +            if (fplog)
 +            {
 +                fprintf(fplog,"Interleaving PP and PME nodes\n");
 +            }
 +            comm->pmenodes = dd_pmenodes(cr);
 +            break;
 +        case ddnoCARTESIAN:
 +            break;
 +        default:
 +            gmx_fatal(FARGS,"Unknown dd_node_order=%d",dd_node_order);
 +        }
 +    
 +        if (dd_simnode2pmenode(cr,cr->sim_nodeid) == -1)
 +        {
 +            cr->duty = DUTY_PME;
 +        }
 +        else
 +        {
 +            cr->duty = DUTY_PP;
 +        }
 +        
 +        /* Split the sim communicator into PP and PME only nodes */
 +        MPI_Comm_split(cr->mpi_comm_mysim,
 +                       cr->duty,
 +                       cr->nodeid,
 +                       &cr->mpi_comm_mygroup);
 +        MPI_Comm_rank(cr->mpi_comm_mygroup,&cr->nodeid);
 +    }
 +#endif
 +
 +    if (fplog)
 +    {
 +        fprintf(fplog,"This is a %s only node\n\n",
 +                (cr->duty & DUTY_PP) ? "particle-particle" : "PME-mesh");
 +    }
 +}
 +
 +void make_dd_communicators(FILE *fplog,t_commrec *cr,int dd_node_order)
 +{
 +    gmx_domdec_t *dd;
 +    gmx_domdec_comm_t *comm;
 +    int CartReorder;
 +    
 +    dd = cr->dd;
 +    comm = dd->comm;
 +    
 +    copy_ivec(dd->nc,comm->ntot);
 +    
 +    comm->bCartesianPP = (dd_node_order == ddnoCARTESIAN);
 +    comm->bCartesianPP_PME = FALSE;
 +    
 +    /* Reorder the nodes by default. This might change the MPI ranks.
 +     * Real reordering is only supported on very few architectures,
 +     * Blue Gene is one of them.
 +     */
 +    CartReorder = (getenv("GMX_NO_CART_REORDER") == NULL);
 +    
 +    if (cr->npmenodes > 0)
 +    {
 +        /* Split the communicator into a PP and PME part */
 +        split_communicator(fplog,cr,dd_node_order,CartReorder);
 +        if (comm->bCartesianPP_PME)
 +        {
 +            /* We (possibly) reordered the nodes in split_communicator,
 +             * so it is no longer required in make_pp_communicator.
 +             */
 +            CartReorder = FALSE;
 +        }
 +    }
 +    else
 +    {
 +        /* All nodes do PP and PME */
 +#ifdef GMX_MPI    
 +        /* We do not require separate communicators */
 +        cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
 +#endif
 +    }
 +    
 +    if (cr->duty & DUTY_PP)
 +    {
 +        /* Copy or make a new PP communicator */
 +        make_pp_communicator(fplog,cr,CartReorder);
 +    }
 +    else
 +    {
 +        receive_ddindex2simnodeid(cr);
 +    }
 +    
 +    if (!(cr->duty & DUTY_PME))
 +    {
 +        /* Set up the commnuication to our PME node */
 +        dd->pme_nodeid = dd_simnode2pmenode(cr,cr->sim_nodeid);
 +        dd->pme_receive_vir_ener = receive_vir_ener(cr);
 +        if (debug)
 +        {
 +            fprintf(debug,"My pme_nodeid %d receive ener %d\n",
 +                    dd->pme_nodeid,dd->pme_receive_vir_ener);
 +        }
 +    }
 +    else
 +    {
 +        dd->pme_nodeid = -1;
 +    }
 +
 +    if (DDMASTER(dd))
 +    {
 +        dd->ma = init_gmx_domdec_master_t(dd,
 +                                          comm->cgs_gl.nr,
 +                                          comm->cgs_gl.index[comm->cgs_gl.nr]);
 +    }
 +}
 +
 +static real *get_slb_frac(FILE *fplog,const char *dir,int nc,const char *size_string)
 +{
 +    real *slb_frac,tot;
 +    int  i,n;
 +    double dbl;
 +    
 +    slb_frac = NULL;
 +    if (nc > 1 && size_string != NULL)
 +    {
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Using static load balancing for the %s direction\n",
 +                    dir);
 +        }
 +        snew(slb_frac,nc);
 +        tot = 0;
 +        for (i=0; i<nc; i++)
 +        {
 +            dbl = 0;
 +            sscanf(size_string,"%lf%n",&dbl,&n);
 +            if (dbl == 0)
 +            {
 +                gmx_fatal(FARGS,"Incorrect or not enough DD cell size entries for direction %s: '%s'",dir,size_string);
 +            }
 +            slb_frac[i] = dbl;
 +            size_string += n;
 +            tot += slb_frac[i];
 +        }
 +        /* Normalize */
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Relative cell sizes:");
 +        }
 +        for (i=0; i<nc; i++)
 +        {
 +            slb_frac[i] /= tot;
 +            if (fplog)
 +            {
 +                fprintf(fplog," %5.3f",slb_frac[i]);
 +            }
 +        }
 +        if (fplog)
 +        {
 +            fprintf(fplog,"\n");
 +        }
 +    }
 +    
 +    return slb_frac;
 +}
 +
 +static int multi_body_bondeds_count(gmx_mtop_t *mtop)
 +{
 +    int n,nmol,ftype;
 +    gmx_mtop_ilistloop_t iloop;
 +    t_ilist *il;
 +    
 +    n = 0;
 +    iloop = gmx_mtop_ilistloop_init(mtop);
 +    while (gmx_mtop_ilistloop_next(iloop,&il,&nmol))
 +    {
 +        for(ftype=0; ftype<F_NRE; ftype++)
 +        {
 +            if ((interaction_function[ftype].flags & IF_BOND) &&
 +                NRAL(ftype) >  2)
 +            {
 +                n += nmol*il[ftype].nr/(1 + NRAL(ftype));
 +            }
 +        }
 +  }
 +
 +  return n;
 +}
 +
 +static int dd_nst_env(FILE *fplog,const char *env_var,int def)
 +{
 +    char *val;
 +    int  nst;
 +    
 +    nst = def;
 +    val = getenv(env_var);
 +    if (val)
 +    {
 +        if (sscanf(val,"%d",&nst) <= 0)
 +        {
 +            nst = 1;
 +        }
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Found env.var. %s = %s, using value %d\n",
 +                    env_var,val,nst);
 +        }
 +    }
 +    
 +    return nst;
 +}
 +
 +static void dd_warning(t_commrec *cr,FILE *fplog,const char *warn_string)
 +{
 +    if (MASTER(cr))
 +    {
 +        fprintf(stderr,"\n%s\n",warn_string);
 +    }
 +    if (fplog)
 +    {
 +        fprintf(fplog,"\n%s\n",warn_string);
 +    }
 +}
 +
 +static void check_dd_restrictions(t_commrec *cr,gmx_domdec_t *dd,
 +                                  t_inputrec *ir,FILE *fplog)
 +{
 +    if (ir->ePBC == epbcSCREW &&
 +        (dd->nc[XX] == 1 || dd->nc[YY] > 1 || dd->nc[ZZ] > 1))
 +    {
 +        gmx_fatal(FARGS,"With pbc=%s can only do domain decomposition in the x-direction",epbc_names[ir->ePBC]);
 +    }
 +
 +    if (ir->ns_type == ensSIMPLE)
 +    {
 +        gmx_fatal(FARGS,"Domain decomposition does not support simple neighbor searching, use grid searching or use particle decomposition");
 +    }
 +
 +    if (ir->nstlist == 0)
 +    {
 +        gmx_fatal(FARGS,"Domain decomposition does not work with nstlist=0");
 +    }
 +
 +    if (ir->comm_mode == ecmANGULAR && ir->ePBC != epbcNONE)
 +    {
 +        dd_warning(cr,fplog,"comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
 +    }
 +}
 +
 +static real average_cellsize_min(gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
 +{
 +    int  di,d;
 +    real r;
 +
 +    r = ddbox->box_size[XX];
 +    for(di=0; di<dd->ndim; di++)
 +    {
 +        d = dd->dim[di];
 +        /* Check using the initial average cell size */
 +        r = min(r,ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
 +    }
 +
 +    return r;
 +}
 +
 +static int check_dlb_support(FILE *fplog,t_commrec *cr,
 +                             const char *dlb_opt,gmx_bool bRecordLoad,
 +                             unsigned long Flags,t_inputrec *ir)
 +{
 +    gmx_domdec_t *dd;
 +    int  eDLB=-1;
 +    char buf[STRLEN];
 +
 +    switch (dlb_opt[0])
 +    {
 +    case 'a': eDLB = edlbAUTO; break;
 +    case 'n': eDLB = edlbNO;   break;
 +    case 'y': eDLB = edlbYES;  break;
 +    default: gmx_incons("Unknown dlb_opt");
 +    }
 +
 +    if (Flags & MD_RERUN)
 +    {
 +        return edlbNO;
 +    }
 +
 +    if (!EI_DYNAMICS(ir->eI))
 +    {
 +        if (eDLB == edlbYES)
 +        {
 +            sprintf(buf,"NOTE: dynamic load balancing is only supported with dynamics, not with integrator '%s'\n",EI(ir->eI));
 +            dd_warning(cr,fplog,buf);
 +        }
 +            
 +        return edlbNO;
 +    }
 +
 +    if (!bRecordLoad)
 +    {
 +        dd_warning(cr,fplog,"NOTE: Cycle counting is not supported on this architecture, will not use dynamic load balancing\n");
 +
 +        return edlbNO;
 +    }
 +
 +    if (Flags & MD_REPRODUCIBLE)
 +    {
 +        switch (eDLB)
 +        {
 +                      case edlbNO: 
 +                              break;
 +                      case edlbAUTO:
 +                              dd_warning(cr,fplog,"NOTE: reproducibility requested, will not use dynamic load balancing\n");
 +                              eDLB = edlbNO;
 +                              break;
 +                      case edlbYES:
 +                              dd_warning(cr,fplog,"WARNING: reproducibility requested with dynamic load balancing, the simulation will NOT be binary reproducible\n");
 +                              break;
 +                      default:
 +                              gmx_fatal(FARGS,"Death horror: undefined case (%d) for load balancing choice",eDLB);
 +                              break;
 +        }
 +    }
 +
 +    return eDLB;
 +}
 +
 +static void set_dd_dim(FILE *fplog,gmx_domdec_t *dd)
 +{
 +    int dim;
 +
 +    dd->ndim = 0;
 +    if (getenv("GMX_DD_ORDER_ZYX") != NULL)
 +    {
 +        /* Decomposition order z,y,x */
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Using domain decomposition order z, y, x\n");
 +        }
 +        for(dim=DIM-1; dim>=0; dim--)
 +        {
 +            if (dd->nc[dim] > 1)
 +            {
 +                dd->dim[dd->ndim++] = dim;
 +            }
 +        }
 +    }
 +    else
 +    {
 +        /* Decomposition order x,y,z */
 +        for(dim=0; dim<DIM; dim++)
 +        {
 +            if (dd->nc[dim] > 1)
 +            {
 +                dd->dim[dd->ndim++] = dim;
 +            }
 +        }
 +    }
 +}
 +
 +static gmx_domdec_comm_t *init_dd_comm()
 +{
 +    gmx_domdec_comm_t *comm;
 +    int  i;
 +
 +    snew(comm,1);
 +    snew(comm->cggl_flag,DIM*2);
 +    snew(comm->cgcm_state,DIM*2);
 +    for(i=0; i<DIM*2; i++)
 +    {
 +        comm->cggl_flag_nalloc[i]  = 0;
 +        comm->cgcm_state_nalloc[i] = 0;
 +    }
 +    
 +    comm->nalloc_int = 0;
 +    comm->buf_int    = NULL;
 +
 +    vec_rvec_init(&comm->vbuf);
 +
 +    comm->n_load_have    = 0;
 +    comm->n_load_collect = 0;
 +
 +    for(i=0; i<ddnatNR-ddnatZONE; i++)
 +    {
 +        comm->sum_nat[i] = 0;
 +    }
 +    comm->ndecomp = 0;
 +    comm->nload   = 0;
 +    comm->load_step = 0;
 +    comm->load_sum  = 0;
 +    comm->load_max  = 0;
 +    clear_ivec(comm->load_lim);
 +    comm->load_mdf  = 0;
 +    comm->load_pme  = 0;
 +
 +    return comm;
 +}
 +
 +gmx_domdec_t *init_domain_decomposition(FILE *fplog,t_commrec *cr,
 +                                        unsigned long Flags,
 +                                        ivec nc,
 +                                        real comm_distance_min,real rconstr,
 +                                        const char *dlb_opt,real dlb_scale,
 +                                        const char *sizex,const char *sizey,const char *sizez,
 +                                        gmx_mtop_t *mtop,t_inputrec *ir,
 +                                        matrix box,rvec *x,
 +                                        gmx_ddbox_t *ddbox,
 +                                        int *npme_x,int *npme_y)
 +{
 +    gmx_domdec_t *dd;
 +    gmx_domdec_comm_t *comm;
 +    int  recload;
 +    int  d,i,j;
 +    real r_2b,r_mb,r_bonded=-1,r_bonded_limit=-1,limit,acs;
 +    gmx_bool bC;
 +    char buf[STRLEN];
 +    
 +    if (fplog)
 +    {
 +        fprintf(fplog,
 +                "\nInitializing Domain Decomposition on %d nodes\n",cr->nnodes);
 +    }
 +    
 +    snew(dd,1);
 +
 +    dd->comm = init_dd_comm();
 +    comm = dd->comm;
 +    snew(comm->cggl_flag,DIM*2);
 +    snew(comm->cgcm_state,DIM*2);
 +
 +    dd->npbcdim   = ePBC2npbcdim(ir->ePBC);
 +    dd->bScrewPBC = (ir->ePBC == epbcSCREW);
 +    
 +    dd->bSendRecv2      = dd_nst_env(fplog,"GMX_DD_SENDRECV2",0);
 +    comm->dlb_scale_lim = dd_nst_env(fplog,"GMX_DLB_MAX",10);
 +    comm->eFlop         = dd_nst_env(fplog,"GMX_DLB_FLOP",0);
 +    recload             = dd_nst_env(fplog,"GMX_DD_LOAD",1);
 +    comm->nstSortCG     = dd_nst_env(fplog,"GMX_DD_SORT",1);
 +    comm->nstDDDump     = dd_nst_env(fplog,"GMX_DD_DUMP",0);
 +    comm->nstDDDumpGrid = dd_nst_env(fplog,"GMX_DD_DUMP_GRID",0);
 +    comm->DD_debug      = dd_nst_env(fplog,"GMX_DD_DEBUG",0);
 +
 +    dd->pme_recv_f_alloc = 0;
 +    dd->pme_recv_f_buf = NULL;
 +
 +    if (dd->bSendRecv2 && fplog)
 +    {
 +        fprintf(fplog,"Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n");
 +    }
 +    if (comm->eFlop)
 +    {
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Will load balance based on FLOP count\n");
 +        }
 +        if (comm->eFlop > 1)
 +        {
 +            srand(1+cr->nodeid);
 +        }
 +        comm->bRecordLoad = TRUE;
 +    }
 +    else
 +    {
 +        comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
 +                             
 +    }
 +    
 +    comm->eDLB = check_dlb_support(fplog,cr,dlb_opt,comm->bRecordLoad,Flags,ir);
 +    
 +    comm->bDynLoadBal = (comm->eDLB == edlbYES);
 +    if (fplog)
 +    {
 +        fprintf(fplog,"Dynamic load balancing: %s\n",edlb_names[comm->eDLB]);
 +    }
 +    dd->bGridJump = comm->bDynLoadBal;
 +    
 +    if (comm->nstSortCG)
 +    {
 +        if (fplog)
 +        {
 +            if (comm->nstSortCG == 1)
 +            {
 +                fprintf(fplog,"Will sort the charge groups at every domain (re)decomposition\n");
 +            }
 +            else
 +            {
 +                fprintf(fplog,"Will sort the charge groups every %d steps\n",
 +                        comm->nstSortCG);
 +            }
 +        }
 +        snew(comm->sort,1);
 +    }
 +    else
 +    {
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Will not sort the charge groups\n");
 +        }
 +    }
 +    
 +    comm->bInterCGBondeds = (ncg_mtop(mtop) > mtop->mols.nr);
 +    if (comm->bInterCGBondeds)
 +    {
 +        comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
 +    }
 +    else
 +    {
 +        comm->bInterCGMultiBody = FALSE;
 +    }
 +    
 +    dd->bInterCGcons = inter_charge_group_constraints(mtop);
 +
 +    if (ir->rlistlong == 0)
 +    {
 +        /* Set the cut-off to some very large value,
 +         * so we don't need if statements everywhere in the code.
 +         * We use sqrt, since the cut-off is squared in some places.
 +         */
 +        comm->cutoff   = GMX_CUTOFF_INF;
 +    }
 +    else
 +    {
 +        comm->cutoff   = ir->rlistlong;
 +    }
 +    comm->cutoff_mbody = 0;
 +    
 +    comm->cellsize_limit = 0;
 +    comm->bBondComm = FALSE;
 +
 +    if (comm->bInterCGBondeds)
 +    {
 +        if (comm_distance_min > 0)
 +        {
 +            comm->cutoff_mbody = comm_distance_min;
 +            if (Flags & MD_DDBONDCOMM)
 +            {
 +                comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
 +            }
 +            else
 +            {
 +                comm->cutoff = max(comm->cutoff,comm->cutoff_mbody);
 +            }
 +            r_bonded_limit = comm->cutoff_mbody;
 +        }
 +        else if (ir->bPeriodicMols)
 +        {
 +            /* Can not easily determine the required cut-off */
++            dd_warning(cr,fplog,"NOTE: Periodic molecules are present in this system. Because of this, the domain decomposition algorithm cannot easily determine the minimum cell size that it requires for treating bonded interactions. Instead, domain decomposition will assume that half the non-bonded cut-off will be a suitable lower bound.\n");
 +            comm->cutoff_mbody = comm->cutoff/2;
 +            r_bonded_limit = comm->cutoff_mbody;
 +        }
 +        else
 +        {
 +            if (MASTER(cr))
 +            {
 +                dd_bonded_cg_distance(fplog,dd,mtop,ir,x,box,
 +                                      Flags & MD_DDBONDCHECK,&r_2b,&r_mb);
 +            }
 +            gmx_bcast(sizeof(r_2b),&r_2b,cr);
 +            gmx_bcast(sizeof(r_mb),&r_mb,cr);
 +
 +            /* We use an initial margin of 10% for the minimum cell size,
 +             * except when we are just below the non-bonded cut-off.
 +             */
 +            if (Flags & MD_DDBONDCOMM)
 +            {
 +                if (max(r_2b,r_mb) > comm->cutoff)
 +                {
 +                    r_bonded       = max(r_2b,r_mb);
 +                    r_bonded_limit = 1.1*r_bonded;
 +                    comm->bBondComm = TRUE;
 +                }
 +                else
 +                {
 +                    r_bonded       = r_mb;
 +                    r_bonded_limit = min(1.1*r_bonded,comm->cutoff);
 +                }
 +                /* We determine cutoff_mbody later */
 +            }
 +            else
 +            {
 +                /* No special bonded communication,
 +                 * simply increase the DD cut-off.
 +                 */
 +                r_bonded_limit     = 1.1*max(r_2b,r_mb);
 +                comm->cutoff_mbody = r_bonded_limit;
 +                comm->cutoff       = max(comm->cutoff,comm->cutoff_mbody);
 +            }
 +        }
 +        comm->cellsize_limit = max(comm->cellsize_limit,r_bonded_limit);
 +        if (fplog)
 +        {
 +            fprintf(fplog,
 +                    "Minimum cell size due to bonded interactions: %.3f nm\n",
 +                    comm->cellsize_limit);
 +        }
 +    }
 +
 +    if (dd->bInterCGcons && rconstr <= 0)
 +    {
 +        /* There is a cell size limit due to the constraints (P-LINCS) */
 +        rconstr = constr_r_max(fplog,mtop,ir);
 +        if (fplog)
 +        {
 +            fprintf(fplog,
 +                    "Estimated maximum distance required for P-LINCS: %.3f nm\n",
 +                    rconstr);
 +            if (rconstr > comm->cellsize_limit)
 +            {
 +                fprintf(fplog,"This distance will limit the DD cell size, you can override this with -rcon\n");
 +            }
 +        }
 +    }
 +    else if (rconstr > 0 && fplog)
 +    {
 +        /* Here we do not check for dd->bInterCGcons,
 +         * because one can also set a cell size limit for virtual sites only
 +         * and at this point we don't know yet if there are intercg v-sites.
 +         */
 +        fprintf(fplog,
 +                "User supplied maximum distance required for P-LINCS: %.3f nm\n",
 +                rconstr);
 +    }
 +    comm->cellsize_limit = max(comm->cellsize_limit,rconstr);
 +
 +    comm->cgs_gl = gmx_mtop_global_cgs(mtop);
 +
 +    if (nc[XX] > 0)
 +    {
 +        copy_ivec(nc,dd->nc);
 +        set_dd_dim(fplog,dd);
 +        set_ddbox_cr(cr,&dd->nc,ir,box,&comm->cgs_gl,x,ddbox);
 +
 +        if (cr->npmenodes == -1)
 +        {
 +            cr->npmenodes = 0;
 +        }
 +        acs = average_cellsize_min(dd,ddbox);
 +        if (acs < comm->cellsize_limit)
 +        {
 +            if (fplog)
 +            {
 +                fprintf(fplog,"ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n",acs,comm->cellsize_limit);
 +            }
 +            gmx_fatal_collective(FARGS,cr,NULL,
 +                                 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
 +                                 acs,comm->cellsize_limit);
 +        }
 +    }
 +    else
 +    {
 +        set_ddbox_cr(cr,NULL,ir,box,&comm->cgs_gl,x,ddbox);
 +
 +        /* We need to choose the optimal DD grid and possibly PME nodes */
 +        limit = dd_choose_grid(fplog,cr,dd,ir,mtop,box,ddbox,
 +                               comm->eDLB!=edlbNO,dlb_scale,
 +                               comm->cellsize_limit,comm->cutoff,
 +                               comm->bInterCGBondeds,comm->bInterCGMultiBody);
 +        
 +        if (dd->nc[XX] == 0)
 +        {
 +            bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
 +            sprintf(buf,"Change the number of nodes or mdrun option %s%s%s",
 +                    !bC ? "-rdd" : "-rcon",
 +                    comm->eDLB!=edlbNO ? " or -dds" : "",
 +                    bC ? " or your LINCS settings" : "");
 +
 +            gmx_fatal_collective(FARGS,cr,NULL,
 +                                 "There is no domain decomposition for %d nodes that is compatible with the given box and a minimum cell size of %g nm\n"
 +                                 "%s\n"
 +                                 "Look in the log file for details on the domain decomposition",
 +                                 cr->nnodes-cr->npmenodes,limit,buf);
 +        }
 +        set_dd_dim(fplog,dd);
 +    }
 +
 +    if (fplog)
 +    {
 +        fprintf(fplog,
 +                "Domain decomposition grid %d x %d x %d, separate PME nodes %d\n",
 +                dd->nc[XX],dd->nc[YY],dd->nc[ZZ],cr->npmenodes);
 +    }
 +    
 +    dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
 +    if (cr->nnodes - dd->nnodes != cr->npmenodes)
 +    {
 +        gmx_fatal_collective(FARGS,cr,NULL,
 +                             "The size of the domain decomposition grid (%d) does not match the number of nodes (%d). The total number of nodes is %d",
 +                             dd->nnodes,cr->nnodes - cr->npmenodes,cr->nnodes);
 +    }
 +    if (cr->npmenodes > dd->nnodes)
 +    {
 +        gmx_fatal_collective(FARGS,cr,NULL,
 +                             "The number of separate PME nodes (%d) is larger than the number of PP nodes (%d), this is not supported.",cr->npmenodes,dd->nnodes);
 +    }
 +    if (cr->npmenodes > 0)
 +    {
 +        comm->npmenodes = cr->npmenodes;
 +    }
 +    else
 +    {
 +        comm->npmenodes = dd->nnodes;
 +    }
 +
 +    if (EEL_PME(ir->coulombtype))
 +    {
 +        /* The following choices should match those
 +         * in comm_cost_est in domdec_setup.c.
 +         * Note that here the checks have to take into account
 +         * that the decomposition might occur in a different order than xyz
 +         * (for instance through the env.var. GMX_DD_ORDER_ZYX),
 +         * in which case they will not match those in comm_cost_est,
 +         * but since that is mainly for testing purposes that's fine.
 +         */
 +        if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
 +            comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
 +            getenv("GMX_PMEONEDD") == NULL)
 +        {
 +            comm->npmedecompdim = 2;
 +            comm->npmenodes_x   = dd->nc[XX];
 +            comm->npmenodes_y   = comm->npmenodes/comm->npmenodes_x;
 +        }
 +        else
 +        {
 +            /* In case nc is 1 in both x and y we could still choose to
 +             * decompose pme in y instead of x, but we use x for simplicity.
 +             */
 +            comm->npmedecompdim = 1;
 +            if (dd->dim[0] == YY)
 +            {
 +                comm->npmenodes_x = 1;
 +                comm->npmenodes_y = comm->npmenodes;
 +            }
 +            else
 +            {
 +                comm->npmenodes_x = comm->npmenodes;
 +                comm->npmenodes_y = 1;
 +            }
 +        }    
 +        if (fplog)
 +        {
 +            fprintf(fplog,"PME domain decomposition: %d x %d x %d\n",
 +                    comm->npmenodes_x,comm->npmenodes_y,1);
 +        }
 +    }
 +    else
 +    {
 +        comm->npmedecompdim = 0;
 +        comm->npmenodes_x   = 0;
 +        comm->npmenodes_y   = 0;
 +    }
 +    
 +    /* Technically we don't need both of these,
 +     * but it simplifies code not having to recalculate it.
 +     */
 +    *npme_x = comm->npmenodes_x;
 +    *npme_y = comm->npmenodes_y;
 +        
 +    snew(comm->slb_frac,DIM);
 +    if (comm->eDLB == edlbNO)
 +    {
 +        comm->slb_frac[XX] = get_slb_frac(fplog,"x",dd->nc[XX],sizex);
 +        comm->slb_frac[YY] = get_slb_frac(fplog,"y",dd->nc[YY],sizey);
 +        comm->slb_frac[ZZ] = get_slb_frac(fplog,"z",dd->nc[ZZ],sizez);
 +    }
 +
 +    if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
 +    {
 +        if (comm->bBondComm || comm->eDLB != edlbNO)
 +        {
 +            /* Set the bonded communication distance to halfway
 +             * the minimum and the maximum,
 +             * since the extra communication cost is nearly zero.
 +             */
 +            acs = average_cellsize_min(dd,ddbox);
 +            comm->cutoff_mbody = 0.5*(r_bonded + acs);
 +            if (comm->eDLB != edlbNO)
 +            {
 +                /* Check if this does not limit the scaling */
 +                comm->cutoff_mbody = min(comm->cutoff_mbody,dlb_scale*acs);
 +            }
 +            if (!comm->bBondComm)
 +            {
 +                /* Without bBondComm do not go beyond the n.b. cut-off */
 +                comm->cutoff_mbody = min(comm->cutoff_mbody,comm->cutoff);
 +                if (comm->cellsize_limit >= comm->cutoff)
 +                {
 +                    /* We don't loose a lot of efficieny
 +                     * when increasing it to the n.b. cut-off.
 +                     * It can even be slightly faster, because we need
 +                     * less checks for the communication setup.
 +                     */
 +                    comm->cutoff_mbody = comm->cutoff;
 +                }
 +            }
 +            /* Check if we did not end up below our original limit */
 +            comm->cutoff_mbody = max(comm->cutoff_mbody,r_bonded_limit);
 +
 +            if (comm->cutoff_mbody > comm->cellsize_limit)
 +            {
 +                comm->cellsize_limit = comm->cutoff_mbody;
 +            }
 +        }
 +        /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
 +    }
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"Bonded atom communication beyond the cut-off: %d\n"
 +                "cellsize limit %f\n",
 +                comm->bBondComm,comm->cellsize_limit);
 +    }
 +    
 +    if (MASTER(cr))
 +    {
 +        check_dd_restrictions(cr,dd,ir,fplog);
 +    }
 +
 +    comm->globalcomm_step = INT_MIN;
 +    dd->ddp_count = 0;
 +
 +    clear_dd_cycle_counts(dd);
 +
 +    return dd;
 +}
 +
 +static void set_dlb_limits(gmx_domdec_t *dd)
 +
 +{
 +    int d;
 +
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        dd->comm->cd[d].np = dd->comm->cd[d].np_dlb;
 +        dd->comm->cellsize_min[dd->dim[d]] =
 +            dd->comm->cellsize_min_dlb[dd->dim[d]];
 +    }
 +}
 +
 +
 +static void turn_on_dlb(FILE *fplog,t_commrec *cr,gmx_large_int_t step)
 +{
 +    gmx_domdec_t *dd;
 +    gmx_domdec_comm_t *comm;
 +    real cellsize_min;
 +    int  d,nc,i;
 +    char buf[STRLEN];
 +    
 +    dd = cr->dd;
 +    comm = dd->comm;
 +    
 +    if (fplog)
 +    {
 +        fprintf(fplog,"At step %s the performance loss due to force load imbalance is %.1f %%\n",gmx_step_str(step,buf),dd_force_imb_perf_loss(dd)*100);
 +    }
 +
 +    cellsize_min = comm->cellsize_min[dd->dim[0]];
 +    for(d=1; d<dd->ndim; d++)
 +    {
 +        cellsize_min = min(cellsize_min,comm->cellsize_min[dd->dim[d]]);
 +    }
 +
 +    if (cellsize_min < comm->cellsize_limit*1.05)
 +    {
 +        dd_warning(cr,fplog,"NOTE: the minimum cell size is smaller than 1.05 times the cell size limit, will not turn on dynamic load balancing\n");
 +
 +        /* Change DLB from "auto" to "no". */
 +        comm->eDLB = edlbNO;
 +
 +        return;
 +    }
 +
 +    dd_warning(cr,fplog,"NOTE: Turning on dynamic load balancing\n");
 +    comm->bDynLoadBal = TRUE;
 +    dd->bGridJump = TRUE;
 +    
 +    set_dlb_limits(dd);
 +
 +    /* We can set the required cell size info here,
 +     * so we do not need to communicate this.
 +     * The grid is completely uniform.
 +     */
 +    for(d=0; d<dd->ndim; d++)
 +    {
 +        if (comm->root[d])
 +        {
 +            comm->load[d].sum_m = comm->load[d].sum;
 +
 +            nc = dd->nc[dd->dim[d]];
 +            for(i=0; i<nc; i++)
 +            {
 +                comm->root[d]->cell_f[i]    = i/(real)nc;
 +                if (d > 0)
 +                {
 +                    comm->root[d]->cell_f_max0[i] =  i   /(real)nc;
 +                    comm->root[d]->cell_f_min1[i] = (i+1)/(real)nc;
 +                }
 +            }
 +            comm->root[d]->cell_f[nc] = 1.0;
 +        }
 +    }
 +}
 +
 +static char *init_bLocalCG(gmx_mtop_t *mtop)
 +{
 +    int  ncg,cg;
 +    char *bLocalCG;
 +    
 +    ncg = ncg_mtop(mtop);
 +    snew(bLocalCG,ncg);
 +    for(cg=0; cg<ncg; cg++)
 +    {
 +        bLocalCG[cg] = FALSE;
 +    }
 +
 +    return bLocalCG;
 +}
 +
 +void dd_init_bondeds(FILE *fplog,
 +                     gmx_domdec_t *dd,gmx_mtop_t *mtop,
 +                     gmx_vsite_t *vsite,gmx_constr_t constr,
 +                     t_inputrec *ir,gmx_bool bBCheck,cginfo_mb_t *cginfo_mb)
 +{
 +    gmx_domdec_comm_t *comm;
 +    gmx_bool bBondComm;
 +    int  d;
 +
 +    dd_make_reverse_top(fplog,dd,mtop,vsite,constr,ir,bBCheck);
 +
 +    comm = dd->comm;
 +
 +    if (comm->bBondComm)
 +    {
 +        /* Communicate atoms beyond the cut-off for bonded interactions */
 +        comm = dd->comm;
 +
 +        comm->cglink = make_charge_group_links(mtop,dd,cginfo_mb);
 +
 +        comm->bLocalCG = init_bLocalCG(mtop);
 +    }
 +    else
 +    {
 +        /* Only communicate atoms based on cut-off */
 +        comm->cglink   = NULL;
 +        comm->bLocalCG = NULL;
 +    }
 +}
 +
 +static void print_dd_settings(FILE *fplog,gmx_domdec_t *dd,
 +                              t_inputrec *ir,
 +                              gmx_bool bDynLoadBal,real dlb_scale,
 +                              gmx_ddbox_t *ddbox)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int  d;
 +    ivec np;
 +    real limit,shrink;
 +    char buf[64];
 +
 +    if (fplog == NULL)
 +    {
 +        return;
 +    }
 +
 +    comm = dd->comm;
 +
 +    if (bDynLoadBal)
 +    {
 +        fprintf(fplog,"The maximum number of communication pulses is:");
 +        for(d=0; d<dd->ndim; d++)
 +        {
 +            fprintf(fplog," %c %d",dim2char(dd->dim[d]),comm->cd[d].np_dlb);
 +        }
 +        fprintf(fplog,"\n");
 +        fprintf(fplog,"The minimum size for domain decomposition cells is %.3f nm\n",comm->cellsize_limit);
 +        fprintf(fplog,"The requested allowed shrink of DD cells (option -dds) is: %.2f\n",dlb_scale);
 +        fprintf(fplog,"The allowed shrink of domain decomposition cells is:");
 +        for(d=0; d<DIM; d++)
 +        {
 +            if (dd->nc[d] > 1)
 +            {
 +                if (d >= ddbox->npbcdim && dd->nc[d] == 2)
 +                {
 +                    shrink = 0;
 +                }
 +                else
 +                {
 +                    shrink =
 +                        comm->cellsize_min_dlb[d]/
 +                        (ddbox->box_size[d]*ddbox->skew_fac[d]/dd->nc[d]);
 +                }
 +                fprintf(fplog," %c %.2f",dim2char(d),shrink);
 +            }
 +        }
 +        fprintf(fplog,"\n");
 +    }
 +    else
 +    {
 +        set_dd_cell_sizes_slb(dd,ddbox,FALSE,np);
 +        fprintf(fplog,"The initial number of communication pulses is:");
 +        for(d=0; d<dd->ndim; d++)
 +        {
 +            fprintf(fplog," %c %d",dim2char(dd->dim[d]),np[dd->dim[d]]);
 +        }
 +        fprintf(fplog,"\n");
 +        fprintf(fplog,"The initial domain decomposition cell size is:");
 +        for(d=0; d<DIM; d++) {
 +            if (dd->nc[d] > 1)
 +            {
 +                fprintf(fplog," %c %.2f nm",
 +                        dim2char(d),dd->comm->cellsize_min[d]);
 +            }
 +        }
 +        fprintf(fplog,"\n\n");
 +    }
 +    
 +    if (comm->bInterCGBondeds || dd->vsite_comm || dd->constraint_comm)
 +    {
 +        fprintf(fplog,"The maximum allowed distance for charge groups involved in interactions is:\n");
 +        fprintf(fplog,"%40s  %-7s %6.3f nm\n",
 +                "non-bonded interactions","",comm->cutoff);
 +
 +        if (bDynLoadBal)
 +        {
 +            limit = dd->comm->cellsize_limit;
 +        }
 +        else
 +        {
 +            if (dynamic_dd_box(ddbox,ir))
 +            {
 +                fprintf(fplog,"(the following are initial values, they could change due to box deformation)\n");
 +            }
 +            limit = dd->comm->cellsize_min[XX];
 +            for(d=1; d<DIM; d++)
 +            {
 +                limit = min(limit,dd->comm->cellsize_min[d]);
 +            }
 +        }
 +
 +        if (comm->bInterCGBondeds)
 +        {
 +            fprintf(fplog,"%40s  %-7s %6.3f nm\n",
 +                    "two-body bonded interactions","(-rdd)",
 +                    max(comm->cutoff,comm->cutoff_mbody));
 +            fprintf(fplog,"%40s  %-7s %6.3f nm\n",
 +                    "multi-body bonded interactions","(-rdd)",
 +                    (comm->bBondComm || dd->bGridJump) ? comm->cutoff_mbody : min(comm->cutoff,limit));
 +        }
 +        if (dd->vsite_comm)
 +        {
 +            fprintf(fplog,"%40s  %-7s %6.3f nm\n",
 +                    "virtual site constructions","(-rcon)",limit);
 +        }
 +        if (dd->constraint_comm)
 +        {
 +            sprintf(buf,"atoms separated by up to %d constraints",
 +                    1+ir->nProjOrder);
 +            fprintf(fplog,"%40s  %-7s %6.3f nm\n",
 +                    buf,"(-rcon)",limit);
 +        }
 +        fprintf(fplog,"\n");
 +    }
 +    
 +    fflush(fplog);
 +}
 +
 +void set_dd_parameters(FILE *fplog,gmx_domdec_t *dd,real dlb_scale,
 +                       t_inputrec *ir,t_forcerec *fr,
 +                       gmx_ddbox_t *ddbox)
 +{   /* Finalize the domain decomposition parameters kept in dd->comm.
 +     *
 +     * In order, this function:
 +     *  - initializes the PME decomposition data with init_ddpme(), or,
 +     *    without PME electrostatics, sets comm->npmenodes to 0 and raises
 +     *    a fatal error when dd->pme_nodeid >= 0;
 +     *  - sets fr->bMolPBC;
 +     *  - unless comm->eDLB is edlbNO, determines the number of
 +     *    communication pulses (np_dlb) per DD dimension, the cell size
 +     *    limit and the minimum cell sizes for dynamic load balancing;
 +     *  - prints the settings through print_dd_settings();
 +     *  - creates dd->ga2la with ga2la_init().
 +     *
 +     * dlb_scale is used in the pulse estimate and passed on for printing.
 +     * The direct fprintf(fplog,...) near the end is guarded by an fplog
 +     * check.
 +     */
 +    gmx_domdec_comm_t *comm;
 +    int  d,dim,npulse,npulse_d_max,npulse_d;
 +    gmx_bool bNoCutOff;
 +    int  natoms_tot;
 +    real vol_frac;
 +
 +    comm = dd->comm;
 +    /* A zero rvdw or rcoulomb is treated below as running without a cut-off */
 +    bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
 +
 +    if (EEL_PME(ir->coulombtype))
 +    {   /* One init_ddpme call per PME decomposition dimension (at most two) */
 +        init_ddpme(dd,&comm->ddpme[0],0);
 +        if (comm->npmedecompdim >= 2)
 +        {
 +            init_ddpme(dd,&comm->ddpme[1],1);
 +        }
 +    }
 +    else
 +    {
 +        comm->npmenodes = 0;
 +        if (dd->pme_nodeid >= 0)
 +        {
 +            gmx_fatal_collective(FARGS,NULL,dd,
 +                                 "Can not have separate PME nodes without PME electrostatics");
 +        }
 +    }
 +    
 +    /* If each molecule is a single charge group
 +     * or we use domain decomposition for each periodic dimension,
 +     * we do not need to take pbc into account for the bonded interactions.
 +     */
 +    if (fr->ePBC == epbcNONE || !comm->bInterCGBondeds ||
 +        (dd->nc[XX]>1 && dd->nc[YY]>1 && (dd->nc[ZZ]>1 || fr->ePBC==epbcXY)))
 +    {
 +        fr->bMolPBC = FALSE;
 +    }
 +    else
 +    {
 +        fr->bMolPBC = TRUE;
 +    }
 +        
 +    if (debug)
 +    {
 +        fprintf(debug,"The DD cut-off is %f\n",comm->cutoff);
 +    }
 +    if (comm->eDLB != edlbNO)
 +    {
 +        /* Determine the maximum number of comm. pulses in one dimension */
 +        /* The cell size limit can never be below the multi-body cut-off */
 +        comm->cellsize_limit = max(comm->cellsize_limit,comm->cutoff_mbody);
 +        
 +        /* Determine the maximum required number of grid pulses */
 +        if (comm->cellsize_limit >= comm->cutoff)
 +        {
 +            /* Only a single pulse is required */
 +            npulse = 1;
 +        }
 +        else if (!bNoCutOff && comm->cellsize_limit > 0)
 +        {
 +            /* We round down slightly here to avoid overhead due to the latency
 +             * of extra communication calls when the cut-off
 +             * would be only slightly longer than the cell size.
 +             * Later cellsize_limit is redetermined,
 +             * so we can not miss interactions due to this rounding.
 +             */
 +            npulse = (int)(0.96 + comm->cutoff/comm->cellsize_limit);
 +        }
 +        else
 +        {
 +            /* There is no cell size limit */
 +            npulse = max(dd->nc[XX]-1,max(dd->nc[YY]-1,dd->nc[ZZ]-1));
 +        }
 +
 +        if (!bNoCutOff && npulse > 1)
 +        {
 +            /* See if we can do with less pulses, based on dlb_scale */
 +            npulse_d_max = 0;
 +            for(d=0; d<dd->ndim; d++)
 +            {
 +                dim = dd->dim[d];
 +                npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
 +                                 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale)); /* i.e. 1 + cutoff/(dlb_scale*box_size*skew_fac/nc), truncated to int */
 +                npulse_d_max = max(npulse_d_max,npulse_d);
 +            }
 +            npulse = min(npulse,npulse_d_max);
 +        }
 +        
 +        /* This env var can override npulse */
 +        d = dd_nst_env(fplog,"GMX_DD_NPULSE",0); /* d is used as a scratch value here, not as a dimension index */
 +        if (d > 0)
 +        {
 +            npulse = d;
 +        }
 +
 +        comm->maxpulse = 1;
 +        comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE); /* cleared again in the loop below when a dimension gets fewer than nc-1 pulses */
 +        for(d=0; d<dd->ndim; d++)
 +        {
 +            comm->cd[d].np_dlb = min(npulse,dd->nc[dd->dim[d]]-1); /* never more pulses than nc-1 in this dimension */
 +            comm->cd[d].np_nalloc = comm->cd[d].np_dlb;
 +            snew(comm->cd[d].ind,comm->cd[d].np_nalloc); /* one ind entry per DLB pulse */
 +            comm->maxpulse = max(comm->maxpulse,comm->cd[d].np_dlb);
 +            if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
 +            {
 +                comm->bVacDLBNoLimit = FALSE;
 +            }
 +        }
 +        
 +        /* cellsize_limit is set for LINCS in init_domain_decomposition */
 +        if (!comm->bVacDLBNoLimit)
 +        {
 +            comm->cellsize_limit = max(comm->cellsize_limit,
 +                                       comm->cutoff/comm->maxpulse); /* maxpulse cells have to span the cut-off */
 +        }
 +        comm->cellsize_limit = max(comm->cellsize_limit,comm->cutoff_mbody);
 +        /* Set the minimum cell size for each DD dimension */
 +        for(d=0; d<dd->ndim; d++)
 +        {
 +            if (comm->bVacDLBNoLimit ||
 +                comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
 +            {
 +                comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
 +            }
 +            else
 +            {   /* Fewer pulses than maxpulse in this dimension: np_dlb cells have to cover the cut-off */
 +                comm->cellsize_min_dlb[dd->dim[d]] =
 +                    comm->cutoff/comm->cd[d].np_dlb;
 +            }
 +        }
 +        if (comm->cutoff_mbody <= 0)
 +        {   /* No multi-body cut-off set so far: use the smaller of cutoff and cellsize_limit */
 +            comm->cutoff_mbody = min(comm->cutoff,comm->cellsize_limit);
 +        }
 +        if (comm->bDynLoadBal)
 +        {
 +            set_dlb_limits(dd);
 +        }
 +    }
 +    
 +    print_dd_settings(fplog,dd,ir,comm->bDynLoadBal,dlb_scale,ddbox);
 +    if (comm->eDLB == edlbAUTO)
 +    {   /* Print the settings a second time, now as if DLB were on */
 +        if (fplog)
 +        {
 +            fprintf(fplog,"When dynamic load balancing gets turned on, these settings will change to:\n");
 +        }
 +        print_dd_settings(fplog,dd,ir,TRUE,dlb_scale,ddbox);
 +    }
 +
 +    if (ir->ePBC == epbcNONE)
 +    {
 +        vol_frac = 1 - 1/(double)dd->nnodes;
 +    }
 +    else
 +    {
 +        vol_frac =
 +            (1 + comm_box_frac(dd->nc,comm->cutoff,ddbox))/(double)dd->nnodes;
 +    }
 +    if (debug)
 +    {
 +        fprintf(debug,"Volume fraction for all DD zones: %f\n",vol_frac);
 +    }
 +    natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr]; /* last entry of the global charge-group index; presumably the total atom count - TODO confirm */
 +   
 +    dd->ga2la = ga2la_init(natoms_tot,vol_frac*natoms_tot); /* NOTE(review): second argument looks like an estimate of the atoms seen locally - confirm against ga2la_init */
 +}
 +
 +static void merge_cg_buffers(int ncell,
 +                             gmx_domdec_comm_dim_t *cd, int pulse,
 +                             int  *ncg_cell,
 +                             int  *index_gl, int  *recv_i,
 +                             rvec *cg_cm,    rvec *recv_vr,
 +                             int *cgindex,
 +                             cginfo_mb_t *cginfo_mb,int *cginfo)
 +{   /* Merge the charge groups received in communication pulse number
 +     * pulse into the local per-cell arrays.
 +     *
 +     * recv_i holds the received global charge-group indices and recv_vr
 +     * the received vectors that are copied into cg_cm. The arrays
 +     * index_gl, cg_cm, cgindex and cginfo are updated in place and
 +     * ncg_cell[ncell+cell+1] is advanced to the new end of every cell.
 +     *
 +     * The work is done in two passes:
 +     *  1. data already stored for each cell is moved up to make room for
 +     *     the groups that will be inserted in front of it, and the send
 +     *     indices stored for pulses 1..pulse are corrected for that move;
 +     *  2. the received groups are copied into the gaps, cell by cell.
 +     */
 +    gmx_domdec_ind_t *ind,*ind_p;
 +    int p,cell,c,cg,cg0,cg1,cg_gl,nat;
 +    int shift,shift_at;
 +    
 +    ind = &cd->ind[pulse];
 +    
 +    /* First correct the already stored data */
 +    shift = ind->nrecv[ncell]; /* NOTE(review): nrecv[ncell] appears to be the total received over all cells (the sending side stores its total at that index) - confirm */
 +    for(cell=ncell-1; cell>=0; cell--)
 +    {
 +        shift -= ind->nrecv[cell]; /* under that assumption: the number of new groups that end up in front of this cell */
 +        if (shift > 0)
 +        {
 +            /* Move the cg's present from previous grid pulses */
 +            cg0 = ncg_cell[ncell+cell];
 +            cg1 = ncg_cell[ncell+cell+1];
 +            cgindex[cg1+shift] = cgindex[cg1]; /* end boundary of the last group of this cell */
 +            for(cg=cg1-1; cg>=cg0; cg--) /* runs from the top down: source and destination ranges can overlap */
 +            {
 +                index_gl[cg+shift] = index_gl[cg];
 +                copy_rvec(cg_cm[cg],cg_cm[cg+shift]);
 +                cgindex[cg+shift] = cgindex[cg];
 +                cginfo[cg+shift] = cginfo[cg];
 +            }
 +            /* Correct the already stored send indices for the shift */
 +            for(p=1; p<=pulse; p++) /* starts at pulse 1, the indices of pulse 0 are left untouched */
 +            {
 +                ind_p = &cd->ind[p];
 +                cg0 = 0; /* cg0 and cg1 now delimit the entries of this cell within the flat index array of pulse p */
 +                for(c=0; c<cell; c++)
 +                {
 +                    cg0 += ind_p->nsend[c];
 +                }
 +                cg1 = cg0 + ind_p->nsend[cell];
 +                for(cg=cg0; cg<cg1; cg++)
 +                {
 +                    ind_p->index[cg] += shift;
 +                }
 +            }
 +        }
 +    }
 +
 +    /* Merge in the communicated buffers */
 +    shift = 0; /* number of groups inserted so far */
 +    shift_at = 0; /* number of atoms inserted so far */
 +    cg0 = 0; /* read position in recv_i and recv_vr */
 +    for(cell=0; cell<ncell; cell++)
 +    {
 +        cg1 = ncg_cell[ncell+cell+1] + shift; /* end of the data already stored for this cell, after the move of pass 1 */
 +        if (shift_at > 0)
 +        {
 +            /* Correct the old cg indices */
 +            for(cg=ncg_cell[ncell+cell]; cg<cg1; cg++) /* ncg_cell[ncell+cell] was already updated at the end of the previous iteration */
 +            {
 +                cgindex[cg+1] += shift_at;
 +            }
 +        }
 +        for(cg=0; cg<ind->nrecv[cell]; cg++)
 +        {
 +            /* Copy this charge group from the buffer */
 +            index_gl[cg1] = recv_i[cg0];
 +            copy_rvec(recv_vr[cg0],cg_cm[cg1]);
 +            /* Add it to the cgindex */
 +            cg_gl = index_gl[cg1];
 +            cginfo[cg1] = ddcginfo(cginfo_mb,cg_gl);
 +            nat = GET_CGINFO_NATOMS(cginfo[cg1]); /* the atom count of the group is taken from its cginfo entry */
 +            cgindex[cg1+1] = cgindex[cg1] + nat;
 +            cg0++;
 +            cg1++;
 +            shift_at += nat;
 +        }
 +        shift += ind->nrecv[cell];
 +        ncg_cell[ncell+cell+1] = cg1; /* new end of this cell, which is also the start of the next one */
 +    }
 +}
 +
 +static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
 +                               int nzone,int cg0,const int *cgindex)
 +{
 +    int cg,zone,p;
 +    
 +    /* Store the atom block boundaries for easy copying of communication buffers
 +     */
 +    cg = cg0;
 +    for(zone=0; zone<nzone; zone++)
 +    {
 +        for(p=0; p<cd->np; p++) {
 +            cd->ind[p].cell2at0[zone] = cgindex[cg];
 +            cg += cd->ind[p].nrecv[zone];
 +            cd->ind[p].cell2at1[zone] = cgindex[cg];
 +        }
 +    }
 +}
 +
 +static gmx_bool missing_link(t_blocka *link,int cg_gl,char *bLocalCG)
 +{
 +    int  i;
 +    gmx_bool bMiss;
 +
 +    bMiss = FALSE;
 +    for(i=link->index[cg_gl]; i<link->index[cg_gl+1]; i++)
 +    {
 +        if (!bLocalCG[link->a[i]])
 +        {
 +            bMiss = TRUE;
 +        }
 +    }
 +
 +    return bMiss;
 +}
 +
 +static void setup_dd_communication(gmx_domdec_t *dd,
 +                                   matrix box,gmx_ddbox_t *ddbox,t_forcerec *fr)
 +{
 +    int dim_ind,dim,dim0,dim1=-1,dim2=-1,dimd,p,nat_tot;
 +    int nzone,nzone_send,zone,zonei,cg0,cg1;
 +    int c,i,j,cg,cg_gl,nrcg;
 +    int *zone_cg_range,pos_cg,*index_gl,*cgindex,*recv_i;
 +    gmx_domdec_comm_t *comm;
 +    gmx_domdec_zones_t *zones;
 +    gmx_domdec_comm_dim_t *cd;
 +    gmx_domdec_ind_t *ind;
 +    cginfo_mb_t *cginfo_mb;
 +    gmx_bool bBondComm,bDist2B,bDistMB,bDistMB_pulse,bDistBonded,bScrew;
 +    real r_mb,r_comm2,r_scomm2,r_bcomm2,r,r_0,r_1,r2,rb2,r2inc,inv_ncg,tric_sh;
 +    rvec rb,rn;
 +    real corner[DIM][4],corner_round_0=0,corner_round_1[4];
 +    real bcorner[DIM],bcorner_round_1=0;
 +    ivec tric_dist;
 +    rvec *cg_cm,*normal,*v_d,*v_0=NULL,*v_1=NULL,*recv_vr;
 +    real skew_fac2_d,skew_fac_01;
 +    rvec sf2_round;
 +    int  nsend,nat;
 +    
 +    if (debug)
 +    {
 +        fprintf(debug,"Setting up DD communication\n");
 +    }
 +    
 +    comm  = dd->comm;
 +    cg_cm = fr->cg_cm;
 +
 +    for(dim_ind=0; dim_ind<dd->ndim; dim_ind++)
 +    {
 +        dim = dd->dim[dim_ind];
 +
 +        /* Check if we need to use triclinic distances */
 +        tric_dist[dim_ind] = 0;
 +        for(i=0; i<=dim_ind; i++)
 +        {
 +            if (ddbox->tric_dir[dd->dim[i]])
 +            {
 +                tric_dist[dim_ind] = 1;
 +            }
 +        }
 +    }
 +
 +    bBondComm = comm->bBondComm;
 +
 +    /* Do we need to determine extra distances for multi-body bondeds? */
 +    bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
 +    
 +    /* Do we need to determine extra distances for only two-body bondeds? */
 +    bDist2B = (bBondComm && !bDistMB);
 +
 +    r_comm2  = sqr(comm->cutoff);
 +    r_bcomm2 = sqr(comm->cutoff_mbody);
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"bBondComm %d, r_bc %f\n",bBondComm,sqrt(r_bcomm2));
 +    }
 +
 +    zones = &comm->zones;
 +    
 +    dim0 = dd->dim[0];
 +    /* The first dimension is equal for all cells */
 +    corner[0][0] = comm->cell_x0[dim0];
 +    if (bDistMB)
 +    {
 +        bcorner[0] = corner[0][0];
 +    }
 +    if (dd->ndim >= 2)
 +    {
 +        dim1 = dd->dim[1];
 +        /* This cell row is only seen from the first row */
 +        corner[1][0] = comm->cell_x0[dim1];
 +        /* All rows can see this row */
 +        corner[1][1] = comm->cell_x0[dim1];
 +        if (dd->bGridJump)
 +        {
 +            corner[1][1] = max(comm->cell_x0[dim1],comm->zone_d1[1].mch0);
 +            if (bDistMB)
 +            {
 +                /* For the multi-body distance we need the maximum */
 +                bcorner[1] = max(comm->cell_x0[dim1],comm->zone_d1[1].p1_0);
 +            }
 +        }
 +        /* Set the upper-right corner for rounding */
 +        corner_round_0 = comm->cell_x1[dim0];
 +        
 +        if (dd->ndim >= 3)
 +        {
 +            dim2 = dd->dim[2];
 +            for(j=0; j<4; j++)
 +            {
 +                corner[2][j] = comm->cell_x0[dim2];
 +            }
 +            if (dd->bGridJump)
 +            {
 +                /* Use the maximum of the i-cells that see a j-cell */
 +                for(i=0; i<zones->nizone; i++)
 +                {
 +                    for(j=zones->izone[i].j0; j<zones->izone[i].j1; j++)
 +                    {
 +                        if (j >= 4)
 +                        {
 +                            corner[2][j-4] =
 +                                max(corner[2][j-4],
 +                                    comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
 +                        }
 +                    }
 +                }
 +                if (bDistMB)
 +                {
 +                    /* For the multi-body distance we need the maximum */
 +                    bcorner[2] = comm->cell_x0[dim2];
 +                    for(i=0; i<2; i++)
 +                    {
 +                        for(j=0; j<2; j++)
 +                        {
 +                            bcorner[2] = max(bcorner[2],
 +                                             comm->zone_d2[i][j].p1_0);
 +                        }
 +                    }
 +                }
 +            }
 +            
 +            /* Set the upper-right corner for rounding */
 +            /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
 +             * Only cell (0,0,0) can see cell 7 (1,1,1)
 +             */
 +            corner_round_1[0] = comm->cell_x1[dim1];
 +            corner_round_1[3] = comm->cell_x1[dim1];
 +            if (dd->bGridJump)
 +            {
 +                corner_round_1[0] = max(comm->cell_x1[dim1],
 +                                        comm->zone_d1[1].mch1);
 +                if (bDistMB)
 +                {
 +                    /* For the multi-body distance we need the maximum */
 +                    bcorner_round_1 = max(comm->cell_x1[dim1],
 +                                          comm->zone_d1[1].p1_1);
 +                }
 +            }
 +        }
 +    }
 +    
 +    /* Triclinic stuff */
 +    normal = ddbox->normal;
 +    skew_fac_01 = 0;
 +    if (dd->ndim >= 2)
 +    {
 +        v_0 = ddbox->v[dim0];
 +        if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
 +        {
 +            /* Determine the coupling coefficient for the distances
 +             * to the cell planes along dim0 and dim1 through dim2.
 +             * This is required for correct rounding.
 +             */
 +            skew_fac_01 =
 +                ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
 +            if (debug)
 +            {
 +                fprintf(debug,"\nskew_fac_01 %f\n",skew_fac_01);
 +            }
 +        }
 +    }
 +    if (dd->ndim >= 3)
 +    {
 +        v_1 = ddbox->v[dim1];
 +    }
 +    
 +    zone_cg_range = zones->cg_range;
 +    index_gl = dd->index_gl;
 +    cgindex  = dd->cgindex;
 +    cginfo_mb = fr->cginfo_mb;
 +    
 +    zone_cg_range[0]   = 0;
 +    zone_cg_range[1]   = dd->ncg_home;
 +    comm->zone_ncg1[0] = dd->ncg_home;
 +    pos_cg             = dd->ncg_home;
 +    
 +    nat_tot = dd->nat_home;
 +    nzone = 1;
 +    for(dim_ind=0; dim_ind<dd->ndim; dim_ind++)
 +    {
 +        dim = dd->dim[dim_ind];
 +        cd = &comm->cd[dim_ind];
 +        
 +        if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
 +        {
 +            /* No pbc in this dimension, the first node should not comm. */
 +            nzone_send = 0;
 +        }
 +        else
 +        {
 +            nzone_send = nzone;
 +        }
 +
 +        bScrew = (dd->bScrewPBC && dim == XX);
 +        
 +        v_d = ddbox->v[dim];
 +        skew_fac2_d = sqr(ddbox->skew_fac[dim]);
 +
 +        cd->bInPlace = TRUE;
 +        for(p=0; p<cd->np; p++)
 +        {
 +            /* Only atoms communicated in the first pulse are used
 +             * for multi-body bonded interactions or for bBondComm.
 +             */
 +            bDistBonded   = ((bDistMB || bDist2B) && p == 0);
 +            bDistMB_pulse = (bDistMB && bDistBonded);
 +
 +            ind = &cd->ind[p];
 +            nsend = 0;
 +            nat = 0;
 +            for(zone=0; zone<nzone_send; zone++)
 +            {
 +                if (tric_dist[dim_ind] && dim_ind > 0)
 +                {
 +                    /* Determine slightly more optimized skew_fac's
 +                     * for rounding.
 +                     * This reduces the number of communicated atoms
 +                     * by about 10% for 3D DD of rhombic dodecahedra.
 +                     */
 +                    for(dimd=0; dimd<dim; dimd++)
 +                    {
 +                        sf2_round[dimd] = 1;
 +                        if (ddbox->tric_dir[dimd])
 +                        {
 +                            for(i=dd->dim[dimd]+1; i<DIM; i++)
 +                            {
 +                                /* If we are shifted in dimension i
 +                                 * and the cell plane is tilted forward
 +                                 * in dimension i, skip this coupling.
 +                                 */
 +                                if (!(zones->shift[nzone+zone][i] &&
 +                                      ddbox->v[dimd][i][dimd] >= 0))
 +                                {
 +                                    sf2_round[dimd] +=
 +                                        sqr(ddbox->v[dimd][i][dimd]);
 +                                }
 +                            }
 +                            sf2_round[dimd] = 1/sf2_round[dimd];
 +                        }
 +                    }
 +                }
 +
 +                zonei = zone_perm[dim_ind][zone];
 +                if (p == 0)
 +                {
 +                    /* Here we permutate the zones to obtain a convenient order
 +                     * for neighbor searching
 +                     */
 +                    cg0 = zone_cg_range[zonei];
 +                    cg1 = zone_cg_range[zonei+1];
 +                }
 +                else
 +                {
 +                    /* Look only at the cg's received in the previous grid pulse
 +                     */
 +                    cg1 = zone_cg_range[nzone+zone+1];
 +                    cg0 = cg1 - cd->ind[p-1].nrecv[zone];
 +                }
 +                ind->nsend[zone] = 0;
 +                for(cg=cg0; cg<cg1; cg++)
 +                {
 +                    r2  = 0;
 +                    rb2 = 0;
 +                    if (tric_dist[dim_ind] == 0)
 +                    {
 +                        /* Rectangular direction, easy */
 +                        r = cg_cm[cg][dim] - corner[dim_ind][zone];
 +                        if (r > 0)
 +                        {
 +                            r2 += r*r;
 +                        }
 +                        if (bDistMB_pulse)
 +                        {
 +                            r = cg_cm[cg][dim] - bcorner[dim_ind];
 +                            if (r > 0)
 +                            {
 +                                rb2 += r*r;
 +                            }
 +                        }
 +                        /* Rounding gives at most a 16% reduction
 +                         * in communicated atoms
 +                         */
 +                        if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
 +                        {
 +                            r = cg_cm[cg][dim0] - corner_round_0;
 +                            /* This is the first dimension, so always r >= 0 */
 +                            r2 += r*r;
 +                            if (bDistMB_pulse)
 +                            {
 +                                rb2 += r*r;
 +                            }
 +                        }
 +                        if (dim_ind == 2 && (zonei == 2 || zonei == 3))
 +                        {
 +                            r = cg_cm[cg][dim1] - corner_round_1[zone];
 +                            if (r > 0)
 +                            {
 +                                r2 += r*r;
 +                            }
 +                            if (bDistMB_pulse)
 +                            {
 +                                r = cg_cm[cg][dim1] - bcorner_round_1;
 +                                if (r > 0)
 +                                {
 +                                    rb2 += r*r;
 +                                }
 +                            }
 +                        }
 +                    }
 +                    else
 +                    {
 +                        /* Triclinic direction, more complicated */
 +                        clear_rvec(rn);
 +                        clear_rvec(rb);
 +                        /* Rounding, conservative as the skew_fac multiplication
 +                         * will slightly underestimate the distance.
 +                         */
 +                        if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
 +                        {
 +                            rn[dim0] = cg_cm[cg][dim0] - corner_round_0;
 +                            for(i=dim0+1; i<DIM; i++)
 +                            {
 +                                rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
 +                            }
 +                            r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
 +                            if (bDistMB_pulse)
 +                            {
 +                                rb[dim0] = rn[dim0];
 +                                rb2 = r2;
 +                            }
 +                            /* Take care that the cell planes along dim0 might not
 +                             * be orthogonal to those along dim1 and dim2.
 +                             */
 +                            for(i=1; i<=dim_ind; i++)
 +                            {
 +                                dimd = dd->dim[i];
 +                                if (normal[dim0][dimd] > 0)
 +                                {
 +                                    rn[dimd] -= rn[dim0]*normal[dim0][dimd];
 +                                    if (bDistMB_pulse)
 +                                    {
 +                                        rb[dimd] -= rb[dim0]*normal[dim0][dimd];
 +                                    }
 +                                }
 +                            }
 +                        }
 +                        if (dim_ind == 2 && (zonei == 2 || zonei == 3))
 +                        {
 +                            rn[dim1] += cg_cm[cg][dim1] - corner_round_1[zone];
 +                            tric_sh = 0;
 +                            for(i=dim1+1; i<DIM; i++)
 +                            {
 +                                tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
 +                            }
 +                            rn[dim1] += tric_sh;
 +                            if (rn[dim1] > 0)
 +                            {
 +                                r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
 +                                /* Take care of coupling of the distances
 +                                 * to the planes along dim0 and dim1 through dim2.
 +                                 */
 +                                r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
 +                                /* Take care that the cell planes along dim1
 +                                 * might not be orthogonal to that along dim2.
 +                                 */
 +                                if (normal[dim1][dim2] > 0)
 +                                {
 +                                    rn[dim2] -= rn[dim1]*normal[dim1][dim2];
 +                                }
 +                            }
 +                            if (bDistMB_pulse)
 +                            {
 +                                rb[dim1] +=
 +                                    cg_cm[cg][dim1] - bcorner_round_1 + tric_sh;
 +                                if (rb[dim1] > 0)
 +                                {
 +                                    rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
 +                                    /* Take care of coupling of the distances
 +                                     * to the planes along dim0 and dim1 through dim2.
 +                                     */
 +                                    rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
 +                                    /* Take care that the cell planes along dim1
 +                                     * might not be orthogonal to that along dim2.
 +                                     */
 +                                    if (normal[dim1][dim2] > 0)
 +                                    {
 +                                        rb[dim2] -= rb[dim1]*normal[dim1][dim2];
 +                                    }
 +                                }
 +                            }
 +                        }
 +                        /* The distance along the communication direction */
 +                        rn[dim] += cg_cm[cg][dim] - corner[dim_ind][zone];
 +                        tric_sh = 0;
 +                        for(i=dim+1; i<DIM; i++)
 +                        {
 +                            tric_sh -= cg_cm[cg][i]*v_d[i][dim];
 +                        }
 +                        rn[dim] += tric_sh;
 +                        if (rn[dim] > 0)
 +                        {
 +                            r2 += rn[dim]*rn[dim]*skew_fac2_d;
 +                            /* Take care of coupling of the distances
 +                             * to the planes along dim0 and dim1 through dim2.
 +                             */
 +                            if (dim_ind == 1 && zonei == 1)
 +                            {
 +                                r2 -= rn[dim0]*rn[dim]*skew_fac_01;
 +                            }
 +                        }
 +                        if (bDistMB_pulse)
 +                        {
 +                            clear_rvec(rb);
 +                            rb[dim] += cg_cm[cg][dim] - bcorner[dim_ind] + tric_sh;
 +                            if (rb[dim] > 0)
 +                            {
 +                                rb2 += rb[dim]*rb[dim]*skew_fac2_d;
 +                                /* Take care of coupling of the distances
 +                                 * to the planes along dim0 and dim1 through dim2.
 +                                 */
 +                                if (dim_ind == 1 && zonei == 1)
 +                                {
 +                                    rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
 +                                }
 +                            }
 +                        }
 +                    }
 +                    
 +                    if (r2 < r_comm2 ||
 +                        (bDistBonded &&
 +                         ((bDistMB && rb2 < r_bcomm2) ||
 +                          (bDist2B && r2  < r_bcomm2)) &&
 +                         (!bBondComm ||
 +                          (GET_CGINFO_BOND_INTER(fr->cginfo[cg]) &&
 +                           missing_link(comm->cglink,index_gl[cg],
 +                                        comm->bLocalCG)))))
 +                    {
 +                        /* Make an index to the local charge groups */
 +                        if (nsend+1 > ind->nalloc)
 +                        {
 +                            ind->nalloc = over_alloc_large(nsend+1);
 +                            srenew(ind->index,ind->nalloc);
 +                        }
 +                        if (nsend+1 > comm->nalloc_int)
 +                        {
 +                            comm->nalloc_int = over_alloc_large(nsend+1);
 +                            srenew(comm->buf_int,comm->nalloc_int);
 +                        }
 +                        ind->index[nsend] = cg;
 +                        comm->buf_int[nsend] = index_gl[cg];
 +                        ind->nsend[zone]++;
 +                        vec_rvec_check_alloc(&comm->vbuf,nsend+1);
 +
 +                        if (dd->ci[dim] == 0)
 +                        {
 +                            /* Correct cg_cm for pbc */
 +                            rvec_add(cg_cm[cg],box[dim],comm->vbuf.v[nsend]);
 +                            if (bScrew)
 +                            {
 +                                comm->vbuf.v[nsend][YY] =
 +                                    box[YY][YY]-comm->vbuf.v[nsend][YY];
 +                                comm->vbuf.v[nsend][ZZ] =
 +                                    box[ZZ][ZZ]-comm->vbuf.v[nsend][ZZ];
 +                            }
 +                        }
 +                        else
 +                        {
 +                            copy_rvec(cg_cm[cg],comm->vbuf.v[nsend]);
 +                        }
 +                        nsend++;
 +                        nat += cgindex[cg+1] - cgindex[cg];
 +                    }
 +                }
 +            }
 +            /* Clear the counts in case we do not have pbc */
 +            for(zone=nzone_send; zone<nzone; zone++)
 +            {
 +                ind->nsend[zone] = 0;
 +            }
 +            ind->nsend[nzone]   = nsend;
 +            ind->nsend[nzone+1] = nat;
 +            /* Communicate the number of cg's and atoms to receive */
 +            dd_sendrecv_int(dd, dim_ind, dddirBackward,
 +                            ind->nsend, nzone+2,
 +                            ind->nrecv, nzone+2);
 +            
 +            /* The rvec buffer is also required for atom buffers of size nsend
 +             * in dd_move_x and dd_move_f.
 +             */
 +            vec_rvec_check_alloc(&comm->vbuf,ind->nsend[nzone+1]);
 +
 +            if (p > 0)
 +            {
 +                /* We can receive in place if only the last zone is not empty */
 +                for(zone=0; zone<nzone-1; zone++)
 +                {
 +                    if (ind->nrecv[zone] > 0)
 +                    {
 +                        cd->bInPlace = FALSE;
 +                    }
 +                }
 +                if (!cd->bInPlace)
 +                {
 +                    /* The int buffer is only required here for the cg indices */
 +                    if (ind->nrecv[nzone] > comm->nalloc_int2)
 +                    {
 +                        comm->nalloc_int2 = over_alloc_dd(ind->nrecv[nzone]);
 +                        srenew(comm->buf_int2,comm->nalloc_int2);
 +                    }
 +                    /* The rvec buffer is also required for atom buffers
 +                     * of size nrecv in dd_move_x and dd_move_f.
 +                     */
 +                    i = max(cd->ind[0].nrecv[nzone+1],ind->nrecv[nzone+1]);
 +                    vec_rvec_check_alloc(&comm->vbuf2,i);
 +                }
 +            }
 +            
 +            /* Make space for the global cg indices */
 +            if (pos_cg + ind->nrecv[nzone] > dd->cg_nalloc
 +                || dd->cg_nalloc == 0)
 +            {
 +                dd->cg_nalloc = over_alloc_dd(pos_cg + ind->nrecv[nzone]);
 +                srenew(index_gl,dd->cg_nalloc);
 +                srenew(cgindex,dd->cg_nalloc+1);
 +            }
 +            /* Communicate the global cg indices */
 +            if (cd->bInPlace)
 +            {
 +                recv_i = index_gl + pos_cg;
 +            }
 +            else
 +            {
 +                recv_i = comm->buf_int2;
 +            }
 +            dd_sendrecv_int(dd, dim_ind, dddirBackward,
 +                            comm->buf_int, nsend,
 +                            recv_i,        ind->nrecv[nzone]);
 +
 +            /* Make space for cg_cm */
 +            if (pos_cg + ind->nrecv[nzone] > fr->cg_nalloc)
 +            {
 +                dd_realloc_fr_cg(fr,pos_cg + ind->nrecv[nzone]);
 +                cg_cm = fr->cg_cm;
 +            }
 +            /* Communicate cg_cm */
 +            if (cd->bInPlace)
 +            {
 +                recv_vr = cg_cm + pos_cg;
 +            }
 +            else
 +            {
 +                recv_vr = comm->vbuf2.v;
 +            }
 +            dd_sendrecv_rvec(dd, dim_ind, dddirBackward,
 +                             comm->vbuf.v, nsend,
 +                             recv_vr,      ind->nrecv[nzone]);
 +            
 +            /* Make the charge group index */
 +            if (cd->bInPlace)
 +            {
 +                zone = (p == 0 ? 0 : nzone - 1);
 +                while (zone < nzone)
 +                {
 +                    for(cg=0; cg<ind->nrecv[zone]; cg++)
 +                    {
 +                        cg_gl = index_gl[pos_cg];
 +                        fr->cginfo[pos_cg] = ddcginfo(cginfo_mb,cg_gl);
 +                        nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
 +                        cgindex[pos_cg+1] = cgindex[pos_cg] + nrcg;
 +                        if (bBondComm)
 +                        {
 +                            /* Update the charge group presence,
 +                             * so we can use it in the next pass of the loop.
 +                             */
 +                            comm->bLocalCG[cg_gl] = TRUE;
 +                        }
 +                        pos_cg++;
 +                    }
 +                    if (p == 0)
 +                    {
 +                        comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
 +                    }
 +                    zone++;
 +                    zone_cg_range[nzone+zone] = pos_cg;
 +                }
 +            }
 +            else
 +            {
 +                /* This part of the code is never executed with bBondComm. */
 +                merge_cg_buffers(nzone,cd,p,zone_cg_range,
 +                                 index_gl,recv_i,cg_cm,recv_vr,
 +                                 cgindex,fr->cginfo_mb,fr->cginfo);
 +                pos_cg += ind->nrecv[nzone];
 +            }
 +            nat_tot += ind->nrecv[nzone+1];
 +        }
 +        if (!cd->bInPlace)
 +        {
 +            /* Store the atom block for easy copying of communication buffers */
 +            make_cell2at_index(cd,nzone,zone_cg_range[nzone],cgindex);
 +        }
 +        nzone += nzone;
 +    }
 +    dd->index_gl = index_gl;
 +    dd->cgindex  = cgindex;
 +    
 +    dd->ncg_tot = zone_cg_range[zones->n];
 +    dd->nat_tot = nat_tot;
 +    comm->nat[ddnatHOME] = dd->nat_home;
 +    for(i=ddnatZONE; i<ddnatNR; i++)
 +    {
 +        comm->nat[i] = dd->nat_tot;
 +    }
 +
 +    if (!bBondComm)
 +    {
 +        /* We don't need to update cginfo, since that was already done above.
 +         * So we pass NULL for the forcerec.
 +         */
 +        dd_set_cginfo(dd->index_gl,dd->ncg_home,dd->ncg_tot,
 +                      NULL,comm->bLocalCG);
 +    }
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"Finished setting up DD communication, zones:");
 +        for(c=0; c<zones->n; c++)
 +        {
 +            fprintf(debug," %d",zones->cg_range[c+1]-zones->cg_range[c]);
 +        }
 +        fprintf(debug,"\n");
 +    }
 +}
 +
 +static void set_cg_boundaries(gmx_domdec_zones_t *zones)
 +{
 +    int c;
 +    
 +    for(c=0; c<zones->nizone; c++)
 +    {
 +        zones->izone[c].cg1  = zones->cg_range[c+1];
 +        zones->izone[c].jcg0 = zones->cg_range[zones->izone[c].j0];
 +        zones->izone[c].jcg1 = zones->cg_range[zones->izone[c].j1];
 +    }
 +}
 +
 +static int comp_cgsort(const void *a,const void *b)
 +{
 +    int comp;
 +    
 +    gmx_cgsort_t *cga,*cgb;
 +    cga = (gmx_cgsort_t *)a;
 +    cgb = (gmx_cgsort_t *)b;
 +    
 +    comp = cga->nsc - cgb->nsc;
 +    if (comp == 0)
 +    {
 +        comp = cga->ind_gl - cgb->ind_gl;
 +    }
 +    
 +    return comp;
 +}
 +
 +static void order_int_cg(int n,gmx_cgsort_t *sort,
 +                         int *a,int *buf)
 +{
 +    int i;
 +    
 +    /* Order the data */
 +    for(i=0; i<n; i++)
 +    {
 +        buf[i] = a[sort[i].ind];
 +    }
 +    
 +    /* Copy back to the original array */
 +    for(i=0; i<n; i++)
 +    {
 +        a[i] = buf[i];
 +    }
 +}
 +
 +static void order_vec_cg(int n,gmx_cgsort_t *sort,
 +                         rvec *v,rvec *buf)
 +{
 +    int i;
 +    
 +    /* Order the data */
 +    for(i=0; i<n; i++)
 +    {
 +        copy_rvec(v[sort[i].ind],buf[i]);
 +    }
 +    
 +    /* Copy back to the original array */
 +    for(i=0; i<n; i++)
 +    {
 +        copy_rvec(buf[i],v[i]);
 +    }
 +}
 +
 +static void order_vec_atom(int ncg,int *cgindex,gmx_cgsort_t *sort,
 +                           rvec *v,rvec *buf)
 +{
 +    int a,atot,cg,cg0,cg1,i;
 +    
 +    /* Order the data */
 +    a = 0;
 +    for(cg=0; cg<ncg; cg++)
 +    {
 +        cg0 = cgindex[sort[cg].ind];
 +        cg1 = cgindex[sort[cg].ind+1];
 +        for(i=cg0; i<cg1; i++)
 +        {
 +            copy_rvec(v[i],buf[a]);
 +            a++;
 +        }
 +    }
 +    atot = a;
 +    
 +    /* Copy back to the original array */
 +    for(a=0; a<atot; a++)
 +    {
 +        copy_rvec(buf[a],v[a]);
 +    }
 +}
 +
 +static void ordered_sort(int nsort2,gmx_cgsort_t *sort2,
 +                         int nsort_new,gmx_cgsort_t *sort_new,
 +                         gmx_cgsort_t *sort1)
 +{
 +    int i1,i2,i_new;
 +    
 +    /* The new indices are not very ordered, so we qsort them */
 +    qsort_threadsafe(sort_new,nsort_new,sizeof(sort_new[0]),comp_cgsort);
 +    
 +    /* sort2 is already ordered, so now we can merge the two arrays */
 +    i1 = 0;
 +    i2 = 0;
 +    i_new = 0;
 +    while(i2 < nsort2 || i_new < nsort_new)
 +    {
 +        if (i2 == nsort2)
 +        {
 +            sort1[i1++] = sort_new[i_new++];
 +        }
 +        else if (i_new == nsort_new)
 +        {
 +            sort1[i1++] = sort2[i2++];
 +        }
 +        else if (sort2[i2].nsc < sort_new[i_new].nsc ||
 +                 (sort2[i2].nsc == sort_new[i_new].nsc &&
 +                  sort2[i2].ind_gl < sort_new[i_new].ind_gl))
 +        {
 +            sort1[i1++] = sort2[i2++];
 +        }
 +        else
 +        {
 +            sort1[i1++] = sort_new[i_new++];
 +        }
 +    }
 +}
 +/* Sort the home charge groups on ns grid cell index and global cg index
 + * and reorder all data that is stored per home charge group or home atom.
 + *
 + * dd           - domain decomposition data; ncg_home, nat_home, index_gl
 + *                and cgindex are updated
 + * ePBC         - not referenced in the body of this function
 + * cgcm         - charge group centers, reordered in place
 + * fr           - fr->ns.grid->cell_index supplies the sort key and is
 + *                rewritten in sorted order; fr->cginfo is reordered
 + * state        - the distributed rvec entries flagged in state->flags
 + *                (x, v, sd_X, cg_p) are reordered in place
 + * ncg_home_old - when >= 0, entries with i < ncg_home_old whose cell index
 + *                equals sort->sort1[i].nsc are treated as already ordered
 + *                and only the other entries are qsorted and merged in;
 + *                when negative, all home charge groups are qsorted
 + *
 + * Charge groups whose cell index equals 4*fr->ns.grid->ncells are not
 + * counted in the new dd->ncg_home.
 + */
 +static void dd_sort_state(gmx_domdec_t *dd,int ePBC,
 +                          rvec *cgcm,t_forcerec *fr,t_state *state,
 +                          int ncg_home_old)
 +{
 +    gmx_domdec_sort_t *sort;
 +    gmx_cgsort_t *cgsort,*sort_i;
 +    int  ncg_new,nsort2,nsort_new,i,cell_index,*ibuf,cgsize;
 +    rvec *vbuf;
 +    
 +    sort = dd->comm->sort;
 +    
 +    if (dd->ncg_home > sort->sort_nalloc)
 +    {
 +        sort->sort_nalloc = over_alloc_dd(dd->ncg_home);
 +        srenew(sort->sort1,sort->sort_nalloc);
 +        srenew(sort->sort2,sort->sort_nalloc);
 +    }
 +    
 +    if (ncg_home_old >= 0)
 +    {
 +        /* The charge groups that remained in the same ns grid cell
 +         * are completely ordered. So we can sort efficiently by sorting
 +         * the charge groups that did move into the stationary list.
 +         */
 +        ncg_new = 0;
 +        nsort2 = 0;
 +        nsort_new = 0;
 +        for(i=0; i<dd->ncg_home; i++)
 +        {
 +            /* Check if this cg did not move to another node */
 +            cell_index = fr->ns.grid->cell_index[i];
 +            if (cell_index !=  4*fr->ns.grid->ncells)
 +            {
 +                if (i >= ncg_home_old || cell_index != sort->sort1[i].nsc)
 +                {
 +                    /* This cg is new on this node or moved ns grid cell */
 +                    if (nsort_new >= sort->sort_new_nalloc)
 +                    {
 +                        sort->sort_new_nalloc = over_alloc_dd(nsort_new+1);
 +                        srenew(sort->sort_new,sort->sort_new_nalloc);
 +                    }
 +                    sort_i = &(sort->sort_new[nsort_new++]);
 +                }
 +                else
 +                {
 +                    /* This cg did not move */
 +                    sort_i = &(sort->sort2[nsort2++]);
 +                }
 +                /* Sort on the ns grid cell indices
 +                 * and the global topology index
 +                 */
 +                sort_i->nsc    = cell_index;
 +                sort_i->ind_gl = dd->index_gl[i];
 +                sort_i->ind    = i;
 +                ncg_new++;
 +            }
 +        }
 +        if (debug)
 +        {
 +            fprintf(debug,"ordered sort cgs: stationary %d moved %d\n",
 +                    nsort2,nsort_new);
 +        }
 +        /* Sort efficiently: qsort only sort_new and merge it with
 +         * the stationary list sort2 into sort1.
 +         */
 +        ordered_sort(nsort2,sort->sort2,nsort_new,sort->sort_new,sort->sort1);
 +    }
 +    else
 +    {
 +        cgsort = sort->sort1;
 +        ncg_new = 0;
 +        for(i=0; i<dd->ncg_home; i++)
 +        {
 +            /* Sort on the ns grid cell indices
 +             * and the global topology index
 +             */
 +            cgsort[i].nsc    = fr->ns.grid->cell_index[i];
 +            cgsort[i].ind_gl = dd->index_gl[i];
 +            cgsort[i].ind    = i;
 +            if (cgsort[i].nsc != 4*fr->ns.grid->ncells)
 +            {
 +                ncg_new++;
 +            }
 +        }
 +        if (debug)
 +        {
 +            fprintf(debug,"qsort cgs: %d new home %d\n",dd->ncg_home,ncg_new);
 +        }
 +        /* Determine the order of the charge groups using qsort.
 +         * NOTE(review): all dd->ncg_home entries are sorted here, while
 +         * only the first ncg_new are used below; this presumably relies on
 +         * the marker value 4*ncells sorting after every valid cell index
 +         * - confirm.
 +         */
 +        qsort_threadsafe(cgsort,dd->ncg_home,sizeof(cgsort[0]),comp_cgsort);
 +    }
 +    cgsort = sort->sort1;
 +    
 +    /* We alloc with the old size, since cgindex is still old */
 +    vec_rvec_check_alloc(&dd->comm->vbuf,dd->cgindex[dd->ncg_home]);
 +    vbuf = dd->comm->vbuf.v;
 +    
 +    /* Remove the charge groups which are no longer at home here */
 +    dd->ncg_home = ncg_new;
 +    
 +    /* Reorder the state */
 +    for(i=0; i<estNR; i++)
 +    {
 +        if (EST_DISTR(i) && (state->flags & (1<<i)))
 +        {
 +            switch (i)
 +            {
 +            case estX:
 +                order_vec_atom(dd->ncg_home,dd->cgindex,cgsort,state->x,vbuf);
 +                break;
 +            case estV:
 +                order_vec_atom(dd->ncg_home,dd->cgindex,cgsort,state->v,vbuf);
 +                break;
 +            case estSDX:
 +                order_vec_atom(dd->ncg_home,dd->cgindex,cgsort,state->sd_X,vbuf);
 +                break;
 +            case estCGP:
 +                order_vec_atom(dd->ncg_home,dd->cgindex,cgsort,state->cg_p,vbuf);
 +                break;
 +            case estLD_RNG:
 +            case estLD_RNGI:
 +            case estDISRE_INITF:
 +            case estDISRE_RM3TAV:
 +            case estORIRE_INITF:
 +            case estORIRE_DTAV:
 +                /* No ordering required */
 +                break;
 +            default:
 +                gmx_incons("Unknown state entry encountered in dd_sort_state");
 +                break;
 +            }
 +        }
 +    }
 +    /* Reorder cgcm */
 +    order_vec_cg(dd->ncg_home,cgsort,cgcm,vbuf);
 +    
 +    if (dd->ncg_home+1 > sort->ibuf_nalloc)
 +    {
 +        sort->ibuf_nalloc = over_alloc_dd(dd->ncg_home+1);
 +        srenew(sort->ibuf,sort->ibuf_nalloc);
 +    }
 +    ibuf = sort->ibuf;
 +    /* Reorder the global cg index */
 +    order_int_cg(dd->ncg_home,cgsort,dd->index_gl,ibuf);
 +    /* Reorder the cginfo */
 +    order_int_cg(dd->ncg_home,cgsort,fr->cginfo,ibuf);
 +    /* Rebuild the local cg index: accumulate the sizes of the charge groups
 +     * in sorted order in ibuf first, since the old cgindex is still
 +     * needed to look up those sizes.
 +     */
 +    ibuf[0] = 0;
 +    for(i=0; i<dd->ncg_home; i++)
 +    {
 +        cgsize = dd->cgindex[cgsort[i].ind+1] - dd->cgindex[cgsort[i].ind];
 +        ibuf[i+1] = ibuf[i] + cgsize;
 +    }
 +    for(i=0; i<dd->ncg_home+1; i++)
 +    {
 +        dd->cgindex[i] = ibuf[i];
 +    }
 +    /* Set the home atom number */
 +    dd->nat_home = dd->cgindex[dd->ncg_home];
 +    
 +    /* Copy the sorted ns cell indices back to the ns grid struct */
 +    for(i=0; i<dd->ncg_home; i++)
 +    {
 +        fr->ns.grid->cell_index[i] = cgsort[i].nsc;
 +    }
 +    fr->ns.grid->nr = dd->ncg_home;
 +}
 +
 +static void add_dd_statistics(gmx_domdec_t *dd)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int ddnat;
 +    
 +    comm = dd->comm;
 +    
 +    for(ddnat=ddnatZONE; ddnat<ddnatNR; ddnat++)
 +    {
 +        comm->sum_nat[ddnat-ddnatZONE] +=
 +            comm->nat[ddnat] - comm->nat[ddnat-1];
 +    }
 +    comm->ndecomp++;
 +}
 +
 +void reset_dd_statistics_counters(gmx_domdec_t *dd)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int ddnat;
 +    
 +    comm = dd->comm;
 +
 +    /* Reset all the statistics and counters for total run counting */
 +    for(ddnat=ddnatZONE; ddnat<ddnatNR; ddnat++)
 +    {
 +        comm->sum_nat[ddnat-ddnatZONE] = 0;
 +    }
 +    comm->ndecomp = 0;
 +    comm->nload = 0;
 +    comm->load_step = 0;
 +    comm->load_sum = 0;
 +    comm->load_max = 0;
 +    clear_ivec(comm->load_lim);
 +    comm->load_mdf = 0;
 +    comm->load_pme = 0;
 +}
 +
 +void print_dd_statistics(t_commrec *cr,t_inputrec *ir,FILE *fplog)
 +{
 +    gmx_domdec_comm_t *comm;
 +    int ddnat;
 +    double av;
 +   
 +    comm = cr->dd->comm;
 +    
 +    gmx_sumd(ddnatNR-ddnatZONE,comm->sum_nat,cr);
 +    
 +    if (fplog == NULL)
 +    {
 +        return;
 +    }
 +    
 +    fprintf(fplog,"\n    D O M A I N   D E C O M P O S I T I O N   S T A T I S T I C S\n\n");
 +            
 +    for(ddnat=ddnatZONE; ddnat<ddnatNR; ddnat++)
 +    {
 +        av = comm->sum_nat[ddnat-ddnatZONE]/comm->ndecomp;
 +        switch(ddnat)
 +        {
 +        case ddnatZONE:
 +            fprintf(fplog,
 +                    " av. #atoms communicated per step for force:  %d x %.1f\n",
 +                    2,av);
 +            break;
 +        case ddnatVSITE:
 +            if (cr->dd->vsite_comm)
 +            {
 +                fprintf(fplog,
 +                        " av. #atoms communicated per step for vsites: %d x %.1f\n",
 +                        (EEL_PME(ir->coulombtype) || ir->coulombtype==eelEWALD) ? 3 : 2,
 +                        av);
 +            }
 +            break;
 +        case ddnatCON:
 +            if (cr->dd->constraint_comm)
 +            {
 +                fprintf(fplog,
 +                        " av. #atoms communicated per step for LINCS:  %d x %.1f\n",
 +                        1 + ir->nLincsIter,av);
 +            }
 +            break;
 +        default:
 +            gmx_incons(" Unknown type for DD statistics");
 +        }
 +    }
 +    fprintf(fplog,"\n");
 +    
 +    if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
 +    {
 +        print_dd_load_av(fplog,cr->dd);
 +    }
 +}
 +/* Repartition the system over the domain decomposition cells.
 + *
 + * One of three paths is taken to obtain the home state:
 + *  - bMasterState: the old indices are cleared and state_global is
 + *    distributed over the nodes;
 + *  - state_local->ddp_count != dd->ddp_count: the local indices are rebuilt
 + *    from state_local;
 + *  - otherwise: the home state is kept and only the non-home indices are
 + *    cleared, after which the charge groups are redistributed.
 + * Then the cell sizes are set, the charge groups are optionally
 + * redistributed and sorted, the communication setup, the local indices and
 + * the local topology are made, and the data that depends on the local
 + * atoms (special atom communication, state allocation, force ranges,
 + * mdatoms, shells, GB, PME charges, constraints, pull and rotation groups)
 + * is regenerated. Finally dd->ddp_count is incremented and stored in
 + * state_local->ddp_count.
 + *
 + * When load recording is on and loads have been counted, the load
 + * distribution may be collected and printed, and with eDLB == edlbAUTO
 + * dynamic load balancing may be turned on here.
 + */
 +void dd_partition_system(FILE            *fplog,
 +                         gmx_large_int_t      step,
 +                         t_commrec       *cr,
 +                         gmx_bool            bMasterState,
 +                         int             nstglobalcomm,
 +                         t_state         *state_global,
 +                         gmx_mtop_t      *top_global,
 +                         t_inputrec      *ir,
 +                         t_state         *state_local,
 +                         rvec            **f,
 +                         t_mdatoms       *mdatoms,
 +                         gmx_localtop_t  *top_local,
 +                         t_forcerec      *fr,
 +                         gmx_vsite_t     *vsite,
 +                         gmx_shellfc_t   shellfc,
 +                         gmx_constr_t    constr,
 +                         t_nrnb          *nrnb,
 +                         gmx_wallcycle_t wcycle,
 +                         gmx_bool            bVerbose)
 +{
 +    gmx_domdec_t *dd;
 +    gmx_domdec_comm_t *comm;
 +    gmx_ddbox_t ddbox={0};
 +    t_block *cgs_gl;
 +    gmx_large_int_t step_pcoupl;
 +    rvec cell_ns_x0,cell_ns_x1;
 +    int  i,j,n,cg0=0,ncg_home_old=-1,nat_f_novirsum;
 +    gmx_bool bBoxChanged,bNStGlobalComm,bDoDLB,bCheckDLB,bTurnOnDLB,bLogLoad;
 +    gmx_bool bRedist,bSortCG,bResortAll;
 +    ivec ncells_old,np;
 +    real grid_density;
 +    char sbuf[22];
 +      
 +    dd = cr->dd;
 +    comm = dd->comm;
 +
 +    bBoxChanged = (bMasterState || DEFORM(*ir));
 +    if (ir->epc != epcNO)
 +    {
 +        /* With nstpcouple > 1 pressure coupling happens
 +         * one step after calculating the pressure.
 +         * Box scaling happens at the end of the MD step,
 +         * after the DD partitioning.
 +         * We therefore have to do DLB in the first partitioning
 +         * after an MD step where P-coupling occurred.
 +         * We need to determine the last step in which p-coupling occurred.
 +         * MRS -- need to validate this for vv?
 +         */
 +        n = ir->nstpcouple;
 +        if (n == 1)
 +        {
 +            step_pcoupl = step - 1;
 +        }
 +        else
 +        {
 +            step_pcoupl = ((step - 1)/n)*n + 1;
 +        }
 +        if (step_pcoupl >= comm->globalcomm_step)
 +        {
 +            bBoxChanged = TRUE;
 +        }
 +    }
 +
 +    bNStGlobalComm = (step >= comm->globalcomm_step + nstglobalcomm);
 +
 +    if (!comm->bDynLoadBal)
 +    {
 +        bDoDLB = FALSE;
 +    }
 +    else
 +    {
 +        /* Should we do dynamic load balancing this step?
 +         * Since it requires (possibly expensive) global communication,
 +         * we might want to do DLB less frequently.
 +         */
 +        if (bBoxChanged || ir->epc != epcNO)
 +        {
 +            bDoDLB = bBoxChanged;
 +        }
 +        else
 +        {
 +            bDoDLB = bNStGlobalComm;
 +        }
 +    }
 +
 +    /* Check if we have recorded loads on the nodes */
 +    if (comm->bRecordLoad && dd_load_count(comm))
 +    {
 +        if (comm->eDLB == edlbAUTO && !comm->bDynLoadBal)
 +        {
 +            /* Check if we should use DLB at the second partitioning
 +             * and every 100 partitionings,
 +             * so the extra communication cost is negligible.
 +             */
 +            n = max(100,nstglobalcomm);
 +            bCheckDLB = (comm->n_load_collect == 0 ||
 +                         comm->n_load_have % n == n-1);
 +        }
 +        else
 +        {
 +            bCheckDLB = FALSE;
 +        }
 +        
 +        /* Print load every nstlog, first and last step to the log file */
 +        bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
 +                    comm->n_load_collect == 0 ||
 +                    (ir->nsteps >= 0 &&
 +                     (step + ir->nstlist > ir->init_step + ir->nsteps)));
 +
 +        /* Avoid extra communication due to verbose screen output
 +         * when nstglobalcomm is set.
 +         */
 +        if (bDoDLB || bLogLoad || bCheckDLB ||
 +            (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
 +        {
 +            get_load_distribution(dd,wcycle);
 +            if (DDMASTER(dd))
 +            {
 +                if (bLogLoad)
 +                {
 +                    dd_print_load(fplog,dd,step-1);
 +                }
 +                if (bVerbose)
 +                {
 +                    dd_print_load_verbose(dd);
 +                }
 +            }
 +            comm->n_load_collect++;
 +
 +            if (bCheckDLB) {
 +                /* Since the timings are node dependent, the master decides;
 +                 * bTurnOnDLB is only assigned on the DD master here
 +                 * and is passed to dd_bcast below before it is read.
 +                 */
 +                if (DDMASTER(dd))
 +                {
 +                    bTurnOnDLB =
 +                        (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS);
 +                    if (debug)
 +                    {
 +                        fprintf(debug,"step %s, imb loss %f\n",
 +                                gmx_step_str(step,sbuf),
 +                                dd_force_imb_perf_loss(dd));
 +                    }
 +                }
 +                dd_bcast(dd,sizeof(bTurnOnDLB),&bTurnOnDLB);
 +                if (bTurnOnDLB)
 +                {
 +                    turn_on_dlb(fplog,cr,step);
 +                    bDoDLB = TRUE;
 +                }
 +            }
 +        }
 +        comm->n_load_have++;
 +    }
 +
 +    cgs_gl = &comm->cgs_gl;
 +
 +    bRedist = FALSE;
 +    if (bMasterState)
 +    {
 +        /* Clear the old state */
 +        clear_dd_indices(dd,0,0);
 +
 +        set_ddbox(dd,bMasterState,cr,ir,state_global->box,
 +                  TRUE,cgs_gl,state_global->x,&ddbox);
 +    
 +        get_cg_distribution(fplog,step,dd,cgs_gl,
 +                            state_global->box,&ddbox,state_global->x);
 +        
 +        dd_distribute_state(dd,cgs_gl,
 +                            state_global,state_local,f);
 +        
 +        dd_make_local_cgs(dd,&top_local->cgs);
 +        
 +        if (dd->ncg_home > fr->cg_nalloc)
 +        {
 +            dd_realloc_fr_cg(fr,dd->ncg_home);
 +        }
 +        calc_cgcm(fplog,0,dd->ncg_home,
 +                  &top_local->cgs,state_local->x,fr->cg_cm);
 +        
 +        inc_nrnb(nrnb,eNR_CGCM,dd->nat_home);
 +        
 +        dd_set_cginfo(dd->index_gl,0,dd->ncg_home,fr,comm->bLocalCG);
 +
 +        cg0 = 0;
 +    }
 +    else if (state_local->ddp_count != dd->ddp_count)
 +    {
 +        if (state_local->ddp_count > dd->ddp_count)
 +        {
 +            gmx_fatal(FARGS,"Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)",state_local->ddp_count,dd->ddp_count);
 +        }
 +        
 +        if (state_local->ddp_count_cg_gl != state_local->ddp_count)
 +        {
 +            gmx_fatal(FARGS,"Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)",state_local->ddp_count_cg_gl,state_local->ddp_count);
 +        }
 +        
 +        /* Clear the old state */
 +        clear_dd_indices(dd,0,0);
 +        
 +        /* Build the new indices */
 +        rebuild_cgindex(dd,cgs_gl->index,state_local);
 +        make_dd_indices(dd,cgs_gl->index,0);
 +        
 +        /* Redetermine the cg COMs */
 +        calc_cgcm(fplog,0,dd->ncg_home,
 +                  &top_local->cgs,state_local->x,fr->cg_cm);
 +        
 +        inc_nrnb(nrnb,eNR_CGCM,dd->nat_home);
 +
 +        dd_set_cginfo(dd->index_gl,0,dd->ncg_home,fr,comm->bLocalCG);
 +
 +        set_ddbox(dd,bMasterState,cr,ir,state_local->box,
 +                  TRUE,&top_local->cgs,state_local->x,&ddbox);
 +
 +        bRedist = comm->bDynLoadBal;
 +    }
 +    else
 +    {
 +        /* We have the full state, only redistribute the cgs */
 +
 +        /* Clear the non-home indices */
 +        clear_dd_indices(dd,dd->ncg_home,dd->nat_home);
 +
 +        /* Avoid global communication for dim's without pbc and -gcom */
 +        if (!bNStGlobalComm)
 +        {
 +            copy_rvec(comm->box0    ,ddbox.box0    );
 +            copy_rvec(comm->box_size,ddbox.box_size);
 +        }
 +        set_ddbox(dd,bMasterState,cr,ir,state_local->box,
 +                  bNStGlobalComm,&top_local->cgs,state_local->x,&ddbox);
 +
 +        bBoxChanged = TRUE;
 +        bRedist = TRUE;
 +    }
 +    /* For dim's without pbc and -gcom */
 +    copy_rvec(ddbox.box0    ,comm->box0    );
 +    copy_rvec(ddbox.box_size,comm->box_size);
 +    
 +    set_dd_cell_sizes(dd,&ddbox,dynamic_dd_box(&ddbox,ir),bMasterState,bDoDLB,
 +                      step,wcycle);
 +    
 +    if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
 +    {
 +        write_dd_grid_pdb("dd_grid",step,dd,state_local->box,&ddbox);
 +    }
 +    
 +    /* Check if we should sort the charge groups */
 +    if (comm->nstSortCG > 0)
 +    {
 +        bSortCG = (bMasterState ||
 +                   (bRedist && (step % comm->nstSortCG == 0)));
 +    }
 +    else
 +    {
 +        bSortCG = FALSE;
 +    }
 +
 +    ncg_home_old = dd->ncg_home;
 +
 +    if (bRedist)
 +    {
 +        cg0 = dd_redistribute_cg(fplog,step,dd,ddbox.tric_dir,
 +                                 state_local,f,fr,mdatoms,
 +                                 !bSortCG,nrnb);
 +    }
 +    
 +    get_nsgrid_boundaries(fr->ns.grid,dd,
 +                          state_local->box,&ddbox,&comm->cell_x0,&comm->cell_x1,
 +                          dd->ncg_home,fr->cg_cm,
 +                          cell_ns_x0,cell_ns_x1,&grid_density);
 +
 +    if (bBoxChanged)
 +    {
 +        comm_dd_ns_cell_sizes(dd,&ddbox,cell_ns_x0,cell_ns_x1,step);
 +    }
 +
 +    copy_ivec(fr->ns.grid->n,ncells_old);
 +    grid_first(fplog,fr->ns.grid,dd,&ddbox,fr->ePBC,
 +               state_local->box,cell_ns_x0,cell_ns_x1,
 +               fr->rlistlong,grid_density);
 +    /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
 +    copy_ivec(ddbox.tric_dir,comm->tric_dir);
 +
 +    if (bSortCG)
 +    {
 +        /* Sort the state on charge group position.
 +         * This enables exact restarts from this step.
 +         * It also improves performance by about 15% with larger numbers
 +         * of atoms per node.
 +         */
 +        
 +        /* Fill the ns grid with the home cell,
 +         * so we can sort with the indices.
 +         */
 +        set_zones_ncg_home(dd);
 +        fill_grid(fplog,&comm->zones,fr->ns.grid,dd->ncg_home,
 +                  0,dd->ncg_home,fr->cg_cm);
 +        
 +        /* Check if we can use the old order and ns grid cell indices
 +         * of the charge groups to sort the charge groups efficiently.
 +         */
 +        bResortAll = (bMasterState ||
 +                      fr->ns.grid->n[XX] != ncells_old[XX] ||
 +                      fr->ns.grid->n[YY] != ncells_old[YY] ||
 +                      fr->ns.grid->n[ZZ] != ncells_old[ZZ]);
 +
 +        if (debug)
 +        {
 +            fprintf(debug,"Step %s, sorting the %d home charge groups\n",
 +                    gmx_step_str(step,sbuf),dd->ncg_home);
 +        }
 +        dd_sort_state(dd,ir->ePBC,fr->cg_cm,fr,state_local,
 +                      bResortAll ? -1 : ncg_home_old);
 +        /* Rebuild all the indices */
 +        cg0 = 0;
 +        ga2la_clear(dd->ga2la);
 +    }
 +    
 +    /* Setup up the communication and communicate the coordinates */
 +    setup_dd_communication(dd,state_local->box,&ddbox,fr);
 +    
 +    /* Set the indices */
 +    make_dd_indices(dd,cgs_gl->index,cg0);
 +
 +    /* Set the charge group boundaries for neighbor searching */
 +    set_cg_boundaries(&comm->zones);
 +    
 +    /*
 +    write_dd_pdb("dd_home",step,"dump",top_global,cr,
 +                 -1,state_local->x,state_local->box);
 +    */
 +    
 +    /* Extract a local topology from the global topology */
 +    for(i=0; i<dd->ndim; i++)
 +    {
 +        np[dd->dim[i]] = comm->cd[i].np;
 +    }
 +    dd_make_local_top(fplog,dd,&comm->zones,dd->npbcdim,state_local->box,
 +                      comm->cellsize_min,np,
 +                      fr,vsite,top_global,top_local);
 +    
 +    /* Set up the special atom communication: n carries the running
 +     * atom count, which is stored in comm->nat for every category.
 +     */
 +    n = comm->nat[ddnatZONE];
 +    for(i=ddnatZONE+1; i<ddnatNR; i++)
 +    {
 +        switch(i)
 +        {
 +        case ddnatVSITE:
 +            if (vsite && vsite->n_intercg_vsite)
 +            {
 +                n = dd_make_local_vsites(dd,n,top_local->idef.il);
 +            }
 +            break;
 +        case ddnatCON:
 +            if (dd->bInterCGcons)
 +            {
 +                /* Only for inter-cg constraints we need special code */
 +                n = dd_make_local_constraints(dd,n,top_global,
 +                                              constr,ir->nProjOrder,
 +                                              &top_local->idef.il[F_CONSTR]);
 +            }
 +            break;
 +        default:
 +            gmx_incons("Unknown special atom type setup");
 +        }
 +        comm->nat[i] = n;
 +    }
 +    
 +    /* Make space for the extra coordinates for virtual site
 +     * or constraint communication.
 +     */
 +    state_local->natoms = comm->nat[ddnatNR-1];
 +    if (state_local->natoms > state_local->nalloc)
 +    {
 +        dd_realloc_state(state_local,f,state_local->natoms);
 +    }
 +
 +    if (fr->bF_NoVirSum)
 +    {
 +        if (vsite && vsite->n_intercg_vsite)
 +        {
 +            nat_f_novirsum = comm->nat[ddnatVSITE];
 +        }
 +        else
 +        {
 +            if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
 +            {
 +                nat_f_novirsum = dd->nat_tot;
 +            }
 +            else
 +            {
 +                nat_f_novirsum = dd->nat_home;
 +            }
 +        }
 +    }
 +    else
 +    {
 +        nat_f_novirsum = 0;
 +    }
 +
 +    /* Set the number of atoms required for the force calculation.
 +     * Forces need to be constrained when using a twin-range setup
 +     * or with energy minimization. For simple simulations we could
 +     * avoid some allocation, zeroing and copying, but this is
 +     * probably not worth the complications and checking.
 +     */
 +    forcerec_set_ranges(fr,dd->ncg_home,dd->ncg_tot,
 +                        dd->nat_tot,comm->nat[ddnatCON],nat_f_novirsum);
 +
 +    /* We make all the mdatoms up to nat_tot_con.
 +     * We could save some work by only setting invmass
 +     * between nat_tot and nat_tot_con.
 +     */
 +    /* This call also sets the new number of home particles to dd->nat_home */
 +    atoms2md(top_global,ir,
 +             comm->nat[ddnatCON],dd->gatindex,0,dd->nat_home,mdatoms);
 +
 +    /* Now we have the charges we can sort the FE interactions */
 +    dd_sort_local_top(dd,mdatoms,top_local);
 +
 +    if (shellfc)
 +    {
 +        /* Make the local shell stuff, currently no communication is done */
 +        make_local_shells(cr,mdatoms,shellfc);
 +    }
 +    
 +      if (ir->implicit_solvent)
 +    {
 +        make_local_gb(cr,fr->born,ir->gb_algorithm);
 +    }
 +      
 +    if (!(cr->duty & DUTY_PME))
 +    {
 +        /* Send the charges to our PME only node */
 +        gmx_pme_send_q(cr,mdatoms->nChargePerturbed,
 +                       mdatoms->chargeA,mdatoms->chargeB,
 +                       dd_pme_maxshift_x(dd),dd_pme_maxshift_y(dd));
 +    }
 +    
 +    if (constr)
 +    {
 +        set_constraints(constr,top_local,ir,mdatoms,cr);
 +    }
 +    
 +    if (ir->ePull != epullNO)
 +    {
 +        /* Update the local pull groups */
 +        dd_make_local_pull_groups(dd,ir->pull,mdatoms);
 +    }
 +    
 +    if (ir->bRot)
 +    {
 +        /* Update the local rotation groups */
 +        dd_make_local_rotation_groups(dd,ir->rot);
 +    }
 +
 +
 +    add_dd_statistics(dd);
 +    
 +    /* Make sure we only count the cycles for this DD partitioning */
 +    clear_dd_cycle_counts(dd);
 +    
 +    /* Because the order of the atoms might have changed since
 +     * the last vsite construction, we need to communicate the constructing
 +     * atom coordinates again (for spreading the forces this MD step).
 +     */
 +    dd_move_x_vsites(dd,state_local->box,state_local->x);
 +    
 +    if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
 +    {
 +        dd_move_x(dd,state_local->box,state_local->x);
 +        write_dd_pdb("dd_dump",step,"dump",top_global,cr,
 +                     -1,state_local->x,state_local->box);
 +    }
 +
 +    if (bNStGlobalComm)
 +    {
 +        /* Store the global communication step */
 +        comm->globalcomm_step = step;
 +    }
 +    
 +    /* Increase the DD partitioning counter */
 +    dd->ddp_count++;
 +    /* The state currently matches this DD partitioning count, store it */
 +    state_local->ddp_count = dd->ddp_count;
 +    if (bMasterState)
 +    {
 +        /* The DD master node knows the complete cg distribution,
 +         * store the count so we can possibly skip the cg info communication.
 +         */
 +        comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
 +    }
 +
 +    if (comm->DD_debug > 0)
 +    {
 +        /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
 +        check_index_consistency(dd,top_global->natoms,ncg_mtop(top_global),
 +                                "after partitioning");
 +    }
 +}
Simple merge
Simple merge
index e7e168aebfaaf42c2a7a6a9c95f29dbdbd103d8f,0000000000000000000000000000000000000000..5238a20c3ce54b32ca1d6bedd2a3e6012df9b882
mode 100644,000000..100644
--- /dev/null
@@@ -1,2575 -1,0 +1,2575 @@@
- #include "time.h"
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <stdio.h>
 +#include <time.h>
 +#include "typedefs.h"
 +#include "string2.h"
 +#include "smalloc.h"
 +#include "names.h"
 +#include "confio.h"
 +#include "mvdata.h"
 +#include "txtdump.h"
 +#include "vec.h"
++#include <time.h>
 +#include "nrnb.h"
 +#include "mshift.h"
 +#include "mdrun.h"
 +#include "update.h"
 +#include "physics.h"
 +#include "nrjac.h"
 +#include "mtop_util.h"
 +#include "edsam.h"
 +#include "gmxfio.h"
 +#include "groupcoord.h"
 +
 +
 +/* We use the same defines as in mvdata.c here:
 + *    block_bc: gmx_bcast() of the single object d (sizeof(d) bytes)
 + *   nblock_bc: gmx_bcast() of the nr-element array d
 + *     snew_bc: snew() d with nr elements wherever MASTER(cr) is false
 + * Note that snew_bc expands to a braced block, not to an expression, so
 + * writing "snew_bc(...);" leaves an extra empty statement behind. */
 +#define  block_bc(cr,   d) gmx_bcast(     sizeof(d),     &(d),(cr))
 +#define nblock_bc(cr,nr,d) gmx_bcast((nr)*sizeof((d)[0]), (d),(cr))
 +#define   snew_bc(cr,d,nr) { if (!MASTER(cr)) snew((d),(nr)); }
 +
 +
 +/* enum to identify the type of ED: none, normal ED, flooding.
 + * eEDnr is the trailing entry (presumably the number of types - TODO confirm) */
 +enum {eEDnone, eEDedsam, eEDflood, eEDnr};
 +
 +/* enum to identify operations on reference, average, origin, target structures.
 + * eedNR is the trailing entry (presumably the number of structures - TODO confirm) */
 +enum {eedREF, eedAV, eedORI, eedTAR, eedNR};
 +
 +
 +typedef struct
 +{
 +    int    neig;     /* nr of eigenvectors             */
 +    int   *ieig;     /* index nrs of eigenvectors      */
 +    real  *stpsz;    /* stepsizes (per eigenvector); for flooding this is
 +                      * the per-eigenvector factor in the sum computed by
 +                      * flood_energy()                 */
 +    rvec  **vec;     /* eigenvector components: vec[i][j] is the rvec
 +                      * of eigenvector i for atom j    */
 +    real  *xproj;    /* instantaneous x projections    */
 +    real  *fproj;    /* instantaneous f projections    */
 +    real  radius;    /* instantaneous radius: sqrt of the summed squared
 +                      * differences between refproj and xproj */
 +    real  *refproj;  /* starting or target projections */
 +    /* When using flooding as harmonic restraint: The current reference projection
 +     * is at each step calculated from the initial refproj0 and the slope:
 +     * refproj[i] = refproj0[i] + step*refprojslope[i] (see flood_energy). */
 +    real  *refproj0,*refprojslope;
 +} t_eigvec;
 +
 +
 +typedef struct                    /* One t_eigvec set per ED mode; project()
 +                                   * fills xproj for each of the six sets */
 +{
 +    t_eigvec      mon;            /* only monitored, no constraints       */
 +    t_eigvec      linfix;         /* fixed linear constraints             */
 +    t_eigvec      linacc;         /* acceptance linear constraints        */
 +    t_eigvec      radfix;         /* fixed radial constraints (exp)       */
 +    t_eigvec      radacc;         /* acceptance radial constraints (exp)  */
 +    t_eigvec      radcon;         /* acceptance rad. contraction constr.  */
 +} t_edvecs;
 +
 +
 +typedef struct
 +{
 +    real deltaF0;                 /* Value that update_adaption() compares
 +                                     deltaF against when changing Efl         */
 +    gmx_bool bHarmonic;           /* Use flooding for harmonic restraint on
 +                                     the eigenvector                          */
 +    gmx_bool bConstForce;         /* Do not calculate a flooding potential,
 +                                     instead flood with a constant force      */
 +    real tau;                     /* Time constant of the adaption; for
 +                                     |tau| <= 1e-8 update_adaption() is a
 +                                     no-op                                    */
 +    real deltaF;                  /* Running average of Vfl, maintained by
 +                                     update_adaption()                        */
 +    real Efl;                     /* Flooding strength, adapted over time by
 +                                     update_adaption()                        */
 +    real kT;                      /* Enters the exponent of the flooding
 +                                     potential, see flood_energy()            */
 +    real Vfl;                     /* Flooding energy as returned by
 +                                     flood_energy()                           */
 +    real dt;                      /* Step length used in the adaption update
 +                                     (only the ratio dt/tau enters)           */
 +    real constEfl;                /* Written as EFL_NULL by dump_edi();
 +                                     presumably the initial Efl - TODO confirm */
 +    real alpha2;                  /* Enters the exponent of the flooding
 +                                     potential; negative for restraining
 +                                     (inverted flooding)                      */
 +    int flood_id;                 /* Number printed in front of each "FL:"
 +                                     line by write_edo_flood()                */
 +    rvec *forces_cartesian;       /* Cartesian flooding forces of the local
 +                                     atoms, filled by flood_blowup()          */
 +    t_eigvec vecs;         /* use flooding for these */
 +} t_edflood;
 +
 +
 +/* This type is for the average, reference, target, and origin structure
 + * (the sav, sref, star and sori members of t_edpar)                        */
 +typedef struct gmx_edx
 +{
 +    int           nr;             /* number of atoms this structure contains  */
 +    int           nr_loc;         /* number of atoms on local node            */
 +    int           *anrs;          /* atom index numbers                       */
 +    int           *anrs_loc;      /* local atom index numbers                 */
 +    int           nalloc_loc;     /* allocation size of anrs_loc              */
 +    int           *c_ind;         /* at which position of the whole anrs
 +                                   * array is a local atom?, i.e.
 +                                   * c_ind[0...nr_loc-1] gives the atom index
 +                                   * with respect to the collective
 +                                   * anrs[0...nr-1] array                     */
 +    rvec          *x;             /* positions for this structure             */
 +    rvec          *x_old;         /* used to keep track of the shift vectors
 +                                     such that the ED molecule can always be
 +                                     made whole in the parallel case          */
 +    real          *m;             /* masses                                   */
 +    real          mtot;           /* total mass (only used in sref)           */
 +    real          *sqrtm;         /* sqrt of the masses used for mass-
 +                                   * weighting of analysis (only used in sav);
 +                                   * projectx() multiplies each atom's
 +                                   * contribution by sqrtm[i]                 */
 +} t_gmx_edx;
 +
 +
 +typedef struct edpar               /* One ED/flooding dataset; several of
 +                                    * them are chained through next_edi    */
 +{
 +    int            nini;           /* total Nr of atoms                    */
 +    gmx_bool       fitmas;         /* true if trans fit with cm            */
 +    gmx_bool       pcamas;         /* true if mass-weighted PCA            */
 +    int            presteps;       /* number of steps to run without any
 +                                    *    perturbations ... just monitoring */
 +    int            outfrq;         /* freq (in steps) of writing to edo    */
 +    int            maxedsteps;     /* max nr of steps per cycle            */
 +
 +    /* all gmx_edx datasets are copied to all nodes in the parallel case   */
 +    struct gmx_edx sref;           /* reference positions, to these fitting
 +                                    * will be done                         */
 +    gmx_bool       bRefEqAv;       /* If true, reference & average indices
 +                                    * are the same. Used for optimization  */
 +    struct gmx_edx sav;            /* average positions                    */
 +    struct gmx_edx star;           /* target positions                     */
 +    struct gmx_edx sori;           /* origin positions                     */
 +
 +    t_edvecs       vecs;           /* eigenvectors                         */
 +    real           slope;          /* minimal slope in acceptance radexp   */
 +
 +    gmx_bool       bNeedDoEdsam;   /* if any of the options mon, linfix, ...
 +                                    * is used (i.e. apart from flooding)   */
 +    t_edflood      flood;          /* parameters especially for flooding   */
 +    struct t_ed_buffer *buf;       /* handle to local buffers              */
 +    struct edpar   *next_edi;      /* Pointer to another ed dataset (the
 +                                    * next entry when several .edi files
 +                                    * were concatenated)                   */
 +} t_edpar;
 +
 +
 +typedef struct gmx_edsam
 +{
 +    int           eEDtype;        /* Type of ED: see enums above          */
 +    const char    *edinam;        /* name of ED sampling input file       */
 +    const char    *edonam;        /*                     output           */
 +    FILE          *edo;           /* output file pointer                  */
 +    t_edpar       *edpar;         /* ED dataset; further datasets hang off
 +                                   * its next_edi pointer                 */
 +    gmx_bool      bFirst;         /* NOTE(review): presumably marks the
 +                                   * first call - confirm at the users    */
 +    gmx_bool      bStartFromCpt;  /* NOTE(review): presumably set when the
 +                                   * run continues from a checkpoint -
 +                                   * confirm at the users                 */
 +} t_gmx_edsam;
 +
 +
 +struct t_do_edsam        /* Work buffers of one ED dataset, reached through
 +                            edi->buf->do_edsam */
 +{
 +    matrix old_rotmat;   /* NOTE(review): the four members up to
 +                            transvec_compact are not used in the part of the
 +                            file reviewed here; presumably they keep results
 +                            of earlier steps - confirm at the users */
 +    real oldrad;
 +    rvec old_transvec,older_transvec,transvec_compact;
 +    rvec *xcoll;         /* Positions from all nodes, this is the
 +                            collective set we work on.
 +                            These are the positions of atoms with
 +                            average structure indices */
 +    rvec *xc_ref;        /* same but with reference structure indices */
 +    ivec *shifts_xcoll;        /* Shifts for xcoll  */
 +    ivec *extra_shifts_xcoll;  /* xcoll shift changes since last NS step */
 +    ivec *shifts_xc_ref;       /* Shifts for xc_ref */
 +    ivec *extra_shifts_xc_ref; /* xc_ref shift changes since last NS step */
 +    gmx_bool bUpdateShifts;    /* TRUE in NS steps to indicate that the
 +                                  ED shifts for this ED dataset need to
 +                                  be updated */
 +};
 +
 +
 +/* definition of ED buffer structure: one pointer per routine-private buffer.
 + * A NULL do_edfit pointer is what do_edfit() uses to detect its first call. */
 +struct t_ed_buffer
 +{
 +    struct t_fit_to_ref *           fit_to_ref; /* presumably the buffer of fit_to_reference() - TODO confirm */
 +    struct t_do_edfit *             do_edfit;   /* work matrices, allocated by do_edfit() on first use */
 +    struct t_do_edsam *             do_edsam;   /* collective positions and shifts, see struct t_do_edsam */
 +    struct t_do_radcon *            do_radcon;  /* presumably the buffer of do_radcon() - TODO confirm */
 +};
 +
 +
 +/* Function declarations */
 +static void fit_to_reference(rvec *xcoll,rvec transvec,matrix rotmat,t_edpar *edi);
 +
 +static void translate_and_rotate(rvec *x,int nat,rvec transvec,matrix rotmat);
 +/* End function declarations */
 +
 +
 +/* Does not subtract average positions, projection on single eigenvector is returned
 + * used by: do_linfix, do_linacc, do_radfix, do_radacc, do_radcon
 + * Average position is subtracted in ed_apply_constraints prior to calling projectx
 + */
 +static real projectx(t_edpar *edi, rvec *xcoll, rvec *vec)
 +{
 +    int  i;
 +    real proj=0.0;
 +
 +
 +    for (i=0; i<edi->sav.nr; i++)
 +        proj += edi->sav.sqrtm[i]*iprod(vec[i], xcoll[i]);
 +
 +    return proj;
 +}
 +
 +
 +/* Specialized: projection is stored in vec->refproj
 + * -> used for radacc, radfix, radcon  and center of flooding potential
 + * subtracts average positions, projects vector x */
 +static void rad_project(t_edpar *edi, rvec *x, t_eigvec *vec, t_commrec *cr)
 +{
 +    int i;
 +    real rad=0.0;
 +
 +    /* Subtract average positions */
 +    for (i = 0; i < edi->sav.nr; i++)
 +        rvec_dec(x[i], edi->sav.x[i]);
 +
 +    for (i = 0; i < vec->neig; i++)
 +    {
 +        vec->refproj[i] = projectx(edi,x,vec->vec[i]);
 +        rad += pow((vec->refproj[i]-vec->xproj[i]),2);
 +    }
 +    vec->radius=sqrt(rad);
 +
 +    /* Add average positions */
 +    for (i = 0; i < edi->sav.nr; i++)
 +        rvec_inc(x[i], edi->sav.x[i]);
 +}
 +
 +
 +/* Project vector x, subtract average positions prior to projection and add
 + * them afterwards to retain the unchanged vector. Store in xproj. Mass-weighting
 + * is applied. */
 +static void project_to_eigvectors(rvec       *x,    /* The positions to project to an eigenvector */
 +                                  t_eigvec   *vec,  /* The eigenvectors */
 +                                  t_edpar    *edi)
 +{
 +    int  i;
 +
 +
 +    if (!vec->neig) return;
 +
 +    /* Subtract average positions */
 +    for (i=0; i<edi->sav.nr; i++)
 +        rvec_dec(x[i], edi->sav.x[i]);
 +
 +    for (i=0; i<vec->neig; i++)
 +        vec->xproj[i] = projectx(edi, x, vec->vec[i]);
 +
 +    /* Add average positions */
 +    for (i=0; i<edi->sav.nr; i++)
 +        rvec_inc(x[i], edi->sav.x[i]);
 +}
 +
 +
 +/* Project vector x onto all edi->vecs (mon, linfix,...) */
 +static void project(rvec      *x,     /* positions to project */
 +                    t_edpar   *edi)   /* edi data set */
 +{
 +    /* It is not more work to subtract the average position in every
 +     * subroutine again, because these routines are rarely used simultanely */
 +    project_to_eigvectors(x, &edi->vecs.mon   , edi);
 +    project_to_eigvectors(x, &edi->vecs.linfix, edi);
 +    project_to_eigvectors(x, &edi->vecs.linacc, edi);
 +    project_to_eigvectors(x, &edi->vecs.radfix, edi);
 +    project_to_eigvectors(x, &edi->vecs.radacc, edi);
 +    project_to_eigvectors(x, &edi->vecs.radcon, edi);
 +}
 +
 +
 +static real calc_radius(t_eigvec *vec)
 +{
 +    int i;
 +    real rad=0.0;
 +
 +
 +    for (i=0; i<vec->neig; i++)
 +        rad += pow((vec->refproj[i]-vec->xproj[i]),2);
 +
 +    return rad=sqrt(rad);
 +}
 +
 +
 +/* Debug helper */
 +#ifdef DEBUGHELPERS
 +static void dump_xcoll(t_edpar *edi, struct t_do_edsam *buf, t_commrec *cr,
 +                       int step)
 +{
 +    int i;
 +    FILE *fp;
 +    char fn[STRLEN];
 +    rvec *xcoll;
 +    ivec *shifts, *eshifts;
 +
 +
 +    if (!MASTER(cr))
 +        return;
 +
 +    xcoll   = buf->xcoll;
 +    shifts  = buf->shifts_xcoll;
 +    eshifts = buf->extra_shifts_xcoll;
 +
 +    sprintf(fn, "xcolldump_step%d.txt", step);
 +    fp = fopen(fn, "w");
 +
 +    for (i=0; i<edi->sav.nr; i++)
 +        fprintf(fp, "%d %9.5f %9.5f %9.5f   %d %d %d   %d %d %d\n",
 +                edi->sav.anrs[i]+1,
 +                xcoll[i][XX]  , xcoll[i][YY]  , xcoll[i][ZZ],
 +                shifts[i][XX] , shifts[i][YY] , shifts[i][ZZ],
 +                eshifts[i][XX], eshifts[i][YY], eshifts[i][ZZ]);
 +
 +    fclose(fp);
 +}
 +
 +
 +/* Debug helper */
 +static void dump_edi_positions(FILE *out, struct gmx_edx *s, const char name[])
 +{
 +    int i;
 +
 +
 +    fprintf(out, "#%s positions:\n%d\n", name, s->nr);
 +    if (s->nr == 0)
 +        return;
 +
 +    fprintf(out, "#index, x, y, z");
 +    if (s->sqrtm)
 +        fprintf(out, ", sqrt(m)");
 +    for (i=0; i<s->nr; i++)
 +    {
 +        fprintf(out, "\n%6d  %11.6f %11.6f %11.6f",s->anrs[i], s->x[i][XX], s->x[i][YY], s->x[i][ZZ]);
 +        if (s->sqrtm)
 +            fprintf(out,"%9.3f",s->sqrtm[i]);
 +    }
 +    fprintf(out, "\n");
 +}
 +
 +
 +/* Debug helper */
 +static void dump_edi_eigenvecs(FILE *out, t_eigvec *ev,
 +                               const char name[], int length)
 +{
 +    int i,j;
 +
 +
 +    fprintf(out, "#%s eigenvectors:\n%d\n", name, ev->neig);
 +    /* Dump the data for every eigenvector: */
 +    for (i=0; i<ev->neig; i++)
 +    {
 +        fprintf(out, "EV %4d\ncomponents %d\nstepsize %f\nxproj %f\nfproj %f\nrefproj %f\nradius %f\nComponents:\n",
 +                ev->ieig[i], length, ev->stpsz[i], ev->xproj[i], ev->fproj[i], ev->refproj[i], ev->radius);
 +        for (j=0; j<length; j++)
 +            fprintf(out, "%11.6f %11.6f %11.6f\n", ev->vec[i][j][XX], ev->vec[i][j][YY], ev->vec[i][j][ZZ]);
 +    }
 +}
 +
 +
 +/* Debug helper */
 +static void dump_edi(t_edpar *edpars, t_commrec *cr, int nr_edi)
 +{
 +    FILE  *out;
 +    char  fn[STRLEN];
 +
 +
 +    sprintf(fn, "EDdump_node%d_edi%d", cr->nodeid, nr_edi);
 +    out = ffopen(fn, "w");
 +
 +    fprintf(out,"#NINI\n %d\n#FITMAS\n %d\n#ANALYSIS_MAS\n %d\n",
 +            edpars->nini,edpars->fitmas,edpars->pcamas);
 +    fprintf(out,"#OUTFRQ\n %d\n#MAXLEN\n %d\n#SLOPECRIT\n %f\n",
 +            edpars->outfrq,edpars->maxedsteps,edpars->slope);
 +    fprintf(out,"#PRESTEPS\n %d\n#DELTA_F0\n %f\n#TAU\n %f\n#EFL_NULL\n %f\n#ALPHA2\n %f\n",
 +            edpars->presteps,edpars->flood.deltaF0,edpars->flood.tau,
 +            edpars->flood.constEfl,edpars->flood.alpha2);
 +
 +    /* Dump reference, average, target, origin positions */
 +    dump_edi_positions(out, &edpars->sref, "REFERENCE");
 +    dump_edi_positions(out, &edpars->sav , "AVERAGE"  );
 +    dump_edi_positions(out, &edpars->star, "TARGET"   );
 +    dump_edi_positions(out, &edpars->sori, "ORIGIN"   );
 +
 +    /* Dump eigenvectors */
 +    dump_edi_eigenvecs(out, &edpars->vecs.mon   , "MONITORED", edpars->sav.nr);
 +    dump_edi_eigenvecs(out, &edpars->vecs.linfix, "LINFIX"   , edpars->sav.nr);
 +    dump_edi_eigenvecs(out, &edpars->vecs.linacc, "LINACC"   , edpars->sav.nr);
 +    dump_edi_eigenvecs(out, &edpars->vecs.radfix, "RADFIX"   , edpars->sav.nr);
 +    dump_edi_eigenvecs(out, &edpars->vecs.radacc, "RADACC"   , edpars->sav.nr);
 +    dump_edi_eigenvecs(out, &edpars->vecs.radcon, "RADCON"   , edpars->sav.nr);
 +
 +    /* Dump flooding eigenvectors */
 +    dump_edi_eigenvecs(out, &edpars->flood.vecs, "FLOODING"  , edpars->sav.nr);
 +
 +    /* Dump ed local buffer */
 +    fprintf(out, "buf->do_edfit         =%p\n", (void*)edpars->buf->do_edfit  );
 +    fprintf(out, "buf->do_edsam         =%p\n", (void*)edpars->buf->do_edsam  );
 +    fprintf(out, "buf->do_radcon        =%p\n", (void*)edpars->buf->do_radcon );
 +
 +    ffclose(out);
 +}
 +
 +
 +/* Debug helper */
 +static void dump_rotmat(FILE* out,matrix rotmat)
 +{
 +    fprintf(out,"ROTMAT: %12.8f %12.8f %12.8f\n",rotmat[XX][XX],rotmat[XX][YY],rotmat[XX][ZZ]);
 +    fprintf(out,"ROTMAT: %12.8f %12.8f %12.8f\n",rotmat[YY][XX],rotmat[YY][YY],rotmat[YY][ZZ]);
 +    fprintf(out,"ROTMAT: %12.8f %12.8f %12.8f\n",rotmat[ZZ][XX],rotmat[ZZ][YY],rotmat[ZZ][ZZ]);
 +}
 +
 +
 +/* Debug helper */
 +static void dump_rvec(FILE *out, int dim, rvec *x)
 +{
 +    int i;
 +
 +
 +    for (i=0; i<dim; i++)
 +        fprintf(out,"%4d   %f %f %f\n",i,x[i][XX],x[i][YY],x[i][ZZ]);
 +}
 +
 +
 +/* Debug helper */
 +static void dump_mat(FILE* out, int dim, double** mat)
 +{
 +    int i,j;
 +
 +
 +    fprintf(out,"MATRIX:\n");
 +    for (i=0;i<dim;i++)
 +    {
 +        for (j=0;j<dim;j++)
 +            fprintf(out,"%f ",mat[i][j]);
 +        fprintf(out,"\n");
 +    }
 +}
 +#endif
 +
 +
 +struct t_do_edfit {  /* Work matrices of do_edfit(), allocated on its first call */
 +    double **omega;  /* 2*DIM x 2*DIM symmetric matrix handed to jacobi() */
 +    double **om;     /* 2*DIM x 2*DIM matrix filled by jacobi(); do_edfit()
 +                        reads its columns as eigenvectors */
 +};
 +
 +static void do_edfit(int natoms,rvec *xp,rvec *x,matrix R,t_edpar *edi)
 +{
 +    /* this is a copy of do_fit with some modifications */
 +    int    c,r,n,j,i,irot;
 +    double d[6],xnr,xpc;
 +    matrix vh,vk,u;
 +    int    index;
 +    real   max_d;
 +
 +    struct t_do_edfit *loc;
 +    gmx_bool bFirst;
 +
 +    if(edi->buf->do_edfit != NULL)
 +        bFirst = FALSE;
 +    else
 +    {
 +        bFirst = TRUE;
 +        snew(edi->buf->do_edfit,1);
 +    }
 +    loc = edi->buf->do_edfit;
 +
 +    if (bFirst)
 +    {
 +        snew(loc->omega,2*DIM);
 +        snew(loc->om,2*DIM);
 +        for(i=0; i<2*DIM; i++)
 +        {
 +            snew(loc->omega[i],2*DIM);
 +            snew(loc->om[i],2*DIM);
 +        }
 +    }
 +
 +    for(i=0;(i<6);i++)
 +    {
 +        d[i]=0;
 +        for(j=0;(j<6);j++)
 +        {
 +            loc->omega[i][j]=0;
 +            loc->om[i][j]=0;
 +        }
 +    }
 +
 +    /* calculate the matrix U */
 +    clear_mat(u);
 +    for(n=0;(n<natoms);n++)
 +    {
 +        for(c=0; (c<DIM); c++)
 +        {
 +            xpc=xp[n][c];
 +            for(r=0; (r<DIM); r++)
 +            {
 +                xnr=x[n][r];
 +                u[c][r]+=xnr*xpc;
 +            }
 +        }
 +    }
 +
 +    /* construct loc->omega */
 +    /* loc->omega is symmetric -> loc->omega==loc->omega' */
 +    for(r=0;(r<6);r++)
 +        for(c=0;(c<=r);c++)
 +            if ((r>=3) && (c<3))
 +            {
 +                loc->omega[r][c]=u[r-3][c];
 +                loc->omega[c][r]=u[r-3][c];
 +            }
 +            else
 +            {
 +                loc->omega[r][c]=0;
 +                loc->omega[c][r]=0;
 +            }
 +
 +    /* determine h and k */
 +#ifdef DEBUG
 +    {
 +        int i;
 +        dump_mat(stderr,2*DIM,loc->omega);
 +        for (i=0; i<6; i++)
 +            fprintf(stderr,"d[%d] = %f\n",i,d[i]);
 +    }
 +#endif
 +    jacobi(loc->omega,6,d,loc->om,&irot);
 +
 +    if (irot==0)
 +        fprintf(stderr,"IROT=0\n");
 +
 +    index=0; /* For the compiler only */
 +
 +    for(j=0;(j<3);j++)
 +    {
 +        max_d=-1000;
 +        for(i=0;(i<6);i++)
 +            if (d[i]>max_d)
 +            {
 +                max_d=d[i];
 +                index=i;
 +            }
 +        d[index]=-10000;
 +        for(i=0;(i<3);i++)
 +        {
 +            vh[j][i]=M_SQRT2*loc->om[i][index];
 +            vk[j][i]=M_SQRT2*loc->om[i+DIM][index];
 +        }
 +    }
 +
 +    /* determine R */
 +    for(c=0;(c<3);c++)
 +        for(r=0;(r<3);r++)
 +            R[c][r]=vk[0][r]*vh[0][c]+
 +            vk[1][r]*vh[1][c]+
 +            vk[2][r]*vh[2][c];
 +    if (det(R) < 0)
 +        for(c=0;(c<3);c++)
 +            for(r=0;(r<3);r++)
 +                R[c][r]=vk[0][r]*vh[0][c]+
 +                vk[1][r]*vh[1][c]-
 +                vk[2][r]*vh[2][c];
 +}
 +
 +
 +static void rmfit(int nat, rvec *xcoll, rvec transvec, matrix rotmat)
 +{
 +    rvec vec;
 +    matrix tmat;
 +
 +
 +    /* Remove rotation.
 +     * The inverse rotation is described by the transposed rotation matrix */
 +    transpose(rotmat,tmat);
 +    rotate_x(xcoll, nat, tmat);
 +
 +    /* Remove translation */
 +    vec[XX]=-transvec[XX];
 +    vec[YY]=-transvec[YY];
 +    vec[ZZ]=-transvec[ZZ];
 +    translate_x(xcoll, nat, vec);
 +}
 +
 +
 +/**********************************************************************************
 + ******************** FLOODING ****************************************************
 + **********************************************************************************
 +
 +The flooding ability was added later to edsam. Much of the edsam functionality could be reused for that purpose.
 +The flooding covariance matrix, i.e. the selected eigenvectors and their corresponding eigenvalues are
 +read as 7th Component Group. The eigenvalues are coded into the stepsize parameter (as used by -linfix or -linacc).
 +
 +Right at the beginning, do_md calls the function init_edsam, which reads the edi file, saves all the necessary information in
 +the edi structure and calls init_flood to initialise some extra fields in the edi->flood structure.
 +
 +since the flooding acts on forces do_flood is called from the function force() (force.c), while the other
 +edsam functionality is hooked into md via the update() (update.c) function acting as constraint on positions.
 +
 +do_flood makes a copy of the positions,
 +fits them, projects them computes flooding_energy, and flooding forces. The forces are computed in the
 +space of the eigenvectors and are then blown up to the full cartesian space and rotated back to remove the
 +fit. Then do_flood adds these forces to the forcefield-forces
 +(given as parameter) and updates the adaptive flooding parameters Efl and deltaF.
 +
 +To center the flooding potential at a different location one can use the -ori option in make_edi. The ori
 +structure is projected to the system of eigenvectors and then this position in the subspace is used as
 +center of the flooding potential.   If the option is not used, the center will be zero in the subspace,
 +i.e. the average structure as given in the make_edi file.
 +
 +To use the flooding potential as restraint, make_edi has the option -restrain, which leads to inverted
 +signs of alpha2 and Efl, such that the sign in the exponential of Vfl is not inverted but the sign of
 +Vfl is inverted. Vfl = Efl * exp (- .../Efl/alpha2*x^2...) With tau>0 the negative Efl will grow slowly
 +so that the restraint is switched off slowly. Once Efl==0 is reached while inverted flooding is ON, no
 + further adaption is applied; Efl will stay constant at zero.
 +
 +To use restraints with harmonic potentials switch -restrain and -harmonic. Then the eigenvalues are
 +used as spring constants for the harmonic potential.
 +Note that eq3 in the flooding paper (J. Comp. Chem. 2006, 27, 1693-1702) defines the parameter lambda \
 +as the inverse of the spring constant, whereas the implementation uses lambda as the spring constant.
 +
 +To use more than one flooding matrix just concatenate several .edi files (cat flood1.edi flood2.edi > flood_all.edi);
 +the routine read_edi_file reads all of these flooding files.
 +The structure t_edi is now organized as a list of t_edis and the function do_flood cycles through the list
 +calling the do_single_flood() routine for every single entry. Since all state variables are kept within each
 +edi, there is no interdependence whatsoever. The forces are added together.
 +
 +  To write energies into the .edr file, call the function
 +        get_flood_enx_names(char**, int *nnames) to get the Header (Vfl1 Vfl2... Vfln)
 +and call
 +        get_flood_energies(real Vfl[],int nnames);
 +
 +  TODO:
 +- one could program the whole thing such that Efl, Vfl and deltaF are written to the .edr file. -- I don't know how to do that yet.
 +
 +  Maybe one should give a range of atoms for which to remove motion, so that motion is removed with
 +  two edsam files from two peptide chains
 +*/
 +
 +static void write_edo_flood(t_edpar *edi, FILE *fp, gmx_large_int_t step)
 +{
 +    int i;
 +    char buf[22];
 +    gmx_bool bOutputRef=FALSE;
 +
 +
 +    fprintf(fp,"%d.th FL: %s %12.5e %12.5e %12.5e\n",
 +            edi->flood.flood_id, gmx_step_str(step,buf),
 +            edi->flood.Efl, edi->flood.Vfl, edi->flood.deltaF);
 +
 +
 +    /* Check whether any of the references changes with time (this can happen
 +     * in case flooding is used as harmonic restraint). If so, output all the
 +     * current reference projections. */
 +    if (edi->flood.bHarmonic)
 +    {
 +        for (i = 0; i < edi->flood.vecs.neig; i++)
 +        {
 +            if (edi->flood.vecs.refprojslope[i] != 0.0)
 +                bOutputRef=TRUE;
 +        }
 +        if (bOutputRef)
 +        {
 +            fprintf(fp, "Ref. projs.: ");
 +            for (i = 0; i < edi->flood.vecs.neig; i++)
 +            {
 +                fprintf(fp, "%12.5e ", edi->flood.vecs.refproj[i]);
 +            }
 +            fprintf(fp, "\n");
 +        }
 +    }
 +    fprintf(fp,"FL_FORCES: ");
 +
 +    for (i=0; i<edi->flood.vecs.neig; i++)
 +        fprintf(fp," %12.5e",edi->flood.vecs.fproj[i]);
 +
 +    fprintf(fp,"\n");
 +}
 +
 +
 +/* From flood.xproj compute the Vfl(x) at this point */
 +static real flood_energy(t_edpar *edi, gmx_large_int_t step)
 +{
 +    /* compute flooding energy Vfl
 +     Vfl = Efl * exp( - \frac {kT} {2Efl alpha^2} * sum_i { \lambda_i c_i^2 } )
 +     \lambda_i is the reciprocal eigenvalue 1/\sigma_i
 +         it is already computed by make_edi and stored in stpsz[i]
 +     bHarmonic:
 +       Vfl = - Efl * 1/2(sum _i {\frac 1{\lambda_i} c_i^2})
 +     */
 +    real sum;
 +    real Vfl;
 +    int i;
 +
 +
 +    /* Each time this routine is called (i.e. each time step), we add a small
 +     * value to the reference projection. This way a harmonic restraint towards
 +     * a moving reference is realized. If no value for the additive constant
 +     * is provided in the edi file, the reference will not change. */
 +    if (edi->flood.bHarmonic)
 +    {
 +        for (i=0; i<edi->flood.vecs.neig; i++)
 +        {
 +            edi->flood.vecs.refproj[i] = edi->flood.vecs.refproj0[i] + step * edi->flood.vecs.refprojslope[i];
 +        }
 +    }
 +
 +    sum=0.0;
 +    /* Compute sum which will be the exponent of the exponential */
 +    for (i=0; i<edi->flood.vecs.neig; i++)
 +    {
 +        /* stpsz stores the reciprocal eigenvalue 1/sigma_i */
 +        sum += edi->flood.vecs.stpsz[i]*(edi->flood.vecs.xproj[i]-edi->flood.vecs.refproj[i])*(edi->flood.vecs.xproj[i]-edi->flood.vecs.refproj[i]);
 +    }
 +
 +    /* Compute the Gauss function*/
 +    if (edi->flood.bHarmonic)
 +    {
 +        Vfl = -0.5*edi->flood.Efl*sum;  /* minus sign because Efl is negative, if restrain is on. */
 +    }
 +    else
 +    {
 +        Vfl = edi->flood.Efl!=0 ? edi->flood.Efl*exp(-edi->flood.kT/2/edi->flood.Efl/edi->flood.alpha2*sum) :0;
 +    }
 +
 +    return Vfl;
 +}
 +
 +
 +/* From the position and from Vfl compute forces in subspace -> store in edi->vec.flood.fproj */
 +static void flood_forces(t_edpar *edi)
 +{
 +    /* compute the forces in the subspace of the flooding eigenvectors
 +     * by the formula F_i= V_{fl}(c) * ( \frac {kT} {E_{fl}} \lambda_i c_i */
 +
 +    int i;
 +    real energy=edi->flood.Vfl;
 +
 +
 +    if (edi->flood.bHarmonic)
 +        for (i=0; i<edi->flood.vecs.neig; i++)
 +        {
 +            edi->flood.vecs.fproj[i] = edi->flood.Efl* edi->flood.vecs.stpsz[i]*(edi->flood.vecs.xproj[i]-edi->flood.vecs.refproj[i]);
 +        }
 +    else
 +        for (i=0; i<edi->flood.vecs.neig; i++)
 +        {
 +            /* if Efl is zero the forces are zero if not use the formula */
 +            edi->flood.vecs.fproj[i] = edi->flood.Efl!=0 ? edi->flood.kT/edi->flood.Efl/edi->flood.alpha2*energy*edi->flood.vecs.stpsz[i]*(edi->flood.vecs.xproj[i]-edi->flood.vecs.refproj[i]) : 0;
 +        }
 +}
 +
 +
 +/* Raise forces from subspace into cartesian space */
 +static void flood_blowup(t_edpar *edi, rvec *forces_cart)
 +{
 +    /* this function lifts the forces from the subspace to the cartesian space
 +     all the values not contained in the subspace are assumed to be zero and then
 +     a coordinate transformation from eigenvector to cartesian vectors is performed
 +     The nonexistent values don't have to be set to zero explicitly, they would occur
 +     as zero valued summands, hence we just stop to compute this part of the sum.
 +
 +     for every atom we add all the contributions to this atom from all the different eigenvectors.
 +
 +     NOTE: one could add directly to the forcefield forces, would mean we wouldn't have to clear the
 +     field forces_cart prior the computation, but we compute the forces separately
 +     to have them accessible for diagnostics
 +     */
 +    int  j,eig;
 +    rvec dum;
 +    real *forces_sub;
 +
 +
 +    forces_sub = edi->flood.vecs.fproj;
 +
 +
 +    /* Calculate the cartesian forces for the local atoms */
 +
 +    /* Clear forces first */
 +    for (j=0; j<edi->sav.nr_loc; j++)
 +        clear_rvec(forces_cart[j]);
 +
 +    /* Now compute atomwise */
 +    for (j=0; j<edi->sav.nr_loc; j++)
 +    {
 +        /* Compute forces_cart[edi->sav.anrs[j]] */
 +        for (eig=0; eig<edi->flood.vecs.neig; eig++)
 +        {
 +            /* Force vector is force * eigenvector (compute only atom j) */
 +            svmul(forces_sub[eig],edi->flood.vecs.vec[eig][edi->sav.c_ind[j]],dum);
 +            /* Add this vector to the cartesian forces */
 +            rvec_inc(forces_cart[j],dum);
 +        }
 +    }
 +}
 +
 +
 +/* Update the values of Efl, deltaF depending on tau and Vfl */
 +static void update_adaption(t_edpar *edi)
 +{
 +    /* this function updates the parameter Efl and deltaF according to the rules given in
 +     * 'predicting unimolecular chemical reactions: chemical flooding' M Mueller et al,
 +     * J. chem Phys. */
 +
 +    if ((edi->flood.tau < 0 ? -edi->flood.tau : edi->flood.tau ) > 0.00000001)
 +    {
 +        edi->flood.Efl = edi->flood.Efl+edi->flood.dt/edi->flood.tau*(edi->flood.deltaF0-edi->flood.deltaF);
 +        /* check if restrain (inverted flooding) -> don't let EFL become positive */
 +        if (edi->flood.alpha2<0 && edi->flood.Efl>-0.00000001)
 +            edi->flood.Efl = 0;
 +
 +        edi->flood.deltaF = (1-edi->flood.dt/edi->flood.tau)*edi->flood.deltaF+edi->flood.dt/edi->flood.tau*edi->flood.Vfl;
 +    }
 +}
 +
 +
 +/* Apply flooding for one ED dataset (edi):
 + *  - collect the positions of the average (and, if different, reference) atoms,
 + *  - fit them to the reference structure and project them onto the flooding vectors,
 + *  - unless constant-force flooding is used, evaluate the flooding energy,
 + *    adapt Efl/deltaF and compute the flooding forces in the subspace,
 + *  - expand the forces to cartesian space, rotate them back and add them to force[],
 + *  - write flooding output on the master.
 + *
 + * edo    output file handed to write_edo_flood
 + * x      positions passed on to communicate_group_positions
 + * force  force array; the flooding forces are added at the indices edi->sav.anrs_loc[]
 + * edi    the ED dataset to process
 + * step   current step, used for the energy evaluation and the output frequency
 + * box    simulation box, passed on to communicate_group_positions
 + * cr     communication record */
 +static void do_single_flood(
 +        FILE *edo,
 +        rvec x[],
 +        rvec force[],
 +        t_edpar *edi,
 +        gmx_large_int_t step,
 +        matrix box,
 +        t_commrec *cr)
 +{
 +    int i;
 +    matrix  rotmat;         /* rotation matrix */
 +    matrix  tmat;           /* inverse rotation */
 +    rvec    transvec;       /* translation vector */
 +    struct t_do_edsam *buf;
 +
 +
 +    buf=edi->buf->do_edsam;
 +
 +    /* Broadcast the positions of the AVERAGE structure such that they are known on
 +     * every processor. Each node contributes its local positions x and stores them in
 +     * the collective ED array buf->xcoll */
 +    communicate_group_positions(cr, buf->xcoll, buf->shifts_xcoll, buf->extra_shifts_xcoll, buf->bUpdateShifts, x,
 +                    edi->sav.nr, edi->sav.nr_loc, edi->sav.anrs_loc, edi->sav.c_ind, edi->sav.x_old, box);
 +
 +    /* Only assemble REFERENCE positions if their indices differ from the average ones */
 +    if (!edi->bRefEqAv)
 +        communicate_group_positions(cr, buf->xc_ref, buf->shifts_xc_ref, buf->extra_shifts_xc_ref, buf->bUpdateShifts, x,
 +                edi->sref.nr, edi->sref.nr_loc, edi->sref.anrs_loc, edi->sref.c_ind, edi->sref.x_old, box);
 +
 +    /* If bUpdateShifts was TRUE, the shifts have presumably just been updated in
 +     * communicate_group_positions. The flag is reset here; it is set again in
 +     * dd_make_local_ed_indices */
 +    buf->bUpdateShifts = FALSE;
 +
 +    /* Now the collective ED/flooding positions are available in buf->xcoll
 +     * (and in buf->xc_ref if the reference atoms differ from the average atoms) */
 +
 +    /* Fit the reference indices to the reference structure. Only the translation
 +     * vector and the rotation matrix are returned, the positions stay untouched */
 +    if (edi->bRefEqAv)
 +        fit_to_reference(buf->xcoll , transvec, rotmat, edi);
 +    else
 +        fit_to_reference(buf->xc_ref, transvec, rotmat, edi);
 +
 +    /* Now apply the translation and rotation to the ED structure */
 +    translate_and_rotate(buf->xcoll, edi->sav.nr, transvec, rotmat);
 +
 +    /* Project fitted structure onto subspace -> store in edi->flood.vecs.xproj */
 +    project_to_eigvectors(buf->xcoll,&edi->flood.vecs,edi);
 +
 +    /* For constant-force flooding this part is skipped; init_flood copies the
 +     * constant values into flood.vecs.fproj once */
 +    if (FALSE == edi->flood.bConstForce)
 +    {
 +        /* Compute Vfl(x) from flood.xproj */
 +        edi->flood.Vfl = flood_energy(edi, step);
 +
 +        update_adaption(edi);
 +
 +        /* Compute the flooding forces */
 +        flood_forces(edi);
 +    }
 +
 +    /* Translate the subspace forces into cartesian forces */
 +    flood_blowup(edi, edi->flood.forces_cartesian);
 +
 +    /* Rotate forces back so that they correspond to the given structure and not to the fitted one */
 +    /* Each node rotates back its local forces; the transpose of rotmat is used as inverse rotation */
 +    transpose(rotmat,tmat);
 +    rotate_x(edi->flood.forces_cartesian, edi->sav.nr_loc, tmat);
 +
 +    /* Finally add forces to the main force variable */
 +    for (i=0; i<edi->sav.nr_loc; i++)
 +        rvec_inc(force[edi->sav.anrs_loc[i]],edi->flood.forces_cartesian[i]);
 +
 +    /* Output is written by the master process, at the steps selected by
 +     * do_per_step(step, edi->outfrq) */
 +    if (do_per_step(step,edi->outfrq) && MASTER(cr))
 +        write_edo_flood(edi,edo,step);
 +}
 +
 +
 +/* Main flooding routine, called from do_force */
 +extern void do_flood(
 +        FILE            *log,    /* md.log file */
 +        t_commrec       *cr,     /* Communication record */
 +        rvec            x[],     /* Positions on the local processor */
 +        rvec            force[], /* forcefield forces, to these the flooding forces are added */
 +        gmx_edsam_t     ed,      /* ed data structure contains all ED and flooding datasets */
 +        matrix          box,     /* the box */
 +        gmx_large_int_t step)    /* The relative time step since ir->init_step is already subtracted */
 +{
 +    t_edpar *edi;
 +
 +
 +    if (ed->eEDtype != eEDflood)
 +        return;
 +
 +    edi = ed->edpar;
 +    while (edi)
 +    {
 +        /* Call flooding for one matrix */
 +        if (edi->flood.vecs.neig)
 +            do_single_flood(ed->edo,x,force,edi,step,box,cr);
 +        edi = edi->next_edi;
 +    }
 +}
 +
 +
 +/* Called by init_edi, configure some flooding related variables and structures,
 + * print headers to output files */
 +static void init_flood(t_edpar *edi, gmx_edsam_t ed, real dt, t_commrec *cr)
 +{
 +    int i;
 +
 +
 +    edi->flood.Efl = edi->flood.constEfl;
 +    edi->flood.Vfl = 0;
 +    edi->flood.dt  = dt;
 +
 +    if (edi->flood.vecs.neig)
 +    {
 +        /* If in any of the datasets we find a flooding vector, flooding is turned on */
 +        ed->eEDtype = eEDflood;
 +
 +        fprintf(stderr,"ED: Flooding of matrix %d is switched on.\n", edi->flood.flood_id);
 +
 +        if (edi->flood.bConstForce)
 +        {
 +            /* We have used stpsz as a vehicle to carry the fproj values for constant
 +             * force flooding. Now we copy that to flood.vecs.fproj. Note that
 +             * in const force flooding, fproj is never changed. */
 +            for (i=0; i<edi->flood.vecs.neig; i++)
 +            {
 +                edi->flood.vecs.fproj[i] = edi->flood.vecs.stpsz[i];
 +
 +                fprintf(stderr, "ED: applying on eigenvector %d a constant force of %g\n",
 +                        edi->flood.vecs.ieig[i], edi->flood.vecs.fproj[i]);
 +            }
 +        }
 +        fprintf(ed->edo,"FL_HEADER: Flooding of matrix %d is switched on! The flooding output will have the following format:\n",
 +                edi->flood.flood_id);
 +        fprintf(ed->edo,"FL_HEADER: Step     Efl          Vfl       deltaF\n");
 +    }
 +}
 +
 +
 +#ifdef DEBUGHELPERS
 +/*********** Energy book keeping ******/
 +static void get_flood_enx_names(t_edpar *edi, char** names, int *nnames)  /* get header of energies */
 +{
 +    t_edpar *actual;
 +    int count;
 +    char buf[STRLEN];
 +    actual=edi;
 +    count = 1;
 +    while (actual)
 +    {
 +        srenew(names,count);
 +        sprintf(buf,"Vfl_%d",count);
 +        names[count-1]=strdup(buf);
 +        actual=actual->next_edi;
 +        count++;
 +    }
 +    *nnames=count-1;
 +}
 +
 +
 +static void get_flood_energies(t_edpar *edi, real Vfl[],int nnames)
 +{
 +    /*fl has to be big enough to capture nnames-many entries*/
 +    t_edpar *actual;
 +    int count;
 +
 +
 +    actual=edi;
 +    count = 1;
 +    while (actual)
 +    {
 +        Vfl[count-1]=actual->flood.Vfl;
 +        actual=actual->next_edi;
 +        count++;
 +    }
 +    if (nnames!=count-1)
 +        gmx_fatal(FARGS,"Number of energies is not consistent with t_edi structure");
 +}
 +/************* END of FLOODING IMPLEMENTATION ****************************/
 +#endif
 +
 +
 +gmx_edsam_t ed_open(int nfile,const t_filenm fnm[],unsigned long Flags,t_commrec *cr)
 +{
 +    gmx_edsam_t ed;
 +
 +
 +    /* Allocate space for the ED data structure */
 +    snew(ed, 1);
 +
 +    /* We want to perform ED (this switch might later be upgraded to eEDflood) */
 +    ed->eEDtype = eEDedsam;
 +
 +    if (MASTER(cr))
 +    {
 +        /* Open .edi input file: */
 +        ed->edinam=ftp2fn(efEDI,nfile,fnm);
 +        /* The master opens the .edo output file */
 +        fprintf(stderr,"ED sampling will be performed!\n");
 +        ed->edonam = ftp2fn(efEDO,nfile,fnm);
 +        ed->edo    = gmx_fio_fopen(ed->edonam,(Flags & MD_APPENDFILES)? "a+" : "w+");
 +        ed->bStartFromCpt = Flags & MD_STARTFROMCPT;
 +    }
 +    return ed;
 +}
 +
 +
 +/* Broadcasts the structure data of one ED structure s.
 + *
 + * cr     communication record used for all broadcasts
 + * s      the structure (indices, positions, ...) to broadcast
 + * stype  kind of structure (eedREF, eedAV, eedTAR, eedORI); selects which
 + *        additional arrays are allocated and broadcast
 + *
 + * Each array is handled by an snew_bc/nblock_bc pair (presumably: allocate on
 + * the receiving nodes, then broadcast the contents - TODO confirm in the
 + * macro definitions). The order of the calls must be the same on all nodes. */
 +static void bc_ed_positions(t_commrec *cr, struct gmx_edx *s, int stype)
 +{
 +    snew_bc(cr, s->anrs, s->nr   );    /* Index numbers     */
 +    snew_bc(cr, s->x   , s->nr   );    /* Positions         */
 +    nblock_bc(cr, s->nr, s->anrs );
 +    nblock_bc(cr, s->nr, s->x    );
 +
 +    /* For the average & reference structures we need an array for the collective indices
 +     * and the old positions (the masses are broadcast further below) */
 +    if (stype == eedAV || stype == eedREF)
 +    {
 +        /* We need these additional variables in the parallel case: */
 +        snew(s->c_ind    , s->nr   );   /* Collective indices, allocated with plain snew on every node */
 +        /* Local atom indices get assigned in dd_make_local_group_indices.
 +         * There, also memory is allocated */
 +        s->nalloc_loc = 0;              /* allocation size of s->anrs_loc */
 +        snew_bc(cr, s->x_old, s->nr);   /* To be able to always make the ED molecule whole, ...        */
 +        nblock_bc(cr, s->nr, s->x_old); /* ... keep track of shift changes with the help of old coords */
 +    }
 +
 +    /* broadcast masses for the reference structure (for mass-weighted fitting) */
 +    if (stype == eedREF)
 +    {
 +        snew_bc(cr, s->m, s->nr);
 +        nblock_bc(cr, s->nr, s->m);
 +    }
 +
 +    /* For the average structure we might need the masses for mass-weighting:
 +     * both sqrt(m) and m are broadcast */
 +    if (stype == eedAV)
 +    {
 +        snew_bc(cr, s->sqrtm, s->nr);
 +        nblock_bc(cr, s->nr, s->sqrtm);
 +        snew_bc(cr, s->m, s->nr);
 +        nblock_bc(cr, s->nr, s->m);
 +    }
 +}
 +
 +
 +/* Broadcasts the eigenvector data of one group of eigenvectors.
 + *
 + * cr         communication record used for all broadcasts
 + * ev         the eigenvector group; ev->neig entries per array
 + * length     number of rvec components per eigenvector (the callers in
 + *            broadcast_ed_data pass edi->sav.nr)
 + * bHarmonic  if TRUE, refproj0 and refprojslope are broadcast as well */
 +static void bc_ed_vecs(t_commrec *cr, t_eigvec *ev, int length, gmx_bool bHarmonic)
 +{
 +    int i;
 +
 +    snew_bc(cr, ev->ieig   , ev->neig);  /* index numbers of eigenvector  */
 +    snew_bc(cr, ev->stpsz  , ev->neig);  /* stepsizes per eigenvector     */
 +    snew_bc(cr, ev->xproj  , ev->neig);  /* instantaneous x projection    */
 +    snew_bc(cr, ev->fproj  , ev->neig);  /* instantaneous f projection    */
 +    snew_bc(cr, ev->refproj, ev->neig);  /* starting or target projection */
 +
 +    nblock_bc(cr, ev->neig, ev->ieig   );
 +    nblock_bc(cr, ev->neig, ev->stpsz  );
 +    nblock_bc(cr, ev->neig, ev->xproj  );
 +    nblock_bc(cr, ev->neig, ev->fproj  );
 +    nblock_bc(cr, ev->neig, ev->refproj);
 +
 +    /* The eigenvectors themselves: an array of neig pointers, each pointing
 +     * to 'length' rvecs */
 +    snew_bc(cr, ev->vec, ev->neig);      /* Eigenvector components        */
 +    for (i=0; i<ev->neig; i++)
 +    {
 +        snew_bc(cr, ev->vec[i], length);
 +        nblock_bc(cr, length, ev->vec[i]);
 +    }
 +
 +    /* For harmonic restraints the reference projections can change with time */
 +    if (bHarmonic)
 +    {
 +        snew_bc(cr, ev->refproj0    , ev->neig);
 +        snew_bc(cr, ev->refprojslope, ev->neig);
 +        nblock_bc(cr, ev->neig, ev->refproj0    );
 +        nblock_bc(cr, ev->neig, ev->refprojslope);
 +    }
 +}
 +
 +
 +/* Broadcasts the ED / flooding data to other nodes
 + * and allocates memory where needed.
 + *
 + * cr       communication record
 + * ed       ED data structure holding the linked list of datasets (ed->edpar)
 + * numedis  number of ED datasets to broadcast */
 +static void broadcast_ed_data(t_commrec *cr, gmx_edsam_t ed, int numedis)
 +{
 +    int     nr;
 +    t_edpar *edi;
 +
 +
 +    /* Master lets the other nodes know if its ED only or also flooding */
 +    gmx_bcast(sizeof(ed->eEDtype), &(ed->eEDtype), cr);
 +
 +    snew_bc(cr, ed->edpar,1);
 +    /* Now transfer the ED data set(s) */
 +    edi = ed->edpar;
 +    for (nr=0; nr<numedis; nr++)
 +    {
 +        /* Broadcast a single ED data set. The struct is sent as a whole; the
 +         * arrays its members point to are transferred by the calls below */
 +        block_bc(cr, *edi);
 +
 +        /* Broadcast positions */
 +        bc_ed_positions(cr, &(edi->sref), eedREF); /* reference positions (incl. masses m)            */
 +        bc_ed_positions(cr, &(edi->sav ), eedAV ); /* average positions (incl. masses m and sqrt(m))  */
 +        bc_ed_positions(cr, &(edi->star), eedTAR); /* target positions                                */
 +        bc_ed_positions(cr, &(edi->sori), eedORI); /* origin positions                                */
 +
 +        /* Broadcast eigenvectors */
 +        bc_ed_vecs(cr, &edi->vecs.mon   , edi->sav.nr, FALSE);
 +        bc_ed_vecs(cr, &edi->vecs.linfix, edi->sav.nr, FALSE);
 +        bc_ed_vecs(cr, &edi->vecs.linacc, edi->sav.nr, FALSE);
 +        bc_ed_vecs(cr, &edi->vecs.radfix, edi->sav.nr, FALSE);
 +        bc_ed_vecs(cr, &edi->vecs.radacc, edi->sav.nr, FALSE);
 +        bc_ed_vecs(cr, &edi->vecs.radcon, edi->sav.nr, FALSE);
 +        /* Broadcast flooding eigenvectors and, if needed, values for the moving reference */
 +        bc_ed_vecs(cr, &edi->flood.vecs,  edi->sav.nr, edi->flood.bHarmonic);
 +
 +        /* Set the pointer to the next ED dataset. next_edi was part of the
 +         * struct broadcast above, so it is non-NULL on all nodes whenever the
 +         * master has a further dataset; snew_bc then provides the storage */
 +        if (edi->next_edi)
 +        {
 +          snew_bc(cr, edi->next_edi, 1);
 +          edi = edi->next_edi;
 +        }
 +    }
 +}
 +
 +
 +/* init-routine called for every *.edi-cycle, initialises t_edpar structure */
 +static void init_edi(gmx_mtop_t *mtop,t_inputrec *ir,
 +                     t_commrec *cr,gmx_edsam_t ed,t_edpar *edi)
 +{
 +    int  i;
 +    real totalmass = 0.0;
 +    rvec com;
 +    t_atom *atom;
 +
 +    /* NOTE Init_edi is executed on the master process only
 +     * The initialized data sets are then transmitted to the
 +     * other nodes in broadcast_ed_data */
 +
 +    edi->bNeedDoEdsam = edi->vecs.mon.neig
 +                     || edi->vecs.linfix.neig
 +                     || edi->vecs.linacc.neig
 +                     || edi->vecs.radfix.neig
 +                     || edi->vecs.radacc.neig
 +                     || edi->vecs.radcon.neig;
 +
 +    /* evaluate masses (reference structure) */
 +    snew(edi->sref.m, edi->sref.nr);
 +    for (i = 0; i < edi->sref.nr; i++)
 +    {
 +        if (edi->fitmas)
 +        {
 +            gmx_mtop_atomnr_to_atom(mtop,edi->sref.anrs[i],&atom);
 +            edi->sref.m[i] = atom->m;
 +        }
 +        else
 +        {
 +            edi->sref.m[i] = 1.0;
 +        }
 +
 +        /* Check that every m > 0. Bad things will happen otherwise. */
 +        if (edi->sref.m[i] <= 0.0)
 +        {
 +            gmx_fatal(FARGS, "Reference structure atom %d (sam.edi index %d) has a mass of %g.\n"
 +                             "For a mass-weighted fit, all reference structure atoms need to have a mass >0.\n"
 +                             "Either make the covariance analysis non-mass-weighted, or exclude massless\n"
 +                             "atoms from the reference structure by creating a proper index group.\n",
 +                      i, edi->sref.anrs[i]+1, edi->sref.m[i]);
 +        }
 +
 +        totalmass += edi->sref.m[i];
 +    }
 +    edi->sref.mtot = totalmass;
 +
 +    /* Masses m and sqrt(m) for the average structure. Note that m
 +     * is needed if forces have to be evaluated in do_edsam */
 +    snew(edi->sav.sqrtm, edi->sav.nr );
 +    snew(edi->sav.m    , edi->sav.nr );
 +    for (i = 0; i < edi->sav.nr; i++)
 +    {
 +        gmx_mtop_atomnr_to_atom(mtop,edi->sav.anrs[i],&atom);
 +        edi->sav.m[i] = atom->m;
 +        if (edi->pcamas)
 +        {
 +            edi->sav.sqrtm[i] = sqrt(atom->m);
 +        }
 +        else
 +        {
 +            edi->sav.sqrtm[i] = 1.0;
 +        }
 +
 +        /* Check that every m > 0. Bad things will happen otherwise. */
 +        if (edi->sav.sqrtm[i] <= 0.0)
 +        {
 +            gmx_fatal(FARGS, "Average structure atom %d (sam.edi index %d) has a mass of %g.\n"
 +                             "For ED with mass-weighting, all average structure atoms need to have a mass >0.\n"
 +                             "Either make the covariance analysis non-mass-weighted, or exclude massless\n"
 +                             "atoms from the average structure by creating a proper index group.\n",
 +                      i, edi->sav.anrs[i]+1, atom->m);
 +        }
 +    }
 +
 +    /* put reference structure in origin */
 +    get_center(edi->sref.x, edi->sref.m, edi->sref.nr, com);
 +    com[XX] = -com[XX];
 +    com[YY] = -com[YY];
 +    com[ZZ] = -com[ZZ];
 +    translate_x(edi->sref.x, edi->sref.nr, com);
 +
 +    /* Init ED buffer */
 +    snew(edi->buf, 1);
 +}
 +
 +
 +static void check(const char *line, const char *label)
 +{
 +    if (!strstr(line,label))
 +        gmx_fatal(FARGS,"Could not find input parameter %s at expected position in edsam input-file (.edi)\nline read instead is %s",label,line);
 +}
 +
 +
 +static int read_checked_edint(FILE *file,const char *label)
 +{
 +    char line[STRLEN+1];
 +    int idum;
 +
 +
 +    fgets2 (line,STRLEN,file);
 +    check(line,label);
 +    fgets2 (line,STRLEN,file);
 +    sscanf (line,"%d",&idum);
 +    return idum;
 +}
 +
 +
 +static int read_edint(FILE *file,gmx_bool *bEOF)
 +{
 +    char line[STRLEN+1];
 +    int idum;
 +    char *eof;
 +
 +
 +    eof=fgets2 (line,STRLEN,file);
 +    if (eof==NULL)
 +    {
 +        *bEOF = TRUE;
 +        return -1;
 +    }
 +    eof=fgets2 (line,STRLEN,file);
 +    if (eof==NULL)
 +    {
 +        *bEOF = TRUE;
 +        return -1;
 +    }
 +    sscanf (line,"%d",&idum);
 +    *bEOF = FALSE;
 +    return idum;
 +}
 +
 +
 +static real read_checked_edreal(FILE *file,const char *label)
 +{
 +    char line[STRLEN+1];
 +    double rdum;
 +
 +
 +    fgets2 (line,STRLEN,file);
 +    check(line,label);
 +    fgets2 (line,STRLEN,file);
 +    sscanf (line,"%lf",&rdum);
 +    return (real) rdum; /* always read as double and convert to single */
 +}
 +
 +
 +static void read_edx(FILE *file,int number,int *anrs,rvec *x)
 +{
 +    int i,j;
 +    char line[STRLEN+1];
 +    double d[3];
 +
 +
 +    for(i=0; i<number; i++)
 +    {
 +        fgets2 (line,STRLEN,file);
 +        sscanf (line,"%d%lf%lf%lf",&anrs[i],&d[0],&d[1],&d[2]);
 +        anrs[i]--; /* we are reading FORTRAN indices */
 +        for(j=0; j<3; j++)
 +            x[i][j]=d[j]; /* always read as double and convert to single */
 +    }
 +}
 +
 +
 +static void scan_edvec(FILE *in,int nr,rvec *vec)
 +{
 +    char line[STRLEN+1];
 +    int i;
 +    double x,y,z;
 +
 +
 +    for(i=0; (i < nr); i++)
 +    {
 +        fgets2 (line,STRLEN,in);
 +        sscanf (line,"%le%le%le",&x,&y,&z);
 +        vec[i][XX]=x;
 +        vec[i][YY]=y;
 +        vec[i][ZZ]=z;
 +    }
 +}
 +
 +
 +static void read_edvec(FILE *in,int nr,t_eigvec *tvec,gmx_bool bReadRefproj, gmx_bool *bHaveReference)
 +{
 +    int i,idum,nscan;
 +    double rdum,refproj_dum=0.0,refprojslope_dum=0.0;
 +    char line[STRLEN+1];
 +
 +
 +    tvec->neig=read_checked_edint(in,"NUMBER OF EIGENVECTORS");
 +    if (tvec->neig >0)
 +    {
 +        snew(tvec->ieig   ,tvec->neig);
 +        snew(tvec->stpsz  ,tvec->neig);
 +        snew(tvec->vec    ,tvec->neig);
 +        snew(tvec->xproj  ,tvec->neig);
 +        snew(tvec->fproj  ,tvec->neig);
 +        snew(tvec->refproj,tvec->neig);
 +        if (bReadRefproj)
 +        {
 +            snew(tvec->refproj0    ,tvec->neig);
 +            snew(tvec->refprojslope,tvec->neig);
 +        }
 +
 +        for(i=0; (i < tvec->neig); i++)
 +        {
 +            fgets2 (line,STRLEN,in);
 +            if (bReadRefproj) /* ONLY when using flooding as harmonic restraint */
 +            {
 +                nscan = sscanf(line,"%d%lf%lf%lf",&idum,&rdum,&refproj_dum,&refprojslope_dum);
 +                /* Zero out values which were not scanned */
 +                switch(nscan)
 +                {
 +                    case 4:
 +                        /* Every 4 values read, including reference position */
 +                        *bHaveReference = TRUE;
 +                        break;
 +                    case 3:
 +                        /* A reference position is provided */
 +                        *bHaveReference = TRUE;
 +                        /* No value for slope, set to 0 */
 +                        refprojslope_dum = 0.0;
 +                        break;
 +                    case 2:
 +                        /* No values for reference projection and slope, set to 0 */
 +                        refproj_dum      = 0.0;
 +                        refprojslope_dum = 0.0;
 +                        break;
 +                    default:
 +                        gmx_fatal(FARGS,"Expected 2 - 4 (not %d) values for flooding vec: <nr> <spring const> <refproj> <refproj-slope>\n", nscan);
 +                        break;
 +                }
 +                tvec->refproj[i]=refproj_dum;
 +                tvec->refproj0[i]=refproj_dum;
 +                tvec->refprojslope[i]=refprojslope_dum;
 +            }
 +            else /* Normal flooding */
 +            {
 +                nscan = sscanf(line,"%d%lf",&idum,&rdum);
 +                if (nscan != 2)
 +                    gmx_fatal(FARGS,"Expected 2 values for flooding vec: <nr> <stpsz>\n");
 +            }
 +            tvec->ieig[i]=idum;
 +            tvec->stpsz[i]=rdum;
 +        } /* end of loop over eigenvectors */
 +
 +        for(i=0; (i < tvec->neig); i++)
 +        {
 +            snew(tvec->vec[i],nr);
 +            scan_edvec(in,nr,tvec->vec[i]);
 +        }
 +    }
 +}
 +
 +
 +/* calls read_edvec for the vector groups, only for flooding there is an extra call */
 +static void read_edvecs(FILE *in,int nr,t_edvecs *vecs)
 +{
 +      gmx_bool bHaveReference = FALSE;
 +
 +
 +    read_edvec(in, nr, &vecs->mon   , FALSE, &bHaveReference);
 +    read_edvec(in, nr, &vecs->linfix, FALSE, &bHaveReference);
 +    read_edvec(in, nr, &vecs->linacc, FALSE, &bHaveReference);
 +    read_edvec(in, nr, &vecs->radfix, FALSE, &bHaveReference);
 +    read_edvec(in, nr, &vecs->radacc, FALSE, &bHaveReference);
 +    read_edvec(in, nr, &vecs->radcon, FALSE, &bHaveReference);
 +}
 +
 +
 +/* Check if the same atom indices are used for reference and average positions */
 +static gmx_bool check_if_same(struct gmx_edx sref, struct gmx_edx sav)
 +{
 +    int i;
 +
 +
 +    /* If the number of atoms differs between the two structures,
 +     * they cannot be identical */
 +    if (sref.nr != sav.nr)
 +        return FALSE;
 +
 +    /* Now that we know that both stuctures have the same number of atoms,
 +     * check if also the indices are identical */
 +    for (i=0; i < sav.nr; i++)
 +    {
 +        if (sref.anrs[i] != sav.anrs[i])
 +            return FALSE;
 +    }
 +    fprintf(stderr, "ED: Note: Reference and average structure are composed of the same atom indices.\n");
 +
 +    return TRUE;
 +}
 +
 +
 +/* Read a single ED dataset from the .edi file into edi.
 + *
 + * in          the .edi input file, positioned at the start of a dataset
 + * ed          ED data structure, only used for the file name in error messages
 + * edi         output: the dataset to fill; its arrays are allocated here
 + * nr_mdatoms  number of atoms of the simulation, must match the .edi file
 + * edi_nr      number of this dataset, stored as flood.flood_id
 + * cr          communication record, used to decide whether x_old is needed
 + *
 + * Returns 0 if the end of the file was reached before the magic number
 + * could be read, 1 after a dataset was read. */
 +static int read_edi(FILE* in, gmx_edsam_t ed,t_edpar *edi,int nr_mdatoms, int edi_nr, t_commrec *cr)
 +{
 +    int readmagic;
 +    const int magic=670;
 +    gmx_bool bEOF;
 +
 +    /* Was a specific reference point for the flooding/umbrella potential provided in the edi file? */
 +    gmx_bool bHaveReference = FALSE;
 +
 +
 +    /* the edi file is not free format, so expect problems if the input is corrupt. */
 +
 +    /* check the magic number */
 +    readmagic=read_edint(in,&bEOF);
 +    /* Check whether we have reached the end of the input file */
 +    if (bEOF)
 +        return 0;
 +
 +    /* Magic numbers 670 (current) and 669 are accepted, 666-668 and
 +     * everything else is rejected */
 +    if (readmagic != magic)
 +    {
 +        if (readmagic==666 || readmagic==667 || readmagic==668)
 +            gmx_fatal(FARGS,"Wrong magic number: Use newest version of make_edi to produce edi file");
 +        else if (readmagic != 669)
 +            gmx_fatal(FARGS,"Wrong magic number %d in %s",readmagic,ed->edinam);
 +    }
 +
 +    /* check the number of atoms */
 +    /* NOTE(review): bEOF is not checked here; at the end of the file
 +     * read_edint returns -1, which is then caught by the comparison below */
 +    edi->nini=read_edint(in,&bEOF);
 +    if (edi->nini != nr_mdatoms)
 +        gmx_fatal(FARGS,"Nr of atoms in %s (%d) does not match nr of md atoms (%d)",
 +                ed->edinam,edi->nini,nr_mdatoms);
 +
 +    /* Done checking. For the rest we blindly trust the input */
 +    edi->fitmas          = read_checked_edint(in,"FITMAS");
 +    edi->pcamas          = read_checked_edint(in,"ANALYSIS_MAS");
 +    edi->outfrq          = read_checked_edint(in,"OUTFRQ");
 +    edi->maxedsteps      = read_checked_edint(in,"MAXLEN");
 +    edi->slope           = read_checked_edreal(in,"SLOPECRIT");
 +
 +    edi->presteps        = read_checked_edint(in,"PRESTEPS");
 +    edi->flood.deltaF0   = read_checked_edreal(in,"DELTA_F0");
 +    edi->flood.deltaF    = read_checked_edreal(in,"INIT_DELTA_F");
 +    edi->flood.tau       = read_checked_edreal(in,"TAU");
 +    edi->flood.constEfl  = read_checked_edreal(in,"EFL_NULL");
 +    edi->flood.alpha2    = read_checked_edreal(in,"ALPHA2");
 +    edi->flood.kT        = read_checked_edreal(in,"KT");
 +    edi->flood.bHarmonic = read_checked_edint(in,"HARMONIC");
 +    /* The constant force flooding entry only exists for magic numbers above 669 */
 +    if (readmagic > 669)
 +        edi->flood.bConstForce = read_checked_edint(in,"CONST_FORCE_FLOODING");
 +    else
 +        edi->flood.bConstForce = FALSE;
 +    edi->flood.flood_id  = edi_nr;
 +    edi->sref.nr         = read_checked_edint(in,"NREF");
 +
 +    /* allocate space for reference positions and read them */
 +    snew(edi->sref.anrs,edi->sref.nr);
 +    snew(edi->sref.x   ,edi->sref.nr);
 +    if (PAR(cr))
 +        snew(edi->sref.x_old,edi->sref.nr);
 +    edi->sref.sqrtm    =NULL;
 +    read_edx(in,edi->sref.nr,edi->sref.anrs,edi->sref.x);
 +
 +    /* average positions. they define which atoms will be used for ED sampling */
 +    edi->sav.nr=read_checked_edint(in,"NAV");
 +    snew(edi->sav.anrs,edi->sav.nr);
 +    snew(edi->sav.x   ,edi->sav.nr);
 +    if (PAR(cr))
 +        snew(edi->sav.x_old,edi->sav.nr);
 +    read_edx(in,edi->sav.nr,edi->sav.anrs,edi->sav.x);
 +
 +    /* Check if the same atom indices are used for reference and average positions */
 +    edi->bRefEqAv = check_if_same(edi->sref, edi->sav);
 +
 +    /* eigenvectors: first the six ED groups, then the flooding vectors, which
 +     * may carry reference projections if flooding is used as harmonic restraint */
 +    read_edvecs(in,edi->sav.nr,&edi->vecs);
 +    read_edvec(in,edi->sav.nr,&edi->flood.vecs,edi->flood.bHarmonic, &bHaveReference);
 +
 +    /* target positions */
 +    /* NOTE(review): at the end of the file read_edint returns -1, so the
 +     * target (and below the origin) structure is simply skipped */
 +    edi->star.nr=read_edint(in,&bEOF);
 +    if (edi->star.nr > 0)
 +    {
 +        snew(edi->star.anrs,edi->star.nr);
 +        snew(edi->star.x   ,edi->star.nr);
 +        edi->star.sqrtm    =NULL;
 +        read_edx(in,edi->star.nr,edi->star.anrs,edi->star.x);
 +    }
 +
 +    /* positions defining origin of expansion circle */
 +    edi->sori.nr=read_edint(in,&bEOF);
 +    if (edi->sori.nr > 0)
 +    {
 +      if (bHaveReference)
 +      {
 +              /* Both an -ori structure and at least one manual reference point have been
 +               * specified. That's ambiguous and probably not intentional. */
 +              gmx_fatal(FARGS, "ED: An origin structure has been provided and a at least one (moving) reference\n"
 +                               "    point was manually specified in the edi file. That is ambiguous. Aborting.\n");
 +      }
 +        snew(edi->sori.anrs,edi->sori.nr);
 +        snew(edi->sori.x   ,edi->sori.nr);
 +        edi->sori.sqrtm    =NULL;
 +        read_edx(in,edi->sori.nr,edi->sori.anrs,edi->sori.x);
 +    }
 +
 +    /* all done */
 +    return 1;
 +}
 +
 +
 +
 +/* Read in the edi input file. Note that it may contain several ED data sets which were
 + * achieved by concatenating multiple edi files. The standard case would be a single ED
 + * data set, though. */
 +static void read_edi_file(gmx_edsam_t ed, t_edpar *edi, int nr_mdatoms, t_commrec *cr)
 +{
 +    FILE    *in;
 +    t_edpar *curr_edi,*last_edi;
 +    t_edpar *edi_read;
 +    int     edi_nr = 0;
 +
 +
 +    /* This routine is executed on the master only */
 +
 +    /* Open the .edi parameter input file */
 +    in = gmx_fio_fopen(ed->edinam,"r");
 +    fprintf(stderr, "ED: Reading edi file %s\n", ed->edinam);
 +
 +    /* Now read a sequence of ED input parameter sets from the edi file */
 +    curr_edi=edi;
 +    last_edi=edi;
 +    while( read_edi(in, ed, curr_edi, nr_mdatoms, edi_nr, cr) )
 +    {
 +        edi_nr++;
 +        /* Make shure that the number of atoms in each dataset is the same as in the tpr file */
 +        if (edi->nini != nr_mdatoms)
 +            gmx_fatal(FARGS,"edi file %s (dataset #%d) was made for %d atoms, but the simulation contains %d atoms.",
 +                    ed->edinam, edi_nr, edi->nini, nr_mdatoms);
 +        /* Since we arrived within this while loop we know that there is still another data set to be read in */
 +        /* We need to allocate space for the data: */
 +        snew(edi_read,1);
 +        /* Point the 'next_edi' entry to the next edi: */
 +        curr_edi->next_edi=edi_read;
 +        /* Keep the curr_edi pointer for the case that the next dataset is empty: */
 +        last_edi = curr_edi;
 +        /* Let's prepare to read in the next edi data set: */
 +        curr_edi = edi_read;
 +    }
 +    if (edi_nr == 0)
 +        gmx_fatal(FARGS, "No complete ED data set found in edi file %s.", ed->edinam);
 +
 +    /* Terminate the edi dataset list with a NULL pointer: */
 +    last_edi->next_edi = NULL;
 +
 +    fprintf(stderr, "ED: Found %d ED dataset%s.\n", edi_nr, edi_nr>1? "s" : "");
 +
 +    /* Close the .edi file again */
 +    gmx_fio_fclose(in);
 +}
 +
 +
 +/* Local buffer of fit_to_reference, allocated on its first call for each
 + * ED dataset and kept in edi->buf->fit_to_ref */
 +struct t_fit_to_ref {
 +    rvec *xcopy;       /* Working copy of the positions in fit_to_reference (edi->sref.nr entries) */
 +};
 +
 +/* Fit the current positions to the reference positions
 + * Do not actually do the fit, just return rotation and translation.
 + * Note that the COM of the reference structure was already put into
 + * the origin by init_edi. */
 +static void fit_to_reference(rvec      *xcoll,    /* The positions to be fitted */
 +                             rvec      transvec,  /* The translation vector */
 +                             matrix    rotmat,    /* The rotation matrix */
 +                             t_edpar   *edi)      /* Just needed for do_edfit */
 +{
 +    rvec com;          /* center of mass */
 +    int  i;
 +    struct t_fit_to_ref *loc;
 +
 +
 +    /* Allocate memory the first time this routine is called for each edi dataset */
 +    if (NULL == edi->buf->fit_to_ref)
 +    {
 +        snew(edi->buf->fit_to_ref, 1);
 +        snew(edi->buf->fit_to_ref->xcopy, edi->sref.nr);
 +    }
 +    loc = edi->buf->fit_to_ref;
 +
 +    /* We do not touch the original positions but work on a copy. */
 +    for (i=0; i<edi->sref.nr; i++)
 +        copy_rvec(xcoll[i], loc->xcopy[i]);
 +
 +    /* Calculate the center of mass */
 +    get_center(loc->xcopy, edi->sref.m, edi->sref.nr, com);
 +
 +    transvec[XX] = -com[XX];
 +    transvec[YY] = -com[YY];
 +    transvec[ZZ] = -com[ZZ];
 +
 +    /* Subtract the center of mass from the copy */
 +    translate_x(loc->xcopy, edi->sref.nr, transvec);
 +
 +    /* Determine the rotation matrix */
 +    do_edfit(edi->sref.nr, edi->sref.x, loc->xcopy, rotmat, edi);
 +}
 +
 +
 +static void translate_and_rotate(rvec *x,         /* The positions to be translated and rotated */
 +                                 int nat,         /* How many positions are there? */
 +                                 rvec transvec,   /* The translation vector */
 +                                 matrix rotmat)   /* The rotation matrix */
 +{
 +    /* Translation */
 +    translate_x(x, nat, transvec);
 +
 +    /* Rotation */
 +    rotate_x(x, nat, rotmat);
 +}
 +
 +
 +/* Gets the rms deviation of the positions to the structure s */
 +/* fit_to_structure has to be called before calling this routine! */
 +static real rmsd_from_structure(rvec           *x,  /* The positions under consideration */
 +                                struct gmx_edx *s)  /* The structure from which the rmsd shall be computed */
 +{
 +    real  rmsd=0.0;
 +    int   i;
 +
 +
 +    for (i=0; i < s->nr; i++)
 +        rmsd += distance2(s->x[i], x[i]);
 +
 +    rmsd /= (real) s->nr;
 +    rmsd = sqrt(rmsd);
 +
 +    return rmsd;
 +}
 +
 +
 +void dd_make_local_ed_indices(gmx_domdec_t *dd, struct gmx_edsam *ed)
 +{
 +    t_edpar *edi;
 +
 +
 +    if (ed->eEDtype != eEDnone)
 +    {
 +        /* Loop over ED datasets (usually there is just one dataset, though) */
 +        edi=ed->edpar;
 +        while (edi)
 +        {
 +            /* Local atoms of the reference structure (for fitting), need only be assembled
 +             * if their indices differ from the average ones */
 +            if (!edi->bRefEqAv)
 +                dd_make_local_group_indices(dd->ga2la, edi->sref.nr, edi->sref.anrs,
 +                        &edi->sref.nr_loc, &edi->sref.anrs_loc, &edi->sref.nalloc_loc, edi->sref.c_ind);
 +
 +            /* Local atoms of the average structure (on these ED will be performed) */
 +            dd_make_local_group_indices(dd->ga2la, edi->sav.nr, edi->sav.anrs,
 +                    &edi->sav.nr_loc, &edi->sav.anrs_loc, &edi->sav.nalloc_loc, edi->sav.c_ind);
 +
 +            /* Indicate that the ED shift vectors for this structure need to be updated
 +             * at the next call to communicate_group_positions, since obviously we are in a NS step */
 +            edi->buf->do_edsam->bUpdateShifts = TRUE;
 +
 +            /* Set the pointer to the next ED dataset (if any) */
 +            edi=edi->next_edi;
 +        }
 +    }
 +}
 +
 +
 +static inline void ed_unshift_single_coord(matrix box, const rvec x, const ivec is, rvec xu)
 +{
 +    int tx,ty,tz;
 +
 +
 +    tx=is[XX];
 +    ty=is[YY];
 +    tz=is[ZZ];
 +
 +    if(TRICLINIC(box))
 +    {
 +        xu[XX] = x[XX]-tx*box[XX][XX]-ty*box[YY][XX]-tz*box[ZZ][XX];
 +        xu[YY] = x[YY]-ty*box[YY][YY]-tz*box[ZZ][YY];
 +        xu[ZZ] = x[ZZ]-tz*box[ZZ][ZZ];
 +    } else
 +    {
 +        xu[XX] = x[XX]-tx*box[XX][XX];
 +        xu[YY] = x[YY]-ty*box[YY][YY];
 +        xu[ZZ] = x[ZZ]-tz*box[ZZ][ZZ];
 +    }
 +}
 +
 +
 +static void do_linfix(rvec *xcoll, t_edpar *edi, int step, t_commrec *cr)
 +{
 +    int  i, j;
 +    real proj, add;
 +    rvec vec_dum;
 +
 +
 +    /* loop over linfix vectors */
 +    for (i=0; i<edi->vecs.linfix.neig; i++)
 +    {
 +        /* calculate the projection */
 +        proj = projectx(edi, xcoll, edi->vecs.linfix.vec[i]);
 +
 +        /* calculate the correction */
 +        add = edi->vecs.linfix.refproj[i] + step*edi->vecs.linfix.stpsz[i] - proj;
 +
 +        /* apply the correction */
 +        add /= edi->sav.sqrtm[i];
 +        for (j=0; j<edi->sav.nr; j++)
 +        {
 +            svmul(add, edi->vecs.linfix.vec[i][j], vec_dum);
 +            rvec_inc(xcoll[j], vec_dum);
 +        }
 +    }
 +}
 +
 +
 +static void do_linacc(rvec *xcoll, t_edpar *edi, t_commrec *cr)
 +{
 +    int  i, j;
 +    real proj, add;
 +    rvec vec_dum;
 +
 +
 +    /* loop over linacc vectors */
 +    for (i=0; i<edi->vecs.linacc.neig; i++)
 +    {
 +        /* calculate the projection */
 +        proj=projectx(edi, xcoll, edi->vecs.linacc.vec[i]);
 +
 +        /* calculate the correction */
 +        add = 0.0;
 +        if (edi->vecs.linacc.stpsz[i] > 0.0)
 +        {
 +            if ((proj-edi->vecs.linacc.refproj[i]) < 0.0)
 +                add = edi->vecs.linacc.refproj[i] - proj;
 +        }
 +        if (edi->vecs.linacc.stpsz[i] < 0.0)
 +        {
 +            if ((proj-edi->vecs.linacc.refproj[i]) > 0.0)
 +                add = edi->vecs.linacc.refproj[i] - proj;
 +        }
 +
 +        /* apply the correction */
 +        add /= edi->sav.sqrtm[i];
 +        for (j=0; j<edi->sav.nr; j++)
 +        {
 +            svmul(add, edi->vecs.linacc.vec[i][j], vec_dum);
 +            rvec_inc(xcoll[j], vec_dum);
 +        }
 +
 +        /* new positions will act as reference */
 +        edi->vecs.linacc.refproj[i] = proj + add;
 +    }
 +}
 +
 +
 +/* Apply the 'radfix' constraint to the collective positions xcoll.
 + *
 + * The distance 'rad' between the current projections and the reference
 + * projections (refproj) of the radfix eigenvectors is computed, and the
 + * positions are scaled along these eigenvectors by
 + *     ratio = (stpsz[0] + radius)/rad - 1
 + * The stored radius is then increased by stpsz[0]; only the step size of
 + * the first radfix vector is used.
 + *
 + * Returns immediately if there are no radfix eigenvectors. The parameters
 + * step and cr are not used in this routine.
 + */
 +static void do_radfix(rvec *xcoll, t_edpar *edi, int step, t_commrec *cr)
 +{
 +    int  i,j;
 +    real *proj, rad=0.0, ratio;
 +    rvec vec_dum;
 +
 +
 +    if (edi->vecs.radfix.neig == 0)
 +        return;
 +
 +    /* Temporary buffer for the projections, freed at the end */
 +    snew(proj, edi->vecs.radfix.neig);
 +
 +    /* loop over radfix vectors */
 +    for (i=0; i<edi->vecs.radfix.neig; i++)
 +    {
 +        /* calculate the projections, radius */
 +        proj[i] = projectx(edi, xcoll, edi->vecs.radfix.vec[i]);
 +        rad += pow(proj[i] - edi->vecs.radfix.refproj[i], 2);
 +    }
 +
 +    rad   = sqrt(rad);
 +    /* NOTE(review): there is no guard against rad == 0 in this division */
 +    ratio = (edi->vecs.radfix.stpsz[0]+edi->vecs.radfix.radius)/rad - 1.0;
 +    edi->vecs.radfix.radius += edi->vecs.radfix.stpsz[0];
 +
 +    /* loop over radfix vectors */
 +    for (i=0; i<edi->vecs.radfix.neig; i++)
 +    {
 +        /* From here on proj[i] holds the displacement from the reference */
 +        proj[i] -= edi->vecs.radfix.refproj[i];
 +
 +        /* apply the correction */
 +        proj[i] /= edi->sav.sqrtm[i];
 +        proj[i] *= ratio;
 +        for (j=0; j<edi->sav.nr; j++) {
 +            svmul(proj[i], edi->vecs.radfix.vec[i][j], vec_dum);
 +            rvec_inc(xcoll[j], vec_dum);
 +        }
 +    }
 +
 +    sfree(proj);
 +}
 +
 +
 +/* Apply the 'radacc' constraint to the collective positions xcoll.
 + *
 + * The distance 'rad' between the current projections and the reference
 + * projections of the radacc eigenvectors is computed. If it is smaller than
 + * the stored radius, the positions are scaled along the radacc eigenvectors
 + * with ratio = radius/rad - 1; otherwise the stored radius is set to rad
 + * and ratio stays 0, so the correction added to the positions is zero.
 + *
 + * Returns immediately if there are no radacc eigenvectors. The parameter
 + * cr is not used in this routine.
 + */
 +static void do_radacc(rvec *xcoll, t_edpar *edi, t_commrec *cr)
 +{
 +    int  i,j;
 +    real *proj, rad=0.0, ratio=0.0;
 +    rvec vec_dum;
 +
 +
 +    if (edi->vecs.radacc.neig == 0)
 +        return;
 +
 +    /* Temporary buffer for the projections, freed at the end */
 +    snew(proj,edi->vecs.radacc.neig);
 +
 +    /* loop over radacc vectors */
 +    for (i=0; i<edi->vecs.radacc.neig; i++)
 +    {
 +        /* calculate the projections, radius */
 +        proj[i] = projectx(edi, xcoll, edi->vecs.radacc.vec[i]);
 +        rad += pow(proj[i] - edi->vecs.radacc.refproj[i], 2);
 +    }
 +    rad = sqrt(rad);
 +
 +    /* only correct when radius decreased */
 +    if (rad < edi->vecs.radacc.radius)
 +    {
 +        /* NOTE(review): there is no guard against rad == 0 in this division */
 +        ratio = edi->vecs.radacc.radius/rad - 1.0;
 +        rad   = edi->vecs.radacc.radius;
 +    }
 +    else
 +        edi->vecs.radacc.radius = rad;
 +
 +    /* loop over radacc vectors */
 +    for (i=0; i<edi->vecs.radacc.neig; i++)
 +    {
 +        /* From here on proj[i] holds the displacement from the reference */
 +        proj[i] -= edi->vecs.radacc.refproj[i];
 +
 +        /* apply the correction */
 +        proj[i] /= edi->sav.sqrtm[i];
 +        proj[i] *= ratio;
 +        for (j=0; j<edi->sav.nr; j++)
 +        {
 +            svmul(proj[i], edi->vecs.radacc.vec[i][j], vec_dum);
 +            rvec_inc(xcoll[j], vec_dum);
 +        }
 +    }
 +    sfree(proj);
 +}
 +
 +
 +/* Buffer kept in edi->buf->do_radcon between calls of do_radcon */
 +struct t_do_radcon {
 +    real *proj;     /* projections onto the radcon eigenvectors */
 +};
 +
 +/* Apply the 'radcon' constraint to the collective positions xcoll.
 + *
 + * The distance 'rad' between the current projections and the reference
 + * projections of the radcon eigenvectors is computed. If it is larger than
 + * the stored radius, the positions are scaled along the radcon eigenvectors
 + * with ratio = radius/rad - 1; otherwise the stored radius is set to rad.
 + *
 + * On the first call the buffer structure edi->buf->do_radcon is allocated;
 + * it is allocated even when there are no radcon eigenvectors. The parameter
 + * cr is not used in this routine.
 + */
 +static void do_radcon(rvec *xcoll, t_edpar *edi, t_commrec *cr)
 +{
 +    int  i,j;
 +    real rad=0.0, ratio=0.0;
 +    struct t_do_radcon *loc;
 +    gmx_bool bFirst;
 +    rvec vec_dum;
 +
 +
 +    /* Detect the first call by the absence of the buffer structure */
 +    if(edi->buf->do_radcon != NULL)
 +    {
 +        bFirst = FALSE;
 +        loc    = edi->buf->do_radcon;
 +    }
 +    else
 +    {
 +        bFirst = TRUE;
 +        snew(edi->buf->do_radcon, 1);
 +    }
 +    /* Valid for both branches above (repeats the assignment of the first one) */
 +    loc = edi->buf->do_radcon;
 +
 +    if (edi->vecs.radcon.neig == 0)
 +        return;
 +
 +    /* The projection buffer is allocated once and reused in later calls */
 +    if (bFirst)
 +        snew(loc->proj, edi->vecs.radcon.neig);
 +
 +    /* loop over radcon vectors */
 +    for (i=0; i<edi->vecs.radcon.neig; i++)
 +    {
 +        /* calculate the projections, radius */
 +        loc->proj[i] = projectx(edi, xcoll, edi->vecs.radcon.vec[i]);
 +        rad += pow(loc->proj[i] - edi->vecs.radcon.refproj[i], 2);
 +    }
 +    rad = sqrt(rad);
 +    /* only correct when radius increased */
 +    if (rad > edi->vecs.radcon.radius)
 +    {
 +        ratio = edi->vecs.radcon.radius/rad - 1.0;
 +
 +        /* loop over radcon vectors */
 +        for (i=0; i<edi->vecs.radcon.neig; i++)
 +        {
 +            /* apply the correction */
 +            loc->proj[i] -= edi->vecs.radcon.refproj[i];
 +            loc->proj[i] /= edi->sav.sqrtm[i];
 +            loc->proj[i] *= ratio;
 +
 +            for (j=0; j<edi->sav.nr; j++)
 +            {
 +                svmul(loc->proj[i], edi->vecs.radcon.vec[i][j], vec_dum);
 +                rvec_inc(xcoll[j], vec_dum);
 +            }
 +        }
 +    }
 +    else
 +        edi->vecs.radcon.radius = rad;
 +
 +    /* Recompute projections and radius from the (possibly corrected) positions.
 +     * NOTE(review): the recomputed rad is not used after this block; only
 +     * loc->proj keeps the new projections - confirm whether this is needed */
 +    if (rad != edi->vecs.radcon.radius)
 +    {
 +        rad = 0.0;
 +        for (i=0; i<edi->vecs.radcon.neig; i++)
 +        {
 +            /* calculate the projections, radius */
 +            loc->proj[i] = projectx(edi, xcoll, edi->vecs.radcon.vec[i]);
 +            rad += pow(loc->proj[i] - edi->vecs.radcon.refproj[i], 2);
 +        }
 +        rad = sqrt(rad);
 +    }
 +}
 +
 +
 +static void ed_apply_constraints(rvec *xcoll, t_edpar *edi, gmx_large_int_t step, t_commrec *cr)
 +{
 +    int i;
 +
 +
 +    /* subtract the average positions */
 +    for (i=0; i<edi->sav.nr; i++)
 +        rvec_dec(xcoll[i], edi->sav.x[i]);
 +
 +    /* apply the constraints */
 +    if (step >= 0)
 +        do_linfix(xcoll, edi, step, cr);
 +    do_linacc(xcoll, edi, cr);
 +    if (step >= 0)
 +        do_radfix(xcoll, edi, step, cr);
 +    do_radacc(xcoll, edi, cr);
 +    do_radcon(xcoll, edi, cr);
 +
 +    /* add back the average positions */
 +    for (i=0; i<edi->sav.nr; i++)
 +        rvec_inc(xcoll[i], edi->sav.x[i]);
 +}
 +
 +
 +/* Write out the projections onto the eigenvectors */
 +static void write_edo(int nr_edi, t_edpar *edi, gmx_edsam_t ed, gmx_large_int_t step,real rmsd)
 +{
 +    int i;
 +    char buf[22];
 +
 +
 +    if (edi->bNeedDoEdsam)
 +    {
 +        if (step == -1)
 +            fprintf(ed->edo, "Initial projections:\n");
 +        else
 +        {
 +            fprintf(ed->edo,"Step %s, ED #%d  ", gmx_step_str(step, buf), nr_edi);
 +            fprintf(ed->edo,"  RMSD %f nm\n",rmsd);
 +        }
 +
 +        if (edi->vecs.mon.neig)
 +        {
 +            fprintf(ed->edo,"  Monitor eigenvectors");
 +            for (i=0; i<edi->vecs.mon.neig; i++)
 +                fprintf(ed->edo," %d: %12.5e ",edi->vecs.mon.ieig[i],edi->vecs.mon.xproj[i]);
 +            fprintf(ed->edo,"\n");
 +        }
 +        if (edi->vecs.linfix.neig)
 +        {
 +            fprintf(ed->edo,"  Linfix  eigenvectors");
 +            for (i=0; i<edi->vecs.linfix.neig; i++)
 +                fprintf(ed->edo," %d: %12.5e ",edi->vecs.linfix.ieig[i],edi->vecs.linfix.xproj[i]);
 +            fprintf(ed->edo,"\n");
 +        }
 +        if (edi->vecs.linacc.neig)
 +        {
 +            fprintf(ed->edo,"  Linacc  eigenvectors");
 +            for (i=0; i<edi->vecs.linacc.neig; i++)
 +                fprintf(ed->edo," %d: %12.5e ",edi->vecs.linacc.ieig[i],edi->vecs.linacc.xproj[i]);
 +            fprintf(ed->edo,"\n");
 +        }
 +        if (edi->vecs.radfix.neig)
 +        {
 +            fprintf(ed->edo,"  Radfix  eigenvectors");
 +            for (i=0; i<edi->vecs.radfix.neig; i++)
 +                fprintf(ed->edo," %d: %12.5e ",edi->vecs.radfix.ieig[i],edi->vecs.radfix.xproj[i]);
 +            fprintf(ed->edo,"\n");
 +            fprintf(ed->edo,"  fixed increment radius = %f\n", calc_radius(&edi->vecs.radfix));
 +        }
 +        if (edi->vecs.radacc.neig)
 +        {
 +            fprintf(ed->edo,"  Radacc  eigenvectors");
 +            for (i=0; i<edi->vecs.radacc.neig; i++)
 +                fprintf(ed->edo," %d: %12.5e ",edi->vecs.radacc.ieig[i],edi->vecs.radacc.xproj[i]);
 +            fprintf(ed->edo,"\n");
 +            fprintf(ed->edo,"  acceptance radius      = %f\n", calc_radius(&edi->vecs.radacc));
 +        }
 +        if (edi->vecs.radcon.neig)
 +        {
 +            fprintf(ed->edo,"  Radcon  eigenvectors");
 +            for (i=0; i<edi->vecs.radcon.neig; i++)
 +                fprintf(ed->edo," %d: %12.5e ",edi->vecs.radcon.ieig[i],edi->vecs.radcon.xproj[i]);
 +            fprintf(ed->edo,"\n");
 +            fprintf(ed->edo,"  contracting radius     = %f\n", calc_radius(&edi->vecs.radcon));
 +        }
 +    }
 +}
 +
 +/* Returns if any constraints are switched on */
 +static int ed_constraints(gmx_bool edtype, t_edpar *edi)
 +{
 +    if (edtype == eEDedsam || edtype == eEDflood)
 +    {
 +        return (edi->vecs.linfix.neig || edi->vecs.linacc.neig ||
 +                edi->vecs.radfix.neig || edi->vecs.radacc.neig ||
 +                edi->vecs.radcon.neig);
 +    }
 +    return 0;
 +}
 +
 +
 +/* Copies reference projection 'refproj' to fixed 'refproj0' variable for flooding/
 + * umbrella sampling simulations. */
 +static void copyEvecReference(t_eigvec* floodvecs)
 +{
 +      int i;
 +
 +
 +      for (i=0; i<floodvecs->neig; i++)
 +      {
 +              floodvecs->refproj0[i] = floodvecs->refproj[i];
 +      }
 +}
 +
 +
 +/* Initialize essential dynamics sampling / flooding.
 + *
 + * On the master node the edi input is read, every ED dataset is initialized
 + * (init_edi, init_flood), the start structure is fitted to the reference
 + * structure and the initial and reference projections are set up. In
 + * parallel runs the number of datasets and the ED data are then broadcast;
 + * in serial runs the local index arrays are made to point to the global
 + * ones. Finally the per-dataset buffers used by do_edsam are allocated on
 + * every node.
 + *
 + * Stops with a fatal error when run in parallel without domain decomposition.
 + */
 +void init_edsam(gmx_mtop_t  *mtop,   /* global topology                    */
 +                t_inputrec  *ir,     /* input record                       */
 +                t_commrec   *cr,     /* communication record               */
 +                gmx_edsam_t ed,      /* contains all ED data               */
 +                rvec        x[],     /* positions of the whole MD system   */
 +                matrix      box)     /* the box                            */
 +{
 +    t_edpar *edi = NULL;    /* points to a single edi data set */
 +    int     numedis=0;      /* keep track of the number of ED data sets in edi file */
 +    int     i,nr_edi;
 +    rvec    *x_pbc  = NULL; /* positions of the whole MD system with pbc removed  */
 +    rvec    *xfit   = NULL; /* the positions which will be fitted to the reference structure  */
 +    rvec    *xstart = NULL; /* the positions which are subject to ED sampling */
 +    rvec    fit_transvec;   /* translation ... */
 +    matrix  fit_rotmat;     /* ... and rotation from fit to reference structure */
 +
 +
 +    if (!DOMAINDECOMP(cr) && PAR(cr) && MASTER(cr))
 +        gmx_fatal(FARGS, "Please switch on domain decomposition to use essential dynamics in parallel.");
 +
 +    if (MASTER(cr))
 +        fprintf(stderr, "ED: Initializing essential dynamics constraints.\n");
 +
 +    /* Needed for initializing radacc radius in do_edsam */
 +    ed->bFirst = 1;
 +
 +    /* The input file is read by the master and the edi structures are
 +     * initialized here. Input is stored in ed->edpar. Then the edi
 +     * structures are transferred to the other nodes */
 +    if (MASTER(cr))
 +    {
 +        snew(ed->edpar,1);
 +        /* Read the whole edi file at once: */
 +        read_edi_file(ed,ed->edpar,mtop->natoms,cr);
 +
 +        /* Initialization for every ED/flooding dataset. Flooding uses one edi dataset per
 +         * flooding vector, Essential dynamics can be applied to more than one structure
 +         * as well, but will be done in the order given in the edi file, so
 +         * expect different results for different order of edi file concatenation! */
 +        edi=ed->edpar;
 +        while(edi != NULL)
 +        {
 +            init_edi(mtop,ir,cr,ed,edi);
 +
 +            /* Init flooding parameters if needed */
 +            init_flood(edi,ed,ir->delta_t,cr);
 +
 +            edi=edi->next_edi;
 +            /* Count the datasets; the count drives all later loops */
 +            numedis++;
 +        }
 +    }
 +
 +    /* The master does the work here. The other nodes get the positions
 +     * not before dd_partition_system which is called after init_edsam */
 +    if (MASTER(cr))
 +    {
 +        /* Remove pbc, make molecule whole.
 +         * When ir->bContinuation=TRUE this has already been done, but ok.
 +         */
 +        snew(x_pbc,mtop->natoms);
 +        m_rveccopy(mtop->natoms,x,x_pbc);
 +        do_pbc_first_mtop(NULL,ir->ePBC,box,mtop,x_pbc);
 +
 +        /* Reset pointer to first ED data set which contains the actual ED data */
 +        edi=ed->edpar;
 +
 +        /* Loop over all ED/flooding data sets (usually only one, though) */
 +        for (nr_edi = 1; nr_edi <= numedis; nr_edi++)
 +        {
 +            /* We use srenew to allocate memory since the size of the buffers
 +             * is likely to change with every ED dataset */
 +            srenew(xfit  , edi->sref.nr );
 +            srenew(xstart, edi->sav.nr  );
 +
 +            /* Extract the positions of the atoms to which will be fitted */
 +            for (i=0; i < edi->sref.nr; i++)
 +            {
 +                copy_rvec(x_pbc[edi->sref.anrs[i]], xfit[i]);
 +
 +                /* Save the sref positions such that in the next time step the molecule can
 +                 * be made whole again (in the parallel case) */
 +                if (PAR(cr))
 +                    copy_rvec(xfit[i], edi->sref.x_old[i]);
 +            }
 +
 +            /* Extract the positions of the atoms subject to ED sampling */
 +            for (i=0; i < edi->sav.nr; i++)
 +            {
 +                copy_rvec(x_pbc[edi->sav.anrs[i]], xstart[i]);
 +
 +                /* Save the sav positions such that in the next time step the molecule can
 +                 * be made whole again (in the parallel case) */
 +                if (PAR(cr))
 +                    copy_rvec(xstart[i], edi->sav.x_old[i]);
 +            }
 +
 +            /* Make the fit to the REFERENCE structure, get translation and rotation */
 +            fit_to_reference(xfit, fit_transvec, fit_rotmat, edi);
 +
 +            /* Output how well we fit to the reference at the start */
 +            translate_and_rotate(xfit, edi->sref.nr, fit_transvec, fit_rotmat);
 +            fprintf(stderr, "ED: Initial RMSD from reference after fit = %f nm (dataset #%d)\n",
 +                    rmsd_from_structure(xfit, &edi->sref), nr_edi);
 +
 +            /* Now apply the translation and rotation to the atoms on which ED sampling will be performed */
 +            translate_and_rotate(xstart, edi->sav.nr, fit_transvec, fit_rotmat);
 +
 +            /* calculate initial projections */
 +            project(xstart, edi);
 +
 +            /* process target structure, if required */
 +            if (edi->star.nr > 0)
 +            {
 +                fprintf(stderr, "ED: Fitting target structure to reference structure\n");
 +                /* get translation & rotation for fit of target structure to reference structure */
 +                fit_to_reference(edi->star.x, fit_transvec, fit_rotmat, edi);
 +                /* do the fit */
 +                /* NOTE(review): the atom count passed here is edi->sav.nr, not
 +                 * edi->star.nr - presumably both are equal; confirm */
 +                translate_and_rotate(edi->star.x, edi->sav.nr, fit_transvec, fit_rotmat);
 +                rad_project(edi, edi->star.x, &edi->vecs.radcon, cr);
 +            } else
 +                rad_project(edi, xstart, &edi->vecs.radcon, cr);
 +
 +            /* process structure that will serve as origin of expansion circle */
 +            if ( (eEDflood == ed->eEDtype) && (FALSE == edi->flood.bConstForce) )
 +                fprintf(stderr, "ED: Setting center of flooding potential (0 = average structure)\n");
 +            if (edi->sori.nr > 0)
 +            {
 +                fprintf(stderr, "ED: Fitting origin structure to reference structure\n");
 +                /* fit this structure to reference structure */
 +                fit_to_reference(edi->sori.x, fit_transvec, fit_rotmat, edi);
 +                /* do the fit */
 +                /* NOTE(review): the atom count passed here is edi->sav.nr, not
 +                 * edi->sori.nr - presumably both are equal; confirm */
 +                translate_and_rotate(edi->sori.x, edi->sav.nr, fit_transvec, fit_rotmat);
 +                rad_project(edi, edi->sori.x, &edi->vecs.radacc, cr);
 +                rad_project(edi, edi->sori.x, &edi->vecs.radfix, cr);
 +                if ( (eEDflood == ed->eEDtype) && (FALSE == edi->flood.bConstForce) )
 +                {
 +                    fprintf(stderr, "ED: The ORIGIN structure will define the flooding potential center.\n");
 +                    /* Set center of flooding potential to the ORIGIN structure */
 +                    rad_project(edi, edi->sori.x, &edi->flood.vecs, cr);
 +                    /* We already know that no (moving) reference position was provided,
 +                     * therefore we can overwrite refproj[0]*/
 +                    copyEvecReference(&edi->flood.vecs);
 +                }
 +            }
 +            else /* No origin structure given */
 +            {
 +                rad_project(edi, xstart, &edi->vecs.radacc, cr);
 +                rad_project(edi, xstart, &edi->vecs.radfix, cr);
 +                if ( (eEDflood == ed->eEDtype) && (FALSE == edi->flood.bConstForce) )
 +                {
 +                    if (edi->flood.bHarmonic)
 +                    {
 +                        fprintf(stderr, "ED: A (possibly changing) ref. projection will define the flooding potential center.\n");
 +                        for (i=0; i<edi->flood.vecs.neig; i++)
 +                            edi->flood.vecs.refproj[i] = edi->flood.vecs.refproj0[i];
 +                    }
 +                    else
 +                    {
 +                        fprintf(stderr, "ED: The AVERAGE structure will define the flooding potential center.\n");
 +                        /* Set center of flooding potential to the center of the covariance matrix,
 +                         * i.e. the average structure, i.e. zero in the projected system */
 +                        for (i=0; i<edi->flood.vecs.neig; i++)
 +                            edi->flood.vecs.refproj[i] = 0.0;
 +                    }
 +                }
 +            }
 +            /* For convenience, output the center of the flooding potential for the eigenvectors */
 +            if ( (eEDflood == ed->eEDtype) && (FALSE == edi->flood.bConstForce) )
 +            {
 +                for (i=0; i<edi->flood.vecs.neig; i++)
 +                {
 +                    fprintf(stdout, "ED: EV %d flooding potential center: %11.4e", i, edi->flood.vecs.refproj[i]);
 +                    if (edi->flood.bHarmonic)
 +                        fprintf(stdout, " (adding %11.4e/timestep)", edi->flood.vecs.refprojslope[i]);
 +                    fprintf(stdout, "\n");
 +                }
 +            }
 +
 +            /* set starting projections for linsam */
 +            rad_project(edi, xstart, &edi->vecs.linacc, cr);
 +            rad_project(edi, xstart, &edi->vecs.linfix, cr);
 +
 +            /* Output to file, set the step to -1 so that write_edo knows it was called from init_edsam */
 +            if (ed->edo && !(ed->bStartFromCpt))
 +                write_edo(nr_edi, edi, ed, -1, 0);
 +
 +            /* Prepare for the next edi data set: */
 +            edi=edi->next_edi;
 +        }
 +        /* Cleaning up on the master node: */
 +        sfree(x_pbc);
 +        sfree(xfit);
 +        sfree(xstart);
 +
 +    } /* end of MASTER only section */
 +
 +    if (PAR(cr))
 +    {
 +        /* First let everybody know how many ED data sets to expect */
 +        gmx_bcast(sizeof(numedis), &numedis, cr);
 +        /* Broadcast the essential dynamics / flooding data to all nodes */
 +        broadcast_ed_data(cr, ed, numedis);
 +    }
 +    else
 +    {
 +        /* In the single-CPU case, point the local atom numbers pointers to the global
 +         * one, so that we can use the same notation in serial and parallel case: */
 +
 +        /* Loop over all ED data sets (usually only one, though) */
 +        edi=ed->edpar;
 +        for (nr_edi = 1; nr_edi <= numedis; nr_edi++)
 +        {
 +            edi->sref.anrs_loc = edi->sref.anrs;
 +            edi->sav.anrs_loc  = edi->sav.anrs;
 +            edi->star.anrs_loc = edi->star.anrs;
 +            edi->sori.anrs_loc = edi->sori.anrs;
 +            /* For the same reason as above, make a dummy c_ind array: */
 +            snew(edi->sav.c_ind, edi->sav.nr);
 +            /* Initialize the array */
 +            for (i=0; i<edi->sav.nr; i++)
 +                edi->sav.c_ind[i] = i;
 +            /* In the general case we will need a different-sized array for the reference indices: */
 +            if (!edi->bRefEqAv)
 +            {
 +                snew(edi->sref.c_ind, edi->sref.nr);
 +                for (i=0; i<edi->sref.nr; i++)
 +                    edi->sref.c_ind[i] = i;
 +            }
 +            /* Point to the very same array in case of other structures: */
 +            edi->star.c_ind = edi->sav.c_ind;
 +            edi->sori.c_ind = edi->sav.c_ind;
 +            /* In the serial case, the local number of atoms is the global one: */
 +            edi->sref.nr_loc = edi->sref.nr;
 +            edi->sav.nr_loc  = edi->sav.nr;
 +            edi->star.nr_loc = edi->star.nr;
 +            edi->sori.nr_loc = edi->sori.nr;
 +
 +            /* And on we go to the next edi dataset */
 +            edi=edi->next_edi;
 +        }
 +    }
 +
 +    /* Allocate space for ED buffer variables */
 +    /* Again, loop over ED data sets */
 +    edi=ed->edpar;
 +    for (nr_edi = 1; nr_edi <= numedis; nr_edi++)
 +    {
 +        /* Allocate space for ED buffer */
 +        snew(edi->buf, 1);
 +        snew(edi->buf->do_edsam, 1);
 +
 +        /* Space for collective ED buffer variables */
 +
 +        /* Collective positions of atoms with the average indices */
 +        snew(edi->buf->do_edsam->xcoll                  , edi->sav.nr);
 +        snew(edi->buf->do_edsam->shifts_xcoll           , edi->sav.nr); /* buffer for xcoll shifts */
 +        snew(edi->buf->do_edsam->extra_shifts_xcoll     , edi->sav.nr);
 +        /* Collective positions of atoms with the reference indices */
 +        if (!edi->bRefEqAv)
 +        {
 +            snew(edi->buf->do_edsam->xc_ref             , edi->sref.nr);
 +            snew(edi->buf->do_edsam->shifts_xc_ref      , edi->sref.nr); /* To store the shifts in */
 +            snew(edi->buf->do_edsam->extra_shifts_xc_ref, edi->sref.nr);
 +        }
 +
 +        /* Get memory for flooding forces */
 +        snew(edi->flood.forces_cartesian                , edi->sav.nr);
 +
 +#ifdef DUMPEDI
 +        /* Dump it all into one file per process */
 +        dump_edi(edi, cr, nr_edi);
 +#endif
 +
 +        /* And on we go to the next edi dataset */
 +        edi=edi->next_edi;
 +    }
 +
 +    /* Flush the edo file so that the user can check some things
 +     * when the simulation has started */
 +    if (ed->edo)
 +        fflush(ed->edo);
 +}
 +
 +
 +/* Perform essential dynamics sampling for all ED datasets in ed.
 + *
 + * For every dataset with bNeedDoEdsam set, the ED positions are collected
 + * from the local positions xs into a collective array, fitted to the
 + * reference structure, the radius references are updated when due, the
 + * constraints are applied and the projections are written to the edo file
 + * on output steps. If any constraint is active, the corrected positions are
 + * copied back into xs and, when v is not NULL, the velocities are corrected
 + * by the position change divided by the time step.
 + *
 + * Returns immediately if ed->eEDtype is eEDnone.
 + */
 +void do_edsam(t_inputrec  *ir,
 +              gmx_large_int_t step,
 +              t_mdatoms   *md,
 +              t_commrec   *cr,
 +              rvec        xs[],   /* The local current positions on this processor */
 +              rvec        v[],    /* The velocities */
 +              matrix      box,
 +              gmx_edsam_t ed)
 +{
 +    int     i,edinr,iupdate=500; /* iupdate: fixed interval (in steps) of the radacc slope check */
 +    matrix  rotmat;         /* rotation matrix */
 +    rvec    transvec;       /* translation vector */
 +    rvec    dv,dx,x_unsh;   /* tmp vectors for velocity, distance, unshifted x coordinate */
 +    real    dt_1;           /* 1/dt */
 +    struct t_do_edsam *buf;
 +    t_edpar *edi;
 +    real    rmsdev=-1;      /* RMSD from reference structure prior to applying the constraints */
 +    gmx_bool bSuppress=FALSE; /* Write .edo file on master? */
 +
 +
 +    /* Check if ED sampling has to be performed */
 +    if ( ed->eEDtype==eEDnone )
 +        return;
 +
 +    /* Suppress output on first call of do_edsam if
 +     * two-step sd2 integrator is used */
 +    if ( (ir->eI==eiSD2) && (v != NULL) )
 +        bSuppress = TRUE;
 +
 +    dt_1 = 1.0/ir->delta_t;
 +
 +    /* Loop over all ED datasets (usually one) */
 +    edi  = ed->edpar;
 +    edinr = 0;
 +    while (edi != NULL)
 +    {
 +        /* edinr is the 1-based dataset number passed to write_edo */
 +        edinr++;
 +        if (edi->bNeedDoEdsam)
 +        {
 +
 +            buf=edi->buf->do_edsam;
 +
 +            if (ed->bFirst)
 +                /* initialise radacc radius for slope criterion */
 +                buf->oldrad=calc_radius(&edi->vecs.radacc);
 +
 +            /* Copy the positions into buf->xc* arrays and after ED
 +             * feed back corrections to the official positions */
 +
 +            /* Broadcast the ED positions such that every node has all of them
 +             * Every node contributes its local positions xs and stores it in
 +             * the collective buf->xcoll array. Note that for edinr > 1
 +             * xs could already have been modified by an earlier ED */
 +
 +            communicate_group_positions(cr, buf->xcoll, buf->shifts_xcoll, buf->extra_shifts_xcoll, buf->bUpdateShifts, xs,
 +                    edi->sav.nr, edi->sav.nr_loc, edi->sav.anrs_loc, edi->sav.c_ind, edi->sav.x_old,  box);
 +
 +#ifdef DEBUG_ED
 +            dump_xcoll(edi, buf, cr, step);
 +#endif
 +            /* Only assemble reference positions if their indices differ from the average ones */
 +            if (!edi->bRefEqAv)
 +                communicate_group_positions(cr, buf->xc_ref, buf->shifts_xc_ref, buf->extra_shifts_xc_ref, buf->bUpdateShifts, xs,
 +                        edi->sref.nr, edi->sref.nr_loc, edi->sref.anrs_loc, edi->sref.c_ind, edi->sref.x_old, box);
 +
 +            /* If bUpdateShifts was TRUE then the shifts have just been updated in get_positions.
 +             * We do not need to update the shifts until the next NS step */
 +            buf->bUpdateShifts = FALSE;
 +
 +            /* Now all nodes have all of the ED positions in buf->xcoll,
 +             * as well as the indices in edi->sav.anrs */
 +
 +            /* Fit the reference indices to the reference structure */
 +            if (edi->bRefEqAv)
 +                fit_to_reference(buf->xcoll , transvec, rotmat, edi);
 +            else
 +                fit_to_reference(buf->xc_ref, transvec, rotmat, edi);
 +
 +            /* Now apply the translation and rotation to the ED structure */
 +            translate_and_rotate(buf->xcoll, edi->sav.nr, transvec, rotmat);
 +
 +            /* Find out how well we fit to the reference (just for output steps) */
 +            if (do_per_step(step,edi->outfrq) && MASTER(cr))
 +            {
 +                if (edi->bRefEqAv)
 +                {
 +                    /* Indices of reference and average structures are identical,
 +                     * thus we can calculate the rmsd to SREF using xcoll */
 +                    rmsdev = rmsd_from_structure(buf->xcoll,&edi->sref);
 +                }
 +                else
 +                {
 +                    /* We have to translate & rotate the reference atoms first */
 +                    translate_and_rotate(buf->xc_ref, edi->sref.nr, transvec, rotmat);
 +                    rmsdev = rmsd_from_structure(buf->xc_ref,&edi->sref);
 +                }
 +            }
 +
 +            /* update radsam references, when required */
 +            if (do_per_step(step,edi->maxedsteps) && step >= edi->presteps)
 +            {
 +                project(buf->xcoll, edi);
 +                rad_project(edi, buf->xcoll, &edi->vecs.radacc, cr);
 +                rad_project(edi, buf->xcoll, &edi->vecs.radfix, cr);
 +                buf->oldrad=-1.e5;
 +            }
 +
 +            /* update radacc references, when required */
 +            if (do_per_step(step,iupdate) && step >= edi->presteps)
 +            {
 +                edi->vecs.radacc.radius = calc_radius(&edi->vecs.radacc);
 +                /* Slope criterion: re-project when the radius grew by less
 +                 * than edi->slope since the last check */
 +                if (edi->vecs.radacc.radius - buf->oldrad < edi->slope)
 +                {
 +                    project(buf->xcoll, edi);
 +                    rad_project(edi, buf->xcoll, &edi->vecs.radacc, cr);
 +                    buf->oldrad = 0.0;
 +                } else
 +                    buf->oldrad = edi->vecs.radacc.radius;
 +            }
 +
 +            /* apply the constraints */
 +            if (step >= edi->presteps && ed_constraints(ed->eEDtype, edi))
 +            {
 +                /* ED constraints should be applied already in the first MD step
 +                 * (which is step 0), therefore we pass step+1 to the routine */
 +                ed_apply_constraints(buf->xcoll, edi, step+1 - ir->init_step, cr);
 +            }
 +
 +            /* write to edo, when required */
 +            if (do_per_step(step,edi->outfrq))
 +            {
 +                project(buf->xcoll, edi);
 +                if (MASTER(cr) && !bSuppress)
 +                    write_edo(edinr, edi, ed, step, rmsdev);
 +            }
 +
 +            /* Copy back the positions unless monitoring only */
 +            if (ed_constraints(ed->eEDtype, edi))
 +            {
 +                /* remove fitting */
 +                rmfit(edi->sav.nr, buf->xcoll, transvec, rotmat);
 +
 +                /* Copy the ED corrected positions into the coordinate array */
 +                /* Each node copies its local part. In the serial case, nat_loc is the
 +                 * total number of ED atoms */
 +                for (i=0; i<edi->sav.nr_loc; i++)
 +                {
 +                    /* Unshift local ED coordinate and store in x_unsh */
 +                    ed_unshift_single_coord(box, buf->xcoll[edi->sav.c_ind[i]],
 +                                            buf->shifts_xcoll[edi->sav.c_ind[i]], x_unsh);
 +
 +                    /* dx is the ED correction to the positions: */
 +                    rvec_sub(x_unsh, xs[edi->sav.anrs_loc[i]], dx);
 +
 +                    if (v != NULL)
 +                    {
 +                        /* dv is the ED correction to the velocity: */
 +                        svmul(dt_1, dx, dv);
 +                        /* apply the velocity correction: */
 +                        rvec_inc(v[edi->sav.anrs_loc[i]], dv);
 +                    }
 +                    /* Finally apply the position correction due to ED: */
 +                    copy_rvec(x_unsh, xs[edi->sav.anrs_loc[i]]);
 +                }
 +            }
 +        } /* END of if (edi->bNeedDoEdsam) */
 +
 +        /* Prepare for the next ED dataset */
 +        edi = edi->next_edi;
 +
 +    } /* END of loop over ED datasets */
 +
 +    /* Later calls must not re-initialise buf->oldrad */
 +    ed->bFirst = FALSE;
 +}
Simple merge
Simple merge
index c11481d7577792c903af98bd9f91ae363505a688,0000000000000000000000000000000000000000..47452c56b820b39b3e96d06fbfc0958fb58a25af
mode 100644,000000..100644
--- /dev/null
@@@ -1,2516 -1,0 +1,2516 @@@
- static void copy_em_coords_back(em_state_t *ems,t_state *state,rvec *f)
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <string.h>
 +#include <time.h>
 +#include <math.h>
 +#include "sysstuff.h"
 +#include "string2.h"
 +#include "network.h"
 +#include "confio.h"
 +#include "copyrite.h"
 +#include "smalloc.h"
 +#include "nrnb.h"
 +#include "main.h"
 +#include "force.h"
 +#include "macros.h"
 +#include "random.h"
 +#include "names.h"
 +#include "gmx_fatal.h"
 +#include "txtdump.h"
 +#include "typedefs.h"
 +#include "update.h"
 +#include "constr.h"
 +#include "vec.h"
 +#include "statutil.h"
 +#include "tgroup.h"
 +#include "mdebin.h"
 +#include "vsite.h"
 +#include "force.h"
 +#include "mdrun.h"
 +#include "domdec.h"
 +#include "partdec.h"
 +#include "trnio.h"
 +#include "sparsematrix.h"
 +#include "mtxio.h"
 +#include "mdatoms.h"
 +#include "ns.h"
 +#include "gmx_wallcycle.h"
 +#include "mtop_util.h"
 +#include "gmxfio.h"
 +#include "pme.h"
 +#include "membed.h"
 +
 +typedef struct {
 +  t_state s;       /* state (coordinates, box, lambda, ...) of this trial point */
 +  rvec    *f;      /* forces belonging to the atoms in s */
 +  real    epot;    /* potential energy, stored by evaluate_energy() */
 +  real    fnorm;   /* norm of the force, filled in by get_f_norm_max() */
 +  real    fmax;    /* largest force on a single atom, filled in by get_f_norm_max() */
 +  int     a_fmax;  /* 0-based index of the atom with the largest force */
 +} em_state_t;
 +
 +static em_state_t *init_em_state()
 +{
 +  em_state_t *ems;
 +  
 +  snew(ems,1);
 +
 +  return ems;
 +}
 +
 +static void print_em_start(FILE *fplog,t_commrec *cr,gmx_runtime_t *runtime,
 +                           gmx_wallcycle_t wcycle,
 +                           const char *name)
 +{
 +    char buf[STRLEN];
 +
 +    runtime_start(runtime);
 +
 +    sprintf(buf,"Started %s",name);
 +    print_date_and_time(fplog,cr->nodeid,buf,NULL);
 +
 +    wallcycle_start(wcycle,ewcRUN);
 +}
 +static void em_time_end(FILE *fplog,t_commrec *cr,gmx_runtime_t *runtime,
 +                        gmx_wallcycle_t wcycle)
 +{
 +    wallcycle_stop(wcycle,ewcRUN);
 +
 +    runtime_end(runtime);
 +}
 +
 +static void sp_header(FILE *out,const char *minimizer,real ftol,int nsteps)
 +{
 +    fprintf(out,"\n");
 +    fprintf(out,"%s:\n",minimizer);
 +    fprintf(out,"   Tolerance (Fmax)   = %12.5e\n",ftol);
 +    fprintf(out,"   Number of steps    = %12d\n",nsteps);
 +}
 +
 +static void warn_step(FILE *fp,real ftol,gmx_bool bLastStep,gmx_bool bConstrain)
 +{
 +    if (bLastStep)
 +    {
 +        fprintf(fp,"\nReached the maximum number of steps before reaching Fmax < %g\n",ftol);
 +    }
 +    else
 +    {
 +        fprintf(fp,"\nStepsize too small, or no change in energy.\n"
 +                "Converged to machine precision,\n"
 +                "but not to the requested precision Fmax < %g\n",
 +                ftol);
 +        if (sizeof(real)<sizeof(double))
 +        {
 +            fprintf(fp,"\nDouble precision normally gives you higher accuracy.\n");
 +        }
 +        if (bConstrain)
 +        {
 +            fprintf(fp,"You might need to increase your constraint accuracy, or turn\n"
 +                    "off constraints alltogether (set constraints = none in mdp file)\n");
 +        }
 +    }
 +}
 +
 +
 +
 +static void print_converged(FILE *fp,const char *alg,real ftol,
 +                          gmx_large_int_t count,gmx_bool bDone,gmx_large_int_t nsteps,
 +                          real epot,real fmax, int nfmax, real fnorm)
 +{
 +  char buf[STEPSTRSIZE];
 +
 +  if (bDone)
 +    fprintf(fp,"\n%s converged to Fmax < %g in %s steps\n",
 +          alg,ftol,gmx_step_str(count,buf)); 
 +  else if(count<nsteps)
 +    fprintf(fp,"\n%s converged to machine precision in %s steps,\n"
 +               "but did not reach the requested Fmax < %g.\n",
 +          alg,gmx_step_str(count,buf),ftol);
 +  else 
 +    fprintf(fp,"\n%s did not converge to Fmax < %g in %s steps.\n",
 +          alg,ftol,gmx_step_str(count,buf));
 +
 +#ifdef GMX_DOUBLE
 +  fprintf(fp,"Potential Energy  = %21.14e\n",epot); 
 +  fprintf(fp,"Maximum force     = %21.14e on atom %d\n",fmax,nfmax+1); 
 +  fprintf(fp,"Norm of force     = %21.14e\n",fnorm); 
 +#else
 +  fprintf(fp,"Potential Energy  = %14.7e\n",epot); 
 +  fprintf(fp,"Maximum force     = %14.7e on atom %d\n",fmax,nfmax+1); 
 +  fprintf(fp,"Norm of force     = %14.7e\n",fnorm); 
 +#endif
 +}
 +
 +static void get_f_norm_max(t_commrec *cr,
 +                         t_grpopts *opts,t_mdatoms *mdatoms,rvec *f,
 +                         real *fnorm,real *fmax,int *a_fmax)
 +{
 +  double fnorm2,*sum;
 +  real fmax2,fmax2_0,fam;
 +  int  la_max,a_max,start,end,i,m,gf;
 +
 +  /* This routine finds the largest force and returns it.
 +   * On parallel machines the global max is taken.
 +   */
 +  fnorm2 = 0;
 +  fmax2 = 0;
 +  la_max = -1;
 +  gf = 0;
 +  start = mdatoms->start;
 +  end   = mdatoms->homenr + start;
 +  if (mdatoms->cFREEZE) {
 +    for(i=start; i<end; i++) {
 +      gf = mdatoms->cFREEZE[i];
 +      fam = 0;
 +      for(m=0; m<DIM; m++)
 +      if (!opts->nFreeze[gf][m])
 +        fam += sqr(f[i][m]);
 +      fnorm2 += fam;
 +      if (fam > fmax2) {
 +      fmax2  = fam;
 +      la_max = i;
 +      }
 +    }
 +  } else {
 +    for(i=start; i<end; i++) {
 +      fam = norm2(f[i]);
 +      fnorm2 += fam;
 +      if (fam > fmax2) {
 +      fmax2  = fam;
 +      la_max = i;
 +      }
 +    }
 +  }
 +
 +  if (la_max >= 0 && DOMAINDECOMP(cr)) {
 +    a_max = cr->dd->gatindex[la_max];
 +  } else {
 +    a_max = la_max;
 +  }
 +  if (PAR(cr)) {
 +    snew(sum,2*cr->nnodes+1);
 +    sum[2*cr->nodeid]   = fmax2;
 +    sum[2*cr->nodeid+1] = a_max;
 +    sum[2*cr->nnodes]   = fnorm2;
 +    gmx_sumd(2*cr->nnodes+1,sum,cr);
 +    fnorm2 = sum[2*cr->nnodes];
 +    /* Determine the global maximum */
 +    for(i=0; i<cr->nnodes; i++) {
 +      if (sum[2*i] > fmax2) {
 +      fmax2 = sum[2*i];
 +      a_max = (int)(sum[2*i+1] + 0.5);
 +      }
 +    }
 +    sfree(sum);
 +  }
 +
 +  if (fnorm)
 +    *fnorm = sqrt(fnorm2);
 +  if (fmax)
 +    *fmax  = sqrt(fmax2);
 +  if (a_fmax)
 +    *a_fmax = a_max;
 +}
 +
 +static void get_state_f_norm_max(t_commrec *cr,
 +                         t_grpopts *opts,t_mdatoms *mdatoms,
 +                         em_state_t *ems)
 +{
 +  get_f_norm_max(cr,opts,mdatoms,ems->f,&ems->fnorm,&ems->fmax,&ems->a_fmax);
 +}
 +/* Prepare an energy-minimization run.
 + *
 + * Resets ngtc and lambda in state_global, builds the local state in ems
 + * (through domain decomposition, or as a copy of state_global), creates
 + * the local topology, sets up mdatoms, vsites and constraints, constrains
 + * the starting coordinates unless ir->bContinuation is set, and opens the
 + * output files and energy bookkeeping.
 + *
 + * Results are returned through top, f, f_global, enerd, graph, gstat,
 + * outf and, when it is not NULL, mdebin.
 + */
 +void init_em(FILE *fplog,const char *title,
 +             t_commrec *cr,t_inputrec *ir,
 +             t_state *state_global,gmx_mtop_t *top_global,
 +             em_state_t *ems,gmx_localtop_t **top,
 +             rvec **f,rvec **f_global,
 +             t_nrnb *nrnb,rvec mu_tot,
 +             t_forcerec *fr,gmx_enerdata_t **enerd,
 +             t_graph **graph,t_mdatoms *mdatoms,gmx_global_stat_t *gstat,
 +             gmx_vsite_t *vsite,gmx_constr_t constr,
 +             int nfile,const t_filenm fnm[],
 +             gmx_mdoutf_t **outf,t_mdebin **mdebin)
 +{
 +    int  start,homenr,i;
 +    real dvdlambda;
 +    
 +    if (fplog)
 +    {
 +        fprintf(fplog,"Initiating %s\n",title);
 +    }
 +    
 +    state_global->ngtc = 0;
 +    
 +    /* Initiate some variables */
 +    if (ir->efep != efepNO)
 +    {
 +        state_global->lambda = ir->init_lambda;
 +    }
 +    else 
 +    {
 +        state_global->lambda = 0.0;
 +    }
 +    
 +    init_nrnb(nrnb);
 +    
 +    if (DOMAINDECOMP(cr))
 +    {
 +        *top = dd_init_local_top(top_global);
 +        
 +        dd_init_local_state(cr->dd,state_global,&ems->s);
 +        /* With DD no separate f is allocated; ems->f is passed to the partitioning below */
 +        *f = NULL;
 +        
 +        /* Distribute the charge groups over the nodes from the master node */
 +        dd_partition_system(fplog,ir->init_step,cr,TRUE,1,
 +                            state_global,top_global,ir,
 +                            &ems->s,&ems->f,mdatoms,*top,
 +                            fr,vsite,NULL,constr,
 +                            nrnb,NULL,FALSE);
 +        dd_store_state(cr->dd,&ems->s);
 +        
 +        if (ir->nstfout)
 +        {
 +            snew(*f_global,top_global->natoms);
 +        }
 +        else
 +        {
 +            *f_global = NULL;
 +        }
 +        *graph = NULL;
 +    }
 +    else
 +    {
 +        snew(*f,top_global->natoms);
 +
 +        /* Just copy the state */
 +        ems->s = *state_global;
 +        snew(ems->s.x,ems->s.nalloc); /* give ems its own coordinate array instead of sharing the global one */
 +        snew(ems->f,ems->s.nalloc);
 +        for(i=0; i<state_global->natoms; i++)
 +        {
 +            copy_rvec(state_global->x[i],ems->s.x[i]);
 +        }
 +        copy_mat(state_global->box,ems->s.box);
 +        
 +        if (PAR(cr) && ir->eI != eiNM)
 +        {
 +            /* Initialize the particle decomposition and split the topology */
 +            *top = split_system(fplog,top_global,ir,cr);
 +            
 +            pd_cg_range(cr,&fr->cg0,&fr->hcg);
 +        }
 +        else
 +        {
 +            *top = gmx_mtop_generate_local_top(top_global,ir);
 +        }
 +        *f_global = *f; /* without DD, f_global is simply an alias for f */
 +        
 +        if (ir->ePBC != epbcNONE && !ir->bPeriodicMols)
 +        {
 +            *graph = mk_graph(fplog,&((*top)->idef),0,top_global->natoms,FALSE,FALSE);
 +        }
 +        else
 +        {
 +            *graph = NULL;
 +        }
 +
 +        if (PARTDECOMP(cr))
 +        {
 +            pd_at_range(cr,&start,&homenr);
 +            homenr -= start; /* second output is used as an end index elsewhere in this file; convert to a count */
 +        }
 +        else
 +        {
 +            start  = 0;
 +            homenr = top_global->natoms;
 +        }
 +        atoms2md(top_global,ir,0,NULL,start,homenr,mdatoms);
 +        update_mdatoms(mdatoms,state_global->lambda);
 +    
 +        if (vsite)
 +        {
 +            set_vsite_top(vsite,*top,mdatoms,cr);
 +        }
 +    }
 +    
 +    if (constr)
 +    {
 +        if (ir->eConstrAlg == econtSHAKE &&
 +            gmx_mtop_ftype_count(top_global,F_CONSTR) > 0)
 +        {
 +            gmx_fatal(FARGS,"Can not do energy minimization with %s, use %s\n",
 +                      econstr_names[econtSHAKE],econstr_names[econtLINCS]);
 +        }
 +        
 +        if (!DOMAINDECOMP(cr))
 +        {
 +            set_constraints(constr,*top,ir,mdatoms,cr);
 +        }
 +
 +        if (!ir->bContinuation)
 +        {
 +            /* Constrain the starting coordinates */
 +            dvdlambda=0;
 +            constrain(PAR(cr) ? NULL : fplog,TRUE,TRUE,constr,&(*top)->idef,
 +                      ir,NULL,cr,-1,0,mdatoms,
 +                      ems->s.x,ems->s.x,NULL,ems->s.box,
 +                      ems->s.lambda,&dvdlambda,
 +                      NULL,NULL,nrnb,econqCoord,FALSE,0,0);
 +        }
 +    }
 +    
 +    if (PAR(cr))
 +    {
 +        *gstat = global_stat_init(ir);
 +    }
 +    
 +    *outf = init_mdoutf(nfile,fnm,0,cr,ir,NULL);
 +
 +    snew(*enerd,1);
 +    init_enerdata(top_global->groups.grps[egcENER].nr,ir->n_flambda,*enerd);
 +
 +    if (mdebin != NULL)
 +    {
 +        /* Init bin for energy stuff */
 +        *mdebin = init_mdebin((*outf)->fp_ene,top_global,ir,NULL); 
 +    }
 +
 +    clear_rvec(mu_tot);
 +    calc_shifts(ems->s.box,fr->shift_vec);
 +}
 +
 +static void finish_em(FILE *fplog,t_commrec *cr,gmx_mdoutf_t *outf,
 +                      gmx_runtime_t *runtime,gmx_wallcycle_t wcycle)
 +{
 +  if (!(cr->duty & DUTY_PME)) {
 +    /* Tell the PME only node to finish */
 +    gmx_pme_finish(cr);
 +  }
 +
 +  done_mdoutf(outf);
 +
 +  em_time_end(fplog,cr,runtime,wcycle);
 +}
 +
 +static void swap_em_state(em_state_t *ems1,em_state_t *ems2)
 +{
 +  em_state_t tmp;
 +
 +  tmp   = *ems1;
 +  *ems1 = *ems2;
 +  *ems2 = tmp;
 +}
 +
-   int i;
++static void copy_em_coords(em_state_t *ems,t_state *state)
 +{
-   for(i=0; (i<state->natoms); i++)
-     copy_rvec(ems->s.x[i],state->x[i]);
-   if (f != NULL)
-     copy_rvec(ems->f[i],f[i]);
++    int i;
 +
-         copy_em_coords_back(state,state_global,bF ? f_global : NULL);
++    for(i=0; (i<state->natoms); i++)
++    {
++        copy_rvec(ems->s.x[i],state->x[i]);
++    }
 +}
 +/* Write trajectory output for the minimizer state and, optionally, a
 + * final configuration file.
 + *
 + * bX / bF select coordinate and force output for write_traj().
 + * confout, when not NULL, is the name of a structure file that the
 + * master node writes from state_global.
 + */
 +static void write_em_traj(FILE *fplog,t_commrec *cr,
 +                          gmx_mdoutf_t *outf,
 +                          gmx_bool bX,gmx_bool bF,const char *confout,
 +                          gmx_mtop_t *top_global,
 +                          t_inputrec *ir,gmx_large_int_t step,
 +                          em_state_t *state,
 +                          t_state *state_global,rvec *f_global)
 +{
 +    int mdof_flags;
 +
 +    if ((bX || bF || confout != NULL) && !DOMAINDECOMP(cr))
 +    {
++        copy_em_coords(state,state_global);
 +        f_global = state->f; /* without DD the state's own force array is passed as the global one */
 +    }
 +    
 +    mdof_flags = 0;
 +    if (bX) { mdof_flags |= MDOF_X; }
 +    if (bF) { mdof_flags |= MDOF_F; }
 +    write_traj(fplog,cr,outf,mdof_flags,
 +               top_global,step,(double)step,
 +               &state->s,state_global,state->f,f_global,NULL,NULL);
 +    
 +    if (confout != NULL && MASTER(cr))
 +    {
 +        if (ir->ePBC != epbcNONE && !ir->bPeriodicMols && DOMAINDECOMP(cr))
 +        {
 +            /* Make molecules whole only for confout writing */
 +            do_pbc_mtop(fplog,ir->ePBC,state_global->box,top_global,
 +                        state_global->x);
 +        }
 +
 +        write_sto_conf_mtop(confout,
 +                            *top_global->name,top_global,
 +                            state_global->x,NULL,ir->ePBC,state_global->box);
 +    }
 +}
 +/* Take a step of size a along f, starting from the state in ems1, and
 + * store the result in ems2.
 + *
 + * Dimensions frozen through ir->opts.nFreeze are copied unchanged.
 + * ems2 is resized to match ems1 when needed, and also receives the
 + * natoms, lambda, box, the CG p vector (when the estCGP flag is set) and,
 + * with domain decomposition, the charge-group bookkeeping of ems1.
 + * When constr is set the new coordinates are constrained at the end.
 + */
 +static void do_em_step(t_commrec *cr,t_inputrec *ir,t_mdatoms *md,
 +                     em_state_t *ems1,real a,rvec *f,em_state_t *ems2,
 +                     gmx_constr_t constr,gmx_localtop_t *top,
 +                     t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +                     gmx_large_int_t count)
 +
 +{
 +  t_state *s1,*s2;
 +  int  start,end,gf,i,m;
 +  rvec *x1,*x2;
 +  real dvdlambda;
 +
 +  s1 = &ems1->s;
 +  s2 = &ems2->s;
 +
 +  if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
 +    gmx_incons("state mismatch in do_em_step");
 +
 +  s2->flags = s1->flags;
 +
 +  if (s2->nalloc != s1->nalloc) { /* bring the arrays of ems2 to the allocation size of ems1 */
 +    s2->nalloc = s1->nalloc;
 +    srenew(s2->x,s1->nalloc);
 +    srenew(ems2->f,  s1->nalloc);
 +    if (s2->flags & (1<<estCGP))
 +      srenew(s2->cg_p,  s1->nalloc);
 +  }
 +  
 +  s2->natoms = s1->natoms;
 +  s2->lambda = s1->lambda;
 +  copy_mat(s1->box,s2->box);
 +
 +  start = md->start;
 +  end   = md->start + md->homenr;
 +
 +  x1 = s1->x;
 +  x2 = s2->x;
 +  gf = 0; /* freeze group 0 is used for every atom when md->cFREEZE is NULL */
 +  for(i=start; i<end; i++) {
 +    if (md->cFREEZE)
 +      gf = md->cFREEZE[i];
 +    for(m=0; m<DIM; m++) {
 +      if (ir->opts.nFreeze[gf][m])
 +      x2[i][m] = x1[i][m];
 +      else
 +      x2[i][m] = x1[i][m] + a*f[i][m];
 +    }
 +  }
 +
 +  if (s2->flags & (1<<estCGP)) {
 +    /* Copy the CG p vector */
 +    x1 = s1->cg_p;
 +    x2 = s2->cg_p;
 +    for(i=start; i<end; i++)
 +      copy_rvec(x1[i],x2[i]);
 +  }
 +
 +  if (DOMAINDECOMP(cr)) { /* carry the charge-group bookkeeping over to ems2 */
 +    s2->ddp_count = s1->ddp_count;
 +    if (s2->cg_gl_nalloc < s1->cg_gl_nalloc) {
 +      s2->cg_gl_nalloc = s1->cg_gl_nalloc;
 +      srenew(s2->cg_gl,s2->cg_gl_nalloc);
 +    }
 +    s2->ncg_gl = s1->ncg_gl;
 +    for(i=0; i<s2->ncg_gl; i++)
 +      s2->cg_gl[i] = s1->cg_gl[i];
 +    s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
 +  }
 +
 +  if (constr) {
 +    wallcycle_start(wcycle,ewcCONSTR);
 +    dvdlambda = 0;
 +    constrain(NULL,TRUE,TRUE,constr,&top->idef,       
 +              ir,NULL,cr,count,0,md,
 +              s1->x,s2->x,NULL,s2->box,s2->lambda,
 +              &dvdlambda,NULL,NULL,nrnb,econqCoord,FALSE,0,0);
 +    wallcycle_stop(wcycle,ewcCONSTR);
 +  }
 +}
 +
 +static void do_x_step(t_commrec *cr,int n,rvec *x1,real a,rvec *f,rvec *x2)
 +
 +{
 +  int  start,end,i,m;
 +
 +  if (DOMAINDECOMP(cr)) {
 +    start = 0;
 +    end   = cr->dd->nat_home;
 +  } else if (PARTDECOMP(cr)) {
 +    pd_at_range(cr,&start,&end);
 +  } else {
 +    start = 0;
 +    end   = n;
 +  }
 +
 +  for(i=start; i<end; i++) {
 +    for(m=0; m<DIM; m++) {
 +      x2[i][m] = x1[i][m] + a*f[i][m];
 +    }
 +  }
 +}
 +
 +static void do_x_sub(t_commrec *cr,int n,rvec *x1,rvec *x2,real a,rvec *f)
 +
 +{
 +  int  start,end,i,m;
 +
 +  if (DOMAINDECOMP(cr)) {
 +    start = 0;
 +    end   = cr->dd->nat_home;
 +  } else if (PARTDECOMP(cr)) {
 +    pd_at_range(cr,&start,&end);
 +  } else {
 +    start = 0;
 +    end   = n;
 +  }
 +
 +  for(i=start; i<end; i++) {
 +    for(m=0; m<DIM; m++) {
 +      f[i][m] = (x1[i][m] - x2[i][m])*a;
 +    }
 +  }
 +}
 +
 +static void em_dd_partition_system(FILE *fplog,int step,t_commrec *cr,
 +                                   gmx_mtop_t *top_global,t_inputrec *ir,
 +                                   em_state_t *ems,gmx_localtop_t *top,
 +                                   t_mdatoms *mdatoms,t_forcerec *fr,
 +                                   gmx_vsite_t *vsite,gmx_constr_t constr,
 +                                   t_nrnb *nrnb,gmx_wallcycle_t wcycle)
 +{
 +    /* Repartition the domain decomposition */
 +    wallcycle_start(wcycle,ewcDOMDEC);
 +    dd_partition_system(fplog,step,cr,FALSE,1,
 +                        NULL,top_global,ir,
 +                        &ems->s,&ems->f,
 +                        mdatoms,top,fr,vsite,NULL,constr,
 +                        nrnb,wcycle,FALSE);
 +    dd_store_state(cr->dd,&ems->s);
 +    wallcycle_stop(wcycle,ewcDOMDEC);
 +}
 +/* Compute energy and forces for the minimizer state ems.
 + *
 + * Steps: decide whether neighbour searching is needed, construct the
 + * virtual sites, repartition the domain decomposition when searching,
 + * call do_force(), add the dispersion correction terms, sum the energies
 + * over the nodes, store the potential energy in ems->epot, project the
 + * constraint components out of the forces, compute the pressure, and,
 + * for energy-minimization integrators, fill ems->fnorm, ems->fmax and
 + * ems->a_fmax.
 + *
 + * bFirst forces neighbour searching. bVerbose and state_global are not
 + * referenced in this function.
 + */
 +static void evaluate_energy(FILE *fplog,gmx_bool bVerbose,t_commrec *cr,
 +                            t_state *state_global,gmx_mtop_t *top_global,
 +                            em_state_t *ems,gmx_localtop_t *top,
 +                            t_inputrec *inputrec,
 +                            t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +                            gmx_global_stat_t gstat,
 +                            gmx_vsite_t *vsite,gmx_constr_t constr,
 +                            t_fcdata *fcd,
 +                            t_graph *graph,t_mdatoms *mdatoms,
 +                            t_forcerec *fr,rvec mu_tot,
 +                            gmx_enerdata_t *enerd,tensor vir,tensor pres,
 +                            gmx_large_int_t count,gmx_bool bFirst)
 +{
 +  real t;
 +  gmx_bool bNS;
 +  int  nabnsb;
 +  tensor force_vir,shake_vir,ekin;
 +  real dvdl,prescorr,enercorr,dvdlcorr;
 +  real terminate=0;
 +  
 +  /* Set the time to the initial time, the time does not change during EM */
 +  t = inputrec->init_t;
 +
 +  if (bFirst ||
 +      (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count)) {
 +    /* This is the first state or an old state used before the last ns */
 +    bNS = TRUE;
 +  } else {
 +    bNS = FALSE;
 +    if (inputrec->nstlist > 0) {
 +      bNS = TRUE;
 +    } else if (inputrec->nstlist == -1) { /* search only when atoms are reported beyond the ns buffer */
 +      nabnsb = natoms_beyond_ns_buffer(inputrec,fr,&top->cgs,NULL,ems->s.x);
 +      if (PAR(cr))
 +      gmx_sumi(1,&nabnsb,cr);
 +      bNS = (nabnsb > 0);
 +    }
 +  }
 +
 +  if (vsite)
 +    construct_vsites(fplog,vsite,ems->s.x,nrnb,1,NULL,
 +                   top->idef.iparams,top->idef.il,
 +                   fr->ePBC,fr->bMolPBC,graph,cr,ems->s.box);
 +
 +  if (DOMAINDECOMP(cr)) {
 +    if (bNS) {
 +      /* Repartition the domain decomposition */
 +      em_dd_partition_system(fplog,count,cr,top_global,inputrec,
 +                           ems,top,mdatoms,fr,vsite,constr,
 +                           nrnb,wcycle);
 +    }
 +  }
 +      
 +    /* Calc force & energy on new trial position  */
 +    /* do_force always puts the charge groups in the box and shifts again
 +     * We do not unshift, so molecules are always whole in congrad.c
 +     */
 +    do_force(fplog,cr,inputrec,
 +             count,nrnb,wcycle,top,top_global,&top_global->groups,
 +             ems->s.box,ems->s.x,&ems->s.hist,
 +             ems->f,force_vir,mdatoms,enerd,fcd,
 +             ems->s.lambda,graph,fr,vsite,mu_tot,t,NULL,NULL,TRUE,
 +             GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES | GMX_FORCE_VIRIAL |
 +             (bNS ? GMX_FORCE_NS | GMX_FORCE_DOLR : 0));
 +      
 +  /* Clear the unused shake virial and pressure */
 +  clear_mat(shake_vir);
 +  clear_mat(pres);
 +
 +  /* Calculate long range corrections to pressure and energy */
 +  calc_dispcorr(fplog,inputrec,fr,count,top_global->natoms,ems->s.box,ems->s.lambda,
 +                pres,force_vir,&prescorr,&enercorr,&dvdlcorr);
 +  /* don't think these next 4 lines  can be moved in for now, because we 
 +     don't always want to write it -- figure out how to clean this up MRS 8/4/2009 */
 +  enerd->term[F_DISPCORR] = enercorr;
 +  enerd->term[F_EPOT] += enercorr;
 +  enerd->term[F_PRES] += prescorr;
 +  enerd->term[F_DVDL] += dvdlcorr;
 +
 +    /* Communicate stuff when parallel */
 +    if (PAR(cr) && inputrec->eI != eiNM)
 +    {
 +        wallcycle_start(wcycle,ewcMoveE);
 +
 +        global_stat(fplog,gstat,cr,enerd,force_vir,shake_vir,mu_tot,
 +                    inputrec,NULL,NULL,NULL,1,&terminate,
 +                    top_global,&ems->s,FALSE,
 +                    CGLO_ENERGY | 
 +                    CGLO_PRESSURE | 
 +                    CGLO_CONSTRAINT | 
 +                    CGLO_FIRSTITERATE);
 +
 +        wallcycle_stop(wcycle,ewcMoveE);
 +    }
 +
 +  ems->epot = enerd->term[F_EPOT];
 +  
 +  if (constr) {
 +    /* Project out the constraint components of the force */
 +    wallcycle_start(wcycle,ewcCONSTR);
 +    dvdl = 0;
 +    constrain(NULL,FALSE,FALSE,constr,&top->idef,
 +              inputrec,NULL,cr,count,0,mdatoms,
 +              ems->s.x,ems->f,ems->f,ems->s.box,ems->s.lambda,&dvdl,
 +              NULL,&shake_vir,nrnb,econqForceDispl,FALSE,0,0);
 +    if (fr->bSepDVDL && fplog)
 +      fprintf(fplog,sepdvdlformat,"Constraints",t,dvdl);
 +    enerd->term[F_DHDL_CON] += dvdl;
 +    m_add(force_vir,shake_vir,vir);
 +    wallcycle_stop(wcycle,ewcCONSTR);
 +  } else {
 +    copy_mat(force_vir,vir);
 +  }
 +
 +  clear_mat(ekin); /* a zeroed kinetic-energy tensor is passed to calc_pres() */
 +  enerd->term[F_PRES] =
 +    calc_pres(fr->ePBC,inputrec->nwall,ems->s.box,ekin,vir,pres,
 +            (fr->eeltype==eelPPPM)?enerd->term[F_COUL_RECIP]:0.0);
 +
 +  sum_dhdl(enerd,ems->s.lambda,inputrec);
 +
 +    if (EI_ENERGY_MINIMIZATION(inputrec->eI))
 +    {
 +        get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,ems);
 +    }
 +}
 +
 +static double reorder_partsum(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
 +                            gmx_mtop_t *mtop,
 +                            em_state_t *s_min,em_state_t *s_b)
 +{
 +  rvec *fm,*fb,*fmg;
 +  t_block *cgs_gl;
 +  int ncg,*cg_gl,*index,c,cg,i,a0,a1,a,gf,m;
 +  double partsum;
 +  unsigned char *grpnrFREEZE;
 +
 +  if (debug)
 +    fprintf(debug,"Doing reorder_partsum\n");
 +
 +  fm = s_min->f;
 +  fb = s_b->f;
 +
 +  cgs_gl = dd_charge_groups_global(cr->dd);
 +  index = cgs_gl->index;
 +
 +  /* Collect fm in a global vector fmg.
 +   * This conflicts with the spirit of domain decomposition,
 +   * but to fully optimize this a much more complicated algorithm is required.
 +   */
 +  snew(fmg,mtop->natoms);
 +  
 +  ncg   = s_min->s.ncg_gl;
 +  cg_gl = s_min->s.cg_gl;
 +  i = 0;
 +  for(c=0; c<ncg; c++) {
 +    cg = cg_gl[c];
 +    a0 = index[cg];
 +    a1 = index[cg+1];
 +    for(a=a0; a<a1; a++) {
 +      copy_rvec(fm[i],fmg[a]);
 +      i++;
 +    }
 +  }
 +  gmx_sum(mtop->natoms*3,fmg[0],cr);
 +
 +  /* Now we will determine the part of the sum for the cgs in state s_b */
 +  ncg   = s_b->s.ncg_gl;
 +  cg_gl = s_b->s.cg_gl;
 +  partsum = 0;
 +  i = 0;
 +  gf = 0;
 +  grpnrFREEZE = mtop->groups.grpnr[egcFREEZE];
 +  for(c=0; c<ncg; c++) {
 +    cg = cg_gl[c];
 +    a0 = index[cg];
 +    a1 = index[cg+1];
 +    for(a=a0; a<a1; a++) {
 +      if (mdatoms->cFREEZE && grpnrFREEZE) {
 +      gf = grpnrFREEZE[i];
 +      }
 +      for(m=0; m<DIM; m++) {
 +      if (!opts->nFreeze[gf][m]) {
 +        partsum += (fb[i][m] - fmg[a][m])*fb[i][m];
 +      }
 +      }
 +      i++;
 +    }
 +  }
 +  
 +  sfree(fmg);
 +
 +  return partsum;
 +}
 +
 +static real pr_beta(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
 +                  gmx_mtop_t *mtop,
 +                  em_state_t *s_min,em_state_t *s_b)
 +{
 +  rvec *fm,*fb;
 +  double sum;
 +  int  gf,i,m;
 +
 +  /* This is just the classical Polak-Ribiere calculation of beta;
 +   * it looks a bit complicated since we take freeze groups into account,
 +   * and might have to sum it in parallel runs.
 +   */
 +  
 +  if (!DOMAINDECOMP(cr) ||
 +      (s_min->s.ddp_count == cr->dd->ddp_count &&
 +       s_b->s.ddp_count   == cr->dd->ddp_count)) {
 +    fm = s_min->f;
 +    fb = s_b->f;
 +    sum = 0;
 +    gf = 0;
 +    /* This part of code can be incorrect with DD,
 +     * since the atom ordering in s_b and s_min might differ.
 +     */
 +    for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
 +      if (mdatoms->cFREEZE)
 +      gf = mdatoms->cFREEZE[i];
 +      for(m=0; m<DIM; m++)
 +      if (!opts->nFreeze[gf][m]) {
 +        sum += (fb[i][m] - fm[i][m])*fb[i][m];
 +      } 
 +    }
 +  } else {
 +    /* We need to reorder cgs while summing */
 +    sum = reorder_partsum(cr,opts,mdatoms,mtop,s_min,s_b);
 +  }
 +  if (PAR(cr))
 +    gmx_sumd(1,&sum,cr);
 +
 +  return sum/sqr(s_min->fnorm);
 +}
 +
 +double do_cg(FILE *fplog,t_commrec *cr,
 +             int nfile,const t_filenm fnm[],
 +             const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
 +             int nstglobalcomm,
 +             gmx_vsite_t *vsite,gmx_constr_t constr,
 +             int stepout,
 +             t_inputrec *inputrec,
 +             gmx_mtop_t *top_global,t_fcdata *fcd,
 +             t_state *state_global,
 +             t_mdatoms *mdatoms,
 +             t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +             gmx_edsam_t ed,
 +             t_forcerec *fr,
 +             int repl_ex_nst,int repl_ex_seed,
 +             gmx_membed_t *membed,
 +             real cpt_period,real max_hours,
 +             const char *deviceOptions,
 +             unsigned long Flags,
 +             gmx_runtime_t *runtime)
 +{
 +  const char *CG="Polak-Ribiere Conjugate Gradients";
 +
 +  em_state_t *s_min,*s_a,*s_b,*s_c;
 +  gmx_localtop_t *top;
 +  gmx_enerdata_t *enerd;
 +  rvec   *f;
 +  gmx_global_stat_t gstat;
 +  t_graph    *graph;
 +  rvec   *f_global,*p,*sf,*sfm;
 +  double gpa,gpb,gpc,tmp,sum[2],minstep;
 +  real   fnormn;
 +  real   stepsize;    
 +  real   a,b,c,beta=0.0;
 +  real   epot_repl=0;
 +  real   pnorm;
 +  t_mdebin   *mdebin;
 +  gmx_bool   converged,foundlower;
 +  rvec   mu_tot;
 +  gmx_bool   do_log=FALSE,do_ene=FALSE,do_x,do_f;
 +  tensor vir,pres;
 +  int    number_steps,neval=0,nstcg=inputrec->nstcgsteep;
 +  gmx_mdoutf_t *outf;
 +  int    i,m,gf,step,nminstep;
 +  real   terminate=0;  
 +
 +  step=0;
 +
 +  s_min = init_em_state();
 +  s_a   = init_em_state();
 +  s_b   = init_em_state();
 +  s_c   = init_em_state();
 +
 +  /* Init em and store the local state in s_min */
 +  init_em(fplog,CG,cr,inputrec,
 +          state_global,top_global,s_min,&top,&f,&f_global,
 +          nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
 +          nfile,fnm,&outf,&mdebin);
 +  
 +  /* Print to log file */
 +  print_em_start(fplog,cr,runtime,wcycle,CG);
 +  
 +  /* Max number of steps */
 +  number_steps=inputrec->nsteps;
 +
 +  if (MASTER(cr))
 +    sp_header(stderr,CG,inputrec->em_tol,number_steps);
 +  if (fplog)
 +    sp_header(fplog,CG,inputrec->em_tol,number_steps);
 +
 +  /* Call the force routine and some auxiliary (neighboursearching etc.) */
 +  /* do_force always puts the charge groups in the box and shifts again
 +   * We do not unshift, so molecules are always whole in congrad.c
 +   */
 +  evaluate_energy(fplog,bVerbose,cr,
 +                state_global,top_global,s_min,top,
 +                inputrec,nrnb,wcycle,gstat,
 +                vsite,constr,fcd,graph,mdatoms,fr,
 +                mu_tot,enerd,vir,pres,-1,TRUE);
 +  where();
 +
 +  if (MASTER(cr)) {
 +    /* Copy stuff to the energy bin for easy printing etc. */
 +    upd_mdebin(mdebin,FALSE,FALSE,(double)step,
 +             mdatoms->tmass,enerd,&s_min->s,s_min->s.box,
 +             NULL,NULL,vir,pres,NULL,mu_tot,constr);
 +    
 +    print_ebin_header(fplog,step,step,s_min->s.lambda);
 +    print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
 +               TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
 +  }
 +  where();
 +
 +  /* Estimate/guess the initial stepsize */
 +  stepsize = inputrec->em_stepsize/s_min->fnorm;
 + 
 +  if (MASTER(cr)) {
 +    fprintf(stderr,"   F-max             = %12.5e on atom %d\n",
 +          s_min->fmax,s_min->a_fmax+1);
 +    fprintf(stderr,"   F-Norm            = %12.5e\n",
 +          s_min->fnorm/sqrt(state_global->natoms));
 +    fprintf(stderr,"\n");
 +    /* and copy to the log file too... */
 +    fprintf(fplog,"   F-max             = %12.5e on atom %d\n",
 +          s_min->fmax,s_min->a_fmax+1);
 +    fprintf(fplog,"   F-Norm            = %12.5e\n",
 +          s_min->fnorm/sqrt(state_global->natoms));
 +    fprintf(fplog,"\n");
 +  }  
 +  /* Start the loop over CG steps.            
 +   * Each successful step is counted, and we continue until
 +   * we either converge or reach the max number of steps.
 +   */
 +  converged = FALSE;
 +  for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged;step++) {
 +    
 +    /* start taking steps in a new direction 
 +     * First time we enter the routine, beta=0, and the direction is 
 +     * simply the negative gradient.
 +     */
 +
 +    /* Calculate the new direction in p, and the gradient in this direction, gpa */
 +    p  = s_min->s.cg_p;
 +    sf = s_min->f;
 +    gpa = 0;
 +    gf = 0;
 +    for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
 +      if (mdatoms->cFREEZE) 
 +      gf = mdatoms->cFREEZE[i];
 +      for(m=0; m<DIM; m++) {
 +      if (!inputrec->opts.nFreeze[gf][m]) {
 +        p[i][m] = sf[i][m] + beta*p[i][m];
 +        gpa -= p[i][m]*sf[i][m];
 +        /* f is negative gradient, thus the sign */
 +      } else {
 +          p[i][m] = 0;
 +      }
 +      }
 +    }
 +    
 +    /* Sum the gradient along the line across CPUs */
 +    if (PAR(cr))
 +      gmx_sumd(1,&gpa,cr);
 +
 +    /* Calculate the norm of the search vector */
 +    get_f_norm_max(cr,&(inputrec->opts),mdatoms,p,&pnorm,NULL,NULL);
 +    
 +    /* Just in case stepsize reaches zero due to numerical precision... */
 +    if(stepsize<=0)     
 +      stepsize = inputrec->em_stepsize/pnorm;
 +    
 +    /* 
 +     * Double check the value of the derivative in the search direction.
 +     * If it is positive it must be due to the old information in the
 +     * CG formula, so just remove that and start over with beta=0.
 +     * This corresponds to a steepest descent step.
 +     */
 +    if(gpa>0) {
 +      beta = 0;
 +      step--; /* Don't count this step since we are restarting */
 +      continue; /* Go back to the beginning of the big for-loop */
 +    }
 +
 +    /* Calculate minimum allowed stepsize, before the average (norm)
 +     * relative change in coordinate is smaller than precision
 +     */
 +    minstep=0;
 +    for (i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
 +      for(m=0; m<DIM; m++) {
 +      tmp = fabs(s_min->s.x[i][m]);
 +      if(tmp < 1.0)
 +        tmp = 1.0;
 +      tmp = p[i][m]/tmp;
 +      minstep += tmp*tmp;
 +      }
 +    }
 +    /* Add up from all CPUs */
 +    if(PAR(cr))
 +      gmx_sumd(1,&minstep,cr);
 +
 +    minstep = GMX_REAL_EPS/sqrt(minstep/(3*state_global->natoms));
 +
 +    if(stepsize<minstep) {
 +      converged=TRUE;
 +      break;
 +    }
 +    
 +    /* Write coordinates if necessary */
 +    do_x = do_per_step(step,inputrec->nstxout);
 +    do_f = do_per_step(step,inputrec->nstfout);
 +    
 +    write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
 +                  top_global,inputrec,step,
 +                  s_min,state_global,f_global);
 +    
 +    /* Take a step downhill.
 +     * In theory, we should minimize the function along this direction.
 +     * That is quite possible, but it turns out to take 5-10 function evaluations
 +     * for each line. However, we dont really need to find the exact minimum -
 +     * it is much better to start a new CG step in a modified direction as soon
 +     * as we are close to it. This will save a lot of energy evaluations.
 +     *
 +     * In practice, we just try to take a single step.
 +     * If it worked (i.e. lowered the energy), we increase the stepsize but
 +     * the continue straight to the next CG step without trying to find any minimum.
 +     * If it didn't work (higher energy), there must be a minimum somewhere between
 +     * the old position and the new one.
 +     * 
 +     * Due to the finite numerical accuracy, it turns out that it is a good idea
 +     * to even accept a SMALL increase in energy, if the derivative is still downhill.
 +     * This leads to lower final energies in the tests I've done. / Erik 
 +     */
 +    s_a->epot = s_min->epot;
 +    a = 0.0;
 +    c = a + stepsize; /* reference position along line is zero */
 +    
 +    if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count) {
 +      em_dd_partition_system(fplog,step,cr,top_global,inputrec,
 +                           s_min,top,mdatoms,fr,vsite,constr,
 +                           nrnb,wcycle);
 +    }
 +
 +    /* Take a trial step (new coords in s_c) */
 +    do_em_step(cr,inputrec,mdatoms,s_min,c,s_min->s.cg_p,s_c,
 +             constr,top,nrnb,wcycle,-1);
 +    
 +    neval++;
 +    /* Calculate energy for the trial step */
 +    evaluate_energy(fplog,bVerbose,cr,
 +                  state_global,top_global,s_c,top,
 +                  inputrec,nrnb,wcycle,gstat,
 +                  vsite,constr,fcd,graph,mdatoms,fr,
 +                  mu_tot,enerd,vir,pres,-1,FALSE);
 +    
 +    /* Calc derivative along line */
 +    p  = s_c->s.cg_p;
 +    sf = s_c->f;
 +    gpc=0;
 +    for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
 +      for(m=0; m<DIM; m++) 
 +        gpc -= p[i][m]*sf[i][m];  /* f is negative gradient, thus the sign */
 +    }
 +    /* Sum the gradient along the line across CPUs */
 +    if (PAR(cr))
 +      gmx_sumd(1,&gpc,cr);
 +
 +    /* This is the max amount of increase in energy we tolerate */
 +    tmp=sqrt(GMX_REAL_EPS)*fabs(s_a->epot);
 +
 +    /* Accept the step if the energy is lower, or if it is not significantly higher
 +     * and the line derivative is still negative.
 +     */
 +    if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp))) {
 +      foundlower = TRUE;
 +      /* Great, we found a better energy. Increase step for next iteration
 +       * if we are still going down, decrease it otherwise
 +       */
 +      if(gpc<0)
 +      stepsize *= 1.618034;  /* The golden section */
 +      else
 +      stepsize *= 0.618034;  /* 1/golden section */
 +    } else {
 +      /* New energy is the same or higher. We will have to do some work
 +       * to find a smaller value in the interval. Take smaller step next time!
 +       */
 +      foundlower = FALSE;
 +      stepsize *= 0.618034;
 +    }    
 +
 +
 +
 +    
 +    /* OK, if we didn't find a lower value we will have to locate one now - there must
 +     * be one in the interval [a=0,c].
 +     * The same thing is valid here, though: Don't spend dozens of iterations to find
 +     * the line minimum. We try to interpolate based on the derivative at the endpoints,
 +     * and only continue until we find a lower value. In most cases this means 1-2 iterations.
 +     *
 +     * I also have a safeguard for potentially really patological functions so we never
 +     * take more than 20 steps before we give up ...
 +     *
 +     * If we already found a lower value we just skip this step and continue to the update.
 +     */
 +    if (!foundlower) {
 +      nminstep=0;
 +
 +      do {
 +      /* Select a new trial point.
 +       * If the derivatives at points a & c have different sign we interpolate to zero,
 +       * otherwise just do a bisection.
 +       */
 +      if(gpa<0 && gpc>0)
 +        b = a + gpa*(a-c)/(gpc-gpa);
 +      else
 +        b = 0.5*(a+c);                
 +      
 +      /* safeguard if interpolation close to machine accuracy causes errors:
 +       * never go outside the interval
 +       */
 +      if(b<=a || b>=c)
 +        b = 0.5*(a+c);
 +      
 +      if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
 +        /* Reload the old state */
 +        em_dd_partition_system(fplog,-1,cr,top_global,inputrec,
 +                               s_min,top,mdatoms,fr,vsite,constr,
 +                               nrnb,wcycle);
 +      }
 +
 +      /* Take a trial step to this new point - new coords in s_b */
 +      do_em_step(cr,inputrec,mdatoms,s_min,b,s_min->s.cg_p,s_b,
 +                 constr,top,nrnb,wcycle,-1);
 +      
 +      neval++;
 +      /* Calculate energy for the trial step */
 +      evaluate_energy(fplog,bVerbose,cr,
 +                      state_global,top_global,s_b,top,
 +                      inputrec,nrnb,wcycle,gstat,
 +                      vsite,constr,fcd,graph,mdatoms,fr,
 +                      mu_tot,enerd,vir,pres,-1,FALSE);
 +      
 +      /* p does not change within a step, but since the domain decomposition
 +       * might change, we have to use cg_p of s_b here.
 +       */
 +      p  = s_b->s.cg_p;
 +      sf = s_b->f;
 +      gpb=0;
 +      for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
 +        for(m=0; m<DIM; m++)
 +            gpb -= p[i][m]*sf[i][m];   /* f is negative gradient, thus the sign */
 +      }
 +      /* Sum the gradient along the line across CPUs */
 +      if (PAR(cr))
 +        gmx_sumd(1,&gpb,cr);
 +      
 +      if (debug)
 +        fprintf(debug,"CGE: EpotA %f EpotB %f EpotC %f gpb %f\n",
 +                s_a->epot,s_b->epot,s_c->epot,gpb);
 +
 +      epot_repl = s_b->epot;
 +      
 +      /* Keep one of the intervals based on the value of the derivative at the new point */
 +      if (gpb > 0) {
 +        /* Replace c endpoint with b */
 +        swap_em_state(s_b,s_c);
 +        c = b;
 +        gpc = gpb;
 +      } else {
 +        /* Replace a endpoint with b */
 +        swap_em_state(s_b,s_a);
 +        a = b;
 +        gpa = gpb;
 +      }
 +      
 +      /* 
 +       * Stop search as soon as we find a value smaller than the endpoints.
 +       * Never run more than 20 steps, no matter what.
 +       */
 +      nminstep++;
 +      } while ((epot_repl > s_a->epot || epot_repl > s_c->epot) &&
 +             (nminstep < 20));     
 +      
 +      if (fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS ||
 +        nminstep >= 20) {
 +      /* OK. We couldn't find a significantly lower energy.
 +       * If beta==0 this was steepest descent, and then we give up.
 +       * If not, set beta=0 and restart with steepest descent before quitting.
 +         */
 +      if (beta == 0.0) {
 +        /* Converged */
 +        converged = TRUE;
 +        break;
 +      } else {
 +        /* Reset memory before giving up */
 +        beta = 0.0;
 +        continue;
 +      }
 +      }
 +      
 +      /* Select min energy state of A & C, put the best in B.
 +       */
 +      if (s_c->epot < s_a->epot) {
 +      if (debug)
 +        fprintf(debug,"CGE: C (%f) is lower than A (%f), moving C to B\n",
 +                s_c->epot,s_a->epot);
 +      swap_em_state(s_b,s_c);
 +      gpb = gpc;
 +      b = c;
 +      } else {
 +      if (debug)
 +        fprintf(debug,"CGE: A (%f) is lower than C (%f), moving A to B\n",
 +                s_a->epot,s_c->epot);
 +      swap_em_state(s_b,s_a);
 +      gpb = gpa;
 +      b = a;
 +      }
 +      
 +    } else {
 +      if (debug)
 +      fprintf(debug,"CGE: Found a lower energy %f, moving C to B\n",
 +              s_c->epot);
 +      swap_em_state(s_b,s_c);
 +      gpb = gpc;
 +      b = c;
 +    }
 +    
 +    /* new search direction */
 +    /* beta = 0 means forget all memory and restart with steepest descents. */
 +    if (nstcg && ((step % nstcg)==0)) 
 +      beta = 0.0;
 +    else {
 +      /* s_min->fnorm cannot be zero, because then we would have converged
 +       * and broken out.
 +       */
 +
 +      /* Polak-Ribiere update.
 +       * Change to fnorm2/fnorm2_old for Fletcher-Reeves
 +       */
 +      beta = pr_beta(cr,&inputrec->opts,mdatoms,top_global,s_min,s_b);
 +    }
 +    /* Limit beta to prevent oscillations */
 +    if (fabs(beta) > 5.0)
 +      beta = 0.0;
 +    
 +    
 +    /* update positions */
 +    swap_em_state(s_min,s_b);
 +    gpa = gpb;
 +    
 +    /* Print it if necessary */
 +    if (MASTER(cr)) {
 +      if(bVerbose)
 +      fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
 +              step,s_min->epot,s_min->fnorm/sqrt(state_global->natoms),
 +              s_min->fmax,s_min->a_fmax+1);
 +      /* Store the new (lower) energies */
 +      upd_mdebin(mdebin,FALSE,FALSE,(double)step,
 +               mdatoms->tmass,enerd,&s_min->s,s_min->s.box,
 +               NULL,NULL,vir,pres,NULL,mu_tot,constr);
 +      do_log = do_per_step(step,inputrec->nstlog);
 +      do_ene = do_per_step(step,inputrec->nstenergy);
 +      if(do_log)
 +      print_ebin_header(fplog,step,step,s_min->s.lambda);
 +      print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
 +               do_log ? fplog : NULL,step,step,eprNORMAL,
 +               TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
 +    }
 +    
 +    /* Stop when the maximum force lies below tolerance.
 +     * If we have reached machine precision, converged is already set to true.
 +     */       
 +    converged = converged || (s_min->fmax < inputrec->em_tol);
 +    
 +  } /* End of the loop */
 +  
 +  if (converged)      
 +    step--; /* we never took that last step in this case */
 +  
 +    if (s_min->fmax > inputrec->em_tol)
 +    {
 +        if (MASTER(cr))
 +        {
 +            warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
 +            warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
 +        }
 +        converged = FALSE; 
 +    }
 +  
 +  if (MASTER(cr)) {
 +    /* If we printed energy and/or logfile last step (which was the last step)
 +     * we don't have to do it again, but otherwise print the final values.
 +     */
 +    if(!do_log) {
 +      /* Write final value to log since we didn't do anything the last step */
 +      print_ebin_header(fplog,step,step,s_min->s.lambda);
 +    }
 +    if (!do_ene || !do_log) {
 +      /* Write final energy file entries */
 +      print_ebin(outf->fp_ene,!do_ene,FALSE,FALSE,
 +               !do_log ? fplog : NULL,step,step,eprNORMAL,
 +               TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
 +    }
 +  }
 +
 +  /* Print some stuff... */
 +  if (MASTER(cr))
 +    fprintf(stderr,"\nwriting lowest energy coordinates.\n");
 +  
 +  /* IMPORTANT!
 +   * For accurate normal mode calculation it is imperative that we
 +   * store the last conformation into the full precision binary trajectory.
 +   *
 +   * However, we should only do it if we did NOT already write this step
 +   * above (which we did if do_x or do_f was true).
 +   */  
 +  do_x = !do_per_step(step,inputrec->nstxout);
 +  do_f = (inputrec->nstfout > 0 && !do_per_step(step,inputrec->nstfout));
 +  
 +  write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
 +                top_global,inputrec,step,
 +                s_min,state_global,f_global);
 +  
 +  fnormn = s_min->fnorm/sqrt(state_global->natoms);
 +  
 +  if (MASTER(cr)) {
 +    print_converged(stderr,CG,inputrec->em_tol,step,converged,number_steps,
 +                  s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
 +    print_converged(fplog,CG,inputrec->em_tol,step,converged,number_steps,
 +                  s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
 +    
 +    fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
 +  }
 +  
 +  finish_em(fplog,cr,outf,runtime,wcycle);
 +  
 +  /* To print the actual number of steps we needed somewhere */
 +  runtime->nsteps_done = step;
 +
 +  return 0;
 +} /* That's all folks */
 +
 +
 +double do_lbfgs(FILE *fplog,t_commrec *cr,
 +                int nfile,const t_filenm fnm[],
 +                const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
 +                int nstglobalcomm,
 +                gmx_vsite_t *vsite,gmx_constr_t constr,
 +                int stepout,
 +                t_inputrec *inputrec,
 +                gmx_mtop_t *top_global,t_fcdata *fcd,
 +                t_state *state,
 +                t_mdatoms *mdatoms,
 +                t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +                gmx_edsam_t ed,
 +                t_forcerec *fr,
 +                int repl_ex_nst,int repl_ex_seed,
 +                gmx_membed_t *membed,
 +                real cpt_period,real max_hours,
 +                const char *deviceOptions,
 +                unsigned long Flags,
 +                gmx_runtime_t *runtime)
 +{
 +  static const char *LBFGS="Low-Memory BFGS Minimizer";
 +  em_state_t ems;
 +  gmx_localtop_t *top;
 +  gmx_enerdata_t *enerd;
 +  rvec   *f;
 +  gmx_global_stat_t gstat;
 +  t_graph    *graph;
 +  rvec   *f_global;
 +  int    ncorr,nmaxcorr,point,cp,neval,nminstep;
 +  double stepsize,gpa,gpb,gpc,tmp,minstep;
 +  real   *rho,*alpha,*ff,*xx,*p,*s,*lastx,*lastf,**dx,**dg;   
 +  real   *xa,*xb,*xc,*fa,*fb,*fc,*xtmp,*ftmp;
 +  real   a,b,c,maxdelta,delta;
 +  real   diag,Epot0,Epot,EpotA,EpotB,EpotC;
 +  real   dgdx,dgdg,sq,yr,beta;
 +  t_mdebin   *mdebin;
 +  gmx_bool   converged,first;
 +  rvec   mu_tot;
 +  real   fnorm,fmax;
 +  gmx_bool   do_log,do_ene,do_x,do_f,foundlower,*frozen;
 +  tensor vir,pres;
 +  int    start,end,number_steps;
 +  gmx_mdoutf_t *outf;
 +  int    i,k,m,n,nfmax,gf,step;
 +  /* not used */
 +  real   terminate;
 +
 +  if (PAR(cr))
 +    gmx_fatal(FARGS,"Cannot do parallel L-BFGS Minimization - yet.\n");
 +  
 +  n = 3*state->natoms;
 +  nmaxcorr = inputrec->nbfgscorr;
 +  
 +  /* Allocate memory */
 +  /* Use pointers to real so we dont have to loop over both atoms and
 +   * dimensions all the time...
 +   * x/f are allocated as rvec *, so make new x0/f0 pointers-to-real
 +   * that point to the same memory.
 +   */
 +  snew(xa,n);
 +  snew(xb,n);
 +  snew(xc,n);
 +  snew(fa,n);
 +  snew(fb,n);
 +  snew(fc,n);
 +  snew(frozen,n);
 +
 +  snew(p,n); 
 +  snew(lastx,n); 
 +  snew(lastf,n); 
 +  snew(rho,nmaxcorr);
 +  snew(alpha,nmaxcorr);
 +  
 +  snew(dx,nmaxcorr);
 +  for(i=0;i<nmaxcorr;i++)
 +    snew(dx[i],n);
 +  
 +  snew(dg,nmaxcorr);
 +  for(i=0;i<nmaxcorr;i++)
 +    snew(dg[i],n);
 +
 +  step = 0;
 +  neval = 0; 
 +
 +  /* Init em */
 +  init_em(fplog,LBFGS,cr,inputrec,
 +          state,top_global,&ems,&top,&f,&f_global,
 +          nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
 +          nfile,fnm,&outf,&mdebin);
 +  /* Do_lbfgs is not completely updated like do_steep and do_cg,
 +   * so we free some memory again.
 +   */
 +  sfree(ems.s.x);
 +  sfree(ems.f);
 +
 +  xx = (real *)state->x;
 +  ff = (real *)f;
 +
 +  start = mdatoms->start;
 +  end   = mdatoms->homenr + start;
 +    
 +  /* Print to log file */
 +  print_em_start(fplog,cr,runtime,wcycle,LBFGS);
 +  
 +  do_log = do_ene = do_x = do_f = TRUE;
 +  
 +  /* Max number of steps */
 +  number_steps=inputrec->nsteps;
 +
 +  /* Create a 3*natoms index to tell whether each degree of freedom is frozen */
 +  gf = 0;
 +  for(i=start; i<end; i++) {
 +    if (mdatoms->cFREEZE)
 +      gf = mdatoms->cFREEZE[i];
 +     for(m=0; m<DIM; m++) 
 +       frozen[3*i+m]=inputrec->opts.nFreeze[gf][m];  
 +  }
 +  if (MASTER(cr))
 +    sp_header(stderr,LBFGS,inputrec->em_tol,number_steps);
 +  if (fplog)
 +    sp_header(fplog,LBFGS,inputrec->em_tol,number_steps);
 +  
 +  if (vsite)
 +    construct_vsites(fplog,vsite,state->x,nrnb,1,NULL,
 +                   top->idef.iparams,top->idef.il,
 +                   fr->ePBC,fr->bMolPBC,graph,cr,state->box);
 +  
 +  /* Call the force routine and some auxiliary (neighboursearching etc.) */
 +  /* do_force always puts the charge groups in the box and shifts again
 +   * We do not unshift, so molecules are always whole
 +   */
 +  neval++;
 +  ems.s.x = state->x;
 +  ems.f = f;
 +  evaluate_energy(fplog,bVerbose,cr,
 +                state,top_global,&ems,top,
 +                inputrec,nrnb,wcycle,gstat,
 +                vsite,constr,fcd,graph,mdatoms,fr,
 +                mu_tot,enerd,vir,pres,-1,TRUE);
 +  where();
 +      
 +  if (MASTER(cr)) {
 +    /* Copy stuff to the energy bin for easy printing etc. */
 +    upd_mdebin(mdebin,FALSE,FALSE,(double)step,
 +             mdatoms->tmass,enerd,state,state->box,
 +             NULL,NULL,vir,pres,NULL,mu_tot,constr);
 +    
 +    print_ebin_header(fplog,step,step,state->lambda);
 +    print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
 +               TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
 +  }
 +  where();
 +  
 +  /* This is the starting energy */
 +  Epot = enerd->term[F_EPOT];
 +  
 +  fnorm = ems.fnorm;
 +  fmax  = ems.fmax;
 +  nfmax = ems.a_fmax;
 +  
 +  /* Set the initial step.
 +   * since it will be multiplied by the non-normalized search direction 
 +   * vector (force vector the first time), we scale it by the
 +   * norm of the force.
 +   */
 +  
 +  if (MASTER(cr)) {
 +    fprintf(stderr,"Using %d BFGS correction steps.\n\n",nmaxcorr);
 +    fprintf(stderr,"   F-max             = %12.5e on atom %d\n",fmax,nfmax+1);
 +    fprintf(stderr,"   F-Norm            = %12.5e\n",fnorm/sqrt(state->natoms));
 +    fprintf(stderr,"\n");
 +    /* and copy to the log file too... */
 +    fprintf(fplog,"Using %d BFGS correction steps.\n\n",nmaxcorr);
 +    fprintf(fplog,"   F-max             = %12.5e on atom %d\n",fmax,nfmax+1);
 +    fprintf(fplog,"   F-Norm            = %12.5e\n",fnorm/sqrt(state->natoms));
 +    fprintf(fplog,"\n");
 +  }   
 +  
 +  point=0;
 +  for(i=0;i<n;i++)
 +    if(!frozen[i])
 +      dx[point][i] = ff[i];  /* Initial search direction */
 +    else
 +      dx[point][i] = 0;
 +
 +  stepsize = 1.0/fnorm;
 +  converged = FALSE;
 +  
 +  /* Start the loop over BFGS steps.          
 +   * Each successful step is counted, and we continue until
 +   * we either converge or reach the max number of steps.
 +   */
 +  
 +  ncorr=0;
 +
 +  /* Set the gradient from the force */
 +  converged = FALSE;
 +  for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged; step++) {
 +    
 +    /* Write coordinates if necessary */
 +    do_x = do_per_step(step,inputrec->nstxout);
 +    do_f = do_per_step(step,inputrec->nstfout);
 +    
 +    write_traj(fplog,cr,outf,MDOF_X | MDOF_F,
 +               top_global,step,(real)step,state,state,f,f,NULL,NULL);
 +
 +    /* Do the linesearching in the direction dx[point][0..(n-1)] */
 +    
 +    /* pointer to current direction - point=0 first time here */
 +    s=dx[point];
 +    
 +    /* calculate line gradient */
 +    for(gpa=0,i=0;i<n;i++) 
 +      gpa-=s[i]*ff[i];
 +
 +    /* Calculate minimum allowed stepsize, before the average (norm) 
 +     * relative change in coordinate is smaller than precision 
 +     */
 +    for(minstep=0,i=0;i<n;i++) {
 +      tmp=fabs(xx[i]);
 +      if(tmp<1.0)
 +      tmp=1.0;
 +      tmp = s[i]/tmp;
 +      minstep += tmp*tmp;
 +    }
 +    minstep = GMX_REAL_EPS/sqrt(minstep/n);
 +    
 +    if(stepsize<minstep) {
 +      converged=TRUE;
 +      break;
 +    }
 +    
 +    /* Store old forces and coordinates */
 +    for(i=0;i<n;i++) {
 +      lastx[i]=xx[i];
 +      lastf[i]=ff[i];
 +    }
 +    Epot0=Epot;
 +    
 +    first=TRUE;
 +    
 +    for(i=0;i<n;i++)
 +      xa[i]=xx[i];
 +    
 +    /* Take a step downhill.
 +     * In theory, we should minimize the function along this direction.
 +     * That is quite possible, but it turns out to take 5-10 function evaluations
 +     * for each line. However, we dont really need to find the exact minimum -
 +     * it is much better to start a new BFGS step in a modified direction as soon
 +     * as we are close to it. This will save a lot of energy evaluations.
 +     *
 +     * In practice, we just try to take a single step.
 +     * If it worked (i.e. lowered the energy), we increase the stepsize but
 +     * the continue straight to the next BFGS step without trying to find any minimum.
 +     * If it didn't work (higher energy), there must be a minimum somewhere between
 +     * the old position and the new one.
 +     * 
 +     * Due to the finite numerical accuracy, it turns out that it is a good idea
 +     * to even accept a SMALL increase in energy, if the derivative is still downhill.
 +     * This leads to lower final energies in the tests I've done. / Erik 
 +     */
 +    foundlower=FALSE;
 +    EpotA = Epot0;
 +    a = 0.0;
 +    c = a + stepsize; /* reference position along line is zero */
 +
 +    /* Check stepsize first. We do not allow displacements 
 +     * larger than emstep.
 +     */
 +    do {
 +      c = a + stepsize;
 +      maxdelta=0;
 +      for(i=0;i<n;i++) {
 +      delta=c*s[i];
 +      if(delta>maxdelta)
 +        maxdelta=delta;
 +      }
 +      if(maxdelta>inputrec->em_stepsize)
 +      stepsize*=0.1;
 +    } while(maxdelta>inputrec->em_stepsize);
 +
 +    /* Take a trial step */
 +    for (i=0; i<n; i++)
 +      xc[i] = lastx[i] + c*s[i];
 +    
 +    neval++;
 +    /* Calculate energy for the trial step */
 +    ems.s.x = (rvec *)xc;
 +    ems.f   = (rvec *)fc;
 +    evaluate_energy(fplog,bVerbose,cr,
 +                  state,top_global,&ems,top,
 +                  inputrec,nrnb,wcycle,gstat,
 +                  vsite,constr,fcd,graph,mdatoms,fr,
 +                  mu_tot,enerd,vir,pres,step,FALSE);
 +    EpotC = ems.epot;
 +    
 +    /* Calc derivative along line */
 +    for(gpc=0,i=0; i<n; i++) {
 +      gpc -= s[i]*fc[i];   /* f is negative gradient, thus the sign */
 +    }
 +    /* Sum the gradient along the line across CPUs */
 +    if (PAR(cr))
 +      gmx_sumd(1,&gpc,cr);
 +    
 +     /* This is the max amount of increase in energy we tolerate */
 +   tmp=sqrt(GMX_REAL_EPS)*fabs(EpotA);
 +    
 +    /* Accept the step if the energy is lower, or if it is not significantly higher
 +     * and the line derivative is still negative.
 +     */
 +    if(EpotC<EpotA || (gpc<0 && EpotC<(EpotA+tmp))) {
 +      foundlower = TRUE;
 +      /* Great, we found a better energy. Increase step for next iteration
 +       * if we are still going down, decrease it otherwise
 +       */
 +      if(gpc<0)
 +      stepsize *= 1.618034;  /* The golden section */
 +      else
 +      stepsize *= 0.618034;  /* 1/golden section */
 +    } else {
 +      /* New energy is the same or higher. We will have to do some work
 +       * to find a smaller value in the interval. Take smaller step next time!
 +       */
 +      foundlower = FALSE;
 +      stepsize *= 0.618034;
 +    }    
 +    
 +    /* OK, if we didn't find a lower value we will have to locate one now - there must
 +     * be one in the interval [a=0,c].
 +     * The same thing is valid here, though: Don't spend dozens of iterations to find
 +     * the line minimum. We try to interpolate based on the derivative at the endpoints,
 +     * and only continue until we find a lower value. In most cases this means 1-2 iterations.
 +     *
 +     * I also have a safeguard for potentially really patological functions so we never
 +     * take more than 20 steps before we give up ...
 +     *
 +     * If we already found a lower value we just skip this step and continue to the update.
 +     */
 +
 +    if(!foundlower) {
 +     
 +      nminstep=0;
 +      do {
 +      /* Select a new trial point.
 +       * If the derivatives at points a & c have different sign we interpolate to zero,
 +       * otherwise just do a bisection.
 +       */
 +      
 +      if(gpa<0 && gpc>0)
 +        b = a + gpa*(a-c)/(gpc-gpa);
 +      else
 +        b = 0.5*(a+c);                
 +      
 +      /* safeguard if interpolation close to machine accuracy causes errors:
 +       * never go outside the interval
 +       */
 +      if(b<=a || b>=c)
 +        b = 0.5*(a+c);
 +      
 +      /* Take a trial step */
 +      for (i=0; i<n; i++) 
 +        xb[i] = lastx[i] + b*s[i];
 +      
 +      neval++;
 +      /* Calculate energy for the trial step */
 +      ems.s.x = (rvec *)xb;
 +      ems.f   = (rvec *)fb;
 +      evaluate_energy(fplog,bVerbose,cr,
 +                      state,top_global,&ems,top,
 +                      inputrec,nrnb,wcycle,gstat,
 +                      vsite,constr,fcd,graph,mdatoms,fr,
 +                      mu_tot,enerd,vir,pres,step,FALSE);
 +      EpotB = ems.epot;
 +      
 +      fnorm = ems.fnorm;
 +      
 +      for(gpb=0,i=0; i<n; i++) 
 +        gpb -= s[i]*fb[i];   /* f is negative gradient, thus the sign */
 +      
 +      /* Sum the gradient along the line across CPUs */
 +      if (PAR(cr))
 +        gmx_sumd(1,&gpb,cr);
 +      
 +      /* Keep one of the intervals based on the value of the derivative at the new point */
 +      if(gpb>0) {
 +        /* Replace c endpoint with b */
 +        EpotC = EpotB;
 +        c = b;
 +        gpc = gpb;
 +        /* swap coord pointers b/c */
 +        xtmp = xb; 
 +        ftmp = fb;
 +        xb = xc; 
 +        fb = fc;
 +        xc = xtmp;
 +        fc = ftmp;
 +      } else {
 +        /* Replace a endpoint with b */
 +        EpotA = EpotB;
 +        a = b;
 +        gpa = gpb;
 +        /* swap coord pointers a/b */
 +        xtmp = xb; 
 +        ftmp = fb;
 +        xb = xa; 
 +        fb = fa;
 +        xa = xtmp; 
 +        fa = ftmp;
 +      }
 +      
 +      /* 
 +       * Stop search as soon as we find a value smaller than the endpoints,
 +       * or if the tolerance is below machine precision.
 +       * Never run more than 20 steps, no matter what.
 +       */
 +      nminstep++; 
 +      } while((EpotB>EpotA || EpotB>EpotC) && (nminstep<20));
 +
 +      if(fabs(EpotB-Epot0)<GMX_REAL_EPS || nminstep>=20) {
 +      /* OK. We couldn't find a significantly lower energy.
 +       * If ncorr==0 this was steepest descent, and then we give up.
 +       * If not, reset memory to restart as steepest descent before quitting.
 +         */
 +      if(ncorr==0) {
 +      /* Converged */
 +        converged=TRUE;
 +        break;
 +      } else {
 +        /* Reset memory */
 +        ncorr=0;
 +        /* Search in gradient direction */
 +        for(i=0;i<n;i++)
 +          dx[point][i]=ff[i];
 +        /* Reset stepsize */
 +        stepsize = 1.0/fnorm;
 +        continue;
 +      }
 +      }
 +      
 +      /* Select min energy state of A & C, put the best in xx/ff/Epot
 +       */
 +      if(EpotC<EpotA) {
 +      Epot = EpotC;
 +      /* Use state C */
 +      for(i=0;i<n;i++) {
 +        xx[i]=xc[i];
 +        ff[i]=fc[i];
 +      }
 +      stepsize=c;
 +      } else {
 +      Epot = EpotA;
 +      /* Use state A */
 +      for(i=0;i<n;i++) {
 +        xx[i]=xa[i];
 +        ff[i]=fa[i];
 +      }
 +      stepsize=a;
 +      }
 +      
 +    } else {
 +      /* found lower */
 +      Epot = EpotC;
 +      /* Use state C */
 +      for(i=0;i<n;i++) {
 +      xx[i]=xc[i];
 +      ff[i]=fc[i];
 +      }
 +      stepsize=c;
 +    }
 +
 +    /* Update the memory information, and calculate a new 
 +     * approximation of the inverse hessian 
 +     */
 +    
 +    /* Have new data in Epot, xx, ff */       
 +    if(ncorr<nmaxcorr)
 +      ncorr++;
 +
 +    for(i=0;i<n;i++) {
 +      dg[point][i]=lastf[i]-ff[i];
 +      dx[point][i]*=stepsize;
 +    }
 +    
 +    dgdg=0;
 +    dgdx=0;   
 +    for(i=0;i<n;i++) {
 +      dgdg+=dg[point][i]*dg[point][i];
 +      dgdx+=dg[point][i]*dx[point][i];
 +    }
 +    
 +    diag=dgdx/dgdg;
 +    
 +    rho[point]=1.0/dgdx;
 +    point++;
 +    
 +    if(point>=nmaxcorr)
 +      point=0;
 +    
 +    /* Update */
 +    for(i=0;i<n;i++)
 +      p[i]=ff[i];
 +    
 +    cp=point;
 +    
 +    /* Recursive update. First go back over the memory points */
 +    for(k=0;k<ncorr;k++) {
 +      cp--;
 +      if(cp<0) 
 +      cp=ncorr-1;
 +      
 +      sq=0;
 +      for(i=0;i<n;i++)
 +      sq+=dx[cp][i]*p[i];
 +      
 +      alpha[cp]=rho[cp]*sq;
 +      
 +      for(i=0;i<n;i++)
 +      p[i] -= alpha[cp]*dg[cp][i];            
 +    }
 +    
 +    for(i=0;i<n;i++)
 +      p[i] *= diag;
 +    
 +    /* And then go forward again */
 +    for(k=0;k<ncorr;k++) {
 +      yr = 0;
 +      for(i=0;i<n;i++)
 +      yr += p[i]*dg[cp][i];
 +      
 +      beta = rho[cp]*yr;          
 +      beta = alpha[cp]-beta;
 +      
 +      for(i=0;i<n;i++)
 +      p[i] += beta*dx[cp][i];
 +      
 +      cp++;   
 +      if(cp>=ncorr)
 +      cp=0;
 +    }
 +    
 +    for(i=0;i<n;i++)
 +      if(!frozen[i])
 +      dx[point][i] = p[i];
 +      else
 +      dx[point][i] = 0;
 +
 +    stepsize=1.0;
 +    
 +    /* Test whether the convergence criterion is met */
 +    get_f_norm_max(cr,&(inputrec->opts),mdatoms,f,&fnorm,&fmax,&nfmax);
 +    
 +    /* Print it if necessary */
 +    if (MASTER(cr)) {
 +      if(bVerbose)
 +      fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
 +              step,Epot,fnorm/sqrt(state->natoms),fmax,nfmax+1);
 +      /* Store the new (lower) energies */
 +      upd_mdebin(mdebin,FALSE,FALSE,(double)step,
 +               mdatoms->tmass,enerd,state,state->box,
 +               NULL,NULL,vir,pres,NULL,mu_tot,constr);
 +      do_log = do_per_step(step,inputrec->nstlog);
 +      do_ene = do_per_step(step,inputrec->nstenergy);
 +      if(do_log)
 +      print_ebin_header(fplog,step,step,state->lambda);
 +      print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
 +               do_log ? fplog : NULL,step,step,eprNORMAL,
 +               TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
 +    }
 +    
 +    /* Stop when the maximum force lies below tolerance.
 +     * If we have reached machine precision, converged is already set to true.
 +     */
 +    
 +    converged = converged || (fmax < inputrec->em_tol);
 +    
 +  } /* End of the loop */
 +  
 +  if(converged)       
 +    step--; /* we never took that last step in this case */
 +  
 +    if(fmax>inputrec->em_tol)
 +    {
 +        if (MASTER(cr))
 +        {
 +            warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
 +            warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
 +        }
 +        converged = FALSE; 
 +    }
 +  
 +  /* If we printed energy and/or logfile last step (which was the last step)
 +   * we don't have to do it again, but otherwise print the final values.
 +   */
 +  if(!do_log) /* Write final value to log since we didn't do anythin last step */
 +    print_ebin_header(fplog,step,step,state->lambda);
 +  if(!do_ene || !do_log) /* Write final energy file entries */
 +    print_ebin(outf->fp_ene,!do_ene,FALSE,FALSE,
 +             !do_log ? fplog : NULL,step,step,eprNORMAL,
 +             TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
 +  
 +  /* Print some stuff... */
 +  if (MASTER(cr))
 +    fprintf(stderr,"\nwriting lowest energy coordinates.\n");
 +  
 +  /* IMPORTANT!
 +   * For accurate normal mode calculation it is imperative that we
 +   * store the last conformation into the full precision binary trajectory.
 +   *
 +   * However, we should only do it if we did NOT already write this step
 +   * above (which we did if do_x or do_f was true).
 +   */  
 +  do_x = !do_per_step(step,inputrec->nstxout);
 +  do_f = !do_per_step(step,inputrec->nstfout);
 +  write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
 +                top_global,inputrec,step,
 +                &ems,state,f);
 +  
 +  if (MASTER(cr)) {
 +    print_converged(stderr,LBFGS,inputrec->em_tol,step,converged,
 +                  number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
 +    print_converged(fplog,LBFGS,inputrec->em_tol,step,converged,
 +                  number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
 +    
 +    fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
 +  }
 +  
 +  finish_em(fplog,cr,outf,runtime,wcycle);
 +
 +  /* To print the actual number of steps we needed somewhere */
 +  runtime->nsteps_done = step;
 +
 +  return 0;
 +} /* That's all folks */
 +
 +
 +double do_steep(FILE *fplog,t_commrec *cr,
 +                int nfile, const t_filenm fnm[],
 +                const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
 +                int nstglobalcomm,
 +                gmx_vsite_t *vsite,gmx_constr_t constr,
 +                int stepout,
 +                t_inputrec *inputrec,
 +                gmx_mtop_t *top_global,t_fcdata *fcd,
 +                t_state *state_global,
 +                t_mdatoms *mdatoms,
 +                t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +                gmx_edsam_t ed,
 +                t_forcerec *fr,
 +                int repl_ex_nst,int repl_ex_seed,
 +                gmx_membed_t *membed,
 +                real cpt_period,real max_hours,
 +                const char *deviceOptions,
 +                unsigned long Flags,
 +                gmx_runtime_t *runtime)
 +{ 
 +  const char *SD="Steepest Descents";
 +  em_state_t *s_min,*s_try;
 +  rvec       *f_global;
 +  gmx_localtop_t *top;
 +  gmx_enerdata_t *enerd;
 +  rvec   *f;
 +  gmx_global_stat_t gstat;
 +  t_graph    *graph;
 +  real   stepsize,constepsize;
 +  real   ustep,dvdlambda,fnormn;
 +  gmx_mdoutf_t *outf;
 +  t_mdebin   *mdebin; 
 +  gmx_bool   bDone,bAbort,do_x,do_f; 
 +  tensor vir,pres; 
 +  rvec   mu_tot;
 +  int    nsteps;
 +  int    count=0; 
 +  int    steps_accepted=0; 
 +  /* not used */
 +  real   terminate=0;
 +
 +  s_min = init_em_state();
 +  s_try = init_em_state();
 +
 +  /* Init em and store the local state in s_try */
 +  init_em(fplog,SD,cr,inputrec,
 +          state_global,top_global,s_try,&top,&f,&f_global,
 +          nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
 +          nfile,fnm,&outf,&mdebin);
 +      
 +  /* Print to log file  */
 +  print_em_start(fplog,cr,runtime,wcycle,SD);
 +    
 +  /* Set variables for stepsize (in nm). This is the largest  
 +   * step that we are going to make in any direction. 
 +   */
 +  ustep = inputrec->em_stepsize; 
 +  stepsize = 0;
 +  
 +  /* Max number of steps  */
 +  nsteps = inputrec->nsteps; 
 +  
 +  if (MASTER(cr)) 
 +    /* Print to the screen  */
 +    sp_header(stderr,SD,inputrec->em_tol,nsteps);
 +  if (fplog)
 +    sp_header(fplog,SD,inputrec->em_tol,nsteps);
 +    
 +  /**** HERE STARTS THE LOOP ****
 +   * count is the counter for the number of steps 
 +   * bDone will be TRUE when the minimization has converged
 +   * bAbort will be TRUE when nsteps steps have been performed or when
 +   * the stepsize becomes smaller than is reasonable for machine precision
 +   */
 +  count  = 0;
 +  bDone  = FALSE;
 +  bAbort = FALSE;
 +  while( !bDone && !bAbort ) {
 +    bAbort = (nsteps >= 0) && (count == nsteps);
 +    
 +    /* set new coordinates, except for first step */
 +    if (count > 0) {
 +      do_em_step(cr,inputrec,mdatoms,s_min,stepsize,s_min->f,s_try,
 +               constr,top,nrnb,wcycle,count);
 +    }
 +    
 +    evaluate_energy(fplog,bVerbose,cr,
 +                  state_global,top_global,s_try,top,
 +                  inputrec,nrnb,wcycle,gstat,
 +                  vsite,constr,fcd,graph,mdatoms,fr,
 +                  mu_tot,enerd,vir,pres,count,count==0);
 +       
 +    if (MASTER(cr))
 +      print_ebin_header(fplog,count,count,s_try->s.lambda);
 +
 +    if (count == 0)
 +      s_min->epot = s_try->epot + 1;
 +    
 +    /* Print it if necessary  */
 +    if (MASTER(cr)) {
 +      if (bVerbose) {
 +      fprintf(stderr,"Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",
 +              count,ustep,s_try->epot,s_try->fmax,s_try->a_fmax+1,
 +              (s_try->epot < s_min->epot) ? '\n' : '\r');
 +      }
 +      
 +      if (s_try->epot < s_min->epot) {
 +      /* Store the new (lower) energies  */
 +      upd_mdebin(mdebin,FALSE,FALSE,(double)count,
 +                 mdatoms->tmass,enerd,&s_try->s,s_try->s.box,
 +                 NULL,NULL,vir,pres,NULL,mu_tot,constr);
 +      print_ebin(outf->fp_ene,TRUE,
 +                 do_per_step(steps_accepted,inputrec->nstdisreout),
 +                 do_per_step(steps_accepted,inputrec->nstorireout),
 +                 fplog,count,count,eprNORMAL,TRUE,
 +                 mdebin,fcd,&(top_global->groups),&(inputrec->opts));
 +      fflush(fplog);
 +      }
 +    } 
 +    
 +    /* Now if the new energy is smaller than the previous...  
 +     * or if this is the first step!
 +     * or if we did random steps! 
 +     */
 +    
 +    if ( (count==0) || (s_try->epot < s_min->epot) ) {
 +      steps_accepted++; 
 +
 +      /* Test whether the convergence criterion is met...  */
 +      bDone = (s_try->fmax < inputrec->em_tol);
 +      
 +      /* Copy the arrays for force, positions and energy  */
 +      /* The 'Min' array always holds the coords and forces of the minimal 
 +       sampled energy  */
 +      swap_em_state(s_min,s_try);
 +      if (count > 0)
 +      ustep *= 1.2;
 +
 +      /* Write to trn, if necessary */
 +      do_x = do_per_step(steps_accepted,inputrec->nstxout);
 +      do_f = do_per_step(steps_accepted,inputrec->nstfout);
 +      write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
 +                    top_global,inputrec,count,
 +                    s_min,state_global,f_global);
 +    } 
 +    else {
 +      /* If energy is not smaller make the step smaller...  */
 +      ustep *= 0.5;
 +
 +      if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
 +      /* Reload the old state */
 +      em_dd_partition_system(fplog,count,cr,top_global,inputrec,
 +                             s_min,top,mdatoms,fr,vsite,constr,
 +                             nrnb,wcycle);
 +      }
 +    }
 +    
 +    /* Determine new step  */
 +    stepsize = ustep/s_min->fmax;
 +    
 +    /* Check if stepsize is too small, with 1 nm as a characteristic length */
 +#ifdef GMX_DOUBLE
 +        if (count == nsteps || ustep < 1e-12)
 +#else
 +        if (count == nsteps || ustep < 1e-6)
 +#endif
 +        {
 +            if (MASTER(cr))
 +            {
 +                warn_step(stderr,inputrec->em_tol,count==nsteps,constr!=NULL);
 +                warn_step(fplog ,inputrec->em_tol,count==nsteps,constr!=NULL);
 +            }
 +            bAbort=TRUE;
 +        }
 +    
 +    count++;
 +  } /* End of the loop  */
 +  
 +    /* Print some shit...  */
 +  if (MASTER(cr)) 
 +    fprintf(stderr,"\nwriting lowest energy coordinates.\n"); 
 +  write_em_traj(fplog,cr,outf,TRUE,inputrec->nstfout,ftp2fn(efSTO,nfile,fnm),
 +              top_global,inputrec,count,
 +              s_min,state_global,f_global);
 +
 +  fnormn = s_min->fnorm/sqrt(state_global->natoms);
 +
 +  if (MASTER(cr)) {
 +    print_converged(stderr,SD,inputrec->em_tol,count,bDone,nsteps,
 +                  s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
 +    print_converged(fplog,SD,inputrec->em_tol,count,bDone,nsteps,
 +                  s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
 +  }
 +
 +  finish_em(fplog,cr,outf,runtime,wcycle);
 +  
 +  /* To print the actual number of steps we needed somewhere */
 +  inputrec->nsteps=count;
 +
 +  runtime->nsteps_done = count;
 +  
 +  return 0;
 +} /* That's all folks */
 +
 +
 +double do_nm(FILE *fplog,t_commrec *cr,
 +             int nfile,const t_filenm fnm[],
 +             const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
 +             int nstglobalcomm,
 +             gmx_vsite_t *vsite,gmx_constr_t constr,
 +             int stepout,
 +             t_inputrec *inputrec,
 +             gmx_mtop_t *top_global,t_fcdata *fcd,
 +             t_state *state_global,
 +             t_mdatoms *mdatoms,
 +             t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +             gmx_edsam_t ed,
 +             t_forcerec *fr,
 +             int repl_ex_nst,int repl_ex_seed,
 +             gmx_membed_t *membed,
 +             real cpt_period,real max_hours,
 +             const char *deviceOptions,
 +             unsigned long Flags,
 +             gmx_runtime_t *runtime)
 +{
 +    const char *NM = "Normal Mode Analysis";
 +    gmx_mdoutf_t *outf;
 +    int        natoms,atom,d;
 +    int        nnodes,node;
 +    rvec       *f_global;
 +    gmx_localtop_t *top;
 +    gmx_enerdata_t *enerd;
 +    rvec       *f;
 +    gmx_global_stat_t gstat;
 +    t_graph    *graph;
 +    real       t,lambda;
 +    gmx_bool       bNS;
 +    tensor     vir,pres;
 +    rvec       mu_tot;
 +    rvec       *fneg,*dfdx;
 +    gmx_bool       bSparse; /* use sparse matrix storage format */
 +    size_t     sz;
 +    gmx_sparsematrix_t * sparse_matrix = NULL;
 +    real *     full_matrix             = NULL;
 +    em_state_t *   state_work;
 +      
 +    /* added with respect to mdrun */
 +    int        i,j,k,row,col;
 +    real       der_range=10.0*sqrt(GMX_REAL_EPS);
 +    real       x_min;
 +    real       fnorm,fmax;
 +    
 +    if (constr != NULL)
 +    {
 +        gmx_fatal(FARGS,"Constraints present with Normal Mode Analysis, this combination is not supported");
 +    }
 +
 +    state_work = init_em_state();
 +    
 +    /* Init em and store the local state in state_minimum */
 +    init_em(fplog,NM,cr,inputrec,
 +            state_global,top_global,state_work,&top,
 +            &f,&f_global,
 +            nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
 +            nfile,fnm,&outf,NULL);
 +    
 +    natoms = top_global->natoms;
 +    snew(fneg,natoms);
 +    snew(dfdx,natoms);
 +    
 +#ifndef GMX_DOUBLE
 +    if (MASTER(cr))
 +    {
 +        fprintf(stderr,
 +                "NOTE: This version of Gromacs has been compiled in single precision,\n"
 +                "      which MIGHT not be accurate enough for normal mode analysis.\n"
 +                "      Gromacs now uses sparse matrix storage, so the memory requirements\n"
 +                "      are fairly modest even if you recompile in double precision.\n\n");
 +    }
 +#endif
 +    
 +    /* Check if we can/should use sparse storage format.
 +     *
 +     * Sparse format is only useful when the Hessian itself is sparse, which it
 +      * will be when we use a cutoff.    
 +      * For small systems (n<1000) it is easier to always use full matrix format, though.
 +      */
 +    if(EEL_FULL(fr->eeltype) || fr->rlist==0.0)
 +    {
 +        fprintf(stderr,"Non-cutoff electrostatics used, forcing full Hessian format.\n");
 +        bSparse = FALSE;
 +    }
 +    else if(top_global->natoms < 1000)
 +    {
 +        fprintf(stderr,"Small system size (N=%d), using full Hessian format.\n",top_global->natoms);
 +        bSparse = FALSE;
 +    }
 +    else
 +    {
 +        fprintf(stderr,"Using compressed symmetric sparse Hessian format.\n");
 +        bSparse = TRUE;
 +    }
 +    
 +    sz = DIM*top_global->natoms;
 +    
 +    fprintf(stderr,"Allocating Hessian memory...\n\n");
 +
 +    if(bSparse)
 +    {
 +        sparse_matrix=gmx_sparsematrix_init(sz);
 +        sparse_matrix->compressed_symmetric = TRUE;
 +    }
 +    else
 +    {
 +        snew(full_matrix,sz*sz);
 +    }
 +    
 +    /* Initial values */
 +    t      = inputrec->init_t;
 +    lambda = inputrec->init_lambda;
 +    
 +    init_nrnb(nrnb);
 +    
 +    where();
 +    
 +    /* Write start time and temperature */
 +    print_em_start(fplog,cr,runtime,wcycle,NM);
 +
 +    /* fudge nr of steps to nr of atoms */
 +    inputrec->nsteps = natoms*2;
 +
 +    if (MASTER(cr)) 
 +    {
 +        fprintf(stderr,"starting normal mode calculation '%s'\n%d steps.\n\n",
 +                *(top_global->name),(int)inputrec->nsteps);
 +    }
 +
 +    nnodes = cr->nnodes;
 +   
 +    /* Make evaluate_energy do a single node force calculation */
 +    cr->nnodes = 1;
 +    evaluate_energy(fplog,bVerbose,cr,
 +                    state_global,top_global,state_work,top,
 +                    inputrec,nrnb,wcycle,gstat,
 +                    vsite,constr,fcd,graph,mdatoms,fr,
 +                    mu_tot,enerd,vir,pres,-1,TRUE);
 +    cr->nnodes = nnodes;
 +
 +    /* if forces are not small, warn user */
 +    get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,state_work);
 +
 +    if (MASTER(cr))
 +    {
 +        fprintf(stderr,"Maximum force:%12.5e\n",state_work->fmax);
 +        if (state_work->fmax > 1.0e-3) 
 +        {
 +            fprintf(stderr,"Maximum force probably not small enough to");
 +            fprintf(stderr," ensure that you are in an \nenergy well. ");
 +            fprintf(stderr,"Be aware that negative eigenvalues may occur");
 +            fprintf(stderr," when the\nresulting matrix is diagonalized.\n");
 +        }
 +    }
 +    
 +    /***********************************************************
 +     *
 +     *      Loop over all pairs in matrix 
 +     * 
 +     *      do_force called twice. Once with positive and 
 +     *      once with negative displacement 
 +     *
 +     ************************************************************/
 +
 +    /* Steps are divided one by one over the nodes */
 +    for(atom=cr->nodeid; atom<natoms; atom+=nnodes) 
 +    {
 +        
 +        for (d=0; d<DIM; d++) 
 +        {
 +            x_min = state_work->s.x[atom][d];
 +
 +            state_work->s.x[atom][d] = x_min - der_range;
 +          
 +            /* Make evaluate_energy do a single node force calculation */
 +            cr->nnodes = 1;
 +            evaluate_energy(fplog,bVerbose,cr,
 +                            state_global,top_global,state_work,top,
 +                            inputrec,nrnb,wcycle,gstat,
 +                            vsite,constr,fcd,graph,mdatoms,fr,
 +                            mu_tot,enerd,vir,pres,atom*2,FALSE);
 +                      
 +            for(i=0; i<natoms; i++)
 +            {
 +                copy_rvec(state_work->f[i], fneg[i]);
 +            }
 +            
 +            state_work->s.x[atom][d] = x_min + der_range;
 +            
 +            evaluate_energy(fplog,bVerbose,cr,
 +                            state_global,top_global,state_work,top,
 +                            inputrec,nrnb,wcycle,gstat,
 +                            vsite,constr,fcd,graph,mdatoms,fr,
 +                            mu_tot,enerd,vir,pres,atom*2+1,FALSE);
 +            cr->nnodes = nnodes;
 +
 +            /* x is restored to original */
 +            state_work->s.x[atom][d] = x_min;
 +
 +            for(j=0; j<natoms; j++) 
 +            {
 +                for (k=0; (k<DIM); k++) 
 +                {
 +                    dfdx[j][k] =
 +                        -(state_work->f[j][k] - fneg[j][k])/(2*der_range);
 +                }
 +            }
 +
 +            if (!MASTER(cr))
 +            {
 +#ifdef GMX_MPI
 +#ifdef GMX_DOUBLE
 +#define mpi_type MPI_DOUBLE
 +#else
 +#define mpi_type MPI_FLOAT
 +#endif
 +                MPI_Send(dfdx[0],natoms*DIM,mpi_type,MASTERNODE(cr),cr->nodeid,
 +                         cr->mpi_comm_mygroup);
 +#endif
 +            }
 +            else
 +            {
 +                for(node=0; (node<nnodes && atom+node<natoms); node++)
 +                {
 +                    if (node > 0)
 +                    {
 +#ifdef GMX_MPI
 +                        MPI_Status stat;
 +                        MPI_Recv(dfdx[0],natoms*DIM,mpi_type,node,node,
 +                                 cr->mpi_comm_mygroup,&stat);
 +#undef mpi_type
 +#endif
 +                    }
 +
 +                    row = (atom + node)*DIM + d;
 +
 +                    for(j=0; j<natoms; j++) 
 +                    {
 +                        for(k=0; k<DIM; k++) 
 +                        {
 +                            col = j*DIM + k;
 +                            
 +                            if (bSparse)
 +                            {
 +                                if (col >= row && dfdx[j][k] != 0.0)
 +                                {
 +                                    gmx_sparsematrix_increment_value(sparse_matrix,
 +                                                                     row,col,dfdx[j][k]);
 +                                }
 +                            }
 +                            else
 +                            {
 +                                full_matrix[row*sz+col] = dfdx[j][k];
 +                            }
 +                        }
 +                    }
 +                }
 +            }
 +            
 +            if (bVerbose && fplog)
 +            {
 +                fflush(fplog);            
 +            }
 +        }
 +        /* write progress */
 +        if (MASTER(cr) && bVerbose) 
 +        {
 +            fprintf(stderr,"\rFinished step %d out of %d",
 +                    min(atom+nnodes,natoms),natoms); 
 +            fflush(stderr);
 +        }
 +    }
 +    
 +    if (MASTER(cr)) 
 +    {
 +        fprintf(stderr,"\n\nWriting Hessian...\n");
 +        gmx_mtxio_write(ftp2fn(efMTX,nfile,fnm),sz,sz,full_matrix,sparse_matrix);
 +    }
 +
 +    finish_em(fplog,cr,outf,runtime,wcycle);
 +
 +    runtime->nsteps_done = natoms*2;
 +    
 +    return 0;
 +}
index df8c75415b85b4c65f46c641d56bfde940e16e38,0000000000000000000000000000000000000000..1260ef82d861d4289a2faba7bddb7eac43355247
mode 100644,000000..100644
--- /dev/null
@@@ -1,1353 -1,0 +1,1353 @@@
- #include "string.h"
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +
 +#include <math.h>
 +#include <stdio.h>
 +#include <stdlib.h>
 +#include "futil.h"
 +#include "rdgroup.h"
 +#include "statutil.h"
 +#include "gmxfio.h"
 +#include "vec.h" 
 +#include "typedefs.h"
 +#include "network.h"
 +#include "filenm.h"
++#include <string.h>
 +#include "smalloc.h"
 +#include "pull.h"
 +#include "xvgr.h"
 +#include "names.h"
 +#include "partdec.h"
 +#include "pbc.h"
 +#include "mtop_util.h"
 +#include "mdrun.h"
 +#include "gmx_ga2la.h"
 +#include "copyrite.h"
 +#include "macros.h"
 +
 +static void pull_print_x_grp(FILE *out,gmx_bool bRef,ivec dim,t_pullgrp *pgrp) 
 +{
 +    int m;
 +    
 +    for(m=0; m<DIM; m++)
 +    {
 +        if (dim[m])
 +        {
 +            fprintf(out,"\t%g",bRef ? pgrp->x[m] : pgrp->dr[m]);
 +        }
 +    }
 +}
 +
 +static void pull_print_x(FILE *out,t_pull *pull,double t) 
 +{
 +    int g;
 +  
 +    fprintf(out, "%.4f", t);
 +    
 +    if (PULL_CYL(pull))
 +    {
 +        for (g=1; g<1+pull->ngrp; g++)
 +        {
 +            pull_print_x_grp(out,TRUE ,pull->dim,&pull->dyna[g]);
 +            pull_print_x_grp(out,FALSE,pull->dim,&pull->grp[g]);
 +        }
 +    }
 +    else
 +    {
 +        for (g=0; g<1+pull->ngrp; g++)
 +        {
 +            if (pull->grp[g].nat > 0)
 +            {
 +                pull_print_x_grp(out,g==0,pull->dim,&pull->grp[g]);
 +            }
 +        }
 +    }
 +    fprintf(out,"\n");
 +}
 +
 +static void pull_print_f(FILE *out,t_pull *pull,double t) 
 +{
 +    int g,d;
 +    
 +    fprintf(out, "%.4f", t);
 +    
 +    for(g=1; g<1+pull->ngrp; g++)
 +    {
 +        if (pull->eGeom == epullgPOS)
 +        {
 +            for(d=0; d<DIM; d++)
 +            {
 +                if (pull->dim[d])
 +                {
 +                    fprintf(out,"\t%g",pull->grp[g].f[d]);
 +                }
 +            }
 +        }
 +        else
 +        {
 +            fprintf(out,"\t%g",pull->grp[g].f_scal);
 +        }
 +    }
 +    fprintf(out,"\n");
 +}
 +
 +void pull_print_output(t_pull *pull, gmx_large_int_t step, double time)
 +{
 +    if ((pull->nstxout != 0) && (step % pull->nstxout == 0))
 +    {
 +        pull_print_x(pull->out_x,pull,time);
 +    }
 +    
 +    if ((pull->nstfout != 0) && (step % pull->nstfout == 0))
 +    {
 +        pull_print_f(pull->out_f,pull,time);
 +    }
 +}
 +
 +static FILE *open_pull_out(const char *fn,t_pull *pull,const output_env_t oenv, 
 +                           gmx_bool bCoord, unsigned long Flags)
 +{
 +    FILE *fp;
 +    int  nsets,g,m;
 +    char **setname,buf[10];
 +    
 +    if(Flags & MD_APPENDFILES)
 +    {
 +        fp = gmx_fio_fopen(fn,"a+");
 +    }
 +    else
 +    {
 +        fp = gmx_fio_fopen(fn,"w+");
 +        if (bCoord)
 +        {
 +            xvgr_header(fp,"Pull COM",  "Time (ps)","Position (nm)",
 +                        exvggtXNY,oenv);
 +        }
 +        else
 +        {
 +            xvgr_header(fp,"Pull force","Time (ps)","Force (kJ/mol/nm)",
 +                        exvggtXNY,oenv);
 +        }
 +        
 +        snew(setname,(1+pull->ngrp)*DIM);
 +        nsets = 0;
 +        for(g=0; g<1+pull->ngrp; g++)
 +        {
 +            if (pull->grp[g].nat > 0 &&
 +                (g > 0 || (bCoord && !PULL_CYL(pull))))
 +            {
 +                if (bCoord || pull->eGeom == epullgPOS)
 +                {
 +                    if (PULL_CYL(pull))
 +                    {
 +                        for(m=0; m<DIM; m++)
 +                        {
 +                            if (pull->dim[m])
 +                            {
 +                                sprintf(buf,"%d %s%c",g,"c",'X'+m);
 +                                setname[nsets] = strdup(buf);
 +                                nsets++;
 +                            }
 +                        }
 +                    }
 +                    for(m=0; m<DIM; m++)
 +                    {
 +                        if (pull->dim[m])
 +                        {
 +                            sprintf(buf,"%d %s%c",
 +                                    g,(bCoord && g > 0)?"d":"",'X'+m);
 +                            setname[nsets] = strdup(buf);
 +                            nsets++;
 +                        }
 +                    }
 +                }
 +                else
 +                {
 +                    sprintf(buf,"%d",g);
 +                    setname[nsets] = strdup(buf);
 +                    nsets++;
 +                }
 +            }
 +        }
 +        if (bCoord || nsets > 1)
 +        {
 +            xvgr_legend(fp,nsets,(const char**)setname,oenv);
 +        }
 +        for(g=0; g<nsets; g++)
 +        {
 +            sfree(setname[g]);
 +        }
 +        sfree(setname);
 +    }
 +    
 +    return fp;
 +}
 +
 +/* Apply forces in a mass weighted fashion */
 +static void apply_forces_grp(t_pullgrp *pgrp, t_mdatoms * md,
 +                             gmx_ga2la_t ga2la,
 +                             dvec f_pull, int sign, rvec *f)
 +{
 +    int i,ii,m,start,end;
 +    double wmass,inv_wm;
 +    
 +    start = md->start;
 +    end   = md->homenr + start;
 +    
 +    inv_wm = pgrp->wscale*pgrp->invtm;
 +    
 +    for(i=0; i<pgrp->nat_loc; i++)
 +    {
 +        ii = pgrp->ind_loc[i];
 +        wmass = md->massT[ii];
 +        if (pgrp->weight_loc)
 +        {
 +            wmass *= pgrp->weight_loc[i];
 +        }
 +    
 +        for(m=0; m<DIM; m++)
 +        {
 +            f[ii][m] += sign * wmass * f_pull[m] * inv_wm;
 +        }
 +    }
 +}
 +
 +/* Apply forces in a mass weighted fashion */
 +static void apply_forces(t_pull * pull, t_mdatoms * md, gmx_ga2la_t ga2la,
 +                         rvec *f)
 +{
 +    int i;
 +    t_pullgrp *pgrp;
 +    
 +    for(i=1; i<pull->ngrp+1; i++)
 +    {
 +        pgrp = &(pull->grp[i]);
 +        apply_forces_grp(pgrp,md,ga2la,pgrp->f,1,f);
 +        if (pull->grp[0].nat)
 +        {
 +            if (PULL_CYL(pull))
 +            {
 +                apply_forces_grp(&(pull->dyna[i]),md,ga2la,pgrp->f,-1,f);
 +            }
 +            else
 +            {
 +                apply_forces_grp(&(pull->grp[0]),md,ga2la,pgrp->f,-1,f);
 +            }
 +        }
 +    }
 +}
 +
 +static double max_pull_distance2(const t_pull *pull,const t_pbc *pbc)
 +{
 +    double max_d2;
 +    int    m;
 +
 +    max_d2 = GMX_DOUBLE_MAX;
 +
 +    if (pull->eGeom != epullgDIRPBC)
 +    {
 +        for(m=0; m<pbc->ndim_ePBC; m++)
 +        {
 +            if (pull->dim[m] != 0)
 +            {
 +                max_d2 = min(max_d2,norm2(pbc->box[m]));
 +            }
 +        }
 +    }
 +    
 +    return 0.25*max_d2;
 +}
 +
 +static void get_pullgrps_dr(const t_pull *pull,const t_pbc *pbc,int g,double t,
 +                            dvec xg,dvec xref,double max_dist2,
 +                            dvec dr)
 +{
 +    t_pullgrp *pref,*pgrp;
 +    int       m;
 +    dvec      xrefr,dref={0,0,0};
 +    double    dr2;
 +    
 +    pgrp = &pull->grp[g];
 +    
 +    copy_dvec(xref,xrefr);
 +
 +    if (pull->eGeom == epullgDIRPBC)
 +    {
 +        for(m=0; m<DIM; m++)
 +        {
 +            dref[m] = (pgrp->init[0] + pgrp->rate*t)*pull->grp[g].vec[m];
 +        }
 +        /* Add the reference position, so we use the correct periodic image */
 +        dvec_inc(xrefr,dref);
 +    }
 +  
 +    pbc_dx_d(pbc, xg, xrefr, dr);
 +    dr2 = 0;
 +    for(m=0; m<DIM; m++)
 +    {
 +        dr[m] *= pull->dim[m];
 +        dr2 += dr[m]*dr[m];
 +    }
 +    if (max_dist2 >= 0 && dr2 > 0.98*0.98*max_dist2)
 +    {
 +        gmx_fatal(FARGS,"Distance of pull group %d (%f nm) is larger than 0.49 times the box size (%f)",g,sqrt(dr2),sqrt(max_dist2));
 +    }
 +
 +    if (pull->eGeom == epullgDIRPBC)
 +    {
 +        dvec_inc(dr,dref);
 +    }
 +}
 +
 +static void get_pullgrp_dr(const t_pull *pull,const t_pbc *pbc,int g,double t,
 +                           dvec dr)
 +{
 +    double md2;
 +
 +    if (pull->eGeom == epullgDIRPBC)
 +    {
 +        md2 = -1;
 +    }
 +    else
 +    {
 +        md2 = max_pull_distance2(pull,pbc);
 +    }
 +
 +    get_pullgrps_dr(pull,pbc,g,t,
 +                    pull->grp[g].x,
 +                    PULL_CYL(pull) ? pull->dyna[g].x : pull->grp[0].x,
 +                    md2,
 +                    dr);
 +}
 +
 +void get_pullgrp_distance(t_pull *pull,t_pbc *pbc,int g,double t,
 +                          dvec dr,dvec dev)
 +{
 +    static gmx_bool bWarned=FALSE; /* TODO: this should be fixed for thread-safety, 
 +                                  but is fairly benign */
 +    t_pullgrp *pgrp;
 +    int       m;
 +    dvec      ref;
 +    double    drs,inpr;
 +    
 +    pgrp = &pull->grp[g];
 +    
 +    get_pullgrp_dr(pull,pbc,g,t,dr);
 +    
 +    if (pull->eGeom == epullgPOS)
 +    {
 +        for(m=0; m<DIM; m++)
 +        {
 +            ref[m] = pgrp->init[m] + pgrp->rate*t*pgrp->vec[m];
 +        }
 +    }
 +    else
 +    {
 +        ref[0] = pgrp->init[0] + pgrp->rate*t;
 +    }
 +    
 +    switch (pull->eGeom)
 +    {
 +    case epullgDIST:
 +        /* Pull along the vector between the com's */
 +        if (ref[0] < 0 && !bWarned)
 +        {
 +            fprintf(stderr,"\nPull reference distance for group %d is negative (%f)\n",g,ref[0]);
 +            bWarned = TRUE;
 +        }
 +        drs = dnorm(dr);
 +        if (drs == 0)
 +        {
 +            /* With no vector we can not determine the direction for the force,
 +             * so we set the force to zero.
 +             */
 +            dev[0] = 0;
 +        }
 +        else
 +        {
 +            /* Determine the deviation */
 +            dev[0] = drs - ref[0];
 +        }
 +        break;
 +    case epullgDIR:
 +    case epullgDIRPBC:
 +    case epullgCYL:
 +        /* Pull along vec */
 +        inpr = 0;
 +        for(m=0; m<DIM; m++)
 +        {
 +            inpr += pgrp->vec[m]*dr[m];
 +        }
 +        dev[0] = inpr - ref[0];
 +        break;
 +    case epullgPOS:
 +        /* Determine the difference of dr and ref along each dimension */
 +        for(m=0; m<DIM; m++)
 +        {
 +            dev[m] = (dr[m] - ref[m])*pull->dim[m];
 +        }
 +        break;
 +    }
 +}
 +
 +void clear_pull_forces(t_pull *pull)
 +{
 +    int i;
 +    
 +    /* Zeroing the forces is only required for constraint pulling.
 +     * It can happen that multiple constraint steps need to be applied
 +     * and therefore the constraint forces need to be accumulated.
 +     */
 +    for(i=0; i<1+pull->ngrp; i++)
 +    {
 +        clear_dvec(pull->grp[i].f);
 +        pull->grp[i].f_scal = 0;
 +    }
 +}
 +
 +/* Apply constraint using SHAKE */
 +static void do_constraint(t_pull *pull, t_mdatoms *md, t_pbc *pbc,
 +                          rvec *x, rvec *v,
 +                          gmx_bool bMaster, tensor vir,
 +                          double dt, double t) 
 +{
 +
 +    dvec *r_ij;  /* x[i] com of i in prev. step. Obeys constr. -> r_ij[i] */
 +    dvec unc_ij; /* xp[i] com of i this step, before constr.   -> unc_ij  */
 +
 +    dvec *rinew;           /* current 'new' position of group i */
 +    dvec *rjnew;           /* current 'new' position of group j */
 +    dvec  ref,vec;
 +    double d0,inpr;
 +    double lambda, rm, mass, invdt=0;
 +    gmx_bool bConverged_all,bConverged=FALSE;
 +    int niter=0,g,ii,j,m,max_iter=100;
 +    double q,a,b,c;  /* for solving the quadratic equation, 
 +                        see Num. Recipes in C ed 2 p. 184 */
 +    dvec *dr;        /* correction for group i */
 +    dvec ref_dr;     /* correction for group j */
 +    dvec f;          /* the pull force */
 +    dvec tmp,tmp3;
 +    t_pullgrp *pdyna,*pgrp,*pref;
 +    
 +    snew(r_ij,pull->ngrp+1);
 +    if (PULL_CYL(pull))
 +    {
 +        snew(rjnew,pull->ngrp+1);
 +    }
 +    else
 +    {
 +        snew(rjnew,1);
 +    }
 +    snew(dr,pull->ngrp+1);
 +    snew(rinew,pull->ngrp+1);
 +    
 +    /* copy the current unconstrained positions for use in iterations. We 
 +       iterate until rinew[i] and rjnew[j] obey the constraints. Then
 +       rinew - pull.x_unc[i] is the correction dr to group i */
 +    for(g=1; g<1+pull->ngrp; g++)
 +    {
 +        copy_dvec(pull->grp[g].xp,rinew[g]);
 +    }
 +    if (PULL_CYL(pull))
 +    {
 +        for(g=1; g<1+pull->ngrp; g++)
 +        {
 +            copy_dvec(pull->dyna[g].xp,rjnew[g]);
 +        }
 +    }
 +    else
 +    {
 +        copy_dvec(pull->grp[0].xp,rjnew[0]);
 +    }
 +    
 +    /* Determine the constraint directions from the old positions */
 +    for(g=1; g<1+pull->ngrp; g++)
 +    {
 +        get_pullgrp_dr(pull,pbc,g,t,r_ij[g]);
 +        /* Store the difference vector at time t for printing */
 +        copy_dvec(r_ij[g],pull->grp[g].dr);
 +        if (debug)
 +        {
 +            fprintf(debug,"Pull group %d dr %f %f %f\n",
 +                    g,r_ij[g][XX],r_ij[g][YY],r_ij[g][ZZ]);
 +        }
 +        
 +        if (pull->eGeom == epullgDIR || pull->eGeom == epullgDIRPBC)
 +        {
 +            /* Select the component along vec */
 +            a = 0;
 +            for(m=0; m<DIM; m++)
 +            {
 +                a += pull->grp[g].vec[m]*r_ij[g][m];
 +            }
 +            for(m=0; m<DIM; m++)
 +            {
 +                r_ij[g][m] = a*pull->grp[g].vec[m];
 +            }
 +        }
 +    }
 +    
 +    bConverged_all = FALSE;
 +    while (!bConverged_all && niter < max_iter)
 +    {
 +        bConverged_all = TRUE;
 +
 +        /* loop over all constraints */
 +        for(g=1; g<1+pull->ngrp; g++)
 +        {
 +            pgrp = &pull->grp[g];
 +            if (PULL_CYL(pull))
 +                pref = &pull->dyna[g];
 +            else
 +                pref = &pull->grp[0];
 +
 +            /* Get the current difference vector */
 +            get_pullgrps_dr(pull,pbc,g,t,rinew[g],rjnew[PULL_CYL(pull) ? g : 0],
 +                            -1,unc_ij);
 +
 +            if (pull->eGeom == epullgPOS)
 +            {
 +                for(m=0; m<DIM; m++)
 +                {
 +                    ref[m] = pgrp->init[m] + pgrp->rate*t*pgrp->vec[m];
 +                }
 +            }
 +            else
 +            {
 +                ref[0] = pgrp->init[0] + pgrp->rate*t;
 +                /* Keep the compiler happy */
 +                ref[1] = 0;
 +                ref[2] = 0;
 +            }
 +            
 +            if (debug)
 +            {
 +                fprintf(debug,"Pull group %d, iteration %d\n",g,niter);
 +            }
 +            
 +            rm = 1.0/(pull->grp[g].invtm + pref->invtm);
 +            
 +            switch (pull->eGeom)
 +            {
 +            case epullgDIST:
 +                if (ref[0] <= 0)
 +                {
 +                    gmx_fatal(FARGS,"The pull constraint reference distance for group %d is <= 0 (%f)",g,ref[0]);
 +                }
 +                
 +                a = diprod(r_ij[g],r_ij[g]); 
 +                b = diprod(unc_ij,r_ij[g])*2;
 +                c = diprod(unc_ij,unc_ij) - dsqr(ref[0]);
 +                
 +                if (b < 0)
 +                {
 +                    q = -0.5*(b - sqrt(b*b - 4*a*c));
 +                    lambda = -q/a;
 +                }
 +                else
 +                {
 +                    q = -0.5*(b + sqrt(b*b - 4*a*c));
 +                    lambda = -c/q;
 +                }
 +                
 +                if (debug)
 +                {
 +                    fprintf(debug,
 +                            "Pull ax^2+bx+c=0: a=%e b=%e c=%e lambda=%e\n",
 +                            a,b,c,lambda);
 +                }
 +                
 +                /* The position corrections dr due to the constraints */
 +                dsvmul(-lambda*rm*pgrp->invtm, r_ij[g],  dr[g]);
 +                dsvmul( lambda*rm*pref->invtm, r_ij[g], ref_dr);
 +                break;
 +            case epullgDIR:
 +            case epullgDIRPBC:
 +            case epullgCYL:
 +                /* A 1-dimensional constraint along a vector */
 +                a = 0;
 +                for(m=0; m<DIM; m++)
 +                {
 +                    vec[m] = pgrp->vec[m];
 +                    a += unc_ij[m]*vec[m];
 +                }
 +                /* Select only the component along the vector */
 +                dsvmul(a,vec,unc_ij);
 +                lambda = a - ref[0];
 +                if (debug)
 +                {
 +                    fprintf(debug,"Pull inpr %e lambda: %e\n",a,lambda);
 +                }
 +                
 +                /* The position corrections dr due to the constraints */
 +                dsvmul(-lambda*rm*pull->grp[g].invtm, vec, dr[g]);
 +                dsvmul( lambda*rm*       pref->invtm, vec,ref_dr);
 +                break;
 +            case epullgPOS:
 +                for(m=0; m<DIM; m++)
 +                {
 +                    if (pull->dim[m])
 +                    {
 +                        lambda = r_ij[g][m] - ref[m];
 +                        /* The position corrections dr due to the constraints */
 +                        dr[g][m]  = -lambda*rm*pull->grp[g].invtm;
 +                        ref_dr[m] =  lambda*rm*pref->invtm;
 +                    }
 +                    else
 +                    {
 +                        dr[g][m]  = 0;
 +                        ref_dr[m] = 0;
 +                    }
 +                }
 +                break;
 +            }
 +            
 +            /* DEBUG */
 +            if (debug)
 +            {
 +                j = (PULL_CYL(pull) ? g : 0);
 +                get_pullgrps_dr(pull,pbc,g,t,rinew[g],rjnew[j],-1,tmp);
 +                get_pullgrps_dr(pull,pbc,g,t,dr[g]   ,ref_dr  ,-1,tmp3);
 +                fprintf(debug,
 +                        "Pull cur %8.5f %8.5f %8.5f j:%8.5f %8.5f %8.5f d: %8.5f\n",
 +                        rinew[g][0],rinew[g][1],rinew[g][2], 
 +                        rjnew[j][0],rjnew[j][1],rjnew[j][2], dnorm(tmp));
 +                if (pull->eGeom == epullgPOS)
 +                {
 +                    fprintf(debug,
 +                            "Pull ref %8.5f %8.5f %8.5f\n",
 +                            pgrp->vec[0],pgrp->vec[1],pgrp->vec[2]);
 +                }
 +                else
 +                {
 +                    fprintf(debug,
 +                            "Pull ref %8s %8s %8s   %8s %8s %8s d: %8.5f %8.5f %8.5f\n",
 +                            "","","","","","",ref[0],ref[1],ref[2]);
 +                }
 +                fprintf(debug,
 +                        "Pull cor %8.5f %8.5f %8.5f j:%8.5f %8.5f %8.5f d: %8.5f\n",
 +                        dr[g][0],dr[g][1],dr[g][2],
 +                        ref_dr[0],ref_dr[1],ref_dr[2],
 +                        dnorm(tmp3));
 +                fprintf(debug,
 +                        "Pull cor %10.7f %10.7f %10.7f\n",
 +                        dr[g][0],dr[g][1],dr[g][2]);
 +            } /* END DEBUG */
 +            
 +            /* Update the COMs with dr */
 +            dvec_inc(rinew[g],                     dr[g]);
 +            dvec_inc(rjnew[PULL_CYL(pull) ? g : 0],ref_dr);
 +        }
 +        
 +        /* Check if all constraints are fullfilled now */
 +        for(g=1; g<1+pull->ngrp; g++)
 +        {
 +            pgrp = &pull->grp[g];
 +            
 +            get_pullgrps_dr(pull,pbc,g,t,rinew[g],rjnew[PULL_CYL(pull) ? g : 0],
 +                            -1,unc_ij);
 +            
 +            switch (pull->eGeom)
 +            {
 +            case epullgDIST:
 +                bConverged = fabs(dnorm(unc_ij) - ref[0]) < pull->constr_tol;
 +                break;
 +            case epullgDIR:
 +            case epullgDIRPBC:
 +            case epullgCYL:
 +                for(m=0; m<DIM; m++)
 +                {
 +                    vec[m] = pgrp->vec[m];
 +                }
 +                inpr = diprod(unc_ij,vec);
 +                dsvmul(inpr,vec,unc_ij);
 +                bConverged =
 +                    fabs(diprod(unc_ij,vec) - ref[0]) < pull->constr_tol;
 +                break;
 +            case epullgPOS:
 +                bConverged = TRUE;
 +                for(m=0; m<DIM; m++)
 +                {
 +                    if (pull->dim[m] && 
 +                        fabs(unc_ij[m] - ref[m]) >= pull->constr_tol)
 +                    {
 +                        bConverged = FALSE;
 +                    }
 +                }
 +                break;
 +            }
 +            
 +            if (!bConverged)
 +            {
 +                if (debug)
 +                {
 +                    fprintf(debug,"NOT CONVERGED YET: Group %d:"
 +                            "d_ref = %f %f %f, current d = %f\n",
 +                            g,ref[0],ref[1],ref[2],dnorm(unc_ij));
 +                }
 +
 +                bConverged_all = FALSE;
 +            }
 +        }
 +        
 +        niter++;
 +        /* if after all constraints are dealt with and bConverged is still TRUE
 +           we're finished, if not we do another iteration */
 +    }
 +    if (niter > max_iter)
 +    {
 +        gmx_fatal(FARGS,"Too many iterations for constraint run: %d",niter);
 +    }
 +    
 +    /* DONE ITERATING, NOW UPDATE COORDINATES AND CALC. CONSTRAINT FORCES */
 +    
 +    if (v)
 +    {
 +        invdt = 1/dt;
 +    }
 +    
 +    /* update the normal groups */
 +    for(g=1; g<1+pull->ngrp; g++)
 +    {
 +        pgrp = &pull->grp[g];
 +        /* get the final dr and constraint force for group i */
 +        dvec_sub(rinew[g],pgrp->xp,dr[g]);
 +        /* select components of dr */
 +        for(m=0; m<DIM; m++)
 +        {
 +            dr[g][m] *= pull->dim[m];
 +        }
 +        dsvmul(1.0/(pgrp->invtm*dt*dt),dr[g],f);
 +        dvec_inc(pgrp->f,f);
 +        switch (pull->eGeom)
 +        {
 +        case epullgDIST:
 +            for(m=0; m<DIM; m++)
 +            {
 +                pgrp->f_scal += r_ij[g][m]*f[m]/dnorm(r_ij[g]);
 +            }
 +            break;
 +        case epullgDIR:
 +        case epullgDIRPBC:
 +        case epullgCYL:
 +            for(m=0; m<DIM; m++)
 +            {
 +                pgrp->f_scal += pgrp->vec[m]*f[m];
 +            }
 +            break;
 +        case epullgPOS:
 +            break;
 +        }
 +        
 +        if (vir && bMaster) {
 +            /* Add the pull contribution to the virial */
 +            for(j=0; j<DIM; j++)
 +            {
 +                for(m=0; m<DIM; m++)
 +                {
 +                    vir[j][m] -= 0.5*f[j]*r_ij[g][m];
 +                }
 +            }
 +        }
 +        
 +        /* update the atom positions */
 +        copy_dvec(dr[g],tmp);
 +        for(j=0;j<pgrp->nat_loc;j++)
 +        {
 +            ii = pgrp->ind_loc[j];
 +            if (pgrp->weight_loc)
 +            {
 +                dsvmul(pgrp->wscale*pgrp->weight_loc[j],dr[g],tmp); 
 +            }
 +            for(m=0; m<DIM; m++)
 +            {
 +                x[ii][m] += tmp[m];
 +            }
 +            if (v)
 +            {
 +                for(m=0; m<DIM; m++)
 +                {
 +                    v[ii][m] += invdt*tmp[m];
 +                }
 +            }
 +        }
 +    }
 +    
 +    /* update the reference groups */
 +    if (PULL_CYL(pull))
 +    {
 +        /* update the dynamic reference groups */
 +        for(g=1; g<1+pull->ngrp; g++)
 +        {
 +            pdyna = &pull->dyna[g];
 +            dvec_sub(rjnew[g],pdyna->xp,ref_dr);
 +            /* select components of ref_dr */
 +            for(m=0; m<DIM; m++)
 +            {
 +                ref_dr[m] *= pull->dim[m];
 +            }
 +            
 +            for(j=0;j<pdyna->nat_loc;j++)
 +            {
 +                /* reset the atoms with dr, weighted by w_i */
 +                dsvmul(pdyna->wscale*pdyna->weight_loc[j],ref_dr,tmp); 
 +                ii = pdyna->ind_loc[j];
 +                for(m=0; m<DIM; m++)
 +                {
 +                    x[ii][m] += tmp[m];
 +                }
 +                if (v)
 +                {
 +                    for(m=0; m<DIM; m++)
 +                    {
 +                        v[ii][m] += invdt*tmp[m];
 +                    }
 +                }
 +            }
 +        }
 +    }
 +    else
 +    {
 +        pgrp = &pull->grp[0];
 +        /* update the reference group */
 +        dvec_sub(rjnew[0],pgrp->xp, ref_dr); 
 +        /* select components of ref_dr */
 +        for(m=0;m<DIM;m++)
 +        {
 +            ref_dr[m] *= pull->dim[m];
 +        }
 +        
 +        copy_dvec(ref_dr,tmp);
 +        for(j=0; j<pgrp->nat_loc;j++)
 +        {
 +            ii = pgrp->ind_loc[j];
 +            if (pgrp->weight_loc)
 +            {
 +                dsvmul(pgrp->wscale*pgrp->weight_loc[j],ref_dr,tmp); 
 +            }
 +            for(m=0; m<DIM; m++)
 +            {
 +                x[ii][m] += tmp[m];
 +            }
 +            if (v)
 +            {
 +                for(m=0; m<DIM; m++)
 +                {
 +                    v[ii][m] += invdt*tmp[m];
 +                }
 +            }
 +        }
 +    }
 +    
 +    /* finished! I hope. Give back some memory */
 +    sfree(r_ij);
 +    sfree(rinew);
 +    sfree(rjnew);
 +    sfree(dr);
 +}
 +
 +/* Pulling with a harmonic umbrella potential or constant force */
 +static void do_pull_pot(int ePull,
 +                        t_pull *pull, t_pbc *pbc, double t, real lambda,
 +                        real *V, tensor vir, real *dVdl)
 +{
 +    int       g,j,m;
 +    dvec      dev;
 +    double    ndr,invdr;
 +    real      k,dkdl;
 +    t_pullgrp *pgrp;
 +    
 +    /* loop over the groups that are being pulled */
 +    *V    = 0;
 +    *dVdl = 0;
 +    for(g=1; g<1+pull->ngrp; g++)
 +    {
 +        pgrp = &pull->grp[g];
 +        get_pullgrp_distance(pull,pbc,g,t,pgrp->dr,dev);
 +        
 +        k    = (1.0 - lambda)*pgrp->k + lambda*pgrp->kB;
 +        dkdl = pgrp->kB - pgrp->k;
 +        
 +        switch (pull->eGeom)
 +        {
 +        case epullgDIST:
 +            ndr   = dnorm(pgrp->dr);
 +            invdr = 1/ndr;
 +            if (ePull == epullUMBRELLA)
 +            {
 +                pgrp->f_scal  =       -k*dev[0];
 +                *V           += 0.5*   k*dsqr(dev[0]);
 +                *dVdl        += 0.5*dkdl*dsqr(dev[0]);
 +            }
 +            else
 +            {
 +                pgrp->f_scal  =   -k;
 +                *V           +=    k*ndr;
 +                *dVdl        += dkdl*ndr;
 +            }
 +            for(m=0; m<DIM; m++)
 +            {
 +                pgrp->f[m]    = pgrp->f_scal*pgrp->dr[m]*invdr;
 +            }
 +            break;
 +        case epullgDIR:
 +        case epullgDIRPBC:
 +        case epullgCYL:
 +            if (ePull == epullUMBRELLA)
 +            {
 +                pgrp->f_scal  =       -k*dev[0];
 +                *V           += 0.5*   k*dsqr(dev[0]);
 +                *dVdl        += 0.5*dkdl*dsqr(dev[0]);
 +            }
 +            else
 +            {
 +                ndr = 0;
 +                for(m=0; m<DIM; m++)
 +                {
 +                    ndr += pgrp->vec[m]*pgrp->dr[m];
 +                }
 +                pgrp->f_scal  =   -k;
 +                *V           +=    k*ndr;
 +                *dVdl        += dkdl*ndr;
 +            }
 +            for(m=0; m<DIM; m++)
 +            {
 +                pgrp->f[m]    = pgrp->f_scal*pgrp->vec[m];
 +            }
 +            break;
 +        case epullgPOS:
 +            for(m=0; m<DIM; m++)
 +            {
 +                if (ePull == epullUMBRELLA)
 +                {
 +                    pgrp->f[m]  =       -k*dev[m];
 +                    *V         += 0.5*   k*dsqr(dev[m]);
 +                    *dVdl      += 0.5*dkdl*dsqr(dev[m]);
 +                }
 +                else
 +                {
 +                    pgrp->f[m]  =   -k*pull->dim[m];
 +                    *V         +=    k*pgrp->dr[m]*pull->dim[m];
 +                    *dVdl      += dkdl*pgrp->dr[m]*pull->dim[m];
 +                }
 +            }
 +            break;
 +        }
 +        
 +        if (vir)
 +        {
 +            /* Add the pull contribution to the virial */
 +            for(j=0; j<DIM; j++)
 +            {
 +                for(m=0;m<DIM;m++)
 +                {
 +                    vir[j][m] -= 0.5*pgrp->f[j]*pgrp->dr[m];
 +                }
 +            }
 +        }
 +    }
 +}
 +
 +real pull_potential(int ePull,t_pull *pull, t_mdatoms *md, t_pbc *pbc,
 +                  t_commrec *cr, double t, real lambda,
 +                  rvec *x, rvec *f, tensor vir, real *dvdlambda)
 +{
 +  real V,dVdl;
 +
 +  pull_calc_coms(cr,pull,md,pbc,t,x,NULL);
 +
 +  do_pull_pot(ePull,pull,pbc,t,lambda,
 +            &V,pull->bVirial && MASTER(cr) ? vir : NULL,&dVdl);
 +
 +  /* Distribute forces over pulled groups */
 +  apply_forces(pull, md, DOMAINDECOMP(cr) ? cr->dd->ga2la : NULL, f);
 +
 +  if (MASTER(cr)) {
 +    *dvdlambda += dVdl;
 +  }
 +
 +  return (MASTER(cr) ? V : 0.0);
 +}
 +
 +void pull_constraint(t_pull *pull, t_mdatoms *md, t_pbc *pbc,
 +                   t_commrec *cr, double dt, double t,
 +                   rvec *x, rvec *xp, rvec *v, tensor vir)
 +{
 +  pull_calc_coms(cr,pull,md,pbc,t,x,xp);
 +
 +  do_constraint(pull,md,pbc,xp,v,pull->bVirial && MASTER(cr),vir,dt,t);
 +}
 +
 +static void make_local_pull_group(gmx_ga2la_t ga2la,
 +                                t_pullgrp *pg,int start,int end)
 +{
 +  int i,ii;
 +
 +  pg->nat_loc = 0;
 +  for(i=0; i<pg->nat; i++) {
 +    ii = pg->ind[i];
 +    if (ga2la) {
 +      if (!ga2la_get_home(ga2la,ii,&ii)) {
 +        ii = -1;
 +      }
 +    }
 +    if (ii >= start && ii < end) {
 +      /* This is a home atom, add it to the local pull group */
 +      if (pg->nat_loc >= pg->nalloc_loc) {
 +      pg->nalloc_loc = over_alloc_dd(pg->nat_loc+1);
 +      srenew(pg->ind_loc,pg->nalloc_loc);
 +      if (pg->epgrppbc == epgrppbcCOS || pg->weight) {
 +        srenew(pg->weight_loc,pg->nalloc_loc);
 +      }
 +      }
 +      pg->ind_loc[pg->nat_loc] = ii;
 +      if (pg->weight) {
 +        pg->weight_loc[pg->nat_loc] = pg->weight[i];
 +      }
 +      pg->nat_loc++;
 +    }
 +  }
 +}
 +
 +void dd_make_local_pull_groups(gmx_domdec_t *dd,t_pull *pull,t_mdatoms *md)
 +{
 +  gmx_ga2la_t ga2la;
 +  int g;
 +  
 +  if (dd) {
 +    ga2la = dd->ga2la;
 +  } else {
 +    ga2la = NULL;
 +  }
 +
 +  if (pull->grp[0].nat > 0)
 +    make_local_pull_group(ga2la,&pull->grp[0],md->start,md->start+md->homenr);
 +  for(g=1; g<1+pull->ngrp; g++)
 +    make_local_pull_group(ga2la,&pull->grp[g],md->start,md->start+md->homenr);
 +}
 +
 +static void init_pull_group_index(FILE *fplog,t_commrec *cr,
 +                                int start,int end,
 +                                int g,t_pullgrp *pg,ivec pulldims,
 +                                gmx_mtop_t *mtop,t_inputrec *ir)
 +{
 +  int i,ii,d,nfrozen,ndim;
 +  real m,w,mbd;
 +  double tmass,wmass,wwmass;
 +  gmx_bool bDomDec;
 +  gmx_ga2la_t ga2la=NULL;
 +  gmx_groups_t *groups;
 +  t_atom *atom;
 +
 +  bDomDec = (cr && DOMAINDECOMP(cr));
 +  if (bDomDec) {
 +    ga2la = cr->dd->ga2la;
 +  }
 +
 +  if (EI_ENERGY_MINIMIZATION(ir->eI) || ir->eI == eiBD) {
 +    /* There are no masses in the integrator.
 +     * But we still want to have the correct mass-weighted COMs.
 +     * So we store the real masses in the weights.
 +     * We do not set nweight, so these weights do not end up in the tpx file.
 +     */
 +    if (pg->nweight == 0) {
 +      snew(pg->weight,pg->nat);
 +    }
 +  }
 +
 +  if (cr && PAR(cr)) {
 +    pg->nat_loc    = 0;
 +    pg->nalloc_loc = 0;
 +    pg->ind_loc    = NULL;
 +    pg->weight_loc = NULL;
 +  } else {
 +    pg->nat_loc = pg->nat;
 +    pg->ind_loc = pg->ind;
 +    if (pg->epgrppbc == epgrppbcCOS) {
 +      snew(pg->weight_loc,pg->nat);
 +    } else {
 +      pg->weight_loc = pg->weight;
 +    }
 +  }
 +
 +  groups = &mtop->groups;
 +
 +  nfrozen = 0;
 +  tmass  = 0;
 +  wmass  = 0;
 +  wwmass = 0;
 +  for(i=0; i<pg->nat; i++) {
 +    ii = pg->ind[i];
 +    gmx_mtop_atomnr_to_atom(mtop,ii,&atom);
 +    if (cr && PAR(cr) && !bDomDec && ii >= start && ii < end)
 +      pg->ind_loc[pg->nat_loc++] = ii;
 +    if (ir->opts.nFreeze) {
 +      for(d=0; d<DIM; d++)
 +      if (pulldims[d] && ir->opts.nFreeze[ggrpnr(groups,egcFREEZE,ii)][d])
 +        nfrozen++;
 +    }
 +    if (ir->efep == efepNO) {
 +      m = atom->m;
 +    } else {
 +      m = (1 - ir->init_lambda)*atom->m + ir->init_lambda*atom->mB;
 +    }
 +    if (pg->nweight > 0) {
 +      w = pg->weight[i];
 +    } else {
 +      w = 1;
 +    }
 +    if (EI_ENERGY_MINIMIZATION(ir->eI)) {
 +      /* Move the mass to the weight */
 +      w *= m;
 +      m = 1;
 +      pg->weight[i] = w;
 +    } else if (ir->eI == eiBD) {
 +      if (ir->bd_fric) {
 +      mbd = ir->bd_fric*ir->delta_t;
 +      } else {
 +      if (groups->grpnr[egcTC] == NULL) {
 +        mbd = ir->delta_t/ir->opts.tau_t[0];
 +      } else {
 +        mbd = ir->delta_t/ir->opts.tau_t[groups->grpnr[egcTC][ii]];
 +      }
 +      }
 +      w *= m/mbd;
 +      m = mbd;
 +      pg->weight[i] = w;
 +    }
 +    tmass  += m;
 +    wmass  += m*w;
 +    wwmass += m*w*w;
 +  }
 +
 +  if (wmass == 0) {
 +    gmx_fatal(FARGS,"The total%s mass of pull group %d is zero",
 +            pg->weight ? " weighted" : "",g);
 +  }
 +  if (fplog) {
 +    fprintf(fplog,
 +          "Pull group %d: %5d atoms, mass %9.3f",g,pg->nat,tmass);
 +    if (pg->weight || EI_ENERGY_MINIMIZATION(ir->eI) || ir->eI == eiBD) {
 +      fprintf(fplog,", weighted mass %9.3f",wmass*wmass/wwmass);
 +    }
 +    if (pg->epgrppbc == epgrppbcCOS) {
 +      fprintf(fplog,", cosine weighting will be used");
 +    }
 +    fprintf(fplog,"\n");
 +  }
 +  
 +  if (nfrozen == 0) {
 +    /* A value > 0 signals not frozen, it is updated later */
 +    pg->invtm  = 1.0;
 +  } else {
 +    ndim = 0;
 +    for(d=0; d<DIM; d++)
 +      ndim += pulldims[d]*pg->nat;
 +    if (fplog && nfrozen > 0 && nfrozen < ndim) {
 +      fprintf(fplog,
 +            "\nWARNING: In pull group %d some, but not all of the degrees of freedom\n"
 +            "         that are subject to pulling are frozen.\n"
 +            "         For pulling the whole group will be frozen.\n\n",
 +            g);
 +    }
 +    pg->invtm  = 0.0;
 +    pg->wscale = 1.0;
 +  }
 +}
 +
 +void init_pull(FILE *fplog,t_inputrec *ir,int nfile,const t_filenm fnm[],
 +             gmx_mtop_t *mtop,t_commrec *cr,const output_env_t oenv,
 +               gmx_bool bOutFile, unsigned long Flags)
 +{
 +    t_pull    *pull;
 +    t_pullgrp *pgrp;
 +    int       g,start=0,end=0,m;
 +    gmx_bool      bCite;
 +    
 +    pull = ir->pull;
 +    
 +    pull->ePBC = ir->ePBC;
 +    switch (pull->ePBC)
 +    {
 +    case epbcNONE: pull->npbcdim = 0; break;
 +    case epbcXY:   pull->npbcdim = 2; break;
 +    default:       pull->npbcdim = 3; break;
 +    }
 +    
 +    if (fplog)
 +    {
 +        fprintf(fplog,"\nWill apply %s COM pulling in geometry '%s'\n",
 +                EPULLTYPE(ir->ePull),EPULLGEOM(pull->eGeom));
 +        if (pull->grp[0].nat > 0)
 +        {
 +            fprintf(fplog,"between a reference group and %d group%s\n",
 +                    pull->ngrp,pull->ngrp==1 ? "" : "s");
 +        }
 +        else
 +        {
 +            fprintf(fplog,"with an absolute reference on %d group%s\n",
 +                    pull->ngrp,pull->ngrp==1 ? "" : "s");
 +        }
 +        bCite = FALSE;
 +        for(g=0; g<pull->ngrp+1; g++)
 +        {
 +            if (pull->grp[g].nat > 1 &&
 +                pull->grp[g].pbcatom < 0)
 +            {
 +                /* We are using cosine weighting */
 +                fprintf(fplog,"Cosine weighting is used for group %d\n",g);
 +                bCite = TRUE;
 +            }
 +        }
 +        if (bCite)
 +        {
 +            please_cite(fplog,"Engin2010");
 +        }
 +    }
 +    
 +    /* We always add the virial contribution,
 +     * except for geometry = direction_periodic where this is impossible.
 +     */
 +    pull->bVirial = (pull->eGeom != epullgDIRPBC);
 +    if (getenv("GMX_NO_PULLVIR") != NULL)
 +    {
 +        if (fplog)
 +        {
 +            fprintf(fplog,"Found env. var., will not add the virial contribution of the COM pull forces\n");
 +        }
 +        pull->bVirial = FALSE;
 +    }
 +    
 +    if (cr && PARTDECOMP(cr))
 +    {
 +        pd_at_range(cr,&start,&end);
 +    }
 +    pull->rbuf=NULL;
 +    pull->dbuf=NULL;
 +    pull->dbuf_cyl=NULL;
 +    pull->bRefAt = FALSE;
 +    pull->cosdim = -1;
 +    for(g=0; g<pull->ngrp+1; g++)
 +    {
 +        pgrp = &pull->grp[g];
 +        pgrp->epgrppbc = epgrppbcNONE;
 +        if (pgrp->nat > 0)
 +        {
 +            /* Determine if we need to take PBC into account for calculating
 +             * the COM's of the pull groups.
 +             */
 +            for(m=0; m<pull->npbcdim; m++)
 +            {
 +                if (pull->dim[m] && pgrp->nat > 1)
 +                {
 +                    if (pgrp->pbcatom >= 0)
 +                    {
 +                        pgrp->epgrppbc = epgrppbcREFAT;
 +                        pull->bRefAt   = TRUE;
 +                    }
 +                    else
 +                    {
 +                        if (pgrp->weight)
 +                        {
 +                            gmx_fatal(FARGS,"Pull groups can not have relative weights and cosine weighting at same time");
 +                        }
 +                        pgrp->epgrppbc = epgrppbcCOS;
 +                        if (pull->cosdim >= 0 && pull->cosdim != m)
 +                        {
 +                            gmx_fatal(FARGS,"Can only use cosine weighting with pulling in one dimension (use mdp option pull_dim)");
 +                        }
 +                        pull->cosdim = m;
 +                    }
 +                }
 +            }
 +            /* Set the indices */
 +            init_pull_group_index(fplog,cr,start,end,g,pgrp,pull->dim,mtop,ir);
 +            if (PULL_CYL(pull) && pgrp->invtm == 0)
 +            {
 +                gmx_fatal(FARGS,"Can not have frozen atoms in a cylinder pull group");
 +            }
 +        }
 +        else
 +        {
 +            /* Absolute reference, set the inverse mass to zero */
 +            pgrp->invtm  = 0;
 +            pgrp->wscale = 1;
 +        }
 +    }      
 +    
 +    /* if we use dynamic reference groups, do some initialising for them */
 +    if (PULL_CYL(pull))
 +    {
 +        if (pull->grp[0].nat == 0)
 +        {
 +            gmx_fatal(FARGS, "Dynamic reference groups are not supported when using absolute reference!\n");
 +        }
 +        snew(pull->dyna,pull->ngrp+1);
 +    }
 +    
 +    /* Only do I/O when we are doing dynamics and if we are the MASTER */
 +    pull->out_x = NULL;
 +    pull->out_f = NULL;
 +    if (bOutFile)
 +    {
 +        if (pull->nstxout > 0)
 +        {
 +            pull->out_x = open_pull_out(opt2fn("-px",nfile,fnm),pull,oenv,TRUE,Flags);
 +        }
 +        if (pull->nstfout > 0)
 +        {
 +            pull->out_f = open_pull_out(opt2fn("-pf",nfile,fnm),pull,oenv,
 +                                        FALSE,Flags);
 +        }
 +    }
 +}
 +
 +void finish_pull(FILE *fplog,t_pull *pull)
 +{
 +    if (pull->out_x)
 +    {
 +        gmx_fio_fclose(pull->out_x);
 +    }
 +    if (pull->out_f)
 +    {
 +        gmx_fio_fclose(pull->out_f);
 +    }
 +}
index 04e1aa8560dfb96f82f9614aaaf2fc634a2345c9,0000000000000000000000000000000000000000..07a56283e6ab15189e2494f50dc8d29b6cbb73bf
mode 100644,000000..100644
--- /dev/null
@@@ -1,1591 -1,0 +1,1591 @@@
- #include "time.h"
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#ifdef GMX_CRAY_XT3
 +#include<catamount/dclock.h>
 +#endif
 +
 +
 +#include <stdio.h>
 +#include <time.h>
 +#ifdef HAVE_SYS_TIME_H
 +#include <sys/time.h>
 +#endif
 +#include <math.h>
 +#include "typedefs.h"
 +#include "string2.h"
 +#include "gmxfio.h"
 +#include "smalloc.h"
 +#include "names.h"
 +#include "confio.h"
 +#include "mvdata.h"
 +#include "txtdump.h"
 +#include "pbc.h"
 +#include "chargegroup.h"
 +#include "vec.h"
++#include <time.h>
 +#include "nrnb.h"
 +#include "mshift.h"
 +#include "mdrun.h"
 +#include "update.h"
 +#include "physics.h"
 +#include "main.h"
 +#include "mdatoms.h"
 +#include "force.h"
 +#include "bondf.h"
 +#include "pme.h"
 +#include "pppm.h"
 +#include "disre.h"
 +#include "orires.h"
 +#include "network.h"
 +#include "calcmu.h"
 +#include "constr.h"
 +#include "xvgr.h"
 +#include "trnio.h"
 +#include "xtcio.h"
 +#include "copyrite.h"
 +#include "pull_rotation.h"
 +#include "domdec.h"
 +#include "partdec.h"
 +#include "gmx_wallcycle.h"
 +#include "genborn.h"
 +
 +#ifdef GMX_LIB_MPI
 +#include <mpi.h>
 +#endif
 +#ifdef GMX_THREAD_MPI
 +#include "tmpi.h"
 +#endif
 +
 +#include "adress.h"
 +#include "qmmm.h"
 +
 +#if 0
 +typedef struct gmx_timeprint {
 +    
 +} t_gmx_timeprint;
 +#endif
 +
 +/* Portable version of ctime_r implemented in src/gmxlib/string2.c, but we do not want it declared in public installed headers */
 +char *
 +gmx_ctime_r(const time_t *clock,char *buf, int n);
 +
 +
 +double
 +gmx_gettime()
 +{
 +#ifdef HAVE_GETTIMEOFDAY
 +      struct timeval t;
 +      double seconds;
 +      
 +      gettimeofday(&t,NULL);
 +      
 +      seconds = (double) t.tv_sec + 1e-6*(double)t.tv_usec;
 +      
 +      return seconds;
 +#else
 +      double  seconds;
 +      
 +      seconds = time(NULL);
 +      
 +      return seconds;
 +#endif
 +}
 +
 +
 +#define difftime(end,start) ((double)(end)-(double)(start)) /* NOTE: shadows the standard difftime() from <time.h>; this version simply subtracts the two arguments after converting them to double */
 +
 +void print_time(FILE *out,gmx_runtime_t *runtime,gmx_large_int_t step,   
 +                t_inputrec *ir, t_commrec *cr)
 +{
 +    time_t finish;
 +    char   timebuf[STRLEN];
 +    double dt;
 +    char buf[48];
 +    
 +#ifndef GMX_THREAD_MPI
 +    if (!PAR(cr))
 +#endif
 +    {
 +        fprintf(out,"\r");
 +    }
 +    fprintf(out,"step %s",gmx_step_str(step,buf));
 +    if ((step >= ir->nstlist))
 +    {
 +        if ((ir->nstlist == 0) || ((step % ir->nstlist) == 0))
 +        {
 +            /* We have done a full cycle let's update time_per_step */
 +            runtime->last = gmx_gettime();
 +            dt = difftime(runtime->last,runtime->real);
 +            runtime->time_per_step = dt/(step - ir->init_step + 1);
 +        }
 +        dt = (ir->nsteps + ir->init_step - step)*runtime->time_per_step;
 +        
 +        if (ir->nsteps >= 0)
 +        {
 +            if (dt >= 300)
 +            {    
 +                finish = (time_t) (runtime->last + dt);
 +                gmx_ctime_r(&finish,timebuf,STRLEN);
 +                sprintf(buf,"%s",timebuf);
 +                buf[strlen(buf)-1]='\0';
 +                fprintf(out,", will finish %s",buf);
 +            }
 +            else
 +                fprintf(out,", remaining runtime: %5d s          ",(int)dt);
 +        }
 +        else
 +        {
 +            fprintf(out," performance: %.1f ns/day    ",
 +                    ir->delta_t/1000*24*60*60/runtime->time_per_step);
 +        }
 +    }
 +#ifndef GMX_THREAD_MPI
 +    if (PAR(cr))
 +    {
 +        fprintf(out,"\n");
 +    }
 +#endif
 +
 +    fflush(out);
 +}
 +
 +#ifdef NO_CLOCK /* when NO_CLOCK is defined, every clock() call below expands to the constant -1 */
 +#define clock() -1
 +#endif
 +
 +static double set_proctime(gmx_runtime_t *runtime)
 +{
 +    double diff;
 +#ifdef GMX_CRAY_XT3
 +    double prev;
 +
 +    prev = runtime->proc;
 +    runtime->proc = dclock();
 +    
 +    diff = runtime->proc - prev;
 +#else
 +    clock_t prev;
 +
 +    prev = runtime->proc;
 +    runtime->proc = clock();
 +
 +    diff = (double)(runtime->proc - prev)/(double)CLOCKS_PER_SEC;
 +#endif
 +    if (diff < 0)
 +    {
 +        /* The counter has probably looped, ignore this data */
 +        diff = 0;
 +    }
 +
 +    return diff;
 +}
 +
 +void runtime_start(gmx_runtime_t *runtime)
 +{
 +    runtime->real = gmx_gettime();
 +    runtime->proc          = 0;
 +    set_proctime(runtime);
 +    runtime->realtime      = 0;
 +    runtime->proctime      = 0;
 +    runtime->last          = 0;
 +    runtime->time_per_step = 0;
 +}
 +
 +void runtime_end(gmx_runtime_t *runtime)
 +{
 +    double now;
 +    
 +    now = gmx_gettime();
 +    
 +    runtime->proctime += set_proctime(runtime);
 +    runtime->realtime  = now - runtime->real;
 +    runtime->real      = now;
 +}
 +
 +void runtime_upd_proc(gmx_runtime_t *runtime)
 +{
 +    runtime->proctime += set_proctime(runtime);
 +}
 +
 +void print_date_and_time(FILE *fplog,int nodeid,const char *title,
 +                         const gmx_runtime_t *runtime)
 +{
 +    int i;
 +    char timebuf[STRLEN];
 +    char time_string[STRLEN];
 +    time_t tmptime;
 +
 +    if (fplog)
 +    {
 +        if (runtime != NULL)
 +        {
 +            tmptime = (time_t) runtime->real;
 +            gmx_ctime_r(&tmptime,timebuf,STRLEN);
 +        }
 +        else
 +        {
 +            tmptime = (time_t) gmx_gettime();
 +            gmx_ctime_r(&tmptime,timebuf,STRLEN);
 +        }
 +        for(i=0; timebuf[i]>=' '; i++)
 +        {
 +            time_string[i]=timebuf[i];
 +        }
 +        time_string[i]='\0';
 +
 +        fprintf(fplog,"%s on node %d %s\n",title,nodeid,time_string);
 +    }
 +}
 +
 +static void sum_forces(int start,int end,rvec f[],rvec flr[])
 +{
 +  int i;
 +  
 +  if (gmx_debug_at) {
 +    pr_rvecs(debug,0,"fsr",f+start,end-start);
 +    pr_rvecs(debug,0,"flr",flr+start,end-start);
 +  }
 +  for(i=start; (i<end); i++)
 +    rvec_inc(f[i],flr[i]);
 +}
 +
 +/* 
 + * calc_f_el calculates forces due to an electric field.
 + *
 + * force is kJ mol^-1 nm^-1 = e * kJ mol^-1 nm^-1 / e 
 + *
 + * Et[] contains the parameters for the time dependent 
 + * part of the field (not yet used). 
 + * Ex[] contains the parameters for
 + * the spatial dependent part of the field. You can have cool periodic
 + * fields in principle, but only a constant field is supported
 + * now. 
 + * The function should return the energy due to the electric field
 + * (if any) but for now returns 0.
 + *
 + * WARNING:
 + * There can be problems with the virial.
 + * Since the field is not self-consistent this is unavoidable.
 + * For neutral molecules the virial is correct within this approximation.
 + * For neutral systems with many charged molecules the error is small.
 + * But for systems with a net charge or a few charged molecules
 + * the error can be significant when the field is high.
 + * Solution: implement a self-consitent electric field into PME.
 + */
 +static void calc_f_el(FILE *fp,int  start,int homenr,
 +                      real charge[],rvec x[],rvec f[],
 +                      t_cosines Ex[],t_cosines Et[],double t)
 +{
 +    rvec Ext;
 +    real t0;
 +    int  i,m;
 +    
 +    for(m=0; (m<DIM); m++)
 +    {
 +        if (Et[m].n > 0)
 +        {
 +            if (Et[m].n == 3)
 +            {
 +                t0 = Et[m].a[1];
 +                Ext[m] = cos(Et[m].a[0]*(t-t0))*exp(-sqr(t-t0)/(2.0*sqr(Et[m].a[2])));
 +            }
 +            else
 +            {
 +                Ext[m] = cos(Et[m].a[0]*t);
 +            }
 +        }
 +        else
 +        {
 +            Ext[m] = 1.0;
 +        }
 +        if (Ex[m].n > 0)
 +        {
 +            /* Convert the field strength from V/nm to MD-units */
 +            Ext[m] *= Ex[m].a[0]*FIELDFAC;
 +            for(i=start; (i<start+homenr); i++)
 +                f[i][m] += charge[i]*Ext[m];
 +        }
 +        else
 +        {
 +            Ext[m] = 0;
 +        }
 +    }
 +    if (fp != NULL)
 +    {
 +        fprintf(fp,"%10g  %10g  %10g  %10g #FIELD\n",t,
 +                Ext[XX]/FIELDFAC,Ext[YY]/FIELDFAC,Ext[ZZ]/FIELDFAC);
 +    }
 +}
 +
 +static void calc_virial(FILE *fplog,int start,int homenr,rvec x[],rvec f[],
 +                      tensor vir_part,t_graph *graph,matrix box,
 +                      t_nrnb *nrnb,const t_forcerec *fr,int ePBC)
 +{
 +  int i,j;
 +  tensor virtest;
 +
 +  /* The short-range virial from surrounding boxes */
 +  clear_mat(vir_part);
 +  calc_vir(fplog,SHIFTS,fr->shift_vec,fr->fshift,vir_part,ePBC==epbcSCREW,box);
 +  inc_nrnb(nrnb,eNR_VIRIAL,SHIFTS);
 +  
 +  /* Calculate partial virial, for local atoms only, based on short range. 
 +   * Total virial is computed in global_stat, called from do_md 
 +   */
 +  f_calc_vir(fplog,start,start+homenr,x,f,vir_part,graph,box);
 +  inc_nrnb(nrnb,eNR_VIRIAL,homenr);
 +
 +  /* Add position restraint contribution */
 +  for(i=0; i<DIM; i++) {
 +    vir_part[i][i] += fr->vir_diag_posres[i];
 +  }
 +
 +  /* Add wall contribution */
 +  for(i=0; i<DIM; i++) {
 +    vir_part[i][ZZ] += fr->vir_wall_z[i];
 +  }
 +
 +  if (debug)
 +    pr_rvecs(debug,0,"vir_part",vir_part,DIM);
 +}
 +
 +static void print_large_forces(FILE *fp,t_mdatoms *md,t_commrec *cr,
 +                             gmx_large_int_t step,real pforce,rvec *x,rvec *f)
 +{
 +  int  i;
 +  real pf2,fn2;
 +  char buf[STEPSTRSIZE];
 +
 +  pf2 = sqr(pforce);
 +  for(i=md->start; i<md->start+md->homenr; i++) {
 +    fn2 = norm2(f[i]);
 +    /* We also catch NAN, if the compiler does not optimize this away. */
 +    if (fn2 >= pf2 || fn2 != fn2) {
 +      fprintf(fp,"step %s  atom %6d  x %8.3f %8.3f %8.3f  force %12.5e\n",
 +            gmx_step_str(step,buf),
 +            ddglatnr(cr->dd,i),x[i][XX],x[i][YY],x[i][ZZ],sqrt(fn2));
 +    }
 +  }
 +}
 +/* do_force: compute forces and energies for the coordinates x.
 + *
 + * What is done is selected by the bits in flags that are tested below
 + * (GMX_FORCE_STATECHANGED, GMX_FORCE_NS, GMX_FORCE_DOLR, GMX_FORCE_FORCES,
 + * GMX_FORCE_SEPLRF, GMX_FORCE_DYNAMICBOX, GMX_FORCE_BONDED,
 + * GMX_FORCE_VIRIAL). In order, the body:
 + *  - updates the forcerec and the local dipole when the state changed;
 + *  - computes shift vectors and charge-group centers where needed;
 + *  - sends coordinates to separate PME nodes (GMX_MPI) and communicates
 + *    coordinates between nodes;
 + *  - updates AdResS weights, resets energies, does neighbour searching;
 + *  - evaluates position restraints and calls do_force_lowlevel for the
 + *    bonded and non-bonded interactions;
 + *  - adds electric-field and AdResS thermodynamic forces, communicates
 + *    forces, spreads virtual-site forces and computes the virial;
 + *  - adds COM-pull and enforced-rotation contributions and receives the
 + *    forces from separate PME nodes;
 + *  - sums the fr->f_novirsum forces into f and sums the potential energy.
 + * Results are written to f, vir_force, mu_tot and enerd.
 + */
 +void do_force(FILE *fplog,t_commrec *cr,
 +              t_inputrec *inputrec,
 +              gmx_large_int_t step,t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +              gmx_localtop_t *top,
 +              gmx_mtop_t *mtop,
 +              gmx_groups_t *groups,
 +              matrix box,rvec x[],history_t *hist,
 +              rvec f[],
 +              tensor vir_force,
 +              t_mdatoms *mdatoms,
 +              gmx_enerdata_t *enerd,t_fcdata *fcd,
 +              real lambda,t_graph *graph,
 +              t_forcerec *fr,gmx_vsite_t *vsite,rvec mu_tot,
 +              double t,FILE *field,gmx_edsam_t ed,
 +              gmx_bool bBornRadii,
 +              int flags)
 +{
 +    int    cg0,cg1,i,j;
 +    int    start,homenr;
 +    double mu[2*DIM]; /* local dipole; entries [0,DIM) and [DIM,2*DIM) are copied to fr->mu_tot[0] and fr->mu_tot[1] below */
 +    gmx_bool   bSepDVDL,bStateChanged,bNS,bFillGrid,bCalcCGCM,bBS;
 +    gmx_bool   bDoLongRange,bDoForces,bSepLRF;
 +    gmx_bool   bDoAdressWF;
 +    matrix boxs;
 +    real   e,v,dvdl;
 +    t_pbc  pbc;
 +    float  cycles_ppdpme,cycles_pme,cycles_seppme,cycles_force;
 +  
 +    start  = mdatoms->start;
 +    homenr = mdatoms->homenr;
 +
 +    bSepDVDL = (fr->bSepDVDL && do_per_step(step,inputrec->nstlog));
 +
 +    clear_mat(vir_force);
 +
 +    if (PARTDECOMP(cr))
 +    {
 +        pd_cg_range(cr,&cg0,&cg1);
 +    }
 +    else
 +    {
 +        cg0 = 0;
 +        if (DOMAINDECOMP(cr))
 +        {
 +            cg1 = cr->dd->ncg_tot;
 +        }
 +        else
 +        {
 +            cg1 = top->cgs.nr;
 +        }
 +        if (fr->n_tpi > 0)
 +        {
 +            cg1--; /* fr->n_tpi > 0: leave the last charge group out of the range cg0..cg1 */
 +        }
 +    }
 +
 +    bStateChanged = (flags & GMX_FORCE_STATECHANGED);
 +    bNS           = (flags & GMX_FORCE_NS) && (fr->bAllvsAll==FALSE); 
 +    bFillGrid     = (bNS && bStateChanged);
 +    bCalcCGCM     = (bFillGrid && !DOMAINDECOMP(cr));
 +    bDoLongRange  = (fr->bTwinRange && bNS && (flags & GMX_FORCE_DOLR));
 +    bDoForces     = (flags & GMX_FORCE_FORCES);
 +    bSepLRF       = (bDoLongRange && bDoForces && (flags & GMX_FORCE_SEPLRF));
 +    /* should probably move this to the forcerec since it doesn't change */
 +    bDoAdressWF   = ((fr->adress_type!=eAdressOff));
 +
 +    if (bStateChanged)
 +    {
 +        update_forcerec(fplog,fr,box);
 +        
 +        /* Calculate total (local) dipole moment in a temporary common array. 
 +         * This makes it possible to sum them over nodes faster.
 +         */
 +        calc_mu(start,homenr,
 +                x,mdatoms->chargeA,mdatoms->chargeB,mdatoms->nChargePerturbed,
 +                mu,mu+DIM);
 +    }
 +  
 +  if (fr->ePBC != epbcNONE) { 
 +    /* Compute shift vectors every step,
 +     * because of pressure coupling or box deformation!
 +     */
 +    if ((flags & GMX_FORCE_DYNAMICBOX) && bStateChanged)
 +      calc_shifts(box,fr->shift_vec);
 +    
 +    if (bCalcCGCM) { 
 +      put_charge_groups_in_box(fplog,cg0,cg1,fr->ePBC,box,
 +                             &(top->cgs),x,fr->cg_cm);
 +      inc_nrnb(nrnb,eNR_CGCM,homenr);
 +      inc_nrnb(nrnb,eNR_RESETX,cg1-cg0);
 +    } 
 +    else if (EI_ENERGY_MINIMIZATION(inputrec->eI) && graph) {
 +      unshift_self(graph,box,x);
 +    }
 +  } 
 +  else if (bCalcCGCM) {
 +    calc_cgcm(fplog,cg0,cg1,&(top->cgs),x,fr->cg_cm);
 +    inc_nrnb(nrnb,eNR_CGCM,homenr);
 +  }
 +  
 +  if (bCalcCGCM) {
 +    if (PAR(cr)) {
 +      move_cgcm(fplog,cr,fr->cg_cm);
 +    }
 +    if (gmx_debug_at)
 +      pr_rvecs(debug,0,"cgcm",fr->cg_cm,top->cgs.nr);
 +  }
 +
 +#ifdef GMX_MPI
 +  if (!(cr->duty & DUTY_PME)) {
 +    /* Send particle coordinates to the pme nodes.
 +     * Since this is only implemented for domain decomposition
 +     * and domain decomposition does not use the graph,
 +     * we do not need to worry about shifting.
 +     */    
 +
 +    wallcycle_start(wcycle,ewcPP_PMESENDX);
 +
 +    bBS = (inputrec->nwall == 2);
 +    if (bBS) {
 +      copy_mat(box,boxs);
 +      svmul(inputrec->wall_ewald_zfac,boxs[ZZ],boxs[ZZ]);
 +    }
 +
 +    gmx_pme_send_x(cr,bBS ? boxs : box,x,
 +                   mdatoms->nChargePerturbed,lambda,
 +                   ( flags & GMX_FORCE_VIRIAL),step);
 +
 +    wallcycle_stop(wcycle,ewcPP_PMESENDX);
 +  }
 +#endif /* GMX_MPI */
 +
 +    /* Communicate coordinates and sum dipole if necessary */
 +    if (PAR(cr))
 +    {
 +        wallcycle_start(wcycle,ewcMOVEX);
 +        if (DOMAINDECOMP(cr))
 +        {
 +            dd_move_x(cr->dd,box,x);
 +        }
 +        else
 +        {
 +            move_x(fplog,cr,GMX_LEFT,GMX_RIGHT,x,nrnb);
 +        }
 +        /* When we don't need the total dipole we sum it in global_stat */
 +        if (bStateChanged && NEED_MUTOT(*inputrec))
 +        {
 +            gmx_sumd(2*DIM,mu,cr);
 +        }
 +        wallcycle_stop(wcycle,ewcMOVEX);
 +    }
 +    if (bStateChanged)
 +    {
 +
 +        /* update adress weight beforehand */
 +        if(bDoAdressWF)
 +        {
 +            /* need pbc for adress weight calculation with pbc_dx */
 +            set_pbc(&pbc,inputrec->ePBC,box);
 +            if(fr->adress_site == eAdressSITEcog)
 +            {
 +                update_adress_weights_cog(top->idef.iparams,top->idef.il,x,fr,mdatoms,
 +                                          inputrec->ePBC==epbcNONE ? NULL : &pbc);
 +            }
 +            else if (fr->adress_site == eAdressSITEcom)
 +            {
 +                update_adress_weights_com(fplog,cg0,cg1,&(top->cgs),x,fr,mdatoms,
 +                                          inputrec->ePBC==epbcNONE ? NULL : &pbc);
 +            }
 +            else if (fr->adress_site == eAdressSITEatomatom){
 +                update_adress_weights_atom_per_atom(cg0,cg1,&(top->cgs),x,fr,mdatoms,
 +                                          inputrec->ePBC==epbcNONE ? NULL : &pbc);
 +            }
 +            else
 +            {
 +                update_adress_weights_atom(cg0,cg1,&(top->cgs),x,fr,mdatoms,
 +                                           inputrec->ePBC==epbcNONE ? NULL : &pbc);
 +            }
 +        }
 +
 +        for(i=0; i<2; i++)
 +        {
 +            for(j=0;j<DIM;j++)
 +            {
 +                fr->mu_tot[i][j] = mu[i*DIM + j];
 +            }
 +        }
 +    }
 +    if (fr->efep == efepNO)
 +    {
 +        copy_rvec(fr->mu_tot[0],mu_tot);
 +    }
 +    else
 +    {
 +        for(j=0; j<DIM; j++)
 +        {
 +            mu_tot[j] =
 +                (1.0 - lambda)*fr->mu_tot[0][j] + lambda*fr->mu_tot[1][j];
 +        }
 +    }
 +
 +    /* Reset energies */
 +    reset_enerdata(&(inputrec->opts),fr,bNS,enerd,MASTER(cr));
 +    clear_rvecs(SHIFTS,fr->fshift);
 +
 +    if (bNS)
 +    {
 +        wallcycle_start(wcycle,ewcNS);
 +        
 +        if (graph && bStateChanged)
 +        {
 +            /* Calculate intramolecular shift vectors to make molecules whole */
 +            mk_mshift(fplog,graph,fr->ePBC,box,x);
 +        }
 +
 +        /* Reset long range forces if necessary */
 +        if (fr->bTwinRange)
 +        {
 +            /* Reset the (long-range) forces if necessary */
 +            clear_rvecs(fr->natoms_force_constr,bSepLRF ? fr->f_twin : f);
 +        }
 +
 +        /* Do the actual neighbour searching and if twin range electrostatics
 +         * also do the calculation of long range forces and energies.
 +         */
 +        dvdl = 0; 
 +        ns(fplog,fr,x,box,
 +           groups,&(inputrec->opts),top,mdatoms,
 +           cr,nrnb,lambda,&dvdl,&enerd->grpp,bFillGrid,
 +           bDoLongRange,bDoForces,bSepLRF ? fr->f_twin : f);
 +        if (bSepDVDL)
 +        {
 +            fprintf(fplog,sepdvdlformat,"LR non-bonded",0.0,dvdl);
 +        }
 +        enerd->dvdl_lin += dvdl;
 +        
 +        wallcycle_stop(wcycle,ewcNS);
 +    }
 +      
 +    if (inputrec->implicit_solvent && bNS) 
 +    {
 +        make_gb_nblist(cr,inputrec->gb_algorithm,inputrec->rlist,
 +                       x,box,fr,&top->idef,graph,fr->born);
 +    }
 +      
 +    if (DOMAINDECOMP(cr))
 +    {
 +        if (!(cr->duty & DUTY_PME))
 +        {
 +            wallcycle_start(wcycle,ewcPPDURINGPME);
 +            dd_force_flop_start(cr->dd,nrnb);
 +        }
 +    }
 +    
 +    if (inputrec->bRot)
 +    {
 +        /* Enforced rotation has its own cycle counter that starts after the collective
 +         * coordinates have been communicated. It is added to ddCyclF to allow
 +         * for proper load-balancing */
 +        wallcycle_start(wcycle,ewcROT);
 +        do_rotation(cr,inputrec,box,x,t,step,wcycle,bNS);
 +        wallcycle_stop(wcycle,ewcROT);
 +    }
 +
 +    /* Start the force cycle counter.
 +     * This counter is stopped below, right after do_force_lowlevel returns.
 +     * No parallel communication should occur while this counter is running,
 +     * since that will interfere with the dynamic load balancing.
 +     */
 +    wallcycle_start(wcycle,ewcFORCE);
 +
 +    if (bDoForces)
 +    {
 +        /* Reset forces for which the virial is calculated separately:
 +         * PME/Ewald forces if necessary */
 +        if (fr->bF_NoVirSum) 
 +        {
 +            if (flags & GMX_FORCE_VIRIAL)
 +            {
 +                fr->f_novirsum = fr->f_novirsum_alloc;
 +                if (fr->bDomDec)
 +                {
 +                    clear_rvecs(fr->f_novirsum_n,fr->f_novirsum);
 +                }
 +                else
 +                {
 +                    clear_rvecs(homenr,fr->f_novirsum+start);
 +                }
 +            }
 +            else
 +            {
 +                /* We are not calculating the pressure so we do not need
 +                 * a separate array for forces that do not contribute
 +                 * to the pressure.
 +                 */
 +                fr->f_novirsum = f;
 +            }
 +        }
 +
 +        if (bSepLRF)
 +        {
 +            /* Start the force array from the long range forces:
 +             * f is overwritten with a copy of fr->f_twin here.
 +             */
 +            for(i=0; i<fr->natoms_force_constr; i++)
 +            {
 +                copy_rvec(fr->f_twin[i],f[i]);
 +            }
 +        }
 +        else if (!(fr->bTwinRange && bNS))
 +        {
 +            /* Clear the short-range forces */
 +            clear_rvecs(fr->natoms_force_constr,f);
 +        }
 +
 +        clear_rvec(fr->vir_diag_posres);
 +    }
 +    if (inputrec->ePull == epullCONSTRAINT)
 +    {
 +        clear_pull_forces(inputrec->pull);
 +    }
 +
 +    /* update QMMMrec, if necessary */
 +    if(fr->bQMMM)
 +    {
 +        update_QMMMrec(cr,fr,x,mdatoms,box,top);
 +    }
 +
 +    if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_POSRES].nr > 0)
 +    {
 +        /* Position restraints always require full pbc. Check if we already did it for Adress */
 +        if(!(bStateChanged && bDoAdressWF))
 +        {
 +            set_pbc(&pbc,inputrec->ePBC,box);
 +        }
 +        v = posres(top->idef.il[F_POSRES].nr,top->idef.il[F_POSRES].iatoms,
 +                   top->idef.iparams_posres,
 +                   (const rvec*)x,fr->f_novirsum,fr->vir_diag_posres,
 +                   inputrec->ePBC==epbcNONE ? NULL : &pbc,lambda,&dvdl,
 +                   fr->rc_scaling,fr->ePBC,fr->posres_com,fr->posres_comB);
 +        if (bSepDVDL)
 +        {
 +            fprintf(fplog,sepdvdlformat,
 +                    interaction_function[F_POSRES].longname,v,dvdl);
 +        }
 +        enerd->term[F_POSRES] += v;
 +        /* This linear lambda dependence assumption is only correct
 +         * when only k depends on lambda,
 +         * not when the reference position depends on lambda.
 +         * grompp checks for this.
 +         */
 +        enerd->dvdl_lin += dvdl;
 +        inc_nrnb(nrnb,eNR_POSRES,top->idef.il[F_POSRES].nr/2);
 +    }
 +
 +    /* Compute the bonded and non-bonded energies and optionally forces */    
 +    do_force_lowlevel(fplog,step,fr,inputrec,&(top->idef),
 +                      cr,nrnb,wcycle,mdatoms,&(inputrec->opts),
 +                      x,hist,f,enerd,fcd,mtop,top,fr->born,
 +                      &(top->atomtypes),bBornRadii,box,
 +                      lambda,graph,&(top->excls),fr->mu_tot,
 +                      flags,&cycles_pme);
 +    
 +    cycles_force = wallcycle_stop(wcycle,ewcFORCE);
 +    
 +    if (ed)
 +    {
 +        do_flood(fplog,cr,x,f,ed,box,step);
 +    }
 +      
 +    if (DOMAINDECOMP(cr))
 +    {
 +        dd_force_flop_stop(cr->dd,nrnb);
 +        if (wcycle)
 +        {
 +            dd_cycles_add(cr->dd,cycles_force-cycles_pme,ddCyclF);
 +        }
 +    }
 +    
 +    if (bDoForces)
 +    {
 +        if (IR_ELEC_FIELD(*inputrec))
 +        {
 +            /* Compute forces due to electric field */
 +            calc_f_el(MASTER(cr) ? field : NULL,
 +                      start,homenr,mdatoms->chargeA,x,fr->f_novirsum,
 +                      inputrec->ex,inputrec->et,t);
 +        }
 +
 +        if (bDoAdressWF && fr->adress_icor == eAdressICThermoForce)
 +        {
 +            /* Compute thermodynamic force in hybrid AdResS region */
 +            adress_thermo_force(start,homenr,&(top->cgs),x,fr->f_novirsum,fr,mdatoms,
 +                                inputrec->ePBC==epbcNONE ? NULL : &pbc);
 +        }
 +        
 +        /* Communicate the forces */
 +        if (PAR(cr))
 +        {
 +            wallcycle_start(wcycle,ewcMOVEF);
 +            if (DOMAINDECOMP(cr))
 +            {
 +                dd_move_f(cr->dd,f,fr->fshift);
 +                /* Do we need to communicate the separate force array
 +                 * for terms that do not contribute to the single sum virial?
 +                 * Position restraints and electric fields do not introduce
 +                 * inter-cg forces, only full electrostatics methods do.
 +                 * When we do not calculate the virial, fr->f_novirsum = f,
 +                 * so we have already communicated these forces.
 +                 */
 +                if (EEL_FULL(fr->eeltype) && cr->dd->n_intercg_excl &&
 +                    (flags & GMX_FORCE_VIRIAL))
 +                {
 +                    dd_move_f(cr->dd,fr->f_novirsum,NULL);
 +                }
 +                if (bSepLRF)
 +                {
 +                    /* We should not update the shift forces here,
 +                     * since f_twin is already included in f.
 +                     */
 +                    dd_move_f(cr->dd,fr->f_twin,NULL);
 +                }
 +            }
 +            else
 +            {
 +                pd_move_f(cr,f,nrnb);
 +                if (bSepLRF)
 +                {
 +                    pd_move_f(cr,fr->f_twin,nrnb);
 +                }
 +            }
 +            wallcycle_stop(wcycle,ewcMOVEF);
 +        }
 +
 +        /* If we have NoVirSum forces, but we do not calculate the virial,
 +         * fr->f_novirsum=f and the spreading is done later, in the
 +         * f_novirsum block near the end of this function.
 +         */
 +        if (vsite && !(fr->bF_NoVirSum && !(flags & GMX_FORCE_VIRIAL)))
 +        {
 +            wallcycle_start(wcycle,ewcVSITESPREAD);
 +            spread_vsite_f(fplog,vsite,x,f,fr->fshift,nrnb,
 +                           &top->idef,fr->ePBC,fr->bMolPBC,graph,box,cr);
 +            wallcycle_stop(wcycle,ewcVSITESPREAD);
 +
 +            if (bSepLRF)
 +            {
 +                wallcycle_start(wcycle,ewcVSITESPREAD);
 +                spread_vsite_f(fplog,vsite,x,fr->f_twin,NULL,
 +                               nrnb,
 +                               &top->idef,fr->ePBC,fr->bMolPBC,graph,box,cr);
 +                wallcycle_stop(wcycle,ewcVSITESPREAD);
 +            }
 +        }
 +        
 +        if (flags & GMX_FORCE_VIRIAL)
 +        {
 +            /* Calculation of the virial must be done after vsites! */
 +            calc_virial(fplog,mdatoms->start,mdatoms->homenr,x,f,
 +                        vir_force,graph,box,nrnb,fr,inputrec->ePBC);
 +        }
 +    }
 +
 +    enerd->term[F_COM_PULL] = 0; /* reset here; the pull and the enforced rotation energies are both accumulated into this term below */
 +    if (inputrec->ePull == epullUMBRELLA || inputrec->ePull == epullCONST_F)
 +    {
 +        /* Calculate the center of mass forces, this requires communication,
 +         * which is why pull_potential is called close to other communication.
 +         * The virial contribution is calculated directly,
 +         * which is why we call pull_potential after calc_virial.
 +         */
 +        set_pbc(&pbc,inputrec->ePBC,box);
 +        dvdl = 0; 
 +        enerd->term[F_COM_PULL] +=
 +            pull_potential(inputrec->ePull,inputrec->pull,mdatoms,&pbc,
 +                           cr,t,lambda,x,f,vir_force,&dvdl);
 +        if (bSepDVDL)
 +        {
 +            fprintf(fplog,sepdvdlformat,"Com pull",enerd->term[F_COM_PULL],dvdl);
 +        }
 +        enerd->dvdl_lin += dvdl;
 +    }
 +    
 +    /* Add the forces from enforced rotation potentials (if any) */
 +    if (inputrec->bRot)
 +    {
 +        wallcycle_start(wcycle,ewcROTadd);
 +        enerd->term[F_COM_PULL] += add_rot_forces(inputrec->rot, f, cr,step,t);
 +        wallcycle_stop(wcycle,ewcROTadd);
 +    }
 +
 +    if (PAR(cr) && !(cr->duty & DUTY_PME))
 +    {
 +        cycles_ppdpme = wallcycle_stop(wcycle,ewcPPDURINGPME);
 +        dd_cycles_add(cr->dd,cycles_ppdpme,ddCyclPPduringPME);
 +
 +        /* In case of node-splitting, the PP nodes receive the long-range 
 +         * forces, virial and energy from the PME nodes here.
 +         */    
 +        wallcycle_start(wcycle,ewcPP_PMEWAITRECVF);
 +        dvdl = 0;
 +        gmx_pme_receive_f(cr,fr->f_novirsum,fr->vir_el_recip,&e,&dvdl,
 +                          &cycles_seppme);
 +        if (bSepDVDL)
 +        {
 +            fprintf(fplog,sepdvdlformat,"PME mesh",e,dvdl);
 +        }
 +        enerd->term[F_COUL_RECIP] += e;
 +        enerd->dvdl_lin += dvdl;
 +        if (wcycle)
 +        {
 +            dd_cycles_add(cr->dd,cycles_seppme,ddCyclPME);
 +        }
 +        wallcycle_stop(wcycle,ewcPP_PMEWAITRECVF);
 +    }
 +
 +    if (bDoForces && fr->bF_NoVirSum)
 +    {
 +        if (vsite)
 +        {
 +            /* Spread the mesh force on virtual sites to the other particles... 
 +             * This is parallelized. MPI communication is performed
 +             * if the constructing atoms aren't local.
 +             */
 +            wallcycle_start(wcycle,ewcVSITESPREAD);
 +            spread_vsite_f(fplog,vsite,x,fr->f_novirsum,NULL,nrnb,
 +                           &top->idef,fr->ePBC,fr->bMolPBC,graph,box,cr);
 +            wallcycle_stop(wcycle,ewcVSITESPREAD);
 +        }
 +        if (flags & GMX_FORCE_VIRIAL)
 +        {
 +            /* Now add the forces, this is local */
 +            if (fr->bDomDec)
 +            {
 +                sum_forces(0,fr->f_novirsum_n,f,fr->f_novirsum);
 +            }
 +            else
 +            {
 +                sum_forces(start,start+homenr,f,fr->f_novirsum);
 +            }
 +            if (EEL_FULL(fr->eeltype))
 +            {
 +                /* Add the mesh contribution to the virial */
 +                m_add(vir_force,fr->vir_el_recip,vir_force);
 +            }
 +            if (debug)
 +            {
 +                pr_rvecs(debug,0,"vir_force",vir_force,DIM);
 +            }
 +        }
 +    }
 +    
 +    /* Sum the potential energy terms from group contributions */
 +    sum_epot(&(inputrec->opts),enerd);
 +    
 +    if (fr->print_force >= 0 && bDoForces)
 +    {
 +        print_large_forces(stderr,mdatoms,cr,step,fr->print_force,x,f);
 +    }
 +}
 +
 +void do_constrain_first(FILE *fplog,gmx_constr_t constr,
 +                        t_inputrec *ir,t_mdatoms *md,
 +                        t_state *state,rvec *f,
 +                        t_graph *graph,t_commrec *cr,t_nrnb *nrnb,
 +                        t_forcerec *fr, gmx_localtop_t *top, tensor shake_vir)
 +{
 +    int    i,m,start,end;
 +    gmx_large_int_t step;
 +    double mass,tmass,vcm[4];
 +    real   dt=ir->delta_t;
 +    real   dvdlambda;
 +    rvec   *savex;
 +    
 +    snew(savex,state->natoms);
 +
 +    start = md->start;
 +    end   = md->homenr + start;
 +    
 +    if (debug)
 +        fprintf(debug,"vcm: start=%d, homenr=%d, end=%d\n",
 +                start,md->homenr,end);
 +    /* Do a first constrain to reset particles... */
 +    step = ir->init_step;
 +    if (fplog)
 +    {
 +        char buf[STEPSTRSIZE];
 +        fprintf(fplog,"\nConstraining the starting coordinates (step %s)\n",
 +                gmx_step_str(step,buf));
 +    }
 +    dvdlambda = 0;
 +    
 +    /* constrain the current position */
 +    constrain(NULL,TRUE,FALSE,constr,&(top->idef),
 +              ir,NULL,cr,step,0,md,
 +              state->x,state->x,NULL,
 +              state->box,state->lambda,&dvdlambda,
 +              NULL,NULL,nrnb,econqCoord,ir->epc==epcMTTK,state->veta,state->veta);
 +    if (EI_VV(ir->eI)) 
 +    {
 +        /* constrain the inital velocity, and save it */
 +        /* also may be useful if we need the ekin from the halfstep for velocity verlet */
 +        /* might not yet treat veta correctly */
 +        constrain(NULL,TRUE,FALSE,constr,&(top->idef),
 +                  ir,NULL,cr,step,0,md,
 +                  state->x,state->v,state->v,
 +                  state->box,state->lambda,&dvdlambda,
 +                  NULL,NULL,nrnb,econqVeloc,ir->epc==epcMTTK,state->veta,state->veta);
 +    }
 +    /* constrain the inital velocities at t-dt/2 */
 +    if (EI_STATE_VELOCITY(ir->eI) && ir->eI!=eiVV)
 +    {
 +        for(i=start; (i<end); i++) 
 +        {
 +            for(m=0; (m<DIM); m++) 
 +            {
 +                /* Reverse the velocity */
 +                state->v[i][m] = -state->v[i][m];
 +                /* Store the position at t-dt in buf */
 +                savex[i][m] = state->x[i][m] + dt*state->v[i][m];
 +            }
 +        }
 +    /* Shake the positions at t=-dt with the positions at t=0                        
 +     * as reference coordinates.                                                     
 +         */
 +        if (fplog)
 +        {
 +            char buf[STEPSTRSIZE];
 +            fprintf(fplog,"\nConstraining the coordinates at t0-dt (step %s)\n",
 +                    gmx_step_str(step,buf));
 +        }
 +        dvdlambda = 0;
 +        constrain(NULL,TRUE,FALSE,constr,&(top->idef),
 +                  ir,NULL,cr,step,-1,md,
 +                  state->x,savex,NULL,
 +                  state->box,state->lambda,&dvdlambda,
 +                  state->v,NULL,nrnb,econqCoord,ir->epc==epcMTTK,state->veta,state->veta);
 +        
 +        for(i=start; i<end; i++) {
 +            for(m=0; m<DIM; m++) {
 +                /* Re-reverse the velocities */
 +                state->v[i][m] = -state->v[i][m];
 +            }
 +        }
 +    }
 +    
 +    for(m=0; (m<4); m++)
 +        vcm[m] = 0;
 +    for(i=start; i<end; i++) {
 +        mass = md->massT[i];
 +        for(m=0; m<DIM; m++) {
 +            vcm[m] += state->v[i][m]*mass;
 +        }
 +        vcm[3] += mass;
 +    }
 +    
 +    if (ir->nstcomm != 0 || debug) {
 +        /* Compute the global sum of vcm */
 +        if (debug)
 +            fprintf(debug,"vcm: %8.3f  %8.3f  %8.3f,"
 +                    " total mass = %12.5e\n",vcm[XX],vcm[YY],vcm[ZZ],vcm[3]);
 +        if (PAR(cr))
 +            gmx_sumd(4,vcm,cr);
 +        tmass = vcm[3];
 +        for(m=0; (m<DIM); m++)
 +            vcm[m] /= tmass;
 +        if (debug) 
 +            fprintf(debug,"vcm: %8.3f  %8.3f  %8.3f,"
 +                    " total mass = %12.5e\n",vcm[XX],vcm[YY],vcm[ZZ],tmass);
 +        if (ir->nstcomm != 0) {
 +            /* Now we have the velocity of center of mass, let's remove it */
 +            for(i=start; (i<end); i++) {
 +                for(m=0; (m<DIM); m++)
 +                    state->v[i][m] -= vcm[m];
 +            }
 +
 +        }
 +    }
 +    sfree(savex);
 +}
 +
 +void calc_enervirdiff(FILE *fplog,int eDispCorr,t_forcerec *fr)
 +{
 +  double eners[2],virs[2],enersum,virsum,y0,f,g,h;
 +  double r0,r1,r,rc3,rc9,ea,eb,ec,pa,pb,pc,pd;
 +  double invscale,invscale2,invscale3;
 +  int    ri0,ri1,ri,i,offstart,offset;
 +  real   scale,*vdwtab; 
 +
 +  fr->enershiftsix = 0;
 +  fr->enershifttwelve = 0;
 +  fr->enerdiffsix = 0;
 +  fr->enerdifftwelve = 0;
 +  fr->virdiffsix = 0;
 +  fr->virdifftwelve = 0;
 +
 +  if (eDispCorr != edispcNO) {
 +    for(i=0; i<2; i++) {
 +      eners[i] = 0;
 +      virs[i]  = 0;
 +    }
 +    if ((fr->vdwtype == evdwSWITCH) || (fr->vdwtype == evdwSHIFT)) {
 +      if (fr->rvdw_switch == 0)
 +      gmx_fatal(FARGS,
 +                "With dispersion correction rvdw-switch can not be zero "
 +                "for vdw-type = %s",evdw_names[fr->vdwtype]);
 +
 +      scale  = fr->nblists[0].tab.scale;
 +      vdwtab = fr->nblists[0].vdwtab;
 +
 +      /* Round the cut-offs to exact table values for precision */
 +      ri0 = floor(fr->rvdw_switch*scale);
 +      ri1 = ceil(fr->rvdw*scale);
 +      r0  = ri0/scale;
 +      r1  = ri1/scale;
 +      rc3 = r0*r0*r0;
 +      rc9  = rc3*rc3*rc3;
 +
 +      if (fr->vdwtype == evdwSHIFT) {
 +      /* Determine the constant energy shift below rvdw_switch */
 +      fr->enershiftsix    = (real)(-1.0/(rc3*rc3)) - vdwtab[8*ri0];
 +      fr->enershifttwelve = (real)( 1.0/(rc9*rc3)) - vdwtab[8*ri0 + 4];
 +      }
 +      /* Add the constant part from 0 to rvdw_switch.
 +       * This integration from 0 to rvdw_switch overcounts the number
 +       * of interactions by 1, as it also counts the self interaction.
 +       * We will correct for this later.
 +       */
 +      eners[0] += 4.0*M_PI*fr->enershiftsix*rc3/3.0;
 +      eners[1] += 4.0*M_PI*fr->enershifttwelve*rc3/3.0;
 +      
 +      invscale = 1.0/(scale);  
 +      invscale2 = invscale*invscale;
 +      invscale3 = invscale*invscale2;
 +
 +      /* following summation derived from cubic spline definition,
 +      Numerical Recipies in C, second edition, p. 113-116.  Exact
 +      for the cubic spline.  We first calculate the negative of
 +      the energy from rvdw to rvdw_switch, assuming that g(r)=1,
 +      and then add the more standard, abrupt cutoff correction to
 +      that result, yielding the long-range correction for a
 +      switched function.  We perform both the pressure and energy
 +      loops at the same time for simplicity, as the computational
 +      cost is low. */
 +      
 +      for (i=0;i<2;i++) {
 +        enersum = 0.0; virsum = 0.0;
 +        if (i==0)
 +        offstart = 0;
 +      else
 +        offstart = 4;
 +      for (ri=ri0; ri<ri1; ri++) {
 +          r = ri*invscale;
 +          ea = invscale3;
 +          eb = 2.0*invscale2*r;
 +          ec = invscale*r*r;
 +          
 +          pa = invscale3;
 +          pb = 3.0*invscale2*r;
 +          pc = 3.0*invscale*r*r;
 +          pd = r*r*r;
 +          
 +          /* this "8" is from the packing in the vdwtab array - perhaps
 +          should be #define'ed? */
 +          offset = 8*ri + offstart;
 +          y0 = vdwtab[offset];
 +          f = vdwtab[offset+1];
 +          g = vdwtab[offset+2];
 +          h = vdwtab[offset+3];
 +        
 +          enersum += y0*(ea/3 + eb/2 + ec) + f*(ea/4 + eb/3 + ec/2)+
 +            g*(ea/5 + eb/4 + ec/3) + h*(ea/6 + eb/5 + ec/4);  
 +          virsum  +=  f*(pa/4 + pb/3 + pc/2 + pd) + 
 +            2*g*(pa/5 + pb/4 + pc/3 + pd/2) + 3*h*(pa/6 + pb/5 + pc/4 + pd/3);
 +        
 +        }
 +        enersum *= 4.0*M_PI;
 +        virsum  *= 4.0*M_PI; 
 +        eners[i] -= enersum;
 +        virs[i]  -= virsum;
 +      }
 +
 +      /* now add the correction for rvdw_switch to infinity */
 +      eners[0] += -4.0*M_PI/(3.0*rc3);
 +      eners[1] +=  4.0*M_PI/(9.0*rc9);
 +      virs[0]  +=  8.0*M_PI/rc3;
 +      virs[1]  += -16.0*M_PI/(3.0*rc9);
 +    } 
 +    else if ((fr->vdwtype == evdwCUT) || (fr->vdwtype == evdwUSER)) {
 +      if (fr->vdwtype == evdwUSER && fplog)
 +      fprintf(fplog,
 +              "WARNING: using dispersion correction with user tables\n");
 +      rc3  = fr->rvdw*fr->rvdw*fr->rvdw;
 +      rc9  = rc3*rc3*rc3;
 +      eners[0] += -4.0*M_PI/(3.0*rc3);
 +      eners[1] +=  4.0*M_PI/(9.0*rc9);
 +      virs[0]  +=  8.0*M_PI/rc3;
 +      virs[1]  += -16.0*M_PI/(3.0*rc9);
 +    } else {
 +      gmx_fatal(FARGS,
 +              "Dispersion correction is not implemented for vdw-type = %s",
 +              evdw_names[fr->vdwtype]);
 +    }
 +    fr->enerdiffsix    = eners[0];
 +    fr->enerdifftwelve = eners[1];
 +    /* The 0.5 is due to the Gromacs definition of the virial */
 +    fr->virdiffsix     = 0.5*virs[0];
 +    fr->virdifftwelve  = 0.5*virs[1];
 +  }
 +}
 +
 +void calc_dispcorr(FILE *fplog,t_inputrec *ir,t_forcerec *fr,
 +                   gmx_large_int_t step,int natoms,
 +                   matrix box,real lambda,tensor pres,tensor virial,
 +                   real *prescorr, real *enercorr, real *dvdlcorr)
 +{
 +    gmx_bool bCorrAll,bCorrPres;
 +    real dvdlambda,invvol,dens,ninter,avcsix,avctwelve,enerdiff,svir=0,spres=0;
 +    int  m;
 +    
 +    *prescorr = 0;
 +    *enercorr = 0;
 +    *dvdlcorr = 0;
 +    
 +    clear_mat(virial);
 +    clear_mat(pres);
 +    
 +    if (ir->eDispCorr != edispcNO) {
 +        bCorrAll  = (ir->eDispCorr == edispcAllEner ||
 +                     ir->eDispCorr == edispcAllEnerPres);
 +        bCorrPres = (ir->eDispCorr == edispcEnerPres ||
 +                     ir->eDispCorr == edispcAllEnerPres);
 +        
 +        invvol = 1/det(box);
 +        if (fr->n_tpi) 
 +        {
 +            /* Only correct for the interactions with the inserted molecule */
 +            dens = (natoms - fr->n_tpi)*invvol;
 +            ninter = fr->n_tpi;
 +        } 
 +        else 
 +        {
 +            dens = natoms*invvol;
 +            ninter = 0.5*natoms;
 +        }
 +        
 +        if (ir->efep == efepNO) 
 +        {
 +            avcsix    = fr->avcsix[0];
 +            avctwelve = fr->avctwelve[0];
 +        } 
 +        else 
 +        {
 +            avcsix    = (1 - lambda)*fr->avcsix[0]    + lambda*fr->avcsix[1];
 +            avctwelve = (1 - lambda)*fr->avctwelve[0] + lambda*fr->avctwelve[1];
 +        }
 +        
 +        enerdiff = ninter*(dens*fr->enerdiffsix - fr->enershiftsix);
 +        *enercorr += avcsix*enerdiff;
 +        dvdlambda = 0.0;
 +        if (ir->efep != efepNO) 
 +        {
 +            dvdlambda += (fr->avcsix[1] - fr->avcsix[0])*enerdiff;
 +        }
 +        if (bCorrAll) 
 +        {
 +            enerdiff = ninter*(dens*fr->enerdifftwelve - fr->enershifttwelve);
 +            *enercorr += avctwelve*enerdiff;
 +            if (fr->efep != efepNO) 
 +            {
 +                dvdlambda += (fr->avctwelve[1] - fr->avctwelve[0])*enerdiff;
 +            }
 +        }
 +        
 +        if (bCorrPres) 
 +        {
 +            svir = ninter*dens*avcsix*fr->virdiffsix/3.0;
 +            if (ir->eDispCorr == edispcAllEnerPres)
 +            {
 +                svir += ninter*dens*avctwelve*fr->virdifftwelve/3.0;
 +            }
 +            /* The factor 2 is because of the Gromacs virial definition */
 +            spres = -2.0*invvol*svir*PRESFAC;
 +            
 +            for(m=0; m<DIM; m++) {
 +                virial[m][m] += svir;
 +                pres[m][m] += spres;
 +            }
 +            *prescorr += spres;
 +        }
 +        
 +        /* Can't currently control when it prints, for now, just print when degugging */
 +        if (debug)
 +        {
 +            if (bCorrAll) {
 +                fprintf(debug,"Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
 +                        avcsix,avctwelve);
 +            }
 +            if (bCorrPres) 
 +            {
 +                fprintf(debug,
 +                        "Long Range LJ corr.: Epot %10g, Pres: %10g, Vir: %10g\n",
 +                        *enercorr,spres,svir);
 +            }
 +            else
 +            {
 +                fprintf(debug,"Long Range LJ corr.: Epot %10g\n",*enercorr);
 +            }
 +        }
 +        
 +        if (fr->bSepDVDL && do_per_step(step,ir->nstlog))
 +        {
 +            fprintf(fplog,sepdvdlformat,"Dispersion correction",
 +                    *enercorr,dvdlambda);
 +        }
 +        if (fr->efep != efepNO) 
 +        {
 +            *dvdlcorr += dvdlambda;
 +        }
 +    }
 +}
 +
 +void do_pbc_first(FILE *fplog,matrix box,t_forcerec *fr,
 +                t_graph *graph,rvec x[])
 +{
 +  if (fplog)
 +    fprintf(fplog,"Removing pbc first time\n");
 +  calc_shifts(box,fr->shift_vec);
 +  if (graph) {
 +    mk_mshift(fplog,graph,fr->ePBC,box,x);
 +    if (gmx_debug_at)
 +      p_graph(debug,"do_pbc_first 1",graph);
 +    shift_self(graph,box,x);
 +    /* By doing an extra mk_mshift the molecules that are broken
 +     * because they were e.g. imported from another software
 +     * will be made whole again. Such are the healing powers
 +     * of GROMACS.
 +     */
 +    mk_mshift(fplog,graph,fr->ePBC,box,x);
 +    if (gmx_debug_at)
 +      p_graph(debug,"do_pbc_first 2",graph);
 +  }
 +  if (fplog)
 +    fprintf(fplog,"Done rmpbc\n");
 +}
 +
 +static void low_do_pbc_mtop(FILE *fplog,int ePBC,matrix box,
 +                          gmx_mtop_t *mtop,rvec x[],
 +                          gmx_bool bFirst)
 +{
 +  t_graph *graph;
 +  int mb,as,mol;
 +  gmx_molblock_t *molb;
 +
 +  if (bFirst && fplog)
 +    fprintf(fplog,"Removing pbc first time\n");
 +
 +  snew(graph,1);
 +  as = 0;
 +  for(mb=0; mb<mtop->nmolblock; mb++) {
 +    molb = &mtop->molblock[mb];
 +    if (molb->natoms_mol == 1 || 
 +      (!bFirst && mtop->moltype[molb->type].cgs.nr == 1)) {
 +      /* Just one atom or charge group in the molecule, no PBC required */
 +      as += molb->nmol*molb->natoms_mol;
 +    } else {
 +      /* Pass NULL iso fplog to avoid graph prints for each molecule type */
 +      mk_graph_ilist(NULL,mtop->moltype[molb->type].ilist,
 +                   0,molb->natoms_mol,FALSE,FALSE,graph);
 +      
 +      for(mol=0; mol<molb->nmol; mol++) {
 +      mk_mshift(fplog,graph,ePBC,box,x+as);
 +      
 +      shift_self(graph,box,x+as);
 +      /* The molecule is whole now.
 +       * We don't need the second mk_mshift call as in do_pbc_first,
 +       * since we no longer need this graph.
 +       */
 +      
 +      as += molb->natoms_mol;
 +      }
 +      done_graph(graph);
 +    }
 +  }
 +  sfree(graph);
 +}
 +
 +void do_pbc_first_mtop(FILE *fplog,int ePBC,matrix box,
 +                     gmx_mtop_t *mtop,rvec x[])
 +{
 +  low_do_pbc_mtop(fplog,ePBC,box,mtop,x,TRUE);
 +}
 +
 +void do_pbc_mtop(FILE *fplog,int ePBC,matrix box,
 +               gmx_mtop_t *mtop,rvec x[])
 +{
 +  low_do_pbc_mtop(fplog,ePBC,box,mtop,x,FALSE);
 +}
 +
 +void finish_run(FILE *fplog,t_commrec *cr,const char *confout,
 +                t_inputrec *inputrec,
 +                t_nrnb nrnb[],gmx_wallcycle_t wcycle,
 +                gmx_runtime_t *runtime,
 +                gmx_bool bWriteStat)
 +{
 +  int    i,j;
 +  t_nrnb *nrnb_tot=NULL;
 +  real   delta_t;
 +  double nbfs,mflop;
 +  double cycles[ewcNR];
 +
 +  wallcycle_sum(cr,wcycle,cycles);
 +
 +  if (cr->nnodes > 1) {
 +    if (SIMMASTER(cr))
 +      snew(nrnb_tot,1);
 +#ifdef GMX_MPI
 +    MPI_Reduce(nrnb->n,nrnb_tot->n,eNRNB,MPI_DOUBLE,MPI_SUM,
 +               MASTERRANK(cr),cr->mpi_comm_mysim);
 +#endif  
 +  } else {
 +    nrnb_tot = nrnb;
 +  }
 +    
 +  if (SIMMASTER(cr)) {
 +    print_flop(fplog,nrnb_tot,&nbfs,&mflop);
 +    if (cr->nnodes > 1) {
 +      sfree(nrnb_tot);
 +    }
 +  }
 +
 +  if ((cr->duty & DUTY_PP) && DOMAINDECOMP(cr)) {
 +    print_dd_statistics(cr,inputrec,fplog);
 +  }
 +
 +#ifdef GMX_MPI
 +    if (PARTDECOMP(cr))
 +    {
 +        if (MASTER(cr))
 +        {
 +            t_nrnb     *nrnb_all;
 +            int        s;
 +            MPI_Status stat;
 +
 +            snew(nrnb_all,cr->nnodes);
 +            nrnb_all[0] = *nrnb;
 +            for(s=1; s<cr->nnodes; s++)
 +            {
 +                MPI_Recv(nrnb_all[s].n,eNRNB,MPI_DOUBLE,s,0,
 +                         cr->mpi_comm_mysim,&stat);
 +            }
 +            pr_load(fplog,cr,nrnb_all);
 +            sfree(nrnb_all);
 +        }
 +        else
 +        {
 +            MPI_Send(nrnb->n,eNRNB,MPI_DOUBLE,MASTERRANK(cr),0,
 +                     cr->mpi_comm_mysim);
 +        }
 +    }
 +#endif  
 +
 +  if (SIMMASTER(cr)) {
 +    wallcycle_print(fplog,cr->nnodes,cr->npmenodes,runtime->realtime,
 +                    wcycle,cycles);
 +
 +    if (EI_DYNAMICS(inputrec->eI)) {
 +      delta_t = inputrec->delta_t;
 +    } else {
 +      delta_t = 0;
 +    }
 +    
 +    if (fplog) {
 +        print_perf(fplog,runtime->proctime,runtime->realtime,
 +                   cr->nnodes-cr->npmenodes,
 +                   runtime->nsteps_done,delta_t,nbfs,mflop);
 +    }
 +    if (bWriteStat) {
 +        print_perf(stderr,runtime->proctime,runtime->realtime,
 +                   cr->nnodes-cr->npmenodes,
 +                   runtime->nsteps_done,delta_t,nbfs,mflop);
 +    }
 +
 +    /*
 +    runtime=inputrec->nsteps*inputrec->delta_t;
 +    if (bWriteStat) {
 +      if (cr->nnodes == 1)
 +      fprintf(stderr,"\n\n");
 +      print_perf(stderr,nodetime,realtime,runtime,&ntot,
 +               cr->nnodes-cr->npmenodes,FALSE);
 +    }
 +    wallcycle_print(fplog,cr->nnodes,cr->npmenodes,realtime,wcycle,cycles);
 +    print_perf(fplog,nodetime,realtime,runtime,&ntot,cr->nnodes-cr->npmenodes,
 +             TRUE);
 +    if (PARTDECOMP(cr))
 +      pr_load(fplog,cr,nrnb_all);
 +    if (cr->nnodes > 1)
 +      sfree(nrnb_all);
 +    */
 +  }
 +}
 +
 +void init_md(FILE *fplog,
 +             t_commrec *cr,t_inputrec *ir,const output_env_t oenv,
 +             double *t,double *t0,
 +             real *lambda,double *lam0,
 +             t_nrnb *nrnb,gmx_mtop_t *mtop,
 +             gmx_update_t *upd,
 +             int nfile,const t_filenm fnm[],
 +             gmx_mdoutf_t **outf,t_mdebin **mdebin,
 +             tensor force_vir,tensor shake_vir,rvec mu_tot,
 +             gmx_bool *bSimAnn,t_vcm **vcm, t_state *state, unsigned long Flags)
 +{
 +    int  i,j,n;
 +    real tmpt,mod;
 +      
 +    /* Initial values */
 +    *t = *t0       = ir->init_t;
 +    if (ir->efep != efepNO)
 +    {
 +        *lam0 = ir->init_lambda;
 +        *lambda = *lam0 + ir->init_step*ir->delta_lambda;
 +    }
 +    else
 +    {
 +        *lambda = *lam0   = 0.0;
 +    } 
 +
 +    *bSimAnn=FALSE;
 +    for(i=0;i<ir->opts.ngtc;i++)
 +    {
 +        /* set bSimAnn if any group is being annealed */
 +        if(ir->opts.annealing[i]!=eannNO)
 +        {
 +            *bSimAnn = TRUE;
 +        }
 +    }
 +    if (*bSimAnn)
 +    {
 +        update_annealing_target_temp(&(ir->opts),ir->init_t);
 +    }
 +    
 +    if (upd)
 +    {
 +        *upd = init_update(fplog,ir);
 +    }
 +    
 +    if (vcm != NULL)
 +    {
 +        *vcm = init_vcm(fplog,&mtop->groups,ir);
 +    }
 +    
 +    if (EI_DYNAMICS(ir->eI) && !(Flags & MD_APPENDFILES))
 +    {
 +        if (ir->etc == etcBERENDSEN)
 +        {
 +            please_cite(fplog,"Berendsen84a");
 +        }
 +        if (ir->etc == etcVRESCALE)
 +        {
 +            please_cite(fplog,"Bussi2007a");
 +        }
 +    }
 +    
 +    init_nrnb(nrnb);
 +    
 +    if (nfile != -1)
 +    {
 +        *outf = init_mdoutf(nfile,fnm,Flags,cr,ir,oenv);
 +
 +        *mdebin = init_mdebin((Flags & MD_APPENDFILES) ? NULL : (*outf)->fp_ene,
 +                              mtop,ir, (*outf)->fp_dhdl);
 +    }
 +    
 +    if (ir->bAdress)
 +    {
 +      please_cite(fplog,"Fritsch12");
 +      please_cite(fplog,"Junghans10");
 +    }
 +    /* Initiate variables */  
 +    clear_mat(force_vir);
 +    clear_mat(shake_vir);
 +    clear_rvec(mu_tot);
 +    
 +    debug_gmx();
 +}
 +
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
index 37ecd28b8f7df56910edb88da94704a72c009f50,0000000000000000000000000000000000000000..8a951c1ea0762992f8b3ffc6388e61f2196a264d
mode 100644,000000..100644
--- /dev/null
@@@ -1,1866 -1,0 +1,1866 @@@
-                     gmx_fatal(FARGS,"Cannot flush logfile - maybe you are out of quota?");
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#if ((defined WIN32 || defined _WIN32 || defined WIN64 || defined _WIN64) && !defined __CYGWIN__ && !defined __CYGWIN32__)
 +/* _isnan() */
 +#include <float.h>
 +#endif
 +
 +#include "typedefs.h"
 +#include "smalloc.h"
 +#include "sysstuff.h"
 +#include "vec.h"
 +#include "statutil.h"
 +#include "vcm.h"
 +#include "mdebin.h"
 +#include "nrnb.h"
 +#include "calcmu.h"
 +#include "index.h"
 +#include "vsite.h"
 +#include "update.h"
 +#include "ns.h"
 +#include "trnio.h"
 +#include "xtcio.h"
 +#include "mdrun.h"
 +#include "confio.h"
 +#include "network.h"
 +#include "pull.h"
 +#include "xvgr.h"
 +#include "physics.h"
 +#include "names.h"
 +#include "xmdrun.h"
 +#include "ionize.h"
 +#include "disre.h"
 +#include "orires.h"
 +#include "dihre.h"
 +#include "pppm.h"
 +#include "pme.h"
 +#include "mdatoms.h"
 +#include "repl_ex.h"
 +#include "qmmm.h"
 +#include "domdec.h"
 +#include "partdec.h"
 +#include "topsort.h"
 +#include "coulomb.h"
 +#include "constr.h"
 +#include "shellfc.h"
 +#include "compute_io.h"
 +#include "mvdata.h"
 +#include "checkpoint.h"
 +#include "mtop_util.h"
 +#include "sighandler.h"
 +#include "membed.h"
 +#include "string2.h"
 +
 +#ifdef GMX_LIB_MPI
 +#include <mpi.h>
 +#endif
 +#ifdef GMX_THREAD_MPI
 +#include "tmpi.h"
 +#endif
 +
 +#ifdef GMX_FAHCORE
 +#include "corewrap.h"
 +#endif
 +
 +
 +double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
 +             const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
 +             int nstglobalcomm,
 +             gmx_vsite_t *vsite,gmx_constr_t constr,
 +             int stepout,t_inputrec *ir,
 +             gmx_mtop_t *top_global,
 +             t_fcdata *fcd,
 +             t_state *state_global,
 +             t_mdatoms *mdatoms,
 +             t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +             gmx_edsam_t ed,t_forcerec *fr,
 +             int repl_ex_nst,int repl_ex_seed,gmx_membed_t *membed,
 +             real cpt_period,real max_hours,
 +             const char *deviceOptions,
 +             unsigned long Flags,
 +             gmx_runtime_t *runtime)
 +{
 +    gmx_mdoutf_t *outf;
 +    gmx_large_int_t step,step_rel;
 +    double     run_time;
 +    double     t,t0,lam0;
 +    gmx_bool       bGStatEveryStep,bGStat,bNstEner,bCalcEnerPres;
 +    gmx_bool       bNS,bNStList,bSimAnn,bStopCM,bRerunMD,bNotLastFrame=FALSE,
 +               bFirstStep,bStateFromTPX,bInitStep,bLastStep,
 +               bBornRadii,bStartingFromCpt;
 +    gmx_bool       bDoDHDL=FALSE;
 +    gmx_bool       do_ene,do_log,do_verbose,bRerunWarnNoV=TRUE,
 +               bForceUpdate=FALSE,bCPT;
 +    int        mdof_flags;
 +    gmx_bool       bMasterState;
 +    int        force_flags,cglo_flags;
 +    tensor     force_vir,shake_vir,total_vir,tmp_vir,pres;
 +    int        i,m;
 +    t_trxstatus *status;
 +    rvec       mu_tot;
 +    t_vcm      *vcm;
 +    t_state    *bufstate=NULL;   
 +    matrix     *scale_tot,pcoupl_mu,M,ebox;
 +    gmx_nlheur_t nlh;
 +    t_trxframe rerun_fr;
 +    gmx_repl_ex_t repl_ex=NULL;
 +    int        nchkpt=1;
 +
 +    gmx_localtop_t *top;      
 +    t_mdebin *mdebin=NULL;
 +    t_state    *state=NULL;
 +    rvec       *f_global=NULL;
 +    int        n_xtc=-1;
 +    rvec       *x_xtc=NULL;
 +    gmx_enerdata_t *enerd;
 +    rvec       *f=NULL;
 +    gmx_global_stat_t gstat;
 +    gmx_update_t upd=NULL;
 +    t_graph    *graph=NULL;
 +    globsig_t   gs;
 +
 +    gmx_bool        bFFscan;
 +    gmx_groups_t *groups;
 +    gmx_ekindata_t *ekind, *ekind_save;
 +    gmx_shellfc_t shellfc;
 +    int         count,nconverged=0;
 +    real        timestep=0;
 +    double      tcount=0;
 +    gmx_bool        bIonize=FALSE;
 +    gmx_bool        bTCR=FALSE,bConverged=TRUE,bOK,bSumEkinhOld,bExchanged;
 +    gmx_bool        bAppend;
 +    gmx_bool        bResetCountersHalfMaxH=FALSE;
 +    gmx_bool        bVV,bIterations,bFirstIterate,bTemp,bPres,bTrotter;
 +    real        temp0,mu_aver=0,dvdl;
 +    int         a0,a1,gnx=0,ii;
 +    atom_id     *grpindex=NULL;
 +    char        *grpname;
 +    t_coupl_rec *tcr=NULL;
 +    rvec        *xcopy=NULL,*vcopy=NULL,*cbuf=NULL;
 +    matrix      boxcopy={{0}},lastbox;
 +      tensor      tmpvir;
 +      real        fom,oldfom,veta_save,pcurr,scalevir,tracevir;
 +      real        vetanew = 0;
 +    double      cycles;
 +      real        saved_conserved_quantity = 0;
 +    real        last_ekin = 0;
 +      int         iter_i;
 +      t_extmass   MassQ;
 +    int         **trotter_seq; 
 +    char        sbuf[STEPSTRSIZE],sbuf2[STEPSTRSIZE];
 +    int         handled_stop_condition=gmx_stop_cond_none; /* compare to get_stop_condition*/
 +    gmx_iterate_t iterate;
 +    gmx_large_int_t multisim_nsteps=-1; /* number of steps to do  before first multisim 
 +                                          simulation stops. If equal to zero, don't
 +                                          communicate any more between multisims.*/
 +#ifdef GMX_FAHCORE
 +    /* Temporary addition for FAHCORE checkpointing */
 +    int chkpt_ret;
 +#endif
 +
 +    /* Check for special mdrun options */
 +    bRerunMD = (Flags & MD_RERUN);
 +    bIonize  = (Flags & MD_IONIZE);
 +    bFFscan  = (Flags & MD_FFSCAN);
 +    bAppend  = (Flags & MD_APPENDFILES);
 +    if (Flags & MD_RESETCOUNTERSHALFWAY)
 +    {
 +        if (ir->nsteps > 0)
 +        {
 +            /* Signal to reset the counters half the simulation steps. */
 +            wcycle_set_reset_counters(wcycle,ir->nsteps/2);
 +        }
 +        /* Signal to reset the counters halfway the simulation time. */
 +        bResetCountersHalfMaxH = (max_hours > 0);
 +    }
 +
 +    /* md-vv uses averaged full step velocities for T-control 
 +       md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control)
 +       md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */
 +    bVV = EI_VV(ir->eI);
 +    if (bVV) /* to store the initial velocities while computing virial */
 +    {
 +        snew(cbuf,top_global->natoms);
 +    }
 +    /* all the iteratative cases - only if there are constraints */ 
 +    bIterations = ((IR_NPT_TROTTER(ir)) && (constr) && (!bRerunMD));
 +    bTrotter = (bVV && (IR_NPT_TROTTER(ir) || (IR_NVT_TROTTER(ir))));        
 +    
 +    if (bRerunMD)
 +    {
 +        /* Since we don't know if the frames read are related in any way,
 +         * rebuild the neighborlist at every step.
 +         */
 +        ir->nstlist       = 1;
 +        ir->nstcalcenergy = 1;
 +        nstglobalcomm     = 1;
 +    }
 +
 +    check_ir_old_tpx_versions(cr,fplog,ir,top_global);
 +
 +    nstglobalcomm = check_nstglobalcomm(fplog,cr,nstglobalcomm,ir);
 +    bGStatEveryStep = (nstglobalcomm == 1);
 +
 +    if (!bGStatEveryStep && ir->nstlist == -1 && fplog != NULL)
 +    {
 +        fprintf(fplog,
 +                "To reduce the energy communication with nstlist = -1\n"
 +                "the neighbor list validity should not be checked at every step,\n"
 +                "this means that exact integration is not guaranteed.\n"
 +                "The neighbor list validity is checked after:\n"
 +                "  <n.list life time> - 2*std.dev.(n.list life time)  steps.\n"
 +                "In most cases this will result in exact integration.\n"
 +                "This reduces the energy communication by a factor of 2 to 3.\n"
 +                "If you want less energy communication, set nstlist > 3.\n\n");
 +    }
 +
 +    if (bRerunMD || bFFscan)
 +    {
 +        ir->nstxtcout = 0;
 +    }
 +    groups = &top_global->groups;
 +
 +    /* Initial values */
 +    init_md(fplog,cr,ir,oenv,&t,&t0,&state_global->lambda,&lam0,
 +            nrnb,top_global,&upd,
 +            nfile,fnm,&outf,&mdebin,
 +            force_vir,shake_vir,mu_tot,&bSimAnn,&vcm,state_global,Flags);
 +
 +    clear_mat(total_vir);
 +    clear_mat(pres);
 +    /* Energy terms and groups */
 +    snew(enerd,1);
 +    init_enerdata(top_global->groups.grps[egcENER].nr,ir->n_flambda,enerd);
 +    if (DOMAINDECOMP(cr))
 +    {
 +        f = NULL;
 +    }
 +    else
 +    {
 +        snew(f,top_global->natoms);
 +    }
 +
 +    /* Kinetic energy data */
 +    snew(ekind,1);
 +    init_ekindata(fplog,top_global,&(ir->opts),ekind);
 +    /* needed for iteration of constraints */
 +    snew(ekind_save,1);
 +    init_ekindata(fplog,top_global,&(ir->opts),ekind_save);
 +    /* Copy the cos acceleration to the groups struct */    
 +    ekind->cosacc.cos_accel = ir->cos_accel;
 +
 +    gstat = global_stat_init(ir);
 +    debug_gmx();
 +
 +    /* Check for polarizable models and flexible constraints */
 +    shellfc = init_shell_flexcon(fplog,
 +                                 top_global,n_flexible_constraints(constr),
 +                                 (ir->bContinuation || 
 +                                  (DOMAINDECOMP(cr) && !MASTER(cr))) ?
 +                                 NULL : state_global->x);
 +
 +    if (DEFORM(*ir))
 +    {
 +#ifdef GMX_THREAD_MPI
 +        tMPI_Thread_mutex_lock(&deform_init_box_mutex);
 +#endif
 +        set_deform_reference_box(upd,
 +                                 deform_init_init_step_tpx,
 +                                 deform_init_box_tpx);
 +#ifdef GMX_THREAD_MPI
 +        tMPI_Thread_mutex_unlock(&deform_init_box_mutex);
 +#endif
 +    }
 +
 +    {
 +        double io = compute_io(ir,top_global->natoms,groups,mdebin->ebin->nener,1);
 +        if ((io > 2000) && MASTER(cr))
 +            fprintf(stderr,
 +                    "\nWARNING: This run will generate roughly %.0f Mb of data\n\n",
 +                    io);
 +    }
 +
 +    if (DOMAINDECOMP(cr)) {
 +        top = dd_init_local_top(top_global);
 +
 +        snew(state,1);
 +        dd_init_local_state(cr->dd,state_global,state);
 +
 +        if (DDMASTER(cr->dd) && ir->nstfout) {
 +            snew(f_global,state_global->natoms);
 +        }
 +    } else {
 +        if (PAR(cr)) {
 +            /* Initialize the particle decomposition and split the topology */
 +            top = split_system(fplog,top_global,ir,cr);
 +
 +            pd_cg_range(cr,&fr->cg0,&fr->hcg);
 +            pd_at_range(cr,&a0,&a1);
 +        } else {
 +            top = gmx_mtop_generate_local_top(top_global,ir);
 +
 +            a0 = 0;
 +            a1 = top_global->natoms;
 +        }
 +
 +        state = partdec_init_local_state(cr,state_global);
 +        f_global = f;
 +
 +        atoms2md(top_global,ir,0,NULL,a0,a1-a0,mdatoms);
 +
 +        if (vsite) {
 +            set_vsite_top(vsite,top,mdatoms,cr);
 +        }
 +
 +        if (ir->ePBC != epbcNONE && !ir->bPeriodicMols) {
 +            graph = mk_graph(fplog,&(top->idef),0,top_global->natoms,FALSE,FALSE);
 +        }
 +
 +        if (shellfc) {
 +            make_local_shells(cr,mdatoms,shellfc);
 +        }
 +
 +        if (ir->pull && PAR(cr)) {
 +            dd_make_local_pull_groups(NULL,ir->pull,mdatoms);
 +        }
 +    }
 +
 +    if (DOMAINDECOMP(cr))
 +    {
 +        /* Distribute the charge groups over the nodes from the master node */
 +        dd_partition_system(fplog,ir->init_step,cr,TRUE,1,
 +                            state_global,top_global,ir,
 +                            state,&f,mdatoms,top,fr,
 +                            vsite,shellfc,constr,
 +                            nrnb,wcycle,FALSE);
 +    }
 +
 +    update_mdatoms(mdatoms,state->lambda);
 +
 +    if (MASTER(cr))
 +    {
 +        if (opt2bSet("-cpi",nfile,fnm))
 +        {
 +            /* Update mdebin with energy history if appending to output files */
 +            if ( Flags & MD_APPENDFILES )
 +            {
 +                restore_energyhistory_from_state(mdebin,&state_global->enerhist);
 +            }
 +            else
 +            {
 +                /* We might have read an energy history from checkpoint,
 +                 * free the allocated memory and reset the counts.
 +                 */
 +                done_energyhistory(&state_global->enerhist);
 +                init_energyhistory(&state_global->enerhist);
 +            }
 +        }
 +        /* Set the initial energy history in state by updating once */
 +        update_energyhistory(&state_global->enerhist,mdebin);
 +    } 
 +
 +    if ((state->flags & (1<<estLD_RNG)) && (Flags & MD_READ_RNG)) {
 +        /* Set the random state if we read a checkpoint file */
 +        set_stochd_state(upd,state);
 +    }
 +
 +    /* Initialize constraints */
 +    if (constr) {
 +        if (!DOMAINDECOMP(cr))
 +            set_constraints(constr,top,ir,mdatoms,cr);
 +    }
 +
 +    /* Check whether we have to do GCT stuff */
 +    bTCR = ftp2bSet(efGCT,nfile,fnm);
 +    if (bTCR) {
 +        if (MASTER(cr)) {
 +            fprintf(stderr,"Will do General Coupling Theory!\n");
 +        }
 +        gnx = top_global->mols.nr;
 +        snew(grpindex,gnx);
 +        for(i=0; (i<gnx); i++) {
 +            grpindex[i] = i;
 +        }
 +    }
 +
 +    if (repl_ex_nst > 0)
 +    {
 +        /* We need to be sure replica exchange can only occur
 +         * when the energies are current */
 +        check_nst_param(fplog,cr,"nstcalcenergy",ir->nstcalcenergy,
 +                        "repl_ex_nst",&repl_ex_nst);
 +        /* This check needs to happen before inter-simulation
 +         * signals are initialized, too */
 +    }
 +    if (repl_ex_nst > 0 && MASTER(cr))
 +        repl_ex = init_replica_exchange(fplog,cr->ms,state_global,ir,
 +                                        repl_ex_nst,repl_ex_seed);
 +
 +    if (!ir->bContinuation && !bRerunMD)
 +    {
 +        if (mdatoms->cFREEZE && (state->flags & (1<<estV)))
 +        {
 +            /* Set the velocities of frozen particles to zero */
 +            for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++)
 +            {
 +                for(m=0; m<DIM; m++)
 +                {
 +                    if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m])
 +                    {
 +                        state->v[i][m] = 0;
 +                    }
 +                }
 +            }
 +        }
 +
 +        if (constr)
 +        {
 +            /* Constrain the initial coordinates and velocities */
 +            do_constrain_first(fplog,constr,ir,mdatoms,state,f,
 +                               graph,cr,nrnb,fr,top,shake_vir);
 +        }
 +        if (vsite)
 +        {
 +            /* Construct the virtual sites for the initial configuration */
 +            construct_vsites(fplog,vsite,state->x,nrnb,ir->delta_t,NULL,
 +                             top->idef.iparams,top->idef.il,
 +                             fr->ePBC,fr->bMolPBC,graph,cr,state->box);
 +        }
 +    }
 +
 +    debug_gmx();
 +  
 +    /* I'm assuming we need global communication the first time! MRS */
 +    cglo_flags = (CGLO_TEMPERATURE | CGLO_GSTAT
 +                  | (bVV ? CGLO_PRESSURE:0)
 +                  | (bVV ? CGLO_CONSTRAINT:0)
 +                  | (bRerunMD ? CGLO_RERUNMD:0)
 +                  | ((Flags & MD_READ_EKIN) ? CGLO_READEKIN:0));
 +    
 +    bSumEkinhOld = FALSE;
 +    compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
 +                    NULL,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
 +                    constr,NULL,FALSE,state->box,
 +                    top_global,&pcurr,top_global->natoms,&bSumEkinhOld,cglo_flags);
 +    if (ir->eI == eiVVAK) {
 +        /* a second call to get the half step temperature initialized as well */ 
 +        /* we do the same call as above, but turn the pressure off -- internally to 
 +           compute_globals, this is recognized as a velocity verlet half-step 
 +           kinetic energy calculation.  This minimizes excess variables, but
 +           perhaps loses some logic?*/
 +        
 +        compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
 +                        NULL,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
 +                        constr,NULL,FALSE,state->box,
 +                        top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
 +                        cglo_flags &~ CGLO_PRESSURE);
 +    }
 +    
 +    /* Calculate the initial half step temperature, and save the ekinh_old */
 +    if (!(Flags & MD_STARTFROMCPT)) 
 +    {
 +        for(i=0; (i<ir->opts.ngtc); i++) 
 +        {
 +            copy_mat(ekind->tcstat[i].ekinh,ekind->tcstat[i].ekinh_old);
 +        } 
 +    }
 +    if (ir->eI != eiVV) 
 +    {
 +        enerd->term[F_TEMP] *= 2; /* result of averages being done over previous and current step,
 +                                     and there is no previous step */
 +    }
 +    temp0 = enerd->term[F_TEMP];
 +    
 +    /* if using an iterative algorithm, we need to create a working directory for the state. */
 +    if (bIterations) 
 +    {
 +            bufstate = init_bufstate(state);
 +    }
 +    if (bFFscan) 
 +    {
 +        snew(xcopy,state->natoms);
 +        snew(vcopy,state->natoms);
 +        copy_rvecn(state->x,xcopy,0,state->natoms);
 +        copy_rvecn(state->v,vcopy,0,state->natoms);
 +        copy_mat(state->box,boxcopy);
 +    } 
 +    
 +    /* need to make an initiation call to get the Trotter variables set, as well as other constants for non-trotter
 +       temperature control */
 +    trotter_seq = init_npt_vars(ir,state,&MassQ,bTrotter);
 +    
 +    if (MASTER(cr))
 +    {
 +        if (constr && !ir->bContinuation && ir->eConstrAlg == econtLINCS)
 +        {
 +            fprintf(fplog,
 +                    "RMS relative constraint deviation after constraining: %.2e\n",
 +                    constr_rmsd(constr,FALSE));
 +        }
 +        fprintf(fplog,"Initial temperature: %g K\n",enerd->term[F_TEMP]);
 +        if (bRerunMD)
 +        {
 +            fprintf(stderr,"starting md rerun '%s', reading coordinates from"
 +                    " input trajectory '%s'\n\n",
 +                    *(top_global->name),opt2fn("-rerun",nfile,fnm));
 +            if (bVerbose)
 +            {
 +                fprintf(stderr,"Calculated time to finish depends on nsteps from "
 +                        "run input file,\nwhich may not correspond to the time "
 +                        "needed to process input trajectory.\n\n");
 +            }
 +        }
 +        else
 +        {
 +            char tbuf[20];
 +            fprintf(stderr,"starting mdrun '%s'\n",
 +                    *(top_global->name));
 +            if (ir->nsteps >= 0)
 +            {
 +                sprintf(tbuf,"%8.1f",(ir->init_step+ir->nsteps)*ir->delta_t);
 +            }
 +            else
 +            {
 +                sprintf(tbuf,"%s","infinite");
 +            }
 +            if (ir->init_step > 0)
 +            {
 +                fprintf(stderr,"%s steps, %s ps (continuing from step %s, %8.1f ps).\n",
 +                        gmx_step_str(ir->init_step+ir->nsteps,sbuf),tbuf,
 +                        gmx_step_str(ir->init_step,sbuf2),
 +                        ir->init_step*ir->delta_t);
 +            }
 +            else
 +            {
 +                fprintf(stderr,"%s steps, %s ps.\n",
 +                        gmx_step_str(ir->nsteps,sbuf),tbuf);
 +            }
 +        }
 +        fprintf(fplog,"\n");
 +    }
 +
 +    /* Set and write start time */
 +    runtime_start(runtime);
 +    print_date_and_time(fplog,cr->nodeid,"Started mdrun",runtime);
 +    wallcycle_start(wcycle,ewcRUN);
 +    if (fplog)
 +        fprintf(fplog,"\n");
 +
 +    /* safest point to do file checkpointing is here.  More general point would be immediately before integrator call */
 +#ifdef GMX_FAHCORE
 +    chkpt_ret=fcCheckPointParallel( cr->nodeid,
 +                                    NULL,0);
 +    if ( chkpt_ret == 0 ) 
 +        gmx_fatal( 3,__FILE__,__LINE__, "Checkpoint error on step %d\n", 0 );
 +#endif
 +
 +    debug_gmx();
 +    /***********************************************************
 +     *
 +     *             Loop over MD steps 
 +     *
 +     ************************************************************/
 +
 +    /* if rerunMD then read coordinates and velocities from input trajectory */
 +    if (bRerunMD)
 +    {
 +        if (getenv("GMX_FORCE_UPDATE"))
 +        {
 +            bForceUpdate = TRUE;
 +        }
 +
 +        rerun_fr.natoms = 0;
 +        if (MASTER(cr))
 +        {
 +            bNotLastFrame = read_first_frame(oenv,&status,
 +                                             opt2fn("-rerun",nfile,fnm),
 +                                             &rerun_fr,TRX_NEED_X | TRX_READ_V);
 +            if (rerun_fr.natoms != top_global->natoms)
 +            {
 +                gmx_fatal(FARGS,
 +                          "Number of atoms in trajectory (%d) does not match the "
 +                          "run input file (%d)\n",
 +                          rerun_fr.natoms,top_global->natoms);
 +            }
 +            if (ir->ePBC != epbcNONE)
 +            {
 +                if (!rerun_fr.bBox)
 +                {
 +                    gmx_fatal(FARGS,"Rerun trajectory frame step %d time %f does not contain a box, while pbc is used",rerun_fr.step,rerun_fr.time);
 +                }
 +                if (max_cutoff2(ir->ePBC,rerun_fr.box) < sqr(fr->rlistlong))
 +                {
 +                    gmx_fatal(FARGS,"Rerun trajectory frame step %d time %f has too small box dimensions",rerun_fr.step,rerun_fr.time);
 +                }
 +            }
 +        }
 +
 +        if (PAR(cr))
 +        {
 +            rerun_parallel_comm(cr,&rerun_fr,&bNotLastFrame);
 +        }
 +
 +        if (ir->ePBC != epbcNONE)
 +        {
 +            /* Set the shift vectors.
 +             * Necessary here when we have a static box different from the tpr box.
 +             */
 +            calc_shifts(rerun_fr.box,fr->shift_vec);
 +        }
 +    }
 +
 +    /* loop over MD steps or if rerunMD to end of input trajectory */
 +    bFirstStep = TRUE;
 +    /* Skip the first Nose-Hoover integration when we get the state from tpx */
 +    bStateFromTPX = !opt2bSet("-cpi",nfile,fnm);
 +    bInitStep = bFirstStep && (bStateFromTPX || bVV);
 +    bStartingFromCpt = (Flags & MD_STARTFROMCPT) && bInitStep;
 +    bLastStep    = FALSE;
 +    bSumEkinhOld = FALSE;
 +    bExchanged   = FALSE;
 +
 +    init_global_signals(&gs,cr,ir,repl_ex_nst);
 +
 +    step = ir->init_step;
 +    step_rel = 0;
 +
 +    if (ir->nstlist == -1)
 +    {
 +        init_nlistheuristics(&nlh,bGStatEveryStep,step);
 +    }
 +
 +    if (MULTISIM(cr) && (repl_ex_nst <=0 ))
 +    {
 +        /* check how many steps are left in other sims */
 +        multisim_nsteps=get_multisim_nsteps(cr, ir->nsteps);
 +    }
 +
 +
 +    /* and stop now if we should */
 +    bLastStep = (bRerunMD || (ir->nsteps >= 0 && step_rel > ir->nsteps) ||
 +                 ((multisim_nsteps >= 0) && (step_rel >= multisim_nsteps )));
 +    while (!bLastStep || (bRerunMD && bNotLastFrame)) {
 +
 +        wallcycle_start(wcycle,ewcSTEP);
 +
 +        if (bRerunMD) {
 +            if (rerun_fr.bStep) {
 +                step = rerun_fr.step;
 +                step_rel = step - ir->init_step;
 +            }
 +            if (rerun_fr.bTime) {
 +                t = rerun_fr.time;
 +            }
 +            else
 +            {
 +                t = step;
 +            }
 +        } 
 +        else 
 +        {
 +            bLastStep = (step_rel == ir->nsteps);
 +            t = t0 + step*ir->delta_t;
 +        }
 +
 +        if (ir->efep != efepNO)
 +        {
 +            if (bRerunMD && rerun_fr.bLambda && (ir->delta_lambda!=0))
 +            {
 +                state_global->lambda = rerun_fr.lambda;
 +            }
 +            else
 +            {
 +                state_global->lambda = lam0 + step*ir->delta_lambda;
 +            }
 +            state->lambda = state_global->lambda;
 +            bDoDHDL = do_per_step(step,ir->nstdhdl);
 +        }
 +
 +        if (bSimAnn) 
 +        {
 +            update_annealing_target_temp(&(ir->opts),t);
 +        }
 +
 +        if (bRerunMD)
 +        {
 +            if (!(DOMAINDECOMP(cr) && !MASTER(cr)))
 +            {
 +                for(i=0; i<state_global->natoms; i++)
 +                {
 +                    copy_rvec(rerun_fr.x[i],state_global->x[i]);
 +                }
 +                if (rerun_fr.bV)
 +                {
 +                    for(i=0; i<state_global->natoms; i++)
 +                    {
 +                        copy_rvec(rerun_fr.v[i],state_global->v[i]);
 +                    }
 +                }
 +                else
 +                {
 +                    for(i=0; i<state_global->natoms; i++)
 +                    {
 +                        clear_rvec(state_global->v[i]);
 +                    }
 +                    if (bRerunWarnNoV)
 +                    {
 +                        fprintf(stderr,"\nWARNING: Some frames do not contain velocities.\n"
 +                                "         Ekin, temperature and pressure are incorrect,\n"
 +                                "         the virial will be incorrect when constraints are present.\n"
 +                                "\n");
 +                        bRerunWarnNoV = FALSE;
 +                    }
 +                }
 +            }
 +            copy_mat(rerun_fr.box,state_global->box);
 +            copy_mat(state_global->box,state->box);
 +
 +            if (vsite && (Flags & MD_RERUN_VSITE))
 +            {
 +                if (DOMAINDECOMP(cr))
 +                {
 +                    gmx_fatal(FARGS,"Vsite recalculation with -rerun is not implemented for domain decomposition, use particle decomposition");
 +                }
 +                if (graph)
 +                {
 +                    /* Following is necessary because the graph may get out of sync
 +                     * with the coordinates if we only have every N'th coordinate set
 +                     */
 +                    mk_mshift(fplog,graph,fr->ePBC,state->box,state->x);
 +                    shift_self(graph,state->box,state->x);
 +                }
 +                construct_vsites(fplog,vsite,state->x,nrnb,ir->delta_t,state->v,
 +                                 top->idef.iparams,top->idef.il,
 +                                 fr->ePBC,fr->bMolPBC,graph,cr,state->box);
 +                if (graph)
 +                {
 +                    unshift_self(graph,state->box,state->x);
 +                }
 +            }
 +        }
 +
 +        /* Stop Center of Mass motion */
 +        bStopCM = (ir->comm_mode != ecmNO && do_per_step(step,ir->nstcomm));
 +
 +        /* Copy back starting coordinates in case we're doing a forcefield scan */
 +        if (bFFscan)
 +        {
 +            for(ii=0; (ii<state->natoms); ii++)
 +            {
 +                copy_rvec(xcopy[ii],state->x[ii]);
 +                copy_rvec(vcopy[ii],state->v[ii]);
 +            }
 +            copy_mat(boxcopy,state->box);
 +        }
 +
 +        if (bRerunMD)
 +        {
 +            /* for rerun MD always do Neighbour Searching */
 +            bNS = (bFirstStep || ir->nstlist != 0);
 +            bNStList = bNS;
 +        }
 +        else
 +        {
 +            /* Determine whether or not to do Neighbour Searching and LR */
 +            bNStList = (ir->nstlist > 0  && step % ir->nstlist == 0);
 +            
 +            bNS = (bFirstStep || bExchanged || bNStList ||
 +                   (ir->nstlist == -1 && nlh.nabnsb > 0));
 +
 +            if (bNS && ir->nstlist == -1)
 +            {
 +                set_nlistheuristics(&nlh,bFirstStep || bExchanged,step);
 +            }
 +        } 
 +
 +        /* check whether we should stop because another simulation has 
 +           stopped. */
 +        if (MULTISIM(cr))
 +        {
 +            if ( (multisim_nsteps >= 0) &&  (step_rel >= multisim_nsteps)  &&  
 +                 (multisim_nsteps != ir->nsteps) )  
 +            {
 +                if (bNS)
 +                {
 +                    if (MASTER(cr))
 +                    {
 +                        fprintf(stderr, 
 +                                "Stopping simulation %d because another one has finished\n",
 +                                cr->ms->sim);
 +                    }
 +                    bLastStep=TRUE;
 +                    gs.sig[eglsCHKPT] = 1;
 +                }
 +            }
 +        }
 +
 +        /* < 0 means stop at next step, > 0 means stop at next NS step */
 +        if ( (gs.set[eglsSTOPCOND] < 0 ) ||
 +             ( (gs.set[eglsSTOPCOND] > 0 ) && ( bNS || ir->nstlist==0)) )
 +        {
 +            bLastStep = TRUE;
 +        }
 +
 +        /* Determine whether or not to update the Born radii if doing GB */
 +        bBornRadii=bFirstStep;
 +        if (ir->implicit_solvent && (step % ir->nstgbradii==0))
 +        {
 +            bBornRadii=TRUE;
 +        }
 +        
 +        do_log = do_per_step(step,ir->nstlog) || bFirstStep || bLastStep;
 +        do_verbose = bVerbose &&
 +                  (step % stepout == 0 || bFirstStep || bLastStep);
 +
 +        if (bNS && !(bFirstStep && ir->bContinuation && !bRerunMD))
 +        {
 +            if (bRerunMD)
 +            {
 +                bMasterState = TRUE;
 +            }
 +            else
 +            {
 +                bMasterState = FALSE;
 +                /* Correct the new box if it is too skewed */
 +                if (DYNAMIC_BOX(*ir))
 +                {
 +                    if (correct_box(fplog,step,state->box,graph))
 +                    {
 +                        bMasterState = TRUE;
 +                    }
 +                }
 +                if (DOMAINDECOMP(cr) && bMasterState)
 +                {
 +                    dd_collect_state(cr->dd,state,state_global);
 +                }
 +            }
 +
 +            if (DOMAINDECOMP(cr))
 +            {
 +                /* Repartition the domain decomposition */
 +                wallcycle_start(wcycle,ewcDOMDEC);
 +                dd_partition_system(fplog,step,cr,
 +                                    bMasterState,nstglobalcomm,
 +                                    state_global,top_global,ir,
 +                                    state,&f,mdatoms,top,fr,
 +                                    vsite,shellfc,constr,
 +                                    nrnb,wcycle,do_verbose);
 +                wallcycle_stop(wcycle,ewcDOMDEC);
 +                /* If using an iterative integrator, reallocate space to match the decomposition */
 +            }
 +        }
 +
 +        if (MASTER(cr) && do_log && !bFFscan)
 +        {
 +            print_ebin_header(fplog,step,t,state->lambda);
 +        }
 +
 +        if (ir->efep != efepNO)
 +        {
 +            update_mdatoms(mdatoms,state->lambda); 
 +        }
 +
 +        if (bRerunMD && rerun_fr.bV)
 +        {
 +            
 +            /* We need the kinetic energy at minus the half step for determining
 +             * the full step kinetic energy and possibly for T-coupling.*/
 +            /* This may not be quite working correctly yet . . . . */
 +            compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
 +                            wcycle,enerd,NULL,NULL,NULL,NULL,mu_tot,
 +                            constr,NULL,FALSE,state->box,
 +                            top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
 +                            CGLO_RERUNMD | CGLO_GSTAT | CGLO_TEMPERATURE);
 +        }
 +        clear_mat(force_vir);
 +        
 +        /* Ionize the atoms if necessary */
 +        if (bIonize)
 +        {
 +            ionize(fplog,oenv,mdatoms,top_global,t,ir,state->x,state->v,
 +                   mdatoms->start,mdatoms->start+mdatoms->homenr,state->box,cr);
 +        }
 +        
 +        /* Update force field in ffscan program */
 +        if (bFFscan)
 +        {
 +            if (update_forcefield(fplog,
 +                                  nfile,fnm,fr,
 +                                  mdatoms->nr,state->x,state->box)) {
 +                if (gmx_parallel_env_initialized())
 +                {
 +                    gmx_finalize();
 +                }
 +                exit(0);
 +            }
 +        }
 +
 +        /* We write a checkpoint at this MD step when:
 +         * either at an NS step when we signalled through gs,
 +         * or at the last step (but not when we do not want confout),
 +         * but never at the first step or with rerun.
 +         */
 +        bCPT = (((gs.set[eglsCHKPT] && (bNS || ir->nstlist == 0)) ||
 +                 (bLastStep && (Flags & MD_CONFOUT))) &&
 +                step > ir->init_step && !bRerunMD);
 +        if (bCPT)
 +        {
 +            gs.set[eglsCHKPT] = 0;
 +        }
 +
 +        /* Determine the energy and pressure:
 +         * at nstcalcenergy steps and at energy output steps (set below).
 +         */
 +        bNstEner = do_per_step(step,ir->nstcalcenergy);
 +        bCalcEnerPres =
 +            (bNstEner ||
 +             (ir->epc != epcNO && do_per_step(step,ir->nstpcouple)));
 +
 +        /* Do we need global communication ? */
 +        bGStat = (bCalcEnerPres || bStopCM ||
 +                  do_per_step(step,nstglobalcomm) ||
 +                  (ir->nstlist == -1 && !bRerunMD && step >= nlh.step_nscheck));
 +
 +        do_ene = (do_per_step(step,ir->nstenergy) || bLastStep);
 +
 +        if (do_ene || do_log)
 +        {
 +            bCalcEnerPres = TRUE;
 +            bGStat        = TRUE;
 +        }
 +        
 +        /* these CGLO_ options remain the same throughout the iteration */
 +        cglo_flags = ((bRerunMD ? CGLO_RERUNMD : 0) |
 +                      (bStopCM ? CGLO_STOPCM : 0) |
 +                      (bGStat ? CGLO_GSTAT : 0)
 +            );
 +        
 +        force_flags = (GMX_FORCE_STATECHANGED |
 +                       ((DYNAMIC_BOX(*ir) || bRerunMD) ? GMX_FORCE_DYNAMICBOX : 0) |
 +                       GMX_FORCE_ALLFORCES |
 +                       (bNStList ? GMX_FORCE_DOLR : 0) |
 +                       GMX_FORCE_SEPLRF |
 +                       (bCalcEnerPres ? GMX_FORCE_VIRIAL : 0) |
 +                       (bDoDHDL ? GMX_FORCE_DHDL : 0)
 +            );
 +        
 +        if (shellfc)
 +        {
 +            /* Now is the time to relax the shells */
 +            count=relax_shell_flexcon(fplog,cr,bVerbose,bFFscan ? step+1 : step,
 +                                      ir,bNS,force_flags,
 +                                      bStopCM,top,top_global,
 +                                      constr,enerd,fcd,
 +                                      state,f,force_vir,mdatoms,
 +                                      nrnb,wcycle,graph,groups,
 +                                      shellfc,fr,bBornRadii,t,mu_tot,
 +                                      state->natoms,&bConverged,vsite,
 +                                      outf->fp_field);
 +            tcount+=count;
 +
 +            if (bConverged)
 +            {
 +                nconverged++;
 +            }
 +        }
 +        else
 +        {
 +            /* The coordinates (x) are shifted (to get whole molecules)
 +             * in do_force.
 +             * This is parallelized as well, and does communication too.
 +             * Check comments in sim_util.c
 +             */
 +        
 +            do_force(fplog,cr,ir,step,nrnb,wcycle,top,top_global,groups,
 +                     state->box,state->x,&state->hist,
 +                     f,force_vir,mdatoms,enerd,fcd,
 +                     state->lambda,graph,
 +                     fr,vsite,mu_tot,t,outf->fp_field,ed,bBornRadii,
 +                     (bNS ? GMX_FORCE_NS : 0) | force_flags);
 +        }
 +        
 +        if (bTCR)
 +        {
 +            mu_aver = calc_mu_aver(cr,state->x,mdatoms->chargeA,
 +                                   mu_tot,&top_global->mols,mdatoms,gnx,grpindex);
 +        }
 +        
 +        if (bTCR && bFirstStep)
 +        {
 +            tcr=init_coupling(fplog,nfile,fnm,cr,fr,mdatoms,&(top->idef));
 +            fprintf(fplog,"Done init_coupling\n"); 
 +            fflush(fplog);
 +        }
 +        
 +        if (bVV && !bStartingFromCpt && !bRerunMD)
 +        /*  ############### START FIRST UPDATE HALF-STEP FOR VV METHODS############### */
 +        {
 +            if (ir->eI==eiVV && bInitStep) 
 +            {
 +                /* if using velocity verlet with full time step Ekin,
 +                 * take the first half step only to compute the 
 +                 * virial for the first step. From there,
 +                 * revert back to the initial coordinates
 +                 * so that the input is actually the initial step.
 +                 */
 +                copy_rvecn(state->v,cbuf,0,state->natoms); /* should make this better for parallelizing? */
 +            } else {
 +                /* this is for NHC in the Ekin(t+dt/2) version of vv */
 +                trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ1);            
 +            }
 +
 +            update_coords(fplog,step,ir,mdatoms,state,
 +                          f,fr->bTwinRange && bNStList,fr->f_twin,fcd,
 +                          ekind,M,wcycle,upd,bInitStep,etrtVELOCITY1,
 +                          cr,nrnb,constr,&top->idef);
 +            
 +            if (bIterations)
 +            {
 +                gmx_iterate_init(&iterate,bIterations && !bInitStep);
 +            }
 +            /* for iterations, we save these vectors, as we will be self-consistently iterating
 +               the calculations */
 +
 +            /*#### UPDATE EXTENDED VARIABLES IN TROTTER FORMULATION */
 +            
 +            /* save the state */
 +            if (bIterations && iterate.bIterate) { 
 +                copy_coupling_state(state,bufstate,ekind,ekind_save,&(ir->opts));
 +            }
 +            
 +            bFirstIterate = TRUE;
 +            while (bFirstIterate || (bIterations && iterate.bIterate))
 +            {
 +                if (bIterations && iterate.bIterate) 
 +                {
 +                    copy_coupling_state(bufstate,state,ekind_save,ekind,&(ir->opts));
 +                    if (bFirstIterate && bTrotter) 
 +                    {
 +                        /* The first time through, we need a decent first estimate
 +                           of veta(t+dt) to compute the constraints.  Do
 +                           this by computing the box volume part of the
 +                           trotter integration at this time. Nothing else
 +                           should be changed by this routine here.  If
 +                           !(first time), we start with the previous value
 +                           of veta.  */
 +                        
 +                        veta_save = state->veta;
 +                        trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ0);
 +                        vetanew = state->veta;
 +                        state->veta = veta_save;
 +                    } 
 +                } 
 +                
 +                bOK = TRUE;
 +                if ( !bRerunMD || rerun_fr.bV || bForceUpdate) {  /* Why is rerun_fr.bV here?  Unclear. */
 +                    dvdl = 0;
 +                    
 +                    update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,state,graph,f,
 +                                       &top->idef,shake_vir,NULL,
 +                                       cr,nrnb,wcycle,upd,constr,
 +                                       bInitStep,TRUE,bCalcEnerPres,vetanew);
 +                    
 +                    if (!bOK && !bFFscan)
 +                    {
 +                        gmx_fatal(FARGS,"Constraint error: Shake, Lincs or Settle could not solve the constrains");
 +                    }
 +                    
 +                } 
 +                else if (graph)
 +                { /* Need to unshift here if a do_force has been
 +                     called in the previous step */
 +                    unshift_self(graph,state->box,state->x);
 +                }
 +                
 +                
 +                /* if VV, compute the pressure and constraints */
 +                /* For VV2, we strictly only need this if using pressure
 +                 * control, but we really would like to have accurate pressures
 +                 * printed out.
 +                 * Think about ways around this in the future?
 +                 * For now, keep this choice in comments.
 +                 */
 +                /*bPres = (ir->eI==eiVV || IR_NPT_TROTTER(ir)); */
 +                    /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && IR_NPT_TROTTER(ir)));*/
 +                bPres = TRUE;
 +                bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK));
 +                compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
 +                                wcycle,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
 +                                constr,NULL,FALSE,state->box,
 +                                top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
 +                                cglo_flags 
 +                                | CGLO_ENERGY 
 +                                | (bTemp ? CGLO_TEMPERATURE:0) 
 +                                | (bPres ? CGLO_PRESSURE : 0) 
 +                                | (bPres ? CGLO_CONSTRAINT : 0)
 +                                | ((bIterations && iterate.bIterate) ? CGLO_ITERATE : 0)  
 +                                | (bFirstIterate ? CGLO_FIRSTITERATE : 0)
 +                                | CGLO_SCALEEKIN 
 +                    );
 +                /* explanation of above: 
 +                   a) We compute Ekin at the full time step
 +                   if 1) we are using the AveVel Ekin, and it's not the
 +                   initial step, or 2) if we are using AveEkin, but need the full
 +                   time step kinetic energy for the pressure (always true now, since we want accurate statistics).
 +                   b) If we are using EkinAveEkin for the kinetic energy for the temperature control, we still feed in
 +                   EkinAveVel because it's needed for the pressure */
 +                
 +                /* temperature scaling and pressure scaling to produce the extended variables at t+dt */
 +                if (!bInitStep) 
 +                {
 +                    if (bTrotter)
 +                    {
 +                        trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ2);
 +                    } 
 +                    else 
 +                    {
 +                        update_tcouple(fplog,step,ir,state,ekind,wcycle,upd,&MassQ,mdatoms);
 +                    }
 +                }
 +                
 +                if (bIterations &&
 +                    done_iterating(cr,fplog,step,&iterate,bFirstIterate,
 +                                   state->veta,&vetanew)) 
 +                {
 +                    break;
 +                }
 +                bFirstIterate = FALSE;
 +            }
 +
 +            if (bTrotter && !bInitStep) {
 +                copy_mat(shake_vir,state->svir_prev);
 +                copy_mat(force_vir,state->fvir_prev);
 +                if (IR_NVT_TROTTER(ir) && ir->eI==eiVV) {
 +                    /* update temperature and kinetic energy now that step is over - this is the v(t+dt) point */
 +                    enerd->term[F_TEMP] = sum_ekin(&(ir->opts),ekind,NULL,(ir->eI==eiVV),FALSE,FALSE);
 +                    enerd->term[F_EKIN] = trace(ekind->ekin);
 +                }
 +            }
 +            /* if it's the initial step, we performed this first step just to get the constraint virial */
 +            if (bInitStep && ir->eI==eiVV) {
 +                copy_rvecn(cbuf,state->v,0,state->natoms);
 +            }
 +            
 +            if (fr->bSepDVDL && fplog && do_log) 
 +            {
 +                fprintf(fplog,sepdvdlformat,"Constraint",0.0,dvdl);
 +            }
 +            enerd->term[F_DHDL_CON] += dvdl;
 +        }
 +    
 +        /* MRS -- now done iterating -- compute the conserved quantity */
 +        if (bVV) {
 +            saved_conserved_quantity = compute_conserved_from_auxiliary(ir,state,&MassQ);
 +            if (ir->eI==eiVV) 
 +            {
 +                last_ekin = enerd->term[F_EKIN]; /* does this get preserved through checkpointing? */
 +            }
 +            if ((ir->eDispCorr != edispcEnerPres) && (ir->eDispCorr != edispcAllEnerPres)) 
 +            {
 +                saved_conserved_quantity -= enerd->term[F_DISPCORR];
 +            }
 +        }
 +        
 +        /* ########  END FIRST UPDATE STEP  ############## */
 +        /* ########  If doing VV, we now have v(dt) ###### */
 +        
 +        /* ################## START TRAJECTORY OUTPUT ################# */
 +        
 +        /* Now we have the energies and forces corresponding to the 
 +         * coordinates at time t. We must output all of this before
 +         * the update.
 +         * for RerunMD t is read from input trajectory
 +         */
 +        mdof_flags = 0;
 +        if (do_per_step(step,ir->nstxout)) { mdof_flags |= MDOF_X; }
 +        if (do_per_step(step,ir->nstvout)) { mdof_flags |= MDOF_V; }
 +        if (do_per_step(step,ir->nstfout)) { mdof_flags |= MDOF_F; }
 +        if (do_per_step(step,ir->nstxtcout)) { mdof_flags |= MDOF_XTC; }
 +        if (bCPT) { mdof_flags |= MDOF_CPT; };
 +
 +#if defined(GMX_FAHCORE) || defined(GMX_WRITELASTSTEP)
 +        if (bLastStep)
 +        {
 +            /* Enforce writing positions and velocities at end of run */
 +            mdof_flags |= (MDOF_X | MDOF_V);
 +        }
 +#endif
 +#ifdef GMX_FAHCORE
 +        if (MASTER(cr))
 +            fcReportProgress( ir->nsteps, step );
 +
 +        /* sync bCPT and fc record-keeping */
 +        if (bCPT && MASTER(cr))
 +            fcRequestCheckPoint();
 +#endif
 +        
 +        if (mdof_flags != 0)
 +        {
 +            wallcycle_start(wcycle,ewcTRAJ);
 +            if (bCPT)
 +            {
 +                if (state->flags & (1<<estLD_RNG))
 +                {
 +                    get_stochd_state(upd,state);
 +                }
 +                if (MASTER(cr))
 +                {
 +                    if (bSumEkinhOld)
 +                    {
 +                        state_global->ekinstate.bUpToDate = FALSE;
 +                    }
 +                    else
 +                    {
 +                        update_ekinstate(&state_global->ekinstate,ekind);
 +                        state_global->ekinstate.bUpToDate = TRUE;
 +                    }
 +                    update_energyhistory(&state_global->enerhist,mdebin);
 +                }
 +            }
 +            write_traj(fplog,cr,outf,mdof_flags,top_global,
 +                       step,t,state,state_global,f,f_global,&n_xtc,&x_xtc);
 +            if (bCPT)
 +            {
 +                nchkpt++;
 +                bCPT = FALSE;
 +            }
 +            debug_gmx();
 +            if (bLastStep && step_rel == ir->nsteps &&
 +                (Flags & MD_CONFOUT) && MASTER(cr) &&
 +                !bRerunMD && !bFFscan)
 +            {
 +                /* x and v have been collected in write_traj,
 +                 * because a checkpoint file will always be written
 +                 * at the last step.
 +                 */
 +                fprintf(stderr,"\nWriting final coordinates.\n");
 +                if (ir->ePBC != epbcNONE && !ir->bPeriodicMols &&
 +                    DOMAINDECOMP(cr))
 +                {
 +                    /* Make molecules whole only for confout writing */
 +                    do_pbc_mtop(fplog,ir->ePBC,state->box,top_global,state_global->x);
 +                }
 +                write_sto_conf_mtop(ftp2fn(efSTO,nfile,fnm),
 +                                    *top_global->name,top_global,
 +                                    state_global->x,state_global->v,
 +                                    ir->ePBC,state->box);
 +                debug_gmx();
 +            }
 +            wallcycle_stop(wcycle,ewcTRAJ);
 +        }
 +        
 +        /* kludge -- virial is lost with restart for NPT control. Must restart */
 +        if (bStartingFromCpt && bVV) 
 +        {
 +            copy_mat(state->svir_prev,shake_vir);
 +            copy_mat(state->fvir_prev,force_vir);
 +        }
 +        /*  ################## END TRAJECTORY OUTPUT ################ */
 +        
 +        /* Determine the wallclock run time up till now */
 +        run_time = gmx_gettime() - (double)runtime->real;
 +
 +        /* Check whether everything is still all right */
 +        if (((int)gmx_get_stop_condition() > handled_stop_condition)
 +#ifdef GMX_THREAD_MPI
 +            && MASTER(cr)
 +#endif
 +            )
 +        {
 +            /* This just makes gs.sig compatible with the hack
 +               of sending signals around by MPI_Reduce together with
 +               other floats */
 +            if ( gmx_get_stop_condition() == gmx_stop_cond_next_ns )
 +                gs.sig[eglsSTOPCOND]=1;
 +            if ( gmx_get_stop_condition() == gmx_stop_cond_next )
 +                gs.sig[eglsSTOPCOND]=-1;
 +            /* < 0 means stop at next step, > 0 means stop at next NS step */
 +            if (fplog)
 +            {
 +                fprintf(fplog,
 +                        "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
 +                        gmx_get_signal_name(),
 +                        gs.sig[eglsSTOPCOND]==1 ? "NS " : "");
 +                fflush(fplog);
 +            }
 +            fprintf(stderr,
 +                    "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
 +                    gmx_get_signal_name(),
 +                    gs.sig[eglsSTOPCOND]==1 ? "NS " : "");
 +            fflush(stderr);
 +            handled_stop_condition=(int)gmx_get_stop_condition();
 +        }
 +        else if (MASTER(cr) && (bNS || ir->nstlist <= 0) &&
 +                 (max_hours > 0 && run_time > max_hours*60.0*60.0*0.99) &&
 +                 gs.sig[eglsSTOPCOND] == 0 && gs.set[eglsSTOPCOND] == 0)
 +        {
 +            /* Signal to terminate the run */
 +            gs.sig[eglsSTOPCOND] = 1;
 +            if (fplog)
 +            {
 +                fprintf(fplog,"\nStep %s: Run time exceeded %.3f hours, will terminate the run\n",gmx_step_str(step,sbuf),max_hours*0.99);
 +            }
 +            fprintf(stderr, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n",gmx_step_str(step,sbuf),max_hours*0.99);
 +        }
 +
 +        if (bResetCountersHalfMaxH && MASTER(cr) &&
 +            run_time > max_hours*60.0*60.0*0.495)
 +        {
 +            gs.sig[eglsRESETCOUNTERS] = 1;
 +        }
 +
 +        if (ir->nstlist == -1 && !bRerunMD)
 +        {
 +            /* When bGStatEveryStep=FALSE, global_stat is only called
 +             * when we check the atom displacements, not at NS steps.
 +             * This means that also the bonded interaction count check is not
 +             * performed immediately after NS. Therefore a few MD steps could
 +             * be performed with missing interactions.
 +             * But wrong energies are never written to file,
 +             * since energies are only written after global_stat
 +             * has been called.
 +             */
 +            if (step >= nlh.step_nscheck)
 +            {
 +                nlh.nabnsb = natoms_beyond_ns_buffer(ir,fr,&top->cgs,
 +                                                     nlh.scale_tot,state->x);
 +            }
 +            else
 +            {
 +                /* This is not necessarily true,
 +                 * but step_nscheck is determined quite conservatively.
 +                 */
 +                nlh.nabnsb = 0;
 +            }
 +        }
 +
 +        /* In parallel we only have to check for checkpointing in steps
 +         * where we do global communication,
 +         *  otherwise the other nodes don't know.
 +         */
 +        if (MASTER(cr) && ((bGStat || !PAR(cr)) &&
 +                           cpt_period >= 0 &&
 +                           (cpt_period == 0 || 
 +                            run_time >= nchkpt*cpt_period*60.0)) &&
 +            gs.set[eglsCHKPT] == 0)
 +        {
 +            gs.sig[eglsCHKPT] = 1;
 +        }
 +  
 +        if (bIterations)
 +        {
 +            gmx_iterate_init(&iterate,bIterations);
 +        }
 +    
 +        /* for iterations, we save these vectors, as we will be redoing the calculations */
 +        if (bIterations && iterate.bIterate) 
 +        {
 +            copy_coupling_state(state,bufstate,ekind,ekind_save,&(ir->opts));
 +        }
 +        bFirstIterate = TRUE;
 +        while (bFirstIterate || (bIterations && iterate.bIterate))
 +        {
 +            /* We now restore these vectors to redo the calculation with improved extended variables */    
 +            if (bIterations) 
 +            { 
 +                copy_coupling_state(bufstate,state,ekind_save,ekind,&(ir->opts));
 +            }
 +
 +            /* We make the decision to break or not -after- the calculation of Ekin and Pressure,
 +               so scroll down for that logic */
 +            
 +            /* #########   START SECOND UPDATE STEP ################# */
 +            /* Box is changed in update() when we do pressure coupling,
 +             * but we should still use the old box for energy corrections and when
 +             * writing it to the energy file, so it matches the trajectory files for
 +             * the same timestep above. Make a copy in a separate array.
 +             */
 +            copy_mat(state->box,lastbox);
 +
 +            bOK = TRUE;
 +            if (!(bRerunMD && !rerun_fr.bV && !bForceUpdate))
 +            {
 +                wallcycle_start(wcycle,ewcUPDATE);
 +                dvdl = 0;
 +                /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */
 +                if (bTrotter) 
 +                {
 +                    if (bIterations && iterate.bIterate) 
 +                    {
 +                        if (bFirstIterate) 
 +                        {
 +                            scalevir = 1;
 +                        }
 +                        else 
 +                        {
 +                            /* we use a new value of scalevir to converge the iterations faster */
 +                            scalevir = tracevir/trace(shake_vir);
 +                        }
 +                        msmul(shake_vir,scalevir,shake_vir); 
 +                        m_add(force_vir,shake_vir,total_vir);
 +                        clear_mat(shake_vir);
 +                    }
 +                    trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ3);
 +                /* We can only do Berendsen coupling after we have summed
 +                 * the kinetic energy or virial. Since this happens
 +                 * in global_state after update, we should only do it at
 +                 * step % nstlist = 1 with bGStatEveryStep=FALSE.
 +                 */
 +                }
 +                else 
 +                {
 +                    update_tcouple(fplog,step,ir,state,ekind,wcycle,upd,&MassQ,mdatoms);
 +                    update_pcouple(fplog,step,ir,state,pcoupl_mu,M,wcycle,
 +                                   upd,bInitStep);
 +                }
 +
 +                if (bVV)
 +                {
 +                    /* velocity half-step update */
 +                    update_coords(fplog,step,ir,mdatoms,state,f,
 +                                  fr->bTwinRange && bNStList,fr->f_twin,fcd,
 +                                  ekind,M,wcycle,upd,FALSE,etrtVELOCITY2,
 +                                  cr,nrnb,constr,&top->idef);
 +                }
 +
 +                /* Above, initialize just copies ekinh into ekin,
 +                 * it doesn't copy position (for VV),
 +                 * and entire integrator for MD.
 +                 */
 +                
 +                if (ir->eI==eiVVAK) 
 +                {
 +                    copy_rvecn(state->x,cbuf,0,state->natoms);
 +                }
 +                
 +                update_coords(fplog,step,ir,mdatoms,state,f,fr->bTwinRange && bNStList,fr->f_twin,fcd,
 +                              ekind,M,wcycle,upd,bInitStep,etrtPOSITION,cr,nrnb,constr,&top->idef);
 +                wallcycle_stop(wcycle,ewcUPDATE);
 +
 +                update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,state,graph,f,
 +                                   &top->idef,shake_vir,force_vir,
 +                                   cr,nrnb,wcycle,upd,constr,
 +                                   bInitStep,FALSE,bCalcEnerPres,state->veta);  
 +                
 +                if (ir->eI==eiVVAK) 
 +                {
 +                    /* erase F_EKIN and F_TEMP here? */
 +                    /* just compute the kinetic energy at the half step to perform a trotter step */
 +                    compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
 +                                    wcycle,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
 +                                    constr,NULL,FALSE,lastbox,
 +                                    top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
 +                                    cglo_flags | CGLO_TEMPERATURE    
 +                        );
 +                    wallcycle_start(wcycle,ewcUPDATE);
 +                    trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ4);            
 +                    /* now we know the scaling, we can compute the positions again */
 +                    copy_rvecn(cbuf,state->x,0,state->natoms);
 +
 +                    update_coords(fplog,step,ir,mdatoms,state,f,fr->bTwinRange && bNStList,fr->f_twin,fcd,
 +                                  ekind,M,wcycle,upd,bInitStep,etrtPOSITION,cr,nrnb,constr,&top->idef);
 +                    wallcycle_stop(wcycle,ewcUPDATE);
 +
 +                    /* do we need an extra constraint here? just need to copy out of state->v to upd->xp? */
 +                    /* are the small terms in the shake_vir here due
 +                     * to numerical errors, or are they important
 +                     * physically? I'm thinking they are just errors, but not completely sure. 
 +                     * For now, will call without actually constraining, constr=NULL*/
 +                    update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,state,graph,f,
 +                                       &top->idef,tmp_vir,force_vir,
 +                                       cr,nrnb,wcycle,upd,NULL,
 +                                       bInitStep,FALSE,bCalcEnerPres,
 +                                       state->veta);  
 +                }
 +                if (!bOK && !bFFscan) 
 +                {
 +                    gmx_fatal(FARGS,"Constraint error: Shake, Lincs or Settle could not solve the constrains");
 +                }
 +                
 +                if (fr->bSepDVDL && fplog && do_log) 
 +                {
 +                    fprintf(fplog,sepdvdlformat,"Constraint",0.0,dvdl);
 +                }
 +                enerd->term[F_DHDL_CON] += dvdl;
 +            } 
 +            else if (graph) 
 +            {
 +                /* Need to unshift here */
 +                unshift_self(graph,state->box,state->x);
 +            }
 +
 +            if (vsite != NULL) 
 +            {
 +                wallcycle_start(wcycle,ewcVSITECONSTR);
 +                if (graph != NULL) 
 +                {
 +                    shift_self(graph,state->box,state->x);
 +                }
 +                construct_vsites(fplog,vsite,state->x,nrnb,ir->delta_t,state->v,
 +                                 top->idef.iparams,top->idef.il,
 +                                 fr->ePBC,fr->bMolPBC,graph,cr,state->box);
 +                
 +                if (graph != NULL) 
 +                {
 +                    unshift_self(graph,state->box,state->x);
 +                }
 +                wallcycle_stop(wcycle,ewcVSITECONSTR);
 +            }
 +            
 +            /* ############## IF NOT VV, Calculate globals HERE, also iterate constraints ############ */
 +            if (ir->nstlist == -1 && bFirstIterate)
 +            {
 +                gs.sig[eglsNABNSB] = nlh.nabnsb;
 +            }
 +            compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
 +                            wcycle,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
 +                            constr,
 +                            bFirstIterate ? &gs : NULL, 
 +                            (step_rel % gs.nstms == 0) && 
 +                                (multisim_nsteps<0 || (step_rel<multisim_nsteps)),
 +                            lastbox,
 +                            top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
 +                            cglo_flags 
 +                            | (!EI_VV(ir->eI) ? CGLO_ENERGY : 0) 
 +                            | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0) 
 +                            | (!EI_VV(ir->eI) || bRerunMD ? CGLO_PRESSURE : 0) 
 +                            | (bIterations && iterate.bIterate ? CGLO_ITERATE : 0) 
 +                            | (bFirstIterate ? CGLO_FIRSTITERATE : 0)
 +                            | CGLO_CONSTRAINT 
 +                );
 +            if (ir->nstlist == -1 && bFirstIterate)
 +            {
 +                nlh.nabnsb = gs.set[eglsNABNSB];
 +                gs.set[eglsNABNSB] = 0;
 +            }
 +            /* bIterate is set to keep it from eliminating the old ekin kinetic energy terms */
 +            /* #############  END CALC EKIN AND PRESSURE ################# */
 +        
 +            /* Note: this is OK, but there are some numerical precision issues with using the convergence of
 +               the virial that should probably be addressed eventually. state->veta has better properties,
 +               but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could
 +               generate the new shake_vir, but test the veta value for convergence.  This will take some thought. */
 +
 +            if (bIterations && 
 +                done_iterating(cr,fplog,step,&iterate,bFirstIterate,
 +                               trace(shake_vir),&tracevir)) 
 +            {
 +                break;
 +            }
 +            bFirstIterate = FALSE;
 +        }
 +
 +        update_box(fplog,step,ir,mdatoms,state,graph,f,
 +                   ir->nstlist==-1 ? &nlh.scale_tot : NULL,pcoupl_mu,nrnb,wcycle,upd,bInitStep,FALSE);
 +        
 +        /* ################# END UPDATE STEP 2 ################# */
 +        /* #### We now have r(t+dt) and v(t+dt/2)  ############# */
 +    
 +        /* The coordinates (x) were unshifted in update */
 +        if (bFFscan && (shellfc==NULL || bConverged))
 +        {
 +            if (print_forcefield(fplog,enerd->term,mdatoms->homenr,
 +                                 f,NULL,xcopy,
 +                                 &(top_global->mols),mdatoms->massT,pres))
 +            {
 +                if (gmx_parallel_env_initialized())
 +                {
 +                    gmx_finalize();
 +                }
 +                fprintf(stderr,"\n");
 +                exit(0);
 +            }
 +        }
 +        if (!bGStat)
 +        {
 +            /* We will not sum ekinh_old,                                                            
 +             * so signal that we still have to do it.                                                
 +             */
 +            bSumEkinhOld = TRUE;
 +        }
 +        
 +        if (bTCR)
 +        {
 +            /* Only do GCT when the relaxation of shells (minimization) has converged,
 +             * otherwise we might be coupling to bogus energies. 
 +             * In parallel we must always do this, because the other sims might
 +             * update the FF.
 +             */
 +
 +            /* Since this is called with the new coordinates state->x, I assume
 +             * we want the new box state->box too. / EL 20040121
 +             */
 +            do_coupling(fplog,oenv,nfile,fnm,tcr,t,step,enerd->term,fr,
 +                        ir,MASTER(cr),
 +                        mdatoms,&(top->idef),mu_aver,
 +                        top_global->mols.nr,cr,
 +                        state->box,total_vir,pres,
 +                        mu_tot,state->x,f,bConverged);
 +            debug_gmx();
 +        }
 +
 +        /* #########  BEGIN PREPARING EDR OUTPUT  ###########  */
 +        
 +        /* sum up the foreign energy and dhdl terms */
 +        sum_dhdl(enerd,state->lambda,ir);
 +
 +        /* use the directly determined last velocity, not actually the averaged half steps */
 +        if (bTrotter && ir->eI==eiVV) 
 +        {
 +            enerd->term[F_EKIN] = last_ekin;
 +        }
 +        enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN];
 +        
 +        if (bVV)
 +        {
 +            enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity;
 +        }
 +        else 
 +        {
 +            enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + compute_conserved_from_auxiliary(ir,state,&MassQ);
 +        }
 +        /* Check for excessively large energies */
 +        if (bIonize) 
 +        {
 +#ifdef GMX_DOUBLE
 +            real etot_max = 1e200;
 +#else
 +            real etot_max = 1e30;
 +#endif
 +            if (fabs(enerd->term[F_ETOT]) > etot_max) 
 +            {
 +                fprintf(stderr,"Energy too large (%g), giving up\n",
 +                        enerd->term[F_ETOT]);
 +            }
 +        }
 +        /* #########  END PREPARING EDR OUTPUT  ###########  */
 +        
 +        /* Time for performance */
 +        if (((step % stepout) == 0) || bLastStep) 
 +        {
 +            runtime_upd_proc(runtime);
 +        }
 +        
 +        /* Output stuff */
 +        if (MASTER(cr))
 +        {
 +            gmx_bool do_dr,do_or;
 +            
 +            if (!(bStartingFromCpt && (EI_VV(ir->eI)))) 
 +            {
 +                if (bNstEner)
 +                {
 +                    upd_mdebin(mdebin,bDoDHDL, TRUE,
 +                               t,mdatoms->tmass,enerd,state,lastbox,
 +                               shake_vir,force_vir,total_vir,pres,
 +                               ekind,mu_tot,constr);
 +                }
 +                else
 +                {
 +                    upd_mdebin_step(mdebin);
 +                }
 +                
 +                do_dr  = do_per_step(step,ir->nstdisreout);
 +                do_or  = do_per_step(step,ir->nstorireout);
 +                
 +                print_ebin(outf->fp_ene,do_ene,do_dr,do_or,do_log?fplog:NULL,
 +                           step,t,
 +                           eprNORMAL,bCompact,mdebin,fcd,groups,&(ir->opts));
 +            }
 +            if (ir->ePull != epullNO)
 +            {
 +                pull_print_output(ir->pull,step,t);
 +            }
 +            
 +            if (do_per_step(step,ir->nstlog))
 +            {
 +                if(fflush(fplog) != 0)
 +                {
++                    gmx_fatal(FARGS,"Cannot flush logfile - maybe you are out of disk space?");
 +                }
 +            }
 +        }
 +
 +
 +        /* Remaining runtime */
 +        if (MULTIMASTER(cr) && (do_verbose || gmx_got_usr_signal() ))
 +        {
 +            if (shellfc) 
 +            {
 +                fprintf(stderr,"\n");
 +            }
 +            print_time(stderr,runtime,step,ir,cr);
 +        }
 +
 +        /* Replica exchange */
 +        bExchanged = FALSE;
 +        if ((repl_ex_nst > 0) && (step > 0) && !bLastStep &&
 +            do_per_step(step,repl_ex_nst)) 
 +        {
 +            bExchanged = replica_exchange(fplog,cr,repl_ex,
 +                                          state_global,enerd->term,
 +                                          state,step,t);
 +
 +            if (bExchanged && DOMAINDECOMP(cr)) 
 +            {
 +                dd_partition_system(fplog,step,cr,TRUE,1,
 +                                    state_global,top_global,ir,
 +                                    state,&f,mdatoms,top,fr,
 +                                    vsite,shellfc,constr,
 +                                    nrnb,wcycle,FALSE);
 +            }
 +        }
 +        
 +        bFirstStep = FALSE;
 +        bInitStep = FALSE;
 +        bStartingFromCpt = FALSE;
 +
 +        /* #######  SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */
 +        /* With all integrators, except VV, we need to retain the pressure
 +         * at the current step for coupling at the next step.
 +         */
 +        if ((state->flags & (1<<estPRES_PREV)) &&
 +            (bGStatEveryStep ||
 +             (ir->nstpcouple > 0 && step % ir->nstpcouple == 0)))
 +        {
 +            /* Store the pressure in t_state for pressure coupling
 +             * at the next MD step.
 +             */
 +            copy_mat(pres,state->pres_prev);
 +        }
 +        
 +        /* #######  END SET VARIABLES FOR NEXT ITERATION ###### */
 +
 +        if ( (membed!=NULL) && (!bLastStep) )
 +            rescale_membed(step_rel,membed,state_global->x);
 +        
 +        if (bRerunMD) 
 +        {
 +            if (MASTER(cr))
 +            {
 +                /* read next frame from input trajectory */
 +                bNotLastFrame = read_next_frame(oenv,status,&rerun_fr);
 +            }
 +
 +            if (PAR(cr))
 +            {
 +                rerun_parallel_comm(cr,&rerun_fr,&bNotLastFrame);
 +            }
 +        }
 +        
 +        if (!bRerunMD || !rerun_fr.bStep)
 +        {
 +            /* increase the MD step number */
 +            step++;
 +            step_rel++;
 +        }
 +        
 +        cycles = wallcycle_stop(wcycle,ewcSTEP);
 +        if (DOMAINDECOMP(cr) && wcycle)
 +        {
 +            dd_cycles_add(cr->dd,cycles,ddCyclStep);
 +        }
 +        
 +        if (step_rel == wcycle_get_reset_counters(wcycle) ||
 +            gs.set[eglsRESETCOUNTERS] != 0)
 +        {
 +            /* Reset all the counters related to performance over the run */
 +            reset_all_counters(fplog,cr,step,&step_rel,ir,wcycle,nrnb,runtime);
 +            wcycle_set_reset_counters(wcycle,-1);
 +            /* Correct max_hours for the elapsed time */
 +            max_hours -= run_time/(60.0*60.0);
 +            bResetCountersHalfMaxH = FALSE;
 +            gs.set[eglsRESETCOUNTERS] = 0;
 +        }
 +
 +    }
 +    /* End of main MD loop */
 +    debug_gmx();
 +    
 +    /* Stop the time */
 +    runtime_end(runtime);
 +    
 +    if (bRerunMD && MASTER(cr))
 +    {
 +        close_trj(status);
 +    }
 +    
 +    if (!(cr->duty & DUTY_PME))
 +    {
 +        /* Tell the PME only node to finish */
 +        gmx_pme_finish(cr);
 +    }
 +    
 +    if (MASTER(cr))
 +    {
 +        if (ir->nstcalcenergy > 0 && !bRerunMD) 
 +        {
 +            print_ebin(outf->fp_ene,FALSE,FALSE,FALSE,fplog,step,t,
 +                       eprAVER,FALSE,mdebin,fcd,groups,&(ir->opts));
 +        }
 +    }
 +
 +    done_mdoutf(outf);
 +
 +    debug_gmx();
 +
 +    if (ir->nstlist == -1 && nlh.nns > 0 && fplog)
 +    {
 +        fprintf(fplog,"Average neighborlist lifetime: %.1f steps, std.dev.: %.1f steps\n",nlh.s1/nlh.nns,sqrt(nlh.s2/nlh.nns - sqr(nlh.s1/nlh.nns)));
 +        fprintf(fplog,"Average number of atoms that crossed the half buffer length: %.1f\n\n",nlh.ab/nlh.nns);
 +    }
 +    
 +    if (shellfc && fplog)
 +    {
 +        fprintf(fplog,"Fraction of iterations that converged:           %.2f %%\n",
 +                (nconverged*100.0)/step_rel);
 +        fprintf(fplog,"Average number of force evaluations per MD step: %.2f\n\n",
 +                tcount/step_rel);
 +    }
 +    
 +    if (repl_ex_nst > 0 && MASTER(cr))
 +    {
 +        print_replica_exchange_statistics(fplog,repl_ex);
 +    }
 +    
 +    runtime->nsteps_done = step_rel;
 +    
 +    return 0;
 +}
index 71fae7c7375ae6d469346ea1c501d6e2b7a246e8,0000000000000000000000000000000000000000..564ada37b1a59b34b7d80f2f3dcbba4202c097b1
mode 100644,000000..100644
--- /dev/null
@@@ -1,579 -1,0 +1,578 @@@
-     t_mdebin *mdebin=NULL;
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2010, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 + */
 +
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <signal.h>
 +#include <stdlib.h>
 +
 +#if ((defined WIN32 || defined _WIN32 || defined WIN64 || defined _WIN64) && !defined __CYGWIN__ && !defined __CYGWIN32__)
 +/* _isnan() */
 +#include <float.h>
 +#endif
 +
 +#include "typedefs.h"
 +#include "smalloc.h"
 +#include "sysstuff.h"
 +#include "vec.h"
 +#include "statutil.h"
 +#include "vcm.h"
 +#include "mdebin.h"
 +#include "nrnb.h"
 +#include "calcmu.h"
 +#include "index.h"
 +#include "vsite.h"
 +#include "update.h"
 +#include "ns.h"
 +#include "trnio.h"
 +#include "xtcio.h"
 +#include "mdrun.h"
 +#include "confio.h"
 +#include "network.h"
 +#include "pull.h"
 +#include "xvgr.h"
 +#include "physics.h"
 +#include "names.h"
 +#include "xmdrun.h"
 +#include "ionize.h"
 +#include "disre.h"
 +#include "orires.h"
 +#include "dihre.h"
 +#include "pppm.h"
 +#include "pme.h"
 +#include "mdatoms.h"
 +#include "qmmm.h"
 +#include "domdec.h"
 +#include "partdec.h"
 +#include "topsort.h"
 +#include "coulomb.h"
 +#include "constr.h"
 +#include "compute_io.h"
 +#include "mvdata.h"
 +#include "checkpoint.h"
 +#include "mtop_util.h"
 +#include "sighandler.h"
 +#include "genborn.h"
 +#include "string2.h"
 +#include "copyrite.h"
 +#include "membed.h"
 +
 +#ifdef GMX_THREAD_MPI
 +#include "tmpi.h"
 +#endif
 +
 +/* include even when OpenMM not used to force compilation of do_md_openmm */
 +#include "openmm_wrapper.h"
 +
 +double do_md_openmm(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
 +                    const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
 +                    int nstglobalcomm,
 +                    gmx_vsite_t *vsite,gmx_constr_t constr,
 +                    int stepout,t_inputrec *ir,
 +                    gmx_mtop_t *top_global,
 +                    t_fcdata *fcd,
 +                    t_state *state_global,
 +                    t_mdatoms *mdatoms,
 +                    t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +                    gmx_edsam_t ed,t_forcerec *fr,
 +                    int repl_ex_nst,int repl_ex_seed,
 +                    gmx_membed_t *membed,
 +                    real cpt_period,real max_hours,
 +                    const char *deviceOptions,
 +                    unsigned long Flags,
 +                    gmx_runtime_t *runtime)
 +{
 +    gmx_mdoutf_t *outf;
 +    gmx_large_int_t step,step_rel;
 +    double     run_time;
 +    double     t,t0,lam0;
 +    gmx_bool       bSimAnn,
 +    bFirstStep,bStateFromTPX,bLastStep,bStartingFromCpt;
 +    gmx_bool       bInitStep=TRUE;
 +    gmx_bool       do_ene,do_log, do_verbose,
 +    bX,bV,bF,bCPT;
 +    tensor     force_vir,shake_vir,total_vir,pres;
 +    int        i,m;
 +    int        mdof_flags;
 +    rvec       mu_tot;
 +    t_vcm      *vcm;
 +    int        nchkpt=1;
 +    gmx_localtop_t *top;
-         fprintf(fplog,"Initial temperature: %g K\n",enerd->term[F_TEMP]);
++    t_mdebin *mdebin;
 +    t_state    *state=NULL;
 +    rvec       *f_global=NULL;
 +    int        n_xtc=-1;
 +    rvec       *x_xtc=NULL;
 +    gmx_enerdata_t *enerd;
 +    rvec       *f=NULL;
 +    gmx_global_stat_t gstat;
 +    gmx_update_t upd=NULL;
 +    t_graph    *graph=NULL;
 +    globsig_t   gs;
 +
 +    gmx_groups_t *groups;
 +    gmx_ekindata_t *ekind, *ekind_save;
 +    gmx_bool        bAppend;
 +    int         a0,a1;
 +    matrix      lastbox;
 +    real        reset_counters=0,reset_counters_now=0;
 +    char        sbuf[STEPSTRSIZE],sbuf2[STEPSTRSIZE];
 +    int         handled_stop_condition=gmx_stop_cond_none; 
 +
 +    const char *ommOptions = NULL;
 +    void   *openmmData;
 +
 +#ifdef GMX_DOUBLE
 +    /* Checks in cmake should prevent the compilation in double precision
 +     * with OpenMM, but just to be sure we check here.
 +     */
 +    gmx_fatal(FARGS,"Compilation was performed in double precision, but OpenMM only supports single precision. If you want to use to OpenMM, compile in single precision.");
 +#endif
 +
 +    bAppend  = (Flags & MD_APPENDFILES);
 +    check_ir_old_tpx_versions(cr,fplog,ir,top_global);
 +
 +    groups = &top_global->groups;
 +
 +    /* Initial values */
 +    init_md(fplog,cr,ir,oenv,&t,&t0,&state_global->lambda,&lam0,
 +            nrnb,top_global,&upd,
 +            nfile,fnm,&outf,&mdebin,
 +            force_vir,shake_vir,mu_tot,&bSimAnn,&vcm,state_global,Flags);
 +
 +    clear_mat(total_vir);
 +    clear_mat(pres);
 +    /* Energy terms and groups */
 +    snew(enerd,1);
 +    init_enerdata(top_global->groups.grps[egcENER].nr,ir->n_flambda,enerd);
 +    snew(f,top_global->natoms);
 +
 +    /* Kinetic energy data */
 +    snew(ekind,1);
 +    init_ekindata(fplog,top_global,&(ir->opts),ekind);
 +    /* needed for iteration of constraints */
 +    snew(ekind_save,1);
 +    init_ekindata(fplog,top_global,&(ir->opts),ekind_save);
 +    /* Copy the cos acceleration to the groups struct */
 +    ekind->cosacc.cos_accel = ir->cos_accel;
 +
 +    gstat = global_stat_init(ir);
 +    debug_gmx();
 +
 +    {
 +        double io = compute_io(ir,top_global->natoms,groups,mdebin->ebin->nener,1);
 +        if ((io > 2000) && MASTER(cr))
 +            fprintf(stderr,
 +                    "\nWARNING: This run will generate roughly %.0f Mb of data\n\n",
 +                    io);
 +    }
 +
 +    top = gmx_mtop_generate_local_top(top_global,ir);
 +
 +    a0 = 0;
 +    a1 = top_global->natoms;
 +
 +    state = partdec_init_local_state(cr,state_global);
 +    f_global = f;
 +
 +    atoms2md(top_global,ir,0,NULL,a0,a1-a0,mdatoms);
 +
 +    if (vsite)
 +    {
 +        set_vsite_top(vsite,top,mdatoms,cr);
 +    }
 +
 +    if (ir->ePBC != epbcNONE && !ir->bPeriodicMols)
 +    {
 +        graph = mk_graph(fplog,&(top->idef),0,top_global->natoms,FALSE,FALSE);
 +    }
 +
 +    update_mdatoms(mdatoms,state->lambda);
 +
 +    if (deviceOptions[0]=='\0')
 +    {
 +        /* empty options, which should default to OpenMM in this build */
 +        ommOptions=deviceOptions;
 +    }
 +    else
 +    {
 +        if (gmx_strncasecmp(deviceOptions,"OpenMM",6)!=0)
 +        {
 +            gmx_fatal(FARGS, "This Gromacs version currently only works with OpenMM. Use -device \"OpenMM:<options>\"");
 +        }
 +        else
 +        {
 +            ommOptions=strchr(deviceOptions,':');
 +            if (NULL!=ommOptions)
 +            {
 +                /* Increase the pointer to skip the colon */
 +                ommOptions++;
 +            }
 +        }
 +    }
 +
 +    openmmData = openmm_init(fplog, ommOptions, ir, top_global, top, mdatoms, fr, state);
 +    please_cite(fplog,"Friedrichs2009");
 +
 +    if (MASTER(cr))
 +    {
 +        /* Update mdebin with energy history if appending to output files */
 +        if ( Flags & MD_APPENDFILES )
 +        {
 +            restore_energyhistory_from_state(mdebin,&state_global->enerhist);
 +        }
 +        /* Set the initial energy history in state to zero by updating once */
 +        update_energyhistory(&state_global->enerhist,mdebin);
 +    }
 +
 +    if (constr)
 +    {
 +        set_constraints(constr,top,ir,mdatoms,cr);
 +    }
 +
 +    if (!ir->bContinuation)
 +    {
 +        if (mdatoms->cFREEZE && (state->flags & (1<<estV)))
 +        {
 +            /* Set the velocities of frozen particles to zero */
 +            for (i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++)
 +            {
 +                for (m=0; m<DIM; m++)
 +                {
 +                    if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m])
 +                    {
 +                        state->v[i][m] = 0;
 +                    }
 +                }
 +            }
 +        }
 +
 +        if (constr)
 +        {
 +            /* Constrain the initial coordinates and velocities */
 +            do_constrain_first(fplog,constr,ir,mdatoms,state,f,
 +                               graph,cr,nrnb,fr,top,shake_vir);
 +        }
 +        if (vsite)
 +        {
 +            /* Construct the virtual sites for the initial configuration */
 +            construct_vsites(fplog,vsite,state->x,nrnb,ir->delta_t,NULL,
 +                             top->idef.iparams,top->idef.il,
 +                             fr->ePBC,fr->bMolPBC,graph,cr,state->box);
 +        }
 +    }
 +
 +    debug_gmx();
 +
 +    if (MASTER(cr))
 +    {
 +        char tbuf[20];
-                 gmx_fatal(FARGS,"Cannot flush logfile - maybe you are out of quota?");
 +        fprintf(stderr,"starting mdrun '%s'\n",
 +                *(top_global->name));
 +        if (ir->nsteps >= 0)
 +        {
 +            sprintf(tbuf,"%8.1f",(ir->init_step+ir->nsteps)*ir->delta_t);
 +        }
 +        else
 +        {
 +            sprintf(tbuf,"%s","infinite");
 +        }
 +        if (ir->init_step > 0)
 +        {
 +            fprintf(stderr,"%s steps, %s ps (continuing from step %s, %8.1f ps).\n",
 +                    gmx_step_str(ir->init_step+ir->nsteps,sbuf),tbuf,
 +                    gmx_step_str(ir->init_step,sbuf2),
 +                    ir->init_step*ir->delta_t);
 +        }
 +        else
 +        {
 +            fprintf(stderr,"%s steps, %s ps.\n",
 +                    gmx_step_str(ir->nsteps,sbuf),tbuf);
 +        }
 +    }
 +
 +    fprintf(fplog,"\n");
 +
 +    /* Set and write start time */
 +    runtime_start(runtime);
 +    print_date_and_time(fplog,cr->nodeid,"Started mdrun",runtime);
 +    wallcycle_start(wcycle,ewcRUN);
 +    if (fplog)
 +        fprintf(fplog,"\n");
 +
 +    /* safest point to do file checkpointing is here.  More general point would be immediately before integrator call */
 +
 +    debug_gmx();
 +    /***********************************************************
 +     *
 +     *             Loop over MD steps
 +     *
 +     ************************************************************/
 +
 +    /* loop over MD steps or if rerunMD to end of input trajectory */
 +    bFirstStep = TRUE;
 +    /* Skip the first Nose-Hoover integration when we get the state from tpx */
 +    bStateFromTPX = !opt2bSet("-cpi",nfile,fnm);
 +    bInitStep = bFirstStep && bStateFromTPX;
 +    bStartingFromCpt = (Flags & MD_STARTFROMCPT) && bInitStep;
 +    bLastStep = FALSE;
 +
 +    init_global_signals(&gs,cr,ir,repl_ex_nst);
 +
 +    step = ir->init_step;
 +    step_rel = 0;
 +
 +    while (!bLastStep)
 +    {
 +        wallcycle_start(wcycle,ewcSTEP);
 +
 +        bLastStep = (step_rel == ir->nsteps);
 +        t = t0 + step*ir->delta_t;
 +
 +        if (gs.set[eglsSTOPCOND] != 0)
 +        {
 +            bLastStep = TRUE;
 +        }
 +
 +        do_log = do_per_step(step,ir->nstlog) || bFirstStep || bLastStep;
 +        do_verbose = bVerbose &&
 +                     (step % stepout == 0 || bFirstStep || bLastStep);
 +
 +        if (MASTER(cr) && do_log)
 +        {
 +            print_ebin_header(fplog,step,t,state->lambda);
 +        }
 +
 +        clear_mat(force_vir);
 +
 +        /* We write a checkpoint at this MD step when:
 +         * either when we signalled through gs (in OpenMM NS works different),
 +         * or at the last step (but not when we do not want confout),
 +         * but never at the first step.
 +         */
 +        bCPT = ((gs.set[eglsCHKPT] ||
 +                 (bLastStep && (Flags & MD_CONFOUT))) &&
 +                step > ir->init_step );
 +        if (bCPT)
 +        {
 +            gs.set[eglsCHKPT] = 0;
 +        }
 +
 +        /* Now we have the energies and forces corresponding to the
 +         * coordinates at time t. We must output all of this before
 +         * the update.
 +         * for RerunMD t is read from input trajectory
 +         */
 +        mdof_flags = 0;
 +        if (do_per_step(step,ir->nstxout))
 +        {
 +            mdof_flags |= MDOF_X;
 +        }
 +        if (do_per_step(step,ir->nstvout))
 +        {
 +            mdof_flags |= MDOF_V;
 +        }
 +        if (do_per_step(step,ir->nstfout))
 +        {
 +            mdof_flags |= MDOF_F;
 +        }
 +        if (do_per_step(step,ir->nstxtcout))
 +        {
 +            mdof_flags |= MDOF_XTC;
 +        }
 +        if (bCPT)
 +        {
 +            mdof_flags |= MDOF_CPT;
 +        };
 +        do_ene = (do_per_step(step,ir->nstenergy) || bLastStep);
 +
 +        if (mdof_flags != 0 || do_ene || do_log)
 +        {
 +            wallcycle_start(wcycle,ewcTRAJ);
 +            bF = (mdof_flags & MDOF_F);
 +            bX = (mdof_flags & (MDOF_X | MDOF_XTC | MDOF_CPT));
 +            bV = (mdof_flags & (MDOF_V | MDOF_CPT));
 +
 +            openmm_copy_state(openmmData, state, &t, f, enerd, bX, bV, bF, do_ene);
 +
 +            upd_mdebin(mdebin,FALSE,TRUE,
 +                       t,mdatoms->tmass,enerd,state,lastbox,
 +                       shake_vir,force_vir,total_vir,pres,
 +                       ekind,mu_tot,constr);
 +            print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,do_log?fplog:NULL,
 +                       step,t,
 +                       eprNORMAL,bCompact,mdebin,fcd,groups,&(ir->opts));
 +            write_traj(fplog,cr,outf,mdof_flags,top_global,
 +                       step,t,state,state_global,f,f_global,&n_xtc,&x_xtc);
 +            if (bCPT)
 +            {
 +                nchkpt++;
 +                bCPT = FALSE;
 +            }
 +            debug_gmx();
 +            if (bLastStep && step_rel == ir->nsteps &&
 +                    (Flags & MD_CONFOUT) && MASTER(cr))
 +            {
 +                /* x and v have been collected in write_traj,
 +                 * because a checkpoint file will always be written
 +                 * at the last step.
 +                 */
 +                fprintf(stderr,"\nWriting final coordinates.\n");
 +                if (ir->ePBC != epbcNONE && !ir->bPeriodicMols)
 +                {
 +                    /* Make molecules whole only for confout writing */
 +                    do_pbc_mtop(fplog,ir->ePBC,state->box,top_global,state_global->x);
 +                }
 +                write_sto_conf_mtop(ftp2fn(efSTO,nfile,fnm),
 +                                    *top_global->name,top_global,
 +                                    state_global->x,state_global->v,
 +                                    ir->ePBC,state->box);
 +                debug_gmx();
 +            }
 +            wallcycle_stop(wcycle,ewcTRAJ);
 +        }
 +
 +        /* Determine the wallclock run time up till now */
 +        run_time = gmx_gettime() - (double)runtime->real;
 +
 +        /* Check whether everything is still allright */
 +        if (((int)gmx_get_stop_condition() > handled_stop_condition)
 +#ifdef GMX_THREAD_MPI
 +            && MASTER(cr)
 +#endif
 +            )
 +        {
 +           /* this is just make gs.sig compatible with the hack 
 +               of sending signals around by MPI_Reduce with together with
 +               other floats */
 +            /* NOTE: this only works for serial code. For code that allows
 +               MPI nodes to propagate their condition, see kernel/md.c*/
 +            if ( gmx_get_stop_condition() == gmx_stop_cond_next_ns )
 +                gs.set[eglsSTOPCOND]=1;
 +            if ( gmx_get_stop_condition() == gmx_stop_cond_next )
 +                gs.set[eglsSTOPCOND]=1;
 +            /* < 0 means stop at next step, > 0 means stop at next NS step */
 +            if (fplog)
 +            {
 +                fprintf(fplog,
 +                        "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
 +                        gmx_get_signal_name(),
 +                        gs.sig[eglsSTOPCOND]==1 ? "NS " : "");
 +                fflush(fplog);
 +            }
 +            fprintf(stderr,
 +                    "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
 +                    gmx_get_signal_name(),
 +                    gs.sig[eglsSTOPCOND]==1 ? "NS " : "");
 +            fflush(stderr);
 +            handled_stop_condition=(int)gmx_get_stop_condition();
 +        }
 +        else if (MASTER(cr) &&
 +                 (max_hours > 0 && run_time > max_hours*60.0*60.0*0.99) &&
 +                 gs.set[eglsSTOPCOND] == 0)
 +        {
 +            /* Signal to terminate the run */
 +            gs.set[eglsSTOPCOND] = 1;
 +            if (fplog)
 +            {
 +                fprintf(fplog,"\nStep %s: Run time exceeded %.3f hours, will terminate the run\n",gmx_step_str(step,sbuf),max_hours*0.99);
 +            }
 +            fprintf(stderr, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n",gmx_step_str(step,sbuf),max_hours*0.99);
 +        }
 +
 +        /* checkpoints */
 +        if (MASTER(cr) && (cpt_period >= 0 &&
 +                           (cpt_period == 0 ||
 +                            run_time >= nchkpt*cpt_period*60.0)) &&
 +                gs.set[eglsCHKPT] == 0)
 +        {
 +            gs.set[eglsCHKPT] = 1;
 +        }
 +
 +        /* Time for performance */
 +        if (((step % stepout) == 0) || bLastStep)
 +        {
 +            runtime_upd_proc(runtime);
 +        }
 +
 +        if (do_per_step(step,ir->nstlog))
 +        {
 +            if (fflush(fplog) != 0)
 +            {
++                gmx_fatal(FARGS,"Cannot flush logfile - maybe you are out of disk space?");
 +            }
 +        }
 +
 +        /* Remaining runtime */
 +        if (MULTIMASTER(cr) && (do_verbose || gmx_got_usr_signal() ))
 +        {
 +            print_time(stderr,runtime,step,ir,cr);
 +        }
 +
 +        bFirstStep = FALSE;
 +        bInitStep = FALSE;
 +        bStartingFromCpt = FALSE;
 +        step++;
 +        step_rel++;
 +
 +        openmm_take_one_step(openmmData);
 +    }
 +    /* End of main MD loop */
 +    debug_gmx();
 +
 +    /* Stop the time */
 +    runtime_end(runtime);
 +
 +    if (MASTER(cr))
 +    {
 +        if (ir->nstcalcenergy > 0) 
 +        {
 +            print_ebin(outf->fp_ene,FALSE,FALSE,FALSE,fplog,step,t,
 +                       eprAVER,FALSE,mdebin,fcd,groups,&(ir->opts));
 +        }
 +    }
 +
 +    openmm_cleanup(fplog, openmmData);
 +
 +    done_mdoutf(outf);
 +
 +    debug_gmx();
 +
 +    runtime->nsteps_done = step_rel;
 +
 +    return 0;
 +}
index a13938379b05f8bc454e6f2e1fda3cc17a3b065f,0000000000000000000000000000000000000000..552865e7427e3e5c2ae1c0a570b7bb6b832e54db
mode 100644,000000..100644
--- /dev/null
@@@ -1,702 -1,0 +1,702 @@@
-       "Attempt replica exchange every # steps" },
 +/*  -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include "typedefs.h"
 +#include "macros.h"
 +#include "copyrite.h"
 +#include "main.h"
 +#include "statutil.h"
 +#include "smalloc.h"
 +#include "futil.h"
 +#include "smalloc.h"
 +#include "edsam.h"
 +#include "mdrun.h"
 +#include "xmdrun.h"
 +#include "checkpoint.h"
 +#ifdef GMX_THREAD_MPI
 +#include "thread_mpi.h"
 +#endif
 +
 +/* afm stuff */
 +#include "pull.h"
 +
 +int main(int argc,char *argv[])
 +{
 +  const char *desc[] = {
 + #ifdef GMX_OPENMM
 +    "This is an experimental release of GROMACS for accelerated",
 +      "Molecular Dynamics simulations on GPU processors. Support is provided",
 +      "by the OpenMM library (https://simtk.org/home/openmm).[PAR]",
 +      "*Warning*[BR]",
 +      "This release is targeted at developers and advanced users and",
 +      "care should be taken before production use. The following should be",
 +      "noted before using the program:[PAR]",
 +      " * The current release runs only on modern nVidia GPU hardware with CUDA support.",
 +      "Make sure that the necessary CUDA drivers and libraries for your operating system",
 +      "are already installed. The CUDA SDK also should be installed in order to compile",
 +      "the program from source (http://www.nvidia.com/object/cuda_home.html).[PAR]",
 +      " * Multiple GPU cards are not supported.[PAR]",
 +      " * Only a small subset of the GROMACS features and options are supported on the GPUs.",
 +      "See below for a detailed list.[PAR]",
 +      " * Consumer level GPU cards are known to often have problems with faulty memory.",
 +      "It is recommended that a full memory check of the cards is done at least once",
 +      "(for example, using the memtest=full option).",
 +      "A partial memory check (for example, memtest=15) before and",
 +      "after the simulation run would help spot",
 +      "problems resulting from processor overheating.[PAR]",
 +      " * The maximum size of the simulated systems depends on the available",
 +      "GPU memory,for example, a GTX280 with 1GB memory has been tested with systems",
 +      "of up to about 100,000 atoms.[PAR]",
 +      " * In order to take a full advantage of the GPU platform features, many algorithms",
 +      "have been implemented in a very different way than they are on the CPUs.",
 +      "Therefore numercal correspondence between properties of the state of",
 +      "simulated systems should not be expected. Moreover, the values will likely vary",
 +      "when simulations are done on different GPU hardware.[PAR]",
 +      " * Frequent retrieval of system state information such as",
 +      "trajectory coordinates and energies can greatly influence the performance",
 +      "of the program due to slow CPU<->GPU memory transfer speed.[PAR]",
 +      " * MD algorithms are complex, and although the Gromacs code is highly tuned for them,",
 +      "they often do not translate very well onto the streaming architetures.",
 +      "Realistic expectations about the achievable speed-up from test with GTX280:",
 +      "For small protein systems in implicit solvent using all-vs-all kernels the acceleration",
 +      "can be as high as 20 times, but in most other setups involving cutoffs and PME the",
 +      "acceleration is usually only ~4 times relative to a 3GHz CPU.[PAR]",
 +      "Supported features:[PAR]",
 +      " * Integrators: md/md-vv/md-vv-avek, sd/sd1 and bd.\n",
 +      " * Long-range interactions (option coulombtype): Reaction-Field, Ewald, PME, and cut-off (for Implicit Solvent only)\n",
 +      " * Temperature control: Supported only with the md/md-vv/md-vv-avek, sd/sd1 and bd integrators.\n",
 +      " * Pressure control: Supported.\n",
 +      " * Implicit solvent: Supported.\n",
 +      "A detailed description can be found on the GROMACS website:\n",
 +      "http://www.gromacs.org/gpu[PAR]",
 +/* From the original mdrun documentation */
 +    "The [TT]mdrun[tt] program reads the run input file ([TT]-s[tt])",
 +    "and distributes the topology over nodes if needed.",
 +    "[TT]mdrun[tt] produces at least four output files.",
 +    "A single log file ([TT]-g[tt]) is written, unless the option",
 +    "[TT]-seppot[tt] is used, in which case each node writes a log file.",
 +    "The trajectory file ([TT]-o[tt]), contains coordinates, velocities and",
 +    "optionally forces.",
 +    "The structure file ([TT]-c[tt]) contains the coordinates and",
 +    "velocities of the last step.",
 +    "The energy file ([TT]-e[tt]) contains energies, the temperature,",
 +    "pressure, etc, a lot of these things are also printed in the log file.",
 +    "Optionally coordinates can be written to a compressed trajectory file",
 +    "([TT]-x[tt]).[PAR]",
 +/* openmm specific information */
 +      "Usage with OpenMM:[BR]",
 +      "[TT]mdrun -device \"OpenMM:platform=Cuda,memtest=15,deviceid=0,force-device=no\"[tt][PAR]",
 +      "Options:[PAR]",
 +      "      [TT]platform[tt] = Cuda\t\t:\tThe only available value. OpenCL support will be available in future.\n",
 +      "      [TT]memtest[tt] = 15\t\t:\tRun a partial, random GPU memory test for the given amount of seconds. A full test",
 +      "(recommended!) can be run with \"memtest=full\". Memory testing can be disabled with \"memtest=off\".\n",
 +      "      [TT]deviceid[tt] = 0\t\t:\tSpecify the target device when multiple cards are present.",
 +      "Only one card can be used at any given time though.\n",
 +      "      [TT]force-device[tt] = no\t\t:\tIf set to \"yes\" [TT]mdrun[tt]  will be forced to execute on",
 +      "hardware that is not officially supported. GPU acceleration can also be achieved on older",
 +      "but Cuda capable cards, although the simulation might be too slow, and the memory limits too strict.",
 +#else
 +    "The [TT]mdrun[tt] program is the main computational chemistry engine",
 +    "within GROMACS. Obviously, it performs Molecular Dynamics simulations,",
 +    "but it can also perform Stochastic Dynamics, Energy Minimization,",
 +    "test particle insertion or (re)calculation of energies.",
 +    "Normal mode analysis is another option. In this case [TT]mdrun[tt]",
 +    "builds a Hessian matrix from single conformation.",
 +    "For usual Normal Modes-like calculations, make sure that",
 +    "the structure provided is properly energy-minimized.",
 +    "The generated matrix can be diagonalized by [TT]g_nmeig[tt].[PAR]",
 +    "The [TT]mdrun[tt] program reads the run input file ([TT]-s[tt])",
 +    "and distributes the topology over nodes if needed.",
 +    "[TT]mdrun[tt] produces at least four output files.",
 +    "A single log file ([TT]-g[tt]) is written, unless the option",
 +    "[TT]-seppot[tt] is used, in which case each node writes a log file.",
 +    "The trajectory file ([TT]-o[tt]), contains coordinates, velocities and",
 +    "optionally forces.",
 +    "The structure file ([TT]-c[tt]) contains the coordinates and",
 +    "velocities of the last step.",
 +    "The energy file ([TT]-e[tt]) contains energies, the temperature,",
 +    "pressure, etc, a lot of these things are also printed in the log file.",
 +    "Optionally coordinates can be written to a compressed trajectory file",
 +    "([TT]-x[tt]).[PAR]",
 +    "The option [TT]-dhdl[tt] is only used when free energy calculation is",
 +    "turned on.[PAR]",
 +    "When [TT]mdrun[tt] is started using MPI with more than 1 node, parallelization",
 +    "is used. By default domain decomposition is used, unless the [TT]-pd[tt]",
 +    "option is set, which selects particle decomposition.[PAR]",
 +    "With domain decomposition, the spatial decomposition can be set",
 +    "with option [TT]-dd[tt]. By default [TT]mdrun[tt] selects a good decomposition.",
 +    "The user only needs to change this when the system is very inhomogeneous.",
 +    "Dynamic load balancing is set with the option [TT]-dlb[tt],",
 +    "which can give a significant performance improvement,",
 +    "especially for inhomogeneous systems. The only disadvantage of",
 +    "dynamic load balancing is that runs are no longer binary reproducible,",
 +    "but in most cases this is not important.",
 +    "By default the dynamic load balancing is automatically turned on",
 +    "when the measured performance loss due to load imbalance is 5% or more.",
 +    "At low parallelization these are the only important options",
 +    "for domain decomposition.",
 +    "At high parallelization the options in the next two sections",
 +    "could be important for increasing the performace.",
 +    "[PAR]",
 +    "When PME is used with domain decomposition, separate nodes can",
 +    "be assigned to do only the PME mesh calculation;",
 +    "this is computationally more efficient starting at about 12 nodes.",
 +    "The number of PME nodes is set with option [TT]-npme[tt],",
 +    "this can not be more than half of the nodes.",
 +    "By default [TT]mdrun[tt] makes a guess for the number of PME",
 +    "nodes when the number of nodes is larger than 11 or performance wise",
 +    "not compatible with the PME grid x dimension.",
 +    "But the user should optimize npme. Performance statistics on this issue",
 +    "are written at the end of the log file.",
 +    "For good load balancing at high parallelization, the PME grid x and y",
 +    "dimensions should be divisible by the number of PME nodes",
 +    "(the simulation will run correctly also when this is not the case).",
 +    "[PAR]",
 +    "This section lists all options that affect the domain decomposition.",
 +    "[PAR]",
 +    "Option [TT]-rdd[tt] can be used to set the required maximum distance",
 +    "for inter charge-group bonded interactions.",
 +    "Communication for two-body bonded interactions below the non-bonded",
 +    "cut-off distance always comes for free with the non-bonded communication.",
 +    "Atoms beyond the non-bonded cut-off are only communicated when they have",
 +    "missing bonded interactions; this means that the extra cost is minor",
 +    "and nearly indepedent of the value of [TT]-rdd[tt].",
 +    "With dynamic load balancing option [TT]-rdd[tt] also sets",
 +    "the lower limit for the domain decomposition cell sizes.",
 +    "By default [TT]-rdd[tt] is determined by [TT]mdrun[tt] based on",
 +    "the initial coordinates. The chosen value will be a balance",
 +    "between interaction range and communication cost.",
 +    "[PAR]",
 +    "When inter charge-group bonded interactions are beyond",
 +    "the bonded cut-off distance, [TT]mdrun[tt] terminates with an error message.",
 +    "For pair interactions and tabulated bonds",
 +    "that do not generate exclusions, this check can be turned off",
 +    "with the option [TT]-noddcheck[tt].",
 +    "[PAR]",
 +    "When constraints are present, option [TT]-rcon[tt] influences",
 +    "the cell size limit as well.",
 +    "Atoms connected by NC constraints, where NC is the LINCS order plus 1,",
 +    "should not be beyond the smallest cell size. A error message is",
 +    "generated when this happens and the user should change the decomposition",
 +    "or decrease the LINCS order and increase the number of LINCS iterations.",
 +    "By default [TT]mdrun[tt] estimates the minimum cell size required for P-LINCS",
 +    "in a conservative fashion. For high parallelization it can be useful",
 +    "to set the distance required for P-LINCS with the option [TT]-rcon[tt].",
 +    "[PAR]",
 +    "The [TT]-dds[tt] option sets the minimum allowed x, y and/or z scaling",
 +    "of the cells with dynamic load balancing. [TT]mdrun[tt] will ensure that",
 +    "the cells can scale down by at least this factor. This option is used",
 +    "for the automated spatial decomposition (when not using [TT]-dd[tt])",
 +    "as well as for determining the number of grid pulses, which in turn",
 +    "sets the minimum allowed cell size. Under certain circumstances",
 +    "the value of [TT]-dds[tt] might need to be adjusted to account for",
 +    "high or low spatial inhomogeneity of the system.",
 +    "[PAR]",
 +    "The option [TT]-gcom[tt] can be used to only do global communication",
 +    "every n steps.",
 +    "This can improve performance for highly parallel simulations",
 +    "where this global communication step becomes the bottleneck.",
 +    "For a global thermostat and/or barostat the temperature",
 +    "and/or pressure will also only be updated every [TT]-gcom[tt] steps.",
 +    "By default it is set to the minimum of nstcalcenergy and nstlist.[PAR]",
 +    "With [TT]-rerun[tt] an input trajectory can be given for which ",
 +    "forces and energies will be (re)calculated. Neighbor searching will be",
 +    "performed for every frame, unless [TT]nstlist[tt] is zero",
 +    "(see the [TT].mdp[tt] file).[PAR]",
 +    "ED (essential dynamics) sampling is switched on by using the [TT]-ei[tt]",
 +    "flag followed by an [TT].edi[tt] file.",
 +    "The [TT].edi[tt] file can be produced using options in the essdyn",
 +    "menu of the WHAT IF program. [TT]mdrun[tt] produces a [TT].edo[tt] file that",
 +    "contains projections of positions, velocities and forces onto selected",
 +    "eigenvectors.[PAR]",
 +    "When user-defined potential functions have been selected in the",
 +    "[TT].mdp[tt] file the [TT]-table[tt] option is used to pass [TT]mdrun[tt]",
 +    "a formatted table with potential functions. The file is read from",
 +    "either the current directory or from the [TT]GMXLIB[tt] directory.",
 +    "A number of pre-formatted tables are presented in the [TT]GMXLIB[tt] dir,",
 +    "for 6-8, 6-9, 6-10, 6-11, 6-12 Lennard-Jones potentials with",
 +    "normal Coulomb.",
 +    "When pair interactions are present, a separate table for pair interaction",
 +    "functions is read using the [TT]-tablep[tt] option.[PAR]",
 +    "When tabulated bonded functions are present in the topology,",
 +    "interaction functions are read using the [TT]-tableb[tt] option.",
 +    "For each different tabulated interaction type the table file name is",
 +    "modified in a different way: before the file extension an underscore is",
 +    "appended, then a 'b' for bonds, an 'a' for angles or a 'd' for dihedrals",
 +    "and finally the table number of the interaction type.[PAR]",
 +    "The options [TT]-px[tt] and [TT]-pf[tt] are used for writing pull COM",
 +    "coordinates and forces when pulling is selected",
 +    "in the [TT].mdp[tt] file.[PAR]",
 +    "With [TT]-multi[tt] or [TT]-multidir[tt], multiple systems can be ",
 +    "simulated in parallel.",
 +    "As many input files/directories are required as the number of systems. ",
 +    "The [TT]-multidir[tt] option takes a list of directories (one for each ",
 +    "system) and runs in each of them, using the input/output file names, ",
 +    "such as specified by e.g. the [TT]-s[tt] option, relative to these ",
 +    "directories.",
 +    "With [TT]-multi[tt], the system number is appended to the run input ",
 +    "and each output filename, for instance [TT]topol.tpr[tt] becomes",
 +    "[TT]topol0.tpr[tt], [TT]topol1.tpr[tt] etc.",
 +    "The number of nodes per system is the total number of nodes",
 +    "divided by the number of systems.",
 +    "One use of this option is for NMR refinement: when distance",
 +    "or orientation restraints are present these can be ensemble averaged",
 +    "over all the systems.[PAR]",
 +    "With [TT]-replex[tt] replica exchange is attempted every given number",
 +    "of steps. The number of replicas is set with the [TT]-multi[tt] or ",
 +    "[TT]-multidir[tt] option, described above.",
 +    "All run input files should use a different coupling temperature,",
 +    "the order of the files is not important. The random seed is set with",
 +    "[TT]-reseed[tt]. The velocities are scaled and neighbor searching",
 +    "is performed after every exchange.[PAR]",
 +    "Finally some experimental algorithms can be tested when the",
 +    "appropriate options have been given. Currently under",
 +    "investigation are: polarizability and X-ray bombardments.",
 +    "[PAR]",
 +    "The option [TT]-membed[dd] does what used to be g_membed, i.e. embed",
 +    "a protein into a membrane. The data file should contain the options",
 +    "that where passed to g_membed before. The [TT]-mn[tt] and [TT]-mp[tt]",
 +    "both apply to this as well.",
 +    "[PAR]",
 +    "The option [TT]-pforce[tt] is useful when you suspect a simulation",
 +    "crashes due to too large forces. With this option coordinates and",
 +    "forces of atoms with a force larger than a certain value will",
 +    "be printed to stderr.",
 +    "[PAR]",
 +    "Checkpoints containing the complete state of the system are written",
 +    "at regular intervals (option [TT]-cpt[tt]) to the file [TT]-cpo[tt],",
 +    "unless option [TT]-cpt[tt] is set to -1.",
 +    "The previous checkpoint is backed up to [TT]state_prev.cpt[tt] to",
 +    "make sure that a recent state of the system is always available,",
 +    "even when the simulation is terminated while writing a checkpoint.",
 +    "With [TT]-cpnum[tt] all checkpoint files are kept and appended",
 +    "with the step number.",
 +    "A simulation can be continued by reading the full state from file",
 +    "with option [TT]-cpi[tt]. This option is intelligent in the way that",
 +    "if no checkpoint file is found, Gromacs just assumes a normal run and",
 +    "starts from the first step of the [TT].tpr[tt] file. By default the output",
 +    "will be appending to the existing output files. The checkpoint file",
 +    "contains checksums of all output files, such that you will never",
 +    "loose data when some output files are modified, corrupt or removed.",
 +    "There are three scenarios with [TT]-cpi[tt]:[PAR]",
 +    "[TT]*[tt] no files with matching names are present: new output files are written[PAR]",
 +    "[TT]*[tt] all files are present with names and checksums matching those stored",
 +    "in the checkpoint file: files are appended[PAR]",
 +    "[TT]*[tt] otherwise no files are modified and a fatal error is generated[PAR]",
 +    "With [TT]-noappend[tt] new output files are opened and the simulation",
 +    "part number is added to all output file names.",
 +    "Note that in all cases the checkpoint file itself is not renamed",
 +    "and will be overwritten, unless its name does not match",
 +    "the [TT]-cpo[tt] option.",
 +    "[PAR]",
 +    "With checkpointing the output is appended to previously written",
 +    "output files, unless [TT]-noappend[tt] is used or none of the previous",
 +    "output files are present (except for the checkpoint file).",
 +    "The integrity of the files to be appended is verified using checksums",
 +    "which are stored in the checkpoint file. This ensures that output can",
 +    "not be mixed up or corrupted due to file appending. When only some",
 +    "of the previous output files are present, a fatal error is generated",
 +    "and no old output files are modified and no new output files are opened.",
 +    "The result with appending will be the same as from a single run.",
 +    "The contents will be binary identical, unless you use a different number",
 +    "of nodes or dynamic load balancing or the FFT library uses optimizations",
 +    "through timing.",
 +    "[PAR]",
 +    "With option [TT]-maxh[tt] a simulation is terminated and a checkpoint",
 +    "file is written at the first neighbor search step where the run time",
 +    "exceeds [TT]-maxh[tt]*0.99 hours.",
 +    "[PAR]",
 +    "When [TT]mdrun[tt] receives a TERM signal, it will set nsteps to the current",
 +    "step plus one. When [TT]mdrun[tt] receives an INT signal (e.g. when ctrl+C is",
 +    "pressed), it will stop after the next neighbor search step ",
 +    "(with nstlist=0 at the next step).",
 +    "In both cases all the usual output will be written to file.",
 +    "When running with MPI, a signal to one of the [TT]mdrun[tt] processes",
 +    "is sufficient, this signal should not be sent to mpirun or",
 +    "the [TT]mdrun[tt] process that is the parent of the others.",
 +    "[PAR]",
 +    "When [TT]mdrun[tt] is started with MPI, it does not run niced by default."
 +#endif
 +  };
 +  t_commrec    *cr;
 +  t_filenm fnm[] = {
 +    { efTPX, NULL,      NULL,       ffREAD },
 +    { efTRN, "-o",      NULL,       ffWRITE },
 +    { efXTC, "-x",      NULL,       ffOPTWR },
 +    { efCPT, "-cpi",    NULL,       ffOPTRD },
 +    { efCPT, "-cpo",    NULL,       ffOPTWR },
 +    { efSTO, "-c",      "confout",  ffWRITE },
 +    { efEDR, "-e",      "ener",     ffWRITE },
 +    { efLOG, "-g",      "md",       ffWRITE },
 +    { efXVG, "-dhdl",   "dhdl",     ffOPTWR },
 +    { efXVG, "-field",  "field",    ffOPTWR },
 +    { efXVG, "-table",  "table",    ffOPTRD },
 +    { efXVG, "-tabletf", "tabletf",    ffOPTRD },
 +    { efXVG, "-tablep", "tablep",   ffOPTRD },
 +    { efXVG, "-tableb", "table",    ffOPTRD },
 +    { efTRX, "-rerun",  "rerun",    ffOPTRD },
 +    { efXVG, "-tpi",    "tpi",      ffOPTWR },
 +    { efXVG, "-tpid",   "tpidist",  ffOPTWR },
 +    { efEDI, "-ei",     "sam",      ffOPTRD },
 +    { efEDO, "-eo",     "sam",      ffOPTWR },
 +    { efGCT, "-j",      "wham",     ffOPTRD },
 +    { efGCT, "-jo",     "bam",      ffOPTWR },
 +    { efXVG, "-ffout",  "gct",      ffOPTWR },
 +    { efXVG, "-devout", "deviatie", ffOPTWR },
 +    { efXVG, "-runav",  "runaver",  ffOPTWR },
 +    { efXVG, "-px",     "pullx",    ffOPTWR },
 +    { efXVG, "-pf",     "pullf",    ffOPTWR },
 +    { efXVG, "-ro",     "rotation", ffOPTWR },
 +    { efLOG, "-ra",     "rotangles",ffOPTWR },
 +    { efLOG, "-rs",     "rotslabs", ffOPTWR },
 +    { efLOG, "-rt",     "rottorque",ffOPTWR },
 +    { efMTX, "-mtx",    "nm",       ffOPTWR },
 +    { efNDX, "-dn",     "dipole",   ffOPTWR },
 +    { efDAT, "-membed", "membed",   ffOPTRD },
 +    { efTOP, "-mp",     "membed",   ffOPTRD },
 +    { efNDX, "-mn",     "membed",   ffOPTRD },
 +    { efRND, "-multidir",NULL,      ffOPTRDMULT}
 +  };
 +#define NFILE asize(fnm)
 +
 +  /* Command line options ! */
 +  gmx_bool bCart        = FALSE;
 +  gmx_bool bPPPME       = FALSE;
 +  gmx_bool bPartDec     = FALSE;
 +  gmx_bool bDDBondCheck = TRUE;
 +  gmx_bool bDDBondComm  = TRUE;
 +  gmx_bool bVerbose     = FALSE;
 +  gmx_bool bCompact     = TRUE;
 +  gmx_bool bSepPot      = FALSE;
 +  gmx_bool bRerunVSite  = FALSE;
 +  gmx_bool bIonize      = FALSE;
 +  gmx_bool bConfout     = TRUE;
 +  gmx_bool bReproducible = FALSE;
 +    
 +  int  npme=-1;
 +  int  nmultisim=0;
 +  int  nstglobalcomm=-1;
 +  int  repl_ex_nst=0;
 +  int  repl_ex_seed=-1;
 +  int  nstepout=100;
 +  int  nthreads=0; /* set to determine # of threads automatically */
 +  int  resetstep=-1;
 +  
 +  rvec realddxyz={0,0,0};
 +  const char *ddno_opt[ddnoNR+1] =
 +    { NULL, "interleave", "pp_pme", "cartesian", NULL };
 +    const char *dddlb_opt[] =
 +    { NULL, "auto", "no", "yes", NULL };
 +  real rdd=0.0,rconstr=0.0,dlb_scale=0.8,pforce=-1;
 +  char *ddcsx=NULL,*ddcsy=NULL,*ddcsz=NULL;
 +  real cpt_period=15.0,max_hours=-1;
 +  gmx_bool bAppendFiles=TRUE;
 +  gmx_bool bKeepAndNumCPT=FALSE;
 +  gmx_bool bResetCountersHalfWay=FALSE;
 +  output_env_t oenv=NULL;
 +  const char *deviceOptions = "";
 +
 +  t_pargs pa[] = {
 +
 +    { "-pd",      FALSE, etBOOL,{&bPartDec},
 +      "Use particle decompostion" },
 +    { "-dd",      FALSE, etRVEC,{&realddxyz},
 +      "Domain decomposition grid, 0 is optimize" },
 +#ifdef GMX_THREAD_MPI
 +    { "-nt",      FALSE, etINT, {&nthreads},
 +      "Number of threads to start (0 is guess)" },
 +#endif
 +    { "-npme",    FALSE, etINT, {&npme},
 +      "Number of separate nodes to be used for PME, -1 is guess" },
 +    { "-ddorder", FALSE, etENUM, {ddno_opt},
 +      "DD node order" },
 +    { "-ddcheck", FALSE, etBOOL, {&bDDBondCheck},
 +      "Check for all bonded interactions with DD" },
 +    { "-ddbondcomm", FALSE, etBOOL, {&bDDBondComm},
 +      "HIDDENUse special bonded atom communication when [TT]-rdd[tt] > cut-off" },
 +    { "-rdd",     FALSE, etREAL, {&rdd},
 +      "The maximum distance for bonded interactions with DD (nm), 0 is determine from initial coordinates" },
 +    { "-rcon",    FALSE, etREAL, {&rconstr},
 +      "Maximum distance for P-LINCS (nm), 0 is estimate" },
 +    { "-dlb",     FALSE, etENUM, {dddlb_opt},
 +      "Dynamic load balancing (with DD)" },
 +    { "-dds",     FALSE, etREAL, {&dlb_scale},
 +      "Minimum allowed dlb scaling of the DD cell size" },
 +    { "-ddcsx",   FALSE, etSTR, {&ddcsx},
 +      "HIDDENThe DD cell sizes in x" },
 +    { "-ddcsy",   FALSE, etSTR, {&ddcsy},
 +      "HIDDENThe DD cell sizes in y" },
 +    { "-ddcsz",   FALSE, etSTR, {&ddcsz},
 +      "HIDDENThe DD cell sizes in z" },
 +    { "-gcom",    FALSE, etINT,{&nstglobalcomm},
 +      "Global communication frequency" },
 +    { "-v",       FALSE, etBOOL,{&bVerbose},  
 +      "Be loud and noisy" },
 +    { "-compact", FALSE, etBOOL,{&bCompact},  
 +      "Write a compact log file" },
 +    { "-seppot",  FALSE, etBOOL, {&bSepPot},
 +      "Write separate V and dVdl terms for each interaction type and node to the log file(s)" },
 +    { "-pforce",  FALSE, etREAL, {&pforce},
 +      "Print all forces larger than this (kJ/mol nm)" },
 +    { "-reprod",  FALSE, etBOOL,{&bReproducible},  
 +      "Try to avoid optimizations that affect binary reproducibility" },
 +    { "-cpt",     FALSE, etREAL, {&cpt_period},
 +      "Checkpoint interval (minutes)" },
 +    { "-cpnum",   FALSE, etBOOL, {&bKeepAndNumCPT},
 +      "Keep and number checkpoint files" },
 +    { "-append",  FALSE, etBOOL, {&bAppendFiles},
 +      "Append to previous output files when continuing from checkpoint instead of adding the simulation part number to all file names" },
 +    { "-maxh",   FALSE, etREAL, {&max_hours},
 +      "Terminate after 0.99 times this time (hours)" },
 +    { "-multi",   FALSE, etINT,{&nmultisim}, 
 +      "Do multiple simulations in parallel" },
 +    { "-replex",  FALSE, etINT, {&repl_ex_nst}, 
++      "Attempt replica exchange periodically with this period (steps)" },
 +    { "-reseed",  FALSE, etINT, {&repl_ex_seed}, 
 +      "Seed for replica exchange, -1 is generate a seed" },
 +    { "-rerunvsite", FALSE, etBOOL, {&bRerunVSite},
 +      "HIDDENRecalculate virtual site coordinates with [TT]-rerun[tt]" },
 +    { "-ionize",  FALSE, etBOOL,{&bIonize},
 +      "Do a simulation including the effect of an X-Ray bombardment on your system" },
 +    { "-confout", FALSE, etBOOL, {&bConfout},
 +      "HIDDENWrite the last configuration with [TT]-c[tt] and force checkpointing at the last step" },
 +    { "-stepout", FALSE, etINT, {&nstepout},
 +      "HIDDENFrequency of writing the remaining runtime" },
 +    { "-resetstep", FALSE, etINT, {&resetstep},
 +      "HIDDENReset cycle counters after these many time steps" },
 +    { "-resethway", FALSE, etBOOL, {&bResetCountersHalfWay},
 +      "HIDDENReset the cycle counters after half the number of steps or halfway [TT]-maxh[tt]" }
 +#ifdef GMX_OPENMM
 +    ,
 +    { "-device",  FALSE, etSTR, {&deviceOptions},
 +      "Device option string" }
 +#endif
 +  };
 +  gmx_edsam_t  ed;
 +  unsigned long Flags, PCA_Flags;
 +  ivec     ddxyz;
 +  int      dd_node_order;
 +  gmx_bool     bAddPart;
 +  FILE     *fplog,*fptest;
 +  int      sim_part,sim_part_fn;
 +  const char *part_suffix=".part";
 +  char     suffix[STRLEN];
 +  int      rc;
 +  char **multidir=NULL;
 +
 +
 +  cr = init_par(&argc,&argv);
 +
 +  if (MASTER(cr))
 +    CopyRight(stderr, argv[0]);
 +
 +  PCA_Flags = (PCA_KEEP_ARGS | PCA_NOEXIT_ON_ARGS | PCA_CAN_SET_DEFFNM
 +             | (MASTER(cr) ? 0 : PCA_QUIET));
 +  
 +
 +  /* Comment this in to do fexist calls only on master
 +   * works not with rerun or tables at the moment
 +   * also comment out the version of init_forcerec in md.c 
 +   * with NULL instead of opt2fn
 +   */
 +  /*
 +     if (!MASTER(cr))
 +     {
 +     PCA_Flags |= PCA_NOT_READ_NODE;
 +     }
 +     */
 +
 +  parse_common_args(&argc,argv,PCA_Flags, NFILE,fnm,asize(pa),pa,
 +                    asize(desc),desc,0,NULL, &oenv);
 +
 +
 +
 +  /* we set these early because they might be used in init_multisystem() 
 +     Note that there is the potential for npme>nnodes until the number of
 +     threads is set later on, if there's thread parallelization. That shouldn't
 +     lead to problems. */ 
 +  dd_node_order = nenum(ddno_opt);
 +  cr->npmenodes = npme;
 +
 +#ifndef GMX_THREAD_MPI
 +  nthreads=1;
 +#endif
 +
 +  /* now check the -multi and -multidir option */
 +  if (opt2bSet("-multidir", NFILE, fnm))
 +  {
 +      int i;
 +      if (nmultisim > 0)
 +      {
 +          gmx_fatal(FARGS, "mdrun -multi and -multidir options are mutually exclusive.");
 +      }
 +      nmultisim = opt2fns(&multidir, "-multidir", NFILE, fnm);
 +  }
 +
 +
 +  if (repl_ex_nst != 0 && nmultisim < 2)
 +      gmx_fatal(FARGS,"Need at least two replicas for replica exchange (option -multi)");
 +
 +  if (nmultisim > 1) {
 +#ifndef GMX_THREAD_MPI
 +    gmx_bool bParFn = (multidir == NULL);
 +    init_multisystem(cr, nmultisim, multidir, NFILE, fnm, bParFn);
 +#else
 +    gmx_fatal(FARGS,"mdrun -multi is not supported with the thread library.Please compile GROMACS with MPI support");
 +#endif
 +  }
 +
 +  bAddPart = !bAppendFiles;
 +
 +  /* Check if there is ANY checkpoint file available */       
 +  sim_part    = 1;
 +  sim_part_fn = sim_part;
 +  if (opt2bSet("-cpi",NFILE,fnm))
 +  {
 +      if (bSepPot && bAppendFiles)
 +      {
 +          gmx_fatal(FARGS,"Output file appending is not supported with -seppot");
 +      }
 +
 +      bAppendFiles =
 +                read_checkpoint_simulation_part(opt2fn_master("-cpi", NFILE,
 +                                                              fnm,cr),
 +                                                &sim_part_fn,NULL,cr,
 +                                                bAppendFiles,NFILE,fnm,
 +                                                part_suffix,&bAddPart);
 +      if (sim_part_fn==0 && MASTER(cr))
 +      {
 +          fprintf(stdout,"No previous checkpoint file present, assuming this is a new run.\n");
 +      }
 +      else
 +      {
 +          sim_part = sim_part_fn + 1;
 +      }
 +
 +      if (MULTISIM(cr) && MASTER(cr))
 +      {
 +          check_multi_int(stdout,cr->ms,sim_part,"simulation part");
 +      }
 +  } 
 +  else
 +  {
 +      bAppendFiles = FALSE;
 +  }
 +
 +  if (!bAppendFiles)
 +  {
 +      sim_part_fn = sim_part;
 +  }
 +
 +  if (bAddPart)
 +  {
 +      /* Rename all output files (except checkpoint files) */
 +      /* create new part name first (zero-filled) */
 +      sprintf(suffix,"%s%04d",part_suffix,sim_part_fn);
 +
 +      add_suffix_to_output_names(fnm,NFILE,suffix);
 +      if (MASTER(cr))
 +      {
 +          fprintf(stdout,"Checkpoint file is from part %d, new output files will be suffixed '%s'.\n",sim_part-1,suffix);
 +      }
 +  }
 +
 +  Flags = opt2bSet("-rerun",NFILE,fnm) ? MD_RERUN : 0;
 +  Flags = Flags | (bSepPot       ? MD_SEPPOT       : 0);
 +  Flags = Flags | (bIonize       ? MD_IONIZE       : 0);
 +  Flags = Flags | (bPartDec      ? MD_PARTDEC      : 0);
 +  Flags = Flags | (bDDBondCheck  ? MD_DDBONDCHECK  : 0);
 +  Flags = Flags | (bDDBondComm   ? MD_DDBONDCOMM   : 0);
 +  Flags = Flags | (bConfout      ? MD_CONFOUT      : 0);
 +  Flags = Flags | (bRerunVSite   ? MD_RERUN_VSITE  : 0);
 +  Flags = Flags | (bReproducible ? MD_REPRODUCIBLE : 0);
 +  Flags = Flags | (bAppendFiles  ? MD_APPENDFILES  : 0); 
 +  Flags = Flags | (bKeepAndNumCPT ? MD_KEEPANDNUMCPT : 0); 
 +  Flags = Flags | (sim_part>1    ? MD_STARTFROMCPT : 0); 
 +  Flags = Flags | (bResetCountersHalfWay ? MD_RESETCOUNTERSHALFWAY : 0);
 +
 +
 +  /* We postpone opening the log file if we are appending, so we can 
 +     first truncate the old log file and append to the correct position 
 +     there instead.  */
 +  if ((MASTER(cr) || bSepPot) && !bAppendFiles) 
 +  {
 +      gmx_log_open(ftp2fn(efLOG,NFILE,fnm),cr,!bSepPot,Flags,&fplog);
 +      CopyRight(fplog,argv[0]);
 +      please_cite(fplog,"Hess2008b");
 +      please_cite(fplog,"Spoel2005a");
 +      please_cite(fplog,"Lindahl2001a");
 +      please_cite(fplog,"Berendsen95a");
 +  }
 +  else if (!MASTER(cr) && bSepPot)
 +  {
 +      gmx_log_open(ftp2fn(efLOG,NFILE,fnm),cr,!bSepPot,Flags,&fplog);
 +  }
 +  else
 +  {
 +      fplog = NULL;
 +  }
 +
 +  ddxyz[XX] = (int)(realddxyz[XX] + 0.5);
 +  ddxyz[YY] = (int)(realddxyz[YY] + 0.5);
 +  ddxyz[ZZ] = (int)(realddxyz[ZZ] + 0.5);
 +
 +  rc = mdrunner(nthreads, fplog,cr,NFILE,fnm,oenv,bVerbose,bCompact,
 +                nstglobalcomm, ddxyz,dd_node_order,rdd,rconstr,
 +                dddlb_opt[0],dlb_scale,ddcsx,ddcsy,ddcsz,
 +                nstepout,resetstep,nmultisim,repl_ex_nst,repl_ex_seed,
 +                pforce, cpt_period,max_hours,deviceOptions,Flags);
 +
 +  if (gmx_parallel_env_initialized())
 +      gmx_finalize();
 +
 +  if (MULTIMASTER(cr)) {
 +      thanx(stderr);
 +  }
 +
 +  /* Log file has to be closed in mdrunner if we are appending to it 
 +     (fplog not set here) */
 +  if (MASTER(cr) && !bAppendFiles) 
 +  {
 +      gmx_log_close(fplog);
 +  }
 +
 +  return rc;
 +}
 +
Simple merge
index 9fbeff689d6fd794cde4047ed2c8a8249ba55b2d,0000000000000000000000000000000000000000..8a3c710ee375e3b28816aa2cf4e63ba01031b119
mode 100644,000000..100644
--- /dev/null
@@@ -1,984 -1,0 +1,998 @@@
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +#ifdef __linux
 +#define _GNU_SOURCE
 +#include <sched.h>
 +#include <sys/syscall.h>
 +#endif
 +#include <signal.h>
 +#include <stdlib.h>
 +
 +#if ((defined WIN32 || defined _WIN32 || defined WIN64 || defined _WIN64) && !defined __CYGWIN__ && !defined __CYGWIN32__)
 +/* _isnan() */
 +#include <float.h>
 +#endif
 +
 +#include "typedefs.h"
 +#include "smalloc.h"
 +#include "sysstuff.h"
 +#include "statutil.h"
 +#include "mdrun.h"
 +#include "network.h"
 +#include "pull.h"
 +#include "pull_rotation.h"
 +#include "names.h"
 +#include "disre.h"
 +#include "orires.h"
 +#include "dihre.h"
 +#include "pppm.h"
 +#include "pme.h"
 +#include "mdatoms.h"
 +#include "repl_ex.h"
 +#include "qmmm.h"
 +#include "domdec.h"
 +#include "partdec.h"
 +#include "coulomb.h"
 +#include "constr.h"
 +#include "mvdata.h"
 +#include "checkpoint.h"
 +#include "mtop_util.h"
 +#include "sighandler.h"
 +#include "tpxio.h"
 +#include "txtdump.h"
 +#include "pull_rotation.h"
 +#include "membed.h"
 +#include "macros.h"
 +
 +#ifdef GMX_LIB_MPI
 +#include <mpi.h>
 +#endif
 +#ifdef GMX_THREAD_MPI
 +#include "tmpi.h"
 +#endif
 +
 +#ifdef GMX_FAHCORE
 +#include "corewrap.h"
 +#endif
 +
 +#ifdef GMX_OPENMM
 +#include "md_openmm.h"
 +#endif
 +
 +#ifdef GMX_OPENMP
 +#include <omp.h>
 +#endif
 +
 +
 +typedef struct { /* one slot of the integrator dispatch table below */
 +    gmx_integrator_t *func; /* entry point of the integrator routine */
 +} gmx_intp_t;
 +
 +/* The array should match the eI array in include/types/enums.h
 + *
 + * Dispatch table with eiNR slots, each holding the routine that
 + * implements one integrator. The order of the entries is significant:
 + * it has to follow the order of the eI enumeration (presumably the table
 + * is indexed with the eI value of the input record -- confirm at the
 + * call site, which is not part of this section).
 + *
 + * With GMX_OPENMM every slot refers to do_md_openmm; otherwise the
 + * slots refer to do_md, do_steep, do_cg, do_nm, do_lbfgs and do_tpi.
 + * Both variants list twelve entries and must be kept in sync when an
 + * integrator is added.
 + */
 +#ifdef GMX_OPENMM  /* FIXME do_md_openmm needs fixing */
 +const gmx_intp_t integrator[eiNR] = { {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm},{do_md_openmm}};
 +#else
 +const gmx_intp_t integrator[eiNR] = { {do_md}, {do_steep}, {do_cg}, {do_md}, {do_md}, {do_nm}, {do_lbfgs}, {do_tpi}, {do_tpi}, {do_md}, {do_md},{do_md}};
 +#endif
 +
 +gmx_large_int_t     deform_init_init_step_tpx; /* file-level global with external linkage; NOTE(review): presumably the initial step taken from the tpx file for box deformation -- confirm at the use sites */
 +matrix              deform_init_box_tpx; /* file-level global with external linkage; NOTE(review): presumably the initial box taken from the tpx file for box deformation -- confirm at the use sites */
 +#ifdef GMX_THREAD_MPI
 +tMPI_Thread_mutex_t deform_init_box_mutex=TMPI_THREAD_MUTEX_INITIALIZER; /* statically initialized, only present in thread-MPI builds; NOTE(review): presumably protects the two deform_init_* globals above -- confirm at the lock sites */
 +#endif
 +
 +
 +#ifdef GMX_THREAD_MPI
 +struct mdrunner_arglist /* bundle of mdrunner() arguments handed to the thread start function through its single void pointer argument */
 +{
 +    FILE *fplog; /* log file handle; the start function only passes it on when the thread is the master */
 +    t_commrec *cr; /* commrec of the spawning thread; the start function derives its own commrec from it */
 +    int nfile; /* number of entries in fnm */
 +    const t_filenm *fnm; /* file name table; duplicated again by the start function before use */
 +    output_env_t oenv; /* the remaining fields up to Flags are forwarded to mdrunner() unchanged */
 +    gmx_bool bVerbose;
 +    gmx_bool bCompact;
 +    int nstglobalcomm;
 +    ivec ddxyz; /* stored by value, filled component by component */
 +    int dd_node_order;
 +    real rdd;
 +    real rconstr;
 +    const char *dddlb_opt;
 +    real dlb_scale;
 +    const char *ddcsx;
 +    const char *ddcsy;
 +    const char *ddcsz;
 +    int nstepout;
 +    int resetstep;
 +    int nmultisim;
 +    int repl_ex_nst;
 +    int repl_ex_seed;
 +    real pforce;
 +    real cpt_period;
 +    real max_hours;
 +    const char *deviceOptions;
 +    unsigned long Flags;
 +    int ret; /* return value of mdrunner(), written by the start function through the pointer it received; NOTE(review): if all threads receive the same bundle they all write this field -- confirm */
 +};
 +
 +
 +/* The function used for spawning threads. Extracts the mdrunner() 
 +   arguments from its one argument and calls mdrunner(), after making
 +   a commrec. */
 +static void mdrunner_start_fn(void *arg)
 +{
 +    struct mdrunner_arglist *mda=(struct mdrunner_arglist*)arg;
 +    struct mdrunner_arglist mc=*mda; /* copy the arg list to make sure 
 +                                        that it's thread-local. This doesn't
 +                                        copy pointed-to items, of course,
 +                                        but those are all const. */
 +    t_commrec *cr;  /* we need a local version of this */
 +    FILE *fplog=NULL;
 +    t_filenm *fnm;
 +
 +    fnm = dup_tfn(mc.nfile, mc.fnm);
 +
 +    cr = init_par_threads(mc.cr);
 +
 +    if (MASTER(cr))
 +    {
 +        fplog=mc.fplog;
 +    }
 +
 +    mda->ret=mdrunner(cr->nnodes, fplog, cr, mc.nfile, fnm, mc.oenv, 
 +                      mc.bVerbose, mc.bCompact, mc.nstglobalcomm, 
 +                      mc.ddxyz, mc.dd_node_order, mc.rdd,
 +                      mc.rconstr, mc.dddlb_opt, mc.dlb_scale, 
 +                      mc.ddcsx, mc.ddcsy, mc.ddcsz, mc.nstepout, mc.resetstep, 
 +                      mc.nmultisim, mc.repl_ex_nst, mc.repl_ex_seed, mc.pforce, 
 +                      mc.cpt_period, mc.max_hours, mc.deviceOptions, mc.Flags);
 +}
 +
 +/* called by mdrunner() to start a specific number of threads (including 
 +   the main thread) for thread-parallel runs. This in turn calls mdrunner()
 +   for each thread. 
 +   All options besides nthreads are the same as for mdrunner(). */
 +static t_commrec *mdrunner_start_threads(int nthreads, 
 +              FILE *fplog,t_commrec *cr,int nfile, 
 +              const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose,
 +              gmx_bool bCompact, int nstglobalcomm,
 +              ivec ddxyz,int dd_node_order,real rdd,real rconstr,
 +              const char *dddlb_opt,real dlb_scale,
 +              const char *ddcsx,const char *ddcsy,const char *ddcsz,
 +              int nstepout,int resetstep,int nmultisim,int repl_ex_nst,
 +              int repl_ex_seed, real pforce,real cpt_period, real max_hours, 
 +              const char *deviceOptions, unsigned long Flags)
 +{
 +    int ret;
 +    struct mdrunner_arglist *mda;
 +    t_commrec *crn; /* the new commrec */
 +    t_filenm *fnmn;
 +
 +    /* first check whether we even need to start tMPI */
 +    if (nthreads<2)
 +        return cr;
 +
 +    /* a few small, one-time, almost unavoidable memory leaks: */
 +    snew(mda,1);
 +    fnmn=dup_tfn(nfile, fnm);
 +
 +    /* fill the data structure to pass as void pointer to thread start fn */
 +    mda->fplog=fplog;
 +    mda->cr=cr;
 +    mda->nfile=nfile;
 +    mda->fnm=fnmn;
 +    mda->oenv=oenv;
 +    mda->bVerbose=bVerbose;
 +    mda->bCompact=bCompact;
 +    mda->nstglobalcomm=nstglobalcomm;
 +    mda->ddxyz[XX]=ddxyz[XX];
 +    mda->ddxyz[YY]=ddxyz[YY];
 +    mda->ddxyz[ZZ]=ddxyz[ZZ];
 +    mda->dd_node_order=dd_node_order;
 +    mda->rdd=rdd;
 +    mda->rconstr=rconstr;
 +    mda->dddlb_opt=dddlb_opt;
 +    mda->dlb_scale=dlb_scale;
 +    mda->ddcsx=ddcsx;
 +    mda->ddcsy=ddcsy;
 +    mda->ddcsz=ddcsz;
 +    mda->nstepout=nstepout;
 +    mda->resetstep=resetstep;
 +    mda->nmultisim=nmultisim;
 +    mda->repl_ex_nst=repl_ex_nst;
 +    mda->repl_ex_seed=repl_ex_seed;
 +    mda->pforce=pforce;
 +    mda->cpt_period=cpt_period;
 +    mda->max_hours=max_hours;
 +    mda->deviceOptions=deviceOptions;
 +    mda->Flags=Flags;
 +
 +    fprintf(stderr, "Starting %d threads\n",nthreads);
 +    fflush(stderr);
 +    /* now spawn new threads that start mdrunner_start_fn(), while 
 +       the main thread returns */
 +    ret=tMPI_Init_fn(TRUE, nthreads, mdrunner_start_fn, (void*)(mda) );
 +    if (ret!=TMPI_SUCCESS)
 +        return NULL;
 +
 +    /* make a new comm_rec to reflect the new situation */
 +    crn=init_par_threads(cr);
 +    return crn;
 +}
 +
 +
 +/* Get the number of threads to use for thread-MPI based on how many
 + * were requested, which algorithms we're using,
 + * and how many particles there are.
 + */
 +static int get_nthreads_mpi(int nthreads_requested, t_inputrec *inputrec,
 +                            gmx_mtop_t *mtop)
 +{
 +    int nthreads,nthreads_new;
 +    int min_atoms_per_thread;
 +    char *env;
 +
 +    nthreads = nthreads_requested;
 +
 +    /* determine # of hardware threads. */
 +    if (nthreads_requested < 1)
 +    {
 +        if ((env = getenv("GMX_MAX_THREADS")) != NULL)
 +        {
 +            nthreads = 0;
 +            sscanf(env,"%d",&nthreads);
 +            if (nthreads < 1)
 +            {
 +                gmx_fatal(FARGS,"GMX_MAX_THREADS (%d) should be larger than 0",
 +                          nthreads);
 +            }
 +        }
 +        else
 +        {
 +            nthreads = tMPI_Thread_get_hw_number();
 +        }
 +    }
 +
 +    if (inputrec->eI == eiNM || EI_TPI(inputrec->eI))
 +    {
 +        /* Steps are divided over the nodes iso splitting the atoms */
 +        min_atoms_per_thread = 0;
 +    }
 +    else
 +    {
 +        min_atoms_per_thread = MIN_ATOMS_PER_THREAD;
 +    }
 +
 +    /* Check if an algorithm does not support parallel simulation.  */
 +    if (nthreads != 1 && 
 +        ( inputrec->eI == eiLBFGS ||
 +          inputrec->coulombtype == eelEWALD ) )
 +    {
 +        fprintf(stderr,"\nThe integration or electrostatics algorithm doesn't support parallel runs. Not starting any threads.\n");
 +        nthreads = 1;
 +    }
 +    else if (nthreads_requested < 1 &&
 +             mtop->natoms/nthreads < min_atoms_per_thread)
 +    {
 +        /* the thread number was chosen automatically, but there are too many
 +           threads (too few atoms per thread) */
 +        nthreads_new = max(1,mtop->natoms/min_atoms_per_thread);
 +
 +        if (nthreads_new > 8 || (nthreads == 8 && nthreads_new > 4))
 +        {
 +            /* Use only multiples of 4 above 8 threads
 +             * or with an 8-core processor
 +             * (to avoid 6 threads on 8 core processors with 4 real cores).
 +             */
 +            nthreads_new = (nthreads_new/4)*4;
 +        }
 +        else if (nthreads_new > 4)
 +        {
 +            /* Avoid 5 or 7 threads */
 +            nthreads_new = (nthreads_new/2)*2;
 +        }
 +
 +        nthreads = nthreads_new;
 +
 +        fprintf(stderr,"\n");
 +        fprintf(stderr,"NOTE: Parallelization is limited by the small number of atoms,\n");
 +        fprintf(stderr,"      only starting %d threads.\n",nthreads);
 +        fprintf(stderr,"      You can use the -nt option to optimize the number of threads.\n\n");
 +    }
 +    return nthreads;
 +}
 +#endif
 +
 +
 +/* Set up and run one simulation.
 + *
 + * In the order in which the code below performs the steps: the master
 + * reads the run input file and, when built with GMX_THREAD_MPI, may spawn
 + * the thread-MPI threads; membed is initialised when -membed is set; the
 + * input is broadcast in parallel runs; restraints, checkpoint state,
 + * domain decomposition, the force record and PME are initialised; nodes
 + * with DUTY_PP run the integrator selected by inputrec->eI while the
 + * remaining nodes run gmx_pmeonly(); finally finish_run() is called.
 + *
 + * Most arguments are passed on unchanged to the initialisation routines
 + * and to the integrator. nthreads_requested is only used when built with
 + * GMX_THREAD_MPI. Flags is a bit mask of MD_* flags and is extended
 + * locally (MD_PARTDEC, MD_READ_RNG, MD_READ_EKIN).
 + *
 + * Returns the value of gmx_get_stop_condition() cast to int.
 + */
 +int mdrunner(int nthreads_requested, FILE *fplog,t_commrec *cr,int nfile,
 +             const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose,
 +             gmx_bool bCompact, int nstglobalcomm,
 +             ivec ddxyz,int dd_node_order,real rdd,real rconstr,
 +             const char *dddlb_opt,real dlb_scale,
 +             const char *ddcsx,const char *ddcsy,const char *ddcsz,
 +             int nstepout,int resetstep,int nmultisim,int repl_ex_nst,
 +             int repl_ex_seed, real pforce,real cpt_period,real max_hours,
 +             const char *deviceOptions, unsigned long Flags)
 +{
 +    double     nodetime=0,realtime;
 +    t_inputrec *inputrec;
 +    t_state    *state=NULL;
 +    matrix     box;
 +    gmx_ddbox_t ddbox={0};
 +    int        npme_major,npme_minor;
 +    real       tmpr1,tmpr2;
 +    t_nrnb     *nrnb;
 +    gmx_mtop_t *mtop=NULL;
 +    t_mdatoms  *mdatoms=NULL;
 +    t_forcerec *fr=NULL;
 +    t_fcdata   *fcd=NULL;
 +    real       ewaldcoeff=0;
 +    gmx_pme_t  *pmedata=NULL;
 +    gmx_vsite_t *vsite=NULL;
 +    gmx_constr_t constr;
 +    int        i,m,nChargePerturbed=-1,status,nalloc;
 +    char       *gro;
 +    gmx_wallcycle_t wcycle;
 +    gmx_bool       bReadRNG,bReadEkin;
 +    int        list;
 +    gmx_runtime_t runtime;
 +    int        rc;
 +    gmx_large_int_t reset_counters;
 +    gmx_edsam_t ed=NULL;
 +    t_commrec   *cr_old=cr; 
 +    int         nthreads_mpi=1;
 +    int         nthreads_pme=1;
 +    gmx_membed_t *membed=NULL;
 +
 +    /* CAUTION: threads may be started later on in this function, so
 +       cr doesn't reflect the final parallel state right now */
 +    snew(inputrec,1);
 +    snew(mtop,1);
 +
 +    if (bVerbose && SIMMASTER(cr))
 +    {
 +        fprintf(stderr,"Getting Loaded...\n");
 +    }
 +    
 +    /* When appending, the log file is (re)opened further down,
 +     * after the checkpoint has been loaded. */
 +    if (Flags & MD_APPENDFILES) 
 +    {
 +        fplog = NULL;
 +    }
 +
 +    snew(state,1);
 +    if (MASTER(cr)) 
 +    {
 +        /* Read (nearly) all data required for the simulation */
 +        read_tpx_state(ftp2fn(efTPX,nfile,fnm),inputrec,state,NULL,mtop);
 +
 +        /* NOW the threads will be started: */
 +#ifdef GMX_THREAD_MPI
 +        nthreads_mpi = get_nthreads_mpi(nthreads_requested, inputrec, mtop);
 +
 +        if (nthreads_mpi > 1)
 +        {
 +            /* now start the threads. */
 +            cr=mdrunner_start_threads(nthreads_mpi, fplog, cr_old, nfile, fnm,
 +                                      oenv, bVerbose, bCompact, nstglobalcomm, 
 +                                      ddxyz, dd_node_order, rdd, rconstr, 
 +                                      dddlb_opt, dlb_scale, ddcsx, ddcsy, ddcsz,
 +                                      nstepout, resetstep, nmultisim, 
 +                                      repl_ex_nst, repl_ex_seed, pforce, 
 +                                      cpt_period, max_hours, deviceOptions, 
 +                                      Flags);
 +            /* the main thread continues here with a new cr. We don't deallocate
 +               the old cr because other threads may still be reading it. */
 +            if (cr == NULL)
 +            {
 +                gmx_comm("Failed to spawn threads");
 +            }
 +        }
 +#endif
 +    }
 +    /* END OF CAUTION: cr is now reliable */
 +
 +    /* g_membed initialisation *
 +     * Because we change the mtop, init_membed is called before the init_parallel *
 +     * (in case we ever want to make it run in parallel) */
 +    if (opt2bSet("-membed",nfile,fnm))
 +    {
 +      /* NOTE(review): this message has no trailing newline */
 +      fprintf(stderr,"Entering membed code");
 +        snew(membed,1);
 +        init_membed(fplog,membed,nfile,fnm,mtop,inputrec,state,cr,&cpt_period);
 +    }
 +
 +    if (PAR(cr))
 +    {
 +        /* now broadcast everything to the non-master nodes/threads: */
 +        init_parallel(fplog, cr, inputrec, mtop);
 +    }
 +    if (fplog != NULL)
 +    {
 +        pr_inputrec(fplog,0,"Input Parameters",inputrec,FALSE);
 +    }
 +
 +    /* now make sure the state is initialized and propagated */
 +    set_state_entries(state,inputrec,cr->nnodes);
 +
 +    /* A parallel command line option consistency check that we can
 +       only do after any threads have started. */
 +    if (!PAR(cr) &&
 +        (ddxyz[XX] > 1 || ddxyz[YY] > 1 || ddxyz[ZZ] > 1 || cr->npmenodes > 0))
 +    {
 +        /* The second half of the message depends on the build type */
 +        gmx_fatal(FARGS,
 +                  "The -dd or -npme option request a parallel simulation, "
 +#ifndef GMX_MPI
 +                  "but mdrun was compiled without threads or MPI enabled"
 +#else
 +#ifdef GMX_THREAD_MPI
 +                  "but the number of threads (option -nt) is 1"
 +#else
 +                  "but mdrun was not started through mpirun/mpiexec or only one process was requested through mpirun/mpiexec" 
 +#endif
 +#endif
 +            );
 +    }
 +
 +    if ((Flags & MD_RERUN) &&
 +        (EI_ENERGY_MINIMIZATION(inputrec->eI) || eiNM == inputrec->eI))
 +    {
 +        gmx_fatal(FARGS, "The .mdp file specified an energy mininization or normal mode algorithm, and these are not compatible with mdrun -rerun");
 +    }
 +
 +    if (can_use_allvsall(inputrec,mtop,TRUE,cr,fplog))
 +    {
 +        /* All-vs-all loops do not work with domain decomposition */
 +        Flags |= MD_PARTDEC;
 +    }
 +
 +    if (!EEL_PME(inputrec->coulombtype) || (Flags & MD_PARTDEC))
 +    {
++        /* Separate PME nodes were requested but cannot be used: stop on
++         * all nodes with a message naming the reason. */
++        if (cr->npmenodes > 0)
++        {
++            if (!EEL_PME(inputrec->coulombtype))
++            {
++                gmx_fatal_collective(FARGS,cr,NULL,
++                                     "PME nodes are requested, but the system does not use PME electrostatics");
++            }
++            if (Flags & MD_PARTDEC)
++            {
++                gmx_fatal_collective(FARGS,cr,NULL,
++                                     "PME nodes are requested, but particle decomposition does not support separate PME nodes");
++            }
++        }
++
 +        cr->npmenodes = 0;
 +    }
 +
 +#ifdef GMX_FAHCORE
 +    fcRegisterSteps(inputrec->nsteps,inputrec->init_step);
 +#endif
 +
 +    /* NMR restraints must be initialized before load_checkpoint,
 +     * since with time averaging the history is added to t_state.
 +     * For proper consistency check we therefore need to extend
 +     * t_state here.
 +     * So the PME-only nodes (if present) will also initialize
 +     * the distance restraints.
 +     */
 +    snew(fcd,1);
 +
 +    /* This needs to be called before read_checkpoint to extend the state */
 +    init_disres(fplog,mtop,inputrec,cr,Flags & MD_PARTDEC,fcd,state);
 +
 +    if (gmx_mtop_ftype_count(mtop,F_ORIRES) > 0)
 +    {
 +        if (PAR(cr) && !(Flags & MD_PARTDEC))
 +        {
 +            gmx_fatal(FARGS,"Orientation restraints do not work (yet) with domain decomposition, use particle decomposition (mdrun option -pd)");
 +        }
 +        /* Orientation restraints */
 +        if (MASTER(cr))
 +        {
 +            init_orires(fplog,mtop,state->x,inputrec,cr->ms,&(fcd->orires),
 +                        state);
 +        }
 +    }
 +
 +    if (DEFORM(*inputrec))
 +    {
 +        /* Store the deform reference box before reading the checkpoint */
 +        if (SIMMASTER(cr))
 +        {
 +            copy_mat(state->box,box);
 +        }
 +        if (PAR(cr))
 +        {
 +            gmx_bcast(sizeof(box),box,cr);
 +        }
 +        /* Because we do not have the update struct available yet
 +         * in which the reference values should be stored,
 +         * we store them temporarily in static variables.
 +         * This should be thread safe, since they are only written once
 +         * and with identical values.
 +         */
 +#ifdef GMX_THREAD_MPI
 +        tMPI_Thread_mutex_lock(&deform_init_box_mutex);
 +#endif
 +        deform_init_init_step_tpx = inputrec->init_step;
 +        copy_mat(box,deform_init_box_tpx);
 +#ifdef GMX_THREAD_MPI
 +        tMPI_Thread_mutex_unlock(&deform_init_box_mutex);
 +#endif
 +    }
 +
 +    if (opt2bSet("-cpi",nfile,fnm)) 
 +    {
 +        /* Check if checkpoint file exists before doing continuation.
 +         * This way we can use identical input options for the first and subsequent runs...
 +         */
 +        if( gmx_fexist_master(opt2fn_master("-cpi",nfile,fnm,cr),cr) )
 +        {
 +            load_checkpoint(opt2fn_master("-cpi",nfile,fnm,cr),&fplog,
 +                            cr,Flags & MD_PARTDEC,ddxyz,
 +                            inputrec,state,&bReadRNG,&bReadEkin,
 +                            (Flags & MD_APPENDFILES));
 +            
 +            if (bReadRNG)
 +            {
 +                Flags |= MD_READ_RNG;
 +            }
 +            if (bReadEkin)
 +            {
 +                Flags |= MD_READ_EKIN;
 +            }
 +        }
 +    }
 +
 +    if (((MASTER(cr) || (Flags & MD_SEPPOT)) && (Flags & MD_APPENDFILES))
 +#ifdef GMX_THREAD_MPI
 +        /* With thread MPI only the master node/thread exists in mdrun.c,
 +         * therefore non-master nodes need to open the "seppot" log file here.
 +         */
 +        || (!MASTER(cr) && (Flags & MD_SEPPOT))
 +#endif
 +        )
 +    {
 +        gmx_log_open(ftp2fn(efLOG,nfile,fnm),cr,!(Flags & MD_SEPPOT),
 +                             Flags,&fplog);
 +    }
 +
 +    /* Take the box from the state again (load_checkpoint above was
 +     * handed the state) and distribute it to all nodes. */
 +    if (SIMMASTER(cr)) 
 +    {
 +        copy_mat(state->box,box);
 +    }
 +
 +    if (PAR(cr)) 
 +    {
 +        gmx_bcast(sizeof(box),box,cr);
 +    }
 +
 +    /* Essential dynamics */
 +    if (opt2bSet("-ei",nfile,fnm))
 +    {
 +        /* Open input and output files, allocate space for ED data structure */
 +        ed = ed_open(nfile,fnm,Flags,cr);
 +    }
 +
 +    if (bVerbose && SIMMASTER(cr))
 +    {
 +        fprintf(stderr,"Loaded with Money\n\n");
 +    }
 +
 +    /* Domain decomposition is used for parallel runs, except with
 +     * particle decomposition, TPI and normal-mode analysis. */
 +    if (PAR(cr) && !((Flags & MD_PARTDEC) ||
 +                     EI_TPI(inputrec->eI) ||
 +                     inputrec->eI == eiNM))
 +    {
 +        cr->dd = init_domain_decomposition(fplog,cr,Flags,ddxyz,rdd,rconstr,
 +                                           dddlb_opt,dlb_scale,
 +                                           ddcsx,ddcsy,ddcsz,
 +                                           mtop,inputrec,
 +                                           box,state->x,
 +                                           &ddbox,&npme_major,&npme_minor);
 +
 +        make_dd_communicators(fplog,cr,dd_node_order);
 +
 +        /* Set overallocation to avoid frequent reallocation of arrays */
 +        set_over_alloc_dd(TRUE);
 +    }
 +    else
 +    {
 +        /* PME, if used, is done on all nodes with 1D decomposition */
 +        cr->npmenodes = 0;
 +        cr->duty = (DUTY_PP | DUTY_PME);
 +        npme_major = 1;
 +        npme_minor = 1;
 +        if (!EI_TPI(inputrec->eI))
 +        {
 +            npme_major = cr->nnodes;
 +        }
 +        
 +        if (inputrec->ePBC == epbcSCREW)
 +        {
 +            gmx_fatal(FARGS,
 +                      "pbc=%s is only implemented with domain decomposition",
 +                      epbc_names[inputrec->ePBC]);
 +        }
 +    }
 +
 +    if (PAR(cr))
 +    {
 +        /* After possible communicator splitting in make_dd_communicators.
 +         * we can set up the intra/inter node communication.
 +         */
 +        gmx_setup_nodecomm(fplog,cr);
 +    }
 +
 +    /* get number of OpenMP/PME threads
 +     * env variable should be read only on one node to make sure it is identical everywhere */
 +#ifdef GMX_OPENMP
 +    if (EEL_PME(inputrec->coulombtype))
 +    {
 +        if (MASTER(cr))
 +        {
 +            char *ptr;
 +            /* nthreads_pme keeps its default of 1 when the variable is
 +             * not set or sscanf() does not find a number in it */
 +            if ((ptr=getenv("GMX_PME_NTHREADS")) != NULL)
 +            {
 +                sscanf(ptr,"%d",&nthreads_pme);
 +            }
 +            if (fplog != NULL && nthreads_pme > 1)
 +            {
 +                fprintf(fplog,"Using %d threads for PME\n",nthreads_pme);
 +            }
 +        }
 +        if (PAR(cr))
 +        {
 +            gmx_bcast_sim(sizeof(nthreads_pme),&nthreads_pme,cr);
 +        }
 +    }
 +#endif
 +
 +    wcycle = wallcycle_init(fplog,resetstep,cr,nthreads_pme);
 +    if (PAR(cr))
 +    {
 +        /* Master synchronizes its value of reset_counters with all nodes 
 +         * including PME only nodes */
 +        reset_counters = wcycle_get_reset_counters(wcycle);
 +        gmx_bcast_sim(sizeof(reset_counters),&reset_counters,cr);
 +        wcycle_set_reset_counters(wcycle, reset_counters);
 +    }
 +
 +
 +    snew(nrnb,1);
 +    if (cr->duty & DUTY_PP)
 +    {
 +        /* For domain decomposition we allocate dynamically
 +         * in dd_partition_system.
 +         */
 +        if (DOMAINDECOMP(cr))
 +        {
 +            bcast_state_setup(cr,state);
 +        }
 +        else
 +        {
 +            if (PAR(cr))
 +            {
 +                bcast_state(cr,state,TRUE);
 +            }
 +        }
 +
 +        /* Dihedral Restraints */
 +        if (gmx_mtop_ftype_count(mtop,F_DIHRES) > 0)
 +        {
 +            init_dihres(fplog,mtop,inputrec,fcd);
 +        }
 +
 +        /* Initiate forcerecord */
 +        fr = mk_forcerec();
 +        init_forcerec(fplog,oenv,fr,fcd,inputrec,mtop,cr,box,FALSE,
 +                      opt2fn("-table",nfile,fnm),
 +                      opt2fn("-tabletf",nfile,fnm),
 +                      opt2fn("-tablep",nfile,fnm),
 +                      opt2fn("-tableb",nfile,fnm),FALSE,pforce);
 +
 +        /* version for PCA_NOT_READ_NODE (see md.c) */
 +        /*init_forcerec(fplog,fr,fcd,inputrec,mtop,cr,box,FALSE,
 +          "nofile","nofile","nofile","nofile",FALSE,pforce);
 +          */        
 +        fr->bSepDVDL = ((Flags & MD_SEPPOT) == MD_SEPPOT);
 +
 +        /* Initialize QM-MM */
 +        if(fr->bQMMM)
 +        {
 +            init_QMMMrec(cr,box,mtop,inputrec,fr);
 +        }
 +
 +        /* Initialize the mdatoms structure.
 +         * mdatoms is not filled with atom data,
 +         * as this can not be done now with domain decomposition.
 +         */
 +        mdatoms = init_mdatoms(fplog,mtop,inputrec->efep!=efepNO);
 +
 +        /* Initialize the virtual site communication */
 +        vsite = init_vsite(mtop,cr);
 +
 +        calc_shifts(box,fr->shift_vec);
 +
 +        /* With periodic molecules the charge groups should be whole at start up
 +         * and the virtual sites should not be far from their proper positions.
 +         */
 +        if (!inputrec->bContinuation && MASTER(cr) &&
 +            !(inputrec->ePBC != epbcNONE && inputrec->bPeriodicMols))
 +        {
 +            /* Make molecules whole at start of run */
 +            if (fr->ePBC != epbcNONE)
 +            {
 +                do_pbc_first_mtop(fplog,inputrec->ePBC,box,mtop,state->x);
 +            }
 +            if (vsite)
 +            {
 +                /* Correct initial vsite positions are required
 +                 * for the initial distribution in the domain decomposition
 +                 * and for the initial shell prediction.
 +                 */
 +                construct_vsites_mtop(fplog,vsite,mtop,state->x);
 +            }
 +        }
 +
 +        /* Initiate PPPM if necessary */
 +        if (fr->eeltype == eelPPPM)
 +        {
 +            if (mdatoms->nChargePerturbed)
 +            {
 +                gmx_fatal(FARGS,"Free energy with %s is not implemented",
 +                          eel_names[fr->eeltype]);
 +            }
 +            status = gmx_pppm_init(fplog,cr,oenv,FALSE,TRUE,box,
 +                                   getenv("GMXGHAT"),inputrec, (Flags & MD_REPRODUCIBLE));
 +            if (status != 0)
 +            {
 +                gmx_fatal(FARGS,"Error %d initializing PPPM",status);
 +            }
 +        }
 +
 +        if (EEL_PME(fr->eeltype))
 +        {
 +            ewaldcoeff = fr->ewaldcoeff;
 +            pmedata = &fr->pmedata;
 +        }
 +        else
 +        {
 +            pmedata = NULL;
 +        }
 +    }
 +    else
 +    {
 +        /* This is a PME only node */
 +
 +        /* We don't need the state */
 +        done_state(state);
 +
 +        ewaldcoeff = calc_ewaldcoeff(inputrec->rcoulomb, inputrec->ewald_rtol);
 +        snew(pmedata,1);
 +    }
 +
 +    /* Initiate PME if necessary,
 +     * either on all nodes or on dedicated PME nodes only. */
 +    if (EEL_PME(inputrec->coulombtype))
 +    {
 +        /* mdatoms is only set on nodes with DUTY_PP; elsewhere
 +         * nChargePerturbed keeps its initial value of -1 until the
 +         * broadcast below. */
 +        if (mdatoms)
 +        {
 +            nChargePerturbed = mdatoms->nChargePerturbed;
 +        }
 +        if (cr->npmenodes > 0)
 +        {
 +            /* The PME only nodes need to know nChargePerturbed */
 +            gmx_bcast_sim(sizeof(nChargePerturbed),&nChargePerturbed,cr);
 +        }
 +
 +
 +        /* Set CPU affinity. Can be important for performance.
 +           On some systems (e.g. Cray) CPU Affinity is set by default.
 +           But default assigning doesn't work (well) with only some ranks
 +           having threads. This causes very low performance.
 +           External tools have cumbersome syntax for setting affinity
 +           in the case that only some ranks have threads.
 +           Thus it is important that GROMACS sets the affinity internally
 +           if only PME is using threads.
 +        */
 +
 +#ifdef GMX_OPENMP
 +#ifdef __linux
 +#ifdef GMX_LIB_MPI
 +        {
 +            int core;
 +            MPI_Comm comm_intra; /* intra communicator (but different to nc.comm_intra includes PME nodes) */
 +            MPI_Comm_split(MPI_COMM_WORLD,gmx_hostname_num(),gmx_node_rank(),&comm_intra);
 +            int local_omp_nthreads = (cr->duty & DUTY_PME) ? nthreads_pme : 1; /* threads on this node */
 +            /* Inclusive prefix sum of the thread counts over the ranks
 +             * in comm_intra */
 +            MPI_Scan(&local_omp_nthreads,&core, 1, MPI_INT, MPI_SUM, comm_intra);
 +            core-=local_omp_nthreads; /* make exclusive scan */
 +#pragma omp parallel firstprivate(core) num_threads(local_omp_nthreads)
 +            {
 +                /* Pin each OpenMP thread to the core with index
 +                 * (first core of this rank) + (thread number) */
 +                cpu_set_t mask;
 +                CPU_ZERO(&mask);
 +                core+=omp_get_thread_num();
 +                CPU_SET(core,&mask);
 +                sched_setaffinity((pid_t) syscall (SYS_gettid),sizeof(cpu_set_t),&mask);
 +            }
 +        }
 +#endif /*GMX_LIB_MPI*/
 +#endif /*__linux*/
 +#endif /*GMX_OPENMP*/
 +
 +        if (cr->duty & DUTY_PME)
 +        {
 +            status = gmx_pme_init(pmedata,cr,npme_major,npme_minor,inputrec,
 +                                  mtop ? mtop->natoms : 0,nChargePerturbed,
 +                                  (Flags & MD_REPRODUCIBLE),nthreads_pme);
 +            if (status != 0) 
 +            {
 +                gmx_fatal(FARGS,"Error %d initializing PME",status);
 +            }
 +        }
 +    }
 +
 +
 +    if (integrator[inputrec->eI].func == do_md
 +#ifdef GMX_OPENMM
 +        ||
 +        integrator[inputrec->eI].func == do_md_openmm
 +#endif
 +        )
 +    {
 +        /* Turn on signal handling on all nodes */
 +        /*
 +         * (A user signal from the PME nodes (if any)
 +         * is communicated to the PP nodes.)
 +         */
 +        signal_handler_install();
 +    }
 +
 +    if (cr->duty & DUTY_PP)
 +    {
 +        if (inputrec->ePull != epullNO)
 +        {
 +            /* Initialize pull code */
 +            init_pull(fplog,inputrec,nfile,fnm,mtop,cr,oenv,
 +                      EI_DYNAMICS(inputrec->eI) && MASTER(cr),Flags);
 +        }
 +        
 +        if (inputrec->bRot)
 +        {
 +           /* Initialize enforced rotation code */
 +           init_rot(fplog,inputrec,nfile,fnm,cr,state->x,state->box,mtop,oenv,
 +                    bVerbose,Flags);
 +        }
 +
 +        constr = init_constraints(fplog,mtop,inputrec,ed,state,cr);
 +
 +        if (DOMAINDECOMP(cr))
 +        {
 +            dd_init_bondeds(fplog,cr->dd,mtop,vsite,constr,inputrec,
 +                            Flags & MD_DDBONDCHECK,fr->cginfo_mb);
 +
 +            set_dd_parameters(fplog,cr->dd,dlb_scale,inputrec,fr,&ddbox);
 +
 +            setup_dd_grid(fplog,cr->dd);
 +        }
 +
 +        /* Now do whatever the user wants us to do (how flexible...) */
 +        integrator[inputrec->eI].func(fplog,cr,nfile,fnm,
 +                                      oenv,bVerbose,bCompact,
 +                                      nstglobalcomm,
 +                                      vsite,constr,
 +                                      nstepout,inputrec,mtop,
 +                                      fcd,state,
 +                                      mdatoms,nrnb,wcycle,ed,fr,
 +                                      repl_ex_nst,repl_ex_seed,
 +                                      membed,
 +                                      cpt_period,max_hours,
 +                                      deviceOptions,
 +                                      Flags,
 +                                      &runtime);
 +
 +        if (inputrec->ePull != epullNO)
 +        {
 +            finish_pull(fplog,inputrec->pull);
 +        }
 +        
 +        if (inputrec->bRot)
 +        {
 +            finish_rot(fplog,inputrec->rot);
 +        }
 +
 +    } 
 +    else 
 +    {
 +        /* do PME only */
 +        gmx_pmeonly(*pmedata,cr,nrnb,wcycle,ewaldcoeff,FALSE,inputrec);
 +    }
 +
 +    /* NOTE(review): runtime is only passed to the integrator above; on
 +     * the gmx_pmeonly() path nothing visible here initialises it before
 +     * it is handed to finish_run() - confirm. */
 +    if (EI_DYNAMICS(inputrec->eI) || EI_TPI(inputrec->eI))
 +    {
 +        /* Some timing stats */  
 +        if (SIMMASTER(cr))
 +        {
 +            if (runtime.proc == 0)
 +            {
 +                runtime.proc = runtime.real;
 +            }
 +        }
 +        else
 +        {
 +            runtime.real = 0;
 +        }
 +    }
 +
 +    wallcycle_stop(wcycle,ewcRUN);
 +
 +    /* Finish up, write some stuff
 +     * if rerunMD, don't write last frame again 
 +     */
 +    finish_run(fplog,cr,ftp2fn(efSTO,nfile,fnm),
 +               inputrec,nrnb,wcycle,&runtime,
 +               EI_DYNAMICS(inputrec->eI) && !MULTISIM(cr));
 +    
 +    if (opt2bSet("-membed",nfile,fnm))
 +    {
 +        sfree(membed);
 +    }
 +
 +    /* Does what it says */  
 +    print_date_and_time(fplog,cr->nodeid,"Finished mdrun",&runtime);
 +
 +    /* Close logfile already here if we were appending to it */
 +    if (MASTER(cr) && (Flags & MD_APPENDFILES))
 +    {
 +        gmx_log_close(fplog);
 +    } 
 +
 +    rc=(int)gmx_get_stop_condition();
 +
 +#ifdef GMX_THREAD_MPI
 +    /* we need to join all threads. The sub-threads join when they
 +       exit this function, but the master thread needs to be told to 
 +       wait for that. */
 +    if (PAR(cr) && MASTER(cr))
 +    {
 +        tMPI_Finalize();
 +    }
 +#endif
 +
 +    return rc;
 +}
Simple merge
index c5ca942fa90db26dc759bc82ab3d27e1cb160449,0000000000000000000000000000000000000000..2a15467d2bd44610587b74006a2303f403a2101a
mode 100644,000000..100644
--- /dev/null
@@@ -1,468 -1,0 +1,468 @@@
-   char    **lines=NULL;
 +/*
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include "typedefs.h"
 +#include "smalloc.h"
 +#include "strdb.h"
 +#include "string2.h"
 +#include "xmdrun.h"
 +#include "vec.h"
 +#include "genalg.h"
 +#include "random.h"
 +#include "macros.h"
 +
 +real mol_dipole(int k0,int k1,rvec x[],real q[])
 +{
 +  int  k,m;
 +  rvec mu;
 +  
 +  clear_rvec(mu);
 +  for(k=k0; (k<k1); k++) {
 +    for(m=0; (m<DIM); m++) 
 +      mu[m] += q[k]*x[k][m];
 +  }
 +  return norm(mu);  /* Dipole moment of this molecule in e nm */
 +}
 +
 +real calc_mu_aver(t_commrec *cr,rvec x[],real q[],rvec mu,
 +                t_block *mols,t_mdatoms *md,int gnx,atom_id grpindex[])
 +{
 +  int     i,start,end;
 +  real    mu_ave;
 +  
 +  start = md->start;
 +  end   = md->homenr + start;  
 +
 +  /*
 +  clear_rvec(mu);
 +  for(i=start; (i<end); i++)
 +    for(m=0; (m<DIM); m++)
 +      mu[m] += q[i]*x[i][m];
 +  if (PAR(cr)) {
 +    gmx_sum(DIM,mu,cr);
 +  }
 +  */
 +  /* I guess we have to parallelise this one! */
 +
 +  if (gnx > 0) {
 +    mu_ave = 0.0;
 +    for(i=0; (i<gnx); i++) {
 +      int gi = grpindex[i];
 +      mu_ave += mol_dipole(mols->index[gi],mols->index[gi+1],x,q);
 +    }
 +    
 +    return(mu_ave/gnx);
 +  }
 +  else
 +    return 0;
 +}
 +
 +/* Lots of global variables! Yummy... */
 +/* File-wide copy of the force-field scan settings; read by cost(),
 + * read_range(), value_range(), value_rand() and update_ff().
 + */
 +static t_ffscan ff;
 +
 +/* Store a copy of *fff in the file-static ff. The structure is copied by
 + * value, so later changes to *fff are not seen by this file.
 + */
 +void set_ffvars(t_ffscan *fff)
 +{
 +  ff = *fff;
 +}
 +
 +real cost(tensor P,real MSF,real E)
 +{
 +  return (ff.fac_msf*MSF+ff.fac_epot*sqr(E-ff.epot)+ff.fac_pres*
 +        (sqr(P[XX][XX]-ff.pres)+sqr(P[YY][YY]-ff.pres)+sqr(P[ZZ][ZZ]-ff.pres)));
 +  
 +}
 +
 +static const char     *esenm[eseNR] = { "SIG", "EPS", "BHAMA", "BHAMB", "BHAMC", "CELlX", "CELLY", "CELLZ" };
 +static int      nparm=0,*param_val=NULL;
 +static t_range  *range=NULL;
 +static t_genalg *ga=NULL;
 +static rvec     scale = { 1,1,1 };
 +
 +static void init_range(t_range *r,int np,int atype,int ptype,
 +                     real rmin,real rmax)
 +{
 +  if (rmin > rmax)
 +    gmx_fatal(FARGS,"rmin (%f) > rmax (%f)",rmin,rmax);
 +  if (np <= 0)
 +    gmx_fatal(FARGS,"np (%d) should be > 0",np);
 +  if ((rmax > rmin) && (np <= 1))
 +    gmx_fatal(FARGS,"If rmax > rmin, np should be > 1");
 +  if ((ptype < 0) || (ptype >= eseNR))
 +    gmx_fatal(FARGS,"ptype (%d) should be < %d",ptype,eseNR);
 +  r->np    = np;
 +  r->atype = atype;
 +  r->ptype = ptype;
 +  r->rmin  = rmin;
 +  r->rmax  = rmax;
 +  r->rval  = rmin;
 +  r->dr    = r->rmax - r->rmin;
 +}
 +
 +static t_range *read_range(const char *db,int *nrange)
 +{
 +  int     nlines,nr,np,i;
++  char    **lines;
 +  t_range *range;
 +  int     atype,ptype;
 +  double  rmin,rmax;
 +  
 +  nlines = get_file(db,&lines);
 +  snew(range,nlines);
 +  
 +  nr=0;
 +  for(i=0; (i < nlines); i++) {
 +    strip_comment(lines[i]);
 +    if (sscanf(lines[i],"%d%d%d%lf%lf",&np,&atype,&ptype,&rmin,&rmax) == 5) {
 +      if (ff.bLogEps && (ptype == eseEPSILON) && (rmin <= 0))
 +      gmx_fatal(FARGS,"When using logarithmic epsilon increments the minimum"
 +                  "value must be > 0");
 +      init_range(&range[nr],np,atype,ptype,rmin,rmax);
 +      nr++;
 +    }
 +  }
 +  fprintf(stderr,"found %d variables to iterate over\n",nr);
 +  
 +  *nrange = nr;
 +
 +  for(nr=0; (nr < nlines); nr++)
 +    sfree(lines[nr]);
 +  sfree(lines);
 +    
 +  return range;
 +}
 +
 +static real value_range(t_range *r,int n)
 +{
 +  real logrmin,logrmax;
 +  
 +  if ((n < 0) || (n > r->np))
 +    gmx_fatal(FARGS,"Value (%d) out of range for value_range (max %d)",n,r->np);
 +
 +  if (r->np == 1)
 +    r->rval = r->rmin;
 +  else {
 +    if ((r->ptype == eseEPSILON) && ff.bLogEps) {
 +      logrmin = log(r->rmin);
 +      logrmax = log(r->rmax);
 +      r->rval = exp(logrmin + (n*(logrmax-logrmin))/(r->np-1));
 +    }
 +    else
 +      r->rval = r->rmin+(n*(r->dr))/(r->np-1);
 +  }
 +  return r->rval;
 +}
 +
 +real value_rand(t_range *r,int *seed)
 +{
 +  real logrmin,logrmax;
 +  real mr;
 +  
 +  if (r->np == 1)
 +    r->rval = r->rmin;
 +  else {
 +    mr = rando(seed);
 +    if ((r->ptype == eseEPSILON) && ff.bLogEps) {
 +      logrmin = log(r->rmin);
 +      logrmax = log(r->rmax);
 +      r->rval = exp(logrmin + mr*(logrmax-logrmin));
 +    }
 +    else
 +      r->rval = r->rmin + mr*(r->rmax-r->rmin);
 +  }
 +  if (debug)
 +    fprintf(debug,"type: %s, value: %g\n",esenm[r->ptype],r->rval);
 +  return r->rval;
 +}
 +
 +static void update_ff(t_forcerec *fr,int nparm,t_range range[],int param_val[])
 +{
 +  static double *sigma=NULL,*eps=NULL,*c6=NULL,*cn=NULL,*bhama=NULL,*bhamb=NULL,*bhamc=NULL;
 +  real   val,*nbfp;
 +  int    i,j,atnr;
 +  
 +  atnr = fr->ntype;
 +  nbfp = fr->nbfp;
 +  
 +  if (fr->bBHAM) {
 +    if (bhama == NULL) {
 +      snew(bhama,atnr);
 +      snew(bhamb,atnr);
 +      snew(bhamc,atnr);
 +    }
 +  }
 +  else {
 +    if (sigma == NULL) {
 +      snew(sigma,atnr);
 +      snew(eps,atnr);
 +      snew(c6,atnr);
 +      snew(cn,atnr);
 +    }
 +  }
 +  /* Get current values for everything */
 +  for(i=0; (i<nparm); i++) {
 +    if (ga)
 +      val = range[i].rval;
 +    else
 +      val = value_range(&range[i],param_val[i]);
 +    if(debug)
 +      fprintf(debug,"val = %g\n",val);
 +    switch (range[i].ptype) {
 +    case eseSIGMA:
 +      sigma[range[i].atype] = val;
 +      break;
 +    case eseEPSILON:
 +      eps[range[i].atype] = val;
 +      break;
 +    case eseBHAMA:
 +      bhama[range[i].atype] = val;
 +      break;
 +    case eseBHAMB:
 +      bhamb[range[i].atype] = val;
 +      break;
 +    case eseBHAMC:
 +      bhamc[range[i].atype] = val;
 +      break;
 +    case eseCELLX:
 +      scale[XX] = val;
 +      break;
 +    case eseCELLY:
 +      scale[YY] = val;
 +      break;
 +    case eseCELLZ:
 +      scale[ZZ] = val;
 +      break;
 +    default:
 +      gmx_fatal(FARGS,"Unknown ptype");
 +    }
 +  }
 +  if (fr->bBHAM) {
 +    for(i=0; (i<atnr); i++) {
 +      for(j=0; (j<=i); j++) {
 +      BHAMA(nbfp,atnr,i,j) = BHAMA(nbfp,atnr,j,i) = sqrt(bhama[i]*bhama[j]);
 +      BHAMB(nbfp,atnr,i,j) = BHAMB(nbfp,atnr,j,i) = sqrt(bhamb[i]*bhamb[j]);
 +      BHAMC(nbfp,atnr,i,j) = BHAMC(nbfp,atnr,j,i) = sqrt(bhamc[i]*bhamc[j]);
 +      }
 +    }
 +  }
 +  else {  
 +    /* Now build a new matrix */
 +    for(i=0; (i<atnr); i++) {
 +      c6[i] = 4*eps[i]*pow(sigma[i],6.0);
 +      cn[i] = 4*eps[i]*pow(sigma[i],ff.npow);
 +    }
 +    for(i=0; (i<atnr); i++) {
 +      for(j=0; (j<=i); j++) {
 +      C6(nbfp,atnr,i,j)  = C6(nbfp,atnr,j,i)  = sqrt(c6[i]*c6[j]);
 +      C12(nbfp,atnr,i,j) = C12(nbfp,atnr,j,i) = sqrt(cn[i]*cn[j]);
 +      }
 +    }
 +  }
 +  
 +  if (debug) {
 +    if (!fr->bBHAM) 
 +      for(i=0; (i<atnr); i++)
 +      fprintf(debug,"atnr = %2d  sigma = %8.4f  eps = %8.4f\n",i,sigma[i],eps[i]);
 +    for(i=0; (i<atnr); i++) {
 +      for(j=0; (j<atnr); j++) {
 +      if (fr->bBHAM)
 +        fprintf(debug,"i: %2d  j: %2d  A:  %10.5e  B:  %10.5e  C:  %10.5e\n",i,j,
 +                BHAMA(nbfp,atnr,i,j),BHAMB(nbfp,atnr,i,j),BHAMC(nbfp,atnr,i,j));
 +      else
 +        fprintf(debug,"i: %2d  j: %2d  c6:  %10.5e  cn:  %10.5e\n",i,j,
 +                C6(nbfp,atnr,i,j),C12(nbfp,atnr,i,j));
 +      }
 +    }
 +  }
 +}
 +
 +static void scale_box(int natoms,rvec x[],matrix box)
 +{
 +  int i,m;
 +  
 +  if ((scale[XX] != 1.0) ||   (scale[YY] != 1.0) ||   (scale[ZZ] != 1.0)) {
 +    if (debug)
 +      fprintf(debug,"scale = %8.4f  %8.4f  %8.4f\n",
 +            scale[XX],scale[YY],scale[ZZ]);
 +    for(m=0; (m<DIM); m++)
 +      box[m][m] *= scale[m];
 +    for(i=0; (i<natoms); i++) 
 +      for(m=0; (m<DIM); m++)
 +      x[i][m] *= scale[m];
 +  }
 +}
 +
 +gmx_bool update_forcefield(FILE *fplog,
 +                     int nfile,const t_filenm fnm[],t_forcerec *fr,
 +                     int natoms,rvec x[],matrix box)
 +{
 +  static int ntry,ntried;
 +  int    i,j;
 +  gmx_bool   bDone;
 +
 +  /* First time around we have to read the parameters */  
 +  if (nparm == 0) {    
 +    range = read_range(ftp2fn(efDAT,nfile,fnm),&nparm);
 +    if (nparm == 0) 
 +      gmx_fatal(FARGS,"No correct parameter info in %s",ftp2fn(efDAT,nfile,fnm));
 +    snew(param_val,nparm);
 +
 +    if (opt2bSet("-ga",nfile,fnm)) {
 +      /* Genetic algorithm time */
 +      ga = init_ga(fplog,opt2fn("-ga",nfile,fnm),nparm,range);
 +    }
 +    else {  
 +      /* Determine the grid size */
 +      ntry = 1;
 +      for(i=0; (i<nparm); i++)
 +      ntry *= range[i].np;
 +      ntried = 0;
 +      
 +      fprintf(fplog,"Going to try %d different combinations of %d parameters\n",
 +            ntry,nparm);
 +    }
 +  }
 +  if (ga) {
 +    update_ga(fplog,range,ga);
 +  }
 +  else {
 +    /* Increment the counter
 +     * Non-trivial, since this is nparm nested loops in principle 
 +     */
 +    for(i=0; (i<nparm); i++) {
 +      if (param_val[i] < (range[i].np-1)) {
 +      param_val[i]++;
 +      for(j=0; (j<i); j++)
 +        param_val[j] = 0;
 +      ntried++;
 +      break;
 +      }
 +    }
 +    if (i == nparm) {
 +      fprintf(fplog,"Finished with %d out of %d iterations\n",ntried+1,ntry);
 +      return TRUE;
 +    }
 +  }
 +
 +  /* Now do the real updating */
 +  update_ff(fr,nparm,range,param_val);
 +  
 +  /* Update box and coordinates if necessary */
 +  scale_box(natoms,x,box);
 +  
 +  return FALSE;
 +}
 +
 +static void print_range(FILE *fp,tensor P,real MSF,real energy)
 +{
 +  int  i;
 +  
 +  fprintf(fp,"%8.3f  %8.3f  %8.3f  %8.3f",
 +        cost(P,MSF,energy),trace(P)/3,MSF,energy);
 +  for(i=0; (i<nparm); i++)
 +    fprintf(fp," %s %10g",esenm[range[i].ptype],range[i].rval);
 +  fprintf(fp," FF\n");
 +  fflush(fp);
 +}
 +
 +static real msf(int n,rvec f1[],rvec f2[])
 +{
 +  int  i,j;
 +  rvec ff2;
 +  real msf1=0;
 +  
 +  for(i=0; (i<n); ) {
 +    clear_rvec(ff2);
 +    for(j=0; ((j<ff.molsize) && (i<n)); j++,i++) {
 +      rvec_inc(ff2,f1[i]);
 +      if (f2)
 +      rvec_inc(ff2,f2[i]);
 +    }
 +    msf1 += iprod(ff2,ff2);
 +  }
 +  
 +  return msf1/n;
 +}
 +
 +static void print_grid(FILE *fp,real ener[],int natoms,rvec f[],rvec fshake[],
 +                     rvec x[],t_block *mols,real mass[],tensor pres)
 +{
 +  static gmx_bool bFirst = TRUE;
 +  static const char *desc[] = {
 +    "------------------------------------------------------------------------",
 +    "In the output from the forcefield scan we have the potential energy,", 
 +    "then the root mean square force on the atoms, and finally the parameters",
 +    "in the order they appear in the input file.",
 +    "------------------------------------------------------------------------" 
 +  };
 +  real msf1;
 +  int  i;
 +  
 +  if (bFirst) {
 +    for(i=0; (i<asize(desc)); i++)
 +      fprintf(fp,"%s\n",desc[i]);
 +    fflush(fp);
 +    bFirst = FALSE;
 +  }
 +  if ((ff.tol == 0) || (fabs(ener[F_EPOT]/ff.nmol-ff.epot) < ff.tol)) {
 +    msf1 = msf(natoms,f,fshake);
 +    if ((ff.f_max == 0) || (msf1 < sqr(ff.f_max))) 
 +      print_range(fp,pres,msf1,ener[F_EPOT]/ff.nmol);
 +  }
 +}
 +
 +gmx_bool print_forcefield(FILE *fp,real ener[],int natoms,rvec f[],rvec fshake[],
 +                    rvec x[],t_block *mols,real mass[],tensor pres)
 +{
 +  real msf1;
 +  
 +  if (ga) {
 +    msf1 = msf(natoms,f,fshake);
 +    if (debug)
 +      fprintf(fp,"Pressure: %12g, RMSF: %12g, Energy-Epot: %12g, cost: %12g\n",
 +            ener[F_PRES],sqrt(msf1),ener[F_EPOT]/ff.nmol-ff.epot,
 +            cost(pres,msf1,ener[F_EPOT]/ff.nmol));
 +    if (print_ga(fp,ga,msf1,pres,scale,(ener[F_EPOT]/ff.nmol),range,ff.tol)) {
 +      return TRUE;
 +    }
 +    fflush(fp);
 +  }
 +  else
 +    print_grid(fp,ener,natoms,f,fshake,x,mols,mass,pres);
 +  return FALSE;
 +}
 + 
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge