Merge remote-tracking branch 'origin/release-4-6' into HEAD
authorRoland Schulz <roland@utk.edu>
Tue, 10 Jul 2012 04:46:15 +0000 (00:46 -0400)
committerRoland Schulz <roland@utk.edu>
Tue, 10 Jul 2012 06:05:20 +0000 (02:05 -0400)
Conflicts:
admin/mknroff.pl (deleted)
share/template/CMakeLists.txt (added both)
src/CMakeLists.txt (trivial)
src/gromacs/gmxlib/oenv.cpp (see next)
src/gromacs/gmxlib/statutil.cpp (change moved to
           src/gromacs/utility/programinfo.cpp)
src/kernel/CMakeLists.txt (added gmx_add_man_page
           to programs/*/CMakeLists.txt)
src/kernel/mk_ghat.c (was deleted. no change)
src/ngmx/CMakeLists.txt (trivial)

Change-Id: Ic8c69541f8d801e3b1d32b26e886cf0602c34cf4

101 files changed:
1  2 
CMakeLists.txt
cmake/gmxSetBuildInformation.cmake
share/CMakeLists.txt
share/template/CMakeLists.txt
src/CMakeLists.txt
src/config.h.cmakein
src/gromacs/gmxlib/bondfree.c
src/gromacs/gmxlib/checkpoint.c
src/gromacs/gmxlib/gmx_fatal.c
src/gromacs/gmxlib/gmx_omp.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel010_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel020_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel030_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel100_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel101_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel102_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel103_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel104_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel110_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel111_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel112_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel113_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel114_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel120_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel121_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel122_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel123_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel124_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel130_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel131_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel132_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel133_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel134_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel200_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel201_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel202_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel203_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel204_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel210_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel211_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel212_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel213_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel214_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel220_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel221_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel222_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel223_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel224_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel230_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel231_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel232_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel233_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel234_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel300_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel301_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel302_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel303_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel304_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel310_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel311_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel312_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel313_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel314_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel320_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel321_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel322_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel323_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel324_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel330_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel331_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel332_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel333_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel334_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel400_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel410_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel420_c_adress.c
src/gromacs/gmxlib/nonbonded/nb_kernel_adress_c/nb_kernel430_c_adress.c
src/gromacs/gmxlib/txtdump.c
src/gromacs/gmxlib/wman.cpp
src/gromacs/gmxpreprocess/readir.c
src/gromacs/legacyheaders/gmx_omp.h
src/gromacs/mdlib/fft5d.cpp
src/gromacs/mdlib/forcerec.c
src/gromacs/mdlib/mdebin.c
src/gromacs/mdlib/pme.c
src/gromacs/utility/programinfo.cpp
src/ngmx/CMakeLists.txt
src/programs/g_protonate/CMakeLists.txt
src/programs/g_x2top/CMakeLists.txt
src/programs/g_x2top/g_x2top.c
src/programs/gmxcheck/CMakeLists.txt
src/programs/gmxcheck/gmxcheck.c
src/programs/gmxdump/CMakeLists.txt
src/programs/grompp/CMakeLists.txt
src/programs/grompp/grompp.c
src/programs/mdrun/CMakeLists.txt
src/programs/mdrun/runner.c
src/programs/pdb2gmx/CMakeLists.txt
src/programs/tpbconv/CMakeLists.txt
src/programs/tpbconv/tpbconv.c
src/tools/CMakeLists.txt

diff --cc CMakeLists.txt
Simple merge
Simple merge
index 2c966d728038389f1f610695b858a52dd77f144e,66d05c1ecb964cadfad954355555b6cb9534a784..251744f74b245a525d63aebbaf7afc7d9c06a517
@@@ -5,12 -4,8 +5,6 @@@ install(DIRECTORY . DESTINATION ${DATA_
    PATTERN "Makefile*" EXCLUDE
    PATTERN "CMake*" EXCLUDE
    PATTERN "cmake*" EXCLUDE
-   PATTERN "template_doc.c" EXCLUDE
-   PATTERN "Template.mak" EXCLUDE
    PATTERN "*~" EXCLUDE
+   PATTERN "template" EXCLUDE
  )
--
- install(FILES template/CMakeLists.txt.template
-         DESTINATION ${DATA_INSTALL_DIR}
-         RENAME template/CMakeLists.txt
-         COMPONENT data)
 -add_subdirectory(template)
index 6665d1242ed24ef5f151dbfff0258d8fcdf9388e,00d6be71470da100daeaa85c1cc4a1572abbf8dc..9d5fd42ca2f6fbf42e6d0ad06038bc76c1932dca
@@@ -1,2 -1,13 +1,15 @@@
 -install(FILES README template.c Makefile.pkg
 +add_executable(template template.cpp)
 +target_link_libraries(template libgromacs)
+ install(FILES CMakeLists.txt.template
+         DESTINATION ${DATA_INSTALL_DIR}/template
+         RENAME CMakeLists.txt
+         COMPONENT development)
++install(FILES README template.cpp Makefile.pkg
+         DESTINATION ${DATA_INSTALL_DIR}/template
+         COMPONENT development)
+ install(FILES cmake/FindGROMACS.cmake
+         DESTINATION ${DATA_INSTALL_DIR}/template/cmake
+         COMPONENT development)
index 3988bda34144a9cdd48bd66d0c24de50ca16f943,2d61ef4ec407ec637ad886bb5b92e983e4032e00..d4390cad804b1630ba375a5dc3002f5c30240b9b
@@@ -1,18 -1,10 +1,19 @@@
  configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config.h.cmakein ${CMAKE_CURRENT_BINARY_DIR}/config.h)
  
+ include(../cmake/BuildManPages.cmake)
 +if (BUILD_TESTING)
 +    add_custom_target(tests)
 +    if (GMX_BUILD_UNITTESTS)
 +        add_subdirectory(external/gmock-1.6.0)
 +    endif (GMX_BUILD_UNITTESTS)
 +    include(testutils/TestMacros.cmake)
 +    if (GMX_BUILD_UNITTESTS)
 +        add_subdirectory(testutils)
 +    endif (GMX_BUILD_UNITTESTS)
 +endif (BUILD_TESTING)
  
 -add_subdirectory(gmxlib)
 -add_subdirectory(mdlib)
 -add_subdirectory(kernel)
 +add_subdirectory(gromacs)
 +add_subdirectory(programs)
  
  if(NOT GMX_FAHCORE)
    add_subdirectory(tools)
Simple merge
Simple merge
index c1792367b06d48c8c0fdb5d3f53b15c894d603cb,0000000000000000000000000000000000000000..075cd658cbdb696068b9fc9488847b0a2abd375c
mode 100644,000000..100644
--- /dev/null
@@@ -1,2404 -1,0 +1,2404 @@@
- static const int cpt_version = 13;
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + * This file is part of Gromacs        Copyright (c) 1991-2008
 + * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
 + *
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the research papers on the package. Check out http://www.gromacs.org
 + * 
 + * And Hey:
 + * Gnomes, ROck Monsters And Chili Sauce
 + */
 +
 +/* The source code in this file should be thread-safe. 
 + Please keep it that way. */
 +
 +
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +#include "gromacs/utility/gmx_header_config.h"
 +
 +#include <string.h>
 +#include <time.h>
 +
 +#ifdef HAVE_SYS_TIME_H
 +#include <sys/time.h>
 +#endif
 +
 +#ifdef HAVE_UNISTD_H
 +#include <unistd.h>
 +#endif
 +
 +#ifdef GMX_NATIVE_WINDOWS
 +/* _chsize_s */
 +#include <io.h>
 +#include <sys/locking.h>
 +#endif
 +
 +
 +#include "filenm.h"
 +#include "names.h"
 +#include "typedefs.h"
 +#include "smalloc.h"
 +#include "gmxfio.h"
 +#include "xdrf.h"
 +#include "statutil.h"
 +#include "txtdump.h"
 +#include "vec.h"
 +#include "mdrun.h"
 +#include "network.h"
 +#include "gmx_random.h"
 +#include "checkpoint.h"
 +#include "futil.h"
 +#include "string2.h"
 +#include <fcntl.h>
 +
 +
 +#ifdef GMX_FAHCORE
 +#include "corewrap.h"
 +#endif
 +
 +
 +/* Portable version of ctime_r implemented in src/gmxlib/string2.c, but we do not want it declared in public installed headers */
 +char *
 +gmx_ctime_r(const time_t *clock,char *buf, int n);
 +
 +
 +#define CPT_MAGIC1 171817
 +#define CPT_MAGIC2 171819
 +#define CPTSTRLEN 1024
 +
 +#ifdef GMX_DOUBLE
 +#define GMX_CPT_BUILD_DP 1
 +#else
 +#define GMX_CPT_BUILD_DP 0
 +#endif
 +
 +/* cpt_version should normally only be changed
 + * when the header of footer format changes.
 + * The state data format itself is backward and forward compatible.
 + * But old code can not read a new entry that is present in the file
 + * (but can read a new format when new entries are not present).
 + */
-     if (*file_version >= 12)
++static const int cpt_version = 14;
 +
 +
 +const char *est_names[estNR]=
 +{
 +    "FE-lambda",
 +    "box", "box-rel", "box-v", "pres_prev",
 +    "nosehoover-xi", "thermostat-integral",
 +    "x", "v", "SDx", "CGp", "LD-rng", "LD-rng-i",
 +    "disre_initf", "disre_rm3tav",
 +    "orire_initf", "orire_Dtav",
 +    "svir_prev", "nosehoover-vxi", "v_eta", "vol0", "nhpres_xi", "nhpres_vxi", "fvir_prev","fep_state", "MC-rng", "MC-rng-i"
 +};
 +
 +enum { eeksEKIN_N, eeksEKINH, eeksDEKINDL, eeksMVCOS, eeksEKINF, eeksEKINO, eeksEKINSCALEF, eeksEKINSCALEH, eeksVSCALE, eeksEKINTOTAL, eeksNR };
 +
 +const char *eeks_names[eeksNR]=
 +{
 +    "Ekin_n", "Ekinh", "dEkindlambda", "mv_cos",
 +    "Ekinf", "Ekinh_old", "EkinScaleF_NHC", "EkinScaleH_NHC","Vscale_NHC","Ekin_Total"
 +};
 +
 +enum { eenhENERGY_N, eenhENERGY_AVER, eenhENERGY_SUM, eenhENERGY_NSUM,
 +       eenhENERGY_SUM_SIM, eenhENERGY_NSUM_SIM,
 +       eenhENERGY_NSTEPS, eenhENERGY_NSTEPS_SIM, 
 +       eenhENERGY_DELTA_H_NN,
 +       eenhENERGY_DELTA_H_LIST, 
 +       eenhENERGY_DELTA_H_STARTTIME, 
 +       eenhENERGY_DELTA_H_STARTLAMBDA, 
 +       eenhNR };
 +
 +const char *eenh_names[eenhNR]=
 +{
 +    "energy_n", "energy_aver", "energy_sum", "energy_nsum",
 +    "energy_sum_sim", "energy_nsum_sim",
 +    "energy_nsteps", "energy_nsteps_sim", 
 +    "energy_delta_h_nn",
 +    "energy_delta_h_list", 
 +    "energy_delta_h_start_time", 
 +    "energy_delta_h_start_lambda"
 +};
 +
 +/* free energy history variables -- need to be preserved over checkpoint */
 +enum { edfhBEQUIL,edfhNATLAMBDA,edfhWLHISTO,edfhWLDELTA,edfhSUMWEIGHTS,edfhSUMDG,edfhSUMMINVAR,edfhSUMVAR,
 +       edfhACCUMP,edfhACCUMM,edfhACCUMP2,edfhACCUMM2,edfhTIJ,edfhTIJEMP,edfhNR };
 +/* free energy history variable names  */
 +const char *edfh_names[edfhNR]=
 +{
 +    "bEquilibrated","N_at_state", "Wang-Landau_Histogram", "Wang-Landau-delta", "Weights", "Free Energies", "minvar","variance",
 +    "accumulated_plus", "accumulated_minus", "accumulated_plus_2",  "accumulated_minus_2", "Tij", "Tij_empirical"
 +};
 +
 +#ifdef GMX_NATIVE_WINDOWS
 +static int
 +gmx_wintruncate(const char *filename, __int64 size)
 +{
 +#ifdef GMX_FAHCORE
 +    /*we do this elsewhere*/
 +    return 0;
 +#else
 +    FILE *fp;
 +    int   rc;
 +    
 +    fp=fopen(filename,"rb+");
 +    
 +    if(fp==NULL)
 +    {
 +        return -1;
 +    }
 +    
 +    return _chsize_s( fileno(fp), size);
 +#endif
 +}
 +#endif
 +
 +
 +enum { ecprREAL, ecprRVEC, ecprMATRIX };
 +
 +enum { cptpEST, cptpEEKS, cptpEENH, cptpEDFH };
 +/* enums for the different components of checkpoint variables, replacing the hard coded ones.
 +   cptpEST - state variables.
 +   cptpEEKS - Kinetic energy state variables.
 +   cptpEENH - Energy history state variables.
 +   cptpEDFH - free energy history variables.
 +*/
 +
 +
 +static const char *st_names(int cptp,int ecpt)
 +{
 +    switch (cptp)
 +    {
 +    case cptpEST: return est_names [ecpt]; break;
 +    case cptpEEKS: return eeks_names[ecpt]; break;
 +    case cptpEENH: return eenh_names[ecpt]; break;
 +    case cptpEDFH: return edfh_names[ecpt]; break;
 +    }
 +
 +    return NULL;
 +}
 +
 +static void cp_warning(FILE *fp)
 +{
 +    fprintf(fp,"\nWARNING: Checkpoint file is corrupted or truncated\n\n");
 +}
 +
 +static void cp_error()
 +{
 +    gmx_fatal(FARGS,"Checkpoint file corrupted/truncated, or maybe you are out of disk space?");
 +}
 +
 +static void do_cpt_string_err(XDR *xd,gmx_bool bRead,const char *desc,char **s,FILE *list)
 +{
 +    bool_t res=0;
 +    
 +    if (bRead)
 +    {
 +        snew(*s,CPTSTRLEN);
 +    }
 +    res = xdr_string(xd,s,CPTSTRLEN);
 +    if (res == 0)
 +    {
 +        cp_error();
 +    }
 +    if (list)
 +    {
 +        fprintf(list,"%s = %s\n",desc,*s);
 +        sfree(*s);
 +    }
 +}
 +
 +static int do_cpt_int(XDR *xd,const char *desc,int *i,FILE *list)
 +{
 +    bool_t res=0;
 +    
 +    res = xdr_int(xd,i);
 +    if (res == 0)
 +    {
 +        return -1;
 +    }
 +    if (list)
 +    {
 +        fprintf(list,"%s = %d\n",desc,*i);
 +    }
 +    return 0;
 +}
 +
 +static int do_cpt_u_chars(XDR *xd,const char *desc,int n,unsigned char *i,FILE *list)
 +{
 +    bool_t res=1;
 +    int j;
 +    if (list)
 +    {
 +        fprintf(list,"%s = ",desc);
 +    }
 +    for (j=0; j<n && res; j++)
 +    {
 +        res &= xdr_u_char(xd,&i[j]);
 +        if (list)
 +        {
 +            fprintf(list,"%02x",i[j]);
 +        }
 +    }
 +    if (list)
 +    {
 +        fprintf(list,"\n");
 +    }
 +    if (res == 0)
 +    {
 +        return -1;
 +    }
 +
 +    return 0;
 +}
 +
 +static void do_cpt_int_err(XDR *xd,const char *desc,int *i,FILE *list)
 +{
 +    if (do_cpt_int(xd,desc,i,list) < 0)
 +    {
 +        cp_error();
 +    }
 +}
 +
 +static void do_cpt_step_err(XDR *xd,const char *desc,gmx_large_int_t *i,FILE *list)
 +{
 +    bool_t res=0;
 +    char   buf[STEPSTRSIZE];
 +
 +    res = xdr_gmx_large_int(xd,i,"reading checkpoint file");
 +    if (res == 0)
 +    {
 +        cp_error();
 +    }
 +    if (list)
 +    {
 +        fprintf(list,"%s = %s\n",desc,gmx_step_str(*i,buf));
 +    }
 +}
 +
 +static void do_cpt_double_err(XDR *xd,const char *desc,double *f,FILE *list)
 +{
 +    bool_t res=0;
 +    
 +    res = xdr_double(xd,f);
 +    if (res == 0)
 +    {
 +        cp_error();
 +    }
 +    if (list)
 +    {
 +        fprintf(list,"%s = %f\n",desc,*f);
 +    }
 +}
 +
 +/* If nval >= 0, nval is used; on read this should match the passed value.
 + * If nval n<0, *nptr is used; on read the value is stored in nptr
 + */
 +static int do_cpte_reals_low(XDR *xd,int cptp,int ecpt,int sflags,
 +                             int nval,int *nptr,real **v,
 +                             FILE *list,int erealtype)
 +{
 +    bool_t res=0;
 +#ifndef GMX_DOUBLE
 +    int  dtc=xdr_datatype_float; 
 +#else
 +    int  dtc=xdr_datatype_double;
 +#endif
 +    real *vp,*va=NULL;
 +    float  *vf;
 +    double *vd;
 +    int  nf,dt,i;
 +    
 +    if (list == NULL)
 +    {
 +        if (nval >= 0)
 +        {
 +            nf = nval;
 +        }
 +        else
 +        {
 +        if (nptr == NULL)
 +        {
 +            gmx_incons("*ntpr=NULL in do_cpte_reals_low");
 +        }
 +        nf = *nptr;
 +        }
 +    }
 +    res = xdr_int(xd,&nf);
 +    if (res == 0)
 +    {
 +        return -1;
 +    }
 +    if (list == NULL)
 +    {
 +        if (nval >= 0)
 +        {
 +            if (nf != nval)
 +            {
 +                gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),nval,nf);
 +            }
 +        }
 +        else
 +        {
 +            *nptr = nf;
 +        }
 +    }
 +    dt = dtc;
 +    res = xdr_int(xd,&dt);
 +    if (res == 0)
 +    {
 +        return -1;
 +    }
 +    if (dt != dtc)
 +    {
 +        fprintf(stderr,"Precision mismatch for state entry %s, code precision is %s, file precision is %s\n",
 +                st_names(cptp,ecpt),xdr_datatype_names[dtc],
 +                xdr_datatype_names[dt]);
 +    }
 +    if (list || !(sflags & (1<<ecpt)))
 +    {
 +        snew(va,nf);
 +        vp = va;
 +    }
 +    else
 +    {
 +        if (*v == NULL)
 +        {
 +            snew(*v,nf);
 +        }
 +        vp = *v;
 +    }
 +    if (dt == xdr_datatype_float)
 +    {
 +        if (dtc == xdr_datatype_float)
 +        {
 +            vf = (float *)vp;
 +        }
 +        else
 +        {
 +            snew(vf,nf);
 +        }
 +        res = xdr_vector(xd,(char *)vf,nf,
 +                         (unsigned int)sizeof(float),(xdrproc_t)xdr_float);
 +        if (res == 0)
 +        {
 +            return -1;
 +        }
 +        if (dtc != xdr_datatype_float)
 +        {
 +            for(i=0; i<nf; i++)
 +            {
 +                vp[i] = vf[i];
 +            }
 +            sfree(vf);
 +        }
 +    }
 +    else
 +    {
 +        if (dtc == xdr_datatype_double)
 +        {
 +            vd = (double *)vp;
 +        }
 +        else
 +        {
 +            snew(vd,nf);
 +        }
 +        res = xdr_vector(xd,(char *)vd,nf,
 +                         (unsigned int)sizeof(double),(xdrproc_t)xdr_double);
 +        if (res == 0)
 +        {
 +            return -1;
 +        }
 +        if (dtc != xdr_datatype_double)
 +        {
 +            for(i=0; i<nf; i++)
 +            {
 +                vp[i] = vd[i];
 +            }
 +            sfree(vd);
 +        }
 +    }
 +    
 +    if (list)
 +    {
 +        switch (erealtype)
 +        {
 +        case ecprREAL:
 +            pr_reals(list,0,st_names(cptp,ecpt),vp,nf);
 +            break;
 +        case ecprRVEC:
 +            pr_rvecs(list,0,st_names(cptp,ecpt),(rvec *)vp,nf/3);
 +            break;
 +        default:
 +            gmx_incons("Unknown checkpoint real type");
 +        }
 +    }
 +    if (va)
 +    {
 +        sfree(va);
 +    }
 +
 +    return 0;
 +}
 +
 +
 +/* This function stores n along with the reals for reading,
 + * but on reading it assumes that n matches the value in the checkpoint file,
 + * a fatal error is generated when this is not the case.
 + */
 +static int do_cpte_reals(XDR *xd,int cptp,int ecpt,int sflags,
 +                         int n,real **v,FILE *list)
 +{
 +    return do_cpte_reals_low(xd,cptp,ecpt,sflags,n,NULL,v,list,ecprREAL);
 +}
 +
 +/* This function does the same as do_cpte_reals,
 + * except that on reading it ignores the passed value of *n
 + * and stored the value read from the checkpoint file in *n.
 + */
 +static int do_cpte_n_reals(XDR *xd,int cptp,int ecpt,int sflags,
 +                           int *n,real **v,FILE *list)
 +{
 +    return do_cpte_reals_low(xd,cptp,ecpt,sflags,-1,n,v,list,ecprREAL);
 +}
 +
 +static int do_cpte_real(XDR *xd,int cptp,int ecpt,int sflags,
 +                        real *r,FILE *list)
 +{
 +    int n;
 +
 +    return do_cpte_reals_low(xd,cptp,ecpt,sflags,1,NULL,&r,list,ecprREAL);
 +}
 +
 +static int do_cpte_ints(XDR *xd,int cptp,int ecpt,int sflags,
 +                        int n,int **v,FILE *list)
 +{
 +    bool_t res=0;
 +    int  dtc=xdr_datatype_int;
 +    int *vp,*va=NULL;
 +    int  nf,dt,i;
 +    
 +    nf = n;
 +    res = xdr_int(xd,&nf);
 +    if (res == 0)
 +    {
 +        return -1;
 +    }
 +    if (list == NULL && v != NULL && nf != n)
 +    {
 +        gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),n,nf);
 +    }
 +    dt = dtc;
 +    res = xdr_int(xd,&dt);
 +    if (res == 0)
 +    {
 +        return -1;
 +    }
 +    if (dt != dtc)
 +    {
 +        gmx_fatal(FARGS,"Type mismatch for state entry %s, code type is %s, file type is %s\n",
 +                  st_names(cptp,ecpt),xdr_datatype_names[dtc],
 +                  xdr_datatype_names[dt]);
 +    }
 +    if (list || !(sflags & (1<<ecpt)) || v == NULL)
 +    {
 +        snew(va,nf);
 +        vp = va;
 +    }
 +    else
 +    {
 +        if (*v == NULL)
 +        {
 +            snew(*v,nf);
 +        }
 +        vp = *v;
 +    }
 +    res = xdr_vector(xd,(char *)vp,nf,
 +                     (unsigned int)sizeof(int),(xdrproc_t)xdr_int);
 +    if (res == 0)
 +    {
 +        return -1;
 +    }
 +    if (list)
 +    {
 +        pr_ivec(list,0,st_names(cptp,ecpt),vp,nf,TRUE);
 +    }
 +    if (va)
 +    {
 +        sfree(va);
 +    }
 +
 +    return 0;
 +}
 +
 +static int do_cpte_int(XDR *xd,int cptp,int ecpt,int sflags,
 +                       int *i,FILE *list)
 +{
 +    return do_cpte_ints(xd,cptp,ecpt,sflags,1,&i,list);
 +}
 +
 +static int do_cpte_doubles(XDR *xd,int cptp,int ecpt,int sflags,
 +                           int n,double **v,FILE *list)
 +{
 +    bool_t res=0;
 +    int  dtc=xdr_datatype_double;
 +    double *vp,*va=NULL;
 +    int  nf,dt,i;
 +    
 +    nf = n;
 +    res = xdr_int(xd,&nf);
 +    if (res == 0)
 +    {
 +        return -1;
 +    }
 +    if (list == NULL && nf != n)
 +    {
 +        gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),n,nf);
 +    }
 +    dt = dtc;
 +    res = xdr_int(xd,&dt);
 +    if (res == 0)
 +    {
 +        return -1;
 +    }
 +    if (dt != dtc)
 +    {
 +        gmx_fatal(FARGS,"Precision mismatch for state entry %s, code precision is %s, file precision is %s\n",
 +                  st_names(cptp,ecpt),xdr_datatype_names[dtc],
 +                  xdr_datatype_names[dt]);
 +    }
 +    if (list || !(sflags & (1<<ecpt)))
 +    {
 +        snew(va,nf);
 +        vp = va;
 +    }
 +    else
 +    {
 +        if (*v == NULL)
 +        {
 +            snew(*v,nf);
 +        }
 +        vp = *v;
 +    }
 +    res = xdr_vector(xd,(char *)vp,nf,
 +                     (unsigned int)sizeof(double),(xdrproc_t)xdr_double);
 +    if (res == 0)
 +    {
 +        return -1;
 +    }
 +    if (list)
 +    {
 +        pr_doubles(list,0,st_names(cptp,ecpt),vp,nf);
 +    }
 +    if (va)
 +    {
 +        sfree(va);
 +    }
 +
 +    return 0;
 +}
 +
 +static int do_cpte_double(XDR *xd,int cptp,int ecpt,int sflags,
 +                          double *r,FILE *list)
 +{
 +    return do_cpte_doubles(xd,cptp,ecpt,sflags,1,&r,list);
 +}
 +
 +
 +static int do_cpte_rvecs(XDR *xd,int cptp,int ecpt,int sflags,
 +                         int n,rvec **v,FILE *list)
 +{
 +    int n3;
 +
 +    return do_cpte_reals_low(xd,cptp,ecpt,sflags,
 +                             n*DIM,NULL,(real **)v,list,ecprRVEC);
 +}
 +
 +static int do_cpte_matrix(XDR *xd,int cptp,int ecpt,int sflags,
 +                          matrix v,FILE *list)
 +{
 +    real *vr;
 +    real ret;
 +
 +    vr = (real *)&(v[0][0]);
 +    ret = do_cpte_reals_low(xd,cptp,ecpt,sflags,
 +                            DIM*DIM,NULL,&vr,NULL,ecprMATRIX);
 +    
 +    if (list && ret == 0)
 +    {
 +        pr_rvecs(list,0,st_names(cptp,ecpt),v,DIM);
 +    }
 +    
 +    return ret;
 +}
 +
 +
 +static int do_cpte_nmatrix(XDR *xd,int cptp,int ecpt,int sflags,
 +                           int n, real **v,FILE *list)
 +{
 +    int i;
 +    real *vr;
 +    real ret,reti;
 +    char name[CPTSTRLEN];
 +
 +    ret = 0;
 +    if (v==NULL)
 +    {
 +        snew(v,n);
 +    }
 +    for (i=0;i<n;i++)
 +    {
 +        reti = 0;
 +        vr = v[i];
 +        reti = do_cpte_reals_low(xd,cptp,ecpt,sflags,n,NULL,&(v[i]),NULL,ecprREAL);
 +        if (list && reti == 0)
 +        {
 +            sprintf(name,"%s[%d]",st_names(cptp,ecpt),i);
 +            pr_reals(list,0,name,v[i],n);
 +        }
 +        if (reti == 0)
 +        {
 +            ret = 0;
 +        }
 +    }
 +    return ret;
 +}
 +
 +static int do_cpte_matrices(XDR *xd,int cptp,int ecpt,int sflags,
 +                            int n,matrix **v,FILE *list)
 +{
 +    bool_t res=0;
 +    matrix *vp,*va=NULL;
 +    real *vr;
 +    int  nf,i,j,k;
 +    int  ret;
 +
 +    nf = n;
 +    res = xdr_int(xd,&nf);
 +    if (res == 0)
 +    {
 +        return -1;
 +    }
 +    if (list == NULL && nf != n)
 +    {
 +        gmx_fatal(FARGS,"Count mismatch for state entry %s, code count is %d, file count is %d\n",st_names(cptp,ecpt),n,nf);
 +    }
 +    if (list || !(sflags & (1<<ecpt)))
 +    {
 +        snew(va,nf);
 +        vp = va;
 +    }
 +    else
 +    {
 +        if (*v == NULL)
 +        {
 +            snew(*v,nf);
 +        }
 +        vp = *v;
 +    }
 +    snew(vr,nf*DIM*DIM);
 +    for(i=0; i<nf; i++)
 +    {
 +        for(j=0; j<DIM; j++)
 +        {
 +            for(k=0; k<DIM; k++)
 +            {
 +                vr[(i*DIM+j)*DIM+k] = vp[i][j][k];
 +            }
 +        }
 +    }
 +    ret = do_cpte_reals_low(xd,cptp,ecpt,sflags,
 +                            nf*DIM*DIM,NULL,&vr,NULL,ecprMATRIX);
 +    for(i=0; i<nf; i++)
 +    {
 +        for(j=0; j<DIM; j++)
 +        {
 +            for(k=0; k<DIM; k++)
 +            {
 +                vp[i][j][k] = vr[(i*DIM+j)*DIM+k];
 +            }
 +        }
 +    }
 +    sfree(vr);
 +    
 +    if (list && ret == 0)
 +    {
 +        for(i=0; i<nf; i++)
 +        {
 +            pr_rvecs(list,0,st_names(cptp,ecpt),vp[i],DIM);
 +        }
 +    }
 +    if (va)
 +    {
 +        sfree(va);
 +    }
 +    
 +    return ret;
 +}
 +
 +static void do_cpt_header(XDR *xd,gmx_bool bRead,int *file_version,
 +                          char **version,char **btime,char **buser,char **bhost,
 +                          int *double_prec,
 +                          char **fprog,char **ftime,
 +                          int *eIntegrator,int *simulation_part,
 +                          gmx_large_int_t *step,double *t,
 +                          int *nnodes,int *dd_nc,int *npme,
 +                          int *natoms,int *ngtc, int *nnhpres, int *nhchainlength,
 +                          int *nlambda, int *flags_state,
 +                          int *flags_eks,int *flags_enh, int *flags_dfh,
 +                          FILE *list)
 +{
 +    bool_t res=0;
 +    int  magic;
 +    int  idum=0;
 +    int  i;
 +    char *fhost;
 +
 +    if (bRead)
 +    {
 +        magic = -1;
 +    }
 +    else
 +    {
 +        magic = CPT_MAGIC1;
 +    }
 +    res = xdr_int(xd,&magic);
 +    if (res == 0)
 +    {
 +        gmx_fatal(FARGS,"The checkpoint file is empty/corrupted, or maybe you are out of disk space?");
 +    }
 +    if (magic != CPT_MAGIC1)
 +    {
 +        gmx_fatal(FARGS,"Start of file magic number mismatch, checkpoint file has %d, should be %d\n"
 +                  "The checkpoint file is corrupted or not a checkpoint file",
 +                  magic,CPT_MAGIC1);
 +    }
 +    if (!bRead)
 +    {
 +        snew(fhost,255);
 +#ifdef HAVE_UNISTD_H
 +        if (gethostname(fhost,255) != 0)
 +        {
 +            sprintf(fhost,"unknown");
 +        }
 +#else
 +        sprintf(fhost,"unknown");
 +#endif  
 +    }
 +    do_cpt_string_err(xd,bRead,"GROMACS version"           ,version,list);
 +    do_cpt_string_err(xd,bRead,"GROMACS build time"        ,btime,list);
 +    do_cpt_string_err(xd,bRead,"GROMACS build user"        ,buser,list);
 +    do_cpt_string_err(xd,bRead,"GROMACS build host"        ,bhost,list);
 +    do_cpt_string_err(xd,bRead,"generating program"        ,fprog,list);
 +    do_cpt_string_err(xd,bRead,"generation time"           ,ftime,list);
 +    *file_version = cpt_version;
 +    do_cpt_int_err(xd,"checkpoint file version",file_version,list);
 +    if (*file_version > cpt_version)
 +    {
 +        gmx_fatal(FARGS,"Attempting to read a checkpoint file of version %d with code of version %d\n",*file_version,cpt_version);
 +    }
 +    if (*file_version >= 13)
 +    {
 +        do_cpt_int_err(xd,"GROMACS double precision",double_prec,list);
 +    }
 +    else
 +    {
 +        *double_prec = -1;
 +    }
 +    if (*file_version >= 12)
 +    {
 +        do_cpt_string_err(xd,bRead,"generating host"           ,&fhost,list);
 +        if (list == NULL)
 +        {
 +            sfree(fhost);
 +        }
 +    }
 +    do_cpt_int_err(xd,"#atoms"            ,natoms     ,list);
 +    do_cpt_int_err(xd,"#T-coupling groups",ngtc       ,list);
 +    if (*file_version >= 10) 
 +    {
 +        do_cpt_int_err(xd,"#Nose-Hoover T-chains",nhchainlength,list);
 +    }
 +    else
 +    {
 +        *nhchainlength = 1;
 +    }
 +    if (*file_version >= 11)
 +    {
 +        do_cpt_int_err(xd,"#Nose-Hoover T-chains for barostat ",nnhpres,list);
 +    }
 +    else
 +    {
 +        *nnhpres = 0;
 +    }
-       if (*file_version >= 12)
++    if (*file_version >= 14)
 +    {
 +        do_cpt_int_err(xd,"# of total lambda states ",nlambda,list);
 +    }
 +    else
 +    {
 +        *nlambda = 0;
 +    }
 +    do_cpt_int_err(xd,"integrator"        ,eIntegrator,list);
 +      if (*file_version >= 3)
 +      {
 +              do_cpt_int_err(xd,"simulation part #", simulation_part,list);
 +      }
 +      else
 +      {
 +              *simulation_part = 1;
 +      }
 +    if (*file_version >= 5)
 +    {
 +        do_cpt_step_err(xd,"step"         ,step       ,list);
 +    }
 +    else
 +    {
 +        do_cpt_int_err(xd,"step"          ,&idum      ,list);
 +        *step = idum;
 +    }
 +    do_cpt_double_err(xd,"t"              ,t          ,list);
 +    do_cpt_int_err(xd,"#PP-nodes"         ,nnodes     ,list);
 +    idum = 1;
 +    do_cpt_int_err(xd,"dd_nc[x]",dd_nc ? &(dd_nc[0]) : &idum,list);
 +    do_cpt_int_err(xd,"dd_nc[y]",dd_nc ? &(dd_nc[1]) : &idum,list);
 +    do_cpt_int_err(xd,"dd_nc[z]",dd_nc ? &(dd_nc[2]) : &idum,list);
 +    do_cpt_int_err(xd,"#PME-only nodes",npme,list);
 +    do_cpt_int_err(xd,"state flags",flags_state,list);
 +      if (*file_version >= 4)
 +    {
 +        do_cpt_int_err(xd,"ekin data flags",flags_eks,list);
 +        do_cpt_int_err(xd,"energy history flags",flags_enh,list);
 +    }
 +    else
 +    {
 +        *flags_eks  = 0;
 +        *flags_enh   = (*flags_state >> (estORIRE_DTAV+1));
 +        *flags_state = (*flags_state & ~((1<<(estORIRE_DTAV+1)) |
 +                                         (1<<(estORIRE_DTAV+2)) |
 +                                         (1<<(estORIRE_DTAV+3))));
 +    }
++      if (*file_version >= 14)
 +    {
 +        do_cpt_int_err(xd,"df history flags",flags_dfh,list);
 +    } else {
 +        *flags_dfh = 0;
 +    }
 +}
 +
 +static int do_cpt_footer(XDR *xd,gmx_bool bRead,int file_version)
 +{
 +    bool_t res=0;
 +    int  magic;
 +    
 +    if (file_version >= 2)
 +    {
 +        magic = CPT_MAGIC2;
 +        res = xdr_int(xd,&magic);
 +        if (res == 0)
 +        {
 +            cp_error();
 +        }
 +        if (magic != CPT_MAGIC2)
 +        {
 +            return -1;
 +        }
 +    }
 +
 +    return 0;
 +}
 +
 +static int do_cpt_state(XDR *xd,gmx_bool bRead,
 +                        int fflags,t_state *state,
 +                        gmx_bool bReadRNG,FILE *list)
 +{
 +    int  sflags;
 +    int  **rng_p,**rngi_p;
 +    int  i;
 +    int  ret;
 +    int  nnht,nnhtp;
 +
 +    ret = 0;
 +    
 +    nnht = state->nhchainlength*state->ngtc;
 +    nnhtp = state->nhchainlength*state->nnhpres;
 +
 +    if (bReadRNG)
 +    {
 +        rng_p  = (int **)&state->ld_rng;
 +        rngi_p = &state->ld_rngi;
 +    }
 +    else
 +    {
 +        /* Do not read the RNG data */
 +        rng_p  = NULL;
 +        rngi_p = NULL;
 +    }
 +    /* We want the MC_RNG the same across all the notes for now -- lambda MC is global */
 +
 +    sflags = state->flags;
 +    for(i=0; (i<estNR && ret == 0); i++)
 +    {
 +        if (fflags & (1<<i))
 +        {
 +            switch (i)
 +            {
 +            case estLAMBDA:  ret = do_cpte_reals(xd,cptpEST,i,sflags,efptNR,&(state->lambda),list); break;
 +            case estFEPSTATE: ret = do_cpte_int (xd,cptpEST,i,sflags,&state->fep_state,list); break;
 +            case estBOX:     ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->box,list); break;
 +            case estBOX_REL: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->box_rel,list); break;
 +            case estBOXV:    ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->boxv,list); break;
 +            case estPRES_PREV: ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->pres_prev,list); break;
 +            case estSVIR_PREV:  ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->svir_prev,list); break;
 +            case estFVIR_PREV:  ret = do_cpte_matrix(xd,cptpEST,i,sflags,state->fvir_prev,list); break;
 +            case estNH_XI:   ret = do_cpte_doubles(xd,cptpEST,i,sflags,nnht,&state->nosehoover_xi,list); break;
 +            case estNH_VXI:  ret = do_cpte_doubles(xd,cptpEST,i,sflags,nnht,&state->nosehoover_vxi,list); break;
 +            case estNHPRES_XI:   ret = do_cpte_doubles(xd,cptpEST,i,sflags,nnhtp,&state->nhpres_xi,list); break;
 +            case estNHPRES_VXI:  ret = do_cpte_doubles(xd,cptpEST,i,sflags,nnhtp,&state->nhpres_vxi,list); break;
 +            case estTC_INT:  ret = do_cpte_doubles(xd,cptpEST,i,sflags,state->ngtc,&state->therm_integral,list); break;
 +            case estVETA:    ret = do_cpte_real(xd,cptpEST,i,sflags,&state->veta,list); break;
 +            case estVOL0:    ret = do_cpte_real(xd,cptpEST,i,sflags,&state->vol0,list); break;
 +            case estX:       ret = do_cpte_rvecs(xd,cptpEST,i,sflags,state->natoms,&state->x,list); break;
 +            case estV:       ret = do_cpte_rvecs(xd,cptpEST,i,sflags,state->natoms,&state->v,list); break;
 +            case estSDX:     ret = do_cpte_rvecs(xd,cptpEST,i,sflags,state->natoms,&state->sd_X,list); break;
 +            case estLD_RNG:  ret = do_cpte_ints(xd,cptpEST,i,sflags,state->nrng,rng_p,list); break;
 +            case estLD_RNGI: ret = do_cpte_ints(xd,cptpEST,i,sflags,state->nrngi,rngi_p,list); break;
 +            case estMC_RNG:  ret = do_cpte_ints(xd,cptpEST,i,sflags,state->nmcrng,(int **)&state->mc_rng,list); break;
 +            case estMC_RNGI: ret = do_cpte_ints(xd,cptpEST,i,sflags,1,&state->mc_rngi,list); break;
 +            case estDISRE_INITF:  ret = do_cpte_real (xd,cptpEST,i,sflags,&state->hist.disre_initf,list); break;
 +            case estDISRE_RM3TAV: ret = do_cpte_reals(xd,cptpEST,i,sflags,state->hist.ndisrepairs,&state->hist.disre_rm3tav,list); break;
 +            case estORIRE_INITF:  ret = do_cpte_real (xd,cptpEST,i,sflags,&state->hist.orire_initf,list); break;
 +            case estORIRE_DTAV:   ret = do_cpte_reals(xd,cptpEST,i,sflags,state->hist.norire_Dtav,&state->hist.orire_Dtav,list); break;
 +            default:
 +                gmx_fatal(FARGS,"Unknown state entry %d\n"
 +                          "You are probably reading a new checkpoint file with old code",i);
 +            }
 +        }
 +    }
 +    
 +    return ret;
 +}
 +
 +static int do_cpt_ekinstate(XDR *xd,gmx_bool bRead,
 +                            int fflags,ekinstate_t *ekins,
 +                            FILE *list)
 +{
 +    int  i;
 +    int  ret;
 +
 +    ret = 0;
 +
 +    for(i=0; (i<eeksNR && ret == 0); i++)
 +    {
 +        if (fflags & (1<<i))
 +        {
 +            switch (i)
 +            {
 +                
 +                      case eeksEKIN_N:     ret = do_cpte_int(xd,cptpEEKS,i,fflags,&ekins->ekin_n,list); break;
 +                      case eeksEKINH :     ret = do_cpte_matrices(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinh,list); break;
 +                      case eeksEKINF:      ret = do_cpte_matrices(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinf,list); break;
 +                      case eeksEKINO:      ret = do_cpte_matrices(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinh_old,list); break;
 +            case eeksEKINTOTAL:  ret = do_cpte_matrix(xd,cptpEEKS,i,fflags,ekins->ekin_total,list); break;
 +            case eeksEKINSCALEF: ret = do_cpte_doubles(xd,cptpEEKS,i,fflags,ekins->ekin_n,&ekins->ekinscalef_nhc,list); break;
 +            case eeksVSCALE:     ret = do_cpte_doubles(xd,1,cptpEEKS,fflags,ekins->ekin_n,&ekins->vscale_nhc,list); break;
 +            case eeksEKINSCALEH: ret = do_cpte_doubles(xd,1,cptpEEKS,fflags,ekins->ekin_n,&ekins->ekinscaleh_nhc,list); break;
 +                      case eeksDEKINDL :   ret = do_cpte_real(xd,1,cptpEEKS,fflags,&ekins->dekindl,list); break;
 +            case eeksMVCOS:      ret = do_cpte_real(xd,1,cptpEEKS,fflags,&ekins->mvcos,list); break;
 +            default:
 +                gmx_fatal(FARGS,"Unknown ekin data state entry %d\n"
 +                          "You are probably reading a new checkpoint file with old code",i);
 +            }
 +        }
 +    }
 +    
 +    return ret;
 +}
 +
 +
 +static int do_cpt_enerhist(XDR *xd,gmx_bool bRead,
 +                           int fflags,energyhistory_t *enerhist,
 +                           FILE *list)
 +{
 +    int  i;
 +    int  j;
 +    int  ret;
 +
 +    ret = 0;
 +
 +    if (bRead)
 +    {
 +        enerhist->nsteps     = 0;
 +        enerhist->nsum       = 0;
 +        enerhist->nsteps_sim = 0;
 +        enerhist->nsum_sim   = 0;
 +        enerhist->dht        = NULL;
 +
 +        if (fflags & (1<< eenhENERGY_DELTA_H_NN) )
 +        {
 +            snew(enerhist->dht,1);
 +            enerhist->dht->ndh = NULL;
 +            enerhist->dht->dh = NULL;
 +            enerhist->dht->start_lambda_set=FALSE;
 +        }
 +    }
 +
 +    for(i=0; (i<eenhNR && ret == 0); i++)
 +    {
 +        if (fflags & (1<<i))
 +        {
 +            switch (i)
 +            {
 +                      case eenhENERGY_N:     ret = do_cpte_int(xd,cptpEENH,i,fflags,&enerhist->nener,list); break;
 +                      case eenhENERGY_AVER:  ret = do_cpte_doubles(xd,cptpEENH,i,fflags,enerhist->nener,&enerhist->ener_ave,list); break;
 +                      case eenhENERGY_SUM:   ret = do_cpte_doubles(xd,cptpEENH,i,fflags,enerhist->nener,&enerhist->ener_sum,list); break;
 +            case eenhENERGY_NSUM:  do_cpt_step_err(xd,eenh_names[i],&enerhist->nsum,list); break;
 +            case eenhENERGY_SUM_SIM: ret = do_cpte_doubles(xd,cptpEENH,i,fflags,enerhist->nener,&enerhist->ener_sum_sim,list); break;
 +            case eenhENERGY_NSUM_SIM:   do_cpt_step_err(xd,eenh_names[i],&enerhist->nsum_sim,list); break;
 +            case eenhENERGY_NSTEPS:     do_cpt_step_err(xd,eenh_names[i],&enerhist->nsteps,list); break;
 +            case eenhENERGY_NSTEPS_SIM: do_cpt_step_err(xd,eenh_names[i],&enerhist->nsteps_sim,list); break;
 +            case eenhENERGY_DELTA_H_NN: do_cpt_int_err(xd,eenh_names[i], &(enerhist->dht->nndh), list);
 +                if (bRead) /* now allocate memory for it */
 +                {
 +                    snew(enerhist->dht->dh, enerhist->dht->nndh);
 +                    snew(enerhist->dht->ndh, enerhist->dht->nndh);
 +                    for(j=0;j<enerhist->dht->nndh;j++)
 +                    {
 +                        enerhist->dht->ndh[j] = 0;
 +                        enerhist->dht->dh[j] = NULL;
 +                    }
 +                }
 +                break;
 +            case eenhENERGY_DELTA_H_LIST:
 +                for(j=0;j<enerhist->dht->nndh;j++)
 +                {
 +                    ret=do_cpte_n_reals(xd, cptpEENH, i, fflags, &enerhist->dht->ndh[j], &(enerhist->dht->dh[j]), list);
 +                }
 +                break;
 +            case eenhENERGY_DELTA_H_STARTTIME:
 +                ret=do_cpte_double(xd, cptpEENH, i, fflags, &(enerhist->dht->start_time), list); break;
 +            case eenhENERGY_DELTA_H_STARTLAMBDA:
 +                ret=do_cpte_double(xd, cptpEENH, i, fflags, &(enerhist->dht->start_lambda), list); break;
 +            default:
 +                gmx_fatal(FARGS,"Unknown energy history entry %d\n"
 +                          "You are probably reading a new checkpoint file with old code",i);
 +            }
 +        }
 +    }
 +
 +    if ((fflags & (1<<eenhENERGY_SUM)) && !(fflags & (1<<eenhENERGY_SUM_SIM)))
 +    {
 +        /* Assume we have an old file format and copy sum to sum_sim */
 +        srenew(enerhist->ener_sum_sim,enerhist->nener);
 +        for(i=0; i<enerhist->nener; i++)
 +        {
 +            enerhist->ener_sum_sim[i] = enerhist->ener_sum[i];
 +        }
 +        fflags |= (1<<eenhENERGY_SUM_SIM);
 +    }
 +    
 +    if ( (fflags & (1<<eenhENERGY_NSUM)) &&
 +        !(fflags & (1<<eenhENERGY_NSTEPS)))
 +    {
 +        /* Assume we have an old file format and copy nsum to nsteps */
 +        enerhist->nsteps = enerhist->nsum;
 +        fflags |= (1<<eenhENERGY_NSTEPS);
 +    }
 +    if ( (fflags & (1<<eenhENERGY_NSUM_SIM)) &&
 +        !(fflags & (1<<eenhENERGY_NSTEPS_SIM)))
 +    {
 +        /* Assume we have an old file format and copy nsum to nsteps */
 +        enerhist->nsteps_sim = enerhist->nsum_sim;
 +        fflags |= (1<<eenhENERGY_NSTEPS_SIM);
 +    }
 +
 +    return ret;
 +}
 +
 +static int do_cpt_df_hist(XDR *xd,gmx_bool bRead,int fflags,df_history_t *dfhist,FILE *list)
 +{
 +    int  i,nlambda;
 +    int  ret;
 +
 +    nlambda = dfhist->nlambda;
 +    ret = 0;
 +
 +    for(i=0; (i<edfhNR && ret == 0); i++)
 +    {
 +        if (fflags & (1<<i))
 +        {
 +            switch (i)
 +            {
 +            case edfhBEQUIL:       ret = do_cpte_int(xd,cptpEDFH,i,fflags,&dfhist->bEquil,list); break;
 +            case edfhNATLAMBDA:    ret = do_cpte_ints(xd,cptpEDFH,i,fflags,nlambda,&dfhist->n_at_lam,list); break;
 +            case edfhWLHISTO:      ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->wl_histo,list); break;
 +            case edfhWLDELTA:      ret = do_cpte_real(xd,cptpEDFH,i,fflags,&dfhist->wl_delta,list); break;
 +            case edfhSUMWEIGHTS:   ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->sum_weights,list); break;
 +            case edfhSUMDG:        ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->sum_dg,list); break;
 +            case edfhSUMMINVAR:    ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->sum_minvar,list); break;
 +            case edfhSUMVAR:       ret = do_cpte_reals(xd,cptpEDFH,i,fflags,nlambda,&dfhist->sum_variance,list); break;
 +            case edfhACCUMP:       ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->accum_p,list); break;
 +            case edfhACCUMM:       ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->accum_m,list); break;
 +            case edfhACCUMP2:      ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->accum_p2,list); break;
 +            case edfhACCUMM2:      ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->accum_m2,list); break;
 +            case edfhTIJ:          ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->Tij,list); break;
 +            case edfhTIJEMP:       ret = do_cpte_nmatrix(xd,cptpEDFH,i,fflags,nlambda,dfhist->Tij_empirical,list); break;
 +
 +            default:
 +                gmx_fatal(FARGS,"Unknown df history entry %d\n"
 +                          "You are probably reading a new checkpoint file with old code",i);
 +            }
 +        }
 +    }
 +
 +    return ret;
 +}
 +
 +static int do_cpt_files(XDR *xd, gmx_bool bRead, 
 +                        gmx_file_position_t **p_outputfiles, int *nfiles, 
 +                        FILE *list, int file_version)
 +{
 +    int    i,j;
 +    gmx_off_t  offset;
 +    gmx_off_t  mask = 0xFFFFFFFFL;
 +    int    offset_high,offset_low;
 +    char   *buf;
 +    gmx_file_position_t *outputfiles;
 +
 +    if (do_cpt_int(xd,"number of output files",nfiles,list) != 0)
 +    {
 +        return -1;
 +    }
 +
 +    if(bRead)
 +    {
 +        snew(*p_outputfiles,*nfiles);
 +    }
 +
 +    outputfiles = *p_outputfiles;
 +
 +    for(i=0;i<*nfiles;i++)
 +    {
 +        /* 64-bit XDR numbers are not portable, so it is stored as separate high/low fractions */
 +        if(bRead)
 +        {
 +            do_cpt_string_err(xd,bRead,"output filename",&buf,list);
 +            strncpy(outputfiles[i].filename,buf,CPTSTRLEN-1);
 +            if(list==NULL)
 +            {
 +                sfree(buf);                   
 +            }
 +
 +            if (do_cpt_int(xd,"file_offset_high",&offset_high,list) != 0)
 +            {
 +                return -1;
 +            }
 +            if (do_cpt_int(xd,"file_offset_low",&offset_low,list) != 0)
 +            {
 +                return -1;
 +            }
 +#if (SIZEOF_GMX_OFF_T > 4)
 +            outputfiles[i].offset = ( ((gmx_off_t) offset_high) << 32 ) | ( (gmx_off_t) offset_low & mask );
 +#else
 +            outputfiles[i].offset = offset_low;
 +#endif
 +        }
 +        else
 +        {
 +            buf = outputfiles[i].filename;
 +            do_cpt_string_err(xd,bRead,"output filename",&buf,list);
 +            /* writing */
 +            offset      = outputfiles[i].offset;
 +            if (offset == -1)
 +            {
 +                offset_low  = -1;
 +                offset_high = -1;
 +            }
 +            else
 +            {
 +#if (SIZEOF_GMX_OFF_T > 4)
 +                offset_low  = (int) (offset & mask);
 +                offset_high = (int) ((offset >> 32) & mask);
 +#else
 +                offset_low  = offset;
 +                offset_high = 0;
 +#endif
 +            }
 +            if (do_cpt_int(xd,"file_offset_high",&offset_high,list) != 0)
 +            {
 +                return -1;
 +            }
 +            if (do_cpt_int(xd,"file_offset_low",&offset_low,list) != 0)
 +            {
 +                return -1;
 +            }
 +        }
 +        if (file_version >= 8)
 +        {
 +            if (do_cpt_int(xd,"file_checksum_size",&(outputfiles[i].chksum_size),
 +                           list) != 0)
 +            {
 +                return -1;
 +            }
 +            if (do_cpt_u_chars(xd,"file_checksum",16,outputfiles[i].chksum,list) != 0)
 +            {
 +                return -1;
 +            }
 +        } 
 +        else 
 +        {
 +            outputfiles[i].chksum_size = -1;
 +        }
 +    }
 +    return 0;
 +}
 +
 +
 +void write_checkpoint(const char *fn,gmx_bool bNumberAndKeep,
 +                      FILE *fplog,t_commrec *cr,
 +                      int eIntegrator,int simulation_part,
 +                      gmx_bool bExpanded, int elamstats,
 +                      gmx_large_int_t step,double t,t_state *state)
 +{
 +    t_fileio *fp;
 +    int  file_version;
 +    char *version;
 +    char *btime;
 +    char *buser;
 +    char *bhost;
 +    int  double_prec;
 +    char *fprog;
 +    char *fntemp; /* the temporary checkpoint file name */
 +    time_t now;
 +    char timebuf[STRLEN];
 +    int  nppnodes,npmenodes,flag_64bit;
 +    char buf[1024],suffix[5+STEPSTRSIZE],sbuf[STEPSTRSIZE];
 +    gmx_file_position_t *outputfiles;
 +    int  noutputfiles;
 +    char *ftime;
 +    int  flags_eks,flags_enh,flags_dfh,i;
 +    t_fileio *ret;
 +              
 +    if (PAR(cr))
 +    {
 +        if (DOMAINDECOMP(cr))
 +        {
 +            nppnodes  = cr->dd->nnodes;
 +            npmenodes = cr->npmenodes;
 +        }
 +        else
 +        {
 +            nppnodes  = cr->nnodes;
 +            npmenodes = 0;
 +        }
 +    }
 +    else
 +    {
 +        nppnodes  = 1;
 +        npmenodes = 0;
 +    }
 +
 +    /* make the new temporary filename */
 +    snew(fntemp, strlen(fn)+5+STEPSTRSIZE);
 +    strcpy(fntemp,fn);
 +    fntemp[strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
 +    sprintf(suffix,"_%s%s","step",gmx_step_str(step,sbuf));
 +    strcat(fntemp,suffix);
 +    strcat(fntemp,fn+strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1);
 +   
 +    time(&now);
 +    gmx_ctime_r(&now,timebuf,STRLEN);
 +
 +    if (fplog)
 +    { 
 +        fprintf(fplog,"Writing checkpoint, step %s at %s\n\n",
 +                gmx_step_str(step,buf),timebuf);
 +    }
 +    
 +    /* Get offsets for open files */
 +    gmx_fio_get_output_file_positions(&outputfiles, &noutputfiles);
 +
 +    fp = gmx_fio_open(fntemp,"w");
 +      
 +    if (state->ekinstate.bUpToDate)
 +    {
 +        flags_eks =
 +            ((1<<eeksEKIN_N) | (1<<eeksEKINH) | (1<<eeksEKINF) | 
 +             (1<<eeksEKINO) | (1<<eeksEKINSCALEF) | (1<<eeksEKINSCALEH) | 
 +             (1<<eeksVSCALE) | (1<<eeksDEKINDL) | (1<<eeksMVCOS));
 +    }
 +    else
 +    {
 +        flags_eks = 0;
 +    }
 +
 +    flags_enh = 0;
 +    if (state->enerhist.nsum > 0 || state->enerhist.nsum_sim > 0)
 +    {
 +        flags_enh |= (1<<eenhENERGY_N);
 +        if (state->enerhist.nsum > 0)
 +        {
 +            flags_enh |= ((1<<eenhENERGY_AVER) | (1<<eenhENERGY_SUM) |
 +                          (1<<eenhENERGY_NSTEPS) | (1<<eenhENERGY_NSUM));
 +        }
 +        if (state->enerhist.nsum_sim > 0)
 +        {
 +            flags_enh |= ((1<<eenhENERGY_SUM_SIM) | (1<<eenhENERGY_NSTEPS_SIM) |
 +                          (1<<eenhENERGY_NSUM_SIM));
 +        }
 +        if (state->enerhist.dht)
 +        {
 +            flags_enh |= ( (1<< eenhENERGY_DELTA_H_NN) |
 +                           (1<< eenhENERGY_DELTA_H_LIST) | 
 +                           (1<< eenhENERGY_DELTA_H_STARTTIME) |
 +                           (1<< eenhENERGY_DELTA_H_STARTLAMBDA) );
 +        }
 +    }
 +
 +    if (bExpanded)
 +    {
 +        flags_dfh = ((1<<edfhBEQUIL) | (1<<edfhNATLAMBDA) | (1<<edfhSUMWEIGHTS) |  (1<<edfhSUMDG)  |
 +                     (1<<edfhTIJ) | (1<<edfhTIJEMP));
 +        if (EWL(elamstats))
 +        {
 +            flags_dfh |= ((1<<edfhWLDELTA) | (1<<edfhWLHISTO));
 +        }
 +        if ((elamstats == elamstatsMINVAR) || (elamstats == elamstatsBARKER) || (elamstats == elamstatsMETROPOLIS))
 +        {
 +            flags_dfh |= ((1<<edfhACCUMP) | (1<<edfhACCUMM) | (1<<edfhACCUMP2) | (1<<edfhACCUMM2)
 +                          | (1<<edfhSUMMINVAR) | (1<<edfhSUMVAR));
 +        }
 +    } else {
 +        flags_dfh = 0;
 +    }
 +    
 +    /* We can check many more things now (CPU, acceleration, etc), but
 +     * it is highly unlikely to have two separate builds with exactly
 +     * the same version, user, time, and build host!
 +     */
 +
 +    version = gmx_strdup(VERSION);
 +    btime   = gmx_strdup(BUILD_TIME);
 +    buser   = gmx_strdup(BUILD_USER);
 +    bhost   = gmx_strdup(BUILD_HOST);
 +
 +    double_prec = GMX_CPT_BUILD_DP;
 +    fprog   = gmx_strdup(Program());
 +
 +    ftime   = &(timebuf[0]);
 +    
 +    do_cpt_header(gmx_fio_getxdr(fp),FALSE,&file_version,
 +                  &version,&btime,&buser,&bhost,&double_prec,&fprog,&ftime,
 +                  &eIntegrator,&simulation_part,&step,&t,&nppnodes,
 +                  DOMAINDECOMP(cr) ? cr->dd->nc : NULL,&npmenodes,
 +                  &state->natoms,&state->ngtc,&state->nnhpres,
 +                  &state->nhchainlength,&(state->dfhist.nlambda),&state->flags,&flags_eks,&flags_enh,&flags_dfh,
 +                  NULL);
 +    
 +    sfree(version);
 +    sfree(btime);
 +    sfree(buser);
 +    sfree(bhost);
 +    sfree(fprog);
 +
 +    if((do_cpt_state(gmx_fio_getxdr(fp),FALSE,state->flags,state,TRUE,NULL) < 0)        ||
 +       (do_cpt_ekinstate(gmx_fio_getxdr(fp),FALSE,flags_eks,&state->ekinstate,NULL) < 0)||
 +       (do_cpt_enerhist(gmx_fio_getxdr(fp),FALSE,flags_enh,&state->enerhist,NULL) < 0)  ||
 +       (do_cpt_df_hist(gmx_fio_getxdr(fp),FALSE,flags_dfh,&state->dfhist,NULL) < 0)  ||
 +       (do_cpt_files(gmx_fio_getxdr(fp),FALSE,&outputfiles,&noutputfiles,NULL,
 +                     file_version) < 0))
 +    {
 +        gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
 +    }
 +
 +    do_cpt_footer(gmx_fio_getxdr(fp),FALSE,file_version);
 +
 +    /* we really, REALLY, want to make sure to physically write the checkpoint, 
 +       and all the files it depends on, out to disk. Because we've
 +       opened the checkpoint with gmx_fio_open(), it's in our list
 +       of open files.  */
 +    ret=gmx_fio_all_output_fsync();
 +
 +    if (ret)
 +    {
 +        char buf[STRLEN];
 +        sprintf(buf,
 +                "Cannot fsync '%s'; maybe you are out of disk space?",
 +                gmx_fio_getname(ret));
 +
 +        if (getenv(GMX_IGNORE_FSYNC_FAILURE_ENV)==NULL)
 +        {
 +            gmx_file(buf);
 +        }
 +        else
 +        {
 +            gmx_warning(buf);
 +        }
 +    }
 +
 +    if( gmx_fio_close(fp) != 0)
 +    {
 +        gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
 +    }
 +
 +    /* we don't move the checkpoint if the user specified they didn't want it,
 +       or if the fsyncs failed */
 +    if (!bNumberAndKeep && !ret)
 +    {
 +        if (gmx_fexist(fn))
 +        {
 +            /* Rename the previous checkpoint file */
 +            strcpy(buf,fn);
 +            buf[strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1] = '\0';
 +            strcat(buf,"_prev");
 +            strcat(buf,fn+strlen(fn) - strlen(ftp2ext(fn2ftp(fn))) - 1);
 +#ifndef GMX_FAHCORE
 +            /* we copy here so that if something goes wrong between now and
 +             * the rename below, there's always a state.cpt.
 +             * If renames are atomic (such as in POSIX systems),
 +             * this copying should be unneccesary.
 +             */
 +            gmx_file_copy(fn, buf, FALSE);
 +            /* We don't really care if this fails: 
 +             * there's already a new checkpoint.
 +             */
 +#else
 +            gmx_file_rename(fn, buf);
 +#endif
 +        }
 +        if (gmx_file_rename(fntemp, fn) != 0)
 +        {
 +            gmx_file("Cannot rename checkpoint file; maybe you are out of disk space?");
 +        }
 +    }
 +
 +    sfree(outputfiles);
 +    sfree(fntemp);
 +
 +#ifdef GMX_FAHCORE
 +    /*code for alternate checkpointing scheme.  moved from top of loop over 
 +      steps */
 +    fcRequestCheckPoint();
 +    if ( fcCheckPointParallel( cr->nodeid, NULL,0) == 0 ) {
 +        gmx_fatal( 3,__FILE__,__LINE__, "Checkpoint error on step %d\n", step );
 +    }
 +#endif /* end GMX_FAHCORE block */
 +}
 +
 +static void print_flag_mismatch(FILE *fplog,int sflags,int fflags)
 +{
 +    int i;
 +    
 +    fprintf(fplog,"\nState entry mismatch between the simulation and the checkpoint file\n");
 +    fprintf(fplog,"Entries which are not present in the checkpoint file will not be updated\n");
 +    fprintf(fplog,"  %24s    %11s    %11s\n","","simulation","checkpoint");
 +    for(i=0; i<estNR; i++)
 +    {
 +        if ((sflags & (1<<i)) || (fflags & (1<<i)))
 +        {
 +            fprintf(fplog,"  %24s    %11s    %11s\n",
 +                    est_names[i],
 +                    (sflags & (1<<i)) ? "  present  " : "not present",
 +                    (fflags & (1<<i)) ? "  present  " : "not present");
 +        }
 +    }
 +}
 +
 +static void check_int(FILE *fplog,const char *type,int p,int f,gmx_bool *mm)
 +{
 +      FILE *fp = fplog ? fplog : stderr;
 +
 +    if (p != f)
 +    {
 +              fprintf(fp,"  %s mismatch,\n",type);
 +              fprintf(fp,"    current program: %d\n",p);
 +              fprintf(fp,"    checkpoint file: %d\n",f);
 +              fprintf(fp,"\n");
 +        *mm = TRUE;
 +    }
 +}
 +
 +static void check_string(FILE *fplog,const char *type,const char *p,
 +                         const char *f,gmx_bool *mm)
 +{
 +      FILE *fp = fplog ? fplog : stderr;
 +      
 +    if (strcmp(p,f) != 0)
 +    {
 +              fprintf(fp,"  %s mismatch,\n",type);
 +              fprintf(fp,"    current program: %s\n",p);
 +              fprintf(fp,"    checkpoint file: %s\n",f);
 +              fprintf(fp,"\n");
 +        *mm = TRUE;
 +    }
 +}
 +
 +static void check_match(FILE *fplog,
 +                        char *version,
 +                        char *btime,char *buser,char *bhost,int double_prec,
 +                        char *fprog,
 +                        t_commrec *cr,gmx_bool bPartDecomp,int npp_f,int npme_f,
 +                        ivec dd_nc,ivec dd_nc_f)
 +{
 +    int  npp;
 +    gmx_bool mm;
 +    
 +    mm = FALSE;
 +    
 +    check_string(fplog,"Version"      ,VERSION      ,version,&mm);
 +    check_string(fplog,"Build time"   ,BUILD_TIME   ,btime  ,&mm);
 +    check_string(fplog,"Build user"   ,BUILD_USER   ,buser  ,&mm);
 +    check_string(fplog,"Build host"   ,BUILD_HOST   ,bhost  ,&mm);
 +    check_int   (fplog,"Double prec." ,GMX_CPT_BUILD_DP,double_prec,&mm);
 +    check_string(fplog,"Program name" ,Program()    ,fprog  ,&mm);
 +    
 +    check_int   (fplog,"#nodes"       ,cr->nnodes   ,npp_f+npme_f ,&mm);
 +    if (bPartDecomp)
 +    {
 +        dd_nc[XX] = 1;
 +        dd_nc[YY] = 1;
 +        dd_nc[ZZ] = 1;
 +    }
 +    if (cr->nnodes > 1)
 +    {
 +        check_int (fplog,"#PME-nodes"  ,cr->npmenodes,npme_f     ,&mm);
 +
 +        npp = cr->nnodes;
 +        if (cr->npmenodes >= 0)
 +        {
 +            npp -= cr->npmenodes;
 +        }
 +        if (npp == npp_f)
 +        {
 +            check_int (fplog,"#DD-cells[x]",dd_nc[XX]    ,dd_nc_f[XX],&mm);
 +            check_int (fplog,"#DD-cells[y]",dd_nc[YY]    ,dd_nc_f[YY],&mm);
 +            check_int (fplog,"#DD-cells[z]",dd_nc[ZZ]    ,dd_nc_f[ZZ],&mm);
 +        }
 +    }
 +    
 +    if (mm)
 +    {
 +              fprintf(stderr,
 +                              "Gromacs binary or parallel settings not identical to previous run.\n"
 +                              "Continuation is exact, but is not guaranteed to be binary identical%s.\n\n",
 +                              fplog ? ",\n see the log file for details" : "");
 +              
 +        if (fplog)
 +        {
 +                      fprintf(fplog,
 +                                      "Gromacs binary or parallel settings not identical to previous run.\n"
 +                                      "Continuation is exact, but is not guaranteed to be binary identical.\n\n");
 +              }
 +    }
 +}
 +
 +static void read_checkpoint(const char *fn,FILE **pfplog,
 +                            t_commrec *cr,gmx_bool bPartDecomp,ivec dd_nc,
 +                            int eIntegrator, int *init_fep_state, gmx_large_int_t *step,double *t,
 +                            t_state *state,gmx_bool *bReadRNG,gmx_bool *bReadEkin,
 +                            int *simulation_part,
 +                            gmx_bool bAppendOutputFiles,gmx_bool bForceAppend)
 +{
 +    t_fileio *fp;
 +    int  i,j,rc;
 +    int  file_version;
 +    char *version,*btime,*buser,*bhost,*fprog,*ftime;
 +    int  double_prec;
 +      char filename[STRLEN],buf[STEPSTRSIZE];
 +    int  nppnodes,eIntegrator_f,nppnodes_f,npmenodes_f;
 +    ivec dd_nc_f;
 +    int  natoms,ngtc,nnhpres,nhchainlength,nlambda,fflags,flags_eks,flags_enh,flags_dfh;
 +    int  d;
 +    int  ret;
 +    gmx_file_position_t *outputfiles;
 +    int  nfiles;
 +    t_fileio *chksum_file;
 +    FILE* fplog = *pfplog;
 +    unsigned char digest[16];
 +#ifndef GMX_NATIVE_WINDOWS
 +    struct flock fl;  /* don't initialize here: the struct order is OS 
 +                         dependent! */
 +#endif
 +
 +    const char *int_warn=
 +              "WARNING: The checkpoint file was generated with integrator %s,\n"
 +              "         while the simulation uses integrator %s\n\n";
 +    const char *sd_note=
 +        "NOTE: The checkpoint file was for %d nodes doing SD or BD,\n"
 +        "      while the simulation uses %d SD or BD nodes,\n"
 +        "      continuation will be exact, except for the random state\n\n";
 +    
 +#ifndef GMX_NATIVE_WINDOWS
 +    fl.l_type=F_WRLCK;
 +    fl.l_whence=SEEK_SET;
 +    fl.l_start=0;
 +    fl.l_len=0;
 +    fl.l_pid=0;
 +#endif
 +
 +    if (PARTDECOMP(cr))
 +    {
 +        gmx_fatal(FARGS,
 +                  "read_checkpoint not (yet) supported with particle decomposition");
 +    }
 +    
 +    fp = gmx_fio_open(fn,"r");
 +    do_cpt_header(gmx_fio_getxdr(fp),TRUE,&file_version,
 +                  &version,&btime,&buser,&bhost,&double_prec,&fprog,&ftime,
 +                  &eIntegrator_f,simulation_part,step,t,
 +                  &nppnodes_f,dd_nc_f,&npmenodes_f,
 +                  &natoms,&ngtc,&nnhpres,&nhchainlength,&nlambda,
 +                  &fflags,&flags_eks,&flags_enh,&flags_dfh,NULL);
 +
 +    if (bAppendOutputFiles &&
 +        file_version >= 13 && double_prec != GMX_CPT_BUILD_DP)
 +    {
 +        gmx_fatal(FARGS,"Output file appending requested, but the code and checkpoint file precision (single/double) don't match");
 +    }
 +    
 +    if (cr == NULL || MASTER(cr))
 +    {
 +        fprintf(stderr,"\nReading checkpoint file %s generated: %s\n\n",
 +                fn,ftime);
 +    }
 +      
 +      /* This will not be written if we do appending, since fplog is still NULL then */
 +    if (fplog)
 +    {
 +        fprintf(fplog,"\n");
 +        fprintf(fplog,"Reading checkpoint file %s\n",fn);
 +        fprintf(fplog,"  file generated by:     %s\n",fprog);  
 +        fprintf(fplog,"  file generated at:     %s\n",ftime);  
 +        fprintf(fplog,"  GROMACS build time:    %s\n",btime);  
 +        fprintf(fplog,"  GROMACS build user:    %s\n",buser);  
 +        fprintf(fplog,"  GROMACS build host:    %s\n",bhost);
 +        fprintf(fplog,"  GROMACS double prec.:  %d\n",double_prec);
 +        fprintf(fplog,"  simulation part #:     %d\n",*simulation_part);
 +        fprintf(fplog,"  step:                  %s\n",gmx_step_str(*step,buf));
 +        fprintf(fplog,"  time:                  %f\n",*t);  
 +        fprintf(fplog,"\n");
 +    }
 +    
 +    if (natoms != state->natoms)
 +    {
 +        gmx_fatal(FARGS,"Checkpoint file is for a system of %d atoms, while the current system consists of %d atoms",natoms,state->natoms);
 +    }
 +    if (ngtc != state->ngtc)
 +    {
 +        gmx_fatal(FARGS,"Checkpoint file is for a system of %d T-coupling groups, while the current system consists of %d T-coupling groups",ngtc,state->ngtc);
 +    }
 +    if (nnhpres != state->nnhpres)
 +    {
 +        gmx_fatal(FARGS,"Checkpoint file is for a system of %d NH-pressure-coupling variables, while the current system consists of %d NH-pressure-coupling variables",nnhpres,state->nnhpres);
 +    }
 +
 +    if (nlambda != state->dfhist.nlambda)
 +    {
 +        gmx_fatal(FARGS,"Checkpoint file is for a system with %d lambda states, while the current system consists of %d lambda states",nlambda,state->dfhist.nlambda);
 +    }
 +
 +    init_gtc_state(state,state->ngtc,state->nnhpres,nhchainlength); /* need to keep this here to keep the tpr format working */
 +    /* write over whatever was read; we use the number of Nose-Hoover chains from the checkpoint */
 +    
 +    if (eIntegrator_f != eIntegrator)
 +    {
 +        if (MASTER(cr))
 +        {
 +            fprintf(stderr,int_warn,EI(eIntegrator_f),EI(eIntegrator));
 +        }
 +              if(bAppendOutputFiles)
 +              {
 +                      gmx_fatal(FARGS,
 +                                        "Output file appending requested, but input/checkpoint integrators do not match.\n"
 +                                        "Stopping the run to prevent you from ruining all your data...\n"
 +                                        "If you _really_ know what you are doing, try with the -noappend option.\n");
 +              }
 +        if (fplog)
 +        {
 +            fprintf(fplog,int_warn,EI(eIntegrator_f),EI(eIntegrator));
 +        }
 +    }
 +
 +    if (!PAR(cr))
 +    {
 +        nppnodes = 1;
 +        cr->npmenodes = 0;
 +    }
 +    else if (bPartDecomp)
 +    {
 +        nppnodes = cr->nnodes;
 +        cr->npmenodes = 0;
 +    }
 +    else if (cr->nnodes == nppnodes_f + npmenodes_f)
 +    {
 +        if (cr->npmenodes < 0)
 +        {
 +            cr->npmenodes = npmenodes_f;
 +        }
 +        nppnodes = cr->nnodes - cr->npmenodes;
 +        if (nppnodes == nppnodes_f)
 +        {
 +            for(d=0; d<DIM; d++)
 +            {
 +                if (dd_nc[d] == 0)
 +                {
 +                    dd_nc[d] = dd_nc_f[d];
 +                }
 +            }
 +        }
 +    }
 +    else
 +    {
 +        /* The number of PP nodes has not been set yet */
 +        nppnodes = -1;
 +    }
 +
 +    if ((EI_SD(eIntegrator) || eIntegrator == eiBD) && nppnodes > 0)
 +    {
 +        /* Correct the RNG state size for the number of PP nodes.
 +         * Such assignments should all be moved to one central function.
 +         */
 +        state->nrng  = nppnodes*gmx_rng_n();
 +        state->nrngi = nppnodes;
 +    }
 +    
 +    *bReadRNG = TRUE;
 +    if (fflags != state->flags)
 +    {
 +              
 +        if (MASTER(cr))
 +        {
 +                      if(bAppendOutputFiles)
 +                      {
 +                              gmx_fatal(FARGS,
 +                                                "Output file appending requested, but input and checkpoint states are not identical.\n"
 +                                                "Stopping the run to prevent you from ruining all your data...\n"
 +                                                "You can try with the -noappend option, and get more info in the log file.\n");
 +                      }
 +                      
 +            if (getenv("GMX_ALLOW_CPT_MISMATCH") == NULL)
 +            {
 +                gmx_fatal(FARGS,"You seem to have switched ensemble, integrator, T and/or P-coupling algorithm between the cpt and tpr file. The recommended way of doing this is passing the cpt file to grompp (with option -t) instead of to mdrun. If you know what you are doing, you can override this error by setting the env.var. GMX_ALLOW_CPT_MISMATCH");
 +            }
 +            else
 +            {
 +                fprintf(stderr,
 +                        "WARNING: The checkpoint state entries do not match the simulation,\n"
 +                        "         see the log file for details\n\n");
 +            }
 +        }
 +              
 +              if(fplog)
 +              {
 +                      print_flag_mismatch(fplog,state->flags,fflags);
 +              }
 +    }
 +    else
 +    {
 +        if ((EI_SD(eIntegrator) || eIntegrator == eiBD) &&
 +            nppnodes != nppnodes_f)
 +        {
 +            *bReadRNG = FALSE;
 +            if (MASTER(cr))
 +            {
 +                fprintf(stderr,sd_note,nppnodes_f,nppnodes);
 +            }
 +            if (fplog)
 +            {
 +                fprintf(fplog ,sd_note,nppnodes_f,nppnodes);
 +            }
 +        }
 +        if (MASTER(cr))
 +        {
 +            check_match(fplog,version,btime,buser,bhost,double_prec,fprog,
 +                        cr,bPartDecomp,nppnodes_f,npmenodes_f,dd_nc,dd_nc_f);
 +        }
 +    }
 +    ret = do_cpt_state(gmx_fio_getxdr(fp),TRUE,fflags,state,*bReadRNG,NULL);
 +    *init_fep_state = state->fep_state;  /* there should be a better way to do this than setting it here.
 +                                            Investigate for 5.0. */
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +    ret = do_cpt_ekinstate(gmx_fio_getxdr(fp),TRUE,
 +                           flags_eks,&state->ekinstate,NULL);
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +    *bReadEkin = ((flags_eks & (1<<eeksEKINH)) || (flags_eks & (1<<eeksEKINF)) || (flags_eks & (1<<eeksEKINO)) ||
 +                  ((flags_eks & (1<<eeksEKINSCALEF)) | (flags_eks & (1<<eeksEKINSCALEH)) | (flags_eks & (1<<eeksVSCALE))));
 +    
 +    ret = do_cpt_enerhist(gmx_fio_getxdr(fp),TRUE,
 +                          flags_enh,&state->enerhist,NULL);
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +
 +    if (file_version < 6)
 +    {
 +        const char *warn="Reading checkpoint file in old format, assuming that the run that generated this file started at step 0, if this is not the case the averages stored in the energy file will be incorrect.";
 +
 +        fprintf(stderr,"\nWARNING: %s\n\n",warn);
 +        if (fplog)
 +        {
 +            fprintf(fplog,"\nWARNING: %s\n\n",warn);
 +        }
 +        state->enerhist.nsum     = *step;
 +        state->enerhist.nsum_sim = *step;
 +    }
 +
 +    ret = do_cpt_df_hist(gmx_fio_getxdr(fp),TRUE,
 +                         flags_dfh,&state->dfhist,NULL);
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +
 +      ret = do_cpt_files(gmx_fio_getxdr(fp),TRUE,&outputfiles,&nfiles,NULL,file_version);
 +      if (ret)
 +      {
 +              cp_error();
 +      }
 +                                         
 +    ret = do_cpt_footer(gmx_fio_getxdr(fp),TRUE,file_version);
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +    if( gmx_fio_close(fp) != 0)
 +      {
 +        gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
 +      }
 +    
 +    sfree(fprog);
 +    sfree(ftime);
 +    sfree(btime);
 +    sfree(buser);
 +    sfree(bhost);
 +      
 +      /* If the user wants to append to output files,
 +     * we use the file pointer positions of the output files stored
 +     * in the checkpoint file and truncate the files such that any frames
 +     * written after the checkpoint time are removed.
 +     * All files are md5sum checked such that we can be sure that
 +     * we do not truncate other (maybe important) files.
 +       */
 +    if (bAppendOutputFiles)
 +    {
 +        if (fn2ftp(outputfiles[0].filename)!=efLOG)
 +        {
 +            /* make sure first file is log file so that it is OK to use it for 
 +             * locking
 +             */
 +            gmx_fatal(FARGS,"The first output file should always be the log "
 +                      "file but instead is: %s. Cannot do appending because of this condition.", outputfiles[0].filename);
 +        }
 +        for(i=0;i<nfiles;i++)
 +        {
 +            if (outputfiles[i].offset < 0)
 +            {
 +                gmx_fatal(FARGS,"The original run wrote a file called '%s' which "
 +                    "is larger than 2 GB, but mdrun did not support large file"
 +                    " offsets. Can not append. Run mdrun with -noappend",
 +                    outputfiles[i].filename);
 +            }
 +#ifdef GMX_FAHCORE
 +            chksum_file=gmx_fio_open(outputfiles[i].filename,"a");
 +
 +#else
 +            chksum_file=gmx_fio_open(outputfiles[i].filename,"r+");
 +
 +            /* lock log file */                
 +            if (i==0)
 +            {
 +                /* Note that there are systems where the lock operation
 +                 * will succeed, but a second process can also lock the file.
 +                 * We should probably try to detect this.
 +                 */
 +#ifndef GMX_NATIVE_WINDOWS
 +                if (fcntl(fileno(gmx_fio_getfp(chksum_file)), F_SETLK, &fl)
 +                    ==-1)
 +#else
 +                if (_locking(fileno(gmx_fio_getfp(chksum_file)), _LK_NBLCK, LONG_MAX)==-1)
 +#endif
 +                {
 +                    if (errno == ENOSYS)
 +                    {
 +                        if (!bForceAppend)
 +                        {
 +                            gmx_fatal(FARGS,"File locking is not supported on this system. Use -noappend or specify -append explicitly to append anyhow.");
 +                        }
 +                        else
 +                        {
 +                            fprintf(stderr,"\nNOTE: File locking is not supported on this system, will not lock %s\n\n",outputfiles[i].filename);
 +                            if (fplog)
 +                            {
 +                                fprintf(fplog,"\nNOTE: File locking not supported on this system, will not lock %s\n\n",outputfiles[i].filename);
 +                            }
 +                        }
 +                    }
 +                    else if (errno == EACCES || errno == EAGAIN)
 +                    {
 +                        gmx_fatal(FARGS,"Failed to lock: %s. Already running "
 +                                  "simulation?", outputfiles[i].filename);
 +                    }
 +                    else
 +                    {
 +                        gmx_fatal(FARGS,"Failed to lock: %s. %s.",
 +                                  outputfiles[i].filename, strerror(errno));
 +                    }
 +                }
 +            }
 +            
 +            /* compute md5 chksum */ 
 +            if (outputfiles[i].chksum_size != -1)
 +            {
 +                if (gmx_fio_get_file_md5(chksum_file,outputfiles[i].offset,
 +                                     digest) != outputfiles[i].chksum_size)  /*at the end of the call the file position is at the end of the file*/
 +                {
 +                    gmx_fatal(FARGS,"Can't read %d bytes of '%s' to compute checksum. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
 +                              outputfiles[i].chksum_size, 
 +                              outputfiles[i].filename);
 +                }
 +            } 
 +            if (i==0)  /*log file needs to be seeked in case we need to truncate (other files are truncated below)*/
 +            {
 +                if (gmx_fio_seek(chksum_file,outputfiles[i].offset))
 +                {
 +                      gmx_fatal(FARGS,"Seek error! Failed to truncate log-file: %s.", strerror(errno));
 +                }
 +            }
 +#endif
 +            
 +            if (i==0) /*open log file here - so that lock is never lifted 
 +                        after chksum is calculated */
 +            {
 +                *pfplog = gmx_fio_getfp(chksum_file);
 +            }
 +            else
 +            {
 +                gmx_fio_close(chksum_file);
 +            }
 +#ifndef GMX_FAHCORE            
 +            /* compare md5 chksum */
 +            if (outputfiles[i].chksum_size != -1 &&
 +                memcmp(digest,outputfiles[i].chksum,16)!=0) 
 +            {
 +                if (debug)
 +                {
 +                    fprintf(debug,"chksum for %s: ",outputfiles[i].filename);
 +                    for (j=0; j<16; j++)
 +                    {
 +                        fprintf(debug,"%02x",digest[j]);
 +                    }
 +                    fprintf(debug,"\n");
 +                }
 +                gmx_fatal(FARGS,"Checksum wrong for '%s'. The file has been replaced or its contents have been modified. Cannot do appending because of this condition.",
 +                          outputfiles[i].filename);
 +            }
 +#endif        
 +
 +              
 +            if (i!=0) /*log file is already seeked to correct position */
 +            {
 +#ifdef GMX_NATIVE_WINDOWS
 +                rc = gmx_wintruncate(outputfiles[i].filename,outputfiles[i].offset);
 +#else            
 +                rc = truncate(outputfiles[i].filename,outputfiles[i].offset);
 +#endif
 +                if(rc!=0)
 +                {
 +                    gmx_fatal(FARGS,"Truncation of file %s failed. Cannot do appending because of this failure.",outputfiles[i].filename);
 +                }
 +            }
 +        }
 +    }
 +
 +    sfree(outputfiles);
 +}
 +
 +
 +/* Read a checkpoint on the simulation master and set up a continuation run.
 + * The master reads the full state from fn via read_checkpoint(); in a
 + * parallel run the few scalars the other ranks need (npmenodes, dd grid,
 + * step, RNG/Ekin read flags) are then broadcast.  Finally the inputrec is
 + * adjusted so the run continues from the checkpointed step. */
 +void load_checkpoint(const char *fn,FILE **fplog,
 +                     t_commrec *cr,gmx_bool bPartDecomp,ivec dd_nc,
 +                     t_inputrec *ir,t_state *state,
 +                     gmx_bool *bReadRNG,gmx_bool *bReadEkin,
 +                     gmx_bool bAppend,gmx_bool bForceAppend)
 +{
 +    gmx_large_int_t step;
 +    double t;
 +
 +    if (SIMMASTER(cr)) {
 +      /* Read the state from the checkpoint file */
 +      read_checkpoint(fn,fplog,
 +                      cr,bPartDecomp,dd_nc,
 +                      ir->eI,&(ir->fepvals->init_fep_state),&step,&t,state,bReadRNG,bReadEkin,
 +                      &ir->simulation_part,bAppend,bForceAppend);
 +    }
 +    if (PAR(cr)) {
 +      /* Only the master read the file: broadcast what every rank needs. */
 +      gmx_bcast(sizeof(cr->npmenodes),&cr->npmenodes,cr);
 +      gmx_bcast(DIM*sizeof(dd_nc[0]),dd_nc,cr);
 +      gmx_bcast(sizeof(step),&step,cr);
 +      gmx_bcast(sizeof(*bReadRNG),bReadRNG,cr);
 +      gmx_bcast(sizeof(*bReadEkin),bReadEkin,cr);
 +    }
 +    ir->bContinuation    = TRUE;
 +    if (ir->nsteps >= 0)
 +    {
 +        /* Keep the total step count fixed across restarts: subtract the
 +         * steps already done (step - init_step) from the remaining count. */
 +        ir->nsteps          += ir->init_step - step;
 +    }
 +    ir->init_step        = step;
 +	ir->simulation_part += 1;
 +}
 +
 +/* Low-level reader used by the tool-side entry points: parses an already
 + * opened checkpoint file section by section (header, state, ekin, energy
 + * history, df history, files, footer) into *state.  RNG reading is
 + * optional (bReadRNG).  When the caller passes nfiles/outputfiles the
 + * stored output-file table is returned (caller must sfree it); otherwise
 + * it is read into a local buffer and freed here.  Any section-read error
 + * aborts via cp_error(). */
 +static void read_checkpoint_data(t_fileio *fp,int *simulation_part,
 +                                 gmx_large_int_t *step,double *t,t_state *state,
 +                                 gmx_bool bReadRNG,
 +                                 int *nfiles,gmx_file_position_t **outputfiles)
 +{
 +    int  file_version;
 +    char *version,*btime,*buser,*bhost,*fprog,*ftime;
 +    int  double_prec;
 +    int  eIntegrator;
 +    int  nppnodes,npme;
 +    ivec dd_nc;
 +    int  flags_eks,flags_enh,flags_dfh;
 +    int  nfiles_loc;
 +    gmx_file_position_t *files_loc=NULL;
 +    int  ret;
 +	
 +    do_cpt_header(gmx_fio_getxdr(fp),TRUE,&file_version,
 +                  &version,&btime,&buser,&bhost,&double_prec,&fprog,&ftime,
 +                  &eIntegrator,simulation_part,step,t,&nppnodes,dd_nc,&npme,
 +                  &state->natoms,&state->ngtc,&state->nnhpres,&state->nhchainlength,
 +                  &(state->dfhist.nlambda),&state->flags,&flags_eks,&flags_enh,&flags_dfh,NULL);
 +    ret =
 +        do_cpt_state(gmx_fio_getxdr(fp),TRUE,state->flags,state,bReadRNG,NULL);
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +    ret = do_cpt_ekinstate(gmx_fio_getxdr(fp),TRUE,
 +                           flags_eks,&state->ekinstate,NULL);
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +    ret = do_cpt_enerhist(gmx_fio_getxdr(fp),TRUE,
 +                          flags_enh,&state->enerhist,NULL);
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +    ret = do_cpt_df_hist(gmx_fio_getxdr(fp),TRUE,
 +                          flags_dfh,&state->dfhist,NULL);
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +
 +    /* Return the output-file table to the caller if requested,
 +     * otherwise read it into a throw-away local list. */
 +    ret = do_cpt_files(gmx_fio_getxdr(fp),TRUE,
 +                       outputfiles != NULL ? outputfiles : &files_loc,
 +                       outputfiles != NULL ? nfiles : &nfiles_loc,
 +                       NULL,file_version);
 +    if (files_loc != NULL)
 +    {
 +        sfree(files_loc);
 +    }
 +	
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +	
 +    ret = do_cpt_footer(gmx_fio_getxdr(fp),TRUE,file_version);
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +
 +    /* The header allocated these strings; free them here. */
 +    sfree(fprog);
 +    sfree(ftime);
 +    sfree(btime);
 +    sfree(buser);
 +    sfree(bhost);
 +}
 +
 +/* Convenience wrapper: open checkpoint file fn, read part/step/time and
 + * the full state (without RNG), and close the file again.  Fails fatally
 + * via gmx_file() if the close reports an error. */
 +void 
 +read_checkpoint_state(const char *fn,int *simulation_part,
 +                      gmx_large_int_t *step,double *t,t_state *state)
 +{
 +    t_fileio *fp;
 +    
 +    fp = gmx_fio_open(fn,"r");
 +    read_checkpoint_data(fp,simulation_part,step,t,state,FALSE,NULL,NULL);
 +    if( gmx_fio_close(fp) != 0)
 +      {
 +        gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
 +      }
 +}
 +
 +/* Convert the state stored in an open checkpoint file into a trajectory
 + * frame (step, time, lambda, and - when present in state->flags - the
 + * coordinates, velocities and box).  x/v pointers are moved into the
 + * frame and nulled in the state so done_state() does not free them. */
 +void read_checkpoint_trxframe(t_fileio *fp,t_trxframe *fr)
 +{
 +    t_state state;
 +    int simulation_part;
 +    gmx_large_int_t step;
 +    double t;
 +    
 +    init_state(&state,0,0,0,0,0);
 +    
 +    read_checkpoint_data(fp,&simulation_part,&step,&t,&state,FALSE,NULL,NULL);
 +    
 +    fr->natoms  = state.natoms;
 +    fr->bTitle  = FALSE;
 +    fr->bStep   = TRUE;
 +    /* t_trxframe stores the step as int; this checks for overflow. */
 +    fr->step    = gmx_large_int_to_int(step,
 +                                    "conversion of checkpoint to trajectory");
 +    fr->bTime   = TRUE;
 +    fr->time    = t;
 +    fr->bLambda = TRUE;
 +    fr->lambda  = state.lambda[efptFEP];
 +    fr->fep_state  = state.fep_state;
 +    fr->bAtoms  = FALSE;
 +    fr->bX      = (state.flags & (1<<estX));
 +    if (fr->bX)
 +    {
 +        /* Transfer ownership of the coordinate array to the frame. */
 +        fr->x     = state.x;
 +        state.x   = NULL;
 +    }
 +    fr->bV      = (state.flags & (1<<estV));
 +    if (fr->bV)
 +    {
 +        /* Transfer ownership of the velocity array to the frame. */
 +        fr->v     = state.v;
 +        state.v   = NULL;
 +    }
 +    fr->bF      = FALSE;
 +    fr->bBox    = (state.flags & (1<<estBOX));
 +    if (fr->bBox)
 +    {
 +        copy_mat(state.box,fr->box);
 +    }
 +    done_state(&state);
 +}
 +
 +/* Dump the contents of checkpoint file fn in readable form to out
 + * (used by gmxdump).  Reads the same sections as read_checkpoint_data,
 + * passing out so every do_cpt_* call prints what it reads.  Unlike the
 + * mdrun readers, a section error here only produces a warning so as much
 + * of the file as possible is listed. */
 +void list_checkpoint(const char *fn,FILE *out)
 +{
 +    t_fileio *fp;
 +    int  file_version;
 +    char *version,*btime,*buser,*bhost,*fprog,*ftime;
 +    int  double_prec;
 +    int  eIntegrator,simulation_part,nppnodes,npme;
 +    gmx_large_int_t step;
 +    double t;
 +    ivec dd_nc;
 +    t_state state;
 +    int  flags_eks,flags_enh,flags_dfh;
 +    int  indent;
 +    int  i,j;
 +    int  ret;
 +    gmx_file_position_t *outputfiles;
 +	int  nfiles;
 +	
 +    init_state(&state,-1,-1,-1,-1,0);
 +
 +    fp = gmx_fio_open(fn,"r");
 +    do_cpt_header(gmx_fio_getxdr(fp),TRUE,&file_version,
 +                  &version,&btime,&buser,&bhost,&double_prec,&fprog,&ftime,
 +                  &eIntegrator,&simulation_part,&step,&t,&nppnodes,dd_nc,&npme,
 +                  &state.natoms,&state.ngtc,&state.nnhpres,&state.nhchainlength,
 +                  &(state.dfhist.nlambda),&state.flags,
 +                  &flags_eks,&flags_enh,&flags_dfh,out);
 +    ret = do_cpt_state(gmx_fio_getxdr(fp),TRUE,state.flags,&state,TRUE,out);
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +    ret = do_cpt_ekinstate(gmx_fio_getxdr(fp),TRUE,
 +                           flags_eks,&state.ekinstate,out);
 +    if (ret)
 +    {
 +        cp_error();
 +    }
 +    ret = do_cpt_enerhist(gmx_fio_getxdr(fp),TRUE,
 +                          flags_enh,&state.enerhist,out);
 +
 +    if (ret == 0)
 +    {
 +        init_df_history(&state.dfhist,state.dfhist.nlambda,0); /* reinitialize state with correct sizes */
 +        ret = do_cpt_df_hist(gmx_fio_getxdr(fp),TRUE,
 +                             flags_dfh,&state.dfhist,out);
 +    }
 +    if (ret == 0)
 +    {
 +		do_cpt_files(gmx_fio_getxdr(fp),TRUE,&outputfiles,&nfiles,out,file_version);
 +	}
 +	
 +    if (ret == 0)
 +    {
 +        ret = do_cpt_footer(gmx_fio_getxdr(fp),TRUE,file_version);
 +    }
 +	
 +    if (ret)
 +    {
 +        /* Listing is best-effort: warn instead of aborting. */
 +        cp_warning(out);
 +    }
 +    if( gmx_fio_close(fp) != 0)
 +      {
 +        gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
 +      }
 +    
 +    done_state(&state);
 +}
 +
 +
 +/* Return TRUE when fnm_cp (an output file name stored in the checkpoint)
 + * matches one of mdrun's registered output file names (fnm[0..nfile-1])
 + * AND that file currently exists on disk. */
 +static gmx_bool exist_output_file(const char *fnm_cp,int nfile,const t_filenm fnm[])
 +{
 +    int i;
 +
 +    /* Check if the output file name stored in the checkpoint file
 +     * is one of the output file names of mdrun.
 +     */
 +    i = 0;
 +    while (i < nfile &&
 +           !(is_output(&fnm[i]) && strcmp(fnm_cp,fnm[i].fns[0]) == 0))
 +    {
 +        i++;
 +    }
 +    
 +    return (i < nfile && gmx_fexist(fnm_cp));
 +}
 +
 +/* This routine cannot print tons of data, since it is called before the log file is opened. */
 +gmx_bool read_checkpoint_simulation_part(const char *filename, int *simulation_part,
 +                                     gmx_large_int_t *cpt_step,t_commrec *cr,
 +                                     gmx_bool bAppendReq,
 +                                     int nfile,const t_filenm fnm[],
 +                                     const char *part_suffix,gmx_bool *bAddPart)
 +{
 +    t_fileio *fp;
 +    gmx_large_int_t step=0;
 +      double t;
 +    t_state state;
 +    int  nfiles;
 +    gmx_file_position_t *outputfiles;
 +    int  nexist,f;
 +    gmx_bool bAppend;
 +    char *fn,suf_up[STRLEN];
 +
 +    bAppend = FALSE;
 +
 +    if (SIMMASTER(cr)) {
 +        if(!gmx_fexist(filename) || (!(fp = gmx_fio_open(filename,"r")) ))
 +        {
 +            *simulation_part = 0;
 +        }
 +        else 
 +        {
 +            init_state(&state,0,0,0,0,0);
 +
 +            read_checkpoint_data(fp,simulation_part,&step,&t,&state,FALSE,
 +                                 &nfiles,&outputfiles);
 +            if( gmx_fio_close(fp) != 0)
 +            {
 +                gmx_file("Cannot read/write checkpoint; corrupt file, or maybe you are out of disk space?");
 +            }
 +            done_state(&state);
 +
 +            if (bAppendReq)
 +            {
 +                nexist = 0;
 +                for(f=0; f<nfiles; f++)
 +                {
 +                    if (exist_output_file(outputfiles[f].filename,nfile,fnm))
 +                    {
 +                        nexist++;
 +                    }
 +                }
 +                if (nexist == nfiles)
 +                {
 +                    bAppend = bAppendReq;
 +                }
 +                else if (nexist > 0)
 +                {
 +                    fprintf(stderr,
 +                            "Output file appending has been requested,\n"
 +                            "but some output files listed in the checkpoint file %s\n"
 +                            "are not present or are named differently by the current program:\n",
 +                            filename);
 +                    fprintf(stderr,"output files present:");
 +                    for(f=0; f<nfiles; f++)
 +                    {
 +                        if (exist_output_file(outputfiles[f].filename,
 +                                              nfile,fnm))
 +                        {
 +                            fprintf(stderr," %s",outputfiles[f].filename);
 +                        }
 +                    }
 +                    fprintf(stderr,"\n");
 +                    fprintf(stderr,"output files not present or named differently:");
 +                    for(f=0; f<nfiles; f++)
 +                    {
 +                        if (!exist_output_file(outputfiles[f].filename,
 +                                               nfile,fnm))
 +                        {
 +                            fprintf(stderr," %s",outputfiles[f].filename);
 +                        }
 +                    }
 +                    fprintf(stderr,"\n");
 +                    
 +                    gmx_fatal(FARGS,"File appending requested, but only %d of the %d output files are present",nexist,nfiles);
 +                }
 +            }
 +            
 +            if (bAppend)
 +            {
 +                if (nfiles == 0)
 +                {
 +                    gmx_fatal(FARGS,"File appending requested, but no output file information is stored in the checkpoint file");
 +                }
 +                fn = outputfiles[0].filename;
 +                if (strlen(fn) < 4 ||
 +                    gmx_strcasecmp(fn+strlen(fn)-4,ftp2ext(efLOG)) == 0)
 +                {
 +                    gmx_fatal(FARGS,"File appending requested, but the log file is not the first file listed in the checkpoint file");
 +                }
 +                /* Set bAddPart to whether the suffix string '.part' is present
 +                 * in the log file name.
 +                 */
 +                strcpy(suf_up,part_suffix);
 +                upstring(suf_up);
 +                *bAddPart = (strstr(fn,part_suffix) != NULL ||
 +                             strstr(fn,suf_up) != NULL);
 +            }
 +
 +            sfree(outputfiles);
 +        }
 +    }
 +    if (PAR(cr))
 +    {
 +        gmx_bcast(sizeof(*simulation_part),simulation_part,cr);
 +
 +        if (*simulation_part > 0 && bAppendReq)
 +        {
 +            gmx_bcast(sizeof(bAppend),&bAppend,cr);
 +            gmx_bcast(sizeof(*bAddPart),bAddPart,cr);
 +        }
 +    }
 +    if (NULL != cpt_step)
 +    {
 +        *cpt_step = step;
 +    }
 +
 +    return bAppend;
 +}
Simple merge
index 0000000000000000000000000000000000000000,a5040dce0af315075de0a53d967a383844d7e9c3..a5040dce0af315075de0a53d967a383844d7e9c3
mode 000000,100644..100644
--- /dev/null
index 22101ee6361b23de57bf832262da0cb738610f1a,0000000000000000000000000000000000000000..47987bfe4316b95a65f96b5ce23dbd506a806f5e
mode 100644,000000..100644
--- /dev/null
@@@ -1,1588 -1,0 +1,1588 @@@
-     PI("init-fep_state",fep->init_fep_state);
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROningen Mixture of Alchemy and Childrens' Stories
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +/* This file is completely threadsafe - please keep it that way! */
 +#ifdef GMX_THREAD_MPI
 +#include <thread_mpi.h>
 +#endif
 +
 +
 +#include <stdio.h>
 +#include "smalloc.h"
 +#include "typedefs.h"
 +#include "names.h"
 +#include "txtdump.h"
 +#include "string2.h"
 +#include "vec.h"
 +#include "macros.h"
 +
 +
 +/* Print n spaces to fp and return n (the indentation level). */
 +int pr_indent(FILE *fp,int n)
 +{
 +  int i;
 +
 +  for (i=0; i<n; i++) (void) fprintf(fp," ");
 +  return n;
 +}
 +
 +/* Return non-zero when pointer p is set; when it is NULL, print
 + * "<title>: not available" (indented) instead so callers can skip
 + * the dump of missing data. */
 +int available(FILE *fp,void *p,int indent,const char *title)
 +{
 +  if (!p) {
 +    if (indent > 0)
 +      pr_indent(fp,indent);
 +    (void) fprintf(fp,"%s: not available\n",title);
 +  }
 +  return (p!=NULL);
 +}
 +
 +/* Print "title:" at the current indent and return the increased
 + * indent (indent+INDENT) to use for the section body. */
 +int pr_title(FILE *fp,int indent,const char *title)
 +{
 +  (void) pr_indent(fp,indent);
 +  (void) fprintf(fp,"%s:\n",title);
 +  return (indent+INDENT);
 +}
 +
 +/* Like pr_title, but prints "title (n):" for an n-element section;
 + * returns the indent for the section body. */
 +int pr_title_n(FILE *fp,int indent,const char *title,int n)
 +{
 +  (void) pr_indent(fp,indent);
 +  (void) fprintf(fp,"%s (%d):\n",title,n);
 +  return (indent+INDENT);
 +}
 +
 +/* Like pr_title, but prints "title (n1xn2):" for an n1-by-n2 section;
 + * returns the indent for the section body. */
 +int pr_title_nxn(FILE *fp,int indent,const char *title,int n1,int n2)
 +{
 +  (void) pr_indent(fp,indent);
 +  (void) fprintf(fp,"%s (%dx%d):\n",title,n1,n2);
 +  return (indent+INDENT);
 +}
 +
 +/* Dump an int array as "title[i]=value" lines, one per element.
 + * With bShowNumbers FALSE the index is printed as -1 (stable output
 + * for diffing). Prints nothing but a notice when vec is NULL. */
 +void pr_ivec(FILE *fp,int indent,const char *title,int vec[],int n, gmx_bool bShowNumbers)
 +{
 +  int i;
 +
 +  if (available(fp,vec,indent,title))
 +    {
 +      indent=pr_title_n(fp,indent,title,n);
 +      for (i=0; i<n; i++)
 +        {
 +          (void) pr_indent(fp,indent);
 +          (void) fprintf(fp,"%s[%d]=%d\n",title,bShowNumbers?i:-1,vec[i]);
 +        }
 +    }
 +}
 +
 +/* Dump an int array, compressing runs of consecutive values:
 + * runs of 3 or more elements where vec[j] == vec[j-1]+1 are printed as
 + * one "title[i,...,j] = {a,...,b}" line; shorter runs are printed
 + * element by element like pr_ivec. */
 +void pr_ivec_block(FILE *fp,int indent,const char *title,int vec[],int n, gmx_bool bShowNumbers)
 +{
 +    int i,j;
 +    
 +    if (available(fp,vec,indent,title))
 +    {
 +        indent=pr_title_n(fp,indent,title,n);
 +        i = 0;
 +        while (i < n)
 +        {
 +            /* Extend j to the end of the consecutive run starting at i. */
 +            j = i+1;
 +            while (j < n && vec[j] == vec[j-1]+1)
 +            {
 +                j++;
 +            }
 +            /* Print consecutive groups of 3 or more as blocks */
 +            if (j - i < 3)
 +            {
 +                while(i < j)
 +                {
 +                    (void) pr_indent(fp,indent);
 +                    (void) fprintf(fp,"%s[%d]=%d\n",
 +                                   title,bShowNumbers?i:-1,vec[i]);
 +                    i++;
 +                }
 +            }
 +            else
 +            {
 +                (void) pr_indent(fp,indent);
 +                (void) fprintf(fp,"%s[%d,...,%d] = {%d,...,%d}\n",
 +                               title,
 +                               bShowNumbers?i:-1,
 +                               bShowNumbers?j-1:-1,
 +                               vec[i],vec[j-1]); 
 +                i = j;
 +            }
 +        }
 +    }
 +}
 +
 +/* Dump a gmx_bool array as "title[i]=TRUE/FALSE" lines (via EBOOL).
 + * Index printed as -1 when bShowNumbers is FALSE. */
 +void pr_bvec(FILE *fp,int indent,const char *title,gmx_bool vec[],int n, gmx_bool bShowNumbers)
 +{
 +  int i;
 +
 +  if (available(fp,vec,indent,title))
 +    {
 +      indent=pr_title_n(fp,indent,title,n);
 +      for (i=0; i<n; i++)
 +        {
 +          (void) pr_indent(fp,indent);
 +          (void) fprintf(fp,"%s[%d]=%s\n",title,bShowNumbers?i:-1,
 +                       EBOOL(vec[i]));
 +        }
 +    }
 +}
 +
 +/* Dump an array of integer triplets (ivec) as
 + * "title[i]={x, y, z}" lines, one per vector. */
 +void pr_ivecs(FILE *fp,int indent,const char *title,ivec vec[],int n, gmx_bool bShowNumbers)
 +{
 +  int i,j;
 +
 +  if (available(fp,vec,indent,title))
 +    {  
 +      indent=pr_title_nxn(fp,indent,title,n,DIM);
 +      for (i=0; i<n; i++)
 +        {
 +          (void) pr_indent(fp,indent);
 +          (void) fprintf(fp,"%s[%d]={",title,bShowNumbers?i:-1);
 +          for (j=0; j<DIM; j++)
 +            {
 +              if (j!=0) (void) fprintf(fp,", ");
 +              fprintf(fp,"%d",vec[i][j]);
 +            }
 +          (void) fprintf(fp,"}\n");
 +        }
 +    }
 +}
 +
 +/* Dump a real array as "title[i]=value" lines in %12.5e format. */
 +void pr_rvec(FILE *fp,int indent,const char *title,real vec[],int n, gmx_bool bShowNumbers)
 +{
 +  int i;
 +
 +  if (available(fp,vec,indent,title))
 +    {  
 +      indent=pr_title_n(fp,indent,title,n);
 +      for (i=0; i<n; i++)
 +        {
 +          pr_indent(fp,indent);
 +          fprintf(fp,"%s[%d]=%12.5e\n",title,bShowNumbers?i:-1,vec[i]);
 +        }
 +    }
 +}
 +
 +/* Dump a double array as "title[i]=value" lines in %12.5e format
 + * (double-precision counterpart of pr_rvec). */
 +void pr_dvec(FILE *fp,int indent,const char *title,double vec[],int n, gmx_bool bShowNumbers)
 +{
 +	int i;
 +	
 +	if (available(fp,vec,indent,title))
 +    {  
 +		indent=pr_title_n(fp,indent,title,n);
 +		for (i=0; i<n; i++)
 +        {
 +			pr_indent(fp,indent);
 +			fprintf(fp,"%s[%d]=%12.5e\n",title,bShowNumbers?i:-1,vec[i]);
 +        }
 +    }
 +}
 +
 +
 +/*
 + * NOTE(review): stale commented-out code -- it references 'n' and
 + * 'bShowNumbers', which are not parameters of this signature, and would
 + * not compile if re-enabled.  Kept for reference only.
 +void pr_mat(FILE *fp,int indent,char *title,matrix m)
 +{
 +  int i,j;
 +  
 +  if (available(fp,m,indent,title)) {  
 +    indent=pr_title_n(fp,indent,title,n);
 +    for(i=0; i<n; i++) {
 +      pr_indent(fp,indent);
 +      fprintf(fp,"%s[%d]=%12.5e %12.5e %12.5e\n",
 +            title,bShowNumbers?i:-1,m[i][XX],m[i][YY],m[i][ZZ]);
 +    }
 +  }
 +}
 +*/
 +
 +/* Dump an array of rvecs like pr_rvecs, additionally appending the
 + * Euclidean length ("len=") of each vector. */
 +void pr_rvecs_len(FILE *fp,int indent,const char *title,rvec vec[],int n)
 +{
 +  int i,j;
 +
 +  if (available(fp,vec,indent,title)) {  
 +    indent=pr_title_nxn(fp,indent,title,n,DIM);
 +    for (i=0; i<n; i++) {
 +      (void) pr_indent(fp,indent);
 +      (void) fprintf(fp,"%s[%5d]={",title,i);
 +      for (j=0; j<DIM; j++) {
 +      if (j != 0) 
 +        (void) fprintf(fp,", ");
 +      (void) fprintf(fp,"%12.5e",vec[i][j]);
 +      }
 +      (void) fprintf(fp,"} len=%12.5e\n",norm(vec[i]));
 +    }
 +  }
 +}
 +
 +/* Dump an array of rvecs, one "title[    i]={x, y, z}" line per element.
 + * If the LONGFORMAT environment variable is set, %15.8e is used instead of
 + * the default %12.5e (checked at every call, not cached). */
 +void pr_rvecs(FILE *fp,int indent,const char *title,rvec vec[],int n)
 +{
 +  const char *fshort = "%12.5e";
 +  const char *flong  = "%15.8e";
 +  const char *format;
 +  int i,j;
 +
 +  if (getenv("LONGFORMAT") != NULL)
 +    format = flong;
 +  else
 +    format = fshort;
 +    
 +  if (available(fp,vec,indent,title)) {  
 +    indent=pr_title_nxn(fp,indent,title,n,DIM);
 +    for (i=0; i<n; i++) {
 +      (void) pr_indent(fp,indent);
 +      (void) fprintf(fp,"%s[%5d]={",title,i);
 +      for (j=0; j<DIM; j++) {
 +      if (j != 0) 
 +        (void) fprintf(fp,", ");
 +      (void) fprintf(fp,format,vec[i][j]);
 +      }
 +      (void) fprintf(fp,"}\n");
 +    }
 +  }
 +}
 +
 +
 +/* Dump a real array on a single "title:\t v0 v1 ..." line (%10g each). */
 +void pr_reals(FILE *fp,int indent,const char *title,real *vec,int n)
 +{
 +  int i;
 +    
 +  if (available(fp,vec,indent,title)) {  
 +    (void) pr_indent(fp,indent);
 +    (void) fprintf(fp,"%s:\t",title);
 +    for(i=0; i<n; i++)
 +      fprintf(fp,"  %10g",vec[i]);
 +    (void) fprintf(fp,"\n");
 +  }
 +}
 +
 +/* Double-precision twin of pr_reals: one "title:\t v0 v1 ..." line. */
 +void pr_doubles(FILE *fp,int indent,const char *title,double *vec,int n)
 +{
 +  int i;
 +    
 +  if (available(fp,vec,indent,title)) {  
 +    (void) pr_indent(fp,indent);
 +    (void) fprintf(fp,"%s:\t",title);
 +    for(i=0; i<n; i++)
 +      fprintf(fp,"  %10g",vec[i]);
 +    (void) fprintf(fp,"\n");
 +  }
 +}
 +
 +/* Print one "name = value" line for an int. */
 +static void pr_int(FILE *fp,int indent,const char *title,int i)
 +{
 +  pr_indent(fp,indent);
 +  fprintf(fp,"%-20s = %d\n",title,i);
 +}
 +
 +/* Print one "name = value" line for a gmx_large_int_t (step counter),
 + * formatted via gmx_step_str into a local STEPSTRSIZE buffer. */
 +static void pr_gmx_large_int(FILE *fp,int indent,const char *title,gmx_large_int_t i)
 +{
 +  char buf[STEPSTRSIZE];
 +
 +  pr_indent(fp,indent);
 +  fprintf(fp,"%-20s = %s\n",title,gmx_step_str(i,buf));
 +}
 +
 +/* Print one "name = value" line for a real (%g). */
 +static void pr_real(FILE *fp,int indent,const char *title,real r)
 +{
 +  pr_indent(fp,indent);
 +  fprintf(fp,"%-20s = %g\n",title,r);
 +}
 +
 +/* Print one "name = value" line for a double (%g). */
 +static void pr_double(FILE *fp,int indent,const char *title,double d)
 +{
 +  pr_indent(fp,indent);
 +  fprintf(fp,"%-20s = %g\n",title,d);
 +}
 +
 +/* Print one "name = value" line for a string. */
 +static void pr_str(FILE *fp,int indent,const char *title,const char *s)
 +{
 +  pr_indent(fp,indent);
 +  fprintf(fp,"%-20s = %s\n",title,s);
 +}
 +
 +void pr_qm_opts(FILE *fp,int indent,const char *title,t_grpopts *opts)
 +{
 +  int i,m,j;
 +
 +  fprintf(fp,"%s:\n",title);
 +  
 +  pr_int(fp,indent,"ngQM",opts->ngQM);
 +  if (opts->ngQM > 0) {
 +    pr_ivec(fp,indent,"QMmethod",opts->QMmethod,opts->ngQM,FALSE);
 +    pr_ivec(fp,indent,"QMbasis",opts->QMbasis,opts->ngQM,FALSE);
 +    pr_ivec(fp,indent,"QMcharge",opts->QMcharge,opts->ngQM,FALSE);
 +    pr_ivec(fp,indent,"QMmult",opts->QMmult,opts->ngQM,FALSE);
 +    pr_bvec(fp,indent,"bSH",opts->bSH,opts->ngQM,FALSE);
 +    pr_ivec(fp,indent,"CASorbitals",opts->CASorbitals,opts->ngQM,FALSE);
 +    pr_ivec(fp,indent,"CASelectrons",opts->CASelectrons,opts->ngQM,FALSE);
 +    pr_rvec(fp,indent,"SAon",opts->SAon,opts->ngQM,FALSE);
 +    pr_rvec(fp,indent,"SAon",opts->SAon,opts->ngQM,FALSE);
 +    pr_ivec(fp,indent,"SAsteps",opts->SAsteps,opts->ngQM,FALSE);
 +    pr_bvec(fp,indent,"bOPT",opts->bOPT,opts->ngQM,FALSE);
 +    pr_bvec(fp,indent,"bTS",opts->bTS,opts->ngQM,FALSE);
 +  }
 +}
 +
 +/* Dump the per-group options block: nrdf/ref-t/tau-t per T-coupling group,
 + * simulated-annealing schedules, acceleration, freeze dims and the
 + * energy-group exclusion flag matrix.  bMDPformat switches the key/value
 + * separator from ":" to " = ". */
 +static void pr_grp_opts(FILE *out,int indent,const char *title,t_grpopts *opts,
 +                      gmx_bool bMDPformat)
 +{
 +  int i,m,j;
 +
 +  if (!bMDPformat)
 +    fprintf(out,"%s:\n",title);
 +  
 +  pr_indent(out,indent);
 +  fprintf(out,"nrdf%s",bMDPformat ? " = " : ":");
 +  for(i=0; (i<opts->ngtc); i++)
 +    fprintf(out,"  %10g",opts->nrdf[i]);
 +  fprintf(out,"\n");
 +  
 +  pr_indent(out,indent);
 +  fprintf(out,"ref-t%s",bMDPformat ? " = " : ":");
 +  for(i=0; (i<opts->ngtc); i++)
 +    fprintf(out,"  %10g",opts->ref_t[i]);
 +  fprintf(out,"\n");
 +
 +  pr_indent(out,indent);
 +  fprintf(out,"tau-t%s",bMDPformat ? " = " : ":");
 +  for(i=0; (i<opts->ngtc); i++)
 +    fprintf(out,"  %10g",opts->tau_t[i]);
 +  fprintf(out,"\n");  
 +  
 +  /* Pretty-print the simulated annealing info */
 +  /* NOTE(review): the annealing lines below skip pr_indent, unlike the
 +   * blocks above -- confirm whether that is intentional */
 +  fprintf(out,"anneal%s",bMDPformat ? " = " : ":");
 +  for(i=0; (i<opts->ngtc); i++)
 +    fprintf(out,"  %10s",EANNEAL(opts->annealing[i]));
 +  fprintf(out,"\n");  
 + 
 +  fprintf(out,"ann-npoints%s",bMDPformat ? " = " : ":");
 +  for(i=0; (i<opts->ngtc); i++)
 +    fprintf(out,"  %10d",opts->anneal_npoints[i]);
 +  fprintf(out,"\n");  
 + 
 +  for(i=0; (i<opts->ngtc); i++) {
 +    if(opts->anneal_npoints[i]>0) {
 +      fprintf(out,"ann. times [%d]:\t",i);
 +      for(j=0; (j<opts->anneal_npoints[i]); j++)
 +      fprintf(out,"  %10.1f",opts->anneal_time[i][j]);
 +      fprintf(out,"\n");  
 +      fprintf(out,"ann. temps [%d]:\t",i);
 +      for(j=0; (j<opts->anneal_npoints[i]); j++)
 +      fprintf(out,"  %10.1f",opts->anneal_temp[i][j]);
 +      fprintf(out,"\n");  
 +    }
 +  }
 +  
 +  pr_indent(out,indent);
 +  fprintf(out,"acc:\t");
 +  for(i=0; (i<opts->ngacc); i++)
 +    for(m=0; (m<DIM); m++)
 +      fprintf(out,"  %10g",opts->acc[i][m]);
 +  fprintf(out,"\n");
 +
 +  pr_indent(out,indent);
 +  fprintf(out,"nfreeze:");
 +  for(i=0; (i<opts->ngfrz); i++)
 +    for(m=0; (m<DIM); m++)
 +      fprintf(out,"  %10s",opts->nFreeze[i][m] ? "Y" : "N");
 +  fprintf(out,"\n");
 +
 +
 +  for(i=0; (i<opts->ngener); i++) {
 +    pr_indent(out,indent);
 +    fprintf(out,"energygrp-flags[%3d]:",i);
 +    for(m=0; (m<opts->ngener); m++)
 +      fprintf(out," %d",opts->egp_flags[opts->ngener*i+m]);
 +    fprintf(out,"\n");
 +  }
 +
 +  fflush(out);
 +}
 +
 +/* Dump a (symmetric) 3x3 matrix: in MDP format only the six independent
 + * elements (xx yy zz xy xz yz) on one line, otherwise full rows via
 + * pr_rvecs. */
 +static void pr_matrix(FILE *fp,int indent,const char *title,rvec *m,
 +                    gmx_bool bMDPformat)
 +{
 +  if (bMDPformat)
 +    fprintf(fp,"%-10s    = %g %g %g %g %g %g\n",title,
 +          m[XX][XX],m[YY][YY],m[ZZ][ZZ],m[XX][YY],m[XX][ZZ],m[YY][ZZ]);
 +  else
 +    pr_rvecs(fp,indent,title,m,DIM);
 +}
 +
 +/* Dump one electric-field cosine series: the number of terms n and, when
 + * non-empty, the amplitude (a) and phase (phi) arrays. */
 +static void pr_cosine(FILE *fp,int indent,const char *title,t_cosines *cos,
 +                    gmx_bool bMDPformat)
 +{
 +  int j;
 +  
 +  if (bMDPformat) {
 +    fprintf(fp,"%s = %d\n",title,cos->n);
 +  }
 +  else {
 +    indent=pr_title(fp,indent,title);
 +    (void) pr_indent(fp,indent);
 +    fprintf(fp,"n = %d\n",cos->n);
 +    if (cos->n > 0) {
 +      (void) pr_indent(fp,indent+2);
 +      fprintf(fp,"a =");
 +      for(j=0; (j<cos->n); j++)
 +      fprintf(fp," %e",cos->a[j]);
 +      fprintf(fp,"\n");
 +      (void) pr_indent(fp,indent+2);
 +      fprintf(fp,"phi =");
 +      for(j=0; (j<cos->n); j++)
 +      fprintf(fp," %e",cos->phi[j]);
 +      fprintf(fp,"\n");
 +    }
 +  }
 +}
 +
 +/* Shorthand printers for the inputrec dump routines below; they rely on
 + * variables named 'fp' and 'indent' being in scope at the call site. */
 +#define PS(t,s) pr_str(fp,indent,t,s)
 +#define PI(t,s) pr_int(fp,indent,t,s)
 +#define PSTEP(t,s) pr_gmx_large_int(fp,indent,t,s)
 +#define PR(t,s) pr_real(fp,indent,t,s)
 +#define PD(t,s) pr_double(fp,indent,t,s)
 +
 +/* Dump one pull group (index g): member atoms, weights, pbc atom,
 + * pull vector/initial position and the rate/force constants. */
 +static void pr_pullgrp(FILE *fp,int indent,int g,t_pullgrp *pg)
 +{
 +  pr_indent(fp,indent);
 +  fprintf(fp,"pull-group %d:\n",g);
 +  indent += 2;
 +  pr_ivec_block(fp,indent,"atom",pg->ind,pg->nat,TRUE);
 +  pr_rvec(fp,indent,"weight",pg->weight,pg->nweight,TRUE);
 +  PI("pbcatom",pg->pbcatom);
 +  pr_rvec(fp,indent,"vec",pg->vec,DIM,TRUE);
 +  pr_rvec(fp,indent,"init",pg->init,DIM,TRUE);
 +  PR("rate",pg->rate);
 +  PR("k",pg->k);
 +  PR("kB",pg->kB);
 +}
 +
 +/* Dump the simulated-tempering settings: temperature range, scaling type
 + * and the per-lambda temperature ladder (n_lambda entries).
 + * NOTE(review): bMDPformat is accepted but currently unused here. */
 +static void pr_simtempvals(FILE *fp,int indent,t_simtemp *simtemp, int n_lambda, gmx_bool bMDPformat)
 +{
 +    PR("simtemp_low",simtemp->simtemp_low);
 +    PR("simtemp_high",simtemp->simtemp_high);
 +    PS("simulated-tempering-scaling",ESIMTEMP(simtemp->eSimTempScale));
 +    pr_rvec(fp,indent,"simulated tempering temperatures",simtemp->temperatures,n_lambda,TRUE);
 +}
 +
 +/* Dump the expanded-ensemble settings (Wang-Landau / lambda Monte Carlo),
 + * including the weight-equilibration criterion that matches elmceq.
 + * NOTE(review): bMDPformat is accepted but currently unused here, and the
 + * "mininum-var-min" key below looks misspelled -- confirm it matches the
 + * corresponding mdp option name before changing it. */
 +static void pr_expandedvals(FILE *fp,int indent,t_expanded *expand, int n_lambda, gmx_bool bMDPformat)
 +{
 +
 +    PI("nstexpanded", expand->nstexpanded);
 +    PS("lambda-stats", elamstats_names[expand->elamstats]);
 +    PS("lambda-mc-move", elmcmove_names[expand->elmcmove]);
 +    PI("lmc-repeats",expand->lmc_repeats);
 +    PI("lmc-gibbsdelta",expand->gibbsdeltalam);
 +    PI("lmc-nstart",expand->lmc_forced_nstart);
 +    PS("symmetrized-transition-matrix", EBOOL(expand->bSymmetrizedTMatrix));
 +    PI("nst-transition-matrix",expand->nstTij);
 +    PI("mininum-var-min",expand->minvarmin); /*default is reasonable */
 +    PI("weight-c-range",expand->c_range); /* default is just C=0 */
 +    PR("wl-scale",expand->wl_scale);
 +    PR("init-wl-delta",expand->init_wl_delta);
 +    PR("wl-ratio",expand->wl_ratio);
 +    PS("bWLoneovert",EBOOL(expand->bWLoneovert));
 +    PI("lmc-seed",expand->lmc_seed);
 +    PR("mc-temperature",expand->mc_temp);
 +    PS("lmc-weights-equil",elmceq_names[expand->elmceq]);
 +    if (expand->elmceq == elmceqNUMATLAM)
 +    {
 +        PI("weight-equil-number-all-lambda",expand->equil_n_at_lam);
 +    }
 +    if (expand->elmceq == elmceqSAMPLES)
 +    {
 +        PI("weight-equil-number-samples",expand->equil_samples);
 +    }
 +    if (expand->elmceq == elmceqSTEPS)
 +    {
 +        PI("weight-equil-number-steps",expand->equil_steps);
 +    }
 +    if (expand->elmceq == elmceqWLDELTA)
 +    {
 +        PR("weight-equil-wl-delta",expand->equil_wl_delta);
 +    }
 +    if (expand->elmceq == elmceqRATIO)
 +    {
 +        PR("weight-equil-count-ratio",expand->equil_ratio);
 +    }
 +
 +    pr_indent(fp,indent);
 +    pr_rvec(fp,indent,"init-lambda-weights",expand->init_lambda_weights,n_lambda,TRUE);
 +    PS("init-weights",EBOOL(expand->bInit_weights));
 +}
 +
 +static void pr_fepvals(FILE *fp,int indent,t_lambda *fep, gmx_bool bMDPformat)
 +{
 +    int i,j;
 +
 +    PI("nstdhdl",fep->nstdhdl);
-     PR("sc-sigma_min",fep->sc_sigma_min);
++    PI("init-lambda-state",fep->init_fep_state);
 +    PR("init-lambda",fep->init_lambda);
 +    PR("delta-lambda",fep->delta_lambda);
 +    if (!bMDPformat)
 +    {
 +        PI("n-lambdas",fep->n_lambda);
 +    }
 +    if (fep->n_lambda > 0)
 +    {
 +        pr_indent(fp,indent);
 +        fprintf(fp,"all-lambdas%s\n",bMDPformat ? " = " : ":");
 +        for(i=0; i<efptNR; i++) {
 +            fprintf(fp,"%18s = ",efpt_names[i]);
 +            for(j=0; j<fep->n_lambda; j++)
 +            {
 +                fprintf(fp,"  %10g",fep->all_lambda[i][j]);
 +            }
 +            fprintf(fp,"\n");
 +        }
 +    }
 +
 +    PR("sc-alpha",fep->sc_alpha);
 +    PS("bScCoul",EBOOL(fep->bScCoul));
 +    PS("bScPrintEnergy",EBOOL(fep->bPrintEnergy));
 +    PI("sc-power",fep->sc_power);
 +    PR("sc-r-power",fep->sc_r_power);
 +    PR("sc-sigma",fep->sc_sigma);
++    PR("sc-sigma-min",fep->sc_sigma_min);
 +    PS("separate-dhdl-file", SEPDHDLFILETYPE(fep->separate_dhdl_file));
 +    PS("dhdl-derivatives", DHDLDERIVATIVESTYPE(fep->dhdl_derivatives));
 +    PI("dh-hist-size", fep->dh_hist_size);
 +    PD("dh-hist-spacing", fep->dh_hist_spacing);
 +};
 +
 +/* Dump the COM-pulling setup and every pull group.
 + * The loop runs to ngrp+1 -- presumably group 0 is the reference group and
 + * ngrp counts only the pulled groups; confirm against t_pull. */
 +static void pr_pull(FILE *fp,int indent,t_pull *pull)
 +{
 +  int g;
 +
 +  PS("pull-geometry",EPULLGEOM(pull->eGeom));
 +  pr_ivec(fp,indent,"pull-dim",pull->dim,DIM,TRUE);
 +  PR("pull-r1",pull->cyl_r1);
 +  PR("pull-r0",pull->cyl_r0);
 +  PR("pull-constr-tol",pull->constr_tol);
 +  PI("pull-nstxout",pull->nstxout);
 +  PI("pull-nstfout",pull->nstfout);
 +  PI("pull-ngrp",pull->ngrp);
 +  for(g=0; g<pull->ngrp+1; g++)
 +    pr_pullgrp(fp,indent,g,&pull->grp[g]);
 +}
 +
 +/* Dump one enforced-rotation group (index g): geometry type, member atoms,
 + * reference positions, rotation vector/pivot and force-fit parameters. */
 +static void pr_rotgrp(FILE *fp,int indent,int g,t_rotgrp *rotg)
 +{
 +  pr_indent(fp,indent);
 +  fprintf(fp,"rotation_group %d:\n",g);
 +  indent += 2;
 +  PS("type",EROTGEOM(rotg->eType));
 +  PS("massw",EBOOL(rotg->bMassW));
 +  pr_ivec_block(fp,indent,"atom",rotg->ind,rotg->nat,TRUE);
 +  pr_rvecs(fp,indent,"x_ref",rotg->x_ref,rotg->nat);
 +  pr_rvec(fp,indent,"vec",rotg->vec,DIM,TRUE);
 +  pr_rvec(fp,indent,"pivot",rotg->pivot,DIM,TRUE);
 +  PR("rate",rotg->rate);
 +  PR("k",rotg->k);
 +  PR("slab_dist",rotg->slab_dist);
 +  PR("min_gaussian",rotg->min_gaussian);
 +  PR("epsilon",rotg->eps);
 +  PS("fit_method",EROTFIT(rotg->eFittype));
 +  PI("potfitangle_nstep",rotg->PotAngle_nstep);
 +  PR("potfitangle_step",rotg->PotAngle_step);
 +}
 +
 +/* Dump the enforced-rotation setup and every rotation group. */
 +static void pr_rot(FILE *fp,int indent,t_rot *rot)
 +{
 +  int g;
 +
 +  PI("rot_nstrout",rot->nstrout);
 +  PI("rot_nstsout",rot->nstsout);
 +  PI("rot_ngrp",rot->ngrp);
 +  for(g=0; g<rot->ngrp; g++)
 +    pr_rotgrp(fp,indent,g,&rot->grp[g]);
 +}
 +
 +/* Dump a complete t_inputrec.  With bMDPformat the output approximates mdp
 + * "key = value" syntax; otherwise it is the indented debug-dump format.
 + * Sub-sections (fep, pull, rotation, AdResS, QM/MM, group options) are
 + * delegated to the pr_* helpers above. */
 +void pr_inputrec(FILE *fp,int indent,const char *title,t_inputrec *ir,
 +                 gmx_bool bMDPformat)
 +{
 +  const char *infbuf="inf";
 +  int  i;
 +  
 +  if (available(fp,ir,indent,title)) {
 +    if (!bMDPformat)
 +      indent=pr_title(fp,indent,title);
 +    PS("integrator",EI(ir->eI));
 +    PSTEP("nsteps",ir->nsteps);
 +    PSTEP("init-step",ir->init_step);
 +    PS("ns-type",ENS(ir->ns_type));
 +    PI("nstlist",ir->nstlist);
 +    PI("ndelta",ir->ndelta);
 +    PI("nstcomm",ir->nstcomm);
 +    PS("comm-mode",ECOM(ir->comm_mode));
 +    PI("nstlog",ir->nstlog);
 +    PI("nstxout",ir->nstxout);
 +    PI("nstvout",ir->nstvout);
 +    PI("nstfout",ir->nstfout);
 +    PI("nstcalcenergy",ir->nstcalcenergy);
 +    PI("nstenergy",ir->nstenergy);
 +    PI("nstxtcout",ir->nstxtcout);
 +    PR("init-t",ir->init_t);
 +    PR("delta-t",ir->delta_t);
 +    
 +    PR("xtcprec",ir->xtcprec);
 +    PI("nkx",ir->nkx);
 +    PI("nky",ir->nky);
 +    PI("nkz",ir->nkz);
 +    PI("pme-order",ir->pme_order);
 +    PR("ewald-rtol",ir->ewald_rtol);
 +    /* NOTE(review): ewald-geometry is printed with PR although it looks
 +     * like an enum/int field -- confirm against t_inputrec */
 +    PR("ewald-geometry",ir->ewald_geometry);
 +    PR("epsilon-surface",ir->epsilon_surface);
 +    PS("optimize-fft",EBOOL(ir->bOptFFT));
 +    PS("ePBC",EPBC(ir->ePBC));
 +    PS("bPeriodicMols",EBOOL(ir->bPeriodicMols));
 +    PS("bContinuation",EBOOL(ir->bContinuation));
 +    PS("bShakeSOR",EBOOL(ir->bShakeSOR));
 +    PS("etc",ETCOUPLTYPE(ir->etc));
 +    PS("bPrintNHChains",EBOOL(ir->bPrintNHChains));
 +    PI("nsttcouple",ir->nsttcouple);
 +    PS("epc",EPCOUPLTYPE(ir->epc));
 +    PS("epctype",EPCOUPLTYPETYPE(ir->epct));
 +    PI("nstpcouple",ir->nstpcouple);
 +    PR("tau-p",ir->tau_p);
 +    pr_matrix(fp,indent,"ref-p",ir->ref_p,bMDPformat);
 +    pr_matrix(fp,indent,"compress",ir->compress,bMDPformat);
 +    PS("refcoord-scaling",EREFSCALINGTYPE(ir->refcoord_scaling));
 +    if (bMDPformat)
 +      fprintf(fp,"posres-com  = %g %g %g\n",ir->posres_com[XX],
 +            ir->posres_com[YY],ir->posres_com[ZZ]);
 +    else
 +      pr_rvec(fp,indent,"posres-com",ir->posres_com,DIM,TRUE);
 +    if (bMDPformat)
 +      fprintf(fp,"posres-comB = %g %g %g\n",ir->posres_comB[XX],
 +            ir->posres_comB[YY],ir->posres_comB[ZZ]);
 +    else
 +      pr_rvec(fp,indent,"posres-comB",ir->posres_comB,DIM,TRUE);
 +    PR("rlist",ir->rlist);
 +    PR("rlistlong",ir->rlistlong);
 +    PR("rtpi",ir->rtpi);
 +    PS("coulombtype",EELTYPE(ir->coulombtype));
 +    PR("rcoulomb-switch",ir->rcoulomb_switch);
 +    PR("rcoulomb",ir->rcoulomb);
 +    PS("vdwtype",EVDWTYPE(ir->vdwtype));
 +    PR("rvdw-switch",ir->rvdw_switch);
 +    PR("rvdw",ir->rvdw);
 +    /* 0 encodes an infinite dielectric constant */
 +    if (ir->epsilon_r != 0)
 +      PR("epsilon-r",ir->epsilon_r);
 +    else
 +      PS("epsilon-r",infbuf);
 +    if (ir->epsilon_rf != 0)
 +      PR("epsilon-rf",ir->epsilon_rf);
 +    else
 +      PS("epsilon-rf",infbuf);
 +    PR("tabext",ir->tabext);
 +    PS("implicit-solvent",EIMPLICITSOL(ir->implicit_solvent));
 +    PS("gb-algorithm",EGBALGORITHM(ir->gb_algorithm));
 +    PR("gb-epsilon-solvent",ir->gb_epsilon_solvent);
 +    PI("nstgbradii",ir->nstgbradii);
 +    PR("rgbradii",ir->rgbradii);
 +    PR("gb-saltconc",ir->gb_saltconc);
 +    PR("gb-obc-alpha",ir->gb_obc_alpha);
 +    PR("gb-obc-beta",ir->gb_obc_beta);
 +    PR("gb-obc-gamma",ir->gb_obc_gamma);
 +    PR("gb-dielectric-offset",ir->gb_dielectric_offset);
 +    /* NOTE(review): sa-algorithm prints gb_algorithm, not a separate
 +     * sa_algorithm field -- confirm this is intended */
 +    PS("sa-algorithm",ESAALGORITHM(ir->gb_algorithm));
 +    PR("sa-surface-tension",ir->sa_surface_tension);
 +    PS("DispCorr",EDISPCORR(ir->eDispCorr));
 +    PS("bSimTemp",EBOOL(ir->bSimTemp));
 +    if (ir->bSimTemp) {
 +        pr_simtempvals(fp,indent,ir->simtempvals,ir->fepvals->n_lambda,bMDPformat);
 +    }
 +    PS("free-energy",EFEPTYPE(ir->efep));
 +    if (ir->efep != efepNO || ir->bSimTemp) {
 +        pr_fepvals(fp,indent,ir->fepvals,bMDPformat);
 +    }
 +    if (ir->bExpanded) {
 +        pr_expandedvals(fp,indent,ir->expandedvals,ir->fepvals->n_lambda,bMDPformat);
 +    }
 +
 +    PI("nwall",ir->nwall);
 +    PS("wall-type",EWALLTYPE(ir->wall_type));
 +    PI("wall-atomtype[0]",ir->wall_atomtype[0]);
 +    PI("wall-atomtype[1]",ir->wall_atomtype[1]);
 +    PR("wall-density[0]",ir->wall_density[0]);
 +    PR("wall-density[1]",ir->wall_density[1]);
 +    PR("wall-ewald-zfac",ir->wall_ewald_zfac);
 +
 +    PS("pull",EPULLTYPE(ir->ePull));
 +    if (ir->ePull != epullNO)
 +      pr_pull(fp,indent,ir->pull);
 +    
 +    PS("rotation",EBOOL(ir->bRot));
 +    if (ir->bRot)
 +      pr_rot(fp,indent,ir->rot);
 +
 +    PS("disre",EDISRETYPE(ir->eDisre));
 +    PS("disre-weighting",EDISREWEIGHTING(ir->eDisreWeighting));
 +    PS("disre-mixed",EBOOL(ir->bDisreMixed));
 +    PR("dr-fc",ir->dr_fc);
 +    PR("dr-tau",ir->dr_tau);
 +    /* NOTE(review): nstdisreout/nstorireout are printed with PR; they look
 +     * like integer step intervals (PI) -- confirm against t_inputrec */
 +    PR("nstdisreout",ir->nstdisreout);
 +    PR("orires-fc",ir->orires_fc);
 +    PR("orires-tau",ir->orires_tau);
 +    PR("nstorireout",ir->nstorireout);
 +
 +    PR("dihre-fc",ir->dihre_fc);
 +    
 +    PR("em-stepsize",ir->em_stepsize);
 +    PR("em-tol",ir->em_tol);
 +    PI("niter",ir->niter);
 +    PR("fc-stepsize",ir->fc_stepsize);
 +    PI("nstcgsteep",ir->nstcgsteep);
 +    PI("nbfgscorr",ir->nbfgscorr);
 +
 +    PS("ConstAlg",ECONSTRTYPE(ir->eConstrAlg));
 +    PR("shake-tol",ir->shake_tol);
 +    PI("lincs-order",ir->nProjOrder);
 +    PR("lincs-warnangle",ir->LincsWarnAngle);
 +    PI("lincs-iter",ir->nLincsIter);
 +    PR("bd-fric",ir->bd_fric);
 +    PI("ld-seed",ir->ld_seed);
 +    PR("cos-accel",ir->cos_accel);
 +    pr_matrix(fp,indent,"deform",ir->deform,bMDPformat);
 +
 +    PS("adress",EBOOL(ir->bAdress));
 +    if (ir->bAdress){
 +        PS("adress_type",EADRESSTYPE(ir->adress->type));
 +        PR("adress_const_wf",ir->adress->const_wf);
 +        PR("adress_ex_width",ir->adress->ex_width);
 +        PR("adress_hy_width",ir->adress->hy_width);
 +        PS("adress_interface_correction",EADRESSICTYPE(ir->adress->icor));
 +        PS("adress_site",EADRESSSITETYPE(ir->adress->site));
 +        PR("adress_ex_force_cap",ir->adress->ex_forcecap);
 +        PS("adress_do_hybridpairs", EBOOL(ir->adress->do_hybridpairs));
 +
 +        pr_rvec(fp,indent,"adress_reference_coords",ir->adress->refs,DIM,TRUE);
 +    }
 +    PI("userint1",ir->userint1);
 +    PI("userint2",ir->userint2);
 +    PI("userint3",ir->userint3);
 +    PI("userint4",ir->userint4);
 +    PR("userreal1",ir->userreal1);
 +    PR("userreal2",ir->userreal2);
 +    PR("userreal3",ir->userreal3);
 +    PR("userreal4",ir->userreal4);
 +    pr_grp_opts(fp,indent,"grpopts",&(ir->opts),bMDPformat);
 +    pr_cosine(fp,indent,"efield-x",&(ir->ex[XX]),bMDPformat);
 +    pr_cosine(fp,indent,"efield-xt",&(ir->et[XX]),bMDPformat);
 +    pr_cosine(fp,indent,"efield-y",&(ir->ex[YY]),bMDPformat);
 +    pr_cosine(fp,indent,"efield-yt",&(ir->et[YY]),bMDPformat);
 +    pr_cosine(fp,indent,"efield-z",&(ir->ex[ZZ]),bMDPformat);
 +    pr_cosine(fp,indent,"efield-zt",&(ir->et[ZZ]),bMDPformat);
 +    PS("bQMMM",EBOOL(ir->bQMMM));
 +    PI("QMconstraints",ir->QMconstraints);
 +    PI("QMMMscheme",ir->QMMMscheme);
 +    PR("scalefactor",ir->scalefactor);
 +    pr_qm_opts(fp,indent,"qm-opts",&(ir->opts));
 +  }
 +}
 +/* NOTE(review): only PS/PR/PI are undefined here; PSTEP and PD remain
 + * defined for the rest of the file -- confirm that is intentional. */
 +#undef PS
 +#undef PR
 +#undef PI
 +
 +/* Print A- and B-state parameters of a harmonic interaction, with the
 + * caller-supplied labels r (equilibrium value) and kr (force constant). */
 +static void pr_harm(FILE *fp,t_iparams *iparams,const char *r,const char *kr)
 +{
 +  fprintf(fp,"%sA=%12.5e, %sA=%12.5e, %sB=%12.5e, %sB=%12.5e\n",
 +        r,iparams->harmonic.rA,kr,iparams->harmonic.krA,
 +        r,iparams->harmonic.rB,kr,iparams->harmonic.krB);
 +}
 +
 +void pr_iparams(FILE *fp,t_functype ftype,t_iparams *iparams)
 +{
 +  int i;
 +  real VA[4],VB[4],*rbcA,*rbcB;
 +
 +  switch (ftype) {
 +  case F_ANGLES:
 +  case F_G96ANGLES:
 +    pr_harm(fp,iparams,"th","ct");
 +    break;
 +  case F_CROSS_BOND_BONDS:
 +    fprintf(fp,"r1e=%15.8e, r2e=%15.8e, krr=%15.8e\n",
 +          iparams->cross_bb.r1e,iparams->cross_bb.r2e,
 +          iparams->cross_bb.krr);
 +    break;
 +  case F_CROSS_BOND_ANGLES:
 +    fprintf(fp,"r1e=%15.8e, r1e=%15.8e, r3e=%15.8e, krt=%15.8e\n",
 +          iparams->cross_ba.r1e,iparams->cross_ba.r2e,
 +          iparams->cross_ba.r3e,iparams->cross_ba.krt);
 +    break;
 +  case F_LINEAR_ANGLES:
 +    fprintf(fp,"klinA=%15.8e, aA=%15.8e, klinB=%15.8e, aB=%15.8e\n",
 +            iparams->linangle.klinA,iparams->linangle.aA,
 +            iparams->linangle.klinB,iparams->linangle.aB);
 +    break;
 +  case F_UREY_BRADLEY:
 +      fprintf(fp,"thetaA=%15.8e, kthetaA=%15.8e, r13A=%15.8e, kUBA=%15.8e, thetaB=%15.8e, kthetaB=%15.8e, r13B=%15.8e, kUBB=%15.8e\n",iparams->u_b.thetaA,iparams->u_b.kthetaA,iparams->u_b.r13A,iparams->u_b.kUBA,iparams->u_b.thetaB,iparams->u_b.kthetaB,iparams->u_b.r13B,iparams->u_b.kUBB);
 +    break;
 +  case F_QUARTIC_ANGLES:
 +    fprintf(fp,"theta=%15.8e",iparams->qangle.theta);
 +    for(i=0; i<5; i++)
 +      fprintf(fp,", c%c=%15.8e",'0'+i,iparams->qangle.c[i]);
 +    fprintf(fp,"\n");
 +    break;
 +  case F_BHAM:
 +    fprintf(fp,"a=%15.8e, b=%15.8e, c=%15.8e\n",
 +          iparams->bham.a,iparams->bham.b,iparams->bham.c);
 +    break;
 +  case F_BONDS:
 +  case F_G96BONDS:
 +  case F_HARMONIC:
 +    pr_harm(fp,iparams,"b0","cb");
 +    break;
 +  case F_IDIHS:
 +    pr_harm(fp,iparams,"xi","cx");
 +    break;
 +  case F_MORSE:
 +    fprintf(fp,"b0A=%15.8e, cbA=%15.8e, betaA=%15.8e, b0B=%15.8e, cbB=%15.8e, betaB=%15.8e\n",
 +            iparams->morse.b0A,iparams->morse.cbA,iparams->morse.betaA,
 +            iparams->morse.b0B,iparams->morse.cbB,iparams->morse.betaB);
 +    break;
 +  case F_CUBICBONDS:
 +    fprintf(fp,"b0=%15.8e, kb=%15.8e, kcub=%15.8e\n",
 +          iparams->cubic.b0,iparams->cubic.kb,iparams->cubic.kcub);
 +    break;
 +  case F_CONNBONDS:
 +    fprintf(fp,"\n");
 +    break;
 +  case F_FENEBONDS:
 +    fprintf(fp,"bm=%15.8e, kb=%15.8e\n",iparams->fene.bm,iparams->fene.kb);
 +    break;
 +  case F_RESTRBONDS:
 +      fprintf(fp,"lowA=%15.8e, up1A=%15.8e, up2A=%15.8e, kA=%15.8e, lowB=%15.8e, up1B=%15.8e, up2B=%15.8e, kB=%15.8e,\n",
 +              iparams->restraint.lowA,iparams->restraint.up1A,
 +              iparams->restraint.up2A,iparams->restraint.kA,
 +              iparams->restraint.lowB,iparams->restraint.up1B,
 +              iparams->restraint.up2B,iparams->restraint.kB);
 +      break;
 +  case F_TABBONDS:
 +  case F_TABBONDSNC:
 +  case F_TABANGLES:
 +  case F_TABDIHS:
 +    fprintf(fp,"tab=%d, kA=%15.8e, kB=%15.8e\n",
 +          iparams->tab.table,iparams->tab.kA,iparams->tab.kB);
 +    break;
 +  case F_POLARIZATION:
 +    fprintf(fp,"alpha=%15.8e\n",iparams->polarize.alpha);
 +    break;
 +  case F_ANHARM_POL:
 +    fprintf(fp,"alpha=%15.8e drcut=%15.8e khyp=%15.8e\n",
 +            iparams->anharm_polarize.alpha,
 +            iparams->anharm_polarize.drcut,
 +            iparams->anharm_polarize.khyp);
 +    break;
 +  case F_THOLE_POL:
 +    fprintf(fp,"a=%15.8e, alpha1=%15.8e, alpha2=%15.8e, rfac=%15.8e\n",
 +          iparams->thole.a,iparams->thole.alpha1,iparams->thole.alpha2,
 +          iparams->thole.rfac);
 +    break;
 +  case F_WATER_POL:
 +    fprintf(fp,"al_x=%15.8e, al_y=%15.8e, al_z=%15.8e, rOH=%9.6f, rHH=%9.6f, rOD=%9.6f\n",
 +          iparams->wpol.al_x,iparams->wpol.al_y,iparams->wpol.al_z,
 +          iparams->wpol.rOH,iparams->wpol.rHH,iparams->wpol.rOD);
 +    break;
 +  case F_LJ:
 +    fprintf(fp,"c6=%15.8e, c12=%15.8e\n",iparams->lj.c6,iparams->lj.c12);
 +    break;
 +  case F_LJ14:
 +    fprintf(fp,"c6A=%15.8e, c12A=%15.8e, c6B=%15.8e, c12B=%15.8e\n",
 +          iparams->lj14.c6A,iparams->lj14.c12A,
 +          iparams->lj14.c6B,iparams->lj14.c12B);
 +    break;
 +  case F_LJC14_Q:
 +    fprintf(fp,"fqq=%15.8e, qi=%15.8e, qj=%15.8e, c6=%15.8e, c12=%15.8e\n",
 +          iparams->ljc14.fqq,
 +          iparams->ljc14.qi,iparams->ljc14.qj,
 +          iparams->ljc14.c6,iparams->ljc14.c12);
 +    break;
 +  case F_LJC_PAIRS_NB:
 +    fprintf(fp,"qi=%15.8e, qj=%15.8e, c6=%15.8e, c12=%15.8e\n",
 +          iparams->ljcnb.qi,iparams->ljcnb.qj,
 +          iparams->ljcnb.c6,iparams->ljcnb.c12);
 +    break;
 +  case F_PDIHS:
 +  case F_PIDIHS:
 +  case F_ANGRES:
 +  case F_ANGRESZ:
 +    fprintf(fp,"phiA=%15.8e, cpA=%15.8e, phiB=%15.8e, cpB=%15.8e, mult=%d\n",
 +          iparams->pdihs.phiA,iparams->pdihs.cpA,
 +          iparams->pdihs.phiB,iparams->pdihs.cpB,
 +          iparams->pdihs.mult);
 +    break;
 +  case F_DISRES:
 +    fprintf(fp,"label=%4d, type=%1d, low=%15.8e, up1=%15.8e, up2=%15.8e, fac=%15.8e)\n",
 +          iparams->disres.label,iparams->disres.type,
 +          iparams->disres.low,iparams->disres.up1,
 +          iparams->disres.up2,iparams->disres.kfac);
 +    break;
 +  case F_ORIRES:
 +    fprintf(fp,"ex=%4d, label=%d, power=%4d, c=%15.8e, obs=%15.8e, kfac=%15.8e)\n",
 +          iparams->orires.ex,iparams->orires.label,iparams->orires.power,
 +          iparams->orires.c,iparams->orires.obs,iparams->orires.kfac);
 +    break;
 +  case F_DIHRES:
 +      fprintf(fp,"phiA=%15.8e, dphiA=%15.8e, kfacA=%15.8e, phiB=%15.8e, dphiB=%15.8e, kfacB=%15.8e\n",
 +              iparams->dihres.phiA,iparams->dihres.dphiA,iparams->dihres.kfacA,
 +              iparams->dihres.phiB,iparams->dihres.dphiB,iparams->dihres.kfacB);
 +    break;
 +  case F_POSRES:
 +    fprintf(fp,"pos0A=(%15.8e,%15.8e,%15.8e), fcA=(%15.8e,%15.8e,%15.8e), pos0B=(%15.8e,%15.8e,%15.8e), fcB=(%15.8e,%15.8e,%15.8e)\n",
 +          iparams->posres.pos0A[XX],iparams->posres.pos0A[YY],
 +          iparams->posres.pos0A[ZZ],iparams->posres.fcA[XX],
 +          iparams->posres.fcA[YY],iparams->posres.fcA[ZZ],
 +          iparams->posres.pos0B[XX],iparams->posres.pos0B[YY],
 +          iparams->posres.pos0B[ZZ],iparams->posres.fcB[XX],
 +          iparams->posres.fcB[YY],iparams->posres.fcB[ZZ]);
 +    break;
 +  case F_RBDIHS:
 +    for (i=0; i<NR_RBDIHS; i++) 
 +      fprintf(fp,"%srbcA[%d]=%15.8e",i==0?"":", ",i,iparams->rbdihs.rbcA[i]);
 +    fprintf(fp,"\n");
 +    for (i=0; i<NR_RBDIHS; i++) 
 +      fprintf(fp,"%srbcB[%d]=%15.8e",i==0?"":", ",i,iparams->rbdihs.rbcB[i]);
 +    fprintf(fp,"\n");
 +    break;
 +  case F_FOURDIHS:
 +    /* Use the OPLS -> Ryckaert-Bellemans formula backwards to get the
 +     * OPLS potential constants back.
 +     */
 +    rbcA = iparams->rbdihs.rbcA;
 +    rbcB = iparams->rbdihs.rbcB;
 +
 +    VA[3] = -0.25*rbcA[4];
 +    VA[2] = -0.5*rbcA[3];
 +    VA[1] = 4.0*VA[3]-rbcA[2];
 +    VA[0] = 3.0*VA[2]-2.0*rbcA[1];
 +
 +    VB[3] = -0.25*rbcB[4];
 +    VB[2] = -0.5*rbcB[3];
 +    VB[1] = 4.0*VB[3]-rbcB[2];
 +    VB[0] = 3.0*VB[2]-2.0*rbcB[1];
 +
 +    for (i=0; i<NR_FOURDIHS; i++) 
 +      fprintf(fp,"%sFourA[%d]=%15.8e",i==0?"":", ",i,VA[i]);
 +    fprintf(fp,"\n");
 +    for (i=0; i<NR_FOURDIHS; i++) 
 +      fprintf(fp,"%sFourB[%d]=%15.8e",i==0?"":", ",i,VB[i]);
 +    fprintf(fp,"\n");
 +    break;
 +   
 +  case F_CONSTR:
 +  case F_CONSTRNC:
 +    fprintf(fp,"dA=%15.8e, dB=%15.8e\n",iparams->constr.dA,iparams->constr.dB);
 +    break;
 +  case F_SETTLE:
 +    fprintf(fp,"doh=%15.8e, dhh=%15.8e\n",iparams->settle.doh,
 +          iparams->settle.dhh);
 +    break;
 +  case F_VSITE2:
 +    fprintf(fp,"a=%15.8e\n",iparams->vsite.a);
 +    break;
 +  case F_VSITE3:
 +  case F_VSITE3FD:
 +  case F_VSITE3FAD:
 +    fprintf(fp,"a=%15.8e, b=%15.8e\n",iparams->vsite.a,iparams->vsite.b);
 +    break;
 +  case F_VSITE3OUT:
 +  case F_VSITE4FD:
 +  case F_VSITE4FDN:
 +    fprintf(fp,"a=%15.8e, b=%15.8e, c=%15.8e\n",
 +          iparams->vsite.a,iparams->vsite.b,iparams->vsite.c);
 +    break;
 +  case F_VSITEN:
 +    fprintf(fp,"n=%2d, a=%15.8e\n",iparams->vsiten.n,iparams->vsiten.a);
 +    break;
 +  case F_GB12:
 +  case F_GB13:
 +  case F_GB14:
 +    fprintf(fp, "sar=%15.8e, st=%15.8e, pi=%15.8e, gbr=%15.8e, bmlt=%15.8e\n",iparams->gb.sar,iparams->gb.st,iparams->gb.pi,iparams->gb.gbr,iparams->gb.bmlt);
 +    break;              
 +  case F_CMAP:
 +    fprintf(fp, "cmapA=%1d, cmapB=%1d\n",iparams->cmap.cmapA, iparams->cmap.cmapB);
 +    break;              
 +  default:
 +    gmx_fatal(FARGS,"unknown function type %d (%s) in %s line %d",
 +            ftype,interaction_function[ftype].name,__FILE__,__LINE__);
 +  }
 +}
 +
 +/* Dump one interaction list: the flat iatoms array is decoded as
 + * [type, atom...] records whose atom count comes from
 + * interaction_function[ftype].nratoms.  The #ifdef DEBUG branch prints the
 + * raw array instead. */
 +void pr_ilist(FILE *fp,int indent,const char *title,
 +              t_functype *functype,t_ilist *ilist, gmx_bool bShowNumbers)
 +{
 +    int i,j,k,type,ftype;
 +    t_iatom *iatoms;
 +    
 +    if (available(fp,ilist,indent,title) && ilist->nr > 0)
 +    {  
 +        indent=pr_title(fp,indent,title);
 +        (void) pr_indent(fp,indent);
 +        fprintf(fp,"nr: %d\n",ilist->nr);
 +        if (ilist->nr > 0) {
 +            (void) pr_indent(fp,indent);
 +            fprintf(fp,"iatoms:\n");
 +            iatoms=ilist->iatoms;
 +            for (i=j=0; i<ilist->nr;) {
 +#ifndef DEBUG
 +                (void) pr_indent(fp,indent+INDENT);
 +                type=*(iatoms++);
 +                ftype=functype[type];
 +                (void) fprintf(fp,"%d type=%d (%s)",
 +                               bShowNumbers?j:-1,bShowNumbers?type:-1,
 +                               interaction_function[ftype].name);
 +                j++;
 +                for (k=0; k<interaction_function[ftype].nratoms; k++)
 +                    (void) fprintf(fp," %u",*(iatoms++));
 +                (void) fprintf(fp,"\n");
 +                i+=1+interaction_function[ftype].nratoms;
 +#else
 +                fprintf(fp,"%5d%5d\n",i,iatoms[i]);
 +                i++;
 +#endif
 +            }
 +        }
 +    }
 +}
 +
 +/* Dump the CMAP correction grids: for each grid, rows of
 + * V/dVdx/dVdy/d2dV values, with a phi value header (starting at -180
 + * degrees, step 360/grid_spacing) at the start of each grid row. */
 +static void pr_cmap(FILE *fp, int indent, const char *title,
 +                    gmx_cmap_t *cmap_grid, gmx_bool bShowNumbers)
 +{
 +    int i,j,nelem;
 +    real dx,idx;
 +      
 +    dx    = 360.0 / cmap_grid->grid_spacing;
 +    nelem = cmap_grid->grid_spacing*cmap_grid->grid_spacing;
 +      
 +    if(available(fp,cmap_grid,indent,title))
 +    {
 +        fprintf(fp,"%s\n",title);
 +              
 +        for(i=0;i<cmap_grid->ngrid;i++)
 +        {
 +            idx = -180.0;
 +            fprintf(fp,"%8s %8s %8s %8s\n","V","dVdx","dVdy","d2dV");
 +                      
 +            fprintf(fp,"grid[%3d]={\n",bShowNumbers?i:-1);
 +                      
 +            for(j=0;j<nelem;j++)
 +            {
 +                if( (j%cmap_grid->grid_spacing)==0)
 +                {
 +                    fprintf(fp,"%8.1f\n",idx);
 +                    idx+=dx;
 +                }
 +                              
 +                /* 4 values per grid point: V and its derivatives */
 +                fprintf(fp,"%8.3f ",cmap_grid->cmapdata[i].cmap[j*4]);
 +                fprintf(fp,"%8.3f ",cmap_grid->cmapdata[i].cmap[j*4+1]);
 +                fprintf(fp,"%8.3f ",cmap_grid->cmapdata[i].cmap[j*4+2]);
 +                fprintf(fp,"%8.3f\n",cmap_grid->cmapdata[i].cmap[j*4+3]);
 +            }
 +            fprintf(fp,"\n");
 +        }
 +    }
 +      
 +}
 +
 +void pr_ffparams(FILE *fp,int indent,const char *title,
 +                 gmx_ffparams_t *ffparams,
 +                 gmx_bool bShowNumbers)
 +{
 +  int i,j;
 +  
 +  indent=pr_title(fp,indent,title);
 +  (void) pr_indent(fp,indent);
 +  (void) fprintf(fp,"atnr=%d\n",ffparams->atnr);
 +  (void) pr_indent(fp,indent);
 +  (void) fprintf(fp,"ntypes=%d\n",ffparams->ntypes);
 +  for (i=0; i<ffparams->ntypes; i++) {
 +      (void) pr_indent(fp,indent+INDENT);
 +      (void) fprintf(fp,"functype[%d]=%s, ",
 +                     bShowNumbers?i:-1,
 +                     interaction_function[ffparams->functype[i]].name);
 +      pr_iparams(fp,ffparams->functype[i],&ffparams->iparams[i]);
 +  }
 +  (void) pr_double(fp,indent,"reppow",ffparams->reppow);
 +  (void) pr_real(fp,indent,"fudgeQQ",ffparams->fudgeQQ);
 +  pr_cmap(fp,indent,"cmap",&ffparams->cmap_grid,bShowNumbers);
 +}
 +
 +void pr_idef(FILE *fp,int indent,const char *title,t_idef *idef, gmx_bool bShowNumbers)
 +{
 +  int i,j;
 +  
 +  if (available(fp,idef,indent,title)) {  
 +    indent=pr_title(fp,indent,title);
 +    (void) pr_indent(fp,indent);
 +    (void) fprintf(fp,"atnr=%d\n",idef->atnr);
 +    (void) pr_indent(fp,indent);
 +    (void) fprintf(fp,"ntypes=%d\n",idef->ntypes);
 +    for (i=0; i<idef->ntypes; i++) {
 +      (void) pr_indent(fp,indent+INDENT);
 +      (void) fprintf(fp,"functype[%d]=%s, ",
 +                   bShowNumbers?i:-1,
 +                   interaction_function[idef->functype[i]].name);
 +      pr_iparams(fp,idef->functype[i],&idef->iparams[i]);
 +    }
 +    (void) pr_real(fp,indent,"fudgeQQ",idef->fudgeQQ);
 +
 +    for(j=0; (j<F_NRE); j++)
 +      pr_ilist(fp,indent,interaction_function[j].longname,
 +               idef->functype,&idef->il[j],bShowNumbers);
 +  }
 +}
 +
 +static int pr_block_title(FILE *fp,int indent,const char *title,t_block *block)
 +{
 +  int i;
 +
 +  if (available(fp,block,indent,title))
 +    {
 +      indent=pr_title(fp,indent,title);
 +      (void) pr_indent(fp,indent);
 +      (void) fprintf(fp,"nr=%d\n",block->nr);
 +    }
 +  return indent;
 +}
 +
 +static int pr_blocka_title(FILE *fp,int indent,const char *title,t_blocka *block)
 +{
 +  int i;
 +
 +  if (available(fp,block,indent,title))
 +    {
 +      indent=pr_title(fp,indent,title);
 +      (void) pr_indent(fp,indent);
 +      (void) fprintf(fp,"nr=%d\n",block->nr);
 +      (void) pr_indent(fp,indent);
 +      (void) fprintf(fp,"nra=%d\n",block->nra);
 +    }
 +  return indent;
 +}
 +
 +static void low_pr_blocka(FILE *fp,int indent,const char *title,t_blocka *block, gmx_bool bShowNumbers)
 +{
 +  int i;
 +  
 +  if (available(fp,block,indent,title))
 +    {
 +      indent=pr_blocka_title(fp,indent,title,block);
 +      for (i=0; i<=block->nr; i++)
 +        {
 +          (void) pr_indent(fp,indent+INDENT);
 +          (void) fprintf(fp,"%s->index[%d]=%u\n",
 +                       title,bShowNumbers?i:-1,block->index[i]);
 +        }
 +      for (i=0; i<block->nra; i++)
 +        {
 +          (void) pr_indent(fp,indent+INDENT);
 +          (void) fprintf(fp,"%s->a[%d]=%u\n",
 +                       title,bShowNumbers?i:-1,block->a[i]);
 +        }
 +    }
 +}
 +
 +void pr_block(FILE *fp,int indent,const char *title,t_block *block,gmx_bool bShowNumbers)
 +{
 +  int i,j,ok,size,start,end;
 +  
 +  if (available(fp,block,indent,title))
 +    {
 +      indent=pr_block_title(fp,indent,title,block);
 +      start=0;
 +      end=start;
 +      if ((ok=(block->index[start]==0))==0)
 +        (void) fprintf(fp,"block->index[%d] should be 0\n",start);
 +      else
 +        for (i=0; i<block->nr; i++)
 +          {
 +            end=block->index[i+1];
 +            size=pr_indent(fp,indent);
 +            if (end<=start)
 +              size+=fprintf(fp,"%s[%d]={}\n",title,i);
 +            else
 +              size+=fprintf(fp,"%s[%d]={%d..%d}\n",
 +                          title,bShowNumbers?i:-1,
 +                          bShowNumbers?start:-1,bShowNumbers?end-1:-1);
 +            start=end;
 +          }
 +    }
 +}
 +
 +void pr_blocka(FILE *fp,int indent,const char *title,t_blocka *block,gmx_bool bShowNumbers)
 +{
 +  int i,j,ok,size,start,end;
 +  
 +  if (available(fp,block,indent,title))
 +    {
 +      indent=pr_blocka_title(fp,indent,title,block);
 +      start=0;
 +      end=start;
 +      if ((ok=(block->index[start]==0))==0)
 +        (void) fprintf(fp,"block->index[%d] should be 0\n",start);
 +      else
 +        for (i=0; i<block->nr; i++)
 +          {
 +            end=block->index[i+1];
 +            size=pr_indent(fp,indent);
 +            if (end<=start)
 +              size+=fprintf(fp,"%s[%d]={",title,i);
 +            else
 +              size+=fprintf(fp,"%s[%d][%d..%d]={",
 +                          title,bShowNumbers?i:-1,
 +                          bShowNumbers?start:-1,bShowNumbers?end-1:-1);
 +            for (j=start; j<end; j++)
 +              {
 +                if (j>start) size+=fprintf(fp,", ");
 +                if ((size)>(USE_WIDTH))
 +                  {
 +                    (void) fprintf(fp,"\n");
 +                    size=pr_indent(fp,indent+INDENT);
 +                  }
 +                size+=fprintf(fp,"%u",block->a[j]);
 +              }
 +            (void) fprintf(fp,"}\n");
 +            start=end;
 +          }
 +      if ((end!=block->nra)||(!ok)) 
 +        {
 +          (void) pr_indent(fp,indent);
 +          (void) fprintf(fp,"tables inconsistent, dumping complete tables:\n");
 +          low_pr_blocka(fp,indent,title,block,bShowNumbers);
 +        }
 +    }
 +}
 +
 +static void pr_strings(FILE *fp,int indent,const char *title,char ***nm,int n, gmx_bool bShowNumbers)
 +{
 +  int i;
 +
 +  if (available(fp,nm,indent,title))
 +    {  
 +      indent=pr_title_n(fp,indent,title,n);
 +      for (i=0; i<n; i++)
 +        {
 +          (void) pr_indent(fp,indent);
 +          (void) fprintf(fp,"%s[%d]={name=\"%s\"}\n",
 +                       title,bShowNumbers?i:-1,*(nm[i]));
 +        }
 +    }
 +}
 +
 +static void pr_strings2(FILE *fp,int indent,const char *title,
 +                      char ***nm,char ***nmB,int n, gmx_bool bShowNumbers)
 +{
 +  int i;
 +
 +  if (available(fp,nm,indent,title))
 +    {  
 +      indent=pr_title_n(fp,indent,title,n);
 +      for (i=0; i<n; i++)
 +        {
 +          (void) pr_indent(fp,indent);
 +          (void) fprintf(fp,"%s[%d]={name=\"%s\",nameB=\"%s\"}\n",
 +                       title,bShowNumbers?i:-1,*(nm[i]),*(nmB[i]));
 +        }
 +    }
 +}
 +
 +static void pr_resinfo(FILE *fp,int indent,const char *title,t_resinfo *resinfo,int n, gmx_bool bShowNumbers)
 +{
 +    int i;
 +    
 +    if (available(fp,resinfo,indent,title))
 +    {  
 +        indent=pr_title_n(fp,indent,title,n);
 +        for (i=0; i<n; i++)
 +        {
 +            (void) pr_indent(fp,indent);
 +            (void) fprintf(fp,"%s[%d]={name=\"%s\", nr=%d, ic='%c'}\n",
 +                           title,bShowNumbers?i:-1,
 +                           *(resinfo[i].name),resinfo[i].nr,
 +                           (resinfo[i].ic == '\0') ? ' ' : resinfo[i].ic);
 +        }
 +    }
 +}
 +
 +static void pr_atom(FILE *fp,int indent,const char *title,t_atom *atom,int n)
 +{
 +  int i,j;
 +  
 +  if (available(fp,atom,indent,title)) {  
 +    indent=pr_title_n(fp,indent,title,n);
 +    for (i=0; i<n; i++) {
 +      (void) pr_indent(fp,indent);
 +      fprintf(fp,"%s[%6d]={type=%3d, typeB=%3d, ptype=%8s, m=%12.5e, "
 +              "q=%12.5e, mB=%12.5e, qB=%12.5e, resind=%5d, atomnumber=%3d}\n",
 +              title,i,atom[i].type,atom[i].typeB,ptype_str[atom[i].ptype],
 +              atom[i].m,atom[i].q,atom[i].mB,atom[i].qB,
 +              atom[i].resind,atom[i].atomnumber);
 +    }
 +  }
 +}
 +
 +static void pr_grps(FILE *fp,int indent,const char *title,t_grps grps[],
 +                  char **grpname[], gmx_bool bShowNumbers)
 +{
 +    int i,j;
 +
 +    for(i=0; (i<egcNR); i++)
 +    {
 +        fprintf(fp,"%s[%-12s] nr=%d, name=[",title,gtypes[i],grps[i].nr);
 +        for(j=0; (j<grps[i].nr); j++)
 +        {
 +            fprintf(fp," %s",*(grpname[grps[i].nm_ind[j]]));
 +        }
 +        fprintf(fp,"]\n");
 +    }
 +}
 +
 +static void pr_groups(FILE *fp,int indent,const char *title,
 +                      gmx_groups_t *groups,
 +                      gmx_bool bShowNumbers)
 +{
 +    int grpnr[egcNR];
 +    int nat_max,i,g;
 +
 +    pr_grps(fp,indent,"grp",groups->grps,groups->grpname,bShowNumbers);
 +    pr_strings(fp,indent,"grpname",groups->grpname,groups->ngrpname,bShowNumbers);
 +
 +    (void) pr_indent(fp,indent);
 +    fprintf(fp,"groups          ");
 +    for(g=0; g<egcNR; g++)
 +    {
 +       printf(" %5.5s",gtypes[g]);
 +    }
 +    printf("\n");
 +
 +    (void) pr_indent(fp,indent);
 +    fprintf(fp,"allocated       ");
 +    nat_max = 0;
 +    for(g=0; g<egcNR; g++)
 +    {
 +        printf(" %5d",groups->ngrpnr[g]);
 +        nat_max = max(nat_max,groups->ngrpnr[g]);
 +    }
 +    printf("\n");
 +
 +    if (nat_max == 0)
 +    {
 +        (void) pr_indent(fp,indent);
 +        fprintf(fp,"groupnr[%5s] =","*");
 +        for(g=0; g<egcNR; g++)
 +        {
 +            fprintf(fp,"  %3d ",0);
 +        }
 +        fprintf(fp,"\n");
 +    }
 +    else
 +    {
 +        for(i=0; i<nat_max; i++)
 +        {
 +            (void) pr_indent(fp,indent);
 +            fprintf(fp,"groupnr[%5d] =",i);
 +            for(g=0; g<egcNR; g++)
 +            {
 +                fprintf(fp,"  %3d ",
 +                        groups->grpnr[g] ? groups->grpnr[g][i] : 0);
 +            }
 +            fprintf(fp,"\n");
 +        }
 +    }
 +}
 +
 +void pr_atoms(FILE *fp,int indent,const char *title,t_atoms *atoms, 
 +            gmx_bool bShownumbers)
 +{
 +  if (available(fp,atoms,indent,title))
 +    {
 +      indent=pr_title(fp,indent,title);
 +      pr_atom(fp,indent,"atom",atoms->atom,atoms->nr);
 +      pr_strings(fp,indent,"atom",atoms->atomname,atoms->nr,bShownumbers);
 +      pr_strings2(fp,indent,"type",atoms->atomtype,atoms->atomtypeB,atoms->nr,bShownumbers);
 +      pr_resinfo(fp,indent,"residue",atoms->resinfo,atoms->nres,bShownumbers);
 +    }
 +}
 +
 +
 +void pr_atomtypes(FILE *fp,int indent,const char *title,t_atomtypes *atomtypes, 
 +                gmx_bool bShowNumbers)
 +{
 +  int i;
 +  if (available(fp,atomtypes,indent,title)) 
 +  {
 +    indent=pr_title(fp,indent,title);
 +    for(i=0;i<atomtypes->nr;i++) {
 +      pr_indent(fp,indent);
 +              fprintf(fp,
 +                              "atomtype[%3d]={radius=%12.5e, volume=%12.5e, gb_radius=%12.5e, surftens=%12.5e, atomnumber=%4d, S_hct=%12.5e)}\n",
 +                              bShowNumbers?i:-1,atomtypes->radius[i],atomtypes->vol[i],
 +                              atomtypes->gb_radius[i],
 +                              atomtypes->surftens[i],atomtypes->atomnumber[i],atomtypes->S_hct[i]);
 +    }
 +  }
 +}
 +
 +static void pr_moltype(FILE *fp,int indent,const char *title,
 +                       gmx_moltype_t *molt,int n,
 +                       gmx_ffparams_t *ffparams,
 +                       gmx_bool bShowNumbers)
 +{
 +    int j;
 +
 +    indent = pr_title_n(fp,indent,title,n);
 +    (void) pr_indent(fp,indent);
 +    (void) fprintf(fp,"name=\"%s\"\n",*(molt->name));
 +    pr_atoms(fp,indent,"atoms",&(molt->atoms),bShowNumbers);
 +    pr_block(fp,indent,"cgs",&molt->cgs, bShowNumbers);
 +    pr_blocka(fp,indent,"excls",&molt->excls, bShowNumbers);
 +    for(j=0; (j<F_NRE); j++) {
 +        pr_ilist(fp,indent,interaction_function[j].longname,
 +                 ffparams->functype,&molt->ilist[j],bShowNumbers);
 +    }
 +}
 +
 +static void pr_molblock(FILE *fp,int indent,const char *title,
 +                        gmx_molblock_t *molb,int n,
 +                        gmx_moltype_t *molt,
 +                        gmx_bool bShowNumbers)
 +{
 +    indent = pr_title_n(fp,indent,title,n);
 +    (void) pr_indent(fp,indent);
 +    (void) fprintf(fp,"%-20s = %d \"%s\"\n",
 +                   "moltype",molb->type,*(molt[molb->type].name));
 +    pr_int(fp,indent,"#molecules",molb->nmol);
 +    pr_int(fp,indent,"#atoms_mol",molb->natoms_mol);
 +    pr_int(fp,indent,"#posres_xA",molb->nposres_xA);
 +    if (molb->nposres_xA > 0) {
 +        pr_rvecs(fp,indent,"posres_xA",molb->posres_xA,molb->nposres_xA);
 +    }
 +    pr_int(fp,indent,"#posres_xB",molb->nposres_xB);
 +    if (molb->nposres_xB > 0) {
 +        pr_rvecs(fp,indent,"posres_xB",molb->posres_xB,molb->nposres_xB);
 +    }
 +}
 +
 +void pr_mtop(FILE *fp,int indent,const char *title,gmx_mtop_t *mtop,
 +             gmx_bool bShowNumbers)
 +{
 +    int mt,mb;
 +
 +    if (available(fp,mtop,indent,title)) {
 +        indent=pr_title(fp,indent,title);
 +        (void) pr_indent(fp,indent);
 +        (void) fprintf(fp,"name=\"%s\"\n",*(mtop->name));
 +        pr_int(fp,indent,"#atoms",mtop->natoms);
 +        for(mb=0; mb<mtop->nmolblock; mb++) {
 +            pr_molblock(fp,indent,"molblock",&mtop->molblock[mb],mb,
 +                        mtop->moltype,bShowNumbers);
 +        }
 +        pr_ffparams(fp,indent,"ffparams",&(mtop->ffparams),bShowNumbers);
 +        pr_atomtypes(fp,indent,"atomtypes",&(mtop->atomtypes),bShowNumbers);
 +        for(mt=0; mt<mtop->nmoltype; mt++) {
 +            pr_moltype(fp,indent,"moltype",&mtop->moltype[mt],mt,
 +                       &mtop->ffparams,bShowNumbers);
 +        }
 +        pr_groups(fp,indent,"groups",&mtop->groups,bShowNumbers);
 +    }
 +}
 +
 +void pr_top(FILE *fp,int indent,const char *title,t_topology *top, gmx_bool bShowNumbers)
 +{
 +  if (available(fp,top,indent,title)) {
 +    indent=pr_title(fp,indent,title);
 +    (void) pr_indent(fp,indent);
 +    (void) fprintf(fp,"name=\"%s\"\n",*(top->name));
 +    pr_atoms(fp,indent,"atoms",&(top->atoms),bShowNumbers);
 +    pr_atomtypes(fp,indent,"atomtypes",&(top->atomtypes),bShowNumbers);
 +    pr_block(fp,indent,"cgs",&top->cgs, bShowNumbers);
 +    pr_block(fp,indent,"mols",&top->mols, bShowNumbers);
 +    pr_blocka(fp,indent,"excls",&top->excls, bShowNumbers);
 +    pr_idef(fp,indent,"idef",&top->idef,bShowNumbers);
 +  }
 +}
 +
 +void pr_header(FILE *fp,int indent,const char *title,t_tpxheader *sh)
 +{
 +  char buf[22];
 +    
 +  if (available(fp,sh,indent,title))
 +    {
 +      indent=pr_title(fp,indent,title);
 +      pr_indent(fp,indent);
 +      fprintf(fp,"bIr    = %spresent\n",sh->bIr?"":"not ");
 +      pr_indent(fp,indent);
 +      fprintf(fp,"bBox   = %spresent\n",sh->bBox?"":"not ");
 +      pr_indent(fp,indent);
 +      fprintf(fp,"bTop   = %spresent\n",sh->bTop?"":"not ");
 +      pr_indent(fp,indent);
 +      fprintf(fp,"bX     = %spresent\n",sh->bX?"":"not ");
 +      pr_indent(fp,indent);
 +      fprintf(fp,"bV     = %spresent\n",sh->bV?"":"not ");
 +      pr_indent(fp,indent);
 +      fprintf(fp,"bF     = %spresent\n",sh->bF?"":"not ");
 +      
 +      pr_indent(fp,indent);
 +      fprintf(fp,"natoms = %d\n",sh->natoms);
 +      pr_indent(fp,indent);
 +      fprintf(fp,"lambda = %e\n",sh->lambda);
 +    }
 +}
 +
 +void pr_commrec(FILE *fp,int indent,t_commrec *cr)
 +{
 +  pr_indent(fp,indent);
 +  fprintf(fp,"commrec:\n");
 +  indent+=2;
 +  pr_indent(fp,indent);
 +  fprintf(fp,"nodeid    = %d\n",cr->nodeid);
 +  pr_indent(fp,indent);
 +  fprintf(fp,"nnodes    = %d\n",cr->nnodes);
 +  pr_indent(fp,indent);
 +  fprintf(fp,"npmenodes = %d\n",cr->npmenodes);
 +  /*
 +  pr_indent(fp,indent);
 +  fprintf(fp,"threadid  = %d\n",cr->threadid);
 +  pr_indent(fp,indent);
 +  fprintf(fp,"nthreads  = %d\n",cr->nthreads);
 +  */
 +}
index 83b30d344bd40d68f1b947763ef20e5a559d767b,0000000000000000000000000000000000000000..cc41718e06ea59aff96c51c4cc3b84c98ef20403
mode 100644,000000..100644
--- /dev/null
@@@ -1,1322 -1,0 +1,1322 @@@
-   fprintf(out,"%s\n",program);
 +/*
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROningen Mixture of Alchemy and Childrens' Stories
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +#include "gromacs/utility/gmx_header_config.h"
 +
 +#include <time.h>
 +
 +#include <string>
 +
 +#include "gromacs/utility/exceptions.h"
 +#include "gromacs/utility/stringutil.h"
 +
 +#include "gmx_fatal.h"
 +#include "string2.h"
 +#include "smalloc.h"
 +#include "sysstuff.h"
 +#include "filenm.h"
 +#include "macros.h"
 +#include "wman.h"
 +#include "statutil.h"
 +#include "copyrite.h"
 +#include "strdb.h"
 +#include "readinp.h"
 +
 +/* The source code in this file should be thread-safe. 
 +         Please keep it that way. */
 +
 +
 +typedef struct {
 +  const char *search,*replace;
 +} t_sandr_const;
 +
 +typedef struct {
 +  char *search,*replace;
 +} t_sandr;
 +
 +/* The order of these arrays is significant. Text search and replace
 + * for each element occurs in order, so earlier changes can induce
 + * subsequent changes even though the original text might not appear
 + * to invoke the latter changes. */
 +
 +const t_sandr_const sandrTeX[] = {
 +  { "[TT]", "{\\tt " },
 +  { "[tt]", "}"      },
 +  { "[BB]", "{\\bf " },
 +  { "[bb]", "}"      },
 +  { "[IT]", "{\\em " },
 +  { "[it]", "}"      },
 +  { "[PAR]","\n\n"   },
 +  /* Escaping underscore for LaTeX is no longer necessary, and it breaks
 +   * text searching and the index if you do. */
 +  /*
 +  { "_",    "\\_"    },
 +  */
 +  { "$",    "\\$"    },
 +  { "<=",   "\\ensuremath{\\leq{}}"},
 +  { ">=",   "\\ensuremath{\\geq{}}"},
 +  { "<",    "\\textless{}" },
 +  { ">",    "\\textgreater{}" },
 +  { "^",    "\\^{}"    },
 +  { "\\^{}t", "\\ensuremath{^t}" },
 +  { "\\^{}a", "\\ensuremath{^a}" },
 +  { "\\^{}b", "\\ensuremath{^b}" },
 +  { "\\^{}2", "\\ensuremath{^2}" },
 +  { "\\^{}3", "\\ensuremath{^3}" },
 +  { "\\^{}6", "\\ensuremath{^6}" },
 +  { "#",    "\\#"    },
 +  { "[BR]", "\\\\"   },
 +  { "%",    "\\%"    },
 +  { "&",    "\\&"    },
 +  /* The next couple of lines allow true Greek symbols to be written to the 
 +     manual, which makes it look pretty */
 +  { "[GRK]", "\\ensuremath{\\" },
 +  { "[grk]", "}" },
 +  { "[MATH]","\\ensuremath{" },
 +  { "[math]","}" },
 +  { "[CHEVRON]", "\\ensuremath{<}" },
 +  { "[chevron]", "\\ensuremath{>}" },
 +  { "[MAG]", "\\ensuremath{|}" },
 +  { "[mag]", "\\ensuremath{|}" },
 +  { "[INT]","\\ensuremath{\\int" },
 +  { "[FROM]","_" },
 +  { "[from]","" },
 +  { "[TO]", "^" },
 +  { "[to]", "" },
 +  { "[int]","}" },
 +  { "[SUM]","\\ensuremath{\\sum" },
 +  { "[sum]","}" },
 +  { "[SUB]","\\ensuremath{_{" },
 +  { "[sub]","}}" },
 +  { "[SQRT]","\\ensuremath{\\sqrt{" },
 +  { "[sqrt]","}}" },
 +  { "[EXP]","\\ensuremath{\\exp{(" },
 +  { "[exp]",")}}" },
 +  { "[LN]","\\ensuremath{\\ln{(" },
 +  { "[ln]",")}}" },
 +  { "[LOG]","\\ensuremath{\\log{(" },
 +  { "[log]",")}}" },
 +  { "[COS]","\\ensuremath{\\cos{(" },
 +  { "[cos]",")}}" },
 +  { "[SIN]","\\ensuremath{\\sin{(" },
 +  { "[sin]",")}}" },
 +  { "[TAN]","\\ensuremath{\\tan{(" },
 +  { "[tan]",")}}" },
 +  { "[COSH]","\\ensuremath{\\cosh{(" },
 +  { "[cosh]",")}}" },
 +  { "[SINH]","\\ensuremath{\\sinh{(" },
 +  { "[sinh]",")}}" },
 +  { "[TANH]","\\ensuremath{\\tanh{(" },
 +  { "[tanh]",")}}" }
 +};
 +#define NSRTEX asize(sandrTeX)
 +
 +const t_sandr_const sandrTty[] = {
 +  { "[TT]", "" },
 +  { "[tt]", "" },
 +  { "[BB]", "" },
 +  { "[bb]", "" },
 +  { "[IT]", "" },
 +  { "[it]", "" },
 +  { "[MATH]","" },
 +  { "[math]","" },
 +  { "[CHEVRON]","<" },
 +  { "[chevron]",">" },
 +  { "[MAG]", "|" },
 +  { "[mag]", "|" },
 +  { "[INT]","integral" },
 +  { "[FROM]"," from " },
 +  { "[from]","" },
 +  { "[TO]", " to " },
 +  { "[to]", " of" },
 +  { "[int]","" },
 +  { "[SUM]","sum" },
 +  { "[sum]","" },
 +  { "[SUB]","_" },
 +  { "[sub]","" },
 +  { "[SQRT]","sqrt(" },
 +  { "[sqrt]",")" },
 +  { "[EXP]","exp(" },
 +  { "[exp]",")" },
 +  { "[LN]","ln(" },
 +  { "[ln]",")" },
 +  { "[LOG]","log(" },
 +  { "[log]",")" },
 +  { "[COS]","cos(" },
 +  { "[cos]",")" },
 +  { "[SIN]","sin(" },
 +  { "[sin]",")" },
 +  { "[TAN]","tan(" },
 +  { "[tan]",")" },
 +  { "[COSH]","cosh(" },
 +  { "[cosh]",")" },
 +  { "[SINH]","sinh(" },
 +  { "[sinh]",")" },
 +  { "[TANH]","tanh(" },
 +  { "[tanh]",")" },
 +  { "[PAR]","\n\n" },
 +  { "[BR]", "\n"},
 +  { "[GRK]", "" },
 +  { "[grk]", "" }
 +};
 +#define NSRTTY asize(sandrTty)
 +
 +const t_sandr_const sandrWiki[] = {
 +  { "&",    "&amp;" },
 +  { "<",    "&lt;" },
 +  { ">",    "&gt;" },
 +  { "[TT]", "&lt;code&gt;" },
 +  { "[tt]", "&lt;/code&gt;" },
 +  { "[BB]", "'''" },
 +  { "[bb]", "'''" },
 +  { "[IT]", "''" },
 +  { "[it]", "''" },
 +  { "[MATH]","" },
 +  { "[math]","" },
 +  { "[CHEVRON]","<" },
 +  { "[chevron]",">" },
 +  { "[MAG]", "|" },
 +  { "[mag]", "|" },
 +  { "[INT]","integral" },
 +  { "[FROM]"," from " },
 +  { "[from]","" },
 +  { "[TO]", " to " },
 +  { "[to]", " of" },
 +  { "[int]","" },
 +  { "[SUM]","sum" },
 +  { "[sum]","" },
 +  { "[SUB]","_" },
 +  { "[sub]","" },
 +  { "[SQRT]","sqrt(" },
 +  { "[sqrt]",")", },
 +  { "[EXP]","exp(" },
 +  { "[exp]",")" },
 +  { "[LN]","ln(" },
 +  { "[ln]",")" },
 +  { "[LOG]","log(" },
 +  { "[log]",")" },
 +  { "[COS]","cos(" },
 +  { "[cos]",")" },
 +  { "[SIN]","sin(" },
 +  { "[sin]",")" },
 +  { "[TAN]","tan(" },
 +  { "[tan]",")" },
 +  { "[COSH]","cosh(" },
 +  { "[cosh]",")" },
 +  { "[SINH]","sinh(" },
 +  { "[sinh]",")" },
 +  { "[TANH]","tanh(" },
 +  { "[tanh]",")" },
 +  { "[PAR]","\n\n" },
 +  { "[BR]", "\n" },
 +  { "[GRK]", "&" },
 +  { "[grk]", ";" }
 +};
 +#define NSRWIKI asize(sandrWiki)
 +
 +const t_sandr_const sandrNROFF[] = {
 +  { "[TT]", "\\fB " },
 +  { "[tt]", "\\fR" },
 +  { "[BB]", "\\fB " },
 +  { "[bb]", "\\fR" },
 +  { "[IT]", "\\fI " },
 +  { "[it]", "\\fR" },
 +  { "[MATH]","" },
 +  { "[math]","" },
 +  { "[CHEVRON]","<" },
 +  { "[chevron]",">" },
 +  { "[MAG]", "|" },
 +  { "[mag]", "|" },
 +  { "[INT]","integral" },
 +  { "[FROM]"," from " },
 +  { "[from]","" },
 +  { "[TO]", " to " },
 +  { "[to]", " of" },
 +  { "[int]","" },
 +  { "[SUM]","sum" },
 +  { "[sum]","" },
 +  { "[SUB]","_" },
 +  { "[sub]","" },
 +  { "[SQRT]","sqrt(" },
 +  { "[sqrt]",")", },
 +  { "[EXP]","exp(" },
 +  { "[exp]",")" },
 +  { "[LN]","ln(" },
 +  { "[ln]",")" },
 +  { "[LOG]","log(" },
 +  { "[log]",")" },
 +  { "[COS]","cos(" },
 +  { "[cos]",")" },
 +  { "[SIN]","sin(" },
 +  { "[sin]",")" },
 +  { "[TAN]","tan(" },
 +  { "[tan]",")" },
 +  { "[COSH]","cosh(" },
 +  { "[cosh]",")" },
 +  { "[SINH]","sinh(" },
 +  { "[sinh]",")" },
 +  { "[TANH]","tanh(" },
 +  { "[tanh]",")" },
 +  { "[PAR]","\n\n" },
 +  { "\n ",    "\n" },
 +  { "<",    "" },
 +  { ">",    "" },
 +  { "^",    "" },
 +  { "#",    "" },
 +  { "[BR]", "\n"},
 +  { "-",    "\\-"},
 +  { "[GRK]", "" },
 +  { "[grk]", "" }
 +};
 +#define NSRNROFF asize(sandrNROFF)
 +
 +const t_sandr_const sandrHTML[] = {
 +  { "<",    "&lt;" },
 +  { ">",    "&gt;" },
 +  { "[TT]", "<tt>" },
 +  { "[tt]", "</tt>" },
 +  { "[BB]", "<b>" },
 +  { "[bb]", "</b>" },
 +  { "[IT]", "<it>" },
 +  { "[it]", "</it>" },
 +  { "[MATH]","" },
 +  { "[math]","" },
 +  { "[CHEVRON]","<" },
 +  { "[chevron]",">" },
 +  { "[MAG]", "|" },
 +  { "[mag]", "|" },
 +  { "[INT]","integral" },
 +  { "[FROM]"," from " },
 +  { "[from]","" },
 +  { "[TO]", " to " },
 +  { "[to]", " of" },
 +  { "[int]","" },
 +  { "[SUM]","sum" },
 +  { "[sum]","" },
 +  { "[SUB]","_" },
 +  { "[sub]","" },
 +  { "[SQRT]","sqrt(" },
 +  { "[sqrt]",")", },
 +  { "[EXP]","exp(" },
 +  { "[exp]",")" },
 +  { "[LN]","ln(" },
 +  { "[ln]",")" },
 +  { "[LOG]","log(" },
 +  { "[log]",")" },
 +  { "[COS]","cos(" },
 +  { "[cos]",")" },
 +  { "[SIN]","sin(" },
 +  { "[sin]",")" },
 +  { "[TAN]","tan(" },
 +  { "[tan]",")" },
 +  { "[COSH]","cosh(" },
 +  { "[cosh]",")" },
 +  { "[SINH]","sinh(" },
 +  { "[sinh]",")" },
 +  { "[TANH]","tanh(" },
 +  { "[tanh]",")" },
 +  { "[PAR]","<p>" },
 +  { "[BR]", "<br>" },
 +  { "[GRK]", "&"  },
 +  { "[grk]", ";"  }
 +};
 +#define NSRHTML asize(sandrHTML)
 +
 +const t_sandr_const sandrXML[] = {
 +  { "<",    "&lt;" },
 +  { ">",    "&gt;" },
 +  { "[TT]", "<arg>" },
 +  { "[tt]", "</arg>" },
 +  { "[BB]", "<emp>" },
 +  { "[bb]", "</emp>" },
 +  { "[IT]", "<it>" },
 +  { "[it]", "</it>" },
 +  { "[MATH]","" },
 +  { "[math]","" },
 +  { "[CHEVRON]","<" },
 +  { "[chevron]",">" },
 +  { "[MAG]", "|" },
 +  { "[mag]", "|" },
 +  { "[INT]","integral" },
 +  { "[FROM]"," from " },
 +  { "[from]","" },
 +  { "[TO]", " to " },
 +  { "[to]", " of" },
 +  { "[int]","" },
 +  { "[SUM]","sum" },
 +  { "[sum]","" },
 +  { "[SUB]","_" },
 +  { "[sub]","" },
 +  { "[SQRT]","sqrt(" },
 +  { "[sqrt]",")", },
 +  { "[EXP]","exp(" },
 +  { "[exp]",")" },
 +  { "[LN]","ln(" },
 +  { "[ln]",")" },
 +  { "[LOG]","log(" },
 +  { "[log]",")" },
 +  { "[COS]","cos(" },
 +  { "[cos]",")" },
 +  { "[SIN]","sin(" },
 +  { "[sin]",")" },
 +  { "[TAN]","tan(" },
 +  { "[tan]",")" },
 +  { "[COSH]","cosh(" },
 +  { "[cosh]",")" },
 +  { "[SINH]","sinh(" },
 +  { "[sinh]",")" },
 +  { "[TANH]","tanh(" },
 +  { "[tanh]",")" },
 +  { "[PAR]","</par>\n<par>" },
 +  { "[BR]", "<br />" },
 +  { "[GRK]", "" },
 +  { "[grk]", "" }
 +};
 +#define NSRXML asize(sandrXML)
 +
 +static void mynum(char *buf,int n)
 +{
 +  if (n >= 10)
 +    sprintf(buf,"%2d",n);
 +  else
 +    sprintf(buf,"0%1d",n);
 +}
 +
 +static char *mydate(char buf[], int maxsize,gmx_bool bWiki)
 +{
 +  const char *mon[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", 
 +                       "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
 +  const char *day[] = { "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" };
 +  time_t now;
 +  struct tm tm;
 +  
 +  time(&now);
 +#ifdef GMX_NATIVE_WINDOWS
 +  /* Native windows */
 +  localtime_s(&tm,&now);
 +#else
 +  localtime_r(&now,&tm);
 +#endif
 +
 +  /* subtract one from maxsize, so we have room for \0. */
 +  if (bWiki) {
 +    char dd[8],mm[8],ss[8],hh[8],mn[8];
 +    
 +    mynum(dd,tm.tm_mday);
 +    mynum(mm,tm.tm_mon);
 +    mynum(ss,tm.tm_sec);
 +    mynum(hh,tm.tm_hour);
 +    mynum(mn,tm.tm_min);
 +    sprintf(buf,"%4d-%2s-%2sT%2s:%2s:%2sZ",
 +           tm.tm_year+1900,mm,dd,hh,mn,ss);
 +  }
 +  else
 +    sprintf(buf,"%s %d %s %d",day[tm.tm_wday],tm.tm_mday,
 +           mon[tm.tm_mon],tm.tm_year+1900);
 +  
 +  return buf;
 +}
 +
 +/* Data structure for saved HTML links */
 +typedef struct t_linkdata {
 +  int     nsr;
 +  t_sandr *sr;
 +} t_linkdata;
 +
 +static t_linkdata *init_linkdata()
 +{
 +  t_linkdata *p;
 +  snew(p,1);
 +  p->sr=NULL;
 +  p->nsr=0;
 +
 +  return p;
 +}
 +
 +static void finish_linkdata(t_linkdata *p)
 +{
 +  int i;
 +  
 +  for(i=0;i<p->nsr;i++) {
 +    sfree(p->sr[i].search);    
 +    sfree(p->sr[i].replace);
 +  }
 +  sfree(p->sr);
 +  sfree(p);
 +}
 +
 +static char *repall(const char *s,int nsr,const t_sandr_const sa[])
 +{
 +    try
 +    {
 +        std::string result(s);
 +        for (int i = 0; i < nsr; ++i)
 +        {
 +            result = gmx::replaceAll(result, sa[i].search, sa[i].replace);
 +        }
 +        return gmx_strdup(result.c_str());
 +    }
 +    GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 +}
 +
 +static char *repallww(const char *s,int nsr,const t_sandr sa[])
 +{
 +    try
 +    {
 +        std::string result(s);
 +        for (int i = 0; i < nsr; ++i)
 +        {
 +            result = gmx::replaceAllWords(result, sa[i].search, sa[i].replace);
 +        }
 +        return gmx_strdup(result.c_str());
 +    }
 +    GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 +}
 +
 +static char *html_xref(char *s,const char *program, t_linkdata *links,gmx_bool bWiki)
 +{
 +  char   buf[256],**filestr;
 +  int    i,j,n;
 +  
 +  if (links->sr == NULL) {
 +    n=get_file("links.dat",&(filestr));
 +    links->nsr=n;
 +    snew(links->sr,n);
 +    for(i=0,j=0; (i<n); i++) {
 +      if (!program || (gmx_strcasecmp(program,filestr[i])  != 0)) {
 +      links->sr[j].search=gmx_strdup(filestr[i]);
 +      if (bWiki)
 +        sprintf(buf,"[[%s]]",filestr[i]);
 +      else
 +        sprintf(buf,"<a href=\"%s.html\">%s</a>",filestr[i],filestr[i]);
 +      links->sr[j].replace=gmx_strdup(buf);
 +      j++;
 +      }
 +    }
 +    links->nsr=j;
 +    for(i=0;i<n;i++)
 +      sfree(filestr[i]);
 +    sfree(filestr);
 +  }
 +  return repallww(s,links->nsr,links->sr);
 +}
 +
 +char *check_tex(const char *s)
 +{
 +  return repall(s,NSRTEX,sandrTeX);
 +}
 +
 +static char *check_nroff(const char *s)
 +{
 +  return repall(s,NSRNROFF,sandrNROFF);
 +}
 +
 +static char *check_wiki(const char *s,const char *program, t_linkdata *links)
 +{
 +  char *buf;
 +  
 +  buf = repall(s,NSRWIKI,sandrWiki);
 +  buf = html_xref(buf,program,links,TRUE);
 +  
 +  return buf;
 +}
 +
 +static char *check_html(const char *s,const char *program, t_linkdata *links)
 +{
 +  char *buf;
 +  
 +  buf = repall(s,NSRHTML,sandrHTML);
 +  buf = html_xref(buf,program,links,FALSE);
 +  
 +  return buf;
 +}
 +
 +#define NWR(s) check_wiki(s,program,links)
 +#define NSR(s) check_html(s,program,links)
 +  
 +#define FLAG_SET(flag, mask) ((flag & mask) == mask)
 +char *fileopt(unsigned long flag,char buf[],int maxsize)
 +{
 +  char tmp[256];
 +  
 +  if (FLAG_SET(flag, ffRW))
 +    sprintf(tmp,"In/Out");
 +  else if (FLAG_SET(flag, ffREAD))
 +    sprintf(tmp,"Input");
 +  else if (FLAG_SET(flag, ffWRITE))
 +    sprintf(tmp,"Output");
 +  else
 +    sprintf(tmp,"Dunno");
 +
 +  if (FLAG_SET(flag, ffOPT)) {
 +    strcat(tmp,", Opt");
 +    if (FLAG_SET(flag, ffSET)) 
 +      strcat(tmp,"!");
 +    else
 +      strcat(tmp,".");
 +  }
 +  if (FLAG_SET(flag, ffLIB))
 +    strcat(tmp,", Lib.");
 +  if (FLAG_SET(flag, ffMULT))
 +    strcat(tmp,", Mult.");
 +
 +  sprintf(buf,"%s",tmp);
 +  
 +  return buf;
 +}
 +
 +static void write_texman(FILE *out,const char *program,
 +                       int nldesc,const char **desc,
 +                       int nfile,t_filenm *fnm,
 +                       int npargs,t_pargs *pa,
 +                       int nbug,const char **bugs,
 +                       t_linkdata *links)
 +{
 +  int i;
 +  char tmp[256];
 +  
 +  fprintf(out,"\\section{\\normindex{%s}}\\label{%s}\n\n",check_tex(program),check_tex(program));
 +  
 +  if (nldesc > 0)
 +    for(i=0; (i<nldesc); i++) 
 +      fprintf(out,"%s\n",check_tex(desc[i]));
 +
 +  if (nfile > 0) {
 +    fprintf(out,"\\vspace{-2ex}\\begin{tabbing}\n");
 +    fprintf(out,"\n{\\normalsize \\bf Files}\\nopagebreak\\\\\n");
 +    fprintf(out,"{\\tt ~~~~~~~} \\= {\\tt ~~~~~~~~~~~~~~} \\= "
 +          "~~~~~~~~~~~~~~~~~~~~~~ \\= \\nopagebreak\\kill\n");
 +    for(i=0; (i<nfile); i++)
 +      fprintf(out,"\\>{\\tt %s} \\'\\> {\\tt %s} \\' %s \\> "
 +            "\\parbox[t]{0.55\\linewidth}{%s} \\\\\n",
 +            check_tex(fnm[i].opt),check_tex(fnm[i].fns[0]),
 +            check_tex(fileopt(fnm[i].flag,tmp,255)),
 +            check_tex(ftp2desc(fnm[i].ftp)));
 +    fprintf(out,"\\end{tabbing}\\vspace{-4ex}\n");
 +  }
 +  if (npargs > 0) {
 +    fprintf(out,"\\vspace{-2ex}\\begin{tabbing}\n");
 +    fprintf(out,"\n{\\normalsize \\bf Other options}\\nopagebreak\\\\\n");
 +    fprintf(out,"{\\tt ~~~~~~~~~~} \\= vector \\= "
 +          "{\\tt ~~~~~~~} \\= \\nopagebreak\\kill\n");
 +    for(i=0; (i<npargs); i++) {
 +      if (strlen(check_tex(pa_val(&(pa[i]),tmp,255))) <= 8)
 +      fprintf(out,"\\> {\\tt %s} \\'\\> %s \\'\\> {\\tt %s} \\' "
 +              "\\parbox[t]{0.68\\linewidth}{%s}\\\\\n",
 +              check_tex(pa[i].option),get_arg_desc(pa[i].type),
 +              check_tex(pa_val(&(pa[i]),tmp,255)),
 +              check_tex(pa[i].desc));
 +      else
 +              fprintf(out,"\\> {\\tt %s} \\'\\> %s \\'\\>\\\\\n"
 +              "\\> \\'\\> \\'\\> {\\tt %s} \\' "
 +              "\\parbox[t]{0.7\\linewidth}{%s}\\\\\n",
 +              check_tex(pa[i].option),get_arg_desc(pa[i].type),
 +              check_tex(pa_val(&(pa[i]),tmp,255)),
 +              check_tex(pa[i].desc));
 +    }
 +    fprintf(out,"\\end{tabbing}\\vspace{-4ex}\n");
 +  }
 +  if (nbug > 0) {
 +    fprintf(out,"\n");
 +    fprintf(out,"\\begin{itemize}\n");
 +    for(i=0; (i<nbug); i++)
 +      fprintf(out,"\\item %s\n",check_tex(bugs[i]));
 +    fprintf(out,"\\end{itemize}\n");
 +  }
 +/*   fprintf(out,"\n\\newpage\n"); */
 +}
 +
 +static void write_nroffman(FILE *out,
 +                         const char *program,
 +                         int nldesc,const char **desc,
 +                         int nfile,t_filenm *fnm,
 +                         int npargs,t_pargs *pa,
 +                         int nbug,const char **bugs,
 +                         t_linkdata *links)
 +
 +{
 +  int i;
 +  char tmp[256];
 +  
 +  
 +  fprintf(out,".TH %s 1 \"%s\" \"\" \"GROMACS suite, %s\"\n",program,mydate(tmp,255,FALSE),GromacsVersion());
 +  fprintf(out,".SH NAME\n");
++  fprintf(out,"%s@DESC@\n\n",program);
 +  fprintf(out,".B %s\n",GromacsVersion());
 +  
 +  fprintf(out,".SH SYNOPSIS\n");
 +  fprintf(out,"\\f3%s\\fP\n",program);
 +
 +  /* command line arguments */
 +  if (nfile > 0) {
 +    for(i=0; (i<nfile); i++)
 +      fprintf(out,".BI \"%s\" \" %s \"\n",check_nroff(fnm[i].opt),
 +            check_nroff(fnm[i].fns[0]));
 +  }
 +  if (npargs > 0) {
 +    for(i=0; (i<npargs); i++)
 +      if (pa[i].type == etBOOL)
 +      fprintf(out,".BI \"\\-[no]%s\" \"\"\n",check_nroff(pa[i].option+1));
 +      else
 +      fprintf(out,".BI \"%s\" \" %s \"\n",check_nroff(pa[i].option),
 +              check_nroff(get_arg_desc(pa[i].type)));
 +  }
 +  
 +  /* description */
 +  if (nldesc > 0) {
 +    fprintf(out,".SH DESCRIPTION\n");
 +    for(i=0; (i<nldesc); i++) 
 +      fprintf(out,"\\&%s\n",check_nroff(desc[i]));
 +  }
 +
 +  /* FILES */
 +  if (nfile > 0) {
 +    fprintf(out,".SH FILES\n");
 +    for(i=0; (i<nfile); i++)
 +      fprintf(out,".BI \"%s\" \" %s\" \n.B %s\n %s \n\n",
 +            check_nroff(fnm[i].opt),
 +              check_nroff(fnm[i].fns[0]),
 +              check_nroff(fileopt(fnm[i].flag,tmp,255)),
 +            check_nroff(ftp2desc(fnm[i].ftp)));
 +  }
 +  
 +  /* other options */
 +  fprintf(out,".SH OTHER OPTIONS\n");
 +  if ( npargs > 0 ) {
 +    for(i=0; (i<npargs); i++) {
 +      if (pa[i].type == etBOOL)
 +      fprintf(out,".BI \"\\-[no]%s\"  \"%s\"\n %s\n\n",
 +              check_nroff(pa[i].option+1),
 +              check_nroff(pa_val(&(pa[i]),tmp,255)),
 +                check_nroff(pa[i].desc));
 +      else
 +      fprintf(out,".BI \"%s\"  \" %s\" \" %s\" \n %s\n\n",
 +              check_nroff(pa[i].option),
 +                check_nroff(get_arg_desc(pa[i].type)),
 +              check_nroff(pa_val(&(pa[i]),tmp,255)),
 +                check_nroff(pa[i].desc));
 +    }
 +  }
 +
 +  if (nbug > 0) {
 +    fprintf(out,".SH KNOWN PROBLEMS\n");
 +    for(i=0; (i<nbug); i++)
 +      fprintf(out,"\\- %s\n\n",check_nroff(bugs[i]));
 +  }
 +
 +  fprintf(out,".SH SEE ALSO\n.BR gromacs(7)\n\n");
 +  fprintf(out,"More information about \\fBGROMACS\\fR is available at <\\fIhttp://www.gromacs.org/\\fR>.\n");
 +
 +}
 +
 +char *check_tty(const char *s)
 +{
 +  return repall(s,NSRTTY,sandrTty);
 +}
 +
 +void
 +print_tty_formatted(FILE *out, int nldesc, const char **desc,int indent,
 +                    t_linkdata *links,const char *program,gmx_bool bWiki)
 +{
 +  char *buf;
 +  char *temp;
 +  int buflen,i;
 +
 +  buflen = 80*nldesc;
 +  snew(buf,buflen);
 +  for(i=0; (i<nldesc); i++) {
 +    if ((strlen(buf)>0) && 
 +      (buf[strlen(buf)-1] !=' ') && (buf[strlen(buf)-1] !='\n'))
 +      strcat(buf," ");
 +    if (bWiki)
 +      temp=NWR(desc[i]);
 +    else
 +      temp=check_tty(desc[i]);
 +    if (strlen(buf) + strlen(temp) >= (size_t)(buflen-2)) {
 +      buflen += strlen(temp);
 +      srenew(buf,buflen);
 +    }
 +    strcat(buf,temp);
 +    sfree(temp);
 +  }
 +  /* Make lines of at most 79 characters */
 +  temp = wrap_lines(buf,78,indent,FALSE);
 +  fprintf(out,"%s\n",temp);
 +  sfree(temp);
 +  sfree(buf);
 +}
 +
 +static void write_ttyman(FILE *out,
 +                       const char *program,
 +                       int nldesc,const char **desc,
 +                       int nfile,t_filenm *fnm,
 +                       int npargs,t_pargs *pa,
 +                       int nbug,const char **bugs,gmx_bool bHeader,
 +                       t_linkdata *links)
 +{
 +  int i;
 +  char buf[256];
 +  char *tmp;
 +  
 +  if (bHeader) {
 +    fprintf(out,"%s\n\n",check_tty(program));
 +    fprintf(out,"%s\n%s\n",GromacsVersion(),mydate(buf,255,FALSE));
 +  }
 +  if (nldesc > 0) {
 +    fprintf(out,"DESCRIPTION\n-----------\n");
 +    print_tty_formatted(out,nldesc,desc,0,links,program,FALSE);
 +  }
 +  if (nbug > 0) {
 +    fprintf(out,"\n");
 +    fprintf(out,"KNOWN PROBLEMS\n----------\n");
 +    for(i=0; i<nbug; i++) {
 +      snew(tmp,strlen(bugs[i])+3);
 +      strcpy(tmp,"* ");
 +      strcpy(tmp+2,check_tty(bugs[i]));
 +      fprintf(out,"%s\n",wrap_lines(tmp,78,2,FALSE));
 +      sfree(tmp);
 +    }
 +  }
 +  if (nfile > 0) {
 +    fprintf(out,"\n");
 +    pr_fns(out,nfile,fnm);
 +  }
 +  if (npargs > 0) {
 +    print_pargs(out,npargs,pa,FALSE);
 +  }
 +}
 +
 +static void pr_html_files(FILE *out,int nfile,t_filenm fnm[],
 +                        const char *program,t_linkdata *links,gmx_bool bWiki)
 +{ 
 +  int  i;
 +  char link[10],tmp[255];
 +  
 +  if (bWiki)
 +    fprintf(out," %-10s %-12s %-12s %-s\n"
 +          " -----------------------------------------------------\n",
 +          "Option","Filename","Type","Description");
 +  else
 +    fprintf(out,
 +          "<TABLE BORDER=1 CELLSPACING=0 CELLPADDING=2>\n"
 +          "<TR>"
 +          "<TH>option</TH>"
 +          "<TH>filename</TH>"
 +          "<TH>type</TH>"
 +          "<TH>description</TH>"
 +          "</TR>\n");
 +  
 +  for(i=0; (i<nfile); i++) {
 +    strcpy(link,ftp2ext(fnm[i].ftp));
 +    if (strcmp(link,"???")==0)
 +      strcpy(link,"files");
 +    if (bWiki)
 +      fprintf(out," %-10s %-16s %-12s %-s\n",
 +            fnm[i].opt,
 +            NWR(fnm[i].fns[0]),
 +            fileopt(fnm[i].flag,tmp,255),
 +            NWR(ftp2desc(fnm[i].ftp)));
 +    else
 +      fprintf(out,
 +            "<TR>"
 +            "<TD ALIGN=RIGHT> <b><tt>%s</tt></b> </TD>"
 +            "<TD ALIGN=RIGHT> <tt><a href=\"%s.html\">%12s</a></tt> </TD>"
 +            "<TD> %s </TD>"
 +            "<TD> %s </TD>"
 +            "</TR>\n",
 +            fnm[i].opt,link,fnm[i].fns[0],fileopt(fnm[i].flag,tmp,255),
 +            NSR(ftp2desc(fnm[i].ftp)));
 +  }
 +  if (!bWiki)
 +    fprintf(out,"</TABLE>\n");
 +}
 +
 +static void write_wikiman(FILE *out,
 +                        const char *program,
 +                        int nldesc,const char **desc,
 +                        int nfile,t_filenm *fnm,
 +                        int npargs,t_pargs *pa,
 +                        int nbug,const char **bugs,gmx_bool bHeader,
 +                        t_linkdata *links)
 +{
 +  int i;
 +  char buf[256];
 +  char *tmp;
 +  fprintf(out,"<page>\n<title>Manual:%s_%s</title>\n",program,
 +        VERSION);
 +  fprintf(out,"<revision>\n");
 +  fprintf(out,"<timestamp>%s</timestamp>\n",mydate(buf,255,TRUE));
 +  fprintf(out,"<text xml:space=\"preserve\">\n");
 +  if (nldesc > 0) {
 +    fprintf(out,"== Description ==\n");
 +    print_tty_formatted(out,nldesc,desc,0,links,program,TRUE);
 +    fprintf(out,"\n");
 +  }
 +  if (nbug > 0) {
 +    fprintf(out,"== Known Problems ==\n");
 +    for(i=0; i<nbug; i++) {
 +      snew(tmp,strlen(bugs[i])+3);
 +      strcpy(tmp,"* ");
 +      strcpy(tmp+2,bugs[i]);
 +      fprintf(out,"%s\n",NWR(tmp));
 +      sfree(tmp);
 +    }
 +  }
 +  if (nfile > 0) {
 +    fprintf(out,"\n== Files ==\n");
 +    pr_html_files(out,nfile,fnm,program,links,TRUE);
 +  }
 +  if (npargs > 0) {
 +    fprintf(out,"\n== Options ==\n");
 +    fprintf(out," %-12s %-6s %-6s  %-s\n",
 +          "Option","Type","Value","Description");
 +    fprintf(out," ------------------------------------------------------\n");
 +    for(i=0; (i<npargs); i++) {
 +      tmp = NWR(pargs_print_line(&pa[i],TRUE));
 +      fprintf(out,"%s",tmp);
 +      sfree(tmp);
 +    }
 +  }
 +  fprintf(out,"[[category:Manual_Pages_%s|%s]]\n",VERSION,program);
 +  fprintf(out,"</text>\n");
 +  fprintf(out,"</revision>\n");
 +  fprintf(out,"</page>\n\n");
 +}
 +
 +static void write_htmlman(FILE *out,
 +                        const char *program,
 +                        int nldesc,const char **desc,
 +                        int nfile,t_filenm *fnm,
 +                        int npargs,t_pargs *pa,
 +                        int nbug,const char **bugs,
 +                        t_linkdata *links)
 +{
 +  int i;
 +  char tmp[255];
 +  
 +  fprintf(out,"<HTML>\n<HEAD>\n<TITLE>%s</TITLE>\n",program);
 +  fprintf(out,"<LINK rel=stylesheet href=\"style.css\" type=\"text/css\">\n");
 +  fprintf(out,"<BODY text=\"#000000\" bgcolor=\"#FFFFFF\" link=\"#0000FF\" vlink=\"#990000\" alink=\"#FF0000\">\n");
 +  fprintf(out,"<TABLE WIDTH=\"98%%\" NOBORDER >\n<TR><TD WIDTH=400>\n");
 +  fprintf(out,"<TABLE WIDTH=400 NOBORDER>\n<TD WIDTH=116>\n");
 +  fprintf(out,"<a href=\"http://www.gromacs.org/\">"
 +        "<img SRC=\"../images/gmxlogo_small.png\""
 +        "BORDER=0 </a></td>\n");
 +  fprintf(out,"<td ALIGN=LEFT VALIGN=TOP WIDTH=280>"
 +        "<br><h2>%s</h2>",program);
 +  fprintf(out,"<font size=-1><A HREF=\"../online.html\">Main Table of Contents</A></font><br>");
 +  fprintf(out,"<br></td>\n</TABLE></TD><TD WIDTH=\"*\" ALIGN=RIGHT VALIGN=BOTTOM><p><B>%s<br>\n",GromacsVersion());
 +  fprintf(out,"%s</B></td></tr></TABLE>\n<HR>\n",mydate(tmp,255,FALSE));
 +  
 +  if (nldesc > 0) {
 +    fprintf(out,"<H3>Description</H3>\n<p>\n");
 +    for(i=0; (i<nldesc); i++) 
 +      fprintf(out,"%s\n",NSR(desc[i]));
 +  }
 +  if (nfile > 0) {
 +    fprintf(out,"<P>\n");
 +    fprintf(out,"<H3>Files</H3>\n");
 +    pr_html_files(out,nfile,fnm,program,links,FALSE);
 +  }
 +  if (npargs > 0) {
 +    fprintf(out,"<P>\n");
 +    fprintf(out,"<H3>Other options</H3>\n");
 +    fprintf(out,
 +          "<TABLE BORDER=1 CELLSPACING=0 CELLPADDING=2>\n"
 +          "<TR>"
 +          "<TH>option</TH>"
 +          "<TH>type</TH>"
 +          "<TH>default</TH>"
 +          "<TH>description</TH>"
 +          "</TR>\n");
 +    for(i=0; (i<npargs); i++)
 +      fprintf(out,
 +            "<TR>"
 +            "<TD ALIGN=RIGHT> <b><tt>%s%s</tt></b> </TD>"
 +            "<TD ALIGN=RIGHT> %s </TD>"
 +            "<TD ALIGN=RIGHT> <tt>%s</tt> </TD>"
 +            "<TD> %s </TD>"
 +            "</TD>\n",
 +            (pa[i].type == etBOOL)?"-[no]":"-",pa[i].option+1,
 +            get_arg_desc(pa[i].type),pa_val(&(pa[i]),tmp,255),NSR(pa[i].desc));
 +    fprintf(out,"</TABLE>\n");
 +  }
 +  if (nbug > 0) {
 +    fprintf(out,"<P>\n");
 +    fprintf(out,"<H3>Known problems</H3>\n");
 +    fprintf(out,"<UL>\n");
 +    for(i=0; (i<nbug); i++)
 +      fprintf(out,"<LI>%s\n",NSR(bugs[i]));
 +    fprintf(out,"</UL>\n");
 +  }
 +  fprintf(out,"<P>\n");
 +  fprintf(out,"<hr>\n<div ALIGN=RIGHT>\n");
 +  fprintf(out,"<font size=\"-1\"><a href=\"http://www.gromacs.org\">"
 +        "http://www.gromacs.org</a></font><br>\n");
 +  fprintf(out,"<font size=\"-1\"><a href=\"mailto:gromacs@gromacs.org\">"
 +        "gromacs@gromacs.org</a></font><br>\n");
 +  fprintf(out,"</div>\n");
 +  fprintf(out,"</BODY>\n");
 +}
 +
 +char *check_xml(const char *s,const char *program,t_linkdata *links)
 +{
 +  char *buf;
 +  
 +  buf=repall(s,NSRXML,sandrXML);
 +  buf=html_xref(buf,program,links,FALSE);     /* the same in html and xml */
 +  
 +  return buf;
 +}
 +
 +static void write_xmlman(FILE *out,
 +                       const char *program,
 +                       int nldesc,const char **desc,
 +                       int nfile,t_filenm *fnm,
 +                       int npargs,t_pargs *pa,
 +                       int nbug,const char **bugs,
 +                       t_linkdata *links)
 +{
 +  int i;
 +  char link[10],buf[256],opt[10];
 +
 +#define NSR2(s) check_xml(s,program,links)
 +#define FLAG(w,f) (((w) & (f))==(f)) 
 +
 +  fprintf(out,"<gromacs-manual version=\"%s\" date=\"%s\" www=\"http://www.gromacs.org\">\n",GromacsVersion(),mydate(buf,255,FALSE));
 +  /* fprintf(out,"<LINK rel=stylesheet href=\"style.css\" type=\"text/css\">\n"); */
 +
 +  fprintf(out,"<program name=\"%s\">",program);  
 +  if (nldesc > 0) {
 +    fprintf(out,"\n<description>\n<par>\n");
 +    for(i=0; (i<nldesc); i++) 
 +      fprintf(out,"%s\n",NSR2(desc[i]));
 +  }
 +  fprintf(out,"</par>\n</description>\n");
 +
 +  if (nfile > 0) {
 +    fprintf(out,"\n<files>\n");
 +    for(i=0; (i<nfile); i++) {
 +      strcpy(link,ftp2ext(fnm[i].ftp));
 +      if (strcmp(link,"???")==0)
 +      strcpy(link,"files");
 +        if (fnm[i].opt[0]=='-') strcpy(opt,fnm[i].opt+1);
 +      else strcpy(opt,fnm[i].opt);
 +      fprintf(out,
 +            "<file type=\"%s\" typeid=\"%d\">\n"
 +              "\t<flags read=\"%d\" write=\"%d\" optional=\"%d\"/>\n"
 +            "\t<option>%s</option>\n"
 +            "\t<default-name link=\"%s.html\">%s</default-name>\n"
 +            "\t<description>%s</description>\n"
 +            "</file>\n",
 +            ftp2defnm(fnm[i].ftp),    /* from gmxlib/filenm.c */
 +            fnm[i].ftp,
 +            FLAG(fnm[i].flag,ffREAD), FLAG(fnm[i].flag,ffWRITE), FLAG(fnm[i].flag,ffOPT), 
 +            opt,link,fnm[i].fn,/*fileopt(fnm[i].flag),*/
 +            NSR(ftp2desc(fnm[i].ftp)));
 +    }
 +    fprintf(out,"</files>\n");
 +  }
 +
 +  if (npargs > 0) {
 +    fprintf(out,"\n<options>\n");
 +    for(i=0; (i<npargs); i++)
 +      fprintf(out,
 +            "<option type=\"%s\" hidden=\"%d\">\n"
 +            "\t<name >%s</name>\n"
 +            "\t<default-value>%s</default-value>\n"
 +            "\t<description>%s</description>\n"
 +            "</option>\n",
 +            get_arg_desc(pa[i].type), is_hidden(&pa[i]),
 +            pa[i].option+1,                  /* +1 - with no trailing '-' */
 +            pa_val(&(pa[i]),buf,255),pa[i].desc); /*get_argtp()[pa[i].type],*/
 +    fprintf(out,"</options>\n");
 +  }
 +
 +  if (nbug > 0) {
 +    fprintf(out,"\n<bugs>\n");
 +    for(i=0; (i<nbug); i++)
 +      fprintf(out,"\t<bug>%s</bug>\n",NSR(bugs[i]));
 +    fprintf(out,"</bugs>\n");
 +  }
 +  fprintf(out,"\n</program>\n</gromacs-manual>\n");
 +#undef FLAG  
 +}
 +
 +static void pr_opts(FILE *fp, 
 +                  int nfile,  t_filenm *fnm, 
 +                  int npargs, t_pargs pa[], int shell)
 +{
 +  int i;
 +  
 +  switch (shell) {
 +  case eshellCSH:
 +    fprintf(fp," \"c/-/(");
 +    for (i=0; i<nfile; i++)
 +      fprintf(fp," %s",fnm[i].opt+1);
 +    for (i=0; i<npargs; i++)
 +      if ( (pa[i].type==etBOOL) && *(pa[i].u.b) )
 +      fprintf(fp," no%s",pa[i].option+1);
 +      else
 +      fprintf(fp," %s",pa[i].option+1);
 +    fprintf(fp,")/\"");
 +    break;
 +  case eshellBASH:
 +    fprintf(fp,"if (( $COMP_CWORD <= 1 )) || [[ $c == -* ]]; then COMPREPLY=( $(compgen  -W '");
 +    for (i=0; i<nfile; i++)
 +      fprintf(fp," -%s",fnm[i].opt+1);
 +    for (i=0; i<npargs; i++)
 +      if ( (pa[i].type==etBOOL) && *(pa[i].u.b) )
 +      fprintf(fp," -no%s",pa[i].option+1);
 +      else
 +      fprintf(fp," -%s",pa[i].option+1);
 +    fprintf(fp,"' -- $c)); return 0; fi\n");
 +    break;
 +  case eshellZSH:
 +    fprintf(fp," -x 's[-]' -s \"");
 +    for (i=0; i<nfile; i++)
 +      fprintf(fp," %s",fnm[i].opt+1);
 +    for (i=0; i<npargs; i++)
 +      if ( (pa[i].type==etBOOL) && *(pa[i].u.b) )
 +      fprintf(fp," no%s",pa[i].option+1);
 +      else
 +      fprintf(fp," %s",pa[i].option+1);
 +    fprintf(fp,"\" ");
 +    break;
 +  }
 +}
 +
 +static void write_cshcompl(FILE *out,
 +                         int nfile,  t_filenm *fnm,
 +                         int npargs, t_pargs *pa)
 +{
 +  fprintf(out,"complete %s",ShortProgram());
 +  pr_enums(out,npargs,pa,eshellCSH);
 +  pr_fopts(out,nfile,fnm,eshellCSH);
 +  pr_opts(out,nfile,fnm,npargs,pa,eshellCSH);
 +  fprintf(out,"\n");
 +}
 +
 +static void write_zshcompl(FILE *out,
 +                         int nfile,  t_filenm *fnm,
 +                         int npargs, t_pargs *pa)
 +{
 +  fprintf(out,"compctl ");
 +
 +  /* start with options, since they are always present */
 +  pr_opts(out,nfile,fnm,npargs,pa,eshellZSH);
 +  pr_enums(out,npargs,pa,eshellZSH);
 +  pr_fopts(out,nfile,fnm,eshellZSH);
 +  fprintf(out,"-- %s\n",ShortProgram());
 +}
 +
 +static void write_bashcompl(FILE *out,
 +                          int nfile,  t_filenm *fnm,
 +                          int npargs, t_pargs *pa)
 +{
 +  /* Advanced bash completions are handled by shell functions.
 +   * p and c hold the previous and current word on the command line.
 +   * We need to use extended globbing, so write it in each completion file */
 +  fprintf(out,"shopt -s extglob\n");
 +  fprintf(out,"_%s_compl() {\nlocal p c\n",ShortProgram());
 +  fprintf(out,"COMPREPLY=() c=${COMP_WORDS[COMP_CWORD]} p=${COMP_WORDS[COMP_CWORD-1]}\n");
 +  pr_opts(out,nfile,fnm,npargs,pa,eshellBASH);
 +  fprintf(out,"case \"$p\" in\n");
 +  
 +  pr_enums(out,npargs,pa,eshellBASH);
 +  pr_fopts(out,nfile,fnm,eshellBASH);
 +  fprintf(out,"esac }\ncomplete -F _%s_compl %s\n",ShortProgram(),ShortProgram());
 +}
 +
 +static void write_py(FILE *out,const char *program,
 +                   int nldesc,const char **desc,
 +                   int nfile,t_filenm *fnm,
 +                   int npargs,t_pargs *pa,
 +                   int nbug,const char **bugs,
 +                   t_linkdata *links)
 +{
 +  const char *cls = program;
 +  char *tmp;
 +  int  i,j;
 +
 +  /* Header stuff */  
 +  fprintf(out,"#!/usr/bin/python\n\nfrom GmxDialog import *\n\n");
 +  
 +  /* Class definition */
 +  fprintf(out,"class %s:\n",cls);
 +  fprintf(out,"    def __init__(self,tk):\n");
 +  
 +  /* Help text */
 +  fprintf(out,"        %s_help = \"\"\"\n",cls);
 +  fprintf(out,"        DESCRIPTION\n");
 +  print_tty_formatted(out,nldesc,desc,8,links,program,FALSE);
 +  if (nbug > 0) {
 +    fprintf(out,"\n        BUGS and PROBLEMS\n");
 +    for(i=0; i<nbug; i++) {
 +      snew(tmp,strlen(bugs[i])+3);
 +      strcpy(tmp,"* ");
 +      strcpy(tmp+2,check_tty(bugs[i]));
 +      fprintf(out,"%s\n",wrap_lines(tmp,78,10,TRUE));
 +      sfree(tmp);
 +    }
 +  }
 +  fprintf(out,"        \"\"\"\n\n        # Command line options\n");
 +  /* File options */
 +  fprintf(out,"        flags = []\n");
 +  for(i=0; (i<nfile); i++) 
 +    fprintf(out,"        flags.append(pca_file('%s',\"%s\",0,%d))\n",
 +          ftp2ext_generic(fnm[i].ftp),fnm[i].opt ? fnm[i].opt : "k",
 +          is_optional(&(fnm[i])));
 +          
 +          
 +  /* Other options */
 +  for(i=0; (i<npargs); i++) {
 +    switch(pa[i].type) {
 +    case etINT:
 +      fprintf(out,"        flags.append(pca_int(\"%s\",\"%s\",%d,%d))\n",
 +            pa[i].option,pa[i].desc,*pa[i].u.i,is_hidden(&(pa[i])));
 +      break;
 +    case etREAL:
 +    case etTIME:
 +      fprintf(out,"        flags.append(pca_float(\"%s\",\"%s\",%f,%d))\n",
 +            pa[i].option,pa[i].desc,*pa[i].u.r,is_hidden(&(pa[i])));
 +      break;
 +    case etSTR:
 +    case etBOOL:
 +      fprintf(out,"        flags.append(pca_gmx_bool(\"%s\",\"%s\",%d,%d))\n",
 +            pa[i].option,pa[i].desc,*pa[i].u.b,is_hidden(&(pa[i])));
 +      break;
 +    case etRVEC:
 +      fprintf(stderr,"Sorry, no rvecs yet...\n");
 +      break;
 +    case etENUM:
 +      fprintf(out,"        flags.append(pca_enum(\"%s\",\"%s\",\n",
 +            pa[i].option,pa[i].desc);
 +      fprintf(out,"        ['%s'",pa[i].u.c[1]);
 +      for(j=2; (pa[i].u.c[j] != NULL); j++)
 +      fprintf(out,",'%s'",pa[i].u.c[j]);
 +      fprintf(out,"],%d))\n",is_hidden(&(pa[i])));
 +      break;
 +    default:
 +      break;
 +    }
 +  }
 +    
 +  /* Make the dialog box */
 +  fprintf(out,"        gmxd = gmx_dialog(tk,\"%s\",flags,%s_help)\n\n",
 +        cls,cls);
 +        
 +  /* Main loop */
 +  fprintf(out,"#####################################################\n");
 +  fprintf(out,"tk     = Tk()\n");
 +  fprintf(out,"my%s = %s(tk)\n",cls,cls);
 +  fprintf(out,"tk.mainloop()\n");
 +}
 +
 +void write_man(FILE *out,const char *mantp,
 +             const char *program,
 +             int nldesc,const char **desc,
 +             int nfile,t_filenm *fnm,
 +             int npargs,t_pargs *pa,
 +             int nbug,const char **bugs,
 +             gmx_bool bHidden)
 +{
 +  const char *pr;
 +  int     i,npar;
 +  t_pargs *par;
 + 
 +  t_linkdata *links;
 +  
 +  links=init_linkdata();
 +  
 +  /* Don't write hidden options to completions, it just
 +   * makes the options more complicated for normal users
 +   */
 +
 +  if (bHidden) {
 +    npar=npargs;
 +    par=pa;
 +  }
 +  else {
 +    snew(par,npargs);
 +    npar=0;
 +    for(i=0;i<npargs;i++)
 +      if (!is_hidden(&pa[i])) {
 +      par[npar]=pa[i];
 +      npar++;
 +      }
 +  }
 +  
 +  if ((pr=strrchr(program,DIR_SEPARATOR)) == NULL)
 +    pr=program;
 +  else
 +    pr+=1;
 +  if (strcmp(mantp,"tex")==0)
 +    write_texman(out,pr,nldesc,desc,nfile,fnm,npar,par,nbug,bugs,links);
 +  if (strcmp(mantp,"nroff")==0)
 +    write_nroffman(out,pr,nldesc,desc,nfile,fnm,npar,par,nbug,bugs,links);
 +  if (strcmp(mantp,"ascii")==0)
 +    write_ttyman(out,pr,nldesc,desc,nfile,fnm,npar,par,nbug,bugs,TRUE,links);
 +  if (strcmp(mantp,"wiki")==0)
 +    write_wikiman(out,pr,nldesc,desc,nfile,fnm,npar,par,nbug,bugs,TRUE,links);
 +  if (strcmp(mantp,"help")==0)
 +    write_ttyman(out,pr,nldesc,desc,nfile,fnm,npar,par,nbug,bugs,FALSE,links);
 +  if (strcmp(mantp,"html")==0)
 +    write_htmlman(out,pr,nldesc,desc,nfile,fnm,npar,par,nbug,bugs,links);
 +  if (strcmp(mantp,"py")==0)
 +    write_py(out,pr,nldesc,desc,nfile,fnm,npar,par,nbug,bugs,links);
 +  if (strcmp(mantp,"xml")==0)
 +    write_xmlman(out,pr,nldesc,desc,nfile,fnm,npargs,pa,nbug,bugs,links);     
 +  if (strcmp(mantp,"completion-zsh")==0)
 +    write_zshcompl(out,nfile,fnm,npar,par);
 +  if (strcmp(mantp,"completion-bash")==0)
 +    write_bashcompl(out,nfile,fnm,npar,par);
 +  if (strcmp(mantp,"completion-csh")==0)
 +    write_cshcompl(out,nfile,fnm,npar,par);
 +
 +  if (!bHidden)
 +    sfree(par);
 +
 +  finish_linkdata(links);
 +}
 +
 +const char *get_arg_desc(int type) {
 +   static const char *argtp[etNR] = {
 +     "int", "step", "real", "time", "string", "bool", "vector", "enum"
 +   };
 +   return argtp[type];
 +}
Simple merge
index 0000000000000000000000000000000000000000,3fe53b00a762d954f594403276241b8e433bc5cb..3fe53b00a762d954f594403276241b8e433bc5cb
mode 000000,100644..100644
--- /dev/null
index 43838b30dd49c42cc340a6904c071e23f7d6536c,0000000000000000000000000000000000000000..4ceada5ac3498547090fcea9a0b1254f67f1e216
mode 100644,000000..100644
--- /dev/null
@@@ -1,1240 -1,0 +1,1241 @@@
- #include <omp.h>
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2012, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 + 
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * Groningen Machine for Chemical Simulation
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <algorithm>
 +
 +#include <stdio.h>
 +#include <stdlib.h>
 +#include <string.h>
 +
 +#ifdef NOGMX
 +#define GMX_PARALLEL_ENV_INITIALIZED 1
 +#else 
 +#ifdef GMX_MPI
 +#define GMX_PARALLEL_ENV_INITIALIZED 1
 +#else
 +#define GMX_PARALLEL_ENV_INITIALIZED 0
 +#endif
 +#endif
 +
 +#ifdef GMX_LIB_MPI
 +#include <mpi.h>
 +#endif
 +#ifdef GMX_THREAD_MPI
 +#include "tmpi.h"
 +#endif
 +
 +#ifdef GMX_OPENMP
++/* TODO: Do we still need this? Are we still planning ot use fftw + OpenMP? */
 +#define FFT5D_THREADS
 +#endif
 +#ifdef FFT5D_THREADS
++#include "gmx_omp.h"
 +/* requires fftw compiled with openmp */
 +/* #define FFT5D_FFTW_THREADS (now set by cmake) */
 +#endif
 +
 +#include "fft5d.h"
 +#include <float.h>
 +#include <math.h>
 +#include <assert.h>
 +#include "smalloc.h"
 +
 +#ifndef __FLT_EPSILON__
 +#define __FLT_EPSILON__ FLT_EPSILON
 +#define __DBL_EPSILON__ DBL_EPSILON
 +#endif
 +
 +#ifdef NOGMX
 +FILE* debug=0;
 +#endif
 +
 +#include "gmx_fatal.h"
 +
 +
 +#ifdef GMX_FFT_FFTW3 
 +#include "thread_mpi/mutex.h"
 +#include "gromacs/utility/exceptions.h"
 +/* none of the fftw3 calls, except execute(), are thread-safe, so 
 +   we need to serialize them with this mutex. */
 +static tMPI::mutex big_fftw_mutex;
 +#define FFTW_LOCK try { big_fftw_mutex.lock(); } GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
 +#define FFTW_UNLOCK try { big_fftw_mutex.unlock(); } GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
 +#endif /* GMX_FFT_FFTW3 */
 +
 +/* largest factor smaller than sqrt */
 +static int lfactor(int z) {  
 +      int i;
 +      for (i=static_cast<int>(sqrt(static_cast<double>(z)));;i--)
 +              if (z%i==0) return i;
 +      return 1;
 +}
 +
 +/* largest factor */
 +static int l2factor(int z) {  
 +      int i;
 +      if (z==1) return 1;
 +      for (i=z/2;;i--)
 +              if (z%i==0) return i;
 +      return 1;
 +}
 +
 +/* largest prime factor: WARNING: slow recursion, only use for small numbers */
 +static int lpfactor(int z) {
 +      int f = l2factor(z);
 +      if (f==1) return z;
 +      return std::max(lpfactor(f),lpfactor(z/f));
 +}
 +
 +#ifndef GMX_MPI
 +#ifdef HAVE_GETTIMEOFDAY
 +#include <sys/time.h>
 +double MPI_Wtime() {
 +    struct timeval tv;
 +    gettimeofday(&tv,0);
 +    return tv.tv_sec+tv.tv_usec*1e-6;
 +}
 +#else
 +double MPI_Wtime() {
 +    return 0.0;
 +}
 +#endif
 +#endif
 +
 +static int vmax(int* a, int s) {
 +    int i,max=0;
 +    for (i=0;i<s;i++) 
 +    {
 +        if (a[i]>max) max=a[i];
 +    }
 +    return max;
 +} 
 +
 +
 +/* NxMxK the size of the data
 + * comm communicator to use for fft5d
 + * P0 number of processor in 1st axes (can be null for automatic)
 + * lin is allocated by fft5d because size of array is only known after planning phase 
 + * rlout2 is only used as intermediate buffer - only returned after allocation to reuse for back transform - should not be used by caller
 +*/
 +fft5d_plan fft5d_plan_3d(int NG, int MG, int KG, MPI_Comm comm[2], int flags, t_complex** rlin, t_complex** rlout, t_complex** rlout2, t_complex** rlout3, int nthreads)
 +{
 +
 +    int P[2],bMaster,prank[2],i,t;
 +    int rNG,rMG,rKG;
 +    int *N0=0, *N1=0, *M0=0, *M1=0, *K0=0, *K1=0, *oN0=0, *oN1=0, *oM0=0, *oM1=0, *oK0=0, *oK1=0;
 +    int N[3],M[3],K[3],pN[3],pM[3],pK[3],oM[3],oK[3],*iNin[3]={0},*oNin[3]={0},*iNout[3]={0},*oNout[3]={0};
 +    int C[3],rC[3],nP[2];
 +    int lsize;
 +    t_complex *lin=0,*lout=0,*lout2=0,*lout3=0;
 +    fft5d_plan plan;
 +    int s;
 +
 +    /* comm, prank and P are in the order of the decomposition (plan->cart is in the order of transposes) */
 +#ifdef GMX_MPI
 +    if (GMX_PARALLEL_ENV_INITIALIZED && comm[0] != MPI_COMM_NULL)
 +    {
 +        MPI_Comm_size(comm[0],&P[0]);
 +        MPI_Comm_rank(comm[0],&prank[0]);
 +    }
 +    else
 +#endif
 +    {
 +        P[0] = 1;
 +        prank[0] = 0;
 +    }
 +#ifdef GMX_MPI
 +    if (GMX_PARALLEL_ENV_INITIALIZED && comm[1] != MPI_COMM_NULL)
 +    {
 +        MPI_Comm_size(comm[1],&P[1]);
 +        MPI_Comm_rank(comm[1],&prank[1]);
 +    }
 +    else
 +#endif
 +    {
 +        P[1] = 1;
 +        prank[1] = 0;
 +    }
 +   
 +    bMaster=(prank[0]==0&&prank[1]==0);
 +   
 +    
 +    if (debug)
 +    {
 +        fprintf(debug,"FFT5D: Using %dx%d processor grid, rank %d,%d\n",
 +                P[0],P[1],prank[0],prank[1]);
 +    }
 +    
 +    if (bMaster) {
 +        if (debug) 
 +            fprintf(debug,"FFT5D: N: %d, M: %d, K: %d, P: %dx%d, real2complex: %d, backward: %d, order yz: %d, debug %d\n",
 +                NG,MG,KG,P[0],P[1],(flags&FFT5D_REALCOMPLEX)>0,(flags&FFT5D_BACKWARD)>0,(flags&FFT5D_ORDER_YZ)>0,(flags&FFT5D_DEBUG)>0);
 +        /* The check below is not correct, one prime factor 11 or 13 is ok.
 +        if (fft5d_fmax(fft5d_fmax(lpfactor(NG),lpfactor(MG)),lpfactor(KG))>7) {
 +            printf("WARNING: FFT very slow with prime factors larger 7\n");
 +            printf("Change FFT size or in case you cannot change it look at\n");
 +            printf("http://www.fftw.org/fftw3_doc/Generating-your-own-code.html\n");
 +        }
 +        */
 +    }
 +    
 +    if (NG==0 || MG==0 || KG==0) {
 +        if (bMaster) printf("FFT5D: FATAL: Datasize cannot be zero in any dimension\n");
 +        return 0;
 +    }
 +
 +    rNG=NG;rMG=MG;rKG=KG;
 +    
 +    if (flags&FFT5D_REALCOMPLEX) {
 +        if (!(flags&FFT5D_BACKWARD)) NG = NG/2+1;
 +        else {
 +            if (!(flags&FFT5D_ORDER_YZ)) MG=MG/2+1;
 +            else KG=KG/2+1;
 +        }
 +    }
 +    
 +    
 +    /*for transpose we need to know the size for each processor not only our own size*/
 +
 +    N0 = (int*)malloc(P[0]*sizeof(int)); N1 = (int*)malloc(P[1]*sizeof(int)); 
 +    M0 = (int*)malloc(P[0]*sizeof(int)); M1 = (int*)malloc(P[1]*sizeof(int));
 +    K0 = (int*)malloc(P[0]*sizeof(int)); K1 = (int*)malloc(P[1]*sizeof(int));
 +    oN0 = (int*)malloc(P[0]*sizeof(int));oN1 = (int*)malloc(P[1]*sizeof(int));
 +    oM0 = (int*)malloc(P[0]*sizeof(int));oM1 = (int*)malloc(P[1]*sizeof(int));
 +    oK0 = (int*)malloc(P[0]*sizeof(int));oK1 = (int*)malloc(P[1]*sizeof(int));
 +    
 +    for (i=0;i<P[0];i++) 
 +    {
 +        #define EVENDIST
 +        #ifndef EVENDIST
 +        oN0[i]=i*ceil((double)NG/P[0]);
 +        oM0[i]=i*ceil((double)MG/P[0]);
 +        oK0[i]=i*ceil((double)KG/P[0]);
 +        #else
 +        oN0[i]=(NG*i)/P[0];
 +        oM0[i]=(MG*i)/P[0];
 +        oK0[i]=(KG*i)/P[0];
 +        #endif
 +    }
 +    for (i=0;i<P[1];i++) 
 +    {
 +        #ifndef EVENDIST
 +        oN1[i]=i*ceil((double)NG/P[1]); 
 +        oM1[i]=i*ceil((double)MG/P[1]); 
 +        oK1[i]=i*ceil((double)KG/P[1]); 
 +        #else
 +        oN1[i]=(NG*i)/P[1]; 
 +        oM1[i]=(MG*i)/P[1]; 
 +        oK1[i]=(KG*i)/P[1]; 
 +        #endif
 +    }
 +    for (i=0;i<P[0]-1;i++) 
 +    {
 +        N0[i]=oN0[i+1]-oN0[i];
 +        M0[i]=oM0[i+1]-oM0[i];
 +        K0[i]=oK0[i+1]-oK0[i];
 +    }
 +    N0[P[0]-1]=NG-oN0[P[0]-1];
 +    M0[P[0]-1]=MG-oM0[P[0]-1];
 +    K0[P[0]-1]=KG-oK0[P[0]-1];
 +    for (i=0;i<P[1]-1;i++) 
 +    {
 +        N1[i]=oN1[i+1]-oN1[i];
 +        M1[i]=oM1[i+1]-oM1[i];
 +        K1[i]=oK1[i+1]-oK1[i];
 +    }
 +    N1[P[1]-1]=NG-oN1[P[1]-1];
 +    M1[P[1]-1]=MG-oM1[P[1]-1];
 +    K1[P[1]-1]=KG-oK1[P[1]-1];
 +
 +    /* for step 1-3 the local N,M,K sizes of the transposed system
 +       C: contiguous dimension, and nP: number of processor in subcommunicator 
 +       for that step */
 +    
 +    
 +    pM[0] = M0[prank[0]];
 +    oM[0] = oM0[prank[0]];
 +    pK[0] = K1[prank[1]];
 +    oK[0] = oK1[prank[1]];
 +    C[0] = NG;
 +    rC[0] = rNG;
 +    if (!(flags&FFT5D_ORDER_YZ)) {
 +        N[0] = vmax(N1,P[1]);
 +        M[0] = M0[prank[0]];
 +        K[0] = vmax(K1,P[1]);
 +        pN[0] = N1[prank[1]];
 +        iNout[0] = N1;
 +        oNout[0] = oN1;
 +        nP[0] = P[1];
 +        C[1] = KG;
 +        rC[1] =rKG;
 +        N[1] = vmax(K0,P[0]);
 +        pN[1] = K0[prank[0]];
 +        iNin[1] = K1;
 +        oNin[1] = oK1; 
 +        iNout[1] = K0;
 +        oNout[1] = oK0;
 +        M[1] = vmax(M0,P[0]);
 +        pM[1] = M0[prank[0]];
 +        oM[1] = oM0[prank[0]];
 +        K[1] = N1[prank[1]];
 +        pK[1] = N1[prank[1]];
 +        oK[1] = oN1[prank[1]];
 +        nP[1] = P[0];
 +        C[2] = MG;
 +        rC[2] = rMG;
 +        iNin[2] = M0;
 +        oNin[2] = oM0;
 +        M[2] = vmax(K0,P[0]);
 +        pM[2] = K0[prank[0]];
 +        oM[2] = oK0[prank[0]];
 +        K[2] = vmax(N1,P[1]);
 +        pK[2] = N1[prank[1]];
 +        oK[2] = oN1[prank[1]];
 +        free(N0); free(oN0); /*these are not used for this order*/
 +        free(M1); free(oM1); /*the rest is freed in destroy*/
 +    } else {
 +        N[0] = vmax(N0,P[0]);
 +        M[0] = vmax(M0,P[0]);
 +        K[0] = K1[prank[1]];
 +        pN[0] = N0[prank[0]];
 +        iNout[0] = N0;
 +        oNout[0] = oN0;
 +        nP[0] = P[0];
 +        C[1] = MG;
 +        rC[1] =rMG;
 +        N[1] = vmax(M1,P[1]);
 +        pN[1] = M1[prank[1]];
 +        iNin[1] = M0;
 +        oNin[1] = oM0;
 +        iNout[1] = M1;
 +        oNout[1] = oM1;
 +        M[1] = N0[prank[0]];
 +        pM[1] = N0[prank[0]];
 +        oM[1] = oN0[prank[0]];
 +        K[1] = vmax(K1,P[1]);
 +        pK[1] = K1[prank[1]];
 +        oK[1] = oK1[prank[1]];
 +        nP[1] = P[1];
 +        C[2] = KG;
 +        rC[2] = rKG;
 +        iNin[2] = K1;
 +        oNin[2] = oK1;
 +        M[2] = vmax(N0,P[0]);
 +        pM[2] = N0[prank[0]];
 +        oM[2] = oN0[prank[0]];
 +        K[2] = vmax(M1,P[1]);
 +        pK[2] = M1[prank[1]];
 +        oK[2] = oM1[prank[1]];
 +        free(N1); free(oN1); /*these are not used for this order*/
 +        free(K0); free(oK0); /*the rest is freed in destroy*/
 +    }
 +    N[2]=pN[2]=-1; /*not used*/
 +    
 +    /*
 +      Difference between x-y-z regarding 2d decomposition is whether they are 
 +      distributed along axis 1, 2 or both 
 +    */
 +    
 +    /* int lsize = fmax(N[0]*M[0]*K[0]*nP[0],N[1]*M[1]*K[1]*nP[1]); */
 +    lsize = std::max(N[0]*M[0]*K[0]*nP[0],std::max(N[1]*M[1]*K[1]*nP[1],C[2]*M[2]*K[2]));
 +    /* int lsize = fmax(C[0]*M[0]*K[0],fmax(C[1]*M[1]*K[1],C[2]*M[2]*K[2])); */
 +    if (!(flags&FFT5D_NOMALLOC)) { 
 +        snew_aligned(lin, lsize, 32);
 +        snew_aligned(lout, lsize, 32);
 +        snew_aligned(lout2, lsize, 32);
 +        snew_aligned(lout3, lsize, 32);
 +    } else {
 +        lin = *rlin;
 +        lout = *rlout;
 +        lout2 = *rlout2;
 +        lout3 = *rlout3;
 +    }
 +
 +    plan = (fft5d_plan)calloc(1,sizeof(struct fft5d_plan_t));
 +
 +    
 +    if (debug)
 +    {
 +        fprintf(debug, "Running on %d threads\n",nthreads);        
 +    }
 +
 +#ifdef GMX_FFT_FFTW3  /*if not FFTW - then we don't do a 3d plan but instead use only 1D plans */
 +    /* It is possible to use the 3d plan with OMP threads - but in that case it is not allowed to be called from
 +     * within a parallel region. For now deactivated. If it should be supported it has to made sure that
 +     * that the execute of the 3d plan is in a master/serial block (since it contains it own parallel region)
 +     * and that the 3d plan is faster than the 1d plan.
 +     */
 +    if ((!(flags&FFT5D_INPLACE)) && (!(P[0]>1 || P[1]>1)) && nthreads==1) {  /*don't do 3d plan in parallel or if in_place requested */
 +            int fftwflags=FFTW_DESTROY_INPUT;
 +            FFTW(iodim) dims[3];
 +            int inNG=NG,outMG=MG,outKG=KG;
 +
 +            FFTW_LOCK;
 +            if (!(flags&FFT5D_NOMEASURE)) fftwflags|=FFTW_MEASURE;
 +            if (flags&FFT5D_REALCOMPLEX) {
 +                if (!(flags&FFT5D_BACKWARD)) {  /*input pointer is not complex*/
 +                    inNG*=2; 
 +                } else {                        /*output pointer is not complex*/
 +                    if (!(flags&FFT5D_ORDER_YZ)) outMG*=2;
 +                    else outKG*=2;
 +                }
 +            }
 +
 +            if (!(flags&FFT5D_BACKWARD)) {
 +                dims[0].n  = KG;
 +                dims[1].n  = MG;
 +                dims[2].n  = rNG;
 +                
 +                dims[0].is = inNG*MG;     /*N M K*/
 +                dims[1].is = inNG;
 +                dims[2].is = 1;
 +                if (!(flags&FFT5D_ORDER_YZ)) {
 +                    dims[0].os = MG;       /*M K N*/
 +                    dims[1].os = 1;
 +                    dims[2].os = MG*KG;
 +                } else  {
 +                    dims[0].os = 1;       /*K N M*/
 +                    dims[1].os = KG*NG;
 +                    dims[2].os = KG;
 +                }
 +            } else {
 +                if (!(flags&FFT5D_ORDER_YZ)) {
 +                    dims[0].n  = NG;   
 +                    dims[1].n  = KG;   
 +                    dims[2].n  = rMG;  
 +                    
 +                    dims[0].is = 1;     
 +                    dims[1].is = NG*MG;
 +                    dims[2].is = NG;
 +
 +                    dims[0].os = outMG*KG;       
 +                    dims[1].os = outMG;
 +                    dims[2].os = 1;                  
 +                } else {
 +                    dims[0].n  = MG;
 +                    dims[1].n  = NG;
 +                    dims[2].n  = rKG;
 +                    
 +                    dims[0].is = NG;     
 +                    dims[1].is = 1;
 +                    dims[2].is = NG*MG;
 +
 +                    dims[0].os = outKG*NG;       
 +                    dims[1].os = outKG;
 +                    dims[2].os = 1;                  
 +                }           
 +            }
 +#ifdef FFT5D_THREADS
 +#ifdef FFT5D_FFTW_THREADS
 +            FFTW(plan_with_nthreads)(nthreads);
 +#endif
 +#endif
 +            if ((flags&FFT5D_REALCOMPLEX) && !(flags&FFT5D_BACKWARD)) {
 +                plan->p3d = FFTW(plan_guru_dft_r2c)(/*rank*/ 3, dims,
 +                                     /*howmany*/ 0, /*howmany_dims*/0 ,
 +                                     (real*)lin, (FFTW(complex) *)lout,
 +                                     /*flags*/ fftwflags);              
 +            } else if ((flags&FFT5D_REALCOMPLEX) && (flags&FFT5D_BACKWARD)) {
 +                plan->p3d = FFTW(plan_guru_dft_c2r)(/*rank*/ 3, dims,
 +                                     /*howmany*/ 0, /*howmany_dims*/0 ,
 +                                     (FFTW(complex) *)lin, (real*)lout,
 +                                     /*flags*/ fftwflags);              
 +            } else {
 +                plan->p3d = FFTW(plan_guru_dft)(/*rank*/ 3, dims,
 +                                     /*howmany*/ 0, /*howmany_dims*/0 ,
 +                                     (FFTW(complex) *)lin, (FFTW(complex) *)lout,
 +                                     /*sign*/ (flags&FFT5D_BACKWARD)?1:-1, /*flags*/ fftwflags);
 +            }
 +#ifdef FFT5D_THREADS
 +#ifdef FFT5D_FFTW_THREADS
 +            FFTW(plan_with_nthreads)(1);
 +#endif
 +#endif
 +            FFTW_UNLOCK;
 +    }
 +    if (!plan->p3d) {  /* for decomposition and if 3d plan did not work */
 +#endif /* GMX_FFT_FFTW3 */
 +        for (s=0;s<3;s++) {
 +            if (debug)
 +            {
 +                fprintf(debug,"FFT5D: Plan s %d rC %d M %d pK %d C %d lsize %d\n",
 +                        s,rC[s],M[s],pK[s],C[s],lsize);
 +            }
 +            plan->p1d[s] = (gmx_fft_t*)malloc(sizeof(gmx_fft_t)*nthreads);
 +
 +            /* Make sure that the init routines are only called by one thread at a time and in order
 +               (later is only important to not confuse valgrind)
 +             */
 +#pragma omp parallel for num_threads(nthreads) schedule(static) ordered
 +            for(t=0; t<nthreads; t++)
 +            {
 +                int tsize = ((t+1)*pM[s]*pK[s]/nthreads)-(t*pM[s]*pK[s]/nthreads);
 +
 +                if ((flags&FFT5D_REALCOMPLEX) && ((!(flags&FFT5D_BACKWARD) && s==0) || ((flags&FFT5D_BACKWARD) && s==2))) {
 +                    gmx_fft_init_many_1d_real( &plan->p1d[s][t], rC[s], tsize, (flags&FFT5D_NOMEASURE)?GMX_FFT_FLAG_CONSERVATIVE:0 );
 +                } else {
 +                    gmx_fft_init_many_1d     ( &plan->p1d[s][t],  C[s], tsize, (flags&FFT5D_NOMEASURE)?GMX_FFT_FLAG_CONSERVATIVE:0 );
 +                }
 +            }
 +        }
 +
 +#ifdef GMX_FFT_FFTW3 
 +    }
 +#endif
 +    if ((flags&FFT5D_ORDER_YZ)) { /*plan->cart is in the order of transposes */
 +        plan->cart[0]=comm[0]; plan->cart[1]=comm[1];
 +    } else {
 +        plan->cart[1]=comm[0]; plan->cart[0]=comm[1];
 +    }
 +#ifdef FFT5D_MPI_TRANSPOSE
 +    FFTW_LOCK;
 +    for (s=0;s<2;s++) {
 +        if ((s==0 && !(flags&FFT5D_ORDER_YZ)) || (s==1 && (flags&FFT5D_ORDER_YZ))) 
 +            plan->mpip[s] = FFTW(mpi_plan_many_transpose)(nP[s], nP[s], N[s]*K[s]*pM[s]*2, 1, 1, (real*)lout2, (real*)lout3, plan->cart[s], FFTW_PATIENT);
 +        else
 +            plan->mpip[s] = FFTW(mpi_plan_many_transpose)(nP[s], nP[s], N[s]*pK[s]*M[s]*2, 1, 1, (real*)lout2, (real*)lout3, plan->cart[s], FFTW_PATIENT);
 +    }
 +    FFTW_UNLOCK;
 +#endif 
 +
 +    
 +    plan->lin=lin;
 +    plan->lout=lout;
 +    plan->lout2=lout2;
 +    plan->lout3=lout3;
 +    
 +    plan->NG=NG;plan->MG=MG;plan->KG=KG;
 +    
 +    for (s=0;s<3;s++) {
 +        plan->N[s]=N[s];plan->M[s]=M[s];plan->K[s]=K[s];plan->pN[s]=pN[s];plan->pM[s]=pM[s];plan->pK[s]=pK[s];
 +        plan->oM[s]=oM[s];plan->oK[s]=oK[s];
 +        plan->C[s]=C[s];plan->rC[s]=rC[s];
 +        plan->iNin[s]=iNin[s];plan->oNin[s]=oNin[s];plan->iNout[s]=iNout[s];plan->oNout[s]=oNout[s];
 +    }
 +    for (s=0;s<2;s++) {
 +        plan->P[s]=nP[s];plan->coor[s]=prank[s];
 +    }
 +    
 +/*    plan->fftorder=fftorder;
 +    plan->direction=direction;    
 +    plan->realcomplex=realcomplex;
 +*/
 +    plan->flags=flags;
 +    plan->nthreads=nthreads;
 +    *rlin=lin;
 +    *rlout=lout;
 +    *rlout2=lout2;
 +    *rlout3=lout3;
 +    return plan;
 +}
 +
 +
  +/* Axis orderings of the three dimensions in the (possibly transposed)
  +   local data; used by compute_offsets() to map local indices back to
  +   the original x,y,z input coordinate system. */
  +enum order {
  +    XYZ,
  +    XZY,
  +    YXZ,
  +    YZX,
  +    ZXY,
  +    ZYX
  +};
 +
 +
 +
  +/*here x,y,z and N,M,K are in the rotated coordinate system!!
  +  x (and N) is the minor (consecutive/fastest-varying) dimension, y (M) middle and z (K) major
  +  maxN,maxM,maxK is max size of local data
  +  pN, pM, pK is local size specific to current processor (only different to max if not divisible)
  +  NG, MG, KG is size of global data*/
  +/* Split the contiguous x axis into P chunks (one per processor in the
  +   subcommunicator) in preparation for the AllToAll.  Each chunk i is
  +   padded out to the uniform block size maxN*maxM*maxK so every send
  +   block has the same size.  (starty,startz)..(endy,endz) bound the
  +   (y,z) slab handled by the calling thread; endz is inclusive, hence
  +   the z<endz+1 loop condition. */
  +static void splitaxes(t_complex* lout,const t_complex* lin,
  +                      int maxN,int maxM,int maxK, int pN, int pM, int pK,
  +                      int P,int NG,int *N, int* oN,int starty,int startz,int endy, int endz)
  +{
  +    int x,y,z,i;
  +    int in_i,out_i,in_z,out_z,in_y,out_y;
  +    int s_y,e_y;
  +
  +    for (z=startz; z<endz+1; z++) /*3. z l*/
  +    {
  +        /* the first/last z plane of this thread's range may start/end
  +           part-way through the y dimension */
  +        if (z==startz) {
  +            s_y=starty;
  +        } else {
  +            s_y=0;
  +        }
  +        if (z==endz) {
  +            e_y=endy;
  +        } else {
  +            e_y=pM;
  +        }
  +        out_z  = z*maxN*maxM;
  +        in_z = z*NG*pM;
  +
  +        for (i=0; i<P; i++) /*index cube along long axis*/
  +        {
  +            out_i  = out_z  + i*maxN*maxM*maxK;
  +            in_i = in_z + oN[i];
  +            for (y=s_y;y<e_y;y++) { /*2. y k*/
  +                out_y  = out_i  + y*maxN;
  +                in_y = in_i + y*NG;
  +                for (x=0;x<N[i];x++) { /*1. x j*/
  +                    lout[out_y+x] = lin[in_y+x];    /*in=z*NG*pM+oN[i]+y*NG+x*/
  +                    /*after split it is important that each processor chunk i has size maxN*maxM*maxK and thus they are all the same size*/
  +                    /*before split data is contiguous - thus if different processors get different amounts, oN differs*/
  +                }
  +            }
  +        }
  +    }
  +}
 +
  +/*make axis contiguous again (after AllToAll) and also do local transpose*/
  +/*transpose minor and major dimension
  +  variables see above (splitaxes)
  +  the major, middle, minor order is only correct for x,y,z (N,M,K) for the input
  +  N,M,K local dimensions
  +  KG global size*/
  +/* Reassemble the z axis (length KG) from the P received blocks while
  +   swapping the roles of x and z in the output layout.
  +   (starty,startx)..(endy,endx) bound the (y,x) range handled by the
  +   calling thread; endx is inclusive, hence x<endx+1. */
  +static void joinAxesTrans13(t_complex* lout,const t_complex* lin,
  +                            int maxN,int maxM,int maxK,int pN, int pM, int pK, 
  +                            int P,int KG, int* K, int* oK,int starty, int startx, int endy, int endx)
  +{
  +    int i,x,y,z;
  +    int out_i,in_i,out_x,in_x,out_z,in_z;
  +    int s_y,e_y;
  +
  +    for (x=startx;x<endx+1;x++) /*1.j*/
  +    {
  +        /* first/last x plane of this thread's range may start/end
  +           part-way through the y dimension */
  +        if (x==startx)
  +        {
  +            s_y=starty;
  +        }
  +        else
  +        {
  +            s_y=0;
  +        }
  +        if (x==endx)
  +        {
  +            e_y=endy;
  +        }
  +        else
  +        {
  +            e_y=pM;
  +        }
  +
  +        out_x  = x*KG*pM;
  +        in_x = x;
  +
  +        for (i=0;i<P;i++) /*index cube along long axis*/
  +        {
  +            out_i  = out_x  + oK[i];
  +            in_i = in_x + i*maxM*maxN*maxK;
  +            for (z=0;z<K[i];z++) /*3.l*/
  +            {
  +                out_z  = out_i  + z;
  +                in_z = in_i + z*maxM*maxN;
  +                for (y=s_y;y<e_y;y++) /*2.k*/
  +                {
  +                    lout[out_z+y*KG] = lin[in_z+y*maxN]; /*out=x*KG*pM+oK[i]+z+y*KG*/
  +                }
  +            }
  +        }
  +    }
  +}
 +
  +/*make axis contiguous again (after AllToAll) and also do local transpose
  +  transpose minor and middle dimension
  +  variables see above (splitaxes)
  +  the minor, middle, major order is only correct for x,y,z (N,M,K) for the input
  +  N,M,K local size
  +  MG, global size*/
  +/* Reassemble the y axis (length MG) from the P received blocks while
  +   swapping the roles of x and y in the output layout.
  +   (startx,startz)..(endx,endz) bound the (x,z) range handled by the
  +   calling thread; endz is inclusive, hence z<endz+1. */
  +static void joinAxesTrans12(t_complex* lout,const t_complex* lin,int maxN,int maxM,int maxK,int pN, int pM, int pK,
  +                            int P,int MG, int* M, int* oM, int startx, int startz, int endx, int endz) {
  +    int i,z,y,x;
  +    int out_i,in_i,out_z,in_z,out_x,in_x;
  +    int s_x,e_x;
  +
  +    for (z=startz; z<endz+1; z++)
  +    {
  +        /* first/last z plane of this thread's range may start/end
  +           part-way through the x dimension */
  +        if (z==startz)
  +        {
  +            s_x=startx;
  +        }
  +        else
  +        {
  +            s_x=0;
  +        }
  +        if (z==endz)
  +        {
  +            e_x=endx;
  +        }
  +        else
  +        {
  +            e_x=pN;
  +        }
  +        out_z  = z*MG*pN;
  +        in_z = z*maxM*maxN;
  +
  +        for (i=0; i<P; i++) /*index cube along long axis*/
  +        {
  +            out_i  = out_z  + oM[i];
  +            in_i = in_z + i*maxM*maxN*maxK;
  +            for (x=s_x;x<e_x;x++)
  +            {
  +                out_x  = out_i  + x*MG;
  +                in_x = in_i + x;
  +                for (y=0;y<M[i];y++)
  +                {
  +                    lout[out_x+y] = lin[in_x+y*maxN]; /*out=z*MG*pN+oM[i]+x*MG+y*/
  +                }
  +            }
  +        }
  +    }
  +}
 +
 +
  +/* Rotate a length-3 offset/stride vector one position left:
  +   (x0,x1,x2) -> (x1,x2,x0).  Applied in compute_offsets() to account
  +   for the different axis ordering of backward (FFTW-style) input. */
  +static void rotate_offsets(int x[]) {
  +    int t=x[0];
  +/*    x[0]=x[2];
  +    x[2]=x[1];
  +    x[1]=t;*/
  +    x[0]=x[1];
  +    x[1]=x[2];
  +    x[2]=t;
  +}
 +
  +/*compute the offset to compare or print transposed local data in original input coordinates
  +  xs matrix dimension size, xl dimension length, xc decomposition offset 
  +  s: step in computation = number of transposes*/
  +/* Outputs (all length-3 arrays, indexed by the ORIGINAL x,y,z axes):
  +     xs - stride of that axis within the local (transposed) data
  +     xl - local length of that axis on this processor
  +     xc - global offset of this processor's slab along that axis
  +     NG - global size of that axis
  +   NOTE(review): 'o' is only assigned inside switch(s); for s outside
  +   0..2 the assert fires in debug builds, but with NDEBUG 'o' would be
  +   read uninitialized - callers only ever pass s=0..2; confirm. */
  +static void compute_offsets(fft5d_plan plan, int xs[], int xl[], int xc[], int NG[], int s) {
  +/*    int direction = plan->direction;
  +    int fftorder = plan->fftorder;*/
  +    
  +    int o;
  +    int pos[3],i;
  +    int *pM=plan->pM, *pK=plan->pK, *oM=plan->oM, *oK=plan->oK,
  +        *C=plan->C, *rC=plan->rC;
  +
  +    NG[0]=plan->NG;NG[1]=plan->MG;NG[2]=plan->KG;
  +
  +    /* axis ordering after s transposes depends on the transpose order */
  +    if (!(plan->flags&FFT5D_ORDER_YZ)) {
  +        switch (s) {
  +        case 0: o=XYZ; break;
  +        case 1: o=ZYX; break;
  +        case 2: o=YZX; break;
  +        default: assert(0);
  +        }
  +    } else {
  +        switch (s) {
  +        case 0: o=XYZ; break;
  +        case 1: o=YXZ; break;
  +        case 2: o=ZXY; break;
  +        default: assert(0);
  +        }
  +    }
  + 
  +    /* pos[i]: position (1=contiguous, 2=middle, 3=outermost) of
  +       original axis i in the local layout */
  +    switch (o) {
  +        case XYZ:pos[0]=1;pos[1]=2;pos[2]=3;break;
  +        case XZY:pos[0]=1;pos[1]=3;pos[2]=2;break;
  +        case YXZ:pos[0]=2;pos[1]=1;pos[2]=3;break;
  +        case YZX:pos[0]=3;pos[1]=1;pos[2]=2;break;
  +        case ZXY:pos[0]=2;pos[1]=3;pos[2]=1;break;
  +        case ZYX:pos[0]=3;pos[1]=2;pos[2]=1;break;
  +    }
  +    /*if (debug) printf("pos: %d %d %d\n",pos[0],pos[1],pos[2]);*/
  +        
  +    /*xs, xl give dimension size and data length in local transposed coordinate system
  +      for 0(/1/2): x(/y/z) in original coordinate system*/
  +    for (i=0;i<3;i++) {
  +        switch (pos[i]) {
  +        case 1: xs[i]=1;         xc[i]=0;     xl[i]=C[s];break;
  +        case 2: xs[i]=C[s];      xc[i]=oM[s]; xl[i]=pM[s];break;
  +        case 3: xs[i]=C[s]*pM[s];xc[i]=oK[s]; xl[i]=pK[s];break;
  +        }
  +    }
  +    /*input order is different for test program to match FFTW order 
  +      (important for complex to real)*/
  +    if (plan->flags&FFT5D_BACKWARD) {
  +        rotate_offsets(xs);
  +        rotate_offsets(xl);
  +        rotate_offsets(xc);
  +        rotate_offsets(NG);
  +        if (plan->flags&FFT5D_ORDER_YZ) {
  +            rotate_offsets(xs);
  +            rotate_offsets(xl);
  +            rotate_offsets(xc);
  +            rotate_offsets(NG);
  +        }
  +    }
  +    /* the real-valued side of a real<->complex transform has rC[s]
  +       (not C[s]) valid elements along the contiguous axis */
  +    if ((plan->flags&FFT5D_REALCOMPLEX) && ((!(plan->flags&FFT5D_BACKWARD) && s==0) || ((plan->flags&FFT5D_BACKWARD) && s==2))) {
  +        xl[0] = rC[s];
  +    }
  +}
 +
  +/* Dump the local data block for FFT step s to the debug log, laid out
  +   according to compute_offsets().  txt is a format string consuming
  +   coor[0], coor[1] and s.  In the index expression below the z/y
  +   strides are scaled by 2 (reals per t_complex) while x advances in
  +   ll reals (ll==1 when FFT5D_REALCOMPLEX, i.e. real-valued data,
  +   else 2). */
  +static void print_localdata(const t_complex* lin, const char* txt, int s, fft5d_plan plan) {
  +    int x,y,z,l;
  +    int *coor = plan->coor;
  +    int xs[3],xl[3],xc[3],NG[3];        
  +    int ll=(plan->flags&FFT5D_REALCOMPLEX)?1:2;
  +    compute_offsets(plan,xs,xl,xc,NG,s);
  +    fprintf(debug,txt,coor[0],coor[1],s);
  +    /*printf("xs: %d %d %d, xl: %d %d %d\n",xs[0],xs[1],xs[2],xl[0],xl[1],xl[2]);*/
  +    for(z=0;z<xl[2];z++) {
  +        for(y=0;y<xl[1];y++) {
  +            fprintf(debug,"%d %d: ",coor[0],coor[1]);
  +            for (x=0;x<xl[0];x++) {
  +                for (l=0;l<ll;l++) {
  +                    fprintf(debug,"%f ",((real*)lin)[(z*xs[2]+y*xs[1])*2+(x*xs[0])*ll+l]);
  +                }
  +                fprintf(debug,",");
  +            }
  +            fprintf(debug,"\n");
  +        }
  +    }
  +}
 +
  +/* Execute one thread's share of the 5D FFT described by plan:
  +   two (1D many-FFT + split + AllToAll + join/transpose) corner-rotation
  +   steps, then a final 1D many-FFT.  Must be called by all
  +   plan->nthreads threads (the omp barriers below synchronize them).
  +   'times' accumulates timings: wallcycle counters normally, raw
  +   MPI_Wtime sums when built with NOGMX. */
  +void fft5d_execute(fft5d_plan plan,int thread,fft5d_time times) {
  +    t_complex *lin = plan->lin;
  +    t_complex *lout = plan->lout;
  +    t_complex *lout2 = plan->lout2;
  +    t_complex *lout3 = plan->lout3;
  +    t_complex *fftout,*joinin;
  +
  +    gmx_fft_t **p1d=plan->p1d;
  +#ifdef FFT5D_MPI_TRANSPOSE
  +    FFTW(plan) *mpip=plan->mpip;
  +#endif
  +#ifdef GMX_MPI
  +    MPI_Comm *cart=plan->cart;
  +#endif
  +#ifdef NOGMX
  +    double time_fft=0,time_local=0,time_mpi[2]={0},time=0;    
  +#endif
  +    int *N=plan->N,*M=plan->M,*K=plan->K,*pN=plan->pN,*pM=plan->pM,*pK=plan->pK,
  +        *C=plan->C,*P=plan->P,**iNin=plan->iNin,**oNin=plan->oNin,**iNout=plan->iNout,**oNout=plan->oNout;
  +    int s=0,tstart,tend,bParallelDim;
  +    
  +    
  +#ifdef GMX_FFT_FFTW3 
  +    /* a single 3D FFTW plan (built only for the serial, out-of-place,
  +       single-thread case) replaces the whole 5D scheme */
  +    if (plan->p3d)
  +    {
  +        if (thread == 0)
  +        {
  +#ifdef NOGMX
  +            if (times!=0)
  +            {
  +                time=MPI_Wtime();
  +            }
  +#endif
  +            FFTW(execute)(plan->p3d);
  +#ifdef NOGMX
  +            if (times!=0)
  +            {
  +                times->fft+=MPI_Wtime()-time;
  +            }
  +#endif
  +        }
  +        return;
  +    }
  +#endif
  +
  +        s=0;
  +
  +    /*lin: x,y,z*/
  +        if (plan->flags&FFT5D_DEBUG && thread == 0)
  +        {
  +            print_localdata(lin, "%d %d: copy in lin\n", s, plan);
  +        }
  +
  +        for (s=0;s<2;s++) {  /*loop over first two FFT steps (corner rotations)*/
  +
  +#ifdef GMX_MPI
  +        if (GMX_PARALLEL_ENV_INITIALIZED && cart[s]!=MPI_COMM_NULL && P[s]>1)
  +        {
  +            bParallelDim = 1;
  +        }
  +        else
  +#endif
  +        {
  +            bParallelDim = 0;
  +        }
  +
  +        /* ---------- START FFT ------------ */
  +#ifdef NOGMX
  +        if (times!=0 && thread == 0)
  +        {
  +            time=MPI_Wtime();
  +        }
  +#endif
  +
  +        /* serial dimensions skip the AllToAll, so the FFT writes
  +           straight into the buffer the join step will read from */
  +        if (bParallelDim) {
  +            fftout = lout;
  +        }
  +        else
  +        {
  +            if (s==0)
  +            {
  +                fftout = lout3;
  +            } else
  +            {
  +                fftout = lout2;
  +            }
  +        }
  +
  +        /* this thread's slice of the pM*pK many-1D-FFT batch */
  +        tstart = (thread*pM[s]*pK[s]/plan->nthreads)*C[s];
  +        if ((plan->flags&FFT5D_REALCOMPLEX) && !(plan->flags&FFT5D_BACKWARD) && s==0)
  +        {
  +            gmx_fft_many_1d_real(p1d[s][thread],(plan->flags&FFT5D_BACKWARD)?GMX_FFT_COMPLEX_TO_REAL:GMX_FFT_REAL_TO_COMPLEX,lin+tstart,fftout+tstart);
  +        } else
  +        {
  +            gmx_fft_many_1d(     p1d[s][thread],(plan->flags&FFT5D_BACKWARD)?GMX_FFT_BACKWARD:GMX_FFT_FORWARD,               lin+tstart,fftout+tstart);
  +
  +        }
  +
  +#ifdef NOGMX
  +        if (times != NULL && thread == 0)
  +        {
  +            time_fft+=MPI_Wtime()-time;
  +        }
  +#endif
  +        if (plan->flags&FFT5D_DEBUG && thread == 0)
  +        {
  +            print_localdata(lout, "%d %d: FFT %d\n", s, plan);
  +        }
  +        /* ---------- END FFT ------------ */
  +
  +        /* ---------- START SPLIT + TRANSPOSE------------ (if parallel in this dimension)*/
  +        if (bParallelDim) {
  +#ifdef NOGMX
  +            if (times != NULL && thread == 0)
  +            {
  +                time=MPI_Wtime();
  +            }
  +#endif
  +            /*prepare for AllToAll:
  +              1. (outermost) axis (x) is split into P[s] parts of size N[s]
  +              for sending*/
  +            if (pM[s]>0)
  +            {
  +                tend = ((thread+1)*pM[s]*pK[s]/plan->nthreads);
  +                tstart/=C[s];
  +                splitaxes(lout2,lout,N[s],M[s],K[s], pN[s],pM[s],pK[s],P[s],C[s],iNout[s],oNout[s],tstart%pM[s],tstart/pM[s],tend%pM[s],tend/pM[s]);
  +            }
  +#pragma omp barrier /*barrier required before AllToAll (all input has to be there) - before timing to make timing more accurate*/
  +#ifdef NOGMX
  +            if (times != NULL && thread == 0)
  +            {
  +                time_local+=MPI_Wtime()-time;
  +            }
  +#endif
  +
  +        /* ---------- END SPLIT , START TRANSPOSE------------ */
  +
  +            /* only the master thread performs the communication */
  +            if (thread == 0)
  +            {
  +#ifdef NOGMX
  +                if (times!=0)
  +                {
  +                    time=MPI_Wtime();
  +                }
  +#else
  +                wallcycle_start(times,ewcPME_FFTCOMM);
  +#endif
  +#ifdef FFT5D_MPI_TRANSPOSE
  +                FFTW(execute)(mpip[s]);
  +#else
  +#ifdef GMX_MPI
  +                if ((s==0 && !(plan->flags&FFT5D_ORDER_YZ)) || (s==1 && (plan->flags&FFT5D_ORDER_YZ)))
  +                    MPI_Alltoall(lout2,N[s]*pM[s]*K[s]*sizeof(t_complex)/sizeof(real),GMX_MPI_REAL,lout3,N[s]*pM[s]*K[s]*sizeof(t_complex)/sizeof(real),GMX_MPI_REAL,cart[s]);
  +                else
  +                    MPI_Alltoall(lout2,N[s]*M[s]*pK[s]*sizeof(t_complex)/sizeof(real),GMX_MPI_REAL,lout3,N[s]*M[s]*pK[s]*sizeof(t_complex)/sizeof(real),GMX_MPI_REAL,cart[s]);
  +#else
  +                gmx_incons("fft5d MPI call without MPI configuration");
  +#endif /*GMX_MPI*/
  +#endif /*FFT5D_MPI_TRANSPOSE*/
  +#ifdef NOGMX
  +                if (times!=0)
  +                {
  +                    time_mpi[s]=MPI_Wtime()-time;
  +                }
  +#else
  +                wallcycle_stop(times,ewcPME_FFTCOMM);
  +#endif
  +            }  /*master*/
  +        }  /* bParallelDim */
  +#pragma omp barrier  /*both needed for parallel and non-parallel dimension (either have to wait on data from AlltoAll or from last FFT)*/
  +
  +        /* ---------- END SPLIT + TRANSPOSE------------ */
  +
  +        /* ---------- START JOIN ------------ */
  +#ifdef NOGMX
  +        if (times != NULL && thread == 0)
  +        {
  +            time=MPI_Wtime();
  +        }
  +#endif
  +
  +        if (bParallelDim) {
  +            joinin = lout3;
  +        } else {
  +            joinin = fftout;
  +        }
  +        /*bring back in matrix form 
  +          thus make  new 1. axes contiguous
  +          also local transpose 1 and 2/3 
  +          runs on thread used for following FFT (thus needing a barrier before but not afterwards)
  +        */
  +        if ((s==0 && !(plan->flags&FFT5D_ORDER_YZ)) || (s==1 && (plan->flags&FFT5D_ORDER_YZ))) {
  +            if (pM[s]>0)
  +            {
  +                tstart = ( thread   *pM[s]*pN[s]/plan->nthreads);
  +                tend   = ((thread+1)*pM[s]*pN[s]/plan->nthreads);
  +                joinAxesTrans13(lin,joinin,N[s],pM[s],K[s],pN[s],pM[s],pK[s],P[s],C[s+1],iNin[s+1],oNin[s+1],tstart%pM[s],tstart/pM[s],tend%pM[s],tend/pM[s]);
  +            }
  +        }
  +        else {
  +            if (pN[s]>0)
  +            {
  +                tstart = ( thread   *pK[s]*pN[s]/plan->nthreads);
  +                tend   = ((thread+1)*pK[s]*pN[s]/plan->nthreads);
  +                joinAxesTrans12(lin,joinin,N[s],M[s],pK[s],pN[s],pM[s],pK[s],P[s],C[s+1],iNin[s+1],oNin[s+1],tstart%pN[s],tstart/pN[s],tend%pN[s],tend/pN[s]);
  +            }
  +        }
  +
  +#ifdef NOGMX
  +        if (times != NULL && thread == 0)
  +        {
  +            time_local+=MPI_Wtime()-time;
  +        }
  +#endif
  +        if (plan->flags&FFT5D_DEBUG && thread == 0)
  +        {
  +            print_localdata(lin, "%d %d: tranposed %d\n", s+1, plan);
  +        }
  +        /* ---------- END JOIN ------------ */
  +
  +        /*if (debug) print_localdata(lin, "%d %d: transposed x-z\n", N1, M0, K, ZYX, coor);*/
  +    }  /* for(s=0;s<2;s++) */
  +#ifdef NOGMX
  +        if (times != NULL && thread == 0)
  +        {
  +            time=MPI_Wtime();
  +        }
  +#endif
  +
  +    if (plan->flags&FFT5D_INPLACE) lout=lin; /*in place currently not supported*/
  +
  +    /*  ----------- FFT ----------- */
  +    /* third and final FFT step (s==2 after the loop above) */
  +    tstart = (thread*pM[s]*pK[s]/plan->nthreads)*C[s];
  +    if ((plan->flags&FFT5D_REALCOMPLEX) && (plan->flags&FFT5D_BACKWARD)) {
  +        gmx_fft_many_1d_real(p1d[s][thread],(plan->flags&FFT5D_BACKWARD)?GMX_FFT_COMPLEX_TO_REAL:GMX_FFT_REAL_TO_COMPLEX,lin+tstart,lout+tstart);
  +    } else {
  +        gmx_fft_many_1d(     p1d[s][thread],(plan->flags&FFT5D_BACKWARD)?GMX_FFT_BACKWARD:GMX_FFT_FORWARD,               lin+tstart,lout+tstart);
  +    }
  +    /* ------------ END FFT ---------*/
  +
  +#ifdef NOGMX
  +    if (times != NULL && thread == 0)
  +    {
  +        time_fft+=MPI_Wtime()-time;
  +
  +        times->fft+=time_fft;
  +        times->local+=time_local;
  +        times->mpi2+=time_mpi[1];
  +        times->mpi1+=time_mpi[0];
  +    }
  +#endif
  +
  +    if (plan->flags&FFT5D_DEBUG && thread == 0)
  +    {
  +        print_localdata(lout, "%d %d: FFT %d\n", s, plan);
  +    }
  +}
 +
  +/* Free all resources owned by the plan: the per-thread 1D FFT plans,
  +   the decomposition index/offset arrays, any FFTW 3D (and MPI
  +   transpose) plans, the aligned work buffers (unless supplied by the
  +   caller via FFT5D_NOMALLOC), and finally the plan struct itself. */
  +void fft5d_destroy(fft5d_plan plan) {
  +    int s,t;
  +    for (s=0;s<3;s++)
  +    {
  +        if (plan->p1d[s])
  +        {
  +            for (t=0;t<plan->nthreads;t++)
  +            {
  +                gmx_many_fft_destroy(plan->p1d[s][t]);
  +            }
  +            free(plan->p1d[s]);
  +        }
  +        if (plan->iNin[s])
  +        {
  +            free(plan->iNin[s]);
  +            plan->iNin[s]=0;
  +        }
  +        if (plan->oNin[s])
  +        {
  +            free(plan->oNin[s]);
  +            plan->oNin[s]=0;
  +        }
  +        if (plan->iNout[s])
  +        {
  +            free(plan->iNout[s]);
  +            plan->iNout[s]=0;
  +        }
  +        if (plan->oNout[s])
  +        {
  +            free(plan->oNout[s]);
  +            plan->oNout[s]=0;
  +        }
  +    }
  +#ifdef GMX_FFT_FFTW3 
  +    FFTW_LOCK;
  +#ifdef FFT5D_MPI_TRANSPOS
  +    /* NOTE(review): this guard is spelled FFT5D_MPI_TRANSPOS while the
  +       rest of the file tests FFT5D_MPI_TRANSPOSE - if that is a typo
  +       the MPI transpose plans are never destroyed here; confirm and
  +       fix upstream. */
  +    for (s=0;s<2;s++)    
  +    {
  +        FFTW(destroy_plan)(plan->mpip[s]);
  +    }
  +#endif /* FFT5D_MPI_TRANSPOS */
  +    if (plan->p3d)
  +    {
  +        FFTW(destroy_plan)(plan->p3d);
  +    }
  +    FFTW_UNLOCK;
  +#endif /* GMX_FFT_FFTW3 */
  +
  +    if (!(plan->flags&FFT5D_NOMALLOC))
  +    {
  +        sfree_aligned(plan->lin);
  +        sfree_aligned(plan->lout);
  +        sfree_aligned(plan->lout2);
  +        sfree_aligned(plan->lout3);
  +    }
  +    
  +#ifdef FFT5D_THREADS
  +#ifdef FFT5D_FFTW_THREADS
  +    /*FFTW(cleanup_threads)();*/
  +#endif
  +#endif
  +
  +    free(plan);
  +}
 +
  +/*Is this better than direct access of plan? enough data?
  +  here 0,1 reference divided by which processor grid dimension (not FFT step!)*/
  +/* Report local grid sizes and the processor coordinates of the plan.
  +   NOTE(review): the output names do not map one-to-one onto plan
  +   fields (*K0 is filled from plan->N[1]); verify the intended
  +   parameter/field correspondence against the callers before relying
  +   on the names. */
  +void fft5d_local_size(fft5d_plan plan,int* N1,int* M0,int* K0,int* K1,int** coor) {
  +    *N1=plan->N[0];
  +    *M0=plan->M[0];
  +    *K1=plan->K[0];
  +    *K0=plan->N[1];
  +    
  +    *coor=plan->coor;
  +}
 +
 +
  +/*same as fft5d_plan_3d but with cartesian coordinator and automatic splitting 
  +  of processor dimensions*/
  +/* P0 requests the size of the first processor-grid dimension; P0==0,
  +   or a P0 that does not evenly divide the communicator size, falls
  +   back to lfactor(size).  Without GMX_MPI the two sub-communicators
  +   stay zero-initialized and the resulting plan is serial. */
  +fft5d_plan fft5d_plan_3d_cart(int NG, int MG, int KG, MPI_Comm comm, int P0, int flags, t_complex** rlin, t_complex** rlout, t_complex** rlout2, t_complex** rlout3, int nthreads) {
  +    MPI_Comm cart[2]={0};
  +#ifdef GMX_MPI
  +    int size=1,prank=0;
  +    int P[2];
  +    int coor[2];
  +    int wrap[]={0,0};
  +    MPI_Comm gcart;
  +    int rdim1[] = {0,1}, rdim2[] = {1,0};
  +
  +    MPI_Comm_size(comm,&size);
  +    MPI_Comm_rank(comm,&prank);
  +
  +    if (P0==0) P0 = lfactor(size);
  +    if (size%P0!=0)
  +    {
  +        if (prank==0) printf("FFT5D: WARNING: Number of processors %d not evenly dividable by %d\n",size,P0);
  +        P0 = lfactor(size);
  +    }
  +        
  +    P[0] = P0; P[1]=size/P0; /*number of processors in the two dimensions*/
  +    
  +    /*Difference between x-y-z regarding 2d decomposition is whether they are 
  +      distributed along axis 1, 2 or both*/
  +    
  +    MPI_Cart_create(comm,2,P,wrap,1,&gcart); /*parameter 4: value 1: reorder*/
  +    MPI_Cart_get(gcart,2,P,wrap,coor); 
  +    MPI_Cart_sub(gcart, rdim1 , &cart[0]);
  +    MPI_Cart_sub(gcart, rdim2 , &cart[1]);
  +#endif
  +    return fft5d_plan_3d(NG, MG, KG, cart, flags, rlin, rlout,rlout2,rlout3,nthreads);
  +}
 +
 +
 +
  +/*prints in original coordinate system of data (as the input to FFT)*/
  +/* Compare the local FFT result 'lin' against reference data 'in'.
  +   bothLocal: nonzero when 'in' is already in the local (transposed)
  +   layout; otherwise 'in' is indexed as the full global input array.
  +   normalize: divide each result value by the global volume (needed
  +   after a forward+backward round trip).  Mismatches beyond the
  +   size-scaled epsilon are reported on stdout; with FFT5D_DEBUG the
  +   raw value pairs are printed instead of being checked. */
  +void fft5d_compare_data(const t_complex* lin, const t_complex* in, fft5d_plan plan, int bothLocal, int normalize) {
  +    int xs[3],xl[3],xc[3],NG[3];
  +    int x,y,z,l;
  +    int *coor = plan->coor;
  +    int ll=2; /*compare ll values per element (has to be 2 for complex)*/
  +    if ((plan->flags&FFT5D_REALCOMPLEX) && (plan->flags&FFT5D_BACKWARD))
  +    {
  +        ll=1;
  +    }
  +
  +    compute_offsets(plan,xs,xl,xc,NG,2);
  +    if (plan->flags&FFT5D_DEBUG) printf("Compare2\n");
  +    for (z=0;z<xl[2];z++) {
  +        for(y=0;y<xl[1];y++) {
  +            if (plan->flags&FFT5D_DEBUG) printf("%d %d: ",coor[0],coor[1]);
  +            for (x=0;x<xl[0];x++) {
  +                for (l=0;l<ll;l++) { /*loop over real/complex parts*/
  +                    real a,b;
  +                    a=((real*)lin)[(z*xs[2]+y*xs[1])*2+x*xs[0]*ll+l];
  +                    if (normalize) a/=plan->rC[0]*plan->rC[1]*plan->rC[2];
  +                    if (!bothLocal) 
  +                        b=((real*)in)[((z+xc[2])*NG[0]*NG[1]+(y+xc[1])*NG[0])*2+(x+xc[0])*ll+l];
  +                    else 
  +                        b=((real*)in)[(z*xs[2]+y*xs[1])*2+x*xs[0]*ll+l];
  +                    if (plan->flags&FFT5D_DEBUG) {
  +                        printf("%f %f, ",a,b);
  +                    } else {
  +                        if (fabs(a-b)>2*NG[0]*NG[1]*NG[2]*GMX_REAL_EPS) {
  +                            printf("result incorrect on %d,%d at %d,%d,%d: FFT5D:%f reference:%f\n",coor[0],coor[1],x,y,z,a,b);
  +                        }
  +/*                        assert(fabs(a-b)<2*NG[0]*NG[1]*NG[2]*GMX_REAL_EPS);*/
  +                    }
  +                }
  +                if (plan->flags&FFT5D_DEBUG) printf(",");
  +            }
  +            if (plan->flags&FFT5D_DEBUG) printf("\n");
  +        }
  +    }
  +    
  +}
 +
Simple merge
index 825b47caa1f481ae92732896d51d561edca7d20f,0000000000000000000000000000000000000000..63dfc8b9e3ce0e74e6d28a8f1a9885ba56e7e9bf
mode 100644,000000..100644
--- /dev/null
@@@ -1,1436 -1,0 +1,1442 @@@
-             fprintf(log,"   Total Virial (%s)\n",unit_energy);
-             pr_ebin(log,md->ebin,md->ivir,9,3,mode,FALSE);
-             fprintf(log,"\n");
-             fprintf(log,"   Pressure (%s)\n",unit_pres_bar);
-             pr_ebin(log,md->ebin,md->ipres,9,3,mode,FALSE);
-             fprintf(log,"\n");
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <string.h>
 +#include <float.h>
 +#include "typedefs.h"
 +#include "string2.h"
 +#include "mdebin.h"
 +#include "smalloc.h"
 +#include "physics.h"
 +#include "enxio.h"
 +#include "vec.h"
 +#include "disre.h"
 +#include "main.h"
 +#include "network.h"
 +#include "names.h"
 +#include "orires.h"
 +#include "constr.h"
 +#include "mtop_util.h"
 +#include "xvgr.h"
 +#include "gmxfio.h"
 +#include "macros.h"
 +#include "mdrun.h"
 +#include "mdebin_bar.h"
 +
 +
 +/* Legend strings for the energy-file entries; each array is passed to
 +   get_ebin_space() in init_mdebin() below. */
 +static const char *conrmsd_nm[] = { "Constr. rmsd", "Constr.2 rmsd" };
 +
 +static const char *boxs_nm[] = { "Box-X", "Box-Y", "Box-Z" };
 +
 +/* Triclinic boxes report the three off-diagonal elements as well. */
 +static const char *tricl_boxs_nm[] = {
 +    "Box-XX", "Box-YY", "Box-ZZ",
 +    "Box-YX", "Box-ZX", "Box-ZY"
 +};
 +
 +static const char *vol_nm[] = { "Volume" };
 +
 +static const char *dens_nm[] = {"Density" };
 +
 +static const char *pv_nm[] = {"pV" };
 +
 +static const char *enthalpy_nm[] = {"Enthalpy" };
 +
 +static const char *boxvel_nm[] = {
 +    "Box-Vel-XX", "Box-Vel-YY", "Box-Vel-ZZ",
 +    "Box-Vel-YX", "Box-Vel-ZX", "Box-Vel-ZY"
 +};
 +
 +/* Number of box legend entries for rectangular / triclinic boxes. */
 +#define NBOXS asize(boxs_nm)
 +#define NTRICLBOXS asize(tricl_boxs_nm)
 +
 +/* Allocate and initialise a t_mdebin energy-output bookkeeping structure.
 +   Decides which energy terms are recorded (from the topology 'mtop' and
 +   input record 'ir'), reserves a slot range in the energy bin for every
 +   group of terms, writes the energy-file header if 'fp_ene' is open, and
 +   sets up dH/dlambda output: either the provided 'fp_dhdl' handle or an
 +   internal delta-H histogram collection when separate_dhdl_file==no.
 +   Returns the newly allocated structure; the caller keeps ownership of
 +   the passed file handles.
 +   NOTE: slot reservation order matters — e.g. md->iconrmsd must directly
 +   follow md->ie (see comment below). */
 +t_mdebin *init_mdebin(ener_file_t fp_ene,
 +                      const gmx_mtop_t *mtop,
 +                      const t_inputrec *ir,
 +                      FILE *fp_dhdl)
 +{
 +    const char *ener_nm[F_NRE];
 +    static const char *vir_nm[] = {
 +        "Vir-XX", "Vir-XY", "Vir-XZ",
 +        "Vir-YX", "Vir-YY", "Vir-YZ",
 +        "Vir-ZX", "Vir-ZY", "Vir-ZZ"
 +    };
 +    static const char *sv_nm[] = {
 +        "ShakeVir-XX", "ShakeVir-XY", "ShakeVir-XZ",
 +        "ShakeVir-YX", "ShakeVir-YY", "ShakeVir-YZ",
 +        "ShakeVir-ZX", "ShakeVir-ZY", "ShakeVir-ZZ"
 +    };
 +    static const char *fv_nm[] = {
 +        "ForceVir-XX", "ForceVir-XY", "ForceVir-XZ",
 +        "ForceVir-YX", "ForceVir-YY", "ForceVir-YZ",
 +        "ForceVir-ZX", "ForceVir-ZY", "ForceVir-ZZ"
 +    };
 +    static const char *pres_nm[] = {
 +        "Pres-XX","Pres-XY","Pres-XZ",
 +        "Pres-YX","Pres-YY","Pres-YZ",
 +        "Pres-ZX","Pres-ZY","Pres-ZZ"
 +    };
 +    static const char *surft_nm[] = {
 +        "#Surf*SurfTen"
 +    };
 +    static const char *mu_nm[] = {
 +        "Mu-X", "Mu-Y", "Mu-Z"
 +    };
 +    static const char *vcos_nm[] = {
 +        "2CosZ*Vel-X"
 +    };
 +    static const char *visc_nm[] = {
 +        "1/Viscosity"
 +    };
 +    static const char *baro_nm[] = {
 +        "Barostat"
 +    };
 +
 +    char     **grpnms;
 +    const gmx_groups_t *groups;
 +    char     **gnm;
 +    char     buf[256];
 +    const char     *bufi;
 +    t_mdebin *md;
 +    int      i,j,ni,nj,n,nh,k,kk,ncon,nset;
 +    gmx_bool     bBHAM,bNoseHoover,b14;
 +
 +    snew(md,1);
 +
 +    /* Default: record virial, pressure, surface tension and dipole;
 +       disabled below for AdResS runs. */
 +    md->bVir=TRUE;
 +    md->bPress=TRUE;
 +    md->bSurft=TRUE;
 +    md->bMu=TRUE;
 +
 +    if (EI_DYNAMICS(ir->eI))
 +    {
 +        md->delta_t = ir->delta_t;
 +    }
 +    else
 +    {
 +        md->delta_t = 0;
 +    }
 +
 +    groups = &mtop->groups;
 +
 +    bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
 +    b14   = (gmx_mtop_ftype_count(mtop,F_LJ14) > 0 ||
 +             gmx_mtop_ftype_count(mtop,F_LJC14_Q) > 0);
 +
 +    ncon = gmx_mtop_ftype_count(mtop,F_CONSTR);
 +    nset = gmx_mtop_ftype_count(mtop,F_SETTLE);
 +    md->bConstr    = (ncon > 0 || nset > 0);
 +    md->bConstrVir = FALSE;
 +    if (md->bConstr) {
 +        if (ncon > 0 && ir->eConstrAlg == econtLINCS) {
 +            /* SD2 integrates in two stages, so two rmsd entries */
 +            if (ir->eI == eiSD2)
 +                md->nCrmsd = 2;
 +            else
 +                md->nCrmsd = 1;
 +        }
 +        md->bConstrVir = (getenv("GMX_CONSTRAINTVIR") != NULL);
 +    } else {
 +        md->nCrmsd = 0;
 +    }
 +
 +    /* Energy monitoring */
 +    for(i=0;i<egNR;i++)
 +    {
 +        md->bEInd[i]=FALSE;
 +    }
 +
 +#ifndef GMX_OPENMM
 +    /* Decide per interaction type whether its energy term is written. */
 +    for(i=0; i<F_NRE; i++)
 +    {
 +        md->bEner[i] = FALSE;
 +        if (i == F_LJ)
 +            md->bEner[i] = !bBHAM;
 +        else if (i == F_BHAM)
 +            md->bEner[i] = bBHAM;
 +        else if (i == F_EQM)
 +            md->bEner[i] = ir->bQMMM;
 +        else if (i == F_COUL_LR)
 +            md->bEner[i] = (ir->rcoulomb > ir->rlist);
 +        else if (i == F_LJ_LR)
 +            md->bEner[i] = (!bBHAM && ir->rvdw > ir->rlist);
 +        else if (i == F_BHAM_LR)
 +            md->bEner[i] = (bBHAM && ir->rvdw > ir->rlist);
 +        else if (i == F_RF_EXCL)
 +            md->bEner[i] = (EEL_RF(ir->coulombtype) && ir->coulombtype != eelRF_NEC);
 +        else if (i == F_COUL_RECIP)
 +            md->bEner[i] = EEL_FULL(ir->coulombtype);
 +        else if (i == F_LJ14)
 +            md->bEner[i] = b14;
 +        else if (i == F_COUL14)
 +            md->bEner[i] = b14;
 +        else if (i == F_LJC14_Q || i == F_LJC_PAIRS_NB)
 +            md->bEner[i] = FALSE;
 +        else if ((i == F_DVDL_COUL && ir->fepvals->separate_dvdl[efptCOUL]) ||
 +                 (i == F_DVDL_VDW  && ir->fepvals->separate_dvdl[efptVDW]) ||
 +                 (i == F_DVDL_BONDED && ir->fepvals->separate_dvdl[efptBONDED]) ||
 +                 (i == F_DVDL_RESTRAINT && ir->fepvals->separate_dvdl[efptRESTRAINT]) ||
 +                 (i == F_DKDL && ir->fepvals->separate_dvdl[efptMASS]) ||
 +                 (i == F_DVDL && ir->fepvals->separate_dvdl[efptFEP]))
 +            md->bEner[i] = (ir->efep != efepNO);
 +        else if ((interaction_function[i].flags & IF_VSITE) ||
 +                 (i == F_CONSTR) || (i == F_CONSTRNC) || (i == F_SETTLE))
 +            md->bEner[i] = FALSE;
 +        else if ((i == F_COUL_SR) || (i == F_EPOT) || (i == F_PRES)  || (i==F_EQM))
 +            md->bEner[i] = TRUE;
 +        else if ((i == F_GBPOL) && ir->implicit_solvent==eisGBSA)
 +            md->bEner[i] = TRUE;
 +        else if ((i == F_NPSOLVATION) && ir->implicit_solvent==eisGBSA && (ir->sa_algorithm != esaNO))
 +            md->bEner[i] = TRUE;
 +        else if ((i == F_GB12) || (i == F_GB13) || (i == F_GB14))
 +            md->bEner[i] = FALSE;
 +        else if ((i == F_ETOT) || (i == F_EKIN) || (i == F_TEMP))
 +            md->bEner[i] = EI_DYNAMICS(ir->eI);
 +        else if (i==F_VTEMP)
 +            md->bEner[i] =  (EI_DYNAMICS(ir->eI) && getenv("GMX_VIRIAL_TEMPERATURE"));
 +        else if (i == F_DISPCORR || i == F_PDISPCORR)
 +            md->bEner[i] = (ir->eDispCorr != edispcNO);
 +        else if (i == F_DISRESVIOL)
 +            md->bEner[i] = (gmx_mtop_ftype_count(mtop,F_DISRES) > 0);
 +        else if (i == F_ORIRESDEV)
 +            md->bEner[i] = (gmx_mtop_ftype_count(mtop,F_ORIRES) > 0);
 +        else if (i == F_CONNBONDS)
 +            md->bEner[i] = FALSE;
 +        else if (i == F_COM_PULL)
 +            md->bEner[i] = (ir->ePull == epullUMBRELLA || ir->ePull == epullCONST_F || ir->bRot);
 +        else if (i == F_ECONSERVED)
 +            md->bEner[i] = ((ir->etc == etcNOSEHOOVER || ir->etc == etcVRESCALE) &&
 +                            (ir->epc == epcNO || ir->epc==epcMTTK));
 +        else
 +            md->bEner[i] = (gmx_mtop_ftype_count(mtop,i) > 0);
 +    }
 +#else
 +    /* OpenMM always produces only the following 4 energy terms */
 +    md->bEner[F_EPOT] = TRUE;
 +    md->bEner[F_EKIN] = TRUE;
 +    md->bEner[F_ETOT] = TRUE;
 +    md->bEner[F_TEMP] = TRUE;
 +#endif
 +
 +    /* for adress simulations, most energy terms are not meaningfull, and thus disabled*/
 +    if (ir->bAdress && !debug) {
 +        for (i = 0; i < F_NRE; i++) {
 +            md->bEner[i] = FALSE;
 +            if(i == F_EKIN){ md->bEner[i] = TRUE;}
 +            if(i == F_TEMP){ md->bEner[i] = TRUE;}
 +        }
 +        md->bVir=FALSE;
 +        md->bPress=FALSE;
 +        md->bSurft=FALSE;
 +        md->bMu=FALSE;
 +    }
 +
 +    /* Build the compacted list of legend names for the enabled terms. */
 +    md->f_nre=0;
 +    for(i=0; i<F_NRE; i++)
 +    {
 +        if (md->bEner[i])
 +        {
 +            /* FIXME: The constness should not be cast away */
 +            /*ener_nm[f_nre]=(char *)interaction_function[i].longname;*/
 +            ener_nm[md->f_nre]=interaction_function[i].longname;
 +            md->f_nre++;
 +        }
 +    }
 +
 +    md->epc = ir->epc;
 +    md->bDiagPres = !TRICLINIC(ir->ref_p);
 +    md->ref_p = (ir->ref_p[XX][XX]+ir->ref_p[YY][YY]+ir->ref_p[ZZ][ZZ])/DIM;
 +    md->bTricl = TRICLINIC(ir->compress) || TRICLINIC(ir->deform);
 +    md->bDynBox = DYNAMIC_BOX(*ir);
 +    md->etc = ir->etc;
 +    md->bNHC_trotter = IR_NVT_TROTTER(ir);
 +    md->bPrintNHChains = ir-> bPrintNHChains;
 +    md->bMTTK = (IR_NPT_TROTTER(ir) || IR_NPH_TROTTER(ir));
 +
 +    /* Reserve slot ranges in the energy bin; the i* fields below are the
 +       starting indices of each group of entries. */
 +    md->ebin  = mk_ebin();
 +    /* Pass NULL for unit to let get_ebin_space determine the units
 +     * for interaction_function[i].longname
 +     */
 +    md->ie    = get_ebin_space(md->ebin,md->f_nre,ener_nm,NULL);
 +    if (md->nCrmsd)
 +    {
 +        /* This should be called directly after the call for md->ie,
 +         * such that md->iconrmsd follows directly in the list.
 +         */
 +        md->iconrmsd = get_ebin_space(md->ebin,md->nCrmsd,conrmsd_nm,"");
 +    }
 +    if (md->bDynBox)
 +    {
 +        md->ib    = get_ebin_space(md->ebin,
 +                                   md->bTricl ? NTRICLBOXS : NBOXS,
 +                                   md->bTricl ? tricl_boxs_nm : boxs_nm,
 +                                   unit_length);
 +        md->ivol  = get_ebin_space(md->ebin, 1, vol_nm,  unit_volume);
 +        md->idens = get_ebin_space(md->ebin, 1, dens_nm, unit_density_SI);
 +        if (md->bDiagPres)
 +        {
 +            md->ipv   = get_ebin_space(md->ebin, 1, pv_nm,   unit_energy);
 +            md->ienthalpy = get_ebin_space(md->ebin, 1, enthalpy_nm,   unit_energy);
 +        }
 +    }
 +    if (md->bConstrVir)
 +    {
 +        md->isvir = get_ebin_space(md->ebin,asize(sv_nm),sv_nm,unit_energy);
 +        md->ifvir = get_ebin_space(md->ebin,asize(fv_nm),fv_nm,unit_energy);
 +    }
 +    if (md->bVir)
 +        md->ivir   = get_ebin_space(md->ebin,asize(vir_nm),vir_nm,unit_energy);
 +    if (md->bPress)
 +        md->ipres  = get_ebin_space(md->ebin,asize(pres_nm),pres_nm,unit_pres_bar);
 +    if (md->bSurft)
 +        md->isurft = get_ebin_space(md->ebin,asize(surft_nm),surft_nm,
 +                                unit_surft_bar);
 +    if (md->epc == epcPARRINELLORAHMAN || md->epc == epcMTTK)
 +    {
 +        md->ipc = get_ebin_space(md->ebin,md->bTricl ? 6 : 3,
 +                                 boxvel_nm,unit_vel);
 +    }
 +    if (md->bMu)
 +        md->imu    = get_ebin_space(md->ebin,asize(mu_nm),mu_nm,unit_dipole_D);
 +    if (ir->cos_accel != 0)
 +    {
 +        md->ivcos = get_ebin_space(md->ebin,asize(vcos_nm),vcos_nm,unit_vel);
 +        md->ivisc = get_ebin_space(md->ebin,asize(visc_nm),visc_nm,
 +                                   unit_invvisc_SI);
 +    }
 +
 +    /* Energy monitoring */
 +    for(i=0;i<egNR;i++)
 +    {
 +        md->bEInd[i] = FALSE;
 +    }
 +    md->bEInd[egCOULSR] = TRUE;
 +    md->bEInd[egLJSR  ] = TRUE;
 +
 +    if (ir->rcoulomb > ir->rlist)
 +    {
 +        md->bEInd[egCOULLR] = TRUE;
 +    }
 +    if (!bBHAM)
 +    {
 +        if (ir->rvdw > ir->rlist)
 +        {
 +            md->bEInd[egLJLR]   = TRUE;
 +        }
 +    }
 +    else
 +    {
 +        md->bEInd[egLJSR]   = FALSE;
 +        md->bEInd[egBHAMSR] = TRUE;
 +        if (ir->rvdw > ir->rlist)
 +        {
 +            md->bEInd[egBHAMLR]   = TRUE;
 +        }
 +    }
 +    if (b14)
 +    {
 +        md->bEInd[egLJ14] = TRUE;
 +        md->bEInd[egCOUL14] = TRUE;
 +    }
 +    md->nEc=0;
 +    for(i=0; (i<egNR); i++)
 +    {
 +        if (md->bEInd[i])
 +        {
 +            md->nEc++;
 +        }
 +    }
 +
 +    n=groups->grps[egcENER].nr;
 +    /* for adress simulations, most energy terms are not meaningfull, and thus disabled*/
 +    if (!ir->bAdress){
 +        /*standard simulation*/
 +        md->nEg=n;
 +        md->nE=(n*(n+1))/2;
 +    }
 +    else if (!debug) {
 +        /*AdResS simulation*/
 +       md->nU=0;
 +       md->nEg=0;
 +       md->nE=0;
 +       md->nEc=0;
 +       /* NOTE(review): isvir is an ebin index elsewhere in this function;
 +          assigning FALSE (0) here looks suspect — verify intent. */
 +       md->isvir=FALSE;
 +    }
 +    snew(md->igrp,md->nE);
 +    if (md->nE > 1)
 +    {
 +        /* One ebin slot group per energy-group pair (i<=j), with one
 +           "term:grpA-grpB" legend per enabled term kind. */
 +        n=0;
 +        snew(gnm,md->nEc);
 +        for(k=0; (k<md->nEc); k++)
 +        {
 +            snew(gnm[k],STRLEN);
 +        }
 +        for(i=0; (i<groups->grps[egcENER].nr); i++)
 +        {
 +            ni=groups->grps[egcENER].nm_ind[i];
 +            for(j=i; (j<groups->grps[egcENER].nr); j++)
 +            {
 +                nj=groups->grps[egcENER].nm_ind[j];
 +                for(k=kk=0; (k<egNR); k++)
 +                {
 +                    if (md->bEInd[k])
 +                    {
 +                        sprintf(gnm[kk],"%s:%s-%s",egrp_nm[k],
 +                                *(groups->grpname[ni]),*(groups->grpname[nj]));
 +                        kk++;
 +                    }
 +                }
 +                md->igrp[n]=get_ebin_space(md->ebin,md->nEc,
 +                                           (const char **)gnm,unit_energy);
 +                n++;
 +            }
 +        }
 +        for(k=0; (k<md->nEc); k++)
 +        {
 +            sfree(gnm[k]);
 +        }
 +        sfree(gnm);
 +
 +        if (n != md->nE)
 +        {
 +            gmx_incons("Number of energy terms wrong");
 +        }
 +    }
 +
 +    /* Temperature-coupling bookkeeping: per-group entries, plus
 +       Nose-Hoover chain variables when those are printed. */
 +    md->nTC=groups->grps[egcTC].nr;
 +    md->nNHC = ir->opts.nhchainlength; /* shorthand for number of NH chains */
 +    if (md->bMTTK)
 +    {
 +        md->nTCP = 1;  /* assume only one possible coupling system for barostat
 +                          for now */
 +    }
 +    else
 +    {
 +        md->nTCP = 0;
 +    }
 +    if (md->etc == etcNOSEHOOVER)
 +    {
 +        if (md->bNHC_trotter)
 +        {
 +            /* two entries (xi and vxi) per chain element per group */
 +            md->mde_n = 2*md->nNHC*md->nTC;
 +        }
 +        else
 +        {
 +            md->mde_n = 2*md->nTC;
 +        }
 +        if (md->epc == epcMTTK)
 +        {
 +            md->mdeb_n = 2*md->nNHC*md->nTCP;
 +        }
 +    } else {
 +        md->mde_n = md->nTC;
 +        md->mdeb_n = 0;
 +    }
 +
 +    snew(md->tmp_r,md->mde_n);
 +    snew(md->tmp_v,md->mde_n);
 +    snew(md->grpnms,md->mde_n);
 +    grpnms = md->grpnms;
 +
 +    for(i=0; (i<md->nTC); i++)
 +    {
 +        ni=groups->grps[egcTC].nm_ind[i];
 +        sprintf(buf,"T-%s",*(groups->grpname[ni]));
 +        grpnms[i]=strdup(buf);
 +    }
 +    md->itemp=get_ebin_space(md->ebin,md->nTC,(const char **)grpnms,
 +                             unit_temp_K);
 +
 +    if (md->etc == etcNOSEHOOVER)
 +    {
 +        if (md->bPrintNHChains)
 +        {
 +            if (md->bNHC_trotter)
 +            {
 +                for(i=0; (i<md->nTC); i++)
 +                {
 +                    ni=groups->grps[egcTC].nm_ind[i];
 +                    bufi = *(groups->grpname[ni]);
 +                    for(j=0; (j<md->nNHC); j++)
 +                    {
 +                        sprintf(buf,"Xi-%d-%s",j,bufi);
 +                        grpnms[2*(i*md->nNHC+j)]=strdup(buf);
 +                        sprintf(buf,"vXi-%d-%s",j,bufi);
 +                        grpnms[2*(i*md->nNHC+j)+1]=strdup(buf);
 +                    }
 +                }
 +                md->itc=get_ebin_space(md->ebin,md->mde_n,
 +                                       (const char **)grpnms,unit_invtime);
 +                if (md->bMTTK)
 +                {
 +                    for(i=0; (i<md->nTCP); i++)
 +                    {
 +                        bufi = baro_nm[0];  /* All barostat DOF's together for now. */
 +                        for(j=0; (j<md->nNHC); j++)
 +                        {
 +                            sprintf(buf,"Xi-%d-%s",j,bufi);
 +                            grpnms[2*(i*md->nNHC+j)]=strdup(buf);
 +                            sprintf(buf,"vXi-%d-%s",j,bufi);
 +                            grpnms[2*(i*md->nNHC+j)+1]=strdup(buf);
 +                        }
 +                    }
 +                    md->itcb=get_ebin_space(md->ebin,md->mdeb_n,
 +                                            (const char **)grpnms,unit_invtime);
 +                }
 +            }
 +            else
 +            {
 +                for(i=0; (i<md->nTC); i++)
 +                {
 +                    ni=groups->grps[egcTC].nm_ind[i];
 +                    bufi = *(groups->grpname[ni]);
 +                    sprintf(buf,"Xi-%s",bufi);
 +                    grpnms[2*i]=strdup(buf);
 +                    sprintf(buf,"vXi-%s",bufi);
 +                    grpnms[2*i+1]=strdup(buf);
 +                }
 +                md->itc=get_ebin_space(md->ebin,md->mde_n,
 +                                       (const char **)grpnms,unit_invtime);
 +            }
 +        }
 +    }
 +    else if (md->etc == etcBERENDSEN || md->etc == etcYES ||
 +             md->etc == etcVRESCALE)
 +    {
 +        for(i=0; (i<md->nTC); i++)
 +        {
 +            ni=groups->grps[egcTC].nm_ind[i];
 +            sprintf(buf,"Lamb-%s",*(groups->grpname[ni]));
 +            grpnms[i]=strdup(buf);
 +        }
 +        md->itc=get_ebin_space(md->ebin,md->mde_n,(const char **)grpnms,"");
 +    }
 +
 +    sfree(grpnms);
 +
 +
 +    /* Per-group velocities for acceleration (non-equilibrium) groups. */
 +    md->nU=groups->grps[egcACC].nr;
 +    if (md->nU > 1)
 +    {
 +        snew(grpnms,3*md->nU);
 +        for(i=0; (i<md->nU); i++)
 +        {
 +            ni=groups->grps[egcACC].nm_ind[i];
 +            sprintf(buf,"Ux-%s",*(groups->grpname[ni]));
 +            grpnms[3*i+XX]=strdup(buf);
 +            sprintf(buf,"Uy-%s",*(groups->grpname[ni]));
 +            grpnms[3*i+YY]=strdup(buf);
 +            sprintf(buf,"Uz-%s",*(groups->grpname[ni]));
 +            grpnms[3*i+ZZ]=strdup(buf);
 +        }
 +        md->iu=get_ebin_space(md->ebin,3*md->nU,(const char **)grpnms,unit_vel);
 +        sfree(grpnms);
 +    }
 +
 +    if ( fp_ene )
 +    {
 +        do_enxnms(fp_ene,&md->ebin->nener,&md->ebin->enm);
 +    }
 +
 +    md->print_grpnms=NULL;
 +
 +    /* check whether we're going to write dh histograms */
 +    md->dhc=NULL;
 +    if (ir->fepvals->separate_dhdl_file == esepdhdlfileNO )
 +    {
 +        snew(md->dhc, 1);
 +
 +        mde_delta_h_coll_init(md->dhc, ir);
 +        md->fp_dhdl = NULL;
 +    }
 +    else
 +    {
 +        md->fp_dhdl = fp_dhdl;
 +    }
 +    if (ir->bSimTemp) {
 +        int i;
 +        /* keep a private copy of the per-lambda temperatures for the
 +           kinetic-energy correction applied in upd_mdebin */
 +        snew(md->temperatures,ir->fepvals->n_lambda);
 +        for (i=0;i<ir->fepvals->n_lambda;i++)
 +        {
 +            md->temperatures[i] = ir->simtempvals->temperatures[i];
 +        }
 +    }
 +    return md;
 +}
 +
 +/* Open the dH/dlambda (dhdl) xvg output file and write its header:
 +   title, axis labels, a subtitle carrying the reference temperature and
 +   starting lambda (state), and one legend entry per expanded-ensemble
 +   state column, energy column, separate dH/dlambda component, foreign-
 +   lambda energy difference, and (with pressure coupling) a trailing pV
 +   term. g_bar parses the legend to recover the lambda values, so the
 +   legend format is part of the file contract (see comment below).
 +   Returns the open file handle; the caller is responsible for closing. */
 +extern FILE *open_dhdl(const char *filename,const t_inputrec *ir,
 +                       const output_env_t oenv)
 +{
 +    FILE *fp;
 +    const char *dhdl="dH/d\\lambda",*deltag="\\DeltaH",*lambda="\\lambda",
 +        *lambdastate="\\lambda state",*remain="remaining";
 +    char title[STRLEN],label_x[STRLEN],label_y[STRLEN];
 +    int  i,np,nps,nsets,nsets_de,nsetsbegin;
 +    t_lambda *fep;
 +    char **setname;
 +    char buf[STRLEN];
 +    int bufplace=0;
 +
 +    int nsets_dhdl = 0;
 +    int s = 0;
 +    int nsetsextend;
 +
 +    /* for simplicity */
 +    fep = ir->fepvals;
 +
 +    if (fep->n_lambda == 0)
 +    {
 +        sprintf(title,"%s",dhdl);
 +        sprintf(label_x,"Time (ps)");
 +        sprintf(label_y,"%s (%s %s)",
 +                dhdl,unit_energy,"[\\lambda]\\S-1\\N");
 +    }
 +    else
 +    {
 +        sprintf(title,"%s and %s",dhdl,deltag);
 +        sprintf(label_x,"Time (ps)");
 +        sprintf(label_y,"%s and %s (%s %s)",
 +                dhdl,deltag,unit_energy,"[\\8l\\4]\\S-1\\N");
 +    }
 +    fp = gmx_fio_fopen(filename,"w+");
 +    xvgr_header(fp,title,label_x,label_y,exvggtXNY,oenv);
 +
 +    /* Subtitle: temperature (unless simulated tempering, where it varies)
 +       and the starting lambda value or state. */
 +    if (!(ir->bSimTemp))
 +    {
 +        bufplace = sprintf(buf,"T = %g (K) ",
 +                ir->opts.ref_t[0]);
 +    }
 +    if (ir->efep != efepSLOWGROWTH)
 +    {
 +        if (fep->n_lambda == 0)
 +        {
 +            sprintf(&(buf[bufplace]),"%s = %g",
 +                    lambda,fep->init_lambda);
 +        }
 +        else
 +        {
 +            sprintf(&(buf[bufplace]),"%s = %d",
 +                    lambdastate,fep->init_fep_state);
 +        }
 +    }
 +    xvgr_subtitle(fp,buf,oenv);
 +
 +    for (i=0;i<efptNR;i++)
 +    {
 +        if (fep->separate_dvdl[i]) {nsets_dhdl++;}
 +    }
 +
 +    /* count the number of delta_g states */
 +    nsets_de = fep->n_lambda;
 +
 +    nsets = nsets_dhdl + nsets_de; /* dhdl + fep differences */
 +
 +    if (fep->n_lambda>0 && ir->bExpanded)
 +    {
 +        nsets += 1;   /*add fep state for expanded ensemble */
 +    }
 +
 +    if (fep->bPrintEnergy)
 +    {
 +        nsets += 1;  /* add energy to the dhdl as well */
 +    }
 +
 +    nsetsextend = nsets;
 +    if ((ir->epc!=epcNO) && (fep->n_lambda>0))
 +    {
 +        nsetsextend += 1; /* for PV term, other terms possible if required for the reduced potential (only needed with foreign lambda) */
 +    }
 +    snew(setname,nsetsextend);
 +
 +    if (ir->bExpanded)
 +    {
 +        /* state for the fep_vals, if we have alchemical sampling */
 +        sprintf(buf,"%s","Thermodynamic state");
 +        setname[s] = strdup(buf);
 +        s+=1;
 +    }
 +
 +    if (fep->bPrintEnergy)
 +    {
 +        sprintf(buf,"%s (%s)","Energy",unit_energy);
 +        setname[s] = strdup(buf);
 +        s+=1;
 +    }
 +
 +    for (i=0;i<efptNR;i++)
 +    {
 +        if (fep->separate_dvdl[i]) {
 +            sprintf(buf,"%s (%s)",dhdl,efpt_names[i]);
 +            setname[s] = strdup(buf);
 +            s+=1;
 +        }
 +    }
 +
 +    if (fep->n_lambda > 0)
 +    {
 +        /* g_bar has to determine the lambda values used in this simulation
 +         * from this xvg legend.
 +         */
 +
 +        if (ir->bExpanded) {
 +            nsetsbegin = 1;  /* for including the expanded ensemble */
 +        } else {
 +            nsetsbegin = 0;
 +        }
 +
 +        if (fep->bPrintEnergy)
 +        {
 +            nsetsbegin += 1;
 +        }
 +        nsetsbegin += nsets_dhdl;
 +
 +        for(s=nsetsbegin; s<nsets; s++)
 +        {
 +            /* legend: "\DeltaH \lambda (l1,l2,...)" listing the foreign
 +               lambda values of each separate component */
 +            nps = sprintf(buf,"%s %s (",deltag,lambda);
 +            for (i=0;i<efptNR;i++)
 +            {
 +                if (fep->separate_dvdl[i])
 +                {
 +                    np = sprintf(&buf[nps],"%g,",fep->all_lambda[i][s-(nsetsbegin)]);
 +                    nps += np;
 +                }
 +            }
 +            if (ir->bSimTemp)
 +            {
 +                /* print the temperature for this state if doing simulated annealing */
 +                sprintf(&buf[nps],"T = %g (%s))",ir->simtempvals->temperatures[s-(nsetsbegin)],unit_temp_K);
 +            }
 +            else
 +            {
 +                sprintf(&buf[nps-1],")");  /* -1 to overwrite the last comma */
 +            }
 +            setname[s] = strdup(buf);
 +        }
 +        if (ir->epc!=epcNO) {
 +            np = sprintf(buf,"pV (%s)",unit_energy);
 +            setname[nsetsextend-1] = strdup(buf);  /* the first entry after nsets */
 +        }
 +
 +        xvgr_legend(fp,nsetsextend,(const char **)setname,oenv);
 +
 +        for(s=0; s<nsetsextend; s++)
 +        {
 +            sfree(setname[s]);
 +        }
 +        sfree(setname);
 +    }
 +
 +    return fp;
 +}
 +
 +/* Copy the enabled energy terms from e[] (indexed over all F_NRE types)
 +   into the compact array ecpy[] (one entry per enabled term, in the same
 +   order the legend names were registered in init_mdebin). */
 +static void copy_energy(t_mdebin *md, real e[],real ecpy[])
 +{
 +    int i,j;
 +
 +    for(i=j=0; (i<F_NRE); i++)
 +        if (md->bEner[i])
 +            ecpy[j++] = e[i];
 +    /* Consistency check against the count computed in init_mdebin. */
 +    if (j != md->f_nre)
 +        gmx_incons("Number of energy terms wrong");
 +}
 +
 +void upd_mdebin(t_mdebin *md,
 +                gmx_bool bDoDHDL,
 +                gmx_bool bSum,
 +                double time,
 +                real tmass,
 +                gmx_enerdata_t *enerd,
 +                t_state *state,
 +                t_lambda *fep,
 +                t_expanded *expand,
 +                matrix  box,
 +                tensor svir,
 +                tensor fvir,
 +                tensor vir,
 +                tensor pres,
 +                gmx_ekindata_t *ekind,
 +                rvec mu_tot,
 +                gmx_constr_t constr)
 +{
 +    int    i,j,k,kk,m,n,gid;
 +    real   crmsd[2],tmp6[6];
 +    real   bs[NTRICLBOXS],vol,dens,pv,enthalpy;
 +    real   eee[egNR];
 +    real   ecopy[F_NRE];
 +    double store_dhdl[efptNR];
 +    double *dE=NULL;
 +    real   store_energy=0;
 +    real   tmp;
 +
 +    /* Do NOT use the box in the state variable, but the separate box provided
 +     * as an argument. This is because we sometimes need to write the box from
 +     * the last timestep to match the trajectory frames.
 +     */
 +    copy_energy(md, enerd->term,ecopy);
 +    add_ebin(md->ebin,md->ie,md->f_nre,ecopy,bSum);
 +    if (md->nCrmsd)
 +    {
 +        crmsd[0] = constr_rmsd(constr,FALSE);
 +        if (md->nCrmsd > 1)
 +        {
 +            crmsd[1] = constr_rmsd(constr,TRUE);
 +        }
 +        add_ebin(md->ebin,md->iconrmsd,md->nCrmsd,crmsd,FALSE);
 +    }
 +    if (md->bDynBox)
 +    {
 +        int nboxs;
 +        if(md->bTricl)
 +        {
 +            bs[0] = box[XX][XX];
 +            bs[1] = box[YY][YY];
 +            bs[2] = box[ZZ][ZZ];
 +            bs[3] = box[YY][XX];
 +            bs[4] = box[ZZ][XX];
 +            bs[5] = box[ZZ][YY];
 +            nboxs=NTRICLBOXS;
 +        }
 +        else
 +        {
 +            bs[0] = box[XX][XX];
 +            bs[1] = box[YY][YY];
 +            bs[2] = box[ZZ][ZZ];
 +            nboxs=NBOXS;
 +        }
 +        vol  = box[XX][XX]*box[YY][YY]*box[ZZ][ZZ];
 +        dens = (tmass*AMU)/(vol*NANO*NANO*NANO);
 +        add_ebin(md->ebin,md->ib   ,nboxs,bs   ,bSum);
 +        add_ebin(md->ebin,md->ivol ,1    ,&vol ,bSum);
 +        add_ebin(md->ebin,md->idens,1    ,&dens,bSum);
 +
 +        if (md->bDiagPres)
 +        {
 +            /* This is pV (in kJ/mol).  The pressure is the reference pressure,
 +               not the instantaneous pressure */
 +            pv = vol*md->ref_p/PRESFAC;
 +
 +            add_ebin(md->ebin,md->ipv  ,1    ,&pv  ,bSum);
 +            enthalpy = pv + enerd->term[F_ETOT];
 +            add_ebin(md->ebin,md->ienthalpy  ,1    ,&enthalpy  ,bSum);
 +        }
 +    }
 +    if (md->bConstrVir)
 +    {
 +        add_ebin(md->ebin,md->isvir,9,svir[0],bSum);
 +        add_ebin(md->ebin,md->ifvir,9,fvir[0],bSum);
 +    }
 +    if (md->bVir)
 +        add_ebin(md->ebin,md->ivir,9,vir[0],bSum);
 +    if (md->bPress)
 +        add_ebin(md->ebin,md->ipres,9,pres[0],bSum);
 +    if (md->bSurft){
 +        tmp = (pres[ZZ][ZZ]-(pres[XX][XX]+pres[YY][YY])*0.5)*box[ZZ][ZZ];
 +        add_ebin(md->ebin,md->isurft,1,&tmp,bSum);
 +    }
 +    if (md->epc == epcPARRINELLORAHMAN || md->epc == epcMTTK)
 +    {
 +        tmp6[0] = state->boxv[XX][XX];
 +        tmp6[1] = state->boxv[YY][YY];
 +        tmp6[2] = state->boxv[ZZ][ZZ];
 +        tmp6[3] = state->boxv[YY][XX];
 +        tmp6[4] = state->boxv[ZZ][XX];
 +        tmp6[5] = state->boxv[ZZ][YY];
 +        add_ebin(md->ebin,md->ipc,md->bTricl ? 6 : 3,tmp6,bSum);
 +    }
 +    if(md->bMu)
 +        add_ebin(md->ebin,md->imu,3,mu_tot,bSum);
 +    if (ekind && ekind->cosacc.cos_accel != 0)
 +    {
 +        vol  = box[XX][XX]*box[YY][YY]*box[ZZ][ZZ];
 +        dens = (tmass*AMU)/(vol*NANO*NANO*NANO);
 +        add_ebin(md->ebin,md->ivcos,1,&(ekind->cosacc.vcos),bSum);
 +        /* 1/viscosity, unit 1/(kg m^-1 s^-1) */
 +        tmp = 1/(ekind->cosacc.cos_accel/(ekind->cosacc.vcos*PICO)
 +                 *dens*vol*sqr(box[ZZ][ZZ]*NANO/(2*M_PI)));
 +        add_ebin(md->ebin,md->ivisc,1,&tmp,bSum);
 +    }
 +    if (md->nE > 1)
 +    {
 +        n=0;
 +        for(i=0; (i<md->nEg); i++)
 +        {
 +            for(j=i; (j<md->nEg); j++)
 +            {
 +                gid=GID(i,j,md->nEg);
 +                for(k=kk=0; (k<egNR); k++)
 +                {
 +                    if (md->bEInd[k])
 +                    {
 +                        eee[kk++] = enerd->grpp.ener[k][gid];
 +                    }
 +                }
 +                add_ebin(md->ebin,md->igrp[n],md->nEc,eee,bSum);
 +                n++;
 +            }
 +        }
 +    }
 +
 +    if (ekind)
 +    {
 +        for(i=0; (i<md->nTC); i++)
 +        {
 +            md->tmp_r[i] = ekind->tcstat[i].T;
 +        }
 +        add_ebin(md->ebin,md->itemp,md->nTC,md->tmp_r,bSum);
 +
 +        if (md->etc == etcNOSEHOOVER)
 +        {
 +            /* whether to print Nose-Hoover chains: */
 +            if (md->bPrintNHChains)
 +            {
 +                if (md->bNHC_trotter)
 +                {
 +                    for(i=0; (i<md->nTC); i++)
 +                    {
 +                        for (j=0;j<md->nNHC;j++)
 +                        {
 +                            k = i*md->nNHC+j;
 +                            md->tmp_r[2*k] = state->nosehoover_xi[k];
 +                            md->tmp_r[2*k+1] = state->nosehoover_vxi[k];
 +                        }
 +                    }
 +                    add_ebin(md->ebin,md->itc,md->mde_n,md->tmp_r,bSum);
 +
 +                    if (md->bMTTK) {
 +                        for(i=0; (i<md->nTCP); i++)
 +                        {
 +                            for (j=0;j<md->nNHC;j++)
 +                            {
 +                                k = i*md->nNHC+j;
 +                                md->tmp_r[2*k] = state->nhpres_xi[k];
 +                                md->tmp_r[2*k+1] = state->nhpres_vxi[k];
 +                            }
 +                        }
 +                        add_ebin(md->ebin,md->itcb,md->mdeb_n,md->tmp_r,bSum);
 +                    }
 +                }
 +                else
 +                {
 +                    for(i=0; (i<md->nTC); i++)
 +                    {
 +                        md->tmp_r[2*i] = state->nosehoover_xi[i];
 +                        md->tmp_r[2*i+1] = state->nosehoover_vxi[i];
 +                    }
 +                    add_ebin(md->ebin,md->itc,md->mde_n,md->tmp_r,bSum);
 +                }
 +            }
 +        }
 +        else if (md->etc == etcBERENDSEN || md->etc == etcYES ||
 +                 md->etc == etcVRESCALE)
 +        {
 +            for(i=0; (i<md->nTC); i++)
 +            {
 +                md->tmp_r[i] = ekind->tcstat[i].lambda;
 +            }
 +            add_ebin(md->ebin,md->itc,md->nTC,md->tmp_r,bSum);
 +        }
 +    }
 +
 +    if (ekind && md->nU > 1)
 +    {
 +        for(i=0; (i<md->nU); i++)
 +        {
 +            copy_rvec(ekind->grpstat[i].u,md->tmp_v[i]);
 +        }
 +        add_ebin(md->ebin,md->iu,3*md->nU,md->tmp_v[0],bSum);
 +    }
 +
 +    ebin_increase_count(md->ebin,bSum);
 +
 +    /* BAR + thermodynamic integration values */
 +    if ((md->fp_dhdl || md->dhc) && bDoDHDL && (enerd->n_lambda > 0))
 +    {
 +        snew(dE,enerd->n_lambda-1);
 +        for(i=0; i<enerd->n_lambda-1; i++) {
 +            dE[i] = enerd->enerpart_lambda[i+1]-enerd->enerpart_lambda[0];  /* zero for simulated tempering */
 +            if (md->temperatures!=NULL)
 +            {
 +                /* MRS: is this right, given the way we have defined the exchange probabilities? */
 +                /* is this even useful to have at all? */
 +                dE[i] += (md->temperatures[i]/md->temperatures[state->fep_state]-1.0)*enerd->term[F_EKIN];
 +            }
 +        }
 +    }
 +
 +    if (md->fp_dhdl && bDoDHDL)
 +    {
 +        fprintf(md->fp_dhdl,"%.4f",time);
 +        /* the current free energy state */
 +
 +        /* print the current state if we are doing expanded ensemble */
 +        if (expand->elmcmove > elmcmoveNO) {
 +            fprintf(md->fp_dhdl," %4d",state->fep_state);
 +        }
 +        /* total energy (in case the temperature changes) */
 +        if (fep->bPrintEnergy)
 +        {
 +            store_energy = enerd->term[F_ETOT];
 +            fprintf(md->fp_dhdl," %#.8g",store_energy);
 +        }
 +
 +        for (i=0;i<efptNR;i++)
 +        {
 +            if (fep->separate_dvdl[i])
 +            {
 +                fprintf(md->fp_dhdl," %#.8g",enerd->term[F_DVDL+i]); /* assumes F_DVDL is first */
 +            }
 +        }
 +        for(i=1; i<enerd->n_lambda; i++)
 +        {
 +            fprintf(md->fp_dhdl," %#.8g",dE[i-1]);
 +
 +        }
 +        if ((md->epc!=epcNO)  && (enerd->n_lambda > 0))
 +        {
 +            fprintf(md->fp_dhdl," %#.8g",pv);   /* PV term only needed when there are alternate state lambda */
 +        }
 +        fprintf(md->fp_dhdl,"\n");
 +        /* and the binary free energy output */
 +    }
 +    if (md->dhc && bDoDHDL)
 +    {
 +        int idhdl = 0;
 +        for (i=0;i<efptNR;i++)
 +        {
 +            if (fep->separate_dvdl[i])
 +            {
 +                store_dhdl[idhdl] = enerd->term[F_DVDL+i]; /* assumes F_DVDL is first */
 +                idhdl+=1;
 +            }
 +        }
 +        /* store_dh is dE */
 +        mde_delta_h_coll_add_dh(md->dhc,
 +                                (double)state->fep_state,
 +                                store_energy,
 +                                pv,
 +                                (expand->elamstats>elamstatsNO),
 +                                (fep->bPrintEnergy),
 +                                (md->epc!=epcNO),
 +                                idhdl,
 +                                fep->n_lambda,
 +                                store_dhdl,
 +                                dE,
 +                                time);
 +    }
 +    if ((md->fp_dhdl || md->dhc) && bDoDHDL && (enerd->n_lambda >0))
 +    {
 +        sfree(dE);
 +    }
 +}
 +
 +
 +void upd_mdebin_step(t_mdebin *md)
 +{
 +    ebin_increase_count(md->ebin,FALSE);
 +}
 +
 +static void npr(FILE *log,int n,char c)
 +{
 +    for(; (n>0); n--) fprintf(log,"%c",c);
 +}
 +
 +static void pprint(FILE *log,const char *s,t_mdebin *md)
 +{
 +    char CHAR='#';
 +    int  slen;
 +    char buf1[22],buf2[22];
 +
 +    slen = strlen(s);
 +    fprintf(log,"\t<======  ");
 +    npr(log,slen,CHAR);
 +    fprintf(log,"  ==>\n");
 +    fprintf(log,"\t<====  %s  ====>\n",s);
 +    fprintf(log,"\t<==  ");
 +    npr(log,slen,CHAR);
 +    fprintf(log,"  ======>\n\n");
 +
 +    fprintf(log,"\tStatistics over %s steps using %s frames\n",
 +            gmx_step_str(md->ebin->nsteps_sim,buf1),
 +            gmx_step_str(md->ebin->nsum_sim,buf2));
 +    fprintf(log,"\n");
 +}
 +
 +void print_ebin_header(FILE *log,gmx_large_int_t steps,double time,real lambda)
 +{
 +    char buf[22];
 +
 +    fprintf(log,"   %12s   %12s   %12s\n"
 +            "   %12s   %12.5f   %12.5f\n\n",
 +            "Step","Time","Lambda",gmx_step_str(steps,buf),time,lambda);
 +}
 +
 +void print_ebin(ener_file_t fp_ene,gmx_bool bEne,gmx_bool bDR,gmx_bool bOR,
 +                FILE *log,
 +                gmx_large_int_t step,double time,
 +                int mode,gmx_bool bCompact,
 +                t_mdebin *md,t_fcdata *fcd,
 +                gmx_groups_t *groups,t_grpopts *opts)
 +{
 +    /*static char **grpnms=NULL;*/
 +    char        buf[246];
 +    int         i,j,n,ni,nj,ndr,nor,b;
 +    int         ndisre=0;
 +    real        *disre_rm3tav, *disre_rt;
 +
 +    /* these are for the old-style blocks (1 subblock, only reals), because
 +       there can be only one per ID for these */
 +    int         nr[enxNR];
 +    int         id[enxNR];
 +    real        *block[enxNR];
 +
 +    /* temporary arrays for the lambda values to write out */
 +    double      enxlambda_data[2];
 +
 +    t_enxframe  fr;
 +
 +    switch (mode)
 +    {
 +        case eprNORMAL:
 +            init_enxframe(&fr);
 +            fr.t            = time;
 +            fr.step         = step;
 +            fr.nsteps       = md->ebin->nsteps;
 +            fr.dt           = md->delta_t;
 +            fr.nsum         = md->ebin->nsum;
 +            fr.nre          = (bEne) ? md->ebin->nener : 0;
 +            fr.ener         = md->ebin->e;
 +            ndisre          = bDR ? fcd->disres.npair : 0;
 +            disre_rm3tav    = fcd->disres.rm3tav;
 +            disre_rt        = fcd->disres.rt;
 +            /* Optional additional old-style (real-only) blocks. */
 +            for(i=0; i<enxNR; i++)
 +            {
 +                nr[i] = 0;
 +            }
 +            if (fcd->orires.nr > 0 && bOR)
 +            {
 +                diagonalize_orires_tensors(&(fcd->orires));
 +                nr[enxOR]     = fcd->orires.nr;
 +                block[enxOR]  = fcd->orires.otav;
 +                id[enxOR]     = enxOR;
 +                nr[enxORI]    = (fcd->orires.oinsl != fcd->orires.otav) ?
 +                          fcd->orires.nr : 0;
 +                block[enxORI] = fcd->orires.oinsl;
 +                id[enxORI]    = enxORI;
 +                nr[enxORT]    = fcd->orires.nex*12;
 +                block[enxORT] = fcd->orires.eig;
 +                id[enxORT]    = enxORT;
 +            }
 +
 +            /* whether we are going to write anything out: */
 +            if (fr.nre || ndisre || nr[enxOR] || nr[enxORI])
 +            {
 +
 +                /* the old-style blocks go first */
 +                fr.nblock = 0;
 +                for(i=0; i<enxNR; i++)
 +                {
 +                    if (nr[i] > 0)
 +                    {
 +                        fr.nblock = i + 1;
 +                    }
 +                }
 +                add_blocks_enxframe(&fr, fr.nblock);
 +                for(b=0;b<fr.nblock;b++)
 +                {
 +                    add_subblocks_enxblock(&(fr.block[b]), 1);
 +                    fr.block[b].id=id[b];
 +                    fr.block[b].sub[0].nr = nr[b];
 +#ifndef GMX_DOUBLE
 +                    fr.block[b].sub[0].type = xdr_datatype_float;
 +                    fr.block[b].sub[0].fval = block[b];
 +#else
 +                    fr.block[b].sub[0].type = xdr_datatype_double;
 +                    fr.block[b].sub[0].dval = block[b];
 +#endif
 +                }
 +
 +                /* check for disre block & fill it. */
 +                if (ndisre>0)
 +                {
 +                    int db = fr.nblock;
 +                    fr.nblock+=1;
 +                    add_blocks_enxframe(&fr, fr.nblock);
 +
 +                    add_subblocks_enxblock(&(fr.block[db]), 2);
 +                    fr.block[db].id=enxDISRE;
 +                    fr.block[db].sub[0].nr=ndisre;
 +                    fr.block[db].sub[1].nr=ndisre;
 +#ifndef GMX_DOUBLE
 +                    fr.block[db].sub[0].type=xdr_datatype_float;
 +                    fr.block[db].sub[1].type=xdr_datatype_float;
 +                    fr.block[db].sub[0].fval=disre_rt;
 +                    fr.block[db].sub[1].fval=disre_rm3tav;
 +#else
 +                    fr.block[db].sub[0].type=xdr_datatype_double;
 +                    fr.block[db].sub[1].type=xdr_datatype_double;
 +                    fr.block[db].sub[0].dval=disre_rt;
 +                    fr.block[db].sub[1].dval=disre_rm3tav;
 +#endif
 +                }
 +                /* here we can put new-style blocks */
 +
 +                /* Free energy perturbation blocks */
 +                if (md->dhc)
 +                {
 +                    mde_delta_h_coll_handle_block(md->dhc, &fr, fr.nblock);
 +                }
 +
 +                /* we can now free & reset the data in the blocks */
 +                if (md->dhc)
 +                {
 +                    mde_delta_h_coll_reset(md->dhc);
 +                }
 +
 +                /* do the actual I/O */
 +                do_enx(fp_ene,&fr);
 +                gmx_fio_check_file_position(enx_file_pointer(fp_ene));
 +                if (fr.nre)
 +                {
 +                    /* We have stored the sums, so reset the sum history */
 +                    reset_ebin_sums(md->ebin);
 +                }
 +            }
 +            free_enxframe(&fr);
 +            break;
 +        case eprAVER:
 +            if (log)
 +            {
 +                pprint(log,"A V E R A G E S",md);
 +            }
 +            break;
 +        case eprRMS:
 +            if (log)
 +            {
 +                pprint(log,"R M S - F L U C T U A T I O N S",md);
 +            }
 +            break;
 +        default:
 +            gmx_fatal(FARGS,"Invalid print mode (%d)",mode);
 +    }
 +
 +    if (log)
 +    {
 +        for(i=0;i<opts->ngtc;i++)
 +        {
 +            if(opts->annealing[i]!=eannNO)
 +            {
 +                fprintf(log,"Current ref_t for group %s: %8.1f\n",
 +                        *(groups->grpname[groups->grps[egcTC].nm_ind[i]]),
 +                        opts->ref_t[i]);
 +            }
 +        }
 +        if (mode==eprNORMAL && fcd->orires.nr>0)
 +        {
 +            print_orires_log(log,&(fcd->orires));
 +        }
 +        fprintf(log,"   Energies (%s)\n",unit_energy);
 +        pr_ebin(log,md->ebin,md->ie,md->f_nre+md->nCrmsd,5,mode,TRUE);
 +        fprintf(log,"\n");
 +
 +        if (!bCompact)
 +        {
 +            if (md->bDynBox)
 +            {
 +                pr_ebin(log,md->ebin,md->ib, md->bTricl ? NTRICLBOXS : NBOXS,5,
 +                        mode,TRUE);
 +                fprintf(log,"\n");
 +            }
 +            if (md->bConstrVir)
 +            {
 +                fprintf(log,"   Constraint Virial (%s)\n",unit_energy);
 +                pr_ebin(log,md->ebin,md->isvir,9,3,mode,FALSE);
 +                fprintf(log,"\n");
 +                fprintf(log,"   Force Virial (%s)\n",unit_energy);
 +                pr_ebin(log,md->ebin,md->ifvir,9,3,mode,FALSE);
 +                fprintf(log,"\n");
 +            }
++            if (md->bVir)
++            {
++                fprintf(log,"   Total Virial (%s)\n",unit_energy);
++                pr_ebin(log,md->ebin,md->ivir,9,3,mode,FALSE);
++                fprintf(log,"\n");
++            }
++            if (md->bPress)
++            {
++                fprintf(log,"   Pressure (%s)\n",unit_pres_bar);
++                pr_ebin(log,md->ebin,md->ipres,9,3,mode,FALSE);
++                fprintf(log,"\n");
++            }
 +            fprintf(log,"   Total Dipole (%s)\n",unit_dipole_D);
 +            pr_ebin(log,md->ebin,md->imu,3,3,mode,FALSE);
 +            fprintf(log,"\n");
 +
 +            if (md->nE > 1)
 +            {
 +                if (md->print_grpnms==NULL)
 +                {
 +                    snew(md->print_grpnms,md->nE);
 +                    n=0;
 +                    for(i=0; (i<md->nEg); i++)
 +                    {
 +                        ni=groups->grps[egcENER].nm_ind[i];
 +                        for(j=i; (j<md->nEg); j++)
 +                        {
 +                            nj=groups->grps[egcENER].nm_ind[j];
 +                            sprintf(buf,"%s-%s",*(groups->grpname[ni]),
 +                                    *(groups->grpname[nj]));
 +                            md->print_grpnms[n++]=strdup(buf);
 +                        }
 +                    }
 +                }
 +                sprintf(buf,"Epot (%s)",unit_energy);
 +                fprintf(log,"%15s   ",buf);
 +                for(i=0; (i<egNR); i++)
 +                {
 +                    if (md->bEInd[i])
 +                    {
 +                        fprintf(log,"%12s   ",egrp_nm[i]);
 +                    }
 +                }
 +                fprintf(log,"\n");
 +                for(i=0; (i<md->nE); i++)
 +                {
 +                    fprintf(log,"%15s",md->print_grpnms[i]);
 +                    pr_ebin(log,md->ebin,md->igrp[i],md->nEc,md->nEc,mode,
 +                            FALSE);
 +                }
 +                fprintf(log,"\n");
 +            }
 +            if (md->nTC > 1)
 +            {
 +                pr_ebin(log,md->ebin,md->itemp,md->nTC,4,mode,TRUE);
 +                fprintf(log,"\n");
 +            }
 +            if (md->nU > 1)
 +            {
 +                fprintf(log,"%15s   %12s   %12s   %12s\n",
 +                        "Group","Ux","Uy","Uz");
 +                for(i=0; (i<md->nU); i++)
 +                {
 +                    ni=groups->grps[egcACC].nm_ind[i];
 +                    fprintf(log,"%15s",*groups->grpname[ni]);
 +                    pr_ebin(log,md->ebin,md->iu+3*i,3,3,mode,FALSE);
 +                }
 +                fprintf(log,"\n");
 +            }
 +        }
 +    }
 +
 +}
 +
 +void update_energyhistory(energyhistory_t * enerhist,t_mdebin * mdebin)
 +{
 +    int i;
 +
 +    enerhist->nsteps     = mdebin->ebin->nsteps;
 +    enerhist->nsum       = mdebin->ebin->nsum;
 +    enerhist->nsteps_sim = mdebin->ebin->nsteps_sim;
 +    enerhist->nsum_sim   = mdebin->ebin->nsum_sim;
 +    enerhist->nener      = mdebin->ebin->nener;
 +
 +    if (mdebin->ebin->nsum > 0)
 +    {
 +        /* Check if we need to allocate first */
 +        if(enerhist->ener_ave == NULL)
 +        {
 +            snew(enerhist->ener_ave,enerhist->nener);
 +            snew(enerhist->ener_sum,enerhist->nener);
 +        }
 +
 +        for(i=0;i<enerhist->nener;i++)
 +        {
 +            enerhist->ener_ave[i] = mdebin->ebin->e[i].eav;
 +            enerhist->ener_sum[i] = mdebin->ebin->e[i].esum;
 +        }
 +    }
 +
 +    if (mdebin->ebin->nsum_sim > 0)
 +    {
 +        /* Check if we need to allocate first */
 +        if(enerhist->ener_sum_sim == NULL)
 +        {
 +            snew(enerhist->ener_sum_sim,enerhist->nener);
 +        }
 +
 +        for(i=0;i<enerhist->nener;i++)
 +        {
 +            enerhist->ener_sum_sim[i] = mdebin->ebin->e_sim[i].esum;
 +        }
 +    }
 +    if (mdebin->dhc)
 +    {
 +        mde_delta_h_coll_update_energyhistory(mdebin->dhc, enerhist);
 +    }
 +}
 +
 +void restore_energyhistory_from_state(t_mdebin * mdebin,
 +                                      energyhistory_t * enerhist)
 +{
 +    int i;
 +
 +    if ((enerhist->nsum > 0 || enerhist->nsum_sim > 0) &&
 +        mdebin->ebin->nener != enerhist->nener)
 +    {
 +        gmx_fatal(FARGS,"Mismatch between number of energies in run input (%d) and checkpoint file (%d).",
 +                  mdebin->ebin->nener,enerhist->nener);
 +    }
 +
 +    mdebin->ebin->nsteps     = enerhist->nsteps;
 +    mdebin->ebin->nsum       = enerhist->nsum;
 +    mdebin->ebin->nsteps_sim = enerhist->nsteps_sim;
 +    mdebin->ebin->nsum_sim   = enerhist->nsum_sim;
 +
 +    for(i=0; i<mdebin->ebin->nener; i++)
 +    {
 +        mdebin->ebin->e[i].eav  =
 +                  (enerhist->nsum > 0 ? enerhist->ener_ave[i] : 0);
 +        mdebin->ebin->e[i].esum =
 +                  (enerhist->nsum > 0 ? enerhist->ener_sum[i] : 0);
 +        mdebin->ebin->e_sim[i].esum =
 +                  (enerhist->nsum_sim > 0 ? enerhist->ener_sum_sim[i] : 0);
 +    }
 +    if (mdebin->dhc)
 +    {
 +        mde_delta_h_coll_restore_energyhistory(mdebin->dhc, enerhist);
 +    }
 +}
index 8993d5d2193778c06654dddf93784a7e23eba0e4,0000000000000000000000000000000000000000..ac1892f3a71952229d2f4098108e5c406935a0ab
mode 100644,000000..100644
--- /dev/null
@@@ -1,4356 -1,0 +1,4352 @@@
- #ifdef GMX_OPENMP
- #include <omp.h>
- #endif
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +/* IMPORTANT FOR DEVELOPERS:
 + *
 + * Triclinic pme stuff isn't entirely trivial, and we've experienced
 + * some bugs during development (many of them due to me). To avoid
 + * this in the future, please check the following things if you make
 + * changes in this file:
 + *
 + * 1. You should obtain identical (at least to the PME precision)
 + *    energies, forces, and virial for
 + *    a rectangular box and a triclinic one where the z (or y) axis is
 + *    tilted a whole box side. For instance you could use these boxes:
 + *
 + *    rectangular       triclinic
 + *     2  0  0           2  0  0
 + *     0  2  0           0  2  0
 + *     0  0  6           2  2  6
 + *
 + * 2. You should check the energy conservation in a triclinic box.
 + *
 + * It might seem overkill, but better safe than sorry.
 + * /Erik 001109
 + */
 +
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#ifdef GMX_LIB_MPI
 +#include <mpi.h>
 +#endif
 +#ifdef GMX_THREAD_MPI
 +#include "tmpi.h"
 +#endif
 +
 +#include <stdio.h>
 +#include <string.h>
 +#include <math.h>
 +#include <assert.h>
 +#include "typedefs.h"
 +#include "txtdump.h"
 +#include "vec.h"
 +#include "gmxcomplex.h"
 +#include "smalloc.h"
 +#include "futil.h"
 +#include "coulomb.h"
 +#include "gmx_fatal.h"
 +#include "pme.h"
 +#include "network.h"
 +#include "physics.h"
 +#include "nrnb.h"
 +#include "copyrite.h"
 +#include "gmx_wallcycle.h"
 +#include "gmx_parallel_3dfft.h"
 +#include "pdbio.h"
 +#include "gmx_cyclecounter.h"
 +#include "macros.h"
 +
 +/* Single precision, with SSE2 or higher available */
 +#if defined(GMX_X86_SSE2) && !defined(GMX_DOUBLE)
 +
 +#include "gmx_x86_sse2.h"
 +#include "gmx_math_x86_sse2_single.h"
 +
 +#define PME_SSE
 +/* Some old AMD processors could have problems with unaligned loads+stores */
 +#ifndef GMX_FAHCORE
 +#define PME_SSE_UNALIGNED
 +#endif
 +#endif
 +
 +#define DFT_TOL 1e-7
 +/* #define PRT_FORCE */
 +/* conditions for on the fly time-measurement */
 +/* #define TAKETIME (step > 1 && timesteps < 10) */
 +#define TAKETIME FALSE
 +
 +/* #define PME_TIME_THREADS */
 +
 +#ifdef GMX_DOUBLE
 +#define mpi_type MPI_DOUBLE
 +#else
 +#define mpi_type MPI_FLOAT
 +#endif
 +
 +/* GMX_CACHE_SEP should be a multiple of 16 to preserve alignment */
 +#define GMX_CACHE_SEP 64
 +
 +/* We only define a maximum to be able to use local arrays without allocation.
 + * An order larger than 12 should never be needed, even for test cases.
 + * If needed it can be changed here.
 + */
 +#define PME_ORDER_MAX 12
 +
 +/* Internal datastructures */
 +typedef struct {
 +    int send_index0;
 +    int send_nindex;
 +    int recv_index0;
 +    int recv_nindex;
 +} pme_grid_comm_t;
 +
 +typedef struct {
 +#ifdef GMX_MPI
 +    MPI_Comm mpi_comm;
 +#endif
 +    int  nnodes,nodeid;
 +    int  *s2g0;
 +    int  *s2g1;
 +    int  noverlap_nodes;
 +    int  *send_id,*recv_id;
 +    pme_grid_comm_t *comm_data;
 +    real *sendbuf;
 +    real *recvbuf;
 +} pme_overlap_t;
 +
 +typedef struct {
 +    int *n;     /* Cumulative counts of the number of particles per thread */
 +    int nalloc; /* Allocation size of i */
 +    int *i;     /* Particle indices ordered on thread index (n) */
 +} thread_plist_t;
 +
 +typedef struct {
 +    int  n;
 +    int  *ind;
 +    splinevec theta;
 +    splinevec dtheta;
 +} splinedata_t;
 +
 +typedef struct {
 +    int  dimind;            /* The index of the dimension, 0=x, 1=y */
 +    int  nslab;
 +    int  nodeid;
 +#ifdef GMX_MPI
 +    MPI_Comm mpi_comm;
 +#endif
 +
 +    int  *node_dest;        /* The nodes to send x and q to with DD */
 +    int  *node_src;         /* The nodes to receive x and q from with DD */
 +    int  *buf_index;        /* Index for each comm node into the buffers */
 +
 +    int  maxshift;
 +
 +    int  npd;
 +    int  pd_nalloc;
 +    int  *pd;
 +    int  *count;            /* The number of atoms to send to each node */
 +    int  **count_thread;
 +    int  *rcount;           /* The number of atoms to receive */
 +
 +    int  n;
 +    int  nalloc;
 +    rvec *x;
 +    real *q;
 +    rvec *f;
 +    gmx_bool bSpread;       /* These coordinates are used for spreading */
 +    int  pme_order;
 +    ivec *idx;
 +    rvec *fractx;            /* Fractional coordinate relative to the
 +                              * lower cell boundary
 +                              */
 +    int  nthread;
 +    int  *thread_idx;        /* Which thread should spread which charge */
 +    thread_plist_t *thread_plist;
 +    splinedata_t *spline;
 +} pme_atomcomm_t;
 +
 +#define FLBS  3
 +#define FLBSZ 4
 +
 +typedef struct {
 +    ivec ci;     /* The spatial location of this grid       */
 +    ivec n;      /* The size of *grid, including order-1    */
 +    ivec offset; /* The grid offset from the full node grid */
 +    int  order;  /* PME spreading order                     */
 +    real *grid;  /* The grid local thread, size n           */
 +} pmegrid_t;
 +
 +typedef struct {
 +    pmegrid_t grid;     /* The full node grid (non thread-local)            */
 +    int  nthread;       /* The number of threads operating on this grid     */
 +    ivec nc;            /* The local spatial decomposition over the threads */
 +    pmegrid_t *grid_th; /* Array of grids for each thread                   */
 +    int  **g2t;         /* The grid to thread index                         */
 +    ivec nthread_comm;  /* The number of threads to communicate with        */
 +} pmegrids_t;
 +
 +
 +typedef struct {
 +#ifdef PME_SSE
 +    /* Masks for SSE aligned spreading and gathering */
 +    __m128 mask_SSE0[6],mask_SSE1[6];
 +#else
 +    int dummy; /* C89 requires that struct has at least one member */
 +#endif
 +} pme_spline_work_t;
 +
 +typedef struct {
 +    /* work data for solve_pme */
 +    int      nalloc;
 +    real *   mhx;
 +    real *   mhy;
 +    real *   mhz;
 +    real *   m2;
 +    real *   denom;
 +    real *   tmp1_alloc;
 +    real *   tmp1;
 +    real *   eterm;
 +    real *   m2inv;
 +
 +    real     energy;
 +    matrix   vir;
 +} pme_work_t;
 +
 +typedef struct gmx_pme {
 +    int  ndecompdim;         /* The number of decomposition dimensions */
 +    int  nodeid;             /* Our nodeid in mpi->mpi_comm */
 +    int  nodeid_major;
 +    int  nodeid_minor;
 +    int  nnodes;             /* The number of nodes doing PME */
 +    int  nnodes_major;
 +    int  nnodes_minor;
 +
 +    MPI_Comm mpi_comm;
 +    MPI_Comm mpi_comm_d[2];  /* Indexed on dimension, 0=x, 1=y */
 +#ifdef GMX_MPI
 +    MPI_Datatype  rvec_mpi;  /* the pme vector's MPI type */
 +#endif
 +
 +    int  nthread;            /* The number of threads doing PME */
 +
 +    gmx_bool bPPnode;        /* Node also does particle-particle forces */
 +    gmx_bool bFEP;           /* Compute Free energy contribution */
 +    int nkx,nky,nkz;         /* Grid dimensions */
 +    gmx_bool bP3M;           /* Do P3M: optimize the influence function */
 +    int pme_order;
 +    real epsilon_r;
 +
 +    pmegrids_t pmegridA;  /* Grids on which we do spreading/interpolation, includes overlap */
 +    pmegrids_t pmegridB;
 +    /* The PME charge spreading grid sizes/strides, includes pme_order-1 */
 +    int     pmegrid_nx,pmegrid_ny,pmegrid_nz;
 +    /* pmegrid_nz might be larger than strictly necessary to ensure
 +     * memory alignment, pmegrid_nz_base gives the real base size.
 +     */
 +    int     pmegrid_nz_base;
 +    /* The local PME grid starting indices */
 +    int     pmegrid_start_ix,pmegrid_start_iy,pmegrid_start_iz;
 +
 +    /* Work data for spreading and gathering */
 +    pme_spline_work_t spline_work;
 +
 +    real *fftgridA;             /* Grids for FFT. With 1D FFT decomposition this can be a pointer */
 +    real *fftgridB;             /* inside the interpolation grid, but separate for 2D PME decomp. */
 +    int   fftgrid_nx,fftgrid_ny,fftgrid_nz;
 +
 +    t_complex *cfftgridA;             /* Grids for complex FFT data */
 +    t_complex *cfftgridB;
 +    int   cfftgrid_nx,cfftgrid_ny,cfftgrid_nz;
 +
 +    gmx_parallel_3dfft_t  pfft_setupA;
 +    gmx_parallel_3dfft_t  pfft_setupB;
 +
 +    int  *nnx,*nny,*nnz;
 +    real *fshx,*fshy,*fshz;
 +
 +    pme_atomcomm_t atc[2];  /* Indexed on decomposition index */
 +    matrix    recipbox;
 +    splinevec bsp_mod;
 +
 +    pme_overlap_t overlap[2]; /* Indexed on dimension, 0=x, 1=y */
 +
 +    pme_atomcomm_t atc_energy; /* Only for gmx_pme_calc_energy */
 +
 +    rvec *bufv;             /* Communication buffer */
 +    real *bufr;             /* Communication buffer */
 +    int  buf_nalloc;        /* The communication buffer size */
 +
 +    /* thread local work data for solve_pme */
 +    pme_work_t *work;
 +
 +    /* Work data for PME_redist */
 +    gmx_bool redist_init;
 +    int *    scounts;
 +    int *    rcounts;
 +    int *    sdispls;
 +    int *    rdispls;
 +    int *    sidx;
 +    int *    idxa;
 +    real *   redist_buf;
 +    int      redist_buf_nalloc;
 +
 +    /* Work data for sum_qgrid */
 +    real *   sum_qgrid_tmp;
 +    real *   sum_qgrid_dd_tmp;
 +} t_gmx_pme;
 +
 +
 +static void calc_interpolation_idx(gmx_pme_t pme,pme_atomcomm_t *atc,
 +                                   int start,int end,int thread)
 +{
 +    int  i;
 +    int  *idxptr,tix,tiy,tiz;
 +    real *xptr,*fptr,tx,ty,tz;
 +    real rxx,ryx,ryy,rzx,rzy,rzz;
 +    int  nx,ny,nz;
 +    int  start_ix,start_iy,start_iz;
 +    int  *g2tx,*g2ty,*g2tz;
 +    gmx_bool bThreads;
 +    int  *thread_idx=NULL;
 +    thread_plist_t *tpl=NULL;
 +    int  *tpl_n=NULL;
 +    int  thread_i;
 +
 +    nx  = pme->nkx;
 +    ny  = pme->nky;
 +    nz  = pme->nkz;
 +
 +    start_ix = pme->pmegrid_start_ix;
 +    start_iy = pme->pmegrid_start_iy;
 +    start_iz = pme->pmegrid_start_iz;
 +
 +    rxx = pme->recipbox[XX][XX];
 +    ryx = pme->recipbox[YY][XX];
 +    ryy = pme->recipbox[YY][YY];
 +    rzx = pme->recipbox[ZZ][XX];
 +    rzy = pme->recipbox[ZZ][YY];
 +    rzz = pme->recipbox[ZZ][ZZ];
 +
 +    g2tx = pme->pmegridA.g2t[XX];
 +    g2ty = pme->pmegridA.g2t[YY];
 +    g2tz = pme->pmegridA.g2t[ZZ];
 +
 +    bThreads = (atc->nthread > 1);
 +    if (bThreads)
 +    {
 +        thread_idx = atc->thread_idx;
 +
 +        tpl   = &atc->thread_plist[thread];
 +        tpl_n = tpl->n;
 +        for(i=0; i<atc->nthread; i++)
 +        {
 +            tpl_n[i] = 0;
 +        }
 +    }
 +
 +    for(i=start; i<end; i++) {
 +        xptr   = atc->x[i];
 +        idxptr = atc->idx[i];
 +        fptr   = atc->fractx[i];
 +
 +        /* Fractional coordinates along box vectors, add 2.0 to make 100% sure we are positive for triclinic boxes */
 +        tx = nx * ( xptr[XX] * rxx + xptr[YY] * ryx + xptr[ZZ] * rzx + 2.0 );
 +        ty = ny * (                  xptr[YY] * ryy + xptr[ZZ] * rzy + 2.0 );
 +        tz = nz * (                                   xptr[ZZ] * rzz + 2.0 );
 +
 +        tix = (int)(tx);
 +        tiy = (int)(ty);
 +        tiz = (int)(tz);
 +
 +        /* Because decomposition only occurs in x and y,
 +         * we never have a fraction correction in z.
 +         */
 +        fptr[XX] = tx - tix + pme->fshx[tix];
 +        fptr[YY] = ty - tiy + pme->fshy[tiy];
 +        fptr[ZZ] = tz - tiz;
 +
 +        idxptr[XX] = pme->nnx[tix];
 +        idxptr[YY] = pme->nny[tiy];
 +        idxptr[ZZ] = pme->nnz[tiz];
 +
 +#ifdef DEBUG
 +        range_check(idxptr[XX],0,pme->pmegrid_nx);
 +        range_check(idxptr[YY],0,pme->pmegrid_ny);
 +        range_check(idxptr[ZZ],0,pme->pmegrid_nz);
 +#endif
 +
 +        if (bThreads)
 +        {
 +            thread_i = g2tx[idxptr[XX]] + g2ty[idxptr[YY]] + g2tz[idxptr[ZZ]];
 +            thread_idx[i] = thread_i;
 +            tpl_n[thread_i]++;
 +        }
 +    }
 +
 +    if (bThreads)
 +    {
 +        /* Make a list of particle indices sorted on thread */
 +
 +        /* Get the cumulative count */
 +        for(i=1; i<atc->nthread; i++)
 +        {
 +            tpl_n[i] += tpl_n[i-1];
 +        }
 +        /* The current implementation distributes particles equally
 +         * over the threads, so we could actually allocate for that
 +         * in pme_realloc_atomcomm_things.
 +         */
 +        if (tpl_n[atc->nthread-1] > tpl->nalloc)
 +        {
 +            tpl->nalloc = over_alloc_large(tpl_n[atc->nthread-1]);
 +            srenew(tpl->i,tpl->nalloc);
 +        }
 +        /* Set tpl_n to the cumulative start */
 +        for(i=atc->nthread-1; i>=1; i--)
 +        {
 +            tpl_n[i] = tpl_n[i-1];
 +        }
 +        tpl_n[0] = 0;
 +
 +        /* Fill our thread local array with indices sorted on thread */
 +        for(i=start; i<end; i++)
 +        {
 +            tpl->i[tpl_n[atc->thread_idx[i]]++] = i;
 +        }
 +        /* Now tpl_n contains the cumulative count again */
 +    }
 +}
 +
 +static void make_thread_local_ind(pme_atomcomm_t *atc,
 +                                  int thread,splinedata_t *spline)
 +{
 +    int  n,t,i,start,end;
 +    thread_plist_t *tpl;
 +
 +    /* Combine the indices made by each thread into one index */
 +
 +    n = 0;
 +    start = 0;
 +    for(t=0; t<atc->nthread; t++)
 +    {
 +        tpl = &atc->thread_plist[t];
 +        /* Copy our part (start - end) from the list of thread t */
 +        if (thread > 0)
 +        {
 +            start = tpl->n[thread-1];
 +        }
 +        end = tpl->n[thread];
 +        for(i=start; i<end; i++)
 +        {
 +            spline->ind[n++] = tpl->i[i];
 +        }
 +    }
 +
 +    spline->n = n;
 +}
 +
 +
 +/* Assign each atom in [start,end) to a PME slab index, written to atc->pd,
 + * and tally per-slab atom counts into 'count' (count must have at least
 + * atc->nslab entries; it is zeroed for all slabs here, so callers splitting
 + * the atom range across threads must use separate count arrays).
 + * The slab is chosen from the fractional coordinate along the decomposed
 + * dimension (x for dimind==0, else y), computed via the reciprocal box.
 + */
 +static void pme_calc_pidx(int start, int end,
 +                          matrix recipbox, rvec x[],
 +                          pme_atomcomm_t *atc, int *count)
 +{
 +    int  nslab,i;
 +    int  si;
 +    real *xptr,s;
 +    real rxx,ryx,rzx,ryy,rzy;
 +    int *pd;
 +
 +    /* Calculate PME task index (pidx) for each grid index.
 +     * Here we always assign equally sized slabs to each node
 +     * for load balancing reasons (the PME grid spacing is not used).
 +     */
 +
 +    nslab = atc->nslab;
 +    pd    = atc->pd;
 +
 +    /* Reset the count */
 +    for(i=0; i<nslab; i++)
 +    {
 +        count[i] = 0;
 +    }
 +
 +    if (atc->dimind == 0)
 +    {
 +        rxx = recipbox[XX][XX];
 +        ryx = recipbox[YY][XX];
 +        rzx = recipbox[ZZ][XX];
 +        /* Calculate the node index in x-dimension */
 +        for(i=start; i<end; i++)
 +        {
 +            xptr   = x[i];
 +            /* Fractional coordinates along box vectors */
 +            s = nslab*(xptr[XX]*rxx + xptr[YY]*ryx + xptr[ZZ]*rzx);
 +            /* Add 2*nslab before truncating so the argument is non-negative
 +             * (presumably atoms are at most ~2 box lengths out of the box)
 +             * and the % yields a valid slab in [0,nslab).
 +             */
 +            si = (int)(s + 2*nslab) % nslab;
 +            pd[i] = si;
 +            count[si]++;
 +        }
 +    }
 +    else
 +    {
 +        ryy = recipbox[YY][YY];
 +        rzy = recipbox[ZZ][YY];
 +        /* Calculate the node index in y-dimension */
 +        for(i=start; i<end; i++)
 +        {
 +            xptr   = x[i];
 +            /* Fractional coordinates along box vectors */
 +            s = nslab*(xptr[YY]*ryy + xptr[ZZ]*rzy);
 +            si = (int)(s + 2*nslab) % nslab;
 +            pd[i] = si;
 +            count[si]++;
 +        }
 +    }
 +}
 +
 +/* Thread-parallel driver for pme_calc_pidx: each OpenMP thread handles an
 + * equal contiguous share of the atoms, counting into its own
 + * atc->count_thread[thread] array; the per-thread slab counts are then
 + * reduced serially into atc->count_thread[0].
 + */
 +static void pme_calc_pidx_wrapper(int natoms, matrix recipbox, rvec x[],
 +                                  pme_atomcomm_t *atc)
 +{
 +    int nthread,thread,slab;
 +
 +    nthread = atc->nthread;
 +
 +#pragma omp parallel for num_threads(nthread) schedule(static)
 +    for(thread=0; thread<nthread; thread++)
 +    {
 +        pme_calc_pidx(natoms* thread   /nthread,
 +                      natoms*(thread+1)/nthread,
 +                      recipbox,x,atc,atc->count_thread[thread]);
 +    }
 +    /* Non-parallel reduction, since nslab is small */
 +
 +    for(thread=1; thread<nthread; thread++)
 +    {
 +        for(slab=0; slab<atc->nslab; slab++)
 +        {
 +            atc->count_thread[0][slab] += atc->count_thread[thread][slab];
 +        }
 +    }
 +}
 +
 +/* (Re)allocate the spline index and theta/dtheta arrays of 'spline' to
 + * match atc->nalloc atoms. The index is initialized to the identity
 + * mapping so the single-threaded path works without a sorting pass.
 + */
 +static void pme_realloc_splinedata(splinedata_t *spline, pme_atomcomm_t *atc)
 +{
 +    int i,d;
 +
 +    srenew(spline->ind,atc->nalloc);
 +    /* Initialize the index to identity so it works without threads */
 +    for(i=0; i<atc->nalloc; i++)
 +    {
 +        spline->ind[i] = i;
 +    }
 +
 +    /* pme_order spline coefficients are stored per atom per dimension */
 +    for(d=0;d<DIM;d++)
 +    {
 +        srenew(spline->theta[d] ,atc->pme_order*atc->nalloc);
 +        srenew(spline->dtheta[d],atc->pme_order*atc->nalloc);
 +    }
 +}
 +
 +/* Grow the per-atom arrays of 'atc' when atc->n exceeds the current
 + * allocation (or on first use). Coordinate/charge/force buffers are only
 + * needed when redistribution occurs (nslab > 1); spreading buffers
 + * (fractx, idx, thread index, spline data) only when atc->bSpread is set.
 + */
 +static void pme_realloc_atomcomm_things(pme_atomcomm_t *atc)
 +{
 +    int nalloc_old,i,j,nalloc_tpl;
 +
 +    /* We have to avoid a NULL pointer for atc->x to avoid
 +     * possible fatal errors in MPI routines.
 +     */
 +    if (atc->n > atc->nalloc || atc->nalloc == 0)
 +    {
 +        nalloc_old = atc->nalloc;
 +        /* over-allocate to reduce the number of future reallocations */
 +        atc->nalloc = over_alloc_dd(max(atc->n,1));
 +
 +        if (atc->nslab > 1) {
 +            srenew(atc->x,atc->nalloc);
 +            srenew(atc->q,atc->nalloc);
 +            srenew(atc->f,atc->nalloc);
 +            /* only the newly added force entries need clearing */
 +            for(i=nalloc_old; i<atc->nalloc; i++)
 +            {
 +                clear_rvec(atc->f[i]);
 +            }
 +        }
 +        if (atc->bSpread) {
 +            srenew(atc->fractx,atc->nalloc);
 +            srenew(atc->idx   ,atc->nalloc);
 +
 +            if (atc->nthread > 1)
 +            {
 +                srenew(atc->thread_idx,atc->nalloc);
 +            }
 +
 +            for(i=0; i<atc->nthread; i++)
 +            {
 +                pme_realloc_splinedata(&atc->spline[i],atc);
 +            }
 +        }
 +    }
 +}
 +
 +/* All-to-all redistribution of particle data between PP and PME slabs
 + * (particle-decomposition path). forw==TRUE sends coordinates (when bXF)
 + * and charges from PP layout to PME slabs; forw==FALSE returns forces,
 + * ADDING them into x_f. Destination slab per atom comes from atc->pd.
 + * Compiled to a no-op communication-wise without GMX_MPI.
 + */
 +static void pmeredist_pd(gmx_pme_t pme, gmx_bool forw,
 +                         int n, gmx_bool bXF, rvec *x_f, real *charge,
 +                         pme_atomcomm_t *atc)
 +/* Redistribute particle data for PME calculation */
 +/* domain decomposition by x coordinate           */
 +{
 +    int *idxa;
 +    int i, ii;
 +
 +    /* lazily allocate the count/displacement arrays on first call */
 +    if(FALSE == pme->redist_init) {
 +        snew(pme->scounts,atc->nslab);
 +        snew(pme->rcounts,atc->nslab);
 +        snew(pme->sdispls,atc->nslab);
 +        snew(pme->rdispls,atc->nslab);
 +        snew(pme->sidx,atc->nslab);
 +        pme->redist_init = TRUE;
 +    }
 +    /* send buffer sized for DIM reals per atom (coordinates are largest) */
 +    if (n > pme->redist_buf_nalloc) {
 +        pme->redist_buf_nalloc = over_alloc_dd(n);
 +        srenew(pme->redist_buf,pme->redist_buf_nalloc*DIM);
 +    }
 +
 +    pme->idxa = atc->pd;
 +
 +#ifdef GMX_MPI
 +    if (forw && bXF) {
 +        /* forward, redistribution from pp to pme */
 +
 +        /* Calculate send counts and exchange them with other nodes */
 +        for(i=0; (i<atc->nslab); i++) pme->scounts[i]=0;
 +        for(i=0; (i<n); i++) pme->scounts[pme->idxa[i]]++;
 +        MPI_Alltoall( pme->scounts, 1, MPI_INT, pme->rcounts, 1, MPI_INT, atc->mpi_comm);
 +
 +        /* Calculate send and receive displacements and index into send
 +           buffer */
 +        pme->sdispls[0]=0;
 +        pme->rdispls[0]=0;
 +        pme->sidx[0]=0;
 +        for(i=1; i<atc->nslab; i++) {
 +            pme->sdispls[i]=pme->sdispls[i-1]+pme->scounts[i-1];
 +            pme->rdispls[i]=pme->rdispls[i-1]+pme->rcounts[i-1];
 +            pme->sidx[i]=pme->sdispls[i];
 +        }
 +        /* Total # of particles to be received */
 +        atc->n = pme->rdispls[atc->nslab-1] + pme->rcounts[atc->nslab-1];
 +
 +        pme_realloc_atomcomm_things(atc);
 +
 +        /* Copy particle coordinates into send buffer and exchange*/
 +        for(i=0; (i<n); i++) {
 +            ii=DIM*pme->sidx[pme->idxa[i]];
 +            pme->sidx[pme->idxa[i]]++;
 +            pme->redist_buf[ii+XX]=x_f[i][XX];
 +            pme->redist_buf[ii+YY]=x_f[i][YY];
 +            pme->redist_buf[ii+ZZ]=x_f[i][ZZ];
 +        }
 +        /* pme->rvec_mpi: presumably an MPI datatype covering one rvec --
 +         * defined elsewhere in this file; verify at the declaration site.
 +         */
 +        MPI_Alltoallv(pme->redist_buf, pme->scounts, pme->sdispls,
 +                      pme->rvec_mpi, atc->x, pme->rcounts, pme->rdispls,
 +                      pme->rvec_mpi, atc->mpi_comm);
 +    }
 +    if (forw) {
 +        /* Copy charge into send buffer and exchange*/
 +        /* reset sidx to the slab start offsets before refilling */
 +        for(i=0; i<atc->nslab; i++) pme->sidx[i]=pme->sdispls[i];
 +        for(i=0; (i<n); i++) {
 +            ii=pme->sidx[pme->idxa[i]];
 +            pme->sidx[pme->idxa[i]]++;
 +            pme->redist_buf[ii]=charge[i];
 +        }
 +        /* mpi_type: presumably the MPI datatype for 'real' (file-scope);
 +         * confirm against its definition.
 +         */
 +        MPI_Alltoallv(pme->redist_buf, pme->scounts, pme->sdispls, mpi_type,
 +                      atc->q, pme->rcounts, pme->rdispls, mpi_type,
 +                      atc->mpi_comm);
 +    }
 +    else { /* backward, redistribution from pme to pp */
 +        MPI_Alltoallv(atc->f, pme->rcounts, pme->rdispls, pme->rvec_mpi,
 +                      pme->redist_buf, pme->scounts, pme->sdispls,
 +                      pme->rvec_mpi, atc->mpi_comm);
 +
 +        /* Copy data from receive buffer */
 +        for(i=0; i<atc->nslab; i++)
 +            pme->sidx[i] = pme->sdispls[i];
 +        for(i=0; (i<n); i++) {
 +            ii = DIM*pme->sidx[pme->idxa[i]];
 +            /* forces are accumulated, not overwritten */
 +            x_f[i][XX] += pme->redist_buf[ii+XX];
 +            x_f[i][YY] += pme->redist_buf[ii+YY];
 +            x_f[i][ZZ] += pme->redist_buf[ii+ZZ];
 +            pme->sidx[pme->idxa[i]]++;
 +        }
 +    }
 +#endif
 +}
 +
 +/* Point-to-point byte exchange with the neighbor at distance 'shift' in
 + * the slab communicator; bBackward swaps source and destination so force
 + * redistribution can retrace the coordinate path. Degenerates to a plain
 + * send or receive when one side has nothing to transfer (zero bytes).
 + * No-op without GMX_MPI.
 + */
 +static void pme_dd_sendrecv(pme_atomcomm_t *atc,
 +                            gmx_bool bBackward,int shift,
 +                            void *buf_s,int nbyte_s,
 +                            void *buf_r,int nbyte_r)
 +{
 +#ifdef GMX_MPI
 +    int dest,src;
 +    MPI_Status stat;
 +
 +    if (bBackward == FALSE) {
 +        dest = atc->node_dest[shift];
 +        src  = atc->node_src[shift];
 +    } else {
 +        dest = atc->node_src[shift];
 +        src  = atc->node_dest[shift];
 +    }
 +
 +    if (nbyte_s > 0 && nbyte_r > 0) {
 +        /* combined send+receive avoids deadlock between neighbor pairs */
 +        MPI_Sendrecv(buf_s,nbyte_s,MPI_BYTE,
 +                     dest,shift,
 +                     buf_r,nbyte_r,MPI_BYTE,
 +                     src,shift,
 +                     atc->mpi_comm,&stat);
 +    } else if (nbyte_s > 0) {
 +        MPI_Send(buf_s,nbyte_s,MPI_BYTE,
 +                 dest,shift,
 +                 atc->mpi_comm);
 +    } else if (nbyte_r > 0) {
 +        MPI_Recv(buf_r,nbyte_r,MPI_BYTE,
 +                 src,shift,
 +                 atc->mpi_comm,&stat);
 +    }
 +#endif
 +}
 +
 +/* Domain-decomposition redistribution of coordinates (when bX) and
 + * charges to the owning PME slabs, using pairwise pme_dd_sendrecv
 + * exchanges with up to 2*maxshift neighbors instead of an all-to-all.
 + * Atoms staying local are copied straight into atc->x/atc->q; the rest
 + * are staged per destination in pme->bufv/pme->bufr. Updates atc->n and
 + * reallocates atc buffers when bX is set.
 + */
 +static void dd_pmeredist_x_q(gmx_pme_t pme,
 +                             int n, gmx_bool bX, rvec *x, real *charge,
 +                             pme_atomcomm_t *atc)
 +{
 +    int *commnode,*buf_index;
 +    int nnodes_comm,i,nsend,local_pos,buf_pos,node,scount,rcount;
 +
 +    commnode  = atc->node_dest;
 +    buf_index = atc->buf_index;
 +
 +    nnodes_comm = min(2*atc->maxshift,atc->nslab-1);
 +
 +    /* compute per-destination offsets into the shared send buffer */
 +    nsend = 0;
 +    for(i=0; i<nnodes_comm; i++) {
 +        buf_index[commnode[i]] = nsend;
 +        nsend += atc->count[commnode[i]];
 +    }
 +    if (bX) {
 +        /* every atom must be either local or bound for a comm neighbor;
 +         * anything else means an atom moved too far between steps
 +         */
 +        if (atc->count[atc->nodeid] + nsend != n)
 +            gmx_fatal(FARGS,"%d particles communicated to PME node %d are more than 2/3 times the cut-off out of the domain decomposition cell of their charge group in dimension %c.\n"
 +                      "This usually means that your system is not well equilibrated.",
 +                      n - (atc->count[atc->nodeid] + nsend),
 +                      pme->nodeid,'x'+atc->dimind);
 +
 +        if (nsend > pme->buf_nalloc) {
 +            pme->buf_nalloc = over_alloc_dd(nsend);
 +            srenew(pme->bufv,pme->buf_nalloc);
 +            srenew(pme->bufr,pme->buf_nalloc);
 +        }
 +
 +        atc->n = atc->count[atc->nodeid];
 +        for(i=0; i<nnodes_comm; i++) {
 +            scount = atc->count[commnode[i]];
 +            /* Communicate the count */
 +            if (debug)
 +                fprintf(debug,"dimind %d PME node %d send to node %d: %d\n",
 +                        atc->dimind,atc->nodeid,commnode[i],scount);
 +            pme_dd_sendrecv(atc,FALSE,i,
 +                            &scount,sizeof(int),
 +                            &atc->rcount[i],sizeof(int));
 +            atc->n += atc->rcount[i];
 +        }
 +
 +        pme_realloc_atomcomm_things(atc);
 +    }
 +
 +    /* split atoms into the local receive area and the staged send buffers */
 +    local_pos = 0;
 +    for(i=0; i<n; i++) {
 +        node = atc->pd[i];
 +        if (node == atc->nodeid) {
 +            /* Copy direct to the receive buffer */
 +            if (bX) {
 +                copy_rvec(x[i],atc->x[local_pos]);
 +            }
 +            atc->q[local_pos] = charge[i];
 +            local_pos++;
 +        } else {
 +            /* Copy to the send buffer */
 +            if (bX) {
 +                copy_rvec(x[i],pme->bufv[buf_index[node]]);
 +            }
 +            pme->bufr[buf_index[node]] = charge[i];
 +            buf_index[node]++;
 +        }
 +    }
 +
 +    /* exchange the staged data; received atoms land after the local ones */
 +    buf_pos = 0;
 +    for(i=0; i<nnodes_comm; i++) {
 +        scount = atc->count[commnode[i]];
 +        rcount = atc->rcount[i];
 +        if (scount > 0 || rcount > 0) {
 +            if (bX) {
 +                /* Communicate the coordinates */
 +                pme_dd_sendrecv(atc,FALSE,i,
 +                                pme->bufv[buf_pos],scount*sizeof(rvec),
 +                                atc->x[local_pos],rcount*sizeof(rvec));
 +            }
 +            /* Communicate the charges */
 +            pme_dd_sendrecv(atc,FALSE,i,
 +                            pme->bufr+buf_pos,scount*sizeof(real),
 +                            atc->q+local_pos,rcount*sizeof(real));
 +            buf_pos   += scount;
 +            local_pos += atc->rcount[i];
 +        }
 +    }
 +}
 +
 +/* Reverse of dd_pmeredist_x_q: send the PME forces in atc->f back to the
 + * nodes that own the atoms (bBackward exchanges retrace the coordinate
 + * path), then scatter them into f[] per atc->pd — adding when bAddF is
 + * set, overwriting otherwise.
 + */
 +static void dd_pmeredist_f(gmx_pme_t pme, pme_atomcomm_t *atc,
 +                           int n, rvec *f,
 +                           gmx_bool bAddF)
 +{
 +  int *commnode,*buf_index;
 +  int nnodes_comm,local_pos,buf_pos,i,scount,rcount,node;
 +
 +  commnode  = atc->node_dest;
 +  buf_index = atc->buf_index;
 +
 +  nnodes_comm = min(2*atc->maxshift,atc->nslab-1);
 +
 +  /* forces for non-local atoms start after the locally-owned block */
 +  local_pos = atc->count[atc->nodeid];
 +  buf_pos = 0;
 +  for(i=0; i<nnodes_comm; i++) {
 +    /* note the roles are swapped vs. dd_pmeredist_x_q: we now send what
 +     * we received (rcount) and receive what we sent (count)
 +     */
 +    scount = atc->rcount[i];
 +    rcount = atc->count[commnode[i]];
 +    if (scount > 0 || rcount > 0) {
 +      /* Communicate the forces */
 +      pme_dd_sendrecv(atc,TRUE,i,
 +                      atc->f[local_pos],scount*sizeof(rvec),
 +                      pme->bufv[buf_pos],rcount*sizeof(rvec));
 +      local_pos += scount;
 +    }
 +    buf_index[commnode[i]] = buf_pos;
 +    buf_pos   += rcount;
 +  }
 +
 +    local_pos = 0;
 +    if (bAddF)
 +    {
 +        for(i=0; i<n; i++)
 +        {
 +            node = atc->pd[i];
 +            if (node == atc->nodeid)
 +            {
 +                /* Add from the local force array */
 +                rvec_inc(f[i],atc->f[local_pos]);
 +                local_pos++;
 +            }
 +            else
 +            {
 +                /* Add from the receive buffer */
 +                rvec_inc(f[i],pme->bufv[buf_index[node]]);
 +                buf_index[node]++;
 +            }
 +        }
 +    }
 +    else
 +    {
 +        for(i=0; i<n; i++)
 +        {
 +            node = atc->pd[i];
 +            if (node == atc->nodeid)
 +            {
 +                /* Copy from the local force array */
 +                copy_rvec(atc->f[local_pos],f[i]);
 +                local_pos++;
 +            }
 +            else
 +            {
 +                /* Copy from the receive buffer */
 +                copy_rvec(pme->bufv[buf_index[node]],f[i]);
 +                buf_index[node]++;
 +            }
 +        }
 +    }
 +}
 +
 +#ifdef GMX_MPI
 +/* Exchange the charge-grid overlap regions between neighboring PME nodes.
 + * direction GMX_SUM_QGRID_FORWARD sums received overlap data into the
 + * local grid (before the FFT); the reverse direction distributes grid
 + * data back, overwriting the overlap region. The minor (y) dimension is
 + * handled first via pack/unpack into contiguous buffers; the major (x)
 + * dimension is contiguous in memory and is sent/received in place.
 + */
 +static void
 +gmx_sum_qgrid_dd(gmx_pme_t pme, real *grid, int direction)
 +{
 +    pme_overlap_t *overlap;
 +    int send_index0,send_nindex;
 +    int recv_index0,recv_nindex;
 +    MPI_Status stat;
 +    int i,j,k,ix,iy,iz,icnt;
 +    int ipulse,send_id,recv_id,datasize;
 +    real *p;
 +    real *sendptr,*recvptr;
 +
 +    /* Start with minor-rank communication. This is a bit of a pain since it is not contiguous */
 +    overlap = &pme->overlap[1];
 +
 +    for(ipulse=0;ipulse<overlap->noverlap_nodes;ipulse++)
 +    {
 +        /* Since we have already (un)wrapped the overlap in the z-dimension,
 +         * we only have to communicate 0 to nkz (not pmegrid_nz).
 +         */
 +        if (direction==GMX_SUM_QGRID_FORWARD)
 +        {
 +            send_id = overlap->send_id[ipulse];
 +            recv_id = overlap->recv_id[ipulse];
 +            send_index0   = overlap->comm_data[ipulse].send_index0;
 +            send_nindex   = overlap->comm_data[ipulse].send_nindex;
 +            recv_index0   = overlap->comm_data[ipulse].recv_index0;
 +            recv_nindex   = overlap->comm_data[ipulse].recv_nindex;
 +        }
 +        else
 +        {
 +            /* backward direction: swap the send/recv roles and ranges */
 +            send_id = overlap->recv_id[ipulse];
 +            recv_id = overlap->send_id[ipulse];
 +            send_index0   = overlap->comm_data[ipulse].recv_index0;
 +            send_nindex   = overlap->comm_data[ipulse].recv_nindex;
 +            recv_index0   = overlap->comm_data[ipulse].send_index0;
 +            recv_nindex   = overlap->comm_data[ipulse].send_nindex;
 +        }
 +
 +        /* Copy data to contiguous send buffer */
 +        if (debug)
 +        {
 +            fprintf(debug,"PME send node %d %d -> %d grid start %d Communicating %d to %d\n",
 +                    pme->nodeid,overlap->nodeid,send_id,
 +                    pme->pmegrid_start_iy,
 +                    send_index0-pme->pmegrid_start_iy,
 +                    send_index0-pme->pmegrid_start_iy+send_nindex);
 +        }
 +        icnt = 0;
 +        for(i=0;i<pme->pmegrid_nx;i++)
 +        {
 +            ix = i;
 +            for(j=0;j<send_nindex;j++)
 +            {
 +                iy = j + send_index0 - pme->pmegrid_start_iy;
 +                for(k=0;k<pme->nkz;k++)
 +                {
 +                    iz = k;
 +                    overlap->sendbuf[icnt++] = grid[ix*(pme->pmegrid_ny*pme->pmegrid_nz)+iy*(pme->pmegrid_nz)+iz];
 +                }
 +            }
 +        }
 +
 +        datasize      = pme->pmegrid_nx * pme->nkz;
 +
 +        MPI_Sendrecv(overlap->sendbuf,send_nindex*datasize,GMX_MPI_REAL,
 +                     send_id,ipulse,
 +                     overlap->recvbuf,recv_nindex*datasize,GMX_MPI_REAL,
 +                     recv_id,ipulse,
 +                     overlap->mpi_comm,&stat);
 +
 +        /* Get data from contiguous recv buffer */
 +        if (debug)
 +        {
 +            fprintf(debug,"PME recv node %d %d <- %d grid start %d Communicating %d to %d\n",
 +                    pme->nodeid,overlap->nodeid,recv_id,
 +                    pme->pmegrid_start_iy,
 +                    recv_index0-pme->pmegrid_start_iy,
 +                    recv_index0-pme->pmegrid_start_iy+recv_nindex);
 +        }
 +        icnt = 0;
 +        for(i=0;i<pme->pmegrid_nx;i++)
 +        {
 +            ix = i;
 +            for(j=0;j<recv_nindex;j++)
 +            {
 +                iy = j + recv_index0 - pme->pmegrid_start_iy;
 +                for(k=0;k<pme->nkz;k++)
 +                {
 +                    iz = k;
 +                    /* forward: sum overlap contributions; backward: replace */
 +                    if(direction==GMX_SUM_QGRID_FORWARD)
 +                    {
 +                        grid[ix*(pme->pmegrid_ny*pme->pmegrid_nz)+iy*(pme->pmegrid_nz)+iz] += overlap->recvbuf[icnt++];
 +                    }
 +                    else
 +                    {
 +                        grid[ix*(pme->pmegrid_ny*pme->pmegrid_nz)+iy*(pme->pmegrid_nz)+iz]  = overlap->recvbuf[icnt++];
 +                    }
 +                }
 +            }
 +        }
 +    }
 +
 +    /* Major dimension is easier, no copying required,
 +     * but we might have to sum to separate array.
 +     * Since we don't copy, we have to communicate up to pmegrid_nz,
 +     * not nkz as for the minor direction.
 +     */
 +    overlap = &pme->overlap[0];
 +
 +    for(ipulse=0;ipulse<overlap->noverlap_nodes;ipulse++)
 +    {
 +        if(direction==GMX_SUM_QGRID_FORWARD)
 +        {
 +            send_id = overlap->send_id[ipulse];
 +            recv_id = overlap->recv_id[ipulse];
 +            send_index0   = overlap->comm_data[ipulse].send_index0;
 +            send_nindex   = overlap->comm_data[ipulse].send_nindex;
 +            recv_index0   = overlap->comm_data[ipulse].recv_index0;
 +            recv_nindex   = overlap->comm_data[ipulse].recv_nindex;
 +            /* forward needs a separate buffer so we can sum afterwards */
 +            recvptr   = overlap->recvbuf;
 +        }
 +        else
 +        {
 +            send_id = overlap->recv_id[ipulse];
 +            recv_id = overlap->send_id[ipulse];
 +            send_index0   = overlap->comm_data[ipulse].recv_index0;
 +            send_nindex   = overlap->comm_data[ipulse].recv_nindex;
 +            recv_index0   = overlap->comm_data[ipulse].send_index0;
 +            recv_nindex   = overlap->comm_data[ipulse].send_nindex;
 +            /* backward overwrites, so receive straight into the grid */
 +            recvptr   = grid + (recv_index0-pme->pmegrid_start_ix)*(pme->pmegrid_ny*pme->pmegrid_nz);
 +        }
 +
 +        sendptr       = grid + (send_index0-pme->pmegrid_start_ix)*(pme->pmegrid_ny*pme->pmegrid_nz);
 +        datasize      = pme->pmegrid_ny * pme->pmegrid_nz;
 +
 +        if (debug)
 +        {
 +            fprintf(debug,"PME send node %d %d -> %d grid start %d Communicating %d to %d\n",
 +                    pme->nodeid,overlap->nodeid,send_id,
 +                    pme->pmegrid_start_ix,
 +                    send_index0-pme->pmegrid_start_ix,
 +                    send_index0-pme->pmegrid_start_ix+send_nindex);
 +            fprintf(debug,"PME recv node %d %d <- %d grid start %d Communicating %d to %d\n",
 +                    pme->nodeid,overlap->nodeid,recv_id,
 +                    pme->pmegrid_start_ix,
 +                    recv_index0-pme->pmegrid_start_ix,
 +                    recv_index0-pme->pmegrid_start_ix+recv_nindex);
 +        }
 +
 +        MPI_Sendrecv(sendptr,send_nindex*datasize,GMX_MPI_REAL,
 +                     send_id,ipulse,
 +                     recvptr,recv_nindex*datasize,GMX_MPI_REAL,
 +                     recv_id,ipulse,
 +                     overlap->mpi_comm,&stat);
 +
 +        /* ADD data from contiguous recv buffer */
 +        if(direction==GMX_SUM_QGRID_FORWARD)
 +        {
 +            p = grid + (recv_index0-pme->pmegrid_start_ix)*(pme->pmegrid_ny*pme->pmegrid_nz);
 +            for(i=0;i<recv_nindex*datasize;i++)
 +            {
 +                p[i] += overlap->recvbuf[i];
 +            }
 +        }
 +    }
 +}
 +#endif
 +
 +
 +/* Copy the interior of the (possibly larger, overlap-padded) PME grid
 + * into the FFT grid, element by element over the local FFT region.
 + * Returns 0 unconditionally. With DEBUG_PME also dumps the non-zero grid
 + * entries to pmegrid<node>.pdb/.txt for inspection.
 + */
 +static int
 +copy_pmegrid_to_fftgrid(gmx_pme_t pme, real *pmegrid, real *fftgrid)
 +{
 +    ivec    local_fft_ndata,local_fft_offset,local_fft_size;
 +    ivec    local_pme_size;
 +    int     i,ix,iy,iz;
 +    int     pmeidx,fftidx;
 +
 +    /* Dimensions should be identical for A/B grid, so we just use A here */
 +    gmx_parallel_3dfft_real_limits(pme->pfft_setupA,
 +                                   local_fft_ndata,
 +                                   local_fft_offset,
 +                                   local_fft_size);
 +
 +    local_pme_size[0] = pme->pmegrid_nx;
 +    local_pme_size[1] = pme->pmegrid_ny;
 +    local_pme_size[2] = pme->pmegrid_nz;
 +
 +    /* The fftgrid is always 'justified' to the lower-left corner of the PME grid,
 +     the offset is identical, and the PME grid always has more data (due to overlap)
 +     */
 +    {
 +#ifdef DEBUG_PME
 +        FILE *fp,*fp2;
 +        char fn[STRLEN],format[STRLEN];
 +        real val;
 +        sprintf(fn,"pmegrid%d.pdb",pme->nodeid);
 +        fp = ffopen(fn,"w");
 +        sprintf(fn,"pmegrid%d.txt",pme->nodeid);
 +        fp2 = ffopen(fn,"w");
 +     sprintf(format,"%s%s\n",pdbformat,"%6.2f%6.2f");
 +#endif
 +
 +    /* the two grids have different row strides, so recompute both
 +     * linear indices per element
 +     */
 +    for(ix=0;ix<local_fft_ndata[XX];ix++)
 +    {
 +        for(iy=0;iy<local_fft_ndata[YY];iy++)
 +        {
 +            for(iz=0;iz<local_fft_ndata[ZZ];iz++)
 +            {
 +                pmeidx = ix*(local_pme_size[YY]*local_pme_size[ZZ])+iy*(local_pme_size[ZZ])+iz;
 +                fftidx = ix*(local_fft_size[YY]*local_fft_size[ZZ])+iy*(local_fft_size[ZZ])+iz;
 +                fftgrid[fftidx] = pmegrid[pmeidx];
 +#ifdef DEBUG_PME
 +                val = 100*pmegrid[pmeidx];
 +                if (pmegrid[pmeidx] != 0)
 +                fprintf(fp,format,"ATOM",pmeidx,"CA","GLY",' ',pmeidx,' ',
 +                        5.0*ix,5.0*iy,5.0*iz,1.0,val);
 +                if (pmegrid[pmeidx] != 0)
 +                    fprintf(fp2,"%-12s  %5d  %5d  %5d  %12.5e\n",
 +                            "qgrid",
 +                            pme->pmegrid_start_ix + ix,
 +                            pme->pmegrid_start_iy + iy,
 +                            pme->pmegrid_start_iz + iz,
 +                            pmegrid[pmeidx]);
 +#endif
 +            }
 +        }
 +    }
 +#ifdef DEBUG_PME
 +    ffclose(fp);
 +    ffclose(fp2);
 +#endif
 +    }
 +    return 0;
 +}
 +
 +
 +/* Read the cycle counter to start a timing interval (see omp_cyc_end). */
 +static gmx_cycles_t omp_cyc_start()
 +{
 +    return gmx_cycles_read();
 +}
 +
 +/* Return the cycles elapsed since 'c' was taken with omp_cyc_start(). */
 +static gmx_cycles_t omp_cyc_end(gmx_cycles_t c)
 +{
 +    return gmx_cycles_read() - c;
 +}
 +
 +
 +/* Inverse of copy_pmegrid_to_fftgrid for one thread's share: copy this
 + * thread's slice of (x,y) rows of the FFT grid back into the PME grid.
 + * The work is split over 'nthread' threads by flattened x*y index.
 + * Returns 0 unconditionally. PME_TIME_THREADS adds coarse cycle timing
 + * printed every 20 calls (static counters: not thread-safe, debug only).
 + */
 +static int
 +copy_fftgrid_to_pmegrid(gmx_pme_t pme, const real *fftgrid, real *pmegrid,
 +                        int nthread,int thread)
 +{
 +    ivec    local_fft_ndata,local_fft_offset,local_fft_size;
 +    ivec    local_pme_size;
 +    int     ixy0,ixy1,ixy,ix,iy,iz;
 +    int     pmeidx,fftidx;
 +#ifdef PME_TIME_THREADS
 +    gmx_cycles_t c1;
 +    static double cs1=0;
 +    static int cnt=0;
 +#endif
 +
 +#ifdef PME_TIME_THREADS
 +    c1 = omp_cyc_start();
 +#endif
 +    /* Dimensions should be identical for A/B grid, so we just use A here */
 +    gmx_parallel_3dfft_real_limits(pme->pfft_setupA,
 +                                   local_fft_ndata,
 +                                   local_fft_offset,
 +                                   local_fft_size);
 +
 +    local_pme_size[0] = pme->pmegrid_nx;
 +    local_pme_size[1] = pme->pmegrid_ny;
 +    local_pme_size[2] = pme->pmegrid_nz;
 +
 +    /* The fftgrid is always 'justified' to the lower-left corner of the PME grid,
 +     the offset is identical, and the PME grid always has more data (due to overlap)
 +     */
 +    /* this thread's half-open range of flattened (x,y) row indices */
 +    ixy0 = ((thread  )*local_fft_ndata[XX]*local_fft_ndata[YY])/nthread;
 +    ixy1 = ((thread+1)*local_fft_ndata[XX]*local_fft_ndata[YY])/nthread;
 +
 +    for(ixy=ixy0;ixy<ixy1;ixy++)
 +    {
 +        ix = ixy/local_fft_ndata[YY];
 +        iy = ixy - ix*local_fft_ndata[YY];
 +
 +        /* whole z-rows are contiguous in both grids, so copy row-wise */
 +        pmeidx = (ix*local_pme_size[YY] + iy)*local_pme_size[ZZ];
 +        fftidx = (ix*local_fft_size[YY] + iy)*local_fft_size[ZZ];
 +        for(iz=0;iz<local_fft_ndata[ZZ];iz++)
 +        {
 +            pmegrid[pmeidx+iz] = fftgrid[fftidx+iz];
 +        }
 +    }
 +
 +#ifdef PME_TIME_THREADS
 +    c1 = omp_cyc_end(c1);
 +    cs1 += (double)c1;
 +    cnt++;
 +    if (cnt % 20 == 0)
 +    {
 +        printf("copy %.2f\n",cs1*1e-9);
 +    }
 +#endif
 +
 +    return 0;
 +}
 +
 +
 +/* Fold the periodic overlap regions of the spread charge grid back onto
 + * the primary cell: charge spread past the upper edge (pme_order-1
 + * planes) is added onto the corresponding low-index planes. z is always
 + * wrapped; y and x only when that dimension is not split over nodes
 + * (node-split overlap is handled by MPI communication instead).
 + */
 +static void
 +wrap_periodic_pmegrid(gmx_pme_t pme, real *pmegrid)
 +{
 +    int     nx,ny,nz,pnx,pny,pnz,ny_x,overlap,ix,iy,iz;
 +
 +    nx = pme->nkx;
 +    ny = pme->nky;
 +    nz = pme->nkz;
 +
 +    pnx = pme->pmegrid_nx;
 +    pny = pme->pmegrid_ny;
 +    pnz = pme->pmegrid_nz;
 +
 +    /* a pme_order spread touches at most pme_order-1 planes beyond the edge */
 +    overlap = pme->pme_order - 1;
 +
 +    /* Add periodic overlap in z */
 +    for(ix=0; ix<pme->pmegrid_nx; ix++)
 +    {
 +        for(iy=0; iy<pme->pmegrid_ny; iy++)
 +        {
 +            for(iz=0; iz<overlap; iz++)
 +            {
 +                pmegrid[(ix*pny+iy)*pnz+iz] +=
 +                    pmegrid[(ix*pny+iy)*pnz+nz+iz];
 +            }
 +        }
 +    }
 +
 +    if (pme->nnodes_minor == 1)
 +    {
 +       for(ix=0; ix<pme->pmegrid_nx; ix++)
 +       {
 +           for(iy=0; iy<overlap; iy++)
 +           {
 +               for(iz=0; iz<nz; iz++)
 +               {
 +                   pmegrid[(ix*pny+iy)*pnz+iz] +=
 +                       pmegrid[(ix*pny+ny+iy)*pnz+iz];
 +               }
 +           }
 +       }
 +    }
 +
 +    if (pme->nnodes_major == 1)
 +    {
 +        /* if y was already wrapped above, only ny rows carry data */
 +        ny_x = (pme->nnodes_minor == 1 ? ny : pme->pmegrid_ny);
 +
 +        for(ix=0; ix<overlap; ix++)
 +        {
 +            for(iy=0; iy<ny_x; iy++)
 +            {
 +                for(iz=0; iz<nz; iz++)
 +                {
 +                    pmegrid[(ix*pny+iy)*pnz+iz] +=
 +                        pmegrid[((nx+ix)*pny+iy)*pnz+iz];
 +                }
 +            }
 +        }
 +    }
 +}
 +
 +
 +/* Inverse of wrap_periodic_pmegrid, used before force interpolation:
 + * replicate the low-index planes of the potential grid into the overlap
 + * regions past the upper edges (x first, then y, then z — the reverse of
 + * the wrap order), again only for dimensions not split over nodes.
 + * The y and z copies are parallelized over x with OpenMP.
 + */
 +static void
 +unwrap_periodic_pmegrid(gmx_pme_t pme, real *pmegrid)
 +{
 +    int     nx,ny,nz,pnx,pny,pnz,ny_x,overlap,ix;
 +
 +    nx = pme->nkx;
 +    ny = pme->nky;
 +    nz = pme->nkz;
 +
 +    pnx = pme->pmegrid_nx;
 +    pny = pme->pmegrid_ny;
 +    pnz = pme->pmegrid_nz;
 +
 +    overlap = pme->pme_order - 1;
 +
 +    if (pme->nnodes_major == 1)
 +    {
 +        ny_x = (pme->nnodes_minor == 1 ? ny : pme->pmegrid_ny);
 +
 +        for(ix=0; ix<overlap; ix++)
 +        {
 +            int iy,iz;
 +
 +            for(iy=0; iy<ny_x; iy++)
 +            {
 +                for(iz=0; iz<nz; iz++)
 +                {
 +                    pmegrid[((nx+ix)*pny+iy)*pnz+iz] =
 +                        pmegrid[(ix*pny+iy)*pnz+iz];
 +                }
 +            }
 +        }
 +    }
 +
 +    if (pme->nnodes_minor == 1)
 +    {
 +#pragma omp parallel for num_threads(pme->nthread) schedule(static)
 +       for(ix=0; ix<pme->pmegrid_nx; ix++)
 +       {
 +           /* loop-local iy/iz keep them private to each OpenMP thread */
 +           int iy,iz;
 +
 +           for(iy=0; iy<overlap; iy++)
 +           {
 +               for(iz=0; iz<nz; iz++)
 +               {
 +                   pmegrid[(ix*pny+ny+iy)*pnz+iz] =
 +                       pmegrid[(ix*pny+iy)*pnz+iz];
 +               }
 +           }
 +       }
 +    }
 +
 +    /* Copy periodic overlap in z */
 +#pragma omp parallel for num_threads(pme->nthread) schedule(static)
 +    for(ix=0; ix<pme->pmegrid_nx; ix++)
 +    {
 +        int iy,iz;
 +
 +        for(iy=0; iy<pme->pmegrid_ny; iy++)
 +        {
 +            for(iz=0; iz<overlap; iz++)
 +            {
 +                pmegrid[(ix*pny+iy)*pnz+nz+iz] =
 +                    pmegrid[(ix*pny+iy)*pnz+iz];
 +            }
 +        }
 +    }
 +}
 +
 +/* Zero the grid blocks (block size FLBS x FLBS x FLBSZ) that a spread of
 + * width 'order' starting at block coordinates (fx,fy,fz) can touch.
 + * 'flag' (dimensions fs, in blocks) records which blocks were already
 + * cleared so each block is zeroed at most once per grid.
 + */
 +static void clear_grid(int nx,int ny,int nz,real *grid,
 +                       ivec fs,int *flag,
 +                       int fx,int fy,int fz,
 +                       int order)
 +{
 +    int nc,ncz;
 +    int fsx,fsy,fsz,gx,gy,gz,g0x,g0y,x,y,z;
 +    int flind;
 +
 +    /* number of blocks a spread of this order can span per dimension */
 +    nc  = 2 + (order - 2)/FLBS;
 +    ncz = 2 + (order - 2)/FLBSZ;
 +
 +    for(fsx=fx; fsx<fx+nc; fsx++)
 +    {
 +        for(fsy=fy; fsy<fy+nc; fsy++)
 +        {
 +            for(fsz=fz; fsz<fz+ncz; fsz++)
 +            {
 +                flind = (fsx*fs[YY] + fsy)*fs[ZZ] + fsz;
 +                if (flag[flind] == 0)
 +                {
 +                    /* convert block coordinates to grid-point coordinates */
 +                    gx = fsx*FLBS;
 +                    gy = fsy*FLBS;
 +                    gz = fsz*FLBSZ;
 +                    g0x = (gx*ny + gy)*nz + gz;
 +                    for(x=0; x<FLBS; x++)
 +                    {
 +                        g0y = g0x;
 +                        for(y=0; y<FLBS; y++)
 +                        {
 +                            for(z=0; z<FLBSZ; z++)
 +                            {
 +                                grid[g0y+z] = 0;
 +                            }
 +                            g0y += nz;
 +                        }
 +                        g0x += ny*nz;
 +                    }
 +
 +                    flag[flind] = 1;
 +                }
 +            }
 +        }
 +    }
 +}
 +
 +/* This has to be a macro to enable full compiler optimization with xlC (and probably others too) */
 +/* Spread one charge qn onto an order^3 stencil of 'grid', starting at
 + * grid indices (i0,j0,k0), weighted by the separable B-spline
 + * coefficients thx/thy/thz. Relies on the caller's locals: grid, qn,
 + * i0, j0, k0, thx, thy, thz, pny, pnz, and the ith*/index* scratch
 + * variables (see spread_q_bsplines_thread below).
 + */
 +#define DO_BSPLINE(order)                            \
 +for(ithx=0; (ithx<order); ithx++)                    \
 +{                                                    \
 +    index_x = (i0+ithx)*pny*pnz;                     \
 +    valx    = qn*thx[ithx];                          \
 +                                                     \
 +    for(ithy=0; (ithy<order); ithy++)                \
 +    {                                                \
 +        valxy    = valx*thy[ithy];                   \
 +        index_xy = index_x+(j0+ithy)*pnz;            \
 +                                                     \
 +        for(ithz=0; (ithz<order); ithz++)            \
 +        {                                            \
 +            index_xyz        = index_xy+(k0+ithz);   \
 +            grid[index_xyz] += valxy*thz[ithz];      \
 +        }                                            \
 +    }                                                \
 +}
 +
 +
 +/* Spread the charges of the atoms listed in spline->ind onto 'pmegrid'
 + * (which is first zeroed in full). Grid indices are offset by the
 + * subgrid origin pmegrid->offset. Orders 4 and 5 use SSE kernels from
 + * pme_sse_single.h when PME_SSE is defined; otherwise and for other
 + * orders the DO_BSPLINE macro above does the scalar spread.
 + * Note: the 'work' parameter and locals b, ol, localsize, bndsize are
 + * unused in this body (presumably kept for the SSE include / API shape).
 + */
 +static void spread_q_bsplines_thread(pmegrid_t *pmegrid,
 +                                     pme_atomcomm_t *atc, splinedata_t *spline,
 +                                     pme_spline_work_t *work)
 +{
 +
 +    /* spread charges from home atoms to local grid */
 +    real     *grid;
 +    pme_overlap_t *ol;
 +    int      b,i,nn,n,ithx,ithy,ithz,i0,j0,k0;
 +    int *    idxptr;
 +    int      order,norder,index_x,index_xy,index_xyz;
 +    real     valx,valxy,qn;
 +    real     *thx,*thy,*thz;
 +    int      localsize, bndsize;
 +    int      pnx,pny,pnz,ndatatot;
 +    int      offx,offy,offz;
 +
 +    pnx = pmegrid->n[XX];
 +    pny = pmegrid->n[YY];
 +    pnz = pmegrid->n[ZZ];
 +
 +    offx = pmegrid->offset[XX];
 +    offy = pmegrid->offset[YY];
 +    offz = pmegrid->offset[ZZ];
 +
 +    /* clear the whole (sub)grid before accumulating */
 +    ndatatot = pnx*pny*pnz;
 +    grid = pmegrid->grid;
 +    for(i=0;i<ndatatot;i++)
 +    {
 +        grid[i] = 0;
 +    }
 +
 +    order = pmegrid->order;
 +
 +    for(nn=0; nn<spline->n; nn++)
 +    {
 +        n  = spline->ind[nn];
 +        qn = atc->q[n];
 +
 +        /* zero-charge atoms contribute nothing; skip them */
 +        if (qn != 0)
 +        {
 +            idxptr = atc->idx[n];
 +            /* spline coefficients are stored pme_order per list entry */
 +            norder = nn*order;
 +
 +            i0   = idxptr[XX] - offx;
 +            j0   = idxptr[YY] - offy;
 +            k0   = idxptr[ZZ] - offz;
 +
 +            thx = spline->theta[XX] + norder;
 +            thy = spline->theta[YY] + norder;
 +            thz = spline->theta[ZZ] + norder;
 +
 +            switch (order) {
 +            case 4:
 +#ifdef PME_SSE
 +#ifdef PME_SSE_UNALIGNED
 +#define PME_SPREAD_SSE_ORDER4
 +#else
 +#define PME_SPREAD_SSE_ALIGNED
 +#define PME_ORDER 4
 +#endif
 +#include "pme_sse_single.h"
 +#else
 +                DO_BSPLINE(4);
 +#endif
 +                break;
 +            case 5:
 +#ifdef PME_SSE
 +#define PME_SPREAD_SSE_ALIGNED
 +#define PME_ORDER 5
 +#include "pme_sse_single.h"
 +#else
 +                DO_BSPLINE(5);
 +#endif
 +                break;
 +            default:
 +                DO_BSPLINE(order);
 +                break;
 +            }
 +        }
 +    }
 +}
 +
 +/* Round the grid z-dimension (*pmegrid_nz) up to a multiple of 4 so
 + * z-rows are suitable for aligned SSE loads/stores in the spread and
 + * gather kernels. Needed for order 5 always, and for order 4 only when
 + * unaligned SSE access is not available. No-op without PME_SSE.
 + */
 +static void set_grid_alignment(int *pmegrid_nz,int pme_order)
 +{
 +#ifdef PME_SSE
 +    if (pme_order == 5
 +#ifndef PME_SSE_UNALIGNED
 +        || pme_order == 4
 +#endif
 +        )
 +    {
 +        /* Round nz up to a multiple of 4 to ensure alignment */
 +        *pmegrid_nz = ((*pmegrid_nz + 3) & ~3);
 +    }
 +#endif
 +}
 +
 +/* Pad *gridsize so that aligned SSE operations near the end of the
 + * grid never read/write past the allocation. Only needed for order 4
 + * without unaligned SSE; for order 5 the z-size alignment done by
 + * set_grid_alignment already guarantees this.
 + */
 +static void set_gridsize_alignment(int *gridsize,int pme_order)
 +{
 +#ifdef PME_SSE
 +#ifndef PME_SSE_UNALIGNED
 +    if (pme_order == 4)
 +    {
 +        /* Add extra elements to ensured aligned operations do not go
 +         * beyond the allocated grid size.
 +         * Note that for pme_order=5, the pme grid z-size alignment
 +         * ensures that we will not go beyond the grid size.
 +         */
 +         *gridsize += 4;
 +    }
 +#endif
 +#endif
 +}
 +
 +/* Initialize one PME (sub)grid.
 + * cx,cy,cz:      index of this subgrid in the thread decomposition.
 + * x0..z1:        interior grid range covered; n[] additionally includes
 + *                the pme_order-1 overlap region in each dimension.
 + * set_alignment: if TRUE, n[ZZ] may be rounded up for SSE alignment;
 + *                otherwise the passed size must already be aligned
 + *                (violations are an internal inconsistency).
 + * ptr:           storage to use for the grid data, or NULL to allocate
 + *                16-byte aligned memory here (with SSE-safe padding).
 + */
 +static void pmegrid_init(pmegrid_t *grid,
 +                         int cx, int cy, int cz,
 +                         int x0, int y0, int z0,
 +                         int x1, int y1, int z1,
 +                         gmx_bool set_alignment,
 +                         int pme_order,
 +                         real *ptr)
 +{
 +    int nz,gridsize;
 +
 +    grid->ci[XX] = cx;
 +    grid->ci[YY] = cy;
 +    grid->ci[ZZ] = cz;
 +    grid->offset[XX] = x0;
 +    grid->offset[YY] = y0;
 +    grid->offset[ZZ] = z0;
 +    grid->n[XX]      = x1 - x0 + pme_order - 1;
 +    grid->n[YY]      = y1 - y0 + pme_order - 1;
 +    grid->n[ZZ]      = z1 - z0 + pme_order - 1;
 +
 +    nz = grid->n[ZZ];
 +    set_grid_alignment(&nz,pme_order);
 +    if (set_alignment)
 +    {
 +        grid->n[ZZ] = nz;
 +    }
 +    else if (nz != grid->n[ZZ])
 +    {
 +        gmx_incons("pmegrid_init call with an unaligned z size");
 +    }
 +
 +    grid->order = pme_order;
 +    if (ptr == NULL)
 +    {
 +        gridsize = grid->n[XX]*grid->n[YY]*grid->n[ZZ];
 +        set_gridsize_alignment(&gridsize,pme_order);
 +        snew_aligned(grid->grid,gridsize,16);
 +    }
 +    else
 +    {
 +        grid->grid = ptr;
 +    }
 +}
 +
 +/* Integer division rounding up ("enumerator" is the numerator) */
 +static int div_round_up(int enumerator,int denominator)
 +{
 +    return (enumerator + denominator - 1)/denominator;
 +}
 +
 +/* Choose a 3D thread decomposition nsub[] with
 + * nsub[XX]*nsub[YY]*nsub[ZZ] == nthread for a grid of n[] points with
 + * overlap ovl, minimizing the number of grid points per thread and,
 + * as a tie-breaker, the number of cuts in the minor dimensions.
 + * The choice can be overridden via the GMX_PME_THREAD_DIVISION
 + * environment variable ("nx ny nz"); an override whose product does
 + * not equal nthread is a fatal error.
 + */
 +static void make_subgrid_division(const ivec n,int ovl,int nthread,
 +                                  ivec nsub)
 +{
 +    int gsize_opt,gsize;
 +    int nsx,nsy,nsz;
 +    char *env;
 +
 +    gsize_opt = -1;
 +    /* Exhaustively try all factorizations of nthread over x,y,z */
 +    for(nsx=1; nsx<=nthread; nsx++)
 +    {
 +        if (nthread % nsx == 0)
 +        {
 +            for(nsy=1; nsy<=nthread; nsy++)
 +            {
 +                if (nsx*nsy <= nthread && nthread % (nsx*nsy) == 0)
 +                {
 +                    nsz = nthread/(nsx*nsy);
 +
 +                    /* Determine the number of grid points per thread */
 +                    gsize =
 +                        (div_round_up(n[XX],nsx) + ovl)*
 +                        (div_round_up(n[YY],nsy) + ovl)*
 +                        (div_round_up(n[ZZ],nsz) + ovl);
 +
 +                    /* Minimize the number of grids points per thread
 +                     * and, secondarily, the number of cuts in minor dimensions.
 +                     */
 +                    if (gsize_opt == -1 ||
 +                        gsize < gsize_opt ||
 +                        (gsize == gsize_opt &&
 +                         (nsz < nsub[ZZ] || (nsz == nsub[ZZ] && nsy < nsub[YY]))))
 +                    {
 +                        nsub[XX] = nsx;
 +                        nsub[YY] = nsy;
 +                        nsub[ZZ] = nsz;
 +                        gsize_opt = gsize;
 +                    }
 +                }
 +            }
 +        }
 +    }
 +
 +    /* Optional manual override, mainly for debugging/tuning */
 +    env = getenv("GMX_PME_THREAD_DIVISION");
 +    if (env != NULL)
 +    {
 +        sscanf(env,"%d %d %d",&nsub[XX],&nsub[YY],&nsub[ZZ]);
 +    }
 +
 +    if (nsub[XX]*nsub[YY]*nsub[ZZ] != nthread)
 +    {
 +        gmx_fatal(FARGS,"PME grid thread division (%d x %d x %d) does not match the total number of threads (%d)",nsub[XX],nsub[YY],nsub[ZZ],nthread);
 +    }
 +}
 +
 +/* Set up the full PME grid plus, for nthread > 1, the per-thread
 + * subgrids (all carved out of one shared allocation, separated by
 + * GMX_CACHE_SEP elements to avoid false sharing), the grid-line to
 + * thread lookup tables g2t[] and the per-dimension thread
 + * communication ranges nthread_comm[].
 + * nx,ny,nz include the pme_order-1 overlap; nz_base is the z-size
 + * used for the thread division (excluding any alignment padding).
 + */
 +static void pmegrids_init(pmegrids_t *grids,
 +                          int nx,int ny,int nz,int nz_base,
 +                          int pme_order,
 +                          int nthread,
 +                          int overlap_x,
 +                          int overlap_y)
 +{
 +    ivec n,n_base,g0,g1;
 +    int t,x,y,z,d,i,tfac;
 +    int max_comm_lines;
 +
 +    /* n[] is the interior grid size without the spreading overlap */
 +    n[XX] = nx - (pme_order - 1);
 +    n[YY] = ny - (pme_order - 1);
 +    n[ZZ] = nz - (pme_order - 1);
 +
 +    copy_ivec(n,n_base);
 +    n_base[ZZ] = nz_base;
 +
 +    pmegrid_init(&grids->grid,0,0,0,0,0,0,n[XX],n[YY],n[ZZ],FALSE,pme_order,
 +                 NULL);
 +
 +    grids->nthread = nthread;
 +
 +    make_subgrid_division(n_base,pme_order-1,grids->nthread,grids->nc);
 +
 +    if (grids->nthread > 1)
 +    {
 +        ivec nst;
 +        int gridsize;
 +        real *grid_all;
 +
 +        /* nst[] is the maximum per-thread subgrid size incl. overlap */
 +        for(d=0; d<DIM; d++)
 +        {
 +            nst[d] = div_round_up(n[d],grids->nc[d]) + pme_order - 1;
 +        }
 +        set_grid_alignment(&nst[ZZ],pme_order);
 +
 +        if (debug)
 +        {
 +            fprintf(debug,"pmegrid thread local division: %d x %d x %d\n",
 +                    grids->nc[XX],grids->nc[YY],grids->nc[ZZ]);
 +            fprintf(debug,"pmegrid %d %d %d max thread pmegrid %d %d %d\n",
 +                    nx,ny,nz,
 +                    nst[XX],nst[YY],nst[ZZ]);
 +        }
 +
 +        /* One big allocation for all thread grids, cache-line separated */
 +        snew(grids->grid_th,grids->nthread);
 +        t = 0;
 +        gridsize = nst[XX]*nst[YY]*nst[ZZ];
 +        set_gridsize_alignment(&gridsize,pme_order);
 +        snew_aligned(grid_all,
 +                     grids->nthread*gridsize+(grids->nthread+1)*GMX_CACHE_SEP,
 +                     16);
 +
 +        for(x=0; x<grids->nc[XX]; x++)
 +        {
 +            for(y=0; y<grids->nc[YY]; y++)
 +            {
 +                for(z=0; z<grids->nc[ZZ]; z++)
 +                {
 +                    pmegrid_init(&grids->grid_th[t],
 +                                 x,y,z,
 +                                 (n[XX]*(x  ))/grids->nc[XX],
 +                                 (n[YY]*(y  ))/grids->nc[YY],
 +                                 (n[ZZ]*(z  ))/grids->nc[ZZ],
 +                                 (n[XX]*(x+1))/grids->nc[XX],
 +                                 (n[YY]*(y+1))/grids->nc[YY],
 +                                 (n[ZZ]*(z+1))/grids->nc[ZZ],
 +                                 TRUE,
 +                                 pme_order,
 +                                 grid_all+GMX_CACHE_SEP+t*(gridsize+GMX_CACHE_SEP));
 +                    t++;
 +                }
 +            }
 +        }
 +    }
 +
 +    /* Build the per-dimension lookup from grid line to thread index */
 +    snew(grids->g2t,DIM);
 +    tfac = 1;
 +    for(d=DIM-1; d>=0; d--)
 +    {
 +        snew(grids->g2t[d],n[d]);
 +        t = 0;
 +        for(i=0; i<n[d]; i++)
 +        {
 +            /* The second check should match the parameters
 +             * of the pmegrid_init call above.
 +             */
 +            while (t + 1 < grids->nc[d] && i >= (n[d]*(t+1))/grids->nc[d])
 +            {
 +                t++;
 +            }
 +            grids->g2t[d][i] = t*tfac;
 +        }
 +
 +        tfac *= grids->nc[d];
 +
 +        /* How far grid overlap reduction has to reach in this dimension */
 +        switch (d)
 +        {
 +        case XX: max_comm_lines = overlap_x;     break;
 +        case YY: max_comm_lines = overlap_y;     break;
 +        case ZZ: max_comm_lines = pme_order - 1; break;
 +        }
 +        grids->nthread_comm[d] = 0;
 +        while ((n[d]*grids->nthread_comm[d])/grids->nc[d] < max_comm_lines)
 +        {
 +            grids->nthread_comm[d]++;
 +        }
 +        if (debug != NULL)
 +        {
 +            fprintf(debug,"pmegrid thread grid communication range in %c: %d\n",
 +                    'x'+d,grids->nthread_comm[d]);
 +        }
 +        /* It should be possible to make grids->nthread_comm[d]==grids->nc[d]
 +         * work, but this is not a problematic restriction.
 +         */
 +        if (grids->nc[d] > 1 && grids->nthread_comm[d] > grids->nc[d])
 +        {
 +            gmx_fatal(FARGS,"Too many threads for PME (%d) compared to the number of grid lines, reduce the number of threads doing PME",grids->nthread);
 +        }
 +    }
 +}
 +
 +
 +/* Free the grid storage set up by pmegrids_init.
 + * NOTE(review): grids->grid.grid is allocated with snew_aligned in
 + * pmegrid_init but freed here with plain sfree, and for nthread > 1
 + * the per-thread grids point into a single shared grid_all allocation
 + * (see pmegrids_init) rather than owning separate buffers, yet each is
 + * sfree'd individually -- confirm these free calls match the actual
 + * allocation/ownership scheme.
 + */
 +static void pmegrids_destroy(pmegrids_t *grids)
 +{
 +    int t;
 +
 +    if (grids->grid.grid != NULL)
 +    {
 +        sfree(grids->grid.grid);
 +
 +        if (grids->nthread > 0)
 +        {
 +            for(t=0; t<grids->nthread; t++)
 +            {
 +                sfree(grids->grid_th[t].grid);
 +            }
 +            sfree(grids->grid_th);
 +        }
 +    }
 +}
 +
 +
 +/* Grow the per-thread solver work arrays to hold at least nkx elements;
 + * never shrinks. denom, tmp1 and eterm are (re)allocated 16-byte
 + * aligned with 3 extra elements so SSE code can always process groups
 + * of 4. Must be kept in sync with free_work.
 + */
 +static void realloc_work(pme_work_t *work,int nkx)
 +{
 +    if (nkx > work->nalloc)
 +    {
 +        work->nalloc = nkx;
 +        srenew(work->mhx  ,work->nalloc);
 +        srenew(work->mhy  ,work->nalloc);
 +        srenew(work->mhz  ,work->nalloc);
 +        srenew(work->m2   ,work->nalloc);
 +        /* Allocate an aligned pointer for SSE operations, including 3 extra
 +         * elements at the end since SSE operates on 4 elements at a time.
 +         */
 +        sfree_aligned(work->denom);
 +        sfree_aligned(work->tmp1);
 +        sfree_aligned(work->eterm);
 +        snew_aligned(work->denom,work->nalloc+3,16);
 +        snew_aligned(work->tmp1 ,work->nalloc+3,16);
 +        snew_aligned(work->eterm,work->nalloc+3,16);
 +        srenew(work->m2inv,work->nalloc);
 +    }
 +}
 +
 +
 +/* Free the solver work arrays; mirrors the allocations in realloc_work
 + * (aligned free for the aligned arrays, plain sfree for the rest).
 + */
 +static void free_work(pme_work_t *work)
 +{
 +    sfree(work->mhx);
 +    sfree(work->mhy);
 +    sfree(work->mhz);
 +    sfree(work->m2);
 +    sfree_aligned(work->denom);
 +    sfree_aligned(work->tmp1);
 +    sfree_aligned(work->eterm);
 +    sfree(work->m2inv);
 +}
 +
 +
 +#ifdef PME_SSE
 +    /* Calculate exponentials through SSE in float precision */
 +/* Computes e[kx] = f * exp(r[kx]) / d[kx] using SSE, 4 elements at a
 + * time, with one Newton-Raphson refinement step on _mm_rcp_ps for 1/d.
 + * All three arrays must be 16-byte aligned and padded past `end`
 + * (realloc_work provides 3 padding elements).
 + * NOTE(review): the `start` argument is ignored in this branch -- the
 + * loop always begins at kx = 0; confirm callers rely on the extra
 + * leading elements being harmless.
 + */
 +inline static void calc_exponentials(int start, int end, real f, real *d_aligned, real *r_aligned, real *e_aligned)
 +{
 +    {
 +        const __m128 two = _mm_set_ps(2.0f,2.0f,2.0f,2.0f);
 +        __m128 f_sse;
 +        __m128 lu;
 +        __m128 tmp_d1,d_inv,tmp_r,tmp_e;
 +        int kx;
 +        f_sse = _mm_load1_ps(&f);
 +        for(kx=0; kx<end; kx+=4)
 +        {
 +            tmp_d1   = _mm_load_ps(d_aligned+kx);
 +            lu       = _mm_rcp_ps(tmp_d1);
 +            /* One Newton iteration: d_inv = lu*(2 - lu*d) */
 +            d_inv    = _mm_mul_ps(lu,_mm_sub_ps(two,_mm_mul_ps(lu,tmp_d1)));
 +            tmp_r    = _mm_load_ps(r_aligned+kx);
 +            tmp_r    = gmx_mm_exp_ps(tmp_r);
 +            tmp_e    = _mm_mul_ps(f_sse,d_inv);
 +            tmp_e    = _mm_mul_ps(tmp_e,tmp_r);
 +            _mm_store_ps(e_aligned+kx,tmp_e);
 +        }
 +    }
 +}
 +#else
 +/* Scalar fallback: e[kx] = f * exp(r[kx]) / d[kx] for kx in [start,end).
 + * Written as three separate loops over d, r and e to aid vectorization.
 + */
 +inline static void calc_exponentials(int start, int end, real f, real *d, real *r, real *e)
 +{
 +    int kx;
 +    for(kx=start; kx<end; kx++)
 +    {
 +        d[kx] = 1.0/d[kx];
 +    }
 +    for(kx=start; kx<end; kx++)
 +    {
 +        r[kx] = exp(r[kx]);
 +    }
 +    for(kx=start; kx<end; kx++)
 +    {
 +        e[kx] = f*r[kx]*d[kx];
 +    }
 +}
 +#endif
 +
 +
 +/* Solve the PME reciprocal-space part for this thread's share of the
 + * (y,z) lines of the complex grid: multiply each k-space point by the
 + * Ewald/B-spline convolution factor, and, if bEnerVir, accumulate the
 + * reciprocal energy and virial into pme->work[thread] (summed over
 + * threads later by get_pme_ener_vir).
 + * Layout is y major, z middle, x minor/contiguous.
 + * Returns the loop count (used for flop accounting by the caller).
 + */
 +static int solve_pme_yzx(gmx_pme_t pme,t_complex *grid,
 +                         real ewaldcoeff,real vol,
 +                         gmx_bool bEnerVir,
 +                         int nthread,int thread)
 +{
 +    /* do recip sum over local cells in grid */
 +    /* y major, z middle, x minor or continuous */
 +    t_complex *p0;
 +    int     kx,ky,kz,maxkx,maxky,maxkz;
 +    int     nx,ny,nz,iyz0,iyz1,iyz,iy,iz,kxstart,kxend;
 +    real    mx,my,mz;
 +    real    factor=M_PI*M_PI/(ewaldcoeff*ewaldcoeff);
 +    real    ets2,struct2,vfactor,ets2vf;
 +    real    d1,d2,energy=0;
 +    real    by,bz;
 +    real    virxx=0,virxy=0,virxz=0,viryy=0,viryz=0,virzz=0;
 +    real    rxx,ryx,ryy,rzx,rzy,rzz;
 +    pme_work_t *work;
 +    real    *mhx,*mhy,*mhz,*m2,*denom,*tmp1,*eterm,*m2inv;
 +    real    mhxk,mhyk,mhzk,m2k;
 +    real    corner_fac;
 +    ivec    complex_order;
 +    ivec    local_ndata,local_offset,local_size;
 +    real    elfac;
 +
 +    elfac = ONE_4PI_EPS0/pme->epsilon_r;
 +
 +    nx = pme->nkx;
 +    ny = pme->nky;
 +    nz = pme->nkz;
 +
 +    /* Dimensions should be identical for A/B grid, so we just use A here */
 +    gmx_parallel_3dfft_complex_limits(pme->pfft_setupA,
 +                                      complex_order,
 +                                      local_ndata,
 +                                      local_offset,
 +                                      local_size);
 +
 +    rxx = pme->recipbox[XX][XX];
 +    ryx = pme->recipbox[YY][XX];
 +    ryy = pme->recipbox[YY][YY];
 +    rzx = pme->recipbox[ZZ][XX];
 +    rzy = pme->recipbox[ZZ][YY];
 +    rzz = pme->recipbox[ZZ][ZZ];
 +
 +    maxkx = (nx+1)/2;
 +    maxky = (ny+1)/2;
 +    maxkz = nz/2+1;
 +
 +    work = &pme->work[thread];
 +    mhx   = work->mhx;
 +    mhy   = work->mhy;
 +    mhz   = work->mhz;
 +    m2    = work->m2;
 +    denom = work->denom;
 +    tmp1  = work->tmp1;
 +    eterm = work->eterm;
 +    m2inv = work->m2inv;
 +
 +    /* Static division of the (y,z) lines over the threads */
 +    iyz0 = local_ndata[YY]*local_ndata[ZZ]* thread   /nthread;
 +    iyz1 = local_ndata[YY]*local_ndata[ZZ]*(thread+1)/nthread;
 +
 +    for(iyz=iyz0; iyz<iyz1; iyz++)
 +    {
 +        iy = iyz/local_ndata[ZZ];
 +        iz = iyz - iy*local_ndata[ZZ];
 +
 +        ky = iy + local_offset[YY];
 +
 +        /* Map the grid index to the signed wave number */
 +        if (ky < maxky)
 +        {
 +            my = ky;
 +        }
 +        else
 +        {
 +            my = (ky - ny);
 +        }
 +
 +        by = M_PI*vol*pme->bsp_mod[YY][ky];
 +
 +        kz = iz + local_offset[ZZ];
 +
 +        mz = kz;
 +
 +        bz = pme->bsp_mod[ZZ][kz];
 +
 +        /* 0.5 correction for corner points */
 +        corner_fac = 1;
 +        if (kz == 0 || kz == (nz+1)/2)
 +        {
 +            corner_fac = 0.5;
 +        }
 +
 +        p0 = grid + iy*local_size[ZZ]*local_size[XX] + iz*local_size[XX];
 +
 +        /* We should skip the k-space point (0,0,0) */
 +        if (local_offset[XX] > 0 || ky > 0 || kz > 0)
 +        {
 +            kxstart = local_offset[XX];
 +        }
 +        else
 +        {
 +            kxstart = local_offset[XX] + 1;
 +            p0++;
 +        }
 +        kxend = local_offset[XX] + local_ndata[XX];
 +
 +        if (bEnerVir)
 +        {
 +            /* More expensive inner loop, especially because of the storage
 +             * of the mh elements in array's.
 +             * Because x is the minor grid index, all mh elements
 +             * depend on kx for triclinic unit cells.
 +             */
 +
 +                /* Two explicit loops to avoid a conditional inside the loop */
 +            for(kx=kxstart; kx<maxkx; kx++)
 +            {
 +                mx = kx;
 +
 +                mhxk      = mx * rxx;
 +                mhyk      = mx * ryx + my * ryy;
 +                mhzk      = mx * rzx + my * rzy + mz * rzz;
 +                m2k       = mhxk*mhxk + mhyk*mhyk + mhzk*mhzk;
 +                mhx[kx]   = mhxk;
 +                mhy[kx]   = mhyk;
 +                mhz[kx]   = mhzk;
 +                m2[kx]    = m2k;
 +                denom[kx] = m2k*bz*by*pme->bsp_mod[XX][kx];
 +                tmp1[kx]  = -factor*m2k;
 +            }
 +
 +            for(kx=maxkx; kx<kxend; kx++)
 +            {
 +                mx = (kx - nx);
 +
 +                mhxk      = mx * rxx;
 +                mhyk      = mx * ryx + my * ryy;
 +                mhzk      = mx * rzx + my * rzy + mz * rzz;
 +                m2k       = mhxk*mhxk + mhyk*mhyk + mhzk*mhzk;
 +                mhx[kx]   = mhxk;
 +                mhy[kx]   = mhyk;
 +                mhz[kx]   = mhzk;
 +                m2[kx]    = m2k;
 +                denom[kx] = m2k*bz*by*pme->bsp_mod[XX][kx];
 +                tmp1[kx]  = -factor*m2k;
 +            }
 +
 +            for(kx=kxstart; kx<kxend; kx++)
 +            {
 +                m2inv[kx] = 1.0/m2[kx];
 +            }
 +
 +            /* eterm[kx] = elfac*exp(tmp1[kx])/denom[kx] */
 +            calc_exponentials(kxstart,kxend,elfac,denom,tmp1,eterm);
 +
 +            /* Apply the convolution and store |S(m)|^2 terms in tmp1 */
 +            for(kx=kxstart; kx<kxend; kx++,p0++)
 +            {
 +                d1      = p0->re;
 +                d2      = p0->im;
 +
 +                p0->re  = d1*eterm[kx];
 +                p0->im  = d2*eterm[kx];
 +
 +                struct2 = 2.0*(d1*d1+d2*d2);
 +
 +                tmp1[kx] = eterm[kx]*struct2;
 +            }
 +
 +            /* Accumulate energy and virial contributions */
 +            for(kx=kxstart; kx<kxend; kx++)
 +            {
 +                ets2     = corner_fac*tmp1[kx];
 +                vfactor  = (factor*m2[kx] + 1.0)*2.0*m2inv[kx];
 +                energy  += ets2;
 +
 +                ets2vf   = ets2*vfactor;
 +                virxx   += ets2vf*mhx[kx]*mhx[kx] - ets2;
 +                virxy   += ets2vf*mhx[kx]*mhy[kx];
 +                virxz   += ets2vf*mhx[kx]*mhz[kx];
 +                viryy   += ets2vf*mhy[kx]*mhy[kx] - ets2;
 +                viryz   += ets2vf*mhy[kx]*mhz[kx];
 +                virzz   += ets2vf*mhz[kx]*mhz[kx] - ets2;
 +            }
 +        }
 +        else
 +        {
 +            /* We don't need to calculate the energy and the virial.
 +             * In this case the triclinic overhead is small.
 +             */
 +
 +            /* Two explicit loops to avoid a conditional inside the loop */
 +
 +            for(kx=kxstart; kx<maxkx; kx++)
 +            {
 +                mx = kx;
 +
 +                mhxk      = mx * rxx;
 +                mhyk      = mx * ryx + my * ryy;
 +                mhzk      = mx * rzx + my * rzy + mz * rzz;
 +                m2k       = mhxk*mhxk + mhyk*mhyk + mhzk*mhzk;
 +                denom[kx] = m2k*bz*by*pme->bsp_mod[XX][kx];
 +                tmp1[kx]  = -factor*m2k;
 +            }
 +
 +            for(kx=maxkx; kx<kxend; kx++)
 +            {
 +                mx = (kx - nx);
 +
 +                mhxk      = mx * rxx;
 +                mhyk      = mx * ryx + my * ryy;
 +                mhzk      = mx * rzx + my * rzy + mz * rzz;
 +                m2k       = mhxk*mhxk + mhyk*mhyk + mhzk*mhzk;
 +                denom[kx] = m2k*bz*by*pme->bsp_mod[XX][kx];
 +                tmp1[kx]  = -factor*m2k;
 +            }
 +
 +            calc_exponentials(kxstart,kxend,elfac,denom,tmp1,eterm);
 +
 +            for(kx=kxstart; kx<kxend; kx++,p0++)
 +            {
 +                d1      = p0->re;
 +                d2      = p0->im;
 +
 +                p0->re  = d1*eterm[kx];
 +                p0->im  = d2*eterm[kx];
 +            }
 +        }
 +    }
 +
 +    if (bEnerVir)
 +    {
 +        /* Update virial with local values.
 +         * The virial is symmetric by definition.
 +         * this virial seems ok for isotropic scaling, but I'm
 +         * experiencing problems on semiisotropic membranes.
 +         * IS THAT COMMENT STILL VALID??? (DvdS, 2001/02/07).
 +         */
 +        work->vir[XX][XX] = 0.25*virxx;
 +        work->vir[YY][YY] = 0.25*viryy;
 +        work->vir[ZZ][ZZ] = 0.25*virzz;
 +        work->vir[XX][YY] = work->vir[YY][XX] = 0.25*virxy;
 +        work->vir[XX][ZZ] = work->vir[ZZ][XX] = 0.25*virxz;
 +        work->vir[YY][ZZ] = work->vir[ZZ][YY] = 0.25*viryz;
 +
 +        /* This energy should be corrected for a charged system */
 +        work->energy = 0.5*energy;
 +    }
 +
 +    /* Return the loop count */
 +    return local_ndata[YY]*local_ndata[XX];
 +}
 +
 +/* Sum the per-thread reciprocal energy and virial (filled in by
 + * solve_pme_yzx) into *mesh_energy and vir.
 + */
 +static void get_pme_ener_vir(const gmx_pme_t pme,int nthread,
 +                             real *mesh_energy,matrix vir)
 +{
 +    /* This function sums output over threads
 +     * and should therefore only be called after thread synchronization.
 +     */
 +    int thread;
 +
 +    *mesh_energy = pme->work[0].energy;
 +    copy_mat(pme->work[0].vir,vir);
 +
 +    for(thread=1; thread<nthread; thread++)
 +    {
 +        *mesh_energy += pme->work[thread].energy;
 +        m_add(vir,pme->work[thread].vir,vir);
 +    }
 +}
 +
 +/* Inner loops of the force gather: accumulate fx,fy,fz for one atom
 + * from an order^3 grid neighborhood, using spline values (thx/thy/thz)
 + * and spline derivatives (dthx/dthy/dthz). Expects i0,j0,k0, pny,pnz,
 + * grid and the fx/fy/fz accumulators to be in scope at the expansion
 + * site. Fixing `order` at expansion lets the compiler fully unroll.
 + */
 +#define DO_FSPLINE(order)                      \
 +for(ithx=0; (ithx<order); ithx++)              \
 +{                                              \
 +    index_x = (i0+ithx)*pny*pnz;               \
 +    tx      = thx[ithx];                       \
 +    dx      = dthx[ithx];                      \
 +                                               \
 +    for(ithy=0; (ithy<order); ithy++)          \
 +    {                                          \
 +        index_xy = index_x+(j0+ithy)*pnz;      \
 +        ty       = thy[ithy];                  \
 +        dy       = dthy[ithy];                 \
 +        fxy1     = fz1 = 0;                    \
 +                                               \
 +        for(ithz=0; (ithz<order); ithz++)      \
 +        {                                      \
 +            gval  = grid[index_xy+(k0+ithz)];  \
 +            fxy1 += thz[ithz]*gval;            \
 +            fz1  += dthz[ithz]*gval;           \
 +        }                                      \
 +        fx += dx*ty*fxy1;                      \
 +        fy += tx*dy*fxy1;                      \
 +        fz += tx*ty*fz1;                       \
 +    }                                          \
 +}
 +
 +
 +/* Interpolate forces on the atoms listed in spline->ind from the
 + * potential grid, scaled by `scale` (used for the free-energy A/B
 + * grids). If bClearF the force is assigned, otherwise accumulated
 + * onto atc->f.
 + */
 +static void gather_f_bsplines(gmx_pme_t pme,real *grid,
 +                              gmx_bool bClearF,pme_atomcomm_t *atc,
 +                              splinedata_t *spline,
 +                              real scale)
 +{
 +    /* sum forces for local particles */
 +    int     nn,n,ithx,ithy,ithz,i0,j0,k0;
 +    int     index_x,index_xy;
 +    int     nx,ny,nz,pnx,pny,pnz;
 +    int *   idxptr;
 +    real    tx,ty,dx,dy,qn;
 +    real    fx,fy,fz,gval;
 +    real    fxy1,fz1;
 +    real    *thx,*thy,*thz,*dthx,*dthy,*dthz;
 +    int     norder;
 +    real    rxx,ryx,ryy,rzx,rzy,rzz;
 +    int     order;
 +
 +    pme_spline_work_t *work;
 +
 +    work = &pme->spline_work;
 +
 +    order = pme->pme_order;
 +    thx   = spline->theta[XX];
 +    thy   = spline->theta[YY];
 +    thz   = spline->theta[ZZ];
 +    dthx  = spline->dtheta[XX];
 +    dthy  = spline->dtheta[YY];
 +    dthz  = spline->dtheta[ZZ];
 +    nx    = pme->nkx;
 +    ny    = pme->nky;
 +    nz    = pme->nkz;
 +    pnx   = pme->pmegrid_nx;
 +    pny   = pme->pmegrid_ny;
 +    pnz   = pme->pmegrid_nz;
 +
 +    rxx   = pme->recipbox[XX][XX];
 +    ryx   = pme->recipbox[YY][XX];
 +    ryy   = pme->recipbox[YY][YY];
 +    rzx   = pme->recipbox[ZZ][XX];
 +    rzy   = pme->recipbox[ZZ][YY];
 +    rzz   = pme->recipbox[ZZ][ZZ];
 +
 +    for(nn=0; nn<spline->n; nn++)
 +    {
 +        n  = spline->ind[nn];
 +        qn = scale*atc->q[n];
 +
 +        if (bClearF)
 +        {
 +            atc->f[n][XX] = 0;
 +            atc->f[n][YY] = 0;
 +            atc->f[n][ZZ] = 0;
 +        }
 +        if (qn != 0)
 +        {
 +            fx     = 0;
 +            fy     = 0;
 +            fz     = 0;
 +            idxptr = atc->idx[n];
 +            norder = nn*order;
 +
 +            i0   = idxptr[XX];
 +            j0   = idxptr[YY];
 +            k0   = idxptr[ZZ];
 +
 +            /* Pointer arithmetic alert, next six statements */
 +            thx  = spline->theta[XX] + norder;
 +            thy  = spline->theta[YY] + norder;
 +            thz  = spline->theta[ZZ] + norder;
 +            dthx = spline->dtheta[XX] + norder;
 +            dthy = spline->dtheta[YY] + norder;
 +            dthz = spline->dtheta[ZZ] + norder;
 +
 +            /* Common orders 4 and 5 use SSE kernels or unrolled loops */
 +            switch (order) {
 +            case 4:
 +#ifdef PME_SSE
 +#ifdef PME_SSE_UNALIGNED
 +#define PME_GATHER_F_SSE_ORDER4
 +#else
 +#define PME_GATHER_F_SSE_ALIGNED
 +#define PME_ORDER 4
 +#endif
 +#include "pme_sse_single.h"
 +#else
 +                DO_FSPLINE(4);
 +#endif
 +                break;
 +            case 5:
 +#ifdef PME_SSE
 +#define PME_GATHER_F_SSE_ALIGNED
 +#define PME_ORDER 5
 +#include "pme_sse_single.h"
 +#else
 +                DO_FSPLINE(5);
 +#endif
 +                break;
 +            default:
 +                DO_FSPLINE(order);
 +                break;
 +            }
 +
 +            /* Convert the fractional-coordinate gradient to Cartesian
 +             * forces via the reciprocal box (lower-triangular).
 +             */
 +            atc->f[n][XX] += -qn*( fx*nx*rxx );
 +            atc->f[n][YY] += -qn*( fx*nx*ryx + fy*ny*ryy );
 +            atc->f[n][ZZ] += -qn*( fx*nx*rzx + fy*ny*rzy + fz*nz*rzz );
 +        }
 +    }
 +    /* Since the energy and not forces are interpolated
 +     * the net force might not be exactly zero.
 +     * This can be solved by also interpolating F, but
 +     * that comes at a cost.
 +     * A better hack is to remove the net force every
 +     * step, but that must be done at a higher level
 +     * since this routine doesn't see all atoms if running
 +     * in parallel. Don't know how important it is?  EL 990726
 +     */
 +}
 +
 +
 +/* Return sum_n q_n * phi(r_n): interpolate the potential from the
 + * grid at each charged atom's position using the B-splines of spline
 + * set 0 and accumulate the charge-weighted sum.
 + */
 +static real gather_energy_bsplines(gmx_pme_t pme,real *grid,
 +                                   pme_atomcomm_t *atc)
 +{
 +    splinedata_t *spline;
 +    int     n,ithx,ithy,ithz,i0,j0,k0;
 +    int     index_x,index_xy;
 +    int *   idxptr;
 +    real    energy,pot,tx,ty,qn,gval;
 +    real    *thx,*thy,*thz;
 +    int     norder;
 +    int     order;
 +
 +    spline = &atc->spline[0];
 +
 +    order = pme->pme_order;
 +
 +    energy = 0;
 +    for(n=0; (n<atc->n); n++) {
 +        qn      = atc->q[n];
 +
 +        if (qn != 0) {
 +            idxptr = atc->idx[n];
 +            norder = n*order;
 +
 +            i0   = idxptr[XX];
 +            j0   = idxptr[YY];
 +            k0   = idxptr[ZZ];
 +
 +            /* Pointer arithmetic alert, next three statements */
 +            thx  = spline->theta[XX] + norder;
 +            thy  = spline->theta[YY] + norder;
 +            thz  = spline->theta[ZZ] + norder;
 +
 +            /* Interpolate the potential over the order^3 neighborhood */
 +            pot = 0;
 +            for(ithx=0; (ithx<order); ithx++)
 +            {
 +                index_x = (i0+ithx)*pme->pmegrid_ny*pme->pmegrid_nz;
 +                tx      = thx[ithx];
 +
 +                for(ithy=0; (ithy<order); ithy++)
 +                {
 +                    index_xy = index_x+(j0+ithy)*pme->pmegrid_nz;
 +                    ty       = thy[ithy];
 +
 +                    for(ithz=0; (ithz<order); ithz++)
 +                    {
 +                        gval  = grid[index_xy+(k0+ithz)];
 +                        pot  += tx*ty*thz[ithz]*gval;
 +                    }
 +
 +                }
 +            }
 +
 +            energy += pot*qn;
 +        }
 +    }
 +
 +    return energy;
 +}
 +
 +/* Macro to force loop unrolling by fixing order.
 + * This gives a significant performance gain.
 + */
 +/* Computes, for atom i and each dimension j, the `order` B-spline
 + * weights data[] (built up by the standard B-spline recursion) and
 + * their derivatives ddata[] from the fractional coordinate xptr[j],
 + * storing them in theta/dtheta at offset i*order. Expects i, xptr,
 + * theta and dtheta to be in scope at the expansion site.
 + */
 +#define CALC_SPLINE(order)                     \
 +{                                              \
 +    int j,k,l;                                 \
 +    real dr,div;                               \
 +    real data[PME_ORDER_MAX];                  \
 +    real ddata[PME_ORDER_MAX];                 \
 +                                               \
 +    for(j=0; (j<DIM); j++)                     \
 +    {                                          \
 +        dr  = xptr[j];                         \
 +                                               \
 +        /* dr is relative offset from lower cell limit */ \
 +        data[order-1] = 0;                     \
 +        data[1] = dr;                          \
 +        data[0] = 1 - dr;                      \
 +                                               \
 +        for(k=3; (k<order); k++)               \
 +        {                                      \
 +            div = 1.0/(k - 1.0);               \
 +            data[k-1] = div*dr*data[k-2];      \
 +            for(l=1; (l<(k-1)); l++)           \
 +            {                                  \
 +                data[k-l-1] = div*((dr+l)*data[k-l-2]+(k-l-dr)* \
 +                                   data[k-l-1]);                \
 +            }                                  \
 +            data[0] = div*(1-dr)*data[0];      \
 +        }                                      \
 +        /* differentiate */                    \
 +        ddata[0] = -data[0];                   \
 +        for(k=1; (k<order); k++)               \
 +        {                                      \
 +            ddata[k] = data[k-1] - data[k];    \
 +        }                                      \
 +                                               \
 +        div = 1.0/(order - 1);                 \
 +        data[order-1] = div*dr*data[order-2];  \
 +        for(l=1; (l<(order-1)); l++)           \
 +        {                                      \
 +            data[order-l-1] = div*((dr+l)*data[order-l-2]+    \
 +                               (order-l-dr)*data[order-l-1]); \
 +        }                                      \
 +        data[0] = div*(1 - dr)*data[0];        \
 +                                               \
 +        for(k=0; k<order; k++)                 \
 +        {                                      \
 +            theta[j][i*order+k]  = data[k];    \
 +            dtheta[j][i*order+k] = ddata[k];   \
 +        }                                      \
 +    }                                          \
 +}
 +
 +/* Fill theta/dtheta with B-spline coefficients of the given order for
 + * the nr atoms listed in ind[], computed from the fractional
 + * coordinates fractx. With free energy enabled all listed atoms are
 + * processed; otherwise atoms with zero charge are skipped.
 + */
 +void make_bsplines(splinevec theta,splinevec dtheta,int order,
 +                   rvec fractx[],int nr,int ind[],real charge[],
 +                   gmx_bool bFreeEnergy)
 +{
 +    /* construct splines for local atoms */
 +    int  i,ii;
 +    real *xptr;
 +
 +    for(i=0; i<nr; i++)
 +    {
 +        /* With free energy we do not use the charge check.
 +         * In most cases this will be more efficient than calling make_bsplines
 +         * twice, since usually more than half the particles have charges.
 +         */
 +        ii = ind[i];
 +        if (bFreeEnergy || charge[ii] != 0.0) {
 +            xptr = fractx[ii];
 +            /* Fixed orders 4 and 5 let CALC_SPLINE fully unroll */
 +            switch(order) {
 +            case 4:  CALC_SPLINE(4);     break;
 +            case 5:  CALC_SPLINE(5);     break;
 +            default: CALC_SPLINE(order); break;
 +            }
 +        }
 +    }
 +}
 +
 +
 +/* Compute the squared DFT magnitude |sum_j data[j]*e^(2*pi*i*j*k/n)|^2
 + * for each of the ndata frequencies into mod[] (O(n^2) direct sum).
 + * Near-zero moduli are replaced by the average of their two neighbors
 + * to avoid division by ~0 in the reciprocal-space solver.
 + * NOTE(review): if mod[0] or mod[ndata-1] is below 1e-7 the averaging
 + * reads mod[-1] or mod[ndata], i.e. out of bounds -- confirm these
 + * boundary entries can never be tiny, or clamp/wrap the indices.
 + */
 +void make_dft_mod(real *mod,real *data,int ndata)
 +{
 +  int i,j;
 +  real sc,ss,arg;
 +
 +  for(i=0;i<ndata;i++) {
 +    sc=ss=0;
 +    for(j=0;j<ndata;j++) {
 +      arg=(2.0*M_PI*i*j)/ndata;
 +      sc+=data[j]*cos(arg);
 +      ss+=data[j]*sin(arg);
 +    }
 +    mod[i]=sc*sc+ss*ss;
 +  }
 +  for(i=0;i<ndata;i++)
 +    if(mod[i]<1e-7)
 +      mod[i]=(mod[i-1]+mod[i+1])*0.5;
 +}
 +
 +
 +/* Compute the B-spline moduli |b(m)|^2 for all three grid dimensions:
 + * build the cardinal B-spline of the given order via the same
 + * recursion as CALC_SPLINE (evaluated at dr = 0), place it in a
 + * zero-padded array of length max(nx,ny,nz), and take squared DFT
 + * magnitudes with make_dft_mod per dimension.
 + */
 +static void make_bspline_moduli(splinevec bsp_mod,
 +                                int nx,int ny,int nz,int order)
 +{
 +  int nmax=max(nx,max(ny,nz));
 +  real *data,*ddata,*bsp_data;
 +  int i,k,l;
 +  real div;
 +
 +  snew(data,order);
 +  snew(ddata,order);
 +  snew(bsp_data,nmax);
 +
 +  data[order-1]=0;
 +  data[1]=0;
 +  data[0]=1;
 +
 +  for(k=3;k<order;k++) {
 +    div=1.0/(k-1.0);
 +    data[k-1]=0;
 +    for(l=1;l<(k-1);l++)
 +      data[k-l-1]=div*(l*data[k-l-2]+(k-l)*data[k-l-1]);
 +    data[0]=div*data[0];
 +  }
 +  /* differentiate */
 +  ddata[0]=-data[0];
 +  for(k=1;k<order;k++)
 +    ddata[k]=data[k-1]-data[k];
 +  div=1.0/(order-1);
 +  data[order-1]=0;
 +  for(l=1;l<(order-1);l++)
 +    data[order-l-1]=div*(l*data[order-l-2]+(order-l)*data[order-l-1]);
 +  data[0]=div*data[0];
 +
 +  /* Place the spline in a zero-padded array, shifted by one */
 +  for(i=0;i<nmax;i++)
 +    bsp_data[i]=0;
 +  for(i=1;i<=order;i++)
 +    bsp_data[i]=data[i-1];
 +
 +  make_dft_mod(bsp_mod[XX],bsp_data,nx);
 +  make_dft_mod(bsp_mod[YY],bsp_data,ny);
 +  make_dft_mod(bsp_mod[ZZ],bsp_data,nz);
 +
 +  sfree(data);
 +  sfree(ddata);
 +  sfree(bsp_data);
 +}
 +
 +
 +/* Return the P3M optimal influence function */
 +/* Evaluates the order-dependent polynomial in z^2 (z = sin(pi*m/n));
 + * orders 2..8 are supported, anything else returns 0.0 (the caller
 + * make_p3m_bspline_moduli_dim rejects order > 8 before getting here).
 + */
 +static double do_p3m_influence(double z, int order)
 +{
 +    double z2,z4;
 +
 +    z2 = z*z;
 +    z4 = z2*z2;
 +
 +    /* The formula and most constants can be found in:
 +     * Ballenegger et al., JCTC 8, 936 (2012)
 +     */
 +    switch(order)
 +    {
 +    case 2:
 +        return 1.0 - 2.0*z2/3.0;
 +        break;
 +    case 3:
 +        return 1.0 - z2 + 2.0*z4/15.0;
 +        break;
 +    case 4:
 +        return 1.0 - 4.0*z2/3.0 + 2.0*z4/5.0 + 4.0*z2*z4/315.0;
 +        break;
 +    case 5:
 +        return 1.0 - 5.0*z2/3.0 + 7.0*z4/9.0 - 17.0*z2*z4/189.0 + 2.0*z4*z4/2835.0;
 +        break;
 +    case 6:
 +        return 1.0 - 2.0*z2 + 19.0*z4/15.0 - 256.0*z2*z4/945.0 + 62.0*z4*z4/4725.0 + 4.0*z2*z4*z4/155925.0;
 +        break;
 +    case 7:
 +        return 1.0 - 7.0*z2/3.0 + 28.0*z4/15.0 - 16.0*z2*z4/27.0 + 26.0*z4*z4/405.0 - 2.0*z2*z4*z4/1485.0 + 4.0*z4*z4*z4/6081075.0;
 +        /* no break needed: the return above exits */
 +    case 8:
 +        return 1.0 - 8.0*z2/3.0 + 116.0*z4/45.0 - 344.0*z2*z4/315.0 + 914.0*z4*z4/4725.0 - 248.0*z4*z4*z2/22275.0 + 21844.0*z4*z4*z4/212837625.0 - 8.0*z4*z4*z4*z2/638512875.0;
 +        break;
 +    }
 +
 +    return 0.0;
 +}
 +
 +/* Calculate the P3M B-spline moduli for one dimension */
 +static void make_p3m_bspline_moduli_dim(real *bsp_mod,int n,int order)
 +{
 +    double zarg,zai,sinzai,infl;
 +    int    maxk,i;
 +
 +    if (order > 8)
 +    {
 +        gmx_fatal(FARGS,"The current P3M code only supports orders up to 8");
 +    }
 +
 +    zarg = M_PI/n;
 +
 +    maxk = (n + 1)/2;
 +
 +    for(i=-maxk; i<0; i++)
 +    {
 +        zai    = zarg*i;
 +        sinzai = sin(zai);
 +        infl   = do_p3m_influence(sinzai,order);
 +        bsp_mod[n+i] = infl*infl*pow(sinzai/zai,-2.0*order);
 +    }
 +    bsp_mod[0] = 1.0;
 +    for(i=1; i<maxk; i++)
 +    {
 +        zai    = zarg*i;
 +        sinzai = sin(zai);
 +        infl   = do_p3m_influence(sinzai,order);
 +        bsp_mod[i] = infl*infl*pow(sinzai/zai,-2.0*order);
 +    }
 +}
 +
/* Calculate the P3M B-spline moduli.
 * The three grid dimensions are independent; each axis table is filled
 * separately by make_p3m_bspline_moduli_dim. */
static void make_p3m_bspline_moduli(splinevec bsp_mod,
                                    int nx,int ny,int nz,int order)
{
    make_p3m_bspline_moduli_dim(bsp_mod[XX],nx,order);
    make_p3m_bspline_moduli_dim(bsp_mod[YY],ny,order);
    make_p3m_bspline_moduli_dim(bsp_mod[ZZ],nz,order);
}
 +
 +
 +static void setup_coordinate_communication(pme_atomcomm_t *atc)
 +{
 +  int nslab,n,i;
 +  int fw,bw;
 +
 +  nslab = atc->nslab;
 +
 +  n = 0;
 +  for(i=1; i<=nslab/2; i++) {
 +    fw = (atc->nodeid + i) % nslab;
 +    bw = (atc->nodeid - i + nslab) % nslab;
 +    if (n < nslab - 1) {
 +      atc->node_dest[n] = fw;
 +      atc->node_src[n]  = bw;
 +      n++;
 +    }
 +    if (n < nslab - 1) {
 +      atc->node_dest[n] = bw;
 +      atc->node_src[n]  = fw;
 +      n++;
 +    }
 +  }
 +}
 +
 +int gmx_pme_destroy(FILE *log,gmx_pme_t *pmedata)
 +{
 +    int thread;
 +
 +    if(NULL != log)
 +    {
 +        fprintf(log,"Destroying PME data structures.\n");
 +    }
 +
 +    sfree((*pmedata)->nnx);
 +    sfree((*pmedata)->nny);
 +    sfree((*pmedata)->nnz);
 +
 +    pmegrids_destroy(&(*pmedata)->pmegridA);
 +
 +    sfree((*pmedata)->fftgridA);
 +    sfree((*pmedata)->cfftgridA);
 +    gmx_parallel_3dfft_destroy((*pmedata)->pfft_setupA);
 +
 +    if ((*pmedata)->pmegridB.grid.grid != NULL)
 +    {
 +        pmegrids_destroy(&(*pmedata)->pmegridB);
 +        sfree((*pmedata)->fftgridB);
 +        sfree((*pmedata)->cfftgridB);
 +        gmx_parallel_3dfft_destroy((*pmedata)->pfft_setupB);
 +    }
 +    for(thread=0; thread<(*pmedata)->nthread; thread++)
 +    {
 +        free_work(&(*pmedata)->work[thread]);
 +    }
 +    sfree((*pmedata)->work);
 +
 +    sfree(*pmedata);
 +    *pmedata = NULL;
 +
 +  return 0;
 +}
 +
/* Round n up to the nearest multiple of f. */
static int mult_up(int n,int f)
{
    int nchunks = (n + f - 1)/f;   /* number of whole chunks of size f */

    return nchunks*f;
}
 +
 +
 +static double pme_load_imbalance(gmx_pme_t pme)
 +{
 +    int    nma,nmi;
 +    double n1,n2,n3;
 +
 +    nma = pme->nnodes_major;
 +    nmi = pme->nnodes_minor;
 +
 +    n1 = mult_up(pme->nkx,nma)*mult_up(pme->nky,nmi)*pme->nkz;
 +    n2 = mult_up(pme->nkx,nma)*mult_up(pme->nkz,nmi)*pme->nky;
 +    n3 = mult_up(pme->nky,nma)*mult_up(pme->nkz,nmi)*pme->nkx;
 +
 +    /* pme_solve is roughly double the cost of an fft */
 +
 +    return (n1 + n2 + 3*n3)/(double)(6*pme->nkx*pme->nky*pme->nkz);
 +}
 +
/* Initialize a PME atom-communication structure for decomposition
 * dimension dimind.  With more than one PME node, the slab count and
 * our rank come from the MPI communicator of that dimension; otherwise
 * a single slab is used.  bSpread marks the atc that is used for
 * charge spreading (atc[0]).
 * NOTE(review): locals nk, k and s are unused here.
 */
static void init_atomcomm(gmx_pme_t pme,pme_atomcomm_t *atc, t_commrec *cr,
                          int dimind,gmx_bool bSpread)
{
    int nk,k,s,thread;

    atc->dimind = dimind;
    atc->nslab  = 1;
    atc->nodeid = 0;
    atc->pd_nalloc = 0;
#ifdef GMX_MPI
    if (pme->nnodes > 1)
    {
        atc->mpi_comm = pme->mpi_comm_d[dimind];
        MPI_Comm_size(atc->mpi_comm,&atc->nslab);
        MPI_Comm_rank(atc->mpi_comm,&atc->nodeid);
    }
    if (debug)
    {
        fprintf(debug,"For PME atom communication in dimind %d: nslab %d rank %d\n",atc->dimind,atc->nslab,atc->nodeid);
    }
#endif

    atc->bSpread   = bSpread;
    atc->pme_order = pme->pme_order;

    if (atc->nslab > 1)
    {
        /* These three allocations are not required for particle decomp. */
        snew(atc->node_dest,atc->nslab);
        snew(atc->node_src,atc->nslab);
        setup_coordinate_communication(atc);

        /* Per-thread atom counts per slab; thread 0's array doubles as
         * the overall count array. */
        snew(atc->count_thread,pme->nthread);
        for(thread=0; thread<pme->nthread; thread++)
        {
            snew(atc->count_thread[thread],atc->nslab);
        }
        atc->count = atc->count_thread[0];
        snew(atc->rcount,atc->nslab);
        snew(atc->buf_index,atc->nslab);
    }

    atc->nthread = pme->nthread;
    if (atc->nthread > 1)
    {
        snew(atc->thread_plist,atc->nthread);
    }
    snew(atc->spline,atc->nthread);
    for(thread=0; thread<atc->nthread; thread++)
    {
        if (atc->nthread > 1)
        {
            /* Pad the per-thread count arrays with GMX_CACHE_SEP
             * entries on both sides, presumably to avoid cache-line
             * sharing between threads — confirm against users of n. */
            snew(atc->thread_plist[thread].n,atc->nthread+2*GMX_CACHE_SEP);
            atc->thread_plist[thread].n += GMX_CACHE_SEP;
        }
    }
}
 +
/* Set up grid-overlap communication for one decomposition dimension.
 *   norder:        PME interpolation order; sets the overlap width
 *   comm:          MPI communicator for this dimension (GMX_MPI only)
 *   nnodes,nodeid: slab count and our slab index along this dimension
 *   ndata:         total number of grid points along this dimension
 *   commplainsize: number of grid points in one communicated plane
 * NOTE(review): locals lbnd, rbnd, maxlr, exten, nn and nk are unused.
 */
static void
init_overlap_comm(pme_overlap_t *  ol,
                  int              norder,
#ifdef GMX_MPI
                  MPI_Comm         comm,
#endif
                  int              nnodes,
                  int              nodeid,
                  int              ndata,
                  int              commplainsize)
{
    int lbnd,rbnd,maxlr,b,i;
    int exten;
    int nn,nk;
    pme_grid_comm_t *pgc;
    gmx_bool bCont;
    int fft_start,fft_end,send_index1,recv_index1;

#ifdef GMX_MPI
    ol->mpi_comm = comm;
#endif

    ol->nnodes = nnodes;
    ol->nodeid = nodeid;

    /* Linear translation of the PME grid won't affect reciprocal space
     * calculations, so to optimize we only interpolate "upwards",
     * which also means we only have to consider overlap in one direction.
     * I.e., particles on this node might also be spread to grid indices
     * that belong to higher nodes (modulo nnodes)
     */

    snew(ol->s2g0,ol->nnodes+1);
    snew(ol->s2g1,ol->nnodes);
    if (debug) { fprintf(debug,"PME slab boundaries:"); }
    for(i=0; i<nnodes; i++)
    {
        /* s2g0 the local interpolation grid start.
         * s2g1 the local interpolation grid end.
         * Because grid overlap communication only goes forward,
         * the grid the slabs for fft's should be rounded down.
         */
        ol->s2g0[i] = ( i   *ndata + 0       )/nnodes;
        ol->s2g1[i] = ((i+1)*ndata + nnodes-1)/nnodes + norder - 1;

        if (debug)
        {
            fprintf(debug,"  %3d %3d",ol->s2g0[i],ol->s2g1[i]);
        }
    }
    ol->s2g0[nnodes] = ndata;
    if (debug) { fprintf(debug,"\n"); }

    /* Determine with how many nodes we need to communicate the grid overlap:
     * increase the distance b until no slab's interpolation region reaches
     * b slabs ahead any more (with periodic wrap-around). */
    b = 0;
    do
    {
        b++;
        bCont = FALSE;
        for(i=0; i<nnodes; i++)
        {
            if ((i+b <  nnodes && ol->s2g1[i] > ol->s2g0[i+b]) ||
                (i+b >= nnodes && ol->s2g1[i] > ol->s2g0[i+b-nnodes] + ndata))
            {
                bCont = TRUE;
            }
        }
    }
    while (bCont && b < nnodes);
    ol->noverlap_nodes = b - 1;

    /* Overlap peer b sends to slab nodeid+b+1 and receives from slab
     * nodeid-b-1, both modulo nnodes. */
    snew(ol->send_id,ol->noverlap_nodes);
    snew(ol->recv_id,ol->noverlap_nodes);
    for(b=0; b<ol->noverlap_nodes; b++)
    {
        ol->send_id[b] = (ol->nodeid + (b + 1)) % ol->nnodes;
        ol->recv_id[b] = (ol->nodeid - (b + 1) + ol->nnodes) % ol->nnodes;
    }
    snew(ol->comm_data, ol->noverlap_nodes);

    for(b=0; b<ol->noverlap_nodes; b++)
    {
        pgc = &ol->comm_data[b];
        /* Send */
        fft_start        = ol->s2g0[ol->send_id[b]];
        fft_end          = ol->s2g0[ol->send_id[b]+1];
        if (ol->send_id[b] < nodeid)
        {
            /* Periodic wrap-around: shift by a full grid length */
            fft_start += ndata;
            fft_end   += ndata;
        }
        send_index1      = ol->s2g1[nodeid];
        send_index1      = min(send_index1,fft_end);
        pgc->send_index0 = fft_start;
        pgc->send_nindex = max(0,send_index1 - pgc->send_index0);

        /* We always start receiving to the first index of our slab */
        fft_start        = ol->s2g0[ol->nodeid];
        fft_end          = ol->s2g0[ol->nodeid+1];
        recv_index1      = ol->s2g1[ol->recv_id[b]];
        if (ol->recv_id[b] > nodeid)
        {
            recv_index1 -= ndata;
        }
        recv_index1      = min(recv_index1,fft_end);
        pgc->recv_index0 = fft_start;
        pgc->recv_nindex = max(0,recv_index1 - pgc->recv_index0);
    }

    /* For non-divisible grid we need pme_order iso pme_order-1 */
    snew(ol->sendbuf,norder*commplainsize);
    snew(ol->recvbuf,norder*commplainsize);
}
 +
 +static void
 +make_gridindex5_to_localindex(int n,int local_start,int local_range,
 +                              int **global_to_local,
 +                              real **fraction_shift)
 +{
 +    int i;
 +    int * gtl;
 +    real * fsh;
 +
 +    snew(gtl,5*n);
 +    snew(fsh,5*n);
 +    for(i=0; (i<5*n); i++)
 +    {
 +        /* Determine the global to local grid index */
 +        gtl[i] = (i - local_start + n) % n;
 +        /* For coordinates that fall within the local grid the fraction
 +         * is correct, we don't need to shift it.
 +         */
 +        fsh[i] = 0;
 +        if (local_range < n)
 +        {
 +            /* Due to rounding issues i could be 1 beyond the lower or
 +             * upper boundary of the local grid. Correct the index for this.
 +             * If we shift the index, we need to shift the fraction by
 +             * the same amount in the other direction to not affect
 +             * the weights.
 +             * Note that due to this shifting the weights at the end of
 +             * the spline might change, but that will only involve values
 +             * between zero and values close to the precision of a real,
 +             * which is anyhow the accuracy of the whole mesh calculation.
 +             */
 +            /* With local_range=0 we should not change i=local_start */
 +            if (i % n != local_start)
 +            {
 +                if (gtl[i] == n-1)
 +                {
 +                    gtl[i] = 0;
 +                    fsh[i] = -1;
 +                }
 +                else if (gtl[i] == local_range)
 +                {
 +                    gtl[i] = local_range - 1;
 +                    fsh[i] = 1;
 +                }
 +            }
 +        }
 +    }
 +
 +    *global_to_local = gtl;
 +    *fraction_shift  = fsh;
 +}
 +
/* Precompute SSE masks for the spline kernels.  For each possible
 * offset 'of' (0 .. 8-order), the pair mask_SSE0/mask_SSE1 selects,
 * out of 8 consecutive floats, exactly those with index in
 * [of, of+order).  Compiles to a no-op without PME_SSE. */
static void sse_mask_init(pme_spline_work_t *work,int order)
{
#ifdef PME_SSE
    float  tmp[8];
    __m128 zero_SSE;
    int    of,i;

    zero_SSE = _mm_setzero_ps();

    for(of=0; of<8-(order-1); of++)
    {
        for(i=0; i<8; i++)
        {
            /* 1 inside the active window, 0 outside */
            tmp[i] = (i >= of && i < of+order ? 1 : 0);
        }
        work->mask_SSE0[of] = _mm_loadu_ps(tmp);
        work->mask_SSE1[of] = _mm_loadu_ps(tmp+4);
        /* Convert the 0/1 floats into all-zero/all-one bit masks */
        work->mask_SSE0[of] = _mm_cmpgt_ps(work->mask_SSE0[of],zero_SSE);
        work->mask_SSE1[of] = _mm_cmpgt_ps(work->mask_SSE1[of],zero_SSE);
    }
#endif
}
 +
 +static void
 +gmx_pme_check_grid_restrictions(FILE *fplog,char dim,int nnodes,int *nk)
 +{
 +    int nk_new;
 +
 +    if (*nk % nnodes != 0)
 +    {
 +        nk_new = nnodes*(*nk/nnodes + 1);
 +
 +        if (2*nk_new >= 3*(*nk))
 +        {
 +            gmx_fatal(FARGS,"The PME grid size in dim %c (%d) is not divisble by the number of nodes doing PME in dim %c (%d). The grid size would have to be increased by more than 50%% to make the grid divisible. Change the total number of nodes or the number of domain decomposition cells in x or the PME grid %c dimension (and the cut-off).",
 +                      dim,*nk,dim,nnodes,dim);
 +        }
 +
 +        if (fplog != NULL)
 +        {
 +            fprintf(fplog,"\nNOTE: The PME grid size in dim %c (%d) is not divisble by the number of nodes doing PME in dim %c (%d). Increasing the PME grid size in dim %c to %d. This will increase the accuracy and will not decrease the performance significantly on this number of nodes. For optimal performance change the total number of nodes or the number of domain decomposition cells in x or the PME grid %c dimension (and the cut-off).\n\n",
 +                    dim,*nk,dim,nnodes,dim,nk_new,dim);
 +        }
 +
 +        *nk = nk_new;
 +    }
 +}
 +
 +int gmx_pme_init(gmx_pme_t *         pmedata,
 +                 t_commrec *         cr,
 +                 int                 nnodes_major,
 +                 int                 nnodes_minor,
 +                 t_inputrec *        ir,
 +                 int                 homenr,
 +                 gmx_bool            bFreeEnergy,
 +                 gmx_bool            bReproducible,
 +                 int                 nthread)
 +{
 +    gmx_pme_t pme=NULL;
 +
 +    pme_atomcomm_t *atc;
 +    ivec ndata;
 +
 +    if (debug)
 +        fprintf(debug,"Creating PME data structures.\n");
 +    snew(pme,1);
 +
 +    pme->redist_init         = FALSE;
 +    pme->sum_qgrid_tmp       = NULL;
 +    pme->sum_qgrid_dd_tmp    = NULL;
 +    pme->buf_nalloc          = 0;
 +    pme->redist_buf_nalloc   = 0;
 +
 +    pme->nnodes              = 1;
 +    pme->bPPnode             = TRUE;
 +
 +    pme->nnodes_major        = nnodes_major;
 +    pme->nnodes_minor        = nnodes_minor;
 +
 +#ifdef GMX_MPI
 +    if (nnodes_major*nnodes_minor > 1)
 +    {
 +        pme->mpi_comm = cr->mpi_comm_mygroup;
 +
 +        MPI_Comm_rank(pme->mpi_comm,&pme->nodeid);
 +        MPI_Comm_size(pme->mpi_comm,&pme->nnodes);
 +        if (pme->nnodes != nnodes_major*nnodes_minor)
 +        {
 +            gmx_incons("PME node count mismatch");
 +        }
 +    }
 +    else
 +    {
 +        pme->mpi_comm = MPI_COMM_NULL;
 +    }
 +#endif
 +
 +    if (pme->nnodes == 1)
 +    {
 +        pme->ndecompdim = 0;
 +        pme->nodeid_major = 0;
 +        pme->nodeid_minor = 0;
 +#ifdef GMX_MPI
 +        pme->mpi_comm_d[0] = pme->mpi_comm_d[1] = MPI_COMM_NULL;
 +#endif
 +    }
 +    else
 +    {
 +        if (nnodes_minor == 1)
 +        {
 +#ifdef GMX_MPI
 +            pme->mpi_comm_d[0] = pme->mpi_comm;
 +            pme->mpi_comm_d[1] = MPI_COMM_NULL;
 +#endif
 +            pme->ndecompdim = 1;
 +            pme->nodeid_major = pme->nodeid;
 +            pme->nodeid_minor = 0;
 +
 +        }
 +        else if (nnodes_major == 1)
 +        {
 +#ifdef GMX_MPI
 +            pme->mpi_comm_d[0] = MPI_COMM_NULL;
 +            pme->mpi_comm_d[1] = pme->mpi_comm;
 +#endif
 +            pme->ndecompdim = 1;
 +            pme->nodeid_major = 0;
 +            pme->nodeid_minor = pme->nodeid;
 +        }
 +        else
 +        {
 +            if (pme->nnodes % nnodes_major != 0)
 +            {
 +                gmx_incons("For 2D PME decomposition, #PME nodes must be divisible by the number of nodes in the major dimension");
 +            }
 +            pme->ndecompdim = 2;
 +
 +#ifdef GMX_MPI
 +            MPI_Comm_split(pme->mpi_comm,pme->nodeid % nnodes_minor,
 +                           pme->nodeid,&pme->mpi_comm_d[0]);  /* My communicator along major dimension */
 +            MPI_Comm_split(pme->mpi_comm,pme->nodeid/nnodes_minor,
 +                           pme->nodeid,&pme->mpi_comm_d[1]);  /* My communicator along minor dimension */
 +
 +            MPI_Comm_rank(pme->mpi_comm_d[0],&pme->nodeid_major);
 +            MPI_Comm_size(pme->mpi_comm_d[0],&pme->nnodes_major);
 +            MPI_Comm_rank(pme->mpi_comm_d[1],&pme->nodeid_minor);
 +            MPI_Comm_size(pme->mpi_comm_d[1],&pme->nnodes_minor);
 +#endif
 +        }
 +        pme->bPPnode = (cr->duty & DUTY_PP);
 +    }
 +
 +    pme->nthread = nthread;
 +
 +    if (ir->ePBC == epbcSCREW)
 +    {
 +        gmx_fatal(FARGS,"pme does not (yet) work with pbc = screw");
 +    }
 +
 +    pme->bFEP        = ((ir->efep != efepNO) && bFreeEnergy);
 +    pme->nkx         = ir->nkx;
 +    pme->nky         = ir->nky;
 +    pme->nkz         = ir->nkz;
 +    pme->bP3M        = (ir->coulombtype == eelP3M_AD || getenv("GMX_PME_P3M") != NULL);
 +    pme->pme_order   = ir->pme_order;
 +    pme->epsilon_r   = ir->epsilon_r;
 +
 +    if (pme->pme_order > PME_ORDER_MAX)
 +    {
 +        gmx_fatal(FARGS,"pme_order (%d) is larger than the maximum allowed value (%d). Modify and recompile the code if you really need such a high order.",
 +                  pme->pme_order,PME_ORDER_MAX);
 +    }
 +
 +    /* Currently pme.c supports only the fft5d FFT code.
 +     * Therefore the grid always needs to be divisible by nnodes.
 +     * When the old 1D code is also supported again, change this check.
 +     *
 +     * This check should be done before calling gmx_pme_init
 +     * and fplog should be passed iso stderr.
 +     *
 +    if (pme->ndecompdim >= 2)
 +    */
 +    if (pme->ndecompdim >= 1)
 +    {
 +        /*
 +        gmx_pme_check_grid_restrictions(pme->nodeid==0 ? stderr : NULL,
 +                                        'x',nnodes_major,&pme->nkx);
 +        gmx_pme_check_grid_restrictions(pme->nodeid==0 ? stderr : NULL,
 +                                        'y',nnodes_minor,&pme->nky);
 +        */
 +    }
 +
 +    if (pme->nkx <= pme->pme_order*(pme->nnodes_major > 1 ? 2 : 1) ||
 +        pme->nky <= pme->pme_order*(pme->nnodes_minor > 1 ? 2 : 1) ||
 +        pme->nkz <= pme->pme_order)
 +    {
 +        gmx_fatal(FARGS,"The pme grid dimensions need to be larger than pme_order (%d) and in parallel larger than 2*pme_ordern for x and/or y",pme->pme_order);
 +    }
 +
 +    if (pme->nnodes > 1) {
 +        double imbal;
 +
 +#ifdef GMX_MPI
 +        MPI_Type_contiguous(DIM, mpi_type, &(pme->rvec_mpi));
 +        MPI_Type_commit(&(pme->rvec_mpi));
 +#endif
 +
 +        /* Note that the charge spreading and force gathering, which usually
 +         * takes about the same amount of time as FFT+solve_pme,
 +         * is always fully load balanced
 +         * (unless the charge distribution is inhomogeneous).
 +         */
 +
 +        imbal = pme_load_imbalance(pme);
 +        if (imbal >= 1.2 && pme->nodeid_major == 0 && pme->nodeid_minor == 0)
 +        {
 +            fprintf(stderr,
 +                    "\n"
 +                    "NOTE: The load imbalance in PME FFT and solve is %d%%.\n"
 +                    "      For optimal PME load balancing\n"
 +                    "      PME grid_x (%d) and grid_y (%d) should be divisible by #PME_nodes_x (%d)\n"
 +                    "      and PME grid_y (%d) and grid_z (%d) should be divisible by #PME_nodes_y (%d)\n"
 +                    "\n",
 +                    (int)((imbal-1)*100 + 0.5),
 +                    pme->nkx,pme->nky,pme->nnodes_major,
 +                    pme->nky,pme->nkz,pme->nnodes_minor);
 +        }
 +    }
 +
 +    /* For non-divisible grid we need pme_order iso pme_order-1 */
 +    /* In sum_qgrid_dd x overlap is copied in place: take padding into account.
 +     * y is always copied through a buffer: we don't need padding in z,
 +     * but we do need the overlap in x because of the communication order.
 +     */
 +    init_overlap_comm(&pme->overlap[0],pme->pme_order,
 +#ifdef GMX_MPI
 +                      pme->mpi_comm_d[0],
 +#endif
 +                      pme->nnodes_major,pme->nodeid_major,
 +                      pme->nkx,
 +                      (div_round_up(pme->nky,pme->nnodes_minor)+pme->pme_order)*(pme->nkz+pme->pme_order-1));
 +
 +    init_overlap_comm(&pme->overlap[1],pme->pme_order,
 +#ifdef GMX_MPI
 +                      pme->mpi_comm_d[1],
 +#endif
 +                      pme->nnodes_minor,pme->nodeid_minor,
 +                      pme->nky,
 +                      (div_round_up(pme->nkx,pme->nnodes_major)+pme->pme_order)*pme->nkz);
 +
 +    /* Check for a limitation of the (current) sum_fftgrid_dd code */
 +    if (pme->nthread > 1 &&
 +        (pme->overlap[0].noverlap_nodes > 1 ||
 +         pme->overlap[1].noverlap_nodes > 1))
 +    {
 +        gmx_fatal(FARGS,"With threads the number of grid lines per node along x and or y should be pme_order (%d) or more or exactly pme_order-1",pme->pme_order);
 +    }
 +
 +    snew(pme->bsp_mod[XX],pme->nkx);
 +    snew(pme->bsp_mod[YY],pme->nky);
 +    snew(pme->bsp_mod[ZZ],pme->nkz);
 +
 +    /* The required size of the interpolation grid, including overlap.
 +     * The allocated size (pmegrid_n?) might be slightly larger.
 +     */
 +    pme->pmegrid_nx = pme->overlap[0].s2g1[pme->nodeid_major] -
 +                      pme->overlap[0].s2g0[pme->nodeid_major];
 +    pme->pmegrid_ny = pme->overlap[1].s2g1[pme->nodeid_minor] -
 +                      pme->overlap[1].s2g0[pme->nodeid_minor];
 +    pme->pmegrid_nz_base = pme->nkz;
 +    pme->pmegrid_nz = pme->pmegrid_nz_base + pme->pme_order - 1;
 +    set_grid_alignment(&pme->pmegrid_nz,pme->pme_order);
 +
 +    pme->pmegrid_start_ix = pme->overlap[0].s2g0[pme->nodeid_major];
 +    pme->pmegrid_start_iy = pme->overlap[1].s2g0[pme->nodeid_minor];
 +    pme->pmegrid_start_iz = 0;
 +
 +    make_gridindex5_to_localindex(pme->nkx,
 +                                  pme->pmegrid_start_ix,
 +                                  pme->pmegrid_nx - (pme->pme_order-1),
 +                                  &pme->nnx,&pme->fshx);
 +    make_gridindex5_to_localindex(pme->nky,
 +                                  pme->pmegrid_start_iy,
 +                                  pme->pmegrid_ny - (pme->pme_order-1),
 +                                  &pme->nny,&pme->fshy);
 +    make_gridindex5_to_localindex(pme->nkz,
 +                                  pme->pmegrid_start_iz,
 +                                  pme->pmegrid_nz_base,
 +                                  &pme->nnz,&pme->fshz);
 +
 +    pmegrids_init(&pme->pmegridA,
 +                  pme->pmegrid_nx,pme->pmegrid_ny,pme->pmegrid_nz,
 +                  pme->pmegrid_nz_base,
 +                  pme->pme_order,
 +                  pme->nthread,
 +                  pme->overlap[0].s2g1[pme->nodeid_major]-pme->overlap[0].s2g0[pme->nodeid_major+1],
 +                  pme->overlap[1].s2g1[pme->nodeid_minor]-pme->overlap[1].s2g0[pme->nodeid_minor+1]);
 +
 +    sse_mask_init(&pme->spline_work,pme->pme_order);
 +
 +    ndata[0] = pme->nkx;
 +    ndata[1] = pme->nky;
 +    ndata[2] = pme->nkz;
 +
 +    /* This routine will allocate the grid data to fit the FFTs */
 +    gmx_parallel_3dfft_init(&pme->pfft_setupA,ndata,
 +                            &pme->fftgridA,&pme->cfftgridA,
 +                            pme->mpi_comm_d,
 +                            pme->overlap[0].s2g0,pme->overlap[1].s2g0,
 +                            bReproducible,pme->nthread);
 +
 +    if (bFreeEnergy)
 +    {
 +        pmegrids_init(&pme->pmegridB,
 +                      pme->pmegrid_nx,pme->pmegrid_ny,pme->pmegrid_nz,
 +                      pme->pmegrid_nz_base,
 +                      pme->pme_order,
 +                      pme->nthread,
 +                      pme->nkx % pme->nnodes_major != 0,
 +                      pme->nky % pme->nnodes_minor != 0);
 +
 +        gmx_parallel_3dfft_init(&pme->pfft_setupB,ndata,
 +                                &pme->fftgridB,&pme->cfftgridB,
 +                                pme->mpi_comm_d,
 +                                pme->overlap[0].s2g0,pme->overlap[1].s2g0,
 +                                bReproducible,pme->nthread);
 +    }
 +    else
 +    {
 +        pme->pmegridB.grid.grid = NULL;
 +        pme->fftgridB           = NULL;
 +        pme->cfftgridB          = NULL;
 +    }
 +
 +    if (!pme->bP3M)
 +    {
 +        /* Use plain SPME B-spline interpolation */
 +        make_bspline_moduli(pme->bsp_mod,pme->nkx,pme->nky,pme->nkz,pme->pme_order);
 +    }
 +    else
 +    {
 +        /* Use the P3M grid-optimized influence function */
 +        make_p3m_bspline_moduli(pme->bsp_mod,pme->nkx,pme->nky,pme->nkz,pme->pme_order);
 +    }
 +
 +    /* Use atc[0] for spreading */
 +    init_atomcomm(pme,&pme->atc[0],cr,nnodes_major > 1 ? 0 : 1,TRUE);
 +    if (pme->ndecompdim >= 2)
 +    {
 +        init_atomcomm(pme,&pme->atc[1],cr,1,FALSE);
 +    }
 +
 +    if (pme->nnodes == 1) {
 +        pme->atc[0].n = homenr;
 +        pme_realloc_atomcomm_things(&pme->atc[0]);
 +    }
 +
 +    {
 +        int thread;
 +
 +        /* Use fft5d, order after FFT is y major, z, x minor */
 +
 +        snew(pme->work,pme->nthread);
 +        for(thread=0; thread<pme->nthread; thread++)
 +        {
 +            realloc_work(&pme->work[thread],pme->nkx);
 +        }
 +    }
 +
 +    *pmedata = pme;
 +
 +    return 0;
 +}
 +
 +
/* Copy the non-overlapping interior of one thread's local spread grid
 * into the node-level FFT grid.  The overlapping border regions are
 * handled separately (by the thread-grid reduction); this copy also
 * serves to initialize the thread's part of the full grid. */
static void copy_local_grid(gmx_pme_t pme,
                            pmegrids_t *pmegrids,int thread,real *fftgrid)
{
    ivec local_fft_ndata,local_fft_offset,local_fft_size;
    int  fft_my,fft_mz;
    int  nsx,nsy,nsz;
    ivec nf;
    int  offx,offy,offz,x,y,z,i0,i0t;
    int  d;
    pmegrid_t *pmegrid;
    real *grid_th;

    gmx_parallel_3dfft_real_limits(pme->pfft_setupA,
                                   local_fft_ndata,
                                   local_fft_offset,
                                   local_fft_size);
    /* y and z strides (allocated sizes) of the FFT grid */
    fft_my = local_fft_size[YY];
    fft_mz = local_fft_size[ZZ];

    pmegrid = &pmegrids->grid_th[thread];

    /* Dimensions of this thread's local grid */
    nsx = pmegrid->n[XX];
    nsy = pmegrid->n[YY];
    nsz = pmegrid->n[ZZ];

    for(d=0; d<DIM; d++)
    {
        /* Interior size per dimension: exclude the order-1 overlap and
         * clip to the part of the FFT grid beyond this thread's offset */
        nf[d] = min(pmegrid->n[d] - (pmegrid->order - 1),
                    local_fft_ndata[d] - pmegrid->offset[d]);
    }

    offx = pmegrid->offset[XX];
    offy = pmegrid->offset[YY];
    offz = pmegrid->offset[ZZ];

    /* Directly copy the non-overlapping parts of the local grids.
     * This also initializes the full grid.
     */
    grid_th = pmegrid->grid;
    for(x=0; x<nf[XX]; x++)
    {
        for(y=0; y<nf[YY]; y++)
        {
            /* i0: row start in the FFT grid; i0t: in the thread grid */
            i0  = ((offx + x)*fft_my + (offy + y))*fft_mz + offz;
            i0t = (x*nsy + y)*nsz;
            for(z=0; z<nf[ZZ]; z++)
            {
                fftgrid[i0+z] = grid_th[i0t+z];
            }
        }
    }
}
 +
 +static void print_sendbuf(gmx_pme_t pme,real *sendbuf)
 +{
 +    ivec local_fft_ndata,local_fft_offset,local_fft_size;
 +    pme_overlap_t *overlap;
 +    int datasize,nind;
 +    int i,x,y,z,n;
 +
 +    gmx_parallel_3dfft_real_limits(pme->pfft_setupA,
 +                                   local_fft_ndata,
 +                                   local_fft_offset,
 +                                   local_fft_size);
 +    /* Major dimension */
 +    overlap = &pme->overlap[0];
 +
 +    nind   = overlap->comm_data[0].send_nindex;
 +
 +    for(y=0; y<local_fft_ndata[YY]; y++) {
 +         printf(" %2d",y);
 +    }
 +    printf("\n");
 +
 +    i = 0;
 +    for(x=0; x<nind; x++) {
 +        for(y=0; y<local_fft_ndata[YY]; y++) {
 +            n = 0;
 +            for(z=0; z<local_fft_ndata[ZZ]; z++) {
 +                if (sendbuf[i] != 0) n++;
 +                i++;
 +            }
 +            printf(" %2d",n);
 +        }
 +        printf("\n");
 +    }
 +}
 +
 +static void
 +reduce_threadgrid_overlap(gmx_pme_t pme,
 +                          const pmegrids_t *pmegrids,int thread,
 +                          real *fftgrid,real *commbuf_x,real *commbuf_y)
 +{
 +    ivec local_fft_ndata,local_fft_offset,local_fft_size;
 +    int  fft_nx,fft_ny,fft_nz;
 +    int  fft_my,fft_mz;
 +    int  buf_my=-1;
 +    int  nsx,nsy,nsz;
 +    ivec ne;
 +    int  offx,offy,offz,x,y,z,i0,i0t;
 +    int  sx,sy,sz,fx,fy,fz,tx1,ty1,tz1,ox,oy,oz;
 +    gmx_bool bClearBufX,bClearBufY,bClearBufXY,bClearBuf;
 +    gmx_bool bCommX,bCommY;
 +    int  d;
 +    int  thread_f;
 +    const pmegrid_t *pmegrid,*pmegrid_g,*pmegrid_f;
 +    const real *grid_th;
 +    real *commbuf=NULL;
 +
 +    gmx_parallel_3dfft_real_limits(pme->pfft_setupA,
 +                                   local_fft_ndata,
 +                                   local_fft_offset,
 +                                   local_fft_size);
 +    fft_nx = local_fft_ndata[XX];
 +    fft_ny = local_fft_ndata[YY];
 +    fft_nz = local_fft_ndata[ZZ];
 +
 +    fft_my = local_fft_size[YY];
 +    fft_mz = local_fft_size[ZZ];
 +
 +    /* This routine is called when all thread have finished spreading.
 +     * Here each thread sums grid contributions calculated by other threads
 +     * to the thread local grid volume.
 +     * To minimize the number of grid copying operations,
 +     * this routines sums immediately from the pmegrid to the fftgrid.
 +     */
 +
 +    /* Determine which part of the full node grid we should operate on,
 +     * this is our thread local part of the full grid.
 +     */
 +    pmegrid = &pmegrids->grid_th[thread];
 +
 +    for(d=0; d<DIM; d++)
 +    {
 +        ne[d] = min(pmegrid->offset[d]+pmegrid->n[d]-(pmegrid->order-1),
 +                    local_fft_ndata[d]);
 +    }
 +
 +    offx = pmegrid->offset[XX];
 +    offy = pmegrid->offset[YY];
 +    offz = pmegrid->offset[ZZ];
 +
 +
 +    bClearBufX  = TRUE;
 +    bClearBufY  = TRUE;
 +    bClearBufXY = TRUE;
 +
 +    /* Now loop over all the thread data blocks that contribute
 +     * to the grid region we (our thread) are operating on.
 +     */
 +    /* Note that ffy_nx/y is equal to the number of grid points
 +     * between the first point of our node grid and the one of the next node.
 +     */
 +    for(sx=0; sx>=-pmegrids->nthread_comm[XX]; sx--)
 +    {
 +        fx = pmegrid->ci[XX] + sx;
 +        ox = 0;
 +        bCommX = FALSE;
 +        if (fx < 0) {
 +            fx += pmegrids->nc[XX];
 +            ox -= fft_nx;
 +            bCommX = (pme->nnodes_major > 1);
 +        }
 +        pmegrid_g = &pmegrids->grid_th[fx*pmegrids->nc[YY]*pmegrids->nc[ZZ]];
 +        ox += pmegrid_g->offset[XX];
 +        if (!bCommX)
 +        {
 +            tx1 = min(ox + pmegrid_g->n[XX],ne[XX]);
 +        }
 +        else
 +        {
 +            tx1 = min(ox + pmegrid_g->n[XX],pme->pme_order);
 +        }
 +
 +        for(sy=0; sy>=-pmegrids->nthread_comm[YY]; sy--)
 +        {
 +            fy = pmegrid->ci[YY] + sy;
 +            oy = 0;
 +            bCommY = FALSE;
 +            if (fy < 0) {
 +                fy += pmegrids->nc[YY];
 +                oy -= fft_ny;
 +                bCommY = (pme->nnodes_minor > 1);
 +            }
 +            pmegrid_g = &pmegrids->grid_th[fy*pmegrids->nc[ZZ]];
 +            oy += pmegrid_g->offset[YY];
 +            if (!bCommY)
 +            {
 +                ty1 = min(oy + pmegrid_g->n[YY],ne[YY]);
 +            }
 +            else
 +            {
 +                ty1 = min(oy + pmegrid_g->n[YY],pme->pme_order);
 +            }
 +
 +            for(sz=0; sz>=-pmegrids->nthread_comm[ZZ]; sz--)
 +            {
 +                fz = pmegrid->ci[ZZ] + sz;
 +                oz = 0;
 +                if (fz < 0)
 +                {
 +                    fz += pmegrids->nc[ZZ];
 +                    oz -= fft_nz;
 +                }
 +                pmegrid_g = &pmegrids->grid_th[fz];
 +                oz += pmegrid_g->offset[ZZ];
 +                tz1 = min(oz + pmegrid_g->n[ZZ],ne[ZZ]);
 +
 +                if (sx == 0 && sy == 0 && sz == 0)
 +                {
 +                    /* We have already added our local contribution
 +                     * before calling this routine, so skip it here.
 +                     */
 +                    continue;
 +                }
 +
 +                thread_f = (fx*pmegrids->nc[YY] + fy)*pmegrids->nc[ZZ] + fz;
 +
 +                pmegrid_f = &pmegrids->grid_th[thread_f];
 +
 +                grid_th = pmegrid_f->grid;
 +
 +                nsx = pmegrid_f->n[XX];
 +                nsy = pmegrid_f->n[YY];
 +                nsz = pmegrid_f->n[ZZ];
 +
 +#ifdef DEBUG_PME_REDUCE
 +                printf("n%d t%d add %d  %2d %2d %2d  %2d %2d %2d  %2d-%2d %2d-%2d, %2d-%2d %2d-%2d, %2d-%2d %2d-%2d\n",
 +                       pme->nodeid,thread,thread_f,
 +                       pme->pmegrid_start_ix,
 +                       pme->pmegrid_start_iy,
 +                       pme->pmegrid_start_iz,
 +                       sx,sy,sz,
 +                       offx-ox,tx1-ox,offx,tx1,
 +                       offy-oy,ty1-oy,offy,ty1,
 +                       offz-oz,tz1-oz,offz,tz1);
 +#endif
 +
 +                if (!(bCommX || bCommY))
 +                {
 +                    /* Copy from the thread local grid to the node grid */
 +                    for(x=offx; x<tx1; x++)
 +                    {
 +                        for(y=offy; y<ty1; y++)
 +                        {
 +                            i0  = (x*fft_my + y)*fft_mz;
 +                            i0t = ((x - ox)*nsy + (y - oy))*nsz - oz;
 +                            for(z=offz; z<tz1; z++)
 +                            {
 +                                fftgrid[i0+z] += grid_th[i0t+z];
 +                            }
 +                        }
 +                    }
 +                }
 +                else
 +                {
 +                    /* The order of this conditional decides
 +                     * where the corner volume gets stored with x+y decomp.
 +                     */
 +                    if (bCommY)
 +                    {
 +                        commbuf = commbuf_y;
 +                        buf_my  = ty1 - offy;
 +                        if (bCommX)
 +                        {
 +                            /* We index commbuf modulo the local grid size */
 +                            commbuf += buf_my*fft_nx*fft_nz;
 +
 +                            bClearBuf  = bClearBufXY;
 +                            bClearBufXY = FALSE;
 +                        }
 +                        else
 +                        {
 +                            bClearBuf  = bClearBufY;
 +                            bClearBufY = FALSE;
 +                        }
 +                    }
 +                    else
 +                    {
 +                        commbuf = commbuf_x;
 +                        buf_my  = fft_ny;
 +                        bClearBuf  = bClearBufX;
 +                        bClearBufX = FALSE;
 +                    }
 +
 +                    /* Copy to the communication buffer */
 +                    for(x=offx; x<tx1; x++)
 +                    {
 +                        for(y=offy; y<ty1; y++)
 +                        {
 +                            i0  = (x*buf_my + y)*fft_nz;
 +                            i0t = ((x - ox)*nsy + (y - oy))*nsz - oz;
 +
 +                            if (bClearBuf)
 +                            {
 +                                /* First access of commbuf, initialize it */
 +                                for(z=offz; z<tz1; z++)
 +                                {
 +                                    commbuf[i0+z]  = grid_th[i0t+z];
 +                                }
 +                            }
 +                            else
 +                            {
 +                                for(z=offz; z<tz1; z++)
 +                                {
 +                                    commbuf[i0+z] += grid_th[i0t+z];
 +                                }
 +                            }
 +                        }
 +                    }
 +                }
 +            }
 +        }
 +    }
 +}
 +
 +
 +/* Sum the node-local real-space FFT grid with the overlap regions
 + * received from neighboring PME nodes over MPI, first along the minor
 + * decomposition dimension, then along the major one. This routine is
 + * only used in the forward (charge-spreading) direction; see the note
 + * in the body about the backward direction.
 + */
 +static void sum_fftgrid_dd(gmx_pme_t pme,real *fftgrid)
 +{
 +    ivec local_fft_ndata,local_fft_offset,local_fft_size;
 +    pme_overlap_t *overlap;
 +    int  send_nindex;
 +    int  recv_index0,recv_nindex;
 +#ifdef GMX_MPI
 +    MPI_Status stat;
 +#endif
 +    int  ipulse,send_id,recv_id,datasize,gridsize,size_yx;
 +    real *sendptr,*recvptr;
 +    int  x,y,z,indg,indb;
 +
 +    /* Note that this routine is only used for forward communication.
 +     * Since the force gathering, unlike the charge spreading,
 +     * can be trivially parallelized over the particles,
 +     * the backwards process is much simpler and can use the "old"
 +     * communication setup.
 +     */
 +
 +    gmx_parallel_3dfft_real_limits(pme->pfft_setupA,
 +                                   local_fft_ndata,
 +                                   local_fft_offset,
 +                                   local_fft_size);
 +
 +    /* Currently supports only a single communication pulse */
 +
 +/* for(ipulse=0;ipulse<overlap->noverlap_nodes;ipulse++) */
 +    if (pme->nnodes_minor > 1)
 +    {
 +        /* Minor dimension (overlap[1]).
 +         * NOTE(review): the original comment here said "Major dimension",
 +         * which looks like a copy-paste from the block further below.
 +         */
 +        overlap = &pme->overlap[1];
 +
 +        if (pme->nnodes_major > 1)
 +        {
 +             /* With 2D decomposition the x-overlap region also has to be
 +              * communicated along y, hence the extra size_yx rows. */
 +             size_yx = pme->overlap[0].comm_data[0].send_nindex;
 +        }
 +        else
 +        {
 +            size_yx = 0;
 +        }
 +        datasize = (local_fft_ndata[XX]+size_yx)*local_fft_ndata[ZZ];
 +
 +        ipulse = 0;
 +
 +        send_id = overlap->send_id[ipulse];
 +        recv_id = overlap->recv_id[ipulse];
 +        send_nindex   = overlap->comm_data[ipulse].send_nindex;
 +        /* recv_index0   = overlap->comm_data[ipulse].recv_index0; */
 +        /* NOTE(review): recv_index0 is assigned but never read below. */
 +        recv_index0 = 0;
 +        recv_nindex   = overlap->comm_data[ipulse].recv_nindex;
 +
 +        sendptr = overlap->sendbuf;
 +        recvptr = overlap->recvbuf;
 +
 +        /*
 +        printf("node %d comm %2d x %2d x %2d\n",pme->nodeid,
 +               local_fft_ndata[XX]+size_yx,send_nindex,local_fft_ndata[ZZ]);
 +        printf("node %d send %f, %f\n",pme->nodeid,
 +               sendptr[0],sendptr[send_nindex*datasize-1]);
 +        */
 +
 +#ifdef GMX_MPI
 +        /* Exchange overlap slabs with the minor-dimension neighbors. */
 +        MPI_Sendrecv(sendptr,send_nindex*datasize,GMX_MPI_REAL,
 +                     send_id,ipulse,
 +                     recvptr,recv_nindex*datasize,GMX_MPI_REAL,
 +                     recv_id,ipulse,
 +                     overlap->mpi_comm,&stat);
 +#endif
 +
 +        /* Add the received y-overlap rows into our local fftgrid. */
 +        for(x=0; x<local_fft_ndata[XX]; x++)
 +        {
 +            for(y=0; y<recv_nindex; y++)
 +            {
 +                indg = (x*local_fft_size[YY] + y)*local_fft_size[ZZ];
 +                indb = (x*recv_nindex        + y)*local_fft_ndata[ZZ];
 +                for(z=0; z<local_fft_ndata[ZZ]; z++)
 +                {
 +                    fftgrid[indg+z] += recvptr[indb+z];
 +                }
 +            }
 +        }
 +        if (pme->nnodes_major > 1)
 +        {
 +            /* The trailing size_yx part of the received buffer belongs to
 +             * the x-overlap region: accumulate it into the major-dimension
 +             * send buffer so it is forwarded in the next exchange below.
 +             */
 +            sendptr = pme->overlap[0].sendbuf;
 +            for(x=0; x<size_yx; x++)
 +            {
 +                for(y=0; y<recv_nindex; y++)
 +                {
 +                    indg = (x*local_fft_ndata[YY] + y)*local_fft_ndata[ZZ];
 +                    indb = ((local_fft_ndata[XX] + x)*recv_nindex +y)*local_fft_ndata[ZZ];
 +                    for(z=0; z<local_fft_ndata[ZZ]; z++)
 +                    {
 +                        sendptr[indg+z] += recvptr[indb+z];
 +                    }
 +                }
 +            }
 +        }
 +    }
 +
 +    /* for(ipulse=0;ipulse<overlap->noverlap_nodes;ipulse++) */
 +    if (pme->nnodes_major > 1)
 +    {
 +        /* Major dimension */
 +        overlap = &pme->overlap[0];
 +
 +        datasize = local_fft_ndata[YY]*local_fft_ndata[ZZ];
 +        /* NOTE(review): gridsize is computed but never used in this routine. */
 +        gridsize = local_fft_size[YY] *local_fft_size[ZZ];
 +
 +        ipulse = 0;
 +
 +        send_id = overlap->send_id[ipulse];
 +        recv_id = overlap->recv_id[ipulse];
 +        send_nindex   = overlap->comm_data[ipulse].send_nindex;
 +        /* recv_index0   = overlap->comm_data[ipulse].recv_index0; */
 +        recv_index0 = 0;
 +        recv_nindex   = overlap->comm_data[ipulse].recv_nindex;
 +
 +        sendptr = overlap->sendbuf;
 +        recvptr = overlap->recvbuf;
 +
 +        if (debug != NULL)
 +        {
 +            fprintf(debug,"PME fftgrid comm %2d x %2d x %2d\n",
 +                   send_nindex,local_fft_ndata[YY],local_fft_ndata[ZZ]);
 +        }
 +
 +#ifdef GMX_MPI
 +        /* Exchange overlap slabs with the major-dimension neighbors. */
 +        MPI_Sendrecv(sendptr,send_nindex*datasize,GMX_MPI_REAL,
 +                     send_id,ipulse,
 +                     recvptr,recv_nindex*datasize,GMX_MPI_REAL,
 +                     recv_id,ipulse,
 +                     overlap->mpi_comm,&stat);
 +#endif
 +
 +        /* Add the received x-overlap slabs into our local fftgrid. */
 +        for(x=0; x<recv_nindex; x++)
 +        {
 +            for(y=0; y<local_fft_ndata[YY]; y++)
 +            {
 +                indg = (x*local_fft_size[YY]  + y)*local_fft_size[ZZ];
 +                indb = (x*local_fft_ndata[YY] + y)*local_fft_ndata[ZZ];
 +                for(z=0; z<local_fft_ndata[ZZ]; z++)
 +                {
 +                    fftgrid[indg+z] += recvptr[indb+z];
 +                }
 +            }
 +        }
 +    }
 +}
 +
 +
 +/* Spread the charges of the atoms in atc onto the PME grid(s).
 + * bCalcSplines: (re)compute the interpolation indices and B-splines.
 + * bSpread:      actually spread the charges onto the grid.
 + * With multiple threads each thread spreads onto its own local grid,
 + * after which the thread-local grids are reduced into fftgrid and,
 + * with MPI, the node overlap regions are summed via sum_fftgrid_dd().
 + * NOTE(review): grids may be NULL (see gmx_pme_calc_energy); that is
 + * only safe when bSpread is FALSE, since the spreading branch
 + * dereferences grids — confirm callers respect this.
 + */
 +static void spread_on_grid(gmx_pme_t pme,
 +                           pme_atomcomm_t *atc,pmegrids_t *grids,
 +                           gmx_bool bCalcSplines,gmx_bool bSpread,
 +                           real *fftgrid)
 +{
 +    int nthread,thread;
 +#ifdef PME_TIME_THREADS
 +    /* Timing accumulators; static so they persist across calls.
 +     * Only updated outside the parallel regions (cs1a per-thread). */
 +    gmx_cycles_t c1,c2,c3,ct1a,ct1b,ct1c;
 +    static double cs1=0,cs2=0,cs3=0;
 +    static double cs1a[6]={0,0,0,0,0,0};
 +    static int cnt=0;
 +#endif
 +
 +    nthread = pme->nthread;
 +    assert(nthread>0);
 +
 +#ifdef PME_TIME_THREADS
 +    c1 = omp_cyc_start();
 +#endif
 +    if (bCalcSplines)
 +    {
 +#pragma omp parallel for num_threads(nthread) schedule(static)
 +        for(thread=0; thread<nthread; thread++)
 +        {
 +            int start,end;
 +
 +            /* Static partition of the atoms over the threads */
 +            start = atc->n* thread   /nthread;
 +            end   = atc->n*(thread+1)/nthread;
 +
 +            /* Compute fftgrid index for all atoms,
 +             * with help of some extra variables.
 +             */
 +            calc_interpolation_idx(pme,atc,start,end,thread);
 +        }
 +    }
 +#ifdef PME_TIME_THREADS
 +    c1 = omp_cyc_end(c1);
 +    cs1 += (double)c1;
 +#endif
 +
 +#ifdef PME_TIME_THREADS
 +    c2 = omp_cyc_start();
 +#endif
 +#pragma omp parallel for num_threads(nthread) schedule(static)
 +    for(thread=0; thread<nthread; thread++)
 +    {
 +        splinedata_t *spline;
 +        pmegrid_t *grid;
 +
 +        /* make local bsplines  */
 +        if (grids == NULL || grids->nthread == 1)
 +        {
 +            /* Single-threaded spreading: all atoms, the full grid. */
 +            spline = &atc->spline[0];
 +
 +            spline->n = atc->n;
 +
 +            grid = &grids->grid;
 +        }
 +        else
 +        {
 +            /* Multi-threaded: each thread gets its own atom subset
 +             * and its own thread-local grid. */
 +            spline = &atc->spline[thread];
 +
 +            make_thread_local_ind(atc,thread,spline);
 +
 +            grid = &grids->grid_th[thread];
 +        }
 +
 +        if (bCalcSplines)
 +        {
 +            make_bsplines(spline->theta,spline->dtheta,pme->pme_order,
 +                          atc->fractx,spline->n,spline->ind,atc->q,pme->bFEP);
 +        }
 +
 +        if (bSpread)
 +        {
 +            /* put local atoms on grid. */
 +#ifdef PME_TIME_SPREAD
 +            ct1a = omp_cyc_start();
 +#endif
 +            spread_q_bsplines_thread(grid,atc,spline,&pme->spline_work);
 +
 +            if (grids->nthread > 1)
 +            {
 +                /* Copy the interior of the thread-local grid to fftgrid;
 +                 * overlaps are handled by reduce_threadgrid_overlap below. */
 +                copy_local_grid(pme,grids,thread,fftgrid);
 +            }
 +#ifdef PME_TIME_SPREAD
 +            ct1a = omp_cyc_end(ct1a);
 +            cs1a[thread] += (double)ct1a;
 +#endif
 +        }
 +    }
 +#ifdef PME_TIME_THREADS
 +    c2 = omp_cyc_end(c2);
 +    cs2 += (double)c2;
 +#endif
 +
 +    if (bSpread && grids->nthread > 1)
 +    {
 +#ifdef PME_TIME_THREADS
 +        c3 = omp_cyc_start();
 +#endif
 +        /* Each thread sums the overlapping contributions of the other
 +         * thread-local grids into its own region of fftgrid. */
 +#pragma omp parallel for num_threads(grids->nthread) schedule(static)
 +        for(thread=0; thread<grids->nthread; thread++)
 +        {
 +            reduce_threadgrid_overlap(pme,grids,thread,
 +                                      fftgrid,
 +                                      pme->overlap[0].sendbuf,
 +                                      pme->overlap[1].sendbuf);
 +#ifdef PRINT_PME_SENDBUF
 +            print_sendbuf(pme,pme->overlap[0].sendbuf);
 +#endif
 +        }
 +#ifdef PME_TIME_THREADS
 +        c3 = omp_cyc_end(c3);
 +        cs3 += (double)c3;
 +#endif
 +
 +        if (pme->nnodes > 1)
 +        {
 +            /* Communicate the overlapping part of the fftgrid */
 +            sum_fftgrid_dd(pme,fftgrid);
 +        }
 +    }
 +
 +#ifdef PME_TIME_THREADS
 +    /* Print accumulated timings every 20 calls. */
 +    cnt++;
 +    if (cnt % 20 == 0)
 +    {
 +        printf("idx %.2f spread %.2f red %.2f",
 +               cs1*1e-9,cs2*1e-9,cs3*1e-9);
 +#ifdef PME_TIME_SPREAD
 +        for(thread=0; thread<nthread; thread++)
 +            printf(" %.2f",cs1a[thread]*1e-9);
 +#endif
 +        printf("\n");
 +    }
 +#endif
 +}
 +
 +
 +/* Debug helper: print an nx*ny*nz sub-grid of g to fp, one line per
 + * grid point as "x y z value", with (sx,sy,sz) added as index offsets.
 + * my and mz are the allocated y- and z-strides of g.
 + */
 +static void dump_grid(FILE *fp,
 +                      int sx,int sy,int sz,int nx,int ny,int nz,
 +                      int my,int mz,const real *g)
 +{
 +    int x,y,z;
 +
 +    for(x=0; x<nx; x++)
 +    {
 +        for(y=0; y<ny; y++)
 +        {
 +            for(z=0; z<nz; z++)
 +            {
 +                fprintf(fp,"%2d %2d %2d %6.3f\n",
 +                        sx+x,sy+y,sz+z,g[(x*my + y)*mz + z]);
 +            }
 +        }
 +    }
 +}
 +
 +/* Debug helper: dump this node's local part of fftgrid to stderr,
 + * using the global grid start indices and the local FFT strides.
 + */
 +static void dump_local_fftgrid(gmx_pme_t pme,const real *fftgrid)
 +{
 +    ivec local_fft_ndata,local_fft_offset,local_fft_size;
 +
 +    gmx_parallel_3dfft_real_limits(pme->pfft_setupA,
 +                                   local_fft_ndata,
 +                                   local_fft_offset,
 +                                   local_fft_size);
 +
 +    dump_grid(stderr,
 +              pme->pmegrid_start_ix,
 +              pme->pmegrid_start_iy,
 +              pme->pmegrid_start_iz,
 +              pme->pmegrid_nx-pme->pme_order+1,
 +              pme->pmegrid_ny-pme->pme_order+1,
 +              pme->pmegrid_nz-pme->pme_order+1,
 +              local_fft_size[YY],
 +              local_fft_size[ZZ],
 +              fftgrid);
 +}
 +
 +
 +/* Compute the reciprocal-space PME energy V for n test particles at
 + * positions x with charges q, without modifying the grid (used e.g.
 + * for TPI). Serial only; aborts when called in parallel or with FEP.
 + */
 +void gmx_pme_calc_energy(gmx_pme_t pme,int n,rvec *x,real *q,real *V)
 +{
 +    pme_atomcomm_t *atc;
 +    pmegrids_t *grid;
 +
 +    if (pme->nnodes > 1)
 +    {
 +        gmx_incons("gmx_pme_calc_energy called in parallel");
 +    }
 +    /* NOTE(review): gmx_bool TRUE is 1, so "bFEP > 1" can never be
 +     * true — this FEP guard looks like it was meant to be
 +     * "if (pme->bFEP)". Confirm and fix upstream. */
 +    if (pme->bFEP > 1)
 +    {
 +        gmx_incons("gmx_pme_calc_energy with free energy");
 +    }
 +
 +    /* Use the dedicated single-threaded energy atom-communication
 +     * struct; allocate its spline data on first use. */
 +    atc = &pme->atc_energy;
 +    atc->nthread   = 1;
 +    if (atc->spline == NULL)
 +    {
 +        snew(atc->spline,atc->nthread);
 +    }
 +    atc->nslab     = 1;
 +    atc->bSpread   = TRUE;
 +    atc->pme_order = pme->pme_order;
 +    atc->n         = n;
 +    pme_realloc_atomcomm_things(atc);
 +    atc->x         = x;
 +    atc->q         = q;
 +
 +    /* We only use the A-charges grid */
 +    grid = &pme->pmegridA;
 +
 +    /* Compute splines only (bSpread==FALSE): the existing grid
 +     * is sampled, not modified. */
 +    spread_on_grid(pme,atc,NULL,TRUE,FALSE,pme->fftgridA);
 +
 +    *V = gather_energy_bsplines(pme,grid->grid.grid,atc);
 +}
 +
 +
 +/* Reset the cycle and flop counters of a PME-only node mid-run, and
 + * shift ir->init_step/nsteps so subsequent accounting restarts from
 + * the current step. cr is currently unused here.
 + */
 +static void reset_pmeonly_counters(t_commrec *cr,gmx_wallcycle_t wcycle,
 +        t_nrnb *nrnb,t_inputrec *ir, gmx_large_int_t step_rel)
 +{
 +    /* Reset all the counters related to performance over the run */
 +    wallcycle_stop(wcycle,ewcRUN);
 +    wallcycle_reset_all(wcycle);
 +    init_nrnb(nrnb);
 +    ir->init_step += step_rel;
 +    ir->nsteps    -= step_rel;
 +    wallcycle_start(wcycle,ewcRUN);
 +}
 +
 +
 +/* Main loop of a dedicated PME node: repeatedly receive coordinates
 + * and charges from the PP nodes, run gmx_pme_do, and send back forces,
 + * virial and energy — until the PP side signals termination (natoms
 + * == -1). Returns 0 on normal termination.
 + */
 +int gmx_pmeonly(gmx_pme_t pme,
 +                t_commrec *cr,    t_nrnb *nrnb,
 +                gmx_wallcycle_t wcycle,
 +                real ewaldcoeff,  gmx_bool bGatherOnly,
 +                t_inputrec *ir)
 +{
 +    gmx_pme_pp_t pme_pp;
 +    int  natoms;
 +    matrix box;
 +    rvec *x_pp=NULL,*f_pp=NULL;
 +    real *chargeA=NULL,*chargeB=NULL;
 +    real lambda=0;
 +    int  maxshift_x=0,maxshift_y=0;
 +    real energy,dvdlambda;
 +    matrix vir;
 +    float cycles;
 +    int  count;
 +    gmx_bool bEnerVir;
 +    gmx_large_int_t step,step_rel;
 +
 +
 +    pme_pp = gmx_pme_pp_init(cr);
 +
 +    init_nrnb(nrnb);
 +
 +    count = 0;
 +    do /****** this is a quasi-loop over time steps! */
 +    {
 +        /* Domain decomposition */
 +        natoms = gmx_pme_recv_q_x(pme_pp,
 +                                  &chargeA,&chargeB,box,&x_pp,&f_pp,
 +                                  &maxshift_x,&maxshift_y,
 +                                  &pme->bFEP,&lambda,
 +                                  &bEnerVir,
 +                                  &step);
 +
 +        if (natoms == -1) {
 +            /* We should stop: break out of the loop */
 +            break;
 +        }
 +
 +        step_rel = step - ir->init_step;
 +
 +        /* Start the run timer on the first real step only. */
 +        if (count == 0)
 +            wallcycle_start(wcycle,ewcRUN);
 +
 +        wallcycle_start(wcycle,ewcPMEMESH);
 +
 +        dvdlambda = 0;
 +        clear_mat(vir);
 +        gmx_pme_do(pme,0,natoms,x_pp,f_pp,chargeA,chargeB,box,
 +                   cr,maxshift_x,maxshift_y,nrnb,wcycle,vir,ewaldcoeff,
 +                   &energy,lambda,&dvdlambda,
 +                   GMX_PME_DO_ALL_F | (bEnerVir ? GMX_PME_CALC_ENER_VIR : 0));
 +
 +        cycles = wallcycle_stop(wcycle,ewcPMEMESH);
 +
 +        gmx_pme_send_force_vir_ener(pme_pp,
 +                                    f_pp,vir,energy,dvdlambda,
 +                                    cycles);
 +
 +        count++;
 +
 +        if (step_rel == wcycle_get_reset_counters(wcycle))
 +        {
 +            /* Reset all the counters related to performance over the run */
 +            reset_pmeonly_counters(cr,wcycle,nrnb,ir,step_rel);
 +            wcycle_set_reset_counters(wcycle, 0);
 +        }
 +
 +    } /***** end of quasi-loop, we stop with the break above */
 +    while (TRUE);
 +
 +    return 0;
 +}
 +
 +/* Perform a full PME mesh step for homenr atoms starting at index
 + * start: redistribute x/q over the PME nodes, spread charges, 3D-FFT,
 + * solve in k-space, inverse FFT, gather forces, and redistribute the
 + * forces back. With FEP (pme->bFEP) the whole procedure runs twice,
 + * once for the A and once for the B charges, and energy/virial/dvdl
 + * are lambda-interpolated at the end. flags selects the stages
 + * (GMX_PME_SPREAD_Q, GMX_PME_SOLVE, GMX_PME_CALC_F,
 + * GMX_PME_CALC_ENER_VIR). Returns 0.
 + */
 +int gmx_pme_do(gmx_pme_t pme,
 +               int start,       int homenr,
 +               rvec x[],        rvec f[],
 +               real *chargeA,   real *chargeB,
 +               matrix box, t_commrec *cr,
 +               int  maxshift_x, int maxshift_y,
 +               t_nrnb *nrnb,    gmx_wallcycle_t wcycle,
 +               matrix vir,      real ewaldcoeff,
 +               real *energy,    real lambda,
 +               real *dvdlambda, int flags)
 +{
 +    int     q,d,i,j,ntot,npme;
 +    /* NOTE(review): nx,ny,nz, local_ny and ptr appear unused in this
 +     * function body — candidates for removal. */
 +    int     nx,ny,nz;
 +    int     n_d,local_ny;
 +    pme_atomcomm_t *atc=NULL;
 +    pmegrids_t *pmegrid=NULL;
 +    real    *grid=NULL;
 +    real    *ptr;
 +    rvec    *x_d,*f_d;
 +    real    *charge=NULL,*q_d;
 +    real    energy_AB[2];
 +    matrix  vir_AB[2];
 +    gmx_bool bClearF;
 +    gmx_parallel_3dfft_t pfft_setup;
 +    real *  fftgrid;
 +    t_complex * cfftgrid;
 +    int     thread;
 +    const gmx_bool bCalcEnerVir = flags & GMX_PME_CALC_ENER_VIR;
 +    const gmx_bool bCalcF = flags & GMX_PME_CALC_F;
 +
 +    assert(pme->nnodes > 0);
 +    assert(pme->nnodes == 1 || pme->ndecompdim > 0);
 +
 +    if (pme->nnodes > 1) {
 +        /* Make sure the particle-decomposition index array is large
 +         * enough for this step's atom count. */
 +        atc = &pme->atc[0];
 +        atc->npd = homenr;
 +        if (atc->npd > atc->pd_nalloc) {
 +            atc->pd_nalloc = over_alloc_dd(atc->npd);
 +            srenew(atc->pd,atc->pd_nalloc);
 +        }
 +        atc->maxshift = (atc->dimind==0 ? maxshift_x : maxshift_y);
 +    }
 +    else
 +    {
 +        /* This could be necessary for TPI */
 +        pme->atc[0].n = homenr;
 +    }
 +
 +    /* Loop over the A charges and, with FEP, also the B charges. */
 +    for(q=0; q<(pme->bFEP ? 2 : 1); q++) {
 +        if (q == 0) {
 +            pmegrid = &pme->pmegridA;
 +            fftgrid = pme->fftgridA;
 +            cfftgrid = pme->cfftgridA;
 +            pfft_setup = pme->pfft_setupA;
 +            charge = chargeA+start;
 +        } else {
 +            pmegrid = &pme->pmegridB;
 +            fftgrid = pme->fftgridB;
 +            cfftgrid = pme->cfftgridB;
 +            pfft_setup = pme->pfft_setupB;
 +            charge = chargeB+start;
 +        }
 +        grid = pmegrid->grid.grid;
 +        /* Unpack structure */
 +        if (debug) {
 +            fprintf(debug,"PME: nnodes = %d, nodeid = %d\n",
 +                    cr->nnodes,cr->nodeid);
 +            fprintf(debug,"Grid = %p\n",(void*)grid);
 +            if (grid == NULL)
 +                gmx_fatal(FARGS,"No grid!");
 +        }
 +        where();
 +
 +        /* Reciprocal box from the (possibly changed) triclinic box. */
 +        m_inv_ur0(box,pme->recipbox);
 +
 +        if (pme->nnodes == 1) {
 +            atc = &pme->atc[0];
 +            if (DOMAINDECOMP(cr)) {
 +                atc->n = homenr;
 +                pme_realloc_atomcomm_things(atc);
 +            }
 +            atc->x = x;
 +            atc->q = charge;
 +            atc->f = f;
 +        } else {
 +            /* Redistribute the atoms over the PME nodes, innermost
 +             * decomposition dimension first. */
 +            wallcycle_start(wcycle,ewcPME_REDISTXF);
 +            for(d=pme->ndecompdim-1; d>=0; d--)
 +            {
 +                if (d == pme->ndecompdim-1)
 +                {
 +                    n_d = homenr;
 +                    x_d = x + start;
 +                    q_d = charge;
 +                }
 +                else
 +                {
 +                    n_d = pme->atc[d+1].n;
 +                    x_d = atc->x;
 +                    q_d = atc->q;
 +                }
 +                atc = &pme->atc[d];
 +                atc->npd = n_d;
 +                if (atc->npd > atc->pd_nalloc) {
 +                    atc->pd_nalloc = over_alloc_dd(atc->npd);
 +                    srenew(atc->pd,atc->pd_nalloc);
 +                }
 +                atc->maxshift = (atc->dimind==0 ? maxshift_x : maxshift_y);
 +                pme_calc_pidx_wrapper(n_d,pme->recipbox,x_d,atc);
 +                where();
 +
 +                /* Redistribute x (only once) and qA or qB */
 +                if (DOMAINDECOMP(cr)) {
 +                    dd_pmeredist_x_q(pme, n_d, q==0, x_d, q_d, atc);
 +                } else {
 +                    pmeredist_pd(pme, TRUE, n_d, q==0, x_d, q_d, atc);
 +                }
 +            }
 +            where();
 +
 +            wallcycle_stop(wcycle,ewcPME_REDISTXF);
 +        }
 +
 +        if (debug)
 +            fprintf(debug,"Node= %6d, pme local particles=%6d\n",
 +                    cr->nodeid,atc->n);
 +
 +        if (flags & GMX_PME_SPREAD_Q)
 +        {
 +            wallcycle_start(wcycle,ewcPME_SPREADGATHER);
 +
 +            /* Spread the charges on a grid */
 +            spread_on_grid(pme,&pme->atc[0],pmegrid,q==0,TRUE,fftgrid);
 +
 +            if (q == 0)
 +            {
 +                inc_nrnb(nrnb,eNR_WEIGHTS,DIM*atc->n);
 +            }
 +            inc_nrnb(nrnb,eNR_SPREADQBSP,
 +                     pme->pme_order*pme->pme_order*pme->pme_order*atc->n);
 +
 +            if (pme->nthread == 1)
 +            {
 +                /* With a single thread the grid wrapping and the copy
 +                 * to the FFT grid were not done in spread_on_grid. */
 +                wrap_periodic_pmegrid(pme,grid);
 +
 +                /* sum contributions to local grid from other nodes */
 +#ifdef GMX_MPI
 +                if (pme->nnodes > 1)
 +                {
 +                    gmx_sum_qgrid_dd(pme,grid,GMX_SUM_QGRID_FORWARD);
 +                    where();
 +                }
 +#endif
 +
 +                copy_pmegrid_to_fftgrid(pme,grid,fftgrid);
 +            }
 +
 +            wallcycle_stop(wcycle,ewcPME_SPREADGATHER);
 +
 +            /*
 +            dump_local_fftgrid(pme,fftgrid);
 +            exit(0);
 +            */
 +        }
 +
 +        /* Here we start a large thread parallel region */
 +#pragma omp parallel for num_threads(pme->nthread) schedule(static)
 +        for(thread=0; thread<pme->nthread; thread++)
 +        {
 +            if (flags & GMX_PME_SOLVE)
 +            {
 +                int loop_count;
 +
 +                /* do 3d-fft */
 +                if (thread == 0)
 +                {
 +                    wallcycle_start(wcycle,ewcPME_FFT);
 +                }
 +                gmx_parallel_3dfft_execute(pfft_setup,GMX_FFT_REAL_TO_COMPLEX,
 +                                           fftgrid,cfftgrid,thread,wcycle);
 +                if (thread == 0)
 +                {
 +                    wallcycle_stop(wcycle,ewcPME_FFT);
 +                }
 +                where();
 +
 +                /* solve in k-space for our local cells */
 +                if (thread == 0)
 +                {
 +                    wallcycle_start(wcycle,ewcPME_SOLVE);
 +                }
 +                loop_count =
 +                    solve_pme_yzx(pme,cfftgrid,ewaldcoeff,
 +                                  box[XX][XX]*box[YY][YY]*box[ZZ][ZZ],
 +                                  bCalcEnerVir,
 +                                  pme->nthread,thread);
 +                if (thread == 0)
 +                {
 +                    wallcycle_stop(wcycle,ewcPME_SOLVE);
 +                    where();
 +                    inc_nrnb(nrnb,eNR_SOLVEPME,loop_count);
 +                }
 +            }
 +
 +            if (bCalcF)
 +            {
 +                /* do 3d-invfft */
 +                if (thread == 0)
 +                {
 +                    where();
 +                    wallcycle_start(wcycle,ewcPME_FFT);
 +                }
 +                gmx_parallel_3dfft_execute(pfft_setup,GMX_FFT_COMPLEX_TO_REAL,
 +                                           cfftgrid,fftgrid,thread,wcycle);
 +                if (thread == 0)
 +                {
 +                    wallcycle_stop(wcycle,ewcPME_FFT);
 +
 +                    where();
 +
 +                    if (pme->nodeid == 0)
 +                    {
 +                        /* FFT flop count estimate: ntot*log2(ntot). */
 +                        ntot = pme->nkx*pme->nky*pme->nkz;
 +                        npme  = ntot*log((real)ntot)/log(2.0);
 +                        inc_nrnb(nrnb,eNR_FFT,2*npme);
 +                    }
 +
 +                    wallcycle_start(wcycle,ewcPME_SPREADGATHER);
 +                }
 +
 +                copy_fftgrid_to_pmegrid(pme,fftgrid,grid,pme->nthread,thread);
 +            }
 +        }
 +        /* End of thread parallel section.
 +         * With MPI we have to synchronize here before gmx_sum_qgrid_dd.
 +         */
 +
 +        if (bCalcF)
 +        {
 +            /* distribute local grid to all nodes */
 +#ifdef GMX_MPI
 +            if (pme->nnodes > 1) {
 +                gmx_sum_qgrid_dd(pme,grid,GMX_SUM_QGRID_BACKWARD);
 +            }
 +#endif
 +            where();
 +
 +            unwrap_periodic_pmegrid(pme,grid);
 +
 +            /* interpolate forces for our local atoms */
 +
 +            where();
 +
 +            /* If we are running without parallelization,
 +             * atc->f is the actual force array, not a buffer,
 +             * therefore we should not clear it.
 +             */
 +            bClearF = (q == 0 && PAR(cr));
 +#pragma omp parallel for num_threads(pme->nthread) schedule(static)
 +            for(thread=0; thread<pme->nthread; thread++)
 +            {
 +                gather_f_bsplines(pme,grid,bClearF,atc,
 +                                  &atc->spline[thread],
 +                                  pme->bFEP ? (q==0 ? 1.0-lambda : lambda) : 1.0);
 +            }
 +
 +            where();
 +
 +            inc_nrnb(nrnb,eNR_GATHERFBSP,
 +                     pme->pme_order*pme->pme_order*pme->pme_order*pme->atc[0].n);
 +            wallcycle_stop(wcycle,ewcPME_SPREADGATHER);
 +        }
 +
 +        if (bCalcEnerVir)
 +        {
 +            /* This should only be called on the master thread
 +             * and after the threads have synchronized.
 +             */
 +            get_pme_ener_vir(pme,pme->nthread,&energy_AB[q],vir_AB[q]);
 +        }
 +    } /* of q-loop */
 +
 +    if (bCalcF && pme->nnodes > 1) {
 +        /* Redistribute the forces back to the PP decomposition,
 +         * outermost decomposition dimension first. */
 +        wallcycle_start(wcycle,ewcPME_REDISTXF);
 +        for(d=0; d<pme->ndecompdim; d++)
 +        {
 +            atc = &pme->atc[d];
 +            if (d == pme->ndecompdim - 1)
 +            {
 +                n_d = homenr;
 +                f_d = f + start;
 +            }
 +            else
 +            {
 +                n_d = pme->atc[d+1].n;
 +                f_d = pme->atc[d+1].f;
 +            }
 +            if (DOMAINDECOMP(cr)) {
 +                dd_pmeredist_f(pme,atc,n_d,f_d,
 +                               d==pme->ndecompdim-1 && pme->bPPnode);
 +            } else {
 +                pmeredist_pd(pme, FALSE, n_d, TRUE, f_d, NULL, atc);
 +            }
 +        }
 +
 +        wallcycle_stop(wcycle,ewcPME_REDISTXF);
 +    }
 +    where();
 +
 +    if (bCalcEnerVir)
 +    {
 +        /* With FEP, interpolate energy/virial between the A and B
 +         * states and accumulate dH/dlambda. */
 +        if (!pme->bFEP) {
 +            *energy = energy_AB[0];
 +            m_add(vir,vir_AB[0],vir);
 +        } else {
 +            *energy = (1.0-lambda)*energy_AB[0] + lambda*energy_AB[1];
 +            *dvdlambda += energy_AB[1] - energy_AB[0];
 +            for(i=0; i<DIM; i++)
 +            {
 +                for(j=0; j<DIM; j++)
 +                {
 +                    vir[i][j] += (1.0-lambda)*vir_AB[0][i][j] + 
 +                        lambda*vir_AB[1][i][j];
 +                }
 +            }
 +        }
 +    }
 +    else
 +    {
 +        *energy = 0;
 +    }
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"PME mesh energy: %g\n",*energy);
 +    }
 +
 +    return 0;
 +}
index d850cf939b8fad91e8178dc9d4bb50942f7fe05d,0000000000000000000000000000000000000000..4acaee132dcc0435c746c939c2fed161eb8be27e
mode 100644,000000..100644
--- /dev/null
@@@ -1,248 -1,0 +1,248 @@@
-       programName_(Path::splitToPathAndFilename(fullInvokedProgram_).second),
-       invariantProgramName_(programName_)
 +/*
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2009, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 + *
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + */
 +/*! \internal \file
 + * \brief
 + * Implements gmx::ProgramInfo.
 + *
 + * \author Teemu Murtola <teemu.murtola@cbr.su.se>
 + * \ingroup module_utility
 + */
 +#include "programinfo.h"
 +
 +// For GMX_BINARY_SUFFIX
 +#ifdef HAVE_CONFIG_H
 +#include "config.h"
 +#endif
 +
 +#include <cstdlib>
 +#include <cstring>
 +
 +#include <algorithm>
 +#include <string>
 +
 +#include <boost/scoped_ptr.hpp>
 +
 +#include "gromacs/legacyheaders/futil.h"
 +#include "gromacs/legacyheaders/thread_mpi/mutex.h"
 +
 +#include "gromacs/utility/exceptions.h"
 +#include "gromacs/utility/path.h"
 +#include "gromacs/utility/stringutil.h"
 +
 +namespace gmx
 +{
 +
 +namespace
 +{
 +tMPI::mutex g_programInfoMutex;
 +//! Partially filled program info, needed to support set_program_name().
 +boost::scoped_ptr<ProgramInfo> g_partialProgramInfo;
 +boost::scoped_ptr<ProgramInfo> g_programInfo;
 +} // namespace
 +
 +/********************************************************************
 + * ProgramInfo::Impl
 + */
 +
 +class ProgramInfo::Impl
 +{
 +    public:
 +        Impl();
 +        Impl(const char *realBinaryName, int argc, const char *const argv[]);
 +
 +        std::string realBinaryName_;
 +        std::string fullInvokedProgram_;
 +        std::string programName_;
 +        std::string invariantProgramName_;
 +        std::string commandLine_;
 +};
 +
 +ProgramInfo::Impl::Impl()
 +    : realBinaryName_("GROMACS"), fullInvokedProgram_("GROMACS"),
 +      programName_("GROMACS"), invariantProgramName_("GROMACS")
 +{
 +}
 +
 +ProgramInfo::Impl::Impl(const char *realBinaryName,
 +                        int argc, const char *const argv[])
 +    : realBinaryName_(realBinaryName != NULL ? realBinaryName : ""),
 +      fullInvokedProgram_(argc != 0 ? argv[0] : ""),
-     invariantProgramName_ = stripSuffixIfPresent(invariantProgramName_, ".exe");
++      programName_(Path::splitToPathAndFilename(fullInvokedProgram_).second)
 +{
 +    // Temporary hack to make things work on Windows while waiting for #950.
 +    // Some places in the existing code expect to have DIR_SEPARATOR in all
 +    // input paths, but Windows may also give '/' (and does that, e.g., for
 +    // tests invoked through CTest).
 +    // When removing this, remove also the #include "futil.h".
 +    if (DIR_SEPARATOR == '\\')
 +    {
 +        std::replace(fullInvokedProgram_.begin(), fullInvokedProgram_.end(),
 +                     '/', '\\');
 +    }
++    programName_ = stripSuffixIfPresent(programName_, ".exe");
++    invariantProgramName_ = programName_;
 +#ifdef GMX_BINARY_SUFFIX
 +    invariantProgramName_ =
 +        stripSuffixIfPresent(invariantProgramName_, GMX_BINARY_SUFFIX);
 +#endif
 +    if (realBinaryName == NULL)
 +    {
 +        realBinaryName_ = invariantProgramName_;
 +    }
 +
 +    for (int i = 0; i < argc; ++i)
 +    {
 +        if (i > 0)
 +        {
 +            commandLine_.append(" ");
 +        }
 +        const char *arg = argv[i];
 +        bool bSpaces = (std::strchr(arg, ' ') != NULL);
 +        if (bSpaces)
 +        {
 +            commandLine_.append("'");
 +        }
 +        commandLine_.append(arg);
 +        if (bSpaces)
 +        {
 +            commandLine_.append("'");
 +        }
 +    }
 +}
 +
 +/********************************************************************
 + * ProgramInfo
 + */
 +
 +// static
 +const ProgramInfo &ProgramInfo::getInstance()
 +{
 +    tMPI::lock_guard<tMPI::mutex> lock(g_programInfoMutex);
 +    if (g_programInfo.get() == NULL)
 +    {
 +        if (g_partialProgramInfo.get() != NULL)
 +        {
 +            return *g_partialProgramInfo;
 +        }
 +        static ProgramInfo fallbackInfo;
 +        return fallbackInfo;
 +    }
 +    return *g_programInfo;
 +}
 +
 +// static
 +const ProgramInfo &ProgramInfo::init(int argc, const char *const argv[])
 +{
 +    return init(NULL, argc, argv);
 +}
 +
 +// static
 +const ProgramInfo &ProgramInfo::init(const char *realBinaryName,
 +                                     int argc, const char *const argv[])
 +{
 +    try
 +    {
 +        tMPI::lock_guard<tMPI::mutex> lock(g_programInfoMutex);
 +        if (g_programInfo.get() == NULL)
 +        {
 +            // TODO: Remove this hack with negative argc once there is no need for
 +            // set_program_name().
 +            if (argc < 0)
 +            {
 +                if (g_partialProgramInfo.get() == NULL)
 +                {
 +                    g_partialProgramInfo.reset(
 +                            new ProgramInfo(realBinaryName, -argc, argv));
 +                }
 +                return *g_partialProgramInfo;
 +            }
 +            g_programInfo.reset(new ProgramInfo(realBinaryName, argc, argv));
 +        }
 +        return *g_programInfo;
 +    }
 +    catch (const std::exception &ex)
 +    {
 +        printFatalErrorMessage(stderr, ex);
 +        std::exit(1);
 +    }
 +}
 +
 +ProgramInfo::ProgramInfo()
 +    : impl_(new Impl)
 +{
 +}
 +
 +ProgramInfo::ProgramInfo(const char *realBinaryName)
 +    : impl_(new Impl(realBinaryName, 1, &realBinaryName))
 +{
 +}
 +
 +ProgramInfo::ProgramInfo(int argc, const char *const argv[])
 +    : impl_(new Impl(NULL, argc, argv))
 +{
 +}
 +
 +ProgramInfo::ProgramInfo(const char *realBinaryName,
 +                         int argc, const char *const argv[])
 +    : impl_(new Impl(realBinaryName, argc, argv))
 +{
 +}
 +
 +ProgramInfo::~ProgramInfo()
 +{
 +}
 +
 +const std::string &ProgramInfo::realBinaryName() const
 +{
 +    return impl_->realBinaryName_;
 +}
 +
 +const std::string &ProgramInfo::programNameWithPath() const
 +{
 +    return impl_->fullInvokedProgram_;
 +}
 +
 +const std::string &ProgramInfo::programName() const
 +{
 +    return impl_->programName_;
 +}
 +
 +const std::string &ProgramInfo::invariantProgramName() const
 +{
 +    return impl_->invariantProgramName_;
 +}
 +
 +const std::string &ProgramInfo::commandLine() const
 +{
 +    return impl_->commandLine_;
 +}
 +
 +} // namespace gmx
index fbf185b9a5f679a350b1b698b012a07a1b6d671e,d5e7fbddbcf184f4e4e5307e5190e7b6ffb43f03..af42da368c113e336365358f64339bafba1aa63c
@@@ -19,7 -19,8 +19,8 @@@ set(NGMX_PROGRAM
  
  foreach(PROG ${NGMX_PROGRAMS})
          add_executable(${PROG} ${PROG}.c ${NGMX_COMMON_SOURCE})
 -        target_link_libraries(${PROG} gmx ${GMX_EXTRA_LIBRARIES} ${X11_LIBRARIES})        
 +        target_link_libraries(${PROG} libgromacs ${GMX_EXTRA_LIBRARIES} ${X11_LIBRARIES})        
+         gmx_add_man_page(${PROG})
          set_target_properties(${PROG} PROPERTIES OUTPUT_NAME "${PROG}${GMX_BINARY_SUFFIX}")
  endforeach(PROG) 
  
index fd296b0824e8c8238d9e59c8e83f9c1eab8587a8,0000000000000000000000000000000000000000..60645be5e1325080532be8dde56aa934b8e18628
mode 100644,000000..100644
--- /dev/null
@@@ -1,8 -1,0 +1,9 @@@
 +include_directories(${CMAKE_SOURCE_DIR}/src/gromacs/gmxpreprocess)
 +
 +file(GLOB PROTONATE_SOURCES g_protonate.c)
 +
 +add_executable(g_protonate ${PROTONATE_SOURCES})
++gmx_add_man_page(g_protonate)
 +target_link_libraries(g_protonate ${GMX_EXTRA_LIBRARIES} libgromacs)
 +set_target_properties(g_protonate PROPERTIES OUTPUT_NAME "g_protonate${GMX_BINARY_SUFFIX}")
 +install(TARGETS g_protonate DESTINATION ${BIN_INSTALL_DIR} COMPONENT runtime)
index 58a639d918e6b19f9f3a17de5cce6a422fac35d9,0000000000000000000000000000000000000000..0e5549620076f9e7a8c52287ac9fd9b5be4d5353
mode 100644,000000..100644
--- /dev/null
@@@ -1,8 -1,0 +1,9 @@@
 +include_directories(${CMAKE_SOURCE_DIR}/src/gromacs/gmxpreprocess)
 +
 +file(GLOB X2TOP_SOURCES g_x2top.c nm2type.c)
 +
 +add_executable(g_x2top ${X2TOP_SOURCES})
++gmx_add_man_page(g_x2top)
 +target_link_libraries(g_x2top ${GMX_EXTRA_LIBRARIES} libgromacs)
 +set_target_properties(g_x2top PROPERTIES OUTPUT_NAME "g_x2top${GMX_BINARY_SUFFIX}")
 +install(TARGETS g_x2top DESTINATION ${BIN_INSTALL_DIR} COMPONENT runtime)
Simple merge
index a88639bb54dd48df3ac8f1172fb03bcd001608d8,0000000000000000000000000000000000000000..8a38a59a63b25babf322b859cbef3335aeda5751
mode 100644,000000..100644
--- /dev/null
@@@ -1,6 -1,0 +1,7 @@@
 +file(GLOB GMXCHECK_SOURCES gmxcheck.c tpbcmp.c)
 +
 +add_executable(gmxcheck ${GMXCHECK_SOURCES})
++gmx_add_man_page(gmxcheck)
 +target_link_libraries(gmxcheck ${GMX_EXTRA_LIBRARIES} libgromacs)
 +set_target_properties(gmxcheck PROPERTIES OUTPUT_NAME "gmxcheck${GMX_BINARY_SUFFIX}")
 +install(TARGETS gmxcheck DESTINATION ${BIN_INSTALL_DIR} COMPONENT runtime)
Simple merge
index 76225fed82ededd0c07335674f855d0b466031c6,0000000000000000000000000000000000000000..9ce84757d39a590b5475ba25518898a349428f7a
mode 100644,000000..100644
--- /dev/null
@@@ -1,6 -1,0 +1,7 @@@
 +file(GLOB GMXDUMP_SOURCES gmxdump.c)
 +
 +add_executable(gmxdump ${GMXDUMP_SOURCES})
++gmx_add_man_page(gmxdump)
 +target_link_libraries(gmxdump ${GMX_EXTRA_LIBRARIES} libgromacs)
 +set_target_properties(gmxdump PROPERTIES OUTPUT_NAME "gmxdump${GMX_BINARY_SUFFIX}")
 +install(TARGETS gmxdump DESTINATION ${BIN_INSTALL_DIR} COMPONENT runtime)
index 70869e426aefb69773e8cc242acec8438fac9fad,0000000000000000000000000000000000000000..d8a437cffe23cce55ebb9aa6b4cb93b821fd364c
mode 100644,000000..100644
--- /dev/null
@@@ -1,8 -1,0 +1,9 @@@
 +include_directories(${CMAKE_SOURCE_DIR}/src/gromacs/gmxpreprocess)
 +
 +file(GLOB GROMPP_SOURCES *.c)
 +
 +add_executable(grompp ${GROMPP_SOURCES})
++gmx_add_man_page(grompp)
 +target_link_libraries(grompp ${GMX_EXTRA_LIBRARIES} libgromacs)
 +set_target_properties(grompp PROPERTIES OUTPUT_NAME "grompp${GMX_BINARY_SUFFIX}")
 +install(TARGETS grompp DESTINATION ${BIN_INSTALL_DIR} COMPONENT runtime)
Simple merge
index 774b6f6749d9af850075c08b802b0460994caec1,0000000000000000000000000000000000000000..14d94b50c5de307a541c860c78dc1ae1a9c46b6b
mode 100644,000000..100644
--- /dev/null
@@@ -1,50 -1,0 +1,51 @@@
 +include_directories(${CMAKE_SOURCE_DIR}/src/gromacs/gmxpreprocess)
 +
 +set(MDRUN_SOURCES
 +    do_gct.c    gctio.c         genalg.c    ionize.c
 +    md.c        md_openmm.c     mdrun.c     membed.c
 +    repl_ex.c   runner.c        xutils.c)
 +
 +if(GMX_OPENMM) 
 +    add_subdirectory(gmx_gpu_utils)
 +    include_directories(./gmx_gpu_utils ${OpenMM_INCLUDE_DIR})
 +    link_directories(${OpenMM_LIBRARY_DIR}) 
 +    # with this define no evn.var. is needed with OPENMM_PLUGIN_DIR
 +    # if the same OpenMM installation is used for running and building 
 +    add_definitions( -DOPENMM_PLUGIN_DIR="${OpenMM_PLUGIN_DIR}" ) 
 +    file(TO_CMAKE_PATH ${OpenMM_PLUGIN_DIR} _path)
 +    add_library(openmm_api_wrapper STATIC openmm_wrapper.cpp)
 +    target_link_libraries(openmm_api_wrapper gmx_gpu_utils ${OpenMM_LIBRARIES})
 +    set(GMX_OPENMM_LIBRARIES openmm_api_wrapper gmx_gpu_utils ${OpenMM_LIBRARIES})   
 +endif(GMX_OPENMM)
 +
 +if(GMX_FAHCORE)
 +    add_library(fahcore ${MDRUN_SOURCES})
 +else(GMX_FAHCORE)
 +    add_executable(mdrun ${MDRUN_SOURCES})
++    gmx_add_man_page(mdrun)
 +    target_link_libraries(mdrun ${GMX_EXTRA_LIBRARIES} libgromacs ${GMX_OPENMM_LIBRARIES})
 +    set_target_properties(mdrun PROPERTIES OUTPUT_NAME "mdrun${GMX_BINARY_SUFFIX}")
 +    install(TARGETS mdrun DESTINATION ${BIN_INSTALL_DIR} COMPONENT mdrun)
 +
 +    if(GMX_OPENMM AND MSVC)
 +        set_target_properties(mdrun PROPERTIES LINK_FLAGS "/NODEFAULTLIB:LIBCMT")
 +    endif()
 +
 +    # Create the custom install-mdrun target
 +    if (BUILD_SHARED_LIBS)
 +        # If shared libraries are used, we need to install the libraries in
 +        # addition to the mdrun binary.
 +        add_custom_target(install-mdrun
 +            COMMAND ${CMAKE_COMMAND} -DCOMPONENT=libraries
 +                    -P ${CMAKE_BINARY_DIR}/cmake_install.cmake
 +            COMMAND ${CMAKE_COMMAND} -DCOMPONENT=mdrun
 +                    -P ${CMAKE_BINARY_DIR}/cmake_install.cmake
 +            COMMENT "Installing mdrun")
 +    else (BUILD_SHARED_LIBS)
 +        add_custom_target(install-mdrun
 +            COMMAND ${CMAKE_COMMAND} -DCOMPONENT=mdrun
 +                    -P ${CMAKE_BINARY_DIR}/cmake_install.cmake
 +            COMMENT "Installing mdrun")
 +    endif (BUILD_SHARED_LIBS)
 +    add_dependencies(install-mdrun mdrun)
 +endif(GMX_FAHCORE)
index ad22f7194eed216326ae0bcff151167c7116d007,0000000000000000000000000000000000000000..f5b862d0cf17ed7b805663b6d1a75edf1a735a5c
mode 100644,000000..100644
--- /dev/null
@@@ -1,984 -1,0 +1,982 @@@
- #ifdef GMX_OPENMP
- #include <omp.h>
- #endif
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +#ifdef __linux
 +#define _GNU_SOURCE
 +#include <sched.h>
 +#include <sys/syscall.h>
 +#endif
 +#include <signal.h>
 +#include <stdlib.h>
 +#ifdef HAVE_UNISTD_H
 +#include <unistd.h>
 +#endif
 +
 +#include "typedefs.h"
 +#include "smalloc.h"
 +#include "sysstuff.h"
 +#include "statutil.h"
 +#include "mdrun.h"
 +#include "network.h"
 +#include "pull.h"
 +#include "pull_rotation.h"
 +#include "names.h"
 +#include "disre.h"
 +#include "orires.h"
 +#include "pme.h"
 +#include "mdatoms.h"
 +#include "repl_ex.h"
 +#include "qmmm.h"
 +#include "domdec.h"
 +#include "partdec.h"
 +#include "coulomb.h"
 +#include "constr.h"
 +#include "mvdata.h"
 +#include "checkpoint.h"
 +#include "mtop_util.h"
 +#include "sighandler.h"
 +#include "tpxio.h"
 +#include "txtdump.h"
 +#include "pull_rotation.h"
 +#include "membed.h"
 +#include "macros.h"
 +
++#include "gmx_omp.h"
++
 +#ifdef GMX_LIB_MPI
 +#include <mpi.h>
 +#endif
 +#ifdef GMX_THREAD_MPI
 +#include "tmpi.h"
 +#endif
 +
 +#ifdef GMX_FAHCORE
 +#include "corewrap.h"
 +#endif
 +
 +#ifdef GMX_OPENMM
 +#include "md_openmm.h"
 +#endif
 +
-                 core+=omp_get_thread_num();
 +
 +typedef struct { 
 +    gmx_integrator_t *func;
 +} gmx_intp_t;
 +
 +/* The array should match the eI array in include/types/enums.h */
 +#ifdef GMX_OPENMM  /* FIXME do_md_openmm needs fixing */
 +const gmx_intp_t integrator[eiNR] = { {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm},{do_md_openmm}};
 +#else
 +const gmx_intp_t integrator[eiNR] = { {do_md}, {do_steep}, {do_cg}, {do_md}, {do_md}, {do_nm}, {do_lbfgs}, {do_tpi}, {do_tpi}, {do_md}, {do_md},{do_md}};
 +#endif
 +
 +gmx_large_int_t     deform_init_init_step_tpx;
 +matrix              deform_init_box_tpx;
 +#ifdef GMX_THREAD_MPI
 +tMPI_Thread_mutex_t deform_init_box_mutex=TMPI_THREAD_MUTEX_INITIALIZER;
 +#endif
 +
 +
 +#ifdef GMX_THREAD_MPI
 +struct mdrunner_arglist
 +{
 +    FILE *fplog;
 +    t_commrec *cr;
 +    int nfile;
 +    const t_filenm *fnm;
 +    output_env_t oenv;
 +    gmx_bool bVerbose;
 +    gmx_bool bCompact;
 +    int nstglobalcomm;
 +    ivec ddxyz;
 +    int dd_node_order;
 +    real rdd;
 +    real rconstr;
 +    const char *dddlb_opt;
 +    real dlb_scale;
 +    const char *ddcsx;
 +    const char *ddcsy;
 +    const char *ddcsz;
 +    int nstepout;
 +    int resetstep;
 +    int nmultisim;
 +    int repl_ex_nst;
 +    int repl_ex_nex;
 +    int repl_ex_seed;
 +    real pforce;
 +    real cpt_period;
 +    real max_hours;
 +    const char *deviceOptions;
 +    unsigned long Flags;
 +    int ret; /* return value */
 +};
 +
 +
 +/* The function used for spawning threads. Extracts the mdrunner() 
 +   arguments from its one argument and calls mdrunner(), after making
 +   a commrec. */
 +static void mdrunner_start_fn(void *arg)
 +{
 +    struct mdrunner_arglist *mda=(struct mdrunner_arglist*)arg;
 +    struct mdrunner_arglist mc=*mda; /* copy the arg list to make sure 
 +                                        that it's thread-local. This doesn't
 +                                        copy pointed-to items, of course,
 +                                        but those are all const. */
 +    t_commrec *cr;  /* we need a local version of this */
 +    FILE *fplog=NULL;
 +    t_filenm *fnm;
 +
 +    fnm = dup_tfn(mc.nfile, mc.fnm);
 +
 +    cr = init_par_threads(mc.cr);
 +
 +    if (MASTER(cr))
 +    {
 +        fplog=mc.fplog;
 +    }
 +
 +    mda->ret=mdrunner(cr->nnodes, fplog, cr, mc.nfile, fnm, mc.oenv, 
 +                      mc.bVerbose, mc.bCompact, mc.nstglobalcomm, 
 +                      mc.ddxyz, mc.dd_node_order, mc.rdd,
 +                      mc.rconstr, mc.dddlb_opt, mc.dlb_scale, 
 +                      mc.ddcsx, mc.ddcsy, mc.ddcsz, mc.nstepout, mc.resetstep, 
 +                      mc.nmultisim, mc.repl_ex_nst, mc.repl_ex_nex, mc.repl_ex_seed, mc.pforce,
 +                      mc.cpt_period, mc.max_hours, mc.deviceOptions, mc.Flags);
 +}
 +
 +/* called by mdrunner() to start a specific number of threads (including 
 +   the main thread) for thread-parallel runs. This in turn calls mdrunner()
 +   for each thread. 
 +   All options besides nthreads are the same as for mdrunner(). */
 +static t_commrec *mdrunner_start_threads(int nthreads, 
 +              FILE *fplog,t_commrec *cr,int nfile, 
 +              const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose,
 +              gmx_bool bCompact, int nstglobalcomm,
 +              ivec ddxyz,int dd_node_order,real rdd,real rconstr,
 +              const char *dddlb_opt,real dlb_scale,
 +              const char *ddcsx,const char *ddcsy,const char *ddcsz,
 +              int nstepout,int resetstep,int nmultisim,int repl_ex_nst,
 +              int repl_ex_nex, int repl_ex_seed, real pforce,real cpt_period, real max_hours,
 +              const char *deviceOptions, unsigned long Flags)
 +{
 +    int ret;
 +    struct mdrunner_arglist *mda;
 +    t_commrec *crn; /* the new commrec */
 +    t_filenm *fnmn;
 +
 +    /* first check whether we even need to start tMPI */
 +    if (nthreads<2)
 +        return cr;
 +
 +    /* a few small, one-time, almost unavoidable memory leaks: */
 +    snew(mda,1);
 +    fnmn=dup_tfn(nfile, fnm);
 +
 +    /* fill the data structure to pass as void pointer to thread start fn */
 +    mda->fplog=fplog;
 +    mda->cr=cr;
 +    mda->nfile=nfile;
 +    mda->fnm=fnmn;
 +    mda->oenv=oenv;
 +    mda->bVerbose=bVerbose;
 +    mda->bCompact=bCompact;
 +    mda->nstglobalcomm=nstglobalcomm;
 +    mda->ddxyz[XX]=ddxyz[XX];
 +    mda->ddxyz[YY]=ddxyz[YY];
 +    mda->ddxyz[ZZ]=ddxyz[ZZ];
 +    mda->dd_node_order=dd_node_order;
 +    mda->rdd=rdd;
 +    mda->rconstr=rconstr;
 +    mda->dddlb_opt=dddlb_opt;
 +    mda->dlb_scale=dlb_scale;
 +    mda->ddcsx=ddcsx;
 +    mda->ddcsy=ddcsy;
 +    mda->ddcsz=ddcsz;
 +    mda->nstepout=nstepout;
 +    mda->resetstep=resetstep;
 +    mda->nmultisim=nmultisim;
 +    mda->repl_ex_nst=repl_ex_nst;
 +    mda->repl_ex_nex=repl_ex_nex;
 +    mda->repl_ex_seed=repl_ex_seed;
 +    mda->pforce=pforce;
 +    mda->cpt_period=cpt_period;
 +    mda->max_hours=max_hours;
 +    mda->deviceOptions=deviceOptions;
 +    mda->Flags=Flags;
 +
 +    fprintf(stderr, "Starting %d threads\n",nthreads);
 +    fflush(stderr);
 +    /* now spawn new threads that start mdrunner_start_fn(), while 
 +       the main thread returns */
 +    ret=tMPI_Init_fn(TRUE, nthreads, mdrunner_start_fn, (void*)(mda) );
 +    if (ret!=TMPI_SUCCESS)
 +        return NULL;
 +
 +    /* make a new comm_rec to reflect the new situation */
 +    crn=init_par_threads(cr);
 +    return crn;
 +}
 +
 +
 +/* Get the number of threads to use for thread-MPI based on how many
 + * were requested, which algorithms we're using,
 + * and how many particles there are.
 + */
 +static int get_nthreads_mpi(int nthreads_requested, t_inputrec *inputrec,
 +                            gmx_mtop_t *mtop)
 +{
 +    int nthreads,nthreads_new;
 +    int min_atoms_per_thread;
 +    char *env;
 +
 +    nthreads = nthreads_requested;
 +
 +    /* determine # of hardware threads. */
 +    if (nthreads_requested < 1)
 +    {
 +        if ((env = getenv("GMX_MAX_THREADS")) != NULL)
 +        {
 +            nthreads = 0;
 +            sscanf(env,"%d",&nthreads);
 +            if (nthreads < 1)
 +            {
 +                gmx_fatal(FARGS,"GMX_MAX_THREADS (%d) should be larger than 0",
 +                          nthreads);
 +            }
 +        }
 +        else
 +        {
 +            nthreads = tMPI_Thread_get_hw_number();
 +        }
 +    }
 +
 +    if (inputrec->eI == eiNM || EI_TPI(inputrec->eI))
 +    {
 +        /* Steps are divided over the nodes iso splitting the atoms */
 +        min_atoms_per_thread = 0;
 +    }
 +    else
 +    {
 +        min_atoms_per_thread = MIN_ATOMS_PER_THREAD;
 +    }
 +
 +    /* Check if an algorithm does not support parallel simulation.  */
 +    if (nthreads != 1 && 
 +        ( inputrec->eI == eiLBFGS ||
 +          inputrec->coulombtype == eelEWALD ) )
 +    {
 +        fprintf(stderr,"\nThe integration or electrostatics algorithm doesn't support parallel runs. Not starting any threads.\n");
 +        nthreads = 1;
 +    }
 +    else if (nthreads_requested < 1 &&
 +             mtop->natoms/nthreads < min_atoms_per_thread)
 +    {
 +        /* the thread number was chosen automatically, but there are too many
 +           threads (too few atoms per thread) */
 +        nthreads_new = max(1,mtop->natoms/min_atoms_per_thread);
 +
 +        if (nthreads_new > 8 || (nthreads == 8 && nthreads_new > 4))
 +        {
 +            /* Use only multiples of 4 above 8 threads
 +             * or with an 8-core processor
 +             * (to avoid 6 threads on 8 core processors with 4 real cores).
 +             */
 +            nthreads_new = (nthreads_new/4)*4;
 +        }
 +        else if (nthreads_new > 4)
 +        {
 +            /* Avoid 5 or 7 threads */
 +            nthreads_new = (nthreads_new/2)*2;
 +        }
 +
 +        nthreads = nthreads_new;
 +
 +        fprintf(stderr,"\n");
 +        fprintf(stderr,"NOTE: Parallelization is limited by the small number of atoms,\n");
 +        fprintf(stderr,"      only starting %d threads.\n",nthreads);
 +        fprintf(stderr,"      You can use the -nt option to optimize the number of threads.\n\n");
 +    }
 +    return nthreads;
 +}
 +#endif
 +
 +
 +int mdrunner(int nthreads_requested, FILE *fplog,t_commrec *cr,int nfile,
 +             const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose,
 +             gmx_bool bCompact, int nstglobalcomm,
 +             ivec ddxyz,int dd_node_order,real rdd,real rconstr,
 +             const char *dddlb_opt,real dlb_scale,
 +             const char *ddcsx,const char *ddcsy,const char *ddcsz,
 +             int nstepout,int resetstep,int nmultisim, int repl_ex_nst, int repl_ex_nex,
 +             int repl_ex_seed, real pforce,real cpt_period,real max_hours,
 +             const char *deviceOptions, unsigned long Flags)
 +{
 +    double     nodetime=0,realtime;
 +    t_inputrec *inputrec;
 +    t_state    *state=NULL;
 +    matrix     box;
 +    gmx_ddbox_t ddbox={0};
 +    int        npme_major,npme_minor;
 +    real       tmpr1,tmpr2;
 +    t_nrnb     *nrnb;
 +    gmx_mtop_t *mtop=NULL;
 +    t_mdatoms  *mdatoms=NULL;
 +    t_forcerec *fr=NULL;
 +    t_fcdata   *fcd=NULL;
 +    real       ewaldcoeff=0;
 +    gmx_pme_t  *pmedata=NULL;
 +    gmx_vsite_t *vsite=NULL;
 +    gmx_constr_t constr;
 +    int        i,m,nChargePerturbed=-1,status,nalloc;
 +    char       *gro;
 +    gmx_wallcycle_t wcycle;
 +    gmx_bool       bReadRNG,bReadEkin;
 +    int        list;
 +    gmx_runtime_t runtime;
 +    int        rc;
 +    gmx_large_int_t reset_counters;
 +    gmx_edsam_t ed=NULL;
 +    t_commrec   *cr_old=cr; 
 +    int         nthreads_mpi=1;
 +    int         nthreads_pme=1;
 +    gmx_membed_t membed=NULL;
 +
 +    /* CAUTION: threads may be started later on in this function, so
 +       cr doesn't reflect the final parallel state right now */
 +    snew(inputrec,1);
 +    snew(mtop,1);
 +
 +    if (bVerbose && SIMMASTER(cr))
 +    {
 +        fprintf(stderr,"Getting Loaded...\n");
 +    }
 +    
 +    if (Flags & MD_APPENDFILES) 
 +    {
 +        fplog = NULL;
 +    }
 +
 +    snew(state,1);
 +    if (MASTER(cr)) 
 +    {
 +        /* Read (nearly) all data required for the simulation */
 +        read_tpx_state(ftp2fn(efTPX,nfile,fnm),inputrec,state,NULL,mtop);
 +
 +        /* NOW the threads will be started: */
 +#ifdef GMX_THREAD_MPI
 +        nthreads_mpi = get_nthreads_mpi(nthreads_requested, inputrec, mtop);
 +
 +        if (nthreads_mpi > 1)
 +        {
 +            /* now start the threads. */
 +            cr=mdrunner_start_threads(nthreads_mpi, fplog, cr_old, nfile, fnm,
 +                                      oenv, bVerbose, bCompact, nstglobalcomm, 
 +                                      ddxyz, dd_node_order, rdd, rconstr, 
 +                                      dddlb_opt, dlb_scale, ddcsx, ddcsy, ddcsz,
 +                                      nstepout, resetstep, nmultisim, 
 +                                      repl_ex_nst, repl_ex_nex, repl_ex_seed, pforce,
 +                                      cpt_period, max_hours, deviceOptions, 
 +                                      Flags);
 +            /* the main thread continues here with a new cr. We don't deallocate
 +               the old cr because other threads may still be reading it. */
 +            if (cr == NULL)
 +            {
 +                gmx_comm("Failed to spawn threads");
 +            }
 +        }
 +#endif
 +    }
 +    /* END OF CAUTION: cr is now reliable */
 +
 +    /* g_membed initialisation *
 +     * Because we change the mtop, init_membed is called before the init_parallel *
 +     * (in case we ever want to make it run in parallel) */
 +    if (opt2bSet("-membed",nfile,fnm))
 +    {
 +        if (MASTER(cr))
 +        {
 +            fprintf(stderr,"Initializing membed");
 +        }
 +        membed = init_membed(fplog,nfile,fnm,mtop,inputrec,state,cr,&cpt_period);
 +    }
 +
 +    if (PAR(cr))
 +    {
 +        /* now broadcast everything to the non-master nodes/threads: */
 +        init_parallel(fplog, cr, inputrec, mtop);
 +    }
 +    if (fplog != NULL)
 +    {
 +        pr_inputrec(fplog,0,"Input Parameters",inputrec,FALSE);
 +    }
 +
 +    /* now make sure the state is initialized and propagated */
 +    set_state_entries(state,inputrec,cr->nnodes);
 +
 +    /* remove when vv and rerun works correctly! */
 +    if (PAR(cr) && EI_VV(inputrec->eI) && ((Flags & MD_RERUN) || (Flags & MD_RERUN_VSITE)))
 +    {
 +        gmx_fatal(FARGS,
 +                  "Currently can't do velocity verlet with rerun in parallel.");
 +    }
 +
 +    /* A parallel command line option consistency check that we can
 +       only do after any threads have started. */
 +    if (!PAR(cr) &&
 +        (ddxyz[XX] > 1 || ddxyz[YY] > 1 || ddxyz[ZZ] > 1 || cr->npmenodes > 0))
 +    {
 +        gmx_fatal(FARGS,
 +                  "The -dd or -npme option request a parallel simulation, "
 +#ifndef GMX_MPI
 +                  "but mdrun was compiled without threads or MPI enabled"
 +#else
 +#ifdef GMX_THREAD_MPI
 +                  "but the number of threads (option -nt) is 1"
 +#else
 +                  "but mdrun was not started through mpirun/mpiexec or only one process was requested through mpirun/mpiexec" 
 +#endif
 +#endif
 +            );
 +    }
 +
 +    if ((Flags & MD_RERUN) &&
 +        (EI_ENERGY_MINIMIZATION(inputrec->eI) || eiNM == inputrec->eI))
 +    {
 +        gmx_fatal(FARGS, "The .mdp file specified an energy mininization or normal mode algorithm, and these are not compatible with mdrun -rerun");
 +    }
 +
 +    if (can_use_allvsall(inputrec,mtop,TRUE,cr,fplog))
 +    {
 +        /* All-vs-all loops do not work with domain decomposition */
 +        Flags |= MD_PARTDEC;
 +    }
 +
 +    if (!EEL_PME(inputrec->coulombtype) || (Flags & MD_PARTDEC))
 +    {
 +        if (cr->npmenodes > 0)
 +        {
 +            if (!EEL_PME(inputrec->coulombtype))
 +            {
 +                gmx_fatal_collective(FARGS,cr,NULL,
 +                                     "PME nodes are requested, but the system does not use PME electrostatics");
 +            }
 +            if (Flags & MD_PARTDEC)
 +            {
 +                gmx_fatal_collective(FARGS,cr,NULL,
 +                                     "PME nodes are requested, but particle decomposition does not support separate PME nodes");
 +            }
 +        }
 +
 +        cr->npmenodes = 0;
 +    }
 +
 +#ifdef GMX_FAHCORE
 +    fcRegisterSteps(inputrec->nsteps,inputrec->init_step);
 +#endif
 +
 +    /* NMR restraints must be initialized before load_checkpoint,
 +     * since with time averaging the history is added to t_state.
 +     * For proper consistency check we therefore need to extend
 +     * t_state here.
 +     * So the PME-only nodes (if present) will also initialize
 +     * the distance restraints.
 +     */
 +    snew(fcd,1);
 +
 +    /* This needs to be called before read_checkpoint to extend the state */
 +    init_disres(fplog,mtop,inputrec,cr,Flags & MD_PARTDEC,fcd,state);
 +
 +    if (gmx_mtop_ftype_count(mtop,F_ORIRES) > 0)
 +    {
 +        if (PAR(cr) && !(Flags & MD_PARTDEC))
 +        {
 +            gmx_fatal(FARGS,"Orientation restraints do not work (yet) with domain decomposition, use particle decomposition (mdrun option -pd)");
 +        }
 +        /* Orientation restraints */
 +        if (MASTER(cr))
 +        {
 +            init_orires(fplog,mtop,state->x,inputrec,cr->ms,&(fcd->orires),
 +                        state);
 +        }
 +    }
 +
 +    if (DEFORM(*inputrec))
 +    {
 +        /* Store the deform reference box before reading the checkpoint */
 +        if (SIMMASTER(cr))
 +        {
 +            copy_mat(state->box,box);
 +        }
 +        if (PAR(cr))
 +        {
 +            gmx_bcast(sizeof(box),box,cr);
 +        }
 +        /* Because we do not have the update struct available yet
 +         * in which the reference values should be stored,
 +         * we store them temporarily in static variables.
 +         * This should be thread safe, since they are only written once
 +         * and with identical values.
 +         */
 +#ifdef GMX_THREAD_MPI
 +        tMPI_Thread_mutex_lock(&deform_init_box_mutex);
 +#endif
 +        deform_init_init_step_tpx = inputrec->init_step;
 +        copy_mat(box,deform_init_box_tpx);
 +#ifdef GMX_THREAD_MPI
 +        tMPI_Thread_mutex_unlock(&deform_init_box_mutex);
 +#endif
 +    }
 +
 +    if (opt2bSet("-cpi",nfile,fnm)) 
 +    {
 +        /* Check if checkpoint file exists before doing continuation.
 +         * This way we can use identical input options for the first and subsequent runs...
 +         */
 +        if( gmx_fexist_master(opt2fn_master("-cpi",nfile,fnm,cr),cr) )
 +        {
 +            load_checkpoint(opt2fn_master("-cpi",nfile,fnm,cr),&fplog,
 +                            cr,Flags & MD_PARTDEC,ddxyz,
 +                            inputrec,state,&bReadRNG,&bReadEkin,
 +                            (Flags & MD_APPENDFILES),
 +                            (Flags & MD_APPENDFILESSET));
 +            
 +            if (bReadRNG)
 +            {
 +                Flags |= MD_READ_RNG;
 +            }
 +            if (bReadEkin)
 +            {
 +                Flags |= MD_READ_EKIN;
 +            }
 +        }
 +    }
 +
 +    if (((MASTER(cr) || (Flags & MD_SEPPOT)) && (Flags & MD_APPENDFILES))
 +#ifdef GMX_THREAD_MPI
 +        /* With thread MPI only the master node/thread exists in mdrun.c,
 +         * therefore non-master nodes need to open the "seppot" log file here.
 +         */
 +        || (!MASTER(cr) && (Flags & MD_SEPPOT))
 +#endif
 +        )
 +    {
 +        gmx_log_open(ftp2fn(efLOG,nfile,fnm),cr,!(Flags & MD_SEPPOT),
 +                             Flags,&fplog);
 +    }
 +
 +    if (SIMMASTER(cr)) 
 +    {
 +        copy_mat(state->box,box);
 +    }
 +
 +    if (PAR(cr)) 
 +    {
 +        gmx_bcast(sizeof(box),box,cr);
 +    }
 +
 +    /* Essential dynamics */
 +    if (opt2bSet("-ei",nfile,fnm))
 +    {
 +        /* Open input and output files, allocate space for ED data structure */
 +        ed = ed_open(nfile,fnm,Flags,cr);
 +    }
 +
 +    if (bVerbose && SIMMASTER(cr))
 +    {
 +        fprintf(stderr,"Loaded with Money\n\n");
 +    }
 +
 +    if (PAR(cr) && !((Flags & MD_PARTDEC) ||
 +                     EI_TPI(inputrec->eI) ||
 +                     inputrec->eI == eiNM))
 +    {
 +        cr->dd = init_domain_decomposition(fplog,cr,Flags,ddxyz,rdd,rconstr,
 +                                           dddlb_opt,dlb_scale,
 +                                           ddcsx,ddcsy,ddcsz,
 +                                           mtop,inputrec,
 +                                           box,state->x,
 +                                           &ddbox,&npme_major,&npme_minor);
 +
 +        make_dd_communicators(fplog,cr,dd_node_order);
 +
 +        /* Set overallocation to avoid frequent reallocation of arrays */
 +        set_over_alloc_dd(TRUE);
 +    }
 +    else
 +    {
 +        /* PME, if used, is done on all nodes with 1D decomposition */
 +        cr->npmenodes = 0;
 +        cr->duty = (DUTY_PP | DUTY_PME);
 +        npme_major = 1;
 +        npme_minor = 1;
 +        if (!EI_TPI(inputrec->eI))
 +        {
 +            npme_major = cr->nnodes;
 +        }
 +        
 +        if (inputrec->ePBC == epbcSCREW)
 +        {
 +            gmx_fatal(FARGS,
 +                      "pbc=%s is only implemented with domain decomposition",
 +                      epbc_names[inputrec->ePBC]);
 +        }
 +    }
 +
 +    if (PAR(cr))
 +    {
 +        /* After possible communicator splitting in make_dd_communicators.
 +         * we can set up the intra/inter node communication.
 +         */
 +        gmx_setup_nodecomm(fplog,cr);
 +    }
 +
 +    /* get number of OpenMP/PME threads
 +     * env variable should be read only on one node to make sure it is identical everywhere */
 +#ifdef GMX_OPENMP
 +    if (EEL_PME(inputrec->coulombtype))
 +    {
 +        if (MASTER(cr))
 +        {
 +            char *ptr;
 +            if ((ptr=getenv("GMX_PME_NTHREADS")) != NULL)
 +            {
 +                sscanf(ptr,"%d",&nthreads_pme);
 +            }
 +            if (fplog != NULL && nthreads_pme > 1)
 +            {
 +                fprintf(fplog,"Using %d threads for PME\n",nthreads_pme);
 +            }
 +        }
 +        if (PAR(cr))
 +        {
 +            gmx_bcast_sim(sizeof(nthreads_pme),&nthreads_pme,cr);
 +        }
 +    }
 +#endif
 +
 +    wcycle = wallcycle_init(fplog,resetstep,cr,nthreads_pme);
 +    if (PAR(cr))
 +    {
 +        /* Master synchronizes its value of reset_counters with all nodes 
 +         * including PME only nodes */
 +        reset_counters = wcycle_get_reset_counters(wcycle);
 +        gmx_bcast_sim(sizeof(reset_counters),&reset_counters,cr);
 +        wcycle_set_reset_counters(wcycle, reset_counters);
 +    }
 +
 +
 +    snew(nrnb,1);
 +    if (cr->duty & DUTY_PP)
 +    {
 +        /* For domain decomposition we allocate dynamically
 +         * in dd_partition_system.
 +         */
 +        if (DOMAINDECOMP(cr))
 +        {
 +            bcast_state_setup(cr,state);
 +        }
 +        else
 +        {
 +            if (PAR(cr))
 +            {
 +                bcast_state(cr,state,TRUE);
 +            }
 +        }
 +
 +        /* Initiate forcerecord */
 +        fr = mk_forcerec();
 +        init_forcerec(fplog,oenv,fr,fcd,inputrec,mtop,cr,box,FALSE,
 +                      opt2fn("-table",nfile,fnm),
 +                      opt2fn("-tabletf",nfile,fnm),
 +                      opt2fn("-tablep",nfile,fnm),
 +                      opt2fn("-tableb",nfile,fnm),FALSE,pforce);
 +
 +        /* version for PCA_NOT_READ_NODE (see md.c) */
 +        /*init_forcerec(fplog,fr,fcd,inputrec,mtop,cr,box,FALSE,
 +          "nofile","nofile","nofile","nofile",FALSE,pforce);
 +          */        
 +        fr->bSepDVDL = ((Flags & MD_SEPPOT) == MD_SEPPOT);
 +
 +        /* Initialize QM-MM */
 +        if(fr->bQMMM)
 +        {
 +            init_QMMMrec(cr,box,mtop,inputrec,fr);
 +        }
 +
 +        /* Initialize the mdatoms structure.
 +         * mdatoms is not filled with atom data,
 +         * as this can not be done now with domain decomposition.
 +         */
 +        mdatoms = init_mdatoms(fplog,mtop,inputrec->efep!=efepNO);
 +
 +        /* Initialize the virtual site communication */
 +        vsite = init_vsite(mtop,cr);
 +
 +        calc_shifts(box,fr->shift_vec);
 +
 +        /* With periodic molecules the charge groups should be whole at start up
 +         * and the virtual sites should not be far from their proper positions.
 +         */
 +        if (!inputrec->bContinuation && MASTER(cr) &&
 +            !(inputrec->ePBC != epbcNONE && inputrec->bPeriodicMols))
 +        {
 +            /* Make molecules whole at start of run */
 +            if (fr->ePBC != epbcNONE)
 +            {
 +                do_pbc_first_mtop(fplog,inputrec->ePBC,box,mtop,state->x);
 +            }
 +            if (vsite)
 +            {
 +                /* Correct initial vsite positions are required
 +                 * for the initial distribution in the domain decomposition
 +                 * and for the initial shell prediction.
 +                 */
 +                construct_vsites_mtop(fplog,vsite,mtop,state->x);
 +            }
 +        }
 +
 +        if (EEL_PME(fr->eeltype))
 +        {
 +            ewaldcoeff = fr->ewaldcoeff;
 +            pmedata = &fr->pmedata;
 +        }
 +        else
 +        {
 +            pmedata = NULL;
 +        }
 +    }
 +    else
 +    {
 +        /* This is a PME only node */
 +
 +        /* We don't need the state */
 +        done_state(state);
 +
 +        ewaldcoeff = calc_ewaldcoeff(inputrec->rcoulomb, inputrec->ewald_rtol);
 +        snew(pmedata,1);
 +    }
 +
 +    /* Initiate PME if necessary,
 +     * either on all nodes or on dedicated PME nodes only. */
 +    if (EEL_PME(inputrec->coulombtype))
 +    {
 +        if (mdatoms)
 +        {
 +            nChargePerturbed = mdatoms->nChargePerturbed;
 +        }
 +        if (cr->npmenodes > 0)
 +        {
 +            /* The PME only nodes need to know nChargePerturbed */
 +            gmx_bcast_sim(sizeof(nChargePerturbed),&nChargePerturbed,cr);
 +        }
 +
 +
 +        /* Set CPU affinity. Can be important for performance.
 +           On some systems (e.g. Cray) CPU Affinity is set by default.
 +           But default assigning doesn't work (well) with only some ranks
 +           having threads. This causes very low performance.
 +           External tools have cumbersome syntax for setting affinity
 +           in the case that only some ranks have threads.
 +           Thus it is important that GROMACS sets the affinity internally at
 +           if only PME is using threads.
 +        */
 +
 +#ifdef GMX_OPENMP
 +#ifdef __linux
 +#ifdef GMX_LIB_MPI
 +        {
 +            int core;
 +            MPI_Comm comm_intra; /* intra communicator (but different to nc.comm_intra includes PME nodes) */
 +            MPI_Comm_split(MPI_COMM_WORLD,gmx_hostname_num(),gmx_node_rank(),&comm_intra);
 +            int local_omp_nthreads = (cr->duty & DUTY_PME) ? nthreads_pme : 1; /* threads on this node */
 +            MPI_Scan(&local_omp_nthreads,&core, 1, MPI_INT, MPI_SUM, comm_intra);
 +            core-=local_omp_nthreads; /* make exclusive scan */
 +#pragma omp parallel firstprivate(core) num_threads(local_omp_nthreads)
 +            {
 +                cpu_set_t mask;
 +                CPU_ZERO(&mask);
++                core+=gmx_omp_get_thread_num();
 +                CPU_SET(core,&mask);
 +                sched_setaffinity((pid_t) syscall (SYS_gettid),sizeof(cpu_set_t),&mask);
 +            }
 +        }
 +#endif /*GMX_MPI*/
 +#endif /*__linux*/
 +#endif /*GMX_OPENMP*/
 +
 +        if (cr->duty & DUTY_PME)
 +        {
 +            status = gmx_pme_init(pmedata,cr,npme_major,npme_minor,inputrec,
 +                                  mtop ? mtop->natoms : 0,nChargePerturbed,
 +                                  (Flags & MD_REPRODUCIBLE),nthreads_pme);
 +            if (status != 0) 
 +            {
 +                gmx_fatal(FARGS,"Error %d initializing PME",status);
 +            }
 +        }
 +    }
 +
 +
 +    if (integrator[inputrec->eI].func == do_md
 +#ifdef GMX_OPENMM
 +        ||
 +        integrator[inputrec->eI].func == do_md_openmm
 +#endif
 +        )
 +    {
 +        /* Turn on signal handling on all nodes */
 +        /*
 +         * (A user signal from the PME nodes (if any)
 +         * is communicated to the PP nodes.
 +         */
 +        signal_handler_install();
 +    }
 +
 +    if (cr->duty & DUTY_PP)
 +    {
 +        if (inputrec->ePull != epullNO)
 +        {
 +            /* Initialize pull code */
 +            init_pull(fplog,inputrec,nfile,fnm,mtop,cr,oenv, inputrec->fepvals->init_lambda,
 +                      EI_DYNAMICS(inputrec->eI) && MASTER(cr),Flags);
 +        }
 +        
 +        if (inputrec->bRot)
 +        {
 +           /* Initialize enforced rotation code */
 +           init_rot(fplog,inputrec,nfile,fnm,cr,state->x,state->box,mtop,oenv,
 +                    bVerbose,Flags);
 +        }
 +
 +        constr = init_constraints(fplog,mtop,inputrec,ed,state,cr);
 +
 +        if (DOMAINDECOMP(cr))
 +        {
 +            dd_init_bondeds(fplog,cr->dd,mtop,vsite,constr,inputrec,
 +                            Flags & MD_DDBONDCHECK,fr->cginfo_mb);
 +
 +            set_dd_parameters(fplog,cr->dd,dlb_scale,inputrec,fr,&ddbox);
 +
 +            setup_dd_grid(fplog,cr->dd);
 +        }
 +
 +        /* Now do whatever the user wants us to do (how flexible...) */
 +        integrator[inputrec->eI].func(fplog,cr,nfile,fnm,
 +                                      oenv,bVerbose,bCompact,
 +                                      nstglobalcomm,
 +                                      vsite,constr,
 +                                      nstepout,inputrec,mtop,
 +                                      fcd,state,
 +                                      mdatoms,nrnb,wcycle,ed,fr,
 +                                      repl_ex_nst,repl_ex_nex,repl_ex_seed,
 +                                      membed,
 +                                      cpt_period,max_hours,
 +                                      deviceOptions,
 +                                      Flags,
 +                                      &runtime);
 +
 +        if (inputrec->ePull != epullNO)
 +        {
 +            finish_pull(fplog,inputrec->pull);
 +        }
 +        
 +        if (inputrec->bRot)
 +        {
 +            finish_rot(fplog,inputrec->rot);
 +        }
 +
 +    } 
 +    else 
 +    {
 +        /* do PME only */
 +        gmx_pmeonly(*pmedata,cr,nrnb,wcycle,ewaldcoeff,FALSE,inputrec);
 +    }
 +
 +    if (EI_DYNAMICS(inputrec->eI) || EI_TPI(inputrec->eI))
 +    {
 +        /* Some timing stats */  
 +        if (SIMMASTER(cr))
 +        {
 +            if (runtime.proc == 0)
 +            {
 +                runtime.proc = runtime.real;
 +            }
 +        }
 +        else
 +        {
 +            runtime.real = 0;
 +        }
 +    }
 +
 +    wallcycle_stop(wcycle,ewcRUN);
 +
 +    /* Finish up, write some stuff
 +     * if rerunMD, don't write last frame again 
 +     */
 +    finish_run(fplog,cr,ftp2fn(efSTO,nfile,fnm),
 +               inputrec,nrnb,wcycle,&runtime,
 +               EI_DYNAMICS(inputrec->eI) && !MULTISIM(cr));
 +
 +    if (opt2bSet("-membed",nfile,fnm))
 +    {
 +        sfree(membed);
 +    }
 +
 +    /* Does what it says */  
 +    print_date_and_time(fplog,cr->nodeid,"Finished mdrun",&runtime);
 +
 +    /* Close logfile already here if we were appending to it */
 +    if (MASTER(cr) && (Flags & MD_APPENDFILES))
 +    {
 +        gmx_log_close(fplog);
 +    } 
 +
 +    rc=(int)gmx_get_stop_condition();
 +
 +#ifdef GMX_THREAD_MPI
 +    /* we need to join all threads. The sub-threads join when they
 +       exit this function, but the master thread needs to be told to 
 +       wait for that. */
 +    if (PAR(cr) && MASTER(cr))
 +    {
 +        tMPI_Finalize();
 +    }
 +#endif
 +
 +    return rc;
 +}
index a90ce45915327f832f4f132a4d1aaf1ac78d2e1c,0000000000000000000000000000000000000000..0c5bd2f6bd474e87fd6944f84185f642e855a2be
mode 100644,000000..100644
--- /dev/null
@@@ -1,8 -1,0 +1,9 @@@
 +include_directories(${CMAKE_SOURCE_DIR}/src/gromacs/gmxpreprocess)
 +
 +file(GLOB PDB2GMX_SOURCES *.c)
 +
 +add_executable(pdb2gmx ${PDB2GMX_SOURCES})
++gmx_add_man_page(pdb2gmx)
 +target_link_libraries(pdb2gmx ${GMX_EXTRA_LIBRARIES} libgromacs)
 +set_target_properties(pdb2gmx PROPERTIES OUTPUT_NAME "pdb2gmx${GMX_BINARY_SUFFIX}")
 +install(TARGETS pdb2gmx DESTINATION ${BIN_INSTALL_DIR} COMPONENT runtime)
index 323225f7f5e6ca405c38a7c853499c9d8d077176,0000000000000000000000000000000000000000..bf63c8d0d627c62e2cb401494a92c54b8009d5c5
mode 100644,000000..100644
--- /dev/null
@@@ -1,8 -1,0 +1,9 @@@
 +include_directories(${CMAKE_SOURCE_DIR}/src/gromacs/gmxpreprocess)
 +
 +file(GLOB TPBCONV_SOURCES tpbconv.c)
 +
 +add_executable(tpbconv ${TPBCONV_SOURCES})
++gmx_add_man_page(tpbconv)
 +target_link_libraries(tpbconv ${GMX_EXTRA_LIBRARIES} libgromacs)
 +set_target_properties(tpbconv PROPERTIES OUTPUT_NAME "tpbconv${GMX_BINARY_SUFFIX}")
 +install(TARGETS tpbconv DESTINATION ${BIN_INSTALL_DIR} COMPONENT runtime)
Simple merge
Simple merge