Merge 'origin/release-4-6' into master
author Roland Schulz <roland@utk.edu>
Fri, 11 May 2012 14:36:02 +0000 (10:36 -0400)
committer Roland Schulz <roland@utk.edu>
Wed, 16 May 2012 15:26:00 +0000 (11:26 -0400)
Conflicts:
src/gromacs/genversion.sh
src/kernel/CMakeLists.txt
src/programs/mdrun/mdrun.c
src/programs/mdrun/runner.c

Change-Id: Ifa85592226e84dea08d707ef1a76ee3020ed3bc3

16 files changed:
CMakeLists.txt
cmake/gmxCFlags.cmake
src/gromacs/gmxlib/bondfree.c
src/gromacs/gmxlib/confio.c
src/gromacs/gmxlib/enxio.c
src/gromacs/gmxlib/futil.c
src/gromacs/gmxlib/gmx_random.c
src/gromacs/gmxlib/index.c
src/gromacs/gmxlib/libxdrf.c
src/gromacs/gmxlib/main.c
src/gromacs/gmxlib/statutil.c
src/gromacs/legacyheaders/pull_rotation.h
src/gromacs/legacyheaders/types/commrec.h
src/gromacs/mdlib/pme.c
src/programs/mdrun/md.c
src/programs/mdrun/mdrun.c

diff --cc CMakeLists.txt
Simple merge
diff --cc cmake/gmxCFlags.cmake
index d1b42fe8127eeb9164c46560ae8071576a909b7a,11de448eb967e725c782ba951c746072e409a744..77657961cf1c5db1939efc6f00bdb1ad140c5663
@@@ -58,12 -58,11 +58,12 @@@ MACRO(gmx_c_flags
          if(NOT GMX_OPENMP)
              GMX_TEST_CFLAG(CXXFLAGS_PRAGMA "-Wno-unknown-pragmas" GMXC_CXXFLAGS)
          endif()
 -        GMX_TEST_CXXFLAG(CXXFLAGS_WARN "-Wall -Wno-unused -Wunused-value" GMXC_CXXFLAGS)
 -        GMX_TEST_CXXFLAG(CXXFLAGS_WARN "-Wextra -Wno-missing-field-initializers -Wno-sign-compare" GMXC_CXXFLAGS)
 +        GMX_TEST_CXXFLAG(CXXFLAGS_WARN "-Wall -Wno-unused-function -Wno-unused-parameter" GMXC_CXXFLAGS)
 +        GMX_TEST_CXXFLAG(CXXFLAGS_WARN "-Wnon-virtual-dtor" GMXC_CXXFLAGS)
 +        GMX_TEST_CXXFLAG(CXXFLAGS_WARN "-Wextra -Wno-missing-field-initializers" GMXC_CXXFLAGS)
        # new in gcc 4.5
          GMX_TEST_CXXFLAG(CXXFLAGS_EXCESS_PREC "-fexcess-precision=fast" GMXC_CXXFLAGS_RELEASE)
-         GMX_TEST_CXXFLAG(CXXFLAGS_COPT "-fomit-frame-pointer -finline-functions -funroll-all-loops"
+         GMX_TEST_CXXFLAG(CXXFLAGS_COPT "-fomit-frame-pointer -funroll-all-loops"
                           GMXC_CXXFLAGS_RELEASE)
          GMX_TEST_CXXFLAG(CXXFLAGS_NOINLINE "-fno-inline" GMXC_CXXFLAGS_DEBUG)
      endif()
diff --cc src/gromacs/gmxlib/bondfree.c
Simple merge
diff --cc src/gromacs/gmxlib/confio.c
Simple merge
diff --cc src/gromacs/gmxlib/enxio.c
index a3b46f783c10365bf4e16fa135b6bd768c683714,0000000000000000000000000000000000000000..308914f96188bbcc013a360581c41ba58fccc8b4
mode 100644,000000..100644
--- /dev/null
+++ b/src/gromacs/gmxlib/enxio.c
@@@ -1,1146 -1,0 +1,1146 @@@
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROningen Mixture of Alchemy and Childrens' Stories
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include "futil.h"
 +#include "string2.h"
 +#include "gmx_fatal.h"
 +#include "smalloc.h"
 +#include "gmxfio.h"
 +#include "enxio.h"
 +#include "vec.h"
 +#include "xdrf.h"
 +#include "macros.h"
 +
 +/* The source code in this file should be thread-safe. 
 +         Please keep it that way. */
 +
 +/* This number should be increased whenever the file format changes! */
 +static const int enx_version = 5;
 +
 +const char *enx_block_id_name[] = {
 +    "Averaged orientation restraints",
 +    "Instantaneous orientation restraints",
 +    "Orientation restraint order tensor(s)",
 +    "Distance restraints",
 +    "Free energy data",
 +    "BAR histogram",
 +    "Delta H raw data"
 +};
 +
 +
 +/* Stuff for reading pre 4.1 energy files */
 +typedef struct {
 +    gmx_bool     bOldFileOpen;   /* Is this an open old file? */
 +    gmx_bool     bReadFirstStep; /* Did we read the first step? */
 +    int      first_step;     /* First step in the energy file */
 +    int      step_prev;      /* Previous step */
 +    int      nsum_prev;      /* Previous step sum length */
 +    t_energy *ener_prev;     /* Previous energy sums */
 +} ener_old_t;
 +
 +struct ener_file
 +{
 +    ener_old_t eo;
 +    t_fileio *fio;
 +    int framenr;
 +    real frametime;
 +};
 +
 +static void enxsubblock_init(t_enxsubblock *sb)
 +{
 +    sb->nr=0;
 +#ifdef GMX_DOUBLE
 +    sb->type=xdr_datatype_double;
 +#else
 +    sb->type=xdr_datatype_float;
 +#endif
 +    sb->fval = NULL;
 +    sb->dval = NULL;
 +    sb->ival = NULL;
 +    sb->lval = NULL;
 +    sb->cval = NULL;
 +    sb->sval = NULL;
 +    sb->fval_alloc = 0;
 +    sb->dval_alloc = 0;
 +    sb->ival_alloc = 0;
 +    sb->lval_alloc = 0;
 +    sb->cval_alloc = 0;
 +    sb->sval_alloc = 0;
 +}
 +
 +static void enxsubblock_free(t_enxsubblock *sb)
 +{
 +    if (sb->fval_alloc)
 +    {
 +        free(sb->fval);
 +        sb->fval_alloc=0;
 +        sb->fval=NULL;
 +    }
 +    if (sb->dval_alloc)
 +    {
 +        free(sb->dval);
 +        sb->dval_alloc=0;
 +        sb->dval=NULL;
 +    }
 +    if (sb->ival_alloc)
 +    {
 +        free(sb->ival);
 +        sb->ival_alloc=0;
 +        sb->ival=NULL;
 +    }
 +    if (sb->lval_alloc)
 +    {
 +        free(sb->lval);
 +        sb->lval_alloc=0;
 +        sb->lval=NULL;
 +    }
 +    if (sb->cval_alloc)
 +    {
 +        free(sb->cval);
 +        sb->cval_alloc=0;
 +        sb->cval=NULL;
 +    }
 +    if (sb->sval_alloc)
 +    {
 +        int i;
 +
 +        for(i=0;i<sb->sval_alloc;i++)
 +        {
 +            if (sb->sval[i])
 +            {
 +                free(sb->sval[i]);
 +            }
 +        }
 +        free(sb->sval);
 +        sb->sval_alloc=0;
 +        sb->sval=NULL;
 +    }
 +}
 +
 +/* allocate the appropriate amount of memory for the given type and nr */
 +static void enxsubblock_alloc(t_enxsubblock *sb)
 +{
 +    /* allocate the appropriate amount of memory */
 +    switch(sb->type)
 +    {
 +        case xdr_datatype_float:
 +            if (sb->nr > sb->fval_alloc)
 +            {
 +                srenew(sb->fval, sb->nr);
 +                sb->fval_alloc=sb->nr;
 +            }
 +            break;
 +        case xdr_datatype_double:
 +            if (sb->nr > sb->dval_alloc)
 +            {
 +                srenew(sb->dval, sb->nr);
 +                sb->dval_alloc=sb->nr;
 +            }
 +            break;
 +        case xdr_datatype_int:
 +            if (sb->nr > sb->ival_alloc)
 +            {
 +                srenew(sb->ival, sb->nr);
 +                sb->ival_alloc=sb->nr;
 +            }
 +            break;
 +        case xdr_datatype_large_int:
 +            if (sb->nr > sb->lval_alloc)
 +            {
 +                srenew(sb->lval, sb->nr);
 +                sb->lval_alloc=sb->nr;
 +            }
 +            break;
 +        case xdr_datatype_char:
 +            if (sb->nr > sb->cval_alloc)
 +            {
 +                srenew(sb->cval, sb->nr);
 +                sb->cval_alloc=sb->nr;
 +            }
 +            break;
 +        case xdr_datatype_string:
 +            if (sb->nr > sb->sval_alloc)
 +            {
 +                int i;
 +
 +                srenew(sb->sval, sb->nr);
 +                for(i=sb->sval_alloc;i<sb->nr;i++)
 +                {
 +                    sb->sval[i]=NULL;
 +                }
 +                sb->sval_alloc=sb->nr;
 +            }
 +            break;
 +        default:
 +            gmx_incons("Unknown block type: this file is corrupted or from the future");
 +    }
 +}
 +
 +static void enxblock_init(t_enxblock *eb)
 +{
 +    eb->id=enxOR;
 +    eb->nsub=0;
 +    eb->sub=NULL;
 +    eb->nsub_alloc=0;
 +}
 +
 +static void enxblock_free(t_enxblock *eb)
 +{
 +    if (eb->nsub_alloc>0)
 +    {
 +        int i;
 +        for(i=0;i<eb->nsub_alloc;i++)
 +        {
 +            enxsubblock_free(&(eb->sub[i]));
 +        }
 +        free(eb->sub);
 +        eb->nsub_alloc=0;
 +        eb->sub=NULL;
 +    }
 +}
 +
 +void init_enxframe(t_enxframe *fr)
 +{
 +    fr->e_alloc=0;
 +    fr->ener=NULL;
 +
 +    /*fr->d_alloc=0;*/
 +    fr->ener=NULL;
 +
 +    /*fr->ndisre=0;*/
 +
 +    fr->nblock=0;
 +    fr->nblock_alloc=0;
 +    fr->block=NULL;
 +}
 +
 +
 +void free_enxframe(t_enxframe *fr)
 +{
 +  int b;
 +
 +  if (fr->e_alloc)
 +  {
 +    sfree(fr->ener);
 +  }
 +  for(b=0; b<fr->nblock_alloc; b++)
 +  {
 +      enxblock_free(&(fr->block[b]));
 +  }
 +  free(fr->block);
 +}
 +
 +void add_blocks_enxframe(t_enxframe *fr, int n)
 +{
 +    fr->nblock=n;
 +    if (n > fr->nblock_alloc)
 +    {
 +        int b;
 +
 +        srenew(fr->block, n);
 +        for(b=fr->nblock_alloc;b<fr->nblock;b++)
 +        {
 +            enxblock_init(&(fr->block[b]));
 +        }
 +        fr->nblock_alloc=n;
 +    }
 +}
 +
 +t_enxblock *find_block_id_enxframe(t_enxframe *ef, int id, t_enxblock *prev)
 +{
 +    gmx_off_t starti=0;
 +    gmx_off_t i;
 +
 +    if (prev)
 +    {
 +        starti=(prev - ef->block) + 1;
 +    }
 +    for(i=starti; i<ef->nblock; i++)
 +    {
 +        if (ef->block[i].id == id)
 +            return &(ef->block[i]);
 +    }
 +    return NULL;
 +}
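
Note on usage: find_block_id_enxframe() threads its prev argument so that repeated calls walk every block sharing an id. A minimal reading-side sketch (count_disre_blocks() is a hypothetical helper, not part of this file; fr is assumed to have been filled by do_enx(), defined further down):

    /* Sketch only: count all distance-restraint blocks in one frame. */
    static int count_disre_blocks(t_enxframe *fr)
    {
        t_enxblock *blk = NULL;
        int         n   = 0;

        while ((blk = find_block_id_enxframe(fr, enxDISRE, blk)) != NULL)
        {
            n++;    /* blk->nsub subblocks hold the actual data */
        }
        return n;
    }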
 +
 +void add_subblocks_enxblock(t_enxblock *eb, int n)
 +{
 +    eb->nsub=n;
 +    if (eb->nsub > eb->nsub_alloc)
 +    {
 +        int b;
 +
 +        srenew(eb->sub, n);
 +        for(b=eb->nsub_alloc; b<n; b++)
 +        {
 +            enxsubblock_init(&(eb->sub[b]));
 +        } 
 +        eb->nsub_alloc=n;
 +    }
 +}
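
On the writing side the caller owns the data pointers; the *_alloc counters above are only used when reading. A minimal sketch (attach_float_block() is a hypothetical helper, not GROMACS API) of appending one float subblock before a frame is passed to do_enx():

    /* Sketch only: attach caller-owned float data as an extra block. */
    static void attach_float_block(t_enxframe *fr, float *data, int n)
    {
        t_enxblock *blk;

        add_blocks_enxframe(fr, fr->nblock + 1);
        blk     = &fr->block[fr->nblock - 1];
        blk->id = enxDISRE;                  /* any valid block id */
        add_subblocks_enxblock(blk, 1);
        blk->sub[0].type = xdr_datatype_float;
        blk->sub[0].nr   = n;
        blk->sub[0].fval = data;   /* not copied; fval_alloc stays 0, so
                                      enxsubblock_free() leaves it alone */
    }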
 +
 +static void enx_warning(const char *msg)
 +{
 +    if (getenv("GMX_ENX_NO_FATAL") != NULL)
 +    {
 +        gmx_warning(msg);
 +    }
 +    else
 +    {
 +        gmx_fatal(FARGS,"%s\n%s",
 +                  msg,
 +                  "If you want to use the correct frames before the corrupted frame and avoid this fatal error set the env.var. GMX_ENX_NO_FATAL");
 +    }
 +}
 +
 +static void edr_strings(XDR *xdr,gmx_bool bRead,int file_version,
 +                        int n,gmx_enxnm_t **nms)
 +{
 +    int  i;
 +    gmx_enxnm_t *nm;
 +
 +    if (*nms == NULL)
 +    {
 +        snew(*nms,n);
 +    }
 +    for(i=0; i<n; i++)
 +    {
 +        nm = &(*nms)[i];
 +        if (bRead)
 +        {
 +            if (nm->name)
 +            {
 +                sfree(nm->name);
 +                nm->name = NULL;
 +            }
 +            if (nm->unit)
 +            {
 +                sfree(nm->unit);
 +                nm->unit = NULL;
 +            }
 +        }
 +        if(!xdr_string(xdr,&(nm->name),STRLEN))
 +        {
 +            gmx_file("Cannot write energy names to file; maybe you are out of disk space?");
 +        }
 +        if (file_version >= 2)
 +        {
 +            if(!xdr_string(xdr,&(nm->unit),STRLEN))
 +            {
 +                gmx_file("Cannot write energy names to file; maybe you are out of disk space?");
 +            }
 +        }
 +        else
 +        {
 +            nm->unit = strdup("kJ/mol");
 +        }
 +    }
 +}
 +
 +void do_enxnms(ener_file_t ef,int *nre,gmx_enxnm_t **nms)
 +{
 +    int  magic=-55555;
 +    XDR  *xdr;
 +    gmx_bool bRead = gmx_fio_getread(ef->fio);
 +    int  file_version;
 +    int  i;
 +   
 +    gmx_fio_checktype(ef->fio); 
 +
 +    xdr = gmx_fio_getxdr(ef->fio);
 +    
 +    if (!xdr_int(xdr,&magic))
 +    {
 +        if(!bRead)
 +        {
 +            gmx_file("Cannot write energy names to file; maybe you are out of disk space?");
 +        }
 +        *nre=0;
 +        return;
 +    }
 +    if (magic > 0)
 +    {
 +        /* Assume this is an old edr format */
 +        file_version = 1;
 +        *nre = magic;
 +        ef->eo.bOldFileOpen = TRUE;
 +        ef->eo.bReadFirstStep = FALSE;
 +        srenew(ef->eo.ener_prev,*nre);
 +    }
 +    else
 +    {
 +        ef->eo.bOldFileOpen=FALSE;
 +
 +        if (magic != -55555)
 +        {
 +            gmx_fatal(FARGS,"Energy names magic number mismatch, this is not a GROMACS edr file");
 +        }
 +        file_version = enx_version;
 +        xdr_int(xdr,&file_version);
 +        if (file_version > enx_version)
 +        {
 +            gmx_fatal(FARGS,"reading tpx file (%s) version %d with version %d program",gmx_fio_getname(ef->fio),file_version,enx_version);
 +        }
 +        xdr_int(xdr,nre);
 +    }
 +    if (file_version != enx_version)
 +    {
 +        fprintf(stderr,"Note: enx file_version %d, software version %d\n",
 +                file_version,enx_version);
 +    }
 +
 +    edr_strings(xdr,bRead,file_version,*nre,nms);
 +}
 +
 +static gmx_bool do_eheader(ener_file_t ef,int *file_version,t_enxframe *fr,
 +                       int nre_test,gmx_bool *bWrongPrecision,gmx_bool *bOK)
 +{
 +    int  magic=-7777777;
 +    real first_real_to_check;
 +    int  b,i,zero=0,dum=0;
 +    gmx_bool bRead = gmx_fio_getread(ef->fio);
 +    int  tempfix_nr=0;
 +    int  ndisre=0;
 +    int  startb=0;
 +#ifndef GMX_DOUBLE
 +    xdr_datatype dtreal=xdr_datatype_float; 
 +#else
 +    xdr_datatype dtreal=xdr_datatype_double; 
 +#endif
 +    
-     if (nre_test >= 0)
++    if (bWrongPrecision)
 +    {
 +        *bWrongPrecision = FALSE;
 +    }
 +
 +    *bOK=TRUE;
 +    /* The original energy frame started with a real,
 +     * so we have to use a real for compatibility.
 +     * This is VERY DIRTY code, since do_eheader can be called
 +     * with the wrong precision set and then we could read r > -1e10,
 +     * while actually the intention was r < -1e10.
 +     * When nre_test >= 0, do_eheader should therefore terminate
 +     * before the number of i/o calls starts depending on what has been read
 +     * (which is the case, for instance, for the block sizes for a variable
 +     * number of blocks, where this number is read before).
 +     */
 +    first_real_to_check = -2e10;
 +    if (!gmx_fio_do_real(ef->fio, first_real_to_check))
 +    {
 +        return FALSE;
 +    }
 +    if (first_real_to_check > -1e10)
 +    {
 +        /* Assume we are reading an old format */
 +        *file_version = 1;
 +        fr->t = first_real_to_check;
 +        if (!gmx_fio_do_int(ef->fio, dum))   *bOK = FALSE;
 +        fr->step = dum;
 +    }
 +    else
 +    {
 +        if (!gmx_fio_do_int(ef->fio, magic))       *bOK = FALSE;
 +        if (magic != -7777777)
 +        {
 +            enx_warning("Energy header magic number mismatch, this is not a GROMACS edr file");
 +            *bOK=FALSE;
 +            return FALSE;
 +        }
 +        *file_version = enx_version;
 +        if (!gmx_fio_do_int(ef->fio, *file_version)) *bOK = FALSE;
 +        if (*bOK && *file_version > enx_version)
 +        {
 +            gmx_fatal(FARGS,"reading tpx file (%s) version %d with version %d program",gmx_fio_getname(ef->fio),file_version,enx_version);
 +        }
 +        if (!gmx_fio_do_double(ef->fio, fr->t))       *bOK = FALSE;
 +        if (!gmx_fio_do_gmx_large_int(ef->fio, fr->step)) *bOK = FALSE;
 +        if (!bRead && fr->nsum == 1) {
 +            /* Do not store sums of length 1,
 +             * since this does not add information.
 +             */
 +            if (!gmx_fio_do_int(ef->fio, zero))      *bOK = FALSE;
 +        } else {
 +            if (!gmx_fio_do_int(ef->fio, fr->nsum))  *bOK = FALSE;
 +        }
 +        if (*file_version >= 3)
 +        {
 +            if (!gmx_fio_do_gmx_large_int(ef->fio, fr->nsteps)) *bOK = FALSE;
 +        }
 +        else
 +        {
 +            fr->nsteps = max(1,fr->nsum);
 +        }
 +        if (*file_version >= 5)
 +        {
 +            if (!gmx_fio_do_double(ef->fio, fr->dt)) *bOK = FALSE;
 +        }
 +        else
 +        {
 +            fr->dt = 0;
 +        }
 +    }
 +    if (!gmx_fio_do_int(ef->fio, fr->nre))     *bOK = FALSE;
 +    if (*file_version < 4)
 +    {
 +        if (!gmx_fio_do_int(ef->fio, ndisre))  *bOK = FALSE;
 +    }
 +    else
 +    {
 +        /* now reserved for possible future use */
 +        if (!gmx_fio_do_int(ef->fio, dum))  *bOK = FALSE;
 +    }
 +
 +    if (!gmx_fio_do_int(ef->fio, fr->nblock))  *bOK = FALSE;
 +    if (fr->nblock < 0) *bOK=FALSE;
 +
 +    if (ndisre!=0)
 +    {
 +        if (*file_version >= 4)
 +        {
 +            enx_warning("Distance restraint blocks in old style in new style file");
 +            *bOK=FALSE;
 +            return FALSE;
 +        }
 +        fr->nblock+=1;
 +    }
 +
 +
 +    /* Frames could have nre=0, so we can not rely only on the fr->nre check */
 +    if (bRead && nre_test >= 0 &&
 +        ((fr->nre > 0 && fr->nre != nre_test) ||
 +         fr->nre < 0 || ndisre < 0 || fr->nblock < 0))
 +    {
 +        *bWrongPrecision = TRUE;
 +        return *bOK;
 +    }
 +
 +    /* we now know what these should be, or we've already bailed out because
 +       of wrong precision */
 +    if ( *file_version==1 && (fr->t < 0 || fr->t > 1e20 || fr->step < 0 ) )
 +    {
 +        enx_warning("edr file with negative step number or unreasonable time (and without version number).");
 +        *bOK=FALSE;
 +        return FALSE;
 +    }
 +
 +
 +    if (*bOK && bRead)
 +    {
 +        add_blocks_enxframe(fr, fr->nblock);
 +    }
 +
 +    startb=0;
 +    if (ndisre>0)
 +    {
 +        /* sub[0] is the instantaneous data, sub[1] is time averaged */
 +        add_subblocks_enxblock(&(fr->block[0]), 2);
 +        fr->block[0].id=enxDISRE;
 +        fr->block[0].sub[0].nr=ndisre;
 +        fr->block[0].sub[1].nr=ndisre;
 +        fr->block[0].sub[0].type=dtreal;
 +        fr->block[0].sub[1].type=dtreal;
 +        startb++;
 +    }
 +
 +    /* read block header info */
 +    for(b=startb; b<fr->nblock; b++)
 +    {
 +        if (*file_version<4)
 +        {
 +            /* blocks in old version files always have 1 subblock that 
 +               consists of reals. */
 +            int nrint;
 +
 +            if (bRead)
 +            {
 +                add_subblocks_enxblock(&(fr->block[b]), 1);
 +            }
 +            else
 +            {
 +                if (fr->block[b].nsub != 1)
 +                {
 +                    gmx_incons("Writing an old version .edr file with too many subblocks");
 +                }
 +                if (fr->block[b].sub[0].type != dtreal)
 +                {
 +                    gmx_incons("Writing an old version .edr file the wrong subblock type");
 +                }
 +            }
 +            nrint = fr->block[b].sub[0].nr;
 +            
 +            if (!gmx_fio_do_int(ef->fio, nrint))
 +            {
 +                *bOK = FALSE;
 +            }
 +            fr->block[b].id          = b - startb;
 +            fr->block[b].sub[0].nr   = nrint;
 +            fr->block[b].sub[0].type = dtreal;
 +        }
 +        else
 +        {
 +            int i;
 +            /* in the new version files, the block header only contains
 +               the ID and the number of subblocks */
 +            int nsub=fr->block[b].nsub;
 +            *bOK = *bOK && gmx_fio_do_int(ef->fio, fr->block[b].id);
 +            *bOK = *bOK && gmx_fio_do_int(ef->fio, nsub);
 +
 +            fr->block[b].nsub=nsub;
 +            if (bRead)
 +                add_subblocks_enxblock(&(fr->block[b]), nsub);
 +
 +            /* read/write type & size for each subblock */
 +            for(i=0;i<nsub;i++)
 +            {
 +                t_enxsubblock *sub=&(fr->block[b].sub[i]); /* shortcut */
 +                int typenr=sub->type;
 +
 +                *bOK=*bOK && gmx_fio_do_int(ef->fio, typenr);
 +                *bOK=*bOK && gmx_fio_do_int(ef->fio, sub->nr);
 +
 +                sub->type = (xdr_datatype)typenr;
 +            }
 +        }
 +    }
 +    if (!gmx_fio_do_int(ef->fio, fr->e_size))  *bOK = FALSE;
 +
 +    /* now reserved for possible future use */
 +    if (!gmx_fio_do_int(ef->fio, dum))  *bOK = FALSE;
 +
 +    /* Do a dummy int to keep the format compatible with the old code */
 +    if (!gmx_fio_do_int(ef->fio, dum))         *bOK = FALSE;
 +    
 +    if (*bOK && *file_version == 1 && nre_test < 0)
 +    {
 +#if 0
 +        if (fp >= ener_old_nalloc)
 +        {
 +            gmx_incons("Problem with reading old format energy files");
 +        }
 +#endif
 +        
 +        if (!ef->eo.bReadFirstStep)
 +        {
 +            ef->eo.bReadFirstStep = TRUE;
 +            ef->eo.first_step     = fr->step;
 +            ef->eo.step_prev      = fr->step;
 +            ef->eo.nsum_prev      = 0;
 +        }
 +        
 +        fr->nsum   = fr->step - ef->eo.first_step + 1;
 +        fr->nsteps = fr->step - ef->eo.step_prev;
 +        fr->dt     = 0;
 +    }
 +      
 +    return *bOK;
 +}
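
The probe described in the long comment near the top of do_eheader() deserves a restatement: new-format headers begin with a sentinel real below -1e10 where the old format stored the (non-negative) frame time, and a value read with the wrong precision is garbled rather than exact, hence an inequality test instead of a comparison against the sentinel. A self-contained sketch of just that test (hypothetical names, not GROMACS API):

    #include <stdio.h>

    /* Sketch of the do_eheader() probe: the first real of a frame is
     * either the old-format time t (>= 0) or the new-format sentinel
     * (about -2e10); comparing against -1e10 tolerates precision loss. */
    static int looks_like_old_header(double first_real)
    {
        return first_real > -1e10;
    }

    int main(void)
    {
        printf("%d\n", looks_like_old_header(-2e10)); /* 0: new format */
        printf("%d\n", looks_like_old_header(1.25));  /* 1: old format, t = 1.25 */
        return 0;
    }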
 +
 +void free_enxnms(int n,gmx_enxnm_t *nms)
 +{
 +    int i;
 +
 +    for(i=0; i<n; i++)
 +    {
 +        sfree(nms[i].name);
 +        sfree(nms[i].unit);
 +    }
 +
 +    sfree(nms);
 +}
 +
 +void close_enx(ener_file_t ef)
 +{
 +    if(gmx_fio_close(ef->fio) != 0)
 +    {
 +        gmx_file("Cannot close energy file; it might be corrupt, or maybe you are out of disk space?");
 +    }
 +}
 +
 +static gmx_bool empty_file(const char *fn)
 +{
 +    FILE *fp;
 +    char dum;
 +    int  ret;
 +    gmx_bool bEmpty;
 +    
 +    fp = gmx_fio_fopen(fn,"r");
 +    ret = fread(&dum,sizeof(dum),1,fp);
 +    bEmpty = feof(fp);
 +    gmx_fio_fclose(fp);
 +    
 +    return bEmpty;
 +}
 +
 +
 +ener_file_t open_enx(const char *fn,const char *mode)
 +{
 +    int        nre,i;
 +    gmx_enxnm_t *nms=NULL;
 +    int        file_version=-1;
 +    t_enxframe *fr;
 +    gmx_bool       bWrongPrecision,bOK=TRUE;
 +    struct ener_file *ef;
 +
 +    snew(ef,1);
 +
 +    if (mode[0]=='r') {
 +        ef->fio=gmx_fio_open(fn,mode);
 +        gmx_fio_checktype(ef->fio);
 +        gmx_fio_setprecision(ef->fio,FALSE);
 +        do_enxnms(ef,&nre,&nms);
 +        snew(fr,1);
 +        do_eheader(ef,&file_version,fr,nre,&bWrongPrecision,&bOK);
 +        if(!bOK)
 +        {
 +            gmx_file("Cannot read energy file header. Corrupt file?");
 +        }
 +
 +        /* Now check whether this file is in single precision */
 +        if (!bWrongPrecision &&
 +            ((fr->e_size && (fr->nre == nre) && 
 +              (nre*4*(long int)sizeof(float) == fr->e_size)) ) )
 +        {
 +            fprintf(stderr,"Opened %s as single precision energy file\n",fn);
 +            free_enxnms(nre,nms);
 +        }
 +        else
 +        {
 +            gmx_fio_rewind(ef->fio);
 +            gmx_fio_checktype(ef->fio);
 +            gmx_fio_setprecision(ef->fio,TRUE);
 +            do_enxnms(ef,&nre,&nms);
 +            do_eheader(ef,&file_version,fr,nre,&bWrongPrecision,&bOK);
 +            if(!bOK)
 +            {
 +                gmx_file("Cannot write energy file header; maybe you are out of disk space?");
 +            }
 +
 +            if (((fr->e_size && (fr->nre == nre) && 
 +                            (nre*4*(long int)sizeof(double) == fr->e_size)) ))
 +                fprintf(stderr,"Opened %s as double precision energy file\n",
 +                        fn);
 +            else {
 +                if (empty_file(fn))
 +                    gmx_fatal(FARGS,"File %s is empty",fn);
 +                else
 +                    gmx_fatal(FARGS,"Energy file %s not recognized, maybe different CPU?",
 +                              fn);
 +            }
 +            free_enxnms(nre,nms);
 +        }
 +        free_enxframe(fr);
 +        sfree(fr);
 +        gmx_fio_rewind(ef->fio);
 +    }
 +    else 
 +        ef->fio = gmx_fio_open(fn,mode);
 +
 +    ef->framenr=0;
 +    ef->frametime=0;
 +    return ef;
 +}
 +
 +t_fileio *enx_file_pointer(const ener_file_t ef)
 +{
 +    return ef->fio;
 +}
 +
 +static void convert_full_sums(ener_old_t *ener_old,t_enxframe *fr)
 +{
 +    int nstep_all;
 +    int ne,ns,i;
 +    double esum_all,eav_all;
 +    
 +    if (fr->nsum > 0)
 +    {
 +        ne = 0;
 +        ns = 0;
 +        for(i=0; i<fr->nre; i++)
 +        {
 +            if (fr->ener[i].e    != 0) ne++;
 +            if (fr->ener[i].esum != 0) ns++;
 +        }
 +        if (ne > 0 && ns == 0)
 +        {
 +            /* We do not have all energy sums */
 +            fr->nsum = 0;
 +        }
 +    }
 +    
 +    /* Convert old full simulation sums to sums between energy frames */
 +    nstep_all = fr->step - ener_old->first_step + 1;
 +    if (fr->nsum > 1 && fr->nsum == nstep_all && ener_old->nsum_prev > 0)
 +    {
 +        /* Set the new sum length: the frame step difference */
 +        fr->nsum = fr->step - ener_old->step_prev;
 +        for(i=0; i<fr->nre; i++)
 +        {
 +            esum_all = fr->ener[i].esum;
 +            eav_all  = fr->ener[i].eav;
 +            fr->ener[i].esum = esum_all - ener_old->ener_prev[i].esum;
 +            fr->ener[i].eav  = eav_all  - ener_old->ener_prev[i].eav
 +                - dsqr(ener_old->ener_prev[i].esum/(nstep_all - fr->nsum)
 +                       - esum_all/nstep_all)*
 +                (nstep_all - fr->nsum)*nstep_all/(double)fr->nsum;
 +            ener_old->ener_prev[i].esum = esum_all;
 +            ener_old->ener_prev[i].eav  = eav_all;
 +        }
 +        ener_old->nsum_prev = nstep_all;
 +    }
 +    else if (fr->nsum > 0)
 +    {
 +        if (fr->nsum != nstep_all)
 +        {
 +            fprintf(stderr,"\nWARNING: something is wrong with the energy sums, will not use exact averages\n");
 +            ener_old->nsum_prev = 0;
 +        }
 +        else
 +        {
 +            ener_old->nsum_prev = nstep_all;
 +        }
 +        /* Copy all sums to ener_prev */
 +        for(i=0; i<fr->nre; i++)
 +        {
 +            ener_old->ener_prev[i].esum = fr->ener[i].esum;
 +            ener_old->ener_prev[i].eav  = fr->ener[i].eav;
 +        }
 +    }
 +    
 +    ener_old->step_prev = fr->step;
 +}
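
The eav update above is the standard decomposition of a sum of squared deviations. Writing $N$ = nstep_all, $m$ = fr->nsum (the steps between the two frames), $M = N - m$, $S$ for esum and $SS$ for eav, the code computes

    SS_m = SS_N - SS_M - \left(\frac{S_M}{M} - \frac{S_N}{N}\right)^{2} \frac{M N}{m}

which, via $\bar{x}_M - \bar{x}_N = \frac{m}{N}(\bar{x}_M - \bar{x}_m)$, is the familiar two-group identity $SS_N = SS_M + SS_m + \frac{M m}{N}(\bar{x}_M - \bar{x}_m)^2$ rearranged for $SS_m$.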
 +
 +gmx_bool do_enx(ener_file_t ef,t_enxframe *fr)
 +{
 +    int       file_version=-1;
 +    int       i,b;
 +    gmx_bool      bRead,bOK,bOK1,bSane;
 +    real      tmp1,tmp2,rdum;
 +    char      buf[22];
 +    /*int       d_size;*/
 +    
 +    bOK = TRUE;
 +    bRead = gmx_fio_getread(ef->fio);
 +    if (!bRead)
 +    {  
 +        fr->e_size = fr->nre*sizeof(fr->ener[0].e)*4;
 +        /*d_size = fr->ndisre*(sizeof(real)*2);*/
 +    }
 +    gmx_fio_checktype(ef->fio);
 +
 +    if (!do_eheader(ef,&file_version,fr,-1,NULL,&bOK))
 +    {
 +        if (bRead)
 +        {
 +            fprintf(stderr,"\rLast energy frame read %d time %8.3f         ",
 +                    ef->framenr-1,ef->frametime);
 +            if (!bOK)
 +            {
 +                fprintf(stderr,
 +                        "\nWARNING: Incomplete energy frame: nr %d time %8.3f\n",
 +                        ef->framenr,fr->t);
 +            }
 +        }
 +        else
 +        {
 +            gmx_file("Cannot write energy file header; maybe you are out of disk space?");
 +        }
 +        return FALSE;
 +    }
 +    if (bRead)
 +    {
 +        if ((ef->framenr <   20 || ef->framenr %   10 == 0) &&
 +            (ef->framenr <  200 || ef->framenr %  100 == 0) &&
 +            (ef->framenr < 2000 || ef->framenr % 1000 == 0))
 +        {
 +            fprintf(stderr,"\rReading energy frame %6d time %8.3f         ",
 +                    ef->framenr,fr->t);
 +        }
 +        ef->framenr++;
 +        ef->frametime = fr->t;
 +    }
 +    /* Check sanity of this header */
 +    bSane = fr->nre > 0 ;
 +    for(b=0; b<fr->nblock; b++)
 +    {
 +        bSane = bSane || (fr->block[b].nsub > 0);
 +    }
 +    if (!((fr->step >= 0) && bSane))
 +    {
 +        fprintf(stderr,"\nWARNING: there may be something wrong with energy file %s\n",
 +                gmx_fio_getname(ef->fio));
 +        fprintf(stderr,"Found: step=%s, nre=%d, nblock=%d, time=%g.\n"
 +                "Trying to skip frame expect a crash though\n",
 +                gmx_step_str(fr->step,buf),fr->nre,fr->nblock,fr->t);
 +    }
 +    if (bRead && fr->nre > fr->e_alloc)
 +    {
 +        srenew(fr->ener,fr->nre);
 +        for(i=fr->e_alloc; (i<fr->nre); i++)
 +        {
 +            fr->ener[i].e    = 0;
 +            fr->ener[i].eav  = 0;
 +            fr->ener[i].esum = 0;
 +        }
 +        fr->e_alloc = fr->nre;
 +    }
 +    
 +    for(i=0; i<fr->nre; i++)
 +    {
 +        bOK = bOK && gmx_fio_do_real(ef->fio, fr->ener[i].e);
 +        
 +        /* Do not store sums of length 1,
 +         * since this does not add information.
 +         */
 +        if (file_version == 1 ||
 +            (bRead && fr->nsum > 0) || fr->nsum > 1)
 +        {
 +            tmp1 = fr->ener[i].eav;
 +            bOK = bOK && gmx_fio_do_real(ef->fio, tmp1);
 +            if (bRead)
 +                fr->ener[i].eav = tmp1;
 +            
 +            /* This is to save only in single precision (unless compiled in DP) */
 +            tmp2 = fr->ener[i].esum;
 +            bOK = bOK && gmx_fio_do_real(ef->fio, tmp2);
 +            if (bRead)
 +                fr->ener[i].esum = tmp2;
 +            
 +            if (file_version == 1)
 +            {
 +                /* Old, unused real */
 +                rdum = 0;
 +                bOK = bOK && gmx_fio_do_real(ef->fio, rdum);
 +            }
 +        }
 +    }
 +    
 +    /* Here we can not check for file_version==1, since one could have
 +     * continued an old format simulation with a new one with mdrun -append.
 +     */
 +    if (bRead && ef->eo.bOldFileOpen)
 +    {
 +        /* Convert old full simulation sums to sums between energy frames */
 +        convert_full_sums(&(ef->eo),fr);
 +    }
 +    /* read the blocks */
 +    for(b=0; b<fr->nblock; b++)
 +    {
 +        /* now read the subblocks. */
 +        int nsub=fr->block[b].nsub; /* shortcut */
 +        int i;
 +
 +        for(i=0;i<nsub;i++)
 +        {
 +            t_enxsubblock *sub=&(fr->block[b].sub[i]); /* shortcut */
 +
 +            if (bRead)
 +            {
 +                enxsubblock_alloc(sub);
 +            }
 +
 +            /* read/write data */
 +            bOK1=TRUE;
 +            switch (sub->type)
 +            {
 +                case xdr_datatype_float:
 +                    bOK1=gmx_fio_ndo_float(ef->fio, sub->fval, sub->nr); 
 +                    break;
 +                case xdr_datatype_double:
 +                    bOK1=gmx_fio_ndo_double(ef->fio, sub->dval, sub->nr); 
 +                    break;
 +                case xdr_datatype_int:
 +                    bOK1=gmx_fio_ndo_int(ef->fio, sub->ival, sub->nr);
 +                    break;
 +                case xdr_datatype_large_int:
 +                    bOK1=gmx_fio_ndo_gmx_large_int(ef->fio, sub->lval, sub->nr);
 +                    break;
 +                case xdr_datatype_char:
 +                    bOK1=gmx_fio_ndo_uchar(ef->fio, sub->cval, sub->nr);
 +                    break;
 +                case xdr_datatype_string:
 +                    bOK1=gmx_fio_ndo_string(ef->fio, sub->sval, sub->nr);
 +                    break;
 +                default:
 +                    gmx_incons("Reading unknown block data type: this file is corrupted or from the future");
 +            }
 +            bOK = bOK && bOK1;
 +        }
 +    }
 +    
 +    if(!bRead)
 +    {
 +        if( gmx_fio_flush(ef->fio) != 0)
 +        {
 +            gmx_file("Cannot write energy file; maybe you are out of disk space?");
 +        }
 +    }
 +    
 +    if (!bOK)
 +    {
 +        if (bRead)
 +        {
 +            fprintf(stderr,"\nLast energy frame read %d",
 +                    ef->framenr-1);
 +            fprintf(stderr,"\nWARNING: Incomplete energy frame: nr %d time %8.3f\n",
 +                    ef->framenr,fr->t);
 +        }
 +        else
 +        {
 +            gmx_fatal(FARGS,"could not write energies");
 +        }
 +        return FALSE; 
 +    }
 +    
 +    return TRUE;
 +}
 +
 +static real find_energy(const char *name, int nre, gmx_enxnm_t *enm,
 +                        t_enxframe *fr)
 +{
 +    int i;
 +    
 +    for(i=0; i<nre; i++)
 +    {
 +        if (strcmp(enm[i].name,name) == 0)
 +        {
 +            return  fr->ener[i].e;
 +        }
 +    }
 +    
 +    gmx_fatal(FARGS,"Could not find energy term named '%s'",name);
 +    
 +    return 0;
 +}
 +
 +
 +void get_enx_state(const char *fn, real t, gmx_groups_t *groups, t_inputrec *ir,
 +                   t_state *state)
 +{
 +  /* Should match the names in mdebin.c */
 +  static const char *boxvel_nm[] = {
 +  "Box-Vel-XX", "Box-Vel-YY", "Box-Vel-ZZ",
 +  "Box-Vel-YX", "Box-Vel-ZX", "Box-Vel-ZY"
 +  };
 +  
 +  static const char *pcouplmu_nm[] = {
 +    "Pcoupl-Mu-XX", "Pcoupl-Mu-YY", "Pcoupl-Mu-ZZ",
 +    "Pcoupl-Mu-YX", "Pcoupl-Mu-ZX", "Pcoupl-Mu-ZY"
 +  };
 +  static const char *baro_nm[] = {
 +    "Barostat"
 +  };
 +
 +
 +  int ind0[] = { XX,YY,ZZ,YY,ZZ,ZZ };
 +  int ind1[] = { XX,YY,ZZ,XX,XX,YY };
 +  int nre,nfr,i,j,ni,npcoupl;
 +  char       buf[STRLEN];
 +  const char *bufi;
 +  gmx_enxnm_t *enm=NULL;
 +  t_enxframe *fr;
 +  ener_file_t in;
 +
 +  in = open_enx(fn,"r");
 +  do_enxnms(in,&nre,&enm);
 +  snew(fr,1);
 +  nfr = 0;
 +  while ((nfr==0 || fr->t != t) && do_enx(in,fr)) {
 +    nfr++;
 +  }
 +  close_enx(in);
 +  fprintf(stderr,"\n");
 +
 +  if (nfr == 0 || fr->t != t)
 +    gmx_fatal(FARGS,"Could not find frame with time %f in '%s'",t,fn);
 +  
 +  npcoupl = TRICLINIC(ir->compress) ? 6 : 3;
 +  if (ir->epc == epcPARRINELLORAHMAN) {
 +    clear_mat(state->boxv);
 +    for(i=0; i<npcoupl; i++) {
 +      state->boxv[ind0[i]][ind1[i]] =
 +      find_energy(boxvel_nm[i],nre,enm,fr);
 +    }
 +    fprintf(stderr,"\nREAD %d BOX VELOCITIES FROM %s\n\n",npcoupl,fn);
 +  }
 +
 +  if (ir->etc == etcNOSEHOOVER) 
 +  {
 +      char cns[20];
 +
 +      cns[0] = '\0';
 +
 +      for(i=0; i<state->ngtc; i++) {
 +          ni = groups->grps[egcTC].nm_ind[i];
 +          bufi = *(groups->grpname[ni]);
 +          for(j=0; (j<state->nhchainlength); j++) 
 +          {
 +              if (IR_NVT_TROTTER(ir))
 +              {
 +                  sprintf(cns,"-%d",j);
 +              }
 +              sprintf(buf,"Xi%s-%s",cns,bufi);
 +              state->nosehoover_xi[i] = find_energy(buf,nre,enm,fr);
 +              sprintf(buf,"vXi%s-%s",cns,bufi);
 +              state->nosehoover_vxi[i] = find_energy(buf,nre,enm,fr);
 +          }
 +
 +      }
 +      fprintf(stderr,"\nREAD %d NOSE-HOOVER Xi chains FROM %s\n\n",state->ngtc,fn);
 +
 +      if (IR_NPT_TROTTER(ir)) 
 +      {
 +          for(i=0; i<state->nnhpres; i++) {
 +              bufi = baro_nm[0]; /* All barostat DOF's together for now */
 +              for(j=0; (j<state->nhchainlength); j++) 
 +              {
 +                  sprintf(buf,"Xi-%d-%s",j,bufi); 
 +                  state->nhpres_xi[i] = find_energy(buf,nre,enm,fr);
 +                  sprintf(buf,"vXi-%d-%s",j,bufi);
 +                  state->nhpres_vxi[i] = find_energy(buf,nre,enm,fr);
 +              }
 +          }
 +          fprintf(stderr,"\nREAD %d NOSE-HOOVER BAROSTAT Xi chains FROM %s\n\n",state->nnhpres,fn);
 +      }
 +  } 
 +
 +  free_enxnms(nre,enm);
 +  free_enxframe(fr);
 +  sfree(fr);
 +}
 +
diff --cc src/gromacs/gmxlib/futil.c
index 815ab675d6c89ec18e809527ab076e30c009ba57,0000000000000000000000000000000000000000..16e249323f386b805a04d1ca583b2c3bde9e5871
mode 100644,000000..100644
--- /dev/null
+++ b/src/gromacs/gmxlib/futil.c
@@@ -1,1187 -1,0 +1,1192 @@@
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROningen Mixture of Alchemy and Childrens' Stories
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +#include "gromacs/utility/gmx_header_config.h"
 +
 +#include <stdio.h>
 +#include <stdlib.h>
 +#include <string.h>
 +#include <sys/types.h>
 +#include <sys/stat.h>
 +#include <fcntl.h>
 +
 +#ifdef HAVE_DIRENT_H
 +/* POSIX */
 +#include <dirent.h>
 +#endif
 +
 +#ifdef HAVE_UNISTD_H
 +#include <unistd.h>
 +#endif
 +
 +#ifdef GMX_NATIVE_WINDOWS
 +#include <direct.h>
 +#include <io.h>
 +#endif
 +
 +#include "sysstuff.h"
 +#include "string2.h"
 +#include "futil.h"
 +#include "network.h"
 +#include "gmx_fatal.h"
 +#include "smalloc.h"
 +#include "statutil.h"
 +
 +
 +#ifdef GMX_THREAD_MPI
 +#include "thread_mpi.h"
 +#endif
 +
 +/* Windows file stuff, only necessary for visual studio */
 +#ifdef _MSC_VER
 +#include "windows.h"
 +#endif
 +
 +/* we keep a linked list of all files opened through pipes (i.e.
 +   compressed or .gzipped files). This way we can distinguish between them
 +   without having to change the semantics of reading from/writing to files.
 +   */
 +typedef struct t_pstack {
 +    FILE   *fp;
 +    struct t_pstack *prev;
 +} t_pstack;
 +
 +static t_pstack *pstack=NULL;
 +static gmx_bool     bUnbuffered=FALSE;
 +
 +#ifdef GMX_THREAD_MPI
 +/* this linked list is an intrinsically globally shared object, so we have
 +   to protect it with mutexes */
 +static tMPI_Thread_mutex_t pstack_mutex=TMPI_THREAD_MUTEX_INITIALIZER;
 +#endif
 +
 +void no_buffers(void)
 +{
 +    bUnbuffered=TRUE;
 +}
 +
 +void push_ps(FILE *fp)
 +{
 +    t_pstack *ps;
 +
 +#ifdef GMX_THREAD_MPI
 +    tMPI_Thread_mutex_lock(&pstack_mutex);
 +#endif
 +
 +    snew(ps,1);
 +    ps->fp   = fp;
 +    ps->prev = pstack;
 +    pstack   = ps;
 +#ifdef GMX_THREAD_MPI
 +    tMPI_Thread_mutex_unlock(&pstack_mutex);
 +#endif
 +}
 +
 +#ifdef GMX_FAHCORE
 +/* don't use pipes!*/
 +#define popen fah_fopen
 +#define pclose fah_fclose
 +#define SKIP_FFOPS 1
 +#else
 +#ifdef ffclose
 +#undef ffclose
 +#endif
 +#endif
 +
 +#ifndef GMX_FAHCORE
 +#ifndef HAVE_PIPES
 +static FILE *popen(const char *nm,const char *mode)
 +{
 +    gmx_impl("Sorry no pipes...");
 +
 +    return NULL;
 +}
 +
 +static int pclose(FILE *fp)
 +{
 +    gmx_impl("Sorry no pipes...");
 +
 +    return 0;
 +}
 +#endif
 +#endif
 +
 +int ffclose(FILE *fp)
 +{
 +#ifdef SKIP_FFOPS
 +    return fclose(fp);
 +#else
 +    t_pstack *ps,*tmp;
 +    int ret=0;
 +#ifdef GMX_THREAD_MPI
 +    tMPI_Thread_mutex_lock(&pstack_mutex);
 +#endif
 +
 +    ps=pstack;
 +    if (ps == NULL) {
 +        if (fp != NULL) 
 +            ret = fclose(fp);
 +    }
 +    else if (ps->fp == fp) {
 +        if (fp != NULL)
 +            ret = pclose(fp);
 +        pstack=pstack->prev;
 +        sfree(ps);
 +    }
 +    else {
 +        while ((ps->prev != NULL) && (ps->prev->fp != fp))
 +            ps=ps->prev;
 +        if ((ps->prev != NULL) && ps->prev->fp == fp) {
 +            if (ps->prev->fp != NULL)
 +                ret = pclose(ps->prev->fp);
 +            tmp=ps->prev;
 +            ps->prev=ps->prev->prev;
 +            sfree(tmp);
 +        }
 +        else {
 +            if (fp != NULL)
 +                ret = fclose(fp);
 +        }
 +    }
 +#ifdef GMX_THREAD_MPI
 +    tMPI_Thread_mutex_unlock(&pstack_mutex);
 +#endif
 +    return ret;
 +#endif
 +}
 +
 +
 +#ifdef rewind
 +#undef rewind
 +#endif
 +
 +void frewind(FILE *fp)
 +{
 +    t_pstack *ps;
 +#ifdef GMX_THREAD_MPI
 +    tMPI_Thread_mutex_lock(&pstack_mutex);
 +#endif
 +
 +    ps=pstack;
 +    while (ps != NULL) {
 +        if (ps->fp == fp) {
 +            fprintf(stderr,"Cannot rewind compressed file!\n");
 +#ifdef GMX_THREAD_MPI
 +            tMPI_Thread_mutex_unlock(&pstack_mutex);
 +#endif
 +            return;
 +        }
 +        ps=ps->prev;
 +    }
 +    rewind(fp);
 +#ifdef GMX_THREAD_MPI
 +    tMPI_Thread_mutex_unlock(&pstack_mutex);
 +#endif
 +}
 +
 +int gmx_fseek(FILE *stream, gmx_off_t offset, int whence)
 +{
 +#ifdef HAVE_FSEEKO
 +    return fseeko(stream, offset, whence);
 +#else
 +#ifdef HAVE__FSEEKI64
 +    return _fseeki64(stream, offset, whence);
 +#else
 +    return fseek(stream, offset, whence);
 +#endif
 +#endif
 +}
 +
 +gmx_off_t gmx_ftell(FILE *stream)
 +{
 +#ifdef HAVE_FSEEKO
 +    return ftello(stream);
 +#else
 +#ifdef HAVE__FSEEKI64 
 +    return _ftelli64(stream);
 +#else
 +    return ftell(stream);
 +#endif
 +#endif
 +}
 +
 +
 +gmx_bool is_pipe(FILE *fp)
 +{
 +    t_pstack *ps;
 +#ifdef GMX_THREAD_MPI
 +    tMPI_Thread_mutex_lock(&pstack_mutex);
 +#endif
 +
 +    ps=pstack;
 +    while (ps != NULL) {
 +        if (ps->fp == fp) {
 +#ifdef GMX_THREAD_MPI
 +            tMPI_Thread_mutex_unlock(&pstack_mutex);
 +#endif
 +            return TRUE;
 +        }
 +        ps=ps->prev;
 +    }
 +#ifdef GMX_THREAD_MPI
 +    tMPI_Thread_mutex_unlock(&pstack_mutex);
 +#endif
 +    return FALSE;
 +}
 +
 +
 +static FILE *uncompress(const char *fn,const char *mode)
 +{
 +    FILE *fp;
 +    char buf[256];
 +
 +    sprintf(buf,"uncompress -c < %s",fn);
 +    fprintf(stderr,"Going to execute '%s'\n",buf);
 +    if ((fp=popen(buf,mode)) == NULL)
 +        gmx_open(fn);
 +    push_ps(fp);
 +
 +    return fp;
 +}
 +
 +static FILE *gunzip(const char *fn,const char *mode)
 +{
 +    FILE *fp;
 +    char buf[256];
 +
 +    sprintf(buf,"gunzip -c < %s",fn);
 +    fprintf(stderr,"Going to execute '%s'\n",buf);
 +    if ((fp=popen(buf,mode)) == NULL)
 +        gmx_open(fn);
 +    push_ps(fp);
 +
 +    return fp;
 +}
 +
 +gmx_bool gmx_fexist(const char *fname)
 +{
 +    FILE *test;
 +
 +    if (fname == NULL)
 +        return FALSE;
 +    test=fopen(fname,"r");
 +    if (test == NULL) {
 +        /*Windows doesn't allow fopen of directory - so we need to check this separately */
 +        #ifdef GMX_NATIVE_WINDOWS
 +            DWORD attr = GetFileAttributes(fname);
 +            return (attr != INVALID_FILE_ATTRIBUTES) && (attr & FILE_ATTRIBUTE_DIRECTORY);
 +        #else 
 +            return FALSE;
 +        #endif
 +    } else {
 +        fclose(test);
 +        return TRUE;
 +    }
 +}
 +
 +static gmx_bool gmx_is_file(const char *fname)
 +{
 +    FILE *test;
 +
 +    if (fname == NULL)
 +        return FALSE;
 +    test=fopen(fname,"r");
 +    if (test == NULL)
 +    {
 +        return FALSE;
 +    }
 +    else
 +    {
 +        fclose(test);
 +        /*Windows doesn't allow fopen of directory - so we don't need to check this separately */
 +        #if (!((defined WIN32 || defined _WIN32 || defined WIN64 || defined _WIN64) && !defined __CYGWIN__ && !defined __CYGWIN32__))
 +        {
 +            int status;
 +            struct stat st_buf;
 +            #ifdef HAVE_LSTAT
 +                status = lstat (fname, &st_buf);
 +            #else
 +                status = stat (fname, &st_buf);
 +            #endif
 +            if (status != 0 || !S_ISREG(st_buf.st_mode))
 +            {
 +                return FALSE;
 +            }
 +        }
 +        #endif
 +        return TRUE;
 +    }
 +}
 +
 +
 +gmx_bool gmx_fexist_master(const char *fname, t_commrec *cr)
 +{
 +  gmx_bool bExist;
 +  
 +  if (SIMMASTER(cr)) 
 +  {
 +      bExist = gmx_fexist(fname);
 +  }
 +  if (PAR(cr)) 
 +  {
 +      gmx_bcast(sizeof(bExist),&bExist,cr);
 +  }
 +  return bExist;
 +}
 +
 +gmx_bool gmx_eof(FILE *fp)
 +{
 +    char data[4];
 +    gmx_bool beof;
 +
 +    if (is_pipe(fp))
 +        return feof(fp);
 +    else {
 +        if ((beof=fread(data,1,1,fp))==1)
 +            gmx_fseek(fp,-1,SEEK_CUR);
 +        return !beof;
 +    }
 +}
 +
 +static char *backup_fn(const char *file,int count_max)
 +{
 +    /* Use a reasonably low value for count_max; we might
 +     * generate 4-5 files in each round, and we don't
 +     * want to hit directory limits of 1024 or 2048 files.
 +     */
 +#define COUNTMAX 99
 +    int         i,count=1;
 +    char        *directory,*fn;
 +    char        *buf;
 +
 +    if (count_max == -1)
 +    {
 +        count_max = COUNTMAX;
 +    }
 +
 +    smalloc(buf, GMX_PATH_MAX);
 +
 +    for(i=strlen(file)-1; ((i > 0) && (file[i] != DIR_SEPARATOR)); i--)
 +        ;
 +    /* Must check whether i > 0, i.e. whether there is a directory
 +     * in the file name. In that case we overwrite the / sign with
 +     * a '\0' to end the directory string .
 +     */
 +    if (i > 0) {
 +        directory    = gmx_strdup(file);
 +        directory[i] = '\0';
 +        fn           = gmx_strdup(file+i+1);
 +    }
 +    else {
 +        directory    = gmx_strdup(".");
 +        fn           = gmx_strdup(file);
 +    }
 +    do {
 +        sprintf(buf,"%s/#%s.%d#",directory,fn,count);
 +        count++;
 +    } while ((count <= count_max) && gmx_fexist(buf));
 +
 +    /* Arbitrarily bail out */
 +    if (count > count_max) 
 +        gmx_fatal(FARGS,"Won't make more than %d backups of %s for you.\n"
 +                  "The env.var. GMX_MAXBACKUP controls this maximum, -1 disables backups.",
 +                  count_max,fn);
 +
 +    sfree(directory);
 +    sfree(fn);
 +
 +    return buf;
 +}
 +
 +gmx_bool make_backup(const char * name)
 +{
 +    char * env;
 +    int  count_max;
 +    char * backup;
 +
 +#ifdef GMX_FAHCORE
 +    return FALSE; /* skip making backups */
 +#else
 +
 +    if (gmx_fexist(name))
 +    {
 +        env = getenv("GMX_MAXBACKUP");
 +        if (env != NULL)
 +        {
 +            count_max = 0;
 +            sscanf(env,"%d",&count_max);
 +            if (count_max == -1)
 +            {
 +                /* Do not make backups and possibly overwrite old files */
 +                return TRUE;
 +            }
 +        }
 +        else
 +        {
 +            /* Use the default maximum */
 +            count_max = -1;
 +        }
 +        backup = backup_fn(name,count_max);
 +        if(rename(name, backup) == 0) {
 +            fprintf(stderr, "\nBack Off! I just backed up %s to %s\n",
 +                    name, backup);
 +        } else {
 +            fprintf(stderr, "Sorry couldn't backup %s to %s\n", name, backup);
 +            return FALSE;
 +        }
 +        sfree(backup);
 +    }
 +    return TRUE;
 +#endif
 +}
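
Callers are expected to invoke make_backup() before truncating an output file; ffopen() below does this automatically for mode "w". A minimal sketch (safe_truncate() is a hypothetical wrapper, not GROMACS API):

    #include "futil.h"
    #include "gmx_fatal.h"

    /* Sketch only: rename an existing fn to #fn.N# before it is rewritten.
     * GMX_MAXBACKUP=-1 disables backups; make_backup() then returns TRUE. */
    static void safe_truncate(const char *fn)
    {
        if (!make_backup(fn))
        {
            gmx_fatal(FARGS, "Cannot back up %s", fn);
        }
    }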
 +
 +FILE *ffopen(const char *file,const char *mode)
 +{
 +#ifdef SKIP_FFOPS
 +    return fopen(file,mode);
 +#else
 +    FILE *ff=NULL;
 +    char buf[256],*bf,*bufsize=0,*ptr;
 +    gmx_bool bRead;
 +    int  bs;
 +
++    if (file == NULL) 
++    {
++        return NULL;
++    }
++
 +    if (mode[0]=='w') {
 +        make_backup(file);
 +    }
 +    where();
 +
 +    bRead= (mode[0]=='r'&&mode[1]!='+');
 +    strcpy(buf,file);
 +    if (!bRead || gmx_fexist(buf)) {
 +        if ((ff=fopen(buf,mode))==NULL)
 +            gmx_file(buf);
 +        where();
 +        /* Check whether we should be using buffering (default) or not
 +         * (for debugging)
 +         */
 +        if (bUnbuffered || ((bufsize=getenv("LOG_BUFS")) != NULL)) {
 +            /* Check whether to use completely unbuffered */
 +            if (bUnbuffered)
 +                bs = 0;
 +            else
 +                bs=strtol(bufsize, NULL, 10); 
 +            if (bs <= 0)
 +                setbuf(ff,NULL); 
 +            else {
 +                snew(ptr,bs+8);
 +                if (setvbuf(ff,ptr,_IOFBF,bs) != 0)
 +                    gmx_file("Buffering File");
 +            }
 +        }
 +        where();
 +    }
 +    else {
 +        sprintf(buf,"%s.Z",file);
 +        if (gmx_fexist(buf)) {
 +            ff=uncompress(buf,mode);
 +        }
 +        else {
 +            sprintf(buf,"%s.gz",file);
 +            if (gmx_fexist(buf)) {
 +                ff=gunzip(buf,mode);
 +            }
 +            else 
 +                gmx_file(file);
 +        }
 +    }
 +    return ff;
 +#endif
 +}
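
From the caller's side the pipe machinery is invisible: ffopen() falls back to fn.Z or fn.gz through popen(), and ffclose() consults the pstack list above to pick fclose() or pclose(). A minimal sketch (dump_first_line() is a hypothetical helper):

    #include <stdio.h>
    #include "futil.h"

    /* Sketch only: fn, fn.Z or fn.gz -- whichever exists -- opens the same way. */
    static void dump_first_line(const char *fn)
    {
        FILE *fp = ffopen(fn, "r");   /* calls gmx_file() fatally if nothing is found */
        char  line[256];

        if (fgets(line, sizeof(line), fp) != NULL)
        {
            fputs(line, stdout);
        }
        ffclose(fp);                  /* pclose()s the pipe when fn was compressed */
    }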
 +
 +/* Our own implementation of dirent-like functionality to scan directories. */
 +struct gmx_directory
 +{
 +#ifdef HAVE_DIRENT_H
 +    DIR  *               dirent_handle;
 +#elif (defined GMX_NATIVE_WINDOWS)
 +    intptr_t             windows_handle;
 +    struct _finddata_t   finddata;
 +    int                  first;
 +#else
 +    int      dummy;
 +#endif
 +};
 +
 +
 +int
 +gmx_directory_open(gmx_directory_t *p_gmxdir,const char *dirname)
 +{
 +    struct gmx_directory *  gmxdir;
 +    int                     rc;
 +    
 +    snew(gmxdir,1);
 +    
 +    *p_gmxdir = gmxdir;
 +    
 +#ifdef HAVE_DIRENT_H
 +    if( (gmxdir->dirent_handle = opendir(dirname)) != NULL)
 +    {
 +        rc = 0;
 +    }
 +    else 
 +    {
 +        sfree(gmxdir);
 +        *p_gmxdir = NULL;
 +        rc        = EINVAL;
 +    }
 +#elif (defined GMX_NATIVE_WINDOWS)
 +    
 +    if(dirname!=NULL && strlen(dirname)>0)
 +    {
 +        char *     tmpname;
 +        size_t     namelength;
 +        int        len;
 +        
 +        len = strlen(dirname);
 +        snew(tmpname,len+3);
 +        
 +        strncpy(tmpname,dirname,len+1);
 +        
 +        /* Remove possible trailing directory separator */
 +        if(tmpname[len]=='/' || tmpname[len]=='\\')
 +        {
 +            tmpname[len]='\0';
 +        }
 +        
 +        /* Add wildcard */
 +        strcat(tmpname,"/*");
 +        
 +        gmxdir->first = 1;
 +        if( (gmxdir->windows_handle=_findfirst(tmpname,&gmxdir->finddata))>0L)
 +        {
 +            rc = 0;
 +        }
 +        else
 +        {
 +            if(errno==EINVAL)
 +            {
 +                sfree(gmxdir);
 +                *p_gmxdir = NULL;
 +                rc        = EINVAL;                
 +            }
 +            else
 +            {
 +                rc        = 0;
 +            }
 +        }
 +    }
 +    else
 +    {
 +        rc = EINVAL;
 +    }
 +#else
 +    gmx_fatal(FARGS,
 +              "Source compiled without POSIX dirent or windows support - cannot scan directories.\n"
 +              "In the very unlikely event this is not a compile-time mistake you could consider\n"
 +              "implementing support for your platform in futil.c, but contact the developers\n"
 +              "to make sure it's really necessary!\n");
 +    rc = -1;
 +#endif
 +    return rc;
 +}
 +
 +
 +int
 +gmx_directory_nextfile(gmx_directory_t gmxdir,char *name,int maxlength_name)
 +{
 +    int                     rc;
 +    
 +#ifdef HAVE_DIRENT_H
 +    
 +    struct dirent *         direntp_large;
 +    struct dirent *         p;
 +    
 +    
 +    if(gmxdir!=NULL && gmxdir->dirent_handle!=NULL)
 +    {
 +        /* On some platforms no space is present for d_name in dirent.
 +         * Since d_name is guaranteed to be the last entry, allocating
 +         * extra space for dirent will allow more size for d_name.
 +         * GMX_MAX_PATH should always be >= the max possible d_name.
 +         */
 +        smalloc(direntp_large, sizeof(*direntp_large) + GMX_PATH_MAX);
 +        rc = readdir_r(gmxdir->dirent_handle,direntp_large,&p);
 +
 +        if(p!=NULL && rc==0)
 +        {
 +            strncpy(name,direntp_large->d_name,maxlength_name);
 +        }
 +        else
 +        {
 +            name[0] = '\0';
 +            rc      = ENOENT;
 +        }
 +        sfree(direntp_large);
 +    }
 +    else 
 +    {
 +        name[0] = '\0';
 +        rc      = EINVAL;
 +    }
 +    
 +#elif (defined GMX_NATIVE_WINDOWS)
 +    
 +    if(gmxdir!=NULL)
 +    {
 +        if(gmxdir->windows_handle<=0)
 +        {
 +            
 +            name[0] = '\0';
 +            rc      = ENOENT;
 +        }
 +        else if(gmxdir->first==1)
 +        {
 +            strncpy(name,gmxdir->finddata.name,maxlength_name);
 +            rc            = 0;
 +            gmxdir->first = 0;
 +        }
 +        else
 +        {
 +            if(_findnext(gmxdir->windows_handle,&gmxdir->finddata)==0)
 +            {
 +                strncpy(name,gmxdir->finddata.name,maxlength_name);
 +                rc      = 0;
 +            }
 +            else
 +            {
 +                name[0] = '\0';
 +                rc      = ENOENT;
 +            }
 +        }
 +    }
 +    
 +#else
 +    gmx_fatal(FARGS,
 +              "Source compiled without POSIX dirent or windows support - cannot scan directories.\n");
 +    rc = -1;
 +#endif
 +    return rc;
 +}
 +
 +
 +int 
 +gmx_directory_close(gmx_directory_t gmxdir)
 +{
 +    int                     rc;
 +#ifdef HAVE_DIRENT_H
 +    rc = (gmxdir != NULL) ? closedir(gmxdir->dirent_handle) : EINVAL;
 +#elif (defined GMX_NATIVE_WINDOWS)
 +    rc = (gmxdir != NULL) ? _findclose(gmxdir->windows_handle) : EINVAL;
 +#else
 +    gmx_fatal(FARGS,
 +              "Source compiled without POSIX dirent or windows support - cannot scan directories.\n");
 +    rc = -1;
 +#endif
 +    
 +    sfree(gmxdir);
 +    return rc;
 +}
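
A minimal scan loop over this API, assuming the declarations exported through futil.h (list_dir() itself is a hypothetical helper):

    #include <stdio.h>
    #include "futil.h"
    #include "gmx_fatal.h"

    /* Sketch only: print every entry of path; gmx_directory_nextfile()
     * returns 0 per entry and ENOENT once the listing is exhausted. */
    static void list_dir(const char *path)
    {
        gmx_directory_t dir;
        char            name[GMX_PATH_MAX];

        if (gmx_directory_open(&dir, path) != 0)
        {
            gmx_fatal(FARGS, "Cannot open directory %s", path);
        }
        while (gmx_directory_nextfile(dir, name, sizeof(name)) == 0)
        {
            printf("%s\n", name);
        }
        gmx_directory_close(dir);
    }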
 +
 +
 +
 +
 +gmx_bool search_subdirs(const char *parent, char *libdir)
 +{
 +    char *ptr;
 +    gmx_bool found;
 +
 +    /* Search a few common subdirectory names for the gromacs library dir */
 +    sprintf(libdir,"%s%cshare%ctop%cgurgle.dat",parent,
 +            DIR_SEPARATOR,DIR_SEPARATOR,DIR_SEPARATOR);
 +    found=gmx_fexist(libdir);
 +    if(!found) {
 +        sprintf(libdir,"%s%cshare%cgromacs%ctop%cgurgle.dat",parent,
 +                DIR_SEPARATOR,DIR_SEPARATOR,
 +                DIR_SEPARATOR,DIR_SEPARATOR);
 +        found=gmx_fexist(libdir);
 +    }    
 +    if(!found) {
 +        sprintf(libdir,"%s%cshare%cgromacs-%s%ctop%cgurgle.dat",parent,
 +                DIR_SEPARATOR,DIR_SEPARATOR,VERSION,
 +                DIR_SEPARATOR,DIR_SEPARATOR);
 +        found=gmx_fexist(libdir);
 +    }    
 +    if(!found) {
 +        sprintf(libdir,"%s%cshare%cgromacs%cgromacs-%s%ctop%cgurgle.dat",parent,
 +                DIR_SEPARATOR,DIR_SEPARATOR,DIR_SEPARATOR,
 +                VERSION,DIR_SEPARATOR,DIR_SEPARATOR);
 +        found=gmx_fexist(libdir);
 +    }    
 +
 +    /* Remove the gurgle.dat part from libdir if we found something */
 +    if(found) {
 +        ptr=strrchr(libdir,DIR_SEPARATOR); /* slash or backslash always present, no check necessary */
 +        *ptr='\0';
 +    }
 +    return found;
 +}
 +
 +
 +/* Check if the program name begins with "/" on unix/cygwin, or
 + * with "\" or "X:\" on windows. If not, the program name
 + * is relative to the current directory.
 + */
 +static gmx_bool filename_is_absolute(char *name)
 +{
 +#ifdef GMX_NATIVE_WINDOWS
 +    return ((name[0] == DIR_SEPARATOR) || ((strlen(name)>3) && (strncmp(name+1,":\\",2) == 0)));
 +#else
 +    return (name[0] == DIR_SEPARATOR);
 +#endif
 +}
 +
 +gmx_bool get_libdir(char *libdir)
 +{
 +#define GMX_BINNAME_MAX 512
 +    char bin_name[GMX_BINNAME_MAX];
 +    char buf[GMX_BINNAME_MAX];
 +    char full_path[GMX_PATH_MAX+GMX_BINNAME_MAX];
 +    char system_path[GMX_PATH_MAX];
 +    char *dir,*ptr,*s,*pdum;
 +    gmx_bool found=FALSE;
 +    int i;
 +
 +    if (Program() != NULL)
 +    {
 +
 +    /* First - detect binary name */
 +    if (strlen(Program()) >= GMX_BINNAME_MAX)
 +    {
 +        gmx_fatal(FARGS,"The name of the binary is longer than the allowed buffer size (%d):\n'%s'",GMX_BINNAME_MAX,Program());
 +    }
 +    strncpy(bin_name,Program(),GMX_BINNAME_MAX-1);
 +
 +    /* On windows & cygwin we need to add the .exe extension
 +     * too, or we won't be able to detect that the file exists
 +     */
 +#if (defined GMX_NATIVE_WINDOWS || defined GMX_CYGWIN)
 +    if(strlen(bin_name)<4 || gmx_strncasecmp(bin_name+strlen(bin_name)-4,".exe",4))
 +        strcat(bin_name,".exe");
 +#endif
 +
 +    /* Only do the smart search part if we got a real name */
 +    if (NULL!=bin_name && strncmp(bin_name,"GROMACS",GMX_BINNAME_MAX)) {
 +
 +        if (!strchr(bin_name,DIR_SEPARATOR)) {
 +            /* No slash or backslash in name means it must be in the path - search it! */
 +            /* Add the local dir since it is not in the path on windows */
 +            gmx_getcwd(system_path, sizeof(system_path));
 +            sprintf(full_path,"%s%c%s",system_path,DIR_SEPARATOR,bin_name);
 +            found = gmx_is_file(full_path);
 +            if (!found && (s=getenv("PATH")) != NULL)
 +            {
 +                char *dupped;
 +                
 +                dupped=gmx_strdup(s);
 +                s=dupped;
 +                while(!found && (dir=gmx_strsep(&s, PATH_SEPARATOR)) != NULL)
 +                {
 +                    sprintf(full_path,"%s%c%s",dir,DIR_SEPARATOR,bin_name);
 +                    found = gmx_is_file(full_path);
 +                }
 +                sfree(dupped);
 +            }
 +            if (!found)
 +            {
 +                return FALSE;
 +            }
 +        } else if (!filename_is_absolute(bin_name)) {
 +            /* name contains directory separators, but 
 +             * it does not start at the root, i.e.
 +             * name is relative to the current dir 
 +             */
 +            gmx_getcwd(buf, sizeof(buf));
 +            sprintf(full_path,"%s%c%s",buf,DIR_SEPARATOR,bin_name);
 +        } else {
 +            strncpy(full_path,bin_name,GMX_PATH_MAX);
 +        }
 +
 +        /* Now we should have a full path and name in full_path,
 +         * but on unix it might be a link, or a link to a link to a link..
 +         */
 +#ifndef GMX_NATIVE_WINDOWS
 +        while( (i=readlink(full_path,buf,sizeof(buf)-1)) > 0 ) {
 +            buf[i]='\0';
 +            /* If it doesn't start with "/" it is relative */
 +            if (buf[0]!=DIR_SEPARATOR) {
 +                strncpy(strrchr(full_path,DIR_SEPARATOR)+1,buf,GMX_PATH_MAX);
 +            } else
 +                strncpy(full_path,buf,GMX_PATH_MAX);
 +        }
 +#endif
 +
 +        /* Remove the executable name - it always contains at least one slash */
 +        *(strrchr(full_path,DIR_SEPARATOR)+1)='\0';
 +        /* Now we have the full path to the gromacs executable.
 +         * Use it to find the library dir. 
 +         */
 +        found=FALSE;
 +        while(!found && ( (ptr=strrchr(full_path,DIR_SEPARATOR)) != NULL ) ) {
 +            *ptr='\0';
 +            found=search_subdirs(full_path,libdir);
 +        }
 +    }
 +    }
 +    /* End of smart searching. If we didn't find it in our parent tree,
 +     * or if the program name wasn't set, at least try some standard 
 +     * locations before giving up, in case we are running from e.g. 
 +     * a users home directory. This only works on unix or cygwin...
 +     */
 +#ifndef GMX_NATIVE_WINDOWS
 +    if(!found) 
 +        found=search_subdirs("/usr/local",libdir);
 +    if(!found) 
 +        found=search_subdirs("/usr",libdir);
 +    if(!found) 
 +        found=search_subdirs("/opt",libdir);
 +#endif
 +    return found;
 +}
 +
 +
 +char *low_gmxlibfn(const char *file, gmx_bool bAddCWD, gmx_bool bFatal)
 +{
 +    char *ret;
 +    char *lib,*dir;
 +    char buf[1024];
 +    char libpath[GMX_PATH_MAX];
 +    gmx_bool env_is_set=FALSE;
 +    char   *s,tmppath[GMX_PATH_MAX];
 +
 +    /* GMXLIB can be a path now */
 +    lib=getenv("GMXLIB");
 +    if (lib != NULL)
 +    {
 +        env_is_set=TRUE;
 +        strncpy(libpath,lib,GMX_PATH_MAX);
 +    } 
 +    else if (!get_libdir(libpath))
 +    {
 +        strncpy(libpath,GMXLIBDIR,GMX_PATH_MAX);
 +    }
 +
 +    ret = NULL;
 +    if (bAddCWD && gmx_fexist(file))
 +    {
 +        ret = gmx_strdup(file);
 +    }
 +    else 
 +    {
 +        strncpy(tmppath,libpath,GMX_PATH_MAX);
 +        s=tmppath;
 +        while(ret == NULL && (dir=gmx_strsep(&s, PATH_SEPARATOR)) != NULL )
 +        {
 +            sprintf(buf,"%s%c%s",dir,DIR_SEPARATOR,file);
 +            if (gmx_fexist(buf))
 +            {
 +                ret = gmx_strdup(buf);
 +            }
 +        }
 +        if (ret == NULL && bFatal) 
 +        {
 +            if (env_is_set) 
 +            {
 +                gmx_fatal(FARGS,
 +                          "Library file %s not found %sin your GMXLIB path.",
 +                          file, bAddCWD ? "in current dir nor " : "");
 +            }
 +            else
 +            {
 +                gmx_fatal(FARGS,
 +                          "Library file %s not found %sin default directories.\n"
 +                        "(You can set the directories to search with the GMXLIB path variable)",
 +                          file, bAddCWD ? "in current dir nor " : "");
 +            }
 +        }
 +    }
 +
 +    return ret;
 +}
 +
 +
 +
 +
 +
 +FILE *low_libopen(const char *file,gmx_bool bFatal)
 +{
 +    FILE *ff;
 +    char *fn;
 +
 +    fn=low_gmxlibfn(file,TRUE,bFatal);
 +
 +    if (fn==NULL) {
 +        ff=NULL;
 +    } else {
 +        if (debug)
 +        {
 +            fprintf(debug,"Opening library file %s\n",fn);
 +        }
 +        ff=fopen(fn,"r");
 +    }
 +    sfree(fn);
 +
 +    return ff;
 +}
 +
 +char *gmxlibfn(const char *file)
 +{
 +    return low_gmxlibfn(file,TRUE,TRUE);
 +}
 +
 +FILE *libopen(const char *file)
 +{
 +    return low_libopen(file,TRUE);
 +}
 +
 +void gmx_tmpnam(char *buf)
 +{
 +    int i,len,fd;
 +
 +    if ((len = strlen(buf)) < 7)
 +        gmx_fatal(FARGS,"Buf passed to gmx_tmpnam must be at least 7 bytes long");
 +    for(i=len-6; (i<len); i++) {
 +        buf[i] = 'X';
 +    }
 +    /* mktemp is dangerous and we should use mkstemp instead, but
 +     * since windows doesn't support it we have to separate the cases.
 +     * 20090307: mktemp deprecated, use ISO C++ _mktemp instead.
 +     */
 +#ifdef GMX_NATIVE_WINDOWS
 +    _mktemp(buf);
 +#else
 +    fd = mkstemp(buf);
 +
 +    if (fd < 0) {
 +        /* mkstemp returns -1 on failure and reports the cause in errno */
 +        switch (errno) {
 +            case EINVAL:
 +                gmx_fatal(FARGS,"Invalid template %s for mkstemp",buf);
 +                break;
 +            case EEXIST:
 +                gmx_fatal(FARGS,"mkstemp created existing file %s",buf);
 +                break;
 +            case EACCES:
 +                gmx_fatal(FARGS,"Permission denied for opening %s",buf);
 +                break;
 +            default:
 +                gmx_fatal(FARGS,"Could not create temporary file %s",buf);
 +                break;
 +        }
 +    }
 +    close(fd);
 +#endif
 +    /* name in buf should now be OK */
 +}
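 +/* Illustrative usage sketch (not part of the original sources): the
 + * caller supplies a writable buffer of at least 7 characters; the
 + * last six are replaced by the unique part of the name.
 + */
 +#if 0 /* example only */
 +static void example_tmpnam(void)
 +{
 +    char buf[] = "gmx_XXXXXX"; /* hypothetical template */
 +
 +    gmx_tmpnam(buf);
 +    /* buf now holds a unique name; on POSIX the file already exists */
 +}
 +#endif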
 +
 +int gmx_truncatefile(char *path, gmx_off_t length)
 +{
 +#ifdef _MSC_VER
 +    /* Microsoft visual studio does not have "truncate" */
 +    HANDLE fh;
 +    LARGE_INTEGER win_length;
 +
 +    win_length.QuadPart = length;
 +
 +    fh = CreateFile(path,GENERIC_READ | GENERIC_WRITE,0,NULL,
 +            OPEN_EXISTING,0,NULL);
 +    if (fh == INVALID_HANDLE_VALUE)
 +    {
 +        return -1;
 +    }
 +    SetFilePointerEx(fh,win_length,NULL,FILE_BEGIN);
 +    SetEndOfFile(fh);
 +    CloseHandle(fh);
 +
 +    return 0;
 +#else
 +    return truncate(path,length);
 +#endif
 +}
 +
 +
 +int gmx_file_rename(const char *oldname, const char *newname)
 +{
 +#ifndef GMX_NATIVE_WINDOWS
 +    /* under unix, rename() is atomic (at least, it should be). */
 +    return rename(oldname, newname);
 +#else
 +    if (MoveFileEx(oldname, newname, 
 +                   MOVEFILE_REPLACE_EXISTING|MOVEFILE_WRITE_THROUGH))
 +        return 0;
 +    else
 +        return 1;
 +#endif
 +}
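 +/* Illustrative sketch (not part of the original sources) of the
 + * write-then-rename pattern that the atomic rename above enables;
 + * the helper and file names here are hypothetical.
 + */
 +#if 0 /* example only */
 +static int example_atomic_update(const char *fn, const char *data, size_t len)
 +{
 +    char  tmp[GMX_PATH_MAX];
 +    FILE *fp;
 +
 +    sprintf(tmp, "%s.tmp", fn);   /* write to a scratch file first */
 +    fp = fopen(tmp, "wb");
 +    if (fp == NULL)
 +    {
 +        return 1;
 +    }
 +    if (fwrite(data, 1, len, fp) != len)
 +    {
 +        fclose(fp);
 +        return 1;
 +    }
 +    if (fclose(fp) != 0)
 +    {
 +        return 1;
 +    }
 +    /* on POSIX readers now see either the complete old file or the
 +       complete new one, never a partially written version */
 +    return gmx_file_rename(tmp, fn);
 +}
 +#endif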
 +
 +int gmx_file_copy(const char *oldname, const char *newname, gmx_bool copy_if_empty)
 +{
 +/* the full copy buffer size: */
 +#define FILECOPY_BUFSIZE (1<<16)
 +    FILE *in=NULL; 
 +    FILE *out=NULL;
 +    char *buf;
 +
 +    snew(buf, FILECOPY_BUFSIZE); 
 +
 +    in=fopen(oldname, "rb");
 +    if (!in)
 +        goto error;
 +
 +    /* If we don't copy when empty, we postpone opening the file
 +       until we're actually ready to write. */
 +    if (copy_if_empty)
 +    {
 +        out=fopen(newname, "wb");
 +        if (!out)
 +            goto error;
 +    }
 +
 +    while(!feof(in))
 +    {
 +        size_t nread;
 +        
 +        nread=fread(buf, sizeof(char), FILECOPY_BUFSIZE, in);
 +        if (nread>0)
 +        {
 +            size_t ret;
 +            if (!out)
 +            {
 +                /* so this is where we open when copy_if_empty is false:
 +                   here we know we read something. */
 +                out=fopen(newname, "wb");
 +                if (!out)
 +                    goto error;
 +            }
 +            ret=fwrite(buf, sizeof(char), nread, out);
 +            if (ret!=nread)
 +            {
 +                goto error;
 +            }
 +        }
 +        if (ferror(in))
 +            goto error;
 +    }
 +    sfree(buf);
 +    fclose(in);
 +    if (out)
 +    {
 +        fclose(out);
 +    }
 +    return 0;
 +error:
 +    sfree(buf);
 +    if (in)
 +        fclose(in);
 +    if (out)
 +        fclose(out);
 +    return 1;
 +#undef FILECOPY_BUFSIZE
 +}
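 +/* Illustrative usage sketch (not part of the original sources): with
 + * copy_if_empty==FALSE no destination file is created when the source
 + * turns out to be empty. File names are hypothetical.
 + */
 +#if 0 /* example only */
 +static void example_file_copy(void)
 +{
 +    if (gmx_file_copy("run.log", "run.log.bak", FALSE) != 0)
 +    {
 +        fprintf(stderr, "backup copy failed\n");
 +    }
 +}
 +#endif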
 +
 +
 +int gmx_fsync(FILE *fp)
 +{
 +    int rc=0;
 +
 +#ifdef GMX_FAHCORE
 +    /* the fahcore defines its own os-independent fsync */
 +    rc=fah_fsync(fp);
 +#else /* GMX_FAHCORE */
 +    {
 +        int fn=-1;
 +
 +        /* get the file number */
 +#if defined(HAVE_FILENO)
 +        fn= fileno(fp);
 +#elif defined(HAVE__FILENO)
 +        fn= _fileno(fp);
 +#endif
 +
 +        /* do the actual fsync */
 +        if (fn >= 0)
 +        {
 +#if (defined(HAVE_FSYNC))
 +            rc=fsync(fn);
 +#elif (defined(HAVE__COMMIT)) 
 +            rc=_commit(fn);
 +#endif
 +        }
 +    }
 +#endif /* GMX_FAHCORE */
 +
 +    /* We check for these error codes this way because POSIX requires them
 +       to be defined, and using anything other than macros is unlikely: */
 +#ifdef EINTR
 +    /* we don't want to report an error just because fsync() caught a signal.
 +       For our purposes, we can just ignore this. */
 +    if (rc && errno==EINTR)
 +        rc=0;
 +#endif
 +#ifdef EINVAL
 +    /* we don't want to report an error just because we tried to fsync() 
 +       stdout, a socket or a pipe. */
 +    if (rc && errno==EINVAL)
 +        rc=0;
 +#endif
 +    return rc;
 +}
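 +/* Illustrative sketch (not part of the original sources): gmx_fsync()
 + * only forces out data the OS already has, so a durable write needs
 + * an fflush() of the C library buffers first.
 + */
 +#if 0 /* example only */
 +static int example_durable_write(FILE *fp, const char *text)
 +{
 +    if (fputs(text, fp) < 0 || fflush(fp) != 0)
 +    {
 +        return 1;         /* data may not even have reached the OS yet */
 +    }
 +    return gmx_fsync(fp); /* 0 on success or on the ignored EINTR/EINVAL */
 +}
 +#endif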
 +
 +void gmx_chdir(const char *directory)
 +{
 +#ifdef GMX_NATIVE_WINDOWS
 +    int rc = _chdir(directory);
 +#else
 +    int rc = chdir(directory);
 +#endif
 +    if (rc != 0)
 +    {
 +        gmx_fatal(FARGS, "Cannot change directory to '%s'. Reason: %s",
 +                  directory, strerror(errno));
 +    }
 +}
 +
 +void gmx_getcwd(char *buffer, size_t size)
 +{
 +#ifdef GMX_NATIVE_WINDOWS
 +    char *pdum = _getcwd(buffer, size);
 +#else
 +    char *pdum = getcwd(buffer, size);
 +#endif
 +    if (pdum == NULL)
 +    {
 +        gmx_fatal(FARGS, "Cannot get working directory. Reason: %s",
 +                  strerror(errno));
 +    }
 +}
index 76b26c3fd7a595070b66551fc2d84e8b5aba8b63,0000000000000000000000000000000000000000..653cc01914ea2f1fab24f50bf8115b2a1a55fb43
mode 100644,000000..100644
--- /dev/null
@@@ -1,412 -1,0 +1,356 @@@
-  * We have included the routine print_gaussian_table() in this file
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 4.5
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2008, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 + 
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * Groningen Machine for Chemical Simulation
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +#include "gromacs/utility/gmx_header_config.h"
 +
 +#include <gmx_random.h>
 +
 +#include <stdio.h>
 +#include <stdlib.h>
 +#ifdef HAVE_UNISTD_H
 +#include <unistd.h>
 +#endif
 +#include <time.h>
 +#include <math.h>
 +#ifdef GMX_NATIVE_WINDOWS
 +#include <process.h>
 +#endif
 +
 +#include "maths.h"
 +#include "gmx_random_gausstable.h"
 +
 +#define RNG_N 624
 +#define RNG_M 397
 +#define RNG_MATRIX_A 0x9908b0dfUL   /* constant vector a */
 +#define RNG_UPPER_MASK 0x80000000UL /* most significant w-r bits */
 +#define RNG_LOWER_MASK 0x7fffffffUL /* least significant r bits */
 +
 +/* Note that if you change the size of the Gaussian table you will also
 + * have to generate new initialization data for the table in
 + * gmx_random_gausstable.h
 + *
- /*
-  * Print a lookup table for Gaussian numbers with 4 entries on each
-  * line, formatted for inclusion in this file. Size is 2^bits.
-  */
- void
- print_gaussian_table(int bits)
- {
-   int n,nh,i,j;
-   double invn,fac,x,invgauss,det,dx;
-   real  *table;
-   
-   n = 1 << bits;
-   table = (real *)malloc(n*sizeof(real));
-   
-   /* Fill a table of size n such that random draws from it
-     * produce a Gaussian distribution.
-     * We integrate the Gaussian distribution G approximating:
-     *   integral(x->x+dx) G(y) dy
-     * with:
-     *   G(x) dx + G'(x) dx^2/2 = G(x) dx - G(x) x dx^2/2
-     * Then we need to find dx such that the integral is 1/n.
-     * The last step uses dx = 1/x as the approximation is not accurate enough.
-     */
-   invn = 1.0/n;
-   fac = sqrt(2*M_PI);
-   x = 0.5*fac*invn;
-   nh = n/2;
-   for(i=0; i<nh; i++) {
-     if (i > 0) {
-       if (i < nh-1) {
-       invgauss = fac*exp(0.5*x*x);
-       /* det is larger than 0 for all x, except for the last */
-       det = 1 - 2*invn*x*invgauss;
-       dx = (1 - sqrt(det))/x;
-       } else {
-       dx = 1/x;
-       }
-       x = x + dx;
-     }
-     table[nh-1-i] = -x;
-     table[nh+i]   =  x;
-   }
-   printf("static const real *\ngaussian_table[%d] = {\n",n);
-   for(i=0;i<n;i+=4) {
-     printf("  ");
-     for(j=0;j<4;j++) {
-       printf("%14.7e",table[i+j]);
-       if(i+j<(n-1))
-       printf(",");
-     }
-     printf("\n");
-   }
-   printf("};\n");
-   free(table);
- }
++ * A routine print_gaussian_table() is in contrib/random.c
 + * for convenience - use it if you need a different size of the table.
 + */
 +#define GAUSS_TABLE 14 /* the size of the gauss table is 2^GAUSS_TABLE */
 +#define GAUSS_SHIFT (32 - GAUSS_TABLE)
 +
 +
 +struct gmx_rng {
 +  unsigned int  mt[RNG_N];
 +  int           mti;
 +  int           has_saved;  
 +  double        gauss_saved;
 +};
 +
 +
 +
 +int
 +gmx_rng_n(void)
 +{
 +  return RNG_N;
 +}
 +
 +
 +gmx_rng_t 
 +gmx_rng_init(unsigned int seed)
 +{
 +  struct gmx_rng *rng;
 +    
 +  if((rng=(struct gmx_rng *)malloc(sizeof(struct gmx_rng)))==NULL)
 +      return NULL;
 +  
 +  rng->has_saved=0; /* no saved gaussian number yet */
 +
 +  rng->mt[0]= seed & 0xffffffffUL;
 +  for (rng->mti=1; rng->mti<RNG_N; rng->mti++) {
 +    rng->mt[rng->mti] = 
 +      (1812433253UL * (rng->mt[rng->mti-1] ^
 +                     (rng->mt[rng->mti-1] >> 30)) + rng->mti); 
 +    /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
 +    /* In the previous versions, MSBs of the seed affect   */
 +    /* only MSBs of the array mt[].                        */
 +    /* 2002/01/09 modified by Makoto Matsumoto             */
 +    rng->mt[rng->mti] &= 0xffffffffUL;
 +    /* for >32 bit machines */
 +  }
 +  return rng;
 +}
 +
 +gmx_rng_t 
 +gmx_rng_init_array(unsigned int seed[], int seed_length)
 +{
 +    int i, j, k;
 +    gmx_rng_t rng;
 +
 +    if((rng=gmx_rng_init(19650218UL))==NULL)
 +        return NULL;
 +      
 +    i=1; j=0;
 +    k = (RNG_N>seed_length ? RNG_N : seed_length);
 +    for (; k; k--) {
 +        rng->mt[i] = (rng->mt[i] ^ ((rng->mt[i-1] ^
 +                                   (rng->mt[i-1] >> 30)) * 1664525UL))
 +          + seed[j] + j; /* non linear */
 +        rng->mt[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */
 +        i++; j++;
 +        if (i>=RNG_N) { rng->mt[0] = rng->mt[RNG_N-1]; i=1; }
 +        if (j>=seed_length) j=0;
 +    }
 +    for (k=RNG_N-1; k; k--) {
 +        rng->mt[i] = (rng->mt[i] ^ ((rng->mt[i-1] ^ 
 +                                   (rng->mt[i-1] >> 30)) * 
 +                                  1566083941UL))
 +          - i; /* non linear */
 +        rng->mt[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */
 +        i++;
 +        if (i>=RNG_N) { rng->mt[0] = rng->mt[RNG_N-1]; i=1; }
 +    }
 +
 +    rng->mt[0] = 0x80000000UL; 
 +    /* MSB is 1; assuring non-zero initial array */ 
 +    return rng;
 +}
 +
 +
 +void
 +gmx_rng_destroy(gmx_rng_t rng)
 +{
 +  if(rng)
 +    free(rng);
 +}
 +
 +
 +void
 +gmx_rng_get_state(gmx_rng_t rng, unsigned int *mt,int *mti)
 +{
 +  int i;
 +
 +  for(i=0; i<RNG_N; i++) {
 +    mt[i] = rng->mt[i];
 +  }
 +  *mti = rng->mti;
 +}
 +
 +
 +void
 +gmx_rng_set_state(gmx_rng_t rng,  unsigned int *mt,int mti)
 +{
 +  int i;
 +
 +  for(i=0; i<RNG_N; i++) {
 +    rng->mt[i] = mt[i];
 +  }
 +  rng->mti = mti;
 +}
 +
 +
 +unsigned int
 +gmx_rng_make_seed(void)
 +{
 +  FILE *fp;
 +  unsigned int data;
 +  int ret;
 +  long my_pid;
 +
 +#ifdef HAVE_UNISTD_H
 +  fp=fopen("/dev/random","rb"); /* will return NULL if it is not present */
 +#else
 +  fp=NULL;
 +#endif
 +  ret=0;
 +  if(fp!=NULL) {
 +    ret=fread(&data,sizeof(unsigned int),1,fp);
 +    fclose(fp);
 +  }
 +  if(ret!=1) {
 +    /* No random device available, use time-of-day and process id */
 +#ifdef GMX_NATIVE_WINDOWS
 +    my_pid = (long)_getpid();
 +#else
 +    my_pid = (long)getpid();
 +#endif
 +    data=(unsigned int)(((long)time(NULL)+my_pid) % (long)1000000);
 +  }
 +  return data;
 +}
 +
 +
 +/* The random number state contains RNG_N entries that are returned one by
 + * one as random numbers. When we run out of them, this routine is called to
 + * regenerate RNG_N new entries.
 + */
 +static void
 +gmx_rng_update(gmx_rng_t rng)
 +{
 +    unsigned int lastx,x1,x2,y,*mt;
 +    int mti,k;
 +    const unsigned int mag01[2] = {0x0UL, RNG_MATRIX_A};
 +    /* mag01[x] = x * MATRIX_A  for x=0,1 */
 +
 +    /* update random numbers */
 +    mt = rng->mt;   /* pointer to array - avoid repeated dereferencing */
 +    mti = rng->mti;
 +
 +    x1        = mt[0];
 +    for (k = 0; k < RNG_N-RNG_M-3; k += 4)
 +    {
 +        x2      = mt[k+1];
 +        y       = (x1 & RNG_UPPER_MASK) | (x2 & RNG_LOWER_MASK);
 +        mt[k]   = mt[k+RNG_M]   ^ (y >> 1) ^ mag01[y & 0x1UL];
 +        x1      = mt[k+2];
 +        y       = (x2 & RNG_UPPER_MASK) | (x1 & RNG_LOWER_MASK);
 +        mt[k+1] = mt[k+RNG_M+1] ^ (y >> 1) ^ mag01[y & 0x1UL];
 +        x2      = mt[k+3];
 +        y       = (x1 & RNG_UPPER_MASK) | (x2 & RNG_LOWER_MASK);
 +        mt[k+2] = mt[k+RNG_M+2] ^ (y >> 1) ^ mag01[y & 0x1UL];
 +        x1      = mt[k+4];
 +        y       = (x2 & RNG_UPPER_MASK) | (x1 & RNG_LOWER_MASK);
 +        mt[k+3] = mt[k+RNG_M+3] ^ (y >> 1) ^ mag01[y & 0x1UL];
 +    }
 +    x2        = mt[k+1];
 +    y         = (x1 & RNG_UPPER_MASK) | (x2 & RNG_LOWER_MASK);
 +    mt[k]     = mt[k+RNG_M] ^ (y >> 1) ^ mag01[y & 0x1UL];
 +    k++;
 +    x1        = mt[k+1];
 +    y         = (x2 & RNG_UPPER_MASK) | (x1 & RNG_LOWER_MASK);
 +    mt[k]     = mt[k+RNG_M] ^ (y >> 1) ^ mag01[y & 0x1UL];
 +    k++;
 +    x2        = mt[k+1];
 +    y         = (x1 & RNG_UPPER_MASK) | (x2 & RNG_LOWER_MASK);
 +    mt[k]     = mt[k+RNG_M] ^ (y >> 1) ^ mag01[y & 0x1UL];
 +    k++;
 +    for (; k < RNG_N-1; k += 4)
 +    {
 +        x1      = mt[k+1];
 +        y       = (x2 & RNG_UPPER_MASK) | (x1 & RNG_LOWER_MASK);
 +        mt[k]   = mt[k+(RNG_M-RNG_N)]   ^ (y >> 1) ^ mag01[y & 0x1UL];
 +        x2      = mt[k+2];
 +        y       = (x1 & RNG_UPPER_MASK) | (x2 & RNG_LOWER_MASK);
 +        mt[k+1] = mt[k+(RNG_M-RNG_N)+1] ^ (y >> 1) ^ mag01[y & 0x1UL];
 +        x1      = mt[k+3];
 +        y       = (x2 & RNG_UPPER_MASK) | (x1 & RNG_LOWER_MASK);
 +        mt[k+2] = mt[k+(RNG_M-RNG_N)+2] ^ (y >> 1) ^ mag01[y & 0x1UL];
 +        x2      = mt[k+4];
 +        y       = (x1 & RNG_UPPER_MASK) | (x2 & RNG_LOWER_MASK);
 +        mt[k+3] = mt[k+(RNG_M-RNG_N)+3] ^ (y >> 1) ^ mag01[y & 0x1UL];
 +    }
 +    y = (x2 & RNG_UPPER_MASK) | (mt[0] & RNG_LOWER_MASK);
 +    mt[RNG_N-1] = mt[RNG_M-1] ^ (y >> 1) ^ mag01[y & 0x1UL];
 +
 +    rng->mti = 0;
 +}
 +
 +
 +real
 +gmx_rng_gaussian_real(gmx_rng_t rng)
 +{
 +  real x,y,r;
 +
 +  if(rng->has_saved) {
 +    rng->has_saved=0;
 +    return rng->gauss_saved;
 +  } else {
 +    do {
 +      x=2.0*gmx_rng_uniform_real(rng)-1.0;
 +      y=2.0*gmx_rng_uniform_real(rng)-1.0;
 +      r=x*x+y*y;
 +    } while(r>1.0 || r==0.0);
 +    
 +    r=sqrt(-2.0*log(r)/r);
 +    rng->gauss_saved=y*r; /* save second random number */
 +    rng->has_saved=1;
 +    return x*r; /* return first random number */
 +  }
 +}
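 +/* Illustrative usage sketch (not part of the original sources):
 + * seed a generator, draw standard-normal numbers, and free it.
 + */
 +#if 0 /* example only */
 +static void example_rng_usage(void)
 +{
 +    gmx_rng_t rng = gmx_rng_init(gmx_rng_make_seed());
 +    int       i;
 +
 +    for (i = 0; i < 10; i++)
 +    {
 +        real g = gmx_rng_gaussian_real(rng); /* mean 0, variance 1 */
 +    }
 +    gmx_rng_destroy(rng);
 +}
 +#endif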
 +
 +
 +
 +
 +/* Return a random unsigned integer, i.e. 0..4294967295.
 + * Declared in the header file for performance reasons.
 + * Unfortunately this function cannot be inlined, since
 + * it needs to refer to the internal-linkage gmx_rng_update().
 + */
 +unsigned int
 +gmx_rng_uniform_uint32(gmx_rng_t rng)
 +{
 +  unsigned int y;
 +  
 +  if(rng->mti==RNG_N)
 +    gmx_rng_update(rng);
 +  y=rng->mt[rng->mti++];
 +  
 +  y ^= (y >> 11);
 +  y ^= (y << 7) & 0x9d2c5680UL;
 +  y ^= (y << 15) & 0xefc60000UL;
 +  y ^= (y >> 18);
 +  
 +  return y;  
 +} 
 +
 +
 +
 +
 +
 +/* Return a uniform floating point number on the interval 0<=x<1 */
 +real
 +gmx_rng_uniform_real(gmx_rng_t rng)
 +{
 +  if(sizeof(real)==sizeof(double))
 +    return ((double)gmx_rng_uniform_uint32(rng))*(1.0/4294967296.0); 
 +  else
 +    return ((float)gmx_rng_uniform_uint32(rng))*(1.0/4294967423.0); 
 +  /* Divided by a number slightly larger than 2^32, the smallest
 +   * value that still yields a single-precision result on 0<=x<1,
 +   * since float accuracy is limited to about 1e-7.
 +   */
 +}
 +
 +
 +
 +real 
 +gmx_rng_gaussian_table(gmx_rng_t rng)
 +{
 +  unsigned int i;
 +  
 +  i = gmx_rng_uniform_uint32(rng);
 +  
 +  /* The Gaussian table is a static constant in this file */
 +  return gaussian_table[i >> GAUSS_SHIFT];
 +}
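 +/* Illustrative sketch (not part of the original sources): the lookup
 + * above keeps only the top GAUSS_TABLE bits of the 32-bit draw, so
 + * with GAUSS_TABLE==14 the index i>>GAUSS_SHIFT lies in [0,16383].
 + */
 +#if 0 /* example only */
 +static void example_gauss_index(void)
 +{
 +    unsigned int i   = 0xffffffffUL;     /* largest possible draw */
 +    unsigned int idx = i >> GAUSS_SHIFT; /* == (1<<GAUSS_TABLE)-1 == 16383 */
 +}
 +#endif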
 +
 +
index cfee70125aadaa1f518b3b353978d06273842dd1,0000000000000000000000000000000000000000..6fff42416ee209cfcace96d88f900b1fb8ea2cf4
mode 100644,000000..100644
--- /dev/null
@@@ -1,1154 -1,0 +1,1156 @@@
-     gmx_residuetype_t rt;
 +/*
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROningen Mixture of Alchemy and Childrens' Stories
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <ctype.h>
 +#include <string.h>
++#include <assert.h>
 +#include "sysstuff.h"
 +#include "strdb.h"
 +#include "futil.h"
 +#include "macros.h"
 +#include "names.h"
 +#include "string2.h"
 +#include "statutil.h"
 +#include "confio.h"
 +#include "main.h"
 +#include "copyrite.h"
 +#include "typedefs.h"
 +#include "smalloc.h"
 +#include "invblock.h"
 +#include "macros.h"
 +#include "index.h"
 +#include "txtdump.h"
 +#include "gmxfio.h"
 +
 +
 +
 +const char gmx_residuetype_undefined[]="Other";
 +
 +struct gmx_residuetype
 +{
 +    int      n; 
 +    char **  resname;
 +    char **  restype;
 +    
 +};
 +
 +
 +static gmx_bool gmx_ask_yesno(gmx_bool bASK)
 +{
 +  char c;
 +
 +  if (bASK) {
 +    do {
 +      c=toupper(fgetc(stdin));
 +    } while ((c != 'Y') && (c != 'N'));
 +
 +    return (c == 'Y');
 +  }
 +  else
 +    return FALSE;
 +}
 +
 +t_blocka *new_blocka(void)
 +{
 +  t_blocka *block;
 +
 +  snew(block,1);
 +  snew(block->index,1);
 +
 +  return block;
 +}
 +
 +void write_index(const char *outf, t_blocka *b,char **gnames)
 +{
 +  FILE *out;
 +  int  i,j,k;
 +
 +  out=gmx_fio_fopen(outf,"w");
 +  /* fprintf(out,"%5d  %5d\n",b->nr,b->nra); */
 +  for(i=0; (i<b->nr); i++) {
 +    fprintf(out,"[ %s ]\n",gnames[i]);
 +    for(k=0,j=b->index[i]; j<b->index[i+1]; j++,k++) {
 +      fprintf(out,"%4d ",b->a[j]+1);
 +      if ((k % 15) == 14)
 +        fprintf(out,"\n");
 +    }
 +    fprintf(out,"\n");
 +  }
 +  gmx_fio_fclose(out);
 +}
 +
 +void add_grp(t_blocka *b,char ***gnames,int nra,atom_id a[],const char *name)
 +{
 +  int i;
 +
 +  srenew(b->index,b->nr+2);
 +  srenew(*gnames,b->nr+1);
 +  (*gnames)[b->nr]=strdup(name);
 +
 +  srenew(b->a,b->nra+nra);
 +  for(i=0; (i<nra); i++)
 +    b->a[b->nra++]=a[i];
 +  b->nr++;
 +  b->index[b->nr]=b->nra;
 +}
 +
 +/* compare index in `a' with group in `b' at `index', 
 +   when `index'<0 it is relative to end of `b' */
 +static gmx_bool grp_cmp(t_blocka *b, int nra, atom_id a[], int index)
 +{
 +  int i;
 +  
 +  if (index < 0)
 +    index = b->nr-1+index;
 +  if (index >= b->nr)
 +    gmx_fatal(FARGS,"no such index group %d in t_blocka (nr=%d)",index,b->nr);
 +  /* compare sizes */
 +  if ( nra != b->index[index+1] - b->index[index] )
 +    return FALSE;
 +  for(i=0; i<nra; i++)
 +    if ( a[i] != b->a[b->index[index]+i] )
 +      return FALSE;
 +  return TRUE;
 +}
 +
 +static void 
 +p_status(const char **restype, int nres, const char **typenames, int ntypes)
 +{
 +    int i,j;
 +    int found;
 +    
 +    int * counter;
 +    
 +    snew(counter,ntypes);
 +    for(i=0;i<ntypes;i++)
 +    {
 +        counter[i]=0;
 +    }
 +    for(i=0;i<nres;i++)
 +    {
 +        found=0;
 +        for(j=0;j<ntypes;j++)
 +        {
 +            if(!gmx_strcasecmp(restype[i],typenames[j]))
 +            {
 +                counter[j]++;
 +            }
 +        }
 +    }
 +    
 +    for(i=0; (i<ntypes); i++) 
 +    {
 +        if (counter[i] > 0)
 +        {
 +            printf("There are: %5d %10s residues\n",counter[i],typenames[i]);
 +        }
 +    }
 +
 +    sfree(counter);
 +}
 +
 +
 +atom_id *
 +mk_aid(t_atoms *atoms,const char ** restype,const char * typestring,int *nra,gmx_bool bMatch)
 +/* Make an array of atom_ids for all atoms with residuetypes matching typestring, or the opposite if bMatch is false */
 +{
 +    atom_id *a;
 +    int     i;
 +    int     res;
 +    
 +    snew(a,atoms->nr);
 +    *nra=0;
 +    for(i=0; (i<atoms->nr); i++) 
 +    {
 +        res=!gmx_strcasecmp(restype[atoms->atom[i].resind],typestring);
 +        if(bMatch==FALSE)
 +        {
 +            res=!res;
 +        }
 +        if(res)
 +        {
 +            a[(*nra)++]=i;
 +        }
 +    }
 +  
 +    return a;
 +}
 +
 +typedef struct {
 +  char *rname;
 +  gmx_bool bNeg;
 +  char *gname;
 +} restp_t;
 +
 +static void analyse_other(const char ** restype,t_atoms *atoms,
 +                        t_blocka *gb,char ***gn,gmx_bool bASK,gmx_bool bVerb)
 +{
 +  restp_t *restp=NULL;
 +  char **attp=NULL;
 +  char *rname,*aname;
 +  atom_id *other_ndx,*aid,*aaid;
 +  int  i,j,k,l,resind,naid,naaid,natp,nrestp=0;
 +  
 +    for(i=0; (i<atoms->nres); i++)
 +    {
 +        if (gmx_strcasecmp(restype[i],"Protein") && gmx_strcasecmp(restype[i],"DNA") && gmx_strcasecmp(restype[i],"RNA") && gmx_strcasecmp(restype[i],"Water"))
 +        {
 +            break;
 +        }
 +    }
 +  if (i < atoms->nres) {
 +    /* we have others */
 +    if (bVerb)
 +      printf("Analysing residues not classified as Protein/DNA/RNA/Water and splitting into groups...\n");
 +    snew(other_ndx,atoms->nr);
 +    for(k=0; (k<atoms->nr); k++) {
 +      resind = atoms->atom[k].resind;
 +      rname = *atoms->resinfo[resind].name;
 +        if (gmx_strcasecmp(restype[resind],"Protein") && gmx_strcasecmp(restype[resind],"DNA") && 
 +            gmx_strcasecmp(restype[resind],"RNA") && gmx_strcasecmp(restype[resind],"Water")) 
 +        {
 +
 +      for(l=0; (l<nrestp); l++)
 +        if (strcmp(restp[l].rname,rname) == 0)
 +          break;
 +      if (l==nrestp) {
 +        srenew(restp,nrestp+1);
 +        restp[nrestp].rname = strdup(rname);
 +        restp[nrestp].bNeg  = FALSE;
 +        restp[nrestp].gname = strdup(rname);
 +        nrestp++;
 +      }
 +      }
 +    }
 +    for(i=0; (i<nrestp); i++) {
 +      snew(aid,atoms->nr);
 +      naid=0;
 +      for(j=0; (j<atoms->nr); j++) {
 +      rname = *atoms->resinfo[atoms->atom[j].resind].name;
 +      if ((strcmp(restp[i].rname,rname) == 0 && !restp[i].bNeg) ||
 +          (strcmp(restp[i].rname,rname) != 0 &&  restp[i].bNeg)) {
 +        aid[naid++] = j;
 +      }
 +      }
 +      add_grp(gb,gn,naid,aid,restp[i].gname);
 +      if (bASK) {
 +      printf("split %s into atoms (y/n) ? ",restp[i].gname);
 +      fflush(stdout);
 +      if (gmx_ask_yesno(bASK)) {
 +        natp=0;
 +        for(k=0; (k<naid); k++) {
 +          aname=*atoms->atomname[aid[k]];
 +          for(l=0; (l<natp); l++)
 +            if (strcmp(aname,attp[l]) == 0)
 +              break;
 +          if (l == natp) {
 +            srenew(attp,++natp);
 +            attp[natp-1]=aname;
 +          }
 +        }
 +        if (natp > 1) {
 +          for(l=0; (l<natp); l++) {
 +            snew(aaid,naid);
 +            naaid=0;
 +            for(k=0; (k<naid); k++) {
 +              aname=*atoms->atomname[aid[k]];
 +              if (strcmp(aname,attp[l])==0) 
 +                aaid[naaid++]=aid[k];
 +            }
 +            add_grp(gb,gn,naaid,aaid,attp[l]);
 +            sfree(aaid);
 +          }
 +        }
 +        sfree(attp);
 +        attp=NULL;
 +      }
 +      sfree(aid);
 +      }
 +    }
 +    sfree(other_ndx);
 +  }
 +}
 +
 +/*! \brief Instances of this struct contain the data necessary to
 + *         construct a single (protein) index group in
 + *         analyse_prot(). */
 +typedef struct gmx_help_make_index_group
 +{
 +  /** The set of atom names that will be used to form this index group */
 +  const char **defining_atomnames;
 +  /** Size of the defining_atomnames array */
 +  const int num_defining_atomnames;
 +  /** Name of this index group */
 +  const char *group_name;
 +  /** Whether the above atom names name the atoms in the group, or
 +      those not in the group */
 +  gmx_bool bTakeComplement;
 +  /** The index in wholename gives the first item in the arrays of
 +     atomnames that should be tested with 'gmx_strncasecmp' instead of
 +     'gmx_strcasecmp', or -1 if all items should be tested with 'gmx_strcasecmp'.
 +     This is comparable to using a '*' wildcard at the end of specific
 +     atom names, but that is more involved to implement...
 +   */
 +  int wholename;
 +  /** Only create this index group if it differs from the one specified in compareto,
 +     where -1 means to always create this group. */
 +  int compareto;
 +} t_gmx_help_make_index_group;
 +
 +static void analyse_prot(const char ** restype,t_atoms *atoms,
 +                       t_blocka *gb,char ***gn,gmx_bool bASK,gmx_bool bVerb)
 +{
 +  /* lists of atomnames to be used in constructing index groups: */
 +  static const char *pnoh[]    = { "H", "HN" };
 +  static const char *pnodum[]  = { "MN1",  "MN2",  "MCB1", "MCB2", "MCG1", "MCG2", 
 +                           "MCD1", "MCD2", "MCE1", "MCE2", "MNZ1", "MNZ2" };
 +  static const char *calpha[]  = { "CA" };
 +  static const char *bb[]      = { "N","CA","C" };
 +  static const char *mc[]      = { "N","CA","C","O","O1","O2","OC1","OC2","OT","OXT" };
 +  static const char *mcb[]     = { "N","CA","CB","C","O","O1","O2","OC1","OC2","OT","OXT" };
 +  static const char *mch[]     = { "N","CA","C","O","O1","O2","OC1","OC2","OT","OXT",
 +                                 "H1","H2","H3","H","HN" };
 +
 +  static const t_gmx_help_make_index_group constructing_data[] =
 +    {{ NULL,   0, "Protein",      TRUE,  -1, -1},
 +     { pnoh,   asize(pnoh),   "Protein-H",    TRUE,  0,  -1},
 +     { calpha, asize(calpha), "C-alpha",      FALSE, -1, -1},
 +     { bb,     asize(bb),     "Backbone",     FALSE, -1, -1},
 +     { mc,     asize(mc),     "MainChain",    FALSE, -1, -1},
 +     { mcb,    asize(mcb),    "MainChain+Cb", FALSE, -1, -1},
 +     { mch,    asize(mch),    "MainChain+H",  FALSE, -1, -1},
 +     { mch,    asize(mch),    "SideChain",    TRUE,  -1, -1},
 +     { mch,    asize(mch),    "SideChain-H",  TRUE,  11, -1},
 +     { pnodum, asize(pnodum), "Prot-Masses",  TRUE,  -1, 0},
 +    };
 +  const int num_index_groups = asize(constructing_data);
 +
 +  int     n,j;
 +  atom_id *aid;
 +  int     nra,nnpres,npres;
 +  gmx_bool    match;
 +  char    ndx_name[STRLEN],*atnm;
 +  int i;
 +
 +  if (bVerb)
 +  {
 +    printf("Analysing Protein...\n");
 +  }
 +  snew(aid,atoms->nr);
 +
 +  /* calculate the number of protein residues */
 +  npres=0;
 +  for(i=0; (i<atoms->nres); i++) {
 +    if (0 == gmx_strcasecmp(restype[i],"Protein")) {
 +      npres++;
 +    }
 +  }
 +  /* find matching or complement atoms */
 +  for(i=0; (i<(int)num_index_groups); i++) {
 +    nra=0;
 +    for(n=0; (n<atoms->nr); n++) {
 +      if (0 == gmx_strcasecmp(restype[atoms->atom[n].resind],"Protein")) {
 +      match=FALSE;
 +      for(j=0; (j<constructing_data[i].num_defining_atomnames); j++) {
 +        /* skip digits at beginning of atomname, e.g. 1H */
 +        atnm=*atoms->atomname[n];
 +        while (isdigit(atnm[0])) {
 +          atnm++;
 +        }
 +        if ( (constructing_data[i].wholename==-1) || (j<constructing_data[i].wholename) ) {
 +          if (0 == gmx_strcasecmp(constructing_data[i].defining_atomnames[j],atnm)) {
 +            match=TRUE;
 +          }
 +        } else {
 +          if (0 == gmx_strncasecmp(constructing_data[i].defining_atomnames[j],atnm,strlen(constructing_data[i].defining_atomnames[j]))) {
 +            match=TRUE;
 +          }
 +        }
 +      }
 +      if (constructing_data[i].bTakeComplement != match) {
 +        aid[nra++]=n;
 +      }
 +      }
 +    }
 +    /* if we want to add this group always or it differs from previous 
 +       group, add it: */
 +    if ( -1 == constructing_data[i].compareto || !grp_cmp(gb,nra,aid,constructing_data[i].compareto-i) ) {
 +      add_grp(gb,gn,nra,aid,constructing_data[i].group_name);
 +    }
 +  }
 +  
 +  if (bASK) {
 +    for(i=0; (i<(int)num_index_groups); i++) {
 +      printf("Split %12s into %5d residues (y/n) ? ",constructing_data[i].group_name,npres);
 +      if (gmx_ask_yesno(bASK)) {
 +      int resind;
 +      nra = 0;
 +      for(n=0;((n<atoms->nr) && (atoms->atom[n].resind < npres));) {
 +        resind = atoms->atom[n].resind;
 +        for(;((n<atoms->nr) && (atoms->atom[n].resind==resind));n++) {
 +          match=FALSE;
 +          for(j=0;(j<constructing_data[i].num_defining_atomnames); j++) {
 +            if (0 == gmx_strcasecmp(constructing_data[i].defining_atomnames[j],*atoms->atomname[n])) {
 +              match=TRUE;
 +            }
 +          }
 +          if (constructing_data[i].bTakeComplement != match) {
 +            aid[nra++]=n;
 +          }
 +        }
 +        /* copy the residuename to the tail of the groupname */
 +        if (nra > 0) {
 +          t_resinfo *ri;
 +          ri = &atoms->resinfo[resind];
 +          sprintf(ndx_name,"%s_%s%d%c",
 +                  constructing_data[i].group_name,*ri->name,ri->nr,ri->ic==' ' ? '\0' : ri->ic);
 +          add_grp(gb,gn,nra,aid,ndx_name);
 +          nra = 0;
 +        }
 +      }
 +      } 
 +    }
 +    printf("Make group with sidechain and C=O swapped (y/n) ? ");
 +    if (gmx_ask_yesno(bASK)) {
 +      /* Make swap sidechain C=O index */
 +      int resind,hold;
 +      nra = 0;
 +      for(n=0;((n<atoms->nr) && (atoms->atom[n].resind < npres));) {
 +      resind = atoms->atom[n].resind;
 +      hold  = -1;
 +      for(;((n<atoms->nr) && (atoms->atom[n].resind==resind));n++)
 +        if (strcmp("CA",*atoms->atomname[n]) == 0) {
 +          aid[nra++]=n;
 +          hold=nra;
 +          nra+=2;
 +        } else if (strcmp("C",*atoms->atomname[n]) == 0) {
 +          if (hold == -1) {
 +            gmx_incons("Atom naming problem");
 +          }
 +          aid[hold]=n;
 +        } else if (strcmp("O",*atoms->atomname[n]) == 0) {
 +          if (hold == -1) {
 +            gmx_incons("Atom naming problem");
 +          }
 +          aid[hold+1]=n;
 +        } else if (strcmp("O1",*atoms->atomname[n]) == 0) {
 +          if (hold == -1) {
 +            gmx_incons("Atom naming problem");
 +          }
 +          aid[hold+1]=n;
 +        } else 
 +          aid[nra++]=n;
 +      }
 +      /* copy the residuename to the tail of the groupname */
 +      if (nra > 0) {
 +      add_grp(gb,gn,nra,aid,"SwapSC-CO");
 +      nra = 0;
 +      } 
 +    }
 +  }
 +  sfree(aid);
 +}
 +
 +
 +
 +
 +/* Return 0 if the name was found, otherwise -1.
 + * p_restype is set to a pointer to the type name, or 'Other' if we did not find it.
 + */
 +int
 +gmx_residuetype_get_type(gmx_residuetype_t rt,const char * resname, const char ** p_restype)
 +{
 +    int    i,rc;
 +    
 +    rc=-1;
 +    for(i=0;i<rt->n && rc;i++)
 +    {
 +        rc=gmx_strcasecmp(rt->resname[i],resname);
 +    }
 +    
 +    *p_restype = (rc==0) ? rt->restype[i-1] : gmx_residuetype_undefined;
 +    
 +    return rc;
 +}
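 +/* Illustrative usage sketch (not part of the original sources): look
 + * up a residue name; unknown names come back typed as "Other".
 + */
 +#if 0 /* example only */
 +static void example_residuetype_lookup(gmx_residuetype_t rt)
 +{
 +    const char *type;
 +
 +    if (gmx_residuetype_get_type(rt, "ALA", &type) == 0)
 +    {
 +        printf("ALA has type %s\n", type); /* "Protein" with the default database */
 +    }
 +    else
 +    {
 +        printf("ALA not in database; type falls back to %s\n", type); /* "Other" */
 +    }
 +}
 +#endif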
 +
 +int
 +gmx_residuetype_add(gmx_residuetype_t rt,const char *newresname, const char *newrestype)
 +{
 +    int     i;
 +    int     found;
 +    const char *  p_oldtype;
 +    
 +    found = !gmx_residuetype_get_type(rt,newresname,&p_oldtype);
 +    
 +    if(found && gmx_strcasecmp(p_oldtype,newrestype))
 +    {
 +        fprintf(stderr,"Warning: Residue '%s' already present with type '%s' in database, ignoring new type '%s'.",
 +                newresname,p_oldtype,newrestype);
 +    }
 +    
 +    if(found==0)
 +    {
 +        srenew(rt->resname,rt->n+1);
 +        srenew(rt->restype,rt->n+1);
 +        rt->resname[rt->n]=strdup(newresname);
 +        rt->restype[rt->n]=strdup(newrestype);
 +        rt->n++;
 +    }
 +  
 +    return 0;
 +}
 +
 +
 +int
 +gmx_residuetype_init(gmx_residuetype_t *prt)
 +{
 +    FILE *  db;
 +    char    line[STRLEN];
 +    char    resname[STRLEN],restype[STRLEN],dum[STRLEN];
 +    char *  p;
 +    int     i;
 +    struct gmx_residuetype *rt;
 +    
 +    snew(rt,1);
 +    *prt=rt;
 +    
 +    rt->n        = 0;
 +    rt->resname  = NULL;
 +    rt->restype = NULL;
 +    
 +    db=libopen("residuetypes.dat");
 +    
 +    while(get_a_line(db,line,STRLEN)) 
 +    {
 +        strip_comment(line);
 +        trim(line);
 +        if(line[0]!='\0')
 +        {
 +            if(sscanf(line,"%s %s %s",resname,restype,dum)!=2)
 +            {
 +                gmx_fatal(FARGS,"Incorrect number of columns (2 expected) for line in residuetypes.dat");
 +            }
 +            gmx_residuetype_add(rt,resname,restype);
 +        }
 +    }
 +    
 +    fclose(db);
 +    
 +    return 0;
 +}
 +
 +
 +
 +int
 +gmx_residuetype_destroy(gmx_residuetype_t rt)
 +{
 +    int i;
 +    
 +    for(i=0;i<rt->n;i++)
 +    {
 +        free(rt->resname[i]);
 +        free(rt->restype[i]);
 +    }
 +    free(rt->resname);
 +    free(rt->restype);
 +    free(rt);
 +    
 +    return 0;
 +}
 +
 +int
 +gmx_residuetype_get_alltypes(gmx_residuetype_t    rt,
 +                             const char ***       p_typenames,
 +                             int *                ntypes)
 +{
 +    int      i,j,n;
 +    int      found;
 +    const char **  my_typename;
 +    char *   p;
 +    
 +    n=0;
 +    
 +    my_typename=NULL;
 +    for(i=0;i<rt->n;i++)
 +    {
 +        p=rt->restype[i];
 +        found=0;
 +        for(j=0;j<n && !found;j++)
 +        {
 +            found=!gmx_strcasecmp(p,my_typename[j]);
 +        }
 +        
 +        if(!found)
 +        {
 +            srenew(my_typename,n+1);
 +            my_typename[n]=p;
 +            n++;
 +        }
 +    }
 +    *ntypes=n;
 +    *p_typenames=my_typename; 
 +    
 +    return 0;
 +}
 +    
 +
 +
 +gmx_bool 
 +gmx_residuetype_is_protein(gmx_residuetype_t rt, const char *resnm)
 +{
 +    gmx_bool rc;
 +    const char *p_type;
 +    
 +    if(gmx_residuetype_get_type(rt,resnm,&p_type)==0 &&
 +       gmx_strcasecmp(p_type,"Protein")==0)
 +    {
 +        rc=TRUE;
 +    }
 +    else
 +    {
 +        rc=FALSE;
 +    }
 +    return rc;
 +}
 +
 +gmx_bool 
 +gmx_residuetype_is_dna(gmx_residuetype_t rt, const char *resnm)
 +{
 +    gmx_bool rc;
 +    const char *p_type;
 +
 +    if(gmx_residuetype_get_type(rt,resnm,&p_type)==0 &&
 +       gmx_strcasecmp(p_type,"DNA")==0)
 +    {
 +        rc=TRUE;
 +    }
 +    else
 +    {
 +        rc=FALSE;
 +    }
 +    return rc;
 +}
 +
 +gmx_bool 
 +gmx_residuetype_is_rna(gmx_residuetype_t rt, const char *resnm)
 +{
 +    gmx_bool rc;
 +    const char *p_type;
 +
 +    if(gmx_residuetype_get_type(rt,resnm,&p_type)==0 &&
 +       gmx_strcasecmp(p_type,"RNA")==0)
 +    {
 +        rc=TRUE;
 +    }
 +    else
 +    {
 +        rc=FALSE;
 +    }
 +    return rc;
 +}
 +
 +/* Return the size of the arrays */
 +int
 +gmx_residuetype_get_size(gmx_residuetype_t rt)
 +{
 +    return rt->n;
 +}
 +
 +/* Search for a residuetype with name resnm within the
 + * gmx_residuetype database. Return the index if found,
 + * otherwise -1.
 + */
 +int
 +gmx_residuetype_get_index(gmx_residuetype_t rt, const char *resnm)
 +{
 +    int i,rc;
 +
 +    rc=-1;
 +    for(i=0;i<rt->n && rc;i++)
 +    {
 +        rc=gmx_strcasecmp(rt->resname[i],resnm);
 +    }
 +
 +    return (0 == rc) ? i-1 : -1;
 +}
 +
 +/* Return the name of the residuetype with the given index, or
 + * NULL if not found. */
 +const char *
 +gmx_residuetype_get_name(gmx_residuetype_t rt, int index)
 +{
 +  if(index >= 0 && index < rt->n) {
 +    return rt->resname[index];
 +  } else {
 +    return NULL;
 +  }
 +}
 +
 +
 +
 +void analyse(t_atoms *atoms,t_blocka *gb,char ***gn,gmx_bool bASK,gmx_bool bVerb)
 +{
++    gmx_residuetype_t rt=NULL;
 +    char    *resnm;
 +    atom_id *aid;
 +    const char **  restype;
 +    int     nra;
 +    int     i,k;
 +    size_t  j;
 +    int     ntypes;
 +    char *  p;
 +    const char ** p_typename;
 +    int     iwater,iion;
 +    int     nwater,nion;
 +    int     found;
 +    
 +    if (bVerb)
 +    {
 +        printf("Analysing residue names:\n");
 +    }
 +    /* Create system group, every single atom */
 +    snew(aid,atoms->nr);
 +    for(i=0;i<atoms->nr;i++)
 +    {
 +        aid[i]=i;
 +    }
 +    add_grp(gb,gn,atoms->nr,aid,"System"); 
 +    sfree(aid);
 +
 +    /* For every residue, get a pointer to the residue type name */
 +    gmx_residuetype_init(&rt);
++    assert(rt);
 +
 +    snew(restype,atoms->nres);
 +    ntypes = 0;
 +    p_typename = NULL;
 +    for(i=0;i<atoms->nres;i++)
 +    {
 +        resnm = *atoms->resinfo[i].name;
 +        gmx_residuetype_get_type(rt,resnm,&(restype[i]));
 +
 +        /* Note that this does not lead to a N*N loop, but N*K, where
 +         * K is the number of residue _types_, which is small and independent of N.
 +         */
 +        found = 0;
 +        for(k=0;k<ntypes && !found;k++)
 +        {
 +            found = !strcmp(restype[i],p_typename[k]);
 +        }
 +        if(!found)
 +        {
 +            srenew(p_typename,ntypes+1);
 +            p_typename[ntypes++] = strdup(restype[i]);
 +        }
 +    }    
 +    
 +    if (bVerb)
 +    {
 +        p_status(restype,atoms->nres,p_typename,ntypes);
 +    }
 +
 +    for(k=0;k<ntypes;k++)
 +    {              
 +        aid=mk_aid(atoms,restype,p_typename[k],&nra,TRUE);
 +
 +        /* Check for special types to do fancy stuff with */
 +        
 +        if(!gmx_strcasecmp(p_typename[k],"Protein") && nra>0)
 +        {
 +            sfree(aid);
 +            /* PROTEIN */
 +            analyse_prot(restype,atoms,gb,gn,bASK,bVerb);
 +            
 +            /* Create a Non-Protein group */
 +            aid=mk_aid(atoms,restype,"Protein",&nra,FALSE);
 +            if ((nra > 0) && (nra < atoms->nr))
 +            {
 +                add_grp(gb,gn,nra,aid,"non-Protein"); 
 +            }
 +            sfree(aid);
 +        }
 +        else if(!gmx_strcasecmp(p_typename[k],"Water") && nra>0)
 +        {
 +            add_grp(gb,gn,nra,aid,p_typename[k]); 
 +            /* Add this group as 'SOL' too, for backward compatibility with older gromacs versions */
 +            add_grp(gb,gn,nra,aid,"SOL"); 
 +
 +            sfree(aid);
 +
 +            /* Solvent, create a negated group too */
 +            aid=mk_aid(atoms,restype,"Water",&nra,FALSE);
 +            if ((nra > 0) && (nra < atoms->nr))
 +            {
 +                add_grp(gb,gn,nra,aid,"non-Water"); 
 +            }
 +            sfree(aid);
 +        }
 +        else if(nra>0)
 +        {
 +            /* Other groups */
 +            add_grp(gb,gn,nra,aid,p_typename[k]); 
 +            sfree(aid);
 +            analyse_other(restype,atoms,gb,gn,bASK,bVerb);
 +        }
 +    }
 +    
 +    sfree(p_typename);
 +    sfree(restype);
 +    gmx_residuetype_destroy(rt);      
 +    
 +    /* Create a merged water_and_ions group */
 +    iwater = -1;
 +    iion   = -1;
 +    nwater = 0;
 +    nion   = 0;
 +        
 +    for(i=0;i<gb->nr;i++)
 +    {        
 +        if(!gmx_strcasecmp((*gn)[i],"Water"))
 +        {
 +            iwater = i;
 +            nwater = gb->index[i+1]-gb->index[i];
 +        }
 +        else if(!gmx_strcasecmp((*gn)[i],"Ion"))
 +        {
 +            iion = i;
 +            nion = gb->index[i+1]-gb->index[i];
 +        }
 +    }
 +    
 +    if(nwater>0 && nion>0)
 +    {
 +        srenew(gb->index,gb->nr+2);
 +        srenew(*gn,gb->nr+1);
 +        (*gn)[gb->nr] = strdup("Water_and_ions");
 +        srenew(gb->a,gb->nra+nwater+nion);
 +        if(nwater>0)
 +        {
 +            for(i=gb->index[iwater];i<gb->index[iwater+1];i++)
 +            {
 +                gb->a[gb->nra++] = gb->a[i];
 +            }
 +        }
 +        if(nion>0)
 +        {
 +            for(i=gb->index[iion];i<gb->index[iion+1];i++)
 +            {
 +                gb->a[gb->nra++] = gb->a[i];
 +            }
 +        }
 +        gb->nr++;
 +        gb->index[gb->nr]=gb->nra;
 +    }
 +}
 +
 +
 +void check_index(char *gname,int n,atom_id index[],char *traj,int natoms)
 +{
 +  int i;
 +  
 +  for(i=0; i<n; i++)
 +    if (index[i] >= natoms)
 +      gmx_fatal(FARGS,"%s atom number (index[%d]=%d) is larger than the number of atoms in %s (%d)",
 +                gname ? gname : "Index",i+1, index[i]+1,
 +                traj ? traj : "the trajectory",natoms);
 +    else if (index[i] < 0)
 +      gmx_fatal(FARGS,"%s atom number (index[%d]=%d) is less than zero",
 +              gname ? gname : "Index",i+1, index[i]+1);
 +}
 +
 +t_blocka *init_index(const char *gfile, char ***grpname)
 +{
 +  FILE     *in;
 +  t_blocka  *b;
 +  int      a,maxentries;
 +  int      i,j,ng,nread;
 +  char     line[STRLEN],*pt,str[STRLEN];
 +
 +  in=gmx_fio_fopen(gfile,"r");
 +  snew(b,1);
 +  get_a_line(in,line,STRLEN);
 +  if ( line[0]=='[' ) {
 +    /* new format */
 +    b->nr=0;
 +    b->index=NULL;
 +    b->nra=0;
 +    b->a=NULL;
 +    *grpname=NULL;
 +    maxentries=0;
 +    do {
 +      if (get_header(line,str)) {
 +      b->nr++;
 +      srenew(b->index,b->nr+1);
 +      srenew(*grpname,b->nr);
 +      if (b->nr==1)
 +        b->index[0]=0;
 +      b->index[b->nr]=b->index[b->nr-1];
 +      (*grpname)[b->nr-1]=strdup(str);
 +      } else {
 +      pt=line;
 +      while (sscanf(pt,"%s",str) == 1) {
 +        i=b->index[b->nr];
 +        if (i>=maxentries) {
 +          maxentries+=1024;
 +          srenew(b->a,maxentries);
 +        }
 +        b->a[i]=strtol(str, NULL, 10)-1;
 +        b->index[b->nr]++;
 +        (b->nra)++;
 +        pt=strstr(pt,str)+strlen(str);
 +      }
 +      }
 +    } while (get_a_line(in,line,STRLEN));
 +  } 
 +  else {
 +    /* old format */
 +    sscanf(line,"%d%d",&b->nr,&b->nra);
 +    snew(b->index,b->nr+1);
 +    snew(*grpname,b->nr);
 +    b->index[0]=0;
 +    snew(b->a,b->nra);
 +    for (i=0; (i<b->nr); i++) {
 +      nread=fscanf(in,"%s%d",str,&ng);
 +      (*grpname)[i]=strdup(str);
 +      b->index[i+1]=b->index[i]+ng;
 +      if (b->index[i+1] > b->nra)
 +      gmx_fatal(FARGS,"Something wrong in your indexfile at group %s",str);
 +      for(j=0; (j<ng); j++) {
 +      nread=fscanf(in,"%d",&a);
 +      b->a[b->index[i]+j]=a;
 +      }
 +    }
 +  }
 +  gmx_fio_fclose(in);
 +
 +  for(i=0; (i<b->nr); i++) {
 +    for(j=b->index[i]; (j<b->index[i+1]); j++) {
 +      if (b->a[j] < 0) 
 +      fprintf(stderr,"\nWARNING: negative index %d in group %s\n\n",
 +              b->a[j],(*grpname)[i]);
 +    }
 +  }
 +  
 +  return b;
 +}
 +
 +static void minstring(char *str)
 +{
 +  int i;
 +
 +  for (i=0; (i < (int)strlen(str)); i++) 
 +    if (str[i]=='-')
 +      str[i]='_';
 +}
 +
 +int find_group(char s[], int ngrps, char **grpname)
 +{
 +  int aa, i, n;
 +  char string[STRLEN];
 +  gmx_bool bMultiple;
 +  
 +  bMultiple = FALSE;
 +  n = strlen(s);
 +  aa=NOTSET;
 +  /* first look for whole name match */
 +  if (aa==NOTSET)
 +    for(i=0; i<ngrps; i++)
 +      if (gmx_strcasecmp_min(s,grpname[i])==0) {
 +      if(aa!=NOTSET)
 +        bMultiple = TRUE;
 +      aa=i;
 +      }
 +  /* second look for first string match */
 +  if (aa==NOTSET)
 +    for(i=0; i<ngrps; i++)
 +      if (gmx_strncasecmp_min(s,grpname[i],n)==0) {
 +      if(aa!=NOTSET)
 +        bMultiple = TRUE;
 +      aa=i;
 +      }
 +  /* last look for arbitrary substring match */
 +  if (aa==NOTSET) {
 +    upstring(s);
 +    minstring(s);
 +    for(i=0; i<ngrps; i++) {
 +      strcpy(string, grpname[i]);
 +      upstring(string);
 +      minstring(string);
 +      if (strstr(string,s)!=NULL) {
 +      if(aa!=NOTSET)
 +        bMultiple = TRUE;
 +      aa=i;
 +      }
 +    }
 +  }
 +  if (bMultiple) {
 +    printf("Error: Multiple groups '%s' selected\n", s);
 +    aa=NOTSET;
 +  }
 +  return aa;
 +}
 +
 +static int qgroup(int *a, int ngrps, char **grpname)
 +{
 +    char s[STRLEN];
 +    int  aa;
 +    gmx_bool bInRange;
 +    char *end;
 +
 +    do {
 +        fprintf(stderr,"Select a group: ");
 +        do {
 +            if ( scanf("%s",s)!=1 ) 
 +                gmx_fatal(FARGS,"Cannot read from input");
 +            trim(s); /* remove spaces */
 +        } while (strlen(s)==0);
 +        aa = strtol(s, &end, 10);
 +        if (aa==0 && end[0] != '\0') /* string entered */
 +            aa = find_group(s, ngrps, grpname);
 +        bInRange = (aa >= 0 && aa < ngrps);
 +        if (!bInRange)
 +            printf("Error: No such group '%s'\n", s);
 +    } while (!bInRange);
 +    printf("Selected %d: '%s'\n", aa, grpname[aa]);
 +    *a = aa;
 +    return aa;
 +}
 +
 +static void rd_groups(t_blocka *grps,char **grpname,char *gnames[],
 +                    int ngrps,int isize[],atom_id *index[],int grpnr[])
 +{
 +  int i,j,gnr1;
 +
 +  if (grps->nr==0)
 +    gmx_fatal(FARGS,"Error: no groups in indexfile");
 +  for(i=0; (i<grps->nr); i++)
 +    fprintf(stderr,"Group %5d (%15s) has %5d elements\n",i,grpname[i],
 +         grps->index[i+1]-grps->index[i]);
 +  for(i=0; (i<ngrps); i++) {
 +    if (grps->nr > 1)
 +      do {
 +      gnr1=qgroup(&grpnr[i], grps->nr, grpname);
 +      if ((gnr1<0) || (gnr1>=grps->nr))
 +        fprintf(stderr,"Select between %d and %d.\n",0,grps->nr-1);
 +      }       while ((gnr1<0) || (gnr1>=grps->nr));
 +    else {
 +      fprintf(stderr,"There is one group in the index\n");
 +      gnr1=0;
 +    }
 +    gnames[i]=strdup(grpname[gnr1]);
 +    isize[i]=grps->index[gnr1+1]-grps->index[gnr1];
 +    snew(index[i],isize[i]);
 +    for(j=0; (j<isize[i]); j++)
 +      index[i][j]=grps->a[grps->index[gnr1]+j];
 +  }
 +}
 +
 +void rd_index(const char *statfile,int ngrps,int isize[],
 +            atom_id *index[],char *grpnames[])
 +{
 +  char    **gnames;
 +  t_blocka *grps;
 +  int     *grpnr;
 +  
 +  snew(grpnr,ngrps);
 +  if (!statfile)
 +    gmx_fatal(FARGS,"No index file specified");
 +  grps=init_index(statfile,&gnames);
 +  rd_groups(grps,gnames,grpnames,ngrps,isize,index,grpnr);
 +}
 +
 +void rd_index_nrs(char *statfile,int ngrps,int isize[],
 +                atom_id *index[],char *grpnames[],int grpnr[])
 +{
 +  char    **gnames;
 +  t_blocka *grps;
 +  
 +  if (!statfile)
 +    gmx_fatal(FARGS,"No index file specified");
 +  grps=init_index(statfile,&gnames);
 +  
 +  rd_groups(grps,gnames,grpnames,ngrps,isize,index,grpnr);
 +}
 +
 +void get_index(t_atoms *atoms, const char *fnm, int ngrps,
 +             int isize[], atom_id *index[],char *grpnames[])
 +{
 +  char    ***gnames;
 +  t_blocka *grps = NULL; 
 +  int     *grpnr;
 +  
 +  snew(grpnr,ngrps);
 +  snew(gnames,1);
 +  if (fnm != NULL) {
 +    grps=init_index(fnm,gnames);
 +  }
 +  else if (atoms) {
 +    snew(grps,1);
 +    snew(grps->index,1);
 +    analyse(atoms,grps,gnames,FALSE,FALSE);
 +  }
 +  else 
 +    gmx_incons("You need to supply a valid atoms structure or a valid index file name");
 +  
 +  rd_groups(grps,*gnames,grpnames,ngrps,isize,index,grpnr);
 +}
 +
 +t_cluster_ndx *cluster_index(FILE *fplog,const char *ndx)
 +{
 +  t_cluster_ndx *c;
 +  int i;
 +  
 +  snew(c,1);
 +  c->clust     = init_index(ndx,&c->grpname);
 +  c->maxframe = -1;
 +  for(i=0; (i<c->clust->nra); i++)
 +    c->maxframe = max(c->maxframe,c->clust->a[i]);
 +  fprintf(fplog ? fplog : stdout,
 +        "There are %d clusters containing %d structures, highest framenr is %d\n",
 +        c->clust->nr,c->clust->nra,c->maxframe);
 +  if (debug) {
 +    pr_blocka(debug,0,"clust",c->clust,TRUE);
 +    for(i=0; (i<c->clust->nra); i++)
 +      if ((c->clust->a[i] < 0) || (c->clust->a[i] > c->maxframe))
 +      gmx_fatal(FARGS,"Range check error for c->clust->a[%d] = %d\n"
 +                "should be within 0 and %d",i,c->clust->a[i],c->maxframe+1);
 +  }
 +  c->inv_clust=make_invblocka(c->clust,c->maxframe);
 +        
 +  return c;
 +}
 +
Simple merge
index 80f81a5ac8376e982c9613f2cc5c01698a27dd4d,0000000000000000000000000000000000000000..fd74ebb57277f2bf21918497c6fb9167a5e0a8ef
mode 100644,000000..100644
--- /dev/null
@@@ -1,627 -1,0 +1,627 @@@
-   if ((cr) && PAR(cr))
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROningen Mixture of Alchemy and Childrens' Stories
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +#include "gromacs/utility/gmx_header_config.h"
 +
 +#include <stdio.h>
 +#include <stdlib.h>
 +#include <string.h>
 +#include <limits.h>
 +#include <time.h>
 +
 +#ifdef HAVE_SYS_TIME_H
 +#include <sys/time.h>
 +#endif
 +
 +#include "smalloc.h"
 +#include "gmx_fatal.h"
 +#include "network.h"
 +#include "main.h"
 +#include "macros.h"
 +#include "futil.h"
 +#include "filenm.h"
 +#include "mdrun.h"
 +#include "gmxfio.h"
 +#include "string2.h"
 +
 +#ifdef GMX_THREAD_MPI
 +#include "thread_mpi.h"
 +#endif
 +
 +/* The source code in this file should be thread-safe. 
 +         Please keep it that way. */
 +
 +
 +#ifdef HAVE_UNISTD_H
 +#include <unistd.h>
 +#endif
 +
 +#ifdef GMX_NATIVE_WINDOWS
 +#include <process.h>
 +#endif
 +
 +
 +/* Portable version of ctime_r implemented in src/gmxlib/string2.c, but we do not want it declared in public installed headers */
 +char *
 +gmx_ctime_r(const time_t *clock,char *buf, int n);
 +
 +
 +#define BUFSIZE       1024
 +
 +/* this is not strictly thread-safe, but it's only written to at the beginning
 +   of the simulation, once by each thread with the same value. We assume
 +   that writing to an int is atomic.*/
 +static gmx_bool parallel_env_val;
 +#ifdef GMX_THREAD_MPI
 +tMPI_Thread_mutex_t parallel_env_mutex=TMPI_THREAD_MUTEX_INITIALIZER;
 +#endif
 +
 +
 +/* Returns 1 when running in a parallel environment, so it can also be 1 if
 +   mdrun was started with: mpirun -np 1.
 +     
 +   Use this function only to check whether a parallel environment has   
 +   been initialized, for example when checking whether gmx_finalize()   
 +   needs to be called. Use PAR(cr) to check whether the simulation actually
 +   has more than one node/thread.  */
 +gmx_bool gmx_parallel_env_initialized(void)
 +{
 +    gmx_bool ret;
 +#ifdef GMX_THREAD_MPI
 +    tMPI_Thread_mutex_lock(&parallel_env_mutex);
 +#endif
 +    ret=parallel_env_val;
 +#ifdef GMX_THREAD_MPI
 +    tMPI_Thread_mutex_unlock(&parallel_env_mutex);
 +#endif
 +    return ret;
 +}
 +
 +static void set_parallel_env(gmx_bool val)
 +{
 +#ifdef GMX_THREAD_MPI
 +    tMPI_Thread_mutex_lock(&parallel_env_mutex);
 +#endif
 +    if (!parallel_env_val)
 +    {
 +        /* we only allow it to be set, not unset */
 +        parallel_env_val=val;
 +    }
 +#ifdef GMX_THREAD_MPI
 +    tMPI_Thread_mutex_unlock(&parallel_env_mutex);
 +#endif
 +}
 +
 +
 +static void par_fn(char *base,int ftp,const t_commrec *cr,
 +                 gmx_bool bAppendSimId,gmx_bool bAppendNodeId,
 +                 char buf[],int bufsize)
 +{
 +  int n;
 +  
 +  if((size_t)bufsize<(strlen(base)+10))
 +     gmx_mem("Character buffer too small!");
 +
 +  /* Copy to buf, and strip extension */
 +  strcpy(buf,base);
 +  buf[strlen(base) - strlen(ftp2ext(fn2ftp(base))) - 1] = '\0';
 +
 +  if (bAppendSimId) {
 +    sprintf(buf+strlen(buf),"%d",cr->ms->sim);
 +  }
 +  if (bAppendNodeId) {
 +    strcat(buf,"_node");
 +    sprintf(buf+strlen(buf),"%d",cr->nodeid);
 +  }
 +  strcat(buf,".");
 +  
 +  /* Add extension again */
 +  strcat(buf,(ftp == efTPX) ? "tpr" : (ftp == efEDR) ? "edr" : ftp2ext(ftp));
 +  if (cr->nodeid == 0) {
 +    printf("node %d par_fn '%s'\n",cr->nodeid,buf);
 +    if (fn2ftp(buf) == efLOG) {
 +      printf("log\n");
 +    }
 +  }
 +}
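
The net effect of par_fn is a per-simulation / per-node filename: e.g. "traj.trr"
becomes "traj0_node2.trr" for simulation 0, node 2. A minimal standalone sketch of
the same string surgery (demo_par_fn is a hypothetical stand-in; a fixed ".trr"
extension replaces the ftp2ext machinery for brevity):

    #include <stdio.h>
    #include <string.h>

    /* Hypothetical stand-in for par_fn: splice sim and node ids in front
     * of the extension. Assumes fn ends in ".trr" and buf is large enough. */
    static void demo_par_fn(const char *fn, int sim, int node, char buf[])
    {
        size_t stem = strlen(fn) - strlen(".trr");  /* length without extension */
        memcpy(buf, fn, stem);
        buf[stem] = '\0';
        sprintf(buf + stem, "%d_node%d.trr", sim, node);
    }

    int main(void)
    {
        char buf[64];
        demo_par_fn("traj.trr", 0, 2, buf);
        printf("%s\n", buf);   /* prints: traj0_node2.trr */
        return 0;
    }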
 +
 +void check_multi_int(FILE *log,const gmx_multisim_t *ms,int val,
 +                     const char *name)
 +{
 +  int  *ibuf,p;
 +  gmx_bool bCompatible;
 +
 +  if (NULL != log)
 +      fprintf(log,"Multi-checking %s ... ",name);
 +  
 +  if (ms == NULL)
 +    gmx_fatal(FARGS,
 +            "check_multi_int called with a NULL communication pointer");
 +
 +  snew(ibuf,ms->nsim);
 +  ibuf[ms->sim] = val;
 +  gmx_sumi_sim(ms->nsim,ibuf,ms);
 +  
 +  bCompatible = TRUE;
 +  for(p=1; p<ms->nsim; p++)
 +    bCompatible = bCompatible && (ibuf[p-1] == ibuf[p]);
 +  
 +  if (bCompatible) 
 +  {
 +      if (NULL != log)
 +          fprintf(log,"OK\n");
 +  }
 +  else 
 +  {
 +      if (NULL != log)
 +      {
 +          fprintf(log,"\n%s is not equal for all subsystems\n",name);
 +          for(p=0; p<ms->nsim; p++)
 +              fprintf(log,"  subsystem %d: %d\n",p,ibuf[p]);
 +      }
 +      gmx_fatal(FARGS,"The %d subsystems are not compatible\n",ms->nsim);
 +  }
 +  
 +  sfree(ibuf);
 +}
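
The check relies on a sum-allreduce trick: every simulation writes its value into
its own slot of a zeroed array, the sum then reproduces each simulation's value in
every rank's copy, and equality of adjacent slots proves agreement. The same idea
in plain MPI, as a sketch rather than the gmx_sumi_sim wrapper:

    #include <mpi.h>
    #include <stdlib.h>

    /* Sketch: return 1 iff 'val' is identical on all nsim ranks of comm;
     * mysim is this rank's index in [0, nsim). */
    static int multi_int_equal(MPI_Comm comm, int nsim, int mysim, int val)
    {
        int *buf = calloc(nsim, sizeof *buf);
        int p, ok = 1;
        buf[mysim] = val;
        MPI_Allreduce(MPI_IN_PLACE, buf, nsim, MPI_INT, MPI_SUM, comm);
        for (p = 1; p < nsim; p++)
            ok = ok && (buf[p - 1] == buf[p]);
        free(buf);
        return ok;
    }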
 +
 +void check_multi_large_int(FILE *log,const gmx_multisim_t *ms,
 +                           gmx_large_int_t val, const char *name)
 +{
 +  gmx_large_int_t  *ibuf;
 +  int p;
 +  gmx_bool bCompatible;
 +
 +  if (NULL != log)
 +      fprintf(log,"Multi-checking %s ... ",name);
 +  
 +  if (ms == NULL)
 +    gmx_fatal(FARGS,
 +            "check_multi_int called with a NULL communication pointer");
 +
 +  snew(ibuf,ms->nsim);
 +  ibuf[ms->sim] = val;
 +  gmx_sumli_sim(ms->nsim,ibuf,ms);
 +  
 +  bCompatible = TRUE;
 +  for(p=1; p<ms->nsim; p++)
 +    bCompatible = bCompatible && (ibuf[p-1] == ibuf[p]);
 +  
 +  if (bCompatible) 
 +  {
 +      if (NULL != log)
 +          fprintf(log,"OK\n");
 +  }
 +  else 
 +  {
 +      if (NULL != log)
 +      {
 +          fprintf(log,"\n%s is not equal for all subsystems\n",name);
 +          for(p=0; p<ms->nsim; p++)
 +          {
 +              char strbuf[255];
 +              /* first make the format string */
 +              snprintf(strbuf, 255, "  subsystem %%d: %s\n", 
 +                       gmx_large_int_pfmt);
 +              fprintf(log,strbuf,p,ibuf[p]);
 +          }
 +      }
 +      gmx_fatal(FARGS,"The %d subsystems are not compatible\n",ms->nsim);
 +  }
 +  
 +  sfree(ibuf);
 +}
 +
 +
 +void gmx_log_open(const char *lognm,const t_commrec *cr,gmx_bool bMasterOnly, 
 +                   unsigned long Flags, FILE** fplog)
 +{
 +    int  len,testlen,pid;
 +    char buf[256],host[256];
 +    time_t t;
 +    char timebuf[STRLEN];
 +    FILE *fp=*fplog;
 +    char *tmpnm;
 +
 +    gmx_bool bAppend = Flags & MD_APPENDFILES;        
 +  
 +    debug_gmx();
 +  
 +    /* Communicate the filename for logfile */
 +    if (cr->nnodes > 1 && !bMasterOnly
 +#ifdef GMX_THREAD_MPI
 +        /* With thread MPI the non-master log files are opened later
 +         * when the file names are already known on all nodes.
 +         */
 +        && FALSE
 +#endif
 +        )
 +    {
 +        if (MASTER(cr))
 +        {
 +            len = strlen(lognm) + 1;
 +        }
 +        gmx_bcast(sizeof(len),&len,cr);
 +        if (!MASTER(cr))
 +        {
 +            snew(tmpnm,len+8);
 +        }
 +        else
 +        {
 +            tmpnm=gmx_strdup(lognm);
 +        }
 +        gmx_bcast(len*sizeof(*tmpnm),tmpnm,cr);
 +    }
 +    else
 +    {
 +        tmpnm=gmx_strdup(lognm);
 +    }
 +  
 +    debug_gmx();
 +
 +    if (!bMasterOnly && !MASTER(cr))
 +    {
 +        /* Since log always ends with '.log' let's use this info */
 +        par_fn(tmpnm,efLOG,cr,FALSE,!bMasterOnly,buf,255);
 +        fp = gmx_fio_fopen(buf, bAppend ? "a+" : "w+" );
 +    }
 +    else if (!bAppend)
 +    {
 +        fp = gmx_fio_fopen(tmpnm, bAppend ? "a+" : "w+" );
 +    }
 +
 +    sfree(tmpnm);
 +
 +    gmx_fatal_set_log_file(fp);
 +  
 +    /* Get some machine parameters */
 +#ifdef HAVE_UNISTD_H
 +    if (gethostname(host,255) != 0)
 +    {
 +        sprintf(host,"unknown");
 +    }
 +#else
 +    sprintf(host,"unknown");
 +#endif  
 +
 +    time(&t);
 +
 +#ifndef NO_GETPID
 +#   ifdef GMX_NATIVE_WINDOWS
 +    pid = _getpid();
 +#   else
 +    pid = getpid();
 +#   endif
 +#else
 +      pid = 0;
 +#endif
 +
 +    if (bAppend)
 +    {
 +        fprintf(fp,
 +                "\n"
 +                "\n"
 +                "-----------------------------------------------------------\n"
 +                "Restarting from checkpoint, appending to previous log file.\n"
 +                "\n"
 +            );
 +    }
 +      
 +    gmx_ctime_r(&t,timebuf,STRLEN);
 +
 +    fprintf(fp,
 +            "Log file opened on %s"
 +            "Host: %s  pid: %d  nodeid: %d  nnodes:  %d\n",
 +            timebuf,host,pid,cr->nodeid,cr->nnodes);
 +
 +#if (defined BUILD_MACHINE && defined BUILD_TIME && defined BUILD_USER) 
 +    fprintf(fp,
 +            "The Gromacs distribution was built %s by\n"
 +            "%s (%s)\n\n\n",BUILD_TIME,BUILD_USER,BUILD_MACHINE);
 +#endif
 +
 +    fflush(fp);
 +    debug_gmx();
 +
 +    *fplog = fp;
 +}
 +
 +void gmx_log_close(FILE *fp)
 +{
 +  if (fp) {
 +    gmx_fatal_set_log_file(NULL);
 +    gmx_fio_fclose(fp);
 +  }
 +}
 +
 +static void comm_args(const t_commrec *cr,int *argc,char ***argv)
 +{
 +  int i,len;
 +  
++  if (PAR(cr))
 +    gmx_bcast(sizeof(*argc),argc,cr);
 +  
 +  if (!MASTER(cr))
 +    snew(*argv,*argc+1);
 +  fprintf(stderr,"NODEID=%d argc=%d\n",cr->nodeid,*argc);
 +  for(i=0; (i<*argc); i++) {
 +    if (MASTER(cr))
 +      len = strlen((*argv)[i])+1;
 +    gmx_bcast(sizeof(len),&len,cr);
 +    if (!MASTER(cr))
 +      snew((*argv)[i],len);
 +    /*gmx_bcast(len*sizeof((*argv)[i][0]),(*argv)[i],cr);*/
 +    gmx_bcast(len*sizeof(char),(*argv)[i],cr);
 +  }
 +  debug_gmx();
 +}
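
comm_args broadcasts argc first, then each argument as a (length, payload) pair,
so non-master ranks can size their buffers before the string arrives. The same
pattern in plain MPI, as a sketch without the GROMACS wrappers:

    #include <mpi.h>
    #include <stdlib.h>
    #include <string.h>

    /* Sketch: replicate argv from rank 0 onto every rank of comm. */
    static char **bcast_args(MPI_Comm comm, int *argc, char **argv)
    {
        int i, len, rank;
        MPI_Comm_rank(comm, &rank);
        MPI_Bcast(argc, 1, MPI_INT, 0, comm);
        if (rank != 0)
            argv = calloc(*argc + 1, sizeof(char *));
        for (i = 0; i < *argc; i++) {
            if (rank == 0)
                len = (int)strlen(argv[i]) + 1;   /* include the '\0' */
            MPI_Bcast(&len, 1, MPI_INT, 0, comm);
            if (rank != 0)
                argv[i] = malloc(len);
            MPI_Bcast(argv[i], len, MPI_CHAR, 0, comm);
        }
        return argv;
    }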
 +
 +void init_multisystem(t_commrec *cr,int nsim, char **multidirs,
 +                      int nfile, const t_filenm fnm[],gmx_bool bParFn)
 +{
 +    gmx_multisim_t *ms;
 +    int  nnodes,nnodpersim,sim,i,ftp;
 +    char buf[256];
 +#ifdef GMX_MPI
 +    MPI_Group mpi_group_world;
 +#endif  
 +    int *rank;
 +
 +#ifndef GMX_MPI
 +    if (nsim > 1)
 +    {
 +        gmx_fatal(FARGS,"This binary is compiled without MPI support, can not do multiple simulations.");
 +    }
 +#endif
 +
 +    nnodes  = cr->nnodes;
 +    if (nnodes % nsim != 0)
 +    {
 +        gmx_fatal(FARGS,"The number of nodes (%d) is not a multiple of the number of simulations (%d)",nnodes,nsim);
 +    }
 +
 +    nnodpersim = nnodes/nsim;
 +    sim = cr->nodeid/nnodpersim;
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"We have %d simulations, %d nodes per simulation, local simulation is %d\n",nsim,nnodpersim,sim);
 +    }
 +
 +    snew(ms,1);
 +    cr->ms = ms;
 +    ms->nsim = nsim;
 +    ms->sim  = sim;
 +#ifdef GMX_MPI
 +    /* Create a communicator for the master nodes */
 +    snew(rank,ms->nsim);
 +    for(i=0; i<ms->nsim; i++)
 +    {
 +        rank[i] = i*nnodpersim;
 +    }
 +    MPI_Comm_group(MPI_COMM_WORLD,&mpi_group_world);
 +    MPI_Group_incl(mpi_group_world,nsim,rank,&ms->mpi_group_masters);
 +    sfree(rank);
 +    MPI_Comm_create(MPI_COMM_WORLD,ms->mpi_group_masters,
 +                    &ms->mpi_comm_masters);
 +
 +#if !defined(GMX_THREAD_MPI) && !defined(MPI_IN_PLACE_EXISTS)
 +    /* initialize the MPI_IN_PLACE replacement buffers */
 +    snew(ms->mpb, 1);
 +    ms->mpb->ibuf=NULL;
 +    ms->mpb->libuf=NULL;
 +    ms->mpb->fbuf=NULL;
 +    ms->mpb->dbuf=NULL;
 +    ms->mpb->ibuf_alloc=0;
 +    ms->mpb->libuf_alloc=0;
 +    ms->mpb->fbuf_alloc=0;
 +    ms->mpb->dbuf_alloc=0;
 +#endif
 +
 +#endif
 +
 +    /* Reduce the intra-simulation communication */
 +    cr->sim_nodeid = cr->nodeid % nnodpersim;
 +    cr->nnodes = nnodpersim;
 +#ifdef GMX_MPI
 +    MPI_Comm_split(MPI_COMM_WORLD,sim,cr->sim_nodeid,&cr->mpi_comm_mysim);
 +    cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
 +    cr->nodeid = cr->sim_nodeid;
 +#endif
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"This is simulation %d",cr->ms->sim);
 +        if (PAR(cr))
 +        {
 +            fprintf(debug,", local number of nodes %d, local nodeid %d",
 +                    cr->nnodes,cr->sim_nodeid);
 +        }
 +        fprintf(debug,"\n\n");
 +    }
 +
 +    if (multidirs)
 +    {
 +        int ret;
 +        if (debug)
 +        {
 +            fprintf(debug,"Changing to directory %s\n",multidirs[cr->ms->sim]);
 +        }
 +        gmx_chdir(multidirs[cr->ms->sim]);
 +    }
 +    else if (bParFn)
 +    {
 +        /* Patch output and tpx, cpt and rerun input file names */
 +        for(i=0; (i<nfile); i++)
 +        {
 +            /* Because of possible multiple extensions per type we must look 
 +             * at the actual file name 
 +             */
 +            if (is_output(&fnm[i]) ||
 +                fnm[i].ftp == efTPX || fnm[i].ftp == efCPT ||
 +                strcmp(fnm[i].opt,"-rerun") == 0)
 +            {
 +                ftp = fn2ftp(fnm[i].fns[0]);
 +                par_fn(fnm[i].fns[0],ftp,cr,TRUE,FALSE,buf,255);
 +                sfree(fnm[i].fns[0]);
 +                fnm[i].fns[0] = gmx_strdup(buf);
 +            }
 +        }
 +    }
 +}
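
Stripped of the GROMACS bookkeeping, the communicator setup above is the textbook
recipe: split MPI_COMM_WORLD by simulation index for intra-simulation traffic,
then form a masters-only communicator from ranks 0, n, 2n, .... A condensed
sketch, assuming the world size is an exact multiple of nsim:

    #include <mpi.h>
    #include <stdlib.h>

    /* Sketch: build per-simulation and masters-only communicators. */
    static void split_multisim(int nsim, MPI_Comm *sim_comm, MPI_Comm *masters)
    {
        MPI_Group world_grp, master_grp;
        int rank, size, nper, i, *roots;

        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        MPI_Comm_size(MPI_COMM_WORLD, &size);
        nper = size / nsim;                      /* ranks per simulation */

        /* Intra-simulation communicator: color = simulation index */
        MPI_Comm_split(MPI_COMM_WORLD, rank / nper, rank % nper, sim_comm);

        /* Masters-only communicator: rank 0 of every simulation */
        roots = malloc(nsim * sizeof *roots);
        for (i = 0; i < nsim; i++)
            roots[i] = i * nper;
        MPI_Comm_group(MPI_COMM_WORLD, &world_grp);
        MPI_Group_incl(world_grp, nsim, roots, &master_grp);
        MPI_Comm_create(MPI_COMM_WORLD, master_grp, masters);
        free(roots);
    }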
 +
 +t_commrec *init_par(int *argc,char ***argv_ptr)
 +{
 +    t_commrec *cr;
 +    char      **argv;
 +    int       i;
 +    gmx_bool      pe=FALSE;
 +
 +    snew(cr,1);
 +
 +    argv = *argv_ptr;
 +
 +#ifdef GMX_MPI
 +#ifdef GMX_LIB_MPI
 +    pe = TRUE;
 +#ifdef GMX_CHECK_MPI_ENV
 +    /* Do not use MPI calls when env.var. GMX_CHECK_MPI_ENV is not set */
 +    if (getenv(GMX_CHECK_MPI_ENV) == NULL)
 +        pe = FALSE;
 +#endif /* GMX_CHECK_MPI_ENV */
 +#endif /* GMX_LIB_MPI  */
 +    set_parallel_env(pe);
 +    if (pe) {
 +        cr->sim_nodeid = gmx_setup(argc,argv,&cr->nnodes);
 +    } else {
 +        cr->nnodes     = 1;
 +        cr->sim_nodeid = 0;
 +    }
 +#else /* GMX_MPI */
 +    pe=FALSE;
 +    set_parallel_env(pe);
 +    cr->sim_nodeid   = 0;
 +    cr->nnodes       = 1;
 +#endif /* GMX_MPI */
 +
 +    if (!PAR(cr) && (cr->sim_nodeid != 0))
 +        gmx_comm("(!PAR(cr) && (cr->sim_nodeid != 0))");
 +
 +    if (PAR(cr)) 
 +    {
 +#ifdef GMX_MPI
 +        cr->mpi_comm_mysim = MPI_COMM_WORLD;
 +        cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
 +#endif /* GMX_MPI */
 +    }
 +    cr->nodeid = cr->sim_nodeid;
 +
 +    cr->duty = (DUTY_PP | DUTY_PME);
 +
 +    /* Communicate arguments if parallel */
 +#ifndef GMX_THREAD_MPI
 +    if (PAR(cr))
 +        comm_args(cr,argc,argv_ptr);
 +#endif /* GMX_THREAD_MPI */
 +
 +#ifdef GMX_MPI
 +#if !defined(GMX_THREAD_MPI) && !defined(MPI_IN_PLACE_EXISTS)
 +  /* initialize the MPI_IN_PLACE replacement buffers */
 +  snew(cr->mpb, 1);
 +  cr->mpb->ibuf=NULL;
 +  cr->mpb->libuf=NULL;
 +  cr->mpb->fbuf=NULL;
 +  cr->mpb->dbuf=NULL;
 +  cr->mpb->ibuf_alloc=0;
 +  cr->mpb->libuf_alloc=0;
 +  cr->mpb->fbuf_alloc=0;
 +  cr->mpb->dbuf_alloc=0;
 +#endif
 +#endif
 +
 +    return cr;
 +}
 +
 +t_commrec *init_par_threads(const t_commrec *cro)
 +{
 +#ifdef GMX_THREAD_MPI
 +    int initialized;
 +    t_commrec *cr;
 +
 +    /* make a thread-specific commrec */
 +    snew(cr,1);
 +    /* now copy the whole thing, so settings like the number of PME nodes
 +       get propagated. */
 +    *cr=*cro;
 +
 +    /* and we start setting our own thread-specific values for things */
 +    MPI_Initialized(&initialized);
 +    if (!initialized)
 +        gmx_comm("Initializing threads without comm");
 +    set_parallel_env(TRUE);
 +    /* once threads will be used together with MPI, we'll
 +       fill the cr structure with distinct data here. This might even work: */
 +    cr->sim_nodeid = gmx_setup(0,NULL, &cr->nnodes);
 +
 +    cr->mpi_comm_mysim = MPI_COMM_WORLD;
 +    cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
 +    cr->nodeid = cr->sim_nodeid;
 +    cr->duty = (DUTY_PP | DUTY_PME);
 +
 +    return cr;
 +#else
 +    return NULL;
 +#endif
 +}
 +
 +
 +t_commrec *init_cr_nopar(void)
 +{
 +    t_commrec *cr;
 +
 +    snew(cr,1);
 +
 +    cr->nnodes     = 1; 
 +    /* cr->nthreads   = 1; */
 +    cr->sim_nodeid = 0;
 +    cr->nodeid     = 0;
 +    /* cr->threadid   = 0; */
 +    cr->duty       = (DUTY_PP | DUTY_PME);
 +
 +    return cr;
 +}
index 47d69855e421e21dab0409078f61e3906d415700,0000000000000000000000000000000000000000..0e245660d9b0c91e46cbf0a3891ac03ccca50a28
mode 100644,000000..100644
--- /dev/null
@@@ -1,818 -1,0 +1,819 @@@
-     if (!bCanTime || select == NULL || strcmp(time_list[i], select) != 0)
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 + 
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROningen Mixture of Alchemy and Childrens' Stories
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <ctype.h>
 +#include <assert.h>
 +#include "copyrite.h"
 +#include "sysstuff.h"
 +#include "macros.h"
 +#include "string2.h"
 +#include "smalloc.h"
 +#include "pbc.h"
 +#include "statutil.h"
 +#include "names.h"
 +#include "vec.h"
 +#include "futil.h"
 +#include "wman.h"
 +#include "tpxio.h"
 +#include "gmx_fatal.h"
 +#include "network.h"
 +#include "vec.h"
 +#include "mtop_util.h"
 +#include "gmxfio.h"
 +
 +#ifdef GMX_THREAD_MPI
 +#include "thread_mpi.h"
 +#endif
 +
 +#ifdef HAVE_UNISTD_H
 +#include <unistd.h>
 +#endif
 +
 +/* used for npri */
 +#ifdef __sgi
 +#include <sys/schedctl.h>
 +#include <sys/sysmp.h>
 +#endif
 +
 +/* The source code in this file should be thread-safe. 
 +      Please keep it that way. */
 +
 +/******************************************************************
 + *
 + *             T R A J E C T O R Y   S T U F F
 + *
 + ******************************************************************/
 +
 +/* inherently globally shared names: */
 +static const char *program_name=NULL;
 +static char *cmd_line=NULL;
 +
 +#ifdef GMX_THREAD_MPI
 +/* For now, some things here are simply not re-entrant, so
 +   we have to actively lock them. */
 +static tMPI_Thread_mutex_t init_mutex=TMPI_THREAD_MUTEX_INITIALIZER;
 +#endif
 +
 +
 +/****************************************************************
 + *
 + *            E X P O R T E D   F U N C T I O N S
 + *
 + ****************************************************************/
 +
 +
 +/* program names, etc. */
 +
 +const char *ShortProgram(void)
 +{
 +    const char *pr,*ret;
 +#ifdef GMX_THREAD_MPI
 +    tMPI_Thread_mutex_lock(&init_mutex);
 +#endif
 +    pr=ret=program_name; 
 +#ifdef GMX_THREAD_MPI
 +    tMPI_Thread_mutex_unlock(&init_mutex);
 +#endif
 +    if ((pr=strrchr(ret,DIR_SEPARATOR)) != NULL)
 +        ret=pr+1;
 +    return ret;
 +}
 +
 +const char *Program(void)
 +{
 +    const char *ret;
 +#ifdef GMX_THREAD_MPI
 +    tMPI_Thread_mutex_lock(&init_mutex);
 +#endif
 +    ret=program_name; 
 +#ifdef GMX_THREAD_MPI
 +    tMPI_Thread_mutex_unlock(&init_mutex);
 +#endif
 +    return ret;
 +}
 +
 +const char *command_line(void)
 +{
 +    const char *ret;
 +#ifdef GMX_THREAD_MPI
 +    tMPI_Thread_mutex_lock(&init_mutex);
 +#endif
 +    ret=cmd_line; 
 +#ifdef GMX_THREAD_MPI
 +    tMPI_Thread_mutex_unlock(&init_mutex);
 +#endif
 +    return ret;
 +}
 +
 +void set_program_name(const char *argvzero)
 +{
 +#ifdef GMX_THREAD_MPI
 +    tMPI_Thread_mutex_lock(&init_mutex);
 +#endif
 +    if (program_name == NULL)
 +    {
 +        program_name = strdup(argvzero);
 +    }
 +    if (program_name == NULL)
 +        program_name="GROMACS";
 +#ifdef GMX_THREAD_MPI
 +    tMPI_Thread_mutex_unlock(&init_mutex);
 +#endif
 +}
 +
 +
 +void set_command_line(int argc, char *argv[])
 +{
 +    int i;
 +    size_t cmdlength;
 +
 +#ifdef GMX_THREAD_MPI
 +    tMPI_Thread_mutex_lock(&init_mutex);
 +#endif
 +    if (cmd_line==NULL)
 +    {
 +        cmdlength = strlen(argv[0]);
 +        for (i=1; i<argc; i++) 
 +        {
 +            cmdlength += strlen(argv[i]);
 +        }
 +        
 +        /* Fill the cmdline string */
 +        snew(cmd_line,cmdlength+argc+1);
 +        for (i=0; i<argc; i++) 
 +        {
 +            strcat(cmd_line,argv[i]);
 +            strcat(cmd_line," ");
 +        }
 +    }
 +#ifdef GMX_THREAD_MPI
 +    tMPI_Thread_mutex_unlock(&init_mutex);
 +#endif
 +
 +}
 +
 +/* utility functions */
 +
 +gmx_bool bRmod_fd(double a, double b, double c, gmx_bool bDouble)
 +{
 +    int iq;
 +    double tol;
 +    
 +    tol = 2*(bDouble ? GMX_DOUBLE_EPS : GMX_FLOAT_EPS);
 +    
 +    iq = (a - b + tol*a)/c;
 +    
 +    if (fabs(a - b - c*iq) <= tol*fabs(a))
 +        return TRUE;
 +    else
 +        return FALSE;
 +}
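
bRmod_fd answers "is a - b an integer multiple of c?" with a relative tolerance of
two machine epsilons, so accumulated rounding in frame times does not break -dt
frame selection. A standalone copy of the test with two worked cases:

    #include <math.h>
    #include <stdio.h>

    /* Standalone copy of the bRmod_fd logic (double precision only). */
    static int is_multiple(double a, double b, double c)
    {
        double tol = 2*2.220446049250313e-16;   /* 2*DBL_EPSILON */
        int iq = (int)((a - b + tol*a)/c);
        return fabs(a - b - c*iq) <= tol*fabs(a);
    }

    int main(void)
    {
        printf("%d\n", is_multiple(0.9, 0.0, 0.3));  /* 1: 0.9 == 3*0.3 */
        printf("%d\n", is_multiple(1.0, 0.0, 0.3));  /* 0: no integer multiple */
        return 0;
    }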
 +
 +int check_times2(real t,real t0,real tp, real tpp, gmx_bool bDouble)
 +{
 +    int  r;
 +    real margin;
 +    
 +#ifndef GMX_DOUBLE
 +    /* since t is float, we can not use double precision for bRmod */
 +    bDouble = FALSE;
 +#endif
 +    
 +    if (t-tp>0 && tp-tpp>0)
 +        margin = 0.1*min(t-tp,tp-tpp);
 +    else
 +        margin = 0;
 +    
 +    r=-1;
 +    if ((!bTimeSet(TBEGIN) || (t >= rTimeValue(TBEGIN)))  &&
 +        (!bTimeSet(TEND)   || (t <= rTimeValue(TEND)))) {
 +        if (bTimeSet(TDELTA) && !bRmod_fd(t,t0,rTimeValue(TDELTA),bDouble))
 +            r = -1;
 +        else
 +            r = 0;
 +    }
 +    else if (bTimeSet(TEND) && (t >= rTimeValue(TEND)))
 +        r = 1;
 +    if (debug) 
 +        fprintf(debug,"t=%g, t0=%g, b=%g, e=%g, dt=%g: r=%d\n",
 +                t,t0,rTimeValue(TBEGIN),rTimeValue(TEND),rTimeValue(TDELTA),r);
 +    return r;
 +}
 +
 +int check_times(real t)
 +{
 +    return check_times2(t,t,t,t,FALSE);
 +}
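
The tristate return is meant for trajectory loops: negative means skip this frame
(before -b or off the -dt grid), zero means use it, and positive means everything
after -e can be ignored. A sketch of the caller-side idiom (next_frame_time is a
hypothetical stand-in for the trajectory reader):

    /* Sketch only; next_frame_time() is hypothetical. */
    double t;
    while (next_frame_time(&t))
    {
        int r = check_times((real)t);
        if (r > 0)
            break;      /* past -e: stop reading the trajectory */
        if (r < 0)
            continue;   /* before -b or not on the -dt grid: skip */
        /* r == 0: analyse this frame */
    }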
 +
 +
 +
 +
 +static void set_default_time_unit(const char *time_list[], gmx_bool bCanTime)
 +{
 +    int i=0,j;
 +    const char *select;
 +
 +    if (bCanTime)
 +    {
 +        select = getenv("GMXTIMEUNIT");
 +        if (select != NULL)
 +        {
 +            i = 1;
 +            while(time_list[i] && strcmp(time_list[i], select) != 0)
 +            {
 +                i++;
 +            }
 +        }
 +    }
++    if (!bCanTime || select == NULL || 
++        time_list[i]==NULL || strcmp(time_list[i], select) != 0)
 +    {
 +        /* Set it to the default: ps */
 +        i = 1;
 +        while(time_list[i] && strcmp(time_list[i], "ps") != 0)
 +        {
 +            i++;
 +        }
 +        
 +    }
 +    time_list[0] = time_list[i];
 +}
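
The merged condition fixes a read past the terminator: when GMXTIMEUNIT names an
unknown unit, the search loop stops with time_list[i] == NULL, and the old code
passed that NULL to strcmp. A minimal sketch of the corrected lookup, assuming a
NULL-terminated option list with slot 0 reserved for the selection:

    #include <stdio.h>
    #include <string.h>

    /* Find 'want' in a NULL-terminated list, falling back to "ps";
     * mirrors the corrected set_default_time_unit logic. */
    static const char *pick_unit(const char *list[], const char *want)
    {
        int i = 1;
        while (list[i] && (want == NULL || strcmp(list[i], want) != 0))
            i++;
        if (list[i] == NULL) {       /* unknown unit: fall back to ps */
            i = 1;
            while (list[i] && strcmp(list[i], "ps") != 0)
                i++;
        }
        return list[i];
    }

    int main(void)
    {
        const char *units[] = { NULL, "fs", "ps", "ns", NULL };
        printf("%s\n", pick_unit(units, "weeks"));  /* prints: ps */
        return 0;
    }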
 +
 +
 +static void set_default_xvg_format(const char *xvg_list[])
 +{
 +    int i,j;
 +    const char *select,*tmp;
 +
 +    select = getenv("GMX_VIEW_XVG");
 +    if (select == NULL)
 +    {
 +        /* The default is the first option */
 +        xvg_list[0] = xvg_list[1];
 +    }
 +    else
 +    {
 +        i = 1;
 +        while (xvg_list[i] && strcmp(xvg_list[i], select) != 0)
 +        {
 +            i++;
 +        }
 +        if (xvg_list[i] != NULL)
 +        {
 +            xvg_list[0] = xvg_list[i];
 +        }
 +        else
 +        {
 +            xvg_list[0] = xvg_list[exvgNONE];
 +        }
 +    }
 +}
 +
 +
 +/***** T O P O L O G Y   S T U F F ******/
 +
 +t_topology *read_top(const char *fn,int *ePBC)
 +{
 +    int        epbc,natoms;
 +    t_topology *top;
 +    
 +    snew(top,1);
 +    epbc = read_tpx_top(fn,NULL,NULL,&natoms,NULL,NULL,NULL,top);
 +    if (ePBC)
 +        *ePBC = epbc;
 +    
 +    return top;
 +}
 +
 +/*************************************************************
 + *
 + *           P A R S I N G   S T U F F
 + *
 + *************************************************************/
 +
 +static void usage(const char *type,const char *arg)
 +{
 +    assert(arg);
 +    gmx_fatal(FARGS,"Expected %s argument for option %s\n",type,arg);
 +}
 +
 +int iscan(int argc,char *argv[],int *i)
 +{
 +    int var;
 +    
 +    if (argc > (*i)+1) {
 +        if (!sscanf(argv[++(*i)],"%d",&var))
 +            usage("an integer",argv[(*i)-1]);
 +    } else
 +        usage("an integer",argv[*i]);
 +    
 +    return var;
 +}
 +
 +gmx_large_int_t istepscan(int argc,char *argv[],int *i)
 +{
 +    gmx_large_int_t var;
 +    
 +    if (argc > (*i)+1) {
 +        if (!sscanf(argv[++(*i)],gmx_large_int_pfmt,&var))
 +            usage("an integer",argv[(*i)-1]);
 +    } else
 +        usage("an integer",argv[*i]);
 +    
 +    return var;
 +}
 +
 +double dscan(int argc,char *argv[],int *i)
 +{
 +    double var;
 +    
 +    if (argc > (*i)+1) {
 +        if (!sscanf(argv[++(*i)],"%lf",&var))
 +            usage("a real",argv[(*i)-1]);
 +    } else
 +        usage("a real",argv[*i]);
 +    
 +    return var;
 +}
 +
 +char *sscan(int argc,char *argv[],int *i)
 +{
 +    if (argc > (*i)+1) 
 +    {
 +        if ( (argv[(*i)+1][0]=='-') && (argc > (*i)+2) && 
 +           (argv[(*i)+2][0]!='-') )
 +        {
 +            fprintf(stderr,"Possible missing string argument for option %s\n\n",
 +                    argv[*i]);
 +        }
 +    } 
 +    else
 +        usage("a string",argv[*i]);
 +    
 +    return argv[++(*i)];
 +}
 +
 +int nenum(const char *const enumc[])
 +{
 +    int i;
 +    
 +    i=1;
 +    /* we *can* compare pointers directly here! */
 +    while(enumc[i] && enumc[0]!=enumc[i])
 +        i++;
 +    
 +    return i;
 +}
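
nenum works because argument parsing sets u.c[0] to point at the selected element
of the same array, so plain pointer comparison (not strcmp) recovers the index.
A tiny self-contained check of that invariant:

    #include <stdio.h>

    int main(void)
    {
        const char *opts[] = { NULL, "xmgrace", "xmgr", "none", NULL };
        int i = 1;
        opts[0] = opts[2];                 /* the parser picked "xmgr" */
        while (opts[i] && opts[0] != opts[i])
            i++;
        printf("%d\n", i);                 /* prints: 2 */
        return 0;
    }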
 +
 +static void pdesc(char *desc)
 +{
 +    char *ptr,*nptr;
 +    
 +    ptr=desc;
 +    if ((int)strlen(ptr) < 70)
 +        fprintf(stderr,"\t%s\n",ptr);
 +    else {
 +        for(nptr=ptr+70; (nptr != ptr) && (!isspace(*nptr)); nptr--)
 +            ;
 +        if (nptr == ptr)
 +            fprintf(stderr,"\t%s\n",ptr);
 +        else {
 +            *nptr='\0';
 +            nptr++;
 +            fprintf(stderr,"\t%s\n",ptr);
 +            pdesc(nptr);
 +        }
 +    }
 +}
 +
 +static FILE *man_file(const output_env_t oenv,const char *mantp)
 +{
 +    FILE   *fp;
 +    char   buf[256];
 +    const char *pr = output_env_get_short_program_name(oenv);
 +    
 +    if (strcmp(mantp,"ascii") != 0)
 +        sprintf(buf,"%s.%s",pr,mantp);
 +    else
 +        sprintf(buf,"%s.txt",pr);
 +    fp = gmx_fio_fopen(buf,"w");
 +    
 +    return fp;
 +}
 +
 +static int add_parg(int npargs,t_pargs *pa,t_pargs *pa_add)
 +{
 +    memcpy(&(pa[npargs]),pa_add,sizeof(*pa_add));
 +    
 +    return npargs+1;
 +}
 +
 +static char *mk_desc(t_pargs *pa, const char *time_unit_str)
 +{
 +    char *newdesc=NULL,*ndesc=NULL,*nptr=NULL;
 +    const char*ptr=NULL;
 +    int  len,k;
 +    
 +    /* First compute length for description */
 +    len = strlen(pa->desc)+1;
 +    if ((ptr = strstr(pa->desc,"HIDDEN")) != NULL)
 +        len += 4;
 +    if (pa->type == etENUM) {
 +        len += 10;
 +        for(k=1; (pa->u.c[k] != NULL); k++) {
 +            len += strlen(pa->u.c[k])+12;
 +        }
 +    }
 +    snew(newdesc,len);
 +    
 +    /* add label for hidden options */
 +    if (is_hidden(pa)) 
 +        sprintf(newdesc,"[hidden] %s",ptr+6);
 +    else
 +        strcpy(newdesc,pa->desc);
 +    
 +    /* change '%t' into time_unit */
 +#define TUNITLABEL "%t"
 +#define NTUNIT strlen(TUNITLABEL)
 +    if (pa->type == etTIME)
 +        while( (nptr=strstr(newdesc,TUNITLABEL)) != NULL ) {
 +            nptr[0]='\0';
 +            nptr+=NTUNIT;
 +            len+=strlen(time_unit_str)-NTUNIT;
 +            snew(ndesc,len);
 +            strcpy(ndesc,newdesc);
 +            strcat(ndesc,time_unit_str);
 +            strcat(ndesc,nptr);
 +            sfree(newdesc);
 +            newdesc=ndesc;
 +            ndesc=NULL;
 +        }
 +#undef TUNITLABEL
 +#undef NTUNIT
 +    
 +    /* Add an extra comment for enumerated options */
 +    if (pa->type == etENUM) {
 +        strcat(newdesc,": ");
 +        for(k=1; (pa->u.c[k] != NULL); k++) {
 +            strcat(newdesc,"[TT]");
 +            strcat(newdesc,pa->u.c[k]);
 +            strcat(newdesc,"[tt]");
 +            /* Print a comma everywhere but at the last one */
 +            if (pa->u.c[k+1] != NULL) {
 +                if (pa->u.c[k+2] == NULL)
 +                    strcat(newdesc," or ");
 +                else
 +                    strcat(newdesc,", ");
 +            }
 +        }
 +    }
 +    return newdesc;
 +}
 +
 +
 +void parse_common_args(int *argc,char *argv[],unsigned long Flags,
 +                     int nfile,t_filenm fnm[],int npargs,t_pargs *pa,
 +                     int ndesc,const char **desc,
 +                     int nbugs,const char **bugs,
 +                       output_env_t *oenv)
 +{
 +    gmx_bool bHelp=FALSE,bHidden=FALSE,bQuiet=FALSE,bVersion=FALSE;
 +    const char *manstr[] = { NULL, "no", "html", "tex", "nroff", "ascii", 
 +                            "completion", "py", "xml", "wiki", NULL };
 +    /* This array should match the order of the enum in oenv.h */
 +    const char *xvg_format[] = { NULL, "xmgrace", "xmgr", "none", NULL };
 +    /* This array should match the order of the enum in oenv.h */
 +    const char *time_units[] = { NULL, "fs", "ps", "ns", "us", "ms", "s", 
 +                                NULL };
 +    int  nicelevel=0,mantp=0,npri=0,debug_level=0,verbose_level=0;
 +    char *deffnm=NULL;
 +    real tbegin=0,tend=0,tdelta=0;
 +    gmx_bool bView=FALSE;
 +    
 +    t_pargs *all_pa=NULL;
 +    
 +    t_pargs npri_pa   = { "-npri", FALSE, etINT,   {&npri},
 +    "HIDDEN Set non blocking priority (try 128)" };
 +    t_pargs nice_pa   = { "-nice", FALSE, etINT,   {&nicelevel}, 
 +    "Set the nicelevel" };
 +    t_pargs deffnm_pa = { "-deffnm", FALSE, etSTR, {&deffnm}, 
 +    "Set the default filename for all file options" };
 +    t_pargs begin_pa  = { "-b",    FALSE, etTIME,  {&tbegin},        
 +    "First frame (%t) to read from trajectory" };
 +    t_pargs end_pa    = { "-e",    FALSE, etTIME,  {&tend},        
 +    "Last frame (%t) to read from trajectory" };
 +    t_pargs dt_pa     = { "-dt",   FALSE, etTIME,  {&tdelta},        
 +    "Only use frame when t MOD dt = first time (%t)" };
 +    t_pargs view_pa   = { "-w",    FALSE, etBOOL,  {&bView},
 +    "View output [TT].xvg[tt], [TT].xpm[tt], [TT].eps[tt] and [TT].pdb[tt] files" };
 +    t_pargs xvg_pa    = { "-xvg",  FALSE, etENUM,  {xvg_format},
 +    "xvg plot formatting" };
 +    t_pargs time_pa   = { "-tu",   FALSE, etENUM,  {time_units},
 +    "Time unit" };
 +    /* Maximum number of extra arguments */
 +#define EXTRA_PA 16
 +    
 +    t_pargs pca_pa[] = {
 +      { "-h",    FALSE, etBOOL, {&bHelp},     
 +      "Print help info and quit" }, 
 +      { "-version",  FALSE, etBOOL, {&bVersion},     
 +      "Print version info and quit" }, 
 +      { "-verb",    FALSE,  etINT, {&verbose_level},
 +      "HIDDENLevel of verbosity for this program" },
 +      { "-hidden", FALSE, etBOOL, {&bHidden},
 +        "HIDDENPrint hidden options" },
 +      { "-quiet",FALSE, etBOOL, {&bQuiet},
 +        "HIDDENDo not print help info" },
 +      { "-man",  FALSE, etENUM,  {manstr},
 +        "HIDDENWrite manual and quit" },
 +      { "-debug",FALSE, etINT, {&debug_level},
 +        "HIDDENWrite file with debug information, 1: short, 2: also x and f" },
 +    };
 +#define NPCA_PA asize(pca_pa)
 +    FILE *fp;  
 +    gmx_bool bPrint,bExit,bXvgr;
 +    int  i,j,k,npall,max_pa,cmdlength;
 +    char *ptr,*newdesc;
 +    const char *envstr;
 +    
 +#define FF(arg) ((Flags & arg)==arg)
 +
 +    cmdlength = strlen(argv[0]);
 +    /* Check for double arguments */
 +    for (i=1; (i<*argc); i++) 
 +    {
 +        cmdlength += strlen(argv[i]);
 +        if (argv[i] && (strlen(argv[i]) > 1) && (!isdigit(argv[i][1]))) 
 +        {
 +            for (j=i+1; (j<*argc); j++) 
 +            {
 +                if ( (argv[i][0]=='-') && (argv[j][0]=='-') && 
 +                    (strcmp(argv[i],argv[j])==0) ) 
 +                {
 +                    if (FF(PCA_NOEXIT_ON_ARGS))
 +                        fprintf(stderr,"Double command line argument %s\n",
 +                                argv[i]);
 +                    else
 +                        gmx_fatal(FARGS,"Double command line argument %s\n",
 +                                  argv[i]);
 +                }
 +            }
 +        }
 +    }
 +    debug_gmx();
 +    set_program_name(argv[0]);
 +    set_command_line(*argc, argv);
 +      
 +    /* Handle the flags argument, which is a bit field 
 +     * The FF macro returns whether or not the bit is set
 +     */
 +    bPrint        = !FF(PCA_SILENT);
 +    
 +    /* Check ALL the flags ... */
 +    max_pa = NPCA_PA + EXTRA_PA + npargs+1;
 +    snew(all_pa,max_pa);
 +    
 +    for(i=npall=0; (i<NPCA_PA); i++)
 +        npall = add_parg(npall,all_pa,&(pca_pa[i]));
 +    
 +#ifdef __sgi
 +    envstr = getenv("GMXNPRIALL");
 +    if (envstr)
 +        npri=strtol(envstr,NULL,10);
 +    if (FF(PCA_BE_NICE)) {
 +        envstr = getenv("GMXNPRI");
 +        if (envstr)
 +            npri=strtol(envstr,NULL,10);
 +    }
 +    npall = add_parg(npall,all_pa,&npri_pa);
 +#endif
 +    
 +    if (FF(PCA_BE_NICE)) 
 +        nicelevel=19;
 +    npall = add_parg(npall,all_pa,&nice_pa);
 +    
 +    if (FF(PCA_CAN_SET_DEFFNM)) 
 +        npall = add_parg(npall,all_pa,&deffnm_pa);   
 +    if (FF(PCA_CAN_BEGIN)) 
 +        npall = add_parg(npall,all_pa,&begin_pa);
 +    if (FF(PCA_CAN_END))
 +        npall = add_parg(npall,all_pa,&end_pa);
 +    if (FF(PCA_CAN_DT))
 +    {
 +        npall = add_parg(npall,all_pa,&dt_pa);
 +    }
 +    if (FF(PCA_TIME_UNIT)) {
 +        npall = add_parg(npall,all_pa,&time_pa);
 +    } 
 +    if (FF(PCA_CAN_VIEW)) 
 +        npall = add_parg(npall,all_pa,&view_pa);
 +    
 +    bXvgr = FALSE;
 +    for(i=0; (i<nfile); i++)
 +    {
 +        bXvgr = bXvgr ||  (fnm[i].ftp == efXVG);
 +    }
 +    if (bXvgr)
 +    {
 +        npall = add_parg(npall,all_pa,&xvg_pa);
 +    }
 +    
 +    /* Now append the program specific arguments */
 +    for(i=0; (i<npargs); i++)
 +        npall = add_parg(npall,all_pa,&(pa[i]));
 +    
 +    /* set etENUM options to default */
 +    for(i=0; (i<npall); i++)
 +    {
 +        if (all_pa[i].type==etENUM)
 +        {
 +            all_pa[i].u.c[0]=all_pa[i].u.c[1];
 +        }
 +    }
 +    set_default_time_unit(time_units,FF(PCA_TIME_UNIT));
 +    set_default_xvg_format(xvg_format);
 +  
 +    /* Now parse all the command-line options */
 +    get_pargs(argc,argv,npall,all_pa,FF(PCA_KEEP_ARGS));
 +
 +    /* set program name, command line, and default values for output options */
 +    output_env_init(oenv, *argc, argv, (time_unit_t)nenum(time_units), bView,
 +                    (xvg_format_t)nenum(xvg_format), verbose_level, debug_level);
 + 
 +    if (bVersion) {
 +      printf("Program: %s\n",output_env_get_program_name(*oenv));
 +      gmx_print_version_info(stdout);
 +      exit(0);
 +    }
 +    
 +    if (FF(PCA_CAN_SET_DEFFNM) && (deffnm!=NULL))
 +        set_default_file_name(deffnm);
 +    
 +    /* Parse the file args */
 +    parse_file_args(argc,argv,nfile,fnm,FF(PCA_KEEP_ARGS),!FF(PCA_NOT_READ_NODE));
 +    
 +    /* Open the debug file */
 +    if (debug_level > 0) {
 +        char buf[256];
 +        
 +        if (gmx_mpi_initialized())
 +            sprintf(buf,"%s%d.debug",output_env_get_short_program_name(*oenv),
 +                    gmx_node_rank());
 +        else
 +            sprintf(buf,"%s.debug",output_env_get_short_program_name(*oenv));
 +        
 +        init_debug(debug_level,buf);
 +        fprintf(stderr,"Opening debug file %s (src code file %s, line %d)\n",
 +                buf,__FILE__,__LINE__);
 +    }
 +    
 +    /* Now copy the results back... */
 +    for(i=0,k=npall-npargs; (i<npargs); i++,k++) 
 +        memcpy(&(pa[i]),&(all_pa[k]),(size_t)sizeof(pa[i]));
 +
 +
 +    for(i=0; (i<npall); i++)
 +        all_pa[i].desc = mk_desc(&(all_pa[i]), output_env_get_time_unit(*oenv));
 +   
 +    bExit = bHelp || (strcmp(manstr[0],"no") != 0);
 +    
 +#if (defined __sgi && USE_SGI_FPE)
 +    doexceptions();
 +#endif
 +    
 +    /* Set the nice level */
 +#ifdef __sgi
 +    if (npri != 0 && !bExit) {
 +        schedctl(MPTS_RTPRI,0,npri);
 +    }
 +#endif 
 +    
 +#ifdef HAVE_UNISTD_H
 +    
 +#ifndef GMX_NO_NICE
 +    /* Some systems, e.g. the Catamount kernel on the Cray XT3, do not have nice(2). */
 +    if (nicelevel != 0 && !bExit)
 +    {
 +#ifdef GMX_THREAD_MPI
 +        static gmx_bool nice_set=FALSE; /* only set it once */
 +        tMPI_Thread_mutex_lock(&init_mutex);
 +        if (!nice_set)
 +        {
 +#endif
 +            i=nice(nicelevel); /* assign ret value to avoid warnings */
 +#ifdef GMX_THREAD_MPI
 +            nice_set=TRUE;
 +        }
 +        tMPI_Thread_mutex_unlock(&init_mutex);
 +#endif
 +    }
 +#endif
 +#endif
 +    
 +    if (!(FF(PCA_QUIET) || bQuiet )) {
 +        if (bHelp)
 +            write_man(stderr,"help",output_env_get_program_name(*oenv),
 +                      ndesc,desc,nfile, fnm,npall,all_pa, nbugs,bugs,bHidden);
 +        else if (bPrint) {
 +            pr_fns(stderr,nfile,fnm);
 +            print_pargs(stderr,npall,all_pa,FALSE);
 +        }
 +    }
 +    
 +    if (strcmp(manstr[0],"no") != 0) {
 +        if(!strcmp(manstr[0],"completion")) {
 +            /* one file each for csh, bash and zsh if we do completions */
 +            fp=man_file(*oenv,"completion-zsh");
 +        
 +            write_man(fp,"completion-zsh",output_env_get_program_name(*oenv),
 +                      ndesc,desc,nfile, fnm, npall,all_pa,nbugs,bugs,bHidden);
 +            gmx_fio_fclose(fp);
 +            fp=man_file(*oenv,"completion-bash");
 +            write_man(fp,"completion-bash",output_env_get_program_name(*oenv),
 +                      ndesc,desc,nfile, fnm, npall,all_pa,nbugs,bugs,bHidden);
 +            gmx_fio_fclose(fp);
 +            fp=man_file(*oenv,"completion-csh");
 +            write_man(fp,"completion-csh",output_env_get_program_name(*oenv),
 +                      ndesc,desc,nfile, fnm, npall,all_pa,nbugs,bugs,bHidden);
 +            gmx_fio_fclose(fp);
 +        } else {
 +            fp=man_file(*oenv,manstr[0]);
 +            write_man(fp,manstr[0],output_env_get_program_name(*oenv),
 +                      ndesc,desc,nfile,fnm, npall, all_pa,nbugs,bugs,bHidden);
 +            gmx_fio_fclose(fp);
 +        }
 +    }
 +    
 +    /* convert time options, must be done after printing! */
 +    
 +    for(i=0; i<npall; i++) {
 +        if ((all_pa[i].type == etTIME) && (*all_pa[i].u.r >= 0)) {
 +            *all_pa[i].u.r *= output_env_get_time_invfactor(*oenv);
 +        }
 +    }
 +    
 +    /* Extract Time info from arguments */
 +    if (FF(PCA_CAN_BEGIN) && opt2parg_bSet("-b",npall,all_pa))
 +        setTimeValue(TBEGIN,opt2parg_real("-b",npall,all_pa));
 +    
 +    if (FF(PCA_CAN_END) && opt2parg_bSet("-e",npall,all_pa))
 +        setTimeValue(TEND,opt2parg_real("-e",npall,all_pa));
 +    
 +    if (FF(PCA_CAN_DT) && opt2parg_bSet("-dt",npall,all_pa))
 +        setTimeValue(TDELTA,opt2parg_real("-dt",npall,all_pa));
 +    
 +    /* clear memory */
 +    for (i = 0; i < npall; ++i)
 +        sfree((void *)all_pa[i].desc);
 +    sfree(all_pa);
 +    
 +    if (!FF(PCA_NOEXIT_ON_ARGS)) {
 +        if (*argc > 1) {
 +            gmx_cmd(argv[1]);
 +        }
 +    } 
 +    if (bExit) {
 +        if (gmx_parallel_env_initialized())
 +            /*gmx_abort(gmx_node_rank(),gmx_node_num(),0);*/
 +            gmx_finalize();
 +        exit(0);
 +    }
 +#undef FF
 +}
 +
index a80677daa9d0ce6fe2efe364e20bd152de77a875,0000000000000000000000000000000000000000..d37d5b7519323c8d74dd13f89f8b75df1290213c
mode 100644,000000..100644
--- /dev/null
@@@ -1,4352 -1,0 +1,4353 @@@
-     gmx_bool bCalcEnerVir = flags & GMX_PME_CALC_ENER_VIR;
-     gmx_bool bCalcF = flags & GMX_PME_CALC_F;
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +/* IMPORTANT FOR DEVELOPERS:
 + *
 + * Triclinic pme stuff isn't entirely trivial, and we've experienced
 + * some bugs during development (many of them due to me). To avoid
 + * this in the future, please check the following things if you make
 + * changes in this file:
 + *
 + * 1. You should obtain identical (at least to the PME precision)
 + *    energies, forces, and virial for
 + *    a rectangular box and a triclinic one where the z (or y) axis is
 + *    tilted a whole box side. For instance you could use these boxes:
 + *
 + *    rectangular       triclinic
 + *     2  0  0           2  0  0
 + *     0  2  0           0  2  0
 + *     0  0  6           2  2  6
 + *
 + * 2. You should check the energy conservation in a triclinic box.
 + *
 + * It might seem an overkill, but better safe than sorry.
 + * /Erik 001109
 + */
 +
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#ifdef GMX_LIB_MPI
 +#include <mpi.h>
 +#endif
 +#ifdef GMX_THREAD_MPI
 +#include "tmpi.h"
 +#endif
 +
 +#ifdef GMX_OPENMP
 +#include <omp.h>
 +#endif
 +
 +#include <stdio.h>
 +#include <string.h>
 +#include <math.h>
 +#include <assert.h>
 +#include "typedefs.h"
 +#include "txtdump.h"
 +#include "vec.h"
 +#include "gmxcomplex.h"
 +#include "smalloc.h"
 +#include "futil.h"
 +#include "coulomb.h"
 +#include "gmx_fatal.h"
 +#include "pme.h"
 +#include "network.h"
 +#include "physics.h"
 +#include "nrnb.h"
 +#include "copyrite.h"
 +#include "gmx_wallcycle.h"
 +#include "gmx_parallel_3dfft.h"
 +#include "pdbio.h"
 +#include "gmx_cyclecounter.h"
 +#include "macros.h"
 +
 +#if ( !defined(GMX_DOUBLE) && ( defined(GMX_IA32_SSE) || defined(GMX_X86_64_SSE) || defined(GMX_X86_64_SSE2) ) )
 +#include "gmx_sse2_single.h"
 +
 +#define PME_SSE
 +/* Some old AMD processors could have problems with unaligned loads+stores */
 +#ifndef GMX_FAHCORE
 +#define PME_SSE_UNALIGNED
 +#endif
 +#endif
 +
 +#define DFT_TOL 1e-7
 +/* #define PRT_FORCE */
 +/* conditions for on-the-fly time measurement */
 +/* #define TAKETIME (step > 1 && timesteps < 10) */
 +#define TAKETIME FALSE
 +
 +/* #define PME_TIME_THREADS */
 +
 +#ifdef GMX_DOUBLE
 +#define mpi_type MPI_DOUBLE
 +#else
 +#define mpi_type MPI_FLOAT
 +#endif
 +
 +/* GMX_CACHE_SEP should be a multiple of 16 to preserve alignment */
 +#define GMX_CACHE_SEP 64
 +
 +/* We only define a maximum to be able to use local arrays without allocation.
 + * An order larger than 12 should never be needed, even for test cases.
 + * If needed it can be changed here.
 + */
 +#define PME_ORDER_MAX 12
 +
 +/* Internal datastructures */
 +typedef struct {
 +    int send_index0;
 +    int send_nindex;
 +    int recv_index0;
 +    int recv_nindex;
 +} pme_grid_comm_t;
 +
 +typedef struct {
 +#ifdef GMX_MPI
 +    MPI_Comm mpi_comm;
 +#endif
 +    int  nnodes,nodeid;
 +    int  *s2g0;
 +    int  *s2g1;
 +    int  noverlap_nodes;
 +    int  *send_id,*recv_id;
 +    pme_grid_comm_t *comm_data;
 +    real *sendbuf;
 +    real *recvbuf;
 +} pme_overlap_t;
 +
 +typedef struct {
 +    int *n;     /* Cumulative counts of the number of particles per thread */
 +    int nalloc; /* Allocation size of i */
 +    int *i;     /* Particle indices ordered on thread index (n) */
 +} thread_plist_t;
 +
 +typedef struct {
 +    int  n;
 +    int  *ind;
 +    splinevec theta;
 +    splinevec dtheta;
 +} splinedata_t;
 +
 +typedef struct {
 +    int  dimind;            /* The index of the dimension, 0=x, 1=y */
 +    int  nslab;
 +    int  nodeid;
 +#ifdef GMX_MPI
 +    MPI_Comm mpi_comm;
 +#endif
 +
 +    int  *node_dest;        /* The nodes to send x and q to with DD */
 +    int  *node_src;         /* The nodes to receive x and q from with DD */
 +    int  *buf_index;        /* Index for commnode into the buffers */
 +
 +    int  maxshift;
 +
 +    int  npd;
 +    int  pd_nalloc;
 +    int  *pd;
 +    int  *count;            /* The number of atoms to send to each node */
 +    int  **count_thread;
 +    int  *rcount;           /* The number of atoms to receive */
 +
 +    int  n;
 +    int  nalloc;
 +    rvec *x;
 +    real *q;
 +    rvec *f;
 +    gmx_bool bSpread;       /* These coordinates are used for spreading */
 +    int  pme_order;
 +    ivec *idx;
 +    rvec *fractx;            /* Fractional coordinate relative to the
 +                              * lower cell boundary
 +                              */
 +    int  nthread;
 +    int  *thread_idx;        /* Which thread should spread which charge */
 +    thread_plist_t *thread_plist;
 +    splinedata_t *spline;
 +} pme_atomcomm_t;
 +
 +#define FLBS  3
 +#define FLBSZ 4
 +
 +typedef struct {
 +    ivec ci;     /* The spatial location of this grid       */
 +    ivec n;      /* The size of *grid, including order-1    */
 +    ivec offset; /* The grid offset from the full node grid */
 +    int  order;  /* PME spreading order                     */
 +    real *grid;  /* The local thread's grid, size n         */
 +} pmegrid_t;
 +
 +typedef struct {
 +    pmegrid_t grid;     /* The full node grid (non thread-local)            */
 +    int  nthread;       /* The number of threads operating on this grid     */
 +    ivec nc;            /* The local spatial decomposition over the threads */
 +    pmegrid_t *grid_th; /* Array of grids for each thread                   */
 +    int  **g2t;         /* The grid to thread index                         */
 +    ivec nthread_comm;  /* The number of threads to communicate with        */
 +} pmegrids_t;
 +
 +
 +typedef struct {
 +#ifdef PME_SSE
 +    /* Masks for SSE aligned spreading and gathering */
 +    __m128 mask_SSE0[6],mask_SSE1[6];
 +#else
 +    int dummy; /* C89 requires that struct has at least one member */
 +#endif
 +} pme_spline_work_t;
 +
 +typedef struct {
 +    /* work data for solve_pme */
 +    int      nalloc;
 +    real *   mhx;
 +    real *   mhy;
 +    real *   mhz;
 +    real *   m2;
 +    real *   denom;
 +    real *   tmp1_alloc;
 +    real *   tmp1;
 +    real *   eterm;
 +    real *   m2inv;
 +
 +    real     energy;
 +    matrix   vir;
 +} pme_work_t;
 +
 +typedef struct gmx_pme {
 +    int  ndecompdim;         /* The number of decomposition dimensions */
 +    int  nodeid;             /* Our nodeid in mpi->mpi_comm */
 +    int  nodeid_major;
 +    int  nodeid_minor;
 +    int  nnodes;             /* The number of nodes doing PME */
 +    int  nnodes_major;
 +    int  nnodes_minor;
 +
 +    MPI_Comm mpi_comm;
 +    MPI_Comm mpi_comm_d[2];  /* Indexed on dimension, 0=x, 1=y */
 +#ifdef GMX_MPI
 +    MPI_Datatype  rvec_mpi;  /* the pme vector's MPI type */
 +#endif
 +
 +    int  nthread;            /* The number of threads doing PME */
 +
 +    gmx_bool bPPnode;        /* Node also does particle-particle forces */
 +    gmx_bool bFEP;           /* Compute Free energy contribution */
 +    int nkx,nky,nkz;         /* Grid dimensions */
 +    gmx_bool bP3M;           /* Do P3M: optimize the influence function */
 +    int pme_order;
 +    real epsilon_r;
 +
 +    pmegrids_t pmegridA;  /* Grids on which we do spreading/interpolation, includes overlap */
 +    pmegrids_t pmegridB;
 +    /* The PME charge spreading grid sizes/strides, includes pme_order-1 */
 +    int     pmegrid_nx,pmegrid_ny,pmegrid_nz;
 +    /* pmegrid_nz might be larger than strictly necessary to ensure
 +     * memory alignment, pmegrid_nz_base gives the real base size.
 +     */
 +    int     pmegrid_nz_base;
 +    /* The local PME grid starting indices */
 +    int     pmegrid_start_ix,pmegrid_start_iy,pmegrid_start_iz;
 +
 +    /* Work data for spreading and gathering */
 +    pme_spline_work_t spline_work;
 +
 +    real *fftgridA;             /* Grids for FFT. With 1D FFT decomposition this can be a pointer */
 +    real *fftgridB;             /* inside the interpolation grid, but separate for 2D PME decomp. */
 +    int   fftgrid_nx,fftgrid_ny,fftgrid_nz;
 +
 +    t_complex *cfftgridA;             /* Grids for complex FFT data */
 +    t_complex *cfftgridB;
 +    int   cfftgrid_nx,cfftgrid_ny,cfftgrid_nz;
 +
 +    gmx_parallel_3dfft_t  pfft_setupA;
 +    gmx_parallel_3dfft_t  pfft_setupB;
 +
 +    int  *nnx,*nny,*nnz;
 +    real *fshx,*fshy,*fshz;
 +
 +    pme_atomcomm_t atc[2];  /* Indexed on decomposition index */
 +    matrix    recipbox;
 +    splinevec bsp_mod;
 +
 +    pme_overlap_t overlap[2]; /* Indexed on dimension, 0=x, 1=y */
 +
 +    pme_atomcomm_t atc_energy; /* Only for gmx_pme_calc_energy */
 +
 +    rvec *bufv;             /* Communication buffer */
 +    real *bufr;             /* Communication buffer */
 +    int  buf_nalloc;        /* The communication buffer size */
 +
 +    /* thread local work data for solve_pme */
 +    pme_work_t *work;
 +
 +    /* Work data for PME_redist */
 +    gmx_bool redist_init;
 +    int *    scounts;
 +    int *    rcounts;
 +    int *    sdispls;
 +    int *    rdispls;
 +    int *    sidx;
 +    int *    idxa;
 +    real *   redist_buf;
 +    int      redist_buf_nalloc;
 +
 +    /* Work data for sum_qgrid */
 +    real *   sum_qgrid_tmp;
 +    real *   sum_qgrid_dd_tmp;
 +} t_gmx_pme;
 +
 +
 +static void calc_interpolation_idx(gmx_pme_t pme,pme_atomcomm_t *atc,
 +                                   int start,int end,int thread)
 +{
 +    int  i;
 +    int  *idxptr,tix,tiy,tiz;
 +    real *xptr,*fptr,tx,ty,tz;
 +    real rxx,ryx,ryy,rzx,rzy,rzz;
 +    int  nx,ny,nz;
 +    int  start_ix,start_iy,start_iz;
 +    int  *g2tx,*g2ty,*g2tz;
 +    gmx_bool bThreads;
 +    int  *thread_idx=NULL;
 +    thread_plist_t *tpl=NULL;
 +    int  *tpl_n=NULL;
 +    int  thread_i;
 +
 +    nx  = pme->nkx;
 +    ny  = pme->nky;
 +    nz  = pme->nkz;
 +
 +    start_ix = pme->pmegrid_start_ix;
 +    start_iy = pme->pmegrid_start_iy;
 +    start_iz = pme->pmegrid_start_iz;
 +
 +    rxx = pme->recipbox[XX][XX];
 +    ryx = pme->recipbox[YY][XX];
 +    ryy = pme->recipbox[YY][YY];
 +    rzx = pme->recipbox[ZZ][XX];
 +    rzy = pme->recipbox[ZZ][YY];
 +    rzz = pme->recipbox[ZZ][ZZ];
 +
 +    g2tx = pme->pmegridA.g2t[XX];
 +    g2ty = pme->pmegridA.g2t[YY];
 +    g2tz = pme->pmegridA.g2t[ZZ];
 +
 +    bThreads = (atc->nthread > 1);
 +    if (bThreads)
 +    {
 +        thread_idx = atc->thread_idx;
 +
 +        tpl   = &atc->thread_plist[thread];
 +        tpl_n = tpl->n;
 +        for(i=0; i<atc->nthread; i++)
 +        {
 +            tpl_n[i] = 0;
 +        }
 +    }
 +
 +    for(i=start; i<end; i++) {
 +        xptr   = atc->x[i];
 +        idxptr = atc->idx[i];
 +        fptr   = atc->fractx[i];
 +
 +        /* Fractional coordinates along box vectors, add 2.0 to make 100% sure we are positive for triclinic boxes */
 +        tx = nx * ( xptr[XX] * rxx + xptr[YY] * ryx + xptr[ZZ] * rzx + 2.0 );
 +        ty = ny * (                  xptr[YY] * ryy + xptr[ZZ] * rzy + 2.0 );
 +        tz = nz * (                                   xptr[ZZ] * rzz + 2.0 );
 +
 +        tix = (int)(tx);
 +        tiy = (int)(ty);
 +        tiz = (int)(tz);
 +
 +        /* Because decomposition only occurs in x and y,
 +         * we never have a fraction correction in z.
 +         */
 +        fptr[XX] = tx - tix + pme->fshx[tix];
 +        fptr[YY] = ty - tiy + pme->fshy[tiy];
 +        fptr[ZZ] = tz - tiz;
 +
 +        idxptr[XX] = pme->nnx[tix];
 +        idxptr[YY] = pme->nny[tiy];
 +        idxptr[ZZ] = pme->nnz[tiz];
 +
 +#ifdef DEBUG
 +        range_check(idxptr[XX],0,pme->pmegrid_nx);
 +        range_check(idxptr[YY],0,pme->pmegrid_ny);
 +        range_check(idxptr[ZZ],0,pme->pmegrid_nz);
 +#endif
 +
 +        if (bThreads)
 +        {
 +            thread_i = g2tx[idxptr[XX]] + g2ty[idxptr[YY]] + g2tz[idxptr[ZZ]];
 +            thread_idx[i] = thread_i;
 +            tpl_n[thread_i]++;
 +        }
 +    }
 +
 +    if (bThreads)
 +    {
 +        /* Make a list of particle indices sorted on thread */
 +
 +        /* Get the cumulative count */
 +        for(i=1; i<atc->nthread; i++)
 +        {
 +            tpl_n[i] += tpl_n[i-1];
 +        }
 +        /* The current implementation distributes particles equally
 +         * over the threads, so we could actually allocate for that
 +         * in pme_realloc_atomcomm_things.
 +         */
 +        if (tpl_n[atc->nthread-1] > tpl->nalloc)
 +        {
 +            tpl->nalloc = over_alloc_large(tpl_n[atc->nthread-1]);
 +            srenew(tpl->i,tpl->nalloc);
 +        }
 +        /* Set tpl_n to the cumulative start */
 +        for(i=atc->nthread-1; i>=1; i--)
 +        {
 +            tpl_n[i] = tpl_n[i-1];
 +        }
 +        tpl_n[0] = 0;
 +
 +        /* Fill our thread local array with indices sorted on thread */
 +        for(i=start; i<end; i++)
 +        {
 +            tpl->i[tpl_n[atc->thread_idx[i]]++] = i;
 +        }
 +        /* Now tpl_n contains the cumulative count again */
 +    }
 +}
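
A worked number for the "+ 2.0" shift above: it keeps tx positive even when a
triclinic correction puts an atom slightly outside the box, so truncation plus
the nnx/nny/nnz modulo tables implement a proper floor-mod. For example:

    #include <stdio.h>

    int main(void)
    {
        int    nx   = 32;
        double frac = -0.01;             /* fractional coordinate just below 0 */
        double tx   = nx*(frac + 2.0);   /* 63.68: guaranteed positive */
        int    tix  = (int)tx;           /* 63 */
        printf("cell %d, local frac %.2f\n", tix % nx, tx - tix);
        /* prints: cell 31, local frac 0.68 -- truncating nx*frac = -0.32
         * toward zero would instead give cell 0 with a negative fraction */
        return 0;
    }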
 +
 +static void make_thread_local_ind(pme_atomcomm_t *atc,
 +                                  int thread,splinedata_t *spline)
 +{
 +    int  n,t,i,start,end;
 +    thread_plist_t *tpl;
 +
 +    /* Combine the indices made by each thread into one index */
 +
 +    n = 0;
 +    start = 0;
 +    for(t=0; t<atc->nthread; t++)
 +    {
 +        tpl = &atc->thread_plist[t];
 +        /* Copy our part (start - end) from the list of thread t */
 +        if (thread > 0)
 +        {
 +            start = tpl->n[thread-1];
 +        }
 +        end = tpl->n[thread];
 +        for(i=start; i<end; i++)
 +        {
 +            spline->ind[n++] = tpl->i[i];
 +        }
 +    }
 +
 +    spline->n = n;
 +}
 +
 +
 +static void pme_calc_pidx(int start, int end,
 +                          matrix recipbox, rvec x[],
 +                          pme_atomcomm_t *atc, int *count)
 +{
 +    int  nslab,i;
 +    int  si;
 +    real *xptr,s;
 +    real rxx,ryx,rzx,ryy,rzy;
 +    int *pd;
 +
 +    /* Calculate the PME task index (pidx) for each atom.
 +     * Here we always assign equally sized slabs to each node
 +     * for load balancing reasons (the PME grid spacing is not used).
 +     */
 +
 +    nslab = atc->nslab;
 +    pd    = atc->pd;
 +
 +    /* Reset the count */
 +    for(i=0; i<nslab; i++)
 +    {
 +        count[i] = 0;
 +    }
 +
 +    if (atc->dimind == 0)
 +    {
 +        rxx = recipbox[XX][XX];
 +        ryx = recipbox[YY][XX];
 +        rzx = recipbox[ZZ][XX];
 +        /* Calculate the node index in x-dimension */
 +        for(i=start; i<end; i++)
 +        {
 +            xptr   = x[i];
 +            /* Fractional coordinates along box vectors */
 +            s = nslab*(xptr[XX]*rxx + xptr[YY]*ryx + xptr[ZZ]*rzx);
 +            si = (int)(s + 2*nslab) % nslab;
 +            pd[i] = si;
 +            count[si]++;
 +        }
 +    }
 +    else
 +    {
 +        ryy = recipbox[YY][YY];
 +        rzy = recipbox[ZZ][YY];
 +        /* Calculate the node index in y-dimension */
 +        for(i=start; i<end; i++)
 +        {
 +            xptr   = x[i];
 +            /* Fractional coordinates along box vectors */
 +            s = nslab*(xptr[YY]*ryy + xptr[ZZ]*rzy);
 +            si = (int)(s + 2*nslab) % nslab;
 +            pd[i] = si;
 +            count[si]++;
 +        }
 +    }
 +}
 +
 +static void pme_calc_pidx_wrapper(int natoms, matrix recipbox, rvec x[],
 +                                  pme_atomcomm_t *atc)
 +{
 +    int nthread,thread,slab;
 +
 +    nthread = atc->nthread;
 +
 +#pragma omp parallel for num_threads(nthread) schedule(static)
 +    for(thread=0; thread<nthread; thread++)
 +    {
 +        pme_calc_pidx(natoms* thread   /nthread,
 +                      natoms*(thread+1)/nthread,
 +                      recipbox,x,atc,atc->count_thread[thread]);
 +    }
 +    /* Non-parallel reduction, since nslab is small */
 +
 +    for(thread=1; thread<nthread; thread++)
 +    {
 +        for(slab=0; slab<atc->nslab; slab++)
 +        {
 +            atc->count_thread[0][slab] += atc->count_thread[thread][slab];
 +        }
 +    }
 +}
 +
 +static void pme_realloc_splinedata(splinedata_t *spline, pme_atomcomm_t *atc)
 +{
 +    int i,d;
 +
 +    srenew(spline->ind,atc->nalloc);
 +    /* Initialize the index to identity so it works without threads */
 +    for(i=0; i<atc->nalloc; i++)
 +    {
 +        spline->ind[i] = i;
 +    }
 +
 +    for(d=0;d<DIM;d++)
 +    {
 +        srenew(spline->theta[d] ,atc->pme_order*atc->nalloc);
 +        srenew(spline->dtheta[d],atc->pme_order*atc->nalloc);
 +    }
 +}
 +
 +static void pme_realloc_atomcomm_things(pme_atomcomm_t *atc)
 +{
 +    int nalloc_old,i,j,nalloc_tpl;
 +
 +    /* We have to avoid a NULL pointer for atc->x to prevent
 +     * possible fatal errors in MPI routines.
 +     */
 +    if (atc->n > atc->nalloc || atc->nalloc == 0)
 +    {
 +        nalloc_old = atc->nalloc;
 +        atc->nalloc = over_alloc_dd(max(atc->n,1));
 +
 +        if (atc->nslab > 1) {
 +            srenew(atc->x,atc->nalloc);
 +            srenew(atc->q,atc->nalloc);
 +            srenew(atc->f,atc->nalloc);
 +            for(i=nalloc_old; i<atc->nalloc; i++)
 +            {
 +                clear_rvec(atc->f[i]);
 +            }
 +        }
 +        if (atc->bSpread) {
 +            srenew(atc->fractx,atc->nalloc);
 +            srenew(atc->idx   ,atc->nalloc);
 +
 +            if (atc->nthread > 1)
 +            {
 +                srenew(atc->thread_idx,atc->nalloc);
 +            }
 +
 +            for(i=0; i<atc->nthread; i++)
 +            {
 +                pme_realloc_splinedata(&atc->spline[i],atc);
 +            }
 +        }
 +    }
 +}
 +
 +static void pmeredist_pd(gmx_pme_t pme, gmx_bool forw,
 +                         int n, gmx_bool bXF, rvec *x_f, real *charge,
 +                         pme_atomcomm_t *atc)
 +/* Redistribute particle data for the PME calculation,
 + * using domain decomposition along the x coordinate.
 + */
 +{
 +    int *idxa;
 +    int i, ii;
 +
 +    if(FALSE == pme->redist_init) {
 +        snew(pme->scounts,atc->nslab);
 +        snew(pme->rcounts,atc->nslab);
 +        snew(pme->sdispls,atc->nslab);
 +        snew(pme->rdispls,atc->nslab);
 +        snew(pme->sidx,atc->nslab);
 +        pme->redist_init = TRUE;
 +    }
 +    if (n > pme->redist_buf_nalloc) {
 +        pme->redist_buf_nalloc = over_alloc_dd(n);
 +        srenew(pme->redist_buf,pme->redist_buf_nalloc*DIM);
 +    }
 +
 +    pme->idxa = atc->pd;
 +
 +#ifdef GMX_MPI
 +    if (forw && bXF) {
 +        /* forward, redistribution from pp to pme */
 +
 +        /* Calculate send counts and exchange them with other nodes */
 +        for(i=0; (i<atc->nslab); i++) pme->scounts[i]=0;
 +        for(i=0; (i<n); i++) pme->scounts[pme->idxa[i]]++;
 +        MPI_Alltoall( pme->scounts, 1, MPI_INT, pme->rcounts, 1, MPI_INT, atc->mpi_comm);
 +
 +        /* Calculate send and receive displacements and index into send
 +           buffer */
 +        pme->sdispls[0]=0;
 +        pme->rdispls[0]=0;
 +        pme->sidx[0]=0;
 +        for(i=1; i<atc->nslab; i++) {
 +            pme->sdispls[i]=pme->sdispls[i-1]+pme->scounts[i-1];
 +            pme->rdispls[i]=pme->rdispls[i-1]+pme->rcounts[i-1];
 +            pme->sidx[i]=pme->sdispls[i];
 +        }
 +        /* Total # of particles to be received */
 +        atc->n = pme->rdispls[atc->nslab-1] + pme->rcounts[atc->nslab-1];
 +
 +        pme_realloc_atomcomm_things(atc);
 +
 +        /* Copy particle coordinates into send buffer and exchange */
 +        for(i=0; (i<n); i++) {
 +            ii=DIM*pme->sidx[pme->idxa[i]];
 +            pme->sidx[pme->idxa[i]]++;
 +            pme->redist_buf[ii+XX]=x_f[i][XX];
 +            pme->redist_buf[ii+YY]=x_f[i][YY];
 +            pme->redist_buf[ii+ZZ]=x_f[i][ZZ];
 +        }
 +        MPI_Alltoallv(pme->redist_buf, pme->scounts, pme->sdispls,
 +                      pme->rvec_mpi, atc->x, pme->rcounts, pme->rdispls,
 +                      pme->rvec_mpi, atc->mpi_comm);
 +    }
 +    if (forw) {
 +        /* Copy charge into send buffer and exchange */
 +        for(i=0; i<atc->nslab; i++) pme->sidx[i]=pme->sdispls[i];
 +        for(i=0; (i<n); i++) {
 +            ii=pme->sidx[pme->idxa[i]];
 +            pme->sidx[pme->idxa[i]]++;
 +            pme->redist_buf[ii]=charge[i];
 +        }
 +        MPI_Alltoallv(pme->redist_buf, pme->scounts, pme->sdispls, mpi_type,
 +                      atc->q, pme->rcounts, pme->rdispls, mpi_type,
 +                      atc->mpi_comm);
 +    }
 +    else { /* backward, redistribution from pme to pp */
 +        MPI_Alltoallv(atc->f, pme->rcounts, pme->rdispls, pme->rvec_mpi,
 +                      pme->redist_buf, pme->scounts, pme->sdispls,
 +                      pme->rvec_mpi, atc->mpi_comm);
 +
 +        /* Copy data from receive buffer */
 +        for(i=0; i<atc->nslab; i++)
 +            pme->sidx[i] = pme->sdispls[i];
 +        for(i=0; (i<n); i++) {
 +            ii = DIM*pme->sidx[pme->idxa[i]];
 +            x_f[i][XX] += pme->redist_buf[ii+XX];
 +            x_f[i][YY] += pme->redist_buf[ii+YY];
 +            x_f[i][ZZ] += pme->redist_buf[ii+ZZ];
 +            pme->sidx[pme->idxa[i]]++;
 +        }
 +    }
 +#endif
 +}
 +
 +static void pme_dd_sendrecv(pme_atomcomm_t *atc,
 +                            gmx_bool bBackward,int shift,
 +                            void *buf_s,int nbyte_s,
 +                            void *buf_r,int nbyte_r)
 +{
 +#ifdef GMX_MPI
 +    int dest,src;
 +    MPI_Status stat;
 +
 +    if (bBackward == FALSE) {
 +        dest = atc->node_dest[shift];
 +        src  = atc->node_src[shift];
 +    } else {
 +        dest = atc->node_src[shift];
 +        src  = atc->node_dest[shift];
 +    }
 +
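 +    /* Post only the communication calls that actually carry data:
 +     * a combined send-receive when both sides have data, otherwise
 +     * a lone send or receive, so no zero-size messages are issued.
 +     */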
 +    if (nbyte_s > 0 && nbyte_r > 0) {
 +        MPI_Sendrecv(buf_s,nbyte_s,MPI_BYTE,
 +                     dest,shift,
 +                     buf_r,nbyte_r,MPI_BYTE,
 +                     src,shift,
 +                     atc->mpi_comm,&stat);
 +    } else if (nbyte_s > 0) {
 +        MPI_Send(buf_s,nbyte_s,MPI_BYTE,
 +                 dest,shift,
 +                 atc->mpi_comm);
 +    } else if (nbyte_r > 0) {
 +        MPI_Recv(buf_r,nbyte_r,MPI_BYTE,
 +                 src,shift,
 +                 atc->mpi_comm,&stat);
 +    }
 +#endif
 +}
 +
 +static void dd_pmeredist_x_q(gmx_pme_t pme,
 +                             int n, gmx_bool bX, rvec *x, real *charge,
 +                             pme_atomcomm_t *atc)
 +{
 +    int *commnode,*buf_index;
 +    int nnodes_comm,i,nsend,local_pos,buf_pos,node,scount,rcount;
 +
 +    commnode  = atc->node_dest;
 +    buf_index = atc->buf_index;
 +
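 +    /* With domain decomposition we only communicate with the nearby
 +     * slabs: at most maxshift in each direction, and never more than
 +     * all the other slabs.
 +     */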
 +    nnodes_comm = min(2*atc->maxshift,atc->nslab-1);
 +
 +    nsend = 0;
 +    for(i=0; i<nnodes_comm; i++) {
 +        buf_index[commnode[i]] = nsend;
 +        nsend += atc->count[commnode[i]];
 +    }
 +    if (bX) {
 +        if (atc->count[atc->nodeid] + nsend != n)
 +            gmx_fatal(FARGS,"%d particles communicated to PME node %d are more than 2/3 times the cut-off out of the domain decomposition cell of their charge group in dimension %c.\n"
 +                      "This usually means that your system is not well equilibrated.",
 +                      n - (atc->count[atc->nodeid] + nsend),
 +                      pme->nodeid,'x'+atc->dimind);
 +
 +        if (nsend > pme->buf_nalloc) {
 +            pme->buf_nalloc = over_alloc_dd(nsend);
 +            srenew(pme->bufv,pme->buf_nalloc);
 +            srenew(pme->bufr,pme->buf_nalloc);
 +        }
 +
 +        atc->n = atc->count[atc->nodeid];
 +        for(i=0; i<nnodes_comm; i++) {
 +            scount = atc->count[commnode[i]];
 +            /* Communicate the count */
 +            if (debug)
 +                fprintf(debug,"dimind %d PME node %d send to node %d: %d\n",
 +                        atc->dimind,atc->nodeid,commnode[i],scount);
 +            pme_dd_sendrecv(atc,FALSE,i,
 +                            &scount,sizeof(int),
 +                            &atc->rcount[i],sizeof(int));
 +            atc->n += atc->rcount[i];
 +        }
 +
 +        pme_realloc_atomcomm_things(atc);
 +    }
 +
 +    local_pos = 0;
 +    for(i=0; i<n; i++) {
 +        node = atc->pd[i];
 +        if (node == atc->nodeid) {
 +            /* Copy direct to the receive buffer */
 +            if (bX) {
 +                copy_rvec(x[i],atc->x[local_pos]);
 +            }
 +            atc->q[local_pos] = charge[i];
 +            local_pos++;
 +        } else {
 +            /* Copy to the send buffer */
 +            if (bX) {
 +                copy_rvec(x[i],pme->bufv[buf_index[node]]);
 +            }
 +            pme->bufr[buf_index[node]] = charge[i];
 +            buf_index[node]++;
 +        }
 +    }
 +
 +    buf_pos = 0;
 +    for(i=0; i<nnodes_comm; i++) {
 +        scount = atc->count[commnode[i]];
 +        rcount = atc->rcount[i];
 +        if (scount > 0 || rcount > 0) {
 +            if (bX) {
 +                /* Communicate the coordinates */
 +                pme_dd_sendrecv(atc,FALSE,i,
 +                                pme->bufv[buf_pos],scount*sizeof(rvec),
 +                                atc->x[local_pos],rcount*sizeof(rvec));
 +            }
 +            /* Communicate the charges */
 +            pme_dd_sendrecv(atc,FALSE,i,
 +                            pme->bufr+buf_pos,scount*sizeof(real),
 +                            atc->q+local_pos,rcount*sizeof(real));
 +            buf_pos   += scount;
 +            local_pos += atc->rcount[i];
 +        }
 +    }
 +}
 +
 +static void dd_pmeredist_f(gmx_pme_t pme, pme_atomcomm_t *atc,
 +                           int n, rvec *f,
 +                           gmx_bool bAddF)
 +{
 +  int *commnode,*buf_index;
 +  int nnodes_comm,local_pos,buf_pos,i,scount,rcount,node;
 +
 +  commnode  = atc->node_dest;
 +  buf_index = atc->buf_index;
 +
 +  nnodes_comm = min(2*atc->maxshift,atc->nslab-1);
 +
 +  local_pos = atc->count[atc->nodeid];
 +  buf_pos = 0;
 +  for(i=0; i<nnodes_comm; i++) {
 +    scount = atc->rcount[i];
 +    rcount = atc->count[commnode[i]];
 +    if (scount > 0 || rcount > 0) {
 +      /* Communicate the forces */
 +      pme_dd_sendrecv(atc,TRUE,i,
 +                      atc->f[local_pos],scount*sizeof(rvec),
 +                      pme->bufv[buf_pos],rcount*sizeof(rvec));
 +      local_pos += scount;
 +    }
 +    buf_index[commnode[i]] = buf_pos;
 +    buf_pos   += rcount;
 +  }
 +
 +    local_pos = 0;
 +    if (bAddF)
 +    {
 +        for(i=0; i<n; i++)
 +        {
 +            node = atc->pd[i];
 +            if (node == atc->nodeid)
 +            {
 +                /* Add from the local force array */
 +                rvec_inc(f[i],atc->f[local_pos]);
 +                local_pos++;
 +            }
 +            else
 +            {
 +                /* Add from the receive buffer */
 +                rvec_inc(f[i],pme->bufv[buf_index[node]]);
 +                buf_index[node]++;
 +            }
 +        }
 +    }
 +    else
 +    {
 +        for(i=0; i<n; i++)
 +        {
 +            node = atc->pd[i];
 +            if (node == atc->nodeid)
 +            {
 +                /* Copy from the local force array */
 +                copy_rvec(atc->f[local_pos],f[i]);
 +                local_pos++;
 +            }
 +            else
 +            {
 +                /* Copy from the receive buffer */
 +                copy_rvec(pme->bufv[buf_index[node]],f[i]);
 +                buf_index[node]++;
 +            }
 +        }
 +    }
 +}
 +
 +#ifdef GMX_MPI
 +static void
 +gmx_sum_qgrid_dd(gmx_pme_t pme, real *grid, int direction)
 +{
 +    pme_overlap_t *overlap;
 +    int send_index0,send_nindex;
 +    int recv_index0,recv_nindex;
 +    MPI_Status stat;
 +    int i,j,k,ix,iy,iz,icnt;
 +    int ipulse,send_id,recv_id,datasize;
 +    real *p;
 +    real *sendptr,*recvptr;
 +
 +    /* Start with minor-rank communication. This is a bit of a pain since it is not contiguous */
 +    overlap = &pme->overlap[1];
 +
 +    for(ipulse=0;ipulse<overlap->noverlap_nodes;ipulse++)
 +    {
 +        /* Since we have already (un)wrapped the overlap in the z-dimension,
 +         * we only have to communicate 0 to nkz (not pmegrid_nz).
 +         */
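 +        /* In the forward direction each node sends its overlap region
 +         * to the owning node, which adds the values to its local grid;
 +         * in the backward direction the roles are swapped and the
 +         * owners send the summed values back.
 +         */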
 +        if (direction==GMX_SUM_QGRID_FORWARD)
 +        {
 +            send_id = overlap->send_id[ipulse];
 +            recv_id = overlap->recv_id[ipulse];
 +            send_index0   = overlap->comm_data[ipulse].send_index0;
 +            send_nindex   = overlap->comm_data[ipulse].send_nindex;
 +            recv_index0   = overlap->comm_data[ipulse].recv_index0;
 +            recv_nindex   = overlap->comm_data[ipulse].recv_nindex;
 +        }
 +        else
 +        {
 +            send_id = overlap->recv_id[ipulse];
 +            recv_id = overlap->send_id[ipulse];
 +            send_index0   = overlap->comm_data[ipulse].recv_index0;
 +            send_nindex   = overlap->comm_data[ipulse].recv_nindex;
 +            recv_index0   = overlap->comm_data[ipulse].send_index0;
 +            recv_nindex   = overlap->comm_data[ipulse].send_nindex;
 +        }
 +
 +        /* Copy data to contiguous send buffer */
 +        if (debug)
 +        {
 +            fprintf(debug,"PME send node %d %d -> %d grid start %d Communicating %d to %d\n",
 +                    pme->nodeid,overlap->nodeid,send_id,
 +                    pme->pmegrid_start_iy,
 +                    send_index0-pme->pmegrid_start_iy,
 +                    send_index0-pme->pmegrid_start_iy+send_nindex);
 +        }
 +        icnt = 0;
 +        for(i=0;i<pme->pmegrid_nx;i++)
 +        {
 +            ix = i;
 +            for(j=0;j<send_nindex;j++)
 +            {
 +                iy = j + send_index0 - pme->pmegrid_start_iy;
 +                for(k=0;k<pme->nkz;k++)
 +                {
 +                    iz = k;
 +                    overlap->sendbuf[icnt++] = grid[ix*(pme->pmegrid_ny*pme->pmegrid_nz)+iy*(pme->pmegrid_nz)+iz];
 +                }
 +            }
 +        }
 +
 +        datasize      = pme->pmegrid_nx * pme->nkz;
 +
 +        MPI_Sendrecv(overlap->sendbuf,send_nindex*datasize,GMX_MPI_REAL,
 +                     send_id,ipulse,
 +                     overlap->recvbuf,recv_nindex*datasize,GMX_MPI_REAL,
 +                     recv_id,ipulse,
 +                     overlap->mpi_comm,&stat);
 +
 +        /* Get data from contiguous recv buffer */
 +        if (debug)
 +        {
 +            fprintf(debug,"PME recv node %d %d <- %d grid start %d Communicating %d to %d\n",
 +                    pme->nodeid,overlap->nodeid,recv_id,
 +                    pme->pmegrid_start_iy,
 +                    recv_index0-pme->pmegrid_start_iy,
 +                    recv_index0-pme->pmegrid_start_iy+recv_nindex);
 +        }
 +        icnt = 0;
 +        for(i=0;i<pme->pmegrid_nx;i++)
 +        {
 +            ix = i;
 +            for(j=0;j<recv_nindex;j++)
 +            {
 +                iy = j + recv_index0 - pme->pmegrid_start_iy;
 +                for(k=0;k<pme->nkz;k++)
 +                {
 +                    iz = k;
 +                    if(direction==GMX_SUM_QGRID_FORWARD)
 +                    {
 +                        grid[ix*(pme->pmegrid_ny*pme->pmegrid_nz)+iy*(pme->pmegrid_nz)+iz] += overlap->recvbuf[icnt++];
 +                    }
 +                    else
 +                    {
 +                        grid[ix*(pme->pmegrid_ny*pme->pmegrid_nz)+iy*(pme->pmegrid_nz)+iz]  = overlap->recvbuf[icnt++];
 +                    }
 +                }
 +            }
 +        }
 +    }
 +
 +    /* Major dimension is easier, no copying required,
 +     * but we might have to sum to a separate array.
 +     * Since we don't copy, we have to communicate up to pmegrid_nz,
 +     * not nkz as for the minor direction.
 +     */
 +    overlap = &pme->overlap[0];
 +
 +    for(ipulse=0;ipulse<overlap->noverlap_nodes;ipulse++)
 +    {
 +        if(direction==GMX_SUM_QGRID_FORWARD)
 +        {
 +            send_id = overlap->send_id[ipulse];
 +            recv_id = overlap->recv_id[ipulse];
 +            send_index0   = overlap->comm_data[ipulse].send_index0;
 +            send_nindex   = overlap->comm_data[ipulse].send_nindex;
 +            recv_index0   = overlap->comm_data[ipulse].recv_index0;
 +            recv_nindex   = overlap->comm_data[ipulse].recv_nindex;
 +            recvptr   = overlap->recvbuf;
 +        }
 +        else
 +        {
 +            send_id = overlap->recv_id[ipulse];
 +            recv_id = overlap->send_id[ipulse];
 +            send_index0   = overlap->comm_data[ipulse].recv_index0;
 +            send_nindex   = overlap->comm_data[ipulse].recv_nindex;
 +            recv_index0   = overlap->comm_data[ipulse].send_index0;
 +            recv_nindex   = overlap->comm_data[ipulse].send_nindex;
 +            recvptr   = grid + (recv_index0-pme->pmegrid_start_ix)*(pme->pmegrid_ny*pme->pmegrid_nz);
 +        }
 +
 +        sendptr       = grid + (send_index0-pme->pmegrid_start_ix)*(pme->pmegrid_ny*pme->pmegrid_nz);
 +        datasize      = pme->pmegrid_ny * pme->pmegrid_nz;
 +
 +        if (debug)
 +        {
 +            fprintf(debug,"PME send node %d %d -> %d grid start %d Communicating %d to %d\n",
 +                    pme->nodeid,overlap->nodeid,send_id,
 +                    pme->pmegrid_start_ix,
 +                    send_index0-pme->pmegrid_start_ix,
 +                    send_index0-pme->pmegrid_start_ix+send_nindex);
 +            fprintf(debug,"PME recv node %d %d <- %d grid start %d Communicating %d to %d\n",
 +                    pme->nodeid,overlap->nodeid,recv_id,
 +                    pme->pmegrid_start_ix,
 +                    recv_index0-pme->pmegrid_start_ix,
 +                    recv_index0-pme->pmegrid_start_ix+recv_nindex);
 +        }
 +
 +        MPI_Sendrecv(sendptr,send_nindex*datasize,GMX_MPI_REAL,
 +                     send_id,ipulse,
 +                     recvptr,recv_nindex*datasize,GMX_MPI_REAL,
 +                     recv_id,ipulse,
 +                     overlap->mpi_comm,&stat);
 +
 +        /* ADD data from contiguous recv buffer */
 +        if(direction==GMX_SUM_QGRID_FORWARD)
 +        {
 +            p = grid + (recv_index0-pme->pmegrid_start_ix)*(pme->pmegrid_ny*pme->pmegrid_nz);
 +            for(i=0;i<recv_nindex*datasize;i++)
 +            {
 +                p[i] += overlap->recvbuf[i];
 +            }
 +        }
 +    }
 +}
 +#endif
 +
 +
 +static int
 +copy_pmegrid_to_fftgrid(gmx_pme_t pme, real *pmegrid, real *fftgrid)
 +{
 +    ivec    local_fft_ndata,local_fft_offset,local_fft_size;
 +    ivec    local_pme_size;
 +    int     i,ix,iy,iz;
 +    int     pmeidx,fftidx;
 +
 +    /* Dimensions should be identical for A/B grid, so we just use A here */
 +    gmx_parallel_3dfft_real_limits(pme->pfft_setupA,
 +                                   local_fft_ndata,
 +                                   local_fft_offset,
 +                                   local_fft_size);
 +
 +    local_pme_size[0] = pme->pmegrid_nx;
 +    local_pme_size[1] = pme->pmegrid_ny;
 +    local_pme_size[2] = pme->pmegrid_nz;
 +
 +    /* The fftgrid is always 'justified' to the lower-left corner of the PME grid;
 +     * the offset is identical, and the PME grid always has more data (due to overlap).
 +     */
 +    {
 +#ifdef DEBUG_PME
 +        FILE *fp,*fp2;
 +        char fn[STRLEN],format[STRLEN];
 +        real val;
 +        sprintf(fn,"pmegrid%d.pdb",pme->nodeid);
 +        fp = ffopen(fn,"w");
 +        sprintf(fn,"pmegrid%d.txt",pme->nodeid);
 +        fp2 = ffopen(fn,"w");
 +        sprintf(format,"%s%s\n",pdbformat,"%6.2f%6.2f");
 +#endif
 +
 +    for(ix=0;ix<local_fft_ndata[XX];ix++)
 +    {
 +        for(iy=0;iy<local_fft_ndata[YY];iy++)
 +        {
 +            for(iz=0;iz<local_fft_ndata[ZZ];iz++)
 +            {
 +                pmeidx = ix*(local_pme_size[YY]*local_pme_size[ZZ])+iy*(local_pme_size[ZZ])+iz;
 +                fftidx = ix*(local_fft_size[YY]*local_fft_size[ZZ])+iy*(local_fft_size[ZZ])+iz;
 +                fftgrid[fftidx] = pmegrid[pmeidx];
 +#ifdef DEBUG_PME
 +                val = 100*pmegrid[pmeidx];
 +                if (pmegrid[pmeidx] != 0)
 +                    fprintf(fp,format,"ATOM",pmeidx,"CA","GLY",' ',pmeidx,' ',
 +                            5.0*ix,5.0*iy,5.0*iz,1.0,val);
 +                if (pmegrid[pmeidx] != 0)
 +                    fprintf(fp2,"%-12s  %5d  %5d  %5d  %12.5e\n",
 +                            "qgrid",
 +                            pme->pmegrid_start_ix + ix,
 +                            pme->pmegrid_start_iy + iy,
 +                            pme->pmegrid_start_iz + iz,
 +                            pmegrid[pmeidx]);
 +#endif
 +            }
 +        }
 +    }
 +#ifdef DEBUG_PME
 +    ffclose(fp);
 +    ffclose(fp2);
 +#endif
 +    }
 +    return 0;
 +}
 +
 +
 +static gmx_cycles_t omp_cyc_start()
 +{
 +    return gmx_cycles_read();
 +}
 +
 +static gmx_cycles_t omp_cyc_end(gmx_cycles_t c)
 +{
 +    return gmx_cycles_read() - c;
 +}
 +
 +
 +static int
 +copy_fftgrid_to_pmegrid(gmx_pme_t pme, const real *fftgrid, real *pmegrid,
 +                        int nthread,int thread)
 +{
 +    ivec    local_fft_ndata,local_fft_offset,local_fft_size;
 +    ivec    local_pme_size;
 +    int     ixy0,ixy1,ixy,ix,iy,iz;
 +    int     pmeidx,fftidx;
 +#ifdef PME_TIME_THREADS
 +    gmx_cycles_t c1;
 +    static double cs1=0;
 +    static int cnt=0;
 +#endif
 +
 +#ifdef PME_TIME_THREADS
 +    c1 = omp_cyc_start();
 +#endif
 +    /* Dimensions should be identical for A/B grid, so we just use A here */
 +    gmx_parallel_3dfft_real_limits(pme->pfft_setupA,
 +                                   local_fft_ndata,
 +                                   local_fft_offset,
 +                                   local_fft_size);
 +
 +    local_pme_size[0] = pme->pmegrid_nx;
 +    local_pme_size[1] = pme->pmegrid_ny;
 +    local_pme_size[2] = pme->pmegrid_nz;
 +
 +    /* The fftgrid is always 'justified' to the lower-left corner of the PME grid;
 +     * the offset is identical, and the PME grid always has more data (due to overlap).
 +     */
 +    ixy0 = ((thread  )*local_fft_ndata[XX]*local_fft_ndata[YY])/nthread;
 +    ixy1 = ((thread+1)*local_fft_ndata[XX]*local_fft_ndata[YY])/nthread;
 +
 +    for(ixy=ixy0;ixy<ixy1;ixy++)
 +    {
 +        ix = ixy/local_fft_ndata[YY];
 +        iy = ixy - ix*local_fft_ndata[YY];
 +
 +        pmeidx = (ix*local_pme_size[YY] + iy)*local_pme_size[ZZ];
 +        fftidx = (ix*local_fft_size[YY] + iy)*local_fft_size[ZZ];
 +        for(iz=0;iz<local_fft_ndata[ZZ];iz++)
 +        {
 +            pmegrid[pmeidx+iz] = fftgrid[fftidx+iz];
 +        }
 +    }
 +
 +#ifdef PME_TIME_THREADS
 +    c1 = omp_cyc_end(c1);
 +    cs1 += (double)c1;
 +    cnt++;
 +    if (cnt % 20 == 0)
 +    {
 +        printf("copy %.2f\n",cs1*1e-9);
 +    }
 +#endif
 +
 +    return 0;
 +}
 +
 +
 +static void
 +wrap_periodic_pmegrid(gmx_pme_t pme, real *pmegrid)
 +{
 +    int     nx,ny,nz,pnx,pny,pnz,ny_x,overlap,ix,iy,iz;
 +
 +    nx = pme->nkx;
 +    ny = pme->nky;
 +    nz = pme->nkz;
 +
 +    pnx = pme->pmegrid_nx;
 +    pny = pme->pmegrid_ny;
 +    pnz = pme->pmegrid_nz;
 +
 +    overlap = pme->pme_order - 1;
 +
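 +    /* Charge spreading writes up to pme_order-1 planes beyond the
 +     * nkx*nky*nkz cell. Fold these back onto the principal cell:
 +     * z is always folded locally, while y and x are only folded here
 +     * when a single node covers that dimension (otherwise the overlap
 +     * is communicated in gmx_sum_qgrid_dd).
 +     */
 +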
 +    /* Add periodic overlap in z */
 +    for(ix=0; ix<pme->pmegrid_nx; ix++)
 +    {
 +        for(iy=0; iy<pme->pmegrid_ny; iy++)
 +        {
 +            for(iz=0; iz<overlap; iz++)
 +            {
 +                pmegrid[(ix*pny+iy)*pnz+iz] +=
 +                    pmegrid[(ix*pny+iy)*pnz+nz+iz];
 +            }
 +        }
 +    }
 +
 +    if (pme->nnodes_minor == 1)
 +    {
 +       for(ix=0; ix<pme->pmegrid_nx; ix++)
 +       {
 +           for(iy=0; iy<overlap; iy++)
 +           {
 +               for(iz=0; iz<nz; iz++)
 +               {
 +                   pmegrid[(ix*pny+iy)*pnz+iz] +=
 +                       pmegrid[(ix*pny+ny+iy)*pnz+iz];
 +               }
 +           }
 +       }
 +    }
 +
 +    if (pme->nnodes_major == 1)
 +    {
 +        ny_x = (pme->nnodes_minor == 1 ? ny : pme->pmegrid_ny);
 +
 +        for(ix=0; ix<overlap; ix++)
 +        {
 +            for(iy=0; iy<ny_x; iy++)
 +            {
 +                for(iz=0; iz<nz; iz++)
 +                {
 +                    pmegrid[(ix*pny+iy)*pnz+iz] +=
 +                        pmegrid[((nx+ix)*pny+iy)*pnz+iz];
 +                }
 +            }
 +        }
 +    }
 +}
 +
 +
 +static void
 +unwrap_periodic_pmegrid(gmx_pme_t pme, real *pmegrid)
 +{
 +    int     nx,ny,nz,pnx,pny,pnz,ny_x,overlap,ix;
 +
 +    nx = pme->nkx;
 +    ny = pme->nky;
 +    nz = pme->nkz;
 +
 +    pnx = pme->pmegrid_nx;
 +    pny = pme->pmegrid_ny;
 +    pnz = pme->pmegrid_nz;
 +
 +    overlap = pme->pme_order - 1;
 +
 +    if (pme->nnodes_major == 1)
 +    {
 +        ny_x = (pme->nnodes_minor == 1 ? ny : pme->pmegrid_ny);
 +
 +        for(ix=0; ix<overlap; ix++)
 +        {
 +            int iy,iz;
 +
 +            for(iy=0; iy<ny_x; iy++)
 +            {
 +                for(iz=0; iz<nz; iz++)
 +                {
 +                    pmegrid[((nx+ix)*pny+iy)*pnz+iz] =
 +                        pmegrid[(ix*pny+iy)*pnz+iz];
 +                }
 +            }
 +        }
 +    }
 +
 +    if (pme->nnodes_minor == 1)
 +    {
 +#pragma omp parallel for num_threads(pme->nthread) schedule(static)
 +       for(ix=0; ix<pme->pmegrid_nx; ix++)
 +       {
 +           int iy,iz;
 +
 +           for(iy=0; iy<overlap; iy++)
 +           {
 +               for(iz=0; iz<nz; iz++)
 +               {
 +                   pmegrid[(ix*pny+ny+iy)*pnz+iz] =
 +                       pmegrid[(ix*pny+iy)*pnz+iz];
 +               }
 +           }
 +       }
 +    }
 +
 +    /* Copy periodic overlap in z */
 +#pragma omp parallel for num_threads(pme->nthread) schedule(static)
 +    for(ix=0; ix<pme->pmegrid_nx; ix++)
 +    {
 +        int iy,iz;
 +
 +        for(iy=0; iy<pme->pmegrid_ny; iy++)
 +        {
 +            for(iz=0; iz<overlap; iz++)
 +            {
 +                pmegrid[(ix*pny+iy)*pnz+nz+iz] =
 +                    pmegrid[(ix*pny+iy)*pnz+iz];
 +            }
 +        }
 +    }
 +}
 +
 +static void clear_grid(int nx,int ny,int nz,real *grid,
 +                       ivec fs,int *flag,
 +                       int fx,int fy,int fz,
 +                       int order)
 +{
 +    int nc,ncz;
 +    int fsx,fsy,fsz,gx,gy,gz,g0x,g0y,x,y,z;
 +    int flind;
 +
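 +    /* nc and ncz count how many flag blocks (of FLBS, resp. FLBSZ,
 +     * grid lines) a spreading region of 'order' lines can overlap
 +     * in one dimension.
 +     */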
 +    nc  = 2 + (order - 2)/FLBS;
 +    ncz = 2 + (order - 2)/FLBSZ;
 +
 +    for(fsx=fx; fsx<fx+nc; fsx++)
 +    {
 +        for(fsy=fy; fsy<fy+nc; fsy++)
 +        {
 +            for(fsz=fz; fsz<fz+ncz; fsz++)
 +            {
 +                flind = (fsx*fs[YY] + fsy)*fs[ZZ] + fsz;
 +                if (flag[flind] == 0)
 +                {
 +                    gx = fsx*FLBS;
 +                    gy = fsy*FLBS;
 +                    gz = fsz*FLBSZ;
 +                    g0x = (gx*ny + gy)*nz + gz;
 +                    for(x=0; x<FLBS; x++)
 +                    {
 +                        g0y = g0x;
 +                        for(y=0; y<FLBS; y++)
 +                        {
 +                            for(z=0; z<FLBSZ; z++)
 +                            {
 +                                grid[g0y+z] = 0;
 +                            }
 +                            g0y += nz;
 +                        }
 +                        g0x += ny*nz;
 +                    }
 +
 +                    flag[flind] = 1;
 +                }
 +            }
 +        }
 +    }
 +}
 +
 +/* This has to be a macro to enable full compiler optimization with xlC (and probably others too) */
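 +/* DO_BSPLINE spreads the charge qn of one atom over order^3 grid
 + * points, weighting each point by the separable B-spline product
 + * thx[ithx]*thy[ithy]*thz[ithz].
 + */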
 +#define DO_BSPLINE(order)                            \
 +for(ithx=0; (ithx<order); ithx++)                    \
 +{                                                    \
 +    index_x = (i0+ithx)*pny*pnz;                     \
 +    valx    = qn*thx[ithx];                          \
 +                                                     \
 +    for(ithy=0; (ithy<order); ithy++)                \
 +    {                                                \
 +        valxy    = valx*thy[ithy];                   \
 +        index_xy = index_x+(j0+ithy)*pnz;            \
 +                                                     \
 +        for(ithz=0; (ithz<order); ithz++)            \
 +        {                                            \
 +            index_xyz        = index_xy+(k0+ithz);   \
 +            grid[index_xyz] += valxy*thz[ithz];      \
 +        }                                            \
 +    }                                                \
 +}
 +
 +
 +static void spread_q_bsplines_thread(pmegrid_t *pmegrid,
 +                                     pme_atomcomm_t *atc, splinedata_t *spline,
 +                                     pme_spline_work_t *work)
 +{
 +
 +    /* spread charges from home atoms to local grid */
 +    real     *grid;
 +    pme_overlap_t *ol;
 +    int      b,i,nn,n,ithx,ithy,ithz,i0,j0,k0;
 +    int *    idxptr;
 +    int      order,norder,index_x,index_xy,index_xyz;
 +    real     valx,valxy,qn;
 +    real     *thx,*thy,*thz;
 +    int      localsize, bndsize;
 +    int      pnx,pny,pnz,ndatatot;
 +    int      offx,offy,offz;
 +
 +    pnx = pmegrid->n[XX];
 +    pny = pmegrid->n[YY];
 +    pnz = pmegrid->n[ZZ];
 +
 +    offx = pmegrid->offset[XX];
 +    offy = pmegrid->offset[YY];
 +    offz = pmegrid->offset[ZZ];
 +
 +    ndatatot = pnx*pny*pnz;
 +    grid = pmegrid->grid;
 +    for(i=0;i<ndatatot;i++)
 +    {
 +        grid[i] = 0;
 +    }
 +
 +    order = pmegrid->order;
 +
 +    for(nn=0; nn<spline->n; nn++)
 +    {
 +        n  = spline->ind[nn];
 +        qn = atc->q[n];
 +
 +        if (qn != 0)
 +        {
 +            idxptr = atc->idx[n];
 +            norder = nn*order;
 +
 +            i0   = idxptr[XX] - offx;
 +            j0   = idxptr[YY] - offy;
 +            k0   = idxptr[ZZ] - offz;
 +
 +            thx = spline->theta[XX] + norder;
 +            thy = spline->theta[YY] + norder;
 +            thz = spline->theta[ZZ] + norder;
 +
 +            switch (order) {
 +            case 4:
 +#ifdef PME_SSE
 +#ifdef PME_SSE_UNALIGNED
 +#define PME_SPREAD_SSE_ORDER4
 +#else
 +#define PME_SPREAD_SSE_ALIGNED
 +#define PME_ORDER 4
 +#endif
 +#include "pme_sse_single.h"
 +#else
 +                DO_BSPLINE(4);
 +#endif
 +                break;
 +            case 5:
 +#ifdef PME_SSE
 +#define PME_SPREAD_SSE_ALIGNED
 +#define PME_ORDER 5
 +#include "pme_sse_single.h"
 +#else
 +                DO_BSPLINE(5);
 +#endif
 +                break;
 +            default:
 +                DO_BSPLINE(order);
 +                break;
 +            }
 +        }
 +    }
 +}
 +
 +static void set_grid_alignment(int *pmegrid_nz,int pme_order)
 +{
 +#ifdef PME_SSE
 +    if (pme_order == 5
 +#ifndef PME_SSE_UNALIGNED
 +        || pme_order == 4
 +#endif
 +        )
 +    {
 +        /* Round nz up to a multiple of 4 to ensure alignment */
 +        *pmegrid_nz = ((*pmegrid_nz + 3) & ~3);
 +    }
 +#endif
 +}
 +
 +static void set_gridsize_alignment(int *gridsize,int pme_order)
 +{
 +#ifdef PME_SSE
 +#ifndef PME_SSE_UNALIGNED
 +    if (pme_order == 4)
 +    {
 +        /* Add extra elements to ensure that aligned operations do not go
 +         * beyond the allocated grid size.
 +         * Note that for pme_order=5, the pme grid z-size alignment
 +         * ensures that we will not go beyond the grid size.
 +         */
 +         *gridsize += 4;
 +    }
 +#endif
 +#endif
 +}
 +
 +static void pmegrid_init(pmegrid_t *grid,
 +                         int cx, int cy, int cz,
 +                         int x0, int y0, int z0,
 +                         int x1, int y1, int z1,
 +                         gmx_bool set_alignment,
 +                         int pme_order,
 +                         real *ptr)
 +{
 +    int nz,gridsize;
 +
 +    grid->ci[XX] = cx;
 +    grid->ci[YY] = cy;
 +    grid->ci[ZZ] = cz;
 +    grid->offset[XX] = x0;
 +    grid->offset[YY] = y0;
 +    grid->offset[ZZ] = z0;
 +    grid->n[XX]      = x1 - x0 + pme_order - 1;
 +    grid->n[YY]      = y1 - y0 + pme_order - 1;
 +    grid->n[ZZ]      = z1 - z0 + pme_order - 1;
 +
 +    nz = grid->n[ZZ];
 +    set_grid_alignment(&nz,pme_order);
 +    if (set_alignment)
 +    {
 +        grid->n[ZZ] = nz;
 +    }
 +    else if (nz != grid->n[ZZ])
 +    {
 +        gmx_incons("pmegrid_init call with an unaligned z size");
 +    }
 +
 +    grid->order = pme_order;
 +    if (ptr == NULL)
 +    {
 +        gridsize = grid->n[XX]*grid->n[YY]*grid->n[ZZ];
 +        set_gridsize_alignment(&gridsize,pme_order);
 +        snew_aligned(grid->grid,gridsize,16);
 +    }
 +    else
 +    {
 +        grid->grid = ptr;
 +    }
 +}
 +
 +static int div_round_up(int numerator,int denominator)
 +{
 +    return (numerator + denominator - 1)/denominator;
 +}
 +
 +static void make_subgrid_division(const ivec n,int ovl,int nthread,
 +                                  ivec nsub)
 +{
 +    int gsize_opt,gsize;
 +    int nsx,nsy,nsz;
 +    char *env;
 +
 +    gsize_opt = -1;
 +    for(nsx=1; nsx<=nthread; nsx++)
 +    {
 +        if (nthread % nsx == 0)
 +        {
 +            for(nsy=1; nsy<=nthread; nsy++)
 +            {
 +                if (nsx*nsy <= nthread && nthread % (nsx*nsy) == 0)
 +                {
 +                    nsz = nthread/(nsx*nsy);
 +
 +                    /* Determine the number of grid points per thread */
 +                    gsize =
 +                        (div_round_up(n[XX],nsx) + ovl)*
 +                        (div_round_up(n[YY],nsy) + ovl)*
 +                        (div_round_up(n[ZZ],nsz) + ovl);
 +
 +                    /* Minimize the number of grid points per thread
 +                     * and, secondarily, the number of cuts in minor dimensions.
 +                     */
 +                    if (gsize_opt == -1 ||
 +                        gsize < gsize_opt ||
 +                        (gsize == gsize_opt &&
 +                         (nsz < nsub[ZZ] || (nsz == nsub[ZZ] && nsy < nsub[YY]))))
 +                    {
 +                        nsub[XX] = nsx;
 +                        nsub[YY] = nsy;
 +                        nsub[ZZ] = nsz;
 +                        gsize_opt = gsize;
 +                    }
 +                }
 +            }
 +        }
 +    }
 +
 +    env = getenv("GMX_PME_THREAD_DIVISION");
 +    if (env != NULL)
 +    {
 +        sscanf(env,"%d %d %d",&nsub[XX],&nsub[YY],&nsub[ZZ]);
 +    }
 +
 +    if (nsub[XX]*nsub[YY]*nsub[ZZ] != nthread)
 +    {
 +        gmx_fatal(FARGS,"PME grid thread division (%d x %d x %d) does not match the total number of threads (%d)",nsub[XX],nsub[YY],nsub[ZZ],nthread);
 +    }
 +}
 +
 +static void pmegrids_init(pmegrids_t *grids,
 +                          int nx,int ny,int nz,int nz_base,
 +                          int pme_order,
 +                          int nthread,
 +                          int overlap_x,
 +                          int overlap_y)
 +{
 +    ivec n,n_base,g0,g1;
 +    int t,x,y,z,d,i,tfac;
 +    int max_comm_lines;
 +
 +    n[XX] = nx - (pme_order - 1);
 +    n[YY] = ny - (pme_order - 1);
 +    n[ZZ] = nz - (pme_order - 1);
 +
 +    copy_ivec(n,n_base);
 +    n_base[ZZ] = nz_base;
 +
 +    pmegrid_init(&grids->grid,0,0,0,0,0,0,n[XX],n[YY],n[ZZ],FALSE,pme_order,
 +                 NULL);
 +
 +    grids->nthread = nthread;
 +
 +    make_subgrid_division(n_base,pme_order-1,grids->nthread,grids->nc);
 +
 +    if (grids->nthread > 1)
 +    {
 +        ivec nst;
 +        int gridsize;
 +        real *grid_all;
 +
 +        for(d=0; d<DIM; d++)
 +        {
 +            nst[d] = div_round_up(n[d],grids->nc[d]) + pme_order - 1;
 +        }
 +        set_grid_alignment(&nst[ZZ],pme_order);
 +
 +        if (debug)
 +        {
 +            fprintf(debug,"pmegrid thread local division: %d x %d x %d\n",
 +                    grids->nc[XX],grids->nc[YY],grids->nc[ZZ]);
 +            fprintf(debug,"pmegrid %d %d %d max thread pmegrid %d %d %d\n",
 +                    nx,ny,nz,
 +                    nst[XX],nst[YY],nst[ZZ]);
 +        }
 +
 +        snew(grids->grid_th,grids->nthread);
 +        t = 0;
 +        gridsize = nst[XX]*nst[YY]*nst[ZZ];
 +        set_gridsize_alignment(&gridsize,pme_order);
 +        snew_aligned(grid_all,
 +                     grids->nthread*gridsize+(grids->nthread+1)*GMX_CACHE_SEP,
 +                     16);
 +
 +        for(x=0; x<grids->nc[XX]; x++)
 +        {
 +            for(y=0; y<grids->nc[YY]; y++)
 +            {
 +                for(z=0; z<grids->nc[ZZ]; z++)
 +                {
 +                    pmegrid_init(&grids->grid_th[t],
 +                                 x,y,z,
 +                                 (n[XX]*(x  ))/grids->nc[XX],
 +                                 (n[YY]*(y  ))/grids->nc[YY],
 +                                 (n[ZZ]*(z  ))/grids->nc[ZZ],
 +                                 (n[XX]*(x+1))/grids->nc[XX],
 +                                 (n[YY]*(y+1))/grids->nc[YY],
 +                                 (n[ZZ]*(z+1))/grids->nc[ZZ],
 +                                 TRUE,
 +                                 pme_order,
 +                                 grid_all+GMX_CACHE_SEP+t*(gridsize+GMX_CACHE_SEP));
 +                    t++;
 +                }
 +            }
 +        }
 +    }
 +
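 +    /* Build the per-dimension grid-line -> thread lookup tables;
 +     * summing g2t[XX][ix] + g2t[YY][iy] + g2t[ZZ][iz] yields the
 +     * flattened index of the thread subgrid owning that grid point
 +     * (tfac provides the stride for each dimension).
 +     */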
 +    snew(grids->g2t,DIM);
 +    tfac = 1;
 +    for(d=DIM-1; d>=0; d--)
 +    {
 +        snew(grids->g2t[d],n[d]);
 +        t = 0;
 +        for(i=0; i<n[d]; i++)
 +        {
 +            /* The second check should match the parameters
 +             * of the pmegrid_init call above.
 +             */
 +            while (t + 1 < grids->nc[d] && i >= (n[d]*(t+1))/grids->nc[d])
 +            {
 +                t++;
 +            }
 +            grids->g2t[d][i] = t*tfac;
 +        }
 +
 +        tfac *= grids->nc[d];
 +
 +        switch (d)
 +        {
 +        case XX: max_comm_lines = overlap_x;     break;
 +        case YY: max_comm_lines = overlap_y;     break;
 +        case ZZ: max_comm_lines = pme_order - 1; break;
 +        }
 +        grids->nthread_comm[d] = 0;
 +        while ((n[d]*grids->nthread_comm[d])/grids->nc[d] < max_comm_lines)
 +        {
 +            grids->nthread_comm[d]++;
 +        }
 +        if (debug != NULL)
 +        {
 +            fprintf(debug,"pmegrid thread grid communication range in %c: %d\n",
 +                    'x'+d,grids->nthread_comm[d]);
 +        }
 +        /* It should be possible to make grids->nthread_comm[d]==grids->nc[d]
 +         * work, but this is not a problematic restriction.
 +         */
 +        if (grids->nc[d] > 1 && grids->nthread_comm[d] > grids->nc[d])
 +        {
 +            gmx_fatal(FARGS,"Too many threads for PME (%d) compared to the number of grid lines, reduce the number of threads doing PME",grids->nthread);
 +        }
 +    }
 +}
 +
 +
 +static void pmegrids_destroy(pmegrids_t *grids)
 +{
 +    int t;
 +
 +    if (grids->grid.grid != NULL)
 +    {
 +        sfree(grids->grid.grid);
 +
 +        if (grids->nthread > 0)
 +        {
 +            for(t=0; t<grids->nthread; t++)
 +            {
 +                sfree(grids->grid_th[t].grid);
 +            }
 +            sfree(grids->grid_th);
 +        }
 +    }
 +}
 +
 +
 +static void realloc_work(pme_work_t *work,int nkx)
 +{
 +    if (nkx > work->nalloc)
 +    {
 +        work->nalloc = nkx;
 +        srenew(work->mhx  ,work->nalloc);
 +        srenew(work->mhy  ,work->nalloc);
 +        srenew(work->mhz  ,work->nalloc);
 +        srenew(work->m2   ,work->nalloc);
 +        /* Allocate an aligned pointer for SSE operations, including 3 extra
 +         * elements at the end since SSE operates on 4 elements at a time.
 +         */
 +        sfree_aligned(work->denom);
 +        sfree_aligned(work->tmp1);
 +        sfree_aligned(work->eterm);
 +        snew_aligned(work->denom,work->nalloc+3,16);
 +        snew_aligned(work->tmp1 ,work->nalloc+3,16);
 +        snew_aligned(work->eterm,work->nalloc+3,16);
 +        srenew(work->m2inv,work->nalloc);
 +    }
 +}
 +
 +
 +static void free_work(pme_work_t *work)
 +{
 +    sfree(work->mhx);
 +    sfree(work->mhy);
 +    sfree(work->mhz);
 +    sfree(work->m2);
 +    sfree_aligned(work->denom);
 +    sfree_aligned(work->tmp1);
 +    sfree_aligned(work->eterm);
 +    sfree(work->m2inv);
 +}
 +
 +
 +#ifdef PME_SSE
 +/* Calculate exponentials through SSE in float precision */
 +inline static void calc_exponentials(int start, int end, real f, real *d_aligned, real *r_aligned, real *e_aligned)
 +{
 +    {
 +        const __m128 two = _mm_set_ps(2.0f,2.0f,2.0f,2.0f);
 +        __m128 f_sse;
 +        __m128 lu;
 +        __m128 tmp_d1,d_inv,tmp_r,tmp_e;
 +        int kx;
 +        f_sse = _mm_load1_ps(&f);
 +        for(kx=0; kx<end; kx+=4)
 +        {
 +            tmp_d1   = _mm_load_ps(d_aligned+kx);
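 +            /* _mm_rcp_ps gives only an approximate reciprocal
 +             * (~12 bits), so refine it with one Newton-Raphson
 +             * iteration: x1 = x0*(2 - d*x0).
 +             */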
 +            lu       = _mm_rcp_ps(tmp_d1);
 +            d_inv    = _mm_mul_ps(lu,_mm_sub_ps(two,_mm_mul_ps(lu,tmp_d1)));
 +            tmp_r    = _mm_load_ps(r_aligned+kx);
 +            tmp_r    = gmx_mm_exp_ps(tmp_r);
 +            tmp_e    = _mm_mul_ps(f_sse,d_inv);
 +            tmp_e    = _mm_mul_ps(tmp_e,tmp_r);
 +            _mm_store_ps(e_aligned+kx,tmp_e);
 +        }
 +    }
 +}
 +#else
 +inline static void calc_exponentials(int start, int end, real f, real *d, real *r, real *e)
 +{
 +    int kx;
 +    for(kx=start; kx<end; kx++)
 +    {
 +        d[kx] = 1.0/d[kx];
 +    }
 +    for(kx=start; kx<end; kx++)
 +    {
 +        r[kx] = exp(r[kx]);
 +    }
 +    for(kx=start; kx<end; kx++)
 +    {
 +        e[kx] = f*r[kx]*d[kx];
 +    }
 +}
 +#endif
 +
 +
 +static int solve_pme_yzx(gmx_pme_t pme,t_complex *grid,
 +                         real ewaldcoeff,real vol,
 +                         gmx_bool bEnerVir,
 +                         int nthread,int thread)
 +{
 +    /* do recip sum over local cells in grid */
 +    /* y major, z middle, x minor or continuous */
 +    t_complex *p0;
 +    int     kx,ky,kz,maxkx,maxky,maxkz;
 +    int     nx,ny,nz,iyz0,iyz1,iyz,iy,iz,kxstart,kxend;
 +    real    mx,my,mz;
 +    real    factor=M_PI*M_PI/(ewaldcoeff*ewaldcoeff);
 +    real    ets2,struct2,vfactor,ets2vf;
 +    real    d1,d2,energy=0;
 +    real    by,bz;
 +    real    virxx=0,virxy=0,virxz=0,viryy=0,viryz=0,virzz=0;
 +    real    rxx,ryx,ryy,rzx,rzy,rzz;
 +    pme_work_t *work;
 +    real    *mhx,*mhy,*mhz,*m2,*denom,*tmp1,*eterm,*m2inv;
 +    real    mhxk,mhyk,mhzk,m2k;
 +    real    corner_fac;
 +    ivec    complex_order;
 +    ivec    local_ndata,local_offset,local_size;
 +    real    elfac;
 +
 +    elfac = ONE_4PI_EPS0/pme->epsilon_r;
 +
 +    nx = pme->nkx;
 +    ny = pme->nky;
 +    nz = pme->nkz;
 +
 +    /* Dimensions should be identical for A/B grid, so we just use A here */
 +    gmx_parallel_3dfft_complex_limits(pme->pfft_setupA,
 +                                      complex_order,
 +                                      local_ndata,
 +                                      local_offset,
 +                                      local_size);
 +
 +    rxx = pme->recipbox[XX][XX];
 +    ryx = pme->recipbox[YY][XX];
 +    ryy = pme->recipbox[YY][YY];
 +    rzx = pme->recipbox[ZZ][XX];
 +    rzy = pme->recipbox[ZZ][YY];
 +    rzz = pme->recipbox[ZZ][ZZ];
 +
 +    maxkx = (nx+1)/2;
 +    maxky = (ny+1)/2;
 +    maxkz = nz/2+1;
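 +    /* Grid indices below maxk are positive reciprocal-lattice
 +     * vectors; indices at or above maxk wrap around to negative
 +     * values (m = k - n) in the loops below.
 +     */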
 +
 +    work = &pme->work[thread];
 +    mhx   = work->mhx;
 +    mhy   = work->mhy;
 +    mhz   = work->mhz;
 +    m2    = work->m2;
 +    denom = work->denom;
 +    tmp1  = work->tmp1;
 +    eterm = work->eterm;
 +    m2inv = work->m2inv;
 +
 +    iyz0 = local_ndata[YY]*local_ndata[ZZ]* thread   /nthread;
 +    iyz1 = local_ndata[YY]*local_ndata[ZZ]*(thread+1)/nthread;
 +
 +    for(iyz=iyz0; iyz<iyz1; iyz++)
 +    {
 +        iy = iyz/local_ndata[ZZ];
 +        iz = iyz - iy*local_ndata[ZZ];
 +
 +        ky = iy + local_offset[YY];
 +
 +        if (ky < maxky)
 +        {
 +            my = ky;
 +        }
 +        else
 +        {
 +            my = (ky - ny);
 +        }
 +
 +        by = M_PI*vol*pme->bsp_mod[YY][ky];
 +
 +        kz = iz + local_offset[ZZ];
 +
 +        mz = kz;
 +
 +        bz = pme->bsp_mod[ZZ][kz];
 +
 +        /* 0.5 correction for corner points */
 +        corner_fac = 1;
 +        if (kz == 0 || kz == (nz+1)/2)
 +        {
 +            corner_fac = 0.5;
 +        }
 +
 +        p0 = grid + iy*local_size[ZZ]*local_size[XX] + iz*local_size[XX];
 +
 +        /* We should skip the k-space point (0,0,0) */
 +        if (local_offset[XX] > 0 || ky > 0 || kz > 0)
 +        {
 +            kxstart = local_offset[XX];
 +        }
 +        else
 +        {
 +            kxstart = local_offset[XX] + 1;
 +            p0++;
 +        }
 +        kxend = local_offset[XX] + local_ndata[XX];
 +
 +        if (bEnerVir)
 +        {
 +            /* More expensive inner loop, especially because of the storage
 +             * of the mh elements in arrays.
 +             * Because x is the minor grid index, all mh elements
 +             * depend on kx for triclinic unit cells.
 +             */
 +
 +            /* Two explicit loops to avoid a conditional inside the loop */
 +            for(kx=kxstart; kx<maxkx; kx++)
 +            {
 +                mx = kx;
 +
 +                mhxk      = mx * rxx;
 +                mhyk      = mx * ryx + my * ryy;
 +                mhzk      = mx * rzx + my * rzy + mz * rzz;
 +                m2k       = mhxk*mhxk + mhyk*mhyk + mhzk*mhzk;
 +                mhx[kx]   = mhxk;
 +                mhy[kx]   = mhyk;
 +                mhz[kx]   = mhzk;
 +                m2[kx]    = m2k;
 +                denom[kx] = m2k*bz*by*pme->bsp_mod[XX][kx];
 +                tmp1[kx]  = -factor*m2k;
 +            }
 +
 +            for(kx=maxkx; kx<kxend; kx++)
 +            {
 +                mx = (kx - nx);
 +
 +                mhxk      = mx * rxx;
 +                mhyk      = mx * ryx + my * ryy;
 +                mhzk      = mx * rzx + my * rzy + mz * rzz;
 +                m2k       = mhxk*mhxk + mhyk*mhyk + mhzk*mhzk;
 +                mhx[kx]   = mhxk;
 +                mhy[kx]   = mhyk;
 +                mhz[kx]   = mhzk;
 +                m2[kx]    = m2k;
 +                denom[kx] = m2k*bz*by*pme->bsp_mod[XX][kx];
 +                tmp1[kx]  = -factor*m2k;
 +            }
 +
 +            for(kx=kxstart; kx<kxend; kx++)
 +            {
 +                m2inv[kx] = 1.0/m2[kx];
 +            }
 +
 +            calc_exponentials(kxstart,kxend,elfac,denom,tmp1,eterm);
 +
 +            for(kx=kxstart; kx<kxend; kx++,p0++)
 +            {
 +                d1      = p0->re;
 +                d2      = p0->im;
 +
 +                p0->re  = d1*eterm[kx];
 +                p0->im  = d2*eterm[kx];
 +
 +                struct2 = 2.0*(d1*d1+d2*d2);
 +
 +                tmp1[kx] = eterm[kx]*struct2;
 +            }
 +
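 +            /* Accumulate energy and virial; each k-vector contributes
 +             * vir_ab = E_k*(vfactor*m_a*m_b - delta_ab). The 0.5 and
 +             * 0.25 normalizations are applied in the reduction at the
 +             * end of this routine.
 +             */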
 +            for(kx=kxstart; kx<kxend; kx++)
 +            {
 +                ets2     = corner_fac*tmp1[kx];
 +                vfactor  = (factor*m2[kx] + 1.0)*2.0*m2inv[kx];
 +                energy  += ets2;
 +
 +                ets2vf   = ets2*vfactor;
 +                virxx   += ets2vf*mhx[kx]*mhx[kx] - ets2;
 +                virxy   += ets2vf*mhx[kx]*mhy[kx];
 +                virxz   += ets2vf*mhx[kx]*mhz[kx];
 +                viryy   += ets2vf*mhy[kx]*mhy[kx] - ets2;
 +                viryz   += ets2vf*mhy[kx]*mhz[kx];
 +                virzz   += ets2vf*mhz[kx]*mhz[kx] - ets2;
 +            }
 +        }
 +        else
 +        {
 +            /* We don't need to calculate the energy and the virial.
 +             * In this case the triclinic overhead is small.
 +             */
 +
 +            /* Two explicit loops to avoid a conditional inside the loop */
 +
 +            for(kx=kxstart; kx<maxkx; kx++)
 +            {
 +                mx = kx;
 +
 +                mhxk      = mx * rxx;
 +                mhyk      = mx * ryx + my * ryy;
 +                mhzk      = mx * rzx + my * rzy + mz * rzz;
 +                m2k       = mhxk*mhxk + mhyk*mhyk + mhzk*mhzk;
 +                denom[kx] = m2k*bz*by*pme->bsp_mod[XX][kx];
 +                tmp1[kx]  = -factor*m2k;
 +            }
 +
 +            for(kx=maxkx; kx<kxend; kx++)
 +            {
 +                mx = (kx - nx);
 +
 +                mhxk      = mx * rxx;
 +                mhyk      = mx * ryx + my * ryy;
 +                mhzk      = mx * rzx + my * rzy + mz * rzz;
 +                m2k       = mhxk*mhxk + mhyk*mhyk + mhzk*mhzk;
 +                denom[kx] = m2k*bz*by*pme->bsp_mod[XX][kx];
 +                tmp1[kx]  = -factor*m2k;
 +            }
 +
 +            calc_exponentials(kxstart,kxend,elfac,denom,tmp1,eterm);
 +
 +            for(kx=kxstart; kx<kxend; kx++,p0++)
 +            {
 +                d1      = p0->re;
 +                d2      = p0->im;
 +
 +                p0->re  = d1*eterm[kx];
 +                p0->im  = d2*eterm[kx];
 +            }
 +        }
 +    }
 +
 +    if (bEnerVir)
 +    {
 +        /* Update virial with local values.
 +         * The virial is symmetric by definition.
 +         * this virial seems ok for isotropic scaling, but I'm
 +         * experiencing problems on semiisotropic membranes.
 +         * IS THAT COMMENT STILL VALID??? (DvdS, 2001/02/07).
 +         */
 +        work->vir[XX][XX] = 0.25*virxx;
 +        work->vir[YY][YY] = 0.25*viryy;
 +        work->vir[ZZ][ZZ] = 0.25*virzz;
 +        work->vir[XX][YY] = work->vir[YY][XX] = 0.25*virxy;
 +        work->vir[XX][ZZ] = work->vir[ZZ][XX] = 0.25*virxz;
 +        work->vir[YY][ZZ] = work->vir[ZZ][YY] = 0.25*viryz;
 +
 +        /* This energy should be corrected for a charged system */
 +        work->energy = 0.5*energy;
 +    }
 +
 +    /* Return the loop count */
 +    return local_ndata[YY]*local_ndata[XX];
 +}
 +
 +static void get_pme_ener_vir(const gmx_pme_t pme,int nthread,
 +                             real *mesh_energy,matrix vir)
 +{
 +    /* This function sums output over threads
 +     * and should therefore only be called after thread synchronization.
 +     */
 +    int thread;
 +
 +    *mesh_energy = pme->work[0].energy;
 +    copy_mat(pme->work[0].vir,vir);
 +
 +    for(thread=1; thread<nthread; thread++)
 +    {
 +        *mesh_energy += pme->work[thread].energy;
 +        m_add(vir,pme->work[thread].vir,vir);
 +    }
 +}
 +
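 +/* DO_FSPLINE gathers the force on one atom: along the dimension being
 + * differentiated the derivative B-spline (dthx/dthy/dthz) is used,
 + * along the other two dimensions the plain B-spline values.
 + */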
 +#define DO_FSPLINE(order)                      \
 +for(ithx=0; (ithx<order); ithx++)              \
 +{                                              \
 +    index_x = (i0+ithx)*pny*pnz;               \
 +    tx      = thx[ithx];                       \
 +    dx      = dthx[ithx];                      \
 +                                               \
 +    for(ithy=0; (ithy<order); ithy++)          \
 +    {                                          \
 +        index_xy = index_x+(j0+ithy)*pnz;      \
 +        ty       = thy[ithy];                  \
 +        dy       = dthy[ithy];                 \
 +        fxy1     = fz1 = 0;                    \
 +                                               \
 +        for(ithz=0; (ithz<order); ithz++)      \
 +        {                                      \
 +            gval  = grid[index_xy+(k0+ithz)];  \
 +            fxy1 += thz[ithz]*gval;            \
 +            fz1  += dthz[ithz]*gval;           \
 +        }                                      \
 +        fx += dx*ty*fxy1;                      \
 +        fy += tx*dy*fxy1;                      \
 +        fz += tx*ty*fz1;                       \
 +    }                                          \
 +}
 +
 +
 +static void gather_f_bsplines(gmx_pme_t pme,real *grid,
 +                              gmx_bool bClearF,pme_atomcomm_t *atc,
 +                              splinedata_t *spline,
 +                              real scale)
 +{
 +    /* sum forces for local particles */
 +    int     nn,n,ithx,ithy,ithz,i0,j0,k0;
 +    int     index_x,index_xy;
 +    int     nx,ny,nz,pnx,pny,pnz;
 +    int *   idxptr;
 +    real    tx,ty,dx,dy,qn;
 +    real    fx,fy,fz,gval;
 +    real    fxy1,fz1;
 +    real    *thx,*thy,*thz,*dthx,*dthy,*dthz;
 +    int     norder;
 +    real    rxx,ryx,ryy,rzx,rzy,rzz;
 +    int     order;
 +
 +    pme_spline_work_t *work;
 +
 +    work = &pme->spline_work;
 +
 +    order = pme->pme_order;
 +    thx   = spline->theta[XX];
 +    thy   = spline->theta[YY];
 +    thz   = spline->theta[ZZ];
 +    dthx  = spline->dtheta[XX];
 +    dthy  = spline->dtheta[YY];
 +    dthz  = spline->dtheta[ZZ];
 +    nx    = pme->nkx;
 +    ny    = pme->nky;
 +    nz    = pme->nkz;
 +    pnx   = pme->pmegrid_nx;
 +    pny   = pme->pmegrid_ny;
 +    pnz   = pme->pmegrid_nz;
 +
 +    rxx   = pme->recipbox[XX][XX];
 +    ryx   = pme->recipbox[YY][XX];
 +    ryy   = pme->recipbox[YY][YY];
 +    rzx   = pme->recipbox[ZZ][XX];
 +    rzy   = pme->recipbox[ZZ][YY];
 +    rzz   = pme->recipbox[ZZ][ZZ];
 +
 +    for(nn=0; nn<spline->n; nn++)
 +    {
 +        n  = spline->ind[nn];
 +        qn = scale*atc->q[n];
 +
 +        if (bClearF)
 +        {
 +            atc->f[n][XX] = 0;
 +            atc->f[n][YY] = 0;
 +            atc->f[n][ZZ] = 0;
 +        }
 +        if (qn != 0)
 +        {
 +            fx     = 0;
 +            fy     = 0;
 +            fz     = 0;
 +            idxptr = atc->idx[n];
 +            norder = nn*order;
 +
 +            i0   = idxptr[XX];
 +            j0   = idxptr[YY];
 +            k0   = idxptr[ZZ];
 +
 +            /* Pointer arithmetic alert, next six statements */
 +            thx  = spline->theta[XX] + norder;
 +            thy  = spline->theta[YY] + norder;
 +            thz  = spline->theta[ZZ] + norder;
 +            dthx = spline->dtheta[XX] + norder;
 +            dthy = spline->dtheta[YY] + norder;
 +            dthz = spline->dtheta[ZZ] + norder;
 +
 +            switch (order) {
 +            case 4:
 +#ifdef PME_SSE
 +#ifdef PME_SSE_UNALIGNED
 +#define PME_GATHER_F_SSE_ORDER4
 +#else
 +#define PME_GATHER_F_SSE_ALIGNED
 +#define PME_ORDER 4
 +#endif
 +#include "pme_sse_single.h"
 +#else
 +                DO_FSPLINE(4);
 +#endif
 +                break;
 +            case 5:
 +#ifdef PME_SSE
 +#define PME_GATHER_F_SSE_ALIGNED
 +#define PME_ORDER 5
 +#include "pme_sse_single.h"
 +#else
 +                DO_FSPLINE(5);
 +#endif
 +                break;
 +            default:
 +                DO_FSPLINE(order);
 +                break;
 +            }
 +
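 +            /* Convert minus the gradient with respect to fractional
 +             * coordinates to a Cartesian force: scale each component
 +             * by the grid dimension and transform through the
 +             * reciprocal box vectors.
 +             */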
 +            atc->f[n][XX] += -qn*( fx*nx*rxx );
 +            atc->f[n][YY] += -qn*( fx*nx*ryx + fy*ny*ryy );
 +            atc->f[n][ZZ] += -qn*( fx*nx*rzx + fy*ny*rzy + fz*nz*rzz );
 +        }
 +    }
 +    /* Since the energy and not forces are interpolated
 +     * the net force might not be exactly zero.
 +     * This can be solved by also interpolating F, but
 +     * that comes at a cost.
 +     * A better hack is to remove the net force every
 +     * step, but that must be done at a higher level
 +     * since this routine doesn't see all atoms if running
 +     * in parallel. Don't know how important it is?  EL 990726
 +     */
 +}
 +
 +
 +static real gather_energy_bsplines(gmx_pme_t pme,real *grid,
 +                                   pme_atomcomm_t *atc)
 +{
 +    splinedata_t *spline;
 +    int     n,ithx,ithy,ithz,i0,j0,k0;
 +    int     index_x,index_xy;
 +    int *   idxptr;
 +    real    energy,pot,tx,ty,qn,gval;
 +    real    *thx,*thy,*thz;
 +    int     norder;
 +    int     order;
 +
 +    spline = &atc->spline[0];
 +
 +    order = pme->pme_order;
 +
 +    energy = 0;
 +    for(n=0; (n<atc->n); n++) {
 +        qn      = atc->q[n];
 +
 +        if (qn != 0) {
 +            idxptr = atc->idx[n];
 +            norder = n*order;
 +
 +            i0   = idxptr[XX];
 +            j0   = idxptr[YY];
 +            k0   = idxptr[ZZ];
 +
 +            /* Pointer arithmetic alert, next three statements */
 +            thx  = spline->theta[XX] + norder;
 +            thy  = spline->theta[YY] + norder;
 +            thz  = spline->theta[ZZ] + norder;
 +
 +            pot = 0;
 +            for(ithx=0; (ithx<order); ithx++)
 +            {
 +                index_x = (i0+ithx)*pme->pmegrid_ny*pme->pmegrid_nz;
 +                tx      = thx[ithx];
 +
 +                for(ithy=0; (ithy<order); ithy++)
 +                {
 +                    index_xy = index_x+(j0+ithy)*pme->pmegrid_nz;
 +                    ty       = thy[ithy];
 +
 +                    for(ithz=0; (ithz<order); ithz++)
 +                    {
 +                        gval  = grid[index_xy+(k0+ithz)];
 +                        pot  += tx*ty*thz[ithz]*gval;
 +                    }
 +
 +                }
 +            }
 +
 +            energy += pot*qn;
 +        }
 +    }
 +
 +    return energy;
 +}
 +
 +/* Macro to force loop unrolling by fixing order.
 + * This gives a significant performance gain.
 + */
 +#define CALC_SPLINE(order)                     \
 +{                                              \
 +    int j,k,l;                                 \
 +    real dr,div;                               \
 +    real data[PME_ORDER_MAX];                  \
 +    real ddata[PME_ORDER_MAX];                 \
 +                                               \
 +    for(j=0; (j<DIM); j++)                     \
 +    {                                          \
 +        dr  = xptr[j];                         \
 +                                               \
 +        /* dr is relative offset from lower cell limit */ \
 +        data[order-1] = 0;                     \
 +        data[1] = dr;                          \
 +        data[0] = 1 - dr;                      \
 +                                               \
 +        for(k=3; (k<order); k++)               \
 +        {                                      \
 +            div = 1.0/(k - 1.0);               \
 +            data[k-1] = div*dr*data[k-2];      \
 +            for(l=1; (l<(k-1)); l++)           \
 +            {                                  \
 +                data[k-l-1] = div*((dr+l)*data[k-l-2]+(k-l-dr)* \
 +                                   data[k-l-1]);                \
 +            }                                  \
 +            data[0] = div*(1-dr)*data[0];      \
 +        }                                      \
 +        /* differentiate */                    \
 +        ddata[0] = -data[0];                   \
 +        for(k=1; (k<order); k++)               \
 +        {                                      \
 +            ddata[k] = data[k-1] - data[k];    \
 +        }                                      \
 +                                               \
 +        div = 1.0/(order - 1);                 \
 +        data[order-1] = div*dr*data[order-2];  \
 +        for(l=1; (l<(order-1)); l++)           \
 +        {                                      \
 +            data[order-l-1] = div*((dr+l)*data[order-l-2]+    \
 +                               (order-l-dr)*data[order-l-1]); \
 +        }                                      \
 +        data[0] = div*(1 - dr)*data[0];        \
 +                                               \
 +        for(k=0; k<order; k++)                 \
 +        {                                      \
 +            theta[j][i*order+k]  = data[k];    \
 +            dtheta[j][i*order+k] = ddata[k];   \
 +        }                                      \
 +    }                                          \
 +}
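 +
 +/* Worked example of the recursion above (illustration only): for
 + * order=4 and dr=0.5 the k=3 pass yields the quadratic weights
 + * {0.125, 0.75, 0.125}; differentiating these gives
 + * {-0.125, -0.625, 0.625, 0.125} (summing to 0), and the final pass
 + * yields the cubic weights {1/48, 23/48, 23/48, 1/48}, which sum to 1
 + * so that the spread charge is conserved.
 + */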
 +
 +void make_bsplines(splinevec theta,splinevec dtheta,int order,
 +                   rvec fractx[],int nr,int ind[],real charge[],
 +                   gmx_bool bFreeEnergy)
 +{
 +    /* construct splines for local atoms */
 +    int  i,ii;
 +    real *xptr;
 +
 +    for(i=0; i<nr; i++)
 +    {
 +        /* With free energy we do not use the charge check.
 +         * In most cases this will be more efficient than calling make_bsplines
 +         * twice, since usually more than half the particles have charges.
 +         */
 +        ii = ind[i];
 +        if (bFreeEnergy || charge[ii] != 0.0) {
 +            xptr = fractx[ii];
 +            switch(order) {
 +            case 4:  CALC_SPLINE(4);     break;
 +            case 5:  CALC_SPLINE(5);     break;
 +            default: CALC_SPLINE(order); break;
 +            }
 +        }
 +    }
 +}
 +
 +
 +void make_dft_mod(real *mod,real *data,int ndata)
 +{
 +  int i,j;
 +  real sc,ss,arg;
 +
 +  for(i=0;i<ndata;i++) {
 +    sc=ss=0;
 +    for(j=0;j<ndata;j++) {
 +      arg=(2.0*M_PI*i*j)/ndata;
 +      sc+=data[j]*cos(arg);
 +      ss+=data[j]*sin(arg);
 +    }
 +    mod[i]=sc*sc+ss*ss;
 +  }
 +  /* Patch near-zero moduli with the average of their (periodic)
 +   * neighbors; modulo indexing avoids out-of-bounds reads at the ends. */
 +  for(i=0;i<ndata;i++)
 +    if(mod[i]<1e-7)
 +      mod[i]=(mod[(i-1+ndata)%ndata]+mod[(i+1)%ndata])*0.5;
 +}
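 +
 +/* make_dft_mod computes the squared modulus of the discrete Fourier
 + * transform of data: mod[i] = |sum_j data[j]*exp(2*pi*I*i*j/ndata)|^2.
 + * Small example (illustration only): ndata=3, data={1,2,0} gives
 + * mod={9,3,3}: for i=0, sc=3 and ss=0; for i=1,2, sc=0 and
 + * ss=+-sqrt(3).
 + */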
 +
 +
 +static void make_bspline_moduli(splinevec bsp_mod,
 +                                int nx,int ny,int nz,int order)
 +{
 +  int nmax=max(nx,max(ny,nz));
 +  real *data,*ddata,*bsp_data;
 +  int i,k,l;
 +  real div;
 +
 +  snew(data,order);
 +  snew(ddata,order);
 +  snew(bsp_data,nmax);
 +
 +  data[order-1]=0;
 +  data[1]=0;
 +  data[0]=1;
 +
 +  for(k=3;k<order;k++) {
 +    div=1.0/(k-1.0);
 +    data[k-1]=0;
 +    for(l=1;l<(k-1);l++)
 +      data[k-l-1]=div*(l*data[k-l-2]+(k-l)*data[k-l-1]);
 +    data[0]=div*data[0];
 +  }
 +  /* differentiate */
 +  ddata[0]=-data[0];
 +  for(k=1;k<order;k++)
 +    ddata[k]=data[k-1]-data[k];
 +  div=1.0/(order-1);
 +  data[order-1]=0;
 +  for(l=1;l<(order-1);l++)
 +    data[order-l-1]=div*(l*data[order-l-2]+(order-l)*data[order-l-1]);
 +  data[0]=div*data[0];
 +
 +  for(i=0;i<nmax;i++)
 +    bsp_data[i]=0;
 +  for(i=1;i<=order;i++)
 +    bsp_data[i]=data[i-1];
 +
 +  make_dft_mod(bsp_mod[XX],bsp_data,nx);
 +  make_dft_mod(bsp_mod[YY],bsp_data,ny);
 +  make_dft_mod(bsp_mod[ZZ],bsp_data,nz);
 +
 +  sfree(data);
 +  sfree(ddata);
 +  sfree(bsp_data);
 +}
 +
 +
 +/* Return the P3M optimal influence function */
 +static double do_p3m_influence(double z, int order)
 +{
 +    double z2,z4;
 +
 +    z2 = z*z;
 +    z4 = z2*z2;
 +
 +    /* The formula and most constants can be found in:
 +     * Ballenegger et al., JCTC 8, 936 (2012)
 +     */
 +    switch(order)
 +    {
 +    case 2:
 +        return 1.0 - 2.0*z2/3.0;
 +        break;
 +    case 3:
 +        return 1.0 - z2 + 2.0*z4/15.0;
 +        break;
 +    case 4:
 +        return 1.0 - 4.0*z2/3.0 + 2.0*z4/5.0 + 4.0*z2*z4/315.0;
 +        break;
 +    case 5:
 +        return 1.0 - 5.0*z2/3.0 + 7.0*z4/9.0 - 17.0*z2*z4/189.0 + 2.0*z4*z4/2835.0;
 +        break;
 +    case 6:
 +        return 1.0 - 2.0*z2 + 19.0*z4/15.0 - 256.0*z2*z4/945.0 + 62.0*z4*z4/4725.0 + 4.0*z2*z4*z4/155925.0;
 +        break;
 +    case 7:
 +        return 1.0 - 7.0*z2/3.0 + 28.0*z4/15.0 - 16.0*z2*z4/27.0 + 26.0*z4*z4/405.0 - 2.0*z2*z4*z4/1485.0 + 4.0*z4*z4*z4/6081075.0;
 +        break;
 +    case 8:
 +        return 1.0 - 8.0*z2/3.0 + 116.0*z4/45.0 - 344.0*z2*z4/315.0 + 914.0*z4*z4/4725.0 - 248.0*z4*z4*z2/22275.0 + 21844.0*z4*z4*z4/212837625.0 - 8.0*z4*z4*z4*z2/638512875.0;
 +        break;
 +    }
 +
 +    return 0.0;
 +}
 +
 +/* Calculate the P3M B-spline moduli for one dimension */
 +static void make_p3m_bspline_moduli_dim(real *bsp_mod,int n,int order)
 +{
 +    double zarg,zai,sinzai,infl;
 +    int    maxk,i;
 +
 +    if (order > 8)
 +    {
 +        gmx_fatal(FARGS,"The current P3M code only supports orders up to 8");
 +    }
 +
 +    zarg = M_PI/n;
 +
 +    maxk = (n + 1)/2;
 +
 +    for(i=-maxk; i<0; i++)
 +    {
 +        zai    = zarg*i;
 +        sinzai = sin(zai);
 +        infl   = do_p3m_influence(sinzai,order);
 +        bsp_mod[n+i] = infl*infl*pow(sinzai/zai,-2.0*order);
 +    }
 +    bsp_mod[0] = 1.0;
 +    for(i=1; i<maxk; i++)
 +    {
 +        zai    = zarg*i;
 +        sinzai = sin(zai);
 +        infl   = do_p3m_influence(sinzai,order);
 +        bsp_mod[i] = infl*infl*pow(sinzai/zai,-2.0*order);
 +    }
 +}
 +
 +/* Calculate the P3M B-spline moduli */
 +static void make_p3m_bspline_moduli(splinevec bsp_mod,
 +                                    int nx,int ny,int nz,int order)
 +{
 +    make_p3m_bspline_moduli_dim(bsp_mod[XX],nx,order);
 +    make_p3m_bspline_moduli_dim(bsp_mod[YY],ny,order);
 +    make_p3m_bspline_moduli_dim(bsp_mod[ZZ],nz,order);
 +}
 +
 +
 +static void setup_coordinate_communication(pme_atomcomm_t *atc)
 +{
 +  int nslab,n,i;
 +  int fw,bw;
 +
 +  nslab = atc->nslab;
 +
 +  n = 0;
 +  for(i=1; i<=nslab/2; i++) {
 +    fw = (atc->nodeid + i) % nslab;
 +    bw = (atc->nodeid - i + nslab) % nslab;
 +    if (n < nslab - 1) {
 +      atc->node_dest[n] = fw;
 +      atc->node_src[n]  = bw;
 +      n++;
 +    }
 +    if (n < nslab - 1) {
 +      atc->node_dest[n] = bw;
 +      atc->node_src[n]  = fw;
 +      n++;
 +    }
 +  }
 +}
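 +
 +/* Example of the schedule built above (illustration only): nslab=4 and
 + * nodeid=0 give node_dest={1,3,2} and node_src={3,1,2}, i.e. the
 + * nslab-1 exchange partners ordered by increasing slab distance,
 + * alternating forward and backward.
 + */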
 +
 +int gmx_pme_destroy(FILE *log,gmx_pme_t *pmedata)
 +{
 +    int thread;
 +
 +    if(NULL != log)
 +    {
 +        fprintf(log,"Destroying PME data structures.\n");
 +    }
 +
 +    sfree((*pmedata)->nnx);
 +    sfree((*pmedata)->nny);
 +    sfree((*pmedata)->nnz);
 +
 +    pmegrids_destroy(&(*pmedata)->pmegridA);
 +
 +    sfree((*pmedata)->fftgridA);
 +    sfree((*pmedata)->cfftgridA);
 +    gmx_parallel_3dfft_destroy((*pmedata)->pfft_setupA);
 +
 +    if ((*pmedata)->pmegridB.grid.grid != NULL)
 +    {
 +        pmegrids_destroy(&(*pmedata)->pmegridB);
 +        sfree((*pmedata)->fftgridB);
 +        sfree((*pmedata)->cfftgridB);
 +        gmx_parallel_3dfft_destroy((*pmedata)->pfft_setupB);
 +    }
 +    for(thread=0; thread<(*pmedata)->nthread; thread++)
 +    {
 +        free_work(&(*pmedata)->work[thread]);
 +    }
 +    sfree((*pmedata)->work);
 +
 +    sfree(*pmedata);
 +    *pmedata = NULL;
 +
 +  return 0;
 +}
 +
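 +/* Round n up to the nearest multiple of f,
 + * e.g. mult_up(10,4) = 12 and mult_up(12,4) = 12.
 + */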
 +static int mult_up(int n,int f)
 +{
 +    return ((n + f - 1)/f)*f;
 +}
 +
 +
 +static double pme_load_imbalance(gmx_pme_t pme)
 +{
 +    int    nma,nmi;
 +    double n1,n2,n3;
 +
 +    nma = pme->nnodes_major;
 +    nmi = pme->nnodes_minor;
 +
 +    n1 = mult_up(pme->nkx,nma)*mult_up(pme->nky,nmi)*pme->nkz;
 +    n2 = mult_up(pme->nkx,nma)*mult_up(pme->nkz,nmi)*pme->nky;
 +    n3 = mult_up(pme->nky,nma)*mult_up(pme->nkz,nmi)*pme->nkx;
 +
 +    /* pme_solve is roughly double the cost of an fft */
 +
 +    return (n1 + n2 + 3*n3)/(double)(6*pme->nkx*pme->nky*pme->nkz);
 +}
 +
 +static void init_atomcomm(gmx_pme_t pme,pme_atomcomm_t *atc, t_commrec *cr,
 +                          int dimind,gmx_bool bSpread)
 +{
 +    int nk,k,s,thread;
 +
 +    atc->dimind = dimind;
 +    atc->nslab  = 1;
 +    atc->nodeid = 0;
 +    atc->pd_nalloc = 0;
 +#ifdef GMX_MPI
 +    if (pme->nnodes > 1)
 +    {
 +        atc->mpi_comm = pme->mpi_comm_d[dimind];
 +        MPI_Comm_size(atc->mpi_comm,&atc->nslab);
 +        MPI_Comm_rank(atc->mpi_comm,&atc->nodeid);
 +    }
 +    if (debug)
 +    {
 +        fprintf(debug,"For PME atom communication in dimind %d: nslab %d rank %d\n",atc->dimind,atc->nslab,atc->nodeid);
 +    }
 +#endif
 +
 +    atc->bSpread   = bSpread;
 +    atc->pme_order = pme->pme_order;
 +
 +    if (atc->nslab > 1)
 +    {
 +        /* These three allocations are not required for particle decomp. */
 +        snew(atc->node_dest,atc->nslab);
 +        snew(atc->node_src,atc->nslab);
 +        setup_coordinate_communication(atc);
 +
 +        snew(atc->count_thread,pme->nthread);
 +        for(thread=0; thread<pme->nthread; thread++)
 +        {
 +            snew(atc->count_thread[thread],atc->nslab);
 +        }
 +        atc->count = atc->count_thread[0];
 +        snew(atc->rcount,atc->nslab);
 +        snew(atc->buf_index,atc->nslab);
 +    }
 +
 +    atc->nthread = pme->nthread;
 +    if (atc->nthread > 1)
 +    {
 +        snew(atc->thread_plist,atc->nthread);
 +    }
 +    snew(atc->spline,atc->nthread);
 +    for(thread=0; thread<atc->nthread; thread++)
 +    {
 +        if (atc->nthread > 1)
 +        {
 +            snew(atc->thread_plist[thread].n,atc->nthread+2*GMX_CACHE_SEP);
 +            atc->thread_plist[thread].n += GMX_CACHE_SEP;
 +        }
 +    }
 +}
 +
 +static void
 +init_overlap_comm(pme_overlap_t *  ol,
 +                  int              norder,
 +#ifdef GMX_MPI
 +                  MPI_Comm         comm,
 +#endif
 +                  int              nnodes,
 +                  int              nodeid,
 +                  int              ndata,
 +                  int              commplainsize)
 +{
 +    int lbnd,rbnd,maxlr,b,i;
 +    int exten;
 +    int nn,nk;
 +    pme_grid_comm_t *pgc;
 +    gmx_bool bCont;
 +    int fft_start,fft_end,send_index1,recv_index1;
 +
 +#ifdef GMX_MPI
 +    ol->mpi_comm = comm;
 +#endif
 +
 +    ol->nnodes = nnodes;
 +    ol->nodeid = nodeid;
 +
 +    /* Linear translation of the PME grid won't affect reciprocal space
 +     * calculations, so to optimize we only interpolate "upwards",
 +     * which also means we only have to consider overlap in one direction.
 +     * I.e., particles on this node might also be spread to grid indices
 +     * that belong to higher nodes (modulo nnodes)
 +     */
 +
 +    snew(ol->s2g0,ol->nnodes+1);
 +    snew(ol->s2g1,ol->nnodes);
 +    if (debug) { fprintf(debug,"PME slab boundaries:"); }
 +    for(i=0; i<nnodes; i++)
 +    {
 +        /* s2g0 is the local interpolation grid start.
 +         * s2g1 is the local interpolation grid end.
 +         * Because grid overlap communication only goes forward,
 +         * the grid slabs for the FFTs should be rounded down.
 +         */
 +        ol->s2g0[i] = ( i   *ndata + 0       )/nnodes;
 +        ol->s2g1[i] = ((i+1)*ndata + nnodes-1)/nnodes + norder - 1;
 +
 +        if (debug)
 +        {
 +            fprintf(debug,"  %3d %3d",ol->s2g0[i],ol->s2g1[i]);
 +        }
 +    }
 +    ol->s2g0[nnodes] = ndata;
 +    if (debug) { fprintf(debug,"\n"); }
 +
 +    /* Determine with how many nodes we need to communicate the grid overlap */
 +    b = 0;
 +    do
 +    {
 +        b++;
 +        bCont = FALSE;
 +        for(i=0; i<nnodes; i++)
 +        {
 +            if ((i+b <  nnodes && ol->s2g1[i] > ol->s2g0[i+b]) ||
 +                (i+b >= nnodes && ol->s2g1[i] > ol->s2g0[i+b-nnodes] + ndata))
 +            {
 +                bCont = TRUE;
 +            }
 +        }
 +    }
 +    while (bCont && b < nnodes);
 +    ol->noverlap_nodes = b - 1;
 +
 +    snew(ol->send_id,ol->noverlap_nodes);
 +    snew(ol->recv_id,ol->noverlap_nodes);
 +    for(b=0; b<ol->noverlap_nodes; b++)
 +    {
 +        ol->send_id[b] = (ol->nodeid + (b + 1)) % ol->nnodes;
 +        ol->recv_id[b] = (ol->nodeid - (b + 1) + ol->nnodes) % ol->nnodes;
 +    }
 +    snew(ol->comm_data, ol->noverlap_nodes);
 +
 +    for(b=0; b<ol->noverlap_nodes; b++)
 +    {
 +        pgc = &ol->comm_data[b];
 +        /* Send */
 +        fft_start        = ol->s2g0[ol->send_id[b]];
 +        fft_end          = ol->s2g0[ol->send_id[b]+1];
 +        if (ol->send_id[b] < nodeid)
 +        {
 +            fft_start += ndata;
 +            fft_end   += ndata;
 +        }
 +        send_index1      = ol->s2g1[nodeid];
 +        send_index1      = min(send_index1,fft_end);
 +        pgc->send_index0 = fft_start;
 +        pgc->send_nindex = max(0,send_index1 - pgc->send_index0);
 +
 +        /* We always start receiving to the first index of our slab */
 +        fft_start        = ol->s2g0[ol->nodeid];
 +        fft_end          = ol->s2g0[ol->nodeid+1];
 +        recv_index1      = ol->s2g1[ol->recv_id[b]];
 +        if (ol->recv_id[b] > nodeid)
 +        {
 +            recv_index1 -= ndata;
 +        }
 +        recv_index1      = min(recv_index1,fft_end);
 +        pgc->recv_index0 = fft_start;
 +        pgc->recv_nindex = max(0,recv_index1 - pgc->recv_index0);
 +    }
 +
 +    /* For a non-divisible grid we need pme_order instead of pme_order-1 */
 +    snew(ol->sendbuf,norder*commplainsize);
 +    snew(ol->recvbuf,norder*commplainsize);
 +}
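 +
 +/* Worked example of the slab setup above (illustration only): with
 + * ndata=15, nnodes=4 and norder=4 the loop yields s2g0={0,3,7,11,15}
 + * and s2g1={7,11,15,18}: each slab spreads up to norder-1 points past
 + * its FFT slab, which here reaches only into the next slab, so the
 + * pulse-counting loop ends with noverlap_nodes=1.
 + */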
 +
 +static void
 +make_gridindex5_to_localindex(int n,int local_start,int local_range,
 +                              int **global_to_local,
 +                              real **fraction_shift)
 +{
 +    int i;
 +    int * gtl;
 +    real * fsh;
 +
 +    snew(gtl,5*n);
 +    snew(fsh,5*n);
 +    for(i=0; (i<5*n); i++)
 +    {
 +        /* Determine the global to local grid index */
 +        gtl[i] = (i - local_start + n) % n;
 +        /* For coordinates that fall within the local grid the fraction
 +         * is correct, we don't need to shift it.
 +         */
 +        fsh[i] = 0;
 +        if (local_range < n)
 +        {
 +            /* Due to rounding issues i could be 1 beyond the lower or
 +             * upper boundary of the local grid. Correct the index for this.
 +             * If we shift the index, we need to shift the fraction by
 +             * the same amount in the other direction to not affect
 +             * the weights.
 +             * Note that due to this shifting the weights at the end of
 +             * the spline might change, but that will only involve values
 +             * between zero and values close to the precision of a real,
 +             * which is anyhow the accuracy of the whole mesh calculation.
 +             */
 +            /* With local_range=0 we should not change i=local_start */
 +            if (i % n != local_start)
 +            {
 +                if (gtl[i] == n-1)
 +                {
 +                    gtl[i] = 0;
 +                    fsh[i] = -1;
 +                }
 +                else if (gtl[i] == local_range)
 +                {
 +                    gtl[i] = local_range - 1;
 +                    fsh[i] = 1;
 +                }
 +            }
 +        }
 +    }
 +
 +    *global_to_local = gtl;
 +    *fraction_shift  = fsh;
 +}
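 +
 +/* Example of the index folding above (illustration only): with n=10,
 + * local_start=3 and local_range=5, global index i=2 maps to
 + * gtl[2]=(2-3+10)%10=9=n-1 and is folded to local index 0 with
 + * fsh[2]=-1, while gtl[8]=5=local_range is folded back to 4 with
 + * fsh[8]=+1; the fraction shift compensates the index shift so the
 + * spreading weights are essentially unchanged.
 + */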
 +
 +static void sse_mask_init(pme_spline_work_t *work,int order)
 +{
 +#ifdef PME_SSE
 +    float  tmp[8];
 +    __m128 zero_SSE;
 +    int    of,i;
 +
 +    zero_SSE = _mm_setzero_ps();
 +
 +    for(of=0; of<8-(order-1); of++)
 +    {
 +        for(i=0; i<8; i++)
 +        {
 +            tmp[i] = (i >= of && i < of+order ? 1 : 0);
 +        }
 +        work->mask_SSE0[of] = _mm_loadu_ps(tmp);
 +        work->mask_SSE1[of] = _mm_loadu_ps(tmp+4);
 +        work->mask_SSE0[of] = _mm_cmpgt_ps(work->mask_SSE0[of],zero_SSE);
 +        work->mask_SSE1[of] = _mm_cmpgt_ps(work->mask_SSE1[of],zero_SSE);
 +    }
 +#endif
 +}
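 +
 +/* Example of the masks built above (illustration only): for order=4
 + * and offset of=1, tmp is {0,1,1,1,1,0,0,0}, so mask_SSE0 enables
 + * elements 1-3 of the first 4-float register and mask_SSE1 enables
 + * element 0 of the second; _mm_cmpgt_ps against zero turns these into
 + * the all-ones/all-zeros bit masks that the kernels included from
 + * pme_sse_single.h can AND against out-of-range grid elements.
 + */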
 +
 +static void
 +gmx_pme_check_grid_restrictions(FILE *fplog,char dim,int nnodes,int *nk)
 +{
 +    int nk_new;
 +
 +    if (*nk % nnodes != 0)
 +    {
 +        nk_new = nnodes*(*nk/nnodes + 1);
 +
 +        if (2*nk_new >= 3*(*nk))
 +        {
 +            gmx_fatal(FARGS,"The PME grid size in dim %c (%d) is not divisble by the number of nodes doing PME in dim %c (%d). The grid size would have to be increased by more than 50%% to make the grid divisible. Change the total number of nodes or the number of domain decomposition cells in x or the PME grid %c dimension (and the cut-off).",
 +                      dim,*nk,dim,nnodes,dim);
 +        }
 +
 +        if (fplog != NULL)
 +        {
 +            fprintf(fplog,"\nNOTE: The PME grid size in dim %c (%d) is not divisble by the number of nodes doing PME in dim %c (%d). Increasing the PME grid size in dim %c to %d. This will increase the accuracy and will not decrease the performance significantly on this number of nodes. For optimal performance change the total number of nodes or the number of domain decomposition cells in x or the PME grid %c dimension (and the cut-off).\n\n",
 +                    dim,*nk,dim,nnodes,dim,nk_new,dim);
 +        }
 +
 +        *nk = nk_new;
 +    }
 +}
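 +
 +/* Example of the rounding above (illustration only): nk=25 on
 + * nnodes=4 gives nk_new=4*(25/4+1)=28; since 2*28 < 3*25, the grid is
 + * enlarged to 28 rather than aborting. The fatal error is reserved for
 + * cases where divisibility would cost more than 50% extra grid points.
 + */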
 +
 +int gmx_pme_init(gmx_pme_t *         pmedata,
 +                 t_commrec *         cr,
 +                 int                 nnodes_major,
 +                 int                 nnodes_minor,
 +                 t_inputrec *        ir,
 +                 int                 homenr,
 +                 gmx_bool            bFreeEnergy,
 +                 gmx_bool            bReproducible,
 +                 int                 nthread)
 +{
 +    gmx_pme_t pme=NULL;
 +
 +    pme_atomcomm_t *atc;
 +    ivec ndata;
 +
 +    if (debug)
 +        fprintf(debug,"Creating PME data structures.\n");
 +    snew(pme,1);
 +
 +    pme->redist_init         = FALSE;
 +    pme->sum_qgrid_tmp       = NULL;
 +    pme->sum_qgrid_dd_tmp    = NULL;
 +    pme->buf_nalloc          = 0;
 +    pme->redist_buf_nalloc   = 0;
 +
 +    pme->nnodes              = 1;
 +    pme->bPPnode             = TRUE;
 +
 +    pme->nnodes_major        = nnodes_major;
 +    pme->nnodes_minor        = nnodes_minor;
 +
 +#ifdef GMX_MPI
 +    if (nnodes_major*nnodes_minor > 1)
 +    {
 +        pme->mpi_comm = cr->mpi_comm_mygroup;
 +
 +        MPI_Comm_rank(pme->mpi_comm,&pme->nodeid);
 +        MPI_Comm_size(pme->mpi_comm,&pme->nnodes);
 +        if (pme->nnodes != nnodes_major*nnodes_minor)
 +        {
 +            gmx_incons("PME node count mismatch");
 +        }
 +    }
 +    else
 +    {
 +        pme->mpi_comm = MPI_COMM_NULL;
 +    }
 +#endif
 +
 +    if (pme->nnodes == 1)
 +    {
 +        pme->ndecompdim = 0;
 +        pme->nodeid_major = 0;
 +        pme->nodeid_minor = 0;
 +#ifdef GMX_MPI
 +        pme->mpi_comm_d[0] = pme->mpi_comm_d[1] = MPI_COMM_NULL;
 +#endif
 +    }
 +    else
 +    {
 +        if (nnodes_minor == 1)
 +        {
 +#ifdef GMX_MPI
 +            pme->mpi_comm_d[0] = pme->mpi_comm;
 +            pme->mpi_comm_d[1] = MPI_COMM_NULL;
 +#endif
 +            pme->ndecompdim = 1;
 +            pme->nodeid_major = pme->nodeid;
 +            pme->nodeid_minor = 0;
 +
 +        }
 +        else if (nnodes_major == 1)
 +        {
 +#ifdef GMX_MPI
 +            pme->mpi_comm_d[0] = MPI_COMM_NULL;
 +            pme->mpi_comm_d[1] = pme->mpi_comm;
 +#endif
 +            pme->ndecompdim = 1;
 +            pme->nodeid_major = 0;
 +            pme->nodeid_minor = pme->nodeid;
 +        }
 +        else
 +        {
 +            if (pme->nnodes % nnodes_major != 0)
 +            {
 +                gmx_incons("For 2D PME decomposition, #PME nodes must be divisible by the number of nodes in the major dimension");
 +            }
 +            pme->ndecompdim = 2;
 +
 +#ifdef GMX_MPI
 +            MPI_Comm_split(pme->mpi_comm,pme->nodeid % nnodes_minor,
 +                           pme->nodeid,&pme->mpi_comm_d[0]);  /* My communicator along major dimension */
 +            MPI_Comm_split(pme->mpi_comm,pme->nodeid/nnodes_minor,
 +                           pme->nodeid,&pme->mpi_comm_d[1]);  /* My communicator along minor dimension */
 +
 +            MPI_Comm_rank(pme->mpi_comm_d[0],&pme->nodeid_major);
 +            MPI_Comm_size(pme->mpi_comm_d[0],&pme->nnodes_major);
 +            MPI_Comm_rank(pme->mpi_comm_d[1],&pme->nodeid_minor);
 +            MPI_Comm_size(pme->mpi_comm_d[1],&pme->nnodes_minor);
 +#endif
 +        }
 +        pme->bPPnode = (cr->duty & DUTY_PP);
 +    }
 +
 +    pme->nthread = nthread;
 +
 +    if (ir->ePBC == epbcSCREW)
 +    {
 +        gmx_fatal(FARGS,"pme does not (yet) work with pbc = screw");
 +    }
 +
 +    pme->bFEP        = ((ir->efep != efepNO) && bFreeEnergy);
 +    pme->nkx         = ir->nkx;
 +    pme->nky         = ir->nky;
 +    pme->nkz         = ir->nkz;
 +    pme->bP3M        = (ir->coulombtype == eelP3M_AD || getenv("GMX_PME_P3M") != NULL);
 +    pme->pme_order   = ir->pme_order;
 +    pme->epsilon_r   = ir->epsilon_r;
 +
 +    if (pme->pme_order > PME_ORDER_MAX)
 +    {
 +        gmx_fatal(FARGS,"pme_order (%d) is larger than the maximum allowed value (%d). Modify and recompile the code if you really need such a high order.",
 +                  pme->pme_order,PME_ORDER_MAX);
 +    }
 +
 +    /* Currently pme.c supports only the fft5d FFT code.
 +     * Therefore the grid always needs to be divisible by nnodes.
 +     * When the old 1D code is also supported again, change this check.
 +     *
 +     * This check should be done before calling gmx_pme_init
 +     * and fplog should be passed instead of stderr.
 +     *
 +    if (pme->ndecompdim >= 2)
 +    */
 +    if (pme->ndecompdim >= 1)
 +    {
 +        /*
 +        gmx_pme_check_grid_restrictions(pme->nodeid==0 ? stderr : NULL,
 +                                        'x',nnodes_major,&pme->nkx);
 +        gmx_pme_check_grid_restrictions(pme->nodeid==0 ? stderr : NULL,
 +                                        'y',nnodes_minor,&pme->nky);
 +        */
 +    }
 +
 +    if (pme->nkx <= pme->pme_order*(pme->nnodes_major > 1 ? 2 : 1) ||
 +        pme->nky <= pme->pme_order*(pme->nnodes_minor > 1 ? 2 : 1) ||
 +        pme->nkz <= pme->pme_order)
 +    {
 +        gmx_fatal(FARGS,"The pme grid dimensions need to be larger than pme_order (%d) and in parallel larger than 2*pme_ordern for x and/or y",pme->pme_order);
 +    }
 +
 +    if (pme->nnodes > 1) {
 +        double imbal;
 +
 +#ifdef GMX_MPI
 +        MPI_Type_contiguous(DIM, mpi_type, &(pme->rvec_mpi));
 +        MPI_Type_commit(&(pme->rvec_mpi));
 +#endif
 +
 +        /* Note that the charge spreading and force gathering, which usually
 +         * takes about the same amount of time as FFT+solve_pme,
 +         * is always fully load balanced
 +         * (unless the charge distribution is inhomogeneous).
 +         */
 +
 +        imbal = pme_load_imbalance(pme);
 +        if (imbal >= 1.2 && pme->nodeid_major == 0 && pme->nodeid_minor == 0)
 +        {
 +            fprintf(stderr,
 +                    "\n"
 +                    "NOTE: The load imbalance in PME FFT and solve is %d%%.\n"
 +                    "      For optimal PME load balancing\n"
 +                    "      PME grid_x (%d) and grid_y (%d) should be divisible by #PME_nodes_x (%d)\n"
 +                    "      and PME grid_y (%d) and grid_z (%d) should be divisible by #PME_nodes_y (%d)\n"
 +                    "\n",
 +                    (int)((imbal-1)*100 + 0.5),
 +                    pme->nkx,pme->nky,pme->nnodes_major,
 +                    pme->nky,pme->nkz,pme->nnodes_minor);
 +        }
 +    }
 +
 +    /* For a non-divisible grid we need pme_order instead of pme_order-1 */
 +    /* In sum_qgrid_dd x overlap is copied in place: take padding into account.
 +     * y is always copied through a buffer: we don't need padding in z,
 +     * but we do need the overlap in x because of the communication order.
 +     */
 +    init_overlap_comm(&pme->overlap[0],pme->pme_order,
 +#ifdef GMX_MPI
 +                      pme->mpi_comm_d[0],
 +#endif
 +                      pme->nnodes_major,pme->nodeid_major,
 +                      pme->nkx,
 +                      (div_round_up(pme->nky,pme->nnodes_minor)+pme->pme_order)*(pme->nkz+pme->pme_order-1));
 +
 +    init_overlap_comm(&pme->overlap[1],pme->pme_order,
 +#ifdef GMX_MPI
 +                      pme->mpi_comm_d[1],
 +#endif
 +                      pme->nnodes_minor,pme->nodeid_minor,
 +                      pme->nky,
 +                      (div_round_up(pme->nkx,pme->nnodes_major)+pme->pme_order)*pme->nkz);
 +
 +    /* Check for a limitation of the (current) sum_fftgrid_dd code */
 +    if (pme->nthread > 1 &&
 +        (pme->overlap[0].noverlap_nodes > 1 ||
 +         pme->overlap[1].noverlap_nodes > 1))
 +    {
 +        gmx_fatal(FARGS,"With threads the number of grid lines per node along x and or y should be pme_order (%d) or more or exactly pme_order-1",pme->pme_order);
 +    }
 +
 +    snew(pme->bsp_mod[XX],pme->nkx);
 +    snew(pme->bsp_mod[YY],pme->nky);
 +    snew(pme->bsp_mod[ZZ],pme->nkz);
 +
 +    /* The required size of the interpolation grid, including overlap.
 +     * The allocated size (pmegrid_n?) might be slightly larger.
 +     */
 +    pme->pmegrid_nx = pme->overlap[0].s2g1[pme->nodeid_major] -
 +                      pme->overlap[0].s2g0[pme->nodeid_major];
 +    pme->pmegrid_ny = pme->overlap[1].s2g1[pme->nodeid_minor] -
 +                      pme->overlap[1].s2g0[pme->nodeid_minor];
 +    pme->pmegrid_nz_base = pme->nkz;
 +    pme->pmegrid_nz = pme->pmegrid_nz_base + pme->pme_order - 1;
 +    set_grid_alignment(&pme->pmegrid_nz,pme->pme_order);
 +
 +    pme->pmegrid_start_ix = pme->overlap[0].s2g0[pme->nodeid_major];
 +    pme->pmegrid_start_iy = pme->overlap[1].s2g0[pme->nodeid_minor];
 +    pme->pmegrid_start_iz = 0;
 +
 +    make_gridindex5_to_localindex(pme->nkx,
 +                                  pme->pmegrid_start_ix,
 +                                  pme->pmegrid_nx - (pme->pme_order-1),
 +                                  &pme->nnx,&pme->fshx);
 +    make_gridindex5_to_localindex(pme->nky,
 +                                  pme->pmegrid_start_iy,
 +                                  pme->pmegrid_ny - (pme->pme_order-1),
 +                                  &pme->nny,&pme->fshy);
 +    make_gridindex5_to_localindex(pme->nkz,
 +                                  pme->pmegrid_start_iz,
 +                                  pme->pmegrid_nz_base,
 +                                  &pme->nnz,&pme->fshz);
 +
 +    pmegrids_init(&pme->pmegridA,
 +                  pme->pmegrid_nx,pme->pmegrid_ny,pme->pmegrid_nz,
 +                  pme->pmegrid_nz_base,
 +                  pme->pme_order,
 +                  pme->nthread,
 +                  pme->overlap[0].s2g1[pme->nodeid_major]-pme->overlap[0].s2g0[pme->nodeid_major+1],
 +                  pme->overlap[1].s2g1[pme->nodeid_minor]-pme->overlap[1].s2g0[pme->nodeid_minor+1]);
 +
 +    sse_mask_init(&pme->spline_work,pme->pme_order);
 +
 +    ndata[0] = pme->nkx;
 +    ndata[1] = pme->nky;
 +    ndata[2] = pme->nkz;
 +
 +    /* This routine will allocate the grid data to fit the FFTs */
 +    gmx_parallel_3dfft_init(&pme->pfft_setupA,ndata,
 +                            &pme->fftgridA,&pme->cfftgridA,
 +                            pme->mpi_comm_d,
 +                            pme->overlap[0].s2g0,pme->overlap[1].s2g0,
 +                            bReproducible,pme->nthread);
 +
 +    if (bFreeEnergy)
 +    {
 +        pmegrids_init(&pme->pmegridB,
 +                      pme->pmegrid_nx,pme->pmegrid_ny,pme->pmegrid_nz,
 +                      pme->pmegrid_nz_base,
 +                      pme->pme_order,
 +                      pme->nthread,
 +                      pme->nkx % pme->nnodes_major != 0,
 +                      pme->nky % pme->nnodes_minor != 0);
 +
 +        gmx_parallel_3dfft_init(&pme->pfft_setupB,ndata,
 +                                &pme->fftgridB,&pme->cfftgridB,
 +                                pme->mpi_comm_d,
 +                                pme->overlap[0].s2g0,pme->overlap[1].s2g0,
 +                                bReproducible,pme->nthread);
 +    }
 +    else
 +    {
 +        pme->pmegridB.grid.grid = NULL;
 +        pme->fftgridB           = NULL;
 +        pme->cfftgridB          = NULL;
 +    }
 +
 +    if (!pme->bP3M)
 +    {
 +        /* Use plain SPME B-spline interpolation */
 +        make_bspline_moduli(pme->bsp_mod,pme->nkx,pme->nky,pme->nkz,pme->pme_order);
 +    }
 +    else
 +    {
 +        /* Use the P3M grid-optimized influence function */
 +        make_p3m_bspline_moduli(pme->bsp_mod,pme->nkx,pme->nky,pme->nkz,pme->pme_order);
 +    }
 +
 +    /* Use atc[0] for spreading */
 +    init_atomcomm(pme,&pme->atc[0],cr,nnodes_major > 1 ? 0 : 1,TRUE);
 +    if (pme->ndecompdim >= 2)
 +    {
 +        init_atomcomm(pme,&pme->atc[1],cr,1,FALSE);
 +    }
 +
 +    if (pme->nnodes == 1) {
 +        pme->atc[0].n = homenr;
 +        pme_realloc_atomcomm_things(&pme->atc[0]);
 +    }
 +
 +    {
 +        int thread;
 +
 +        /* Use fft5d, order after FFT is y major, z, x minor */
 +
 +        snew(pme->work,pme->nthread);
 +        for(thread=0; thread<pme->nthread; thread++)
 +        {
 +            realloc_work(&pme->work[thread],pme->nkx);
 +        }
 +    }
 +
 +    *pmedata = pme;
 +
 +    return 0;
 +}
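 +
 +/* Sketch of a minimal (serial, non-free-energy) call sequence for the
 + * routines above; illustration only, with cr, ir, homenr and fplog
 + * assumed to come from the caller's setup as in the mdrun runner:
 + *
 + *   gmx_pme_t pme = NULL;
 + *   gmx_pme_init(&pme,cr,1,1,ir,homenr,FALSE,FALSE,1);
 + *   ... per step: gmx_pme_do(pme,...); ...
 + *   gmx_pme_destroy(fplog,&pme);
 + */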
 +
 +
 +static void copy_local_grid(gmx_pme_t pme,
 +                            pmegrids_t *pmegrids,int thread,real *fftgrid)
 +{
 +    ivec local_fft_ndata,local_fft_offset,local_fft_size;
 +    int  fft_my,fft_mz;
 +    int  nsx,nsy,nsz;
 +    ivec nf;
 +    int  offx,offy,offz,x,y,z,i0,i0t;
 +    int  d;
 +    pmegrid_t *pmegrid;
 +    real *grid_th;
 +
 +    gmx_parallel_3dfft_real_limits(pme->pfft_setupA,
 +                                   local_fft_ndata,
 +                                   local_fft_offset,
 +                                   local_fft_size);
 +    fft_my = local_fft_size[YY];
 +    fft_mz = local_fft_size[ZZ];
 +
 +    pmegrid = &pmegrids->grid_th[thread];
 +
 +    nsx = pmegrid->n[XX];
 +    nsy = pmegrid->n[YY];
 +    nsz = pmegrid->n[ZZ];
 +
 +    for(d=0; d<DIM; d++)
 +    {
 +        nf[d] = min(pmegrid->n[d] - (pmegrid->order - 1),
 +                    local_fft_ndata[d] - pmegrid->offset[d]);
 +    }
 +
 +    offx = pmegrid->offset[XX];
 +    offy = pmegrid->offset[YY];
 +    offz = pmegrid->offset[ZZ];
 +
 +    /* Directly copy the non-overlapping parts of the local grids.
 +     * This also initializes the full grid.
 +     */
 +    grid_th = pmegrid->grid;
 +    for(x=0; x<nf[XX]; x++)
 +    {
 +        for(y=0; y<nf[YY]; y++)
 +        {
 +            i0  = ((offx + x)*fft_my + (offy + y))*fft_mz + offz;
 +            i0t = (x*nsy + y)*nsz;
 +            for(z=0; z<nf[ZZ]; z++)
 +            {
 +                fftgrid[i0+z] = grid_th[i0t+z];
 +            }
 +        }
 +    }
 +}
 +
 +static void print_sendbuf(gmx_pme_t pme,real *sendbuf)
 +{
 +    ivec local_fft_ndata,local_fft_offset,local_fft_size;
 +    pme_overlap_t *overlap;
 +    int nind;
 +    int i,x,y,z,n;
 +
 +    gmx_parallel_3dfft_real_limits(pme->pfft_setupA,
 +                                   local_fft_ndata,
 +                                   local_fft_offset,
 +                                   local_fft_size);
 +    /* Major dimension */
 +    overlap = &pme->overlap[0];
 +
 +    nind   = overlap->comm_data[0].send_nindex;
 +
 +    for(y=0; y<local_fft_ndata[YY]; y++) {
 +         printf(" %2d",y);
 +    }
 +    printf("\n");
 +
 +    i = 0;
 +    for(x=0; x<nind; x++) {
 +        for(y=0; y<local_fft_ndata[YY]; y++) {
 +            n = 0;
 +            for(z=0; z<local_fft_ndata[ZZ]; z++) {
 +                if (sendbuf[i] != 0) n++;
 +                i++;
 +            }
 +            printf(" %2d",n);
 +        }
 +        printf("\n");
 +    }
 +}
 +
 +static void
 +reduce_threadgrid_overlap(gmx_pme_t pme,
 +                          const pmegrids_t *pmegrids,int thread,
 +                          real *fftgrid,real *commbuf_x,real *commbuf_y)
 +{
 +    ivec local_fft_ndata,local_fft_offset,local_fft_size;
 +    int  fft_nx,fft_ny,fft_nz;
 +    int  fft_my,fft_mz;
 +    int  buf_my=-1;
 +    int  nsx,nsy,nsz;
 +    ivec ne;
 +    int  offx,offy,offz,x,y,z,i0,i0t;
 +    int  sx,sy,sz,fx,fy,fz,tx1,ty1,tz1,ox,oy,oz;
 +    gmx_bool bClearBufX,bClearBufY,bClearBufXY,bClearBuf;
 +    gmx_bool bCommX,bCommY;
 +    int  d;
 +    int  thread_f;
 +    const pmegrid_t *pmegrid,*pmegrid_g,*pmegrid_f;
 +    const real *grid_th;
 +    real *commbuf=NULL;
 +
 +    gmx_parallel_3dfft_real_limits(pme->pfft_setupA,
 +                                   local_fft_ndata,
 +                                   local_fft_offset,
 +                                   local_fft_size);
 +    fft_nx = local_fft_ndata[XX];
 +    fft_ny = local_fft_ndata[YY];
 +    fft_nz = local_fft_ndata[ZZ];
 +
 +    fft_my = local_fft_size[YY];
 +    fft_mz = local_fft_size[ZZ];
 +
 +    /* This routine is called when all threads have finished spreading.
 +     * Here each thread sums grid contributions calculated by other threads
 +     * to the thread local grid volume.
 +     * To minimize the number of grid copying operations,
 +     * this routine sums immediately from the pmegrid to the fftgrid.
 +     */
 +
 +    /* Determine which part of the full node grid we should operate on,
 +     * this is our thread local part of the full grid.
 +     */
 +    pmegrid = &pmegrids->grid_th[thread];
 +
 +    for(d=0; d<DIM; d++)
 +    {
 +        ne[d] = min(pmegrid->offset[d]+pmegrid->n[d]-(pmegrid->order-1),
 +                    local_fft_ndata[d]);
 +    }
 +
 +    offx = pmegrid->offset[XX];
 +    offy = pmegrid->offset[YY];
 +    offz = pmegrid->offset[ZZ];
 +
 +
 +    bClearBufX  = TRUE;
 +    bClearBufY  = TRUE;
 +    bClearBufXY = TRUE;
 +
 +    /* Now loop over all the thread data blocks that contribute
 +     * to the grid region we (our thread) are operating on.
 +     */
 +    /* Note that fft_nx/fft_ny is equal to the number of grid points
 +     * between the first point of our node grid and the one of the next node.
 +     */
 +    for(sx=0; sx>=-pmegrids->nthread_comm[XX]; sx--)
 +    {
 +        fx = pmegrid->ci[XX] + sx;
 +        ox = 0;
 +        bCommX = FALSE;
 +        if (fx < 0) {
 +            fx += pmegrids->nc[XX];
 +            ox -= fft_nx;
 +            bCommX = (pme->nnodes_major > 1);
 +        }
 +        pmegrid_g = &pmegrids->grid_th[fx*pmegrids->nc[YY]*pmegrids->nc[ZZ]];
 +        ox += pmegrid_g->offset[XX];
 +        if (!bCommX)
 +        {
 +            tx1 = min(ox + pmegrid_g->n[XX],ne[XX]);
 +        }
 +        else
 +        {
 +            tx1 = min(ox + pmegrid_g->n[XX],pme->pme_order);
 +        }
 +
 +        for(sy=0; sy>=-pmegrids->nthread_comm[YY]; sy--)
 +        {
 +            fy = pmegrid->ci[YY] + sy;
 +            oy = 0;
 +            bCommY = FALSE;
 +            if (fy < 0) {
 +                fy += pmegrids->nc[YY];
 +                oy -= fft_ny;
 +                bCommY = (pme->nnodes_minor > 1);
 +            }
 +            pmegrid_g = &pmegrids->grid_th[fy*pmegrids->nc[ZZ]];
 +            oy += pmegrid_g->offset[YY];
 +            if (!bCommY)
 +            {
 +                ty1 = min(oy + pmegrid_g->n[YY],ne[YY]);
 +            }
 +            else
 +            {
 +                ty1 = min(oy + pmegrid_g->n[YY],pme->pme_order);
 +            }
 +
 +            for(sz=0; sz>=-pmegrids->nthread_comm[ZZ]; sz--)
 +            {
 +                fz = pmegrid->ci[ZZ] + sz;
 +                oz = 0;
 +                if (fz < 0)
 +                {
 +                    fz += pmegrids->nc[ZZ];
 +                    oz -= fft_nz;
 +                }
 +                pmegrid_g = &pmegrids->grid_th[fz];
 +                oz += pmegrid_g->offset[ZZ];
 +                tz1 = min(oz + pmegrid_g->n[ZZ],ne[ZZ]);
 +
 +                if (sx == 0 && sy == 0 && sz == 0)
 +                {
 +                    /* We have already added our local contribution
 +                     * before calling this routine, so skip it here.
 +                     */
 +                    continue;
 +                }
 +
 +                thread_f = (fx*pmegrids->nc[YY] + fy)*pmegrids->nc[ZZ] + fz;
 +
 +                pmegrid_f = &pmegrids->grid_th[thread_f];
 +
 +                grid_th = pmegrid_f->grid;
 +
 +                nsx = pmegrid_f->n[XX];
 +                nsy = pmegrid_f->n[YY];
 +                nsz = pmegrid_f->n[ZZ];
 +
 +#ifdef DEBUG_PME_REDUCE
 +                printf("n%d t%d add %d  %2d %2d %2d  %2d %2d %2d  %2d-%2d %2d-%2d, %2d-%2d %2d-%2d, %2d-%2d %2d-%2d\n",
 +                       pme->nodeid,thread,thread_f,
 +                       pme->pmegrid_start_ix,
 +                       pme->pmegrid_start_iy,
 +                       pme->pmegrid_start_iz,
 +                       sx,sy,sz,
 +                       offx-ox,tx1-ox,offx,tx1,
 +                       offy-oy,ty1-oy,offy,ty1,
 +                       offz-oz,tz1-oz,offz,tz1);
 +#endif
 +
 +                if (!(bCommX || bCommY))
 +                {
 +                    /* Copy from the thread local grid to the node grid */
 +                    for(x=offx; x<tx1; x++)
 +                    {
 +                        for(y=offy; y<ty1; y++)
 +                        {
 +                            i0  = (x*fft_my + y)*fft_mz;
 +                            i0t = ((x - ox)*nsy + (y - oy))*nsz - oz;
 +                            for(z=offz; z<tz1; z++)
 +                            {
 +                                fftgrid[i0+z] += grid_th[i0t+z];
 +                            }
 +                        }
 +                    }
 +                }
 +                else
 +                {
 +                    /* The order of this conditional decides
 +                     * where the corner volume gets stored with x+y decomp.
 +                     */
 +                    if (bCommY)
 +                    {
 +                        commbuf = commbuf_y;
 +                        buf_my  = ty1 - offy;
 +                        if (bCommX)
 +                        {
 +                            /* We index commbuf modulo the local grid size */
 +                            commbuf += buf_my*fft_nx*fft_nz;
 +
 +                            bClearBuf  = bClearBufXY;
 +                            bClearBufXY = FALSE;
 +                        }
 +                        else
 +                        {
 +                            bClearBuf  = bClearBufY;
 +                            bClearBufY = FALSE;
 +                        }
 +                    }
 +                    else
 +                    {
 +                        commbuf = commbuf_x;
 +                        buf_my  = fft_ny;
 +                        bClearBuf  = bClearBufX;
 +                        bClearBufX = FALSE;
 +                    }
 +
 +                    /* Copy to the communication buffer */
 +                    for(x=offx; x<tx1; x++)
 +                    {
 +                        for(y=offy; y<ty1; y++)
 +                        {
 +                            i0  = (x*buf_my + y)*fft_nz;
 +                            i0t = ((x - ox)*nsy + (y - oy))*nsz - oz;
 +
 +                            if (bClearBuf)
 +                            {
 +                                /* First access of commbuf, initialize it */
 +                                for(z=offz; z<tz1; z++)
 +                                {
 +                                    commbuf[i0+z]  = grid_th[i0t+z];
 +                                }
 +                            }
 +                            else
 +                            {
 +                                for(z=offz; z<tz1; z++)
 +                                {
 +                                    commbuf[i0+z] += grid_th[i0t+z];
 +                                }
 +                            }
 +                        }
 +                    }
 +                }
 +            }
 +        }
 +    }
 +}
 +
 +
 +static void sum_fftgrid_dd(gmx_pme_t pme,real *fftgrid)
 +{
 +    ivec local_fft_ndata,local_fft_offset,local_fft_size;
 +    pme_overlap_t *overlap;
 +    int  send_nindex;
 +    int  recv_index0,recv_nindex;
 +#ifdef GMX_MPI
 +    MPI_Status stat;
 +#endif
 +    int  ipulse,send_id,recv_id,datasize,gridsize,size_yx;
 +    real *sendptr,*recvptr;
 +    int  x,y,z,indg,indb;
 +
 +    /* Note that this routine is only used for forward communication.
 +     * Since the force gathering, unlike the charge spreading,
 +     * can be trivially parallelized over the particles,
 +     * the backwards process is much simpler and can use the "old"
 +     * communication setup.
 +     */
 +
 +    gmx_parallel_3dfft_real_limits(pme->pfft_setupA,
 +                                   local_fft_ndata,
 +                                   local_fft_offset,
 +                                   local_fft_size);
 +
 +    /* Currently supports only a single communication pulse */
 +
 +/* for(ipulse=0;ipulse<overlap->noverlap_nodes;ipulse++) */
 +    if (pme->nnodes_minor > 1)
 +    {
 +        /* Minor dimension */
 +        overlap = &pme->overlap[1];
 +
 +        if (pme->nnodes_major > 1)
 +        {
 +            size_yx = pme->overlap[0].comm_data[0].send_nindex;
 +        }
 +        else
 +        {
 +            size_yx = 0;
 +        }
 +        datasize = (local_fft_ndata[XX]+size_yx)*local_fft_ndata[ZZ];
 +
 +        ipulse = 0;
 +
 +        send_id = overlap->send_id[ipulse];
 +        recv_id = overlap->recv_id[ipulse];
 +        send_nindex   = overlap->comm_data[ipulse].send_nindex;
 +        /* recv_index0   = overlap->comm_data[ipulse].recv_index0; */
 +        recv_index0 = 0;
 +        recv_nindex   = overlap->comm_data[ipulse].recv_nindex;
 +
 +        sendptr = overlap->sendbuf;
 +        recvptr = overlap->recvbuf;
 +
 +        /*
 +        printf("node %d comm %2d x %2d x %2d\n",pme->nodeid,
 +               local_fft_ndata[XX]+size_yx,send_nindex,local_fft_ndata[ZZ]);
 +        printf("node %d send %f, %f\n",pme->nodeid,
 +               sendptr[0],sendptr[send_nindex*datasize-1]);
 +        */
 +
 +#ifdef GMX_MPI
 +        MPI_Sendrecv(sendptr,send_nindex*datasize,GMX_MPI_REAL,
 +                     send_id,ipulse,
 +                     recvptr,recv_nindex*datasize,GMX_MPI_REAL,
 +                     recv_id,ipulse,
 +                     overlap->mpi_comm,&stat);
 +#endif
 +
 +        for(x=0; x<local_fft_ndata[XX]; x++)
 +        {
 +            for(y=0; y<recv_nindex; y++)
 +            {
 +                indg = (x*local_fft_size[YY] + y)*local_fft_size[ZZ];
 +                indb = (x*recv_nindex        + y)*local_fft_ndata[ZZ];
 +                for(z=0; z<local_fft_ndata[ZZ]; z++)
 +                {
 +                    fftgrid[indg+z] += recvptr[indb+z];
 +                }
 +            }
 +        }
 +        if (pme->nnodes_major > 1)
 +        {
 +            sendptr = pme->overlap[0].sendbuf;
 +            for(x=0; x<size_yx; x++)
 +            {
 +                for(y=0; y<recv_nindex; y++)
 +                {
 +                    indg = (x*local_fft_ndata[YY] + y)*local_fft_ndata[ZZ];
 +                    indb = ((local_fft_ndata[XX] + x)*recv_nindex +y)*local_fft_ndata[ZZ];
 +                    for(z=0; z<local_fft_ndata[ZZ]; z++)
 +                    {
 +                        sendptr[indg+z] += recvptr[indb+z];
 +                    }
 +                }
 +            }
 +        }
 +    }
 +
 +    /* for(ipulse=0;ipulse<overlap->noverlap_nodes;ipulse++) */
 +    if (pme->nnodes_major > 1)
 +    {
 +        /* Major dimension */
 +        overlap = &pme->overlap[0];
 +
 +        datasize = local_fft_ndata[YY]*local_fft_ndata[ZZ];
 +        gridsize = local_fft_size[YY] *local_fft_size[ZZ];
 +
 +        ipulse = 0;
 +
 +        send_id = overlap->send_id[ipulse];
 +        recv_id = overlap->recv_id[ipulse];
 +        send_nindex   = overlap->comm_data[ipulse].send_nindex;
 +        /* recv_index0   = overlap->comm_data[ipulse].recv_index0; */
 +        recv_index0 = 0;
 +        recv_nindex   = overlap->comm_data[ipulse].recv_nindex;
 +
 +        sendptr = overlap->sendbuf;
 +        recvptr = overlap->recvbuf;
 +
 +        if (debug != NULL)
 +        {
 +            fprintf(debug,"PME fftgrid comm %2d x %2d x %2d\n",
 +                   send_nindex,local_fft_ndata[YY],local_fft_ndata[ZZ]);
 +        }
 +
 +#ifdef GMX_MPI
 +        MPI_Sendrecv(sendptr,send_nindex*datasize,GMX_MPI_REAL,
 +                     send_id,ipulse,
 +                     recvptr,recv_nindex*datasize,GMX_MPI_REAL,
 +                     recv_id,ipulse,
 +                     overlap->mpi_comm,&stat);
 +#endif
 +
 +        for(x=0; x<recv_nindex; x++)
 +        {
 +            for(y=0; y<local_fft_ndata[YY]; y++)
 +            {
 +                indg = (x*local_fft_size[YY]  + y)*local_fft_size[ZZ];
 +                indb = (x*local_fft_ndata[YY] + y)*local_fft_ndata[ZZ];
 +                for(z=0; z<local_fft_ndata[ZZ]; z++)
 +                {
 +                    fftgrid[indg+z] += recvptr[indb+z];
 +                }
 +            }
 +        }
 +    }
 +}
 +
 +
 +static void spread_on_grid(gmx_pme_t pme,
 +                           pme_atomcomm_t *atc,pmegrids_t *grids,
 +                           gmx_bool bCalcSplines,gmx_bool bSpread,
 +                           real *fftgrid)
 +{
 +    int nthread,thread;
 +#ifdef PME_TIME_THREADS
 +    gmx_cycles_t c1,c2,c3,ct1a,ct1b,ct1c;
 +    static double cs1=0,cs2=0,cs3=0;
 +    static double cs1a[6]={0,0,0,0,0,0};
 +    static int cnt=0;
 +#endif
 +
 +    nthread = pme->nthread;
++    assert(nthread>0);
 +
 +#ifdef PME_TIME_THREADS
 +    c1 = omp_cyc_start();
 +#endif
 +    if (bCalcSplines)
 +    {
 +#pragma omp parallel for num_threads(nthread) schedule(static)
 +        for(thread=0; thread<nthread; thread++)
 +        {
 +            int start,end;
 +
 +            start = atc->n* thread   /nthread;
 +            end   = atc->n*(thread+1)/nthread;
 +
 +            /* Compute fftgrid index for all atoms,
 +             * with help of some extra variables.
 +             */
 +            calc_interpolation_idx(pme,atc,start,end,thread);
 +        }
 +    }
 +#ifdef PME_TIME_THREADS
 +    c1 = omp_cyc_end(c1);
 +    cs1 += (double)c1;
 +#endif
 +
 +#ifdef PME_TIME_THREADS
 +    c2 = omp_cyc_start();
 +#endif
 +#pragma omp parallel for num_threads(nthread) schedule(static)
 +    for(thread=0; thread<nthread; thread++)
 +    {
 +        splinedata_t *spline;
 +        pmegrid_t *grid;
 +
 +        /* make local bsplines  */
 +        if (grids == NULL || grids->nthread == 1)
 +        {
 +            spline = &atc->spline[0];
 +
 +            spline->n = atc->n;
 +
 +            grid = &grids->grid;
 +        }
 +        else
 +        {
 +            spline = &atc->spline[thread];
 +
 +            make_thread_local_ind(atc,thread,spline);
 +
 +            grid = &grids->grid_th[thread];
 +        }
 +
 +        if (bCalcSplines)
 +        {
 +            make_bsplines(spline->theta,spline->dtheta,pme->pme_order,
 +                          atc->fractx,spline->n,spline->ind,atc->q,pme->bFEP);
 +        }
 +
 +        if (bSpread)
 +        {
 +            /* put local atoms on grid. */
 +#ifdef PME_TIME_SPREAD
 +            ct1a = omp_cyc_start();
 +#endif
 +            spread_q_bsplines_thread(grid,atc,spline,&pme->spline_work);
 +
 +            if (grids->nthread > 1)
 +            {
 +                copy_local_grid(pme,grids,thread,fftgrid);
 +            }
 +#ifdef PME_TIME_SPREAD
 +            ct1a = omp_cyc_end(ct1a);
 +            cs1a[thread] += (double)ct1a;
 +#endif
 +        }
 +    }
 +#ifdef PME_TIME_THREADS
 +    c2 = omp_cyc_end(c2);
 +    cs2 += (double)c2;
 +#endif
 +
 +    if (bSpread && grids->nthread > 1)
 +    {
 +#ifdef PME_TIME_THREADS
 +        c3 = omp_cyc_start();
 +#endif
 +#pragma omp parallel for num_threads(grids->nthread) schedule(static)
 +        for(thread=0; thread<grids->nthread; thread++)
 +        {
 +            reduce_threadgrid_overlap(pme,grids,thread,
 +                                      fftgrid,
 +                                      pme->overlap[0].sendbuf,
 +                                      pme->overlap[1].sendbuf);
 +#ifdef PRINT_PME_SENDBUF
 +            print_sendbuf(pme,pme->overlap[0].sendbuf);
 +#endif
 +        }
 +#ifdef PME_TIME_THREADS
 +        c3 = omp_cyc_end(c3);
 +        cs3 += (double)c3;
 +#endif
 +
 +        if (pme->nnodes > 1)
 +        {
 +            /* Communicate the overlapping part of the fftgrid */
 +            sum_fftgrid_dd(pme,fftgrid);
 +        }
 +    }
 +
 +#ifdef PME_TIME_THREADS
 +    cnt++;
 +    if (cnt % 20 == 0)
 +    {
 +        printf("idx %.2f spread %.2f red %.2f",
 +               cs1*1e-9,cs2*1e-9,cs3*1e-9);
 +#ifdef PME_TIME_SPREAD
 +        for(thread=0; thread<nthread; thread++)
 +            printf(" %.2f",cs1a[thread]*1e-9);
 +#endif
 +        printf("\n");
 +    }
 +#endif
 +}
 +
 +
 +static void dump_grid(FILE *fp,
 +                      int sx,int sy,int sz,int nx,int ny,int nz,
 +                      int my,int mz,const real *g)
 +{
 +    int x,y,z;
 +
 +    for(x=0; x<nx; x++)
 +    {
 +        for(y=0; y<ny; y++)
 +        {
 +            for(z=0; z<nz; z++)
 +            {
 +                fprintf(fp,"%2d %2d %2d %6.3f\n",
 +                        sx+x,sy+y,sz+z,g[(x*my + y)*mz + z]);
 +            }
 +        }
 +    }
 +}
 +
 +static void dump_local_fftgrid(gmx_pme_t pme,const real *fftgrid)
 +{
 +    ivec local_fft_ndata,local_fft_offset,local_fft_size;
 +
 +    gmx_parallel_3dfft_real_limits(pme->pfft_setupA,
 +                                   local_fft_ndata,
 +                                   local_fft_offset,
 +                                   local_fft_size);
 +
 +    dump_grid(stderr,
 +              pme->pmegrid_start_ix,
 +              pme->pmegrid_start_iy,
 +              pme->pmegrid_start_iz,
 +              pme->pmegrid_nx-pme->pme_order+1,
 +              pme->pmegrid_ny-pme->pme_order+1,
 +              pme->pmegrid_nz-pme->pme_order+1,
 +              local_fft_size[YY],
 +              local_fft_size[ZZ],
 +              fftgrid);
 +}
 +
 +
 +void gmx_pme_calc_energy(gmx_pme_t pme,int n,rvec *x,real *q,real *V)
 +{
 +    pme_atomcomm_t *atc;
 +    pmegrids_t *grid;
 +
 +    if (pme->nnodes > 1)
 +    {
 +        gmx_incons("gmx_pme_calc_energy called in parallel");
 +    }
 +    if (pme->bFEP)
 +    {
 +        gmx_incons("gmx_pme_calc_energy with free energy");
 +    }
 +
 +    atc = &pme->atc_energy;
 +    atc->nthread   = 1;
 +    if (atc->spline == NULL)
 +    {
 +        snew(atc->spline,atc->nthread);
 +    }
 +    atc->nslab     = 1;
 +    atc->bSpread   = TRUE;
 +    atc->pme_order = pme->pme_order;
 +    atc->n         = n;
 +    pme_realloc_atomcomm_things(atc);
 +    atc->x         = x;
 +    atc->q         = q;
 +
 +    /* We only use the A-charges grid */
 +    grid = &pme->pmegridA;
 +
 +    spread_on_grid(pme,atc,NULL,TRUE,FALSE,pme->fftgridA);
 +
 +    *V = gather_energy_bsplines(pme,grid->grid.grid,atc);
 +}
 +
 +
 +static void reset_pmeonly_counters(t_commrec *cr,gmx_wallcycle_t wcycle,
 +        t_nrnb *nrnb,t_inputrec *ir, gmx_large_int_t step_rel)
 +{
 +    /* Reset all the counters related to performance over the run */
 +    wallcycle_stop(wcycle,ewcRUN);
 +    wallcycle_reset_all(wcycle);
 +    init_nrnb(nrnb);
 +    ir->init_step += step_rel;
 +    ir->nsteps    -= step_rel;
 +    wallcycle_start(wcycle,ewcRUN);
 +}
 +
 +
 +int gmx_pmeonly(gmx_pme_t pme,
 +                t_commrec *cr,    t_nrnb *nrnb,
 +                gmx_wallcycle_t wcycle,
 +                real ewaldcoeff,  gmx_bool bGatherOnly,
 +                t_inputrec *ir)
 +{
 +    gmx_pme_pp_t pme_pp;
 +    int  natoms;
 +    matrix box;
 +    rvec *x_pp=NULL,*f_pp=NULL;
 +    real *chargeA=NULL,*chargeB=NULL;
 +    real lambda=0;
 +    int  maxshift_x=0,maxshift_y=0;
 +    real energy,dvdlambda;
 +    matrix vir;
 +    float cycles;
 +    int  count;
 +    gmx_bool bEnerVir;
 +    gmx_large_int_t step,step_rel;
 +
 +
 +    pme_pp = gmx_pme_pp_init(cr);
 +
 +    init_nrnb(nrnb);
 +
 +    count = 0;
 +    do /****** this is a quasi-loop over time steps! */
 +    {
 +        /* Domain decomposition */
 +        natoms = gmx_pme_recv_q_x(pme_pp,
 +                                  &chargeA,&chargeB,box,&x_pp,&f_pp,
 +                                  &maxshift_x,&maxshift_y,
 +                                  &pme->bFEP,&lambda,
 +                                  &bEnerVir,
 +                                  &step);
 +
 +        if (natoms == -1) {
 +            /* We should stop: break out of the loop */
 +            break;
 +        }
 +
 +        step_rel = step - ir->init_step;
 +
 +        if (count == 0)
 +            wallcycle_start(wcycle,ewcRUN);
 +
 +        wallcycle_start(wcycle,ewcPMEMESH);
 +
 +        dvdlambda = 0;
 +        clear_mat(vir);
 +        gmx_pme_do(pme,0,natoms,x_pp,f_pp,chargeA,chargeB,box,
 +                   cr,maxshift_x,maxshift_y,nrnb,wcycle,vir,ewaldcoeff,
 +                   &energy,lambda,&dvdlambda,
 +                   GMX_PME_DO_ALL_F | (bEnerVir ? GMX_PME_CALC_ENER_VIR : 0));
 +
 +        cycles = wallcycle_stop(wcycle,ewcPMEMESH);
 +
 +        gmx_pme_send_force_vir_ener(pme_pp,
 +                                    f_pp,vir,energy,dvdlambda,
 +                                    cycles);
 +
 +        count++;
 +
 +        if (step_rel == wcycle_get_reset_counters(wcycle))
 +        {
 +            /* Reset all the counters related to performance over the run */
 +            reset_pmeonly_counters(cr,wcycle,nrnb,ir,step_rel);
 +            wcycle_set_reset_counters(wcycle, 0);
 +        }
 +
 +    } /***** end of quasi-loop, we stop with the break above */
 +    while (TRUE);
 +
 +    return 0;
 +}
 +
 +int gmx_pme_do(gmx_pme_t pme,
 +               int start,       int homenr,
 +               rvec x[],        rvec f[],
 +               real *chargeA,   real *chargeB,
 +               matrix box, t_commrec *cr,
 +               int  maxshift_x, int maxshift_y,
 +               t_nrnb *nrnb,    gmx_wallcycle_t wcycle,
 +               matrix vir,      real ewaldcoeff,
 +               real *energy,    real lambda,
 +               real *dvdlambda, int flags)
 +{
 +    int     q,d,i,j,ntot,npme;
 +    int     nx,ny,nz;
 +    int     n_d,local_ny;
 +    pme_atomcomm_t *atc=NULL;
 +    pmegrids_t *pmegrid=NULL;
 +    real    *grid=NULL;
 +    real    *ptr;
 +    rvec    *x_d,*f_d;
 +    real    *charge=NULL,*q_d;
 +    real    energy_AB[2];
 +    matrix  vir_AB[2];
 +    gmx_bool bClearF;
 +    gmx_parallel_3dfft_t pfft_setup;
 +    real *  fftgrid;
 +    t_complex * cfftgrid;
 +    int     thread;
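 +    /* Decode the task flags once: bCalcEnerVir gates the energy/virial
 +     * work in the solve step and the q-loop epilogue, and bCalcF gates
 +     * the inverse FFT plus the force gather and redistribution. */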
++    const gmx_bool bCalcEnerVir = flags & GMX_PME_CALC_ENER_VIR;
++    const gmx_bool bCalcF = flags & GMX_PME_CALC_F;
 +
 +    assert(pme->nnodes > 0);
 +    assert(pme->nnodes == 1 || pme->ndecompdim > 0);
 +
 +    if (pme->nnodes > 1) {
 +        atc = &pme->atc[0];
 +        atc->npd = homenr;
 +        if (atc->npd > atc->pd_nalloc) {
 +            atc->pd_nalloc = over_alloc_dd(atc->npd);
 +            srenew(atc->pd,atc->pd_nalloc);
 +        }
 +        atc->maxshift = (atc->dimind==0 ? maxshift_x : maxshift_y);
 +    }
 +    else
 +    {
 +        /* This could be necessary for TPI */
 +        pme->atc[0].n = homenr;
 +    }
 +
 +    for(q=0; q<(pme->bFEP ? 2 : 1); q++) {
 +        if (q == 0) {
 +            pmegrid = &pme->pmegridA;
 +            fftgrid = pme->fftgridA;
 +            cfftgrid = pme->cfftgridA;
 +            pfft_setup = pme->pfft_setupA;
 +            charge = chargeA+start;
 +        } else {
 +            pmegrid = &pme->pmegridB;
 +            fftgrid = pme->fftgridB;
 +            cfftgrid = pme->cfftgridB;
 +            pfft_setup = pme->pfft_setupB;
 +            charge = chargeB+start;
 +        }
 +        grid = pmegrid->grid.grid;
 +        /* Unpack structure */
 +        if (debug) {
 +            fprintf(debug,"PME: nnodes = %d, nodeid = %d\n",
 +                    cr->nnodes,cr->nodeid);
 +            fprintf(debug,"Grid = %p\n",(void*)grid);
 +            if (grid == NULL)
 +                gmx_fatal(FARGS,"No grid!");
 +        }
 +        where();
 +
 +        m_inv_ur0(box,pme->recipbox);
 +
 +        if (pme->nnodes == 1) {
 +            atc = &pme->atc[0];
 +            if (DOMAINDECOMP(cr)) {
 +                atc->n = homenr;
 +                pme_realloc_atomcomm_things(atc);
 +            }
 +            atc->x = x;
 +            atc->q = charge;
 +            atc->f = f;
 +        } else {
 +            wallcycle_start(wcycle,ewcPME_REDISTXF);
 +            for(d=pme->ndecompdim-1; d>=0; d--)
 +            {
 +                if (d == pme->ndecompdim-1)
 +                {
 +                    n_d = homenr;
 +                    x_d = x + start;
 +                    q_d = charge;
 +                }
 +                else
 +                {
 +                    n_d = pme->atc[d+1].n;
 +                    x_d = atc->x;
 +                    q_d = atc->q;
 +                }
 +                atc = &pme->atc[d];
 +                atc->npd = n_d;
 +                if (atc->npd > atc->pd_nalloc) {
 +                    atc->pd_nalloc = over_alloc_dd(atc->npd);
 +                    srenew(atc->pd,atc->pd_nalloc);
 +                }
 +                atc->maxshift = (atc->dimind==0 ? maxshift_x : maxshift_y);
 +                pme_calc_pidx_wrapper(n_d,pme->recipbox,x_d,atc);
 +                where();
 +
 +                /* Redistribute x (only once) and qA or qB */
 +                if (DOMAINDECOMP(cr)) {
 +                    dd_pmeredist_x_q(pme, n_d, q==0, x_d, q_d, atc);
 +                } else {
 +                    pmeredist_pd(pme, TRUE, n_d, q==0, x_d, q_d, atc);
 +                }
 +            }
 +            where();
 +
 +            wallcycle_stop(wcycle,ewcPME_REDISTXF);
 +        }
 +
 +        if (debug)
 +            fprintf(debug,"Node= %6d, pme local particles=%6d\n",
 +                    cr->nodeid,atc->n);
 +
 +        if (flags & GMX_PME_SPREAD_Q)
 +        {
 +            wallcycle_start(wcycle,ewcPME_SPREADGATHER);
 +
 +            /* Spread the charges on a grid */
 +            spread_on_grid(pme,&pme->atc[0],pmegrid,q==0,TRUE,fftgrid);
 +
 +            if (q == 0)
 +            {
 +                inc_nrnb(nrnb,eNR_WEIGHTS,DIM*atc->n);
 +            }
 +            inc_nrnb(nrnb,eNR_SPREADQBSP,
 +                     pme->pme_order*pme->pme_order*pme->pme_order*atc->n);
 +
 +            if (pme->nthread == 1)
 +            {
 +                wrap_periodic_pmegrid(pme,grid);
 +
 +                /* sum contributions to local grid from other nodes */
 +#ifdef GMX_MPI
 +                if (pme->nnodes > 1)
 +                {
 +                    gmx_sum_qgrid_dd(pme,grid,GMX_SUM_QGRID_FORWARD);
 +                    where();
 +                }
 +#endif
 +
 +                copy_pmegrid_to_fftgrid(pme,grid,fftgrid);
 +            }
 +
 +            wallcycle_stop(wcycle,ewcPME_SPREADGATHER);
 +
 +            /*
 +            dump_local_fftgrid(pme,fftgrid);
 +            exit(0);
 +            */
 +        }
 +
 +        /* Here we start a large thread parallel region */
 +#pragma omp parallel for num_threads(pme->nthread) schedule(static)
 +        for(thread=0; thread<pme->nthread; thread++)
 +        {
 +            if (flags & GMX_PME_SOLVE)
 +            {
 +                int loop_count;
 +
 +                /* do 3d-fft */
 +                if (thread == 0)
 +                {
 +                    wallcycle_start(wcycle,ewcPME_FFT);
 +                }
 +                gmx_parallel_3dfft_execute(pfft_setup,GMX_FFT_REAL_TO_COMPLEX,
 +                                           fftgrid,cfftgrid,thread,wcycle);
 +                if (thread == 0)
 +                {
 +                    wallcycle_stop(wcycle,ewcPME_FFT);
 +                }
 +                where();
 +
 +                /* solve in k-space for our local cells */
 +                if (thread == 0)
 +                {
 +                    wallcycle_start(wcycle,ewcPME_SOLVE);
 +                }
 +                loop_count =
 +                    solve_pme_yzx(pme,cfftgrid,ewaldcoeff,
 +                                  box[XX][XX]*box[YY][YY]*box[ZZ][ZZ],
 +                                  bCalcEnerVir,
 +                                  pme->nthread,thread);
 +                if (thread == 0)
 +                {
 +                    wallcycle_stop(wcycle,ewcPME_SOLVE);
 +                    where();
 +                    inc_nrnb(nrnb,eNR_SOLVEPME,loop_count);
 +                }
 +            }
 +
 +            if (bCalcF)
 +            {
 +                /* do 3d-invfft */
 +                if (thread == 0)
 +                {
 +                    where();
 +                    wallcycle_start(wcycle,ewcPME_FFT);
 +                }
 +                gmx_parallel_3dfft_execute(pfft_setup,GMX_FFT_COMPLEX_TO_REAL,
 +                                           cfftgrid,fftgrid,thread,wcycle);
 +                if (thread == 0)
 +                {
 +                    wallcycle_stop(wcycle,ewcPME_FFT);
 +
 +                    where();
 +
 +                    if (pme->nodeid == 0)
 +                    {
 +                        ntot = pme->nkx*pme->nky*pme->nkz;
 +                        npme  = ntot*log((real)ntot)/log(2.0);
 +                        inc_nrnb(nrnb,eNR_FFT,2*npme);
 +                    }
 +
 +                    wallcycle_start(wcycle,ewcPME_SPREADGATHER);
 +                }
 +
 +                copy_fftgrid_to_pmegrid(pme,fftgrid,grid,pme->nthread,thread);
 +            }
 +        }
 +        /* End of thread parallel section.
 +         * With MPI we have to synchronize here before gmx_sum_qgrid_dd.
 +         */
 +
 +        if (bCalcF)
 +        {
 +            /* distribute local grid to all nodes */
 +#ifdef GMX_MPI
 +            if (pme->nnodes > 1) {
 +                gmx_sum_qgrid_dd(pme,grid,GMX_SUM_QGRID_BACKWARD);
 +            }
 +#endif
 +            where();
 +
 +            unwrap_periodic_pmegrid(pme,grid);
 +
 +            /* interpolate forces for our local atoms */
 +
 +            where();
 +
 +            /* If we are running without parallelization,
 +             * atc->f is the actual force array, not a buffer,
 +             * therefore we should not clear it.
 +             */
 +            bClearF = (q == 0 && PAR(cr));
 +#pragma omp parallel for num_threads(pme->nthread) schedule(static)
 +            for(thread=0; thread<pme->nthread; thread++)
 +            {
 +                gather_f_bsplines(pme,grid,bClearF,atc,
 +                                  &atc->spline[thread],
 +                                  pme->bFEP ? (q==0 ? 1.0-lambda : lambda) : 1.0);
 +            }
 +
 +            where();
 +
 +            inc_nrnb(nrnb,eNR_GATHERFBSP,
 +                     pme->pme_order*pme->pme_order*pme->pme_order*pme->atc[0].n);
 +            wallcycle_stop(wcycle,ewcPME_SPREADGATHER);
 +        }
 +
 +        if (bCalcEnerVir)
 +        {
 +            /* This should only be called on the master thread
 +             * and after the threads have synchronized.
 +             */
 +            get_pme_ener_vir(pme,pme->nthread,&energy_AB[q],vir_AB[q]);
 +        }
 +    } /* of q-loop */
 +
 +    if (bCalcF && pme->nnodes > 1) {
 +        wallcycle_start(wcycle,ewcPME_REDISTXF);
 +        for(d=0; d<pme->ndecompdim; d++)
 +        {
 +            atc = &pme->atc[d];
 +            if (d == pme->ndecompdim - 1)
 +            {
 +                n_d = homenr;
 +                f_d = f + start;
 +            }
 +            else
 +            {
 +                n_d = pme->atc[d+1].n;
 +                f_d = pme->atc[d+1].f;
 +            }
 +            if (DOMAINDECOMP(cr)) {
 +                dd_pmeredist_f(pme,atc,n_d,f_d,
 +                               d==pme->ndecompdim-1 && pme->bPPnode);
 +            } else {
 +                pmeredist_pd(pme, FALSE, n_d, TRUE, f_d, NULL, atc);
 +            }
 +        }
 +
 +        wallcycle_stop(wcycle,ewcPME_REDISTXF);
 +    }
 +    where();
 +
 +    if (bCalcEnerVir)
 +    {
 +        if (!pme->bFEP) {
 +            *energy = energy_AB[0];
 +            m_add(vir,vir_AB[0],vir);
 +        } else {
 +            *energy = (1.0-lambda)*energy_AB[0] + lambda*energy_AB[1];
 +            *dvdlambda += energy_AB[1] - energy_AB[0];
 +            for(i=0; i<DIM; i++)
 +            {
 +                for(j=0; j<DIM; j++)
 +                {
 +                    vir[i][j] += (1.0-lambda)*vir_AB[0][i][j] + 
 +                        lambda*vir_AB[1][i][j];
 +                }
 +            }
 +        }
 +    }
 +    else
 +    {
 +        *energy = 0;
 +    }
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"PME mesh energy: %g\n",*energy);
 +    }
 +
 +    return 0;
 +}
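
For the free-energy (bFEP) case the q-loop above runs twice, once per charge state, and the final block linearly mixes the results: E(lambda) = (1-lambda)*E_A + lambda*E_B, so dE/dlambda = E_B - E_A, with the virial mixed using the same weights. A tiny standalone check of that arithmetic (energies invented for illustration):

#include <stdio.h>

int main(void)
{
    /* Invented energies for the A and B charge states. */
    double energy_AB[2] = { -120.0, -95.0 };
    double lambda = 0.25, dvdlambda = 0.0, energy;

    /* Same mixing as the bFEP branch above:
     * E(lambda) = (1-lambda)*E_A + lambda*E_B, dE/dlambda = E_B - E_A. */
    energy     = (1.0 - lambda)*energy_AB[0] + lambda*energy_AB[1];
    dvdlambda += energy_AB[1] - energy_AB[0];

    printf("E(%.2f) = %g, dE/dlambda = %g\n", lambda, energy, dvdlambda);
    return 0;   /* prints E(0.25) = -113.75, dE/dlambda = 25 */
}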
index 498041ae2a1e0b1d2bfea27570ee80fecf33b9b4,0000000000000000000000000000000000000000..3ffb6fd138a6eae6c3f71858a851c786aa576bf7
mode 100644,000000..100644
--- /dev/null
@@@ -1,1866 -1,0 +1,1866 @@@
- #include "membed.h"
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include "typedefs.h"
 +#include "smalloc.h"
 +#include "sysstuff.h"
 +#include "vec.h"
 +#include "statutil.h"
 +#include "vcm.h"
 +#include "mdebin.h"
 +#include "nrnb.h"
 +#include "calcmu.h"
 +#include "index.h"
 +#include "vsite.h"
 +#include "update.h"
 +#include "ns.h"
 +#include "trnio.h"
 +#include "xtcio.h"
 +#include "mdrun.h"
 +#include "confio.h"
 +#include "network.h"
 +#include "pull.h"
 +#include "xvgr.h"
 +#include "physics.h"
 +#include "names.h"
 +#include "xmdrun.h"
 +#include "ionize.h"
 +#include "disre.h"
 +#include "orires.h"
 +#include "dihre.h"
 +#include "pme.h"
 +#include "mdatoms.h"
 +#include "repl_ex.h"
 +#include "qmmm.h"
 +#include "domdec.h"
 +#include "partdec.h"
 +#include "topsort.h"
 +#include "coulomb.h"
 +#include "constr.h"
 +#include "shellfc.h"
 +#include "compute_io.h"
 +#include "mvdata.h"
 +#include "checkpoint.h"
 +#include "mtop_util.h"
 +#include "sighandler.h"
 +#include "string2.h"
++#include "membed.h"
 +
 +#ifdef GMX_LIB_MPI
 +#include <mpi.h>
 +#endif
 +#ifdef GMX_THREAD_MPI
 +#include "tmpi.h"
 +#endif
 +
 +#ifdef GMX_FAHCORE
 +#include "corewrap.h"
 +#endif
 +
 +
 +double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
 +             const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
 +             int nstglobalcomm,
 +             gmx_vsite_t *vsite,gmx_constr_t constr,
 +             int stepout,t_inputrec *ir,
 +             gmx_mtop_t *top_global,
 +             t_fcdata *fcd,
 +             t_state *state_global,
 +             t_mdatoms *mdatoms,
 +             t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +             gmx_edsam_t ed,t_forcerec *fr,
 +             int repl_ex_nst,int repl_ex_seed,gmx_membed_t *membed,
 +             real cpt_period,real max_hours,
 +             const char *deviceOptions,
 +             unsigned long Flags,
 +             gmx_runtime_t *runtime)
 +{
 +    gmx_mdoutf_t *outf;
 +    gmx_large_int_t step,step_rel;
 +    double     run_time;
 +    double     t,t0,lam0;
 +    gmx_bool       bGStatEveryStep,bGStat,bNstEner,bCalcEnerPres;
 +    gmx_bool       bNS,bNStList,bSimAnn,bStopCM,bRerunMD,bNotLastFrame=FALSE,
 +               bFirstStep,bStateFromTPX,bInitStep,bLastStep,
 +               bBornRadii,bStartingFromCpt;
 +    gmx_bool       bDoDHDL=FALSE;
 +    gmx_bool       do_ene,do_log,do_verbose,bRerunWarnNoV=TRUE,
 +               bForceUpdate=FALSE,bCPT;
 +    int        mdof_flags;
 +    gmx_bool       bMasterState;
 +    int        force_flags,cglo_flags;
 +    tensor     force_vir,shake_vir,total_vir,tmp_vir,pres;
 +    int        i,m;
 +    t_trxstatus *status;
 +    rvec       mu_tot;
 +    t_vcm      *vcm;
 +    t_state    *bufstate=NULL;   
 +    matrix     *scale_tot,pcoupl_mu,M,ebox;
 +    gmx_nlheur_t nlh;
 +    t_trxframe rerun_fr;
 +    gmx_repl_ex_t repl_ex=NULL;
 +    int        nchkpt=1;
 +
 +    gmx_localtop_t *top;      
 +    t_mdebin *mdebin=NULL;
 +    t_state    *state=NULL;
 +    rvec       *f_global=NULL;
 +    int        n_xtc=-1;
 +    rvec       *x_xtc=NULL;
 +    gmx_enerdata_t *enerd;
 +    rvec       *f=NULL;
 +    gmx_global_stat_t gstat;
 +    gmx_update_t upd=NULL;
 +    t_graph    *graph=NULL;
 +    globsig_t   gs;
 +
 +    gmx_bool        bFFscan;
 +    gmx_groups_t *groups;
 +    gmx_ekindata_t *ekind, *ekind_save;
 +    gmx_shellfc_t shellfc;
 +    int         count,nconverged=0;
 +    real        timestep=0;
 +    double      tcount=0;
 +    gmx_bool        bIonize=FALSE;
 +    gmx_bool        bTCR=FALSE,bConverged=TRUE,bOK,bSumEkinhOld,bExchanged;
 +    gmx_bool        bAppend;
 +    gmx_bool        bResetCountersHalfMaxH=FALSE;
 +    gmx_bool        bVV,bIterations,bFirstIterate,bTemp,bPres,bTrotter;
 +    real        mu_aver=0,dvdl;
 +    int         a0,a1,gnx=0,ii;
 +    atom_id     *grpindex=NULL;
 +    char        *grpname;
 +    t_coupl_rec *tcr=NULL;
 +    rvec        *xcopy=NULL,*vcopy=NULL,*cbuf=NULL;
 +    matrix      boxcopy={{0}},lastbox;
 +      tensor      tmpvir;
 +      real        fom,oldfom,veta_save,pcurr,scalevir,tracevir;
 +      real        vetanew = 0;
 +    double      cycles;
 +      real        saved_conserved_quantity = 0;
 +    real        last_ekin = 0;
 +      int         iter_i;
 +      t_extmass   MassQ;
 +    int         **trotter_seq; 
 +    char        sbuf[STEPSTRSIZE],sbuf2[STEPSTRSIZE];
 +    int         handled_stop_condition=gmx_stop_cond_none; /* compare to get_stop_condition*/
 +    gmx_iterate_t iterate;
 +    gmx_large_int_t multisim_nsteps=-1; /* number of steps to do before the
 +                                          first multisim simulation stops. If
 +                                          equal to zero, don't communicate any
 +                                          more between multisims.*/
 +#ifdef GMX_FAHCORE
 +    /* Temporary addition for FAHCORE checkpointing */
 +    int chkpt_ret;
 +#endif
 +
 +    /* Check for special mdrun options */
 +    bRerunMD = (Flags & MD_RERUN);
 +    bIonize  = (Flags & MD_IONIZE);
 +    bFFscan  = (Flags & MD_FFSCAN);
 +    bAppend  = (Flags & MD_APPENDFILES);
 +    if (Flags & MD_RESETCOUNTERSHALFWAY)
 +    {
 +        if (ir->nsteps > 0)
 +        {
 +            /* Signal to reset the counters half the simulation steps. */
 +            wcycle_set_reset_counters(wcycle,ir->nsteps/2);
 +        }
 +        /* Signal to reset the counters halfway through the simulation time. */
 +        bResetCountersHalfMaxH = (max_hours > 0);
 +    }
 +
 +    /* md-vv uses averaged full step velocities for T-control 
 +       md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control)
 +       md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */
 +    bVV = EI_VV(ir->eI);
 +    if (bVV) /* to store the initial velocities while computing virial */
 +    {
 +        snew(cbuf,top_global->natoms);
 +    }
 +    /* all the iterative cases - only if there are constraints */
 +    bIterations = ((IR_NPT_TROTTER(ir)) && (constr) && (!bRerunMD));
 +    bTrotter = (bVV && (IR_NPT_TROTTER(ir) || (IR_NVT_TROTTER(ir))));        
 +    
 +    if (bRerunMD)
 +    {
 +        /* Since we don't know if the frames read are related in any way,
 +         * rebuild the neighborlist at every step.
 +         */
 +        ir->nstlist       = 1;
 +        ir->nstcalcenergy = 1;
 +        nstglobalcomm     = 1;
 +    }
 +
 +    check_ir_old_tpx_versions(cr,fplog,ir,top_global);
 +
 +    nstglobalcomm = check_nstglobalcomm(fplog,cr,nstglobalcomm,ir);
 +    bGStatEveryStep = (nstglobalcomm == 1);
 +
 +    if (!bGStatEveryStep && ir->nstlist == -1 && fplog != NULL)
 +    {
 +        fprintf(fplog,
 +                "To reduce the energy communication with nstlist = -1\n"
 +                "the neighbor list validity should not be checked at every step,\n"
 +                "this means that exact integration is not guaranteed.\n"
 +                "The neighbor list validity is checked after:\n"
 +                "  <n.list life time> - 2*std.dev.(n.list life time)  steps.\n"
 +                "In most cases this will result in exact integration.\n"
 +                "This reduces the energy communication by a factor of 2 to 3.\n"
 +                "If you want less energy communication, set nstlist > 3.\n\n");
 +    }
 +
 +    if (bRerunMD || bFFscan)
 +    {
 +        ir->nstxtcout = 0;
 +    }
 +    groups = &top_global->groups;
 +
 +    /* Initial values */
 +    init_md(fplog,cr,ir,oenv,&t,&t0,&state_global->lambda,&lam0,
 +            nrnb,top_global,&upd,
 +            nfile,fnm,&outf,&mdebin,
 +            force_vir,shake_vir,mu_tot,&bSimAnn,&vcm,state_global,Flags);
 +
 +    clear_mat(total_vir);
 +    clear_mat(pres);
 +    /* Energy terms and groups */
 +    snew(enerd,1);
 +    init_enerdata(top_global->groups.grps[egcENER].nr,ir->n_flambda,enerd);
 +    if (DOMAINDECOMP(cr))
 +    {
 +        f = NULL;
 +    }
 +    else
 +    {
 +        snew(f,top_global->natoms);
 +    }
 +
 +    /* Kinetic energy data */
 +    snew(ekind,1);
 +    init_ekindata(fplog,top_global,&(ir->opts),ekind);
 +    /* needed for iteration of constraints */
 +    snew(ekind_save,1);
 +    init_ekindata(fplog,top_global,&(ir->opts),ekind_save);
 +    /* Copy the cos acceleration to the groups struct */    
 +    ekind->cosacc.cos_accel = ir->cos_accel;
 +
 +    gstat = global_stat_init(ir);
 +    debug_gmx();
 +
 +    /* Check for polarizable models and flexible constraints */
 +    shellfc = init_shell_flexcon(fplog,
 +                                 top_global,n_flexible_constraints(constr),
 +                                 (ir->bContinuation || 
 +                                  (DOMAINDECOMP(cr) && !MASTER(cr))) ?
 +                                 NULL : state_global->x);
 +
 +    if (DEFORM(*ir))
 +    {
 +#ifdef GMX_THREAD_MPI
 +        tMPI_Thread_mutex_lock(&deform_init_box_mutex);
 +#endif
 +        set_deform_reference_box(upd,
 +                                 deform_init_init_step_tpx,
 +                                 deform_init_box_tpx);
 +#ifdef GMX_THREAD_MPI
 +        tMPI_Thread_mutex_unlock(&deform_init_box_mutex);
 +#endif
 +    }
 +
 +    {
 +        double io = compute_io(ir,top_global->natoms,groups,mdebin->ebin->nener,1);
 +        if ((io > 2000) && MASTER(cr))
 +            fprintf(stderr,
 +                    "\nWARNING: This run will generate roughly %.0f Mb of data\n\n",
 +                    io);
 +    }
 +
 +    if (DOMAINDECOMP(cr)) {
 +        top = dd_init_local_top(top_global);
 +
 +        snew(state,1);
 +        dd_init_local_state(cr->dd,state_global,state);
 +
 +        if (DDMASTER(cr->dd) && ir->nstfout) {
 +            snew(f_global,state_global->natoms);
 +        }
 +    } else {
 +        if (PAR(cr)) {
 +            /* Initialize the particle decomposition and split the topology */
 +            top = split_system(fplog,top_global,ir,cr);
 +
 +            pd_cg_range(cr,&fr->cg0,&fr->hcg);
 +            pd_at_range(cr,&a0,&a1);
 +        } else {
 +            top = gmx_mtop_generate_local_top(top_global,ir);
 +
 +            a0 = 0;
 +            a1 = top_global->natoms;
 +        }
 +
 +        state = partdec_init_local_state(cr,state_global);
 +        f_global = f;
 +
 +        atoms2md(top_global,ir,0,NULL,a0,a1-a0,mdatoms);
 +
 +        if (vsite) {
 +            set_vsite_top(vsite,top,mdatoms,cr);
 +        }
 +
 +        if (ir->ePBC != epbcNONE && !ir->bPeriodicMols) {
 +            graph = mk_graph(fplog,&(top->idef),0,top_global->natoms,FALSE,FALSE);
 +        }
 +
 +        if (shellfc) {
 +            make_local_shells(cr,mdatoms,shellfc);
 +        }
 +
 +        if (ir->pull && PAR(cr)) {
 +            dd_make_local_pull_groups(NULL,ir->pull,mdatoms);
 +        }
 +    }
 +
 +    if (DOMAINDECOMP(cr))
 +    {
 +        /* Distribute the charge groups over the nodes from the master node */
 +        dd_partition_system(fplog,ir->init_step,cr,TRUE,1,
 +                            state_global,top_global,ir,
 +                            state,&f,mdatoms,top,fr,
 +                            vsite,shellfc,constr,
 +                            nrnb,wcycle,FALSE);
 +    }
 +
 +    update_mdatoms(mdatoms,state->lambda);
 +
 +    if (MASTER(cr))
 +    {
 +        if (opt2bSet("-cpi",nfile,fnm))
 +        {
 +            /* Update mdebin with energy history if appending to output files */
 +            if ( Flags & MD_APPENDFILES )
 +            {
 +                restore_energyhistory_from_state(mdebin,&state_global->enerhist);
 +            }
 +            else
 +            {
 +                /* We might have read an energy history from checkpoint,
 +                 * free the allocated memory and reset the counts.
 +                 */
 +                done_energyhistory(&state_global->enerhist);
 +                init_energyhistory(&state_global->enerhist);
 +            }
 +        }
 +        /* Set the initial energy history in state by updating once */
 +        update_energyhistory(&state_global->enerhist,mdebin);
 +    } 
 +
 +    if ((state->flags & (1<<estLD_RNG)) && (Flags & MD_READ_RNG)) {
 +        /* Set the random state if we read a checkpoint file */
 +        set_stochd_state(upd,state);
 +    }
 +
 +    /* Initialize constraints */
 +    if (constr) {
 +        if (!DOMAINDECOMP(cr))
 +            set_constraints(constr,top,ir,mdatoms,cr);
 +    }
 +
 +    /* Check whether we have to do GCT stuff */
 +    bTCR = ftp2bSet(efGCT,nfile,fnm);
 +    if (bTCR) {
 +        if (MASTER(cr)) {
 +            fprintf(stderr,"Will do General Coupling Theory!\n");
 +        }
 +        gnx = top_global->mols.nr;
 +        snew(grpindex,gnx);
 +        for(i=0; (i<gnx); i++) {
 +            grpindex[i] = i;
 +        }
 +    }
 +
 +    if (repl_ex_nst > 0)
 +    {
 +        /* We need to be sure replica exchange can only occur
 +         * when the energies are current */
 +        check_nst_param(fplog,cr,"nstcalcenergy",ir->nstcalcenergy,
 +                        "repl_ex_nst",&repl_ex_nst);
 +        /* This check needs to happen before inter-simulation
 +         * signals are initialized, too */
 +    }
 +    if (repl_ex_nst > 0 && MASTER(cr))
 +        repl_ex = init_replica_exchange(fplog,cr->ms,state_global,ir,
 +                                        repl_ex_nst,repl_ex_seed);
 +
 +    if (!ir->bContinuation && !bRerunMD)
 +    {
 +        if (mdatoms->cFREEZE && (state->flags & (1<<estV)))
 +        {
 +            /* Set the velocities of frozen particles to zero */
 +            for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++)
 +            {
 +                for(m=0; m<DIM; m++)
 +                {
 +                    if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m])
 +                    {
 +                        state->v[i][m] = 0;
 +                    }
 +                }
 +            }
 +        }
 +
 +        if (constr)
 +        {
 +            /* Constrain the initial coordinates and velocities */
 +            do_constrain_first(fplog,constr,ir,mdatoms,state,f,
 +                               graph,cr,nrnb,fr,top,shake_vir);
 +        }
 +        if (vsite)
 +        {
 +            /* Construct the virtual sites for the initial configuration */
 +            construct_vsites(fplog,vsite,state->x,nrnb,ir->delta_t,NULL,
 +                             top->idef.iparams,top->idef.il,
 +                             fr->ePBC,fr->bMolPBC,graph,cr,state->box);
 +        }
 +    }
 +
 +    debug_gmx();
 +  
 +    /* I'm assuming we need global communication the first time! MRS */
 +    cglo_flags = (CGLO_TEMPERATURE | CGLO_GSTAT
 +                  | ((ir->comm_mode != ecmNO) ? CGLO_STOPCM:0)
 +                  | (bVV ? CGLO_PRESSURE:0)
 +                  | (bVV ? CGLO_CONSTRAINT:0)
 +                  | (bRerunMD ? CGLO_RERUNMD:0)
 +                  | ((Flags & MD_READ_EKIN) ? CGLO_READEKIN:0));
 +    
 +    bSumEkinhOld = FALSE;
 +    compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
 +                    NULL,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
 +                    constr,NULL,FALSE,state->box,
 +                    top_global,&pcurr,top_global->natoms,&bSumEkinhOld,cglo_flags);
 +    if (ir->eI == eiVVAK) {
 +        /* a second call to get the half step temperature initialized as well */ 
 +        /* we do the same call as above, but turn the pressure off -- internally to 
 +           compute_globals, this is recognized as a velocity verlet half-step 
 +           kinetic energy calculation. This minimizes excess variables, but 
 +           perhaps loses some logic?*/
 +        
 +        compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
 +                        NULL,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
 +                        constr,NULL,FALSE,state->box,
 +                        top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
 +                        cglo_flags &~ (CGLO_STOPCM | CGLO_PRESSURE));
 +    }
 +    
 +    /* Calculate the initial half step temperature, and save the ekinh_old */
 +    if (!(Flags & MD_STARTFROMCPT)) 
 +    {
 +        for(i=0; (i<ir->opts.ngtc); i++) 
 +        {
 +            copy_mat(ekind->tcstat[i].ekinh,ekind->tcstat[i].ekinh_old);
 +        } 
 +    }
 +    if (ir->eI != eiVV) 
 +    {
 +        enerd->term[F_TEMP] *= 2; /* result of averages being done over previous and current step,
 +                                     and there is no previous step */
 +    }
 +    
 +    /* if using an iterative algorithm, we need to create a working copy of the state. */
 +    if (bIterations) 
 +    {
 +            bufstate = init_bufstate(state);
 +    }
 +    if (bFFscan) 
 +    {
 +        snew(xcopy,state->natoms);
 +        snew(vcopy,state->natoms);
 +        copy_rvecn(state->x,xcopy,0,state->natoms);
 +        copy_rvecn(state->v,vcopy,0,state->natoms);
 +        copy_mat(state->box,boxcopy);
 +    } 
 +    
 +    /* need to make an initialization call to get the Trotter variables set, as well as other constants for non-trotter
 +       temperature control */
 +    trotter_seq = init_npt_vars(ir,state,&MassQ,bTrotter);
 +    
 +    if (MASTER(cr))
 +    {
 +        if (constr && !ir->bContinuation && ir->eConstrAlg == econtLINCS)
 +        {
 +            fprintf(fplog,
 +                    "RMS relative constraint deviation after constraining: %.2e\n",
 +                    constr_rmsd(constr,FALSE));
 +        }
 +        if (EI_STATE_VELOCITY(ir->eI))
 +        {
 +            fprintf(fplog,"Initial temperature: %g K\n",enerd->term[F_TEMP]);
 +        }
 +        if (bRerunMD)
 +        {
 +            fprintf(stderr,"starting md rerun '%s', reading coordinates from"
 +                    " input trajectory '%s'\n\n",
 +                    *(top_global->name),opt2fn("-rerun",nfile,fnm));
 +            if (bVerbose)
 +            {
 +                fprintf(stderr,"Calculated time to finish depends on nsteps from "
 +                        "run input file,\nwhich may not correspond to the time "
 +                        "needed to process input trajectory.\n\n");
 +            }
 +        }
 +        else
 +        {
 +            char tbuf[20];
 +            fprintf(stderr,"starting mdrun '%s'\n",
 +                    *(top_global->name));
 +            if (ir->nsteps >= 0)
 +            {
 +                sprintf(tbuf,"%8.1f",(ir->init_step+ir->nsteps)*ir->delta_t);
 +            }
 +            else
 +            {
 +                sprintf(tbuf,"%s","infinite");
 +            }
 +            if (ir->init_step > 0)
 +            {
 +                fprintf(stderr,"%s steps, %s ps (continuing from step %s, %8.1f ps).\n",
 +                        gmx_step_str(ir->init_step+ir->nsteps,sbuf),tbuf,
 +                        gmx_step_str(ir->init_step,sbuf2),
 +                        ir->init_step*ir->delta_t);
 +            }
 +            else
 +            {
 +                fprintf(stderr,"%s steps, %s ps.\n",
 +                        gmx_step_str(ir->nsteps,sbuf),tbuf);
 +            }
 +        }
 +        fprintf(fplog,"\n");
 +    }
 +
 +    /* Set and write start time */
 +    runtime_start(runtime);
 +    print_date_and_time(fplog,cr->nodeid,"Started mdrun",runtime);
 +    wallcycle_start(wcycle,ewcRUN);
 +    if (fplog)
 +        fprintf(fplog,"\n");
 +
 +    /* safest point to do file checkpointing is here.  More general point would be immediately before integrator call */
 +#ifdef GMX_FAHCORE
 +    chkpt_ret=fcCheckPointParallel( cr->nodeid,
 +                                    NULL,0);
 +    if ( chkpt_ret == 0 ) 
 +        gmx_fatal( 3,__FILE__,__LINE__, "Checkpoint error on step %d\n", 0 );
 +#endif
 +
 +    debug_gmx();
 +    /***********************************************************
 +     *
 +     *             Loop over MD steps 
 +     *
 +     ************************************************************/
 +
 +    /* if rerunMD then read coordinates and velocities from input trajectory */
 +    if (bRerunMD)
 +    {
 +        if (getenv("GMX_FORCE_UPDATE"))
 +        {
 +            bForceUpdate = TRUE;
 +        }
 +
 +        rerun_fr.natoms = 0;
 +        if (MASTER(cr))
 +        {
 +            bNotLastFrame = read_first_frame(oenv,&status,
 +                                             opt2fn("-rerun",nfile,fnm),
 +                                             &rerun_fr,TRX_NEED_X | TRX_READ_V);
 +            if (rerun_fr.natoms != top_global->natoms)
 +            {
 +                gmx_fatal(FARGS,
 +                          "Number of atoms in trajectory (%d) does not match the "
 +                          "run input file (%d)\n",
 +                          rerun_fr.natoms,top_global->natoms);
 +            }
 +            if (ir->ePBC != epbcNONE)
 +            {
 +                if (!rerun_fr.bBox)
 +                {
 +                    gmx_fatal(FARGS,"Rerun trajectory frame step %d time %f does not contain a box, while pbc is used",rerun_fr.step,rerun_fr.time);
 +                }
 +                if (max_cutoff2(ir->ePBC,rerun_fr.box) < sqr(fr->rlistlong))
 +                {
 +                    gmx_fatal(FARGS,"Rerun trajectory frame step %d time %f has too small box dimensions",rerun_fr.step,rerun_fr.time);
 +                }
 +            }
 +        }
 +
 +        if (PAR(cr))
 +        {
 +            rerun_parallel_comm(cr,&rerun_fr,&bNotLastFrame);
 +        }
 +
 +        if (ir->ePBC != epbcNONE)
 +        {
 +            /* Set the shift vectors.
 +             * Necessary here when we have a static box different from the tpr box.
 +             */
 +            calc_shifts(rerun_fr.box,fr->shift_vec);
 +        }
 +    }
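
In rerun mode the main loop below is driven by frame availability rather than the step counter: read_first_frame() primes rerun_fr and bNotLastFrame here, and (further down in do_md, past this excerpt) the next frame is read at the end of each pass. A toy standalone sketch of that first/next pattern, with hypothetical stubs in place of the trajectory readers:

#include <stdio.h>

/* Hypothetical stubs in place of read_first_frame()/read_next_frame():
 * return non-zero while a frame is available. */
static int read_first_frame_stub(int *frame) { *frame = 0; return 1; }
static int read_next_frame_stub(int *frame)  { return ++(*frame) < 3; }

int main(void)
{
    int frame;
    int have_frame = read_first_frame_stub(&frame);   /* bNotLastFrame */

    while (have_frame)          /* mirrors: while (bRerunMD && bNotLastFrame) */
    {
        printf("processing frame %d\n", frame);
        have_frame = read_next_frame_stub(&frame);
    }
    return 0;
}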
 +
 +    /* loop over MD steps or if rerunMD to end of input trajectory */
 +    bFirstStep = TRUE;
 +    /* Skip the first Nose-Hoover integration when we get the state from tpx */
 +    bStateFromTPX = !opt2bSet("-cpi",nfile,fnm);
 +    bInitStep = bFirstStep && (bStateFromTPX || bVV);
 +    bStartingFromCpt = (Flags & MD_STARTFROMCPT) && bInitStep;
 +    bLastStep    = FALSE;
 +    bSumEkinhOld = FALSE;
 +    bExchanged   = FALSE;
 +
 +    init_global_signals(&gs,cr,ir,repl_ex_nst);
 +
 +    step = ir->init_step;
 +    step_rel = 0;
 +
 +    if (ir->nstlist == -1)
 +    {
 +        init_nlistheuristics(&nlh,bGStatEveryStep,step);
 +    }
 +
 +    if (MULTISIM(cr) && (repl_ex_nst <=0 ))
 +    {
 +        /* check how many steps are left in other sims */
 +        multisim_nsteps=get_multisim_nsteps(cr, ir->nsteps);
 +    }
 +
 +
 +    /* and stop now if we should */
 +    bLastStep = (bRerunMD || (ir->nsteps >= 0 && step_rel > ir->nsteps) ||
 +                 ((multisim_nsteps >= 0) && (step_rel >= multisim_nsteps )));
 +    while (!bLastStep || (bRerunMD && bNotLastFrame)) {
 +
 +        wallcycle_start(wcycle,ewcSTEP);
 +
 +        if (bRerunMD) {
 +            if (rerun_fr.bStep) {
 +                step = rerun_fr.step;
 +                step_rel = step - ir->init_step;
 +            }
 +            if (rerun_fr.bTime) {
 +                t = rerun_fr.time;
 +            }
 +            else
 +            {
 +                t = step;
 +            }
 +        } 
 +        else 
 +        {
 +            bLastStep = (step_rel == ir->nsteps);
 +            t = t0 + step*ir->delta_t;
 +        }
 +
 +        if (ir->efep != efepNO)
 +        {
 +            if (bRerunMD && rerun_fr.bLambda && (ir->delta_lambda!=0))
 +            {
 +                state_global->lambda = rerun_fr.lambda;
 +            }
 +            else
 +            {
 +                state_global->lambda = lam0 + step*ir->delta_lambda;
 +            }
 +            state->lambda = state_global->lambda;
 +            bDoDHDL = do_per_step(step,ir->nstdhdl);
 +        }
 +
 +        if (bSimAnn) 
 +        {
 +            update_annealing_target_temp(&(ir->opts),t);
 +        }
 +
 +        if (bRerunMD)
 +        {
 +            if (!(DOMAINDECOMP(cr) && !MASTER(cr)))
 +            {
 +                for(i=0; i<state_global->natoms; i++)
 +                {
 +                    copy_rvec(rerun_fr.x[i],state_global->x[i]);
 +                }
 +                if (rerun_fr.bV)
 +                {
 +                    for(i=0; i<state_global->natoms; i++)
 +                    {
 +                        copy_rvec(rerun_fr.v[i],state_global->v[i]);
 +                    }
 +                }
 +                else
 +                {
 +                    for(i=0; i<state_global->natoms; i++)
 +                    {
 +                        clear_rvec(state_global->v[i]);
 +                    }
 +                    if (bRerunWarnNoV)
 +                    {
 +                        fprintf(stderr,"\nWARNING: Some frames do not contain velocities.\n"
 +                                "         Ekin, temperature and pressure are incorrect,\n"
 +                                "         the virial will be incorrect when constraints are present.\n"
 +                                "\n");
 +                        bRerunWarnNoV = FALSE;
 +                    }
 +                }
 +            }
 +            copy_mat(rerun_fr.box,state_global->box);
 +            copy_mat(state_global->box,state->box);
 +
 +            if (vsite && (Flags & MD_RERUN_VSITE))
 +            {
 +                if (DOMAINDECOMP(cr))
 +                {
 +                    gmx_fatal(FARGS,"Vsite recalculation with -rerun is not implemented for domain decomposition, use particle decomposition");
 +                }
 +                if (graph)
 +                {
 +                    /* The following is necessary because the graph may get out of sync
 +                     * with the coordinates if we only have every N'th coordinate set
 +                     */
 +                    mk_mshift(fplog,graph,fr->ePBC,state->box,state->x);
 +                    shift_self(graph,state->box,state->x);
 +                }
 +                construct_vsites(fplog,vsite,state->x,nrnb,ir->delta_t,state->v,
 +                                 top->idef.iparams,top->idef.il,
 +                                 fr->ePBC,fr->bMolPBC,graph,cr,state->box);
 +                if (graph)
 +                {
 +                    unshift_self(graph,state->box,state->x);
 +                }
 +            }
 +        }
 +
 +        /* Stop Center of Mass motion */
 +        bStopCM = (ir->comm_mode != ecmNO && do_per_step(step,ir->nstcomm));
 +
 +        /* Copy back starting coordinates in case we're doing a forcefield scan */
 +        if (bFFscan)
 +        {
 +            for(ii=0; (ii<state->natoms); ii++)
 +            {
 +                copy_rvec(xcopy[ii],state->x[ii]);
 +                copy_rvec(vcopy[ii],state->v[ii]);
 +            }
 +            copy_mat(boxcopy,state->box);
 +        }
 +
 +        if (bRerunMD)
 +        {
 +            /* for rerun MD always do Neighbour Searching */
 +            bNS = (bFirstStep || ir->nstlist != 0);
 +            bNStList = bNS;
 +        }
 +        else
 +        {
 +            /* Determine whether or not to do Neighbour Searching and LR */
 +            bNStList = (ir->nstlist > 0  && step % ir->nstlist == 0);
 +            
 +            bNS = (bFirstStep || bExchanged || bNStList ||
 +                   (ir->nstlist == -1 && nlh.nabnsb > 0));
 +
 +            if (bNS && ir->nstlist == -1)
 +            {
 +                set_nlistheuristics(&nlh,bFirstStep || bExchanged,step);
 +            }
 +        } 
 +
 +        /* check whether we should stop because another simulation has 
 +           stopped. */
 +        if (MULTISIM(cr))
 +        {
 +            if ( (multisim_nsteps >= 0) &&  (step_rel >= multisim_nsteps)  &&  
 +                 (multisim_nsteps != ir->nsteps) )  
 +            {
 +                if (bNS)
 +                {
 +                    if (MASTER(cr))
 +                    {
 +                        fprintf(stderr, 
 +                                "Stopping simulation %d because another one has finished\n",
 +                                cr->ms->sim);
 +                    }
 +                    bLastStep=TRUE;
 +                    gs.sig[eglsCHKPT] = 1;
 +                }
 +            }
 +        }
 +
 +        /* < 0 means stop at next step, > 0 means stop at next NS step */
 +        if ( (gs.set[eglsSTOPCOND] < 0 ) ||
 +             ( (gs.set[eglsSTOPCOND] > 0 ) && ( bNS || ir->nstlist==0)) )
 +        {
 +            bLastStep = TRUE;
 +        }
 +
 +        /* Determine whether or not to update the Born radii if doing GB */
 +        bBornRadii=bFirstStep;
 +        if (ir->implicit_solvent && (step % ir->nstgbradii==0))
 +        {
 +            bBornRadii=TRUE;
 +        }
 +        
 +        do_log = do_per_step(step,ir->nstlog) || bFirstStep || bLastStep;
 +        do_verbose = bVerbose &&
 +                  (step % stepout == 0 || bFirstStep || bLastStep);
 +
 +        if (bNS && !(bFirstStep && ir->bContinuation && !bRerunMD))
 +        {
 +            if (bRerunMD)
 +            {
 +                bMasterState = TRUE;
 +            }
 +            else
 +            {
 +                bMasterState = FALSE;
 +                /* Correct the new box if it is too skewed */
 +                if (DYNAMIC_BOX(*ir))
 +                {
 +                    if (correct_box(fplog,step,state->box,graph))
 +                    {
 +                        bMasterState = TRUE;
 +                    }
 +                }
 +                if (DOMAINDECOMP(cr) && bMasterState)
 +                {
 +                    dd_collect_state(cr->dd,state,state_global);
 +                }
 +            }
 +
 +            if (DOMAINDECOMP(cr))
 +            {
 +                /* Repartition the domain decomposition */
 +                wallcycle_start(wcycle,ewcDOMDEC);
 +                dd_partition_system(fplog,step,cr,
 +                                    bMasterState,nstglobalcomm,
 +                                    state_global,top_global,ir,
 +                                    state,&f,mdatoms,top,fr,
 +                                    vsite,shellfc,constr,
 +                                    nrnb,wcycle,do_verbose);
 +                wallcycle_stop(wcycle,ewcDOMDEC);
 +                /* If using an iterative integrator, reallocate space to match the decomposition */
 +            }
 +        }
 +
 +        if (MASTER(cr) && do_log && !bFFscan)
 +        {
 +            print_ebin_header(fplog,step,t,state->lambda);
 +        }
 +
 +        if (ir->efep != efepNO)
 +        {
 +            update_mdatoms(mdatoms,state->lambda); 
 +        }
 +
 +        if (bRerunMD && rerun_fr.bV)
 +        {
 +            
 +            /* We need the kinetic energy at the minus-half step (t - dt/2) for
 +             * determining the full step kinetic energy and possibly for
 +             * T-coupling.*/
 +            /* This may not be quite working correctly yet . . . . */
 +            compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
 +                            wcycle,enerd,NULL,NULL,NULL,NULL,mu_tot,
 +                            constr,NULL,FALSE,state->box,
 +                            top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
 +                            CGLO_RERUNMD | CGLO_GSTAT | CGLO_TEMPERATURE);
 +        }
 +        clear_mat(force_vir);
 +        
 +        /* Ionize the atoms if necessary */
 +        if (bIonize)
 +        {
 +            ionize(fplog,oenv,mdatoms,top_global,t,ir,state->x,state->v,
 +                   mdatoms->start,mdatoms->start+mdatoms->homenr,state->box,cr);
 +        }
 +        
 +        /* Update force field in ffscan program */
 +        if (bFFscan)
 +        {
 +            if (update_forcefield(fplog,
 +                                  nfile,fnm,fr,
 +                                  mdatoms->nr,state->x,state->box)) {
 +                if (gmx_parallel_env_initialized())
 +                {
 +                    gmx_finalize();
 +                }
 +                exit(0);
 +            }
 +        }
 +
 +        /* We write a checkpoint at this MD step
 +         * either at an NS step when we signalled through gs,
 +         * or at the last step (but not when we do not want confout),
 +         * but never at the first step or with rerun.
 +         */
 +        bCPT = (((gs.set[eglsCHKPT] && (bNS || ir->nstlist == 0)) ||
 +                 (bLastStep && (Flags & MD_CONFOUT))) &&
 +                step > ir->init_step && !bRerunMD);
 +        if (bCPT)
 +        {
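 +            /* Consume the signal so only one checkpoint is written per request. */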
 +            gs.set[eglsCHKPT] = 0;
 +        }
 +
 +        /* Determine the energy and pressure:
 +         * at nstcalcenergy steps and at energy output steps (set below).
 +         */
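 +        /* do_per_step(step,n) is TRUE when n is non-zero and step is a multiple of n. */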
 +        bNstEner = do_per_step(step,ir->nstcalcenergy);
 +        bCalcEnerPres =
 +            (bNstEner ||
 +             (ir->epc != epcNO && do_per_step(step,ir->nstpcouple)));
 +
 +        /* Do we need global communication ? */
 +        bGStat = (bCalcEnerPres || bStopCM ||
 +                  do_per_step(step,nstglobalcomm) ||
 +                  (ir->nstlist == -1 && !bRerunMD && step >= nlh.step_nscheck));
 +
 +        do_ene = (do_per_step(step,ir->nstenergy) || bLastStep);
 +
 +        if (do_ene || do_log)
 +        {
 +            bCalcEnerPres = TRUE;
 +            bGStat        = TRUE;
 +        }
 +        
 +        /* these CGLO_ options remain the same throughout the iteration */
 +        cglo_flags = ((bRerunMD ? CGLO_RERUNMD : 0) |
 +                      (bGStat ? CGLO_GSTAT : 0)
 +            );
 +        
 +        force_flags = (GMX_FORCE_STATECHANGED |
 +                       ((DYNAMIC_BOX(*ir) || bRerunMD) ? GMX_FORCE_DYNAMICBOX : 0) |
 +                       GMX_FORCE_ALLFORCES |
 +                       (bNStList ? GMX_FORCE_DOLR : 0) |
 +                       GMX_FORCE_SEPLRF |
 +                       (bCalcEnerPres ? GMX_FORCE_VIRIAL : 0) |
 +                       (bDoDHDL ? GMX_FORCE_DHDL : 0)
 +            );
 +        
 +        if (shellfc)
 +        {
 +            /* Now is the time to relax the shells */
 +            count=relax_shell_flexcon(fplog,cr,bVerbose,bFFscan ? step+1 : step,
 +                                      ir,bNS,force_flags,
 +                                      bStopCM,top,top_global,
 +                                      constr,enerd,fcd,
 +                                      state,f,force_vir,mdatoms,
 +                                      nrnb,wcycle,graph,groups,
 +                                      shellfc,fr,bBornRadii,t,mu_tot,
 +                                      state->natoms,&bConverged,vsite,
 +                                      outf->fp_field);
 +            tcount+=count;
 +
 +            if (bConverged)
 +            {
 +                nconverged++;
 +            }
 +        }
 +        else
 +        {
 +            /* The coordinates (x) are shifted (to get whole molecules)
 +             * in do_force.
 +             * This is parallelized as well, and does communication too.
 +             * Check comments in sim_util.c
 +             */
 +        
 +            do_force(fplog,cr,ir,step,nrnb,wcycle,top,top_global,groups,
 +                     state->box,state->x,&state->hist,
 +                     f,force_vir,mdatoms,enerd,fcd,
 +                     state->lambda,graph,
 +                     fr,vsite,mu_tot,t,outf->fp_field,ed,bBornRadii,
 +                     (bNS ? GMX_FORCE_NS : 0) | force_flags);
 +        }
 +        
 +        if (bTCR)
 +        {
 +            mu_aver = calc_mu_aver(cr,state->x,mdatoms->chargeA,
 +                                   mu_tot,&top_global->mols,mdatoms,gnx,grpindex);
 +        }
 +        
 +        if (bTCR && bFirstStep)
 +        {
 +            tcr=init_coupling(fplog,nfile,fnm,cr,fr,mdatoms,&(top->idef));
 +            fprintf(fplog,"Done init_coupling\n"); 
 +            fflush(fplog);
 +        }
 +        
 +        if (bVV && !bStartingFromCpt && !bRerunMD)
 +        /*  ############### START FIRST UPDATE HALF-STEP FOR VV METHODS############### */
 +        {
 +            if (ir->eI==eiVV && bInitStep) 
 +            {
 +                /* if using velocity verlet with full time step Ekin,
 +                 * take the first half step only to compute the 
 +                 * virial for the first step. From there,
 +                 * revert back to the initial coordinates
 +                 * so that the input is actually the initial step.
 +                 */
 +                copy_rvecn(state->v,cbuf,0,state->natoms); /* should make this better for parallelizing? */
 +            } else {
 +                /* this is for NHC in the Ekin(t+dt/2) version of vv */
 +                trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ1);            
 +            }
 +
 +            update_coords(fplog,step,ir,mdatoms,state,
 +                          f,fr->bTwinRange && bNStList,fr->f_twin,fcd,
 +                          ekind,M,wcycle,upd,bInitStep,etrtVELOCITY1,
 +                          cr,nrnb,constr,&top->idef);
 +            
 +            if (bIterations)
 +            {
 +                gmx_iterate_init(&iterate,bIterations && !bInitStep);
 +            }
 +            /* for iterations, we save these vectors, as we will be self-consistently iterating
 +               the calculations */
 +
 +            /*#### UPDATE EXTENDED VARIABLES IN TROTTER FORMULATION */
 +            
 +            /* save the state */
 +            if (bIterations && iterate.bIterate) { 
 +                copy_coupling_state(state,bufstate,ekind,ekind_save,&(ir->opts));
 +            }
 +            
 +            bFirstIterate = TRUE;
 +            while (bFirstIterate || (bIterations && iterate.bIterate))
 +            {
 +                if (bIterations && iterate.bIterate) 
 +                {
 +                    copy_coupling_state(bufstate,state,ekind_save,ekind,&(ir->opts));
 +                    if (bFirstIterate && bTrotter) 
 +                    {
 +                        /* The first time through, we need a decent first estimate
 +                           of veta(t+dt) to compute the constraints.  Do
 +                           this by computing the box volume part of the
 +                           trotter integration at this time. Nothing else
 +                           should be changed by this routine here.  If
 +                           !(first time), we start with the previous value
 +                           of veta.  */
 +                        
 +                        veta_save = state->veta;
 +                        trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ0);
 +                        vetanew = state->veta;
 +                        state->veta = veta_save;
 +                    } 
 +                } 
 +                
 +                bOK = TRUE;
 +                if ( !bRerunMD || rerun_fr.bV || bForceUpdate) {  /* Why is rerun_fr.bV here?  Unclear. */
 +                    dvdl = 0;
 +                    
 +                    update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,state,graph,f,
 +                                       &top->idef,shake_vir,NULL,
 +                                       cr,nrnb,wcycle,upd,constr,
 +                                       bInitStep,TRUE,bCalcEnerPres,vetanew);
 +                    
 +                    if (!bOK && !bFFscan)
 +                    {
 +                        gmx_fatal(FARGS,"Constraint error: Shake, Lincs or Settle could not solve the constraints");
 +                    }
 +                    
 +                } 
 +                else if (graph)
 +                { /* Need to unshift here if a do_force has been
 +                     called in the previous step */
 +                    unshift_self(graph,state->box,state->x);
 +                }
 +                
 +                
 +                /* if VV, compute the pressure and constraints */
 +                /* For VV2, we strictly only need this if using pressure
 +                 * control, but we really would like to have accurate pressures
 +                 * printed out.
 +                 * Think about ways around this in the future?
 +                 * For now, keep this choice in comments.
 +                 */
 +                /*bPres = (ir->eI==eiVV || IR_NPT_TROTTER(ir)); */
 +                    /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && IR_NPT_TROTTER(ir)));*/
 +                bPres = TRUE;
 +                bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK));
 +                compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
 +                                wcycle,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
 +                                constr,NULL,FALSE,state->box,
 +                                top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
 +                                cglo_flags 
 +                                | CGLO_ENERGY 
 +                                | (bStopCM ? CGLO_STOPCM : 0)
 +                                | (bTemp ? CGLO_TEMPERATURE:0) 
 +                                | (bPres ? CGLO_PRESSURE : 0) 
 +                                | (bPres ? CGLO_CONSTRAINT : 0)
 +                                | ((bIterations && iterate.bIterate) ? CGLO_ITERATE : 0)  
 +                                | (bFirstIterate ? CGLO_FIRSTITERATE : 0)
 +                                | CGLO_SCALEEKIN 
 +                    );
 +                /* explanation of above: 
 +                   a) We compute Ekin at the full time step
 +                   if 1) we are using the AveVel Ekin, and it's not the
 +                   initial step, or 2) if we are using AveEkin, but need the full
 +                   time step kinetic energy for the pressure (always true now, since we want accurate statistics).
 +                   b) If we are using EkinAveEkin for the kinetic energy for the temperature control, we still feed in 
 +                   EkinAveVel because it's needed for the pressure */
 +                
 +                /* temperature scaling and pressure scaling to produce the extended variables at t+dt */
 +                if (!bInitStep) 
 +                {
 +                    if (bTrotter)
 +                    {
 +                        trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ2);
 +                    } 
 +                    else 
 +                    {
 +                        update_tcouple(fplog,step,ir,state,ekind,wcycle,upd,&MassQ,mdatoms);
 +                    }
 +                }
 +                
 +                if (bIterations &&
 +                    done_iterating(cr,fplog,step,&iterate,bFirstIterate,
 +                                   state->veta,&vetanew)) 
 +                {
 +                    break;
 +                }
 +                bFirstIterate = FALSE;
 +            }
 +
 +            if (bTrotter && !bInitStep) {
 +                copy_mat(shake_vir,state->svir_prev);
 +                copy_mat(force_vir,state->fvir_prev);
 +                if (IR_NVT_TROTTER(ir) && ir->eI==eiVV) {
 +                    /* update temperature and kinetic energy now that step is over - this is the v(t+dt) point */
 +                    enerd->term[F_TEMP] = sum_ekin(&(ir->opts),ekind,NULL,(ir->eI==eiVV),FALSE,FALSE);
 +                    enerd->term[F_EKIN] = trace(ekind->ekin);
 +                }
 +    tensor      tmpvir;
 +    real        fom,oldfom,veta_save,pcurr,scalevir,tracevir;
 +    real        vetanew = 0;
 +    double      cycles;
 +    real        saved_conserved_quantity = 0;
 +    real        last_ekin = 0;
 +    int         iter_i;
 +    t_extmass   MassQ;
 +                fprintf(fplog,sepdvdlformat,"Constraint",0.0,dvdl);
 +            }
 +            enerd->term[F_DHDL_CON] += dvdl;
 +        }
 +    
 +        /* MRS -- now done iterating -- compute the conserved quantity */
 +        if (bVV) {
 +            saved_conserved_quantity = compute_conserved_from_auxiliary(ir,state,&MassQ);
 +            if (ir->eI==eiVV) 
 +            {
 +                last_ekin = enerd->term[F_EKIN]; /* does this get preserved through checkpointing? */
 +            }
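 +            /* For dispersion correction settings that do not also
 +             * correct the pressure, exclude the correction term from
 +             * the conserved quantity.
 +             */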
 +            if ((ir->eDispCorr != edispcEnerPres) && (ir->eDispCorr != edispcAllEnerPres)) 
 +            {
 +                saved_conserved_quantity -= enerd->term[F_DISPCORR];
 +            }
 +        }
 +        
 +        /* ########  END FIRST UPDATE STEP  ############## */
 +        /* ########  If doing VV, we now have v(dt) ###### */
 +        
 +        /* ################## START TRAJECTORY OUTPUT ################# */
 +        
 +        /* Now we have the energies and forces corresponding to the 
 +         * coordinates at time t. We must output all of this before
 +         * the update.
 +         * for RerunMD t is read from input trajectory
 +         */
 +        mdof_flags = 0;
 +        if (do_per_step(step,ir->nstxout)) { mdof_flags |= MDOF_X; }
 +        if (do_per_step(step,ir->nstvout)) { mdof_flags |= MDOF_V; }
 +        if (do_per_step(step,ir->nstfout)) { mdof_flags |= MDOF_F; }
 +        if (do_per_step(step,ir->nstxtcout)) { mdof_flags |= MDOF_XTC; }
 +        if (bCPT) { mdof_flags |= MDOF_CPT; }
 +
 +#if defined(GMX_FAHCORE) || defined(GMX_WRITELASTSTEP)
 +        if (bLastStep)
 +        {
 +            /* Enforce writing positions and velocities at end of run */
 +            mdof_flags |= (MDOF_X | MDOF_V);
 +        }
 +#endif
 +#ifdef GMX_FAHCORE
 +        if (MASTER(cr))
 +            fcReportProgress( ir->nsteps, step );
 +
 +        /* sync bCPT and fc record-keeping */
 +        if (bCPT && MASTER(cr))
 +            fcRequestCheckPoint();
 +#endif
 +        
 +        if (mdof_flags != 0)
 +        {
 +            wallcycle_start(wcycle,ewcTRAJ);
 +            if (bCPT)
 +            {
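 +                /* Collect state that is kept outside t_state, so that
 +                 * it gets written to the checkpoint: the stochastic
 +                 * dynamics RNG state and the kinetic-energy and energy
 +                 * histories.
 +                 */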
 +                if (state->flags & (1<<estLD_RNG))
 +                {
 +                    get_stochd_state(upd,state);
 +                }
 +                if (MASTER(cr))
 +                {
 +                    if (bSumEkinhOld)
 +                    {
 +                        state_global->ekinstate.bUpToDate = FALSE;
 +                    }
 +                    else
 +                    {
 +                        update_ekinstate(&state_global->ekinstate,ekind);
 +                        state_global->ekinstate.bUpToDate = TRUE;
 +                    }
 +                    update_energyhistory(&state_global->enerhist,mdebin);
 +                }
 +            }
 +            write_traj(fplog,cr,outf,mdof_flags,top_global,
 +                       step,t,state,state_global,f,f_global,&n_xtc,&x_xtc);
 +            if (bCPT)
 +            {
 +                nchkpt++;
 +                bCPT = FALSE;
 +            }
 +            debug_gmx();
 +            if (bLastStep && step_rel == ir->nsteps &&
 +                (Flags & MD_CONFOUT) && MASTER(cr) &&
 +                !bRerunMD && !bFFscan)
 +            {
 +                /* x and v have been collected in write_traj,
 +                 * because a checkpoint file will always be written
 +                 * at the last step.
 +                 */
 +                fprintf(stderr,"\nWriting final coordinates.\n");
 +                if (ir->ePBC != epbcNONE && !ir->bPeriodicMols &&
 +                    DOMAINDECOMP(cr))
 +                {
 +                    /* Make molecules whole only for confout writing */
 +                    do_pbc_mtop(fplog,ir->ePBC,state->box,top_global,state_global->x);
 +                }
 +                write_sto_conf_mtop(ftp2fn(efSTO,nfile,fnm),
 +                                    *top_global->name,top_global,
 +                                    state_global->x,state_global->v,
 +                                    ir->ePBC,state->box);
 +                debug_gmx();
 +            }
 +            wallcycle_stop(wcycle,ewcTRAJ);
 +        }
 +        
 +        /* kludge -- the virial is lost on restart for NPT control, so restore it from the checkpointed state */
 +        if (bStartingFromCpt && bVV) 
 +        {
 +            copy_mat(state->svir_prev,shake_vir);
 +            copy_mat(state->fvir_prev,force_vir);
 +        }
 +        /*  ################## END TRAJECTORY OUTPUT ################ */
 +        
 +        /* Determine the wallclock run time up till now */
 +        run_time = gmx_gettime() - (double)runtime->real;
 +
 +        /* Check whether everything is still all right */
 +        if (((int)gmx_get_stop_condition() > handled_stop_condition)
 +#ifdef GMX_THREAD_MPI
 +            && MASTER(cr)
 +#endif
 +            )
 +        {
 +            /* this is just to make gs.sig compatible with the hack
 +               of sending signals around by MPI_Reduce together with
 +               other floats */
 +            if ( gmx_get_stop_condition() == gmx_stop_cond_next_ns )
 +                gs.sig[eglsSTOPCOND]=1;
 +            if ( gmx_get_stop_condition() == gmx_stop_cond_next )
 +                gs.sig[eglsSTOPCOND]=-1;
 +            /* < 0 means stop at next step, > 0 means stop at next NS step */
 +            if (fplog)
 +            {
 +                fprintf(fplog,
 +                        "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
 +                        gmx_get_signal_name(),
 +                        gs.sig[eglsSTOPCOND]==1 ? "NS " : "");
 +                fflush(fplog);
 +            }
 +            fprintf(stderr,
 +                    "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
 +                    gmx_get_signal_name(),
 +                    gs.sig[eglsSTOPCOND]==1 ? "NS " : "");
 +            fflush(stderr);
 +            handled_stop_condition=(int)gmx_get_stop_condition();
 +        }
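 +        /* Check the wall-clock time; stop at 99% of -maxh so that
 +         * there is time left to write output before the hard limit.
 +         */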
 +        else if (MASTER(cr) && (bNS || ir->nstlist <= 0) &&
 +                 (max_hours > 0 && run_time > max_hours*60.0*60.0*0.99) &&
 +                 gs.sig[eglsSTOPCOND] == 0 && gs.set[eglsSTOPCOND] == 0)
 +        {
 +            /* Signal to terminate the run */
 +            gs.sig[eglsSTOPCOND] = 1;
 +            if (fplog)
 +            {
 +                fprintf(fplog,"\nStep %s: Run time exceeded %.3f hours, will terminate the run\n",gmx_step_str(step,sbuf),max_hours*0.99);
 +            }
 +            fprintf(stderr, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n",gmx_step_str(step,sbuf),max_hours*0.99);
 +        }
 +
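 +        /* With mdrun -resethway, request a reset of the performance
 +         * counters halfway through the run (0.495 = 0.99/2 of -maxh).
 +         */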
 +        if (bResetCountersHalfMaxH && MASTER(cr) &&
 +            run_time > max_hours*60.0*60.0*0.495)
 +        {
 +            gs.sig[eglsRESETCOUNTERS] = 1;
 +        }
 +
 +        if (ir->nstlist == -1 && !bRerunMD)
 +        {
 +            /* When bGStatEveryStep=FALSE, global_stat is only called
 +             * when we check the atom displacements, not at NS steps.
 +             * This means that the bonded interaction count check is also
 +             * not performed immediately after NS. Therefore a few MD steps
 +             * could be performed with missing interactions.
 +             * But wrong energies are never written to file,
 +             * since energies are only written after global_stat
 +             * has been called.
 +             */
 +            if (step >= nlh.step_nscheck)
 +            {
 +                nlh.nabnsb = natoms_beyond_ns_buffer(ir,fr,&top->cgs,
 +                                                     nlh.scale_tot,state->x);
 +            }
 +            else
 +            {
 +                /* This is not necessarily true,
 +                 * but step_nscheck is determined quite conservatively.
 +                 */
 +                nlh.nabnsb = 0;
 +            }
 +        }
 +
 +        /* In parallel we only have to check for checkpointing in steps
 +         * where we do global communication,
 +         * otherwise the other nodes don't know.
 +         */
 +        if (MASTER(cr) && ((bGStat || !PAR(cr)) &&
 +                           cpt_period >= 0 &&
 +                           (cpt_period == 0 || 
 +                            run_time >= nchkpt*cpt_period*60.0)) &&
 +            gs.set[eglsCHKPT] == 0)
 +        {
 +            gs.sig[eglsCHKPT] = 1;
 +        }
 +  
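 +        /* With MTTK pressure coupling and constraints, the second
 +         * update step below is iterated to self-consistency, since
 +         * the constraint virial and the box velocity depend on each
 +         * other.
 +         */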
 +        if (bIterations)
 +        {
 +            gmx_iterate_init(&iterate,bIterations);
 +        }
 +    
 +        /* for iterations, we save these vectors, as we will be redoing the calculations */
 +        if (bIterations && iterate.bIterate) 
 +        {
 +            copy_coupling_state(state,bufstate,ekind,ekind_save,&(ir->opts));
 +        }
 +        bFirstIterate = TRUE;
 +        while (bFirstIterate || (bIterations && iterate.bIterate))
 +        {
 +            /* We now restore these vectors to redo the calculation with improved extended variables */    
 +            if (bIterations) 
 +            { 
 +                copy_coupling_state(bufstate,state,ekind_save,ekind,&(ir->opts));
 +            }
 +
 +            /* We make the decision to break or not -after- the calculation of Ekin and Pressure,
 +               so scroll down for that logic */
 +            
 +            /* #########   START SECOND UPDATE STEP ################# */
 +            /* Box is changed in update() when we do pressure coupling,
 +             * but we should still use the old box for energy corrections and when
 +             * writing it to the energy file, so it matches the trajectory files for
 +             * the same timestep above. Make a copy in a separate array.
 +             */
 +            copy_mat(state->box,lastbox);
 +
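 +            /* For a rerun without velocities we skip the update,
 +             * unless updating is forced; in that case only the
 +             * coordinates are unshifted (see below).
 +             */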
 +            bOK = TRUE;
 +            if (!(bRerunMD && !rerun_fr.bV && !bForceUpdate))
 +            {
 +                wallcycle_start(wcycle,ewcUPDATE);
 +                dvdl = 0;
 +                /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */
 +                if (bTrotter) 
 +                {
 +                    if (bIterations && iterate.bIterate) 
 +                    {
 +                        if (bFirstIterate) 
 +                        {
 +                            scalevir = 1;
 +                        }
 +                        else 
 +                        {
 +                            /* we use a new value of scalevir to converge the iterations faster */
 +                            scalevir = tracevir/trace(shake_vir);
 +                        }
 +                        msmul(shake_vir,scalevir,shake_vir); 
 +                        m_add(force_vir,shake_vir,total_vir);
 +                        clear_mat(shake_vir);
 +                    }
 +                    trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ3);
 +                /* We can only do Berendsen coupling after we have summed
 +                 * the kinetic energy or virial. Since this happens
 +                 * in global_stat after update, we should only do it at
 +                 * step % nstlist = 1 with bGStatEveryStep=FALSE.
 +                 */
 +                }
 +                else 
 +                {
 +                    update_tcouple(fplog,step,ir,state,ekind,wcycle,upd,&MassQ,mdatoms);
 +                    update_pcouple(fplog,step,ir,state,pcoupl_mu,M,wcycle,
 +                                   upd,bInitStep);
 +                }
 +
 +                if (bVV)
 +                {
 +                    /* velocity half-step update */
 +                    update_coords(fplog,step,ir,mdatoms,state,f,
 +                                  fr->bTwinRange && bNStList,fr->f_twin,fcd,
 +                                  ekind,M,wcycle,upd,FALSE,etrtVELOCITY2,
 +                                  cr,nrnb,constr,&top->idef);
 +                }
 +
 +                /* Above, the initialization just copies ekinh into ekin;
 +                 * it doesn't copy the positions (for VV), or run the
 +                 * entire integrator (for MD).
 +                 */
 +                
 +                if (ir->eI==eiVVAK) 
 +                {
 +                    copy_rvecn(state->x,cbuf,0,state->natoms);
 +                }
 +                
 +                update_coords(fplog,step,ir,mdatoms,state,f,fr->bTwinRange && bNStList,fr->f_twin,fcd,
 +                              ekind,M,wcycle,upd,bInitStep,etrtPOSITION,cr,nrnb,constr,&top->idef);
 +                wallcycle_stop(wcycle,ewcUPDATE);
 +
 +                update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,state,graph,f,
 +                                   &top->idef,shake_vir,force_vir,
 +                                   cr,nrnb,wcycle,upd,constr,
 +                                   bInitStep,FALSE,bCalcEnerPres,state->veta);  
 +                
 +                if (ir->eI==eiVVAK) 
 +                {
 +                    /* erase F_EKIN and F_TEMP here? */
 +                    /* just compute the kinetic energy at the half step to perform a trotter step */
 +                    compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
 +                                    wcycle,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
 +                                    constr,NULL,FALSE,lastbox,
 +                                    top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
 +                                    cglo_flags | CGLO_TEMPERATURE    
 +                        );
 +                    wallcycle_start(wcycle,ewcUPDATE);
 +                    trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ4);            
 +                    /* now we know the scaling, we can compute the positions again */
 +                    copy_rvecn(cbuf,state->x,0,state->natoms);
 +
 +                    update_coords(fplog,step,ir,mdatoms,state,f,fr->bTwinRange && bNStList,fr->f_twin,fcd,
 +                                  ekind,M,wcycle,upd,bInitStep,etrtPOSITION,cr,nrnb,constr,&top->idef);
 +                    wallcycle_stop(wcycle,ewcUPDATE);
 +
 +                    /* do we need an extra constraint here? just need to copy out of state->v to upd->xp? */
 +                    /* are the small terms in the shake_vir here due
 +                     * to numerical errors, or are they important
 +                     * physically? I'm thinking they are just errors, but not completely sure. 
 +                     * For now, will call without actually constraining, constr=NULL*/
 +                    update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,state,graph,f,
 +                                       &top->idef,tmp_vir,force_vir,
 +                                       cr,nrnb,wcycle,upd,NULL,
 +                                       bInitStep,FALSE,bCalcEnerPres,
 +                                       state->veta);  
 +                }
 +                if (!bOK && !bFFscan) 
 +                {
 +                    gmx_fatal(FARGS,"Constraint error: Shake, Lincs or Settle could not solve the constraints");
 +                }
 +                
 +                if (fr->bSepDVDL && fplog && do_log) 
 +                {
 +                    fprintf(fplog,sepdvdlformat,"Constraint",0.0,dvdl);
 +                }
 +                enerd->term[F_DHDL_CON] += dvdl;
 +            } 
 +            else if (graph) 
 +            {
 +                /* Need to unshift here */
 +                unshift_self(graph,state->box,state->x);
 +            }
 +
 +            if (vsite != NULL) 
 +            {
 +                wallcycle_start(wcycle,ewcVSITECONSTR);
 +                if (graph != NULL) 
 +                {
 +                    shift_self(graph,state->box,state->x);
 +                }
 +                construct_vsites(fplog,vsite,state->x,nrnb,ir->delta_t,state->v,
 +                                 top->idef.iparams,top->idef.il,
 +                                 fr->ePBC,fr->bMolPBC,graph,cr,state->box);
 +                
 +                if (graph != NULL) 
 +                {
 +                    unshift_self(graph,state->box,state->x);
 +                }
 +                wallcycle_stop(wcycle,ewcVSITECONSTR);
 +            }
 +            
 +            /* ############## IF NOT VV, Calculate globals HERE, also iterate constraints ############ */
 +            if (ir->nstlist == -1 && bFirstIterate)
 +            {
 +                gs.sig[eglsNABNSB] = nlh.nabnsb;
 +            }
 +            compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
 +                            wcycle,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
 +                            constr,
 +                            bFirstIterate ? &gs : NULL, 
 +                            (step_rel % gs.nstms == 0) && 
 +                                (multisim_nsteps<0 || (step_rel<multisim_nsteps)),
 +                            lastbox,
 +                            top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
 +                            cglo_flags 
 +                            | (!EI_VV(ir->eI) ? CGLO_ENERGY : 0) 
 +                            | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0)
 +                            | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0) 
 +                            | (!EI_VV(ir->eI) || bRerunMD ? CGLO_PRESSURE : 0) 
 +                            | (bIterations && iterate.bIterate ? CGLO_ITERATE : 0) 
 +                            | (bFirstIterate ? CGLO_FIRSTITERATE : 0)
 +                            | CGLO_CONSTRAINT 
 +                );
 +            if (ir->nstlist == -1 && bFirstIterate)
 +            {
 +                nlh.nabnsb = gs.set[eglsNABNSB];
 +                gs.set[eglsNABNSB] = 0;
 +            }
 +            /* bIterate is set to keep it from eliminating the old kinetic energy (ekin) terms */
 +            /* #############  END CALC EKIN AND PRESSURE ################# */
 +        
 +            /* Note: this is OK, but there are some numerical precision issues with using the convergence of
 +               the virial that should probably be addressed eventually. state->veta has better properties,
 +               but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could
 +               generate the new shake_vir, but test the veta value for convergence.  This will take some thought. */
 +
 +            if (bIterations && 
 +                done_iterating(cr,fplog,step,&iterate,bFirstIterate,
 +                               trace(shake_vir),&tracevir)) 
 +            {
 +                break;
 +            }
 +            bFirstIterate = FALSE;
 +        }
 +
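 +        /* Apply the pressure coupling scaling to the box; with
 +         * nstlist=-1 the total scaling is also accumulated for the
 +         * neighborlist buffer check.
 +         */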
 +        update_box(fplog,step,ir,mdatoms,state,graph,f,
 +                   ir->nstlist==-1 ? &nlh.scale_tot : NULL,pcoupl_mu,nrnb,wcycle,upd,bInitStep,FALSE);
 +        
 +        /* ################# END UPDATE STEP 2 ################# */
 +        /* #### We now have r(t+dt) and v(t+dt/2)  ############# */
 +    
 +        /* The coordinates (x) were unshifted in update */
 +        if (bFFscan && (shellfc==NULL || bConverged))
 +        {
 +            if (print_forcefield(fplog,enerd->term,mdatoms->homenr,
 +                                 f,NULL,xcopy,
 +                                 &(top_global->mols),mdatoms->massT,pres))
 +            {
 +                if (gmx_parallel_env_initialized())
 +                {
 +                    gmx_finalize();
 +                }
 +                fprintf(stderr,"\n");
 +                exit(0);
 +            }
 +        }
 +        if (!bGStat)
 +        {
 +            /* We will not sum ekinh_old,
 +             * so signal that we still have to do it.
 +             */
 +            bSumEkinhOld = TRUE;
 +        }
 +        
 +        if (bTCR)
 +        {
 +            /* Only do GCT when the relaxation of shells (minimization) has converged,
 +             * otherwise we might be coupling to bogus energies. 
 +             * In parallel we must always do this, because the other sims might
 +             * update the FF.
 +             */
 +
 +            /* Since this is called with the new coordinates state->x, I assume
 +             * we want the new box state->box too. / EL 20040121
 +             */
 +            do_coupling(fplog,oenv,nfile,fnm,tcr,t,step,enerd->term,fr,
 +                        ir,MASTER(cr),
 +                        mdatoms,&(top->idef),mu_aver,
 +                        top_global->mols.nr,cr,
 +                        state->box,total_vir,pres,
 +                        mu_tot,state->x,f,bConverged);
 +            debug_gmx();
 +        }
 +
 +        /* #########  BEGIN PREPARING EDR OUTPUT  ###########  */
 +        
 +        /* sum up the foreign energy and dhdl terms */
 +        sum_dhdl(enerd,state->lambda,ir);
 +
 +        /* use the kinetic energy determined directly from the last velocities, not the average over the half steps */
 +        if (bTrotter && ir->eI==eiVV) 
 +        {
 +            enerd->term[F_EKIN] = last_ekin;
 +        }
 +        enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN];
 +        
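 +        /* For VV the conserved quantity was saved above, before the
 +         * second update; for leap-frog it is computed here from the
 +         * current coupling variables.
 +         */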
 +        if (bVV)
 +        {
 +            enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity;
 +        }
 +        else 
 +        {
 +            enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + compute_conserved_from_auxiliary(ir,state,&MassQ);
 +        }
 +        /* Check for excessively large energies */
 +        if (bIonize) 
 +        {
 +#ifdef GMX_DOUBLE
 +            real etot_max = 1e200;
 +#else
 +            real etot_max = 1e30;
 +#endif
 +            if (fabs(enerd->term[F_ETOT]) > etot_max) 
 +            {
 +                fprintf(stderr,"Energy too large (%g), giving up\n",
 +                        enerd->term[F_ETOT]);
 +            }
 +        }
 +        /* #########  END PREPARING EDR OUTPUT  ###########  */
 +        
 +        /* Update the CPU-time accounting for performance reporting */
 +        if (((step % stepout) == 0) || bLastStep) 
 +        {
 +            runtime_upd_proc(runtime);
 +        }
 +        
 +        /* Output stuff */
 +        if (MASTER(cr))
 +        {
 +            gmx_bool do_dr,do_or;
 +            
 +            if (!(bStartingFromCpt && (EI_VV(ir->eI)))) 
 +            {
 +                if (bNstEner)
 +                {
 +                    upd_mdebin(mdebin,bDoDHDL, TRUE,
 +                               t,mdatoms->tmass,enerd,state,lastbox,
 +                               shake_vir,force_vir,total_vir,pres,
 +                               ekind,mu_tot,constr);
 +                }
 +                else
 +                {
 +                    upd_mdebin_step(mdebin);
 +                }
 +                
 +                do_dr  = do_per_step(step,ir->nstdisreout);
 +                do_or  = do_per_step(step,ir->nstorireout);
 +                
 +                print_ebin(outf->fp_ene,do_ene,do_dr,do_or,do_log?fplog:NULL,
 +                           step,t,
 +                           eprNORMAL,bCompact,mdebin,fcd,groups,&(ir->opts));
 +            }
 +            if (ir->ePull != epullNO)
 +            {
 +                pull_print_output(ir->pull,step,t);
 +            }
 +            
 +            if (do_per_step(step,ir->nstlog))
 +            {
 +                if(fflush(fplog) != 0)
 +                {
 +                    gmx_fatal(FARGS,"Cannot flush logfile - maybe you are out of disk space?");
 +                }
 +            }
 +        }
 +
 +        /* Remaining runtime */
 +        if (MULTIMASTER(cr) && (do_verbose || gmx_got_usr_signal() ))
 +        {
 +            if (shellfc) 
 +            {
 +                fprintf(stderr,"\n");
 +            }
 +            print_time(stderr,runtime,step,ir,cr);
 +        }
 +
 +        /* Replica exchange */
 +        bExchanged = FALSE;
 +        if ((repl_ex_nst > 0) && (step > 0) && !bLastStep &&
 +            do_per_step(step,repl_ex_nst)) 
 +        {
 +            bExchanged = replica_exchange(fplog,cr,repl_ex,
 +                                          state_global,enerd->term,
 +                                          state,step,t);
 +
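 +            /* After a successful exchange the local state has
 +             * changed, so the domain decomposition is redone.
 +             */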
 +            if (bExchanged && DOMAINDECOMP(cr)) 
 +            {
 +                dd_partition_system(fplog,step,cr,TRUE,1,
 +                                    state_global,top_global,ir,
 +                                    state,&f,mdatoms,top,fr,
 +                                    vsite,shellfc,constr,
 +                                    nrnb,wcycle,FALSE);
 +            }
 +        }
 +        
 +        bFirstStep = FALSE;
 +        bInitStep = FALSE;
 +        bStartingFromCpt = FALSE;
 +
 +        /* #######  SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */
 +        /* With all integrators, except VV, we need to retain the pressure
 +         * at the current step for coupling at the next step.
 +         */
 +        if ((state->flags & (1<<estPRES_PREV)) &&
 +            (bGStatEveryStep ||
 +             (ir->nstpcouple > 0 && step % ir->nstpcouple == 0)))
 +        {
 +            /* Store the pressure in t_state for pressure coupling
 +             * at the next MD step.
 +             */
 +            copy_mat(pres,state->pres_prev);
 +        }
 +        
 +        /* #######  END SET VARIABLES FOR NEXT ITERATION ###### */
 +
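 +        /* With membrane embedding (g_membed), gradually grow the
 +         * embedded group back to its full size during the run.
 +         */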
 +        if ( (membed!=NULL) && (!bLastStep) )
 +        {
 +            rescale_membed(step_rel,membed,state_global->x);
 +        }
 +
 +        if (bRerunMD) 
 +        {
 +            if (MASTER(cr))
 +            {
 +                /* read next frame from input trajectory */
 +                bNotLastFrame = read_next_frame(oenv,status,&rerun_fr);
 +            }
 +
 +            if (PAR(cr))
 +            {
 +                rerun_parallel_comm(cr,&rerun_fr,&bNotLastFrame);
 +            }
 +        }
 +        
 +        if (!bRerunMD || !rerun_fr.bStep)
 +        {
 +            /* increase the MD step number */
 +            step++;
 +            step_rel++;
 +        }
 +        
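 +        /* Record the cycle count of this step for the dynamic load
 +         * balancing of the domain decomposition.
 +         */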
 +        cycles = wallcycle_stop(wcycle,ewcSTEP);
 +        if (DOMAINDECOMP(cr) && wcycle)
 +        {
 +            dd_cycles_add(cr->dd,cycles,ddCyclStep);
 +        }
 +        
 +        if (step_rel == wcycle_get_reset_counters(wcycle) ||
 +            gs.set[eglsRESETCOUNTERS] != 0)
 +        {
 +            /* Reset all the counters related to performance over the run */
 +            reset_all_counters(fplog,cr,step,&step_rel,ir,wcycle,nrnb,runtime);
 +            wcycle_set_reset_counters(wcycle,-1);
 +            /* Correct max_hours for the elapsed time */
 +            max_hours -= run_time/(60.0*60.0);
 +            bResetCountersHalfMaxH = FALSE;
 +            gs.set[eglsRESETCOUNTERS] = 0;
 +        }
 +
 +    }
 +    /* End of main MD loop */
 +    debug_gmx();
 +    
 +    /* Stop the time */
 +    runtime_end(runtime);
 +    
 +    if (bRerunMD && MASTER(cr))
 +    {
 +        close_trj(status);
 +    }
 +    
 +    if (!(cr->duty & DUTY_PME))
 +    {
 +        /* Tell the PME only node to finish */
 +        gmx_pme_finish(cr);
 +    }
 +    
 +    if (MASTER(cr))
 +    {
 +        if (ir->nstcalcenergy > 0 && !bRerunMD) 
 +        {
 +            print_ebin(outf->fp_ene,FALSE,FALSE,FALSE,fplog,step,t,
 +                       eprAVER,FALSE,mdebin,fcd,groups,&(ir->opts));
 +        }
 +    }
 +
 +    done_mdoutf(outf);
 +
 +    debug_gmx();
 +
 +    if (ir->nstlist == -1 && nlh.nns > 0 && fplog)
 +    {
 +        fprintf(fplog,"Average neighborlist lifetime: %.1f steps, std.dev.: %.1f steps\n",nlh.s1/nlh.nns,sqrt(nlh.s2/nlh.nns - sqr(nlh.s1/nlh.nns)));
 +        fprintf(fplog,"Average number of atoms that crossed the half buffer length: %.1f\n\n",nlh.ab/nlh.nns);
 +    }
 +    
 +    if (shellfc && fplog)
 +    {
 +        fprintf(fplog,"Fraction of iterations that converged:           %.2f %%\n",
 +                (nconverged*100.0)/step_rel);
 +        fprintf(fplog,"Average number of force evaluations per MD step: %.2f\n\n",
 +                tcount/step_rel);
 +    }
 +    
 +    if (repl_ex_nst > 0 && MASTER(cr))
 +    {
 +        print_replica_exchange_statistics(fplog,repl_ex);
 +    }
 +    
 +    runtime->nsteps_done = step_rel;
 +    
 +    return 0;
 +}
Simple merge