Merge remote-tracking branch 'gerrit/release-4-6'
authorRoland Schulz <roland@utk.edu>
Wed, 28 Mar 2012 18:20:34 +0000 (14:20 -0400)
committerRoland Schulz <roland@utk.edu>
Wed, 28 Mar 2012 18:40:18 +0000 (14:40 -0400)
Conflicts:
cmake/gmxCFlags.cmake
src/gromacs/selection/compiler.cpp
src/gromacs/selection/evaluate.cpp
src/gromacs/selection/parsetree.cpp
src/gromacs/selection/position.cpp
src/gromacs/selection/selmethod.cpp

Change-Id: I3fdfeebfea56b0779980949e5c01a869cdab181d

52 files changed:
1  2 
cmake/gmxCFlags.cmake
src/gromacs/gmxlib/confio.c
src/gromacs/gmxlib/enxio.c
src/gromacs/gmxlib/gbutil.c
src/gromacs/gmxlib/ifunc.c
src/gromacs/gmxlib/maths.c
src/gromacs/gmxlib/matio.c
src/gromacs/gmxlib/mvdata.c
src/gromacs/gmxlib/nrama.c
src/gromacs/gmxlib/princ.c
src/gromacs/gmxlib/readinp.c
src/gromacs/gmxlib/sfactor.c
src/gromacs/gmxlib/shift_util.c
src/gromacs/gmxlib/trxio.c
src/gromacs/gmxlib/txtdump.c
src/gromacs/gmxpreprocess/gen_ad.c
src/gromacs/gmxpreprocess/h_db.c
src/gromacs/gmxpreprocess/pdb2top.c
src/gromacs/gmxpreprocess/readir.c
src/gromacs/gmxpreprocess/resall.c
src/gromacs/gmxpreprocess/toputil.c
src/gromacs/legacyheaders/vsite.h
src/gromacs/mdlib/calcvir.c
src/gromacs/mdlib/constr.c
src/gromacs/mdlib/coupling.c
src/gromacs/mdlib/ebin.c
src/gromacs/mdlib/genborn.c
src/gromacs/mdlib/gmx_parallel_3dfft.c
src/gromacs/mdlib/gmx_wallcycle.c
src/gromacs/mdlib/init.c
src/gromacs/mdlib/mdebin_bar.c
src/gromacs/mdlib/minimize.c
src/gromacs/mdlib/partdec.c
src/gromacs/mdlib/pme.c
src/gromacs/mdlib/pullutil.c
src/gromacs/mdlib/qmmm.c
src/gromacs/mdlib/sim_util.c
src/gromacs/mdlib/tables.c
src/gromacs/mdlib/update.c
src/gromacs/mdlib/vsite.c
src/programs/g_x2top/g_x2top.c
src/programs/gmxdump/gmxdump.c
src/programs/grompp/convparm.c
src/programs/mdrun/ionize.c
src/programs/mdrun/repl_ex.c
src/programs/pdb2gmx/pdb2gmx.c
src/programs/pdb2gmx/xlate.c
src/tools/gmx_membed.c
src/tools/gmx_msd.c
src/tools/gmx_trjconv.c
src/tools/gmx_tune_pme.c
src/tools/gmx_wham.c

Simple merge
Simple merge
index b6a255b76a8ea6b87e61d402e2e8f8e85eec748c,0000000000000000000000000000000000000000..a3b46f783c10365bf4e16fa135b6bd768c683714
mode 100644,000000..100644
--- /dev/null
@@@ -1,1157 -1,0 +1,1146 @@@
- static void gen_units(int n,char ***units)
- {
-     int i;
-     snew(*units,n);
-     for(i=0; i<n; i++)
-     {
-         (*units)[i] = strdup("kJ/mol");
-     }
- }
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROningen Mixture of Alchemy and Childrens' Stories
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include "futil.h"
 +#include "string2.h"
 +#include "gmx_fatal.h"
 +#include "smalloc.h"
 +#include "gmxfio.h"
 +#include "enxio.h"
 +#include "vec.h"
 +#include "xdrf.h"
 +#include "macros.h"
 +
 +/* The source code in this file should be thread-safe. 
 +         Please keep it that way. */
 +
 +/* This number should be increased whenever the file format changes! */
 +static const int enx_version = 5;
 +
 +const char *enx_block_id_name[] = {
 +    "Averaged orientation restraints",
 +    "Instantaneous orientation restraints",
 +    "Orientation restraint order tensor(s)",
 +    "Distance restraints",
 +    "Free energy data",
 +    "BAR histogram",
 +    "Delta H raw data"
 +};
 +
 +
 +/* Stuff for reading pre 4.1 energy files */
 +typedef struct {
 +    gmx_bool     bOldFileOpen;   /* Is this an open old file? */
 +    gmx_bool     bReadFirstStep; /* Did we read the first step? */
 +    int      first_step;     /* First step in the energy file */
 +    int      step_prev;      /* Previous step */
 +    int      nsum_prev;      /* Previous step sum length */
 +    t_energy *ener_prev;     /* Previous energy sums */
 +} ener_old_t;
 +
 +struct ener_file
 +{
 +    ener_old_t eo;
 +    t_fileio *fio;
 +    int framenr;
 +    real frametime;
 +};
 +
 +static void enxsubblock_init(t_enxsubblock *sb)
 +{
 +    sb->nr=0;
 +#ifdef GMX_DOUBLE
 +    sb->type=xdr_datatype_double;
 +#else
 +    sb->type=xdr_datatype_float;
 +#endif
 +    sb->fval = NULL;
 +    sb->dval = NULL;
 +    sb->ival = NULL;
 +    sb->lval = NULL;
 +    sb->cval = NULL;
 +    sb->sval = NULL;
 +    sb->fval_alloc = 0;
 +    sb->dval_alloc = 0;
 +    sb->ival_alloc = 0;
 +    sb->lval_alloc = 0;
 +    sb->cval_alloc = 0;
 +    sb->sval_alloc = 0;
 +}
 +
 +static void enxsubblock_free(t_enxsubblock *sb)
 +{
 +    if (sb->fval_alloc)
 +    {
 +        free(sb->fval);
 +        sb->fval_alloc=0;
 +        sb->fval=NULL;
 +    }
 +    if (sb->dval_alloc)
 +    {
 +        free(sb->dval);
 +        sb->dval_alloc=0;
 +        sb->dval=NULL;
 +    }
 +    if (sb->ival_alloc)
 +    {
 +        free(sb->ival);
 +        sb->ival_alloc=0;
 +        sb->ival=NULL;
 +    }
 +    if (sb->lval_alloc)
 +    {
 +        free(sb->lval);
 +        sb->lval_alloc=0;
 +        sb->lval=NULL;
 +    }
 +    if (sb->cval_alloc)
 +    {
 +        free(sb->cval);
 +        sb->cval_alloc=0;
 +        sb->cval=NULL;
 +    }
 +    if (sb->sval_alloc)
 +    {
 +        int i;
 +
 +        for(i=0;i<sb->sval_alloc;i++)
 +        {
 +            if (sb->sval[i])
 +            {
 +                free(sb->sval[i]);
 +            }
 +        }
 +        free(sb->sval);
 +        sb->sval_alloc=0;
 +        sb->sval=NULL;
 +    }
 +}
 +
 +/* allocate the appropriate amount of memory for the given type and nr */
 +static void enxsubblock_alloc(t_enxsubblock *sb)
 +{
 +    /* allocate the appropriate amount of memory */
 +    switch(sb->type)
 +    {
 +        case xdr_datatype_float:
 +            if (sb->nr > sb->fval_alloc)
 +            {
 +                srenew(sb->fval, sb->nr);
 +                sb->fval_alloc=sb->nr;
 +            }
 +            break;
 +        case xdr_datatype_double:
 +            if (sb->nr > sb->dval_alloc)
 +            {
 +                srenew(sb->dval, sb->nr);
 +                sb->dval_alloc=sb->nr;
 +            }
 +            break;
 +        case xdr_datatype_int:
 +            if (sb->nr > sb->ival_alloc)
 +            {
 +                srenew(sb->ival, sb->nr);
 +                sb->ival_alloc=sb->nr;
 +            }
 +            break;
 +        case xdr_datatype_large_int:
 +            if (sb->nr > sb->lval_alloc)
 +            {
 +                srenew(sb->lval, sb->nr);
 +                sb->lval_alloc=sb->nr;
 +            }
 +            break;
 +        case xdr_datatype_char:
 +            if (sb->nr > sb->cval_alloc)
 +            {
 +                srenew(sb->cval, sb->nr);
 +                sb->cval_alloc=sb->nr;
 +            }
 +            break;
 +        case xdr_datatype_string:
 +            if (sb->nr > sb->sval_alloc)
 +            {
 +                int i;
 +
 +                srenew(sb->sval, sb->nr);
 +                for(i=sb->sval_alloc;i<sb->nr;i++)
 +                {
 +                    sb->sval[i]=NULL;
 +                }
 +                sb->sval_alloc=sb->nr;
 +            }
 +            break;
 +        default:
 +            gmx_incons("Unknown block type: this file is corrupted or from the future");
 +    }
 +}
 +
 +static void enxblock_init(t_enxblock *eb)
 +{
 +    eb->id=enxOR;
 +    eb->nsub=0;
 +    eb->sub=NULL;
 +    eb->nsub_alloc=0;
 +}
 +
 +static void enxblock_free(t_enxblock *eb)
 +{
 +    if (eb->nsub_alloc>0)
 +    {
 +        int i;
 +        for(i=0;i<eb->nsub_alloc;i++)
 +        {
 +            enxsubblock_free(&(eb->sub[i]));
 +        }
 +        free(eb->sub);
 +        eb->nsub_alloc=0;
 +        eb->sub=NULL;
 +    }
 +}
 +
 +void init_enxframe(t_enxframe *fr)
 +{
 +    fr->e_alloc=0;
 +    fr->ener=NULL;
 +
 +    /*fr->d_alloc=0;*/
 +    fr->ener=NULL;
 +
 +    /*fr->ndisre=0;*/
 +
 +    fr->nblock=0;
 +    fr->nblock_alloc=0;
 +    fr->block=NULL;
 +}
 +
 +
 +void free_enxframe(t_enxframe *fr)
 +{
 +  int b;
 +
 +  if (fr->e_alloc)
 +  {
 +    sfree(fr->ener);
 +  }
 +  for(b=0; b<fr->nblock_alloc; b++)
 +  {
 +      enxblock_free(&(fr->block[b]));
 +  }
 +  free(fr->block);
 +}
 +
 +void add_blocks_enxframe(t_enxframe *fr, int n)
 +{
 +    fr->nblock=n;
 +    if (n > fr->nblock_alloc)
 +    {
 +        int b;
 +
 +        srenew(fr->block, n);
 +        for(b=fr->nblock_alloc;b<fr->nblock;b++)
 +        {
 +            enxblock_init(&(fr->block[b]));
 +        }
 +        fr->nblock_alloc=n;
 +    }
 +}
 +
 +t_enxblock *find_block_id_enxframe(t_enxframe *ef, int id, t_enxblock *prev)
 +{
 +    gmx_off_t starti=0;
 +    gmx_off_t i;
 +
 +    if (prev)
 +    {
 +        starti=(prev - ef->block) + 1;
 +    }
 +    for(i=starti; i<ef->nblock; i++)
 +    {
 +        if (ef->block[i].id == id)
 +            return &(ef->block[i]);
 +    }
 +    return NULL;
 +}
 +
 +void add_subblocks_enxblock(t_enxblock *eb, int n)
 +{
 +    eb->nsub=n;
 +    if (eb->nsub > eb->nsub_alloc)
 +    {
 +        int b;
 +
 +        srenew(eb->sub, n);
 +        for(b=eb->nsub_alloc; b<n; b++)
 +        {
 +            enxsubblock_init(&(eb->sub[b]));
 +        } 
 +        eb->nsub_alloc=n;
 +    }
 +}
 +
 +static void enx_warning(const char *msg)
 +{
 +    if (getenv("GMX_ENX_NO_FATAL") != NULL)
 +    {
 +        gmx_warning(msg);
 +    }
 +    else
 +    {
 +        gmx_fatal(FARGS,"%s\n%s",
 +                  msg,
 +                  "If you want to use the correct frames before the corrupted frame and avoid this fatal error set the env.var. GMX_ENX_NO_FATAL");
 +    }
 +}
 +
 +static void edr_strings(XDR *xdr,gmx_bool bRead,int file_version,
 +                        int n,gmx_enxnm_t **nms)
 +{
 +    int  i;
 +    gmx_enxnm_t *nm;
 +
 +    if (*nms == NULL)
 +    {
 +        snew(*nms,n);
 +    }
 +    for(i=0; i<n; i++)
 +    {
 +        nm = &(*nms)[i];
 +        if (bRead)
 +        {
 +            if (nm->name)
 +            {
 +                sfree(nm->name);
 +                nm->name = NULL;
 +            }
 +            if (nm->unit)
 +            {
 +                sfree(nm->unit);
 +                nm->unit = NULL;
 +            }
 +        }
 +        if(!xdr_string(xdr,&(nm->name),STRLEN))
 +        {
 +            gmx_file("Cannot write energy names to file; maybe you are out of disk space?");
 +        }
 +        if (file_version >= 2)
 +        {
 +            if(!xdr_string(xdr,&(nm->unit),STRLEN))
 +            {
 +                gmx_file("Cannot write energy names to file; maybe you are out of disk space?");
 +            }
 +        }
 +        else
 +        {
 +            nm->unit = strdup("kJ/mol");
 +        }
 +    }
 +}
 +
 +void do_enxnms(ener_file_t ef,int *nre,gmx_enxnm_t **nms)
 +{
 +    int  magic=-55555;
 +    XDR  *xdr;
 +    gmx_bool bRead = gmx_fio_getread(ef->fio);
 +    int  file_version;
 +    int  i;
 +   
 +    gmx_fio_checktype(ef->fio); 
 +
 +    xdr = gmx_fio_getxdr(ef->fio);
 +    
 +    if (!xdr_int(xdr,&magic))
 +    {
 +        if(!bRead)
 +        {
 +            gmx_file("Cannot write energy names to file; maybe you are out of disk space?");
 +        }
 +        *nre=0;
 +        return;
 +    }
 +    if (magic > 0)
 +    {
 +        /* Assume this is an old edr format */
 +        file_version = 1;
 +        *nre = magic;
 +        ef->eo.bOldFileOpen = TRUE;
 +        ef->eo.bReadFirstStep = FALSE;
 +        srenew(ef->eo.ener_prev,*nre);
 +    }
 +    else
 +    {
 +        ef->eo.bOldFileOpen=FALSE;
 +
 +        if (magic != -55555)
 +        {
 +            gmx_fatal(FARGS,"Energy names magic number mismatch, this is not a GROMACS edr file");
 +        }
 +        file_version = enx_version;
 +        xdr_int(xdr,&file_version);
 +        if (file_version > enx_version)
 +        {
 +            gmx_fatal(FARGS,"reading tpx file (%s) version %d with version %d program",gmx_fio_getname(ef->fio),file_version,enx_version);
 +        }
 +        xdr_int(xdr,nre);
 +    }
 +    if (file_version != enx_version)
 +    {
 +        fprintf(stderr,"Note: enx file_version %d, software version %d\n",
 +                file_version,enx_version);
 +    }
 +
 +    edr_strings(xdr,bRead,file_version,*nre,nms);
 +}
 +
 +static gmx_bool do_eheader(ener_file_t ef,int *file_version,t_enxframe *fr,
 +                       int nre_test,gmx_bool *bWrongPrecision,gmx_bool *bOK)
 +{
 +    int  magic=-7777777;
 +    real first_real_to_check;
 +    int  b,i,zero=0,dum=0;
 +    gmx_bool bRead = gmx_fio_getread(ef->fio);
 +    int  tempfix_nr=0;
 +    int  ndisre=0;
 +    int  startb=0;
 +#ifndef GMX_DOUBLE
 +    xdr_datatype dtreal=xdr_datatype_float; 
 +#else
 +    xdr_datatype dtreal=xdr_datatype_double; 
 +#endif
 +    
 +    if (nre_test >= 0)
 +    {
 +        *bWrongPrecision = FALSE;
 +    }
 +
 +    *bOK=TRUE;
 +    /* The original energy frame started with a real,
 +     * so we have to use a real for compatibility.
 +     * This is VERY DIRTY code, since do_eheader can be called
 +     * with the wrong precision set and then we could read r > -1e10,
 +     * while actually the intention was r < -1e10.
 +     * When nre_test >= 0, do_eheader should therefore terminate
 +     * before the number of i/o calls starts depending on what has been read
 +     * (which is the case for for instance the block sizes for variable
 +     * number of blocks, where this number is read before).
 +     */
 +    first_real_to_check = -2e10;
 +    if (!gmx_fio_do_real(ef->fio, first_real_to_check))
 +    {
 +        return FALSE;
 +    }
 +    if (first_real_to_check > -1e10)
 +    {
 +        /* Assume we are reading an old format */
 +        *file_version = 1;
 +        fr->t = first_real_to_check;
 +        if (!gmx_fio_do_int(ef->fio, dum))   *bOK = FALSE;
 +        fr->step = dum;
 +    }
 +    else
 +    {
 +        if (!gmx_fio_do_int(ef->fio, magic))       *bOK = FALSE;
 +        if (magic != -7777777)
 +        {
 +            enx_warning("Energy header magic number mismatch, this is not a GROMACS edr file");
 +            *bOK=FALSE;
 +            return FALSE;
 +        }
 +        *file_version = enx_version;
 +        if (!gmx_fio_do_int(ef->fio, *file_version)) *bOK = FALSE;
 +        if (*bOK && *file_version > enx_version)
 +        {
 +            gmx_fatal(FARGS,"reading tpx file (%s) version %d with version %d program",gmx_fio_getname(ef->fio),file_version,enx_version);
 +        }
 +        if (!gmx_fio_do_double(ef->fio, fr->t))       *bOK = FALSE;
 +        if (!gmx_fio_do_gmx_large_int(ef->fio, fr->step)) *bOK = FALSE;
 +        if (!bRead && fr->nsum == 1) {
 +            /* Do not store sums of length 1,
 +             * since this does not add information.
 +             */
 +            if (!gmx_fio_do_int(ef->fio, zero))      *bOK = FALSE;
 +        } else {
 +            if (!gmx_fio_do_int(ef->fio, fr->nsum))  *bOK = FALSE;
 +        }
 +        if (*file_version >= 3)
 +        {
 +            if (!gmx_fio_do_gmx_large_int(ef->fio, fr->nsteps)) *bOK = FALSE;
 +        }
 +        else
 +        {
 +            fr->nsteps = max(1,fr->nsum);
 +        }
 +        if (*file_version >= 5)
 +        {
 +            if (!gmx_fio_do_double(ef->fio, fr->dt)) *bOK = FALSE;
 +        }
 +        else
 +        {
 +            fr->dt = 0;
 +        }
 +    }
 +    if (!gmx_fio_do_int(ef->fio, fr->nre))     *bOK = FALSE;
 +    if (*file_version < 4)
 +    {
 +        if (!gmx_fio_do_int(ef->fio, ndisre))  *bOK = FALSE;
 +    }
 +    else
 +    {
 +        /* now reserved for possible future use */
 +        if (!gmx_fio_do_int(ef->fio, dum))  *bOK = FALSE;
 +    }
 +
 +    if (!gmx_fio_do_int(ef->fio, fr->nblock))  *bOK = FALSE;
 +    if (fr->nblock < 0) *bOK=FALSE;
 +
 +    if (ndisre!=0)
 +    {
 +        if (*file_version >= 4)
 +        {
 +            enx_warning("Distance restraint blocks in old style in new style file");
 +            *bOK=FALSE;
 +            return FALSE;
 +        }
 +        fr->nblock+=1;
 +    }
 +
 +
 +    /* Frames could have nre=0, so we can not rely only on the fr->nre check */
 +    if (bRead && nre_test >= 0 &&
 +        ((fr->nre > 0 && fr->nre != nre_test) ||
 +         fr->nre < 0 || ndisre < 0 || fr->nblock < 0))
 +    {
 +        *bWrongPrecision = TRUE;
 +        return *bOK;
 +    }
 +
 +    /* we now know what these should be, or we've already bailed out because
 +       of wrong precision */
 +    if ( *file_version==1 && (fr->t < 0 || fr->t > 1e20 || fr->step < 0 ) )
 +    {
 +        enx_warning("edr file with negative step number or unreasonable time (and without version number).");
 +        *bOK=FALSE;
 +        return FALSE;
 +    }
 +
 +
 +    if (*bOK && bRead)
 +    {
 +        add_blocks_enxframe(fr, fr->nblock);
 +    }
 +
 +    startb=0;
 +    if (ndisre>0)
 +    {
 +        /* sub[0] is the instantaneous data, sub[1] is time averaged */
 +        add_subblocks_enxblock(&(fr->block[0]), 2);
 +        fr->block[0].id=enxDISRE;
 +        fr->block[0].sub[0].nr=ndisre;
 +        fr->block[0].sub[1].nr=ndisre;
 +        fr->block[0].sub[0].type=dtreal;
 +        fr->block[0].sub[1].type=dtreal;
 +        startb++;
 +    }
 +
 +    /* read block header info */
 +    for(b=startb; b<fr->nblock; b++)
 +    {
 +        if (*file_version<4)
 +        {
 +            /* blocks in old version files always have 1 subblock that 
 +               consists of reals. */
 +            int nrint;
 +
 +            if (bRead)
 +            {
 +                add_subblocks_enxblock(&(fr->block[b]), 1);
 +            }
 +            else
 +            {
 +                if (fr->block[b].nsub != 1)
 +                {
 +                    gmx_incons("Writing an old version .edr file with too many subblocks");
 +                }
 +                if (fr->block[b].sub[0].type != dtreal)
 +                {
 +                    gmx_incons("Writing an old version .edr file the wrong subblock type");
 +                }
 +            }
 +            nrint = fr->block[b].sub[0].nr;
 +            
 +            if (!gmx_fio_do_int(ef->fio, nrint))
 +            {
 +                *bOK = FALSE;
 +            }
 +            fr->block[b].id          = b - startb;
 +            fr->block[b].sub[0].nr   = nrint;
 +            fr->block[b].sub[0].type = dtreal;
 +        }
 +        else
 +        {
 +            int i;
 +            /* in the new version files, the block header only contains
 +               the ID and the number of subblocks */
 +            int nsub=fr->block[b].nsub;
 +            *bOK = *bOK && gmx_fio_do_int(ef->fio, fr->block[b].id);
 +            *bOK = *bOK && gmx_fio_do_int(ef->fio, nsub);
 +
 +            fr->block[b].nsub=nsub;
 +            if (bRead)
 +                add_subblocks_enxblock(&(fr->block[b]), nsub);
 +
 +            /* read/write type & size for each subblock */
 +            for(i=0;i<nsub;i++)
 +            {
 +                t_enxsubblock *sub=&(fr->block[b].sub[i]); /* shortcut */
 +                int typenr=sub->type;
 +
 +                *bOK=*bOK && gmx_fio_do_int(ef->fio, typenr);
 +                *bOK=*bOK && gmx_fio_do_int(ef->fio, sub->nr);
 +
 +                sub->type = (xdr_datatype)typenr;
 +            }
 +        }
 +    }
 +    if (!gmx_fio_do_int(ef->fio, fr->e_size))  *bOK = FALSE;
 +
 +    /* now reserved for possible future use */
 +    if (!gmx_fio_do_int(ef->fio, dum))  *bOK = FALSE;
 +
 +    /* Do a dummy int to keep the format compatible with the old code */
 +    if (!gmx_fio_do_int(ef->fio, dum))         *bOK = FALSE;
 +    
 +    if (*bOK && *file_version == 1 && nre_test < 0)
 +    {
 +#if 0
 +        if (fp >= ener_old_nalloc)
 +        {
 +            gmx_incons("Problem with reading old format energy files");
 +        }
 +#endif
 +        
 +        if (!ef->eo.bReadFirstStep)
 +        {
 +            ef->eo.bReadFirstStep = TRUE;
 +            ef->eo.first_step     = fr->step;
 +            ef->eo.step_prev      = fr->step;
 +            ef->eo.nsum_prev      = 0;
 +        }
 +        
 +        fr->nsum   = fr->step - ef->eo.first_step + 1;
 +        fr->nsteps = fr->step - ef->eo.step_prev;
 +        fr->dt     = 0;
 +    }
 +      
 +    return *bOK;
 +}
 +
 +void free_enxnms(int n,gmx_enxnm_t *nms)
 +{
 +    int i;
 +
 +    for(i=0; i<n; i++)
 +    {
 +        sfree(nms[i].name);
 +        sfree(nms[i].unit);
 +    }
 +
 +    sfree(nms);
 +}
 +
 +void close_enx(ener_file_t ef)
 +{
 +    if(gmx_fio_close(ef->fio) != 0)
 +    {
 +        gmx_file("Cannot close energy file; it might be corrupt, or maybe you are out of disk space?");
 +    }
 +}
 +
 +static gmx_bool empty_file(const char *fn)
 +{
 +    FILE *fp;
 +    char dum;
 +    int  ret;
 +    gmx_bool bEmpty;
 +    
 +    fp = gmx_fio_fopen(fn,"r");
 +    ret = fread(&dum,sizeof(dum),1,fp);
 +    bEmpty = feof(fp);
 +    gmx_fio_fclose(fp);
 +    
 +    return bEmpty;
 +}
 +
 +
 +ener_file_t open_enx(const char *fn,const char *mode)
 +{
 +    int        nre,i;
 +    gmx_enxnm_t *nms=NULL;
 +    int        file_version=-1;
 +    t_enxframe *fr;
 +    gmx_bool       bWrongPrecision,bOK=TRUE;
 +    struct ener_file *ef;
 +
 +    snew(ef,1);
 +
 +    if (mode[0]=='r') {
 +        ef->fio=gmx_fio_open(fn,mode);
 +        gmx_fio_checktype(ef->fio);
 +        gmx_fio_setprecision(ef->fio,FALSE);
 +        do_enxnms(ef,&nre,&nms);
 +        snew(fr,1);
 +        do_eheader(ef,&file_version,fr,nre,&bWrongPrecision,&bOK);
 +        if(!bOK)
 +        {
 +            gmx_file("Cannot read energy file header. Corrupt file?");
 +        }
 +
 +        /* Now check whether this file is in single precision */
 +        if (!bWrongPrecision &&
 +            ((fr->e_size && (fr->nre == nre) && 
 +              (nre*4*(long int)sizeof(float) == fr->e_size)) ) )
 +        {
 +            fprintf(stderr,"Opened %s as single precision energy file\n",fn);
 +            free_enxnms(nre,nms);
 +        }
 +        else
 +        {
 +            gmx_fio_rewind(ef->fio);
 +            gmx_fio_checktype(ef->fio);
 +            gmx_fio_setprecision(ef->fio,TRUE);
 +            do_enxnms(ef,&nre,&nms);
 +            do_eheader(ef,&file_version,fr,nre,&bWrongPrecision,&bOK);
 +            if(!bOK)
 +            {
 +                gmx_file("Cannot write energy file header; maybe you are out of disk space?");
 +            }
 +
 +            if (((fr->e_size && (fr->nre == nre) && 
 +                            (nre*4*(long int)sizeof(double) == fr->e_size)) ))
 +                fprintf(stderr,"Opened %s as double precision energy file\n",
 +                        fn);
 +            else {
 +                if (empty_file(fn))
 +                    gmx_fatal(FARGS,"File %s is empty",fn);
 +                else
 +                    gmx_fatal(FARGS,"Energy file %s not recognized, maybe different CPU?",
 +                              fn);
 +            }
 +            free_enxnms(nre,nms);
 +        }
 +        free_enxframe(fr);
 +        sfree(fr);
 +        gmx_fio_rewind(ef->fio);
 +    }
 +    else 
 +        ef->fio = gmx_fio_open(fn,mode);
 +
 +    ef->framenr=0;
 +    ef->frametime=0;
 +    return ef;
 +}
 +
 +t_fileio *enx_file_pointer(const ener_file_t ef)
 +{
 +    return ef->fio;
 +}
 +
 +static void convert_full_sums(ener_old_t *ener_old,t_enxframe *fr)
 +{
 +    int nstep_all;
 +    int ne,ns,i;
 +    double esum_all,eav_all;
 +    
 +    if (fr->nsum > 0)
 +    {
 +        ne = 0;
 +        ns = 0;
 +        for(i=0; i<fr->nre; i++)
 +        {
 +            if (fr->ener[i].e    != 0) ne++;
 +            if (fr->ener[i].esum != 0) ns++;
 +        }
 +        if (ne > 0 && ns == 0)
 +        {
 +            /* We do not have all energy sums */
 +            fr->nsum = 0;
 +        }
 +    }
 +    
 +    /* Convert old full simulation sums to sums between energy frames */
 +    nstep_all = fr->step - ener_old->first_step + 1;
 +    if (fr->nsum > 1 && fr->nsum == nstep_all && ener_old->nsum_prev > 0)
 +    {
 +        /* Set the new sum length: the frame step difference */
 +        fr->nsum = fr->step - ener_old->step_prev;
 +        for(i=0; i<fr->nre; i++)
 +        {
 +            esum_all = fr->ener[i].esum;
 +            eav_all  = fr->ener[i].eav;
 +            fr->ener[i].esum = esum_all - ener_old->ener_prev[i].esum;
 +            fr->ener[i].eav  = eav_all  - ener_old->ener_prev[i].eav
 +                - dsqr(ener_old->ener_prev[i].esum/(nstep_all - fr->nsum)
 +                       - esum_all/nstep_all)*
 +                (nstep_all - fr->nsum)*nstep_all/(double)fr->nsum;
 +            ener_old->ener_prev[i].esum = esum_all;
 +            ener_old->ener_prev[i].eav  = eav_all;
 +        }
 +        ener_old->nsum_prev = nstep_all;
 +    }
 +    else if (fr->nsum > 0)
 +    {
 +        if (fr->nsum != nstep_all)
 +        {
 +            fprintf(stderr,"\nWARNING: something is wrong with the energy sums, will not use exact averages\n");
 +            ener_old->nsum_prev = 0;
 +        }
 +        else
 +        {
 +            ener_old->nsum_prev = nstep_all;
 +        }
 +        /* Copy all sums to ener_prev */
 +        for(i=0; i<fr->nre; i++)
 +        {
 +            ener_old->ener_prev[i].esum = fr->ener[i].esum;
 +            ener_old->ener_prev[i].eav  = fr->ener[i].eav;
 +        }
 +    }
 +    
 +    ener_old->step_prev = fr->step;
 +}
 +
 +gmx_bool do_enx(ener_file_t ef,t_enxframe *fr)
 +{
 +    int       file_version=-1;
 +    int       i,b;
 +    gmx_bool      bRead,bOK,bOK1,bSane;
 +    real      tmp1,tmp2,rdum;
 +    char      buf[22];
 +    /*int       d_size;*/
 +    
 +    bOK = TRUE;
 +    bRead = gmx_fio_getread(ef->fio);
 +    if (!bRead)
 +    {  
 +        fr->e_size = fr->nre*sizeof(fr->ener[0].e)*4;
 +        /*d_size = fr->ndisre*(sizeof(real)*2);*/
 +    }
 +    gmx_fio_checktype(ef->fio);
 +
 +    if (!do_eheader(ef,&file_version,fr,-1,NULL,&bOK))
 +    {
 +        if (bRead)
 +        {
 +            fprintf(stderr,"\rLast energy frame read %d time %8.3f         ",
 +                    ef->framenr-1,ef->frametime);
 +            if (!bOK)
 +            {
 +                fprintf(stderr,
 +                        "\nWARNING: Incomplete energy frame: nr %d time %8.3f\n",
 +                        ef->framenr,fr->t);
 +            }
 +        }
 +        else
 +        {
 +            gmx_file("Cannot write energy file header; maybe you are out of disk space?");
 +        }
 +        return FALSE;
 +    }
 +    if (bRead)
 +    {
 +        if ((ef->framenr <   20 || ef->framenr %   10 == 0) &&
 +            (ef->framenr <  200 || ef->framenr %  100 == 0) &&
 +            (ef->framenr < 2000 || ef->framenr % 1000 == 0))
 +        {
 +            fprintf(stderr,"\rReading energy frame %6d time %8.3f         ",
 +                    ef->framenr,fr->t);
 +        }
 +        ef->framenr++;
 +        ef->frametime = fr->t;
 +    }
 +    /* Check sanity of this header */
 +    bSane = fr->nre > 0 ;
 +    for(b=0; b<fr->nblock; b++)
 +    {
 +        bSane = bSane || (fr->block[b].nsub > 0);
 +    }
 +    if (!((fr->step >= 0) && bSane))
 +    {
 +        fprintf(stderr,"\nWARNING: there may be something wrong with energy file %s\n",
 +                gmx_fio_getname(ef->fio));
 +        fprintf(stderr,"Found: step=%s, nre=%d, nblock=%d, time=%g.\n"
 +                "Trying to skip frame expect a crash though\n",
 +                gmx_step_str(fr->step,buf),fr->nre,fr->nblock,fr->t);
 +    }
 +    if (bRead && fr->nre > fr->e_alloc)
 +    {
 +        srenew(fr->ener,fr->nre);
 +        for(i=fr->e_alloc; (i<fr->nre); i++)
 +        {
 +            fr->ener[i].e    = 0;
 +            fr->ener[i].eav  = 0;
 +            fr->ener[i].esum = 0;
 +        }
 +        fr->e_alloc = fr->nre;
 +    }
 +    
 +    for(i=0; i<fr->nre; i++)
 +    {
 +        bOK = bOK && gmx_fio_do_real(ef->fio, fr->ener[i].e);
 +        
 +        /* Do not store sums of length 1,
 +         * since this does not add information.
 +         */
 +        if (file_version == 1 ||
 +            (bRead && fr->nsum > 0) || fr->nsum > 1)
 +        {
 +            tmp1 = fr->ener[i].eav;
 +            bOK = bOK && gmx_fio_do_real(ef->fio, tmp1);
 +            if (bRead)
 +                fr->ener[i].eav = tmp1;
 +            
 +            /* This is to save only in single precision (unless compiled in DP) */
 +            tmp2 = fr->ener[i].esum;
 +            bOK = bOK && gmx_fio_do_real(ef->fio, tmp2);
 +            if (bRead)
 +                fr->ener[i].esum = tmp2;
 +            
 +            if (file_version == 1)
 +            {
 +                /* Old, unused real */
 +                rdum = 0;
 +                bOK = bOK && gmx_fio_do_real(ef->fio, rdum);
 +            }
 +        }
 +    }
 +    
 +    /* Here we can not check for file_version==1, since one could have
 +     * continued an old format simulation with a new one with mdrun -append.
 +     */
 +    if (bRead && ef->eo.bOldFileOpen)
 +    {
 +        /* Convert old full simulation sums to sums between energy frames */
 +        convert_full_sums(&(ef->eo),fr);
 +    }
 +    /* read the blocks */
 +    for(b=0; b<fr->nblock; b++)
 +    {
 +        /* now read the subblocks. */
 +        int nsub=fr->block[b].nsub; /* shortcut */
 +        int i;
 +
 +        for(i=0;i<nsub;i++)
 +        {
 +            t_enxsubblock *sub=&(fr->block[b].sub[i]); /* shortcut */
 +
 +            if (bRead)
 +            {
 +                enxsubblock_alloc(sub);
 +            }
 +
 +            /* read/write data */
 +            bOK1=TRUE;
 +            switch (sub->type)
 +            {
 +                case xdr_datatype_float:
 +                    bOK1=gmx_fio_ndo_float(ef->fio, sub->fval, sub->nr); 
 +                    break;
 +                case xdr_datatype_double:
 +                    bOK1=gmx_fio_ndo_double(ef->fio, sub->dval, sub->nr); 
 +                    break;
 +                case xdr_datatype_int:
 +                    bOK1=gmx_fio_ndo_int(ef->fio, sub->ival, sub->nr);
 +                    break;
 +                case xdr_datatype_large_int:
 +                    bOK1=gmx_fio_ndo_gmx_large_int(ef->fio, sub->lval, sub->nr);
 +                    break;
 +                case xdr_datatype_char:
 +                    bOK1=gmx_fio_ndo_uchar(ef->fio, sub->cval, sub->nr);
 +                    break;
 +                case xdr_datatype_string:
 +                    bOK1=gmx_fio_ndo_string(ef->fio, sub->sval, sub->nr);
 +                    break;
 +                default:
 +                    gmx_incons("Reading unknown block data type: this file is corrupted or from the future");
 +            }
 +            bOK = bOK && bOK1;
 +        }
 +    }
 +    
 +    if(!bRead)
 +    {
 +        if( gmx_fio_flush(ef->fio) != 0)
 +        {
 +            gmx_file("Cannot write energy file; maybe you are out of disk space?");
 +        }
 +    }
 +    
 +    if (!bOK)
 +    {
 +        if (bRead)
 +        {
 +            fprintf(stderr,"\nLast energy frame read %d",
 +                    ef->framenr-1);
 +            fprintf(stderr,"\nWARNING: Incomplete energy frame: nr %d time %8.3f\n",
 +                    ef->framenr,fr->t);
 +        }
 +        else
 +        {
 +            gmx_fatal(FARGS,"could not write energies");
 +        }
 +        return FALSE; 
 +    }
 +    
 +    return TRUE;
 +}
 +
 +static real find_energy(const char *name, int nre, gmx_enxnm_t *enm,
 +                        t_enxframe *fr)
 +{
 +    int i;
 +    
 +    for(i=0; i<nre; i++)
 +    {
 +        if (strcmp(enm[i].name,name) == 0)
 +        {
 +            return  fr->ener[i].e;
 +        }
 +    }
 +    
 +    gmx_fatal(FARGS,"Could not find energy term named '%s'",name);
 +    
 +    return 0;
 +}
 +
 +
 +void get_enx_state(const char *fn, real t, gmx_groups_t *groups, t_inputrec *ir,
 +                   t_state *state)
 +{
 +  /* Should match the names in mdebin.c */
 +  static const char *boxvel_nm[] = {
 +  "Box-Vel-XX", "Box-Vel-YY", "Box-Vel-ZZ",
 +  "Box-Vel-YX", "Box-Vel-ZX", "Box-Vel-ZY"
 +  };
 +  
 +  static const char *pcouplmu_nm[] = {
 +    "Pcoupl-Mu-XX", "Pcoupl-Mu-YY", "Pcoupl-Mu-ZZ",
 +    "Pcoupl-Mu-YX", "Pcoupl-Mu-ZX", "Pcoupl-Mu-ZY"
 +  };
 +  static const char *baro_nm[] = {
 +    "Barostat"
 +  };
 +
 +
 +  int ind0[] = { XX,YY,ZZ,YY,ZZ,ZZ };
 +  int ind1[] = { XX,YY,ZZ,XX,XX,YY };
 +  int nre,nfr,i,j,ni,npcoupl;
 +  char       buf[STRLEN];
 +  const char *bufi;
 +  gmx_enxnm_t *enm=NULL;
 +  t_enxframe *fr;
 +  ener_file_t in;
 +
 +  in = open_enx(fn,"r");
 +  do_enxnms(in,&nre,&enm);
 +  snew(fr,1);
 +  nfr = 0;
 +  while ((nfr==0 || fr->t != t) && do_enx(in,fr)) {
 +    nfr++;
 +  }
 +  close_enx(in);
 +  fprintf(stderr,"\n");
 +
 +  if (nfr == 0 || fr->t != t)
 +    gmx_fatal(FARGS,"Could not find frame with time %f in '%s'",t,fn);
 +  
 +  npcoupl = TRICLINIC(ir->compress) ? 6 : 3;
 +  if (ir->epc == epcPARRINELLORAHMAN) {
 +    clear_mat(state->boxv);
 +    for(i=0; i<npcoupl; i++) {
 +      state->boxv[ind0[i]][ind1[i]] =
 +      find_energy(boxvel_nm[i],nre,enm,fr);
 +    }
 +    fprintf(stderr,"\nREAD %d BOX VELOCITIES FROM %s\n\n",npcoupl,fn);
 +  }
 +
 +  if (ir->etc == etcNOSEHOOVER) 
 +  {
 +      char cns[20];
 +
 +      cns[0] = '\0';
 +
 +      for(i=0; i<state->ngtc; i++) {
 +          ni = groups->grps[egcTC].nm_ind[i];
 +          bufi = *(groups->grpname[ni]);
 +          for(j=0; (j<state->nhchainlength); j++) 
 +          {
 +              if (IR_NVT_TROTTER(ir))
 +              {
 +                  sprintf(cns,"-%d",j);
 +              }
 +              sprintf(buf,"Xi%s-%s",cns,bufi);
 +              state->nosehoover_xi[i] = find_energy(buf,nre,enm,fr);
 +              sprintf(buf,"vXi%s-%s",cns,bufi);
 +              state->nosehoover_vxi[i] = find_energy(buf,nre,enm,fr);
 +          }
 +
 +      }
 +      fprintf(stderr,"\nREAD %d NOSE-HOOVER Xi chains FROM %s\n\n",state->ngtc,fn);
 +
 +      if (IR_NPT_TROTTER(ir)) 
 +      {
 +          for(i=0; i<state->nnhpres; i++) {
 +              bufi = baro_nm[0]; /* All barostat DOF's together for now */
 +              for(j=0; (j<state->nhchainlength); j++) 
 +              {
 +                  sprintf(buf,"Xi-%d-%s",j,bufi); 
 +                  state->nhpres_xi[i] = find_energy(buf,nre,enm,fr);
 +                  sprintf(buf,"vXi-%d-%s",j,bufi);
 +                  state->nhpres_vxi[i] = find_energy(buf,nre,enm,fr);
 +              }
 +          }
 +          fprintf(stderr,"\nREAD %d NOSE-HOOVER BAROSTAT Xi chains FROM %s\n\n",state->nnhpres,fn);
 +      }
 +  } 
 +
 +  free_enxnms(nre,enm);
 +  free_enxframe(fr);
 +  sfree(fr);
 +}
 +
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
index 5dd6ed751d9b6130daf8d1c61e4830553ff92d7b,0000000000000000000000000000000000000000..2b0e5e46fda47bc82392ff729d82b5b3f6f7ef0c
mode 100644,000000..100644
--- /dev/null
@@@ -1,1527 -1,0 +1,1511 @@@
- static void low_pr_block(FILE *fp,int indent,const char *title,t_block *block, gmx_bool bShowNumbers)
- {
-   int i;
-   
-   if (available(fp,block,indent,title))
-     {
-       indent=pr_block_title(fp,indent,title,block);
-       for (i=0; i<=block->nr; i++)
-         {
-           (void) pr_indent(fp,indent+INDENT);
-           (void) fprintf(fp,"%s->index[%d]=%u\n",
-                        title,bShowNumbers?i:-1,block->index[i]);
-         }
-     }
- }
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROningen Mixture of Alchemy and Childrens' Stories
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +/* This file is completely threadsafe - please keep it that way! */
 +#ifdef GMX_THREAD_MPI
 +#include <thread_mpi.h>
 +#endif
 +
 +
 +#include <stdio.h>
 +#include "smalloc.h"
 +#include "typedefs.h"
 +#include "names.h"
 +#include "txtdump.h"
 +#include "string2.h"
 +#include "vec.h"
 +#include "macros.h"
 +
 +
 +int pr_indent(FILE *fp,int n)
 +{
 +  int i;
 +
 +  for (i=0; i<n; i++) (void) fprintf(fp," ");
 +  return n;
 +}
 +
 +int available(FILE *fp,void *p,int indent,const char *title)
 +{
 +  if (!p) {
 +    if (indent > 0)
 +      pr_indent(fp,indent);
 +    (void) fprintf(fp,"%s: not available\n",title);
 +  }
 +  return (p!=NULL);
 +}
 +
 +int pr_title(FILE *fp,int indent,const char *title)
 +{
 +  (void) pr_indent(fp,indent);
 +  (void) fprintf(fp,"%s:\n",title);
 +  return (indent+INDENT);
 +}
 +
 +int pr_title_n(FILE *fp,int indent,const char *title,int n)
 +{
 +  (void) pr_indent(fp,indent);
 +  (void) fprintf(fp,"%s (%d):\n",title,n);
 +  return (indent+INDENT);
 +}
 +
 +int pr_title_nxn(FILE *fp,int indent,const char *title,int n1,int n2)
 +{
 +  (void) pr_indent(fp,indent);
 +  (void) fprintf(fp,"%s (%dx%d):\n",title,n1,n2);
 +  return (indent+INDENT);
 +}
 +
 +void pr_ivec(FILE *fp,int indent,const char *title,int vec[],int n, gmx_bool bShowNumbers)
 +{
 +  int i;
 +
 +  if (available(fp,vec,indent,title))
 +    {
 +      indent=pr_title_n(fp,indent,title,n);
 +      for (i=0; i<n; i++)
 +        {
 +          (void) pr_indent(fp,indent);
 +          (void) fprintf(fp,"%s[%d]=%d\n",title,bShowNumbers?i:-1,vec[i]);
 +        }
 +    }
 +}
 +
 +void pr_ivec_block(FILE *fp,int indent,const char *title,int vec[],int n, gmx_bool bShowNumbers)
 +{
 +    int i,j;
 +    
 +    if (available(fp,vec,indent,title))
 +    {
 +        indent=pr_title_n(fp,indent,title,n);
 +        i = 0;
 +        while (i < n)
 +        {
 +            j = i+1;
 +            while (j < n && vec[j] == vec[j-1]+1)
 +            {
 +                j++;
 +            }
 +            /* Print consecutive groups of 3 or more as blocks */
 +            if (j - i < 3)
 +            {
 +                while(i < j)
 +                {
 +                    (void) pr_indent(fp,indent);
 +                    (void) fprintf(fp,"%s[%d]=%d\n",
 +                                   title,bShowNumbers?i:-1,vec[i]);
 +                    i++;
 +                }
 +            }
 +            else
 +            {
 +                (void) pr_indent(fp,indent);
 +                (void) fprintf(fp,"%s[%d,...,%d] = {%d,...,%d}\n",
 +                               title,
 +                               bShowNumbers?i:-1,
 +                               bShowNumbers?j-1:-1,
 +                               vec[i],vec[j-1]); 
 +                i = j;
 +            }
 +        }
 +    }
 +}
 +
 +void pr_bvec(FILE *fp,int indent,const char *title,gmx_bool vec[],int n, gmx_bool bShowNumbers)
 +{
 +  int i;
 +
 +  if (available(fp,vec,indent,title))
 +    {
 +      indent=pr_title_n(fp,indent,title,n);
 +      for (i=0; i<n; i++)
 +        {
 +          (void) pr_indent(fp,indent);
 +          (void) fprintf(fp,"%s[%d]=%s\n",title,bShowNumbers?i:-1,
 +                       BOOL(vec[i]));
 +        }
 +    }
 +}
 +
 +void pr_ivecs(FILE *fp,int indent,const char *title,ivec vec[],int n, gmx_bool bShowNumbers)
 +{
 +  int i,j;
 +
 +  if (available(fp,vec,indent,title))
 +    {  
 +      indent=pr_title_nxn(fp,indent,title,n,DIM);
 +      for (i=0; i<n; i++)
 +        {
 +          (void) pr_indent(fp,indent);
 +          (void) fprintf(fp,"%s[%d]={",title,bShowNumbers?i:-1);
 +          for (j=0; j<DIM; j++)
 +            {
 +              if (j!=0) (void) fprintf(fp,", ");
 +              fprintf(fp,"%d",vec[i][j]);
 +            }
 +          (void) fprintf(fp,"}\n");
 +        }
 +    }
 +}
 +
 +void pr_rvec(FILE *fp,int indent,const char *title,real vec[],int n, gmx_bool bShowNumbers)
 +{
 +  int i;
 +
 +  if (available(fp,vec,indent,title))
 +    {  
 +      indent=pr_title_n(fp,indent,title,n);
 +      for (i=0; i<n; i++)
 +        {
 +          pr_indent(fp,indent);
 +          fprintf(fp,"%s[%d]=%12.5e\n",title,bShowNumbers?i:-1,vec[i]);
 +        }
 +    }
 +}
 +
 +void pr_dvec(FILE *fp,int indent,const char *title,double vec[],int n, gmx_bool bShowNumbers)
 +{
 +      int i;
 +      
 +      if (available(fp,vec,indent,title))
 +    {  
 +              indent=pr_title_n(fp,indent,title,n);
 +              for (i=0; i<n; i++)
 +        {
 +                      pr_indent(fp,indent);
 +                      fprintf(fp,"%s[%d]=%12.5e\n",title,bShowNumbers?i:-1,vec[i]);
 +        }
 +    }
 +}
 +
 +
 +/*
 +void pr_mat(FILE *fp,int indent,char *title,matrix m)
 +{
 +  int i,j;
 +  
 +  if (available(fp,m,indent,title)) {  
 +    indent=pr_title_n(fp,indent,title,n);
 +    for(i=0; i<n; i++) {
 +      pr_indent(fp,indent);
 +      fprintf(fp,"%s[%d]=%12.5e %12.5e %12.5e\n",
 +            title,bShowNumbers?i:-1,m[i][XX],m[i][YY],m[i][ZZ]);
 +    }
 +  }
 +}
 +*/
 +
 +void pr_rvecs_len(FILE *fp,int indent,const char *title,rvec vec[],int n)
 +{
 +  int i,j;
 +
 +  if (available(fp,vec,indent,title)) {  
 +    indent=pr_title_nxn(fp,indent,title,n,DIM);
 +    for (i=0; i<n; i++) {
 +      (void) pr_indent(fp,indent);
 +      (void) fprintf(fp,"%s[%5d]={",title,i);
 +      for (j=0; j<DIM; j++) {
 +      if (j != 0) 
 +        (void) fprintf(fp,", ");
 +      (void) fprintf(fp,"%12.5e",vec[i][j]);
 +      }
 +      (void) fprintf(fp,"} len=%12.5e\n",norm(vec[i]));
 +    }
 +  }
 +}
 +
 +void pr_rvecs(FILE *fp,int indent,const char *title,rvec vec[],int n)
 +{
 +  const char *fshort = "%12.5e";
 +  const char *flong  = "%15.8e";
 +  const char *format;
 +  int i,j;
 +
 +  if (getenv("LONGFORMAT") != NULL)
 +    format = flong;
 +  else
 +    format = fshort;
 +    
 +  if (available(fp,vec,indent,title)) {  
 +    indent=pr_title_nxn(fp,indent,title,n,DIM);
 +    for (i=0; i<n; i++) {
 +      (void) pr_indent(fp,indent);
 +      (void) fprintf(fp,"%s[%5d]={",title,i);
 +      for (j=0; j<DIM; j++) {
 +      if (j != 0) 
 +        (void) fprintf(fp,", ");
 +      (void) fprintf(fp,format,vec[i][j]);
 +      }
 +      (void) fprintf(fp,"}\n");
 +    }
 +  }
 +}
 +
 +
 +void pr_reals(FILE *fp,int indent,const char *title,real *vec,int n)
 +{
 +  int i;
 +    
 +  if (available(fp,vec,indent,title)) {  
 +    (void) pr_indent(fp,indent);
 +    (void) fprintf(fp,"%s:\t",title);
 +    for(i=0; i<n; i++)
 +      fprintf(fp,"  %10g",vec[i]);
 +    (void) fprintf(fp,"\n");
 +  }
 +}
 +
 +void pr_doubles(FILE *fp,int indent,const char *title,double *vec,int n)
 +{
 +  int i;
 +    
 +  if (available(fp,vec,indent,title)) {  
 +    (void) pr_indent(fp,indent);
 +    (void) fprintf(fp,"%s:\t",title);
 +    for(i=0; i<n; i++)
 +      fprintf(fp,"  %10g",vec[i]);
 +    (void) fprintf(fp,"\n");
 +  }
 +}
 +
 +static void pr_int(FILE *fp,int indent,const char *title,int i)
 +{
 +  pr_indent(fp,indent);
 +  fprintf(fp,"%-20s = %d\n",title,i);
 +}
 +
 +static void pr_gmx_large_int(FILE *fp,int indent,const char *title,gmx_large_int_t i)
 +{
 +  char buf[STEPSTRSIZE];
 +
 +  pr_indent(fp,indent);
 +  fprintf(fp,"%-20s = %s\n",title,gmx_step_str(i,buf));
 +}
 +
 +static void pr_real(FILE *fp,int indent,const char *title,real r)
 +{
 +  pr_indent(fp,indent);
 +  fprintf(fp,"%-20s = %g\n",title,r);
 +}
 +
 +static void pr_double(FILE *fp,int indent,const char *title,double d)
 +{
 +  pr_indent(fp,indent);
 +  fprintf(fp,"%-20s = %g\n",title,d);
 +}
 +
 +static void pr_str(FILE *fp,int indent,const char *title,const char *s)
 +{
 +  pr_indent(fp,indent);
 +  fprintf(fp,"%-20s = %s\n",title,s);
 +}
 +
 +void pr_qm_opts(FILE *fp,int indent,const char *title,t_grpopts *opts)
 +{
 +  int i,m,j;
 +
 +  fprintf(fp,"%s:\n",title);
 +  
 +  pr_int(fp,indent,"ngQM",opts->ngQM);
 +  if (opts->ngQM > 0) {
 +    pr_ivec(fp,indent,"QMmethod",opts->QMmethod,opts->ngQM,FALSE);
 +    pr_ivec(fp,indent,"QMbasis",opts->QMbasis,opts->ngQM,FALSE);
 +    pr_ivec(fp,indent,"QMcharge",opts->QMcharge,opts->ngQM,FALSE);
 +    pr_ivec(fp,indent,"QMmult",opts->QMmult,opts->ngQM,FALSE);
 +    pr_bvec(fp,indent,"bSH",opts->bSH,opts->ngQM,FALSE);
 +    pr_ivec(fp,indent,"CASorbitals",opts->CASorbitals,opts->ngQM,FALSE);
 +    pr_ivec(fp,indent,"CASelectrons",opts->CASelectrons,opts->ngQM,FALSE);
 +    pr_rvec(fp,indent,"SAon",opts->SAon,opts->ngQM,FALSE);
 +    pr_rvec(fp,indent,"SAon",opts->SAon,opts->ngQM,FALSE);
 +    pr_ivec(fp,indent,"SAsteps",opts->SAsteps,opts->ngQM,FALSE);
 +    pr_bvec(fp,indent,"bOPT",opts->bOPT,opts->ngQM,FALSE);
 +    pr_bvec(fp,indent,"bTS",opts->bTS,opts->ngQM,FALSE);
 +  }
 +}
 +
 +static void pr_grp_opts(FILE *out,int indent,const char *title,t_grpopts *opts,
 +                      gmx_bool bMDPformat)
 +{
 +  int i,m,j;
 +
 +  if (!bMDPformat)
 +    fprintf(out,"%s:\n",title);
 +  
 +  pr_indent(out,indent);
 +  fprintf(out,"nrdf%s",bMDPformat ? " = " : ":");
 +  for(i=0; (i<opts->ngtc); i++)
 +    fprintf(out,"  %10g",opts->nrdf[i]);
 +  fprintf(out,"\n");
 +  
 +  pr_indent(out,indent);
 +  fprintf(out,"ref-t%s",bMDPformat ? " = " : ":");
 +  for(i=0; (i<opts->ngtc); i++)
 +    fprintf(out,"  %10g",opts->ref_t[i]);
 +  fprintf(out,"\n");
 +
 +  pr_indent(out,indent);
 +  fprintf(out,"tau-t%s",bMDPformat ? " = " : ":");
 +  for(i=0; (i<opts->ngtc); i++)
 +    fprintf(out,"  %10g",opts->tau_t[i]);
 +  fprintf(out,"\n");  
 +  
 +  /* Pretty-print the simulated annealing info */
 +  fprintf(out,"anneal%s",bMDPformat ? " = " : ":");
 +  for(i=0; (i<opts->ngtc); i++)
 +    fprintf(out,"  %10s",EANNEAL(opts->annealing[i]));
 +  fprintf(out,"\n");  
 + 
 +  fprintf(out,"ann-npoints%s",bMDPformat ? " = " : ":");
 +  for(i=0; (i<opts->ngtc); i++)
 +    fprintf(out,"  %10d",opts->anneal_npoints[i]);
 +  fprintf(out,"\n");  
 + 
 +  for(i=0; (i<opts->ngtc); i++) {
 +    if(opts->anneal_npoints[i]>0) {
 +      fprintf(out,"ann. times [%d]:\t",i);
 +      for(j=0; (j<opts->anneal_npoints[i]); j++)
 +      fprintf(out,"  %10.1f",opts->anneal_time[i][j]);
 +      fprintf(out,"\n");  
 +      fprintf(out,"ann. temps [%d]:\t",i);
 +      for(j=0; (j<opts->anneal_npoints[i]); j++)
 +      fprintf(out,"  %10.1f",opts->anneal_temp[i][j]);
 +      fprintf(out,"\n");  
 +    }
 +  }
 +  
 +  pr_indent(out,indent);
 +  fprintf(out,"acc:\t");
 +  for(i=0; (i<opts->ngacc); i++)
 +    for(m=0; (m<DIM); m++)
 +      fprintf(out,"  %10g",opts->acc[i][m]);
 +  fprintf(out,"\n");
 +
 +  pr_indent(out,indent);
 +  fprintf(out,"nfreeze:");
 +  for(i=0; (i<opts->ngfrz); i++)
 +    for(m=0; (m<DIM); m++)
 +      fprintf(out,"  %10s",opts->nFreeze[i][m] ? "Y" : "N");
 +  fprintf(out,"\n");
 +
 +
 +  for(i=0; (i<opts->ngener); i++) {
 +    pr_indent(out,indent);
 +    fprintf(out,"energygrp-flags[%3d]:",i);
 +    for(m=0; (m<opts->ngener); m++)
 +      fprintf(out," %d",opts->egp_flags[opts->ngener*i+m]);
 +    fprintf(out,"\n");
 +  }
 +
 +  fflush(out);
 +}
 +
 +static void pr_matrix(FILE *fp,int indent,const char *title,rvec *m,
 +                    gmx_bool bMDPformat)
 +{
 +  if (bMDPformat)
 +    fprintf(fp,"%-10s    = %g %g %g %g %g %g\n",title,
 +          m[XX][XX],m[YY][YY],m[ZZ][ZZ],m[XX][YY],m[XX][ZZ],m[YY][ZZ]);
 +  else
 +    pr_rvecs(fp,indent,title,m,DIM);
 +}
 +
 +static void pr_cosine(FILE *fp,int indent,const char *title,t_cosines *cos,
 +                    gmx_bool bMDPformat)
 +{
 +  int j;
 +  
 +  if (bMDPformat) {
 +    fprintf(fp,"%s = %d\n",title,cos->n);
 +  }
 +  else {
 +    indent=pr_title(fp,indent,title);
 +    (void) pr_indent(fp,indent);
 +    fprintf(fp,"n = %d\n",cos->n);
 +    if (cos->n > 0) {
 +      (void) pr_indent(fp,indent+2);
 +      fprintf(fp,"a =");
 +      for(j=0; (j<cos->n); j++)
 +      fprintf(fp," %e",cos->a[j]);
 +      fprintf(fp,"\n");
 +      (void) pr_indent(fp,indent+2);
 +      fprintf(fp,"phi =");
 +      for(j=0; (j<cos->n); j++)
 +      fprintf(fp," %e",cos->phi[j]);
 +      fprintf(fp,"\n");
 +    }
 +  }
 +}
 +
 +#define PS(t,s) pr_str(fp,indent,t,s)
 +#define PI(t,s) pr_int(fp,indent,t,s)
 +#define PSTEP(t,s) pr_gmx_large_int(fp,indent,t,s)
 +#define PR(t,s) pr_real(fp,indent,t,s)
 +#define PD(t,s) pr_double(fp,indent,t,s)
 +
 +static void pr_pullgrp(FILE *fp,int indent,int g,t_pullgrp *pg)
 +{
 +  pr_indent(fp,indent);
 +  fprintf(fp,"pull-group %d:\n",g);
 +  indent += 2;
 +  pr_ivec_block(fp,indent,"atom",pg->ind,pg->nat,TRUE);
 +  pr_rvec(fp,indent,"weight",pg->weight,pg->nweight,TRUE);
 +  PI("pbcatom",pg->pbcatom);
 +  pr_rvec(fp,indent,"vec",pg->vec,DIM,TRUE);
 +  pr_rvec(fp,indent,"init",pg->init,DIM,TRUE);
 +  PR("rate",pg->rate);
 +  PR("k",pg->k);
 +  PR("kB",pg->kB);
 +}
 +
 +static void pr_pull(FILE *fp,int indent,t_pull *pull)
 +{
 +  int g;
 +
 +  PS("pull-geometry",EPULLGEOM(pull->eGeom));
 +  pr_ivec(fp,indent,"pull-dim",pull->dim,DIM,TRUE);
 +  PR("pull-r1",pull->cyl_r1);
 +  PR("pull-r0",pull->cyl_r0);
 +  PR("pull-constr-tol",pull->constr_tol);
 +  PI("pull-nstxout",pull->nstxout);
 +  PI("pull-nstfout",pull->nstfout);
 +  PI("pull-ngrp",pull->ngrp);
 +  for(g=0; g<pull->ngrp+1; g++)
 +    pr_pullgrp(fp,indent,g,&pull->grp[g]);
 +}
 +
 +static void pr_rotgrp(FILE *fp,int indent,int g,t_rotgrp *rotg)
 +{
 +  pr_indent(fp,indent);
 +  fprintf(fp,"rotation_group %d:\n",g);
 +  indent += 2;
 +  PS("type",EROTGEOM(rotg->eType));
 +  PS("massw",BOOL(rotg->bMassW));
 +  pr_ivec_block(fp,indent,"atom",rotg->ind,rotg->nat,TRUE);
 +  pr_rvecs(fp,indent,"x_ref",rotg->x_ref,rotg->nat);
 +  pr_rvec(fp,indent,"vec",rotg->vec,DIM,TRUE);
 +  pr_rvec(fp,indent,"pivot",rotg->pivot,DIM,TRUE);
 +  PR("rate",rotg->rate);
 +  PR("k",rotg->k);
 +  PR("slab_dist",rotg->slab_dist);
 +  PR("min_gaussian",rotg->min_gaussian);
 +  PR("epsilon",rotg->eps);
 +  PS("fit_method",EROTFIT(rotg->eFittype));
 +  PI("potfitangle_nstep",rotg->PotAngle_nstep);
 +  PR("potfitangle_step",rotg->PotAngle_step);
 +}
 +
 +static void pr_rot(FILE *fp,int indent,t_rot *rot)
 +{
 +  int g;
 +
 +  PI("rot_nstrout",rot->nstrout);
 +  PI("rot_nstsout",rot->nstsout);
 +  PI("rot_ngrp",rot->ngrp);
 +  for(g=0; g<rot->ngrp; g++)
 +    pr_rotgrp(fp,indent,g,&rot->grp[g]);
 +}
 +
 +void pr_inputrec(FILE *fp,int indent,const char *title,t_inputrec *ir,
 +                 gmx_bool bMDPformat)
 +{
 +  const char *infbuf="inf";
 +  int  i;
 +  
 +  if (available(fp,ir,indent,title)) {
 +    if (!bMDPformat)
 +      indent=pr_title(fp,indent,title);
 +    PS("integrator",EI(ir->eI));
 +    PSTEP("nsteps",ir->nsteps);
 +    PSTEP("init-step",ir->init_step);
 +    PS("ns-type",ENS(ir->ns_type));
 +    PI("nstlist",ir->nstlist);
 +    PI("ndelta",ir->ndelta);
 +    PI("nstcomm",ir->nstcomm);
 +    PS("comm-mode",ECOM(ir->comm_mode));
 +    PI("nstlog",ir->nstlog);
 +    PI("nstxout",ir->nstxout);
 +    PI("nstvout",ir->nstvout);
 +    PI("nstfout",ir->nstfout);
 +    PI("nstcalcenergy",ir->nstcalcenergy);
 +    PI("nstenergy",ir->nstenergy);
 +    PI("nstxtcout",ir->nstxtcout);
 +    PR("init-t",ir->init_t);
 +    PR("delta-t",ir->delta_t);
 +    
 +    PR("xtcprec",ir->xtcprec);
 +    PI("nkx",ir->nkx);
 +    PI("nky",ir->nky);
 +    PI("nkz",ir->nkz);
 +    PI("pme-order",ir->pme_order);
 +    PR("ewald-rtol",ir->ewald_rtol);
 +    PR("ewald-geometry",ir->ewald_geometry);
 +    PR("epsilon-surface",ir->epsilon_surface);
 +    PS("optimize-fft",BOOL(ir->bOptFFT));
 +    PS("ePBC",EPBC(ir->ePBC));
 +    PS("bPeriodicMols",BOOL(ir->bPeriodicMols));
 +    PS("bContinuation",BOOL(ir->bContinuation));
 +    PS("bShakeSOR",BOOL(ir->bShakeSOR));
 +    PS("etc",ETCOUPLTYPE(ir->etc));
 +    PI("nsttcouple",ir->nsttcouple);
 +    PS("epc",EPCOUPLTYPE(ir->epc));
 +    PS("epctype",EPCOUPLTYPETYPE(ir->epct));
 +    PI("nstpcouple",ir->nstpcouple);
 +    PR("tau-p",ir->tau_p);
 +    pr_matrix(fp,indent,"ref-p",ir->ref_p,bMDPformat);
 +    pr_matrix(fp,indent,"compress",ir->compress,bMDPformat);
 +    PS("refcoord-scaling",EREFSCALINGTYPE(ir->refcoord_scaling));
 +    if (bMDPformat)
 +      fprintf(fp,"posres-com  = %g %g %g\n",ir->posres_com[XX],
 +            ir->posres_com[YY],ir->posres_com[ZZ]);
 +    else
 +      pr_rvec(fp,indent,"posres-com",ir->posres_com,DIM,TRUE);
 +    if (bMDPformat)
 +      fprintf(fp,"posres-comB = %g %g %g\n",ir->posres_comB[XX],
 +            ir->posres_comB[YY],ir->posres_comB[ZZ]);
 +    else
 +      pr_rvec(fp,indent,"posres-comB",ir->posres_comB,DIM,TRUE);
 +    PI("andersen-seed",ir->andersen_seed);
 +    PR("rlist",ir->rlist);
 +    PR("rlistlong",ir->rlistlong);
 +    PR("rtpi",ir->rtpi);
 +    PS("coulombtype",EELTYPE(ir->coulombtype));
 +    PR("rcoulomb-switch",ir->rcoulomb_switch);
 +    PR("rcoulomb",ir->rcoulomb);
 +    PS("vdwtype",EVDWTYPE(ir->vdwtype));
 +    PR("rvdw-switch",ir->rvdw_switch);
 +    PR("rvdw",ir->rvdw);
 +    if (ir->epsilon_r != 0)
 +      PR("epsilon-r",ir->epsilon_r);
 +    else
 +      PS("epsilon-r",infbuf);
 +    if (ir->epsilon_rf != 0)
 +      PR("epsilon-rf",ir->epsilon_rf);
 +    else
 +      PS("epsilon-rf",infbuf);
 +    PR("tabext",ir->tabext);
 +    PS("implicit-solvent",EIMPLICITSOL(ir->implicit_solvent));
 +    PS("gb-algorithm",EGBALGORITHM(ir->gb_algorithm));
 +    PR("gb-epsilon-solvent",ir->gb_epsilon_solvent);
 +    PI("nstgbradii",ir->nstgbradii);
 +    PR("rgbradii",ir->rgbradii);
 +    PR("gb-saltconc",ir->gb_saltconc);
 +    PR("gb-obc-alpha",ir->gb_obc_alpha);
 +    PR("gb-obc-beta",ir->gb_obc_beta);
 +    PR("gb-obc-gamma",ir->gb_obc_gamma);
 +    PR("gb-dielectric-offset",ir->gb_dielectric_offset);
 +    PS("sa-algorithm",ESAALGORITHM(ir->gb_algorithm));
 +    PR("sa-surface-tension",ir->sa_surface_tension);
 +        
 +    PS("DispCorr",EDISPCORR(ir->eDispCorr));
 +    PS("free-energy",EFEPTYPE(ir->efep));
 +    PR("init-lambda",ir->init_lambda);
 +    PR("delta-lambda",ir->delta_lambda);
 +    if (!bMDPformat)
 +    {
 +        PI("n-foreign-lambda",ir->n_flambda);
 +    }
 +    if (ir->n_flambda > 0)
 +    {
 +        pr_indent(fp,indent);
 +        fprintf(fp,"foreign-lambda%s",bMDPformat ? " = " : ":");
 +        for(i=0; i<ir->n_flambda; i++)
 +        {
 +            fprintf(fp,"  %10g",ir->flambda[i]);
 +        }
 +        fprintf(fp,"\n");
 +    }
 +    PR("sc-alpha",ir->sc_alpha);
 +    PI("sc-power",ir->sc_power);
 +    PR("sc-sigma",ir->sc_sigma);
 +    PR("sc-sigma-min",ir->sc_sigma_min);
 +    PI("nstdhdl", ir->nstdhdl);
 +    PS("separate-dhdl-file", SEPDHDLFILETYPE(ir->separate_dhdl_file));
 +    PS("dhdl-derivatives", DHDLDERIVATIVESTYPE(ir->dhdl_derivatives));
 +    PI("dh-hist-size", ir->dh_hist_size);
 +    PD("dh-hist-spacing", ir->dh_hist_spacing);
 +
 +    PI("nwall",ir->nwall);
 +    PS("wall-type",EWALLTYPE(ir->wall_type));
 +    PI("wall-atomtype[0]",ir->wall_atomtype[0]);
 +    PI("wall-atomtype[1]",ir->wall_atomtype[1]);
 +    PR("wall-density[0]",ir->wall_density[0]);
 +    PR("wall-density[1]",ir->wall_density[1]);
 +    PR("wall-ewald-zfac",ir->wall_ewald_zfac);
 +
 +    PS("pull",EPULLTYPE(ir->ePull));
 +    if (ir->ePull != epullNO)
 +      pr_pull(fp,indent,ir->pull);
 +    
 +    PS("rotation",BOOL(ir->bRot));
 +    if (ir->bRot)
 +      pr_rot(fp,indent,ir->rot);
 +
 +    PS("disre",EDISRETYPE(ir->eDisre));
 +    PS("disre-weighting",EDISREWEIGHTING(ir->eDisreWeighting));
 +    PS("disre-mixed",BOOL(ir->bDisreMixed));
 +    PR("dr-fc",ir->dr_fc);
 +    PR("dr-tau",ir->dr_tau);
 +    PR("nstdisreout",ir->nstdisreout);
 +    PR("orires-fc",ir->orires_fc);
 +    PR("orires-tau",ir->orires_tau);
 +    PR("nstorireout",ir->nstorireout);
 +
 +    PR("dihre-fc",ir->dihre_fc);
 +    
 +    PR("em-stepsize",ir->em_stepsize);
 +    PR("em-tol",ir->em_tol);
 +    PI("niter",ir->niter);
 +    PR("fc-stepsize",ir->fc_stepsize);
 +    PI("nstcgsteep",ir->nstcgsteep);
 +    PI("nbfgscorr",ir->nbfgscorr);
 +
 +    PS("ConstAlg",ECONSTRTYPE(ir->eConstrAlg));
 +    PR("shake-tol",ir->shake_tol);
 +    PI("lincs-order",ir->nProjOrder);
 +    PR("lincs-warnangle",ir->LincsWarnAngle);
 +    PI("lincs-iter",ir->nLincsIter);
 +    PR("bd-fric",ir->bd_fric);
 +    PI("ld-seed",ir->ld_seed);
 +    PR("cos-accel",ir->cos_accel);
 +    pr_matrix(fp,indent,"deform",ir->deform,bMDPformat);
 +
 +    PS("adress",BOOL(ir->bAdress));
 +    if (ir->bAdress){
 +        PS("adress_type",EADRESSTYPE(ir->adress->type));
 +        PR("adress_const_wf",ir->adress->const_wf);
 +        PR("adress_ex_width",ir->adress->ex_width);
 +        PR("adress_hy_width",ir->adress->hy_width);
 +        PS("adress_interface_correction",EADRESSICTYPE(ir->adress->icor));
 +        PS("adress_site",EADRESSSITETYPE(ir->adress->site));
 +        PR("adress_ex_force_cap",ir->adress->ex_forcecap);
 +        PS("adress_do_hybridpairs", BOOL(ir->adress->do_hybridpairs));
 +
 +        pr_rvec(fp,indent,"adress_reference_coords",ir->adress->refs,DIM,TRUE);
 +    }
 +    PI("userint1",ir->userint1);
 +    PI("userint2",ir->userint2);
 +    PI("userint3",ir->userint3);
 +    PI("userint4",ir->userint4);
 +    PR("userreal1",ir->userreal1);
 +    PR("userreal2",ir->userreal2);
 +    PR("userreal3",ir->userreal3);
 +    PR("userreal4",ir->userreal4);
 +    pr_grp_opts(fp,indent,"grpopts",&(ir->opts),bMDPformat);
 +    pr_cosine(fp,indent,"efield-x",&(ir->ex[XX]),bMDPformat);
 +    pr_cosine(fp,indent,"efield-xt",&(ir->et[XX]),bMDPformat);
 +    pr_cosine(fp,indent,"efield-y",&(ir->ex[YY]),bMDPformat);
 +    pr_cosine(fp,indent,"efield-yt",&(ir->et[YY]),bMDPformat);
 +    pr_cosine(fp,indent,"efield-z",&(ir->ex[ZZ]),bMDPformat);
 +    pr_cosine(fp,indent,"efield-zt",&(ir->et[ZZ]),bMDPformat);
 +    PS("bQMMM",BOOL(ir->bQMMM));
 +    PI("QMconstraints",ir->QMconstraints);
 +    PI("QMMMscheme",ir->QMMMscheme);
 +    PR("scalefactor",ir->scalefactor);
 +    pr_qm_opts(fp,indent,"qm-opts",&(ir->opts));
 +  }
 +}
 +#undef PS
 +#undef PR
 +#undef PI
 +
 +static void pr_harm(FILE *fp,t_iparams *iparams,const char *r,const char *kr)
 +{
 +  fprintf(fp,"%sA=%12.5e, %sA=%12.5e, %sB=%12.5e, %sB=%12.5e\n",
 +        r,iparams->harmonic.rA,kr,iparams->harmonic.krA,
 +        r,iparams->harmonic.rB,kr,iparams->harmonic.krB);
 +}
 +
 +void pr_iparams(FILE *fp,t_functype ftype,t_iparams *iparams)
 +{
 +  int i;
 +  real VA[4],VB[4],*rbcA,*rbcB;
 +
 +  switch (ftype) {
 +  case F_ANGLES:
 +  case F_G96ANGLES:
 +    pr_harm(fp,iparams,"th","ct");
 +    break;
 +  case F_CROSS_BOND_BONDS:
 +    fprintf(fp,"r1e=%15.8e, r2e=%15.8e, krr=%15.8e\n",
 +          iparams->cross_bb.r1e,iparams->cross_bb.r2e,
 +          iparams->cross_bb.krr);
 +    break;
 +  case F_CROSS_BOND_ANGLES:
 +    fprintf(fp,"r1e=%15.8e, r1e=%15.8e, r3e=%15.8e, krt=%15.8e\n",
 +          iparams->cross_ba.r1e,iparams->cross_ba.r2e,
 +          iparams->cross_ba.r3e,iparams->cross_ba.krt);
 +    break;
 +  case F_LINEAR_ANGLES:
 +    fprintf(fp,"klinA=%15.8e, aA=%15.8e, klinB=%15.8e, aB=%15.8e\n",
 +            iparams->linangle.klinA,iparams->linangle.aA,
 +            iparams->linangle.klinB,iparams->linangle.aB);
 +    break;
 +  case F_UREY_BRADLEY:
 +    fprintf(fp,"theta=%15.8e, ktheta=%15.8e, r13=%15.8e, kUB=%15.8e\n",
 +          iparams->u_b.theta,iparams->u_b.ktheta,iparams->u_b.r13,iparams->u_b.kUB);
 +    break;
 +  case F_QUARTIC_ANGLES:
 +    fprintf(fp,"theta=%15.8e",iparams->qangle.theta);
 +    for(i=0; i<5; i++)
 +      fprintf(fp,", c%c=%15.8e",'0'+i,iparams->qangle.c[i]);
 +    fprintf(fp,"\n");
 +    break;
 +  case F_BHAM:
 +    fprintf(fp,"a=%15.8e, b=%15.8e, c=%15.8e\n",
 +          iparams->bham.a,iparams->bham.b,iparams->bham.c);
 +    break;
 +  case F_BONDS:
 +  case F_G96BONDS:
 +  case F_HARMONIC:
 +    pr_harm(fp,iparams,"b0","cb");
 +    break;
 +  case F_IDIHS:
 +    pr_harm(fp,iparams,"xi","cx");
 +    break;
 +  case F_MORSE:
 +    fprintf(fp,"b0=%15.8e, cb=%15.8e, beta=%15.8e\n",
 +          iparams->morse.b0,iparams->morse.cb,iparams->morse.beta);
 +    break;
 +  case F_CUBICBONDS:
 +    fprintf(fp,"b0=%15.8e, kb=%15.8e, kcub=%15.8e\n",
 +          iparams->cubic.b0,iparams->cubic.kb,iparams->cubic.kcub);
 +    break;
 +  case F_CONNBONDS:
 +    fprintf(fp,"\n");
 +    break;
 +  case F_FENEBONDS:
 +    fprintf(fp,"bm=%15.8e, kb=%15.8e\n",iparams->fene.bm,iparams->fene.kb);
 +    break;
 +  case F_RESTRBONDS:
 +      fprintf(fp,"lowA=%15.8e, up1A=%15.8e, up2A=%15.8e, kA=%15.8e, lowB=%15.8e, up1B=%15.8e, up2B=%15.8e, kB=%15.8e,\n",
 +              iparams->restraint.lowA,iparams->restraint.up1A,
 +              iparams->restraint.up2A,iparams->restraint.kA,
 +              iparams->restraint.lowB,iparams->restraint.up1B,
 +              iparams->restraint.up2B,iparams->restraint.kB);
 +      break;
 +  case F_TABBONDS:
 +  case F_TABBONDSNC:
 +  case F_TABANGLES:
 +  case F_TABDIHS:
 +    fprintf(fp,"tab=%d, kA=%15.8e, kB=%15.8e\n",
 +          iparams->tab.table,iparams->tab.kA,iparams->tab.kB);
 +    break;
 +  case F_POLARIZATION:
 +    fprintf(fp,"alpha=%15.8e\n",iparams->polarize.alpha);
 +    break;
 +  case F_ANHARM_POL:
 +    fprintf(fp,"alpha=%15.8e drcut=%15.8e khyp=%15.8e\n",
 +            iparams->anharm_polarize.alpha,
 +            iparams->anharm_polarize.drcut,
 +            iparams->anharm_polarize.khyp);
 +    break;
 +  case F_THOLE_POL:
 +    fprintf(fp,"a=%15.8e, alpha1=%15.8e, alpha2=%15.8e, rfac=%15.8e\n",
 +          iparams->thole.a,iparams->thole.alpha1,iparams->thole.alpha2,
 +          iparams->thole.rfac);
 +    break;
 +  case F_WATER_POL:
 +    fprintf(fp,"al_x=%15.8e, al_y=%15.8e, al_z=%15.8e, rOH=%9.6f, rHH=%9.6f, rOD=%9.6f\n",
 +          iparams->wpol.al_x,iparams->wpol.al_y,iparams->wpol.al_z,
 +          iparams->wpol.rOH,iparams->wpol.rHH,iparams->wpol.rOD);
 +    break;
 +  case F_LJ:
 +    fprintf(fp,"c6=%15.8e, c12=%15.8e\n",iparams->lj.c6,iparams->lj.c12);
 +    break;
 +  case F_LJ14:
 +    fprintf(fp,"c6A=%15.8e, c12A=%15.8e, c6B=%15.8e, c12B=%15.8e\n",
 +          iparams->lj14.c6A,iparams->lj14.c12A,
 +          iparams->lj14.c6B,iparams->lj14.c12B);
 +    break;
 +  case F_LJC14_Q:
 +    fprintf(fp,"fqq=%15.8e, qi=%15.8e, qj=%15.8e, c6=%15.8e, c12=%15.8e\n",
 +          iparams->ljc14.fqq,
 +          iparams->ljc14.qi,iparams->ljc14.qj,
 +          iparams->ljc14.c6,iparams->ljc14.c12);
 +    break;
 +  case F_LJC_PAIRS_NB:
 +    fprintf(fp,"qi=%15.8e, qj=%15.8e, c6=%15.8e, c12=%15.8e\n",
 +          iparams->ljcnb.qi,iparams->ljcnb.qj,
 +          iparams->ljcnb.c6,iparams->ljcnb.c12);
 +    break;
 +  case F_PDIHS:
 +  case F_PIDIHS:
 +  case F_ANGRES:
 +  case F_ANGRESZ:
 +    fprintf(fp,"phiA=%15.8e, cpA=%15.8e, phiB=%15.8e, cpB=%15.8e, mult=%d\n",
 +          iparams->pdihs.phiA,iparams->pdihs.cpA,
 +          iparams->pdihs.phiB,iparams->pdihs.cpB,
 +          iparams->pdihs.mult);
 +    break;
 +  case F_DISRES:
 +    fprintf(fp,"label=%4d, type=%1d, low=%15.8e, up1=%15.8e, up2=%15.8e, fac=%15.8e)\n",
 +          iparams->disres.label,iparams->disres.type,
 +          iparams->disres.low,iparams->disres.up1,
 +          iparams->disres.up2,iparams->disres.kfac);
 +    break;
 +  case F_ORIRES:
 +    fprintf(fp,"ex=%4d, label=%d, power=%4d, c=%15.8e, obs=%15.8e, kfac=%15.8e)\n",
 +          iparams->orires.ex,iparams->orires.label,iparams->orires.power,
 +          iparams->orires.c,iparams->orires.obs,iparams->orires.kfac);
 +    break;
 +  case F_DIHRES:
 +    fprintf(fp,"label=%d, power=%4d phi=%15.8e, dphi=%15.8e, kfac=%15.8e)\n",
 +          iparams->dihres.label,iparams->dihres.power,
 +          iparams->dihres.phi,iparams->dihres.dphi,iparams->dihres.kfac);
 +    break;
 +  case F_POSRES:
 +    fprintf(fp,"pos0A=(%15.8e,%15.8e,%15.8e), fcA=(%15.8e,%15.8e,%15.8e), pos0B=(%15.8e,%15.8e,%15.8e), fcB=(%15.8e,%15.8e,%15.8e)\n",
 +          iparams->posres.pos0A[XX],iparams->posres.pos0A[YY],
 +          iparams->posres.pos0A[ZZ],iparams->posres.fcA[XX],
 +          iparams->posres.fcA[YY],iparams->posres.fcA[ZZ],
 +          iparams->posres.pos0B[XX],iparams->posres.pos0B[YY],
 +          iparams->posres.pos0B[ZZ],iparams->posres.fcB[XX],
 +          iparams->posres.fcB[YY],iparams->posres.fcB[ZZ]);
 +    break;
 +  case F_RBDIHS:
 +    for (i=0; i<NR_RBDIHS; i++) 
 +      fprintf(fp,"%srbcA[%d]=%15.8e",i==0?"":", ",i,iparams->rbdihs.rbcA[i]);
 +    fprintf(fp,"\n");
 +    for (i=0; i<NR_RBDIHS; i++) 
 +      fprintf(fp,"%srbcB[%d]=%15.8e",i==0?"":", ",i,iparams->rbdihs.rbcB[i]);
 +    fprintf(fp,"\n");
 +    break;
 +  case F_FOURDIHS:
 +    /* Use the OPLS -> Ryckaert-Bellemans formula backwards to get the
 +     * OPLS potential constants back.
 +     */
 +    rbcA = iparams->rbdihs.rbcA;
 +    rbcB = iparams->rbdihs.rbcB;
 +
 +    VA[3] = -0.25*rbcA[4];
 +    VA[2] = -0.5*rbcA[3];
 +    VA[1] = 4.0*VA[3]-rbcA[2];
 +    VA[0] = 3.0*VA[2]-2.0*rbcA[1];
 +
 +    VB[3] = -0.25*rbcB[4];
 +    VB[2] = -0.5*rbcB[3];
 +    VB[1] = 4.0*VB[3]-rbcB[2];
 +    VB[0] = 3.0*VB[2]-2.0*rbcB[1];
 +
 +    for (i=0; i<NR_FOURDIHS; i++) 
 +      fprintf(fp,"%sFourA[%d]=%15.8e",i==0?"":", ",i,VA[i]);
 +    fprintf(fp,"\n");
 +    for (i=0; i<NR_FOURDIHS; i++) 
 +      fprintf(fp,"%sFourB[%d]=%15.8e",i==0?"":", ",i,VB[i]);
 +    fprintf(fp,"\n");
 +    break;
 +   
 +  case F_CONSTR:
 +  case F_CONSTRNC:
 +    fprintf(fp,"dA=%15.8e, dB=%15.8e\n",iparams->constr.dA,iparams->constr.dB);
 +    break;
 +  case F_SETTLE:
 +    fprintf(fp,"doh=%15.8e, dhh=%15.8e\n",iparams->settle.doh,
 +          iparams->settle.dhh);
 +    break;
 +  case F_VSITE2:
 +    fprintf(fp,"a=%15.8e\n",iparams->vsite.a);
 +    break;
 +  case F_VSITE3:
 +  case F_VSITE3FD:
 +  case F_VSITE3FAD:
 +    fprintf(fp,"a=%15.8e, b=%15.8e\n",iparams->vsite.a,iparams->vsite.b);
 +    break;
 +  case F_VSITE3OUT:
 +  case F_VSITE4FD:
 +  case F_VSITE4FDN:
 +    fprintf(fp,"a=%15.8e, b=%15.8e, c=%15.8e\n",
 +          iparams->vsite.a,iparams->vsite.b,iparams->vsite.c);
 +    break;
 +  case F_VSITEN:
 +    fprintf(fp,"n=%2d, a=%15.8e\n",iparams->vsiten.n,iparams->vsiten.a);
 +    break;
 +  case F_GB12:
 +  case F_GB13:
 +  case F_GB14:
 +    fprintf(fp, "sar=%15.8e, st=%15.8e, pi=%15.8e, gbr=%15.8e, bmlt=%15.8e\n",iparams->gb.sar,iparams->gb.st,iparams->gb.pi,iparams->gb.gbr,iparams->gb.bmlt);
 +    break;              
 +  case F_CMAP:
 +    fprintf(fp, "cmapA=%1d, cmapB=%1d\n",iparams->cmap.cmapA, iparams->cmap.cmapB);
 +    break;              
 +  default:
 +    gmx_fatal(FARGS,"unknown function type %d (%s) in %s line %d",
 +            ftype,interaction_function[ftype].name,__FILE__,__LINE__);
 +  }
 +}
 +
 +void pr_ilist(FILE *fp,int indent,const char *title,
 +              t_functype *functype,t_ilist *ilist, gmx_bool bShowNumbers)
 +{
 +    int i,j,k,type,ftype;
 +    t_iatom *iatoms;
 +    
 +    if (available(fp,ilist,indent,title) && ilist->nr > 0)
 +    {  
 +        indent=pr_title(fp,indent,title);
 +        (void) pr_indent(fp,indent);
 +        fprintf(fp,"nr: %d\n",ilist->nr);
 +        if (ilist->nr > 0) {
 +            (void) pr_indent(fp,indent);
 +            fprintf(fp,"iatoms:\n");
 +            iatoms=ilist->iatoms;
 +            for (i=j=0; i<ilist->nr;) {
 +#ifndef DEBUG
 +                (void) pr_indent(fp,indent+INDENT);
 +                type=*(iatoms++);
 +                ftype=functype[type];
 +                (void) fprintf(fp,"%d type=%d (%s)",
 +                               bShowNumbers?j:-1,bShowNumbers?type:-1,
 +                               interaction_function[ftype].name);
 +                j++;
 +                for (k=0; k<interaction_function[ftype].nratoms; k++)
 +                    (void) fprintf(fp," %u",*(iatoms++));
 +                (void) fprintf(fp,"\n");
 +                i+=1+interaction_function[ftype].nratoms;
 +#else
 +                fprintf(fp,"%5d%5d\n",i,iatoms[i]);
 +                i++;
 +#endif
 +            }
 +        }
 +    }
 +}
 +
 +static void pr_cmap(FILE *fp, int indent, const char *title,
 +                    gmx_cmap_t *cmap_grid, gmx_bool bShowNumbers)
 +{
 +    int i,j,nelem;
 +    real dx,idx;
 +      
 +    dx    = 360.0 / cmap_grid->grid_spacing;
 +    nelem = cmap_grid->grid_spacing*cmap_grid->grid_spacing;
 +      
 +    if(available(fp,cmap_grid,indent,title))
 +    {
 +        fprintf(fp,"%s\n",title);
 +              
 +        for(i=0;i<cmap_grid->ngrid;i++)
 +        {
 +            idx = -180.0;
 +            fprintf(fp,"%8s %8s %8s %8s\n","V","dVdx","dVdy","d2dV");
 +                      
 +            fprintf(fp,"grid[%3d]={\n",bShowNumbers?i:-1);
 +                      
 +            for(j=0;j<nelem;j++)
 +            {
 +                if( (j%cmap_grid->grid_spacing)==0)
 +                {
 +                    fprintf(fp,"%8.1f\n",idx);
 +                    idx+=dx;
 +                }
 +                              
 +                fprintf(fp,"%8.3f ",cmap_grid->cmapdata[i].cmap[j*4]);
 +                fprintf(fp,"%8.3f ",cmap_grid->cmapdata[i].cmap[j*4+1]);
 +                fprintf(fp,"%8.3f ",cmap_grid->cmapdata[i].cmap[j*4+2]);
 +                fprintf(fp,"%8.3f\n",cmap_grid->cmapdata[i].cmap[j*4+3]);
 +            }
 +            fprintf(fp,"\n");
 +        }
 +    }
 +      
 +}
 +
 +void pr_ffparams(FILE *fp,int indent,const char *title,
 +                 gmx_ffparams_t *ffparams,
 +                 gmx_bool bShowNumbers)
 +{
 +  int i,j;
 +  
 +  indent=pr_title(fp,indent,title);
 +  (void) pr_indent(fp,indent);
 +  (void) fprintf(fp,"atnr=%d\n",ffparams->atnr);
 +  (void) pr_indent(fp,indent);
 +  (void) fprintf(fp,"ntypes=%d\n",ffparams->ntypes);
 +  for (i=0; i<ffparams->ntypes; i++) {
 +      (void) pr_indent(fp,indent+INDENT);
 +      (void) fprintf(fp,"functype[%d]=%s, ",
 +                     bShowNumbers?i:-1,
 +                     interaction_function[ffparams->functype[i]].name);
 +      pr_iparams(fp,ffparams->functype[i],&ffparams->iparams[i]);
 +  }
 +  (void) pr_double(fp,indent,"reppow",ffparams->reppow);
 +  (void) pr_real(fp,indent,"fudgeQQ",ffparams->fudgeQQ);
 +  pr_cmap(fp,indent,"cmap",&ffparams->cmap_grid,bShowNumbers);
 +}
 +
 +void pr_idef(FILE *fp,int indent,const char *title,t_idef *idef, gmx_bool bShowNumbers)
 +{
 +  int i,j;
 +  
 +  if (available(fp,idef,indent,title)) {  
 +    indent=pr_title(fp,indent,title);
 +    (void) pr_indent(fp,indent);
 +    (void) fprintf(fp,"atnr=%d\n",idef->atnr);
 +    (void) pr_indent(fp,indent);
 +    (void) fprintf(fp,"ntypes=%d\n",idef->ntypes);
 +    for (i=0; i<idef->ntypes; i++) {
 +      (void) pr_indent(fp,indent+INDENT);
 +      (void) fprintf(fp,"functype[%d]=%s, ",
 +                   bShowNumbers?i:-1,
 +                   interaction_function[idef->functype[i]].name);
 +      pr_iparams(fp,idef->functype[i],&idef->iparams[i]);
 +    }
 +    (void) pr_real(fp,indent,"fudgeQQ",idef->fudgeQQ);
 +
 +    for(j=0; (j<F_NRE); j++)
 +      pr_ilist(fp,indent,interaction_function[j].longname,
 +               idef->functype,&idef->il[j],bShowNumbers);
 +  }
 +}
 +
 +static int pr_block_title(FILE *fp,int indent,const char *title,t_block *block)
 +{
 +  int i;
 +
 +  if (available(fp,block,indent,title))
 +    {
 +      indent=pr_title(fp,indent,title);
 +      (void) pr_indent(fp,indent);
 +      (void) fprintf(fp,"nr=%d\n",block->nr);
 +    }
 +  return indent;
 +}
 +
 +static int pr_blocka_title(FILE *fp,int indent,const char *title,t_blocka *block)
 +{
 +  int i;
 +
 +  if (available(fp,block,indent,title))
 +    {
 +      indent=pr_title(fp,indent,title);
 +      (void) pr_indent(fp,indent);
 +      (void) fprintf(fp,"nr=%d\n",block->nr);
 +      (void) pr_indent(fp,indent);
 +      (void) fprintf(fp,"nra=%d\n",block->nra);
 +    }
 +  return indent;
 +}
 +
 +static void low_pr_blocka(FILE *fp,int indent,const char *title,t_blocka *block, gmx_bool bShowNumbers)
 +{
 +  int i;
 +  
 +  if (available(fp,block,indent,title))
 +    {
 +      indent=pr_blocka_title(fp,indent,title,block);
 +      for (i=0; i<=block->nr; i++)
 +        {
 +          (void) pr_indent(fp,indent+INDENT);
 +          (void) fprintf(fp,"%s->index[%d]=%u\n",
 +                       title,bShowNumbers?i:-1,block->index[i]);
 +        }
 +      for (i=0; i<block->nra; i++)
 +        {
 +          (void) pr_indent(fp,indent+INDENT);
 +          (void) fprintf(fp,"%s->a[%d]=%u\n",
 +                       title,bShowNumbers?i:-1,block->a[i]);
 +        }
 +    }
 +}
 +
 +void pr_block(FILE *fp,int indent,const char *title,t_block *block,gmx_bool bShowNumbers)
 +{
 +  int i,j,ok,size,start,end;
 +  
 +  if (available(fp,block,indent,title))
 +    {
 +      indent=pr_block_title(fp,indent,title,block);
 +      start=0;
 +      end=start;
 +      if ((ok=(block->index[start]==0))==0)
 +        (void) fprintf(fp,"block->index[%d] should be 0\n",start);
 +      else
 +        for (i=0; i<block->nr; i++)
 +          {
 +            end=block->index[i+1];
 +            size=pr_indent(fp,indent);
 +            if (end<=start)
 +              size+=fprintf(fp,"%s[%d]={}\n",title,i);
 +            else
 +              size+=fprintf(fp,"%s[%d]={%d..%d}\n",
 +                          title,bShowNumbers?i:-1,
 +                          bShowNumbers?start:-1,bShowNumbers?end-1:-1);
 +            start=end;
 +          }
 +    }
 +}
 +
 +void pr_blocka(FILE *fp,int indent,const char *title,t_blocka *block,gmx_bool bShowNumbers)
 +{
 +  int i,j,ok,size,start,end;
 +  
 +  if (available(fp,block,indent,title))
 +    {
 +      indent=pr_blocka_title(fp,indent,title,block);
 +      start=0;
 +      end=start;
 +      if ((ok=(block->index[start]==0))==0)
 +        (void) fprintf(fp,"block->index[%d] should be 0\n",start);
 +      else
 +        for (i=0; i<block->nr; i++)
 +          {
 +            end=block->index[i+1];
 +            size=pr_indent(fp,indent);
 +            if (end<=start)
 +              size+=fprintf(fp,"%s[%d]={",title,i);
 +            else
 +              size+=fprintf(fp,"%s[%d][%d..%d]={",
 +                          title,bShowNumbers?i:-1,
 +                          bShowNumbers?start:-1,bShowNumbers?end-1:-1);
 +            for (j=start; j<end; j++)
 +              {
 +                if (j>start) size+=fprintf(fp,", ");
 +                if ((size)>(USE_WIDTH))
 +                  {
 +                    (void) fprintf(fp,"\n");
 +                    size=pr_indent(fp,indent+INDENT);
 +                  }
 +                size+=fprintf(fp,"%u",block->a[j]);
 +              }
 +            (void) fprintf(fp,"}\n");
 +            start=end;
 +          }
 +      if ((end!=block->nra)||(!ok)) 
 +        {
 +          (void) pr_indent(fp,indent);
 +          (void) fprintf(fp,"tables inconsistent, dumping complete tables:\n");
 +          low_pr_blocka(fp,indent,title,block,bShowNumbers);
 +        }
 +    }
 +}
 +
 +static void pr_strings(FILE *fp,int indent,const char *title,char ***nm,int n, gmx_bool bShowNumbers)
 +{
 +  int i;
 +
 +  if (available(fp,nm,indent,title))
 +    {  
 +      indent=pr_title_n(fp,indent,title,n);
 +      for (i=0; i<n; i++)
 +        {
 +          (void) pr_indent(fp,indent);
 +          (void) fprintf(fp,"%s[%d]={name=\"%s\"}\n",
 +                       title,bShowNumbers?i:-1,*(nm[i]));
 +        }
 +    }
 +}
 +
 +static void pr_strings2(FILE *fp,int indent,const char *title,
 +                      char ***nm,char ***nmB,int n, gmx_bool bShowNumbers)
 +{
 +  int i;
 +
 +  if (available(fp,nm,indent,title))
 +    {  
 +      indent=pr_title_n(fp,indent,title,n);
 +      for (i=0; i<n; i++)
 +        {
 +          (void) pr_indent(fp,indent);
 +          (void) fprintf(fp,"%s[%d]={name=\"%s\",nameB=\"%s\"}\n",
 +                       title,bShowNumbers?i:-1,*(nm[i]),*(nmB[i]));
 +        }
 +    }
 +}
 +
 +static void pr_resinfo(FILE *fp,int indent,const char *title,t_resinfo *resinfo,int n, gmx_bool bShowNumbers)
 +{
 +    int i;
 +    
 +    if (available(fp,resinfo,indent,title))
 +    {  
 +        indent=pr_title_n(fp,indent,title,n);
 +        for (i=0; i<n; i++)
 +        {
 +            (void) pr_indent(fp,indent);
 +            (void) fprintf(fp,"%s[%d]={name=\"%s\", nr=%d, ic='%c'}\n",
 +                           title,bShowNumbers?i:-1,
 +                           *(resinfo[i].name),resinfo[i].nr,
 +                           (resinfo[i].ic == '\0') ? ' ' : resinfo[i].ic);
 +        }
 +    }
 +}
 +
 +static void pr_atom(FILE *fp,int indent,const char *title,t_atom *atom,int n)
 +{
 +  int i,j;
 +  
 +  if (available(fp,atom,indent,title)) {  
 +    indent=pr_title_n(fp,indent,title,n);
 +    for (i=0; i<n; i++) {
 +      (void) pr_indent(fp,indent);
 +      fprintf(fp,"%s[%6d]={type=%3d, typeB=%3d, ptype=%8s, m=%12.5e, "
 +              "q=%12.5e, mB=%12.5e, qB=%12.5e, resind=%5d, atomnumber=%3d}\n",
 +              title,i,atom[i].type,atom[i].typeB,ptype_str[atom[i].ptype],
 +              atom[i].m,atom[i].q,atom[i].mB,atom[i].qB,
 +              atom[i].resind,atom[i].atomnumber);
 +    }
 +  }
 +}
 +
 +static void pr_grps(FILE *fp,int indent,const char *title,t_grps grps[],
 +                  char **grpname[], gmx_bool bShowNumbers)
 +{
 +    int i,j;
 +
 +    for(i=0; (i<egcNR); i++)
 +    {
 +        fprintf(fp,"%s[%-12s] nr=%d, name=[",title,gtypes[i],grps[i].nr);
 +        for(j=0; (j<grps[i].nr); j++)
 +        {
 +            fprintf(fp," %s",*(grpname[grps[i].nm_ind[j]]));
 +        }
 +        fprintf(fp,"]\n");
 +    }
 +}
 +
 +static void pr_groups(FILE *fp,int indent,const char *title,
 +                      gmx_groups_t *groups,
 +                      gmx_bool bShowNumbers)
 +{
 +    int grpnr[egcNR];
 +    int nat_max,i,g;
 +
 +    pr_grps(fp,indent,"grp",groups->grps,groups->grpname,bShowNumbers);
 +    pr_strings(fp,indent,"grpname",groups->grpname,groups->ngrpname,bShowNumbers);
 +
 +    (void) pr_indent(fp,indent);
 +    fprintf(fp,"groups          ");
 +    for(g=0; g<egcNR; g++)
 +    {
 +       printf(" %5.5s",gtypes[g]);
 +    }
 +    printf("\n");
 +
 +    (void) pr_indent(fp,indent);
 +    fprintf(fp,"allocated       ");
 +    nat_max = 0;
 +    for(g=0; g<egcNR; g++)
 +    {
 +        printf(" %5d",groups->ngrpnr[g]);
 +        nat_max = max(nat_max,groups->ngrpnr[g]);
 +    }
 +    printf("\n");
 +
 +    if (nat_max == 0)
 +    {
 +        (void) pr_indent(fp,indent);
 +        fprintf(fp,"groupnr[%5s] =","*");
 +        for(g=0; g<egcNR; g++)
 +        {
 +            fprintf(fp,"  %3d ",0);
 +        }
 +        fprintf(fp,"\n");
 +    }
 +    else
 +    {
 +        for(i=0; i<nat_max; i++)
 +        {
 +            (void) pr_indent(fp,indent);
 +            fprintf(fp,"groupnr[%5d] =",i);
 +            for(g=0; g<egcNR; g++)
 +            {
 +                fprintf(fp,"  %3d ",
 +                        groups->grpnr[g] ? groups->grpnr[g][i] : 0);
 +            }
 +            fprintf(fp,"\n");
 +        }
 +    }
 +}
 +
 +void pr_atoms(FILE *fp,int indent,const char *title,t_atoms *atoms, 
 +            gmx_bool bShownumbers)
 +{
 +  if (available(fp,atoms,indent,title))
 +    {
 +      indent=pr_title(fp,indent,title);
 +      pr_atom(fp,indent,"atom",atoms->atom,atoms->nr);
 +      pr_strings(fp,indent,"atom",atoms->atomname,atoms->nr,bShownumbers);
 +      pr_strings2(fp,indent,"type",atoms->atomtype,atoms->atomtypeB,atoms->nr,bShownumbers);
 +      pr_resinfo(fp,indent,"residue",atoms->resinfo,atoms->nres,bShownumbers);
 +    }
 +}
 +
 +
 +void pr_atomtypes(FILE *fp,int indent,const char *title,t_atomtypes *atomtypes, 
 +                gmx_bool bShowNumbers)
 +{
 +  int i;
 +  if (available(fp,atomtypes,indent,title)) 
 +  {
 +    indent=pr_title(fp,indent,title);
 +    for(i=0;i<atomtypes->nr;i++) {
 +      pr_indent(fp,indent);
 +              fprintf(fp,
 +                              "atomtype[%3d]={radius=%12.5e, volume=%12.5e, gb_radius=%12.5e, surftens=%12.5e, atomnumber=%4d, S_hct=%12.5e)}\n",
 +                              bShowNumbers?i:-1,atomtypes->radius[i],atomtypes->vol[i],
 +                              atomtypes->gb_radius[i],
 +                              atomtypes->surftens[i],atomtypes->atomnumber[i],atomtypes->S_hct[i]);
 +    }
 +  }
 +}
 +
 +static void pr_moltype(FILE *fp,int indent,const char *title,
 +                       gmx_moltype_t *molt,int n,
 +                       gmx_ffparams_t *ffparams,
 +                       gmx_bool bShowNumbers)
 +{
 +    int j;
 +
 +    indent = pr_title_n(fp,indent,title,n);
 +    (void) pr_indent(fp,indent);
 +    (void) fprintf(fp,"name=\"%s\"\n",*(molt->name));
 +    pr_atoms(fp,indent,"atoms",&(molt->atoms),bShowNumbers);
 +    pr_block(fp,indent,"cgs",&molt->cgs, bShowNumbers);
 +    pr_blocka(fp,indent,"excls",&molt->excls, bShowNumbers);
 +    for(j=0; (j<F_NRE); j++) {
 +        pr_ilist(fp,indent,interaction_function[j].longname,
 +                 ffparams->functype,&molt->ilist[j],bShowNumbers);
 +    }
 +}
 +
 +static void pr_molblock(FILE *fp,int indent,const char *title,
 +                        gmx_molblock_t *molb,int n,
 +                        gmx_moltype_t *molt,
 +                        gmx_bool bShowNumbers)
 +{
 +    indent = pr_title_n(fp,indent,title,n);
 +    (void) pr_indent(fp,indent);
 +    (void) fprintf(fp,"%-20s = %d \"%s\"\n",
 +                   "moltype",molb->type,*(molt[molb->type].name));
 +    pr_int(fp,indent,"#molecules",molb->nmol);
 +    pr_int(fp,indent,"#atoms_mol",molb->natoms_mol);
 +    pr_int(fp,indent,"#posres_xA",molb->nposres_xA);
 +    if (molb->nposres_xA > 0) {
 +        pr_rvecs(fp,indent,"posres_xA",molb->posres_xA,molb->nposres_xA);
 +    }
 +    pr_int(fp,indent,"#posres_xB",molb->nposres_xB);
 +    if (molb->nposres_xB > 0) {
 +        pr_rvecs(fp,indent,"posres_xB",molb->posres_xB,molb->nposres_xB);
 +    }
 +}
 +
 +void pr_mtop(FILE *fp,int indent,const char *title,gmx_mtop_t *mtop,
 +             gmx_bool bShowNumbers)
 +{
 +    int mt,mb;
 +
 +    if (available(fp,mtop,indent,title)) {
 +        indent=pr_title(fp,indent,title);
 +        (void) pr_indent(fp,indent);
 +        (void) fprintf(fp,"name=\"%s\"\n",*(mtop->name));
 +        pr_int(fp,indent,"#atoms",mtop->natoms);
 +        for(mb=0; mb<mtop->nmolblock; mb++) {
 +            pr_molblock(fp,indent,"molblock",&mtop->molblock[mb],mb,
 +                        mtop->moltype,bShowNumbers);
 +        }
 +        pr_ffparams(fp,indent,"ffparams",&(mtop->ffparams),bShowNumbers);
 +        pr_atomtypes(fp,indent,"atomtypes",&(mtop->atomtypes),bShowNumbers);
 +        for(mt=0; mt<mtop->nmoltype; mt++) {
 +            pr_moltype(fp,indent,"moltype",&mtop->moltype[mt],mt,
 +                       &mtop->ffparams,bShowNumbers);
 +        }
 +        pr_groups(fp,indent,"groups",&mtop->groups,bShowNumbers);
 +    }
 +}
 +
 +void pr_top(FILE *fp,int indent,const char *title,t_topology *top, gmx_bool bShowNumbers)
 +{
 +  if (available(fp,top,indent,title)) {
 +    indent=pr_title(fp,indent,title);
 +    (void) pr_indent(fp,indent);
 +    (void) fprintf(fp,"name=\"%s\"\n",*(top->name));
 +    pr_atoms(fp,indent,"atoms",&(top->atoms),bShowNumbers);
 +    pr_atomtypes(fp,indent,"atomtypes",&(top->atomtypes),bShowNumbers);
 +    pr_block(fp,indent,"cgs",&top->cgs, bShowNumbers);
 +    pr_block(fp,indent,"mols",&top->mols, bShowNumbers);
 +    pr_blocka(fp,indent,"excls",&top->excls, bShowNumbers);
 +    pr_idef(fp,indent,"idef",&top->idef,bShowNumbers);
 +  }
 +}
 +
 +void pr_header(FILE *fp,int indent,const char *title,t_tpxheader *sh)
 +{
 +  char buf[22];
 +    
 +  if (available(fp,sh,indent,title))
 +    {
 +      indent=pr_title(fp,indent,title);
 +      pr_indent(fp,indent);
 +      fprintf(fp,"bIr    = %spresent\n",sh->bIr?"":"not ");
 +      pr_indent(fp,indent);
 +      fprintf(fp,"bBox   = %spresent\n",sh->bBox?"":"not ");
 +      pr_indent(fp,indent);
 +      fprintf(fp,"bTop   = %spresent\n",sh->bTop?"":"not ");
 +      pr_indent(fp,indent);
 +      fprintf(fp,"bX     = %spresent\n",sh->bX?"":"not ");
 +      pr_indent(fp,indent);
 +      fprintf(fp,"bV     = %spresent\n",sh->bV?"":"not ");
 +      pr_indent(fp,indent);
 +      fprintf(fp,"bF     = %spresent\n",sh->bF?"":"not ");
 +      
 +      pr_indent(fp,indent);
 +      fprintf(fp,"natoms = %d\n",sh->natoms);
 +      pr_indent(fp,indent);
 +      fprintf(fp,"lambda = %e\n",sh->lambda);
 +    }
 +}
 +
 +void pr_commrec(FILE *fp,int indent,t_commrec *cr)
 +{
 +  pr_indent(fp,indent);
 +  fprintf(fp,"commrec:\n");
 +  indent+=2;
 +  pr_indent(fp,indent);
 +  fprintf(fp,"nodeid    = %d\n",cr->nodeid);
 +  pr_indent(fp,indent);
 +  fprintf(fp,"nnodes    = %d\n",cr->nnodes);
 +  pr_indent(fp,indent);
 +  fprintf(fp,"npmenodes = %d\n",cr->npmenodes);
 +  /*
 +  pr_indent(fp,indent);
 +  fprintf(fp,"threadid  = %d\n",cr->threadid);
 +  pr_indent(fp,indent);
 +  fprintf(fp,"nthreads  = %d\n",cr->nthreads);
 +  */
 +}
Simple merge
index 405036947b56d035fd695551f9c37c1bfa67945d,0000000000000000000000000000000000000000..0145da64a89d2c00631973de5eb4fa1180bce230
mode 100644,000000..100644
--- /dev/null
@@@ -1,230 -1,0 +1,211 @@@
- static void dump_h_db(const char *fn,int nah,t_hackblock *ah)
- {
-   FILE *fp;
-   char buf[STRLEN],nname[STRLEN];
-   int  i,j,k;
-   
-   sprintf(buf,"%s_new.hdb",fn);
-   fp = gmx_fio_fopen(buf,"w");
-   for(i=0; (i<nah); i++) {
-     fprintf(fp,"%-8s%-8d\n",ah[i].name,ah[i].nhack);
-     for(k=0; (k<ah[i].nhack); k++) {
-       strcpy(nname,ah[i].hack[k].a[0]);
-       nname[0] = 'H';
-       print_ab(fp,&ah[i].hack[k],nname);
-     }
-   }
-   gmx_fio_fclose(fp);
- }
 +/*
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 + */
 +/* This file is completely threadsafe - keep it that way! */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <string.h>
 +#include "string2.h"
 +#include "sysstuff.h"
 +#include "smalloc.h"
 +#include "futil.h"
 +#include "symtab.h"
 +#include "h_db.h"
 +#include "gmxfio.h"
 +#include "fflibutil.h"
 +#include "gmx_fatal.h"
 +#include "macros.h"
 +
 +/* There are 11 types of adding hydrogens, numbered from
 + * 1 thru 11. Each of these has a specific number of
 + * control atoms, that determine how the hydrogens are added.
 + * Here these number are given. Because arrays start at 0 an
 + * extra dummy for index 0 is added 
 + */
 +const int ncontrol[] = { -1, 3, 3, 3, 3, 4, 3, 1, 3, 3, 1, 1 };
 +#define maxcontrol asize(ncontrol)
 +
 +int compaddh(const void *a,const void *b)
 +{
 +  t_hackblock *ah,*bh;
 +
 +  ah=(t_hackblock *)a;
 +  bh=(t_hackblock *)b;
 +  return gmx_strcasecmp(ah->name,bh->name);
 +}
 +
 +void print_ab(FILE *out,t_hack *hack,char *nname)
 +{
 +  int i;
 +
 +  fprintf(out,"%d\t%d\t%s",hack->nr,hack->tp,nname);
 +  for(i=0; (i < hack->nctl); i++)
 +    fprintf(out,"\t%s",hack->a[i]);
 +  fprintf(out,"\n");
 +}
 +
 +
 +void read_ab(char *line,const char *fn,t_hack *hack)
 +{
 +  int  i,nh,tp,ns;
 +  char a[4][12];
 +  char hn[32];
 +  
 +  ns = sscanf(line,"%d%d%s%s%s%s%s",&nh,&tp,hn,a[0],a[1],a[2],a[3]);
 +  if (ns < 4)
 +    gmx_fatal(FARGS,"wrong format in input file %s on line\n%s\n",fn,line);
 +  
 +  hack->nr=nh;
 +  hack->tp=tp;
 +  if ((tp < 1) || (tp >= maxcontrol)) 
 +    gmx_fatal(FARGS,"Error in hdb file %s:\nH-type should be in 1-%d. Offending line:\n%s",fn,maxcontrol-1,line);
 +  
 +  hack->nctl = ns - 3;
 +  if ((hack->nctl != ncontrol[hack->tp]) && (ncontrol[hack->tp] != -1))
 +    gmx_fatal(FARGS,"Error in hdb file %s:\nWrong number of control atoms (%d iso %d) on line:\n%s\n",fn,hack->nctl,ncontrol[hack->tp],line);
 +  for(i=0; (i<hack->nctl); i++) {
 +    hack->a[i]=strdup(a[i]);
 +  }
 +  for(   ; i<4; i++) {
 +    hack->a[i]=NULL;
 +  }
 +  hack->oname=NULL;
 +  hack->nname=strdup(hn);
 +  hack->atom=NULL;
 +  hack->cgnr=NOTSET;
 +  hack->bXSet=FALSE;
 +  for(i=0; i<DIM; i++)
 +    hack->newx[i]=NOTSET;
 +}
 +
 +static void read_h_db_file(const char *hfn,int *nahptr,t_hackblock **ah)
 +{     
 +  FILE   *in;
 +  char   filebase[STRLEN],line[STRLEN], buf[STRLEN];
 +  int    i, n, nab, nah;
 +  t_hackblock *aah;
 +
 +  if (debug) fprintf(debug,"Hydrogen Database (%s):\n",hfn);
 +
 +  fflib_filename_base(hfn,filebase,STRLEN);
 +  /* Currently filebase is read and set, but not used.
 +   * hdb entries from any hdb file and be applied to rtp entries
 +   * in any rtp file.
 +   */
 +
 +  in = fflib_open(hfn);
 +
 +  nah = *nahptr;
 +  aah = *ah;
 +  while (fgets2(line,STRLEN-1,in)) {
 +    if (sscanf(line,"%s%n",buf,&n) != 1) {
 +      fprintf(stderr,"Error in hdb file: nah = %d\nline = '%s'\n",
 +            nah,line);
 +      break;
 +    }
 +    if (debug) fprintf(debug,"%s",buf);
 +    srenew(aah,nah+1);
 +    clear_t_hackblock(&aah[nah]);
 +    aah[nah].name     = strdup(buf);
 +    aah[nah].filebase = strdup(filebase);
 +    
 +    if (sscanf(line+n,"%d",&nab) == 1) {
 +      if (debug) fprintf(debug,"  %d\n",nab);
 +      snew(aah[nah].hack,nab);
 +      aah[nah].nhack = nab;
 +      for(i=0; (i<nab); i++) {
 +      if (feof(in))
 +        gmx_fatal(FARGS, "Expected %d lines of hydrogens, found only %d "
 +                    "while reading Hydrogen Database %s residue %s",
 +                    nab, i-1, aah[nah].name, hfn);
 +      if(NULL==fgets(buf, STRLEN, in))
 +        {
 +        gmx_fatal(FARGS,"Error reading from file %s",hfn);
 +      }
 +      read_ab(buf,hfn,&(aah[nah].hack[i]));
 +      }
 +    }
 +    nah++;
 +  }
 +  ffclose(in);
 +  
 +  /* Sort the list (necessary to be able to use bsearch */
 +  qsort(aah,nah,(size_t)sizeof(**ah),compaddh);
 +
 +  /*
 +  if (debug)
 +    dump_h_db(hfn,nah,aah);
 +  */
 +  
 +  *nahptr = nah;
 +  *ah     = aah;
 +}
 +
 +int read_h_db(const char *ffdir,t_hackblock **ah)
 +{
 +  int  nhdbf,f;
 +  char **hdbf;
 +  int  nah;
 +  FILE *fp;
 +
 +  /* Read the hydrogen database file(s).
 +   * Do not generate an error when no files are found.
 +   */
 +  nhdbf = fflib_search_file_end(ffdir,".hdb",FALSE,&hdbf);
 +  nah = 0;
 +  *ah = NULL;
 +  for(f=0; f<nhdbf; f++) {
 +    read_h_db_file(hdbf[f],&nah,ah);
 +    sfree(hdbf[f]);
 +  }
 +  sfree(hdbf);
 +
 +  return nah;
 +}
 +
 +t_hackblock *search_h_db(int nh,t_hackblock ah[],char *key)
 +{
 +  t_hackblock ahkey,*result;
 +
 +  if (nh <= 0)
 +    return NULL;
 +  
 +  ahkey.name=key;
 +
 +  result=(t_hackblock *)bsearch(&ahkey,ah,nh,(size_t)sizeof(ah[0]),compaddh);
 +  
 +  return result;
 +}
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
index c4a83b8dc01f5c666d5fbfd2bfd9a013b617fbfe,0000000000000000000000000000000000000000..cbd9096c4fa09312faba899ea54e6f65cb3117fc
mode 100644,000000..100644
--- /dev/null
@@@ -1,1149 -1,0 +1,1149 @@@
-             constr->shaked = shake_init();
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include "confio.h"
 +#include "constr.h"
 +#include "copyrite.h"
 +#include "invblock.h"
 +#include "main.h"
 +#include "mdrun.h"
 +#include "nrnb.h"
 +#include "smalloc.h"
 +#include "vec.h"
 +#include "physics.h"
 +#include "names.h"
 +#include "txtdump.h"
 +#include "domdec.h"
 +#include "pdbio.h"
 +#include "partdec.h"
 +#include "splitter.h"
 +#include "mtop_util.h"
 +#include "gmxfio.h"
 +#include "macros.h"
 +
 +typedef struct gmx_constr {
 +  int              ncon_tot;     /* The total number of constraints    */
 +  int              nflexcon;     /* The number of flexible constraints */
 +  int              n_at2con_mt;  /* The size of at2con = #moltypes     */
 +  t_blocka         *at2con_mt;   /* A list of atoms to constraints     */
 +  gmx_lincsdata_t  lincsd;       /* LINCS data                         */
 +  gmx_shakedata_t  shaked;       /* SHAKE data                         */
 +  gmx_settledata_t settled;      /* SETTLE data                        */
 +  int              nblocks;      /* The number of SHAKE blocks         */
 +  int              *sblock;      /* The SHAKE blocks                   */
 +  int              sblock_nalloc;/* The allocation size of sblock      */
 +  real             *lagr;        /* Lagrange multipliers for SHAKE     */
 +  int              lagr_nalloc;  /* The allocation size of lagr        */
 +  int              maxwarn;      /* The maximum number of warnings     */
 +  int              warncount_lincs;
 +  int              warncount_settle;
 +  gmx_edsam_t      ed;           /* The essential dynamics data        */
 +
 +  gmx_mtop_t       *warn_mtop;   /* Only used for printing warnings    */
 +} t_gmx_constr;
 +
 +typedef struct {
 +  atom_id iatom[3];
 +  atom_id blocknr;
 +} t_sortblock;
 +
 +static void *init_vetavars(t_vetavars *vars,
 +                           gmx_bool constr_deriv,
 +                           real veta,real vetanew, t_inputrec *ir, gmx_ekindata_t *ekind, gmx_bool bPscal) 
 +{
 +    double g;
 +    int i;
 +
 +    /* first, set the alpha integrator variable */
 +    if ((ir->opts.nrdf[0] > 0) && bPscal) 
 +    {
 +        vars->alpha = 1.0 + DIM/((double)ir->opts.nrdf[0]);  
 +    } else {
 +        vars->alpha = 1.0;
 +    }
 +    g = 0.5*veta*ir->delta_t;
 +    vars->rscale = exp(g)*series_sinhx(g);
 +    g = -0.25*vars->alpha*veta*ir->delta_t;
 +    vars->vscale = exp(g)*series_sinhx(g);
 +    vars->rvscale = vars->vscale*vars->rscale;
 +    vars->veta = vetanew;
 +
 +    if (constr_deriv)
 +    {
 +        snew(vars->vscale_nhc,ir->opts.ngtc);
 +        if ((ekind==NULL) || (!bPscal))
 +        {
 +            for (i=0;i<ir->opts.ngtc;i++)
 +            {
 +                vars->vscale_nhc[i] = 1;
 +            }
 +        }
 +        else
 +        {
 +            for (i=0;i<ir->opts.ngtc;i++)
 +            {
 +                vars->vscale_nhc[i] = ekind->tcstat[i].vscale_nhc;
 +            }
 +        }
 +    }
 +    else
 +    {
 +        vars->vscale_nhc = NULL;
 +    }
 +
 +    return vars;
 +}
 +
 +static void free_vetavars(t_vetavars *vars) 
 +{
 +    if (vars->vscale_nhc != NULL)
 +    {
 +        sfree(vars->vscale_nhc);
 +    }
 +}
 +
 +static int pcomp(const void *p1, const void *p2)
 +{
 +  int     db;
 +  atom_id min1,min2,max1,max2;
 +  t_sortblock *a1=(t_sortblock *)p1;
 +  t_sortblock *a2=(t_sortblock *)p2;
 +  
 +  db=a1->blocknr-a2->blocknr;
 +  
 +  if (db != 0)
 +    return db;
 +    
 +  min1=min(a1->iatom[1],a1->iatom[2]);
 +  max1=max(a1->iatom[1],a1->iatom[2]);
 +  min2=min(a2->iatom[1],a2->iatom[2]);
 +  max2=max(a2->iatom[1],a2->iatom[2]);
 +  
 +  if (min1 == min2)
 +    return max1-max2;
 +  else
 +    return min1-min2;
 +}
 +
 +static int icomp(const void *p1, const void *p2)
 +{
 +  atom_id *a1=(atom_id *)p1;
 +  atom_id *a2=(atom_id *)p2;
 +
 +  return (*a1)-(*a2);
 +}
 +
 +int n_flexible_constraints(struct gmx_constr *constr)
 +{
 +  int nflexcon;
 +
 +  if (constr)
 +    nflexcon = constr->nflexcon;
 +  else
 +    nflexcon = 0;
 +
 +  return nflexcon;
 +}
 +
 +void too_many_constraint_warnings(int eConstrAlg,int warncount)
 +{
 +  const char *abort="- aborting to avoid logfile runaway.\n"
 +    "This normally happens when your system is not sufficiently equilibrated,"
 +    "or if you are changing lambda too fast in free energy simulations.\n";
 +  
 +  gmx_fatal(FARGS,
 +          "Too many %s warnings (%d)\n"
 +          "If you know what you are doing you can %s"
 +          "set the environment variable GMX_MAXCONSTRWARN to -1,\n"
 +          "but normally it is better to fix the problem",
 +          (eConstrAlg == econtLINCS) ? "LINCS" : "SETTLE",warncount,
 +          (eConstrAlg == econtLINCS) ?
 +          "adjust the lincs warning threshold in your mdp file\nor " : "\n");
 +}
 +
 +static void write_constr_pdb(const char *fn,const char *title,
 +                             gmx_mtop_t *mtop,
 +                             int start,int homenr,t_commrec *cr,
 +                             rvec x[],matrix box)
 +{
 +    char fname[STRLEN],format[STRLEN];
 +    FILE *out;
 +    int  dd_ac0=0,dd_ac1=0,i,ii,resnr;
 +    gmx_domdec_t *dd;
 +    char *anm,*resnm;
 +  
 +    dd = NULL;
 +    if (PAR(cr))
 +    {
 +        sprintf(fname,"%s_n%d.pdb",fn,cr->sim_nodeid);
 +        if (DOMAINDECOMP(cr))
 +        {
 +            dd = cr->dd;
 +            dd_get_constraint_range(dd,&dd_ac0,&dd_ac1);
 +            start = 0;
 +            homenr = dd_ac1;
 +        }
 +    }
 +    else
 +    {
 +        sprintf(fname,"%s.pdb",fn);
 +    }
 +    sprintf(format,"%s\n",get_pdbformat());
 +    
 +    out = gmx_fio_fopen(fname,"w");
 +    
 +    fprintf(out,"TITLE     %s\n",title);
 +    gmx_write_pdb_box(out,-1,box);
 +    for(i=start; i<start+homenr; i++)
 +    {
 +        if (dd != NULL)
 +        {
 +            if (i >= dd->nat_home && i < dd_ac0)
 +            {
 +                continue;
 +            }
 +            ii = dd->gatindex[i];
 +        }
 +        else
 +        {
 +            ii = i;
 +        }
 +        gmx_mtop_atominfo_global(mtop,ii,&anm,&resnr,&resnm);
 +        fprintf(out,format,"ATOM",(ii+1)%100000,
 +                anm,resnm,' ',resnr%10000,' ',
 +                10*x[i][XX],10*x[i][YY],10*x[i][ZZ]);
 +    }
 +    fprintf(out,"TER\n");
 +
 +    gmx_fio_fclose(out);
 +}
 +                           
 +static void dump_confs(FILE *fplog,gmx_large_int_t step,gmx_mtop_t *mtop,
 +                     int start,int homenr,t_commrec *cr,
 +                     rvec x[],rvec xprime[],matrix box)
 +{
 +  char buf[256],buf2[22];
 + 
 +  char *env=getenv("GMX_SUPPRESS_DUMP");
 +  if (env)
 +      return; 
 +  
 +  sprintf(buf,"step%sb",gmx_step_str(step,buf2));
 +  write_constr_pdb(buf,"initial coordinates",
 +                 mtop,start,homenr,cr,x,box);
 +  sprintf(buf,"step%sc",gmx_step_str(step,buf2));
 +  write_constr_pdb(buf,"coordinates after constraining",
 +                 mtop,start,homenr,cr,xprime,box);
 +  if (fplog)
 +  {
 +      fprintf(fplog,"Wrote pdb files with previous and current coordinates\n");
 +  }
 +  fprintf(stderr,"Wrote pdb files with previous and current coordinates\n");
 +}
 +
 +static void pr_sortblock(FILE *fp,const char *title,int nsb,t_sortblock sb[])
 +{
 +  int i;
 +  
 +  fprintf(fp,"%s\n",title);
 +  for(i=0; (i<nsb); i++)
 +    fprintf(fp,"i: %5d, iatom: (%5d %5d %5d), blocknr: %5d\n",
 +          i,sb[i].iatom[0],sb[i].iatom[1],sb[i].iatom[2],
 +          sb[i].blocknr);
 +}
 +
 +gmx_bool constrain(FILE *fplog,gmx_bool bLog,gmx_bool bEner,
 +               struct gmx_constr *constr,
 +               t_idef *idef,t_inputrec *ir,gmx_ekindata_t *ekind,
 +               t_commrec *cr,
 +               gmx_large_int_t step,int delta_step,
 +               t_mdatoms *md,
 +               rvec *x,rvec *xprime,rvec *min_proj,matrix box,
 +               real lambda,real *dvdlambda,
 +               rvec *v,tensor *vir,
 +               t_nrnb *nrnb,int econq,gmx_bool bPscal,real veta, real vetanew)
 +{
 +    gmx_bool    bOK,bDump;
 +    int     start,homenr,nrend;
 +    int     i,j,d;
 +    int     ncons,error;
 +    tensor  rmdr;
 +    rvec    *vstor;
 +    real    invdt,vir_fac,t;
 +    t_ilist *settle;
 +    int     nsettle;
 +    t_pbc   pbc;
 +    char    buf[22];
 +    t_vetavars vetavar;
 +
 +    if (econq == econqForceDispl && !EI_ENERGY_MINIMIZATION(ir->eI))
 +    {
 +        gmx_incons("constrain called for forces displacements while not doing energy minimization, can not do this while the LINCS and SETTLE constraint connection matrices are mass weighted");
 +    }
 +    
 +    bOK   = TRUE;
 +    bDump = FALSE;
 +    
 +    start  = md->start;
 +    homenr = md->homenr;
 +    nrend = start+homenr;
 +
 +    /* set constants for pressure control integration */ 
 +    init_vetavars(&vetavar,econq!=econqCoord,
 +                  veta,vetanew,ir,ekind,bPscal);
 +
 +    if (ir->delta_t == 0)
 +    {
 +        invdt = 0;
 +    }
 +    else
 +    {
 +        invdt  = 1/ir->delta_t;
 +    }
 +
 +    if (ir->efep != efepNO && EI_DYNAMICS(ir->eI))
 +    {
 +        /* Set the constraint lengths for the step at which this configuration
 +         * is meant to be. The invmasses should not be changed.
 +         */
 +        lambda += delta_step*ir->delta_lambda;
 +    }
 +    
 +    if (vir != NULL)
 +    {
 +        clear_mat(rmdr);
 +    }
 +    
 +    where();
 +    if (constr->lincsd)
 +    {
 +        bOK = constrain_lincs(fplog,bLog,bEner,ir,step,constr->lincsd,md,cr,
 +                              x,xprime,min_proj,box,lambda,dvdlambda,
 +                              invdt,v,vir!=NULL,rmdr,
 +                              econq,nrnb,
 +                              constr->maxwarn,&constr->warncount_lincs);
 +        if (!bOK && constr->maxwarn >= 0)
 +        {
 +            if (fplog != NULL)
 +            {
 +                fprintf(fplog,"Constraint error in algorithm %s at step %s\n",
 +                        econstr_names[econtLINCS],gmx_step_str(step,buf));
 +            }
 +            bDump = TRUE;
 +        }
 +    } 
 +    
 +    if (constr->nblocks > 0)
 +    {
 +        switch (econq) {
 +        case (econqCoord):
 +            bOK = bshakef(fplog,constr->shaked,
 +                          homenr,md->invmass,constr->nblocks,constr->sblock,
 +                          idef,ir,box,x,xprime,nrnb,
 +                          constr->lagr,lambda,dvdlambda,
 +                          invdt,v,vir!=NULL,rmdr,constr->maxwarn>=0,econq,
 +                          &vetavar);
 +            break;
 +        case (econqVeloc):
 +            bOK = bshakef(fplog,constr->shaked,
 +                          homenr,md->invmass,constr->nblocks,constr->sblock,
 +                          idef,ir,box,x,min_proj,nrnb,
 +                          constr->lagr,lambda,dvdlambda,
 +                          invdt,NULL,vir!=NULL,rmdr,constr->maxwarn>=0,econq,
 +                          &vetavar);
 +            break;
 +        default:
 +            gmx_fatal(FARGS,"Internal error, SHAKE called for constraining something else than coordinates");
 +            break;
 +        }
 +
 +        if (!bOK && constr->maxwarn >= 0)
 +        {
 +            if (fplog != NULL)
 +            {
 +                fprintf(fplog,"Constraint error in algorithm %s at step %s\n",
 +                        econstr_names[econtSHAKE],gmx_step_str(step,buf));
 +            }
 +            bDump = TRUE;
 +        }
 +    }
 +        
 +    settle  = &idef->il[F_SETTLE];
 +    if (settle->nr > 0)
 +    {
 +        nsettle = settle->nr/2;
 +        
 +        switch (econq)
 +        {
 +        case econqCoord:
 +            csettle(constr->settled,
 +                    nsettle,settle->iatoms,x[0],xprime[0],
 +                    invdt,v[0],vir!=NULL,rmdr,&error,&vetavar);
 +            inc_nrnb(nrnb,eNR_SETTLE,nsettle);
 +            if (v != NULL)
 +            {
 +                inc_nrnb(nrnb,eNR_CONSTR_V,nsettle*3);
 +            }
 +            if (vir != NULL)
 +            {
 +                inc_nrnb(nrnb,eNR_CONSTR_VIR,nsettle*3);
 +            }
 +            
 +            bOK = (error < 0);
 +            if (!bOK && constr->maxwarn >= 0)
 +            {
 +                char buf[256];
 +                sprintf(buf,
 +                        "\nstep " gmx_large_int_pfmt ": Water molecule starting at atom %d can not be "
 +                        "settled.\nCheck for bad contacts and/or reduce the timestep if appropriate.\n",
 +                        step,ddglatnr(cr->dd,settle->iatoms[error*2+1]));
 +                if (fplog)
 +                {
 +                    fprintf(fplog,"%s",buf);
 +                }
 +                fprintf(stderr,"%s",buf);
 +                constr->warncount_settle++;
 +                if (constr->warncount_settle > constr->maxwarn)
 +                {
 +                    too_many_constraint_warnings(-1,constr->warncount_settle);
 +                }
 +                bDump = TRUE;
 +                break;
 +            case econqVeloc:
 +            case econqDeriv:
 +            case econqForce:
 +            case econqForceDispl:
 +                settle_proj(fplog,constr->settled,econq,
 +                            nsettle,settle->iatoms,x,
 +                            xprime,min_proj,vir!=NULL,rmdr,&vetavar);
 +                /* This is an overestimate */
 +                inc_nrnb(nrnb,eNR_SETTLE,nsettle);
 +                break;
 +            case econqDeriv_FlexCon:
 +                /* Nothing to do, since the are no flexible constraints in settles */
 +                break;
 +            default:
 +                gmx_incons("Unknown constraint quantity for settle");
 +            }
 +        }
 +    }
 +
 +    free_vetavars(&vetavar);
 +    
 +    if (vir != NULL)
 +    {
 +        switch (econq)
 +        {
 +        case econqCoord:
 +            vir_fac = 0.5/(ir->delta_t*ir->delta_t);
 +            break;
 +        case econqVeloc:
 +            vir_fac = 0.5/ir->delta_t;
 +            break;
 +        case econqForce:
 +        case econqForceDispl:
 +            vir_fac = 0.5;
 +            break;
 +        default:
 +            vir_fac = 0;
 +            gmx_incons("Unsupported constraint quantity for virial");
 +        }
 +        
 +        if (EI_VV(ir->eI))
 +        {
 +            vir_fac *= 2;  /* only constraining over half the distance here */
 +        }
 +        for(i=0; i<DIM; i++)
 +        {
 +            for(j=0; j<DIM; j++)
 +            {
 +                (*vir)[i][j] = vir_fac*rmdr[i][j];
 +            }
 +        }
 +    }
 +    
 +    if (bDump)
 +    {
 +        dump_confs(fplog,step,constr->warn_mtop,start,homenr,cr,x,xprime,box);
 +    }
 +    
 +    if (econq == econqCoord)
 +    {
 +        if (ir->ePull == epullCONSTRAINT)
 +        {
 +            if (EI_DYNAMICS(ir->eI))
 +            {
 +                t = ir->init_t + (step + delta_step)*ir->delta_t;
 +            }
 +            else
 +            {
 +                t = ir->init_t;
 +            }
 +            set_pbc(&pbc,ir->ePBC,box);
 +            pull_constraint(ir->pull,md,&pbc,cr,ir->delta_t,t,x,xprime,v,*vir);
 +        }
 +        if (constr->ed && delta_step > 0)
 +        {
 +            /* apply the essential dynamcs constraints here */
 +            do_edsam(ir,step,md,cr,xprime,v,box,constr->ed);
 +        }
 +    }
 +    
 +    return bOK;
 +}
 +
 +real *constr_rmsd_data(struct gmx_constr *constr)
 +{
 +  if (constr->lincsd)
 +    return lincs_rmsd_data(constr->lincsd);
 +  else
 +    return NULL;
 +}
 +
 +real constr_rmsd(struct gmx_constr *constr,gmx_bool bSD2)
 +{
 +  if (constr->lincsd)
 +    return lincs_rmsd(constr->lincsd,bSD2);
 +  else
 +    return 0;
 +}
 +
 +static void make_shake_sblock_pd(struct gmx_constr *constr,
 +                               t_idef *idef,t_mdatoms *md)
 +{
 +  int  i,j,m,ncons;
 +  int  bstart,bnr;
 +  t_blocka    sblocks;
 +  t_sortblock *sb;
 +  t_iatom     *iatom;
 +  atom_id     *inv_sblock;
 +
 +  /* Since we are processing the local topology,
 +   * the F_CONSTRNC ilist has been concatenated to the F_CONSTR ilist.
 +   */
 +  ncons = idef->il[F_CONSTR].nr/3;
 +
 +  init_blocka(&sblocks);
 +  gen_sblocks(NULL,md->start,md->start+md->homenr,idef,&sblocks,FALSE);
 +  
 +  /*
 +    bstart=(idef->nodeid > 0) ? blocks->multinr[idef->nodeid-1] : 0;
 +    nblocks=blocks->multinr[idef->nodeid] - bstart;
 +  */
 +  bstart  = 0;
 +  constr->nblocks = sblocks.nr;
 +  if (debug) 
 +    fprintf(debug,"ncons: %d, bstart: %d, nblocks: %d\n",
 +          ncons,bstart,constr->nblocks);
 +  
 +  /* Calculate block number for each atom */
 +  inv_sblock = make_invblocka(&sblocks,md->nr);
 +  
 +  done_blocka(&sblocks);
 +  
 +  /* Store the block number in temp array and
 +   * sort the constraints in order of the sblock number 
 +   * and the atom numbers, really sorting a segment of the array!
 +   */
 +#ifdef DEBUGIDEF 
 +  pr_idef(fplog,0,"Before Sort",idef);
 +#endif
 +  iatom=idef->il[F_CONSTR].iatoms;
 +  snew(sb,ncons);
 +  for(i=0; (i<ncons); i++,iatom+=3) {
 +    for(m=0; (m<3); m++)
 +      sb[i].iatom[m] = iatom[m];
 +    sb[i].blocknr = inv_sblock[iatom[1]];
 +  }
 +  
 +  /* Now sort the blocks */
 +  if (debug) {
 +    pr_sortblock(debug,"Before sorting",ncons,sb);
 +    fprintf(debug,"Going to sort constraints\n");
 +  }
 +  
 +  qsort(sb,ncons,(size_t)sizeof(*sb),pcomp);
 +  
 +  if (debug) {
 +    pr_sortblock(debug,"After sorting",ncons,sb);
 +  }
 +  
 +  iatom=idef->il[F_CONSTR].iatoms;
 +  for(i=0; (i<ncons); i++,iatom+=3) 
 +    for(m=0; (m<3); m++)
 +      iatom[m]=sb[i].iatom[m];
 +#ifdef DEBUGIDEF
 +  pr_idef(fplog,0,"After Sort",idef);
 +#endif
 +  
 +  j=0;
 +  snew(constr->sblock,constr->nblocks+1);
 +  bnr=-2;
 +  for(i=0; (i<ncons); i++) {
 +    if (sb[i].blocknr != bnr) {
 +      bnr=sb[i].blocknr;
 +      constr->sblock[j++]=3*i;
 +    }
 +  }
 +  /* Last block... */
 +  constr->sblock[j++] = 3*ncons;
 +  
 +  if (j != (constr->nblocks+1)) {
 +    fprintf(stderr,"bstart: %d\n",bstart);
 +    fprintf(stderr,"j: %d, nblocks: %d, ncons: %d\n",
 +          j,constr->nblocks,ncons);
 +    for(i=0; (i<ncons); i++)
 +      fprintf(stderr,"i: %5d  sb[i].blocknr: %5u\n",i,sb[i].blocknr);
 +    for(j=0; (j<=constr->nblocks); j++)
 +      fprintf(stderr,"sblock[%3d]=%5d\n",j,(int)constr->sblock[j]);
 +    gmx_fatal(FARGS,"DEATH HORROR: "
 +            "sblocks does not match idef->il[F_CONSTR]");
 +  }
 +  sfree(sb);
 +  sfree(inv_sblock);
 +}
 +
 +static void make_shake_sblock_dd(struct gmx_constr *constr,
 +                               t_ilist *ilcon,t_block *cgs,
 +                               gmx_domdec_t *dd)
 +{
 +  int ncons,c,cg;
 +  t_iatom *iatom;
 +
 +  if (dd->ncg_home+1 > constr->sblock_nalloc) {
 +    constr->sblock_nalloc = over_alloc_dd(dd->ncg_home+1);
 +    srenew(constr->sblock,constr->sblock_nalloc);
 +  }
 +  
 +  ncons = ilcon->nr/3;
 +  iatom = ilcon->iatoms;
 +  constr->nblocks = 0;
 +  cg = 0;
 +  for(c=0; c<ncons; c++) {
 +    if (c == 0 || iatom[1] >= cgs->index[cg+1]) {
 +      constr->sblock[constr->nblocks++] = 3*c;
 +      while (iatom[1] >= cgs->index[cg+1])
 +      cg++;
 +    }
 +    iatom += 3;
 +  }
 +  constr->sblock[constr->nblocks] = 3*ncons;
 +}
 +
 +t_blocka make_at2con(int start,int natoms,
 +                   t_ilist *ilist,t_iparams *iparams,
 +                   gmx_bool bDynamics,int *nflexiblecons)
 +{
 +  int *count,ncon,con,con_tot,nflexcon,ftype,i,a;
 +  t_iatom  *ia;
 +  t_blocka at2con;
 +  gmx_bool bFlexCon;
 +  
 +  snew(count,natoms);
 +  nflexcon = 0;
 +  for(ftype=F_CONSTR; ftype<=F_CONSTRNC; ftype++) {
 +    ncon = ilist[ftype].nr/3;
 +    ia   = ilist[ftype].iatoms;
 +    for(con=0; con<ncon; con++) {
 +      bFlexCon = (iparams[ia[0]].constr.dA == 0 &&
 +                iparams[ia[0]].constr.dB == 0);
 +      if (bFlexCon)
 +      nflexcon++;
 +      if (bDynamics || !bFlexCon) {
 +      for(i=1; i<3; i++) {
 +        a = ia[i] - start;
 +        count[a]++;
 +      }
 +      }
 +      ia += 3;
 +    }
 +  }
 +  *nflexiblecons = nflexcon;
 +
 +  at2con.nr = natoms;
 +  at2con.nalloc_index = at2con.nr+1;
 +  snew(at2con.index,at2con.nalloc_index);
 +  at2con.index[0] = 0;
 +  for(a=0; a<natoms; a++) {
 +    at2con.index[a+1] = at2con.index[a] + count[a];
 +    count[a] = 0;
 +  }
 +  at2con.nra = at2con.index[natoms];
 +  at2con.nalloc_a = at2con.nra;
 +  snew(at2con.a,at2con.nalloc_a);
 +
 +  /* The F_CONSTRNC constraints have constraint numbers
 +   * that continue after the last F_CONSTR constraint.
 +   */
 +  con_tot = 0;
 +  for(ftype=F_CONSTR; ftype<=F_CONSTRNC; ftype++) {
 +    ncon = ilist[ftype].nr/3;
 +    ia   = ilist[ftype].iatoms;
 +    for(con=0; con<ncon; con++) {
 +      bFlexCon = (iparams[ia[0]].constr.dA == 0 &&
 +                iparams[ia[0]].constr.dB == 0);
 +      if (bDynamics || !bFlexCon) {
 +      for(i=1; i<3; i++) {
 +        a = ia[i] - start;
 +        at2con.a[at2con.index[a]+count[a]++] = con_tot;
 +      }
 +      }
 +      con_tot++;
 +      ia += 3;
 +    }
 +  }
 +  
 +  sfree(count);
 +
 +  return at2con;
 +}
 +
 +void set_constraints(struct gmx_constr *constr,
 +                     gmx_localtop_t *top,t_inputrec *ir,
 +                     t_mdatoms *md,t_commrec *cr)
 +{
 +    t_idef *idef;
 +    int    ncons;
 +    t_ilist *settle;
 +    int    iO,iH;
 +    
 +    idef = &top->idef;
 +       
 +    if (constr->ncon_tot > 0)
 +    {
 +        /* We are using the local topology,
 +         * so there are only F_CONSTR constraints.
 +         */
 +        ncons = idef->il[F_CONSTR].nr/3;
 +        
 +        /* With DD we might also need to call LINCS with ncons=0 for
 +         * communicating coordinates to other nodes that do have constraints.
 +         */
 +        if (ir->eConstrAlg == econtLINCS)
 +        {
 +            set_lincs(idef,md,EI_DYNAMICS(ir->eI),cr,constr->lincsd);
 +        }
 +        if (ir->eConstrAlg == econtSHAKE)
 +        {
 +            if (cr->dd)
 +            {
 +                make_shake_sblock_dd(constr,&idef->il[F_CONSTR],&top->cgs,cr->dd);
 +            }
 +            else
 +            {
 +                make_shake_sblock_pd(constr,idef,md);
 +            }
 +            if (ncons > constr->lagr_nalloc)
 +            {
 +                constr->lagr_nalloc = over_alloc_dd(ncons);
 +                srenew(constr->lagr,constr->lagr_nalloc);
 +            }
 +        }
 +    }
 +
 +    if (idef->il[F_SETTLE].nr > 0 && constr->settled == NULL)
 +    {
 +        settle = &idef->il[F_SETTLE];
 +        iO = settle->iatoms[1];
 +        iH = settle->iatoms[1]+1;
 +        constr->settled =
 +            settle_init(md->massT[iO],md->massT[iH],
 +                        md->invmass[iO],md->invmass[iH],
 +                        idef->iparams[settle->iatoms[0]].settle.doh,
 +                        idef->iparams[settle->iatoms[0]].settle.dhh);
 +    }
 +    
 +    /* Make a selection of the local atoms for essential dynamics */
 +    if (constr->ed && cr->dd)
 +    {
 +        dd_make_local_ed_indices(cr->dd,constr->ed);
 +    }
 +}
 +
 +static void constr_recur(t_blocka *at2con,
 +                       t_ilist *ilist,t_iparams *iparams,gmx_bool bTopB,
 +                       int at,int depth,int nc,int *path,
 +                       real r0,real r1,real *r2max,
 +                       int *count)
 +{
 +  int  ncon1;
 +  t_iatom *ia1,*ia2;
 +  int  c,con,a1;
 +  gmx_bool bUse;
 +  t_iatom *ia;
 +  real len,rn0,rn1;
 +
 +  (*count)++;
 +
 +  ncon1 = ilist[F_CONSTR].nr/3;
 +  ia1   = ilist[F_CONSTR].iatoms;
 +  ia2   = ilist[F_CONSTRNC].iatoms;
 +
 +  /* Loop over all constraints connected to this atom */
 +  for(c=at2con->index[at]; c<at2con->index[at+1]; c++) {
 +    con = at2con->a[c];
 +    /* Do not walk over already used constraints */
 +    bUse = TRUE;
 +    for(a1=0; a1<depth; a1++) {
 +      if (con == path[a1])
 +      bUse = FALSE;
 +    }
 +    if (bUse) {
 +      ia = constr_iatomptr(ncon1,ia1,ia2,con);
 +      /* Flexible constraints currently have length 0, which is incorrect */
 +      if (!bTopB)
 +      len = iparams[ia[0]].constr.dA;
 +      else
 +      len = iparams[ia[0]].constr.dB;
 +      /* In the worst case the bond directions alternate */
 +      if (nc % 2 == 0) {
 +      rn0 = r0 + len;
 +      rn1 = r1;
 +      } else {
 +      rn0 = r0;
 +      rn1 = r1 + len;
 +      }
 +      /* Assume angles of 120 degrees between all bonds */
 +      if (rn0*rn0 + rn1*rn1 + rn0*rn1 > *r2max) {
 +      *r2max = rn0*rn0 + rn1*rn1 + r0*rn1;
 +      if (debug) {
 +        fprintf(debug,"Found longer constraint distance: r0 %5.3f r1 %5.3f rmax %5.3f\n", rn0,rn1,sqrt(*r2max));
 +        for(a1=0; a1<depth; a1++)
 +          fprintf(debug," %d %5.3f",
 +                  path[a1],
 +                  iparams[constr_iatomptr(ncon1,ia1,ia2,con)[0]].constr.dA);
 +        fprintf(debug," %d %5.3f\n",con,len);
 +      }
 +      }
 +      /* Limit the number of recursions to 1000*nc,
 +       * so a call does not take more than a second,
 +       * even for highly connected systems.
 +       */
 +      if (depth + 1 < nc && *count < 1000*nc) {
 +      if (ia[1] == at)
 +        a1 = ia[2];
 +      else
 +        a1 = ia[1];
 +      /* Recursion */
 +      path[depth] = con;
 +      constr_recur(at2con,ilist,iparams,
 +                   bTopB,a1,depth+1,nc,path,rn0,rn1,r2max,count);
 +      path[depth] = -1;
 +      }
 +    }
 +  }
 +}
 +
 +static real constr_r_max_moltype(FILE *fplog,
 +                               gmx_moltype_t *molt,t_iparams *iparams,
 +                               t_inputrec *ir)
 +{
 +  int natoms,nflexcon,*path,at,count;
 +
 +  t_blocka at2con;
 +  real r0,r1,r2maxA,r2maxB,rmax,lam0,lam1;
 +
 +  if (molt->ilist[F_CONSTR].nr   == 0 &&
 +      molt->ilist[F_CONSTRNC].nr == 0) {
 +    return 0;
 +  }
 +  
 +  natoms = molt->atoms.nr;
 +
 +  at2con = make_at2con(0,natoms,molt->ilist,iparams,
 +                     EI_DYNAMICS(ir->eI),&nflexcon);
 +  snew(path,1+ir->nProjOrder);
 +  for(at=0; at<1+ir->nProjOrder; at++)
 +    path[at] = -1;
 +
 +  r2maxA = 0;
 +  for(at=0; at<natoms; at++) {
 +    r0 = 0;
 +    r1 = 0;
 +
 +    count = 0;
 +    constr_recur(&at2con,molt->ilist,iparams,
 +               FALSE,at,0,1+ir->nProjOrder,path,r0,r1,&r2maxA,&count);
 +  }
 +  if (ir->efep == efepNO) {
 +    rmax = sqrt(r2maxA);
 +  } else {
 +    r2maxB = 0;
 +    for(at=0; at<natoms; at++) {
 +      r0 = 0;
 +      r1 = 0;
 +      count = 0;
 +      constr_recur(&at2con,molt->ilist,iparams,
 +                 TRUE,at,0,1+ir->nProjOrder,path,r0,r1,&r2maxB,&count);
 +    }
 +    lam0 = ir->init_lambda;
 +    if (EI_DYNAMICS(ir->eI))
 +      lam0 += ir->init_step*ir->delta_lambda;
 +    rmax = (1 - lam0)*sqrt(r2maxA) + lam0*sqrt(r2maxB);
 +    if (EI_DYNAMICS(ir->eI)) {
 +      lam1 = ir->init_lambda + (ir->init_step + ir->nsteps)*ir->delta_lambda;
 +      rmax = max(rmax,(1 - lam1)*sqrt(r2maxA) + lam1*sqrt(r2maxB));
 +    }
 +  }
 +
 +  done_blocka(&at2con);
 +  sfree(path);
 +
 +  return rmax;
 +}
 +
 +real constr_r_max(FILE *fplog,gmx_mtop_t *mtop,t_inputrec *ir)
 +{
 +  int mt;
 +  real rmax;
 +
 +  rmax = 0;
 +  for(mt=0; mt<mtop->nmoltype; mt++) {
 +    rmax = max(rmax,
 +             constr_r_max_moltype(fplog,&mtop->moltype[mt],
 +                                  mtop->ffparams.iparams,ir));
 +  }
 +  
 +  if (fplog)
 +    fprintf(fplog,"Maximum distance for %d constraints, at 120 deg. angles, all-trans: %.3f nm\n",1+ir->nProjOrder,rmax);
 +
 +  return rmax;
 +}
 +
 +gmx_constr_t init_constraints(FILE *fplog,
 +                              gmx_mtop_t *mtop,t_inputrec *ir,
 +                              gmx_edsam_t ed,t_state *state,
 +                              t_commrec *cr)
 +{
 +    int  ncon,nset,nmol,settle_type,i,natoms,mt,nflexcon;
 +    struct gmx_constr *constr;
 +    char *env;
 +    t_ilist *ilist;
 +    gmx_mtop_ilistloop_t iloop;
 +    
 +    ncon =
 +        gmx_mtop_ftype_count(mtop,F_CONSTR) +
 +        gmx_mtop_ftype_count(mtop,F_CONSTRNC);
 +    nset = gmx_mtop_ftype_count(mtop,F_SETTLE);
 +    
 +    if (ncon+nset == 0 && ir->ePull != epullCONSTRAINT && ed == NULL) 
 +    {
 +        return NULL;
 +    }
 +    
 +    snew(constr,1);
 +    
 +    constr->ncon_tot = ncon;
 +    constr->nflexcon = 0;
 +    if (ncon > 0) 
 +    {
 +        constr->n_at2con_mt = mtop->nmoltype;
 +        snew(constr->at2con_mt,constr->n_at2con_mt);
 +        for(mt=0; mt<mtop->nmoltype; mt++) 
 +        {
 +            constr->at2con_mt[mt] = make_at2con(0,mtop->moltype[mt].atoms.nr,
 +                                                mtop->moltype[mt].ilist,
 +                                                mtop->ffparams.iparams,
 +                                                EI_DYNAMICS(ir->eI),&nflexcon);
 +            for(i=0; i<mtop->nmolblock; i++) 
 +            {
 +                if (mtop->molblock[i].type == mt) 
 +                {
 +                    constr->nflexcon += mtop->molblock[i].nmol*nflexcon;
 +                }
 +            }
 +        }
 +        
 +        if (constr->nflexcon > 0) 
 +        {
 +            if (fplog) 
 +            {
 +                fprintf(fplog,"There are %d flexible constraints\n",
 +                        constr->nflexcon);
 +                if (ir->fc_stepsize == 0) 
 +                {
 +                    fprintf(fplog,"\n"
 +                            "WARNING: step size for flexible constraining = 0\n"
 +                            "         All flexible constraints will be rigid.\n"
 +                            "         Will try to keep all flexible constraints at their original length,\n"
 +                            "         but the lengths may exhibit some drift.\n\n");
 +                    constr->nflexcon = 0;
 +                }
 +            }
 +            if (constr->nflexcon > 0) 
 +            {
 +                please_cite(fplog,"Hess2002");
 +            }
 +        }
 +        
 +        if (ir->eConstrAlg == econtLINCS) 
 +        {
 +            constr->lincsd = init_lincs(fplog,mtop,
 +                                        constr->nflexcon,constr->at2con_mt,
 +                                        DOMAINDECOMP(cr) && cr->dd->bInterCGcons,
 +                                        ir->nLincsIter,ir->nProjOrder);
 +        }
 +        
 +        if (ir->eConstrAlg == econtSHAKE) {
 +            if (DOMAINDECOMP(cr) && cr->dd->bInterCGcons)
 +            {
 +                gmx_fatal(FARGS,"SHAKE is not supported with domain decomposition and constraint that cross charge group boundaries, use LINCS");
 +            }
 +            if (constr->nflexcon) 
 +            {
 +                gmx_fatal(FARGS,"For this system also velocities and/or forces need to be constrained, this can not be done with SHAKE, you should select LINCS");
 +            }
 +            please_cite(fplog,"Ryckaert77a");
 +            if (ir->bShakeSOR) 
 +            {
 +                please_cite(fplog,"Barth95a");
 +            }
++
++            constr->shaked = shake_init();
 +        }
 +    }
 +  
 +    if (nset > 0) {
 +        please_cite(fplog,"Miyamoto92a");
 +        
 +        /* Check that we have only one settle type */
 +        settle_type = -1;
 +        iloop = gmx_mtop_ilistloop_init(mtop);
 +        while (gmx_mtop_ilistloop_next(iloop,&ilist,&nmol)) 
 +        {
 +            for (i=0; i<ilist[F_SETTLE].nr; i+=2) 
 +            {
 +                if (settle_type == -1) 
 +                {
 +                    settle_type = ilist[F_SETTLE].iatoms[i];
 +                } 
 +                else if (ilist[F_SETTLE].iatoms[i] != settle_type) 
 +                {
 +                    gmx_fatal(FARGS,
 +                              "The [molecules] section of your topology specifies more than one block of\n"
 +                              "a [moleculetype] with a [settles] block. Only one such is allowed. If you\n"
 +                              "are trying to partition your solvent into different *groups* (e.g. for\n"
 +                              "freezing, T-coupling, etc.) then you are using the wrong approach. Index\n"
 +                              "files specify groups. Otherwise, you may wish to change the least-used\n"
 +                              "block of molecules with SETTLE constraints into 3 normal constraints.");
 +                }
 +            }
 +        }
 +    }
 +    
 +    constr->maxwarn = 999;
 +    env = getenv("GMX_MAXCONSTRWARN");
 +    if (env) 
 +    {
 +        constr->maxwarn = 0;
 +        sscanf(env,"%d",&constr->maxwarn);
 +        if (fplog) 
 +        {
 +            fprintf(fplog,
 +                    "Setting the maximum number of constraint warnings to %d\n",
 +                    constr->maxwarn);
 +        }
 +        if (MASTER(cr)) 
 +        {
 +            fprintf(stderr,
 +                    "Setting the maximum number of constraint warnings to %d\n",
 +                    constr->maxwarn);
 +        }
 +    }
 +    if (constr->maxwarn < 0 && fplog) 
 +    {
 +        fprintf(fplog,"maxwarn < 0, will not stop on constraint errors\n");
 +    }
 +    constr->warncount_lincs  = 0;
 +    constr->warncount_settle = 0;
 +    
 +    /* Initialize the essential dynamics sampling.
 +     * Put the pointer to the ED struct in constr */
 +    constr->ed = ed;
 +    if (ed != NULL) 
 +    {
 +        init_edsam(mtop,ir,cr,ed,state->x,state->box);
 +    }
 +    
 +    constr->warn_mtop = mtop;
 +    
 +    return constr;
 +}
 +
 +t_blocka *atom2constraints_moltype(gmx_constr_t constr)
 +{
 +  return constr->at2con_mt;
 +}
 +
 +
 +gmx_bool inter_charge_group_constraints(gmx_mtop_t *mtop)
 +{
 +  const gmx_moltype_t *molt;
 +  const t_block *cgs;
 +  const t_ilist *il;
 +  int  mb;
 +  int  nat,*at2cg,cg,a,ftype,i;
 +  gmx_bool bInterCG;
 +
 +  bInterCG = FALSE;
 +  for(mb=0; mb<mtop->nmolblock && !bInterCG; mb++) {
 +    molt = &mtop->moltype[mtop->molblock[mb].type];
 +
 +    if (molt->ilist[F_CONSTR].nr   > 0 ||
 +      molt->ilist[F_CONSTRNC].nr > 0) {
 +      cgs  = &molt->cgs;
 +      snew(at2cg,molt->atoms.nr);
 +      for(cg=0; cg<cgs->nr; cg++) {
 +      for(a=cgs->index[cg]; a<cgs->index[cg+1]; a++)
 +        at2cg[a] = cg;
 +      }
 +      
 +      for(ftype=F_CONSTR; ftype<=F_CONSTRNC; ftype++) {
 +      il = &molt->ilist[ftype];
 +      for(i=0; i<il->nr && !bInterCG; i+=3) {
 +        if (at2cg[il->iatoms[i+1]] != at2cg[il->iatoms[i+2]])
 +          bInterCG = TRUE;
 +      }
 +      }
 +      sfree(at2cg);
 +    }
 +  }
 +
 +  return bInterCG;
 +}
index ec8b8fe9d1b7aa551576a488ba2de4edc5d623e7,0000000000000000000000000000000000000000..cad9c02266588702ed31174d2c9492e3fc2b8c94
mode 100644,000000..100644
--- /dev/null
@@@ -1,1410 -1,0 +1,1415 @@@
-       fprintf(fplog,"\nStep %s  Warning: Pressure scaling more than 1%%.\n",
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include "typedefs.h"
 +#include "smalloc.h"
 +#include "update.h"
 +#include "vec.h"
 +#include "macros.h"
 +#include "physics.h"
 +#include "names.h"
 +#include "gmx_fatal.h"
 +#include "txtdump.h"
 +#include "nrnb.h"
 +#include "gmx_random.h"
 +#include "update.h"
 +#include "mdrun.h"
 +
 +#define NTROTTERPARTS 3
 +
 +/* Suzuki-Yoshida Constants, for n=3 and n=5, for symplectic integration  */
 +/* for n=1, w0 = 1 */
 +/* for n=3, w0 = w2 = 1/(2-2^-(1/3)), w1 = 1-2*w0 */
 +/* for n=5, w0 = w1 = w3 = w4 = 1/(4-4^-(1/3)), w1 = 1-4*w0 */
 +
 +#define MAX_SUZUKI_YOSHIDA_NUM 5
 +#define SUZUKI_YOSHIDA_NUM  5
 +
 +static const double sy_const_1[] = { 1. };
 +static const double sy_const_3[] = { 0.828981543588751,-0.657963087177502,0.828981543588751 };
 +static const double sy_const_5[] = { 0.2967324292201065,0.2967324292201065,-0.186929716880426,0.2967324292201065,0.2967324292201065 };
 +
 +static const double* sy_const[] = {
 +    NULL,
 +    sy_const_1,
 +    NULL,
 +    sy_const_3,
 +    NULL,
 +    sy_const_5
 +};
 +
 +/*
 +static const double sy_const[MAX_SUZUKI_YOSHIDA_NUM+1][MAX_SUZUKI_YOSHIDA_NUM+1] = {
 +    {},
 +    {1},
 +    {},
 +    {0.828981543588751,-0.657963087177502,0.828981543588751},
 +    {},
 +    {0.2967324292201065,0.2967324292201065,-0.186929716880426,0.2967324292201065,0.2967324292201065}
 +};*/
 +
 +/* these integration routines are only referenced inside this file */
 +static void NHC_trotter(t_grpopts *opts,int nvar, gmx_ekindata_t *ekind,real dtfull,
 +                        double xi[],double vxi[], double scalefac[], real *veta, t_extmass *MassQ, gmx_bool bEkinAveVel)
 +
 +{
 +    /* general routine for both barostat and thermostat nose hoover chains */
 +
 +    int   i,j,mi,mj,jmax;
 +    double Ekin,Efac,reft,kT,nd;
 +    double dt;
 +    t_grp_tcstat *tcstat;
 +    double *ivxi,*ixi;
 +    double *iQinv;
 +    double *GQ;
 +    gmx_bool bBarostat;
 +    int mstepsi, mstepsj;
 +    int ns = SUZUKI_YOSHIDA_NUM;  /* set the degree of integration in the types/state.h file */
 +    int nh = opts->nhchainlength;
 +    
 +    snew(GQ,nh);
 +    mstepsi = mstepsj = ns;
 +
 +/* if scalefac is NULL, we are doing the NHC of the barostat */
 +    
 +    bBarostat = FALSE;
 +    if (scalefac == NULL) {
 +        bBarostat = TRUE;
 +    }
 +
 +    for (i=0; i<nvar; i++) 
 +    {
 +    
 +        /* make it easier to iterate by selecting 
 +           out the sub-array that corresponds to this T group */
 +        
 +        ivxi = &vxi[i*nh];
 +        ixi = &xi[i*nh];
 +        if (bBarostat) {
 +            iQinv = &(MassQ->QPinv[i*nh]); 
 +            nd = 1; /* THIS WILL CHANGE IF NOT ISOTROPIC */
 +            reft = max(0.0,opts->ref_t[0]);
 +            Ekin = sqr(*veta)/MassQ->Winv;
 +        } else {
 +            iQinv = &(MassQ->Qinv[i*nh]);  
 +            tcstat = &ekind->tcstat[i];
 +            nd = opts->nrdf[i];
 +            reft = max(0.0,opts->ref_t[i]);
 +            if (bEkinAveVel) 
 +            {
 +                Ekin = 2*trace(tcstat->ekinf)*tcstat->ekinscalef_nhc;
 +            } else {
 +                Ekin = 2*trace(tcstat->ekinh)*tcstat->ekinscaleh_nhc;
 +            }
 +        }
 +        kT = BOLTZ*reft;
 +
 +        for(mi=0;mi<mstepsi;mi++) 
 +        {
 +            for(mj=0;mj<mstepsj;mj++)
 +            { 
 +                /* weighting for this step using Suzuki-Yoshida integration - fixed at 5 */
 +                dt = sy_const[ns][mj] * dtfull / mstepsi;
 +                
 +                /* compute the thermal forces */
 +                GQ[0] = iQinv[0]*(Ekin - nd*kT);
 +                
 +                for (j=0;j<nh-1;j++) 
 +                {     
 +                    if (iQinv[j+1] > 0) {
 +                        /* we actually don't need to update here if we save the 
 +                           state of the GQ, but it's easier to just recompute*/
 +                        GQ[j+1] = iQinv[j+1]*((sqr(ivxi[j])/iQinv[j])-kT);      
 +                    } else {
 +                        GQ[j+1] = 0;
 +                    }
 +                }
 +                
 +                ivxi[nh-1] += 0.25*dt*GQ[nh-1];
 +                for (j=nh-1;j>0;j--) 
 +                { 
 +                    Efac = exp(-0.125*dt*ivxi[j]);
 +                    ivxi[j-1] = Efac*(ivxi[j-1]*Efac + 0.25*dt*GQ[j-1]);
 +                }
 +                
 +                Efac = exp(-0.5*dt*ivxi[0]);
 +                if (bBarostat) {
 +                    *veta *= Efac;                
 +                } else {
 +                    scalefac[i] *= Efac;
 +                }
 +                Ekin *= (Efac*Efac);
 +                
 +                /* Issue - if the KE is an average of the last and the current temperatures, then we might not be
 +                   able to scale the kinetic energy directly with this factor.  Might take more bookkeeping -- have to
 +                   think about this a bit more . . . */
 +
 +                GQ[0] = iQinv[0]*(Ekin - nd*kT);
 +                
 +                /* update thermostat positions */
 +                for (j=0;j<nh;j++) 
 +                { 
 +                    ixi[j] += 0.5*dt*ivxi[j];
 +                }
 +                
 +                for (j=0;j<nh-1;j++) 
 +                { 
 +                    Efac = exp(-0.125*dt*ivxi[j+1]);
 +                    ivxi[j] = Efac*(ivxi[j]*Efac + 0.25*dt*GQ[j]);
 +                    if (iQinv[j+1] > 0) {
 +                        GQ[j+1] = iQinv[j+1]*((sqr(ivxi[j])/iQinv[j])-kT);  
 +                    } else {
 +                        GQ[j+1] = 0;
 +                    }
 +                }
 +                ivxi[nh-1] += 0.25*dt*GQ[nh-1];
 +            }
 +        }
 +    }
 +    sfree(GQ);
 +}
 +
 +static void boxv_trotter(t_inputrec *ir, real *veta, real dt, tensor box, 
 +                         gmx_ekindata_t *ekind, tensor vir, real pcorr, real ecorr, t_extmass *MassQ)
 +{
 +
 +    real  pscal;
 +    double alpha;
 +    int   i,j,d,n,nwall;
 +    real  T,GW,vol;
 +    tensor Winvm,ekinmod,localpres;
 +    
 +    /* The heat bath is coupled to a separate barostat, the last temperature group.  In the 
 +       2006 Tuckerman et al paper., the order is iL_{T_baro} iL {T_part}
 +    */
 +    
 +    if (ir->epct==epctSEMIISOTROPIC) 
 +    {
 +        nwall = 2;
 +    } 
 +    else 
 +    {
 +        nwall = 3;
 +    }
 +
 +    /* eta is in pure units.  veta is in units of ps^-1. GW is in 
 +       units of ps^-2.  However, eta has a reference of 1 nm^3, so care must be 
 +       taken to use only RATIOS of eta in updating the volume. */
 +    
 +    /* we take the partial pressure tensors, modify the 
 +       kinetic energy tensor, and recovert to pressure */
 +    
 +    if (ir->opts.nrdf[0]==0) 
 +    { 
 +        gmx_fatal(FARGS,"Barostat is coupled to a T-group with no degrees of freedom\n");    
 +    } 
 +    /* alpha factor for phase space volume, then multiply by the ekin scaling factor.  */
 +    alpha = 1.0 + DIM/((double)ir->opts.nrdf[0]);
 +    alpha *= ekind->tcstat[0].ekinscalef_nhc;
 +    msmul(ekind->ekin,alpha,ekinmod);  
 +    
 +    /* for now, we use Elr = 0, because if you want to get it right, you
 +       really should be using PME. Maybe print a warning? */
 +    
 +    pscal   = calc_pres(ir->ePBC,nwall,box,ekinmod,vir,localpres,0.0) + pcorr;
 +    
 +    vol = det(box);
 +    GW = (vol*(MassQ->Winv/PRESFAC))*(DIM*pscal - trace(ir->ref_p));   /* W is in ps^2 * bar * nm^3 */
 +    
 +    *veta += 0.5*dt*GW;   
 +}
 +
 +/* 
 + * This file implements temperature and pressure coupling algorithms:
 + * For now only the Weak coupling and the modified weak coupling.
 + *
 + * Furthermore computation of pressure and temperature is done here
 + *
 + */
 +
 +real calc_pres(int ePBC,int nwall,matrix box,tensor ekin,tensor vir,
 +               tensor pres,real Elr)
 +{
 +    int  n,m;
 +    real fac,Plr;
 +    
 +    if (ePBC==epbcNONE || (ePBC==epbcXY && nwall!=2))
 +        clear_mat(pres);
 +    else {
 +        /* Uitzoeken welke ekin hier van toepassing is, zie Evans & Morris - E. 
 +         * Wrs. moet de druktensor gecorrigeerd worden voor de netto stroom in  
 +         * het systeem...       
 +         */
 +        
 +        /* Long range correction for periodic systems, see
 +         * Neumann et al. JCP
 +         * divide by 6 because it is multiplied by fac later on.
 +         * If Elr = 0, no correction is made.
 +         */
 +        
 +        /* This formula should not be used with Ewald or PME, 
 +         * where the full long-range virial is calculated. EL 990823
 +         */
 +        Plr = Elr/6.0;
 +        
 +        fac=PRESFAC*2.0/det(box);
 +        for(n=0; (n<DIM); n++)
 +            for(m=0; (m<DIM); m++)
 +                pres[n][m]=(ekin[n][m]-vir[n][m]+Plr)*fac;
 +        
 +        if (debug) {
 +            pr_rvecs(debug,0,"PC: pres",pres,DIM);
 +            pr_rvecs(debug,0,"PC: ekin",ekin,DIM);
 +            pr_rvecs(debug,0,"PC: vir ",vir, DIM);
 +            pr_rvecs(debug,0,"PC: box ",box, DIM);
 +        }
 +    }
 +    return trace(pres)/DIM;
 +}
 +
 +real calc_temp(real ekin,real nrdf)
 +{
 +    if (nrdf > 0)
 +        return (2.0*ekin)/(nrdf*BOLTZ);
 +    else
 +        return 0;
 +}
 +
 +void parrinellorahman_pcoupl(FILE *fplog,gmx_large_int_t step,
 +                           t_inputrec *ir,real dt,tensor pres,
 +                           tensor box,tensor box_rel,tensor boxv,
 +                           tensor M,matrix mu,gmx_bool bFirstStep)
 +{
 +  /* This doesn't do any coordinate updating. It just
 +   * integrates the box vector equations from the calculated
 +   * acceleration due to pressure difference. We also compute
 +   * the tensor M which is used in update to couple the particle
 +   * coordinates to the box vectors.
 +   *
 +   * In Nose and Klein (Mol.Phys 50 (1983) no 5., p 1055) this is
 +   * given as
 +   *            -1    .           .     -1
 +   * M_nk = (h')   * (h' * h + h' h) * h
 +   *
 +   * with the dots denoting time derivatives and h is the transformation from
 +   * the scaled frame to the real frame, i.e. the TRANSPOSE of the box. 
 +   * This also goes for the pressure and M tensors - they are transposed relative
 +   * to ours. Our equation thus becomes:
 +   *
 +   *                  -1       .    .           -1
 +   * M_gmx = M_nk' = b  * (b * b' + b * b') * b'
 +   * 
 +   * where b is the gromacs box matrix.                       
 +   * Our box accelerations are given by
 +   *   ..                                    ..
 +   *   b = vol/W inv(box') * (P-ref_P)     (=h')
 +   */
 +  
 +  int    d,n;
 +  tensor winv;
 +  real   vol=box[XX][XX]*box[YY][YY]*box[ZZ][ZZ];
 +  real   atot,arel,change,maxchange,xy_pressure;
 +  tensor invbox,pdiff,t1,t2;
 +
 +  real maxl;
 +
 +  m_inv_ur0(box,invbox);
 +
 +  if (!bFirstStep) {
 +    /* Note that PRESFAC does not occur here.
 +     * The pressure and compressibility always occur as a product,
 +     * therefore the pressure unit drops out.
 +     */
 +    maxl=max(box[XX][XX],box[YY][YY]);
 +    maxl=max(maxl,box[ZZ][ZZ]);
 +    for(d=0;d<DIM;d++)
 +      for(n=0;n<DIM;n++)
 +      winv[d][n]=
 +        (4*M_PI*M_PI*ir->compress[d][n])/(3*ir->tau_p*ir->tau_p*maxl);
 +    
 +    m_sub(pres,ir->ref_p,pdiff);
 +    
 +    if(ir->epct==epctSURFACETENSION) {
 +      /* Unlike Berendsen coupling it might not be trivial to include a z
 +       * pressure correction here? On the other hand we don't scale the
 +       * box momentarily, but change accelerations, so it might not be crucial.
 +       */
 +      xy_pressure=0.5*(pres[XX][XX]+pres[YY][YY]);
 +      for(d=0;d<ZZ;d++)
 +      pdiff[d][d]=(xy_pressure-(pres[ZZ][ZZ]-ir->ref_p[d][d]/box[d][d]));
 +    }
 +    
 +    tmmul(invbox,pdiff,t1);
 +    /* Move the off-diagonal elements of the 'force' to one side to ensure
 +     * that we obey the box constraints.
 +     */
 +    for(d=0;d<DIM;d++) {
 +      for(n=0;n<d;n++) {
 +      t1[d][n] += t1[n][d];
 +      t1[n][d] = 0;
 +      }
 +    }
 +    
 +    switch (ir->epct) {
 +    case epctANISOTROPIC:
 +      for(d=0;d<DIM;d++) 
 +      for(n=0;n<=d;n++)
 +        t1[d][n] *= winv[d][n]*vol;
 +      break;
 +    case epctISOTROPIC:
 +      /* calculate total volume acceleration */
 +      atot=box[XX][XX]*box[YY][YY]*t1[ZZ][ZZ]+
 +      box[XX][XX]*t1[YY][YY]*box[ZZ][ZZ]+
 +      t1[XX][XX]*box[YY][YY]*box[ZZ][ZZ];
 +      arel=atot/(3*vol);
 +      /* set all RELATIVE box accelerations equal, and maintain total V
 +       * change speed */
 +      for(d=0;d<DIM;d++)
 +      for(n=0;n<=d;n++)
 +        t1[d][n] = winv[0][0]*vol*arel*box[d][n];    
 +      break;
 +    case epctSEMIISOTROPIC:
 +    case epctSURFACETENSION:
 +      /* Note the correction to pdiff above for surftens. coupling  */
 +      
 +      /* calculate total XY volume acceleration */
 +      atot=box[XX][XX]*t1[YY][YY]+t1[XX][XX]*box[YY][YY];
 +      arel=atot/(2*box[XX][XX]*box[YY][YY]);
 +      /* set RELATIVE XY box accelerations equal, and maintain total V
 +       * change speed. Dont change the third box vector accelerations */
 +      for(d=0;d<ZZ;d++)
 +      for(n=0;n<=d;n++)
 +        t1[d][n] = winv[d][n]*vol*arel*box[d][n];
 +      for(n=0;n<DIM;n++)
 +      t1[ZZ][n] *= winv[d][n]*vol;
 +      break;
 +    default:
 +      gmx_fatal(FARGS,"Parrinello-Rahman pressure coupling type %s "
 +                "not supported yet\n",EPCOUPLTYPETYPE(ir->epct));
 +      break;
 +    }
 +    
 +    maxchange=0;
 +    for(d=0;d<DIM;d++)
 +      for(n=0;n<=d;n++) {
 +      boxv[d][n] += dt*t1[d][n];
 +      
 +      /* We do NOT update the box vectors themselves here, since
 +       * we need them for shifting later. It is instead done last
 +       * in the update() routine.
 +       */
 +      
 +      /* Calculate the change relative to diagonal elements-
 +         since it's perfectly ok for the off-diagonal ones to
 +         be zero it doesn't make sense to check the change relative
 +         to its current size.
 +      */
 +      
 +      change=fabs(dt*boxv[d][n]/box[d][d]);
 +      
 +      if (change>maxchange)
 +        maxchange=change;
 +      }
 +    
 +    if (maxchange > 0.01 && fplog) {
 +      char buf[22];
++      fprintf(fplog,
++              "\nStep %s  Warning: Pressure scaling more than 1%%. "
++              "This may mean your system\n is not yet equilibrated. "
++              "Use of Parrinello-Rahman pressure coupling during\n"
++              "equilibration can lead to simulation instability, "
++              "and is discouraged.\n",
 +            gmx_step_str(step,buf));
 +    }
 +  }
 +  
 +  preserve_box_shape(ir,box_rel,boxv);
 +
 +  mtmul(boxv,box,t1);       /* t1=boxv * b' */
 +  mmul(invbox,t1,t2);
 +  mtmul(t2,invbox,M);
 +
 +  /* Determine the scaling matrix mu for the coordinates */
 +  for(d=0;d<DIM;d++)
 +    for(n=0;n<=d;n++)
 +      t1[d][n] = box[d][n] + dt*boxv[d][n];
 +  preserve_box_shape(ir,box_rel,t1);
 +  /* t1 is the box at t+dt, determine mu as the relative change */
 +  mmul_ur0(invbox,t1,mu);
 +}
 +
 +void berendsen_pcoupl(FILE *fplog,gmx_large_int_t step, 
 +                    t_inputrec *ir,real dt, tensor pres,matrix box,
 +                    matrix mu)
 +{
 +  int    d,n;
 +  real   scalar_pressure, xy_pressure, p_corr_z;
 +  char   *ptr,buf[STRLEN];
 +
 +  /*
 +   *  Calculate the scaling matrix mu
 +   */
 +  scalar_pressure=0;
 +  xy_pressure=0;
 +  for(d=0; d<DIM; d++) {
 +    scalar_pressure += pres[d][d]/DIM;
 +    if (d != ZZ)
 +      xy_pressure += pres[d][d]/(DIM-1);
 +  }
 +  /* Pressure is now in bar, everywhere. */
 +#define factor(d,m) (ir->compress[d][m]*dt/ir->tau_p)
 +  
 +  /* mu has been changed from pow(1+...,1/3) to 1+.../3, since this is
 +   * necessary for triclinic scaling
 +   */
 +  clear_mat(mu);
 +  switch (ir->epct) {
 +  case epctISOTROPIC:
 +    for(d=0; d<DIM; d++) 
 +      {
 +      mu[d][d] = 1.0 - factor(d,d)*(ir->ref_p[d][d] - scalar_pressure) /DIM;
 +      }
 +    break;
 +  case epctSEMIISOTROPIC:
 +    for(d=0; d<ZZ; d++)
 +      mu[d][d] = 1.0 - factor(d,d)*(ir->ref_p[d][d]-xy_pressure)/DIM;
 +    mu[ZZ][ZZ] = 
 +      1.0 - factor(ZZ,ZZ)*(ir->ref_p[ZZ][ZZ] - pres[ZZ][ZZ])/DIM;
 +    break;
 +  case epctANISOTROPIC:
 +    for(d=0; d<DIM; d++)
 +      for(n=0; n<DIM; n++)
 +      mu[d][n] = (d==n ? 1.0 : 0.0) 
 +        -factor(d,n)*(ir->ref_p[d][n] - pres[d][n])/DIM;
 +    break;
 +  case epctSURFACETENSION:
 +    /* ir->ref_p[0/1] is the reference surface-tension times *
 +     * the number of surfaces                                */
 +    if (ir->compress[ZZ][ZZ])
 +      p_corr_z = dt/ir->tau_p*(ir->ref_p[ZZ][ZZ] - pres[ZZ][ZZ]);
 +    else
 +      /* when the compressibity is zero, set the pressure correction   *
 +       * in the z-direction to zero to get the correct surface tension */
 +      p_corr_z = 0;
 +    mu[ZZ][ZZ] = 1.0 - ir->compress[ZZ][ZZ]*p_corr_z;
 +    for(d=0; d<DIM-1; d++)
 +      mu[d][d] = 1.0 + factor(d,d)*(ir->ref_p[d][d]/(mu[ZZ][ZZ]*box[ZZ][ZZ])
 +                                  - (pres[ZZ][ZZ]+p_corr_z - xy_pressure))/(DIM-1);
 +    break;
 +  default:
 +    gmx_fatal(FARGS,"Berendsen pressure coupling type %s not supported yet\n",
 +              EPCOUPLTYPETYPE(ir->epct));
 +    break;
 +  }
 +  /* To fullfill the orientation restrictions on triclinic boxes
 +   * we will set mu_yx, mu_zx and mu_zy to 0 and correct
 +   * the other elements of mu to first order.
 +   */
 +  mu[YY][XX] += mu[XX][YY];
 +  mu[ZZ][XX] += mu[XX][ZZ];
 +  mu[ZZ][YY] += mu[YY][ZZ];
 +  mu[XX][YY] = 0;
 +  mu[XX][ZZ] = 0;
 +  mu[YY][ZZ] = 0;
 +
 +  if (debug) {
 +    pr_rvecs(debug,0,"PC: pres ",pres,3);
 +    pr_rvecs(debug,0,"PC: mu   ",mu,3);
 +  }
 +  
 +  if (mu[XX][XX]<0.99 || mu[XX][XX]>1.01 ||
 +      mu[YY][YY]<0.99 || mu[YY][YY]>1.01 ||
 +      mu[ZZ][ZZ]<0.99 || mu[ZZ][ZZ]>1.01) {
 +    char buf2[22];
 +    sprintf(buf,"\nStep %s  Warning: pressure scaling more than 1%%, "
 +          "mu: %g %g %g\n",
 +          gmx_step_str(step,buf2),mu[XX][XX],mu[YY][YY],mu[ZZ][ZZ]);
 +    if (fplog)
 +      fprintf(fplog,"%s",buf);
 +    fprintf(stderr,"%s",buf);
 +  }
 +}
 +
 +void berendsen_pscale(t_inputrec *ir,matrix mu,
 +                    matrix box,matrix box_rel,
 +                    int start,int nr_atoms,
 +                    rvec x[],unsigned short cFREEZE[],
 +                    t_nrnb *nrnb)
 +{
 +  ivec   *nFreeze=ir->opts.nFreeze;
 +  int    n,d,g=0;
 +      
 +  /* Scale the positions */
 +  for (n=start; n<start+nr_atoms; n++) {
 +    if (cFREEZE)
 +      g = cFREEZE[n];
 +    
 +    if (!nFreeze[g][XX])
 +      x[n][XX] = mu[XX][XX]*x[n][XX]+mu[YY][XX]*x[n][YY]+mu[ZZ][XX]*x[n][ZZ];
 +    if (!nFreeze[g][YY])
 +      x[n][YY] = mu[YY][YY]*x[n][YY]+mu[ZZ][YY]*x[n][ZZ];
 +    if (!nFreeze[g][ZZ])
 +      x[n][ZZ] = mu[ZZ][ZZ]*x[n][ZZ];
 +  }
 +  /* compute final boxlengths */
 +  for (d=0; d<DIM; d++) {
 +    box[d][XX] = mu[XX][XX]*box[d][XX]+mu[YY][XX]*box[d][YY]+mu[ZZ][XX]*box[d][ZZ];
 +    box[d][YY] = mu[YY][YY]*box[d][YY]+mu[ZZ][YY]*box[d][ZZ];
 +    box[d][ZZ] = mu[ZZ][ZZ]*box[d][ZZ];
 +  }      
 +
 +  preserve_box_shape(ir,box_rel,box);
 +  
 +  /* (un)shifting should NOT be done after this,
 +   * since the box vectors might have changed
 +   */
 +  inc_nrnb(nrnb,eNR_PCOUPL,nr_atoms);
 +}
 +
 +void berendsen_tcoupl(t_inputrec *ir,gmx_ekindata_t *ekind,real dt)
 +{
 +    t_grpopts *opts;
 +    int    i;
 +    real   T,reft=0,lll;
 +
 +    opts = &ir->opts;
 +
 +    for(i=0; (i<opts->ngtc); i++)
 +    {
 +        if (ir->eI == eiVV)
 +        {
 +            T = ekind->tcstat[i].T;
 +        }
 +        else
 +        {
 +            T = ekind->tcstat[i].Th;
 +        }
 +    
 +    if ((opts->tau_t[i] > 0) && (T > 0.0)) {
 + 
 +      reft = max(0.0,opts->ref_t[i]);
 +      lll  = sqrt(1.0 + (dt/opts->tau_t[i])*(reft/T-1.0));
 +      ekind->tcstat[i].lambda = max(min(lll,1.25),0.8);
 +    }
 +    else {
 +       ekind->tcstat[i].lambda = 1.0;
 +    }
 +
 +    if (debug)
 +      fprintf(debug,"TC: group %d: T: %g, Lambda: %g\n",
 +            i,T,ekind->tcstat[i].lambda);
 +  }
 +}
 +
 +void nosehoover_tcoupl(t_grpopts *opts,gmx_ekindata_t *ekind,real dt,
 +                       double xi[],double vxi[], t_extmass *MassQ)
 +{
 +    int   i;
 +    real  reft,oldvxi;
 +    
 +    /* note that this routine does not include Nose-hoover chains yet. Should be easy to add. */
 +    
 +    for(i=0; (i<opts->ngtc); i++) {
 +        reft = max(0.0,opts->ref_t[i]);
 +        oldvxi = vxi[i];
 +        vxi[i]  += dt*MassQ->Qinv[i]*(ekind->tcstat[i].Th - reft);
 +        xi[i] += dt*(oldvxi + vxi[i])*0.5;
 +    }
 +}
 +
 +t_state *init_bufstate(const t_state *template_state) 
 +{
 +    t_state *state;
 +    int nc = template_state->nhchainlength;
 +    snew(state,1);
 +    snew(state->nosehoover_xi,nc*template_state->ngtc);
 +    snew(state->nosehoover_vxi,nc*template_state->ngtc);
 +    snew(state->therm_integral,template_state->ngtc);
 +    snew(state->nhpres_xi,nc*template_state->nnhpres);
 +    snew(state->nhpres_vxi,nc*template_state->nnhpres);
 +
 +    return state;
 +}  
 +
 +void destroy_bufstate(t_state *state) 
 +{
 +    sfree(state->x);
 +    sfree(state->v);
 +    sfree(state->nosehoover_xi);
 +    sfree(state->nosehoover_vxi);
 +    sfree(state->therm_integral);
 +    sfree(state->nhpres_xi);
 +    sfree(state->nhpres_vxi);
 +    sfree(state);
 +}  
 +
 +void trotter_update(t_inputrec *ir,gmx_large_int_t step, gmx_ekindata_t *ekind, 
 +                    gmx_enerdata_t *enerd, t_state *state, 
 +                    tensor vir, t_mdatoms *md, 
 +                    t_extmass *MassQ, int **trotter_seqlist, int trotter_seqno) 
 +{
 +    
 +    int n,i,j,d,ntgrp,ngtc,gc=0;
 +    t_grp_tcstat *tcstat;
 +    t_grpopts *opts;
 +    gmx_large_int_t step_eff;
 +    real ecorr,pcorr,dvdlcorr;
 +    real bmass,qmass,reft,kT,dt,nd;
 +    tensor dumpres,dumvir;
 +    double *scalefac,dtc;
 +    int *trotter_seq;
 +    rvec sumv,consk;
 +    gmx_bool bCouple;
 +
 +    if (trotter_seqno <= ettTSEQ2)
 +    {
 +        step_eff = step-1;  /* the velocity verlet calls are actually out of order -- the first half step
 +                               is actually the last half step from the previous step.  Thus the first half step
 +                               actually corresponds to the n-1 step*/
 +                               
 +    } else {
 +        step_eff = step;
 +    }
 +
 +    bCouple = (ir->nsttcouple == 1 ||
 +               do_per_step(step_eff+ir->nsttcouple,ir->nsttcouple));
 +
 +    trotter_seq = trotter_seqlist[trotter_seqno];
 +
 +    /* signal we are returning if nothing is going to be done in this routine */
 +    if ((trotter_seq[0] == etrtSKIPALL)  || !(bCouple))
 +    {
 +        return;
 +    }
 +
 +    dtc = ir->nsttcouple*ir->delta_t;
 +    opts = &(ir->opts); /* just for ease of referencing */
 +    ngtc = opts->ngtc;
 +    snew(scalefac,opts->ngtc);
 +    for (i=0;i<ngtc;i++) 
 +    {
 +        scalefac[i] = 1;
 +    }
 +    /* execute the series of trotter updates specified in the trotterpart array */
 +    
 +    for (i=0;i<NTROTTERPARTS;i++){
 +        /* allow for doubled intgrators by doubling dt instead of making 2 calls */
 +        if ((trotter_seq[i] == etrtBAROV2) || (trotter_seq[i] == etrtBARONHC2) || (trotter_seq[i] == etrtNHC2))
 +        {
 +            dt = 2 * dtc;
 +        }
 +        else 
 +        {
 +            dt = dtc;
 +        }
 +
 +        switch (trotter_seq[i])
 +        {
 +        case etrtBAROV:
 +        case etrtBAROV2:
 +            boxv_trotter(ir,&(state->veta),dt,state->box,ekind,vir,
 +                         enerd->term[F_PDISPCORR],enerd->term[F_DISPCORR],MassQ);
 +            break;
 +        case etrtBARONHC:
 +        case etrtBARONHC2:
 +            NHC_trotter(opts,state->nnhpres,ekind,dt,state->nhpres_xi,
 +                        state->nhpres_vxi,NULL,&(state->veta),MassQ,FALSE);      
 +            break;
 +        case etrtNHC:
 +        case etrtNHC2:
 +            NHC_trotter(opts,opts->ngtc,ekind,dt,state->nosehoover_xi,
 +                        state->nosehoover_vxi,scalefac,NULL,MassQ,(ir->eI==eiVV));
 +            /* need to rescale the kinetic energies and velocities here.  Could 
 +               scale the velocities later, but we need them scaled in order to 
 +               produce the correct outputs, so we'll scale them here. */
 +            
 +            for (i=0; i<ngtc;i++) 
 +            {
 +                tcstat = &ekind->tcstat[i];
 +                tcstat->vscale_nhc = scalefac[i]; 
 +                tcstat->ekinscaleh_nhc *= (scalefac[i]*scalefac[i]); 
 +                tcstat->ekinscalef_nhc *= (scalefac[i]*scalefac[i]); 
 +            }
 +            /* now that we've scaled the groupwise velocities, we can add them up to get the total */
 +            /* but do we actually need the total? */
 +            
 +            /* modify the velocities as well */
 +            for (n=md->start;n<md->start+md->homenr;n++) 
 +            {
 +                if (md->cTC) 
 +                { 
 +                    gc = md->cTC[n];
 +                }
 +                for (d=0;d<DIM;d++) 
 +                {
 +                    state->v[n][d] *= scalefac[gc];
 +                }
 +                
 +                if (debug) 
 +                {
 +                    for (d=0;d<DIM;d++) 
 +                    {
 +                        sumv[d] += (state->v[n][d])/md->invmass[n];
 +                    }
 +                }
 +            }          
 +            break;
 +        default:
 +            break;
 +        }
 +    }
 +    /* check for conserved momentum -- worth looking at this again eventually, but not working right now.*/  
 +#if 0
 +    if (debug) 
 +    {
 +        if (bFirstHalf) 
 +        {
 +            for (d=0;d<DIM;d++) 
 +            {
 +                consk[d] = sumv[d]*exp((1 + 1.0/opts->nrdf[0])*((1.0/DIM)*log(det(state->box)/state->vol0)) + state->nosehoover_xi[0]); 
 +            }
 +            fprintf(debug,"Conserved kappa: %15.8f %15.8f %15.8f\n",consk[0],consk[1],consk[2]);    
 +        }
 +    }
 +#endif
 +    sfree(scalefac);
 +}
 +
 +int **init_npt_vars(t_inputrec *ir, t_state *state, t_extmass *MassQ, gmx_bool bTrotter) 
 +{
 +    int n,i,j,d,ntgrp,ngtc,nnhpres,nh,gc=0;
 +    t_grp_tcstat *tcstat;
 +    t_grpopts *opts;
 +    real ecorr,pcorr,dvdlcorr;
 +    real bmass,qmass,reft,kT,dt,ndj,nd;
 +    tensor dumpres,dumvir;
 +    int **trotter_seq;
 +
 +    opts = &(ir->opts); /* just for ease of referencing */
 +    ngtc = state->ngtc;
 +    nnhpres = state->nnhpres;
 +    nh = state->nhchainlength; 
 +
 +    if (ir->eI == eiMD) {
 +        snew(MassQ->Qinv,ngtc);
 +        for(i=0; (i<ngtc); i++) 
 +        { 
 +            if ((opts->tau_t[i] > 0) && (opts->ref_t[i] > 0)) 
 +            {
 +                MassQ->Qinv[i]=1.0/(sqr(opts->tau_t[i]/M_2PI)*opts->ref_t[i]);
 +            } 
 +            else 
 +            {
 +                MassQ->Qinv[i]=0.0;     
 +            }
 +        }
 +    }
 +    else if (EI_VV(ir->eI))
 +    {
 +    /* Set pressure variables */
 +        
 +        if (state->vol0 == 0) 
 +        {
 +            state->vol0 = det(state->box); /* because we start by defining a fixed compressibility, 
 +                                              we need the volume at this compressibility to solve the problem */ 
 +        }
 +
 +        /* units are nm^3 * ns^2 / (nm^3 * bar / kJ/mol) = kJ/mol  */
 +        /* Investigate this more -- is this the right mass to make it? */
 +        MassQ->Winv = (PRESFAC*trace(ir->compress)*BOLTZ*opts->ref_t[0])/(DIM*state->vol0*sqr(ir->tau_p/M_2PI));
 +        /* An alternate mass definition, from Tuckerman et al. */ 
 +        /* MassQ->Winv = 1.0/(sqr(ir->tau_p/M_2PI)*(opts->nrdf[0]+DIM)*BOLTZ*opts->ref_t[0]); */
 +        for (d=0;d<DIM;d++) 
 +        {
 +            for (n=0;n<DIM;n++) 
 +            {
 +                MassQ->Winvm[d][n]= PRESFAC*ir->compress[d][n]/(state->vol0*sqr(ir->tau_p/M_2PI)); 
 +                /* not clear this is correct yet for the anisotropic case*/
 +            } 
 +        }           
 +        /* Allocate space for thermostat variables */
 +        snew(MassQ->Qinv,ngtc*nh);
 +        
 +        /* now, set temperature variables */
 +        for(i=0; i<ngtc; i++) 
 +        {
 +            if ((opts->tau_t[i] > 0) && (opts->ref_t[i] > 0)) 
 +            {
 +                reft = max(0.0,opts->ref_t[i]);
 +                nd = opts->nrdf[i];
 +                kT = BOLTZ*reft;
 +                for (j=0;j<nh;j++) 
 +                {
 +                    if (j==0) 
 +                    {
 +                        ndj = nd;
 +                    } 
 +                    else 
 +                    {
 +                        ndj = 1;
 +                    }
 +                    MassQ->Qinv[i*nh+j]   = 1.0/(sqr(opts->tau_t[i]/M_2PI)*ndj*kT);
 +                }
 +            }
 +            else 
 +            {
 +                reft=0.0;
 +                for (j=0;j<nh;j++) 
 +                {
 +                    MassQ->Qinv[i*nh+j] = 0.0;
 +                }
 +            }
 +        }
 +    }
 +    
 +    /* first, initialize clear all the trotter calls */
 +    snew(trotter_seq,ettTSEQMAX);
 +    for (i=0;i<ettTSEQMAX;i++) 
 +    {
 +        snew(trotter_seq[i],NTROTTERPARTS);
 +        for (j=0;j<NTROTTERPARTS;j++) {
 +            trotter_seq[i][j] = etrtNONE;
 +        }
 +        trotter_seq[i][0] = etrtSKIPALL;
 +    }
 +    
 +    if (!bTrotter) 
 +    {
 +        /* no trotter calls, so we never use the values in the array.
 +         * We access them (so we need to define them, but ignore
 +         * then.*/
 +
 +        return trotter_seq;
 +    }
 +
 +    /* compute the kinetic energy by using the half step velocities or
 +     * the kinetic energies, depending on the order of the trotter calls */
 +
 +    if (ir->eI==eiVV)
 +    {
 +        if (IR_NPT_TROTTER(ir)) 
 +        {
 +            /* This is the complicated version - there are 4 possible calls, depending on ordering.
 +               We start with the initial one. */
 +            /* first, a round that estimates veta. */
 +            trotter_seq[0][0] = etrtBAROV; 
 +            
 +            /* trotter_seq[1] is etrtNHC for 1/2 step velocities - leave zero */
 +            
 +            /* The first half trotter update */
 +            trotter_seq[2][0] = etrtBAROV;
 +            trotter_seq[2][1] = etrtNHC;
 +            trotter_seq[2][2] = etrtBARONHC;
 +            
 +            /* The second half trotter update */
 +            trotter_seq[3][0] = etrtBARONHC;
 +            trotter_seq[3][1] = etrtNHC;
 +            trotter_seq[3][2] = etrtBAROV;
 +
 +            /* trotter_seq[4] is etrtNHC for second 1/2 step velocities - leave zero */
 +
 +        } 
 +        else 
 +        {
 +            if (IR_NVT_TROTTER(ir)) 
 +            {
 +                /* This is the easy version - there are only two calls, both the same. 
 +                   Otherwise, even easier -- no calls  */
 +                trotter_seq[2][0] = etrtNHC;
 +                trotter_seq[3][0] = etrtNHC;
 +            }
 +        }
 +    } else if (ir->eI==eiVVAK) {
 +        if (IR_NPT_TROTTER(ir)) 
 +        {
 +            /* This is the complicated version - there are 4 possible calls, depending on ordering.
 +               We start with the initial one. */
 +            /* first, a round that estimates veta. */
 +            trotter_seq[0][0] = etrtBAROV; 
 +            
 +            /* The first half trotter update, part 1 -- double update, because it commutes */
 +            trotter_seq[1][0] = etrtNHC;
 +
 +            /* The first half trotter update, part 2 */
 +            trotter_seq[2][0] = etrtBAROV;
 +            trotter_seq[2][1] = etrtBARONHC;
 +            
 +            /* The second half trotter update, part 1 */
 +            trotter_seq[3][0] = etrtBARONHC;
 +            trotter_seq[3][1] = etrtBAROV;
 +
 +            /* The second half trotter update -- blank for now */
 +            trotter_seq[4][0] = etrtNHC;
 +        } 
 +        else 
 +        {
 +            if (IR_NVT_TROTTER(ir)) 
 +            {
 +                /* This is the easy version - there is only one call, both the same. 
 +                   Otherwise, even easier -- no calls  */
 +                trotter_seq[1][0] = etrtNHC;
 +                trotter_seq[4][0] = etrtNHC;
 +            }
 +        }
 +    }
 +
 +    switch (ir->epct) 
 +    {
 +    case epctISOTROPIC:  
 +    default:
 +        bmass = DIM*DIM;  /* recommended mass parameters for isotropic barostat */
 +    }    
 +
 +    snew(MassQ->QPinv,nnhpres*opts->nhchainlength);
 +
 +    /* barostat temperature */
 +    if ((ir->tau_p > 0) && (opts->ref_t[0] > 0)) 
 +    {
 +        reft = max(0.0,opts->ref_t[0]);
 +        kT = BOLTZ*reft;
 +        for (i=0;i<nnhpres;i++) {
 +            for (j=0;j<nh;j++) 
 +            {
 +                if (j==0) {
 +                    qmass = bmass;
 +                } 
 +                else 
 +                {
 +                    qmass = 1;
 +                }
 +                MassQ->QPinv[i*opts->nhchainlength+j]   = 1.0/(sqr(opts->tau_t[0]/M_2PI)*qmass*kT);
 +            }
 +        }
 +    }
 +    else 
 +    {
 +        for (i=0;i<nnhpres;i++) {
 +            for (j=0;j<nh;j++) 
 +            {
 +                MassQ->QPinv[i*nh+j]=0.0;
 +            }
 +        }
 +    }    
 +    return trotter_seq;
 +}
 +
 +real NPT_energy(t_inputrec *ir, t_state *state, t_extmass *MassQ)
 +{
 +    int  i,j,nd,ndj,bmass,qmass,ngtcall;
 +    real ener_npt,reft,eta,kT,tau;
 +    double *ivxi, *ixi;
 +    double *iQinv;
 +    real vol,dbaro,W,Q;
 +    int nh = state->nhchainlength;
 +
 +    ener_npt = 0;
 +    
 +    /* now we compute the contribution of the pressure to the conserved quantity*/
 +    
 +    if (ir->epc==epcMTTK) 
 +    {
 +        /* find the volume, and the kinetic energy of the volume */
 +        
 +        switch (ir->epct) {
 +            
 +        case epctISOTROPIC:
 +            /* contribution from the pressure momenenta */
 +            ener_npt += 0.5*sqr(state->veta)/MassQ->Winv;
 +            
 +            /* contribution from the PV term */
 +            vol = det(state->box);
 +            ener_npt += vol*trace(ir->ref_p)/(DIM*PRESFAC);
 +
 +            break;
 +        case epctANISOTROPIC:
 +            
 +            break;
 +            
 +        case epctSURFACETENSION:
 +            
 +            break;
 +        case epctSEMIISOTROPIC:
 +            
 +            break;
 +        default:
 +            break;
 +        }
 +    }
 +    
 +    if (IR_NPT_TROTTER(ir)) 
 +    {
 +        /* add the energy from the barostat thermostat chain */
 +        for (i=0;i<state->nnhpres;i++) {
 +
 +            /* note -- assumes only one degree of freedom that is thermostatted in barostat */
 +            ivxi = &state->nhpres_vxi[i*nh];
 +            ixi = &state->nhpres_xi[i*nh];
 +            iQinv = &(MassQ->QPinv[i*nh]);
 +            reft = max(ir->opts.ref_t[0],0); /* using 'System' temperature */
 +            kT = BOLTZ * reft;
 +        
 +            for (j=0;j<nh;j++) 
 +            {
 +                if (iQinv[j] > 0)
 +                {
 +                    ener_npt += 0.5*sqr(ivxi[j])/iQinv[j];
 +                    /* contribution from the thermal variable of the NH chain */
 +                    ener_npt += ixi[j]*kT;
 +                }
 +                if (debug) 
 +                {
 +                    fprintf(debug,"P-T-group: %10d Chain %4d ThermV: %15.8f ThermX: %15.8f",i,j,ivxi[j],ixi[j]);
 +                }
 +            }
 +        }
 +    }
 +        
 +    if (ir->etc) 
 +    {
 +        for(i=0; i<ir->opts.ngtc; i++) 
 +        {
 +            ixi = &state->nosehoover_xi[i*nh];
 +            ivxi = &state->nosehoover_vxi[i*nh];
 +            iQinv = &(MassQ->Qinv[i*nh]);
 +            
 +            nd = ir->opts.nrdf[i];
 +            reft = max(ir->opts.ref_t[i],0);
 +            kT = BOLTZ * reft;
 +            
 +            if (nd > 0) 
 +            {
 +                if (IR_NVT_TROTTER(ir))
 +                {
 +                    /* contribution from the thermal momenta of the NH chain */
 +                    for (j=0;j<nh;j++) 
 +                    {
 +                        if (iQinv[j] > 0) {
 +                            ener_npt += 0.5*sqr(ivxi[j])/iQinv[j];
 +                            /* contribution from the thermal variable of the NH chain */
 +                            if (j==0) {
 +                                ndj = nd;
 +                            } 
 +                            else 
 +                            {
 +                                ndj = 1;
 +                            } 
 +                            ener_npt += ndj*ixi[j]*kT;
 +                        }
 +                    }
 +                }
 +                else  /* Other non Trotter temperature NH control  -- no chains yet. */
 +                { 
 +                    ener_npt += 0.5*BOLTZ*nd*sqr(ivxi[0])/iQinv[0];
 +                    ener_npt += nd*ixi[0]*kT;
 +                }
 +            }
 +        }
 +    }
 +    return ener_npt;
 +}
 +
 +static real vrescale_gamdev(int ia, gmx_rng_t rng)
 +/* Gamma distribution, adapted from numerical recipes */
 +{
 +    int j;
 +    real am,e,s,v1,v2,x,y;
 +
 +    if (ia < 6)
 +    {
 +        do
 +        {
 +            x = 1.0;
 +            for(j=1; j<=ia; j++)
 +            {
 +                x *= gmx_rng_uniform_real(rng);
 +            }
 +        }
 +        while (x == 0);
 +        x = -log(x);
 +    }
 +    else
 +    {
 +        do
 +        {
 +            do
 +            {
 +                do
 +                {
 +                    v1 = gmx_rng_uniform_real(rng);
 +                    v2 = 2.0*gmx_rng_uniform_real(rng)-1.0;
 +                }
 +                while (v1*v1 + v2*v2 > 1.0 ||
 +                       v1*v1*GMX_REAL_MAX < 3.0*ia);
 +                /* The last check above ensures that both x (3.0 > 2.0 in s)
 +                 * and the pre-factor for e do not go out of range.
 +                 */
 +                y = v2/v1;
 +                am = ia - 1;
 +                s = sqrt(2.0*am + 1.0);
 +                x = s*y + am;
 +            }
 +            while (x <= 0.0);
 +            e = (1.0 + y*y)*exp(am*log(x/am) - s*y);
 +        }
 +        while (gmx_rng_uniform_real(rng) > e);
 +    }
 +
 +    return x;
 +}
 +
 +static real vrescale_sumnoises(int nn,gmx_rng_t rng)
 +{
 +/*
 + * Returns the sum of n independent gaussian noises squared
 + * (i.e. equivalent to summing the square of the return values
 + * of nn calls to gmx_rng_gaussian_real).xs
 + */
 +  real rr;
 +
 +  if (nn == 0) {
 +    return 0.0;
 +  } else if (nn == 1) {
 +    rr = gmx_rng_gaussian_real(rng);
 +    return rr*rr;
 +  } else if (nn % 2 == 0) {
 +    return 2.0*vrescale_gamdev(nn/2,rng);
 +  } else {
 +    rr = gmx_rng_gaussian_real(rng);
 +    return 2.0*vrescale_gamdev((nn-1)/2,rng) + rr*rr;
 +  }
 +}
 +
 +static real vrescale_resamplekin(real kk,real sigma, int ndeg, real taut,
 +                               gmx_rng_t rng)
 +{
 +/*
 + * Generates a new value for the kinetic energy,
 + * according to Bussi et al JCP (2007), Eq. (A7)
 + * kk:    present value of the kinetic energy of the atoms to be thermalized (in arbitrary units)
 + * sigma: target average value of the kinetic energy (ndeg k_b T/2)  (in the same units as kk)
 + * ndeg:  number of degrees of freedom of the atoms to be thermalized
 + * taut:  relaxation time of the thermostat, in units of 'how often this routine is called'
 + */
 +  real factor,rr;
 +
 +  if (taut > 0.1) {
 +    factor = exp(-1.0/taut);
 +  } else {
 +    factor = 0.0;
 +  }
 +  rr = gmx_rng_gaussian_real(rng);
 +  return
 +    kk +
 +    (1.0 - factor)*(sigma*(vrescale_sumnoises(ndeg-1,rng) + rr*rr)/ndeg - kk) +
 +    2.0*rr*sqrt(kk*sigma/ndeg*(1.0 - factor)*factor);
 +}
 +
 +void vrescale_tcoupl(t_inputrec *ir,gmx_ekindata_t *ekind,real dt,
 +                     double therm_integral[],gmx_rng_t rng)
 +{
 +    t_grpopts *opts;
 +    int    i;
 +    real   Ek,Ek_ref1,Ek_ref,Ek_new; 
 +    
 +    opts = &ir->opts;
 +
 +    for(i=0; (i<opts->ngtc); i++)
 +    {
 +        if (ir->eI == eiVV)
 +        {
 +            Ek = trace(ekind->tcstat[i].ekinf);
 +        }
 +        else
 +        {
 +            Ek = trace(ekind->tcstat[i].ekinh);
 +        }
 +        
 +        if (opts->tau_t[i] >= 0 && opts->nrdf[i] > 0 && Ek > 0)
 +        {
 +            Ek_ref1 = 0.5*opts->ref_t[i]*BOLTZ;
 +            Ek_ref  = Ek_ref1*opts->nrdf[i];
 +
 +            Ek_new  = vrescale_resamplekin(Ek,Ek_ref,opts->nrdf[i],
 +                                           opts->tau_t[i]/dt,rng);
 +
 +            /* Analytically Ek_new>=0, but we check for rounding errors */
 +            if (Ek_new <= 0)
 +            {
 +                ekind->tcstat[i].lambda = 0.0;
 +            }
 +            else
 +            {
 +                ekind->tcstat[i].lambda = sqrt(Ek_new/Ek);
 +            }
 +
 +            therm_integral[i] -= Ek_new - Ek;
 +
 +            if (debug)
 +            {
 +                fprintf(debug,"TC: group %d: Ekr %g, Ek %g, Ek_new %g, Lambda: %g\n",
 +                        i,Ek_ref,Ek,Ek_new,ekind->tcstat[i].lambda);
 +            }
 +        }
 +        else
 +        {
 +            ekind->tcstat[i].lambda = 1.0;
 +        }
 +    }
 +}
 +
 +real vrescale_energy(t_grpopts *opts,double therm_integral[])
 +{
 +  int i;
 +  real ener;
 +
 +  ener = 0;
 +  for(i=0; i<opts->ngtc; i++) {
 +    ener += therm_integral[i];
 +  }
 +  
 +  return ener;
 +}
 +
 +void rescale_velocities(gmx_ekindata_t *ekind,t_mdatoms *mdatoms,
 +                        int start,int end,rvec v[])
 +{
 +    t_grp_acc      *gstat;
 +    t_grp_tcstat   *tcstat;
 +    unsigned short *cACC,*cTC;
 +    int  ga,gt,n,d;
 +    real lg;
 +    rvec vrel;
 +
 +    tcstat = ekind->tcstat;
 +    cTC    = mdatoms->cTC;
 +
 +    if (ekind->bNEMD)
 +    {
 +        gstat  = ekind->grpstat;
 +        cACC   = mdatoms->cACC;
 +
 +        ga = 0;
 +        gt = 0;
 +        for(n=start; n<end; n++) 
 +        {
 +            if (cACC) 
 +            {
 +                ga   = cACC[n];
 +            }
 +            if (cTC)
 +            {
 +                gt   = cTC[n];
 +            }
 +            /* Only scale the velocity component relative to the COM velocity */
 +            rvec_sub(v[n],gstat[ga].u,vrel);
 +            lg = tcstat[gt].lambda;
 +            for(d=0; d<DIM; d++)
 +            {
 +                v[n][d] = gstat[ga].u[d] + lg*vrel[d];
 +            }
 +        }
 +    }
 +    else
 +    {
 +        gt = 0;
 +        for(n=start; n<end; n++) 
 +        {
 +            if (cTC)
 +            {
 +                gt   = cTC[n];
 +            }
 +            lg = tcstat[gt].lambda;
 +            for(d=0; d<DIM; d++)
 +            {
 +                v[n][d] *= lg;
 +            }
 +        }
 +    }
 +}
 +
 +
 +/* set target temperatures if we are annealing */
 +void update_annealing_target_temp(t_grpopts *opts,real t)
 +{
 +  int i,j,n,npoints;
 +  real pert,thist=0,x;
 +
 +  for(i=0;i<opts->ngtc;i++) {
 +    npoints = opts->anneal_npoints[i];
 +    switch (opts->annealing[i]) {
 +    case eannNO:
 +      continue;
 +    case  eannPERIODIC:
 +      /* calculate time modulo the period */
 +      pert  = opts->anneal_time[i][npoints-1];
 +      n     = t / pert;
 +      thist = t - n*pert; /* modulo time */
 +      /* Make sure rounding didn't get us outside the interval */
 +      if (fabs(thist-pert) < GMX_REAL_EPS*100)
 +      thist=0;
 +      break;
 +    case eannSINGLE:
 +      thist = t;
 +      break;
 +    default:
 +      gmx_fatal(FARGS,"Death horror in update_annealing_target_temp (i=%d/%d npoints=%d)",i,opts->ngtc,npoints);
 +    }
 +    /* We are doing annealing for this group if we got here, 
 +     * and we have the (relative) time as thist.
 +     * calculate target temp */
 +    j=0;
 +    while ((j < npoints-1) && (thist>(opts->anneal_time[i][j+1])))
 +      j++;
 +    if (j < npoints-1) {
 +      /* Found our position between points j and j+1. 
 +       * Interpolate: x is the amount from j+1, (1-x) from point j 
 +       * First treat possible jumps in temperature as a special case.
 +       */
 +      if ((opts->anneal_time[i][j+1]-opts->anneal_time[i][j]) < GMX_REAL_EPS*100)
 +      opts->ref_t[i]=opts->anneal_temp[i][j+1];
 +      else {
 +      x = ((thist-opts->anneal_time[i][j])/
 +           (opts->anneal_time[i][j+1]-opts->anneal_time[i][j]));
 +      opts->ref_t[i] = x*opts->anneal_temp[i][j+1]+(1-x)*opts->anneal_temp[i][j];
 +      }
 +    }
 +    else {
 +      opts->ref_t[i] = opts->anneal_temp[i][npoints-1];
 +    }
 +  }
 +}
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
index 47452c56b820b39b3e96d06fbfc0958fb58a25af,0000000000000000000000000000000000000000..3ca50a3db80eade48f896b8c99c7666dfdb8b4b8
mode 100644,000000..100644
--- /dev/null
@@@ -1,2516 -1,0 +1,2472 @@@
- static void do_x_step(t_commrec *cr,int n,rvec *x1,real a,rvec *f,rvec *x2)
- {
-   int  start,end,i,m;
-   if (DOMAINDECOMP(cr)) {
-     start = 0;
-     end   = cr->dd->nat_home;
-   } else if (PARTDECOMP(cr)) {
-     pd_at_range(cr,&start,&end);
-   } else {
-     start = 0;
-     end   = n;
-   }
-   for(i=start; i<end; i++) {
-     for(m=0; m<DIM; m++) {
-       x2[i][m] = x1[i][m] + a*f[i][m];
-     }
-   }
- }
- static void do_x_sub(t_commrec *cr,int n,rvec *x1,rvec *x2,real a,rvec *f)
- {
-   int  start,end,i,m;
-   if (DOMAINDECOMP(cr)) {
-     start = 0;
-     end   = cr->dd->nat_home;
-   } else if (PARTDECOMP(cr)) {
-     pd_at_range(cr,&start,&end);
-   } else {
-     start = 0;
-     end   = n;
-   }
-   for(i=start; i<end; i++) {
-     for(m=0; m<DIM; m++) {
-       f[i][m] = (x1[i][m] - x2[i][m])*a;
-     }
-   }
- }
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <string.h>
 +#include <time.h>
 +#include <math.h>
 +#include "sysstuff.h"
 +#include "string2.h"
 +#include "network.h"
 +#include "confio.h"
 +#include "copyrite.h"
 +#include "smalloc.h"
 +#include "nrnb.h"
 +#include "main.h"
 +#include "force.h"
 +#include "macros.h"
 +#include "random.h"
 +#include "names.h"
 +#include "gmx_fatal.h"
 +#include "txtdump.h"
 +#include "typedefs.h"
 +#include "update.h"
 +#include "constr.h"
 +#include "vec.h"
 +#include "statutil.h"
 +#include "tgroup.h"
 +#include "mdebin.h"
 +#include "vsite.h"
 +#include "force.h"
 +#include "mdrun.h"
 +#include "domdec.h"
 +#include "partdec.h"
 +#include "trnio.h"
 +#include "sparsematrix.h"
 +#include "mtxio.h"
 +#include "mdatoms.h"
 +#include "ns.h"
 +#include "gmx_wallcycle.h"
 +#include "mtop_util.h"
 +#include "gmxfio.h"
 +#include "pme.h"
 +#include "membed.h"
 +
 +typedef struct {
 +  t_state s;
 +  rvec    *f;
 +  real    epot;
 +  real    fnorm;
 +  real    fmax;
 +  int     a_fmax;
 +} em_state_t;
 +
 +static em_state_t *init_em_state()
 +{
 +  em_state_t *ems;
 +  
 +  snew(ems,1);
 +
 +  return ems;
 +}
 +
 +static void print_em_start(FILE *fplog,t_commrec *cr,gmx_runtime_t *runtime,
 +                           gmx_wallcycle_t wcycle,
 +                           const char *name)
 +{
 +    char buf[STRLEN];
 +
 +    runtime_start(runtime);
 +
 +    sprintf(buf,"Started %s",name);
 +    print_date_and_time(fplog,cr->nodeid,buf,NULL);
 +
 +    wallcycle_start(wcycle,ewcRUN);
 +}
 +static void em_time_end(FILE *fplog,t_commrec *cr,gmx_runtime_t *runtime,
 +                        gmx_wallcycle_t wcycle)
 +{
 +    wallcycle_stop(wcycle,ewcRUN);
 +
 +    runtime_end(runtime);
 +}
 +
 +static void sp_header(FILE *out,const char *minimizer,real ftol,int nsteps)
 +{
 +    fprintf(out,"\n");
 +    fprintf(out,"%s:\n",minimizer);
 +    fprintf(out,"   Tolerance (Fmax)   = %12.5e\n",ftol);
 +    fprintf(out,"   Number of steps    = %12d\n",nsteps);
 +}
 +
 +static void warn_step(FILE *fp,real ftol,gmx_bool bLastStep,gmx_bool bConstrain)
 +{
 +    if (bLastStep)
 +    {
 +        fprintf(fp,"\nReached the maximum number of steps before reaching Fmax < %g\n",ftol);
 +    }
 +    else
 +    {
 +        fprintf(fp,"\nStepsize too small, or no change in energy.\n"
 +                "Converged to machine precision,\n"
 +                "but not to the requested precision Fmax < %g\n",
 +                ftol);
 +        if (sizeof(real)<sizeof(double))
 +        {
 +            fprintf(fp,"\nDouble precision normally gives you higher accuracy.\n");
 +        }
 +        if (bConstrain)
 +        {
 +            fprintf(fp,"You might need to increase your constraint accuracy, or turn\n"
 +                    "off constraints alltogether (set constraints = none in mdp file)\n");
 +        }
 +    }
 +}
 +
 +
 +
 +static void print_converged(FILE *fp,const char *alg,real ftol,
 +                          gmx_large_int_t count,gmx_bool bDone,gmx_large_int_t nsteps,
 +                          real epot,real fmax, int nfmax, real fnorm)
 +{
 +  char buf[STEPSTRSIZE];
 +
 +  if (bDone)
 +    fprintf(fp,"\n%s converged to Fmax < %g in %s steps\n",
 +          alg,ftol,gmx_step_str(count,buf)); 
 +  else if(count<nsteps)
 +    fprintf(fp,"\n%s converged to machine precision in %s steps,\n"
 +               "but did not reach the requested Fmax < %g.\n",
 +          alg,gmx_step_str(count,buf),ftol);
 +  else 
 +    fprintf(fp,"\n%s did not converge to Fmax < %g in %s steps.\n",
 +          alg,ftol,gmx_step_str(count,buf));
 +
 +#ifdef GMX_DOUBLE
 +  fprintf(fp,"Potential Energy  = %21.14e\n",epot); 
 +  fprintf(fp,"Maximum force     = %21.14e on atom %d\n",fmax,nfmax+1); 
 +  fprintf(fp,"Norm of force     = %21.14e\n",fnorm); 
 +#else
 +  fprintf(fp,"Potential Energy  = %14.7e\n",epot); 
 +  fprintf(fp,"Maximum force     = %14.7e on atom %d\n",fmax,nfmax+1); 
 +  fprintf(fp,"Norm of force     = %14.7e\n",fnorm); 
 +#endif
 +}
 +
 +static void get_f_norm_max(t_commrec *cr,
 +                         t_grpopts *opts,t_mdatoms *mdatoms,rvec *f,
 +                         real *fnorm,real *fmax,int *a_fmax)
 +{
 +  double fnorm2,*sum;
 +  real fmax2,fmax2_0,fam;
 +  int  la_max,a_max,start,end,i,m,gf;
 +
 +  /* This routine finds the largest force and returns it.
 +   * On parallel machines the global max is taken.
 +   */
 +  fnorm2 = 0;
 +  fmax2 = 0;
 +  la_max = -1;
 +  gf = 0;
 +  start = mdatoms->start;
 +  end   = mdatoms->homenr + start;
 +  if (mdatoms->cFREEZE) {
 +    for(i=start; i<end; i++) {
 +      gf = mdatoms->cFREEZE[i];
 +      fam = 0;
 +      for(m=0; m<DIM; m++)
 +      if (!opts->nFreeze[gf][m])
 +        fam += sqr(f[i][m]);
 +      fnorm2 += fam;
 +      if (fam > fmax2) {
 +      fmax2  = fam;
 +      la_max = i;
 +      }
 +    }
 +  } else {
 +    for(i=start; i<end; i++) {
 +      fam = norm2(f[i]);
 +      fnorm2 += fam;
 +      if (fam > fmax2) {
 +      fmax2  = fam;
 +      la_max = i;
 +      }
 +    }
 +  }
 +
 +  if (la_max >= 0 && DOMAINDECOMP(cr)) {
 +    a_max = cr->dd->gatindex[la_max];
 +  } else {
 +    a_max = la_max;
 +  }
 +  if (PAR(cr)) {
 +    snew(sum,2*cr->nnodes+1);
 +    sum[2*cr->nodeid]   = fmax2;
 +    sum[2*cr->nodeid+1] = a_max;
 +    sum[2*cr->nnodes]   = fnorm2;
 +    gmx_sumd(2*cr->nnodes+1,sum,cr);
 +    fnorm2 = sum[2*cr->nnodes];
 +    /* Determine the global maximum */
 +    for(i=0; i<cr->nnodes; i++) {
 +      if (sum[2*i] > fmax2) {
 +      fmax2 = sum[2*i];
 +      a_max = (int)(sum[2*i+1] + 0.5);
 +      }
 +    }
 +    sfree(sum);
 +  }
 +
 +  if (fnorm)
 +    *fnorm = sqrt(fnorm2);
 +  if (fmax)
 +    *fmax  = sqrt(fmax2);
 +  if (a_fmax)
 +    *a_fmax = a_max;
 +}
 +
 +static void get_state_f_norm_max(t_commrec *cr,
 +                         t_grpopts *opts,t_mdatoms *mdatoms,
 +                         em_state_t *ems)
 +{
 +  get_f_norm_max(cr,opts,mdatoms,ems->f,&ems->fnorm,&ems->fmax,&ems->a_fmax);
 +}
 +
 +void init_em(FILE *fplog,const char *title,
 +             t_commrec *cr,t_inputrec *ir,
 +             t_state *state_global,gmx_mtop_t *top_global,
 +             em_state_t *ems,gmx_localtop_t **top,
 +             rvec **f,rvec **f_global,
 +             t_nrnb *nrnb,rvec mu_tot,
 +             t_forcerec *fr,gmx_enerdata_t **enerd,
 +             t_graph **graph,t_mdatoms *mdatoms,gmx_global_stat_t *gstat,
 +             gmx_vsite_t *vsite,gmx_constr_t constr,
 +             int nfile,const t_filenm fnm[],
 +             gmx_mdoutf_t **outf,t_mdebin **mdebin)
 +{
 +    int  start,homenr,i;
 +    real dvdlambda;
 +    
 +    if (fplog)
 +    {
 +        fprintf(fplog,"Initiating %s\n",title);
 +    }
 +    
 +    state_global->ngtc = 0;
 +    
 +    /* Initiate some variables */
 +    if (ir->efep != efepNO)
 +    {
 +        state_global->lambda = ir->init_lambda;
 +    }
 +    else 
 +    {
 +        state_global->lambda = 0.0;
 +    }
 +    
 +    init_nrnb(nrnb);
 +    
 +    if (DOMAINDECOMP(cr))
 +    {
 +        *top = dd_init_local_top(top_global);
 +        
 +        dd_init_local_state(cr->dd,state_global,&ems->s);
 +
 +        *f = NULL;
 +        
 +        /* Distribute the charge groups over the nodes from the master node */
 +        dd_partition_system(fplog,ir->init_step,cr,TRUE,1,
 +                            state_global,top_global,ir,
 +                            &ems->s,&ems->f,mdatoms,*top,
 +                            fr,vsite,NULL,constr,
 +                            nrnb,NULL,FALSE);
 +        dd_store_state(cr->dd,&ems->s);
 +        
 +        if (ir->nstfout)
 +        {
 +            snew(*f_global,top_global->natoms);
 +        }
 +        else
 +        {
 +            *f_global = NULL;
 +        }
 +        *graph = NULL;
 +    }
 +    else
 +    {
 +        snew(*f,top_global->natoms);
 +
 +        /* Just copy the state */
 +        ems->s = *state_global;
 +        snew(ems->s.x,ems->s.nalloc);
 +        snew(ems->f,ems->s.nalloc);
 +        for(i=0; i<state_global->natoms; i++)
 +        {
 +            copy_rvec(state_global->x[i],ems->s.x[i]);
 +        }
 +        copy_mat(state_global->box,ems->s.box);
 +        
 +        if (PAR(cr) && ir->eI != eiNM)
 +        {
 +            /* Initialize the particle decomposition and split the topology */
 +            *top = split_system(fplog,top_global,ir,cr);
 +            
 +            pd_cg_range(cr,&fr->cg0,&fr->hcg);
 +        }
 +        else
 +        {
 +            *top = gmx_mtop_generate_local_top(top_global,ir);
 +        }
 +        *f_global = *f;
 +        
 +        if (ir->ePBC != epbcNONE && !ir->bPeriodicMols)
 +        {
 +            *graph = mk_graph(fplog,&((*top)->idef),0,top_global->natoms,FALSE,FALSE);
 +        }
 +        else
 +        {
 +            *graph = NULL;
 +        }
 +
 +        if (PARTDECOMP(cr))
 +        {
 +            pd_at_range(cr,&start,&homenr);
 +            homenr -= start;
 +        }
 +        else
 +        {
 +            start  = 0;
 +            homenr = top_global->natoms;
 +        }
 +        atoms2md(top_global,ir,0,NULL,start,homenr,mdatoms);
 +        update_mdatoms(mdatoms,state_global->lambda);
 +    
 +        if (vsite)
 +        {
 +            set_vsite_top(vsite,*top,mdatoms,cr);
 +        }
 +    }
 +    
 +    if (constr)
 +    {
 +        if (ir->eConstrAlg == econtSHAKE &&
 +            gmx_mtop_ftype_count(top_global,F_CONSTR) > 0)
 +        {
 +            gmx_fatal(FARGS,"Can not do energy minimization with %s, use %s\n",
 +                      econstr_names[econtSHAKE],econstr_names[econtLINCS]);
 +        }
 +        
 +        if (!DOMAINDECOMP(cr))
 +        {
 +            set_constraints(constr,*top,ir,mdatoms,cr);
 +        }
 +
 +        if (!ir->bContinuation)
 +        {
 +            /* Constrain the starting coordinates */
 +            dvdlambda=0;
 +            constrain(PAR(cr) ? NULL : fplog,TRUE,TRUE,constr,&(*top)->idef,
 +                      ir,NULL,cr,-1,0,mdatoms,
 +                      ems->s.x,ems->s.x,NULL,ems->s.box,
 +                      ems->s.lambda,&dvdlambda,
 +                      NULL,NULL,nrnb,econqCoord,FALSE,0,0);
 +        }
 +    }
 +    
 +    if (PAR(cr))
 +    {
 +        *gstat = global_stat_init(ir);
 +    }
 +    
 +    *outf = init_mdoutf(nfile,fnm,0,cr,ir,NULL);
 +
 +    snew(*enerd,1);
 +    init_enerdata(top_global->groups.grps[egcENER].nr,ir->n_flambda,*enerd);
 +
 +    if (mdebin != NULL)
 +    {
 +        /* Init bin for energy stuff */
 +        *mdebin = init_mdebin((*outf)->fp_ene,top_global,ir,NULL); 
 +    }
 +
 +    clear_rvec(mu_tot);
 +    calc_shifts(ems->s.box,fr->shift_vec);
 +}
 +
 +static void finish_em(FILE *fplog,t_commrec *cr,gmx_mdoutf_t *outf,
 +                      gmx_runtime_t *runtime,gmx_wallcycle_t wcycle)
 +{
 +  if (!(cr->duty & DUTY_PME)) {
 +    /* Tell the PME only node to finish */
 +    gmx_pme_finish(cr);
 +  }
 +
 +  done_mdoutf(outf);
 +
 +  em_time_end(fplog,cr,runtime,wcycle);
 +}
 +
 +static void swap_em_state(em_state_t *ems1,em_state_t *ems2)
 +{
 +  em_state_t tmp;
 +
 +  tmp   = *ems1;
 +  *ems1 = *ems2;
 +  *ems2 = tmp;
 +}
 +
 +static void copy_em_coords(em_state_t *ems,t_state *state)
 +{
 +    int i;
 +
 +    for(i=0; (i<state->natoms); i++)
 +    {
 +        copy_rvec(ems->s.x[i],state->x[i]);
 +    }
 +}
 +
 +static void write_em_traj(FILE *fplog,t_commrec *cr,
 +                          gmx_mdoutf_t *outf,
 +                          gmx_bool bX,gmx_bool bF,const char *confout,
 +                          gmx_mtop_t *top_global,
 +                          t_inputrec *ir,gmx_large_int_t step,
 +                          em_state_t *state,
 +                          t_state *state_global,rvec *f_global)
 +{
 +    int mdof_flags;
 +
 +    if ((bX || bF || confout != NULL) && !DOMAINDECOMP(cr))
 +    {
 +        copy_em_coords(state,state_global);
 +        f_global = state->f;
 +    }
 +    
 +    mdof_flags = 0;
 +    if (bX) { mdof_flags |= MDOF_X; }
 +    if (bF) { mdof_flags |= MDOF_F; }
 +    write_traj(fplog,cr,outf,mdof_flags,
 +               top_global,step,(double)step,
 +               &state->s,state_global,state->f,f_global,NULL,NULL);
 +    
 +    if (confout != NULL && MASTER(cr))
 +    {
 +        if (ir->ePBC != epbcNONE && !ir->bPeriodicMols && DOMAINDECOMP(cr))
 +        {
 +            /* Make molecules whole only for confout writing */
 +            do_pbc_mtop(fplog,ir->ePBC,state_global->box,top_global,
 +                        state_global->x);
 +        }
 +
 +        write_sto_conf_mtop(confout,
 +                            *top_global->name,top_global,
 +                            state_global->x,NULL,ir->ePBC,state_global->box);
 +    }
 +}
 +
 +static void do_em_step(t_commrec *cr,t_inputrec *ir,t_mdatoms *md,
 +                     em_state_t *ems1,real a,rvec *f,em_state_t *ems2,
 +                     gmx_constr_t constr,gmx_localtop_t *top,
 +                     t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +                     gmx_large_int_t count)
 +
 +{
 +  t_state *s1,*s2;
 +  int  start,end,gf,i,m;
 +  rvec *x1,*x2;
 +  real dvdlambda;
 +
 +  s1 = &ems1->s;
 +  s2 = &ems2->s;
 +
 +  if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
 +    gmx_incons("state mismatch in do_em_step");
 +
 +  s2->flags = s1->flags;
 +
 +  if (s2->nalloc != s1->nalloc) {
 +    s2->nalloc = s1->nalloc;
 +    srenew(s2->x,s1->nalloc);
 +    srenew(ems2->f,  s1->nalloc);
 +    if (s2->flags & (1<<estCGP))
 +      srenew(s2->cg_p,  s1->nalloc);
 +  }
 +  
 +  s2->natoms = s1->natoms;
 +  s2->lambda = s1->lambda;
 +  copy_mat(s1->box,s2->box);
 +
 +  start = md->start;
 +  end   = md->start + md->homenr;
 +
 +  x1 = s1->x;
 +  x2 = s2->x;
 +  gf = 0;
 +  for(i=start; i<end; i++) {
 +    if (md->cFREEZE)
 +      gf = md->cFREEZE[i];
 +    for(m=0; m<DIM; m++) {
 +      if (ir->opts.nFreeze[gf][m])
 +      x2[i][m] = x1[i][m];
 +      else
 +      x2[i][m] = x1[i][m] + a*f[i][m];
 +    }
 +  }
 +
 +  if (s2->flags & (1<<estCGP)) {
 +    /* Copy the CG p vector */
 +    x1 = s1->cg_p;
 +    x2 = s2->cg_p;
 +    for(i=start; i<end; i++)
 +      copy_rvec(x1[i],x2[i]);
 +  }
 +
 +  if (DOMAINDECOMP(cr)) {
 +    s2->ddp_count = s1->ddp_count;
 +    if (s2->cg_gl_nalloc < s1->cg_gl_nalloc) {
 +      s2->cg_gl_nalloc = s1->cg_gl_nalloc;
 +      srenew(s2->cg_gl,s2->cg_gl_nalloc);
 +    }
 +    s2->ncg_gl = s1->ncg_gl;
 +    for(i=0; i<s2->ncg_gl; i++)
 +      s2->cg_gl[i] = s1->cg_gl[i];
 +    s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
 +  }
 +
 +  if (constr) {
 +    wallcycle_start(wcycle,ewcCONSTR);
 +    dvdlambda = 0;
 +    constrain(NULL,TRUE,TRUE,constr,&top->idef,       
 +              ir,NULL,cr,count,0,md,
 +              s1->x,s2->x,NULL,s2->box,s2->lambda,
 +              &dvdlambda,NULL,NULL,nrnb,econqCoord,FALSE,0,0);
 +    wallcycle_stop(wcycle,ewcCONSTR);
 +  }
 +}
 +
 +static void em_dd_partition_system(FILE *fplog,int step,t_commrec *cr,
 +                                   gmx_mtop_t *top_global,t_inputrec *ir,
 +                                   em_state_t *ems,gmx_localtop_t *top,
 +                                   t_mdatoms *mdatoms,t_forcerec *fr,
 +                                   gmx_vsite_t *vsite,gmx_constr_t constr,
 +                                   t_nrnb *nrnb,gmx_wallcycle_t wcycle)
 +{
 +    /* Repartition the domain decomposition */
 +    wallcycle_start(wcycle,ewcDOMDEC);
 +    dd_partition_system(fplog,step,cr,FALSE,1,
 +                        NULL,top_global,ir,
 +                        &ems->s,&ems->f,
 +                        mdatoms,top,fr,vsite,NULL,constr,
 +                        nrnb,wcycle,FALSE);
 +    dd_store_state(cr->dd,&ems->s);
 +    wallcycle_stop(wcycle,ewcDOMDEC);
 +}
 +    
 +static void evaluate_energy(FILE *fplog,gmx_bool bVerbose,t_commrec *cr,
 +                            t_state *state_global,gmx_mtop_t *top_global,
 +                            em_state_t *ems,gmx_localtop_t *top,
 +                            t_inputrec *inputrec,
 +                            t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +                            gmx_global_stat_t gstat,
 +                            gmx_vsite_t *vsite,gmx_constr_t constr,
 +                            t_fcdata *fcd,
 +                            t_graph *graph,t_mdatoms *mdatoms,
 +                            t_forcerec *fr,rvec mu_tot,
 +                            gmx_enerdata_t *enerd,tensor vir,tensor pres,
 +                            gmx_large_int_t count,gmx_bool bFirst)
 +{
 +  real t;
 +  gmx_bool bNS;
 +  int  nabnsb;
 +  tensor force_vir,shake_vir,ekin;
 +  real dvdl,prescorr,enercorr,dvdlcorr;
 +  real terminate=0;
 +  
 +  /* Set the time to the initial time, the time does not change during EM */
 +  t = inputrec->init_t;
 +
 +  if (bFirst ||
 +      (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count)) {
 +    /* This the first state or an old state used before the last ns */
 +    bNS = TRUE;
 +  } else {
 +    bNS = FALSE;
 +    if (inputrec->nstlist > 0) {
 +      bNS = TRUE;
 +    } else if (inputrec->nstlist == -1) {
 +      nabnsb = natoms_beyond_ns_buffer(inputrec,fr,&top->cgs,NULL,ems->s.x);
 +      if (PAR(cr))
 +      gmx_sumi(1,&nabnsb,cr);
 +      bNS = (nabnsb > 0);
 +    }
 +  }
 +
 +  if (vsite)
 +    construct_vsites(fplog,vsite,ems->s.x,nrnb,1,NULL,
 +                   top->idef.iparams,top->idef.il,
 +                   fr->ePBC,fr->bMolPBC,graph,cr,ems->s.box);
 +
 +  if (DOMAINDECOMP(cr)) {
 +    if (bNS) {
 +      /* Repartition the domain decomposition */
 +      em_dd_partition_system(fplog,count,cr,top_global,inputrec,
 +                           ems,top,mdatoms,fr,vsite,constr,
 +                           nrnb,wcycle);
 +    }
 +  }
 +      
 +    /* Calc force & energy on new trial position  */
 +    /* do_force always puts the charge groups in the box and shifts again
 +     * We do not unshift, so molecules are always whole in congrad.c
 +     */
 +    do_force(fplog,cr,inputrec,
 +             count,nrnb,wcycle,top,top_global,&top_global->groups,
 +             ems->s.box,ems->s.x,&ems->s.hist,
 +             ems->f,force_vir,mdatoms,enerd,fcd,
 +             ems->s.lambda,graph,fr,vsite,mu_tot,t,NULL,NULL,TRUE,
 +             GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES | GMX_FORCE_VIRIAL |
 +             (bNS ? GMX_FORCE_NS | GMX_FORCE_DOLR : 0));
 +      
 +  /* Clear the unused shake virial and pressure */
 +  clear_mat(shake_vir);
 +  clear_mat(pres);
 +
 +  /* Calculate long range corrections to pressure and energy */
 +  calc_dispcorr(fplog,inputrec,fr,count,top_global->natoms,ems->s.box,ems->s.lambda,
 +                pres,force_vir,&prescorr,&enercorr,&dvdlcorr);
 +  /* don't think these next 4 lines  can be moved in for now, because we 
 +     don't always want to write it -- figure out how to clean this up MRS 8/4/2009 */
 +  enerd->term[F_DISPCORR] = enercorr;
 +  enerd->term[F_EPOT] += enercorr;
 +  enerd->term[F_PRES] += prescorr;
 +  enerd->term[F_DVDL] += dvdlcorr;
 +
 +    /* Communicate stuff when parallel */
 +    if (PAR(cr) && inputrec->eI != eiNM)
 +    {
 +        wallcycle_start(wcycle,ewcMoveE);
 +
 +        global_stat(fplog,gstat,cr,enerd,force_vir,shake_vir,mu_tot,
 +                    inputrec,NULL,NULL,NULL,1,&terminate,
 +                    top_global,&ems->s,FALSE,
 +                    CGLO_ENERGY | 
 +                    CGLO_PRESSURE | 
 +                    CGLO_CONSTRAINT | 
 +                    CGLO_FIRSTITERATE);
 +
 +        wallcycle_stop(wcycle,ewcMoveE);
 +    }
 +
 +  ems->epot = enerd->term[F_EPOT];
 +  
 +  if (constr) {
 +    /* Project out the constraint components of the force */
 +    wallcycle_start(wcycle,ewcCONSTR);
 +    dvdl = 0;
 +    constrain(NULL,FALSE,FALSE,constr,&top->idef,
 +              inputrec,NULL,cr,count,0,mdatoms,
 +              ems->s.x,ems->f,ems->f,ems->s.box,ems->s.lambda,&dvdl,
 +              NULL,&shake_vir,nrnb,econqForceDispl,FALSE,0,0);
 +    if (fr->bSepDVDL && fplog)
 +      fprintf(fplog,sepdvdlformat,"Constraints",t,dvdl);
 +    enerd->term[F_DHDL_CON] += dvdl;
 +    m_add(force_vir,shake_vir,vir);
 +    wallcycle_stop(wcycle,ewcCONSTR);
 +  } else {
 +    copy_mat(force_vir,vir);
 +  }
 +
 +  clear_mat(ekin);
 +  enerd->term[F_PRES] =
 +    calc_pres(fr->ePBC,inputrec->nwall,ems->s.box,ekin,vir,pres,
 +            (fr->eeltype==eelPPPM)?enerd->term[F_COUL_RECIP]:0.0);
 +
 +  sum_dhdl(enerd,ems->s.lambda,inputrec);
 +
 +    if (EI_ENERGY_MINIMIZATION(inputrec->eI))
 +    {
 +        get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,ems);
 +    }
 +}
 +
 +static double reorder_partsum(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
 +                            gmx_mtop_t *mtop,
 +                            em_state_t *s_min,em_state_t *s_b)
 +{
 +  rvec *fm,*fb,*fmg;
 +  t_block *cgs_gl;
 +  int ncg,*cg_gl,*index,c,cg,i,a0,a1,a,gf,m;
 +  double partsum;
 +  unsigned char *grpnrFREEZE;
 +
 +  if (debug)
 +    fprintf(debug,"Doing reorder_partsum\n");
 +
 +  fm = s_min->f;
 +  fb = s_b->f;
 +
 +  cgs_gl = dd_charge_groups_global(cr->dd);
 +  index = cgs_gl->index;
 +
 +  /* Collect fm in a global vector fmg.
 +   * This conflicts with the spirit of domain decomposition,
 +   * but to fully optimize this a much more complicated algorithm is required.
 +   */
 +  snew(fmg,mtop->natoms);
 +  
 +  ncg   = s_min->s.ncg_gl;
 +  cg_gl = s_min->s.cg_gl;
 +  i = 0;
 +  for(c=0; c<ncg; c++) {
 +    cg = cg_gl[c];
 +    a0 = index[cg];
 +    a1 = index[cg+1];
 +    for(a=a0; a<a1; a++) {
 +      copy_rvec(fm[i],fmg[a]);
 +      i++;
 +    }
 +  }
 +  gmx_sum(mtop->natoms*3,fmg[0],cr);
 +
 +  /* Now we will determine the part of the sum for the cgs in state s_b */
 +  ncg   = s_b->s.ncg_gl;
 +  cg_gl = s_b->s.cg_gl;
 +  partsum = 0;
 +  i = 0;
 +  gf = 0;
 +  grpnrFREEZE = mtop->groups.grpnr[egcFREEZE];
 +  for(c=0; c<ncg; c++) {
 +    cg = cg_gl[c];
 +    a0 = index[cg];
 +    a1 = index[cg+1];
 +    for(a=a0; a<a1; a++) {
 +      if (mdatoms->cFREEZE && grpnrFREEZE) {
 +      gf = grpnrFREEZE[i];
 +      }
 +      for(m=0; m<DIM; m++) {
 +      if (!opts->nFreeze[gf][m]) {
 +        partsum += (fb[i][m] - fmg[a][m])*fb[i][m];
 +      }
 +      }
 +      i++;
 +    }
 +  }
 +  
 +  sfree(fmg);
 +
 +  return partsum;
 +}
 +
 +static real pr_beta(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
 +                  gmx_mtop_t *mtop,
 +                  em_state_t *s_min,em_state_t *s_b)
 +{
 +  rvec *fm,*fb;
 +  double sum;
 +  int  gf,i,m;
 +
 +  /* This is just the classical Polak-Ribiere calculation of beta;
 +   * it looks a bit complicated since we take freeze groups into account,
 +   * and might have to sum it in parallel runs.
 +   */
 +  
 +  if (!DOMAINDECOMP(cr) ||
 +      (s_min->s.ddp_count == cr->dd->ddp_count &&
 +       s_b->s.ddp_count   == cr->dd->ddp_count)) {
 +    fm = s_min->f;
 +    fb = s_b->f;
 +    sum = 0;
 +    gf = 0;
 +    /* This part of code can be incorrect with DD,
 +     * since the atom ordering in s_b and s_min might differ.
 +     */
 +    for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
 +      if (mdatoms->cFREEZE)
 +      gf = mdatoms->cFREEZE[i];
 +      for(m=0; m<DIM; m++)
 +      if (!opts->nFreeze[gf][m]) {
 +        sum += (fb[i][m] - fm[i][m])*fb[i][m];
 +      } 
 +    }
 +  } else {
 +    /* We need to reorder cgs while summing */
 +    sum = reorder_partsum(cr,opts,mdatoms,mtop,s_min,s_b);
 +  }
 +  if (PAR(cr))
 +    gmx_sumd(1,&sum,cr);
 +
 +  return sum/sqr(s_min->fnorm);
 +}
 +
 +double do_cg(FILE *fplog,t_commrec *cr,
 +             int nfile,const t_filenm fnm[],
 +             const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
 +             int nstglobalcomm,
 +             gmx_vsite_t *vsite,gmx_constr_t constr,
 +             int stepout,
 +             t_inputrec *inputrec,
 +             gmx_mtop_t *top_global,t_fcdata *fcd,
 +             t_state *state_global,
 +             t_mdatoms *mdatoms,
 +             t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +             gmx_edsam_t ed,
 +             t_forcerec *fr,
 +             int repl_ex_nst,int repl_ex_seed,
 +             gmx_membed_t *membed,
 +             real cpt_period,real max_hours,
 +             const char *deviceOptions,
 +             unsigned long Flags,
 +             gmx_runtime_t *runtime)
 +{
 +  const char *CG="Polak-Ribiere Conjugate Gradients";
 +
 +  em_state_t *s_min,*s_a,*s_b,*s_c;
 +  gmx_localtop_t *top;
 +  gmx_enerdata_t *enerd;
 +  rvec   *f;
 +  gmx_global_stat_t gstat;
 +  t_graph    *graph;
 +  rvec   *f_global,*p,*sf,*sfm;
 +  double gpa,gpb,gpc,tmp,sum[2],minstep;
 +  real   fnormn;
 +  real   stepsize;    
 +  real   a,b,c,beta=0.0;
 +  real   epot_repl=0;
 +  real   pnorm;
 +  t_mdebin   *mdebin;
 +  gmx_bool   converged,foundlower;
 +  rvec   mu_tot;
 +  gmx_bool   do_log=FALSE,do_ene=FALSE,do_x,do_f;
 +  tensor vir,pres;
 +  int    number_steps,neval=0,nstcg=inputrec->nstcgsteep;
 +  gmx_mdoutf_t *outf;
 +  int    i,m,gf,step,nminstep;
 +  real   terminate=0;  
 +
 +  step=0;
 +
 +  s_min = init_em_state();
 +  s_a   = init_em_state();
 +  s_b   = init_em_state();
 +  s_c   = init_em_state();
 +
 +  /* Init em and store the local state in s_min */
 +  init_em(fplog,CG,cr,inputrec,
 +          state_global,top_global,s_min,&top,&f,&f_global,
 +          nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
 +          nfile,fnm,&outf,&mdebin);
 +  
 +  /* Print to log file */
 +  print_em_start(fplog,cr,runtime,wcycle,CG);
 +  
 +  /* Max number of steps */
 +  number_steps=inputrec->nsteps;
 +
 +  if (MASTER(cr))
 +    sp_header(stderr,CG,inputrec->em_tol,number_steps);
 +  if (fplog)
 +    sp_header(fplog,CG,inputrec->em_tol,number_steps);
 +
 +  /* Call the force routine and some auxiliary (neighboursearching etc.) */
 +  /* do_force always puts the charge groups in the box and shifts again
 +   * We do not unshift, so molecules are always whole in congrad.c
 +   */
 +  evaluate_energy(fplog,bVerbose,cr,
 +                state_global,top_global,s_min,top,
 +                inputrec,nrnb,wcycle,gstat,
 +                vsite,constr,fcd,graph,mdatoms,fr,
 +                mu_tot,enerd,vir,pres,-1,TRUE);
 +  where();
 +
 +  if (MASTER(cr)) {
 +    /* Copy stuff to the energy bin for easy printing etc. */
 +    upd_mdebin(mdebin,FALSE,FALSE,(double)step,
 +             mdatoms->tmass,enerd,&s_min->s,s_min->s.box,
 +             NULL,NULL,vir,pres,NULL,mu_tot,constr);
 +    
 +    print_ebin_header(fplog,step,step,s_min->s.lambda);
 +    print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
 +               TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
 +  }
 +  where();
 +
 +  /* Estimate/guess the initial stepsize */
 +  stepsize = inputrec->em_stepsize/s_min->fnorm;
 + 
 +  if (MASTER(cr)) {
 +    fprintf(stderr,"   F-max             = %12.5e on atom %d\n",
 +          s_min->fmax,s_min->a_fmax+1);
 +    fprintf(stderr,"   F-Norm            = %12.5e\n",
 +          s_min->fnorm/sqrt(state_global->natoms));
 +    fprintf(stderr,"\n");
 +    /* and copy to the log file too... */
 +    fprintf(fplog,"   F-max             = %12.5e on atom %d\n",
 +          s_min->fmax,s_min->a_fmax+1);
 +    fprintf(fplog,"   F-Norm            = %12.5e\n",
 +          s_min->fnorm/sqrt(state_global->natoms));
 +    fprintf(fplog,"\n");
 +  }  
 +  /* Start the loop over CG steps.            
 +   * Each successful step is counted, and we continue until
 +   * we either converge or reach the max number of steps.
 +   */
 +  converged = FALSE;
 +  for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged;step++) {
 +    
 +    /* start taking steps in a new direction 
 +     * First time we enter the routine, beta=0, and the direction is 
 +     * simply the negative gradient.
 +     */
 +
 +    /* Calculate the new direction in p, and the gradient in this direction, gpa */
 +    p  = s_min->s.cg_p;
 +    sf = s_min->f;
 +    gpa = 0;
 +    gf = 0;
 +    for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
 +      if (mdatoms->cFREEZE) 
 +      gf = mdatoms->cFREEZE[i];
 +      for(m=0; m<DIM; m++) {
 +      if (!inputrec->opts.nFreeze[gf][m]) {
 +        p[i][m] = sf[i][m] + beta*p[i][m];
 +        gpa -= p[i][m]*sf[i][m];
 +        /* f is negative gradient, thus the sign */
 +      } else {
 +          p[i][m] = 0;
 +      }
 +      }
 +    }
 +    
 +    /* Sum the gradient along the line across CPUs */
 +    if (PAR(cr))
 +      gmx_sumd(1,&gpa,cr);
 +
 +    /* Calculate the norm of the search vector */
 +    get_f_norm_max(cr,&(inputrec->opts),mdatoms,p,&pnorm,NULL,NULL);
 +    
 +    /* Just in case stepsize reaches zero due to numerical precision... */
 +    if(stepsize<=0)     
 +      stepsize = inputrec->em_stepsize/pnorm;
 +    
 +    /* 
 +     * Double check the value of the derivative in the search direction.
 +     * If it is positive it must be due to the old information in the
 +     * CG formula, so just remove that and start over with beta=0.
 +     * This corresponds to a steepest descent step.
 +     */
 +    if(gpa>0) {
 +      beta = 0;
 +      step--; /* Don't count this step since we are restarting */
 +      continue; /* Go back to the beginning of the big for-loop */
 +    }
 +
 +    /* Calculate minimum allowed stepsize, before the average (norm)
 +     * relative change in coordinate is smaller than precision
 +     */
 +    minstep=0;
 +    for (i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
 +      for(m=0; m<DIM; m++) {
 +      tmp = fabs(s_min->s.x[i][m]);
 +      if(tmp < 1.0)
 +        tmp = 1.0;
 +      tmp = p[i][m]/tmp;
 +      minstep += tmp*tmp;
 +      }
 +    }
 +    /* Add up from all CPUs */
 +    if(PAR(cr))
 +      gmx_sumd(1,&minstep,cr);
 +
 +    minstep = GMX_REAL_EPS/sqrt(minstep/(3*state_global->natoms));
 +
 +    if(stepsize<minstep) {
 +      converged=TRUE;
 +      break;
 +    }
 +    
 +    /* Write coordinates if necessary */
 +    do_x = do_per_step(step,inputrec->nstxout);
 +    do_f = do_per_step(step,inputrec->nstfout);
 +    
 +    write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
 +                  top_global,inputrec,step,
 +                  s_min,state_global,f_global);
 +    
 +    /* Take a step downhill.
 +     * In theory, we should minimize the function along this direction.
 +     * That is quite possible, but it turns out to take 5-10 function evaluations
 +     * for each line. However, we dont really need to find the exact minimum -
 +     * it is much better to start a new CG step in a modified direction as soon
 +     * as we are close to it. This will save a lot of energy evaluations.
 +     *
 +     * In practice, we just try to take a single step.
 +     * If it worked (i.e. lowered the energy), we increase the stepsize but
 +     * the continue straight to the next CG step without trying to find any minimum.
 +     * If it didn't work (higher energy), there must be a minimum somewhere between
 +     * the old position and the new one.
 +     * 
 +     * Due to the finite numerical accuracy, it turns out that it is a good idea
 +     * to even accept a SMALL increase in energy, if the derivative is still downhill.
 +     * This leads to lower final energies in the tests I've done. / Erik 
 +     */
 +    s_a->epot = s_min->epot;
 +    a = 0.0;
 +    c = a + stepsize; /* reference position along line is zero */
 +    
 +    if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count) {
 +      em_dd_partition_system(fplog,step,cr,top_global,inputrec,
 +                           s_min,top,mdatoms,fr,vsite,constr,
 +                           nrnb,wcycle);
 +    }
 +
 +    /* Take a trial step (new coords in s_c) */
 +    do_em_step(cr,inputrec,mdatoms,s_min,c,s_min->s.cg_p,s_c,
 +             constr,top,nrnb,wcycle,-1);
 +    
 +    neval++;
 +    /* Calculate energy for the trial step */
 +    evaluate_energy(fplog,bVerbose,cr,
 +                  state_global,top_global,s_c,top,
 +                  inputrec,nrnb,wcycle,gstat,
 +                  vsite,constr,fcd,graph,mdatoms,fr,
 +                  mu_tot,enerd,vir,pres,-1,FALSE);
 +    
 +    /* Calc derivative along line */
 +    p  = s_c->s.cg_p;
 +    sf = s_c->f;
 +    gpc=0;
 +    for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
 +      for(m=0; m<DIM; m++) 
 +        gpc -= p[i][m]*sf[i][m];  /* f is negative gradient, thus the sign */
 +    }
 +    /* Sum the gradient along the line across CPUs */
 +    if (PAR(cr))
 +      gmx_sumd(1,&gpc,cr);
 +
 +    /* This is the max amount of increase in energy we tolerate */
 +    tmp=sqrt(GMX_REAL_EPS)*fabs(s_a->epot);
 +
 +    /* Accept the step if the energy is lower, or if it is not significantly higher
 +     * and the line derivative is still negative.
 +     */
 +    if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp))) {
 +      foundlower = TRUE;
 +      /* Great, we found a better energy. Increase step for next iteration
 +       * if we are still going down, decrease it otherwise
 +       */
 +      if(gpc<0)
 +      stepsize *= 1.618034;  /* The golden section */
 +      else
 +      stepsize *= 0.618034;  /* 1/golden section */
 +    } else {
 +      /* New energy is the same or higher. We will have to do some work
 +       * to find a smaller value in the interval. Take smaller step next time!
 +       */
 +      foundlower = FALSE;
 +      stepsize *= 0.618034;
 +    }    
 +
 +
 +
 +    
 +    /* OK, if we didn't find a lower value we will have to locate one now - there must
 +     * be one in the interval [a=0,c].
 +     * The same thing is valid here, though: Don't spend dozens of iterations to find
 +     * the line minimum. We try to interpolate based on the derivative at the endpoints,
 +     * and only continue until we find a lower value. In most cases this means 1-2 iterations.
 +     *
 +     * I also have a safeguard for potentially really patological functions so we never
 +     * take more than 20 steps before we give up ...
 +     *
 +     * If we already found a lower value we just skip this step and continue to the update.
 +     */
 +    if (!foundlower) {
 +      nminstep=0;
 +
 +      do {
 +      /* Select a new trial point.
 +       * If the derivatives at points a & c have different sign we interpolate to zero,
 +       * otherwise just do a bisection.
 +       */
 +      if(gpa<0 && gpc>0)
 +        b = a + gpa*(a-c)/(gpc-gpa);
 +      else
 +        b = 0.5*(a+c);                
 +      
 +      /* safeguard if interpolation close to machine accuracy causes errors:
 +       * never go outside the interval
 +       */
 +      if(b<=a || b>=c)
 +        b = 0.5*(a+c);
 +      
 +      if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
 +        /* Reload the old state */
 +        em_dd_partition_system(fplog,-1,cr,top_global,inputrec,
 +                               s_min,top,mdatoms,fr,vsite,constr,
 +                               nrnb,wcycle);
 +      }
 +
 +      /* Take a trial step to this new point - new coords in s_b */
 +      do_em_step(cr,inputrec,mdatoms,s_min,b,s_min->s.cg_p,s_b,
 +                 constr,top,nrnb,wcycle,-1);
 +      
 +      neval++;
 +      /* Calculate energy for the trial step */
 +      evaluate_energy(fplog,bVerbose,cr,
 +                      state_global,top_global,s_b,top,
 +                      inputrec,nrnb,wcycle,gstat,
 +                      vsite,constr,fcd,graph,mdatoms,fr,
 +                      mu_tot,enerd,vir,pres,-1,FALSE);
 +      
 +      /* p does not change within a step, but since the domain decomposition
 +       * might change, we have to use cg_p of s_b here.
 +       */
 +      p  = s_b->s.cg_p;
 +      sf = s_b->f;
 +      gpb=0;
 +      for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
 +        for(m=0; m<DIM; m++)
 +            gpb -= p[i][m]*sf[i][m];   /* f is negative gradient, thus the sign */
 +      }
 +      /* Sum the gradient along the line across CPUs */
 +      if (PAR(cr))
 +        gmx_sumd(1,&gpb,cr);
 +      
 +      if (debug)
 +        fprintf(debug,"CGE: EpotA %f EpotB %f EpotC %f gpb %f\n",
 +                s_a->epot,s_b->epot,s_c->epot,gpb);
 +
 +      epot_repl = s_b->epot;
 +      
 +      /* Keep one of the intervals based on the value of the derivative at the new point */
 +      if (gpb > 0) {
 +        /* Replace c endpoint with b */
 +        swap_em_state(s_b,s_c);
 +        c = b;
 +        gpc = gpb;
 +      } else {
 +        /* Replace a endpoint with b */
 +        swap_em_state(s_b,s_a);
 +        a = b;
 +        gpa = gpb;
 +      }
 +      
 +      /* 
 +       * Stop search as soon as we find a value smaller than the endpoints.
 +       * Never run more than 20 steps, no matter what.
 +       */
 +      nminstep++;
 +      } while ((epot_repl > s_a->epot || epot_repl > s_c->epot) &&
 +             (nminstep < 20));     
 +      
 +      if (fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS ||
 +        nminstep >= 20) {
 +      /* OK. We couldn't find a significantly lower energy.
 +       * If beta==0 this was steepest descent, and then we give up.
 +       * If not, set beta=0 and restart with steepest descent before quitting.
 +         */
 +      if (beta == 0.0) {
 +        /* Converged */
 +        converged = TRUE;
 +        break;
 +      } else {
 +        /* Reset memory before giving up */
 +        beta = 0.0;
 +        continue;
 +      }
 +      }
 +      
 +      /* Select min energy state of A & C, put the best in B.
 +       */
 +      if (s_c->epot < s_a->epot) {
 +      if (debug)
 +        fprintf(debug,"CGE: C (%f) is lower than A (%f), moving C to B\n",
 +                s_c->epot,s_a->epot);
 +      swap_em_state(s_b,s_c);
 +      gpb = gpc;
 +      b = c;
 +      } else {
 +      if (debug)
 +        fprintf(debug,"CGE: A (%f) is lower than C (%f), moving A to B\n",
 +                s_a->epot,s_c->epot);
 +      swap_em_state(s_b,s_a);
 +      gpb = gpa;
 +      b = a;
 +      }
 +      
 +    } else {
 +      if (debug)
 +      fprintf(debug,"CGE: Found a lower energy %f, moving C to B\n",
 +              s_c->epot);
 +      swap_em_state(s_b,s_c);
 +      gpb = gpc;
 +      b = c;
 +    }
 +    
 +    /* new search direction */
 +    /* beta = 0 means forget all memory and restart with steepest descents. */
 +    if (nstcg && ((step % nstcg)==0)) 
 +      beta = 0.0;
 +    else {
 +      /* s_min->fnorm cannot be zero, because then we would have converged
 +       * and broken out.
 +       */
 +
 +      /* Polak-Ribiere update.
 +       * Change to fnorm2/fnorm2_old for Fletcher-Reeves
 +       */
 +      beta = pr_beta(cr,&inputrec->opts,mdatoms,top_global,s_min,s_b);
 +    }
 +    /* Limit beta to prevent oscillations */
 +    if (fabs(beta) > 5.0)
 +      beta = 0.0;
 +    
 +    
 +    /* update positions */
 +    swap_em_state(s_min,s_b);
 +    gpa = gpb;
 +    
 +    /* Print it if necessary */
 +    if (MASTER(cr)) {
 +      if(bVerbose)
 +      fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
 +              step,s_min->epot,s_min->fnorm/sqrt(state_global->natoms),
 +              s_min->fmax,s_min->a_fmax+1);
 +      /* Store the new (lower) energies */
 +      upd_mdebin(mdebin,FALSE,FALSE,(double)step,
 +               mdatoms->tmass,enerd,&s_min->s,s_min->s.box,
 +               NULL,NULL,vir,pres,NULL,mu_tot,constr);
 +      do_log = do_per_step(step,inputrec->nstlog);
 +      do_ene = do_per_step(step,inputrec->nstenergy);
 +      if(do_log)
 +      print_ebin_header(fplog,step,step,s_min->s.lambda);
 +      print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
 +               do_log ? fplog : NULL,step,step,eprNORMAL,
 +               TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
 +    }
 +    
 +    /* Stop when the maximum force lies below tolerance.
 +     * If we have reached machine precision, converged is already set to true.
 +     */       
 +    converged = converged || (s_min->fmax < inputrec->em_tol);
 +    
 +  } /* End of the loop */
 +  
 +  if (converged)      
 +    step--; /* we never took that last step in this case */
 +  
 +    if (s_min->fmax > inputrec->em_tol)
 +    {
 +        if (MASTER(cr))
 +        {
 +            warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
 +            warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
 +        }
 +        converged = FALSE; 
 +    }
 +  
 +  if (MASTER(cr)) {
 +    /* If we printed energy and/or logfile last step (which was the last step)
 +     * we don't have to do it again, but otherwise print the final values.
 +     */
 +    if(!do_log) {
 +      /* Write final value to log since we didn't do anything the last step */
 +      print_ebin_header(fplog,step,step,s_min->s.lambda);
 +    }
 +    if (!do_ene || !do_log) {
 +      /* Write final energy file entries */
 +      print_ebin(outf->fp_ene,!do_ene,FALSE,FALSE,
 +               !do_log ? fplog : NULL,step,step,eprNORMAL,
 +               TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
 +    }
 +  }
 +
 +  /* Print some stuff... */
 +  if (MASTER(cr))
 +    fprintf(stderr,"\nwriting lowest energy coordinates.\n");
 +  
 +  /* IMPORTANT!
 +   * For accurate normal mode calculation it is imperative that we
 +   * store the last conformation into the full precision binary trajectory.
 +   *
 +   * However, we should only do it if we did NOT already write this step
 +   * above (which we did if do_x or do_f was true).
 +   */  
 +  do_x = !do_per_step(step,inputrec->nstxout);
 +  do_f = (inputrec->nstfout > 0 && !do_per_step(step,inputrec->nstfout));
 +  
 +  write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
 +                top_global,inputrec,step,
 +                s_min,state_global,f_global);
 +  
 +  fnormn = s_min->fnorm/sqrt(state_global->natoms);
 +  
 +  if (MASTER(cr)) {
 +    print_converged(stderr,CG,inputrec->em_tol,step,converged,number_steps,
 +                  s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
 +    print_converged(fplog,CG,inputrec->em_tol,step,converged,number_steps,
 +                  s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
 +    
 +    fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
 +  }
 +  
 +  finish_em(fplog,cr,outf,runtime,wcycle);
 +  
 +  /* To print the actual number of steps we needed somewhere */
 +  runtime->nsteps_done = step;
 +
 +  return 0;
 +} /* That's all folks */
 +
 +
 +double do_lbfgs(FILE *fplog,t_commrec *cr,
 +                int nfile,const t_filenm fnm[],
 +                const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
 +                int nstglobalcomm,
 +                gmx_vsite_t *vsite,gmx_constr_t constr,
 +                int stepout,
 +                t_inputrec *inputrec,
 +                gmx_mtop_t *top_global,t_fcdata *fcd,
 +                t_state *state,
 +                t_mdatoms *mdatoms,
 +                t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +                gmx_edsam_t ed,
 +                t_forcerec *fr,
 +                int repl_ex_nst,int repl_ex_seed,
 +                gmx_membed_t *membed,
 +                real cpt_period,real max_hours,
 +                const char *deviceOptions,
 +                unsigned long Flags,
 +                gmx_runtime_t *runtime)
 +{
 +  static const char *LBFGS="Low-Memory BFGS Minimizer";
 +  em_state_t ems;
 +  gmx_localtop_t *top;
 +  gmx_enerdata_t *enerd;
 +  rvec   *f;
 +  gmx_global_stat_t gstat;
 +  t_graph    *graph;
 +  rvec   *f_global;
 +  int    ncorr,nmaxcorr,point,cp,neval,nminstep;
 +  double stepsize,gpa,gpb,gpc,tmp,minstep;
 +  real   *rho,*alpha,*ff,*xx,*p,*s,*lastx,*lastf,**dx,**dg;   
 +  real   *xa,*xb,*xc,*fa,*fb,*fc,*xtmp,*ftmp;
 +  real   a,b,c,maxdelta,delta;
 +  real   diag,Epot0,Epot,EpotA,EpotB,EpotC;
 +  real   dgdx,dgdg,sq,yr,beta;
 +  t_mdebin   *mdebin;
 +  gmx_bool   converged,first;
 +  rvec   mu_tot;
 +  real   fnorm,fmax;
 +  gmx_bool   do_log,do_ene,do_x,do_f,foundlower,*frozen;
 +  tensor vir,pres;
 +  int    start,end,number_steps;
 +  gmx_mdoutf_t *outf;
 +  int    i,k,m,n,nfmax,gf,step;
 +  /* not used */
 +  real   terminate;
 +
 +  if (PAR(cr))
 +    gmx_fatal(FARGS,"Cannot do parallel L-BFGS Minimization - yet.\n");
 +  
 +  n = 3*state->natoms;
 +  nmaxcorr = inputrec->nbfgscorr;
 +  
 +  /* Allocate memory */
 +  /* Use pointers to real so we dont have to loop over both atoms and
 +   * dimensions all the time...
 +   * x/f are allocated as rvec *, so make new x0/f0 pointers-to-real
 +   * that point to the same memory.
 +   */
 +  snew(xa,n);
 +  snew(xb,n);
 +  snew(xc,n);
 +  snew(fa,n);
 +  snew(fb,n);
 +  snew(fc,n);
 +  snew(frozen,n);
 +
 +  snew(p,n); 
 +  snew(lastx,n); 
 +  snew(lastf,n); 
 +  snew(rho,nmaxcorr);
 +  snew(alpha,nmaxcorr);
 +  
 +  snew(dx,nmaxcorr);
 +  for(i=0;i<nmaxcorr;i++)
 +    snew(dx[i],n);
 +  
 +  snew(dg,nmaxcorr);
 +  for(i=0;i<nmaxcorr;i++)
 +    snew(dg[i],n);
 +
 +  step = 0;
 +  neval = 0; 
 +
 +  /* Init em */
 +  init_em(fplog,LBFGS,cr,inputrec,
 +          state,top_global,&ems,&top,&f,&f_global,
 +          nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
 +          nfile,fnm,&outf,&mdebin);
 +  /* Do_lbfgs is not completely updated like do_steep and do_cg,
 +   * so we free some memory again.
 +   */
 +  sfree(ems.s.x);
 +  sfree(ems.f);
 +
 +  xx = (real *)state->x;
 +  ff = (real *)f;
 +
 +  start = mdatoms->start;
 +  end   = mdatoms->homenr + start;
 +    
 +  /* Print to log file */
 +  print_em_start(fplog,cr,runtime,wcycle,LBFGS);
 +  
 +  do_log = do_ene = do_x = do_f = TRUE;
 +  
 +  /* Max number of steps */
 +  number_steps=inputrec->nsteps;
 +
 +  /* Create a 3*natoms index to tell whether each degree of freedom is frozen */
 +  gf = 0;
 +  for(i=start; i<end; i++) {
 +    if (mdatoms->cFREEZE)
 +      gf = mdatoms->cFREEZE[i];
 +     for(m=0; m<DIM; m++) 
 +       frozen[3*i+m]=inputrec->opts.nFreeze[gf][m];  
 +  }
 +  if (MASTER(cr))
 +    sp_header(stderr,LBFGS,inputrec->em_tol,number_steps);
 +  if (fplog)
 +    sp_header(fplog,LBFGS,inputrec->em_tol,number_steps);
 +  
 +  if (vsite)
 +    construct_vsites(fplog,vsite,state->x,nrnb,1,NULL,
 +                   top->idef.iparams,top->idef.il,
 +                   fr->ePBC,fr->bMolPBC,graph,cr,state->box);
 +  
 +  /* Call the force routine and some auxiliary (neighboursearching etc.) */
 +  /* do_force always puts the charge groups in the box and shifts again
 +   * We do not unshift, so molecules are always whole
 +   */
 +  neval++;
 +  ems.s.x = state->x;
 +  ems.f = f;
 +  evaluate_energy(fplog,bVerbose,cr,
 +                state,top_global,&ems,top,
 +                inputrec,nrnb,wcycle,gstat,
 +                vsite,constr,fcd,graph,mdatoms,fr,
 +                mu_tot,enerd,vir,pres,-1,TRUE);
 +  where();
 +      
 +  if (MASTER(cr)) {
 +    /* Copy stuff to the energy bin for easy printing etc. */
 +    upd_mdebin(mdebin,FALSE,FALSE,(double)step,
 +             mdatoms->tmass,enerd,state,state->box,
 +             NULL,NULL,vir,pres,NULL,mu_tot,constr);
 +    
 +    print_ebin_header(fplog,step,step,state->lambda);
 +    print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
 +               TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
 +  }
 +  where();
 +  
 +  /* This is the starting energy */
 +  Epot = enerd->term[F_EPOT];
 +  
 +  fnorm = ems.fnorm;
 +  fmax  = ems.fmax;
 +  nfmax = ems.a_fmax;
 +  
 +  /* Set the initial step.
 +   * since it will be multiplied by the non-normalized search direction 
 +   * vector (force vector the first time), we scale it by the
 +   * norm of the force.
 +   */
 +  
 +  if (MASTER(cr)) {
 +    fprintf(stderr,"Using %d BFGS correction steps.\n\n",nmaxcorr);
 +    fprintf(stderr,"   F-max             = %12.5e on atom %d\n",fmax,nfmax+1);
 +    fprintf(stderr,"   F-Norm            = %12.5e\n",fnorm/sqrt(state->natoms));
 +    fprintf(stderr,"\n");
 +    /* and copy to the log file too... */
 +    fprintf(fplog,"Using %d BFGS correction steps.\n\n",nmaxcorr);
 +    fprintf(fplog,"   F-max             = %12.5e on atom %d\n",fmax,nfmax+1);
 +    fprintf(fplog,"   F-Norm            = %12.5e\n",fnorm/sqrt(state->natoms));
 +    fprintf(fplog,"\n");
 +  }   
 +  
 +  point=0;
 +  for(i=0;i<n;i++)
 +    if(!frozen[i])
 +      dx[point][i] = ff[i];  /* Initial search direction */
 +    else
 +      dx[point][i] = 0;
 +
 +  stepsize = 1.0/fnorm;
 +  converged = FALSE;
 +  
 +  /* Start the loop over BFGS steps.          
 +   * Each successful step is counted, and we continue until
 +   * we either converge or reach the max number of steps.
 +   */
 +  
 +  ncorr=0;
 +
 +  /* Set the gradient from the force */
 +  converged = FALSE;
 +  for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged; step++) {
 +    
 +    /* Write coordinates if necessary */
 +    do_x = do_per_step(step,inputrec->nstxout);
 +    do_f = do_per_step(step,inputrec->nstfout);
 +    
 +    write_traj(fplog,cr,outf,MDOF_X | MDOF_F,
 +               top_global,step,(real)step,state,state,f,f,NULL,NULL);
 +
 +    /* Do the linesearching in the direction dx[point][0..(n-1)] */
 +    
 +    /* pointer to current direction - point=0 first time here */
 +    s=dx[point];
 +    
 +    /* calculate line gradient */
 +    for(gpa=0,i=0;i<n;i++) 
 +      gpa-=s[i]*ff[i];
 +
 +    /* Calculate minimum allowed stepsize, before the average (norm) 
 +     * relative change in coordinate is smaller than precision 
 +     */
 +    for(minstep=0,i=0;i<n;i++) {
 +      tmp=fabs(xx[i]);
 +      if(tmp<1.0)
 +      tmp=1.0;
 +      tmp = s[i]/tmp;
 +      minstep += tmp*tmp;
 +    }
 +    minstep = GMX_REAL_EPS/sqrt(minstep/n);
 +    
 +    if(stepsize<minstep) {
 +      converged=TRUE;
 +      break;
 +    }
 +    
 +    /* Store old forces and coordinates */
 +    for(i=0;i<n;i++) {
 +      lastx[i]=xx[i];
 +      lastf[i]=ff[i];
 +    }
 +    Epot0=Epot;
 +    
 +    first=TRUE;
 +    
 +    for(i=0;i<n;i++)
 +      xa[i]=xx[i];
 +    
 +    /* Take a step downhill.
 +     * In theory, we should minimize the function along this direction.
 +     * That is quite possible, but it turns out to take 5-10 function evaluations
 +     * for each line. However, we dont really need to find the exact minimum -
 +     * it is much better to start a new BFGS step in a modified direction as soon
 +     * as we are close to it. This will save a lot of energy evaluations.
 +     *
 +     * In practice, we just try to take a single step.
 +     * If it worked (i.e. lowered the energy), we increase the stepsize but
 +     * the continue straight to the next BFGS step without trying to find any minimum.
 +     * If it didn't work (higher energy), there must be a minimum somewhere between
 +     * the old position and the new one.
 +     * 
 +     * Due to the finite numerical accuracy, it turns out that it is a good idea
 +     * to even accept a SMALL increase in energy, if the derivative is still downhill.
 +     * This leads to lower final energies in the tests I've done. / Erik 
 +     */
 +    foundlower=FALSE;
 +    EpotA = Epot0;
 +    a = 0.0;
 +    c = a + stepsize; /* reference position along line is zero */
 +
 +    /* Check stepsize first. We do not allow displacements 
 +     * larger than emstep.
 +     */
 +    do {
 +      c = a + stepsize;
 +      maxdelta=0;
 +      for(i=0;i<n;i++) {
 +      delta=c*s[i];
 +      if(delta>maxdelta)
 +        maxdelta=delta;
 +      }
 +      if(maxdelta>inputrec->em_stepsize)
 +      stepsize*=0.1;
 +    } while(maxdelta>inputrec->em_stepsize);
 +
 +    /* Take a trial step */
 +    for (i=0; i<n; i++)
 +      xc[i] = lastx[i] + c*s[i];
 +    
 +    neval++;
 +    /* Calculate energy for the trial step */
 +    ems.s.x = (rvec *)xc;
 +    ems.f   = (rvec *)fc;
 +    evaluate_energy(fplog,bVerbose,cr,
 +                  state,top_global,&ems,top,
 +                  inputrec,nrnb,wcycle,gstat,
 +                  vsite,constr,fcd,graph,mdatoms,fr,
 +                  mu_tot,enerd,vir,pres,step,FALSE);
 +    EpotC = ems.epot;
 +    
 +    /* Calc derivative along line */
 +    for(gpc=0,i=0; i<n; i++) {
 +      gpc -= s[i]*fc[i];   /* f is negative gradient, thus the sign */
 +    }
 +    /* Sum the gradient along the line across CPUs */
 +    if (PAR(cr))
 +      gmx_sumd(1,&gpc,cr);
 +    
 +     /* This is the max amount of increase in energy we tolerate */
 +   tmp=sqrt(GMX_REAL_EPS)*fabs(EpotA);
 +    
 +    /* Accept the step if the energy is lower, or if it is not significantly higher
 +     * and the line derivative is still negative.
 +     */
 +    if(EpotC<EpotA || (gpc<0 && EpotC<(EpotA+tmp))) {
 +      foundlower = TRUE;
 +      /* Great, we found a better energy. Increase step for next iteration
 +       * if we are still going down, decrease it otherwise
 +       */
 +      if(gpc<0)
 +      stepsize *= 1.618034;  /* The golden section */
 +      else
 +      stepsize *= 0.618034;  /* 1/golden section */
 +    } else {
 +      /* New energy is the same or higher. We will have to do some work
 +       * to find a smaller value in the interval. Take smaller step next time!
 +       */
 +      foundlower = FALSE;
 +      stepsize *= 0.618034;
 +    }    
 +    
 +    /* OK, if we didn't find a lower value we will have to locate one now - there must
 +     * be one in the interval [a=0,c].
 +     * The same thing is valid here, though: Don't spend dozens of iterations to find
 +     * the line minimum. We try to interpolate based on the derivative at the endpoints,
 +     * and only continue until we find a lower value. In most cases this means 1-2 iterations.
 +     *
 +     * I also have a safeguard for potentially really patological functions so we never
 +     * take more than 20 steps before we give up ...
 +     *
 +     * If we already found a lower value we just skip this step and continue to the update.
 +     */
 +
 +    if(!foundlower) {
 +     
 +      nminstep=0;
 +      do {
 +      /* Select a new trial point.
 +       * If the derivatives at points a & c have different sign we interpolate to zero,
 +       * otherwise just do a bisection.
 +       */
 +      
 +      if(gpa<0 && gpc>0)
 +        b = a + gpa*(a-c)/(gpc-gpa);
 +      else
 +        b = 0.5*(a+c);                
 +      
 +      /* safeguard if interpolation close to machine accuracy causes errors:
 +       * never go outside the interval
 +       */
 +      if(b<=a || b>=c)
 +        b = 0.5*(a+c);
 +      
 +      /* Take a trial step */
 +      for (i=0; i<n; i++) 
 +        xb[i] = lastx[i] + b*s[i];
 +      
 +      neval++;
 +      /* Calculate energy for the trial step */
 +      ems.s.x = (rvec *)xb;
 +      ems.f   = (rvec *)fb;
 +      evaluate_energy(fplog,bVerbose,cr,
 +                      state,top_global,&ems,top,
 +                      inputrec,nrnb,wcycle,gstat,
 +                      vsite,constr,fcd,graph,mdatoms,fr,
 +                      mu_tot,enerd,vir,pres,step,FALSE);
 +      EpotB = ems.epot;
 +      
 +      fnorm = ems.fnorm;
 +      
 +      for(gpb=0,i=0; i<n; i++) 
 +        gpb -= s[i]*fb[i];   /* f is negative gradient, thus the sign */
 +      
 +      /* Sum the gradient along the line across CPUs */
 +      if (PAR(cr))
 +        gmx_sumd(1,&gpb,cr);
 +      
 +      /* Keep one of the intervals based on the value of the derivative at the new point */
 +      if(gpb>0) {
 +        /* Replace c endpoint with b */
 +        EpotC = EpotB;
 +        c = b;
 +        gpc = gpb;
 +        /* swap coord pointers b/c */
 +        xtmp = xb; 
 +        ftmp = fb;
 +        xb = xc; 
 +        fb = fc;
 +        xc = xtmp;
 +        fc = ftmp;
 +      } else {
 +        /* Replace a endpoint with b */
 +        EpotA = EpotB;
 +        a = b;
 +        gpa = gpb;
 +        /* swap coord pointers a/b */
 +        xtmp = xb; 
 +        ftmp = fb;
 +        xb = xa; 
 +        fb = fa;
 +        xa = xtmp; 
 +        fa = ftmp;
 +      }
 +      
 +      /* 
 +       * Stop search as soon as we find a value smaller than the endpoints,
 +       * or if the tolerance is below machine precision.
 +       * Never run more than 20 steps, no matter what.
 +       */
 +      nminstep++; 
 +      } while((EpotB>EpotA || EpotB>EpotC) && (nminstep<20));
 +
 +      if(fabs(EpotB-Epot0)<GMX_REAL_EPS || nminstep>=20) {
 +      /* OK. We couldn't find a significantly lower energy.
 +       * If ncorr==0 this was steepest descent, and then we give up.
 +       * If not, reset memory to restart as steepest descent before quitting.
 +         */
 +      if(ncorr==0) {
 +      /* Converged */
 +        converged=TRUE;
 +        break;
 +      } else {
 +        /* Reset memory */
 +        ncorr=0;
 +        /* Search in gradient direction */
 +        for(i=0;i<n;i++)
 +          dx[point][i]=ff[i];
 +        /* Reset stepsize */
 +        stepsize = 1.0/fnorm;
 +        continue;
 +      }
 +      }
 +      
 +      /* Select min energy state of A & C, put the best in xx/ff/Epot
 +       */
 +      if(EpotC<EpotA) {
 +      Epot = EpotC;
 +      /* Use state C */
 +      for(i=0;i<n;i++) {
 +        xx[i]=xc[i];
 +        ff[i]=fc[i];
 +      }
 +      stepsize=c;
 +      } else {
 +      Epot = EpotA;
 +      /* Use state A */
 +      for(i=0;i<n;i++) {
 +        xx[i]=xa[i];
 +        ff[i]=fa[i];
 +      }
 +      stepsize=a;
 +      }
 +      
 +    } else {
 +      /* found lower */
 +      Epot = EpotC;
 +      /* Use state C */
 +      for(i=0;i<n;i++) {
 +      xx[i]=xc[i];
 +      ff[i]=fc[i];
 +      }
 +      stepsize=c;
 +    }
 +
 +    /* Update the memory information, and calculate a new 
 +     * approximation of the inverse hessian 
 +     */
 +    
 +    /* Have new data in Epot, xx, ff */       
 +    if(ncorr<nmaxcorr)
 +      ncorr++;
 +
 +    for(i=0;i<n;i++) {
 +      dg[point][i]=lastf[i]-ff[i];
 +      dx[point][i]*=stepsize;
 +    }
 +    
 +    dgdg=0;
 +    dgdx=0;   
 +    for(i=0;i<n;i++) {
 +      dgdg+=dg[point][i]*dg[point][i];
 +      dgdx+=dg[point][i]*dx[point][i];
 +    }
 +    
 +    diag=dgdx/dgdg;
 +    
 +    rho[point]=1.0/dgdx;
 +    point++;
 +    
 +    if(point>=nmaxcorr)
 +      point=0;
 +    
 +    /* Update */
 +    for(i=0;i<n;i++)
 +      p[i]=ff[i];
 +    
 +    cp=point;
 +    
 +    /* Recursive update. First go back over the memory points */
 +    for(k=0;k<ncorr;k++) {
 +      cp--;
 +      if(cp<0) 
 +      cp=ncorr-1;
 +      
 +      sq=0;
 +      for(i=0;i<n;i++)
 +      sq+=dx[cp][i]*p[i];
 +      
 +      alpha[cp]=rho[cp]*sq;
 +      
 +      for(i=0;i<n;i++)
 +      p[i] -= alpha[cp]*dg[cp][i];            
 +    }
 +    
 +    for(i=0;i<n;i++)
 +      p[i] *= diag;
 +    
 +    /* And then go forward again */
 +    for(k=0;k<ncorr;k++) {
 +      yr = 0;
 +      for(i=0;i<n;i++)
 +      yr += p[i]*dg[cp][i];
 +      
 +      beta = rho[cp]*yr;          
 +      beta = alpha[cp]-beta;
 +      
 +      for(i=0;i<n;i++)
 +      p[i] += beta*dx[cp][i];
 +      
 +      cp++;   
 +      if(cp>=ncorr)
 +      cp=0;
 +    }
 +    
 +    for(i=0;i<n;i++)
 +      if(!frozen[i])
 +      dx[point][i] = p[i];
 +      else
 +      dx[point][i] = 0;
 +
 +    stepsize=1.0;
 +    
 +    /* Test whether the convergence criterion is met */
 +    get_f_norm_max(cr,&(inputrec->opts),mdatoms,f,&fnorm,&fmax,&nfmax);
 +    
 +    /* Print it if necessary */
 +    if (MASTER(cr)) {
 +      if(bVerbose)
 +      fprintf(stderr,"\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
 +              step,Epot,fnorm/sqrt(state->natoms),fmax,nfmax+1);
 +      /* Store the new (lower) energies */
 +      upd_mdebin(mdebin,FALSE,FALSE,(double)step,
 +               mdatoms->tmass,enerd,state,state->box,
 +               NULL,NULL,vir,pres,NULL,mu_tot,constr);
 +      do_log = do_per_step(step,inputrec->nstlog);
 +      do_ene = do_per_step(step,inputrec->nstenergy);
 +      if(do_log)
 +      print_ebin_header(fplog,step,step,state->lambda);
 +      print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
 +               do_log ? fplog : NULL,step,step,eprNORMAL,
 +               TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
 +    }
 +    
 +    /* Stop when the maximum force lies below tolerance.
 +     * If we have reached machine precision, converged is already set to true.
 +     */
 +    
 +    converged = converged || (fmax < inputrec->em_tol);
 +    
 +  } /* End of the loop */
 +  
 +  if(converged)       
 +    step--; /* we never took that last step in this case */
 +  
 +    if(fmax>inputrec->em_tol)
 +    {
 +        if (MASTER(cr))
 +        {
 +            warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
 +            warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
 +        }
 +        converged = FALSE; 
 +    }
 +  
 +  /* If we printed energy and/or logfile last step (which was the last step)
 +   * we don't have to do it again, but otherwise print the final values.
 +   */
 +  if(!do_log) /* Write final value to log since we didn't do anythin last step */
 +    print_ebin_header(fplog,step,step,state->lambda);
 +  if(!do_ene || !do_log) /* Write final energy file entries */
 +    print_ebin(outf->fp_ene,!do_ene,FALSE,FALSE,
 +             !do_log ? fplog : NULL,step,step,eprNORMAL,
 +             TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
 +  
 +  /* Print some stuff... */
 +  if (MASTER(cr))
 +    fprintf(stderr,"\nwriting lowest energy coordinates.\n");
 +  
 +  /* IMPORTANT!
 +   * For accurate normal mode calculation it is imperative that we
 +   * store the last conformation into the full precision binary trajectory.
 +   *
 +   * However, we should only do it if we did NOT already write this step
 +   * above (which we did if do_x or do_f was true).
 +   */  
 +  do_x = !do_per_step(step,inputrec->nstxout);
 +  do_f = !do_per_step(step,inputrec->nstfout);
 +  write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
 +                top_global,inputrec,step,
 +                &ems,state,f);
 +  
 +  if (MASTER(cr)) {
 +    print_converged(stderr,LBFGS,inputrec->em_tol,step,converged,
 +                  number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
 +    print_converged(fplog,LBFGS,inputrec->em_tol,step,converged,
 +                  number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
 +    
 +    fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
 +  }
 +  
 +  finish_em(fplog,cr,outf,runtime,wcycle);
 +
 +  /* To print the actual number of steps we needed somewhere */
 +  runtime->nsteps_done = step;
 +
 +  return 0;
 +} /* That's all folks */
 +
 +
 +double do_steep(FILE *fplog,t_commrec *cr,
 +                int nfile, const t_filenm fnm[],
 +                const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
 +                int nstglobalcomm,
 +                gmx_vsite_t *vsite,gmx_constr_t constr,
 +                int stepout,
 +                t_inputrec *inputrec,
 +                gmx_mtop_t *top_global,t_fcdata *fcd,
 +                t_state *state_global,
 +                t_mdatoms *mdatoms,
 +                t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +                gmx_edsam_t ed,
 +                t_forcerec *fr,
 +                int repl_ex_nst,int repl_ex_seed,
 +                gmx_membed_t *membed,
 +                real cpt_period,real max_hours,
 +                const char *deviceOptions,
 +                unsigned long Flags,
 +                gmx_runtime_t *runtime)
 +{ 
 +  const char *SD="Steepest Descents";
 +  em_state_t *s_min,*s_try;
 +  rvec       *f_global;
 +  gmx_localtop_t *top;
 +  gmx_enerdata_t *enerd;
 +  rvec   *f;
 +  gmx_global_stat_t gstat;
 +  t_graph    *graph;
 +  real   stepsize,constepsize;
 +  real   ustep,dvdlambda,fnormn;
 +  gmx_mdoutf_t *outf;
 +  t_mdebin   *mdebin; 
 +  gmx_bool   bDone,bAbort,do_x,do_f; 
 +  tensor vir,pres; 
 +  rvec   mu_tot;
 +  int    nsteps;
 +  int    count=0; 
 +  int    steps_accepted=0; 
 +  /* not used */
 +  real   terminate=0;
 +
 +  s_min = init_em_state();
 +  s_try = init_em_state();
 +
 +  /* Init em and store the local state in s_try */
 +  init_em(fplog,SD,cr,inputrec,
 +          state_global,top_global,s_try,&top,&f,&f_global,
 +          nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
 +          nfile,fnm,&outf,&mdebin);
 +      
 +  /* Print to log file  */
 +  print_em_start(fplog,cr,runtime,wcycle,SD);
 +    
 +  /* Set variables for stepsize (in nm). This is the largest  
 +   * step that we are going to make in any direction. 
 +   */
 +  ustep = inputrec->em_stepsize; 
 +  stepsize = 0;
 +  
 +  /* Max number of steps  */
 +  nsteps = inputrec->nsteps; 
 +  
 +  if (MASTER(cr)) 
 +    /* Print to the screen  */
 +    sp_header(stderr,SD,inputrec->em_tol,nsteps);
 +  if (fplog)
 +    sp_header(fplog,SD,inputrec->em_tol,nsteps);
 +    
 +  /**** HERE STARTS THE LOOP ****
 +   * count is the counter for the number of steps 
 +   * bDone will be TRUE when the minimization has converged
 +   * bAbort will be TRUE when nsteps steps have been performed or when
 +   * the stepsize becomes smaller than is reasonable for machine precision
 +   */
 +  count  = 0;
 +  bDone  = FALSE;
 +  bAbort = FALSE;
 +  while( !bDone && !bAbort ) {
 +    bAbort = (nsteps >= 0) && (count == nsteps);
 +    
 +    /* set new coordinates, except for first step */
 +    if (count > 0) {
 +      do_em_step(cr,inputrec,mdatoms,s_min,stepsize,s_min->f,s_try,
 +               constr,top,nrnb,wcycle,count);
 +    }
 +    
 +    evaluate_energy(fplog,bVerbose,cr,
 +                  state_global,top_global,s_try,top,
 +                  inputrec,nrnb,wcycle,gstat,
 +                  vsite,constr,fcd,graph,mdatoms,fr,
 +                  mu_tot,enerd,vir,pres,count,count==0);
 +       
 +    if (MASTER(cr))
 +      print_ebin_header(fplog,count,count,s_try->s.lambda);
 +
 +    if (count == 0)
 +      s_min->epot = s_try->epot + 1;
 +    
 +    /* Print it if necessary  */
 +    if (MASTER(cr)) {
 +      if (bVerbose) {
 +      fprintf(stderr,"Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",
 +              count,ustep,s_try->epot,s_try->fmax,s_try->a_fmax+1,
 +              (s_try->epot < s_min->epot) ? '\n' : '\r');
 +      }
 +      
 +      if (s_try->epot < s_min->epot) {
 +      /* Store the new (lower) energies  */
 +      upd_mdebin(mdebin,FALSE,FALSE,(double)count,
 +                 mdatoms->tmass,enerd,&s_try->s,s_try->s.box,
 +                 NULL,NULL,vir,pres,NULL,mu_tot,constr);
 +      print_ebin(outf->fp_ene,TRUE,
 +                 do_per_step(steps_accepted,inputrec->nstdisreout),
 +                 do_per_step(steps_accepted,inputrec->nstorireout),
 +                 fplog,count,count,eprNORMAL,TRUE,
 +                 mdebin,fcd,&(top_global->groups),&(inputrec->opts));
 +      fflush(fplog);
 +      }
 +    } 
 +    
 +    /* Now if the new energy is smaller than the previous...  
 +     * or if this is the first step!
 +     * or if we did random steps! 
 +     */
 +    
 +    if ( (count==0) || (s_try->epot < s_min->epot) ) {
 +      steps_accepted++; 
 +
 +      /* Test whether the convergence criterion is met...  */
 +      bDone = (s_try->fmax < inputrec->em_tol);
 +      
 +      /* Copy the arrays for force, positions and energy  */
 +      /* The 'Min' array always holds the coords and forces of the minimal 
 +       sampled energy  */
 +      swap_em_state(s_min,s_try);
 +      if (count > 0)
 +      ustep *= 1.2;
 +
 +      /* Write to trn, if necessary */
 +      do_x = do_per_step(steps_accepted,inputrec->nstxout);
 +      do_f = do_per_step(steps_accepted,inputrec->nstfout);
 +      write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
 +                    top_global,inputrec,count,
 +                    s_min,state_global,f_global);
 +    } 
 +    else {
 +      /* If energy is not smaller make the step smaller...  */
 +      ustep *= 0.5;
 +
 +      if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
 +      /* Reload the old state */
 +      em_dd_partition_system(fplog,count,cr,top_global,inputrec,
 +                             s_min,top,mdatoms,fr,vsite,constr,
 +                             nrnb,wcycle);
 +      }
 +    }
 +    
 +    /* Determine new step  */
 +    stepsize = ustep/s_min->fmax;
 +    
 +    /* Check if stepsize is too small, with 1 nm as a characteristic length */
 +#ifdef GMX_DOUBLE
 +        if (count == nsteps || ustep < 1e-12)
 +#else
 +        if (count == nsteps || ustep < 1e-6)
 +#endif
 +        {
 +            if (MASTER(cr))
 +            {
 +                warn_step(stderr,inputrec->em_tol,count==nsteps,constr!=NULL);
 +                warn_step(fplog ,inputrec->em_tol,count==nsteps,constr!=NULL);
 +            }
 +            bAbort=TRUE;
 +        }
 +    
 +    count++;
 +  } /* End of the loop  */
 +  
 +    /* Print some shit...  */
 +  if (MASTER(cr)) 
 +    fprintf(stderr,"\nwriting lowest energy coordinates.\n"); 
 +  write_em_traj(fplog,cr,outf,TRUE,inputrec->nstfout,ftp2fn(efSTO,nfile,fnm),
 +              top_global,inputrec,count,
 +              s_min,state_global,f_global);
 +
 +  fnormn = s_min->fnorm/sqrt(state_global->natoms);
 +
 +  if (MASTER(cr)) {
 +    print_converged(stderr,SD,inputrec->em_tol,count,bDone,nsteps,
 +                  s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
 +    print_converged(fplog,SD,inputrec->em_tol,count,bDone,nsteps,
 +                  s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
 +  }
 +
 +  finish_em(fplog,cr,outf,runtime,wcycle);
 +  
 +  /* To print the actual number of steps we needed somewhere */
 +  inputrec->nsteps=count;
 +
 +  runtime->nsteps_done = count;
 +  
 +  return 0;
 +} /* That's all folks */
 +
 +
 +double do_nm(FILE *fplog,t_commrec *cr,
 +             int nfile,const t_filenm fnm[],
 +             const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
 +             int nstglobalcomm,
 +             gmx_vsite_t *vsite,gmx_constr_t constr,
 +             int stepout,
 +             t_inputrec *inputrec,
 +             gmx_mtop_t *top_global,t_fcdata *fcd,
 +             t_state *state_global,
 +             t_mdatoms *mdatoms,
 +             t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +             gmx_edsam_t ed,
 +             t_forcerec *fr,
 +             int repl_ex_nst,int repl_ex_seed,
 +             gmx_membed_t *membed,
 +             real cpt_period,real max_hours,
 +             const char *deviceOptions,
 +             unsigned long Flags,
 +             gmx_runtime_t *runtime)
 +{
 +    const char *NM = "Normal Mode Analysis";
 +    gmx_mdoutf_t *outf;
 +    int        natoms,atom,d;
 +    int        nnodes,node;
 +    rvec       *f_global;
 +    gmx_localtop_t *top;
 +    gmx_enerdata_t *enerd;
 +    rvec       *f;
 +    gmx_global_stat_t gstat;
 +    t_graph    *graph;
 +    real       t,lambda;
 +    gmx_bool       bNS;
 +    tensor     vir,pres;
 +    rvec       mu_tot;
 +    rvec       *fneg,*dfdx;
 +    gmx_bool       bSparse; /* use sparse matrix storage format */
 +    size_t     sz;
 +    gmx_sparsematrix_t * sparse_matrix = NULL;
 +    real *     full_matrix             = NULL;
 +    em_state_t *   state_work;
 +      
 +    /* added with respect to mdrun */
 +    int        i,j,k,row,col;
 +    real       der_range=10.0*sqrt(GMX_REAL_EPS);
 +    real       x_min;
 +    real       fnorm,fmax;
 +    
 +    if (constr != NULL)
 +    {
 +        gmx_fatal(FARGS,"Constraints present with Normal Mode Analysis, this combination is not supported");
 +    }
 +
 +    state_work = init_em_state();
 +    
 +    /* Init em and store the local state in state_minimum */
 +    init_em(fplog,NM,cr,inputrec,
 +            state_global,top_global,state_work,&top,
 +            &f,&f_global,
 +            nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
 +            nfile,fnm,&outf,NULL);
 +    
 +    natoms = top_global->natoms;
 +    snew(fneg,natoms);
 +    snew(dfdx,natoms);
 +    
 +#ifndef GMX_DOUBLE
 +    if (MASTER(cr))
 +    {
 +        fprintf(stderr,
 +                "NOTE: This version of Gromacs has been compiled in single precision,\n"
 +                "      which MIGHT not be accurate enough for normal mode analysis.\n"
 +                "      Gromacs now uses sparse matrix storage, so the memory requirements\n"
 +                "      are fairly modest even if you recompile in double precision.\n\n");
 +    }
 +#endif
 +    
 +    /* Check if we can/should use sparse storage format.
 +     *
 +     * Sparse format is only useful when the Hessian itself is sparse, which it
 +      * will be when we use a cutoff.    
 +      * For small systems (n<1000) it is easier to always use full matrix format, though.
 +      */
 +    if(EEL_FULL(fr->eeltype) || fr->rlist==0.0)
 +    {
 +        fprintf(stderr,"Non-cutoff electrostatics used, forcing full Hessian format.\n");
 +        bSparse = FALSE;
 +    }
 +    else if(top_global->natoms < 1000)
 +    {
 +        fprintf(stderr,"Small system size (N=%d), using full Hessian format.\n",top_global->natoms);
 +        bSparse = FALSE;
 +    }
 +    else
 +    {
 +        fprintf(stderr,"Using compressed symmetric sparse Hessian format.\n");
 +        bSparse = TRUE;
 +    }
 +    
 +    sz = DIM*top_global->natoms;
 +    
 +    fprintf(stderr,"Allocating Hessian memory...\n\n");
 +
 +    if(bSparse)
 +    {
 +        sparse_matrix=gmx_sparsematrix_init(sz);
 +        sparse_matrix->compressed_symmetric = TRUE;
 +    }
 +    else
 +    {
 +        snew(full_matrix,sz*sz);
 +    }
 +    
 +    /* Initial values */
 +    t      = inputrec->init_t;
 +    lambda = inputrec->init_lambda;
 +    
 +    init_nrnb(nrnb);
 +    
 +    where();
 +    
 +    /* Write start time and temperature */
 +    print_em_start(fplog,cr,runtime,wcycle,NM);
 +
 +    /* fudge nr of steps to nr of atoms */
 +    inputrec->nsteps = natoms*2;
 +
 +    if (MASTER(cr)) 
 +    {
 +        fprintf(stderr,"starting normal mode calculation '%s'\n%d steps.\n\n",
 +                *(top_global->name),(int)inputrec->nsteps);
 +    }
 +
 +    nnodes = cr->nnodes;
 +   
 +    /* Make evaluate_energy do a single node force calculation */
 +    cr->nnodes = 1;
 +    evaluate_energy(fplog,bVerbose,cr,
 +                    state_global,top_global,state_work,top,
 +                    inputrec,nrnb,wcycle,gstat,
 +                    vsite,constr,fcd,graph,mdatoms,fr,
 +                    mu_tot,enerd,vir,pres,-1,TRUE);
 +    cr->nnodes = nnodes;
 +
 +    /* if forces are not small, warn user */
 +    get_state_f_norm_max(cr,&(inputrec->opts),mdatoms,state_work);
 +
 +    if (MASTER(cr))
 +    {
 +        fprintf(stderr,"Maximum force:%12.5e\n",state_work->fmax);
 +        if (state_work->fmax > 1.0e-3) 
 +        {
 +            fprintf(stderr,"Maximum force probably not small enough to");
 +            fprintf(stderr," ensure that you are in an \nenergy well. ");
 +            fprintf(stderr,"Be aware that negative eigenvalues may occur");
 +            fprintf(stderr," when the\nresulting matrix is diagonalized.\n");
 +        }
 +    }
 +    
 +    /***********************************************************
 +     *
 +     *      Loop over all pairs in matrix 
 +     * 
 +     *      do_force called twice. Once with positive and 
 +     *      once with negative displacement 
 +     *
 +     ************************************************************/
 +
 +    /* Steps are divided one by one over the nodes */
 +    for(atom=cr->nodeid; atom<natoms; atom+=nnodes) 
 +    {
 +        
 +        for (d=0; d<DIM; d++) 
 +        {
 +            x_min = state_work->s.x[atom][d];
 +
 +            state_work->s.x[atom][d] = x_min - der_range;
 +          
 +            /* Make evaluate_energy do a single node force calculation */
 +            cr->nnodes = 1;
 +            evaluate_energy(fplog,bVerbose,cr,
 +                            state_global,top_global,state_work,top,
 +                            inputrec,nrnb,wcycle,gstat,
 +                            vsite,constr,fcd,graph,mdatoms,fr,
 +                            mu_tot,enerd,vir,pres,atom*2,FALSE);
 +                      
 +            for(i=0; i<natoms; i++)
 +            {
 +                copy_rvec(state_work->f[i], fneg[i]);
 +            }
 +            
 +            state_work->s.x[atom][d] = x_min + der_range;
 +            
 +            evaluate_energy(fplog,bVerbose,cr,
 +                            state_global,top_global,state_work,top,
 +                            inputrec,nrnb,wcycle,gstat,
 +                            vsite,constr,fcd,graph,mdatoms,fr,
 +                            mu_tot,enerd,vir,pres,atom*2+1,FALSE);
 +            cr->nnodes = nnodes;
 +
 +            /* x is restored to original */
 +            state_work->s.x[atom][d] = x_min;
 +
 +            for(j=0; j<natoms; j++) 
 +            {
 +                for (k=0; (k<DIM); k++) 
 +                {
 +                    dfdx[j][k] =
 +                        -(state_work->f[j][k] - fneg[j][k])/(2*der_range);
 +                }
 +            }
 +
 +            if (!MASTER(cr))
 +            {
 +#ifdef GMX_MPI
 +#ifdef GMX_DOUBLE
 +#define mpi_type MPI_DOUBLE
 +#else
 +#define mpi_type MPI_FLOAT
 +#endif
 +                MPI_Send(dfdx[0],natoms*DIM,mpi_type,MASTERNODE(cr),cr->nodeid,
 +                         cr->mpi_comm_mygroup);
 +#endif
 +            }
 +            else
 +            {
 +                for(node=0; (node<nnodes && atom+node<natoms); node++)
 +                {
 +                    if (node > 0)
 +                    {
 +#ifdef GMX_MPI
 +                        MPI_Status stat;
 +                        MPI_Recv(dfdx[0],natoms*DIM,mpi_type,node,node,
 +                                 cr->mpi_comm_mygroup,&stat);
 +#undef mpi_type
 +#endif
 +                    }
 +
 +                    row = (atom + node)*DIM + d;
 +
 +                    for(j=0; j<natoms; j++) 
 +                    {
 +                        for(k=0; k<DIM; k++) 
 +                        {
 +                            col = j*DIM + k;
 +                            
 +                            if (bSparse)
 +                            {
 +                                if (col >= row && dfdx[j][k] != 0.0)
 +                                {
 +                                    gmx_sparsematrix_increment_value(sparse_matrix,
 +                                                                     row,col,dfdx[j][k]);
 +                                }
 +                            }
 +                            else
 +                            {
 +                                full_matrix[row*sz+col] = dfdx[j][k];
 +                            }
 +                        }
 +                    }
 +                }
 +            }
 +            
 +            if (bVerbose && fplog)
 +            {
 +                fflush(fplog);            
 +            }
 +        }
 +        /* write progress */
 +        if (MASTER(cr) && bVerbose) 
 +        {
 +            fprintf(stderr,"\rFinished step %d out of %d",
 +                    min(atom+nnodes,natoms),natoms); 
 +            fflush(stderr);
 +        }
 +    }
 +    
 +    if (MASTER(cr)) 
 +    {
 +        fprintf(stderr,"\n\nWriting Hessian...\n");
 +        gmx_mtxio_write(ftp2fn(efMTX,nfile,fnm),sz,sz,full_matrix,sparse_matrix);
 +    }
 +
 +    finish_em(fplog,cr,outf,runtime,wcycle);
 +
 +    runtime->nsteps_done = natoms*2;
 +    
 +    return 0;
 +}
Simple merge
index 87dc2e45e9b4aae1823eb15ebdbbaad07d1894d3,0000000000000000000000000000000000000000..2f5d2bc2a6efc39c04a57d478d58f5ea12c0ebfa
mode 100644,000000..100644
--- /dev/null
@@@ -1,4268 -1,0 +1,4230 @@@
-     fclose(fp);
-     fclose(fp2);
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + *
 + *                This source code is part of
 + *
 + *                 G   R   O   M   A   C   S
 + *
 + *          GROningen MAchine for Chemical Simulations
 + *
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + *
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + *
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + *
 + * For more info, check our website at http://www.gromacs.org
 + *
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +/* IMPORTANT FOR DEVELOPERS:
 + *
 + * Triclinic pme stuff isn't entirely trivial, and we've experienced
 + * some bugs during development (many of them due to me). To avoid
 + * this in the future, please check the following things if you make
 + * changes in this file:
 + *
 + * 1. You should obtain identical (at least to the PME precision)
 + *    energies, forces, and virial for
 + *    a rectangular box and a triclinic one where the z (or y) axis is
 + *    tilted a whole box side. For instance you could use these boxes:
 + *
 + *    rectangular       triclinic
 + *     2  0  0           2  0  0
 + *     0  2  0           0  2  0
 + *     0  0  6           2  2  6
 + *
 + * 2. You should check the energy conservation in a triclinic box.
 + *
 + * It might seem an overkill, but better safe than sorry.
 + * /Erik 001109
 + */
 +
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#ifdef GMX_LIB_MPI
 +#include <mpi.h>
 +#endif
 +#ifdef GMX_THREAD_MPI
 +#include "tmpi.h"
 +#endif
 +
 +#ifdef GMX_OPENMP
 +#include <omp.h>
 +#endif
 +
 +#include <stdio.h>
 +#include <string.h>
 +#include <math.h>
 +#include "typedefs.h"
 +#include "txtdump.h"
 +#include "vec.h"
 +#include "gmxcomplex.h"
 +#include "smalloc.h"
 +#include "futil.h"
 +#include "coulomb.h"
 +#include "gmx_fatal.h"
 +#include "pme.h"
 +#include "network.h"
 +#include "physics.h"
 +#include "nrnb.h"
 +#include "copyrite.h"
 +#include "gmx_wallcycle.h"
 +#include "gmx_parallel_3dfft.h"
 +#include "pdbio.h"
 +#include "gmx_cyclecounter.h"
 +#include "macros.h"
 +
 +#if ( !defined(GMX_DOUBLE) && ( defined(GMX_IA32_SSE) || defined(GMX_X86_64_SSE) || defined(GMX_X86_64_SSE2) ) )
 +#include "gmx_sse2_single.h"
 +
 +#define PME_SSE
 +/* Some old AMD processors could have problems with unaligned loads+stores */
 +#ifndef GMX_FAHCORE
 +#define PME_SSE_UNALIGNED
 +#endif
 +#endif
 +
 +#define DFT_TOL 1e-7
 +/* #define PRT_FORCE */
 +/* conditions for on the fly time-measurement */
 +/* #define TAKETIME (step > 1 && timesteps < 10) */
 +#define TAKETIME FALSE
 +
 +/* #define PME_TIME_THREADS */
 +
 +#ifdef GMX_DOUBLE
 +#define mpi_type MPI_DOUBLE
 +#else
 +#define mpi_type MPI_FLOAT
 +#endif
 +
 +/* GMX_CACHE_SEP should be a multiple of 16 to preserve alignment */
 +#define GMX_CACHE_SEP 64
 +
 +/* We only define a maximum to be able to use local arrays without allocation.
 + * An order larger than 12 should never be needed, even for test cases.
 + * If needed it can be changed here.
 + */
 +#define PME_ORDER_MAX 12
 +
 +/* Internal datastructures */
 +typedef struct {
 +    int send_index0;
 +    int send_nindex;
 +    int recv_index0;
 +    int recv_nindex;
 +} pme_grid_comm_t;
 +
 +typedef struct {
 +#ifdef GMX_MPI
 +    MPI_Comm mpi_comm;
 +#endif
 +    int  nnodes,nodeid;
 +    int  *s2g0;
 +    int  *s2g1;
 +    int  noverlap_nodes;
 +    int  *send_id,*recv_id;
 +    pme_grid_comm_t *comm_data;
 +    real *sendbuf;
 +    real *recvbuf;
 +} pme_overlap_t;
 +
 +typedef struct {
 +    int *n;     /* Cumulative counts of the number of particles per thread */
 +    int nalloc; /* Allocation size of i */
 +    int *i;     /* Particle indices ordered on thread index (n) */
 +} thread_plist_t;
 +
 +typedef struct {
 +    int  n;
 +    int  *ind;
 +    splinevec theta;
 +    splinevec dtheta;
 +} splinedata_t;
 +
 +typedef struct {
 +    int  dimind;            /* The index of the dimension, 0=x, 1=y */
 +    int  nslab;
 +    int  nodeid;
 +#ifdef GMX_MPI
 +    MPI_Comm mpi_comm;
 +#endif
 +
 +    int  *node_dest;        /* The nodes to send x and q to with DD */
 +    int  *node_src;         /* The nodes to receive x and q from with DD */
 +    int  *buf_index;        /* Index for commnode into the buffers */
 +
 +    int  maxshift;
 +
 +    int  npd;
 +    int  pd_nalloc;
 +    int  *pd;
 +    int  *count;            /* The number of atoms to send to each node */
 +    int  **count_thread;
 +    int  *rcount;           /* The number of atoms to receive */
 +
 +    int  n;
 +    int  nalloc;
 +    rvec *x;
 +    real *q;
 +    rvec *f;
 +    gmx_bool bSpread;       /* These coordinates are used for spreading */
 +    int  pme_order;
 +    ivec *idx;
 +    rvec *fractx;            /* Fractional coordinate relative to the
 +                              * lower cell boundary
 +                              */
 +    int  nthread;
 +    int  *thread_idx;        /* Which thread should spread which charge */
 +    thread_plist_t *thread_plist;
 +    splinedata_t *spline;
 +} pme_atomcomm_t;
 +
 +#define FLBS  3
 +#define FLBSZ 4
 +
 +typedef struct {
 +    ivec ci;     /* The spatial location of this grid       */
 +    ivec n;      /* The size of *grid, including order-1    */
 +    ivec offset; /* The grid offset from the full node grid */
 +    int  order;  /* PME spreading order                     */
 +    real *grid;  /* The grid local thread, size n           */
 +} pmegrid_t;
 +
 +typedef struct {
 +    pmegrid_t grid;     /* The full node grid (non thread-local)            */
 +    int  nthread;       /* The number of threads operating on this grid     */
 +    ivec nc;            /* The local spatial decomposition over the threads */
 +    pmegrid_t *grid_th; /* Array of grids for each thread                   */
 +    int  **g2t;         /* The grid to thread index                         */
 +    ivec nthread_comm;  /* The number of threads to communicate with        */
 +} pmegrids_t;
 +
 +
 +typedef struct {
 +#ifdef PME_SSE
 +    /* Masks for SSE aligned spreading and gathering */
 +    __m128 mask_SSE0[6],mask_SSE1[6];
 +#else
 +    int dummy; /* C89 requires that struct has at least one member */
 +#endif
 +} pme_spline_work_t;
 +
 +typedef struct {
 +    /* work data for solve_pme */
 +    int      nalloc;
 +    real *   mhx;
 +    real *   mhy;
 +    real *   mhz;
 +    real *   m2;
 +    real *   denom;
 +    real *   tmp1_alloc;
 +    real *   tmp1;
 +    real *   eterm;
 +    real *   m2inv;
 +
 +    real     energy;
 +    matrix   vir;
 +} pme_work_t;
 +
 +typedef struct gmx_pme {
 +    int  ndecompdim;         /* The number of decomposition dimensions */
 +    int  nodeid;             /* Our nodeid in mpi->mpi_comm */
 +    int  nodeid_major;
 +    int  nodeid_minor;
 +    int  nnodes;             /* The number of nodes doing PME */
 +    int  nnodes_major;
 +    int  nnodes_minor;
 +
 +    MPI_Comm mpi_comm;
 +    MPI_Comm mpi_comm_d[2];  /* Indexed on dimension, 0=x, 1=y */
 +#ifdef GMX_MPI
 +    MPI_Datatype  rvec_mpi;  /* the pme vector's MPI type */
 +#endif
 +
 +    int  nthread;            /* The number of threads doing PME */
 +
 +    gmx_bool bPPnode;        /* Node also does particle-particle forces */
 +    gmx_bool bFEP;           /* Compute Free energy contribution */
 +    int nkx,nky,nkz;         /* Grid dimensions */
 +    int pme_order;
 +    real epsilon_r;
 +
 +    pmegrids_t pmegridA;  /* Grids on which we do spreading/interpolation, includes overlap */
 +    pmegrids_t pmegridB;
 +    /* The PME charge spreading grid sizes/strides, includes pme_order-1 */
 +    int     pmegrid_nx,pmegrid_ny,pmegrid_nz;
 +    /* pmegrid_nz might be larger than strictly necessary to ensure
 +     * memory alignment, pmegrid_nz_base gives the real base size.
 +     */
 +    int     pmegrid_nz_base;
 +    /* The local PME grid starting indices */
 +    int     pmegrid_start_ix,pmegrid_start_iy,pmegrid_start_iz;
 +
 +    /* Work data for spreading and gathering */
 +    pme_spline_work_t spline_work;
 +
 +    real *fftgridA;             /* Grids for FFT. With 1D FFT decomposition this can be a pointer */
 +    real *fftgridB;             /* inside the interpolation grid, but separate for 2D PME decomp. */
 +    int   fftgrid_nx,fftgrid_ny,fftgrid_nz;
 +
 +    t_complex *cfftgridA;             /* Grids for complex FFT data */
 +    t_complex *cfftgridB;
 +    int   cfftgrid_nx,cfftgrid_ny,cfftgrid_nz;
 +
 +    gmx_parallel_3dfft_t  pfft_setupA;
 +    gmx_parallel_3dfft_t  pfft_setupB;
 +
 +    int  *nnx,*nny,*nnz;
 +    real *fshx,*fshy,*fshz;
 +
 +    pme_atomcomm_t atc[2];  /* Indexed on decomposition index */
 +    matrix    recipbox;
 +    splinevec bsp_mod;
 +
 +    pme_overlap_t overlap[2]; /* Indexed on dimension, 0=x, 1=y */
 +
 +    pme_atomcomm_t atc_energy; /* Only for gmx_pme_calc_energy */
 +
 +    rvec *bufv;             /* Communication buffer */
 +    real *bufr;             /* Communication buffer */
 +    int  buf_nalloc;        /* The communication buffer size */
 +
 +    /* thread local work data for solve_pme */
 +    pme_work_t *work;
 +
 +    /* Work data for PME_redist */
 +    gmx_bool redist_init;
 +    int *    scounts;
 +    int *    rcounts;
 +    int *    sdispls;
 +    int *    rdispls;
 +    int *    sidx;
 +    int *    idxa;
 +    real *   redist_buf;
 +    int      redist_buf_nalloc;
 +
 +    /* Work data for sum_qgrid */
 +    real *   sum_qgrid_tmp;
 +    real *   sum_qgrid_dd_tmp;
 +} t_gmx_pme;
 +
 +
 +static void calc_interpolation_idx(gmx_pme_t pme,pme_atomcomm_t *atc,
 +                                   int start,int end,int thread)
 +{
 +    int  i;
 +    int  *idxptr,tix,tiy,tiz;
 +    real *xptr,*fptr,tx,ty,tz;
 +    real rxx,ryx,ryy,rzx,rzy,rzz;
 +    int  nx,ny,nz;
 +    int  start_ix,start_iy,start_iz;
 +    int  *g2tx,*g2ty,*g2tz;
 +    gmx_bool bThreads;
 +    int  *thread_idx=NULL;
 +    thread_plist_t *tpl=NULL;
 +    int  *tpl_n=NULL;
 +    int  thread_i;
 +
 +    nx  = pme->nkx;
 +    ny  = pme->nky;
 +    nz  = pme->nkz;
 +
 +    start_ix = pme->pmegrid_start_ix;
 +    start_iy = pme->pmegrid_start_iy;
 +    start_iz = pme->pmegrid_start_iz;
 +
 +    rxx = pme->recipbox[XX][XX];
 +    ryx = pme->recipbox[YY][XX];
 +    ryy = pme->recipbox[YY][YY];
 +    rzx = pme->recipbox[ZZ][XX];
 +    rzy = pme->recipbox[ZZ][YY];
 +    rzz = pme->recipbox[ZZ][ZZ];
 +
 +    g2tx = pme->pmegridA.g2t[XX];
 +    g2ty = pme->pmegridA.g2t[YY];
 +    g2tz = pme->pmegridA.g2t[ZZ];
 +
 +    bThreads = (atc->nthread > 1);
 +    if (bThreads)
 +    {
 +        thread_idx = atc->thread_idx;
 +
 +        tpl   = &atc->thread_plist[thread];
 +        tpl_n = tpl->n;
 +        for(i=0; i<atc->nthread; i++)
 +        {
 +            tpl_n[i] = 0;
 +        }
 +    }
 +
 +    for(i=start; i<end; i++) {
 +        xptr   = atc->x[i];
 +        idxptr = atc->idx[i];
 +        fptr   = atc->fractx[i];
 +
 +        /* Fractional coordinates along box vectors, add 2.0 to make 100% sure we are positive for triclinic boxes */
 +        tx = nx * ( xptr[XX] * rxx + xptr[YY] * ryx + xptr[ZZ] * rzx + 2.0 );
 +        ty = ny * (                  xptr[YY] * ryy + xptr[ZZ] * rzy + 2.0 );
 +        tz = nz * (                                   xptr[ZZ] * rzz + 2.0 );
 +
 +        tix = (int)(tx);
 +        tiy = (int)(ty);
 +        tiz = (int)(tz);
 +
 +        /* Because decomposition only occurs in x and y,
 +         * we never have a fraction correction in z.
 +         */
 +        fptr[XX] = tx - tix + pme->fshx[tix];
 +        fptr[YY] = ty - tiy + pme->fshy[tiy];
 +        fptr[ZZ] = tz - tiz;
 +
 +        idxptr[XX] = pme->nnx[tix];
 +        idxptr[YY] = pme->nny[tiy];
 +        idxptr[ZZ] = pme->nnz[tiz];
 +
 +#ifdef DEBUG
 +        range_check(idxptr[XX],0,pme->pmegrid_nx);
 +        range_check(idxptr[YY],0,pme->pmegrid_ny);
 +        range_check(idxptr[ZZ],0,pme->pmegrid_nz);
 +#endif
 +
 +        if (bThreads)
 +        {
 +            thread_i = g2tx[idxptr[XX]] + g2ty[idxptr[YY]] + g2tz[idxptr[ZZ]];
 +            thread_idx[i] = thread_i;
 +            tpl_n[thread_i]++;
 +        }
 +    }
 +
 +    if (bThreads)
 +    {
 +        /* Make a list of particle indices sorted on thread */
 +
 +        /* Get the cumulative count */
 +        for(i=1; i<atc->nthread; i++)
 +        {
 +            tpl_n[i] += tpl_n[i-1];
 +        }
 +        /* The current implementation distributes particles equally
 +         * over the threads, so we could actually allocate for that
 +         * in pme_realloc_atomcomm_things.
 +         */
 +        if (tpl_n[atc->nthread-1] > tpl->nalloc)
 +        {
 +            tpl->nalloc = over_alloc_large(tpl_n[atc->nthread-1]);
 +            srenew(tpl->i,tpl->nalloc);
 +        }
 +        /* Set tpl_n to the cumulative start */
 +        for(i=atc->nthread-1; i>=1; i--)
 +        {
 +            tpl_n[i] = tpl_n[i-1];
 +        }
 +        tpl_n[0] = 0;
 +
 +        /* Fill our thread local array with indices sorted on thread */
 +        for(i=start; i<end; i++)
 +        {
 +            tpl->i[tpl_n[atc->thread_idx[i]]++] = i;
 +        }
 +        /* Now tpl_n contains the cummulative count again */
 +    }
 +}
 +
 +static void make_thread_local_ind(pme_atomcomm_t *atc,
 +                                  int thread,splinedata_t *spline)
 +{
 +    int  n,t,i,start,end;
 +    thread_plist_t *tpl;
 +
 +    /* Combine the indices made by each thread into one index */
 +
 +    n = 0;
 +    start = 0;
 +    for(t=0; t<atc->nthread; t++)
 +    {
 +        tpl = &atc->thread_plist[t];
 +        /* Copy our part (start - end) from the list of thread t */
 +        if (thread > 0)
 +        {
 +            start = tpl->n[thread-1];
 +        }
 +        end = tpl->n[thread];
 +        for(i=start; i<end; i++)
 +        {
 +            spline->ind[n++] = tpl->i[i];
 +        }
 +    }
 +
 +    spline->n = n;
 +}
 +
 +
 +static void pme_calc_pidx(int start, int end,
 +                          matrix recipbox, rvec x[],
 +                          pme_atomcomm_t *atc, int *count)
 +{
 +    int  nslab,i;
 +    int  si;
 +    real *xptr,s;
 +    real rxx,ryx,rzx,ryy,rzy;
 +    int *pd;
 +
 +    /* Calculate PME task index (pidx) for each grid index.
 +     * Here we always assign equally sized slabs to each node
 +     * for load balancing reasons (the PME grid spacing is not used).
 +     */
 +
 +    nslab = atc->nslab;
 +    pd    = atc->pd;
 +
 +    /* Reset the count */
 +    for(i=0; i<nslab; i++)
 +    {
 +        count[i] = 0;
 +    }
 +
 +    if (atc->dimind == 0)
 +    {
 +        rxx = recipbox[XX][XX];
 +        ryx = recipbox[YY][XX];
 +        rzx = recipbox[ZZ][XX];
 +        /* Calculate the node index in x-dimension */
 +        for(i=start; i<end; i++)
 +        {
 +            xptr   = x[i];
 +            /* Fractional coordinates along box vectors */
 +            s = nslab*(xptr[XX]*rxx + xptr[YY]*ryx + xptr[ZZ]*rzx);
 +            si = (int)(s + 2*nslab) % nslab;
 +            pd[i] = si;
 +            count[si]++;
 +        }
 +    }
 +    else
 +    {
 +        ryy = recipbox[YY][YY];
 +        rzy = recipbox[ZZ][YY];
 +        /* Calculate the node index in y-dimension */
 +        for(i=start; i<end; i++)
 +        {
 +            xptr   = x[i];
 +            /* Fractional coordinates along box vectors */
 +            s = nslab*(xptr[YY]*ryy + xptr[ZZ]*rzy);
 +            si = (int)(s + 2*nslab) % nslab;
 +            pd[i] = si;
 +            count[si]++;
 +        }
 +    }
 +}
 +
 +static void pme_calc_pidx_wrapper(int natoms, matrix recipbox, rvec x[],
 +                                  pme_atomcomm_t *atc)
 +{
 +    int nthread,thread,slab;
 +
 +    nthread = atc->nthread;
 +
 +#pragma omp parallel for num_threads(nthread) schedule(static)
 +    for(thread=0; thread<nthread; thread++)
 +    {
 +        pme_calc_pidx(natoms* thread   /nthread,
 +                      natoms*(thread+1)/nthread,
 +                      recipbox,x,atc,atc->count_thread[thread]);
 +    }
 +    /* Non-parallel reduction, since nslab is small */
 +
 +    for(thread=1; thread<nthread; thread++)
 +    {
 +        for(slab=0; slab<atc->nslab; slab++)
 +        {
 +            atc->count_thread[0][slab] += atc->count_thread[thread][slab];
 +        }
 +    }
 +}
 +
 +static void pme_realloc_splinedata(splinedata_t *spline, pme_atomcomm_t *atc)
 +{
 +    int i,d;
 +
 +    srenew(spline->ind,atc->nalloc);
 +    /* Initialize the index to identity so it works without threads */
 +    for(i=0; i<atc->nalloc; i++)
 +    {
 +        spline->ind[i] = i;
 +    }
 +
 +    for(d=0;d<DIM;d++)
 +    {
 +        srenew(spline->theta[d] ,atc->pme_order*atc->nalloc);
 +        srenew(spline->dtheta[d],atc->pme_order*atc->nalloc);
 +    }
 +}
 +
 +static void pme_realloc_atomcomm_things(pme_atomcomm_t *atc)
 +{
 +    int nalloc_old,i,j,nalloc_tpl;
 +
 +    /* We have to avoid a NULL pointer for atc->x to avoid
 +     * possible fatal errors in MPI routines.
 +     */
 +    if (atc->n > atc->nalloc || atc->nalloc == 0)
 +    {
 +        nalloc_old = atc->nalloc;
 +        atc->nalloc = over_alloc_dd(max(atc->n,1));
 +
 +        if (atc->nslab > 1) {
 +            srenew(atc->x,atc->nalloc);
 +            srenew(atc->q,atc->nalloc);
 +            srenew(atc->f,atc->nalloc);
 +            for(i=nalloc_old; i<atc->nalloc; i++)
 +            {
 +                clear_rvec(atc->f[i]);
 +            }
 +        }
 +        if (atc->bSpread) {
 +            srenew(atc->fractx,atc->nalloc);
 +            srenew(atc->idx   ,atc->nalloc);
 +
 +            if (atc->nthread > 1)
 +            {
 +                srenew(atc->thread_idx,atc->nalloc);
 +            }
 +
 +            for(i=0; i<atc->nthread; i++)
 +            {
 +                pme_realloc_splinedata(&atc->spline[i],atc);
 +            }
 +        }
 +    }
 +}
 +
 +static void pmeredist_pd(gmx_pme_t pme, gmx_bool forw,
 +                         int n, gmx_bool bXF, rvec *x_f, real *charge,
 +                         pme_atomcomm_t *atc)
 +/* Redistribute particle data for PME calculation */
 +/* domain decomposition by x coordinate           */
 +{
 +    int *idxa;
 +    int i, ii;
 +
 +    if(FALSE == pme->redist_init) {
 +        snew(pme->scounts,atc->nslab);
 +        snew(pme->rcounts,atc->nslab);
 +        snew(pme->sdispls,atc->nslab);
 +        snew(pme->rdispls,atc->nslab);
 +        snew(pme->sidx,atc->nslab);
 +        pme->redist_init = TRUE;
 +    }
 +    if (n > pme->redist_buf_nalloc) {
 +        pme->redist_buf_nalloc = over_alloc_dd(n);
 +        srenew(pme->redist_buf,pme->redist_buf_nalloc*DIM);
 +    }
 +
 +    pme->idxa = atc->pd;
 +
 +#ifdef GMX_MPI
 +    if (forw && bXF) {
 +        /* forward, redistribution from pp to pme */
 +
 +        /* Calculate send counts and exchange them with other nodes */
 +        for(i=0; (i<atc->nslab); i++) pme->scounts[i]=0;
 +        for(i=0; (i<n); i++) pme->scounts[pme->idxa[i]]++;
 +        MPI_Alltoall( pme->scounts, 1, MPI_INT, pme->rcounts, 1, MPI_INT, atc->mpi_comm);
 +
 +        /* Calculate send and receive displacements and index into send
 +           buffer */
 +        pme->sdispls[0]=0;
 +        pme->rdispls[0]=0;
 +        pme->sidx[0]=0;
 +        for(i=1; i<atc->nslab; i++) {
 +            pme->sdispls[i]=pme->sdispls[i-1]+pme->scounts[i-1];
 +            pme->rdispls[i]=pme->rdispls[i-1]+pme->rcounts[i-1];
 +            pme->sidx[i]=pme->sdispls[i];
 +        }
 +        /* Total # of particles to be received */
 +        atc->n = pme->rdispls[atc->nslab-1] + pme->rcounts[atc->nslab-1];
 +
 +        pme_realloc_atomcomm_things(atc);
 +
 +        /* Copy particle coordinates into send buffer and exchange*/
 +        for(i=0; (i<n); i++) {
 +            ii=DIM*pme->sidx[pme->idxa[i]];
 +            pme->sidx[pme->idxa[i]]++;
 +            pme->redist_buf[ii+XX]=x_f[i][XX];
 +            pme->redist_buf[ii+YY]=x_f[i][YY];
 +            pme->redist_buf[ii+ZZ]=x_f[i][ZZ];
 +        }
 +        MPI_Alltoallv(pme->redist_buf, pme->scounts, pme->sdispls,
 +                      pme->rvec_mpi, atc->x, pme->rcounts, pme->rdispls,
 +                      pme->rvec_mpi, atc->mpi_comm);
 +    }
 +    if (forw) {
 +        /* Copy charge into send buffer and exchange*/
 +        for(i=0; i<atc->nslab; i++) pme->sidx[i]=pme->sdispls[i];
 +        for(i=0; (i<n); i++) {
 +            ii=pme->sidx[pme->idxa[i]];
 +            pme->sidx[pme->idxa[i]]++;
 +            pme->redist_buf[ii]=charge[i];
 +        }
 +        MPI_Alltoallv(pme->redist_buf, pme->scounts, pme->sdispls, mpi_type,
 +                      atc->q, pme->rcounts, pme->rdispls, mpi_type,
 +                      atc->mpi_comm);
 +    }
 +    else { /* backward, redistribution from pme to pp */
 +        MPI_Alltoallv(atc->f, pme->rcounts, pme->rdispls, pme->rvec_mpi,
 +                      pme->redist_buf, pme->scounts, pme->sdispls,
 +                      pme->rvec_mpi, atc->mpi_comm);
 +
 +        /* Copy data from receive buffer */
 +        for(i=0; i<atc->nslab; i++)
 +            pme->sidx[i] = pme->sdispls[i];
 +        for(i=0; (i<n); i++) {
 +            ii = DIM*pme->sidx[pme->idxa[i]];
 +            x_f[i][XX] += pme->redist_buf[ii+XX];
 +            x_f[i][YY] += pme->redist_buf[ii+YY];
 +            x_f[i][ZZ] += pme->redist_buf[ii+ZZ];
 +            pme->sidx[pme->idxa[i]]++;
 +        }
 +    }
 +#endif
 +}
 +
 +static void pme_dd_sendrecv(pme_atomcomm_t *atc,
 +                            gmx_bool bBackward,int shift,
 +                            void *buf_s,int nbyte_s,
 +                            void *buf_r,int nbyte_r)
 +{
 +#ifdef GMX_MPI
 +    int dest,src;
 +    MPI_Status stat;
 +
 +    if (bBackward == FALSE) {
 +        dest = atc->node_dest[shift];
 +        src  = atc->node_src[shift];
 +    } else {
 +        dest = atc->node_src[shift];
 +        src  = atc->node_dest[shift];
 +    }
 +
 +    if (nbyte_s > 0 && nbyte_r > 0) {
 +        MPI_Sendrecv(buf_s,nbyte_s,MPI_BYTE,
 +                     dest,shift,
 +                     buf_r,nbyte_r,MPI_BYTE,
 +                     src,shift,
 +                     atc->mpi_comm,&stat);
 +    } else if (nbyte_s > 0) {
 +        MPI_Send(buf_s,nbyte_s,MPI_BYTE,
 +                 dest,shift,
 +                 atc->mpi_comm);
 +    } else if (nbyte_r > 0) {
 +        MPI_Recv(buf_r,nbyte_r,MPI_BYTE,
 +                 src,shift,
 +                 atc->mpi_comm,&stat);
 +    }
 +#endif
 +}
 +
 +static void dd_pmeredist_x_q(gmx_pme_t pme,
 +                             int n, gmx_bool bX, rvec *x, real *charge,
 +                             pme_atomcomm_t *atc)
 +{
 +    int *commnode,*buf_index;
 +    int nnodes_comm,i,nsend,local_pos,buf_pos,node,scount,rcount;
 +
 +    commnode  = atc->node_dest;
 +    buf_index = atc->buf_index;
 +
 +    nnodes_comm = min(2*atc->maxshift,atc->nslab-1);
 +
 +    nsend = 0;
 +    for(i=0; i<nnodes_comm; i++) {
 +        buf_index[commnode[i]] = nsend;
 +        nsend += atc->count[commnode[i]];
 +    }
 +    if (bX) {
 +        if (atc->count[atc->nodeid] + nsend != n)
 +            gmx_fatal(FARGS,"%d particles communicated to PME node %d are more than 2/3 times the cut-off out of the domain decomposition cell of their charge group in dimension %c.\n"
 +                      "This usually means that your system is not well equilibrated.",
 +                      n - (atc->count[atc->nodeid] + nsend),
 +                      pme->nodeid,'x'+atc->dimind);
 +
 +        if (nsend > pme->buf_nalloc) {
 +            pme->buf_nalloc = over_alloc_dd(nsend);
 +            srenew(pme->bufv,pme->buf_nalloc);
 +            srenew(pme->bufr,pme->buf_nalloc);
 +        }
 +
 +        atc->n = atc->count[atc->nodeid];
 +        for(i=0; i<nnodes_comm; i++) {
 +            scount = atc->count[commnode[i]];
 +            /* Communicate the count */
 +            if (debug)
 +                fprintf(debug,"dimind %d PME node %d send to node %d: %d\n",
 +                        atc->dimind,atc->nodeid,commnode[i],scount);
 +            pme_dd_sendrecv(atc,FALSE,i,
 +                            &scount,sizeof(int),
 +                            &atc->rcount[i],sizeof(int));
 +            atc->n += atc->rcount[i];
 +        }
 +
 +        pme_realloc_atomcomm_things(atc);
 +    }
 +
 +    local_pos = 0;
 +    for(i=0; i<n; i++) {
 +        node = atc->pd[i];
 +        if (node == atc->nodeid) {
 +            /* Copy direct to the receive buffer */
 +            if (bX) {
 +                copy_rvec(x[i],atc->x[local_pos]);
 +            }
 +            atc->q[local_pos] = charge[i];
 +            local_pos++;
 +        } else {
 +            /* Copy to the send buffer */
 +            if (bX) {
 +                copy_rvec(x[i],pme->bufv[buf_index[node]]);
 +            }
 +            pme->bufr[buf_index[node]] = charge[i];
 +            buf_index[node]++;
 +        }
 +    }
 +
 +    buf_pos = 0;
 +    for(i=0; i<nnodes_comm; i++) {
 +        scount = atc->count[commnode[i]];
 +        rcount = atc->rcount[i];
 +        if (scount > 0 || rcount > 0) {
 +            if (bX) {
 +                /* Communicate the coordinates */
 +                pme_dd_sendrecv(atc,FALSE,i,
 +                                pme->bufv[buf_pos],scount*sizeof(rvec),
 +                                atc->x[local_pos],rcount*sizeof(rvec));
 +            }
 +            /* Communicate the charges */
 +            pme_dd_sendrecv(atc,FALSE,i,
 +                            pme->bufr+buf_pos,scount*sizeof(real),
 +                            atc->q+local_pos,rcount*sizeof(real));
 +            buf_pos   += scount;
 +            local_pos += atc->rcount[i];
 +        }
 +    }
 +}
 +
 +static void dd_pmeredist_f(gmx_pme_t pme, pme_atomcomm_t *atc,
 +                           int n, rvec *f,
 +                           gmx_bool bAddF)
 +{
 +  int *commnode,*buf_index;
 +  int nnodes_comm,local_pos,buf_pos,i,scount,rcount,node;
 +
 +  commnode  = atc->node_dest;
 +  buf_index = atc->buf_index;
 +
 +  nnodes_comm = min(2*atc->maxshift,atc->nslab-1);
 +
 +  local_pos = atc->count[atc->nodeid];
 +  buf_pos = 0;
 +  for(i=0; i<nnodes_comm; i++) {
 +    scount = atc->rcount[i];
 +    rcount = atc->count[commnode[i]];
 +    if (scount > 0 || rcount > 0) {
 +      /* Communicate the forces */
 +      pme_dd_sendrecv(atc,TRUE,i,
 +                      atc->f[local_pos],scount*sizeof(rvec),
 +                      pme->bufv[buf_pos],rcount*sizeof(rvec));
 +      local_pos += scount;
 +    }
 +    buf_index[commnode[i]] = buf_pos;
 +    buf_pos   += rcount;
 +  }
 +
 +    local_pos = 0;
 +    if (bAddF)
 +    {
 +        for(i=0; i<n; i++)
 +        {
 +            node = atc->pd[i];
 +            if (node == atc->nodeid)
 +            {
 +                /* Add from the local force array */
 +                rvec_inc(f[i],atc->f[local_pos]);
 +                local_pos++;
 +            }
 +            else
 +            {
 +                /* Add from the receive buffer */
 +                rvec_inc(f[i],pme->bufv[buf_index[node]]);
 +                buf_index[node]++;
 +            }
 +        }
 +    }
 +    else
 +    {
 +        for(i=0; i<n; i++)
 +        {
 +            node = atc->pd[i];
 +            if (node == atc->nodeid)
 +            {
 +                /* Copy from the local force array */
 +                copy_rvec(atc->f[local_pos],f[i]);
 +                local_pos++;
 +            }
 +            else
 +            {
 +                /* Copy from the receive buffer */
 +                copy_rvec(pme->bufv[buf_index[node]],f[i]);
 +                buf_index[node]++;
 +            }
 +        }
 +    }
 +}
 +
 +#ifdef GMX_MPI
 +static void
 +gmx_sum_qgrid_dd(gmx_pme_t pme, real *grid, int direction)
 +{
 +    pme_overlap_t *overlap;
 +    int send_index0,send_nindex;
 +    int recv_index0,recv_nindex;
 +    MPI_Status stat;
 +    int i,j,k,ix,iy,iz,icnt;
 +    int ipulse,send_id,recv_id,datasize;
 +    real *p;
 +    real *sendptr,*recvptr;
 +
 +    /* Start with minor-rank communication. This is a bit of a pain since it is not contiguous */
 +    overlap = &pme->overlap[1];
 +
 +    for(ipulse=0;ipulse<overlap->noverlap_nodes;ipulse++)
 +    {
 +        /* Since we have already (un)wrapped the overlap in the z-dimension,
 +         * we only have to communicate 0 to nkz (not pmegrid_nz).
 +         */
 +        if (direction==GMX_SUM_QGRID_FORWARD)
 +        {
 +            send_id = overlap->send_id[ipulse];
 +            recv_id = overlap->recv_id[ipulse];
 +            send_index0   = overlap->comm_data[ipulse].send_index0;
 +            send_nindex   = overlap->comm_data[ipulse].send_nindex;
 +            recv_index0   = overlap->comm_data[ipulse].recv_index0;
 +            recv_nindex   = overlap->comm_data[ipulse].recv_nindex;
 +        }
 +        else
 +        {
 +            send_id = overlap->recv_id[ipulse];
 +            recv_id = overlap->send_id[ipulse];
 +            send_index0   = overlap->comm_data[ipulse].recv_index0;
 +            send_nindex   = overlap->comm_data[ipulse].recv_nindex;
 +            recv_index0   = overlap->comm_data[ipulse].send_index0;
 +            recv_nindex   = overlap->comm_data[ipulse].send_nindex;
 +        }
 +
 +        /* Copy data to contiguous send buffer */
 +        if (debug)
 +        {
 +            fprintf(debug,"PME send node %d %d -> %d grid start %d Communicating %d to %d\n",
 +                    pme->nodeid,overlap->nodeid,send_id,
 +                    pme->pmegrid_start_iy,
 +                    send_index0-pme->pmegrid_start_iy,
 +                    send_index0-pme->pmegrid_start_iy+send_nindex);
 +        }
 +        icnt = 0;
 +        for(i=0;i<pme->pmegrid_nx;i++)
 +        {
 +            ix = i;
 +            for(j=0;j<send_nindex;j++)
 +            {
 +                iy = j + send_index0 - pme->pmegrid_start_iy;
 +                for(k=0;k<pme->nkz;k++)
 +                {
 +                    iz = k;
 +                    overlap->sendbuf[icnt++] = grid[ix*(pme->pmegrid_ny*pme->pmegrid_nz)+iy*(pme->pmegrid_nz)+iz];
 +                }
 +            }
 +        }
 +
 +        datasize      = pme->pmegrid_nx * pme->nkz;
 +
 +        MPI_Sendrecv(overlap->sendbuf,send_nindex*datasize,GMX_MPI_REAL,
 +                     send_id,ipulse,
 +                     overlap->recvbuf,recv_nindex*datasize,GMX_MPI_REAL,
 +                     recv_id,ipulse,
 +                     overlap->mpi_comm,&stat);
 +
 +        /* Get data from contiguous recv buffer */
 +        if (debug)
 +        {
 +            fprintf(debug,"PME recv node %d %d <- %d grid start %d Communicating %d to %d\n",
 +                    pme->nodeid,overlap->nodeid,recv_id,
 +                    pme->pmegrid_start_iy,
 +                    recv_index0-pme->pmegrid_start_iy,
 +                    recv_index0-pme->pmegrid_start_iy+recv_nindex);
 +        }
 +        icnt = 0;
 +        for(i=0;i<pme->pmegrid_nx;i++)
 +        {
 +            ix = i;
 +            for(j=0;j<recv_nindex;j++)
 +            {
 +                iy = j + recv_index0 - pme->pmegrid_start_iy;
 +                for(k=0;k<pme->nkz;k++)
 +                {
 +                    iz = k;
 +                    if(direction==GMX_SUM_QGRID_FORWARD)
 +                    {
 +                        grid[ix*(pme->pmegrid_ny*pme->pmegrid_nz)+iy*(pme->pmegrid_nz)+iz] += overlap->recvbuf[icnt++];
 +                    }
 +                    else
 +                    {
 +                        grid[ix*(pme->pmegrid_ny*pme->pmegrid_nz)+iy*(pme->pmegrid_nz)+iz]  = overlap->recvbuf[icnt++];
 +                    }
 +                }
 +            }
 +        }
 +    }
 +
 +    /* Major dimension is easier, no copying required,
 +     * but we might have to sum to separate array.
 +     * Since we don't copy, we have to communicate up to pmegrid_nz,
 +     * not nkz as for the minor direction.
 +     */
 +    overlap = &pme->overlap[0];
 +
 +    for(ipulse=0;ipulse<overlap->noverlap_nodes;ipulse++)
 +    {
 +        if(direction==GMX_SUM_QGRID_FORWARD)
 +        {
 +            send_id = overlap->send_id[ipulse];
 +            recv_id = overlap->recv_id[ipulse];
 +            send_index0   = overlap->comm_data[ipulse].send_index0;
 +            send_nindex   = overlap->comm_data[ipulse].send_nindex;
 +            recv_index0   = overlap->comm_data[ipulse].recv_index0;
 +            recv_nindex   = overlap->comm_data[ipulse].recv_nindex;
 +            recvptr   = overlap->recvbuf;
 +        }
 +        else
 +        {
 +            send_id = overlap->recv_id[ipulse];
 +            recv_id = overlap->send_id[ipulse];
 +            send_index0   = overlap->comm_data[ipulse].recv_index0;
 +            send_nindex   = overlap->comm_data[ipulse].recv_nindex;
 +            recv_index0   = overlap->comm_data[ipulse].send_index0;
 +            recv_nindex   = overlap->comm_data[ipulse].send_nindex;
 +            recvptr   = grid + (recv_index0-pme->pmegrid_start_ix)*(pme->pmegrid_ny*pme->pmegrid_nz);
 +        }
 +
 +        sendptr       = grid + (send_index0-pme->pmegrid_start_ix)*(pme->pmegrid_ny*pme->pmegrid_nz);
 +        datasize      = pme->pmegrid_ny * pme->pmegrid_nz;
 +
 +        if (debug)
 +        {
 +            fprintf(debug,"PME send node %d %d -> %d grid start %d Communicating %d to %d\n",
 +                    pme->nodeid,overlap->nodeid,send_id,
 +                    pme->pmegrid_start_ix,
 +                    send_index0-pme->pmegrid_start_ix,
 +                    send_index0-pme->pmegrid_start_ix+send_nindex);
 +            fprintf(debug,"PME recv node %d %d <- %d grid start %d Communicating %d to %d\n",
 +                    pme->nodeid,overlap->nodeid,recv_id,
 +                    pme->pmegrid_start_ix,
 +                    recv_index0-pme->pmegrid_start_ix,
 +                    recv_index0-pme->pmegrid_start_ix+recv_nindex);
 +        }
 +
 +        MPI_Sendrecv(sendptr,send_nindex*datasize,GMX_MPI_REAL,
 +                     send_id,ipulse,
 +                     recvptr,recv_nindex*datasize,GMX_MPI_REAL,
 +                     recv_id,ipulse,
 +                     overlap->mpi_comm,&stat);
 +
 +        /* ADD data from contiguous recv buffer */
 +        if(direction==GMX_SUM_QGRID_FORWARD)
 +        {
 +            p = grid + (recv_index0-pme->pmegrid_start_ix)*(pme->pmegrid_ny*pme->pmegrid_nz);
 +            for(i=0;i<recv_nindex*datasize;i++)
 +            {
 +                p[i] += overlap->recvbuf[i];
 +            }
 +        }
 +    }
 +}
 +#endif
 +
 +
 +static int
 +copy_pmegrid_to_fftgrid(gmx_pme_t pme, real *pmegrid, real *fftgrid)
 +{
 +    ivec    local_fft_ndata,local_fft_offset,local_fft_size;
 +    ivec    local_pme_size;
 +    int     i,ix,iy,iz;
 +    int     pmeidx,fftidx;
 +
 +    /* Dimensions should be identical for A/B grid, so we just use A here */
 +    gmx_parallel_3dfft_real_limits(pme->pfft_setupA,
 +                                   local_fft_ndata,
 +                                   local_fft_offset,
 +                                   local_fft_size);
 +
 +    local_pme_size[0] = pme->pmegrid_nx;
 +    local_pme_size[1] = pme->pmegrid_ny;
 +    local_pme_size[2] = pme->pmegrid_nz;
 +
 +    /* The fftgrid is always 'justified' to the lower-left corner of the PME grid,
 +     the offset is identical, and the PME grid always has more data (due to overlap)
 +     */
 +    {
 +#ifdef DEBUG_PME
 +        FILE *fp,*fp2;
 +        char fn[STRLEN],format[STRLEN];
 +        real val;
 +        sprintf(fn,"pmegrid%d.pdb",pme->nodeid);
 +        fp = ffopen(fn,"w");
 +        sprintf(fn,"pmegrid%d.txt",pme->nodeid);
 +        fp2 = ffopen(fn,"w");
 +     sprintf(format,"%s%s\n",pdbformat,"%6.2f%6.2f");
 +#endif
 +
 +    for(ix=0;ix<local_fft_ndata[XX];ix++)
 +    {
 +        for(iy=0;iy<local_fft_ndata[YY];iy++)
 +        {
 +            for(iz=0;iz<local_fft_ndata[ZZ];iz++)
 +            {
 +                pmeidx = ix*(local_pme_size[YY]*local_pme_size[ZZ])+iy*(local_pme_size[ZZ])+iz;
 +                fftidx = ix*(local_fft_size[YY]*local_fft_size[ZZ])+iy*(local_fft_size[ZZ])+iz;
 +                fftgrid[fftidx] = pmegrid[pmeidx];
 +#ifdef DEBUG_PME
 +                val = 100*pmegrid[pmeidx];
 +                if (pmegrid[pmeidx] != 0)
 +                fprintf(fp,format,"ATOM",pmeidx,"CA","GLY",' ',pmeidx,' ',
 +                        5.0*ix,5.0*iy,5.0*iz,1.0,val);
 +                if (pmegrid[pmeidx] != 0)
 +                    fprintf(fp2,"%-12s  %5d  %5d  %5d  %12.5e\n",
 +                            "qgrid",
 +                            pme->pmegrid_start_ix + ix,
 +                            pme->pmegrid_start_iy + iy,
 +                            pme->pmegrid_start_iz + iz,
 +                            pmegrid[pmeidx]);
 +#endif
 +            }
 +        }
 +    }
 +#ifdef DEBUG_PME
- static void alloc_real_aligned(int n,real **ptr_raw,real **ptr)
- {
-     snew(*ptr_raw,n+8);
-     *ptr = (real *) (((size_t) *ptr_raw + 16) & (~((size_t) 15)));
- }
++    ffclose(fp);
++    ffclose(fp2);
 +#endif
 +    }
 +    return 0;
 +}
 +
 +
 +static gmx_cycles_t omp_cyc_start()
 +{
 +    return gmx_cycles_read();
 +}
 +
 +static gmx_cycles_t omp_cyc_end(gmx_cycles_t c)
 +{
 +    return gmx_cycles_read() - c;
 +}
 +
 +
 +static int
 +copy_fftgrid_to_pmegrid(gmx_pme_t pme, const real *fftgrid, real *pmegrid,
 +                        int nthread,int thread)
 +{
 +    ivec    local_fft_ndata,local_fft_offset,local_fft_size;
 +    ivec    local_pme_size;
 +    int     ixy0,ixy1,ixy,ix,iy,iz;
 +    int     pmeidx,fftidx;
 +#ifdef PME_TIME_THREADS
 +    gmx_cycles_t c1;
 +    static double cs1=0;
 +    static int cnt=0;
 +#endif
 +
 +#ifdef PME_TIME_THREADS
 +    c1 = omp_cyc_start();
 +#endif
 +    /* Dimensions should be identical for A/B grid, so we just use A here */
 +    gmx_parallel_3dfft_real_limits(pme->pfft_setupA,
 +                                   local_fft_ndata,
 +                                   local_fft_offset,
 +                                   local_fft_size);
 +
 +    local_pme_size[0] = pme->pmegrid_nx;
 +    local_pme_size[1] = pme->pmegrid_ny;
 +    local_pme_size[2] = pme->pmegrid_nz;
 +
 +    /* The fftgrid is always 'justified' to the lower-left corner of the PME grid,
 +     the offset is identical, and the PME grid always has more data (due to overlap)
 +     */
 +    ixy0 = ((thread  )*local_fft_ndata[XX]*local_fft_ndata[YY])/nthread;
 +    ixy1 = ((thread+1)*local_fft_ndata[XX]*local_fft_ndata[YY])/nthread;
 +
 +    for(ixy=ixy0;ixy<ixy1;ixy++)
 +    {
 +        ix = ixy/local_fft_ndata[YY];
 +        iy = ixy - ix*local_fft_ndata[YY];
 +
 +        pmeidx = (ix*local_pme_size[YY] + iy)*local_pme_size[ZZ];
 +        fftidx = (ix*local_fft_size[YY] + iy)*local_fft_size[ZZ];
 +        for(iz=0;iz<local_fft_ndata[ZZ];iz++)
 +        {
 +            pmegrid[pmeidx+iz] = fftgrid[fftidx+iz];
 +        }
 +    }
 +
 +#ifdef PME_TIME_THREADS
 +    c1 = omp_cyc_end(c1);
 +    cs1 += (double)c1;
 +    cnt++;
 +    if (cnt % 20 == 0)
 +    {
 +        printf("copy %.2f\n",cs1*1e-9);
 +    }
 +#endif
 +
 +    return 0;
 +}
 +
 +
 +static void
 +wrap_periodic_pmegrid(gmx_pme_t pme, real *pmegrid)
 +{
 +    int     nx,ny,nz,pnx,pny,pnz,ny_x,overlap,ix,iy,iz;
 +
 +    nx = pme->nkx;
 +    ny = pme->nky;
 +    nz = pme->nkz;
 +
 +    pnx = pme->pmegrid_nx;
 +    pny = pme->pmegrid_ny;
 +    pnz = pme->pmegrid_nz;
 +
 +    overlap = pme->pme_order - 1;
 +
 +    /* Add periodic overlap in z */
 +    for(ix=0; ix<pme->pmegrid_nx; ix++)
 +    {
 +        for(iy=0; iy<pme->pmegrid_ny; iy++)
 +        {
 +            for(iz=0; iz<overlap; iz++)
 +            {
 +                pmegrid[(ix*pny+iy)*pnz+iz] +=
 +                    pmegrid[(ix*pny+iy)*pnz+nz+iz];
 +            }
 +        }
 +    }
 +
 +    if (pme->nnodes_minor == 1)
 +    {
 +       for(ix=0; ix<pme->pmegrid_nx; ix++)
 +       {
 +           for(iy=0; iy<overlap; iy++)
 +           {
 +               for(iz=0; iz<nz; iz++)
 +               {
 +                   pmegrid[(ix*pny+iy)*pnz+iz] +=
 +                       pmegrid[(ix*pny+ny+iy)*pnz+iz];
 +               }
 +           }
 +       }
 +    }
 +
 +    if (pme->nnodes_major == 1)
 +    {
 +        ny_x = (pme->nnodes_minor == 1 ? ny : pme->pmegrid_ny);
 +
 +        for(ix=0; ix<overlap; ix++)
 +        {
 +            for(iy=0; iy<ny_x; iy++)
 +            {
 +                for(iz=0; iz<nz; iz++)
 +                {
 +                    pmegrid[(ix*pny+iy)*pnz+iz] +=
 +                        pmegrid[((nx+ix)*pny+iy)*pnz+iz];
 +                }
 +            }
 +        }
 +    }
 +}
 +
 +
 +static void
 +unwrap_periodic_pmegrid(gmx_pme_t pme, real *pmegrid)
 +{
 +    int     nx,ny,nz,pnx,pny,pnz,ny_x,overlap,ix;
 +
 +    nx = pme->nkx;
 +    ny = pme->nky;
 +    nz = pme->nkz;
 +
 +    pnx = pme->pmegrid_nx;
 +    pny = pme->pmegrid_ny;
 +    pnz = pme->pmegrid_nz;
 +
 +    overlap = pme->pme_order - 1;
 +
 +    if (pme->nnodes_major == 1)
 +    {
 +        ny_x = (pme->nnodes_minor == 1 ? ny : pme->pmegrid_ny);
 +
 +        for(ix=0; ix<overlap; ix++)
 +        {
 +            int iy,iz;
 +
 +            for(iy=0; iy<ny_x; iy++)
 +            {
 +                for(iz=0; iz<nz; iz++)
 +                {
 +                    pmegrid[((nx+ix)*pny+iy)*pnz+iz] =
 +                        pmegrid[(ix*pny+iy)*pnz+iz];
 +                }
 +            }
 +        }
 +    }
 +
 +    if (pme->nnodes_minor == 1)
 +    {
 +#pragma omp parallel for num_threads(pme->nthread) schedule(static)
 +       for(ix=0; ix<pme->pmegrid_nx; ix++)
 +       {
 +           int iy,iz;
 +
 +           for(iy=0; iy<overlap; iy++)
 +           {
 +               for(iz=0; iz<nz; iz++)
 +               {
 +                   pmegrid[(ix*pny+ny+iy)*pnz+iz] =
 +                       pmegrid[(ix*pny+iy)*pnz+iz];
 +               }
 +           }
 +       }
 +    }
 +
 +    /* Copy periodic overlap in z */
 +#pragma omp parallel for num_threads(pme->nthread) schedule(static)
 +    for(ix=0; ix<pme->pmegrid_nx; ix++)
 +    {
 +        int iy,iz;
 +
 +        for(iy=0; iy<pme->pmegrid_ny; iy++)
 +        {
 +            for(iz=0; iz<overlap; iz++)
 +            {
 +                pmegrid[(ix*pny+iy)*pnz+nz+iz] =
 +                    pmegrid[(ix*pny+iy)*pnz+iz];
 +            }
 +        }
 +    }
 +}
 +
 +static void clear_grid(int nx,int ny,int nz,real *grid,
 +                       ivec fs,int *flag,
 +                       int fx,int fy,int fz,
 +                       int order)
 +{
 +    int nc,ncz;
 +    int fsx,fsy,fsz,gx,gy,gz,g0x,g0y,x,y,z;
 +    int flind;
 +
 +    nc  = 2 + (order - 2)/FLBS;
 +    ncz = 2 + (order - 2)/FLBSZ;
 +
 +    for(fsx=fx; fsx<fx+nc; fsx++)
 +    {
 +        for(fsy=fy; fsy<fy+nc; fsy++)
 +        {
 +            for(fsz=fz; fsz<fz+ncz; fsz++)
 +            {
 +                flind = (fsx*fs[YY] + fsy)*fs[ZZ] + fsz;
 +                if (flag[flind] == 0)
 +                {
 +                    gx = fsx*FLBS;
 +                    gy = fsy*FLBS;
 +                    gz = fsz*FLBSZ;
 +                    g0x = (gx*ny + gy)*nz + gz;
 +                    for(x=0; x<FLBS; x++)
 +                    {
 +                        g0y = g0x;
 +                        for(y=0; y<FLBS; y++)
 +                        {
 +                            for(z=0; z<FLBSZ; z++)
 +                            {
 +                                grid[g0y+z] = 0;
 +                            }
 +                            g0y += nz;
 +                        }
 +                        g0x += ny*nz;
 +                    }
 +
 +                    flag[flind] = 1;
 +                }
 +            }
 +        }
 +    }
 +}
 +
 +/* This has to be a macro to enable full compiler optimization with xlC (and probably others too) */
 +#define DO_BSPLINE(order)                            \
 +for(ithx=0; (ithx<order); ithx++)                    \
 +{                                                    \
 +    index_x = (i0+ithx)*pny*pnz;                     \
 +    valx    = qn*thx[ithx];                          \
 +                                                     \
 +    for(ithy=0; (ithy<order); ithy++)                \
 +    {                                                \
 +        valxy    = valx*thy[ithy];                   \
 +        index_xy = index_x+(j0+ithy)*pnz;            \
 +                                                     \
 +        for(ithz=0; (ithz<order); ithz++)            \
 +        {                                            \
 +            index_xyz        = index_xy+(k0+ithz);   \
 +            grid[index_xyz] += valxy*thz[ithz];      \
 +        }                                            \
 +    }                                                \
 +}
 +
 +
 +static void spread_q_bsplines_thread(pmegrid_t *pmegrid,
 +                                     pme_atomcomm_t *atc, splinedata_t *spline,
 +                                     pme_spline_work_t *work)
 +{
 +
 +    /* spread charges from home atoms to local grid */
 +    real     *grid;
 +    pme_overlap_t *ol;
 +    int      b,i,nn,n,ithx,ithy,ithz,i0,j0,k0;
 +    int *    idxptr;
 +    int      order,norder,index_x,index_xy,index_xyz;
 +    real     valx,valxy,qn;
 +    real     *thx,*thy,*thz;
 +    int      localsize, bndsize;
 +    int      pnx,pny,pnz,ndatatot;
 +    int      offx,offy,offz;
 +
 +    pnx = pmegrid->n[XX];
 +    pny = pmegrid->n[YY];
 +    pnz = pmegrid->n[ZZ];
 +
 +    offx = pmegrid->offset[XX];
 +    offy = pmegrid->offset[YY];
 +    offz = pmegrid->offset[ZZ];
 +
 +    ndatatot = pnx*pny*pnz;
 +    grid = pmegrid->grid;
 +    for(i=0;i<ndatatot;i++)
 +    {
 +        grid[i] = 0;
 +    }
 +
 +    order = pmegrid->order;
 +
 +    for(nn=0; nn<spline->n; nn++)
 +    {
 +        n  = spline->ind[nn];
 +        qn = atc->q[n];
 +
 +        if (qn != 0)
 +        {
 +            idxptr = atc->idx[n];
 +            norder = nn*order;
 +
 +            i0   = idxptr[XX] - offx;
 +            j0   = idxptr[YY] - offy;
 +            k0   = idxptr[ZZ] - offz;
 +
 +            thx = spline->theta[XX] + norder;
 +            thy = spline->theta[YY] + norder;
 +            thz = spline->theta[ZZ] + norder;
 +
 +            switch (order) {
 +            case 4:
 +#ifdef PME_SSE
 +#ifdef PME_SSE_UNALIGNED
 +#define PME_SPREAD_SSE_ORDER4
 +#else
 +#define PME_SPREAD_SSE_ALIGNED
 +#define PME_ORDER 4
 +#endif
 +#include "pme_sse_single.h"
 +#else
 +                DO_BSPLINE(4);
 +#endif
 +                break;
 +            case 5:
 +#ifdef PME_SSE
 +#define PME_SPREAD_SSE_ALIGNED
 +#define PME_ORDER 5
 +#include "pme_sse_single.h"
 +#else
 +                DO_BSPLINE(5);
 +#endif
 +                break;
 +            default:
 +                DO_BSPLINE(order);
 +                break;
 +            }
 +        }
 +    }
 +}
 +
- static int solve_pme_yzx_wrapper(gmx_pme_t pme,t_complex *grid,
-                                  real ewaldcoeff,real vol,
-                                  gmx_bool bEnerVir,real *mesh_energy,matrix vir)
- {
-     int  nthread,thread;
-     int  nelements=0;
-     nthread = pme->nthread;
- #pragma omp parallel for num_threads(nthread) schedule(static)
-     for(thread=0; thread<nthread; thread++)
-     {
-         int n;
-         n = solve_pme_yzx(pme,grid,ewaldcoeff,vol,bEnerVir,nthread,thread);
-         if (thread == 0)
-         {
-             nelements = n;
-         }
-     }
-     if (bEnerVir)
-     {
-         get_pme_ener_vir(pme,nthread,mesh_energy,vir);
-     }
-     return nelements;
- }
 +static void set_grid_alignment(int *pmegrid_nz,int pme_order)
 +{
 +#ifdef PME_SSE
 +    if (pme_order == 5
 +#ifndef PME_SSE_UNALIGNED
 +        || pme_order == 4
 +#endif
 +        )
 +    {
 +        /* Round nz up to a multiple of 4 to ensure alignment */
 +        *pmegrid_nz = ((*pmegrid_nz + 3) & ~3);
 +    }
 +#endif
 +}
 +
 +static void set_gridsize_alignment(int *gridsize,int pme_order)
 +{
 +#ifdef PME_SSE
 +#ifndef PME_SSE_UNALIGNED
 +    if (pme_order == 4)
 +    {
 +        /* Add extra elements to ensured aligned operations do not go
 +         * beyond the allocated grid size.
 +         * Note that for pme_order=5, the pme grid z-size alignment
 +         * ensures that we will not go beyond the grid size.
 +         */
 +         *gridsize += 4;
 +    }
 +#endif
 +#endif
 +}
 +
 +static void pmegrid_init(pmegrid_t *grid,
 +                         int cx, int cy, int cz,
 +                         int x0, int y0, int z0,
 +                         int x1, int y1, int z1,
 +                         gmx_bool set_alignment,
 +                         int pme_order,
 +                         real *ptr)
 +{
 +    int nz,gridsize;
 +
 +    grid->ci[XX] = cx;
 +    grid->ci[YY] = cy;
 +    grid->ci[ZZ] = cz;
 +    grid->offset[XX] = x0;
 +    grid->offset[YY] = y0;
 +    grid->offset[ZZ] = z0;
 +    grid->n[XX]      = x1 - x0 + pme_order - 1;
 +    grid->n[YY]      = y1 - y0 + pme_order - 1;
 +    grid->n[ZZ]      = z1 - z0 + pme_order - 1;
 +
 +    nz = grid->n[ZZ];
 +    set_grid_alignment(&nz,pme_order);
 +    if (set_alignment)
 +    {
 +        grid->n[ZZ] = nz;
 +    }
 +    else if (nz != grid->n[ZZ])
 +    {
 +        gmx_incons("pmegrid_init call with an unaligned z size");
 +    }
 +
 +    grid->order = pme_order;
 +    if (ptr == NULL)
 +    {
 +        gridsize = grid->n[XX]*grid->n[YY]*grid->n[ZZ];
 +        set_gridsize_alignment(&gridsize,pme_order);
 +        snew_aligned(grid->grid,gridsize,16);
 +    }
 +    else
 +    {
 +        grid->grid = ptr;
 +    }
 +}
 +
 +static int div_round_up(int enumerator,int denominator)
 +{
 +    return (enumerator + denominator - 1)/denominator;
 +}
 +
 +static void make_subgrid_division(const ivec n,int ovl,int nthread,
 +                                  ivec nsub)
 +{
 +    int gsize_opt,gsize;
 +    int nsx,nsy,nsz;
 +    char *env;
 +
 +    gsize_opt = -1;
 +    for(nsx=1; nsx<=nthread; nsx++)
 +    {
 +        if (nthread % nsx == 0)
 +        {
 +            for(nsy=1; nsy<=nthread; nsy++)
 +            {
 +                if (nsx*nsy <= nthread && nthread % (nsx*nsy) == 0)
 +                {
 +                    nsz = nthread/(nsx*nsy);
 +
 +                    /* Determine the number of grid points per thread */
 +                    gsize =
 +                        (div_round_up(n[XX],nsx) + ovl)*
 +                        (div_round_up(n[YY],nsy) + ovl)*
 +                        (div_round_up(n[ZZ],nsz) + ovl);
 +
 +                    /* Minimize the number of grids points per thread
 +                     * and, secondarily, the number of cuts in minor dimensions.
 +                     */
 +                    if (gsize_opt == -1 ||
 +                        gsize < gsize_opt ||
 +                        (gsize == gsize_opt &&
 +                         (nsz < nsub[ZZ] || (nsz == nsub[ZZ] && nsy < nsub[YY]))))
 +                    {
 +                        nsub[XX] = nsx;
 +                        nsub[YY] = nsy;
 +                        nsub[ZZ] = nsz;
 +                        gsize_opt = gsize;
 +                    }
 +                }
 +            }
 +        }
 +    }
 +
 +    env = getenv("GMX_PME_THREAD_DIVISION");
 +    if (env != NULL)
 +    {
 +        sscanf(env,"%d %d %d",&nsub[XX],&nsub[YY],&nsub[ZZ]);
 +    }
 +
 +    if (nsub[XX]*nsub[YY]*nsub[ZZ] != nthread)
 +    {
 +        gmx_fatal(FARGS,"PME grid thread division (%d x %d x %d) does not match the total number of threads (%d)",nsub[XX],nsub[YY],nsub[ZZ],nthread);
 +    }
 +}
 +
 +static void pmegrids_init(pmegrids_t *grids,
 +                          int nx,int ny,int nz,int nz_base,
 +                          int pme_order,
 +                          int nthread,
 +                          int overlap_x,
 +                          int overlap_y)
 +{
 +    ivec n,n_base,g0,g1;
 +    int t,x,y,z,d,i,tfac;
 +    int max_comm_lines;
 +
 +    n[XX] = nx - (pme_order - 1);
 +    n[YY] = ny - (pme_order - 1);
 +    n[ZZ] = nz - (pme_order - 1);
 +
 +    copy_ivec(n,n_base);
 +    n_base[ZZ] = nz_base;
 +
 +    pmegrid_init(&grids->grid,0,0,0,0,0,0,n[XX],n[YY],n[ZZ],FALSE,pme_order,
 +                 NULL);
 +
 +    grids->nthread = nthread;
 +
 +    make_subgrid_division(n_base,pme_order-1,grids->nthread,grids->nc);
 +
 +    if (grids->nthread > 1)
 +    {
 +        ivec nst;
 +        int gridsize;
 +        real *grid_all;
 +
 +        for(d=0; d<DIM; d++)
 +        {
 +            nst[d] = div_round_up(n[d],grids->nc[d]) + pme_order - 1;
 +        }
 +        set_grid_alignment(&nst[ZZ],pme_order);
 +
 +        if (debug)
 +        {
 +            fprintf(debug,"pmegrid thread local division: %d x %d x %d\n",
 +                    grids->nc[XX],grids->nc[YY],grids->nc[ZZ]);
 +            fprintf(debug,"pmegrid %d %d %d max thread pmegrid %d %d %d\n",
 +                    nx,ny,nz,
 +                    nst[XX],nst[YY],nst[ZZ]);
 +        }
 +
 +        snew(grids->grid_th,grids->nthread);
 +        t = 0;
 +        gridsize = nst[XX]*nst[YY]*nst[ZZ];
 +        set_gridsize_alignment(&gridsize,pme_order);
 +        snew_aligned(grid_all,
 +                     grids->nthread*gridsize+(grids->nthread+1)*GMX_CACHE_SEP,
 +                     16);
 +
 +        for(x=0; x<grids->nc[XX]; x++)
 +        {
 +            for(y=0; y<grids->nc[YY]; y++)
 +            {
 +                for(z=0; z<grids->nc[ZZ]; z++)
 +                {
 +                    pmegrid_init(&grids->grid_th[t],
 +                                 x,y,z,
 +                                 (n[XX]*(x  ))/grids->nc[XX],
 +                                 (n[YY]*(y  ))/grids->nc[YY],
 +                                 (n[ZZ]*(z  ))/grids->nc[ZZ],
 +                                 (n[XX]*(x+1))/grids->nc[XX],
 +                                 (n[YY]*(y+1))/grids->nc[YY],
 +                                 (n[ZZ]*(z+1))/grids->nc[ZZ],
 +                                 TRUE,
 +                                 pme_order,
 +                                 grid_all+GMX_CACHE_SEP+t*(gridsize+GMX_CACHE_SEP));
 +                    t++;
 +                }
 +            }
 +        }
 +    }
 +
 +    snew(grids->g2t,DIM);
 +    tfac = 1;
 +    for(d=DIM-1; d>=0; d--)
 +    {
 +        snew(grids->g2t[d],n[d]);
 +        t = 0;
 +        for(i=0; i<n[d]; i++)
 +        {
 +            /* The second check should match the parameters
 +             * of the pmegrid_init call above.
 +             */
 +            while (t + 1 < grids->nc[d] && i >= (n[d]*(t+1))/grids->nc[d])
 +            {
 +                t++;
 +            }
 +            grids->g2t[d][i] = t*tfac;
 +        }
 +
 +        tfac *= grids->nc[d];
 +
 +        switch (d)
 +        {
 +        case XX: max_comm_lines = overlap_x;     break;
 +        case YY: max_comm_lines = overlap_y;     break;
 +        case ZZ: max_comm_lines = pme_order - 1; break;
 +        }
 +        grids->nthread_comm[d] = 0;
 +        while ((n[d]*grids->nthread_comm[d])/grids->nc[d] < max_comm_lines)
 +        {
 +            grids->nthread_comm[d]++;
 +        }
 +        if (debug != NULL)
 +        {
 +            fprintf(debug,"pmegrid thread grid communication range in %c: %d\n",
 +                    'x'+d,grids->nthread_comm[d]);
 +        }
 +        /* It should be possible to make grids->nthread_comm[d]==grids->nc[d]
 +         * work, but this is not a problematic restriction.
 +         */
 +        if (grids->nc[d] > 1 && grids->nthread_comm[d] > grids->nc[d])
 +        {
 +            gmx_fatal(FARGS,"Too many threads for PME (%d) compared to the number of grid lines, reduce the number of threads doing PME",grids->nthread);
 +        }
 +    }
 +}
 +
 +
 +static void pmegrids_destroy(pmegrids_t *grids)
 +{
 +    int t;
 +
 +    if (grids->grid.grid != NULL)
 +    {
 +        sfree(grids->grid.grid);
 +
 +        if (grids->nthread > 0)
 +        {
 +            for(t=0; t<grids->nthread; t++)
 +            {
 +                sfree(grids->grid_th[t].grid);
 +            }
 +            sfree(grids->grid_th);
 +        }
 +    }
 +}
 +
 +
 +static void realloc_work(pme_work_t *work,int nkx)
 +{
 +    if (nkx > work->nalloc)
 +    {
 +        work->nalloc = nkx;
 +        srenew(work->mhx  ,work->nalloc);
 +        srenew(work->mhy  ,work->nalloc);
 +        srenew(work->mhz  ,work->nalloc);
 +        srenew(work->m2   ,work->nalloc);
 +        srenew(work->denom,work->nalloc);
 +        /* Allocate an aligned pointer for SSE operations, including 3 extra
 +         * elements at the end since SSE operates on 4 elements at a time.
 +         */
 +        sfree_aligned(work->denom);
 +        sfree_aligned(work->tmp1);
 +        sfree_aligned(work->eterm);
 +        snew_aligned(work->denom,work->nalloc+3,16);
 +        snew_aligned(work->tmp1 ,work->nalloc+3,16);
 +        snew_aligned(work->eterm,work->nalloc+3,16);
 +        srenew(work->m2inv,work->nalloc);
 +    }
 +}
 +
 +
 +static void free_work(pme_work_t *work)
 +{
 +    sfree(work->mhx);
 +    sfree(work->mhy);
 +    sfree(work->mhz);
 +    sfree(work->m2);
 +    sfree_aligned(work->denom);
 +    sfree_aligned(work->tmp1);
 +    sfree_aligned(work->eterm);
 +    sfree(work->m2inv);
 +}
 +
 +
 +#ifdef PME_SSE
 +    /* Calculate exponentials through SSE in float precision */
 +inline static void calc_exponentials(int start, int end, real f, real *d_aligned, real *r_aligned, real *e_aligned)
 +{
 +    {
 +        const __m128 two = _mm_set_ps(2.0f,2.0f,2.0f,2.0f);
 +        __m128 f_sse;
 +        __m128 lu;
 +        __m128 tmp_d1,d_inv,tmp_r,tmp_e;
 +        int kx;
 +        f_sse = _mm_load1_ps(&f);
 +        for(kx=0; kx<end; kx+=4)
 +        {
 +            tmp_d1   = _mm_load_ps(d_aligned+kx);
 +            lu       = _mm_rcp_ps(tmp_d1);
 +            d_inv    = _mm_mul_ps(lu,_mm_sub_ps(two,_mm_mul_ps(lu,tmp_d1)));
 +            tmp_r    = _mm_load_ps(r_aligned+kx);
 +            tmp_r    = gmx_mm_exp_ps(tmp_r);
 +            tmp_e    = _mm_mul_ps(f_sse,d_inv);
 +            tmp_e    = _mm_mul_ps(tmp_e,tmp_r);
 +            _mm_store_ps(e_aligned+kx,tmp_e);
 +        }
 +    }
 +}
 +#else
 +inline static void calc_exponentials(int start, int end, real f, real *d, real *r, real *e)
 +{
 +    int kx;
 +    for(kx=start; kx<end; kx++)
 +    {
 +        d[kx] = 1.0/d[kx];
 +    }
 +    for(kx=start; kx<end; kx++)
 +    {
 +        r[kx] = exp(r[kx]);
 +    }
 +    for(kx=start; kx<end; kx++)
 +    {
 +        e[kx] = f*r[kx]*d[kx];
 +    }
 +}
 +#endif
 +
 +
 +static int solve_pme_yzx(gmx_pme_t pme,t_complex *grid,
 +                         real ewaldcoeff,real vol,
 +                         gmx_bool bEnerVir,
 +                         int nthread,int thread)
 +{
 +    /* do recip sum over local cells in grid */
 +    /* y major, z middle, x minor or continuous */
 +    t_complex *p0;
 +    int     kx,ky,kz,maxkx,maxky,maxkz;
 +    int     nx,ny,nz,iyz0,iyz1,iyz,iy,iz,kxstart,kxend;
 +    real    mx,my,mz;
 +    real    factor=M_PI*M_PI/(ewaldcoeff*ewaldcoeff);
 +    real    ets2,struct2,vfactor,ets2vf;
 +    real    d1,d2,energy=0;
 +    real    by,bz;
 +    real    virxx=0,virxy=0,virxz=0,viryy=0,viryz=0,virzz=0;
 +    real    rxx,ryx,ryy,rzx,rzy,rzz;
 +    pme_work_t *work;
 +    real    *mhx,*mhy,*mhz,*m2,*denom,*tmp1,*eterm,*m2inv;
 +    real    mhxk,mhyk,mhzk,m2k;
 +    real    corner_fac;
 +    ivec    complex_order;
 +    ivec    local_ndata,local_offset,local_size;
 +    real    elfac;
 +
 +    elfac = ONE_4PI_EPS0/pme->epsilon_r;
 +
 +    nx = pme->nkx;
 +    ny = pme->nky;
 +    nz = pme->nkz;
 +
 +    /* Dimensions should be identical for A/B grid, so we just use A here */
 +    gmx_parallel_3dfft_complex_limits(pme->pfft_setupA,
 +                                      complex_order,
 +                                      local_ndata,
 +                                      local_offset,
 +                                      local_size);
 +
 +    rxx = pme->recipbox[XX][XX];
 +    ryx = pme->recipbox[YY][XX];
 +    ryy = pme->recipbox[YY][YY];
 +    rzx = pme->recipbox[ZZ][XX];
 +    rzy = pme->recipbox[ZZ][YY];
 +    rzz = pme->recipbox[ZZ][ZZ];
 +
 +    maxkx = (nx+1)/2;
 +    maxky = (ny+1)/2;
 +    maxkz = nz/2+1;
 +
 +    work = &pme->work[thread];
 +    mhx   = work->mhx;
 +    mhy   = work->mhy;
 +    mhz   = work->mhz;
 +    m2    = work->m2;
 +    denom = work->denom;
 +    tmp1  = work->tmp1;
 +    eterm = work->eterm;
 +    m2inv = work->m2inv;
 +
 +    iyz0 = local_ndata[YY]*local_ndata[ZZ]* thread   /nthread;
 +    iyz1 = local_ndata[YY]*local_ndata[ZZ]*(thread+1)/nthread;
 +
 +    for(iyz=iyz0; iyz<iyz1; iyz++)
 +    {
 +        iy = iyz/local_ndata[ZZ];
 +        iz = iyz - iy*local_ndata[ZZ];
 +
 +        ky = iy + local_offset[YY];
 +
 +        if (ky < maxky)
 +        {
 +            my = ky;
 +        }
 +        else
 +        {
 +            my = (ky - ny);
 +        }
 +
 +        by = M_PI*vol*pme->bsp_mod[YY][ky];
 +
 +        kz = iz + local_offset[ZZ];
 +
 +        mz = kz;
 +
 +        bz = pme->bsp_mod[ZZ][kz];
 +
 +        /* 0.5 correction for corner points */
 +        corner_fac = 1;
 +        if (kz == 0 || kz == (nz+1)/2)
 +        {
 +            corner_fac = 0.5;
 +        }
 +
 +        p0 = grid + iy*local_size[ZZ]*local_size[XX] + iz*local_size[XX];
 +
 +        /* We should skip the k-space point (0,0,0) */
 +        if (local_offset[XX] > 0 || ky > 0 || kz > 0)
 +        {
 +            kxstart = local_offset[XX];
 +        }
 +        else
 +        {
 +            kxstart = local_offset[XX] + 1;
 +            p0++;
 +        }
 +        kxend = local_offset[XX] + local_ndata[XX];
 +
 +        if (bEnerVir)
 +        {
 +            /* More expensive inner loop, especially because of the storage
 +             * of the mh elements in array's.
 +             * Because x is the minor grid index, all mh elements
 +             * depend on kx for triclinic unit cells.
 +             */
 +
 +                /* Two explicit loops to avoid a conditional inside the loop */
 +            for(kx=kxstart; kx<maxkx; kx++)
 +            {
 +                mx = kx;
 +
 +                mhxk      = mx * rxx;
 +                mhyk      = mx * ryx + my * ryy;
 +                mhzk      = mx * rzx + my * rzy + mz * rzz;
 +                m2k       = mhxk*mhxk + mhyk*mhyk + mhzk*mhzk;
 +                mhx[kx]   = mhxk;
 +                mhy[kx]   = mhyk;
 +                mhz[kx]   = mhzk;
 +                m2[kx]    = m2k;
 +                denom[kx] = m2k*bz*by*pme->bsp_mod[XX][kx];
 +                tmp1[kx]  = -factor*m2k;
 +            }
 +
 +            for(kx=maxkx; kx<kxend; kx++)
 +            {
 +                mx = (kx - nx);
 +
 +                mhxk      = mx * rxx;
 +                mhyk      = mx * ryx + my * ryy;
 +                mhzk      = mx * rzx + my * rzy + mz * rzz;
 +                m2k       = mhxk*mhxk + mhyk*mhyk + mhzk*mhzk;
 +                mhx[kx]   = mhxk;
 +                mhy[kx]   = mhyk;
 +                mhz[kx]   = mhzk;
 +                m2[kx]    = m2k;
 +                denom[kx] = m2k*bz*by*pme->bsp_mod[XX][kx];
 +                tmp1[kx]  = -factor*m2k;
 +            }
 +
 +            for(kx=kxstart; kx<kxend; kx++)
 +            {
 +                m2inv[kx] = 1.0/m2[kx];
 +            }
 +
 +            calc_exponentials(kxstart,kxend,elfac,denom,tmp1,eterm);
 +
 +            for(kx=kxstart; kx<kxend; kx++,p0++)
 +            {
 +                d1      = p0->re;
 +                d2      = p0->im;
 +
 +                p0->re  = d1*eterm[kx];
 +                p0->im  = d2*eterm[kx];
 +
 +                struct2 = 2.0*(d1*d1+d2*d2);
 +
 +                tmp1[kx] = eterm[kx]*struct2;
 +            }
 +
 +            for(kx=kxstart; kx<kxend; kx++)
 +            {
 +                ets2     = corner_fac*tmp1[kx];
 +                vfactor  = (factor*m2[kx] + 1.0)*2.0*m2inv[kx];
 +                energy  += ets2;
 +
 +                ets2vf   = ets2*vfactor;
 +                virxx   += ets2vf*mhx[kx]*mhx[kx] - ets2;
 +                virxy   += ets2vf*mhx[kx]*mhy[kx];
 +                virxz   += ets2vf*mhx[kx]*mhz[kx];
 +                viryy   += ets2vf*mhy[kx]*mhy[kx] - ets2;
 +                viryz   += ets2vf*mhy[kx]*mhz[kx];
 +                virzz   += ets2vf*mhz[kx]*mhz[kx] - ets2;
 +            }
 +        }
 +        else
 +        {
 +            /* We don't need to calculate the energy and the virial.
 +             * In this case the triclinic overhead is small.
 +             */
 +
 +            /* Two explicit loops to avoid a conditional inside the loop */
 +
 +            for(kx=kxstart; kx<maxkx; kx++)
 +            {
 +                mx = kx;
 +
 +                mhxk      = mx * rxx;
 +                mhyk      = mx * ryx + my * ryy;
 +                mhzk      = mx * rzx + my * rzy + mz * rzz;
 +                m2k       = mhxk*mhxk + mhyk*mhyk + mhzk*mhzk;
 +                denom[kx] = m2k*bz*by*pme->bsp_mod[XX][kx];
 +                tmp1[kx]  = -factor*m2k;
 +            }
 +
 +            for(kx=maxkx; kx<kxend; kx++)
 +            {
 +                mx = (kx - nx);
 +
 +                mhxk      = mx * rxx;
 +                mhyk      = mx * ryx + my * ryy;
 +                mhzk      = mx * rzx + my * rzy + mz * rzz;
 +                m2k       = mhxk*mhxk + mhyk*mhyk + mhzk*mhzk;
 +                denom[kx] = m2k*bz*by*pme->bsp_mod[XX][kx];
 +                tmp1[kx]  = -factor*m2k;
 +            }
 +
 +            calc_exponentials(kxstart,kxend,elfac,denom,tmp1,eterm);
 +
 +            for(kx=kxstart; kx<kxend; kx++,p0++)
 +            {
 +                d1      = p0->re;
 +                d2      = p0->im;
 +
 +                p0->re  = d1*eterm[kx];
 +                p0->im  = d2*eterm[kx];
 +            }
 +        }
 +    }
 +
 +    if (bEnerVir)
 +    {
 +        /* Update virial with local values.
 +         * The virial is symmetric by definition.
 +         * this virial seems ok for isotropic scaling, but I'm
 +         * experiencing problems on semiisotropic membranes.
 +         * IS THAT COMMENT STILL VALID??? (DvdS, 2001/02/07).
 +         */
 +        work->vir[XX][XX] = 0.25*virxx;
 +        work->vir[YY][YY] = 0.25*viryy;
 +        work->vir[ZZ][ZZ] = 0.25*virzz;
 +        work->vir[XX][YY] = work->vir[YY][XX] = 0.25*virxy;
 +        work->vir[XX][ZZ] = work->vir[ZZ][XX] = 0.25*virxz;
 +        work->vir[YY][ZZ] = work->vir[ZZ][YY] = 0.25*viryz;
 +
 +        /* This energy should be corrected for a charged system */
 +        work->energy = 0.5*energy;
 +    }
 +
 +    /* Return the loop count */
 +    return local_ndata[YY]*local_ndata[XX];
 +}
 +
 +static void get_pme_ener_vir(const gmx_pme_t pme,int nthread,
 +                             real *mesh_energy,matrix vir)
 +{
 +    /* This function sums output over threads
 +     * and should therefore only be called after thread synchronization.
 +     */
 +    int thread;
 +
 +    *mesh_energy = pme->work[0].energy;
 +    copy_mat(pme->work[0].vir,vir);
 +
 +    for(thread=1; thread<nthread; thread++)
 +    {
 +        *mesh_energy += pme->work[thread].energy;
 +        m_add(vir,pme->work[thread].vir,vir);
 +    }
 +}
 +
 +#define DO_FSPLINE(order)                      \
 +for(ithx=0; (ithx<order); ithx++)              \
 +{                                              \
 +    index_x = (i0+ithx)*pny*pnz;               \
 +    tx      = thx[ithx];                       \
 +    dx      = dthx[ithx];                      \
 +                                               \
 +    for(ithy=0; (ithy<order); ithy++)          \
 +    {                                          \
 +        index_xy = index_x+(j0+ithy)*pnz;      \
 +        ty       = thy[ithy];                  \
 +        dy       = dthy[ithy];                 \
 +        fxy1     = fz1 = 0;                    \
 +                                               \
 +        for(ithz=0; (ithz<order); ithz++)      \
 +        {                                      \
 +            gval  = grid[index_xy+(k0+ithz)];  \
 +            fxy1 += thz[ithz]*gval;            \
 +            fz1  += dthz[ithz]*gval;           \
 +        }                                      \
 +        fx += dx*ty*fxy1;                      \
 +        fy += tx*dy*fxy1;                      \
 +        fz += tx*ty*fz1;                       \
 +    }                                          \
 +}
 +
 +
 +void gather_f_bsplines(gmx_pme_t pme,real *grid,
 +                       gmx_bool bClearF,pme_atomcomm_t *atc,
 +                       splinedata_t *spline,
 +                       real scale)
 +{
 +    /* sum forces for local particles */
 +    int     nn,n,ithx,ithy,ithz,i0,j0,k0;
 +    int     index_x,index_xy;
 +    int     nx,ny,nz,pnx,pny,pnz;
 +    int *   idxptr;
 +    real    tx,ty,dx,dy,qn;
 +    real    fx,fy,fz,gval;
 +    real    fxy1,fz1;
 +    real    *thx,*thy,*thz,*dthx,*dthy,*dthz;
 +    int     norder;
 +    real    rxx,ryx,ryy,rzx,rzy,rzz;
 +    int     order;
 +
 +    pme_spline_work_t *work;
 +
 +    work = &pme->spline_work;
 +
 +    order = pme->pme_order;
 +    thx   = spline->theta[XX];
 +    thy   = spline->theta[YY];
 +    thz   = spline->theta[ZZ];
 +    dthx  = spline->dtheta[XX];
 +    dthy  = spline->dtheta[YY];
 +    dthz  = spline->dtheta[ZZ];
 +    nx    = pme->nkx;
 +    ny    = pme->nky;
 +    nz    = pme->nkz;
 +    pnx   = pme->pmegrid_nx;
 +    pny   = pme->pmegrid_ny;
 +    pnz   = pme->pmegrid_nz;
 +
 +    rxx   = pme->recipbox[XX][XX];
 +    ryx   = pme->recipbox[YY][XX];
 +    ryy   = pme->recipbox[YY][YY];
 +    rzx   = pme->recipbox[ZZ][XX];
 +    rzy   = pme->recipbox[ZZ][YY];
 +    rzz   = pme->recipbox[ZZ][ZZ];
 +
 +    for(nn=0; nn<spline->n; nn++)
 +    {
 +        n  = spline->ind[nn];
 +        qn = atc->q[n];
 +
 +        if (bClearF)
 +        {
 +            atc->f[n][XX] = 0;
 +            atc->f[n][YY] = 0;
 +            atc->f[n][ZZ] = 0;
 +        }
 +        if (qn != 0)
 +        {
 +            fx     = 0;
 +            fy     = 0;
 +            fz     = 0;
 +            idxptr = atc->idx[n];
 +            norder = nn*order;
 +
 +            i0   = idxptr[XX];
 +            j0   = idxptr[YY];
 +            k0   = idxptr[ZZ];
 +
 +            /* Pointer arithmetic alert, next six statements */
 +            thx  = spline->theta[XX] + norder;
 +            thy  = spline->theta[YY] + norder;
 +            thz  = spline->theta[ZZ] + norder;
 +            dthx = spline->dtheta[XX] + norder;
 +            dthy = spline->dtheta[YY] + norder;
 +            dthz = spline->dtheta[ZZ] + norder;
 +
 +            switch (order) {
 +            case 4:
 +#ifdef PME_SSE
 +#ifdef PME_SSE_UNALIGNED
 +#define PME_GATHER_F_SSE_ORDER4
 +#else
 +#define PME_GATHER_F_SSE_ALIGNED
 +#define PME_ORDER 4
 +#endif
 +#include "pme_sse_single.h"
 +#else
 +                DO_FSPLINE(4);
 +#endif
 +                break;
 +            case 5:
 +#ifdef PME_SSE
 +#define PME_GATHER_F_SSE_ALIGNED
 +#define PME_ORDER 5
 +#include "pme_sse_single.h"
 +#else
 +                DO_FSPLINE(5);
 +#endif
 +                break;
 +            default:
 +                DO_FSPLINE(order);
 +                break;
 +            }
 +
 +            atc->f[n][XX] += -qn*( fx*nx*rxx );
 +            atc->f[n][YY] += -qn*( fx*nx*ryx + fy*ny*ryy );
 +            atc->f[n][ZZ] += -qn*( fx*nx*rzx + fy*ny*rzy + fz*nz*rzz );
 +        }
 +    }
 +    /* Since the energy and not forces are interpolated
 +     * the net force might not be exactly zero.
 +     * This can be solved by also interpolating F, but
 +     * that comes at a cost.
 +     * A better hack is to remove the net force every
 +     * step, but that must be done at a higher level
 +     * since this routine doesn't see all atoms if running
 +     * in parallel. Don't know how important it is?  EL 990726
 +     */
 +}
 +
 +
 +static real gather_energy_bsplines(gmx_pme_t pme,real *grid,
 +                                   pme_atomcomm_t *atc)
 +{
 +    splinedata_t *spline;
 +    int     n,ithx,ithy,ithz,i0,j0,k0;
 +    int     index_x,index_xy;
 +    int *   idxptr;
 +    real    energy,pot,tx,ty,qn,gval;
 +    real    *thx,*thy,*thz;
 +    int     norder;
 +    int     order;
 +
 +    spline = &atc->spline[0];
 +
 +    order = pme->pme_order;
 +
 +    energy = 0;
 +    for(n=0; (n<atc->n); n++) {
 +        qn      = atc->q[n];
 +
 +        if (qn != 0) {
 +            idxptr = atc->idx[n];
 +            norder = n*order;
 +
 +            i0   = idxptr[XX];
 +            j0   = idxptr[YY];
 +            k0   = idxptr[ZZ];
 +
 +            /* Pointer arithmetic alert, next three statements */
 +            thx  = spline->theta[XX] + norder;
 +            thy  = spline->theta[YY] + norder;
 +            thz  = spline->theta[ZZ] + norder;
 +
 +            pot = 0;
 +            for(ithx=0; (ithx<order); ithx++)
 +            {
 +                index_x = (i0+ithx)*pme->pmegrid_ny*pme->pmegrid_nz;
 +                tx      = thx[ithx];
 +
 +                for(ithy=0; (ithy<order); ithy++)
 +                {
 +                    index_xy = index_x+(j0+ithy)*pme->pmegrid_nz;
 +                    ty       = thy[ithy];
 +
 +                    for(ithz=0; (ithz<order); ithz++)
 +                    {
 +                        gval  = grid[index_xy+(k0+ithz)];
 +                        pot  += tx*ty*thz[ithz]*gval;
 +                    }
 +
 +                }
 +            }
 +
 +            energy += pot*qn;
 +        }
 +    }
 +
 +    return energy;
 +}
 +
 +/* Macro to force loop unrolling by fixing order.
 + * This gives a significant performance gain.
 + */
 +#define CALC_SPLINE(order)                     \
 +{                                              \
 +    int j,k,l;                                 \
 +    real dr,div;                               \
 +    real data[PME_ORDER_MAX];                  \
 +    real ddata[PME_ORDER_MAX];                 \
 +                                               \
 +    for(j=0; (j<DIM); j++)                     \
 +    {                                          \
 +        dr  = xptr[j];                         \
 +                                               \
 +        /* dr is relative offset from lower cell limit */ \
 +        data[order-1] = 0;                     \
 +        data[1] = dr;                          \
 +        data[0] = 1 - dr;                      \
 +                                               \
 +        for(k=3; (k<order); k++)               \
 +        {                                      \
 +            div = 1.0/(k - 1.0);               \
 +            data[k-1] = div*dr*data[k-2];      \
 +            for(l=1; (l<(k-1)); l++)           \
 +            {                                  \
 +                data[k-l-1] = div*((dr+l)*data[k-l-2]+(k-l-dr)* \
 +                                   data[k-l-1]);                \
 +            }                                  \
 +            data[0] = div*(1-dr)*data[0];      \
 +        }                                      \
 +        /* differentiate */                    \
 +        ddata[0] = -data[0];                   \
 +        for(k=1; (k<order); k++)               \
 +        {                                      \
 +            ddata[k] = data[k-1] - data[k];    \
 +        }                                      \
 +                                               \
 +        div = 1.0/(order - 1);                 \
 +        data[order-1] = div*dr*data[order-2];  \
 +        for(l=1; (l<(order-1)); l++)           \
 +        {                                      \
 +            data[order-l-1] = div*((dr+l)*data[order-l-2]+    \
 +                               (order-l-dr)*data[order-l-1]); \
 +        }                                      \
 +        data[0] = div*(1 - dr)*data[0];        \
 +                                               \
 +        for(k=0; k<order; k++)                 \
 +        {                                      \
 +            theta[j][i*order+k]  = data[k];    \
 +            dtheta[j][i*order+k] = ddata[k];   \
 +        }                                      \
 +    }                                          \
 +}
 +
 +void make_bsplines(splinevec theta,splinevec dtheta,int order,
 +                   rvec fractx[],int nr,int ind[],real charge[],
 +                   gmx_bool bFreeEnergy)
 +{
 +    /* construct splines for local atoms */
 +    int  i,ii;
 +    real *xptr;
 +
 +    for(i=0; i<nr; i++)
 +    {
 +        /* With free energy we do not use the charge check.
 +         * In most cases this will be more efficient than calling make_bsplines
 +         * twice, since usually more than half the particles have charges.
 +         */
 +        ii = ind[i];
 +        if (bFreeEnergy || charge[ii] != 0.0) {
 +            xptr = fractx[ii];
 +            switch(order) {
 +            case 4:  CALC_SPLINE(4);     break;
 +            case 5:  CALC_SPLINE(5);     break;
 +            default: CALC_SPLINE(order); break;
 +            }
 +        }
 +    }
 +}
 +
 +
 +void make_dft_mod(real *mod,real *data,int ndata)
 +{
 +  int i,j;
 +  real sc,ss,arg;
 +
 +  for(i=0;i<ndata;i++) {
 +    sc=ss=0;
 +    for(j=0;j<ndata;j++) {
 +      arg=(2.0*M_PI*i*j)/ndata;
 +      sc+=data[j]*cos(arg);
 +      ss+=data[j]*sin(arg);
 +    }
 +    mod[i]=sc*sc+ss*ss;
 +  }
 +  for(i=0;i<ndata;i++)
 +    if(mod[i]<1e-7)
 +      mod[i]=(mod[i-1]+mod[i+1])*0.5;
 +}
 +
 +
 +
 +void make_bspline_moduli(splinevec bsp_mod,int nx,int ny,int nz,int order)
 +{
 +  int nmax=max(nx,max(ny,nz));
 +  real *data,*ddata,*bsp_data;
 +  int i,k,l;
 +  real div;
 +
 +  snew(data,order);
 +  snew(ddata,order);
 +  snew(bsp_data,nmax);
 +
 +  data[order-1]=0;
 +  data[1]=0;
 +  data[0]=1;
 +
 +  for(k=3;k<order;k++) {
 +    div=1.0/(k-1.0);
 +    data[k-1]=0;
 +    for(l=1;l<(k-1);l++)
 +      data[k-l-1]=div*(l*data[k-l-2]+(k-l)*data[k-l-1]);
 +    data[0]=div*data[0];
 +  }
 +  /* differentiate */
 +  ddata[0]=-data[0];
 +  for(k=1;k<order;k++)
 +    ddata[k]=data[k-1]-data[k];
 +  div=1.0/(order-1);
 +  data[order-1]=0;
 +  for(l=1;l<(order-1);l++)
 +    data[order-l-1]=div*(l*data[order-l-2]+(order-l)*data[order-l-1]);
 +  data[0]=div*data[0];
 +
 +  for(i=0;i<nmax;i++)
 +    bsp_data[i]=0;
 +  for(i=1;i<=order;i++)
 +    bsp_data[i]=data[i-1];
 +
 +  make_dft_mod(bsp_mod[XX],bsp_data,nx);
 +  make_dft_mod(bsp_mod[YY],bsp_data,ny);
 +  make_dft_mod(bsp_mod[ZZ],bsp_data,nz);
 +
 +  sfree(data);
 +  sfree(ddata);
 +  sfree(bsp_data);
 +}
 +
 +static void setup_coordinate_communication(pme_atomcomm_t *atc)
 +{
 +  int nslab,n,i;
 +  int fw,bw;
 +
 +  nslab = atc->nslab;
 +
 +  n = 0;
 +  for(i=1; i<=nslab/2; i++) {
 +    fw = (atc->nodeid + i) % nslab;
 +    bw = (atc->nodeid - i + nslab) % nslab;
 +    if (n < nslab - 1) {
 +      atc->node_dest[n] = fw;
 +      atc->node_src[n]  = bw;
 +      n++;
 +    }
 +    if (n < nslab - 1) {
 +      atc->node_dest[n] = bw;
 +      atc->node_src[n]  = fw;
 +      n++;
 +    }
 +  }
 +}
 +
 +int gmx_pme_destroy(FILE *log,gmx_pme_t *pmedata)
 +{
 +    int thread;
 +
 +    if(NULL != log)
 +    {
 +        fprintf(log,"Destroying PME data structures.\n");
 +    }
 +
 +    sfree((*pmedata)->nnx);
 +    sfree((*pmedata)->nny);
 +    sfree((*pmedata)->nnz);
 +
 +    pmegrids_destroy(&(*pmedata)->pmegridA);
 +
 +    sfree((*pmedata)->fftgridA);
 +    sfree((*pmedata)->cfftgridA);
 +    gmx_parallel_3dfft_destroy((*pmedata)->pfft_setupA);
 +
 +    if ((*pmedata)->pmegridB.grid.grid != NULL)
 +    {
 +        pmegrids_destroy(&(*pmedata)->pmegridB);
 +        sfree((*pmedata)->fftgridB);
 +        sfree((*pmedata)->cfftgridB);
 +        gmx_parallel_3dfft_destroy((*pmedata)->pfft_setupB);
 +    }
 +    for(thread=0; thread<(*pmedata)->nthread; thread++)
 +    {
 +        free_work(&(*pmedata)->work[thread]);
 +    }
 +    sfree((*pmedata)->work);
 +
 +    sfree(*pmedata);
 +    *pmedata = NULL;
 +
 +  return 0;
 +}
 +
 +static int mult_up(int n,int f)
 +{
 +    return ((n + f - 1)/f)*f;
 +}
 +
 +
 +static double pme_load_imbalance(gmx_pme_t pme)
 +{
 +    int    nma,nmi;
 +    double n1,n2,n3;
 +
 +    nma = pme->nnodes_major;
 +    nmi = pme->nnodes_minor;
 +
 +    n1 = mult_up(pme->nkx,nma)*mult_up(pme->nky,nmi)*pme->nkz;
 +    n2 = mult_up(pme->nkx,nma)*mult_up(pme->nkz,nmi)*pme->nky;
 +    n3 = mult_up(pme->nky,nma)*mult_up(pme->nkz,nmi)*pme->nkx;
 +
 +    /* pme_solve is roughly double the cost of an fft */
 +
 +    return (n1 + n2 + 3*n3)/(double)(6*pme->nkx*pme->nky*pme->nkz);
 +}
 +
 +static void init_atomcomm(gmx_pme_t pme,pme_atomcomm_t *atc, t_commrec *cr,
 +                          int dimind,gmx_bool bSpread)
 +{
 +    int nk,k,s,thread;
 +
 +    atc->dimind = dimind;
 +    atc->nslab  = 1;
 +    atc->nodeid = 0;
 +    atc->pd_nalloc = 0;
 +#ifdef GMX_MPI
 +    if (pme->nnodes > 1)
 +    {
 +        atc->mpi_comm = pme->mpi_comm_d[dimind];
 +        MPI_Comm_size(atc->mpi_comm,&atc->nslab);
 +        MPI_Comm_rank(atc->mpi_comm,&atc->nodeid);
 +    }
 +    if (debug)
 +    {
 +        fprintf(debug,"For PME atom communication in dimind %d: nslab %d rank %d\n",atc->dimind,atc->nslab,atc->nodeid);
 +    }
 +#endif
 +
 +    atc->bSpread   = bSpread;
 +    atc->pme_order = pme->pme_order;
 +
 +    if (atc->nslab > 1)
 +    {
 +        /* These three allocations are not required for particle decomp. */
 +        snew(atc->node_dest,atc->nslab);
 +        snew(atc->node_src,atc->nslab);
 +        setup_coordinate_communication(atc);
 +
 +        snew(atc->count_thread,pme->nthread);
 +        for(thread=0; thread<pme->nthread; thread++)
 +        {
 +            snew(atc->count_thread[thread],atc->nslab);
 +        }
 +        atc->count = atc->count_thread[0];
 +        snew(atc->rcount,atc->nslab);
 +        snew(atc->buf_index,atc->nslab);
 +    }
 +
 +    atc->nthread = pme->nthread;
 +    if (atc->nthread > 1)
 +    {
 +        snew(atc->thread_plist,atc->nthread);
 +    }
 +    snew(atc->spline,atc->nthread);
 +    for(thread=0; thread<atc->nthread; thread++)
 +    {
 +        if (atc->nthread > 1)
 +        {
 +            snew(atc->thread_plist[thread].n,atc->nthread+2*GMX_CACHE_SEP);
 +            atc->thread_plist[thread].n += GMX_CACHE_SEP;
 +        }
 +    }
 +}
 +
 +static void
 +init_overlap_comm(pme_overlap_t *  ol,
 +                  int              norder,
 +#ifdef GMX_MPI
 +                  MPI_Comm         comm,
 +#endif
 +                  int              nnodes,
 +                  int              nodeid,
 +                  int              ndata,
 +                  int              commplainsize)
 +{
 +    int lbnd,rbnd,maxlr,b,i;
 +    int exten;
 +    int nn,nk;
 +    pme_grid_comm_t *pgc;
 +    gmx_bool bCont;
 +    int fft_start,fft_end,send_index1,recv_index1;
 +
 +#ifdef GMX_MPI
 +    ol->mpi_comm = comm;
 +#endif
 +
 +    ol->nnodes = nnodes;
 +    ol->nodeid = nodeid;
 +
 +    /* Linear translation of the PME grid wo'nt affect reciprocal space
 +     * calculations, so to optimize we only interpolate "upwards",
 +     * which also means we only have to consider overlap in one direction.
 +     * I.e., particles on this node might also be spread to grid indices
 +     * that belong to higher nodes (modulo nnodes)
 +     */
 +
 +    snew(ol->s2g0,ol->nnodes+1);
 +    snew(ol->s2g1,ol->nnodes);
 +    if (debug) { fprintf(debug,"PME slab boundaries:"); }
 +    for(i=0; i<nnodes; i++)
 +    {
 +        /* s2g0 the local interpolation grid start.
 +         * s2g1 the local interpolation grid end.
 +         * Because grid overlap communication only goes forward,
 +         * the grid the slabs for fft's should be rounded down.
 +         */
 +        ol->s2g0[i] = ( i   *ndata + 0       )/nnodes;
 +        ol->s2g1[i] = ((i+1)*ndata + nnodes-1)/nnodes + norder - 1;
 +
 +        if (debug)
 +        {
 +            fprintf(debug,"  %3d %3d",ol->s2g0[i],ol->s2g1[i]);
 +        }
 +    }
 +    ol->s2g0[nnodes] = ndata;
 +    if (debug) { fprintf(debug,"\n"); }
 +
 +    /* Determine with how many nodes we need to communicate the grid overlap */
 +    b = 0;
 +    do
 +    {
 +        b++;
 +        bCont = FALSE;
 +        for(i=0; i<nnodes; i++)
 +        {
 +            if ((i+b <  nnodes && ol->s2g1[i] > ol->s2g0[i+b]) ||
 +                (i+b >= nnodes && ol->s2g1[i] > ol->s2g0[i+b-nnodes] + ndata))
 +            {
 +                bCont = TRUE;
 +            }
 +        }
 +    }
 +    while (bCont && b < nnodes);
 +    ol->noverlap_nodes = b - 1;
 +
 +    snew(ol->send_id,ol->noverlap_nodes);
 +    snew(ol->recv_id,ol->noverlap_nodes);
 +    for(b=0; b<ol->noverlap_nodes; b++)
 +    {
 +        ol->send_id[b] = (ol->nodeid + (b + 1)) % ol->nnodes;
 +        ol->recv_id[b] = (ol->nodeid - (b + 1) + ol->nnodes) % ol->nnodes;
 +    }
 +    snew(ol->comm_data, ol->noverlap_nodes);
 +
 +    for(b=0; b<ol->noverlap_nodes; b++)
 +    {
 +        pgc = &ol->comm_data[b];
 +        /* Send */
 +        fft_start        = ol->s2g0[ol->send_id[b]];
 +        fft_end          = ol->s2g0[ol->send_id[b]+1];
 +        if (ol->send_id[b] < nodeid)
 +        {
 +            fft_start += ndata;
 +            fft_end   += ndata;
 +        }
 +        send_index1      = ol->s2g1[nodeid];
 +        send_index1      = min(send_index1,fft_end);
 +        pgc->send_index0 = fft_start;
 +        pgc->send_nindex = max(0,send_index1 - pgc->send_index0);
 +
 +        /* We always start receiving to the first index of our slab */
 +        fft_start        = ol->s2g0[ol->nodeid];
 +        fft_end          = ol->s2g0[ol->nodeid+1];
 +        recv_index1      = ol->s2g1[ol->recv_id[b]];
 +        if (ol->recv_id[b] > nodeid)
 +        {
 +            recv_index1 -= ndata;
 +        }
 +        recv_index1      = min(recv_index1,fft_end);
 +        pgc->recv_index0 = fft_start;
 +        pgc->recv_nindex = max(0,recv_index1 - pgc->recv_index0);
 +    }
 +
 +    /* For non-divisible grid we need pme_order iso pme_order-1 */
 +    snew(ol->sendbuf,norder*commplainsize);
 +    snew(ol->recvbuf,norder*commplainsize);
 +}
 +
 +static void
 +make_gridindex5_to_localindex(int n,int local_start,int local_range,
 +                              int **global_to_local,
 +                              real **fraction_shift)
 +{
 +    int i;
 +    int * gtl;
 +    real * fsh;
 +
 +    snew(gtl,5*n);
 +    snew(fsh,5*n);
 +    for(i=0; (i<5*n); i++)
 +    {
 +        /* Determine the global to local grid index */
 +        gtl[i] = (i - local_start + n) % n;
 +        /* For coordinates that fall within the local grid the fraction
 +         * is correct, we don't need to shift it.
 +         */
 +        fsh[i] = 0;
 +        if (local_range < n)
 +        {
 +            /* Due to rounding issues i could be 1 beyond the lower or
 +             * upper boundary of the local grid. Correct the index for this.
 +             * If we shift the index, we need to shift the fraction by
 +             * the same amount in the other direction to not affect
 +             * the weights.
 +             * Note that due to this shifting the weights at the end of
 +             * the spline might change, but that will only involve values
 +             * between zero and values close to the precision of a real,
 +             * which is anyhow the accuracy of the whole mesh calculation.
 +             */
 +            /* With local_range=0 we should not change i=local_start */
 +            if (i % n != local_start)
 +            {
 +                if (gtl[i] == n-1)
 +                {
 +                    gtl[i] = 0;
 +                    fsh[i] = -1;
 +                }
 +                else if (gtl[i] == local_range)
 +                {
 +                    gtl[i] = local_range - 1;
 +                    fsh[i] = 1;
 +                }
 +            }
 +        }
 +    }
 +
 +    *global_to_local = gtl;
 +    *fraction_shift  = fsh;
 +}
 +
 +static void sse_mask_init(pme_spline_work_t *work,int order)
 +{
 +#ifdef PME_SSE
 +    float  tmp[8];
 +    __m128 zero_SSE;
 +    int    of,i;
 +
 +    zero_SSE = _mm_setzero_ps();
 +
 +    for(of=0; of<8-(order-1); of++)
 +    {
 +        for(i=0; i<8; i++)
 +        {
 +            tmp[i] = (i >= of && i < of+order ? 1 : 0);
 +        }
 +        work->mask_SSE0[of] = _mm_loadu_ps(tmp);
 +        work->mask_SSE1[of] = _mm_loadu_ps(tmp+4);
 +        work->mask_SSE0[of] = _mm_cmpgt_ps(work->mask_SSE0[of],zero_SSE);
 +        work->mask_SSE1[of] = _mm_cmpgt_ps(work->mask_SSE1[of],zero_SSE);
 +    }
 +#endif
 +}
 +
 +static void
 +gmx_pme_check_grid_restrictions(FILE *fplog,char dim,int nnodes,int *nk)
 +{
 +    int nk_new;
 +
 +    if (*nk % nnodes != 0)
 +    {
 +        nk_new = nnodes*(*nk/nnodes + 1);
 +
 +        if (2*nk_new >= 3*(*nk))
 +        {
 +            gmx_fatal(FARGS,"The PME grid size in dim %c (%d) is not divisble by the number of nodes doing PME in dim %c (%d). The grid size would have to be increased by more than 50%% to make the grid divisible. Change the total number of nodes or the number of domain decomposition cells in x or the PME grid %c dimension (and the cut-off).",
 +                      dim,*nk,dim,nnodes,dim);
 +        }
 +
 +        if (fplog != NULL)
 +        {
 +            fprintf(fplog,"\nNOTE: The PME grid size in dim %c (%d) is not divisble by the number of nodes doing PME in dim %c (%d). Increasing the PME grid size in dim %c to %d. This will increase the accuracy and will not decrease the performance significantly on this number of nodes. For optimal performance change the total number of nodes or the number of domain decomposition cells in x or the PME grid %c dimension (and the cut-off).\n\n",
 +                    dim,*nk,dim,nnodes,dim,nk_new,dim);
 +        }
 +
 +        *nk = nk_new;
 +    }
 +}
 +
 +int gmx_pme_init(gmx_pme_t *         pmedata,
 +                 t_commrec *         cr,
 +                 int                 nnodes_major,
 +                 int                 nnodes_minor,
 +                 t_inputrec *        ir,
 +                 int                 homenr,
 +                 gmx_bool            bFreeEnergy,
 +                 gmx_bool            bReproducible,
 +                 int                 nthread)
 +{
 +    gmx_pme_t pme=NULL;
 +
 +    pme_atomcomm_t *atc;
 +    ivec ndata;
 +
 +    if (debug)
 +        fprintf(debug,"Creating PME data structures.\n");
 +    snew(pme,1);
 +
 +    pme->redist_init         = FALSE;
 +    pme->sum_qgrid_tmp       = NULL;
 +    pme->sum_qgrid_dd_tmp    = NULL;
 +    pme->buf_nalloc          = 0;
 +    pme->redist_buf_nalloc   = 0;
 +
 +    pme->nnodes              = 1;
 +    pme->bPPnode             = TRUE;
 +
 +    pme->nnodes_major        = nnodes_major;
 +    pme->nnodes_minor        = nnodes_minor;
 +
 +#ifdef GMX_MPI
 +    if (PAR(cr))
 +    {
 +        pme->mpi_comm        = cr->mpi_comm_mygroup;
 +
 +        MPI_Comm_rank(pme->mpi_comm,&pme->nodeid);
 +        MPI_Comm_size(pme->mpi_comm,&pme->nnodes);
 +    }
 +#endif
 +
 +    if (pme->nnodes == 1)
 +    {
 +        pme->ndecompdim = 0;
 +        pme->nodeid_major = 0;
 +        pme->nodeid_minor = 0;
 +#ifdef GMX_MPI
 +        pme->mpi_comm_d[0] = pme->mpi_comm_d[1] = MPI_COMM_NULL;
 +#endif
 +    }
 +    else
 +    {
 +        if (nnodes_minor == 1)
 +        {
 +#ifdef GMX_MPI
 +            pme->mpi_comm_d[0] = pme->mpi_comm;
 +            pme->mpi_comm_d[1] = MPI_COMM_NULL;
 +#endif
 +            pme->ndecompdim = 1;
 +            pme->nodeid_major = pme->nodeid;
 +            pme->nodeid_minor = 0;
 +
 +        }
 +        else if (nnodes_major == 1)
 +        {
 +#ifdef GMX_MPI
 +            pme->mpi_comm_d[0] = MPI_COMM_NULL;
 +            pme->mpi_comm_d[1] = pme->mpi_comm;
 +#endif
 +            pme->ndecompdim = 1;
 +            pme->nodeid_major = 0;
 +            pme->nodeid_minor = pme->nodeid;
 +        }
 +        else
 +        {
 +            if (pme->nnodes % nnodes_major != 0)
 +            {
 +                gmx_incons("For 2D PME decomposition, #PME nodes must be divisible by the number of nodes in the major dimension");
 +            }
 +            pme->ndecompdim = 2;
 +
 +#ifdef GMX_MPI
 +            MPI_Comm_split(pme->mpi_comm,pme->nodeid % nnodes_minor,
 +                           pme->nodeid,&pme->mpi_comm_d[0]);  /* My communicator along major dimension */
 +            MPI_Comm_split(pme->mpi_comm,pme->nodeid/nnodes_minor,
 +                           pme->nodeid,&pme->mpi_comm_d[1]);  /* My communicator along minor dimension */
 +
 +            MPI_Comm_rank(pme->mpi_comm_d[0],&pme->nodeid_major);
 +            MPI_Comm_size(pme->mpi_comm_d[0],&pme->nnodes_major);
 +            MPI_Comm_rank(pme->mpi_comm_d[1],&pme->nodeid_minor);
 +            MPI_Comm_size(pme->mpi_comm_d[1],&pme->nnodes_minor);
 +#endif
 +        }
 +        pme->bPPnode = (cr->duty & DUTY_PP);
 +    }
 +
 +    pme->nthread = nthread;
 +
 +    if (ir->ePBC == epbcSCREW)
 +    {
 +        gmx_fatal(FARGS,"pme does not (yet) work with pbc = screw");
 +    }
 +
 +    pme->bFEP        = ((ir->efep != efepNO) && bFreeEnergy);
 +    pme->nkx         = ir->nkx;
 +    pme->nky         = ir->nky;
 +    pme->nkz         = ir->nkz;
 +    pme->pme_order   = ir->pme_order;
 +    pme->epsilon_r   = ir->epsilon_r;
 +
 +    if (pme->pme_order > PME_ORDER_MAX)
 +    {
 +        gmx_fatal(FARGS,"pme_order (%d) is larger than the maximum allowed value (%d). Modify and recompile the code if you really need such a high order.",
 +                  pme->pme_order,PME_ORDER_MAX);
 +    }
 +
 +    /* Currently pme.c supports only the fft5d FFT code.
 +     * Therefore the grid always needs to be divisible by nnodes.
 +     * When the old 1D code is also supported again, change this check.
 +     *
 +     * This check should be done before calling gmx_pme_init
 +     * and fplog should be passed iso stderr.
 +     *
 +    if (pme->ndecompdim >= 2)
 +    */
 +    if (pme->ndecompdim >= 1)
 +    {
 +        /*
 +        gmx_pme_check_grid_restrictions(pme->nodeid==0 ? stderr : NULL,
 +                                        'x',nnodes_major,&pme->nkx);
 +        gmx_pme_check_grid_restrictions(pme->nodeid==0 ? stderr : NULL,
 +                                        'y',nnodes_minor,&pme->nky);
 +        */
 +    }
 +
 +    if (pme->nkx <= pme->pme_order*(pme->nnodes_major > 1 ? 2 : 1) ||
 +        pme->nky <= pme->pme_order*(pme->nnodes_minor > 1 ? 2 : 1) ||
 +        pme->nkz <= pme->pme_order)
 +    {
 +        gmx_fatal(FARGS,"The pme grid dimensions need to be larger than pme_order (%d) and in parallel larger than 2*pme_ordern for x and/or y",pme->pme_order);
 +    }
 +
 +    if (pme->nnodes > 1) {
 +        double imbal;
 +
 +#ifdef GMX_MPI
 +        MPI_Type_contiguous(DIM, mpi_type, &(pme->rvec_mpi));
 +        MPI_Type_commit(&(pme->rvec_mpi));
 +#endif
 +
 +        /* Note that the charge spreading and force gathering, which usually
 +         * takes about the same amount of time as FFT+solve_pme,
 +         * is always fully load balanced
 +         * (unless the charge distribution is inhomogeneous).
 +         */
 +
 +        imbal = pme_load_imbalance(pme);
 +        if (imbal >= 1.2 && pme->nodeid_major == 0 && pme->nodeid_minor == 0)
 +        {
 +            fprintf(stderr,
 +                    "\n"
 +                    "NOTE: The load imbalance in PME FFT and solve is %d%%.\n"
 +                    "      For optimal PME load balancing\n"
 +                    "      PME grid_x (%d) and grid_y (%d) should be divisible by #PME_nodes_x (%d)\n"
 +                    "      and PME grid_y (%d) and grid_z (%d) should be divisible by #PME_nodes_y (%d)\n"
 +                    "\n",
 +                    (int)((imbal-1)*100 + 0.5),
 +                    pme->nkx,pme->nky,pme->nnodes_major,
 +                    pme->nky,pme->nkz,pme->nnodes_minor);
 +        }
 +    }
 +
 +    /* For non-divisible grid we need pme_order iso pme_order-1 */
 +    /* In sum_qgrid_dd x overlap is copied in place: take padding into account.
 +     * y is always copied through a buffer: we don't need padding in z,
 +     * but we do need the overlap in x because of the communication order.
 +     */
 +    init_overlap_comm(&pme->overlap[0],pme->pme_order,
 +#ifdef GMX_MPI
 +                      pme->mpi_comm_d[0],
 +#endif
 +                      pme->nnodes_major,pme->nodeid_major,
 +                      pme->nkx,
 +                      (div_round_up(pme->nky,pme->nnodes_minor)+pme->pme_order)*(pme->nkz+pme->pme_order-1));
 +
 +    init_overlap_comm(&pme->overlap[1],pme->pme_order,
 +#ifdef GMX_MPI
 +                      pme->mpi_comm_d[1],
 +#endif
 +                      pme->nnodes_minor,pme->nodeid_minor,
 +                      pme->nky,
 +                      (div_round_up(pme->nkx,pme->nnodes_major)+pme->pme_order)*pme->nkz);
 +
 +    /* Check for a limitation of the (current) sum_fftgrid_dd code */
 +    if (pme->nthread > 1 &&
 +        (pme->overlap[0].noverlap_nodes > 1 ||
 +         pme->overlap[1].noverlap_nodes > 1))
 +    {
 +        gmx_fatal(FARGS,"With threads the number of grid lines per node along x and or y should be pme_order (%d) or more or exactly pme_order-1",pme->pme_order);
 +    }
 +
 +    snew(pme->bsp_mod[XX],pme->nkx);
 +    snew(pme->bsp_mod[YY],pme->nky);
 +    snew(pme->bsp_mod[ZZ],pme->nkz);
 +
 +    /* The required size of the interpolation grid, including overlap.
 +     * The allocated size (pmegrid_n?) might be slightly larger.
 +     */
 +    pme->pmegrid_nx = pme->overlap[0].s2g1[pme->nodeid_major] -
 +                      pme->overlap[0].s2g0[pme->nodeid_major];
 +    pme->pmegrid_ny = pme->overlap[1].s2g1[pme->nodeid_minor] -
 +                      pme->overlap[1].s2g0[pme->nodeid_minor];
 +    pme->pmegrid_nz_base = pme->nkz;
 +    pme->pmegrid_nz = pme->pmegrid_nz_base + pme->pme_order - 1;
 +    set_grid_alignment(&pme->pmegrid_nz,pme->pme_order);
 +
 +    pme->pmegrid_start_ix = pme->overlap[0].s2g0[pme->nodeid_major];
 +    pme->pmegrid_start_iy = pme->overlap[1].s2g0[pme->nodeid_minor];
 +    pme->pmegrid_start_iz = 0;
 +
 +    make_gridindex5_to_localindex(pme->nkx,
 +                                  pme->pmegrid_start_ix,
 +                                  pme->pmegrid_nx - (pme->pme_order-1),
 +                                  &pme->nnx,&pme->fshx);
 +    make_gridindex5_to_localindex(pme->nky,
 +                                  pme->pmegrid_start_iy,
 +                                  pme->pmegrid_ny - (pme->pme_order-1),
 +                                  &pme->nny,&pme->fshy);
 +    make_gridindex5_to_localindex(pme->nkz,
 +                                  pme->pmegrid_start_iz,
 +                                  pme->pmegrid_nz_base,
 +                                  &pme->nnz,&pme->fshz);
 +
 +    pmegrids_init(&pme->pmegridA,
 +                  pme->pmegrid_nx,pme->pmegrid_ny,pme->pmegrid_nz,
 +                  pme->pmegrid_nz_base,
 +                  pme->pme_order,
 +                  pme->nthread,
 +                  pme->overlap[0].s2g1[pme->nodeid_major]-pme->overlap[0].s2g0[pme->nodeid_major+1],
 +                  pme->overlap[1].s2g1[pme->nodeid_minor]-pme->overlap[1].s2g0[pme->nodeid_minor+1]);
 +
 +    sse_mask_init(&pme->spline_work,pme->pme_order);
 +
 +    ndata[0] = pme->nkx;
 +    ndata[1] = pme->nky;
 +    ndata[2] = pme->nkz;
 +
 +    /* This routine will allocate the grid data to fit the FFTs */
 +    gmx_parallel_3dfft_init(&pme->pfft_setupA,ndata,
 +                            &pme->fftgridA,&pme->cfftgridA,
 +                            pme->mpi_comm_d,
 +                            pme->overlap[0].s2g0,pme->overlap[1].s2g0,
 +                            bReproducible,pme->nthread);
 +
 +    if (bFreeEnergy)
 +    {
 +        pmegrids_init(&pme->pmegridB,
 +                      pme->pmegrid_nx,pme->pmegrid_ny,pme->pmegrid_nz,
 +                      pme->pmegrid_nz_base,
 +                      pme->pme_order,
 +                      pme->nthread,
 +                      pme->nkx % pme->nnodes_major != 0,
 +                      pme->nky % pme->nnodes_minor != 0);
 +
 +        gmx_parallel_3dfft_init(&pme->pfft_setupB,ndata,
 +                                &pme->fftgridB,&pme->cfftgridB,
 +                                pme->mpi_comm_d,
 +                                pme->overlap[0].s2g0,pme->overlap[1].s2g0,
 +                                bReproducible,pme->nthread);
 +    }
 +    else
 +    {
 +        pme->pmegridB.grid.grid = NULL;
 +        pme->fftgridB           = NULL;
 +        pme->cfftgridB          = NULL;
 +    }
 +
 +    make_bspline_moduli(pme->bsp_mod,pme->nkx,pme->nky,pme->nkz,pme->pme_order);
 +
 +    /* Use atc[0] for spreading */
 +    init_atomcomm(pme,&pme->atc[0],cr,nnodes_major > 1 ? 0 : 1,TRUE);
 +    if (pme->ndecompdim >= 2)
 +    {
 +        init_atomcomm(pme,&pme->atc[1],cr,1,FALSE);
 +    }
 +
 +    if (pme->nnodes == 1) {
 +        pme->atc[0].n = homenr;
 +        pme_realloc_atomcomm_things(&pme->atc[0]);
 +    }
 +
 +    {
 +        int thread;
 +
 +        /* Use fft5d, order after FFT is y major, z, x minor */
 +
 +        snew(pme->work,pme->nthread);
 +        for(thread=0; thread<pme->nthread; thread++)
 +        {
 +            realloc_work(&pme->work[thread],pme->nkx);
 +        }
 +    }
 +
 +    *pmedata = pme;
 +
 +    return 0;
 +}
 +
 +
 +static void copy_local_grid(gmx_pme_t pme,
 +                            pmegrids_t *pmegrids,int thread,real *fftgrid)
 +{
 +    ivec local_fft_ndata,local_fft_offset,local_fft_size;
 +    int  fft_my,fft_mz;
 +    int  nsx,nsy,nsz;
 +    ivec nf;
 +    int  offx,offy,offz,x,y,z,i0,i0t;
 +    int  d;
 +    pmegrid_t *pmegrid;
 +    real *grid_th;
 +
 +    gmx_parallel_3dfft_real_limits(pme->pfft_setupA,
 +                                   local_fft_ndata,
 +                                   local_fft_offset,
 +                                   local_fft_size);
 +    fft_my = local_fft_size[YY];
 +    fft_mz = local_fft_size[ZZ];
 +
 +    pmegrid = &pmegrids->grid_th[thread];
 +
 +    nsx = pmegrid->n[XX];
 +    nsy = pmegrid->n[YY];
 +    nsz = pmegrid->n[ZZ];
 +
 +    for(d=0; d<DIM; d++)
 +    {
 +        nf[d] = min(pmegrid->n[d] - (pmegrid->order - 1),
 +                    local_fft_ndata[d] - pmegrid->offset[d]);
 +    }
 +
 +    offx = pmegrid->offset[XX];
 +    offy = pmegrid->offset[YY];
 +    offz = pmegrid->offset[ZZ];
 +
 +    /* Directly copy the non-overlapping parts of the local grids.
 +     * This also initializes the full grid.
 +     */
 +    grid_th = pmegrid->grid;
 +    for(x=0; x<nf[XX]; x++)
 +    {
 +        for(y=0; y<nf[YY]; y++)
 +        {
 +            i0  = ((offx + x)*fft_my + (offy + y))*fft_mz + offz;
 +            i0t = (x*nsy + y)*nsz;
 +            for(z=0; z<nf[ZZ]; z++)
 +            {
 +                fftgrid[i0+z] = grid_th[i0t+z];
 +            }
 +        }
 +    }
 +}
 +
 +static void print_sendbuf(gmx_pme_t pme,real *sendbuf)
 +{
 +    ivec local_fft_ndata,local_fft_offset,local_fft_size;
 +    pme_overlap_t *overlap;
 +    int datasize,nind;
 +    int i,x,y,z,n;
 +
 +    gmx_parallel_3dfft_real_limits(pme->pfft_setupA,
 +                                   local_fft_ndata,
 +                                   local_fft_offset,
 +                                   local_fft_size);
 +    /* Major dimension */
 +    overlap = &pme->overlap[0];
 +
 +    nind   = overlap->comm_data[0].send_nindex;
 +
 +    for(y=0; y<local_fft_ndata[YY]; y++) {
 +         printf(" %2d",y);
 +    }
 +    printf("\n");
 +
 +    i = 0;
 +    for(x=0; x<nind; x++) {
 +        for(y=0; y<local_fft_ndata[YY]; y++) {
 +            n = 0;
 +            for(z=0; z<local_fft_ndata[ZZ]; z++) {
 +                if (sendbuf[i] != 0) n++;
 +                i++;
 +            }
 +            printf(" %2d",n);
 +        }
 +        printf("\n");
 +    }
 +}
 +
 +static void
 +reduce_threadgrid_overlap(gmx_pme_t pme,
 +                          const pmegrids_t *pmegrids,int thread,
 +                          real *fftgrid,real *commbuf_x,real *commbuf_y)
 +{
 +    ivec local_fft_ndata,local_fft_offset,local_fft_size;
 +    int  fft_nx,fft_ny,fft_nz;
 +    int  fft_my,fft_mz;
 +    int  buf_my=-1;
 +    int  nsx,nsy,nsz;
 +    ivec ne;
 +    int  offx,offy,offz,x,y,z,i0,i0t;
 +    int  sx,sy,sz,fx,fy,fz,tx1,ty1,tz1,ox,oy,oz;
 +    gmx_bool bClearBufX,bClearBufY,bClearBufXY,bClearBuf;
 +    gmx_bool bCommX,bCommY;
 +    int  d;
 +    int  thread_f;
 +    const pmegrid_t *pmegrid,*pmegrid_g,*pmegrid_f;
 +    const real *grid_th;
 +    real *commbuf=NULL;
 +
 +    gmx_parallel_3dfft_real_limits(pme->pfft_setupA,
 +                                   local_fft_ndata,
 +                                   local_fft_offset,
 +                                   local_fft_size);
 +    fft_nx = local_fft_ndata[XX];
 +    fft_ny = local_fft_ndata[YY];
 +    fft_nz = local_fft_ndata[ZZ];
 +
 +    fft_my = local_fft_size[YY];
 +    fft_mz = local_fft_size[ZZ];
 +
 +    /* This routine is called when all thread have finished spreading.
 +     * Here each thread sums grid contributions calculated by other threads
 +     * to the thread local grid volume.
 +     * To minimize the number of grid copying operations,
 +     * this routines sums immediately from the pmegrid to the fftgrid.
 +     */
 +
 +    /* Determine which part of the full node grid we should operate on,
 +     * this is our thread local part of the full grid.
 +     */
 +    pmegrid = &pmegrids->grid_th[thread];
 +
 +    for(d=0; d<DIM; d++)
 +    {
 +        ne[d] = min(pmegrid->offset[d]+pmegrid->n[d]-(pmegrid->order-1),
 +                    local_fft_ndata[d]);
 +    }
 +
 +    offx = pmegrid->offset[XX];
 +    offy = pmegrid->offset[YY];
 +    offz = pmegrid->offset[ZZ];
 +
 +
 +    bClearBufX  = TRUE;
 +    bClearBufY  = TRUE;
 +    bClearBufXY = TRUE;
 +
 +    /* Now loop over all the thread data blocks that contribute
 +     * to the grid region we (our thread) are operating on.
 +     */
 +    /* Note that ffy_nx/y is equal to the number of grid points
 +     * between the first point of our node grid and the one of the next node.
 +     */
 +    for(sx=0; sx>=-pmegrids->nthread_comm[XX]; sx--)
 +    {
 +        fx = pmegrid->ci[XX] + sx;
 +        ox = 0;
 +        bCommX = FALSE;
 +        if (fx < 0) {
 +            fx += pmegrids->nc[XX];
 +            ox -= fft_nx;
 +            bCommX = (pme->nnodes_major > 1);
 +        }
 +        pmegrid_g = &pmegrids->grid_th[fx*pmegrids->nc[YY]*pmegrids->nc[ZZ]];
 +        ox += pmegrid_g->offset[XX];
 +        if (!bCommX)
 +        {
 +            tx1 = min(ox + pmegrid_g->n[XX],ne[XX]);
 +        }
 +        else
 +        {
 +            tx1 = min(ox + pmegrid_g->n[XX],pme->pme_order);
 +        }
 +
 +        for(sy=0; sy>=-pmegrids->nthread_comm[YY]; sy--)
 +        {
 +            fy = pmegrid->ci[YY] + sy;
 +            oy = 0;
 +            bCommY = FALSE;
 +            if (fy < 0) {
 +                fy += pmegrids->nc[YY];
 +                oy -= fft_ny;
 +                bCommY = (pme->nnodes_minor > 1);
 +            }
 +            pmegrid_g = &pmegrids->grid_th[fy*pmegrids->nc[ZZ]];
 +            oy += pmegrid_g->offset[YY];
 +            if (!bCommY)
 +            {
 +                ty1 = min(oy + pmegrid_g->n[YY],ne[YY]);
 +            }
 +            else
 +            {
 +                ty1 = min(oy + pmegrid_g->n[YY],pme->pme_order);
 +            }
 +
 +            for(sz=0; sz>=-pmegrids->nthread_comm[ZZ]; sz--)
 +            {
 +                fz = pmegrid->ci[ZZ] + sz;
 +                oz = 0;
 +                if (fz < 0)
 +                {
 +                    fz += pmegrids->nc[ZZ];
 +                    oz -= fft_nz;
 +                }
 +                pmegrid_g = &pmegrids->grid_th[fz];
 +                oz += pmegrid_g->offset[ZZ];
 +                tz1 = min(oz + pmegrid_g->n[ZZ],ne[ZZ]);
 +
 +                if (sx == 0 && sy == 0 && sz == 0)
 +                {
 +                    /* We have already added our local contribution
 +                     * before calling this routine, so skip it here.
 +                     */
 +                    continue;
 +                }
 +
 +                thread_f = (fx*pmegrids->nc[YY] + fy)*pmegrids->nc[ZZ] + fz;
 +
 +                pmegrid_f = &pmegrids->grid_th[thread_f];
 +
 +                grid_th = pmegrid_f->grid;
 +
 +                nsx = pmegrid_f->n[XX];
 +                nsy = pmegrid_f->n[YY];
 +                nsz = pmegrid_f->n[ZZ];
 +
 +#ifdef DEBUG_PME_REDUCE
 +                printf("n%d t%d add %d  %2d %2d %2d  %2d %2d %2d  %2d-%2d %2d-%2d, %2d-%2d %2d-%2d, %2d-%2d %2d-%2d\n",
 +                       pme->nodeid,thread,thread_f,
 +                       pme->pmegrid_start_ix,
 +                       pme->pmegrid_start_iy,
 +                       pme->pmegrid_start_iz,
 +                       sx,sy,sz,
 +                       offx-ox,tx1-ox,offx,tx1,
 +                       offy-oy,ty1-oy,offy,ty1,
 +                       offz-oz,tz1-oz,offz,tz1);
 +#endif
 +
 +                if (!(bCommX || bCommY))
 +                {
 +                    /* Copy from the thread local grid to the node grid */
 +                    for(x=offx; x<tx1; x++)
 +                    {
 +                        for(y=offy; y<ty1; y++)
 +                        {
 +                            i0  = (x*fft_my + y)*fft_mz;
 +                            i0t = ((x - ox)*nsy + (y - oy))*nsz - oz;
 +                            for(z=offz; z<tz1; z++)
 +                            {
 +                                fftgrid[i0+z] += grid_th[i0t+z];
 +                            }
 +                        }
 +                    }
 +                }
 +                else
 +                {
 +                    /* The order of this conditional decides
 +                     * where the corner volume gets stored with x+y decomp.
 +                     */
 +                    if (bCommY)
 +                    {
 +                        commbuf = commbuf_y;
 +                        buf_my  = ty1 - offy;
 +                        if (bCommX)
 +                        {
 +                            /* We index commbuf modulo the local grid size */
 +                            commbuf += buf_my*fft_nx*fft_nz;
 +
 +                            bClearBuf  = bClearBufXY;
 +                            bClearBufXY = FALSE;
 +                        }
 +                        else
 +                        {
 +                            bClearBuf  = bClearBufY;
 +                            bClearBufY = FALSE;
 +                        }
 +                    }
 +                    else
 +                    {
 +                        commbuf = commbuf_x;
 +                        buf_my  = fft_ny;
 +                        bClearBuf  = bClearBufX;
 +                        bClearBufX = FALSE;
 +                    }
 +
 +                    /* Copy to the communication buffer */
 +                    for(x=offx; x<tx1; x++)
 +                    {
 +                        for(y=offy; y<ty1; y++)
 +                        {
 +                            i0  = (x*buf_my + y)*fft_nz;
 +                            i0t = ((x - ox)*nsy + (y - oy))*nsz - oz;
 +
 +                            if (bClearBuf)
 +                            {
 +                                /* First access of commbuf, initialize it */
 +                                for(z=offz; z<tz1; z++)
 +                                {
 +                                    commbuf[i0+z]  = grid_th[i0t+z];
 +                                }
 +                            }
 +                            else
 +                            {
 +                                for(z=offz; z<tz1; z++)
 +                                {
 +                                    commbuf[i0+z] += grid_th[i0t+z];
 +                                }
 +                            }
 +                        }
 +                    }
 +                }
 +            }
 +        }
 +    }
 +}
 +
 +
 +static void sum_fftgrid_dd(gmx_pme_t pme,real *fftgrid)
 +{
 +    ivec local_fft_ndata,local_fft_offset,local_fft_size;
 +    pme_overlap_t *overlap;
 +    int  send_nindex;
 +    int  recv_index0,recv_nindex;
 +#ifdef GMX_MPI
 +    MPI_Status stat;
 +#endif
 +    int  ipulse,send_id,recv_id,datasize,gridsize,size_yx;
 +    real *sendptr,*recvptr;
 +    int  x,y,z,indg,indb;
 +
 +    /* Note that this routine is only used for forward communication.
 +     * Since the force gathering, unlike the charge spreading,
 +     * can be trivially parallelized over the particles,
 +     * the backwards process is much simpler and can use the "old"
 +     * communication setup.
 +     */
 +
 +    gmx_parallel_3dfft_real_limits(pme->pfft_setupA,
 +                                   local_fft_ndata,
 +                                   local_fft_offset,
 +                                   local_fft_size);
 +
 +    /* Currently supports only a single communication pulse */
 +
 +/* for(ipulse=0;ipulse<overlap->noverlap_nodes;ipulse++) */
 +    if (pme->nnodes_minor > 1)
 +    {
 +        /* Major dimension */
 +        overlap = &pme->overlap[1];
 +
 +        if (pme->nnodes_major > 1)
 +        {
 +             size_yx = pme->overlap[0].comm_data[0].send_nindex;
 +        }
 +        else
 +        {
 +            size_yx = 0;
 +        }
 +        datasize = (local_fft_ndata[XX]+size_yx)*local_fft_ndata[ZZ];
 +
 +        ipulse = 0;
 +
 +        send_id = overlap->send_id[ipulse];
 +        recv_id = overlap->recv_id[ipulse];
 +        send_nindex   = overlap->comm_data[ipulse].send_nindex;
 +        /* recv_index0   = overlap->comm_data[ipulse].recv_index0; */
 +        recv_index0 = 0;
 +        recv_nindex   = overlap->comm_data[ipulse].recv_nindex;
 +
 +        sendptr = overlap->sendbuf;
 +        recvptr = overlap->recvbuf;
 +
 +        /*
 +        printf("node %d comm %2d x %2d x %2d\n",pme->nodeid,
 +               local_fft_ndata[XX]+size_yx,send_nindex,local_fft_ndata[ZZ]);
 +        printf("node %d send %f, %f\n",pme->nodeid,
 +               sendptr[0],sendptr[send_nindex*datasize-1]);
 +        */
 +
 +#ifdef GMX_MPI
 +        MPI_Sendrecv(sendptr,send_nindex*datasize,GMX_MPI_REAL,
 +                     send_id,ipulse,
 +                     recvptr,recv_nindex*datasize,GMX_MPI_REAL,
 +                     recv_id,ipulse,
 +                     overlap->mpi_comm,&stat);
 +#endif
 +
 +        for(x=0; x<local_fft_ndata[XX]; x++)
 +        {
 +            for(y=0; y<recv_nindex; y++)
 +            {
 +                indg = (x*local_fft_size[YY] + y)*local_fft_size[ZZ];
 +                indb = (x*recv_nindex        + y)*local_fft_ndata[ZZ];
 +                for(z=0; z<local_fft_ndata[ZZ]; z++)
 +                {
 +                    fftgrid[indg+z] += recvptr[indb+z];
 +                }
 +            }
 +        }
 +        if (pme->nnodes_major > 1)
 +        {
 +            sendptr = pme->overlap[0].sendbuf;
 +            for(x=0; x<size_yx; x++)
 +            {
 +                for(y=0; y<recv_nindex; y++)
 +                {
 +                    indg = (x*local_fft_ndata[YY] + y)*local_fft_ndata[ZZ];
 +                    indb = ((local_fft_ndata[XX] + x)*recv_nindex +y)*local_fft_ndata[ZZ];
 +                    for(z=0; z<local_fft_ndata[ZZ]; z++)
 +                    {
 +                        sendptr[indg+z] += recvptr[indb+z];
 +                    }
 +                }
 +            }
 +        }
 +    }
 +
 +    /* for(ipulse=0;ipulse<overlap->noverlap_nodes;ipulse++) */
 +    if (pme->nnodes_major > 1)
 +    {
 +        /* Major dimension */
 +        overlap = &pme->overlap[0];
 +
 +        datasize = local_fft_ndata[YY]*local_fft_ndata[ZZ];
 +        gridsize = local_fft_size[YY] *local_fft_size[ZZ];
 +
 +        ipulse = 0;
 +
 +        send_id = overlap->send_id[ipulse];
 +        recv_id = overlap->recv_id[ipulse];
 +        send_nindex   = overlap->comm_data[ipulse].send_nindex;
 +        /* recv_index0   = overlap->comm_data[ipulse].recv_index0; */
 +        recv_index0 = 0;
 +        recv_nindex   = overlap->comm_data[ipulse].recv_nindex;
 +
 +        sendptr = overlap->sendbuf;
 +        recvptr = overlap->recvbuf;
 +
 +        if (debug != NULL)
 +        {
 +            fprintf(debug,"PME fftgrid comm %2d x %2d x %2d\n",
 +                   send_nindex,local_fft_ndata[YY],local_fft_ndata[ZZ]);
 +        }
 +
 +#ifdef GMX_MPI
 +        MPI_Sendrecv(sendptr,send_nindex*datasize,GMX_MPI_REAL,
 +                     send_id,ipulse,
 +                     recvptr,recv_nindex*datasize,GMX_MPI_REAL,
 +                     recv_id,ipulse,
 +                     overlap->mpi_comm,&stat);
 +#endif
 +
 +        for(x=0; x<recv_nindex; x++)
 +        {
 +            for(y=0; y<local_fft_ndata[YY]; y++)
 +            {
 +                indg = (x*local_fft_size[YY]  + y)*local_fft_size[ZZ];
 +                indb = (x*local_fft_ndata[YY] + y)*local_fft_ndata[ZZ];
 +                for(z=0; z<local_fft_ndata[ZZ]; z++)
 +                {
 +                    fftgrid[indg+z] += recvptr[indb+z];
 +                }
 +            }
 +        }
 +    }
 +}
 +
 +
 +static void spread_on_grid(gmx_pme_t pme,
 +                           pme_atomcomm_t *atc,pmegrids_t *grids,
 +                           gmx_bool bCalcSplines,gmx_bool bSpread,
 +                           real *fftgrid)
 +{
 +    int nthread,thread;
 +#ifdef PME_TIME_THREADS
 +    gmx_cycles_t c1,c2,c3,ct1a,ct1b,ct1c;
 +    static double cs1=0,cs2=0,cs3=0;
 +    static double cs1a[6]={0,0,0,0,0,0};
 +    static int cnt=0;
 +#endif
 +
 +    nthread = pme->nthread;
 +
 +#ifdef PME_TIME_THREADS
 +    c1 = omp_cyc_start();
 +#endif
 +    if (bCalcSplines)
 +    {
 +#pragma omp parallel for num_threads(nthread) schedule(static)
 +        for(thread=0; thread<nthread; thread++)
 +        {
 +            int start,end;
 +
 +            start = atc->n* thread   /nthread;
 +            end   = atc->n*(thread+1)/nthread;
 +
 +            /* Compute fftgrid index for all atoms,
 +             * with help of some extra variables.
 +             */
 +            calc_interpolation_idx(pme,atc,start,end,thread);
 +        }
 +    }
 +#ifdef PME_TIME_THREADS
 +    c1 = omp_cyc_end(c1);
 +    cs1 += (double)c1;
 +#endif
 +
 +#ifdef PME_TIME_THREADS
 +    c2 = omp_cyc_start();
 +#endif
 +#pragma omp parallel for num_threads(nthread) schedule(static)
 +    for(thread=0; thread<nthread; thread++)
 +    {
 +        splinedata_t *spline;
 +        pmegrid_t *grid;
 +
 +        /* make local bsplines  */
 +        if (grids->nthread == 1)
 +        {
 +            spline = &atc->spline[0];
 +
 +            spline->n = atc->n;
 +
 +            grid = &grids->grid;
 +        }
 +        else
 +        {
 +            spline = &atc->spline[thread];
 +
 +            make_thread_local_ind(atc,thread,spline);
 +
 +            grid = &grids->grid_th[thread];
 +        }
 +
 +        if (bCalcSplines)
 +        {
 +            make_bsplines(spline->theta,spline->dtheta,pme->pme_order,
 +                          atc->fractx,spline->n,spline->ind,atc->q,pme->bFEP);
 +        }
 +
 +        if (bSpread)
 +        {
 +            /* put local atoms on grid. */
 +#ifdef PME_TIME_SPREAD
 +            ct1a = omp_cyc_start();
 +#endif
 +            spread_q_bsplines_thread(grid,atc,spline,&pme->spline_work);
 +
 +            if (grids->nthread > 1)
 +            {
 +                copy_local_grid(pme,grids,thread,fftgrid);
 +            }
 +#ifdef PME_TIME_SPREAD
 +            ct1a = omp_cyc_end(ct1a);
 +            cs1a[thread] += (double)ct1a;
 +#endif
 +        }
 +    }
 +#ifdef PME_TIME_THREADS
 +    c2 = omp_cyc_end(c2);
 +    cs2 += (double)c2;
 +#endif
 +
 +    if (grids->nthread > 1)
 +    {
 +#ifdef PME_TIME_THREADS
 +        c3 = omp_cyc_start();
 +#endif
 +#pragma omp parallel for num_threads(grids->nthread) schedule(static)
 +        for(thread=0; thread<grids->nthread; thread++)
 +        {
 +            reduce_threadgrid_overlap(pme,grids,thread,
 +                                      fftgrid,
 +                                      pme->overlap[0].sendbuf,
 +                                      pme->overlap[1].sendbuf);
 +#ifdef PRINT_PME_SENDBUF
 +            print_sendbuf(pme,pme->overlap[0].sendbuf);
 +#endif
 +        }
 +#ifdef PME_TIME_THREADS
 +        c3 = omp_cyc_end(c3);
 +        cs3 += (double)c3;
 +#endif
 +
 +        if (pme->nnodes > 1)
 +        {
 +            /* Communicate the overlapping part of the fftgrid */
 +            sum_fftgrid_dd(pme,fftgrid);
 +        }
 +    }
 +
 +#ifdef PME_TIME_THREADS
 +    cnt++;
 +    if (cnt % 20 == 0)
 +    {
 +        printf("idx %.2f spread %.2f red %.2f",
 +               cs1*1e-9,cs2*1e-9,cs3*1e-9);
 +#ifdef PME_TIME_SPREAD
 +        for(thread=0; thread<nthread; thread++)
 +            printf(" %.2f",cs1a[thread]*1e-9);
 +#endif
 +        printf("\n");
 +    }
 +#endif
 +}
 +
 +
 +static void dump_grid(FILE *fp,
 +                      int sx,int sy,int sz,int nx,int ny,int nz,
 +                      int my,int mz,const real *g)
 +{
 +    int x,y,z;
 +
 +    for(x=0; x<nx; x++)
 +    {
 +        for(y=0; y<ny; y++)
 +        {
 +            for(z=0; z<nz; z++)
 +            {
 +                fprintf(fp,"%2d %2d %2d %6.3f\n",
 +                        sx+x,sy+y,sz+z,g[(x*my + y)*mz + z]);
 +            }
 +        }
 +    }
 +}
 +
 +static void dump_local_fftgrid(gmx_pme_t pme,const real *fftgrid)
 +{
 +    ivec local_fft_ndata,local_fft_offset,local_fft_size;
 +
 +    gmx_parallel_3dfft_real_limits(pme->pfft_setupA,
 +                                   local_fft_ndata,
 +                                   local_fft_offset,
 +                                   local_fft_size);
 +
 +    dump_grid(stderr,
 +              pme->pmegrid_start_ix,
 +              pme->pmegrid_start_iy,
 +              pme->pmegrid_start_iz,
 +              pme->pmegrid_nx-pme->pme_order+1,
 +              pme->pmegrid_ny-pme->pme_order+1,
 +              pme->pmegrid_nz-pme->pme_order+1,
 +              local_fft_size[YY],
 +              local_fft_size[ZZ],
 +              fftgrid);
 +}
 +
 +
 +void gmx_pme_calc_energy(gmx_pme_t pme,int n,rvec *x,real *q,real *V)
 +{
 +    pme_atomcomm_t *atc;
 +    pmegrids_t *grid;
 +
 +    if (pme->nnodes > 1)
 +    {
 +        gmx_incons("gmx_pme_calc_energy called in parallel");
 +    }
 +    if (pme->bFEP > 1)
 +    {
 +        gmx_incons("gmx_pme_calc_energy with free energy");
 +    }
 +
 +    atc = &pme->atc_energy;
 +    atc->nslab     = 1;
 +    atc->bSpread   = TRUE;
 +    atc->pme_order = pme->pme_order;
 +    atc->n         = n;
 +    pme_realloc_atomcomm_things(atc);
 +    atc->x         = x;
 +    atc->q         = q;
 +
 +    /* We only use the A-charges grid */
 +    grid = &pme->pmegridA;
 +
 +    spread_on_grid(pme,atc,NULL,TRUE,FALSE,pme->fftgridA);
 +
 +    *V = gather_energy_bsplines(pme,grid->grid.grid,atc);
 +}
 +
 +
 +static void reset_pmeonly_counters(t_commrec *cr,gmx_wallcycle_t wcycle,
 +        t_nrnb *nrnb,t_inputrec *ir, gmx_large_int_t step_rel)
 +{
 +    /* Reset all the counters related to performance over the run */
 +    wallcycle_stop(wcycle,ewcRUN);
 +    wallcycle_reset_all(wcycle);
 +    init_nrnb(nrnb);
 +    ir->init_step += step_rel;
 +    ir->nsteps    -= step_rel;
 +    wallcycle_start(wcycle,ewcRUN);
 +}
 +
 +
 +int gmx_pmeonly(gmx_pme_t pme,
 +                t_commrec *cr,    t_nrnb *nrnb,
 +                gmx_wallcycle_t wcycle,
 +                real ewaldcoeff,  gmx_bool bGatherOnly,
 +                t_inputrec *ir)
 +{
 +    gmx_pme_pp_t pme_pp;
 +    int  natoms;
 +    matrix box;
 +    rvec *x_pp=NULL,*f_pp=NULL;
 +    real *chargeA=NULL,*chargeB=NULL;
 +    real lambda=0;
 +    int  maxshift_x=0,maxshift_y=0;
 +    real energy,dvdlambda;
 +    matrix vir;
 +    float cycles;
 +    int  count;
 +    gmx_bool bEnerVir;
 +    gmx_large_int_t step,step_rel;
 +
 +
 +    pme_pp = gmx_pme_pp_init(cr);
 +
 +    init_nrnb(nrnb);
 +
 +    count = 0;
 +    do /****** this is a quasi-loop over time steps! */
 +    {
 +        /* Domain decomposition */
 +        natoms = gmx_pme_recv_q_x(pme_pp,
 +                                  &chargeA,&chargeB,box,&x_pp,&f_pp,
 +                                  &maxshift_x,&maxshift_y,
 +                                  &pme->bFEP,&lambda,
 +                                  &bEnerVir,
 +                                  &step);
 +
 +        if (natoms == -1) {
 +            /* We should stop: break out of the loop */
 +            break;
 +        }
 +
 +        step_rel = step - ir->init_step;
 +
 +        if (count == 0)
 +            wallcycle_start(wcycle,ewcRUN);
 +
 +        wallcycle_start(wcycle,ewcPMEMESH);
 +
 +        dvdlambda = 0;
 +        clear_mat(vir);
 +        gmx_pme_do(pme,0,natoms,x_pp,f_pp,chargeA,chargeB,box,
 +                   cr,maxshift_x,maxshift_y,nrnb,wcycle,vir,ewaldcoeff,
 +                   &energy,lambda,&dvdlambda,
 +                   GMX_PME_DO_ALL_F | (bEnerVir ? GMX_PME_CALC_ENER_VIR : 0));
 +
 +        cycles = wallcycle_stop(wcycle,ewcPMEMESH);
 +
 +        gmx_pme_send_force_vir_ener(pme_pp,
 +                                    f_pp,vir,energy,dvdlambda,
 +                                    cycles);
 +
 +        count++;
 +
 +        if (step_rel == wcycle_get_reset_counters(wcycle))
 +        {
 +            /* Reset all the counters related to performance over the run */
 +            reset_pmeonly_counters(cr,wcycle,nrnb,ir,step_rel);
 +            wcycle_set_reset_counters(wcycle, 0);
 +        }
 +
 +    } /***** end of quasi-loop, we stop with the break above */
 +    while (TRUE);
 +
 +    return 0;
 +}
 +
 +int gmx_pme_do(gmx_pme_t pme,
 +               int start,       int homenr,
 +               rvec x[],        rvec f[],
 +               real *chargeA,   real *chargeB,
 +               matrix box, t_commrec *cr,
 +               int  maxshift_x, int maxshift_y,
 +               t_nrnb *nrnb,    gmx_wallcycle_t wcycle,
 +               matrix vir,      real ewaldcoeff,
 +               real *energy,    real lambda,
 +               real *dvdlambda, int flags)
 +{
 +    int     q,d,i,j,ntot,npme;
 +    int     nx,ny,nz;
 +    int     n_d,local_ny;
 +    pme_atomcomm_t *atc=NULL;
 +    pmegrids_t *pmegrid=NULL;
 +    real    *grid=NULL;
 +    real    *ptr;
 +    rvec    *x_d,*f_d;
 +    real    *charge=NULL,*q_d;
 +    real    energy_AB[2];
 +    matrix  vir_AB[2];
 +    gmx_bool bClearF;
 +    gmx_parallel_3dfft_t pfft_setup;
 +    real *  fftgrid;
 +    t_complex * cfftgrid;
 +    int     thread;
 +
 +    if (pme->nnodes > 1) {
 +        atc = &pme->atc[0];
 +        atc->npd = homenr;
 +        if (atc->npd > atc->pd_nalloc) {
 +            atc->pd_nalloc = over_alloc_dd(atc->npd);
 +            srenew(atc->pd,atc->pd_nalloc);
 +        }
 +        atc->maxshift = (atc->dimind==0 ? maxshift_x : maxshift_y);
 +    }
 +    else
 +    {
 +        /* This could be necessary for TPI */
 +        pme->atc[0].n = homenr;
 +    }
 +
 +    for(q=0; q<(pme->bFEP ? 2 : 1); q++) {
 +        if (q == 0) {
 +            pmegrid = &pme->pmegridA;
 +            fftgrid = pme->fftgridA;
 +            cfftgrid = pme->cfftgridA;
 +            pfft_setup = pme->pfft_setupA;
 +            charge = chargeA+start;
 +        } else {
 +            pmegrid = &pme->pmegridB;
 +            fftgrid = pme->fftgridB;
 +            cfftgrid = pme->cfftgridB;
 +            pfft_setup = pme->pfft_setupB;
 +            charge = chargeB+start;
 +        }
 +        grid = pmegrid->grid.grid;
 +        /* Unpack structure */
 +        if (debug) {
 +            fprintf(debug,"PME: nnodes = %d, nodeid = %d\n",
 +                    cr->nnodes,cr->nodeid);
 +            fprintf(debug,"Grid = %p\n",(void*)grid);
 +            if (grid == NULL)
 +                gmx_fatal(FARGS,"No grid!");
 +        }
 +        where();
 +
 +        m_inv_ur0(box,pme->recipbox);
 +
 +        if (pme->nnodes == 1) {
 +            atc = &pme->atc[0];
 +            if (DOMAINDECOMP(cr)) {
 +                atc->n = homenr;
 +                pme_realloc_atomcomm_things(atc);
 +            }
 +            atc->x = x;
 +            atc->q = charge;
 +            atc->f = f;
 +        } else {
 +            wallcycle_start(wcycle,ewcPME_REDISTXF);
 +            for(d=pme->ndecompdim-1; d>=0; d--)
 +            {
 +                if (d == pme->ndecompdim-1)
 +                {
 +                    n_d = homenr;
 +                    x_d = x + start;
 +                    q_d = charge;
 +                }
 +                else
 +                {
 +                    n_d = pme->atc[d+1].n;
 +                    x_d = atc->x;
 +                    q_d = atc->q;
 +                }
 +                atc = &pme->atc[d];
 +                atc->npd = n_d;
 +                if (atc->npd > atc->pd_nalloc) {
 +                    atc->pd_nalloc = over_alloc_dd(atc->npd);
 +                    srenew(atc->pd,atc->pd_nalloc);
 +                }
 +                atc->maxshift = (atc->dimind==0 ? maxshift_x : maxshift_y);
 +                pme_calc_pidx_wrapper(n_d,pme->recipbox,x_d,atc);
 +                where();
 +
 +                /* Redistribute x (only once) and qA or qB */
 +                if (DOMAINDECOMP(cr)) {
 +                    dd_pmeredist_x_q(pme, n_d, q==0, x_d, q_d, atc);
 +                } else {
 +                    pmeredist_pd(pme, TRUE, n_d, q==0, x_d, q_d, atc);
 +                }
 +            }
 +            where();
 +
 +            wallcycle_stop(wcycle,ewcPME_REDISTXF);
 +        }
 +
 +        if (debug)
 +            fprintf(debug,"Node= %6d, pme local particles=%6d\n",
 +                    cr->nodeid,atc->n);
 +
 +        if (flags & GMX_PME_SPREAD_Q)
 +        {
 +            wallcycle_start(wcycle,ewcPME_SPREADGATHER);
 +
 +            /* Spread the charges on a grid */
 +            spread_on_grid(pme,&pme->atc[0],pmegrid,q==0,TRUE,fftgrid);
 +
 +            if (q == 0)
 +            {
 +                inc_nrnb(nrnb,eNR_WEIGHTS,DIM*atc->n);
 +            }
 +            inc_nrnb(nrnb,eNR_SPREADQBSP,
 +                     pme->pme_order*pme->pme_order*pme->pme_order*atc->n);
 +
 +            if (pme->nthread == 1)
 +            {
 +                wrap_periodic_pmegrid(pme,grid);
 +
 +                /* sum contributions to local grid from other nodes */
 +#ifdef GMX_MPI
 +                if (pme->nnodes > 1)
 +                {
 +                    gmx_sum_qgrid_dd(pme,grid,GMX_SUM_QGRID_FORWARD);
 +                    where();
 +                }
 +#endif
 +
 +                copy_pmegrid_to_fftgrid(pme,grid,fftgrid);
 +            }
 +
 +            wallcycle_stop(wcycle,ewcPME_SPREADGATHER);
 +
 +            /*
 +            dump_local_fftgrid(pme,fftgrid);
 +            exit(0);
 +            */
 +        }
 +
 +        /* Here we start a large thread parallel region */
 +#pragma omp parallel for num_threads(pme->nthread) schedule(static)
 +        for(thread=0; thread<pme->nthread; thread++)
 +        {
 +            if (flags & GMX_PME_SOLVE)
 +            {
 +                int loop_count;
 +
 +                /* do 3d-fft */
 +                if (thread == 0)
 +                {
 +                    wallcycle_start(wcycle,ewcPME_FFT);
 +                }
 +                gmx_parallel_3dfft_execute(pfft_setup,GMX_FFT_REAL_TO_COMPLEX,
 +                                           fftgrid,cfftgrid,thread,wcycle);
 +                if (thread == 0)
 +                {
 +                    wallcycle_stop(wcycle,ewcPME_FFT);
 +                }
 +                where();
 +
 +                /* solve in k-space for our local cells */
 +                if (thread == 0)
 +                {
 +                    wallcycle_start(wcycle,ewcPME_SOLVE);
 +                }
 +                loop_count =
 +                    solve_pme_yzx(pme,cfftgrid,ewaldcoeff,
 +                                  box[XX][XX]*box[YY][YY]*box[ZZ][ZZ],
 +                                  flags & GMX_PME_CALC_ENER_VIR,
 +                                  pme->nthread,thread);
 +                if (thread == 0)
 +                {
 +                    wallcycle_stop(wcycle,ewcPME_SOLVE);
 +                    where();
 +                    inc_nrnb(nrnb,eNR_SOLVEPME,loop_count);
 +                }
 +            }
 +
 +            if (flags & GMX_PME_CALC_F)
 +            {
 +                /* do 3d-invfft */
 +                if (thread == 0)
 +                {
 +                    where();
 +                    wallcycle_start(wcycle,ewcPME_FFT);
 +                }
 +                gmx_parallel_3dfft_execute(pfft_setup,GMX_FFT_COMPLEX_TO_REAL,
 +                                           cfftgrid,fftgrid,thread,wcycle);
 +                if (thread == 0)
 +                {
 +                    wallcycle_stop(wcycle,ewcPME_FFT);
 +
 +                    where();
 +
 +                    if (pme->nodeid == 0)
 +                    {
 +                        ntot = pme->nkx*pme->nky*pme->nkz;
 +                        npme  = ntot*log((real)ntot)/log(2.0);
 +                        inc_nrnb(nrnb,eNR_FFT,2*npme);
 +                    }
 +
 +                    wallcycle_start(wcycle,ewcPME_SPREADGATHER);
 +                }
 +
 +                copy_fftgrid_to_pmegrid(pme,fftgrid,grid,pme->nthread,thread);
 +            }
 +        }
 +        /* End of thread parallel section.
 +         * With MPI we have to synchronize here before gmx_sum_qgrid_dd.
 +         */
 +
 +        if (flags & GMX_PME_CALC_F)
 +        {
 +            /* distribute local grid to all nodes */
 +#ifdef GMX_MPI
 +            if (pme->nnodes > 1) {
 +                gmx_sum_qgrid_dd(pme,grid,GMX_SUM_QGRID_BACKWARD);
 +            }
 +#endif
 +            where();
 +
 +            unwrap_periodic_pmegrid(pme,grid);
 +
 +            /* interpolate forces for our local atoms */
 +
 +            where();
 +
 +            /* If we are running without parallelization,
 +             * atc->f is the actual force array, not a buffer,
 +             * therefore we should not clear it.
 +             */
 +            bClearF = (q == 0 && PAR(cr));
 +#pragma omp parallel for num_threads(pme->nthread) schedule(static)
 +            for(thread=0; thread<pme->nthread; thread++)
 +            {
 +                gather_f_bsplines(pme,grid,bClearF,atc,
 +                                  &atc->spline[thread],
 +                                  pme->bFEP ? (q==0 ? 1.0-lambda : lambda) : 1.0);
 +            }
 +
 +            where();
 +
 +            inc_nrnb(nrnb,eNR_GATHERFBSP,
 +                     pme->pme_order*pme->pme_order*pme->pme_order*pme->atc[0].n);
 +            wallcycle_stop(wcycle,ewcPME_SPREADGATHER);
 +        }
 +
 +        if (flags & GMX_PME_CALC_ENER_VIR)
 +        {
 +            /* This should only be called on the master thread
 +             * and after the threads have synchronized.
 +             */
 +            get_pme_ener_vir(pme,pme->nthread,&energy_AB[q],vir_AB[q]);
 +        }
 +    } /* of q-loop */
 +
 +    if ((flags & GMX_PME_CALC_F) && pme->nnodes > 1) {
 +        wallcycle_start(wcycle,ewcPME_REDISTXF);
 +        for(d=0; d<pme->ndecompdim; d++)
 +        {
 +            atc = &pme->atc[d];
 +            if (d == pme->ndecompdim - 1)
 +            {
 +                n_d = homenr;
 +                f_d = f + start;
 +            }
 +            else
 +            {
 +                n_d = pme->atc[d+1].n;
 +                f_d = pme->atc[d+1].f;
 +            }
 +            if (DOMAINDECOMP(cr)) {
 +                dd_pmeredist_f(pme,atc,n_d,f_d,
 +                               d==pme->ndecompdim-1 && pme->bPPnode);
 +            } else {
 +                pmeredist_pd(pme, FALSE, n_d, TRUE, f_d, NULL, atc);
 +            }
 +        }
 +
 +        wallcycle_stop(wcycle,ewcPME_REDISTXF);
 +    }
 +    where();
 +
 +    if (!pme->bFEP) {
 +        *energy = energy_AB[0];
 +        m_add(vir,vir_AB[0],vir);
 +    } else {
 +        *energy = (1.0-lambda)*energy_AB[0] + lambda*energy_AB[1];
 +        *dvdlambda += energy_AB[1] - energy_AB[0];
 +        for(i=0; i<DIM; i++)
 +            for(j=0; j<DIM; j++)
 +                vir[i][j] += (1.0-lambda)*vir_AB[0][i][j] + lambda*vir_AB[1][i][j];
 +    }
 +
 +    if (debug)
 +    {
 +        fprintf(debug,"PME mesh energy: %g\n",*energy);
 +    }
 +
 +    return 0;
 +}
Simple merge
index f5f208b8e051b54dff8a3c423391fa9e85829d7e,0000000000000000000000000000000000000000..834c43d2d72638a45aacdc05f7569132f80c28e0
mode 100644,000000..100644
--- /dev/null
@@@ -1,1111 -1,0 +1,1103 @@@
- void sort_QMlayers(t_QMMMrec *qr){
-   /* sorts QM layers from small to big */
-   qsort(qr->qm,qr->nrQMlayers,
-       (size_t)sizeof(qr->qm[0]),
-       QMlayer_comp);
- } /* sort_QMlayers */
 +/*
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 + 
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <math.h>
 +#include "sysstuff.h"
 +#include "typedefs.h"
 +#include "macros.h"
 +#include "smalloc.h"
 +#include "physics.h"
 +#include "macros.h"
 +#include "vec.h"
 +#include "force.h"
 +#include "invblock.h"
 +#include "confio.h"
 +#include "names.h"
 +#include "network.h"
 +#include "pbc.h"
 +#include "ns.h"
 +#include "nrnb.h"
 +#include "bondf.h"
 +#include "mshift.h"
 +#include "txtdump.h"
 +#include "copyrite.h"
 +#include "qmmm.h"
 +#include <stdio.h>
 +#include <string.h>
 +#include "gmx_fatal.h"
 +#include "typedefs.h"
 +#include <stdlib.h>
 +#include "mtop_util.h"
 +
 +
 +/* declarations of the interfaces to the QM packages. The _SH indicate
 + * the QM interfaces can be used for Surface Hopping simulations 
 + */
 +#ifdef GMX_QMMM_GAMESS
 +/* GAMESS interface */
 +
 +void 
 +init_gamess(t_commrec *cr, t_QMrec *qm, t_MMrec *mm);
 +
 +real 
 +call_gamess(t_commrec *cr,t_forcerec *fr,
 +            t_QMrec *qm, t_MMrec *mm,rvec f[], rvec fshift[]);
 +
 +#elif defined GMX_QMMM_MOPAC
 +/* MOPAC interface */
 +
 +void 
 +init_mopac(t_commrec *cr, t_QMrec *qm, t_MMrec *mm);
 +
 +real 
 +call_mopac(t_commrec *cr,t_forcerec *fr, t_QMrec *qm, 
 +           t_MMrec *mm,rvec f[], rvec fshift[]);
 +
 +real 
 +call_mopac_SH(t_commrec *cr,t_forcerec *fr,t_QMrec *qm, 
 +              t_MMrec *mm,rvec f[], rvec fshift[]);
 +
 +#elif defined GMX_QMMM_GAUSSIAN
 +/* GAUSSIAN interface */
 +
 +void 
 +init_gaussian(t_commrec *cr ,t_QMrec *qm, t_MMrec *mm);
 +
 +real 
 +call_gaussian_SH(t_commrec *cr,t_forcerec *fr,t_QMrec *qm, 
 +                 t_MMrec *mm,rvec f[], rvec fshift[]);
 +
 +real 
 +call_gaussian(t_commrec *cr,t_forcerec *fr, t_QMrec *qm,
 +              t_MMrec *mm,rvec f[], rvec fshift[]);
 +
 +#elif defined GMX_QMMM_ORCA
 +/* ORCA interface */
 +
 +void 
 +init_orca(t_commrec *cr ,t_QMrec *qm, t_MMrec *mm);
 +
 +real 
 +call_orca(t_commrec *cr,t_forcerec *fr, t_QMrec *qm,
 +              t_MMrec *mm,rvec f[], rvec fshift[]);
 +
 +#endif
 +
 +
 +
 +
 +/* this struct and these comparison functions are needed for creating
 + * a QMMM input for the QM routines from the QMMM neighbor list.  
 + */
 +
 +typedef struct {
 +  int      j;
 +  int      shift;
 +} t_j_particle;
 +
 +static int struct_comp(const void *a, const void *b){
 +
 +  return (int)(((t_j_particle *)a)->j)-(int)(((t_j_particle *)b)->j);
 +  
 +} /* struct_comp */
 +
 +static int int_comp(const void *a,const void *b){
 +  
 +  return (*(int *)a) - (*(int *)b);
 +  
 +} /* int_comp */
 +
 +static int QMlayer_comp(const void *a, const void *b){
 +  
 +  return (int)(((t_QMrec *)a)->nrQMatoms)-(int)(((t_QMrec *)b)->nrQMatoms);
 +  
 +} /* QMlayer_comp */
 +
 +real call_QMroutine(t_commrec *cr, t_forcerec *fr, t_QMrec *qm, 
 +                  t_MMrec *mm, rvec f[], rvec fshift[])
 +{
 +  /* makes a call to the requested QM routine (qm->QMmethod) 
 +   * Note that f is actually the gradient, i.e. -f
 +   */
 +  real
 +    QMener=0.0;
 +
 +    /* do a semi-empiprical calculation */
 +    
 +    if (qm->QMmethod<eQMmethodRHF && !(mm->nrMMatoms))
 +    {
 +#ifdef GMX_QMMM_MOPAC
 +        if (qm->bSH)
 +            QMener = call_mopac_SH(cr,fr,qm,mm,f,fshift);
 +        else
 +            QMener = call_mopac(cr,fr,qm,mm,f,fshift);
 +#else
 +        gmx_fatal(FARGS,"Semi-empirical QM only supported with Mopac.");
 +#endif
 +    }
 +    else
 +    {
 +        /* do an ab-initio calculation */
 +        if (qm->bSH && qm->QMmethod==eQMmethodCASSCF)
 +        {
 +#ifdef GMX_QMMM_GAUSSIAN            
 +            QMener = call_gaussian_SH(cr,fr,qm,mm,f,fshift);
 +#else
 +            gmx_fatal(FARGS,"Ab-initio Surface-hopping only supported with Gaussian.");
 +#endif
 +        }
 +        else
 +        {
 +#ifdef GMX_QMMM_GAMESS
 +            QMener = call_gamess(cr,fr,qm,mm,f,fshift);
 +#elif defined GMX_QMMM_GAUSSIAN
 +            QMener = call_gaussian(cr,fr,qm,mm,f,fshift);
 +#elif defined GMX_QMMM_ORCA
 +            QMener = call_orca(cr,fr,qm,mm,f,fshift);
 +#else
 +            gmx_fatal(FARGS,"Ab-initio calculation only supported with Gamess, Gaussian or ORCA.");
 +#endif
 +        }
 +    }
 +    return (QMener);
 +}
 +
 +void init_QMroutine(t_commrec *cr, t_QMrec *qm, t_MMrec *mm)
 +{
 +    /* makes a call to the requested QM routine (qm->QMmethod) 
 +     */
 +    if (qm->QMmethod<eQMmethodRHF){
 +#ifdef GMX_QMMM_MOPAC
 +        /* do a semi-empiprical calculation */
 +        init_mopac(cr,qm,mm);
 +#else
 +        gmx_fatal(FARGS,"Semi-empirical QM only supported with Mopac.");
 +#endif
 +    }
 +    else 
 +    {
 +        /* do an ab-initio calculation */
 +#ifdef GMX_QMMM_GAMESS
 +        init_gamess(cr,qm,mm);
 +#elif defined GMX_QMMM_GAUSSIAN
 +        init_gaussian(cr,qm,mm);
 +#elif defined GMX_QMMM_ORCA
 +        init_orca(cr,qm,mm);
 +#else
 +        gmx_fatal(FARGS,"Ab-initio calculation only supported with Gamess, Gaussian or ORCA.");   
 +#endif
 +    }
 +} /* init_QMroutine */
 +
 +void update_QMMM_coord(rvec x[],t_forcerec *fr, t_QMrec *qm, t_MMrec *mm)
 +{
 +  /* shifts the QM and MM particles into the central box and stores
 +   * these shifted coordinates in the coordinate arrays of the
 +   * QMMMrec. These coordinates are passed on the QM subroutines.
 +   */
 +  int
 +    i;
 +
 +  /* shift the QM atoms into the central box 
 +   */
 +  for(i=0;i<qm->nrQMatoms;i++){
 +    rvec_sub(x[qm->indexQM[i]],fr->shift_vec[qm->shiftQM[i]],qm->xQM[i]);
 +  }
 +  /* also shift the MM atoms into the central box, if any 
 +   */
 +  for(i=0;i<mm->nrMMatoms;i++){
 +      rvec_sub(x[mm->indexMM[i]],fr->shift_vec[mm->shiftMM[i]],mm->xMM[i]);   
 +  }
 +} /* update_QMMM_coord */
 +
 +static void punch_QMMM_excl(t_QMrec *qm,t_MMrec *mm,t_blocka *excls)
 +{
 +  /* punch a file containing the bonded interactions of each QM
 +   * atom with MM atoms. These need to be excluded in the QM routines
 +   * Only needed in case of QM/MM optimizations
 +   */
 +  FILE
 +    *out=NULL;
 +  int
 +    i,j,k,nrexcl=0,*excluded=NULL,max=0;
 +  
 +  
 +  out = fopen("QMMMexcl.dat","w");
 +  
 +  /* this can be done more efficiently I think 
 +   */
 +  for(i=0;i<qm->nrQMatoms;i++){
 +    nrexcl = 0;
 +    for(j=excls->index[qm->indexQM[i]];
 +      j<excls->index[qm->indexQM[i]+1];
 +      j++){
 +      for(k=0;k<mm->nrMMatoms;k++){
 +      if(mm->indexMM[k]==excls->a[j]){/* the excluded MM atom */
 +        if(nrexcl >= max){
 +          max += 1000;
 +          srenew(excluded,max);
 +        }     
 +        excluded[nrexcl++]=k;
 +        continue;
 +      }
 +      }
 +    }
 +    /* write to file: */
 +    fprintf(out,"%5d %5d\n",i+1,nrexcl);
 +    for(j=0;j<nrexcl;j++){
 +      fprintf(out,"%5d ",excluded[j]);
 +    }
 +    fprintf(out,"\n");
 +  }
 +  free(excluded);
 +  fclose(out);
 +} /* punch_QMMM_excl */
 +
 +
 +/* end of QMMM subroutines */
 +
 +/* QMMM core routines */
 +
 +t_QMrec *mk_QMrec(void){
 +  t_QMrec *qm;
 +  snew(qm,1);
 +  return qm;
 +} /* mk_QMrec */
 +
 +t_MMrec *mk_MMrec(void){
 +  t_MMrec *mm;
 +  snew(mm,1);
 +  return mm;
 +} /* mk_MMrec */
 +
 +static void init_QMrec(int grpnr, t_QMrec *qm,int nr, int *atomarray, 
 +                     gmx_mtop_t *mtop, t_inputrec *ir)
 +{
 +  /* fills the t_QMrec struct of QM group grpnr 
 +   */
 +  int i;
 +  t_atom *atom;
 +
 +
 +  qm->nrQMatoms = nr;
 +  snew(qm->xQM,nr);
 +  snew(qm->indexQM,nr);
 +  snew(qm->shiftQM,nr); /* the shifts */
 +  for(i=0;i<nr;i++){
 +    qm->indexQM[i]=atomarray[i];
 +  }
 +
 +  snew(qm->atomicnumberQM,nr);
 +  for (i=0;i<qm->nrQMatoms;i++){
 +    gmx_mtop_atomnr_to_atom(mtop,qm->indexQM[i],&atom);
 +    qm->nelectrons       += mtop->atomtypes.atomnumber[atom->type];
 +    qm->atomicnumberQM[i] = mtop->atomtypes.atomnumber[atom->type];
 +  }
 +  qm->QMcharge       = ir->opts.QMcharge[grpnr];
 +  qm->multiplicity   = ir->opts.QMmult[grpnr];
 +  qm->nelectrons    -= ir->opts.QMcharge[grpnr];
 +
 +  qm->QMmethod       = ir->opts.QMmethod[grpnr];
 +  qm->QMbasis        = ir->opts.QMbasis[grpnr];
 +  /* trajectory surface hopping setup (Gaussian only) */
 +  qm->bSH            = ir->opts.bSH[grpnr];
 +  qm->CASorbitals    = ir->opts.CASorbitals[grpnr];
 +  qm->CASelectrons   = ir->opts.CASelectrons[grpnr];
 +  qm->SAsteps        = ir->opts.SAsteps[grpnr];
 +  qm->SAon           = ir->opts.SAon[grpnr];
 +  qm->SAoff          = ir->opts.SAoff[grpnr];
 +  /* hack to prevent gaussian from reinitializing all the time */
 +  qm->nQMcpus        = 0; /* number of CPU's to be used by g01, is set
 +                         * upon initializing gaussian
 +                         * (init_gaussian() 
 +                         */
 +  /* print the current layer to allow users to check their input */
 +  fprintf(stderr,"Layer %d\nnr of QM atoms %d\n",grpnr,nr);
 +  fprintf(stderr,"QMlevel: %s/%s\n\n",
 +        eQMmethod_names[qm->QMmethod],eQMbasis_names[qm->QMbasis]);
 +  
 +  /* frontier atoms */
 +  snew(qm->frontatoms,nr);
 +  /* Lennard-Jones coefficients */ 
 +  snew(qm->c6,nr);
 +  snew(qm->c12,nr);
 +  /* do we optimize the QM separately using the algorithms of the QM program??
 +   */
 +  qm->bTS      = ir->opts.bTS[grpnr];
 +  qm->bOPT     = ir->opts.bOPT[grpnr];
 +
 +} /* init_QMrec */  
 +
 +t_QMrec *copy_QMrec(t_QMrec *qm)
 +{
 +  /* copies the contents of qm into a new t_QMrec struct */
 +  t_QMrec
 +    *qmcopy;
 +  int
 +    i;
 +  
 +  qmcopy = mk_QMrec();
 +  qmcopy->nrQMatoms = qm->nrQMatoms;
 +  snew(qmcopy->xQM,qmcopy->nrQMatoms);
 +  snew(qmcopy->indexQM,qmcopy->nrQMatoms);
 +  snew(qmcopy->atomicnumberQM,qm->nrQMatoms);
 +  snew(qmcopy->shiftQM,qmcopy->nrQMatoms); /* the shifts */
 +  for (i=0;i<qmcopy->nrQMatoms;i++){
 +    qmcopy->shiftQM[i]        = qm->shiftQM[i];
 +    qmcopy->indexQM[i]        = qm->indexQM[i];
 +    qmcopy->atomicnumberQM[i] = qm->atomicnumberQM[i];
 +  }
 +  qmcopy->nelectrons   = qm->nelectrons;
 +  qmcopy->multiplicity = qm->multiplicity;
 +  qmcopy->QMcharge     = qm->QMcharge;
 +  qmcopy->nelectrons   = qm->nelectrons;
 +  qmcopy->QMmethod     = qm->QMmethod; 
 +  qmcopy->QMbasis      = qm->QMbasis;  
 +  /* trajectory surface hopping setup (Gaussian only) */
 +  qmcopy->bSH          = qm->bSH;
 +  qmcopy->CASorbitals  = qm->CASorbitals;
 +  qmcopy->CASelectrons = qm->CASelectrons;
 +  qmcopy->SAsteps      = qm->SAsteps;
 +  qmcopy->SAon         = qm->SAon;
 +  qmcopy->SAoff        = qm->SAoff;
 +  qmcopy->bOPT         = qm->bOPT;
 +
 +  /* Gaussian init. variables */
 +  qmcopy->nQMcpus      = qm->nQMcpus;
 +  for(i=0;i<DIM;i++)
 +    qmcopy->SHbasis[i] = qm->SHbasis[i];
 +  qmcopy->QMmem        = qm->QMmem;
 +  qmcopy->accuracy     = qm->accuracy;
 +  qmcopy->cpmcscf      = qm->cpmcscf;
 +  qmcopy->SAstep       = qm->SAstep;
 +  snew(qmcopy->frontatoms,qm->nrQMatoms);
 +  snew(qmcopy->c12,qmcopy->nrQMatoms);
 +  snew(qmcopy->c6,qmcopy->nrQMatoms);
 +  if(qmcopy->bTS||qmcopy->bOPT){
 +    for(i=1;i<qmcopy->nrQMatoms;i++){
 +      qmcopy->frontatoms[i] = qm->frontatoms[i];
 +      qmcopy->c12[i]        = qm->c12[i];
 +      qmcopy->c6[i]         = qm->c6[i];
 +    }
 +  }
 +
 +  return(qmcopy);
 +
 +} /*copy_QMrec */
 +
 +t_QMMMrec *mk_QMMMrec(void)
 +{
 +
 +  t_QMMMrec *qr;
 +
 +  snew(qr,1);
 +
 +  return qr;
 +
 +} /* mk_QMMMrec */
 +
 +void init_QMMMrec(t_commrec *cr,
 +                matrix box,
 +                gmx_mtop_t *mtop,
 +                t_inputrec *ir,
 +                t_forcerec *fr)
 +{
 +  /* we put the atomsnumbers of atoms that belong to the QMMM group in
 +   * an array that will be copied later to QMMMrec->indexQM[..]. Also
 +   * it will be used to create an QMMMrec->bQMMM index array that
 +   * simply contains true/false for QM and MM (the other) atoms.
 +   */
 +
 +  gmx_groups_t *groups;
 +  atom_id   *qm_arr=NULL,vsite,ai,aj;
 +  int       qm_max=0,qm_nr=0,i,j,jmax,k,l,nrvsite2=0;
 +  t_QMMMrec *qr;
 +  t_MMrec   *mm;
 +  t_iatom   *iatoms;
 +  real      c12au,c6au;
 +  gmx_mtop_atomloop_all_t aloop;
 +  t_atom    *atom;
 +  gmx_mtop_ilistloop_all_t iloop;
 +  int       a_offset;
 +  t_ilist   *ilist_mol;
 +
 +  c6au  = (HARTREE2KJ*AVOGADRO*pow(BOHR2NM,6)); 
 +  c12au = (HARTREE2KJ*AVOGADRO*pow(BOHR2NM,12)); 
 +  /* issue a fatal if the user wants to run with more than one node */
 +  if ( PAR(cr)) gmx_fatal(FARGS,"QM/MM does not work in parallel, use a single node instead\n");
 +
 +  /* Make a local copy of the QMMMrec */
 +  qr = fr->qr;
 +
 +  /* bQMMM[..] is an array containing TRUE/FALSE for atoms that are
 +   * QM/not QM. We first set all elemenst at false. Afterwards we use
 +   * the qm_arr (=MMrec->indexQM) to changes the elements
 +   * corresponding to the QM atoms at TRUE.  */
 +
 +  qr->QMMMscheme     = ir->QMMMscheme;
 +
 +  /* we take the possibility into account that a user has
 +   * defined more than one QM group:
 +   */
 +  /* an ugly work-around in case there is only one group In this case
 +   * the whole system is treated as QM. Otherwise the second group is
 +   * always the rest of the total system and is treated as MM.  
 +   */
 +
 +  /* small problem if there is only QM.... so no MM */
 +  
 +  jmax = ir->opts.ngQM;
 +
 +  if(qr->QMMMscheme==eQMMMschemeoniom)
 +    qr->nrQMlayers = jmax;
 +  else
 +    qr->nrQMlayers = 1; 
 +
 +  groups = &mtop->groups;
 +
 +  /* there are jmax groups of QM atoms. In case of multiple QM groups
 +   * I assume that the users wants to do ONIOM. However, maybe it
 +   * should also be possible to define more than one QM subsystem with
 +   * independent neighbourlists. I have to think about
 +   * that.. 11-11-2003 
 +   */
 +  snew(qr->qm,jmax);
 +  for(j=0;j<jmax;j++){
 +    /* new layer */
 +    aloop = gmx_mtop_atomloop_all_init(mtop);
 +    while (gmx_mtop_atomloop_all_next(aloop,&i,&atom)) {
 +      if(qm_nr >= qm_max){
 +      qm_max += 1000;
 +      srenew(qm_arr,qm_max);
 +      }
 +      if (ggrpnr(groups,egcQMMM ,i) == j) {
 +      /* hack for tip4p */
 +      qm_arr[qm_nr++] = i;
 +      }
 +    }
 +    if(qr->QMMMscheme==eQMMMschemeoniom){
 +      /* add the atoms to the bQMMM array
 +       */
 +
 +      /* I assume that users specify the QM groups from small to
 +       * big(ger) in the mdp file 
 +       */
 +      qr->qm[j] = mk_QMrec(); 
 +      /* we need to throw out link atoms that in the previous layer
 +       * existed to separate this QMlayer from the previous
 +       * QMlayer. We use the iatoms array in the idef for that
 +       * purpose. If all atoms defining the current Link Atom (Dummy2)
 +       * are part of the current QM layer it needs to be removed from
 +       * qm_arr[].  */
 +   
 +      iloop = gmx_mtop_ilistloop_all_init(mtop);
 +      while (gmx_mtop_ilistloop_all_next(iloop,&ilist_mol,&a_offset)) {
 +      nrvsite2 = ilist_mol[F_VSITE2].nr;
 +      iatoms   = ilist_mol[F_VSITE2].iatoms;
 +      
 +      for(k=0; k<nrvsite2; k+=4) {
 +        vsite = a_offset + iatoms[k+1]; /* the vsite         */
 +        ai    = a_offset + iatoms[k+2]; /* constructing atom */
 +        aj    = a_offset + iatoms[k+3]; /* constructing atom */
 +        if (ggrpnr(groups, egcQMMM, vsite) == ggrpnr(groups, egcQMMM, ai)
 +            &&
 +            ggrpnr(groups, egcQMMM, vsite) == ggrpnr(groups, egcQMMM, aj)) {
 +          /* this dummy link atom needs to be removed from the qm_arr
 +           * before making the QMrec of this layer!  
 +           */
 +          for(i=0;i<qm_nr;i++){
 +            if(qm_arr[i]==vsite){
 +              /* drop the element */
 +              for(l=i;l<qm_nr;l++){
 +                qm_arr[l]=qm_arr[l+1];
 +              }
 +              qm_nr--;
 +            }
 +          }
 +        }
 +      }
 +      }
 +
 +      /* store QM atoms in this layer in the QMrec and initialise layer 
 +       */
 +      init_QMrec(j,qr->qm[j],qm_nr,qm_arr,mtop,ir);
 +      
 +      /* we now store the LJ C6 and C12 parameters in QM rec in case
 +       * we need to do an optimization 
 +       */
 +      if(qr->qm[j]->bOPT || qr->qm[j]->bTS){
 +      for(i=0;i<qm_nr;i++){
 +        qr->qm[j]->c6[i]  =  C6(fr->nbfp,mtop->ffparams.atnr,
 +                                atom->type,atom->type)/c6au;
 +        qr->qm[j]->c12[i] = C12(fr->nbfp,mtop->ffparams.atnr,
 +                                atom->type,atom->type)/c12au;
 +      }
 +      }
 +      /* now we check for frontier QM atoms. These occur in pairs that
 +       * construct the vsite
 +       */
 +      iloop = gmx_mtop_ilistloop_all_init(mtop);
 +      while (gmx_mtop_ilistloop_all_next(iloop,&ilist_mol,&a_offset)) {
 +      nrvsite2 = ilist_mol[F_VSITE2].nr;
 +      iatoms   = ilist_mol[F_VSITE2].iatoms;
 +
 +      for(k=0; k<nrvsite2; k+=4){
 +        vsite = a_offset + iatoms[k+1]; /* the vsite         */
 +        ai    = a_offset + iatoms[k+2]; /* constructing atom */
 +        aj    = a_offset + iatoms[k+3]; /* constructing atom */
 +        if(ggrpnr(groups,egcQMMM,ai) < (groups->grps[egcQMMM].nr-1) &&
 +           (ggrpnr(groups,egcQMMM,aj) >= (groups->grps[egcQMMM].nr-1))){
 +            /* mark ai as frontier atom */
 +          for(i=0;i<qm_nr;i++){
 +            if( (qm_arr[i]==ai) || (qm_arr[i]==vsite) ){
 +              qr->qm[j]->frontatoms[i]=TRUE;
 +            }
 +          }
 +        }
 +        else if(ggrpnr(groups,egcQMMM,aj) < (groups->grps[egcQMMM].nr-1) &&
 +                (ggrpnr(groups,egcQMMM,ai) >= (groups->grps[egcQMMM].nr-1))){
 +          /* mark aj as frontier atom */
 +          for(i=0;i<qm_nr;i++){
 +            if( (qm_arr[i]==aj) || (qm_arr[i]==vsite)){
 +              qr->qm[j]->frontatoms[i]=TRUE;
 +            }
 +          }
 +        }
 +      }
 +      }
 +    }
 +  }
 +  if(qr->QMMMscheme!=eQMMMschemeoniom){
 +
 +    /* standard QMMM, all layers are merged together so there is one QM 
 +     * subsystem and one MM subsystem. 
 +     * Also we set the charges to zero in the md->charge arrays to prevent 
 +     * the innerloops from doubly counting the electostatic QM MM interaction
 +     */
 +    for (k=0;k<qm_nr;k++){
 +      gmx_mtop_atomnr_to_atom(mtop,qm_arr[k],&atom);
 +      atom->q  = 0.0;
 +      atom->qB = 0.0;
 +    } 
 +    qr->qm[0] = mk_QMrec();
 +    /* store QM atoms in the QMrec and initialise
 +     */
 +    init_QMrec(0,qr->qm[0],qm_nr,qm_arr,mtop,ir);
 +    if(qr->qm[0]->bOPT || qr->qm[0]->bTS){
 +      for(i=0;i<qm_nr;i++){
 +      gmx_mtop_atomnr_to_atom(mtop,qm_arr[i],&atom);
 +      qr->qm[0]->c6[i]  =  C6(fr->nbfp,mtop->ffparams.atnr,
 +                              atom->type,atom->type)/c6au;
 +      qr->qm[0]->c12[i] = C12(fr->nbfp,mtop->ffparams.atnr,
 +                              atom->type,atom->type)/c12au;
 +      }
 +      
 +    }
 +    
 +
 +
 +    /* find frontier atoms and mark them true in the frontieratoms array.
 +     */
 +    for(i=0;i<qm_nr;i++) {
 +      gmx_mtop_atomnr_to_ilist(mtop,qm_arr[i],&ilist_mol,&a_offset);
 +      nrvsite2 = ilist_mol[F_VSITE2].nr;
 +      iatoms   = ilist_mol[F_VSITE2].iatoms;
 +      
 +      for(k=0;k<nrvsite2;k+=4){
 +      vsite = a_offset + iatoms[k+1]; /* the vsite         */
 +      ai    = a_offset + iatoms[k+2]; /* constructing atom */
 +      aj    = a_offset + iatoms[k+3]; /* constructing atom */
 +      if(ggrpnr(groups,egcQMMM,ai) < (groups->grps[egcQMMM].nr-1) &&
 +         (ggrpnr(groups,egcQMMM,aj) >= (groups->grps[egcQMMM].nr-1))){
 +      /* mark ai as frontier atom */
 +        if ( (qm_arr[i]==ai) || (qm_arr[i]==vsite) ){
 +          qr->qm[0]->frontatoms[i]=TRUE;
 +        }
 +      }
 +      else if (ggrpnr(groups,egcQMMM,aj) < (groups->grps[egcQMMM].nr-1) &&
 +               (ggrpnr(groups,egcQMMM,ai) >=(groups->grps[egcQMMM].nr-1))) {
 +        /* mark aj as frontier atom */
 +        if ( (qm_arr[i]==aj) || (qm_arr[i]==vsite) ){
 +          qr->qm[0]->frontatoms[i]=TRUE;
 +        }
 +      }
 +      }
 +    }
 +      
 +    /* MM rec creation */
 +    mm               = mk_MMrec(); 
 +    mm->scalefactor  = ir->scalefactor;
 +    mm->nrMMatoms    = (mtop->natoms)-(qr->qm[0]->nrQMatoms); /* rest of the atoms */
 +    qr->mm           = mm;
 +  } else {/* ONIOM */
 +    /* MM rec creation */    
 +    mm               = mk_MMrec(); 
 +    mm->scalefactor  = ir->scalefactor;
 +    mm->nrMMatoms    = 0;
 +    qr->mm           = mm;
 +  }
 +  
 +  /* these variables get updated in the update QMMMrec */
 +
 +  if(qr->nrQMlayers==1){
 +    /* with only one layer there is only one initialisation
 +     * needed. Multilayer is a bit more complicated as it requires
 +     * re-initialisation at every step of the simulation. This is due
 +     * to the use of COMMON blocks in the fortran QM subroutines.  
 +     */
 +    if (qr->qm[0]->QMmethod<eQMmethodRHF)
 +    {
 +#ifdef GMX_QMMM_MOPAC
 +        /* semi-empiprical 1-layer ONIOM calculation requested (mopac93) */
 +        init_mopac(cr,qr->qm[0],qr->mm);
 +#else
 +        gmx_fatal(FARGS,"Semi-empirical QM only supported with Mopac.");
 +#endif
 +    }
 +    else 
 +    { 
 +        /* ab initio calculation requested (gamess/gaussian/ORCA) */
 +#ifdef GMX_QMMM_GAMESS
 +        init_gamess(cr,qr->qm[0],qr->mm);
 +#elif defined GMX_QMMM_GAUSSIAN
 +        init_gaussian(cr,qr->qm[0],qr->mm);
 +#elif defined GMX_QMMM_ORCA
 +        init_orca(cr,qr->qm[0],qr->mm);
 +#else
 +        gmx_fatal(FARGS,"Ab-initio calculation only supported with Gamess, Gaussian or ORCA.");
 +#endif
 +    }
 +  }
 +} /* init_QMMMrec */
 +
 +void update_QMMMrec(t_commrec *cr,
 +                  t_forcerec *fr,
 +                  rvec x[],
 +                  t_mdatoms *md,
 +                  matrix box,
 +                  gmx_localtop_t *top)
 +{
 +  /* updates the coordinates of both QM atoms and MM atoms and stores
 +   * them in the QMMMrec.  
 +   *
 +   * NOTE: is NOT yet working if there are no PBC. Also in ns.c, simple
 +   * ns needs to be fixed!  
 +   */
 +  int 
 +    mm_max=0,mm_nr=0,mm_nr_new,i,j,is,k,shift;
 +  t_j_particle 
 +    *mm_j_particles=NULL,*qm_i_particles=NULL;
 +  t_QMMMrec 
 +    *qr; 
 +  t_nblist 
 +    QMMMlist;
 +  rvec
 +    dx,crd;
 +  int
 +    *MMatoms;
 +  t_QMrec
 +    *qm;
 +  t_MMrec
 +    *mm;
 +  t_pbc
 +    pbc;
 +  int  
 +    *parallelMMarray=NULL;
 +  real
 +    c12au,c6au;
 +
 +  c6au  = (HARTREE2KJ*AVOGADRO*pow(BOHR2NM,6)); 
 +  c12au = (HARTREE2KJ*AVOGADRO*pow(BOHR2NM,12)); 
 +
 +  /* every cpu has this array. On every processor we fill this array
 +   * with 1's and 0's. 1's indicate the atoms is a QM atom on the
 +   * current cpu in a later stage these arrays are all summed. indexes
 +   * > 0 indicate the atom is a QM atom. Every node therefore knows
 +   * whcih atoms are part of the QM subsystem.  
 +   */
 +  /* copy some pointers */
 +  qr          = fr->qr;
 +  mm          = qr->mm;
 +  QMMMlist    = fr->QMMMlist;
 +
 +  
 +
 +  /*  init_pbc(box);  needs to be called first, see pbc.h */
 +  set_pbc_dd(&pbc,fr->ePBC,DOMAINDECOMP(cr) ? cr->dd : NULL,FALSE,box);
 +  /* only in standard (normal) QMMM we need the neighbouring MM
 +   * particles to provide a electric field of point charges for the QM
 +   * atoms.  
 +   */
 +  if(qr->QMMMscheme==eQMMMschemenormal){ /* also implies 1 QM-layer */
 +    /* we NOW create/update a number of QMMMrec entries:
 +     *
 +     * 1) the shiftQM, containing the shifts of the QM atoms
 +     *
 +     * 2) the indexMM array, containing the index of the MM atoms
 +     * 
 +     * 3) the shiftMM, containing the shifts of the MM atoms
 +     *
 +     * 4) the shifted coordinates of the MM atoms
 +     *
 +     * the shifts are used for computing virial of the QM/MM particles.
 +     */
 +    qm = qr->qm[0]; /* in case of normal QMMM, there is only one group */
 +    snew(qm_i_particles,QMMMlist.nri);
 +    if(QMMMlist.nri){
 +      qm_i_particles[0].shift = XYZ2IS(0,0,0);
 +      for(i=0;i<QMMMlist.nri;i++){
 +      qm_i_particles[i].j     = QMMMlist.iinr[i];
 +      
 +      if(i){
 +        qm_i_particles[i].shift = pbc_dx_aiuc(&pbc,x[QMMMlist.iinr[0]],
 +                                              x[QMMMlist.iinr[i]],dx);
 +        
 +      }
 +      /* However, since nri >= nrQMatoms, we do a quicksort, and throw
 +       * out double, triple, etc. entries later, as we do for the MM
 +       * list too.  
 +       */
 +      
 +      /* compute the shift for the MM j-particles with respect to
 +       * the QM i-particle and store them. 
 +       */
 +      
 +      crd[0] = IS2X(QMMMlist.shift[i]) + IS2X(qm_i_particles[i].shift);
 +      crd[1] = IS2Y(QMMMlist.shift[i]) + IS2Y(qm_i_particles[i].shift);
 +      crd[2] = IS2Z(QMMMlist.shift[i]) + IS2Z(qm_i_particles[i].shift);
 +      is = XYZ2IS(crd[0],crd[1],crd[2]); 
 +      for(j=QMMMlist.jindex[i];
 +          j<QMMMlist.jindex[i+1];
 +          j++){
 +        if(mm_nr >= mm_max){
 +          mm_max += 1000;
 +          srenew(mm_j_particles,mm_max);
 +        }       
 +        
 +        mm_j_particles[mm_nr].j = QMMMlist.jjnr[j];
 +        mm_j_particles[mm_nr].shift = is;
 +        mm_nr++;
 +      }
 +      }
 +      
 +      /* quicksort QM and MM shift arrays and throw away multiple entries */
 +      
 +
 +
 +      qsort(qm_i_particles,QMMMlist.nri,
 +          (size_t)sizeof(qm_i_particles[0]),
 +          struct_comp);
 +      qsort(mm_j_particles,mm_nr,
 +          (size_t)sizeof(mm_j_particles[0]),
 +          struct_comp);
 +      /* remove multiples in the QM shift array, since in init_QMMM() we
 +       * went through the atom numbers from 0 to md.nr, the order sorted
 +       * here matches the one of QMindex already.
 +       */
 +      j=0;
 +      for(i=0;i<QMMMlist.nri;i++){
 +      if (i==0 || qm_i_particles[i].j!=qm_i_particles[i-1].j){
 +        qm_i_particles[j++] = qm_i_particles[i];
 +      }
 +      }
 +      mm_nr_new = 0;
 +      if(qm->bTS||qm->bOPT){
 +      /* only remove double entries for the MM array */
 +      for(i=0;i<mm_nr;i++){
 +        if((i==0 || mm_j_particles[i].j!=mm_j_particles[i-1].j)
 +           && !md->bQM[mm_j_particles[i].j]){
 +          mm_j_particles[mm_nr_new++] = mm_j_particles[i];
 +        }
 +      }
 +      }      
 +      /* we also remove mm atoms that have no charges! 
 +      * actually this is already done in the ns.c  
 +      */
 +      else{
 +      for(i=0;i<mm_nr;i++){
 +        if((i==0 || mm_j_particles[i].j!=mm_j_particles[i-1].j)
 +           && !md->bQM[mm_j_particles[i].j] 
 +           && (md->chargeA[mm_j_particles[i].j]
 +               || (md->chargeB && md->chargeB[mm_j_particles[i].j]))) {
 +          mm_j_particles[mm_nr_new++] = mm_j_particles[i];
 +        }
 +      }
 +      }
 +      mm_nr = mm_nr_new;
 +      /* store the data retrieved above into the QMMMrec
 +       */    
 +      k=0;
 +      /* Keep the compiler happy,
 +       * shift will always be set in the loop for i=0
 +       */
 +      shift = 0;
 +      for(i=0;i<qm->nrQMatoms;i++){
 +      /* not all qm particles might have appeared as i
 +       * particles. They might have been part of the same charge
 +       * group for instance.
 +       */
 +      if (qm->indexQM[i] == qm_i_particles[k].j) {
 +        shift = qm_i_particles[k++].shift;
 +      }
 +      /* use previous shift, assuming they belong the same charge
 +       * group anyway,
 +       */
 +      
 +      qm->shiftQM[i] = shift;
 +      }
 +    }
 +    /* parallel excecution */
 +    if(PAR(cr)){
 +      snew(parallelMMarray,2*(md->nr)); 
 +      /* only MM particles have a 1 at their atomnumber. The second part
 +       * of the array contains the shifts. Thus:
 +       * p[i]=1/0 depending on wether atomnumber i is a MM particle in the QM
 +       * step or not. p[i+md->nr] is the shift of atomnumber i.
 +       */
 +      for(i=0;i<2*(md->nr);i++){
 +      parallelMMarray[i]=0;
 +      }
 +      
 +      for(i=0;i<mm_nr;i++){
 +      parallelMMarray[mm_j_particles[i].j]=1;
 +      parallelMMarray[mm_j_particles[i].j+(md->nr)]=mm_j_particles[i].shift;
 +      }
 +      gmx_sumi(md->nr,parallelMMarray,cr);
 +      mm_nr=0;
 +      
 +      mm_max = 0;
 +      for(i=0;i<md->nr;i++){
 +      if(parallelMMarray[i]){
 +        if(mm_nr >= mm_max){
 +          mm_max += 1000;
 +          srenew(mm->indexMM,mm_max);
 +          srenew(mm->shiftMM,mm_max);
 +        }
 +        mm->indexMM[mm_nr]  = i;
 +        mm->shiftMM[mm_nr++]= parallelMMarray[i+md->nr]/parallelMMarray[i];
 +      }
 +      }
 +      mm->nrMMatoms=mm_nr;
 +      free(parallelMMarray);
 +    }
 +    /* serial execution */
 +    else{
 +      mm->nrMMatoms = mm_nr;
 +      srenew(mm->shiftMM,mm_nr);
 +      srenew(mm->indexMM,mm_nr);
 +      for(i=0;i<mm_nr;i++){
 +      mm->indexMM[i]=mm_j_particles[i].j;
 +      mm->shiftMM[i]=mm_j_particles[i].shift;
 +      }
 +      
 +    }
 +    /* (re) allocate memory for the MM coordiate array. The QM
 +     * coordinate array was already allocated in init_QMMM, and is
 +     * only (re)filled in the update_QMMM_coordinates routine 
 +     */
 +    srenew(mm->xMM,mm->nrMMatoms);
 +    /* now we (re) fill the array that contains the MM charges with
 +     * the forcefield charges. If requested, these charges will be
 +     * scaled by a factor 
 +     */
 +    srenew(mm->MMcharges,mm->nrMMatoms);
 +    for(i=0;i<mm->nrMMatoms;i++){/* no free energy yet */
 +      mm->MMcharges[i]=md->chargeA[mm->indexMM[i]]*mm->scalefactor; 
 +    }  
 +    if(qm->bTS||qm->bOPT){
 +      /* store (copy) the c6 and c12 parameters into the MMrec struct 
 +       */
 +      srenew(mm->c6,mm->nrMMatoms);
 +      srenew(mm->c12,mm->nrMMatoms);
 +      for (i=0;i<mm->nrMMatoms;i++){
 +      mm->c6[i]  = C6(fr->nbfp,top->idef.atnr,
 +                      md->typeA[mm->indexMM[i]],
 +                      md->typeA[mm->indexMM[i]])/c6au;
 +      mm->c12[i] =C12(fr->nbfp,top->idef.atnr,
 +                      md->typeA[mm->indexMM[i]],
 +                      md->typeA[mm->indexMM[i]])/c12au;
 +      }
 +      punch_QMMM_excl(qr->qm[0],mm,&(top->excls));
 +    }
 +    /* the next routine fills the coordinate fields in the QMMM rec of
 +     * both the qunatum atoms and the MM atoms, using the shifts
 +     * calculated above.  
 +     */
 +
 +    update_QMMM_coord(x,fr,qr->qm[0],qr->mm);
 +    free(qm_i_particles);
 +    free(mm_j_particles);
 +  } 
 +  else { /* ONIOM */ /* ????? */
 +    mm->nrMMatoms=0;
 +    /* do for each layer */
 +    for (j=0;j<qr->nrQMlayers;j++){
 +      qm = qr->qm[j];
 +      qm->shiftQM[0]=XYZ2IS(0,0,0);
 +      for(i=1;i<qm->nrQMatoms;i++){
 +      qm->shiftQM[i] = pbc_dx_aiuc(&pbc,x[qm->indexQM[0]],x[qm->indexQM[i]],
 +                                   dx);
 +      }
 +      update_QMMM_coord(x,fr,qm,mm);    
 +    }
 +  }
 +} /* update_QMMM_rec */
 +
 +
 +real calculate_QMMM(t_commrec *cr,
 +                  rvec x[],rvec f[],
 +                  t_forcerec *fr,
 +                  t_mdatoms *md)
 +{
 +  real
 +    QMener=0.0;
 +  /* a selection for the QM package depending on which is requested
 +   * (Gaussian, GAMESS-UK, MOPAC or ORCA) needs to be implemented here. Now
 +   * it works through defines.... Not so nice yet 
 +   */
 +  t_QMMMrec
 +    *qr;
 +  t_QMrec
 +    *qm,*qm2;
 +  t_MMrec
 +    *mm=NULL;
 +  rvec 
 +    *forces=NULL,*fshift=NULL,    
 +    *forces2=NULL, *fshift2=NULL; /* needed for multilayer ONIOM */
 +  int
 +    i,j,k;
 +  /* make a local copy the QMMMrec pointer 
 +   */
 +  qr = fr->qr;
 +  mm = qr->mm;
 +
 +  /* now different procedures are carried out for one layer ONION and
 +   * normal QMMM on one hand and multilayer oniom on the other
 +   */
 +  if(qr->QMMMscheme==eQMMMschemenormal || qr->nrQMlayers==1){
 +    qm = qr->qm[0];
 +    snew(forces,(qm->nrQMatoms+mm->nrMMatoms));
 +    snew(fshift,(qm->nrQMatoms+mm->nrMMatoms));
 +    QMener = call_QMroutine(cr,fr,qm,mm,forces,fshift);
 +    for(i=0;i<qm->nrQMatoms;i++){
 +      for(j=0;j<DIM;j++){
 +      f[qm->indexQM[i]][j]          -= forces[i][j];
 +      fr->fshift[qm->shiftQM[i]][j] += fshift[i][j];
 +      }
 +    }
 +    for(i=0;i<mm->nrMMatoms;i++){
 +      for(j=0;j<DIM;j++){
 +      f[mm->indexMM[i]][j]          -= forces[qm->nrQMatoms+i][j];
 +      fr->fshift[mm->shiftMM[i]][j] += fshift[qm->nrQMatoms+i][j];
 +      }
 +      
 +    }
 +    free(forces);
 +    free(fshift);
 +  }
 +  else{ /* Multi-layer ONIOM */
 +    for(i=0;i<qr->nrQMlayers-1;i++){ /* last layer is special */
 +      qm  = qr->qm[i];
 +      qm2 = copy_QMrec(qr->qm[i+1]);
 +
 +      qm2->nrQMatoms = qm->nrQMatoms;
 +    
 +      for(j=0;j<qm2->nrQMatoms;j++){
 +      for(k=0;k<DIM;k++)
 +        qm2->xQM[j][k]       = qm->xQM[j][k];
 +      qm2->indexQM[j]        = qm->indexQM[j];
 +      qm2->atomicnumberQM[j] = qm->atomicnumberQM[j];
 +      qm2->shiftQM[j]        = qm->shiftQM[j];
 +      }
 +
 +      qm2->QMcharge = qm->QMcharge;
 +      /* this layer at the higher level of theory */
 +      srenew(forces,qm->nrQMatoms);
 +      srenew(fshift,qm->nrQMatoms);
 +      /* we need to re-initialize the QMroutine every step... */
 +      init_QMroutine(cr,qm,mm);
 +      QMener += call_QMroutine(cr,fr,qm,mm,forces,fshift);
 +
 +      /* this layer at the lower level of theory */
 +      srenew(forces2,qm->nrQMatoms);
 +      srenew(fshift2,qm->nrQMatoms);
 +      init_QMroutine(cr,qm2,mm);
 +      QMener -= call_QMroutine(cr,fr,qm2,mm,forces2,fshift2);
 +      /* E = E1high-E1low The next layer includes the current layer at
 +       * the lower level of theory, which provides + E2low
 +       * this is similar for gradients
 +       */
 +      for(i=0;i<qm->nrQMatoms;i++){
 +      for(j=0;j<DIM;j++){
 +        f[qm->indexQM[i]][j]          -= (forces[i][j]-forces2[i][j]);
 +        fr->fshift[qm->shiftQM[i]][j] += (fshift[i][j]-fshift2[i][j]);
 +      }
 +      }
 +      free(qm2);
 +    }
 +    /* now the last layer still needs to be done: */
 +    qm      = qr->qm[qr->nrQMlayers-1]; /* C counts from 0 */
 +    init_QMroutine(cr,qm,mm);
 +    srenew(forces,qm->nrQMatoms);
 +    srenew(fshift,qm->nrQMatoms);
 +    QMener += call_QMroutine(cr,fr,qm,mm,forces,fshift);
 +    for(i=0;i<qm->nrQMatoms;i++){
 +      for(j=0;j<DIM;j++){
 +      f[qm->indexQM[i]][j]          -= forces[i][j];
 +      fr->fshift[qm->shiftQM[i]][j] += fshift[i][j];
 +      }
 +    }
 +    free(forces);
 +    free(fshift);
 +    free(forces2);
 +    free(fshift2);
 +  }
 +  if(qm->bTS||qm->bOPT){
 +    /* qm[0] still contains the largest ONIOM QM subsystem 
 +     * we take the optimized coordiates and put the in x[]
 +     */
 +    for(i=0;i<qm->nrQMatoms;i++){
 +      for(j=0;j<DIM;j++){
 +      x[qm->indexQM[i]][j] = qm->xQM[i][j];
 +      }
 +    }
 +  }
 +  return(QMener);
 +} /* calculate_QMMM */
 +
 +/* end of QMMM core routines */
index 07a56283e6ab15189e2494f50dc8d29b6cbb73bf,0000000000000000000000000000000000000000..859c6bd9d97fdd733c257723e73e371d96a9b264
mode 100644,000000..100644
--- /dev/null
@@@ -1,1591 -1,0 +1,1593 @@@
-             spread_vsite_f(fplog,vsite,x,f,fr->fshift,nrnb,
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + * 
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * GROwing Monsters And Cloning Shrimps
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#ifdef GMX_CRAY_XT3
 +#include<catamount/dclock.h>
 +#endif
 +
 +
 +#include <stdio.h>
 +#include <time.h>
 +#ifdef HAVE_SYS_TIME_H
 +#include <sys/time.h>
 +#endif
 +#include <math.h>
 +#include "typedefs.h"
 +#include "string2.h"
 +#include "gmxfio.h"
 +#include "smalloc.h"
 +#include "names.h"
 +#include "confio.h"
 +#include "mvdata.h"
 +#include "txtdump.h"
 +#include "pbc.h"
 +#include "chargegroup.h"
 +#include "vec.h"
 +#include <time.h>
 +#include "nrnb.h"
 +#include "mshift.h"
 +#include "mdrun.h"
 +#include "update.h"
 +#include "physics.h"
 +#include "main.h"
 +#include "mdatoms.h"
 +#include "force.h"
 +#include "bondf.h"
 +#include "pme.h"
 +#include "pppm.h"
 +#include "disre.h"
 +#include "orires.h"
 +#include "network.h"
 +#include "calcmu.h"
 +#include "constr.h"
 +#include "xvgr.h"
 +#include "trnio.h"
 +#include "xtcio.h"
 +#include "copyrite.h"
 +#include "pull_rotation.h"
 +#include "domdec.h"
 +#include "partdec.h"
 +#include "gmx_wallcycle.h"
 +#include "genborn.h"
 +
 +#ifdef GMX_LIB_MPI
 +#include <mpi.h>
 +#endif
 +#ifdef GMX_THREAD_MPI
 +#include "tmpi.h"
 +#endif
 +
 +#include "adress.h"
 +#include "qmmm.h"
 +
 +#if 0
 +typedef struct gmx_timeprint {
 +    
 +} t_gmx_timeprint;
 +#endif
 +
 +/* Portable version of ctime_r implemented in src/gmxlib/string2.c, but we do not want it declared in public installed headers */
 +char *
 +gmx_ctime_r(const time_t *clock,char *buf, int n);
 +
 +
 +double
 +gmx_gettime()
 +{
 +#ifdef HAVE_GETTIMEOFDAY
 +      struct timeval t;
 +      double seconds;
 +      
 +      gettimeofday(&t,NULL);
 +      
 +      seconds = (double) t.tv_sec + 1e-6*(double)t.tv_usec;
 +      
 +      return seconds;
 +#else
 +      double  seconds;
 +      
 +      seconds = time(NULL);
 +      
 +      return seconds;
 +#endif
 +}
 +
 +
 +#define difftime(end,start) ((double)(end)-(double)(start))
 +
 +void print_time(FILE *out,gmx_runtime_t *runtime,gmx_large_int_t step,   
 +                t_inputrec *ir, t_commrec *cr)
 +{
 +    time_t finish;
 +    char   timebuf[STRLEN];
 +    double dt;
 +    char buf[48];
 +    
 +#ifndef GMX_THREAD_MPI
 +    if (!PAR(cr))
 +#endif
 +    {
 +        fprintf(out,"\r");
 +    }
 +    fprintf(out,"step %s",gmx_step_str(step,buf));
 +    if ((step >= ir->nstlist))
 +    {
 +        if ((ir->nstlist == 0) || ((step % ir->nstlist) == 0))
 +        {
 +            /* We have done a full cycle let's update time_per_step */
 +            runtime->last = gmx_gettime();
 +            dt = difftime(runtime->last,runtime->real);
 +            runtime->time_per_step = dt/(step - ir->init_step + 1);
 +        }
 +        dt = (ir->nsteps + ir->init_step - step)*runtime->time_per_step;
 +        
 +        if (ir->nsteps >= 0)
 +        {
 +            if (dt >= 300)
 +            {    
 +                finish = (time_t) (runtime->last + dt);
 +                gmx_ctime_r(&finish,timebuf,STRLEN);
 +                sprintf(buf,"%s",timebuf);
 +                buf[strlen(buf)-1]='\0';
 +                fprintf(out,", will finish %s",buf);
 +            }
 +            else
 +                fprintf(out,", remaining runtime: %5d s          ",(int)dt);
 +        }
 +        else
 +        {
 +            fprintf(out," performance: %.1f ns/day    ",
 +                    ir->delta_t/1000*24*60*60/runtime->time_per_step);
 +        }
 +    }
 +#ifndef GMX_THREAD_MPI
 +    if (PAR(cr))
 +    {
 +        fprintf(out,"\n");
 +    }
 +#endif
 +
 +    fflush(out);
 +}
 +
 +#ifdef NO_CLOCK 
 +#define clock() -1
 +#endif
 +
 +static double set_proctime(gmx_runtime_t *runtime)
 +{
 +    double diff;
 +#ifdef GMX_CRAY_XT3
 +    double prev;
 +
 +    prev = runtime->proc;
 +    runtime->proc = dclock();
 +    
 +    diff = runtime->proc - prev;
 +#else
 +    clock_t prev;
 +
 +    prev = runtime->proc;
 +    runtime->proc = clock();
 +
 +    diff = (double)(runtime->proc - prev)/(double)CLOCKS_PER_SEC;
 +#endif
 +    if (diff < 0)
 +    {
 +        /* The counter has probably looped, ignore this data */
 +        diff = 0;
 +    }
 +
 +    return diff;
 +}
 +
 +void runtime_start(gmx_runtime_t *runtime)
 +{
 +    runtime->real = gmx_gettime();
 +    runtime->proc          = 0;
 +    set_proctime(runtime);
 +    runtime->realtime      = 0;
 +    runtime->proctime      = 0;
 +    runtime->last          = 0;
 +    runtime->time_per_step = 0;
 +}
 +
 +void runtime_end(gmx_runtime_t *runtime)
 +{
 +    double now;
 +    
 +    now = gmx_gettime();
 +    
 +    runtime->proctime += set_proctime(runtime);
 +    runtime->realtime  = now - runtime->real;
 +    runtime->real      = now;
 +}
 +
 +void runtime_upd_proc(gmx_runtime_t *runtime)
 +{
 +    runtime->proctime += set_proctime(runtime);
 +}
 +
 +void print_date_and_time(FILE *fplog,int nodeid,const char *title,
 +                         const gmx_runtime_t *runtime)
 +{
 +    int i;
 +    char timebuf[STRLEN];
 +    char time_string[STRLEN];
 +    time_t tmptime;
 +
 +    if (fplog)
 +    {
 +        if (runtime != NULL)
 +        {
 +            tmptime = (time_t) runtime->real;
 +            gmx_ctime_r(&tmptime,timebuf,STRLEN);
 +        }
 +        else
 +        {
 +            tmptime = (time_t) gmx_gettime();
 +            gmx_ctime_r(&tmptime,timebuf,STRLEN);
 +        }
 +        for(i=0; timebuf[i]>=' '; i++)
 +        {
 +            time_string[i]=timebuf[i];
 +        }
 +        time_string[i]='\0';
 +
 +        fprintf(fplog,"%s on node %d %s\n",title,nodeid,time_string);
 +    }
 +}
 +
 +static void sum_forces(int start,int end,rvec f[],rvec flr[])
 +{
 +  int i;
 +  
 +  if (gmx_debug_at) {
 +    pr_rvecs(debug,0,"fsr",f+start,end-start);
 +    pr_rvecs(debug,0,"flr",flr+start,end-start);
 +  }
 +  for(i=start; (i<end); i++)
 +    rvec_inc(f[i],flr[i]);
 +}
 +
 +/* 
 + * calc_f_el calculates forces due to an electric field.
 + *
 + * force is kJ mol^-1 nm^-1 = e * kJ mol^-1 nm^-1 / e 
 + *
 + * Et[] contains the parameters for the time dependent 
 + * part of the field (not yet used). 
 + * Ex[] contains the parameters for
 + * the spatial dependent part of the field. You can have cool periodic
 + * fields in principle, but only a constant field is supported
 + * now. 
 + * The function should return the energy due to the electric field
 + * (if any) but for now returns 0.
 + *
 + * WARNING:
 + * There can be problems with the virial.
 + * Since the field is not self-consistent this is unavoidable.
 + * For neutral molecules the virial is correct within this approximation.
 + * For neutral systems with many charged molecules the error is small.
 + * But for systems with a net charge or a few charged molecules
 + * the error can be significant when the field is high.
 + * Solution: implement a self-consitent electric field into PME.
 + */
 +static void calc_f_el(FILE *fp,int  start,int homenr,
 +                      real charge[],rvec x[],rvec f[],
 +                      t_cosines Ex[],t_cosines Et[],double t)
 +{
 +    rvec Ext;
 +    real t0;
 +    int  i,m;
 +    
 +    for(m=0; (m<DIM); m++)
 +    {
 +        if (Et[m].n > 0)
 +        {
 +            if (Et[m].n == 3)
 +            {
 +                t0 = Et[m].a[1];
 +                Ext[m] = cos(Et[m].a[0]*(t-t0))*exp(-sqr(t-t0)/(2.0*sqr(Et[m].a[2])));
 +            }
 +            else
 +            {
 +                Ext[m] = cos(Et[m].a[0]*t);
 +            }
 +        }
 +        else
 +        {
 +            Ext[m] = 1.0;
 +        }
 +        if (Ex[m].n > 0)
 +        {
 +            /* Convert the field strength from V/nm to MD-units */
 +            Ext[m] *= Ex[m].a[0]*FIELDFAC;
 +            for(i=start; (i<start+homenr); i++)
 +                f[i][m] += charge[i]*Ext[m];
 +        }
 +        else
 +        {
 +            Ext[m] = 0;
 +        }
 +    }
 +    if (fp != NULL)
 +    {
 +        fprintf(fp,"%10g  %10g  %10g  %10g #FIELD\n",t,
 +                Ext[XX]/FIELDFAC,Ext[YY]/FIELDFAC,Ext[ZZ]/FIELDFAC);
 +    }
 +}
 +
 +static void calc_virial(FILE *fplog,int start,int homenr,rvec x[],rvec f[],
 +                      tensor vir_part,t_graph *graph,matrix box,
 +                      t_nrnb *nrnb,const t_forcerec *fr,int ePBC)
 +{
 +  int i,j;
 +  tensor virtest;
 +
 +  /* The short-range virial from surrounding boxes */
 +  clear_mat(vir_part);
 +  calc_vir(fplog,SHIFTS,fr->shift_vec,fr->fshift,vir_part,ePBC==epbcSCREW,box);
 +  inc_nrnb(nrnb,eNR_VIRIAL,SHIFTS);
 +  
 +  /* Calculate partial virial, for local atoms only, based on short range. 
 +   * Total virial is computed in global_stat, called from do_md 
 +   */
 +  f_calc_vir(fplog,start,start+homenr,x,f,vir_part,graph,box);
 +  inc_nrnb(nrnb,eNR_VIRIAL,homenr);
 +
 +  /* Add position restraint contribution */
 +  for(i=0; i<DIM; i++) {
 +    vir_part[i][i] += fr->vir_diag_posres[i];
 +  }
 +
 +  /* Add wall contribution */
 +  for(i=0; i<DIM; i++) {
 +    vir_part[i][ZZ] += fr->vir_wall_z[i];
 +  }
 +
 +  if (debug)
 +    pr_rvecs(debug,0,"vir_part",vir_part,DIM);
 +}
 +
 +static void print_large_forces(FILE *fp,t_mdatoms *md,t_commrec *cr,
 +                             gmx_large_int_t step,real pforce,rvec *x,rvec *f)
 +{
 +  int  i;
 +  real pf2,fn2;
 +  char buf[STEPSTRSIZE];
 +
 +  pf2 = sqr(pforce);
 +  for(i=md->start; i<md->start+md->homenr; i++) {
 +    fn2 = norm2(f[i]);
 +    /* We also catch NAN, if the compiler does not optimize this away. */
 +    if (fn2 >= pf2 || fn2 != fn2) {
 +      fprintf(fp,"step %s  atom %6d  x %8.3f %8.3f %8.3f  force %12.5e\n",
 +            gmx_step_str(step,buf),
 +            ddglatnr(cr->dd,i),x[i][XX],x[i][YY],x[i][ZZ],sqrt(fn2));
 +    }
 +  }
 +}
 +
 +void do_force(FILE *fplog,t_commrec *cr,
 +              t_inputrec *inputrec,
 +              gmx_large_int_t step,t_nrnb *nrnb,gmx_wallcycle_t wcycle,
 +              gmx_localtop_t *top,
 +              gmx_mtop_t *mtop,
 +              gmx_groups_t *groups,
 +              matrix box,rvec x[],history_t *hist,
 +              rvec f[],
 +              tensor vir_force,
 +              t_mdatoms *mdatoms,
 +              gmx_enerdata_t *enerd,t_fcdata *fcd,
 +              real lambda,t_graph *graph,
 +              t_forcerec *fr,gmx_vsite_t *vsite,rvec mu_tot,
 +              double t,FILE *field,gmx_edsam_t ed,
 +              gmx_bool bBornRadii,
 +              int flags)
 +{
 +    int    cg0,cg1,i,j;
 +    int    start,homenr;
 +    double mu[2*DIM]; 
 +    gmx_bool   bSepDVDL,bStateChanged,bNS,bFillGrid,bCalcCGCM,bBS;
 +    gmx_bool   bDoLongRange,bDoForces,bSepLRF;
 +    gmx_bool   bDoAdressWF;
 +    matrix boxs;
 +    real   e,v,dvdl;
 +    t_pbc  pbc;
 +    float  cycles_ppdpme,cycles_pme,cycles_seppme,cycles_force;
 +  
 +    start  = mdatoms->start;
 +    homenr = mdatoms->homenr;
 +
 +    bSepDVDL = (fr->bSepDVDL && do_per_step(step,inputrec->nstlog));
 +
 +    clear_mat(vir_force);
 +
 +    if (PARTDECOMP(cr))
 +    {
 +        pd_cg_range(cr,&cg0,&cg1);
 +    }
 +    else
 +    {
 +        cg0 = 0;
 +        if (DOMAINDECOMP(cr))
 +        {
 +            cg1 = cr->dd->ncg_tot;
 +        }
 +        else
 +        {
 +            cg1 = top->cgs.nr;
 +        }
 +        if (fr->n_tpi > 0)
 +        {
 +            cg1--;
 +        }
 +    }
 +
 +    bStateChanged = (flags & GMX_FORCE_STATECHANGED);
 +    bNS           = (flags & GMX_FORCE_NS) && (fr->bAllvsAll==FALSE); 
 +    bFillGrid     = (bNS && bStateChanged);
 +    bCalcCGCM     = (bFillGrid && !DOMAINDECOMP(cr));
 +    bDoLongRange  = (fr->bTwinRange && bNS && (flags & GMX_FORCE_DOLR));
 +    bDoForces     = (flags & GMX_FORCE_FORCES);
 +    bSepLRF       = (bDoLongRange && bDoForces && (flags & GMX_FORCE_SEPLRF));
 +    /* should probably move this to the forcerec since it doesn't change */
 +    bDoAdressWF   = ((fr->adress_type!=eAdressOff));
 +
 +    if (bStateChanged)
 +    {
 +        update_forcerec(fplog,fr,box);
 +        
 +        /* Calculate total (local) dipole moment in a temporary common array. 
 +         * This makes it possible to sum them over nodes faster.
 +         */
 +        calc_mu(start,homenr,
 +                x,mdatoms->chargeA,mdatoms->chargeB,mdatoms->nChargePerturbed,
 +                mu,mu+DIM);
 +    }
 +  
 +  if (fr->ePBC != epbcNONE) { 
 +    /* Compute shift vectors every step,
 +     * because of pressure coupling or box deformation!
 +     */
 +    if ((flags & GMX_FORCE_DYNAMICBOX) && bStateChanged)
 +      calc_shifts(box,fr->shift_vec);
 +    
 +    if (bCalcCGCM) { 
 +      put_charge_groups_in_box(fplog,cg0,cg1,fr->ePBC,box,
 +                             &(top->cgs),x,fr->cg_cm);
 +      inc_nrnb(nrnb,eNR_CGCM,homenr);
 +      inc_nrnb(nrnb,eNR_RESETX,cg1-cg0);
 +    } 
 +    else if (EI_ENERGY_MINIMIZATION(inputrec->eI) && graph) {
 +      unshift_self(graph,box,x);
 +    }
 +  } 
 +  else if (bCalcCGCM) {
 +    calc_cgcm(fplog,cg0,cg1,&(top->cgs),x,fr->cg_cm);
 +    inc_nrnb(nrnb,eNR_CGCM,homenr);
 +  }
 +  
 +  if (bCalcCGCM) {
 +    if (PAR(cr)) {
 +      move_cgcm(fplog,cr,fr->cg_cm);
 +    }
 +    if (gmx_debug_at)
 +      pr_rvecs(debug,0,"cgcm",fr->cg_cm,top->cgs.nr);
 +  }
 +
 +#ifdef GMX_MPI
 +  if (!(cr->duty & DUTY_PME)) {
 +    /* Send particle coordinates to the pme nodes.
 +     * Since this is only implemented for domain decomposition
 +     * and domain decomposition does not use the graph,
 +     * we do not need to worry about shifting.
 +     */    
 +
 +    wallcycle_start(wcycle,ewcPP_PMESENDX);
 +
 +    bBS = (inputrec->nwall == 2);
 +    if (bBS) {
 +      copy_mat(box,boxs);
 +      svmul(inputrec->wall_ewald_zfac,boxs[ZZ],boxs[ZZ]);
 +    }
 +
 +    gmx_pme_send_x(cr,bBS ? boxs : box,x,
 +                   mdatoms->nChargePerturbed,lambda,
 +                   ( flags & GMX_FORCE_VIRIAL),step);
 +
 +    wallcycle_stop(wcycle,ewcPP_PMESENDX);
 +  }
 +#endif /* GMX_MPI */
 +
 +    /* Communicate coordinates and sum dipole if necessary */
 +    if (PAR(cr))
 +    {
 +        wallcycle_start(wcycle,ewcMOVEX);
 +        if (DOMAINDECOMP(cr))
 +        {
 +            dd_move_x(cr->dd,box,x);
 +        }
 +        else
 +        {
 +            move_x(fplog,cr,GMX_LEFT,GMX_RIGHT,x,nrnb);
 +        }
 +        /* When we don't need the total dipole we sum it in global_stat */
 +        if (bStateChanged && NEED_MUTOT(*inputrec))
 +        {
 +            gmx_sumd(2*DIM,mu,cr);
 +        }
 +        wallcycle_stop(wcycle,ewcMOVEX);
 +    }
 +    if (bStateChanged)
 +    {
 +
 +        /* update adress weight beforehand */
 +        if(bDoAdressWF)
 +        {
 +            /* need pbc for adress weight calculation with pbc_dx */
 +            set_pbc(&pbc,inputrec->ePBC,box);
 +            if(fr->adress_site == eAdressSITEcog)
 +            {
 +                update_adress_weights_cog(top->idef.iparams,top->idef.il,x,fr,mdatoms,
 +                                          inputrec->ePBC==epbcNONE ? NULL : &pbc);
 +            }
 +            else if (fr->adress_site == eAdressSITEcom)
 +            {
 +                update_adress_weights_com(fplog,cg0,cg1,&(top->cgs),x,fr,mdatoms,
 +                                          inputrec->ePBC==epbcNONE ? NULL : &pbc);
 +            }
 +            else if (fr->adress_site == eAdressSITEatomatom){
 +                update_adress_weights_atom_per_atom(cg0,cg1,&(top->cgs),x,fr,mdatoms,
 +                                          inputrec->ePBC==epbcNONE ? NULL : &pbc);
 +            }
 +            else
 +            {
 +                update_adress_weights_atom(cg0,cg1,&(top->cgs),x,fr,mdatoms,
 +                                           inputrec->ePBC==epbcNONE ? NULL : &pbc);
 +            }
 +        }
 +
 +        for(i=0; i<2; i++)
 +        {
 +            for(j=0;j<DIM;j++)
 +            {
 +                fr->mu_tot[i][j] = mu[i*DIM + j];
 +            }
 +        }
 +    }
 +    if (fr->efep == efepNO)
 +    {
 +        copy_rvec(fr->mu_tot[0],mu_tot);
 +    }
 +    else
 +    {
 +        for(j=0; j<DIM; j++)
 +        {
 +            mu_tot[j] =
 +                (1.0 - lambda)*fr->mu_tot[0][j] + lambda*fr->mu_tot[1][j];
 +        }
 +    }
 +
 +    /* Reset energies */
 +    reset_enerdata(&(inputrec->opts),fr,bNS,enerd,MASTER(cr));
 +    clear_rvecs(SHIFTS,fr->fshift);
 +
 +    if (bNS)
 +    {
 +        wallcycle_start(wcycle,ewcNS);
 +        
 +        if (graph && bStateChanged)
 +        {
 +            /* Calculate intramolecular shift vectors to make molecules whole */
 +            mk_mshift(fplog,graph,fr->ePBC,box,x);
 +        }
 +
 +        /* Reset long range forces if necessary */
 +        if (fr->bTwinRange)
 +        {
 +            /* Reset the (long-range) forces if necessary */
 +            clear_rvecs(fr->natoms_force_constr,bSepLRF ? fr->f_twin : f);
 +        }
 +
 +        /* Do the actual neighbour searching and if twin range electrostatics
 +         * also do the calculation of long range forces and energies.
 +         */
 +        dvdl = 0; 
 +        ns(fplog,fr,x,box,
 +           groups,&(inputrec->opts),top,mdatoms,
 +           cr,nrnb,lambda,&dvdl,&enerd->grpp,bFillGrid,
 +           bDoLongRange,bDoForces,bSepLRF ? fr->f_twin : f);
 +        if (bSepDVDL)
 +        {
 +            fprintf(fplog,sepdvdlformat,"LR non-bonded",0.0,dvdl);
 +        }
 +        enerd->dvdl_lin += dvdl;
 +        
 +        wallcycle_stop(wcycle,ewcNS);
 +    }
 +      
 +    if (inputrec->implicit_solvent && bNS) 
 +    {
 +        make_gb_nblist(cr,inputrec->gb_algorithm,inputrec->rlist,
 +                       x,box,fr,&top->idef,graph,fr->born);
 +    }
 +      
 +    if (DOMAINDECOMP(cr))
 +    {
 +        if (!(cr->duty & DUTY_PME))
 +        {
 +            wallcycle_start(wcycle,ewcPPDURINGPME);
 +            dd_force_flop_start(cr->dd,nrnb);
 +        }
 +    }
 +    
 +    if (inputrec->bRot)
 +    {
 +        /* Enforced rotation has its own cycle counter that starts after the collective
 +         * coordinates have been communicated. It is added to ddCyclF to allow
 +         * for proper load-balancing */
 +        wallcycle_start(wcycle,ewcROT);
 +        do_rotation(cr,inputrec,box,x,t,step,wcycle,bNS);
 +        wallcycle_stop(wcycle,ewcROT);
 +    }
 +
 +    /* Start the force cycle counter.
 +     * This counter is stopped in do_forcelow_level.
 +     * No parallel communication should occur while this counter is running,
 +     * since that will interfere with the dynamic load balancing.
 +     */
 +    wallcycle_start(wcycle,ewcFORCE);
 +
 +    if (bDoForces)
 +    {
 +        /* Reset forces for which the virial is calculated separately:
 +         * PME/Ewald forces if necessary */
 +        if (fr->bF_NoVirSum) 
 +        {
 +            if (flags & GMX_FORCE_VIRIAL)
 +            {
 +                fr->f_novirsum = fr->f_novirsum_alloc;
 +                if (fr->bDomDec)
 +                {
 +                    clear_rvecs(fr->f_novirsum_n,fr->f_novirsum);
 +                }
 +                else
 +                {
 +                    clear_rvecs(homenr,fr->f_novirsum+start);
 +                }
 +            }
 +            else
 +            {
 +                /* We are not calculating the pressure so we do not need
 +                 * a separate array for forces that do not contribute
 +                 * to the pressure.
 +                 */
 +                fr->f_novirsum = f;
 +            }
 +        }
 +
 +        if (bSepLRF)
 +        {
 +            /* Add the long range forces to the short range forces */
 +            for(i=0; i<fr->natoms_force_constr; i++)
 +            {
 +                copy_rvec(fr->f_twin[i],f[i]);
 +            }
 +        }
 +        else if (!(fr->bTwinRange && bNS))
 +        {
 +            /* Clear the short-range forces */
 +            clear_rvecs(fr->natoms_force_constr,f);
 +        }
 +
 +        clear_rvec(fr->vir_diag_posres);
 +    }
 +    if (inputrec->ePull == epullCONSTRAINT)
 +    {
 +        clear_pull_forces(inputrec->pull);
 +    }
 +
 +    /* update QMMMrec, if necessary */
 +    if(fr->bQMMM)
 +    {
 +        update_QMMMrec(cr,fr,x,mdatoms,box,top);
 +    }
 +
 +    if ((flags & GMX_FORCE_BONDED) && top->idef.il[F_POSRES].nr > 0)
 +    {
 +        /* Position restraints always require full pbc. Check if we already did it for Adress */
 +        if(!(bStateChanged && bDoAdressWF))
 +        {
 +            set_pbc(&pbc,inputrec->ePBC,box);
 +        }
 +        v = posres(top->idef.il[F_POSRES].nr,top->idef.il[F_POSRES].iatoms,
 +                   top->idef.iparams_posres,
 +                   (const rvec*)x,fr->f_novirsum,fr->vir_diag_posres,
 +                   inputrec->ePBC==epbcNONE ? NULL : &pbc,lambda,&dvdl,
 +                   fr->rc_scaling,fr->ePBC,fr->posres_com,fr->posres_comB);
 +        if (bSepDVDL)
 +        {
 +            fprintf(fplog,sepdvdlformat,
 +                    interaction_function[F_POSRES].longname,v,dvdl);
 +        }
 +        enerd->term[F_POSRES] += v;
 +        /* This linear lambda dependence assumption is only correct
 +         * when only k depends on lambda,
 +         * not when the reference position depends on lambda.
 +         * grompp checks for this.
 +         */
 +        enerd->dvdl_lin += dvdl;
 +        inc_nrnb(nrnb,eNR_POSRES,top->idef.il[F_POSRES].nr/2);
 +    }
 +
 +    /* Compute the bonded and non-bonded energies and optionally forces */    
 +    do_force_lowlevel(fplog,step,fr,inputrec,&(top->idef),
 +                      cr,nrnb,wcycle,mdatoms,&(inputrec->opts),
 +                      x,hist,f,enerd,fcd,mtop,top,fr->born,
 +                      &(top->atomtypes),bBornRadii,box,
 +                      lambda,graph,&(top->excls),fr->mu_tot,
 +                      flags,&cycles_pme);
 +    
 +    cycles_force = wallcycle_stop(wcycle,ewcFORCE);
 +    
 +    if (ed)
 +    {
 +        do_flood(fplog,cr,x,f,ed,box,step);
 +    }
 +      
 +    if (DOMAINDECOMP(cr))
 +    {
 +        dd_force_flop_stop(cr->dd,nrnb);
 +        if (wcycle)
 +        {
 +            dd_cycles_add(cr->dd,cycles_force-cycles_pme,ddCyclF);
 +        }
 +    }
 +    
 +    if (bDoForces)
 +    {
 +        if (IR_ELEC_FIELD(*inputrec))
 +        {
 +            /* Compute forces due to electric field */
 +            calc_f_el(MASTER(cr) ? field : NULL,
 +                      start,homenr,mdatoms->chargeA,x,fr->f_novirsum,
 +                      inputrec->ex,inputrec->et,t);
 +        }
 +
 +        if (bDoAdressWF && fr->adress_icor == eAdressICThermoForce)
 +        {
 +            /* Compute thermodynamic force in hybrid AdResS region */
 +            adress_thermo_force(start,homenr,&(top->cgs),x,fr->f_novirsum,fr,mdatoms,
 +                                inputrec->ePBC==epbcNONE ? NULL : &pbc);
 +        }
 +        
 +        /* Communicate the forces */
 +        if (PAR(cr))
 +        {
 +            wallcycle_start(wcycle,ewcMOVEF);
 +            if (DOMAINDECOMP(cr))
 +            {
 +                dd_move_f(cr->dd,f,fr->fshift);
 +                /* Do we need to communicate the separate force array
 +                 * for terms that do not contribute to the single sum virial?
 +                 * Position restraints and electric fields do not introduce
 +                 * inter-cg forces, only full electrostatics methods do.
 +                 * When we do not calculate the virial, fr->f_novirsum = f,
 +                 * so we have already communicated these forces.
 +                 */
 +                if (EEL_FULL(fr->eeltype) && cr->dd->n_intercg_excl &&
 +                    (flags & GMX_FORCE_VIRIAL))
 +                {
 +                    dd_move_f(cr->dd,fr->f_novirsum,NULL);
 +                }
 +                if (bSepLRF)
 +                {
 +                    /* We should not update the shift forces here,
 +                     * since f_twin is already included in f.
 +                     */
 +                    dd_move_f(cr->dd,fr->f_twin,NULL);
 +                }
 +            }
 +            else
 +            {
 +                pd_move_f(cr,f,nrnb);
 +                if (bSepLRF)
 +                {
 +                    pd_move_f(cr,fr->f_twin,nrnb);
 +                }
 +            }
 +            wallcycle_stop(wcycle,ewcMOVEF);
 +        }
 +
 +        /* If we have NoVirSum forces, but we do not calculate the virial,
 +         * we sum fr->f_novirum=f later.
 +         */
 +        if (vsite && !(fr->bF_NoVirSum && !(flags & GMX_FORCE_VIRIAL)))
 +        {
 +            wallcycle_start(wcycle,ewcVSITESPREAD);
-                 spread_vsite_f(fplog,vsite,x,fr->f_twin,NULL,
++            spread_vsite_f(fplog,vsite,x,f,fr->fshift,FALSE,NULL,nrnb,
 +                           &top->idef,fr->ePBC,fr->bMolPBC,graph,box,cr);
 +            wallcycle_stop(wcycle,ewcVSITESPREAD);
 +
 +            if (bSepLRF)
 +            {
 +                wallcycle_start(wcycle,ewcVSITESPREAD);
-             spread_vsite_f(fplog,vsite,x,fr->f_novirsum,NULL,nrnb,
++                spread_vsite_f(fplog,vsite,x,fr->f_twin,NULL,FALSE,NULL,
 +                               nrnb,
 +                               &top->idef,fr->ePBC,fr->bMolPBC,graph,box,cr);
 +                wallcycle_stop(wcycle,ewcVSITESPREAD);
 +            }
 +        }
 +        
 +        if (flags & GMX_FORCE_VIRIAL)
 +        {
 +            /* Calculation of the virial must be done after vsites! */
 +            calc_virial(fplog,mdatoms->start,mdatoms->homenr,x,f,
 +                        vir_force,graph,box,nrnb,fr,inputrec->ePBC);
 +        }
 +    }
 +
 +    enerd->term[F_COM_PULL] = 0;
 +    if (inputrec->ePull == epullUMBRELLA || inputrec->ePull == epullCONST_F)
 +    {
 +        /* Calculate the center of mass forces, this requires communication,
 +         * which is why pull_potential is called close to other communication.
 +         * The virial contribution is calculated directly,
 +         * which is why we call pull_potential after calc_virial.
 +         */
 +        set_pbc(&pbc,inputrec->ePBC,box);
 +        dvdl = 0; 
 +        enerd->term[F_COM_PULL] +=
 +            pull_potential(inputrec->ePull,inputrec->pull,mdatoms,&pbc,
 +                           cr,t,lambda,x,f,vir_force,&dvdl);
 +        if (bSepDVDL)
 +        {
 +            fprintf(fplog,sepdvdlformat,"Com pull",enerd->term[F_COM_PULL],dvdl);
 +        }
 +        enerd->dvdl_lin += dvdl;
 +    }
 +    
 +    /* Add the forces from enforced rotation potentials (if any) */
 +    if (inputrec->bRot)
 +    {
 +        wallcycle_start(wcycle,ewcROTadd);
 +        enerd->term[F_COM_PULL] += add_rot_forces(inputrec->rot, f, cr,step,t);
 +        wallcycle_stop(wcycle,ewcROTadd);
 +    }
 +
 +    if (PAR(cr) && !(cr->duty & DUTY_PME))
 +    {
 +        cycles_ppdpme = wallcycle_stop(wcycle,ewcPPDURINGPME);
 +        dd_cycles_add(cr->dd,cycles_ppdpme,ddCyclPPduringPME);
 +
 +        /* In case of node-splitting, the PP nodes receive the long-range 
 +         * forces, virial and energy from the PME nodes here.
 +         */    
 +        wallcycle_start(wcycle,ewcPP_PMEWAITRECVF);
 +        dvdl = 0;
 +        gmx_pme_receive_f(cr,fr->f_novirsum,fr->vir_el_recip,&e,&dvdl,
 +                          &cycles_seppme);
 +        if (bSepDVDL)
 +        {
 +            fprintf(fplog,sepdvdlformat,"PME mesh",e,dvdl);
 +        }
 +        enerd->term[F_COUL_RECIP] += e;
 +        enerd->dvdl_lin += dvdl;
 +        if (wcycle)
 +        {
 +            dd_cycles_add(cr->dd,cycles_seppme,ddCyclPME);
 +        }
 +        wallcycle_stop(wcycle,ewcPP_PMEWAITRECVF);
 +    }
 +
 +    if (bDoForces && fr->bF_NoVirSum)
 +    {
 +        if (vsite)
 +        {
 +            /* Spread the mesh force on virtual sites to the other particles... 
 +             * This is parallellized. MPI communication is performed
 +             * if the constructing atoms aren't local.
 +             */
 +            wallcycle_start(wcycle,ewcVSITESPREAD);
++            spread_vsite_f(fplog,vsite,x,fr->f_novirsum,NULL,
++                           (flags & GMX_FORCE_VIRIAL),fr->vir_el_recip,
++                           nrnb,
 +                           &top->idef,fr->ePBC,fr->bMolPBC,graph,box,cr);
 +            wallcycle_stop(wcycle,ewcVSITESPREAD);
 +        }
 +        if (flags & GMX_FORCE_VIRIAL)
 +        {
 +            /* Now add the forces, this is local */
 +            if (fr->bDomDec)
 +            {
 +                sum_forces(0,fr->f_novirsum_n,f,fr->f_novirsum);
 +            }
 +            else
 +            {
 +                sum_forces(start,start+homenr,f,fr->f_novirsum);
 +            }
 +            if (EEL_FULL(fr->eeltype))
 +            {
 +                /* Add the mesh contribution to the virial */
 +                m_add(vir_force,fr->vir_el_recip,vir_force);
 +            }
 +            if (debug)
 +            {
 +                pr_rvecs(debug,0,"vir_force",vir_force,DIM);
 +            }
 +        }
 +    }
 +    
 +    /* Sum the potential energy terms from group contributions */
 +    sum_epot(&(inputrec->opts),enerd);
 +    
 +    if (fr->print_force >= 0 && bDoForces)
 +    {
 +        print_large_forces(stderr,mdatoms,cr,step,fr->print_force,x,f);
 +    }
 +}
 +
 +void do_constrain_first(FILE *fplog,gmx_constr_t constr,
 +                        t_inputrec *ir,t_mdatoms *md,
 +                        t_state *state,rvec *f,
 +                        t_graph *graph,t_commrec *cr,t_nrnb *nrnb,
 +                        t_forcerec *fr, gmx_localtop_t *top, tensor shake_vir)
 +{
 +    int    i,m,start,end;
 +    gmx_large_int_t step;
 +    double mass,tmass,vcm[4];
 +    real   dt=ir->delta_t;
 +    real   dvdlambda;
 +    rvec   *savex;
 +    
 +    snew(savex,state->natoms);
 +
 +    start = md->start;
 +    end   = md->homenr + start;
 +    
 +    if (debug)
 +        fprintf(debug,"vcm: start=%d, homenr=%d, end=%d\n",
 +                start,md->homenr,end);
 +    /* Do a first constrain to reset particles... */
 +    step = ir->init_step;
 +    if (fplog)
 +    {
 +        char buf[STEPSTRSIZE];
 +        fprintf(fplog,"\nConstraining the starting coordinates (step %s)\n",
 +                gmx_step_str(step,buf));
 +    }
 +    dvdlambda = 0;
 +    
 +    /* constrain the current position */
 +    constrain(NULL,TRUE,FALSE,constr,&(top->idef),
 +              ir,NULL,cr,step,0,md,
 +              state->x,state->x,NULL,
 +              state->box,state->lambda,&dvdlambda,
 +              NULL,NULL,nrnb,econqCoord,ir->epc==epcMTTK,state->veta,state->veta);
 +    if (EI_VV(ir->eI)) 
 +    {
 +        /* constrain the inital velocity, and save it */
 +        /* also may be useful if we need the ekin from the halfstep for velocity verlet */
 +        /* might not yet treat veta correctly */
 +        constrain(NULL,TRUE,FALSE,constr,&(top->idef),
 +                  ir,NULL,cr,step,0,md,
 +                  state->x,state->v,state->v,
 +                  state->box,state->lambda,&dvdlambda,
 +                  NULL,NULL,nrnb,econqVeloc,ir->epc==epcMTTK,state->veta,state->veta);
 +    }
 +    /* constrain the inital velocities at t-dt/2 */
 +    if (EI_STATE_VELOCITY(ir->eI) && ir->eI!=eiVV)
 +    {
 +        for(i=start; (i<end); i++) 
 +        {
 +            for(m=0; (m<DIM); m++) 
 +            {
 +                /* Reverse the velocity */
 +                state->v[i][m] = -state->v[i][m];
 +                /* Store the position at t-dt in buf */
 +                savex[i][m] = state->x[i][m] + dt*state->v[i][m];
 +            }
 +        }
 +    /* Shake the positions at t=-dt with the positions at t=0                        
 +     * as reference coordinates.                                                     
 +         */
 +        if (fplog)
 +        {
 +            char buf[STEPSTRSIZE];
 +            fprintf(fplog,"\nConstraining the coordinates at t0-dt (step %s)\n",
 +                    gmx_step_str(step,buf));
 +        }
 +        dvdlambda = 0;
 +        constrain(NULL,TRUE,FALSE,constr,&(top->idef),
 +                  ir,NULL,cr,step,-1,md,
 +                  state->x,savex,NULL,
 +                  state->box,state->lambda,&dvdlambda,
 +                  state->v,NULL,nrnb,econqCoord,ir->epc==epcMTTK,state->veta,state->veta);
 +        
 +        for(i=start; i<end; i++) {
 +            for(m=0; m<DIM; m++) {
 +                /* Re-reverse the velocities */
 +                state->v[i][m] = -state->v[i][m];
 +            }
 +        }
 +    }
 +    
 +    for(m=0; (m<4); m++)
 +        vcm[m] = 0;
 +    for(i=start; i<end; i++) {
 +        mass = md->massT[i];
 +        for(m=0; m<DIM; m++) {
 +            vcm[m] += state->v[i][m]*mass;
 +        }
 +        vcm[3] += mass;
 +    }
 +    
 +    if (ir->nstcomm != 0 || debug) {
 +        /* Compute the global sum of vcm */
 +        if (debug)
 +            fprintf(debug,"vcm: %8.3f  %8.3f  %8.3f,"
 +                    " total mass = %12.5e\n",vcm[XX],vcm[YY],vcm[ZZ],vcm[3]);
 +        if (PAR(cr))
 +            gmx_sumd(4,vcm,cr);
 +        tmass = vcm[3];
 +        for(m=0; (m<DIM); m++)
 +            vcm[m] /= tmass;
 +        if (debug) 
 +            fprintf(debug,"vcm: %8.3f  %8.3f  %8.3f,"
 +                    " total mass = %12.5e\n",vcm[XX],vcm[YY],vcm[ZZ],tmass);
 +        if (ir->nstcomm != 0) {
 +            /* Now we have the velocity of center of mass, let's remove it */
 +            for(i=start; (i<end); i++) {
 +                for(m=0; (m<DIM); m++)
 +                    state->v[i][m] -= vcm[m];
 +            }
 +
 +        }
 +    }
 +    sfree(savex);
 +}
 +
 +void calc_enervirdiff(FILE *fplog,int eDispCorr,t_forcerec *fr)
 +{
 +  double eners[2],virs[2],enersum,virsum,y0,f,g,h;
 +  double r0,r1,r,rc3,rc9,ea,eb,ec,pa,pb,pc,pd;
 +  double invscale,invscale2,invscale3;
 +  int    ri0,ri1,ri,i,offstart,offset;
 +  real   scale,*vdwtab; 
 +
 +  fr->enershiftsix = 0;
 +  fr->enershifttwelve = 0;
 +  fr->enerdiffsix = 0;
 +  fr->enerdifftwelve = 0;
 +  fr->virdiffsix = 0;
 +  fr->virdifftwelve = 0;
 +
 +  if (eDispCorr != edispcNO) {
 +    for(i=0; i<2; i++) {
 +      eners[i] = 0;
 +      virs[i]  = 0;
 +    }
 +    if ((fr->vdwtype == evdwSWITCH) || (fr->vdwtype == evdwSHIFT)) {
 +      if (fr->rvdw_switch == 0)
 +      gmx_fatal(FARGS,
 +                "With dispersion correction rvdw-switch can not be zero "
 +                "for vdw-type = %s",evdw_names[fr->vdwtype]);
 +
 +      scale  = fr->nblists[0].tab.scale;
 +      vdwtab = fr->nblists[0].vdwtab;
 +
 +      /* Round the cut-offs to exact table values for precision */
 +      ri0 = floor(fr->rvdw_switch*scale);
 +      ri1 = ceil(fr->rvdw*scale);
 +      r0  = ri0/scale;
 +      r1  = ri1/scale;
 +      rc3 = r0*r0*r0;
 +      rc9  = rc3*rc3*rc3;
 +
 +      if (fr->vdwtype == evdwSHIFT) {
 +      /* Determine the constant energy shift below rvdw_switch */
 +      fr->enershiftsix    = (real)(-1.0/(rc3*rc3)) - vdwtab[8*ri0];
 +      fr->enershifttwelve = (real)( 1.0/(rc9*rc3)) - vdwtab[8*ri0 + 4];
 +      }
 +      /* Add the constant part from 0 to rvdw_switch.
 +       * This integration from 0 to rvdw_switch overcounts the number
 +       * of interactions by 1, as it also counts the self interaction.
 +       * We will correct for this later.
 +       */
 +      eners[0] += 4.0*M_PI*fr->enershiftsix*rc3/3.0;
 +      eners[1] += 4.0*M_PI*fr->enershifttwelve*rc3/3.0;
 +      
 +      invscale = 1.0/(scale);  
 +      invscale2 = invscale*invscale;
 +      invscale3 = invscale*invscale2;
 +
 +      /* following summation derived from cubic spline definition,
 +      Numerical Recipies in C, second edition, p. 113-116.  Exact
 +      for the cubic spline.  We first calculate the negative of
 +      the energy from rvdw to rvdw_switch, assuming that g(r)=1,
 +      and then add the more standard, abrupt cutoff correction to
 +      that result, yielding the long-range correction for a
 +      switched function.  We perform both the pressure and energy
 +      loops at the same time for simplicity, as the computational
 +      cost is low. */
 +      
 +      for (i=0;i<2;i++) {
 +        enersum = 0.0; virsum = 0.0;
 +        if (i==0)
 +        offstart = 0;
 +      else
 +        offstart = 4;
 +      for (ri=ri0; ri<ri1; ri++) {
 +          r = ri*invscale;
 +          ea = invscale3;
 +          eb = 2.0*invscale2*r;
 +          ec = invscale*r*r;
 +          
 +          pa = invscale3;
 +          pb = 3.0*invscale2*r;
 +          pc = 3.0*invscale*r*r;
 +          pd = r*r*r;
 +          
 +          /* this "8" is from the packing in the vdwtab array - perhaps
 +          should be #define'ed? */
 +          offset = 8*ri + offstart;
 +          y0 = vdwtab[offset];
 +          f = vdwtab[offset+1];
 +          g = vdwtab[offset+2];
 +          h = vdwtab[offset+3];
 +        
 +          enersum += y0*(ea/3 + eb/2 + ec) + f*(ea/4 + eb/3 + ec/2)+
 +            g*(ea/5 + eb/4 + ec/3) + h*(ea/6 + eb/5 + ec/4);  
 +          virsum  +=  f*(pa/4 + pb/3 + pc/2 + pd) + 
 +            2*g*(pa/5 + pb/4 + pc/3 + pd/2) + 3*h*(pa/6 + pb/5 + pc/4 + pd/3);
 +        
 +        }
 +        enersum *= 4.0*M_PI;
 +        virsum  *= 4.0*M_PI; 
 +        eners[i] -= enersum;
 +        virs[i]  -= virsum;
 +      }
 +
 +      /* now add the correction for rvdw_switch to infinity */
 +      eners[0] += -4.0*M_PI/(3.0*rc3);
 +      eners[1] +=  4.0*M_PI/(9.0*rc9);
 +      virs[0]  +=  8.0*M_PI/rc3;
 +      virs[1]  += -16.0*M_PI/(3.0*rc9);
 +    } 
 +    else if ((fr->vdwtype == evdwCUT) || (fr->vdwtype == evdwUSER)) {
 +      if (fr->vdwtype == evdwUSER && fplog)
 +      fprintf(fplog,
 +              "WARNING: using dispersion correction with user tables\n");
 +      rc3  = fr->rvdw*fr->rvdw*fr->rvdw;
 +      rc9  = rc3*rc3*rc3;
 +      eners[0] += -4.0*M_PI/(3.0*rc3);
 +      eners[1] +=  4.0*M_PI/(9.0*rc9);
 +      virs[0]  +=  8.0*M_PI/rc3;
 +      virs[1]  += -16.0*M_PI/(3.0*rc9);
 +    } else {
 +      gmx_fatal(FARGS,
 +              "Dispersion correction is not implemented for vdw-type = %s",
 +              evdw_names[fr->vdwtype]);
 +    }
 +    fr->enerdiffsix    = eners[0];
 +    fr->enerdifftwelve = eners[1];
 +    /* The 0.5 is due to the Gromacs definition of the virial */
 +    fr->virdiffsix     = 0.5*virs[0];
 +    fr->virdifftwelve  = 0.5*virs[1];
 +  }
 +}
 +
 +void calc_dispcorr(FILE *fplog,t_inputrec *ir,t_forcerec *fr,
 +                   gmx_large_int_t step,int natoms,
 +                   matrix box,real lambda,tensor pres,tensor virial,
 +                   real *prescorr, real *enercorr, real *dvdlcorr)
 +{
 +    gmx_bool bCorrAll,bCorrPres;
 +    real dvdlambda,invvol,dens,ninter,avcsix,avctwelve,enerdiff,svir=0,spres=0;
 +    int  m;
 +    
 +    *prescorr = 0;
 +    *enercorr = 0;
 +    *dvdlcorr = 0;
 +    
 +    clear_mat(virial);
 +    clear_mat(pres);
 +    
 +    if (ir->eDispCorr != edispcNO) {
 +        bCorrAll  = (ir->eDispCorr == edispcAllEner ||
 +                     ir->eDispCorr == edispcAllEnerPres);
 +        bCorrPres = (ir->eDispCorr == edispcEnerPres ||
 +                     ir->eDispCorr == edispcAllEnerPres);
 +        
 +        invvol = 1/det(box);
 +        if (fr->n_tpi) 
 +        {
 +            /* Only correct for the interactions with the inserted molecule */
 +            dens = (natoms - fr->n_tpi)*invvol;
 +            ninter = fr->n_tpi;
 +        } 
 +        else 
 +        {
 +            dens = natoms*invvol;
 +            ninter = 0.5*natoms;
 +        }
 +        
 +        if (ir->efep == efepNO) 
 +        {
 +            avcsix    = fr->avcsix[0];
 +            avctwelve = fr->avctwelve[0];
 +        } 
 +        else 
 +        {
 +            avcsix    = (1 - lambda)*fr->avcsix[0]    + lambda*fr->avcsix[1];
 +            avctwelve = (1 - lambda)*fr->avctwelve[0] + lambda*fr->avctwelve[1];
 +        }
 +        
 +        enerdiff = ninter*(dens*fr->enerdiffsix - fr->enershiftsix);
 +        *enercorr += avcsix*enerdiff;
 +        dvdlambda = 0.0;
 +        if (ir->efep != efepNO) 
 +        {
 +            dvdlambda += (fr->avcsix[1] - fr->avcsix[0])*enerdiff;
 +        }
 +        if (bCorrAll) 
 +        {
 +            enerdiff = ninter*(dens*fr->enerdifftwelve - fr->enershifttwelve);
 +            *enercorr += avctwelve*enerdiff;
 +            if (fr->efep != efepNO) 
 +            {
 +                dvdlambda += (fr->avctwelve[1] - fr->avctwelve[0])*enerdiff;
 +            }
 +        }
 +        
 +        if (bCorrPres) 
 +        {
 +            svir = ninter*dens*avcsix*fr->virdiffsix/3.0;
 +            if (ir->eDispCorr == edispcAllEnerPres)
 +            {
 +                svir += ninter*dens*avctwelve*fr->virdifftwelve/3.0;
 +            }
 +            /* The factor 2 is because of the Gromacs virial definition */
 +            spres = -2.0*invvol*svir*PRESFAC;
 +            
 +            for(m=0; m<DIM; m++) {
 +                virial[m][m] += svir;
 +                pres[m][m] += spres;
 +            }
 +            *prescorr += spres;
 +        }
 +        
 +        /* Can't currently control when it prints, for now, just print when degugging */
 +        if (debug)
 +        {
 +            if (bCorrAll) {
 +                fprintf(debug,"Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
 +                        avcsix,avctwelve);
 +            }
 +            if (bCorrPres) 
 +            {
 +                fprintf(debug,
 +                        "Long Range LJ corr.: Epot %10g, Pres: %10g, Vir: %10g\n",
 +                        *enercorr,spres,svir);
 +            }
 +            else
 +            {
 +                fprintf(debug,"Long Range LJ corr.: Epot %10g\n",*enercorr);
 +            }
 +        }
 +        
 +        if (fr->bSepDVDL && do_per_step(step,ir->nstlog))
 +        {
 +            fprintf(fplog,sepdvdlformat,"Dispersion correction",
 +                    *enercorr,dvdlambda);
 +        }
 +        if (fr->efep != efepNO) 
 +        {
 +            *dvdlcorr += dvdlambda;
 +        }
 +    }
 +}
 +
 +void do_pbc_first(FILE *fplog,matrix box,t_forcerec *fr,
 +                t_graph *graph,rvec x[])
 +{
 +  if (fplog)
 +    fprintf(fplog,"Removing pbc first time\n");
 +  calc_shifts(box,fr->shift_vec);
 +  if (graph) {
 +    mk_mshift(fplog,graph,fr->ePBC,box,x);
 +    if (gmx_debug_at)
 +      p_graph(debug,"do_pbc_first 1",graph);
 +    shift_self(graph,box,x);
 +    /* By doing an extra mk_mshift the molecules that are broken
 +     * because they were e.g. imported from another software
 +     * will be made whole again. Such are the healing powers
 +     * of GROMACS.
 +     */
 +    mk_mshift(fplog,graph,fr->ePBC,box,x);
 +    if (gmx_debug_at)
 +      p_graph(debug,"do_pbc_first 2",graph);
 +  }
 +  if (fplog)
 +    fprintf(fplog,"Done rmpbc\n");
 +}
 +
 +static void low_do_pbc_mtop(FILE *fplog,int ePBC,matrix box,
 +                          gmx_mtop_t *mtop,rvec x[],
 +                          gmx_bool bFirst)
 +{
 +  t_graph *graph;
 +  int mb,as,mol;
 +  gmx_molblock_t *molb;
 +
 +  if (bFirst && fplog)
 +    fprintf(fplog,"Removing pbc first time\n");
 +
 +  snew(graph,1);
 +  as = 0;
 +  for(mb=0; mb<mtop->nmolblock; mb++) {
 +    molb = &mtop->molblock[mb];
 +    if (molb->natoms_mol == 1 || 
 +      (!bFirst && mtop->moltype[molb->type].cgs.nr == 1)) {
 +      /* Just one atom or charge group in the molecule, no PBC required */
 +      as += molb->nmol*molb->natoms_mol;
 +    } else {
 +      /* Pass NULL iso fplog to avoid graph prints for each molecule type */
 +      mk_graph_ilist(NULL,mtop->moltype[molb->type].ilist,
 +                   0,molb->natoms_mol,FALSE,FALSE,graph);
 +      
 +      for(mol=0; mol<molb->nmol; mol++) {
 +      mk_mshift(fplog,graph,ePBC,box,x+as);
 +      
 +      shift_self(graph,box,x+as);
 +      /* The molecule is whole now.
 +       * We don't need the second mk_mshift call as in do_pbc_first,
 +       * since we no longer need this graph.
 +       */
 +      
 +      as += molb->natoms_mol;
 +      }
 +      done_graph(graph);
 +    }
 +  }
 +  sfree(graph);
 +}
 +
 +void do_pbc_first_mtop(FILE *fplog,int ePBC,matrix box,
 +                     gmx_mtop_t *mtop,rvec x[])
 +{
 +  low_do_pbc_mtop(fplog,ePBC,box,mtop,x,TRUE);
 +}
 +
 +void do_pbc_mtop(FILE *fplog,int ePBC,matrix box,
 +               gmx_mtop_t *mtop,rvec x[])
 +{
 +  low_do_pbc_mtop(fplog,ePBC,box,mtop,x,FALSE);
 +}
 +
 +void finish_run(FILE *fplog,t_commrec *cr,const char *confout,
 +                t_inputrec *inputrec,
 +                t_nrnb nrnb[],gmx_wallcycle_t wcycle,
 +                gmx_runtime_t *runtime,
 +                gmx_bool bWriteStat)
 +{
 +  int    i,j;
 +  t_nrnb *nrnb_tot=NULL;
 +  real   delta_t;
 +  double nbfs,mflop;
 +  double cycles[ewcNR];
 +
 +  wallcycle_sum(cr,wcycle,cycles);
 +
 +  if (cr->nnodes > 1) {
 +    if (SIMMASTER(cr))
 +      snew(nrnb_tot,1);
 +#ifdef GMX_MPI
 +    MPI_Reduce(nrnb->n,nrnb_tot->n,eNRNB,MPI_DOUBLE,MPI_SUM,
 +               MASTERRANK(cr),cr->mpi_comm_mysim);
 +#endif  
 +  } else {
 +    nrnb_tot = nrnb;
 +  }
 +    
 +  if (SIMMASTER(cr)) {
 +    print_flop(fplog,nrnb_tot,&nbfs,&mflop);
 +    if (cr->nnodes > 1) {
 +      sfree(nrnb_tot);
 +    }
 +  }
 +
 +  if ((cr->duty & DUTY_PP) && DOMAINDECOMP(cr)) {
 +    print_dd_statistics(cr,inputrec,fplog);
 +  }
 +
 +#ifdef GMX_MPI
 +    if (PARTDECOMP(cr))
 +    {
 +        if (MASTER(cr))
 +        {
 +            t_nrnb     *nrnb_all;
 +            int        s;
 +            MPI_Status stat;
 +
 +            snew(nrnb_all,cr->nnodes);
 +            nrnb_all[0] = *nrnb;
 +            for(s=1; s<cr->nnodes; s++)
 +            {
 +                MPI_Recv(nrnb_all[s].n,eNRNB,MPI_DOUBLE,s,0,
 +                         cr->mpi_comm_mysim,&stat);
 +            }
 +            pr_load(fplog,cr,nrnb_all);
 +            sfree(nrnb_all);
 +        }
 +        else
 +        {
 +            MPI_Send(nrnb->n,eNRNB,MPI_DOUBLE,MASTERRANK(cr),0,
 +                     cr->mpi_comm_mysim);
 +        }
 +    }
 +#endif  
 +
 +  if (SIMMASTER(cr)) {
 +    wallcycle_print(fplog,cr->nnodes,cr->npmenodes,runtime->realtime,
 +                    wcycle,cycles);
 +
 +    if (EI_DYNAMICS(inputrec->eI)) {
 +      delta_t = inputrec->delta_t;
 +    } else {
 +      delta_t = 0;
 +    }
 +    
 +    if (fplog) {
 +        print_perf(fplog,runtime->proctime,runtime->realtime,
 +                   cr->nnodes-cr->npmenodes,
 +                   runtime->nsteps_done,delta_t,nbfs,mflop);
 +    }
 +    if (bWriteStat) {
 +        print_perf(stderr,runtime->proctime,runtime->realtime,
 +                   cr->nnodes-cr->npmenodes,
 +                   runtime->nsteps_done,delta_t,nbfs,mflop);
 +    }
 +
 +    /*
 +    runtime=inputrec->nsteps*inputrec->delta_t;
 +    if (bWriteStat) {
 +      if (cr->nnodes == 1)
 +      fprintf(stderr,"\n\n");
 +      print_perf(stderr,nodetime,realtime,runtime,&ntot,
 +               cr->nnodes-cr->npmenodes,FALSE);
 +    }
 +    wallcycle_print(fplog,cr->nnodes,cr->npmenodes,realtime,wcycle,cycles);
 +    print_perf(fplog,nodetime,realtime,runtime,&ntot,cr->nnodes-cr->npmenodes,
 +             TRUE);
 +    if (PARTDECOMP(cr))
 +      pr_load(fplog,cr,nrnb_all);
 +    if (cr->nnodes > 1)
 +      sfree(nrnb_all);
 +    */
 +  }
 +}
 +
 +void init_md(FILE *fplog,
 +             t_commrec *cr,t_inputrec *ir,const output_env_t oenv,
 +             double *t,double *t0,
 +             real *lambda,double *lam0,
 +             t_nrnb *nrnb,gmx_mtop_t *mtop,
 +             gmx_update_t *upd,
 +             int nfile,const t_filenm fnm[],
 +             gmx_mdoutf_t **outf,t_mdebin **mdebin,
 +             tensor force_vir,tensor shake_vir,rvec mu_tot,
 +             gmx_bool *bSimAnn,t_vcm **vcm, t_state *state, unsigned long Flags)
 +{
 +    int  i,j,n;
 +    real tmpt,mod;
 +      
 +    /* Initial values */
 +    *t = *t0       = ir->init_t;
 +    if (ir->efep != efepNO)
 +    {
 +        *lam0 = ir->init_lambda;
 +        *lambda = *lam0 + ir->init_step*ir->delta_lambda;
 +    }
 +    else
 +    {
 +        *lambda = *lam0   = 0.0;
 +    } 
 +
 +    *bSimAnn=FALSE;
 +    for(i=0;i<ir->opts.ngtc;i++)
 +    {
 +        /* set bSimAnn if any group is being annealed */
 +        if(ir->opts.annealing[i]!=eannNO)
 +        {
 +            *bSimAnn = TRUE;
 +        }
 +    }
 +    if (*bSimAnn)
 +    {
 +        update_annealing_target_temp(&(ir->opts),ir->init_t);
 +    }
 +    
 +    if (upd)
 +    {
 +        *upd = init_update(fplog,ir);
 +    }
 +    
 +    if (vcm != NULL)
 +    {
 +        *vcm = init_vcm(fplog,&mtop->groups,ir);
 +    }
 +    
 +    if (EI_DYNAMICS(ir->eI) && !(Flags & MD_APPENDFILES))
 +    {
 +        if (ir->etc == etcBERENDSEN)
 +        {
 +            please_cite(fplog,"Berendsen84a");
 +        }
 +        if (ir->etc == etcVRESCALE)
 +        {
 +            please_cite(fplog,"Bussi2007a");
 +        }
 +    }
 +    
 +    init_nrnb(nrnb);
 +    
 +    if (nfile != -1)
 +    {
 +        *outf = init_mdoutf(nfile,fnm,Flags,cr,ir,oenv);
 +
 +        *mdebin = init_mdebin((Flags & MD_APPENDFILES) ? NULL : (*outf)->fp_ene,
 +                              mtop,ir, (*outf)->fp_dhdl);
 +    }
 +    
 +    if (ir->bAdress)
 +    {
 +      please_cite(fplog,"Fritsch12");
 +      please_cite(fplog,"Junghans10");
 +    }
 +    /* Initiate variables */  
 +    clear_mat(force_vir);
 +    clear_mat(shake_vir);
 +    clear_rvec(mu_tot);
 +    
 +    debug_gmx();
 +}
 +
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
index d30f5e489d450f3c71d2a0697442caf29f343b94,0000000000000000000000000000000000000000..be85652b25369d54381a4dfc94c2faa57cef5f07
mode 100644,000000..100644
--- /dev/null
@@@ -1,2042 -1,0 +1,2029 @@@
- static const char *get_cystp(int resnr,int nrr,const rtprename_t *rr)
- {
-   enum { ecys, ecysH, ecysNR };
-   const char *lh[ecysNR] = { "CYS2", "CYS" };
-   const char *expl[ecysNR] = {
-     "Cysteine in disulfide bridge",
-     "Protonated"
-   };
-   return select_res(ecysNR,resnr,lh,expl,"CYSTEINE",nrr,rr);
- }
 +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 + *
 + *
 + *                This source code is part of
 + * 
 + *                 G   R   O   M   A   C   S
 + * 
 + *          GROningen MAchine for Chemical Simulations
 + * 
 + *                        VERSION 3.2.0
 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
 + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 + * Copyright (c) 2001-2004, The GROMACS development team,
 + * check out http://www.gromacs.org for more information.
 +
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public License
 + * as published by the Free Software Foundation; either version 2
 + * of the License, or (at your option) any later version.
 + * 
 + * If you want to redistribute modifications, please consider that
 + * scientific software is very special. Version control is crucial -
 + * bugs must be traceable. We will be happy to consider code for
 + * inclusion in the official distribution, but derived work must not
 + * be called official GROMACS. Details are found in the README & COPYING
 + * files - if they are missing, get the official version at www.gromacs.org.
 + * 
 + * To help us fund GROMACS development, we humbly ask that you cite
 + * the papers on the package - you can find them in the top README file.
 + * 
 + * For more info, check our website at http://www.gromacs.org
 + * 
 + * And Hey:
 + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
 + */
 +#ifdef HAVE_CONFIG_H
 +#include <config.h>
 +#endif
 +
 +#include <time.h>
 +#include <ctype.h>
 +#include "sysstuff.h"
 +#include "typedefs.h"
 +#include "gmxfio.h"
 +#include "smalloc.h"
 +#include "copyrite.h"
 +#include "string2.h"
 +#include "confio.h"
 +#include "symtab.h"
 +#include "vec.h"
 +#include "statutil.h"
 +#include "futil.h"
 +#include "gmx_fatal.h"
 +#include "pdbio.h"
 +#include "toputil.h"
 +#include "h_db.h"
 +#include "physics.h"
 +#include "pgutil.h"
 +#include "calch.h"
 +#include "resall.h"
 +#include "pdb2top.h"
 +#include "ter_db.h"
 +#include "strdb.h"
 +#include "gbutil.h"
 +#include "genhydro.h"
 +#include "readinp.h"
 +#include "atomprop.h"
 +#include "xlate.h"
 +#include "specbond.h"
 +#include "index.h"
 +#include "hizzie.h"
 +#include "fflibutil.h"
 +#include "macros.h"
 +
 +
 +typedef struct {
 +  char gmx[6];
 +  char main[6];
 +  char nter[6];
 +  char cter[6];
 +  char bter[6];
 +} rtprename_t;
 +
 +
 +static const char *res2bb_notermini(const char *name,
 +                                  int nrr,const rtprename_t *rr)
 +{
 +  /* NOTE: This function returns the main building block name,
 +   *       it does not take terminal renaming into account.
 +   */
 +  int i;
 +
 +  i = 0;
 +  while (i < nrr && gmx_strcasecmp(name,rr[i].gmx) != 0) {
 +    i++;
 +  }
 +
 +  return (i < nrr ? rr[i].main : name);
 +}
 +
 +static const char *select_res(int nr,int resnr,
 +                            const char *name[],const char *expl[],
 +                            const char *title,
 +                            int nrr,const rtprename_t *rr)
 +{
 +  int sel=0;
 +
 +  printf("Which %s type do you want for residue %d\n",title,resnr+1);
 +  for(sel=0; (sel < nr); sel++) {
 +    printf("%d. %s (%s)\n",
 +         sel,expl[sel],res2bb_notermini(name[sel],nrr,rr));
 +  }
 +  printf("\nType a number:"); fflush(stdout);
 +
 +  if (scanf("%d",&sel) != 1)
 +    gmx_fatal(FARGS,"Answer me for res %s %d!",title,resnr+1);
 +  
 +  return name[sel];
 +}
 +
 +static const char *get_asptp(int resnr,int nrr,const rtprename_t *rr)
 +{
 +  enum { easp, easpH, easpNR };
 +  const char *lh[easpNR] = { "ASP", "ASPH" };
 +  const char *expl[easpNR] = {
 +    "Not protonated (charge -1)",
 +    "Protonated (charge 0)"
 +  };
 +
 +  return select_res(easpNR,resnr,lh,expl,"ASPARTIC ACID",nrr,rr);
 +}
 +
 +static const char *get_glutp(int resnr,int nrr,const rtprename_t *rr)
 +{
 +  enum { eglu, egluH, egluNR };
 +  const char *lh[egluNR] = { "GLU", "GLUH" };
 +  const char *expl[egluNR] = {
 +    "Not protonated (charge -1)",
 +    "Protonated (charge 0)"
 +  };
 +
 +  return select_res(egluNR,resnr,lh,expl,"GLUTAMIC ACID",nrr,rr);
 +}
 +
 +static const char *get_glntp(int resnr,int nrr,const rtprename_t *rr)
 +{
 +  enum { egln, eglnH, eglnNR };
 +  const char *lh[eglnNR] = { "GLN", "QLN" };
 +  const char *expl[eglnNR] = {
 +    "Not protonated (charge 0)",
 +    "Protonated (charge +1)"
 +  };
 +
 +  return select_res(eglnNR,resnr,lh,expl,"GLUTAMINE",nrr,rr);
 +}
 +
 +static const char *get_lystp(int resnr,int nrr,const rtprename_t *rr)
 +{
 +  enum { elys, elysH, elysNR };
 +  const  char *lh[elysNR] = { "LYSN", "LYS" };
 +  const char *expl[elysNR] = {
 +    "Not protonated (charge 0)",
 +    "Protonated (charge +1)"
 +  };
 +
 +  return select_res(elysNR,resnr,lh,expl,"LYSINE",nrr,rr);
 +}
 +
 +static const char *get_argtp(int resnr,int nrr,const rtprename_t *rr)
 +{
 +  enum { earg, eargH, eargNR };
 +  const  char *lh[eargNR] = { "ARGN", "ARG" };
 +  const char *expl[eargNR] = {
 +    "Not protonated (charge 0)",
 +    "Protonated (charge +1)"
 +  };
 +
 +  return select_res(eargNR,resnr,lh,expl,"ARGININE",nrr,rr);
 +}
 +
-     fclose(fp);
 +static const char *get_histp(int resnr,int nrr,const rtprename_t *rr)
 +{
 +  const char *expl[ehisNR] = {
 +    "H on ND1 only",
 +    "H on NE2 only",
 +    "H on ND1 and NE2",
 +    "Coupled to Heme"
 +  };
 +  
 +  return select_res(ehisNR,resnr,hh,expl,"HISTIDINE",nrr,rr);
 +}
 +
 +static void read_rtprename(const char *fname,FILE *fp,
 +                         int *nrtprename,rtprename_t **rtprename)
 +{
 +  char line[STRLEN],buf[STRLEN];
 +  int  n;
 +  rtprename_t *rr;
 +  int  ncol,nc;
 +
 +  n  = *nrtprename;
 +  rr = *rtprename;
 +
 +  ncol = 0;
 +  while(get_a_line(fp,line,STRLEN)) {
 +    srenew(rr,n+1);
 +    nc = sscanf(line,"%s %s %s %s %s %s",
 +              rr[n].gmx,rr[n].main,rr[n].nter,rr[n].cter,rr[n].bter,buf);
 +    if (ncol == 0) {
 +      if (nc != 2 && nc != 5) {
 +      gmx_fatal(FARGS,"Residue renaming database '%s' has %d columns instead of %d, %d or %d",fname,ncol,2,5);
 +      }
 +      ncol = nc;
 +    } else if (nc != ncol) {
 +      gmx_fatal(FARGS,"A line in residue renaming database '%s' has %d columns, while previous lines have %d columns",fname,nc,ncol);
 +    }
 +    
 +    if (nc == 2) {
 +      /* This file does not have special termini names, copy them from main */
 +      strcpy(rr[n].nter,rr[n].main);
 +      strcpy(rr[n].cter,rr[n].main);
 +      strcpy(rr[n].bter,rr[n].main);
 +    }
 +
 +    n++;
 +  }
 +
 +  *nrtprename = n;
 +  *rtprename  = rr;
 +}
 +
 +static char *search_resrename(int nrr,rtprename_t *rr,
 +                              const char *name,
 +                              gmx_bool bStart,gmx_bool bEnd,
 +                              gmx_bool bCompareFFRTPname)
 +{
 +    char *nn;
 +    int i;
 +
 +    nn = NULL;
 +
 +    i = 0;
 +    while (i<nrr && ((!bCompareFFRTPname && strcmp(name,rr[i].gmx)  != 0) ||
 +                     ( bCompareFFRTPname && strcmp(name,rr[i].main) != 0)))
 +    {
 +        i++;
 +    }
 +
 +    /* If found in the database, rename this residue's rtp building block,
 +     * otherwise keep the old name.
 +     */
 +    if (i < nrr)
 +    {
 +        if (bStart && bEnd)
 +        {
 +            nn = rr[i].bter;
 +        }
 +        else if (bStart)
 +        {
 +            nn = rr[i].nter;
 +        }
 +        else if (bEnd)
 +        {
 +            nn = rr[i].cter;
 +        }
 +        else
 +        {
 +            nn = rr[i].main;
 +        }
 +        if (nn[0] == '-')
 +        {
 +            gmx_fatal(FARGS,"In the chosen force field there is no residue type for '%s'%s",name,bStart ? " as a starting terminus" : (bEnd ? " as an ending terminus" : ""));
 +        }
 +    }
 +
 +    return nn;
 +}
 +      
 +
 +static void rename_resrtp(t_atoms *pdba,int nterpairs,int *r_start,int *r_end,
 +                          int nrr,rtprename_t *rr,t_symtab *symtab,
 +                          gmx_bool bVerbose)
 +{
 +    int  r,i,j;
 +    gmx_bool bStart,bEnd;
 +    char *nn;
 +    gmx_bool bFFRTPTERRNM;
 +
 +    bFFRTPTERRNM = (getenv("GMX_NO_FFRTP_TER_RENAME") == NULL);
 +
 +    for(r=0; r<pdba->nres; r++)
 +    {
 +        bStart = FALSE;
 +        bEnd   = FALSE;
 +        for(j=0; j<nterpairs; j++)
 +        {
 +            if (r == r_start[j])
 +            {
 +                bStart = TRUE;
 +            }
 +        }
 +        for(j=0; j<nterpairs; j++)
 +        {
 +            if (r == r_end[j])
 +            {
 +                bEnd = TRUE;
 +            }
 +        }
 +
 +        nn = search_resrename(nrr,rr,*pdba->resinfo[r].rtp,bStart,bEnd,FALSE);
 +
 +        if (bFFRTPTERRNM && nn == NULL && (bStart || bEnd))
 +        {
 +            /* This is a terminal residue, but the residue name,
 +             * currently stored in .rtp, is not a standard residue name,
 +             * but probably a force field specific rtp name.
 +             * Check if we need to rename it because it is terminal.
 +             */
 +            nn = search_resrename(nrr,rr,
 +                                  *pdba->resinfo[r].rtp,bStart,bEnd,TRUE);
 +        }
 +
 +        if (nn != NULL && strcmp(*pdba->resinfo[r].rtp,nn) != 0)
 +        {
 +            if (bVerbose)
 +            {
 +                printf("Changing rtp entry of residue %d %s to '%s'\n",
 +                       pdba->resinfo[r].nr,*pdba->resinfo[r].name,nn);
 +            }
 +            pdba->resinfo[r].rtp = put_symtab(symtab,nn);
 +        }
 +    }
 +}
 +
 +static void pdbres_to_gmxrtp(t_atoms *pdba)
 +{
 +    int i;
 +  
 +    for(i=0; (i<pdba->nres); i++)
 +    {
 +        if (pdba->resinfo[i].rtp == NULL)
 +        {
 +            pdba->resinfo[i].rtp = pdba->resinfo[i].name;
 +        }
 +    }
 +}
 +
 +static void rename_pdbres(t_atoms *pdba,const char *oldnm,const char *newnm,
 +                          gmx_bool bFullCompare,t_symtab *symtab)
 +{
 +    char *resnm;
 +    int i;
 +  
 +    for(i=0; (i<pdba->nres); i++)
 +    {
 +        resnm = *pdba->resinfo[i].name;
 +        if ((bFullCompare && (gmx_strcasecmp(resnm,oldnm) == 0)) ||
 +            (!bFullCompare && strstr(resnm,oldnm) != NULL))
 +        {
 +            /* Rename the residue name (not the rtp name) */
 +            pdba->resinfo[i].name = put_symtab(symtab,newnm);
 +        }
 +    }
 +}
 +
 +static void rename_bb(t_atoms *pdba,const char *oldnm,const char *newnm,
 +                      gmx_bool bFullCompare,t_symtab *symtab)
 +{
 +    char *bbnm;
 +    int i;
 +  
 +    for(i=0; (i<pdba->nres); i++)
 +    {
 +        /* We have not set the rtp name yes, use the residue name */
 +        bbnm = *pdba->resinfo[i].name;
 +        if ((bFullCompare && (gmx_strcasecmp(bbnm,oldnm) == 0)) ||
 +            (!bFullCompare && strstr(bbnm,oldnm) != NULL))
 +        {
 +            /* Change the rtp builing block name */
 +            pdba->resinfo[i].rtp = put_symtab(symtab,newnm);
 +        }
 +    }
 +}
 +
 +static void rename_bbint(t_atoms *pdba,const char *oldnm,
 +                         const char *gettp(int,int,const rtprename_t *),
 +                         gmx_bool bFullCompare,
 +                         t_symtab *symtab,
 +                         int nrr,const rtprename_t *rr)
 +{
 +    int  i;
 +    const char *ptr;
 +    char *bbnm;
 +  
 +    for(i=0; i<pdba->nres; i++)
 +    {
 +        /* We have not set the rtp name yes, use the residue name */
 +        bbnm = *pdba->resinfo[i].name;
 +        if ((bFullCompare && (strcmp(bbnm,oldnm) == 0)) ||
 +            (!bFullCompare && strstr(bbnm,oldnm) != NULL))
 +        {
 +            ptr = gettp(i,nrr,rr);
 +            pdba->resinfo[i].rtp = put_symtab(symtab,ptr);
 +        }
 +    }
 +}
 +
 +static void check_occupancy(t_atoms *atoms,const char *filename,gmx_bool bVerbose)
 +{
 +  int i,ftp;
 +  int nzero=0;
 +  int nnotone=0;
 +  
 +  ftp = fn2ftp(filename);
 +  if (!atoms->pdbinfo || ((ftp != efPDB) && (ftp != efBRK) && (ftp != efENT)))
 +    fprintf(stderr,"No occupancies in %s\n",filename);
 +  else {
 +    for(i=0; (i<atoms->nr); i++) {
 +      if (atoms->pdbinfo[i].occup != 1) {
 +      if (bVerbose)
 +        fprintf(stderr,"Occupancy for atom %s%d-%s is %f rather than 1\n",
 +                *atoms->resinfo[atoms->atom[i].resind].name,
 +                atoms->resinfo[ atoms->atom[i].resind].nr,
 +                *atoms->atomname[i],
 +                atoms->pdbinfo[i].occup);
 +      if (atoms->pdbinfo[i].occup == 0) 
 +        nzero++;
 +      else 
 +        nnotone++;
 +      }
 +    }
 +    if (nzero == atoms->nr) {
 +      fprintf(stderr,"All occupancy fields zero. This is probably not an X-Ray structure\n");
 +    } else if ((nzero > 0) || (nnotone > 0)) {
 +      fprintf(stderr,
 +            "\n"
 +            "WARNING: there were %d atoms with zero occupancy and %d atoms with\n"
 +            "         occupancy unequal to one (out of %d atoms). Check your pdb file.\n"
 +            "\n",
 +            nzero,nnotone,atoms->nr);
 +    } else {
 +      fprintf(stderr,"All occupancies are one\n");
 +    }
 +  }
 +}
 +
 +void write_posres(char *fn,t_atoms *pdba,real fc)
 +{
 +  FILE *fp;
 +  int  i;
 +  
 +  fp=gmx_fio_fopen(fn,"w");
 +  fprintf(fp,
 +        "; In this topology include file, you will find position restraint\n"
 +        "; entries for all the heavy atoms in your original pdb file.\n"
 +        "; This means that all the protons which were added by pdb2gmx are\n"
 +        "; not restrained.\n"
 +        "\n"
 +        "[ position_restraints ]\n"
 +        "; %4s%6s%8s%8s%8s\n","atom","type","fx","fy","fz"
 +        );
 +  for(i=0; (i<pdba->nr); i++) {
 +    if (!is_hydrogen(*pdba->atomname[i]) && !is_dummymass(*pdba->atomname[i]))
 +      fprintf(fp,"%6d%6d  %g  %g  %g\n",i+1,1,fc,fc,fc);
 +  }
 +  gmx_fio_fclose(fp);
 +}
 +
 +static int read_pdball(const char *inf, const char *outf,char *title,
 +                     t_atoms *atoms, rvec **x,
 +                     int *ePBC,matrix box, gmx_bool bRemoveH,
 +                     t_symtab *symtab,gmx_residuetype_t rt,const char *watres,
 +                     gmx_atomprop_t aps,gmx_bool bVerbose)
 +/* Read a pdb file. (containing proteins) */
 +{
 +  int  natom,new_natom,i;
 +  
 +  /* READ IT */
 +  printf("Reading %s...\n",inf);
 +  get_stx_coordnum(inf,&natom);
 +  init_t_atoms(atoms,natom,TRUE);
 +  snew(*x,natom);
 +  read_stx_conf(inf,title,atoms,*x,NULL,ePBC,box);
 +  if (fn2ftp(inf) == efPDB)
 +    get_pdb_atomnumber(atoms,aps);
 +  if (bRemoveH) {
 +    new_natom=0;
 +    for(i=0; i<atoms->nr; i++)
 +      if (!is_hydrogen(*atoms->atomname[i])) {
 +      atoms->atom[new_natom]=atoms->atom[i];
 +      atoms->atomname[new_natom]=atoms->atomname[i];
 +      atoms->pdbinfo[new_natom]=atoms->pdbinfo[i];
 +      copy_rvec((*x)[i],(*x)[new_natom]);
 +      new_natom++;
 +      }
 +    atoms->nr=new_natom;
 +    natom=new_natom;
 +  }
 +    
 +  printf("Read");
 +  if (title && title[0])
 +    printf(" '%s',",title);
 +  printf(" %d atoms\n",natom);
 +  
 +  /* Rename residues */
 +  rename_pdbres(atoms,"HOH",watres,FALSE,symtab);
 +  rename_pdbres(atoms,"SOL",watres,FALSE,symtab);
 +  rename_pdbres(atoms,"WAT",watres,FALSE,symtab);
 +
 +  rename_atoms("xlateat.dat",NULL,
 +             atoms,symtab,NULL,TRUE,rt,TRUE,bVerbose);
 +  
 +  if (natom == 0)
 +    return 0;
 +
 +  if (outf)
 +    write_sto_conf(outf,title,atoms,*x,NULL,*ePBC,box);
 + 
 +  return natom;
 +}
 +
 +void process_chain(t_atoms *pdba, rvec *x, 
 +                 gmx_bool bTrpU,gmx_bool bPheU,gmx_bool bTyrU,
 +                 gmx_bool bLysMan,gmx_bool bAspMan,gmx_bool bGluMan,
 +                 gmx_bool bHisMan,gmx_bool bArgMan,gmx_bool bGlnMan,
 +                 real angle,real distance,t_symtab *symtab,
 +                 int nrr,const rtprename_t *rr)
 +{
 +  /* Rename aromatics, lys, asp and histidine */
 +  if (bTyrU) rename_bb(pdba,"TYR","TYRU",FALSE,symtab);
 +  if (bTrpU) rename_bb(pdba,"TRP","TRPU",FALSE,symtab);
 +  if (bPheU) rename_bb(pdba,"PHE","PHEU",FALSE,symtab);
 +  if (bLysMan) 
 +    rename_bbint(pdba,"LYS",get_lystp,FALSE,symtab,nrr,rr);
 +  if (bArgMan) 
 +    rename_bbint(pdba,"ARG",get_argtp,FALSE,symtab,nrr,rr);
 +  if (bGlnMan) 
 +    rename_bbint(pdba,"GLN",get_glntp,FALSE,symtab,nrr,rr);
 +  if (bAspMan) 
 +    rename_bbint(pdba,"ASP",get_asptp,FALSE,symtab,nrr,rr);
 +  else
 +    rename_bb(pdba,"ASPH","ASP",FALSE,symtab);
 +  if (bGluMan) 
 +    rename_bbint(pdba,"GLU",get_glutp,FALSE,symtab,nrr,rr);
 +  else
 +    rename_bb(pdba,"GLUH","GLU",FALSE,symtab);
 +
 +  if (!bHisMan)
 +    set_histp(pdba,x,angle,distance);
 +  else
 +    rename_bbint(pdba,"HIS",get_histp,TRUE,symtab,nrr,rr);
 +
 +  /* Initialize the rtp builing block names with the residue names
 +   * for the residues that have not been processed above.
 +   */
 +  pdbres_to_gmxrtp(pdba);
 +
 +  /* Now we have all rtp names set.
 +   * The rtp names will conform to Gromacs naming,
 +   * unless the input pdb file contained one or more force field specific
 +   * rtp names as residue names.
 +   */
 +}
 +
 +/* struct for sorting the atoms from the pdb file */
 +typedef struct {
 +  int  resnr;  /* residue number               */
 +  int  j;      /* database order index         */
 +  int  index;  /* original atom number         */
 +  char anm1;   /* second letter of atom name   */
 +  char altloc; /* alternate location indicator */
 +} t_pdbindex;
 +  
 +int pdbicomp(const void *a,const void *b)
 +{
 +  t_pdbindex *pa,*pb;
 +  int d;
 +
 +  pa=(t_pdbindex *)a;
 +  pb=(t_pdbindex *)b;
 +
 +  d = (pa->resnr - pb->resnr);
 +  if (d==0) {
 +    d = (pa->j - pb->j);
 +    if (d==0) {
 +      d = (pa->anm1 - pb->anm1);
 +      if (d==0)
 +      d = (pa->altloc - pb->altloc);
 +    }
 +  }
 +
 +  return d;
 +}
 +
 +static void sort_pdbatoms(int nrtp,t_restp restp[],t_hackblock hb[],
 +                        int natoms,t_atoms **pdbaptr,rvec **x,
 +                        t_blocka *block,char ***gnames)
 +{
 +  t_atoms *pdba,*pdbnew;
 +  rvec **xnew;
 +  int     i,j;
 +  t_restp *rptr;
 +  t_hackblock *hbr;
 +  t_pdbindex *pdbi;
 +  atom_id *a;
 +  char *atomnm;
 +  
 +  pdba=*pdbaptr;
 +  natoms=pdba->nr;
 +  pdbnew=NULL;
 +  snew(xnew,1);
 +  snew(pdbi, natoms);
 +  
 +  for(i=0; i<natoms; i++)
 +  {
 +      atomnm = *pdba->atomname[i];
 +      rptr = &restp[pdba->atom[i].resind];
 +      for(j=0; (j<rptr->natom); j++) 
 +      {
 +          if (gmx_strcasecmp(atomnm,*(rptr->atomname[j])) == 0) 
 +          {
 +              break;
 +          }
 +      }
 +      if (j==rptr->natom) 
 +      {
 +          char buf[STRLEN];
 +          
 +          sprintf(buf,
 +                  "Atom %s in residue %s %d was not found in rtp entry %s with %d atoms\n"
 +                  "while sorting atoms.\n%s",atomnm,
 +                  *pdba->resinfo[pdba->atom[i].resind].name,
 +                  pdba->resinfo[pdba->atom[i].resind].nr,
 +                  rptr->resname,
 +                  rptr->natom,
 +                  is_hydrogen(atomnm) ? 
 +                  "\nFor a hydrogen, this can be a different protonation state, or it\n"
 +                  "might have had a different number in the PDB file and was rebuilt\n"
 +                  "(it might for instance have been H3, and we only expected H1 & H2).\n"
 +                  "Note that hydrogens might have been added to the entry for the N-terminus.\n"
 +                  "Remove this hydrogen or choose a different protonation state to solve it.\n"
 +                  "Option -ignh will ignore all hydrogens in the input." : ".");
 +          gmx_fatal(FARGS,buf);
 +      }
 +      /* make shadow array to be sorted into indexgroup */
 +      pdbi[i].resnr  = pdba->atom[i].resind;
 +      pdbi[i].j      = j;
 +      pdbi[i].index  = i;
 +      pdbi[i].anm1   = atomnm[1];
 +      pdbi[i].altloc = pdba->pdbinfo[i].altloc;
 +  }
 +  qsort(pdbi,natoms,(size_t)sizeof(pdbi[0]),pdbicomp);
 +    
 +  /* pdba is sorted in pdbnew using the pdbi index */ 
 +  snew(a,natoms);
 +  snew(pdbnew,1);
 +  init_t_atoms(pdbnew,natoms,TRUE);
 +  snew(*xnew,natoms);
 +  pdbnew->nr=pdba->nr;
 +  pdbnew->nres=pdba->nres;
 +  sfree(pdbnew->resinfo);
 +  pdbnew->resinfo=pdba->resinfo;
 +  for (i=0; i<natoms; i++) {
 +    pdbnew->atom[i]     = pdba->atom[pdbi[i].index];
 +    pdbnew->atomname[i] = pdba->atomname[pdbi[i].index];
 +    pdbnew->pdbinfo[i]  = pdba->pdbinfo[pdbi[i].index];
 +    copy_rvec((*x)[pdbi[i].index],(*xnew)[i]);
 +     /* make indexgroup in block */
 +    a[i]=pdbi[i].index;
 +  }
 +  /* clean up */
 +  sfree(pdba->atomname);
 +  sfree(pdba->atom);
 +  sfree(pdba->pdbinfo);
 +  sfree(pdba);
 +  sfree(*x);
 +  /* copy the sorted pdbnew back to pdba */
 +  *pdbaptr=pdbnew;
 +  *x=*xnew;
 +  add_grp(block, gnames, natoms, a, "prot_sort");
 +  sfree(xnew);
 +  sfree(a);
 +  sfree(pdbi);
 +}
 +
 +static int remove_duplicate_atoms(t_atoms *pdba,rvec x[],gmx_bool bVerbose)
 +{
 +  int     i,j,oldnatoms,ndel;
 +  t_resinfo *ri;
 +  
 +  printf("Checking for duplicate atoms....\n");
 +  oldnatoms    = pdba->nr;
 +  ndel = 0;
 +  /* NOTE: pdba->nr is modified inside the loop */
 +  for(i=1; (i < pdba->nr); i++) {
 +    /* compare 'i' and 'i-1', throw away 'i' if they are identical 
 +       this is a 'while' because multiple alternate locations can be present */
 +    while ( (i < pdba->nr) &&
 +          (pdba->atom[i-1].resind == pdba->atom[i].resind) &&
 +          (strcmp(*pdba->atomname[i-1],*pdba->atomname[i])==0) ) {
 +      ndel++;
 +      if (bVerbose) {
 +      ri = &pdba->resinfo[pdba->atom[i].resind];
 +      printf("deleting duplicate atom %4s  %s%4d%c",
 +             *pdba->atomname[i],*ri->name,ri->nr,ri->ic);
 +      if (ri->chainid && (ri->chainid != ' '))
 +        printf(" ch %c", ri->chainid);
 +      if (pdba->pdbinfo) {
 +        if (pdba->pdbinfo[i].atomnr)
 +          printf("  pdb nr %4d",pdba->pdbinfo[i].atomnr);
 +        if (pdba->pdbinfo[i].altloc && (pdba->pdbinfo[i].altloc!=' '))
 +          printf("  altloc %c",pdba->pdbinfo[i].altloc);
 +      }
 +      printf("\n");
 +      }
 +      pdba->nr--;
 +      /* We can not free, since it might be in the symtab */
 +      /* sfree(pdba->atomname[i]); */
 +      for (j=i; j < pdba->nr; j++) {
 +      pdba->atom[j]     = pdba->atom[j+1];
 +      pdba->atomname[j] = pdba->atomname[j+1];
 +      pdba->pdbinfo[j]  = pdba->pdbinfo[j+1];
 +      copy_rvec(x[j+1],x[j]);
 +      }
 +      srenew(pdba->atom,     pdba->nr);
 +      /* srenew(pdba->atomname, pdba->nr); */
 +      srenew(pdba->pdbinfo,  pdba->nr);
 +    }
 +  }
 +  if (pdba->nr != oldnatoms)
 +    printf("Now there are %d atoms. Deleted %d duplicates.\n",pdba->nr,ndel);
 +  
 +  return pdba->nr;
 +}
 +
 +void find_nc_ter(t_atoms *pdba,int r0,int r1,int *r_start,int *r_end,gmx_residuetype_t rt)
 +{
 +    int i;
 +    const char *p_startrestype;
 +    const char *p_restype;
 +    int         nstartwarn,nendwarn;
 +    
 +    *r_start = -1;
 +    *r_end   = -1;
 +
 +    nstartwarn = 0;
 +    nendwarn   = 0;
 +    
 +    /* Find the starting terminus (typially N or 5') */
 +    for(i=r0;i<r1 && *r_start==-1;i++)
 +    {
 +        gmx_residuetype_get_type(rt,*pdba->resinfo[i].name,&p_startrestype);
 +        if( !gmx_strcasecmp(p_startrestype,"Protein") || !gmx_strcasecmp(p_startrestype,"DNA") || !gmx_strcasecmp(p_startrestype,"RNA") )
 +        {
 +            printf("Identified residue %s%d as a starting terminus.\n",*pdba->resinfo[i].name,pdba->resinfo[i].nr);
 +            *r_start=i;
 +        }
 +        else 
 +        {            
 +            if(nstartwarn < 5)
 +            {    
 +                printf("Warning: Starting residue %s%d in chain not identified as Protein/RNA/DNA.\n",*pdba->resinfo[i].name,pdba->resinfo[i].nr);
 +            }
 +            if(nstartwarn == 5)
 +            {
 +                printf("More than 5 unidentified residues at start of chain - disabling further warnings.\n");
 +            }
 +            nstartwarn++;
 +        }
 +    }
 +
 +    if(*r_start>=0)
 +    {
 +        /* Go through the rest of the residues, check that they are the same class, and identify the ending terminus. */
 +        for(i=*r_start;i<r1;i++)
 +        {
 +            gmx_residuetype_get_type(rt,*pdba->resinfo[i].name,&p_restype);
 +            if( !gmx_strcasecmp(p_restype,p_startrestype) && nendwarn==0)
 +            {
 +                *r_end=i;
 +            }
 +            else 
 +            {
 +                if(nendwarn < 5)
 +                {    
 +                    printf("Warning: Residue %s%d in chain has different type (%s) from starting residue %s%d (%s).\n",
 +                           *pdba->resinfo[i].name,pdba->resinfo[i].nr,p_restype,
 +                           *pdba->resinfo[*r_start].name,pdba->resinfo[*r_start].nr,p_startrestype);
 +                }
 +                if(nendwarn == 5)
 +                {
 +                    printf("More than 5 unidentified residues at end of chain - disabling further warnings.\n");
 +                }
 +                nendwarn++;                
 +            }
 +        }  
 +    }
 +    
 +    if(*r_end>=0)
 +    {
 +        printf("Identified residue %s%d as a ending terminus.\n",*pdba->resinfo[*r_end].name,pdba->resinfo[*r_end].nr);
 +    }
 +}
 +
 +
 +static void
 +modify_chain_numbers(t_atoms *       pdba,
 +                     const char *    chainsep)
 +{
 +    int   i;
 +    char  old_prev_chainid;
 +    char  old_this_chainid;
 +    int   old_prev_chainnum;
 +    int   old_this_chainnum;
 +    t_resinfo *ri;
 +    char  select[STRLEN];
 +    int   new_chainnum;
 +    int           this_atomnum;
 +    int           prev_atomnum;
 +    const char *  prev_atomname;
 +    const char *  this_atomname;
 +    const char *  prev_resname;
 +    const char *  this_resname;
 +    int           prev_resnum;
 +    int           this_resnum;
 +    char          prev_chainid;
 +    char          this_chainid;
 +    int           prev_chainnumber;
 +    int           this_chainnumber;
 +   
 +    enum 
 +    { 
 +        SPLIT_ID_OR_TER, 
 +        SPLIT_ID_AND_TER,
 +        SPLIT_ID_ONLY,
 +        SPLIT_TER_ONLY,
 +        SPLIT_INTERACTIVE
 +    }
 +    splitting;
 +    
 +    splitting = SPLIT_TER_ONLY; /* keep compiler happy */
 +    
 +    /* Be a bit flexible to catch typos */
 +    if (!strncmp(chainsep,"id_o",4))
 +    {
 +        /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
 +        splitting = SPLIT_ID_OR_TER;
 +        printf("Splitting chemical chains based on TER records or chain id changing.\n");
 +    }
 +    else if (!strncmp(chainsep,"int",3))
 +    {
 +        /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
 +        splitting = SPLIT_INTERACTIVE;
 +        printf("Splitting chemical chains interactively.\n");
 +    }
 +    else if (!strncmp(chainsep,"id_a",4))
 +    {
 +        splitting = SPLIT_ID_AND_TER;
 +        printf("Splitting chemical chains based on TER records and chain id changing.\n");
 +    }
 +    else if (strlen(chainsep)==2 && !strncmp(chainsep,"id",4))
 +    {
 +        splitting = SPLIT_ID_ONLY;
 +        printf("Splitting chemical chains based on changing chain id only (ignoring TER records).\n");
 +    }
 +    else if (chainsep[0]=='t')
 +    {
 +        splitting = SPLIT_TER_ONLY;
 +        printf("Splitting chemical chains based on TER records only (ignoring chain id).\n");
 +    }
 +    else
 +    {
 +        gmx_fatal(FARGS,"Unidentified setting for chain separation: %s\n",chainsep);
 +    }                                                                           
 +                                                                                   
 +    /* The default chain enumeration is based on TER records only, which is reflected in chainnum below */
 +    
 +    old_prev_chainid  = '?';
 +    old_prev_chainnum = -1;
 +    new_chainnum  = -1;
 +    
 +    this_atomname       = NULL;
 +    this_atomnum        = -1;
 +    this_resname        = NULL;
 +    this_resnum         = -1;
 +    this_chainid        = '?';
 +    this_chainnumber    = -1;
 +
 +    for(i=0;i<pdba->nres;i++)
 +    {
 +        ri = &pdba->resinfo[i];
 +        old_this_chainid   = ri->chainid;
 +        old_this_chainnum  = ri->chainnum;
 +
 +        prev_atomname      = this_atomname;
 +        prev_atomnum       = this_atomnum;
 +        prev_resname       = this_resname;
 +        prev_resnum        = this_resnum;
 +        prev_chainid       = this_chainid;
 +        prev_chainnumber   = this_chainnumber;
 +
 +        this_atomname      = *(pdba->atomname[i]);
 +        this_atomnum       = (pdba->pdbinfo != NULL) ? pdba->pdbinfo[i].atomnr : i+1;
 +        this_resname       = *ri->name;
 +        this_resnum        = ri->nr;
 +        this_chainid       = ri->chainid;
 +        this_chainnumber   = ri->chainnum;
 +
 +        switch (splitting)
 +        {
 +            case SPLIT_ID_OR_TER:
 +                if(old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
 +                {
 +                    new_chainnum++;
 +                }
 +                break;
 +                
 +            case SPLIT_ID_AND_TER:
 +                if(old_this_chainid != old_prev_chainid && old_this_chainnum != old_prev_chainnum)
 +                {
 +                    new_chainnum++;
 +                }
 +                break;
 +                
 +            case SPLIT_ID_ONLY:
 +                if(old_this_chainid != old_prev_chainid)
 +                {
 +                    new_chainnum++;
 +                }
 +                break;
 +                
 +            case SPLIT_TER_ONLY:
 +                if(old_this_chainnum != old_prev_chainnum)
 +                {
 +                    new_chainnum++;
 +                }
 +                break;
 +            case SPLIT_INTERACTIVE:
 +                if(old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
 +                {
 +                    if(i>0)
 +                    {
 +                        printf("Split the chain (and introduce termini) between residue %s%d (chain id '%c', atom %d %s)\n" 
 +                               "and residue %s%d (chain id '%c', atom %d %s) ? [n/y]\n",
 +                               prev_resname,prev_resnum,prev_chainid,prev_atomnum,prev_atomname,
 +                               this_resname,this_resnum,this_chainid,this_atomnum,this_atomname);
 +                        
 +                        if(NULL==fgets(select,STRLEN-1,stdin))
 +                        {
 +                            gmx_fatal(FARGS,"Error reading from stdin");
 +                        }
 +                    }
 +                    if(i==0 || select[0] == 'y')
 +                    {
 +                        new_chainnum++;
 +                    }
 +                }               
 +                break;
 +            default:
 +                gmx_fatal(FARGS,"Internal inconsistency - this shouldn't happen...");
 +                break;
 +        }
 +        old_prev_chainid  = old_this_chainid;
 +        old_prev_chainnum = old_this_chainnum;
 +                                                                                   
 +        ri->chainnum = new_chainnum;        
 +    }
 +}
 +
 +
 +typedef struct {
 +  char chainid;
 +  char chainnum;
 +  int  start;
 +  int  natom;
 +  gmx_bool bAllWat;
 +  int  nterpairs;
 +  int  *chainstart;
 +} t_pdbchain;
 +
 +typedef struct {
 +  char chainid;
 +  int  chainnum;
 +  gmx_bool bAllWat;
 +  int nterpairs;
 +  int *chainstart;
 +  t_hackblock **ntdb;
 +  t_hackblock **ctdb;
 +  int *r_start;
 +  int *r_end;
 +  t_atoms *pdba;
 +  rvec *x;
 +} t_chain;
 +
 +int main(int argc, char *argv[])
 +{
 +  const char *desc[] = {
 +    "This program reads a [TT].pdb[tt] (or [TT].gro[tt]) file, reads",
 +    "some database files, adds hydrogens to the molecules and generates",
 +    "coordinates in GROMACS (GROMOS), or optionally [TT].pdb[tt], format",
 +    "and a topology in GROMACS format.",
 +    "These files can subsequently be processed to generate a run input file.",
 +    "[PAR]",
 +    "[TT]pdb2gmx[tt] will search for force fields by looking for",
 +    "a [TT]forcefield.itp[tt] file in subdirectories [TT]<forcefield>.ff[tt]",
 +    "of the current working directory and of the GROMACS library directory",
 +    "as inferred from the path of the binary or the [TT]GMXLIB[tt] environment",
 +    "variable.",
 +    "By default the forcefield selection is interactive,",
 +    "but you can use the [TT]-ff[tt] option to specify one of the short names",
 +    "in the list on the command line instead. In that case [TT]pdb2gmx[tt] just looks",
 +    "for the corresponding [TT]<forcefield>.ff[tt] directory.",
 +    "[PAR]",
 +    "After choosing a force field, all files will be read only from",
 +    "the corresponding force field directory.",
 +    "If you want to modify or add a residue types, you can copy the force",
 +    "field directory from the GROMACS library directory to your current",
 +    "working directory. If you want to add new protein residue types,",
 +    "you will need to modify [TT]residuetypes.dat[tt] in the library directory",
 +    "or copy the whole library directory to a local directory and set",
 +    "the environment variable [TT]GMXLIB[tt] to the name of that directory.",
 +    "Check Chapter 5 of the manual for more information about file formats.",
 +    "[PAR]",
 +    
 +    "Note that a [TT].pdb[tt] file is nothing more than a file format, and it",
 +    "need not necessarily contain a protein structure. Every kind of",
 +    "molecule for which there is support in the database can be converted.",
 +    "If there is no support in the database, you can add it yourself.[PAR]",
 +    
 +    "The program has limited intelligence, it reads a number of database",
 +    "files, that allow it to make special bonds (Cys-Cys, Heme-His, etc.),",
 +    "if necessary this can be done manually. The program can prompt the",
 +    "user to select which kind of LYS, ASP, GLU, CYS or HIS residue is",
 +    "desired. For Lys the choice is between neutral (two protons on NZ) or",
 +    "protonated (three protons, default), for Asp and Glu unprotonated",
 +    "(default) or protonated, for His the proton can be either on ND1,",
 +    "on NE2 or on both. By default these selections are done automatically.",
 +    "For His, this is based on an optimal hydrogen bonding",
 +    "conformation. Hydrogen bonds are defined based on a simple geometric",
 +    "criterion, specified by the maximum hydrogen-donor-acceptor angle",
 +    "and donor-acceptor distance, which are set by [TT]-angle[tt] and",
 +    "[TT]-dist[tt] respectively.[PAR]",
 +     
 +    "The protonation state of N- and C-termini can be chosen interactively",
 +    "with the [TT]-ter[tt] flag.  Default termini are ionized (NH3+ and COO-),",
 +    "respectively.  Some force fields support zwitterionic forms for chains of",
 +    "one residue, but for polypeptides these options should NOT be selected.",
 +    "The AMBER force fields have unique forms for the terminal residues,",
 +    "and these are incompatible with the [TT]-ter[tt] mechanism. You need",
 +    "to prefix your N- or C-terminal residue names with \"N\" or \"C\"",
 +    "respectively to use these forms, making sure you preserve the format",
 +    "of the coordinate file. Alternatively, use named terminating residues",
 +    "(e.g. ACE, NME).[PAR]",
 +
 +    "The separation of chains is not entirely trivial since the markup",
 +    "in user-generated PDB files frequently varies and sometimes it",
 +    "is desirable to merge entries across a TER record, for instance",
 +    "if you want a disulfide bridge or distance restraints between",
 +    "two protein chains or if you have a HEME group bound to a protein.",
 +    "In such cases multiple chains should be contained in a single",
 +    "[TT]moleculetype[tt] definition.",
 +    "To handle this, [TT]pdb2gmx[tt] uses two separate options.",
 +    "First, [TT]-chainsep[tt] allows you to choose when a new chemical chain should",
 +    "start, and termini added when applicable. This can be done based on the",
 +    "existence of TER records, when the chain id changes, or combinations of either",
 +    "or both of these. You can also do the selection fully interactively.",
 +    "In addition, there is a [TT]-merge[tt] option that controls how multiple chains",
 +    "are merged into one moleculetype, after adding all the chemical termini (or not).",
 +    "This can be turned off (no merging), all non-water chains can be merged into a",
 +    "single molecule, or the selection can be done interactively.[PAR]",
 +      
 +    "[TT]pdb2gmx[tt] will also check the occupancy field of the [TT].pdb[tt] file.",
 +    "If any of the occupancies are not one, indicating that the atom is",
 +    "not resolved well in the structure, a warning message is issued.",
 +    "When a [TT].pdb[tt] file does not originate from an X-ray structure determination",
 +    "all occupancy fields may be zero. Either way, it is up to the user",
 +    "to verify the correctness of the input data (read the article!).[PAR]", 
 +    
 +    "During processing the atoms will be reordered according to GROMACS",
 +    "conventions. With [TT]-n[tt] an index file can be generated that",
 +    "contains one group reordered in the same way. This allows you to",
 +    "convert a GROMOS trajectory and coordinate file to GROMOS. There is",
 +    "one limitation: reordering is done after the hydrogens are stripped",
 +    "from the input and before new hydrogens are added. This means that",
 +    "you should not use [TT]-ignh[tt].[PAR]",
 +
 +    "The [TT].gro[tt] and [TT].g96[tt] file formats do not support chain",
 +    "identifiers. Therefore it is useful to enter a [TT].pdb[tt] file name at",
 +    "the [TT]-o[tt] option when you want to convert a multi-chain [TT].pdb[tt] file.",
 +    "[PAR]",
 +    
 +    "The option [TT]-vsite[tt] removes hydrogen and fast improper dihedral",
 +    "motions. Angular and out-of-plane motions can be removed by changing",
 +    "hydrogens into virtual sites and fixing angles, which fixes their",
 +    "position relative to neighboring atoms. Additionally, all atoms in the",
 +    "aromatic rings of the standard amino acids (i.e. PHE, TRP, TYR and HIS)",
 +    "can be converted into virtual sites, eliminating the fast improper dihedral",
 +    "fluctuations in these rings. [BB]Note[bb] that in this case all other hydrogen",
 +    "atoms are also converted to virtual sites. The mass of all atoms that are",
 +    "converted into virtual sites, is added to the heavy atoms.[PAR]",
 +    "Also slowing down of dihedral motion can be done with [TT]-heavyh[tt]",
 +    "done by increasing the hydrogen-mass by a factor of 4. This is also",
 +    "done for water hydrogens to slow down the rotational motion of water.",
 +    "The increase in mass of the hydrogens is subtracted from the bonded",
 +    "(heavy) atom so that the total mass of the system remains the same."
 +  };
 +
 +  
 +  FILE       *fp,*top_file,*top_file2,*itp_file=NULL;
 +  int        natom,nres;
 +  t_atoms    pdba_all,*pdba;
 +  t_atoms    *atoms;
 +  t_resinfo  *ri;
 +  t_blocka   *block;
 +  int        chain,nch,maxch,nwaterchain;
 +  t_pdbchain *pdb_ch;
 +  t_chain    *chains,*cc;
 +  char       select[STRLEN];
 +  int        nincl,nmol;
 +  char       **incls;
 +  t_mols     *mols;
 +  char       **gnames;
 +  int        ePBC;
 +  matrix     box;
 +  rvec       box_space;
 +  int        i,j,k,l,nrtp;
 +  int        *swap_index,si;
 +  t_restp    *restp;
 +  t_hackblock *ah;
 +  t_symtab   symtab;
 +  gpp_atomtype_t atype;
 +  gmx_residuetype_t rt;
 +  const char *top_fn;
 +  char       fn[256],itp_fn[STRLEN],posre_fn[STRLEN],buf_fn[STRLEN];
 +  char       molname[STRLEN],title[STRLEN],quote[STRLEN],generator[STRLEN];
 +  char       *c,forcefield[STRLEN],ffdir[STRLEN];
 +  char       ffname[STRLEN],suffix[STRLEN],buf[STRLEN];
 +  char       *watermodel;
 +  const char *watres;
 +  int        nrtpf;
 +  char       **rtpf;
 +  char       rtp[STRLEN];
 +  int        nrrn;
 +  char       **rrn;
 +  int        nrtprename,naa;
 +  rtprename_t *rtprename=NULL;
 +  int        nah,nNtdb,nCtdb,ntdblist;
 +  t_hackblock *ntdb,*ctdb,**tdblist;
 +  int        nssbonds;
 +  t_ssbond   *ssbonds;
 +  rvec       *pdbx,*x;
 +  gmx_bool       bVsites=FALSE,bWat,bPrevWat=FALSE,bITP,bVsiteAromatics=FALSE,bCheckMerge;
 +  real       mHmult=0;
 +  t_hackblock *hb_chain;
 +  t_restp    *restp_chain;
 +  output_env_t oenv;
 +  const char *p_restype;
 +  int        rc;
 +  int           this_atomnum;
 +  int           prev_atomnum;
 +  const char *  prev_atomname;
 +  const char *  this_atomname;
 +  const char *  prev_resname;
 +  const char *  this_resname;
 +  int           prev_resnum;
 +  int           this_resnum;
 +  char          prev_chainid;
 +  char          this_chainid;
 +  int           prev_chainnumber;
 +  int           this_chainnumber;
 +  int           nid_used;
 +  int           this_chainstart;
 +  int           prev_chainstart;
 +  gmx_bool      bMerged;
 +  int           nchainmerges;
 +    
 +  gmx_atomprop_t aps;
 +  
 +  t_filenm   fnm[] = { 
 +    { efSTX, "-f", "eiwit.pdb", ffREAD  },
 +    { efSTO, "-o", "conf",      ffWRITE },
 +    { efTOP, NULL, NULL,        ffWRITE },
 +    { efITP, "-i", "posre",     ffWRITE },
 +    { efNDX, "-n", "clean",     ffOPTWR },
 +    { efSTO, "-q", "clean.pdb", ffOPTWR }
 +  };
 +#define NFILE asize(fnm)
 + 
 +
 +  /* Command line arguments must be static */
 +  static gmx_bool bNewRTP=FALSE;
 +  static gmx_bool bInter=FALSE, bCysMan=FALSE; 
 +  static gmx_bool bLysMan=FALSE, bAspMan=FALSE, bGluMan=FALSE, bHisMan=FALSE;
 +  static gmx_bool bGlnMan=FALSE, bArgMan=FALSE;
 +  static gmx_bool bTerMan=FALSE, bUnA=FALSE, bHeavyH;
 +  static gmx_bool bSort=TRUE, bAllowMissing=FALSE, bRemoveH=FALSE;
 +  static gmx_bool bDeuterate=FALSE,bVerbose=FALSE,bChargeGroups=TRUE,bCmap=TRUE;
 +  static gmx_bool bRenumRes=FALSE,bRTPresname=FALSE;
 +  static real angle=135.0, distance=0.3,posre_fc=1000;
 +  static real long_bond_dist=0.25, short_bond_dist=0.05;
 +  static const char *vsitestr[] = { NULL, "none", "hydrogens", "aromatics", NULL };
 +  static const char *watstr[] = { NULL, "select", "none", "spc", "spce", "tip3p", "tip4p", "tip5p", NULL };
 +  static const char *chainsep[] = { NULL, "id_or_ter", "id_and_ter", "ter", "id", "interactive", NULL };
 +  static const char *merge[] = {NULL, "no", "all", "interactive", NULL };
 +  static const char *ff = "select";
 +
 +  t_pargs pa[] = {
 +    { "-newrtp", FALSE, etBOOL, {&bNewRTP},
 +      "HIDDENWrite the residue database in new format to [TT]new.rtp[tt]"},
 +    { "-lb",     FALSE, etREAL, {&long_bond_dist},
 +      "HIDDENLong bond warning distance" },
 +    { "-sb",     FALSE, etREAL, {&short_bond_dist},
 +      "HIDDENShort bond warning distance" },
 +    { "-chainsep", FALSE, etENUM, {chainsep},
 +      "Condition in PDB files when a new chain should be started (adding termini)" },
 +    { "-merge",  FALSE, etENUM, {&merge},
 +      "Merge multiple chains into a single [moleculetype]" },         
 +    { "-ff",     FALSE, etSTR,  {&ff},
 +      "Force field, interactive by default. Use [TT]-h[tt] for information." },
 +    { "-water",  FALSE, etENUM, {watstr},
 +      "Water model to use" },
 +    { "-inter",  FALSE, etBOOL, {&bInter},
 +      "Set the next 8 options to interactive"},
 +    { "-ss",     FALSE, etBOOL, {&bCysMan}, 
 +      "Interactive SS bridge selection" },
 +    { "-ter",    FALSE, etBOOL, {&bTerMan}, 
 +      "Interactive termini selection, instead of charged (default)" },
 +    { "-lys",    FALSE, etBOOL, {&bLysMan}, 
 +      "Interactive lysine selection, instead of charged" },
 +    { "-arg",    FALSE, etBOOL, {&bArgMan}, 
 +      "Interactive arginine selection, instead of charged" },
 +    { "-asp",    FALSE, etBOOL, {&bAspMan}, 
 +      "Interactive aspartic acid selection, instead of charged" },
 +    { "-glu",    FALSE, etBOOL, {&bGluMan}, 
 +      "Interactive glutamic acid selection, instead of charged" },
 +    { "-gln",    FALSE, etBOOL, {&bGlnMan}, 
 +      "Interactive glutamine selection, instead of neutral" },
 +    { "-his",    FALSE, etBOOL, {&bHisMan},
 +      "Interactive histidine selection, instead of checking H-bonds" },
 +    { "-angle",  FALSE, etREAL, {&angle}, 
 +      "Minimum hydrogen-donor-acceptor angle for a H-bond (degrees)" },
 +    { "-dist",   FALSE, etREAL, {&distance},
 +      "Maximum donor-acceptor distance for a H-bond (nm)" },
 +    { "-una",    FALSE, etBOOL, {&bUnA}, 
 +      "Select aromatic rings with united CH atoms on phenylalanine, "
 +      "tryptophane and tyrosine" },
 +    { "-sort",   FALSE, etBOOL, {&bSort}, 
 +      "HIDDENSort the residues according to database, turning this off is dangerous as charge groups might be broken in parts" },
 +    { "-ignh",   FALSE, etBOOL, {&bRemoveH}, 
 +      "Ignore hydrogen atoms that are in the coordinate file" },
 +    { "-missing",FALSE, etBOOL, {&bAllowMissing}, 
 +      "Continue when atoms are missing, dangerous" },
 +    { "-v",      FALSE, etBOOL, {&bVerbose}, 
 +      "Be slightly more verbose in messages" },
 +    { "-posrefc",FALSE, etREAL, {&posre_fc},
 +      "Force constant for position restraints" },
 +    { "-vsite",  FALSE, etENUM, {vsitestr}, 
 +      "Convert atoms to virtual sites" },
 +    { "-heavyh", FALSE, etBOOL, {&bHeavyH},
 +      "Make hydrogen atoms heavy" },
 +    { "-deuterate", FALSE, etBOOL, {&bDeuterate},
 +      "Change the mass of hydrogens to 2 amu" },
 +    { "-chargegrp", TRUE, etBOOL, {&bChargeGroups},
 +      "Use charge groups in the [TT].rtp[tt] file"  },
 +    { "-cmap", TRUE, etBOOL, {&bCmap},
 +      "Use cmap torsions (if enabled in the [TT].rtp[tt] file)"  },
 +    { "-renum", TRUE, etBOOL, {&bRenumRes},
 +      "Renumber the residues consecutively in the output"  },
 +    { "-rtpres", TRUE, etBOOL, {&bRTPresname},
 +      "Use [TT].rtp[tt] entry names as residue names"  }
 +  };
 +#define NPARGS asize(pa)
 +  
 +  CopyRight(stderr,argv[0]);
 +  parse_common_args(&argc,argv,0,NFILE,fnm,asize(pa),pa,asize(desc),desc,
 +                  0,NULL,&oenv);
 +
 +  /* Force field selection, interactive or direct */
 +  choose_ff(strcmp(ff,"select") == 0 ? NULL : ff,
 +          forcefield,sizeof(forcefield),
 +          ffdir,sizeof(ffdir));
 +
 +  if (strlen(forcefield) > 0) {
 +    strcpy(ffname,forcefield);
 +    ffname[0] = toupper(ffname[0]);
 +  } else {
 +    gmx_fatal(FARGS,"Empty forcefield string");
 +  }
 +  
 +  printf("\nUsing the %s force field in directory %s\n\n",
 +       ffname,ffdir);
 +    
 +  choose_watermodel(watstr[0],ffdir,&watermodel);
 +
 +  if (bInter) {
 +    /* if anything changes here, also change description of -inter */
 +    bCysMan = TRUE;
 +    bTerMan = TRUE;
 +    bLysMan = TRUE;
 +    bArgMan = TRUE;
 +    bAspMan = TRUE;
 +    bGluMan = TRUE;
 +    bGlnMan = TRUE;
 +    bHisMan = TRUE;
 +  }
 +  
 +  if (bHeavyH)
 +    mHmult=4.0;
 +  else if (bDeuterate)
 +    mHmult=2.0;
 +  else
 +    mHmult=1.0;
 +  
 +  switch(vsitestr[0][0]) {
 +  case 'n': /* none */
 +    bVsites=FALSE;
 +    bVsiteAromatics=FALSE;
 +    break;
 +  case 'h': /* hydrogens */
 +    bVsites=TRUE;
 +    bVsiteAromatics=FALSE;
 +    break;
 +  case 'a': /* aromatics */
 +    bVsites=TRUE;
 +    bVsiteAromatics=TRUE;
 +    break;
 +  default:
 +    gmx_fatal(FARGS,"DEATH HORROR in $s (%d): vsitestr[0]='%s'",
 +              __FILE__,__LINE__,vsitestr[0]);
 +  }/* end switch */
 +  
 +  /* Open the symbol table */
 +  open_symtab(&symtab);
 +
 +  /* Residue type database */  
 +  gmx_residuetype_init(&rt);
 +  
 +  /* Read residue renaming database(s), if present */
 +  nrrn = fflib_search_file_end(ffdir,".r2b",FALSE,&rrn);
 +    
 +  nrtprename = 0;
 +  rtprename  = NULL;
 +  for(i=0; i<nrrn; i++) {
 +    fp = fflib_open(rrn[i]);
 +    read_rtprename(rrn[i],fp,&nrtprename,&rtprename);
++    ffclose(fp);
 +    sfree(rrn[i]);
 +  }
 +  sfree(rrn);
 +
 +  /* Add all alternative names from the residue renaming database to the list of recognized amino/nucleic acids. */
 +  naa=0;
 +  for(i=0;i<nrtprename;i++)
 +  {
 +      rc=gmx_residuetype_get_type(rt,rtprename[i].gmx,&p_restype);
 +
 +      /* Only add names if the 'standard' gromacs/iupac base name was found */
 +      if(rc==0)
 +      {
 +          gmx_residuetype_add(rt,rtprename[i].main,p_restype);
 +          gmx_residuetype_add(rt,rtprename[i].nter,p_restype);
 +          gmx_residuetype_add(rt,rtprename[i].cter,p_restype);
 +          gmx_residuetype_add(rt,rtprename[i].bter,p_restype);
 +      }          
 +  }
 +    
 +  clear_mat(box);
 +  if (watermodel != NULL && (strstr(watermodel,"4p") ||
 +                           strstr(watermodel,"4P"))) {
 +    watres = "HO4";
 +  } else if (watermodel != NULL && (strstr(watermodel,"5p") ||
 +                                  strstr(watermodel,"5P"))) {
 +    watres = "HO5";
 +  } else {
 +    watres = "HOH";
 +  }
 +    
 +  aps = gmx_atomprop_init();
 +  natom = read_pdball(opt2fn("-f",NFILE,fnm),opt2fn_null("-q",NFILE,fnm),title,
 +                    &pdba_all,&pdbx,&ePBC,box,bRemoveH,&symtab,rt,watres,
 +                    aps,bVerbose);
 +  
 +  if (natom==0)
 +    gmx_fatal(FARGS,"No atoms found in pdb file %s\n",opt2fn("-f",NFILE,fnm));
 +
 +  printf("Analyzing pdb file\n");
 +  nch=0;
 +  maxch=0;
 +  nwaterchain=0;
 +    
 +  modify_chain_numbers(&pdba_all,chainsep[0]);
 +
 +  nchainmerges        = 0;
 +    
 +  this_atomname       = NULL;
 +  this_atomnum        = -1;
 +  this_resname        = NULL;
 +  this_resnum         = -1;
 +  this_chainid        = '?';
 +  this_chainnumber    = -1;
 +  this_chainstart     = 0;
 +  /* Keep the compiler happy */
 +  prev_chainstart     = 0;
 +    
 +  pdb_ch=NULL;
 +
 +  bMerged = FALSE;
 +  for (i=0; (i<natom); i++) 
 +  {
 +      ri = &pdba_all.resinfo[pdba_all.atom[i].resind];
 +
 +      prev_atomname      = this_atomname;
 +      prev_atomnum       = this_atomnum;
 +      prev_resname       = this_resname;
 +      prev_resnum        = this_resnum;
 +      prev_chainid       = this_chainid;
 +      prev_chainnumber   = this_chainnumber;
 +      if (!bMerged)
 +      {
 +          prev_chainstart    = this_chainstart;
 +      }
 +      
 +      this_atomname      = *pdba_all.atomname[i];
 +      this_atomnum       = (pdba_all.pdbinfo != NULL) ? pdba_all.pdbinfo[i].atomnr : i+1;
 +      this_resname       = *ri->name;
 +      this_resnum        = ri->nr;
 +      this_chainid       = ri->chainid;
 +      this_chainnumber   = ri->chainnum;
 +      
 +      bWat = gmx_strcasecmp(*ri->name,watres) == 0;
 +      if ((i == 0) || (this_chainnumber != prev_chainnumber) || (bWat != bPrevWat)) 
 +      {
 +          this_chainstart = pdba_all.atom[i].resind;
 +          
 +          bMerged = FALSE;
 +          if (i>0 && !bWat) 
 +          {
 +              if(!strncmp(merge[0],"int",3))
 +              {
 +                  printf("Merge chain ending with residue %s%d (chain id '%c', atom %d %s) and chain starting with\n"
 +                         "residue %s%d (chain id '%c', atom %d %s) into a single moleculetype (keeping termini)? [n/y]\n",
 +                         prev_resname,prev_resnum,prev_chainid,prev_atomnum,prev_atomname,
 +                         this_resname,this_resnum,this_chainid,this_atomnum,this_atomname);
 +                  
 +                  if(NULL==fgets(select,STRLEN-1,stdin))
 +                  {
 +                      gmx_fatal(FARGS,"Error reading from stdin");
 +                  }
 +                  bMerged = (select[0] == 'y');
 +              }
 +              else if(!strncmp(merge[0],"all",3))
 +              {
 +                  bMerged = TRUE;
 +              }
 +          }
 +          
 +          if (bMerged)
 +          { 
 +              pdb_ch[nch-1].chainstart[pdb_ch[nch-1].nterpairs] = 
 +              pdba_all.atom[i].resind - prev_chainstart;
 +              pdb_ch[nch-1].nterpairs++;
 +              srenew(pdb_ch[nch-1].chainstart,pdb_ch[nch-1].nterpairs+1);
 +              nchainmerges++;
 +          }
 +          else 
 +          {
 +              /* set natom for previous chain */
 +              if (nch > 0)
 +              {
 +                  pdb_ch[nch-1].natom=i-pdb_ch[nch-1].start;
 +              }
 +              if (bWat)
 +              {
 +                  nwaterchain++;
 +                  ri->chainid = ' ';
 +              }
 +              /* check if chain identifier was used before */
 +              for (j=0; (j<nch); j++) 
 +              {
 +                  if (pdb_ch[j].chainid != ' ' && pdb_ch[j].chainid == ri->chainid) 
 +                  {
 +                      printf("WARNING: Chain identifier '%c' is used in two non-sequential blocks.\n"
 +                             "They will be treated as separate chains unless you reorder your file.\n",
 +                             ri->chainid);
 +                  }
 +              }
 +              if (nch == maxch)
 +              {
 +                  maxch += 16;
 +                  srenew(pdb_ch,maxch);
 +              }
 +              pdb_ch[nch].chainid = ri->chainid;
 +              pdb_ch[nch].chainnum = ri->chainnum; 
 +              pdb_ch[nch].start=i;
 +              pdb_ch[nch].bAllWat=bWat;
 +              if (bWat)
 +                  pdb_ch[nch].nterpairs=0;
 +              else
 +                  pdb_ch[nch].nterpairs=1;
 +              snew(pdb_ch[nch].chainstart,pdb_ch[nch].nterpairs+1);
 +              /* modified [nch] to [0] below */
 +              pdb_ch[nch].chainstart[0]=0;
 +              nch++;
 +          }
 +      }
 +      bPrevWat=bWat;
 +  }
 +  pdb_ch[nch-1].natom=natom-pdb_ch[nch-1].start;
 +  
 +  /* set all the water blocks at the end of the chain */
 +  snew(swap_index,nch);
 +  j=0;
 +  for(i=0; i<nch; i++)
 +    if (!pdb_ch[i].bAllWat) {
 +      swap_index[j]=i;
 +      j++;
 +    }
 +  for(i=0; i<nch; i++)
 +    if (pdb_ch[i].bAllWat) {
 +      swap_index[j]=i;
 +      j++;
 +    }
 +  if (nwaterchain>1)
 +    printf("Moved all the water blocks to the end\n");
 +
 +  snew(chains,nch);
 +  /* copy pdb data and x for all chains */
 +  for (i=0; (i<nch); i++) {
 +    si=swap_index[i];
 +    chains[i].chainid = pdb_ch[si].chainid;
 +    chains[i].chainnum = pdb_ch[si].chainnum;
 +    chains[i].bAllWat = pdb_ch[si].bAllWat;
 +    chains[i].nterpairs = pdb_ch[si].nterpairs;
 +    chains[i].chainstart = pdb_ch[si].chainstart;
 +    snew(chains[i].ntdb,pdb_ch[si].nterpairs);
 +    snew(chains[i].ctdb,pdb_ch[si].nterpairs);
 +    snew(chains[i].r_start,pdb_ch[si].nterpairs);
 +    snew(chains[i].r_end,pdb_ch[si].nterpairs);
 +      
 +    snew(chains[i].pdba,1);
 +    init_t_atoms(chains[i].pdba,pdb_ch[si].natom,TRUE);
 +    snew(chains[i].x,chains[i].pdba->nr);
 +    for (j=0; j<chains[i].pdba->nr; j++) {
 +      chains[i].pdba->atom[j] = pdba_all.atom[pdb_ch[si].start+j];
 +      snew(chains[i].pdba->atomname[j],1);
 +      *chains[i].pdba->atomname[j] = 
 +      strdup(*pdba_all.atomname[pdb_ch[si].start+j]);
 +      chains[i].pdba->pdbinfo[j] = pdba_all.pdbinfo[pdb_ch[si].start+j];
 +      copy_rvec(pdbx[pdb_ch[si].start+j],chains[i].x[j]);
 +    }
 +    /* Re-index the residues assuming that the indices are continuous */
 +    k    = chains[i].pdba->atom[0].resind;
 +    nres = chains[i].pdba->atom[chains[i].pdba->nr-1].resind - k + 1;
 +    chains[i].pdba->nres = nres;
 +    for(j=0; j < chains[i].pdba->nr; j++) {
 +      chains[i].pdba->atom[j].resind -= k;
 +    }
 +    srenew(chains[i].pdba->resinfo,nres);
 +    for(j=0; j<nres; j++) {
 +      chains[i].pdba->resinfo[j] = pdba_all.resinfo[k+j];
 +      snew(chains[i].pdba->resinfo[j].name,1);
 +      *chains[i].pdba->resinfo[j].name = strdup(*pdba_all.resinfo[k+j].name);
 +      /* make all chain identifiers equal to that of the chain */
 +      chains[i].pdba->resinfo[j].chainid = pdb_ch[si].chainid;
 +    }
 +  }
 +
 +  if (nchainmerges>0)
 +    printf("\nMerged chains into joint molecule definitions at %d places.\n\n",
 +           nchainmerges);
 +
 +  printf("There are %d chains and %d blocks of water and "
 +       "%d residues with %d atoms\n",
 +       nch-nwaterchain,nwaterchain,
 +       pdba_all.resinfo[pdba_all.atom[natom-1].resind].nr,natom);
 +        
 +  printf("\n  %5s  %4s %6s\n","chain","#res","#atoms");
 +  for (i=0; (i<nch); i++)
 +    printf("  %d '%c' %5d %6d  %s\n",
 +         i+1, chains[i].chainid ? chains[i].chainid:'-',
 +         chains[i].pdba->nres, chains[i].pdba->nr,
 +         chains[i].bAllWat ? "(only water)":"");
 +  printf("\n");
 +  
 +  check_occupancy(&pdba_all,opt2fn("-f",NFILE,fnm),bVerbose);
 +  
 +  /* Read atomtypes... */
 +  atype = read_atype(ffdir,&symtab);
 +  
 +  /* read residue database */
 +  printf("Reading residue database... (%s)\n",forcefield);
 +  nrtpf = fflib_search_file_end(ffdir,".rtp",TRUE,&rtpf);
 +  nrtp  = 0;
 +  restp = NULL;
 +  for(i=0; i<nrtpf; i++) {
 +    read_resall(rtpf[i],&nrtp,&restp,atype,&symtab,FALSE);
 +    sfree(rtpf[i]);
 +  }
 +  sfree(rtpf);
 +  if (bNewRTP) {
 +    /* Not correct with multiple rtp input files with different bonded types */
 +    fp=gmx_fio_fopen("new.rtp","w");
 +    print_resall(fp,nrtp,restp,atype);
 +    gmx_fio_fclose(fp);
 +  }
 +    
 +  /* read hydrogen database */
 +  nah = read_h_db(ffdir,&ah);
 +  
 +  /* Read Termini database... */
 +  nNtdb=read_ter_db(ffdir,'n',&ntdb,atype);
 +  nCtdb=read_ter_db(ffdir,'c',&ctdb,atype);
 +  
 +  top_fn=ftp2fn(efTOP,NFILE,fnm);
 +  top_file=gmx_fio_fopen(top_fn,"w");
 +
 +  sprintf(generator,"%s - %s",ShortProgram(), GromacsVersion() );
 +
 +  print_top_header(top_file,top_fn,generator,FALSE,ffdir,mHmult);
 +
 +  nincl=0;
 +  nmol=0;
 +  incls=NULL;
 +  mols=NULL;
 +  nres=0;
 +  for(chain=0; (chain<nch); chain++) {
 +    cc = &(chains[chain]);
 +
 +    /* set pdba, natom and nres to the current chain */
 +    pdba =cc->pdba;
 +    x    =cc->x;
 +    natom=cc->pdba->nr;
 +    nres =cc->pdba->nres;
 +    
 +    if (cc->chainid && ( cc->chainid != ' ' ) )
 +      printf("Processing chain %d '%c' (%d atoms, %d residues)\n",
 +            chain+1,cc->chainid,natom,nres);
 +    else
 +      printf("Processing chain %d (%d atoms, %d residues)\n",
 +            chain+1,natom,nres);
 +      
 +    process_chain(pdba,x,bUnA,bUnA,bUnA,bLysMan,bAspMan,bGluMan,
 +                bHisMan,bArgMan,bGlnMan,angle,distance,&symtab,
 +                nrtprename,rtprename);
 +      
 +        cc->chainstart[cc->nterpairs] = pdba->nres;
 +        j = 0;
 +        for(i=0; i<cc->nterpairs; i++)
 +        {
 +            find_nc_ter(pdba,cc->chainstart[i],cc->chainstart[i+1],
 +                        &(cc->r_start[j]),&(cc->r_end[j]),rt);    
 +      
 +            if (cc->r_start[j] >= 0 && cc->r_end[j] >= 0)
 +            {
 +                j++;
 +            }
 +        }
 +        cc->nterpairs = j;
 +        if (cc->nterpairs == 0)
 +        {
 +            printf("Problem with chain definition, or missing terminal residues.\n"
 +                   "This chain does not appear to contain a recognized chain molecule.\n"
 +                   "If this is incorrect, you can edit residuetypes.dat to modify the behavior.\n");
 +        }
 +
 +    /* Check for disulfides and other special bonds */
 +    nssbonds = mk_specbonds(pdba,x,bCysMan,&ssbonds,bVerbose);
 +
 +    if (nrtprename > 0) {        
 +      rename_resrtp(pdba,cc->nterpairs,cc->r_start,cc->r_end,nrtprename,rtprename,
 +                  &symtab,bVerbose);
 +    }
 +    
 +    if (debug) {
 +      if (nch==1) {
 +      sprintf(fn,"chain.pdb");
 +      } else {
 +      sprintf(fn,"chain_%c%d.pdb",cc->chainid,cc->chainnum);
 +      }
 +      write_sto_conf(fn,title,pdba,x,NULL,ePBC,box);
 +    }
 +
 +      
 +    for(i=0; i<cc->nterpairs; i++) 
 +    {
 +        
 +        /* Set termini.
 +         * We first apply a filter so we only have the
 +         * termini that can be applied to the residue in question
 +         * (or a generic terminus if no-residue specific is available).
 +         */
 +        /* First the N terminus */
 +        if (nNtdb > 0) 
 +        {
 +            tdblist = filter_ter(nrtp,restp,nNtdb,ntdb,
 +                                 *pdba->resinfo[cc->r_start[i]].name,
 +                                 *pdba->resinfo[cc->r_start[i]].rtp,
 +                                 &ntdblist);
 +            if(ntdblist==0)
 +            {
 +                printf("No suitable end (N or 5') terminus found in database - assuming this residue\n"
 +                       "is already in a terminus-specific form and skipping terminus selection.\n");
 +                cc->ntdb[i]=NULL;
 +            }
 +            else 
 +            {
 +                if(bTerMan && ntdblist>1)
 +                {
 +                    sprintf(select,"Select start terminus type for %s-%d",
 +                            *pdba->resinfo[cc->r_start[i]].name,
 +                            pdba->resinfo[cc->r_start[i]].nr);
 +                    cc->ntdb[i] = choose_ter(ntdblist,tdblist,select);
 +                }
 +                else
 +                {
 +                    cc->ntdb[i] = tdblist[0];
 +                }
 +                
 +                printf("Start terminus %s-%d: %s\n",
 +                       *pdba->resinfo[cc->r_start[i]].name,
 +                       pdba->resinfo[cc->r_start[i]].nr,
 +                       (cc->ntdb[i])->name);
 +                sfree(tdblist);
 +            }
 +        }
 +        else 
 +        {
 +            cc->ntdb[i] = NULL;
 +        }
 +        
 +        /* And the C terminus */
 +        if (nCtdb > 0)
 +        {
 +            tdblist = filter_ter(nrtp,restp,nCtdb,ctdb,
 +                                 *pdba->resinfo[cc->r_end[i]].name,
 +                                 *pdba->resinfo[cc->r_end[i]].rtp,
 +                                 &ntdblist);
 +            if(ntdblist==0)
 +            {
 +                printf("No suitable end (C or 3') terminus found in database - assuming this residue\n"
 +                       "is already in a terminus-specific form and skipping terminus selection.\n");
 +                cc->ctdb[i] = NULL;
 +            }
 +            else 
 +            {
 +                if(bTerMan && ntdblist>1)
 +                {
 +                    sprintf(select,"Select end terminus type for %s-%d",
 +                            *pdba->resinfo[cc->r_end[i]].name,
 +                            pdba->resinfo[cc->r_end[i]].nr);
 +                    cc->ctdb[i] = choose_ter(ntdblist,tdblist,select);
 +                }
 +                else
 +                {
 +                    cc->ctdb[i] = tdblist[0];
 +                }
 +                printf("End terminus %s-%d: %s\n",
 +                       *pdba->resinfo[cc->r_end[i]].name,
 +                       pdba->resinfo[cc->r_end[i]].nr,
 +                       (cc->ctdb[i])->name);
 +                sfree(tdblist);
 +            }
 +        }
 +        else 
 +        {
 +            cc->ctdb[i] = NULL;
 +        }
 +    }
 +    /* lookup hackblocks and rtp for all residues */
 +    get_hackblocks_rtp(&hb_chain, &restp_chain,
 +                     nrtp, restp, pdba->nres, pdba->resinfo, 
 +                     cc->nterpairs, cc->ntdb, cc->ctdb, cc->r_start, cc->r_end);
 +    /* ideally, now we would not need the rtp itself anymore, but do 
 +     everything using the hb and restp arrays. Unfortunately, that 
 +     requires some re-thinking of code in gen_vsite.c, which I won't 
 +     do now :( AF 26-7-99 */
 +
 +    rename_atoms(NULL,ffdir,
 +               pdba,&symtab,restp_chain,FALSE,rt,FALSE,bVerbose);
 +
 +    match_atomnames_with_rtp(restp_chain,hb_chain,pdba,x,bVerbose);
 +
 +    if (bSort) {
 +      block = new_blocka();
 +      snew(gnames,1);
 +      sort_pdbatoms(pdba->nres,restp_chain,hb_chain,
 +                  natom,&pdba,&x,block,&gnames);
 +      natom = remove_duplicate_atoms(pdba,x,bVerbose);
 +      if (ftp2bSet(efNDX,NFILE,fnm)) {
 +      if (bRemoveH) {
 +        fprintf(stderr,"WARNING: with the -remh option the generated "
 +                "index file (%s) might be useless\n"
 +                "(the index file is generated before hydrogens are added)",
 +                ftp2fn(efNDX,NFILE,fnm));
 +      }
 +      write_index(ftp2fn(efNDX,NFILE,fnm),block,gnames);
 +      }
 +      for(i=0; i < block->nr; i++)
 +      sfree(gnames[i]);
 +      sfree(gnames);
 +      done_blocka(block);
 +    } else {
 +      fprintf(stderr,"WARNING: "
 +            "without sorting no check for duplicate atoms can be done\n");
 +    }
 +
 +    /* Generate Hydrogen atoms (and termini) in the sequence */
 +    natom=add_h(&pdba,&x,nah,ah,
 +              cc->nterpairs,cc->ntdb,cc->ctdb,cc->r_start,cc->r_end,bAllowMissing,
 +              NULL,NULL,TRUE,FALSE);
 +    printf("Now there are %d residues with %d atoms\n",
 +         pdba->nres,pdba->nr);
 +    if (debug) write_pdbfile(debug,title,pdba,x,ePBC,box,' ',0,NULL,TRUE);
 +
 +    if (debug)
 +      for(i=0; (i<natom); i++)
 +      fprintf(debug,"Res %s%d atom %d %s\n",
 +              *(pdba->resinfo[pdba->atom[i].resind].name),
 +              pdba->resinfo[pdba->atom[i].resind].nr,i+1,*pdba->atomname[i]);
 +    
 +    strcpy(posre_fn,ftp2fn(efITP,NFILE,fnm));
 +    
 +    /* make up molecule name(s) */
 +
 +      k = (cc->nterpairs>0 && cc->r_start[0]>=0) ? cc->r_start[0] : 0;
 +            
 +    gmx_residuetype_get_type(rt,*pdba->resinfo[k].name,&p_restype);
 +      
 +    suffix[0]='\0';
 +      
 +    if (cc->bAllWat) 
 +    {
 +        sprintf(molname,"Water");
 +    } 
 +    else
 +    {
 +        this_chainid = cc->chainid;
 +        
 +        /* Add the chain id if we have one */
 +        if(this_chainid != ' ')
 +        {
 +            sprintf(buf,"_chain_%c",this_chainid);
 +            strcat(suffix,buf);
 +        }
 +
 +        /* Check if there have been previous chains with the same id */
 +        nid_used = 0;
 +        for(k=0;k<chain;k++)
 +        {
 +            if(cc->chainid == chains[k].chainid)
 +            {
 +                nid_used++;
 +            }
 +        }
 +        /* Add the number for this chain identifier if there are multiple copies */
 +        if(nid_used>0)
 +        {
 +            
 +            sprintf(buf,"%d",nid_used+1);
 +            strcat(suffix,buf);
 +        }
 +
 +        if(strlen(suffix)>0)
 +        {
 +            sprintf(molname,"%s%s",p_restype,suffix);
 +        }
 +        else
 +        {
 +            strcpy(molname,p_restype);
 +        }
 +    }
 +      
 +    if ((nch-nwaterchain>1) && !cc->bAllWat) {
 +      bITP=TRUE;
 +      strcpy(itp_fn,top_fn);
 +      printf("Chain time...\n");
 +      c=strrchr(itp_fn,'.');
 +      sprintf(c,"_%s.itp",molname);
 +      c=strrchr(posre_fn,'.');
 +      sprintf(c,"_%s.itp",molname);
 +      if (strcmp(itp_fn,posre_fn) == 0) {
 +      strcpy(buf_fn,posre_fn);
 +      c  = strrchr(buf_fn,'.');
 +      *c = '\0';
 +      sprintf(posre_fn,"%s_pr.itp",buf_fn);
 +      }
 +      
 +      nincl++;
 +      srenew(incls,nincl);
 +      incls[nincl-1]=strdup(itp_fn);
 +      itp_file=gmx_fio_fopen(itp_fn,"w");
 +    } else
 +      bITP=FALSE;
 +
 +    srenew(mols,nmol+1);
 +    if (cc->bAllWat) {
 +      mols[nmol].name = strdup("SOL");
 +      mols[nmol].nr   = pdba->nres;
 +    } else {
 +      mols[nmol].name = strdup(molname);
 +      mols[nmol].nr   = 1;
 +    }
 +    nmol++;
 +
 +    if (bITP)
 +      print_top_comment(itp_file,itp_fn,generator,ffdir,TRUE);
 +
 +    if (cc->bAllWat)
 +      top_file2=NULL;
 +    else
 +      if (bITP)
 +      top_file2=itp_file;
 +      else
 +      top_file2=top_file;
 +
 +    pdb2top(top_file2,posre_fn,molname,pdba,&x,atype,&symtab,
 +          nrtp,restp,
 +          restp_chain,hb_chain,
 +          cc->nterpairs,cc->ntdb,cc->ctdb,bAllowMissing,
 +          bVsites,bVsiteAromatics,forcefield,ffdir,
 +          mHmult,nssbonds,ssbonds,
 +          long_bond_dist,short_bond_dist,bDeuterate,bChargeGroups,bCmap,
 +          bRenumRes,bRTPresname);
 +    
 +    if (!cc->bAllWat)
 +      write_posres(posre_fn,pdba,posre_fc);
 +
 +    if (bITP)
 +      gmx_fio_fclose(itp_file);
 +
 +    /* pdba and natom have been reassigned somewhere so: */
 +    cc->pdba = pdba;
 +    cc->x = x;
 +    
 +    if (debug) {
 +      if (cc->chainid == ' ')
 +      sprintf(fn,"chain.pdb");
 +      else
 +      sprintf(fn,"chain_%c.pdb",cc->chainid);
 +      cool_quote(quote,255,NULL);
 +      write_sto_conf(fn,quote,pdba,x,NULL,ePBC,box);
 +    }
 +  }
 +
 +  if (watermodel == NULL) {
 +    for(chain=0; chain<nch; chain++) {
 +      if (chains[chain].bAllWat) {
 +      gmx_fatal(FARGS,"You have chosen not to include a water model, but there is water in the input file. Select a water model or remove the water from your input file.");
 +      }
 +    }
 +  } else {
 +    sprintf(buf_fn,"%s%c%s.itp",ffdir,DIR_SEPARATOR,watermodel);
 +    if (!fflib_fexist(buf_fn)) {
 +      gmx_fatal(FARGS,"The topology file '%s' for the selected water model '%s' can not be found in the force field directory. Select a different water model.",
 +              buf_fn,watermodel);
 +    }
 +  }
 +
 +  print_top_mols(top_file,title,ffdir,watermodel,nincl,incls,nmol,mols);
 +  gmx_fio_fclose(top_file);
 +
 +  gmx_residuetype_destroy(rt);
 +    
 +  /* now merge all chains back together */
 +  natom=0;
 +  nres=0;
 +  for (i=0; (i<nch); i++) {
 +    natom+=chains[i].pdba->nr;
 +    nres+=chains[i].pdba->nres;
 +  }
 +  snew(atoms,1);
 +  init_t_atoms(atoms,natom,FALSE);
 +  for(i=0; i < atoms->nres; i++)
 +    sfree(atoms->resinfo[i].name);
 +  sfree(atoms->resinfo);
 +  atoms->nres=nres;
 +  snew(atoms->resinfo,nres);
 +  snew(x,natom);
 +  k=0;
 +  l=0;
 +  for (i=0; (i<nch); i++) {
 +    if (nch>1)
 +      printf("Including chain %d in system: %d atoms %d residues\n",
 +           i+1,chains[i].pdba->nr,chains[i].pdba->nres);
 +    for (j=0; (j<chains[i].pdba->nr); j++) {
 +      atoms->atom[k]=chains[i].pdba->atom[j];
 +      atoms->atom[k].resind += l; /* l is processed nr of residues */
 +      atoms->atomname[k]=chains[i].pdba->atomname[j];
 +      atoms->resinfo[atoms->atom[k].resind].chainid = chains[i].chainid;
 +      copy_rvec(chains[i].x[j],x[k]);
 +      k++;
 +    }
 +    for (j=0; (j<chains[i].pdba->nres); j++) {
 +      atoms->resinfo[l] = chains[i].pdba->resinfo[j];
 +      if (bRTPresname) {
 +      atoms->resinfo[l].name = atoms->resinfo[l].rtp;
 +      }
 +      l++;
 +    }
 +  }
 +  
 +  if (nch>1) {
 +    fprintf(stderr,"Now there are %d atoms and %d residues\n",k,l);
 +    print_sums(atoms, TRUE);
 +  }
 +  
 +  fprintf(stderr,"\nWriting coordinate file...\n");
 +  clear_rvec(box_space);
 +  if (box[0][0] == 0) 
 +    gen_box(0,atoms->nr,x,box,box_space,FALSE);
 +  write_sto_conf(ftp2fn(efSTO,NFILE,fnm),title,atoms,x,NULL,ePBC,box);
 +
 +  printf("\t\t--------- PLEASE NOTE ------------\n");
 +  printf("You have successfully generated a topology from: %s.\n",
 +       opt2fn("-f",NFILE,fnm));
 +  if (watermodel != NULL) {
 +    printf("The %s force field and the %s water model are used.\n",
 +         ffname,watermodel);
 +  } else {
 +    printf("The %s force field is used.\n",
 +         ffname);
 +  }
 +  printf("\t\t--------- ETON ESAELP ------------\n");
 +  
 +
 +  thanx(stdout);
 +  
 +  return 0;
 +}
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge