Merge branch 'release-4-5-patches'

author Christoph Junghans <junghans@mpip-mainz.mpg.de>

Wed, 15 Jun 2011 15:41:51 +0000 (17:41 +0200)

committer Christoph Junghans <junghans@mpip-mainz.mpg.de>

Wed, 15 Jun 2011 15:41:51 +0000 (17:41 +0200)
author Christoph Junghans <junghans@mpip-mainz.mpg.de>
Wed, 15 Jun 2011 15:41:51 +0000 (17:41 +0200)
committer Christoph Junghans <junghans@mpip-mainz.mpg.de>
Wed, 15 Jun 2011 15:41:51 +0000 (17:41 +0200)
diff --cc CMakeLists.txt
Simple merge
diff --cc cmake/ThreadMPI.cmake
Simple merge
diff --cc share/CMakeLists.txt
Simple merge
diff --cc src/gromacs/gmxlib/copyrite.c
Simple merge
diff --cc src/gromacs/gmxlib/enxio.c
Simple merge
diff --cc src/gromacs/gmxlib/filenm.c
Simple merge
diff --cc src/gromacs/gmxlib/futil.c
Simple merge
diff --cc src/gromacs/gmxlib/gmx_system_xdr.c
Simple merge
diff --cc src/gromacs/gmxlib/index.c
Simple merge
diff --cc src/gromacs/gmxlib/main.c
Simple merge
diff --cc src/gromacs/gmxlib/matio.c
Simple merge
diff --cc src/gromacs/gmxlib/network.c

index 2a7138fc1db833054c364f0f0cfdd9062f9fd3f6,0000000000000000000000000000000000000000..6baba1ce8f70f4d6bfa9e5d6ff092b9be274fd27

mode 100644,000000..100644
--- 1/src/gromacs/gmxlib/network.c
--- /dev/null
+++ b/src/gromacs/gmxlib/network.c
@@@ -1,698 -1,0 +1,780 @@@
-   gmx_call("gmx_sumd");
+ +/*
+ + * 
+ + *                This source code is part of
+ + * 
+ + *                 G   R   O   M   A   C   S
+ + * 
+ + *          GROningen MAchine for Chemical Simulations
+ + * 
+ + *                        VERSION 3.2.0
+ + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ + * Copyright (c) 2001-2004, The GROMACS development team,
+ + * check out http://www.gromacs.org for more information.
+ +
+ + * This program is free software; you can redistribute it and/or
+ + * modify it under the terms of the GNU General Public License
+ + * as published by the Free Software Foundation; either version 2
+ + * of the License, or (at your option) any later version.
+ + * 
+ + * If you want to redistribute modifications, please consider that
+ + * scientific software is very special. Version control is crucial -
+ + * bugs must be traceable. We will be happy to consider code for
+ + * inclusion in the official distribution, but derived work must not
+ + * be called official GROMACS. Details are found in the README & COPYING
+ + * files - if they are missing, get the official version at www.gromacs.org.
+ + * 
+ + * To help us fund GROMACS development, we humbly ask that you cite
+ + * the papers on the package - you can find them in the top README file.
+ + * 
+ + * For more info, check our website at http://www.gromacs.org
+ + * 
+ + * And Hey:
+ + * GROningen Mixture of Alchemy and Childrens' Stories
+ + */
+ +#ifdef HAVE_CONFIG_H
+ +#include <config.h>
+ +#endif
+ +
+ +#include <string.h>
+ +#include "gmx_fatal.h"
+ +#include "main.h"
+ +#include "smalloc.h"
+ +#include "network.h"
+ +#include "copyrite.h"
+ +#include "statutil.h"
+ +#include "ctype.h"
+ +#include "macros.h"
+ +
+ +#ifdef GMX_LIB_MPI
+ +#include <mpi.h>
+ +#endif
+ +
+ +#ifdef GMX_THREADS
+ +#include "tmpi.h"
+ +#endif
+ +
+ +#include "mpelogging.h"
+ +
+ +/* The source code in this file should be thread-safe. 
+ +      Please keep it that way. */
+ +
+ +/* Report whether MPI has been initialised.
+ + * Without GMX_MPI this always returns 0; otherwise it returns the flag
+ + * that MPI_Initialized() stores in n.
+ + */
+ +gmx_bool gmx_mpi_initialized(void)
+ +{
+ +  int n;
+ +#ifndef GMX_MPI
+ +  return 0;
+ +#else
+ +  MPI_Initialized(&n);
+ +  
+ +  return n;
+ +#endif
+ +}
+ +
+ +/* Start up the parallel environment.
+ + *
+ + * argc, argv : command line, handed to MPI_Init (or fah_MPI_Init) when
+ + *              GMX_LIB_MPI is defined.
+ + * nnodes     : receives the size of MPI_COMM_WORLD.
+ + *
+ + * Returns this process's rank in MPI_COMM_WORLD.
+ + * Without GMX_MPI the routine only calls gmx_call("gmx_setup") and
+ + * returns 0; *nnodes is not written in that case.
+ + * With USE_MPE it also requests the MPE event numbers, lets rank 0
+ + * describe the states and calls MPE_Init_log().
+ + */
+ +int gmx_setup(int *argc,char **argv,int *nnodes)
+ +{
+ +#ifndef GMX_MPI
+ +  gmx_call("gmx_setup");
+ +  return 0;
+ +#else
+ +  int  resultlen;                 /* actual length of node name      */
+ +  int  mpi_num_nodes;
+ +  int  mpi_my_rank;
+ +  char mpi_hostname[MPI_MAX_PROCESSOR_NAME];
+ +
+ +  /* Call the MPI routines */
+ +#ifdef GMX_LIB_MPI
+ +#ifdef GMX_FAHCORE
+ +  (void) fah_MPI_Init(argc,&argv);
+ +#else
+ +  (void) MPI_Init(argc,&argv);
+ +#endif
+ +#endif
+ +  (void) MPI_Comm_size( MPI_COMM_WORLD, &mpi_num_nodes );
+ +  (void) MPI_Comm_rank( MPI_COMM_WORLD, &mpi_my_rank );
+ +  (void) MPI_Get_processor_name( mpi_hostname, &resultlen );
+ +
+ +
+ +#ifdef USE_MPE
+ +  /* MPE logging routines. Get event IDs from MPE: */
+ +  /* General events */
+ +  ev_timestep1               = MPE_Log_get_event_number( );
+ +  ev_timestep2               = MPE_Log_get_event_number( );
+ +  ev_force_start             = MPE_Log_get_event_number( );
+ +  ev_force_finish            = MPE_Log_get_event_number( );
+ +  ev_do_fnbf_start           = MPE_Log_get_event_number( );
+ +  ev_do_fnbf_finish          = MPE_Log_get_event_number( );
+ +  ev_ns_start                = MPE_Log_get_event_number( );
+ +  ev_ns_finish               = MPE_Log_get_event_number( );
+ +  ev_calc_bonds_start        = MPE_Log_get_event_number( );
+ +  ev_calc_bonds_finish       = MPE_Log_get_event_number( );
+ +  ev_global_stat_start       = MPE_Log_get_event_number( );
+ +  ev_global_stat_finish      = MPE_Log_get_event_number( );
+ +  ev_virial_start            = MPE_Log_get_event_number( );
+ +  ev_virial_finish           = MPE_Log_get_event_number( );
+ +
+ +  /* Enforced rotation */
+ +  ev_flexll_start            = MPE_Log_get_event_number( );
+ +  ev_flexll_finish           = MPE_Log_get_event_number( );
+ +  ev_add_rot_forces_start    = MPE_Log_get_event_number( );
+ +  ev_add_rot_forces_finish   = MPE_Log_get_event_number( );
+ +  ev_rotcycles_start         = MPE_Log_get_event_number( );
+ +  ev_rotcycles_finish        = MPE_Log_get_event_number( );
+ +  ev_forcecycles_start       = MPE_Log_get_event_number( );
+ +  ev_forcecycles_finish      = MPE_Log_get_event_number( );
+ +
+ +  /* Shift related events */
+ +  ev_shift_start             = MPE_Log_get_event_number( );
+ +  ev_shift_finish            = MPE_Log_get_event_number( );
+ +  ev_unshift_start           = MPE_Log_get_event_number( );
+ +  ev_unshift_finish          = MPE_Log_get_event_number( );
+ +  ev_mk_mshift_start         = MPE_Log_get_event_number( );
+ +  ev_mk_mshift_finish        = MPE_Log_get_event_number( );
+ +
+ +  /* PME related events.
+ +   * ev_sum_qgrid_start/finish are requested exactly once here; a second
+ +   * request would only overwrite the IDs and waste two event numbers.
+ +   */
+ +  ev_pme_start               = MPE_Log_get_event_number( );
+ +  ev_pme_finish              = MPE_Log_get_event_number( );
+ +  ev_spread_on_grid_start    = MPE_Log_get_event_number( );
+ +  ev_spread_on_grid_finish   = MPE_Log_get_event_number( );
+ +  ev_sum_qgrid_start         = MPE_Log_get_event_number( );
+ +  ev_sum_qgrid_finish        = MPE_Log_get_event_number( );
+ +  ev_gmxfft3d_start          = MPE_Log_get_event_number( );
+ +  ev_gmxfft3d_finish         = MPE_Log_get_event_number( );
+ +  ev_solve_pme_start         = MPE_Log_get_event_number( );
+ +  ev_solve_pme_finish        = MPE_Log_get_event_number( );
+ +  ev_gather_f_bsplines_start = MPE_Log_get_event_number( );
+ +  ev_gather_f_bsplines_finish= MPE_Log_get_event_number( );
+ +  ev_reduce_start            = MPE_Log_get_event_number( );
+ +  ev_reduce_finish           = MPE_Log_get_event_number( );
+ +  ev_rscatter_start          = MPE_Log_get_event_number( );
+ +  ev_rscatter_finish         = MPE_Log_get_event_number( );
+ +  ev_alltoall_start          = MPE_Log_get_event_number( );
+ +  ev_alltoall_finish         = MPE_Log_get_event_number( );
+ +  ev_pmeredist_start         = MPE_Log_get_event_number( );
+ +  ev_pmeredist_finish        = MPE_Log_get_event_number( );
+ +  ev_init_pme_start          = MPE_Log_get_event_number( );
+ +  ev_init_pme_finish         = MPE_Log_get_event_number( );
+ +  ev_send_coordinates_start  = MPE_Log_get_event_number( );
+ +  ev_send_coordinates_finish = MPE_Log_get_event_number( );
+ +  ev_update_fr_start         = MPE_Log_get_event_number( );
+ +  ev_update_fr_finish        = MPE_Log_get_event_number( );
+ +  ev_clear_rvecs_start       = MPE_Log_get_event_number( );
+ +  ev_clear_rvecs_finish      = MPE_Log_get_event_number( );
+ +  ev_update_start            = MPE_Log_get_event_number( );
+ +  ev_update_finish           = MPE_Log_get_event_number( );
+ +  ev_output_start            = MPE_Log_get_event_number( );
+ +  ev_output_finish           = MPE_Log_get_event_number( );
+ +  ev_sum_lrforces_start      = MPE_Log_get_event_number( );
+ +  ev_sum_lrforces_finish     = MPE_Log_get_event_number( );
+ +  ev_sort_start              = MPE_Log_get_event_number( );
+ +  ev_sort_finish             = MPE_Log_get_event_number( );
+ +
+ +  /* Essential dynamics related events */
+ +  ev_edsam_start             = MPE_Log_get_event_number( );
+ +  ev_edsam_finish            = MPE_Log_get_event_number( );
+ +  ev_get_coords_start        = MPE_Log_get_event_number( );
+ +  ev_get_coords_finish       = MPE_Log_get_event_number( );
+ +  ev_ed_apply_cons_start     = MPE_Log_get_event_number( );
+ +  ev_ed_apply_cons_finish    = MPE_Log_get_event_number( );
+ +  ev_fit_to_reference_start  = MPE_Log_get_event_number( );
+ +  ev_fit_to_reference_finish = MPE_Log_get_event_number( );
+ +
+ +  /* describe events: */
+ +  if ( mpi_my_rank == 0 )
+ +  {
+ +    /* General events */
+ +    MPE_Describe_state(ev_timestep1,               ev_timestep2,                "timestep START",  "magenta" );
+ +    MPE_Describe_state(ev_force_start,             ev_force_finish,             "force",           "cornflower blue" );
+ +    MPE_Describe_state(ev_do_fnbf_start,           ev_do_fnbf_finish,           "do_fnbf",         "navy" );
+ +    MPE_Describe_state(ev_ns_start,                ev_ns_finish,                "neighbor search", "tomato" );
+ +    MPE_Describe_state(ev_calc_bonds_start,        ev_calc_bonds_finish,        "bonded forces",   "slate blue" );
+ +    MPE_Describe_state(ev_global_stat_start,       ev_global_stat_finish,       "global stat",     "firebrick3");
+ +    MPE_Describe_state(ev_update_fr_start,         ev_update_fr_finish,         "update forcerec", "goldenrod");
+ +    MPE_Describe_state(ev_clear_rvecs_start,       ev_clear_rvecs_finish,       "clear rvecs",     "bisque");
+ +    MPE_Describe_state(ev_update_start,            ev_update_finish,            "update",          "cornsilk");
+ +    MPE_Describe_state(ev_output_start,            ev_output_finish,            "output",          "black");
+ +    MPE_Describe_state(ev_virial_start,            ev_virial_finish,            "calc_virial",     "thistle4");
+ +
+ +    /* Enforced rotation */
+ +    MPE_Describe_state(ev_flexll_start,            ev_flexll_finish,            "flex lowlevel",   "navajo white");
+ +    MPE_Describe_state(ev_add_rot_forces_start,    ev_add_rot_forces_finish,    "add rot forces",  "green");
+ +    MPE_Describe_state(ev_rotcycles_start,         ev_rotcycles_finish,         "count rot cyc",   "moccasin");
+ +    MPE_Describe_state(ev_forcecycles_start,       ev_forcecycles_finish,       "count force cyc", "powder blue");
+ +
+ +    /* PME related events */
+ +    MPE_Describe_state(ev_pme_start,               ev_pme_finish,               "doing PME",       "grey" );
+ +    MPE_Describe_state(ev_spread_on_grid_start,    ev_spread_on_grid_finish,    "spread",          "dark orange" );
+ +    MPE_Describe_state(ev_sum_qgrid_start,         ev_sum_qgrid_finish,         "sum qgrid",       "slate blue");
+ +    MPE_Describe_state(ev_gmxfft3d_start,          ev_gmxfft3d_finish,          "fft3d",           "snow2" );
+ +    MPE_Describe_state(ev_solve_pme_start,         ev_solve_pme_finish,         "solve PME",       "indian red" );
+ +    MPE_Describe_state(ev_gather_f_bsplines_start, ev_gather_f_bsplines_finish, "bsplines",        "light sea green" );
+ +    MPE_Describe_state(ev_reduce_start,            ev_reduce_finish,            "reduce",          "cyan1" );
+ +    MPE_Describe_state(ev_rscatter_start,          ev_rscatter_finish,          "rscatter",        "cyan3" );
+ +    MPE_Describe_state(ev_alltoall_start,          ev_alltoall_finish,          "alltoall",        "LightCyan4" );
+ +    MPE_Describe_state(ev_pmeredist_start,         ev_pmeredist_finish,         "pmeredist",       "thistle" );
+ +    MPE_Describe_state(ev_init_pme_start,          ev_init_pme_finish,          "init PME",        "snow4");
+ +    MPE_Describe_state(ev_send_coordinates_start,  ev_send_coordinates_finish,  "send_coordinates","blue");
+ +    MPE_Describe_state(ev_sum_lrforces_start,      ev_sum_lrforces_finish,      "sum_LRforces",    "lime green");
+ +    MPE_Describe_state(ev_sort_start,              ev_sort_finish,              "sort pme atoms",  "brown");
+ +    /* NOTE(review): the sum_qgrid state is described a second time here
+ +     * with a different name and colour than a few lines up. Which
+ +     * description a log viewer shows is not clear from this file, so
+ +     * both calls are kept - confirm and drop one of them.
+ +     */
+ +    MPE_Describe_state(ev_sum_qgrid_start,         ev_sum_qgrid_finish,         "sum charge grid", "medium orchid");
+ +
+ +    /* Shift related events */
+ +    MPE_Describe_state(ev_shift_start,             ev_shift_finish,             "shift",           "orange");
+ +    MPE_Describe_state(ev_unshift_start,           ev_unshift_finish,           "unshift",         "dark orange");
+ +    MPE_Describe_state(ev_mk_mshift_start,         ev_mk_mshift_finish,         "mk_mshift",       "maroon");
+ +
+ +    /* Essential dynamics related events */
+ +    MPE_Describe_state(ev_edsam_start,             ev_edsam_finish,             "EDSAM",           "deep sky blue");
+ +    MPE_Describe_state(ev_get_coords_start,        ev_get_coords_finish,        "ED get coords",   "steel blue");
+ +    MPE_Describe_state(ev_ed_apply_cons_start,     ev_ed_apply_cons_finish,     "ED apply constr", "forest green");
+ +    MPE_Describe_state(ev_fit_to_reference_start,  ev_fit_to_reference_finish,  "ED fit to ref",   "lavender");
+ +
+ +  }
+ +  MPE_Init_log();
+ +#endif
+ +
+ +#ifdef GMX_LIB_MPI
+ +  fprintf(stderr,"NNODES=%d, MYRANK=%d, HOSTNAME=%s\n",
+ +        mpi_num_nodes,mpi_my_rank,mpi_hostname);
+ +#endif
+ +
+ +  *nnodes=mpi_num_nodes;
+ +
+ +  return mpi_my_rank;
+ +#endif
+ +}
+ +
+ +/* Return the size of MPI_COMM_WORLD, or 1 when built without GMX_MPI. */
+ +int  gmx_node_num(void)
+ +{
+ +#ifndef GMX_MPI
+ +  return 1;
+ +#else
+ +  int i;
+ +  (void) MPI_Comm_size(MPI_COMM_WORLD, &i);
+ +  return i;
+ +#endif
+ +}
+ +
+ +/* Return this process's rank in MPI_COMM_WORLD, or 0 when built
+ + * without GMX_MPI.
+ + */
+ +int gmx_node_rank(void)
+ +{
+ +#ifndef GMX_MPI
+ +  return 0;
+ +#else
+ +  int i;
+ +  (void) MPI_Comm_rank(MPI_COMM_WORLD, &i);
+ +  return i;
+ +#endif
+ +}
+ +
+ +/* Set up the intra-node and inter-node communicators in cr->nc that
+ + * the gmx_sum* routines use for two step summing.
+ + *
+ + * fplog : log file, may be NULL; receives one line when two step
+ + *         summing is enabled.
+ + * cr    : communication record; cr->mpi_comm_mygroup is split and the
+ + *         result is stored in cr->nc.
+ + *
+ + * cr->nc.bUse is always set: it stays FALSE with GMX_THREADS, without
+ + * GMX_MPI, when the environment variable GMX_NO_NODECOMM is set, or
+ + * when the split does not produce a real two level layout.
+ + */
+ +void gmx_setup_nodecomm(FILE *fplog,t_commrec *cr)
+ +{
+ +  gmx_nodecomm_t *nc;
+ +  int  n,rank,resultlen,hostnum,i,j,ng,ni;
+ +#ifdef GMX_MPI
+ +  char mpi_hostname[MPI_MAX_PROCESSOR_NAME],num[MPI_MAX_PROCESSOR_NAME];
+ +#endif
+ +
+ +  /* Many MPI implementations do not optimize MPI_Allreduce
+ +   * (and probably also other global communication calls)
+ +   * for multi-core nodes connected by a network.
+ +   * We can optimize such communication by using one MPI call
+ +   * within each node and one between the nodes.
+ +   * For MVAPICH2 and Intel MPI this reduces the time for
+ +   * the global_stat communication by 25%
+ +   * for 2x2-core 3 GHz Woodcrest connected by mixed DDR/SDR Infiniband.
+ +   * B. Hess, November 2007
+ +   */
+ +
+ +  nc = &cr->nc;
+ +
+ +  nc->bUse = FALSE;
+ +#ifndef GMX_THREADS
+ +  if (getenv("GMX_NO_NODECOMM") == NULL) {
+ +#ifdef GMX_MPI
+ +    MPI_Comm_size(cr->mpi_comm_mygroup,&n);
+ +    MPI_Comm_rank(cr->mpi_comm_mygroup,&rank);
+ +    MPI_Get_processor_name(mpi_hostname,&resultlen);
+ +    /* This procedure can only differentiate nodes whose host names
+ +     * contain distinguishing digits: every digit in front of the
+ +     * first dot is collected, in order, into num.
+ +     */
+ +    i = 0;
+ +    j = 0;
+ +    /* Only parse the host name up to the first dot */
+ +    while(i < resultlen && mpi_hostname[i] != '.') {
+ +      /* The cast is required: passing a negative plain char to
+ +       * isdigit() is undefined behaviour.
+ +       */
+ +      if (isdigit((unsigned char)mpi_hostname[i])) {
+ +        num[j++] = mpi_hostname[i];
+ +      }
+ +      i++;
+ +    }
+ +    num[j] = '\0';
+ +    if (j == 0) {
+ +      hostnum = 0;
+ +    } else {
+ +      /* Use only the last 9 decimals, so we don't overflow an int */
+ +      hostnum = strtol(num + max(0,j-9), NULL, 10);
+ +    }
+ +
+ +    if (debug) {
+ +      fprintf(debug,
+ +            "In gmx_setup_nodecomm: splitting communicator of size %d\n",
+ +            n);
+ +      fprintf(debug,"In gmx_setup_nodecomm: hostname '%s', hostnum %d\n",
+ +            mpi_hostname,hostnum);
+ +    }
+ +
+ +    /* The intra-node communicator, split on node number */
+ +    MPI_Comm_split(cr->mpi_comm_mygroup,hostnum,rank,&nc->comm_intra);
+ +    MPI_Comm_rank(nc->comm_intra,&nc->rank_intra);
+ +    if (debug) {
+ +      fprintf(debug,"In gmx_setup_nodecomm: node rank %d rank_intra %d\n",
+ +            rank,nc->rank_intra);
+ +    }
+ +    /* The inter-node communicator, split on rank_intra.
+ +     * We actually only need the one for rank=0,
+ +     * but it is easier to create them all.
+ +     */
+ +    MPI_Comm_split(cr->mpi_comm_mygroup,nc->rank_intra,rank,&nc->comm_inter);
+ +    /* Check if this really created two step communication */
+ +    MPI_Comm_size(nc->comm_inter,&ng);
+ +    MPI_Comm_size(nc->comm_intra,&ni);
+ +    if (debug) {
+ +      fprintf(debug,"In gmx_setup_nodecomm: groups %d, my group size %d\n",
+ +            ng,ni);
+ +    }
+ +    if ((ng > 1 && ng < n) || (ni > 1 && ni < n)) {
+ +      nc->bUse = TRUE;
+ +      if (fplog) {
+ +        fprintf(fplog,"Using two step summing over %d groups of on average %.1f processes\n\n",ng,(real)n/(real)ng);
+ +      }
+ +      /* Only the intra-node roots keep an inter-node communicator */
+ +      if (nc->rank_intra > 0) {
+ +        MPI_Comm_free(&nc->comm_inter);
+ +      }
+ +    } else {
+ +      /* One group or all processes in a separate group, use normal summing */
+ +      MPI_Comm_free(&nc->comm_inter);
+ +      MPI_Comm_free(&nc->comm_intra);
+ +    }
+ +#endif
+ +  }
+ +#endif
+ +}
+ +
+ +/* Wait in MPI_Barrier on cr->mpi_comm_mygroup.
+ + * Without GMX_MPI this only calls gmx_call("gmx_barrier") (defined
+ + * elsewhere; presumably reports a parallel call in a serial build).
+ + */
+ +void gmx_barrier(const t_commrec *cr)
+ +{
+ +#ifndef GMX_MPI
+ +  gmx_call("gmx_barrier");
+ +#else
+ +  MPI_Barrier(cr->mpi_comm_mygroup);
+ +#endif
+ +}
+ +
+ +/* Print a "Halting ..." message to stderr and terminate the program.
+ + *
+ + * noderank, nnodes : only used for the message in the non-threaded
+ + *                    MPI build (printed when nnodes > 1).
+ + * errorno          : error code passed to MPI_Abort in that build.
+ + *
+ + * With GMX_THREADS the process just calls exit(1); in the plain MPI
+ + * build MPI_Abort(MPI_COMM_WORLD,errorno) is called first.
+ + * Without GMX_MPI only gmx_call("gmx_abort") is invoked.
+ + */
+ +void gmx_abort(int noderank,int nnodes,int errorno)
+ +{
+ +#ifndef GMX_MPI
+ +  gmx_call("gmx_abort");
+ +#else
+ +#ifdef GMX_THREADS
+ +  fprintf(stderr,"Halting program %s\n",ShortProgram());
+ +  thanx(stderr);
+ +  exit(1);
+ +#else
+ +  if (nnodes > 1)
+ +  {
+ +      fprintf(stderr,"Halting parallel program %s on CPU %d out of %d\n",
+ +              ShortProgram(),noderank,nnodes);
+ +  }
+ +  else
+ +  {
+ +      fprintf(stderr,"Halting program %s\n",ShortProgram());
+ +  }
+ +
+ +  thanx(stderr);
+ +  MPI_Abort(MPI_COMM_WORLD,errorno);
+ +  exit(1);
+ +#endif
+ +#endif
+ +}
+ +
+ +/* Broadcast nbytes bytes starting at b from MASTERRANK(cr) to all
+ + * ranks of cr->mpi_comm_mygroup.
+ + * Without GMX_MPI only gmx_call() is invoked, with this routine's name.
+ + */
+ +void gmx_bcast(int nbytes,void *b,const t_commrec *cr)
+ +{
+ +#ifndef GMX_MPI
+ +  gmx_call("gmx_bcast");
+ +#else
+ +  MPI_Bcast(b,nbytes,MPI_BYTE,MASTERRANK(cr),cr->mpi_comm_mygroup);
+ +#endif
+ +}
+ +
+ +/* Broadcast nbytes bytes starting at b from MASTERRANK(cr) to all
+ + * ranks of cr->mpi_comm_mysim.
+ + * Without GMX_MPI only gmx_call() is invoked, with this routine's name.
+ + */
+ +void gmx_bcast_sim(int nbytes,void *b,const t_commrec *cr)
+ +{
+ +#ifndef GMX_MPI
+ +  gmx_call("gmx_bcast_sim");
+ +#else
+ +  MPI_Bcast(b,nbytes,MPI_BYTE,MASTERRANK(cr),cr->mpi_comm_mysim);
+ +#endif
+ +}
+ +
+ +/* Sum the nr doubles in r over all ranks of cr->mpi_comm_mygroup;
+ + * every rank ends up with the total in r.
+ + *
+ + * When cr->nc.bUse is set the sum is done in two steps (within
+ + * comm_intra, then between the rank_intra == 0 processes over
+ + * comm_inter) and the result is broadcast over comm_intra.
+ + * If neither MPI_IN_PLACE_EXISTS nor GMX_THREADS is defined, the
+ + * scratch buffer cr->mpb->dbuf is used instead of MPI_IN_PLACE and is
+ + * grown when nr exceeds dbuf_alloc.
+ + * Without GMX_MPI only gmx_call("gmx_sumd") is invoked.
+ + */
+ +void gmx_sumd(int nr,double r[],const t_commrec *cr)
+ +{
+ +#ifndef GMX_MPI
+ +    gmx_call("gmx_sumd");
+ +#else
+ +#if defined(MPI_IN_PLACE_EXISTS) || defined(GMX_THREADS)
+ +    if (cr->nc.bUse) {
+ +        if (cr->nc.rank_intra == 0)
+ +        {
+ +            /* Use two step summing. */
+ +            MPI_Reduce(MPI_IN_PLACE,r,nr,MPI_DOUBLE,MPI_SUM,0,
+ +                       cr->nc.comm_intra);
+ +            /* Sum the roots of the internal (intra) buffers. */
+ +            MPI_Allreduce(MPI_IN_PLACE,r,nr,MPI_DOUBLE,MPI_SUM,
+ +                          cr->nc.comm_inter);
+ +        }
+ +        else
+ +        {
+ +            /* This is here because of the silly MPI specification
+ +                that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
+ +            MPI_Reduce(r,NULL,nr,MPI_DOUBLE,MPI_SUM,0,cr->nc.comm_intra);
+ +        }
+ +        /* Hand the total from the intra root to the other ranks */
+ +        MPI_Bcast(r,nr,MPI_DOUBLE,0,cr->nc.comm_intra);
+ +    } 
+ +    else 
+ +    {
+ +        MPI_Allreduce(MPI_IN_PLACE,r,nr,MPI_DOUBLE,MPI_SUM, 
+ +                      cr->mpi_comm_mygroup);
+ +    }
+ +#else
+ +    int i;
+ +
+ +    /* Grow the scratch buffer so it can hold nr values */
+ +    if (nr > cr->mpb->dbuf_alloc) {
+ +        cr->mpb->dbuf_alloc = nr;
+ +        srenew(cr->mpb->dbuf,cr->mpb->dbuf_alloc);
+ +    }
+ +    if (cr->nc.bUse) {
+ +        /* Use two step summing */
+ +        MPI_Allreduce(r,cr->mpb->dbuf,nr,MPI_DOUBLE,MPI_SUM,cr->nc.comm_intra);
+ +        if (cr->nc.rank_intra == 0) {
+ +            /* Sum with the buffers reversed */
+ +            MPI_Allreduce(cr->mpb->dbuf,r,nr,MPI_DOUBLE,MPI_SUM, 
+ +                          cr->nc.comm_inter);
+ +        }
+ +        MPI_Bcast(r,nr,MPI_DOUBLE,0,cr->nc.comm_intra);
+ +    } else {
+ +        MPI_Allreduce(r,cr->mpb->dbuf,nr,MPI_DOUBLE,MPI_SUM,
+ +                      cr->mpi_comm_mygroup);
+ +        /* Copy the result back into the caller's array */
+ +        for(i=0; i<nr; i++)
+ +            r[i] = cr->mpb->dbuf[i];
+ +    }
+ +#endif
+ +#endif
+ +}
+ +
+ +/* Sum the nr floats in r over all ranks of cr->mpi_comm_mygroup;
+ + * every rank ends up with the total in r.
+ + *
+ + * Same scheme as gmx_sumd: two step summing over comm_intra and
+ + * comm_inter when cr->nc.bUse is set, and the scratch buffer
+ + * cr->mpb->fbuf when MPI_IN_PLACE cannot be used.
+ + * Without GMX_MPI only gmx_call("gmx_sumf") is invoked.
+ + */
+ +void gmx_sumf(int nr,float r[],const t_commrec *cr)
+ +{
+ +#ifndef GMX_MPI
+ +    gmx_call("gmx_sumf");
+ +#else
+ +#if defined(MPI_IN_PLACE_EXISTS) || defined(GMX_THREADS)
+ +    if (cr->nc.bUse) {
+ +        /* Use two step summing.  */
+ +        if (cr->nc.rank_intra == 0)
+ +        {
+ +            MPI_Reduce(MPI_IN_PLACE,r,nr,MPI_FLOAT,MPI_SUM,0,
+ +                       cr->nc.comm_intra);
+ +            /* Sum the roots of the internal (intra) buffers */
+ +            MPI_Allreduce(MPI_IN_PLACE,r,nr,MPI_FLOAT,MPI_SUM,
+ +                          cr->nc.comm_inter);
+ +        }
+ +        else
+ +        {
+ +            /* This is here because of the silly MPI specification
+ +                that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
+ +            MPI_Reduce(r,NULL,nr,MPI_FLOAT,MPI_SUM,0,cr->nc.comm_intra);
+ +        }
+ +        /* Hand the total from the intra root to the other ranks */
+ +        MPI_Bcast(r,nr,MPI_FLOAT,0,cr->nc.comm_intra);
+ +    } 
+ +    else 
+ +    {
+ +        MPI_Allreduce(MPI_IN_PLACE,r,nr,MPI_FLOAT,MPI_SUM,cr->mpi_comm_mygroup);
+ +    }
+ +#else
+ +    int i;
+ +
+ +    /* Grow the scratch buffer so it can hold nr values */
+ +    if (nr > cr->mpb->fbuf_alloc) {
+ +        cr->mpb->fbuf_alloc = nr;
+ +        srenew(cr->mpb->fbuf,cr->mpb->fbuf_alloc);
+ +    }
+ +    if (cr->nc.bUse) {
+ +        /* Use two step summing */
+ +        MPI_Allreduce(r,cr->mpb->fbuf,nr,MPI_FLOAT,MPI_SUM,cr->nc.comm_intra);
+ +        if (cr->nc.rank_intra == 0) {
+ +            /* Sum with the buffers reversed */
+ +            MPI_Allreduce(cr->mpb->fbuf,r,nr,MPI_FLOAT,MPI_SUM, 
+ +                          cr->nc.comm_inter);
+ +        }
+ +        MPI_Bcast(r,nr,MPI_FLOAT,0,cr->nc.comm_intra);
+ +    } else {
+ +        MPI_Allreduce(r,cr->mpb->fbuf,nr,MPI_FLOAT,MPI_SUM,
+ +                      cr->mpi_comm_mygroup);
+ +        /* Copy the result back into the caller's array */
+ +        for(i=0; i<nr; i++)
+ +            r[i] = cr->mpb->fbuf[i];
+ +    }
+ +#endif
+ +#endif
+ +}
+ +
+ +/* Sum the nr ints in r over all ranks of cr->mpi_comm_mygroup;
+ + * every rank ends up with the total in r.
+ + *
+ + * Same scheme as gmx_sumd: two step summing over comm_intra and
+ + * comm_inter when cr->nc.bUse is set, and the scratch buffer
+ + * cr->mpb->ibuf when MPI_IN_PLACE cannot be used.
+ + * Without GMX_MPI only gmx_call("gmx_sumi") is invoked.
+ + */
+ +void gmx_sumi(int nr,int r[],const t_commrec *cr)
+ +{
+ +#ifndef GMX_MPI
+ +    gmx_call("gmx_sumi");
+ +#else
+ +#if defined(MPI_IN_PLACE_EXISTS) || defined(GMX_THREADS)
+ +    if (cr->nc.bUse) {
+ +        /* Use two step summing */
+ +        if (cr->nc.rank_intra == 0) 
+ +        {
+ +            MPI_Reduce(MPI_IN_PLACE,r,nr,MPI_INT,MPI_SUM,0,cr->nc.comm_intra);
+ +            /* Sum with the buffers reversed */
+ +            MPI_Allreduce(MPI_IN_PLACE,r,nr,MPI_INT,MPI_SUM,cr->nc.comm_inter);
+ +        }
+ +        else
+ +        {
+ +            /* This is here because of the silly MPI specification
+ +                that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
+ +            MPI_Reduce(r,NULL,nr,MPI_INT,MPI_SUM,0,cr->nc.comm_intra);
+ +        }
+ +        /* Hand the total from the intra root to the other ranks */
+ +        MPI_Bcast(r,nr,MPI_INT,0,cr->nc.comm_intra);
+ +    } 
+ +    else 
+ +    {
+ +        MPI_Allreduce(MPI_IN_PLACE,r,nr,MPI_INT,MPI_SUM,cr->mpi_comm_mygroup);
+ +    }
+ +#else
+ +    int i;
+ +
+ +    /* Grow the scratch buffer so it can hold nr values */
+ +    if (nr > cr->mpb->ibuf_alloc) {
+ +        cr->mpb->ibuf_alloc = nr;
+ +        srenew(cr->mpb->ibuf,cr->mpb->ibuf_alloc);
+ +    }
+ +    if (cr->nc.bUse) {
+ +        /* Use two step summing */
+ +        MPI_Allreduce(r,cr->mpb->ibuf,nr,MPI_INT,MPI_SUM,cr->nc.comm_intra);
+ +        if (cr->nc.rank_intra == 0) {
+ +            /* Sum with the buffers reversed */
+ +            MPI_Allreduce(cr->mpb->ibuf,r,nr,MPI_INT,MPI_SUM,cr->nc.comm_inter);
+ +        }
+ +        MPI_Bcast(r,nr,MPI_INT,0,cr->nc.comm_intra);
+ +    } else {
+ +        MPI_Allreduce(r,cr->mpb->ibuf,nr,MPI_INT,MPI_SUM,cr->mpi_comm_mygroup);
+ +        /* Copy the result back into the caller's array */
+ +        for(i=0; i<nr; i++)
+ +            r[i] = cr->mpb->ibuf[i];
+ +    }
+ +#endif
+ +#endif
+ +}
+ +
++/* Sum the nr large integers in r over all ranks of
++ * cr->mpi_comm_mygroup; every rank ends up with the total in r.
++ *
++ * When cr->nc.bUse is set the sum is done in two steps (within
++ * comm_intra, then between the rank_intra == 0 processes over
++ * comm_inter) and the result is broadcast over comm_intra.
++ * Without GMX_MPI only gmx_call("gmx_sumli") is invoked.
++ */
++void gmx_sumli(int nr,gmx_large_int_t r[],const t_commrec *cr)
++{
++#ifndef GMX_MPI
++    gmx_call("gmx_sumli");
++#else
++#if defined(MPI_IN_PLACE_EXISTS) || defined(GMX_THREADS)
++    if (cr->nc.bUse) {
++        /* Use two step summing */
++        if (cr->nc.rank_intra == 0) 
++        {
++            MPI_Reduce(MPI_IN_PLACE,r,nr,GMX_MPI_LARGE_INT,MPI_SUM,0,
++                       cr->nc.comm_intra);
++            /* Sum with the buffers reversed */
++            MPI_Allreduce(MPI_IN_PLACE,r,nr,GMX_MPI_LARGE_INT,MPI_SUM,
++                          cr->nc.comm_inter);
++        }
++        else
++        {
++            /* This is here because of the silly MPI specification
++                that MPI_IN_PLACE should be put in sendbuf instead of recvbuf */
++            MPI_Reduce(r,NULL,nr,GMX_MPI_LARGE_INT,MPI_SUM,0,cr->nc.comm_intra);
++        }
++        MPI_Bcast(r,nr,GMX_MPI_LARGE_INT,0,cr->nc.comm_intra);
++    } 
++    else 
++    {
++        MPI_Allreduce(MPI_IN_PLACE,r,nr,GMX_MPI_LARGE_INT,MPI_SUM,cr->mpi_comm_mygroup);
++    }
++#else
++    /* The receive buffer must hold nr elements of gmx_large_int_t.
++     * cr->mpb->ibuf is the buffer gmx_sumi uses for plain ints, so
++     * reusing it here would overrun it (and truncate the copy back)
++     * whenever gmx_large_int_t is wider than int. A temporary buffer
++     * of the right element type is used instead, as in gmx_sumd_comm.
++     * NOTE(review): a dedicated large-int buffer in cr->mpb would avoid
++     * the allocation per call - that struct is not visible from here.
++     */
++    gmx_large_int_t *buf;
++    int i;
++
++    snew(buf,nr);
++    if (cr->nc.bUse) {
++        /* Use two step summing */
++        MPI_Allreduce(r,buf,nr,GMX_MPI_LARGE_INT,MPI_SUM,
++                      cr->nc.comm_intra);
++        if (cr->nc.rank_intra == 0) {
++            /* Sum with the buffers reversed */
++            MPI_Allreduce(buf,r,nr,GMX_MPI_LARGE_INT,MPI_SUM,
++                          cr->nc.comm_inter);
++        }
++        MPI_Bcast(r,nr,GMX_MPI_LARGE_INT,0,cr->nc.comm_intra);
++    } else {
++        MPI_Allreduce(r,buf,nr,GMX_MPI_LARGE_INT,MPI_SUM,
++                      cr->mpi_comm_mygroup);
++        for(i=0; i<nr; i++)
++            r[i] = buf[i];
++    }
++    sfree(buf);
++#endif
++#endif
++}
++
++
++
+ +#ifdef GMX_MPI
+ +/* Sum the nr doubles in r over all ranks of mpi_comm; every rank ends
+ + * up with the total in r. Only compiled when GMX_MPI is defined.
+ + * Without MPI_IN_PLACE a temporary receive buffer is allocated and
+ + * freed on every call.
+ + */
+ +void gmx_sumd_comm(int nr,double r[],MPI_Comm mpi_comm)
+ +{
+ +#if defined(MPI_IN_PLACE_EXISTS) || defined(GMX_THREADS)
+ +    MPI_Allreduce(MPI_IN_PLACE,r,nr,MPI_DOUBLE,MPI_SUM,mpi_comm);
+ +#else
+ +    /* this function is only used in code that is not performance critical,
+ +       (during setup, when comm_rec is not the appropriate communication  
+ +       structure), so this isn't as bad as it looks. */
+ +    double *buf;
+ +    int i;
+ +
+ +    snew(buf, nr);
+ +    MPI_Allreduce(r,buf,nr,MPI_DOUBLE,MPI_SUM,mpi_comm);
+ +    for(i=0; i<nr; i++)
+ +        r[i] = buf[i];
+ +    sfree(buf);
+ +#endif
+ +}
+ +#endif
+ +
+ +#ifdef GMX_MPI
+ +/* Sum the nr floats in r over all ranks of mpi_comm; every rank ends
+ + * up with the total in r. Only compiled when GMX_MPI is defined.
+ + * Without MPI_IN_PLACE a temporary receive buffer is allocated and
+ + * freed on every call.
+ + */
+ +void gmx_sumf_comm(int nr,float r[],MPI_Comm mpi_comm)
+ +{
+ +#if defined(MPI_IN_PLACE_EXISTS) || defined(GMX_THREADS)
+ +    MPI_Allreduce(MPI_IN_PLACE,r,nr,MPI_FLOAT,MPI_SUM,mpi_comm);
+ +#else
+ +    /* this function is only used in code that is not performance critical,
+ +       (during setup, when comm_rec is not the appropriate communication  
+ +       structure), so this isn't as bad as it looks. */
+ +    float *buf;
+ +    int i;
+ +
+ +    snew(buf, nr);
+ +    MPI_Allreduce(r,buf,nr,MPI_FLOAT,MPI_SUM,mpi_comm);
+ +    for(i=0; i<nr; i++)
+ +        r[i] = buf[i];
+ +    sfree(buf);
+ +#endif
+ +}
+ +#endif
+ +
+ +/* Sum the nr doubles in r over ms->mpi_comm_masters by calling
+ + * gmx_sumd_comm. Without GMX_MPI only gmx_call() is invoked.
+ + */
+ +void gmx_sumd_sim(int nr,double r[],const gmx_multisim_t *ms)
+ +{
+ +#ifndef GMX_MPI
-   gmx_call("gmx_sumf");
++  gmx_call("gmx_sumd_sim");
+ +#else
+ +  gmx_sumd_comm(nr,r,ms->mpi_comm_masters);
+ +#endif
+ +}
+ +
+ +/* Sum the nr floats in r over ms->mpi_comm_masters by calling
+ + * gmx_sumf_comm. Without GMX_MPI only gmx_call() is invoked.
+ + */
+ +void gmx_sumf_sim(int nr,float r[],const gmx_multisim_t *ms)
+ +{
+ +#ifndef GMX_MPI
-     gmx_call("gmx_sumd");
++  gmx_call("gmx_sumf_sim");
+ +#else
+ +  gmx_sumf_comm(nr,r,ms->mpi_comm_masters);
+ +#endif
+ +}
+ +
+ +/* Sum the nr ints in r over all ranks of ms->mpi_comm_masters; every
+ + * rank ends up with the total in r.
+ + * Without MPI_IN_PLACE the shared scratch buffer ms->mpb->ibuf is used
+ + * and grown when nr exceeds ibuf_alloc.
+ + * Without GMX_MPI only gmx_call("gmx_sumi_sim") is invoked.
+ + */
+ +void gmx_sumi_sim(int nr,int r[], const gmx_multisim_t *ms)
+ +{
+ +#ifndef GMX_MPI
++    gmx_call("gmx_sumi_sim");
+ +#else
+ +#if defined(MPI_IN_PLACE_EXISTS) || defined(GMX_THREADS)
+ +    MPI_Allreduce(MPI_IN_PLACE,r,nr,MPI_INT,MPI_SUM,ms->mpi_comm_masters);
+ +#else
+ +    /* this is thread-unsafe, but it will do for now: */
+ +    int i;
+ +
+ +    if (nr > ms->mpb->ibuf_alloc) {
+ +        ms->mpb->ibuf_alloc = nr;
+ +        srenew(ms->mpb->ibuf,ms->mpb->ibuf_alloc);
+ +    }
+ +    MPI_Allreduce(r,ms->mpb->ibuf,nr,MPI_INT,MPI_SUM,ms->mpi_comm_masters);
+ +    /* Copy the result back into the caller's array */
+ +    for(i=0; i<nr; i++)
+ +        r[i] = ms->mpb->ibuf[i];
+ +#endif
+ +#endif
+ +}
+ +
++/* Sum the nr large integers in r over all ranks of
++ * ms->mpi_comm_masters; every rank ends up with the total in r.
++ * Without GMX_MPI only gmx_call("gmx_sumli_sim") is invoked.
++ */
++void gmx_sumli_sim(int nr,gmx_large_int_t r[], const gmx_multisim_t *ms)
++{
++#ifndef GMX_MPI
++    gmx_call("gmx_sumli_sim");
++#else
++#if defined(MPI_IN_PLACE_EXISTS) || defined(GMX_THREADS)
++    MPI_Allreduce(MPI_IN_PLACE,r,nr,GMX_MPI_LARGE_INT,MPI_SUM,
++                  ms->mpi_comm_masters);
++#else
++    /* The receive buffer must hold nr elements of gmx_large_int_t.
++     * ms->mpb->ibuf is the buffer gmx_sumi_sim uses for plain ints, so
++     * reusing it here would overrun it (and truncate the copy back)
++     * whenever gmx_large_int_t is wider than int. A temporary buffer
++     * of the right element type is used instead, as in gmx_sumd_comm.
++     */
++    gmx_large_int_t *buf;
++    int i;
++
++    snew(buf,nr);
++    MPI_Allreduce(r,buf,nr,GMX_MPI_LARGE_INT,MPI_SUM,
++                  ms->mpi_comm_masters);
++    for(i=0; i<nr; i++)
++        r[i] = buf[i];
++    sfree(buf);
++#endif
++#endif
++}
++
++
+ +/* Shut down MPI: returns at once if MPI_Finalized() reports that MPI
+ + * was already finalized, otherwise waits in a barrier on
+ + * MPI_COMM_WORLD and calls MPI_Finalize(). The return code of
+ + * MPI_Finalize() is only written to the debug file.
+ + * Without GMX_MPI only gmx_call("gmx_finalize") is invoked.
+ + */
+ +void gmx_finalize(void)
+ +{
+ +#ifndef GMX_MPI
+ +  gmx_call("gmx_finalize");
+ +#else
+ +  int ret;
+ +
+ +  /* just as a check; we don't want to finalize twice */
+ +  int finalized;
+ +  MPI_Finalized(&finalized);
+ +  if (finalized)
+ +      return;
+ +
+ +  /* We sync the processes here to try to avoid problems
+ +   * with buggy MPI implementations that could cause
+ +   * unfinished processes to terminate.
+ +   */
+ +  MPI_Barrier(MPI_COMM_WORLD);
+ +
+ +  /*
+ +  if (DOMAINDECOMP(cr)) {
+ +    if (cr->npmenodes > 0 || cr->dd->bCartesian) 
+ +      MPI_Comm_free(&cr->mpi_comm_mygroup);
+ +    if (cr->dd->bCartesian)
+ +      MPI_Comm_free(&cr->mpi_comm_mysim);
+ +  }
+ +  */
+ +
+ +  /* Apparently certain mpich implementations cause problems
+ +   * with MPI_Finalize. In that case comment out MPI_Finalize.
+ +   */
+ +  if (debug)
+ +    fprintf(debug,"Will call MPI_Finalize now\n");
+ +
+ +  ret = MPI_Finalize();
+ +  if (debug)
+ +    fprintf(debug,"Return code from MPI_Finalize = %d\n",ret);
+ +#endif
+ +}
+ +
diff --cc src/gromacs/gmxlib/pdbio.c
Simple merge
diff --cc src/gromacs/gmxlib/rmpbc.c
Simple merge
diff --cc src/gromacs/gmxlib/strdb.c
Simple merge
diff --cc src/gromacs/gmxlib/string2.c

index 1e91cb89a4735dfdeb8fbbe417b488a62f6896f7,0000000000000000000000000000000000000000..9f064944b9174a3a03e56a68a189487755bee1c5

mode 100644,000000..100644
--- 1/src/gromacs/gmxlib/string2.c
--- /dev/null
+++ b/src/gromacs/gmxlib/string2.c
@@@ -1,602 -1,0 +1,605 @@@
+ +/*
+ + * 
+ + *                This source code is part of
+ + * 
+ + *                 G   R   O   M   A   C   S
+ + * 
+ + *          GROningen MAchine for Chemical Simulations
+ + * 
+ + *                        VERSION 3.2.0
+ + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ + * Copyright (c) 2001-2004, The GROMACS development team,
+ + * check out http://www.gromacs.org for more information.
+ +
+ + * This program is free software; you can redistribute it and/or
+ + * modify it under the terms of the GNU General Public License
+ + * as published by the Free Software Foundation; either version 2
+ + * of the License, or (at your option) any later version.
+ + * 
+ + * If you want to redistribute modifications, please consider that
+ + * scientific software is very special. Version control is crucial -
+ + * bugs must be traceable. We will be happy to consider code for
+ + * inclusion in the official distribution, but derived work must not
+ + * be called official GROMACS. Details are found in the README & COPYING
+ + * files - if they are missing, get the official version at www.gromacs.org.
+ + * 
+ + * To help us fund GROMACS development, we humbly ask that you cite
+ + * the papers on the package - you can find them in the top README file.
+ + * 
+ + * For more info, check our website at http://www.gromacs.org
+ + * 
+ + * And Hey:
+ + * GROningen Mixture of Alchemy and Childrens' Stories
+ + */
+ +/* This file is completely threadsafe - keep it that way! */
+ +#ifdef HAVE_CONFIG_H
+ +#include <config.h>
+ +#endif
+ +
+ +#ifdef GMX_CRAY_XT3
+ +#undef HAVE_PWD_H
+ +#endif
+ +
+ +#include <stdio.h>
+ +#include <ctype.h>
+ +#include <stdlib.h>
+ +#include <errno.h>
+ +#include <sys/types.h>
+ +#include <time.h>
+ +
+ +#ifdef HAVE_SYS_TIME_H
+ +#include <sys/time.h>
+ +#endif
+ +
+ +
+ +#ifdef HAVE_PWD_H
+ +#include <pwd.h>
+ +#endif
+ +#include <time.h>
+ +
+ +#include "typedefs.h"
+ +#include "smalloc.h"
+ +#include "gmx_fatal.h"
+ +#include "macros.h"
+ +#include "string2.h"
+ +#include "futil.h"
+ +
+ +int continuing(char *s)
+ +/* Right-trims s and, when the last remaining character is CONTINUE,
+ + * removes that character as well.
+ + * Returns TRUE if a CONTINUE character was removed, FALSE otherwise.
+ + */
+ +{
+ +  int last;
+ +
+ +  rtrim(s);
+ +  last = (int)strlen(s) - 1;
+ +
+ +  /* Empty string, or no continuation marker at the end */
+ +  if ((last < 0) || (s[last] != CONTINUE))
+ +    return FALSE;
+ +
+ +  s[last] = 0;
+ +  return TRUE;
+ +}
+ +
+ +
+ +
+ +char *fgets2(char *line, int n, FILE *stream)
+ +/* Reads one line of at most n-1 characters from stream into line and
+ + * cuts it at the first newline and at the first carriage return.
+ + * line must provide room for at least n characters.
+ + * Returns line, or NULL when fgets() returns NULL.
+ + * Calls gmx_fatal() when the line did not fit and the stream is not at EOF.
+ + */
+ +{
+ +  char *eol;
+ +
+ +  if (NULL == fgets(line,n,stream))
+ +    return NULL;
+ +
+ +  eol = strchr(line,'\n');
+ +  if (eol != NULL) {
+ +    *eol = '\0';
+ +  }
+ +  else if (!gmx_eof(stream)) {
+ +    /* No newline was read. That is only legitimate for the last line
+ +     * of a file; anywhere else the buffer was too small for the line.
+ +     */
+ +    gmx_fatal(FARGS,"An input file contains a line longer than %d characters, while the buffer passed to fgets2 has size %d. The line starts with: '%20.20s'",n,n,line);
+ +  }
+ +
+ +  /* Also drop a carriage return (DOS line endings) */
+ +  eol = strchr(line,'\r');
+ +  if (eol != NULL)
+ +    *eol = '\0';
+ +
+ +  return line;
+ +}
+ +
+ +/* Truncates line at the first COMMENTSIGN character, if there is one.
+ + * A NULL line is ignored.
+ + */
+ +void strip_comment (char *line)
+ +{
+ +  char *mark;
+ +
+ +  if (line == NULL)
+ +    return;
+ +
+ +  mark = strchr(line,COMMENTSIGN);
+ +  if (mark != NULL)
+ +    *mark = '\0';
+ +}
+ +
+ +/* Converts str to upper case in place, using toupper().
+ + * A NULL str is ignored.
+ + */
+ +void upstring (char *str)
+ +{
+ +  char *p;
+ +
+ +  if (NULL == str)
+ +    return;
+ +
+ +  /* Walk the string once instead of calling strlen() on every iteration.
+ +   * The cast is required: toupper() is undefined for negative char values.
+ +   */
+ +  for (p = str; *p != '\0'; p++)
+ +    *p = (char)toupper((unsigned char)*p);
+ +}
+ +
+ +/* Removes leading whitespace (as classified by isspace()) from str in place.
+ + * A NULL str is ignored.
+ + */
+ +void ltrim (char *str)
+ +{
+ +  size_t skip;
+ +
+ +  if (NULL == str)
+ +    return;
+ +
+ +  /* The cast is required: isspace() is undefined for negative char values */
+ +  skip = 0;
+ +  while (('\0' != str[skip]) && isspace((unsigned char)str[skip]))
+ +    skip++;
+ +
+ +  /* Shift the remainder, including the terminating zero, to the front.
+ +   * Source and destination overlap, hence memmove().
+ +   */
+ +  if (skip > 0)
+ +    memmove(str, str+skip, strlen(str+skip)+1);
+ +}
+ +
+ +/* Removes trailing spaces and tabs from str in place.
+ + * A NULL str is ignored.
+ + * NOTE(review): the character at index 0 is never removed, so a string
+ + * consisting only of blanks is reduced to a single blank, not to "".
+ + * Confirm whether callers rely on this before changing it.
+ + */
+ +void rtrim (char *str)
+ +{
+ +  int last;
+ +
+ +  if (str == NULL)
+ +    return;
+ +
+ +  for (last = strlen(str)-1; last > 0; last--) {
+ +    if ((str[last] != ' ') && (str[last] != '\t'))
+ +      break;
+ +    str[last] = '\0';
+ +  }
+ +}
+ +
+ +/* Trims str in place on both sides: first ltrim(), then rtrim(). */
+ +void trim (char *str)
+ +{
+ +  ltrim (str);
+ +  rtrim (str);
+ +}
+ +
+ +/* Portable wrapper around the platform's reentrant ctime variant.
+ + * Converts *clock to text in a local buffer and copies at most n-1
+ + * characters of it into buf, which is always zero terminated.
+ + * Returns buf. Nothing is written when buf is NULL or n <= 0.
+ + */
+ +char *
+ +gmx_ctime_r(const time_t *clock,char *buf, int n)
+ +{
+ +    char tmpbuf[STRLEN];
+ +
+ +    /* Without room for the terminator, buf[n-1] would be out of bounds */
+ +    if (NULL == buf || n <= 0)
+ +    {
+ +        return buf;
+ +    }
+ +    /* Keep the copy below well defined if the conversion fails */
+ +    tmpbuf[0] = '\0';
+ +
+ +#if ((defined WIN32 || defined _WIN32 || defined WIN64 || defined _WIN64) && !defined __CYGWIN__ && !defined __CYGWIN32__)
+ +    /* Windows */
+ +    ctime_s( tmpbuf, STRLEN, clock );
++#elif (defined(__sun))
++    /* Solaris: the third argument is the size of the buffer passed in,
++     * i.e. of tmpbuf, not the size of the caller's buffer. */
++    ctime_r(clock, tmpbuf, STRLEN);
+ +#else
+ +    ctime_r(clock,tmpbuf);
+ +#endif
+ +    strncpy(buf,tmpbuf,n-1);
+ +    buf[n-1]='\0';
+ +
+ +    return buf;
+ +}
+ +          
+ +/* Writes a comment header to out, every line starting with COMMENTSIGN:
+ + * the file name fn, the user name and uid, the host name and the date.
+ + * Items that cannot be determined (or a NULL fn) are printed as "onbekend".
+ + */
+ +void nice_header (FILE *out,const char *fn)
+ +{
+ +  const char *unk = "onbekend";
+ +  time_t clock;
+ +  char   *user=NULL;
+ +  int    gh;
+ +  uid_t  uid;
+ +  char   buf[256];
+ +  char   timebuf[STRLEN];
+ +#ifdef HAVE_PWD_H
+ +  struct passwd *pw;
+ +#endif
+ +
+ +  /* Print a nice header above the file */
+ +  time(&clock);
+ +  fprintf (out,"%c\n",COMMENTSIGN);
+ +  fprintf (out,"%c\tFile '%s' was generated\n",COMMENTSIGN,fn ? fn : unk);
+ +  
+ +#ifdef HAVE_PWD_H
+ +  uid = getuid();
+ +  pw  = getpwuid(uid);
+ +  gh  = gethostname(buf,255);
+ +  /* gethostname() may leave the name unterminated when it is truncated */
+ +  buf[sizeof(buf)-1] = '\0';
+ +  /* getpwuid() returns NULL when the uid has no passwd entry */
+ +  if (pw != NULL)
+ +    user = pw->pw_name;
+ +#else
+ +  uid = 0;
+ +  gh  = -1;
+ +#endif
+ +  
+ +  gmx_ctime_r(&clock,timebuf,STRLEN);
+ +  fprintf (out,"%c\tBy user: %s (%d)\n",COMMENTSIGN,
+ +         user ? user : unk,(int) uid);
+ +  fprintf(out,"%c\tOn host: %s\n",COMMENTSIGN,(gh == 0) ? buf : unk);
+ +
+ +  /* No "\n" in the format: the time string is expected to end in one */
+ +  fprintf (out,"%c\tAt date: %s",COMMENTSIGN,timebuf);
+ +  fprintf (out,"%c\n",COMMENTSIGN);
+ +}
+ +
+ +/* Case-insensitive string comparison that skips every '-' and '_'
+ + * in both strings. Returns 0 when the strings are equal under these
+ + * rules, otherwise the difference of the first differing characters
+ + * (after upper-casing).
+ + */
+ +int gmx_strcasecmp_min(const char *str1, const char *str2)
+ +{
+ +  char ch1,ch2;
+ +  
+ +  do
+ +    {
+ +      /* The casts are required: toupper() is undefined for negative
+ +       * char values.
+ +       */
+ +      do
+ +        ch1=toupper((unsigned char)*(str1++));
+ +      while ((ch1=='-') || (ch1=='_'));
+ +      do 
+ +        ch2=toupper((unsigned char)*(str2++));
+ +      while ((ch2=='-') || (ch2=='_'));
+ +      if (ch1!=ch2) return (ch1-ch2);
+ +    }
+ +  while (ch1);
+ +  return 0; 
+ +}
+ +
+ +/* Like gmx_strcasecmp_min(), but stops comparing once n characters of
+ + * either input string have been consumed; skipped '-' and '_'
+ + * characters count towards n.
+ + * Returns 0 for n <= 0, consistent with gmx_strncasecmp().
+ + */
+ +int gmx_strncasecmp_min(const char *str1, const char *str2, int n)
+ +{
+ +  char ch1,ch2;
+ +  const char *stri1, *stri2;
+ +
+ +  if (n <= 0)
+ +    return 0;
+ +
+ +  /* Remember the start of both strings to measure what was consumed */
+ +  stri1=str1;
+ +  stri2=str2;
+ +  do
+ +    {
+ +      /* The casts are required: toupper() is undefined for negative
+ +       * char values.
+ +       */
+ +      do
+ +        ch1=toupper((unsigned char)*(str1++));
+ +      while ((ch1=='-') || (ch1=='_'));
+ +      do 
+ +        ch2=toupper((unsigned char)*(str2++));
+ +      while ((ch2=='-') || (ch2=='_'));
+ +      if (ch1!=ch2) return (ch1-ch2);
+ +    }
+ +  while (ch1 && (str1-stri1<n) && (str2-stri2<n));
+ +  return 0; 
+ +}
+ +
+ +/* Case-insensitive string comparison.
+ + * Returns 0 when the strings are equal ignoring case, otherwise the
+ + * difference of the first differing characters (after upper-casing).
+ + */
+ +int gmx_strcasecmp(const char *str1, const char *str2)
+ +{
+ +  char ch1,ch2;
+ +  
+ +  do
+ +    {
+ +      /* The casts are required: toupper() is undefined for negative
+ +       * char values.
+ +       */
+ +      ch1=toupper((unsigned char)*(str1++));
+ +      ch2=toupper((unsigned char)*(str2++));
+ +      if (ch1!=ch2) return (ch1-ch2);
+ +    }
+ +  while (ch1);
+ +  return 0; 
+ +}
+ +
+ +/* Case-insensitive comparison of at most n characters.
+ + * Returns 0 when n is 0 or when the compared characters are equal
+ + * ignoring case, otherwise the difference of the first differing
+ + * characters (after upper-casing).
+ + */
+ +int gmx_strncasecmp(const char *str1, const char *str2, int n)
+ +{
+ +  char ch1,ch2;
+ + 
+ +  if(n==0) 
+ +    return 0;
+ +
+ +  do
+ +    {
+ +      /* The casts are required: toupper() is undefined for negative
+ +       * char values.
+ +       */
+ +      ch1=toupper((unsigned char)*(str1++));
+ +      ch2=toupper((unsigned char)*(str2++));
+ +      if (ch1!=ch2) return (ch1-ch2);
+ +      n--;
+ +    }
+ +  while (ch1 && n);
+ +  return 0; 
+ +}
+ +
+ +/* Returns a copy of src in memory obtained with snew(). */
+ +char *gmx_strdup(const char *src)
+ +{
+ +  char   *copy;
+ +  size_t nbytes;
+ +
+ +  /* Length including the terminating zero */
+ +  nbytes = strlen(src)+1;
+ +  snew(copy,nbytes);
+ +  memcpy(copy,src,nbytes);
+ +
+ +  return copy;
+ +}
+ +
+ +/* Returns a zero-terminated copy of at most the first n characters of
+ + * src, in memory obtained with snew().
+ + */
+ +char *
+ +gmx_strndup(const char *src, int n)
+ +{
+ +    char *copy;
+ +    int   ncopy;
+ +
+ +    /* Copy the whole string unless it is longer than n */
+ +    ncopy = strlen(src);
+ +    if (n < ncopy)
+ +    {
+ +        ncopy = n;
+ +    }
+ +
+ +    snew(copy, ncopy+1);
+ +    memcpy(copy, src, ncopy);
+ +    copy[ncopy] = 0;
+ +
+ +    return copy;
+ +}
+ +
+ +/*!
+ + * \param[in] pattern  Pattern to match against.
+ + * \param[in] str      String to match.
+ + * \returns   0 on match, GMX_NO_WCMATCH if there is no match.
+ + *
+ + * Matches \p str against \p pattern, which may contain * and ? wildcards.
+ + * All other characters are matched literally.
+ + * Currently, it is not possible to match literal * or ?.
+ + */
+ +int
+ +gmx_wcmatch(const char *pattern, const char *str)
+ +{
+ +    for (; *pattern != 0; ++pattern)
+ +    {
+ +        if (*pattern != '*')
+ +        {
+ +            /* A '?' needs any character, everything else needs exactly
+ +             * that character. Either way str must not be exhausted. */
+ +            if (*str == 0 || (*pattern != '?' && *pattern != *str))
+ +            {
+ +                return GMX_NO_WCMATCH;
+ +            }
+ +            ++str;
+ +            continue;
+ +        }
+ +
+ +        /* We are at a '*'. Step over it and over any wildcards that
+ +         * directly follow; every '?' among them consumes one character
+ +         * of str, which must therefore still be available. */
+ +        do
+ +        {
+ +            ++pattern;
+ +            if (*pattern == '?')
+ +            {
+ +                if (*str == 0)
+ +                {
+ +                    return GMX_NO_WCMATCH;
+ +                }
+ +                ++str;
+ +            }
+ +        }
+ +        while (*pattern == '*' || *pattern == '?');
+ +
+ +        /* A pattern that ends in a star matches whatever is left */
+ +        if (*pattern == 0)
+ +        {
+ +            return 0;
+ +        }
+ +
+ +        /* Try to match the rest of the pattern against every suffix of
+ +         * str. The pattern now starts with a literal character, so only
+ +         * suffixes starting with that character need a recursive call. */
+ +        for (; *str != 0; ++str)
+ +        {
+ +            if (*pattern == *str)
+ +            {
+ +                int rc;
+ +
+ +                rc = gmx_wcmatch(pattern, str);
+ +                /* Pass on a match or an error */
+ +                if (rc != GMX_NO_WCMATCH)
+ +                {
+ +                    return rc;
+ +                }
+ +            }
+ +        }
+ +        /* No suffix of str matched */
+ +        return GMX_NO_WCMATCH;
+ +    }
+ +
+ +    /* The pattern is used up: a match requires str to be used up too */
+ +    return (*str == 0) ? 0 : GMX_NO_WCMATCH;
+ +}
+ +
+ +/* Returns a copy of buf, allocated with snew()/srenew(), in which spaces
+ + * are replaced by newlines so that lines stay within line_width where
+ + * possible, and in which indent spaces are inserted after each newline.
+ + * With bIndentFirst the first line is indented as well.
+ + * The input buf is not modified.
+ + * NOTE(review): the caller presumably owns and frees the result - confirm.
+ + */
+ +char *wrap_lines(const char *buf,int line_width, int indent,gmx_bool bIndentFirst)
+ +{
+ +  char *b2;
+ +  int i,i0,i2,j,b2len,lspace=0,l2space=0;
+ +  gmx_bool bFirst,bFitsOnLine;
+ +
+ +  /* characters are copied from buf to b2 with possible spaces changed
+ +   * into newlines and extra space added for indentation.
+ +   * i indexes buf (source buffer) and i2 indexes b2 (destination buffer)
+ +   * i0 points to the beginning of the current line (in buf, source)
+ +   * lspace and l2space point to the last space on the current line
+ +   * bFirst is set to prevent indentation of first line
+ +   * bFitsOnLine says if the first space occurred before line_width, if 
+ +   * that is not the case, we have a word longer than line_width which 
+ +   * will also not fit on the next line, so we might as well keep it on 
+ +   * the current line (where it also won't fit, but looks better)
+ +   */
+ +  
+ +  b2=NULL;
+ +  b2len=strlen(buf)+1+indent;
+ +  snew(b2,b2len);
+ +  i0=i2=0;
+ +  if (bIndentFirst)
+ +    for(i2=0; (i2<indent); i2++)
+ +      b2[i2] = ' ';
+ +  bFirst=TRUE;
+ +  do {
+ +    l2space = -1;
+ +    /* find the last space before end of line */
+ +    for(i=i0; ((i-i0 < line_width) || (l2space==-1)) && (buf[i]); i++) {
+ +      b2[i2++] = buf[i];
+ +      /* remember the position of a space */
+ +      if (buf[i] == ' ') {
+ +        lspace = i;
+ +      l2space = i2-1;
+ +      }
+ +      /* if we have a newline before the line is full, reset counters */
+ +      if (buf[i]=='\n' && buf[i+1]) { 
+ +      i0=i+1;
+ +      b2len+=indent;
+ +      srenew(b2, b2len);
+ +      /* add indentation after the newline */
+ +      for(j=0; (j<indent); j++)
+ +        b2[i2++]=' ';
+ +      }
+ +    }
+ +    /* If we are at the last newline, copy it */
+ +    if (buf[i]=='\n' && !buf[i+1]) {
+ +      b2[i2++] = buf[i++];
+ +    }
+ +    /* if we're not at the end of the string */
+ +    if (buf[i]) {
+ +      /* check if one word does not fit on the line */
+ +      bFitsOnLine = (i-i0 <= line_width);
+ +      /* reset line counters to just after the space */
+ +      i0 = lspace+1;
+ +      i2 = l2space+1;
+ +      /* if the words fit on the line, and we're beyond the indentation part */
+ +      if ( (bFitsOnLine) && (l2space >= indent) ) {
+ +      /* start a new line */
+ +      b2[l2space] = '\n';
+ +      /* and add indentation */
+ +      if (indent) {
+ +        if (bFirst) {
+ +          /* only once: all later lines lose indent columns of text width */
+ +          line_width-=indent;
+ +          bFirst=FALSE;
+ +        }
+ +        /* grow the output buffer by the indentation about to be written */
+ +        b2len+=indent;
+ +        srenew(b2, b2len);
+ +        for(j=0; (j<indent); j++)
+ +          b2[i2++]=' ';
+ +        /* no extra spaces after indent; */
+ +        while(buf[i0]==' ')
+ +          i0++;
+ +      }
+ +      }
+ +    }
+ +  } while (buf[i]);
+ +  b2[i2] = '\0';
+ +  
+ +  return b2;
+ +}
+ +
+ +/* Splits str into the non-empty tokens that are separated by sep.
+ + * Returns a NULL-terminated array of token copies; the array and every
+ + * token are allocated with snew(). Returns NULL when str is NULL.
+ + */
+ +char **split(char sep,const char *str)
+ +{
+ +  char **ptr = NULL;
+ +  const char *end;
+ +  int  len,nptr = 0;
+ +  
+ +  if (str == NULL)
+ +    return NULL;
+ +
+ +  /* The number of separators plus one is an upper bound for the number
+ +   * of tokens; one more entry is needed for the terminating NULL.
+ +   */
+ +  for(end=str; (*end != '\0'); end++)
+ +    if (*end == sep)
+ +      nptr++;
+ +  snew(ptr,nptr+2);
+ +
+ +  nptr = 0;
+ +  while (*str != '\0') {
+ +    /* skip (repeated) separators in front of the next token */
+ +    while ((*str != '\0') && (*str == sep))
+ +      str++;
+ +    if (*str == '\0')
+ +      break;
+ +
+ +    /* find the end of the token and allocate exactly what it needs,
+ +     * rather than room for the whole remainder of the string
+ +     */
+ +    for(end=str; (*end != '\0') && (*end != sep); end++)
+ +      ;
+ +    len = end-str;
+ +    snew(ptr[nptr],len+1);
+ +    memcpy(ptr[nptr],str,len);
+ +    ptr[nptr][len] = '\0';
+ +    nptr++;
+ +    str = end;
+ +  }
+ +  ptr[nptr] = NULL;
+ +  
+ +  return ptr;
+ +}
+ +
+ +
+ +/* Converts the decimal number at the start of str to a gmx_large_int_t.
+ + * Leading white space and an optional '-' sign are accepted.
+ + * *endptr is set to the first character after the number. When str does
+ + * not start with a number, or on overflow, *endptr is set to str and 0
+ + * is returned; on overflow errno is set to ERANGE as well.
+ + * A NULL str sets *endptr to NULL and returns 0.
+ + */
+ +gmx_large_int_t
+ +str_to_large_int_t(const char *str, char **endptr)
+ +{
+ +      int         sign = 1;
+ +      gmx_large_int_t  val  = 0;
+ +      char        ch;
+ +      const char  *p;
+ +      const char  *digits;
+ +      
+ +      p = str;
+ +      if(p==NULL)
+ +      {
+ +              *endptr=NULL;
+ +              return 0;
+ +      }
+ +      
+ +      /* Strip off initial white space.
+ +       * The cast is required: isspace() is undefined for negative chars.
+ +       */
+ +      while(isspace((unsigned char)*p))
+ +      {
+ +              p++;
+ +      }
+ +      
+ +      if(*p=='-')
+ +      {
+ +              p++;
+ +              sign *= -1;
+ +      }
+ +      
+ +      /* Remember where the digits should start, to detect "no number" */
+ +      digits = p;
+ +      while( ((ch=*p) != '\0') && isdigit((unsigned char)ch) )
+ +      {
+ +              /* Important to add sign here, so we dont overflow in final multiplication */
+ +              ch = (ch-'0')*sign; 
+ +              val = val*10 + ch;
+ +              /* NOTE(review): this check relies on the value wrapping
+ +               * around on overflow, which C does not guarantee for
+ +               * signed types.
+ +               */
+ +              if(ch != val%10) 
+ +              {
+ +                      /* Some sort of overflow has occurred, set endptr to original string */
+ +                      *endptr=(char *)str;
+ +                      errno = ERANGE;
+ +                      return(0);
+ +              }
+ +              p++;
+ +      }
+ +      
+ +      /* Conform to ISO C99 - return original pointer if string does not contain a number */
+ +      if(p==digits)
+ +      {
+ +              *endptr=(char *)str;
+ +              return 0;
+ +      }
+ +      
+ +      *endptr=(char *)p;
+ +      
+ +      return val;
+ +}
+ +
+ +/* Replacement for strsep(): returns the token at the start of *stringp
+ + * and terminates it at the first character that occurs in delim.
+ + * *stringp is advanced to just past that delimiter, or set to NULL when
+ + * the token ran to the end of the string.
+ + * Returns NULL when *stringp is NULL.
+ + */
+ +char *gmx_strsep(char **stringp, const char *delim)
+ +{
+ +    char *token;
+ +    char *hit;
+ +
+ +    token = *stringp;
+ +    if (token == NULL)
+ +    {
+ +        return NULL;
+ +    }
+ +
+ +    /* First occurrence of any delimiter character, if there is one */
+ +    hit = strpbrk(token, delim);
+ +    if (hit == NULL)
+ +    {
+ +        *stringp = NULL;
+ +    }
+ +    else
+ +    {
+ +        *hit     = '\0';
+ +        *stringp = hit+1;
+ +    }
+ +
+ +    return token;
+ +}
+ +
diff --cc src/gromacs/gmxlib/thread_mpi/CMakeLists.txt
Simple merge
diff --cc src/gromacs/gmxlib/thread_mpi/impl.h
Simple merge
diff --cc src/gromacs/gmxlib/thread_mpi/numa_malloc.c

index 0000000000000000000000000000000000000000,bcfd8790147dadab0b265bef0f77cf20cd6a6f1b..bcfd8790147dadab0b265bef0f77cf20cd6a6f1b

mode 000000,100644..100644
--- /dev/null
--- 2/src/gmxlib/thread_mpi/numa_malloc.c
+++ b/src/gromacs/gmxlib/thread_mpi/numa_malloc.c
diff --cc src/gromacs/gmxlib/thread_mpi/p2p_buffer.c
Simple merge
diff --cc src/gromacs/gmxlib/thread_mpi/pthreads.c
Simple merge
diff --cc src/gromacs/gmxlib/thread_mpi/settings.h
Simple merge
diff --cc src/gromacs/gmxlib/thread_mpi/tmpi_init.c
Simple merge
diff --cc src/gromacs/gmxlib/thread_mpi/winthreads.c
Simple merge
diff --cc src/gromacs/gmxlib/typedefs.c
Simple merge
diff --cc src/gromacs/gmxlib/wman.c
Simple merge
diff --cc src/gromacs/gmxpreprocess/gen_vsite.c
Simple merge
diff --cc src/gromacs/gmxpreprocess/genhydro.c
Simple merge
diff --cc src/gromacs/gmxpreprocess/genhydro.h
Simple merge
diff --cc src/gromacs/gmxpreprocess/pdb2top.c
Simple merge
diff --cc src/gromacs/legacyheaders/futil.h
Simple merge
diff --cc src/gromacs/legacyheaders/gmx_blas.h
Simple merge
diff --cc src/gromacs/legacyheaders/gmx_cyclecounter.h
Simple merge
diff --cc src/gromacs/legacyheaders/gmx_lapack.h
Simple merge
diff --cc src/gromacs/legacyheaders/gmxcomplex.h
Simple merge
diff --cc src/gromacs/legacyheaders/gstat.h
Simple merge
diff --cc src/gromacs/legacyheaders/main.h
Simple merge
diff --cc src/gromacs/legacyheaders/mdrun.h

index db0f7d7c05922c8a8d25b0e88896230bd3fae2a4,0000000000000000000000000000000000000000..e5bd8e77df50dba75554076880f9c40731e3cc5f

mode 100644,000000..100644
--- 1/src/gromacs/legacyheaders/mdrun.h
--- /dev/null
+++ b/src/gromacs/legacyheaders/mdrun.h
@@@ -1,372 -1,0 +1,434 @@@
+ +/*
+ + * 
+ + *                This source code is part of
+ + * 
+ + *                 G   R   O   M   A   C   S
+ + * 
+ + *          GROningen MAchine for Chemical Simulations
+ + * 
+ + *                        VERSION 3.2.0
+ + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ + * Copyright (c) 2001-2004, The GROMACS development team,
+ + * check out http://www.gromacs.org for more information.
+ +
+ + * This program is free software; you can redistribute it and/or
+ + * modify it under the terms of the GNU General Public License
+ + * as published by the Free Software Foundation; either version 2
+ + * of the License, or (at your option) any later version.
+ + * 
+ + * If you want to redistribute modifications, please consider that
+ + * scientific software is very special. Version control is crucial -
+ + * bugs must be traceable. We will be happy to consider code for
+ + * inclusion in the official distribution, but derived work must not
+ + * be called official GROMACS. Details are found in the README & COPYING
+ + * files - if they are missing, get the official version at www.gromacs.org.
+ + * 
+ + * To help us fund GROMACS development, we humbly ask that you cite
+ + * the papers on the package - you can find them in the top README file.
+ + * 
+ + * For more info, check our website at http://www.gromacs.org
+ + * 
+ + * And Hey:
+ + * Gromacs Runs On Most of All Computer Systems
+ + */
+ +
+ +#ifndef _mdrun_h
+ +#define _mdrun_h
+ +
+ +#include <stdio.h>
+ +#include <time.h>
+ +#include "typedefs.h"
+ +#include "network.h"
+ +#include "tgroup.h"
+ +#include "filenm.h"
+ +#include "mshift.h"
+ +#include "force.h"
+ +#include "edsam.h"
+ +#include "mdebin.h"
+ +#include "vcm.h"
+ +#include "vsite.h"
+ +#include "pull.h"
+ +#include "update.h"
+ +#include "membed.h"
+ +
+ +#ifdef __cplusplus
+ +extern "C" {
+ +#endif
+ +
+ +#define MD_POLARISE       (1<<2)
+ +#define MD_IONIZE         (1<<3)
+ +#define MD_RERUN          (1<<4)
+ +#define MD_RERUN_VSITE    (1<<5)
+ +#define MD_FFSCAN         (1<<6)
+ +#define MD_SEPPOT         (1<<7)
+ +#define MD_PARTDEC        (1<<9)
+ +#define MD_DDBONDCHECK    (1<<10)
+ +#define MD_DDBONDCOMM     (1<<11)
+ +#define MD_CONFOUT        (1<<12)
+ +#define MD_REPRODUCIBLE   (1<<13)
+ +#define MD_READ_RNG       (1<<14)
+ +#define MD_APPENDFILES    (1<<15)
+ +#define MD_KEEPANDNUMCPT  (1<<16)
+ +#define MD_READ_EKIN      (1<<17)
+ +#define MD_STARTFROMCPT   (1<<18)
+ +#define MD_RESETCOUNTERSHALFWAY (1<<19)
+ +
+ +/* Define a number of flags to better control the information
+ + * passed to compute_globals in md.c and global_stat.
+ + */
+ +
+ +/* We are rerunning the simulation */
+ +#define CGLO_RERUNMD        (1<<1)
+ +/* we are computing the kinetic energy from average velocities */
+ +#define CGLO_EKINAVEVEL     (1<<2)
+ +/* we are removing the center of mass momenta */
+ +#define CGLO_STOPCM         (1<<3)
+ +/* bGStat is defined in do_md */
+ +#define CGLO_GSTAT          (1<<4)
+ +/* Sum the energy terms in global computation */
+ +#define CGLO_ENERGY         (1<<6)
+ +/* Sum the kinetic energy terms in global computation */
+ +#define CGLO_TEMPERATURE    (1<<7)
+ +/* Sum the pressure terms in global computation */
+ +#define CGLO_PRESSURE       (1<<8)
+ +/* Sum the constraint term in global computation */
+ +#define CGLO_CONSTRAINT     (1<<9)
+ +/* we are using an integrator that requires iteration over some steps - currently not used*/
+ +#define CGLO_ITERATE        (1<<10)
+ +/* it is the first time we are iterating (or only one pass through is required) */
+ +#define CGLO_FIRSTITERATE   (1<<11)
+ +/* Reading ekin from the trajectory */
+ +#define CGLO_READEKIN       (1<<12)
+ +/* we need to reset the ekin rescaling factor here */
+ +#define CGLO_SCALEEKIN      (1<<13)
+ +  
+ +enum {
+ +  ddnoSEL, ddnoINTERLEAVE, ddnoPP_PME, ddnoCARTESIAN, ddnoNR
+ +};
+ +
+ +/* Timing data for a run; passed to runtime_start(), runtime_end(),
+ + * runtime_upd_proc(), print_time() and the integrators.
+ + * NOTE(review): the meaning of the individual fields is inferred from
+ + * their names only - confirm against sim_util.c.
+ + */
+ +typedef struct {
+ +  double real;              /* presumably wall-clock reference time */
+ +#ifdef GMX_CRAY_XT3
+ +  double proc;              /* processor time; a double on Cray XT3 */
+ +#else
+ +  clock_t proc;             /* processor time; a clock_t elsewhere */
+ +#endif
+ +  double realtime;
+ +  double proctime;
+ +  double time_per_step;
+ +  double last;
+ +  gmx_large_int_t nsteps_done;
+ +} gmx_runtime_t;
+ +
+ +/* Output file pointers and names required by mdrun; created by
+ + * init_mdoutf() and released by done_mdoutf().
+ + * NOTE(review): the per-field notes are inferred from the field names
+ + * and the MDOF_* flags - confirm.
+ + */
+ +typedef struct {
+ +  t_fileio *fp_trn;         /* presumably the trn trajectory file */
+ +  t_fileio *fp_xtc;         /* presumably the xtc trajectory file */
+ +  int  xtc_prec;
+ +  ener_file_t fp_ene;
+ +  const char *fn_cpt;       /* presumably the checkpoint file name */
+ +  gmx_bool bKeepAndNumCPT;
+ +  int  eIntegrator;
+ +  int  simulation_part;
+ +  FILE *fp_dhdl;
+ +  FILE *fp_field;
+ +} gmx_mdoutf_t;
+ +
+ +/* Variables for temporary use with the deform option,
+ + * used in runner.c and md.c.
+ + * (These variables should be stored in the tpx file.)
+ + */
+ +extern gmx_large_int_t     deform_init_init_step_tpx;
+ +extern matrix              deform_init_box_tpx;
+ +#ifdef GMX_THREADS
+ +extern tMPI_Thread_mutex_t deform_init_box_mutex;
+ +
+ +/* The minimum number of atoms per thread. With fewer atoms than this,
+ + * the number of threads will get lowered.
+ + */
+ +#define MIN_ATOMS_PER_THREAD    90
+ +#endif
+ +
+ +
+ +/* Function type shared by all integrator and minimizer entry points
+ + * declared in this header (do_md, do_md_openmm, do_steep, do_cg,
+ + * do_lbfgs, do_nm, do_tpi). Each of them takes this argument list and
+ + * returns a double.
+ + */
+ +typedef double gmx_integrator_t(FILE *log,t_commrec *cr,
+ +                              int nfile,const t_filenm fnm[],
+ +                              const output_env_t oenv, gmx_bool bVerbose,
+ +                                gmx_bool bCompact, int nstglobalcomm,
+ +                              gmx_vsite_t *vsite,gmx_constr_t constr,
+ +                              int stepout,
+ +                              t_inputrec *inputrec,
+ +                              gmx_mtop_t *mtop,t_fcdata *fcd,
+ +                              t_state *state,
+ +                              t_mdatoms *mdatoms,
+ +                              t_nrnb *nrnb,gmx_wallcycle_t wcycle,
+ +                              gmx_edsam_t ed, 
+ +                              t_forcerec *fr,
+ +                              int repl_ex_nst,int repl_ex_seed,
+ +                                gmx_membed_t *membed,
+ +                              real cpt_period,real max_hours,
+ +                              const char *deviceOptions,
+ +                              unsigned long Flags,
+ +                              gmx_runtime_t *runtime);
+ +
+ +typedef struct gmx_global_stat *gmx_global_stat_t;
+ +
+ +/* ROUTINES from md.c */
+ +
+ +gmx_integrator_t do_md;
+ +
+ +gmx_integrator_t do_md_openmm;
+ +
++
++
+ +/* ROUTINES from minimize.c */
+ +
+ +gmx_integrator_t do_steep;
+ +/* Do steepest descents EM */
+ +
+ +gmx_integrator_t do_cg;
+ +/* Do conjugate gradient EM */
+ +
+ +gmx_integrator_t do_lbfgs;
+ +/* Do L-BFGS (limited-memory quasi-Newton) EM */
+ +
+ +gmx_integrator_t do_nm;
+ +/* Do normal mode analysis */
+ +
+ +/* ROUTINES from tpi.c */
+ +
+ +gmx_integrator_t do_tpi;
+ +/* Do test particle insertion */
+ +
+ +
++/* ROUTINES from md_support.c */
++
++/* return the number of steps between global communications */
++int check_nstglobalcomm(FILE *fplog,t_commrec *cr,
++                        int nstglobalcomm,t_inputrec *ir);
++
++/* check whether an 'nst'-style parameter p is a multiple of nst, and
++   set it to be one if not, with a warning. */
++void check_nst_param(FILE *fplog,t_commrec *cr,
++                     const char *desc_nst,int nst,
++                     const char *desc_p,int *p);
++
++/* check which of the multisim simulations has the shortest number of
++   steps and return that number of nsteps */
++gmx_large_int_t get_multisim_nsteps(const t_commrec *cr,
++                                    gmx_large_int_t nsteps);
++
++void rerun_parallel_comm(t_commrec *cr,t_trxframe *fr,
++                         gmx_bool *bNotLastFrame);
++
++/* get the conserved energy associated with the ensemble type*/
++real compute_conserved_from_auxiliary(t_inputrec *ir, t_state *state,           
++                                      t_extmass *MassQ);
++
++/* reset all cycle and time counters. */
++void reset_all_counters(FILE *fplog,t_commrec *cr,
++                        gmx_large_int_t step,
++                        gmx_large_int_t *step_rel,t_inputrec *ir,
++                        gmx_wallcycle_t wcycle,t_nrnb *nrnb,
++                        gmx_runtime_t *runtime);
++
++
++
+ +/* ROUTINES from sim_util.c */
+ +void do_pbc_first(FILE *log,matrix box,t_forcerec *fr,
+ +                       t_graph *graph,rvec x[]);
+ +
+ +void do_pbc_first_mtop(FILE *fplog,int ePBC,matrix box,
+ +                            gmx_mtop_t *mtop,rvec x[]);
+ +
+ +void do_pbc_mtop(FILE *fplog,int ePBC,matrix box,
+ +                      gmx_mtop_t *mtop,rvec x[]);
+ +
++
+ +                   
+ +/* ROUTINES from stat.c */
+ +gmx_global_stat_t global_stat_init(t_inputrec *ir);
+ +
+ +void global_stat_destroy(gmx_global_stat_t gs);
+ +
+ +void global_stat(FILE *log,gmx_global_stat_t gs,
+ +                      t_commrec *cr,gmx_enerdata_t *enerd,
+ +                      tensor fvir,tensor svir,rvec mu_tot,
+ +                      t_inputrec *inputrec,
+ +                      gmx_ekindata_t *ekind,
+ +                      gmx_constr_t constr,t_vcm *vcm,
+ +                      int nsig,real *sig,
+ +                      gmx_mtop_t *top_global, t_state *state_local, 
+ +                      gmx_bool bSumEkinhOld, int flags);
+ +/* Communicate statistics over cr->mpi_comm_mysim */
+ +
+ +gmx_mdoutf_t *init_mdoutf(int nfile,const t_filenm fnm[],
+ +                               int mdrun_flags,
+ +                               const t_commrec *cr,const t_inputrec *ir,
+ +                               const output_env_t oenv);
+ +/* Returns a pointer to a data structure with all output file pointers
+ + * and names required by mdrun.
+ + */
+ +
+ +void done_mdoutf(gmx_mdoutf_t *of);
+ +/* Close all open output files and free the of pointer */
+ +
+ +#define MDOF_X   (1<<0)
+ +#define MDOF_V   (1<<1)
+ +#define MDOF_F   (1<<2)
+ +#define MDOF_XTC (1<<3)
+ +#define MDOF_CPT (1<<4)
+ +
+ +void write_traj(FILE *fplog,t_commrec *cr,
+ +                     gmx_mdoutf_t *of,
+ +                     int mdof_flags,
+ +                     gmx_mtop_t *top_global,
+ +                     gmx_large_int_t step,double t,
+ +                     t_state *state_local,t_state *state_global,
+ +                     rvec *f_local,rvec *f_global,
+ +                     int *n_xtc,rvec **x_xtc);
+ +/* Routine that writes frames to trn, xtc and/or checkpoint.
+ + * What is written is determined by the mdof_flags defined above.
+ + * Data is collected to the master node only when necessary.
+ + */
+ +
+ +int do_per_step(gmx_large_int_t step,gmx_large_int_t nstep);
+ +/* Return TRUE if io should be done */
+ +
+ +int do_any_io(int step, t_inputrec *ir);
+ +
+ +/* ROUTINES from sim_util.c */
+ +
+ +double gmx_gettime();
+ +
+ +void print_time(FILE *out, gmx_runtime_t *runtime,
+ +                       gmx_large_int_t step,t_inputrec *ir, t_commrec *cr);
+ +
+ +void runtime_start(gmx_runtime_t *runtime);
+ +
+ +void runtime_end(gmx_runtime_t *runtime);
+ +
+ +void runtime_upd_proc(gmx_runtime_t *runtime);
+ +/* The processor time should be updated every once in a while,
+ + * since on 32-bit machines it loops after 72 minutes.
+ + */
+ +  
+ +void print_date_and_time(FILE *log,int pid,const char *title,
+ +                              const gmx_runtime_t *runtime);
+ +  
+ +void nstop_cm(FILE *log,t_commrec *cr,
+ +                   int start,int nr_atoms,real mass[],rvec x[],rvec v[]);
+ +
+ +void finish_run(FILE *log,t_commrec *cr,const char *confout,
+ +                     t_inputrec *inputrec,
+ +                     t_nrnb nrnb[],gmx_wallcycle_t wcycle,
+ +                     gmx_runtime_t *runtime,
+ +                     gmx_bool bWriteStat);
+ +
+ +void calc_enervirdiff(FILE *fplog,int eDispCorr,t_forcerec *fr);
+ +
+ +void calc_dispcorr(FILE *fplog,t_inputrec *ir,t_forcerec *fr,
+ +                        gmx_large_int_t step, int natoms, 
+ +                        matrix box,real lambda,tensor pres,tensor virial,
+ +                        real *prescorr, real *enercorr, real *dvdlcorr);
+ +
+ +/* Item selectors. The values are octal constants with exactly one bit
+ + * set each (1, 2, 4, 8, 16, 32, 64), so they can be OR-ed together.
+ + */
+ +typedef enum
+ +{
+ +  LIST_SCALARS        =0001,
+ +  LIST_INPUTREC       =0002,
+ +  LIST_TOP    =0004,
+ +  LIST_X      =0010,
+ +  LIST_V      =0020,
+ +  LIST_F      =0040,
+ +  LIST_LOAD   =0100
+ +} t_listitem;
+ +
+ +void check_nnodes_top(char *fn,t_topology *top);
+ +/* Reset the tpr file to work with one node if necessary */
+ +
+ +
+ +/* check the version */
+ +void check_ir_old_tpx_versions(t_commrec *cr,FILE *fplog,
+ +                               t_inputrec *ir,gmx_mtop_t *mtop);
+ +
+ +/* Allocate and initialize node-local state entries. */
+ +void set_state_entries(t_state *state,const t_inputrec *ir,int nnodes);
+ +
+ +/* Broadcast the data for a simulation, and allocate node-specific settings
+ +   such as rng generators. */
+ +void init_parallel(FILE *log, t_commrec *cr, t_inputrec *inputrec,
+ +                          gmx_mtop_t *mtop);
+ +
+ +
+ +void do_constrain_first(FILE *log,gmx_constr_t constr,
+ +                             t_inputrec *inputrec,t_mdatoms *md,
+ +                             t_state *state,rvec *f,
+ +                             t_graph *graph,t_commrec *cr,t_nrnb *nrnb,
+ +                             t_forcerec *fr, gmx_localtop_t *top, tensor shake_vir); 
+ +                        
+ +void dynamic_load_balancing(gmx_bool bVerbose,t_commrec *cr,real capacity[],
+ +                                 int dimension,t_mdatoms *md,t_topology *top,
+ +                                 rvec x[],rvec v[],matrix box);
+ +/* Perform load balancing, i.e. split the particles over processors
+ + * based on their coordinates in the "dimension" direction.
+ + */
++
++int multisim_min(const gmx_multisim_t *ms,int nmin,int n);
++/* Set an appropriate value for n across the whole multi-simulation */
++
++int multisim_nstsimsync(const t_commrec *cr,
++                      const t_inputrec *ir,int repl_ex_nst);
++/* Determine the interval for inter-simulation communication */
+ +                                 
++void init_global_signals(globsig_t *gs,const t_commrec *cr,
++                       const t_inputrec *ir,int repl_ex_nst);
++/* Constructor for globsig_t */
++
++void copy_coupling_state(t_state *statea,t_state *stateb,
++                       gmx_ekindata_t *ekinda,gmx_ekindata_t *ekindb, t_grpopts* opts);
++/* Copy stuff from state A to state B */
++
++void compute_globals(FILE *fplog, gmx_global_stat_t gstat, t_commrec *cr, t_inputrec *ir,
++                   t_forcerec *fr, gmx_ekindata_t *ekind,
++                   t_state *state, t_state *state_global, t_mdatoms *mdatoms,
++                   t_nrnb *nrnb, t_vcm *vcm, gmx_wallcycle_t wcycle,
++                   gmx_enerdata_t *enerd,tensor force_vir, tensor shake_vir, tensor total_vir,
++                   tensor pres, rvec mu_tot, gmx_constr_t constr,
++                   globsig_t *gs,gmx_bool bInterSimGS,
++                   matrix box, gmx_mtop_t *top_global, real *pcurr,
++                   int natoms, gmx_bool *bSumEkinhOld, int flags);
++/* Compute global variables during integration */
++
+ +int mdrunner(int nthreads_requested, FILE *fplog,t_commrec *cr,int nfile,
+ +             const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose,
+ +             gmx_bool bCompact, int nstglobalcomm, ivec ddxyz,int dd_node_order,
+ +             real rdd, real rconstr, const char *dddlb_opt,real dlb_scale,
+ +           const char *ddcsx,const char *ddcsy,const char *ddcsz,
+ +           int nstepout, int resetstep, int nmultisim, int repl_ex_nst,
+ +             int repl_ex_seed, real pforce,real cpt_period,real max_hours,
+ +           const char *deviceOptions, unsigned long Flags);
+ +/* Driver routine, that calls the different methods */
+ +
+ +void md_print_warning(const t_commrec *cr,FILE *fplog,const char *buf);
+ +/* Print a warning message to stderr on the master node
+ + * and to fplog if fplog!=NULL.
+ + */
+ +
+ +void init_md(FILE *fplog,
+ +                  t_commrec *cr,t_inputrec *ir, const output_env_t oenv, 
+ +                  double *t,double *t0,
+ +                  real *lambda,double *lam0,
+ +                  t_nrnb *nrnb,gmx_mtop_t *mtop,
+ +                  gmx_update_t *upd,
+ +                  int nfile,const t_filenm fnm[],
+ +                  gmx_mdoutf_t **outf,t_mdebin **mdebin,
+ +                  tensor force_vir,tensor shake_vir,
+ +                  rvec mu_tot,
+ +                  gmx_bool *bSimAnn,t_vcm **vcm, 
+ +                  t_state *state, unsigned long Flags);
+ +  /* Routine in sim_util.c */
+ +
+ +#ifdef __cplusplus
+ +}
+ +#endif
+ +
+ +#endif        /* _mdrun_h */
diff --cc src/gromacs/legacyheaders/network.h
Simple merge
diff --cc src/gromacs/legacyheaders/pbc.h
Simple merge
diff --cc src/gromacs/legacyheaders/string2.h

index 0f26e3ffbc4973a362fe3db94d424b1e2855a75a,0000000000000000000000000000000000000000..3d46655865fd93a424a872dfe6d83a3d698ce488

mode 100644,000000..100644
--- 1/src/gromacs/legacyheaders/string2.h
--- /dev/null
+++ b/src/gromacs/legacyheaders/string2.h
@@@ -1,134 -1,0 +1,149 @@@
+ +/*
+ + * 
+ + *                This source code is part of
+ + * 
+ + *                 G   R   O   M   A   C   S
+ + * 
+ + *          GROningen MAchine for Chemical Simulations
+ + * 
+ + *                        VERSION 3.2.0
+ + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ + * Copyright (c) 2001-2004, The GROMACS development team,
+ + * check out http://www.gromacs.org for more information.
+ +
+ + * This program is free software; you can redistribute it and/or
+ + * modify it under the terms of the GNU General Public License
+ + * as published by the Free Software Foundation; either version 2
+ + * of the License, or (at your option) any later version.
+ + * 
+ + * If you want to redistribute modifications, please consider that
+ + * scientific software is very special. Version control is crucial -
+ + * bugs must be traceable. We will be happy to consider code for
+ + * inclusion in the official distribution, but derived work must not
+ + * be called official GROMACS. Details are found in the README & COPYING
+ + * files - if they are missing, get the official version at www.gromacs.org.
+ + * 
+ + * To help us fund GROMACS development, we humbly ask that you cite
+ + * the papers on the package - you can find them in the top README file.
+ + * 
+ + * For more info, check our website at http://www.gromacs.org
+ + * 
+ + * And Hey:
+ + * Gromacs Runs On Most of All Computer Systems
+ + */
+ +/*! \file
+ + * \brief Generic string handling functions.
+ + */
+ +#ifndef _string2_h
+ +#define _string2_h
+ +
+ +/*
+ + *
+ + * string2.h
+ + * David van der Spoel
+ + *
+ + */
+ +
+ +
+ +#include <string.h>
+ +#include <stdio.h>
+ +#include <stdlib.h>
+ +#include <ctype.h>
+ +#include <time.h>
+ +#include <errno.h>
+ +
+ +/*#include "typedefs.h"*/
+ +#include "types/simple.h"
+ +
++/* Suppress Cygwin compiler warnings from using newlib version of
++ * ctype.h */
++#ifdef GMX_CYGWIN
++#undef isdigit
++#undef isstring
++#undef isspace
++#undef isalnum
++#undef isalpha
++#undef ispunct
++#undef isxdigit
++#undef isupper
++#undef islower
++#undef toupper
++#undef tolower
++#endif
+ +
+ +#ifdef __cplusplus
+ +extern "C" {
+ +#endif
+ +
+ +#define CONTINUE    '\\'
+ +#define COMMENTSIGN ';'
+ +
+ +int continuing(char *s);
+ +
+ +char *fgets2(char *s, int n, FILE *stream);
+ +
+ +void strip_comment (char *line);
+ +
+ +int break_line (char *line,
+ +                     char *variable,
+ +                     char *value);
+ +
+ +void upstring (char *str);
+ +
+ +void ltrim (char *str);
+ +
+ +void rtrim (char *str);
+ +
+ +void trim (char *str);
+ +
+ +void nice_header (FILE *out,const char *fn);
+ +
+ +int gmx_strcasecmp_min(const char *str1, const char *str2);
+ +int gmx_strncasecmp_min(const char *str1, const char *str2, int n);
+ +/* This funny version of strcasecmp is not only case-insensitive,
+ + * but also ignores '-' and '_'.
+ + */
+ +
+ +int gmx_strcasecmp(const char *str1, const char *str2);
+ +int gmx_strncasecmp(const char *str1, const char *str2, int n);
+ +
+ +char *gmx_strdup(const char *src);
+ +char *gmx_strndup(const char *src, int n);
+ +    
+ +/** Pattern matching with wildcards. */
+ +int gmx_wcmatch(const char *pattern, const char *src);
+ +
+ +/** Return value for gmx_wcmatch() when there is no match. */
+ +#define GMX_NO_WCMATCH 1
+ +
+ +
+ +/* this is our implementation of strsep, the thread-safe replacement for
+ +   strtok */
+ +char *gmx_strsep(char **stringp, const char *delim);
+ +
+ +
+ +char *wrap_lines(const char *buf,int line_width, int indent,
+ +                      gmx_bool bIndentFirst);
+ +/* wraps lines at 'line_width', indenting all following
+ + * lines by 'indent' spaces. A temp buffer is allocated and returned,
+ + * which can be disposed of if no longer needed.
+ + * If !bIndentFirst, then the first line will not be indented, only
+ + * the lines that are created due to wrapping.
+ + */
+ +
+ +
+ +char **split(char sep,const char *str);
+ +/* Implementation of the well-known Perl function split */
+ +
+ +gmx_large_int_t str_to_large_int_t(const char *str, char **endptr);
+ +
+ +#if ((defined WIN32 || defined _WIN32 || defined WIN64 || defined _WIN64) && !defined __CYGWIN__ && !defined __CYGWIN32__)
+ +#define snprintf _snprintf
+ +#endif
+ +
+ +#ifdef __cplusplus
+ +}
+ +#endif
+ +
+ +#endif        /* _string2_h */
diff --cc src/gromacs/legacyheaders/thread_mpi.h
Simple merge
diff --cc src/gromacs/legacyheaders/thread_mpi/atomic.h
Simple merge
diff --cc src/gromacs/legacyheaders/thread_mpi/atomic/suncc-sparc.h

index 0000000000000000000000000000000000000000,201dc27a7fbe7e79e83ddaeb747b7212a102592c..201dc27a7fbe7e79e83ddaeb747b7212a102592c

mode 000000,100644..100644
--- /dev/null
--- 2/include/thread_mpi/atomic/suncc-sparc.h
+++ b/src/gromacs/legacyheaders/thread_mpi/atomic/suncc-sparc.h
diff --cc src/gromacs/legacyheaders/thread_mpi/numa_malloc.h

index 0000000000000000000000000000000000000000,7b0edc13f66f9cab3841777995a3b20f2fa6a959..7b0edc13f66f9cab3841777995a3b20f2fa6a959

mode 000000,100644..100644
--- /dev/null
--- 2/include/thread_mpi/numa_malloc.h
+++ b/src/gromacs/legacyheaders/thread_mpi/numa_malloc.h
diff --cc src/gromacs/legacyheaders/thread_mpi/threads.h
Simple merge
diff --cc src/gromacs/legacyheaders/tmpi.h
Simple merge
diff --cc src/gromacs/legacyheaders/typedefs.h
Simple merge
diff --cc src/gromacs/legacyheaders/types/atoms.h
Simple merge
diff --cc src/gromacs/legacyheaders/types/filenm.h
Simple merge
diff --cc src/gromacs/legacyheaders/types/globsig.h

index 0000000000000000000000000000000000000000,29c634cc053897a5a7cc5e6ff4eabcb9da80d020..29c634cc053897a5a7cc5e6ff4eabcb9da80d020

mode 000000,100644..100644
--- /dev/null
--- 2/include/types/globsig.h
+++ b/src/gromacs/legacyheaders/types/globsig.h
diff --cc src/gromacs/legacyheaders/types/iteratedconstraints.h

index 0000000000000000000000000000000000000000,501c2743ccba94262b05f78361f973e0d73b533a..501c2743ccba94262b05f78361f973e0d73b533a

mode 000000,100644..100644
--- /dev/null
--- 2/include/types/iteratedconstraints.h
+++ b/src/gromacs/legacyheaders/types/iteratedconstraints.h
diff --cc src/gromacs/legacyheaders/types/nlistheuristics.h

index 0000000000000000000000000000000000000000,b240bacfdaad9e5dc72f04edca9e8d8cb1e4e1fd..b240bacfdaad9e5dc72f04edca9e8d8cb1e4e1fd

mode 000000,100644..100644
--- /dev/null
--- 2/include/types/nlistheuristics.h
+++ b/src/gromacs/legacyheaders/types/nlistheuristics.h
diff --cc src/gromacs/legacyheaders/types/pbc.h
Simple merge
diff --cc src/gromacs/legacyheaders/types/simple.h
Simple merge
diff --cc src/gromacs/legacyheaders/types/topology.h
Simple merge
diff --cc src/gromacs/mdlib/constr.c
Simple merge
diff --cc src/gromacs/mdlib/domdec.c

index fb45eb9ff583f6a92e14007c572fa0579a5acde9,0000000000000000000000000000000000000000..ac1ec2ee315157f5054333f528a3409023dd7f70

mode 100644,000000..100644
--- 1/src/gromacs/mdlib/domdec.c
--- /dev/null
+++ b/src/gromacs/mdlib/domdec.c
@@@ -1,8653 -1,0 +1,8653 @@@
-                               dd_warning(cr,fplog,"NOTE: reproducability requested, will not use dynamic load balancing\n");
+ +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ + *
+ + * 
+ + * This file is part of Gromacs        Copyright (c) 1991-2008
+ + * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
+ + *
+ + * This program is free software; you can redistribute it and/or
+ + * modify it under the terms of the GNU General Public License
+ + * as published by the Free Software Foundation; either version 2
+ + * of the License, or (at your option) any later version.
+ + *
+ + * To help us fund GROMACS development, we humbly ask that you cite
+ + * the research papers on the package. Check out http://www.gromacs.org
+ + * 
+ + * And Hey:
+ + * Gnomes, ROck Monsters And Chili Sauce
+ + */
+ +
+ +#ifdef HAVE_CONFIG_H
+ +#include <config.h>
+ +#endif
+ +
+ +#include <stdio.h>
+ +#include <time.h>
+ +#include <math.h>
+ +#include <string.h>
+ +#include <stdlib.h>
+ +#include "typedefs.h"
+ +#include "smalloc.h"
+ +#include "vec.h"
+ +#include "domdec.h"
+ +#include "domdec_network.h"
+ +#include "nrnb.h"
+ +#include "pbc.h"
+ +#include "chargegroup.h"
+ +#include "constr.h"
+ +#include "mdatoms.h"
+ +#include "names.h"
+ +#include "pdbio.h"
+ +#include "futil.h"
+ +#include "force.h"
+ +#include "pme.h"
+ +#include "pull.h"
+ +#include "pull_rotation.h"
+ +#include "gmx_wallcycle.h"
+ +#include "mdrun.h"
+ +#include "nsgrid.h"
+ +#include "shellfc.h"
+ +#include "mtop_util.h"
+ +#include "gmxfio.h"
+ +#include "gmx_ga2la.h"
+ +#include "gmx_sort.h"
+ +
+ +#ifdef GMX_LIB_MPI
+ +#include <mpi.h>
+ +#endif
+ +#ifdef GMX_THREADS
+ +#include "tmpi.h"
+ +#endif
+ +
+ +#define DDRANK(dd,rank)    (rank)
+ +#define DDMASTERRANK(dd)   (dd->masterrank)
+ +
+ +typedef struct gmx_domdec_master
+ +{
+ +    /* The cell boundaries */
+ +    real **cell_x;
+ +    /* The global charge group division */
+ +    int  *ncg;     /* Number of home charge groups for each node */
+ +    int  *index;   /* Index of nnodes+1 into cg */
+ +    int  *cg;      /* Global charge group index */
+ +    int  *nat;     /* Number of home atoms for each node. */
+ +    int  *ibuf;    /* Buffer for communication */
+ +    rvec *vbuf;    /* Buffer for state scattering and gathering */
+ +} gmx_domdec_master_t;
+ +
+ +typedef struct
+ +{
+ +    /* The numbers of charge groups to send and receive for each cell
+ +     * that requires communication, the last entry contains the total
+ +     * number of atoms that needs to be communicated.
+ +     */
+ +    int nsend[DD_MAXIZONE+2];
+ +    int nrecv[DD_MAXIZONE+2];
+ +    /* The charge groups to send */
+ +    int *index;
+ +    int nalloc;
+ +    /* The atom range for non-in-place communication */
+ +    int cell2at0[DD_MAXIZONE];
+ +    int cell2at1[DD_MAXIZONE];
+ +} gmx_domdec_ind_t;
+ +
+ +typedef struct
+ +{
+ +    int  np;                   /* Number of grid pulses in this dimension */
+ +    int  np_dlb;               /* For dlb, for use with edlbAUTO          */
+ +    gmx_domdec_ind_t *ind;     /* The indices to communicate, size np     */
+ +    int  np_nalloc;
+ +    gmx_bool bInPlace;             /* Can we communicate in place?            */
+ +} gmx_domdec_comm_dim_t;
+ +
+ +typedef struct
+ +{
+ +    gmx_bool *bCellMin;    /* Temp. var.: is this cell size at the limit     */
+ +    real *cell_f;      /* State var.: cell boundaries, box relative      */
+ +    real *old_cell_f;  /* Temp. var.: old cell size                      */
+ +    real *cell_f_max0; /* State var.: max lower boundary, incl neighbors */
+ +    real *cell_f_min1; /* State var.: min upper boundary, incl neighbors */
+ +    real *bound_min;   /* Temp. var.: lower limit for cell boundary      */
+ +    real *bound_max;   /* Temp. var.: upper limit for cell boundary      */
+ +    gmx_bool bLimited;     /* State var.: is DLB limited in this dim and row */
+ +    real *buf_ncd;     /* Temp. var.                                     */
+ +} gmx_domdec_root_t;
+ +
+ +#define DD_NLOAD_MAX 9
+ +
+ +/* Here floats are accurate enough, since these variables
+ + * only influence the load balancing, not the actual MD results.
+ + */
+ +typedef struct
+ +{
+ +    int  nload;
+ +    float *load;
+ +    float sum;
+ +    float max;
+ +    float sum_m;
+ +    float cvol_min;
+ +    float mdf;
+ +    float pme;
+ +    int   flags;
+ +} gmx_domdec_load_t;
+ +
+ +typedef struct
+ +{
+ +    int  nsc;
+ +    int  ind_gl;
+ +    int  ind;
+ +} gmx_cgsort_t;
+ +
+ +typedef struct
+ +{
+ +    gmx_cgsort_t *sort1,*sort2;
+ +    int  sort_nalloc;
+ +    gmx_cgsort_t *sort_new;
+ +    int  sort_new_nalloc;
+ +    int  *ibuf;
+ +    int  ibuf_nalloc;
+ +} gmx_domdec_sort_t;
+ +
+ +typedef struct
+ +{
+ +    rvec *v;
+ +    int  nalloc;
+ +} vec_rvec_t;
+ +
+ +/* This enum determines the order of the coordinates.
+ + * ddnatHOME and ddnatZONE should be first and second,
+ + * the others can be ordered as wanted.
+ + */
+ +enum { ddnatHOME, ddnatZONE, ddnatVSITE, ddnatCON, ddnatNR };
+ +
+ +enum { edlbAUTO, edlbNO, edlbYES, edlbNR };
+ +const char *edlb_names[edlbNR] = { "auto", "no", "yes" };
+ +
+ +typedef struct
+ +{
+ +    int  dim;      /* The dimension                                          */
+ +    gmx_bool dim_match;/* Tells if DD and PME dims match                         */
+ +    int  nslab;    /* The number of PME slabs in this dimension              */
+ +    real *slb_dim_f; /* Cell sizes for determining the PME comm. with SLB    */
+ +    int  *pp_min;  /* The minimum pp node location, size nslab               */
+ +    int  *pp_max;  /* The maximum pp node location,size nslab                */
+ +    int  maxshift; /* The maximum shift for coordinate redistribution in PME */
+ +} gmx_ddpme_t;
+ +
+ +typedef struct
+ +{
+ +    real min0;    /* The minimum bottom of this zone                        */
+ +    real max1;    /* The maximum top of this zone                           */
+ +    real mch0;    /* The maximum bottom communication height for this zone  */
+ +    real mch1;    /* The maximum top communication height for this zone     */
+ +    real p1_0;    /* The bottom value of the first cell in this zone        */
+ +    real p1_1;    /* The top value of the first cell in this zone           */
+ +} gmx_ddzone_t;
+ +
+ +typedef struct gmx_domdec_comm
+ +{
+ +    /* All arrays are indexed with 0 to dd->ndim (not Cartesian indexing),
+ +     * unless stated otherwise.
+ +     */
+ +
+ +    /* The number of decomposition dimensions for PME, 0: no PME */
+ +    int  npmedecompdim;
+ +    /* The number of nodes doing PME (PP/PME or only PME) */
+ +    int  npmenodes;
+ +    int  npmenodes_x;
+ +    int  npmenodes_y;
+ +    /* The communication setup including the PME only nodes */
+ +    gmx_bool bCartesianPP_PME;
+ +    ivec ntot;
+ +    int  cartpmedim;
+ +    int  *pmenodes;          /* size npmenodes                         */
+ +    int  *ddindex2simnodeid; /* size npmenodes, only with bCartesianPP
+ +                              * but with bCartesianPP_PME              */
+ +    gmx_ddpme_t ddpme[2];
+ +    
+ +    /* The DD particle-particle nodes only */
+ +    gmx_bool bCartesianPP;
+ +    int  *ddindex2ddnodeid; /* size npmenode, only with bCartesianPP_PME */
+ +    
+ +    /* The global charge groups */
+ +    t_block cgs_gl;
+ +
+ +    /* Should we sort the cgs */
+ +    int  nstSortCG;
+ +    gmx_domdec_sort_t *sort;
+ +    
+ +    /* Are there bonded and multi-body interactions between charge groups? */
+ +    gmx_bool bInterCGBondeds;
+ +    gmx_bool bInterCGMultiBody;
+ +
+ +    /* Data for the optional bonded interaction atom communication range */
+ +    gmx_bool bBondComm;
+ +    t_blocka *cglink;
+ +    char *bLocalCG;
+ +
+ +    /* The DLB option */
+ +    int  eDLB;
+ +    /* Are we actually using DLB? */
+ +    gmx_bool bDynLoadBal;
+ +
+ +    /* Cell sizes for static load balancing, first index cartesian */
+ +    real **slb_frac;
+ +    
+ +    /* The width of the communicated boundaries */
+ +    real cutoff_mbody;
+ +    real cutoff;
+ +    /* The minimum cell size (including triclinic correction) */
+ +    rvec cellsize_min;
+ +    /* For dlb, for use with edlbAUTO */
+ +    rvec cellsize_min_dlb;
+ +    /* The lower limit for the DD cell size with DLB */
+ +    real cellsize_limit;
+ +    /* Effectively no NB cut-off limit with DLB for systems without PBC? */
+ +    gmx_bool bVacDLBNoLimit;
+ +
+ +    /* tric_dir is only stored here because dd_get_ns_ranges needs it */
+ +    ivec tric_dir;
+ +    /* box0 and box_size are required with dim's without pbc and -gcom */
+ +    rvec box0;
+ +    rvec box_size;
+ +    
+ +    /* The cell boundaries */
+ +    rvec cell_x0;
+ +    rvec cell_x1;
+ +
+ +    /* The old location of the cell boundaries, to check cg displacements */
+ +    rvec old_cell_x0;
+ +    rvec old_cell_x1;
+ +
+ +    /* The communication setup and charge group boundaries for the zones */
+ +    gmx_domdec_zones_t zones;
+ +    
+ +    /* The zone limits for DD dimensions 1 and 2 (not 0), determined from
+ +     * cell boundaries of neighboring cells for dynamic load balancing.
+ +     */
+ +    gmx_ddzone_t zone_d1[2];
+ +    gmx_ddzone_t zone_d2[2][2];
+ +    
+ +    /* The coordinate/force communication setup and indices */
+ +    gmx_domdec_comm_dim_t cd[DIM];
+ +    /* The maximum number of cells to communicate with in one dimension */
+ +    int  maxpulse;
+ +    
+ +    /* Which cg distribution is stored on the master node */
+ +    int master_cg_ddp_count;
+ +    
+ +    /* The number of cg's received from the direct neighbors */
+ +    int  zone_ncg1[DD_MAXZONE];
+ +    
+ +    /* The atom counts, the range for each type t is nat[t-1] <= at < nat[t] */
+ +    int  nat[ddnatNR];
+ +    
+ +    /* Communication buffer for general use */
+ +    int  *buf_int;
+ +    int  nalloc_int;
+ +
+ +     /* Communication buffer for general use */
+ +    vec_rvec_t vbuf;
+ +    
+ +    /* Communication buffers only used with multiple grid pulses */
+ +    int  *buf_int2;
+ +    int  nalloc_int2;
+ +    vec_rvec_t vbuf2;
+ +    
+ +    /* Communication buffers for local redistribution */
+ +    int  **cggl_flag;
+ +    int  cggl_flag_nalloc[DIM*2];
+ +    rvec **cgcm_state;
+ +    int  cgcm_state_nalloc[DIM*2];
+ +    
+ +    /* Cell sizes for dynamic load balancing */
+ +    gmx_domdec_root_t **root;
+ +    real *cell_f_row;
+ +    real cell_f0[DIM];
+ +    real cell_f1[DIM];
+ +    real cell_f_max0[DIM];
+ +    real cell_f_min1[DIM];
+ +    
+ +    /* Stuff for load communication */
+ +    gmx_bool bRecordLoad;
+ +    gmx_domdec_load_t *load;
+ +#ifdef GMX_MPI
+ +    MPI_Comm *mpi_comm_load;
+ +#endif
+ +
+ +    /* Maximum DLB scaling per load balancing step in percent */
+ +    int dlb_scale_lim;
+ +
+ +    /* Cycle counters */
+ +    float cycl[ddCyclNr];
+ +    int   cycl_n[ddCyclNr];
+ +    float cycl_max[ddCyclNr];
+ +    /* Flop counter (0=no,1=yes,2=with (eFlop-1)*5% noise) */
+ +    int eFlop;
+ +    double flop;
+ +    int    flop_n;
+ +    /* How often did we have load measurements */
+ +    int    n_load_have;
+ +    /* How often have we collected the load measurements */
+ +    int    n_load_collect;
+ +    
+ +    /* Statistics */
+ +    double sum_nat[ddnatNR-ddnatZONE];
+ +    int    ndecomp;
+ +    int    nload;
+ +    double load_step;
+ +    double load_sum;
+ +    double load_max;
+ +    ivec   load_lim;
+ +    double load_mdf;
+ +    double load_pme;
+ +
+ +    /* The last partition step */
+ +    gmx_large_int_t globalcomm_step;
+ +
+ +    /* Debugging */
+ +    int  nstDDDump;
+ +    int  nstDDDumpGrid;
+ +    int  DD_debug;
+ +} gmx_domdec_comm_t;
+ +
+ +/* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
+ +#define DD_CGIBS 2
+ +
+ +/* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
+ +#define DD_FLAG_NRCG  65535
+ +#define DD_FLAG_FW(d) (1<<(16+(d)*2))
+ +#define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
+ +
+ +/* Zone permutation required to obtain consecutive charge groups
+ + * for neighbor searching.
+ + */
+ +static const int zone_perm[3][4] = { {0,0,0,0},{1,0,0,0},{3,0,1,2} };
+ +
+ +/* dd_zo and dd_zp3/dd_zp2 are set up such that i zones with non-zero
+ + * components see only j zones with that component 0.
+ + */
+ +
+ +/* The DD zone order */
+ +static const ivec dd_zo[DD_MAXZONE] =
+ +  {{0,0,0},{1,0,0},{1,1,0},{0,1,0},{0,1,1},{0,0,1},{1,0,1},{1,1,1}};
+ +
+ +/* The 3D setup */
+ +#define dd_z3n  8
+ +#define dd_zp3n 4
+ +static const ivec dd_zp3[dd_zp3n] = {{0,0,8},{1,3,6},{2,5,6},{3,5,7}};
+ +
+ +/* The 2D setup */
+ +#define dd_z2n  4
+ +#define dd_zp2n 2
+ +static const ivec dd_zp2[dd_zp2n] = {{0,0,4},{1,3,4}};
+ +
+ +/* The 1D setup */
+ +#define dd_z1n  2
+ +#define dd_zp1n 1
+ +static const ivec dd_zp1[dd_zp1n] = {{0,0,2}};
+ +
+ +/* Factors used to avoid problems due to rounding issues */
+ +#define DD_CELL_MARGIN       1.0001
+ +#define DD_CELL_MARGIN2      1.00005
+ +/* Factor to account for pressure scaling during nstlist steps */
+ +#define DD_PRES_SCALE_MARGIN 1.02
+ +
+ +/* Allowed performance loss before we DLB or warn */
+ +#define DD_PERF_LOSS 0.05
+ +
+ +#define DD_CELL_F_SIZE(dd,di) ((dd)->nc[(dd)->dim[(di)]]+1+(di)*2+1+(di))
+ +
+ +/* Use separate MPI send and receive commands
+ + * when nnodes <= GMX_DD_NNODES_SENDRECV.
+ + * This saves memory (and some copying for small nnodes).
+ + * For high parallelization scatter and gather calls are used.
+ + */
+ +#define GMX_DD_NNODES_SENDRECV 4
+ +
+ +
+ +/*
+ +#define dd_index(n,i) ((((i)[ZZ]*(n)[YY] + (i)[YY])*(n)[XX]) + (i)[XX])
+ +
+ +static void index2xyz(ivec nc,int ind,ivec xyz)
+ +{
+ +  xyz[XX] = ind % nc[XX];
+ +  xyz[YY] = (ind / nc[XX]) % nc[YY];
+ +  xyz[ZZ] = ind / (nc[YY]*nc[XX]);
+ +}
+ +*/
+ +
+ +/* This order is required to minimize the coordinate communication in PME
+ + * which uses decomposition in the x direction.
+ + */
+ +#define dd_index(n,i) ((((i)[XX]*(n)[YY] + (i)[YY])*(n)[ZZ]) + (i)[ZZ])
+ +
+ +static void ddindex2xyz(ivec nc,int ind,ivec xyz)
+ +{
+ +    xyz[XX] = ind / (nc[YY]*nc[ZZ]);
+ +    xyz[YY] = (ind / nc[ZZ]) % nc[YY];
+ +    xyz[ZZ] = ind % nc[ZZ];
+ +}
+ +
+ +static int ddcoord2ddnodeid(gmx_domdec_t *dd,ivec c)
+ +{
+ +    int ddindex;
+ +    int ddnodeid=-1;
+ +    
+ +    ddindex = dd_index(dd->nc,c);
+ +    if (dd->comm->bCartesianPP_PME)
+ +    {
+ +        ddnodeid = dd->comm->ddindex2ddnodeid[ddindex];
+ +    }
+ +    else if (dd->comm->bCartesianPP)
+ +    {
+ +#ifdef GMX_MPI
+ +        MPI_Cart_rank(dd->mpi_comm_all,c,&ddnodeid);
+ +#endif
+ +    }
+ +    else
+ +    {
+ +        ddnodeid = ddindex;
+ +    }
+ +    
+ +    return ddnodeid;
+ +}
+ +
+ +static gmx_bool dynamic_dd_box(gmx_ddbox_t *ddbox,t_inputrec *ir)
+ +{
+ +    return (ddbox->nboundeddim < DIM || DYNAMIC_BOX(*ir));
+ +}
+ +
+ +int ddglatnr(gmx_domdec_t *dd,int i)
+ +{
+ +    int atnr;
+ +    
+ +    if (dd == NULL)
+ +    {
+ +        atnr = i + 1;
+ +    }
+ +    else
+ +    {
+ +        if (i >= dd->comm->nat[ddnatNR-1])
+ +        {
+ +            gmx_fatal(FARGS,"glatnr called with %d, which is larger than the local number of atoms (%d)",i,dd->comm->nat[ddnatNR-1]);
+ +        }
+ +        atnr = dd->gatindex[i] + 1;
+ +    }
+ +    
+ +    return atnr;
+ +}
+ +
+ +t_block *dd_charge_groups_global(gmx_domdec_t *dd)
+ +{
+ +    return &dd->comm->cgs_gl;
+ +}
+ +
+ +static void vec_rvec_init(vec_rvec_t *v)
+ +{
+ +    v->nalloc = 0;
+ +    v->v      = NULL;
+ +}
+ +
+ +static void vec_rvec_check_alloc(vec_rvec_t *v,int n)
+ +{
+ +    if (n > v->nalloc)
+ +    {
+ +        v->nalloc = over_alloc_dd(n);
+ +        srenew(v->v,v->nalloc);
+ +    }
+ +}
+ +
+ +/* Store the home charge group list of dd in state.
+ + *
+ + * Copies the first dd->ncg_home entries of dd->index_gl into state->cg_gl
+ + * (reallocating cg_gl when it is too small), sets state->ncg_gl to that
+ + * count and records dd->ddp_count in state->ddp_count_cg_gl.
+ + * Reports an inconsistency through gmx_incons() when state->ddp_count
+ + * differs from dd->ddp_count.
+ + */
+ +void dd_store_state(gmx_domdec_t *dd,t_state *state)
+ +{
+ +    int i;
+ +
+ +    if (state->ddp_count != dd->ddp_count)
+ +    {
+ +        gmx_incons("The state does not match the domain decomposition state");
+ +    }
+ +
+ +    state->ncg_gl = dd->ncg_home;
+ +    if (state->ncg_gl > state->cg_gl_nalloc)
+ +    {
+ +        /* Grow the destination array before copying into it */
+ +        state->cg_gl_nalloc = over_alloc_dd(state->ncg_gl);
+ +        srenew(state->cg_gl,state->cg_gl_nalloc);
+ +    }
+ +    for(i=0; i<state->ncg_gl; i++)
+ +    {
+ +        state->cg_gl[i] = dd->index_gl[i];
+ +    }
+ +
+ +    /* Tag the stored list with the ddp_count it was taken from */
+ +    state->ddp_count_cg_gl = dd->ddp_count;
+ +}
+ +
+ +gmx_domdec_zones_t *domdec_zones(gmx_domdec_t *dd)
+ +{
+ +    return &dd->comm->zones;
+ +}
+ +
+ +/* Look up the j charge group range and the shift ranges for
+ + * i charge group icg.
+ + *
+ + * The i-zone is the first entry of zones->izone whose cg1 is larger than icg.
+ + * *jcg0 is set to icg itself for i-zone 0 and to the i-zone's jcg0 otherwise;
+ + * *jcg1 is set to the i-zone's jcg1. For every decomposition dimension the
+ + * i-zone's shift0/shift1 values are copied into shift0/shift1 (and widened
+ + * by one on both sides for triclinic directions or grid jumps); entries for
+ + * other dimensions are left untouched.
+ + * gmx_fatal() is called when icg lies beyond the last of the nizone i-zones.
+ + */
+ +void dd_get_ns_ranges(gmx_domdec_t *dd,int icg,
+ +                      int *jcg0,int *jcg1,ivec shift0,ivec shift1)
+ +{
+ +    gmx_domdec_zones_t *zones;
+ +    int izone,d,dim;
+ +
+ +    zones = &dd->comm->zones;
+ +
+ +    /* Stop at nizone so that an out-of-range icg is caught by the
+ +     * gmx_fatal below instead of reading izone[] entries that are not in use.
+ +     */
+ +    izone = 0;
+ +    while (izone < zones->nizone && icg >= zones->izone[izone].cg1)
+ +    {
+ +        izone++;
+ +    }
+ +
+ +    if (izone == 0)
+ +    {
+ +        *jcg0 = icg;
+ +    }
+ +    else if (izone < zones->nizone)
+ +    {
+ +        *jcg0 = zones->izone[izone].jcg0;
+ +    }
+ +    else
+ +    {
+ +        gmx_fatal(FARGS,"DD icg %d out of range: izone (%d) >= nizone (%d)",
+ +                  icg,izone,zones->nizone);
+ +    }
+ +
+ +    *jcg1 = zones->izone[izone].jcg1;
+ +
+ +    for(d=0; d<dd->ndim; d++)
+ +    {
+ +        dim = dd->dim[d];
+ +        shift0[dim] = zones->izone[izone].shift0[dim];
+ +        shift1[dim] = zones->izone[izone].shift1[dim];
+ +        if (dd->comm->tric_dir[dim] || (dd->bGridJump && d > 0))
+ +        {
+ +            /* A conservative approach, this can be optimized */
+ +            shift0[dim] -= 1;
+ +            shift1[dim] += 1;
+ +        }
+ +    }
+ +}
+ +
+ +int dd_natoms_vsite(gmx_domdec_t *dd)
+ +{
+ +    return dd->comm->nat[ddnatVSITE];
+ +}
+ +
+ +void dd_get_constraint_range(gmx_domdec_t *dd,int *at_start,int *at_end)
+ +{
+ +    *at_start = dd->comm->nat[ddnatCON-1];
+ +    *at_end   = dd->comm->nat[ddnatCON];
+ +}
+ +
+ +void dd_move_x(gmx_domdec_t *dd,matrix box,rvec x[])
+ +{
+ +    int  nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
+ +    int  *index,*cgindex;
+ +    gmx_domdec_comm_t *comm;
+ +    gmx_domdec_comm_dim_t *cd;
+ +    gmx_domdec_ind_t *ind;
+ +    rvec shift={0,0,0},*buf,*rbuf;
+ +    gmx_bool bPBC,bScrew;
+ +    
+ +    comm = dd->comm;
+ +    
+ +    cgindex = dd->cgindex;
+ +    
+ +    buf = comm->vbuf.v;
+ +
+ +    nzone = 1;
+ +    nat_tot = dd->nat_home;
+ +    for(d=0; d<dd->ndim; d++)
+ +    {
+ +        bPBC   = (dd->ci[dd->dim[d]] == 0);
+ +        bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
+ +        if (bPBC)
+ +        {
+ +            copy_rvec(box[dd->dim[d]],shift);
+ +        }
+ +        cd = &comm->cd[d];
+ +        for(p=0; p<cd->np; p++)
+ +        {
+ +            ind = &cd->ind[p];
+ +            index = ind->index;
+ +            n = 0;
+ +            if (!bPBC)
+ +            {
+ +                for(i=0; i<ind->nsend[nzone]; i++)
+ +                {
+ +                    at0 = cgindex[index[i]];
+ +                    at1 = cgindex[index[i]+1];
+ +                    for(j=at0; j<at1; j++)
+ +                    {
+ +                        copy_rvec(x[j],buf[n]);
+ +                        n++;
+ +                    }
+ +                }
+ +            }
+ +            else if (!bScrew)
+ +            {
+ +                for(i=0; i<ind->nsend[nzone]; i++)
+ +                {
+ +                    at0 = cgindex[index[i]];
+ +                    at1 = cgindex[index[i]+1];
+ +                    for(j=at0; j<at1; j++)
+ +                    {
+ +                        /* We need to shift the coordinates */
+ +                        rvec_add(x[j],shift,buf[n]);
+ +                        n++;
+ +                    }
+ +                }
+ +            }
+ +            else
+ +            {
+ +                for(i=0; i<ind->nsend[nzone]; i++)
+ +                {
+ +                    at0 = cgindex[index[i]];
+ +                    at1 = cgindex[index[i]+1];
+ +                    for(j=at0; j<at1; j++)
+ +                    {
+ +                        /* Shift x */
+ +                        buf[n][XX] = x[j][XX] + shift[XX];
+ +                        /* Rotate y and z.
+ +                         * This operation requires a special shift force
+ +                         * treatment, which is performed in calc_vir.
+ +                         */
+ +                        buf[n][YY] = box[YY][YY] - x[j][YY];
+ +                        buf[n][ZZ] = box[ZZ][ZZ] - x[j][ZZ];
+ +                        n++;
+ +                    }
+ +                }
+ +            }
+ +            
+ +            if (cd->bInPlace)
+ +            {
+ +                rbuf = x + nat_tot;
+ +            }
+ +            else
+ +            {
+ +                rbuf = comm->vbuf2.v;
+ +            }
+ +            /* Send and receive the coordinates */
+ +            dd_sendrecv_rvec(dd, d, dddirBackward,
+ +                             buf,  ind->nsend[nzone+1],
+ +                             rbuf, ind->nrecv[nzone+1]);
+ +            if (!cd->bInPlace)
+ +            {
+ +                j = 0;
+ +                for(zone=0; zone<nzone; zone++)
+ +                {
+ +                    for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
+ +                    {
+ +                        copy_rvec(rbuf[j],x[i]);
+ +                        j++;
+ +                    }
+ +                }
+ +            }
+ +            nat_tot += ind->nrecv[nzone+1];
+ +        }
+ +        nzone += nzone;
+ +    }
+ +}
+ +
+ +void dd_move_f(gmx_domdec_t *dd,rvec f[],rvec *fshift)
+ +{
+ +    int  nzone,nat_tot,n,d,p,i,j,at0,at1,zone;
+ +    int  *index,*cgindex;
+ +    gmx_domdec_comm_t *comm;
+ +    gmx_domdec_comm_dim_t *cd;
+ +    gmx_domdec_ind_t *ind;
+ +    rvec *buf,*sbuf;
+ +    ivec vis;
+ +    int  is;
+ +    gmx_bool bPBC,bScrew;
+ +    
+ +    comm = dd->comm;
+ +    
+ +    cgindex = dd->cgindex;
+ +
+ +    buf = comm->vbuf.v;
+ +
+ +    n = 0;
+ +    nzone = comm->zones.n/2;
+ +    nat_tot = dd->nat_tot;
+ +    for(d=dd->ndim-1; d>=0; d--)
+ +    {
+ +        bPBC   = (dd->ci[dd->dim[d]] == 0);
+ +        bScrew = (bPBC && dd->bScrewPBC && dd->dim[d] == XX);
+ +        if (fshift == NULL && !bScrew)
+ +        {
+ +            bPBC = FALSE;
+ +        }
+ +        /* Determine which shift vector we need */
+ +        clear_ivec(vis);
+ +        vis[dd->dim[d]] = 1;
+ +        is = IVEC2IS(vis);
+ +        
+ +        cd = &comm->cd[d];
+ +        for(p=cd->np-1; p>=0; p--) {
+ +            ind = &cd->ind[p];
+ +            nat_tot -= ind->nrecv[nzone+1];
+ +            if (cd->bInPlace)
+ +            {
+ +                sbuf = f + nat_tot;
+ +            }
+ +            else
+ +            {
+ +                sbuf = comm->vbuf2.v;
+ +                j = 0;
+ +                for(zone=0; zone<nzone; zone++)
+ +                {
+ +                    for(i=ind->cell2at0[zone]; i<ind->cell2at1[zone]; i++)
+ +                    {
+ +                        copy_rvec(f[i],sbuf[j]);
+ +                        j++;
+ +                    }
+ +                }
+ +            }
+ +            /* Communicate the forces */
+ +            dd_sendrecv_rvec(dd, d, dddirForward,
+ +                             sbuf, ind->nrecv[nzone+1],
+ +                             buf,  ind->nsend[nzone+1]);
+ +            index = ind->index;
+ +            /* Add the received forces */
+ +            n = 0;
+ +            if (!bPBC)
+ +            {
+ +                for(i=0; i<ind->nsend[nzone]; i++)
+ +                {
+ +                    at0 = cgindex[index[i]];
+ +                    at1 = cgindex[index[i]+1];
+ +                    for(j=at0; j<at1; j++)
+ +                    {
+ +                        rvec_inc(f[j],buf[n]);
+ +                        n++;
+ +                    }
+ +                } 
+ +            }
+ +            else if (!bScrew)
+ +            {
+ +                for(i=0; i<ind->nsend[nzone]; i++)
+ +                {
+ +                    at0 = cgindex[index[i]];
+ +                    at1 = cgindex[index[i]+1];
+ +                    for(j=at0; j<at1; j++)
+ +                    {
+ +                        rvec_inc(f[j],buf[n]);
+ +                        /* Add this force to the shift force */
+ +                        rvec_inc(fshift[is],buf[n]);
+ +                        n++;
+ +                    }
+ +                }
+ +            }
+ +            else
+ +            {
+ +                for(i=0; i<ind->nsend[nzone]; i++)
+ +                {
+ +                    at0 = cgindex[index[i]];
+ +                    at1 = cgindex[index[i]+1];
+ +                    for(j=at0; j<at1; j++)
+ +                    {
+ +                        /* Rotate the force */
+ +                        f[j][XX] += buf[n][XX];
+ +                        f[j][YY] -= buf[n][YY];
+ +                        f[j][ZZ] -= buf[n][ZZ];
+ +                        if (fshift)
+ +                        {
+ +                            /* Add this force to the shift force */
+ +                            rvec_inc(fshift[is],buf[n]);
+ +                        }
+ +                        n++;
+ +                    }
+ +                }
+ +            }
+ +        }
+ +        nzone /= 2;
+ +    }
+ +}
+ +
+ +/* For every communication pulse this packs the entries of v that belong
+ + * to the atom ranges cgindex[index[i]] .. cgindex[index[i]+1] into a send
+ + * buffer, exchanges them with dd_sendrecv_real in the backward direction
+ + * and stores the received values in v: directly behind the atoms present
+ + * so far when the dimension communicates in place, otherwise through a
+ + * second buffer into the cell2at0 .. cell2at1 ranges of each zone.
+ + */
+ +void dd_atom_spread_real(gmx_domdec_t *dd,real v[])
+ +{
+ +    gmx_domdec_comm_t *comm;
+ +    gmx_domdec_comm_dim_t *cd;
+ +    gmx_domdec_ind_t *ind;
+ +    int  *cgindex,*index;
+ +    real *sendbuf,*recvbuf;
+ +    int  nzone,nat_tot;
+ +    int  d,pulse,cg,a,a_end,zone,nsent,nread;
+ +
+ +    comm    = dd->comm;
+ +    cgindex = dd->cgindex;
+ +    sendbuf = &comm->vbuf.v[0][0];
+ +
+ +    nat_tot = dd->nat_home;
+ +    /* nzone starts at 1 and doubles after every dimension */
+ +    for(d=0,nzone=1; d<dd->ndim; d++,nzone*=2)
+ +    {
+ +        cd = &comm->cd[d];
+ +        for(pulse=0; pulse<cd->np; pulse++)
+ +        {
+ +            ind   = &cd->ind[pulse];
+ +            index = ind->index;
+ +
+ +            /* Pack the values of all atoms that have to be sent */
+ +            nsent = 0;
+ +            for(cg=0; cg<ind->nsend[nzone]; cg++)
+ +            {
+ +                a_end = cgindex[index[cg]+1];
+ +                for(a=cgindex[index[cg]]; a<a_end; a++)
+ +                {
+ +                    sendbuf[nsent++] = v[a];
+ +                }
+ +            }
+ +
+ +            /* Receive straight into v when possible, else into vbuf2 */
+ +            recvbuf = cd->bInPlace ? v + nat_tot : &comm->vbuf2.v[0][0];
+ +
+ +            /* Send and receive the values */
+ +            dd_sendrecv_real(dd, d, dddirBackward,
+ +                             sendbuf, ind->nsend[nzone+1],
+ +                             recvbuf, ind->nrecv[nzone+1]);
+ +
+ +            if (!cd->bInPlace)
+ +            {
+ +                /* Move the received values to their zone ranges in v */
+ +                nread = 0;
+ +                for(zone=0; zone<nzone; zone++)
+ +                {
+ +                    for(a=ind->cell2at0[zone]; a<ind->cell2at1[zone]; a++)
+ +                    {
+ +                        v[a] = recvbuf[nread++];
+ +                    }
+ +                }
+ +            }
+ +            nat_tot += ind->nrecv[nzone+1];
+ +        }
+ +    }
+ +}
+ +
+ +/* Runs over the dimensions and pulses in reverse order: the values of v
+ + * stored for the received atoms are sent in the forward direction with
+ + * dd_sendrecv_real, and the values that come back are added to the
+ + * entries of v in the atom ranges cgindex[index[i]] .. cgindex[index[i]+1].
+ + */
+ +void dd_atom_sum_real(gmx_domdec_t *dd,real v[])
+ +{
+ +    gmx_domdec_comm_t *comm;
+ +    gmx_domdec_comm_dim_t *cd;
+ +    gmx_domdec_ind_t *ind;
+ +    int  *cgindex,*index;
+ +    real *recvbuf,*sendbuf;
+ +    int  nzone,nat_tot;
+ +    int  d,pulse,cg,a,a_end,zone,npacked,nadded;
+ +
+ +    comm    = dd->comm;
+ +    cgindex = dd->cgindex;
+ +    recvbuf = &comm->vbuf.v[0][0];
+ +
+ +    nat_tot = dd->nat_tot;
+ +    /* nzone starts at half the zone count and halves after every dimension */
+ +    for(d=dd->ndim-1,nzone=comm->zones.n/2; d>=0; d--,nzone/=2)
+ +    {
+ +        cd = &comm->cd[d];
+ +        for(pulse=cd->np-1; pulse>=0; pulse--)
+ +        {
+ +            ind = &cd->ind[pulse];
+ +            nat_tot -= ind->nrecv[nzone+1];
+ +            if (!cd->bInPlace)
+ +            {
+ +                /* Gather the values of the zone ranges in vbuf2 */
+ +                sendbuf = &comm->vbuf2.v[0][0];
+ +                npacked = 0;
+ +                for(zone=0; zone<nzone; zone++)
+ +                {
+ +                    for(a=ind->cell2at0[zone]; a<ind->cell2at1[zone]; a++)
+ +                    {
+ +                        sendbuf[npacked++] = v[a];
+ +                    }
+ +                }
+ +            }
+ +            else
+ +            {
+ +                /* The values to send are contiguous in v */
+ +                sendbuf = v + nat_tot;
+ +            }
+ +
+ +            /* Communicate the values */
+ +            dd_sendrecv_real(dd, d, dddirForward,
+ +                             sendbuf, ind->nrecv[nzone+1],
+ +                             recvbuf, ind->nsend[nzone+1]);
+ +
+ +            /* Add the received values */
+ +            index  = ind->index;
+ +            nadded = 0;
+ +            for(cg=0; cg<ind->nsend[nzone]; cg++)
+ +            {
+ +                a_end = cgindex[index[cg]+1];
+ +                for(a=cgindex[index[cg]]; a<a_end; a++)
+ +                {
+ +                    v[a] += recvbuf[nadded++];
+ +                }
+ +            }
+ +        }
+ +    }
+ +}
+ +
+ +/* Writes one line to fp with the six fields of *zone (min0, max1, mch0,
+ + * mch1, p1_0, p1_1), preceded by the indices d, i and j supplied by
+ + * the caller.
+ + */
+ +static void print_ddzone(FILE *fp,int d,int i,int j,gmx_ddzone_t *zone)
+ +{
+ +    /* The value under the "mch1" label must be zone->mch1;
+ +     * printing mch0 twice hid the real mch1 in the debug output.
+ +     */
+ +    fprintf(fp,"zone d0 %d d1 %d d2 %d  min0 %6.3f max1 %6.3f mch0 %6.3f mch1 %6.3f p1_0 %6.3f p1_1 %6.3f\n",
+ +            d,i,j,
+ +            zone->min0,zone->max1,
+ +            zone->mch0,zone->mch1,
+ +            zone->p1_0,zone->p1_1);
+ +}
+ +
+ +/* Exchanges arrays of gmx_ddzone_t through dd_sendrecv_rvec.
+ + * The six reals of every zone are packed into two consecutive rvecs
+ + * before sending and unpacked in the same order after receiving.
+ + * The local rvec buffers have room for 5 zones in each direction.
+ + */
+ +static void dd_sendrecv_ddzone(const gmx_domdec_t *dd,
+ +                               int ddimind,int direction,
+ +                               gmx_ddzone_t *buf_s,int n_s,
+ +                               gmx_ddzone_t *buf_r,int n_r)
+ +{
+ +    rvec vbuf_s[5*2],vbuf_r[5*2];
+ +    gmx_ddzone_t *zone;
+ +    int  z,k;
+ +
+ +    /* Pack: rvec k holds min0,max1,mch0 and rvec k+1 mch1,p1_0,p1_1 */
+ +    for(z=0,k=0; z<n_s; z++,k+=2)
+ +    {
+ +        zone = &buf_s[z];
+ +        vbuf_s[k  ][0] = zone->min0;
+ +        vbuf_s[k  ][1] = zone->max1;
+ +        vbuf_s[k  ][2] = zone->mch0;
+ +        vbuf_s[k+1][0] = zone->mch1;
+ +        vbuf_s[k+1][1] = zone->p1_0;
+ +        vbuf_s[k+1][2] = zone->p1_1;
+ +    }
+ +
+ +    dd_sendrecv_rvec(dd, ddimind, direction,
+ +                     vbuf_s, n_s*2,
+ +                     vbuf_r, n_r*2);
+ +
+ +    /* Unpack using the same layout */
+ +    for(z=0,k=0; z<n_r; z++,k+=2)
+ +    {
+ +        zone = &buf_r[z];
+ +        zone->min0 = vbuf_r[k  ][0];
+ +        zone->max1 = vbuf_r[k  ][1];
+ +        zone->mch0 = vbuf_r[k  ][2];
+ +        zone->mch1 = vbuf_r[k+1][0];
+ +        zone->p1_0 = vbuf_r[k+1][1];
+ +        zone->p1_1 = vbuf_r[k+1][2];
+ +    }
+ +}
+ +
+ +/* Exchanges cell boundary information between cells along the
+ + * decomposition dimensions.
+ + *
+ + * The first zone entry of comm->zone_d1 / comm->zone_d2 is initialized
+ + * from cell_ns_x0 and cell_ns_x1. Then, for every dimension index d from
+ + * dd->ndim-2 down to 0, the cell fraction extremes are communicated
+ + * forward and the zone information backward. The accumulated results are
+ + * stored in comm->zone_d1, comm->zone_d2, comm->cell_f_max0 and
+ + * comm->cell_f_min1, and cell_ns_x0 / cell_ns_x1 are extended to the
+ + * minimum / maximum over the stored zones.
+ + *
+ + * dd          domain decomposition data; dd->comm is read and updated
+ + * ddbox       supplies npbcdim, tric_dir and v
+ + * cell_ns_x0  lower cell limits, updated in place
+ + * cell_ns_x1  upper cell limits, updated in place
+ + */
+ +static void dd_move_cellx(gmx_domdec_t *dd,gmx_ddbox_t *ddbox,
+ +                          rvec cell_ns_x0,rvec cell_ns_x1)
+ +{
+ +    int  d,d1,dim,pos,buf_size,i,j,p,npulse,npulse_min;
+ +    gmx_ddzone_t *zp,buf_s[5],buf_r[5],buf_e[5];
+ +    rvec extr_s[2],extr_r[2];
+ +    rvec dh;
+ +    real dist_d,c=0,det;
+ +    gmx_domdec_comm_t *comm;
+ +    gmx_bool bPBC,bUse;
+ +
+ +    comm = dd->comm;
+ +
+ +    /* Start every zone entry from the limits of our own cell */
+ +    for(d=1; d<dd->ndim; d++)
+ +    {
+ +        dim = dd->dim[d];
+ +        zp = (d == 1) ? &comm->zone_d1[0] : &comm->zone_d2[0][0];
+ +        zp->min0 = cell_ns_x0[dim];
+ +        zp->max1 = cell_ns_x1[dim];
+ +        zp->mch0 = cell_ns_x0[dim];
+ +        zp->mch1 = cell_ns_x1[dim];
+ +        zp->p1_0 = cell_ns_x0[dim];
+ +        zp->p1_1 = cell_ns_x1[dim];
+ +    }
+ +
+ +    for(d=dd->ndim-2; d>=0; d--)
+ +    {
+ +        dim  = dd->dim[d];
+ +        bPBC = (dim < ddbox->npbcdim);
+ +
+ +        /* Use an rvec to store two reals */
+ +        extr_s[d][0] = comm->cell_f0[d+1];
+ +        extr_s[d][1] = comm->cell_f1[d+1];
+ +        extr_s[d][2] = 0;
+ +
+ +        pos = 0;
+ +        /* Store the extremes in the backward sending buffer,
+ +         * so they get updated separately from the forward communication.
+ +         */
+ +        for(d1=d; d1<dd->ndim-1; d1++)
+ +        {
+ +            /* We invert the order to be able to use the same loop for buf_e */
+ +            buf_s[pos].min0 = extr_s[d1][1];
+ +            buf_s[pos].max1 = extr_s[d1][0];
+ +            buf_s[pos].mch0 = 0;
+ +            buf_s[pos].mch1 = 0;
+ +            /* Store the cell corner of the dimension we communicate along */
+ +            buf_s[pos].p1_0 = comm->cell_x0[dim];
+ +            buf_s[pos].p1_1 = 0;
+ +            pos++;
+ +        }
+ +
+ +        buf_s[pos] = (dd->ndim == 2) ? comm->zone_d1[0] : comm->zone_d2[0][0];
+ +        pos++;
+ +
+ +        if (dd->ndim == 3 && d == 0)
+ +        {
+ +            buf_s[pos] = comm->zone_d2[0][1];
+ +            pos++;
+ +            buf_s[pos] = comm->zone_d1[0];
+ +            pos++;
+ +        }
+ +
+ +        /* We only need to communicate the extremes
+ +         * in the forward direction
+ +         */
+ +        npulse = comm->cd[d].np;
+ +        if (bPBC)
+ +        {
+ +            /* Take the minimum to avoid double communication */
+ +            npulse_min = min(npulse,dd->nc[dim]-1-npulse);
+ +        }
+ +        else
+ +        {
+ +            /* Without PBC we should really not communicate over
+ +             * the boundaries, but implementing that complicates
+ +             * the communication setup and therefore we simply
+ +             * do all communication, but ignore some data.
+ +             */
+ +            npulse_min = npulse;
+ +        }
+ +        for(p=0; p<npulse_min; p++)
+ +        {
+ +            /* Communicate the extremes forward */
+ +            bUse = (bPBC || dd->ci[dim] > 0);
+ +
+ +            dd_sendrecv_rvec(dd, d, dddirForward,
+ +                             extr_s+d, dd->ndim-d-1,
+ +                             extr_r+d, dd->ndim-d-1);
+ +
+ +            if (bUse)
+ +            {
+ +                for(d1=d; d1<dd->ndim-1; d1++)
+ +                {
+ +                    extr_s[d1][0] = max(extr_s[d1][0],extr_r[d1][0]);
+ +                    extr_s[d1][1] = min(extr_s[d1][1],extr_r[d1][1]);
+ +                }
+ +            }
+ +        }
+ +
+ +        buf_size = pos;
+ +        for(p=0; p<npulse; p++)
+ +        {
+ +            /* Communicate all the zone information backward */
+ +            bUse = (bPBC || dd->ci[dim] < dd->nc[dim] - 1);
+ +
+ +            dd_sendrecv_ddzone(dd, d, dddirBackward,
+ +                               buf_s, buf_size,
+ +                               buf_r, buf_size);
+ +
+ +            /* dh stays zero for the first pulse */
+ +            clear_rvec(dh);
+ +            if (p > 0)
+ +            {
+ +                for(d1=d+1; d1<dd->ndim; d1++)
+ +                {
+ +                    /* Determine the decrease of maximum required
+ +                     * communication height along d1 due to the distance along d,
+ +                     * this avoids a lot of useless atom communication.
+ +                     */
+ +                    dist_d = comm->cell_x1[dim] - buf_r[0].p1_0;
+ +
+ +                    if (ddbox->tric_dir[dim])
+ +                    {
+ +                        /* c is the off-diagonal coupling between the cell planes
+ +                         * along directions d and d1.
+ +                         */
+ +                        c = ddbox->v[dim][dd->dim[d1]][dim];
+ +                    }
+ +                    else
+ +                    {
+ +                        c = 0;
+ +                    }
+ +                    det = (1 + c*c)*comm->cutoff*comm->cutoff - dist_d*dist_d;
+ +                    if (det > 0)
+ +                    {
+ +                        dh[d1] = comm->cutoff - (c*dist_d + sqrt(det))/(1 + c*c);
+ +                    }
+ +                    else
+ +                    {
+ +                        /* A negative value signals out of range */
+ +                        dh[d1] = -1;
+ +                    }
+ +                }
+ +            }
+ +
+ +            /* Accumulate the extremes over all pulses */
+ +            for(i=0; i<buf_size; i++)
+ +            {
+ +                if (p == 0)
+ +                {
+ +                    buf_e[i] = buf_r[i];
+ +                }
+ +                else
+ +                {
+ +                    if (bUse)
+ +                    {
+ +                        buf_e[i].min0 = min(buf_e[i].min0,buf_r[i].min0);
+ +                        buf_e[i].max1 = max(buf_e[i].max1,buf_r[i].max1);
+ +                    }
+ +
+ +                    /* The last buffer entry for ndim == 3, d == 0 is the
+ +                     * copy of zone_d1[0], so it uses the dh of index 1.
+ +                     */
+ +                    if (dd->ndim == 3 && d == 0 && i == buf_size - 1)
+ +                    {
+ +                        d1 = 1;
+ +                    }
+ +                    else
+ +                    {
+ +                        d1 = d + 1;
+ +                    }
+ +                    if (bUse && dh[d1] >= 0)
+ +                    {
+ +                        buf_e[i].mch0 = max(buf_e[i].mch0,buf_r[i].mch0-dh[d1]);
+ +                        buf_e[i].mch1 = max(buf_e[i].mch1,buf_r[i].mch1-dh[d1]);
+ +                    }
+ +                }
+ +                /* Copy the received buffer to the send buffer,
+ +                 * to pass the data through with the next pulse.
+ +                 */
+ +                buf_s[i] = buf_r[i];
+ +            }
+ +            if (((bPBC || dd->ci[dim]+npulse < dd->nc[dim]) && p == npulse-1) ||
+ +                (!bPBC && dd->ci[dim]+1+p == dd->nc[dim]-1))
+ +            {
+ +                /* Store the extremes */
+ +                pos = 0;
+ +
+ +                for(d1=d; d1<dd->ndim-1; d1++)
+ +                {
+ +                    extr_s[d1][1] = min(extr_s[d1][1],buf_e[pos].min0);
+ +                    extr_s[d1][0] = max(extr_s[d1][0],buf_e[pos].max1);
+ +                    pos++;
+ +                }
+ +
+ +                if (d == 1 || (d == 0 && dd->ndim == 3))
+ +                {
+ +                    for(i=d; i<2; i++)
+ +                    {
+ +                        comm->zone_d2[1-d][i] = buf_e[pos];
+ +                        pos++;
+ +                    }
+ +                }
+ +                if (d == 0)
+ +                {
+ +                    comm->zone_d1[1] = buf_e[pos];
+ +                    pos++;
+ +                }
+ +            }
+ +        }
+ +    }
+ +
+ +    /* Extend the cell limits to cover all stored zones */
+ +    if (dd->ndim >= 2)
+ +    {
+ +        dim = dd->dim[1];
+ +        for(i=0; i<2; i++)
+ +        {
+ +            if (debug)
+ +            {
+ +                print_ddzone(debug,1,i,0,&comm->zone_d1[i]);
+ +            }
+ +            cell_ns_x0[dim] = min(cell_ns_x0[dim],comm->zone_d1[i].min0);
+ +            cell_ns_x1[dim] = max(cell_ns_x1[dim],comm->zone_d1[i].max1);
+ +        }
+ +    }
+ +    if (dd->ndim >= 3)
+ +    {
+ +        dim = dd->dim[2];
+ +        for(i=0; i<2; i++)
+ +        {
+ +            for(j=0; j<2; j++)
+ +            {
+ +                if (debug)
+ +                {
+ +                    print_ddzone(debug,2,i,j,&comm->zone_d2[i][j]);
+ +                }
+ +                cell_ns_x0[dim] = min(cell_ns_x0[dim],comm->zone_d2[i][j].min0);
+ +                cell_ns_x1[dim] = max(cell_ns_x1[dim],comm->zone_d2[i][j].max1);
+ +            }
+ +        }
+ +    }
+ +    /* Store the accumulated cell fraction extremes */
+ +    for(d=1; d<dd->ndim; d++)
+ +    {
+ +        comm->cell_f_max0[d] = extr_s[d-1][0];
+ +        comm->cell_f_min1[d] = extr_s[d-1][1];
+ +        if (debug)
+ +        {
+ +            fprintf(debug,"Cell fraction d %d, max0 %f, min1 %f\n",
+ +                    d,comm->cell_f_max0[d],comm->cell_f_min1[d]);
+ +        }
+ +    }
+ +}
+ +
+ +/* Gathers on the master, for the charge group distribution that belongs
+ + * to state_local, the charge group count, the atom count and the global
+ + * charge group indices of every node. The results are stored in ma->ncg,
+ + * ma->nat, ma->index and ma->cg.
+ + *
+ + * Returns immediately when state_local->ddp_count equals
+ + * dd->comm->master_cg_ddp_count. Calls gmx_incons when neither dd nor
+ + * state_local holds the distribution for state_local->ddp_count.
+ + */
+ +static void dd_collect_cg(gmx_domdec_t *dd,
+ +                          t_state *state_local)
+ +{
+ +    gmx_domdec_master_t *ma=NULL;
+ +    int buf2[2],*ibuf,i,ncg_home=0,*cg=NULL,nat_home=0;
+ +    t_block *cgs_gl;
+ +
+ +    if (state_local->ddp_count == dd->comm->master_cg_ddp_count)
+ +    {
+ +        /* The master has the correct distribution */
+ +        return;
+ +    }
+ +
+ +    if (state_local->ddp_count == dd->ddp_count)
+ +    {
+ +        /* The state and dd are in sync: use the indices stored in dd */
+ +        ncg_home = dd->ncg_home;
+ +        cg       = dd->index_gl;
+ +        nat_home = dd->nat_home;
+ +    }
+ +    else if (state_local->ddp_count_cg_gl == state_local->ddp_count)
+ +    {
+ +        /* Use the charge group indices stored with the state
+ +         * and count the atoms they contain.
+ +         */
+ +        cgs_gl = &dd->comm->cgs_gl;
+ +
+ +        ncg_home = state_local->ncg_gl;
+ +        cg       = state_local->cg_gl;
+ +        nat_home = 0;
+ +        for(i=0; i<ncg_home; i++)
+ +        {
+ +            nat_home += cgs_gl->index[cg[i]+1] - cgs_gl->index[cg[i]];
+ +        }
+ +    }
+ +    else
+ +    {
+ +        gmx_incons("Attempted to collect a vector for a state for which the charge group distribution is unknown");
+ +    }
+ +
+ +    /* Send the counts selected above, not unconditionally those of dd:
+ +     * otherwise the master receives the wrong distribution whenever
+ +     * the state is out of sync with dd.
+ +     * NOTE(review): dd_collect_vec_sendrecv and dd_collect_vec_gatherv
+ +     * send dd->nat_home atoms; confirm this matches nat_home for states
+ +     * that are out of sync with dd.
+ +     */
+ +    buf2[0] = ncg_home;
+ +    buf2[1] = nat_home;
+ +    if (DDMASTER(dd))
+ +    {
+ +        ma = dd->ma;
+ +        ibuf = ma->ibuf;
+ +    }
+ +    else
+ +    {
+ +        ibuf = NULL;
+ +    }
+ +    /* Collect the charge group and atom counts on the master */
+ +    dd_gather(dd,2*sizeof(int),buf2,ibuf);
+ +
+ +    if (DDMASTER(dd))
+ +    {
+ +        ma->index[0] = 0;
+ +        for(i=0; i<dd->nnodes; i++)
+ +        {
+ +            ma->ncg[i] = ma->ibuf[2*i];
+ +            ma->nat[i] = ma->ibuf[2*i+1];
+ +            ma->index[i+1] = ma->index[i] + ma->ncg[i];
+ +        }
+ +        /* Make byte counts and indices */
+ +        for(i=0; i<dd->nnodes; i++)
+ +        {
+ +            ma->ibuf[i] = ma->ncg[i]*sizeof(int);
+ +            ma->ibuf[dd->nnodes+i] = ma->index[i]*sizeof(int);
+ +        }
+ +        if (debug)
+ +        {
+ +            fprintf(debug,"Initial charge group distribution: ");
+ +            for(i=0; i<dd->nnodes; i++)
+ +            {
+ +                fprintf(debug," %d",ma->ncg[i]);
+ +            }
+ +            fprintf(debug,"\n");
+ +        }
+ +    }
+ +
+ +    /* Collect the charge group indices on the master */
+ +    dd_gatherv(dd,
+ +               ncg_home*sizeof(int),cg,
+ +               DDMASTER(dd) ? ma->ibuf : NULL,
+ +               DDMASTER(dd) ? ma->ibuf+dd->nnodes : NULL,
+ +               DDMASTER(dd) ? ma->cg : NULL);
+ +
+ +    dd->comm->master_cg_ddp_count = state_local->ddp_count;
+ +}
+ +
+ +static void dd_collect_vec_sendrecv(gmx_domdec_t *dd,
+ +                                    rvec *lv,rvec *v)
+ +{
+ +    gmx_domdec_master_t *ma;
+ +    int  n,i,c,a,nalloc=0;
+ +    rvec *buf=NULL;
+ +    t_block *cgs_gl;
+ +
+ +    ma = dd->ma;
+ +    
+ +    if (!DDMASTER(dd))
+ +    {
+ +#ifdef GMX_MPI
+ +        MPI_Send(lv,dd->nat_home*sizeof(rvec),MPI_BYTE,DDMASTERRANK(dd),
+ +                 dd->rank,dd->mpi_comm_all);
+ +#endif
+ +    } else {
+ +        /* Copy the master coordinates to the global array */
+ +        cgs_gl = &dd->comm->cgs_gl;
+ +
+ +        n = DDMASTERRANK(dd);
+ +        a = 0;
+ +        for(i=ma->index[n]; i<ma->index[n+1]; i++)
+ +        {
+ +            for(c=cgs_gl->index[ma->cg[i]]; c<cgs_gl->index[ma->cg[i]+1]; c++)
+ +            {
+ +                copy_rvec(lv[a++],v[c]);
+ +            }
+ +        }
+ +        
+ +        for(n=0; n<dd->nnodes; n++)
+ +        {
+ +            if (n != dd->rank)
+ +            {
+ +                if (ma->nat[n] > nalloc)
+ +                {
+ +                    nalloc = over_alloc_dd(ma->nat[n]);
+ +                    srenew(buf,nalloc);
+ +                }
+ +#ifdef GMX_MPI
+ +                MPI_Recv(buf,ma->nat[n]*sizeof(rvec),MPI_BYTE,DDRANK(dd,n),
+ +                         n,dd->mpi_comm_all,MPI_STATUS_IGNORE);
+ +#endif
+ +                a = 0;
+ +                for(i=ma->index[n]; i<ma->index[n+1]; i++)
+ +                {
+ +                    for(c=cgs_gl->index[ma->cg[i]]; c<cgs_gl->index[ma->cg[i]+1]; c++)
+ +                    {
+ +                        copy_rvec(buf[a++],v[c]);
+ +                    }
+ +                }
+ +            }
+ +        }
+ +        sfree(buf);
+ +    }
+ +}
+ +
+ +/* Fills two halves of ma->ibuf with, per node, the byte count
+ + * (ma->nat[n]*sizeof(rvec)) and the running byte displacement, and
+ + * returns pointers to these arrays in *counts and *disps.
+ + */
+ +static void get_commbuffer_counts(gmx_domdec_t *dd,
+ +                                  int **counts,int **disps)
+ +{
+ +    gmx_domdec_master_t *ma;
+ +    int *cnt,*dsp;
+ +    int node,offset;
+ +
+ +    ma = dd->ma;
+ +
+ +    /* Make the rvec count and displacement arrays */
+ +    cnt = ma->ibuf;
+ +    dsp = ma->ibuf + dd->nnodes;
+ +    *counts = cnt;
+ +    *disps  = dsp;
+ +
+ +    offset = 0;
+ +    for(node=0; node<dd->nnodes; node++)
+ +    {
+ +        cnt[node] = ma->nat[node]*sizeof(rvec);
+ +        /* Each displacement is the sum of all preceding counts */
+ +        dsp[node] = offset;
+ +        offset   += cnt[node];
+ +    }
+ +}
+ +
+ +/* Collects the local vectors lv of all nodes in the global array v on
+ + * the master with a single dd_gatherv call into ma->vbuf, after which
+ + * the master copies the entries to their global positions through
+ + * ma->index, ma->cg and the global charge group index.
+ + */
+ +static void dd_collect_vec_gatherv(gmx_domdec_t *dd,
+ +                                   rvec *lv,rvec *v)
+ +{
+ +    gmx_domdec_master_t *ma;
+ +    t_block *cgs_gl;
+ +    int  *rcounts=NULL,*disps=NULL;
+ +    rvec *buf=NULL;
+ +    int  node,cg,at,src;
+ +
+ +    ma = dd->ma;
+ +
+ +    /* Only the master supplies receive counts and a receive buffer */
+ +    if (DDMASTER(dd))
+ +    {
+ +        get_commbuffer_counts(dd,&rcounts,&disps);
+ +
+ +        buf = ma->vbuf;
+ +    }
+ +
+ +    dd_gatherv(dd,dd->nat_home*sizeof(rvec),lv,rcounts,disps,buf);
+ +
+ +    if (!DDMASTER(dd))
+ +    {
+ +        return;
+ +    }
+ +
+ +    cgs_gl = &dd->comm->cgs_gl;
+ +
+ +    /* The gathered data is ordered by node, so one counter suffices */
+ +    src = 0;
+ +    for(node=0; node<dd->nnodes; node++)
+ +    {
+ +        for(cg=ma->index[node]; cg<ma->index[node+1]; cg++)
+ +        {
+ +            for(at=cgs_gl->index[ma->cg[cg]]; at<cgs_gl->index[ma->cg[cg]+1]; at++)
+ +            {
+ +                copy_rvec(buf[src],v[at]);
+ +                src++;
+ +            }
+ +        }
+ +    }
+ +}
+ +
+ +/* Collects the local vector lv of all nodes in the global vector v.
+ + * The charge group distribution for state_local is first collected with
+ + * dd_collect_cg. The data is then moved with point-to-point messages
+ + * when dd->nnodes <= GMX_DD_NNODES_SENDRECV and with a gatherv otherwise.
+ + */
+ +void dd_collect_vec(gmx_domdec_t *dd,
+ +                    t_state *state_local,rvec *lv,rvec *v)
+ +{
+ +    dd_collect_cg(dd,state_local);
+ +
+ +    if (dd->nnodes <= GMX_DD_NNODES_SENDRECV)
+ +    {
+ +        dd_collect_vec_sendrecv(dd,lv,v);
+ +    }
+ +    else
+ +    {
+ +        dd_collect_vec_gatherv(dd,lv,v);
+ +    }
+ +}
+ +
+ +
+ +/* Collects state_local into state. On the master the scalar, matrix
+ + * and Nose-Hoover entries are copied directly. The entries that are
+ + * distributed over the nodes (EST_DISTR) and flagged in
+ + * state_local->flags are collected with dd_collect_vec or dd_gather.
+ + */
+ +void dd_collect_state(gmx_domdec_t *dd,
+ +                      t_state *state_local,t_state *state)
+ +{
+ +    int est,grp,link,k,nh;
+ +
+ +    nh = state->nhchainlength;
+ +
+ +    if (DDMASTER(dd))
+ +    {
+ +        state->lambda = state_local->lambda;
+ +        state->veta   = state_local->veta;
+ +        state->vol0   = state_local->vol0;
+ +        copy_mat(state_local->box,state->box);
+ +        copy_mat(state_local->boxv,state->boxv);
+ +        copy_mat(state_local->svir_prev,state->svir_prev);
+ +        copy_mat(state_local->fvir_prev,state->fvir_prev);
+ +        copy_mat(state_local->pres_prev,state->pres_prev);
+ +
+ +        /* The chain variables are stored as [group*nh + link] */
+ +        for(grp=0; grp<state_local->ngtc; grp++)
+ +        {
+ +            for(link=0; link<nh; link++)
+ +            {
+ +                k = grp*nh + link;
+ +                state->nosehoover_xi[k]  = state_local->nosehoover_xi[k];
+ +                state->nosehoover_vxi[k] = state_local->nosehoover_vxi[k];
+ +            }
+ +            state->therm_integral[grp] = state_local->therm_integral[grp];
+ +        }
+ +        for(grp=0; grp<state_local->nnhpres; grp++)
+ +        {
+ +            for(link=0; link<nh; link++)
+ +            {
+ +                k = grp*nh + link;
+ +                state->nhpres_xi[k]  = state_local->nhpres_xi[k];
+ +                state->nhpres_vxi[k] = state_local->nhpres_vxi[k];
+ +            }
+ +        }
+ +    }
+ +
+ +    for(est=0; est<estNR; est++)
+ +    {
+ +        if (!(EST_DISTR(est) && state_local->flags & (1<<est)))
+ +        {
+ +            /* Not a distributed entry or not present in this state */
+ +            continue;
+ +        }
+ +
+ +        switch (est)
+ +        {
+ +        case estX:
+ +            dd_collect_vec(dd,state_local,state_local->x,state->x);
+ +            break;
+ +        case estV:
+ +            dd_collect_vec(dd,state_local,state_local->v,state->v);
+ +            break;
+ +        case estSDX:
+ +            dd_collect_vec(dd,state_local,state_local->sd_X,state->sd_X);
+ +            break;
+ +        case estCGP:
+ +            dd_collect_vec(dd,state_local,state_local->cg_p,state->cg_p);
+ +            break;
+ +        case estLD_RNG:
+ +            if (state->nrngi != 1)
+ +            {
+ +                dd_gather(dd,state_local->nrng*sizeof(state->ld_rng[0]),
+ +                          state_local->ld_rng,state->ld_rng);
+ +            }
+ +            else if (DDMASTER(dd))
+ +            {
+ +                /* With nrngi == 1 only the master copy is stored */
+ +                for(k=0; k<state_local->nrng; k++)
+ +                {
+ +                    state->ld_rng[k] = state_local->ld_rng[k];
+ +                }
+ +            }
+ +            break;
+ +        case estLD_RNGI:
+ +            if (state->nrngi != 1)
+ +            {
+ +                dd_gather(dd,sizeof(state->ld_rngi[0]),
+ +                          state_local->ld_rngi,state->ld_rngi);
+ +            }
+ +            else if (DDMASTER(dd))
+ +            {
+ +                state->ld_rngi[0] = state_local->ld_rngi[0];
+ +            }
+ +            break;
+ +        case estDISRE_INITF:
+ +        case estDISRE_RM3TAV:
+ +        case estORIRE_INITF:
+ +        case estORIRE_DTAV:
+ +            /* Nothing is collected for these entries */
+ +            break;
+ +        default:
+ +            gmx_incons("Unknown state entry encountered in dd_collect_state");
+ +        }
+ +    }
+ +}
+ +
+ +/* Sets fr->cg_nalloc to over_alloc_dd(nalloc) and resizes fr->cg_cm
+ + * and fr->cginfo to that many elements. The old and new sizes are
+ + * written to the debug file when it is open.
+ + */
+ +static void dd_realloc_fr_cg(t_forcerec *fr,int nalloc)
+ +{
+ +    if (debug != NULL)
+ +    {
+ +        fprintf(debug,
+ +                "Reallocating forcerec: currently %d, required %d, allocating %d\n",
+ +                fr->cg_nalloc,
+ +                nalloc,
+ +                over_alloc_dd(nalloc));
+ +    }
+ +
+ +    fr->cg_nalloc = over_alloc_dd(nalloc);
+ +
+ +    srenew(fr->cg_cm,fr->cg_nalloc);
+ +    srenew(fr->cginfo,fr->cg_nalloc);
+ +}
+ +
+ +/* Sets state->nalloc to over_alloc_dd(nalloc) and resizes the
+ + * distributed rvec arrays that are flagged in state->flags to that
+ + * size. When f is not NULL, *f is resized as well.
+ + */
+ +static void dd_realloc_state(t_state *state,rvec **f,int nalloc)
+ +{
+ +    int est;
+ +
+ +    if (debug != NULL)
+ +    {
+ +        fprintf(debug,
+ +                "Reallocating state: currently %d, required %d, allocating %d\n",
+ +                state->nalloc,
+ +                nalloc,
+ +                over_alloc_dd(nalloc));
+ +    }
+ +
+ +    state->nalloc = over_alloc_dd(nalloc);
+ +
+ +    for(est=0; est<estNR; est++)
+ +    {
+ +        if (!(EST_DISTR(est) && state->flags & (1<<est)))
+ +        {
+ +            /* Not a distributed entry or not present in this state */
+ +            continue;
+ +        }
+ +
+ +        switch (est)
+ +        {
+ +        case estX:
+ +            srenew(state->x,state->nalloc);
+ +            break;
+ +        case estV:
+ +            srenew(state->v,state->nalloc);
+ +            break;
+ +        case estSDX:
+ +            srenew(state->sd_X,state->nalloc);
+ +            break;
+ +        case estCGP:
+ +            srenew(state->cg_p,state->nalloc);
+ +            break;
+ +        case estLD_RNG:
+ +        case estLD_RNGI:
+ +        case estDISRE_INITF:
+ +        case estDISRE_RM3TAV:
+ +        case estORIRE_INITF:
+ +        case estORIRE_DTAV:
+ +            /* No reallocation required */
+ +            break;
+ +        default:
+ +            gmx_incons("Unknown state entry encountered in dd_realloc_state");            
+ +        }
+ +    }
+ +
+ +    if (f != NULL)
+ +    {
+ +        srenew(*f,state->nalloc);
+ +    }
+ +}
+ +
+ +static void dd_distribute_vec_sendrecv(gmx_domdec_t *dd,t_block *cgs,
+ +                                       rvec *v,rvec *lv)
+ +{
+ +    gmx_domdec_master_t *ma;
+ +    int  n,i,c,a,nalloc=0;
+ +    rvec *buf=NULL;
+ +    
+ +    if (DDMASTER(dd))
+ +    {
+ +        ma  = dd->ma;
+ +        
+ +        for(n=0; n<dd->nnodes; n++)
+ +        {
+ +            if (n != dd->rank)
+ +            {
+ +                if (ma->nat[n] > nalloc)
+ +                {
+ +                    nalloc = over_alloc_dd(ma->nat[n]);
+ +                    srenew(buf,nalloc);
+ +                }
+ +                /* Use lv as a temporary buffer */
+ +                a = 0;
+ +                for(i=ma->index[n]; i<ma->index[n+1]; i++)
+ +                {
+ +                    for(c=cgs->index[ma->cg[i]]; c<cgs->index[ma->cg[i]+1]; c++)
+ +                    {
+ +                        copy_rvec(v[c],buf[a++]);
+ +                    }
+ +                }
+ +                if (a != ma->nat[n])
+ +                {
+ +                    gmx_fatal(FARGS,"Internal error a (%d) != nat (%d)",
+ +                              a,ma->nat[n]);
+ +                }
+ +                
+ +#ifdef GMX_MPI
+ +                MPI_Send(buf,ma->nat[n]*sizeof(rvec),MPI_BYTE,
+ +                         DDRANK(dd,n),n,dd->mpi_comm_all);
+ +#endif
+ +            }
+ +        }
+ +        sfree(buf);
+ +        n = DDMASTERRANK(dd);
+ +        a = 0;
+ +        for(i=ma->index[n]; i<ma->index[n+1]; i++)
+ +        {
+ +            for(c=cgs->index[ma->cg[i]]; c<cgs->index[ma->cg[i]+1]; c++)
+ +            {
+ +                copy_rvec(v[c],lv[a++]);
+ +            }
+ +        }
+ +    }
+ +    else
+ +    {
+ +#ifdef GMX_MPI
+ +        MPI_Recv(lv,dd->nat_home*sizeof(rvec),MPI_BYTE,DDMASTERRANK(dd),
+ +                 MPI_ANY_TAG,dd->mpi_comm_all,MPI_STATUS_IGNORE);
+ +#endif
+ +    }
+ +}
+ +
+ +/* Distributes the global vector v on the master over the local vectors
+ + * lv of all nodes with a single dd_scatterv call. The master first
+ + * packs the atoms of all nodes, ordered by node, into ma->vbuf and
+ + * sets up the byte counts and displacements. Every node receives
+ + * dd->nat_home rvecs into lv.
+ + */
+ +static void dd_distribute_vec_scatterv(gmx_domdec_t *dd,t_block *cgs,
+ +                                       rvec *v,rvec *lv)
+ +{
+ +    gmx_domdec_master_t *ma;
+ +    int  *scounts=NULL,*disps=NULL;
+ +    int  n,i,c,a;
+ +    rvec *buf=NULL;
+ +
+ +    /* Only the master supplies send counts and a send buffer */
+ +    if (DDMASTER(dd))
+ +    {
+ +        ma  = dd->ma;
+ +
+ +        get_commbuffer_counts(dd,&scounts,&disps);
+ +
+ +        buf = ma->vbuf;
+ +        a = 0;
+ +        for(n=0; n<dd->nnodes; n++)
+ +        {
+ +            for(i=ma->index[n]; i<ma->index[n+1]; i++)
+ +            {
+ +                for(c=cgs->index[ma->cg[i]]; c<cgs->index[ma->cg[i]+1]; c++)
+ +                {
+ +                    copy_rvec(v[c],buf[a++]);
+ +                }
+ +            }
+ +        }
+ +    }
+ +
+ +    dd_scatterv(dd,scounts,disps,buf,dd->nat_home*sizeof(rvec),lv);
+ +}
+ +
+ +/* Distributes the global vector v over the local vectors lv, using a
+ + * scatterv when there are more than GMX_DD_NNODES_SENDRECV nodes and
+ + * point-to-point messages otherwise.
+ + */
+ +static void dd_distribute_vec(gmx_domdec_t *dd,t_block *cgs,rvec *v,rvec *lv)
+ +{
+ +    if (dd->nnodes > GMX_DD_NNODES_SENDRECV)
+ +    {
+ +        dd_distribute_vec_scatterv(dd,cgs,v,lv);
+ +        return;
+ +    }
+ +
+ +    dd_distribute_vec_sendrecv(dd,cgs,v,lv);
+ +}
+ +
+ +/* Distributes the global state over the local states of all nodes.
+ + * The master copies the scalar, matrix and Nose-Hoover entries to its
+ + * state_local, after which they are broadcast. The local state (and
+ + * *f, when f is not NULL) is enlarged when dd->nat_home exceeds
+ + * state_local->nalloc. The distributed entries flagged in
+ + * state_local->flags are then distributed with dd_distribute_vec,
+ + * dd_bcastc or dd_scatter.
+ + */
+ +static void dd_distribute_state(gmx_domdec_t *dd,t_block *cgs,
+ +                                t_state *state,t_state *state_local,
+ +                                rvec **f)
+ +{
+ +    int  i,j,nh;
+ +
+ +    nh = state->nhchainlength;
+ +
+ +    if (DDMASTER(dd))
+ +    {
+ +        state_local->lambda = state->lambda;
+ +        state_local->veta   = state->veta;
+ +        state_local->vol0   = state->vol0;
+ +        copy_mat(state->box,state_local->box);
+ +        copy_mat(state->box_rel,state_local->box_rel);
+ +        copy_mat(state->boxv,state_local->boxv);
+ +        copy_mat(state->svir_prev,state_local->svir_prev);
+ +        copy_mat(state->fvir_prev,state_local->fvir_prev);
+ +        /* The chain variables are stored as [group*nh + link] */
+ +        for(i=0; i<state_local->ngtc; i++)
+ +        {
+ +            for(j=0; j<nh; j++) {
+ +                state_local->nosehoover_xi[i*nh+j]        = state->nosehoover_xi[i*nh+j];
+ +                state_local->nosehoover_vxi[i*nh+j]       = state->nosehoover_vxi[i*nh+j];
+ +            }
+ +            state_local->therm_integral[i] = state->therm_integral[i];
+ +        }
+ +        for(i=0; i<state_local->nnhpres; i++)
+ +        {
+ +            for(j=0; j<nh; j++) {
+ +                state_local->nhpres_xi[i*nh+j]        = state->nhpres_xi[i*nh+j];
+ +                state_local->nhpres_vxi[i*nh+j]       = state->nhpres_vxi[i*nh+j];
+ +            }
+ +        }
+ +    }
+ +    /* Broadcast the entries the master has just filled in */
+ +    dd_bcast(dd,sizeof(real),&state_local->lambda);
+ +    dd_bcast(dd,sizeof(real),&state_local->veta);
+ +    dd_bcast(dd,sizeof(real),&state_local->vol0);
+ +    dd_bcast(dd,sizeof(state_local->box),state_local->box);
+ +    dd_bcast(dd,sizeof(state_local->box_rel),state_local->box_rel);
+ +    dd_bcast(dd,sizeof(state_local->boxv),state_local->boxv);
+ +    dd_bcast(dd,sizeof(state_local->svir_prev),state_local->svir_prev);
+ +    dd_bcast(dd,sizeof(state_local->fvir_prev),state_local->fvir_prev);
+ +    dd_bcast(dd,((state_local->ngtc*nh)*sizeof(double)),state_local->nosehoover_xi);
+ +    dd_bcast(dd,((state_local->ngtc*nh)*sizeof(double)),state_local->nosehoover_vxi);
+ +    dd_bcast(dd,state_local->ngtc*sizeof(double),state_local->therm_integral);
+ +    dd_bcast(dd,((state_local->nnhpres*nh)*sizeof(double)),state_local->nhpres_xi);
+ +    dd_bcast(dd,((state_local->nnhpres*nh)*sizeof(double)),state_local->nhpres_vxi);
+ +
+ +    /* Make room for the home atoms before receiving the vectors */
+ +    if (dd->nat_home > state_local->nalloc)
+ +    {
+ +        dd_realloc_state(state_local,f,dd->nat_home);
+ +    }
+ +    for(i=0; i<estNR; i++)
+ +    {
+ +        if (EST_DISTR(i) && state_local->flags & (1<<i))
+ +        {
+ +            switch (i) {
+ +            case estX:
+ +                dd_distribute_vec(dd,cgs,state->x,state_local->x);
+ +                break;
+ +            case estV:
+ +                dd_distribute_vec(dd,cgs,state->v,state_local->v);
+ +                break;
+ +            case estSDX:
+ +                dd_distribute_vec(dd,cgs,state->sd_X,state_local->sd_X);
+ +                break;
+ +            case estCGP:
+ +                dd_distribute_vec(dd,cgs,state->cg_p,state_local->cg_p);
+ +                break;
+ +            case estLD_RNG:
+ +                if (state->nrngi == 1)
+ +                {
+ +                    dd_bcastc(dd,
+ +                              state_local->nrng*sizeof(state_local->ld_rng[0]),
+ +                              state->ld_rng,state_local->ld_rng);
+ +                }
+ +                else
+ +                {
+ +                    dd_scatter(dd,
+ +                               state_local->nrng*sizeof(state_local->ld_rng[0]),
+ +                               state->ld_rng,state_local->ld_rng);
+ +                }
+ +                break;
+ +            case estLD_RNGI:
+ +                if (state->nrngi == 1)
+ +                {
+ +                    dd_bcastc(dd,sizeof(state_local->ld_rngi[0]),
+ +                              state->ld_rngi,state_local->ld_rngi);
+ +                }
+ +                else
+ +                {
+ +                    dd_scatter(dd,sizeof(state_local->ld_rngi[0]),
+ +                               state->ld_rngi,state_local->ld_rngi);
+ +                }
+ +                break;
+ +            case estDISRE_INITF:
+ +            case estDISRE_RM3TAV:
+ +            case estORIRE_INITF:
+ +            case estORIRE_DTAV:
+ +                /* Not implemented yet */
+ +                break;
+ +            default:
+ +                gmx_incons("Unknown state entry encountered in dd_distribute_state");
+ +            }
+ +        }
+ +    }
+ +}
+ +
+ +/* Translate a Cartesian dimension index into its axis letter.
+ + * XX, YY and ZZ give 'X', 'Y' and 'Z'; any other value is handed to
+ + * gmx_fatal and, should that call return, '?' is returned.
+ + */
+ +static char dim2char(int dim)
+ +{
+ +    char axis;
+ +
+ +    if (dim == XX)
+ +    {
+ +        axis = 'X';
+ +    }
+ +    else if (dim == YY)
+ +    {
+ +        axis = 'Y';
+ +    }
+ +    else if (dim == ZZ)
+ +    {
+ +        axis = 'Z';
+ +    }
+ +    else
+ +    {
+ +        axis = '?';
+ +        gmx_fatal(FARGS,"Unknown dim %d",dim);
+ +    }
+ +
+ +    return axis;
+ +}
+ +
+ +/* Write the DD cell grid to a PDB file named "<fn>_<step>.pdb".
+ + *
+ + * Every rank contributes its cell bounds (comm->cell_x0 and comm->cell_x1)
+ + * through dd_gather; only the DD master allocates the receive buffer and
+ + * writes the file. For each cell the 8 corners are written as ATOM records
+ + * (coordinates multiplied by 10, transformed with the tric matrix built
+ + * from box), followed by 12 CONECT records (4 per dimension).
+ + * The last two numeric columns of each ATOM record are 1.0 and
+ + * nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]) times the product of the
+ + * cell extents.
+ + */
+ +static void write_dd_grid_pdb(const char *fn,gmx_large_int_t step,
+ +                              gmx_domdec_t *dd,matrix box,gmx_ddbox_t *ddbox)
+ +{
+ +    rvec grid_s[2],*grid_r=NULL,cx,r;
+ +    char fname[STRLEN],format[STRLEN],buf[22];
+ +    FILE *out;
+ +    int  a,i,d,z,y,x;
+ +    matrix tric;
+ +    real vol;
+ +
+ +    /* Lower and upper corner of the local cell */
+ +    copy_rvec(dd->comm->cell_x0,grid_s[0]);
+ +    copy_rvec(dd->comm->cell_x1,grid_s[1]);
+ +    
+ +    if (DDMASTER(dd))
+ +    {
+ +        snew(grid_r,2*dd->nnodes);
+ +    }
+ +    
+ +    /* Collect two rvecs per rank on the master */
+ +    dd_gather(dd,2*sizeof(rvec),grid_s[0],DDMASTER(dd) ? grid_r[0] : NULL);
+ +    
+ +    if (DDMASTER(dd))
+ +    {
+ +        /* Unit diagonal; off-diagonal elements are only non-zero for
+ +         * decomposed dimensions below ddbox->npbcdim.
+ +         */
+ +        for(d=0; d<DIM; d++)
+ +        {
+ +            for(i=0; i<DIM; i++)
+ +            {
+ +                if (d == i)
+ +                {
+ +                    tric[d][i] = 1;
+ +                }
+ +                else
+ +                {
+ +                    if (dd->nc[d] > 1 && d < ddbox->npbcdim)
+ +                    {
+ +                        tric[d][i] = box[i][d]/box[i][i];
+ +                    }
+ +                    else
+ +                    {
+ +                        tric[d][i] = 0;
+ +                    }
+ +                }
+ +            }
+ +        }
+ +        sprintf(fname,"%s_%s.pdb",fn,gmx_step_str(step,buf));
+ +        sprintf(format,"%s%s\n",pdbformat,"%6.2f%6.2f");
+ +        out = gmx_fio_fopen(fname,"w");
+ +        gmx_write_pdb_box(out,dd->bScrewPBC ? epbcSCREW : epbcXYZ,box);
+ +        a = 1;
+ +        for(i=0; i<dd->nnodes; i++)
+ +        {
+ +            vol = dd->nnodes/(box[XX][XX]*box[YY][YY]*box[ZZ][ZZ]);
+ +            for(d=0; d<DIM; d++)
+ +            {
+ +                vol *= grid_r[i*2+1][d] - grid_r[i*2][d];
+ +            }
+ +            for(z=0; z<2; z++)
+ +            {
+ +                for(y=0; y<2; y++)
+ +                {
+ +                    for(x=0; x<2; x++)
+ +                    {
+ +                        cx[XX] = grid_r[i*2+x][XX];
+ +                        cx[YY] = grid_r[i*2+y][YY];
+ +                        cx[ZZ] = grid_r[i*2+z][ZZ];
+ +                        mvmul(tric,cx,r);
+ +                        /* The same pdbformat-based format is used in
+ +                         * write_dd_pdb, which passes a character after
+ +                         * the residue number. Pass it here as well so
+ +                         * the argument list matches the format string.
+ +                         */
+ +                        fprintf(out,format,"ATOM",a++,"CA","GLY",' ',1+i,' ',
+ +                                10*r[XX],10*r[YY],10*r[ZZ],1.0,vol);
+ +                    }
+ +                }
+ +            }
+ +            /* Atom serial numbers of cell i start at 1 + i*8 */
+ +            for(d=0; d<DIM; d++)
+ +            {
+ +                for(x=0; x<4; x++)
+ +                {
+ +                    switch(d)
+ +                    {
+ +                    case 0: y = 1 + i*8 + 2*x; break;
+ +                    case 1: y = 1 + i*8 + 2*x - (x % 2); break;
+ +                    case 2: y = 1 + i*8 + x; break;
+ +                    }
+ +                    fprintf(out,"%6s%5d%5d\n","CONECT",y,y+(1<<d));
+ +                }
+ +            }
+ +        }
+ +        gmx_fio_fclose(out);
+ +        sfree(grid_r);
+ +    }
+ +}
+ +
+ +/* Dump the atoms known to this rank to "<fn>_<step>_n<sim_nodeid>.pdb".
+ + *
+ + * natoms == -1 selects dd->comm->nat[ddnatVSITE] atoms. The last column
+ + * of every ATOM record encodes where the atom comes from: the zone index
+ + * for atoms below nat[ddnatZONE], zones.n for atoms below nat[ddnatVSITE]
+ + * and zones.n + 1 for all remaining atoms.
+ + */
+ +void write_dd_pdb(const char *fn,gmx_large_int_t step,const char *title,
+ +                  gmx_mtop_t *mtop,t_commrec *cr,
+ +                  int natoms,rvec x[],matrix box)
+ +{
+ +    char fname[STRLEN],format[STRLEN],format4[STRLEN],buf[22];
+ +    const char *fmt;
+ +    FILE *out;
+ +    int  i,ii,resnr,zone;
+ +    char *atomname,*resname;
+ +    real b;
+ +    gmx_domdec_t *dd = cr->dd;
+ +
+ +    if (natoms == -1)
+ +    {
+ +        natoms = dd->comm->nat[ddnatVSITE];
+ +    }
+ +
+ +    /* One format for short atom names, one for names of 4 or more chars */
+ +    sprintf(format,"%s%s\n",pdbformat,"%6.2f%6.2f");
+ +    sprintf(format4,"%s%s\n",pdbformat4,"%6.2f%6.2f");
+ +
+ +    sprintf(fname,"%s_%s_n%d.pdb",fn,gmx_step_str(step,buf),cr->sim_nodeid);
+ +    out = gmx_fio_fopen(fname,"w");
+ +
+ +    fprintf(out,"TITLE     %s\n",title);
+ +    gmx_write_pdb_box(out,dd->bScrewPBC ? epbcSCREW : epbcXYZ,box);
+ +
+ +    for(i=0; i<natoms; i++)
+ +    {
+ +        ii = dd->gatindex[i];
+ +        gmx_mtop_atominfo_global(mtop,ii,&atomname,&resnr,&resname);
+ +
+ +        if (i < dd->comm->nat[ddnatZONE])
+ +        {
+ +            /* Find the first zone whose atom range contains i */
+ +            for(zone=0;
+ +                i >= dd->cgindex[dd->comm->zones.cg_range[zone+1]];
+ +                zone++)
+ +            {
+ +            }
+ +            b = zone;
+ +        }
+ +        else if (i < dd->comm->nat[ddnatVSITE])
+ +        {
+ +            b = dd->comm->zones.n;
+ +        }
+ +        else
+ +        {
+ +            b = dd->comm->zones.n + 1;
+ +        }
+ +
+ +        fmt = (strlen(atomname) < 4) ? format : format4;
+ +        fprintf(out,fmt,
+ +                "ATOM",(ii+1)%100000,
+ +                atomname,resname,' ',resnr%10000,' ',
+ +                10*x[i][XX],10*x[i][YY],10*x[i][ZZ],1.0,b);
+ +    }
+ +    fprintf(out,"TER\n");
+ +
+ +    gmx_fio_fclose(out);
+ +}
+ +
+ +/* Return the cut-off distance used for multi-body bonded interactions.
+ + *
+ + * Returns -1 when comm->bInterCGBondeds is not set. A positive
+ + * comm->cutoff_mbody is returned as is. Otherwise the smallest
+ + * cellsize_min over the decomposed dimensions is taken and then combined
+ + * with cutoff_mbody (maximum, when bBondComm is set) or with cutoff
+ + * (minimum).
+ + */
+ +real dd_cutoff_mbody(gmx_domdec_t *dd)
+ +{
+ +    gmx_domdec_comm_t *comm = dd->comm;
+ +    real r_mb;
+ +    int  d;
+ +
+ +    if (!comm->bInterCGBondeds)
+ +    {
+ +        return -1;
+ +    }
+ +    if (comm->cutoff_mbody > 0)
+ +    {
+ +        return comm->cutoff_mbody;
+ +    }
+ +
+ +    /* cutoff_mbody=0 means we do not have DLB */
+ +    r_mb = comm->cellsize_min[dd->dim[0]];
+ +    for(d=1; d<dd->ndim; d++)
+ +    {
+ +        r_mb = min(r_mb,comm->cellsize_min[dd->dim[d]]);
+ +    }
+ +    if (comm->bBondComm)
+ +    {
+ +        r_mb = max(r_mb,comm->cutoff_mbody);
+ +    }
+ +    else
+ +    {
+ +        r_mb = min(r_mb,comm->cutoff);
+ +    }
+ +
+ +    return r_mb;
+ +}
+ +
+ +/* Return the larger of comm->cutoff and the multi-body cut-off
+ + * reported by dd_cutoff_mbody.
+ + */
+ +real dd_cutoff_twobody(gmx_domdec_t *dd)
+ +{
+ +    real cutoff_mb = dd_cutoff_mbody(dd);
+ +
+ +    return max(dd->comm->cutoff,cutoff_mb);
+ +}
+ +
+ +
+ +/* Copy coord into coord_pme and replace the component along
+ + * comm->cartpmedim by an index in the range beyond the nc DD cells:
+ + * the (ntot - nc) extra positions are spread evenly over the nc cells.
+ + */
+ +static void dd_cart_coord2pmecoord(gmx_domdec_t *dd,ivec coord,ivec coord_pme)
+ +{
+ +    int pmedim,ncell,nextra;
+ +
+ +    pmedim = dd->comm->cartpmedim;
+ +    ncell  = dd->nc[pmedim];
+ +    nextra = dd->comm->ntot[pmedim] - ncell;
+ +
+ +    copy_ivec(coord,coord_pme);
+ +    coord_pme[pmedim] = ncell + (coord[pmedim]*nextra + nextra/2)/ncell;
+ +}
+ +
+ +/* Assign a PME index to DD index ddindex, given ndd DD nodes and
+ + * npme PME nodes, assuming that the major index of both is x.
+ + * Adding npme/2 before the integer division by ndd gives an even
+ + * distribution.
+ + */
+ +static int low_ddindex2pmeindex(int ndd,int npme,int ddindex)
+ +{
+ +    int scaled;
+ +
+ +    scaled = ddindex*npme + npme/2;
+ +
+ +    return scaled/ndd;
+ +}
+ +
+ +/* PME index for a DD index, using the node counts stored in dd */
+ +static int ddindex2pmeindex(const gmx_domdec_t *dd,int ddindex)
+ +{
+ +    int ndd,npme;
+ +
+ +    ndd  = dd->nnodes;
+ +    npme = dd->comm->npmenodes;
+ +
+ +    return low_ddindex2pmeindex(ndd,npme,ddindex);
+ +}
+ +
+ +/* PME index for a DD index, using cr->dd->nnodes and cr->npmenodes */
+ +static int cr_ddindex2pmeindex(const t_commrec *cr,int ddindex)
+ +{
+ +    int ndd,npme;
+ +
+ +    ndd  = cr->dd->nnodes;
+ +    npme = cr->npmenodes;
+ +
+ +    return low_ddindex2pmeindex(ndd,npme,ddindex);
+ +}
+ +
+ +/* Build and return a newly allocated list of cr->npmenodes node ids.
+ + *
+ + * An entry is added after DD index i when i is the last DD index or when
+ + * the next DD index maps to a higher PME index. The stored id is i + 1
+ + * plus the number of entries already stored.
+ + */
+ +static int *dd_pmenodes(t_commrec *cr)
+ +{
+ +    int *pmenodes;
+ +    int ndd,nfound,ddi;
+ +
+ +    snew(pmenodes,cr->npmenodes);
+ +
+ +    ndd    = cr->dd->nnodes;
+ +    nfound = 0;
+ +    for(ddi=0; ddi<ndd; ddi++)
+ +    {
+ +        if (ddi+1 == ndd ||
+ +            cr_ddindex2pmeindex(cr,ddi+1) > cr_ddindex2pmeindex(cr,ddi))
+ +        {
+ +            if (debug)
+ +            {
+ +                fprintf(debug,"pmenode[%d] = %d\n",nfound,ddi+1+nfound);
+ +            }
+ +            pmenodes[nfound] = ddi + 1 + nfound;
+ +            nfound++;
+ +        }
+ +    }
+ +
+ +    return pmenodes;
+ +}
+ +
+ +/* Return the PME index belonging to the DD cell with grid
+ + * coordinates (x,y,z): the coordinates are converted to a DD index
+ + * with dd_index and then mapped with ddindex2pmeindex.
+ + */
+ +static int gmx_ddcoord2pmeindex(t_commrec *cr,int x,int y,int z)
+ +{
+ +    gmx_domdec_t *dd = cr->dd;
+ +    ivec cell;
+ +
+ +    cell[XX] = x;
+ +    cell[YY] = y;
+ +    cell[ZZ] = z;
+ +
+ +    return ddindex2pmeindex(dd,dd_index(dd->nc,cell));
+ +}
+ +
+ +/* Return the simulation node id of the DD cell at grid position (x,y,z).
+ + *
+ + * With bCartesianPP_PME the id comes from MPI_Cart_rank (and stays -1
+ + * when compiled without GMX_MPI). With bCartesianPP it is looked up in
+ + * comm->ddindex2simnodeid. Otherwise it is the DD index, increased by
+ + * the PME index of the cell when comm->pmenodes is set.
+ + */
+ +static int ddcoord2simnodeid(t_commrec *cr,int x,int y,int z)
+ +{
+ +    gmx_domdec_comm_t *comm = cr->dd->comm;
+ +    ivec cell;
+ +    int  ddindex,nodeid;
+ +
+ +    cell[XX] = x;
+ +    cell[YY] = y;
+ +    cell[ZZ] = z;
+ +
+ +    nodeid = -1;
+ +    if (comm->bCartesianPP_PME)
+ +    {
+ +#ifdef GMX_MPI
+ +        MPI_Cart_rank(cr->mpi_comm_mysim,cell,&nodeid);
+ +#endif
+ +        return nodeid;
+ +    }
+ +
+ +    ddindex = dd_index(cr->dd->nc,cell);
+ +    if (comm->bCartesianPP)
+ +    {
+ +        nodeid = comm->ddindex2simnodeid[ddindex];
+ +    }
+ +    else if (comm->pmenodes)
+ +    {
+ +        nodeid = ddindex + gmx_ddcoord2pmeindex(cr,x,y,z);
+ +    }
+ +    else
+ +    {
+ +        nodeid = ddindex;
+ +    }
+ +
+ +    return nodeid;
+ +}
+ +
+ +/* Return the PME node that simulation node sim_nodeid is coupled to,
+ + * or -1 when no PME node is found for it (gmx_pmeonlynode treats -1 as
+ + * "sim_nodeid is itself a PME-only node").
+ + *
+ + * This assumes a uniform x domain decomposition grid cell size.
+ + */
+ +static int dd_simnode2pmenode(t_commrec *cr,int sim_nodeid)
+ +{
+ +    gmx_domdec_t *dd = cr->dd;
+ +    gmx_domdec_comm_t *comm = dd->comm;
+ +    ivec coord,coord_pme;
+ +    int  *pmenodes;
+ +    int  k;
+ +    int  pmenode;
+ +
+ +    pmenode = -1;
+ +
+ +    if (comm->bCartesianPP_PME)
+ +    {
+ +#ifdef GMX_MPI
+ +        MPI_Cart_coords(cr->mpi_comm_mysim,sim_nodeid,DIM,coord);
+ +        if (coord[comm->cartpmedim] < dd->nc[comm->cartpmedim])
+ +        {
+ +            /* This is a PP node */
+ +            dd_cart_coord2pmecoord(dd,coord,coord_pme);
+ +            MPI_Cart_rank(cr->mpi_comm_mysim,coord_pme,&pmenode);
+ +        }
+ +#endif
+ +    }
+ +    else if (comm->bCartesianPP || comm->pmenodes == NULL)
+ +    {
+ +        /* Node ids below dd->nnodes are used directly as DD index here;
+ +         * the PME nodes are numbered after the dd->nnodes DD nodes.
+ +         */
+ +        if (sim_nodeid < dd->nnodes)
+ +        {
+ +            pmenode = dd->nnodes + ddindex2pmeindex(dd,sim_nodeid);
+ +        }
+ +    }
+ +    else
+ +    {
+ +        /* This assumes DD cells with identical x coordinates
+ +         * are numbered sequentially.
+ +         * Find the first listed PME node id that is not below sim_nodeid.
+ +         */
+ +        pmenodes = comm->pmenodes;
+ +        for(k=0; sim_nodeid > pmenodes[k]; k++)
+ +        {
+ +        }
+ +        if (sim_nodeid < pmenodes[k])
+ +        {
+ +            pmenode = pmenodes[k];
+ +        }
+ +    }
+ +
+ +    return pmenode;
+ +}
+ +
+ +/* Tell whether sim_nodeid is a PME-only node: without domain
+ + * decomposition never, otherwise when dd_simnode2pmenode returns -1.
+ + */
+ +gmx_bool gmx_pmeonlynode(t_commrec *cr,int sim_nodeid)
+ +{
+ +    if (!DOMAINDECOMP(cr))
+ +    {
+ +        return FALSE;
+ +    }
+ +
+ +    return (dd_simnode2pmenode(cr,sim_nodeid) == -1);
+ +}
+ +
+ +/* Collect the simulation node ids of the DD cells served by PME node
+ + * pmenodeid.
+ + *
+ + * *my_ddnodes is newly allocated with room for
+ + * ceil(dd->nnodes/cr->npmenodes) entries, *nmy_ddnodes receives the
+ + * number of entries stored and *node_peer the last stored entry.
+ + * With bCartesianPP_PME a cell matches when its PME coordinate equals
+ + * dd->ci, otherwise when its PME index equals pmenodeid.
+ + */
+ +void get_pme_ddnodes(t_commrec *cr,int pmenodeid,
+ +                     int *nmy_ddnodes,int **my_ddnodes,int *node_peer)
+ +{
+ +    gmx_domdec_t *dd = cr->dd;
+ +    int x,y,z,k,nodeid;
+ +    ivec coord,coord_pme;
+ +    gmx_bool bMine;
+ +
+ +    snew(*my_ddnodes,(dd->nnodes+cr->npmenodes-1)/cr->npmenodes);
+ +
+ +    *nmy_ddnodes = 0;
+ +    for(x=0; x<dd->nc[XX]; x++)
+ +    {
+ +        for(y=0; y<dd->nc[YY]; y++)
+ +        {
+ +            for(z=0; z<dd->nc[ZZ]; z++)
+ +            {
+ +                if (dd->comm->bCartesianPP_PME)
+ +                {
+ +                    coord[XX] = x;
+ +                    coord[YY] = y;
+ +                    coord[ZZ] = z;
+ +                    dd_cart_coord2pmecoord(dd,coord,coord_pme);
+ +                    bMine = (dd->ci[XX] == coord_pme[XX] &&
+ +                             dd->ci[YY] == coord_pme[YY] &&
+ +                             dd->ci[ZZ] == coord_pme[ZZ]);
+ +                }
+ +                else
+ +                {
+ +                    /* The slab corresponds to the nodeid in the PME group */
+ +                    bMine = (gmx_ddcoord2pmeindex(cr,x,y,z) == pmenodeid);
+ +                }
+ +
+ +                if (bMine)
+ +                {
+ +                    nodeid = ddcoord2simnodeid(cr,x,y,z);
+ +                    (*my_ddnodes)[*nmy_ddnodes] = nodeid;
+ +                    (*nmy_ddnodes)++;
+ +                }
+ +            }
+ +        }
+ +    }
+ +
+ +    /* The last PP-only node is the peer node */
+ +    *node_peer = (*my_ddnodes)[*nmy_ddnodes-1];
+ +
+ +    if (debug)
+ +    {
+ +        fprintf(debug,"Receive coordinates from PP nodes:");
+ +        for(k=0; k<*nmy_ddnodes; k++)
+ +        {
+ +            fprintf(debug," %d",(*my_ddnodes)[k]);
+ +        }
+ +        fprintf(debug,"\n");
+ +    }
+ +}
+ +
+ +/* Return FALSE when another node after this one is coupled to the
+ + * same PME node as this node, TRUE otherwise.
+ + *
+ + * The check is only made when there are fewer PME nodes than DD nodes.
+ + * With bCartesianPP_PME the "next" node is the one at the next Cartesian
+ + * coordinate along cartpmedim (only checked when compiled with GMX_MPI),
+ + * otherwise it is sim_nodeid+1.
+ + */
+ +static gmx_bool receive_vir_ener(t_commrec *cr)
+ +{
+ +    gmx_domdec_comm_t *comm;
+ +    int  pmenode,coords[DIM],rank;
+ +    int  next_nodeid;
+ +    gmx_bool bReceive;
+ +
+ +    bReceive = TRUE;
+ +    if (cr->npmenodes >= cr->dd->nnodes)
+ +    {
+ +        return bReceive;
+ +    }
+ +
+ +    comm    = cr->dd->comm;
+ +    pmenode = dd_simnode2pmenode(cr,cr->sim_nodeid);
+ +
+ +    if (comm->bCartesianPP_PME)
+ +    {
+ +#ifdef GMX_MPI
+ +        MPI_Cart_coords(cr->mpi_comm_mysim,cr->sim_nodeid,DIM,coords);
+ +        coords[comm->cartpmedim]++;
+ +        if (coords[comm->cartpmedim] < cr->dd->nc[comm->cartpmedim])
+ +        {
+ +            MPI_Cart_rank(cr->mpi_comm_mysim,coords,&rank);
+ +            if (dd_simnode2pmenode(cr,rank) == pmenode)
+ +            {
+ +                /* This is not the last PP node for pmenode */
+ +                bReceive = FALSE;
+ +            }
+ +        }
+ +#endif  
+ +    }
+ +    else
+ +    {
+ +        next_nodeid = cr->sim_nodeid + 1;
+ +        if (next_nodeid < cr->nnodes &&
+ +            dd_simnode2pmenode(cr,next_nodeid) == pmenode)
+ +        {
+ +            /* This is not the last PP node for pmenode */
+ +            bReceive = FALSE;
+ +        }
+ +    }
+ +
+ +    return bReceive;
+ +}
+ +
+ +/* Reset the zone charge-group ranges: entry 0 becomes 0 and
+ + * entries 1 up to and including zones.n become dd->ncg_home.
+ + */
+ +static void set_zones_ncg_home(gmx_domdec_t *dd)
+ +{
+ +    gmx_domdec_zones_t *zones = &dd->comm->zones;
+ +    int z;
+ +
+ +    zones->cg_range[0] = 0;
+ +    for(z=1; z<=zones->n; z++)
+ +    {
+ +        zones->cg_range[z] = dd->ncg_home;
+ +    }
+ +}
+ +
+ +/* Rebuild dd->index_gl and dd->cgindex from the charge groups listed
+ + * in state->cg_gl, using gcgs_index for the atom range of each global
+ + * charge group. Also sets dd->ncg_home, dd->nat_home and the zone
+ + * ranges (through set_zones_ncg_home).
+ + */
+ +static void rebuild_cgindex(gmx_domdec_t *dd,int *gcgs_index,t_state *state)
+ +{
+ +    int icg,gcg,natoms;
+ +
+ +    natoms = 0;
+ +    for(icg=0; icg<state->ncg_gl; icg++)
+ +    {
+ +        gcg = state->cg_gl[icg];
+ +        dd->cgindex[icg]  = natoms;
+ +        dd->index_gl[icg] = gcg;
+ +        natoms += gcgs_index[gcg+1] - gcgs_index[gcg];
+ +    }
+ +    /* Closing entry: one past the last atom of the last charge group */
+ +    dd->cgindex[icg] = natoms;
+ +
+ +    dd->ncg_home = state->ncg_gl;
+ +    dd->nat_home = natoms;
+ +
+ +    set_zones_ncg_home(dd);
+ +}
+ +
+ +/* Look up the cginfo entry of global charge group cg: advance to the
+ + * first block whose cg_end exceeds cg, then index its cginfo array with
+ + * the offset from cg_start, taken modulo cg_mod.
+ + */
+ +static int ddcginfo(const cginfo_mb_t *cginfo_mb,int cg)
+ +{
+ +    const cginfo_mb_t *mb;
+ +
+ +    for(mb=cginfo_mb; cg >= mb->cg_end; mb++)
+ +    {
+ +    }
+ +
+ +    return mb->cginfo[(cg - mb->cg_start) % mb->cg_mod];
+ +}
+ +
+ +/* For the local charge groups cg0 up to (not including) cg1:
+ + * when fr is given, fill fr->cginfo from fr->cginfo_mb;
+ + * when bLocalCG is given, mark the global charge group as local.
+ + */
+ +static void dd_set_cginfo(int *index_gl,int cg0,int cg1,
+ +                          t_forcerec *fr,char *bLocalCG)
+ +{
+ +    int icg;
+ +
+ +    if (fr != NULL)
+ +    {
+ +        for(icg=cg0; icg<cg1; icg++)
+ +        {
+ +            fr->cginfo[icg] = ddcginfo(fr->cginfo_mb,index_gl[icg]);
+ +        }
+ +    }
+ +
+ +    if (bLocalCG != NULL)
+ +    {
+ +        icg = cg0;
+ +        while (icg < cg1)
+ +        {
+ +            bLocalCG[index_gl[icg]] = TRUE;
+ +            icg++;
+ +        }
+ +    }
+ +}
+ +
+ +/* Fill dd->gatindex (local to global atom index) and dd->ga2la
+ + * (global to local) for the charge groups from cg_start onwards.
+ + *
+ + * dd->gatindex is enlarged first when dd->nat_tot exceeds its
+ + * allocation. Charge groups beyond the first zone_ncg1[zone] of a zone
+ + * are stored in ga2la with zone + nzone instead of zone.
+ + */
+ +static void make_dd_indices(gmx_domdec_t *dd,int *gcgs_index,int cg_start)
+ +{
+ +    int nzone,z,zone_tag,first_cg,icg,gcg,at_loc,at_gl;
+ +    int *cg_range,*ncg1;
+ +
+ +    if (dd->nat_tot > dd->gatindex_nalloc)
+ +    {
+ +        dd->gatindex_nalloc = over_alloc_dd(dd->nat_tot);
+ +        srenew(dd->gatindex,dd->gatindex_nalloc);
+ +    }
+ +
+ +    nzone    = dd->comm->zones.n;
+ +    cg_range = dd->comm->zones.cg_range;
+ +    ncg1     = dd->comm->zone_ncg1;
+ +
+ +    if (cg_range[1] != dd->ncg_home)
+ +    {
+ +        gmx_incons("dd->ncg_zone is not up to date");
+ +    }
+ +
+ +    /* Make the local to global and global to local atom index */
+ +    at_loc = dd->cgindex[cg_start];
+ +    for(z=0; z<nzone; z++)
+ +    {
+ +        /* The first zone starts at cg_start, all others at their range */
+ +        first_cg = (z == 0) ? cg_start : cg_range[z];
+ +
+ +        for(icg=first_cg; icg<cg_range[z+1]; icg++)
+ +        {
+ +            if (icg - first_cg >= ncg1[z])
+ +            {
+ +                /* Signal that this cg is from more than one zone away */
+ +                zone_tag = z + nzone;
+ +            }
+ +            else
+ +            {
+ +                zone_tag = z;
+ +            }
+ +
+ +            gcg = dd->index_gl[icg];
+ +            for(at_gl=gcgs_index[gcg]; at_gl<gcgs_index[gcg+1]; at_gl++)
+ +            {
+ +                dd->gatindex[at_loc] = at_gl;
+ +                ga2la_set(dd->ga2la,at_gl,at_loc,zone_tag);
+ +                at_loc++;
+ +            }
+ +        }
+ +    }
+ +}
+ +
+ +/* Check bLocalCG against dd->index_gl and return the number of
+ + * inconsistencies found (0 when bLocalCG is NULL).
+ + *
+ + * An error is counted for every local charge group that is not marked,
+ + * plus one when the number of marked entries among the ncg_sys global
+ + * charge groups differs from dd->ncg_tot. Each error is reported on
+ + * stderr, tagged with where.
+ + */
+ +static int check_bLocalCG(gmx_domdec_t *dd,int ncg_sys,const char *bLocalCG,
+ +                          const char *where)
+ +{
+ +    int icg,nmarked,nerr;
+ +
+ +    nerr = 0;
+ +    if (bLocalCG != NULL)
+ +    {
+ +        for(icg=0; icg<dd->ncg_tot; icg++)
+ +        {
+ +            if (bLocalCG[dd->index_gl[icg]])
+ +            {
+ +                continue;
+ +            }
+ +            fprintf(stderr,
+ +                    "DD node %d, %s: cg %d, global cg %d is not marked in bLocalCG (ncg_home %d)\n",dd->rank,where,icg+1,dd->index_gl[icg]+1,dd->ncg_home);
+ +            nerr++;
+ +        }
+ +
+ +        nmarked = 0;
+ +        for(icg=0; icg<ncg_sys; icg++)
+ +        {
+ +            nmarked += (bLocalCG[icg] ? 1 : 0);
+ +        }
+ +        if (nmarked != dd->ncg_tot)
+ +        {
+ +            fprintf(stderr,"DD node %d, %s: In bLocalCG %d cgs are marked as local, whereas there are %d\n",dd->rank,where,nmarked,dd->ncg_tot);
+ +            nerr++;
+ +        }
+ +    }
+ +
+ +    return nerr;
+ +}
+ +
+ +/* Cross-check dd->gatindex, dd->ga2la and comm->bLocalCG and call
+ + * gmx_fatal when any counted inconsistency is found.
+ + *
+ + * Counted as errors: a ga2la entry pointing at or beyond nat_tot, a
+ + * ga2la entry whose local atom has a different global index, and
+ + * everything check_bLocalCG reports. Duplicate global atoms (only
+ + * checked when DD_debug > 1), a mismatch between the number of ga2la
+ + * entries and nat_tot, and local atoms without a ga2la entry are
+ + * printed to stderr but not counted.
+ + */
+ +static void check_index_consistency(gmx_domdec_t *dd,
+ +                                    int natoms_sys,int ncg_sys,
+ +                                    const char *where)
+ +{
+ +    int  nerr,nmapped,at_gl,at_loc,la,zone;
+ +    int  *have;
+ +
+ +    nerr = 0;
+ +
+ +    if (dd->comm->DD_debug > 1)
+ +    {
+ +        /* have[] holds 1 + the local index at which a global atom
+ +         * was first seen, 0 meaning not seen yet.
+ +         */
+ +        snew(have,natoms_sys);
+ +        for(at_loc=0; at_loc<dd->nat_tot; at_loc++)
+ +        {
+ +            at_gl = dd->gatindex[at_loc];
+ +            if (have[at_gl] > 0)
+ +            {
+ +                fprintf(stderr,"DD node %d: global atom %d occurs twice: index %d and %d\n",dd->rank,at_gl+1,have[at_gl],at_loc+1);
+ +            }
+ +            else
+ +            {
+ +                have[at_gl] = at_loc + 1;
+ +            }
+ +        }
+ +        sfree(have);
+ +    }
+ +
+ +    /* From here on have[] flags the local atoms reachable via ga2la */
+ +    snew(have,dd->nat_tot);
+ +
+ +    nmapped = 0;
+ +    for(at_gl=0; at_gl<natoms_sys; at_gl++)
+ +    {
+ +        if (!ga2la_get(dd->ga2la,at_gl,&la,&zone))
+ +        {
+ +            continue;
+ +        }
+ +        if (la >= dd->nat_tot)
+ +        {
+ +            fprintf(stderr,"DD node %d: global atom %d marked as local atom %d, which is larger than nat_tot (%d)\n",dd->rank,at_gl+1,la+1,dd->nat_tot);
+ +            nerr++;
+ +        }
+ +        else
+ +        {
+ +            have[la] = 1;
+ +            if (dd->gatindex[la] != at_gl)
+ +            {
+ +                fprintf(stderr,"DD node %d: global atom %d marked as local atom %d, which has global atom index %d\n",dd->rank,at_gl+1,la+1,dd->gatindex[la]+1);
+ +                nerr++;
+ +            }
+ +        }
+ +        nmapped++;
+ +    }
+ +    if (nmapped != dd->nat_tot)
+ +    {
+ +        fprintf(stderr,
+ +                "DD node %d, %s: %d global atom indices, %d local atoms\n",
+ +                dd->rank,where,nmapped,dd->nat_tot);
+ +    }
+ +    for(at_loc=0; at_loc<dd->nat_tot; at_loc++)
+ +    {
+ +        if (have[at_loc] == 0)
+ +        {
+ +            fprintf(stderr,
+ +                    "DD node %d, %s: local atom %d, global %d has no global index\n",
+ +                    dd->rank,where,at_loc+1,dd->gatindex[at_loc]+1);
+ +        }
+ +    }
+ +    sfree(have);
+ +
+ +    nerr += check_bLocalCG(dd,ncg_sys,dd->comm->bLocalCG,where);
+ +
+ +    if (nerr > 0)
+ +    {
+ +        gmx_fatal(FARGS,"DD node %d, %s: %d atom/cg index inconsistencies",
+ +                  dd->rank,where,nerr);
+ +    }
+ +}
+ +
+ +/* Remove local index information for atoms from a_start and charge
+ + * groups from cg_start onwards: the ga2la entries, the bLocalCG marks
+ + * (when that array exists), the local vsite indices and, when
+ + * dd->constraints is set, the local constraint indices.
+ + */
+ +static void clear_dd_indices(gmx_domdec_t *dd,int cg_start,int a_start)
+ +{
+ +    int  k;
+ +    char *bLocalCG;
+ +
+ +    if (a_start != 0)
+ +    {
+ +        for(k=a_start; k<dd->nat_tot; k++)
+ +        {
+ +            ga2la_del(dd->ga2la,dd->gatindex[k]);
+ +        }
+ +    }
+ +    else
+ +    {
+ +        /* Clear the whole list without searching */
+ +        ga2la_clear(dd->ga2la);
+ +    }
+ +
+ +    bLocalCG = dd->comm->bLocalCG;
+ +    if (bLocalCG)
+ +    {
+ +        k = cg_start;
+ +        while (k < dd->ncg_tot)
+ +        {
+ +            bLocalCG[dd->index_gl[k]] = FALSE;
+ +            k++;
+ +        }
+ +    }
+ +
+ +    dd_clear_local_vsite_indices(dd);
+ +
+ +    if (dd->constraints)
+ +    {
+ +        dd_clear_local_constraint_indices(dd);
+ +    }
+ +}
+ +
+ +/* Return the limit on the distance between the boundaries of
+ + * neighboring cells for decomposition index dim_ind.
+ + *
+ + * The distance between the boundaries of cells at distance
+ + * x+-1,y+-1 or y+-1,z+-1 is limited by the cut-off restrictions
+ + * and by the fact that cells should not be shifted by more than
+ + * half their size, such that cg's only shift by one cell
+ + * at redecomposition.
+ + */
+ +static real grid_jump_limit(gmx_domdec_comm_t *comm,int dim_ind)
+ +{
+ +    real limit;
+ +
+ +    limit = comm->cellsize_limit;
+ +    if (comm->bVacDLBNoLimit)
+ +    {
+ +        return limit;
+ +    }
+ +
+ +    return max(limit,comm->cutoff/comm->cd[dim_ind].np);
+ +}
+ +
+ +/* For every decomposition index except the first, verify that the
+ + * cell boundaries stay at least grid_jump_limit away from the stored
+ + * cell_f_max0/cell_f_min1 bounds (scaled by the box size and, for
+ + * triclinic directions, the skew factor). Calls gmx_fatal otherwise.
+ + */
+ +static void check_grid_jump(gmx_large_int_t step,gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
+ +{
+ +    gmx_domdec_comm_t *comm = dd->comm;
+ +    int  di,dim;
+ +    real limit,bfac;
+ +    char buf[22];
+ +
+ +    for(di=1; di<dd->ndim; di++)
+ +    {
+ +        dim   = dd->dim[di];
+ +        limit = grid_jump_limit(comm,di);
+ +
+ +        bfac = ddbox->box_size[dim];
+ +        if (ddbox->tric_dir[dim])
+ +        {
+ +            bfac *= ddbox->skew_fac[dim];
+ +        }
+ +
+ +        if ((comm->cell_f1[di] - comm->cell_f_max0[di])*bfac <  limit ||
+ +            (comm->cell_f0[di] - comm->cell_f_min1[di])*bfac > -limit)
+ +        {
+ +            gmx_fatal(FARGS,"Step %s: The domain decomposition grid has shifted too much in the %c-direction around cell %d %d %d\n",
+ +                      gmx_step_str(step,buf),
+ +                      dim2char(dim),dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
+ +        }
+ +    }
+ +}
+ +
+ +/* Number of load measurements: flop_n when eFlop is set,
+ + * otherwise the cycle count cycl_n[ddCyclF].
+ + */
+ +static int dd_load_count(gmx_domdec_comm_t *comm)
+ +{
+ +    if (comm->eFlop)
+ +    {
+ +        return comm->flop_n;
+ +    }
+ +
+ +    return comm->cycl_n[ddCyclF];
+ +}
+ +
+ +/* Return the force load of this node.
+ + *
+ + * With eFlop set the load is comm->flop; for eFlop > 1 it is scaled by
+ + * a random factor within 5% times (eFlop - 1) around 1. Otherwise the
+ + * load is the cycle count cycl[ddCyclF], minus the stored maximum when
+ + * more than one count was collected.
+ + */
+ +static float dd_force_load(gmx_domdec_comm_t *comm)
+ +{
+ +    float load;
+ +
+ +    if (!comm->eFlop)
+ +    {
+ +        load = comm->cycl[ddCyclF];
+ +        if (comm->cycl_n[ddCyclF] > 1)
+ +        {
+ +            /* Subtract the maximum of the last n cycle counts
+ +             * to get rid of possible high counts due to other sources,
+ +             * for instance system activity, that would otherwise
+ +             * affect the dynamic load balancing.
+ +             */
+ +            load -= comm->cycl_max[ddCyclF];
+ +        }
+ +    }
+ +    else
+ +    {
+ +        load = comm->flop;
+ +        if (comm->eFlop > 1)
+ +        {
+ +            load *= 1.0 + (comm->eFlop - 1)*(0.1*rand()/RAND_MAX - 0.05);
+ +        }
+ +    }
+ +
+ +    return load;
+ +}
+ +
+ +/* Allocate *dim_f with dd->nc[dim]+1 entries and fill it with the
+ + * cumulative cell boundary fractions along dim: 0 at the start, 1 at
+ + * the end, and in between either the running sum of comm->slb_frac[dim]
+ + * or, when that is not set, uniform fractions i/nc.
+ + */
+ +static void set_slb_pme_dim_f(gmx_domdec_t *dd,int dim,real **dim_f)
+ +{
+ +    gmx_domdec_comm_t *comm = dd->comm;
+ +    real *f;
+ +    int  k;
+ +
+ +    snew(*dim_f,dd->nc[dim]+1);
+ +    f = *dim_f;
+ +
+ +    f[0] = 0;
+ +    if (comm->slb_frac[dim])
+ +    {
+ +        for(k=1; k<dd->nc[dim]; k++)
+ +        {
+ +            f[k] = f[k-1] + comm->slb_frac[dim][k-1];
+ +        }
+ +    }
+ +    else
+ +    {
+ +        for(k=1; k<dd->nc[dim]; k++)
+ +        {
+ +            f[k] = (real)k/(real)dd->nc[dim];
+ +        }
+ +    }
+ +    f[dd->nc[dim]] = 1;
+ +}
+ +
+ +/* Initialize the PME decomposition data ddpme for decomposition
+ + * index dimind.
+ + *
+ + * Sets ddpme->dim, dim_match and nslab. When there is more than one
+ + * slab, also determines for every slab the lowest and highest PP cell
+ + * coordinate (pp_min/pp_max) of the DD nodes mapped to it and sets up
+ + * ddpme->slb_dim_f through set_slb_pme_dim_f.
+ + */
+ +static void init_ddpme(gmx_domdec_t *dd,gmx_ddpme_t *ddpme,int dimind)
+ +{
+ +    int  pmeindex,slab,nso,ddi;
+ +    ivec xyz;
+ +
+ +    ddpme->dim = dimind;
+ +    if (dimind == 0 && dd->dim[0] == YY && dd->comm->npmenodes_x == 1)
+ +    {
+ +        ddpme->dim = YY;
+ +    }
+ +    ddpme->dim_match = (ddpme->dim == dd->dim[dimind]);
+ +
+ +    if (ddpme->dim == 0)
+ +    {
+ +        ddpme->nslab = dd->comm->npmenodes_x;
+ +    }
+ +    else
+ +    {
+ +        ddpme->nslab = dd->comm->npmenodes_y;
+ +    }
+ +
+ +    if (ddpme->nslab <= 1)
+ +    {
+ +        return;
+ +    }
+ +
+ +    nso = dd->comm->npmenodes/ddpme->nslab;
+ +    /* Determine for each PME slab the PP location range for dimension dim */
+ +    snew(ddpme->pp_min,ddpme->nslab);
+ +    snew(ddpme->pp_max,ddpme->nslab);
+ +    for(slab=0; slab<ddpme->nslab; slab++)
+ +    {
+ +        /* Start from an inverted range so any cell narrows it */
+ +        ddpme->pp_min[slab] = dd->nc[dd->dim[dimind]] - 1;
+ +        ddpme->pp_max[slab] = 0;
+ +    }
+ +    for(ddi=0; ddi<dd->nnodes; ddi++)
+ +    {
+ +        ddindex2xyz(dd->nc,ddi,xyz);
+ +        /* For y only use our y/z slab.
+ +         * This assumes that the PME x grid size matches the DD grid size.
+ +         */
+ +        if (dimind != 0 && xyz[XX] != dd->ci[XX])
+ +        {
+ +            continue;
+ +        }
+ +
+ +        pmeindex = ddindex2pmeindex(dd,ddi);
+ +        slab = (dimind == 0) ? pmeindex/nso : pmeindex % ddpme->nslab;
+ +
+ +        if (xyz[dimind] < ddpme->pp_min[slab])
+ +        {
+ +            ddpme->pp_min[slab] = xyz[dimind];
+ +        }
+ +        if (xyz[dimind] > ddpme->pp_max[slab])
+ +        {
+ +            ddpme->pp_max[slab] = xyz[dimind];
+ +        }
+ +    }
+ +
+ +    set_slb_pme_dim_f(dd,ddpme->dim,&ddpme->slb_dim_f);
+ +}
+ +
+ +/* Return the PME maxshift along x: the maxshift of the first PME
+ + * decomposition entry when that entry is along XX, otherwise 0.
+ + */
+ +int dd_pme_maxshift_x(gmx_domdec_t *dd)
+ +{
+ +    int maxshift = 0;
+ +
+ +    if (dd->comm->ddpme[0].dim == XX)
+ +    {
+ +        maxshift = dd->comm->ddpme[0].maxshift;
+ +    }
+ +
+ +    return maxshift;
+ +}
+ +
+ +/* Return the PME maxshift along y: taken from the first PME
+ + * decomposition entry when it is along YY, else from the second entry
+ + * when npmedecompdim >= 2 and that entry is along YY, else 0.
+ + */
+ +int dd_pme_maxshift_y(gmx_domdec_t *dd)
+ +{
+ +    int maxshift = 0;
+ +
+ +    if (dd->comm->ddpme[0].dim == YY)
+ +    {
+ +        maxshift = dd->comm->ddpme[0].maxshift;
+ +    }
+ +    else if (dd->comm->npmedecompdim >= 2 && dd->comm->ddpme[1].dim == YY)
+ +    {
+ +        maxshift = dd->comm->ddpme[1].maxshift;
+ +    }
+ +
+ +    return maxshift;
+ +}
+ +
+ +/* Determine ddpme->maxshift, the PME slab communication range.
+ + *
+ + * The result is ns/2 when the PP decomposition is not along the PME
+ + * dimension, 1 when there are at most 3 slabs or the grid is uniform
+ + * with as many slabs as cells, and otherwise the smallest shift (at
+ + * least 1, below ns) for which no PP cell range, extended by 2/3 of the
+ + * cut-off, reaches across a PME slab boundary further away.
+ + * cell_f holds the cell boundary fractions along the PME dimension.
+ + */
+ +static void set_pme_maxshift(gmx_domdec_t *dd,gmx_ddpme_t *ddpme,
+ +                             gmx_bool bUniform,gmx_ddbox_t *ddbox,real *cell_f)
+ +{
+ +    gmx_domdec_comm_t *comm = dd->comm;
+ +    int  nc,ns,s,k;
+ +    int  *pp_lo,*pp_hi;
+ +    real range,pme_boundary;
+ +    int  sh;
+ +    gmx_bool bReach;
+ +
+ +    nc = dd->nc[ddpme->dim];
+ +    ns = ddpme->nslab;
+ +
+ +    if (!ddpme->dim_match)
+ +    {
+ +        /* PP decomposition is not along dim: the worst situation */
+ +        sh = ns/2;
+ +    }
+ +    else if (ns <= 3 || (bUniform && ns == nc))
+ +    {
+ +        /* The optimal situation */
+ +        sh = 1;
+ +    }
+ +    else
+ +    {
+ +        /* We need to check for all pme nodes which nodes they
+ +         * could possibly need to communicate with.
+ +         */
+ +        pp_lo = ddpme->pp_min;
+ +        pp_hi = ddpme->pp_max;
+ +        /* Allow for atoms to be maximally 2/3 times the cut-off
+ +         * out of their DD cell. This is a reasonable balance
+ +         * between performance and support for most
+ +         * charge-group/cut-off combinations.
+ +         */
+ +        range  = 2.0/3.0*comm->cutoff/ddbox->box_size[ddpme->dim];
+ +        /* Avoid extra communication when we are exactly at a boundary */
+ +        range *= 0.999;
+ +
+ +        sh = 1;
+ +        for(s=0; s<ns; s++)
+ +        {
+ +            /* PME slab s spreads atoms between box frac. s/ns and (s+1)/ns */
+ +
+ +            /* Lower boundary: look at slabs further down, with wrap-around */
+ +            pme_boundary = (real)s/ns;
+ +            while (sh+1 < ns)
+ +            {
+ +                k = s - (sh+1);
+ +                if (k >= 0)
+ +                {
+ +                    bReach = (cell_f[pp_hi[k]+1] + range > pme_boundary);
+ +                }
+ +                else
+ +                {
+ +                    bReach = (cell_f[pp_hi[k+ns]+1] - 1 + range > pme_boundary);
+ +                }
+ +                if (!bReach)
+ +                {
+ +                    break;
+ +                }
+ +                sh++;
+ +            }
+ +
+ +            /* Upper boundary: look at slabs further up, with wrap-around */
+ +            pme_boundary = (real)(s+1)/ns;
+ +            while (sh+1 < ns)
+ +            {
+ +                k = s + (sh+1);
+ +                if (k < ns)
+ +                {
+ +                    bReach = (cell_f[pp_lo[k]] - range < pme_boundary);
+ +                }
+ +                else
+ +                {
+ +                    bReach = (cell_f[pp_lo[k-ns]] + 1 - range < pme_boundary);
+ +                }
+ +                if (!bReach)
+ +                {
+ +                    break;
+ +                }
+ +                sh++;
+ +            }
+ +        }
+ +    }
+ +
+ +    ddpme->maxshift = sh;
+ +
+ +    if (debug)
+ +    {
+ +        fprintf(debug,"PME slab communication range for dim %d is %d\n",
+ +                ddpme->dim,ddpme->maxshift);
+ +    }
+ +}
+ +
+ +/* For every decomposed dimension below ddbox->nboundeddim, call
+ + * gmx_fatal when the box size times the skew factor is smaller than
+ + * the number of DD cells times cellsize_limit times DD_CELL_MARGIN.
+ + */
+ +static void check_box_size(gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
+ +{
+ +    int di,dim;
+ +
+ +    for(di=0; di<dd->ndim; di++)
+ +    {
+ +        dim = dd->dim[di];
+ +        if (dim >= ddbox->nboundeddim)
+ +        {
+ +            continue;
+ +        }
+ +        if (ddbox->box_size[dim]*ddbox->skew_fac[dim] <
+ +            dd->nc[dim]*dd->comm->cellsize_limit*DD_CELL_MARGIN)
+ +        {
+ +            gmx_fatal(FARGS,"The %c-size of the box (%f) times the triclinic skew factor (%f) is smaller than the number of DD cells (%d) times the smallest allowed cell size (%f)\n",
+ +                      dim2char(dim),ddbox->box_size[dim],ddbox->skew_fac[dim],
+ +                      dd->nc[dim],dd->comm->cellsize_limit);
+ +        }
+ +    }
+ +}
+ +
+ +/* Set the DD cell boundaries for a static grid in all DIM dimensions.
+ + *
+ + * Dimensions with a single cell or without a comm->slb_frac[d] array get
+ + * a uniform grid; the others use the fractions in comm->slb_frac[d].
+ + * With bMaster the boundaries of all cells are stored in dd->ma->cell_x,
+ + * otherwise only the bounds of the cell with index dd->ci[d] are stored
+ + * in comm->cell_x0/cell_x1.
+ + * npulse[d] starts at 1 and is increased until npulse[d] times the
+ + * (skew corrected) cell size reaches comm->cutoff or the cap in the
+ + * loops below is hit.
+ + * Without dynamic load balancing the smallest cell size per dimension
+ + * is copied to comm->cellsize_min.
+ + * Finally set_pme_maxshift is called for each PME decomposition dimension.
+ + */
+ +static void set_dd_cell_sizes_slb(gmx_domdec_t *dd,gmx_ddbox_t *ddbox,
+ +                                  gmx_bool bMaster,ivec npulse)
+ +{
+ +    gmx_domdec_comm_t *comm;
+ +    int  d,j;
+ +    rvec cellsize_min;
+ +    real *cell_x,cell_dx,cellsize;
+ +    
+ +    comm = dd->comm;
+ +    
+ +    for(d=0; d<DIM; d++)
+ +    {
+ +        /* Start from the full (skew corrected) box length as the minimum */
+ +        cellsize_min[d] = ddbox->box_size[d]*ddbox->skew_fac[d];
+ +        npulse[d] = 1;
+ +        if (dd->nc[d] == 1 || comm->slb_frac[d] == NULL)
+ +        {
+ +            /* Uniform grid */
+ +            cell_dx = ddbox->box_size[d]/dd->nc[d];
+ +            if (bMaster)
+ +            {
+ +                /* Store all nc[d]+1 boundaries for the master distribution */
+ +                for(j=0; j<dd->nc[d]+1; j++)
+ +                {
+ +                    dd->ma->cell_x[d][j] = ddbox->box0[d] + j*cell_dx;
+ +                }
+ +            }
+ +            else
+ +            {
+ +                /* Only the lower and upper bound of our own cell */
+ +                comm->cell_x0[d] = ddbox->box0[d] + (dd->ci[d]  )*cell_dx;
+ +                comm->cell_x1[d] = ddbox->box0[d] + (dd->ci[d]+1)*cell_dx;
+ +            }
+ +            cellsize = cell_dx*ddbox->skew_fac[d];
+ +            while (cellsize*npulse[d] < comm->cutoff && npulse[d] < dd->nc[d]-1)
+ +            {
+ +                npulse[d]++;
+ +            }
+ +            cellsize_min[d] = cellsize;
+ +        }
+ +        else
+ +        {
+ +            /* Statically load balanced grid */
+ +            /* Also when we are not doing a master distribution we determine
+ +             * all cell borders in a loop to obtain identical values
+ +             * to the master distribution case and to determine npulse.
+ +             */
+ +            if (bMaster)
+ +            {
+ +                cell_x = dd->ma->cell_x[d];
+ +            }
+ +            else
+ +            {
+ +                /* Temporary buffer, freed below after copying our bounds */
+ +                snew(cell_x,dd->nc[d]+1);
+ +            }
+ +            cell_x[0] = ddbox->box0[d];
+ +            for(j=0; j<dd->nc[d]; j++)
+ +            {
+ +                cell_dx = ddbox->box_size[d]*comm->slb_frac[d][j];
+ +                cell_x[j+1] = cell_x[j] + cell_dx;
+ +                cellsize = cell_dx*ddbox->skew_fac[d];
+ +                /* npulse[d] is never reset inside this loop, so it ends
+ +                 * up determined by the smallest cell in this dimension.
+ +                 */
+ +                while (cellsize*npulse[d] < comm->cutoff &&
+ +                       npulse[d] < dd->nc[d]-1)
+ +                {
+ +                    npulse[d]++;
+ +                }
+ +                cellsize_min[d] = min(cellsize_min[d],cellsize);
+ +            }
+ +            if (!bMaster)
+ +            {
+ +                comm->cell_x0[d] = cell_x[dd->ci[d]];
+ +                comm->cell_x1[d] = cell_x[dd->ci[d]+1];
+ +                sfree(cell_x);
+ +            }
+ +        }
+ +        /* The following limitation is to avoid that a cell would receive
+ +         * some of its own home charge groups back over the periodic boundary.
+ +         * Double charge groups cause trouble with the global indices.
+ +         */
+ +        /* NOTE(review): both loops above stop increasing npulse[d] at
+ +         * dd->nc[d]-1, so for dd->nc[d] > 1 the condition
+ +         * npulse[d] >= dd->nc[d] looks unreachable - confirm whether
+ +         * the cap or this check is the intended limit.
+ +         */
+ +        if (d < ddbox->npbcdim &&
+ +            dd->nc[d] > 1 && npulse[d] >= dd->nc[d])
+ +        {
+ +            gmx_fatal_collective(FARGS,NULL,dd,
+ +                                 "The box size in direction %c (%f) times the triclinic skew factor (%f) is too small for a cut-off of %f with %d domain decomposition cells, use 1 or more than %d %s or increase the box size in this direction",
+ +                                 dim2char(d),ddbox->box_size[d],ddbox->skew_fac[d],
+ +                                 comm->cutoff,
+ +                                 dd->nc[d],dd->nc[d],
+ +                                 dd->nnodes > dd->nc[d] ? "cells" : "processors");
+ +        }
+ +    }
+ +    
+ +    /* With dynamic load balancing comm->cellsize_min is left untouched here */
+ +    if (!comm->bDynLoadBal)
+ +    {
+ +        copy_rvec(cellsize_min,comm->cellsize_min);
+ +    }
+ +   
+ +    /* The third argument tells set_pme_maxshift whether the grid
+ +     * in this DD dimension is uniform (no slb_frac array).
+ +     */
+ +    for(d=0; d<comm->npmedecompdim; d++)
+ +    {
+ +        set_pme_maxshift(dd,&comm->ddpme[d],
+ +                         comm->slb_frac[dd->dim[d]]==NULL,ddbox,
+ +                         comm->ddpme[d].slb_dim_f);
+ +    }
+ +}
+ +
+ +
+ +/* Turn the trial cell sizes in root->buf_ncd into cell boundaries
+ + * root->cell_f for the cells range[0] <= i < range[1] of DD dimension
+ + * index d (Cartesian dimension dim), while enforcing size limits.
+ + *
+ + * The sizes are rescaled to fill the interval
+ + * root->cell_f[range[0]] .. root->cell_f[range[1]], which the caller
+ + * must have set, and no cell is made smaller than cellsize_limit_f
+ + * (a fraction of the box; the outer cells are exempt without PBC).
+ + * When bUniform is FALSE, boundaries are additionally kept from moving
+ + * more than halfway into a neighboring old cell (root->old_cell_f).
+ + * For d > 0 the staggering data is handled: with bUniform
+ + * root->cell_f_max0/cell_f_min1 are set, otherwise boundaries violating
+ + * root->bound_min/bound_max are fixed at the limit and this function
+ + * is called recursively on the sub-ranges next to the fixed boundary.
+ + * root->bLimited records whether any limit or sub-range was active.
+ + * A fatal error is issued when, with PBC, the last cell of the range
+ + * ends up smaller than the limit.
+ + */
+ +static void dd_cell_sizes_dlb_root_enforce_limits(gmx_domdec_t *dd,
+ +                                       int d,int dim,gmx_domdec_root_t *root,
+ +                                       gmx_ddbox_t *ddbox,
+ +                                       gmx_bool bUniform,gmx_large_int_t step, real cellsize_limit_f, int range[])
+ +{
+ +    gmx_domdec_comm_t *comm;
+ +    int  ncd,i,j,nmin,nmin_old;
+ +    gmx_bool bLimLo,bLimHi;
+ +    real *cell_size;
+ +    real fac,halfway,cellsize_limit_f_i,region_size;
+ +    gmx_bool bPBC,bLastHi=FALSE;
+ +    /* Local copy of the range, modified to describe sub-ranges for recursion */
+ +    int nrange[]={range[0],range[1]};
+ +
+ +    /* Total fractional size available for the cells in this range */
+ +    region_size= root->cell_f[range[1]]-root->cell_f[range[0]];  
+ +
+ +    comm = dd->comm;
+ +
+ +    ncd = dd->nc[dim];
+ +
+ +    bPBC = (dim < ddbox->npbcdim);
+ +
+ +    cell_size = root->buf_ncd;
+ +
+ +    if (debug) 
+ +    {
+ +        fprintf(debug,"enforce_limits: %d %d\n",range[0],range[1]);
+ +    }
+ +
+ +    /* First we need to check if the scaling does not make cells
+ +     * smaller than the smallest allowed size.
+ +     * We need to do this iteratively, since if a cell is too small,
+ +     * it needs to be enlarged, which makes all the other cells smaller,
+ +     * which could in turn make another cell smaller than allowed.
+ +     */
+ +    for(i=range[0]; i<range[1]; i++)
+ +    {
+ +        root->bCellMin[i] = FALSE;
+ +    }
+ +    nmin = 0;
+ +    do
+ +    {
+ +        nmin_old = nmin;
+ +        /* We need the total for normalization */
+ +        fac = 0;
+ +        for(i=range[0]; i<range[1]; i++)
+ +        {
+ +            if (root->bCellMin[i] == FALSE)
+ +            {
+ +                fac += cell_size[i];
+ +            }
+ +        }
+ +        fac = ( region_size - nmin*cellsize_limit_f)/fac; /* subtracting cells already set to cellsize_limit_f */
+ +        /* Determine the cell boundaries */
+ +        for(i=range[0]; i<range[1]; i++)
+ +        {
+ +            if (root->bCellMin[i] == FALSE)
+ +            {
+ +                cell_size[i] *= fac;
+ +                /* Without PBC the first and last cell have no lower limit */
+ +                if (!bPBC && (i == 0 || i == dd->nc[dim] -1))
+ +                {
+ +                    cellsize_limit_f_i = 0;
+ +                }
+ +                else
+ +                {
+ +                    cellsize_limit_f_i = cellsize_limit_f;
+ +                }
+ +                if (cell_size[i] < cellsize_limit_f_i)
+ +                {
+ +                    /* Pin this cell at the limit and exclude it
+ +                     * from further rescaling.
+ +                     */
+ +                    root->bCellMin[i] = TRUE;
+ +                    cell_size[i] = cellsize_limit_f_i;
+ +                    nmin++;
+ +                }
+ +            }
+ +            root->cell_f[i+1] = root->cell_f[i] + cell_size[i];
+ +        }
+ +    }
+ +    while (nmin > nmin_old);
+ +    
+ +    /* Recompute the size of the last cell in the range from its boundaries */
+ +    i=range[1]-1;
+ +    cell_size[i] = root->cell_f[i+1] - root->cell_f[i];
+ +    /* For this check we should not use DD_CELL_MARGIN,
+ +     * but a slightly smaller factor,
+ +     * since rounding could get us below the limit.
+ +     */
+ +    if (bPBC && cell_size[i] < cellsize_limit_f*DD_CELL_MARGIN2/DD_CELL_MARGIN)
+ +    {
+ +        char buf[22];
+ +        gmx_fatal(FARGS,"Step %s: the dynamic load balancing could not balance dimension %c: box size %f, triclinic skew factor %f, #cells %d, minimum cell size %f\n",
+ +                  gmx_step_str(step,buf),
+ +                  dim2char(dim),ddbox->box_size[dim],ddbox->skew_fac[dim],
+ +                  ncd,comm->cellsize_min[dim]);
+ +    }
+ +    
+ +    /* Limited when a cell hit the minimum or when this call only covers
+ +     * part of the row.
+ +     */
+ +    root->bLimited = (nmin > 0) || (range[0]>0) || (range[1]<ncd);
+ +    
+ +    if (!bUniform)
+ +    {
+ +        /* Check if the boundary did not displace more than halfway
+ +         * each of the cells it bounds, as this could cause problems,
+ +         * especially when the differences between cell sizes are large.
+ +         * If changes are applied, they will not make cells smaller
+ +         * than the cut-off, as we check all the boundaries which
+ +         * might be affected by a change and if the old state was ok,
+ +         * the cells will at most be shrunk back to their old size.
+ +         */
+ +        for(i=range[0]+1; i<range[1]; i++)
+ +        {
+ +            /* Lower limit: the middle of the old cell below boundary i */
+ +            halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i-1]);
+ +            if (root->cell_f[i] < halfway)
+ +            {
+ +                root->cell_f[i] = halfway;
+ +                /* Check if the change also causes shifts of the next boundaries */
+ +                for(j=i+1; j<range[1]; j++)
+ +                {
+ +                    if (root->cell_f[j] < root->cell_f[j-1] + cellsize_limit_f)
+ +                        root->cell_f[j] =  root->cell_f[j-1] + cellsize_limit_f;
+ +                }
+ +            }
+ +            /* Upper limit: the middle of the old cell above boundary i */
+ +            halfway = 0.5*(root->old_cell_f[i] + root->old_cell_f[i+1]);
+ +            if (root->cell_f[i] > halfway)
+ +            {
+ +                root->cell_f[i] = halfway;
+ +                /* Check if the change also causes shifts of the next boundaries */
+ +                for(j=i-1; j>=range[0]+1; j--)
+ +                {
+ +                    if (root->cell_f[j] > root->cell_f[j+1] - cellsize_limit_f)
+ +                        root->cell_f[j] = root->cell_f[j+1] - cellsize_limit_f;
+ +                }
+ +            }
+ +        }
+ +    }
+ +    
+ +    /* nrange is defined as [lower, upper) range for new call to enforce_limits */
+ +    /* find highest violation of LimLo (a) and the following violation of LimHi (thus the lowest following) (b)
+ +     * then call enforce_limits for (oldb,a), (a,b). In the next step: (b,nexta). oldb and nexta can be the boundaries.
+ +     * for a and b nrange is used */
+ +    if (d > 0)
+ +    {
+ +        /* Take care of the staggering of the cell boundaries */
+ +        if (bUniform)
+ +        {
+ +            for(i=range[0]; i<range[1]; i++)
+ +            {
+ +                root->cell_f_max0[i] = root->cell_f[i];
+ +                root->cell_f_min1[i] = root->cell_f[i+1];
+ +            }
+ +        }
+ +        else
+ +        {
+ +            for(i=range[0]+1; i<range[1]; i++)
+ +            {
+ +                bLimLo = (root->cell_f[i] < root->bound_min[i]);
+ +                bLimHi = (root->cell_f[i] > root->bound_max[i]);
+ +                if (bLimLo && bLimHi)
+ +                {
+ +                    /* Both limits violated, try the best we can */
+ +                    /* For this case we split the original range (range) in two parts and care about the other limitations in the next iteration. */
+ +                    root->cell_f[i] = 0.5*(root->bound_min[i] + root->bound_max[i]);
+ +                    nrange[0]=range[0];
+ +                    nrange[1]=i;
+ +                    dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
+ +
+ +                    nrange[0]=i;
+ +                    nrange[1]=range[1];
+ +                    dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
+ +
+ +                    /* The two recursive calls handled the whole range */
+ +                    return;
+ +                }
+ +                else if (bLimLo)
+ +                {
+ +                    /* root->cell_f[i] = root->bound_min[i]; */
+ +                    nrange[1]=i;  /* only store violation location. There could be a LimLo violation following with an higher index */
+ +                    bLastHi=FALSE;
+ +                }
+ +                else if (bLimHi && !bLastHi)
+ +                {
+ +                    /* Only the first LimHi violation after a LimLo
+ +                     * (or after the start of the range) is handled here.
+ +                     */
+ +                    bLastHi=TRUE;
+ +                    if (nrange[1] < range[1])   /* found a LimLo before */
+ +                    {
+ +                        /* Fix the stored LimLo boundary at its lower bound
+ +                         * and redo the range below it.
+ +                         */
+ +                        root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
+ +                        dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
+ +                        nrange[0]=nrange[1];
+ +                    }
+ +                    /* Fix boundary i at its upper bound and redo the
+ +                     * range up to i; continue with the range above i.
+ +                     */
+ +                    root->cell_f[i] = root->bound_max[i];
+ +                    nrange[1]=i; 
+ +                    dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
+ +                    nrange[0]=i;
+ +                    nrange[1]=range[1];
+ +                }
+ +            }
+ +            if (nrange[1] < range[1])   /* found last a LimLo */
+ +            {
+ +                /* Fix the last LimLo boundary and redo the ranges
+ +                 * on both sides of it.
+ +                 */
+ +                root->cell_f[nrange[1]] = root->bound_min[nrange[1]];
+ +                dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
+ +                nrange[0]=nrange[1];
+ +                nrange[1]=range[1];
+ +                dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
+ +            } 
+ +            else if (nrange[0] > range[0]) /* found at least one LimHi */
+ +            {
+ +                /* Redo the remaining range above the last fixed boundary */
+ +                dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, nrange);
+ +            }
+ +        }
+ +    }
+ +}
+ +
+ +
+ +/* Determine new relative cell boundaries root->cell_f for one row of
+ + * cells along DD dimension index d (Cartesian dimension dim).
+ + *
+ + * The old boundaries are saved in root->old_cell_f. Trial cell sizes
+ + * are written to root->buf_ncd: equal sizes with bUniform, otherwise,
+ + * when dd_load_count(comm) is non-zero, the old sizes scaled by the
+ + * measured load imbalance with underrelaxation and a common limit on
+ + * the change. For d > 0 without bUniform the staggering limits
+ + * root->bound_min/bound_max are computed. The limits are then enforced
+ + * by dd_cell_sizes_dlb_root_enforce_limits on the full range 0..ncd.
+ + * After the ncd+1 boundaries, the cell fractions of the lower DD
+ + * dimensions and the PME maxshift value(s) are appended to root->cell_f.
+ + */
+ +static void set_dd_cell_sizes_dlb_root(gmx_domdec_t *dd,
+ +                                       int d,int dim,gmx_domdec_root_t *root,
+ +                                       gmx_ddbox_t *ddbox,gmx_bool bDynamicBox,
+ +                                       gmx_bool bUniform,gmx_large_int_t step)
+ +{
+ +    gmx_domdec_comm_t *comm;
+ +    int  ncd,d1,i,j,pos;
+ +    real *cell_size;
+ +    real load_aver,load_i,imbalance,change,change_max,sc;
+ +    real cellsize_limit_f,dist_min_f,dist_min_f_hard,space;
+ +    real change_limit;
+ +    real relax = 0.5;
+ +    gmx_bool bPBC;
+ +    int range[] = { 0, 0 };
+ +
+ +    comm = dd->comm;
+ +
+ +    /* Convert the maximum change from the input percentage to a fraction */
+ +    change_limit = comm->dlb_scale_lim*0.01;
+ +
+ +    ncd = dd->nc[dim];
+ +
+ +    bPBC = (dim < ddbox->npbcdim);
+ +
+ +    cell_size = root->buf_ncd;
+ +
+ +    /* Store the original boundaries */
+ +    for(i=0; i<ncd+1; i++)
+ +    {
+ +        root->old_cell_f[i] = root->cell_f[i];
+ +    }
+ +    /* NOTE(review): when bUniform is FALSE and dd_load_count(comm) is 0,
+ +     * cell_size is not written here, so the enforce_limits call below
+ +     * uses whatever root->buf_ncd already contains - confirm this is
+ +     * intended.
+ +     */
+ +    if (bUniform) {
+ +        for(i=0; i<ncd; i++)
+ +        {
+ +            cell_size[i] = 1.0/ncd;
+ +        }
+ +    }
+ +    else if (dd_load_count(comm))
+ +    {
+ +        load_aver = comm->load[d].sum_m/ncd;
+ +        /* First pass: find the largest absolute change over all cells */
+ +        change_max = 0;
+ +        for(i=0; i<ncd; i++)
+ +        {
+ +            /* Determine the relative imbalance of cell i */
+ +            load_i = comm->load[d].load[i*comm->load[d].nload+2];
+ +            imbalance = (load_i - load_aver)/(load_aver>0 ? load_aver : 1);
+ +            /* Determine the change of the cell size using underrelaxation */
+ +            change = -relax*imbalance;
+ +            change_max = max(change_max,max(change,-change));
+ +        }
+ +        /* Limit the amount of scaling.
+ +         * We need to use the same rescaling for all cells in one row,
+ +         * otherwise the load balancing might not converge.
+ +         */
+ +        sc = relax;
+ +        if (change_max > change_limit)
+ +        {
+ +            sc *= change_limit/change_max;
+ +        }
+ +        /* Second pass: apply the (possibly reduced) scaling to the old sizes */
+ +        for(i=0; i<ncd; i++)
+ +        {
+ +            /* Determine the relative imbalance of cell i */
+ +            load_i = comm->load[d].load[i*comm->load[d].nload+2];
+ +            imbalance = (load_i - load_aver)/(load_aver>0 ? load_aver : 1);
+ +            /* Determine the change of the cell size using underrelaxation */
+ +            change = -sc*imbalance;
+ +            cell_size[i] = (root->cell_f[i+1]-root->cell_f[i])*(1 + change);
+ +        }
+ +    }
+ +    
+ +    /* Express the size and distance limits as fractions of the box length */
+ +    cellsize_limit_f  = comm->cellsize_min[dim]/ddbox->box_size[dim];
+ +    cellsize_limit_f *= DD_CELL_MARGIN;
+ +    dist_min_f_hard        = grid_jump_limit(comm,d)/ddbox->box_size[dim];
+ +    dist_min_f       = dist_min_f_hard * DD_CELL_MARGIN;
+ +    if (ddbox->tric_dir[dim])
+ +    {
+ +        /* Note that dist_min_f_hard is not corrected for the skew factor */
+ +        cellsize_limit_f /= ddbox->skew_fac[dim];
+ +        dist_min_f       /= ddbox->skew_fac[dim];
+ +    }
+ +    if (bDynamicBox && d > 0)
+ +    {
+ +        dist_min_f *= DD_PRES_SCALE_MARGIN;
+ +    }
+ +    if (d > 0 && !bUniform)
+ +    {
+ +        /* Make sure that the grid is not shifted too much */
+ +        for(i=1; i<ncd; i++) {
+ +            if (root->cell_f_min1[i] - root->cell_f_max0[i-1] < 2 * dist_min_f_hard) 
+ +            {
+ +                gmx_incons("Inconsistent DD boundary staggering limits!");
+ +            }
+ +            /* Lower bound for boundary i; when the current boundary is
+ +             * above it, the bound is moved halfway towards the boundary.
+ +             */
+ +            root->bound_min[i] = root->cell_f_max0[i-1] + dist_min_f;
+ +            space = root->cell_f[i] - (root->cell_f_max0[i-1] + dist_min_f);
+ +            if (space > 0) {
+ +                root->bound_min[i] += 0.5*space;
+ +            }
+ +            /* Upper bound for boundary i; when the current boundary is
+ +             * below it, the bound is moved halfway towards the boundary.
+ +             */
+ +            root->bound_max[i] = root->cell_f_min1[i] - dist_min_f;
+ +            space = root->cell_f[i] - (root->cell_f_min1[i] - dist_min_f);
+ +            if (space < 0) {
+ +                root->bound_max[i] += 0.5*space;
+ +            }
+ +            if (debug)
+ +            {
+ +                fprintf(debug,
+ +                        "dim %d boundary %d %.3f < %.3f < %.3f < %.3f < %.3f\n",
+ +                        d,i,
+ +                        root->cell_f_max0[i-1] + dist_min_f,
+ +                        root->bound_min[i],root->cell_f[i],root->bound_max[i],
+ +                        root->cell_f_min1[i] - dist_min_f);
+ +            }
+ +        }
+ +    }
+ +    /* Enforce the limits on the full row; the outer boundaries are
+ +     * fixed at 0 and 1.
+ +     */
+ +    range[1]=ncd;
+ +    root->cell_f[0] = 0;
+ +    root->cell_f[ncd] = 1;
+ +    dd_cell_sizes_dlb_root_enforce_limits(dd, d, dim, root, ddbox, bUniform, step, cellsize_limit_f, range);
+ +
+ +
+ +    /* After the checks above, the cells should obey the cut-off
+ +     * restrictions, but it does not hurt to check.
+ +     */
+ +    for(i=0; i<ncd; i++)
+ +    {
+ +        if (debug)
+ +        {
+ +            fprintf(debug,"Relative bounds dim %d  cell %d: %f %f\n",
+ +                    dim,i,root->cell_f[i],root->cell_f[i+1]);
+ +        }
+ +
+ +        /* Without PBC the first and last cell are not checked */
+ +        if ((bPBC || (i != 0 && i != dd->nc[dim]-1)) &&
+ +            root->cell_f[i+1] - root->cell_f[i] <
+ +            cellsize_limit_f/DD_CELL_MARGIN)
+ +        {
+ +            char buf[22];
+ +            fprintf(stderr,
+ +                    "\nWARNING step %s: direction %c, cell %d too small: %f\n",
+ +                    gmx_step_str(step,buf),dim2char(dim),i,
+ +                    (root->cell_f[i+1] - root->cell_f[i])
+ +                    *ddbox->box_size[dim]*ddbox->skew_fac[dim]);
+ +        }
+ +    }
+ +    
+ +    /* Extra data is appended after the ncd+1 boundaries */
+ +    pos = ncd + 1;
+ +    /* Store the cell boundaries of the lower dimensions at the end */
+ +    for(d1=0; d1<d; d1++)
+ +    {
+ +        root->cell_f[pos++] = comm->cell_f0[d1];
+ +        root->cell_f[pos++] = comm->cell_f1[d1];
+ +    }
+ +    
+ +    if (d < comm->npmedecompdim)
+ +    {
+ +        /* The master determines the maximum shift for
+ +         * the coordinate communication between separate PME nodes.
+ +         */
+ +        set_pme_maxshift(dd,&comm->ddpme[d],bUniform,ddbox,root->cell_f);
+ +    }
+ +    /* The integer maxshift values are stored in the same buffer
+ +     * as the cell fractions.
+ +     */
+ +    root->cell_f[pos++] = comm->ddpme[0].maxshift;
+ +    if (d >= 1)
+ +    {
+ +        root->cell_f[pos++] = comm->ddpme[1].maxshift;
+ +    }
+ +}    
+ +
+ +/* Convert the relative cell bounds comm->cell_f0/cell_f1 of DD dimension
+ + * index dimind into the absolute bounds comm->cell_x0/cell_x1 of the
+ + * corresponding Cartesian dimension, by scaling with the box length.
+ + * For dimensions with an index of ddbox->nboundeddim or higher the
+ + * origin ddbox->box0 is added.
+ + */
+ +static void relative_to_absolute_cell_bounds(gmx_domdec_t *dd,
+ +                                             gmx_ddbox_t *ddbox,int dimind)
+ +{
+ +    gmx_domdec_comm_t *comm = dd->comm;
+ +    const int         dim   = dd->dim[dimind];
+ +
+ +    /* Scale the fractions with the box length */
+ +    comm->cell_x0[dim] = comm->cell_f0[dimind]*ddbox->box_size[dim];
+ +    comm->cell_x1[dim] = comm->cell_f1[dimind]*ddbox->box_size[dim];
+ +
+ +    if (dim < ddbox->nboundeddim)
+ +    {
+ +        /* No origin shift needed */
+ +        return;
+ +    }
+ +
+ +    comm->cell_x0[dim] += ddbox->box0[dim];
+ +    comm->cell_x1[dim] += ddbox->box0[dim];
+ +}
+ +
+ +static void distribute_dd_cell_sizes_dlb(gmx_domdec_t *dd,
+ +                                         int d,int dim,real *cell_f_row,
+ +                                         gmx_ddbox_t *ddbox)
+ +{
+ +    gmx_domdec_comm_t *comm;
+ +    int d1,dim1,pos;
+ +
+ +    comm = dd->comm;
+ +
+ +#ifdef GMX_MPI
+ +    /* Each node would only need to know two fractions,
+ +     * but it is probably cheaper to broadcast the whole array.
+ +     */
+ +    MPI_Bcast(cell_f_row,DD_CELL_F_SIZE(dd,d)*sizeof(real),MPI_BYTE,
+ +              0,comm->mpi_comm_load[d]);
+ +#endif
+ +    /* Copy the fractions for this dimension from the buffer */
+ +    comm->cell_f0[d] = cell_f_row[dd->ci[dim]  ];
+ +    comm->cell_f1[d] = cell_f_row[dd->ci[dim]+1];
+ +    /* The whole array was communicated, so set the buffer position */
+ +    pos = dd->nc[dim] + 1;
+ +    for(d1=0; d1<=d; d1++)
+ +    {
+ +        if (d1 < d)
+ +        {
+ +            /* Copy the cell fractions of the lower dimensions */
+ +            comm->cell_f0[d1] = cell_f_row[pos++];
+ +            comm->cell_f1[d1] = cell_f_row[pos++];
+ +        }
+ +        relative_to_absolute_cell_bounds(dd,ddbox,d1);
+ +    }
+ +    /* Convert the communicated shift from float to int */
+ +    comm->ddpme[0].maxshift = (int)(cell_f_row[pos++] + 0.5);
+ +    if (d >= 1)
+ +    {
+ +        comm->ddpme[1].maxshift = (int)(cell_f_row[pos++] + 0.5);
+ +    }
+ +}
+ +
+ +/* Update the cell boundaries for dynamic load balancing, one DD
+ + * dimension index at a time.
+ + *
+ + * A node takes part in the row for dimension index d when its cell
+ + * index is zero in all higher DD dimensions; it is the row root when
+ + * its cell index in dimension d itself is zero as well. The row root
+ + * computes the new boundaries, after which they are distributed over
+ + * the row members.
+ + */
+ +static void set_dd_cell_sizes_dlb_change(gmx_domdec_t *dd,
+ +                                         gmx_ddbox_t *ddbox,gmx_bool bDynamicBox,
+ +                                         gmx_bool bUniform,gmx_large_int_t step)
+ +{
+ +    gmx_domdec_comm_t *comm = dd->comm;
+ +    int  d,dhigher,dim;
+ +    gmx_bool bInRow,bIsRoot;
+ +    real *row_buf;
+ +
+ +    for(d=0; d<dd->ndim; d++)
+ +    {
+ +        dim = dd->dim[d];
+ +
+ +        /* Member: cell index zero in all dimensions above d */
+ +        bInRow = TRUE;
+ +        for(dhigher=d+1; dhigher<dd->ndim; dhigher++)
+ +        {
+ +            if (dd->ci[dd->dim[dhigher]] > 0)
+ +            {
+ +                bInRow = FALSE;
+ +            }
+ +        }
+ +        if (!bInRow)
+ +        {
+ +            continue;
+ +        }
+ +
+ +        /* Root: additionally cell index zero in dimension d */
+ +        bIsRoot = !(dd->ci[dim] > 0);
+ +        if (bIsRoot)
+ +        {
+ +            set_dd_cell_sizes_dlb_root(dd,d,dim,comm->root[d],
+ +                                       ddbox,bDynamicBox,bUniform,step);
+ +            row_buf = comm->root[d]->cell_f;
+ +        }
+ +        else
+ +        {
+ +            row_buf = comm->cell_f_row;
+ +        }
+ +        distribute_dd_cell_sizes_dlb(dd,d,dim,row_buf,ddbox);
+ +    }
+ +}    
+ +
+ +/* Recompute the absolute cell bounds of all DD dimensions from the
+ + * stored relative bounds, without changing the relative bounds.
+ + */
+ +static void set_dd_cell_sizes_dlb_nochange(gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
+ +{
+ +    int dimind = 0;
+ +
+ +    /* This function assumes the box is static and should therefore
+ +     * not be called when the box has changed since the last
+ +     * call to dd_partition_system.
+ +     */
+ +    while (dimind < dd->ndim)
+ +    {
+ +        relative_to_absolute_cell_bounds(dd,ddbox,dimind);
+ +        dimind++;
+ +    }
+ +}
+ +
+ +
+ +
+ +/* Set the cell bounds when dynamic load balancing is in use.
+ + *
+ + * With bDoDLB new boundaries are determined and communicated, timed
+ + * under the ewcDDCOMMBOUND wallcycle counter. Otherwise, with
+ + * bDynamicBox, only the absolute bounds are recomputed from the
+ + * existing relative bounds. Dimensions with a single cell always get
+ + * the full box length as their cell.
+ + */
+ +static void set_dd_cell_sizes_dlb(gmx_domdec_t *dd,
+ +                                  gmx_ddbox_t *ddbox,gmx_bool bDynamicBox,
+ +                                  gmx_bool bUniform,gmx_bool bDoDLB,gmx_large_int_t step,
+ +                                  gmx_wallcycle_t wcycle)
+ +{
+ +    gmx_domdec_comm_t *comm = dd->comm;
+ +    int idim;
+ +
+ +    if (!bDoDLB)
+ +    {
+ +        if (bDynamicBox)
+ +        {
+ +            set_dd_cell_sizes_dlb_nochange(dd,ddbox);
+ +        }
+ +    }
+ +    else
+ +    {
+ +        wallcycle_start(wcycle,ewcDDCOMMBOUND);
+ +        set_dd_cell_sizes_dlb_change(dd,ddbox,bDynamicBox,bUniform,step);
+ +        wallcycle_stop(wcycle,ewcDDCOMMBOUND);
+ +    }
+ +
+ +    /* Set the dimensions for which no DD is used */
+ +    for(idim=0; idim<DIM; idim++)
+ +    {
+ +        if (dd->nc[idim] != 1)
+ +        {
+ +            continue;
+ +        }
+ +
+ +        comm->cell_x0[idim] = 0;
+ +        comm->cell_x1[idim] = ddbox->box_size[idim];
+ +        if (idim >= ddbox->nboundeddim)
+ +        {
+ +            comm->cell_x0[idim] += ddbox->box0[idim];
+ +            comm->cell_x1[idim] += ddbox->box0[idim];
+ +        }
+ +    }
+ +}
+ +
+ +/* Set the number of communication pulses cd->np for each DD dimension
+ + * from npulse (indexed by Cartesian dimension) and grow the cd->ind
+ + * array when more pulses are needed than were allocated so far.
+ + * Newly added entries get a NULL index and zero allocation size.
+ + */
+ +static void realloc_comm_ind(gmx_domdec_t *dd,ivec npulse)
+ +{
+ +    int idim,ipulse,npulse_dim;
+ +    gmx_domdec_comm_dim_t *cdim;
+ +
+ +    for(idim=0; idim<dd->ndim; idim++)
+ +    {
+ +        cdim       = &dd->comm->cd[idim];
+ +        npulse_dim = npulse[dd->dim[idim]];
+ +
+ +        if (npulse_dim > cdim->np_nalloc)
+ +        {
+ +            if (debug)
+ +            {
+ +                fprintf(debug,"(Re)allocing cd for %c to %d pulses\n",
+ +                        dim2char(dd->dim[idim]),npulse_dim);
+ +            }
+ +            /* Only report growth of an already allocated array */
+ +            if (DDMASTER(dd) && cdim->np_nalloc > 0)
+ +            {
+ +                fprintf(stderr,"\nIncreasing the number of cell to communicate in dimension %c to %d for the first time\n",dim2char(dd->dim[idim]),npulse_dim);
+ +            }
+ +            srenew(cdim->ind,npulse_dim);
+ +
+ +            /* Initialize the entries that were added */
+ +            ipulse = cdim->np_nalloc;
+ +            while (ipulse < npulse_dim)
+ +            {
+ +                cdim->ind[ipulse].index  = NULL;
+ +                cdim->ind[ipulse].nalloc = 0;
+ +                ipulse++;
+ +            }
+ +            cdim->np_nalloc = npulse_dim;
+ +        }
+ +        cdim->np = npulse_dim;
+ +    }
+ +}
+ +
+ +
+ +/* Determine the DD cell bounds for this step.
+ + *
+ + * The current bounds are first saved in comm->old_cell_x0/old_cell_x1.
+ + * With dynamic load balancing the master checks the box size and the
+ + * DLB routine sets the bounds; otherwise the static grid routine is
+ + * used and the communication pulse arrays are resized accordingly.
+ + */
+ +static void set_dd_cell_sizes(gmx_domdec_t *dd,
+ +                              gmx_ddbox_t *ddbox,gmx_bool bDynamicBox,
+ +                              gmx_bool bUniform,gmx_bool bDoDLB,gmx_large_int_t step,
+ +                              gmx_wallcycle_t wcycle)
+ +{
+ +    gmx_domdec_comm_t *comm = dd->comm;
+ +    ivec npulse;
+ +    int  idim;
+ +
+ +    /* Copy the old cell boundaries for the cg displacement check */
+ +    copy_rvec(comm->cell_x0,comm->old_cell_x0);
+ +    copy_rvec(comm->cell_x1,comm->old_cell_x1);
+ +
+ +    if (!comm->bDynLoadBal)
+ +    {
+ +        /* Static grid: npulse is filled in and used for the allocation */
+ +        set_dd_cell_sizes_slb(dd,ddbox,FALSE,npulse);
+ +        realloc_comm_ind(dd,npulse);
+ +    }
+ +    else
+ +    {
+ +        if (DDMASTER(dd))
+ +        {
+ +            check_box_size(dd,ddbox);
+ +        }
+ +        set_dd_cell_sizes_dlb(dd,ddbox,bDynamicBox,bUniform,bDoDLB,step,wcycle);
+ +    }
+ +
+ +    if (!debug)
+ +    {
+ +        return;
+ +    }
+ +
+ +    for(idim=0; idim<DIM; idim++)
+ +    {
+ +        fprintf(debug,"cell_x[%d] %f - %f skew_fac %f\n",
+ +                idim,comm->cell_x0[idim],comm->cell_x1[idim],
+ +                ddbox->skew_fac[idim]);
+ +    }
+ +}
+ +
+ +/* Check the local cell size against comm->cellsize_min when dynamic
+ + * load balancing is active and, when needed, communicate the cell
+ + * boundaries to update cell_ns_x0/cell_ns_x1.
+ + *
+ + * A cell that is too small leads to a fatal error. The communication
+ + * is done with staggered grids (dd->bGridJump with more than one DD
+ + * dimension) or when ddbox->nboundeddim is smaller than DIM; in the
+ + * staggered case the grid jumps are checked afterwards.
+ + */
+ +static void comm_dd_ns_cell_sizes(gmx_domdec_t *dd,
+ +                                  gmx_ddbox_t *ddbox,
+ +                                  rvec cell_ns_x0,rvec cell_ns_x1,
+ +                                  gmx_large_int_t step)
+ +{
+ +    gmx_domdec_comm_t *comm = dd->comm;
+ +    int idim,dim;
+ +
+ +    for(idim=0; idim<dd->ndim; idim++)
+ +    {
+ +        dim = dd->dim[idim];
+ +
+ +        /* Without PBC we don't have restrictions on the outer cells */
+ +        if (dim >= ddbox->npbcdim &&
+ +            (dd->ci[dim] == 0 || dd->ci[dim] == dd->nc[dim] - 1))
+ +        {
+ +            continue;
+ +        }
+ +        if (!comm->bDynLoadBal)
+ +        {
+ +            continue;
+ +        }
+ +
+ +        if ((comm->cell_x1[dim] - comm->cell_x0[dim])*ddbox->skew_fac[dim] <
+ +            comm->cellsize_min[dim])
+ +        {
+ +            char buf[22];
+ +            gmx_fatal(FARGS,"Step %s: The %c-size (%f) times the triclinic skew factor (%f) is smaller than the smallest allowed cell size (%f) for domain decomposition grid cell %d %d %d",
+ +                      gmx_step_str(step,buf),dim2char(dim),
+ +                      comm->cell_x1[dim] - comm->cell_x0[dim],
+ +                      ddbox->skew_fac[dim],
+ +                      comm->cellsize_min[dim],
+ +                      dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
+ +        }
+ +    }
+ +
+ +    if (!((dd->bGridJump && dd->ndim > 1) || ddbox->nboundeddim < DIM))
+ +    {
+ +        /* No boundary communication required */
+ +        return;
+ +    }
+ +
+ +    /* Communicate the boundaries and update cell_ns_x0/1 */
+ +    dd_move_cellx(dd,ddbox,cell_ns_x0,cell_ns_x1);
+ +    if (dd->bGridJump && dd->ndim > 1)
+ +    {
+ +        check_grid_jump(step,dd,ddbox);
+ +    }
+ +}
+ +
+ +/* Fill the three off-diagonal elements tcm[YY][XX], tcm[ZZ][XX] and
+ + * tcm[ZZ][YY] of the triclinic correction matrix from the box.
+ + * Elements belonging to a dimension with an index of npbcdim or higher
+ + * are set to zero. No other elements of tcm are written.
+ + */
+ +static void make_tric_corr_matrix(int npbcdim,matrix box,matrix tcm)
+ +{
+ +    if (YY >= npbcdim)
+ +    {
+ +        tcm[YY][XX] = 0;
+ +    }
+ +    else
+ +    {
+ +        tcm[YY][XX] = -box[YY][XX]/box[YY][YY];
+ +    }
+ +
+ +    if (ZZ >= npbcdim)
+ +    {
+ +        tcm[ZZ][XX] = 0;
+ +        tcm[ZZ][YY] = 0;
+ +        return;
+ +    }
+ +
+ +    /* The XX element of the ZZ row uses tcm[YY][XX] determined above */
+ +    tcm[ZZ][XX] = -(box[ZZ][YY]*tcm[YY][XX] + box[ZZ][XX])/box[ZZ][ZZ];
+ +    tcm[ZZ][YY] = -box[ZZ][YY]/box[ZZ][ZZ];
+ +}
+ +
+ +static void check_screw_box(matrix box)
+ +{
+ +    /* Mathematical limitation */
+ +    if (box[YY][XX] != 0 || box[ZZ][XX] != 0)
+ +    {
+ +        gmx_fatal(FARGS,"With screw pbc the unit cell can not have non-zero off-diagonal x-components");
+ +    }
+ +    
+ +    /* Limitation due to the asymmetry of the eighth shell method */
+ +    if (box[ZZ][YY] != 0)
+ +    {
+ +        gmx_fatal(FARGS,"pbc=screw with non-zero box_zy is not supported");
+ +    }
+ +}
+ +
+ +static void distribute_cg(FILE *fplog,gmx_large_int_t step,
+ +                          matrix box,ivec tric_dir,t_block *cgs,rvec pos[],
+ +                          gmx_domdec_t *dd)
+ +{
+ +    gmx_domdec_master_t *ma;
+ +    int **tmp_ind=NULL,*tmp_nalloc=NULL;
+ +    int  i,icg,j,k,k0,k1,d,npbcdim;
+ +    matrix tcm;
+ +    rvec box_size,cg_cm;
+ +    ivec ind;
+ +    real nrcg,inv_ncg,pos_d;
+ +    atom_id *cgindex;
+ +    gmx_bool bUnbounded,bScrew;
+ +
+ +    ma = dd->ma;
+ +    
+ +    if (tmp_ind == NULL)
+ +    {
+ +        snew(tmp_nalloc,dd->nnodes);
+ +        snew(tmp_ind,dd->nnodes);
+ +        for(i=0; i<dd->nnodes; i++)
+ +        {
+ +            tmp_nalloc[i] = over_alloc_large(cgs->nr/dd->nnodes+1);
+ +            snew(tmp_ind[i],tmp_nalloc[i]);
+ +        }
+ +    }
+ +    
+ +    /* Clear the count */
+ +    for(i=0; i<dd->nnodes; i++)
+ +    {
+ +        ma->ncg[i] = 0;
+ +        ma->nat[i] = 0;
+ +    }
+ +    
+ +    make_tric_corr_matrix(dd->npbcdim,box,tcm);
+ +    
+ +    cgindex = cgs->index;
+ +    
+ +    /* Compute the center of geometry for all charge groups */
+ +    for(icg=0; icg<cgs->nr; icg++)
+ +    {
+ +        k0      = cgindex[icg];
+ +        k1      = cgindex[icg+1];
+ +        nrcg    = k1 - k0;
+ +        if (nrcg == 1)
+ +        {
+ +            copy_rvec(pos[k0],cg_cm);
+ +        }
+ +        else
+ +        {
+ +            inv_ncg = 1.0/nrcg;
+ +            
+ +            clear_rvec(cg_cm);
+ +            for(k=k0; (k<k1); k++)
+ +            {
+ +                rvec_inc(cg_cm,pos[k]);
+ +            }
+ +            for(d=0; (d<DIM); d++)
+ +            {
+ +                cg_cm[d] *= inv_ncg;
+ +            }
+ +        }
+ +        /* Put the charge group in the box and determine the cell index */
+ +        for(d=DIM-1; d>=0; d--) {
+ +            pos_d = cg_cm[d];
+ +            if (d < dd->npbcdim)
+ +            {
+ +                bScrew = (dd->bScrewPBC && d == XX);
+ +                if (tric_dir[d] && dd->nc[d] > 1)
+ +                {
+ +                    /* Use triclinic coordintates for this dimension */
+ +                    for(j=d+1; j<DIM; j++)
+ +                    {
+ +                        pos_d += cg_cm[j]*tcm[j][d];
+ +                    }
+ +                }
+ +                while(pos_d >= box[d][d])
+ +                {
+ +                    pos_d -= box[d][d];
+ +                    rvec_dec(cg_cm,box[d]);
+ +                    if (bScrew)
+ +                    {
+ +                        cg_cm[YY] = box[YY][YY] - cg_cm[YY];
+ +                        cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
+ +                    }
+ +                    for(k=k0; (k<k1); k++)
+ +                    {
+ +                        rvec_dec(pos[k],box[d]);
+ +                        if (bScrew)
+ +                        {
+ +                            pos[k][YY] = box[YY][YY] - pos[k][YY];
+ +                            pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
+ +                        }
+ +                    }
+ +                }
+ +                while(pos_d < 0)
+ +                {
+ +                    pos_d += box[d][d];
+ +                    rvec_inc(cg_cm,box[d]);
+ +                    if (bScrew)
+ +                    {
+ +                        cg_cm[YY] = box[YY][YY] - cg_cm[YY];
+ +                        cg_cm[ZZ] = box[ZZ][ZZ] - cg_cm[ZZ];
+ +                    }
+ +                    for(k=k0; (k<k1); k++)
+ +                    {
+ +                        rvec_inc(pos[k],box[d]);
+ +                        if (bScrew) {
+ +                            pos[k][YY] = box[YY][YY] - pos[k][YY];
+ +                            pos[k][ZZ] = box[ZZ][ZZ] - pos[k][ZZ];
+ +                        }
+ +                    }
+ +                }
+ +            }
+ +            /* This could be done more efficiently */
+ +            ind[d] = 0;
+ +            while(ind[d]+1 < dd->nc[d] && pos_d >= ma->cell_x[d][ind[d]+1])
+ +            {
+ +                ind[d]++;
+ +            }
+ +        }
+ +        i = dd_index(dd->nc,ind);
+ +        if (ma->ncg[i] == tmp_nalloc[i])
+ +        {
+ +            tmp_nalloc[i] = over_alloc_large(ma->ncg[i]+1);
+ +            srenew(tmp_ind[i],tmp_nalloc[i]);
+ +        }
+ +        tmp_ind[i][ma->ncg[i]] = icg;
+ +        ma->ncg[i]++;
+ +        ma->nat[i] += cgindex[icg+1] - cgindex[icg];
+ +    }
+ +    
+ +    k1 = 0;
+ +    for(i=0; i<dd->nnodes; i++)
+ +    {
+ +        ma->index[i] = k1;
+ +        for(k=0; k<ma->ncg[i]; k++)
+ +        {
+ +            ma->cg[k1++] = tmp_ind[i][k];
+ +        }
+ +    }
+ +    ma->index[dd->nnodes] = k1;
+ +    
+ +    for(i=0; i<dd->nnodes; i++)
+ +    {
+ +        sfree(tmp_ind[i]);
+ +    }
+ +    sfree(tmp_ind);
+ +    sfree(tmp_nalloc);
+ +    
+ +    if (fplog)
+ +    {
+ +        char buf[22];
+ +        fprintf(fplog,"Charge group distribution at step %s:",
+ +                gmx_step_str(step,buf));
+ +        for(i=0; i<dd->nnodes; i++)
+ +        {
+ +            fprintf(fplog," %d",ma->ncg[i]);
+ +        }
+ +        fprintf(fplog,"\n");
+ +    }
+ +}
+ +
+ +/* Set up the initial home charge groups of every DD node.
+ + *
+ + * The master node (DDMASTER) optionally checks the screw-pbc box, sets the
+ + * cell sizes, calls distribute_cg() and packs the per-node charge group and
+ + * atom counts into its integer buffer. Every node then receives its own two
+ + * counts through dd_scatter() and its list of global charge group indices
+ + * through dd_scatterv(), after which dd->cgindex is built from the charge
+ + * group sizes in cgs. With debug output enabled the home charge groups
+ + * are listed, ten per line.
+ + */
+ +static void get_cg_distribution(FILE *fplog,gmx_large_int_t step,gmx_domdec_t *dd,
+ +                                t_block *cgs,matrix box,gmx_ddbox_t *ddbox,
+ +                                rvec pos[])
+ +{
+ +    gmx_domdec_master_t *master = NULL;
+ +    ivec npulse;
+ +    int  home_count[2] = { 0, 0 };
+ +    int  *count_send = NULL;
+ +    int  *sendcount  = NULL;
+ +    int  *displ      = NULL;
+ +    int  *cg_send    = NULL;
+ +    int  node,cg,cg_global,cg_size;
+ +
+ +    if (DDMASTER(dd))
+ +    {
+ +        master = dd->ma;
+ +
+ +        if (dd->bScrewPBC)
+ +        {
+ +            check_screw_box(box);
+ +        }
+ +
+ +        set_dd_cell_sizes_slb(dd,ddbox,TRUE,npulse);
+ +
+ +        distribute_cg(fplog,step,box,ddbox->tric_dir,cgs,pos,dd);
+ +
+ +        /* Interleave the cg count and atom count of each node */
+ +        for(node=0; node<dd->nnodes; node++)
+ +        {
+ +            master->ibuf[2*node]   = master->ncg[node];
+ +            master->ibuf[2*node+1] = master->nat[node];
+ +        }
+ +        count_send = master->ibuf;
+ +    }
+ +    dd_scatter(dd,2*sizeof(int),count_send,home_count);
+ +
+ +    dd->ncg_home = home_count[0];
+ +    dd->nat_home = home_count[1];
+ +    dd->ncg_tot  = dd->ncg_home;
+ +    dd->nat_tot  = dd->nat_home;
+ +    if (dd->ncg_home > dd->cg_nalloc || dd->cg_nalloc == 0)
+ +    {
+ +        dd->cg_nalloc = over_alloc_dd(dd->ncg_home);
+ +        srenew(dd->index_gl,dd->cg_nalloc);
+ +        /* cgindex holds one entry more than the number of charge groups */
+ +        srenew(dd->cgindex,dd->cg_nalloc+1);
+ +    }
+ +
+ +    if (DDMASTER(dd))
+ +    {
+ +        /* Reuse the integer buffer: first the byte counts per node,
+ +         * then the byte offsets into the charge group list.
+ +         */
+ +        for(node=0; node<dd->nnodes; node++)
+ +        {
+ +            master->ibuf[node]            = master->ncg[node]*sizeof(int);
+ +            master->ibuf[dd->nnodes+node] = master->index[node]*sizeof(int);
+ +        }
+ +        sendcount = master->ibuf;
+ +        displ     = master->ibuf + dd->nnodes;
+ +        cg_send   = master->cg;
+ +    }
+ +
+ +    dd_scatterv(dd,
+ +                sendcount,
+ +                displ,
+ +                cg_send,
+ +                dd->ncg_home*sizeof(int),dd->index_gl);
+ +
+ +    /* Determine the home charge group sizes */
+ +    dd->cgindex[0] = 0;
+ +    for(cg=0; cg<dd->ncg_home; cg++)
+ +    {
+ +        cg_global = dd->index_gl[cg];
+ +        cg_size   = cgs->index[cg_global+1] - cgs->index[cg_global];
+ +        dd->cgindex[cg+1] = dd->cgindex[cg] + cg_size;
+ +    }
+ +
+ +    if (debug)
+ +    {
+ +        fprintf(debug,"Home charge groups:\n");
+ +        for(cg=0; cg<dd->ncg_home; cg++)
+ +        {
+ +            fprintf(debug," %d",dd->index_gl[cg]);
+ +            if (cg % 10 == 9)
+ +            {
+ +                fprintf(debug,"\n");
+ +            }
+ +        }
+ +        fprintf(debug,"\n");
+ +    }
+ +}
+ +
+ +/* Handle one per-atom vector array (src) for all home charge groups.
+ + *
+ + * Charge groups with move[] == -1 stay; with bCompact their atoms are
+ + * shifted down in src so the staying atoms become contiguous.
+ + * Atoms of the other charge groups are copied into the communication
+ + * buffer comm->cgcm_state[move[]]. In that buffer each charge group takes
+ + * 1 + nvec*size entries: one leading entry followed by nvec vectors of
+ + * size atoms; this call fills vector number vec.
+ + *
+ + * Returns the number of atoms counted as home: the staying atoms with
+ + * bCompact, all atoms without.
+ + */
+ +static int compact_and_copy_vec_at(int ncg,int *move,
+ +                                   int *cgindex,
+ +                                   int nvec,int vec,
+ +                                   rvec *src,gmx_domdec_comm_t *comm,
+ +                                   gmx_bool bCompact)
+ +{
+ +    int buf_off[DIM*2];
+ +    int n_home  = 0;
+ +    int a_begin = 0;
+ +    int cg,a,a_end,natoms,dest,pos;
+ +
+ +    for(dest=0; dest<DIM*2; dest++)
+ +    {
+ +        buf_off[dest] = 0;
+ +    }
+ +
+ +    for(cg=0; cg<ncg; cg++)
+ +    {
+ +        a_end  = cgindex[cg+1];
+ +        natoms = a_end - a_begin;
+ +        dest   = move[cg];
+ +        if (dest != -1)
+ +        {
+ +            /* Copy to the communication buffer: skip the leading entry
+ +             * and the vec vectors that precede this one.
+ +             */
+ +            pos = buf_off[dest] + 1 + vec*natoms;
+ +            for(a=a_begin; a<a_end; a++)
+ +            {
+ +                copy_rvec(src[a],comm->cgcm_state[dest][pos++]);
+ +            }
+ +            /* Advance past the complete record of this charge group */
+ +            buf_off[dest] = pos + (nvec - vec - 1)*natoms;
+ +        }
+ +        else if (bCompact)
+ +        {
+ +            /* Compact the home array in place */
+ +            for(a=a_begin; a<a_end; a++)
+ +            {
+ +                copy_rvec(src[a],src[n_home++]);
+ +            }
+ +        }
+ +        if (!bCompact)
+ +        {
+ +            n_home += natoms;
+ +        }
+ +        a_begin = a_end;
+ +    }
+ +
+ +    return n_home;
+ +}
+ +
+ +/* Handle a per-charge-group vector array (src) for all home charge groups.
+ + *
+ + * Entries of charge groups with move[] == -1 stay; with bCompact they are
+ + * shifted down in src so they become contiguous. The entry of every other
+ + * charge group is copied to the first slot of its record in
+ + * comm->cgcm_state[move[]]; records are 1 + size*nvec entries long.
+ + *
+ + * Returns the number of staying charge groups with bCompact, ncg without.
+ + */
+ +static int compact_and_copy_vec_cg(int ncg,int *move,
+ +                                   int *cgindex,
+ +                                   int nvec,rvec *src,gmx_domdec_comm_t *comm,
+ +                                   gmx_bool bCompact)
+ +{
+ +    int buf_off[DIM*2];
+ +    int n_stay  = 0;
+ +    int a_begin = 0;
+ +    int cg,a_end,dest;
+ +
+ +    for(dest=0; dest<DIM*2; dest++)
+ +    {
+ +        buf_off[dest] = 0;
+ +    }
+ +
+ +    for(cg=0; cg<ncg; cg++)
+ +    {
+ +        a_end = cgindex[cg+1];
+ +        dest  = move[cg];
+ +        if (dest != -1)
+ +        {
+ +            /* Copy to the communication buffer */
+ +            copy_rvec(src[cg],comm->cgcm_state[dest][buf_off[dest]]);
+ +            buf_off[dest] += 1 + (a_end - a_begin)*nvec;
+ +        }
+ +        else if (bCompact)
+ +        {
+ +            /* Compact the home array in place */
+ +            copy_rvec(src[cg],src[n_stay++]);
+ +        }
+ +        a_begin = a_end;
+ +    }
+ +
+ +    return bCompact ? n_stay : ncg;
+ +}
+ +
+ +/* Compact the index arrays of the home charge groups in place.
+ + *
+ + * Charge groups with move[] == -1 are kept: their entries in index_gl,
+ + * cginfo, cgindex and gatindex are shifted down and ga2la is updated
+ + * with the new local atom index. For all other charge groups the atoms
+ + * are removed from ga2la and, when bLocalCG is given, the charge group
+ + * is marked as not local.
+ + *
+ + * Returns the number of charge groups that are kept.
+ + */
+ +static int compact_ind(int ncg,int *move,
+ +                       int *index_gl,int *cgindex,
+ +                       int *gatindex,
+ +                       gmx_ga2la_t ga2la,char *bLocalCG,
+ +                       int *cginfo)
+ +{
+ +    int n_keep = 0;
+ +    int at_new = 0;
+ +    int icg,at,at_begin,at_end,at_global;
+ +
+ +    for(icg=0; icg<ncg; icg++)
+ +    {
+ +        /* Read the old atom range before cgindex is overwritten below */
+ +        at_begin = cgindex[icg];
+ +        at_end   = cgindex[icg+1];
+ +
+ +        if (move[icg] != -1)
+ +        {
+ +            /* Clear the global indices */
+ +            for(at=at_begin; at<at_end; at++)
+ +            {
+ +                ga2la_del(ga2la,gatindex[at]);
+ +            }
+ +            if (bLocalCG)
+ +            {
+ +                bLocalCG[index_gl[icg]] = FALSE;
+ +            }
+ +        }
+ +        else
+ +        {
+ +            /* Compact the home arrays in place.
+ +             * Anything that can be done here avoids access to global arrays.
+ +             */
+ +            cgindex[n_keep] = at_new;
+ +            for(at=at_begin; at<at_end; at++)
+ +            {
+ +                at_global        = gatindex[at];
+ +                gatindex[at_new] = at_global;
+ +                /* The cell number stays 0, so we don't need to set it */
+ +                ga2la_change_la(ga2la,at_global,at_new);
+ +                at_new++;
+ +            }
+ +            index_gl[n_keep] = index_gl[icg];
+ +            cginfo[n_keep]   = cginfo[icg];
+ +            /* The charge group remains local, so bLocalCG does not change */
+ +            n_keep++;
+ +        }
+ +    }
+ +    /* Close the atom range of the last kept charge group */
+ +    cgindex[n_keep] = at_new;
+ +
+ +    return n_keep;
+ +}
+ +
+ +/* Non-compacting counterpart of compact_ind: leave the index arrays in
+ + * place and only process the charge groups that move (move[] >= 0).
+ + * Their atoms are removed from ga2la, the charge group is marked as not
+ + * local when bLocalCG is given, and cell_index is set to -1.
+ + */
+ +static void clear_and_mark_ind(int ncg,int *move,
+ +                               int *index_gl,int *cgindex,int *gatindex,
+ +                               gmx_ga2la_t ga2la,char *bLocalCG,
+ +                               int *cell_index)
+ +{
+ +    int icg,at;
+ +
+ +    for(icg=0; icg<ncg; icg++)
+ +    {
+ +        if (move[icg] < 0)
+ +        {
+ +            /* This charge group stays home, nothing to do */
+ +            continue;
+ +        }
+ +
+ +        /* Clear the global indices */
+ +        for(at=cgindex[icg]; at<cgindex[icg+1]; at++)
+ +        {
+ +            ga2la_del(ga2la,gatindex[at]);
+ +        }
+ +        if (bLocalCG)
+ +        {
+ +            bLocalCG[index_gl[icg]] = FALSE;
+ +        }
+ +        /* Signal that this cg has moved using the ns cell index.
+ +         * Here we set it to -1.
+ +         * fill_grid will change it from -1 to 4*grid->ncells.
+ +         */
+ +        cell_index[icg] = -1;
+ +    }
+ +}
+ +
+ +/* Write a report to fplog about charge group cg that moved too far
+ + * along dimension dim.
+ + *
+ + * fplog               stream to write to, must not be NULL
+ + * dd, cg              the first atom of home charge group cg is reported
+ + *                     by its global atom number (ddglatnr)
+ + * step                the step number printed in the header
+ + * dim, dir            dimension of the move; with dir == 1 the distance
+ + *                     is taken relative to the upper cell boundary,
+ + *                     otherwise relative to the lower one
+ + * bHaveLimitdAndCMOld when TRUE, limitd and cm_old are printed as well
+ + * cm_new, pos_d       new center and its position along dim
+ + */
+ +static void print_cg_move(FILE *fplog,
+ +                          gmx_domdec_t *dd,
+ +                          gmx_large_int_t step,int cg,int dim,int dir,
+ +                          gmx_bool bHaveLimitdAndCMOld,real limitd,
+ +                          rvec cm_old,rvec cm_new,real pos_d)
+ +{
+ +    gmx_domdec_comm_t *comm;
+ +    char buf[22];
+ +
+ +    comm = dd->comm;
+ +
+ +    fprintf(fplog,"\nStep %s:\n",gmx_step_str(step,buf));
+ +    if (bHaveLimitdAndCMOld)
+ +    {
+ +        fprintf(fplog,"The charge group starting at atom %d moved more than the distance allowed by the domain decomposition (%f) in direction %c\n",
+ +                ddglatnr(dd,dd->cgindex[cg]),limitd,dim2char(dim));
+ +    }
+ +    else
+ +    {
+ +        fprintf(fplog,"The charge group starting at atom %d moved more than the distance allowed by the domain decomposition in direction %c\n",
+ +                ddglatnr(dd,dd->cgindex[cg]),dim2char(dim));
+ +    }
+ +    /* Distance beyond the boundary that was crossed */
+ +    fprintf(fplog,"distance out of cell %f\n",
+ +            dir==1 ? pos_d - comm->cell_x1[dim] : pos_d - comm->cell_x0[dim]);
+ +    if (bHaveLimitdAndCMOld)
+ +    {
+ +        fprintf(fplog,"Old coordinates: %8.3f %8.3f %8.3f\n",
+ +                cm_old[XX],cm_old[YY],cm_old[ZZ]);
+ +    }
+ +    fprintf(fplog,"New coordinates: %8.3f %8.3f %8.3f\n",
+ +            cm_new[XX],cm_new[YY],cm_new[ZZ]);
+ +    fprintf(fplog,"Old cell boundaries in direction %c: %8.3f %8.3f\n",
+ +            dim2char(dim),
+ +            comm->old_cell_x0[dim],comm->old_cell_x1[dim]);
+ +    fprintf(fplog,"New cell boundaries in direction %c: %8.3f %8.3f\n",
+ +            dim2char(dim),
+ +            comm->cell_x0[dim],comm->cell_x1[dim]);
+ +}
+ +
+ +static void cg_move_error(FILE *fplog,
+ +                          gmx_domdec_t *dd,
+ +                          gmx_large_int_t step,int cg,int dim,int dir,
+ +                          gmx_bool bHaveLimitdAndCMOld,real limitd,
+ +                          rvec cm_old,rvec cm_new,real pos_d)
+ +{
+ +    if (fplog)
+ +    {
+ +        print_cg_move(fplog, dd,step,cg,dim,dir,
+ +                      bHaveLimitdAndCMOld,limitd,cm_old,cm_new,pos_d);
+ +    }
+ +    print_cg_move(stderr,dd,step,cg,dim,dir,
+ +                  bHaveLimitdAndCMOld,limitd,cm_old,cm_new,pos_d);
+ +    gmx_fatal(FARGS,
+ +              "A charge group moved too far between two domain decomposition steps\n"
+ +              "This usually means that your system is not well equilibrated");
+ +}
+ +
+ +/* Apply the screw-pbc rotation to atom a in all distributed state
+ + * entries that are present in state->flags.
+ + *
+ + * Positions are mirrored in the box along y and z, the other vectors
+ + * (v, sd_X, cg_p) get their y and z components negated. Entries that
+ + * hold no per-atom vectors are left untouched; an entry that is not
+ + * listed here triggers gmx_incons.
+ + */
+ +static void rotate_state_atom(t_state *state,int a)
+ +{
+ +    int est;
+ +
+ +    for(est=0; est<estNR; est++)
+ +    {
+ +        if (EST_DISTR(est) && (state->flags & (1<<est)))
+ +        {
+ +            switch (est)
+ +            {
+ +            case estX:
+ +                /* Rotate the complete state; for a rectangular box only */
+ +                state->x[a][YY] = state->box[YY][YY] - state->x[a][YY];
+ +                state->x[a][ZZ] = state->box[ZZ][ZZ] - state->x[a][ZZ];
+ +                break;
+ +            case estV:
+ +                state->v[a][YY] = -state->v[a][YY];
+ +                state->v[a][ZZ] = -state->v[a][ZZ];
+ +                break;
+ +            case estSDX:
+ +                state->sd_X[a][YY] = -state->sd_X[a][YY];
+ +                state->sd_X[a][ZZ] = -state->sd_X[a][ZZ];
+ +                break;
+ +            case estCGP:
+ +                state->cg_p[a][YY] = -state->cg_p[a][YY];
+ +                state->cg_p[a][ZZ] = -state->cg_p[a][ZZ];
+ +                break;
+ +            case estLD_RNG:
+ +            case estLD_RNGI:
+ +                /* Listed as requiring no processing in dd_redistribute_cg;
+ +                 * presumably not per-atom vectors, so nothing to rotate.
+ +                 */
+ +                break;
+ +            case estDISRE_INITF:
+ +            case estDISRE_RM3TAV:
+ +            case estORIRE_INITF:
+ +            case estORIRE_DTAV:
+ +                /* These are distances, so not affected by rotation */
+ +                break;
+ +            default:
+ +                gmx_incons("Unknown state entry encountered in rotate_state_atom");            
+ +            }
+ +        }
+ +    }
+ +}
+ +
+ +static int dd_redistribute_cg(FILE *fplog,gmx_large_int_t step,
+ +                              gmx_domdec_t *dd,ivec tric_dir,
+ +                              t_state *state,rvec **f,
+ +                              t_forcerec *fr,t_mdatoms *md,
+ +                              gmx_bool bCompact,
+ +                              t_nrnb *nrnb)
+ +{
+ +    int  *move;
+ +    int  npbcdim;
+ +    int  ncg[DIM*2],nat[DIM*2];
+ +    int  c,i,cg,k,k0,k1,d,dim,dim2,dir,d2,d3,d4,cell_d;
+ +    int  mc,cdd,nrcg,ncg_recv,nat_recv,nvs,nvr,nvec,vec;
+ +    int  sbuf[2],rbuf[2];
+ +    int  home_pos_cg,home_pos_at,ncg_stay_home,buf_pos;
+ +    int  flag;
+ +    gmx_bool bV=FALSE,bSDX=FALSE,bCGP=FALSE;
+ +    gmx_bool bScrew;
+ +    ivec dev;
+ +    real inv_ncg,pos_d;
+ +    matrix tcm;
+ +    rvec *cg_cm,cell_x0,cell_x1,limitd,limit0,limit1,cm_new;
+ +    atom_id *cgindex;
+ +    cginfo_mb_t *cginfo_mb;
+ +    gmx_domdec_comm_t *comm;
+ +    
+ +    if (dd->bScrewPBC)
+ +    {
+ +        check_screw_box(state->box);
+ +    }
+ +    
+ +    comm  = dd->comm;
+ +    cg_cm = fr->cg_cm;
+ +    
+ +    for(i=0; i<estNR; i++)
+ +    {
+ +        if (EST_DISTR(i))
+ +        {
+ +            switch (i)
+ +            {
+ +            case estX:   /* Always present */            break;
+ +            case estV:   bV   = (state->flags & (1<<i)); break;
+ +            case estSDX: bSDX = (state->flags & (1<<i)); break;
+ +            case estCGP: bCGP = (state->flags & (1<<i)); break;
+ +            case estLD_RNG:
+ +            case estLD_RNGI:
+ +            case estDISRE_INITF:
+ +            case estDISRE_RM3TAV:
+ +            case estORIRE_INITF:
+ +            case estORIRE_DTAV:
+ +                /* No processing required */
+ +                break;
+ +            default:
+ +            gmx_incons("Unknown state entry encountered in dd_redistribute_cg");
+ +            }
+ +        }
+ +    }
+ +    
+ +    if (dd->ncg_tot > comm->nalloc_int)
+ +    {
+ +        comm->nalloc_int = over_alloc_dd(dd->ncg_tot);
+ +        srenew(comm->buf_int,comm->nalloc_int);
+ +    }
+ +    move = comm->buf_int;
+ +    
+ +    /* Clear the count */
+ +    for(c=0; c<dd->ndim*2; c++)
+ +    {
+ +        ncg[c] = 0;
+ +        nat[c] = 0;
+ +    }
+ +
+ +    npbcdim = dd->npbcdim;
+ +
+ +    for(d=0; (d<DIM); d++)
+ +    {
+ +        limitd[d] = dd->comm->cellsize_min[d];
+ +        if (d >= npbcdim && dd->ci[d] == 0)
+ +        {
+ +            cell_x0[d] = -GMX_FLOAT_MAX;
+ +        }
+ +        else
+ +        {
+ +            cell_x0[d] = comm->cell_x0[d];
+ +        }
+ +        if (d >= npbcdim && dd->ci[d] == dd->nc[d] - 1)
+ +        {
+ +            cell_x1[d] = GMX_FLOAT_MAX;
+ +        }
+ +        else
+ +        {
+ +            cell_x1[d] = comm->cell_x1[d];
+ +        }
+ +        if (d < npbcdim)
+ +        {
+ +            limit0[d] = comm->old_cell_x0[d] - limitd[d];
+ +            limit1[d] = comm->old_cell_x1[d] + limitd[d];
+ +        }
+ +        else
+ +        {
+ +            /* We check after communication if a charge group moved
+ +             * more than one cell. Set the pre-comm check limit to float_max.
+ +             */
+ +            limit0[d] = -GMX_FLOAT_MAX;
+ +            limit1[d] =  GMX_FLOAT_MAX;
+ +        }
+ +    }
+ +    
+ +    make_tric_corr_matrix(npbcdim,state->box,tcm);
+ +    
+ +    cgindex = dd->cgindex;
+ +    
+ +    /* Compute the center of geometry for all home charge groups
+ +     * and put them in the box and determine where they should go.
+ +     */
+ +    for(cg=0; cg<dd->ncg_home; cg++)
+ +    {
+ +        k0   = cgindex[cg];
+ +        k1   = cgindex[cg+1];
+ +        nrcg = k1 - k0;
+ +        if (nrcg == 1)
+ +        {
+ +            copy_rvec(state->x[k0],cm_new);
+ +        }
+ +        else
+ +        {
+ +            inv_ncg = 1.0/nrcg;
+ +            
+ +            clear_rvec(cm_new);
+ +            for(k=k0; (k<k1); k++)
+ +            {
+ +                rvec_inc(cm_new,state->x[k]);
+ +            }
+ +            for(d=0; (d<DIM); d++)
+ +            {
+ +                cm_new[d] = inv_ncg*cm_new[d];
+ +            }
+ +        }
+ +        
+ +        clear_ivec(dev);
+ +        /* Do pbc and check DD cell boundary crossings */
+ +        for(d=DIM-1; d>=0; d--)
+ +        {
+ +            if (dd->nc[d] > 1)
+ +            {
+ +                bScrew = (dd->bScrewPBC && d == XX);
+ +                /* Determine the location of this cg in lattice coordinates */
+ +                pos_d = cm_new[d];
+ +                if (tric_dir[d])
+ +                {
+ +                    for(d2=d+1; d2<DIM; d2++)
+ +                    {
+ +                        pos_d += cm_new[d2]*tcm[d2][d];
+ +                    }
+ +                }
+ +                /* Put the charge group in the triclinic unit-cell */
+ +                if (pos_d >= cell_x1[d])
+ +                {
+ +                    if (pos_d >= limit1[d])
+ +                    {
+ +                        cg_move_error(fplog,dd,step,cg,d,1,TRUE,limitd[d],
+ +                                      cg_cm[cg],cm_new,pos_d);
+ +                    }
+ +                    dev[d] = 1;
+ +                    if (dd->ci[d] == dd->nc[d] - 1)
+ +                    {
+ +                        rvec_dec(cm_new,state->box[d]);
+ +                        if (bScrew)
+ +                        {
+ +                            cm_new[YY] = state->box[YY][YY] - cm_new[YY];
+ +                            cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
+ +                        }
+ +                        for(k=k0; (k<k1); k++)
+ +                        {
+ +                            rvec_dec(state->x[k],state->box[d]);
+ +                            if (bScrew)
+ +                            {
+ +                                rotate_state_atom(state,k);
+ +                            }
+ +                        }
+ +                    }
+ +                }
+ +                else if (pos_d < cell_x0[d])
+ +                {
+ +                    if (pos_d < limit0[d])
+ +                    {
+ +                        cg_move_error(fplog,dd,step,cg,d,-1,TRUE,limitd[d],
+ +                                      cg_cm[cg],cm_new,pos_d);
+ +                    }
+ +                    dev[d] = -1;
+ +                    if (dd->ci[d] == 0)
+ +                    {
+ +                        rvec_inc(cm_new,state->box[d]);
+ +                        if (bScrew)
+ +                        {
+ +                            cm_new[YY] = state->box[YY][YY] - cm_new[YY];
+ +                            cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
+ +                        }
+ +                        for(k=k0; (k<k1); k++)
+ +                        {
+ +                            rvec_inc(state->x[k],state->box[d]);
+ +                            if (bScrew)
+ +                            {
+ +                                rotate_state_atom(state,k);
+ +                            }
+ +                        }
+ +                    }
+ +                }
+ +            }
+ +            else if (d < npbcdim)
+ +            {
+ +                /* Put the charge group in the rectangular unit-cell */
+ +                while (cm_new[d] >= state->box[d][d])
+ +                {
+ +                    rvec_dec(cm_new,state->box[d]);
+ +                    for(k=k0; (k<k1); k++)
+ +                    {
+ +                        rvec_dec(state->x[k],state->box[d]);
+ +                    }
+ +                }
+ +                while (cm_new[d] < 0)
+ +                {
+ +                    rvec_inc(cm_new,state->box[d]);
+ +                    for(k=k0; (k<k1); k++)
+ +                    {
+ +                        rvec_inc(state->x[k],state->box[d]);
+ +                    }
+ +                }
+ +            }
+ +        }
+ +    
+ +        copy_rvec(cm_new,cg_cm[cg]);
+ +        
+ +        /* Determine where this cg should go */
+ +        flag = 0;
+ +        mc = -1;
+ +        for(d=0; d<dd->ndim; d++)
+ +        {
+ +            dim = dd->dim[d];
+ +            if (dev[dim] == 1)
+ +            {
+ +                flag |= DD_FLAG_FW(d);
+ +                if (mc == -1)
+ +                {
+ +                    mc = d*2;
+ +                }
+ +            }
+ +            else if (dev[dim] == -1)
+ +            {
+ +                flag |= DD_FLAG_BW(d);
+ +                if (mc == -1) {
+ +                    if (dd->nc[dim] > 2)
+ +                    {
+ +                        mc = d*2 + 1;
+ +                    }
+ +                    else
+ +                    {
+ +                        mc = d*2;
+ +                    }
+ +                }
+ +            }
+ +        }
+ +        move[cg] = mc;
+ +        if (mc >= 0)
+ +        {
+ +            if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
+ +            {
+ +                comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
+ +                srenew(comm->cggl_flag[mc],comm->cggl_flag_nalloc[mc]*DD_CGIBS);
+ +            }
+ +            comm->cggl_flag[mc][ncg[mc]*DD_CGIBS  ] = dd->index_gl[cg];
+ +            /* We store the cg size in the lower 16 bits
+ +             * and the place where the charge group should go
+ +             * in the next 6 bits. This saves some communication volume.
+ +             */
+ +            comm->cggl_flag[mc][ncg[mc]*DD_CGIBS+1] = nrcg | flag;
+ +            ncg[mc] += 1;
+ +            nat[mc] += nrcg;
+ +        }
+ +    }
+ +    
+ +    inc_nrnb(nrnb,eNR_CGCM,dd->nat_home);
+ +    inc_nrnb(nrnb,eNR_RESETX,dd->ncg_home);
+ +    
+ +    nvec = 1;
+ +    if (bV)
+ +    {
+ +        nvec++;
+ +    }
+ +    if (bSDX)
+ +    {
+ +        nvec++;
+ +    }
+ +    if (bCGP)
+ +    {
+ +        nvec++;
+ +    }
+ +    
+ +    /* Make sure the communication buffers are large enough */
+ +    for(mc=0; mc<dd->ndim*2; mc++)
+ +    {
+ +        nvr = ncg[mc] + nat[mc]*nvec;
+ +        if (nvr > comm->cgcm_state_nalloc[mc])
+ +        {
+ +            comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr);
+ +            srenew(comm->cgcm_state[mc],comm->cgcm_state_nalloc[mc]);
+ +        }
+ +    }
+ +    
+ +    /* Recalculating cg_cm might be cheaper than communicating,
+ +     * but that could give rise to rounding issues.
+ +     */
+ +    home_pos_cg =
+ +        compact_and_copy_vec_cg(dd->ncg_home,move,cgindex,
+ +                                nvec,cg_cm,comm,bCompact);
+ +    
+ +    vec = 0;
+ +    home_pos_at =
+ +        compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
+ +                                nvec,vec++,state->x,comm,bCompact);
+ +    if (bV)
+ +    {
+ +        compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
+ +                                nvec,vec++,state->v,comm,bCompact);
+ +    }
+ +    if (bSDX)
+ +    {
+ +        compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
+ +                                nvec,vec++,state->sd_X,comm,bCompact);
+ +    }
+ +    if (bCGP)
+ +    {
+ +        compact_and_copy_vec_at(dd->ncg_home,move,cgindex,
+ +                                nvec,vec++,state->cg_p,comm,bCompact);
+ +    }
+ +    
+ +    if (bCompact)
+ +    {
+ +        compact_ind(dd->ncg_home,move,
+ +                    dd->index_gl,dd->cgindex,dd->gatindex,
+ +                    dd->ga2la,comm->bLocalCG,
+ +                    fr->cginfo);
+ +    }
+ +    else
+ +    {
+ +        clear_and_mark_ind(dd->ncg_home,move,
+ +                           dd->index_gl,dd->cgindex,dd->gatindex,
+ +                           dd->ga2la,comm->bLocalCG,
+ +                           fr->ns.grid->cell_index);
+ +    }
+ +    
+ +    cginfo_mb = fr->cginfo_mb;
+ +
+ +    ncg_stay_home = home_pos_cg;
+ +    for(d=0; d<dd->ndim; d++)
+ +    {
+ +        dim = dd->dim[d];
+ +        ncg_recv = 0;
+ +        nat_recv = 0;
+ +        nvr      = 0;
+ +        for(dir=0; dir<(dd->nc[dim]==2 ? 1 : 2); dir++)
+ +        {
+ +            cdd = d*2 + dir;
+ +            /* Communicate the cg and atom counts */
+ +            sbuf[0] = ncg[cdd];
+ +            sbuf[1] = nat[cdd];
+ +            if (debug)
+ +            {
+ +                fprintf(debug,"Sending ddim %d dir %d: ncg %d nat %d\n",
+ +                        d,dir,sbuf[0],sbuf[1]);
+ +            }
+ +            dd_sendrecv_int(dd, d, dir, sbuf, 2, rbuf, 2);
+ +            
+ +            if ((ncg_recv+rbuf[0])*DD_CGIBS > comm->nalloc_int)
+ +            {
+ +                comm->nalloc_int = over_alloc_dd((ncg_recv+rbuf[0])*DD_CGIBS);
+ +                srenew(comm->buf_int,comm->nalloc_int);
+ +            }
+ +            
+ +            /* Communicate the charge group indices, sizes and flags */
+ +            dd_sendrecv_int(dd, d, dir,
+ +                            comm->cggl_flag[cdd], sbuf[0]*DD_CGIBS,
+ +                            comm->buf_int+ncg_recv*DD_CGIBS, rbuf[0]*DD_CGIBS);
+ +            
+ +            nvs = ncg[cdd] + nat[cdd]*nvec;
+ +            i   = rbuf[0]  + rbuf[1] *nvec;
+ +            vec_rvec_check_alloc(&comm->vbuf,nvr+i);
+ +            
+ +            /* Communicate cgcm and state */
+ +            dd_sendrecv_rvec(dd, d, dir,
+ +                             comm->cgcm_state[cdd], nvs,
+ +                             comm->vbuf.v+nvr, i);
+ +            ncg_recv += rbuf[0];
+ +            nat_recv += rbuf[1];
+ +            nvr      += i;
+ +        }
+ +        
+ +        /* Process the received charge groups */
+ +        buf_pos = 0;
+ +        for(cg=0; cg<ncg_recv; cg++)
+ +        {
+ +            flag = comm->buf_int[cg*DD_CGIBS+1];
+ +
+ +            if (dim >= npbcdim && dd->nc[dim] > 2)
+ +            {
+ +                /* No pbc in this dim and more than one domain boundary.
+ +                 * We do a separate check if a charge group did not move too far.
+ +                 */
+ +                if (((flag & DD_FLAG_FW(d)) &&
+ +                     comm->vbuf.v[buf_pos][d] > cell_x1[dim]) ||
+ +                    ((flag & DD_FLAG_BW(d)) &&
+ +                     comm->vbuf.v[buf_pos][d] < cell_x0[dim]))
+ +                {
+ +                    cg_move_error(fplog,dd,step,cg,d,
+ +                                  (flag & DD_FLAG_FW(d)) ? 1 : 0,
+ +                                   FALSE,0,
+ +                                   comm->vbuf.v[buf_pos],
+ +                                   comm->vbuf.v[buf_pos],
+ +                                   comm->vbuf.v[buf_pos][d]);
+ +                }
+ +            }
+ +
+ +            mc = -1;
+ +            if (d < dd->ndim-1)
+ +            {
+ +                /* Check which direction this cg should go */
+ +                for(d2=d+1; (d2<dd->ndim && mc==-1); d2++)
+ +                {
+ +                    if (dd->bGridJump)
+ +                    {
+ +                        /* The cell boundaries for dimension d2 are not equal
+ +                         * for each cell row of the lower dimension(s),
+ +                         * therefore we might need to redetermine where
+ +                         * this cg should go.
+ +                         */
+ +                        dim2 = dd->dim[d2];
+ +                        /* If this cg crosses the box boundary in dimension d2
+ +                         * we can use the communicated flag, so we do not
+ +                         * have to worry about pbc.
+ +                         */
+ +                        if (!((dd->ci[dim2] == dd->nc[dim2]-1 &&
+ +                               (flag & DD_FLAG_FW(d2))) ||
+ +                              (dd->ci[dim2] == 0 &&
+ +                               (flag & DD_FLAG_BW(d2)))))
+ +                        {
+ +                            /* Clear the two flags for this dimension */
+ +                            flag &= ~(DD_FLAG_FW(d2) | DD_FLAG_BW(d2));
+ +                            /* Determine the location of this cg
+ +                             * in lattice coordinates
+ +                             */
+ +                            pos_d = comm->vbuf.v[buf_pos][dim2];
+ +                            if (tric_dir[dim2])
+ +                            {
+ +                                for(d3=dim2+1; d3<DIM; d3++)
+ +                                {
+ +                                    pos_d +=
+ +                                        comm->vbuf.v[buf_pos][d3]*tcm[d3][dim2];
+ +                                }
+ +                            }
+ +                            /* Check if we are not at the box edge.
+ +                             * pbc is only handled in the first step above,
+ +                             * but this check could move over pbc while
+ +                             * the first step did not due to different rounding.
+ +                             */
+ +                            if (pos_d >= cell_x1[dim2] &&
+ +                                dd->ci[dim2] != dd->nc[dim2]-1)
+ +                            {
+ +                                flag |= DD_FLAG_FW(d2);
+ +                            }
+ +                            else if (pos_d < cell_x0[dim2] &&
+ +                                     dd->ci[dim2] != 0)
+ +                            {
+ +                                flag |= DD_FLAG_BW(d2);
+ +                            }
+ +                            comm->buf_int[cg*DD_CGIBS+1] = flag;
+ +                        }
+ +                    }
+ +                    /* Set to which neighboring cell this cg should go */
+ +                    if (flag & DD_FLAG_FW(d2))
+ +                    {
+ +                        mc = d2*2;
+ +                    }
+ +                    else if (flag & DD_FLAG_BW(d2))
+ +                    {
+ +                        if (dd->nc[dd->dim[d2]] > 2)
+ +                        {
+ +                            mc = d2*2+1;
+ +                        }
+ +                        else
+ +                        {
+ +                            mc = d2*2;
+ +                        }
+ +                    }
+ +                }
+ +            }
+ +            
+ +            nrcg = flag & DD_FLAG_NRCG;
+ +            if (mc == -1)
+ +            {
+ +                if (home_pos_cg+1 > dd->cg_nalloc)
+ +                {
+ +                    dd->cg_nalloc = over_alloc_dd(home_pos_cg+1);
+ +                    srenew(dd->index_gl,dd->cg_nalloc);
+ +                    srenew(dd->cgindex,dd->cg_nalloc+1);
+ +                }
+ +                /* Set the global charge group index and size */
+ +                dd->index_gl[home_pos_cg] = comm->buf_int[cg*DD_CGIBS];
+ +                dd->cgindex[home_pos_cg+1] = dd->cgindex[home_pos_cg] + nrcg;
+ +                /* Copy the state from the buffer */
+ +                if (home_pos_cg >= fr->cg_nalloc)
+ +                {
+ +                    dd_realloc_fr_cg(fr,home_pos_cg+1);
+ +                    cg_cm = fr->cg_cm;
+ +                }
+ +                copy_rvec(comm->vbuf.v[buf_pos++],cg_cm[home_pos_cg]);
+ +                /* Set the cginfo */
+ +                fr->cginfo[home_pos_cg] = ddcginfo(cginfo_mb,
+ +                                                   dd->index_gl[home_pos_cg]);
+ +                if (comm->bLocalCG)
+ +                {
+ +                    comm->bLocalCG[dd->index_gl[home_pos_cg]] = TRUE;
+ +                }
+ +
+ +                if (home_pos_at+nrcg > state->nalloc)
+ +                {
+ +                    dd_realloc_state(state,f,home_pos_at+nrcg);
+ +                }
+ +                for(i=0; i<nrcg; i++)
+ +                {
+ +                    copy_rvec(comm->vbuf.v[buf_pos++],
+ +                              state->x[home_pos_at+i]);
+ +                }
+ +                if (bV)
+ +                {
+ +                    for(i=0; i<nrcg; i++)
+ +                    {
+ +                        copy_rvec(comm->vbuf.v[buf_pos++],
+ +                                  state->v[home_pos_at+i]);
+ +                    }
+ +                }
+ +                if (bSDX)
+ +                {
+ +                    for(i=0; i<nrcg; i++)
+ +                    {
+ +                        copy_rvec(comm->vbuf.v[buf_pos++],
+ +                                  state->sd_X[home_pos_at+i]);
+ +                    }
+ +                }
+ +                if (bCGP)
+ +                {
+ +                    for(i=0; i<nrcg; i++)
+ +                    {
+ +                        copy_rvec(comm->vbuf.v[buf_pos++],
+ +                                  state->cg_p[home_pos_at+i]);
+ +                    }
+ +                }
+ +                home_pos_cg += 1;
+ +                home_pos_at += nrcg;
+ +            }
+ +            else
+ +            {
+ +                /* Reallocate the buffers if necessary  */
+ +                if (ncg[mc]+1 > comm->cggl_flag_nalloc[mc])
+ +                {
+ +                    comm->cggl_flag_nalloc[mc] = over_alloc_dd(ncg[mc]+1);
+ +                    srenew(comm->cggl_flag[mc],comm->cggl_flag_nalloc[mc]*DD_CGIBS);
+ +                }
+ +                nvr = ncg[mc] + nat[mc]*nvec;
+ +                if (nvr + 1 + nrcg*nvec > comm->cgcm_state_nalloc[mc])
+ +                {
+ +                    comm->cgcm_state_nalloc[mc] = over_alloc_dd(nvr + 1 + nrcg*nvec);
+ +                    srenew(comm->cgcm_state[mc],comm->cgcm_state_nalloc[mc]);
+ +                }
+ +                /* Copy from the receive to the send buffers */
+ +                memcpy(comm->cggl_flag[mc] + ncg[mc]*DD_CGIBS,
+ +                       comm->buf_int + cg*DD_CGIBS,
+ +                       DD_CGIBS*sizeof(int));
+ +                memcpy(comm->cgcm_state[mc][nvr],
+ +                       comm->vbuf.v[buf_pos],
+ +                       (1+nrcg*nvec)*sizeof(rvec));
+ +                buf_pos += 1 + nrcg*nvec;
+ +                ncg[mc] += 1;
+ +                nat[mc] += nrcg;
+ +            }
+ +        }
+ +    }
+ +    
+ +    /* With sorting (!bCompact) the indices are now only partially up to date
+ +     * and ncg_home and nat_home are not the real count, since there are
+ +     * "holes" in the arrays for the charge groups that moved to neighbors.
+ +     */
+ +    dd->ncg_home = home_pos_cg;
+ +    dd->nat_home = home_pos_at;
+ +
+ +    if (debug)
+ +    {
+ +        fprintf(debug,"Finished repartitioning\n");
+ +    }
+ +
+ +    return ncg_stay_home;
+ +}
+ +
+ +/* Record one cycle measurement for counter ddCycl: add it to the running
+ + * total, increase the sample count and keep track of the largest sample.
+ + */
+ +void dd_cycles_add(gmx_domdec_t *dd,float cycles,int ddCycl)
+ +{
+ +    gmx_domdec_comm_t *comm = dd->comm;
+ +
+ +    comm->cycl[ddCycl]   += cycles;
+ +    comm->cycl_n[ddCycl] += 1;
+ +    if (comm->cycl_max[ddCycl] < cycles)
+ +    {
+ +        comm->cycl_max[ddCycl] = cycles;
+ +    }
+ +}
+ +
+ +/* Return a weighted flop estimate for the force calculation, built from
+ + * the interaction counters in nrnb->n[] and the per-interaction cost
+ + * returned by cost_nrnb().
+ + *
+ + * Three index ranges of the nrnb enumeration are summed:
+ + *  - [eNR_NBKERNEL010, eNR_NBKERNEL_FREE_ENERGY): weighted by 0.25 when the
+ + *    counter name contains "W3" or "W4", by 0.50 otherwise;
+ + *  - [eNR_NBKERNEL_FREE_ENERGY, eNR_NB14]: full weight, but only for
+ + *    counters whose name contains "W3" or "W4" (see the review note below);
+ + *  - [eNR_BONDS, eNR_WALLS]: full weight.
+ + */
+ +static double force_flop_count(t_nrnb *nrnb)
+ +{
+ +    int i;
+ +    double sum;
+ +    const char *name;
+ +
+ +    sum = 0;
+ +    for(i=eNR_NBKERNEL010; i<eNR_NBKERNEL_FREE_ENERGY; i++)
+ +    {
+ +        /* To get closer to the real timings, we halve the count
+ +         * for the normal loops and halve it again for water loops.
+ +         */
+ +        name = nrnb_str(i);
+ +        if (strstr(name,"W3") != NULL || strstr(name,"W4") != NULL)
+ +        {
+ +            sum += nrnb->n[i]*0.25*cost_nrnb(i);
+ +        }
+ +        else
+ +        {
+ +            sum += nrnb->n[i]*0.50*cost_nrnb(i);
+ +        }
+ +    }
+ +    for(i=eNR_NBKERNEL_FREE_ENERGY; i<=eNR_NB14; i++)
+ +    {
+ +        /* NOTE(review): the statement below was previously an unbraced,
+ +         * mis-indented body of this if. The braces make the existing
+ +         * behavior explicit: a counter in this range only contributes
+ +         * when its name contains "W3" or "W4". That filter looks like
+ +         * a copy of the water-loop test above and may exclude every
+ +         * counter in this range - confirm against the nrnb name table
+ +         * before changing the arithmetic.
+ +         */
+ +        name = nrnb_str(i);
+ +        if (strstr(name,"W3") != NULL || strstr(name,"W4") != NULL)
+ +        {
+ +            sum += nrnb->n[i]*cost_nrnb(i);
+ +        }
+ +    }
+ +    for(i=eNR_BONDS; i<=eNR_WALLS; i++)
+ +    {
+ +        sum += nrnb->n[i]*cost_nrnb(i);
+ +    }
+ +
+ +    return sum;
+ +}
+ +
+ +/* Open a flop-count interval: subtract the current force flop estimate
+ + * from the accumulator. Does nothing unless comm->eFlop is set.
+ + */
+ +void dd_force_flop_start(gmx_domdec_t *dd,t_nrnb *nrnb)
+ +{
+ +    gmx_domdec_comm_t *comm = dd->comm;
+ +
+ +    if (!comm->eFlop)
+ +    {
+ +        return;
+ +    }
+ +    comm->flop -= force_flop_count(nrnb);
+ +}
+ +/* Close a flop-count interval: add the current force flop estimate to
+ + * the accumulator and count one more sample. Does nothing unless
+ + * comm->eFlop is set.
+ + */
+ +void dd_force_flop_stop(gmx_domdec_t *dd,t_nrnb *nrnb)
+ +{
+ +    gmx_domdec_comm_t *comm = dd->comm;
+ +
+ +    if (!comm->eFlop)
+ +    {
+ +        return;
+ +    }
+ +    comm->flop   += force_flop_count(nrnb);
+ +    comm->flop_n += 1;
+ +}
+ +
+ +/* Reset the flop accumulator and, for every one of the ddCyclNr cycle
+ + * counters, its total, sample count and maximum.
+ + */
+ +static void clear_dd_cycle_counts(gmx_domdec_t *dd)
+ +{
+ +    gmx_domdec_comm_t *comm = dd->comm;
+ +    int                c;
+ +
+ +    comm->flop   = 0;
+ +    comm->flop_n = 0;
+ +
+ +    for(c=0; c<ddCyclNr; c++)
+ +    {
+ +        comm->cycl[c]     = 0;
+ +        comm->cycl_n[c]   = 0;
+ +        comm->cycl_max[c] = 0;
+ +    }
+ +}
+ +
+ +/* Gather the load measurements of the DD cells, one decomposition
+ + * dimension at a time, and accumulate the totals on the DD master.
+ + *
+ + * The loop runs from the last DD dimension down to the first. In each
+ + * dimension the participating ranks pack a small float record into sbuf,
+ + * the records of a row are gathered on the row root, and the root reduces
+ + * them into comm->load[d]. For every dimension but the last, the record
+ + * that is sent is the already reduced comm->load[d+1].
+ + *
+ + * Record layout (the same order is used for packing and unpacking):
+ + *   sum, max,
+ + *   [if dd->bGridJump] sum_m, cvol_min, [if d < ndim-1] flags,
+ + *                      [if d > 0] cell_f_max0, cell_f_min1,
+ + *   [if separate PME nodes] mdf, pme
+ + *
+ + * The whole routine is timed under the ewcDDCOMMLOAD wallcycle counter.
+ + */
+ +static void get_load_distribution(gmx_domdec_t *dd,gmx_wallcycle_t wcycle)
+ +{
+ +    gmx_domdec_comm_t *comm;
+ +    gmx_domdec_load_t *load;
+ +    gmx_domdec_root_t *root=NULL;
+ +    int  d,dim,cid,i,pos;
+ +    float cell_frac=0,sbuf[DD_NLOAD_MAX];
+ +    gmx_bool bSepPME;
+ +    
+ +    if (debug)
+ +    {
+ +        fprintf(debug,"get_load_distribution start\n");
+ +    }
+ +
+ +    wallcycle_start(wcycle,ewcDDCOMMLOAD);
+ +    
+ +    comm = dd->comm;
+ +    
+ +    /* A non-negative pme_nodeid is used as the marker for separate PME nodes */
+ +    bSepPME = (dd->pme_nodeid >= 0);
+ +    
+ +    for(d=dd->ndim-1; d>=0; d--)
+ +    {
+ +        dim = dd->dim[d];
+ +        /* Check if we participate in the communication in this dimension */
+ +        if (d == dd->ndim-1 || 
+ +            (dd->ci[dd->dim[d+1]]==0 && dd->ci[dd->dim[dd->ndim-1]]==0))
+ +        {
+ +            load = &comm->load[d];
+ +            if (dd->bGridJump)
+ +            {
+ +                cell_frac = comm->cell_f1[d] - comm->cell_f0[d];
+ +            }
+ +            pos = 0;
+ +            if (d == dd->ndim-1)
+ +            {
+ +                /* Last dimension: send our own force load; it serves as
+ +                 * sum, max and (with bGridJump) sum_m at the same time.
+ +                 * No flags entry is packed here, matching the unpacking
+ +                 * below which only reads flags for d < ndim-1.
+ +                 */
+ +                sbuf[pos++] = dd_force_load(comm);
+ +                sbuf[pos++] = sbuf[0];
+ +                if (dd->bGridJump)
+ +                {
+ +                    sbuf[pos++] = sbuf[0];
+ +                    sbuf[pos++] = cell_frac;
+ +                    if (d > 0)
+ +                    {
+ +                        sbuf[pos++] = comm->cell_f_max0[d];
+ +                        sbuf[pos++] = comm->cell_f_min1[d];
+ +                    }
+ +                }
+ +                if (bSepPME)
+ +                {
+ +                    sbuf[pos++] = comm->cycl[ddCyclPPduringPME];
+ +                    sbuf[pos++] = comm->cycl[ddCyclPME];
+ +                }
+ +            }
+ +            else
+ +            {
+ +                /* Other dimensions: forward the values that were reduced
+ +                 * for dimension d+1 in the previous iteration.
+ +                 */
+ +                sbuf[pos++] = comm->load[d+1].sum;
+ +                sbuf[pos++] = comm->load[d+1].max;
+ +                if (dd->bGridJump)
+ +                {
+ +                    sbuf[pos++] = comm->load[d+1].sum_m;
+ +                    sbuf[pos++] = comm->load[d+1].cvol_min*cell_frac;
+ +                    /* The integer flags travel as a float and are rounded
+ +                     * back to int on the receiving side.
+ +                     */
+ +                    sbuf[pos++] = comm->load[d+1].flags;
+ +                    if (d > 0)
+ +                    {
+ +                        sbuf[pos++] = comm->cell_f_max0[d];
+ +                        sbuf[pos++] = comm->cell_f_min1[d];
+ +                    }
+ +                }
+ +                if (bSepPME)
+ +                {
+ +                    sbuf[pos++] = comm->load[d+1].mdf;
+ +                    sbuf[pos++] = comm->load[d+1].pme;
+ +                }
+ +            }
+ +            /* Number of floats in one record */
+ +            load->nload = pos;
+ +            /* Communicate a row in DD direction d.
+ +             * The communicators are setup such that the root always has rank 0.
+ +             */
+ +#ifdef GMX_MPI
+ +            MPI_Gather(sbuf      ,load->nload*sizeof(float),MPI_BYTE,
+ +                       load->load,load->nload*sizeof(float),MPI_BYTE,
+ +                       0,comm->mpi_comm_load[d]);
+ +#endif
+ +            if (dd->ci[dim] == dd->master_ci[dim])
+ +            {
+ +                /* We are the root, process this row */
+ +                /* root stays NULL unless comm->bDynLoadBal is set, but it is
+ +                 * dereferenced below whenever dd->bGridJump is set.
+ +                 * NOTE(review): this relies on bGridJump implying
+ +                 * bDynLoadBal - confirm.
+ +                 */
+ +                if (comm->bDynLoadBal)
+ +                {
+ +                    root = comm->root[d];
+ +                }
+ +                load->sum = 0;
+ +                load->max = 0;
+ +                load->sum_m = 0;
+ +                load->cvol_min = 1;
+ +                load->flags = 0;
+ +                load->mdf = 0;
+ +                load->pme = 0;
+ +                pos = 0;
+ +                /* Walk over the gathered records, one per cell along dim */
+ +                for(i=0; i<dd->nc[dim]; i++)
+ +                {
+ +                    load->sum += load->load[pos++];
+ +                    load->max = max(load->max,load->load[pos]);
+ +                    pos++;
+ +                    if (dd->bGridJump)
+ +                    {
+ +                        if (root->bLimited)
+ +                        {
+ +                            /* This direction could not be load balanced properly,
+ +                             * therefore we need to use the maximum instead of
+ +                             * the average load.
+ +                             */
+ +                            load->sum_m = max(load->sum_m,load->load[pos]);
+ +                        }
+ +                        else
+ +                        {
+ +                            load->sum_m += load->load[pos];
+ +                        }
+ +                        pos++;
+ +                        load->cvol_min = min(load->cvol_min,load->load[pos]);
+ +                        pos++;
+ +                        if (d < dd->ndim-1)
+ +                        {
+ +                            /* Each record overwrites flags, so the value of
+ +                             * the last cell in the row is what remains.
+ +                             */
+ +                            load->flags = (int)(load->load[pos++] + 0.5);
+ +                        }
+ +                        if (d > 0)
+ +                        {
+ +                            root->cell_f_max0[i] = load->load[pos++];
+ +                            root->cell_f_min1[i] = load->load[pos++];
+ +                        }
+ +                    }
+ +                    if (bSepPME)
+ +                    {
+ +                        load->mdf = max(load->mdf,load->load[pos]);
+ +                        pos++;
+ +                        load->pme = max(load->pme,load->load[pos]);
+ +                        pos++;
+ +                    }
+ +                }
+ +                if (comm->bDynLoadBal && root->bLimited)
+ +                {
+ +                    /* sum_m holds a maximum in this case; scale it by the
+ +                     * number of cells and mark dimension d as limited.
+ +                     */
+ +                    load->sum_m *= dd->nc[dim];
+ +                    load->flags |= (1<<d);
+ +                }
+ +            }
+ +        }
+ +    }
+ +
+ +    if (DDMASTER(dd))
+ +    {
+ +        /* Add this measurement to the running totals used for averaging */
+ +        comm->nload      += dd_load_count(comm);
+ +        comm->load_step  += comm->cycl[ddCyclStep];
+ +        comm->load_sum   += comm->load[0].sum;
+ +        comm->load_max   += comm->load[0].max;
+ +        if (comm->bDynLoadBal)
+ +        {
+ +            for(d=0; d<dd->ndim; d++)
+ +            {
+ +                if (comm->load[0].flags & (1<<d))
+ +                {
+ +                    comm->load_lim[d]++;
+ +                }
+ +            }
+ +        }
+ +        if (bSepPME)
+ +        {
+ +            comm->load_mdf += comm->load[0].mdf;
+ +            comm->load_pme += comm->load[0].pme;
+ +        }
+ +    }
+ +
+ +    wallcycle_stop(wcycle,ewcDDCOMMLOAD);
+ +    
+ +    if (debug)
+ +    {
+ +        fprintf(debug,"get_load_distribution finished\n");
+ +    }
+ +}
+ +
+ +/* Return the relative performance loss on the total run time
+ + * due to the force calculation load imbalance, computed from the
+ + * accumulated load totals. Returns 0 when no load was recorded.
+ + */
+ +static float dd_force_imb_perf_loss(gmx_domdec_t *dd)
+ +{
+ +    gmx_domdec_comm_t *comm = dd->comm;
+ +
+ +    if (comm->nload > 0)
+ +    {
+ +        return
+ +            (comm->load_max*dd->nnodes - comm->load_sum)/
+ +            (comm->load_step*dd->nnodes);
+ +    }
+ +
+ +    return 0;
+ +}
+ +
+ +/* Print the averaged load balancing statistics to fplog and stderr.
+ + *
+ + * Only the DD master prints, and only when at least one load measurement
+ + * was accumulated (comm->nload > 0). fplog is written to without a NULL
+ + * check, so the caller must pass a valid stream on the master.
+ + *
+ + * Output, in order: average load imbalance, run time lost to the
+ + * imbalance, with dynamic load balancing the percentage of limited steps
+ + * per DD dimension, with separate PME nodes the PME/force load ratio and
+ + * the PP/PME imbalance loss, and finally NOTE blocks when a loss reaches
+ + * DD_PERF_LOSS.
+ + */
+ +static void print_dd_load_av(FILE *fplog,gmx_domdec_t *dd)
+ +{
+ +    char  buf[STRLEN];
+ +    int   npp,npme,nnodes,d,limp;
+ +    float imbal,pme_f_ratio,lossf,lossp=0;
+ +    gmx_bool  bLim;
+ +    gmx_domdec_comm_t *comm;
+ +
+ +    comm = dd->comm;
+ +    if (DDMASTER(dd) && comm->nload > 0)
+ +    {
+ +        npp    = dd->nnodes;
+ +        npme   = (dd->pme_nodeid >= 0) ? comm->npmenodes : 0;
+ +        nnodes = npp + npme;
+ +        /* Ratio of the maximum to the average force load, minus one */
+ +        imbal = comm->load_max*npp/comm->load_sum - 1;
+ +        lossf = dd_force_imb_perf_loss(dd);
+ +        sprintf(buf," Average load imbalance: %.1f %%\n",imbal*100);
+ +        fprintf(fplog,"%s",buf);
+ +        fprintf(stderr,"\n");
+ +        fprintf(stderr,"%s",buf);
+ +        sprintf(buf," Part of the total run time spent waiting due to load imbalance: %.1f %%\n",lossf*100);
+ +        fprintf(fplog,"%s",buf);
+ +        fprintf(stderr,"%s",buf);
+ +        bLim = FALSE;
+ +        if (comm->bDynLoadBal)
+ +        {
+ +            sprintf(buf," Steps where the load balancing was limited by -rdd, -rcon and/or -dds:");
+ +            for(d=0; d<dd->ndim; d++)
+ +            {
+ +                /* Integer percentage of the load measurements in which
+ +                 * dimension d was limited.
+ +                 */
+ +                limp = (200*comm->load_lim[d]+1)/(2*comm->nload);
+ +                sprintf(buf+strlen(buf)," %c %d %%",dim2char(dd->dim[d]),limp);
+ +                /* Remember whether any dimension was limited in at least
+ +                 * half of the measurements; used for the advice below.
+ +                 */
+ +                if (limp >= 50)
+ +                {
+ +                    bLim = TRUE;
+ +                }
+ +            }
+ +            sprintf(buf+strlen(buf),"\n");
+ +            fprintf(fplog,"%s",buf);
+ +            fprintf(stderr,"%s",buf);
+ +        }
+ +        if (npme > 0)
+ +        {
+ +            pme_f_ratio = comm->load_pme/comm->load_mdf;
+ +            /* Signed loss: negative when the PME load is the smaller one */
+ +            lossp = (comm->load_pme -comm->load_mdf)/comm->load_step;
+ +            /* Scale by the fraction of PME nodes when lossp <= 0 and by the
+ +             * fraction of PP nodes otherwise - presumably the share of
+ +             * nodes that is waiting in each case.
+ +             */
+ +            if (lossp <= 0)
+ +            {
+ +                lossp *= (float)npme/(float)nnodes;
+ +            }
+ +            else
+ +            {
+ +                lossp *= (float)npp/(float)nnodes;
+ +            }
+ +            sprintf(buf," Average PME mesh/force load: %5.3f\n",pme_f_ratio);
+ +            fprintf(fplog,"%s",buf);
+ +            fprintf(stderr,"%s",buf);
+ +            sprintf(buf," Part of the total run time spent waiting due to PP/PME imbalance: %.1f %%\n",fabs(lossp)*100);
+ +            fprintf(fplog,"%s",buf);
+ +            fprintf(stderr,"%s",buf);
+ +        }
+ +        fprintf(fplog,"\n");
+ +        fprintf(stderr,"\n");
+ +        
+ +        /* Advice for the user when the force imbalance loss is significant */
+ +        if (lossf >= DD_PERF_LOSS)
+ +        {
+ +            sprintf(buf,
+ +                    "NOTE: %.1f %% performance was lost due to load imbalance\n"
+ +                    "      in the domain decomposition.\n",lossf*100);
+ +            if (!comm->bDynLoadBal)
+ +            {
+ +                sprintf(buf+strlen(buf),"      You might want to use dynamic load balancing (option -dlb.)\n");
+ +            }
+ +            else if (bLim)
+ +            {
+ +                sprintf(buf+strlen(buf),"      You might want to decrease the cell size limit (options -rdd, -rcon and/or -dds).\n");
+ +            }
+ +            fprintf(fplog,"%s\n",buf);
+ +            fprintf(stderr,"%s\n",buf);
+ +        }
+ +        /* Advice when the PP/PME imbalance loss is significant; the sign
+ +         * of lossp selects the wording.
+ +         */
+ +        if (npme > 0 && fabs(lossp) >= DD_PERF_LOSS)
+ +        {
+ +            sprintf(buf,
+ +                    "NOTE: %.1f %% performance was lost because the PME nodes\n"
+ +                    "      had %s work to do than the PP nodes.\n"
+ +                    "      You might want to %s the number of PME nodes\n"
+ +                    "      or %s the cut-off and the grid spacing.\n",
+ +                    fabs(lossp*100),
+ +                    (lossp < 0) ? "less"     : "more",
+ +                    (lossp < 0) ? "decrease" : "increase",
+ +                    (lossp < 0) ? "decrease" : "increase");
+ +            fprintf(fplog,"%s\n",buf);
+ +            fprintf(stderr,"%s\n",buf);
+ +        }
+ +    }
+ +}
+ +
+ +/* Return load[0].cvol_min multiplied by the number of DD nodes; printed
+ + * by the load reports as the "vol min/aver" value.
+ + */
+ +static float dd_vol_min(gmx_domdec_t *dd)
+ +{
+ +    const gmx_domdec_load_t *load0 = &dd->comm->load[0];
+ +
+ +    return load0->cvol_min*dd->nnodes;
+ +}
+ +
+ +/* Return the flags field of load[0]. Callers in this file test individual
+ + * bits (1<<d) of the result as well as its overall truth value.
+ + */
+ +static gmx_bool dd_load_flags(gmx_domdec_t *dd)
+ +{
+ +    const gmx_domdec_load_t *load0 = &dd->comm->load[0];
+ +
+ +    return load0->flags;
+ +}
+ +
+ +/* Return the force load imbalance of the latest measurement:
+ + * maximum load times the node count, divided by the summed load, minus one.
+ + */
+ +static float dd_f_imbal(gmx_domdec_t *dd)
+ +{
+ +    const gmx_domdec_load_t *load0 = &dd->comm->load[0];
+ +
+ +    return load0->max*dd->nnodes/load0->sum - 1;
+ +}
+ +
+ +/* Return the ratio of the pme and mdf load values of the latest
+ + * measurement; printed by the load reports as the PME mesh/force ratio.
+ + */
+ +static float dd_pme_f_ratio(gmx_domdec_t *dd)
+ +{
+ +    const gmx_domdec_load_t *load0 = &dd->comm->load[0];
+ +
+ +    return load0->pme/load0->mdf;
+ +}
+ +
+ +/* Write the load status of the given step to fplog: first, when any
+ + * limit flag is set, the list of limited dimensions; then one line with
+ + * the step number, the minimum cell volume (with dynamic load balancing),
+ + * the force imbalance and, when PME cycles were counted, the PME
+ + * mesh/force ratio.
+ + */
+ +static void dd_print_load(FILE *fplog,gmx_domdec_t *dd,gmx_large_int_t step)
+ +{
+ +    char step_str[22];
+ +    int  lim_flags,di;
+ +
+ +    lim_flags = dd_load_flags(dd);
+ +    if (lim_flags)
+ +    {
+ +        fprintf(fplog,
+ +                "DD  load balancing is limited by minimum cell size in dimension");
+ +        for(di=0; di<dd->ndim; di++)
+ +        {
+ +            if ((lim_flags & (1<<di)) == 0)
+ +            {
+ +                continue;
+ +            }
+ +            fprintf(fplog," %c",dim2char(dd->dim[di]));
+ +        }
+ +        fprintf(fplog,"\n");
+ +    }
+ +
+ +    fprintf(fplog,"DD  step %s",gmx_step_str(step,step_str));
+ +    if (dd->comm->bDynLoadBal)
+ +    {
+ +        /* A '!' after the volume marks that balancing was limited */
+ +        fprintf(fplog,"  vol min/aver %5.3f%c",
+ +                dd_vol_min(dd),lim_flags ? '!' : ' ');
+ +    }
+ +    fprintf(fplog," load imb.: force %4.1f%%",dd_f_imbal(dd)*100);
+ +    if (dd->comm->cycl_n[ddCyclPME])
+ +    {
+ +        fprintf(fplog,"  pme mesh/force %5.3f",dd_pme_f_ratio(dd));
+ +    }
+ +    fprintf(fplog,"\n\n");
+ +}
+ +
+ +/* Write a compact load summary to stderr, without a trailing newline:
+ + * minimum cell volume (with dynamic load balancing), force imbalance as
+ + * an integer percentage and, when PME cycles were counted, the PME/force
+ + * ratio.
+ + */
+ +static void dd_print_load_verbose(gmx_domdec_t *dd)
+ +{
+ +    gmx_domdec_comm_t *comm = dd->comm;
+ +    int                lim_mark;
+ +
+ +    if (comm->bDynLoadBal)
+ +    {
+ +        /* A '!' after the volume marks that balancing was limited */
+ +        lim_mark = dd_load_flags(dd) ? '!' : ' ';
+ +        fprintf(stderr,"vol %4.2f%c ",dd_vol_min(dd),lim_mark);
+ +    }
+ +    fprintf(stderr,"imb F %2d%% ",(int)(dd_f_imbal(dd)*100+0.5));
+ +    if (comm->cycl_n[ddCyclPME])
+ +    {
+ +        fprintf(stderr,"pme/F %4.2f ",dd_pme_f_ratio(dd));
+ +    }
+ +}
+ +
+ +#ifdef GMX_MPI
+ +/* Create the load communicator for one row of cells along DD dimension
+ + * index dim_ind, and allocate the buffers that go with it.
+ + *
+ + * g_all   : the MPI group of dd->mpi_comm_all
+ + * dim_ind : index into dd->dim[] of the dimension the row runs along
+ + * loc     : cell coordinates selecting the row; the component along the
+ + *           row dimension is overwritten on a local copy
+ + *
+ + * MPI_Comm_create is called on dd->mpi_comm_all by every caller, also by
+ + * ranks that are not in the row; those get MPI_COMM_NULL and store nothing.
+ + */
+ +static void make_load_communicator(gmx_domdec_t *dd,MPI_Group g_all,
+ +                                   int dim_ind,ivec loc)
+ +{
+ +    MPI_Group g_row;
+ +    MPI_Comm  c_row;
+ +    int  dim,i,*rank;
+ +    ivec loc_c;
+ +    gmx_domdec_root_t *root;
+ +    
+ +    dim = dd->dim[dim_ind];
+ +    copy_ivec(loc,loc_c);
+ +    /* Collect the DD index of every cell in this row, in cell order */
+ +    snew(rank,dd->nc[dim]);
+ +    for(i=0; i<dd->nc[dim]; i++)
+ +    {
+ +        loc_c[dim] = i;
+ +        rank[i] = dd_index(dd->nc,loc_c);
+ +    }
+ +    /* Here we create a new group, that does not necessarily
+ +     * include our process. But MPI_Comm_create needs to be
+ +     * called by all the processes in the original communicator.
+ +     * Calling MPI_Group_free afterwards gives errors, so I assume
+ +     * also the group is needed by all processes. (B. Hess)
+ +     */
+ +    MPI_Group_incl(g_all,dd->nc[dim],rank,&g_row);
+ +    MPI_Comm_create(dd->mpi_comm_all,g_row,&c_row);
+ +    if (c_row != MPI_COMM_NULL)
+ +    {
+ +        /* This process is part of the group */
+ +        dd->comm->mpi_comm_load[dim_ind] = c_row;
+ +        if (dd->comm->eDLB != edlbNO)
+ +        {
+ +            if (dd->ci[dim] == dd->master_ci[dim])
+ +            {
+ +                /* This is the root process of this row */
+ +                snew(dd->comm->root[dim_ind],1);
+ +                root = dd->comm->root[dim_ind];
+ +                snew(root->cell_f,DD_CELL_F_SIZE(dd,dim_ind));
+ +                snew(root->old_cell_f,dd->nc[dim]+1);
+ +                snew(root->bCellMin,dd->nc[dim]);
+ +                /* These per-cell arrays are only allocated for dimension
+ +                 * indices above the first.
+ +                 */
+ +                if (dim_ind > 0)
+ +                {
+ +                    snew(root->cell_f_max0,dd->nc[dim]);
+ +                    snew(root->cell_f_min1,dd->nc[dim]);
+ +                    snew(root->bound_min,dd->nc[dim]);
+ +                    snew(root->bound_max,dd->nc[dim]);
+ +                }
+ +                snew(root->buf_ncd,dd->nc[dim]);
+ +            }
+ +            else
+ +            {
+ +                /* This is not a root process, we only need to receive cell_f */
+ +                snew(dd->comm->cell_f_row,DD_CELL_F_SIZE(dd,dim_ind));
+ +            }
+ +        }
+ +        if (dd->ci[dim] == dd->master_ci[dim])
+ +        {
+ +            /* Receive buffer of the row root: room for a record of at most
+ +             * DD_NLOAD_MAX floats from every cell in the row.
+ +             */
+ +            snew(dd->comm->load[dim_ind].load,dd->nc[dim]*DD_NLOAD_MAX);
+ +        }
+ +    }
+ +    sfree(rank);
+ +}
+ +#endif
+ +
+ +/* Allocate the per-dimension load data and create the load communicators
+ + * for every row of cells: one row for the first DD dimension, one row per
+ + * cell of the first dimension for the second, and one row per cell pair
+ + * of the first two dimensions for the third.
+ + * Compiles to an empty function without GMX_MPI.
+ + */
+ +static void make_load_communicators(gmx_domdec_t *dd)
+ +{
+ +#ifdef GMX_MPI
+ +    MPI_Group g_all;
+ +    int  dim0,dim1,c0,c1;
+ +    ivec cell;
+ +
+ +    if (debug)
+ +    {
+ +        fprintf(debug,"Making load communicators\n");
+ +    }
+ +
+ +    MPI_Comm_group(dd->mpi_comm_all,&g_all);
+ +
+ +    snew(dd->comm->load,dd->ndim);
+ +    snew(dd->comm->mpi_comm_load,dd->ndim);
+ +
+ +    /* First dimension: a single row through the origin */
+ +    clear_ivec(cell);
+ +    make_load_communicator(dd,g_all,0,cell);
+ +
+ +    /* Second dimension: one row for every cell along the first */
+ +    if (dd->ndim > 1)
+ +    {
+ +        dim0 = dd->dim[0];
+ +        for(c0=0; c0<dd->nc[dim0]; c0++)
+ +        {
+ +            cell[dim0] = c0;
+ +            make_load_communicator(dd,g_all,1,cell);
+ +        }
+ +    }
+ +
+ +    /* Third dimension: one row for every cell pair of the first two */
+ +    if (dd->ndim > 2)
+ +    {
+ +        dim0 = dd->dim[0];
+ +        dim1 = dd->dim[1];
+ +        for(c0=0; c0<dd->nc[dim0]; c0++)
+ +        {
+ +            cell[dim0] = c0;
+ +            for(c1=0; c1<dd->nc[dim1]; c1++)
+ +            {
+ +                cell[dim1] = c1;
+ +                make_load_communicator(dd,g_all,2,cell);
+ +            }
+ +        }
+ +    }
+ +
+ +    MPI_Group_free(&g_all);
+ +
+ +    if (debug)
+ +    {
+ +        fprintf(debug,"Finished making load communicators\n");
+ +    }
+ +#endif
+ +}
+ +
+ +/* Set up the DD grid data that depends on the home cell position:
+ + * the forward/backward neighbor ranks for every DD dimension, the zone
+ + * shifts, and for every i-zone its j-zone range and allowed shift range.
+ + * Also allocates the DLB root array and, when load recording is on,
+ + * creates the load communicators.
+ + *
+ + * fplog may be NULL; the grid summary is then only printed to stderr
+ + * by the DD master.
+ + */
+ +void setup_dd_grid(FILE *fplog,gmx_domdec_t *dd)
+ +{
+ +    /* NOTE(review): bZYX is never used in this function, and s is only
+ +     * written, never read (see the loop below).
+ +     */
+ +    gmx_bool bZYX;
+ +    int  d,dim,i,j,m;
+ +    ivec tmp,s;
+ +    int  nzone,nzonep;
+ +    ivec dd_zp[DD_MAXIZONE];
+ +    gmx_domdec_zones_t *zones;
+ +    gmx_domdec_ns_ranges_t *izone;
+ +    
+ +    /* Determine the neighbor ranks one cell forward ([d][0]) and one cell
+ +     * backward ([d][1]) along every DD dimension, wrapping around the grid.
+ +     */
+ +    for(d=0; d<dd->ndim; d++)
+ +    {
+ +        dim = dd->dim[d];
+ +        copy_ivec(dd->ci,tmp);
+ +        tmp[dim] = (tmp[dim] + 1) % dd->nc[dim];
+ +        dd->neighbor[d][0] = ddcoord2ddnodeid(dd,tmp);
+ +        copy_ivec(dd->ci,tmp);
+ +        tmp[dim] = (tmp[dim] - 1 + dd->nc[dim]) % dd->nc[dim];
+ +        dd->neighbor[d][1] = ddcoord2ddnodeid(dd,tmp);
+ +        if (debug)
+ +        {
+ +            fprintf(debug,"DD rank %d neighbor ranks in dir %d are + %d - %d\n",
+ +                    dd->rank,dim,
+ +                    dd->neighbor[d][0],
+ +                    dd->neighbor[d][1]);
+ +        }
+ +    }
+ +    
+ +    if (DDMASTER(dd))
+ +    {
+ +        fprintf(stderr,"Making %dD domain decomposition %d x %d x %d\n",
+ +          dd->ndim,dd->nc[XX],dd->nc[YY],dd->nc[ZZ]);
+ +    }
+ +    if (fplog)
+ +    {
+ +        fprintf(fplog,"\nMaking %dD domain decomposition grid %d x %d x %d, home cell index %d %d %d\n\n",
+ +                dd->ndim,
+ +                dd->nc[XX],dd->nc[YY],dd->nc[ZZ],
+ +                dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
+ +    }
+ +    /* Select the zone count and copy the i-zone/j-zone pair table that
+ +     * belongs to the number of decomposed dimensions.
+ +     */
+ +    switch (dd->ndim)
+ +    {
+ +    case 3:
+ +        nzone  = dd_z3n;
+ +        nzonep = dd_zp3n;
+ +        for(i=0; i<nzonep; i++)
+ +        {
+ +            copy_ivec(dd_zp3[i],dd_zp[i]);
+ +        }
+ +        break;
+ +    case 2:
+ +        nzone  = dd_z2n;
+ +        nzonep = dd_zp2n;
+ +        for(i=0; i<nzonep; i++)
+ +        {
+ +            copy_ivec(dd_zp2[i],dd_zp[i]);
+ +        }
+ +        break;
+ +    case 1:
+ +        nzone  = dd_z1n;
+ +        nzonep = dd_zp1n;
+ +        for(i=0; i<nzonep; i++)
+ +        {
+ +            copy_ivec(dd_zp1[i],dd_zp[i]);
+ +        }
+ +        break;
+ +    default:
+ +        gmx_fatal(FARGS,"Can only do 1, 2 or 3D domain decomposition");
+ +        /* Assignments after the fatal error, presumably only there to
+ +         * keep the compiler from warning about uninitialized use.
+ +         */
+ +        nzone = 0;
+ +        nzonep = 0;
+ +    }
+ +
+ +    zones = &dd->comm->zones;
+ +
+ +    /* Spread the zone offsets from dd_zo, which are listed per DD
+ +     * dimension index, over the Cartesian dimensions that are decomposed.
+ +     */
+ +    for(i=0; i<nzone; i++)
+ +    {
+ +        m = 0;
+ +        clear_ivec(zones->shift[i]);
+ +        for(d=0; d<dd->ndim; d++)
+ +        {
+ +            zones->shift[i][dd->dim[d]] = dd_zo[i][m++];
+ +        }
+ +    }
+ +    
+ +    zones->n = nzone;
+ +    /* NOTE(review): this loop computes the wrapped cell coordinates s for
+ +     * every zone, but s is not used afterwards in this function, so the
+ +     * loop has no visible effect here.
+ +     */
+ +    for(i=0; i<nzone; i++)
+ +    {
+ +        for(d=0; d<DIM; d++)
+ +        {
+ +            s[d] = dd->ci[d] - zones->shift[i][d];
+ +            if (s[d] < 0)
+ +            {
+ +                s[d] += dd->nc[d];
+ +            }
+ +            else if (s[d] >= dd->nc[d])
+ +            {
+ +                s[d] -= dd->nc[d];
+ +            }
+ +        }
+ +    }
+ +    zones->nizone = nzonep;
+ +    for(i=0; i<zones->nizone; i++)
+ +    {
+ +        /* Entry i of the pair table must describe i-zone i */
+ +        if (dd_zp[i][0] != i)
+ +        {
+ +            gmx_fatal(FARGS,"Internal inconsistency in the dd grid setup");
+ +        }
+ +        izone = &zones->izone[i];
+ +        /* j-zones j0 up to, but not including, j1 pair with i-zone i */
+ +        izone->j0 = dd_zp[i][1];
+ +        izone->j1 = dd_zp[i][2];
+ +        for(dim=0; dim<DIM; dim++)
+ +        {
+ +            if (dd->nc[dim] == 1)
+ +            {
+ +                /* All shifts should be allowed */
+ +                izone->shift0[dim] = -1;
+ +                izone->shift1[dim] = 1;
+ +            }
+ +            else
+ +            {
+ +                /*
+ +                  izone->shift0[d] = 0;
+ +                  izone->shift1[d] = 0;
+ +                  for(j=izone->j0; j<izone->j1; j++) {
+ +                  if (dd->shift[j][d] > dd->shift[i][d])
+ +                  izone->shift0[d] = -1;
+ +                  if (dd->shift[j][d] < dd->shift[i][d])
+ +                  izone->shift1[d] = 1;
+ +                  }
+ +                */
+ +                
+ +                int shift_diff;
+ +                
+ +                /* Assume the shifts are not more than 1 cell */
+ +                /* Start from an empty range (1,-1) and widen it to the
+ +                 * minimum and maximum shift difference over the j-zones.
+ +                 */
+ +                izone->shift0[dim] = 1;
+ +                izone->shift1[dim] = -1;
+ +                for(j=izone->j0; j<izone->j1; j++)
+ +                {
+ +                    shift_diff = zones->shift[j][dim] - zones->shift[i][dim];
+ +                    if (shift_diff < izone->shift0[dim])
+ +                    {
+ +                        izone->shift0[dim] = shift_diff;
+ +                    }
+ +                    if (shift_diff > izone->shift1[dim])
+ +                    {
+ +                        izone->shift1[dim] = shift_diff;
+ +                    }
+ +                }
+ +            }
+ +        }
+ +    }
+ +    
+ +    /* The root array must exist before make_load_communicators() runs,
+ +     * since make_load_communicator() stores into dd->comm->root[].
+ +     */
+ +    if (dd->comm->eDLB != edlbNO)
+ +    {
+ +        snew(dd->comm->root,dd->ndim);
+ +    }
+ +    
+ +    if (dd->comm->bRecordLoad)
+ +    {
+ +        make_load_communicators(dd);
+ +    }
+ +}
+ +
+ +/* Set up the communicator of the particle-particle (PP) nodes and
+ + * determine this node's DD rank, its cell coordinates dd->ci and the
+ + * rank of the DD master.
+ + *
+ + * Three cases are handled (all inside GMX_MPI):
+ + *  - comm->bCartesianPP_PME: build the DD index to DD rank table and
+ + *    find the master rank with a sum reduction;
+ + *  - comm->bCartesianPP only: take the coordinates from the Cartesian
+ + *    communicator and build the DD index to simulation nodeid table;
+ + *  - neither: the DD rank is used directly as the DD index.
+ + *
+ + * reorder is passed on to MPI_Cart_create. Finally the rank and cell
+ + * coordinates are printed to fplog (when not NULL) and to the debug file.
+ + */
+ +static void make_pp_communicator(FILE *fplog,t_commrec *cr,int reorder)
+ +{
+ +    gmx_domdec_t *dd;
+ +    gmx_domdec_comm_t *comm;
+ +    int  i,rank,*buf;
+ +    ivec periods;
+ +#ifdef GMX_MPI
+ +    MPI_Comm comm_cart;
+ +#endif
+ +    
+ +    dd = cr->dd;
+ +    comm = dd->comm;
+ +    
+ +#ifdef GMX_MPI
+ +    if (comm->bCartesianPP)
+ +    {
+ +        /* Set up cartesian communication for the particle-particle part */
+ +        if (fplog)
+ +        {
+ +            fprintf(fplog,"Will use a Cartesian communicator: %d x %d x %d\n",
+ +                    dd->nc[XX],dd->nc[YY],dd->nc[ZZ]);
+ +        }
+ +        
+ +        /* The Cartesian grid is periodic in all three dimensions */
+ +        for(i=0; i<DIM; i++)
+ +        {
+ +            periods[i] = TRUE;
+ +        }
+ +        MPI_Cart_create(cr->mpi_comm_mygroup,DIM,dd->nc,periods,reorder,
+ +                        &comm_cart);
+ +        /* We overwrite the old communicator with the new cartesian one */
+ +        cr->mpi_comm_mygroup = comm_cart;
+ +    }
+ +    
+ +    dd->mpi_comm_all = cr->mpi_comm_mygroup;
+ +    MPI_Comm_rank(dd->mpi_comm_all,&dd->rank);
+ +    
+ +    if (comm->bCartesianPP_PME)
+ +    {
+ +        /* Since we want to use the original cartesian setup for sim,
+ +         * and not the one after split, we need to make an index.
+ +         */
+ +        /* Every node fills in only its own entry of the zeroed table;
+ +         * the sum over nodes then gives the complete table.
+ +         */
+ +        snew(comm->ddindex2ddnodeid,dd->nnodes);
+ +        comm->ddindex2ddnodeid[dd_index(dd->nc,dd->ci)] = dd->rank;
+ +        gmx_sumi(dd->nnodes,comm->ddindex2ddnodeid,cr);
+ +        /* Get the rank of the DD master,
+ +         * above we made sure that the master node is a PP node.
+ +         */
+ +        /* Only the master contributes a non-zero value, so the sum
+ +         * equals the master's DD rank on every node.
+ +         */
+ +        if (MASTER(cr))
+ +        {
+ +            rank = dd->rank;
+ +        }
+ +        else
+ +        {
+ +            rank = 0;
+ +        }
+ +        MPI_Allreduce(&rank,&dd->masterrank,1,MPI_INT,MPI_SUM,dd->mpi_comm_all);
+ +    }
+ +    else if (comm->bCartesianPP)
+ +    {
+ +        if (cr->npmenodes == 0)
+ +        {
+ +            /* The PP communicator is also
+ +             * the communicator for this simulation
+ +             */
+ +            cr->mpi_comm_mysim = cr->mpi_comm_mygroup;
+ +        }
+ +        cr->nodeid = dd->rank;
+ +        
+ +        MPI_Cart_coords(dd->mpi_comm_all,dd->rank,DIM,dd->ci);
+ +        
+ +        /* We need to make an index to go from the coordinates
+ +         * to the nodeid of this simulation.
+ +         */
+ +        snew(comm->ddindex2simnodeid,dd->nnodes);
+ +        snew(buf,dd->nnodes);
+ +        if (cr->duty & DUTY_PP)
+ +        {
+ +            buf[dd_index(dd->nc,dd->ci)] = cr->sim_nodeid;
+ +        }
+ +        /* Communicate the ddindex to simulation nodeid index */
+ +        MPI_Allreduce(buf,comm->ddindex2simnodeid,dd->nnodes,MPI_INT,MPI_SUM,
+ +                      cr->mpi_comm_mysim);
+ +        sfree(buf);
+ +        
+ +        /* Determine the master coordinates and rank.
+ +         * The DD master should be the same node as the master of this sim.
+ +         */
+ +        for(i=0; i<dd->nnodes; i++)
+ +        {
+ +            if (comm->ddindex2simnodeid[i] == 0)
+ +            {
+ +                ddindex2xyz(dd->nc,i,dd->master_ci);
+ +                MPI_Cart_rank(dd->mpi_comm_all,dd->master_ci,&dd->masterrank);
+ +            }
+ +        }
+ +        if (debug)
+ +        {
+ +            fprintf(debug,"The master rank is %d\n",dd->masterrank);
+ +        }
+ +    }
+ +    else
+ +    {
+ +        /* No Cartesian communicators */
+ +        /* We use the rank in dd->comm->all as DD index */
+ +        ddindex2xyz(dd->nc,dd->rank,dd->ci);
+ +        /* The simulation master nodeid is 0, so the DD master rank is also 0 */
+ +        dd->masterrank = 0;
+ +        clear_ivec(dd->master_ci);
+ +    }
+ +#endif
+ +  
+ +    if (fplog)
+ +    {
+ +        fprintf(fplog,
+ +                "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
+ +                dd->rank,dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
+ +    }
+ +    if (debug)
+ +    {
+ +        fprintf(debug,
+ +                "Domain decomposition nodeid %d, coordinates %d %d %d\n\n",
+ +                dd->rank,dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
+ +    }
+ +}
+ +
+ +/* Build the DD index to simulation nodeid table when a Cartesian PP
+ + * communicator is used without a combined PP/PME Cartesian setup.
+ + * Nodes with PP duty contribute their own sim_nodeid; the table is
+ + * completed with a sum reduction over cr->mpi_comm_mysim.
+ + * Does nothing without GMX_MPI.
+ + */
+ +static void receive_ddindex2simnodeid(t_commrec *cr)
+ +{
+ +    gmx_domdec_t      *dd   = cr->dd;
+ +    gmx_domdec_comm_t *comm = dd->comm;
+ +    int               *sendbuf;
+ +
+ +#ifdef GMX_MPI
+ +    if (comm->bCartesianPP && !comm->bCartesianPP_PME)
+ +    {
+ +        snew(comm->ddindex2simnodeid,dd->nnodes);
+ +        snew(sendbuf,dd->nnodes);
+ +        if (cr->duty & DUTY_PP)
+ +        {
+ +            sendbuf[dd_index(dd->nc,dd->ci)] = cr->sim_nodeid;
+ +        }
+ +        /* Communicate the ddindex to simulation nodeid index */
+ +        MPI_Allreduce(sendbuf,comm->ddindex2simnodeid,dd->nnodes,
+ +                      MPI_INT,MPI_SUM,cr->mpi_comm_mysim);
+ +        sfree(sendbuf);
+ +    }
+ +#endif
+ +}
+ +
+ +/* Allocate and return the master-only DD data structure.
+ + *
+ + * ncg    : number of entries allocated for the cg array
+ + * natoms : number of entries of the vector buffer vbuf, which is only
+ + *          allocated when there are more than GMX_DD_NNODES_SENDRECV
+ + *          DD nodes; otherwise vbuf is NULL
+ + *
+ + * The per-node arrays are sized by dd->nnodes and the cell boundary
+ + * arrays by dd->nc[]+1 for each of the DIM dimensions.
+ + */
+ +static gmx_domdec_master_t *init_gmx_domdec_master_t(gmx_domdec_t *dd,
+ +                                                     int ncg,int natoms)
+ +{
+ +    gmx_domdec_master_t *master;
+ +    int d;
+ +
+ +    snew(master,1);
+ +
+ +    snew(master->ncg,dd->nnodes);
+ +    snew(master->index,dd->nnodes+1);
+ +    snew(master->cg,ncg);
+ +    snew(master->nat,dd->nnodes);
+ +    snew(master->ibuf,dd->nnodes*2);
+ +
+ +    snew(master->cell_x,DIM);
+ +    for(d=0; d<DIM; d++)
+ +    {
+ +        snew(master->cell_x[d],dd->nc[d]+1);
+ +    }
+ +
+ +    master->vbuf = NULL;
+ +    if (dd->nnodes > GMX_DD_NNODES_SENDRECV)
+ +    {
+ +        snew(master->vbuf,natoms);
+ +    }
+ +
+ +    return master;
+ +}
+ +
+ +static void split_communicator(FILE *fplog,t_commrec *cr,int dd_node_order,
+ +                               int reorder)
+ +{ /* Give this rank a PP-only or PME-only duty (cr->duty) and split
+ +   * cr->mpi_comm_mysim into cr->mpi_comm_mygroup accordingly.
+ +   * With comm->bCartesianPP set and a PME node count that passes the
+ +   * bDiv test below for y or z, a Cartesian communicator covering both
+ +   * PP and PME nodes is created (reorder is handed to MPI_Cart_create)
+ +   * and replaces cr->mpi_comm_mysim; otherwise the node layout follows
+ +   * dd_node_order.
+ +   * fplog may be NULL, in which case nothing is written to the log.
+ +   */
+ +    gmx_domdec_t *dd;
+ +    gmx_domdec_comm_t *comm;
+ +    int  i,rank;
+ +    gmx_bool bDiv[DIM];
+ +    ivec periods;
+ +#ifdef GMX_MPI
+ +    MPI_Comm comm_cart;
+ +#endif
+ +    
+ +    dd = cr->dd;
+ +    comm = dd->comm;
+ +    
+ +    if (comm->bCartesianPP)
+ +    {
+ +        for(i=1; i<DIM; i++) /* only y and z; bDiv[XX] is never set nor read */
+ +        {
+ +            bDiv[i] = ((cr->npmenodes*dd->nc[i]) % (dd->nnodes) == 0);
+ +        }
+ +        if (bDiv[YY] || bDiv[ZZ])
+ +        {
+ +            comm->bCartesianPP_PME = TRUE;
+ +            /* If we have 2D PME decomposition, which is always in x+y,
+ +             * we stack the PME only nodes in z.
+ +             * Otherwise we choose the direction that provides the thinnest slab
+ +             * of PME only nodes as this will have the least effect
+ +             * on the PP communication.
+ +             * But for the PME communication the opposite might be better.
+ +             */
+ +            if (bDiv[ZZ] && (comm->npmenodes_y > 1 ||
+ +                             !bDiv[YY] ||
+ +                             dd->nc[YY] > dd->nc[ZZ]))
+ +            {
+ +                comm->cartpmedim = ZZ;
+ +            }
+ +            else
+ +            {
+ +                comm->cartpmedim = YY;
+ +            }
+ +            comm->ntot[comm->cartpmedim]
+ +                += (cr->npmenodes*dd->nc[comm->cartpmedim])/dd->nnodes; /* extend the total grid by the PME-only layers */
+ +        }
+ +        else if (fplog)
+ +        {
+ +            fprintf(fplog,"#pmenodes (%d) is not a multiple of nx*ny (%d*%d) or nx*nz (%d*%d)\n",cr->npmenodes,dd->nc[XX],dd->nc[YY],dd->nc[XX],dd->nc[ZZ]);
+ +            fprintf(fplog,
+ +                    "Will not use a Cartesian communicator for PP <-> PME\n\n");
+ +        }
+ +    }
+ +    
+ +#ifdef GMX_MPI
+ +    if (comm->bCartesianPP_PME)
+ +    {
+ +        if (fplog)
+ +        {
+ +            fprintf(fplog,"Will use a Cartesian communicator for PP <-> PME: %d x %d x %d\n",comm->ntot[XX],comm->ntot[YY],comm->ntot[ZZ]);
+ +        }
+ +        
+ +        for(i=0; i<DIM; i++)
+ +        {
+ +            periods[i] = TRUE; /* the Cartesian grid is periodic in every dimension */
+ +        }
+ +        MPI_Cart_create(cr->mpi_comm_mysim,DIM,comm->ntot,periods,reorder,
+ +                        &comm_cart);
+ +        
+ +        MPI_Comm_rank(comm_cart,&rank);
+ +        if (MASTERNODE(cr) && rank != 0)
+ +        {
+ +            gmx_fatal(FARGS,"MPI rank 0 was renumbered by MPI_Cart_create, we do not allow this");
+ +        }
+ +        
+ +        /* With this assignment we lose the link to the original communicator
+ +         * which will usually be MPI_COMM_WORLD, unless we have multisim.
+ +         */
+ +        cr->mpi_comm_mysim = comm_cart;
+ +        cr->sim_nodeid = rank;
+ +        
+ +        MPI_Cart_coords(cr->mpi_comm_mysim,cr->sim_nodeid,DIM,dd->ci);
+ +        
+ +        if (fplog)
+ +        {
+ +            fprintf(fplog,"Cartesian nodeid %d, coordinates %d %d %d\n\n",
+ +                    cr->sim_nodeid,dd->ci[XX],dd->ci[YY],dd->ci[ZZ]);
+ +        }
+ +        
+ +        if (dd->ci[comm->cartpmedim] < dd->nc[comm->cartpmedim]) /* inside the PP grid */
+ +        {
+ +            cr->duty = DUTY_PP;
+ +        }
+ +        if (cr->npmenodes == 0 ||
+ +            dd->ci[comm->cartpmedim] >= dd->nc[comm->cartpmedim]) /* in the layers added beyond the PP grid */
+ +        {
+ +            cr->duty = DUTY_PME;
+ +        }
+ +        
+ +        /* Split the sim communicator into PP and PME only nodes */
+ +        MPI_Comm_split(cr->mpi_comm_mysim,
+ +                       cr->duty,
+ +                       dd_index(comm->ntot,dd->ci),
+ +                       &cr->mpi_comm_mygroup);
+ +    }
+ +    else
+ +    {
+ +        switch (dd_node_order)
+ +        {
+ +        case ddnoPP_PME:
+ +            if (fplog)
+ +            {
+ +                fprintf(fplog,"Order of the nodes: PP first, PME last\n");
+ +            }
+ +            break;
+ +        case ddnoINTERLEAVE:
+ +            /* Interleave the PP-only and PME-only nodes,
+ +             * as on clusters with dual-core machines this will double
+ +             * the communication bandwidth of the PME processes
+ +             * and thus speed up the PP <-> PME and inter PME communication.
+ +             */
+ +            if (fplog)
+ +            {
+ +                fprintf(fplog,"Interleaving PP and PME nodes\n");
+ +            }
+ +            comm->pmenodes = dd_pmenodes(cr);
+ +            break;
+ +        case ddnoCARTESIAN:
+ +            break;
+ +        default:
+ +            gmx_fatal(FARGS,"Unknown dd_node_order=%d",dd_node_order);
+ +        }
+ +    
+ +        if (dd_simnode2pmenode(cr,cr->sim_nodeid) == -1) /* -1 is treated here as "this is a PME-only node" */
+ +        {
+ +            cr->duty = DUTY_PME;
+ +        }
+ +        else
+ +        {
+ +            cr->duty = DUTY_PP;
+ +        }
+ +        
+ +        /* Split the sim communicator into PP and PME only nodes */
+ +        MPI_Comm_split(cr->mpi_comm_mysim,
+ +                       cr->duty,
+ +                       cr->nodeid,
+ +                       &cr->mpi_comm_mygroup);
+ +        MPI_Comm_rank(cr->mpi_comm_mygroup,&cr->nodeid); /* cr->nodeid becomes the rank within the new group */
+ +    }
+ +#endif
+ +
+ +    if (fplog)
+ +    {
+ +        fprintf(fplog,"This is a %s only node\n\n",
+ +                (cr->duty & DUTY_PP) ? "particle-particle" : "PME-mesh");
+ +    }
+ +}
+ +
+ +void make_dd_communicators(FILE *fplog,t_commrec *cr,int dd_node_order)
+ +{ /* Set up the communicators used with domain decomposition.
+ +   * With separate PME nodes (cr->npmenodes > 0) the simulation
+ +   * communicator is first split into a PP and a PME part; ranks with
+ +   * PP duty then get their PP communicator, other ranks only take part
+ +   * in setting up the DD index lookup table.
+ +   * Also sets dd->pme_nodeid and, on the DD master, allocates dd->ma.
+ +   * dd_node_order selects the node layout (ddnoCARTESIAN requests
+ +   * Cartesian communicators); fplog may be NULL.
+ +   */
+ +    gmx_domdec_t *dd;
+ +    gmx_domdec_comm_t *comm;
+ +    int CartReorder;
+ +    
+ +    dd = cr->dd;
+ +    comm = dd->comm;
+ +    
+ +    copy_ivec(dd->nc,comm->ntot); /* start from the PP grid; split_communicator may extend it */
+ +    
+ +    comm->bCartesianPP = (dd_node_order == ddnoCARTESIAN);
+ +    comm->bCartesianPP_PME = FALSE;
+ +    
+ +    /* Reorder the nodes by default. This might change the MPI ranks.
+ +     * Real reordering is only supported on very few architectures,
+ +     * Blue Gene is one of them.
+ +     */
+ +    CartReorder = (getenv("GMX_NO_CART_REORDER") == NULL);
+ +    
+ +    if (cr->npmenodes > 0)
+ +    {
+ +        /* Split the communicator into a PP and PME part */
+ +        split_communicator(fplog,cr,dd_node_order,CartReorder);
+ +        if (comm->bCartesianPP_PME)
+ +        {
+ +            /* We (possibly) reordered the nodes in split_communicator,
+ +             * so it is no longer required in make_pp_communicator.
+ +             */
+ +            CartReorder = FALSE;
+ +        }
+ +    }
+ +    else
+ +    {
+ +        /* All nodes do PP and PME */
+ +#ifdef GMX_MPI    
+ +        /* We do not require separate communicators */
+ +        cr->mpi_comm_mygroup = cr->mpi_comm_mysim;
+ +#endif
+ +    }
+ +    
+ +    if (cr->duty & DUTY_PP)
+ +    {
+ +        /* Copy or make a new PP communicator */
+ +        make_pp_communicator(fplog,cr,CartReorder);
+ +    }
+ +    else
+ +    {
+ +        receive_ddindex2simnodeid(cr); /* ranks without PP duty only join the lookup table reduction */
+ +    }
+ +    
+ +    if (!(cr->duty & DUTY_PME))
+ +    {
+ +        /* Set up the communication to our PME node */
+ +        dd->pme_nodeid = dd_simnode2pmenode(cr,cr->sim_nodeid);
+ +        dd->pme_receive_vir_ener = receive_vir_ener(cr);
+ +        if (debug)
+ +        {
+ +            fprintf(debug,"My pme_nodeid %d receive ener %d\n",
+ +                    dd->pme_nodeid,dd->pme_receive_vir_ener);
+ +        }
+ +    }
+ +    else
+ +    {
+ +        dd->pme_nodeid = -1; /* no separate PME node to talk to */
+ +    }
+ +
+ +    if (DDMASTER(dd))
+ +    {
+ +        dd->ma = init_gmx_domdec_master_t(dd,
+ +                                          comm->cgs_gl.nr,
+ +                                          comm->cgs_gl.index[comm->cgs_gl.nr]); /* charge group count and the last index entry, used as atom count */
+ +    }
+ +}
+ +
+ +/* Parse nc relative cell sizes from size_string for static load balancing
+ + * in direction dir and return them normalized to a sum of one.
+ + * Returns NULL when nc <= 1 or when no size string was given.
+ + * A missing, unparsable or zero entry is a fatal error.
+ + */
+ +static real *get_slb_frac(FILE *fplog,const char *dir,int nc,const char *size_string)
+ +{
+ +    real   *frac;
+ +    real   sum;
+ +    double value;
+ +    int    cell,nread;
+ +
+ +    if (nc <= 1 || size_string == NULL)
+ +    {
+ +        return NULL;
+ +    }
+ +
+ +    if (fplog)
+ +    {
+ +        fprintf(fplog,"Using static load balancing for the %s direction\n",
+ +                dir);
+ +    }
+ +
+ +    snew(frac,nc);
+ +    sum = 0;
+ +    for (cell=0; cell<nc; cell++)
+ +    {
+ +        /* value stays zero when sscanf can not convert anything,
+ +         * in that case nread is not used.
+ +         */
+ +        value = 0;
+ +        sscanf(size_string,"%lf%n",&value,&nread);
+ +        if (value == 0)
+ +        {
+ +            gmx_fatal(FARGS,"Incorrect or not enough DD cell size entries for direction %s: '%s'",dir,size_string);
+ +        }
+ +        frac[cell]   = value;
+ +        sum         += frac[cell];
+ +        /* Continue after the characters consumed for this entry */
+ +        size_string += nread;
+ +    }
+ +
+ +    /* Normalize */
+ +    for (cell=0; cell<nc; cell++)
+ +    {
+ +        frac[cell] /= sum;
+ +    }
+ +
+ +    if (fplog)
+ +    {
+ +        fprintf(fplog,"Relative cell sizes:");
+ +        for (cell=0; cell<nc; cell++)
+ +        {
+ +            fprintf(fplog," %5.3f",frac[cell]);
+ +        }
+ +        fprintf(fplog,"\n");
+ +    }
+ +
+ +    return frac;
+ +}
+ +
+ +/* Return the number of interactions in the topology that carry the IF_BOND
+ + * flag and involve more than two atoms, summed over all molecules.
+ + */
+ +static int multi_body_bondeds_count(gmx_mtop_t *mtop)
+ +{
+ +    gmx_mtop_ilistloop_t iloop;
+ +    t_ilist *ilists;
+ +    int  count,nmol,ftype;
+ +
+ +    count = 0;
+ +    iloop = gmx_mtop_ilistloop_init(mtop);
+ +    while (gmx_mtop_ilistloop_next(iloop,&ilists,&nmol))
+ +    {
+ +        for(ftype=0; ftype<F_NRE; ftype++)
+ +        {
+ +            if (!(interaction_function[ftype].flags & IF_BOND) ||
+ +                NRAL(ftype) <= 2)
+ +            {
+ +                /* Not a bonded interaction or at most a two-body one */
+ +                continue;
+ +            }
+ +            /* An ilist entry takes 1 + NRAL(ftype) integers */
+ +            count += nmol*ilists[ftype].nr/(1 + NRAL(ftype));
+ +        }
+ +    }
+ +
+ +    return count;
+ +}
+ +
+ +/* Return the integer value of environment variable env_var.
+ + * When the variable is not set def is returned; when it is set but does
+ + * not start with an integer the value 1 is used.
+ + * A variable that is set is reported in fplog (when fplog is not NULL).
+ + */
+ +static int dd_nst_env(FILE *fplog,const char *env_var,int def)
+ +{
+ +    const char *text;
+ +    int  value;
+ +
+ +    text = getenv(env_var);
+ +    if (text == NULL)
+ +    {
+ +        return def;
+ +    }
+ +
+ +    value = def;
+ +    if (sscanf(text,"%d",&value) <= 0)
+ +    {
+ +        /* Set without a usable number: treat it as "switched on" */
+ +        value = 1;
+ +    }
+ +    if (fplog)
+ +    {
+ +        fprintf(fplog,"Found env.var. %s = %s, using value %d\n",
+ +                env_var,text,value);
+ +    }
+ +
+ +    return value;
+ +}
+ +
+ +/* Print warn_string, surrounded by newlines, to stderr on the master
+ + * and to the log file when fplog is not NULL.
+ + */
+ +static void dd_warning(t_commrec *cr,FILE *fplog,const char *warn_string)
+ +{
+ +    FILE *sink[2];
+ +    int  s;
+ +
+ +    /* stderr first, then the log file, skipping whatever is not available */
+ +    sink[0] = MASTER(cr) ? stderr : NULL;
+ +    sink[1] = fplog;
+ +
+ +    for(s=0; s<2; s++)
+ +    {
+ +        if (sink[s] != NULL)
+ +        {
+ +            fprintf(sink[s],"\n%s\n",warn_string);
+ +        }
+ +    }
+ +}
+ +
+ +/* Check the input settings against what domain decomposition supports.
+ + * Unsupported combinations end in a fatal error, comm-mode angular with
+ + * pbc only produces a warning.
+ + */
+ +static void check_dd_restrictions(t_commrec *cr,gmx_domdec_t *dd,
+ +                                  t_inputrec *ir,FILE *fplog)
+ +{
+ +    gmx_bool bOnlyX;
+ +
+ +    /* Screw pbc needs decomposition along x and along x only */
+ +    bOnlyX = (dd->nc[XX] != 1 && dd->nc[YY] <= 1 && dd->nc[ZZ] <= 1);
+ +    if (ir->ePBC == epbcSCREW && !bOnlyX)
+ +    {
+ +        gmx_fatal(FARGS,"With pbc=%s can only do domain decomposition in the x-direction",epbc_names[ir->ePBC]);
+ +    }
+ +
+ +    if (ir->ns_type == ensSIMPLE)
+ +    {
+ +        gmx_fatal(FARGS,"Domain decomposition does not support simple neighbor searching, use grid searching or use particle decomposition");
+ +    }
+ +
+ +    if (ir->nstlist == 0)
+ +    {
+ +        gmx_fatal(FARGS,"Domain decomposition does not work with nstlist=0");
+ +    }
+ +
+ +    if (ir->ePBC != epbcNONE && ir->comm_mode == ecmANGULAR)
+ +    {
+ +        dd_warning(cr,fplog,"comm-mode angular will give incorrect results when the comm group partially crosses a periodic boundary");
+ +    }
+ +}
+ +
+ +/* Return the smallest average cell size over the decomposed dimensions,
+ + * where the box size is corrected with the skew factor.
+ + * The result never exceeds the box size in x, which is the start value.
+ + */
+ +static real average_cellsize_min(gmx_domdec_t *dd,gmx_ddbox_t *ddbox)
+ +{
+ +    real smallest,avg;
+ +    int  k,dim;
+ +
+ +    smallest = ddbox->box_size[XX];
+ +    for(k=0; k<dd->ndim; k++)
+ +    {
+ +        dim = dd->dim[k];
+ +        /* Check using the initial average cell size */
+ +        avg = ddbox->box_size[dim]*ddbox->skew_fac[dim]/dd->nc[dim];
+ +        smallest = min(smallest,avg);
+ +    }
+ +
+ +    return smallest;
+ +}
+ +
+ +static int check_dlb_support(FILE *fplog,t_commrec *cr,
+ +                             const char *dlb_opt,gmx_bool bRecordLoad,
+ +                             unsigned long Flags,t_inputrec *ir)
+ +{ /* Translate the user's dlb option into an edlb* value and downgrade
+ +   * it to edlbNO when dynamic load balancing can not be used:
+ +   * for reruns, for non-dynamics integrators and when the load can not
+ +   * be recorded. Only the first character of dlb_opt is looked at
+ +   * ('a', 'n' or 'y'). Notes and warnings go through dd_warning.
+ +   * Returns the resulting edlb* value.
+ +   */
+ +    gmx_domdec_t *dd; /* NOTE(review): not used in this function */
+ +    int  eDLB=-1;
+ +    char buf[STRLEN];
+ +
+ +    switch (dlb_opt[0])
+ +    {
+ +    case 'a': eDLB = edlbAUTO; break;
+ +    case 'n': eDLB = edlbNO;   break;
+ +    case 'y': eDLB = edlbYES;  break;
+ +    default: gmx_incons("Unknown dlb_opt");
+ +    }
+ +
+ +    if (Flags & MD_RERUN) /* no message is printed for reruns */
+ +    {
+ +        return edlbNO;
+ +    }
+ +
+ +    if (!EI_DYNAMICS(ir->eI))
+ +    {
+ +        if (eDLB == edlbYES) /* only note this when DLB was explicitly requested */
+ +        {
+ +            sprintf(buf,"NOTE: dynamic load balancing is only supported with dynamics, not with integrator '%s'\n",EI(ir->eI));
+ +            dd_warning(cr,fplog,buf);
+ +        }
+ +            
+ +        return edlbNO;
+ +    }
+ +
+ +    if (!bRecordLoad)
+ +    {
+ +        dd_warning(cr,fplog,"NOTE: Cycle counting is not supported on this architecture, will not use dynamic load balancing\n");
+ +
+ +        return edlbNO;
+ +    }
+ +
+ +    if (Flags & MD_REPRODUCIBLE)
+ +    {
+ +        switch (eDLB) /* auto is switched off, an explicit yes is kept but warned about */
+ +        {
+ +                      case edlbNO: 
+ +                              break;
+ +                      case edlbAUTO:
-                               dd_warning(cr,fplog,"WARNING: reproducability requested with dynamic load balancing, the simulation will NOT be binary reproducable\n");
++                              dd_warning(cr,fplog,"NOTE: reproducibility requested, will not use dynamic load balancing\n");
+ +                              eDLB = edlbNO;
+ +                              break;
+ +                      case edlbYES:
++                              dd_warning(cr,fplog,"WARNING: reproducibility requested with dynamic load balancing, the simulation will NOT be binary reproducible\n");
+ +                              break;
+ +                      default:
+ +                              gmx_fatal(FARGS,"Death horror: undefined case (%d) for load balancing choice",eDLB);
+ +                              break;
+ +        }
+ +    }
+ +
+ +    return eDLB;
+ +}
+ +
+ +/* Fill dd->dim with the dimensions that have more than one cell and set
+ + * dd->ndim to their count. The order is x,y,z, unless the environment
+ + * variable GMX_DD_ORDER_ZYX is set, then it is z,y,x.
+ + */
+ +static void set_dd_dim(FILE *fplog,gmx_domdec_t *dd)
+ +{
+ +    gmx_bool bZYX;
+ +    int  k,dim;
+ +
+ +    bZYX = (getenv("GMX_DD_ORDER_ZYX") != NULL);
+ +    if (bZYX && fplog)
+ +    {
+ +        fprintf(fplog,"Using domain decomposition order z, y, x\n");
+ +    }
+ +
+ +    dd->ndim = 0;
+ +    for(k=0; k<DIM; k++)
+ +    {
+ +        /* Walk the dimensions backwards for decomposition order z,y,x */
+ +        dim = bZYX ? DIM-1-k : k;
+ +        if (dd->nc[dim] > 1)
+ +        {
+ +            dd->dim[dd->ndim] = dim;
+ +            dd->ndim++;
+ +        }
+ +    }
+ +}
+ +
+ +/* Allocate a gmx_domdec_comm_t, including its cggl_flag and cgcm_state
+ + * arrays, and reset its buffers and load statistics.
+ + * Returns the new structure.
+ + */
+ +static gmx_domdec_comm_t *init_dd_comm()
+ +{
+ +    gmx_domdec_comm_t *c;
+ +    int  k;
+ +
+ +    snew(c,1);
+ +
+ +    /* Flag and state buffers: DIM*2 of each, all without storage yet */
+ +    snew(c->cggl_flag,DIM*2);
+ +    snew(c->cgcm_state,DIM*2);
+ +    for(k=0; k<DIM*2; k++)
+ +    {
+ +        c->cggl_flag_nalloc[k]  = 0;
+ +        c->cgcm_state_nalloc[k] = 0;
+ +    }
+ +
+ +    /* Communication buffers */
+ +    c->buf_int    = NULL;
+ +    c->nalloc_int = 0;
+ +    vec_rvec_init(&c->vbuf);
+ +
+ +    /* Load measurement bookkeeping */
+ +    c->n_load_have    = 0;
+ +    c->n_load_collect = 0;
+ +
+ +    /* Statistics accumulated over the run */
+ +    for(k=0; k<ddnatNR-ddnatZONE; k++)
+ +    {
+ +        c->sum_nat[k] = 0;
+ +    }
+ +    c->ndecomp   = 0;
+ +    c->nload     = 0;
+ +    c->load_step = 0;
+ +    c->load_sum  = 0;
+ +    c->load_max  = 0;
+ +    clear_ivec(c->load_lim);
+ +    c->load_mdf  = 0;
+ +    c->load_pme  = 0;
+ +
+ +    return c;
+ +}
+ +
+ +/* Allocate and initialize the domain decomposition data for this run.
+ + *
+ + * Reads several GMX_* environment variables for tuning and debugging,
+ + * determines whether dynamic load balancing can be used, derives the
+ + * minimum cell size from the bonded interactions and the constraints,
+ + * validates or chooses the DD grid and sets up the PME decomposition.
+ + *
+ + * nc                 requested DD grid; with nc[XX] > 0 it is used as is,
+ + *                    otherwise the grid is chosen by dd_choose_grid
+ + * comm_distance_min  when > 0, used as the bonded communication distance
+ + * rconstr            when > 0, the user supplied distance for P-LINCS
+ + * dlb_opt            the dlb option, see check_dlb_support
+ + * dlb_scale          scaling factor passed on to the grid choice and used
+ + *                    to cap the multi-body cut-off with DLB
+ + * sizex,sizey,sizez  relative cell sizes for static load balancing,
+ + *                    only used without dynamic load balancing
+ + * ddbox              passed to set_ddbox_cr
+ + * npme_x,npme_y      output: the number of PME nodes along x and y
+ + *
+ + * Returns the new gmx_domdec_t. Calls gmx_fatal_collective when the grid
+ + * does not fit the cell size limit or the number of nodes.
+ + */
+ +gmx_domdec_t *init_domain_decomposition(FILE *fplog,t_commrec *cr,
+ +                                        unsigned long Flags,
+ +                                        ivec nc,
+ +                                        real comm_distance_min,real rconstr,
+ +                                        const char *dlb_opt,real dlb_scale,
+ +                                        const char *sizex,const char *sizey,const char *sizez,
+ +                                        gmx_mtop_t *mtop,t_inputrec *ir,
+ +                                        matrix box,rvec *x,
+ +                                        gmx_ddbox_t *ddbox,
+ +                                        int *npme_x,int *npme_y)
+ +{
+ +    gmx_domdec_t *dd;
+ +    gmx_domdec_comm_t *comm;
+ +    int  recload;
+ +    int  d,i,j;
+ +    real r_2b,r_mb,r_bonded=-1,r_bonded_limit=-1,limit,acs;
+ +    gmx_bool bC;
+ +    char buf[STRLEN];
+ +    
+ +    if (fplog)
+ +    {
+ +        fprintf(fplog,
+ +                "\nInitializing Domain Decomposition on %d nodes\n",cr->nnodes);
+ +    }
+ +    
+ +    snew(dd,1);
+ +
+ +    /* init_dd_comm already allocates comm->cggl_flag and comm->cgcm_state,
+ +     * allocating them again here would leak the first arrays.
+ +     */
+ +    dd->comm = init_dd_comm();
+ +    comm = dd->comm;
+ +
+ +    dd->npbcdim   = ePBC2npbcdim(ir->ePBC);
+ +    dd->bScrewPBC = (ir->ePBC == epbcSCREW);
+ +    
+ +    /* Tuning and debugging options set through the environment */
+ +    dd->bSendRecv2      = dd_nst_env(fplog,"GMX_DD_SENDRECV2",0);
+ +    comm->dlb_scale_lim = dd_nst_env(fplog,"GMX_DLB_MAX",10);
+ +    comm->eFlop         = dd_nst_env(fplog,"GMX_DLB_FLOP",0);
+ +    recload             = dd_nst_env(fplog,"GMX_DD_LOAD",1);
+ +    comm->nstSortCG     = dd_nst_env(fplog,"GMX_DD_SORT",1);
+ +    comm->nstDDDump     = dd_nst_env(fplog,"GMX_DD_DUMP",0);
+ +    comm->nstDDDumpGrid = dd_nst_env(fplog,"GMX_DD_DUMP_GRID",0);
+ +    comm->DD_debug      = dd_nst_env(fplog,"GMX_DD_DEBUG",0);
+ +
+ +    dd->pme_recv_f_alloc = 0;
+ +    dd->pme_recv_f_buf = NULL;
+ +
+ +    if (dd->bSendRecv2 && fplog)
+ +    {
+ +        fprintf(fplog,"Will use two sequential MPI_Sendrecv calls instead of two simultaneous non-blocking MPI_Irecv and MPI_Isend pairs for constraint and vsite communication\n");
+ +    }
+ +    if (comm->eFlop)
+ +    {
+ +        if (fplog)
+ +        {
+ +            fprintf(fplog,"Will load balance based on FLOP count\n");
+ +        }
+ +        if (comm->eFlop > 1)
+ +        {
+ +            /* Seed the random generator differently on each node */
+ +            srand(1+cr->nodeid);
+ +        }
+ +        comm->bRecordLoad = TRUE;
+ +    }
+ +    else
+ +    {
+ +        comm->bRecordLoad = (wallcycle_have_counter() && recload > 0);
+ +    }
+ +    
+ +    comm->eDLB = check_dlb_support(fplog,cr,dlb_opt,comm->bRecordLoad,Flags,ir);
+ +    
+ +    comm->bDynLoadBal = (comm->eDLB == edlbYES);
+ +    if (fplog)
+ +    {
+ +        fprintf(fplog,"Dynamic load balancing: %s\n",edlb_names[comm->eDLB]);
+ +    }
+ +    dd->bGridJump = comm->bDynLoadBal;
+ +    
+ +    if (comm->nstSortCG)
+ +    {
+ +        if (fplog)
+ +        {
+ +            if (comm->nstSortCG == 1)
+ +            {
+ +                fprintf(fplog,"Will sort the charge groups at every domain (re)decomposition\n");
+ +            }
+ +            else
+ +            {
+ +                fprintf(fplog,"Will sort the charge groups every %d steps\n",
+ +                        comm->nstSortCG);
+ +            }
+ +        }
+ +        snew(comm->sort,1);
+ +    }
+ +    else
+ +    {
+ +        if (fplog)
+ +        {
+ +            fprintf(fplog,"Will not sort the charge groups\n");
+ +        }
+ +    }
+ +    
+ +    /* More charge groups than molecules means that there are bonded
+ +     * interactions between charge groups.
+ +     */
+ +    comm->bInterCGBondeds = (ncg_mtop(mtop) > mtop->mols.nr);
+ +    if (comm->bInterCGBondeds)
+ +    {
+ +        comm->bInterCGMultiBody = (multi_body_bondeds_count(mtop) > 0);
+ +    }
+ +    else
+ +    {
+ +        comm->bInterCGMultiBody = FALSE;
+ +    }
+ +    
+ +    dd->bInterCGcons = inter_charge_group_constraints(mtop);
+ +
+ +    if (ir->rlistlong == 0)
+ +    {
+ +        /* Set the cut-off to some very large value,
+ +         * so we don't need if statements everywhere in the code.
+ +         * We use sqrt, since the cut-off is squared in some places.
+ +         */
+ +        comm->cutoff   = GMX_CUTOFF_INF;
+ +    }
+ +    else
+ +    {
+ +        comm->cutoff   = ir->rlistlong;
+ +    }
+ +    comm->cutoff_mbody = 0;
+ +    
+ +    comm->cellsize_limit = 0;
+ +    comm->bBondComm = FALSE;
+ +
+ +    if (comm->bInterCGBondeds)
+ +    {
+ +        if (comm_distance_min > 0)
+ +        {
+ +            comm->cutoff_mbody = comm_distance_min;
+ +            if (Flags & MD_DDBONDCOMM)
+ +            {
+ +                comm->bBondComm = (comm->cutoff_mbody > comm->cutoff);
+ +            }
+ +            else
+ +            {
+ +                comm->cutoff = max(comm->cutoff,comm->cutoff_mbody);
+ +            }
+ +            r_bonded_limit = comm->cutoff_mbody;
+ +        }
+ +        else if (ir->bPeriodicMols)
+ +        {
+ +            /* Can not easily determine the required cut-off */
+ +            dd_warning(cr,fplog,"NOTE: Periodic molecules: can not easily determine the required minimum bonded cut-off, using half the non-bonded cut-off\n");
+ +            comm->cutoff_mbody = comm->cutoff/2;
+ +            r_bonded_limit = comm->cutoff_mbody;
+ +        }
+ +        else
+ +        {
+ +            /* Only the master determines the distances,
+ +             * the other nodes receive them through the broadcast.
+ +             */
+ +            if (MASTER(cr))
+ +            {
+ +                dd_bonded_cg_distance(fplog,dd,mtop,ir,x,box,
+ +                                      Flags & MD_DDBONDCHECK,&r_2b,&r_mb);
+ +            }
+ +            gmx_bcast(sizeof(r_2b),&r_2b,cr);
+ +            gmx_bcast(sizeof(r_mb),&r_mb,cr);
+ +
+ +            /* We use an initial margin of 10% for the minimum cell size,
+ +             * except when we are just below the non-bonded cut-off.
+ +             */
+ +            if (Flags & MD_DDBONDCOMM)
+ +            {
+ +                if (max(r_2b,r_mb) > comm->cutoff)
+ +                {
+ +                    r_bonded       = max(r_2b,r_mb);
+ +                    r_bonded_limit = 1.1*r_bonded;
+ +                    comm->bBondComm = TRUE;
+ +                }
+ +                else
+ +                {
+ +                    r_bonded       = r_mb;
+ +                    r_bonded_limit = min(1.1*r_bonded,comm->cutoff);
+ +                }
+ +                /* We determine cutoff_mbody later */
+ +            }
+ +            else
+ +            {
+ +                /* No special bonded communication,
+ +                 * simply increase the DD cut-off.
+ +                 */
+ +                r_bonded_limit     = 1.1*max(r_2b,r_mb);
+ +                comm->cutoff_mbody = r_bonded_limit;
+ +                comm->cutoff       = max(comm->cutoff,comm->cutoff_mbody);
+ +            }
+ +        }
+ +        comm->cellsize_limit = max(comm->cellsize_limit,r_bonded_limit);
+ +        if (fplog)
+ +        {
+ +            fprintf(fplog,
+ +                    "Minimum cell size due to bonded interactions: %.3f nm\n",
+ +                    comm->cellsize_limit);
+ +        }
+ +    }
+ +
+ +    if (dd->bInterCGcons && rconstr <= 0)
+ +    {
+ +        /* There is a cell size limit due to the constraints (P-LINCS) */
+ +        rconstr = constr_r_max(fplog,mtop,ir);
+ +        if (fplog)
+ +        {
+ +            fprintf(fplog,
+ +                    "Estimated maximum distance required for P-LINCS: %.3f nm\n",
+ +                    rconstr);
+ +            if (rconstr > comm->cellsize_limit)
+ +            {
+ +                fprintf(fplog,"This distance will limit the DD cell size, you can override this with -rcon\n");
+ +            }
+ +        }
+ +    }
+ +    else if (rconstr > 0 && fplog)
+ +    {
+ +        /* Here we do not check for dd->bInterCGcons,
+ +         * because one can also set a cell size limit for virtual sites only
+ +         * and at this point we don't know yet if there are intercg v-sites.
+ +         */
+ +        fprintf(fplog,
+ +                "User supplied maximum distance required for P-LINCS: %.3f nm\n",
+ +                rconstr);
+ +    }
+ +    comm->cellsize_limit = max(comm->cellsize_limit,rconstr);
+ +
+ +    comm->cgs_gl = gmx_mtop_global_cgs(mtop);
+ +
+ +    if (nc[XX] > 0)
+ +    {
+ +        /* The DD grid was supplied by the caller */
+ +        copy_ivec(nc,dd->nc);
+ +        set_dd_dim(fplog,dd);
+ +        set_ddbox_cr(cr,&dd->nc,ir,box,&comm->cgs_gl,x,ddbox);
+ +
+ +        if (cr->npmenodes == -1)
+ +        {
+ +            cr->npmenodes = 0;
+ +        }
+ +        acs = average_cellsize_min(dd,ddbox);
+ +        if (acs < comm->cellsize_limit)
+ +        {
+ +            if (fplog)
+ +            {
+ +                fprintf(fplog,"ERROR: The initial cell size (%f) is smaller than the cell size limit (%f)\n",acs,comm->cellsize_limit);
+ +            }
+ +            gmx_fatal_collective(FARGS,cr,NULL,
+ +                                 "The initial cell size (%f) is smaller than the cell size limit (%f), change options -dd, -rdd or -rcon, see the log file for details",
+ +                                 acs,comm->cellsize_limit);
+ +        }
+ +    }
+ +    else
+ +    {
+ +        set_ddbox_cr(cr,NULL,ir,box,&comm->cgs_gl,x,ddbox);
+ +
+ +        /* We need to choose the optimal DD grid and possibly PME nodes */
+ +        limit = dd_choose_grid(fplog,cr,dd,ir,mtop,box,ddbox,
+ +                               comm->eDLB!=edlbNO,dlb_scale,
+ +                               comm->cellsize_limit,comm->cutoff,
+ +                               comm->bInterCGBondeds,comm->bInterCGMultiBody);
+ +        
+ +        if (dd->nc[XX] == 0)
+ +        {
+ +            /* No grid was found, tell the user which option to change */
+ +            bC = (dd->bInterCGcons && rconstr > r_bonded_limit);
+ +            sprintf(buf,"Change the number of nodes or mdrun option %s%s%s",
+ +                    !bC ? "-rdd" : "-rcon",
+ +                    comm->eDLB!=edlbNO ? " or -dds" : "",
+ +                    bC ? " or your LINCS settings" : "");
+ +
+ +            gmx_fatal_collective(FARGS,cr,NULL,
+ +                                 "There is no domain decomposition for %d nodes that is compatible with the given box and a minimum cell size of %g nm\n"
+ +                                 "%s\n"
+ +                                 "Look in the log file for details on the domain decomposition",
+ +                                 cr->nnodes-cr->npmenodes,limit,buf);
+ +        }
+ +        set_dd_dim(fplog,dd);
+ +    }
+ +
+ +    if (fplog)
+ +    {
+ +        fprintf(fplog,
+ +                "Domain decomposition grid %d x %d x %d, separate PME nodes %d\n",
+ +                dd->nc[XX],dd->nc[YY],dd->nc[ZZ],cr->npmenodes);
+ +    }
+ +    
+ +    dd->nnodes = dd->nc[XX]*dd->nc[YY]*dd->nc[ZZ];
+ +    if (cr->nnodes - dd->nnodes != cr->npmenodes)
+ +    {
+ +        gmx_fatal_collective(FARGS,cr,NULL,
+ +                             "The size of the domain decomposition grid (%d) does not match the number of nodes (%d). The total number of nodes is %d",
+ +                             dd->nnodes,cr->nnodes - cr->npmenodes,cr->nnodes);
+ +    }
+ +    if (cr->npmenodes > dd->nnodes)
+ +    {
+ +        gmx_fatal_collective(FARGS,cr,NULL,
+ +                             "The number of separate PME node (%d) is larger than the number of PP nodes (%d), this is not supported.",cr->npmenodes,dd->nnodes);
+ +    }
+ +    if (cr->npmenodes > 0)
+ +    {
+ +        comm->npmenodes = cr->npmenodes;
+ +    }
+ +    else
+ +    {
+ +        /* Without separate PME nodes all DD nodes take part in PME */
+ +        comm->npmenodes = dd->nnodes;
+ +    }
+ +
+ +    if (EEL_PME(ir->coulombtype))
+ +    {
+ +        /* The following choices should match those
+ +         * in comm_cost_est in domdec_setup.c.
+ +         * Note that here the checks have to take into account
+ +         * that the decomposition might occur in a different order than xyz
+ +         * (for instance through the env.var. GMX_DD_ORDER_ZYX),
+ +         * in which case they will not match those in comm_cost_est,
+ +         * but since that is mainly for testing purposes that's fine.
+ +         */
+ +        if (dd->ndim >= 2 && dd->dim[0] == XX && dd->dim[1] == YY &&
+ +            comm->npmenodes > dd->nc[XX] && comm->npmenodes % dd->nc[XX] == 0 &&
+ +            getenv("GMX_PMEONEDD") == NULL)
+ +        {
+ +            comm->npmedecompdim = 2;
+ +            comm->npmenodes_x   = dd->nc[XX];
+ +            comm->npmenodes_y   = comm->npmenodes/comm->npmenodes_x;
+ +        }
+ +        else
+ +        {
+ +            /* In case nc is 1 in both x and y we could still choose to
+ +             * decompose pme in y instead of x, but we use x for simplicity.
+ +             */
+ +            comm->npmedecompdim = 1;
+ +            if (dd->dim[0] == YY)
+ +            {
+ +                comm->npmenodes_x = 1;
+ +                comm->npmenodes_y = comm->npmenodes;
+ +            }
+ +            else
+ +            {
+ +                comm->npmenodes_x = comm->npmenodes;
+ +                comm->npmenodes_y = 1;
+ +            }
+ +        }    
+ +        if (fplog)
+ +        {
+ +            fprintf(fplog,"PME domain decomposition: %d x %d x %d\n",
+ +                    comm->npmenodes_x,comm->npmenodes_y,1);
+ +        }
+ +    }
+ +    else
+ +    {
+ +        comm->npmedecompdim = 0;
+ +        comm->npmenodes_x   = 0;
+ +        comm->npmenodes_y   = 0;
+ +    }
+ +    
+ +    /* Technically we don't need both of these,
+ +     * but it simplifies code not having to recalculate it.
+ +     */
+ +    *npme_x = comm->npmenodes_x;
+ +    *npme_y = comm->npmenodes_y;
+ +        
+ +    snew(comm->slb_frac,DIM);
+ +    if (comm->eDLB == edlbNO)
+ +    {
+ +        /* Static load balancing is only used without any form of DLB */
+ +        comm->slb_frac[XX] = get_slb_frac(fplog,"x",dd->nc[XX],sizex);
+ +        comm->slb_frac[YY] = get_slb_frac(fplog,"y",dd->nc[YY],sizey);
+ +        comm->slb_frac[ZZ] = get_slb_frac(fplog,"z",dd->nc[ZZ],sizez);
+ +    }
+ +
+ +    if (comm->bInterCGBondeds && comm->cutoff_mbody == 0)
+ +    {
+ +        if (comm->bBondComm || comm->eDLB != edlbNO)
+ +        {
+ +            /* Set the bonded communication distance to halfway
+ +             * the minimum and the maximum,
+ +             * since the extra communication cost is nearly zero.
+ +             */
+ +            acs = average_cellsize_min(dd,ddbox);
+ +            comm->cutoff_mbody = 0.5*(r_bonded + acs);
+ +            if (comm->eDLB != edlbNO)
+ +            {
+ +                /* Check if this does not limit the scaling */
+ +                comm->cutoff_mbody = min(comm->cutoff_mbody,dlb_scale*acs);
+ +            }
+ +            if (!comm->bBondComm)
+ +            {
+ +                /* Without bBondComm do not go beyond the n.b. cut-off */
+ +                comm->cutoff_mbody = min(comm->cutoff_mbody,comm->cutoff);
+ +                if (comm->cellsize_limit >= comm->cutoff)
+ +                {
+ +                    /* We don't lose a lot of efficiency
+ +                     * when increasing it to the n.b. cut-off.
+ +                     * It can even be slightly faster, because we need
+ +                     * less checks for the communication setup.
+ +                     */
+ +                    comm->cutoff_mbody = comm->cutoff;
+ +                }
+ +            }
+ +            /* Check if we did not end up below our original limit */
+ +            comm->cutoff_mbody = max(comm->cutoff_mbody,r_bonded_limit);
+ +
+ +            if (comm->cutoff_mbody > comm->cellsize_limit)
+ +            {
+ +                comm->cellsize_limit = comm->cutoff_mbody;
+ +            }
+ +        }
+ +        /* Without DLB and cutoff_mbody<cutoff, cutoff_mbody is dynamic */
+ +    }
+ +
+ +    if (debug)
+ +    {
+ +        fprintf(debug,"Bonded atom communication beyond the cut-off: %d\n"
+ +                "cellsize limit %f\n",
+ +                comm->bBondComm,comm->cellsize_limit);
+ +    }
+ +    
+ +    if (MASTER(cr))
+ +    {
+ +        check_dd_restrictions(cr,dd,ir,fplog);
+ +    }
+ +
+ +    comm->globalcomm_step = INT_MIN;
+ +    dd->ddp_count = 0;
+ +
+ +    clear_dd_cycle_counts(dd);
+ +
+ +    return dd;
+ +}
+ +
+ +/* Switch the communication pulse count and the minimum cell size of each
+ + * decomposed dimension to the values stored for dynamic load balancing.
+ + */
+ +static void set_dlb_limits(gmx_domdec_t *dd)
+ +{
+ +    gmx_domdec_comm_t *comm = dd->comm;
+ +    int di,dim;
+ +
+ +    for(di=0; di<dd->ndim; di++)
+ +    {
+ +        dim = dd->dim[di];
+ +
+ +        /* cd is indexed by DD dimension index, cellsize_min by dimension */
+ +        comm->cd[di].np         = comm->cd[di].np_dlb;
+ +        comm->cellsize_min[dim] = comm->cellsize_min_dlb[dim];
+ +    }
+ +}
+ +
+ +
+ +/* Attempt to switch on dynamic load balancing (DLB) at the given step.
+ + *
+ + * The current force load imbalance loss is written to fplog (when set).
+ + * If the smallest cellsize_min over the decomposed dimensions is below
+ + * 1.05 times cellsize_limit, DLB is not turned on and eDLB is set to
+ + * edlbNO. Otherwise DLB is enabled, the DLB limits are applied and,
+ + * for every dimension where comm->root[d] is set, the relative cell
+ + * boundaries are initialised to a uniform grid.
+ + */
+ +static void turn_on_dlb(FILE *fplog,t_commrec *cr,gmx_large_int_t step)
+ +{
+ +    gmx_domdec_t      *dd   = cr->dd;
+ +    gmx_domdec_comm_t *comm = dd->comm;
+ +    real smallest_cell;
+ +    int  dim_ind,ncell,c;
+ +    char stepbuf[STRLEN];
+ +
+ +    if (fplog != NULL)
+ +    {
+ +        fprintf(fplog,"At step %s the performance loss due to force load imbalance is %.1f %%\n",gmx_step_str(step,stepbuf),dd_force_imb_perf_loss(dd)*100);
+ +    }
+ +
+ +    /* Find the smallest minimum cell size over all DD dimensions */
+ +    smallest_cell = comm->cellsize_min[dd->dim[0]];
+ +    for(dim_ind=1; dim_ind<dd->ndim; dim_ind++)
+ +    {
+ +        smallest_cell = min(smallest_cell,
+ +                            comm->cellsize_min[dd->dim[dim_ind]]);
+ +    }
+ +
+ +    if (smallest_cell < comm->cellsize_limit*1.05)
+ +    {
+ +        dd_warning(cr,fplog,"NOTE: the minimum cell size is smaller than 1.05 times the cell size limit, will not turn on dynamic load balancing\n");
+ +
+ +        /* DLB goes from "auto" to "no" */
+ +        comm->eDLB = edlbNO;
+ +
+ +        return;
+ +    }
+ +
+ +    dd_warning(cr,fplog,"NOTE: Turning on dynamic load balancing\n");
+ +    comm->bDynLoadBal = TRUE;
+ +    dd->bGridJump = TRUE;
+ +
+ +    set_dlb_limits(dd);
+ +
+ +    /* The grid is still completely uniform, so the required cell size
+ +     * info can be filled in locally without any communication.
+ +     */
+ +    for(dim_ind=0; dim_ind<dd->ndim; dim_ind++)
+ +    {
+ +        if (!comm->root[dim_ind])
+ +        {
+ +            continue;
+ +        }
+ +
+ +        comm->load[dim_ind].sum_m = comm->load[dim_ind].sum;
+ +
+ +        ncell = dd->nc[dd->dim[dim_ind]];
+ +
+ +        /* Uniform relative cell boundaries, closed off at 1 */
+ +        for(c=0; c<ncell; c++)
+ +        {
+ +            comm->root[dim_ind]->cell_f[c] = c/(real)ncell;
+ +        }
+ +        comm->root[dim_ind]->cell_f[ncell] = 1.0;
+ +
+ +        if (dim_ind > 0)
+ +        {
+ +            /* The extra boundary arrays are only filled
+ +             * beyond the first DD dimension.
+ +             */
+ +            for(c=0; c<ncell; c++)
+ +            {
+ +                comm->root[dim_ind]->cell_f_max0[c] =  c   /(real)ncell;
+ +                comm->root[dim_ind]->cell_f_min1[c] = (c+1)/(real)ncell;
+ +            }
+ +        }
+ +    }
+ +}
+ +
+ +/* Allocate an array of ncg_mtop(mtop) flags, all set to FALSE,
+ + * and return it.
+ + */
+ +static char *init_bLocalCG(gmx_mtop_t *mtop)
+ +{
+ +    char *flags;
+ +    int  ncg_tot,i;
+ +
+ +    ncg_tot = ncg_mtop(mtop);
+ +    snew(flags,ncg_tot);
+ +
+ +    i = 0;
+ +    while (i < ncg_tot)
+ +    {
+ +        flags[i] = FALSE;
+ +        i++;
+ +    }
+ +
+ +    return flags;
+ +}
+ +
+ +/* Initialise the bonded-interaction data of the domain decomposition.
+ + *
+ + * First builds the reverse topology through dd_make_reverse_top.
+ + * Then, when comm->bBondComm is set, the charge group links and the
+ + * bLocalCG flag array are created; otherwise both are set to NULL.
+ + *
+ + * fplog, vsite, constr, ir and bBCheck are only passed on to
+ + * dd_make_reverse_top; cginfo_mb is only used for the charge group links.
+ + */
+ +void dd_init_bondeds(FILE *fplog,
+ +                     gmx_domdec_t *dd,gmx_mtop_t *mtop,
+ +                     gmx_vsite_t *vsite,gmx_constr_t constr,
+ +                     t_inputrec *ir,gmx_bool bBCheck,cginfo_mb_t *cginfo_mb)
+ +{
+ +    gmx_domdec_comm_t *comm;
+ +
+ +    dd_make_reverse_top(fplog,dd,mtop,vsite,constr,ir,bBCheck);
+ +
+ +    comm = dd->comm;
+ +
+ +    if (comm->bBondComm)
+ +    {
+ +        /* Communicate atoms beyond the cut-off for bonded interactions */
+ +        comm->cglink = make_charge_group_links(mtop,dd,cginfo_mb);
+ +
+ +        comm->bLocalCG = init_bLocalCG(mtop);
+ +    }
+ +    else
+ +    {
+ +        /* Only communicate atoms based on cut-off */
+ +        comm->cglink   = NULL;
+ +        comm->bLocalCG = NULL;
+ +    }
+ +}
+ +
+ +/* Write the domain decomposition settings to the log file.
+ + *
+ + * Returns immediately when fplog is NULL.
+ + * With bDynLoadBal set, the DLB pulse counts, the cell size limit,
+ + * the requested shrink (dlb_scale) and the allowed shrink per dimension
+ + * are printed. Without it, the initial pulse counts as returned by
+ + * set_dd_cell_sizes_slb and the initial cell sizes are printed.
+ + * When inter charge-group bondeds, vsite communication or constraint
+ + * communication is present, the maximum allowed distances for the
+ + * different interaction types are listed as well.
+ + */
+ +static void print_dd_settings(FILE *fplog,gmx_domdec_t *dd,
+ +                              t_inputrec *ir,
+ +                              gmx_bool bDynLoadBal,real dlb_scale,
+ +                              gmx_ddbox_t *ddbox)
+ +{
+ +    gmx_domdec_comm_t *comm;
+ +    int  i,dim;
+ +    ivec npulse;
+ +    real dist_limit,shrink,r_multi;
+ +    char label[64];
+ +
+ +    if (fplog == NULL)
+ +    {
+ +        return;
+ +    }
+ +
+ +    comm = dd->comm;
+ +
+ +    if (!bDynLoadBal)
+ +    {
+ +        /* Static load balancing: report the initial grid */
+ +        set_dd_cell_sizes_slb(dd,ddbox,FALSE,npulse);
+ +        fprintf(fplog,"The initial number of communication pulses is:");
+ +        for(i=0; i<dd->ndim; i++)
+ +        {
+ +            dim = dd->dim[i];
+ +            fprintf(fplog," %c %d",dim2char(dim),npulse[dim]);
+ +        }
+ +        fprintf(fplog,"\n");
+ +        fprintf(fplog,"The initial domain decomposition cell size is:");
+ +        for(dim=0; dim<DIM; dim++)
+ +        {
+ +            if (dd->nc[dim] > 1)
+ +            {
+ +                fprintf(fplog," %c %.2f nm",
+ +                        dim2char(dim),comm->cellsize_min[dim]);
+ +            }
+ +        }
+ +        fprintf(fplog,"\n\n");
+ +    }
+ +    else
+ +    {
+ +        /* Dynamic load balancing: report the limits */
+ +        fprintf(fplog,"The maximum number of communication pulses is:");
+ +        for(i=0; i<dd->ndim; i++)
+ +        {
+ +            fprintf(fplog," %c %d",dim2char(dd->dim[i]),comm->cd[i].np_dlb);
+ +        }
+ +        fprintf(fplog,"\n");
+ +        fprintf(fplog,"The minimum size for domain decomposition cells is %.3f nm\n",comm->cellsize_limit);
+ +        fprintf(fplog,"The requested allowed shrink of DD cells (option -dds) is: %.2f\n",dlb_scale);
+ +        fprintf(fplog,"The allowed shrink of domain decomposition cells is:");
+ +        for(dim=0; dim<DIM; dim++)
+ +        {
+ +            if (dd->nc[dim] <= 1)
+ +            {
+ +                /* Dimension is not decomposed, nothing to report */
+ +                continue;
+ +            }
+ +
+ +            /* Two cells along a non-periodic dimension are reported as 0 */
+ +            shrink = 0;
+ +            if (!(dim >= ddbox->npbcdim && dd->nc[dim] == 2))
+ +            {
+ +                shrink =
+ +                    comm->cellsize_min_dlb[dim]/
+ +                    (ddbox->box_size[dim]*ddbox->skew_fac[dim]/dd->nc[dim]);
+ +            }
+ +            fprintf(fplog," %c %.2f",dim2char(dim),shrink);
+ +        }
+ +        fprintf(fplog,"\n");
+ +    }
+ +
+ +    if (comm->bInterCGBondeds || dd->vsite_comm || dd->constraint_comm)
+ +    {
+ +        fprintf(fplog,"The maximum allowed distance for charge groups involved in interactions is:\n");
+ +        fprintf(fplog,"%40s  %-7s %6.3f nm\n",
+ +                "non-bonded interactions","",comm->cutoff);
+ +
+ +        if (!bDynLoadBal)
+ +        {
+ +            if (dynamic_dd_box(ddbox,ir))
+ +            {
+ +                fprintf(fplog,"(the following are initial values, they could change due to box deformation)\n");
+ +            }
+ +            /* The limit is the smallest minimum cell size */
+ +            dist_limit = comm->cellsize_min[XX];
+ +            for(dim=1; dim<DIM; dim++)
+ +            {
+ +                dist_limit = min(dist_limit,comm->cellsize_min[dim]);
+ +            }
+ +        }
+ +        else
+ +        {
+ +            dist_limit = comm->cellsize_limit;
+ +        }
+ +
+ +        if (comm->bInterCGBondeds)
+ +        {
+ +            fprintf(fplog,"%40s  %-7s %6.3f nm\n",
+ +                    "two-body bonded interactions","(-rdd)",
+ +                    max(comm->cutoff,comm->cutoff_mbody));
+ +
+ +            if (comm->bBondComm || dd->bGridJump)
+ +            {
+ +                r_multi = comm->cutoff_mbody;
+ +            }
+ +            else
+ +            {
+ +                r_multi = min(comm->cutoff,dist_limit);
+ +            }
+ +            fprintf(fplog,"%40s  %-7s %6.3f nm\n",
+ +                    "multi-body bonded interactions","(-rdd)",
+ +                    r_multi);
+ +        }
+ +        if (dd->vsite_comm)
+ +        {
+ +            fprintf(fplog,"%40s  %-7s %6.3f nm\n",
+ +                    "virtual site constructions","(-rcon)",dist_limit);
+ +        }
+ +        if (dd->constraint_comm)
+ +        {
+ +            sprintf(label,"atoms separated by up to %d constraints",
+ +                    1+ir->nProjOrder);
+ +            fprintf(fplog,"%40s  %-7s %6.3f nm\n",
+ +                    label,"(-rcon)",dist_limit);
+ +        }
+ +        fprintf(fplog,"\n");
+ +    }
+ +
+ +    fflush(fplog);
+ +}
+ +
+ +/* Finalise the domain decomposition parameters and log them.
+ + *
+ + * In order, this routine:
+ + *  - initialises the PME decomposition data when PME electrostatics is
+ + *    used, otherwise sets npmenodes to 0 and aborts when this node has
+ + *    a PME node id assigned;
+ + *  - sets fr->bMolPBC;
+ + *  - when DLB is not disabled (eDLB != edlbNO), determines the number of
+ + *    communication pulses per dimension (np_dlb), allocates the pulse
+ + *    index arrays, and derives cellsize_limit, cellsize_min_dlb and,
+ + *    if not yet set, cutoff_mbody;
+ + *  - prints the settings through print_dd_settings (a second time with
+ + *    DLB assumed on when eDLB is edlbAUTO);
+ + *  - creates the global to local atom lookup dd->ga2la.
+ + *
+ + * dlb_scale is used to bound the pulse count and is passed on for printing.
+ + */
+ +void set_dd_parameters(FILE *fplog,gmx_domdec_t *dd,real dlb_scale,
+ +                       t_inputrec *ir,t_forcerec *fr,
+ +                       gmx_ddbox_t *ddbox)
+ +{
+ +    gmx_domdec_comm_t *comm;
+ +    int  d,dim,npulse,npulse_d_max,npulse_d;
+ +    gmx_bool bNoCutOff;
+ +    int  natoms_tot;
+ +    real vol_frac;
+ +
+ +    comm = dd->comm;
+ +
+ +    /* A zero rvdw or rcoulomb is treated as "no cut-off" */
+ +    bNoCutOff = (ir->rvdw == 0 || ir->rcoulomb == 0);
+ +
+ +    if (EEL_PME(ir->coulombtype))
+ +    {
+ +        init_ddpme(dd,&comm->ddpme[0],0);
+ +        if (comm->npmedecompdim >= 2)
+ +        {
+ +            init_ddpme(dd,&comm->ddpme[1],1);
+ +        }
+ +    }
+ +    else
+ +    {
+ +        comm->npmenodes = 0;
+ +        if (dd->pme_nodeid >= 0)
+ +        {
+ +            gmx_fatal_collective(FARGS,NULL,dd,
+ +                                 "Can not have separate PME nodes without PME electrostatics");
+ +        }
+ +    }
+ +    
+ +    /* If each molecule is a single charge group
+ +     * or we use domain decomposition for each periodic dimension,
+ +     * we do not need to take pbc into account for the bonded interactions.
+ +     */
+ +    if (fr->ePBC == epbcNONE || !comm->bInterCGBondeds ||
+ +        (dd->nc[XX]>1 && dd->nc[YY]>1 && (dd->nc[ZZ]>1 || fr->ePBC==epbcXY)))
+ +    {
+ +        fr->bMolPBC = FALSE;
+ +    }
+ +    else
+ +    {
+ +        fr->bMolPBC = TRUE;
+ +    }
+ +        
+ +    if (debug)
+ +    {
+ +        fprintf(debug,"The DD cut-off is %f\n",comm->cutoff);
+ +    }
+ +    if (comm->eDLB != edlbNO)
+ +    {
+ +        /* Determine the maximum number of comm. pulses in one dimension */
+ +        
+ +        comm->cellsize_limit = max(comm->cellsize_limit,comm->cutoff_mbody);
+ +        
+ +        /* Determine the maximum required number of grid pulses */
+ +        if (comm->cellsize_limit >= comm->cutoff)
+ +        {
+ +            /* Only a single pulse is required */
+ +            npulse = 1;
+ +        }
+ +        else if (!bNoCutOff && comm->cellsize_limit > 0)
+ +        {
+ +            /* We round down slightly here to avoid overhead due to the latency
+ +             * of extra communication calls when the cut-off
+ +             * would be only slightly longer than the cell size.
+ +             * Later cellsize_limit is redetermined,
+ +             * so we can not miss interactions due to this rounding.
+ +             */
+ +            npulse = (int)(0.96 + comm->cutoff/comm->cellsize_limit);
+ +        }
+ +        else
+ +        {
+ +            /* There is no cell size limit */
+ +            npulse = max(dd->nc[XX]-1,max(dd->nc[YY]-1,dd->nc[ZZ]-1));
+ +        }
+ +
+ +        if (!bNoCutOff && npulse > 1)
+ +        {
+ +            /* See if we can do with less pulses, based on dlb_scale */
+ +            npulse_d_max = 0;
+ +            for(d=0; d<dd->ndim; d++)
+ +            {
+ +                dim = dd->dim[d];
+ +                /* Pulses needed to cover the cut-off when the cells along
+ +                 * this dimension are scaled by dlb_scale.
+ +                 */
+ +                npulse_d = (int)(1 + dd->nc[dim]*comm->cutoff
+ +                                 /(ddbox->box_size[dim]*ddbox->skew_fac[dim]*dlb_scale));
+ +                npulse_d_max = max(npulse_d_max,npulse_d);
+ +            }
+ +            npulse = min(npulse,npulse_d_max);
+ +        }
+ +        
+ +        /* This env var can override npulse */
+ +        d = dd_nst_env(fplog,"GMX_DD_NPULSE",0);
+ +        if (d > 0)
+ +        {
+ +            npulse = d;
+ +        }
+ +
+ +        comm->maxpulse = 1;
+ +        /* Start from "no limit" only without pbc; this is cleared below
+ +         * as soon as one dimension has fewer pulses than nc-1.
+ +         */
+ +        comm->bVacDLBNoLimit = (ir->ePBC == epbcNONE);
+ +        for(d=0; d<dd->ndim; d++)
+ +        {
+ +            /* Never use more pulses than there are other cells */
+ +            comm->cd[d].np_dlb = min(npulse,dd->nc[dd->dim[d]]-1);
+ +            comm->cd[d].np_nalloc = comm->cd[d].np_dlb;
+ +            snew(comm->cd[d].ind,comm->cd[d].np_nalloc);
+ +            comm->maxpulse = max(comm->maxpulse,comm->cd[d].np_dlb);
+ +            if (comm->cd[d].np_dlb < dd->nc[dd->dim[d]]-1)
+ +            {
+ +                comm->bVacDLBNoLimit = FALSE;
+ +            }
+ +        }
+ +        
+ +        /* cellsize_limit is set for LINCS in init_domain_decomposition */
+ +        if (!comm->bVacDLBNoLimit)
+ +        {
+ +            comm->cellsize_limit = max(comm->cellsize_limit,
+ +                                       comm->cutoff/comm->maxpulse);
+ +        }
+ +        comm->cellsize_limit = max(comm->cellsize_limit,comm->cutoff_mbody);
+ +        /* Set the minimum cell size for each DD dimension */
+ +        for(d=0; d<dd->ndim; d++)
+ +        {
+ +            if (comm->bVacDLBNoLimit ||
+ +                comm->cd[d].np_dlb*comm->cellsize_limit >= comm->cutoff)
+ +            {
+ +                comm->cellsize_min_dlb[dd->dim[d]] = comm->cellsize_limit;
+ +            }
+ +            else
+ +            {
+ +                /* The pulses of this dimension must span the cut-off */
+ +                comm->cellsize_min_dlb[dd->dim[d]] =
+ +                    comm->cutoff/comm->cd[d].np_dlb;
+ +            }
+ +        }
+ +        /* A non-positive cutoff_mbody means it has not been set yet */
+ +        if (comm->cutoff_mbody <= 0)
+ +        {
+ +            comm->cutoff_mbody = min(comm->cutoff,comm->cellsize_limit);
+ +        }
+ +        if (comm->bDynLoadBal)
+ +        {
+ +            set_dlb_limits(dd);
+ +        }
+ +    }
+ +    
+ +    print_dd_settings(fplog,dd,ir,comm->bDynLoadBal,dlb_scale,ddbox);
+ +    if (comm->eDLB == edlbAUTO)
+ +    {
+ +        if (fplog)
+ +        {
+ +            fprintf(fplog,"When dynamic load balancing gets turned on, these settings will change to:\n");
+ +        }
+ +        print_dd_settings(fplog,dd,ir,TRUE,dlb_scale,ddbox);
+ +    }
+ +
+ +    /* Estimate the fraction of all atoms that the zones of one node hold;
+ +     * this is passed to ga2la_init together with the total atom count.
+ +     */
+ +    if (ir->ePBC == epbcNONE)
+ +    {
+ +        vol_frac = 1 - 1/(double)dd->nnodes;
+ +    }
+ +    else
+ +    {
+ +        vol_frac =
+ +            (1 + comm_box_frac(dd->nc,comm->cutoff,ddbox))/(double)dd->nnodes;
+ +    }
+ +    if (debug)
+ +    {
+ +        fprintf(debug,"Volume fraction for all DD zones: %f\n",vol_frac);
+ +    }
+ +    /* The last entry of the global charge group index is the atom count */
+ +    natoms_tot = comm->cgs_gl.index[comm->cgs_gl.nr];
+ +   
+ +    dd->ga2la = ga2la_init(natoms_tot,vol_frac*natoms_tot);
+ +}
+ +
+ +/* Merge the charge groups received in communication pulse `pulse`
+ + * into the local per-cell arrays.
+ + *
+ + * The routine works in two passes over the ncell cells:
+ + *  1. going from the last cell to the first, the entries already stored
+ + *     in index_gl, cg_cm, cgindex and cginfo are moved up to make room,
+ + *     and the send indices stored for pulses 1..pulse are corrected
+ + *     for this shift;
+ + *  2. going from the first cell to the last, the received global indices
+ + *     (recv_i) and positions (recv_vr) are copied into the gaps, cginfo
+ + *     is looked up through ddcginfo, cgindex is extended with the atom
+ + *     count of each charge group and ncg_cell[ncell+cell+1] is updated.
+ + *
+ + * NOTE(review): ind->nrecv[ncell] is used as the initial shift, so it
+ + * presumably holds the total number of received charge groups — confirm.
+ + */
+ +static void merge_cg_buffers(int ncell,
+ +                             gmx_domdec_comm_dim_t *cd, int pulse,
+ +                             int  *ncg_cell,
+ +                             int  *index_gl, int  *recv_i,
+ +                             rvec *cg_cm,    rvec *recv_vr,
+ +                             int *cgindex,
+ +                             cginfo_mb_t *cginfo_mb,int *cginfo)
+ +{
+ +    gmx_domdec_ind_t *ind,*ind_p;
+ +    int p,cell,c,cg,cg0,cg1,cg_gl,nat;
+ +    int shift,shift_at;
+ +    
+ +    ind = &cd->ind[pulse];
+ +    
+ +    /* First correct the already stored data */
+ +    shift = ind->nrecv[ncell];
+ +    for(cell=ncell-1; cell>=0; cell--)
+ +    {
+ +        /* After this, shift is the number of charge groups
+ +         * received for the cells before this one.
+ +         */
+ +        shift -= ind->nrecv[cell];
+ +        if (shift > 0)
+ +        {
+ +            /* Move the cg's present from previous grid pulses */
+ +            cg0 = ncg_cell[ncell+cell];
+ +            cg1 = ncg_cell[ncell+cell+1];
+ +            cgindex[cg1+shift] = cgindex[cg1];
+ +            /* Copy from the top down, as source and destination overlap */
+ +            for(cg=cg1-1; cg>=cg0; cg--)
+ +            {
+ +                index_gl[cg+shift] = index_gl[cg];
+ +                copy_rvec(cg_cm[cg],cg_cm[cg+shift]);
+ +                cgindex[cg+shift] = cgindex[cg];
+ +                cginfo[cg+shift] = cginfo[cg];
+ +            }
+ +            /* Correct the already stored send indices for the shift */
+ +            for(p=1; p<=pulse; p++)
+ +            {
+ +                ind_p = &cd->ind[p];
+ +                /* Locate the send index range of this cell in pulse p */
+ +                cg0 = 0;
+ +                for(c=0; c<cell; c++)
+ +                {
+ +                    cg0 += ind_p->nsend[c];
+ +                }
+ +                cg1 = cg0 + ind_p->nsend[cell];
+ +                for(cg=cg0; cg<cg1; cg++)
+ +                {
+ +                    ind_p->index[cg] += shift;
+ +                }
+ +            }
+ +        }
+ +    }
+ +
+ +    /* Merge in the communicated buffers */
+ +    shift = 0;
+ +    shift_at = 0;
+ +    cg0 = 0;
+ +    for(cell=0; cell<ncell; cell++)
+ +    {
+ +        /* First free slot for this cell, after the shifted old entries */
+ +        cg1 = ncg_cell[ncell+cell+1] + shift;
+ +        if (shift_at > 0)
+ +        {
+ +            /* Correct the old cg indices */
+ +            for(cg=ncg_cell[ncell+cell]; cg<cg1; cg++)
+ +            {
+ +                cgindex[cg+1] += shift_at;
+ +            }
+ +        }
+ +        for(cg=0; cg<ind->nrecv[cell]; cg++)
+ +        {
+ +            /* Copy this charge group from the buffer */
+ +            index_gl[cg1] = recv_i[cg0];
+ +            copy_rvec(recv_vr[cg0],cg_cm[cg1]);
+ +            /* Add it to the cgindex */
+ +            cg_gl = index_gl[cg1];
+ +            cginfo[cg1] = ddcginfo(cginfo_mb,cg_gl);
+ +            nat = GET_CGINFO_NATOMS(cginfo[cg1]);
+ +            cgindex[cg1+1] = cgindex[cg1] + nat;
+ +            cg0++;
+ +            cg1++;
+ +            /* Later atom indices move up by the atoms inserted here */
+ +            shift_at += nat;
+ +        }
+ +        shift += ind->nrecv[cell];
+ +        ncg_cell[ncell+cell+1] = cg1;
+ +    }
+ +}
+ +
+ +/* Record, for every zone and every pulse of cd, the atom range
+ + * [cell2at0,cell2at1) covered by the charge groups received in that
+ + * pulse. Charge groups are counted from cg0 onwards and converted
+ + * to atom indices through cgindex. These boundaries make copying of
+ + * communication buffers easy.
+ + */
+ +static void make_cell2at_index(gmx_domdec_comm_dim_t *cd,
+ +                               int nzone,int cg0,const int *cgindex)
+ +{
+ +    gmx_domdec_ind_t *ind;
+ +    int cg_first,cg_end,z,pulse;
+ +
+ +    cg_first = cg0;
+ +    for(z=0; z<nzone; z++)
+ +    {
+ +        for(pulse=0; pulse<cd->np; pulse++)
+ +        {
+ +            ind = &cd->ind[pulse];
+ +
+ +            cg_end = cg_first + ind->nrecv[z];
+ +            ind->cell2at0[z] = cgindex[cg_first];
+ +            ind->cell2at1[z] = cgindex[cg_end];
+ +
+ +            /* The next block starts where this one ended */
+ +            cg_first = cg_end;
+ +        }
+ +    }
+ +}
+ +
+ +/* Return TRUE when at least one charge group linked to global charge
+ + * group cg_gl is not flagged in bLocalCG, FALSE otherwise.
+ + *
+ + * link     - link list; entries index[cg_gl] .. index[cg_gl+1]-1 of
+ + *            link->a are the charge groups linked to cg_gl
+ + * cg_gl    - global charge group index to check
+ + * bLocalCG - per charge group flags, indexed by the entries of link->a
+ + */
+ +static gmx_bool missing_link(t_blocka *link,int cg_gl,char *bLocalCG)
+ +{
+ +    int  i;
+ +
+ +    for(i=link->index[cg_gl]; i<link->index[cg_gl+1]; i++)
+ +    {
+ +        if (!bLocalCG[link->a[i]])
+ +        {
+ +            /* One missing link settles the answer, stop scanning */
+ +            return TRUE;
+ +        }
+ +    }
+ +
+ +    return FALSE;
+ +}
+ +
+ +static void setup_dd_communication(gmx_domdec_t *dd,
+ +                                   matrix box,gmx_ddbox_t *ddbox,t_forcerec *fr)
+ +{
+ +    int dim_ind,dim,dim0,dim1=-1,dim2=-1,dimd,p,nat_tot;
+ +    int nzone,nzone_send,zone,zonei,cg0,cg1;
+ +    int c,i,j,cg,cg_gl,nrcg;
+ +    int *zone_cg_range,pos_cg,*index_gl,*cgindex,*recv_i;
+ +    gmx_domdec_comm_t *comm;
+ +    gmx_domdec_zones_t *zones;
+ +    gmx_domdec_comm_dim_t *cd;
+ +    gmx_domdec_ind_t *ind;
+ +    cginfo_mb_t *cginfo_mb;
+ +    gmx_bool bBondComm,bDist2B,bDistMB,bDistMB_pulse,bDistBonded,bScrew;
+ +    real r_mb,r_comm2,r_scomm2,r_bcomm2,r,r_0,r_1,r2,rb2,r2inc,inv_ncg,tric_sh;
+ +    rvec rb,rn;
+ +    real corner[DIM][4],corner_round_0=0,corner_round_1[4];
+ +    real bcorner[DIM],bcorner_round_1=0;
+ +    ivec tric_dist;
+ +    rvec *cg_cm,*normal,*v_d,*v_0=NULL,*v_1=NULL,*recv_vr;
+ +    real skew_fac2_d,skew_fac_01;
+ +    rvec sf2_round;
+ +    int  nsend,nat;
+ +    
+ +    if (debug)
+ +    {
+ +        fprintf(debug,"Setting up DD communication\n");
+ +    }
+ +    
+ +    comm  = dd->comm;
+ +    cg_cm = fr->cg_cm;
+ +
+ +    for(dim_ind=0; dim_ind<dd->ndim; dim_ind++)
+ +    {
+ +        dim = dd->dim[dim_ind];
+ +
+ +        /* Check if we need to use triclinic distances */
+ +        tric_dist[dim_ind] = 0;
+ +        for(i=0; i<=dim_ind; i++)
+ +        {
+ +            if (ddbox->tric_dir[dd->dim[i]])
+ +            {
+ +                tric_dist[dim_ind] = 1;
+ +            }
+ +        }
+ +    }
+ +
+ +    bBondComm = comm->bBondComm;
+ +
+ +    /* Do we need to determine extra distances for multi-body bondeds? */
+ +    bDistMB = (comm->bInterCGMultiBody && dd->bGridJump && dd->ndim > 1);
+ +    
+ +    /* Do we need to determine extra distances for only two-body bondeds? */
+ +    bDist2B = (bBondComm && !bDistMB);
+ +
+ +    r_comm2  = sqr(comm->cutoff);
+ +    r_bcomm2 = sqr(comm->cutoff_mbody);
+ +
+ +    if (debug)
+ +    {
+ +        fprintf(debug,"bBondComm %d, r_bc %f\n",bBondComm,sqrt(r_bcomm2));
+ +    }
+ +
+ +    zones = &comm->zones;
+ +    
+ +    dim0 = dd->dim[0];
+ +    /* The first dimension is equal for all cells */
+ +    corner[0][0] = comm->cell_x0[dim0];
+ +    if (bDistMB)
+ +    {
+ +        bcorner[0] = corner[0][0];
+ +    }
+ +    if (dd->ndim >= 2)
+ +    {
+ +        dim1 = dd->dim[1];
+ +        /* This cell row is only seen from the first row */
+ +        corner[1][0] = comm->cell_x0[dim1];
+ +        /* All rows can see this row */
+ +        corner[1][1] = comm->cell_x0[dim1];
+ +        if (dd->bGridJump)
+ +        {
+ +            corner[1][1] = max(comm->cell_x0[dim1],comm->zone_d1[1].mch0);
+ +            if (bDistMB)
+ +            {
+ +                /* For the multi-body distance we need the maximum */
+ +                bcorner[1] = max(comm->cell_x0[dim1],comm->zone_d1[1].p1_0);
+ +            }
+ +        }
+ +        /* Set the upper-right corner for rounding */
+ +        corner_round_0 = comm->cell_x1[dim0];
+ +        
+ +        if (dd->ndim >= 3)
+ +        {
+ +            dim2 = dd->dim[2];
+ +            for(j=0; j<4; j++)
+ +            {
+ +                corner[2][j] = comm->cell_x0[dim2];
+ +            }
+ +            if (dd->bGridJump)
+ +            {
+ +                /* Use the maximum of the i-cells that see a j-cell */
+ +                for(i=0; i<zones->nizone; i++)
+ +                {
+ +                    for(j=zones->izone[i].j0; j<zones->izone[i].j1; j++)
+ +                    {
+ +                        if (j >= 4)
+ +                        {
+ +                            corner[2][j-4] =
+ +                                max(corner[2][j-4],
+ +                                    comm->zone_d2[zones->shift[i][dim0]][zones->shift[i][dim1]].mch0);
+ +                        }
+ +                    }
+ +                }
+ +                if (bDistMB)
+ +                {
+ +                    /* For the multi-body distance we need the maximum */
+ +                    bcorner[2] = comm->cell_x0[dim2];
+ +                    for(i=0; i<2; i++)
+ +                    {
+ +                        for(j=0; j<2; j++)
+ +                        {
+ +                            bcorner[2] = max(bcorner[2],
+ +                                             comm->zone_d2[i][j].p1_0);
+ +                        }
+ +                    }
+ +                }
+ +            }
+ +            
+ +            /* Set the upper-right corner for rounding */
+ +            /* Cell (0,0,0) and cell (1,0,0) can see cell 4 (0,1,1)
+ +             * Only cell (0,0,0) can see cell 7 (1,1,1)
+ +             */
+ +            corner_round_1[0] = comm->cell_x1[dim1];
+ +            corner_round_1[3] = comm->cell_x1[dim1];
+ +            if (dd->bGridJump)
+ +            {
+ +                corner_round_1[0] = max(comm->cell_x1[dim1],
+ +                                        comm->zone_d1[1].mch1);
+ +                if (bDistMB)
+ +                {
+ +                    /* For the multi-body distance we need the maximum */
+ +                    bcorner_round_1 = max(comm->cell_x1[dim1],
+ +                                          comm->zone_d1[1].p1_1);
+ +                }
+ +            }
+ +        }
+ +    }
+ +    
+ +    /* Triclinic stuff */
+ +    normal = ddbox->normal;
+ +    skew_fac_01 = 0;
+ +    if (dd->ndim >= 2)
+ +    {
+ +        v_0 = ddbox->v[dim0];
+ +        if (ddbox->tric_dir[dim0] && ddbox->tric_dir[dim1])
+ +        {
+ +            /* Determine the coupling coefficient for the distances
+ +             * to the cell planes along dim0 and dim1 through dim2.
+ +             * This is required for correct rounding.
+ +             */
+ +            skew_fac_01 =
+ +                ddbox->v[dim0][dim1+1][dim0]*ddbox->v[dim1][dim1+1][dim1];
+ +            if (debug)
+ +            {
+ +                fprintf(debug,"\nskew_fac_01 %f\n",skew_fac_01);
+ +            }
+ +        }
+ +    }
+ +    if (dd->ndim >= 3)
+ +    {
+ +        v_1 = ddbox->v[dim1];
+ +    }
+ +    
+ +    zone_cg_range = zones->cg_range;
+ +    index_gl = dd->index_gl;
+ +    cgindex  = dd->cgindex;
+ +    cginfo_mb = fr->cginfo_mb;
+ +    
+ +    zone_cg_range[0]   = 0;
+ +    zone_cg_range[1]   = dd->ncg_home;
+ +    comm->zone_ncg1[0] = dd->ncg_home;
+ +    pos_cg             = dd->ncg_home;
+ +    
+ +    nat_tot = dd->nat_home;
+ +    nzone = 1;
+ +    for(dim_ind=0; dim_ind<dd->ndim; dim_ind++)
+ +    {
+ +        dim = dd->dim[dim_ind];
+ +        cd = &comm->cd[dim_ind];
+ +        
+ +        if (dim >= ddbox->npbcdim && dd->ci[dim] == 0)
+ +        {
+ +            /* No pbc in this dimension, the first node should not comm. */
+ +            nzone_send = 0;
+ +        }
+ +        else
+ +        {
+ +            nzone_send = nzone;
+ +        }
+ +
+ +        bScrew = (dd->bScrewPBC && dim == XX);
+ +        
+ +        v_d = ddbox->v[dim];
+ +        skew_fac2_d = sqr(ddbox->skew_fac[dim]);
+ +
+ +        cd->bInPlace = TRUE;
+ +        for(p=0; p<cd->np; p++)
+ +        {
+ +            /* Only atoms communicated in the first pulse are used
+ +             * for multi-body bonded interactions or for bBondComm.
+ +             */
+ +            bDistBonded   = ((bDistMB || bDist2B) && p == 0);
+ +            bDistMB_pulse = (bDistMB && bDistBonded);
+ +
+ +            ind = &cd->ind[p];
+ +            nsend = 0;
+ +            nat = 0;
+ +            for(zone=0; zone<nzone_send; zone++)
+ +            {
+ +                if (tric_dist[dim_ind] && dim_ind > 0)
+ +                {
+ +                    /* Determine slightly more optimized skew_fac's
+ +                     * for rounding.
+ +                     * This reduces the number of communicated atoms
+ +                     * by about 10% for 3D DD of rhombic dodecahedra.
+ +                     */
+ +                    for(dimd=0; dimd<dim; dimd++)
+ +                    {
+ +                        sf2_round[dimd] = 1;
+ +                        if (ddbox->tric_dir[dimd])
+ +                        {
+ +                            for(i=dd->dim[dimd]+1; i<DIM; i++)
+ +                            {
+ +                                /* If we are shifted in dimension i
+ +                                 * and the cell plane is tilted forward
+ +                                 * in dimension i, skip this coupling.
+ +                                 */
+ +                                if (!(zones->shift[nzone+zone][i] &&
+ +                                      ddbox->v[dimd][i][dimd] >= 0))
+ +                                {
+ +                                    sf2_round[dimd] +=
+ +                                        sqr(ddbox->v[dimd][i][dimd]);
+ +                                }
+ +                            }
+ +                            sf2_round[dimd] = 1/sf2_round[dimd];
+ +                        }
+ +                    }
+ +                }
+ +
+ +                zonei = zone_perm[dim_ind][zone];
+ +                if (p == 0)
+ +                {
+ +                    /* Here we permutate the zones to obtain a convenient order
+ +                     * for neighbor searching
+ +                     */
+ +                    cg0 = zone_cg_range[zonei];
+ +                    cg1 = zone_cg_range[zonei+1];
+ +                }
+ +                else
+ +                {
+ +                    /* Look only at the cg's received in the previous grid pulse
+ +                     */
+ +                    cg1 = zone_cg_range[nzone+zone+1];
+ +                    cg0 = cg1 - cd->ind[p-1].nrecv[zone];
+ +                }
+ +                ind->nsend[zone] = 0;
+ +                for(cg=cg0; cg<cg1; cg++)
+ +                {
+ +                    r2  = 0;
+ +                    rb2 = 0;
+ +                    if (tric_dist[dim_ind] == 0)
+ +                    {
+ +                        /* Rectangular direction, easy */
+ +                        r = cg_cm[cg][dim] - corner[dim_ind][zone];
+ +                        if (r > 0)
+ +                        {
+ +                            r2 += r*r;
+ +                        }
+ +                        if (bDistMB_pulse)
+ +                        {
+ +                            r = cg_cm[cg][dim] - bcorner[dim_ind];
+ +                            if (r > 0)
+ +                            {
+ +                                rb2 += r*r;
+ +                            }
+ +                        }
+ +                        /* Rounding gives at most a 16% reduction
+ +                         * in communicated atoms
+ +                         */
+ +                        if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
+ +                        {
+ +                            r = cg_cm[cg][dim0] - corner_round_0;
+ +                            /* This is the first dimension, so always r >= 0 */
+ +                            r2 += r*r;
+ +                            if (bDistMB_pulse)
+ +                            {
+ +                                rb2 += r*r;
+ +                            }
+ +                        }
+ +                        if (dim_ind == 2 && (zonei == 2 || zonei == 3))
+ +                        {
+ +                            r = cg_cm[cg][dim1] - corner_round_1[zone];
+ +                            if (r > 0)
+ +                            {
+ +                                r2 += r*r;
+ +                            }
+ +                            if (bDistMB_pulse)
+ +                            {
+ +                                r = cg_cm[cg][dim1] - bcorner_round_1;
+ +                                if (r > 0)
+ +                                {
+ +                                    rb2 += r*r;
+ +                                }
+ +                            }
+ +                        }
+ +                    }
+ +                    else
+ +                    {
+ +                        /* Triclinic direction, more complicated */
+ +                        clear_rvec(rn);
+ +                        clear_rvec(rb);
+ +                        /* Rounding, conservative as the skew_fac multiplication
+ +                         * will slightly underestimate the distance.
+ +                         */
+ +                        if (dim_ind >= 1 && (zonei == 1 || zonei == 2))
+ +                        {
+ +                            rn[dim0] = cg_cm[cg][dim0] - corner_round_0;
+ +                            for(i=dim0+1; i<DIM; i++)
+ +                            {
+ +                                rn[dim0] -= cg_cm[cg][i]*v_0[i][dim0];
+ +                            }
+ +                            r2 = rn[dim0]*rn[dim0]*sf2_round[dim0];
+ +                            if (bDistMB_pulse)
+ +                            {
+ +                                rb[dim0] = rn[dim0];
+ +                                rb2 = r2;
+ +                            }
+ +                            /* Take care that the cell planes along dim0 might not
+ +                             * be orthogonal to those along dim1 and dim2.
+ +                             */
+ +                            for(i=1; i<=dim_ind; i++)
+ +                            {
+ +                                dimd = dd->dim[i];
+ +                                if (normal[dim0][dimd] > 0)
+ +                                {
+ +                                    rn[dimd] -= rn[dim0]*normal[dim0][dimd];
+ +                                    if (bDistMB_pulse)
+ +                                    {
+ +                                        rb[dimd] -= rb[dim0]*normal[dim0][dimd];
+ +                                    }
+ +                                }
+ +                            }
+ +                        }
+ +                        if (dim_ind == 2 && (zonei == 2 || zonei == 3))
+ +                        {
+ +                            rn[dim1] += cg_cm[cg][dim1] - corner_round_1[zone];
+ +                            tric_sh = 0;
+ +                            for(i=dim1+1; i<DIM; i++)
+ +                            {
+ +                                tric_sh -= cg_cm[cg][i]*v_1[i][dim1];
+ +                            }
+ +                            rn[dim1] += tric_sh;
+ +                            if (rn[dim1] > 0)
+ +                            {
+ +                                r2 += rn[dim1]*rn[dim1]*sf2_round[dim1];
+ +                                /* Take care of coupling of the distances
+ +                                 * to the planes along dim0 and dim1 through dim2.
+ +                                 */
+ +                                r2 -= rn[dim0]*rn[dim1]*skew_fac_01;
+ +                                /* Take care that the cell planes along dim1
+ +                                 * might not be orthogonal to that along dim2.
+ +                                 */
+ +                                if (normal[dim1][dim2] > 0)
+ +                                {
+ +                                    rn[dim2] -= rn[dim1]*normal[dim1][dim2];
+ +                                }
+ +                            }
+ +                            if (bDistMB_pulse)
+ +                            {
+ +                                rb[dim1] +=
+ +                                    cg_cm[cg][dim1] - bcorner_round_1 + tric_sh;
+ +                                if (rb[dim1] > 0)
+ +                                {
+ +                                    rb2 += rb[dim1]*rb[dim1]*sf2_round[dim1];
+ +                                    /* Take care of coupling of the distances
+ +                                     * to the planes along dim0 and dim1 through dim2.
+ +                                     */
+ +                                    rb2 -= rb[dim0]*rb[dim1]*skew_fac_01;
+ +                                    /* Take care that the cell planes along dim1
+ +                                     * might not be orthogonal to that along dim2.
+ +                                     */
+ +                                    if (normal[dim1][dim2] > 0)
+ +                                    {
+ +                                        rb[dim2] -= rb[dim1]*normal[dim1][dim2];
+ +                                    }
+ +                                }
+ +                            }
+ +                        }
+ +                        /* The distance along the communication direction */
+ +                        rn[dim] += cg_cm[cg][dim] - corner[dim_ind][zone];
+ +                        tric_sh = 0;
+ +                        for(i=dim+1; i<DIM; i++)
+ +                        {
+ +                            tric_sh -= cg_cm[cg][i]*v_d[i][dim];
+ +                        }
+ +                        rn[dim] += tric_sh;
+ +                        if (rn[dim] > 0)
+ +                        {
+ +                            r2 += rn[dim]*rn[dim]*skew_fac2_d;
+ +                            /* Take care of coupling of the distances
+ +                             * to the planes along dim0 and dim1 through dim2.
+ +                             */
+ +                            if (dim_ind == 1 && zonei == 1)
+ +                            {
+ +                                r2 -= rn[dim0]*rn[dim]*skew_fac_01;
+ +                            }
+ +                        }
+ +                        if (bDistMB_pulse)
+ +                        {
+ +                            clear_rvec(rb);
+ +                            rb[dim] += cg_cm[cg][dim] - bcorner[dim_ind] + tric_sh;
+ +                            if (rb[dim] > 0)
+ +                            {
+ +                                rb2 += rb[dim]*rb[dim]*skew_fac2_d;
+ +                                /* Take care of coupling of the distances
+ +                                 * to the planes along dim0 and dim1 through dim2.
+ +                                 */
+ +                                if (dim_ind == 1 && zonei == 1)
+ +                                {
+ +                                    rb2 -= rb[dim0]*rb[dim]*skew_fac_01;
+ +                                }
+ +                            }
+ +                        }
+ +                    }
+ +                    
+ +                    if (r2 < r_comm2 ||
+ +                        (bDistBonded &&
+ +                         ((bDistMB && rb2 < r_bcomm2) ||
+ +                          (bDist2B && r2  < r_bcomm2)) &&
+ +                         (!bBondComm ||
+ +                          (GET_CGINFO_BOND_INTER(fr->cginfo[cg]) &&
+ +                           missing_link(comm->cglink,index_gl[cg],
+ +                                        comm->bLocalCG)))))
+ +                    {
+ +                        /* Make an index to the local charge groups */
+ +                        if (nsend+1 > ind->nalloc)
+ +                        {
+ +                            ind->nalloc = over_alloc_large(nsend+1);
+ +                            srenew(ind->index,ind->nalloc);
+ +                        }
+ +                        if (nsend+1 > comm->nalloc_int)
+ +                        {
+ +                            comm->nalloc_int = over_alloc_large(nsend+1);
+ +                            srenew(comm->buf_int,comm->nalloc_int);
+ +                        }
+ +                        ind->index[nsend] = cg;
+ +                        comm->buf_int[nsend] = index_gl[cg];
+ +                        ind->nsend[zone]++;
+ +                        vec_rvec_check_alloc(&comm->vbuf,nsend+1);
+ +
+ +                        if (dd->ci[dim] == 0)
+ +                        {
+ +                            /* Correct cg_cm for pbc */
+ +                            rvec_add(cg_cm[cg],box[dim],comm->vbuf.v[nsend]);
+ +                            if (bScrew)
+ +                            {
+ +                                comm->vbuf.v[nsend][YY] =
+ +                                    box[YY][YY]-comm->vbuf.v[nsend][YY];
+ +                                comm->vbuf.v[nsend][ZZ] =
+ +                                    box[ZZ][ZZ]-comm->vbuf.v[nsend][ZZ];
+ +                            }
+ +                        }
+ +                        else
+ +                        {
+ +                            copy_rvec(cg_cm[cg],comm->vbuf.v[nsend]);
+ +                        }
+ +                        nsend++;
+ +                        nat += cgindex[cg+1] - cgindex[cg];
+ +                    }
+ +                }
+ +            }
+ +            /* Clear the counts in case we do not have pbc */
+ +            for(zone=nzone_send; zone<nzone; zone++)
+ +            {
+ +                ind->nsend[zone] = 0;
+ +            }
+ +            ind->nsend[nzone]   = nsend;
+ +            ind->nsend[nzone+1] = nat;
+ +            /* Communicate the number of cg's and atoms to receive */
+ +            dd_sendrecv_int(dd, dim_ind, dddirBackward,
+ +                            ind->nsend, nzone+2,
+ +                            ind->nrecv, nzone+2);
+ +            
+ +            /* The rvec buffer is also required for atom buffers of size nsend
+ +             * in dd_move_x and dd_move_f.
+ +             */
+ +            vec_rvec_check_alloc(&comm->vbuf,ind->nsend[nzone+1]);
+ +
+ +            if (p > 0)
+ +            {
+ +                /* We can receive in place if only the last zone is not empty */
+ +                for(zone=0; zone<nzone-1; zone++)
+ +                {
+ +                    if (ind->nrecv[zone] > 0)
+ +                    {
+ +                        cd->bInPlace = FALSE;
+ +                    }
+ +                }
+ +                if (!cd->bInPlace)
+ +                {
+ +                    /* The int buffer is only required here for the cg indices */
+ +                    if (ind->nrecv[nzone] > comm->nalloc_int2)
+ +                    {
+ +                        comm->nalloc_int2 = over_alloc_dd(ind->nrecv[nzone]);
+ +                        srenew(comm->buf_int2,comm->nalloc_int2);
+ +                    }
+ +                    /* The rvec buffer is also required for atom buffers
+ +                     * of size nrecv in dd_move_x and dd_move_f.
+ +                     */
+ +                    i = max(cd->ind[0].nrecv[nzone+1],ind->nrecv[nzone+1]);
+ +                    vec_rvec_check_alloc(&comm->vbuf2,i);
+ +                }
+ +            }
+ +            
+ +            /* Make space for the global cg indices */
+ +            if (pos_cg + ind->nrecv[nzone] > dd->cg_nalloc
+ +                || dd->cg_nalloc == 0)
+ +            {
+ +                dd->cg_nalloc = over_alloc_dd(pos_cg + ind->nrecv[nzone]);
+ +                srenew(index_gl,dd->cg_nalloc);
+ +                srenew(cgindex,dd->cg_nalloc+1);
+ +            }
+ +            /* Communicate the global cg indices */
+ +            if (cd->bInPlace)
+ +            {
+ +                recv_i = index_gl + pos_cg;
+ +            }
+ +            else
+ +            {
+ +                recv_i = comm->buf_int2;
+ +            }
+ +            dd_sendrecv_int(dd, dim_ind, dddirBackward,
+ +                            comm->buf_int, nsend,
+ +                            recv_i,        ind->nrecv[nzone]);
+ +
+ +            /* Make space for cg_cm */
+ +            if (pos_cg + ind->nrecv[nzone] > fr->cg_nalloc)
+ +            {
+ +                dd_realloc_fr_cg(fr,pos_cg + ind->nrecv[nzone]);
+ +                cg_cm = fr->cg_cm;
+ +            }
+ +            /* Communicate cg_cm */
+ +            if (cd->bInPlace)
+ +            {
+ +                recv_vr = cg_cm + pos_cg;
+ +            }
+ +            else
+ +            {
+ +                recv_vr = comm->vbuf2.v;
+ +            }
+ +            dd_sendrecv_rvec(dd, dim_ind, dddirBackward,
+ +                             comm->vbuf.v, nsend,
+ +                             recv_vr,      ind->nrecv[nzone]);
+ +            
+ +            /* Make the charge group index */
+ +            if (cd->bInPlace)
+ +            {
+ +                zone = (p == 0 ? 0 : nzone - 1);
+ +                while (zone < nzone)
+ +                {
+ +                    for(cg=0; cg<ind->nrecv[zone]; cg++)
+ +                    {
+ +                        cg_gl = index_gl[pos_cg];
+ +                        fr->cginfo[pos_cg] = ddcginfo(cginfo_mb,cg_gl);
+ +                        nrcg = GET_CGINFO_NATOMS(fr->cginfo[pos_cg]);
+ +                        cgindex[pos_cg+1] = cgindex[pos_cg] + nrcg;
+ +                        if (bBondComm)
+ +                        {
+ +                            /* Update the charge group presence,
+ +                             * so we can use it in the next pass of the loop.
+ +                             */
+ +                            comm->bLocalCG[cg_gl] = TRUE;
+ +                        }
+ +                        pos_cg++;
+ +                    }
+ +                    if (p == 0)
+ +                    {
+ +                        comm->zone_ncg1[nzone+zone] = ind->nrecv[zone];
+ +                    }
+ +                    zone++;
+ +                    zone_cg_range[nzone+zone] = pos_cg;
+ +                }
+ +            }
+ +            else
+ +            {
+ +                /* This part of the code is never executed with bBondComm. */
+ +                merge_cg_buffers(nzone,cd,p,zone_cg_range,
+ +                                 index_gl,recv_i,cg_cm,recv_vr,
+ +                                 cgindex,fr->cginfo_mb,fr->cginfo);
+ +                pos_cg += ind->nrecv[nzone];
+ +            }
+ +            nat_tot += ind->nrecv[nzone+1];
+ +        }
+ +        if (!cd->bInPlace)
+ +        {
+ +            /* Store the atom block for easy copying of communication buffers */
+ +            make_cell2at_index(cd,nzone,zone_cg_range[nzone],cgindex);
+ +        }
+ +        nzone += nzone;
+ +    }
+ +    dd->index_gl = index_gl;
+ +    dd->cgindex  = cgindex;
+ +    
+ +    dd->ncg_tot = zone_cg_range[zones->n];
+ +    dd->nat_tot = nat_tot;
+ +    comm->nat[ddnatHOME] = dd->nat_home;
+ +    for(i=ddnatZONE; i<ddnatNR; i++)
+ +    {
+ +        comm->nat[i] = dd->nat_tot;
+ +    }
+ +
+ +    if (!bBondComm)
+ +    {
+ +        /* We don't need to update cginfo, since that was already done above.
+ +         * So we pass NULL for the forcerec.
+ +         */
+ +        dd_set_cginfo(dd->index_gl,dd->ncg_home,dd->ncg_tot,
+ +                      NULL,comm->bLocalCG);
+ +    }
+ +
+ +    if (debug)
+ +    {
+ +        fprintf(debug,"Finished setting up DD communication, zones:");
+ +        for(c=0; c<zones->n; c++)
+ +        {
+ +            fprintf(debug," %d",zones->cg_range[c+1]-zones->cg_range[c]);
+ +        }
+ +        fprintf(debug,"\n");
+ +    }
+ +}
+ +
+ +/* Set the charge group boundaries (cg1, jcg0, jcg1) of every i-zone
+ + * by looking them up in the charge group range table of the zones.
+ + */
+ +static void set_cg_boundaries(gmx_domdec_zones_t *zones)
+ +{
+ +    const int *range = zones->cg_range;
+ +    int       iz;
+ +
+ +    for(iz=0; iz<zones->nizone; iz++)
+ +    {
+ +        /* The j-zone indices of this i-zone select the range entries */
+ +        const int zone_j0 = zones->izone[iz].j0;
+ +        const int zone_j1 = zones->izone[iz].j1;
+ +
+ +        zones->izone[iz].cg1  = range[iz+1];
+ +        zones->izone[iz].jcg0 = range[zone_j0];
+ +        zones->izone[iz].jcg1 = range[zone_j1];
+ +    }
+ +}
+ +
+ +/* qsort comparison function for gmx_cgsort_t entries.
+ + *
+ + * Orders primarily on the ns grid cell index (nsc) and, for equal
+ + * cell indices, on the global charge group index (ind_gl).
+ + * Returns a negative value, zero or a positive value when a sorts
+ + * before, equal to or after b.
+ + *
+ + * The ordering is determined with explicit comparisons instead of
+ + * a subtraction, so the result can not be corrupted by integer overflow,
+ + * and the const qualifier of the arguments is kept.
+ + */
+ +static int comp_cgsort(const void *a,const void *b)
+ +{
+ +    const gmx_cgsort_t *cga = (const gmx_cgsort_t *)a;
+ +    const gmx_cgsort_t *cgb = (const gmx_cgsort_t *)b;
+ +
+ +    if (cga->nsc != cgb->nsc)
+ +    {
+ +        return (cga->nsc < cgb->nsc) ? -1 : 1;
+ +    }
+ +    if (cga->ind_gl != cgb->ind_gl)
+ +    {
+ +        return (cga->ind_gl < cgb->ind_gl) ? -1 : 1;
+ +    }
+ +
+ +    return 0;
+ +}
+ +
+ +/* Permute the first n entries of the int array a in place:
+ + * afterwards a[i] holds the old a[sort[i].ind].
+ + * buf is scratch space that should hold at least n ints.
+ + */
+ +static void order_int_cg(int n,gmx_cgsort_t *sort,
+ +                         int *a,int *buf)
+ +{
+ +    int k;
+ +
+ +    /* Gather the entries in sorted order into the scratch buffer */
+ +    k = 0;
+ +    while (k < n)
+ +    {
+ +        buf[k] = a[sort[k].ind];
+ +        k++;
+ +    }
+ +
+ +    /* Write the ordered entries back over the original array */
+ +    k = 0;
+ +    while (k < n)
+ +    {
+ +        a[k] = buf[k];
+ +        k++;
+ +    }
+ +}
+ +
+ +/* Permute the first n rvec entries of v in place:
+ + * afterwards v[i] holds the old v[sort[i].ind].
+ + * buf is scratch space that should hold at least n rvecs.
+ + */
+ +static void order_vec_cg(int n,gmx_cgsort_t *sort,
+ +                         rvec *v,rvec *buf)
+ +{
+ +    int k;
+ +
+ +    /* Gather the vectors in sorted order into the scratch buffer */
+ +    k = 0;
+ +    while (k < n)
+ +    {
+ +        copy_rvec(v[sort[k].ind],buf[k]);
+ +        k++;
+ +    }
+ +
+ +    /* Write the ordered vectors back over the original array */
+ +    k = 0;
+ +    while (k < n)
+ +    {
+ +        copy_rvec(buf[k],v[k]);
+ +        k++;
+ +    }
+ +}
+ +
+ +/* Permute a per-atom rvec array in place following a charge group order.
+ + *
+ + * For each of the ncg sorted charge groups, the atoms in the range
+ + * cgindex[sort[cg].ind] .. cgindex[sort[cg].ind+1]-1 are appended,
+ + * in order, to buf; the packed result is then copied back to the
+ + * start of v. cgindex is only read here.
+ + */
+ +static void order_vec_atom(int ncg,int *cgindex,gmx_cgsort_t *sort,
+ +                           rvec *v,rvec *buf)
+ +{
+ +    int nfilled,icg,at,at_end,k;
+ +
+ +    /* Pack the atoms, charge group by charge group, into the buffer */
+ +    nfilled = 0;
+ +    for(icg=0; icg<ncg; icg++)
+ +    {
+ +        at     = cgindex[sort[icg].ind];
+ +        at_end = cgindex[sort[icg].ind+1];
+ +        while (at < at_end)
+ +        {
+ +            copy_rvec(v[at],buf[nfilled]);
+ +            nfilled++;
+ +            at++;
+ +        }
+ +    }
+ +
+ +    /* Write the packed atoms back over the original array */
+ +    for(k=0; k<nfilled; k++)
+ +    {
+ +        copy_rvec(buf[k],v[k]);
+ +    }
+ +}
+ +
+ +/* Produce a fully sorted list in sort1 from an already ordered list
+ + * sort2 (nsort2 entries) and an unordered list sort_new
+ + * (nsort_new entries).
+ + *
+ + * sort_new is sorted in place with qsort and then merged with sort2.
+ + * The ordering is on nsc first and ind_gl second; an entry of sort2
+ + * is only taken before an entry of sort_new when it is strictly smaller.
+ + */
+ +static void ordered_sort(int nsort2,gmx_cgsort_t *sort2,
+ +                         int nsort_new,gmx_cgsort_t *sort_new,
+ +                         gmx_cgsort_t *sort1)
+ +{
+ +    int n_out,n_old,n_new;
+ +
+ +    /* The new indices are not very ordered, so we qsort them */
+ +    qsort_threadsafe(sort_new,nsort_new,sizeof(sort_new[0]),comp_cgsort);
+ +
+ +    n_out = 0;
+ +    n_old = 0;
+ +    n_new = 0;
+ +
+ +    /* Merge as long as both lists still have entries left */
+ +    while (n_old < nsort2 && n_new < nsort_new)
+ +    {
+ +        if (sort2[n_old].nsc < sort_new[n_new].nsc ||
+ +            (sort2[n_old].nsc == sort_new[n_new].nsc &&
+ +             sort2[n_old].ind_gl < sort_new[n_new].ind_gl))
+ +        {
+ +            sort1[n_out++] = sort2[n_old++];
+ +        }
+ +        else
+ +        {
+ +            sort1[n_out++] = sort_new[n_new++];
+ +        }
+ +    }
+ +
+ +    /* At most one of the two lists has entries left, append those */
+ +    while (n_old < nsort2)
+ +    {
+ +        sort1[n_out++] = sort2[n_old++];
+ +    }
+ +    while (n_new < nsort_new)
+ +    {
+ +        sort1[n_out++] = sort_new[n_new++];
+ +    }
+ +}
+ +
+ +/* Sort the home charge groups of this node on ns grid cell index
+ + * and global charge group index, and reorder all per-cg and per-atom
+ + * data accordingly.
+ + *
+ + * Charge groups whose ns grid cell index equals 4*ncells are treated
+ + * as no longer being at home here and are dropped from the home list.
+ + *
+ + * dd            domain decomposition data; ncg_home, nat_home, index_gl
+ + *               and cgindex are updated
+ + * ePBC          not referenced in this function
+ + * cgcm          charge group centers, reordered in place
+ + * fr            supplies the ns grid cell indices; cginfo, the grid
+ + *               cell_index array and the grid count nr are updated
+ + * state         local state; the distributed vector entries that are
+ + *               flagged in state->flags are reordered in place
+ + * ncg_home_old  when >= 0, the previous order in sort->sort1 is reused
+ + *               for entries with index < ncg_home_old and only the new
+ + *               or moved charge groups are qsorted; when negative
+ + *               the whole list is qsorted
+ + */
+ +static void dd_sort_state(gmx_domdec_t *dd,int ePBC,
+ +                          rvec *cgcm,t_forcerec *fr,t_state *state,
+ +                          int ncg_home_old)
+ +{
+ +    gmx_domdec_sort_t *sort;
+ +    gmx_cgsort_t *cgsort,*sort_i;
+ +    int  ncg_new,nsort2,nsort_new,i,cell_index,*ibuf,cgsize;
+ +    rvec *vbuf;
+ +    
+ +    sort = dd->comm->sort;
+ +    
+ +    /* Make sure both sort lists can hold all current home charge groups */
+ +    if (dd->ncg_home > sort->sort_nalloc)
+ +    {
+ +        sort->sort_nalloc = over_alloc_dd(dd->ncg_home);
+ +        srenew(sort->sort1,sort->sort_nalloc);
+ +        srenew(sort->sort2,sort->sort_nalloc);
+ +    }
+ +    
+ +    if (ncg_home_old >= 0)
+ +    {
+ +        /* The charge groups that remained in the same ns grid cell
+ +         * are completely ordered. So we can sort efficiently by sorting
+ +         * the charge groups that did move into the stationary list.
+ +         */
+ +        ncg_new = 0;
+ +        nsort2 = 0;
+ +        nsort_new = 0;
+ +        for(i=0; i<dd->ncg_home; i++)
+ +        {
+ +            /* Check if this cg did not move to another node */
+ +            cell_index = fr->ns.grid->cell_index[i];
+ +            if (cell_index !=  4*fr->ns.grid->ncells)
+ +            {
+ +                /* sort1 still holds the result of the previous sort,
+ +                 * which is only consulted for indices below ncg_home_old.
+ +                 */
+ +                if (i >= ncg_home_old || cell_index != sort->sort1[i].nsc)
+ +                {
+ +                    /* This cg is new on this node or moved ns grid cell */
+ +                    if (nsort_new >= sort->sort_new_nalloc)
+ +                    {
+ +                        sort->sort_new_nalloc = over_alloc_dd(nsort_new+1);
+ +                        srenew(sort->sort_new,sort->sort_new_nalloc);
+ +                    }
+ +                    sort_i = &(sort->sort_new[nsort_new++]);
+ +                }
+ +                else
+ +                {
+ +                    /* This cg did not move */
+ +                    sort_i = &(sort->sort2[nsort2++]);
+ +                }
+ +                /* Sort on the ns grid cell indices
+ +                 * and the global topology index
+ +                 */
+ +                sort_i->nsc    = cell_index;
+ +                sort_i->ind_gl = dd->index_gl[i];
+ +                sort_i->ind    = i;
+ +                ncg_new++;
+ +            }
+ +        }
+ +        if (debug)
+ +        {
+ +            fprintf(debug,"ordered sort cgs: stationary %d moved %d\n",
+ +                    nsort2,nsort_new);
+ +        }
+ +        /* Sort efficiently */
+ +        ordered_sort(nsort2,sort->sort2,nsort_new,sort->sort_new,sort->sort1);
+ +    }
+ +    else
+ +    {
+ +        cgsort = sort->sort1;
+ +        ncg_new = 0;
+ +        for(i=0; i<dd->ncg_home; i++)
+ +        {
+ +            /* Sort on the ns grid cell indices
+ +             * and the global topology index
+ +             */
+ +            cgsort[i].nsc    = fr->ns.grid->cell_index[i];
+ +            cgsort[i].ind_gl = dd->index_gl[i];
+ +            cgsort[i].ind    = i;
+ +            /* Only count the charge groups that stay at home; the others
+ +             * remain in the list that is sorted below.
+ +             * NOTE(review): presumably 4*ncells is larger than any valid
+ +             * cell index, so these entries end up at the end - confirm.
+ +             */
+ +            if (cgsort[i].nsc != 4*fr->ns.grid->ncells)
+ +            {
+ +                ncg_new++;
+ +            }
+ +        }
+ +        if (debug)
+ +        {
+ +            fprintf(debug,"qsort cgs: %d new home %d\n",dd->ncg_home,ncg_new);
+ +        }
+ +        /* Determine the order of the charge groups using qsort */
+ +        qsort_threadsafe(cgsort,dd->ncg_home,sizeof(cgsort[0]),comp_cgsort);
+ +    }
+ +    /* In both branches the sorted result is now in sort1 */
+ +    cgsort = sort->sort1;
+ +    
+ +    /* We alloc with the old size, since cgindex is still old */
+ +    vec_rvec_check_alloc(&dd->comm->vbuf,dd->cgindex[dd->ncg_home]);
+ +    vbuf = dd->comm->vbuf.v;
+ +    
+ +    /* Remove the charge groups which are no longer at home here */
+ +    dd->ncg_home = ncg_new;
+ +    
+ +    /* Reorder the state */
+ +    for(i=0; i<estNR; i++)
+ +    {
+ +        if (EST_DISTR(i) && state->flags & (1<<i))
+ +        {
+ +            switch (i)
+ +            {
+ +            case estX:
+ +                order_vec_atom(dd->ncg_home,dd->cgindex,cgsort,state->x,vbuf);
+ +                break;
+ +            case estV:
+ +                order_vec_atom(dd->ncg_home,dd->cgindex,cgsort,state->v,vbuf);
+ +                break;
+ +            case estSDX:
+ +                order_vec_atom(dd->ncg_home,dd->cgindex,cgsort,state->sd_X,vbuf);
+ +                break;
+ +            case estCGP:
+ +                order_vec_atom(dd->ncg_home,dd->cgindex,cgsort,state->cg_p,vbuf);
+ +                break;
+ +            case estLD_RNG:
+ +            case estLD_RNGI:
+ +            case estDISRE_INITF:
+ +            case estDISRE_RM3TAV:
+ +            case estORIRE_INITF:
+ +            case estORIRE_DTAV:
+ +                /* No ordering required */
+ +                break;
+ +            default:
+ +                gmx_incons("Unknown state entry encountered in dd_sort_state");
+ +                break;
+ +            }
+ +        }
+ +    }
+ +    /* Reorder cgcm */
+ +    order_vec_cg(dd->ncg_home,cgsort,cgcm,vbuf);
+ +    
+ +    /* The int buffer needs one extra element for the rebuilt cg index */
+ +    if (dd->ncg_home+1 > sort->ibuf_nalloc)
+ +    {
+ +        sort->ibuf_nalloc = over_alloc_dd(dd->ncg_home+1);
+ +        srenew(sort->ibuf,sort->ibuf_nalloc);
+ +    }
+ +    ibuf = sort->ibuf;
+ +    /* Reorder the global cg index */
+ +    order_int_cg(dd->ncg_home,cgsort,dd->index_gl,ibuf);
+ +    /* Reorder the cginfo */
+ +    order_int_cg(dd->ncg_home,cgsort,fr->cginfo,ibuf);
+ +    /* Rebuild the local cg index.
+ +     * The new index is accumulated in ibuf from the old charge group sizes
+ +     * and only copied over cgindex afterwards, since the old cgindex
+ +     * is still read while the sizes are determined.
+ +     */
+ +    ibuf[0] = 0;
+ +    for(i=0; i<dd->ncg_home; i++)
+ +    {
+ +        cgsize = dd->cgindex[cgsort[i].ind+1] - dd->cgindex[cgsort[i].ind];
+ +        ibuf[i+1] = ibuf[i] + cgsize;
+ +    }
+ +    for(i=0; i<dd->ncg_home+1; i++)
+ +    {
+ +        dd->cgindex[i] = ibuf[i];
+ +    }
+ +    /* Set the home atom number */
+ +    dd->nat_home = dd->cgindex[dd->ncg_home];
+ +    
+ +    /* Copy the sorted ns cell indices back to the ns grid struct */
+ +    for(i=0; i<dd->ncg_home; i++)
+ +    {
+ +        fr->ns.grid->cell_index[i] = cgsort[i].nsc;
+ +    }
+ +    fr->ns.grid->nr = dd->ncg_home;
+ +}
+ +
+ +/* Accumulate the statistics of the current decomposition:
+ + * for each atom range from ddnatZONE up to ddnatNR the difference with
+ + * the preceding entry of comm->nat is added to comm->sum_nat,
+ + * and the decomposition counter is incremented.
+ + */
+ +static void add_dd_statistics(gmx_domdec_t *dd)
+ +{
+ +    gmx_domdec_comm_t *comm = dd->comm;
+ +    int               k;
+ +
+ +    /* k is the offset with respect to ddnatZONE */
+ +    for(k=0; k<ddnatNR-ddnatZONE; k++)
+ +    {
+ +        comm->sum_nat[k] +=
+ +            comm->nat[ddnatZONE+k] - comm->nat[ddnatZONE+k-1];
+ +    }
+ +    comm->ndecomp++;
+ +}
+ +
+ +/* Set all accumulated domain decomposition statistics and load
+ + * counters in dd->comm back to zero, so counting for the total run
+ + * starts again from scratch.
+ + */
+ +void reset_dd_statistics_counters(gmx_domdec_t *dd)
+ +{
+ +    gmx_domdec_comm_t *comm = dd->comm;
+ +    int               k;
+ +
+ +    /* The communicated atom counts and the decomposition count */
+ +    for(k=0; k<ddnatNR-ddnatZONE; k++)
+ +    {
+ +        comm->sum_nat[k] = 0;
+ +    }
+ +    comm->ndecomp = 0;
+ +
+ +    /* The load counters */
+ +    comm->nload     = 0;
+ +    comm->load_step = 0;
+ +    comm->load_sum  = 0;
+ +    comm->load_max  = 0;
+ +    clear_ivec(comm->load_lim);
+ +    comm->load_mdf  = 0;
+ +    comm->load_pme  = 0;
+ +}
+ +
+ +/* Print the domain decomposition statistics to the log file.
+ + *
+ + * cr     communication record, cr->dd->comm holds the statistics
+ + * ir     input record, used for the electrostatics type, the number
+ + *        of LINCS iterations and the integrator
+ + * fplog  log file; when NULL nothing is printed, but the atom counts
+ + *        are still summed with gmx_sumd
+ + *
+ + * The average number of communicated atoms is the summed count divided
+ + * by the number of decompositions. When no decomposition was counted
+ + * (comm->ndecomp is 0, as it is directly after
+ + * reset_dd_statistics_counters) an average of 0 is reported
+ + * instead of dividing by zero.
+ + */
+ +void print_dd_statistics(t_commrec *cr,t_inputrec *ir,FILE *fplog)
+ +{
+ +    gmx_domdec_comm_t *comm;
+ +    int ddnat;
+ +    double av;
+ +
+ +    comm = cr->dd->comm;
+ +
+ +    /* This is done before the fplog check, so it also runs without log file.
+ +     * NOTE(review): presumably gmx_sumd is a sum over all nodes in which
+ +     * every node has to take part - confirm.
+ +     */
+ +    gmx_sumd(ddnatNR-ddnatZONE,comm->sum_nat,cr);
+ +
+ +    if (fplog == NULL)
+ +    {
+ +        return;
+ +    }
+ +
+ +    fprintf(fplog,"\n    D O M A I N   D E C O M P O S I T I O N   S T A T I S T I C S\n\n");
+ +
+ +    for(ddnat=ddnatZONE; ddnat<ddnatNR; ddnat++)
+ +    {
+ +        if (comm->ndecomp > 0)
+ +        {
+ +            av = comm->sum_nat[ddnat-ddnatZONE]/comm->ndecomp;
+ +        }
+ +        else
+ +        {
+ +            /* Nothing was counted, avoid a division by zero */
+ +            av = 0;
+ +        }
+ +        switch(ddnat)
+ +        {
+ +        case ddnatZONE:
+ +            fprintf(fplog,
+ +                    " av. #atoms communicated per step for force:  %d x %.1f\n",
+ +                    2,av);
+ +            break;
+ +        case ddnatVSITE:
+ +            if (cr->dd->vsite_comm)
+ +            {
+ +                fprintf(fplog,
+ +                        " av. #atoms communicated per step for vsites: %d x %.1f\n",
+ +                        (EEL_PME(ir->coulombtype) || ir->coulombtype==eelEWALD) ? 3 : 2,
+ +                        av);
+ +            }
+ +            break;
+ +        case ddnatCON:
+ +            if (cr->dd->constraint_comm)
+ +            {
+ +                fprintf(fplog,
+ +                        " av. #atoms communicated per step for LINCS:  %d x %.1f\n",
+ +                        1 + ir->nLincsIter,av);
+ +            }
+ +            break;
+ +        default:
+ +            gmx_incons(" Unknown type for DD statistics");
+ +            break;
+ +        }
+ +    }
+ +    fprintf(fplog,"\n");
+ +
+ +    /* The average load is only printed when it was recorded
+ +     * and a dynamical integrator is used.
+ +     */
+ +    if (comm->bRecordLoad && EI_DYNAMICS(ir->eI))
+ +    {
+ +        print_dd_load_av(fplog,cr->dd);
+ +    }
+ +}
+ +
+ +void dd_partition_system(FILE            *fplog,
+ +                         gmx_large_int_t      step,
+ +                         t_commrec       *cr,
+ +                         gmx_bool            bMasterState,
+ +                         int             nstglobalcomm,
+ +                         t_state         *state_global,
+ +                         gmx_mtop_t      *top_global,
+ +                         t_inputrec      *ir,
+ +                         t_state         *state_local,
+ +                         rvec            **f,
+ +                         t_mdatoms       *mdatoms,
+ +                         gmx_localtop_t  *top_local,
+ +                         t_forcerec      *fr,
+ +                         gmx_vsite_t     *vsite,
+ +                         gmx_shellfc_t   shellfc,
+ +                         gmx_constr_t    constr,
+ +                         t_nrnb          *nrnb,
+ +                         gmx_wallcycle_t wcycle,
+ +                         gmx_bool            bVerbose)
+ +{
+ +    gmx_domdec_t *dd;
+ +    gmx_domdec_comm_t *comm;
+ +    gmx_ddbox_t ddbox={0};
+ +    t_block *cgs_gl;
+ +    gmx_large_int_t step_pcoupl;
+ +    rvec cell_ns_x0,cell_ns_x1;
+ +    int  i,j,n,cg0=0,ncg_home_old=-1,nat_f_novirsum;
+ +    gmx_bool bBoxChanged,bNStGlobalComm,bDoDLB,bCheckDLB,bTurnOnDLB,bLogLoad;
+ +    gmx_bool bRedist,bSortCG,bResortAll;
+ +    ivec ncells_old,np;
+ +    real grid_density;
+ +    char sbuf[22];
+ +      
+ +    dd = cr->dd;
+ +    comm = dd->comm;
+ +
+ +    bBoxChanged = (bMasterState || DEFORM(*ir));
+ +    if (ir->epc != epcNO)
+ +    {
+ +        /* With nstpcouple > 1 pressure coupling happens
+ +         * one step after calculating the pressure.
+ +         * Box scaling happens at the end of the MD step,
+ +         * after the DD partitioning.
+ +         * We therefore have to do DLB in the first partitioning
+ +         * after an MD step where P-coupling occurred.
+ +         * We need to determine the last step in which p-coupling occurred.
+ +         * MRS -- need to validate this for vv?
+ +         */
+ +        n = ir->nstpcouple;
+ +        if (n == 1)
+ +        {
+ +            step_pcoupl = step - 1;
+ +        }
+ +        else
+ +        {
+ +            step_pcoupl = ((step - 1)/n)*n + 1;
+ +        }
+ +        if (step_pcoupl >= comm->globalcomm_step)
+ +        {
+ +            bBoxChanged = TRUE;
+ +        }
+ +    }
+ +
+ +    bNStGlobalComm = (step >= comm->globalcomm_step + nstglobalcomm);
+ +
+ +    if (!comm->bDynLoadBal)
+ +    {
+ +        bDoDLB = FALSE;
+ +    }
+ +    else
+ +    {
+ +        /* Should we do dynamic load balancing this step?
+ +         * Since it requires (possibly expensive) global communication,
+ +         * we might want to do DLB less frequently.
+ +         */
+ +        if (bBoxChanged || ir->epc != epcNO)
+ +        {
+ +            bDoDLB = bBoxChanged;
+ +        }
+ +        else
+ +        {
+ +            bDoDLB = bNStGlobalComm;
+ +        }
+ +    }
+ +
+ +    /* Check if we have recorded loads on the nodes */
+ +    if (comm->bRecordLoad && dd_load_count(comm))
+ +    {
+ +        if (comm->eDLB == edlbAUTO && !comm->bDynLoadBal)
+ +        {
+ +            /* Check if we should use DLB at the second partitioning
+ +             * and every 100 partitionings,
+ +             * so the extra communication cost is negligible.
+ +             */
+ +            n = max(100,nstglobalcomm);
+ +            bCheckDLB = (comm->n_load_collect == 0 ||
+ +                         comm->n_load_have % n == n-1);
+ +        }
+ +        else
+ +        {
+ +            bCheckDLB = FALSE;
+ +        }
+ +        
+ +        /* Print load every nstlog, first and last step to the log file */
+ +        bLogLoad = ((ir->nstlog > 0 && step % ir->nstlog == 0) ||
+ +                    comm->n_load_collect == 0 ||
+ +                    (step + ir->nstlist > ir->init_step + ir->nsteps));
+ +
+ +        /* Avoid extra communication due to verbose screen output
+ +         * when nstglobalcomm is set.
+ +         */
+ +        if (bDoDLB || bLogLoad || bCheckDLB ||
+ +            (bVerbose && (ir->nstlist == 0 || nstglobalcomm <= ir->nstlist)))
+ +        {
+ +            get_load_distribution(dd,wcycle);
+ +            if (DDMASTER(dd))
+ +            {
+ +                if (bLogLoad)
+ +                {
+ +                    dd_print_load(fplog,dd,step-1);
+ +                }
+ +                if (bVerbose)
+ +                {
+ +                    dd_print_load_verbose(dd);
+ +                }
+ +            }
+ +            comm->n_load_collect++;
+ +
+ +            if (bCheckDLB) {
+ +                /* Since the timings are node dependent, the master decides */
+ +                if (DDMASTER(dd))
+ +                {
+ +                    bTurnOnDLB =
+ +                        (dd_force_imb_perf_loss(dd) >= DD_PERF_LOSS);
+ +                    if (debug)
+ +                    {
+ +                        fprintf(debug,"step %s, imb loss %f\n",
+ +                                gmx_step_str(step,sbuf),
+ +                                dd_force_imb_perf_loss(dd));
+ +                    }
+ +                }
+ +                dd_bcast(dd,sizeof(bTurnOnDLB),&bTurnOnDLB);
+ +                if (bTurnOnDLB)
+ +                {
+ +                    turn_on_dlb(fplog,cr,step);
+ +                    bDoDLB = TRUE;
+ +                }
+ +            }
+ +        }
+ +        comm->n_load_have++;
+ +    }
+ +
+ +    cgs_gl = &comm->cgs_gl;
+ +
+ +    bRedist = FALSE;
+ +    if (bMasterState)
+ +    {
+ +        /* Clear the old state */
+ +        clear_dd_indices(dd,0,0);
+ +
+ +        set_ddbox(dd,bMasterState,cr,ir,state_global->box,
+ +                  TRUE,cgs_gl,state_global->x,&ddbox);
+ +    
+ +        get_cg_distribution(fplog,step,dd,cgs_gl,
+ +                            state_global->box,&ddbox,state_global->x);
+ +        
+ +        dd_distribute_state(dd,cgs_gl,
+ +                            state_global,state_local,f);
+ +        
+ +        dd_make_local_cgs(dd,&top_local->cgs);
+ +        
+ +        if (dd->ncg_home > fr->cg_nalloc)
+ +        {
+ +            dd_realloc_fr_cg(fr,dd->ncg_home);
+ +        }
+ +        calc_cgcm(fplog,0,dd->ncg_home,
+ +                  &top_local->cgs,state_local->x,fr->cg_cm);
+ +        
+ +        inc_nrnb(nrnb,eNR_CGCM,dd->nat_home);
+ +        
+ +        dd_set_cginfo(dd->index_gl,0,dd->ncg_home,fr,comm->bLocalCG);
+ +
+ +        cg0 = 0;
+ +    }
+ +    else if (state_local->ddp_count != dd->ddp_count)
+ +    {
+ +        if (state_local->ddp_count > dd->ddp_count)
+ +        {
+ +            gmx_fatal(FARGS,"Internal inconsistency state_local->ddp_count (%d) > dd->ddp_count (%d)",state_local->ddp_count,dd->ddp_count);
+ +        }
+ +        
+ +        if (state_local->ddp_count_cg_gl != state_local->ddp_count)
+ +        {
+ +            gmx_fatal(FARGS,"Internal inconsistency state_local->ddp_count_cg_gl (%d) != state_local->ddp_count (%d)",state_local->ddp_count_cg_gl,state_local->ddp_count);
+ +        }
+ +        
+ +        /* Clear the old state */
+ +        clear_dd_indices(dd,0,0);
+ +        
+ +        /* Build the new indices */
+ +        rebuild_cgindex(dd,cgs_gl->index,state_local);
+ +        make_dd_indices(dd,cgs_gl->index,0);
+ +        
+ +        /* Redetermine the cg COMs */
+ +        calc_cgcm(fplog,0,dd->ncg_home,
+ +                  &top_local->cgs,state_local->x,fr->cg_cm);
+ +        
+ +        inc_nrnb(nrnb,eNR_CGCM,dd->nat_home);
+ +
+ +        dd_set_cginfo(dd->index_gl,0,dd->ncg_home,fr,comm->bLocalCG);
+ +
+ +        set_ddbox(dd,bMasterState,cr,ir,state_local->box,
+ +                  TRUE,&top_local->cgs,state_local->x,&ddbox);
+ +
+ +        bRedist = comm->bDynLoadBal;
+ +    }
+ +    else
+ +    {
+ +        /* We have the full state, only redistribute the cgs */
+ +
+ +        /* Clear the non-home indices */
+ +        clear_dd_indices(dd,dd->ncg_home,dd->nat_home);
+ +
+ +        /* Avoid global communication for dim's without pbc and -gcom */
+ +        if (!bNStGlobalComm)
+ +        {
+ +            copy_rvec(comm->box0    ,ddbox.box0    );
+ +            copy_rvec(comm->box_size,ddbox.box_size);
+ +        }
+ +        set_ddbox(dd,bMasterState,cr,ir,state_local->box,
+ +                  bNStGlobalComm,&top_local->cgs,state_local->x,&ddbox);
+ +
+ +        bBoxChanged = TRUE;
+ +        bRedist = TRUE;
+ +    }
+ +    /* For dim's without pbc and -gcom */
+ +    copy_rvec(ddbox.box0    ,comm->box0    );
+ +    copy_rvec(ddbox.box_size,comm->box_size);
+ +    
+ +    set_dd_cell_sizes(dd,&ddbox,dynamic_dd_box(&ddbox,ir),bMasterState,bDoDLB,
+ +                      step,wcycle);
+ +    
+ +    if (comm->nstDDDumpGrid > 0 && step % comm->nstDDDumpGrid == 0)
+ +    {
+ +        write_dd_grid_pdb("dd_grid",step,dd,state_local->box,&ddbox);
+ +    }
+ +    
+ +    /* Check if we should sort the charge groups */
+ +    if (comm->nstSortCG > 0)
+ +    {
+ +        bSortCG = (bMasterState ||
+ +                   (bRedist && (step % comm->nstSortCG == 0)));
+ +    }
+ +    else
+ +    {
+ +        bSortCG = FALSE;
+ +    }
+ +
+ +    ncg_home_old = dd->ncg_home;
+ +
+ +    if (bRedist)
+ +    {
+ +        cg0 = dd_redistribute_cg(fplog,step,dd,ddbox.tric_dir,
+ +                                 state_local,f,fr,mdatoms,
+ +                                 !bSortCG,nrnb);
+ +    }
+ +    
+ +    get_nsgrid_boundaries(fr->ns.grid,dd,
+ +                          state_local->box,&ddbox,&comm->cell_x0,&comm->cell_x1,
+ +                          dd->ncg_home,fr->cg_cm,
+ +                          cell_ns_x0,cell_ns_x1,&grid_density);
+ +
+ +    if (bBoxChanged)
+ +    {
+ +        comm_dd_ns_cell_sizes(dd,&ddbox,cell_ns_x0,cell_ns_x1,step);
+ +    }
+ +
+ +    copy_ivec(fr->ns.grid->n,ncells_old);
+ +    grid_first(fplog,fr->ns.grid,dd,&ddbox,fr->ePBC,
+ +               state_local->box,cell_ns_x0,cell_ns_x1,
+ +               fr->rlistlong,grid_density);
+ +    /* We need to store tric_dir for dd_get_ns_ranges called from ns.c */
+ +    copy_ivec(ddbox.tric_dir,comm->tric_dir);
+ +
+ +    if (bSortCG)
+ +    {
+ +        /* Sort the state on charge group position.
+ +         * This enables exact restarts from this step.
+ +         * It also improves performance by about 15% with larger numbers
+ +         * of atoms per node.
+ +         */
+ +        
+ +        /* Fill the ns grid with the home cell,
+ +         * so we can sort with the indices.
+ +         */
+ +        set_zones_ncg_home(dd);
+ +        fill_grid(fplog,&comm->zones,fr->ns.grid,dd->ncg_home,
+ +                  0,dd->ncg_home,fr->cg_cm);
+ +        
+ +        /* Check if we can use the old order and ns grid cell indices
+ +         * of the charge groups to sort the charge groups efficiently.
+ +         */
+ +        bResortAll = (bMasterState ||
+ +                      fr->ns.grid->n[XX] != ncells_old[XX] ||
+ +                      fr->ns.grid->n[YY] != ncells_old[YY] ||
+ +                      fr->ns.grid->n[ZZ] != ncells_old[ZZ]);
+ +
+ +        if (debug)
+ +        {
+ +            fprintf(debug,"Step %s, sorting the %d home charge groups\n",
+ +                    gmx_step_str(step,sbuf),dd->ncg_home);
+ +        }
+ +        dd_sort_state(dd,ir->ePBC,fr->cg_cm,fr,state_local,
+ +                      bResortAll ? -1 : ncg_home_old);
+ +        /* Rebuild all the indices */
+ +        cg0 = 0;
+ +        ga2la_clear(dd->ga2la);
+ +    }
+ +    
+ +    /* Set up the communication and communicate the coordinates */
+ +    setup_dd_communication(dd,state_local->box,&ddbox,fr);
+ +    
+ +    /* Set the indices */
+ +    make_dd_indices(dd,cgs_gl->index,cg0);
+ +
+ +    /* Set the charge group boundaries for neighbor searching */
+ +    set_cg_boundaries(&comm->zones);
+ +    
+ +    /*
+ +    write_dd_pdb("dd_home",step,"dump",top_global,cr,
+ +                 -1,state_local->x,state_local->box);
+ +    */
+ +    
+ +    /* Extract a local topology from the global topology */
+ +    for(i=0; i<dd->ndim; i++)
+ +    {
+ +        np[dd->dim[i]] = comm->cd[i].np;
+ +    }
+ +    dd_make_local_top(fplog,dd,&comm->zones,dd->npbcdim,state_local->box,
+ +                      comm->cellsize_min,np,
+ +                      fr,vsite,top_global,top_local);
+ +    
+ +    /* Set up the special atom communication */
+ +    n = comm->nat[ddnatZONE];
+ +    for(i=ddnatZONE+1; i<ddnatNR; i++)
+ +    {
+ +        switch(i)
+ +        {
+ +        case ddnatVSITE:
+ +            if (vsite && vsite->n_intercg_vsite)
+ +            {
+ +                n = dd_make_local_vsites(dd,n,top_local->idef.il);
+ +            }
+ +            break;
+ +        case ddnatCON:
+ +            if (dd->bInterCGcons)
+ +            {
+ +                /* Only for inter-cg constraints we need special code */
+ +                n = dd_make_local_constraints(dd,n,top_global,
+ +                                              constr,ir->nProjOrder,
+ +                                              &top_local->idef.il[F_CONSTR]);
+ +            }
+ +            break;
+ +        default:
+ +            gmx_incons("Unknown special atom type setup");
+ +        }
+ +        comm->nat[i] = n;
+ +    }
+ +    
+ +    /* Make space for the extra coordinates for virtual site
+ +     * or constraint communication.
+ +     */
+ +    state_local->natoms = comm->nat[ddnatNR-1];
+ +    if (state_local->natoms > state_local->nalloc)
+ +    {
+ +        dd_realloc_state(state_local,f,state_local->natoms);
+ +    }
+ +
+ +    if (fr->bF_NoVirSum)
+ +    {
+ +        if (vsite && vsite->n_intercg_vsite)
+ +        {
+ +            nat_f_novirsum = comm->nat[ddnatVSITE];
+ +        }
+ +        else
+ +        {
+ +            if (EEL_FULL(ir->coulombtype) && dd->n_intercg_excl > 0)
+ +            {
+ +                nat_f_novirsum = dd->nat_tot;
+ +            }
+ +            else
+ +            {
+ +                nat_f_novirsum = dd->nat_home;
+ +            }
+ +        }
+ +    }
+ +    else
+ +    {
+ +        nat_f_novirsum = 0;
+ +    }
+ +
+ +    /* Set the number of atoms required for the force calculation.
+ +     * Forces need to be constrained when using a twin-range setup
+ +     * or with energy minimization. For simple simulations we could
+ +     * avoid some allocation, zeroing and copying, but this is
+ +     * probably not worth the complications and checking.
+ +     */
+ +    forcerec_set_ranges(fr,dd->ncg_home,dd->ncg_tot,
+ +                        dd->nat_tot,comm->nat[ddnatCON],nat_f_novirsum);
+ +
+ +    /* We make all the mdatoms up to nat_tot_con.
+ +     * We could save some work by only setting invmass
+ +     * between nat_tot and nat_tot_con.
+ +     */
+ +    /* This call also sets the new number of home particles to dd->nat_home */
+ +    atoms2md(top_global,ir,
+ +             comm->nat[ddnatCON],dd->gatindex,0,dd->nat_home,mdatoms);
+ +
+ +    /* Now we have the charges we can sort the FE interactions */
+ +    dd_sort_local_top(dd,mdatoms,top_local);
+ +
+ +    if (shellfc)
+ +    {
+ +        /* Make the local shell stuff, currently no communication is done */
+ +        make_local_shells(cr,mdatoms,shellfc);
+ +    }
+ +    
+ +      if (ir->implicit_solvent)
+ +    {
+ +        make_local_gb(cr,fr->born,ir->gb_algorithm);
+ +    }
+ +      
+ +    if (!(cr->duty & DUTY_PME))
+ +    {
+ +        /* Send the charges to our PME only node */
+ +        gmx_pme_send_q(cr,mdatoms->nChargePerturbed,
+ +                       mdatoms->chargeA,mdatoms->chargeB,
+ +                       dd_pme_maxshift_x(dd),dd_pme_maxshift_y(dd));
+ +    }
+ +    
+ +    if (constr)
+ +    {
+ +        set_constraints(constr,top_local,ir,mdatoms,cr);
+ +    }
+ +    
+ +    if (ir->ePull != epullNO)
+ +    {
+ +        /* Update the local pull groups */
+ +        dd_make_local_pull_groups(dd,ir->pull,mdatoms);
+ +    }
+ +    
+ +    if (ir->bRot)
+ +    {
+ +        /* Update the local rotation groups */
+ +        dd_make_local_rotation_groups(dd,ir->rot);
+ +    }
+ +
+ +
+ +    add_dd_statistics(dd);
+ +    
+ +    /* Make sure we only count the cycles for this DD partitioning */
+ +    clear_dd_cycle_counts(dd);
+ +    
+ +    /* Because the order of the atoms might have changed since
+ +     * the last vsite construction, we need to communicate the constructing
+ +     * atom coordinates again (for spreading the forces this MD step).
+ +     */
+ +    dd_move_x_vsites(dd,state_local->box,state_local->x);
+ +    
+ +    if (comm->nstDDDump > 0 && step % comm->nstDDDump == 0)
+ +    {
+ +        dd_move_x(dd,state_local->box,state_local->x);
+ +        write_dd_pdb("dd_dump",step,"dump",top_global,cr,
+ +                     -1,state_local->x,state_local->box);
+ +    }
+ +
+ +    if (bNStGlobalComm)
+ +    {
+ +        /* Store the global communication step */
+ +        comm->globalcomm_step = step;
+ +    }
+ +    
+ +    /* Increase the DD partitioning counter */
+ +    dd->ddp_count++;
+ +    /* The state currently matches this DD partitioning count, store it */
+ +    state_local->ddp_count = dd->ddp_count;
+ +    if (bMasterState)
+ +    {
+ +        /* The DD master node knows the complete cg distribution,
+ +         * store the count so we can possibly skip the cg info communication.
+ +         */
+ +        comm->master_cg_ddp_count = (bSortCG ? 0 : dd->ddp_count);
+ +    }
+ +
+ +    if (comm->DD_debug > 0)
+ +    {
+ +        /* Set the env var GMX_DD_DEBUG if you suspect corrupted indices */
+ +        check_index_consistency(dd,top_global->natoms,ncg_mtop(top_global),
+ +                                "after partitioning");
+ +    }
+ +}
diff --cc src/gromacs/mdlib/domdec_top.c
Simple merge
diff --cc src/gromacs/mdlib/edsam.c
Simple merge
diff --cc src/gromacs/mdlib/iteratedconstraints.c

index 0000000000000000000000000000000000000000,d0cd8b4737faf8a5f174a58a593c7961ff2398cb..d0cd8b4737faf8a5f174a58a593c7961ff2398cb

mode 000000,100644..100644
--- /dev/null
--- 2/src/mdlib/iteratedconstraints.c
+++ b/src/gromacs/mdlib/iteratedconstraints.c
diff --cc src/gromacs/mdlib/md_support.c

index 0000000000000000000000000000000000000000,7551a76d8c2b3213b574f02d7d7961962ffd8f0c..7551a76d8c2b3213b574f02d7d7961962ffd8f0c

mode 000000,100644..100644
--- /dev/null
--- 2/src/mdlib/md_support.c
+++ b/src/gromacs/mdlib/md_support.c
diff --cc src/gromacs/mdlib/mdebin.c

index d6f41810b15bfa1dc88d9518470112edc8a9a9df,0000000000000000000000000000000000000000..1d7ca41abdb206175d5ef492af54066fde924620

mode 100644,000000..100644
--- 1/src/gromacs/mdlib/mdebin.c
--- /dev/null
+++ b/src/gromacs/mdlib/mdebin.c
@@@ -1,1254 -1,0 +1,1254 @@@
-             setname[nsi++] = strdup(buf);
+ +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ + *
+ + * 
+ + *                This source code is part of
+ + * 
+ + *                 G   R   O   M   A   C   S
+ + * 
+ + *          GROningen MAchine for Chemical Simulations
+ + * 
+ + *                        VERSION 3.2.0
+ + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ + * Copyright (c) 2001-2004, The GROMACS development team,
+ + * check out http://www.gromacs.org for more information.
+ +
+ + * This program is free software; you can redistribute it and/or
+ + * modify it under the terms of the GNU General Public License
+ + * as published by the Free Software Foundation; either version 2
+ + * of the License, or (at your option) any later version.
+ + * 
+ + * If you want to redistribute modifications, please consider that
+ + * scientific software is very special. Version control is crucial -
+ + * bugs must be traceable. We will be happy to consider code for
+ + * inclusion in the official distribution, but derived work must not
+ + * be called official GROMACS. Details are found in the README & COPYING
+ + * files - if they are missing, get the official version at www.gromacs.org.
+ + * 
+ + * To help us fund GROMACS development, we humbly ask that you cite
+ + * the papers on the package - you can find them in the top README file.
+ + * 
+ + * For more info, check our website at http://www.gromacs.org
+ + * 
+ + * And Hey:
+ + * GROwing Monsters And Cloning Shrimps
+ + */
+ +#ifdef HAVE_CONFIG_H
+ +#include <config.h>
+ +#endif
+ +
+ +#include <string.h>
+ +#include <float.h>
+ +#include "typedefs.h"
+ +#include "string2.h"
+ +#include "mdebin.h"
+ +#include "smalloc.h"
+ +#include "physics.h"
+ +#include "enxio.h"
+ +#include "vec.h"
+ +#include "disre.h"
+ +#include "main.h"
+ +#include "network.h"
+ +#include "names.h"
+ +#include "orires.h"
+ +#include "constr.h"
+ +#include "mtop_util.h"
+ +#include "xvgr.h"
+ +#include "gmxfio.h"
+ +
+ +#include "mdebin_bar.h"
+ +
+ +
+ +static const char *conrmsd_nm[] = { "Constr. rmsd", "Constr.2 rmsd" };
+ +
+ +static const char *boxs_nm[] = { "Box-X", "Box-Y", "Box-Z" };
+ +
+ +static const char *tricl_boxs_nm[] = { 
+ +    "Box-XX", "Box-YY", "Box-ZZ",
+ +    "Box-YX", "Box-ZX", "Box-ZY" 
+ +};
+ +
+ +static const char *vol_nm[] = { "Volume" };
+ +
+ +static const char *dens_nm[] = {"Density" };
+ +
+ +static const char *pv_nm[] = {"pV" };
+ +
+ +static const char *enthalpy_nm[] = {"Enthalpy" };
+ +
+ +static const char *boxvel_nm[] = {
+ +    "Box-Vel-XX", "Box-Vel-YY", "Box-Vel-ZZ",
+ +    "Box-Vel-YX", "Box-Vel-ZX", "Box-Vel-ZY"
+ +};
+ +
+ +#define NBOXS asize(boxs_nm)
+ +#define NTRICLBOXS asize(tricl_boxs_nm)
+ +
+ +static gmx_bool bTricl,bDynBox;
+ +static int  f_nre=0,epc,etc,nCrmsd;
+ +
+ +
+ +
+ +
+ +
+ +t_mdebin *init_mdebin(ener_file_t fp_ene,
+ +                      const gmx_mtop_t *mtop,
+ +                      const t_inputrec *ir,
+ +                      FILE *fp_dhdl)
+ +{
+ +    const char *ener_nm[F_NRE];
+ +    static const char *vir_nm[] = {
+ +        "Vir-XX", "Vir-XY", "Vir-XZ",
+ +        "Vir-YX", "Vir-YY", "Vir-YZ",
+ +        "Vir-ZX", "Vir-ZY", "Vir-ZZ"
+ +    };
+ +    static const char *sv_nm[] = {
+ +        "ShakeVir-XX", "ShakeVir-XY", "ShakeVir-XZ",
+ +        "ShakeVir-YX", "ShakeVir-YY", "ShakeVir-YZ",
+ +        "ShakeVir-ZX", "ShakeVir-ZY", "ShakeVir-ZZ"
+ +    };
+ +    static const char *fv_nm[] = {
+ +        "ForceVir-XX", "ForceVir-XY", "ForceVir-XZ",
+ +        "ForceVir-YX", "ForceVir-YY", "ForceVir-YZ",
+ +        "ForceVir-ZX", "ForceVir-ZY", "ForceVir-ZZ"
+ +    };
+ +    static const char *pres_nm[] = {
+ +        "Pres-XX","Pres-XY","Pres-XZ",
+ +        "Pres-YX","Pres-YY","Pres-YZ",
+ +        "Pres-ZX","Pres-ZY","Pres-ZZ"
+ +    };
+ +    static const char *surft_nm[] = {
+ +        "#Surf*SurfTen"
+ +    };
+ +    static const char *mu_nm[] = {
+ +        "Mu-X", "Mu-Y", "Mu-Z"
+ +    };
+ +    static const char *vcos_nm[] = {
+ +        "2CosZ*Vel-X"
+ +    };
+ +    static const char *visc_nm[] = {
+ +        "1/Viscosity"
+ +    };
+ +    static const char *baro_nm[] = {
+ +        "Barostat"
+ +    };
+ +
+ +    char     **grpnms;
+ +    const gmx_groups_t *groups;
+ +    char     **gnm;
+ +    char     buf[256];
+ +    const char     *bufi;
+ +    t_mdebin *md;
+ +    int      i,j,ni,nj,n,nh,k,kk,ncon,nset;
+ +    gmx_bool     bBHAM,bNoseHoover,b14;
+ +
+ +    snew(md,1);
+ +
+ +    if (EI_DYNAMICS(ir->eI))
+ +    {
+ +        md->delta_t = ir->delta_t;
+ +    }
+ +    else
+ +    {
+ +        md->delta_t = 0;
+ +    }
+ +
+ +    groups = &mtop->groups;
+ +
+ +    bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
+ +    b14   = (gmx_mtop_ftype_count(mtop,F_LJ14) > 0 ||
+ +             gmx_mtop_ftype_count(mtop,F_LJC14_Q) > 0);
+ +
+ +    ncon = gmx_mtop_ftype_count(mtop,F_CONSTR);
+ +    nset = gmx_mtop_ftype_count(mtop,F_SETTLE);
+ +    md->bConstr    = (ncon > 0 || nset > 0);
+ +    md->bConstrVir = FALSE;
+ +    if (md->bConstr) {
+ +        if (ncon > 0 && ir->eConstrAlg == econtLINCS) {
+ +            if (ir->eI == eiSD2)
+ +                md->nCrmsd = 2;
+ +            else
+ +                md->nCrmsd = 1;
+ +        }
+ +        md->bConstrVir = (getenv("GMX_CONSTRAINTVIR") != NULL);
+ +    } else {
+ +        md->nCrmsd = 0;
+ +    }
+ +
+ +    /* Energy monitoring */
+ +    for(i=0;i<egNR;i++)
+ +    {
+ +        md->bEInd[i]=FALSE;
+ +    }
+ +
+ +#ifndef GMX_OPENMM
+ +    for(i=0; i<F_NRE; i++)
+ +    {
+ +        md->bEner[i] = FALSE;
+ +        if (i == F_LJ)
+ +            md->bEner[i] = !bBHAM;
+ +        else if (i == F_BHAM)
+ +            md->bEner[i] = bBHAM;
+ +        else if (i == F_EQM)
+ +            md->bEner[i] = ir->bQMMM;
+ +        else if (i == F_COUL_LR)
+ +            md->bEner[i] = (ir->rcoulomb > ir->rlist);
+ +        else if (i == F_LJ_LR)
+ +            md->bEner[i] = (!bBHAM && ir->rvdw > ir->rlist);
+ +        else if (i == F_BHAM_LR)
+ +            md->bEner[i] = (bBHAM && ir->rvdw > ir->rlist);
+ +        else if (i == F_RF_EXCL)
+ +            md->bEner[i] = (EEL_RF(ir->coulombtype) && ir->coulombtype != eelRF_NEC);
+ +        else if (i == F_COUL_RECIP)
+ +            md->bEner[i] = EEL_FULL(ir->coulombtype);
+ +        else if (i == F_LJ14)
+ +            md->bEner[i] = b14;
+ +        else if (i == F_COUL14)
+ +            md->bEner[i] = b14;
+ +        else if (i == F_LJC14_Q || i == F_LJC_PAIRS_NB)
+ +            md->bEner[i] = FALSE;
+ +        else if ((i == F_DVDL) || (i == F_DKDL))
+ +            md->bEner[i] = (ir->efep != efepNO);
+ +        else if (i == F_DHDL_CON)
+ +            md->bEner[i] = (ir->efep != efepNO && md->bConstr);
+ +        else if ((interaction_function[i].flags & IF_VSITE) ||
+ +                 (i == F_CONSTR) || (i == F_CONSTRNC) || (i == F_SETTLE))
+ +            md->bEner[i] = FALSE;
+ +        else if ((i == F_COUL_SR) || (i == F_EPOT) || (i == F_PRES)  || (i==F_EQM))
+ +            md->bEner[i] = TRUE;
+ +        else if ((i == F_GBPOL) && ir->implicit_solvent==eisGBSA)
+ +            md->bEner[i] = TRUE;
+ +        else if ((i == F_NPSOLVATION) && ir->implicit_solvent==eisGBSA && (ir->sa_algorithm != esaNO))
+ +            md->bEner[i] = TRUE;
+ +        else if ((i == F_GB12) || (i == F_GB13) || (i == F_GB14))
+ +            md->bEner[i] = FALSE;
+ +        else if ((i == F_ETOT) || (i == F_EKIN) || (i == F_TEMP))
+ +            md->bEner[i] = EI_DYNAMICS(ir->eI);
+ +        else if (i==F_VTEMP) 
+ +            md->bEner[i] =  (EI_DYNAMICS(ir->eI) && getenv("GMX_VIRIAL_TEMPERATURE"));
+ +        else if (i == F_DISPCORR || i == F_PDISPCORR)
+ +            md->bEner[i] = (ir->eDispCorr != edispcNO);
+ +        else if (i == F_DISRESVIOL)
+ +            md->bEner[i] = (gmx_mtop_ftype_count(mtop,F_DISRES) > 0);
+ +        else if (i == F_ORIRESDEV)
+ +            md->bEner[i] = (gmx_mtop_ftype_count(mtop,F_ORIRES) > 0);
+ +        else if (i == F_CONNBONDS)
+ +            md->bEner[i] = FALSE;
+ +        else if (i == F_COM_PULL)
+ +            md->bEner[i] = (ir->ePull == epullUMBRELLA || ir->ePull == epullCONST_F || ir->bRot);
+ +        else if (i == F_ECONSERVED)
+ +            md->bEner[i] = ((ir->etc == etcNOSEHOOVER || ir->etc == etcVRESCALE) &&
+ +                            (ir->epc == epcNO || ir->epc==epcMTTK));
+ +        else
+ +            md->bEner[i] = (gmx_mtop_ftype_count(mtop,i) > 0);
+ +    }
+ +#else
+ +    /* OpenMM always produces only the following 4 energy terms */
+ +    md->bEner[F_EPOT] = TRUE;
+ +    md->bEner[F_EKIN] = TRUE;
+ +    md->bEner[F_ETOT] = TRUE;
+ +    md->bEner[F_TEMP] = TRUE;
+ +#endif
+ +
+ +    md->f_nre=0;
+ +    for(i=0; i<F_NRE; i++)
+ +    {
+ +        if (md->bEner[i])
+ +        {
+ +            /* FIXME: The constness should not be cast away */
+ +            /*ener_nm[f_nre]=(char *)interaction_function[i].longname;*/
+ +            ener_nm[md->f_nre]=interaction_function[i].longname;
+ +            md->f_nre++;
+ +        }
+ +    }
+ +
+ +    md->epc = ir->epc;
+ +    for (i=0;i<DIM;i++) 
+ +    {
+ +        for (j=0;j<DIM;j++) 
+ +        {
+ +            md->ref_p[i][j] = ir->ref_p[i][j];
+ +        }
+ +    }
+ +    md->bTricl = TRICLINIC(ir->compress) || TRICLINIC(ir->deform);
+ +    md->bDynBox = DYNAMIC_BOX(*ir);
+ +    md->etc = ir->etc;
+ +    md->bNHC_trotter = IR_NVT_TROTTER(ir);
+ +    md->bMTTK = IR_NPT_TROTTER(ir);
+ +
+ +    md->ebin  = mk_ebin();
+ +    /* Pass NULL for unit to let get_ebin_space determine the units
+ +     * for interaction_function[i].longname
+ +     */
+ +    md->ie    = get_ebin_space(md->ebin,md->f_nre,ener_nm,NULL);
+ +    if (md->nCrmsd)
+ +    {
+ +        /* This should be called directly after the call for md->ie,
+ +         * such that md->iconrmsd follows directly in the list.
+ +         */
+ +        md->iconrmsd = get_ebin_space(md->ebin,md->nCrmsd,conrmsd_nm,"");
+ +    }
+ +    if (md->bDynBox)
+ +    {
+ +        md->ib    = get_ebin_space(md->ebin, 
+ +                                   md->bTricl ? NTRICLBOXS : NBOXS, 
+ +                                   md->bTricl ? tricl_boxs_nm : boxs_nm,
+ +                                   unit_length);
+ +        md->ivol  = get_ebin_space(md->ebin, 1, vol_nm,  unit_volume);
+ +        md->idens = get_ebin_space(md->ebin, 1, dens_nm, unit_density_SI);
+ +        md->ipv   = get_ebin_space(md->ebin, 1, pv_nm,   unit_energy);
+ +        md->ienthalpy = get_ebin_space(md->ebin, 1, enthalpy_nm,   unit_energy);
+ +    }
+ +    if (md->bConstrVir)
+ +    {
+ +        md->isvir = get_ebin_space(md->ebin,asize(sv_nm),sv_nm,unit_energy);
+ +        md->ifvir = get_ebin_space(md->ebin,asize(fv_nm),fv_nm,unit_energy);
+ +    }
+ +    md->ivir   = get_ebin_space(md->ebin,asize(vir_nm),vir_nm,unit_energy);
+ +    md->ipres  = get_ebin_space(md->ebin,asize(pres_nm),pres_nm,unit_pres_bar);
+ +    md->isurft = get_ebin_space(md->ebin,asize(surft_nm),surft_nm,
+ +                                unit_surft_bar);
+ +    if (md->epc == epcPARRINELLORAHMAN || md->epc == epcMTTK)
+ +    {
+ +        md->ipc = get_ebin_space(md->ebin,md->bTricl ? 6 : 3,
+ +                                 boxvel_nm,unit_vel);
+ +    }
+ +    md->imu    = get_ebin_space(md->ebin,asize(mu_nm),mu_nm,unit_dipole_D);
+ +    if (ir->cos_accel != 0)
+ +    {
+ +        md->ivcos = get_ebin_space(md->ebin,asize(vcos_nm),vcos_nm,unit_vel);
+ +        md->ivisc = get_ebin_space(md->ebin,asize(visc_nm),visc_nm,
+ +                                   unit_invvisc_SI);
+ +    }
+ +
+ +    /* Energy monitoring */
+ +    for(i=0;i<egNR;i++)
+ +    {
+ +        md->bEInd[i] = FALSE;
+ +    }
+ +    md->bEInd[egCOULSR] = TRUE;
+ +    md->bEInd[egLJSR  ] = TRUE;
+ +
+ +    if (ir->rcoulomb > ir->rlist)
+ +    {
+ +        md->bEInd[egCOULLR] = TRUE;
+ +    }
+ +    if (!bBHAM)
+ +    {
+ +        if (ir->rvdw > ir->rlist)
+ +        {
+ +            md->bEInd[egLJLR]   = TRUE;
+ +        }
+ +    }
+ +    else
+ +    {
+ +        md->bEInd[egLJSR]   = FALSE;
+ +        md->bEInd[egBHAMSR] = TRUE;
+ +        if (ir->rvdw > ir->rlist)
+ +        {
+ +            md->bEInd[egBHAMLR]   = TRUE;
+ +        }
+ +    }
+ +    if (b14)
+ +    {
+ +        md->bEInd[egLJ14] = TRUE;
+ +        md->bEInd[egCOUL14] = TRUE;
+ +    }
+ +    md->nEc=0;
+ +    for(i=0; (i<egNR); i++)
+ +    {
+ +        if (md->bEInd[i])
+ +        {
+ +            md->nEc++;
+ +        }
+ +    }
+ +
+ +    n=groups->grps[egcENER].nr;
+ +    md->nEg=n;
+ +    md->nE=(n*(n+1))/2;
+ +    snew(md->igrp,md->nE);
+ +    if (md->nE > 1)
+ +    {
+ +        n=0;
+ +        snew(gnm,md->nEc);
+ +        for(k=0; (k<md->nEc); k++)
+ +        {
+ +            snew(gnm[k],STRLEN);
+ +        }
+ +        for(i=0; (i<groups->grps[egcENER].nr); i++)
+ +        {
+ +            ni=groups->grps[egcENER].nm_ind[i];
+ +            for(j=i; (j<groups->grps[egcENER].nr); j++)
+ +            {
+ +                nj=groups->grps[egcENER].nm_ind[j];
+ +                for(k=kk=0; (k<egNR); k++)
+ +                {
+ +                    if (md->bEInd[k])
+ +                    {
+ +                        sprintf(gnm[kk],"%s:%s-%s",egrp_nm[k],
+ +                                *(groups->grpname[ni]),*(groups->grpname[nj]));
+ +                        kk++;
+ +                    }
+ +                }
+ +                md->igrp[n]=get_ebin_space(md->ebin,md->nEc,
+ +                                           (const char **)gnm,unit_energy);
+ +                n++;
+ +            }
+ +        }
+ +        for(k=0; (k<md->nEc); k++)
+ +        {
+ +            sfree(gnm[k]);
+ +        }
+ +        sfree(gnm);
+ +
+ +        if (n != md->nE)
+ +        {
+ +            gmx_incons("Number of energy terms wrong");
+ +        }
+ +    }
+ +
+ +    md->nTC=groups->grps[egcTC].nr;
+ +    md->nNHC = ir->opts.nhchainlength; /* shorthand for number of NH chains */ 
+ +    if (md->bMTTK)
+ +    {
+ +        md->nTCP = 1;  /* assume only one possible coupling system for barostat 
+ +                          for now */
+ +    } 
+ +    else 
+ +    {
+ +        md->nTCP = 0;
+ +    }
+ +
+ +    if (md->etc == etcNOSEHOOVER) {
+ +        if (md->bNHC_trotter) { 
+ +            md->mde_n = 2*md->nNHC*md->nTC;
+ +        }
+ +        else 
+ +        {
+ +            md->mde_n = 2*md->nTC;
+ +        }
+ +        if (md->epc == epcMTTK)
+ +        {
+ +            md->mdeb_n = 2*md->nNHC*md->nTCP;
+ +        }
+ +    } else { 
+ +        md->mde_n = md->nTC;
+ +        md->mdeb_n = 0;
+ +    }
+ +
+ +    snew(md->tmp_r,md->mde_n);
+ +    snew(md->tmp_v,md->mde_n);
+ +    snew(md->grpnms,md->mde_n);
+ +    grpnms = md->grpnms;
+ +
+ +    for(i=0; (i<md->nTC); i++)
+ +    {
+ +        ni=groups->grps[egcTC].nm_ind[i];
+ +        sprintf(buf,"T-%s",*(groups->grpname[ni]));
+ +        grpnms[i]=strdup(buf);
+ +    }
+ +    md->itemp=get_ebin_space(md->ebin,md->nTC,(const char **)grpnms,
+ +                             unit_temp_K);
+ +
+ +    bNoseHoover = (getenv("GMX_NOSEHOOVER_CHAINS") != NULL); /* whether to print Nose-Hoover chains */
+ +
+ +    if (md->etc == etcNOSEHOOVER)
+ +    {
+ +        if (bNoseHoover) 
+ +        {
+ +            if (md->bNHC_trotter) 
+ +            {
+ +                for(i=0; (i<md->nTC); i++) 
+ +                {
+ +                    ni=groups->grps[egcTC].nm_ind[i];
+ +                    bufi = *(groups->grpname[ni]);
+ +                    for(j=0; (j<md->nNHC); j++) 
+ +                    {
+ +                        sprintf(buf,"Xi-%d-%s",j,bufi);
+ +                        grpnms[2*(i*md->nNHC+j)]=strdup(buf);
+ +                        sprintf(buf,"vXi-%d-%s",j,bufi);
+ +                        grpnms[2*(i*md->nNHC+j)+1]=strdup(buf);
+ +                    }
+ +                }
+ +                md->itc=get_ebin_space(md->ebin,md->mde_n,
+ +                                       (const char **)grpnms,unit_invtime);
+ +                if (md->bMTTK) 
+ +                {
+ +                    for(i=0; (i<md->nTCP); i++) 
+ +                    {
+ +                        bufi = baro_nm[0];  /* All barostat DOF's together for now. */
+ +                        for(j=0; (j<md->nNHC); j++) 
+ +                        {
+ +                            sprintf(buf,"Xi-%d-%s",j,bufi);
+ +                            grpnms[2*(i*md->nNHC+j)]=strdup(buf);
+ +                            sprintf(buf,"vXi-%d-%s",j,bufi);
+ +                            grpnms[2*(i*md->nNHC+j)+1]=strdup(buf);
+ +                        }
+ +                    }
+ +                    md->itcb=get_ebin_space(md->ebin,md->mdeb_n,
+ +                                            (const char **)grpnms,unit_invtime);
+ +                }
+ +            } 
+ +            else
+ +            {
+ +                for(i=0; (i<md->nTC); i++) 
+ +                {
+ +                    ni=groups->grps[egcTC].nm_ind[i];
+ +                    bufi = *(groups->grpname[ni]);
+ +                    sprintf(buf,"Xi-%s",bufi);
+ +                    grpnms[2*i]=strdup(buf);
+ +                    sprintf(buf,"vXi-%s",bufi);
+ +                    grpnms[2*i+1]=strdup(buf);
+ +                }
+ +                md->itc=get_ebin_space(md->ebin,md->mde_n,
+ +                                       (const char **)grpnms,unit_invtime);
+ +            }
+ +        }
+ +    }
+ +    else if (md->etc == etcBERENDSEN || md->etc == etcYES || 
+ +             md->etc == etcVRESCALE)
+ +    {
+ +        for(i=0; (i<md->nTC); i++)
+ +        {
+ +            ni=groups->grps[egcTC].nm_ind[i];
+ +            sprintf(buf,"Lamb-%s",*(groups->grpname[ni]));
+ +            grpnms[i]=strdup(buf);
+ +        }
+ +        md->itc=get_ebin_space(md->ebin,md->mde_n,(const char **)grpnms,"");
+ +    }
+ +
+ +    sfree(grpnms);
+ +
+ +
+ +    md->nU=groups->grps[egcACC].nr;
+ +    if (md->nU > 1)
+ +    {
+ +        snew(grpnms,3*md->nU);
+ +        for(i=0; (i<md->nU); i++)
+ +        {
+ +            ni=groups->grps[egcACC].nm_ind[i];
+ +            sprintf(buf,"Ux-%s",*(groups->grpname[ni]));
+ +            grpnms[3*i+XX]=strdup(buf);
+ +            sprintf(buf,"Uy-%s",*(groups->grpname[ni]));
+ +            grpnms[3*i+YY]=strdup(buf);
+ +            sprintf(buf,"Uz-%s",*(groups->grpname[ni]));
+ +            grpnms[3*i+ZZ]=strdup(buf);
+ +        }
+ +        md->iu=get_ebin_space(md->ebin,3*md->nU,(const char **)grpnms,unit_vel);
+ +        sfree(grpnms);
+ +    }
+ +
+ +    if ( fp_ene )
+ +    {
+ +        do_enxnms(fp_ene,&md->ebin->nener,&md->ebin->enm);
+ +    }
+ +
+ +    md->print_grpnms=NULL;
+ +
+ +    /* check whether we're going to write dh histograms */
+ +    md->dhc=NULL; 
+ +    if (ir->separate_dhdl_file == sepdhdlfileNO )
+ +    {
+ +        int i;
+ +        snew(md->dhc, 1);
+ +
+ +        mde_delta_h_coll_init(md->dhc, ir);
+ +        md->fp_dhdl = NULL;
+ +    }
+ +    else
+ +    {
+ +        md->fp_dhdl = fp_dhdl;
+ +    }
+ +    md->dhdl_derivatives = (ir->dhdl_derivatives==dhdlderivativesYES);
+ +    return md;
+ +}
+ +
+ +FILE *open_dhdl(const char *filename,const t_inputrec *ir,
+ +                const output_env_t oenv)
+ +{
+ +    FILE *fp;
+ +    const char *dhdl="dH/d\\lambda",*deltag="\\DeltaH",*lambda="\\lambda";
+ +    char title[STRLEN],label_x[STRLEN],label_y[STRLEN];
+ +    char **setname;
+ +    char buf[STRLEN];
+ +
+ +    sprintf(label_x,"%s (%s)","Time",unit_time);
+ +    if (ir->n_flambda == 0)
+ +    {
+ +        sprintf(title,"%s",dhdl);
+ +        sprintf(label_y,"%s (%s %s)",
+ +                dhdl,unit_energy,"[\\lambda]\\S-1\\N");
+ +    }
+ +    else
+ +    {
+ +        sprintf(title,"%s, %s",dhdl,deltag);
+ +        sprintf(label_y,"(%s)",unit_energy);
+ +    }
+ +    fp = gmx_fio_fopen(filename,"w+");
+ +    xvgr_header(fp,title,label_x,label_y,exvggtXNY,oenv);
+ +
+ +    if (ir->delta_lambda == 0)
+ +    {
+ +        sprintf(buf,"T = %g (K), %s = %g",
+ +                ir->opts.ref_t[0],lambda,ir->init_lambda);
+ +    }
+ +    else
+ +    {
+ +        sprintf(buf,"T = %g (K)",
+ +                ir->opts.ref_t[0]);
+ +    }
+ +    xvgr_subtitle(fp,buf,oenv);
+ +
+ +    if (ir->n_flambda > 0)
+ +    {
+ +        int nsets,s,nsi=0;
+ +        /* g_bar has to determine the lambda values used in this simulation
+ +         * from this xvg legend.  */
+ +        nsets = ( (ir->dhdl_derivatives==dhdlderivativesYES) ? 1 : 0) + 
+ +                  ir->n_flambda;
+ +        snew(setname,nsets);
+ +        if (ir->dhdl_derivatives == dhdlderivativesYES)
+ +        {
+ +            sprintf(buf,"%s %s %g",dhdl,lambda,ir->init_lambda);
-             setname[nsi++] = strdup(buf);
++            setname[nsi++] = gmx_strdup(buf);
+ +        }
+ +        for(s=0; s<ir->n_flambda; s++)
+ +        {
+ +            sprintf(buf,"%s %s %g",deltag,lambda,ir->flambda[s]);
++            setname[nsi++] = gmx_strdup(buf);
+ +        }
+ +        xvgr_legend(fp,nsets,(const char**)setname,oenv);
+ +
+ +        for(s=0; s<nsets; s++)
+ +        {
+ +            sfree(setname[s]);
+ +        }
+ +        sfree(setname);
+ +    }
+ +
+ +    return fp;
+ +}
+ +
+ +static void copy_energy(t_mdebin *md, real e[],real ecpy[])
+ +{
+ +    int i,j;
+ +
+ +    for(i=j=0; (i<F_NRE); i++)
+ +        if (md->bEner[i])
+ +            ecpy[j++] = e[i];
+ +    if (j != md->f_nre) 
+ +        gmx_incons("Number of energy terms wrong");
+ +}
+ +
+ +void upd_mdebin(t_mdebin *md, gmx_bool write_dhdl,
+ +                gmx_bool bSum,
+ +                double time,
+ +                real tmass,
+ +                gmx_enerdata_t *enerd,
+ +                t_state *state,
+ +                matrix  box,
+ +                tensor svir,
+ +                tensor fvir,
+ +                tensor vir,
+ +                tensor pres,
+ +                gmx_ekindata_t *ekind,
+ +                rvec mu_tot,
+ +                gmx_constr_t constr)
+ +{
+ +    int    i,j,k,kk,m,n,gid;
+ +    real   crmsd[2],tmp6[6];
+ +    real   bs[NTRICLBOXS],vol,dens,pv,enthalpy;
+ +    real   eee[egNR];
+ +    real   ecopy[F_NRE];
+ +    real   tmp;
+ +    gmx_bool   bNoseHoover;
+ +
+ +    /* Do NOT use the box in the state variable, but the separate box provided
+ +     * as an argument. This is because we sometimes need to write the box from
+ +     * the last timestep to match the trajectory frames.
+ +     */
+ +    copy_energy(md, enerd->term,ecopy);
+ +    add_ebin(md->ebin,md->ie,md->f_nre,ecopy,bSum);
+ +    if (md->nCrmsd)
+ +    {
+ +        crmsd[0] = constr_rmsd(constr,FALSE);
+ +        if (md->nCrmsd > 1)
+ +        {
+ +            crmsd[1] = constr_rmsd(constr,TRUE);
+ +        }
+ +        add_ebin(md->ebin,md->iconrmsd,md->nCrmsd,crmsd,FALSE);
+ +    }
+ +    if (md->bDynBox)
+ +    {
+ +        int nboxs;
+ +        if(md->bTricl)
+ +        {
+ +            bs[0] = box[XX][XX];
+ +            bs[1] = box[YY][YY];
+ +            bs[2] = box[ZZ][ZZ];
+ +            bs[3] = box[YY][XX];
+ +            bs[4] = box[ZZ][XX];
+ +            bs[5] = box[ZZ][YY];
+ +            nboxs=NTRICLBOXS;
+ +        }
+ +        else
+ +        {
+ +            bs[0] = box[XX][XX];
+ +            bs[1] = box[YY][YY];
+ +            bs[2] = box[ZZ][ZZ];
+ +            nboxs=NBOXS;
+ +        }
+ +        vol  = box[XX][XX]*box[YY][YY]*box[ZZ][ZZ];
+ +        dens = (tmass*AMU)/(vol*NANO*NANO*NANO);
+ +
+ +        /* This is pV (in kJ/mol).  The pressure is the reference pressure,
+ +           not the instantaneous pressure */  
+ +        pv = 0;
+ +        for (i=0;i<DIM;i++) 
+ +        {
+ +            for (j=0;j<DIM;j++) 
+ +            {
+ +                if (i>j) 
+ +                {
+ +                    pv += box[i][j]*md->ref_p[i][j]/PRESFAC;
+ +                } 
+ +                else 
+ +                {
+ +                    pv += box[j][i]*md->ref_p[j][i]/PRESFAC;
+ +                }
+ +            }
+ +        }
+ +
+ +        add_ebin(md->ebin,md->ib   ,nboxs,bs   ,bSum);
+ +        add_ebin(md->ebin,md->ivol ,1    ,&vol ,bSum);
+ +        add_ebin(md->ebin,md->idens,1    ,&dens,bSum);
+ +        add_ebin(md->ebin,md->ipv  ,1    ,&pv  ,bSum);
+ +        enthalpy = pv + enerd->term[F_ETOT];
+ +        add_ebin(md->ebin,md->ienthalpy  ,1    ,&enthalpy  ,bSum);
+ +    }
+ +    if (md->bConstrVir)
+ +    {
+ +        add_ebin(md->ebin,md->isvir,9,svir[0],bSum);
+ +        add_ebin(md->ebin,md->ifvir,9,fvir[0],bSum);
+ +    }
+ +    add_ebin(md->ebin,md->ivir,9,vir[0],bSum);
+ +    add_ebin(md->ebin,md->ipres,9,pres[0],bSum);
+ +    tmp = (pres[ZZ][ZZ]-(pres[XX][XX]+pres[YY][YY])*0.5)*box[ZZ][ZZ];
+ +    add_ebin(md->ebin,md->isurft,1,&tmp,bSum);
+ +    if (md->epc == epcPARRINELLORAHMAN || md->epc == epcMTTK)
+ +    {
+ +        tmp6[0] = state->boxv[XX][XX];
+ +        tmp6[1] = state->boxv[YY][YY];
+ +        tmp6[2] = state->boxv[ZZ][ZZ];
+ +        tmp6[3] = state->boxv[YY][XX];
+ +        tmp6[4] = state->boxv[ZZ][XX];
+ +        tmp6[5] = state->boxv[ZZ][YY];
+ +        add_ebin(md->ebin,md->ipc,md->bTricl ? 6 : 3,tmp6,bSum);
+ +    }
+ +    add_ebin(md->ebin,md->imu,3,mu_tot,bSum);
+ +    if (ekind && ekind->cosacc.cos_accel != 0)
+ +    {
+ +        vol  = box[XX][XX]*box[YY][YY]*box[ZZ][ZZ];
+ +        dens = (tmass*AMU)/(vol*NANO*NANO*NANO);
+ +        add_ebin(md->ebin,md->ivcos,1,&(ekind->cosacc.vcos),bSum);
+ +        /* 1/viscosity, unit 1/(kg m^-1 s^-1) */
+ +        tmp = 1/(ekind->cosacc.cos_accel/(ekind->cosacc.vcos*PICO)
+ +                 *dens*vol*sqr(box[ZZ][ZZ]*NANO/(2*M_PI)));
+ +        add_ebin(md->ebin,md->ivisc,1,&tmp,bSum);    
+ +    }
+ +    if (md->nE > 1)
+ +    {
+ +        n=0;
+ +        for(i=0; (i<md->nEg); i++)
+ +        {
+ +            for(j=i; (j<md->nEg); j++)
+ +            {
+ +                gid=GID(i,j,md->nEg);
+ +                for(k=kk=0; (k<egNR); k++)
+ +                {
+ +                    if (md->bEInd[k])
+ +                    {
+ +                        eee[kk++] = enerd->grpp.ener[k][gid];
+ +                    }
+ +                }
+ +                add_ebin(md->ebin,md->igrp[n],md->nEc,eee,bSum);
+ +                n++;
+ +            }
+ +        }
+ +    }
+ +
+ +    if (ekind)
+ +    {
+ +        for(i=0; (i<md->nTC); i++)
+ +        {
+ +            md->tmp_r[i] = ekind->tcstat[i].T;
+ +        }
+ +        add_ebin(md->ebin,md->itemp,md->nTC,md->tmp_r,bSum);
+ +
+ +        /* whether to print Nose-Hoover chains: */
+ +        bNoseHoover = (getenv("GMX_NOSEHOOVER_CHAINS") != NULL); 
+ +
+ +        if (md->etc == etcNOSEHOOVER)
+ +        {
+ +            if (bNoseHoover) 
+ +            {
+ +                if (md->bNHC_trotter)
+ +                {
+ +                    for(i=0; (i<md->nTC); i++) 
+ +                    {
+ +                        for (j=0;j<md->nNHC;j++) 
+ +                        {
+ +                            k = i*md->nNHC+j;
+ +                            md->tmp_r[2*k] = state->nosehoover_xi[k];
+ +                            md->tmp_r[2*k+1] = state->nosehoover_vxi[k];
+ +                        }
+ +                    }
+ +                    add_ebin(md->ebin,md->itc,md->mde_n,md->tmp_r,bSum);      
+ +
+ +                    if (md->bMTTK) {
+ +                        for(i=0; (i<md->nTCP); i++) 
+ +                        {
+ +                            for (j=0;j<md->nNHC;j++) 
+ +                            {
+ +                                k = i*md->nNHC+j;
+ +                                md->tmp_r[2*k] = state->nhpres_xi[k];
+ +                                md->tmp_r[2*k+1] = state->nhpres_vxi[k];
+ +                            }
+ +                        }
+ +                        add_ebin(md->ebin,md->itcb,md->mdeb_n,md->tmp_r,bSum);      
+ +                    }
+ +
+ +                } 
+ +                else 
+ +                {
+ +                    for(i=0; (i<md->nTC); i++)
+ +                    {
+ +                        md->tmp_r[2*i] = state->nosehoover_xi[i];
+ +                        md->tmp_r[2*i+1] = state->nosehoover_vxi[i];
+ +                    }
+ +                    add_ebin(md->ebin,md->itc,md->mde_n,md->tmp_r,bSum);
+ +                }
+ +            }
+ +        }
+ +        else if (md->etc == etcBERENDSEN || md->etc == etcYES || 
+ +                 md->etc == etcVRESCALE)
+ +        {
+ +            for(i=0; (i<md->nTC); i++)
+ +            {
+ +                md->tmp_r[i] = ekind->tcstat[i].lambda;
+ +            }
+ +            add_ebin(md->ebin,md->itc,md->nTC,md->tmp_r,bSum);
+ +        }
+ +    }
+ +
+ +    if (ekind && md->nU > 1)
+ +    {
+ +        for(i=0; (i<md->nU); i++)
+ +        {
+ +            copy_rvec(ekind->grpstat[i].u,md->tmp_v[i]);
+ +        }
+ +        add_ebin(md->ebin,md->iu,3*md->nU,md->tmp_v[0],bSum);
+ +    }
+ +
+ +    ebin_increase_count(md->ebin,bSum);
+ +
+ +    /* BAR + thermodynamic integration values */
+ +    if (write_dhdl)
+ +    {
+ +        if (md->fp_dhdl)
+ +        {
+ +            fprintf(md->fp_dhdl,"%.4f", time);
+ +
+ +            if (md->dhdl_derivatives)
+ +            {
+ +                fprintf(md->fp_dhdl," %g", enerd->term[F_DVDL]+ 
+ +                                           enerd->term[F_DKDL]+
+ +                                           enerd->term[F_DHDL_CON]);
+ +            }
+ +            for(i=1; i<enerd->n_lambda; i++)
+ +            {
+ +                fprintf(md->fp_dhdl," %g",
+ +                        enerd->enerpart_lambda[i]-enerd->enerpart_lambda[0]);
+ +            }
+ +            fprintf(md->fp_dhdl,"\n");
+ +        }
+ +        /* and the binary BAR output */
+ +        if (md->dhc)
+ +        {
+ +            mde_delta_h_coll_add_dh(md->dhc, 
+ +                                    enerd->term[F_DVDL]+ enerd->term[F_DKDL]+
+ +                                    enerd->term[F_DHDL_CON],
+ +                                    enerd->enerpart_lambda, time, 
+ +                                    state->lambda);
+ +        }
+ +    }
+ +}
+ +
+ +void upd_mdebin_step(t_mdebin *md)
+ +{
+ +    ebin_increase_count(md->ebin,FALSE); 
+ +}
+ +
+ +static void npr(FILE *log,int n,char c)
+ +{
+ +    for(; (n>0); n--) fprintf(log,"%c",c);
+ +}
+ +
+ +static void pprint(FILE *log,const char *s,t_mdebin *md)
+ +{
+ +    char CHAR='#';
+ +    int  slen;
+ +    char buf1[22],buf2[22];
+ +
+ +    slen = strlen(s);
+ +    fprintf(log,"\t<======  ");
+ +    npr(log,slen,CHAR);
+ +    fprintf(log,"  ==>\n");
+ +    fprintf(log,"\t<====  %s  ====>\n",s);
+ +    fprintf(log,"\t<==  ");
+ +    npr(log,slen,CHAR);
+ +    fprintf(log,"  ======>\n\n");
+ +
+ +    fprintf(log,"\tStatistics over %s steps using %s frames\n",
+ +            gmx_step_str(md->ebin->nsteps_sim,buf1),
+ +            gmx_step_str(md->ebin->nsum_sim,buf2));
+ +    fprintf(log,"\n");
+ +}
+ +
+ +void print_ebin_header(FILE *log,gmx_large_int_t steps,double time,real lamb)
+ +{
+ +    char buf[22];
+ +
+ +    fprintf(log,"   %12s   %12s   %12s\n"
+ +            "   %12s   %12.5f   %12.5f\n\n",
+ +            "Step","Time","Lambda",gmx_step_str(steps,buf),time,lamb);
+ +}
+ +
+ +void print_ebin(ener_file_t fp_ene,gmx_bool bEne,gmx_bool bDR,gmx_bool bOR,
+ +                FILE *log,
+ +                gmx_large_int_t step,double time,
+ +                int mode,gmx_bool bCompact,
+ +                t_mdebin *md,t_fcdata *fcd,
+ +                gmx_groups_t *groups,t_grpopts *opts)
+ +{
+ +    /*static char **grpnms=NULL;*/
+ +    char        buf[246];
+ +    int         i,j,n,ni,nj,ndr,nor,b;
+ +    int         ndisre=0;
+ +    real        *disre_rm3tav, *disre_rt;
+ +
+ +    /* these are for the old-style blocks (1 subblock, only reals), because
+ +       there can be only one per ID for these */
+ +    int         nr[enxNR];
+ +    int         id[enxNR];
+ +    real        *block[enxNR];
+ +
+ +    /* temporary arrays for the lambda values to write out */
+ +    double      enxlambda_data[2]; 
+ +
+ +    t_enxframe  fr;
+ +
+ +    switch (mode)
+ +    {
+ +        case eprNORMAL:
+ +            init_enxframe(&fr);
+ +            fr.t            = time;
+ +            fr.step         = step;
+ +            fr.nsteps       = md->ebin->nsteps;
+ +            fr.dt           = md->delta_t;
+ +            fr.nsum         = md->ebin->nsum;
+ +            fr.nre          = (bEne) ? md->ebin->nener : 0;
+ +            fr.ener         = md->ebin->e;
+ +            ndisre          = bDR ? fcd->disres.npair : 0;
+ +            disre_rm3tav    = fcd->disres.rm3tav;
+ +            disre_rt        = fcd->disres.rt;
+ +            /* Optional additional old-style (real-only) blocks. */
+ +            for(i=0; i<enxNR; i++)
+ +            {
+ +                nr[i] = 0;
+ +            }
+ +            if (fcd->orires.nr > 0 && bOR)
+ +            {
+ +                diagonalize_orires_tensors(&(fcd->orires));
+ +                nr[enxOR]     = fcd->orires.nr;
+ +                block[enxOR]  = fcd->orires.otav;
+ +                id[enxOR]     = enxOR;
+ +                nr[enxORI]    = (fcd->orires.oinsl != fcd->orires.otav) ? 
+ +                          fcd->orires.nr : 0;
+ +                block[enxORI] = fcd->orires.oinsl;
+ +                id[enxORI]    = enxORI;
+ +                nr[enxORT]    = fcd->orires.nex*12;
+ +                block[enxORT] = fcd->orires.eig;
+ +                id[enxORT]    = enxORT;
+ +            }        
+ +
+ +            /* whether we are going to write anything out: */
+ +            if (fr.nre || ndisre || nr[enxOR] || nr[enxORI])
+ +            {
+ +
+ +                /* the old-style blocks go first */
+ +                fr.nblock = 0;
+ +                for(i=0; i<enxNR; i++)
+ +                {
+ +                    if (nr[i] > 0)
+ +                    {
+ +                        fr.nblock = i + 1;
+ +                    }
+ +                }
+ +                add_blocks_enxframe(&fr, fr.nblock);
+ +                for(b=0;b<fr.nblock;b++)
+ +                {
+ +                    add_subblocks_enxblock(&(fr.block[b]), 1);
+ +                    fr.block[b].id=id[b]; 
+ +                    fr.block[b].sub[0].nr = nr[b];
+ +#ifndef GMX_DOUBLE
+ +                    fr.block[b].sub[0].type = xdr_datatype_float;
+ +                    fr.block[b].sub[0].fval = block[b];
+ +#else
+ +                    fr.block[b].sub[0].type = xdr_datatype_double;
+ +                    fr.block[b].sub[0].dval = block[b];
+ +#endif
+ +                }
+ +
+ +                /* check for disre block & fill it. */
+ +                if (ndisre>0)
+ +                {
+ +                    int db = fr.nblock;
+ +                    fr.nblock+=1;
+ +                    add_blocks_enxframe(&fr, fr.nblock);
+ +
+ +                    add_subblocks_enxblock(&(fr.block[db]), 2);
+ +                    fr.block[db].id=enxDISRE;
+ +                    fr.block[db].sub[0].nr=ndisre;
+ +                    fr.block[db].sub[1].nr=ndisre;
+ +#ifndef GMX_DOUBLE
+ +                    fr.block[db].sub[0].type=xdr_datatype_float;
+ +                    fr.block[db].sub[1].type=xdr_datatype_float;
+ +                    fr.block[db].sub[0].fval=disre_rt;
+ +                    fr.block[db].sub[1].fval=disre_rm3tav;
+ +#else
+ +                    fr.block[db].sub[0].type=xdr_datatype_double;
+ +                    fr.block[db].sub[1].type=xdr_datatype_double;
+ +                    fr.block[db].sub[0].dval=disre_rt;
+ +                    fr.block[db].sub[1].dval=disre_rm3tav;
+ +#endif
+ +                }
+ +                /* here we can put new-style blocks */
+ +
+ +                /* Free energy perturbation blocks */
+ +                if (md->dhc)
+ +                {
+ +                    mde_delta_h_coll_handle_block(md->dhc, &fr, fr.nblock);
+ +                }
+ +
+ +                /* do the actual I/O */
+ +                do_enx(fp_ene,&fr);
+ +                gmx_fio_check_file_position(enx_file_pointer(fp_ene));
+ +                if (fr.nre)
+ +                {
+ +                    /* We have stored the sums, so reset the sum history */
+ +                    reset_ebin_sums(md->ebin);
+ +                }
+ +
+ +                /* we can now free & reset the data in the blocks */
+ +                if (md->dhc)
+ +                    mde_delta_h_coll_reset(md->dhc);
+ +            }
+ +            free_enxframe(&fr);
+ +            break;
+ +        case eprAVER:
+ +            if (log)
+ +            {
+ +                pprint(log,"A V E R A G E S",md);
+ +            }
+ +            break;
+ +        case eprRMS:
+ +            if (log)
+ +            {
+ +                pprint(log,"R M S - F L U C T U A T I O N S",md);
+ +            }
+ +            break;
+ +        default:
+ +            gmx_fatal(FARGS,"Invalid print mode (%d)",mode);
+ +    }
+ +
+ +    if (log)
+ +    {
+ +        for(i=0;i<opts->ngtc;i++)
+ +        {
+ +            if(opts->annealing[i]!=eannNO)
+ +            {
+ +                fprintf(log,"Current ref_t for group %s: %8.1f\n",
+ +                        *(groups->grpname[groups->grps[egcTC].nm_ind[i]]),
+ +                        opts->ref_t[i]);
+ +            }
+ +        }
+ +        if (mode==eprNORMAL && fcd->orires.nr>0)
+ +        {
+ +            print_orires_log(log,&(fcd->orires));
+ +        }
+ +        fprintf(log,"   Energies (%s)\n",unit_energy);
+ +        pr_ebin(log,md->ebin,md->ie,md->f_nre+md->nCrmsd,5,mode,TRUE);  
+ +        fprintf(log,"\n");
+ +
+ +        if (!bCompact)
+ +        {
+ +            if (md->bDynBox)
+ +            {
+ +                pr_ebin(log,md->ebin,md->ib, md->bTricl ? NTRICLBOXS : NBOXS,5,
+ +                        mode,TRUE);      
+ +                fprintf(log,"\n");
+ +            }
+ +            if (md->bConstrVir)
+ +            {
+ +                fprintf(log,"   Constraint Virial (%s)\n",unit_energy);
+ +                pr_ebin(log,md->ebin,md->isvir,9,3,mode,FALSE);  
+ +                fprintf(log,"\n");
+ +                fprintf(log,"   Force Virial (%s)\n",unit_energy);
+ +                pr_ebin(log,md->ebin,md->ifvir,9,3,mode,FALSE);  
+ +                fprintf(log,"\n");
+ +            }
+ +            fprintf(log,"   Total Virial (%s)\n",unit_energy);
+ +            pr_ebin(log,md->ebin,md->ivir,9,3,mode,FALSE);   
+ +            fprintf(log,"\n");
+ +            fprintf(log,"   Pressure (%s)\n",unit_pres_bar);
+ +            pr_ebin(log,md->ebin,md->ipres,9,3,mode,FALSE);  
+ +            fprintf(log,"\n");
+ +            fprintf(log,"   Total Dipole (%s)\n",unit_dipole_D);
+ +            pr_ebin(log,md->ebin,md->imu,3,3,mode,FALSE);    
+ +            fprintf(log,"\n");
+ +
+ +            if (md->nE > 1)
+ +            {
+ +                if (md->print_grpnms==NULL)
+ +                {
+ +                    snew(md->print_grpnms,md->nE);
+ +                    n=0;
+ +                    for(i=0; (i<md->nEg); i++)
+ +                    {
+ +                        ni=groups->grps[egcENER].nm_ind[i];
+ +                        for(j=i; (j<md->nEg); j++)
+ +                        {
+ +                            nj=groups->grps[egcENER].nm_ind[j];
+ +                            sprintf(buf,"%s-%s",*(groups->grpname[ni]),
+ +                                    *(groups->grpname[nj]));
+ +                            md->print_grpnms[n++]=strdup(buf);
+ +                        }
+ +                    }
+ +                }
+ +                sprintf(buf,"Epot (%s)",unit_energy);
+ +                fprintf(log,"%15s   ",buf);
+ +                for(i=0; (i<egNR); i++)
+ +                {
+ +                    if (md->bEInd[i])
+ +                    {
+ +                        fprintf(log,"%12s   ",egrp_nm[i]);
+ +                    }
+ +                }
+ +                fprintf(log,"\n");
+ +                for(i=0; (i<md->nE); i++)
+ +                {
+ +                    fprintf(log,"%15s",md->print_grpnms[i]);
+ +                    pr_ebin(log,md->ebin,md->igrp[i],md->nEc,md->nEc,mode,
+ +                            FALSE);
+ +                }
+ +                fprintf(log,"\n");
+ +            }
+ +            if (md->nTC > 1)
+ +            {
+ +                pr_ebin(log,md->ebin,md->itemp,md->nTC,4,mode,TRUE);
+ +                fprintf(log,"\n");
+ +            }
+ +            if (md->nU > 1)
+ +            {
+ +                fprintf(log,"%15s   %12s   %12s   %12s\n",
+ +                        "Group","Ux","Uy","Uz");
+ +                for(i=0; (i<md->nU); i++)
+ +                {
+ +                    ni=groups->grps[egcACC].nm_ind[i];
+ +                    fprintf(log,"%15s",*groups->grpname[ni]);
+ +                    pr_ebin(log,md->ebin,md->iu+3*i,3,3,mode,FALSE);
+ +                }
+ +                fprintf(log,"\n");
+ +            }
+ +        }
+ +    }
+ +
+ +}
+ +
+ +void update_energyhistory(energyhistory_t * enerhist,t_mdebin * mdebin)
+ +{
+ +    int i;
+ +
+ +    enerhist->nsteps     = mdebin->ebin->nsteps;
+ +    enerhist->nsum       = mdebin->ebin->nsum;
+ +    enerhist->nsteps_sim = mdebin->ebin->nsteps_sim;
+ +    enerhist->nsum_sim   = mdebin->ebin->nsum_sim;
+ +    enerhist->nener      = mdebin->ebin->nener;
+ +
+ +    if (mdebin->ebin->nsum > 0)
+ +    {
+ +        /* Check if we need to allocate first */
+ +        if(enerhist->ener_ave == NULL)
+ +        {
+ +            snew(enerhist->ener_ave,enerhist->nener);
+ +            snew(enerhist->ener_sum,enerhist->nener);
+ +        }
+ +
+ +        for(i=0;i<enerhist->nener;i++)
+ +        {
+ +            enerhist->ener_ave[i] = mdebin->ebin->e[i].eav;
+ +            enerhist->ener_sum[i] = mdebin->ebin->e[i].esum;
+ +        }
+ +    }
+ +
+ +    if (mdebin->ebin->nsum_sim > 0)
+ +    {
+ +        /* Check if we need to allocate first */
+ +        if(enerhist->ener_sum_sim == NULL)
+ +        {
+ +            snew(enerhist->ener_sum_sim,enerhist->nener);
+ +        }
+ +
+ +        for(i=0;i<enerhist->nener;i++)
+ +        {
+ +            enerhist->ener_sum_sim[i] = mdebin->ebin->e_sim[i].esum;
+ +        }
+ +    }
+ +    if (mdebin->dhc)
+ +    {
+ +        mde_delta_h_coll_update_energyhistory(mdebin->dhc, enerhist);
+ +    }
+ +}
+ +
+ +void restore_energyhistory_from_state(t_mdebin * mdebin,
+ +                                      energyhistory_t * enerhist)
+ +{
+ +    int i;
+ +
+ +    if ((enerhist->nsum > 0 || enerhist->nsum_sim > 0) &&
+ +        mdebin->ebin->nener != enerhist->nener)
+ +    {
+ +        gmx_fatal(FARGS,"Mismatch between number of energies in run input (%d) and checkpoint file (%d).",
+ +                  mdebin->ebin->nener,enerhist->nener);
+ +    }
+ +
+ +    mdebin->ebin->nsteps     = enerhist->nsteps;
+ +    mdebin->ebin->nsum       = enerhist->nsum;
+ +    mdebin->ebin->nsteps_sim = enerhist->nsteps_sim;
+ +    mdebin->ebin->nsum_sim   = enerhist->nsum_sim;
+ +
+ +    for(i=0; i<mdebin->ebin->nener; i++)
+ +    {
+ +        mdebin->ebin->e[i].eav  =
+ +                  (enerhist->nsum > 0 ? enerhist->ener_ave[i] : 0);
+ +        mdebin->ebin->e[i].esum =
+ +                  (enerhist->nsum > 0 ? enerhist->ener_sum[i] : 0);
+ +        mdebin->ebin->e_sim[i].esum =
+ +                  (enerhist->nsum_sim > 0 ? enerhist->ener_sum_sim[i] : 0);
+ +    }
+ +    if (mdebin->dhc)
+ +    {         
+ +        mde_delta_h_coll_restore_energyhistory(mdebin->dhc, enerhist);
+ +    }
+ +}
diff --cc src/gromacs/mdlib/nlistheuristics.c

index 0000000000000000000000000000000000000000,0848e24f2eaa83c00a73bef512fa07902ae69519..0848e24f2eaa83c00a73bef512fa07902ae69519

mode 000000,100644..100644
--- /dev/null
--- 2/src/mdlib/nlistheuristics.c
+++ b/src/gromacs/mdlib/nlistheuristics.c
diff --cc src/gromacs/mdlib/update.c
Simple merge
diff --cc src/programs/g_protonate/g_protonate.c
Simple merge
diff --cc src/programs/g_x2top/g_x2top.c
Simple merge
diff --cc src/programs/gmxdump/gmxdump.c
Simple merge
diff --cc src/programs/grompp/grompp.c

index 59d65f557f21a88cf801086125844319d71b96f5,0000000000000000000000000000000000000000..19c181fb3c2d6eb836a478b3105033698d17b28c

mode 100644,000000..100644
--- 1/src/programs/grompp/grompp.c
--- /dev/null
+++ b/src/programs/grompp/grompp.c
@@@ -1,1598 -1,0 +1,1600 @@@
-     "[TT]grompp[tt] uses a built-in preprocessor to resolve includes, macros ",
-     "etcetera. The preprocessor supports the following keywords:[BR]",
+ +/*  -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ + *
+ + * 
+ + *                This source code is part of
+ + * 
+ + *                 G   R   O   M   A   C   S
+ + * 
+ + *          GROningen MAchine for Chemical Simulations
+ + * 
+ + *                        VERSION 3.2.03
+ + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ + * Copyright (c) 2001-2004, The GROMACS development team,
+ + * check out http://www.gromacs.org for more information.
+ +
+ + * This program is free software; you can redistribute it and/or
+ + * modify it under the terms of the GNU General Public License
+ + * as published by the Free Software Foundation; either version 2
+ + * of the License, or (at your option) any later version.
+ + * 
+ + * If you want to redistribute modifications, please consider that
+ + * scientific software is very special. Version control is crucial -
+ + * bugs must be traceable. We will be happy to consider code for
+ + * inclusion in the official distribution, but derived work must not
+ + * be called official GROMACS. Details are found in the README & COPYING
+ + * files - if they are missing, get the official version at www.gromacs.org.
+ + * 
+ + * To help us fund GROMACS development, we humbly ask that you cite
+ + * the papers on the package - you can find them in the top README file.
+ + * 
+ + * For more info, check our website at http://www.gromacs.org
+ + * 
+ + * And Hey:
+ + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
+ + */
+ +#ifdef HAVE_CONFIG_H
+ +#include <config.h>
+ +#endif
+ +
+ +#include <sys/types.h>
+ +#include <math.h>
+ +#include <string.h>
+ +#include <errno.h>
+ +#include <limits.h>
+ +
+ +#include "sysstuff.h"
+ +#include "smalloc.h"
+ +#include "macros.h"
+ +#include "string2.h"
+ +#include "readir.h"
+ +#include "toputil.h"
+ +#include "topio.h"
+ +#include "confio.h"
+ +#include "copyrite.h"
+ +#include "readir.h"
+ +#include "symtab.h"
+ +#include "names.h"
+ +#include "grompp.h"
+ +#include "random.h"
+ +#include "vec.h"
+ +#include "futil.h"
+ +#include "statutil.h"
+ +#include "splitter.h"
+ +#include "sortwater.h"
+ +#include "convparm.h"
+ +#include "gmx_fatal.h"
+ +#include "warninp.h"
+ +#include "index.h"
+ +#include "gmxfio.h"
+ +#include "trnio.h"
+ +#include "tpxio.h"
+ +#include "vsite_parm.h"
+ +#include "txtdump.h"
+ +#include "calcgrid.h"
+ +#include "add_par.h"
+ +#include "enxio.h"
+ +#include "perf_est.h"
+ +#include "compute_io.h"
+ +#include "gpp_atomtype.h"
+ +#include "gpp_tomorse.h"
+ +#include "mtop_util.h"
+ +#include "genborn.h"
+ +
+ +static int rm_interactions(int ifunc,int nrmols,t_molinfo mols[])
+ +{
+ +  int  i,n;
+ +  
+ +  n=0;
+ +  /* For all the molecule types */
+ +  for(i=0; i<nrmols; i++) {
+ +    n += mols[i].plist[ifunc].nr;
+ +    mols[i].plist[ifunc].nr=0;
+ +  }
+ +  return n;
+ +}
+ +
+ +static int check_atom_names(const char *fn1, const char *fn2, 
+ +                          gmx_mtop_t *mtop, t_atoms *at)
+ +{
+ +  int mb,m,i,j,nmismatch;
+ +  t_atoms *tat;
+ +#define MAXMISMATCH 20
+ +
+ +  if (mtop->natoms != at->nr)
+ +    gmx_incons("comparing atom names");
+ +  
+ +  nmismatch=0;
+ +  i = 0;
+ +  for(mb=0; mb<mtop->nmolblock; mb++) {
+ +    tat = &mtop->moltype[mtop->molblock[mb].type].atoms;
+ +    for(m=0; m<mtop->molblock[mb].nmol; m++) {
+ +      for(j=0; j < tat->nr; j++) {
+ +      if (strcmp( *(tat->atomname[j]) , *(at->atomname[i]) ) != 0) {
+ +        if (nmismatch < MAXMISMATCH) {
+ +          fprintf(stderr,
+ +                  "Warning: atom name %d in %s and %s does not match (%s - %s)\n",
+ +                  i+1, fn1, fn2, *(tat->atomname[j]), *(at->atomname[i]));
+ +        } else if (nmismatch == MAXMISMATCH) {
+ +          fprintf(stderr,"(more than %d non-matching atom names)\n",MAXMISMATCH);
+ +        }
+ +        nmismatch++;
+ +      }
+ +      i++;
+ +      }
+ +    }
+ +  }
+ +
+ +  return nmismatch;
+ +}
+ +
+ +static void check_eg_vs_cg(gmx_mtop_t *mtop)
+ +{
+ +  int astart,mb,m,cg,j,firstj;
+ +  unsigned char firsteg,eg;
+ +  gmx_moltype_t *molt;
+ +  
+ +  /* Go through all the charge groups and make sure all their
+ +   * atoms are in the same energy group.
+ +   */
+ +  
+ +  astart = 0;
+ +  for(mb=0; mb<mtop->nmolblock; mb++) {
+ +    molt = &mtop->moltype[mtop->molblock[mb].type];
+ +    for(m=0; m<mtop->molblock[mb].nmol; m++) {
+ +      for(cg=0; cg<molt->cgs.nr;cg++) {
+ +      /* Get the energy group of the first atom in this charge group */
+ +      firstj = astart + molt->cgs.index[cg];
+ +      firsteg = ggrpnr(&mtop->groups,egcENER,firstj);
+ +      for(j=molt->cgs.index[cg]+1;j<molt->cgs.index[cg+1];j++) {
+ +        eg = ggrpnr(&mtop->groups,egcENER,astart+j);
+ +        if(eg != firsteg) {
+ +          gmx_fatal(FARGS,"atoms %d and %d in charge group %d of molecule type '%s' are in different energy groups",
+ +                    firstj+1,astart+j+1,cg+1,*molt->name);
+ +        }
+ +      }
+ +      }
+ +      astart += molt->atoms.nr;
+ +    }
+ +  }  
+ +}
+ +
+ +static void check_cg_sizes(const char *topfn,t_block *cgs,warninp_t wi)
+ +{
+ +    int  maxsize,cg;
+ +    char warn_buf[STRLEN];
+ +
+ +    maxsize = 0;
+ +    for(cg=0; cg<cgs->nr; cg++)
+ +    {
+ +        maxsize = max(maxsize,cgs->index[cg+1]-cgs->index[cg]);
+ +    }
+ +    
+ +    if (maxsize > MAX_CHARGEGROUP_SIZE)
+ +    {
+ +        gmx_fatal(FARGS,"The largest charge group contains %d atoms. The maximum is %d.",maxsize,MAX_CHARGEGROUP_SIZE);
+ +    }
+ +    else if (maxsize > 10)
+ +    {
+ +        set_warning_line(wi,topfn,-1);
+ +        sprintf(warn_buf,
+ +                "The largest charge group contains %d atoms.\n"
+ +                "Since atoms only see each other when the centers of geometry of the charge groups they belong to are within the cut-off distance, too large charge groups can lead to serious cut-off artifacts.\n"
+ +                "For efficiency and accuracy, charge group should consist of a few atoms.\n"
+ +                "For all-atom force fields use: CH3, CH2, CH, NH2, NH, OH, CO2, CO, etc.",
+ +                maxsize);
+ +        warning_note(wi,warn_buf);
+ +    }
+ +}
+ +
+ +static void check_bonds_timestep(gmx_mtop_t *mtop,double dt,warninp_t wi)
+ +{
+ +    /* This check is not intended to ensure accurate integration,
+ +     * rather it is to signal mistakes in the mdp settings.
+ +     * A common mistake is to forget to turn on constraints
+ +     * for MD after energy minimization with flexible bonds.
+ +     * This check can also detect too large time steps for flexible water
+ +     * models, but such errors will often be masked by the constraints
+ +     * mdp option, which turns flexible water into water with bond constraints,
+ +     * but without an angle constraint. Unfortunately such incorrect use
+ +     * of water models can not easily be detected without checking
+ +     * for specific model names.
+ +     *
+ +     * The stability limit of leap-frog or velocity verlet is 4.44 steps
+ +     * per oscillational period.
+ +     * But accurate bond distributions are lost far before that limit.
+ +     * To allow relatively common schemes (although not common with Gromacs)
+ +     * of dt=1 fs without constraints and dt=2 fs with only H-bond constraints
+ +     * we set the note limit to 10.
+ +     */
+ +    int       min_steps_warn=5;
+ +    int       min_steps_note=10;
+ +    t_iparams *ip;
+ +    int       molt;
+ +    gmx_moltype_t *moltype,*w_moltype;
+ +    t_atom    *atom;
+ +    t_ilist   *ilist,*ilb,*ilc,*ils;
+ +    int       ftype;
+ +    int       i,a1,a2,w_a1,w_a2,j;
+ +    real      twopi2,limit2,fc,re,m1,m2,period2,w_period2;
+ +    gmx_bool  bFound,bWater,bWarn;
+ +    char      warn_buf[STRLEN];
+ +
+ +    ip = mtop->ffparams.iparams;
+ +
+ +    twopi2 = sqr(2*M_PI);
+ +
+ +    limit2 = sqr(min_steps_note*dt);
+ +
+ +    w_a1 = w_a2 = -1;
+ +    w_period2 = -1.0;
+ +    
+ +    w_moltype = NULL;
+ +    for(molt=0; molt<mtop->nmoltype; molt++)
+ +    {
+ +        moltype = &mtop->moltype[molt];
+ +        atom  = moltype->atoms.atom;
+ +        ilist = moltype->ilist;
+ +        ilc = &ilist[F_CONSTR];
+ +        ils = &ilist[F_SETTLE];
+ +        for(ftype=0; ftype<F_NRE; ftype++)
+ +        {
+ +            if (!(ftype == F_BONDS || ftype == F_G96BONDS || ftype == F_HARMONIC))
+ +            {
+ +                continue;
+ +            }
+ +            
+ +            ilb = &ilist[ftype];
+ +            for(i=0; i<ilb->nr; i+=3)
+ +            {
+ +                fc = ip[ilb->iatoms[i]].harmonic.krA;
+ +                re = ip[ilb->iatoms[i]].harmonic.rA;
+ +                if (ftype == F_G96BONDS)
+ +                {
+ +                    /* Convert squared square fc to harmonic fc */
+ +                    fc = 2*fc*re;
+ +                }
+ +                a1 = ilb->iatoms[i+1];
+ +                a2 = ilb->iatoms[i+2];
+ +                m1 = atom[a1].m;
+ +                m2 = atom[a2].m;
+ +                if (fc > 0 && m1 > 0 && m2 > 0)
+ +                {
+ +                    period2 = twopi2*m1*m2/((m1 + m2)*fc);
+ +                }
+ +                else
+ +                {
+ +                    period2 = GMX_FLOAT_MAX;
+ +                }
+ +                if (debug)
+ +                {
+ +                    fprintf(debug,"fc %g m1 %g m2 %g period %g\n",
+ +                            fc,m1,m2,sqrt(period2));
+ +                }
+ +                if (period2 < limit2)
+ +                {
+ +                    bFound = FALSE;
+ +                    for(j=0; j<ilc->nr; j+=3)
+ +                    {
+ +                        if ((ilc->iatoms[j+1] == a1 && ilc->iatoms[j+2] == a2) ||
+ +                            (ilc->iatoms[j+1] == a2 && ilc->iatoms[j+2] == a1))
+ +                            {
+ +                                bFound = TRUE;
+ +                            }
+ +                        }
+ +                    for(j=0; j<ils->nr; j+=2)
+ +                    {
+ +                        if ((a1 >= ils->iatoms[j+1] && a1 < ils->iatoms[j+1]+3) &&
+ +                            (a2 >= ils->iatoms[j+1] && a2 < ils->iatoms[j+1]+3))
+ +                        {
+ +                            bFound = TRUE;
+ +                        }
+ +                    }
+ +                    if (!bFound &&
+ +                        (w_moltype == NULL || period2 < w_period2))
+ +                    {
+ +                        w_moltype = moltype;
+ +                        w_a1      = a1;
+ +                        w_a2      = a2;
+ +                        w_period2 = period2;
+ +                    }
+ +                }
+ +            }
+ +        }
+ +    }
+ +    
+ +    if (w_moltype != NULL)
+ +    {
+ +        bWarn = (w_period2 < sqr(min_steps_warn*dt));
+ +        /* A check that would recognize most water models */
+ +        bWater = ((*w_moltype->atoms.atomname[0])[0] == 'O' &&
+ +                  w_moltype->atoms.nr <= 5);
+ +        sprintf(warn_buf,"The bond in molecule-type %s between atoms %d %s and %d %s has an estimated oscillational period of %.1e ps, which is less than %d times the time step of %.1e ps.\n"
+ +                "%s",
+ +                *w_moltype->name,
+ +                w_a1+1,*w_moltype->atoms.atomname[w_a1],
+ +                w_a2+1,*w_moltype->atoms.atomname[w_a2],
+ +                sqrt(w_period2),bWarn ? min_steps_warn : min_steps_note,dt,
+ +                bWater ?
+ +                "Maybe you asked for fexible water." :
+ +                "Maybe you forgot to change the constraints mdp option.");
+ +        if (bWarn)
+ +        {
+ +            warning(wi,warn_buf);
+ +        }
+ +        else
+ +        {
+ +            warning_note(wi,warn_buf);
+ +        }
+ +    }
+ +}
+ +
+ +/* Zero the velocity of every shell, bond and virtual-site particle.
+ + *
+ + * Walks all atoms of mtop with the atom-loop helper and clears v[a]
+ + * for each atom whose particle type is eptShell, eptBond or eptVSite;
+ + * the velocities of all other atoms are left untouched.
+ + */
+ +static void check_vel(gmx_mtop_t *mtop,rvec v[])
+ +{
+ +    gmx_mtop_atomloop_all_t loop;
+ +    t_atom *at;
+ +    int    idx;
+ +
+ +    loop = gmx_mtop_atomloop_all_init(mtop);
+ +    while (gmx_mtop_atomloop_all_next(loop,&idx,&at))
+ +    {
+ +        switch (at->ptype)
+ +        {
+ +        case eptShell:
+ +        case eptBond:
+ +        case eptVSite:
+ +            clear_rvec(v[idx]);
+ +            break;
+ +        default:
+ +            /* Ordinary particles keep their velocity */
+ +            break;
+ +        }
+ +    }
+ +}
+ +
+ +/* Count the interactions of type ftype in the whole system.
+ + *
+ + * For every molecule block the number of plist[ftype] entries of its
+ + * molecule type is multiplied by the number of molecules in the block,
+ + * and the products are summed.
+ + *
+ + * The result is a count, so the function returns int rather than a
+ + * boolean; callers that only need a yes/no answer can compare the
+ + * value with zero or pass it where a truth value is expected.
+ + */
+ +static int nint_ftype(gmx_mtop_t *mtop,t_molinfo *mi,int ftype)
+ +{
+ +  int nint,mb;
+ +
+ +  nint = 0;
+ +  for(mb=0; mb<mtop->nmolblock; mb++) {
+ +    nint += mtop->molblock[mb].nmol*mi[mtop->molblock[mb].type].plist[ftype].nr;
+ +  }
+ +
+ +  return nint;
+ +}
+ +
+ +/* This routine reorders the molecule type array
+ + * in the order of use in the molblocks,
+ + * unused molecule types are deleted.
+ + *
+ + * sys      - topology whose molblock[].type entries are rewritten to
+ + *            the new, compacted molecule type numbers
+ + * nmolinfo - in: number of entries in *molinfo;
+ + *            out: number of molecule types that are actually used
+ + * molinfo  - in: the original array, which is freed here;
+ + *            out: a newly allocated array in order of first use
+ + */
+ +static void renumber_moltypes(gmx_mtop_t *sys,
+ +                            int *nmolinfo,t_molinfo **molinfo)
+ +{
+ +  int *order,norder,i;
+ +  int mb,mi;
+ +  t_molinfo *minew;
+ +
+ +  /* order[new index] = old molecule type index */
+ +  snew(order,*nmolinfo);
+ +  norder = 0;
+ +  for(mb=0; mb<sys->nmolblock; mb++) {
+ +    for(i=0; i<norder; i++) {
+ +      if (order[i] == sys->molblock[mb].type) {
+ +        break;
+ +      }
+ +    }
+ +    if (i == norder) {
+ +      /* This type did not occur yet, add it */
+ +      order[norder] = sys->molblock[mb].type;
+ +      norder++;
+ +    }
+ +    /* Renumber the moltype in the topology */
+ +    sys->molblock[mb].type = i;
+ +  }
+ +  
+ +  /* We still need to reorder the molinfo structs */
+ +  snew(minew,norder);
+ +  for(mi=0; mi<*nmolinfo; mi++) {
+ +    for(i=0; i<norder; i++) {
+ +      if (order[i] == mi) {
+ +        break;
+ +      }
+ +    }
+ +    if (i == norder) {
+ +      /* Not referenced by any molblock: release it */
+ +      done_mi(&(*molinfo)[mi]);
+ +    } else {
+ +      minew[i] = (*molinfo)[mi];
+ +    }
+ +  }
+ +  sfree(*molinfo);
+ +  /* The lookup table is only needed inside this routine */
+ +  sfree(order);
+ +
+ +  *nmolinfo = norder;
+ +  *molinfo  = minew;
+ +}
+ +
+ +/* Create the molecule type array of mtop from the molinfo array.
+ + *
+ + * Allocates nmi molecule types in mtop and copies name, atoms,
+ + * charge groups (cgs) and exclusions (excls) of each molinfo entry.
+ + * The interaction lists are not filled in here.
+ + */
+ +static void molinfo2mtop(int nmi,t_molinfo *mi,gmx_mtop_t *mtop)
+ +{
+ +    int           idx;
+ +    t_molinfo     *src;
+ +    gmx_moltype_t *dest;
+ +
+ +    mtop->nmoltype = nmi;
+ +    snew(mtop->moltype,nmi);
+ +
+ +    for(idx=0; idx<nmi; idx++)
+ +    {
+ +        src  = &mi[idx];
+ +        dest = &mtop->moltype[idx];
+ +
+ +        dest->name  = src->name;
+ +        dest->atoms = src->atoms;
+ +        /* ilists are copied later */
+ +        dest->cgs   = src->cgs;
+ +        dest->excls = src->excls;
+ +    }
+ +}
+ +
+ +static void
+ +new_status(const char *topfile,const char *topppfile,const char *confin,
+ +           t_gromppopts *opts,t_inputrec *ir,gmx_bool bZero,
+ +           gmx_bool bGenVel,gmx_bool bVerbose,t_state *state,
+ +           gpp_atomtype_t atype,gmx_mtop_t *sys,
+ +           int *nmi,t_molinfo **mi,t_params plist[],
+ +           int *comb,double *reppow,real *fudgeQQ,
+ +           gmx_bool bMorse,
+ +           warninp_t wi)
+ +{ /* Set up a new run from a topology file and a coordinate file.
+ +   *
+ +   * Steps, in order:
+ +   *  - do_top() processes topfile and returns the molinfo array and
+ +   *    the list of molecule blocks;
+ +   *  - the blocks are copied into sys, merging consecutive blocks of
+ +   *    the same molecule type and counting the atoms in sys->natoms;
+ +   *  - molecule types are renumbered, harmonics are converted when
+ +   *    bMorse is set, and restraint interactions that are switched
+ +   *    off in ir/opts are removed with a note;
+ +   *  - the coordinate file confin is read into state and its atom
+ +   *    count must equal sys->natoms (fatal error otherwise);
+ +   *  - when bGenVel is set, velocities are generated.
+ +   *
+ +   * On return *nmi and *mi hold the number of molecule types and the
+ +   * molinfo array as left by renumber_moltypes().
+ +   */
+ +  t_molinfo   *molinfo=NULL;
+ +  int         nmolblock;
+ +  gmx_molblock_t *molblock,*molbs;
+ +  t_atoms     *confat;
+ +  int         mb,i,nrmols,nmismatch;
+ +  char        buf[STRLEN];
+ +  gmx_bool        bGB=FALSE;
+ +  char        warn_buf[STRLEN];
+ +
+ +  init_mtop(sys);
+ +
+ +  /* Set gmx_boolean for GB */
+ +  if(ir->implicit_solvent)
+ +    bGB=TRUE;
+ +  
+ +  /* TOPOLOGY processing */
+ +  sys->name = do_top(bVerbose,topfile,topppfile,opts,bZero,&(sys->symtab),
+ +                     plist,comb,reppow,fudgeQQ,
+ +                     atype,&nrmols,&molinfo,ir,
+ +                     &nmolblock,&molblock,bGB,
+ +                     wi);
+ +  
+ +  sys->nmolblock = 0; /* counts the blocks kept after merging */
+ +  snew(sys->molblock,nmolblock); /* sized for the case that nothing merges */
+ +  
+ +  sys->natoms = 0;
+ +  for(mb=0; mb<nmolblock; mb++) {
+ +    if (sys->nmolblock > 0 &&
+ +      molblock[mb].type == sys->molblock[sys->nmolblock-1].type) {
+ +      /* Merge consecutive blocks with the same molecule type */
+ +      sys->molblock[sys->nmolblock-1].nmol += molblock[mb].nmol;
+ +      sys->natoms += molblock[mb].nmol*sys->molblock[sys->nmolblock-1].natoms_mol;
+ +    } else if (molblock[mb].nmol > 0) { /* blocks with zero molecules are dropped */
+ +      /* Add a new molblock to the topology */
+ +      molbs = &sys->molblock[sys->nmolblock];
+ +      *molbs = molblock[mb];
+ +      molbs->natoms_mol = molinfo[molbs->type].atoms.nr;
+ +      molbs->nposres_xA = 0;
+ +      molbs->nposres_xB = 0;
+ +      sys->natoms += molbs->nmol*molbs->natoms_mol;
+ +      sys->nmolblock++;
+ +    }
+ +  }
+ +  if (sys->nmolblock == 0) {
+ +    gmx_fatal(FARGS,"No molecules were defined in the system");
+ +  }
+ +
+ +  renumber_moltypes(sys,&nrmols,&molinfo); /* may shrink nrmols and reallocate molinfo */
+ +
+ +  if (bMorse)
+ +    convert_harmonics(nrmols,molinfo,atype);
+ +
+ +  /* Remove restraint interactions that are switched off; i receives
+ +   * the value returned by rm_interactions and is reported as the
+ +   * number of restraints removed.
+ +   */
+ +  if (ir->eDisre == edrNone) {
+ +    i = rm_interactions(F_DISRES,nrmols,molinfo);
+ +    if (i > 0) {
+ +      set_warning_line(wi,"unknown",-1);
+ +      sprintf(warn_buf,"disre = no, removed %d distance restraints",i);
+ +      warning_note(wi,warn_buf);
+ +    }
+ +  }
+ +  if (opts->bOrire == FALSE) {
+ +    i = rm_interactions(F_ORIRES,nrmols,molinfo);
+ +    if (i > 0) {
+ +      set_warning_line(wi,"unknown",-1);
+ +      sprintf(warn_buf,"orire = no, removed %d orientation restraints",i);
+ +      warning_note(wi,warn_buf);
+ +    }
+ +  }
+ +  if (opts->bDihre == FALSE) {
+ +    i = rm_interactions(F_DIHRES,nrmols,molinfo);
+ +    if (i > 0) {
+ +      set_warning_line(wi,"unknown",-1);
+ +      sprintf(warn_buf,"dihre = no, removed %d dihedral restraints",i);
+ +      warning_note(wi,warn_buf);
+ +    }
+ +  }
+ +  
+ +  /* Copy structures from msys to sys */
+ +  molinfo2mtop(nrmols,molinfo,sys);
+ +
+ +  gmx_mtop_finalize(sys);
+ + 
+ +  /* COORDINATE file processing */
+ +  if (bVerbose) 
+ +    fprintf(stderr,"processing coordinates...\n");
+ +
+ +  get_stx_coordnum(confin,&state->natoms);
+ +  if (state->natoms != sys->natoms)
+ +    gmx_fatal(FARGS,"number of coordinates in coordinate file (%s, %d)\n"
+ +              "             does not match topology (%s, %d)",
+ +            confin,state->natoms,topfile,sys->natoms);
+ +  else {
+ +    /* make space for coordinates and velocities */
+ +    char title[STRLEN];
+ +    snew(confat,1);
+ +    init_t_atoms(confat,state->natoms,FALSE);
+ +    init_state(state,state->natoms,0,0,0);
+ +    read_stx_conf(confin,title,confat,state->x,state->v,NULL,state->box);
+ +    /* This call fixes the box shape for runs with pressure scaling */
+ +    set_box_rel(ir,state);
+ +
+ +    /* confat is only needed to compare atom names with the topology */
+ +    nmismatch = check_atom_names(topfile, confin, sys, confat);
+ +    free_t_atoms(confat,TRUE);
+ +    sfree(confat);
+ +    
+ +    if (nmismatch) {
+ +      sprintf(buf,"%d non-matching atom name%s\n"
+ +            "atom names from %s will be used\n"
+ +            "atom names from %s will be ignored\n",
+ +            nmismatch,(nmismatch == 1) ? "" : "s",topfile,confin);
+ +      warning(wi,buf);
+ +    }    
+ +    if (bVerbose) 
+ +      fprintf(stderr,"double-checking input for internal consistency...\n");
+ +    double_check(ir,state->box,nint_ftype(sys,molinfo,F_CONSTR),wi);
+ +  }
+ +
+ +  if (bGenVel) {
+ +    real *mass;
+ +    gmx_mtop_atomloop_all_t aloop;
+ +    t_atom *atom;
+ +
+ +    /* Collect the per-atom masses for the stop_cm call below */
+ +    snew(mass,state->natoms);
+ +    aloop = gmx_mtop_atomloop_all_init(sys);
+ +    while (gmx_mtop_atomloop_all_next(aloop,&i,&atom)) {
+ +      mass[i] = atom->m;
+ +    }
+ +
+ +    if (opts->seed == -1) { /* -1 requests a seed from make_seed() */
+ +      opts->seed = make_seed();
+ +      fprintf(stderr,"Setting gen_seed to %d\n",opts->seed);
+ +    }
+ +    maxwell_speed(opts->tempi,opts->seed,sys,state->v);
+ +
+ +    stop_cm(stdout,state->natoms,mass,state->x,state->v);
+ +    sfree(mass);
+ +  }
+ +
+ +  *nmi = nrmols;
+ +  *mi  = molinfo;
+ +}
+ +
+ +/* Copy the contents of one trajectory frame into the run state.
+ + *
+ + * slog     - trajectory file name, used only in error messages
+ + * fr       - the frame to copy from; it must be complete and
+ + *            contain coordinates
+ + * bReadVel - when set the frame must also contain velocities,
+ + *            which are copied as well
+ + * state    - receives x, optionally v, and the box when present
+ + * use_time - receives the time of the frame
+ + */
+ +static void copy_state(const char *slog,t_trxframe *fr,
+ +                       gmx_bool bReadVel,t_state *state,
+ +                       double *use_time)
+ +{
+ +    int atom;
+ +
+ +    if (fr->not_ok & FRAME_NOT_OK)
+ +    {
+ +        gmx_fatal(FARGS,"Can not start from an incomplete frame");
+ +    }
+ +    if (!fr->bX)
+ +    {
+ +        gmx_fatal(FARGS,"Did not find a frame with coordinates in file %s",
+ +                  slog);
+ +    }
+ +
+ +    /* Coordinates are always taken from the frame */
+ +    atom = 0;
+ +    while (atom < state->natoms)
+ +    {
+ +        copy_rvec(fr->x[atom],state->x[atom]);
+ +        atom++;
+ +    }
+ +
+ +    /* Velocities only on request */
+ +    if (bReadVel && !fr->bV)
+ +    {
+ +        gmx_incons("Trajecory frame unexpectedly does not contain velocities");
+ +    }
+ +    if (bReadVel)
+ +    {
+ +        atom = 0;
+ +        while (atom < state->natoms)
+ +        {
+ +            copy_rvec(fr->v[atom],state->v[atom]);
+ +            atom++;
+ +        }
+ +    }
+ +
+ +    /* The box is optional in the frame */
+ +    if (fr->bBox)
+ +    {
+ +        copy_mat(fr->box,state->box);
+ +    }
+ +
+ +    *use_time = fr->time;
+ +}
+ +
+ +static void cont_status(const char *slog,const char *ener,
+ +                      gmx_bool bNeedVel,gmx_bool bGenVel, real fr_time,
+ +                      t_inputrec *ir,t_state *state,
+ +                      gmx_mtop_t *sys,
+ +                        const output_env_t oenv)
+ +     /* Take the starting state from the trajectory file slog.
+ +      *
+ +      * If fr_time == -1 read the last frame available which is complete,
+ +      * otherwise reading stops at the first frame whose time is not
+ +      * smaller than fr_time.
+ +      * Velocities are read only when bNeedVel is set and bGenVel is not.
+ +      * When ener is given and pressure coupling or Nose-Hoover
+ +      * temperature coupling is in use, get_enx_state() is called with
+ +      * the time of the frame that was used.
+ +      */
+ +{
+ +    gmx_bool bReadVel;
+ +    t_trxframe  fr;
+ +    t_trxstatus *fp;
+ +    int i;
+ +    double use_time;
+ +
+ +    bReadVel = (bNeedVel && !bGenVel);
+ +
+ +    fprintf(stderr,
+ +            "Reading Coordinates%s and Box size from old trajectory\n",
+ +            bReadVel ? ", Velocities" : "");
+ +    if (fr_time == -1)
+ +    {
+ +        fprintf(stderr,"Will read whole trajectory\n");
+ +    }
+ +    else
+ +    {
+ +        fprintf(stderr,"Will read till time %g\n",fr_time);
+ +    }
+ +    if (!bReadVel)
+ +    {
+ +        if (bGenVel)
+ +        {
+ +            fprintf(stderr,"Velocities generated: "
+ +                    "ignoring velocities in input trajectory\n");
+ +        }
+ +        read_first_frame(oenv,&fp,slog,&fr,TRX_NEED_X);
+ +    }
+ +    else
+ +    {
+ +        read_first_frame(oenv,&fp,slog,&fr,TRX_NEED_X | TRX_NEED_V);
+ +        
+ +        if (!fr.bV)
+ +        {
+ +            fprintf(stderr,
+ +                    "\n"
+ +                    "WARNING: Did not find a frame with velocities in file %s,\n"
+ +                    "         all velocities will be set to zero!\n\n",slog);
+ +            for(i=0; i<sys->natoms; i++)
+ +            {
+ +                clear_rvec(state->v[i]);
+ +            }
+ +            close_trj(fp); /* reopen the file with relaxed requirements */
+ +            /* Search for a frame without velocities */
+ +            bReadVel = FALSE;
+ +            read_first_frame(oenv,&fp,slog,&fr,TRX_NEED_X);
+ +        }
+ +    }
+ +
+ +    state->natoms = fr.natoms;
+ +
+ +    if (sys->natoms != state->natoms)
+ +    {
+ +        gmx_fatal(FARGS,"Number of atoms in Topology "
+ +                  "is not the same as in Trajectory");
+ +    }
+ +    copy_state(slog,&fr,bReadVel,state,&use_time);
+ +
+ +    /* Find the appropriate frame */
+ +    while ((fr_time == -1 || fr.time < fr_time) &&
+ +           read_next_frame(oenv,fp,&fr))
+ +    { /* each frame that is read overwrites the state, so the last one wins */
+ +        copy_state(slog,&fr,bReadVel,state,&use_time);
+ +    }
+ +  
+ +    close_trj(fp);
+ +
+ +    /* Set the relative box lengths for preserving the box shape.
+ +     * Note that this call can lead to differences in the last bit
+ +     * with respect to using tpbconv to create a [TT].tpx[tt] file.
+ +     */
+ +    set_box_rel(ir,state);
+ +
+ +    fprintf(stderr,"Using frame at t = %g ps\n",use_time);
+ +    fprintf(stderr,"Starting time for run is %g ps\n",ir->init_t); 
+ +  
+ +    if ((ir->epc != epcNO  || ir->etc ==etcNOSEHOOVER) && ener)
+ +    {
+ +        get_enx_state(ener,use_time,&sys->groups,ir,state);
+ +        preserve_box_shape(ir,state->box_rel,state->boxv);
+ +    }
+ +}
+ +
+ +static void read_posres(gmx_mtop_t *mtop,t_molinfo *molinfo,gmx_bool bTopB,
+ +                        char *fn,
+ +                        int rc_scaling, int ePBC, 
+ +                        rvec com,
+ +                        warninp_t wi)
+ +{ /* Read position restraint reference coordinates from file fn and
+ +   * store them in the molecule blocks of mtop.
+ +   *
+ +   * mtop       - topology; for each block whose molecule type has
+ +   *              F_POSRES entries, posres_xA (or posres_xB when bTopB
+ +   *              is set) is allocated and filled
+ +   * molinfo    - supplies plist[F_POSRES] and the name per molecule type
+ +   * rc_scaling - erscNO:  coordinates are stored as read;
+ +   *              erscALL: coordinates are multiplied with the inverse box;
+ +   *              erscCOM: the center of mass is subtracted
+ +   * ePBC       - determines npbcdim, the number of dimensions treated
+ +   * com        - cleared on entry; with erscCOM it receives the center
+ +   *              of mass, multiplied with the inverse box
+ +   * wi         - receives a warning when the atom counts differ
+ +   */
+ +  gmx_bool   bFirst = TRUE;
+ +  rvec   *x,*v,*xp;
+ +  dvec   sum;
+ +  double totmass;
+ +  t_atoms dumat;
+ +  matrix box,invbox;
+ +  int    natoms,npbcdim=0;
+ +  char   warn_buf[STRLEN],title[STRLEN];
+ +  int    a,i,ai,j,k,mb,nat_molb;
+ +  gmx_molblock_t *molb;
+ +  t_params *pr;
+ +  t_atom *atom;
+ +
+ +  get_stx_coordnum(fn,&natoms);
+ +  if (natoms != mtop->natoms) {
+ +    sprintf(warn_buf,"The number of atoms in %s (%d) does not match the number of atoms in the topology (%d). Will assume that the first %d atoms in the topology and %s match.",fn,natoms,mtop->natoms,min(mtop->natoms,natoms),fn);
+ +    warning(wi,warn_buf);
+ +  }
+ +  snew(x,natoms); /* x and v are sized by the file, not by the topology */
+ +  snew(v,natoms);
+ +  init_t_atoms(&dumat,natoms,FALSE);
+ +  read_stx_conf(fn,title,&dumat,x,v,NULL,box);
+ +  
+ +  npbcdim = ePBC2npbcdim(ePBC);
+ +  clear_rvec(com);
+ +  if (rc_scaling != erscNO) {
+ +    copy_mat(box,invbox);
+ +    for(j=npbcdim; j<DIM; j++) { /* replace the rows beyond npbcdim by unit vectors before inverting */
+ +      clear_rvec(invbox[j]);
+ +      invbox[j][j] = 1;
+ +    }
+ +    m_inv_ur0(invbox,invbox);
+ +  }
+ +
+ +  /* Copy the reference coordinates to mtop */
+ +  clear_dvec(sum);
+ +  totmass = 0;
+ +  a = 0; /* index in x of the first atom of the current molecule block */
+ +  for(mb=0; mb<mtop->nmolblock; mb++) {
+ +    molb = &mtop->molblock[mb];
+ +    nat_molb = molb->nmol*mtop->moltype[molb->type].atoms.nr;
+ +    pr = &(molinfo[molb->type].plist[F_POSRES]);
+ +    if (pr->nr > 0) {
+ +      atom = mtop->moltype[molb->type].atoms.atom;
+ +      for(i=0; (i<pr->nr); i++) {
+ +      ai=pr->param[i].AI;
+ +      if (ai >= natoms) { /* NOTE(review): ai is compared with natoms but x is read at a+ai below; confirm a+ai cannot exceed natoms-1 */
+ +        gmx_fatal(FARGS,"Position restraint atom index (%d) in moltype '%s' is larger than number of atoms in %s (%d).\n",
+ +                  ai+1,*molinfo[molb->type].name,fn,natoms);
+ +      }
+ +      if (rc_scaling == erscCOM) {
+ +        /* Determine the center of mass of the posres reference coordinates */
+ +        for(j=0; j<npbcdim; j++) { /* NOTE(review): only the atoms at offset a+ai enter the sum, once per restraint entry; confirm this is intended when nmol > 1 */
+ +          sum[j] += atom[ai].m*x[a+ai][j];
+ +        }
+ +        totmass  += atom[ai].m;
+ +      }
+ +      }
+ +      if (!bTopB) { /* NOTE(review): the copy loops below read x[a+i] for all atoms of the block; after the count mismatch warning above natoms may be smaller than that, verify */
+ +      molb->nposres_xA = nat_molb;
+ +      snew(molb->posres_xA,molb->nposres_xA);
+ +      for(i=0; i<nat_molb; i++) {
+ +        copy_rvec(x[a+i],molb->posres_xA[i]);
+ +      }
+ +      } else {
+ +      molb->nposres_xB = nat_molb;
+ +      snew(molb->posres_xB,molb->nposres_xB);
+ +      for(i=0; i<nat_molb; i++) {
+ +        copy_rvec(x[a+i],molb->posres_xB[i]);
+ +      }
+ +      }
+ +    }
+ +    a += nat_molb;
+ +  }
+ +  if (rc_scaling == erscCOM) {
+ +    if (totmass == 0)
+ +      gmx_fatal(FARGS,"The total mass of the position restraint atoms is 0");
+ +    for(j=0; j<npbcdim; j++)
+ +      com[j] = sum[j]/totmass;
+ +    fprintf(stderr,"The center of mass of the position restraint coord's is %6.3f %6.3f %6.3f\n",com[XX],com[YY],com[ZZ]);
+ +  }
+ +
+ +  if (rc_scaling != erscNO) {
+ +    for(mb=0; mb<mtop->nmolblock; mb++) {
+ +      molb = &mtop->molblock[mb];
+ +      nat_molb = molb->nmol*mtop->moltype[molb->type].atoms.nr;
+ +      if (molb->nposres_xA > 0 || molb->nposres_xB > 0) {
+ +      xp = (!bTopB ? molb->posres_xA : molb->posres_xB); /* the array that was filled in this call */
+ +      for(i=0; i<nat_molb; i++) {
+ +        for(j=0; j<npbcdim; j++) {
+ +          if (rc_scaling == erscALL) {
+ +            /* Convert from Cartesian to crystal coordinates */
+ +            xp[i][j] *= invbox[j][j];
+ +            for(k=j+1; k<npbcdim; k++) {
+ +              xp[i][j] += invbox[k][j]*xp[i][k];
+ +            }
+ +          } else if (rc_scaling == erscCOM) {
+ +            /* Subtract the center of mass */
+ +            xp[i][j] -= com[j];
+ +          }
+ +        }
+ +      }
+ +      }
+ +    }
+ +
+ +    if (rc_scaling == erscCOM) {
+ +      /* Convert the COM from Cartesian to crystal coordinates */
+ +      for(j=0; j<npbcdim; j++) {
+ +      com[j] *= invbox[j][j];
+ +      for(k=j+1; k<npbcdim; k++) {
+ +        com[j] += invbox[k][j]*com[k];
+ +      }
+ +      }
+ +    }
+ +  }
+ +  
+ +  free_t_atoms(&dumat,TRUE);
+ +  sfree(x);
+ +  sfree(v);
+ +}
+ +
+ +/* Read the position restraint reference coordinates for topology A
+ + * from fnA and, only when fnB names a different file, those for
+ + * topology B from fnB.
+ + *
+ + * com receives the center of mass computed by read_posres() for the
+ + * A coordinates, comB the one for the B coordinates. When fnA and fnB
+ + * are the same string, the B coordinates are not read and comB is not
+ + * written.
+ + */
+ +static void gen_posres(gmx_mtop_t *mtop,t_molinfo *mi,
+ +                       char *fnA, char *fnB,
+ +                       int rc_scaling, int ePBC,
+ +                       rvec com, rvec comB,
+ +                       warninp_t wi)
+ +{
+ +  read_posres  (mtop,mi,FALSE,fnA,rc_scaling,ePBC,com,wi);
+ +  if (strcmp(fnA,fnB) != 0) {
+ +      read_posres(mtop,mi,TRUE ,fnB,rc_scaling,ePBC,comB,wi);
+ +  }
+ +}
+ +
+ +/* Look up the atom type of every wall.
+ + *
+ + * For each of the ir->nwall walls the type name given in opts is
+ + * translated by get_atomtype_type() and the result is stored in
+ + * ir->wall_atomtype. Nothing happens when there are no walls.
+ + */
+ +static void set_wall_atomtype(gpp_atomtype_t at,t_gromppopts *opts,
+ +                            t_inputrec *ir)
+ +{
+ +    int wall;
+ +
+ +    if (ir->nwall <= 0)
+ +    {
+ +        return;
+ +    }
+ +
+ +    fprintf(stderr,"Searching the wall atom type(s)\n");
+ +    for(wall=0; wall<ir->nwall; wall++)
+ +    {
+ +        ir->wall_atomtype[wall] =
+ +            get_atomtype_type(opts->wall_atomtype[wall],at);
+ +    }
+ +}
+ +
+ +/* Return the number of internal degrees of freedom of a molecule.
+ + *
+ + * Only particles of type eptAtom or eptNucleus with a positive mass
+ + * are counted. Fewer than two such particles give 0, two give 1 and
+ + * n > 2 give 3*n - 6.
+ + */
+ +static int nrdf_internal(t_atoms *atoms)
+ +{
+ +    int idx,nreal;
+ +
+ +    nreal = 0;
+ +    for(idx=0; idx<atoms->nr; idx++)
+ +    {
+ +        /* Vsite ptype might not be set here yet, so also check the mass */
+ +        if (!(atoms->atom[idx].m > 0))
+ +        {
+ +            continue;
+ +        }
+ +        if (atoms->atom[idx].ptype != eptAtom &&
+ +            atoms->atom[idx].ptype != eptNucleus)
+ +        {
+ +            continue;
+ +        }
+ +        nreal++;
+ +    }
+ +
+ +    if (nreal < 2)
+ +    {
+ +        return 0;
+ +    }
+ +    if (nreal == 2)
+ +    {
+ +        return 1;
+ +    }
+ +
+ +    return nreal*3 - 6;
+ +}
+ +
+ +void
+ +spline1d( double        dx,
+ +               double *      y,
+ +               int           n,
+ +               double *      u,
+ +               double *      y2 )
+ +{ /* Fill y2[0..n-1] from the n samples y[0..n-1] taken at spacing dx.
+ +   *
+ +   * A forward sweep stores intermediate factors in y2 and u, after
+ +   * which a backward sweep turns y2 into its final values; y2[0] and
+ +   * y2[n-1] end up as 0.
+ +   * u is scratch space of which the entries 0..n-2 are used.
+ +   * NOTE(review): this looks like the second-derivative table of a
+ +   * natural cubic spline on a uniform grid - confirm.
+ +   * n must be at least 1, since y2[n-1] is written unconditionally.
+ +   */
+ +    int i;
+ +    double p,q;
+ +      
+ +    y2[0] = 0.0;
+ +    u[0]  = 0.0;
+ +      
+ +    for(i=1;i<n-1;i++)
+ +    {
+ +              p = 0.5*y2[i-1]+2.0;
+ +        y2[i] = -0.5/p;
+ +        q = (y[i+1]-2.0*y[i]+y[i-1])/dx; /* second difference of y divided by dx */
+ +              u[i] = (3.0*q/dx-0.5*u[i-1])/p;
+ +    }
+ +      
+ +    y2[n-1] = 0.0;
+ +      
+ +    for(i=n-2;i>=0;i--)
+ +    { /* back-substitution, running from the end of the table */
+ +        y2[i] = y2[i]*y2[i+1]+u[i];
+ +    }
+ +}
+ +
+ +
+ +void
+ +interpolate1d( double     xmin,
+ +                        double     dx,
+ +                        double *   ya,
+ +                        double *   y2a,
+ +                        double     x,
+ +                        double *   y,
+ +                        double *   y1)
+ +{ /* Evaluate tabulated data at position x.
+ +   *
+ +   * xmin, dx - position of table entry 0 and the table spacing
+ +   * ya, y2a  - table values and the matching y2 table, presumably
+ +   *            the output of spline1d (confirm against the callers)
+ +   * y        - receives the interpolated value
+ +   * y1       - receives the derivative with respect to x
+ +   *
+ +   * The entries ix and ix+1 of both tables are read; no range check
+ +   * is done here, so x has to lie within the table.
+ +   */
+ +    int ix;
+ +    double a,b;
+ +      
+ +    ix = (x-xmin)/dx; /* the conversion to int truncates toward zero */
+ +      
+ +    a = (xmin+(ix+1)*dx-x)/dx; /* weight of entry ix */
+ +    b = (x-xmin-ix*dx)/dx; /* weight of entry ix+1 */
+ +      
+ +    *y  = a*ya[ix]+b*ya[ix+1]+((a*a*a-a)*y2a[ix]+(b*b*b-b)*y2a[ix+1])*(dx*dx)/6.0;
+ +    *y1 = (ya[ix+1]-ya[ix])/dx-(3.0*a*a-1.0)/6.0*dx*y2a[ix]+(3.0*b*b-1.0)/6.0*dx*y2a[ix+1];
+ +}
+ +
+ +
+ +/* Fill the cmap tables of cmap_grid from the raw grid values.
+ + *
+ + * grid_spacing - number of grid points per dimension of one map
+ + * nc           - number of maps to process
+ + * grid         - raw grid values; map kk starts at offset
+ + *                kk*grid_spacing*grid_spacing*2
+ + * cmap_grid    - receives, per map and per grid point, four values:
+ + *                the raw grid value followed by the three derivative
+ + *                values v1, v2 and v12 from the spline interpolation
+ + *
+ + * Each map is first copied into a work grid of twice the size in both
+ + * dimensions, using modulo indexing, and the splines are evaluated on
+ + * that work grid. All work buffers are released before returning.
+ + */
+ +void
+ +setup_cmap (int              grid_spacing,
+ +            int              nc,
+ +            real *           grid ,
+ +            gmx_cmap_t *     cmap_grid)
+ +{
+ +    double *tmp_u,*tmp_u2,*tmp_yy,*tmp_y1,*tmp_t2,*tmp_grid;
+ +
+ +    int    i,j,k,ii,jj,kk,idx;
+ +    int    offset;
+ +    double dx,xmin,v,v1,v2,v12;
+ +    double phi,psi;
+ +
+ +    snew(tmp_u,2*grid_spacing);
+ +    snew(tmp_u2,2*grid_spacing);
+ +    snew(tmp_yy,2*grid_spacing);
+ +    snew(tmp_y1,2*grid_spacing);
+ +    snew(tmp_t2,2*grid_spacing*2*grid_spacing);
+ +    snew(tmp_grid,2*grid_spacing*2*grid_spacing);
+ +
+ +    dx = 360.0/grid_spacing;
+ +    xmin = -180.0-dx*grid_spacing/2;
+ +
+ +    for(kk=0;kk<nc;kk++)
+ +    {
+ +        /* Compute an offset depending on which cmap we are using
+ +         * Offset will be the map number multiplied with the grid_spacing * grid_spacing * 2
+ +         */
+ +        offset = kk * grid_spacing * grid_spacing * 2;
+ +
+ +        /* Copy the map into the double-sized work grid */
+ +        for(i=0;i<2*grid_spacing;i++)
+ +        {
+ +            ii=(i+grid_spacing-grid_spacing/2)%grid_spacing;
+ +
+ +            for(j=0;j<2*grid_spacing;j++)
+ +            {
+ +                jj=(j+grid_spacing-grid_spacing/2)%grid_spacing;
+ +                tmp_grid[i*grid_spacing*2+j] = grid[offset+ii*grid_spacing+jj];
+ +            }
+ +        }
+ +
+ +        /* Spline every row of the work grid */
+ +        for(i=0;i<2*grid_spacing;i++)
+ +        {
+ +            spline1d(dx,&(tmp_grid[2*grid_spacing*i]),2*grid_spacing,tmp_u,&(tmp_t2[2*grid_spacing*i]));
+ +        }
+ +
+ +        /* Only the central part of the work grid is stored */
+ +        for(i=grid_spacing/2;i<grid_spacing+grid_spacing/2;i++)
+ +        {
+ +            ii = i-grid_spacing/2;
+ +            phi = ii*dx-180.0;
+ +
+ +            for(j=grid_spacing/2;j<grid_spacing+grid_spacing/2;j++)
+ +            {
+ +                jj = j-grid_spacing/2;
+ +                psi = jj*dx-180.0;
+ +
+ +                for(k=0;k<2*grid_spacing;k++)
+ +                {
+ +                    interpolate1d(xmin,dx,&(tmp_grid[2*grid_spacing*k]),
+ +                                  &(tmp_t2[2*grid_spacing*k]),psi,&tmp_yy[k],&tmp_y1[k]);
+ +                }
+ +
+ +                spline1d(dx,tmp_yy,2*grid_spacing,tmp_u,tmp_u2);
+ +                interpolate1d(xmin,dx,tmp_yy,tmp_u2,phi,&v,&v1);
+ +                spline1d(dx,tmp_y1,2*grid_spacing,tmp_u,tmp_u2);
+ +                interpolate1d(xmin,dx,tmp_y1,tmp_u2,phi,&v2,&v12);
+ +
+ +                idx = ii*grid_spacing+jj;
+ +                cmap_grid->cmapdata[kk].cmap[idx*4] = grid[offset+ii*grid_spacing+jj];
+ +                cmap_grid->cmapdata[kk].cmap[idx*4+1] = v1;
+ +                cmap_grid->cmapdata[kk].cmap[idx*4+2] = v2;
+ +                cmap_grid->cmapdata[kk].cmap[idx*4+3] = v12;
+ +            }
+ +        }
+ +    }
+ +
+ +    /* The work buffers are local to this routine */
+ +    sfree(tmp_u);
+ +    sfree(tmp_u2);
+ +    sfree(tmp_yy);
+ +    sfree(tmp_y1);
+ +    sfree(tmp_t2);
+ +    sfree(tmp_grid);
+ +}
+ +                              
+ +/* Allocate the storage of a cmap grid.
+ + *
+ + * Stores ngrid and grid_spacing in cmap_grid and allocates, for each
+ + * of the ngrid maps, 4*grid_spacing*grid_spacing entries.
+ + */
+ +void init_cmap_grid(gmx_cmap_t *cmap_grid, int ngrid, int grid_spacing)
+ +{
+ +    int i,nelem;
+ +
+ +    cmap_grid->ngrid        = ngrid;
+ +    cmap_grid->grid_spacing = grid_spacing;
+ +    nelem                   = cmap_grid->grid_spacing*cmap_grid->grid_spacing;
+ +
+ +    snew(cmap_grid->cmapdata,ngrid);
+ +
+ +    for(i=0;i<cmap_grid->ngrid;i++)
+ +    {
+ +        /* four values are stored per grid point */
+ +        snew(cmap_grid->cmapdata[i].cmap,4*nelem);
+ +    }
+ +}
+ +
+ +
+ +/* Return the total number of constraints in the system.
+ + *
+ + * Per molecule type every F_SETTLE entry counts as three constraints
+ + * and every entry of an interaction with the IF_CONSTRAINT flag as
+ + * one. The per-molecule count is multiplied by the number of
+ + * molecules in the block. A warning is issued through wi for each
+ + * molecule block whose molecule type has more constraints than
+ + * internal degrees of freedom.
+ + */
+ +static int count_constraints(gmx_mtop_t *mtop,t_molinfo *mi,warninp_t wi)
+ +{
+ +    int            total,per_mol,ftype,blk,nrdf;
+ +    gmx_molblock_t *block;
+ +    t_molinfo      *info;
+ +    char           msg[STRLEN];
+ +
+ +    total = 0;
+ +    for(blk=0; blk<mtop->nmolblock; blk++)
+ +    {
+ +        block = &mtop->molblock[blk];
+ +        info  = &mi[block->type];
+ +
+ +        per_mol = 0;
+ +        for(ftype=0; ftype<F_NRE; ftype++)
+ +        {
+ +            if (ftype == F_SETTLE)
+ +            {
+ +                per_mol += 3*info->plist[ftype].nr;
+ +            }
+ +            else if (interaction_function[ftype].flags & IF_CONSTRAINT)
+ +            {
+ +                per_mol += info->plist[ftype].nr;
+ +            }
+ +        }
+ +
+ +        nrdf = nrdf_internal(&info->atoms);
+ +        if (per_mol > nrdf)
+ +        {
+ +            sprintf(msg,
+ +                    "Molecule type '%s' has %d constraints.\n"
+ +                    "For stability and efficiency there should not be more constraints than internal number of degrees of freedom: %d.\n",
+ +                    *info->name,per_mol,
+ +                    nrdf);
+ +            warning(wi,msg);
+ +        }
+ +
+ +        total += block->nmol*per_mol;
+ +    }
+ +
+ +    return total;
+ +}
+ +
+ +/* Check that charged atoms have non-zero GB parameters.
+ + *
+ + * For every atom of every molecule type a message is printed to
+ + * stderr when the atom carries a charge while at least one of the
+ + * radius, volume, surface tension, GB radius or S_hct parameters of
+ + * its atom type is zero. A fatal error follows when any such atom
+ + * was found.
+ + */
+ +static void check_gbsa_params_charged(gmx_mtop_t *sys, gpp_atomtype_t atype)
+ +{
+ +    int           molt,at,type,nbad;
+ +    real          charge;
+ +    gmx_bool      bZeroParam;
+ +    const t_atoms *atoms;
+ +
+ +    nbad = 0;
+ +    for(molt=0; molt<sys->nmoltype; molt++)
+ +    {
+ +        atoms = &sys->moltype[molt].atoms;
+ +
+ +        for(at=0; at<atoms->nr; at++)
+ +        {
+ +            charge = atoms->atom[at].q;
+ +            type   = atoms->atom[at].type;
+ +
+ +            bZeroParam = (get_atomtype_radius(type,atype)    == 0 ||
+ +                          get_atomtype_vol(type,atype)       == 0 ||
+ +                          get_atomtype_surftens(type,atype)  == 0 ||
+ +                          get_atomtype_gb_radius(type,atype) == 0 ||
+ +                          get_atomtype_S_hct(type,atype)     == 0);
+ +
+ +            if (bZeroParam && charge != 0)
+ +            {
+ +                fprintf(stderr,"\nGB parameter(s) zero for atom type '%s' while charge is %g\n",
+ +                        get_atomtype_name(type,atype),charge);
+ +                nbad++;
+ +            }
+ +        }
+ +    }
+ +
+ +    if (nbad > 0)
+ +    {
+ +        gmx_fatal(FARGS,"Can't do GB electrostatics; the implicit_genborn_params section of the forcefield has parameters with value zero for %d atomtypes that occur as charged atoms.",nbad);
+ +    }
+ +}
+ +
+ +
+ +/* Check that all atom types have usable GBSA parameters.
+ + *
+ + * An atom type is rejected when its radius, volume, surface tension,
+ + * GB radius or S_hct parameter is negative. Each rejected type is
+ + * reported on stderr, and a fatal error follows when at least one
+ + * type was rejected. The ir argument is not used.
+ + */
+ +static void check_gbsa_params(t_inputrec *ir,gpp_atomtype_t atype)
+ +{
+ +    int      type,nbad;
+ +    gmx_bool bBad;
+ +
+ +    nbad = 0;
+ +    for(type=0; type<get_atomtype_ntypes(atype); type++)
+ +    {
+ +        bBad = (get_atomtype_radius(type,atype)    < 0 ||
+ +                get_atomtype_vol(type,atype)       < 0 ||
+ +                get_atomtype_surftens(type,atype)  < 0 ||
+ +                get_atomtype_gb_radius(type,atype) < 0 ||
+ +                get_atomtype_S_hct(type,atype)     < 0);
+ +        if (!bBad)
+ +        {
+ +            continue;
+ +        }
+ +
+ +        fprintf(stderr,"\nGB parameter(s) missing or negative for atom type '%s'\n",
+ +                get_atomtype_name(type,atype));
+ +        nbad++;
+ +    }
+ +
+ +    if (nbad > 0)
+ +    {
+ +        gmx_fatal(FARGS,"Can't do GB electrostatics; the implicit_genborn_params section of the forcefield is missing parameters for %d atomtypes or they might be negative.",nbad);
+ +    }
+ +}
+ +
+ +int main (int argc, char *argv[])
+ +{
+ +  static const char *desc[] = {
+ +    "The gromacs preprocessor",
+ +    "reads a molecular topology file, checks the validity of the",
+ +    "file, expands the topology from a molecular description to an atomic",
+ +    "description. The topology file contains information about",
+ +    "molecule types and the number of molecules, the preprocessor",
+ +    "copies each molecule as needed. ",
+ +    "There is no limitation on the number of molecule types. ",
+ +    "Bonds and bond-angles can be converted into constraints, separately",
+ +    "for hydrogens and heavy atoms.",
+ +    "Then a coordinate file is read and velocities can be generated",
+ +    "from a Maxwellian distribution if requested.",
+ +    "[TT]grompp[tt] also reads parameters for the [TT]mdrun[tt] ",
+ +    "(eg. number of MD steps, time step, cut-off), and others such as",
+ +    "NEMD parameters, which are corrected so that the net acceleration",
+ +    "is zero.",
+ +    "Eventually a binary file is produced that can serve as the sole input",
+ +    "file for the MD program.[PAR]",
+ +    
+ +    "[TT]grompp[tt] uses the atom names from the topology file. The atom names",
+ +    "in the coordinate file (option [TT]-c[tt]) are only read to generate",
+ +    "warnings when they do not match the atom names in the topology.",
+ +    "Note that the atom names are irrelevant for the simulation as",
+ +    "only the atom types are used for generating interaction parameters.[PAR]",
+ +
-     "#include <filename>[BR]",
++    "[TT]grompp[tt] uses a built-in preprocessor to resolve includes, macros, ",
++    "etc. The preprocessor supports the following keywords:[PAR]",
+ +    "#ifdef VARIABLE[BR]",
+ +    "#ifndef VARIABLE[BR]",
+ +    "#else[BR]",
+ +    "#endif[BR]",
+ +    "#define VARIABLE[BR]",
+ +    "#undef VARIABLE[BR]"
+ +    "#include \"filename\"[BR]",
-     "using the following two flags in your [TT]mdp[tt] file:[BR]",
-     "define = -DVARIABLE1 -DVARIABLE2[BR]",
-     "include = -I/home/john/doe[BR]",
++    "#include <filename>[PAR]",
+ +    "The functioning of these statements in your topology may be modulated by",
-     
-     "If your system does not have a c-preprocessor, you can still",
-     "use [TT]grompp[tt], but you do not have access to the features ",
-     "from the cpp. Command line options to the c-preprocessor can be given",
-     "in the [TT].mdp[tt] file. See your local manual (man cpp).[PAR]",
++    "using the following two flags in your [TT].mdp[tt] file:[PAR]",
++    "[TT]define = -DVARIABLE1 -DVARIABLE2[BR]",
++    "include = -I/home/john/doe[tt][BR]",
+ +    "For further information a C-programming textbook may help you out.",
+ +    "Specifying the [TT]-pp[tt] flag will get the pre-processed",
+ +    "topology file written out so that you can verify its contents.[PAR]",
-     "[TT]grompp[tt] can be used to restart simulations preserving",
-     "continuity by supplying just a checkpoint file with [TT]-t[tt].",
++   
++    /* cpp has been unnecessary for some time, hasn't it?
++        "If your system does not have a C-preprocessor, you can still",
++        "use [TT]grompp[tt], but you do not have access to the features ",
++        "from the cpp. Command line options to the C-preprocessor can be given",
++        "in the [TT].mdp[tt] file. See your local manual (man cpp).[PAR]",
++    */
+ +    
+ +    "When using position restraints a file with restraint coordinates",
+ +    "can be supplied with [TT]-r[tt], otherwise restraining will be done",
+ +    "with respect to the conformation from the [TT]-c[tt] option.",
+ +    "For free energy calculation the the coordinates for the B topology",
+ +    "can be supplied with [TT]-rb[tt], otherwise they will be equal to",
+ +    "those of the A topology.[PAR]",
+ +    
+ +    "Starting coordinates can be read from trajectory with [TT]-t[tt].",
+ +    "The last frame with coordinates and velocities will be read,",
+ +    "unless the [TT]-time[tt] option is used. Only if this information",
+ +    "is absent will the coordinates in the [TT]-c[tt] file be used.",
+ +    "Note that these velocities will not be used when [TT]gen_vel = yes[tt]",
+ +    "in your [TT].mdp[tt] file. An energy file can be supplied with",
+ +    "[TT]-e[tt] to read Nose-Hoover and/or Parrinello-Rahman coupling",
+ +    "variables.[PAR]",
+ +
-     "To verify your run input file, please make notice of all warnings",
++    "[TT]grompp[tt] can be used to restart simulations (preserving",
++    "continuity) by supplying just a checkpoint file with [TT]-t[tt].",
+ +    "However, for simply changing the number of run steps to extend",
+ +    "a run, using [TT]tpbconv[tt] is more convenient than [TT]grompp[tt].",
+ +    "You then supply the old checkpoint file directly to [TT]mdrun[tt]",
+ +    "with [TT]-cpi[tt]. If you wish to change the ensemble or things",
+ +    "like output frequency, then supplying the checkpoint file to",
+ +    "[TT]grompp[tt] with [TT]-t[tt] along with a new [TT].mdp[tt] file",
+ +    "with [TT]-f[tt] is the recommended procedure.[PAR]",
+ +
+ +    "By default, all bonded interactions which have constant energy due to",
+ +    "virtual site constructions will be removed. If this constant energy is",
+ +    "not zero, this will result in a shift in the total energy. All bonded",
+ +    "interactions can be kept by turning off [TT]-rmvsbds[tt]. Additionally,",
+ +    "all constraints for distances which will be constant anyway because",
+ +    "of virtual site constructions will be removed. If any constraints remain",
+ +    "which involve virtual sites, a fatal error will result.[PAR]"
+ +    
-     "of the [TT]mdout.mdp[tt] file, this contains comment lines, as well as",
-     "the input that [TT]grompp[tt] has read. If in doubt you can start grompp",
++    "To verify your run input file, please take note of all warnings",
+ +    "on the screen, and correct where necessary. Do also look at the contents",
++    "of the [TT]mdout.mdp[tt] file; this contains comment lines, as well as",
++    "the input that [TT]grompp[tt] has read. If in doubt, you can start [TT]grompp[tt]",
+ +    "with the [TT]-debug[tt] option which will give you more information",
+ +    "in a file called [TT]grompp.log[tt] (along with real debug info). You",
+ +    "can see the contents of the run input file with the [TT]gmxdump[tt]",
+ +    "program. [TT]gmxcheck[tt] can be used to compare the contents of two",
+ +    "run input files.[PAR]"
+ +
+ +    "The [TT]-maxwarn[tt] option can be used to override warnings printed",
+ +    "by [TT]grompp[tt] that otherwise halt output. In some cases, warnings are",
+ +    "harmless, but usually they are not. The user is advised to carefully",
+ +    "interpret the output messages before attempting to bypass them with",
+ +    "this option."
+ +  };
+ +  t_gromppopts *opts;
+ +  gmx_mtop_t   *sys;
+ +  int          nmi;
+ +  t_molinfo    *mi;
+ +  gpp_atomtype_t atype;
+ +  t_inputrec   *ir;
+ +  int          natoms,nvsite,comb,mt;
+ +  t_params     *plist;
+ +  t_state      state;
+ +  matrix       box;
+ +  real         max_spacing,fudgeQQ;
+ +  double       reppow;
+ +  char         fn[STRLEN],fnB[STRLEN];
+ +  const char   *mdparin;
+ +  int          ntype;
+ +  gmx_bool         bNeedVel,bGenVel;
+ +  gmx_bool         have_atomnumber;
+ +  int            n12,n13,n14;
+ +  t_params     *gb_plist = NULL;
+ +  gmx_genborn_t *born = NULL;
+ +  output_env_t oenv;
+ +  gmx_bool         bVerbose = FALSE;
+ +  warninp_t    wi;
+ +  char         warn_buf[STRLEN];
+ +
+ +  t_filenm fnm[] = {
+ +    { efMDP, NULL,  NULL,        ffREAD  },
+ +    { efMDP, "-po", "mdout",     ffWRITE },
+ +    { efSTX, "-c",  NULL,        ffREAD  },
+ +    { efSTX, "-r",  NULL,        ffOPTRD },
+ +    { efSTX, "-rb", NULL,        ffOPTRD },
+ +    { efNDX, NULL,  NULL,        ffOPTRD },
+ +    { efTOP, NULL,  NULL,        ffREAD  },
+ +    { efTOP, "-pp", "processed", ffOPTWR },
+ +    { efTPX, "-o",  NULL,        ffWRITE },
+ +    { efTRN, "-t",  NULL,        ffOPTRD },
+ +    { efEDR, "-e",  NULL,        ffOPTRD },
+ +    { efTRN, "-ref","rotref",    ffOPTRW }
+ +  };
+ +#define NFILE asize(fnm)
+ +
+ +  /* Command line options */
+ +  static gmx_bool bRenum=TRUE;
+ +  static gmx_bool bRmVSBds=TRUE,bZero=FALSE;
+ +  static int  i,maxwarn=0;
+ +  static real fr_time=-1;
+ +  t_pargs pa[] = {
+ +    { "-v",       FALSE, etBOOL,{&bVerbose},  
+ +      "Be loud and noisy" },
+ +    { "-time",    FALSE, etREAL, {&fr_time},
+ +      "Take frame at or first after this time." },
+ +    { "-rmvsbds",FALSE, etBOOL, {&bRmVSBds},
+ +      "Remove constant bonded interactions with virtual sites" },
+ +    { "-maxwarn", FALSE, etINT,  {&maxwarn},
+ +      "Number of allowed warnings during input processing. Not for normal use and may generate unstable systems" },
+ +    { "-zero",    FALSE, etBOOL, {&bZero},
+ +      "Set parameters for bonded interactions without defaults to zero instead of generating an error" },
+ +    { "-renum",   FALSE, etBOOL, {&bRenum},
+ +      "Renumber atomtypes and minimize number of atomtypes" }
+ +  };
+ +  
+ +  CopyRight(stdout,argv[0]);
+ +  
+ +  /* Initiate some variables */
+ +  snew(ir,1);
+ +  snew(opts,1);
+ +  init_ir(ir,opts);
+ +  
+ +  /* Parse the command line */
+ +  parse_common_args(&argc,argv,0,NFILE,fnm,asize(pa),pa,
+ +                    asize(desc),desc,0,NULL,&oenv);
+ +  
+ +  wi = init_warning(TRUE,maxwarn);
+ +  
+ +  /* PARAMETER file processing */
+ +  mdparin = opt2fn("-f",NFILE,fnm);
+ +  set_warning_line(wi,mdparin,-1);    
+ +  get_ir(mdparin,opt2fn("-po",NFILE,fnm),ir,opts,wi);
+ +  
+ +  if (bVerbose) 
+ +    fprintf(stderr,"checking input for internal consistency...\n");
+ +  check_ir(mdparin,ir,opts,wi);
+ +
+ +  if (ir->ld_seed == -1) {
+ +    ir->ld_seed = make_seed();
+ +    fprintf(stderr,"Setting the LD random seed to %d\n",ir->ld_seed);
+ +  }
+ +
+ +  bNeedVel = EI_STATE_VELOCITY(ir->eI);
+ +  bGenVel  = (bNeedVel && opts->bGenVel);
+ +
+ +  snew(plist,F_NRE);
+ +  init_plist(plist);
+ +  snew(sys,1);
+ +  atype = init_atomtype();
+ +  if (debug)
+ +    pr_symtab(debug,0,"Just opened",&sys->symtab);
+ +    
+ +  strcpy(fn,ftp2fn(efTOP,NFILE,fnm));
+ +  if (!gmx_fexist(fn)) 
+ +    gmx_fatal(FARGS,"%s does not exist",fn);
+ +  new_status(fn,opt2fn_null("-pp",NFILE,fnm),opt2fn("-c",NFILE,fnm),
+ +           opts,ir,bZero,bGenVel,bVerbose,&state,
+ +           atype,sys,&nmi,&mi,plist,&comb,&reppow,&fudgeQQ,
+ +           opts->bMorse,
+ +           wi);
+ +  
+ +  if (debug)
+ +    pr_symtab(debug,0,"After new_status",&sys->symtab);
+ +  
+ +  if (count_constraints(sys,mi,wi) && (ir->eConstrAlg == econtSHAKE)) {
+ +    if (ir->eI == eiCG || ir->eI == eiLBFGS) {
+ +        sprintf(warn_buf,"Can not do %s with %s, use %s",
+ +                EI(ir->eI),econstr_names[econtSHAKE],econstr_names[econtLINCS]);
+ +        warning_error(wi,warn_buf);
+ +    }
+ +    if (ir->bPeriodicMols) {
+ +        sprintf(warn_buf,"Can not do periodic molecules with %s, use %s",
+ +                econstr_names[econtSHAKE],econstr_names[econtLINCS]);
+ +        warning_error(wi,warn_buf);
+ +    }
+ +  }
+ +
+ +  /* If we are doing QM/MM, check that we got the atom numbers */
+ +  have_atomnumber = TRUE;
+ +  for (i=0; i<get_atomtype_ntypes(atype); i++) {
+ +    have_atomnumber = have_atomnumber && (get_atomtype_atomnumber(i,atype) >= 0);
+ +  }
+ +  if (!have_atomnumber && ir->bQMMM)
+ +  {
+ +      warning_error(wi,
+ +                    "\n"
+ +                    "It appears as if you are trying to run a QM/MM calculation, but the force\n"
+ +                    "field you are using does not contain atom numbers fields. This is an\n"
+ +                    "optional field (introduced in Gromacs 3.3) for general runs, but mandatory\n"
+ +                    "for QM/MM. The good news is that it is easy to add - put the atom number as\n"
+ +                    "an integer just before the mass column in ffXXXnb.itp.\n"
+ +                    "NB: United atoms have the same atom numbers as normal ones.\n\n"); 
+ +  }
+ +
+ +  /* Check for errors in the input now, since they might cause problems
+ +   * during processing further down.
+ +   */
+ +  check_warning_error(wi,FARGS);
+ +
+ +  if (opt2bSet("-r",NFILE,fnm))
+ +    sprintf(fn,"%s",opt2fn("-r",NFILE,fnm));
+ +  else
+ +    sprintf(fn,"%s",opt2fn("-c",NFILE,fnm));
+ +  if (opt2bSet("-rb",NFILE,fnm))
+ +    sprintf(fnB,"%s",opt2fn("-rb",NFILE,fnm));
+ +  else
+ +    strcpy(fnB,fn);
+ +
+ +    if (nint_ftype(sys,mi,F_POSRES) > 0)
+ +    {
+ +        if (bVerbose)
+ +        {
+ +            fprintf(stderr,"Reading position restraint coords from %s",fn);
+ +            if (strcmp(fn,fnB) == 0)
+ +            {
+ +                fprintf(stderr,"\n");
+ +            }
+ +            else
+ +            {
+ +                fprintf(stderr," and %s\n",fnB);
+ +                if (ir->efep != efepNO && ir->n_flambda > 0)
+ +                {
+ +                    warning_error(wi,"Can not change the position restraint reference coordinates with lambda togther with foreign lambda calculation.");
+ +                }
+ +            }
+ +        }
+ +        gen_posres(sys,mi,fn,fnB,
+ +                   ir->refcoord_scaling,ir->ePBC,
+ +                   ir->posres_com,ir->posres_comB,
+ +                   wi);
+ +    }
+ +              
+ +  nvsite = 0;
+ +  /* set parameters for virtual site construction (not for vsiten) */
+ +  for(mt=0; mt<sys->nmoltype; mt++) {
+ +    nvsite +=
+ +      set_vsites(bVerbose, &sys->moltype[mt].atoms, atype, mi[mt].plist);
+ +  }
+ +  /* now throw away all obsolete bonds, angles and dihedrals: */
+ +  /* note: constraints are ALWAYS removed */
+ +  if (nvsite) {
+ +    for(mt=0; mt<sys->nmoltype; mt++) {
+ +      clean_vsite_bondeds(mi[mt].plist,sys->moltype[mt].atoms.nr,bRmVSBds);
+ +    }
+ +  }
+ +  
+ +      /* If we are using CMAP, setup the pre-interpolation grid */
+ +      if(plist->ncmap>0)
+ +      {
+ +              init_cmap_grid(&sys->ffparams.cmap_grid, plist->nc, plist->grid_spacing);
+ +              setup_cmap(plist->grid_spacing, plist->nc, plist->cmap,&sys->ffparams.cmap_grid);
+ +      }
+ +      
+ +  set_wall_atomtype(atype,opts,ir);
+ +  if (bRenum) {
+ +    renum_atype(plist, sys, ir->wall_atomtype, atype, bVerbose);
+ +    ntype = get_atomtype_ntypes(atype);
+ +  }
+ +
+ +    if (ir->implicit_solvent != eisNO)
+ +    {
+ +        /* Now we have renumbered the atom types, we can check the GBSA params */
+ +        check_gbsa_params(ir,atype);
+ +      
+ +      /* Check that all atoms that have charge and/or LJ-parameters also have 
+ +       * sensible GB-parameters
+ +       */
+ +      check_gbsa_params_charged(sys,atype);
+ +    }
+ +
+ +      /* PELA: Copy the atomtype data to the topology atomtype list */
+ +      copy_atomtype_atomtypes(atype,&(sys->atomtypes));
+ +
+ +      if (debug)
+ +    pr_symtab(debug,0,"After renum_atype",&sys->symtab);
+ +
+ +  if (bVerbose) 
+ +    fprintf(stderr,"converting bonded parameters...\n");
+ +      
+ +  ntype = get_atomtype_ntypes(atype);
+ +  convert_params(ntype, plist, mi, comb, reppow, fudgeQQ, sys);
+ +      
+ +  if (debug)
+ +    pr_symtab(debug,0,"After convert_params",&sys->symtab);
+ +
+ +  /* set ptype to VSite for virtual sites */
+ +  for(mt=0; mt<sys->nmoltype; mt++) {
+ +    set_vsites_ptype(FALSE,&sys->moltype[mt]);
+ +  }
+ +  if (debug) {
+ +    pr_symtab(debug,0,"After virtual sites",&sys->symtab);
+ +  }
+ +  /* Check velocity for virtual sites and shells */
+ +  if (bGenVel) {
+ +    check_vel(sys,state.v);
+ +  }
+ +    
+ +  /* check masses */
+ +  check_mol(sys,wi);
+ +  
+ +  for(i=0; i<sys->nmoltype; i++) {
+ +      check_cg_sizes(ftp2fn(efTOP,NFILE,fnm),&sys->moltype[i].cgs,wi);
+ +  }
+ +
+ +  if (EI_DYNAMICS(ir->eI) && ir->eI != eiBD)
+ +  {
+ +      check_bonds_timestep(sys,ir->delta_t,wi);
+ +  }
+ +
+ +  if (EI_ENERGY_MINIMIZATION(ir->eI) && 0 == ir->nsteps)
+ +  {
+ +      warning_note(wi,"Zero-step energy minimization will alter the coordinates before calculating the energy. If you just want the energy of a single point, try zero-step MD (with unconstrained_start = yes). To do multiple single-point energy evaluations of different configurations of the same topology, use mdrun -rerun.");
+ +  }
+ +
+ +  check_warning_error(wi,FARGS);
+ +      
+ +  if (bVerbose) 
+ +    fprintf(stderr,"initialising group options...\n");
+ +  do_index(mdparin,ftp2fn_null(efNDX,NFILE,fnm),
+ +           sys,bVerbose,ir,
+ +           bGenVel ? state.v : NULL,
+ +           wi);
+ +  
+ +  /* Init the temperature coupling state */
+ +  init_gtc_state(&state,ir->opts.ngtc,0,ir->opts.nhchainlength);
+ +
+ +  if (bVerbose)
+ +    fprintf(stderr,"Checking consistency between energy and charge groups...\n");
+ +  check_eg_vs_cg(sys);
+ +  
+ +  if (debug)
+ +    pr_symtab(debug,0,"After index",&sys->symtab);
+ +  triple_check(mdparin,ir,sys,wi);
+ +  close_symtab(&sys->symtab);
+ +  if (debug)
+ +    pr_symtab(debug,0,"After close",&sys->symtab);
+ +
+ +  /* make exclusions between QM atoms */
+ +  if (ir->bQMMM) {
+ +    generate_qmexcl(sys,ir);
+ +  }
+ +
+ +  if (ftp2bSet(efTRN,NFILE,fnm)) {
+ +    if (bVerbose)
+ +      fprintf(stderr,"getting data from old trajectory ...\n");
+ +    cont_status(ftp2fn(efTRN,NFILE,fnm),ftp2fn_null(efEDR,NFILE,fnm),
+ +              bNeedVel,bGenVel,fr_time,ir,&state,sys,oenv);
+ +  }
+ +
+ +    if (ir->ePBC==epbcXY && ir->nwall!=2)
+ +    {
+ +        clear_rvec(state.box[ZZ]);
+ +    }
+ +  
+ +    if (ir->rlist > 0)
+ +    {
+ +        set_warning_line(wi,mdparin,-1);
+ +        check_chargegroup_radii(sys,ir,state.x,wi);
+ +    }
+ +
+ +  if (EEL_FULL(ir->coulombtype)) {
+ +    /* Calculate the optimal grid dimensions */
+ +    copy_mat(state.box,box);
+ +    if (ir->ePBC==epbcXY && ir->nwall==2)
+ +      svmul(ir->wall_ewald_zfac,box[ZZ],box[ZZ]);
+ +    max_spacing = calc_grid(stdout,box,opts->fourierspacing,
+ +                            &(ir->nkx),&(ir->nky),&(ir->nkz));
+ +    if ((ir->coulombtype == eelPPPM) && (max_spacing > 0.1)) {
+ +        set_warning_line(wi,mdparin,-1);
+ +        warning_note(wi,"Grid spacing larger then 0.1 while using PPPM.");
+ +    }
+ +  }
+ +
+ +  if (ir->ePull != epullNO)
+ +    set_pull_init(ir,sys,state.x,state.box,oenv,opts->pull_start);
+ +  
+ +  if (ir->bRot)
+ +  {
+ +      set_reference_positions(ir->rot,sys,state.x,state.box,
+ +                              opt2fn("-ref",NFILE,fnm),opt2bSet("-ref",NFILE,fnm),
+ +                              wi);
+ +  }
+ +
+ +  /*  reset_multinr(sys); */
+ +  
+ +  if (EEL_PME(ir->coulombtype)) {
+ +    float ratio = pme_load_estimate(sys,ir,state.box);
+ +    fprintf(stderr,"Estimate for the relative computational load of the PME mesh part: %.2f\n",ratio);
+ +    /* With free energy we might need to do PME both for the A and B state
+ +     * charges. This will double the cost, but the optimal performance will
+ +     * then probably be at a slightly larger cut-off and grid spacing.
+ +     */
+ +    if ((ir->efep == efepNO && ratio > 1.0/2.0) ||
+ +        (ir->efep != efepNO && ratio > 2.0/3.0)) {
+ +        warning_note(wi,
+ +                     "The optimal PME mesh load for parallel simulations is below 0.5\n"
+ +                 "and for highly parallel simulations between 0.25 and 0.33,\n"
+ +                 "for higher performance, increase the cut-off and the PME grid spacing");
+ +    }
+ +  }
+ +
+ +    {
+ +        char warn_buf[STRLEN];
+ +        double cio = compute_io(ir,sys->natoms,&sys->groups,F_NRE,1);
+ +        sprintf(warn_buf,"This run will generate roughly %.0f Mb of data",cio);
+ +        if (cio > 2000) {
+ +            set_warning_line(wi,mdparin,-1);
+ +            warning_note(wi,warn_buf);
+ +        } else {
+ +            printf("%s\n",warn_buf);
+ +        }
+ +    }
+ +      
+ +  if (bVerbose) 
+ +    fprintf(stderr,"writing run input file...\n");
+ +
+ +  done_warning(wi,FARGS);
+ +
+ +  state.lambda = ir->init_lambda;
+ +  write_tpx_state(ftp2fn(efTPX,NFILE,fnm),ir,&state,sys);
+ +  
+ +  thanx(stderr);
+ +  
+ +  return 0;
+ +}
diff --cc src/programs/mdrun/md.c

index 206d1836bf25c6acbc9bca3a61cdb177fe8af2b4,0000000000000000000000000000000000000000..f640d099adc343970b778827bbbbd7ce09fe485b

mode 100644,000000..100644
--- 1/src/programs/mdrun/md.c
--- /dev/null
+++ b/src/programs/mdrun/md.c
@@@ -1,2809 -1,0 +1,1882 @@@
- #include <signal.h>
- #include <stdlib.h>
- 
+ +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ + *
+ + * 
+ + *                This source code is part of
+ + * 
+ + *                 G   R   O   M   A   C   S
+ + * 
+ + *          GROningen MAchine for Chemical Simulations
+ + * 
+ + *                        VERSION 3.2.0
+ + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ + * Copyright (c) 2001-2004, The GROMACS development team,
+ + * check out http://www.gromacs.org for more information.
+ +
+ + * This program is free software; you can redistribute it and/or
+ + * modify it under the terms of the GNU General Public License
+ + * as published by the Free Software Foundation; either version 2
+ + * of the License, or (at your option) any later version.
+ + * 
+ + * If you want to redistribute modifications, please consider that
+ + * scientific software is very special. Version control is crucial -
+ + * bugs must be traceable. We will be happy to consider code for
+ + * inclusion in the official distribution, but derived work must not
+ + * be called official GROMACS. Details are found in the README & COPYING
+ + * files - if they are missing, get the official version at www.gromacs.org.
+ + * 
+ + * To help us fund GROMACS development, we humbly ask that you cite
+ + * the papers on the package - you can find them in the top README file.
+ + * 
+ + * For more info, check our website at http://www.gromacs.org
+ + * 
+ + * And Hey:
+ + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
+ + */
+ +#ifdef HAVE_CONFIG_H
+ +#include <config.h>
+ +#endif
+ +
- /* simulation conditions to transmit. Keep in mind that they are 
-    transmitted to other nodes through an MPI_Reduce after
-    casting them to a real (so the signals can be sent together with other 
-    data). This means that the only meaningful values are positive, 
-    negative or zero. */
- enum { eglsNABNSB, eglsCHKPT, eglsSTOPCOND, eglsRESETCOUNTERS, eglsNR };
- /* Is the signal in one simulation independent of other simulations? */
- gmx_bool gs_simlocal[eglsNR] = { TRUE, FALSE, FALSE, TRUE };
- 
- typedef struct {
-     int nstms;       /* The frequency for intersimulation communication */
-     int sig[eglsNR]; /* The signal set by one process in do_md */
-     int set[eglsNR]; /* The communicated signal, equal for all processes */
- } globsig_t;
- 
- 
- static int multisim_min(const gmx_multisim_t *ms,int nmin,int n)
- {
-     int  *buf;
-     gmx_bool bPos,bEqual;
-     int  s,d;
- 
-     snew(buf,ms->nsim);
-     buf[ms->sim] = n;
-     gmx_sumi_sim(ms->nsim,buf,ms);
-     bPos   = TRUE;
-     bEqual = TRUE;
-     for(s=0; s<ms->nsim; s++)
-     {
-         bPos   = bPos   && (buf[s] > 0);
-         bEqual = bEqual && (buf[s] == buf[0]);
-     }
-     if (bPos)
-     {
-         if (bEqual)
-         {
-             nmin = min(nmin,buf[0]);
-         }
-         else
-         {
-             /* Find the least common multiple */
-             for(d=2; d<nmin; d++)
-             {
-                 s = 0;
-                 while (s < ms->nsim && d % buf[s] == 0)
-                 {
-                     s++;
-                 }
-                 if (s == ms->nsim)
-                 {
-                     /* We found the LCM and it is less than nmin */
-                     nmin = d;
-                     break;
-                 }
-             }
-         }
-     }
-     sfree(buf);
- 
-     return nmin;
- }
- 
- static int multisim_nstsimsync(const t_commrec *cr,
-                                const t_inputrec *ir,int repl_ex_nst)
- {
-     int nmin;
- 
-     if (MASTER(cr))
-     {
-         nmin = INT_MAX;
-         nmin = multisim_min(cr->ms,nmin,ir->nstlist);
-         nmin = multisim_min(cr->ms,nmin,ir->nstcalcenergy);
-         nmin = multisim_min(cr->ms,nmin,repl_ex_nst);
-         if (nmin == INT_MAX)
-         {
-             gmx_fatal(FARGS,"Can not find an appropriate interval for inter-simulation communication, since nstlist, nstcalcenergy and -replex are all <= 0");
-         }
-         /* Avoid inter-simulation communication at every (second) step */
-         if (nmin <= 2)
-         {
-             nmin = 10;
-         }
-     }
- 
-     gmx_bcast(sizeof(int),&nmin,cr);
- 
-     return nmin;
- }
- 
- static void init_global_signals(globsig_t *gs,const t_commrec *cr,
-                                 const t_inputrec *ir,int repl_ex_nst)
- {
-     int i;
- 
-     if (MULTISIM(cr))
-     {
-         gs->nstms = multisim_nstsimsync(cr,ir,repl_ex_nst);
-         if (debug)
-         {
-             fprintf(debug,"Syncing simulations for checkpointing and termination every %d steps\n",gs->nstms);
-         }
-     }
-     else
-     {
-         gs->nstms = 1;
-     }
- 
-     for(i=0; i<eglsNR; i++)
-     {
-         gs->sig[i] = 0;
-         gs->set[i] = 0;
-     }
- }
- 
- static void copy_coupling_state(t_state *statea,t_state *stateb, 
-                                 gmx_ekindata_t *ekinda,gmx_ekindata_t *ekindb, t_grpopts* opts) 
- {
-     
-     /* MRS note -- might be able to get rid of some of the arguments.  Look over it when it's all debugged */
-     
-     int i,j,nc;
- 
-     /* Make sure we have enough space for x and v */
-     if (statea->nalloc > stateb->nalloc)
-     {
-         stateb->nalloc = statea->nalloc;
-         srenew(stateb->x,stateb->nalloc);
-         srenew(stateb->v,stateb->nalloc);
-     }
- 
-     stateb->natoms     = statea->natoms;
-     stateb->ngtc       = statea->ngtc;
-     stateb->nnhpres    = statea->nnhpres;
-     stateb->veta       = statea->veta;
-     if (ekinda) 
-     {
-         copy_mat(ekinda->ekin,ekindb->ekin);
-         for (i=0; i<stateb->ngtc; i++) 
-         {
-             ekindb->tcstat[i].T = ekinda->tcstat[i].T;
-             ekindb->tcstat[i].Th = ekinda->tcstat[i].Th;
-             copy_mat(ekinda->tcstat[i].ekinh,ekindb->tcstat[i].ekinh);
-             copy_mat(ekinda->tcstat[i].ekinf,ekindb->tcstat[i].ekinf);
-             ekindb->tcstat[i].ekinscalef_nhc =  ekinda->tcstat[i].ekinscalef_nhc;
-             ekindb->tcstat[i].ekinscaleh_nhc =  ekinda->tcstat[i].ekinscaleh_nhc;
-             ekindb->tcstat[i].vscale_nhc =  ekinda->tcstat[i].vscale_nhc;
-         }
-     }
-     copy_rvecn(statea->x,stateb->x,0,stateb->natoms);
-     copy_rvecn(statea->v,stateb->v,0,stateb->natoms);
-     copy_mat(statea->box,stateb->box);
-     copy_mat(statea->box_rel,stateb->box_rel);
-     copy_mat(statea->boxv,stateb->boxv);
- 
-     for (i = 0; i<stateb->ngtc; i++) 
-     { 
-         nc = i*opts->nhchainlength;
-         for (j=0; j<opts->nhchainlength; j++) 
-         {
-             stateb->nosehoover_xi[nc+j]  = statea->nosehoover_xi[nc+j];
-             stateb->nosehoover_vxi[nc+j] = statea->nosehoover_vxi[nc+j];
-         }
-     }
-     if (stateb->nhpres_xi != NULL)
-     {
-         for (i = 0; i<stateb->nnhpres; i++) 
-         {
-             nc = i*opts->nhchainlength;
-             for (j=0; j<opts->nhchainlength; j++) 
-             {
-                 stateb->nhpres_xi[nc+j]  = statea->nhpres_xi[nc+j];
-                 stateb->nhpres_vxi[nc+j] = statea->nhpres_vxi[nc+j];
-             }
-         }
-     }
- }
- 
- static real compute_conserved_from_auxiliary(t_inputrec *ir, t_state *state, t_extmass *MassQ)
- {
-     real quantity = 0;
-     switch (ir->etc) 
-     {
-     case etcNO:
-         break;
-     case etcBERENDSEN:
-         break;
-     case etcNOSEHOOVER:
-         quantity = NPT_energy(ir,state,MassQ);                
-         break;
-     case etcVRESCALE:
-         quantity = vrescale_energy(&(ir->opts),state->therm_integral);
-         break;
-     default:
-         break;
-     }
-     return quantity;
- }
- 
- static void compute_globals(FILE *fplog, gmx_global_stat_t gstat, t_commrec *cr, t_inputrec *ir, 
-                             t_forcerec *fr, gmx_ekindata_t *ekind, 
-                             t_state *state, t_state *state_global, t_mdatoms *mdatoms, 
-                             t_nrnb *nrnb, t_vcm *vcm, gmx_wallcycle_t wcycle,
-                             gmx_enerdata_t *enerd,tensor force_vir, tensor shake_vir, tensor total_vir, 
-                             tensor pres, rvec mu_tot, gmx_constr_t constr, 
-                             globsig_t *gs,gmx_bool bInterSimGS,
-                             matrix box, gmx_mtop_t *top_global, real *pcurr, 
-                             int natoms, gmx_bool *bSumEkinhOld, int flags)
- {
-     int  i,gsi;
-     real gs_buf[eglsNR];
-     tensor corr_vir,corr_pres,shakeall_vir;
-     gmx_bool bEner,bPres,bTemp, bVV;
-     gmx_bool bRerunMD, bStopCM, bGStat, bIterate, 
-         bFirstIterate,bReadEkin,bEkinAveVel,bScaleEkin, bConstrain;
-     real ekin,temp,prescorr,enercorr,dvdlcorr;
-     
-     /* translate CGLO flags to gmx_booleans */
-     bRerunMD = flags & CGLO_RERUNMD;
-     bStopCM = flags & CGLO_STOPCM;
-     bGStat = flags & CGLO_GSTAT;
- 
-     bReadEkin = (flags & CGLO_READEKIN);
-     bScaleEkin = (flags & CGLO_SCALEEKIN);
-     bEner = flags & CGLO_ENERGY;
-     bTemp = flags & CGLO_TEMPERATURE;
-     bPres  = (flags & CGLO_PRESSURE);
-     bConstrain = (flags & CGLO_CONSTRAINT);
-     bIterate = (flags & CGLO_ITERATE);
-     bFirstIterate = (flags & CGLO_FIRSTITERATE);
- 
-     /* we calculate a full state kinetic energy either with full-step velocity verlet
-        or half step where we need the pressure */
-     
-     bEkinAveVel = (ir->eI==eiVV || (ir->eI==eiVVAK && bPres) || bReadEkin);
-     
-     /* in initialization, it sums the shake virial in vv, and 
-        sums ekinh_old in leapfrog (or if we are calculating ekinh_old) for other reasons */
- 
-     /* ########## Kinetic energy  ############## */
-     
-     if (bTemp) 
-     {
-         /* Non-equilibrium MD: this is parallellized, but only does communication
-          * when there really is NEMD.
-          */
-         
-         if (PAR(cr) && (ekind->bNEMD)) 
-         {
-             accumulate_u(cr,&(ir->opts),ekind);
-         }
-         debug_gmx();
-         if (bReadEkin)
-         {
-             restore_ekinstate_from_state(cr,ekind,&state_global->ekinstate);
-         }
-         else 
-         {
- 
-             calc_ke_part(state,&(ir->opts),mdatoms,ekind,nrnb,bEkinAveVel,bIterate);
-         }
-         
-         debug_gmx();
-         
-         /* Calculate center of mass velocity if necessary, also parallellized */
-         if (bStopCM && !bRerunMD && bEner) 
-         {
-             calc_vcm_grp(fplog,mdatoms->start,mdatoms->homenr,mdatoms,
-                          state->x,state->v,vcm);
-         }
-     }
- 
-     if (bTemp || bPres || bEner || bConstrain) 
-     {
-         if (!bGStat)
-         {
-             /* We will not sum ekinh_old,                                                            
-              * so signal that we still have to do it.                                                
-              */
-             *bSumEkinhOld = TRUE;
- 
-         }
-         else
-         {
-             if (gs != NULL)
-             {
-                 for(i=0; i<eglsNR; i++)
-                 {
-                     gs_buf[i] = gs->sig[i];
-                 }
-             }
-             if (PAR(cr)) 
-             {
-                 wallcycle_start(wcycle,ewcMoveE);
-                 GMX_MPE_LOG(ev_global_stat_start);
-                 global_stat(fplog,gstat,cr,enerd,force_vir,shake_vir,mu_tot,
-                             ir,ekind,constr,vcm,
-                             gs != NULL ? eglsNR : 0,gs_buf,
-                             top_global,state,
-                             *bSumEkinhOld,flags);
-                 GMX_MPE_LOG(ev_global_stat_finish);
-                 wallcycle_stop(wcycle,ewcMoveE);
-             }
-             if (gs != NULL)
-             {
-                 if (MULTISIM(cr) && bInterSimGS)
-                 {
-                     if (MASTER(cr))
-                     {
-                         /* Communicate the signals between the simulations */
-                         gmx_sum_sim(eglsNR,gs_buf,cr->ms);
-                     }
-                     /* Communicate the signals from the master to the others */
-                     gmx_bcast(eglsNR*sizeof(gs_buf[0]),gs_buf,cr);
-                 }
-                 for(i=0; i<eglsNR; i++)
-                 {
-                     if (bInterSimGS || gs_simlocal[i])
-                     {
-                         /* Set the communicated signal only when it is non-zero,
-                          * since signals might not be processed at each MD step.
-                          */
-                         gsi = (gs_buf[i] >= 0 ?
-                                (int)(gs_buf[i] + 0.5) :
-                                (int)(gs_buf[i] - 0.5));
-                         if (gsi != 0)
-                         {
-                             gs->set[i] = gsi;
-                         }
-                         /* Turn off the local signal */
-                         gs->sig[i] = 0;
-                     }
-                 }
-             }
-             *bSumEkinhOld = FALSE;
-         }
-     }
-     
-     if (!ekind->bNEMD && debug && bTemp && (vcm->nr > 0))
-     {
-         correct_ekin(debug,
-                      mdatoms->start,mdatoms->start+mdatoms->homenr,
-                      state->v,vcm->group_p[0],
-                      mdatoms->massT,mdatoms->tmass,ekind->ekin);
-     }
-     
-     if (bEner) {
-         /* Do center of mass motion removal */
-         if (bStopCM && !bRerunMD) /* is this correct?  Does it get called too often with this logic? */
-         {
-             check_cm_grp(fplog,vcm,ir,1);
-             do_stopcm_grp(fplog,mdatoms->start,mdatoms->homenr,mdatoms->cVCM,
-                           state->x,state->v,vcm);
-             inc_nrnb(nrnb,eNR_STOPCM,mdatoms->homenr);
-         }
- 
-         /* Calculate the amplitude of the cosine velocity profile */
-         ekind->cosacc.vcos = ekind->cosacc.mvcos/mdatoms->tmass;
-     }
- 
-     if (bTemp) 
-     {
-         /* Sum the kinetic energies of the groups & calc temp */
-         /* compute full step kinetic energies if vv, or if vv-avek and we are computing the pressure with IR_NPT_TROTTER */
-         /* three main cases:  VV with AveVel (md-vv), vv with AveEkin (md-vv-avek), leap with AveEkin (md).  
-            Leap with AveVel is not supported; it's not clear that it will actually work.  
-            bEkinAveVel: If TRUE, we simply multiply ekin by ekinscale to get a full step kinetic energy. 
-            If FALSE, we average ekinh_old and ekinh*ekinscale_nhc to get an averaged half step kinetic energy.
-            bSaveEkinOld: If TRUE (in the case of iteration = bIterate is TRUE), we don't reset the ekinscale_nhc.  
-            If FALSE, we go ahead and erase over it.
-         */ 
-         enerd->term[F_TEMP] = sum_ekin(&(ir->opts),ekind,&(enerd->term[F_DKDL]),
-                                        bEkinAveVel,bIterate,bScaleEkin);
-  
-         enerd->term[F_EKIN] = trace(ekind->ekin);
-     }
-     
-     /* ##########  Long range energy information ###### */
-     
-     if (bEner || bPres || bConstrain) 
-     {
-         calc_dispcorr(fplog,ir,fr,0,top_global->natoms,box,state->lambda,
-                       corr_pres,corr_vir,&prescorr,&enercorr,&dvdlcorr);
-     }
-     
-     if (bEner && bFirstIterate) 
-     {
-         enerd->term[F_DISPCORR] = enercorr;
-         enerd->term[F_EPOT] += enercorr;
-         enerd->term[F_DVDL] += dvdlcorr;
-         if (fr->efep != efepNO) {
-             enerd->dvdl_lin += dvdlcorr;
-         }
-     }
-     
-     /* ########## Now pressure ############## */
-     if (bPres || bConstrain) 
-     {
-         
-         m_add(force_vir,shake_vir,total_vir);
-         
-         /* Calculate pressure and apply LR correction if PPPM is used.
-          * Use the box from last timestep since we already called update().
-          */
-         
-         enerd->term[F_PRES] = calc_pres(fr->ePBC,ir->nwall,box,ekind->ekin,total_vir,pres,
-                                         (fr->eeltype==eelPPPM)?enerd->term[F_COUL_RECIP]:0.0);
-         
-         /* Calculate long range corrections to pressure and energy */
-         /* this adds to enerd->term[F_PRES] and enerd->term[F_ETOT], 
-            and computes enerd->term[F_DISPCORR].  Also modifies the 
-            total_vir and pres tensors */
-         
-         m_add(total_vir,corr_vir,total_vir);
-         m_add(pres,corr_pres,pres);
-         enerd->term[F_PDISPCORR] = prescorr;
-         enerd->term[F_PRES] += prescorr;
-         *pcurr = enerd->term[F_PRES];
-         /* calculate temperature using virial */
-         enerd->term[F_VTEMP] = calc_temp(trace(total_vir),ir->opts.nrdf[0]);
-         
-     }    
- }
- 
- 
- /* Definitions for convergence of iterated constraints */
- 
- /* iterate constraints up to 50 times  */
- #define MAXITERCONST       50
- 
- /* Bookkeeping for one round of iteration: reset by gmx_iterate_init()
-  * and advanced once per call by done_iterating().
-  */
- typedef struct
- {
-     /* x is the current trial value and f = fom - x is the residual that
-      * done_iterating() tries to drive to zero; xprev and fprev hold the
-      * same two quantities from the preceding call.
-      */
-     real f,fprev,x,xprev;  
-     /* incremented by done_iterating() each time it returns FALSE;
-      * also used as the index into allrelerr
-      */
-     int iter_i;
-     /* copied in by gmx_iterate_init(), set to FALSE on convergence */
-     gmx_bool bIterate;
-     /* relative error stored per iteration, compared against earlier
-      * entries by done_iterating() to detect cycles
-      */
-     real allrelerr[MAXITERCONST+2];
-     int num_close; /* number of "close" violations, caused by limited precision. */
- } gmx_iterate_t;
-   
- /* CONVERGEITER: threshold below which the error counts as converged.
-  * CLOSE_ENOUGH: looser threshold accepted once MAXITERCONST is exceeded.
-  * Both are tighter when GMX_DOUBLE is defined.
-  */
- #ifdef GMX_DOUBLE
- #define CONVERGEITER  0.000000001
- #define CLOSE_ENOUGH  0.000001000
- #else
- #define CONVERGEITER  0.0001
- #define CLOSE_ENOUGH  0.0050
- #endif
- 
- /* we want to keep track of the close calls.  If there are too many, there might be some other issues.
-    so we make sure that it's either less than some predetermined number, or if more than that number,
-    only some small fraction of the total. */
- #define MAX_NUMBER_CLOSE        50
- #define FRACTION_CLOSE       0.001
-   
- /* maximum length of cyclic traps to check, emerging from limited numerical precision  */
- #define CYCLEMAX            20
- 
- /* Prepare an iteration record for a fresh round of iteration.
-  *
-  * Clears the relative-error history, the close-call counter and the
-  * iteration counter, and stores the caller's bIterate flag.  The
-  * x/xprev/f/fprev members are not touched here.
-  */
- static void gmx_iterate_init(gmx_iterate_t *iterate,gmx_bool bIterate)
- {
-     int slot;
- 
-     /* Wipe every entry of the error history (MAXITERCONST+2 slots) */
-     for (slot = MAXITERCONST+1; slot >= 0; slot--)
-     {
-         iterate->allrelerr[slot] = 0;
-     }
- 
-     iterate->num_close = 0;
-     iterate->bIterate  = bIterate;
-     iterate->iter_i    = 0;
- }
- 
- /* Monitor convergence of an iterated quantity and propose the next trial value.
-  *
-  * cr, fplog      passed through to md_print_warning() for close-call notes
-  * nsteps         step number used in messages and as the denominator of
-  *                the close-call fraction
-  * iterate        iteration record (see gmx_iterate_t); updated in place
-  * bFirstIterate  TRUE on the first call of a round: seeds x with fom
-  * fom            "figure of merit" obtained from the current trial value
-  * newf           out: the value to try next
-  *
-  * Returns TRUE when iteration should stop (converged, or accepted as
-  * close enough after MAXITERCONST iterations) and FALSE when another
-  * iteration is needed.  Calls gmx_fatal() when no acceptable result
-  * was reached.
-  */
- static gmx_bool done_iterating(const t_commrec *cr,FILE *fplog, int nsteps, gmx_iterate_t *iterate, gmx_bool bFirstIterate, real fom, real *newf) 
- {    
-     /* monitor convergence, and use a secant search to propose new
-        values.  
-                                                                   x_{i} - x_{i-1}
-        The secant method computes x_{i+1} = x_{i} - f(x_{i}) * ---------------------
-                                                                 f(x_{i}) - f(x_{i-1})
-        
-        The function we are trying to zero is fom-x, where fom is the
-        "figure of merit" which is the pressure (or the veta value) we
-        would get by putting in an old value of the pressure or veta into
-        the incrementor function for the step or half step.  I have
-        verified that this gives the same answer as self consistent
-        iteration, usually in many fewer steps, especially for small tau_p.
-        
-        We could possibly eliminate an iteration with proper use
-        of the value from the previous step, but that would take a bit
-        more bookkeeping, especially for veta, since tests indicate the
-        function of veta on the last step is not sufficiently close to
-        guarantee convergence this step. This is
-        good enough for now.  On my tests, I could use tau_p down to
-        0.02, which is smaller than would ever be necessary in
-        practice. Generally, 3-5 iterations will be sufficient */
- 
-     real relerr,err;
-     char buf[256];
-     int i;
-     gmx_bool incycle;
-     
-     if (bFirstIterate) 
-     {
-         iterate->x = fom;
-         iterate->f = fom-iterate->x;
-         iterate->xprev = 0;
-         iterate->fprev = 0;
-         *newf = fom;
-     } 
-     else 
-     {
-         iterate->f = fom-iterate->x; /* we want to zero this difference */
-         if ((iterate->iter_i > 1) && (iterate->iter_i < MAXITERCONST)) 
-         {
-             if (iterate->f==iterate->fprev) 
-             {
-                 /* The secant denominator would be zero.
-                  * NOTE(review): this proposes the residual f rather than
-                  * x or fom as the next value -- kept as is, confirm intent.
-                  */
-                 *newf = iterate->f;
-             } 
-             else 
-             {
-                 *newf = iterate->x - (iterate->x-iterate->xprev)*(iterate->f)/(iterate->f-iterate->fprev); 
-             }
-         } 
-         else 
-         {
-             /* just use self-consistent iteration the first step to initialize, or 
-                if it's not converging (which happens occasionally -- need to investigate why) */
-             *newf = fom; 
-         }
-     }
-     /* Consider a slight shortcut allowing us to exit one sooner -- we check the
-        difference between the closest of x and xprev to the new
-        value. To be 100% certain, we should check the difference between
-        the last result, and the previous result, or
-        
-        relerr = (fabs((x-xprev)/fom));
-        
-        but this is pretty much never necessary under typical conditions.
-        Checking numerically, it seems to lead to almost exactly the same
-        trajectories, but there are small differences out a few decimal
-        places in the pressure, and eventually in the v_eta, but it could
-        save an iteration.
-        
-        if (fabs(*newf-x) < fabs(*newf - xprev)) { xmin = x;} else { xmin = xprev;}
-        relerr = (fabs((*newf-xmin) / *newf));
-     */
-     
-     err = fabs((iterate->f-iterate->fprev));
-     relerr = fabs(err/fom);
- 
-     /* The history has MAXITERCONST+2 slots; never write past the end
-      * even if the caller keeps going without re-initializing.
-      */
-     if (iterate->iter_i < MAXITERCONST+2)
-     {
-         iterate->allrelerr[iterate->iter_i] = relerr;
-     }
-     
-     if (iterate->iter_i > 0) 
-     {
-         if (debug) 
-         {
-             fprintf(debug,"Iterating NPT constraints: %6i %20.12f%14.6g%20.12f\n",
-                     iterate->iter_i,fom,relerr,*newf);
-         }
-         
-         if ((relerr < CONVERGEITER) || (err < CONVERGEITER) || (fom==0) || ((iterate->x == iterate->xprev) && iterate->iter_i > 1))
-         {
-             iterate->bIterate = FALSE;
-             if (debug) 
-             {
-                 fprintf(debug,"Iterating NPT constraints: CONVERGED\n");
-             }
-             return TRUE;
-         }
-         if (iterate->iter_i > MAXITERCONST)
-         {
-             if (relerr < CLOSE_ENOUGH)
-             {
-                 /* Look for a repeating pattern of period i in the errors
-                  * recorded before the current iteration.
-                  */
-                 incycle = FALSE;
-                 for (i=1;i<CYCLEMAX;i++) {
-                     if ((iterate->allrelerr[iterate->iter_i-(1+i)] == iterate->allrelerr[iterate->iter_i-1]) &&
-                         (iterate->allrelerr[iterate->iter_i-(1+i)] == iterate->allrelerr[iterate->iter_i-(1+2*i)])) {
-                         incycle = TRUE;
-                         if (debug) 
-                         {
-                             fprintf(debug,"Exiting from an NPT iterating cycle of length %d\n",i);
-                         }
-                         break;
-                     }
-                 }
-                 
-                 if (incycle) {
-                     /* step 1: trapped in a numerical attractor */
-                     /* we are trapped in a numerical attractor, and can't converge any more, and are close to the final result.
-                        Better to give up convergence here than have the simulation die.
-                     */
-                     iterate->num_close++;
-                     return TRUE;
-                 } 
-                 else 
-                 {
-                     /* Step #2: test if we are reasonably close for other reasons, then monitor the number.  If not, die */
-                     
-                     /* Once there have been more than a few close calls,
-                      * check the total fraction.  If too high, die.
-                      */
-                     if (iterate->num_close >= MAX_NUMBER_CLOSE &&
-                         iterate->num_close/(double)nsteps > FRACTION_CLOSE)
-                     {
-                         /* The fraction is a double: print it with %g, as a percentage */
-                         gmx_fatal(FARGS,"Could not converge NPT constraints, too many exceptions (%g%%)\n",100.0*iterate->num_close/(double)nsteps);
-                     }
- 
-                     /* Only warn for the first few close calls to keep the log readable */
-                     if (iterate->num_close < MAX_NUMBER_CLOSE) 
-                     {
-                         sprintf(buf,"Slight numerical convergence deviation with NPT at step %d, relative error only %10.5g, likely not a problem, continuing\n",nsteps,relerr);
-                         md_print_warning(cr,fplog,buf);
-                     }
- 
-                     /* Accept the result.  Previously the case "many close
-                      * calls but an acceptable fraction" fell through and
-                      * kept iterating, which ran iter_i past the end of
-                      * allrelerr.
-                      */
-                     iterate->num_close++;
-                     return TRUE;
-                 }
-             }
-             else 
-             {
-                 gmx_fatal(FARGS,"Could not converge NPT constraints\n");
-             }
-         }
-     }
-     
-     /* Not done: shift the current values into the "previous" slots */
-     iterate->xprev = iterate->x;
-     iterate->x = *newf;
-     iterate->fprev = iterate->f;
-     iterate->iter_i++;
-     
-     return FALSE;
- }
- 
- /* Make the interval *p a multiple of nst.
-  *
-  * A positive *p that is not already a multiple of nst is rounded up to
-  * the next multiple and a NOTE naming both parameters (desc_nst,
-  * desc_p) is passed to md_print_warning().  Non-positive values and
-  * exact multiples are left unchanged.
-  */
- static void check_nst_param(FILE *fplog,t_commrec *cr,
-                             const char *desc_nst,int nst,
-                             const char *desc_p,int *p)
- {
-     char note[STRLEN];
-     int  cur = *p;
- 
-     /* Nothing to adjust */
-     if (cur <= 0 || cur % nst == 0)
-     {
-         return;
-     }
- 
-     /* Round up to the next multiple of nst */
-     *p = (cur/nst + 1)*nst;
-     sprintf(note,"NOTE: %s changes %s to %d\n",desc_nst,desc_p,*p);
-     md_print_warning(cr,fplog,note);
- }
- 
- /* Restart the performance accounting in the middle of a run.
-  *
-  * Announces the reset for the given step, resets the wallcycle and
-  * nrnb counters (and the domain decomposition statistics when
-  * DOMAINDECOMP(cr) holds) and restarts the run timer.  The steps done
-  * so far (*step_rel) are moved from ir->nsteps into ir->init_step and
-  * *step_rel is set back to zero.
-  */
- static void reset_all_counters(FILE *fplog,t_commrec *cr,
-                                gmx_large_int_t step,
-                                gmx_large_int_t *step_rel,t_inputrec *ir,
-                                gmx_wallcycle_t wcycle,t_nrnb *nrnb,
-                                gmx_runtime_t *runtime)
- {
-     char buf[STRLEN],sbuf[STEPSTRSIZE];
- 
-     /* Reset all the counters related to performance over the run */
-     sprintf(buf,"Step %s: resetting all time and cycle counters\n",
-             gmx_step_str(step,sbuf));
-     md_print_warning(cr,fplog,buf);
- 
-     /* The ewcRUN counter is stopped before the reset and started again
-      * at the end of this function.
-      */
-     wallcycle_stop(wcycle,ewcRUN);
-     wallcycle_reset_all(wcycle);
-     if (DOMAINDECOMP(cr))
-     {
-         reset_dd_statistics_counters(cr->dd);
-     }
-     init_nrnb(nrnb);
-     /* Re-base the step bookkeeping so that the current step counts as
-      * relative step 0 of a run with correspondingly fewer steps.
-      */
-     ir->init_step += *step_rel;
-     ir->nsteps    -= *step_rel;
-     *step_rel = 0;
-     wallcycle_start(wcycle,ewcRUN);
-     runtime_start(runtime);
-     print_date_and_time(fplog,cr->nodeid,"Restarted time",runtime);
- }
- 
- /* Fold i into *n, where *n tracks the smallest positive value seen so
-  * far and *n == 0 means that nothing has been recorded yet.
-  * Non-positive i is ignored.
-  */
- static void min_zero(int *n,int i)
- {
-     if (i <= 0)
-     {
-         /* Not a usable candidate */
-         return;
-     }
-     if (*n != 0 && *n <= i)
-     {
-         /* The recorded value is already at least as small */
-         return;
-     }
-     *n = i;
- }
- 
- /* Return the largest integer that divides every positive input.
-  *
-  * Inputs <= 0 are ignored.  The search starts at the smallest positive
-  * input and counts down until all positive inputs are multiples of the
-  * candidate (1 always qualifies).  When no input is positive,
-  * gmx_incons() is called.
-  */
- static int lcd4(int i1,int i2,int i3,int i4)
- {
-     int in[4];
-     int k,cand;
-     int bDivides;
- 
-     in[0] = i1;
-     in[1] = i2;
-     in[2] = i3;
-     in[3] = i4;
- 
-     /* Start from the smallest positive input */
-     cand = 0;
-     for (k=0; k<4; k++)
-     {
-         min_zero(&cand,in[k]);
-     }
-     if (cand == 0)
-     {
-         gmx_incons("All 4 inputs for determininig nstglobalcomm are <= 0");
-     }
- 
-     /* Count down until the candidate divides all positive inputs */
-     for ( ; cand > 1; cand--)
-     {
-         bDivides = 1;
-         for (k=0; k<4; k++)
-         {
-             if (in[k] > 0 && in[k] % cand != 0)
-             {
-                 bDivides = 0;
-             }
-         }
-         if (bDivides)
-         {
-             break;
-         }
-     }
- 
-     return cand;
- }
- 
- /* Determine the global communication interval and make the related
-  * input parameters consistent with it.
-  *
-  * nstglobalcomm == -1 requests an automatic choice; any other value is
-  * treated as an explicit setting (reported to the user as "-gcom").
-  * May modify ir->nstcalcenergy, ir->nsttcouple, ir->nstpcouple,
-  * ir->nstenergy, ir->nstlog and ir->nstcomm; every change is reported
-  * through md_print_warning().
-  *
-  * Returns the interval that is actually to be used.
-  */
- static int check_nstglobalcomm(FILE *fplog,t_commrec *cr,
-                                int nstglobalcomm,t_inputrec *ir)
- {
-     char buf[STRLEN];
- 
-     /* Integrators for which EI_DYNAMICS is false communicate every step */
-     if (!EI_DYNAMICS(ir->eI))
-     {
-         nstglobalcomm = 1;
-     }
- 
-     if (nstglobalcomm == -1)
-     {
-         if (!(ir->nstcalcenergy > 0 ||
-               ir->nstlist > 0 ||
-               ir->etc != etcNO ||
-               ir->epc != epcNO))
-         {
-             /* Nothing dictates an interval: default to 10 steps,
-              * or to nstenergy when that is positive and smaller.
-              */
-             nstglobalcomm = 10;
-             if (ir->nstenergy > 0 && ir->nstenergy < nstglobalcomm)
-             {
-                 nstglobalcomm = ir->nstenergy;
-             }
-         }
-         else
-         {
-             /* Ensure that we do timely global communication for
-              * (possibly) each of the four following options.
-              */
-             nstglobalcomm = lcd4(ir->nstcalcenergy,
-                                  ir->nstlist,
-                                  ir->etc != etcNO ? ir->nsttcouple : 0,
-                                  ir->epc != epcNO ? ir->nstpcouple : 0);
-         }
-     }
-     else
-     {
-         /* An explicit interval larger than nstlist is rounded down to
-          * a multiple of nstlist.
-          */
-         if (ir->nstlist > 0 &&
-             nstglobalcomm > ir->nstlist && nstglobalcomm % ir->nstlist != 0)
-         {
-             nstglobalcomm = (nstglobalcomm / ir->nstlist)*ir->nstlist;
-             sprintf(buf,"WARNING: nstglobalcomm is larger than nstlist, but not a multiple, setting it to %d\n",nstglobalcomm);
-             md_print_warning(cr,fplog,buf);
-         }
-         /* The remaining intervals are rounded up to multiples of
-          * nstglobalcomm by check_nst_param().
-          * NOTE(review): check_nst_param() divides by nstglobalcomm;
-          * presumably a value of 0 is rejected by the caller -- confirm.
-          */
-         if (ir->nstcalcenergy > 0)
-         {
-             check_nst_param(fplog,cr,"-gcom",nstglobalcomm,
-                             "nstcalcenergy",&ir->nstcalcenergy);
-         }
-         if (ir->etc != etcNO && ir->nsttcouple > 0)
-         {
-             check_nst_param(fplog,cr,"-gcom",nstglobalcomm,
-                             "nsttcouple",&ir->nsttcouple);
-         }
-         if (ir->epc != epcNO && ir->nstpcouple > 0)
-         {
-             check_nst_param(fplog,cr,"-gcom",nstglobalcomm,
-                             "nstpcouple",&ir->nstpcouple);
-         }
- 
-         check_nst_param(fplog,cr,"-gcom",nstglobalcomm,
-                         "nstenergy",&ir->nstenergy);
- 
-         check_nst_param(fplog,cr,"-gcom",nstglobalcomm,
-                         "nstlog",&ir->nstlog);
-     }
- 
-     /* With comm_mode active, nstcomm is raised to at least nstglobalcomm */
-     if (ir->comm_mode != ecmNO && ir->nstcomm < nstglobalcomm)
-     {
-         sprintf(buf,"WARNING: Changing nstcomm from %d to %d\n",
-                 ir->nstcomm,nstglobalcomm);
-         md_print_warning(cr,fplog,buf);
-         ir->nstcomm = nstglobalcomm;
-     }
- 
-     return nstglobalcomm;
- }
- 
- /* Repair interval settings that can occur with old tpx files.
-  *
-  * Only acts when the input uses twin-range settings (IR_TWINRANGE),
-  * nstlist > 1 and nstcalcenergy is not a multiple of nstlist.  In that
-  * case a warning is printed and nstcalcenergy, nstpcouple (with
-  * pressure coupling), nstenergy, nstlog and nstdhdl (with free energy)
-  * are rounded up through check_nst_param().  Pressure coupling combined
-  * with SHAKE constraints is a fatal error.
-  */
- void check_ir_old_tpx_versions(t_commrec *cr,FILE *fplog,
-                                t_inputrec *ir,gmx_mtop_t *mtop)
- {
-     int nconstr;
- 
-     /* Check required for old tpx files */
-     if (!(IR_TWINRANGE(*ir) && ir->nstlist > 1 &&
-           ir->nstcalcenergy % ir->nstlist != 0))
-     {
-         return;
-     }
- 
-     md_print_warning(cr,fplog,"Old tpr file with twin-range settings: modifying energy calculation and/or T/P-coupling frequencies");
- 
-     /* Total number of constraint interactions of both kinds */
-     nconstr = gmx_mtop_ftype_count(mtop,F_CONSTR) +
-               gmx_mtop_ftype_count(mtop,F_CONSTRNC);
-     if (nconstr > 0 && ir->eConstrAlg == econtSHAKE)
-     {
-         md_print_warning(cr,fplog,"With twin-range cut-off's and SHAKE the virial and pressure are incorrect");
-         if (ir->epc != epcNO)
-         {
-             gmx_fatal(FARGS,"Can not do pressure coupling with twin-range cut-off's and SHAKE");
-         }
-     }
- 
-     /* First tie the energy calculation (and P-coupling) to nstlist ... */
-     check_nst_param(fplog,cr,"nstlist",ir->nstlist,
-                     "nstcalcenergy",&ir->nstcalcenergy);
-     if (ir->epc != epcNO)
-     {
-         check_nst_param(fplog,cr,"nstlist",ir->nstlist,
-                         "nstpcouple",&ir->nstpcouple);
-     }
- 
-     /* ... then tie the output intervals to the adjusted nstcalcenergy */
-     check_nst_param(fplog,cr,"nstcalcenergy",ir->nstcalcenergy,
-                     "nstenergy",&ir->nstenergy);
-     check_nst_param(fplog,cr,"nstcalcenergy",ir->nstcalcenergy,
-                     "nstlog",&ir->nstlog);
-     if (ir->efep != efepNO)
-     {
-         check_nst_param(fplog,cr,"nstcalcenergy",ir->nstcalcenergy,
-                         "nstdhdl",&ir->nstdhdl);
-     }
- }
- 
- /* State of the neighbor-list life-time heuristics, maintained by
-  * init_nlistheuristics(), set_nlistheuristics() and
-  * update_nliststatistics().
-  */
- typedef struct {
-     gmx_bool       bGStatEveryStep; /* TRUE: the next check is always scheduled for the current step */
-     gmx_large_int_t step_ns;        /* step stored by the last set_nlistheuristics() call */
-     gmx_large_int_t step_nscheck;   /* step computed for the next list validity check */
-     gmx_large_int_t nns;            /* number of life times accumulated */
-     matrix     scale_tot;           /* cumulative coordinate scaling matrix */
-     int        nabnsb;              /* reported in debug output as "atoms beyond ns buffer" */
-     double     s1;                  /* sum of the list life times */
-     double     s2;                  /* sum of the squared list life times */
-     double     ab;                  /* sum of nabnsb over all updates */
-     double     lt_runav;            /* running average of the life time */
-     double     lt_runav2;           /* running average of the squared deviation from lt_runav */
- } gmx_nlheur_t;
- 
- /* Forget the running life-time averages and schedule the next list
-  * validity check at the given step.
-  */
- static void reset_nlistheuristics(gmx_nlheur_t *nlh,gmx_large_int_t step)
- {
-     nlh->step_nscheck = step;
- 
-     /* Both running averages start over from zero */
-     nlh->lt_runav2 = 0;
-     nlh->lt_runav  = 0;
- }
- 
- /* Initialize the heuristics record at the start of a run: zero the
-  * accumulated statistics, store the bGStatEveryStep setting and reset
-  * the running averages for the given step.  step_ns and scale_tot are
-  * not set here.
-  */
- static void init_nlistheuristics(gmx_nlheur_t *nlh,
-                                  gmx_bool bGStatEveryStep,gmx_large_int_t step)
- {
-     /* Running averages and the step of the next check */
-     reset_nlistheuristics(nlh,step);
- 
-     /* Accumulated statistics start from scratch */
-     nlh->ab     = 0;
-     nlh->s2     = 0;
-     nlh->s1     = 0;
-     nlh->nabnsb = 0;
-     nlh->nns    = 0;
- 
-     nlh->bGStatEveryStep = bGStatEveryStep;
- }
- 
- /* Account for a neighbor list that is being replaced at the given step.
-  *
-  * The life time of the old list (step - nlh->step_ns) is added to the
-  * accumulated sums and folded into the running averages, after which
-  * the step of the next list validity check is computed.
-  */
- static void update_nliststatistics(gmx_nlheur_t *nlh,gmx_large_int_t step)
- {
-     gmx_large_int_t nl_lt;
-     char sbuf[STEPSTRSIZE],sbuf2[STEPSTRSIZE];
- 
-     /* Determine the neighbor list life time */
-     nl_lt = step - nlh->step_ns;
-     if (debug)
-     {
-         fprintf(debug,"%d atoms beyond ns buffer, updating neighbor list after %s steps\n",nlh->nabnsb,gmx_step_str(nl_lt,sbuf));
-     }
-     /* Accumulate the totals: count, sum, sum of squares, sum of nabnsb */
-     nlh->nns++;
-     nlh->s1 += nl_lt;
-     nlh->s2 += nl_lt*nl_lt;
-     nlh->ab += nlh->nabnsb;
-     /* A running average of exactly 0 is taken to mean "not yet seeded" */
-     if (nlh->lt_runav == 0)
-     {
-         nlh->lt_runav  = nl_lt;
-         /* Initialize the fluctuation average
-          * such that at startup we check after 0 steps.
-          */
-         nlh->lt_runav2 = sqr(nl_lt/2.0);
-     }
-     /* Running average with 0.9 gives an exp. history of 9.5 */
-     /* The fluctuation must be updated first: it uses the old average */
-     nlh->lt_runav2 = 0.9*nlh->lt_runav2 + 0.1*sqr(nlh->lt_runav - nl_lt);
-     nlh->lt_runav  = 0.9*nlh->lt_runav  + 0.1*nl_lt;
-     if (nlh->bGStatEveryStep)
-     {
-         /* Always check the nlist validity */
-         nlh->step_nscheck = step;
-     }
-     else
-     {
-         /* We check after:  <life time> - 2*sigma
-          * The factor 2 is quite conservative,
-          * but we assume that with nstlist=-1 the user
-          * prefers exact integration over performance.
-          */
-         nlh->step_nscheck = step
-                   + (int)(nlh->lt_runav - 2.0*sqrt(nlh->lt_runav2)) - 1;
-     }
-     if (debug)
-     {
-         fprintf(debug,"nlist life time %s run av. %4.1f sig %3.1f check %s check with -gcom %d\n",
-                 gmx_step_str(nl_lt,sbuf),nlh->lt_runav,sqrt(nlh->lt_runav2),
-                 gmx_step_str(nlh->step_nscheck-step+1,sbuf2),
-                 (int)(nlh->lt_runav - 2.0*sqrt(nlh->lt_runav2)));
-     }
- }
- 
- /* Register that a new neighbor list is made at the given step.
-  *
-  * With bReset the running averages are discarded; otherwise the life
-  * time of the previous list is added to the statistics.  Afterwards
-  * the step is stored as the start of the new list and the cumulative
-  * scaling matrix is set back to the identity.
-  */
- static void set_nlistheuristics(gmx_nlheur_t *nlh,gmx_bool bReset,gmx_large_int_t step)
- {
-     int dim;
- 
-     /* The statistics update reads the old step_ns, so it has to run
-      * before step_ns is overwritten below.
-      */
-     if (!bReset)
-     {
-         update_nliststatistics(nlh,step);
-     }
-     else
-     {
-         reset_nlistheuristics(nlh,step);
-     }
-     nlh->step_ns = step;
- 
-     /* Initialize the cumulative coordinate scaling matrix */
-     clear_mat(nlh->scale_tot);
-     for (dim = DIM-1; dim >= 0; dim--)
-     {
-         nlh->scale_tot[dim][dim] = 1.0;
-     }
- }
- 
- /* Distribute the current rerun frame over the nodes.
-  *
-  * The frame struct is broadcast as a raw block of bytes; fr->natoms is
-  * set to -1 on the master to signal that there are no more frames.
-  * On return *bNotLastFrame is TRUE when fr->natoms >= 0.  With
-  * particle decomposition the coordinates and velocities are broadcast
-  * as well, when the frame has them (fr->bX, fr->bV).
-  */
- static void rerun_parallel_comm(t_commrec *cr,t_trxframe *fr,
-                                 gmx_bool *bNotLastFrame)
- {
-     gmx_bool bAlloc;
-     rvec *xp,*vp;
- 
-     /* Must be evaluated before the broadcast overwrites fr->natoms:
-      * a frame with natoms == 0 has no x/v arrays allocated yet.
-      */
-     bAlloc = (fr->natoms == 0);
- 
-     if (MASTER(cr) && !*bNotLastFrame)
-     {
-         fr->natoms = -1;
-     }
-     /* The broadcast copies the whole struct, pointers included, so the
-      * local x and v pointers are saved and put back afterwards.
-      * NOTE(review): any other pointer members of t_trxframe are
-      * overwritten by this broadcast as well -- presumably they are not
-      * used during rerun; confirm.
-      */
-     xp = fr->x;
-     vp = fr->v;
-     gmx_bcast(sizeof(*fr),fr,cr);
-     fr->x = xp;
-     fr->v = vp;
- 
-     *bNotLastFrame = (fr->natoms >= 0);
- 
-     if (*bNotLastFrame && PARTDECOMP(cr))
-     {
-         /* x and v are the only variable size quantities stored in trr
-          * that are required for rerun (f is not needed).
-          */
-         if (bAlloc)
-         {
-             snew(fr->x,fr->natoms);
-             snew(fr->v,fr->natoms);
-         }
-         if (fr->bX)
-         {
-             gmx_bcast(fr->natoms*sizeof(fr->x[0]),fr->x[0],cr);
-         }
-         if (fr->bV)
-         {
-             gmx_bcast(fr->natoms*sizeof(fr->v[0]),fr->v[0],cr);
-         }
-     }
- }
- 
+ +#if ((defined WIN32 || defined _WIN32 || defined WIN64 || defined _WIN64) && !defined __CYGWIN__ && !defined __CYGWIN32__)
+ +/* _isnan() */
+ +#include <float.h>
+ +#endif
+ +
+ +#include "typedefs.h"
+ +#include "smalloc.h"
+ +#include "sysstuff.h"
+ +#include "vec.h"
+ +#include "statutil.h"
+ +#include "vcm.h"
+ +#include "mdebin.h"
+ +#include "nrnb.h"
+ +#include "calcmu.h"
+ +#include "index.h"
+ +#include "vsite.h"
+ +#include "update.h"
+ +#include "ns.h"
+ +#include "trnio.h"
+ +#include "xtcio.h"
+ +#include "mdrun.h"
+ +#include "confio.h"
+ +#include "network.h"
+ +#include "pull.h"
+ +#include "xvgr.h"
+ +#include "physics.h"
+ +#include "names.h"
+ +#include "xmdrun.h"
+ +#include "ionize.h"
+ +#include "disre.h"
+ +#include "orires.h"
+ +#include "dihre.h"
+ +#include "pppm.h"
+ +#include "pme.h"
+ +#include "mdatoms.h"
+ +#include "repl_ex.h"
+ +#include "qmmm.h"
+ +#include "mpelogging.h"
+ +#include "domdec.h"
+ +#include "partdec.h"
+ +#include "topsort.h"
+ +#include "coulomb.h"
+ +#include "constr.h"
+ +#include "shellfc.h"
+ +#include "compute_io.h"
+ +#include "mvdata.h"
+ +#include "checkpoint.h"
+ +#include "mtop_util.h"
+ +#include "sighandler.h"
+ +#include "membed.h"
++#include "string2.h"
+ +
+ +#ifdef GMX_LIB_MPI
+ +#include <mpi.h>
+ +#endif
+ +#ifdef GMX_THREADS
+ +#include "tmpi.h"
+ +#endif
+ +
+ +#ifdef GMX_FAHCORE
+ +#include "corewrap.h"
+ +#endif
+ +
+ +
-     bLastStep = (bRerunMD || (ir->nsteps >= 0 && step_rel > ir->nsteps));
+ +double do_md(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
+ +             const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
+ +             int nstglobalcomm,
+ +             gmx_vsite_t *vsite,gmx_constr_t constr,
+ +             int stepout,t_inputrec *ir,
+ +             gmx_mtop_t *top_global,
+ +             t_fcdata *fcd,
+ +             t_state *state_global,
+ +             t_mdatoms *mdatoms,
+ +             t_nrnb *nrnb,gmx_wallcycle_t wcycle,
+ +             gmx_edsam_t ed,t_forcerec *fr,
+ +             int repl_ex_nst,int repl_ex_seed,gmx_membed_t *membed,
+ +             real cpt_period,real max_hours,
+ +             const char *deviceOptions,
+ +             unsigned long Flags,
+ +             gmx_runtime_t *runtime)
+ +{
+ +    gmx_mdoutf_t *outf;
+ +    gmx_large_int_t step,step_rel;
+ +    double     run_time;
+ +    double     t,t0,lam0;
+ +    gmx_bool       bGStatEveryStep,bGStat,bNstEner,bCalcEnerPres;
+ +    gmx_bool       bNS,bNStList,bSimAnn,bStopCM,bRerunMD,bNotLastFrame=FALSE,
+ +               bFirstStep,bStateFromTPX,bInitStep,bLastStep,
+ +               bBornRadii,bStartingFromCpt;
+ +    gmx_bool       bDoDHDL=FALSE;
+ +    gmx_bool       do_ene,do_log,do_verbose,bRerunWarnNoV=TRUE,
+ +               bForceUpdate=FALSE,bCPT;
+ +    int        mdof_flags;
+ +    gmx_bool       bMasterState;
+ +    int        force_flags,cglo_flags;
+ +    tensor     force_vir,shake_vir,total_vir,tmp_vir,pres;
+ +    int        i,m;
+ +    t_trxstatus *status;
+ +    rvec       mu_tot;
+ +    t_vcm      *vcm;
+ +    t_state    *bufstate=NULL;   
+ +    matrix     *scale_tot,pcoupl_mu,M,ebox;
+ +    gmx_nlheur_t nlh;
+ +    t_trxframe rerun_fr;
+ +    gmx_repl_ex_t repl_ex=NULL;
+ +    int        nchkpt=1;
+ +
+ +    gmx_localtop_t *top;      
+ +    t_mdebin *mdebin=NULL;
+ +    t_state    *state=NULL;
+ +    rvec       *f_global=NULL;
+ +    int        n_xtc=-1;
+ +    rvec       *x_xtc=NULL;
+ +    gmx_enerdata_t *enerd;
+ +    rvec       *f=NULL;
+ +    gmx_global_stat_t gstat;
+ +    gmx_update_t upd=NULL;
+ +    t_graph    *graph=NULL;
+ +    globsig_t   gs;
+ +
+ +    gmx_bool        bFFscan;
+ +    gmx_groups_t *groups;
+ +    gmx_ekindata_t *ekind, *ekind_save;
+ +    gmx_shellfc_t shellfc;
+ +    int         count,nconverged=0;
+ +    real        timestep=0;
+ +    double      tcount=0;
+ +    gmx_bool        bIonize=FALSE;
+ +    gmx_bool        bTCR=FALSE,bConverged=TRUE,bOK,bSumEkinhOld,bExchanged;
+ +    gmx_bool        bAppend;
+ +    gmx_bool        bResetCountersHalfMaxH=FALSE;
+ +    gmx_bool        bVV,bIterations,bFirstIterate,bTemp,bPres,bTrotter;
+ +    real        temp0,mu_aver=0,dvdl;
+ +    int         a0,a1,gnx=0,ii;
+ +    atom_id     *grpindex=NULL;
+ +    char        *grpname;
+ +    t_coupl_rec *tcr=NULL;
+ +    rvec        *xcopy=NULL,*vcopy=NULL,*cbuf=NULL;
+ +    matrix      boxcopy={{0}},lastbox;
+ +      tensor      tmpvir;
+ +      real        fom,oldfom,veta_save,pcurr,scalevir,tracevir;
+ +      real        vetanew = 0;
+ +    double      cycles;
+ +      real        saved_conserved_quantity = 0;
+ +    real        last_ekin = 0;
+ +      int         iter_i;
+ +      t_extmass   MassQ;
+ +    int         **trotter_seq; 
+ +    char        sbuf[STEPSTRSIZE],sbuf2[STEPSTRSIZE];
+ +    int         handled_stop_condition=gmx_stop_cond_none; /* compare to get_stop_condition*/
+ +    gmx_iterate_t iterate;
++    gmx_large_int_t multisim_nsteps=-1; /* number of steps to do  before first multisim 
++                                          simulation stops. If equal to zero, don't
++                                          communicate any more between multisims.*/
+ +#ifdef GMX_FAHCORE
+ +    /* Temporary addition for FAHCORE checkpointing */
+ +    int chkpt_ret;
+ +#endif
+ +
+ +    /* Check for special mdrun options */
+ +    bRerunMD = (Flags & MD_RERUN);
+ +    bIonize  = (Flags & MD_IONIZE);
+ +    bFFscan  = (Flags & MD_FFSCAN);
+ +    bAppend  = (Flags & MD_APPENDFILES);
+ +    if (Flags & MD_RESETCOUNTERSHALFWAY)
+ +    {
+ +        if (ir->nsteps > 0)
+ +        {
+ +            /* Signal to reset the counters half the simulation steps. */
+ +            wcycle_set_reset_counters(wcycle,ir->nsteps/2);
+ +        }
+ +        /* Signal to reset the counters halfway the simulation time. */
+ +        bResetCountersHalfMaxH = (max_hours > 0);
+ +    }
+ +
+ +    /* md-vv uses averaged full step velocities for T-control 
+ +       md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control)
+ +       md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */
+ +    bVV = EI_VV(ir->eI);
+ +    if (bVV) /* to store the initial velocities while computing virial */
+ +    {
+ +        snew(cbuf,top_global->natoms);
+ +    }
+ +    /* all the iterative cases - only if there are constraints */ 
+ +    bIterations = ((IR_NPT_TROTTER(ir)) && (constr) && (!bRerunMD));
+ +    bTrotter = (bVV && (IR_NPT_TROTTER(ir) || (IR_NVT_TROTTER(ir))));        
+ +    
+ +    if (bRerunMD)
+ +    {
+ +        /* Since we don't know if the frames read are related in any way,
+ +         * rebuild the neighborlist at every step.
+ +         */
+ +        ir->nstlist       = 1;
+ +        ir->nstcalcenergy = 1;
+ +        nstglobalcomm     = 1;
+ +    }
+ +
+ +    check_ir_old_tpx_versions(cr,fplog,ir,top_global);
+ +
+ +    nstglobalcomm = check_nstglobalcomm(fplog,cr,nstglobalcomm,ir);
+ +    bGStatEveryStep = (nstglobalcomm == 1);
+ +
+ +    if (!bGStatEveryStep && ir->nstlist == -1 && fplog != NULL)
+ +    {
+ +        fprintf(fplog,
+ +                "To reduce the energy communication with nstlist = -1\n"
+ +                "the neighbor list validity should not be checked at every step,\n"
+ +                "this means that exact integration is not guaranteed.\n"
+ +                "The neighbor list validity is checked after:\n"
+ +                "  <n.list life time> - 2*std.dev.(n.list life time)  steps.\n"
+ +                "In most cases this will result in exact integration.\n"
+ +                "This reduces the energy communication by a factor of 2 to 3.\n"
+ +                "If you want less energy communication, set nstlist > 3.\n\n");
+ +    }
+ +
+ +    if (bRerunMD || bFFscan)
+ +    {
+ +        ir->nstxtcout = 0;
+ +    }
+ +    groups = &top_global->groups;
+ +
+ +    /* Initial values */
+ +    init_md(fplog,cr,ir,oenv,&t,&t0,&state_global->lambda,&lam0,
+ +            nrnb,top_global,&upd,
+ +            nfile,fnm,&outf,&mdebin,
+ +            force_vir,shake_vir,mu_tot,&bSimAnn,&vcm,state_global,Flags);
+ +
+ +    clear_mat(total_vir);
+ +    clear_mat(pres);
+ +    /* Energy terms and groups */
+ +    snew(enerd,1);
+ +    init_enerdata(top_global->groups.grps[egcENER].nr,ir->n_flambda,enerd);
+ +    if (DOMAINDECOMP(cr))
+ +    {
+ +        f = NULL;
+ +    }
+ +    else
+ +    {
+ +        snew(f,top_global->natoms);
+ +    }
+ +
+ +    /* Kinetic energy data */
+ +    snew(ekind,1);
+ +    init_ekindata(fplog,top_global,&(ir->opts),ekind);
+ +    /* needed for iteration of constraints */
+ +    snew(ekind_save,1);
+ +    init_ekindata(fplog,top_global,&(ir->opts),ekind_save);
+ +    /* Copy the cos acceleration to the groups struct */    
+ +    ekind->cosacc.cos_accel = ir->cos_accel;
+ +
+ +    gstat = global_stat_init(ir);
+ +    debug_gmx();
+ +
+ +    /* Check for polarizable models and flexible constraints */
+ +    shellfc = init_shell_flexcon(fplog,
+ +                                 top_global,n_flexible_constraints(constr),
+ +                                 (ir->bContinuation || 
+ +                                  (DOMAINDECOMP(cr) && !MASTER(cr))) ?
+ +                                 NULL : state_global->x);
+ +
+ +    if (DEFORM(*ir))
+ +    {
+ +#ifdef GMX_THREADS
+ +        tMPI_Thread_mutex_lock(&deform_init_box_mutex);
+ +#endif
+ +        set_deform_reference_box(upd,
+ +                                 deform_init_init_step_tpx,
+ +                                 deform_init_box_tpx);
+ +#ifdef GMX_THREADS
+ +        tMPI_Thread_mutex_unlock(&deform_init_box_mutex);
+ +#endif
+ +    }
+ +
+ +    {
+ +        double io = compute_io(ir,top_global->natoms,groups,mdebin->ebin->nener,1);
+ +        if ((io > 2000) && MASTER(cr))
+ +            fprintf(stderr,
+ +                    "\nWARNING: This run will generate roughly %.0f Mb of data\n\n",
+ +                    io);
+ +    }
+ +
+ +    if (DOMAINDECOMP(cr)) {
+ +        top = dd_init_local_top(top_global);
+ +
+ +        snew(state,1);
+ +        dd_init_local_state(cr->dd,state_global,state);
+ +
+ +        if (DDMASTER(cr->dd) && ir->nstfout) {
+ +            snew(f_global,state_global->natoms);
+ +        }
+ +    } else {
+ +        if (PAR(cr)) {
+ +            /* Initialize the particle decomposition and split the topology */
+ +            top = split_system(fplog,top_global,ir,cr);
+ +
+ +            pd_cg_range(cr,&fr->cg0,&fr->hcg);
+ +            pd_at_range(cr,&a0,&a1);
+ +        } else {
+ +            top = gmx_mtop_generate_local_top(top_global,ir);
+ +
+ +            a0 = 0;
+ +            a1 = top_global->natoms;
+ +        }
+ +
+ +        state = partdec_init_local_state(cr,state_global);
+ +        f_global = f;
+ +
+ +        atoms2md(top_global,ir,0,NULL,a0,a1-a0,mdatoms);
+ +
+ +        if (vsite) {
+ +            set_vsite_top(vsite,top,mdatoms,cr);
+ +        }
+ +
+ +        if (ir->ePBC != epbcNONE && !ir->bPeriodicMols) {
+ +            graph = mk_graph(fplog,&(top->idef),0,top_global->natoms,FALSE,FALSE);
+ +        }
+ +
+ +        if (shellfc) {
+ +            make_local_shells(cr,mdatoms,shellfc);
+ +        }
+ +
+ +        if (ir->pull && PAR(cr)) {
+ +            dd_make_local_pull_groups(NULL,ir->pull,mdatoms);
+ +        }
+ +    }
+ +
+ +    if (DOMAINDECOMP(cr))
+ +    {
+ +        /* Distribute the charge groups over the nodes from the master node */
+ +        dd_partition_system(fplog,ir->init_step,cr,TRUE,1,
+ +                            state_global,top_global,ir,
+ +                            state,&f,mdatoms,top,fr,
+ +                            vsite,shellfc,constr,
+ +                            nrnb,wcycle,FALSE);
+ +    }
+ +
+ +    update_mdatoms(mdatoms,state->lambda);
+ +
+ +    if (MASTER(cr))
+ +    {
+ +        if (opt2bSet("-cpi",nfile,fnm))
+ +        {
+ +            /* Update mdebin with energy history if appending to output files */
+ +            if ( Flags & MD_APPENDFILES )
+ +            {
+ +                restore_energyhistory_from_state(mdebin,&state_global->enerhist);
+ +            }
+ +            else
+ +            {
+ +                /* We might have read an energy history from checkpoint,
+ +                 * free the allocated memory and reset the counts.
+ +                 */
+ +                done_energyhistory(&state_global->enerhist);
+ +                init_energyhistory(&state_global->enerhist);
+ +            }
+ +        }
+ +        /* Set the initial energy history in state by updating once */
+ +        update_energyhistory(&state_global->enerhist,mdebin);
+ +    } 
+ +
+ +    if ((state->flags & (1<<estLD_RNG)) && (Flags & MD_READ_RNG)) {
+ +        /* Set the random state if we read a checkpoint file */
+ +        set_stochd_state(upd,state);
+ +    }
+ +
+ +    /* Initialize constraints */
+ +    if (constr) {
+ +        if (!DOMAINDECOMP(cr))
+ +            set_constraints(constr,top,ir,mdatoms,cr);
+ +    }
+ +
+ +    /* Check whether we have to do GCT stuff */
+ +    bTCR = ftp2bSet(efGCT,nfile,fnm);
+ +    if (bTCR) {
+ +        if (MASTER(cr)) {
+ +            fprintf(stderr,"Will do General Coupling Theory!\n");
+ +        }
+ +        gnx = top_global->mols.nr;
+ +        snew(grpindex,gnx);
+ +        for(i=0; (i<gnx); i++) {
+ +            grpindex[i] = i;
+ +        }
+ +    }
+ +
+ +    if (repl_ex_nst > 0)
+ +    {
+ +        /* We need to be sure replica exchange can only occur
+ +         * when the energies are current */
+ +        check_nst_param(fplog,cr,"nstcalcenergy",ir->nstcalcenergy,
+ +                        "repl_ex_nst",&repl_ex_nst);
+ +        /* This check needs to happen before inter-simulation
+ +         * signals are initialized, too */
+ +    }
+ +    if (repl_ex_nst > 0 && MASTER(cr))
+ +        repl_ex = init_replica_exchange(fplog,cr->ms,state_global,ir,
+ +                                        repl_ex_nst,repl_ex_seed);
+ +
+ +    if (!ir->bContinuation && !bRerunMD)
+ +    {
+ +        if (mdatoms->cFREEZE && (state->flags & (1<<estV)))
+ +        {
+ +            /* Set the velocities of frozen particles to zero */
+ +            for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++)
+ +            {
+ +                for(m=0; m<DIM; m++)
+ +                {
+ +                    if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m])
+ +                    {
+ +                        state->v[i][m] = 0;
+ +                    }
+ +                }
+ +            }
+ +        }
+ +
+ +        if (constr)
+ +        {
+ +            /* Constrain the initial coordinates and velocities */
+ +            do_constrain_first(fplog,constr,ir,mdatoms,state,f,
+ +                               graph,cr,nrnb,fr,top,shake_vir);
+ +        }
+ +        if (vsite)
+ +        {
+ +            /* Construct the virtual sites for the initial configuration */
+ +            construct_vsites(fplog,vsite,state->x,nrnb,ir->delta_t,NULL,
+ +                             top->idef.iparams,top->idef.il,
+ +                             fr->ePBC,fr->bMolPBC,graph,cr,state->box);
+ +        }
+ +    }
+ +
+ +    debug_gmx();
+ +  
+ +    /* I'm assuming we need global communication the first time! MRS */
+ +    cglo_flags = (CGLO_TEMPERATURE | CGLO_GSTAT
+ +                  | (bVV ? CGLO_PRESSURE:0)
+ +                  | (bVV ? CGLO_CONSTRAINT:0)
+ +                  | (bRerunMD ? CGLO_RERUNMD:0)
+ +                  | ((Flags & MD_READ_EKIN) ? CGLO_READEKIN:0));
+ +    
+ +    bSumEkinhOld = FALSE;
+ +    compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
+ +                    wcycle,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
+ +                    constr,NULL,FALSE,state->box,
+ +                    top_global,&pcurr,top_global->natoms,&bSumEkinhOld,cglo_flags);
+ +    if (ir->eI == eiVVAK) {
+ +        /* a second call to get the half step temperature initialized as well */ 
+ +        /* we do the same call as above, but turn the pressure off -- internally to 
+ +           compute_globals, this is recognized as a velocity verlet half-step 
+ +           kinetic energy calculation.  This minimizes excess variables, but 
+ +           perhaps loses some logic?*/
+ +        
+ +        compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
+ +                        wcycle,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
+ +                        constr,NULL,FALSE,state->box,
+ +                        top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
+ +                        cglo_flags &~ CGLO_PRESSURE);
+ +    }
+ +    
+ +    /* Calculate the initial half step temperature, and save the ekinh_old */
+ +    if (!(Flags & MD_STARTFROMCPT)) 
+ +    {
+ +        for(i=0; (i<ir->opts.ngtc); i++) 
+ +        {
+ +            copy_mat(ekind->tcstat[i].ekinh,ekind->tcstat[i].ekinh_old);
+ +        } 
+ +    }
+ +    if (ir->eI != eiVV) 
+ +    {
+ +        enerd->term[F_TEMP] *= 2; /* result of averages being done over previous and current step,
+ +                                     and there is no previous step */
+ +    }
+ +    temp0 = enerd->term[F_TEMP];
+ +    
+ +    /* if using an iterative algorithm, we need to create a working directory for the state. */
+ +    if (bIterations) 
+ +    {
+ +            bufstate = init_bufstate(state);
+ +    }
+ +    if (bFFscan) 
+ +    {
+ +        snew(xcopy,state->natoms);
+ +        snew(vcopy,state->natoms);
+ +        copy_rvecn(state->x,xcopy,0,state->natoms);
+ +        copy_rvecn(state->v,vcopy,0,state->natoms);
+ +        copy_mat(state->box,boxcopy);
+ +    } 
+ +    
+ +    /* need to make an initiation call to get the Trotter variables set, as well as other constants for non-trotter
+ +       temperature control */
+ +    trotter_seq = init_npt_vars(ir,state,&MassQ,bTrotter);
+ +    
+ +    if (MASTER(cr))
+ +    {
+ +        if (constr && !ir->bContinuation && ir->eConstrAlg == econtLINCS)
+ +        {
+ +            fprintf(fplog,
+ +                    "RMS relative constraint deviation after constraining: %.2e\n",
+ +                    constr_rmsd(constr,FALSE));
+ +        }
+ +        fprintf(fplog,"Initial temperature: %g K\n",enerd->term[F_TEMP]);
+ +        if (bRerunMD)
+ +        {
+ +            fprintf(stderr,"starting md rerun '%s', reading coordinates from"
+ +                    " input trajectory '%s'\n\n",
+ +                    *(top_global->name),opt2fn("-rerun",nfile,fnm));
+ +            if (bVerbose)
+ +            {
+ +                fprintf(stderr,"Calculated time to finish depends on nsteps from "
+ +                        "run input file,\nwhich may not correspond to the time "
+ +                        "needed to process input trajectory.\n\n");
+ +            }
+ +        }
+ +        else
+ +        {
+ +            char tbuf[20];
+ +            fprintf(stderr,"starting mdrun '%s'\n",
+ +                    *(top_global->name));
+ +            if (ir->nsteps >= 0)
+ +            {
+ +                sprintf(tbuf,"%8.1f",(ir->init_step+ir->nsteps)*ir->delta_t);
+ +            }
+ +            else
+ +            {
+ +                sprintf(tbuf,"%s","infinite");
+ +            }
+ +            if (ir->init_step > 0)
+ +            {
+ +                fprintf(stderr,"%s steps, %s ps (continuing from step %s, %8.1f ps).\n",
+ +                        gmx_step_str(ir->init_step+ir->nsteps,sbuf),tbuf,
+ +                        gmx_step_str(ir->init_step,sbuf2),
+ +                        ir->init_step*ir->delta_t);
+ +            }
+ +            else
+ +            {
+ +                fprintf(stderr,"%s steps, %s ps.\n",
+ +                        gmx_step_str(ir->nsteps,sbuf),tbuf);
+ +            }
+ +        }
+ +        fprintf(fplog,"\n");
+ +    }
+ +
+ +    /* Set and write start time */
+ +    runtime_start(runtime);
+ +    print_date_and_time(fplog,cr->nodeid,"Started mdrun",runtime);
+ +    wallcycle_start(wcycle,ewcRUN);
+ +    if (fplog)
+ +        fprintf(fplog,"\n");
+ +
+ +    /* safest point to do file checkpointing is here.  More general point would be immediately before integrator call */
+ +#ifdef GMX_FAHCORE
+ +    chkpt_ret=fcCheckPointParallel( cr->nodeid,
+ +                                    NULL,0);
+ +    if ( chkpt_ret == 0 ) 
+ +        gmx_fatal( 3,__FILE__,__LINE__, "Checkpoint error on step %d\n", 0 );
+ +#endif
+ +
+ +    debug_gmx();
+ +    /***********************************************************
+ +     *
+ +     *             Loop over MD steps 
+ +     *
+ +     ************************************************************/
+ +
+ +    /* if rerunMD then read coordinates and velocities from input trajectory */
+ +    if (bRerunMD)
+ +    {
+ +        if (getenv("GMX_FORCE_UPDATE"))
+ +        {
+ +            bForceUpdate = TRUE;
+ +        }
+ +
+ +        rerun_fr.natoms = 0;
+ +        if (MASTER(cr))
+ +        {
+ +            bNotLastFrame = read_first_frame(oenv,&status,
+ +                                             opt2fn("-rerun",nfile,fnm),
+ +                                             &rerun_fr,TRX_NEED_X | TRX_READ_V);
+ +            if (rerun_fr.natoms != top_global->natoms)
+ +            {
+ +                gmx_fatal(FARGS,
+ +                          "Number of atoms in trajectory (%d) does not match the "
+ +                          "run input file (%d)\n",
+ +                          rerun_fr.natoms,top_global->natoms);
+ +            }
+ +            if (ir->ePBC != epbcNONE)
+ +            {
+ +                if (!rerun_fr.bBox)
+ +                {
+ +                    gmx_fatal(FARGS,"Rerun trajectory frame step %d time %f does not contain a box, while pbc is used",rerun_fr.step,rerun_fr.time);
+ +                }
+ +                if (max_cutoff2(ir->ePBC,rerun_fr.box) < sqr(fr->rlistlong))
+ +                {
+ +                    gmx_fatal(FARGS,"Rerun trajectory frame step %d time %f has too small box dimensions",rerun_fr.step,rerun_fr.time);
+ +                }
+ +            }
+ +        }
+ +
+ +        if (PAR(cr))
+ +        {
+ +            rerun_parallel_comm(cr,&rerun_fr,&bNotLastFrame);
+ +        }
+ +
+ +        if (ir->ePBC != epbcNONE)
+ +        {
+ +            /* Set the shift vectors.
+ +             * Necessary here when we have a static box different from the tpr box.
+ +             */
+ +            calc_shifts(rerun_fr.box,fr->shift_vec);
+ +        }
+ +    }
+ +
+ +    /* loop over MD steps or if rerunMD to end of input trajectory */
+ +    bFirstStep = TRUE;
+ +    /* Skip the first Nose-Hoover integration when we get the state from tpx */
+ +    bStateFromTPX = !opt2bSet("-cpi",nfile,fnm);
+ +    bInitStep = bFirstStep && (bStateFromTPX || bVV);
+ +    bStartingFromCpt = (Flags & MD_STARTFROMCPT) && bInitStep;
+ +    bLastStep    = FALSE;
+ +    bSumEkinhOld = FALSE;
+ +    bExchanged   = FALSE;
+ +
+ +    init_global_signals(&gs,cr,ir,repl_ex_nst);
+ +
+ +    step = ir->init_step;
+ +    step_rel = 0;
+ +
+ +    if (ir->nstlist == -1)
+ +    {
+ +        init_nlistheuristics(&nlh,bGStatEveryStep,step);
+ +    }
+ +
-         
++    if (MULTISIM(cr) && (repl_ex_nst <=0 ))
++    {
++        /* check how many steps are left in other sims */
++        multisim_nsteps=get_multisim_nsteps(cr, ir->nsteps);
++    }
++
++
++    /* and stop now if we should */
++    bLastStep = (bRerunMD || (ir->nsteps >= 0 && step_rel > ir->nsteps) ||
++                 ((multisim_nsteps >= 0) && (step_rel >= multisim_nsteps )));
+ +    while (!bLastStep || (bRerunMD && bNotLastFrame)) {
+ +
+ +        wallcycle_start(wcycle,ewcSTEP);
+ +
+ +        GMX_MPE_LOG(ev_timestep1);
+ +
+ +        if (bRerunMD) {
+ +            if (rerun_fr.bStep) {
+ +                step = rerun_fr.step;
+ +                step_rel = step - ir->init_step;
+ +            }
+ +            if (rerun_fr.bTime) {
+ +                t = rerun_fr.time;
+ +            }
+ +            else
+ +            {
+ +                t = step;
+ +            }
+ +        } 
+ +        else 
+ +        {
+ +            bLastStep = (step_rel == ir->nsteps);
+ +            t = t0 + step*ir->delta_t;
+ +        }
+ +
+ +        if (ir->efep != efepNO)
+ +        {
+ +            if (bRerunMD && rerun_fr.bLambda && (ir->delta_lambda!=0))
+ +            {
+ +                state_global->lambda = rerun_fr.lambda;
+ +            }
+ +            else
+ +            {
+ +                state_global->lambda = lam0 + step*ir->delta_lambda;
+ +            }
+ +            state->lambda = state_global->lambda;
+ +            bDoDHDL = do_per_step(step,ir->nstdhdl);
+ +        }
+ +
+ +        if (bSimAnn) 
+ +        {
+ +            update_annealing_target_temp(&(ir->opts),t);
+ +        }
+ +
+ +        if (bRerunMD)
+ +        {
+ +            if (!(DOMAINDECOMP(cr) && !MASTER(cr)))
+ +            {
+ +                for(i=0; i<state_global->natoms; i++)
+ +                {
+ +                    copy_rvec(rerun_fr.x[i],state_global->x[i]);
+ +                }
+ +                if (rerun_fr.bV)
+ +                {
+ +                    for(i=0; i<state_global->natoms; i++)
+ +                    {
+ +                        copy_rvec(rerun_fr.v[i],state_global->v[i]);
+ +                    }
+ +                }
+ +                else
+ +                {
+ +                    for(i=0; i<state_global->natoms; i++)
+ +                    {
+ +                        clear_rvec(state_global->v[i]);
+ +                    }
+ +                    if (bRerunWarnNoV)
+ +                    {
+ +                        fprintf(stderr,"\nWARNING: Some frames do not contain velocities.\n"
+ +                                "         Ekin, temperature and pressure are incorrect,\n"
+ +                                "         the virial will be incorrect when constraints are present.\n"
+ +                                "\n");
+ +                        bRerunWarnNoV = FALSE;
+ +                    }
+ +                }
+ +            }
+ +            copy_mat(rerun_fr.box,state_global->box);
+ +            copy_mat(state_global->box,state->box);
+ +
+ +            if (vsite && (Flags & MD_RERUN_VSITE))
+ +            {
+ +                if (DOMAINDECOMP(cr))
+ +                {
+ +                    gmx_fatal(FARGS,"Vsite recalculation with -rerun is not implemented for domain decomposition, use particle decomposition");
+ +                }
+ +                if (graph)
+ +                {
+ +                    /* Following is necessary because the graph may get out of sync
+ +                     * with the coordinates if we only have every N'th coordinate set
+ +                     */
+ +                    mk_mshift(fplog,graph,fr->ePBC,state->box,state->x);
+ +                    shift_self(graph,state->box,state->x);
+ +                }
+ +                construct_vsites(fplog,vsite,state->x,nrnb,ir->delta_t,state->v,
+ +                                 top->idef.iparams,top->idef.il,
+ +                                 fr->ePBC,fr->bMolPBC,graph,cr,state->box);
+ +                if (graph)
+ +                {
+ +                    unshift_self(graph,state->box,state->x);
+ +                }
+ +            }
+ +        }
+ +
+ +        /* Stop Center of Mass motion */
+ +        bStopCM = (ir->comm_mode != ecmNO && do_per_step(step,ir->nstcomm));
+ +
+ +        /* Copy back starting coordinates in case we're doing a forcefield scan */
+ +        if (bFFscan)
+ +        {
+ +            for(ii=0; (ii<state->natoms); ii++)
+ +            {
+ +                copy_rvec(xcopy[ii],state->x[ii]);
+ +                copy_rvec(vcopy[ii],state->v[ii]);
+ +            }
+ +            copy_mat(boxcopy,state->box);
+ +        }
+ +
+ +        if (bRerunMD)
+ +        {
+ +            /* for rerun MD always do Neighbour Searching */
+ +            bNS = (bFirstStep || ir->nstlist != 0);
+ +            bNStList = bNS;
+ +        }
+ +        else
+ +        {
+ +            /* Determine whether or not to do Neighbour Searching and LR */
+ +            bNStList = (ir->nstlist > 0  && step % ir->nstlist == 0);
+ +            
+ +            bNS = (bFirstStep || bExchanged || bNStList ||
+ +                   (ir->nstlist == -1 && nlh.nabnsb > 0));
+ +
+ +            if (bNS && ir->nstlist == -1)
+ +            {
+ +                set_nlistheuristics(&nlh,bFirstStep || bExchanged,step);
+ +            }
+ +        } 
+ +
++        /* check whether we should stop because another simulation has 
++           stopped. */
++        if (MULTISIM(cr))
++        {
++            if ( (multisim_nsteps >= 0) &&  (step_rel >= multisim_nsteps)  &&  
++                 (multisim_nsteps != ir->nsteps) )  
++            {
++                if (bNS)
++                {
++                    if (MASTER(cr))
++                    {
++                        fprintf(stderr, 
++                                "Stopping simulation %d because another one has finished\n",
++                                cr->ms->sim);
++                    }
++                    bLastStep=TRUE;
++                    gs.sig[eglsCHKPT] = 1;
++                }
++            }
++        }
++
+ +        /* < 0 means stop at next step, > 0 means stop at next NS step */
+ +        if ( (gs.set[eglsSTOPCOND] < 0 ) ||
+ +             ( (gs.set[eglsSTOPCOND] > 0 ) && ( bNS || ir->nstlist==0)) )
+ +        {
+ +            bLastStep = TRUE;
+ +        }
+ +
+ +        /* Determine whether or not to update the Born radii if doing GB */
+ +        bBornRadii=bFirstStep;
+ +        if (ir->implicit_solvent && (step % ir->nstgbradii==0))
+ +        {
+ +            bBornRadii=TRUE;
+ +        }
+ +        
+ +        do_log = do_per_step(step,ir->nstlog) || bFirstStep || bLastStep;
+ +        do_verbose = bVerbose &&
+ +                  (step % stepout == 0 || bFirstStep || bLastStep);
+ +
+ +        if (bNS && !(bFirstStep && ir->bContinuation && !bRerunMD))
+ +        {
+ +            if (bRerunMD)
+ +            {
+ +                bMasterState = TRUE;
+ +            }
+ +            else
+ +            {
+ +                bMasterState = FALSE;
+ +                /* Correct the new box if it is too skewed */
+ +                if (DYNAMIC_BOX(*ir))
+ +                {
+ +                    if (correct_box(fplog,step,state->box,graph))
+ +                    {
+ +                        bMasterState = TRUE;
+ +                    }
+ +                }
+ +                if (DOMAINDECOMP(cr) && bMasterState)
+ +                {
+ +                    dd_collect_state(cr->dd,state,state_global);
+ +                }
+ +            }
+ +
+ +            if (DOMAINDECOMP(cr))
+ +            {
+ +                /* Repartition the domain decomposition */
+ +                wallcycle_start(wcycle,ewcDOMDEC);
+ +                dd_partition_system(fplog,step,cr,
+ +                                    bMasterState,nstglobalcomm,
+ +                                    state_global,top_global,ir,
+ +                                    state,&f,mdatoms,top,fr,
+ +                                    vsite,shellfc,constr,
+ +                                    nrnb,wcycle,do_verbose);
+ +                wallcycle_stop(wcycle,ewcDOMDEC);
+ +                /* If using an iterative integrator, reallocate space to match the decomposition */
+ +            }
+ +        }
+ +
+ +        if (MASTER(cr) && do_log && !bFFscan)
+ +        {
+ +            print_ebin_header(fplog,step,t,state->lambda);
+ +        }
+ +
+ +        if (ir->efep != efepNO)
+ +        {
+ +            update_mdatoms(mdatoms,state->lambda); 
+ +        }
+ +
+ +        if (bRerunMD && rerun_fr.bV)
+ +        {
+ +            
+ +            /* We need the kinetic energy at minus the half step for determining
+ +             * the full step kinetic energy and possibly for T-coupling.*/
+ +            /* This may not be quite working correctly yet . . . . */
+ +            compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
+ +                            wcycle,enerd,NULL,NULL,NULL,NULL,mu_tot,
+ +                            constr,NULL,FALSE,state->box,
+ +                            top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
+ +                            CGLO_RERUNMD | CGLO_GSTAT | CGLO_TEMPERATURE);
+ +        }
+ +        clear_mat(force_vir);
+ +        
+ +        /* Ionize the atoms if necessary */
+ +        if (bIonize)
+ +        {
+ +            ionize(fplog,oenv,mdatoms,top_global,t,ir,state->x,state->v,
+ +                   mdatoms->start,mdatoms->start+mdatoms->homenr,state->box,cr);
+ +        }
+ +        
+ +        /* Update force field in ffscan program */
+ +        if (bFFscan)
+ +        {
+ +            if (update_forcefield(fplog,
+ +                                  nfile,fnm,fr,
+ +                                  mdatoms->nr,state->x,state->box)) {
+ +                if (gmx_parallel_env_initialized())
+ +                {
+ +                    gmx_finalize();
+ +                }
+ +                exit(0);
+ +            }
+ +        }
+ +
+ +        GMX_MPE_LOG(ev_timestep2);
+ +
+ +        /* We write a checkpoint at this MD step when:
+ +         * either at an NS step when we signalled through gs,
+ +         * or at the last step (but not when we do not want confout),
+ +         * but never at the first step or with rerun.
+ +         */
+ +        bCPT = (((gs.set[eglsCHKPT] && (bNS || ir->nstlist == 0)) ||
+ +                 (bLastStep && (Flags & MD_CONFOUT))) &&
+ +                step > ir->init_step && !bRerunMD);
+ +        if (bCPT)
+ +        {
+ +            gs.set[eglsCHKPT] = 0;
+ +        }
+ +
+ +        /* Determine the energy and pressure:
+ +         * at nstcalcenergy steps and at energy output steps (set below).
+ +         */
+ +        bNstEner = do_per_step(step,ir->nstcalcenergy);
+ +        bCalcEnerPres =
+ +            (bNstEner ||
+ +             (ir->epc != epcNO && do_per_step(step,ir->nstpcouple)));
+ +
+ +        /* Do we need global communication ? */
+ +        bGStat = (bCalcEnerPres || bStopCM ||
+ +                  do_per_step(step,nstglobalcomm) ||
+ +                  (ir->nstlist == -1 && !bRerunMD && step >= nlh.step_nscheck));
+ +
+ +        do_ene = (do_per_step(step,ir->nstenergy) || bLastStep);
+ +
+ +        if (do_ene || do_log)
+ +        {
+ +            bCalcEnerPres = TRUE;
+ +            bGStat        = TRUE;
+ +        }
+ +        
+ +        /* these CGLO_ options remain the same throughout the iteration */
+ +        cglo_flags = ((bRerunMD ? CGLO_RERUNMD : 0) |
+ +                      (bStopCM ? CGLO_STOPCM : 0) |
+ +                      (bGStat ? CGLO_GSTAT : 0)
+ +            );
+ +        
+ +        force_flags = (GMX_FORCE_STATECHANGED |
+ +                       ((DYNAMIC_BOX(*ir) || bRerunMD) ? GMX_FORCE_DYNAMICBOX : 0) |
+ +                       GMX_FORCE_ALLFORCES |
+ +                       (bNStList ? GMX_FORCE_DOLR : 0) |
+ +                       GMX_FORCE_SEPLRF |
+ +                       (bCalcEnerPres ? GMX_FORCE_VIRIAL : 0) |
+ +                       (bDoDHDL ? GMX_FORCE_DHDL : 0)
+ +            );
+ +        
+ +        if (shellfc)
+ +        {
+ +            /* Now is the time to relax the shells */
+ +            count=relax_shell_flexcon(fplog,cr,bVerbose,bFFscan ? step+1 : step,
+ +                                      ir,bNS,force_flags,
+ +                                      bStopCM,top,top_global,
+ +                                      constr,enerd,fcd,
+ +                                      state,f,force_vir,mdatoms,
+ +                                      nrnb,wcycle,graph,groups,
+ +                                      shellfc,fr,bBornRadii,t,mu_tot,
+ +                                      state->natoms,&bConverged,vsite,
+ +                                      outf->fp_field);
+ +            tcount+=count;
+ +
+ +            if (bConverged)
+ +            {
+ +                nconverged++;
+ +            }
+ +        }
+ +        else
+ +        {
+ +            /* The coordinates (x) are shifted (to get whole molecules)
+ +             * in do_force.
+ +             * This is parallelized as well, and does communication too. 
+ +             * Check comments in sim_util.c
+ +             */
+ +        
+ +            do_force(fplog,cr,ir,step,nrnb,wcycle,top,top_global,groups,
+ +                     state->box,state->x,&state->hist,
+ +                     f,force_vir,mdatoms,enerd,fcd,
+ +                     state->lambda,graph,
+ +                     fr,vsite,mu_tot,t,outf->fp_field,ed,bBornRadii,
+ +                     (bNS ? GMX_FORCE_NS : 0) | force_flags);
+ +        }
+ +    
+ +        GMX_BARRIER(cr->mpi_comm_mygroup);
+ +        
+ +        if (bTCR)
+ +        {
+ +            mu_aver = calc_mu_aver(cr,state->x,mdatoms->chargeA,
+ +                                   mu_tot,&top_global->mols,mdatoms,gnx,grpindex);
+ +        }
+ +        
+ +        if (bTCR && bFirstStep)
+ +        {
+ +            tcr=init_coupling(fplog,nfile,fnm,cr,fr,mdatoms,&(top->idef));
+ +            fprintf(fplog,"Done init_coupling\n"); 
+ +            fflush(fplog);
+ +        }
+ +        
+ +        if (bVV && !bStartingFromCpt && !bRerunMD)
+ +        /*  ############### START FIRST UPDATE HALF-STEP FOR VV METHODS############### */
+ +        {
+ +            if (ir->eI==eiVV && bInitStep) 
+ +            {
+ +                /* if using velocity verlet with full time step Ekin,
+ +                 * take the first half step only to compute the 
+ +                 * virial for the first step. From there,
+ +                 * revert back to the initial coordinates
+ +                 * so that the input is actually the initial step.
+ +                 */
+ +                copy_rvecn(state->v,cbuf,0,state->natoms); /* should make this better for parallelizing? */
+ +            } else {
+ +                /* this is for NHC in the Ekin(t+dt/2) version of vv */
+ +                trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ1);            
+ +            }
+ +
+ +            update_coords(fplog,step,ir,mdatoms,state,
+ +                          f,fr->bTwinRange && bNStList,fr->f_twin,fcd,
+ +                          ekind,M,wcycle,upd,bInitStep,etrtVELOCITY1,
+ +                          cr,nrnb,constr,&top->idef);
+ +            
+ +            if (bIterations)
+ +            {
+ +                gmx_iterate_init(&iterate,bIterations && !bInitStep);
+ +            }
+ +            /* for iterations, we save these vectors, as we will be self-consistently iterating
+ +               the calculations */
+ +
+ +            /*#### UPDATE EXTENDED VARIABLES IN TROTTER FORMULATION */
+ +            
+ +            /* save the state */
+ +            if (bIterations && iterate.bIterate) { 
+ +                copy_coupling_state(state,bufstate,ekind,ekind_save,&(ir->opts));
+ +            }
+ +            
+ +            bFirstIterate = TRUE;
+ +            while (bFirstIterate || (bIterations && iterate.bIterate))
+ +            {
+ +                if (bIterations && iterate.bIterate) 
+ +                {
+ +                    copy_coupling_state(bufstate,state,ekind_save,ekind,&(ir->opts));
+ +                    if (bFirstIterate && bTrotter) 
+ +                    {
+ +                        /* The first time through, we need a decent first estimate
+ +                           of veta(t+dt) to compute the constraints.  Do
+ +                           this by computing the box volume part of the
+ +                           trotter integration at this time. Nothing else
+ +                           should be changed by this routine here.  If
+ +                           !(first time), we start with the previous value
+ +                           of veta.  */
+ +                        
+ +                        veta_save = state->veta;
+ +                        trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ0);
+ +                        vetanew = state->veta;
+ +                        state->veta = veta_save;
+ +                    } 
+ +                } 
+ +                
+ +                bOK = TRUE;
+ +                if ( !bRerunMD || rerun_fr.bV || bForceUpdate) {  /* Why is rerun_fr.bV here?  Unclear. */
+ +                    dvdl = 0;
+ +                    
+ +                    update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,state,graph,f,
+ +                                       &top->idef,shake_vir,NULL,
+ +                                       cr,nrnb,wcycle,upd,constr,
+ +                                       bInitStep,TRUE,bCalcEnerPres,vetanew);
+ +                    
+ +                    if (!bOK && !bFFscan)
+ +                    {
+ +                        gmx_fatal(FARGS,"Constraint error: Shake, Lincs or Settle could not solve the constrains");
+ +                    }
+ +                    
+ +                } 
+ +                else if (graph)
+ +                { /* Need to unshift here if a do_force has been
+ +                     called in the previous step */
+ +                    unshift_self(graph,state->box,state->x);
+ +                }
+ +                
+ +                
+ +                /* if VV, compute the pressure and constraints */
+ +                /* For VV2, we strictly only need this if using pressure
+ +                 * control, but we really would like to have accurate pressures
+ +                 * printed out.
+ +                 * Think about ways around this in the future?
+ +                 * For now, keep this choice in comments.
+ +                 */
+ +                /*bPres = (ir->eI==eiVV || IR_NPT_TROTTER(ir)); */
+ +                    /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && IR_NPT_TROTTER(ir)));*/
+ +                bPres = TRUE;
+ +                bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK));
+ +                compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
+ +                                wcycle,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
+ +                                constr,NULL,FALSE,state->box,
+ +                                top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
+ +                                cglo_flags 
+ +                                | CGLO_ENERGY 
+ +                                | (bTemp ? CGLO_TEMPERATURE:0) 
+ +                                | (bPres ? CGLO_PRESSURE : 0) 
+ +                                | (bPres ? CGLO_CONSTRAINT : 0)
+ +                                | ((bIterations && iterate.bIterate) ? CGLO_ITERATE : 0)  
+ +                                | (bFirstIterate ? CGLO_FIRSTITERATE : 0)
+ +                                | CGLO_SCALEEKIN 
+ +                    );
+ +                /* explanation of above: 
+ +                   a) We compute Ekin at the full time step
+ +                   if 1) we are using the AveVel Ekin, and it's not the
+ +                   initial step, or 2) if we are using AveEkin, but need the full
+ +                   time step kinetic energy for the pressure (always true now, since we want accurate statistics).
+ +                   b) If we are using EkinAveEkin for the kinetic energy for the temperture control, we still feed in 
+ +                   EkinAveVel because it's needed for the pressure */
+ +                
+ +                /* temperature scaling and pressure scaling to produce the extended variables at t+dt */
+ +                if (!bInitStep) 
+ +                {
+ +                    if (bTrotter)
+ +                    {
+ +                        trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ2);
+ +                    } 
+ +                    else 
+ +                    {
+ +                        update_tcouple(fplog,step,ir,state,ekind,wcycle,upd,&MassQ,mdatoms);
+ +                    }
+ +                }
+ +                
+ +                if (bIterations &&
+ +                    done_iterating(cr,fplog,step,&iterate,bFirstIterate,
+ +                                   state->veta,&vetanew)) 
+ +                {
+ +                    break;
+ +                }
+ +                bFirstIterate = FALSE;
+ +            }
+ +
+ +            if (bTrotter && !bInitStep) {
+ +                copy_mat(shake_vir,state->svir_prev);
+ +                copy_mat(force_vir,state->fvir_prev);
+ +                if (IR_NVT_TROTTER(ir) && ir->eI==eiVV) {
+ +                    /* update temperature and kinetic energy now that step is over - this is the v(t+dt) point */
+ +                    enerd->term[F_TEMP] = sum_ekin(&(ir->opts),ekind,NULL,(ir->eI==eiVV),FALSE,FALSE);
+ +                    enerd->term[F_EKIN] = trace(ekind->ekin);
+ +                }
+ +            }
+ +            /* if it's the initial step, we performed this first step just to get the constraint virial */
+ +            if (bInitStep && ir->eI==eiVV) {
+ +                copy_rvecn(cbuf,state->v,0,state->natoms);
+ +            }
+ +            
+ +            if (fr->bSepDVDL && fplog && do_log) 
+ +            {
+ +                fprintf(fplog,sepdvdlformat,"Constraint",0.0,dvdl);
+ +            }
+ +            enerd->term[F_DHDL_CON] += dvdl;
+ +            
+ +            GMX_MPE_LOG(ev_timestep1);
+ +        }
+ +    
+ +        /* MRS -- now done iterating -- compute the conserved quantity */
+ +        if (bVV) {
+ +            saved_conserved_quantity = compute_conserved_from_auxiliary(ir,state,&MassQ);
+ +            if (ir->eI==eiVV) 
+ +            {
+ +                last_ekin = enerd->term[F_EKIN]; /* does this get preserved through checkpointing? */
+ +            }
+ +            if ((ir->eDispCorr != edispcEnerPres) && (ir->eDispCorr != edispcAllEnerPres)) 
+ +            {
+ +                saved_conserved_quantity -= enerd->term[F_DISPCORR];
+ +            }
+ +        }
+ +        
+ +        /* ########  END FIRST UPDATE STEP  ############## */
+ +        /* ########  If doing VV, we now have v(dt) ###### */
+ +        
+ +        /* ################## START TRAJECTORY OUTPUT ################# */
+ +        
+ +        /* Now we have the energies and forces corresponding to the 
+ +         * coordinates at time t. We must output all of this before
+ +         * the update.
+ +         * for RerunMD t is read from input trajectory
+ +         */
+ +        GMX_MPE_LOG(ev_output_start);
+ +
+ +        mdof_flags = 0;
+ +        if (do_per_step(step,ir->nstxout)) { mdof_flags |= MDOF_X; }
+ +        if (do_per_step(step,ir->nstvout)) { mdof_flags |= MDOF_V; }
+ +        if (do_per_step(step,ir->nstfout)) { mdof_flags |= MDOF_F; }
+ +        if (do_per_step(step,ir->nstxtcout)) { mdof_flags |= MDOF_XTC; }
+ +        if (bCPT) { mdof_flags |= MDOF_CPT; };
+ +
+ +#if defined(GMX_FAHCORE) || defined(GMX_WRITELASTSTEP)
+ +        if (bLastStep)
+ +        {
+ +            /* Enforce writing positions and velocities at end of run */
+ +            mdof_flags |= (MDOF_X | MDOF_V);
+ +        }
+ +#endif
+ +#ifdef GMX_FAHCORE
+ +        if (MASTER(cr))
+ +            fcReportProgress( ir->nsteps, step );
+ +
+ +        /* sync bCPT and fc record-keeping */
+ +        if (bCPT && MASTER(cr))
+ +            fcRequestCheckPoint();
+ +#endif
+ +        
+ +        if (mdof_flags != 0)
+ +        {
+ +            wallcycle_start(wcycle,ewcTRAJ);
+ +            if (bCPT)
+ +            {
+ +                if (state->flags & (1<<estLD_RNG))
+ +                {
+ +                    get_stochd_state(upd,state);
+ +                }
+ +                if (MASTER(cr))
+ +                {
+ +                    if (bSumEkinhOld)
+ +                    {
+ +                        state_global->ekinstate.bUpToDate = FALSE;
+ +                    }
+ +                    else
+ +                    {
+ +                        update_ekinstate(&state_global->ekinstate,ekind);
+ +                        state_global->ekinstate.bUpToDate = TRUE;
+ +                    }
+ +                    update_energyhistory(&state_global->enerhist,mdebin);
+ +                }
+ +            }
+ +            write_traj(fplog,cr,outf,mdof_flags,top_global,
+ +                       step,t,state,state_global,f,f_global,&n_xtc,&x_xtc);
+ +            if (bCPT)
+ +            {
+ +                nchkpt++;
+ +                bCPT = FALSE;
+ +            }
+ +            debug_gmx();
+ +            if (bLastStep && step_rel == ir->nsteps &&
+ +                (Flags & MD_CONFOUT) && MASTER(cr) &&
+ +                !bRerunMD && !bFFscan)
+ +            {
+ +                /* x and v have been collected in write_traj,
+ +                 * because a checkpoint file will always be written
+ +                 * at the last step.
+ +                 */
+ +                fprintf(stderr,"\nWriting final coordinates.\n");
+ +                if (ir->ePBC != epbcNONE && !ir->bPeriodicMols &&
+ +                    DOMAINDECOMP(cr))
+ +                {
+ +                    /* Make molecules whole only for confout writing */
+ +                    do_pbc_mtop(fplog,ir->ePBC,state->box,top_global,state_global->x);
+ +                }
+ +                write_sto_conf_mtop(ftp2fn(efSTO,nfile,fnm),
+ +                                    *top_global->name,top_global,
+ +                                    state_global->x,state_global->v,
+ +                                    ir->ePBC,state->box);
+ +                debug_gmx();
+ +            }
+ +            wallcycle_stop(wcycle,ewcTRAJ);
+ +        }
+ +        GMX_MPE_LOG(ev_output_finish);
+ +        
+ +        /* kludge -- virial is lost with restart for NPT control. Must restart */
+ +        if (bStartingFromCpt && bVV) 
+ +        {
+ +            copy_mat(state->svir_prev,shake_vir);
+ +            copy_mat(state->fvir_prev,force_vir);
+ +        }
+ +        /*  ################## END TRAJECTORY OUTPUT ################ */
+ +        
+ +        /* Determine the wallclock run time up till now */
+ +        run_time = gmx_gettime() - (double)runtime->real;
+ +
+ +        /* Check whether everything is still allright */    
+ +        if (((int)gmx_get_stop_condition() > handled_stop_condition)
+ +#ifdef GMX_THREADS
+ +            && MASTER(cr)
+ +#endif
+ +            )
+ +        {
+ +            /* this is just make gs.sig compatible with the hack 
+ +               of sending signals around by MPI_Reduce with together with
+ +               other floats */
+ +            if ( gmx_get_stop_condition() == gmx_stop_cond_next_ns )
+ +                gs.sig[eglsSTOPCOND]=1;
+ +            if ( gmx_get_stop_condition() == gmx_stop_cond_next )
+ +                gs.sig[eglsSTOPCOND]=-1;
+ +            /* < 0 means stop at next step, > 0 means stop at next NS step */
+ +            if (fplog)
+ +            {
+ +                fprintf(fplog,
+ +                        "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
+ +                        gmx_get_signal_name(),
+ +                        gs.sig[eglsSTOPCOND]==1 ? "NS " : "");
+ +                fflush(fplog);
+ +            }
+ +            fprintf(stderr,
+ +                    "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
+ +                    gmx_get_signal_name(),
+ +                    gs.sig[eglsSTOPCOND]==1 ? "NS " : "");
+ +            fflush(stderr);
+ +            handled_stop_condition=(int)gmx_get_stop_condition();
+ +        }
+ +        else if (MASTER(cr) && (bNS || ir->nstlist <= 0) &&
+ +                 (max_hours > 0 && run_time > max_hours*60.0*60.0*0.99) &&
+ +                 gs.sig[eglsSTOPCOND] == 0 && gs.set[eglsSTOPCOND] == 0)
+ +        {
+ +            /* Signal to terminate the run */
+ +            gs.sig[eglsSTOPCOND] = 1;
+ +            if (fplog)
+ +            {
+ +                fprintf(fplog,"\nStep %s: Run time exceeded %.3f hours, will terminate the run\n",gmx_step_str(step,sbuf),max_hours*0.99);
+ +            }
+ +            fprintf(stderr, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n",gmx_step_str(step,sbuf),max_hours*0.99);
+ +        }
-                             bFirstIterate ? &gs : NULL,(step % gs.nstms == 0),
++
+ +        if (bResetCountersHalfMaxH && MASTER(cr) &&
+ +            run_time > max_hours*60.0*60.0*0.495)
+ +        {
+ +            gs.sig[eglsRESETCOUNTERS] = 1;
+ +        }
+ +
+ +        if (ir->nstlist == -1 && !bRerunMD)
+ +        {
+ +            /* When bGStatEveryStep=FALSE, global_stat is only called
+ +             * when we check the atom displacements, not at NS steps.
+ +             * This means that also the bonded interaction count check is not
+ +             * performed immediately after NS. Therefore a few MD steps could
+ +             * be performed with missing interactions.
+ +             * But wrong energies are never written to file,
+ +             * since energies are only written after global_stat
+ +             * has been called.
+ +             */
+ +            if (step >= nlh.step_nscheck)
+ +            {
+ +                nlh.nabnsb = natoms_beyond_ns_buffer(ir,fr,&top->cgs,
+ +                                                     nlh.scale_tot,state->x);
+ +            }
+ +            else
+ +            {
+ +                /* This is not necessarily true,
+ +                 * but step_nscheck is determined quite conservatively.
+ +                 */
+ +                nlh.nabnsb = 0;
+ +            }
+ +        }
+ +
+ +        /* In parallel we only have to check for checkpointing in steps
+ +         * where we do global communication,
+ +         *  otherwise the other nodes don't know.
+ +         */
+ +        if (MASTER(cr) && ((bGStat || !PAR(cr)) &&
+ +                           cpt_period >= 0 &&
+ +                           (cpt_period == 0 || 
+ +                            run_time >= nchkpt*cpt_period*60.0)) &&
+ +            gs.set[eglsCHKPT] == 0)
+ +        {
+ +            gs.sig[eglsCHKPT] = 1;
+ +        }
+ +  
+ +        if (bIterations)
+ +        {
+ +            gmx_iterate_init(&iterate,bIterations);
+ +        }
+ +    
+ +        /* for iterations, we save these vectors, as we will be redoing the calculations */
+ +        if (bIterations && iterate.bIterate) 
+ +        {
+ +            copy_coupling_state(state,bufstate,ekind,ekind_save,&(ir->opts));
+ +        }
+ +        bFirstIterate = TRUE;
+ +        while (bFirstIterate || (bIterations && iterate.bIterate))
+ +        {
+ +            /* We now restore these vectors to redo the calculation with improved extended variables */    
+ +            if (bIterations) 
+ +            { 
+ +                copy_coupling_state(bufstate,state,ekind_save,ekind,&(ir->opts));
+ +            }
+ +
+ +            /* We make the decision to break or not -after- the calculation of Ekin and Pressure,
+ +               so scroll down for that logic */
+ +            
+ +            /* #########   START SECOND UPDATE STEP ################# */
+ +            GMX_MPE_LOG(ev_update_start);
+ +            /* Box is changed in update() when we do pressure coupling,
+ +             * but we should still use the old box for energy corrections and when
+ +             * writing it to the energy file, so it matches the trajectory files for
+ +             * the same timestep above. Make a copy in a separate array.
+ +             */
+ +            copy_mat(state->box,lastbox);
+ +
+ +            bOK = TRUE;
+ +            if (!(bRerunMD && !rerun_fr.bV && !bForceUpdate))
+ +            {
+ +                wallcycle_start(wcycle,ewcUPDATE);
+ +                dvdl = 0;
+ +                /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */
+ +                if (bTrotter) 
+ +                {
+ +                    if (bIterations && iterate.bIterate) 
+ +                    {
+ +                        if (bFirstIterate) 
+ +                        {
+ +                            scalevir = 1;
+ +                        }
+ +                        else 
+ +                        {
+ +                            /* we use a new value of scalevir to converge the iterations faster */
+ +                            scalevir = tracevir/trace(shake_vir);
+ +                        }
+ +                        msmul(shake_vir,scalevir,shake_vir); 
+ +                        m_add(force_vir,shake_vir,total_vir);
+ +                        clear_mat(shake_vir);
+ +                    }
+ +                    trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ3);
+ +                /* We can only do Berendsen coupling after we have summed
+ +                 * the kinetic energy or virial. Since the happens
+ +                 * in global_state after update, we should only do it at
+ +                 * step % nstlist = 1 with bGStatEveryStep=FALSE.
+ +                 */
+ +                }
+ +                else 
+ +                {
+ +                    update_tcouple(fplog,step,ir,state,ekind,wcycle,upd,&MassQ,mdatoms);
+ +                    update_pcouple(fplog,step,ir,state,pcoupl_mu,M,wcycle,
+ +                                   upd,bInitStep);
+ +                }
+ +
+ +                if (bVV)
+ +                {
+ +                    /* velocity half-step update */
+ +                    update_coords(fplog,step,ir,mdatoms,state,f,
+ +                                  fr->bTwinRange && bNStList,fr->f_twin,fcd,
+ +                                  ekind,M,wcycle,upd,FALSE,etrtVELOCITY2,
+ +                                  cr,nrnb,constr,&top->idef);
+ +                }
+ +
+ +                /* Above, initialize just copies ekinh into ekin,
+ +                 * it doesn't copy position (for VV),
+ +                 * and entire integrator for MD.
+ +                 */
+ +                
+ +                if (ir->eI==eiVVAK) 
+ +                {
+ +                    copy_rvecn(state->x,cbuf,0,state->natoms);
+ +                }
+ +                
+ +                update_coords(fplog,step,ir,mdatoms,state,f,fr->bTwinRange && bNStList,fr->f_twin,fcd,
+ +                              ekind,M,wcycle,upd,bInitStep,etrtPOSITION,cr,nrnb,constr,&top->idef);
+ +                wallcycle_stop(wcycle,ewcUPDATE);
+ +
+ +                update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,state,graph,f,
+ +                                   &top->idef,shake_vir,force_vir,
+ +                                   cr,nrnb,wcycle,upd,constr,
+ +                                   bInitStep,FALSE,bCalcEnerPres,state->veta);  
+ +                
+ +                if (ir->eI==eiVVAK) 
+ +                {
+ +                    /* erase F_EKIN and F_TEMP here? */
+ +                    /* just compute the kinetic energy at the half step to perform a trotter step */
+ +                    compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
+ +                                    wcycle,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
+ +                                    constr,NULL,FALSE,lastbox,
+ +                                    top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
+ +                                    cglo_flags | CGLO_TEMPERATURE    
+ +                        );
+ +                    wallcycle_start(wcycle,ewcUPDATE);
+ +                    trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ4);            
+ +                    /* now we know the scaling, we can compute the positions again again */
+ +                    copy_rvecn(cbuf,state->x,0,state->natoms);
+ +
+ +                    update_coords(fplog,step,ir,mdatoms,state,f,fr->bTwinRange && bNStList,fr->f_twin,fcd,
+ +                                  ekind,M,wcycle,upd,bInitStep,etrtPOSITION,cr,nrnb,constr,&top->idef);
+ +                    wallcycle_stop(wcycle,ewcUPDATE);
+ +
+ +                    /* do we need an extra constraint here? just need to copy out of state->v to upd->xp? */
+ +                    /* are the small terms in the shake_vir here due
+ +                     * to numerical errors, or are they important
+ +                     * physically? I'm thinking they are just errors, but not completely sure. 
+ +                     * For now, will call without actually constraining, constr=NULL*/
+ +                    update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,state,graph,f,
+ +                                       &top->idef,tmp_vir,force_vir,
+ +                                       cr,nrnb,wcycle,upd,NULL,
+ +                                       bInitStep,FALSE,bCalcEnerPres,
+ +                                       state->veta);  
+ +                }
+ +                if (!bOK && !bFFscan) 
+ +                {
+ +                    gmx_fatal(FARGS,"Constraint error: Shake, Lincs or Settle could not solve the constrains");
+ +                }
+ +                
+ +                if (fr->bSepDVDL && fplog && do_log) 
+ +                {
+ +                    fprintf(fplog,sepdvdlformat,"Constraint",0.0,dvdl);
+ +                }
+ +                enerd->term[F_DHDL_CON] += dvdl;
+ +            } 
+ +            else if (graph) 
+ +            {
+ +                /* Need to unshift here */
+ +                unshift_self(graph,state->box,state->x);
+ +            }
+ +            
+ +            GMX_BARRIER(cr->mpi_comm_mygroup);
+ +            GMX_MPE_LOG(ev_update_finish);
+ +
+ +            if (vsite != NULL) 
+ +            {
+ +                wallcycle_start(wcycle,ewcVSITECONSTR);
+ +                if (graph != NULL) 
+ +                {
+ +                    shift_self(graph,state->box,state->x);
+ +                }
+ +                construct_vsites(fplog,vsite,state->x,nrnb,ir->delta_t,state->v,
+ +                                 top->idef.iparams,top->idef.il,
+ +                                 fr->ePBC,fr->bMolPBC,graph,cr,state->box);
+ +                
+ +                if (graph != NULL) 
+ +                {
+ +                    unshift_self(graph,state->box,state->x);
+ +                }
+ +                wallcycle_stop(wcycle,ewcVSITECONSTR);
+ +            }
+ +            
+ +            /* ############## IF NOT VV, Calculate globals HERE, also iterate constraints ############ */
+ +            if (ir->nstlist == -1 && bFirstIterate)
+ +            {
+ +                gs.sig[eglsNABNSB] = nlh.nabnsb;
+ +            }
+ +            compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
+ +                            wcycle,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
+ +                            constr,
++                            bFirstIterate ? &gs : NULL, 
++                            (step_rel % gs.nstms == 0) && 
++                                (multisim_nsteps<0 || (step_rel<multisim_nsteps)),
+ +                            lastbox,
+ +                            top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
+ +                            cglo_flags 
+ +                            | (!EI_VV(ir->eI) ? CGLO_ENERGY : 0) 
+ +                            | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0) 
+ +                            | (!EI_VV(ir->eI) || bRerunMD ? CGLO_PRESSURE : 0) 
+ +                            | (bIterations && iterate.bIterate ? CGLO_ITERATE : 0) 
+ +                            | (bFirstIterate ? CGLO_FIRSTITERATE : 0)
+ +                            | CGLO_CONSTRAINT 
+ +                );
+ +            if (ir->nstlist == -1 && bFirstIterate)
+ +            {
+ +                nlh.nabnsb = gs.set[eglsNABNSB];
+ +                gs.set[eglsNABNSB] = 0;
+ +            }
+ +            /* bIterate is set to keep it from eliminating the old ekin kinetic energy terms */
+ +            /* #############  END CALC EKIN AND PRESSURE ################# */
+ +        
+ +            /* Note: this is OK, but there are some numerical precision issues with using the convergence of
+ +               the virial that should probably be addressed eventually. state->veta has better properies,
+ +               but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could
+ +               generate the new shake_vir, but test the veta value for convergence.  This will take some thought. */
+ +
+ +            if (bIterations && 
+ +                done_iterating(cr,fplog,step,&iterate,bFirstIterate,
+ +                               trace(shake_vir),&tracevir)) 
+ +            {
+ +                break;
+ +            }
+ +            bFirstIterate = FALSE;
+ +        }
+ +
+ +        update_box(fplog,step,ir,mdatoms,state,graph,f,
+ +                   ir->nstlist==-1 ? &nlh.scale_tot : NULL,pcoupl_mu,nrnb,wcycle,upd,bInitStep,FALSE);
+ +        
+ +        /* ################# END UPDATE STEP 2 ################# */
+ +        /* #### We now have r(t+dt) and v(t+dt/2)  ############# */
+ +    
+ +        /* The coordinates (x) were unshifted in update */
+ +        if (bFFscan && (shellfc==NULL || bConverged))
+ +        {
+ +            if (print_forcefield(fplog,enerd->term,mdatoms->homenr,
+ +                                 f,NULL,xcopy,
+ +                                 &(top_global->mols),mdatoms->massT,pres))
+ +            {
+ +                if (gmx_parallel_env_initialized())
+ +                {
+ +                    gmx_finalize();
+ +                }
+ +                fprintf(stderr,"\n");
+ +                exit(0);
+ +            }
+ +        }
+ +        if (!bGStat)
+ +        {
+ +            /* We will not sum ekinh_old,                                                            
+ +             * so signal that we still have to do it.                                                
+ +             */
+ +            bSumEkinhOld = TRUE;
+ +        }
+ +        
+ +        if (bTCR)
+ +        {
+ +            /* Only do GCT when the relaxation of shells (minimization) has converged,
+ +             * otherwise we might be coupling to bogus energies. 
+ +             * In parallel we must always do this, because the other sims might
+ +             * update the FF.
+ +             */
+ +
+ +            /* Since this is called with the new coordinates state->x, I assume
+ +             * we want the new box state->box too. / EL 20040121
+ +             */
+ +            do_coupling(fplog,oenv,nfile,fnm,tcr,t,step,enerd->term,fr,
+ +                        ir,MASTER(cr),
+ +                        mdatoms,&(top->idef),mu_aver,
+ +                        top_global->mols.nr,cr,
+ +                        state->box,total_vir,pres,
+ +                        mu_tot,state->x,f,bConverged);
+ +            debug_gmx();
+ +        }
+ +
+ +        /* #########  BEGIN PREPARING EDR OUTPUT  ###########  */
+ +        
+ +        /* sum up the foreign energy and dhdl terms */
+ +        sum_dhdl(enerd,state->lambda,ir);
+ +
+ +        /* use the directly determined last velocity, not actually the averaged half steps */
+ +        if (bTrotter && ir->eI==eiVV) 
+ +        {
+ +            enerd->term[F_EKIN] = last_ekin;
+ +        }
+ +        enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN];
+ +        
+ +        if (bVV)
+ +        {
+ +            enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity;
+ +        }
+ +        else 
+ +        {
+ +            enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + compute_conserved_from_auxiliary(ir,state,&MassQ);
+ +        }
+ +        /* Check for excessively large energies */
+ +        if (bIonize) 
+ +        {
+ +#ifdef GMX_DOUBLE
+ +            real etot_max = 1e200;
+ +#else
+ +            real etot_max = 1e30;
+ +#endif
+ +            if (fabs(enerd->term[F_ETOT]) > etot_max) 
+ +            {
+ +                fprintf(stderr,"Energy too large (%g), giving up\n",
+ +                        enerd->term[F_ETOT]);
+ +            }
+ +        }
+ +        /* #########  END PREPARING EDR OUTPUT  ###########  */
+ +        
+ +        /* Time for performance */
+ +        if (((step % stepout) == 0) || bLastStep) 
+ +        {
+ +            runtime_upd_proc(runtime);
+ +        }
+ +        
+ +        /* Output stuff */
+ +        if (MASTER(cr))
+ +        {
+ +            gmx_bool do_dr,do_or;
+ +            
+ +            if (!(bStartingFromCpt && (EI_VV(ir->eI)))) 
+ +            {
+ +                if (bNstEner)
+ +                {
+ +                    upd_mdebin(mdebin,bDoDHDL, TRUE,
+ +                               t,mdatoms->tmass,enerd,state,lastbox,
+ +                               shake_vir,force_vir,total_vir,pres,
+ +                               ekind,mu_tot,constr);
+ +                }
+ +                else
+ +                {
+ +                    upd_mdebin_step(mdebin);
+ +                }
+ +                
+ +                do_dr  = do_per_step(step,ir->nstdisreout);
+ +                do_or  = do_per_step(step,ir->nstorireout);
+ +                
+ +                print_ebin(outf->fp_ene,do_ene,do_dr,do_or,do_log?fplog:NULL,
+ +                           step,t,
+ +                           eprNORMAL,bCompact,mdebin,fcd,groups,&(ir->opts));
+ +            }
+ +            if (ir->ePull != epullNO)
+ +            {
+ +                pull_print_output(ir->pull,step,t);
+ +            }
+ +            
+ +            if (do_per_step(step,ir->nstlog))
+ +            {
+ +                if(fflush(fplog) != 0)
+ +                {
+ +                    gmx_fatal(FARGS,"Cannot flush logfile - maybe you are out of quota?");
+ +                }
+ +            }
+ +        }
+ +
+ +
+ +        /* Remaining runtime */
+ +        if (MULTIMASTER(cr) && (do_verbose || gmx_got_usr_signal() ))
+ +        {
+ +            if (shellfc) 
+ +            {
+ +                fprintf(stderr,"\n");
+ +            }
+ +            print_time(stderr,runtime,step,ir,cr);
+ +        }
+ +
+ +        /* Replica exchange */
+ +        bExchanged = FALSE;
+ +        if ((repl_ex_nst > 0) && (step > 0) && !bLastStep &&
+ +            do_per_step(step,repl_ex_nst)) 
+ +        {
+ +            bExchanged = replica_exchange(fplog,cr,repl_ex,
+ +                                          state_global,enerd->term,
+ +                                          state,step,t);
+ +
+ +            if (bExchanged && DOMAINDECOMP(cr)) 
+ +            {
+ +                dd_partition_system(fplog,step,cr,TRUE,1,
+ +                                    state_global,top_global,ir,
+ +                                    state,&f,mdatoms,top,fr,
+ +                                    vsite,shellfc,constr,
+ +                                    nrnb,wcycle,FALSE);
+ +            }
+ +        }
+ +        
+ +        bFirstStep = FALSE;
+ +        bInitStep = FALSE;
+ +        bStartingFromCpt = FALSE;
+ +
+ +        /* #######  SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */
+ +        /* With all integrators, except VV, we need to retain the pressure
+ +         * at the current step for coupling at the next step.
+ +         */
+ +        if ((state->flags & (1<<estPRES_PREV)) &&
+ +            (bGStatEveryStep ||
+ +             (ir->nstpcouple > 0 && step % ir->nstpcouple == 0)))
+ +        {
+ +            /* Store the pressure in t_state for pressure coupling
+ +             * at the next MD step.
+ +             */
+ +            copy_mat(pres,state->pres_prev);
+ +        }
+ +        
+ +        /* #######  END SET VARIABLES FOR NEXT ITERATION ###### */
+ +
+ +        if ( (membed!=NULL) && (!bLastStep) )
+ +            rescale_membed(step_rel,membed,state_global->x);
+ +        
+ +        if (bRerunMD) 
+ +        {
+ +            if (MASTER(cr))
+ +            {
+ +                /* read next frame from input trajectory */
+ +                bNotLastFrame = read_next_frame(oenv,status,&rerun_fr);
+ +            }
+ +
+ +            if (PAR(cr))
+ +            {
+ +                rerun_parallel_comm(cr,&rerun_fr,&bNotLastFrame);
+ +            }
+ +        }
+ +        
+ +        if (!bRerunMD || !rerun_fr.bStep)
+ +        {
+ +            /* increase the MD step number */
+ +            step++;
+ +            step_rel++;
+ +        }
+ +        
+ +        cycles = wallcycle_stop(wcycle,ewcSTEP);
+ +        if (DOMAINDECOMP(cr) && wcycle)
+ +        {
+ +            dd_cycles_add(cr->dd,cycles,ddCyclStep);
+ +        }
+ +        
+ +        if (step_rel == wcycle_get_reset_counters(wcycle) ||
+ +            gs.set[eglsRESETCOUNTERS] != 0)
+ +        {
+ +            /* Reset all the counters related to performance over the run */
+ +            reset_all_counters(fplog,cr,step,&step_rel,ir,wcycle,nrnb,runtime);
+ +            wcycle_set_reset_counters(wcycle,-1);
+ +            /* Correct max_hours for the elapsed time */
+ +            max_hours -= run_time/(60.0*60.0);
+ +            bResetCountersHalfMaxH = FALSE;
+ +            gs.set[eglsRESETCOUNTERS] = 0;
+ +        }
++
+ +    }
+ +    /* End of main MD loop */
+ +    debug_gmx();
+ +    
+ +    /* Stop the time */
+ +    runtime_end(runtime);
+ +    
+ +    if (bRerunMD && MASTER(cr))
+ +    {
+ +        close_trj(status);
+ +    }
+ +    
+ +    if (!(cr->duty & DUTY_PME))
+ +    {
+ +        /* Tell the PME only node to finish */
+ +        gmx_pme_finish(cr);
+ +    }
+ +    
+ +    if (MASTER(cr))
+ +    {
+ +        if (ir->nstcalcenergy > 0 && !bRerunMD) 
+ +        {
+ +            print_ebin(outf->fp_ene,FALSE,FALSE,FALSE,fplog,step,t,
+ +                       eprAVER,FALSE,mdebin,fcd,groups,&(ir->opts));
+ +        }
+ +    }
+ +
+ +    done_mdoutf(outf);
+ +
+ +    debug_gmx();
+ +
+ +    if (ir->nstlist == -1 && nlh.nns > 0 && fplog)
+ +    {
+ +        fprintf(fplog,"Average neighborlist lifetime: %.1f steps, std.dev.: %.1f steps\n",nlh.s1/nlh.nns,sqrt(nlh.s2/nlh.nns - sqr(nlh.s1/nlh.nns)));
+ +        fprintf(fplog,"Average number of atoms that crossed the half buffer length: %.1f\n\n",nlh.ab/nlh.nns);
+ +    }
+ +    
+ +    if (shellfc && fplog)
+ +    {
+ +        fprintf(fplog,"Fraction of iterations that converged:           %.2f %%\n",
+ +                (nconverged*100.0)/step_rel);
+ +        fprintf(fplog,"Average number of force evaluations per MD step: %.2f\n\n",
+ +                tcount/step_rel);
+ +    }
+ +    
+ +    if (repl_ex_nst > 0 && MASTER(cr))
+ +    {
+ +        print_replica_exchange_statistics(fplog,repl_ex);
+ +    }
+ +    
+ +    runtime->nsteps_done = step_rel;
+ +    
+ +    return 0;
+ +}
diff --cc src/programs/mdrun/md_openmm.c

index 6043d8feba0d6958fac3e3be92ca948ff9c967b9,0000000000000000000000000000000000000000..87fd3dc6141c95bedb928a4833370a7bbafb1086

mode 100644,000000..100644
--- 1/src/programs/mdrun/md_openmm.c
--- /dev/null
+++ b/src/programs/mdrun/md_openmm.c
@@@ -1,686 -1,0 +1,587 @@@
- /* simulation conditions to transmit */
- enum { eglsNABNSB, eglsCHKPT, eglsSTOPCOND, eglsRESETCOUNTERS, eglsNR };
- 
- typedef struct
- {
-     int nstms;       /* The frequency for intersimulation communication */
-     int sig[eglsNR]; /* The signal set by one process in do_md */
-     int set[eglsNR]; /* The communicated signal, equal for all processes */
- } globsig_t;
- 
- 
- static int multisim_min(const gmx_multisim_t *ms,int nmin,int n)
- {
-     int  *buf;
-     gmx_bool bPos,bEqual;
-     int  s,d;
- 
-     snew(buf,ms->nsim);
-     buf[ms->sim] = n;
-     gmx_sumi_sim(ms->nsim,buf,ms);
-     bPos   = TRUE;
-     bEqual = TRUE;
-     for (s=0; s<ms->nsim; s++)
-     {
-         bPos   = bPos   && (buf[s] > 0);
-         bEqual = bEqual && (buf[s] == buf[0]);
-     }
-     if (bPos)
-     {
-         if (bEqual)
-         {
-             nmin = min(nmin,buf[0]);
-         }
-         else
-         {
-             /* Find the least common multiple */
-             for (d=2; d<nmin; d++)
-             {
-                 s = 0;
-                 while (s < ms->nsim && d % buf[s] == 0)
-                 {
-                     s++;
-                 }
-                 if (s == ms->nsim)
-                 {
-                     /* We found the LCM and it is less than nmin */
-                     nmin = d;
-                     break;
-                 }
-             }
-         }
-     }
-     sfree(buf);
- 
-     return nmin;
- }
- 
- static int multisim_nstsimsync(const t_commrec *cr,
-                                const t_inputrec *ir,int repl_ex_nst)
- {
-     int nmin;
- 
-     if (MASTER(cr))
-     {
-         nmin = INT_MAX;
-         nmin = multisim_min(cr->ms,nmin,ir->nstlist);
-         nmin = multisim_min(cr->ms,nmin,ir->nstcalcenergy);
-         nmin = multisim_min(cr->ms,nmin,repl_ex_nst);
-         if (nmin == INT_MAX)
-         {
-             gmx_fatal(FARGS,"Can not find an appropriate interval for inter-simulation communication, since nstlist, nstcalcenergy and -replex are all <= 0");
-         }
-         /* Avoid inter-simulation communication at every (second) step */
-         if (nmin <= 2)
-         {
-             nmin = 10;
-         }
-     }
- 
-     gmx_bcast(sizeof(int),&nmin,cr);
- 
-     return nmin;
- }
- 
- static void init_global_signals(globsig_t *gs,const t_commrec *cr,
-                                 const t_inputrec *ir,int repl_ex_nst)
- {
-     int i;
- 
-     gs->nstms = 1;
- 
-     for (i=0; i<eglsNR; i++)
-     {
-         gs->sig[i] = 0;
-         gs->set[i] = 0;
-     }
- }
- 
- 
+ +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ + *
+ + * 
+ + *                This source code is part of
+ + * 
+ + *                 G   R   O   M   A   C   S
+ + * 
+ + *          GROningen MAchine for Chemical Simulations
+ + * 
+ + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ + * Copyright (c) 2001-2010, The GROMACS development team,
+ + * check out http://www.gromacs.org for more information.
+ +
+ + * This program is free software; you can redistribute it and/or
+ + * modify it under the terms of the GNU General Public License
+ + * as published by the Free Software Foundation; either version 2
+ + * of the License, or (at your option) any later version.
+ + * 
+ + * If you want to redistribute modifications, please consider that
+ + * scientific software is very special. Version control is crucial -
+ + * bugs must be traceable. We will be happy to consider code for
+ + * inclusion in the official distribution, but derived work must not
+ + * be called official GROMACS. Details are found in the README & COPYING
+ + * files - if they are missing, get the official version at www.gromacs.org.
+ + * 
+ + * To help us fund GROMACS development, we humbly ask that you cite
+ + * the papers on the package - you can find them in the top README file.
+ + * 
+ + * For more info, check our website at http://www.gromacs.org
+ + * 
+ + * And Hey:
+ + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
+ + */
+ +
+ +#ifdef HAVE_CONFIG_H
+ +#include <config.h>
+ +#endif
+ +
+ +#include <signal.h>
+ +#include <stdlib.h>
+ +
+ +#if ((defined WIN32 || defined _WIN32 || defined WIN64 || defined _WIN64) && !defined __CYGWIN__ && !defined __CYGWIN32__)
+ +/* _isnan() */
+ +#include <float.h>
+ +#endif
+ +
+ +#include "typedefs.h"
+ +#include "smalloc.h"
+ +#include "sysstuff.h"
+ +#include "vec.h"
+ +#include "statutil.h"
+ +#include "vcm.h"
+ +#include "mdebin.h"
+ +#include "nrnb.h"
+ +#include "calcmu.h"
+ +#include "index.h"
+ +#include "vsite.h"
+ +#include "update.h"
+ +#include "ns.h"
+ +#include "trnio.h"
+ +#include "xtcio.h"
+ +#include "mdrun.h"
+ +#include "confio.h"
+ +#include "network.h"
+ +#include "pull.h"
+ +#include "xvgr.h"
+ +#include "physics.h"
+ +#include "names.h"
+ +#include "xmdrun.h"
+ +#include "ionize.h"
+ +#include "disre.h"
+ +#include "orires.h"
+ +#include "dihre.h"
+ +#include "pppm.h"
+ +#include "pme.h"
+ +#include "mdatoms.h"
+ +#include "qmmm.h"
+ +#include "mpelogging.h"
+ +#include "domdec.h"
+ +#include "partdec.h"
+ +#include "topsort.h"
+ +#include "coulomb.h"
+ +#include "constr.h"
+ +#include "compute_io.h"
+ +#include "mvdata.h"
+ +#include "checkpoint.h"
+ +#include "mtop_util.h"
+ +#include "sighandler.h"
+ +#include "genborn.h"
+ +#include "string2.h"
+ +#include "copyrite.h"
+ +#include "membed.h"
+ +
+ +#ifdef GMX_THREADS
+ +#include "tmpi.h"
+ +#endif
+ +
+ +/* include even when OpenMM not used to force compilation of do_md_openmm */
+ +#include "openmm_wrapper.h"
+ +
+ +double do_md_openmm(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
+ +                    const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
+ +                    int nstglobalcomm,
+ +                    gmx_vsite_t *vsite,gmx_constr_t constr,
+ +                    int stepout,t_inputrec *ir,
+ +                    gmx_mtop_t *top_global,
+ +                    t_fcdata *fcd,
+ +                    t_state *state_global,
+ +                    t_mdatoms *mdatoms,
+ +                    t_nrnb *nrnb,gmx_wallcycle_t wcycle,
+ +                    gmx_edsam_t ed,t_forcerec *fr,
+ +                    int repl_ex_nst,int repl_ex_seed,
+ +                    gmx_membed_t *membed,
+ +                    real cpt_period,real max_hours,
+ +                    const char *deviceOptions,
+ +                    unsigned long Flags,
+ +                    gmx_runtime_t *runtime)
+ +{
+ +    gmx_mdoutf_t *outf;
+ +    gmx_large_int_t step,step_rel;
+ +    double     run_time;
+ +    double     t,t0,lam0;
+ +    gmx_bool       bSimAnn,
+ +    bFirstStep,bStateFromTPX,bLastStep,bStartingFromCpt;
+ +    gmx_bool       bInitStep=TRUE;
+ +    gmx_bool       do_ene,do_log, do_verbose,
+ +    bX,bV,bF,bCPT;
+ +    tensor     force_vir,shake_vir,total_vir,pres;
+ +    int        i,m;
+ +    int        mdof_flags;
+ +    rvec       mu_tot;
+ +    t_vcm      *vcm;
+ +    int        nchkpt=1;
+ +    gmx_localtop_t *top;
+ +    t_mdebin *mdebin=NULL;
+ +    t_state    *state=NULL;
+ +    rvec       *f_global=NULL;
+ +    int        n_xtc=-1;
+ +    rvec       *x_xtc=NULL;
+ +    gmx_enerdata_t *enerd;
+ +    rvec       *f=NULL;
+ +    gmx_global_stat_t gstat;
+ +    gmx_update_t upd=NULL;
+ +    t_graph    *graph=NULL;
+ +    globsig_t   gs;
+ +
+ +    gmx_groups_t *groups;
+ +    gmx_ekindata_t *ekind, *ekind_save;
+ +    gmx_bool        bAppend;
+ +    int         a0,a1;
+ +    matrix      lastbox;
+ +    real        reset_counters=0,reset_counters_now=0;
+ +    char        sbuf[STEPSTRSIZE],sbuf2[STEPSTRSIZE];
+ +    int         handled_stop_condition=gmx_stop_cond_none; 
+ +
+ +    const char *ommOptions = NULL;
+ +    void   *openmmData;
+ +
+ +#ifdef GMX_DOUBLE
+ +    /* Checks in cmake should prevent the compilation in double precision
+ +     * with OpenMM, but just to be sure we check here.
+ +     */
+ +    gmx_fatal(FARGS,"Compilation was performed in double precision, but OpenMM only supports single precision. If you want to use to OpenMM, compile in single precision.");
+ +#endif
+ +
+ +    bAppend  = (Flags & MD_APPENDFILES);
+ +    check_ir_old_tpx_versions(cr,fplog,ir,top_global);
+ +
+ +    groups = &top_global->groups;
+ +
+ +    /* Initial values */
+ +    init_md(fplog,cr,ir,oenv,&t,&t0,&state_global->lambda,&lam0,
+ +            nrnb,top_global,&upd,
+ +            nfile,fnm,&outf,&mdebin,
+ +            force_vir,shake_vir,mu_tot,&bSimAnn,&vcm,state_global,Flags);
+ +
+ +    clear_mat(total_vir);
+ +    clear_mat(pres);
+ +    /* Energy terms and groups */
+ +    snew(enerd,1);
+ +    init_enerdata(top_global->groups.grps[egcENER].nr,ir->n_flambda,enerd);
+ +    snew(f,top_global->natoms);
+ +
+ +    /* Kinetic energy data */
+ +    snew(ekind,1);
+ +    init_ekindata(fplog,top_global,&(ir->opts),ekind);
+ +    /* needed for iteration of constraints */
+ +    snew(ekind_save,1);
+ +    init_ekindata(fplog,top_global,&(ir->opts),ekind_save);
+ +    /* Copy the cos acceleration to the groups struct */
+ +    ekind->cosacc.cos_accel = ir->cos_accel;
+ +
+ +    gstat = global_stat_init(ir);
+ +    debug_gmx();
+ +
+ +    {
+ +        double io = compute_io(ir,top_global->natoms,groups,mdebin->ebin->nener,1);
+ +        if ((io > 2000) && MASTER(cr))
+ +            fprintf(stderr,
+ +                    "\nWARNING: This run will generate roughly %.0f Mb of data\n\n",
+ +                    io);
+ +    }
+ +
+ +    top = gmx_mtop_generate_local_top(top_global,ir);
+ +
+ +    a0 = 0;
+ +    a1 = top_global->natoms;
+ +
+ +    state = partdec_init_local_state(cr,state_global);
+ +    f_global = f;
+ +
+ +    atoms2md(top_global,ir,0,NULL,a0,a1-a0,mdatoms);
+ +
+ +    if (vsite)
+ +    {
+ +        set_vsite_top(vsite,top,mdatoms,cr);
+ +    }
+ +
+ +    if (ir->ePBC != epbcNONE && !ir->bPeriodicMols)
+ +    {
+ +        graph = mk_graph(fplog,&(top->idef),0,top_global->natoms,FALSE,FALSE);
+ +    }
+ +
+ +    update_mdatoms(mdatoms,state->lambda);
+ +
+ +    if (deviceOptions[0]=='\0')
+ +    {
+ +        /* empty options, which should default to OpenMM in this build */
+ +        ommOptions=deviceOptions;
+ +    }
+ +    else
+ +    {
+ +        if (gmx_strncasecmp(deviceOptions,"OpenMM",6)!=0)
+ +        {
+ +            gmx_fatal(FARGS, "This Gromacs version currently only works with OpenMM. Use -device \"OpenMM:<options>\"");
+ +        }
+ +        else
+ +        {
+ +            ommOptions=strchr(deviceOptions,':');
+ +            if (NULL!=ommOptions)
+ +            {
+ +                /* Increase the pointer to skip the colon */
+ +                ommOptions++;
+ +            }
+ +        }
+ +    }
+ +
+ +    openmmData = openmm_init(fplog, ommOptions, ir, top_global, top, mdatoms, fr, state);
+ +    please_cite(fplog,"Friedrichs2009");
+ +
+ +    if (MASTER(cr))
+ +    {
+ +        /* Update mdebin with energy history if appending to output files */
+ +        if ( Flags & MD_APPENDFILES )
+ +        {
+ +            restore_energyhistory_from_state(mdebin,&state_global->enerhist);
+ +        }
+ +        /* Set the initial energy history in state to zero by updating once */
+ +        update_energyhistory(&state_global->enerhist,mdebin);
+ +    }
+ +
+ +    if (constr)
+ +    {
+ +        set_constraints(constr,top,ir,mdatoms,cr);
+ +    }
+ +
+ +    if (!ir->bContinuation)
+ +    {
+ +        if (mdatoms->cFREEZE && (state->flags & (1<<estV)))
+ +        {
+ +            /* Set the velocities of frozen particles to zero */
+ +            for (i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++)
+ +            {
+ +                for (m=0; m<DIM; m++)
+ +                {
+ +                    if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m])
+ +                    {
+ +                        state->v[i][m] = 0;
+ +                    }
+ +                }
+ +            }
+ +        }
+ +
+ +        if (constr)
+ +        {
+ +            /* Constrain the initial coordinates and velocities */
+ +            do_constrain_first(fplog,constr,ir,mdatoms,state,f,
+ +                               graph,cr,nrnb,fr,top,shake_vir);
+ +        }
+ +        if (vsite)
+ +        {
+ +            /* Construct the virtual sites for the initial configuration */
+ +            construct_vsites(fplog,vsite,state->x,nrnb,ir->delta_t,NULL,
+ +                             top->idef.iparams,top->idef.il,
+ +                             fr->ePBC,fr->bMolPBC,graph,cr,state->box);
+ +        }
+ +    }
+ +
+ +    debug_gmx();
+ +
+ +    if (MASTER(cr))
+ +    {
+ +        char tbuf[20];
+ +        fprintf(fplog,"Initial temperature: %g K\n",enerd->term[F_TEMP]);
+ +        fprintf(stderr,"starting mdrun '%s'\n",
+ +                *(top_global->name));
+ +        if (ir->nsteps >= 0)
+ +        {
+ +            sprintf(tbuf,"%8.1f",(ir->init_step+ir->nsteps)*ir->delta_t);
+ +        }
+ +        else
+ +        {
+ +            sprintf(tbuf,"%s","infinite");
+ +        }
+ +        if (ir->init_step > 0)
+ +        {
+ +            fprintf(stderr,"%s steps, %s ps (continuing from step %s, %8.1f ps).\n",
+ +                    gmx_step_str(ir->init_step+ir->nsteps,sbuf),tbuf,
+ +                    gmx_step_str(ir->init_step,sbuf2),
+ +                    ir->init_step*ir->delta_t);
+ +        }
+ +        else
+ +        {
+ +            fprintf(stderr,"%s steps, %s ps.\n",
+ +                    gmx_step_str(ir->nsteps,sbuf),tbuf);
+ +        }
+ +    }
+ +
+ +    fprintf(fplog,"\n");
+ +
+ +    /* Set and write start time */
+ +    runtime_start(runtime);
+ +    print_date_and_time(fplog,cr->nodeid,"Started mdrun",runtime);
+ +    wallcycle_start(wcycle,ewcRUN);
+ +    if (fplog)
+ +        fprintf(fplog,"\n");
+ +
+ +    /* safest point to do file checkpointing is here.  More general point would be immediately before integrator call */
+ +
+ +    debug_gmx();
+ +    /***********************************************************
+ +     *
+ +     *             Loop over MD steps
+ +     *
+ +     ************************************************************/
+ +
+ +    /* loop over MD steps or if rerunMD to end of input trajectory */
+ +    bFirstStep = TRUE;
+ +    /* Skip the first Nose-Hoover integration when we get the state from tpx */
+ +    bStateFromTPX = !opt2bSet("-cpi",nfile,fnm);
+ +    bInitStep = bFirstStep && bStateFromTPX;
+ +    bStartingFromCpt = (Flags & MD_STARTFROMCPT) && bInitStep;
+ +    bLastStep = FALSE;
+ +
+ +    init_global_signals(&gs,cr,ir,repl_ex_nst);
+ +
+ +    step = ir->init_step;
+ +    step_rel = 0;
+ +
+ +    while (!bLastStep)
+ +    {
+ +        wallcycle_start(wcycle,ewcSTEP);
+ +
+ +        GMX_MPE_LOG(ev_timestep1);
+ +
+ +        bLastStep = (step_rel == ir->nsteps);
+ +        t = t0 + step*ir->delta_t;
+ +
+ +        if (gs.set[eglsSTOPCOND] != 0)
+ +        {
+ +            bLastStep = TRUE;
+ +        }
+ +
+ +        do_log = do_per_step(step,ir->nstlog) || bFirstStep || bLastStep;
+ +        do_verbose = bVerbose &&
+ +                     (step % stepout == 0 || bFirstStep || bLastStep);
+ +
+ +        if (MASTER(cr) && do_log)
+ +        {
+ +            print_ebin_header(fplog,step,t,state->lambda);
+ +        }
+ +
+ +        clear_mat(force_vir);
+ +        GMX_MPE_LOG(ev_timestep2);
+ +
+ +        /* We write a checkpoint at this MD step when:
+ +         * either when we signalled through gs (in OpenMM NS works different),
+ +         * or at the last step (but not when we do not want confout),
+ +         * but never at the first step.
+ +         */
+ +        bCPT = ((gs.set[eglsCHKPT] ||
+ +                 (bLastStep && (Flags & MD_CONFOUT))) &&
+ +                step > ir->init_step );
+ +        if (bCPT)
+ +        {
+ +            gs.set[eglsCHKPT] = 0;
+ +        }
+ +
+ +        /* Now we have the energies and forces corresponding to the
+ +         * coordinates at time t. We must output all of this before
+ +         * the update.
+ +         * for RerunMD t is read from input trajectory
+ +         */
+ +        GMX_MPE_LOG(ev_output_start);
+ +
+ +        mdof_flags = 0;
+ +        if (do_per_step(step,ir->nstxout))
+ +        {
+ +            mdof_flags |= MDOF_X;
+ +        }
+ +        if (do_per_step(step,ir->nstvout))
+ +        {
+ +            mdof_flags |= MDOF_V;
+ +        }
+ +        if (do_per_step(step,ir->nstfout))
+ +        {
+ +            mdof_flags |= MDOF_F;
+ +        }
+ +        if (do_per_step(step,ir->nstxtcout))
+ +        {
+ +            mdof_flags |= MDOF_XTC;
+ +        }
+ +        if (bCPT)
+ +        {
+ +            mdof_flags |= MDOF_CPT;
+ +        };
+ +        do_ene = (do_per_step(step,ir->nstenergy) || bLastStep);
+ +
+ +        if (mdof_flags != 0 || do_ene || do_log)
+ +        {
+ +            wallcycle_start(wcycle,ewcTRAJ);
+ +            bF = (mdof_flags & MDOF_F);
+ +            bX = (mdof_flags & (MDOF_X | MDOF_XTC | MDOF_CPT));
+ +            bV = (mdof_flags & (MDOF_V | MDOF_CPT));
+ +
+ +            openmm_copy_state(openmmData, state, &t, f, enerd, bX, bV, bF, do_ene);
+ +
+ +            upd_mdebin(mdebin,FALSE,TRUE,
+ +                       t,mdatoms->tmass,enerd,state,lastbox,
+ +                       shake_vir,force_vir,total_vir,pres,
+ +                       ekind,mu_tot,constr);
+ +            print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,do_log?fplog:NULL,
+ +                       step,t,
+ +                       eprNORMAL,bCompact,mdebin,fcd,groups,&(ir->opts));
+ +            write_traj(fplog,cr,outf,mdof_flags,top_global,
+ +                       step,t,state,state_global,f,f_global,&n_xtc,&x_xtc);
+ +            if (bCPT)
+ +            {
+ +                nchkpt++;
+ +                bCPT = FALSE;
+ +            }
+ +            debug_gmx();
+ +            if (bLastStep && step_rel == ir->nsteps &&
+ +                    (Flags & MD_CONFOUT) && MASTER(cr))
+ +            {
+ +                /* x and v have been collected in write_traj,
+ +                 * because a checkpoint file will always be written
+ +                 * at the last step.
+ +                 */
+ +                fprintf(stderr,"\nWriting final coordinates.\n");
+ +                if (ir->ePBC != epbcNONE && !ir->bPeriodicMols)
+ +                {
+ +                    /* Make molecules whole only for confout writing */
+ +                    do_pbc_mtop(fplog,ir->ePBC,state->box,top_global,state_global->x);
+ +                }
+ +                write_sto_conf_mtop(ftp2fn(efSTO,nfile,fnm),
+ +                                    *top_global->name,top_global,
+ +                                    state_global->x,state_global->v,
+ +                                    ir->ePBC,state->box);
+ +                debug_gmx();
+ +            }
+ +            wallcycle_stop(wcycle,ewcTRAJ);
+ +        }
+ +        GMX_MPE_LOG(ev_output_finish);
+ +
+ +
+ +        /* Determine the wallclock run time up till now */
+ +        run_time = gmx_gettime() - (double)runtime->real;
+ +
+ +        /* Check whether everything is still allright */
+ +        if (((int)gmx_get_stop_condition() > handled_stop_condition)
+ +#ifdef GMX_THREADS
+ +            && MASTER(cr)
+ +#endif
+ +            )
+ +        {
+ +           /* this is just make gs.sig compatible with the hack 
+ +               of sending signals around by MPI_Reduce with together with
+ +               other floats */
+ +            /* NOTE: this only works for serial code. For code that allows
+ +               MPI nodes to propagate their condition, see kernel/md.c*/
+ +            if ( gmx_get_stop_condition() == gmx_stop_cond_next_ns )
+ +                gs.set[eglsSTOPCOND]=1;
+ +            if ( gmx_get_stop_condition() == gmx_stop_cond_next )
+ +                gs.set[eglsSTOPCOND]=1;
+ +            /* < 0 means stop at next step, > 0 means stop at next NS step */
+ +            if (fplog)
+ +            {
+ +                fprintf(fplog,
+ +                        "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
+ +                        gmx_get_signal_name(),
+ +                        gs.sig[eglsSTOPCOND]==1 ? "NS " : "");
+ +                fflush(fplog);
+ +            }
+ +            fprintf(stderr,
+ +                    "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
+ +                    gmx_get_signal_name(),
+ +                    gs.sig[eglsSTOPCOND]==1 ? "NS " : "");
+ +            fflush(stderr);
+ +            handled_stop_condition=(int)gmx_get_stop_condition();
+ +        }
+ +        else if (MASTER(cr) &&
+ +                 (max_hours > 0 && run_time > max_hours*60.0*60.0*0.99) &&
+ +                 gs.set[eglsSTOPCOND] == 0)
+ +        {
+ +            /* Signal to terminate the run */
+ +            gs.set[eglsSTOPCOND] = 1;
+ +            if (fplog)
+ +            {
+ +                fprintf(fplog,"\nStep %s: Run time exceeded %.3f hours, will terminate the run\n",gmx_step_str(step,sbuf),max_hours*0.99);
+ +            }
+ +            fprintf(stderr, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n",gmx_step_str(step,sbuf),max_hours*0.99);
+ +        }
+ +
+ +        /* checkpoints */
+ +        if (MASTER(cr) && (cpt_period >= 0 &&
+ +                           (cpt_period == 0 ||
+ +                            run_time >= nchkpt*cpt_period*60.0)) &&
+ +                gs.set[eglsCHKPT] == 0)
+ +        {
+ +            gs.set[eglsCHKPT] = 1;
+ +        }
+ +
+ +        /* Time for performance */
+ +        if (((step % stepout) == 0) || bLastStep)
+ +        {
+ +            runtime_upd_proc(runtime);
+ +        }
+ +
+ +        if (do_per_step(step,ir->nstlog))
+ +        {
+ +            if (fflush(fplog) != 0)
+ +            {
+ +                gmx_fatal(FARGS,"Cannot flush logfile - maybe you are out of quota?");
+ +            }
+ +        }
+ +
+ +        /* Remaining runtime */
+ +        if (MULTIMASTER(cr) && (do_verbose || gmx_got_usr_signal() ))
+ +        {
+ +            print_time(stderr,runtime,step,ir,cr);
+ +        }
+ +
+ +        bFirstStep = FALSE;
+ +        bInitStep = FALSE;
+ +        bStartingFromCpt = FALSE;
+ +        step++;
+ +        step_rel++;
+ +
+ +        openmm_take_one_step(openmmData);
+ +    }
+ +    /* End of main MD loop */
+ +    debug_gmx();
+ +
+ +    /* Stop the time */
+ +    runtime_end(runtime);
+ +
+ +    if (MASTER(cr))
+ +    {
+ +        if (ir->nstcalcenergy > 0) 
+ +        {
+ +            print_ebin(outf->fp_ene,FALSE,FALSE,FALSE,fplog,step,t,
+ +                       eprAVER,FALSE,mdebin,fcd,groups,&(ir->opts));
+ +        }
+ +    }
+ +
+ +    openmm_cleanup(fplog, openmmData);
+ +
+ +    done_mdoutf(outf);
+ +
+ +    debug_gmx();
+ +
+ +    runtime->nsteps_done = step_rel;
+ +
+ +    return 0;
+ +}
diff --cc src/programs/mdrun/mdrun.c

index 2d62e76bc87d731fd31ab377f7c3da19e23a0397,0000000000000000000000000000000000000000..377bc8fccacd62903f6d11830f998d2c4048b8a7

mode 100644,000000..100644
--- 1/src/programs/mdrun/mdrun.c
--- /dev/null
+++ b/src/programs/mdrun/mdrun.c
@@@ -1,674 -1,0 +1,696 @@@
-     "[BR]",
+ +/*  -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ + *
+ + * 
+ + *                This source code is part of
+ + * 
+ + *                 G   R   O   M   A   C   S
+ + * 
+ + *          GROningen MAchine for Chemical Simulations
+ + * 
+ + *                        VERSION 3.2.0
+ + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ + * Copyright (c) 2001-2004, The GROMACS development team,
+ + * check out http://www.gromacs.org for more information.
+ +
+ + * This program is free software; you can redistribute it and/or
+ + * modify it under the terms of the GNU General Public License
+ + * as published by the Free Software Foundation; either version 2
+ + * of the License, or (at your option) any later version.
+ + * 
+ + * If you want to redistribute modifications, please consider that
+ + * scientific software is very special. Version control is crucial -
+ + * bugs must be traceable. We will be happy to consider code for
+ + * inclusion in the official distribution, but derived work must not
+ + * be called official GROMACS. Details are found in the README & COPYING
+ + * files - if they are missing, get the official version at www.gromacs.org.
+ + * 
+ + * To help us fund GROMACS development, we humbly ask that you cite
+ + * the papers on the package - you can find them in the top README file.
+ + * 
+ + * For more info, check our website at http://www.gromacs.org
+ + * 
+ + * And Hey:
+ + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
+ + */
+ +#ifdef HAVE_CONFIG_H
+ +#include <config.h>
+ +#endif
+ +
+ +#include "typedefs.h"
+ +#include "macros.h"
+ +#include "copyrite.h"
+ +#include "main.h"
+ +#include "statutil.h"
+ +#include "smalloc.h"
+ +#include "futil.h"
+ +#include "smalloc.h"
+ +#include "edsam.h"
+ +#include "mdrun.h"
+ +#include "xmdrun.h"
+ +#include "checkpoint.h"
+ +#ifdef GMX_THREADS
+ +#include "thread_mpi.h"
+ +#endif
+ +
+ +/* afm stuff */
+ +#include "pull.h"
+ +
+ +int main(int argc,char *argv[])
+ +{
+ +  const char *desc[] = {
+ + #ifdef GMX_OPENMM
+ +    "This is an experimental release of GROMACS for accelerated",
+ +      "Molecular Dynamics simulations on GPU processors. Support is provided",
+ +      "by the OpenMM library (https://simtk.org/home/openmm).[PAR]",
+ +      "*Warning*[BR]",
+ +      "This release is targeted at developers and advanced users and",
+ +      "care should be taken before production use. The following should be",
+ +      "noted before using the program:[PAR]",
+ +      " * The current release runs only on modern nVidia GPU hardware with CUDA support.",
+ +      "Make sure that the necessary CUDA drivers and libraries for your operating system",
+ +      "are already installed. The CUDA SDK also should be installed in order to compile",
+ +      "the program from source (http://www.nvidia.com/object/cuda_home.html).[PAR]",
+ +      " * Multiple GPU cards are not supported.[PAR]",
+ +      " * Only a small subset of the GROMACS features and options are supported on the GPUs.",
+ +      "See below for a detailed list.[PAR]",
+ +      " * Consumer level GPU cards are known to often have problems with faulty memory.",
+ +      "It is recommended that a full memory check of the cards is done at least once",
+ +      "(for example, using the memtest=full option).",
+ +      "A partial memory check (for example, memtest=15) before and",
+ +      "after the simulation run would help spot",
+ +      "problems resulting from processor overheating.[PAR]",
+ +      " * The maximum size of the simulated systems depends on the available",
+ +      "GPU memory,for example, a GTX280 with 1GB memory has been tested with systems",
+ +      "of up to about 100,000 atoms.[PAR]",
+ +      " * In order to take a full advantage of the GPU platform features, many algorithms",
+ +      "have been implemented in a very different way than they are on the CPUs.",
+ +      "Therefore numercal correspondence between properties of the state of",
+ +      "simulated systems should not be expected. Moreover, the values will likely vary",
+ +      "when simulations are done on different GPU hardware.[PAR]",
+ +      " * Frequent retrieval of system state information such as",
+ +      "trajectory coordinates and energies can greatly influence the performance",
+ +      "of the program due to slow CPU<->GPU memory transfer speed.[PAR]",
+ +      " * MD algorithms are complex, and although the Gromacs code is highly tuned for them,",
+ +      "they often do not translate very well onto the streaming architetures.",
+ +      "Realistic expectations about the achievable speed-up from test with GTX280:",
+ +      "For small protein systems in implicit solvent using all-vs-all kernels the acceleration",
+ +      "can be as high as 20 times, but in most other setups involving cutoffs and PME the",
+ +      "acceleration is usually only ~4 times relative to a 3GHz CPU.[PAR]",
+ +      "Supported features:[PAR]",
+ +      " * Integrators: md/md-vv/md-vv-avek, sd/sd1 and bd.\n",
+ +      " * Long-range interactions (option coulombtype): Reaction-Field, Ewald, PME, and cut-off (for Implicit Solvent only)\n",
+ +      " * Temperature control: Supported only with the md/md-vv/md-vv-avek, sd/sd1 and bd integrators.\n",
+ +      " * Pressure control: Supported.\n",
+ +      " * Implicit solvent: Supported.\n",
+ +      "A detailed description can be found on the GROMACS website:\n",
+ +      "http://www.gromacs.org/gpu[PAR]",
+ +/* From the original mdrun documentation */
+ +    "The [TT]mdrun[tt] program reads the run input file ([TT]-s[tt])",
+ +    "and distributes the topology over nodes if needed.",
+ +    "[TT]mdrun[tt] produces at least four output files.",
+ +    "A single log file ([TT]-g[tt]) is written, unless the option",
+ +    "[TT]-seppot[tt] is used, in which case each node writes a log file.",
+ +    "The trajectory file ([TT]-o[tt]), contains coordinates, velocities and",
+ +    "optionally forces.",
+ +    "The structure file ([TT]-c[tt]) contains the coordinates and",
+ +    "velocities of the last step.",
+ +    "The energy file ([TT]-e[tt]) contains energies, the temperature,",
+ +    "pressure, etc, a lot of these things are also printed in the log file.",
+ +    "Optionally coordinates can be written to a compressed trajectory file",
+ +    "([TT]-x[tt]).[PAR]",
+ +/* openmm specific information */
+ +      "Usage with OpenMM:[BR]",
+ +      "[TT]mdrun -device \"OpenMM:platform=Cuda,memtest=15,deviceid=0,force-device=no\"[tt][PAR]",
+ +      "Options:[PAR]",
+ +      "      [TT]platform[tt] = Cuda\t\t:\tThe only available value. OpenCL support will be available in future.\n",
+ +      "      [TT]memtest[tt] = 15\t\t:\tRun a partial, random GPU memory test for the given amount of seconds. A full test",
+ +      "(recommended!) can be run with \"memtest=full\". Memory testing can be disabled with \"memtest=off\".\n",
+ +      "      [TT]deviceid[tt] = 0\t\t:\tSpecify the target device when multiple cards are present.",
+ +      "Only one card can be used at any given time though.\n",
+ +      "      [TT]force-device[tt] = no\t\t:\tIf set to \"yes\" [TT]mdrun[tt]  will be forced to execute on",
+ +      "hardware that is not officially supported. GPU acceleration can also be achieved on older",
+ +      "but Cuda capable cards, although the simulation might be too slow, and the memory limits too strict.",
+ +#else
+ +    "The [TT]mdrun[tt] program is the main computational chemistry engine",
+ +    "within GROMACS. Obviously, it performs Molecular Dynamics simulations,",
+ +    "but it can also perform Stochastic Dynamics, Energy Minimization,",
+ +    "test particle insertion or (re)calculation of energies.",
+ +    "Normal mode analysis is another option. In this case [TT]mdrun[tt]",
+ +    "builds a Hessian matrix from single conformation.",
+ +    "For usual Normal Modes-like calculations, make sure that",
+ +    "the structure provided is properly energy-minimized.",
+ +    "The generated matrix can be diagonalized by [TT]g_nmeig[tt].[PAR]",
+ +    "The [TT]mdrun[tt] program reads the run input file ([TT]-s[tt])",
+ +    "and distributes the topology over nodes if needed.",
+ +    "[TT]mdrun[tt] produces at least four output files.",
+ +    "A single log file ([TT]-g[tt]) is written, unless the option",
+ +    "[TT]-seppot[tt] is used, in which case each node writes a log file.",
+ +    "The trajectory file ([TT]-o[tt]), contains coordinates, velocities and",
+ +    "optionally forces.",
+ +    "The structure file ([TT]-c[tt]) contains the coordinates and",
+ +    "velocities of the last step.",
+ +    "The energy file ([TT]-e[tt]) contains energies, the temperature,",
+ +    "pressure, etc, a lot of these things are also printed in the log file.",
+ +    "Optionally coordinates can be written to a compressed trajectory file",
+ +    "([TT]-x[tt]).[PAR]",
+ +    "The option [TT]-dhdl[tt] is only used when free energy calculation is",
+ +    "turned on.[PAR]",
+ +    "When [TT]mdrun[tt] is started using MPI with more than 1 node, parallelization",
+ +    "is used. By default domain decomposition is used, unless the [TT]-pd[tt]",
+ +    "option is set, which selects particle decomposition.[PAR]",
+ +    "With domain decomposition, the spatial decomposition can be set",
+ +    "with option [TT]-dd[tt]. By default [TT]mdrun[tt] selects a good decomposition.",
+ +    "The user only needs to change this when the system is very inhomogeneous.",
+ +    "Dynamic load balancing is set with the option [TT]-dlb[tt],",
+ +    "which can give a significant performance improvement,",
+ +    "especially for inhomogeneous systems. The only disadvantage of",
+ +    "dynamic load balancing is that runs are no longer binary reproducible,",
+ +    "but in most cases this is not important.",
+ +    "By default the dynamic load balancing is automatically turned on",
+ +    "when the measured performance loss due to load imbalance is 5% or more.",
+ +    "At low parallelization these are the only important options",
+ +    "for domain decomposition.",
+ +    "At high parallelization the options in the next two sections",
+ +    "could be important for increasing the performace.",
+ +    "[PAR]",
+ +    "When PME is used with domain decomposition, separate nodes can",
+ +    "be assigned to do only the PME mesh calculation;",
+ +    "this is computationally more efficient starting at about 12 nodes.",
+ +    "The number of PME nodes is set with option [TT]-npme[tt],",
+ +    "this can not be more than half of the nodes.",
+ +    "By default [TT]mdrun[tt] makes a guess for the number of PME",
+ +    "nodes when the number of nodes is larger than 11 or performance wise",
+ +    "not compatible with the PME grid x dimension.",
+ +    "But the user should optimize npme. Performance statistics on this issue",
+ +    "are written at the end of the log file.",
+ +    "For good load balancing at high parallelization, the PME grid x and y",
+ +    "dimensions should be divisible by the number of PME nodes",
+ +    "(the simulation will run correctly also when this is not the case).",
+ +    "[PAR]",
+ +    "This section lists all options that affect the domain decomposition.",
-     "[BR]",
++    "[PAR]",
+ +    "Option [TT]-rdd[tt] can be used to set the required maximum distance",
+ +    "for inter charge-group bonded interactions.",
+ +    "Communication for two-body bonded interactions below the non-bonded",
+ +    "cut-off distance always comes for free with the non-bonded communication.",
+ +    "Atoms beyond the non-bonded cut-off are only communicated when they have",
+ +    "missing bonded interactions; this means that the extra cost is minor",
+ +    "and nearly indepedent of the value of [TT]-rdd[tt].",
+ +    "With dynamic load balancing option [TT]-rdd[tt] also sets",
+ +    "the lower limit for the domain decomposition cell sizes.",
+ +    "By default [TT]-rdd[tt] is determined by [TT]mdrun[tt] based on",
+ +    "the initial coordinates. The chosen value will be a balance",
+ +    "between interaction range and communication cost.",
-     "[BR]",
++    "[PAR]",
+ +    "When inter charge-group bonded interactions are beyond",
+ +    "the bonded cut-off distance, [TT]mdrun[tt] terminates with an error message.",
+ +    "For pair interactions and tabulated bonds",
+ +    "that do not generate exclusions, this check can be turned off",
+ +    "with the option [TT]-noddcheck[tt].",
-     "[BR]",
++    "[PAR]",
+ +    "When constraints are present, option [TT]-rcon[tt] influences",
+ +    "the cell size limit as well.",
+ +    "Atoms connected by NC constraints, where NC is the LINCS order plus 1,",
+ +    "should not be beyond the smallest cell size. A error message is",
+ +    "generated when this happens and the user should change the decomposition",
+ +    "or decrease the LINCS order and increase the number of LINCS iterations.",
+ +    "By default [TT]mdrun[tt] estimates the minimum cell size required for P-LINCS",
+ +    "in a conservative fashion. For high parallelization it can be useful",
+ +    "to set the distance required for P-LINCS with the option [TT]-rcon[tt].",
-     "either the current directory or from the GMXLIB directory.",
-     "A number of pre-formatted tables are presented in the GMXLIB dir,",
-     "for 6-8, 6-9, 6-10, 6-11, 6-12 Lennard Jones potentials with",
++    "[PAR]",
+ +    "The [TT]-dds[tt] option sets the minimum allowed x, y and/or z scaling",
+ +    "of the cells with dynamic load balancing. [TT]mdrun[tt] will ensure that",
+ +    "the cells can scale down by at least this factor. This option is used",
+ +    "for the automated spatial decomposition (when not using [TT]-dd[tt])",
+ +    "as well as for determining the number of grid pulses, which in turn",
+ +    "sets the minimum allowed cell size. Under certain circumstances",
+ +    "the value of [TT]-dds[tt] might need to be adjusted to account for",
+ +    "high or low spatial inhomogeneity of the system.",
+ +    "[PAR]",
+ +    "The option [TT]-gcom[tt] can be used to only do global communication",
+ +    "every n steps.",
+ +    "This can improve performance for highly parallel simulations",
+ +    "where this global communication step becomes the bottleneck.",
+ +    "For a global thermostat and/or barostat the temperature",
+ +    "and/or pressure will also only be updated every [TT]-gcom[tt] steps.",
+ +    "By default it is set to the minimum of nstcalcenergy and nstlist.[PAR]",
+ +    "With [TT]-rerun[tt] an input trajectory can be given for which ",
+ +    "forces and energies will be (re)calculated. Neighbor searching will be",
+ +    "performed for every frame, unless [TT]nstlist[tt] is zero",
+ +    "(see the [TT].mdp[tt] file).[PAR]",
+ +    "ED (essential dynamics) sampling is switched on by using the [TT]-ei[tt]",
+ +    "flag followed by an [TT].edi[tt] file.",
+ +    "The [TT].edi[tt] file can be produced using options in the essdyn",
+ +    "menu of the WHAT IF program. [TT]mdrun[tt] produces a [TT].edo[tt] file that",
+ +    "contains projections of positions, velocities and forces onto selected",
+ +    "eigenvectors.[PAR]",
+ +    "When user-defined potential functions have been selected in the",
+ +    "[TT].mdp[tt] file the [TT]-table[tt] option is used to pass [TT]mdrun[tt]",
+ +    "a formatted table with potential functions. The file is read from",
-     "When pair interactions are present a separate table for pair interaction",
++    "either the current directory or from the [TT]GMXLIB[tt] directory.",
++    "A number of pre-formatted tables are presented in the [TT]GMXLIB[tt] dir,",
++    "for 6-8, 6-9, 6-10, 6-11, 6-12 Lennard-Jones potentials with",
+ +    "normal Coulomb.",
-     "appended, then a b for bonds, an a for angles or a d for dihedrals",
++    "When pair interactions are present, a separate table for pair interaction",
+ +    "functions is read using the [TT]-tablep[tt] option.[PAR]",
+ +    "When tabulated bonded functions are present in the topology,",
+ +    "interaction functions are read using the [TT]-tableb[tt] option.",
+ +    "For each different tabulated interaction type the table file name is",
+ +    "modified in a different way: before the file extension an underscore is",
-     "With [TT]-multi[tt] multiple systems are simulated in parallel.",
-     "As many input files are required as the number of systems.",
-     "The system number is appended to the run input and each output filename,",
-     "for instance [TT]topol.tpr[tt] becomes [TT]topol0.tpr[tt], [TT]topol1.tpr[tt] etc.",
++    "appended, then a 'b' for bonds, an 'a' for angles or a 'd' for dihedrals",
+ +    "and finally the table number of the interaction type.[PAR]",
+ +    "The options [TT]-px[tt] and [TT]-pf[tt] are used for writing pull COM",
+ +    "coordinates and forces when pulling is selected",
+ +    "in the [TT].mdp[tt] file.[PAR]",
-     "of steps. The number of replicas is set with the [TT]-multi[tt] option,",
-     "see above.",
++    "With [TT]-multi[tt] or [TT]-multidir[tt], multiple systems can be ",
++    "simulated in parallel.",
++    "As many input files/directories are required as the number of systems. ",
++    "The [TT]-multidir[tt] option takes a list of directories (one for each ",
++    "system) and runs in each of them, using the input/output file names, ",
++    "such as specified by e.g. the [TT]-s[tt] option, relative to these ",
++    "directories.",
++    "With [TT]-multi[tt], the system number is appended to the run input ",
++    "and each output filename, for instance [TT]topol.tpr[tt] becomes",
++    "[TT]topol0.tpr[tt], [TT]topol1.tpr[tt] etc.",
+ +    "The number of nodes per system is the total number of nodes",
+ +    "divided by the number of systems.",
+ +    "One use of this option is for NMR refinement: when distance",
+ +    "or orientation restraints are present these can be ensemble averaged",
+ +    "over all the systems.[PAR]",
+ +    "With [TT]-replex[tt] replica exchange is attempted every given number",
-     "investigation are: polarizability, and X-Ray bombardments.",
++    "of steps. The number of replicas is set with the [TT]-multi[tt] or ",
++    "[TT]-multidir[tt] option, described above.",
+ +    "All run input files should use a different coupling temperature,",
+ +    "the order of the files is not important. The random seed is set with",
+ +    "[TT]-reseed[tt]. The velocities are scaled and neighbor searching",
+ +    "is performed after every exchange.[PAR]",
+ +    "Finally some experimental algorithms can be tested when the",
+ +    "appropriate options have been given. Currently under",
-     "There are three scenarios with [TT]-cpi[tt]:[BR]",
-     "* no files with matching names are present: new output files are written[BR]",
-     "* all files are present with names and checksums matching those stored",
-     "in the checkpoint file: files are appended[BR]",
-     "* otherwise no files are modified and a fatal error is generated[BR]",
++    "investigation are: polarizability and X-ray bombardments.",
+ +    "[PAR]",
+ +    "The option [TT]-membed[dd] does what used to be g_membed, i.e. embed",
+ +    "a protein into a membrane. The data file should contain the options",
+ +    "that where passed to g_membed before. The [TT]-mn[tt] and [TT]-mp[tt]",
+ +    "both apply to this as well.",
+ +    "[PAR]",
+ +    "The option [TT]-pforce[tt] is useful when you suspect a simulation",
+ +    "crashes due to too large forces. With this option coordinates and",
+ +    "forces of atoms with a force larger than a certain value will",
+ +    "be printed to stderr.",
+ +    "[PAR]",
+ +    "Checkpoints containing the complete state of the system are written",
+ +    "at regular intervals (option [TT]-cpt[tt]) to the file [TT]-cpo[tt],",
+ +    "unless option [TT]-cpt[tt] is set to -1.",
+ +    "The previous checkpoint is backed up to [TT]state_prev.cpt[tt] to",
+ +    "make sure that a recent state of the system is always available,",
+ +    "even when the simulation is terminated while writing a checkpoint.",
+ +    "With [TT]-cpnum[tt] all checkpoint files are kept and appended",
+ +    "with the step number.",
+ +    "A simulation can be continued by reading the full state from file",
+ +    "with option [TT]-cpi[tt]. This option is intelligent in the way that",
+ +    "if no checkpoint file is found, Gromacs just assumes a normal run and",
+ +    "starts from the first step of the [TT].tpr[tt] file. By default the output",
+ +    "will be appending to the existing output files. The checkpoint file",
+ +    "contains checksums of all output files, such that you will never",
+ +    "loose data when some output files are modified, corrupt or removed.",
-     { efNDX, "-mn",     "membed",   ffOPTRD }
++    "There are three scenarios with [TT]-cpi[tt]:[PAR]",
++    "[TT]*[tt] no files with matching names are present: new output files are written[PAR]",
++    "[TT]*[tt] all files are present with names and checksums matching those stored",
++    "in the checkpoint file: files are appended[PAR]",
++    "[TT]*[tt] otherwise no files are modified and a fatal error is generated[PAR]",
+ +    "With [TT]-noappend[tt] new output files are opened and the simulation",
+ +    "part number is added to all output file names.",
+ +    "Note that in all cases the checkpoint file itself is not renamed",
+ +    "and will be overwritten, unless its name does not match",
+ +    "the [TT]-cpo[tt] option.",
+ +    "[PAR]",
+ +    "With checkpointing the output is appended to previously written",
+ +    "output files, unless [TT]-noappend[tt] is used or none of the previous",
+ +    "output files are present (except for the checkpoint file).",
+ +    "The integrity of the files to be appended is verified using checksums",
+ +    "which are stored in the checkpoint file. This ensures that output can",
+ +    "not be mixed up or corrupted due to file appending. When only some",
+ +    "of the previous output files are present, a fatal error is generated",
+ +    "and no old output files are modified and no new output files are opened.",
+ +    "The result with appending will be the same as from a single run.",
+ +    "The contents will be binary identical, unless you use a different number",
+ +    "of nodes or dynamic load balancing or the FFT library uses optimizations",
+ +    "through timing.",
+ +    "[PAR]",
+ +    "With option [TT]-maxh[tt] a simulation is terminated and a checkpoint",
+ +    "file is written at the first neighbor search step where the run time",
+ +    "exceeds [TT]-maxh[tt]*0.99 hours.",
+ +    "[PAR]",
+ +    "When [TT]mdrun[tt] receives a TERM signal, it will set nsteps to the current",
+ +    "step plus one. When [TT]mdrun[tt] receives an INT signal (e.g. when ctrl+C is",
+ +    "pressed), it will stop after the next neighbor search step ",
+ +    "(with nstlist=0 at the next step).",
+ +    "In both cases all the usual output will be written to file.",
+ +    "When running with MPI, a signal to one of the [TT]mdrun[tt] processes",
+ +    "is sufficient, this signal should not be sent to mpirun or",
+ +    "the [TT]mdrun[tt] process that is the parent of the others.",
+ +    "[PAR]",
+ +    "When [TT]mdrun[tt] is started with MPI, it does not run niced by default."
+ +#endif
+ +  };
+ +  t_commrec    *cr;
+ +  t_filenm fnm[] = {
+ +    { efTPX, NULL,      NULL,       ffREAD },
+ +    { efTRN, "-o",      NULL,       ffWRITE },
+ +    { efXTC, "-x",      NULL,       ffOPTWR },
+ +    { efCPT, "-cpi",    NULL,       ffOPTRD },
+ +    { efCPT, "-cpo",    NULL,       ffOPTWR },
+ +    { efSTO, "-c",      "confout",  ffWRITE },
+ +    { efEDR, "-e",      "ener",     ffWRITE },
+ +    { efLOG, "-g",      "md",       ffWRITE },
+ +    { efXVG, "-dhdl",   "dhdl",     ffOPTWR },
+ +    { efXVG, "-field",  "field",    ffOPTWR },
+ +    { efXVG, "-table",  "table",    ffOPTRD },
+ +    { efXVG, "-tablep", "tablep",   ffOPTRD },
+ +    { efXVG, "-tableb", "table",    ffOPTRD },
+ +    { efTRX, "-rerun",  "rerun",    ffOPTRD },
+ +    { efXVG, "-tpi",    "tpi",      ffOPTWR },
+ +    { efXVG, "-tpid",   "tpidist",  ffOPTWR },
+ +    { efEDI, "-ei",     "sam",      ffOPTRD },
+ +    { efEDO, "-eo",     "sam",      ffOPTWR },
+ +    { efGCT, "-j",      "wham",     ffOPTRD },
+ +    { efGCT, "-jo",     "bam",      ffOPTWR },
+ +    { efXVG, "-ffout",  "gct",      ffOPTWR },
+ +    { efXVG, "-devout", "deviatie", ffOPTWR },
+ +    { efXVG, "-runav",  "runaver",  ffOPTWR },
+ +    { efXVG, "-px",     "pullx",    ffOPTWR },
+ +    { efXVG, "-pf",     "pullf",    ffOPTWR },
+ +    { efXVG, "-ro",     "rotation", ffOPTWR },
+ +    { efLOG, "-ra",     "rotangles",ffOPTWR },
+ +    { efLOG, "-rs",     "rotslabs", ffOPTWR },
+ +    { efLOG, "-rt",     "rottorque",ffOPTWR },
+ +    { efMTX, "-mtx",    "nm",       ffOPTWR },
+ +    { efNDX, "-dn",     "dipole",   ffOPTWR },
+ +    { efDAT, "-membed", "membed",   ffOPTRD },
+ +    { efTOP, "-mp",     "membed",   ffOPTRD },
-     init_multisystem(cr,nmultisim,NFILE,fnm,TRUE);
++    { efNDX, "-mn",     "membed",   ffOPTRD },
++    { efRND, "-multidir",NULL,      ffOPTRDMULT}
+ +  };
+ +#define NFILE asize(fnm)
+ +
+ +  /* Command line options ! */
+ +  gmx_bool bCart        = FALSE;
+ +  gmx_bool bPPPME       = FALSE;
+ +  gmx_bool bPartDec     = FALSE;
+ +  gmx_bool bDDBondCheck = TRUE;
+ +  gmx_bool bDDBondComm  = TRUE;
+ +  gmx_bool bVerbose     = FALSE;
+ +  gmx_bool bCompact     = TRUE;
+ +  gmx_bool bSepPot      = FALSE;
+ +  gmx_bool bRerunVSite  = FALSE;
+ +  gmx_bool bIonize      = FALSE;
+ +  gmx_bool bConfout     = TRUE;
+ +  gmx_bool bReproducible = FALSE;
+ +    
+ +  int  npme=-1;
+ +  int  nmultisim=0;
+ +  int  nstglobalcomm=-1;
+ +  int  repl_ex_nst=0;
+ +  int  repl_ex_seed=-1;
+ +  int  nstepout=100;
+ +  int  nthreads=0; /* set to determine # of threads automatically */
+ +  int  resetstep=-1;
+ +  
+ +  rvec realddxyz={0,0,0};
+ +  const char *ddno_opt[ddnoNR+1] =
+ +    { NULL, "interleave", "pp_pme", "cartesian", NULL };
+ +    const char *dddlb_opt[] =
+ +    { NULL, "auto", "no", "yes", NULL };
+ +  real rdd=0.0,rconstr=0.0,dlb_scale=0.8,pforce=-1;
+ +  char *ddcsx=NULL,*ddcsy=NULL,*ddcsz=NULL;
+ +  real cpt_period=15.0,max_hours=-1;
+ +  gmx_bool bAppendFiles=TRUE;
+ +  gmx_bool bKeepAndNumCPT=FALSE;
+ +  gmx_bool bResetCountersHalfWay=FALSE;
+ +  output_env_t oenv=NULL;
+ +  const char *deviceOptions = "";
+ +
+ +  t_pargs pa[] = {
+ +
+ +    { "-pd",      FALSE, etBOOL,{&bPartDec},
+ +      "Use particle decompostion" },
+ +    { "-dd",      FALSE, etRVEC,{&realddxyz},
+ +      "Domain decomposition grid, 0 is optimize" },
+ +#ifdef GMX_THREADS
+ +    { "-nt",      FALSE, etINT, {&nthreads},
+ +      "Number of threads to start (0 is guess)" },
+ +#endif
+ +    { "-npme",    FALSE, etINT, {&npme},
+ +      "Number of separate nodes to be used for PME, -1 is guess" },
+ +    { "-ddorder", FALSE, etENUM, {ddno_opt},
+ +      "DD node order" },
+ +    { "-ddcheck", FALSE, etBOOL, {&bDDBondCheck},
+ +      "Check for all bonded interactions with DD" },
+ +    { "-ddbondcomm", FALSE, etBOOL, {&bDDBondComm},
+ +      "HIDDENUse special bonded atom communication when [TT]-rdd[tt] > cut-off" },
+ +    { "-rdd",     FALSE, etREAL, {&rdd},
+ +      "The maximum distance for bonded interactions with DD (nm), 0 is determine from initial coordinates" },
+ +    { "-rcon",    FALSE, etREAL, {&rconstr},
+ +      "Maximum distance for P-LINCS (nm), 0 is estimate" },
+ +    { "-dlb",     FALSE, etENUM, {dddlb_opt},
+ +      "Dynamic load balancing (with DD)" },
+ +    { "-dds",     FALSE, etREAL, {&dlb_scale},
+ +      "Minimum allowed dlb scaling of the DD cell size" },
+ +    { "-ddcsx",   FALSE, etSTR, {&ddcsx},
+ +      "HIDDENThe DD cell sizes in x" },
+ +    { "-ddcsy",   FALSE, etSTR, {&ddcsy},
+ +      "HIDDENThe DD cell sizes in y" },
+ +    { "-ddcsz",   FALSE, etSTR, {&ddcsz},
+ +      "HIDDENThe DD cell sizes in z" },
+ +    { "-gcom",    FALSE, etINT,{&nstglobalcomm},
+ +      "Global communication frequency" },
+ +    { "-v",       FALSE, etBOOL,{&bVerbose},  
+ +      "Be loud and noisy" },
+ +    { "-compact", FALSE, etBOOL,{&bCompact},  
+ +      "Write a compact log file" },
+ +    { "-seppot",  FALSE, etBOOL, {&bSepPot},
+ +      "Write separate V and dVdl terms for each interaction type and node to the log file(s)" },
+ +    { "-pforce",  FALSE, etREAL, {&pforce},
+ +      "Print all forces larger than this (kJ/mol nm)" },
+ +    { "-reprod",  FALSE, etBOOL,{&bReproducible},  
+ +      "Try to avoid optimizations that affect binary reproducibility" },
+ +    { "-cpt",     FALSE, etREAL, {&cpt_period},
+ +      "Checkpoint interval (minutes)" },
+ +    { "-cpnum",   FALSE, etBOOL, {&bKeepAndNumCPT},
+ +      "Keep and number checkpoint files" },
+ +    { "-append",  FALSE, etBOOL, {&bAppendFiles},
+ +      "Append to previous output files when continuing from checkpoint instead of adding the simulation part number to all file names" },
+ +    { "-maxh",   FALSE, etREAL, {&max_hours},
+ +      "Terminate after 0.99 times this time (hours)" },
+ +    { "-multi",   FALSE, etINT,{&nmultisim}, 
+ +      "Do multiple simulations in parallel" },
+ +    { "-replex",  FALSE, etINT, {&repl_ex_nst}, 
+ +      "Attempt replica exchange every # steps" },
+ +    { "-reseed",  FALSE, etINT, {&repl_ex_seed}, 
+ +      "Seed for replica exchange, -1 is generate a seed" },
+ +    { "-rerunvsite", FALSE, etBOOL, {&bRerunVSite},
+ +      "HIDDENRecalculate virtual site coordinates with [TT]-rerun[tt]" },
+ +    { "-ionize",  FALSE, etBOOL,{&bIonize},
+ +      "Do a simulation including the effect of an X-Ray bombardment on your system" },
+ +    { "-confout", FALSE, etBOOL, {&bConfout},
+ +      "HIDDENWrite the last configuration with [TT]-c[tt] and force checkpointing at the last step" },
+ +    { "-stepout", FALSE, etINT, {&nstepout},
+ +      "HIDDENFrequency of writing the remaining runtime" },
+ +    { "-resetstep", FALSE, etINT, {&resetstep},
+ +      "HIDDENReset cycle counters after these many time steps" },
+ +    { "-resethway", FALSE, etBOOL, {&bResetCountersHalfWay},
+ +      "HIDDENReset the cycle counters after half the number of steps or halfway [TT]-maxh[tt]" }
+ +#ifdef GMX_OPENMM
+ +    ,
+ +    { "-device",  FALSE, etSTR, {&deviceOptions},
+ +      "Device option string" }
+ +#endif
+ +  };
+ +  gmx_edsam_t  ed;
+ +  unsigned long Flags, PCA_Flags;
+ +  ivec     ddxyz;
+ +  int      dd_node_order;
+ +  gmx_bool     bAddPart;
+ +  FILE     *fplog,*fptest;
+ +  int      sim_part,sim_part_fn;
+ +  const char *part_suffix=".part";
+ +  char     suffix[STRLEN];
+ +  int      rc;
++  char **multidir=NULL;
+ +
+ +
+ +  cr = init_par(&argc,&argv);
+ +
+ +  if (MASTER(cr))
+ +    CopyRight(stderr, argv[0]);
+ +
+ +  PCA_Flags = (PCA_KEEP_ARGS | PCA_NOEXIT_ON_ARGS | PCA_CAN_SET_DEFFNM
+ +             | (MASTER(cr) ? 0 : PCA_QUIET));
+ +  
+ +
+ +  /* Uncomment the code below to do fexist calls only on the master;
+ +   * this does not work with rerun or tables at the moment
+ +   * also comment out the version of init_forcerec in md.c 
+ +   * with NULL instead of opt2fn
+ +   */
+ +  /*
+ +     if (!MASTER(cr))
+ +     {
+ +     PCA_Flags |= PCA_NOT_READ_NODE;
+ +     }
+ +     */
+ +
+ +  parse_common_args(&argc,argv,PCA_Flags, NFILE,fnm,asize(pa),pa,
+ +                    asize(desc),desc,0,NULL, &oenv);
+ +
++
++
+ +  /* we set these early because they might be used in init_multisystem() 
+ +     Note that there is the potential for npme>nnodes until the number of
+ +     threads is set later on, if there's thread parallelization. That shouldn't
+ +     lead to problems. */ 
+ +  dd_node_order = nenum(ddno_opt);
+ +  cr->npmenodes = npme;
+ +
+ +#ifndef GMX_THREADS
+ +  nthreads=1;
+ +#endif
+ +
++  /* now check the -multi and -multidir option */
++  if (opt2bSet("-multidir", NFILE, fnm))
++  {
++      int i;
++      if (nmultisim > 0)
++      {
++          gmx_fatal(FARGS, "mdrun -multi and -multidir options are mutually exclusive.");
++      }
++      nmultisim = opt2fns(&multidir, "-multidir", NFILE, fnm);
++  }
++
+ +
+ +  if (repl_ex_nst != 0 && nmultisim < 2)
+ +      gmx_fatal(FARGS,"Need at least two replicas for replica exchange (option -multi)");
+ +
+ +  if (nmultisim > 1) {
+ +#ifndef GMX_THREADS
++    gmx_bool bParFn = (multidir == NULL);
++    init_multisystem(cr, nmultisim, multidir, NFILE, fnm, bParFn);
+ +#else
+ +    gmx_fatal(FARGS,"mdrun -multi is not supported with the thread library.Please compile GROMACS with MPI support");
+ +#endif
+ +  }
+ +
+ +  bAddPart = !bAppendFiles;
+ +
+ +  /* Check if there is ANY checkpoint file available */       
+ +  sim_part    = 1;
+ +  sim_part_fn = sim_part;
+ +  if (opt2bSet("-cpi",NFILE,fnm))
+ +  {
+ +      if (bSepPot && bAppendFiles)
+ +      {
+ +          gmx_fatal(FARGS,"Output file appending is not supported with -seppot");
+ +      }
+ +
+ +      bAppendFiles =
+ +                read_checkpoint_simulation_part(opt2fn_master("-cpi", NFILE,
+ +                                                              fnm,cr),
+ +                                                &sim_part_fn,NULL,cr,
+ +                                                bAppendFiles,NFILE,fnm,
+ +                                                part_suffix,&bAddPart);
+ +      if (sim_part_fn==0 && MASTER(cr))
+ +      {
+ +          fprintf(stdout,"No previous checkpoint file present, assuming this is a new run.\n");
+ +      }
+ +      else
+ +      {
+ +          sim_part = sim_part_fn + 1;
+ +      }
+ +  } 
+ +  else
+ +  {
+ +      bAppendFiles = FALSE;
+ +  }
+ +
+ +  if (!bAppendFiles)
+ +  {
+ +      sim_part_fn = sim_part;
+ +  }
+ +
+ +  if (bAddPart)
+ +  {
+ +      /* Rename all output files (except checkpoint files) */
+ +      /* create new part name first (zero-filled) */
+ +      sprintf(suffix,"%s%04d",part_suffix,sim_part_fn);
+ +
+ +      add_suffix_to_output_names(fnm,NFILE,suffix);
+ +      if (MASTER(cr))
+ +      {
+ +          fprintf(stdout,"Checkpoint file is from part %d, new output files will be suffixed '%s'.\n",sim_part-1,suffix);
+ +      }
+ +  }
+ +
+ +  Flags = opt2bSet("-rerun",NFILE,fnm) ? MD_RERUN : 0;
+ +  Flags = Flags | (bSepPot       ? MD_SEPPOT       : 0);
+ +  Flags = Flags | (bIonize       ? MD_IONIZE       : 0);
+ +  Flags = Flags | (bPartDec      ? MD_PARTDEC      : 0);
+ +  Flags = Flags | (bDDBondCheck  ? MD_DDBONDCHECK  : 0);
+ +  Flags = Flags | (bDDBondComm   ? MD_DDBONDCOMM   : 0);
+ +  Flags = Flags | (bConfout      ? MD_CONFOUT      : 0);
+ +  Flags = Flags | (bRerunVSite   ? MD_RERUN_VSITE  : 0);
+ +  Flags = Flags | (bReproducible ? MD_REPRODUCIBLE : 0);
+ +  Flags = Flags | (bAppendFiles  ? MD_APPENDFILES  : 0); 
+ +  Flags = Flags | (bKeepAndNumCPT ? MD_KEEPANDNUMCPT : 0); 
+ +  Flags = Flags | (sim_part>1    ? MD_STARTFROMCPT : 0); 
+ +  Flags = Flags | (bResetCountersHalfWay ? MD_RESETCOUNTERSHALFWAY : 0);
+ +
+ +
+ +  /* We postpone opening the log file if we are appending, so we can 
+ +     first truncate the old log file and append to the correct position 
+ +     there instead.  */
+ +  if ((MASTER(cr) || bSepPot) && !bAppendFiles) 
+ +  {
+ +      gmx_log_open(ftp2fn(efLOG,NFILE,fnm),cr,!bSepPot,Flags,&fplog);
+ +      CopyRight(fplog,argv[0]);
+ +      please_cite(fplog,"Hess2008b");
+ +      please_cite(fplog,"Spoel2005a");
+ +      please_cite(fplog,"Lindahl2001a");
+ +      please_cite(fplog,"Berendsen95a");
+ +  }
+ +  else if (!MASTER(cr) && bSepPot)
+ +  {
+ +      gmx_log_open(ftp2fn(efLOG,NFILE,fnm),cr,!bSepPot,Flags,&fplog);
+ +  }
+ +  else
+ +  {
+ +      fplog = NULL;
+ +  }
+ +
+ +  ddxyz[XX] = (int)(realddxyz[XX] + 0.5);
+ +  ddxyz[YY] = (int)(realddxyz[YY] + 0.5);
+ +  ddxyz[ZZ] = (int)(realddxyz[ZZ] + 0.5);
+ +
+ +  rc = mdrunner(nthreads, fplog,cr,NFILE,fnm,oenv,bVerbose,bCompact,
+ +                nstglobalcomm, ddxyz,dd_node_order,rdd,rconstr,
+ +                dddlb_opt[0],dlb_scale,ddcsx,ddcsy,ddcsz,
+ +                nstepout,resetstep,nmultisim,repl_ex_nst,repl_ex_seed,
+ +                pforce, cpt_period,max_hours,deviceOptions,Flags);
+ +
+ +  if (gmx_parallel_env_initialized())
+ +      gmx_finalize();
+ +
+ +  if (MULTIMASTER(cr)) {
+ +      thanx(stderr);
+ +  }
+ +
+ +  /* Log file has to be closed in mdrunner if we are appending to it 
+ +     (fplog not set here) */
+ +  if (MASTER(cr) && !bAppendFiles) 
+ +  {
+ +      gmx_log_close(fplog);
+ +  }
+ +
+ +  return rc;
+ +}
+ +
diff --cc src/programs/mdrun/repl_ex.c
Simple merge
diff --cc src/programs/mdrun/runner.c

index 3c7f97e27e18bf6f639b950603777f0b0c6dcf0a,0000000000000000000000000000000000000000..aacc8a6034b03576aa12ce2a61696948ec8c3d88

mode 100644,000000..100644
--- 1/src/programs/mdrun/runner.c
--- /dev/null
+++ b/src/programs/mdrun/runner.c
@@@ -1,930 -1,0 +1,914 @@@
-             nthreads = tMPI_Get_recommended_nthreads();
+ +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ + *
+ + * 
+ + *                This source code is part of
+ + * 
+ + *                 G   R   O   M   A   C   S
+ + * 
+ + *          GROningen MAchine for Chemical Simulations
+ + * 
+ + *                        VERSION 3.2.0
+ + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ + * Copyright (c) 2001-2004, The GROMACS development team,
+ + * check out http://www.gromacs.org for more information.
+ +
+ + * This program is free software; you can redistribute it and/or
+ + * modify it under the terms of the GNU General Public License
+ + * as published by the Free Software Foundation; either version 2
+ + * of the License, or (at your option) any later version.
+ + * 
+ + * If you want to redistribute modifications, please consider that
+ + * scientific software is very special. Version control is crucial -
+ + * bugs must be traceable. We will be happy to consider code for
+ + * inclusion in the official distribution, but derived work must not
+ + * be called official GROMACS. Details are found in the README & COPYING
+ + * files - if they are missing, get the official version at www.gromacs.org.
+ + * 
+ + * To help us fund GROMACS development, we humbly ask that you cite
+ + * the papers on the package - you can find them in the top README file.
+ + * 
+ + * For more info, check our website at http://www.gromacs.org
+ + * 
+ + * And Hey:
+ + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
+ + */
+ +#ifdef HAVE_CONFIG_H
+ +#include <config.h>
+ +#endif
+ +
+ +#include <signal.h>
+ +#include <stdlib.h>
+ +
+ +#if ((defined WIN32 || defined _WIN32 || defined WIN64 || defined _WIN64) && !defined __CYGWIN__ && !defined __CYGWIN32__)
+ +/* _isnan() */
+ +#include <float.h>
+ +#endif
+ +
+ +#include "typedefs.h"
+ +#include "smalloc.h"
+ +#include "sysstuff.h"
+ +#include "statutil.h"
+ +#include "mdrun.h"
+ +#include "network.h"
+ +#include "pull.h"
+ +#include "pull_rotation.h"
+ +#include "names.h"
+ +#include "disre.h"
+ +#include "orires.h"
+ +#include "dihre.h"
+ +#include "pppm.h"
+ +#include "pme.h"
+ +#include "mdatoms.h"
+ +#include "repl_ex.h"
+ +#include "qmmm.h"
+ +#include "mpelogging.h"
+ +#include "domdec.h"
+ +#include "partdec.h"
+ +#include "coulomb.h"
+ +#include "constr.h"
+ +#include "mvdata.h"
+ +#include "checkpoint.h"
+ +#include "mtop_util.h"
+ +#include "sighandler.h"
+ +#include "tpxio.h"
+ +#include "txtdump.h"
+ +#include "membed.h"
+ +
+ +#include "md_openmm.h"
+ +
+ +#ifdef GMX_LIB_MPI
+ +#include <mpi.h>
+ +#endif
+ +#ifdef GMX_THREADS
+ +#include "tmpi.h"
+ +#endif
+ +
+ +#ifdef GMX_FAHCORE
+ +#include "corewrap.h"
+ +#endif
+ +
+ +#ifdef GMX_OPENMM
+ +#include "md_openmm.h"
+ +#endif
+ +
+ +
+ +typedef struct { 
+ +    gmx_integrator_t *func; /* integrator routine, e.g. do_md or do_steep */
+ +} gmx_intp_t;
+ +
+ +/* One entry per integrator type, looked up as integrator[inputrec->eI]; the order should match the eI array in include/types/enums.h */
+ +#ifdef GMX_OPENMM  /* FIXME do_md_openmm needs fixing */
+ +const gmx_intp_t integrator[eiNR] = { {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm}, {do_md_openmm},{do_md_openmm}};
+ +#else
+ +const gmx_intp_t integrator[eiNR] = { {do_md}, {do_steep}, {do_cg}, {do_md}, {do_md}, {do_nm}, {do_lbfgs}, {do_tpi}, {do_tpi}, {do_md}, {do_md},{do_md}};
+ +#endif
+ +
+ +gmx_large_int_t     deform_init_init_step_tpx; /* set from inputrec->init_step in mdrunner() when DEFORM() holds */
+ +matrix              deform_init_box_tpx; /* reference box copied in mdrunner() when DEFORM() holds */
+ +#ifdef GMX_THREADS
+ +tMPI_Thread_mutex_t deform_init_box_mutex=TMPI_THREAD_MUTEX_INITIALIZER; /* locked around the writes to the two variables above */
+ +#endif
+ +
+ +
+ +#ifdef GMX_THREADS
+ +struct mdrunner_arglist /* the mdrunner() arguments, packed so they can be passed to mdrunner_start_fn() through one void pointer */
+ +{
+ +    FILE *fplog;
+ +    t_commrec *cr;
+ +    int nfile;
+ +    const t_filenm *fnm;
+ +    output_env_t oenv;
+ +    gmx_bool bVerbose;
+ +    gmx_bool bCompact;
+ +    int nstglobalcomm;
+ +    ivec ddxyz;
+ +    int dd_node_order;
+ +    real rdd;
+ +    real rconstr;
+ +    const char *dddlb_opt;
+ +    real dlb_scale;
+ +    const char *ddcsx;
+ +    const char *ddcsy;
+ +    const char *ddcsz;
+ +    int nstepout;
+ +    int resetstep;
+ +    int nmultisim;
+ +    int repl_ex_nst;
+ +    int repl_ex_seed;
+ +    real pforce;
+ +    real cpt_period;
+ +    real max_hours;
+ +    const char *deviceOptions;
+ +    unsigned long Flags;
+ +    int ret; /* return value of mdrunner(), written by mdrunner_start_fn() */
+ +};
+ +
+ +
+ +/* Thread start function handed to tMPI_Init_fn(). Unpacks the
+ +   mdrunner_arglist passed as arg, makes a commrec with init_par_threads()
+ +   and calls mdrunner(), storing its return value in the arglist's ret. */
+ +static void mdrunner_start_fn(void *arg)
+ +{
+ +    struct mdrunner_arglist *mda=(struct mdrunner_arglist*)arg;
+ +    struct mdrunner_arglist mc=*mda; /* copy the arg list to make sure 
+ +                                        that it's thread-local. This doesn't
+ +                                        copy pointed-to items, of course,
+ +                                        but those are all const. */
+ +    t_commrec *cr;  /* we need a local version of this */
+ +    FILE *fplog=NULL;
+ +    t_filenm *fnm;
+ +
+ +    fnm = dup_tfn(mc.nfile, mc.fnm);
+ +
+ +    cr = init_par_threads(mc.cr);
+ +
+ +    if (MASTER(cr))
+ +    {
+ +        fplog=mc.fplog; /* every other thread keeps fplog == NULL */
+ +    }
+ +
+ +    mda->ret=mdrunner(cr->nnodes, fplog, cr, mc.nfile, fnm, mc.oenv, 
+ +                      mc.bVerbose, mc.bCompact, mc.nstglobalcomm, 
+ +                      mc.ddxyz, mc.dd_node_order, mc.rdd,
+ +                      mc.rconstr, mc.dddlb_opt, mc.dlb_scale, 
+ +                      mc.ddcsx, mc.ddcsy, mc.ddcsz, mc.nstepout, mc.resetstep, 
+ +                      mc.nmultisim, mc.repl_ex_nst, mc.repl_ex_seed, mc.pforce, 
+ +                      mc.cpt_period, mc.max_hours, mc.deviceOptions, mc.Flags);
+ +}
+ +
+ +/* Called by mdrunner() to start nthreads threads (including the main thread)
+ +   for thread-parallel runs; the spawned threads run mdrunner_start_fn(), which
+ +   calls mdrunner(). Returns cr itself if nthreads<2, NULL if tMPI_Init_fn()
+ +   fails, else a new commrec. Other options are the same as for mdrunner(). */
+ +static t_commrec *mdrunner_start_threads(int nthreads, 
+ +              FILE *fplog,t_commrec *cr,int nfile, 
+ +              const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose,
+ +              gmx_bool bCompact, int nstglobalcomm,
+ +              ivec ddxyz,int dd_node_order,real rdd,real rconstr,
+ +              const char *dddlb_opt,real dlb_scale,
+ +              const char *ddcsx,const char *ddcsy,const char *ddcsz,
+ +              int nstepout,int resetstep,int nmultisim,int repl_ex_nst,
+ +              int repl_ex_seed, real pforce,real cpt_period, real max_hours, 
+ +              const char *deviceOptions, unsigned long Flags)
+ +{
+ +    int ret;
+ +    struct mdrunner_arglist *mda;
+ +    t_commrec *crn; /* the new commrec */
+ +    t_filenm *fnmn;
+ +
+ +    /* first check whether we even need to start tMPI */
+ +    if (nthreads<2)
+ +        return cr;
+ +
+ +    /* a few small, one-time, almost unavoidable memory leaks: */
+ +    snew(mda,1);
+ +    fnmn=dup_tfn(nfile, fnm);
+ +
+ +    /* fill the data structure to pass as void pointer to thread start fn */
+ +    mda->fplog=fplog;
+ +    mda->cr=cr;
+ +    mda->nfile=nfile;
+ +    mda->fnm=fnmn;
+ +    mda->oenv=oenv;
+ +    mda->bVerbose=bVerbose;
+ +    mda->bCompact=bCompact;
+ +    mda->nstglobalcomm=nstglobalcomm;
+ +    mda->ddxyz[XX]=ddxyz[XX];
+ +    mda->ddxyz[YY]=ddxyz[YY];
+ +    mda->ddxyz[ZZ]=ddxyz[ZZ];
+ +    mda->dd_node_order=dd_node_order;
+ +    mda->rdd=rdd;
+ +    mda->rconstr=rconstr;
+ +    mda->dddlb_opt=dddlb_opt;
+ +    mda->dlb_scale=dlb_scale;
+ +    mda->ddcsx=ddcsx;
+ +    mda->ddcsy=ddcsy;
+ +    mda->ddcsz=ddcsz;
+ +    mda->nstepout=nstepout;
+ +    mda->resetstep=resetstep;
+ +    mda->nmultisim=nmultisim;
+ +    mda->repl_ex_nst=repl_ex_nst;
+ +    mda->repl_ex_seed=repl_ex_seed;
+ +    mda->pforce=pforce;
+ +    mda->cpt_period=cpt_period;
+ +    mda->max_hours=max_hours;
+ +    mda->deviceOptions=deviceOptions;
+ +    mda->Flags=Flags;
+ +
+ +    fprintf(stderr, "Starting %d threads\n",nthreads);
+ +    fflush(stderr);
+ +    /* now spawn new threads that start mdrunner_start_fn(), while 
+ +       the main thread returns */
+ +    ret=tMPI_Init_fn(TRUE, nthreads, mdrunner_start_fn, (void*)(mda) );
+ +    if (ret!=TMPI_SUCCESS)
+ +        return NULL;
+ +
+ +    /* make a new comm_rec to reflect the new situation */
+ +    crn=init_par_threads(cr);
+ +    return crn;
+ +}
+ +
+ +
+ +/* Get the number of threads to start, based on how many were requested,
+ +   which algorithms we're using, and how many atoms there are. */
+ +static int get_nthreads(int nthreads_requested, t_inputrec *inputrec,
+ +                        gmx_mtop_t *mtop)
+ +{
+ +    int nthreads,nthreads_new;
+ +    int min_atoms_per_thread;
+ +    char *env;
+ +
+ +    nthreads = nthreads_requested;
+ +
+ +    /* no explicit request: use GMX_MAX_THREADS or the hardware thread count */
+ +    if (nthreads_requested < 1)
+ +    {
+ +        if ((env = getenv("GMX_MAX_THREADS")) != NULL)
+ +        {
+ +            nthreads = 0;
+ +            sscanf(env,"%d",&nthreads);
+ +            if (nthreads < 1)
+ +            {
+ +                gmx_fatal(FARGS,"GMX_MAX_THREADS (%d) should be larger than 0",
+ +                          nthreads);
+ +            }
+ +        }
+ +        else
+ +        {
-                 if (!MASTER(cr))
-                 {
-                     snew(state,1);
-                 }
++            nthreads = tMPI_Thread_get_hw_number();
+ +        }
+ +    }
+ +
+ +    if (inputrec->eI == eiNM || EI_TPI(inputrec->eI))
+ +    {
+ +        /* Steps are divided over the nodes instead of splitting the atoms */
+ +        min_atoms_per_thread = 0;
+ +    }
+ +    else
+ +    {
+ +        min_atoms_per_thread = MIN_ATOMS_PER_THREAD;
+ +    }
+ +
+ +    /* Check if an algorithm does not support parallel simulation.  */
+ +    if (nthreads != 1 && 
+ +        ( inputrec->eI == eiLBFGS ||
+ +          inputrec->coulombtype == eelEWALD ) )
+ +    {
+ +        fprintf(stderr,"\nThe integration or electrostatics algorithm doesn't support parallel runs. Not starting any threads.\n");
+ +        nthreads = 1;
+ +    }
+ +    else if (nthreads_requested < 1 &&
+ +             mtop->natoms/nthreads < min_atoms_per_thread)
+ +    {
+ +        /* the thread number was chosen automatically, but there are too many
+ +           threads (too few atoms per thread) */
+ +        nthreads_new = max(1,mtop->natoms/min_atoms_per_thread);
+ +
+ +        if (nthreads_new > 8 || (nthreads == 8 && nthreads_new > 4))
+ +        {
+ +            /* Use only multiples of 4 above 8 threads
+ +             * or with an 8-core processor
+ +             * (to avoid 6 threads on 8 core processors with 4 real cores).
+ +             */
+ +            nthreads_new = (nthreads_new/4)*4;
+ +        }
+ +        else if (nthreads_new > 4)
+ +        {
+ +            /* Avoid 5 or 7 threads */
+ +            nthreads_new = (nthreads_new/2)*2;
+ +        }
+ +
+ +        nthreads = nthreads_new;
+ +
+ +        fprintf(stderr,"\n");
+ +        fprintf(stderr,"NOTE: Parallelization is limited by the small number of atoms,\n");
+ +        fprintf(stderr,"      only starting %d threads.\n",nthreads);
+ +        fprintf(stderr,"      You can use the -nt option to optimize the number of threads.\n\n");
+ +    }
+ +    return nthreads;
+ +}
+ +#endif
+ +
+ +
+ +int mdrunner(int nthreads_requested, FILE *fplog,t_commrec *cr,int nfile,
+ +             const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose,
+ +             gmx_bool bCompact, int nstglobalcomm,
+ +             ivec ddxyz,int dd_node_order,real rdd,real rconstr,
+ +             const char *dddlb_opt,real dlb_scale,
+ +             const char *ddcsx,const char *ddcsy,const char *ddcsz,
+ +             int nstepout,int resetstep,int nmultisim,int repl_ex_nst,
+ +             int repl_ex_seed, real pforce,real cpt_period,real max_hours,
+ +             const char *deviceOptions, unsigned long Flags)
+ +{
+ +    double     nodetime=0,realtime;
+ +    t_inputrec *inputrec;
+ +    t_state    *state=NULL;
+ +    matrix     box;
+ +    gmx_ddbox_t ddbox={0};
+ +    int        npme_major,npme_minor;
+ +    real       tmpr1,tmpr2;
+ +    t_nrnb     *nrnb;
+ +    gmx_mtop_t *mtop=NULL;
+ +    t_mdatoms  *mdatoms=NULL;
+ +    t_forcerec *fr=NULL;
+ +    t_fcdata   *fcd=NULL;
+ +    real       ewaldcoeff=0;
+ +    gmx_pme_t  *pmedata=NULL;
+ +    gmx_vsite_t *vsite=NULL;
+ +    gmx_constr_t constr;
+ +    int        i,m,nChargePerturbed=-1,status,nalloc;
+ +    char       *gro;
+ +    gmx_wallcycle_t wcycle;
+ +    gmx_bool       bReadRNG,bReadEkin;
+ +    int        list;
+ +    gmx_runtime_t runtime;
+ +    int        rc;
+ +    gmx_large_int_t reset_counters;
+ +    gmx_edsam_t ed=NULL;
+ +    t_commrec   *cr_old=cr; 
+ +    int         nthreads=1;
+ +    gmx_membed_t *membed=NULL;
+ +
+ +    /* CAUTION: threads may be started later on in this function, so
+ +       cr doesn't reflect the final parallel state right now */
+ +    snew(inputrec,1);
+ +    snew(mtop,1);
+ +
+ +    if (bVerbose && SIMMASTER(cr))
+ +    {
+ +        fprintf(stderr,"Getting Loaded...\n");
+ +    }
+ +    
+ +    if (Flags & MD_APPENDFILES) 
+ +    {
+ +        fplog = NULL;
+ +    }
+ +
+ +    snew(state,1);
+ +    if (MASTER(cr)) 
+ +    {
+ +        /* Read (nearly) all data required for the simulation */
+ +        read_tpx_state(ftp2fn(efTPX,nfile,fnm),inputrec,state,NULL,mtop);
+ +
+ +        /* NOW the threads will be started: */
+ +#ifdef GMX_THREADS
+ +        nthreads = get_nthreads(nthreads_requested, inputrec, mtop);
+ +
+ +        if (nthreads > 1)
+ +        {
+ +            /* now start the threads. */
+ +            cr=mdrunner_start_threads(nthreads, fplog, cr_old, nfile, fnm, 
+ +                                      oenv, bVerbose, bCompact, nstglobalcomm, 
+ +                                      ddxyz, dd_node_order, rdd, rconstr, 
+ +                                      dddlb_opt, dlb_scale, ddcsx, ddcsy, ddcsz,
+ +                                      nstepout, resetstep, nmultisim, 
+ +                                      repl_ex_nst, repl_ex_seed, pforce, 
+ +                                      cpt_period, max_hours, deviceOptions, 
+ +                                      Flags);
+ +            /* the main thread continues here with a new cr. We don't deallocate
+ +               the old cr because other threads may still be reading it. */
+ +            if (cr == NULL)
+ +            {
+ +                gmx_comm("Failed to spawn threads");
+ +            }
+ +        }
+ +#endif
+ +    }
+ +    /* END OF CAUTION: cr is now reliable */
+ +
+ +    /* g_membed initialisation *
+ +     * Because we change the mtop, init_membed is called before the init_parallel *
+ +     * (in case we ever want to make it run in parallel) */
+ +    if (opt2bSet("-membed",nfile,fnm))
+ +    {
+ +      fprintf(stderr,"Entering membed code");
+ +        snew(membed,1);
+ +        init_membed(fplog,membed,nfile,fnm,mtop,inputrec,state,cr,&cpt_period);
+ +    }
+ +
+ +    if (PAR(cr))
+ +    {
+ +        /* now broadcast everything to the non-master nodes/threads: */
+ +        init_parallel(fplog, cr, inputrec, mtop);
+ +    }
+ +    if (fplog != NULL)
+ +    {
+ +        pr_inputrec(fplog,0,"Input Parameters",inputrec,FALSE);
+ +    }
+ +
+ +    /* now make sure the state is initialized and propagated */
+ +    set_state_entries(state,inputrec,cr->nnodes);
+ +
+ +    /* A parallel command line option consistency check that we can
+ +       only do after any threads have started. */
+ +    if (!PAR(cr) &&
+ +        (ddxyz[XX] > 1 || ddxyz[YY] > 1 || ddxyz[ZZ] > 1 || cr->npmenodes > 0))
+ +    {
+ +        gmx_fatal(FARGS,
+ +                  "The -dd or -npme option request a parallel simulation, "
+ +#ifndef GMX_MPI
+ +                  "but mdrun was compiled without threads or MPI enabled"
+ +#else
+ +#ifdef GMX_THREADS
+ +                  "but the number of threads (option -nt) is 1"
+ +#else
+ +                  "but mdrun was not started through mpirun/mpiexec or only one process was requested through mpirun/mpiexec" 
+ +#endif
+ +#endif
+ +            );
+ +    }
+ +
+ +    if ((Flags & MD_RERUN) &&
+ +        (EI_ENERGY_MINIMIZATION(inputrec->eI) || eiNM == inputrec->eI))
+ +    {
+ +        gmx_fatal(FARGS, "The .mdp file specified an energy mininization or normal mode algorithm, and these are not compatible with mdrun -rerun");
+ +    }
+ +
+ +    if (can_use_allvsall(inputrec,mtop,TRUE,cr,fplog))
+ +    {
+ +        /* All-vs-all loops do not work with domain decomposition */
+ +        Flags |= MD_PARTDEC;
+ +    }
+ +
+ +    if (!EEL_PME(inputrec->coulombtype) || (Flags & MD_PARTDEC))
+ +    {
+ +        cr->npmenodes = 0;
+ +    }
+ +
+ +#ifdef GMX_FAHCORE
+ +    fcRegisterSteps(inputrec->nsteps,inputrec->init_step);
+ +#endif
+ +
+ +    /* NMR restraints must be initialized before load_checkpoint,
+ +     * since with time averaging the history is added to t_state.
+ +     * For proper consistency check we therefore need to extend
+ +     * t_state here.
+ +     * So the PME-only nodes (if present) will also initialize
+ +     * the distance restraints.
+ +     */
+ +    snew(fcd,1);
+ +
+ +    /* This needs to be called before read_checkpoint to extend the state */
+ +    init_disres(fplog,mtop,inputrec,cr,Flags & MD_PARTDEC,fcd,state);
+ +
+ +    if (gmx_mtop_ftype_count(mtop,F_ORIRES) > 0)
+ +    {
+ +        if (PAR(cr) && !(Flags & MD_PARTDEC))
+ +        {
+ +            gmx_fatal(FARGS,"Orientation restraints do not work (yet) with domain decomposition, use particle decomposition (mdrun option -pd)");
+ +        }
+ +        /* Orientation restraints */
+ +        if (MASTER(cr))
+ +        {
+ +            init_orires(fplog,mtop,state->x,inputrec,cr->ms,&(fcd->orires),
+ +                        state);
+ +        }
+ +    }
+ +
+ +    if (DEFORM(*inputrec))
+ +    {
+ +        /* Store the deform reference box before reading the checkpoint */
+ +        if (SIMMASTER(cr))
+ +        {
+ +            copy_mat(state->box,box);
+ +        }
+ +        if (PAR(cr))
+ +        {
+ +            gmx_bcast(sizeof(box),box,cr);
+ +        }
+ +        /* Because we do not have the update struct available yet
+ +         * in which the reference values should be stored,
+ +         * we store them temporarily in static variables.
+ +         * This should be thread safe, since they are only written once
+ +         * and with identical values.
+ +         */
+ +#ifdef GMX_THREADS
+ +        tMPI_Thread_mutex_lock(&deform_init_box_mutex);
+ +#endif
+ +        deform_init_init_step_tpx = inputrec->init_step;
+ +        copy_mat(box,deform_init_box_tpx);
+ +#ifdef GMX_THREADS
+ +        tMPI_Thread_mutex_unlock(&deform_init_box_mutex);
+ +#endif
+ +    }
+ +
+ +    if (opt2bSet("-cpi",nfile,fnm)) 
+ +    {
+ +        /* Check if checkpoint file exists before doing continuation.
+ +         * This way we can use identical input options for the first and subsequent runs...
+ +         */
+ +        if( gmx_fexist_master(opt2fn_master("-cpi",nfile,fnm,cr),cr) )
+ +        {
+ +            load_checkpoint(opt2fn_master("-cpi",nfile,fnm,cr),&fplog,
+ +                            cr,Flags & MD_PARTDEC,ddxyz,
+ +                            inputrec,state,&bReadRNG,&bReadEkin,
+ +                            (Flags & MD_APPENDFILES));
+ +            
+ +            if (bReadRNG)
+ +            {
+ +                Flags |= MD_READ_RNG;
+ +            }
+ +            if (bReadEkin)
+ +            {
+ +                Flags |= MD_READ_EKIN;
+ +            }
+ +        }
+ +    }
+ +
+ +    if (((MASTER(cr) || (Flags & MD_SEPPOT)) && (Flags & MD_APPENDFILES))
+ +#ifdef GMX_THREADS
+ +        /* With thread MPI only the master node/thread exists in mdrun.c,
+ +         * therefore non-master nodes need to open the "seppot" log file here.
+ +         */
+ +        || (!MASTER(cr) && (Flags & MD_SEPPOT))
+ +#endif
+ +        )
+ +    {
+ +        gmx_log_open(ftp2fn(efLOG,nfile,fnm),cr,!(Flags & MD_SEPPOT),
+ +                             Flags,&fplog);
+ +    }
+ +
+ +    if (SIMMASTER(cr)) 
+ +    {
+ +        copy_mat(state->box,box);
+ +    }
+ +
+ +    if (PAR(cr)) 
+ +    {
+ +        gmx_bcast(sizeof(box),box,cr);
+ +    }
+ +
+ +    /* Essential dynamics */
+ +    if (opt2bSet("-ei",nfile,fnm))
+ +    {
+ +        /* Open input and output files, allocate space for ED data structure */
+ +        ed = ed_open(nfile,fnm,Flags,cr);
+ +    }
+ +
+ +    if (bVerbose && SIMMASTER(cr))
+ +    {
+ +        fprintf(stderr,"Loaded with Money\n\n");
+ +    }
+ +
+ +    if (PAR(cr) && !((Flags & MD_PARTDEC) ||
+ +                     EI_TPI(inputrec->eI) ||
+ +                     inputrec->eI == eiNM))
+ +    {
+ +        cr->dd = init_domain_decomposition(fplog,cr,Flags,ddxyz,rdd,rconstr,
+ +                                           dddlb_opt,dlb_scale,
+ +                                           ddcsx,ddcsy,ddcsz,
+ +                                           mtop,inputrec,
+ +                                           box,state->x,
+ +                                           &ddbox,&npme_major,&npme_minor);
+ +
+ +        make_dd_communicators(fplog,cr,dd_node_order);
+ +
+ +        /* Set overallocation to avoid frequent reallocation of arrays */
+ +        set_over_alloc_dd(TRUE);
+ +    }
+ +    else
+ +    {
+ +        /* PME, if used, is done on all nodes with 1D decomposition */
+ +        cr->npmenodes = 0;
+ +        cr->duty = (DUTY_PP | DUTY_PME);
+ +        npme_major = 1;
+ +        npme_minor = 1;
+ +        if (!EI_TPI(inputrec->eI))
+ +        {
+ +            npme_major = cr->nnodes;
+ +        }
+ +        
+ +        if (inputrec->ePBC == epbcSCREW)
+ +        {
+ +            gmx_fatal(FARGS,
+ +                      "pbc=%s is only implemented with domain decomposition",
+ +                      epbc_names[inputrec->ePBC]);
+ +        }
+ +    }
+ +
+ +    if (PAR(cr))
+ +    {
+ +        /* After possible communicator splitting in make_dd_communicators,
+ +         * we can set up the intra/inter node communication.
+ +         */
+ +        gmx_setup_nodecomm(fplog,cr);
+ +    }
+ +
+ +    wcycle = wallcycle_init(fplog,resetstep,cr);
+ +    if (PAR(cr))
+ +    {
+ +        /* Master synchronizes its value of reset_counters with all nodes 
+ +         * including PME only nodes */
+ +        reset_counters = wcycle_get_reset_counters(wcycle);
+ +        gmx_bcast_sim(sizeof(reset_counters),&reset_counters,cr);
+ +        wcycle_set_reset_counters(wcycle, reset_counters);
+ +    }
+ +
+ +
+ +    snew(nrnb,1);
+ +    if (cr->duty & DUTY_PP)
+ +    {
+ +        /* For domain decomposition we allocate dynamically
+ +         * in dd_partition_system.
+ +         */
+ +        if (DOMAINDECOMP(cr))
+ +        {
+ +            bcast_state_setup(cr,state);
+ +        }
+ +        else
+ +        {
+ +            if (PAR(cr))
+ +            {
- 
- void md_print_warning(const t_commrec *cr,FILE *fplog,const char *buf)
- {
-     if (MASTER(cr))
-     {
-         fprintf(stderr,"\n%s\n",buf);
-     }
-     if (fplog)
-     {
-         fprintf(fplog,"\n%s\n",buf);
-     }
- }
+ +                bcast_state(cr,state,TRUE);
+ +            }
+ +        }
+ +
+ +        /* Dihedral Restraints */
+ +        if (gmx_mtop_ftype_count(mtop,F_DIHRES) > 0)
+ +        {
+ +            init_dihres(fplog,mtop,inputrec,fcd);
+ +        }
+ +
+ +        /* Initiate forcerecord */
+ +        fr = mk_forcerec();
+ +        init_forcerec(fplog,oenv,fr,fcd,inputrec,mtop,cr,box,FALSE,
+ +                      opt2fn("-table",nfile,fnm),
+ +                      opt2fn("-tablep",nfile,fnm),
+ +                      opt2fn("-tableb",nfile,fnm),FALSE,pforce);
+ +
+ +        /* version for PCA_NOT_READ_NODE (see md.c) */
+ +        /*init_forcerec(fplog,fr,fcd,inputrec,mtop,cr,box,FALSE,
+ +          "nofile","nofile","nofile",FALSE,pforce);
+ +          */        
+ +        fr->bSepDVDL = ((Flags & MD_SEPPOT) == MD_SEPPOT);
+ +
+ +        /* Initialize QM-MM */
+ +        if(fr->bQMMM)
+ +        {
+ +            init_QMMMrec(cr,box,mtop,inputrec,fr);
+ +        }
+ +
+ +        /* Initialize the mdatoms structure.
+ +         * mdatoms is not filled with atom data,
+ +         * as this can not be done now with domain decomposition.
+ +         */
+ +        mdatoms = init_mdatoms(fplog,mtop,inputrec->efep!=efepNO);
+ +
+ +        /* Initialize the virtual site communication */
+ +        vsite = init_vsite(mtop,cr);
+ +
+ +        calc_shifts(box,fr->shift_vec);
+ +
+ +        /* With periodic molecules the charge groups should be whole at start up
+ +         * and the virtual sites should not be far from their proper positions.
+ +         */
+ +        if (!inputrec->bContinuation && MASTER(cr) &&
+ +            !(inputrec->ePBC != epbcNONE && inputrec->bPeriodicMols))
+ +        {
+ +            /* Make molecules whole at start of run */
+ +            if (fr->ePBC != epbcNONE)
+ +            {
+ +                do_pbc_first_mtop(fplog,inputrec->ePBC,box,mtop,state->x);
+ +            }
+ +            if (vsite)
+ +            {
+ +                /* Correct initial vsite positions are required
+ +                 * for the initial distribution in the domain decomposition
+ +                 * and for the initial shell prediction.
+ +                 */
+ +                construct_vsites_mtop(fplog,vsite,mtop,state->x);
+ +            }
+ +        }
+ +
+ +        /* Initiate PPPM if necessary */
+ +        if (fr->eeltype == eelPPPM)
+ +        {
+ +            if (mdatoms->nChargePerturbed)
+ +            {
+ +                gmx_fatal(FARGS,"Free energy with %s is not implemented",
+ +                          eel_names[fr->eeltype]);
+ +            }
+ +            status = gmx_pppm_init(fplog,cr,oenv,FALSE,TRUE,box,
+ +                                   getenv("GMXGHAT"),inputrec, (Flags & MD_REPRODUCIBLE));
+ +            if (status != 0)
+ +            {
+ +                gmx_fatal(FARGS,"Error %d initializing PPPM",status);
+ +            }
+ +        }
+ +
+ +        if (EEL_PME(fr->eeltype))
+ +        {
+ +            ewaldcoeff = fr->ewaldcoeff;
+ +            pmedata = &fr->pmedata;
+ +        }
+ +        else
+ +        {
+ +            pmedata = NULL;
+ +        }
+ +    }
+ +    else
+ +    {
+ +        /* This is a PME only node */
+ +
+ +        /* We don't need the state */
+ +        done_state(state);
+ +
+ +        ewaldcoeff = calc_ewaldcoeff(inputrec->rcoulomb, inputrec->ewald_rtol);
+ +        snew(pmedata,1);
+ +    }
+ +
+ +    /* Initiate PME if necessary,
+ +     * either on all nodes or on dedicated PME nodes only. */
+ +    if (EEL_PME(inputrec->coulombtype))
+ +    {
+ +        if (mdatoms)
+ +        {
+ +            nChargePerturbed = mdatoms->nChargePerturbed;
+ +        }
+ +        if (cr->npmenodes > 0)
+ +        {
+ +            /* The PME only nodes need to know nChargePerturbed */
+ +            gmx_bcast_sim(sizeof(nChargePerturbed),&nChargePerturbed,cr);
+ +        }
+ +        if (cr->duty & DUTY_PME)
+ +        {
+ +            status = gmx_pme_init(pmedata,cr,npme_major,npme_minor,inputrec,
+ +                                  mtop ? mtop->natoms : 0,nChargePerturbed,
+ +                                  (Flags & MD_REPRODUCIBLE));
+ +            if (status != 0) 
+ +            {
+ +                gmx_fatal(FARGS,"Error %d initializing PME",status);
+ +            }
+ +        }
+ +    }
+ +
+ +
+ +    if (integrator[inputrec->eI].func == do_md
+ +#ifdef GMX_OPENMM
+ +        ||
+ +        integrator[inputrec->eI].func == do_md_openmm
+ +#endif
+ +        )
+ +    {
+ +        /* Turn on signal handling on all nodes */
+ +        /*
+ +         * (A user signal from the PME nodes (if any)
+ +         * is communicated to the PP nodes.
+ +         */
+ +        signal_handler_install();
+ +    }
+ +
+ +    if (cr->duty & DUTY_PP)
+ +    {
+ +        if (inputrec->ePull != epullNO)
+ +        {
+ +            /* Initialize pull code */
+ +            init_pull(fplog,inputrec,nfile,fnm,mtop,cr,oenv,
+ +                      EI_DYNAMICS(inputrec->eI) && MASTER(cr),Flags);
+ +        }
+ +        
+ +        if (inputrec->bRot)
+ +        {
+ +           /* Initialize enforced rotation code */
+ +           init_rot(fplog,inputrec,nfile,fnm,cr,state->x,state->box,mtop,oenv,
+ +                    bVerbose,Flags);
+ +        }
+ +
+ +        constr = init_constraints(fplog,mtop,inputrec,ed,state,cr);
+ +
+ +        if (DOMAINDECOMP(cr))
+ +        {
+ +            dd_init_bondeds(fplog,cr->dd,mtop,vsite,constr,inputrec,
+ +                            Flags & MD_DDBONDCHECK,fr->cginfo_mb);
+ +
+ +            set_dd_parameters(fplog,cr->dd,dlb_scale,inputrec,fr,&ddbox);
+ +
+ +            setup_dd_grid(fplog,cr->dd);
+ +        }
+ +
+ +        /* Now do whatever the user wants us to do (how flexible...) */
+ +        integrator[inputrec->eI].func(fplog,cr,nfile,fnm,
+ +                                      oenv,bVerbose,bCompact,
+ +                                      nstglobalcomm,
+ +                                      vsite,constr,
+ +                                      nstepout,inputrec,mtop,
+ +                                      fcd,state,
+ +                                      mdatoms,nrnb,wcycle,ed,fr,
+ +                                      repl_ex_nst,repl_ex_seed,
+ +                                      membed,
+ +                                      cpt_period,max_hours,
+ +                                      deviceOptions,
+ +                                      Flags,
+ +                                      &runtime);
+ +
+ +        if (inputrec->ePull != epullNO)
+ +        {
+ +            finish_pull(fplog,inputrec->pull);
+ +        }
+ +        
+ +        if (inputrec->bRot)
+ +        {
+ +            finish_rot(fplog,inputrec->rot);
+ +        }
+ +
+ +    } 
+ +    else 
+ +    {
+ +        /* do PME only */
+ +        gmx_pmeonly(*pmedata,cr,nrnb,wcycle,ewaldcoeff,FALSE,inputrec);
+ +    }
+ +
+ +    if (EI_DYNAMICS(inputrec->eI) || EI_TPI(inputrec->eI))
+ +    {
+ +        /* Some timing stats */  
+ +        if (SIMMASTER(cr))
+ +        {
+ +            if (runtime.proc == 0)
+ +            {
+ +                runtime.proc = runtime.real;
+ +            }
+ +        }
+ +        else
+ +        {
+ +            runtime.real = 0;
+ +        }
+ +    }
+ +
+ +    wallcycle_stop(wcycle,ewcRUN);
+ +
+ +    /* Finish up, write some stuff
+ +     * if rerunMD, don't write last frame again 
+ +     */
+ +    finish_run(fplog,cr,ftp2fn(efSTO,nfile,fnm),
+ +               inputrec,nrnb,wcycle,&runtime,
+ +               EI_DYNAMICS(inputrec->eI) && !MULTISIM(cr));
+ +    
+ +    if (opt2bSet("-membed",nfile,fnm))
+ +    {
+ +        sfree(membed);
+ +    }
+ +
+ +    /* Does what it says */  
+ +    print_date_and_time(fplog,cr->nodeid,"Finished mdrun",&runtime);
+ +
+ +    /* Close logfile already here if we were appending to it */
+ +    if (MASTER(cr) && (Flags & MD_APPENDFILES))
+ +    {
+ +        gmx_log_close(fplog);
+ +    } 
+ +
+ +    rc=(int)gmx_get_stop_condition();
+ +
+ +#ifdef GMX_THREADS
+ +    /* we need to join all threads. The sub-threads join when they
+ +       exit this function, but the master thread needs to be told to 
+ +       wait for that. */
+ +    if (PAR(cr) && MASTER(cr))
+ +    {
+ +        tMPI_Finalize();
+ +    }
+ +#endif
+ +
+ +    return rc;
+ +}
diff --cc src/programs/pdb2gmx/pdb2gmx.c
Simple merge
diff --cc src/programs/pdb2gmx/xlate.c
Simple merge
diff --cc src/programs/tpbconv/tpbconv.c
Simple merge
diff --cc src/tools/gmx_membed.c

index be4c706f70766e97b20f748d224a4bc697be70a7,a290ec057b7cfd1b38b6dd16872a86549323fe08..566f68a8e102a6ceb71c504e5d24699ac222ee7a
--- 1/src/tools/gmx_membed.c
--- 2/src/tools/gmx_membed.c
+++ b/src/tools/gmx_membed.c
@@@ -44,8 -46,3379 +44,3339 @@@
   #include "macros.h"
   #include "copyrite.h"
   #include "main.h"
- -#include "futil.h"
- -#include "edsam.h"
- -#include "checkpoint.h"
- -#include "vcm.h"
- -#include "mdebin.h"
- -#include "nrnb.h"
- -#include "calcmu.h"
- -#include "index.h"
- -#include "vsite.h"
- -#include "update.h"
- -#include "ns.h"
- -#include "trnio.h"
- -#include "xtcio.h"
- -#include "mdrun.h"
- -#include "confio.h"
- -#include "network.h"
- -#include "pull.h"
- -#include "xvgr.h"
- -#include "physics.h"
- -#include "names.h"
- -#include "disre.h"
- -#include "orires.h"
- -#include "dihre.h"
- -#include "pppm.h"
- -#include "pme.h"
- -#include "mdatoms.h"
- -#include "qmmm.h"
- -#include "mpelogging.h"
- -#include "domdec.h"
- -#include "partdec.h"
- -#include "topsort.h"
- -#include "coulomb.h"
- -#include "constr.h"
- -#include "shellfc.h"
- -#include "mvdata.h"
- -#include "checkpoint.h"
- -#include "mtop_util.h"
- -#include "tpxio.h"
- -#include "string2.h"
- -#include "sighandler.h"
   #include "gmx_ana.h"
   
+ #ifdef GMX_LIB_MPI
+ #include <mpi.h>
+ #endif
+ #ifdef GMX_THREADS
+ #include "tmpi.h"
+ #endif
+ 
+ /* afm stuff */
+ #include "pull.h"
+ 
+ /* We use the same defines as in mvdata.c here:
+  *    block_bc(cr,d)     - gmx_bcast of the single object d (sizeof(d) bytes)
+  *   nblock_bc(cr,nr,d)  - gmx_bcast of the nr elements of array d
+  *     snew_bc(cr,d,nr)  - snew of nr elements for d, only where MASTER(cr) is false
+  */
+ #define  block_bc(cr,   d) gmx_bcast(     sizeof(d),     &(d),(cr))
+ #define nblock_bc(cr,nr,d) gmx_bcast((nr)*sizeof((d)[0]), (d),(cr))
+ #define   snew_bc(cr,d,nr) { if (!MASTER(cr)) snew((d),(nr)); }
+ 
+ /* The following two variables and the signal_handler function
+  * are used from pme.c as well
+  */
+ 
+ /* NOTE(review): looks like an energy-minimization state record (state,
+  * forces, potential energy, force norm/maximum); it is not referenced by
+  * any of the functions visible in this part of the file - confirm it is
+  * still needed.
+  */
+ typedef struct {
+       t_state s;
+       rvec    *f;
+       real    epot;
+       real    fnorm;
+       real    fmax;
+       int     a_fmax;
+ } em_state_t;
+ 
+ /* Geometry and bookkeeping of the group that is being inserted. */
+ typedef struct {
+       int    it_xy;       /* presumably number of xy resize iterations - TODO confirm against caller */
+       int    it_z;        /* presumably number of z resize iterations - TODO confirm against caller */
+       int    xy_step;     /* not used in the functions visible here */
+       int    z_step;      /* not used in the functions visible here */
+       rvec    xmin;       /* lower corner of the insertion box, set by init_ins_at() */
+       rvec    xmax;       /* upper corner of the insertion box, set by init_ins_at() */
+       rvec    *geom_cent; /* one center of geometry per piece, allocated and filled by init_resize() */
+       int    pieces;      /* number of pieces the inserted group consists of */
+       int    *nidx;       /* number of atoms in each piece */
+       atom_id **subindex; /* subindex[p][k] is the k-th atom index of piece p */
+ } pos_ins_t;
+ 
+ /* Description of the lipid species, filled in by init_lip(). */
+ typedef struct {
+       int             id;         /* compared against molblock[].type in init_lip() */
+       char    *name;      /* not used in the functions visible here */
+       int     nr;         /* nmol of the molecule block whose type equals id */
+       int     natoms;     /*nr of atoms per lipid*/
+       int     mol1;       /*id of the first lipid molecule*/
+       real    area;       /* 2 * (xy box area) / nr, see init_lip() */
+ } lip_t;
+ 
+ /* Description of the membrane group; most fields are set by init_mem_at(). */
+ typedef struct {
+       char    *name;      /* not used in the functions visible here */
+       t_block mem_at;     /* atom indices of the membrane group */
+       int             *mol_id;    /* ids of the membrane molecules with an atom inside the insertion box */
+       int             nmol;       /* number of entries in mol_id */
+       real    lip_area;   /* 2 * (xy area of insertion box) / (number of membrane molecules in it) */
+       real    zmin;       /* lowest z of the membrane atoms inside the insertion box */
+       real    zmax;       /* highest z of the membrane atoms inside the insertion box */
+       real    zmed;       /* midpoint between zmin and zmax */
+ } mem_t;
+ 
+ /* List of molecules selected for removal, built by gen_rm_list(). */
+ typedef struct {
+       int             *mol;       /* ids of the molecules to remove */
+       int             *block;     /* molecule block index of each entry in mol */
+       int     nr;         /* number of entries in mol and block */
+ } rmm_t;
+ 
+ /* Find group name s among the ng names in gn (compared with
+  * gmx_strcasecmp) and return the index of the first match.
+  * Calls gmx_fatal when no name matches.
+  */
+ int search_string(char *s,int ng,char ***gn)
+ {
+     int idx=0;
+ 
+     while (idx<ng)
+     {
+         if (gmx_strcasecmp(s,*gn[idx]) == 0)
+         {
+             return idx;
+         }
+         idx++;
+     }
+ 
+     gmx_fatal(FARGS,"Group %s not found in indexfile.\nMaybe you have non-default groups in your mdp file, while not using the '-n' option of grompp.\nIn that case use the '-n' option.\n",s);
+ 
+     return -1;
+ }
+ 
+ /* Translate global atom index at into a global molecule id.
+  * Walks the nmblock molecule blocks in order; for the block that holds the
+  * atom, *type receives the block's molecule type and *block its index.
+  * Calls gmx_fatal when the atom lies beyond the last block.
+  */
+ int get_mol_id(int at,int nmblock,gmx_molblock_t *mblock, int *type, int *block)
+ {
+     int first_mol=0;
+     int b;
+ 
+     for(b=0;b<nmblock;b++)
+     {
+         int nat_block = mblock[b].nmol*mblock[b].natoms_mol;
+ 
+         if(at>=nat_block)
+         {
+             /* Not in this block: make at relative to the next block */
+             at        -= nat_block;
+             first_mol += mblock[b].nmol;
+             continue;
+         }
+ 
+         *type  = mblock[b].type;
+         *block = b;
+         return first_mol+at/mblock[b].natoms_mol;
+     }
+ 
+     gmx_fatal(FARGS,"Something is wrong in mol ids, at %d, mol_id %d",at,first_mol);
+ 
+     return -1;
+ }
+ 
+ /* Return the index of the molecule block that contains molecule mol_id.
+  * Calls gmx_fatal when mol_id is not smaller than the total molecule count.
+  */
+ int get_block(int mol_id,int nmblock,gmx_molblock_t *mblock)
+ {
+     int b=0;
+     int nmol_seen=0;
+ 
+     while(b<nmblock)
+     {
+         nmol_seen+=mblock[b].nmol;
+         if(mol_id<nmol_seen)
+         {
+             return b;
+         }
+         b++;
+     }
+ 
+     gmx_fatal(FARGS,"mol_id %d larger than total number of molecules %d.\n",mol_id,nmol_seen);
+ 
+     return -1;
+ }
+ 
+ /* Open run input file infile, read its header and return the file version
+  * number stored after the version string and the precision.
+  * Calls gmx_fatal when the header does not start with "VERSION" or when the
+  * stored precision is neither sizeof(float) nor sizeof(double).
+  */
+ int get_tpr_version(const char *infile)
+ {
+       char    buf[STRLEN];
+       gmx_bool        bDouble;
+       int     precision,fver;
+       t_fileio *fio;
+ 
+       fio = open_tpx(infile,"r");
+       gmx_fio_checktype(fio);
+ 
+       precision = sizeof(real);
+ 
+       gmx_fio_do_string(fio,buf);
+       if (strncmp(buf,"VERSION",7))
+       {
+               gmx_fatal(FARGS,"Can not read file %s,\n"
+                               "             this file is from a Gromacs version which is older than 2.0\n"
+                               "             Make a new one with grompp or use a gro or pdb file, if possible",
+                               gmx_fio_getname(fio));
+       }
+       gmx_fio_do_int(fio,precision);
+       bDouble = (precision == sizeof(double));
+       if ((precision != sizeof(float)) && !bDouble)
+       {
+               /* sizeof yields size_t; cast so the arguments match the %d conversions */
+               gmx_fatal(FARGS,"Unknown precision in file %s: real is %d bytes "
+                               "instead of %d or %d",
+                               gmx_fio_getname(fio),precision,(int)sizeof(float),(int)sizeof(double));
+       }
+       gmx_fio_setprecision(fio,bDouble);
+       fprintf(stderr,"Reading file %s, %s (%s precision)\n",
+                       gmx_fio_getname(fio),buf,bDouble ? "double" : "single");
+ 
+       gmx_fio_do_int(fio,fver);
+ 
+       close_tpx(fio);
+ 
+       return fver;
+ }
+ 
+ /* Shift all natom coordinates in x so that no component is negative.
+  * The per-dimension minimum is clamped at zero, so coordinates that are
+  * already non-negative in a dimension are left untouched in that dimension.
+  */
+ void set_inbox(int natom, rvec *x)
+ {
+       rvec lowest;
+       int  i;
+ 
+       lowest[XX]=lowest[YY]=lowest[ZZ]=0.0;
+       for(i=0;i<natom;i++)
+       {
+               if(x[i][XX]<lowest[XX])         lowest[XX]=x[i][XX];
+               if(x[i][YY]<lowest[YY])         lowest[YY]=x[i][YY];
+               if(x[i][ZZ]<lowest[ZZ])         lowest[ZZ]=x[i][ZZ];
+       }
+ 
+       /* lowest is <= 0: it has to be subtracted to move the atoms up;
+        * adding it (as was done before) pushed them further out.
+        */
+       for(i=0;i<natom;i++)
+       {
+               x[i][XX]-=lowest[XX];
+               x[i][YY]-=lowest[YY];
+               x[i][ZZ]-=lowest[ZZ];
+       }
+ }
+ 
+ /* Collect the distinct molecule types of the atoms listed in at.
+  * tlist->index is allocated here and receives one entry per type, in order
+  * of first appearance; the number of types found is returned
+  * (tlist->nr is not set here).
+  */
+ int get_mtype_list(t_block *at, gmx_mtop_t *mtop, t_block *tlist)
+ {
+     int      a,k,ntype=0;
+     int      type=0,block=0;
+     gmx_bool bSeen;
+ 
+     snew(tlist->index,at->nr);
+     for (a=0;a<at->nr;a++)
+     {
+         /* Only the molecule type is needed; the returned molecule id is ignored */
+         get_mol_id(at->index[a],mtop->nmolblock,mtop->molblock,&type,&block);
+ 
+         bSeen=FALSE;
+         for(k=0;k<ntype && !bSeen;k++)
+         {
+             bSeen=(tlist->index[k]==type);
+         }
+         if(!bSeen)
+         {
+             tlist->index[ntype++]=type;
+         }
+     }
+ 
+     srenew(tlist->index,ntype);
+     return ntype;
+ }
+ 
+ /* Verify that no molecule type occurs both in the group to insert (ins_at)
+  * and in the rest of the system (rest_at); calls gmx_fatal on the first
+  * molecule type that is found in both.
+  */
+ void check_types(t_block *ins_at,t_block *rest_at,gmx_mtop_t *mtop)
+ {
+     t_block *ins_types,*rest_types;
+     int     a,b,shared;
+ 
+     snew(ins_types,1);
+     snew(rest_types,1);
+     ins_types->nr  = get_mtype_list(ins_at , mtop, ins_types );
+     rest_types->nr = get_mtype_list(rest_at, mtop, rest_types);
+ 
+     for(a=0;a<ins_types->nr;a++)
+     {
+         for(b=0;b<rest_types->nr;b++)
+         {
+             if(ins_types->index[a]!=rest_types->index[b])
+             {
+                 continue;
+             }
+             shared=rest_types->index[b];
+             gmx_fatal(FARGS,"Moleculetype %s is found both in the group to insert and the rest of the system.\n"
+                       "Because we need to exclude all interactions between the atoms in the group to\n"
+                       "insert, the same moleculetype can not be used in both groups. Change the\n"
+                       "moleculetype of the molecules %s in the inserted group. Do not forget to provide\n"
+                       "an appropriate *.itp file",*(mtop->moltype[shared].name),
+                       *(mtop->moltype[shared].name));
+         }
+     }
+ 
+     sfree(ins_types->index);
+     sfree(rest_types->index);
+     sfree(ins_types);
+     sfree(rest_types);
+ }
+ 
+ /* Split the system into the group to insert and the rest, and determine
+  * the insertion box.
+  *
+  * Atoms whose freeze group name index equals ins_grp_id extend the bounding
+  * box; all other atoms are stored in rest_at (index allocated here, nr set).
+  * pos_ins->xmin/xmax receive the box: scaled around its center by xy_max in
+  * x and y when xy_max>1.000001, and widened to 6.0 around its center in z
+  * when it is thinner than that.
+  * Returns the number of atoms in rest_at.
+  *
+  * NOTE(review): the extrema are seeded from ins_at->index[0], so an empty
+  * ins_at is not handled.
+  */
+ int init_ins_at(t_block *ins_at,t_block *rest_at,t_state *state, pos_ins_t *pos_ins,gmx_groups_t *groups,int ins_grp_id, real xy_max)
+ {
+       int i,gid,c=0;
+       real x,xmin,xmax,y,ymin,ymax,z,zmin,zmax;
+ 
+       snew(rest_at->index,state->natoms);
+ 
+       /* Seed the extrema with the first atom of the insertion group */
+       xmin=xmax=state->x[ins_at->index[0]][XX];
+       ymin=ymax=state->x[ins_at->index[0]][YY];
+       zmin=zmax=state->x[ins_at->index[0]][ZZ];
+ 
+       for(i=0;i<state->natoms;i++)
+       {
+               gid = groups->grpnr[egcFREEZE][i];
+               if(groups->grps[egcFREEZE].nm_ind[gid]==ins_grp_id)
+               {
+                       /* Atom belongs to the group to insert: grow the bounding box */
+                       x=state->x[i][XX];
+                       if (x<xmin)                     xmin=x;
+                       if (x>xmax)                     xmax=x;
+                       y=state->x[i][YY];
+                       if (y<ymin)                             ymin=y;
+                       if (y>ymax)                             ymax=y;
+                       z=state->x[i][ZZ];
+                       if (z<zmin)                             zmin=z;
+                       if (z>zmax)                             zmax=z;
+               } else {
+                       /* Everything else goes into the rest group */
+                       rest_at->index[c]=i;
+                       c++;
+               }
+       }
+ 
+       rest_at->nr=c;
+       srenew(rest_at->index,c);
+ 
+       if(xy_max>1.000001)
+       {
+               /* Enlarge the xy extent by the factor xy_max, keeping the center fixed */
+               pos_ins->xmin[XX]=xmin-((xmax-xmin)*xy_max-(xmax-xmin))/2;
+               pos_ins->xmin[YY]=ymin-((ymax-ymin)*xy_max-(ymax-ymin))/2;
+ 
+               pos_ins->xmax[XX]=xmax+((xmax-xmin)*xy_max-(xmax-xmin))/2;
+               pos_ins->xmax[YY]=ymax+((ymax-ymin)*xy_max-(ymax-ymin))/2;
+       } else {
+               pos_ins->xmin[XX]=xmin;
+               pos_ins->xmin[YY]=ymin;
+ 
+               pos_ins->xmax[XX]=xmax;
+               pos_ins->xmax[YY]=ymax;
+       }
+ 
+       /* 6.0 is estimated thickness of bilayer */
+       if( (zmax-zmin) < 6.0 )
+       {
+               /* Thinner than a bilayer: use a 6.0 thick slab around the z center */
+               pos_ins->xmin[ZZ]=zmin+(zmax-zmin)/2.0-3.0;
+               pos_ins->xmax[ZZ]=zmin+(zmax-zmin)/2.0+3.0;
+       } else {
+               pos_ins->xmin[ZZ]=zmin;
+               pos_ins->xmax[ZZ]=zmax;
+       }
+ 
+       return c;
+ }
+ 
+ /* Estimate the xy area occupied by the inserted group inside the membrane.
+  *
+  * The xy extent of the insertion box is scanned on a grid of 0.15 x 0.15
+  * cells. A cell counts when at least one atom of ins_at lies in it with a
+  * z coordinate between mem_p->zmin+1.0 and mem_p->zmax-1.0. The return
+  * value is the number of counted cells times the cell area.
+  *
+  * NOTE(review): the do/while reads ins_at->index[0] before testing
+  * ins_at->nr, so an empty ins_at is not handled.
+  */
+ real est_prot_area(pos_ins_t *pos_ins,rvec *r,t_block *ins_at, mem_t *mem_p)
+ {
+       real x,y,dx=0.15,dy=0.15,area=0.0;
+       real add;
+       int c,at;
+ 
+       for(x=pos_ins->xmin[XX];x<pos_ins->xmax[XX];x+=dx)
+       {
+               for(y=pos_ins->xmin[YY];y<pos_ins->xmax[YY];y+=dy)
+               {
+                       c=0;
+                       add=0.0;
+                       /* Stop at the first atom found in this cell (add becomes 1.0) */
+                       do
+                       {
+                               at=ins_at->index[c];
+                               if ( (r[at][XX]>=x) && (r[at][XX]<x+dx) &&
+                                               (r[at][YY]>=y) && (r[at][YY]<y+dy) &&
+                                               (r[at][ZZ]>mem_p->zmin+1.0) && (r[at][ZZ]<mem_p->zmax-1.0) )
+                                       add=1.0;
+                               c++;
+                       } while ( (c<ins_at->nr) && (add<0.5) );
+                       area+=add;
+               }
+       }
+       /* Convert the cell count into an area */
+       area=area*dx*dy;
+ 
+       return area;
+ }
+ 
+ /* Fill in the lipid description lip from the topology and the box.
+  *
+  * lip->nr and lip->natoms are taken from the molecule block(s) whose type
+  * equals lip->id (the last matching block wins), lip->area is twice the xy
+  * box area divided by lip->nr, and lip->mol1 is the summed nmol of the first
+  * lip->id molecule blocks.
+  *
+  * NOTE(review): lip->id is compared with a molecule type and is also used
+  * as a molecule block count; this only agrees when block order and type
+  * numbering coincide - confirm. lip->nr is not set when no block matches.
+  */
+ void init_lip(matrix box, gmx_mtop_t *mtop, lip_t *lip)
+ {
+     int  b;
+     int  first_mol=0;
+     real xy_area;
+ 
+     xy_area = box[XX][XX]*box[YY][YY]-box[XX][YY]*box[YY][XX];
+ 
+     for(b=0;b<mtop->nmolblock;b++)
+     {
+         if(mtop->molblock[b].type != lip->id)
+         {
+             continue;
+         }
+         lip->nr    =mtop->molblock[b].nmol;
+         lip->natoms=mtop->molblock[b].natoms_mol;
+     }
+     lip->area=2.0*xy_area/(double)lip->nr;
+ 
+     for (b=0;b<lip->id;b++)
+     {
+         first_mol+=mtop->molblock[b].nmol;
+     }
+     lip->mol1=first_mol;
+ }
+ 
+ /* Characterize the part of the membrane that lies inside the insertion box.
+  *
+  * For every atom of mem_p->mem_at strictly inside pos_ins->xmin..xmax the
+  * molecule id is recorded once in mem_p->mol_id (mem_p->nmol entries) and
+  * the z range is tracked in mem_p->zmin/zmax; mem_p->zmed is the midpoint.
+  * mem_p->lip_area is twice the xy area of the insertion box divided by the
+  * number of membrane molecules in it (atom count / atoms per molecule).
+  * Calls gmx_fatal when the z range exceeds box[ZZ][ZZ]-0.5.
+  * Returns mem_p->mem_at.nr.
+  *
+  * NOTE(review): 'block' used for natoms_mol is the one set by the last
+  * get_mol_id() call (0 when no atom was inside the box), and nmolbox can be
+  * zero, which makes lip_area a division by zero - confirm callers avoid this.
+  */
+ int init_mem_at(mem_t *mem_p, gmx_mtop_t *mtop, rvec *r, matrix box, pos_ins_t *pos_ins)
+ {
+       int i,j,at,mol,nmol,nmolbox,count;
+       t_block *mem_a;
+       real z,zmin,zmax,mem_area;
+       gmx_bool bNew;
+       atom_id *mol_id;
+       int type=0,block=0;
+ 
+       nmol=count=0;
+       mem_a=&(mem_p->mem_at);
+       snew(mol_id,mem_a->nr);
+ /*    snew(index,mem_a->nr); */
+       /* Start with an inverted range so that the first atom inside sets both limits */
+       zmin=pos_ins->xmax[ZZ];
+       zmax=pos_ins->xmin[ZZ];
+       for(i=0;i<mem_a->nr;i++)
+       {
+               at=mem_a->index[i];
+               if(     (r[at][XX]>pos_ins->xmin[XX]) && (r[at][XX]<pos_ins->xmax[XX]) &&
+                       (r[at][YY]>pos_ins->xmin[YY]) && (r[at][YY]<pos_ins->xmax[YY]) &&
+                       (r[at][ZZ]>pos_ins->xmin[ZZ]) && (r[at][ZZ]<pos_ins->xmax[ZZ]) )
+               {
+                       mol = get_mol_id(at,mtop->nmolblock,mtop->molblock,&type,&block);
+ 
+                       /* Record each molecule only once */
+                       bNew=TRUE;
+                       for(j=0;j<nmol;j++)
+                               if(mol == mol_id[j])
+                                       bNew=FALSE;
+ 
+                       if(bNew)
+                       {
+                               mol_id[nmol]=mol;
+                               nmol++;
+                       }
+ 
+                       z=r[at][ZZ];
+                       if(z<zmin)                                      zmin=z;
+                       if(z>zmax)                                      zmax=z;
+ 
+ /*                    index[count]=at;*/
+                       count++;
+               }
+       }
+ 
+       mem_p->nmol=nmol;
+       srenew(mol_id,nmol);
+       mem_p->mol_id=mol_id;
+ /*    srenew(index,count);*/
+ /*    mem_p->mem_at.nr=count;*/
+ /*    sfree(mem_p->mem_at.index);*/
+ /*    mem_p->mem_at.index=index;*/
+ 
+       if((zmax-zmin)>(box[ZZ][ZZ]-0.5))
+               gmx_fatal(FARGS,"Something is wrong with your membrane. Max and min z values are %f and %f.\n"
+                               "Maybe your membrane is not centered in the box, but located at the box edge in the z-direction,\n"
+                               "so that one membrane is distributed over two periodic box images. Another possibility is that\n"
+                               "your water layer is not thick enough.\n",zmax,zmin);
+       mem_p->zmin=zmin;
+       mem_p->zmax=zmax;
+       mem_p->zmed=(zmax-zmin)/2+zmin;
+ 
+       /*number of membrane molecules in protein box*/
+       nmolbox = count/mtop->molblock[block].natoms_mol;
+       /*mem_area = box[XX][XX]*box[YY][YY]-box[XX][YY]*box[YY][XX];
+       mem_p->lip_area = 2.0*mem_area/(double)mem_p->nmol;*/
+       mem_area = (pos_ins->xmax[XX]-pos_ins->xmin[XX])*(pos_ins->xmax[YY]-pos_ins->xmin[YY]);
+       mem_p->lip_area = 2.0*mem_area/(double)nmolbox;
+ 
+       return mem_p->mem_at.nr;
+ }
+ 
+ /* Store the original coordinates of the inserted atoms and compute the
+  * center of geometry of every piece.
+  *
+  * r_ins receives a copy of r for all piece atoms, in piece order. Only atoms
+  * with mem_p->zmin < z < mem_p->zmax contribute to a piece's center
+  * (pos_ins->geom_cent, allocated here). Unless bALLOW_ASYMMETRY is set the
+  * z component of each center is replaced by mem_p->zmed.
+  * Calls gmx_fatal when the piece sizes do not add up to ins_at->nr.
+  */
+ void init_resize(t_block *ins_at,rvec *r_ins,pos_ins_t *pos_ins,mem_t *mem_p,rvec *r, gmx_bool bALLOW_ASYMMETRY)
+ {
+     int p,k,d,at;
+     int ninside;       /* atoms of the current piece within the membrane z range */
+     int nstored=0;     /* running index into r_ins */
+     int npiece_atoms=0;
+ 
+     /*sanity check*/
+     for (p=0;p<pos_ins->pieces;p++)
+     {
+         npiece_atoms+=pos_ins->nidx[p];
+     }
+     if (npiece_atoms!=ins_at->nr)
+     {
+         gmx_fatal(FARGS,"Piecewise sum of inserted atoms not same as size of group selected to insert.");
+     }
+ 
+     snew(pos_ins->geom_cent,pos_ins->pieces);
+     for (p=0;p<pos_ins->pieces;p++)
+     {
+         ninside=0;
+         for(d=0;d<DIM;d++)
+         {
+             pos_ins->geom_cent[p][d]=0;
+         }
+ 
+         for (k=0;k<pos_ins->nidx[p];k++)
+         {
+             at=pos_ins->subindex[p][k];
+             copy_rvec(r[at],r_ins[nstored]);
+             if( (r_ins[nstored][ZZ]<mem_p->zmax) && (r_ins[nstored][ZZ]>mem_p->zmin) )
+             {
+                 rvec_inc(pos_ins->geom_cent[p],r_ins[nstored]);
+                 ninside++;
+             }
+             nstored++;
+         }
+ 
+         if (ninside>0)
+         {
+             svmul(1/(double)ninside,pos_ins->geom_cent[p],pos_ins->geom_cent[p]);
+         }
+         if (!bALLOW_ASYMMETRY)
+         {
+             pos_ins->geom_cent[p][ZZ]=mem_p->zmed;
+         }
+ 
+         fprintf(stderr,"Embedding piece %d with center of geometry: %f %f %f\n",p,pos_ins->geom_cent[p][XX],pos_ins->geom_cent[p][YY],pos_ins->geom_cent[p][ZZ]);
+     }
+     fprintf(stderr,"\n");
+ }
+ 
+ /* Scale the inserted atoms around the center of geometry of their piece.
+  * For every piece atom, r is set to center + fac*(r_ins - center) per
+  * dimension, where r_ins holds the stored coordinates in piece order.
+  */
+ void resize(t_block *ins_at, rvec *r_ins, rvec *r, pos_ins_t *pos_ins,rvec fac)
+ {
+     int  p,k,d,at;
+     int  nstored=0;    /* running index into r_ins */
+     real *cen;
+ 
+     for (p=0;p<pos_ins->pieces;p++)
+     {
+         cen=pos_ins->geom_cent[p];
+         for(k=0;k<pos_ins->nidx[p];k++)
+         {
+             at=pos_ins->subindex[p][k];
+             for(d=0;d<DIM;d++)
+             {
+                 r[at][d]=cen[d]+fac[d]*(r_ins[nstored][d]-cen[d]);
+             }
+             nstored++;
+         }
+     }
+ }
+ 
+ /* Build the list of molecules that have to be removed to make room for the
+  * inserted group.
+  *
+  * Every molecule of rest_at that has an atom within probe_rad (using pbc_dx)
+  * of an atom of ins_at is added to rm_p (mol and block are allocated here).
+  * Removed molecules that are listed in mem_p->mol_id are counted as lower or
+  * upper leaflet, depending on their mean z relative to mem_p->zmed; the
+  * lower count starts at low_up_rm. Unless bALLOW_ASYMMETRY is set, extra
+  * membrane molecules are removed, closest (in xy) to a piece center first,
+  * until both counts are equal.
+  *
+  * Returns nupper+nlower. Calls gmx_fatal when the counts cannot be balanced
+  * with the molecules in mem_p->mol_id.
+  */
+ int gen_rm_list(rmm_t *rm_p,t_block *ins_at,t_block *rest_at,t_pbc *pbc, gmx_mtop_t *mtop,
+               rvec *r, rvec *r_ins, mem_t *mem_p, pos_ins_t *pos_ins, real probe_rad, int low_up_rm, gmx_bool bALLOW_ASYMMETRY)
+ {
+       int i,j,k,l,at,at2,mol_id;
+       int type=0,block=0;
+       int nrm,nupper,nlower;
+       real r_min_rad,z_lip,min_norm;
+       gmx_bool bRM;
+       rvec dr,dr_tmp;
+       real *dist;
+       int *order;
+ 
+       /* Distances are compared squared */
+       r_min_rad=probe_rad*probe_rad;
+       snew(rm_p->mol,mtop->mols.nr);
+       snew(rm_p->block,mtop->mols.nr);
+       nrm=nupper=0;
+       nlower=low_up_rm;
+       for(i=0;i<ins_at->nr;i++)
+       {
+               at=ins_at->index[i];
+               for(j=0;j<rest_at->nr;j++)
+               {
+                       at2=rest_at->index[j];
+                       pbc_dx(pbc,r[at],r[at2],dr);
+ 
+                       if(norm2(dr)<r_min_rad)
+                       {
+                               mol_id = get_mol_id(at2,mtop->nmolblock,mtop->molblock,&type,&block);
+                               /* Add each overlapping molecule only once */
+                               bRM=TRUE;
+                               for(l=0;l<nrm;l++)
+                               {
+                                       if(rm_p->mol[l]==mol_id)
+                                       {
+                                               bRM=FALSE;
+                                       }
+                               }
+                               if(bRM)
+                               {
+                                       rm_p->mol[nrm]=mol_id;
+                                       rm_p->block[nrm]=block;
+                                       nrm++;
+                                       z_lip=0.0;
+                                       /* Membrane molecules are assigned to a leaflet by their mean z */
+                                       for(l=0;l<mem_p->nmol;l++)
+                                       {
+                                               if(mol_id==mem_p->mol_id[l])
+                                               {
+                                                       for(k=mtop->mols.index[mol_id];k<mtop->mols.index[mol_id+1];k++)
+                                                       {
+                                                               z_lip+=r[k][ZZ];
+                                                       }
+                                                       z_lip/=mtop->molblock[block].natoms_mol;
+                                                       if(z_lip<mem_p->zmed)
+                                                       {
+                                                               nlower++;
+                                                       }
+                                                       else
+                                                       {
+                                                               nupper++;
+                                                       }
+                                               }
+                                       }
+                               }
+                       }
+               }
+       }
+ 
+       /*make sure equal number of lipids from upper and lower layer are removed */
+       if( (nupper!=nlower) && (!bALLOW_ASYMMETRY) )
+       {
+               snew(dist,mem_p->nmol);
+               snew(order,mem_p->nmol);
+               for(i=0;i<mem_p->nmol;i++)
+               {
+                       at = mtop->mols.index[mem_p->mol_id[i]];
+                       pbc_dx(pbc,r[at],pos_ins->geom_cent[0],dr);
+                       if (pos_ins->pieces>1)
+                       {
+                               /*minimum dr value*/
+                               min_norm=norm2(dr);
+                               for (k=1;k<pos_ins->pieces;k++)
+                               {
+                                       pbc_dx(pbc,r[at],pos_ins->geom_cent[k],dr_tmp);
+                                       if (norm2(dr_tmp) < min_norm)
+                                       {
+                                               min_norm=norm2(dr_tmp);
+                                               copy_rvec(dr_tmp,dr);
+                                       }
+                               }
+                       }
+                       dist[i]=dr[XX]*dr[XX]+dr[YY]*dr[YY];
+                       /* Insertion sort: order[] lists the molecules by increasing xy distance */
+                       j=i-1;
+                       while (j>=0 && dist[i]<dist[order[j]])
+                       {
+                               order[j+1]=order[j];
+                               j--;
+                       }
+                       order[j+1]=i;
+               }
+ 
+               i=0;
+               while(nupper!=nlower)
+               {
+                       /* Test before order[i] is read: the old test (i>nmol after
+                        * the increment) allowed one read beyond the end of order[].
+                        */
+                       if(i>=mem_p->nmol)
+                       {
+                               gmx_fatal(FARGS,"Trying to remove more lipid molecules than there are in the membrane");
+                       }
+ 
+                       mol_id=mem_p->mol_id[order[i]];
+                       block=get_block(mol_id,mtop->nmolblock,mtop->molblock);
+ 
+                       bRM=TRUE;
+                       for(l=0;l<nrm;l++)
+                       {
+                               if(rm_p->mol[l]==mol_id)
+                               {
+                                       bRM=FALSE;
+                               }
+                       }
+                       if(bRM)
+                       {
+                               z_lip=0;
+                               for(k=mtop->mols.index[mol_id];k<mtop->mols.index[mol_id+1];k++)
+                               {
+                                       z_lip+=r[k][ZZ];
+                               }
+                               z_lip/=mtop->molblock[block].natoms_mol;
+                               /* Only take a molecule from the leaflet that is behind */
+                               if(nupper>nlower && z_lip<mem_p->zmed)
+                               {
+                                       rm_p->mol[nrm]=mol_id;
+                                       rm_p->block[nrm]=block;
+                                       nrm++;
+                                       nlower++;
+                               }
+                               else if (nupper<nlower && z_lip>mem_p->zmed)
+                               {
+                                       rm_p->mol[nrm]=mol_id;
+                                       rm_p->block[nrm]=block;
+                                       nrm++;
+                                       nupper++;
+                               }
+                       }
+                       i++;
+               }
+               sfree(dist);
+               sfree(order);
+       }
+ 
+       rm_p->nr=nrm;
+       srenew(rm_p->mol,nrm);
+       srenew(rm_p->block,nrm);
+ 
+       return nupper+nlower;
+ }
+ 
+ /* Remove the molecules listed in rm_p from the topology and the state.
+  *
+  * The molecule counts of the affected blocks and mtop->mols are reduced,
+  * state->x, state->v and all allocated group number arrays are compacted,
+  * and the atom indices in ins_at and pos_ins->subindex are renumbered to
+  * the compacted order. The parameter ir is not used here.
+  *
+  * NOTE(review): the rebuilt mtop->mols.index keeps the original (not
+  * renumbered) atom offsets and has mtop->mols.nr entries only - confirm
+  * that later code does not rely on it as a complete block index.
+  */
+ void rm_group(t_inputrec *ir, gmx_groups_t *groups, gmx_mtop_t *mtop, rmm_t *rm_p, t_state *state, t_block *ins_at, pos_ins_t *pos_ins)
+ {
+       int i,j,k,n,rm,mol_id,at,block;
+       rvec *x_tmp,*v_tmp;
+       atom_id *list,*new_mols;
+       unsigned char  *new_egrp[egcNR];
+       gmx_bool bRM;
+ 
+       /* Collect the atom indices of all molecules to remove in list[0..n-1] */
+       snew(list,state->natoms);
+       n=0;
+       for(i=0;i<rm_p->nr;i++)
+       {
+               mol_id=rm_p->mol[i];
+               at=mtop->mols.index[mol_id];
+               block =rm_p->block[i];
+               mtop->molblock[block].nmol--;
+               for(j=0;j<mtop->molblock[block].natoms_mol;j++)
+               {
+                       list[n]=at+j;
+                       n++;
+               }
+ 
+               /* Mark the molecule as removed */
+               mtop->mols.index[mol_id]=-1;
+       }
+ 
+       mtop->mols.nr-=rm_p->nr;
+       mtop->mols.nalloc_index-=rm_p->nr;
+       snew(new_mols,mtop->mols.nr);
+       /* Copy the entries of the surviving molecules. The write position j
+        * has to be initialized once, before the loop: resetting it in every
+        * iteration (as was done before) stored every entry in new_mols[0].
+        */
+       j=0;
+       for(i=0;i<mtop->mols.nr+rm_p->nr;i++)
+       {
+               if(mtop->mols.index[i]!=-1)
+               {
+                       new_mols[j]=mtop->mols.index[i];
+                       j++;
+               }
+       }
+       sfree(mtop->mols.index);
+       mtop->mols.index=new_mols;
+ 
+ 
+       mtop->natoms-=n;
+       state->natoms-=n;
+       state->nalloc=state->natoms;
+       snew(x_tmp,state->nalloc);
+       snew(v_tmp,state->nalloc);
+ 
+       for(i=0;i<egcNR;i++)
+       {
+               if(groups->grpnr[i]!=NULL)
+               {
+                       groups->ngrpnr[i]=state->natoms;
+                       snew(new_egrp[i],state->natoms);
+               }
+       }
+ 
+       /* Compact the per-atom data; rm counts the removed atoms passed so far,
+        * so a surviving atom i moves to position i-rm.
+        */
+       rm=0;
+       for (i=0;i<state->natoms+n;i++)
+       {
+               bRM=FALSE;
+               for(j=0;j<n;j++)
+               {
+                       if(i==list[j])
+                       {
+                               bRM=TRUE;
+                               rm++;
+                       }
+               }
+ 
+               if(!bRM)
+               {
+                       for(j=0;j<egcNR;j++)
+                       {
+                               if(groups->grpnr[j]!=NULL)
+                               {
+                                       new_egrp[j][i-rm]=groups->grpnr[j][i];
+                               }
+                       }
+                       copy_rvec(state->x[i],x_tmp[i-rm]);
+                       copy_rvec(state->v[i],v_tmp[i-rm]);
+                       /* Renumber the inserted atoms to the compacted order */
+                       for(j=0;j<ins_at->nr;j++)
+                       {
+                               if (i==ins_at->index[j])
+                               {
+                                       ins_at->index[j]=i-rm;
+                               }
+                       }
+                       for(j=0;j<pos_ins->pieces;j++)
+                       {
+                               for(k=0;k<pos_ins->nidx[j];k++)
+                               {
+                                       if (i==pos_ins->subindex[j][k])
+                                       {
+                                               pos_ins->subindex[j][k]=i-rm;
+                                       }
+                               }
+                       }
+               }
+       }
+       sfree(state->x);
+       state->x=x_tmp;
+       sfree(state->v);
+       state->v=v_tmp;
+ 
+       for(i=0;i<egcNR;i++)
+       {
+               if(groups->grpnr[i]!=NULL)
+               {
+                       sfree(groups->grpnr[i]);
+                       groups->grpnr[i]=new_egrp[i];
+               }
+       }
+ }
+ 
+ /* Switch off interaction lists of the molecule types that are inserted.
+  *
+  * A molecule type stays flagged only while every atom scanned in its
+  * molecule blocks is found in ins_at; the scan of a block stops at the
+  * first atom that is not. For each flagged type the ilist entries below
+  * F_LJ and from F_POSRES up to and including F_VSITEN get nr=0.
+  * Returns the summed atom count of the blocks whose type was flagged at
+  * the time the block was processed.
+  *
+  * NOTE(review): types that do not occur in any molecule block keep their
+  * initial TRUE flag and are cleared as well.
+  */
+ int rm_bonded(t_block *ins_at, gmx_mtop_t *mtop)
+ {
+       int i,j,m;
+       int type,natom,nmol,at,atom1=0,rm_at=0;
+       gmx_bool *bRM,bINS;
+       /*this routine lives dangerously by assuming that all molecules of a given type are in order in the structure*/
+       /*this routine does not live as dangerously as it seems. There is namely a check in mdrunner_membed to make
+          *sure that g_membed exits with a warning when there are molecules of the same type not in the 
+        *ins_at index group. MGWolf 050710 */
+ 
+ 
+       /* Start with every molecule type flagged for removal of its bondeds */
+       snew(bRM,mtop->nmoltype);
+       for (i=0;i<mtop->nmoltype;i++)
+       {
+               bRM[i]=TRUE;
+       }
+ 
+       for (i=0;i<mtop->nmolblock;i++) 
+       {
+           /*loop over molecule blocks*/
+               type        =mtop->molblock[i].type;
+               natom       =mtop->molblock[i].natoms_mol;
+               nmol            =mtop->molblock[i].nmol;
+ 
+               for(j=0;j<natom*nmol && bRM[type]==TRUE;j++) 
+               {
+                   /*loop over atoms in the block*/
+                       at=j+atom1; /*atom index = block index + offset*/
+                       bINS=FALSE;
+ 
+                       for (m=0;(m<ins_at->nr) && (bINS==FALSE);m++)
+                       {
+                           /*loop over atoms in insertion index group to determine if we're inserting one*/
+                               if(at==ins_at->index[m])
+                               {
+                                       bINS=TRUE;
+                               }
+                       }
+                       /* The flag drops to FALSE at the first atom that is not inserted */
+                       bRM[type]=bINS;
+               }
+               atom1+=natom*nmol; /*update offset*/
+               if(bRM[type])
+               {
+                       rm_at+=natom*nmol; /*increment bonded removal counter by # atoms in block*/
+               }
+       }
+ 
+       for(i=0;i<mtop->nmoltype;i++)
+       {
+               if(bRM[i])
+               {
+                       /* Empty the interaction lists in the two index ranges */
+                       for(j=0;j<F_LJ;j++)
+                       {
+                               mtop->moltype[i].ilist[j].nr=0;
+                       }
+                       for(j=F_POSRES;j<=F_VSITEN;j++)
+                       {
+                               mtop->moltype[i].ilist[j].nr=0;
+                       }
+               }
+       }
+       sfree(bRM);
+ 
+       return rm_at;
+ }
+ 
+ /* Rewrite the [ molecules ] section of the topology file topfile so that
+  * it lists the molecule blocks currently stored in mtop.
+  *
+  * The file is copied line by line to a temporary file ("temp.top"):
+  *  - lines with ';' in the first column are copied unchanged;
+  *  - section header lines ("[ ... ]") are copied unchanged;
+  *  - the first other line after a "[ molecules ]" header is replaced by
+  *    one "name count" line per molecule block of mtop, and all further
+  *    such lines are dropped;
+  *  - everything else is copied unchanged.
+  * Afterwards the temporary file is renamed to topfile.
+  *
+  * topfile : name of the topology file to update
+  * ins     : not used by this routine
+  * rm_p    : not used by this routine
+  * mtop    : global topology supplying molecule type names and counts
+  */
+ void top_update(const char *topfile, char *ins, rmm_t *rm_p, gmx_mtop_t *mtop)
+ {
+ #define TEMP_FILENM "temp.top"
+     int     bMolecules=0; /* 0: section not seen, 1: header seen, 2: list written */
+     FILE    *fpin,*fpout;
+     char    buf[STRLEN],buf2[STRLEN],*temp;
+     int     i;
+     size_t  len;
+ 
+     /* kept in the signature for the callers, but not needed here */
+     (void)ins;
+     (void)rm_p;
+ 
+     fpin  = ffopen(topfile,"r");
+     fpout = ffopen(TEMP_FILENM,"w");
+ 
+     while(fgets(buf,STRLEN,fpin))
+     {
+         if(buf[0]!=';')
+         {
+             /* work on a copy without newline and leading blanks, so
+              * that buf can still be written out unmodified */
+             strcpy(buf2,buf);
+             if ((temp=strchr(buf2,'\n')) != NULL)
+                 temp[0]='\0';
+             ltrim(buf2);
+ 
+             if (buf2[0]=='[')
+             {
+                 /* blank out the opening bracket and isolate the name */
+                 buf2[0]=' ';
+                 rtrim(buf2);
+                 len=strlen(buf2);
+                 /* len can be 0 here (e.g. a line holding only "["),
+                  * so guard the access to the last character */
+                 if (len>0 && buf2[len-1]==']')
+                 {
+                     buf2[len-1]='\0';
+                     ltrim(buf2);
+                     rtrim(buf2);
+                     if (gmx_strcasecmp(buf2,"molecules")==0)
+                         bMolecules=1;
+                 }
+                 fprintf(fpout,"%s",buf);
+             } else if (bMolecules==1)
+             {
+                 /* first entry of the old molecule list: write the new
+                  * list in its place */
+                 for(i=0;i<mtop->nmolblock;i++)
+                 {
+                     fprintf(fpout,"%-15s %5d\n",
+                             *(mtop->moltype[mtop->molblock[i].type].name),
+                             mtop->molblock[i].nmol);
+                 }
+                 bMolecules=2;
+             } else if (bMolecules==2)
+             {
+                 /* print nothing */
+             } else
+             {
+                 fprintf(fpout,"%s",buf);
+             }
+         } else
+         {
+             fprintf(fpout,"%s",buf);
+         }
+     }
+ 
+     /* done reading: release the input before topfile is reopened for
+      * writing and replaced */
+     fclose(fpin);
+     fclose(fpout);
+     /* use ffopen to generate backup of topinout */
+     fpout=ffopen(topfile,"w");
+     fclose(fpout);
+     if (rename(TEMP_FILENM,topfile) != 0)
+     {
+         fprintf(stderr,"Warning: could not rename %s to %s\n",TEMP_FILENM,topfile);
+     }
+ #undef TEMP_FILENM
+ }
+ 
+ double do_md_membed(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
+              const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
+              int nstglobalcomm,
+              gmx_vsite_t *vsite,gmx_constr_t constr,
+              int stepout,t_inputrec *ir,
+              gmx_mtop_t *top_global,
+              t_fcdata *fcd,
+              t_state *state_global,
+              t_mdatoms *mdatoms,
+              t_nrnb *nrnb,gmx_wallcycle_t wcycle,
+              gmx_edsam_t ed,t_forcerec *fr,
+              int repl_ex_nst,int repl_ex_seed,
+              real cpt_period,real max_hours,
+              const char *deviceOptions,
+              unsigned long Flags,
+              gmx_runtime_t *runtime,
+              rvec fac, rvec *r_ins, pos_ins_t *pos_ins, t_block *ins_at,
+              real xy_step, real z_step, int it_xy, int it_z)
+ {
+     gmx_mdoutf_t *outf;
+     gmx_large_int_t step,step_rel;
+     double     run_time;
+     double     t,t0,lam0;
+     gmx_bool       bGStatEveryStep,bGStat,bNstEner,bCalcEnerPres;
+     gmx_bool       bNS,bNStList,bSimAnn,bStopCM,bRerunMD,bNotLastFrame=FALSE,
+                bFirstStep,bStateFromTPX,bInitStep,bLastStep,
+                bBornRadii,bStartingFromCpt;
+     gmx_bool       bDoDHDL=FALSE;
+     gmx_bool       do_ene,do_log,do_verbose,bRerunWarnNoV=TRUE,
+                bForceUpdate=FALSE,bCPT;
+     int        mdof_flags;
+     gmx_bool       bMasterState;
+     int        force_flags,cglo_flags;
+     tensor     force_vir,shake_vir,total_vir,tmp_vir,pres;
+     int        i,m;
+     t_trxstatus *status;
+     rvec       mu_tot;
+     t_vcm      *vcm;
+     t_state    *bufstate=NULL;
+     matrix     *scale_tot,pcoupl_mu,M,ebox;
+     gmx_nlheur_t nlh;
+     t_trxframe rerun_fr;
+ /*    gmx_repl_ex_t repl_ex=NULL;*/
+     int        nchkpt=1;
+ 
+     gmx_localtop_t *top;
+     t_mdebin *mdebin=NULL;
+     t_state    *state=NULL;
+     rvec       *f_global=NULL;
+     int        n_xtc=-1;
+     rvec       *x_xtc=NULL;
+     gmx_enerdata_t *enerd;
+     rvec       *f=NULL;
+     gmx_global_stat_t gstat;
+     gmx_update_t upd=NULL;
+     t_graph    *graph=NULL;
+     globsig_t   gs;
+ 
+     gmx_bool        bFFscan;
+     gmx_groups_t *groups;
+     gmx_ekindata_t *ekind, *ekind_save;
+     gmx_shellfc_t shellfc;
+     int         count,nconverged=0;
+     real        timestep=0;
+     double      tcount=0;
+     gmx_bool        bIonize=FALSE;
+     gmx_bool        bTCR=FALSE,bConverged=TRUE,bOK,bSumEkinhOld,bExchanged;
+     gmx_bool        bAppend;
+     gmx_bool        bResetCountersHalfMaxH=FALSE;
+     gmx_bool        bVV,bIterations,bIterate,bFirstIterate,bTemp,bPres,bTrotter;
+     real        temp0,dvdl;
+     int         a0,a1,ii;
+     rvec        *xcopy=NULL,*vcopy=NULL,*cbuf=NULL;
+     matrix      boxcopy={{0}},lastbox;
+       real        veta_save,pcurr,scalevir,tracevir;
+       real        vetanew = 0;
+     double      cycles;
+       real        last_conserved = 0;
+     real        last_ekin = 0;
+       t_extmass   MassQ;
+     int         **trotter_seq;
+     char        sbuf[STEPSTRSIZE],sbuf2[STEPSTRSIZE];
+     int         handled_stop_condition=gmx_stop_cond_none; /* compare to get_stop_condition*/
+     gmx_iterate_t iterate;
+ #ifdef GMX_FAHCORE
+     /* Temporary addition for FAHCORE checkpointing */
+     int chkpt_ret;
+ #endif
+ 
+     /* Check for special mdrun options */
+     bRerunMD = (Flags & MD_RERUN);
+     bIonize  = (Flags & MD_IONIZE);
+     bFFscan  = (Flags & MD_FFSCAN);
+     bAppend  = (Flags & MD_APPENDFILES);
+     bGStatEveryStep = FALSE;
+     if (Flags & MD_RESETCOUNTERSHALFWAY)
+     {
+         if (ir->nsteps > 0)
+         {
+             /* Signal to reset the counters half the simulation steps. */
+             wcycle_set_reset_counters(wcycle,ir->nsteps/2);
+         }
+         /* Signal to reset the counters halfway the simulation time. */
+         bResetCountersHalfMaxH = (max_hours > 0);
+     }
+ 
+     /* md-vv uses averaged full step velocities for T-control
+        md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control)
+        md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */
+     bVV = EI_VV(ir->eI);
+     if (bVV) /* to store the initial velocities while computing virial */
+     {
+         snew(cbuf,top_global->natoms);
+     }
+     /* all the iteratative cases - only if there are constraints */
+     bIterations = ((IR_NPT_TROTTER(ir)) && (constr) && (!bRerunMD));
+     bTrotter = (bVV && (IR_NPT_TROTTER(ir) || (IR_NVT_TROTTER(ir))));
+ 
+     if (bRerunMD)
+     {
+         /* Since we don't know if the frames read are related in any way,
+          * rebuild the neighborlist at every step.
+          */
+         ir->nstlist       = 1;
+         ir->nstcalcenergy = 1;
+         nstglobalcomm     = 1;
+     }
+ 
+     check_ir_old_tpx_versions(cr,fplog,ir,top_global);
+ 
+     nstglobalcomm = check_nstglobalcomm(fplog,cr,nstglobalcomm,ir);
+     /*bGStatEveryStep = (nstglobalcomm == 1);*/
+     bGStatEveryStep = FALSE;
+ 
+     if (!bGStatEveryStep && ir->nstlist == -1 && fplog != NULL)
+     {
+         fprintf(fplog,
+                 "To reduce the energy communication with nstlist = -1\n"
+                 "the neighbor list validity should not be checked at every step,\n"
+                 "this means that exact integration is not guaranteed.\n"
+                 "The neighbor list validity is checked after:\n"
+                 "  <n.list life time> - 2*std.dev.(n.list life time)  steps.\n"
+                 "In most cases this will result in exact integration.\n"
+                 "This reduces the energy communication by a factor of 2 to 3.\n"
+                 "If you want less energy communication, set nstlist > 3.\n\n");
+     }
+ 
+     if (bRerunMD || bFFscan)
+     {
+         ir->nstxtcout = 0;
+     }
+     groups = &top_global->groups;
+ 
+     /* Initial values */
+     init_md(fplog,cr,ir,oenv,&t,&t0,&state_global->lambda,&lam0,
+             nrnb,top_global,&upd,
+             nfile,fnm,&outf,&mdebin,
+             force_vir,shake_vir,mu_tot,&bSimAnn,&vcm,state_global,Flags);
+ 
+     clear_mat(total_vir);
+     clear_mat(pres);
+     /* Energy terms and groups */
+     snew(enerd,1);
+     init_enerdata(top_global->groups.grps[egcENER].nr,ir->n_flambda,enerd);
+     if (DOMAINDECOMP(cr))
+     {
+         f = NULL;
+     }
+     else
+     {
+         snew(f,top_global->natoms);
+     }
+ 
+     /* Kinetic energy data */
+     snew(ekind,1);
+     init_ekindata(fplog,top_global,&(ir->opts),ekind);
+     /* needed for iteration of constraints */
+     snew(ekind_save,1);
+     init_ekindata(fplog,top_global,&(ir->opts),ekind_save);
+     /* Copy the cos acceleration to the groups struct */
+     ekind->cosacc.cos_accel = ir->cos_accel;
+ 
+     gstat = global_stat_init(ir);
+     debug_gmx();
+ 
+     /* Check for polarizable models and flexible constraints */
+     shellfc = init_shell_flexcon(fplog,
+                                  top_global,n_flexible_constraints(constr),
+                                  (ir->bContinuation ||
+                                   (DOMAINDECOMP(cr) && !MASTER(cr))) ?
+                                  NULL : state_global->x);
+ 
+ /*    if (DEFORM(*ir))
+     {
+ #ifdef GMX_THREADS
+         tMPI_Thread_mutex_lock(&deform_init_box_mutex);
+ #endif
+         set_deform_reference_box(upd,
+                                  deform_init_init_step_tpx,
+                                  deform_init_box_tpx);
+ #ifdef GMX_THREADS
+         tMPI_Thread_mutex_unlock(&deform_init_box_mutex);
+ #endif
+     }*/
+ 
+ /*    {
+         double io = compute_io(ir,top_global->natoms,groups,mdebin->ebin->nener,1);
+         if ((io > 2000) && MASTER(cr))
+             fprintf(stderr,
+                     "\nWARNING: This run will generate roughly %.0f Mb of data\n\n",
+                     io);
+     }*/
+ 
+     if (DOMAINDECOMP(cr)) {
+         top = dd_init_local_top(top_global);
+ 
+         snew(state,1);
+         dd_init_local_state(cr->dd,state_global,state);
+ 
+         if (DDMASTER(cr->dd) && ir->nstfout) {
+             snew(f_global,state_global->natoms);
+         }
+     } else {
+         if (PAR(cr)) {
+             /* Initialize the particle decomposition and split the topology */
+             top = split_system(fplog,top_global,ir,cr);
+ 
+             pd_cg_range(cr,&fr->cg0,&fr->hcg);
+             pd_at_range(cr,&a0,&a1);
+         } else {
+             top = gmx_mtop_generate_local_top(top_global,ir);
+ 
+             a0 = 0;
+             a1 = top_global->natoms;
+         }
+ 
+         state = partdec_init_local_state(cr,state_global);
+         f_global = f;
+ 
+         atoms2md(top_global,ir,0,NULL,a0,a1-a0,mdatoms);
+ 
+         if (vsite) {
+             set_vsite_top(vsite,top,mdatoms,cr);
+         }
+ 
+         if (ir->ePBC != epbcNONE && !ir->bPeriodicMols) {
+             graph = mk_graph(fplog,&(top->idef),0,top_global->natoms,FALSE,FALSE);
+         }
+ 
+         if (shellfc) {
+             make_local_shells(cr,mdatoms,shellfc);
+         }
+ 
+         if (ir->pull && PAR(cr)) {
+             dd_make_local_pull_groups(NULL,ir->pull,mdatoms);
+         }
+     }
+ 
+     if (DOMAINDECOMP(cr))
+     {
+         /* Distribute the charge groups over the nodes from the master node */
+         dd_partition_system(fplog,ir->init_step,cr,TRUE,1,
+                             state_global,top_global,ir,
+                             state,&f,mdatoms,top,fr,
+                             vsite,shellfc,constr,
+                             nrnb,wcycle,FALSE);
+     }
+ 
+     update_mdatoms(mdatoms,state->lambda);
+ 
+     if (MASTER(cr))
+     {
+         if (opt2bSet("-cpi",nfile,fnm))
+         {
+             /* Update mdebin with energy history if appending to output files */
+             if ( Flags & MD_APPENDFILES )
+             {
+                 restore_energyhistory_from_state(mdebin,&state_global->enerhist);
+             }
+             else
+             {
+                 /* We might have read an energy history from checkpoint,
+                  * free the allocated memory and reset the counts.
+                  */
+                 done_energyhistory(&state_global->enerhist);
+                 init_energyhistory(&state_global->enerhist);
+             }
+         }
+         /* Set the initial energy history in state by updating once */
+         update_energyhistory(&state_global->enerhist,mdebin);
+     }
+ 
+     if ((state->flags & (1<<estLD_RNG)) && (Flags & MD_READ_RNG)) {
+         /* Set the random state if we read a checkpoint file */
+         set_stochd_state(upd,state);
+     }
+ 
+     /* Initialize constraints */
+     if (constr) {
+         if (!DOMAINDECOMP(cr))
+             set_constraints(constr,top,ir,mdatoms,cr);
+     }
+ 
+     /* Check whether we have to GCT stuff */
+  /*   bTCR = ftp2bSet(efGCT,nfile,fnm);
+     if (bTCR) {
+         if (MASTER(cr)) {
+             fprintf(stderr,"Will do General Coupling Theory!\n");
+         }
+         gnx = top_global->mols.nr;
+         snew(grpindex,gnx);
+         for(i=0; (i<gnx); i++) {
+             grpindex[i] = i;
+         }
+     }*/
+ 
+ /*    if (repl_ex_nst > 0 && MASTER(cr))
+         repl_ex = init_replica_exchange(fplog,cr->ms,state_global,ir,
+                                         repl_ex_nst,repl_ex_seed);*/
+ 
+     if (!ir->bContinuation && !bRerunMD)
+     {
+         if (mdatoms->cFREEZE && (state->flags & (1<<estV)))
+         {
+             /* Set the velocities of frozen particles to zero */
+             for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++)
+             {
+                 for(m=0; m<DIM; m++)
+                 {
+                     if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m])
+                     {
+                         state->v[i][m] = 0;
+                     }
+                 }
+             }
+         }
+ 
+         if (constr)
+         {
+             /* Constrain the initial coordinates and velocities */
+             do_constrain_first(fplog,constr,ir,mdatoms,state,f,
+                                graph,cr,nrnb,fr,top,shake_vir);
+         }
+         if (vsite)
+         {
+             /* Construct the virtual sites for the initial configuration */
+             construct_vsites(fplog,vsite,state->x,nrnb,ir->delta_t,NULL,
+                              top->idef.iparams,top->idef.il,
+                              fr->ePBC,fr->bMolPBC,graph,cr,state->box);
+         }
+     }
+ 
+     debug_gmx();
+ 
+     /* I'm assuming we need global communication the first time! MRS */
+     cglo_flags = (CGLO_TEMPERATURE | CGLO_GSTAT
+                   | (bVV ? CGLO_PRESSURE:0)
+                   | (bVV ? CGLO_CONSTRAINT:0)
+                   | (bRerunMD ? CGLO_RERUNMD:0)
+                   | ((Flags & MD_READ_EKIN) ? CGLO_READEKIN:0));
+ 
+     bSumEkinhOld = FALSE;
+     compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
+                     wcycle,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
+                     constr,NULL,FALSE,state->box,
+                     top_global,&pcurr,top_global->natoms,&bSumEkinhOld,cglo_flags);
+     if (ir->eI == eiVVAK) {
+         /* a second call to get the half step temperature initialized as well */
+         /* we do the same call as above, but turn the pressure off -- internally, this
+            is recognized as a velocity verlet half-step kinetic energy calculation.
+            This minimized excess variables, but perhaps loses some logic?*/
+ 
+         compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
+                         wcycle,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
+                         constr,NULL,FALSE,state->box,
+                         top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
+                         cglo_flags &~ CGLO_PRESSURE);
+     }
+ 
+     /* Calculate the initial half step temperature, and save the ekinh_old */
+     if (!(Flags & MD_STARTFROMCPT))
+     {
+         for(i=0; (i<ir->opts.ngtc); i++)
+         {
+             copy_mat(ekind->tcstat[i].ekinh,ekind->tcstat[i].ekinh_old);
+         }
+     }
+     if (ir->eI != eiVV) 
+     {
+         enerd->term[F_TEMP] *= 2; /* result of averages being done over previous and current step,
+                                      and there is no previous step */
+     }
+     temp0 = enerd->term[F_TEMP];
+ 
+     /* if using an iterative algorithm, we need to create a working directory for the state. */
+     if (bIterations)
+     {
+             bufstate = init_bufstate(state);
+     }
+     if (bFFscan)
+     {
+         snew(xcopy,state->natoms);
+         snew(vcopy,state->natoms);
+         copy_rvecn(state->x,xcopy,0,state->natoms);
+         copy_rvecn(state->v,vcopy,0,state->natoms);
+         copy_mat(state->box,boxcopy);
+     }
+ 
+     /* need to make an initiation call to get the Trotter variables set, as well as other constants for non-trotter
+        temperature control */
+     trotter_seq = init_npt_vars(ir,state,&MassQ,bTrotter);
+ 
+     if (MASTER(cr))
+     {
+         if (constr && !ir->bContinuation && ir->eConstrAlg == econtLINCS)
+         {
+             fprintf(fplog,
+                     "RMS relative constraint deviation after constraining: %.2e\n",
+                     constr_rmsd(constr,FALSE));
+         }
+         fprintf(fplog,"Initial temperature: %g K\n",enerd->term[F_TEMP]);
+         if (bRerunMD)
+         {
+             fprintf(stderr,"starting md rerun '%s', reading coordinates from"
+                     " input trajectory '%s'\n\n",
+                     *(top_global->name),opt2fn("-rerun",nfile,fnm));
+             if (bVerbose)
+             {
+                 fprintf(stderr,"Calculated time to finish depends on nsteps from "
+                         "run input file,\nwhich may not correspond to the time "
+                         "needed to process input trajectory.\n\n");
+             }
+         }
+         else
+         {
+             char tbuf[20];
+             fprintf(stderr,"starting mdrun '%s'\n",
+                     *(top_global->name));
+             if (ir->nsteps >= 0)
+             {
+                 sprintf(tbuf,"%8.1f",(ir->init_step+ir->nsteps)*ir->delta_t);
+             }
+             else
+             {
+                 sprintf(tbuf,"%s","infinite");
+             }
+             if (ir->init_step > 0)
+             {
+                 fprintf(stderr,"%s steps, %s ps (continuing from step %s, %8.1f ps).\n",
+                         gmx_step_str(ir->init_step+ir->nsteps,sbuf),tbuf,
+                         gmx_step_str(ir->init_step,sbuf2),
+                         ir->init_step*ir->delta_t);
+             }
+             else
+             {
+                 fprintf(stderr,"%s steps, %s ps.\n",
+                         gmx_step_str(ir->nsteps,sbuf),tbuf);
+             }
+         }
+         fprintf(fplog,"\n");
+     }
+ 
+     /* Set and write start time */
+     runtime_start(runtime);
+     print_date_and_time(fplog,cr->nodeid,"Started mdrun",runtime);
+     wallcycle_start(wcycle,ewcRUN);
+     if (fplog)
+         fprintf(fplog,"\n");
+ 
+     /* safest point to do file checkpointing is here.  More general point would be immediately before integrator call */
+ /*#ifdef GMX_FAHCORE
+     chkpt_ret=fcCheckPointParallel( cr->nodeid,
+                                     NULL,0);
+     if ( chkpt_ret == 0 )
+         gmx_fatal( 3,__FILE__,__LINE__, "Checkpoint error on step %d\n", 0 );
+ #endif*/
+ 
+     debug_gmx();
+     /***********************************************************
+      *
+      *             Loop over MD steps
+      *
+      ************************************************************/
+ 
+     /* if rerunMD then read coordinates and velocities from input trajectory */
+     if (bRerunMD)
+     {
+         if (getenv("GMX_FORCE_UPDATE"))
+         {
+             bForceUpdate = TRUE;
+         }
+ 
+         bNotLastFrame = read_first_frame(oenv,&status,
+                                          opt2fn("-rerun",nfile,fnm),
+                                          &rerun_fr,TRX_NEED_X | TRX_READ_V);
+         if (rerun_fr.natoms != top_global->natoms)
+         {
+             gmx_fatal(FARGS,
+                       "Number of atoms in trajectory (%d) does not match the "
+                       "run input file (%d)\n",
+                       rerun_fr.natoms,top_global->natoms);
+         }
+         if (ir->ePBC != epbcNONE)
+         {
+             if (!rerun_fr.bBox)
+             {
+                 gmx_fatal(FARGS,"Rerun trajectory frame step %d time %f does not contain a box, while pbc is used",rerun_fr.step,rerun_fr.time);
+             }
+             if (max_cutoff2(ir->ePBC,rerun_fr.box) < sqr(fr->rlistlong))
+             {
+                 gmx_fatal(FARGS,"Rerun trajectory frame step %d time %f has too small box dimensions",rerun_fr.step,rerun_fr.time);
+             }
+ 
+             /* Set the shift vectors.
+              * Necessary here when have a static box different from the tpr box.
+              */
+             calc_shifts(rerun_fr.box,fr->shift_vec);
+         }
+     }
+ 
+     /* loop over MD steps or if rerunMD to end of input trajectory */
+     bFirstStep = TRUE;
+     /* Skip the first Nose-Hoover integration when we get the state from tpx */
+     bStateFromTPX = !opt2bSet("-cpi",nfile,fnm);
+     bInitStep = bFirstStep && (bStateFromTPX || bVV);
+     bStartingFromCpt = (Flags & MD_STARTFROMCPT) && bInitStep;
+     bLastStep    = FALSE;
+     bSumEkinhOld = FALSE;
+     bExchanged   = FALSE;
+ 
+     init_global_signals(&gs,cr,ir,repl_ex_nst);
+ 
+     step = ir->init_step;
+     step_rel = 0;
+ 
+     if (ir->nstlist == -1)
+     {
+         init_nlistheuristics(&nlh,bGStatEveryStep,step);
+     }
+ 
+     bLastStep = (bRerunMD || (ir->nsteps >= 0 && step_rel > ir->nsteps));
+     while (!bLastStep || (bRerunMD && bNotLastFrame)) {
+ 
+         wallcycle_start(wcycle,ewcSTEP);
+ 
+         GMX_MPE_LOG(ev_timestep1);
+ 
+         if (bRerunMD) {
+             if (rerun_fr.bStep) {
+                 step = rerun_fr.step;
+                 step_rel = step - ir->init_step;
+             }
+             if (rerun_fr.bTime) {
+                 t = rerun_fr.time;
+             }
+             else
+             {
+                 t = step;
+             }
+         }
+         else
+         {
+             bLastStep = (step_rel == ir->nsteps);
+             t = t0 + step*ir->delta_t;
+         }
+ 
+         if (ir->efep != efepNO)
+         {
+             if (bRerunMD && rerun_fr.bLambda && (ir->delta_lambda!=0))
+             {
+                 state_global->lambda = rerun_fr.lambda;
+             }
+             else
+             {
+                 state_global->lambda = lam0 + step*ir->delta_lambda;
+             }
+             state->lambda = state_global->lambda;
+             bDoDHDL = do_per_step(step,ir->nstdhdl);
+         }
+ 
+         if (bSimAnn)
+         {
+             update_annealing_target_temp(&(ir->opts),t);
+         }
+ 
+         if (bRerunMD)
+         {
+             if (!(DOMAINDECOMP(cr) && !MASTER(cr)))
+             {
+                 for(i=0; i<state_global->natoms; i++)
+                 {
+                     copy_rvec(rerun_fr.x[i],state_global->x[i]);
+                 }
+                 if (rerun_fr.bV)
+                 {
+                     for(i=0; i<state_global->natoms; i++)
+                     {
+                         copy_rvec(rerun_fr.v[i],state_global->v[i]);
+                     }
+                 }
+                 else
+                 {
+                     for(i=0; i<state_global->natoms; i++)
+                     {
+                         clear_rvec(state_global->v[i]);
+                     }
+                     if (bRerunWarnNoV)
+                     {
+                         fprintf(stderr,"\nWARNING: Some frames do not contain velocities.\n"
+                                 "         Ekin, temperature and pressure are incorrect,\n"
+                                 "         the virial will be incorrect when constraints are present.\n"
+                                 "\n");
+                         bRerunWarnNoV = FALSE;
+                     }
+                 }
+             }
+             copy_mat(rerun_fr.box,state_global->box);
+             copy_mat(state_global->box,state->box);
+ 
+             if (vsite && (Flags & MD_RERUN_VSITE))
+             {
+                 if (DOMAINDECOMP(cr))
+                 {
+                     gmx_fatal(FARGS,"Vsite recalculation with -rerun is not implemented for domain decomposition, use particle decomposition");
+                 }
+                 if (graph)
+                 {
+                     /* Following is necessary because the graph may get out of sync
+                      * with the coordinates if we only have every N'th coordinate set
+                      */
+                     mk_mshift(fplog,graph,fr->ePBC,state->box,state->x);
+                     shift_self(graph,state->box,state->x);
+                 }
+                 construct_vsites(fplog,vsite,state->x,nrnb,ir->delta_t,state->v,
+                                  top->idef.iparams,top->idef.il,
+                                  fr->ePBC,fr->bMolPBC,graph,cr,state->box);
+                 if (graph)
+                 {
+                     unshift_self(graph,state->box,state->x);
+                 }
+             }
+         }
+ 
+         /* Stop Center of Mass motion */
+         bStopCM = (ir->comm_mode != ecmNO && do_per_step(step,ir->nstcomm));
+ 
+         /* Copy back starting coordinates in case we're doing a forcefield scan */
+         if (bFFscan)
+         {
+             for(ii=0; (ii<state->natoms); ii++)
+             {
+                 copy_rvec(xcopy[ii],state->x[ii]);
+                 copy_rvec(vcopy[ii],state->v[ii]);
+             }
+             copy_mat(boxcopy,state->box);
+         }
+ 
+         if (bRerunMD)
+         {
+             /* for rerun MD always do Neighbour Searching */
+             bNS = (bFirstStep || ir->nstlist != 0);
+             bNStList = bNS;
+         }
+         else
+         {
+             /* Determine whether or not to do Neighbour Searching and LR */
+             bNStList = (ir->nstlist > 0  && step % ir->nstlist == 0);
+ 
+             bNS = (bFirstStep || bExchanged || bNStList ||
+                    (ir->nstlist == -1 && nlh.nabnsb > 0));
+ 
+             if (bNS && ir->nstlist == -1)
+             {
+                 set_nlistheuristics(&nlh,bFirstStep || bExchanged,step);
+             }
+         }
+ 
+         /* < 0 means stop at next step, > 0 means stop at next NS step */
+         if ( (gs.set[eglsSTOPCOND] < 0 ) ||
+              ( (gs.set[eglsSTOPCOND] > 0 ) && ( bNS || ir->nstlist==0)) )
+         {
+             bLastStep = TRUE;
+         }
+ 
+         /* Determine whether or not to update the Born radii if doing GB */
+         bBornRadii=bFirstStep;
+         if (ir->implicit_solvent && (step % ir->nstgbradii==0))
+         {
+             bBornRadii=TRUE;
+         }
+ 
+         do_log = do_per_step(step,ir->nstlog) || bFirstStep || bLastStep;
+         do_verbose = bVerbose &&
+                   (step % stepout == 0 || bFirstStep || bLastStep);
+ 
+         if (bNS && !(bFirstStep && ir->bContinuation && !bRerunMD))
+         {
+             if (bRerunMD)
+             {
+                 bMasterState = TRUE;
+             }
+             else
+             {
+                 bMasterState = FALSE;
+                 /* Correct the new box if it is too skewed */
+                 if (DYNAMIC_BOX(*ir))
+                 {
+                     if (correct_box(fplog,step,state->box,graph))
+                     {
+                         bMasterState = TRUE;
+                     }
+                 }
+                 if (DOMAINDECOMP(cr) && bMasterState)
+                 {
+                     dd_collect_state(cr->dd,state,state_global);
+                 }
+             }
+ 
+             if (DOMAINDECOMP(cr))
+             {
+                 /* Repartition the domain decomposition */
+                 wallcycle_start(wcycle,ewcDOMDEC);
+                 dd_partition_system(fplog,step,cr,
+                                     bMasterState,nstglobalcomm,
+                                     state_global,top_global,ir,
+                                     state,&f,mdatoms,top,fr,
+                                     vsite,shellfc,constr,
+                                     nrnb,wcycle,do_verbose);
+                 wallcycle_stop(wcycle,ewcDOMDEC);
+                 /* If using an iterative integrator, reallocate space to match the decomposition */
+             }
+         }
+ 
+         if (MASTER(cr) && do_log && !bFFscan)
+         {
+             print_ebin_header(fplog,step,t,state->lambda);
+         }
+ 
+         if (ir->efep != efepNO)
+         {
+             update_mdatoms(mdatoms,state->lambda);
+         }
+ 
+         if (bRerunMD && rerun_fr.bV)
+         {
+ 
+             /* We need the kinetic energy at minus the half step for determining
+              * the full step kinetic energy and possibly for T-coupling.*/
+             /* This may not be quite working correctly yet . . . . */
+             compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
+                             wcycle,enerd,NULL,NULL,NULL,NULL,mu_tot,
+                             constr,NULL,FALSE,state->box,
+                             top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
+                             CGLO_RERUNMD | CGLO_GSTAT | CGLO_TEMPERATURE);
+         }
+         clear_mat(force_vir);
+ 
+         /* Ionize the atoms if necessary */
+ /*        if (bIonize)
+         {
+             ionize(fplog,oenv,mdatoms,top_global,t,ir,state->x,state->v,
+                    mdatoms->start,mdatoms->start+mdatoms->homenr,state->box,cr);
+         }*/
+ 
+         /* Update force field in ffscan program */
+ /*        if (bFFscan)
+         {
+             if (update_forcefield(fplog,
+                                   nfile,fnm,fr,
+                                   mdatoms->nr,state->x,state->box)) {
+                 if (gmx_parallel_env_initialized())
+                 {
+                     gmx_finalize();
+                 }
+                 exit(0);
+             }
+         }*/
+ 
+         GMX_MPE_LOG(ev_timestep2);
+ 
+         /* We write a checkpoint at this MD step when:
+          * either at an NS step when we signalled through gs,
+          * or at the last step (but not when we do not want confout),
+          * but never at the first step or with rerun.
+          */
+ /*        bCPT = (((gs.set[eglsCHKPT] && bNS) ||
+                  (bLastStep && (Flags & MD_CONFOUT))) &&
+                 step > ir->init_step && !bRerunMD);
+         if (bCPT)
+         {
+             gs.set[eglsCHKPT] = 0;
+         }*/
+ 
+         /* Determine the energy and pressure:
+          * at nstcalcenergy steps and at energy output steps (set below).
+          */
+         bNstEner = (bGStatEveryStep || do_per_step(step,ir->nstcalcenergy));
+         bCalcEnerPres = bNstEner;
+ 
+         /* Do we need global communication ? */
+         bGStat = (bCalcEnerPres || bStopCM ||
+                   (ir->nstlist == -1 && !bRerunMD && step >= nlh.step_nscheck));
+ 
+         do_ene = (do_per_step(step,ir->nstenergy) || bLastStep);
+ 
+         if (do_ene || do_log)
+         {
+             bCalcEnerPres = TRUE;
+             bGStat    = TRUE;
+         }
+ 
+         /* these CGLO_ options remain the same throughout the iteration */
+         cglo_flags = ((bRerunMD ? CGLO_RERUNMD : 0) |
+                       (bStopCM ? CGLO_STOPCM : 0) |
+                       (bGStat ? CGLO_GSTAT : 0)
+             );
+ 
+         force_flags = (GMX_FORCE_STATECHANGED |
+                        ((DYNAMIC_BOX(*ir) || bRerunMD) ? GMX_FORCE_DYNAMICBOX : 0) |
+                        GMX_FORCE_ALLFORCES |
+                        (bNStList ? GMX_FORCE_DOLR : 0) |
+                        GMX_FORCE_SEPLRF |
+                        (bCalcEnerPres ? GMX_FORCE_VIRIAL : 0) |
+                        (bDoDHDL ? GMX_FORCE_DHDL : 0)
+             );
+ 
+         if (shellfc)
+         {
+             /* Now is the time to relax the shells */
+             count=relax_shell_flexcon(fplog,cr,bVerbose,bFFscan ? step+1 : step,
+                                       ir,bNS,force_flags,
+                                       bStopCM,top,top_global,
+                                       constr,enerd,fcd,
+                                       state,f,force_vir,mdatoms,
+                                       nrnb,wcycle,graph,groups,
+                                       shellfc,fr,bBornRadii,t,mu_tot,
+                                       state->natoms,&bConverged,vsite,
+                                       outf->fp_field);
+             tcount+=count;
+ 
+             if (bConverged)
+             {
+                 nconverged++;
+             }
+         }
+         else
+         {
+             /* The coordinates (x) are shifted (to get whole molecules)
+              * in do_force.
+              * This is parallelized as well, and does communication too.
+              * Check comments in sim_util.c
+              */
+ 
+             do_force(fplog,cr,ir,step,nrnb,wcycle,top,top_global,groups,
+                      state->box,state->x,&state->hist,
+                      f,force_vir,mdatoms,enerd,fcd,
+                      state->lambda,graph,
+                      fr,vsite,mu_tot,t,outf->fp_field,ed,bBornRadii,
+                      (bNS ? GMX_FORCE_NS : 0) | force_flags);
+         }
+ 
+         GMX_BARRIER(cr->mpi_comm_mygroup);
+ 
+  /*       if (bTCR)
+         {
+             mu_aver = calc_mu_aver(cr,state->x,mdatoms->chargeA,
+                                    mu_tot,&top_global->mols,mdatoms,gnx,grpindex);
+         }
+ 
+         if (bTCR && bFirstStep)
+         {
+             tcr=init_coupling(fplog,nfile,fnm,cr,fr,mdatoms,&(top->idef));
+             fprintf(fplog,"Done init_coupling\n");
+             fflush(fplog);
+         }*/
+ 
+         /*  ############### START FIRST UPDATE HALF-STEP ############### */
+ 
+         if (bVV && !bStartingFromCpt && !bRerunMD)
+         {
+             if (ir->eI == eiVV)
+             {
+                 if (bInitStep)
+                 {
+                     /* if using velocity verlet with full time step Ekin,
+                      * take the first half step only to compute the
+                      * virial for the first step. From there,
+                      * revert back to the initial coordinates
+                      * so that the input is actually the initial step.
+                      */
+                     copy_rvecn(state->v,cbuf,0,state->natoms); /* should make this better for parallelizing? */
+                 }
+ 
+                 /* this is for NHC in the Ekin(t+dt/2) version of vv */
+                 if (!bInitStep)
+                 {
+                 trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ2);
+                 }
+ 
+               if (ir->eI == eiVVAK)
+               {
+                 update_tcouple(fplog,step,ir,state,ekind,wcycle,upd,&MassQ,mdatoms);
+               }
+ 
+                 update_coords(fplog,step,ir,mdatoms,state,
+                               f,fr->bTwinRange && bNStList,fr->f_twin,fcd,
+                               ekind,M,wcycle,upd,bInitStep,etrtVELOCITY1,
+                               cr,nrnb,constr,&top->idef);
+ 
+                 if (bIterations)
+                 {
+                     gmx_iterate_init(&iterate,bIterations && !bInitStep);
+                 }
+                 /* for iterations, we save these vectors, as we will be self-consistently iterating
+                    the calculations */
+                 /*#### UPDATE EXTENDED VARIABLES IN TROTTER FORMULATION */
+ 
+                 /* save the state */
+                 if (bIterations && iterate.bIterate) {
+                     copy_coupling_state(state,bufstate,ekind,ekind_save,&(ir->opts));
+                 }
+             }
+ 
+             bFirstIterate = TRUE;
+             while (bFirstIterate || (bIterations && iterate.bIterate))
+             {
+                 if (bIterations && iterate.bIterate)
+                 {
+                     copy_coupling_state(bufstate,state,ekind_save,ekind,&(ir->opts));
+                     if (bFirstIterate && bTrotter)
+                     {
+                         /* The first time through, we need a decent first estimate
+                            of veta(t+dt) to compute the constraints.  Do
+                            this by computing the box volume part of the
+                            trotter integration at this time. Nothing else
+                            should be changed by this routine here.  If
+                            !(first time), we start with the previous value
+                            of veta.  */
+ 
+                         veta_save = state->veta;
+                         trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ,trotter_seq,ettTSEQ0);
+                         vetanew = state->veta;
+                         state->veta = veta_save;
+                     }
+                 }
+ 
+                 bOK = TRUE;
+                 if ( !bRerunMD || rerun_fr.bV || bForceUpdate) {  /* Why is rerun_fr.bV here?  Unclear. */
+                     dvdl = 0;
+ 
+                     update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,state,graph,f,
+                                        &top->idef,shake_vir,NULL,
+                                        cr,nrnb,wcycle,upd,constr,
+                                        bInitStep,TRUE,bCalcEnerPres,vetanew);
+ 
+                     if (!bOK && !bFFscan)
+                     {
+                         gmx_fatal(FARGS,"Constraint error: Shake, Lincs or Settle could not solve the constrains");
+                     }
+ 
+                 }
+                 else if (graph)
+                 { /* Need to unshift here if a do_force has been
+                      called in the previous step */
+                     unshift_self(graph,state->box,state->x);
+                 }
+ 
+ 
+                 if (bVV) {
+                     /* if VV, compute the pressure and constraints */
+                     /* if VV2, the pressure and constraints only if using pressure control.*/
+                     bPres = (ir->eI==eiVV || IR_NPT_TROTTER(ir));
+                     bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && IR_NPT_TROTTER(ir)));
+                     compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
+                                     wcycle,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
+                                     constr,NULL,FALSE,state->box,
+                                     top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
+                                     cglo_flags
+                                     | CGLO_ENERGY
+                                     | (bTemp ? CGLO_TEMPERATURE:0)
+                                     | (bPres ? CGLO_PRESSURE : 0)
+                                     | (bPres ? CGLO_CONSTRAINT : 0)
+                                     | (iterate.bIterate ? CGLO_ITERATE : 0)
+                                     | (bFirstIterate ? CGLO_FIRSTITERATE : 0)
+                                     | CGLO_SCALEEKIN
+                         );
+                 }
+                 /* explanation of above:
+                    a) We compute Ekin at the full time step
+                    if 1) we are using the AveVel Ekin, and it's not the
+                    initial step, or 2) if we are using AveEkin, but need the full
+                    time step kinetic energy for the pressure.
+                    b) If we are using EkinAveEkin for the kinetic energy for the temperature control, we still feed in
+                    EkinAveVel because it's needed for the pressure */
+ 
+                 /* temperature scaling and pressure scaling to produce the extended variables at t+dt */
+                 if (bVV && !bInitStep)
+                 {
+                 trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ, trotter_seq,ettTSEQ2);
+                 }
+ 
+                 if (bIterations &&
+                     done_iterating(cr,fplog,step,&iterate,bFirstIterate,
+                                    state->veta,&vetanew))
+                 {
+                     break;
+                 }
+                 bFirstIterate = FALSE;
+             }
+ 
+             if (bTrotter && !bInitStep) {
+                 copy_mat(shake_vir,state->svir_prev);
+                 copy_mat(force_vir,state->fvir_prev);
+                 if (IR_NVT_TROTTER(ir) && ir->eI==eiVV) {
+                     /* update temperature and kinetic energy now that step is over - this is the v(t+dt) point */
+                     enerd->term[F_TEMP] = sum_ekin(&(ir->opts),ekind,NULL,(ir->eI==eiVV),FALSE,FALSE);
+                     enerd->term[F_EKIN] = trace(ekind->ekin);
+                 }
+             }
+             /* if it's the initial step, we performed this first step just to get the constraint virial */
+             if (bInitStep && ir->eI==eiVV) {
+                 copy_rvecn(cbuf,state->v,0,state->natoms);
+             }
+ 
+             if (fr->bSepDVDL && fplog && do_log)
+             {
+                 fprintf(fplog,sepdvdlformat,"Constraint",0.0,dvdl);
+             }
+             enerd->term[F_DHDL_CON] += dvdl;
+ 
+             GMX_MPE_LOG(ev_timestep1);
+ 
+         }
+ 
+         /* MRS -- now done iterating -- compute the conserved quantity */
+         if (bVV) {
+             last_conserved = 0;
+             if (IR_NVT_TROTTER(ir) || IR_NPT_TROTTER(ir))
+             {
+                 last_conserved =
+                     NPT_energy(ir,state,&MassQ);
+                 if ((ir->eDispCorr != edispcEnerPres) && (ir->eDispCorr != edispcAllEnerPres))
+                 {
+                     last_conserved -= enerd->term[F_DISPCORR];
+                 }
+             }
+             if (ir->eI==eiVV) {
+                 last_ekin = enerd->term[F_EKIN]; /* does this get preserved through checkpointing? */
+             }
+         }
+ 
+         /* ########  END FIRST UPDATE STEP  ############## */
+         /* ########  If doing VV, we now have v(dt) ###### */
+ 
+         /* ################## START TRAJECTORY OUTPUT ################# */
+ 
+         /* Now we have the energies and forces corresponding to the
+          * coordinates at time t. We must output all of this before
+          * the update.
+          * for RerunMD t is read from input trajectory
+          */
+         GMX_MPE_LOG(ev_output_start);
+ 
+         mdof_flags = 0;
+         if (do_per_step(step,ir->nstxout)) { mdof_flags |= MDOF_X; }
+         if (do_per_step(step,ir->nstvout)) { mdof_flags |= MDOF_V; }
+         if (do_per_step(step,ir->nstfout)) { mdof_flags |= MDOF_F; }
+         if (do_per_step(step,ir->nstxtcout)) { mdof_flags |= MDOF_XTC; }
+ /*        if (bCPT) { mdof_flags |= MDOF_CPT; };*/
+ 
+ #ifdef GMX_FAHCORE
+         if (MASTER(cr))
+             fcReportProgress( ir->nsteps, step );
+ 
+         if (bLastStep)
+         {
+             /* Enforce writing positions and velocities at end of run */
+             mdof_flags |= (MDOF_X | MDOF_V);
+         }
+             /* sync bCPT and fc record-keeping */
+ /*            if (bCPT && MASTER(cr))
+                 fcRequestCheckPoint();*/
+ #endif
+ 
+         if (mdof_flags != 0)
+         {
+             wallcycle_start(wcycle,ewcTRAJ);
+ /*            if (bCPT)
+             {
+                 if (state->flags & (1<<estLD_RNG))
+                 {
+                     get_stochd_state(upd,state);
+                 }
+                 if (MASTER(cr))
+                 {
+                     if (bSumEkinhOld)
+                     {
+                         state_global->ekinstate.bUpToDate = FALSE;
+                     }
+                     else
+                     {
+                         update_ekinstate(&state_global->ekinstate,ekind);
+                         state_global->ekinstate.bUpToDate = TRUE;
+                     }
+                     update_energyhistory(&state_global->enerhist,mdebin);
+                 }
+             }*/
+             write_traj(fplog,cr,outf,mdof_flags,top_global,
+                        step,t,state,state_global,f,f_global,&n_xtc,&x_xtc);
+ /*            if (bCPT)
+             {
+                 nchkpt++;
+                 bCPT = FALSE;
+             }*/
+             debug_gmx();
+             if (bLastStep && step_rel == ir->nsteps &&
+                 (Flags & MD_CONFOUT) && MASTER(cr) &&
+                 !bRerunMD && !bFFscan)
+             {
+                 /* x and v have been collected in write_traj,
+                  * because a checkpoint file will always be written
+                  * at the last step.
+                  */
+                 fprintf(stderr,"\nWriting final coordinates.\n");
+                 if (ir->ePBC != epbcNONE && !ir->bPeriodicMols &&
+                     DOMAINDECOMP(cr))
+                 {
+                     /* Make molecules whole only for confout writing */
+                     do_pbc_mtop(fplog,ir->ePBC,state->box,top_global,state_global->x);
+                 }
+ /*                write_sto_conf_mtop(ftp2fn(efSTO,nfile,fnm),
+                                     *top_global->name,top_global,
+                                     state_global->x,state_global->v,
+                                     ir->ePBC,state->box);*/
+                 debug_gmx();
+             }
+             wallcycle_stop(wcycle,ewcTRAJ);
+         }
+         GMX_MPE_LOG(ev_output_finish);
+ 
+         /* kludge -- virial is lost with restart for NPT control. Must restart */
+         if (bStartingFromCpt && bVV)
+         {
+             copy_mat(state->svir_prev,shake_vir);
+             copy_mat(state->fvir_prev,force_vir);
+         }
+         /*  ################## END TRAJECTORY OUTPUT ################ */
+ 
+         /* Determine the pressure:
+          * always when we want exact averages in the energy file,
+          * at ns steps when we have pressure coupling,
+          * otherwise only at energy output steps (set below).
+          */
+ 
+         bNstEner = (bGStatEveryStep || do_per_step(step,ir->nstcalcenergy));
+         bCalcEnerPres = bNstEner;
+ 
+         /* Do we need global communication ? */
+         bGStat = (bGStatEveryStep || bStopCM || bNS ||
+                   (ir->nstlist == -1 && !bRerunMD && step >= nlh.step_nscheck));
+ 
+         do_ene = (do_per_step(step,ir->nstenergy) || bLastStep);
+ 
+         if (do_ene || do_log)
+         {
+             bCalcEnerPres = TRUE;
+             bGStat        = TRUE;
+         }
+ 
+         /* Determine the wallclock run time up till now */
+         run_time = gmx_gettime() - (double)runtime->real;
+ 
+         /* Check whether everything is still all right */
+         if (((int)gmx_get_stop_condition() > handled_stop_condition)
+ #ifdef GMX_THREADS
+           && MASTER(cr)
+ #endif
+           )
+         {
+             /* this just makes gs.sig compatible with the hack
+                of sending signals around by MPI_Reduce together with
+                other floats */
+             if ( gmx_get_stop_condition() == gmx_stop_cond_next_ns )
+                 gs.sig[eglsSTOPCOND]=1;
+             if ( gmx_get_stop_condition() == gmx_stop_cond_next )
+                 gs.sig[eglsSTOPCOND]=-1;
+             /* < 0 means stop at next step, > 0 means stop at next NS step */
+             if (fplog)
+             {
+                 fprintf(fplog,
+                         "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
+                         gmx_get_signal_name(),
+                         gs.sig[eglsSTOPCOND]==1 ? "NS " : "");
+                 fflush(fplog);
+             }
+             fprintf(stderr,
+                     "\n\nReceived the %s signal, stopping at the next %sstep\n\n",
+                     gmx_get_signal_name(),
+                     gs.sig[eglsSTOPCOND]==1 ? "NS " : "");
+             fflush(stderr);
+             handled_stop_condition=(int)gmx_get_stop_condition();
+         }
+         else if (MASTER(cr) && (bNS || ir->nstlist <= 0) &&
+                  (max_hours > 0 && run_time > max_hours*60.0*60.0*0.99) &&
+                  gs.sig[eglsSTOPCOND] == 0 && gs.set[eglsSTOPCOND] == 0)
+         {
+             /* Signal to terminate the run */
+             gs.sig[eglsSTOPCOND] = 1;
+             if (fplog)
+             {
+                 fprintf(fplog,"\nStep %s: Run time exceeded %.3f hours, will terminate the run\n",gmx_step_str(step,sbuf),max_hours*0.99);
+             }
+             fprintf(stderr, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n",gmx_step_str(step,sbuf),max_hours*0.99);
+         }
+ 
+         if (bResetCountersHalfMaxH && MASTER(cr) &&
+             run_time > max_hours*60.0*60.0*0.495)
+         {
+             gs.sig[eglsRESETCOUNTERS] = 1;
+         }
+ 
+         if (ir->nstlist == -1 && !bRerunMD)
+         {
+             /* When bGStatEveryStep=FALSE, global_stat is only called
+              * when we check the atom displacements, not at NS steps.
+              * This means that also the bonded interaction count check is not
+              * performed immediately after NS. Therefore a few MD steps could
+              * be performed with missing interactions.
+              * But wrong energies are never written to file,
+              * since energies are only written after global_stat
+              * has been called.
+              */
+             if (step >= nlh.step_nscheck)
+             {
+                 nlh.nabnsb = natoms_beyond_ns_buffer(ir,fr,&top->cgs,
+                                                      nlh.scale_tot,state->x);
+             }
+             else
+             {
+                 /* This is not necessarily true,
+                  * but step_nscheck is determined quite conservatively.
+                  */
+                 nlh.nabnsb = 0;
+             }
+         }
+ 
+         /* In parallel we only have to check for checkpointing in steps
+          * where we do global communication,
+          *  otherwise the other nodes don't know.
+          */
+         if (MASTER(cr) && ((bGStat || !PAR(cr)) &&
+                            cpt_period >= 0 &&
+                            (cpt_period == 0 ||
+                             run_time >= nchkpt*cpt_period*60.0)) &&
+             gs.set[eglsCHKPT] == 0)
+         {
+             gs.sig[eglsCHKPT] = 1;
+         }
+ 
+         if (bIterations)
+         {
+             gmx_iterate_init(&iterate,bIterations);
+         }
+ 
+         /* for iterations, we save these vectors, as we will be redoing the calculations */
+         if (bIterations && iterate.bIterate)
+         {
+             copy_coupling_state(state,bufstate,ekind,ekind_save,&(ir->opts));
+         }
+         bFirstIterate = TRUE;
+         while (bFirstIterate || (bIterations && iterate.bIterate))
+         {
+             /* We now restore these vectors to redo the calculation with improved extended variables */
+             if (bIterations)
+             {
+                 copy_coupling_state(bufstate,state,ekind_save,ekind,&(ir->opts));
+             }
+ 
+             /* We make the decision to break or not -after- the calculation of Ekin and Pressure,
+                so scroll down for that logic */
+ 
+             /* #########   START SECOND UPDATE STEP ################# */
+             GMX_MPE_LOG(ev_update_start);
+             bOK = TRUE;
+             if (!bRerunMD || rerun_fr.bV || bForceUpdate)
+             {
+                 wallcycle_start(wcycle,ewcUPDATE);
+                 dvdl = 0;
+                 /* Box is changed in update() when we do pressure coupling,
+                  * but we should still use the old box for energy corrections and when
+                  * writing it to the energy file, so it matches the trajectory files for
+                  * the same timestep above. Make a copy in a separate array.
+                  */
+                 copy_mat(state->box,lastbox);
+                 /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */
+                 if (bTrotter)
+                 {
+                     if (bIterations && iterate.bIterate)
+                     {
+                         if (bFirstIterate)
+                         {
+                             scalevir = 1;
+                         }
+                         else
+                         {
+                             /* we use a new value of scalevir to converge the iterations faster */
+                             scalevir = tracevir/trace(shake_vir);
+                         }
+                         msmul(shake_vir,scalevir,shake_vir);
+                         m_add(force_vir,shake_vir,total_vir);
+                         clear_mat(shake_vir);
+                     }
+                     trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ, trotter_seq,ettTSEQ3);
+                 }
+                 /* We can only do Berendsen coupling after we have summed
+                  * the kinetic energy or virial. Since this happens
+                  * in global_state after update, we should only do it at
+                  * step % nstlist = 1 with bGStatEveryStep=FALSE.
+                  */
+ 
+               if (ir->eI != eiVVAK)
+                 {
+                 update_tcouple(fplog,step,ir,state,ekind,wcycle,upd,&MassQ,mdatoms);
+                 }
+                 update_pcouple(fplog,step,ir,state,pcoupl_mu,M,wcycle,
+                                 upd,bInitStep);
+ 
+               if (bVV)
+               {
+                   /* velocity half-step update */
+                   update_coords(fplog,step,ir,mdatoms,state,f,fr->bTwinRange && bNStList,fr->f_twin,fcd,
+                                 ekind,M,wcycle,upd,FALSE,etrtVELOCITY2,cr,nrnb,constr,&top->idef);
+               }
+ 
+                 /* Above, initialize just copies ekinh into ekin,
+                  * it doesn't copy position (for VV),
+                  * and entire integrator for MD.
+                  */
+ 
+                 if (ir->eI==eiVVAK)
+                 {
+                     copy_rvecn(state->x,cbuf,0,state->natoms);
+                 }
+ 
+                 update_coords(fplog,step,ir,mdatoms,state,f,fr->bTwinRange && bNStList,fr->f_twin,fcd,
+                               ekind,M,wcycle,upd,bInitStep,etrtPOSITION,cr,nrnb,constr,&top->idef);
+                 wallcycle_stop(wcycle,ewcUPDATE);
+ 
+                 update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,state,graph,f,
+                                    &top->idef,shake_vir,force_vir,
+                                    cr,nrnb,wcycle,upd,constr,
+                                    bInitStep,FALSE,bCalcEnerPres,state->veta);
+ 
+                 if (ir->eI==eiVVAK)
+                 {
+                     /* erase F_EKIN and F_TEMP here? */
+                     /* just compute the kinetic energy at the half step to perform a trotter step */
+                     compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
+                                     wcycle,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
+                                     constr,NULL,FALSE,lastbox,
+                                     top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
+                                     cglo_flags | CGLO_TEMPERATURE | CGLO_CONSTRAINT
+                         );
+                     wallcycle_start(wcycle,ewcUPDATE);
+                     trotter_update(ir,step,ekind,enerd,state,total_vir,mdatoms,&MassQ, trotter_seq,ettTSEQ4);
+                     /* now that we know the scaling, we can compute the positions again */
+                     copy_rvecn(cbuf,state->x,0,state->natoms);
+ 
+                     update_coords(fplog,step,ir,mdatoms,state,f,fr->bTwinRange && bNStList,fr->f_twin,fcd,
+                                   ekind,M,wcycle,upd,bInitStep,etrtPOSITION,cr,nrnb,constr,&top->idef);
+                     wallcycle_stop(wcycle,ewcUPDATE);
+ 
+                     /* do we need an extra constraint here? just need to copy out of state->v to upd->xp? */
+                     /* are the small terms in the shake_vir here due
+                      * to numerical errors, or are they important
+                      * physically? I'm thinking they are just errors, but not completely sure.
+                      * For now, will call without actually constraining, constr=NULL*/
+                     update_constraints(fplog,step,&dvdl,ir,ekind,mdatoms,state,graph,f,
+                                        &top->idef,tmp_vir,force_vir,
+                                        cr,nrnb,wcycle,upd,NULL,
+                                        bInitStep,FALSE,bCalcEnerPres,state->veta);
+                 }
+                 if (!bOK && !bFFscan)
+                 {
+                     gmx_fatal(FARGS,"Constraint error: Shake, Lincs or Settle could not solve the constrains");
+                 }
+ 
+                 if (fr->bSepDVDL && fplog && do_log)
+                 {
+                     fprintf(fplog,sepdvdlformat,"Constraint",0.0,dvdl);
+                 }
+                 enerd->term[F_DHDL_CON] += dvdl;
+             }
+             else if (graph)
+             {
+                 /* Need to unshift here */
+                 unshift_self(graph,state->box,state->x);
+             }
+ 
+             GMX_BARRIER(cr->mpi_comm_mygroup);
+             GMX_MPE_LOG(ev_update_finish);
+ 
+             if (vsite != NULL)
+             {
+                 wallcycle_start(wcycle,ewcVSITECONSTR);
+                 if (graph != NULL)
+                 {
+                     shift_self(graph,state->box,state->x);
+                 }
+                 construct_vsites(fplog,vsite,state->x,nrnb,ir->delta_t,state->v,
+                                  top->idef.iparams,top->idef.il,
+                                  fr->ePBC,fr->bMolPBC,graph,cr,state->box);
+ 
+                 if (graph != NULL)
+                 {
+                     unshift_self(graph,state->box,state->x);
+                 }
+                 wallcycle_stop(wcycle,ewcVSITECONSTR);
+             }
+ 
+             /* ############## IF NOT VV, Calculate globals HERE, also iterate constraints ############ */
+             if (ir->nstlist == -1 && bFirstIterate)
+             {
+                 gs.sig[eglsNABNSB] = nlh.nabnsb;
+             }
+             compute_globals(fplog,gstat,cr,ir,fr,ekind,state,state_global,mdatoms,nrnb,vcm,
+                             wcycle,enerd,force_vir,shake_vir,total_vir,pres,mu_tot,
+                             constr,
+                             bFirstIterate ? &gs : NULL,(step % gs.nstms == 0),
+                             lastbox,
+                             top_global,&pcurr,top_global->natoms,&bSumEkinhOld,
+                             cglo_flags
+                             | (!EI_VV(ir->eI) ? CGLO_ENERGY : 0)
+                             | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0)
+                             | (!EI_VV(ir->eI) || bRerunMD ? CGLO_PRESSURE : 0)
+                             | (bIterations && iterate.bIterate ? CGLO_ITERATE : 0)
+                             | (bFirstIterate ? CGLO_FIRSTITERATE : 0)
+                             | CGLO_CONSTRAINT
+                 );
+             if (ir->nstlist == -1 && bFirstIterate)
+             {
+                 nlh.nabnsb = gs.set[eglsNABNSB];
+                 gs.set[eglsNABNSB] = 0;
+             }
+             /* bIterate is set to keep it from eliminating the old ekin kinetic energy terms */
+             /* #############  END CALC EKIN AND PRESSURE ################# */
+ 
+             /* Note: this is OK, but there are some numerical precision issues with using the convergence of
+                the virial that should probably be addressed eventually. state->veta has better properties,
+                but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could
+                generate the new shake_vir, but test the veta value for convergence.  This will take some thought. */
+ 
+             if (bIterations &&
+                 done_iterating(cr,fplog,step,&iterate,bFirstIterate,
+                                trace(shake_vir),&tracevir))
+             {
+                 break;
+             }
+             bFirstIterate = FALSE;
+         }
+ 
+         update_box(fplog,step,ir,mdatoms,state,graph,f,
+                    ir->nstlist==-1 ? &nlh.scale_tot : NULL,pcoupl_mu,nrnb,wcycle,upd,bInitStep,FALSE);
+ 
+         /* ################# END UPDATE STEP 2 ################# */
+         /* #### We now have r(t+dt) and v(t+dt/2)  ############# */
+ 
+         /* The coordinates (x) were unshifted in update */
+ /*        if (bFFscan && (shellfc==NULL || bConverged))
+         {
+             if (print_forcefield(fplog,enerd->term,mdatoms->homenr,
+                                  f,NULL,xcopy,
+                                  &(top_global->mols),mdatoms->massT,pres))
+             {
+                 if (gmx_parallel_env_initialized())
+                 {
+                     gmx_finalize();
+                 }
+                 fprintf(stderr,"\n");
+                 exit(0);
+             }
+         }*/
+         if (!bGStat)
+         {
+             /* We will not sum ekinh_old,
+              * so signal that we still have to do it.
+              */
+             bSumEkinhOld = TRUE;
+         }
+ 
+ /*        if (bTCR)
+         {*/
+             /* Only do GCT when the relaxation of shells (minimization) has converged,
+              * otherwise we might be coupling to bogus energies.
+              * In parallel we must always do this, because the other sims might
+              * update the FF.
+              */
+ 
+             /* Since this is called with the new coordinates state->x, I assume
+              * we want the new box state->box too. / EL 20040121
+              */
+ /*            do_coupling(fplog,oenv,nfile,fnm,tcr,t,step,enerd->term,fr,
+                         ir,MASTER(cr),
+                         mdatoms,&(top->idef),mu_aver,
+                         top_global->mols.nr,cr,
+                         state->box,total_vir,pres,
+                         mu_tot,state->x,f,bConverged);
+             debug_gmx();
+         }*/
+ 
+         /* #########  BEGIN PREPARING EDR OUTPUT  ###########  */
+ 
+         sum_dhdl(enerd,state->lambda,ir);
+         /* use the directly determined last velocity, not actually the averaged half steps */
+         if (bTrotter && ir->eI==eiVV)
+         {
+             enerd->term[F_EKIN] = last_ekin;
+         }
+         enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN];
+ 
+         switch (ir->etc)
+         {
+         case etcNO:
+             break;
+         case etcBERENDSEN:
+             break;
+         case etcNOSEHOOVER:
+             if (IR_NVT_TROTTER(ir)) {
+                 enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + last_conserved;
+             } else {
+                 enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] +
+                     NPT_energy(ir,state,&MassQ);
+             }
+             break;
+         case etcVRESCALE:
+             enerd->term[F_ECONSERVED] =
+                 enerd->term[F_ETOT] + vrescale_energy(&(ir->opts),
+                                                       state->therm_integral);
+             break;
+         default:
+             break;
+         }
+ 
+         /* Check for excessively large energies */
+ /*        if (bIonize)
+         {
+ #ifdef GMX_DOUBLE
+             real etot_max = 1e200;
+ #else
+             real etot_max = 1e30;
+ #endif
+             if (fabs(enerd->term[F_ETOT]) > etot_max)
+             {
+                 fprintf(stderr,"Energy too large (%g), giving up\n",
+                         enerd->term[F_ETOT]);
+             }
+         }*/
+         /* #########  END PREPARING EDR OUTPUT  ###########  */
+ 
+         /* Time for performance */
+         if (((step % stepout) == 0) || bLastStep)
+         {
+             runtime_upd_proc(runtime);
+         }
+ 
+         /* Output stuff */
+         if (MASTER(cr))
+         {
+             gmx_bool do_dr,do_or;
+ 
+             if (!(bStartingFromCpt && (EI_VV(ir->eI))))
+             {
+                 if (bNstEner)
+                 {
+                     upd_mdebin(mdebin,bDoDHDL,TRUE,
+                                t,mdatoms->tmass,enerd,state,lastbox,
+                                shake_vir,force_vir,total_vir,pres,
+                                ekind,mu_tot,constr);
+                 }
+                 else
+                 {
+                     upd_mdebin_step(mdebin);
+                 }
+ 
+                 do_dr  = do_per_step(step,ir->nstdisreout);
+                 do_or  = do_per_step(step,ir->nstorireout);
+ 
+                 print_ebin(outf->fp_ene,do_ene,do_dr,do_or,do_log?fplog:NULL,
+                            step,t,
+                            eprNORMAL,bCompact,mdebin,fcd,groups,&(ir->opts));
+             }
+             if (ir->ePull != epullNO)
+             {
+                 pull_print_output(ir->pull,step,t);
+             }
+ 
+             if (do_per_step(step,ir->nstlog))
+             {
+                 if(fflush(fplog) != 0)
+                 {
+                     gmx_fatal(FARGS,"Cannot flush logfile - maybe you are out of quota?");
+                 }
+             }
+         }
+ 
+ 
+         /* Remaining runtime */
+         if (MULTIMASTER(cr) && (do_verbose || gmx_got_usr_signal() ))
+         {
+             if (shellfc)
+             {
+                 fprintf(stderr,"\n");
+             }
+             print_time(stderr,runtime,step,ir,cr);
+         }
+ 
+               /* Set new positions for the group to embed */
+               if(!bLastStep){
+                       if(step_rel<=it_xy)
+                       {
+                               fac[0]+=xy_step;
+                               fac[1]+=xy_step;
+                       } else if (step_rel<=(it_xy+it_z))
+                       {
+                               fac[2]+=z_step;
+                       }
+                       resize(ins_at,r_ins,state_global->x,pos_ins,fac);
+               }
+ 
+         /* Replica exchange */
+ /*        bExchanged = FALSE;
+         if ((repl_ex_nst > 0) && (step > 0) && !bLastStep &&
+             do_per_step(step,repl_ex_nst))
+         {
+             bExchanged = replica_exchange(fplog,cr,repl_ex,
+                                           state_global,enerd->term,
+                                           state,step,t);
+         }
+         if (bExchanged && PAR(cr))
+         {
+             if (DOMAINDECOMP(cr))
+             {
+                 dd_partition_system(fplog,step,cr,TRUE,1,
+                                     state_global,top_global,ir,
+                                     state,&f,mdatoms,top,fr,
+                                     vsite,shellfc,constr,
+                                     nrnb,wcycle,FALSE);
+             }
+             else
+             {
+                 bcast_state(cr,state,FALSE);
+             }
+         }*/
+ 
+         bFirstStep = FALSE;
+         bInitStep = FALSE;
+         bStartingFromCpt = FALSE;
+ 
+         /* #######  SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */
+       /* With all integrators, except VV, we need to retain the pressure
+          * at the current step for coupling at the next step.
+          */
+         if ((state->flags & (1<<estPRES_PREV)) &&
+             (bGStatEveryStep ||
+              (ir->nstpcouple > 0 && step % ir->nstpcouple == 0)))
+         {
+             /* Store the pressure in t_state for pressure coupling
+              * at the next MD step.
+              */
+             copy_mat(pres,state->pres_prev);
+         }
+ 
+         /* #######  END SET VARIABLES FOR NEXT ITERATION ###### */
+ 
+         if (bRerunMD)
+         {
+             /* read next frame from input trajectory */
+             bNotLastFrame = read_next_frame(oenv,status,&rerun_fr);
+         }
+ 
+         if (!bRerunMD || !rerun_fr.bStep)
+         {
+             /* increase the MD step number */
+             step++;
+             step_rel++;
+         }
+ 
+         cycles = wallcycle_stop(wcycle,ewcSTEP);
+         if (DOMAINDECOMP(cr) && wcycle)
+         {
+             dd_cycles_add(cr->dd,cycles,ddCyclStep);
+         }
+ 
+         if (step_rel == wcycle_get_reset_counters(wcycle) ||
+             gs.set[eglsRESETCOUNTERS] != 0)
+         {
+             /* Reset all the counters related to performance over the run */
+             reset_all_counters(fplog,cr,step,&step_rel,ir,wcycle,nrnb,runtime);
+             wcycle_set_reset_counters(wcycle,-1);
+             bResetCountersHalfMaxH = FALSE;
+             gs.set[eglsRESETCOUNTERS] = 0;
+         }
+     }
+     /* End of main MD loop */
+     debug_gmx();
+     write_sto_conf_mtop(ftp2fn(efSTO,nfile,fnm),
+                                         *top_global->name,top_global,
+                                         state_global->x,state_global->v,
+                                         ir->ePBC,state->box);
+ 
+     /* Stop the time */
+     runtime_end(runtime);
+ 
+     if (bRerunMD)
+     {
+         close_trj(status);
+     }
+ 
+     if (!(cr->duty & DUTY_PME))
+     {
+         /* Tell the PME only node to finish */
+         gmx_pme_finish(cr);
+     }
+ 
+     if (MASTER(cr))
+     {
+         if (ir->nstcalcenergy > 0 && !bRerunMD)
+         {
+             print_ebin(outf->fp_ene,FALSE,FALSE,FALSE,fplog,step,t,
+                        eprAVER,FALSE,mdebin,fcd,groups,&(ir->opts));
+         }
+     }
+ 
+     done_mdoutf(outf);
+ 
+     debug_gmx();
+ 
+     if (ir->nstlist == -1 && nlh.nns > 0 && fplog)
+     {
+         fprintf(fplog,"Average neighborlist lifetime: %.1f steps, std.dev.: %.1f steps\n",nlh.s1/nlh.nns,sqrt(nlh.s2/nlh.nns - sqr(nlh.s1/nlh.nns)));
+         fprintf(fplog,"Average number of atoms that crossed the half buffer length: %.1f\n\n",nlh.ab/nlh.nns);
+     }
+ 
+     if (shellfc && fplog)
+     {
+         fprintf(fplog,"Fraction of iterations that converged:           %.2f %%\n",
+                 (nconverged*100.0)/step_rel);
+         fprintf(fplog,"Average number of force evaluations per MD step: %.2f\n\n",
+                 tcount/step_rel);
+     }
+ 
+ /*    if (repl_ex_nst > 0 && MASTER(cr))
+     {
+         print_replica_exchange_statistics(fplog,repl_ex);
+     }*/
+ 
+     runtime->nsteps_done = step_rel;
+ 
+     return 0;
+ }
+ 
+ 
+ int mdrunner_membed(FILE *fplog,t_commrec *cr,int nfile,const t_filenm fnm[],
+              const output_env_t oenv, gmx_bool bVerbose,gmx_bool bCompact,
+              int nstglobalcomm,
+              ivec ddxyz,int dd_node_order,real rdd,real rconstr,
+              const char *dddlb_opt,real dlb_scale,
+              const char *ddcsx,const char *ddcsy,const char *ddcsz,
+              int nstepout,int resetstep,int nmultisim,int repl_ex_nst,int repl_ex_seed,
+              real pforce,real cpt_period,real max_hours,
+              const char *deviceOptions,
+              unsigned long Flags,
+              real xy_fac, real xy_max, real z_fac, real z_max,
+              int it_xy, int it_z, real probe_rad, int low_up_rm,
+              int pieces, gmx_bool bALLOW_ASYMMETRY, int maxwarn)
+ {
+     double     nodetime=0,realtime;
+     t_inputrec *inputrec;
+     t_state    *state=NULL;
+     matrix     box;
+     gmx_ddbox_t ddbox;
+     int        npme_major,npme_minor;
+     real       tmpr1,tmpr2;
+     t_nrnb     *nrnb;
+     gmx_mtop_t *mtop=NULL;
+     t_mdatoms  *mdatoms=NULL;
+     t_forcerec *fr=NULL;
+     t_fcdata   *fcd=NULL;
+     real       ewaldcoeff=0;
+     gmx_pme_t  *pmedata=NULL;
+     gmx_vsite_t *vsite=NULL;
+     gmx_constr_t constr;
+     int        i,m,nChargePerturbed=-1,status,nalloc;
+     char       *gro;
+     gmx_wallcycle_t wcycle;
+     gmx_bool       bReadRNG,bReadEkin;
+     int        list;
+     gmx_runtime_t runtime;
+     int        rc;
+     gmx_large_int_t reset_counters;
+     gmx_edsam_t ed=NULL;
+     t_commrec   *cr_old=cr;
+     int        nthreads=1,nthreads_requested=1;
+ 
+ 
+       char                    *ins;
+       int                     rm_bonded_at,fr_id,fr_i=0,tmp_id,warn=0;
+       int                     ng,j,max_lip_rm,ins_grp_id,ins_nat,mem_nat,ntype,lip_rm,tpr_version;
+       real                    xy_step=0,z_step=0;
+       real                    prot_area;
+       rvec                    *r_ins=NULL,fac;
+       t_block                 *ins_at,*rest_at;
+       pos_ins_t               *pos_ins;
+       mem_t                   *mem_p;
+       rmm_t                   *rm_p;
+       gmx_groups_t            *groups;
+       gmx_bool                        bExcl=FALSE;
+       t_atoms                 atoms;
+       t_pbc                   *pbc;
+       char                    **piecename=NULL;
+ 
+     /* CAUTION: threads may be started later on in this function, so
+        cr doesn't reflect the final parallel state right now */
+     snew(inputrec,1);
+     snew(mtop,1);
+ 
+     if (bVerbose && SIMMASTER(cr))
+     {
+         fprintf(stderr,"Getting Loaded...\n");
+     }
+ 
+     if (Flags & MD_APPENDFILES)
+     {
+         fplog = NULL;
+     }
+ 
+     snew(state,1);
+     if (MASTER(cr))
+     {
+         /* Read (nearly) all data required for the simulation */
+         read_tpx_state(ftp2fn(efTPX,nfile,fnm),inputrec,state,NULL,mtop);
+ 
+         /* NOW the threads will be started: */
+ #ifdef GMX_THREADS
+ #endif
+     }
+     /* END OF CAUTION: cr is now reliable */
+ 
+     if (PAR(cr))
+     {
+         /* now broadcast everything to the non-master nodes/threads: */
+         init_parallel(fplog, cr, inputrec, mtop);
+     }
+     /* now make sure the state is initialized and propagated */
+     set_state_entries(state,inputrec,cr->nnodes);
+ 
+     if (can_use_allvsall(inputrec,mtop,TRUE,cr,fplog))
+     {
+         /* All-vs-all loops do not work with domain decomposition */
+         Flags |= MD_PARTDEC;
+     }
+ 
+     if (!EEL_PME(inputrec->coulombtype) || (Flags & MD_PARTDEC))
+     {
+         cr->npmenodes = 0;
+     }
+ 
+       snew(ins_at,1);
+       snew(pos_ins,1);
+       if(MASTER(cr))
+       {
+               tpr_version = get_tpr_version(ftp2fn(efTPX,nfile,fnm));
+               if (tpr_version<58)
+                       gmx_fatal(FARGS,"Version of *.tpr file to old (%d). Rerun grompp with gromacs VERSION 4.0.3 or newer.\n",tpr_version);
+ 
+               if( inputrec->eI != eiMD )
+                       gmx_input("Change integrator to md in mdp file.");
+ 
+               if(PAR(cr))
+                       gmx_input("Sorry, parallel g_membed is not yet fully functrional.");
+ 
+               groups=&(mtop->groups);
+ 
+               atoms=gmx_mtop_global_atoms(mtop);
+               snew(mem_p,1);
+               fprintf(stderr,"\nSelect a group to embed in the membrane:\n");
+               get_index(&atoms,ftp2fn_null(efNDX,nfile,fnm),1,&(ins_at->nr),&(ins_at->index),&ins);
+               ins_grp_id = search_string(ins,groups->ngrpname,(groups->grpname));
+               fprintf(stderr,"\nSelect a group to embed %s into (e.g. the membrane):\n",ins);
+               get_index(&atoms,ftp2fn_null(efNDX,nfile,fnm),1,&(mem_p->mem_at.nr),&(mem_p->mem_at.index),&(mem_p->name));
+ 
+               pos_ins->pieces=pieces;
+               snew(pos_ins->nidx,pieces);
+               snew(pos_ins->subindex,pieces);
+               snew(piecename,pieces); 
+               if (pieces>1)
+               {
+                       fprintf(stderr,"\nSelect pieces to embed:\n");
+                       get_index(&atoms,ftp2fn_null(efNDX,nfile,fnm),pieces,pos_ins->nidx,pos_ins->subindex,piecename);
+               }
+               else
+               {       
+                       /*use whole embedded group*/
+                       snew(pos_ins->nidx,1);
+                       snew(pos_ins->subindex,1);
+                       pos_ins->nidx[0]=ins_at->nr;
+                       pos_ins->subindex[0]=ins_at->index;
+               }
+ 
+               if(probe_rad<0.2199999)
+               {
+                       warn++;
+                       fprintf(stderr,"\nWarning %d:\nA probe radius (-rad) smaller than 0.2 can result in overlap between waters "
+                                       "and the group to embed, which will result in Lincs errors etc.\nIf you are sure, you can increase maxwarn.\n\n",warn);
+               }
+ 
+               if(xy_fac<0.09999999)
+               {
+                       warn++;
+                       fprintf(stderr,"\nWarning %d:\nThe initial size of %s is probably too smal.\n"
+                                       "If you are sure, you can increase maxwarn.\n\n",warn,ins);
+               }
+ 
+               if(it_xy<1000)
+               {
+                       warn++;
+                       fprintf(stderr,"\nWarning %d;\nThe number of steps used to grow the xy-coordinates of %s (%d) is probably too small.\n"
+                                       "Increase -nxy or, if you are sure, you can increase maxwarn.\n\n",warn,ins,it_xy);
+               }
+ 
+               if( (it_z<100) && ( z_fac<0.99999999 || z_fac>1.0000001) )
+                 {
+                         warn++;
+                         fprintf(stderr,"\nWarning %d;\nThe number of steps used to grow the z-coordinate of %s (%d) is probably too small.\n"
+                                        "Increase -nz or, if you are sure, you can increase maxwarn.\n\n",warn,ins,it_z);
+                 }
+ 
+               if(it_xy+it_z>inputrec->nsteps)
+               {
+                       warn++;
+                       fprintf(stderr,"\nWarning %d:\nThe number of growth steps (-nxy + -nz) is larger than the number of steps in the tpr.\n"
+                                       "If you are sure, you can increase maxwarn.\n\n",warn);
+               }
+ 
+               fr_id=-1;
+               if( inputrec->opts.ngfrz==1)
+                       gmx_fatal(FARGS,"You did not specify \"%s\" as a freezegroup.",ins);
+               for(i=0;i<inputrec->opts.ngfrz;i++)
+               {
+                       tmp_id = mtop->groups.grps[egcFREEZE].nm_ind[i];
+                       if(ins_grp_id==tmp_id)
+                       {
+                               fr_id=tmp_id;
+                               fr_i=i;
+                       }
+               }
+               if (fr_id == -1 )
+                       gmx_fatal(FARGS,"\"%s\" not as freezegroup defined in the mdp-file.",ins);
+ 
+               for(i=0;i<DIM;i++)
+                       if( inputrec->opts.nFreeze[fr_i][i] != 1)
+                               gmx_fatal(FARGS,"freeze dimensions for %s are not Y Y Y\n",ins);
+ 
+               ng = groups->grps[egcENER].nr;
+               if (ng == 1)
+                       gmx_input("No energy groups defined. This is necessary for energy exclusion in the freeze group");
+ 
+               for(i=0;i<ng;i++)
+               {
+                       for(j=0;j<ng;j++)
+                       {
+                               if (inputrec->opts.egp_flags[ng*i+j] == EGP_EXCL)
+                               {
+                                       bExcl = TRUE;
+                                       if ( (groups->grps[egcENER].nm_ind[i] != ins_grp_id) || (groups->grps[egcENER].nm_ind[j] != ins_grp_id) )
+                                               gmx_fatal(FARGS,"Energy exclusions \"%s\" and  \"%s\" do not match the group to embed \"%s\"",
+                                                               *groups->grpname[groups->grps[egcENER].nm_ind[i]],
+                                                               *groups->grpname[groups->grps[egcENER].nm_ind[j]],ins);
+                               }
+                       }
+               }
+               if (!bExcl)
+                       gmx_input("No energy exclusion groups defined. This is necessary for energy exclusion in the freeze group");
+ 
+               /* Set all atoms in box*/
+               /*set_inbox(state->natoms,state->x);*/
+ 
+               /* Guess the area the protein will occupy in the membrane plane and calculate the area per lipid */
+               snew(rest_at,1);
+               ins_nat = init_ins_at(ins_at,rest_at,state,pos_ins,groups,ins_grp_id,xy_max);
+               /* Check moleculetypes in insertion group */
+               check_types(ins_at,rest_at,mtop);
+ 
+               mem_nat = init_mem_at(mem_p,mtop,state->x,state->box,pos_ins);
+ 
+               prot_area = est_prot_area(pos_ins,state->x,ins_at,mem_p);
+               if ( (prot_area>7.5) && ( (state->box[XX][XX]*state->box[YY][YY]-state->box[XX][YY]*state->box[YY][XX])<50) )
+               {
+                       warn++;
+                       fprintf(stderr,"\nWarning %d:\nThe xy-area is very small compared to the area of the protein.\n"
+                                       "This might cause pressure problems during the growth phase. Just try with\n"
+                                       "current setup (-maxwarn + 1), but if pressure problems occur, lower the\n"
+                                       "compressibility in the mdp-file or use no pressure coupling at all.\n\n",warn);
+               }
+               if(warn>maxwarn)
+                                       gmx_fatal(FARGS,"Too many warnings.\n");
+ 
+               printf("The estimated area of the protein in the membrane is %.3f nm^2\n",prot_area);
+               printf("\nThere are %d lipids in the membrane part that overlaps the protein.\nThe area per lipid is %.4f nm^2.\n",mem_p->nmol,mem_p->lip_area);
+ 
+               /* Maximum number of lipids to be removed*/
+               max_lip_rm=(int)(2*prot_area/mem_p->lip_area);
+               printf("Maximum number of lipids that will be removed is %d.\n",max_lip_rm);
+ 
+               printf("\nWill resize the protein by a factor of %.3f in the xy plane and %.3f in the z direction.\n"
+                               "This resizing will be done with respect to the geometrical center of all protein atoms\n"
+                               "that span the membrane region, i.e. z between %.3f and %.3f\n\n",xy_fac,z_fac,mem_p->zmin,mem_p->zmax);
+ 
+               /* resize the protein by xy and by z if necessary*/
+               snew(r_ins,ins_at->nr);
+               init_resize(ins_at,r_ins,pos_ins,mem_p,state->x,bALLOW_ASYMMETRY);
+               fac[0]=fac[1]=xy_fac;
+               fac[2]=z_fac;
+ 
+               xy_step =(xy_max-xy_fac)/(double)(it_xy);
+               z_step  =(z_max-z_fac)/(double)(it_z-1);
+ 
+               resize(ins_at,r_ins,state->x,pos_ins,fac);
+ 
+               /* remove overlapping lipids and water from the membrane box*/
+               /*mark molecules to be removed*/
+               snew(pbc,1);
+               set_pbc(pbc,inputrec->ePBC,state->box);
+ 
+               snew(rm_p,1);
+               lip_rm = gen_rm_list(rm_p,ins_at,rest_at,pbc,mtop,state->x, r_ins, mem_p,pos_ins,probe_rad,low_up_rm,bALLOW_ASYMMETRY);
+         lip_rm -= low_up_rm;
+ 
+               if(fplog)
+                       for(i=0;i<rm_p->nr;i++)
+                               fprintf(fplog,"rm mol %d\n",rm_p->mol[i]);
+ 
+               for(i=0;i<mtop->nmolblock;i++)
+               {
+                       ntype=0;
+                       for(j=0;j<rm_p->nr;j++)
+                               if(rm_p->block[j]==i)
+                                       ntype++;
+                       printf("Will remove %d %s molecules\n",ntype,*(mtop->moltype[mtop->molblock[i].type].name));
+               }
+ 
+               if(lip_rm>max_lip_rm)
+               {
+                       warn++;
+                       fprintf(stderr,"\nWarning %d:\nTrying to remove a larger lipid area than the estimated protein area\n"
+                                       "Try making the -xyinit resize factor smaller. If you are sure about this increase maxwarn.\n\n",warn);
+               }
+ 
+               /*remove all lipids and waters overlapping and update all important structures*/
+               rm_group(inputrec,groups,mtop,rm_p,state,ins_at,pos_ins);
+ 
+               rm_bonded_at = rm_bonded(ins_at,mtop);
+               if (rm_bonded_at != ins_at->nr)
+               {
+                       fprintf(stderr,"Warning: The number of atoms for which the bonded interactions are removed is %d, "
+                                       "while %d atoms are embedded. Make sure that the atoms to be embedded are not in the same"
+                                       "molecule type as atoms that are not to be embedded.\n",rm_bonded_at,ins_at->nr);
+               }
+ 
+               if(warn>maxwarn)
+                       gmx_fatal(FARGS,"Too many warnings.\nIf you are sure these warnings are harmless, you can increase -maxwarn");
+ 
+               if (MASTER(cr))
+               {
+                       if (ftp2bSet(efTOP,nfile,fnm))
+                               top_update(opt2fn("-p",nfile,fnm),ins,rm_p,mtop);
+               }
+ 
+               sfree(pbc);
+               sfree(rest_at);
+       }
+ 
+ #ifdef GMX_FAHCORE
+     fcRegisterSteps(inputrec->nsteps,inputrec->init_step);
+ #endif
+ 
+     /* NMR restraints must be initialized before load_checkpoint,
+      * since with time averaging the history is added to t_state.
+      * For proper consistency check we therefore need to extend
+      * t_state here.
+      * So the PME-only nodes (if present) will also initialize
+      * the distance restraints.
+      */
+     snew(fcd,1);
+ 
+     /* This needs to be called before read_checkpoint to extend the state */
+     init_disres(fplog,mtop,inputrec,cr,Flags & MD_PARTDEC,fcd,state);
+ 
+     if (gmx_mtop_ftype_count(mtop,F_ORIRES) > 0)
+     {
+         if (PAR(cr) && !(Flags & MD_PARTDEC))
+         {
+             gmx_fatal(FARGS,"Orientation restraints do not work (yet) with domain decomposition, use particle decomposition (mdrun option -pd)");
+         }
+         /* Orientation restraints */
+         if (MASTER(cr))
+         {
+             init_orires(fplog,mtop,state->x,inputrec,cr->ms,&(fcd->orires),
+                         state);
+         }
+     }
+ 
+     if (DEFORM(*inputrec))
+     {
+         /* Store the deform reference box before reading the checkpoint */
+         if (SIMMASTER(cr))
+         {
+             copy_mat(state->box,box);
+         }
+         if (PAR(cr))
+         {
+             gmx_bcast(sizeof(box),box,cr);
+         }
+         /* Because we do not have the update struct available yet
+          * in which the reference values should be stored,
+          * we store them temporarily in static variables.
+          * This should be thread safe, since they are only written once
+          * and with identical values.
+          */
+ /*        deform_init_init_step_tpx = inputrec->init_step;*/
+ /*        copy_mat(box,deform_init_box_tpx);*/
+     }
+ 
+     if (opt2bSet("-cpi",nfile,fnm))
+     {
+         /* Check if checkpoint file exists before doing continuation.
+          * This way we can use identical input options for the first and subsequent runs...
+          */
+         if( gmx_fexist_master(opt2fn_master("-cpi",nfile,fnm,cr),cr) )
+         {
+             load_checkpoint(opt2fn_master("-cpi",nfile,fnm,cr),&fplog,
+                             cr,Flags & MD_PARTDEC,ddxyz,
+                             inputrec,state,&bReadRNG,&bReadEkin,
+                             (Flags & MD_APPENDFILES));
+ 
+             if (bReadRNG)
+             {
+                 Flags |= MD_READ_RNG;
+             }
+             if (bReadEkin)
+             {
+                 Flags |= MD_READ_EKIN;
+             }
+         }
+     }
+ 
+     if ((MASTER(cr) || (Flags & MD_SEPPOT)) && (Flags & MD_APPENDFILES))
+     {
+         gmx_log_open(ftp2fn(efLOG,nfile,fnm),cr,!(Flags & MD_SEPPOT),
+                              Flags,&fplog);
+     }
+ 
+     if (SIMMASTER(cr))
+     {
+         copy_mat(state->box,box);
+     }
+ 
+     if (PAR(cr))
+     {
+         gmx_bcast(sizeof(box),box,cr);
+     }
+ 
+     if (bVerbose && SIMMASTER(cr))
+     {
+         fprintf(stderr,"Loaded with Money\n\n");
+     }
+ 
+     if (PAR(cr) && !((Flags & MD_PARTDEC) || EI_TPI(inputrec->eI)))
+     {
+         cr->dd = init_domain_decomposition(fplog,cr,Flags,ddxyz,rdd,rconstr,
+                                            dddlb_opt,dlb_scale,
+                                            ddcsx,ddcsy,ddcsz,
+                                            mtop,inputrec,
+                                            box,state->x,
+                                            &ddbox,&npme_major,&npme_minor);
+ 
+         make_dd_communicators(fplog,cr,dd_node_order);
+ 
+         /* Set overallocation to avoid frequent reallocation of arrays */
+         set_over_alloc_dd(TRUE);
+     }
+     else
+     {
+         /* PME, if used, is done on all nodes with 1D decomposition */
+         cr->npmenodes = 0;
+         cr->duty = (DUTY_PP | DUTY_PME);
+         npme_major = cr->nnodes;
+         npme_minor = 1;
+ 
+         if (inputrec->ePBC == epbcSCREW)
+         {
+             gmx_fatal(FARGS,
+                       "pbc=%s is only implemented with domain decomposition",
+                       epbc_names[inputrec->ePBC]);
+         }
+     }
+ 
+     if (PAR(cr))
+     {
+         /* After possible communicator splitting in make_dd_communicators,
+          * we can set up the intra/inter node communication.
+          */
+         gmx_setup_nodecomm(fplog,cr);
+     }
+ 
+     wcycle = wallcycle_init(fplog,resetstep,cr);
+     if (PAR(cr))
+     {
+         /* Master synchronizes its value of reset_counters with all nodes
+          * including PME only nodes */
+         reset_counters = wcycle_get_reset_counters(wcycle);
+         gmx_bcast_sim(sizeof(reset_counters),&reset_counters,cr);
+         wcycle_set_reset_counters(wcycle, reset_counters);
+     }
+ 
+ 
+     snew(nrnb,1);
+     if (cr->duty & DUTY_PP)
+     {
+         /* For domain decomposition we allocate dynamically
+          * in dd_partition_system.
+          */
+         if (DOMAINDECOMP(cr))
+         {
+             bcast_state_setup(cr,state);
+         }
+         else
+         {
+             if (PAR(cr))
+             {
+                 if (!MASTER(cr))
+                 {
+                     snew(state,1);
+                 }
+                 bcast_state(cr,state,TRUE);
+             }
+         }
+ 
+         /* Dihedral Restraints */
+         if (gmx_mtop_ftype_count(mtop,F_DIHRES) > 0)
+         {
+             init_dihres(fplog,mtop,inputrec,fcd);
+         }
+ 
+         /* Initiate forcerecord */
+         fr = mk_forcerec();
+         init_forcerec(fplog,oenv,fr,fcd,inputrec,mtop,cr,box,FALSE,
+                       opt2fn("-table",nfile,fnm),
+                       opt2fn("-tablep",nfile,fnm),
+                       opt2fn("-tableb",nfile,fnm),FALSE,pforce);
+ 
+         /* version for PCA_NOT_READ_NODE (see md.c) */
+         /*init_forcerec(fplog,fr,fcd,inputrec,mtop,cr,box,FALSE,
+           "nofile","nofile","nofile",FALSE,pforce);
+           */
+         fr->bSepDVDL = ((Flags & MD_SEPPOT) == MD_SEPPOT);
+ 
+         /* Initialize QM-MM */
+         if(fr->bQMMM)
+         {
+             init_QMMMrec(cr,box,mtop,inputrec,fr);
+         }
+ 
+         /* Initialize the mdatoms structure.
+          * mdatoms is not filled with atom data,
+          * as this can not be done now with domain decomposition.
+          */
+         mdatoms = init_mdatoms(fplog,mtop,inputrec->efep!=efepNO);
+ 
+         /* Initialize the virtual site communication */
+         vsite = init_vsite(mtop,cr);
+ 
+         calc_shifts(box,fr->shift_vec);
+ 
+         /* With periodic molecules the charge groups should be whole at start up
+          * and the virtual sites should not be far from their proper positions.
+          */
+         if (!inputrec->bContinuation && MASTER(cr) &&
+             !(inputrec->ePBC != epbcNONE && inputrec->bPeriodicMols))
+         {
+             /* Make molecules whole at start of run */
+             if (fr->ePBC != epbcNONE)
+             {
+                 do_pbc_first_mtop(fplog,inputrec->ePBC,box,mtop,state->x);
+             }
+             if (vsite)
+             {
+                 /* Correct initial vsite positions are required
+                  * for the initial distribution in the domain decomposition
+                  * and for the initial shell prediction.
+                  */
+                 construct_vsites_mtop(fplog,vsite,mtop,state->x);
+             }
+         }
+ 
+         /* Initiate PPPM if necessary */
+         if (fr->eeltype == eelPPPM)
+         {
+             if (mdatoms->nChargePerturbed)
+             {
+                 gmx_fatal(FARGS,"Free energy with %s is not implemented",
+                           eel_names[fr->eeltype]);
+             }
+             status = gmx_pppm_init(fplog,cr,oenv,FALSE,TRUE,box,
+                                    getenv("GMXGHAT"),inputrec, (Flags & MD_REPRODUCIBLE));
+             if (status != 0)
+             {
+                 gmx_fatal(FARGS,"Error %d initializing PPPM",status);
+             }
+         }
+ 
+         if (EEL_PME(fr->eeltype))
+         {
+             ewaldcoeff = fr->ewaldcoeff;
+             pmedata = &fr->pmedata;
+         }
+         else
+         {
+             pmedata = NULL;
+         }
+     }
+     else
+     {
+         /* This is a PME only node */
+ 
+         /* We don't need the state */
+         done_state(state);
+ 
+         ewaldcoeff = calc_ewaldcoeff(inputrec->rcoulomb, inputrec->ewald_rtol);
+         snew(pmedata,1);
+     }
+ 
+     /* Initiate PME if necessary,
+      * either on all nodes or on dedicated PME nodes only. */
+     if (EEL_PME(inputrec->coulombtype))
+     {
+         if (mdatoms)
+         {
+             nChargePerturbed = mdatoms->nChargePerturbed;
+         }
+         if (cr->npmenodes > 0)
+         {
+             /* The PME only nodes need to know nChargePerturbed */
+             gmx_bcast_sim(sizeof(nChargePerturbed),&nChargePerturbed,cr);
+         }
+         if (cr->duty & DUTY_PME)
+         {
+             status = gmx_pme_init(pmedata,cr,npme_major,npme_minor,inputrec,
+                                   mtop ? mtop->natoms : 0,nChargePerturbed,
+                                   (Flags & MD_REPRODUCIBLE));
+             if (status != 0)
+             {
+                 gmx_fatal(FARGS,"Error %d initializing PME",status);
+             }
+         }
+     }
+ 
+ 
+ /*    if (integrator[inputrec->eI].func == do_md
+ #ifdef GMX_OPENMM
+         ||
+         integrator[inputrec->eI].func == do_md_openmm
+ #endif
+         )
+     {*/
+         /* Turn on signal handling on all nodes */
+         /*
+          * A user signal from the PME nodes (if any)
+          * is communicated to the PP nodes.
+          */
+         signal_handler_install();
+ /*    }*/
+ 
+     if (cr->duty & DUTY_PP)
+     {
+         if (inputrec->ePull != epullNO)
+         {
+             /* Initialize pull code */
+             init_pull(fplog,inputrec,nfile,fnm,mtop,cr,oenv,
+                       EI_DYNAMICS(inputrec->eI) && MASTER(cr),Flags);
+         }
+ 
+         constr = init_constraints(fplog,mtop,inputrec,ed,state,cr);
+ 
+         if (DOMAINDECOMP(cr))
+         {
+             dd_init_bondeds(fplog,cr->dd,mtop,vsite,constr,inputrec,
+                             Flags & MD_DDBONDCHECK,fr->cginfo_mb);
+ 
+             set_dd_parameters(fplog,cr->dd,dlb_scale,inputrec,fr,&ddbox);
+ 
+             setup_dd_grid(fplog,cr->dd);
+         }
+ 
+         /* Now do whatever the user wants us to do (how flexible...) */
+         do_md_membed(fplog,cr,nfile,fnm,
+                                       oenv,bVerbose,bCompact,
+                                       nstglobalcomm,
+                                       vsite,constr,
+                                       nstepout,inputrec,mtop,
+                                       fcd,state,
+                                       mdatoms,nrnb,wcycle,ed,fr,
+                                       repl_ex_nst,repl_ex_seed,
+                                       cpt_period,max_hours,
+                                       deviceOptions,
+                                       Flags,
+                                       &runtime,
+                                       fac, r_ins, pos_ins, ins_at,
+                                       xy_step, z_step, it_xy, it_z);
+ 
+         if (inputrec->ePull != epullNO)
+         {
+             finish_pull(fplog,inputrec->pull);
+         }
+     }
+     else
+     {
+         /* do PME only */
+         gmx_pmeonly(*pmedata,cr,nrnb,wcycle,ewaldcoeff,FALSE,inputrec);
+     }
+ 
+     if (EI_DYNAMICS(inputrec->eI) || EI_TPI(inputrec->eI))
+     {
+         /* Some timing stats */
+         if (MASTER(cr))
+         {
+             if (runtime.proc == 0)
+             {
+                 runtime.proc = runtime.real;
+             }
+         }
+         else
+         {
+             runtime.real = 0;
+         }
+     }
+ 
+     wallcycle_stop(wcycle,ewcRUN);
+ 
+     /* Finish up, write some stuff
+      * if rerunMD, don't write last frame again
+      */
+     finish_run(fplog,cr,ftp2fn(efSTO,nfile,fnm),
+                inputrec,nrnb,wcycle,&runtime,
+                EI_DYNAMICS(inputrec->eI) && !MULTISIM(cr));
+ 
+     /* Does what it says */
+     print_date_and_time(fplog,cr->nodeid,"Finished mdrun",&runtime);
+ 
+     /* Close logfile already here if we were appending to it */
+     if (MASTER(cr) && (Flags & MD_APPENDFILES))
+     {
+         gmx_log_close(fplog);
+     }
+ 
+     if (pieces>1)
+     {
+       sfree(piecename);
+     }
+ 
+     rc=(int)gmx_get_stop_condition();
+ 
+     return rc;
+ }
+ 
   int gmx_membed(int argc,char *argv[])
   {
         const char *desc[] = {
@@@ -65,45 -3437,67 +3395,69 @@@
                         " - [TT]energygrp_excl  = Protein Protein[tt][BR]",
                         "The output is a structure file containing the protein embedded in the membrane. If a topology",
                         "file is provided, the number of lipid and ",
-                       "solvent molecules will be updated to match the new structure file.\n",
-                       "For a more extensive manual see Wolf et al, J Comp Chem 31 (2010) 2169-2174, Appendix.\n",
-                       "\n",
-                       "SHORT METHOD DESCRIPTION\n",
-                       "------------------------\n",
-                       "1. The protein is resized around its center of mass by a factor -xy in the xy-plane",
-                       "(the membrane plane) and a factor -z in the z-direction (if the size of the",
+                       "solvent molecules will be updated to match the new structure file.[BR]",
+                       "For a more extensive manual see Wolf et al, J Comp Chem 31 (2010) 2169-2174, Appendix.[PAR]",
+                       "SHORT METHOD DESCRIPTION[BR]",
+                       "------------------------[BR]",
+                       "1. The protein is resized around its center of mass by a factor [TT]-xy[tt] in the xy-plane",
+                       "(the membrane plane) and a factor [TT]-z[tt] in the [IT]z[it]-direction (if the size of the",
                         "protein in the z-direction is the same or smaller than the width of the membrane, a",
-                       "-z value larger than 1 can prevent that the protein will be enveloped by the lipids).\n",
+                       "[TT]-z[tt] value larger than 1 can prevent that the protein will be enveloped by the lipids).[BR]",
                         "2. All lipid and solvent molecules overlapping with the resized protein are removed. All",
-                       "intraprotein interactions are turned off to prevent numerical issues for small values of -xy",
-                       " or -z\n",
-                       "3. One md step is performed.\n",
-                       "4. The resize factor (-xy or -z) is incremented by a small amount ((1-xy)/nxy or (1-z)/nz) and the",
+                       "intraprotein interactions are turned off to prevent numerical issues for small values of [TT]-xy[tt]",
+                       " or [TT]-z[tt][BR]",
+                       "3. One md step is performed.[BR]",
+                       "4. The resize factor ([TT]-xy[tt] or [TT]-z[tt]) is incremented by a small amount ((1-xy)/nxy or (1-z)/nz) and the",
                         "protein is resized again around its center of mass. The resize factor for the xy-plane",
-                       "is incremented first. The resize factor for the z-direction is not changed until the -xy factor",
-                       "is 1 (thus after -nxy iteration).\n",
-                       "5. Repeat step 3 and 4 until the protein reaches its original size (-nxy + -nz iterations).\n",
-                       "For a more extensive method descrition see Wolf et al, J Comp Chem, 31 (2010) 2169-2174.\n",
-                       "\n",
-                       "NOTE\n----\n",
-                       " - Protein can be any molecule you want to insert in the membrane.\n",
+                       "is incremented first. The resize factor for the z-direction is not changed until the [TT]-xy[tt] factor",
+                       "is 1 (thus after [TT]-nxy[tt] iterations).[BR]",
+                       "5. Repeat step 3 and 4 until the protein reaches its original size ([TT]-nxy[tt] + [TT]-nz[tt] iterations).[BR]",
+                       "For a more extensive method description see Wolf et al, J Comp Chem, 31 (2010) 2169-2174.[PAR]",
+                       "NOTE[BR]----[BR]",
+                       " - Protein can be any molecule you want to insert in the membrane.[BR]",
                         " - It is recommended to perform a short equilibration run after the embedding",
- -                      "(see Wolf et al, J Comp Chem 31 (2010) 2169-2174), to re-equilibrate the membrane. Clearly",
- -                      "protein equilibration might require longer.[PAR]"
+ +                      "(see Wolf et al, J Comp Chem 31 (2010) 2169-2174, to re-equilibrate the membrane. Clearly",
+ +                      "protein equilibration might require longer.\n",
+ +                      " - It is now also possible to use the g_membed functionality with mdrun. You should than pass",
+ +                      "a data file containing the command line options of g_membed following the -membed option, for",
+ +                      "example mdrun -s into_mem.tpr -membed membed.dat.",
+ +                      "\n"
         };
- -      t_commrec    *cr;
         t_filenm fnm[] = {
                         { efTPX, "-f",      "into_mem", ffREAD },
                         { efNDX, "-n",      "index",    ffOPTRD },
                         { efTOP, "-p",      "topol",    ffOPTRW },
                         { efTRN, "-o",      NULL,       ffWRITE },
                         { efXTC, "-x",      NULL,       ffOPTWR },
- -                      { efCPT, "-cpi",    NULL,       ffOPTRD },
- -                      { efCPT, "-cpo",    NULL,       ffOPTWR },
                         { efSTO, "-c",      "membedded",  ffWRITE },
                         { efEDR, "-e",      "ener",     ffWRITE },
+ +                        { efDAT, "-dat",    "membed",   ffWRITE }
+                       { efLOG, "-g",      "md",       ffWRITE },
+                       { efEDI, "-ei",     "sam",      ffOPTRD },
+                       { efTRX, "-rerun",  "rerun",    ffOPTRD },
+                       { efXVG, "-table",  "table",    ffOPTRD },
+                       { efXVG, "-tablep", "tablep",   ffOPTRD },
+                       { efXVG, "-tableb", "table",    ffOPTRD },
+                       { efXVG, "-dhdl",   "dhdl",     ffOPTWR },
+                       { efXVG, "-field",  "field",    ffOPTWR },
+                       { efXVG, "-table",  "table",    ffOPTRD },
+                       { efXVG, "-tablep", "tablep",   ffOPTRD },
+                       { efXVG, "-tableb", "table",    ffOPTRD },
+                       { efTRX, "-rerun",  "rerun",    ffOPTRD },
+                       { efXVG, "-tpi",    "tpi",      ffOPTWR },
+                       { efXVG, "-tpid",   "tpidist",  ffOPTWR },
+                       { efEDI, "-ei",     "sam",      ffOPTRD },
+                       { efEDO, "-eo",     "sam",      ffOPTWR },
+                       { efGCT, "-j",      "wham",     ffOPTRD },
+                       { efGCT, "-jo",     "bam",      ffOPTWR },
+                       { efXVG, "-ffout",  "gct",      ffOPTWR },
+                       { efXVG, "-devout", "deviatie", ffOPTWR },
+                       { efXVG, "-runav",  "runaver",  ffOPTWR },
+                       { efXVG, "-px",     "pullx",    ffOPTWR },
+                       { efXVG, "-pf",     "pullf",    ffOPTWR },
+                       { efMTX, "-mtx",    "nm",       ffOPTWR },
+                       { efNDX, "-dn",     "dipole",   ffOPTWR },
+                         { efRND, "-multidir",NULL,      ffOPTRDMULT}
         };
   #define NFILE asize(fnm)
   
@@@ -130,87 -3556,260 +3484,181 @@@
   #endif
   
         t_pargs pa[] = {
- -                      { "-xyinit",   FALSE, etREAL,  {&xy_fac},       "Resize factor for the protein in the xy dimension before starting embedding" },
- -                      { "-xyend",   FALSE, etREAL,  {&xy_max},                "Final resize factor in the xy dimension" },
- -                      { "-zinit",    FALSE, etREAL,  {&z_fac},                "Resize factor for the protein in the z dimension before starting embedding" },
- -                      { "-zend",    FALSE, etREAL,  {&z_max},                 "Final resize faction in the z dimension" },
- -                      { "-nxy",     FALSE,  etINT,  {&it_xy},         "Number of iteration for the xy dimension" },
- -                      { "-nz",      FALSE,  etINT,  {&it_z},          "Number of iterations for the z dimension" },
- -                      { "-rad",     FALSE, etREAL,  {&probe_rad},     "Probe radius to check for overlap between the group to embed and the membrane"},
- -                      { "-pieces",  FALSE,  etINT,  {&pieces},        "Perform piecewise resize. Select parts of the group to insert and resize these with respect to their own geometrical center." },
- -            { "-asymmetry",FALSE, etBOOL,{&bALLOW_ASYMMETRY}, "Allow asymmetric insertion, i.e. the number of lipids removed from the upper and lower leaflet will not be checked." },
- -            { "-ndiff" ,  FALSE, etINT, {&low_up_rm},       "Number of lipids that will additionally be removed from the lower (negative number) or upper (positive number) membrane leaflet." },
- -                      { "-maxwarn", FALSE, etINT, {&maxwarn},                 "Maximum number of warning allowed" },
- -  { "-pd",      FALSE, etBOOL,{&bPartDec},
- -    "HIDDENUse particle decompostion" },
- -  { "-dd",      FALSE, etRVEC,{&realddxyz},
- -    "HIDDENDomain decomposition grid, 0 is optimize" },
- -  { "-nt",      FALSE, etINT, {&nthreads},
- -    "HIDDENNumber of threads to start (0 is guess)" },
- -  { "-npme",    FALSE, etINT, {&npme},
- -    "HIDDENNumber of separate nodes to be used for PME, -1 is guess" },
- -  { "-ddorder", FALSE, etENUM, {ddno_opt},
- -    "HIDDENDD node order" },
- -  { "-ddcheck", FALSE, etBOOL, {&bDDBondCheck},
- -    "HIDDENCheck for all bonded interactions with DD" },
- -  { "-ddbondcomm", FALSE, etBOOL, {&bDDBondComm},
- -    "HIDDENUse special bonded atom communication when [TT]-rdd[tt] > cut-off" },
- -  { "-rdd",     FALSE, etREAL, {&rdd},
- -    "HIDDENThe maximum distance for bonded interactions with DD (nm), 0 is determine from initial coordinates" },
- -  { "-rcon",    FALSE, etREAL, {&rconstr},
- -    "HIDDENMaximum distance for P-LINCS (nm), 0 is estimate" },
- -  { "-dlb",     FALSE, etENUM, {dddlb_opt},
- -    "HIDDENDynamic load balancing (with DD)" },
- -  { "-dds",     FALSE, etREAL, {&dlb_scale},
- -    "HIDDENMinimum allowed dlb scaling of the DD cell size" },
- -  { "-ddcsx",   FALSE, etSTR, {&ddcsx},
- -    "HIDDENThe DD cell sizes in x" },
- -  { "-ddcsy",   FALSE, etSTR, {&ddcsy},
- -    "HIDDENThe DD cell sizes in y" },
- -  { "-ddcsz",   FALSE, etSTR, {&ddcsz},
- -    "HIDDENThe DD cell sizes in z" },
- -  { "-gcom",    FALSE, etINT,{&nstglobalcomm},
- -    "HIDDENGlobal communication frequency" },
- -  { "-compact", FALSE, etBOOL,{&bCompact},
- -    "Write a compact log file" },
- -  { "-seppot",  FALSE, etBOOL, {&bSepPot},
- -    "HIDDENWrite separate V and dVdl terms for each interaction type and node to the log file(s)" },
- -  { "-pforce",  FALSE, etREAL, {&pforce},
- -    "HIDDENPrint all forces larger than this (kJ/mol nm)" },
- -  { "-reprod",  FALSE, etBOOL,{&bReproducible},
- -    "HIDDENTry to avoid optimizations that affect binary reproducibility" },
- -  { "-multi",   FALSE, etINT,{&nmultisim},
- -    "HIDDENDo multiple simulations in parallel" },
- -  { "-replex",  FALSE, etINT, {&repl_ex_nst},
- -    "HIDDENAttempt replica exchange every # steps" },
- -  { "-reseed",  FALSE, etINT, {&repl_ex_seed},
- -    "HIDDENSeed for replica exchange, -1 is generate a seed" },
- -  { "-rerunvsite", FALSE, etBOOL, {&bRerunVSite},
- -    "HIDDENRecalculate virtual site coordinates with [TT]-rerun[tt]" },
- -  { "-ionize",  FALSE, etBOOL,{&bIonize},
- -    "HIDDENDo a simulation including the effect of an X-Ray bombardment on your system" },
- -  { "-confout", TRUE, etBOOL, {&bConfout},
- -    "HIDDENWrite the last configuration with [TT]-c[tt] and force checkpointing at the last step" },
- -  { "-stepout", FALSE, etINT, {&nstepout},
- -    "HIDDENFrequency of writing the remaining runtime" },
- -  { "-resetstep", FALSE, etINT, {&resetstep},
- -    "HIDDENReset cycle counters after these many time steps" },
- -  { "-resethway", FALSE, etBOOL, {&bResetCountersHalfWay},
- -    "HIDDENReset the cycle counters after half the number of steps or halfway [TT]-maxh[tt]" },
- -  { "-v",       FALSE, etBOOL,{&bVerbose},
- -    "Be loud and noisy" },
- -  { "-maxh",   FALSE, etREAL, {&max_hours},
- -    "HIDDENTerminate after 0.99 times this time (hours)" },
- -  { "-cpt",     FALSE, etREAL, {&cpt_period},
- -    "HIDDENCheckpoint interval (minutes)" },
- -  { "-append",  FALSE, etBOOL, {&bAppendFiles},
- -    "HIDDENAppend to previous output files when continuing from checkpoint" },
- -  { "-addpart",  FALSE, etBOOL, {&bAddPart},
- -    "HIDDENAdd the simulation part number to all output files when continuing from checkpoint" },
+ +                      { "-xyinit",   FALSE, etREAL,  {&xy_fac},       
+ +                              "Resize factor for the protein in the xy dimension before starting embedding" },
+ +                      { "-xyend",   FALSE, etREAL,  {&xy_max},
+ +                              "Final resize factor in the xy dimension" },
+ +                      { "-zinit",    FALSE, etREAL,  {&z_fac},
+ +                              "Resize factor for the protein in the z dimension before starting embedding" },
+ +                      { "-zend",    FALSE, etREAL,  {&z_max},
+ +                              "Final resize faction in the z dimension" },
+ +                      { "-nxy",     FALSE,  etINT,  {&it_xy},
+ +                              "Number of iteration for the xy dimension" },
+ +                      { "-nz",      FALSE,  etINT,  {&it_z},
+ +                              "Number of iterations for the z dimension" },
+ +                      { "-rad",     FALSE, etREAL,  {&probe_rad},
+ +                              "Probe radius to check for overlap between the group to embed and the membrane"},
+ +                      { "-pieces",  FALSE,  etINT,  {&pieces},
+ +                              "Perform piecewise resize. Select parts of the group to insert and resize these with respect to their own geometrical center." },
+ +                      { "-asymmetry",FALSE, etBOOL,{&bALLOW_ASYMMETRY}, 
+ +                              "Allow asymmetric insertion, i.e. the number of lipids removed from the upper and lower leaflet will not be checked." },
+ +                      { "-ndiff" ,  FALSE, etINT, {&low_up_rm},
+ +                              "Number of lipids that will additionally be removed from the lower (negative number) or upper (positive number) membrane leaflet." },
+ +                      { "-maxwarn", FALSE, etINT, {&maxwarn},         
+ +                              "Maximum number of warning allowed" },
+ +                        { "-start",   FALSE, etBOOL, {&bStart},
+ +                                "Call mdrun with membed options" },
+ +                      { "-stepout", FALSE, etINT, {&nstepout},
+ +                              "HIDDENFrequency of writing the remaining runtime" },
+ +                      { "-v",       FALSE, etBOOL,{&bVerbose},
+ +                              "Be loud and noisy" },
+ +                      { "-mdrun_path", FALSE, etSTR, {&mdrun_path},
+ +                              "Path to the mdrun executable compiled with this g_membed version" }
         };
+ +
+ +        FILE *data_out;
+ +        output_env_t oenv;
+ +        char buf[256],buf2[64];
+       gmx_edsam_t  ed;
+       unsigned long Flags, PCA_Flags;
+       ivec     ddxyz;
+       int      dd_node_order;
+       gmx_bool     HaveCheckpoint;
+       FILE     *fplog,*fptest;
+       int      sim_part,sim_part_fn;
+       const char *part_suffix=".part";
+       char     suffix[STRLEN];
+       int      rc;
+         char **multidir=NULL;
+ 
+       cr = init_par(&argc,&argv);
+ 
+       PCA_Flags = (PCA_KEEP_ARGS | PCA_NOEXIT_ON_ARGS | PCA_CAN_SET_DEFFNM
+                       | (MASTER(cr) ? 0 : PCA_QUIET));
+ 
+ 
+       /* Comment this in to do fexist calls only on master
+        * does not work with rerun or tables at the moment
+        * also comment out the version of init_forcerec in md.c
+        * with NULL instead of opt2fn
+        */
+       /*
+    if (!MASTER(cr))
+    {
+    PCA_Flags |= PCA_NOT_READ_NODE;
+    }
+        */
+ 
+       parse_common_args(&argc,argv,PCA_Flags, NFILE,fnm,asize(pa),pa,
+                       asize(desc),desc,0,NULL, &oenv);
+ 
+       /* we set these early because they might be used in init_multisystem()
+    Note that there is the potential for npme>nnodes until the number of
+    threads is set later on, if there's thread parallelization. That shouldn't
+    lead to problems. */
+       dd_node_order = nenum(ddno_opt);
+       cr->npmenodes = npme;
+ 
+ #ifdef GMX_THREADS
+       /* now determine the number of threads automatically. The threads are
+    only started at mdrunner_threads, though. */
+       if (nthreads<1)
+       {
+               nthreads=tMPI_Thread_get_hw_number();
+       }
+ #else
+       nthreads=1;
+ #endif
+ 
+         /* now check the -multi and -multidir option */
+         if (opt2bSet("-multidir", NFILE, fnm))
+         {
+             int i;
+             if (nmultisim > 0)
+             {
+                 gmx_fatal(FARGS, "mdrun -multi and -multidir options are mutually     exclusive.");
+             }
+             nmultisim = opt2fns(&multidir, "-multidir", NFILE, fnm);
+         }
+ 
   
- -      if (repl_ex_nst != 0 && nmultisim < 2)
- -              gmx_fatal(FARGS,"Need at least two replicas for replica exchange (option -multi)");
   
+ +        parse_common_args(&argc,argv,0, NFILE,fnm,asize(pa),pa,
+ +                    asize(desc),desc,0,NULL, &oenv);
+       if (nmultisim > 1) {
+ #ifndef GMX_THREADS
+                 gmx_bool bParFn = (multidir == NULL);
+               init_multisystem(cr,nmultisim,multidir,NFILE,fnm,TRUE);
+ #else
+               gmx_fatal(FARGS,"mdrun -multi is not supported with the thread library.Please compile GROMACS with MPI support");
+ #endif
+       }
+ 
+       /* Check if there is ANY checkpoint file available */
+       sim_part    = 1;
+       sim_part_fn = sim_part;
+       if (opt2bSet("-cpi",NFILE,fnm))
+       {
+               bAppendFiles =
+                       read_checkpoint_simulation_part(opt2fn_master("-cpi", NFILE,fnm,cr),
+                                                       &sim_part_fn,NULL,cr,
+                                                       bAppendFiles,NFILE,fnm,
+                                                       part_suffix,&bAddPart);
+               if (sim_part_fn==0 && MASTER(cr))
+               {
+                       fprintf(stdout,"No previous checkpoint file present, assuming this is a new run.\n");
+               }
+               else
+               {
+                       sim_part = sim_part_fn + 1;
+               }
+       }
+       else
+       {
+               bAppendFiles = FALSE;
+       }
   
- -      if (!bAppendFiles)
- -      {
- -              sim_part_fn = sim_part;
- -      }
- -
- -      if (bAddPart && sim_part_fn > 1)
+ +        data_out = ffopen(opt2fn("-dat",NFILE,fnm),"w");
+ +        fprintf(data_out,"nxy = %d\nnz = %d\nxyinit = %f\nxyend = %f\nzinit = %f\nzend = %f\n"
+ +                      "rad = %f\npieces = %d\nasymmetry = %s\nndiff = %d\nmaxwarn = %d\n",
+ +                      it_xy,it_z,xy_fac,xy_max,z_fac,z_max,probe_rad,pieces,
+ +                      bALLOW_ASYMMETRY ? "yes" : "no",low_up_rm,maxwarn);
+ +        fclose(data_out);
+ +
+ +        sprintf(buf,"%s -s %s -membed %s -o %s -c %s -e %s -nt 1 -cpt -1",
+ +                  (mdrun_path==NULL) ? "mdrun" : mdrun_path,
+ +                  opt2fn("-f",NFILE,fnm),opt2fn("-dat",NFILE,fnm),opt2fn("-o",NFILE,fnm),
+ +                  opt2fn("-c",NFILE,fnm),opt2fn("-e",NFILE,fnm));
+ +        if (opt2bSet("-n",NFILE,fnm))
         {
- -              /* This is a continuation run, rename trajectory output files
- -       (except checkpoint files) */
- -              /* create new part name first (zero-filled) */
- -              sprintf(suffix,"%s%04d",part_suffix,sim_part_fn);
- -
- -              add_suffix_to_output_names(fnm,NFILE,suffix);
- -              fprintf(stdout,"Checkpoint file is from part %d, new output files will be suffixed '%s'.\n",sim_part-1,suffix);
- -      }
- -
- -      Flags = opt2bSet("-rerun",NFILE,fnm) ? MD_RERUN : 0;
- -      Flags = Flags | (bSepPot       ? MD_SEPPOT       : 0);
- -      Flags = Flags | (bIonize       ? MD_IONIZE       : 0);
- -      Flags = Flags | (bPartDec      ? MD_PARTDEC      : 0);
- -      Flags = Flags | (bDDBondCheck  ? MD_DDBONDCHECK  : 0);
- -      Flags = Flags | (bDDBondComm   ? MD_DDBONDCOMM   : 0);
- -      Flags = Flags | (bConfout      ? MD_CONFOUT      : 0);
- -      Flags = Flags | (bRerunVSite   ? MD_RERUN_VSITE  : 0);
- -      Flags = Flags | (bReproducible ? MD_REPRODUCIBLE : 0);
- -      Flags = Flags | (bAppendFiles  ? MD_APPENDFILES  : 0);
- -      Flags = Flags | (sim_part>1    ? MD_STARTFROMCPT : 0);
- -      Flags = Flags | (bResetCountersHalfWay ? MD_RESETCOUNTERSHALFWAY : 0);
- -
- -
- -      /* We postpone opening the log file if we are appending, so we can
- -   first truncate the old log file and append to the correct position
- -   there instead.  */
- -      if ((MASTER(cr) || bSepPot) && !bAppendFiles)
+ +              sprintf(buf2," -mn %s",opt2fn("-n",NFILE,fnm));
+ +              strcat(buf,buf2);
+ +        }
+ +      if (opt2bSet("-x",NFILE,fnm))
         {
- -              gmx_log_open(ftp2fn(efLOG,NFILE,fnm),cr,!bSepPot,Flags,&fplog);
- -              CopyRight(fplog,argv[0]);
- -              please_cite(fplog,"Hess2008b");
- -              please_cite(fplog,"Spoel2005a");
- -              please_cite(fplog,"Lindahl2001a");
- -              please_cite(fplog,"Berendsen95a");
+ +              sprintf(buf2," -x %s",opt2fn("-x",NFILE,fnm));
+ +                strcat(buf,buf2);
         }
- -      else
+ +        if (opt2bSet("-p",NFILE,fnm))
+ +        {
+ +                sprintf(buf2," -mp %s",opt2fn("-p",NFILE,fnm));
+ +                strcat(buf,buf2);
+ +        }
+ +      if (bVerbose)
         {
- -              fplog = NULL;
- -      }
- -
- -      ddxyz[XX] = (int)(realddxyz[XX] + 0.5);
- -      ddxyz[YY] = (int)(realddxyz[YY] + 0.5);
- -      ddxyz[ZZ] = (int)(realddxyz[ZZ] + 0.5);
- -
- -      /* even if nthreads = 1, we still call this one */
- -
- -      rc = mdrunner_membed(fplog, cr, NFILE, fnm, oenv, bVerbose, bCompact,
- -                      nstglobalcomm,
- -                      ddxyz, dd_node_order, rdd, rconstr, dddlb_opt[0], dlb_scale,
- -                      ddcsx, ddcsy, ddcsz, nstepout, resetstep, nmultisim, repl_ex_nst,
- -                      repl_ex_seed, pforce, cpt_period, max_hours, deviceOptions, Flags,
- -                      xy_fac,xy_max,z_fac,z_max,
- -                      it_xy,it_z,probe_rad,low_up_rm,
- -                      pieces,bALLOW_ASYMMETRY,maxwarn);
- -
- -      if (gmx_parallel_env_initialized())
- -              gmx_finalize();
- -
- -      if (MULTIMASTER(cr)) {
- -              thanx(stderr);
+ +              sprintf(buf2," -v -stepout %d",nstepout);
+ +              strcat(buf,buf2);
         }
   
- -      /* Log file has to be closed in mdrunner if we are appending to it
- -   (fplog not set here) */
- -      fprintf(stderr,"Please cite:\nWolf et al, J Comp Chem 31 (2010) 2169-2174.\n");
+ +        printf("%s\n",buf);
+ +        if (bStart)
+ +        {
+ +                system(buf);
+ +        } else {
+ +                printf("You can membed your protein now by:\n%s\n",buf);
+ +        }
   
- -      if (MASTER(cr) && !bAppendFiles)
- -      {
- -              gmx_log_close(fplog);
- -      }
+ +        fprintf(stderr,"Please cite:\nWolf et al, J Comp Chem 31 (2010) 2169-2174.\n");
   
- -      return rc;
+ +      return 0;
   }
diff --cc src/tools/gmx_tune_pme.c
Simple merge
author	Christoph Junghans <junghans@mpip-mainz.mpg.de>
	Wed, 15 Jun 2011 15:41:51 +0000 (17:41 +0200)
committer	Christoph Junghans <junghans@mpip-mainz.mpg.de>
	Wed, 15 Jun 2011 15:41:51 +0000 (17:41 +0200)
		1	2
CMakeLists.txt	patch \|	diff1 \|	diff2 \|	blob \| history
cmake/ThreadMPI.cmake	patch \|	diff1 \|	diff2 \|	blob \| history
share/CMakeLists.txt	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/gmxlib/copyrite.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/gmxlib/enxio.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/gmxlib/filenm.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/gmxlib/futil.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/gmxlib/gmx_system_xdr.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/gmxlib/index.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/gmxlib/main.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/gmxlib/matio.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/gmxlib/network.c	patch \|	diff1 \|	\|	blob \| history
src/gromacs/gmxlib/pdbio.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/gmxlib/rmpbc.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/gmxlib/strdb.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/gmxlib/string2.c	patch \|	diff1 \|	\|	blob \| history
src/gromacs/gmxlib/thread_mpi/CMakeLists.txt	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/gmxlib/thread_mpi/impl.h	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/gmxlib/thread_mpi/numa_malloc.c	patch \|	\|	diff2 \|	blob \| history
src/gromacs/gmxlib/thread_mpi/p2p_buffer.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/gmxlib/thread_mpi/pthreads.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/gmxlib/thread_mpi/settings.h	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/gmxlib/thread_mpi/tmpi_init.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/gmxlib/thread_mpi/winthreads.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/gmxlib/typedefs.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/gmxlib/wman.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/gmxpreprocess/gen_vsite.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/gmxpreprocess/genhydro.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/gmxpreprocess/genhydro.h	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/gmxpreprocess/pdb2top.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/legacyheaders/futil.h	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/legacyheaders/gmx_blas.h	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/legacyheaders/gmx_cyclecounter.h	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/legacyheaders/gmx_lapack.h	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/legacyheaders/gmxcomplex.h	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/legacyheaders/gstat.h	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/legacyheaders/main.h	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/legacyheaders/mdrun.h	patch \|	diff1 \|	\|	blob \| history
src/gromacs/legacyheaders/network.h	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/legacyheaders/pbc.h	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/legacyheaders/string2.h	patch \|	diff1 \|	\|	blob \| history
src/gromacs/legacyheaders/thread_mpi.h	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/legacyheaders/thread_mpi/atomic.h	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/legacyheaders/thread_mpi/atomic/suncc-sparc.h	patch \|	\|	diff2 \|	blob \| history
src/gromacs/legacyheaders/thread_mpi/numa_malloc.h	patch \|	\|	diff2 \|	blob \| history
src/gromacs/legacyheaders/thread_mpi/threads.h	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/legacyheaders/tmpi.h	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/legacyheaders/typedefs.h	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/legacyheaders/types/atoms.h	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/legacyheaders/types/filenm.h	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/legacyheaders/types/globsig.h	patch \|	\|	diff2 \|	blob \| history
src/gromacs/legacyheaders/types/iteratedconstraints.h	patch \|	\|	diff2 \|	blob \| history
src/gromacs/legacyheaders/types/nlistheuristics.h	patch \|	\|	diff2 \|	blob \| history
src/gromacs/legacyheaders/types/pbc.h	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/legacyheaders/types/simple.h	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/legacyheaders/types/topology.h	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/mdlib/constr.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/mdlib/domdec.c	patch \|	diff1 \|	\|	blob \| history
src/gromacs/mdlib/domdec_top.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/mdlib/edsam.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/mdlib/iteratedconstraints.c	patch \|	\|	diff2 \|	blob \| history
src/gromacs/mdlib/md_support.c	patch \|	\|	diff2 \|	blob \| history
src/gromacs/mdlib/mdebin.c	patch \|	diff1 \|	\|	blob \| history
src/gromacs/mdlib/nlistheuristics.c	patch \|	\|	diff2 \|	blob \| history
src/gromacs/mdlib/update.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/programs/g_protonate/g_protonate.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/programs/g_x2top/g_x2top.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/programs/gmxdump/gmxdump.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/programs/grompp/grompp.c	patch \|	diff1 \|	\|	blob \| history
src/programs/mdrun/md.c	patch \|	diff1 \|	\|	blob \| history
src/programs/mdrun/md_openmm.c	patch \|	diff1 \|	\|	blob \| history
src/programs/mdrun/mdrun.c	patch \|	diff1 \|	\|	blob \| history
src/programs/mdrun/repl_ex.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/programs/mdrun/runner.c	patch \|	diff1 \|	\|	blob \| history
src/programs/pdb2gmx/pdb2gmx.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/programs/pdb2gmx/xlate.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/programs/tpbconv/tpbconv.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/tools/gmx_membed.c	patch \|	diff1 \|	diff2 \|	blob \| history
src/tools/gmx_tune_pme.c	patch \|	diff1 \|	diff2 \|	blob \| history