Merge release-4-6 into master

author Roland Schulz <roland@utk.edu>

Mon, 21 Jan 2013 01:12:10 +0000 (20:12 -0500)

committer Roland Schulz <roland@utk.edu>

Mon, 21 Jan 2013 01:30:53 +0000 (20:30 -0500)
author Roland Schulz <roland@utk.edu>
Mon, 21 Jan 2013 01:12:10 +0000 (20:12 -0500)
committer Roland Schulz <roland@utk.edu>
Mon, 21 Jan 2013 01:30:53 +0000 (20:30 -0500)
diff --cc cmake/gmxGetCompilerInfo.cmake
Simple merge
diff --cc src/gromacs/gmxlib/confio.c

index 0af17e2bb4d6919f3b5ef0d9e7e9355372f83b46,0000000000000000000000000000000000000000..4167a51b506a428a6ab6a4f457d056a9a3707033

mode 100644,000000..100644
--- 1/src/gromacs/gmxlib/confio.c
--- /dev/null
+++ b/src/gromacs/gmxlib/confio.c
@@@ -1,1449 -1,0 +1,1450 @@@
-     fr.title = title;
+ +/*
+ + * 
+ + *                This source code is part of
+ + * 
+ + *                 G   R   O   M   A   C   S
+ + * 
+ + *          GROningen MAchine for Chemical Simulations
+ + * 
+ + *                        VERSION 3.2.0
+ + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ + * Copyright (c) 2001-2004, The GROMACS development team,
+ + * check out http://www.gromacs.org for more information.
+ +
+ + * This program is free software; you can redistribute it and/or
+ + * modify it under the terms of the GNU General Public License
+ + * as published by the Free Software Foundation; either version 2
+ + * of the License, or (at your option) any later version.
+ + * 
+ + * If you want to redistribute modifications, please consider that
+ + * scientific software is very special. Version control is crucial -
+ + * bugs must be traceable. We will be happy to consider code for
+ + * inclusion in the official distribution, but derived work must not
+ + * be called official GROMACS. Details are found in the README & COPYING
+ + * files - if they are missing, get the official version at www.gromacs.org.
+ + * 
+ + * To help us fund GROMACS development, we humbly ask that you cite
+ + * the papers on the package - you can find them in the top README file.
+ + * 
+ + * For more info, check our website at http://www.gromacs.org
+ + * 
+ + * And Hey:
+ + * GROningen Mixture of Alchemy and Childrens' Stories
+ + */
+ +#ifdef HAVE_CONFIG_H
+ +#include <config.h>
+ +#endif
+ +
+ +#include <math.h>
+ +#include "sysstuff.h"
+ +#include "typedefs.h"
+ +#include "smalloc.h"
+ +#include "sysstuff.h"
+ +#include <errno.h>
+ +#include "macros.h"
+ +#include "string2.h"
+ +#include "confio.h"
+ +#include "vec.h"
+ +#include "symtab.h"
+ +#include "futil.h"
+ +#include "xdrf.h"
+ +#include "filenm.h"
+ +#include "pdbio.h"
+ +#include "tpxio.h"
+ +#include "gmx_fatal.h"
+ +#include "copyrite.h"
+ +#include "filenm.h"
+ +#include "statutil.h"
+ +#include "pbc.h"
+ +#include "mtop_util.h"
+ +#include "gmxfio.h"
+ +
+ +#define CHAR_SHIFT 24
+ +
+ +static int read_g96_pos(char line[],t_symtab *symtab,
+ +                      FILE *fp,const char *infile,
+ +                      t_trxframe *fr)
+ +{
+ +  t_atoms *atoms;
+ +  gmx_bool   bEnd;
+ +  int    nwanted,natoms,atnr,resnr=0,oldres,newres,shift;
+ +  char   anm[STRLEN],resnm[STRLEN];
+ +  char   c1,c2;
+ +  double db1,db2,db3;
+ +  
+ +  nwanted = fr->natoms;
+ +
+ +  atoms = fr->atoms;
+ +
+ +  natoms = 0;
+ +
+ +  if (fr->bX) {
+ +    if (fr->bAtoms)
+ +      shift = CHAR_SHIFT;
+ +    else
+ +      shift = 0;
+ +    newres  = -1;
+ +    oldres  = -666; /* Unlikely number for the first residue! */
+ +    bEnd    = FALSE;
+ +    while (!bEnd && fgets2(line,STRLEN,fp)) {
+ +      bEnd = (strncmp(line,"END",3) == 0);
+ +      if (!bEnd  && (line[0] != '#')) {
+ +      if (sscanf(line+shift,"%15lf%15lf%15lf",&db1,&db2,&db3) != 3)
+ +        gmx_fatal(FARGS,"Did not find 3 coordinates for atom %d in %s\n",
+ +                    natoms+1,infile);
+ +      if ((nwanted != -1) && (natoms >= nwanted))
+ +        gmx_fatal(FARGS,
+ +                    "Found more coordinates (%d) in %s than expected %d\n",
+ +                    natoms,infile,nwanted);
+ +      if (atoms) {
+ +        if (fr->bAtoms &&
+ +            (sscanf(line,"%5d%c%5s%c%5s%7d",&resnr,&c1,resnm,&c2,anm,&atnr) 
+ +             != 6)) {
+ +          if (oldres>=0)
+ +            resnr = oldres;
+ +          else {
+ +            resnr    = 1;
+ +            strncpy(resnm,"???",sizeof(resnm)-1); 
+ +          }
+ +          strncpy(anm,"???",sizeof(anm)-1); 
+ +        }
+ +        atoms->atomname[natoms]=put_symtab(symtab,anm);
+ +        if (resnr != oldres) {
+ +          oldres = resnr;
+ +          newres++;
+ +          if (newres >= atoms->nr)
+ +            gmx_fatal(FARGS,"More residues than atoms in %s (natoms = %d)",
+ +                        infile,atoms->nr);
+ +          atoms->atom[natoms].resind = newres;
+ +          if (newres+1 > atoms->nres) {
+ +            atoms->nres = newres+1;
+ +          }
+ +          t_atoms_set_resinfo(atoms,natoms,symtab,resnm,resnr,' ',0,' ');
+ +        } else {
+ +          atoms->atom[natoms].resind = newres;
+ +        }
+ +      }
+ +      if (fr->x) {
+ +        fr->x[natoms][0] = db1;
+ +        fr->x[natoms][1] = db2;
+ +        fr->x[natoms][2] = db3;
+ +      }
+ +      natoms++;
+ +      }
+ +    }
+ +    if ((nwanted != -1) && natoms != nwanted)
+ +      fprintf(stderr,
+ +            "Warning: found less coordinates (%d) in %s than expected %d\n",
+ +            natoms,infile,nwanted);
+ +  }
+ +
+ +  fr->natoms = natoms;
+ +
+ +  return natoms;
+ +}
+ +
+ +static int read_g96_vel(char line[],FILE *fp,const char *infile,
+ +                      t_trxframe *fr)
+ +{
+ +  gmx_bool   bEnd;
+ +  int    nwanted,natoms=-1,shift;
+ +  double db1,db2,db3;
+ +
+ +  nwanted = fr->natoms;
+ +
+ +  if (fr->v && fr->bV) {
+ +    if (strcmp(line,"VELOCITYRED") == 0)
+ +      shift = 0;
+ +    else
+ +      shift = CHAR_SHIFT;
+ +    natoms = 0;
+ +    bEnd = FALSE;
+ +    while (!bEnd && fgets2(line,STRLEN,fp)) {
+ +      bEnd = (strncmp(line,"END",3) == 0);
+ +      if (!bEnd && (line[0] != '#')) {
+ +      if (sscanf(line+shift,"%15lf%15lf%15lf",&db1,&db2,&db3) != 3)
+ +        gmx_fatal(FARGS,"Did not find 3 velocities for atom %d in %s",
+ +                    natoms+1,infile);
+ +      if ((nwanted != -1) && (natoms >= nwanted))
+ +        gmx_fatal(FARGS,"Found more velocities (%d) in %s than expected %d\n",
+ +                    natoms,infile,nwanted);
+ +      if (fr->v) {
+ +        fr->v[natoms][0] = db1;
+ +        fr->v[natoms][1] = db2;
+ +        fr->v[natoms][2] = db3;
+ +      }
+ +      natoms++;
+ +      }
+ +    }
+ +    if ((nwanted != -1) && (natoms != nwanted))
+ +      fprintf(stderr,
+ +            "Warning: found less velocities (%d) in %s than expected %d\n",
+ +            natoms,infile,nwanted);
+ +  }
+ +  
+ +  return natoms;
+ +}
+ +
+ +int read_g96_conf(FILE *fp,const char *infile,t_trxframe *fr, char *line)
+ +{
+ +  t_symtab *symtab=NULL;
+ +  gmx_bool   bAtStart,bTime,bAtoms,bPos,bVel,bBox,bEnd,bFinished;
+ +  int    natoms,nbp;
+ +  double db1,db2,db3,db4,db5,db6,db7,db8,db9;
+ +
+ +  bAtStart = (ftell(fp) == 0);
+ +
+ +  clear_trxframe(fr,FALSE);
+ +  
+ +  if (!symtab) {
+ +    snew(symtab,1);
+ +    open_symtab(symtab);
+ +  }
+ +  
+ +  natoms=0;
+ +
+ +  if (bAtStart) {
+ +    while ( !fr->bTitle && fgets2(line,STRLEN,fp))
+ +      fr->bTitle = (strcmp(line,"TITLE") == 0);
+ +    if (fr->title == NULL) {
+ +      fgets2(line,STRLEN,fp);
+ +      fr->title = strdup(line);
+ +    }
+ +    bEnd = FALSE;
+ +    while (!bEnd && fgets2(line,STRLEN,fp))
+ +      bEnd = (strcmp(line,"END") == 0);
+ +    fgets2(line,STRLEN,fp);
+ +  }
+ +  
+ +  /* Do not get a line if we are not at the start of the file, *
+ +   * because without a parameter file we don't know what is in *
+ +   * the trajectory and we have already read the line in the   *
+ +   * previous call (VERY DIRTY).                               */
+ +  bFinished = FALSE;
+ +  do {
+ +    bTime  = (strcmp(line,"TIMESTEP") == 0);
+ +    bAtoms = (strcmp(line,"POSITION") == 0);
+ +    bPos   = (bAtoms || (strcmp(line,"POSITIONRED") == 0));
+ +    bVel   = (strncmp(line,"VELOCITY",8) == 0);
+ +    bBox   = (strcmp(line,"BOX") == 0);
+ +    if (bTime) {
+ +      if (!fr->bTime && !fr->bX) {
+ +      fr->bStep = bTime;
+ +      fr->bTime = bTime;
+ +      do 
+ +        bFinished = (fgets2(line,STRLEN,fp) == NULL);
+ +      while (!bFinished && (line[0] == '#'));
+ +      sscanf(line,"%15d%15lf",&(fr->step),&db1);
+ +      fr->time = db1;
+ +      } else
+ +      bFinished = TRUE;
+ +    }
+ +    if (bPos) {
+ +      if (!fr->bX) {
+ +      fr->bAtoms = bAtoms;
+ +      fr->bX     = bPos;
+ +      natoms = read_g96_pos(line,symtab,fp,infile,fr);
+ +      } else
+ +      bFinished = TRUE;
+ +    }
+ +    if (fr->v && bVel) {
+ +      fr->bV = bVel;
+ +      natoms = read_g96_vel(line,fp,infile,fr);
+ +    }
+ +    if (bBox) {
+ +      fr->bBox = bBox;
+ +      clear_mat(fr->box);
+ +      bEnd = FALSE;
+ +      while (!bEnd && fgets2(line,STRLEN,fp)) {
+ +      bEnd = (strncmp(line,"END",3) == 0);
+ +      if (!bEnd && (line[0] != '#')) {
+ +        nbp = sscanf(line,"%15lf%15lf%15lf%15lf%15lf%15lf%15lf%15lf%15lf",
+ +                     &db1,&db2,&db3,&db4,&db5,&db6,&db7,&db8,&db9);
+ +        if (nbp < 3)
+ +          gmx_fatal(FARGS,"Found a BOX line, but no box in %s",infile);
+ +        fr->box[XX][XX] = db1;
+ +        fr->box[YY][YY] = db2;
+ +        fr->box[ZZ][ZZ] = db3;
+ +        if (nbp == 9) {
+ +          fr->box[XX][YY] = db4;
+ +          fr->box[XX][ZZ] = db5;
+ +          fr->box[YY][XX] = db6;
+ +          fr->box[YY][ZZ] = db7;
+ +          fr->box[ZZ][XX] = db8;
+ +          fr->box[ZZ][YY] = db9; 
+ +        }
+ +      }
+ +      }
+ +      bFinished = TRUE;
+ +    }
+ +  } while (!bFinished && fgets2(line,STRLEN,fp));
+ +  
+ +  free_symtab(symtab);
+ +
+ +  fr->natoms = natoms;
+ +  
+ +  return natoms;
+ +}
+ +
+ +void write_g96_conf(FILE *out,t_trxframe *fr,
+ +                    int nindex,const atom_id *index)
+ +{
+ +  t_atoms *atoms;
+ +  int nout,i,a;
+ +  
+ +  atoms = fr->atoms;
+ +
+ +  if (index)
+ +    nout = nindex;
+ +  else
+ +    nout = fr->natoms; 
+ +
+ +  if (fr->bTitle)
+ +    fprintf(out,"TITLE\n%s\nEND\n",fr->title);
+ +  if (fr->bStep || fr->bTime)
+ +    /* Officially the time format is %15.9, which is not enough for 10 ns */
+ +    fprintf(out,"TIMESTEP\n%15d%15.6f\nEND\n",fr->step,fr->time);
+ +  if (fr->bX) {
+ +    if (fr->bAtoms) {
+ +      fprintf(out,"POSITION\n");
+ +      for(i=0; i<nout; i++) {
+ +      if (index) a = index[i]; else a = i;
+ +      fprintf(out,"%5d %-5s %-5s%7d%15.9f%15.9f%15.9f\n",
+ +              (atoms->resinfo[atoms->atom[a].resind].nr) % 100000,
+ +              *atoms->resinfo[atoms->atom[a].resind].name,
+ +              *atoms->atomname[a],(i+1) % 10000000,
+ +              fr->x[a][XX],fr->x[a][YY],fr->x[a][ZZ]);
+ +      }
+ +    } else {
+ +      fprintf(out,"POSITIONRED\n");
+ +      for(i=0; i<nout; i++) {
+ +      if (index) a = index[i]; else a = i;
+ +      fprintf(out,"%15.9f%15.9f%15.9f\n",
+ +              fr->x[a][XX],fr->x[a][YY],fr->x[a][ZZ]);
+ +      }
+ +    }
+ +    fprintf(out,"END\n");
+ +  }
+ +  if (fr->bV) {
+ +    if (fr->bAtoms) {
+ +      fprintf(out,"VELOCITY\n");
+ +      for(i=0; i<nout; i++) {
+ +      if (index) a = index[i]; else a = i;
+ +      fprintf(out,"%5d %-5s %-5s%7d%15.9f%15.9f%15.9f\n",
+ +              (atoms->resinfo[atoms->atom[a].resind].nr) % 100000,
+ +              *atoms->resinfo[atoms->atom[a].resind].name,
+ +              *atoms->atomname[a],(i+1) % 10000000,
+ +              fr->v[a][XX],fr->v[a][YY],fr->v[a][ZZ]);
+ +      }
+ +    } else {
+ +      fprintf(out,"VELOCITYRED\n");
+ +      for(i=0; i<nout; i++) {
+ +      if (index) a = index[i]; else a = i;
+ +      fprintf(out,"%15.9f%15.9f%15.9f\n",
+ +              fr->v[a][XX],fr->v[a][YY],fr->v[a][ZZ]);
+ +      }
+ +    }
+ +    fprintf(out,"END\n");
+ +  }
+ +  if (fr->bBox) {
+ +    fprintf(out,"BOX\n");
+ +    fprintf(out,"%15.9f%15.9f%15.9f",
+ +          fr->box[XX][XX],fr->box[YY][YY],fr->box[ZZ][ZZ]);
+ +    if (fr->box[XX][YY] || fr->box[XX][ZZ] || fr->box[YY][XX] ||
+ +      fr->box[YY][ZZ] || fr->box[ZZ][XX] ||fr->box[ZZ][YY])
+ +      fprintf(out,"%15.9f%15.9f%15.9f%15.9f%15.9f%15.9f",
+ +            fr->box[XX][YY],fr->box[XX][ZZ],fr->box[YY][XX],
+ +            fr->box[YY][ZZ],fr->box[ZZ][XX],fr->box[ZZ][YY]);
+ +    fprintf(out,"\n");
+ +    fprintf(out,"END\n");
+ +  }
+ +}
+ +
+ +static int get_espresso_word(FILE *fp,char word[])
+ +{
+ +  int  ret,nc,i;
+ +  
+ +  ret = 0;
+ +  nc = 0;
+ +  
+ +  do {
+ +    i = fgetc(fp);
+ +    if (i != EOF) {
+ +      if (i == ' ' || i == '\n' || i == '\t') {
+ +      if (nc > 0)
+ +        ret = 1;
+ +      } else if (i == '{') {
+ +      if (nc == 0)
+ +        word[nc++] = '{';
+ +      ret = 2;
+ +      } else if (i == '}') {
+ +      if (nc == 0)
+ +        word[nc++] = '}';
+ +      ret = 3;
+ +      } else {
+ +      word[nc++] = (char)i;
+ +      }
+ +    }
+ +  } while (i != EOF && ret == 0);
+ +  
+ +  word[nc] = '\0';
+ +
+ +  /*  printf("'%s'\n",word); */
+ +  
+ +  return ret;
+ +}
+ +
+ +static int check_open_parenthesis(FILE *fp,int r,
+ +                                const char *infile,const char *keyword)
+ +{
+ +  int level_inc;
+ +  char word[STRLEN];
+ +
+ +  level_inc = 0;
+ +  if (r == 2) {
+ +    level_inc++;
+ +  } else {
+ +    r = get_espresso_word(fp,word);
+ +    if (r == 2)
+ +      level_inc++;
+ +    else
+ +      gmx_fatal(FARGS,"Expected '{' after '%s' in file '%s'",
+ +              keyword,infile);
+ +  }
+ +
+ +  return level_inc;
+ +}
+ +
+ +static int check_close_parenthesis(FILE *fp,int r,
+ +                                const char *infile,const char *keyword)
+ +{
+ +  int level_inc;
+ +  char word[STRLEN];
+ +  
+ +  level_inc = 0;
+ +  if (r == 3) {
+ +    level_inc--;
+ +  } else {
+ +    r = get_espresso_word(fp,word);
+ +    if (r == 3)
+ +      level_inc--;
+ +    else
+ +      gmx_fatal(FARGS,"Expected '}' after section '%s' in file '%s'",
+ +              keyword,infile);
+ +  }
+ +
+ +  return level_inc;
+ +}
+ +
+ +enum { espID, espPOS, espTYPE, espQ, espV, espF, espMOLECULE, espNR };
+ +const char *esp_prop[espNR] = { "id", "pos", "type", "q", "v", "f",
+ +                              "molecule" };
+ +
+ +static void read_espresso_conf(const char *infile,
+ +                             t_atoms *atoms,rvec x[],rvec *v,matrix box)
+ +{
+ +  t_symtab *symtab=NULL;
+ +  FILE *fp;
+ +  char word[STRLEN],buf[STRLEN];
+ +  int  natoms,level,npar,r,nprop,p,i,m,molnr;
+ +  int  prop[32];
+ +  double d;
+ +  gmx_bool bFoundParticles,bFoundProp,bFoundVariable,bMol;
+ +
+ +  if (!symtab) {
+ +    snew(symtab,1);
+ +    open_symtab(symtab);
+ +  }
+ +
+ +  clear_mat(box);
+ +  
+ +  fp = gmx_fio_fopen(infile,"r");
+ +  
+ +  bFoundParticles = FALSE;
+ +  bFoundVariable = FALSE;
+ +  bMol = FALSE;
+ +  level = 0;
+ +  while ((r=get_espresso_word(fp,word))) {
+ +    if (level==1 && strcmp(word,"particles")==0 && !bFoundParticles) {
+ +      bFoundParticles = TRUE;
+ +      level += check_open_parenthesis(fp,r,infile,"particles");
+ +      nprop = 0;
+ +      while (level == 2 && (r=get_espresso_word(fp,word))) {
+ +      bFoundProp = FALSE;
+ +      for(p=0; p<espNR; p++) {
+ +        if (strcmp(word,esp_prop[p]) == 0) {
+ +          bFoundProp = TRUE;
+ +          prop[nprop++] = p;
+ +          /* printf("  prop[%d] = %s\n",nprop-1,esp_prop[prop[nprop-1]]); */
+ +        }
+ +      }
+ +      if (!bFoundProp && word[0] != '}') {
+ +        gmx_fatal(FARGS,"Can not read Espresso files with particle property '%s'",word);
+ +      }
+ +      if (bFoundProp && p == espMOLECULE)
+ +        bMol = TRUE;
+ +      if (r == 3)
+ +        level--;
+ +      }
+ +      
+ +      i = 0;
+ +      while (level > 0 && (r=get_espresso_word(fp,word))) {
+ +      if (r == 2) {
+ +        level++;
+ +      } else if (r == 3) {
+ +        level--;
+ +      }
+ +      if (level == 2) {
+ +        for(p=0; p<nprop; p++) {
+ +          switch (prop[p]) {
+ +          case espID:
+ +            r = get_espresso_word(fp,word);
+ +            /* Not used */
+ +            break;
+ +          case espPOS:
+ +            for(m=0; m<3; m++) {
+ +              r = get_espresso_word(fp,word);
+ +              sscanf(word,"%lf",&d);
+ +              x[i][m] = d;
+ +            }
+ +            break;
+ +          case espTYPE:
+ +            r = get_espresso_word(fp,word);
+ +            atoms->atom[i].type = strtol(word, NULL, 10);  
+ +            break;
+ +          case espQ:
+ +            r = get_espresso_word(fp,word);
+ +            sscanf(word,"%lf",&d);
+ +            atoms->atom[i].q = d;
+ +            break;
+ +          case espV:
+ +            for(m=0; m<3; m++) {
+ +              r = get_espresso_word(fp,word);
+ +              sscanf(word,"%lf",&d);
+ +              v[i][m] = d;
+ +            }
+ +            break;
+ +          case espF:
+ +            for(m=0; m<3; m++) {
+ +              r = get_espresso_word(fp,word);
+ +              /* not used */
+ +            }
+ +            break;
+ +          case espMOLECULE:
+ +            r = get_espresso_word(fp,word);
+ +            molnr = strtol(word, NULL, 10);
+ +            if (i == 0 ||
+ +                atoms->resinfo[atoms->atom[i-1].resind].nr != molnr) {
+ +              atoms->atom[i].resind =
+ +                (i == 0 ? 0 : atoms->atom[i-1].resind+1); 
+ +              atoms->resinfo[atoms->atom[i].resind].nr      = molnr;
+ +              atoms->resinfo[atoms->atom[i].resind].ic      = ' ';
+ +              atoms->resinfo[atoms->atom[i].resind].chainid = ' ';
+ +        atoms->resinfo[atoms->atom[i].resind].chainnum = molnr; /* Not sure if this is right? */
+ +            } else {
+ +              atoms->atom[i].resind = atoms->atom[i-1].resind;
+ +            }
+ +            break;
+ +          }
+ +        }
+ +        /* Generate an atom name from the particle type */
+ +        sprintf(buf,"T%d",atoms->atom[i].type);
+ +        atoms->atomname[i] = put_symtab(symtab,buf);
+ +        if (bMol) {
+ +          if (i == 0 || atoms->atom[i].resind != atoms->atom[i-1].resind) {
+ +            atoms->resinfo[atoms->atom[i].resind].name =
+ +              put_symtab(symtab,"MOL");
+ +          }
+ +        } else {
+ +          /* Residue number is the atom number */
+ +          atoms->atom[i].resind = i;
+ +          /* Generate an residue name from the particle type */
+ +          if (atoms->atom[i].type < 26) {
+ +            sprintf(buf,"T%c",'A'+atoms->atom[i].type);
+ +          } else {
+ +            sprintf(buf,"T%c%c",
+ +                    'A'+atoms->atom[i].type/26,'A'+atoms->atom[i].type%26);
+ +          }
+ +          t_atoms_set_resinfo(atoms,i,symtab,buf,i,' ',0,' ');
+ +        }       
+ +
+ +        if (r == 3)
+ +          level--;
+ +        i++;
+ +      }
+ +      }
+ +      atoms->nres = atoms->nr;
+ +
+ +      if (i != atoms->nr) {
+ +      gmx_fatal(FARGS,"Internal inconsistency in Espresso routines, read %d atoms, expected %d atoms",i,atoms->nr);
+ +      }
+ +    } else if (level==1 && strcmp(word,"variable")==0 && !bFoundVariable) {
+ +      bFoundVariable = TRUE;
+ +      level += check_open_parenthesis(fp,r,infile,"variable");
+ +      while (level==2 && (r=get_espresso_word(fp,word))) {
+ +      if (level==2 && strcmp(word,"box_l") == 0) {
+ +        for(m=0; m<3; m++) {
+ +          r = get_espresso_word(fp,word);
+ +          sscanf(word,"%lf",&d);
+ +          box[m][m] = d;
+ +        }
+ +        level += check_close_parenthesis(fp,r,infile,"box_l");
+ +      }
+ +      }
+ +    } else if (r == 2) {
+ +      level++;
+ +    } else if (r == 3) {
+ +      level--;
+ +    }
+ +  }
+ +  
+ +  if (!bFoundParticles) {
+ +    fprintf(stderr,"Did not find a particles section in Espresso file '%s'\n",
+ +          infile);
+ +  }
+ +  
+ +  gmx_fio_fclose(fp);
+ +}
+ +
+ +static int get_espresso_coordnum(const char *infile)
+ +{
+ +  FILE *fp;
+ +  char word[STRLEN];
+ +  int  natoms,level,r;
+ +  gmx_bool bFoundParticles;
+ +
+ +  natoms = 0;
+ +  
+ +  fp = gmx_fio_fopen(infile,"r");
+ +  
+ +  bFoundParticles = FALSE;
+ +  level = 0;
+ +  while ((r=get_espresso_word(fp,word)) && !bFoundParticles) {
+ +    if (level==1 && strcmp(word,"particles")==0 && !bFoundParticles) {
+ +      bFoundParticles = TRUE;
+ +      level += check_open_parenthesis(fp,r,infile,"particles");
+ +      while (level > 0 && (r=get_espresso_word(fp,word))) {
+ +      if (r == 2) {
+ +        level++;
+ +        if (level == 2)
+ +          natoms++;
+ +      } else if (r == 3) {
+ +        level--;
+ +      }
+ +      }
+ +    } else if (r == 2) {
+ +      level++;
+ +    } else if (r == 3) {
+ +      level--;
+ +    }
+ +  }
+ +  if (!bFoundParticles) {
+ +    fprintf(stderr,"Did not find a particles section in Espresso file '%s'\n",
+ +          infile);
+ +  }
+ +  
+ +  gmx_fio_fclose(fp);
+ +  
+ +  return natoms;
+ +}
+ +
+ +static void write_espresso_conf_indexed(FILE *out,const char *title,
+ +                                      t_atoms *atoms,int nx,atom_id *index,
+ +                                      rvec *x,rvec *v,matrix box)
+ +{
+ +  int i,j;
+ +
+ +  fprintf(out,"# %s\n",title);
+ +  if (TRICLINIC(box)) {
+ +    gmx_warning("The Espresso format does not support triclinic unit-cells");
+ +  }
+ +  fprintf(out,"{variable {box_l %f %f %f}}\n",box[0][0],box[1][1],box[2][2]);
+ +  
+ +  fprintf(out,"{particles {id pos type q%s}\n",v ? " v" : "");
+ +  for(i=0; i<nx; i++) {
+ +    if (index)
+ +      j = index[i];
+ +    else
+ +      j = i;
+ +    fprintf(out,"\t{%d %f %f %f %d %g",
+ +          j,x[j][XX],x[j][YY],x[j][ZZ],
+ +          atoms->atom[j].type,atoms->atom[j].q);
+ +    if (v)
+ +      fprintf(out," %f %f %f",v[j][XX],v[j][YY],v[j][ZZ]);
+ +    fprintf(out,"}\n");
+ +  }
+ +  fprintf(out,"}\n");
+ +}
+ +
+ +static void get_coordnum_fp (FILE *in, char *title, int *natoms)
+ +{
+ +  char line[STRLEN+1];
+ +
+ +  fgets2 (title,STRLEN,in);
+ +  fgets2 (line,STRLEN,in);
+ +  if (sscanf (line,"%d",natoms) != 1) {
+ +    gmx_fatal(FARGS,"gro file does not have the number of atoms on the second line");
+ +  }
+ +}
+ +
+ +static void get_coordnum (const char *infile,int *natoms)
+ +{
+ +  FILE *in;
+ +  char title[STRLEN];
+ +  
+ +  in=gmx_fio_fopen(infile,"r");
+ +  get_coordnum_fp(in,title,natoms);
+ +  gmx_fio_fclose (in);
+ +}
+ +
+ +static gmx_bool get_w_conf(FILE *in, const char *infile, char *title,
+ +                           t_symtab *symtab, t_atoms *atoms, int *ndec,
+ +                           rvec x[], rvec *v, matrix box)
+ +{
+ +  char   name[6];
+ +  char   line[STRLEN+1],*ptr;
+ +  char   buf[256];
+ +  double x1,y1,z1,x2,y2,z2;
+ +  rvec   xmin,xmax;
+ +  int    natoms,i,m,resnr,newres,oldres,ddist,c;
+ +  gmx_bool   bFirst,bVel;
+ +  char   *p1,*p2,*p3;
+ +  
+ +  newres  = -1;
+ +  oldres  = NOTSET; /* Unlikely number for the first residue! */
+ +  ddist   = 0;
+ +  
+ +  /* Read the title and number of atoms */
+ +  get_coordnum_fp(in,title,&natoms);
+ +
+ +  if (natoms > atoms->nr)
+ +    gmx_fatal(FARGS,"gro file contains more atoms (%d) than expected (%d)",
+ +              natoms,atoms->nr);
+ +  else if (natoms <  atoms->nr)
+ +    fprintf(stderr,"Warning: gro file contains less atoms (%d) than expected"
+ +          " (%d)\n",natoms,atoms->nr);
+ +  
+ +  bFirst=TRUE;
+ +  
+ +  bVel = FALSE;
+ +
+ +  /* just pray the arrays are big enough */
+ +  for (i=0; (i < natoms) ; i++) {
+ +    if ((fgets2 (line,STRLEN,in)) == NULL) {
+ +      unexpected_eof(infile,i+2);
+ +    }
+ +    if (strlen(line) < 39)
+ +      gmx_fatal(FARGS,"Invalid line in %s for atom %d:\n%s",infile,i+1,line);
+ +
+ +    /* determine read precision from distance between periods 
+ +       (decimal points) */
+ +    if (bFirst) {
+ +      bFirst=FALSE;
+ +      p1=strchr(line,'.');
+ +      if (p1 == NULL)
+ +      gmx_fatal(FARGS,"A coordinate in file %s does not contain a '.'",infile);
+ +      p2=strchr(&p1[1],'.');
+ +      if (p2 == NULL)
+ +      gmx_fatal(FARGS,"A coordinate in file %s does not contain a '.'",infile);
+ +      ddist = p2 - p1;
+ +      *ndec = ddist - 5;
+ +
+ +      p3=strchr(&p2[1],'.');
+ +      if (p3 == NULL)
+ +      gmx_fatal(FARGS,"A coordinate in file %s does not contain a '.'",infile);
+ +
+ +      if (p3 - p2 != ddist)
+ +      gmx_fatal(FARGS,"The spacing of the decimal points in file %s is not consistent for x, y and z",infile);
+ +    }
+ +    
+ +    /* residue number*/
+ +    memcpy(name,line,5);
+ +    name[5]='\0';
+ +    sscanf(name,"%d",&resnr);
+ +    memcpy(name,line+5,5);
+ +    name[5]='\0';
+ +    if (resnr != oldres) {
+ +      oldres = resnr;
+ +      newres++;
+ +      if (newres >= natoms)
+ +      gmx_fatal(FARGS,"More residues than atoms in %s (natoms = %d)",
+ +                  infile,natoms);
+ +      atoms->atom[i].resind = newres;
+ +      t_atoms_set_resinfo(atoms,i,symtab,name,resnr,' ',0,' ');
+ +    } else {
+ +      atoms->atom[i].resind = newres;
+ +    }
+ +
+ +    /* atomname */
+ +    memcpy(name,line+10,5);
+ +    atoms->atomname[i]=put_symtab(symtab,name);
+ +   
+ +    /* eventueel controle atomnumber met i+1 */
+ +
+ +    /* coordinates (start after residue shit) */
+ +    ptr = line + 20;
+ +    /* Read fixed format */
+ +    for(m=0; m<DIM; m++) {
+ +      for(c=0; (c<ddist && ptr[0]); c++) {
+ +      buf[c] = ptr[0];
+ +      ptr++;
+ +      }
+ +      buf[c] = '\0';
+ +      if (sscanf (buf,"%lf %lf",&x1,&x2) != 1) {
+ +      gmx_fatal(FARGS,"Something is wrong in the coordinate formatting of file %s. Note that gro is fixed format (see the manual)",infile);
+ +      } else {
+ +      x[i][m] = x1;
+ +      }
+ +    }
+ +
+ +    /* velocities (start after residues and coordinates) */
+ +    if (v) {
+ +      /* Read fixed format */
+ +      for(m=0; m<DIM; m++) {
+ +      for(c=0; (c<ddist && ptr[0]); c++) {
+ +        buf[c] = ptr[0];
+ +        ptr++;
+ +      }
+ +      buf[c] = '\0';
+ +      if (sscanf (buf,"%lf",&x1) != 1) {
+ +        v[i][m] = 0;
+ +      } else {
+ +        v[i][m] = x1;
+ +        bVel = TRUE;
+ +      }
+ +      }
+ +    }
+ +  }
+ +  atoms->nres = newres + 1;
+ +
+ +  /* box */
+ +  fgets2 (line,STRLEN,in);
+ +  if (sscanf (line,"%lf%lf%lf",&x1,&y1,&z1) != 3) {
+ +    gmx_warning("Bad box in file %s",infile);
+ +    
+ +    /* Generate a cubic box */
+ +    for(m=0; (m<DIM); m++)
+ +      xmin[m]=xmax[m]=x[0][m];
+ +    for(i=1; (i<atoms->nr); i++)
+ +      for(m=0; (m<DIM); m++) {
+ +      xmin[m]=min(xmin[m],x[i][m]);
+ +      xmax[m]=max(xmax[m],x[i][m]);
+ +      }
+ +    for (i=0; i<DIM; i++) for (m=0; m<DIM; m++) box[i][m]=0.0;
+ +    for(m=0; (m<DIM); m++)
+ +      box[m][m]=(xmax[m]-xmin[m]);
+ +    fprintf(stderr,"Generated a cubic box %8.3f x %8.3f x %8.3f\n",
+ +          box[XX][XX],box[YY][YY],box[ZZ][ZZ]);
+ +  }
+ +  else {
+ +    /* We found the first three values, the diagonal elements */
+ +    box[XX][XX]=x1;
+ +    box[YY][YY]=y1;
+ +    box[ZZ][ZZ]=z1;
+ +    if (sscanf (line,"%*f%*f%*f%lf%lf%lf%lf%lf%lf",
+ +              &x1,&y1,&z1,&x2,&y2,&z2) != 6) 
+ +      x1=y1=z1=x2=y2=z2=0.0;
+ +    box[XX][YY] = x1;
+ +    box[XX][ZZ] = y1;
+ +    box[YY][XX] = z1;
+ +    box[YY][ZZ] = x2;
+ +    box[ZZ][XX] = y2;
+ +    box[ZZ][YY] = z2;
+ +  }
+ +
+ +  return bVel;
+ +}
+ +
+ +static void read_whole_conf(const char *infile,char *title,
+ +                          t_atoms *atoms, rvec x[],rvec *v, matrix box)
+ +{
+ +  FILE   *in;
+ +  int    ndec;
+ +  t_symtab symtab;
+ +
+ +  /* open file */
+ +  in=gmx_fio_fopen(infile,"r");
+ +
+ +  open_symtab(&symtab);
+ +  get_w_conf(in, infile, title, &symtab, atoms, &ndec, x, v, box);
+ +  /* We can't free the symbols, as they are still used in atoms, so
+ +   * the only choice is to leak them. */
+ +  free_symtab(&symtab);
+ +
+ +  gmx_fio_fclose(in);
+ +}
+ +
+ +gmx_bool gro_next_x_or_v(FILE *status,t_trxframe *fr)
+ +{
+ +  t_atoms atoms;
+ +  t_symtab symtab;
+ +  char    title[STRLEN],*p;
+ +  double  tt;
+ +  int     ndec=0,i;
+ +
+ +  if (gmx_eof(status))
+ +    return FALSE;
+ +
+ +  open_symtab(&symtab);
+ +  atoms.nr=fr->natoms;
+ +  snew(atoms.atom,fr->natoms);
+ +  atoms.nres=fr->natoms;
+ +  snew(atoms.resinfo,fr->natoms);
+ +  snew(atoms.atomname,fr->natoms);
+ +  
+ +  fr->bV = get_w_conf(status,title,title,&symtab,&atoms,&ndec,fr->x,fr->v,fr->box);
+ +  fr->bPrec = TRUE;
+ +  fr->prec = 1;
+ +  /* prec = 10^ndec: */
+ +  for(i=0; i<ndec; i++)
+ +    fr->prec *= 10;
+ +  fr->title = title;
+ +  fr->bTitle = TRUE;
+ +  fr->bX = TRUE;
+ +  fr->bBox = TRUE;
+ +
+ +  sfree(atoms.atom);
+ +  sfree(atoms.resinfo);
+ +  sfree(atoms.atomname);
+ +  done_symtab(&symtab);
+ +
+ +  if ((p=strstr(title,"t=")) != NULL) {
+ +    p+=2;
+ +    if (sscanf(p,"%lf",&tt)==1) {
+ +      fr->time = tt;
+ +      fr->bTime = TRUE;
+ +    } else {
+ +      fr->time = 0;
+ +      fr->bTime = FALSE;
+ +    }
+ +  }
+ +  
+ +  if (atoms.nr != fr->natoms)
+ +    gmx_fatal(FARGS,"Number of atoms in gro frame (%d) doesn't match the number in the previous frame (%d)",atoms.nr,fr->natoms);
+ +  
+ +  return TRUE;
+ +}
+ +
+ +int gro_first_x_or_v(FILE *status,t_trxframe *fr)
+ +{
+ +  char title[STRLEN];
+ +  
+ +  frewind(status);
+ +  fprintf(stderr,"Reading frames from gro file");
+ +  get_coordnum_fp(status, title, &fr->natoms);
+ +  frewind(status);
+ +  fprintf(stderr," '%s', %d atoms.\n",title, fr->natoms);
+ +  fr->bTitle = TRUE;
+ +  fr->title = title;
+ +  if (fr->natoms==0)
+ +    gmx_file("No coordinates in gro file");
+ +  
+ +  snew(fr->x,fr->natoms);
+ +  snew(fr->v,fr->natoms);
+ +  gro_next_x_or_v(status, fr);
+ +  
+ +  return fr->natoms;
+ +}
+ +
+ +static void make_hconf_format(int pr,gmx_bool bVel,char format[])
+ +{
+ +  int l,vpr;
+ +
+ +  /* build format string for printing, 
+ +     something like "%8.3f" for x and "%8.4f" for v */
+ +  if (pr<0)
+ +    pr=0;
+ +  if (pr>30)
+ +    pr=30;
+ +  l=pr+5;
+ +  vpr=pr+1;
+ +  if (bVel)
+ +    sprintf(format,"%%%d.%df%%%d.%df%%%d.%df%%%d.%df%%%d.%df%%%d.%df\n",
+ +          l,pr,l,pr,l,pr,l,vpr,l,vpr,l,vpr);
+ +  else
+ +    sprintf(format,"%%%d.%df%%%d.%df%%%d.%df\n",l,pr,l,pr,l,pr);
+ +
+ +}
+ +
+ +static void write_hconf_box(FILE *out,int pr,matrix box)
+ +{
+ +  char format[100];
+ +  int l;
+ +
+ +  if (pr<5) 
+ +    pr=5;
+ +  l=pr+5;
+ +  
+ +  if (box[XX][YY] || box[XX][ZZ] || box[YY][XX] || box[YY][ZZ] ||
+ +      box[ZZ][XX] || box[ZZ][YY]) {
+ +    sprintf(format,"%%%d.%df%%%d.%df%%%d.%df"
+ +          "%%%d.%df%%%d.%df%%%d.%df%%%d.%df%%%d.%df%%%d.%df\n",
+ +          l,pr,l,pr,l,pr,l,pr,l,pr,l,pr,l,pr,l,pr,l,pr);
+ +    fprintf(out,format,
+ +          box[XX][XX],box[YY][YY],box[ZZ][ZZ],
+ +          box[XX][YY],box[XX][ZZ],box[YY][XX],
+ +          box[YY][ZZ],box[ZZ][XX],box[ZZ][YY]);
+ +  } else {
+ +    sprintf(format,"%%%d.%df%%%d.%df%%%d.%df\n",l,pr,l,pr,l,pr);
+ +    fprintf(out,format,
+ +          box[XX][XX],box[YY][YY],box[ZZ][ZZ]);
+ +  }
+ +}
+ +
+ +void write_hconf_indexed_p(FILE *out,const char *title,t_atoms *atoms,
+ +                         int nx,const atom_id index[], int pr,
+ +                         rvec *x,rvec *v,matrix box)
+ +{
+ +  char resnm[6],nm[6],format[100];
+ +  int  ai,i,resind,resnr;
+ +
+ +  bromacs(format,99);
+ +  fprintf (out,"%s\n",(title && title[0])?title:format);
+ +  fprintf (out,"%5d\n",nx);
+ +
+ +  make_hconf_format(pr,v!=NULL,format);
+ +  
+ +  for (i=0; (i<nx); i++) {
+ +    ai=index[i];
+ +    
+ +    resind = atoms->atom[ai].resind;
+ +    strncpy(resnm," ??? ",sizeof(resnm)-1);
+ +    if (resind < atoms->nres) {
+ +      strncpy(resnm,*atoms->resinfo[resind].name,sizeof(resnm)-1);
+ +      resnr = atoms->resinfo[resind].nr;
+ +    } else {
+ +      strncpy(resnm," ??? ",sizeof(resnm)-1);
+ +      resnr = resind + 1;
+ +    }
+ +    
+ +    if (atoms->atom)
+ +      strncpy(nm,*atoms->atomname[ai],sizeof(nm)-1);
+ +    else
+ +      strncpy(nm," ??? ",sizeof(nm)-1);
+ +
+ +    fprintf(out,"%5d%-5.5s%5.5s%5d",resnr%100000,resnm,nm,(ai+1)%100000);
+ +    /* next fprintf uses built format string */
+ +    if (v)
+ +      fprintf(out,format,
+ +            x[ai][XX], x[ai][YY], x[ai][ZZ], v[ai][XX],v[ai][YY],v[ai][ZZ]);
+ +    else
+ +      fprintf(out,format,
+ +            x[ai][XX], x[ai][YY], x[ai][ZZ]);
+ +  }
+ +
+ +  write_hconf_box(out,pr,box);
+ +
+ +  fflush(out);
+ +}
+ +
+ +static void write_hconf_mtop(FILE *out,const char *title,gmx_mtop_t *mtop,
+ +                           int pr,
+ +                           rvec *x,rvec *v,matrix box)
+ +{
+ +  char format[100];
+ +  int  i,resnr;
+ +  gmx_mtop_atomloop_all_t aloop;
+ +  t_atom *atom;
+ +  char *atomname,*resname;
+ +
+ +  bromacs(format,99);
+ +  fprintf (out,"%s\n",(title && title[0])?title:format);
+ +  fprintf (out,"%5d\n",mtop->natoms);
+ +
+ +  make_hconf_format(pr,v!=NULL,format);
+ +  
+ +  aloop = gmx_mtop_atomloop_all_init(mtop);
+ +  while (gmx_mtop_atomloop_all_next(aloop,&i,&atom)) {
+ +    gmx_mtop_atomloop_all_names(aloop,&atomname,&resnr,&resname);
+ +
+ +    fprintf(out,"%5d%-5.5s%5.5s%5d",
+ +          resnr%100000,resname,atomname,(i+1)%100000);
+ +    /* next fprintf uses built format string */
+ +    if (v)
+ +      fprintf(out,format,
+ +            x[i][XX], x[i][YY], x[i][ZZ], v[i][XX],v[i][YY],v[i][ZZ]);
+ +    else
+ +      fprintf(out,format,
+ +            x[i][XX], x[i][YY], x[i][ZZ]);
+ +  }
+ +
+ +  write_hconf_box(out,pr,box);
+ +
+ +  fflush(out);
+ +}
+ +
+ +void write_hconf_p(FILE *out,const char *title,t_atoms *atoms, int pr,
+ +                 rvec *x,rvec *v,matrix box)
+ +{
+ +  atom_id *aa;
+ +  int     i;
+ +  
+ +  snew(aa,atoms->nr);
+ +  for(i=0; (i<atoms->nr); i++)
+ +    aa[i]=i;
+ +  write_hconf_indexed_p(out,title,atoms,atoms->nr,aa,pr,x,v,box);
+ +  sfree(aa);
+ +}
+ +
+ +void write_conf_p(const char *outfile, const char *title,
+ +                t_atoms *atoms, int pr,
+ +                rvec *x, rvec *v,matrix box)
+ +{
+ +  FILE *out;
+ +
+ +  out=gmx_fio_fopen(outfile,"w");
+ +  write_hconf_p(out,title,atoms,pr,x,v,box);
+ +
+ +  gmx_fio_fclose (out);
+ +}
+ +
+ +static void write_conf(const char *outfile, const char *title, t_atoms *atoms,
+ +                     rvec *x, rvec *v,matrix box)
+ +{
+ +  write_conf_p(outfile, title, atoms, 3, x, v, box);
+ +}
+ +
+ +void write_sto_conf_indexed(const char *outfile,const char *title,
+ +                          t_atoms *atoms, 
+ +                          rvec x[],rvec *v,int ePBC,matrix box,
+ +                          atom_id nindex,atom_id index[])
+ +{
+ +  FILE       *out;
+ +  int        ftp;
+ +  t_trxframe fr;
+ +
+ +  ftp=fn2ftp(outfile);
+ +  switch (ftp) {
+ +  case efGRO:
+ +    out=gmx_fio_fopen(outfile,"w");
+ +    write_hconf_indexed_p(out, title, atoms, nindex, index, 3, x, v, box);
+ +    gmx_fio_fclose(out);
+ +    break;
+ +  case efG96:
+ +    clear_trxframe(&fr,TRUE);
+ +    fr.bTitle = TRUE;
+ +    fr.title = title;
+ +    fr.natoms = atoms->nr;
+ +    fr.bAtoms = TRUE;
+ +    fr.atoms = atoms;
+ +    fr.bX = TRUE;
+ +    fr.x = x;
+ +    if (v) {
+ +      fr.bV = TRUE;
+ +      fr.v = v;
+ +    }
+ +    fr.bBox = TRUE;
+ +    copy_mat(box,fr.box);
+ +    out=gmx_fio_fopen(outfile,"w");
+ +    write_g96_conf(out, &fr, nindex, index);
+ +    gmx_fio_fclose(out);
+ +    break;
+ +  case efPDB:
+ +  case efBRK:
+ +  case efENT:
+ +  case efPQR:
+ +    out=gmx_fio_fopen(outfile,"w");
+ +    write_pdbfile_indexed(out,title,atoms,x,ePBC,box,' ',-1,nindex,index,NULL,TRUE);
+ +    gmx_fio_fclose(out);
+ +    break;
+ +  case efESP:
+ +    out=gmx_fio_fopen(outfile,"w"); 
+ +    write_espresso_conf_indexed(out, title, atoms, nindex, index, x, v, box);
+ +    gmx_fio_fclose(out);
+ +    break;
+ +  case efTPR:
+ +  case efTPB:
+ +  case efTPA:
+ +    gmx_fatal(FARGS,"Sorry, can not write a topology to %s",outfile);
+ +    break;
+ +  default:
+ +    gmx_incons("Not supported in write_sto_conf_indexed");
+ +  }
+ +}
+ +
+ +static void write_xyz_conf(const char *outfile,const char *title,
+ +                         t_atoms *atoms,rvec *x)
+ +{
+ +  FILE *fp;
+ +  int i,anr;
+ +  real value;
+ +  char *ptr,*name;
+ +  gmx_atomprop_t aps=gmx_atomprop_init();
+ +  
+ +  fp = gmx_fio_fopen(outfile,"w");
+ +  fprintf(fp,"%3d\n",atoms->nr);
+ +  fprintf(fp,"%s\n",title);
+ +  for(i=0; (i<atoms->nr); i++) {
+ +    anr  = atoms->atom[i].atomnumber;
+ +    name = *atoms->atomname[i];
+ +    if (anr == NOTSET) {
+ +      if (gmx_atomprop_query(aps,epropElement,"???",name,&value))
+ +      anr = gmx_nint(value);
+ +    }
+ +    if ((ptr = gmx_atomprop_element(aps,anr)) == NULL)
+ +      ptr = name;
+ +    fprintf(fp,"%3s%10.5f%10.5f%10.5f\n",ptr,
+ +          10*x[i][XX],10*x[i][YY],10*x[i][ZZ]);
+ +  }
+ +  gmx_fio_fclose(fp);
+ +  gmx_atomprop_destroy(aps);
+ +}
+ +
+ +void write_sto_conf(const char *outfile,const char *title,t_atoms *atoms, 
+ +                  rvec x[],rvec *v,int ePBC,matrix box)
+ +{
+ +  FILE       *out;
+ +  int        ftp;
+ +  t_trxframe fr;
+ +  
+ +  ftp=fn2ftp(outfile);
+ +  switch (ftp) {
+ +  case efGRO:
+ +    write_conf(outfile, title, atoms, x, v, box);
+ +    break;
+ +  case efG96:
+ +    clear_trxframe(&fr,TRUE);
+ +    fr.bTitle = TRUE;
+ +    fr.title = title;
+ +    fr.natoms = atoms->nr;
+ +    fr.bAtoms = TRUE;
+ +    fr.atoms = atoms;
+ +    fr.bX = TRUE;
+ +    fr.x = x;
+ +    if (v) {
+ +      fr.bV = TRUE;
+ +      fr.v = v;
+ +    }
+ +    fr.bBox = TRUE;
+ +    copy_mat(box,fr.box);
+ +    out=gmx_fio_fopen(outfile,"w");
+ +    write_g96_conf(out, &fr, -1, NULL);
+ +    gmx_fio_fclose(out);
+ +    break;
+ +  case efXYZ:
+ +    write_xyz_conf(outfile,(strlen(title) > 0) ? title : outfile,atoms,x);
+ +    break;
+ +  case efPDB:
+ +  case efBRK:
+ +  case efENT:
+ +    out=gmx_fio_fopen(outfile,"w");
+ +    write_pdbfile(out, title, atoms, x, ePBC, box, ' ', -1,NULL,TRUE);
+ +    gmx_fio_fclose(out);
+ +    break;
+ +  case efESP:
+ +    out=gmx_fio_fopen(outfile,"w");
+ +    write_espresso_conf_indexed(out, title, atoms, atoms->nr, NULL, x, v, box);
+ +    gmx_fio_fclose(out);
+ +    break;
+ +  case efTPR:
+ +  case efTPB:
+ +  case efTPA:
+ +    gmx_fatal(FARGS,"Sorry, can not write a topology to %s",outfile);
+ +    break;
+ +  default:
+ +    gmx_incons("Not supported in write_sto_conf");
+ +  }
+ +}
+ +
+ +void write_sto_conf_mtop(const char *outfile,const char *title,
+ +                       gmx_mtop_t *mtop,
+ +                       rvec x[],rvec *v,int ePBC,matrix box)
+ +{
+ +  int  ftp;
+ +  FILE *out;
+ +  t_atoms atoms;
+ +
+ +  ftp=fn2ftp(outfile);
+ +  switch (ftp) {
+ +  case efGRO:
+ +    out = gmx_fio_fopen(outfile,"w");
+ +    write_hconf_mtop(out,title,mtop,3,x,v,box);
+ +    gmx_fio_fclose(out);
+ +    break;
+ +  default:
+ +    /* This is a brute force approach which requires a lot of memory.
+ +     * We should implement mtop versions of all writing routines.
+ +     */
+ +    atoms = gmx_mtop_global_atoms(mtop);
+ +    
+ +    write_sto_conf(outfile,title,&atoms,x,v,ePBC,box);
+ +    
+ +    done_atom(&atoms);
+ +    break;
+ +  }
+ +}
+ +
+ +static int get_xyz_coordnum(const char *infile)
+ +{
+ +  FILE *fp;
+ +  int n;
+ +  
+ +  fp = gmx_fio_fopen(infile,"r");
+ +  if (fscanf(fp,"%d",&n) != 1)
+ +    gmx_fatal(FARGS,"Can not read number of atoms from %s",infile);
+ +  gmx_fio_fclose(fp);
+ +  
+ +  return n;
+ +}
+ +
+ +static void read_xyz_conf(const char *infile,char *title,
+ +                        t_atoms *atoms,rvec *x)
+ +{
+ +  FILE   *fp;
+ +  int    i,n;
+ +  double xx,yy,zz;
+ +  t_symtab *tab;
+ +  char atomnm[32],buf[STRLEN];
+ +  
+ +  snew(tab,1);
+ +  fp = gmx_fio_fopen(infile,"r");
+ +  fgets2(buf,STRLEN-1,fp);
+ +  if (sscanf(buf,"%d",&n) != 1)
+ +    gmx_fatal(FARGS,"Can not read number of atoms from %s",infile);
+ +  fgets2(buf,STRLEN-1,fp);
+ +  strcpy(title,buf);
+ +  for(i=0; (i<n); i++) {
+ +    fgets2(buf,STRLEN-1,fp);
+ +    if (sscanf(buf,"%s%lf%lf%lf",atomnm,&xx,&yy,&zz) != 4)
+ +      gmx_fatal(FARGS,"Can not read coordinates from %s",infile);
+ +    atoms->atomname[i] = put_symtab(tab,atomnm);
+ +    x[i][XX] = xx*0.1;
+ +    x[i][YY] = yy*0.1;
+ +    x[i][ZZ] = zz*0.1;
+ +  }
+ +  gmx_fio_fclose(fp);
+ +}
+ +
+ +void get_stx_coordnum(const char *infile,int *natoms)
+ +{
+ +  FILE *in;
+ +  int ftp,tpxver,tpxgen;
+ +  t_trxframe fr;
+ +  char g96_line[STRLEN+1];
+ +
+ +  ftp=fn2ftp(infile);
+ +  range_check(ftp,0,efNR);
+ +  switch (ftp) {
+ +  case efGRO:
+ +    get_coordnum(infile, natoms);
+ +    break;
+ +  case efG96:
+ +    in=gmx_fio_fopen(infile,"r");
+ +    fr.title = NULL;
+ +    fr.natoms = -1;
+ +    fr.atoms = NULL;
+ +    fr.x = NULL;
+ +    fr.v = NULL;
+ +    fr.f = NULL;
+ +    *natoms=read_g96_conf(in,infile,&fr,g96_line);
+ +    gmx_fio_fclose(in);
+ +    break;
+ +  case efXYZ:
+ +    *natoms = get_xyz_coordnum(infile);
+ +    break;
+ +  case efPDB:
+ +  case efBRK:
+ +  case efENT:
+ +    in=gmx_fio_fopen(infile,"r");
+ +    get_pdb_coordnum(in, natoms);
+ +    gmx_fio_fclose(in);
+ +    break;
+ +  case efESP:
+ +    *natoms = get_espresso_coordnum(infile);
+ +    break;
+ +  case efTPA:
+ +  case efTPB:
+ +  case efTPR: {
+ +    t_tpxheader tpx;
+ +    
+ +    read_tpxheader(infile,&tpx,TRUE,&tpxver,&tpxgen);
+ +    *natoms = tpx.natoms;
+ +    break;
+ +  }
+ +  default:
+ +    gmx_fatal(FARGS,"File type %s not supported in get_stx_coordnum",
+ +            ftp2ext(ftp));
+ +  }
+ +}
+ +
+ +void read_stx_conf(const char *infile,char *title,t_atoms *atoms, 
+ +                 rvec x[],rvec *v,int *ePBC,matrix box)
+ +{
+ +  FILE       *in;
+ +  char       buf[256];
+ +  gmx_mtop_t *mtop;
+ +  t_topology top;
+ +  t_trxframe fr;
+ +  int        i,ftp,natoms;
+ +  real       d;
+ +  char       g96_line[STRLEN+1];
+ +
+ +  if (atoms->nr == 0)
+ +    fprintf(stderr,"Warning: Number of atoms in %s is 0\n",infile);
+ +  else if (atoms->atom == NULL)
+ +    gmx_mem("Uninitialized array atom");
+ +  
+ +  if (ePBC)
+ +    *ePBC = -1;
+ +
+ +  ftp=fn2ftp(infile);
+ +  switch (ftp) {
+ +  case efGRO:
+ +    read_whole_conf(infile, title, atoms, x, v, box);
+ +    break;
+ +  case efXYZ:
+ +    read_xyz_conf(infile,title,atoms,x);
+ +    break;
+ +  case efG96:
++    fr.title  = NULL;
+ +    fr.natoms = atoms->nr;
+ +    fr.atoms = atoms;
+ +    fr.x = x;
+ +    fr.v = v;
+ +    fr.f = NULL;
+ +    in = gmx_fio_fopen(infile,"r");
+ +    read_g96_conf(in, infile, &fr, g96_line);
+ +    gmx_fio_fclose(in);
+ +    copy_mat(fr.box,box);
++    strncpy(title, fr.title, STRLEN);
+ +    break;
+ +  case efPDB:
+ +  case efBRK:
+ +  case efENT:
+ +    read_pdb_conf(infile, title, atoms, x, ePBC, box, TRUE, NULL);
+ +    break;
+ +  case efESP:
+ +    read_espresso_conf(infile,atoms,x,v,box);
+ +    break;
+ +  case efTPR:
+ +  case efTPB:
+ +  case efTPA: 
+ +    snew(mtop,1);
+ +    i = read_tpx(infile,NULL,box,&natoms,x,v,NULL,mtop);
+ +    if (ePBC)
+ +      *ePBC = i;
+ +    
+ +    strcpy(title,*(mtop->name));
+ +    
+ +    /* Free possibly allocated memory */
+ +    done_atom(atoms);
+ +    
+ +    *atoms = gmx_mtop_global_atoms(mtop);
+ +    top = gmx_mtop_t_to_t_topology(mtop);
+ +    tpx_make_chain_identifiers(atoms,&top.mols);
+ +              
+ +    sfree(mtop);
+ +    /* The strings in the symtab are still in use in the returned t_atoms
+ +     * structure, so we should not free them. But there is no place to put the
+ +     * symbols; the only choice is to leak the memory...
+ +     * So we clear the symbol table before freeing the topology structure. */
+ +    free_symtab(&top.symtab);
+ +    done_top(&top);
+ +                
+ +    break;
+ +  default:
+ +    gmx_incons("Not supported in read_stx_conf");
+ +  }
+ +}
+ +
diff --cc src/gromacs/gmxlib/gmx_omp.c

index 29622e9a7e307a9a33490528ba145c33d0d0a531,0000000000000000000000000000000000000000..a06c43d902ab5c49c301c08e87d2677ab1530580

mode 100644,000000..100644
--- 1/src/gromacs/gmxlib/gmx_omp.c
--- /dev/null
+++ b/src/gromacs/gmxlib/gmx_omp.c
@@@ -1,179 -1,0 +1,182 @@@
-     if (!hw_opt->bThreadPinning)
+ +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ + *
+ + *
+ + *                This source code is part of
+ + *
+ + *                 G   R   O   M   A   C   S
+ + *
+ + *          GROningen MAchine for Chemical Simulations
+ + *
+ + * Written by the Gromacs development team under coordination of
+ + * David van der Spoel, Berk Hess, and Erik Lindahl.
+ + *
+ + * This library is free software; you can redistribute it and/or
+ + * modify it under the terms of the GNU Lesser General Public License
+ + * as published by the Free Software Foundation; either version 2
+ + * of the License, or (at your option) any later version.
+ + *
+ + * To help us fund GROMACS development, we humbly ask that you cite
+ + * the research papers on the package. Check out http://www.gromacs.org
+ + *
+ + * And Hey:
+ + * Gnomes, ROck Monsters And Chili Sauce
+ + */
+ +
+ +#ifdef HAVE_CONFIG_H
+ +#include "config.h"
+ +#endif
+ +
+ +#ifdef GMX_OPENMP
+ +#include <omp.h>
+ +#endif
+ +
+ +#include <stdio.h>
+ +
+ +#include "md_logging.h"
+ +#include "gmx_fatal.h"
+ +#include "statutil.h"
+ +#include "string2.h"
+ +#include "gmx_omp.h"
+ +
+ +int gmx_omp_get_max_threads(void)
+ +{
+ +#ifdef GMX_OPENMP
+ +    return omp_get_max_threads();
+ +#else
+ +    return 1;
+ +#endif
+ +}
+ +
+ +int gmx_omp_get_num_procs(void)
+ +{
+ +#ifdef GMX_OPENMP
+ +    return omp_get_num_procs();
+ +#else
+ +    return 1;
+ +#endif
+ +}
+ +
+ +int gmx_omp_get_thread_num(void)
+ +{
+ +#ifdef GMX_OPENMP
+ +    return omp_get_thread_num();
+ +#else
+ +    return 0;
+ +#endif
+ +}
+ +
+ +void gmx_omp_set_num_threads(int num_threads)
+ +{
+ +#ifdef GMX_OPENMP
+ +    omp_set_num_threads(num_threads);
+ +#else
+ +    return;
+ +#endif
+ +}
+ +
+ +/*!
+ + * Thread affinity set by the OpenMP library can conflict with the GROMACS
+ + * internal affinity setting.
+ + *
+ + * While GNU OpenMP does not set affinity by default, the Intel OpenMP library
+ + * does. This conflicts with the internal affinity (especially thread-MPI)
+ + * setting, results in incorrectly locked threads, and causes dreadful performance.
+ + *
+ + * The KMP_AFFINITY environment variable is used by Intel, GOMP_CPU_AFFINITY
+ + * by the GNU compilers (Intel also honors it well). If any of the variables
+ + * is set, we honor it, disable the internal pinning, and warn the user.
+ + * When using Intel OpenMP, we will disable affinity if the user did not set it
+ + * anually through one of the aforementioned environment variables.
+ + *
+ + * Note that the Intel OpenMP affinity disabling iwll only take effect if this
+ + * function is called before the OpenMP library gets initialized which happens
+ + * when the first call is made into a compilation unit that contains OpenMP
+ + * pragmas.
+ + */
+ +void gmx_omp_check_thread_affinity(FILE *fplog, const t_commrec *cr,
+ +                                   gmx_hw_opt_t *hw_opt)
+ +{
+ +    gmx_bool bKmpAffinitySet, bGompCpuAffinitySet;
+ +    char *kmp_env, *gomp_env;
+ +
+ +    /* no need to worry if internal thread pinning is turned off */
-         md_print_warn(cr, fplog, "WARNING: KMP_AFFINITY set, will turn off %s internal affinity\n"
-                       "         setting as the two can conflict and cause performance degradation.\n"
-                       "         To keep using the %s internal affinity setting, set the\n"
-                       "         KMP_AFFINITY=disabled environment variable.",
++    if (hw_opt->thread_affinity == threadaffOFF)
+ +    {
+ +        return;
+ +    }
+ +
+ +#if defined(GMX_OPENMP)
+ +
+ +    /* We assume that the affinity setting is available on all platforms
+ +     * gcc supports. Even if this is not the case (e.g. Mac OS) the user
+ +     * will only get a warning.*/
+ +    bGompCpuAffinitySet = FALSE;
+ +    gomp_env = NULL;
+ +#if defined(__GNUC__)
+ +    gomp_env = getenv("GOMP_CPU_AFFINITY");
+ +    bGompCpuAffinitySet = (gomp_env != NULL);
+ +#endif /* __GNUC__ */
+ +
+ +    bKmpAffinitySet = FALSE;
+ +#if defined(__INTEL_COMPILER)
+ +    kmp_env = getenv("KMP_AFFINITY");
+ +    bKmpAffinitySet = (kmp_env != NULL);
+ +
+ +    /* disable Intel OpenMP affinity if neither KMP_AFFINITY nor
+ +     * GOMP_CPU_AFFINITY is set (Intel uses the GNU env. var as well) */
+ +    if (!bKmpAffinitySet && !bGompCpuAffinitySet)
+ +    {
+ +        int retval;
+ +
+ +#ifdef _MSC_VER
+ +        /* Windows not POSIX */
+ +        retval = _putenv_s("KMP_AFFINITY", "disabled");
+ +#else
+ +        /* POSIX */
+ +        retval = setenv("KMP_AFFINITY", "disabled", 0);
+ +#endif /* _MSC_VER */
+ +
+ +        if (debug)
+ +        {
+ +            fprintf(debug, "Disabling Intel OpenMP affinity by setting the KMP_AFFINITY=disabled env. var.\n");
+ +        }
+ +
+ +        if (retval != 0)
+ +        {
+ +            gmx_warning("Disabling Intel OpenMp affinity setting failed!");
+ +        }
+ +    }
+ +
+ +    /* turn off internal pinning KMP_AFFINITY != "disabled" */
+ +    if (bKmpAffinitySet && (gmx_strncasecmp(kmp_env, "disabled", 8) != 0))
+ +    {
-         hw_opt->bThreadPinning = FALSE;
++        /* TODO: with -pin auto we should only warn when using all cores */
++        md_print_warn(cr, fplog,
++                      "NOTE: KMP_AFFINITY set, will turn off %s internal affinity\n"
++                      "      setting as the two can conflict and cause performance degradation.\n"
++                      "      To keep using the %s internal affinity setting, set the\n"
++                      "      KMP_AFFINITY=disabled environment variable.",
+ +                      ShortProgram(), ShortProgram());
+ +
-                       "WARNING: GOMP_CPU_AFFINITY set, will turn off %s internal affinity\n"
-                       "         setting as the two can conflict and cause performance degradation.\n"
-                       "         To keep using the %s internal affinity setting, unset the\n"
-                       "         GOMP_CPU_AFFINITY environment variable.",
++        hw_opt->thread_affinity = threadaffOFF;
+ +    }
+ +#endif /* __INTEL_COMPILER */
+ +
+ +#if defined(__INTEL_COMPILER) || defined(__GNUC__)
+ +    /* turn off internal pinning f GOMP_CPU_AFFINITY is set & non-empty */
+ +    if (bGompCpuAffinitySet && gomp_env != NULL && gomp_env != '\0')
+ +    {
++        /* TODO: with -pin auto we should only warn when using all cores */
+ +        md_print_warn(cr, fplog,
-         hw_opt->bThreadPinning = FALSE;
++                      "NOTE: GOMP_CPU_AFFINITY set, will turn off %s internal affinity\n"
++                      "      setting as the two can conflict and cause performance degradation.\n"
++                      "      To keep using the %s internal affinity setting, unset the\n"
++                      "      GOMP_CPU_AFFINITY environment variable.",
+ +                      ShortProgram(), ShortProgram());
+ +
++        hw_opt->thread_affinity = threadaffOFF;
+ +    }
+ +#endif /* __INTEL_COMPILER || __GNUC__ */
+ +
+ +#endif /* GMX_OPENMP */
+ +}
diff --cc src/gromacs/gmxlib/gmx_thread_affinity.c

index 0000000000000000000000000000000000000000,0000000000000000000000000000000000000000..c67b961a2af73783bc592f07b384f127b8cd78ca

new file mode 100644 (file)
--- /dev/null
--- /dev/null
+++ b/src/gromacs/gmxlib/gmx_thread_affinity.c
@@@ -1,0 -1,0 +1,430 @@@
++/*
++ * This file is part of the GROMACS molecular simulation package.
++ *
++ * Copyright (c) 2012, by the GROMACS development team, led by
++ * David van der Spoel, Berk Hess, Erik Lindahl, and including many
++ * others, as listed in the AUTHORS file in the top-level source
++ * directory and at http://www.gromacs.org.
++ *
++ * GROMACS is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public License
++ * as published by the Free Software Foundation; either version 2.1
++ * of the License, or (at your option) any later version.
++ *
++ * GROMACS is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with GROMACS; if not, see
++ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
++ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
++ *
++ * If you want to redistribute modifications to GROMACS, please
++ * consider that scientific software is very special. Version
++ * control is crucial - bugs must be traceable. We will be happy to
++ * consider code for inclusion in the official distribution, but
++ * derived work must not be called official GROMACS. Details are found
++ * in the README & COPYING files - if they are missing, get the
++ * official version at http://www.gromacs.org.
++ *
++ * To help us fund GROMACS development, we humbly ask that you cite
++ * the research papers on the package. Check out http://www.gromacs.org.
++ */
++#ifdef HAVE_CONFIG_H
++#include <config.h>
++#endif
++#if defined(HAVE_SCHED_H) && defined(HAVE_SCHED_GETAFFINITY)
++#define _GNU_SOURCE
++#include <sched.h>
++#include <sys/syscall.h>
++#endif
++#include <assert.h>
++#include <stdio.h>
++#include "typedefs.h"
++#include "types/commrec.h"
++#include "types/hw_info.h"
++#include "gmx_cpuid.h"
++#include "gmx_omp.h"
++#include "gmx_omp_nthreads.h"
++#include "mdrun.h"
++#include "md_logging.h"
++#include "statutil.h"
++#include "gmx_thread_affinity.h"
++
++#include "thread_mpi/threads.h"
++
++
++static int
++get_thread_affinity_layout(FILE *fplog,
++                           const t_commrec *cr,
++                           const gmx_hw_info_t * hwinfo,
++                           int nthreads,
++                           int pin_offset, int * pin_stride,
++                           const int **locality_order)
++{
++    int         nhwthreads,npkg,ncores,nhwthreads_per_core,rc;
++    const int * pkg_id;
++    const int * core_id;
++    const int * hwthread_id;
++
++    if (pin_offset < 0)
++    {
++        gmx_fatal(FARGS,"Negative thread pinning offset requested");
++    }
++    if (*pin_stride < 0)
++    {
++        gmx_fatal(FARGS,"Negative thread pinning stride requested");
++    }
++
++    rc = gmx_cpuid_topology(hwinfo->cpuid_info, &nhwthreads, &npkg, &ncores,
++                            &nhwthreads_per_core,
++                            &pkg_id, &core_id, &hwthread_id, locality_order);
++
++    if (rc != 0)
++    {
++        nhwthreads      = hwinfo->nthreads_hw_avail;
++        *locality_order = NULL;
++
++        if (nhwthreads <= 0)
++        {
++            /* We don't know anything about the hardware, don't pin */
++            md_print_warn(cr, fplog,
++                          "We don't know how many logical cores we have, will not pin threads");
++
++            return -1;
++        }
++    }
++
++    if (pin_offset + nthreads > nhwthreads)
++    {
++        /* We are oversubscribing, don't pin */
++        md_print_warn(NULL, fplog,
++                      "More threads requested than available logical cores, will not pin threads");
++
++        return -1;
++    }
++
++    /* Check if we need to choose the pinning stride */
++    if (*pin_stride == 0)
++    {
++        if (rc == 0 && pin_offset + nthreads*nhwthreads_per_core <= nhwthreads)
++        {
++            /* Put one thread on each physical core */
++            *pin_stride = nhwthreads_per_core;
++        }
++        else
++        {
++            /* We don't know if we have SMT, and if we do, we don't know
++             * if hw threads in the same physical core are consecutive.
++             * Without SMT the pinning layout should not matter too much.
++             * so we assume a consecutive layout and maximally spread out"
++             * the threads at equal threads per core.
++             * Note that IBM is the major non-x86 case with cpuid support
++             * and probably threads are already pinned by the queuing system,
++             * so we wouldn't end up here in the first place.
++             */
++            *pin_stride = (nhwthreads - pin_offset)/nthreads;
++        }
++
++        if (fplog != NULL)
++        {
++            fprintf(fplog,"Pinning threads with a logical core stride of %d\n",
++                    *pin_stride);
++        }
++    }
++    else
++    {
++        if (pin_offset + nthreads*(*pin_stride) > nhwthreads)
++        {
++            /* We are oversubscribing, don't pin */
++            md_print_warn(NULL, fplog,
++                          "The requested pinning stride is too large for the available logical cores, will not pin threads");
++
++            return -1;
++        }
++    }
++
++    return 0;
++}
++
++/* Set CPU affinity. Can be important for performance.
++   On some systems (e.g. Cray) CPU Affinity is set by default.
++   But default assigning doesn't work (well) with only some ranks
++   having threads. This causes very low performance.
++   External tools have cumbersome syntax for setting affinity
++   in the case that only some ranks have threads.
++   Thus it is important that GROMACS sets the affinity internally
++   if only PME is using threads.
++*/
++void
++gmx_set_thread_affinity(FILE *fplog,
++                        const t_commrec *cr,
++                        gmx_hw_opt_t *hw_opt,
++                        int nthreads_pme,
++                        const gmx_hw_info_t *hwinfo,
++                        const t_inputrec *inputrec)
++{
++    int nth_affinity_set, thread_id_node, thread_id,
++        nthread_local, nthread_node, nthread_hw_max, nphyscore;
++    int offset;
++    const int *locality_order;
++    int rc;
++
++    if (hw_opt->thread_affinity == threadaffOFF)
++    {
++        /* Nothing to do */
++        return;
++    }
++
++#ifndef __APPLE__
++    /* If the tMPI thread affinity setting is not supported encourage the user
++     * to report it as it's either a bug or an exotic platform which we might
++     * want to support. */
++    if (tMPI_Thread_setaffinity_support() != TMPI_SETAFFINITY_SUPPORT_YES)
++    {
++        md_print_warn(NULL, fplog,
++                      "Can not set thread affinities on the current platform. On NUMA systems this\n"
++                      "can cause performance degradation. If you think your platform should support\n"
++                      "setting affinities, contact the GROMACS developers.");
++        return;
++    }
++#endif /* __APPLE__ */
++
++    /* threads on this MPI process or TMPI thread */
++    if (cr->duty & DUTY_PP)
++    {
++        nthread_local = gmx_omp_nthreads_get(emntNonbonded);
++    }
++    else
++    {
++        nthread_local = gmx_omp_nthreads_get(emntPME);
++    }
++
++    /* map the current process to cores */
++    thread_id_node = 0;
++    nthread_node = nthread_local;
++#ifdef GMX_MPI
++    if (PAR(cr) || MULTISIM(cr))
++    {
++        /* We need to determine a scan of the thread counts in this
++         * compute node.
++         */
++        MPI_Comm comm_intra;
++
++        MPI_Comm_split(MPI_COMM_WORLD,gmx_hostname_num(),cr->rank_intranode,
++                       &comm_intra);
++        MPI_Scan(&nthread_local,&thread_id_node,1,MPI_INT,MPI_SUM,comm_intra);
++        /* MPI_Scan is inclusive, but here we need exclusive */
++        thread_id_node -= nthread_local;
++        /* Get the total number of threads on this physical node */
++        MPI_Allreduce(&nthread_local,&nthread_node,1,MPI_INT,MPI_SUM,comm_intra);
++        MPI_Comm_free(&comm_intra);
++    }
++#endif
++
++    if (hw_opt->thread_affinity == threadaffAUTO &&
++        nthread_node != hwinfo->nthreads_hw_avail)
++    {
++        if (nthread_node > 1 && nthread_node < hwinfo->nthreads_hw_avail)
++        {
++            md_print_warn(cr, fplog,
++                          "NOTE: The number of threads is not equal to the number of (logical) cores\n"
++                          "      and the -pin option is set to auto: will not pin thread to cores.\n"
++                          "      This can lead to significant performance degradation.\n"
++                          "      Consider using -pin on (and -pinoffset in case you run multiple jobs).\n");
++        }
++
++        return;
++    }
++
++    offset = 0;
++    if (hw_opt->core_pinning_offset != 0)
++    {
++        offset = hw_opt->core_pinning_offset;
++        md_print_info(cr,fplog,"Applying core pinning offset %d\n", offset);
++    }
++
++    rc = get_thread_affinity_layout(fplog, cr, hwinfo,
++                                    nthread_node,
++                                    offset, &hw_opt->core_pinning_stride,
++                                    &locality_order);
++    if (rc != 0)
++    {
++        /* Incompatible layout, don't pin, warning was already issued */
++        return;
++    }
++
++    /* Set the per-thread affinity. In order to be able to check the success
++     * of affinity settings, we will set nth_affinity_set to 1 on threads
++     * where the affinity setting succeded and to 0 where it failed.
++     * Reducing these 0/1 values over the threads will give the total number
++     * of threads on which we succeeded.
++     */
++    nth_affinity_set = 0;
++#pragma omp parallel firstprivate(thread_id_node) num_threads(nthread_local) \
++                     reduction(+:nth_affinity_set)
++    {
++        int      index,core;
++        gmx_bool setaffinity_ret;
++
++        thread_id       = gmx_omp_get_thread_num();
++        thread_id_node += thread_id;
++        index           = offset + thread_id_node*hw_opt->core_pinning_stride;
++        if (locality_order != NULL)
++        {
++            core = locality_order[index];
++        }
++        else
++        {
++            core = index;
++        }
++
++        setaffinity_ret = tMPI_Thread_setaffinity_single(tMPI_Thread_self(), core);
++
++        /* store the per-thread success-values of the setaffinity */
++        nth_affinity_set = (setaffinity_ret == 0);
++
++        if (debug)
++        {
++            fprintf(debug, "On rank %2d, thread %2d, core %2d the affinity setting returned %d\n",
++                    cr->nodeid, gmx_omp_get_thread_num(), core, setaffinity_ret);
++        }
++    }
++
++    if (nth_affinity_set > nthread_local)
++    {
++        char msg[STRLEN];
++
++        sprintf(msg, "Looks like we have set affinity for more threads than "
++                "we have (%d > %d)!\n", nth_affinity_set, nthread_local);
++        gmx_incons(msg);
++    }
++    else
++    {
++        /* check & warn if some threads failed to set their affinities */
++        if (nth_affinity_set != nthread_local)
++        {
++            char sbuf1[STRLEN], sbuf2[STRLEN];
++
++            /* sbuf1 contains rank info, while sbuf2 OpenMP thread info */
++            sbuf1[0] = sbuf2[0] = '\0';
++#ifdef GMX_MPI
++#ifdef GMX_THREAD_MPI
++            sprintf(sbuf1, "In thread-MPI thread #%d: ", cr->nodeid);
++#else /* GMX_LIB_MPI */
++            sprintf(sbuf1, "In MPI process #%d: ", cr->nodeid);
++#endif
++#endif /* GMX_MPI */
++
++            if (nthread_local > 1)
++            {
++                sprintf(sbuf2, "of %d/%d thread%s ",
++                        nthread_local - nth_affinity_set, nthread_local,
++                        (nthread_local - nth_affinity_set) > 1 ? "s" : "");
++            }
++
++            md_print_warn(NULL, fplog,
++                          "NOTE: %sAffinity setting %sfailed.\n"
++                          "      This can cause performance degradation!",
++                          sbuf1, sbuf2);
++        }
++    }
++}
++
++/* Check the process affinity mask and if it is found to be non-zero,
++ * will honor it and disable mdrun internal affinity setting.
++ * Note that this will only work on Linux as we use a GNU feature.
++ */
++void
++gmx_check_thread_affinity_set(FILE *fplog, const t_commrec *cr,
++                              gmx_hw_opt_t *hw_opt, int ncpus,
++                              gmx_bool bAfterOpenmpInit)
++{
++#ifdef HAVE_SCHED_GETAFFINITY
++    cpu_set_t mask_current;
++    int       i, ret, cpu_count, cpu_set;
++    gmx_bool  bAllSet;
++
++    assert(hw_opt);
++    if (hw_opt->thread_affinity == threadaffOFF)
++    {
++        /* internal affinity setting is off, don't bother checking process affinity */
++        return;
++    }
++
++    CPU_ZERO(&mask_current);
++    if ((ret = sched_getaffinity(0, sizeof(cpu_set_t), &mask_current)) != 0)
++    {
++        /* failed to query affinity mask, will just return */
++        if (debug)
++        {
++            fprintf(debug, "Failed to query affinity mask (error %d)", ret);
++        }
++        return;
++    }
++
++    /* Before proceeding with the actual check, make sure that the number of
++     * detected CPUs is >= the CPUs in the current set.
++     * We need to check for CPU_COUNT as it was added only in glibc 2.6. */
++#ifdef CPU_COUNT
++    if (ncpus < CPU_COUNT(&mask_current))
++    {
++        if (debug)
++        {
++            fprintf(debug, "%d CPUs detected, but %d was returned by CPU_COUNT",
++                    ncpus, CPU_COUNT(&mask_current));
++        }
++        return;
++    }
++#endif /* CPU_COUNT */
++
++    bAllSet = TRUE;
++    for (i = 0; (i < ncpus && i < CPU_SETSIZE); i++)
++    {
++        bAllSet = bAllSet && (CPU_ISSET(i, &mask_current) != 0);
++    }
++
++    if (!bAllSet)
++    {
++        if (hw_opt->thread_affinity == threadaffAUTO)
++        {
++            if (!bAfterOpenmpInit)
++            {
++                md_print_warn(cr, fplog,
++                              "Non-default thread affinity set, disabling internal thread affinity");
++            }
++            else
++            {
++                md_print_warn(cr, fplog,
++                              "Non-default thread affinity set probably by the OpenMP library,\n"
++                              "disabling internal thread affinity");
++            }
++            hw_opt->thread_affinity = threadaffOFF;
++        }
++        else
++        {
++            /* Only warn once, at the last check (bAfterOpenmpInit==TRUE) */
++            if (bAfterOpenmpInit)
++            {
++                md_print_warn(cr, fplog,
++                              "Overriding thread affinity set outside %s\n",
++                              ShortProgram());
++            }
++        }
++
++        if (debug)
++        {
++            fprintf(debug, "Non-default affinity mask found\n");
++        }
++    }
++    else
++    {
++        if (debug)
++        {
++            fprintf(debug, "Default affinity mask found\n");
++        }
++    }
++#endif /* HAVE_SCHED_GETAFFINITY */
++}
diff --cc src/gromacs/legacyheaders/gmx_thread_affinity.h

index 0000000000000000000000000000000000000000,0000000000000000000000000000000000000000..c8de7bf3e1421da64e804b3ae2b5b89707ad62bb

new file mode 100644 (file)
--- /dev/null
--- /dev/null
+++ b/src/gromacs/legacyheaders/gmx_thread_affinity.h
@@@ -1,0 -1,0 +1,77 @@@
++/*
++ * This file is part of the GROMACS molecular simulation package.
++ *
++ * Copyright (c) 2012, by the GROMACS development team, led by
++ * David van der Spoel, Berk Hess, Erik Lindahl, and including many
++ * others, as listed in the AUTHORS file in the top-level source
++ * directory and at http://www.gromacs.org.
++ *
++ * GROMACS is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public License
++ * as published by the Free Software Foundation; either version 2.1
++ * of the License, or (at your option) any later version.
++ *
++ * GROMACS is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with GROMACS; if not, see
++ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
++ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
++ *
++ * If you want to redistribute modifications to GROMACS, please
++ * consider that scientific software is very special. Version
++ * control is crucial - bugs must be traceable. We will be happy to
++ * consider code for inclusion in the official distribution, but
++ * derived work must not be called official GROMACS. Details are found
++ * in the README & COPYING files - if they are missing, get the
++ * official version at http://www.gromacs.org.
++ *
++ * To help us fund GROMACS development, we humbly ask that you cite
++ * the research papers on the package. Check out http://www.gromacs.org.
++ */
++#ifndef GMX_THREAD_AFFINITY_H_
++#define GMX_THREAD_AFFINITY_H_
++#include "types/commrec.h"
++#include "typedefs.h"
++
++#ifdef __cplusplus
++extern "C" {
++#endif
++#if 0
++} /* fixes auto-indentation problems */
++#endif
++
++/* Sets the thread affinity using the requested setting stored in hw_opt.
++ * The hardware topologu is requested from hwinfo, when present.
++ */
++void
++gmx_set_thread_affinity(FILE *fplog,
++                        const t_commrec *cr,
++                        gmx_hw_opt_t *hw_opt,
++                        int nthreads_pme,
++                        const gmx_hw_info_t *hwinfo,
++                        const t_inputrec *inputrec);
++
++/* Check the process affinity mask and if it is found to be non-zero,
++ * will honor it and disable mdrun internal affinity setting.
++ * This function should be called first before the OpenMP library gets
++ * initialized with the last argument FALSE (which will detect affinity
++ * set by external tools like taskset), and later, after the OpenMP
++ * initialization, with the last argument TRUE to detect affinity changes
++ * made by the OpenMP library.
++ *
++ * Note that this will only work on Linux as we use a GNU feature.
++ */
++void
++gmx_check_thread_affinity_set(FILE *fplog, const t_commrec *cr,
++                              gmx_hw_opt_t *hw_opt, int ncpus,
++                              gmx_bool bAfterOpenmpInit);
++
++#ifdef __cplusplus
++}
++#endif
++
++#endif /* GMX_THREAD_AFFINITY_H_ */
diff --cc src/gromacs/legacyheaders/mdrun.h

index 3c802d57f311bb1bf7cad063f2f64f45784a11ee,0000000000000000000000000000000000000000..3fb195f297ac83191a450619165615e0168eae81

mode 100644,000000..100644
--- 1/src/gromacs/legacyheaders/mdrun.h
--- /dev/null
+++ b/src/gromacs/legacyheaders/mdrun.h
@@@ -1,206 -1,0 +1,212 @@@
-     gmx_bool bThreadPinning;      /* Pin OpenMP threads to cores?             */
-     gmx_bool bPinHyperthreading;  /* Pin pairs of threads to physical cores   */
-     int      core_pinning_offset; /* Physical core pinning offset             */
+ +/*
+ + * 
+ + *                This source code is part of
+ + * 
+ + *                 G   R   O   M   A   C   S
+ + * 
+ + *          GROningen MAchine for Chemical Simulations
+ + * 
+ + *                        VERSION 3.2.0
+ + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ + * Copyright (c) 2001-2004, The GROMACS development team,
+ + * check out http://www.gromacs.org for more information.
+ +
+ + * This program is free software; you can redistribute it and/or
+ + * modify it under the terms of the GNU General Public License
+ + * as published by the Free Software Foundation; either version 2
+ + * of the License, or (at your option) any later version.
+ + * 
+ + * If you want to redistribute modifications, please consider that
+ + * scientific software is very special. Version control is crucial -
+ + * bugs must be traceable. We will be happy to consider code for
+ + * inclusion in the official distribution, but derived work must not
+ + * be called official GROMACS. Details are found in the README & COPYING
+ + * files - if they are missing, get the official version at www.gromacs.org.
+ + * 
+ + * To help us fund GROMACS development, we humbly ask that you cite
+ + * the papers on the package - you can find them in the top README file.
+ + * 
+ + * For more info, check our website at http://www.gromacs.org
+ + * 
+ + * And Hey:
+ + * Gromacs Runs On Most of All Computer Systems
+ + */
+ +
+ +#ifndef _mdrun_h
+ +#define _mdrun_h
+ +
+ +#include <stdio.h>
+ +#include <time.h>
+ +#include "typedefs.h"
+ +#include "network.h"
+ +#include "sim_util.h"
+ +#include "tgroup.h"
+ +#include "filenm.h"
+ +#include "mshift.h"
+ +#include "force.h"
+ +#include "edsam.h"
+ +#include "mdebin.h"
+ +#include "vcm.h"
+ +#include "vsite.h"
+ +#include "pull.h"
+ +#include "update.h"
+ +#include "types/membedt.h"
+ +#include "types/globsig.h"
+ +
+ +
+ +#ifdef GMX_THREAD_MPI
+ +#include "thread_mpi/threads.h"
+ +#endif
+ +
+ +#ifdef __cplusplus
+ +extern "C" {
+ +#endif
+ +
+ +#define MD_POLARISE       (1<<2)
+ +#define MD_IONIZE         (1<<3)
+ +#define MD_RERUN          (1<<4)
+ +#define MD_RERUN_VSITE    (1<<5)
+ +#define MD_FFSCAN         (1<<6)
+ +#define MD_SEPPOT         (1<<7)
+ +#define MD_PARTDEC        (1<<9)
+ +#define MD_DDBONDCHECK    (1<<10)
+ +#define MD_DDBONDCOMM     (1<<11)
+ +#define MD_CONFOUT        (1<<12)
+ +#define MD_REPRODUCIBLE   (1<<13)
+ +#define MD_READ_RNG       (1<<14)
+ +#define MD_APPENDFILES    (1<<15)
+ +#define MD_APPENDFILESSET (1<<21)
+ +#define MD_KEEPANDNUMCPT  (1<<16)
+ +#define MD_READ_EKIN      (1<<17)
+ +#define MD_STARTFROMCPT   (1<<18)
+ +#define MD_RESETCOUNTERSHALFWAY (1<<19)
+ +#define MD_TUNEPME        (1<<20)
+ +#define MD_TESTVERLET     (1<<22)
+ +
++/* The options for the domain decomposition MPI task ordering */
+ +enum {
+ +  ddnoSEL, ddnoINTERLEAVE, ddnoPP_PME, ddnoCARTESIAN, ddnoNR
+ +};
+ +
++/* The options for the thread affinity setting, default: auto */
++enum {
++  threadaffSEL, threadaffAUTO, threadaffON, threadaffOFF, threadaffNR
++};
++
+ +typedef struct {
+ +    int      nthreads_tot;        /* Total number of threads requested (TMPI) */
+ +    int      nthreads_tmpi;       /* Number of TMPI threads requested         */
+ +    int      nthreads_omp;        /* Number of OpenMP threads requested       */
+ +    int      nthreads_omp_pme;    /* As nthreads_omp, but for PME only nodes  */
++    int      thread_affinity;     /* Thread affinity switch, see enum above   */
++    int      core_pinning_stride; /* Logical core pinning stride              */
++    int      core_pinning_offset; /* Logical core pinning offset              */
+ +    char    *gpu_id;              /* GPU id's to use, each specified as chars */
+ +} gmx_hw_opt_t;
+ +
+ +/* Variables for temporary use with the deform option,
+ + * used in runner.c and md.c.
+ + * (These variables should be stored in the tpx file.)
+ + */
+ +extern gmx_large_int_t     deform_init_init_step_tpx;
+ +extern matrix              deform_init_box_tpx;
+ +#ifdef GMX_THREAD_MPI
+ +extern tMPI_Thread_mutex_t deform_init_box_mutex;
+ +
+ +/* The minimum number of atoms per tMPI thread. With fewer atoms than this,
+ + * the number of threads will get lowered.
+ + */
+ +#define MIN_ATOMS_PER_MPI_THREAD    90
+ +#define MIN_ATOMS_PER_GPU           900
+ +#endif
+ +
+ +
+ +typedef double gmx_integrator_t(FILE *log,t_commrec *cr,
+ +                              int nfile,const t_filenm fnm[],
+ +                              const output_env_t oenv, gmx_bool bVerbose,
+ +                                gmx_bool bCompact, int nstglobalcomm,
+ +                              gmx_vsite_t *vsite,gmx_constr_t constr,
+ +                              int stepout,
+ +                              t_inputrec *inputrec,
+ +                              gmx_mtop_t *mtop,t_fcdata *fcd,
+ +                              t_state *state,
+ +                              t_mdatoms *mdatoms,
+ +                              t_nrnb *nrnb,gmx_wallcycle_t wcycle,
+ +                              gmx_edsam_t ed, 
+ +                              t_forcerec *fr,
+ +                              int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
+ +                                gmx_membed_t membed,
+ +                              real cpt_period,real max_hours,
+ +                              const char *deviceOptions,
+ +                              unsigned long Flags,
+ +                              gmx_runtime_t *runtime);
+ +
+ +/* ROUTINES from md.c */
+ +
+ +gmx_integrator_t do_md;
+ +
+ +
+ +/* ROUTINES from minimize.c */
+ +
+ +gmx_integrator_t do_steep;
+ +/* Do steepest descents EM */
+ +
+ +gmx_integrator_t do_cg;
+ +/* Do conjugate gradient EM */
+ +
+ +gmx_integrator_t do_lbfgs;
+ +/* Do conjugate gradient L-BFGS */
+ +
+ +gmx_integrator_t do_nm;
+ +/* Do normal mode analysis */
+ +
+ +/* ROUTINES from tpi.c */
+ +
+ +gmx_integrator_t do_tpi;
+ +/* Do test particle insertion */
+ +
+ +void init_npt_masses(t_inputrec *ir, t_state *state, t_extmass *MassQ, gmx_bool bInit);
+ +
+ +int ExpandedEnsembleDynamics(FILE *log,t_inputrec *ir, gmx_enerdata_t *enerd,
+ +                             t_state *state, t_extmass *MassQ, df_history_t *dfhist,
+ +                             gmx_large_int_t step, gmx_rng_t mcrng,
+ +                             rvec *v, t_mdatoms *mdatoms);
+ +
+ +void PrintFreeEnergyInfoToFile(FILE *outfile, t_lambda *fep, t_expanded *expand, t_simtemp *simtemp, df_history_t *dfhist,
+ +                               int nlam, int frequency, gmx_large_int_t step);
+ +
+ +void get_mc_state(gmx_rng_t rng,t_state *state);
+ +
+ +void set_mc_state(gmx_rng_t rng,t_state *state);
+ +
+ +/* check the version */
+ +void check_ir_old_tpx_versions(t_commrec *cr,FILE *fplog,
+ +                               t_inputrec *ir,gmx_mtop_t *mtop);
+ +
+ +/* Allocate and initialize node-local state entries. */
+ +void set_state_entries(t_state *state,const t_inputrec *ir,int nnodes);
+ +
+ +/* Broadcast the data for a simulation, and allocate node-specific settings
+ +   such as rng generators. */
+ +void init_parallel(FILE *log, t_commrec *cr, t_inputrec *inputrec,
+ +                          gmx_mtop_t *mtop);
+ +
+ +int mdrunner(gmx_hw_opt_t *hw_opt,
+ +           FILE *fplog,t_commrec *cr,int nfile,
+ +             const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose,
+ +             gmx_bool bCompact, int nstglobalcomm, ivec ddxyz,int dd_node_order,
+ +             real rdd, real rconstr, const char *dddlb_opt,real dlb_scale,
+ +           const char *ddcsx,const char *ddcsy,const char *ddcsz,
+ +           const char *nbpu_opt,
+ +           int nsteps_cmdline, int nstepout, int resetstep,
+ +           int nmultisim, int repl_ex_nst, int repl_ex_nex,
+ +             int repl_ex_seed, real pforce,real cpt_period,real max_hours,
+ +           const char *deviceOptions, unsigned long Flags);
+ +/* Driver routine, that calls the different methods */
+ +
+ +#ifdef __cplusplus
+ +}
+ +#endif
+ +
+ +#endif        /* _mdrun_h */
diff --cc src/programs/mdrun/mdrun.c

index c801a2f684353fc2b231d2da4df1420941dd408b,0000000000000000000000000000000000000000..6286afeade82fefcab758846dfdb955738306094

mode 100644,000000..100644
--- 1/src/programs/mdrun/mdrun.c
--- /dev/null
+++ b/src/programs/mdrun/mdrun.c
@@@ -1,740 -1,0 +1,751 @@@
-     "When compiled with OpenMP on Linux, [TT]mdrun[tt] pins threads to cores,",
+ +/*  -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ + *
+ + * 
+ + *                This source code is part of
+ + * 
+ + *                 G   R   O   M   A   C   S
+ + * 
+ + *          GROningen MAchine for Chemical Simulations
+ + * 
+ + *                        VERSION 3.2.0
+ + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ + * Copyright (c) 2001-2004, The GROMACS development team,
+ + * check out http://www.gromacs.org for more information.
+ +
+ + * This program is free software; you can redistribute it and/or
+ + * modify it under the terms of the GNU General Public License
+ + * as published by the Free Software Foundation; either version 2
+ + * of the License, or (at your option) any later version.
+ + * 
+ + * If you want to redistribute modifications, please consider that
+ + * scientific software is very special. Version control is crucial -
+ + * bugs must be traceable. We will be happy to consider code for
+ + * inclusion in the official distribution, but derived work must not
+ + * be called official GROMACS. Details are found in the README & COPYING
+ + * files - if they are missing, get the official version at www.gromacs.org.
+ + * 
+ + * To help us fund GROMACS development, we humbly ask that you cite
+ + * the papers on the package - you can find them in the top README file.
+ + * 
+ + * For more info, check our website at http://www.gromacs.org
+ + * 
+ + * And Hey:
+ + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
+ + */
+ +#ifdef HAVE_CONFIG_H
+ +#include <config.h>
+ +#endif
+ +
+ +#include "typedefs.h"
+ +#include "macros.h"
+ +#include "copyrite.h"
+ +#include "main.h"
+ +#include "statutil.h"
+ +#include "smalloc.h"
+ +#include "futil.h"
+ +#include "smalloc.h"
+ +#include "edsam.h"
+ +#include "mdrun.h"
+ +#include "xmdrun.h"
+ +#include "checkpoint.h"
+ +#ifdef GMX_THREAD_MPI
+ +#include "thread_mpi.h"
+ +#endif
+ +
+ +/* afm stuf */
+ +#include "pull.h"
+ +
+ +int cmain(int argc,char *argv[])
+ +{
+ +  const char *desc[] = {
+ +    "The [TT]mdrun[tt] program is the main computational chemistry engine",
+ +    "within GROMACS. Obviously, it performs Molecular Dynamics simulations,",
+ +    "but it can also perform Stochastic Dynamics, Energy Minimization,",
+ +    "test particle insertion or (re)calculation of energies.",
+ +    "Normal mode analysis is another option. In this case [TT]mdrun[tt]",
+ +    "builds a Hessian matrix from single conformation.",
+ +    "For usual Normal Modes-like calculations, make sure that",
+ +    "the structure provided is properly energy-minimized.",
+ +    "The generated matrix can be diagonalized by [TT]g_nmeig[tt].[PAR]",
+ +    "The [TT]mdrun[tt] program reads the run input file ([TT]-s[tt])",
+ +    "and distributes the topology over nodes if needed.",
+ +    "[TT]mdrun[tt] produces at least four output files.",
+ +    "A single log file ([TT]-g[tt]) is written, unless the option",
+ +    "[TT]-seppot[tt] is used, in which case each node writes a log file.",
+ +    "The trajectory file ([TT]-o[tt]), contains coordinates, velocities and",
+ +    "optionally forces.",
+ +    "The structure file ([TT]-c[tt]) contains the coordinates and",
+ +    "velocities of the last step.",
+ +    "The energy file ([TT]-e[tt]) contains energies, the temperature,",
+ +    "pressure, etc, a lot of these things are also printed in the log file.",
+ +    "Optionally coordinates can be written to a compressed trajectory file",
+ +    "([TT]-x[tt]).[PAR]",
+ +    "The option [TT]-dhdl[tt] is only used when free energy calculation is",
+ +    "turned on.[PAR]",
+ +    "A simulation can be run in parallel using two different parallelization",
+ +    "schemes: MPI parallelization and/or OpenMP thread parallelization.",
+ +    "The MPI parallelization uses multiple processes when [TT]mdrun[tt] is",
+ +    "compiled with a normal MPI library or threads when [TT]mdrun[tt] is",
+ +    "compiled with the GROMACS built-in thread-MPI library. OpenMP threads",
+ +    "are supported when mdrun is compiled with OpenMP. Full OpenMP support",
+ +    "is only available with the Verlet cut-off scheme, with the (older)",
+ +    "group scheme only PME-only processes can use OpenMP parallelization.",
+ +    "In all cases [TT]mdrun[tt] will by default try to use all the available",
+ +    "hardware resources. With a normal MPI library only the options",
+ +    "[TT]-ntomp[tt] (with the Verlet cut-off scheme) and [TT]-ntomp_pme[tt],",
+ +    "for PME-only processes, can be used to control the number of threads.",
+ +    "With thread-MPI there are additional options [TT]-nt[tt], which sets",
+ +    "the total number of threads, and [TT]-ntmpi[tt], which sets the number",
+ +    "of thread-MPI threads.",
+ +    "Note that using combined MPI+OpenMP parallelization is almost always",
+ +    "slower than single parallelization, except at the scaling limit, where",
+ +    "especially OpenMP parallelization of PME reduces the communication cost.",
+ +    "OpenMP-only parallelization is much faster than MPI-only parallelization",
+ +    "on a single CPU(-die). Since we currently don't have proper hardware",
+ +    "topology detection, [TT]mdrun[tt] compiled with thread-MPI will only",
+ +    "automatically use OpenMP-only parallelization when you use up to 4",
+ +    "threads, up to 12 threads with Intel Nehalem/Westmere, or up to 16",
+ +    "threads with Intel Sandy Bridge or newer CPUs. Otherwise MPI-only",
+ +    "parallelization is used (except with GPUs, see below).",
+ +    "[PAR]",
+ +    "To quickly test the performance of the new Verlet cut-off scheme",
+ +    "with old [TT].tpr[tt] files, either on CPUs or CPUs+GPUs, you can use",
+ +    "the [TT]-testverlet[tt] option. This should not be used for production,",
+ +    "since it can slightly modify potentials and it will remove charge groups",
+ +    "making analysis difficult, as the [TT].tpr[tt] file will still contain",
+ +    "charge groups. For production simulations it is highly recommended",
+ +    "to specify [TT]cutoff-scheme = Verlet[tt] in the [TT].mdp[tt] file.",
+ +    "[PAR]",
+ +    "With GPUs (only supported with the Verlet cut-off scheme), the number",
+ +    "of GPUs should match the number of MPI processes or MPI threads,",
+ +    "excluding PME-only processes/threads. With thread-MPI the number",
+ +    "of MPI threads will automatically be set to the number of GPUs detected.",
+ +    "When you want to use a subset of the available GPUs, you can use",
+ +    "the [TT]-gpu_id[tt] option, where GPU id's are passed as a string,",
+ +    "e.g. 02 for using GPUs 0 and 2. When you want different GPU id's",
+ +    "on different nodes of a compute cluster, use the GMX_GPU_ID environment",
+ +    "variable instead. The format for GMX_GPU_ID is identical to ",
+ +    "[TT]-gpu_id[tt], but an environment variable can have different values",
+ +    "on different nodes of a cluster.",
+ +    "[PAR]",
+ +    "When using PME with separate PME nodes or with a GPU, the two major",
+ +    "compute tasks, the non-bonded force calculation and the PME calculation",
+ +    "run on different compute resources. If this load is not balanced,",
+ +    "some of the resources will be idle part of time. With the Verlet",
+ +    "cut-off scheme this load is automatically balanced when the PME load",
+ +    "is too high (but not when it is too low). This is done by scaling",
+ +    "the Coulomb cut-off and PME grid spacing by the same amount. In the first",
+ +    "few hundred steps different settings are tried and the fastest is chosen",
+ +    "for the rest of the simulation. This does not affect the accuracy of",
+ +    "the results, but it does affect the decomposition of the Coulomb energy",
+ +    "into particle and mesh contributions. The auto-tuning can be turned off",
+ +    "with the option [TT]-notunepme[tt].",
+ +    "[PAR]",
-     "If you don't want this, use [TT]-nopin[tt].",
-     "With Intel CPUs with hyper-threading enabled, you should pin",
-     "consecutive threads to the same physical core for optimal",
-     "performance when you use virtual cores. This is done automatically",
-     "when you use more than half of the virtual cores. It can also be set",
-     "manually with [TT]-pinht[tt], e.g. for running multiple simulations",
-     "on one compute node.",
++    "[TT]mdrun[tt] pins (sets affinity of) threads to specific cores,",
++    "when all (logical) cores on a compute node are used by [TT]mdrun[tt],",
++    "even when no multi-threading is used,",
+ +    "as this usually results in significantly better performance.",
-     "the offset in (physical) cores for pinning.",
++    "If the queuing systems or the OpenMP library pinned threads, we honor",
++    "this and don't pin again, even though the layout may be sub-optimal.",
++    "If you want to have [TT]mdrun[tt] override an already set thread affinity",
++    "or pin threads when using less cores, use [TT]-pin on[tt].",
++    "With SMT (simultaneous multithreading), e.g. Intel Hyper-Threading,",
++    "there are multiple logical cores per physical core.",
++    "The option [TT]-pinstride[tt] sets the stride in logical cores for",
++    "pinning consecutive threads. Without SMT, 1 is usually the best choice.",
++    "With Intel Hyper-Threading 2 is best when using half or less of the",
++    "logical cores, 1 otherwise. The default value of 0 do exactly that:",
++    "it minimizes the threads per logical core, to optimize performance.",
++    "If you want to run multiple mdrun jobs on the same physical node,"
++    "you should set [TT]-pinstride[tt] to 1 when using all logical cores.",
+ +    "When running multiple mdrun (or other) simulations on the same physical",
+ +    "node, some simulations need to start pinning from a non-zero core",
+ +    "to avoid overloading cores; with [TT]-pinoffset[tt] you can specify",
-   gmx_hw_opt_t hw_opt={0,0,0,0,TRUE,FALSE,0,NULL};
++    "the offset in logical cores for pinning.",
+ +    "[PAR]",
+ +    "When [TT]mdrun[tt] is started using MPI with more than 1 process",
+ +    "or with thread-MPI with more than 1 thread, MPI parallelization is used.",
+ +    "By default domain decomposition is used, unless the [TT]-pd[tt]",
+ +    "option is set, which selects particle decomposition.",
+ +    "[PAR]",
+ +    "With domain decomposition, the spatial decomposition can be set",
+ +    "with option [TT]-dd[tt]. By default [TT]mdrun[tt] selects a good decomposition.",
+ +    "The user only needs to change this when the system is very inhomogeneous.",
+ +    "Dynamic load balancing is set with the option [TT]-dlb[tt],",
+ +    "which can give a significant performance improvement,",
+ +    "especially for inhomogeneous systems. The only disadvantage of",
+ +    "dynamic load balancing is that runs are no longer binary reproducible,",
+ +    "but in most cases this is not important.",
+ +    "By default the dynamic load balancing is automatically turned on",
+ +    "when the measured performance loss due to load imbalance is 5% or more.",
+ +    "At low parallelization these are the only important options",
+ +    "for domain decomposition.",
+ +    "At high parallelization the options in the next two sections",
+ +    "could be important for increasing the performace.",
+ +    "[PAR]",
+ +    "When PME is used with domain decomposition, separate nodes can",
+ +    "be assigned to do only the PME mesh calculation;",
+ +    "this is computationally more efficient starting at about 12 nodes.",
+ +    "The number of PME nodes is set with option [TT]-npme[tt],",
+ +    "this can not be more than half of the nodes.",
+ +    "By default [TT]mdrun[tt] makes a guess for the number of PME",
+ +    "nodes when the number of nodes is larger than 11 or performance wise",
+ +    "not compatible with the PME grid x dimension.",
+ +    "But the user should optimize npme. Performance statistics on this issue",
+ +    "are written at the end of the log file.",
+ +    "For good load balancing at high parallelization, the PME grid x and y",
+ +    "dimensions should be divisible by the number of PME nodes",
+ +    "(the simulation will run correctly also when this is not the case).",
+ +    "[PAR]",
+ +    "This section lists all options that affect the domain decomposition.",
+ +    "[PAR]",
+ +    "Option [TT]-rdd[tt] can be used to set the required maximum distance",
+ +    "for inter charge-group bonded interactions.",
+ +    "Communication for two-body bonded interactions below the non-bonded",
+ +    "cut-off distance always comes for free with the non-bonded communication.",
+ +    "Atoms beyond the non-bonded cut-off are only communicated when they have",
+ +    "missing bonded interactions; this means that the extra cost is minor",
+ +    "and nearly indepedent of the value of [TT]-rdd[tt].",
+ +    "With dynamic load balancing option [TT]-rdd[tt] also sets",
+ +    "the lower limit for the domain decomposition cell sizes.",
+ +    "By default [TT]-rdd[tt] is determined by [TT]mdrun[tt] based on",
+ +    "the initial coordinates. The chosen value will be a balance",
+ +    "between interaction range and communication cost.",
+ +    "[PAR]",
+ +    "When inter charge-group bonded interactions are beyond",
+ +    "the bonded cut-off distance, [TT]mdrun[tt] terminates with an error message.",
+ +    "For pair interactions and tabulated bonds",
+ +    "that do not generate exclusions, this check can be turned off",
+ +    "with the option [TT]-noddcheck[tt].",
+ +    "[PAR]",
+ +    "When constraints are present, option [TT]-rcon[tt] influences",
+ +    "the cell size limit as well.",
+ +    "Atoms connected by NC constraints, where NC is the LINCS order plus 1,",
+ +    "should not be beyond the smallest cell size. A error message is",
+ +    "generated when this happens and the user should change the decomposition",
+ +    "or decrease the LINCS order and increase the number of LINCS iterations.",
+ +    "By default [TT]mdrun[tt] estimates the minimum cell size required for P-LINCS",
+ +    "in a conservative fashion. For high parallelization it can be useful",
+ +    "to set the distance required for P-LINCS with the option [TT]-rcon[tt].",
+ +    "[PAR]",
+ +    "The [TT]-dds[tt] option sets the minimum allowed x, y and/or z scaling",
+ +    "of the cells with dynamic load balancing. [TT]mdrun[tt] will ensure that",
+ +    "the cells can scale down by at least this factor. This option is used",
+ +    "for the automated spatial decomposition (when not using [TT]-dd[tt])",
+ +    "as well as for determining the number of grid pulses, which in turn",
+ +    "sets the minimum allowed cell size. Under certain circumstances",
+ +    "the value of [TT]-dds[tt] might need to be adjusted to account for",
+ +    "high or low spatial inhomogeneity of the system.",
+ +    "[PAR]",
+ +    "The option [TT]-gcom[tt] can be used to only do global communication",
+ +    "every n steps.",
+ +    "This can improve performance for highly parallel simulations",
+ +    "where this global communication step becomes the bottleneck.",
+ +    "For a global thermostat and/or barostat the temperature",
+ +    "and/or pressure will also only be updated every [TT]-gcom[tt] steps.",
+ +    "By default it is set to the minimum of nstcalcenergy and nstlist.[PAR]",
+ +    "With [TT]-rerun[tt] an input trajectory can be given for which ",
+ +    "forces and energies will be (re)calculated. Neighbor searching will be",
+ +    "performed for every frame, unless [TT]nstlist[tt] is zero",
+ +    "(see the [TT].mdp[tt] file).[PAR]",
+ +    "ED (essential dynamics) sampling and/or additional flooding potentials",
+ +    "are switched on by using the [TT]-ei[tt] flag followed by an [TT].edi[tt]",
+ +    "file. The [TT].edi[tt] file can be produced with the [TT]make_edi[tt] tool",
+ +    "or by using options in the essdyn menu of the WHAT IF program.",
+ +    "[TT]mdrun[tt] produces a [TT].xvg[tt] output file that",
+ +    "contains projections of positions, velocities and forces onto selected",
+ +    "eigenvectors.[PAR]",
+ +    "When user-defined potential functions have been selected in the",
+ +    "[TT].mdp[tt] file the [TT]-table[tt] option is used to pass [TT]mdrun[tt]",
+ +    "a formatted table with potential functions. The file is read from",
+ +    "either the current directory or from the [TT]GMXLIB[tt] directory.",
+ +    "A number of pre-formatted tables are presented in the [TT]GMXLIB[tt] dir,",
+ +    "for 6-8, 6-9, 6-10, 6-11, 6-12 Lennard-Jones potentials with",
+ +    "normal Coulomb.",
+ +    "When pair interactions are present, a separate table for pair interaction",
+ +    "functions is read using the [TT]-tablep[tt] option.[PAR]",
+ +    "When tabulated bonded functions are present in the topology,",
+ +    "interaction functions are read using the [TT]-tableb[tt] option.",
+ +    "For each different tabulated interaction type the table file name is",
+ +    "modified in a different way: before the file extension an underscore is",
+ +    "appended, then a 'b' for bonds, an 'a' for angles or a 'd' for dihedrals",
+ +    "and finally the table number of the interaction type.[PAR]",
+ +    "The options [TT]-px[tt] and [TT]-pf[tt] are used for writing pull COM",
+ +    "coordinates and forces when pulling is selected",
+ +    "in the [TT].mdp[tt] file.[PAR]",
+ +    "With [TT]-multi[tt] or [TT]-multidir[tt], multiple systems can be ",
+ +    "simulated in parallel.",
+ +    "As many input files/directories are required as the number of systems. ",
+ +    "The [TT]-multidir[tt] option takes a list of directories (one for each ",
+ +    "system) and runs in each of them, using the input/output file names, ",
+ +    "such as specified by e.g. the [TT]-s[tt] option, relative to these ",
+ +    "directories.",
+ +    "With [TT]-multi[tt], the system number is appended to the run input ",
+ +    "and each output filename, for instance [TT]topol.tpr[tt] becomes",
+ +    "[TT]topol0.tpr[tt], [TT]topol1.tpr[tt] etc.",
+ +    "The number of nodes per system is the total number of nodes",
+ +    "divided by the number of systems.",
+ +    "One use of this option is for NMR refinement: when distance",
+ +    "or orientation restraints are present these can be ensemble averaged",
+ +    "over all the systems.[PAR]",
+ +    "With [TT]-replex[tt] replica exchange is attempted every given number",
+ +    "of steps. The number of replicas is set with the [TT]-multi[tt] or ",
+ +    "[TT]-multidir[tt] option, described above.",
+ +    "All run input files should use a different coupling temperature,",
+ +    "the order of the files is not important. The random seed is set with",
+ +    "[TT]-reseed[tt]. The velocities are scaled and neighbor searching",
+ +    "is performed after every exchange.[PAR]",
+ +    "Finally some experimental algorithms can be tested when the",
+ +    "appropriate options have been given. Currently under",
+ +    "investigation are: polarizability and X-ray bombardments.",
+ +    "[PAR]",
+ +    "The option [TT]-membed[tt] does what used to be g_membed, i.e. embed",
+ +    "a protein into a membrane. The data file should contain the options",
+ +    "that where passed to g_membed before. The [TT]-mn[tt] and [TT]-mp[tt]",
+ +    "both apply to this as well.",
+ +    "[PAR]",
+ +    "The option [TT]-pforce[tt] is useful when you suspect a simulation",
+ +    "crashes due to too large forces. With this option coordinates and",
+ +    "forces of atoms with a force larger than a certain value will",
+ +    "be printed to stderr.",
+ +    "[PAR]",
+ +    "Checkpoints containing the complete state of the system are written",
+ +    "at regular intervals (option [TT]-cpt[tt]) to the file [TT]-cpo[tt],",
+ +    "unless option [TT]-cpt[tt] is set to -1.",
+ +    "The previous checkpoint is backed up to [TT]state_prev.cpt[tt] to",
+ +    "make sure that a recent state of the system is always available,",
+ +    "even when the simulation is terminated while writing a checkpoint.",
+ +    "With [TT]-cpnum[tt] all checkpoint files are kept and appended",
+ +    "with the step number.",
+ +    "A simulation can be continued by reading the full state from file",
+ +    "with option [TT]-cpi[tt]. This option is intelligent in the way that",
+ +    "if no checkpoint file is found, Gromacs just assumes a normal run and",
+ +    "starts from the first step of the [TT].tpr[tt] file. By default the output",
+ +    "will be appending to the existing output files. The checkpoint file",
+ +    "contains checksums of all output files, such that you will never",
+ +    "loose data when some output files are modified, corrupt or removed.",
+ +    "There are three scenarios with [TT]-cpi[tt]:[PAR]",
+ +    "[TT]*[tt] no files with matching names are present: new output files are written[PAR]",
+ +    "[TT]*[tt] all files are present with names and checksums matching those stored",
+ +    "in the checkpoint file: files are appended[PAR]",
+ +    "[TT]*[tt] otherwise no files are modified and a fatal error is generated[PAR]",
+ +    "With [TT]-noappend[tt] new output files are opened and the simulation",
+ +    "part number is added to all output file names.",
+ +    "Note that in all cases the checkpoint file itself is not renamed",
+ +    "and will be overwritten, unless its name does not match",
+ +    "the [TT]-cpo[tt] option.",
+ +    "[PAR]",
+ +    "With checkpointing the output is appended to previously written",
+ +    "output files, unless [TT]-noappend[tt] is used or none of the previous",
+ +    "output files are present (except for the checkpoint file).",
+ +    "The integrity of the files to be appended is verified using checksums",
+ +    "which are stored in the checkpoint file. This ensures that output can",
+ +    "not be mixed up or corrupted due to file appending. When only some",
+ +    "of the previous output files are present, a fatal error is generated",
+ +    "and no old output files are modified and no new output files are opened.",
+ +    "The result with appending will be the same as from a single run.",
+ +    "The contents will be binary identical, unless you use a different number",
+ +    "of nodes or dynamic load balancing or the FFT library uses optimizations",
+ +    "through timing.",
+ +    "[PAR]",
+ +    "With option [TT]-maxh[tt] a simulation is terminated and a checkpoint",
+ +    "file is written at the first neighbor search step where the run time",
+ +    "exceeds [TT]-maxh[tt]*0.99 hours.",
+ +    "[PAR]",
+ +    "When [TT]mdrun[tt] receives a TERM signal, it will set nsteps to the current",
+ +    "step plus one. When [TT]mdrun[tt] receives an INT signal (e.g. when ctrl+C is",
+ +    "pressed), it will stop after the next neighbor search step ",
+ +    "(with nstlist=0 at the next step).",
+ +    "In both cases all the usual output will be written to file.",
+ +    "When running with MPI, a signal to one of the [TT]mdrun[tt] processes",
+ +    "is sufficient, this signal should not be sent to mpirun or",
+ +    "the [TT]mdrun[tt] process that is the parent of the others.",
+ +    "[PAR]",
+ +    "When [TT]mdrun[tt] is started with MPI, it does not run niced by default."
+ +  };
+ +  t_commrec    *cr;
+ +  t_filenm fnm[] = {
+ +    { efTPX, NULL,      NULL,       ffREAD },
+ +    { efTRN, "-o",      NULL,       ffWRITE },
+ +    { efXTC, "-x",      NULL,       ffOPTWR },
+ +    { efCPT, "-cpi",    NULL,       ffOPTRD },
+ +    { efCPT, "-cpo",    NULL,       ffOPTWR },
+ +    { efSTO, "-c",      "confout",  ffWRITE },
+ +    { efEDR, "-e",      "ener",     ffWRITE },
+ +    { efLOG, "-g",      "md",       ffWRITE },
+ +    { efXVG, "-dhdl",   "dhdl",     ffOPTWR },
+ +    { efXVG, "-field",  "field",    ffOPTWR },
+ +    { efXVG, "-table",  "table",    ffOPTRD },
+ +    { efXVG, "-tabletf", "tabletf",    ffOPTRD },
+ +    { efXVG, "-tablep", "tablep",   ffOPTRD },
+ +    { efXVG, "-tableb", "table",    ffOPTRD },
+ +    { efTRX, "-rerun",  "rerun",    ffOPTRD },
+ +    { efXVG, "-tpi",    "tpi",      ffOPTWR },
+ +    { efXVG, "-tpid",   "tpidist",  ffOPTWR },
+ +    { efEDI, "-ei",     "sam",      ffOPTRD },
+ +    { efXVG, "-eo",     "edsam",    ffOPTWR },
+ +    { efGCT, "-j",      "wham",     ffOPTRD },
+ +    { efGCT, "-jo",     "bam",      ffOPTWR },
+ +    { efXVG, "-ffout",  "gct",      ffOPTWR },
+ +    { efXVG, "-devout", "deviatie", ffOPTWR },
+ +    { efXVG, "-runav",  "runaver",  ffOPTWR },
+ +    { efXVG, "-px",     "pullx",    ffOPTWR },
+ +    { efXVG, "-pf",     "pullf",    ffOPTWR },
+ +    { efXVG, "-ro",     "rotation", ffOPTWR },
+ +    { efLOG, "-ra",     "rotangles",ffOPTWR },
+ +    { efLOG, "-rs",     "rotslabs", ffOPTWR },
+ +    { efLOG, "-rt",     "rottorque",ffOPTWR },
+ +    { efMTX, "-mtx",    "nm",       ffOPTWR },
+ +    { efNDX, "-dn",     "dipole",   ffOPTWR },
+ +    { efRND, "-multidir",NULL,      ffOPTRDMULT},
+ +    { efDAT, "-membed", "membed",   ffOPTRD },
+ +    { efTOP, "-mp",     "membed",   ffOPTRD },
+ +    { efNDX, "-mn",     "membed",   ffOPTRD }
+ +  };
+ +#define NFILE asize(fnm)
+ +
+ +  /* Command line options ! */
+ +  gmx_bool bCart        = FALSE;
+ +  gmx_bool bPPPME       = FALSE;
+ +  gmx_bool bPartDec     = FALSE;
+ +  gmx_bool bDDBondCheck = TRUE;
+ +  gmx_bool bDDBondComm  = TRUE;
+ +  gmx_bool bTunePME     = TRUE;
+ +  gmx_bool bTestVerlet  = FALSE;
+ +  gmx_bool bVerbose     = FALSE;
+ +  gmx_bool bCompact     = TRUE;
+ +  gmx_bool bSepPot      = FALSE;
+ +  gmx_bool bRerunVSite  = FALSE;
+ +  gmx_bool bIonize      = FALSE;
+ +  gmx_bool bConfout     = TRUE;
+ +  gmx_bool bReproducible = FALSE;
+ +    
+ +  int  npme=-1;
+ +  int  nmultisim=0;
+ +  int  nstglobalcomm=-1;
+ +  int  repl_ex_nst=0;
+ +  int  repl_ex_seed=-1;
+ +  int  repl_ex_nex=0;
+ +  int  nstepout=100;
+ +  int  resetstep=-1;
+ +  int  nsteps=-2; /* the value -2 means that the mdp option will be used */
+ +  
+ +  rvec realddxyz={0,0,0};
+ +  const char *ddno_opt[ddnoNR+1] =
+ +    { NULL, "interleave", "pp_pme", "cartesian", NULL };
+ +  const char *dddlb_opt[] =
+ +    { NULL, "auto", "no", "yes", NULL };
++  const char *thread_aff_opt[threadaffNR+1] =
++    { NULL, "auto", "on", "off", NULL };
+ +  const char *nbpu_opt[] =
+ +    { NULL, "auto", "cpu", "gpu", "gpu_cpu", NULL };
+ +  real rdd=0.0,rconstr=0.0,dlb_scale=0.8,pforce=-1;
+ +  char *ddcsx=NULL,*ddcsy=NULL,*ddcsz=NULL;
+ +  real cpt_period=15.0,max_hours=-1;
+ +  gmx_bool bAppendFiles=TRUE;
+ +  gmx_bool bKeepAndNumCPT=FALSE;
+ +  gmx_bool bResetCountersHalfWay=FALSE;
+ +  output_env_t oenv=NULL;
+ +  const char *deviceOptions = "";
+ +
-     { "-pin",     FALSE, etBOOL, {&hw_opt.bThreadPinning},
-       "Pin OpenMP threads to cores" },
-     { "-pinht",   FALSE, etBOOL, {&hw_opt.bPinHyperthreading},
-       "Always pin threads to Hyper-Threading cores" },
++  gmx_hw_opt_t hw_opt={0,0,0,0,threadaffSEL,0,0,NULL};
+ +
+ +  t_pargs pa[] = {
+ +
+ +    { "-pd",      FALSE, etBOOL,{&bPartDec},
+ +      "Use particle decompostion" },
+ +    { "-dd",      FALSE, etRVEC,{&realddxyz},
+ +      "Domain decomposition grid, 0 is optimize" },
+ +    { "-ddorder", FALSE, etENUM, {ddno_opt},
+ +      "DD node order" },
+ +    { "-npme",    FALSE, etINT, {&npme},
+ +      "Number of separate nodes to be used for PME, -1 is guess" },
+ +    { "-nt",      FALSE, etINT, {&hw_opt.nthreads_tot},
+ +      "Total number of threads to start (0 is guess)" },
+ +    { "-ntmpi",   FALSE, etINT, {&hw_opt.nthreads_tmpi},
+ +      "Number of thread-MPI threads to start (0 is guess)" },
+ +    { "-ntomp",   FALSE, etINT, {&hw_opt.nthreads_omp},
+ +      "Number of OpenMP threads per MPI process/thread to start (0 is guess)" },
+ +    { "-ntomp_pme", FALSE, etINT, {&hw_opt.nthreads_omp_pme},
+ +      "Number of OpenMP threads per MPI process/thread to start (0 is -ntomp)" },
-       "Core offset for pinning (for running multiple mdrun processes on a single physical node)" },
++    { "-pin",     FALSE, etENUM, {thread_aff_opt},
++      "Fix threads (or processes) to specific cores" },
+ +    { "-pinoffset", FALSE, etINT, {&hw_opt.core_pinning_offset},
- 
++      "The starting logical core number for pinning to cores; used to avoid pinning threads from different mdrun instances to the same core" },
++    { "-pinstride", FALSE, etINT, {&hw_opt.core_pinning_stride},
++      "Pinning distance in logical cores for threads, use 0 to minimize the number of threads per physical core" },
+ +    { "-gpu_id",  FALSE, etSTR, {&hw_opt.gpu_id},
+ +      "List of GPU id's to use" },
+ +    { "-ddcheck", FALSE, etBOOL, {&bDDBondCheck},
+ +      "Check for all bonded interactions with DD" },
+ +    { "-ddbondcomm", FALSE, etBOOL, {&bDDBondComm},
+ +      "HIDDENUse special bonded atom communication when [TT]-rdd[tt] > cut-off" },
+ +    { "-rdd",     FALSE, etREAL, {&rdd},
+ +      "The maximum distance for bonded interactions with DD (nm), 0 is determine from initial coordinates" },
+ +    { "-rcon",    FALSE, etREAL, {&rconstr},
+ +      "Maximum distance for P-LINCS (nm), 0 is estimate" },
+ +    { "-dlb",     FALSE, etENUM, {dddlb_opt},
+ +      "Dynamic load balancing (with DD)" },
+ +    { "-dds",     FALSE, etREAL, {&dlb_scale},
+ +      "Minimum allowed dlb scaling of the DD cell size" },
+ +    { "-ddcsx",   FALSE, etSTR, {&ddcsx},
+ +      "HIDDENThe DD cell sizes in x" },
+ +    { "-ddcsy",   FALSE, etSTR, {&ddcsy},
+ +      "HIDDENThe DD cell sizes in y" },
+ +    { "-ddcsz",   FALSE, etSTR, {&ddcsz},
+ +      "HIDDENThe DD cell sizes in z" },
+ +    { "-gcom",    FALSE, etINT,{&nstglobalcomm},
+ +      "Global communication frequency" },
+ +    { "-nb",      FALSE, etENUM, {&nbpu_opt},
+ +      "Calculate non-bonded interactions on" },
+ +    { "-tunepme", FALSE, etBOOL, {&bTunePME},  
+ +      "Optimize PME load between PP/PME nodes or GPU/CPU" },
+ +    { "-testverlet", FALSE, etBOOL, {&bTestVerlet},
+ +      "Test the Verlet non-bonded scheme" },
+ +    { "-v",       FALSE, etBOOL,{&bVerbose},  
+ +      "Be loud and noisy" },
+ +    { "-compact", FALSE, etBOOL,{&bCompact},  
+ +      "Write a compact log file" },
+ +    { "-seppot",  FALSE, etBOOL, {&bSepPot},
+ +      "Write separate V and dVdl terms for each interaction type and node to the log file(s)" },
+ +    { "-pforce",  FALSE, etREAL, {&pforce},
+ +      "Print all forces larger than this (kJ/mol nm)" },
+ +    { "-reprod",  FALSE, etBOOL,{&bReproducible},  
+ +      "Try to avoid optimizations that affect binary reproducibility" },
+ +    { "-cpt",     FALSE, etREAL, {&cpt_period},
+ +      "Checkpoint interval (minutes)" },
+ +    { "-cpnum",   FALSE, etBOOL, {&bKeepAndNumCPT},
+ +      "Keep and number checkpoint files" },
+ +    { "-append",  FALSE, etBOOL, {&bAppendFiles},
+ +      "Append to previous output files when continuing from checkpoint instead of adding the simulation part number to all file names" },
+ +    { "-nsteps",  FALSE, etINT, {&nsteps},
+ +      "Run this number of steps, overrides .mdp file option" },
+ +    { "-maxh",   FALSE, etREAL, {&max_hours},
+ +      "Terminate after 0.99 times this time (hours)" },
+ +    { "-multi",   FALSE, etINT,{&nmultisim}, 
+ +      "Do multiple simulations in parallel" },
+ +    { "-replex",  FALSE, etINT, {&repl_ex_nst}, 
+ +      "Attempt replica exchange periodically with this period (steps)" },
+ +    { "-nex",  FALSE, etINT, {&repl_ex_nex},
+ +      "Number of random exchanges to carry out each exchange interval (N^3 is one suggestion).  -nex zero or not specified gives neighbor replica exchange." },
+ +    { "-reseed",  FALSE, etINT, {&repl_ex_seed}, 
+ +      "Seed for replica exchange, -1 is generate a seed" },
+ +    { "-rerunvsite", FALSE, etBOOL, {&bRerunVSite},
+ +      "HIDDENRecalculate virtual site coordinates with [TT]-rerun[tt]" },
+ +    { "-ionize",  FALSE, etBOOL,{&bIonize},
+ +      "Do a simulation including the effect of an X-Ray bombardment on your system" },
+ +    { "-confout", FALSE, etBOOL, {&bConfout},
+ +      "HIDDENWrite the last configuration with [TT]-c[tt] and force checkpointing at the last step" },
+ +    { "-stepout", FALSE, etINT, {&nstepout},
+ +      "HIDDENFrequency of writing the remaining runtime" },
+ +    { "-resetstep", FALSE, etINT, {&resetstep},
+ +      "HIDDENReset cycle counters after these many time steps" },
+ +    { "-resethway", FALSE, etBOOL, {&bResetCountersHalfWay},
+ +      "HIDDENReset the cycle counters after half the number of steps or halfway [TT]-maxh[tt]" }
+ +  };
+ +  gmx_edsam_t  ed;
+ +  unsigned long Flags, PCA_Flags;
+ +  ivec     ddxyz;
+ +  int      dd_node_order;
+ +  gmx_bool     bAddPart;
+ +  FILE     *fplog,*fpmulti;
+ +  int      sim_part,sim_part_fn;
+ +  const char *part_suffix=".part";
+ +  char     suffix[STRLEN];
+ +  int      rc;
+ +  char **multidir=NULL;
+ +
+ +
+ +  cr = init_par(&argc,&argv);
+ +
+ +  if (MASTER(cr))
+ +    CopyRight(stderr, argv[0]);
+ +
+ +  PCA_Flags = (PCA_CAN_SET_DEFFNM | (MASTER(cr) ? 0 : PCA_QUIET));
+ +  
+ +  /* Comment this in to do fexist calls only on master
+ +   * works not with rerun or tables at the moment
+ +   * also comment out the version of init_forcerec in md.c 
+ +   * with NULL instead of opt2fn
+ +   */
+ +  /*
+ +     if (!MASTER(cr))
+ +     {
+ +     PCA_Flags |= PCA_NOT_READ_NODE;
+ +     }
+ +     */
+ +
+ +  parse_common_args(&argc,argv,PCA_Flags, NFILE,fnm,asize(pa),pa,
+ +                    asize(desc),desc,0,NULL, &oenv);
+ +
+ +
+ +  /* we set these early because they might be used in init_multisystem() 
+ +     Note that there is the potential for npme>nnodes until the number of
+ +     threads is set later on, if there's thread parallelization. That shouldn't
+ +     lead to problems. */ 
+ +  dd_node_order = nenum(ddno_opt);
+ +  cr->npmenodes = npme;
+ +
++  hw_opt.thread_affinity = nenum(thread_aff_opt);
++
+ +  /* now check the -multi and -multidir option */
+ +  if (opt2bSet("-multidir", NFILE, fnm))
+ +  {
+ +      int i;
+ +      if (nmultisim > 0)
+ +      {
+ +          gmx_fatal(FARGS, "mdrun -multi and -multidir options are mutually exclusive.");
+ +      }
+ +      nmultisim = opt2fns(&multidir, "-multidir", NFILE, fnm);
+ +  }
+ +
+ +
+ +  if (repl_ex_nst != 0 && nmultisim < 2)
+ +      gmx_fatal(FARGS,"Need at least two replicas for replica exchange (option -multi)");
+ +
+ +  if (repl_ex_nex < 0)
+ +      gmx_fatal(FARGS,"Replica exchange number of exchanges needs to be positive");
+ +
+ +  if (nmultisim > 1) {
+ +#ifndef GMX_THREAD_MPI
+ +    gmx_bool bParFn = (multidir == NULL);
+ +    init_multisystem(cr, nmultisim, multidir, NFILE, fnm, bParFn);
+ +#else
+ +    gmx_fatal(FARGS,"mdrun -multi is not supported with the thread library.Please compile GROMACS with MPI support");
+ +#endif
+ +  }
+ +
+ +  bAddPart = !bAppendFiles;
+ +
+ +  /* Check if there is ANY checkpoint file available */       
+ +  sim_part    = 1;
+ +  sim_part_fn = sim_part;
+ +  if (opt2bSet("-cpi",NFILE,fnm))
+ +  {
+ +      if (bSepPot && bAppendFiles)
+ +      {
+ +          gmx_fatal(FARGS,"Output file appending is not supported with -seppot");
+ +      }
+ +
+ +      bAppendFiles =
+ +                read_checkpoint_simulation_part(opt2fn_master("-cpi", NFILE,
+ +                                                              fnm,cr),
+ +                                                &sim_part_fn,NULL,cr,
+ +                                                bAppendFiles,NFILE,fnm,
+ +                                                part_suffix,&bAddPart);
+ +      if (sim_part_fn==0 && MULTIMASTER(cr))
+ +      {
+ +          fprintf(stdout,"No previous checkpoint file present, assuming this is a new run.\n");
+ +      }
+ +      else
+ +      {
+ +          sim_part = sim_part_fn + 1;
+ +      }
+ +
+ +      if (MULTISIM(cr) && MASTER(cr))
+ +      {
+ +          if (MULTIMASTER(cr))
+ +          {
+ +              /* Log file is not yet available, so if there's a
+ +               * problem we can only write to stderr. */
+ +              fpmulti = stderr;
+ +          }
+ +          else
+ +          {
+ +              fpmulti = NULL;
+ +          }
+ +          check_multi_int(fpmulti,cr->ms,sim_part,"simulation part",TRUE);
+ +      }
+ +  } 
+ +  else
+ +  {
+ +      bAppendFiles = FALSE;
+ +  }
+ +
+ +  if (!bAppendFiles)
+ +  {
+ +      sim_part_fn = sim_part;
+ +  }
+ +
+ +  if (bAddPart)
+ +  {
+ +      /* Rename all output files (except checkpoint files) */
+ +      /* create new part name first (zero-filled) */
+ +      sprintf(suffix,"%s%04d",part_suffix,sim_part_fn);
+ +
+ +      add_suffix_to_output_names(fnm,NFILE,suffix);
+ +      if (MULTIMASTER(cr))
+ +      {
+ +          fprintf(stdout,"Checkpoint file is from part %d, new output files will be suffixed '%s'.\n",sim_part-1,suffix);
+ +      }
+ +  }
+ +
+ +  Flags = opt2bSet("-rerun",NFILE,fnm) ? MD_RERUN : 0;
+ +  Flags = Flags | (bSepPot       ? MD_SEPPOT       : 0);
+ +  Flags = Flags | (bIonize       ? MD_IONIZE       : 0);
+ +  Flags = Flags | (bPartDec      ? MD_PARTDEC      : 0);
+ +  Flags = Flags | (bDDBondCheck  ? MD_DDBONDCHECK  : 0);
+ +  Flags = Flags | (bDDBondComm   ? MD_DDBONDCOMM   : 0);
+ +  Flags = Flags | (bTunePME      ? MD_TUNEPME      : 0);
+ +  Flags = Flags | (bTestVerlet   ? MD_TESTVERLET   : 0);
+ +  Flags = Flags | (bConfout      ? MD_CONFOUT      : 0);
+ +  Flags = Flags | (bRerunVSite   ? MD_RERUN_VSITE  : 0);
+ +  Flags = Flags | (bReproducible ? MD_REPRODUCIBLE : 0);
+ +  Flags = Flags | (bAppendFiles  ? MD_APPENDFILES  : 0); 
+ +  Flags = Flags | (opt2parg_bSet("-append", asize(pa),pa) ? MD_APPENDFILESSET : 0); 
+ +  Flags = Flags | (bKeepAndNumCPT ? MD_KEEPANDNUMCPT : 0); 
+ +  Flags = Flags | (sim_part>1    ? MD_STARTFROMCPT : 0); 
+ +  Flags = Flags | (bResetCountersHalfWay ? MD_RESETCOUNTERSHALFWAY : 0);
+ +
+ +
+ +  /* We postpone opening the log file if we are appending, so we can 
+ +     first truncate the old log file and append to the correct position 
+ +     there instead.  */
+ +  if ((MASTER(cr) || bSepPot) && !bAppendFiles) 
+ +  {
+ +      gmx_log_open(ftp2fn(efLOG,NFILE,fnm),cr,
+ +                   !bSepPot,Flags & MD_APPENDFILES,&fplog);
+ +      CopyRight(fplog,argv[0]);
+ +      please_cite(fplog,"Hess2008b");
+ +      please_cite(fplog,"Spoel2005a");
+ +      please_cite(fplog,"Lindahl2001a");
+ +      please_cite(fplog,"Berendsen95a");
+ +  }
+ +  else if (!MASTER(cr) && bSepPot)
+ +  {
+ +      gmx_log_open(ftp2fn(efLOG,NFILE,fnm),cr,!bSepPot,Flags,&fplog);
+ +  }
+ +  else
+ +  {
+ +      fplog = NULL;
+ +  }
+ +
+ +  ddxyz[XX] = (int)(realddxyz[XX] + 0.5);
+ +  ddxyz[YY] = (int)(realddxyz[YY] + 0.5);
+ +  ddxyz[ZZ] = (int)(realddxyz[ZZ] + 0.5);
+ +
+ +  rc = mdrunner(&hw_opt, fplog,cr,NFILE,fnm,oenv,bVerbose,bCompact,
+ +                nstglobalcomm, ddxyz,dd_node_order,rdd,rconstr,
+ +                dddlb_opt[0],dlb_scale,ddcsx,ddcsy,ddcsz,
+ +                nbpu_opt[0],
+ +                nsteps,nstepout,resetstep,
+ +                nmultisim,repl_ex_nst,repl_ex_nex,repl_ex_seed,
+ +                pforce, cpt_period,max_hours,deviceOptions,Flags);
+ +
+ +  gmx_finalize_par();
+ +
+ +  if (MULTIMASTER(cr)) {
+ +      thanx(stderr);
+ +  }
+ +
+ +  /* Log file has to be closed in mdrunner if we are appending to it 
+ +     (fplog not set here) */
+ +  if (MASTER(cr) && !bAppendFiles) 
+ +  {
+ +      gmx_log_close(fplog);
+ +  }
+ +
+ +  return rc;
+ +}
+ +
diff --cc src/programs/mdrun/runner.c

index 40b48cd6914f1679bfd31959a783fa7cce749660,0000000000000000000000000000000000000000..6b712de3d3e11313d454aebac6da739a375164a2

mode 100644,000000..100644
--- 1/src/programs/mdrun/runner.c
--- /dev/null
+++ b/src/programs/mdrun/runner.c
@@@ -1,1990 -1,0 +1,1683 @@@
- #if defined(HAVE_SCHED_H) && defined(HAVE_SCHED_GETAFFINITY)
- #define _GNU_SOURCE
- #include <sched.h>
- #include <sys/syscall.h>
- #endif
+ +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ + *
+ + * 
+ + *                This source code is part of
+ + * 
+ + *                 G   R   O   M   A   C   S
+ + * 
+ + *          GROningen MAchine for Chemical Simulations
+ + * 
+ + *                        VERSION 3.2.0
+ + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ + * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ + * Copyright (c) 2001-2004, The GROMACS development team,
+ + * check out http://www.gromacs.org for more information.
+ +
+ + * This program is free software; you can redistribute it and/or
+ + * modify it under the terms of the GNU General Public License
+ + * as published by the Free Software Foundation; either version 2
+ + * of the License, or (at your option) any later version.
+ + * 
+ + * If you want to redistribute modifications, please consider that
+ + * scientific software is very special. Version control is crucial -
+ + * bugs must be traceable. We will be happy to consider code for
+ + * inclusion in the official distribution, but derived work must not
+ + * be called official GROMACS. Details are found in the README & COPYING
+ + * files - if they are missing, get the official version at www.gromacs.org.
+ + * 
+ + * To help us fund GROMACS development, we humbly ask that you cite
+ + * the papers on the package - you can find them in the top README file.
+ + * 
+ + * For more info, check our website at http://www.gromacs.org
+ + * 
+ + * And Hey:
+ + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
+ + */
+ +#ifdef HAVE_CONFIG_H
+ +#include <config.h>
+ +#endif
- 
- #include "thread_mpi/threads.h"
+ +#include <signal.h>
+ +#include <stdlib.h>
+ +#ifdef HAVE_UNISTD_H
+ +#include <unistd.h>
+ +#endif
+ +#include <string.h>
+ +#include <assert.h>
+ +
+ +#include "typedefs.h"
+ +#include "smalloc.h"
+ +#include "sysstuff.h"
+ +#include "statutil.h"
+ +#include "mdrun.h"
+ +#include "md_logging.h"
+ +#include "md_support.h"
+ +#include "network.h"
+ +#include "pull.h"
+ +#include "pull_rotation.h"
+ +#include "names.h"
+ +#include "disre.h"
+ +#include "orires.h"
+ +#include "pme.h"
+ +#include "mdatoms.h"
+ +#include "repl_ex.h"
+ +#include "qmmm.h"
+ +#include "domdec.h"
+ +#include "partdec.h"
+ +#include "coulomb.h"
+ +#include "constr.h"
+ +#include "mvdata.h"
+ +#include "checkpoint.h"
+ +#include "mtop_util.h"
+ +#include "sighandler.h"
+ +#include "tpxio.h"
+ +#include "txtdump.h"
+ +#include "gmx_detect_hardware.h"
+ +#include "gmx_omp_nthreads.h"
+ +#include "pull_rotation.h"
+ +#include "calc_verletbuf.h"
+ +#include "../mdlib/nbnxn_search.h"
+ +#include "../mdlib/nbnxn_consts.h"
+ +#include "gmx_fatal_collective.h"
+ +#include "membed.h"
+ +#include "macros.h"
+ +#include "gmx_omp.h"
-        the main thread returns */
-     ret=tMPI_Init_fn(TRUE, hw_opt->nthreads_tmpi,
-                      (hw_opt->bThreadPinning ? TMPI_AFFINITY_ALL_CORES : TMPI_AFFINITY_NONE),
++#include "gmx_thread_affinity.h"
+ +
+ +#ifdef GMX_LIB_MPI
+ +#include <mpi.h>
+ +#endif
+ +#ifdef GMX_THREAD_MPI
+ +#include "tmpi.h"
+ +#endif
+ +
+ +#ifdef GMX_FAHCORE
+ +#include "corewrap.h"
+ +#endif
+ +
+ +#include "gpu_utils.h"
+ +#include "nbnxn_cuda_data_mgmt.h"
+ +
+ +typedef struct { 
+ +    gmx_integrator_t *func;
+ +} gmx_intp_t;
+ +
+ +/* The array should match the eI array in include/types/enums.h */
+ +const gmx_intp_t integrator[eiNR] = { {do_md}, {do_steep}, {do_cg}, {do_md}, {do_md}, {do_nm}, {do_lbfgs}, {do_tpi}, {do_tpi}, {do_md}, {do_md},{do_md}};
+ +
+ +gmx_large_int_t     deform_init_init_step_tpx;
+ +matrix              deform_init_box_tpx;
+ +#ifdef GMX_THREAD_MPI
+ +tMPI_Thread_mutex_t deform_init_box_mutex=TMPI_THREAD_MUTEX_INITIALIZER;
+ +#endif
+ +
+ +
+ +#ifdef GMX_THREAD_MPI
+ +struct mdrunner_arglist
+ +{
+ +    gmx_hw_opt_t *hw_opt;
+ +    FILE *fplog;
+ +    t_commrec *cr;
+ +    int nfile;
+ +    const t_filenm *fnm;
+ +    output_env_t oenv;
+ +    gmx_bool bVerbose;
+ +    gmx_bool bCompact;
+ +    int nstglobalcomm;
+ +    ivec ddxyz;
+ +    int dd_node_order;
+ +    real rdd;
+ +    real rconstr;
+ +    const char *dddlb_opt;
+ +    real dlb_scale;
+ +    const char *ddcsx;
+ +    const char *ddcsy;
+ +    const char *ddcsz;
+ +    const char *nbpu_opt;
+ +    int nsteps_cmdline;
+ +    int nstepout;
+ +    int resetstep;
+ +    int nmultisim;
+ +    int repl_ex_nst;
+ +    int repl_ex_nex;
+ +    int repl_ex_seed;
+ +    real pforce;
+ +    real cpt_period;
+ +    real max_hours;
+ +    const char *deviceOptions;
+ +    unsigned long Flags;
+ +    int ret; /* return value */
+ +};
+ +
+ +
+ +/* The function used for spawning threads. Extracts the mdrunner() 
+ +   arguments from its one argument and calls mdrunner(), after making
+ +   a commrec. */
+ +static void mdrunner_start_fn(void *arg)
+ +{
+ +    struct mdrunner_arglist *mda=(struct mdrunner_arglist*)arg;
+ +    struct mdrunner_arglist mc=*mda; /* copy the arg list to make sure 
+ +                                        that it's thread-local. This doesn't
+ +                                        copy pointed-to items, of course,
+ +                                        but those are all const. */
+ +    t_commrec *cr;  /* we need a local version of this */
+ +    FILE *fplog=NULL;
+ +    t_filenm *fnm;
+ +
+ +    fnm = dup_tfn(mc.nfile, mc.fnm);
+ +
+ +    cr = init_par_threads(mc.cr);
+ +
+ +    if (MASTER(cr))
+ +    {
+ +        fplog=mc.fplog;
+ +    }
+ +
+ +    mda->ret=mdrunner(mc.hw_opt, fplog, cr, mc.nfile, fnm, mc.oenv, 
+ +                      mc.bVerbose, mc.bCompact, mc.nstglobalcomm, 
+ +                      mc.ddxyz, mc.dd_node_order, mc.rdd,
+ +                      mc.rconstr, mc.dddlb_opt, mc.dlb_scale, 
+ +                      mc.ddcsx, mc.ddcsy, mc.ddcsz,
+ +                      mc.nbpu_opt,
+ +                      mc.nsteps_cmdline, mc.nstepout, mc.resetstep,
+ +                      mc.nmultisim, mc.repl_ex_nst, mc.repl_ex_nex, mc.repl_ex_seed, mc.pforce, 
+ +                      mc.cpt_period, mc.max_hours, mc.deviceOptions, mc.Flags);
+ +}
+ +
+ +/* called by mdrunner() to start a specific number of threads (including 
+ +   the main thread) for thread-parallel runs. This in turn calls mdrunner()
+ +   for each thread. 
+ +   All options besides nthreads are the same as for mdrunner(). */
+ +static t_commrec *mdrunner_start_threads(gmx_hw_opt_t *hw_opt, 
+ +              FILE *fplog,t_commrec *cr,int nfile, 
+ +              const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose,
+ +              gmx_bool bCompact, int nstglobalcomm,
+ +              ivec ddxyz,int dd_node_order,real rdd,real rconstr,
+ +              const char *dddlb_opt,real dlb_scale,
+ +              const char *ddcsx,const char *ddcsy,const char *ddcsz,
+ +              const char *nbpu_opt,
+ +              int nsteps_cmdline, int nstepout,int resetstep,
+ +              int nmultisim,int repl_ex_nst,int repl_ex_nex, int repl_ex_seed,
+ +              real pforce,real cpt_period, real max_hours, 
+ +              const char *deviceOptions, unsigned long Flags)
+ +{
+ +    int ret;
+ +    struct mdrunner_arglist *mda;
+ +    t_commrec *crn; /* the new commrec */
+ +    t_filenm *fnmn;
+ +
+ +    /* first check whether we even need to start tMPI */
+ +    if (hw_opt->nthreads_tmpi < 2)
+ +    {
+ +        return cr;
+ +    }
+ +
+ +    /* a few small, one-time, almost unavoidable memory leaks: */
+ +    snew(mda,1);
+ +    fnmn=dup_tfn(nfile, fnm);
+ +
+ +    /* fill the data structure to pass as void pointer to thread start fn */
+ +    mda->hw_opt=hw_opt;
+ +    mda->fplog=fplog;
+ +    mda->cr=cr;
+ +    mda->nfile=nfile;
+ +    mda->fnm=fnmn;
+ +    mda->oenv=oenv;
+ +    mda->bVerbose=bVerbose;
+ +    mda->bCompact=bCompact;
+ +    mda->nstglobalcomm=nstglobalcomm;
+ +    mda->ddxyz[XX]=ddxyz[XX];
+ +    mda->ddxyz[YY]=ddxyz[YY];
+ +    mda->ddxyz[ZZ]=ddxyz[ZZ];
+ +    mda->dd_node_order=dd_node_order;
+ +    mda->rdd=rdd;
+ +    mda->rconstr=rconstr;
+ +    mda->dddlb_opt=dddlb_opt;
+ +    mda->dlb_scale=dlb_scale;
+ +    mda->ddcsx=ddcsx;
+ +    mda->ddcsy=ddcsy;
+ +    mda->ddcsz=ddcsz;
+ +    mda->nbpu_opt=nbpu_opt;
+ +    mda->nsteps_cmdline=nsteps_cmdline;
+ +    mda->nstepout=nstepout;
+ +    mda->resetstep=resetstep;
+ +    mda->nmultisim=nmultisim;
+ +    mda->repl_ex_nst=repl_ex_nst;
+ +    mda->repl_ex_nex=repl_ex_nex;
+ +    mda->repl_ex_seed=repl_ex_seed;
+ +    mda->pforce=pforce;
+ +    mda->cpt_period=cpt_period;
+ +    mda->max_hours=max_hours;
+ +    mda->deviceOptions=deviceOptions;
+ +    mda->Flags=Flags;
+ +
+ +    /* now spawn new threads that start mdrunner_start_fn(), while 
- /* Check the process affinity mask. If it is non-zero, something
-  * else has set the affinity, and mdrun should honor that and
-  * not attempt to do its own thread pinning.
-  *
-  * This function should be called twice. Once before the OpenMP
-  * library gets initialized with bAfterOpenMPInit=FALSE (which will
-  * detect affinity set by external tools like taskset), and again
-  * later, after the OpenMP initialization, with bAfterOpenMPInit=TRUE
-  * (which will detect affinity changes made by the OpenMP library).
-  *
-  * Note that this will only work on Linux, because we use a GNU
-  * feature. */
- static void check_cpu_affinity_set(FILE *fplog, const t_commrec *cr,
-                                    gmx_hw_opt_t *hw_opt, int ncpus,
-                                    gmx_bool bAfterOpenmpInit)
- {
- #ifdef HAVE_SCHED_GETAFFINITY
-     cpu_set_t mask_current;
-     int       i, ret, cpu_count, cpu_set;
-     gmx_bool  bAllSet;
- 
-     assert(hw_opt);
-     if (!hw_opt->bThreadPinning)
-     {
-         /* internal affinity setting is off, don't bother checking process affinity */
-         return;
-     }
- 
-     CPU_ZERO(&mask_current);
-     if ((ret = sched_getaffinity(0, sizeof(cpu_set_t), &mask_current)) != 0)
-     {
-         /* failed to query affinity mask, will just return */
-         if (debug)
-         {
-             fprintf(debug, "Failed to query affinity mask (error %d)", ret);
-         }
-         return;
-     }
- 
-     /* Before proceeding with the actual check, make sure that the number of
-      * detected CPUs is >= the CPUs in the current set.
-      * We need to check for CPU_COUNT as it was added only in glibc 2.6. */
- #ifdef CPU_COUNT
-     if (ncpus < CPU_COUNT(&mask_current))
-     {
-         if (debug)
-         {
-             fprintf(debug, "%d CPUs detected, but %d was returned by CPU_COUNT",
-                     ncpus, CPU_COUNT(&mask_current));
-         }
-         return;
-     }
- #endif /* CPU_COUNT */
- 
-     bAllSet = TRUE;
-     for (i = 0; (i < ncpus && i < CPU_SETSIZE); i++)
-     {
-         bAllSet = bAllSet && (CPU_ISSET(i, &mask_current) != 0);
-     }
- 
-     if (!bAllSet)
-     {
-         if (!bAfterOpenmpInit)
-         {
-             md_print_warn(cr, fplog,
-                           "%s detected a non-default process affinity, "
-                           "so it will not attempt to pin its threads", ShortProgram());
-         }
-         else
-         {
-             md_print_warn(cr, fplog,
-                           "%s detected a non-default process affinity, "
-                           "probably set by the OpenMP library, "
-                           "so it will not attempt to pin its threads", ShortProgram());
-         }
-         hw_opt->bThreadPinning = FALSE;
- 
-         if (debug)
-         {
-             fprintf(debug, "Non-default affinity mask found, mdrun will not pin threads\n");
-         }
-     }
-     else
-     {
-         if (debug)
-         {
-             fprintf(debug, "Default affinity mask found\n");
-         }
-     }
- #endif /* HAVE_SCHED_GETAFFINITY */
- }
- 
- /* Set CPU affinity. Can be important for performance.
-    On some systems (e.g. Cray) CPU Affinity is set by default.
-    But default assigning doesn't work (well) with only some ranks
-    having threads. This causes very low performance.
-    External tools have cumbersome syntax for setting affinity
-    in the case that only some ranks have threads.
-    Thus it is important that GROMACS sets the affinity internally
-    if only PME is using threads.
- */
- static void set_cpu_affinity(FILE *fplog,
-                              const t_commrec *cr,
-                              gmx_hw_opt_t *hw_opt,
-                              int nthreads_pme,
-                              const gmx_hw_info_t *hwinfo,
-                              const t_inputrec *inputrec)
- {
- #if defined GMX_THREAD_MPI
-     /* With the number of TMPI threads equal to the number of cores
-      * we already pinned in thread-MPI, so don't pin again here.
-      */
-     if (hw_opt->nthreads_tmpi == tMPI_Thread_get_hw_number())
-     {
-         return;
-     }
- #endif
- 
- #ifndef __APPLE__
-     /* If the tMPI thread affinity setting is not supported encourage the user
-      * to report it as it's either a bug or an exotic platform which we might
-      * want to support. */
-     if (tMPI_Thread_setaffinity_support() != TMPI_SETAFFINITY_SUPPORT_YES)
-     {
-         md_print_warn(NULL, fplog,
-                       "Can not set thread affinities on the current plarform. On NUMA systems this\n"
-                       "can cause performance degradation. If you think your platform should support\n"
-                       "setting affinities, contact the GROMACS developers.");
-         return;
-     }
- #endif /* __APPLE__ */
- 
-     if (hw_opt->bThreadPinning)
-     {
-         int nth_affinity_set, thread_id_node, thread_id,
-             nthread_local, nthread_node, nthread_hw_max, nphyscore;
-         int offset;
-         char *env;
- 
-         /* threads on this MPI process or TMPI thread */
-         if (cr->duty & DUTY_PP)
-         {
-             nthread_local = gmx_omp_nthreads_get(emntNonbonded);
-         }
-         else
-         {
-             nthread_local = gmx_omp_nthreads_get(emntPME);
-         }
- 
-         /* map the current process to cores */
-         thread_id_node = 0;
-         nthread_node = nthread_local;
- #ifdef GMX_MPI
-         if (PAR(cr) || MULTISIM(cr))
-         {
-             /* We need to determine a scan of the thread counts in this
-              * compute node.
-              */
-             MPI_Comm comm_intra;
- 
-             MPI_Comm_split(MPI_COMM_WORLD,gmx_hostname_num(),cr->rank_intranode,
-                            &comm_intra);
-             MPI_Scan(&nthread_local,&thread_id_node,1,MPI_INT,MPI_SUM,comm_intra);
-             /* MPI_Scan is inclusive, but here we need exclusive */
-             thread_id_node -= nthread_local;
-             /* Get the total number of threads on this physical node */
-             MPI_Allreduce(&nthread_local,&nthread_node,1,MPI_INT,MPI_SUM,comm_intra);
-             MPI_Comm_free(&comm_intra);
-         }
- #endif
- 
-         offset = 0;
-         if (hw_opt->core_pinning_offset > 0)
-         {
-             offset = hw_opt->core_pinning_offset;
-             if (SIMMASTER(cr))
-             {
-                 fprintf(stderr, "Applying core pinning offset %d\n", offset);
-             }
-             if (fplog)
-             {
-                 fprintf(fplog, "Applying core pinning offset %d\n", offset);
-             }
-         }
- 
-         /* With Intel Hyper-Threading enabled, we want to pin consecutive
-          * threads to physical cores when using more threads than physical
-          * cores or when the user requests so.
-          */
-         nthread_hw_max = hwinfo->nthreads_hw_avail;
-         nphyscore = -1;
-         if (hw_opt->bPinHyperthreading ||
-             (gmx_cpuid_x86_smt(hwinfo->cpuid_info) == GMX_CPUID_X86_SMT_ENABLED &&
-              nthread_node > nthread_hw_max/2 && getenv("GMX_DISABLE_PINHT") == NULL))
-         {
-             if (gmx_cpuid_x86_smt(hwinfo->cpuid_info) != GMX_CPUID_X86_SMT_ENABLED)
-             {
-                 /* We print to stderr on all processes, as we might have
-                  * different settings on different physical nodes.
-                  */
-                 if (gmx_cpuid_vendor(hwinfo->cpuid_info) != GMX_CPUID_VENDOR_INTEL)
-                 {
-                     md_print_warn(NULL, fplog, "Pinning for Hyper-Threading layout requested, "
-                                   "but non-Intel CPU detected (vendor: %s)\n",
-                                   gmx_cpuid_vendor_string[gmx_cpuid_vendor(hwinfo->cpuid_info)]);
-                 }
-                 else
-                 {
-                     md_print_warn(NULL, fplog, "Pinning for Hyper-Threading layout requested, "
-                                   "but the CPU detected does not have Intel Hyper-Threading support "
-                                   "(or it is turned off)\n");
-                 }
-             }
-             nphyscore = nthread_hw_max/2;
- 
-             if (SIMMASTER(cr))
-             {
-                 fprintf(stderr, "Pinning to Hyper-Threading cores with %d physical cores in a compute node\n",
-                         nphyscore);
-             }
-             if (fplog)
-             {
-                 fprintf(fplog, "Pinning to Hyper-Threading cores with %d physical cores in a compute node\n",
-                         nphyscore);
-             }
-         }
- 
-         /* Set the per-thread affinity. In order to be able to check the success
-          * of affinity settings, we will set nth_affinity_set to 1 on threads
-          * where the affinity setting succeded and to 0 where it failed.
-          * Reducing these 0/1 values over the threads will give the total number
-          * of threads on which we succeeded.
-          */
-          nth_affinity_set = 0;
- #pragma omp parallel firstprivate(thread_id_node) num_threads(nthread_local) \
-                      reduction(+:nth_affinity_set)
-         {
-             int      core;
-             gmx_bool setaffinity_ret;
- 
-             thread_id       = gmx_omp_get_thread_num();
-             thread_id_node += thread_id;
-             if (nphyscore <= 0)
-             {
-                 core = offset + thread_id_node;
-             }
-             else
-             {
-                 /* Lock pairs of threads to the same hyperthreaded core */
-                 core = offset + thread_id_node/2 + (thread_id_node % 2)*nphyscore;
-             }
- 
-             setaffinity_ret = tMPI_Thread_setaffinity_single(tMPI_Thread_self(), core);
- 
-             /* store the per-thread success-values of the setaffinity */
-             nth_affinity_set = (setaffinity_ret == 0);
- 
-             if (debug)
-             {
-                 fprintf(debug, "On rank %2d, thread %2d, core %2d the affinity setting returned %d\n",
-                         cr->nodeid, gmx_omp_get_thread_num(), core, setaffinity_ret);
-             }
-         }
- 
-         if (nth_affinity_set > nthread_local)
-         {
-             char msg[STRLEN];
- 
-             sprintf(msg, "Looks like we have set affinity for more threads than "
-                     "we have (%d > %d)!\n", nth_affinity_set, nthread_local);
-             gmx_incons(msg);
-         }
-         else
-         {
-             /* check & warn if some threads failed to set their affinities */
-             if (nth_affinity_set != nthread_local)
-             {
-                 char sbuf1[STRLEN], sbuf2[STRLEN];
- 
-                 /* sbuf1 contains rank info, while sbuf2 OpenMP thread info */
-                 sbuf1[0] = sbuf2[0] = '\0';
- #ifdef GMX_MPI
- #ifdef GMX_THREAD_MPI
-                 sprintf(sbuf1, "In thread-MPI thread #%d: ", cr->nodeid);
- #else /* GMX_LIB_MPI */
-                 sprintf(sbuf1, "In MPI process #%d: ", cr->nodeid);
- #endif
- #endif /* GMX_MPI */
- 
-                 if (nthread_local > 1)
-                 {
-                     sprintf(sbuf2, "of %d/%d thread%s ",
-                             nthread_local - nth_affinity_set, nthread_local,
-                             (nthread_local - nth_affinity_set) > 1 ? "s" : "");
-                 }
- 
-                 md_print_warn(NULL, fplog,
-                               "NOTE: %sAffinity setting %sfailed.\n"
-                               "      This can cause performance degradation!",
-                               sbuf1, sbuf2);
-             }
-         }
-     }
- }
- 
- 
++       the main thread returns, we set thread affinity later */
++    ret=tMPI_Init_fn(TRUE, hw_opt->nthreads_tmpi, TMPI_AFFINITY_NONE,
+ +                     mdrunner_start_fn, (void*)(mda) );
+ +    if (ret!=TMPI_SUCCESS)
+ +        return NULL;
+ +
+ +    /* make a new comm_rec to reflect the new situation */
+ +    crn=init_par_threads(cr);
+ +    return crn;
+ +}
+ +
+ +
+ +static int get_tmpi_omp_thread_division(const gmx_hw_info_t *hwinfo,
+ +                                        const gmx_hw_opt_t *hw_opt,
+ +                                        int nthreads_tot,
+ +                                        int ngpu)
+ +{
+ +    int nthreads_tmpi;
+ +
+ +    /* There are no separate PME nodes here, as we ensured in
+ +     * check_and_update_hw_opt that nthreads_tmpi>0 with PME nodes
+ +     * and a conditional ensures we would not have ended up here.
+ +     * Note that separate PME nodes might be switched on later.
+ +     */
+ +    if (ngpu > 0)
+ +    {
+ +        nthreads_tmpi = ngpu;
+ +        if (nthreads_tot > 0 && nthreads_tot < nthreads_tmpi)
+ +        {
+ +            nthreads_tmpi = nthreads_tot;
+ +        }
+ +    }
+ +    else if (hw_opt->nthreads_omp > 0)
+ +    {
+ +        /* Here we could oversubscribe, when we do, we issue a warning later */
+ +        nthreads_tmpi = max(1,nthreads_tot/hw_opt->nthreads_omp);
+ +    }
+ +    else
+ +    {
+ +        /* TODO choose nthreads_omp based on hardware topology
+ +           when we have a hardware topology detection library */
+ +        /* In general, when running up to 4 threads, OpenMP should be faster.
+ +         * Note: on AMD Bulldozer we should avoid running OpenMP over two dies.
+ +         * On Intel>=Nehalem running OpenMP on a single CPU is always faster,
+ +         * even on two CPUs it's usually faster (but with many OpenMP threads
+ +         * it could be faster not to use HT, currently we always use HT).
+ +         * On Nehalem/Westmere we want to avoid running 16 threads over
+ +         * two CPUs with HT, so we need a limit<16; thus we use 12.
+ +         * A reasonable limit for Intel Sandy and Ivy bridge,
+ +         * not knowing the topology, is 16 threads.
+ +         */
+ +        const int nthreads_omp_always_faster             =  4;
+ +        const int nthreads_omp_always_faster_Nehalem     = 12;
+ +        const int nthreads_omp_always_faster_SandyBridge = 16;
+ +        const int first_model_Nehalem     = 0x1A;
+ +        const int first_model_SandyBridge = 0x2A;
+ +        gmx_bool bIntel_Family6;
+ +
+ +        bIntel_Family6 =
+ +            (gmx_cpuid_vendor(hwinfo->cpuid_info) == GMX_CPUID_VENDOR_INTEL &&
+ +             gmx_cpuid_family(hwinfo->cpuid_info) == 6);
+ +
+ +        if (nthreads_tot <= nthreads_omp_always_faster ||
+ +            (bIntel_Family6 &&
+ +             ((gmx_cpuid_model(hwinfo->cpuid_info) >= nthreads_omp_always_faster_Nehalem && nthreads_tot <= nthreads_omp_always_faster_Nehalem) ||
+ +              (gmx_cpuid_model(hwinfo->cpuid_info) >= nthreads_omp_always_faster_SandyBridge && nthreads_tot <= nthreads_omp_always_faster_SandyBridge))))
+ +        {
+ +            /* Use pure OpenMP parallelization */
+ +            nthreads_tmpi = 1;
+ +        }
+ +        else
+ +        {
+ +            /* Don't use OpenMP parallelization */
+ +            nthreads_tmpi = nthreads_tot;
+ +        }
+ +    }
+ +
+ +    return nthreads_tmpi;
+ +}
+ +
+ +
+ +/* Get the number of threads to use for thread-MPI based on how many
+ + * were requested, which algorithms we're using,
+ + * and how many particles there are.
+ + * At the point we have already called check_and_update_hw_opt.
+ + * Thus all options should be internally consistent and consistent
+ + * with the hardware, except that ntmpi could be larger than #GPU.
+ + */
+ +static int get_nthreads_mpi(gmx_hw_info_t *hwinfo,
+ +                            gmx_hw_opt_t *hw_opt,
+ +                            t_inputrec *inputrec, gmx_mtop_t *mtop,
+ +                            const t_commrec *cr,
+ +                            FILE *fplog)
+ +{
+ +    int nthreads_hw,nthreads_tot_max,nthreads_tmpi,nthreads_new,ngpu;
+ +    int min_atoms_per_mpi_thread;
+ +    char *env;
+ +    char sbuf[STRLEN];
+ +    gmx_bool bCanUseGPU;
+ +
+ +    if (hw_opt->nthreads_tmpi > 0)
+ +    {
+ +        /* Trivial, return right away */
+ +        return hw_opt->nthreads_tmpi;
+ +    }
+ +
+ +    nthreads_hw = hwinfo->nthreads_hw_avail;
+ +
+ +    /* How many total (#tMPI*#OpenMP) threads can we start? */ 
+ +    if (hw_opt->nthreads_tot > 0)
+ +    {
+ +        nthreads_tot_max = hw_opt->nthreads_tot;
+ +    }
+ +    else
+ +    {
+ +        nthreads_tot_max = nthreads_hw;
+ +    }
+ +
+ +    bCanUseGPU = (inputrec->cutoff_scheme == ecutsVERLET && hwinfo->bCanUseGPU);
+ +    if (bCanUseGPU)
+ +    {
+ +        ngpu = hwinfo->gpu_info.ncuda_dev_use;
+ +    }
+ +    else
+ +    {
+ +        ngpu = 0;
+ +    }
+ +
+ +    nthreads_tmpi =
+ +        get_tmpi_omp_thread_division(hwinfo,hw_opt,nthreads_tot_max,ngpu);
+ +
+ +    if (inputrec->eI == eiNM || EI_TPI(inputrec->eI))
+ +    {
+ +        /* Steps are divided over the nodes iso splitting the atoms */
+ +        min_atoms_per_mpi_thread = 0;
+ +    }
+ +    else
+ +    {
+ +        if (bCanUseGPU)
+ +        {
+ +            min_atoms_per_mpi_thread = MIN_ATOMS_PER_GPU;
+ +        }
+ +        else
+ +        {
+ +            min_atoms_per_mpi_thread = MIN_ATOMS_PER_MPI_THREAD;
+ +        }
+ +    }
+ +
+ +    /* Check if an algorithm does not support parallel simulation.  */
+ +    if (nthreads_tmpi != 1 &&
+ +        ( inputrec->eI == eiLBFGS ||
+ +          inputrec->coulombtype == eelEWALD ) )
+ +    {
+ +        nthreads_tmpi = 1;
+ +
+ +        md_print_warn(cr,fplog,"The integration or electrostatics algorithm doesn't support parallel runs. Using a single thread-MPI thread.\n");
+ +        if (hw_opt->nthreads_tmpi > nthreads_tmpi)
+ +        {
+ +            gmx_fatal(FARGS,"You asked for more than 1 thread-MPI thread, but an algorithm doesn't support that");
+ +        }
+ +    }
+ +    else if (mtop->natoms/nthreads_tmpi < min_atoms_per_mpi_thread)
+ +    {
+ +        /* the thread number was chosen automatically, but there are too many
+ +           threads (too few atoms per thread) */
+ +        nthreads_new = max(1,mtop->natoms/min_atoms_per_mpi_thread);
+ +
+ +        /* Avoid partial use of Hyper-Threading */
+ +        if (gmx_cpuid_x86_smt(hwinfo->cpuid_info) == GMX_CPUID_X86_SMT_ENABLED &&
+ +            nthreads_new > nthreads_hw/2 && nthreads_new < nthreads_hw)
+ +        {
+ +            nthreads_new = nthreads_hw/2;
+ +        }
+ +
+ +        /* Avoid large prime numbers in the thread count */
+ +        if (nthreads_new >= 6)
+ +        {
+ +            /* Use only 6,8,10 with additional factors of 2 */
+ +            int fac;
+ +
+ +            fac = 2;
+ +            while (3*fac*2 <= nthreads_new)
+ +            {
+ +                fac *= 2;
+ +            }
+ +
+ +            nthreads_new = (nthreads_new/fac)*fac;
+ +        }
+ +        else
+ +        {
+ +            /* Avoid 5 */
+ +            if (nthreads_new == 5)
+ +            {
+ +                nthreads_new = 4;
+ +            }
+ +        }
+ +
+ +        nthreads_tmpi = nthreads_new;
+ +
+ +        fprintf(stderr,"\n");
+ +        fprintf(stderr,"NOTE: Parallelization is limited by the small number of atoms,\n");
+ +        fprintf(stderr,"      only starting %d thread-MPI threads.\n",nthreads_tmpi);
+ +        fprintf(stderr,"      You can use the -nt and/or -ntmpi option to optimize the number of threads.\n\n");
+ +    }
+ +
+ +    return nthreads_tmpi;
+ +}
+ +#endif /* GMX_THREAD_MPI */
+ +
+ +
+ +/* Environment variable for setting nstlist */
+ +static const char*  NSTLIST_ENVVAR          =  "GMX_NSTLIST";
+ +/* Try to increase nstlist when using a GPU with nstlist less than this */
+ +static const int    NSTLIST_GPU_ENOUGH      = 20;
+ +/* Increase nstlist until the non-bonded cost increases more than this factor */
+ +static const float  NBNXN_GPU_LIST_OK_FAC   = 1.25;
+ +/* Don't increase nstlist beyond a non-bonded cost increases of this factor */
+ +static const float  NBNXN_GPU_LIST_MAX_FAC  = 1.40;
+ +
+ +/* Try to increase nstlist when running on a GPU */
+ +static void increase_nstlist(FILE *fp,t_commrec *cr,
+ +                             t_inputrec *ir,const gmx_mtop_t *mtop,matrix box)
+ +{
+ +    char *env;
+ +    int  nstlist_orig,nstlist_prev;
+ +    verletbuf_list_setup_t ls;
+ +    real rlist_inc,rlist_ok,rlist_max,rlist_new,rlist_prev;
+ +    int  i;
+ +    t_state state_tmp;
+ +    gmx_bool bBox,bDD,bCont;
+ +    const char *nstl_fmt="\nFor optimal performance with a GPU nstlist (now %d) should be larger.\nThe optimum depends on your CPU and GPU resources.\nYou might want to try several nstlist values.\n";
+ +    const char *vbd_err="Can not increase nstlist for GPU run because verlet-buffer-drift is not set or used";
+ +    const char *box_err="Can not increase nstlist for GPU run because the box is too small";
+ +    const char *dd_err ="Can not increase nstlist for GPU run because of domain decomposition limitations";
+ +    char buf[STRLEN];
+ +
+ +    /* Number of + nstlist alternative values to try when switching  */
+ +    const int nstl[]={ 20, 25, 40, 50 };
+ +#define NNSTL  sizeof(nstl)/sizeof(nstl[0])
+ +
+ +    env = getenv(NSTLIST_ENVVAR);
+ +    if (env == NULL)
+ +    {
+ +        if (fp != NULL)
+ +        {
+ +            fprintf(fp,nstl_fmt,ir->nstlist);
+ +        }
+ +    }
+ +
+ +    if (ir->verletbuf_drift == 0)
+ +    {
+ +        gmx_fatal(FARGS,"You are using an old tpr file with a GPU, please generate a new tpr file with an up to date version of grompp");
+ +    }
+ +
+ +    if (ir->verletbuf_drift < 0)
+ +    {
+ +        if (MASTER(cr))
+ +        {
+ +            fprintf(stderr,"%s\n",vbd_err);
+ +        }
+ +        if (fp != NULL)
+ +        {
+ +            fprintf(fp,"%s\n",vbd_err);
+ +        }
+ +
+ +        return;
+ +    }
+ +
+ +    nstlist_orig = ir->nstlist;
+ +    if (env != NULL)
+ +    {
+ +        sprintf(buf,"Getting nstlist from environment variable GMX_NSTLIST=%s",env);
+ +        if (MASTER(cr))
+ +        {
+ +            fprintf(stderr,"%s\n",buf);
+ +        }
+ +        if (fp != NULL)
+ +        {
+ +            fprintf(fp,"%s\n",buf);
+ +        }
+ +        sscanf(env,"%d",&ir->nstlist);
+ +    }
+ +
+ +    verletbuf_get_list_setup(TRUE,&ls);
+ +
+ +    /* Allow rlist to make the list double the size of the cut-off sphere */
+ +    rlist_inc = nbnxn_get_rlist_effective_inc(NBNXN_GPU_CLUSTER_SIZE,mtop->natoms/det(box));
+ +    rlist_ok  = (max(ir->rvdw,ir->rcoulomb) + rlist_inc)*pow(NBNXN_GPU_LIST_OK_FAC,1.0/3.0) - rlist_inc;
+ +    rlist_max = (max(ir->rvdw,ir->rcoulomb) + rlist_inc)*pow(NBNXN_GPU_LIST_MAX_FAC,1.0/3.0) - rlist_inc;
+ +    if (debug)
+ +    {
+ +        fprintf(debug,"GPU nstlist tuning: rlist_inc %.3f rlist_max %.3f\n",
+ +                rlist_inc,rlist_max);
+ +    }
+ +
+ +    i = 0;
+ +    nstlist_prev = nstlist_orig;
+ +    rlist_prev   = ir->rlist;
+ +    do
+ +    {
+ +        if (env == NULL)
+ +        {
+ +            ir->nstlist = nstl[i];
+ +        }
+ +
+ +        /* Set the pair-list buffer size in ir */
+ +        calc_verlet_buffer_size(mtop,det(box),ir,ir->verletbuf_drift,&ls,
+ +                                NULL,&rlist_new);
+ +
+ +        /* Does rlist fit in the box? */
+ +        bBox = (sqr(rlist_new) < max_cutoff2(ir->ePBC,box));
+ +        bDD  = TRUE;
+ +        if (bBox && DOMAINDECOMP(cr))
+ +        {
+ +            /* Check if rlist fits in the domain decomposition */
+ +            if (inputrec2nboundeddim(ir) < DIM)
+ +            {
+ +                gmx_incons("Changing nstlist with domain decomposition and unbounded dimensions is not implemented yet");
+ +            }
+ +            copy_mat(box,state_tmp.box);
+ +            bDD = change_dd_cutoff(cr,&state_tmp,ir,rlist_new);
+ +        }
+ +
+ +        bCont = FALSE;
+ +
+ +        if (env == NULL)
+ +        {
+ +            if (bBox && bDD && rlist_new <= rlist_max)
+ +            {
+ +                /* Increase nstlist */
+ +                nstlist_prev = ir->nstlist;
+ +                rlist_prev   = rlist_new;
+ +                bCont = (i+1 < NNSTL && rlist_new < rlist_ok);
+ +            }
+ +            else
+ +            {
+ +                /* Stick with the previous nstlist */
+ +                ir->nstlist = nstlist_prev;
+ +                rlist_new   = rlist_prev;
+ +                bBox = TRUE;
+ +                bDD  = TRUE;
+ +            }
+ +        }
+ +
+ +        i++;
+ +    }
+ +    while (bCont);
+ +
+ +    if (!bBox || !bDD)
+ +    {
+ +        gmx_warning(!bBox ? box_err : dd_err);
+ +        if (fp != NULL)
+ +        {
+ +            fprintf(fp,"\n%s\n",bBox ? box_err : dd_err);
+ +        }
+ +        ir->nstlist = nstlist_orig;
+ +    }
+ +    else if (ir->nstlist != nstlist_orig || rlist_new != ir->rlist)
+ +    {
+ +        sprintf(buf,"Changing nstlist from %d to %d, rlist from %g to %g",
+ +                nstlist_orig,ir->nstlist,
+ +                ir->rlist,rlist_new);
+ +        if (MASTER(cr))
+ +        {
+ +            fprintf(stderr,"%s\n\n",buf);
+ +        }
+ +        if (fp != NULL)
+ +        {
+ +            fprintf(fp,"%s\n\n",buf);
+ +        }
+ +        ir->rlist     = rlist_new;
+ +        ir->rlistlong = rlist_new;
+ +    }
+ +}
+ +
+ +static void prepare_verlet_scheme(FILE *fplog,
+ +                                  gmx_hw_info_t *hwinfo,
+ +                                  t_commrec *cr,
+ +                                  gmx_hw_opt_t *hw_opt,
+ +                                  const char *nbpu_opt,
+ +                                  t_inputrec *ir,
+ +                                  const gmx_mtop_t *mtop,
+ +                                  matrix box,
+ +                                  gmx_bool *bUseGPU)
+ +{
+ +    /* Here we only check for GPU usage on the MPI master process,
+ +     * as here we don't know how many GPUs we will use yet.
+ +     * We check for a GPU on all processes later.
+ +     */
+ +    *bUseGPU = hwinfo->bCanUseGPU || (getenv("GMX_EMULATE_GPU") != NULL);
+ +
+ +    if (ir->verletbuf_drift > 0)
+ +    {
+ +        /* Update the Verlet buffer size for the current run setup */
+ +        verletbuf_list_setup_t ls;
+ +        real rlist_new;
+ +
+ +        /* Here we assume CPU acceleration is on. But as currently
+ +         * calc_verlet_buffer_size gives the same results for 4x8 and 4x4
+ +         * and 4x2 gives a larger buffer than 4x4, this is ok.
+ +         */
+ +        verletbuf_get_list_setup(*bUseGPU,&ls);
+ +
+ +        calc_verlet_buffer_size(mtop,det(box),ir,
+ +                                ir->verletbuf_drift,&ls,
+ +                                NULL,&rlist_new);
+ +        if (rlist_new != ir->rlist)
+ +        {
+ +            if (fplog != NULL)
+ +            {
+ +                fprintf(fplog,"\nChanging rlist from %g to %g for non-bonded %dx%d atom kernels\n\n",
+ +                        ir->rlist,rlist_new,
+ +                        ls.cluster_size_i,ls.cluster_size_j);
+ +            }
+ +            ir->rlist     = rlist_new;
+ +            ir->rlistlong = rlist_new;
+ +        }
+ +    }
+ +
+ +    /* With GPU or emulation we should check nstlist for performance */
+ +    if ((EI_DYNAMICS(ir->eI) &&
+ +         *bUseGPU &&
+ +         ir->nstlist < NSTLIST_GPU_ENOUGH) ||
+ +        getenv(NSTLIST_ENVVAR) != NULL)
+ +    {
+ +        /* Choose a better nstlist */
+ +        increase_nstlist(fplog,cr,ir,mtop,box);
+ +    }
+ +}
+ +
+ +static void convert_to_verlet_scheme(FILE *fplog,
+ +                                     t_inputrec *ir,
+ +                                     gmx_mtop_t *mtop,real box_vol)
+ +{
+ +    char *conv_mesg="Converting input file with group cut-off scheme to the Verlet cut-off scheme";
+ +
+ +    md_print_warn(NULL,fplog,"%s\n",conv_mesg);
+ +
+ +    ir->cutoff_scheme   = ecutsVERLET;
+ +    ir->verletbuf_drift = 0.005;
+ +
+ +    if (ir->rcoulomb != ir->rvdw)
+ +    {
+ +        gmx_fatal(FARGS,"The VdW and Coulomb cut-offs are different, whereas the Verlet scheme only supports equal cut-offs");
+ +    }
+ +
+ +    if (ir->vdwtype == evdwUSER || EEL_USER(ir->coulombtype))
+ +    {
+ +        gmx_fatal(FARGS,"User non-bonded potentials are not (yet) supported with the Verlet scheme");
+ +    }
+ +    else if (EVDW_SWITCHED(ir->vdwtype) || EEL_SWITCHED(ir->coulombtype))
+ +    {
+ +        md_print_warn(NULL,fplog,"Converting switched or shifted interactions to a shifted potential (without force shift), this will lead to slightly different interaction potentials");
+ +
+ +        if (EVDW_SWITCHED(ir->vdwtype))
+ +        {
+ +            ir->vdwtype = evdwCUT;
+ +        }
+ +        if (EEL_SWITCHED(ir->coulombtype))
+ +        {
+ +            if (EEL_FULL(ir->coulombtype))
+ +            {
+ +                /* With full electrostatic only PME can be switched */
+ +                ir->coulombtype = eelPME;
+ +            }
+ +            else
+ +            {
+ +                md_print_warn(NULL,fplog,"NOTE: Replacing %s electrostatics with reaction-field with epsilon-rf=inf\n",eel_names[ir->coulombtype]);
+ +                ir->coulombtype = eelRF;
+ +                ir->epsilon_rf  = 0.0;
+ +            }
+ +        }
+ +
+ +        /* We set the target energy drift to a small number.
+ +         * Note that this is only for testing. For production the user
+ +         * should think about this and set the mdp options.
+ +         */
+ +        ir->verletbuf_drift = 1e-4;
+ +    }
+ +
+ +    if (inputrec2nboundeddim(ir) != 3)
+ +    {
+ +        gmx_fatal(FARGS,"Can only convert old tpr files to the Verlet cut-off scheme with 3D pbc");
+ +    }
+ +
+ +    if (ir->efep != efepNO || ir->implicit_solvent != eisNO)
+ +    {
+ +        gmx_fatal(FARGS,"Will not convert old tpr files to the Verlet cut-off scheme with free-energy calculations or implicit solvent");
+ +    }
+ +
+ +    if (EI_DYNAMICS(ir->eI) && !(EI_MD(ir->eI) && ir->etc == etcNO))
+ +    {
+ +        verletbuf_list_setup_t ls;
+ +
+ +        verletbuf_get_list_setup(FALSE,&ls);
+ +        calc_verlet_buffer_size(mtop,box_vol,ir,ir->verletbuf_drift,&ls,
+ +                                NULL,&ir->rlist);
+ +    }
+ +    else
+ +    {
+ +        ir->verletbuf_drift = -1;
+ +        ir->rlist           = 1.05*max(ir->rvdw,ir->rcoulomb);
+ +    }
+ +
+ +    gmx_mtop_remove_chargegroups(mtop);
+ +}
+ +
-         check_cpu_affinity_set(fplog,
-                                NULL,
-                                hw_opt, hwinfo->nthreads_hw_avail, FALSE);
+ +static void check_and_update_hw_opt(gmx_hw_opt_t *hw_opt,
+ +                                    int cutoff_scheme,
+ +                                    gmx_bool bIsSimMaster)
+ +{
+ +    gmx_omp_nthreads_read_env(&hw_opt->nthreads_omp, bIsSimMaster);
+ +
+ +#ifndef GMX_THREAD_MPI
+ +    if (hw_opt->nthreads_tot > 0)
+ +    {
+ +        gmx_fatal(FARGS,"Setting the total number of threads is only supported with thread-MPI and Gromacs was compiled without thread-MPI");
+ +    }
+ +    if (hw_opt->nthreads_tmpi > 0)
+ +    {
+ +        gmx_fatal(FARGS,"Setting the number of thread-MPI threads is only supported with thread-MPI and Gromacs was compiled without thread-MPI");
+ +    }
+ +#endif
+ +
+ +    if (hw_opt->nthreads_tot > 0 && hw_opt->nthreads_omp_pme <= 0)
+ +    {
+ +        /* We have the same number of OpenMP threads for PP and PME processes,
+ +         * thus we can perform several consistency checks.
+ +         */
+ +        if (hw_opt->nthreads_tmpi > 0 &&
+ +            hw_opt->nthreads_omp > 0 &&
+ +            hw_opt->nthreads_tot != hw_opt->nthreads_tmpi*hw_opt->nthreads_omp)
+ +        {
+ +            gmx_fatal(FARGS,"The total number of threads requested (%d) does not match the thread-MPI threads (%d) times the OpenMP threads (%d) requested",
+ +                      hw_opt->nthreads_tot,hw_opt->nthreads_tmpi,hw_opt->nthreads_omp);
+ +        }
+ +
+ +        if (hw_opt->nthreads_tmpi > 0 &&
+ +            hw_opt->nthreads_tot % hw_opt->nthreads_tmpi != 0)
+ +        {
+ +            gmx_fatal(FARGS,"The total number of threads requested (%d) is not divisible by the number of thread-MPI threads requested (%d)",
+ +                      hw_opt->nthreads_tot,hw_opt->nthreads_tmpi);
+ +        }
+ +
+ +        if (hw_opt->nthreads_omp > 0 &&
+ +            hw_opt->nthreads_tot % hw_opt->nthreads_omp != 0)
+ +        {
+ +            gmx_fatal(FARGS,"The total number of threads requested (%d) is not divisible by the number of OpenMP threads requested (%d)",
+ +                      hw_opt->nthreads_tot,hw_opt->nthreads_omp);
+ +        }
+ +
+ +        if (hw_opt->nthreads_tmpi > 0 &&
+ +            hw_opt->nthreads_omp <= 0)
+ +        {
+ +            hw_opt->nthreads_omp = hw_opt->nthreads_tot/hw_opt->nthreads_tmpi;
+ +        }
+ +    }
+ +
+ +#ifndef GMX_OPENMP
+ +    if (hw_opt->nthreads_omp > 1)
+ +    {
+ +        gmx_fatal(FARGS,"OpenMP threads are requested, but Gromacs was compiled without OpenMP support");
+ +    }
+ +#endif
+ +
+ +    if (cutoff_scheme == ecutsGROUP)
+ +    {
+ +        /* We only have OpenMP support for PME only nodes */
+ +        if (hw_opt->nthreads_omp > 1)
+ +        {
+ +            gmx_fatal(FARGS,"OpenMP threads have been requested with cut-off scheme %s, but these are only supported with cut-off scheme %s",
+ +                      ecutscheme_names[cutoff_scheme],
+ +                      ecutscheme_names[ecutsVERLET]);
+ +        }
+ +        hw_opt->nthreads_omp = 1;
+ +    }
+ +
+ +    if (hw_opt->nthreads_omp_pme > 0 && hw_opt->nthreads_omp <= 0)
+ +    {
+ +        gmx_fatal(FARGS,"You need to specify -ntomp in addition to -ntomp_pme");
+ +    }
+ +
+ +    if (hw_opt->nthreads_tot == 1)
+ +    {
+ +        hw_opt->nthreads_tmpi = 1;
+ +
+ +        if (hw_opt->nthreads_omp > 1)
+ +        {
+ +            gmx_fatal(FARGS,"You requested %d OpenMP threads with %d total threads",
+ +                      hw_opt->nthreads_tmpi,hw_opt->nthreads_tot);
+ +        }
+ +        hw_opt->nthreads_omp = 1;
+ +    }
+ +
+ +    if (hw_opt->nthreads_omp_pme <= 0 && hw_opt->nthreads_omp > 0)
+ +    {
+ +        hw_opt->nthreads_omp_pme = hw_opt->nthreads_omp;
+ +    }
+ +
+ +    if (debug)
+ +    {
+ +        fprintf(debug,"hw_opt: nt %d ntmpi %d ntomp %d ntomp_pme %d gpu_id '%s'\n",
+ +                hw_opt->nthreads_tot,
+ +                hw_opt->nthreads_tmpi,
+ +                hw_opt->nthreads_omp,
+ +                hw_opt->nthreads_omp_pme,
+ +                hw_opt->gpu_id!=NULL ? hw_opt->gpu_id : "");
+ +                
+ +    }
+ +}
+ +
+ +
+ +/* Override the value in inputrec with value passed on the command line (if any) */
+ +static void override_nsteps_cmdline(FILE *fplog,
+ +                                    int nsteps_cmdline,
+ +                                    t_inputrec *ir,
+ +                                    const t_commrec *cr)
+ +{
+ +    assert(ir);
+ +    assert(cr);
+ +
+ +    /* override with anything else than the default -2 */
+ +    if (nsteps_cmdline > -2)
+ +    {
+ +        char stmp[STRLEN];
+ +
+ +        ir->nsteps = nsteps_cmdline;
+ +        if (EI_DYNAMICS(ir->eI))
+ +        {
+ +            sprintf(stmp, "Overriding nsteps with value passed on the command line: %d steps, %.3f ps",
+ +                    nsteps_cmdline, nsteps_cmdline*ir->delta_t);
+ +        }
+ +        else
+ +        {
+ +            sprintf(stmp, "Overriding nsteps with value passed on the command line: %d steps",
+ +                    nsteps_cmdline);
+ +        }
+ +
+ +        md_print_warn(cr, fplog, "%s\n", stmp);
+ +    }
+ +}
+ +
+ +/* Data structure set by SIMMASTER which needs to be passed to all nodes
+ + * before the other nodes have read the tpx file and called gmx_detect_hardware.
+ + */
+ +typedef struct {
+ +    int cutoff_scheme; /* The cutoff scheme from inputrec_t */
+ +    gmx_bool bUseGPU;       /* Use GPU or GPU emulation          */
+ +} master_inf_t;
+ +
+ +int mdrunner(gmx_hw_opt_t *hw_opt,
+ +             FILE *fplog,t_commrec *cr,int nfile,
+ +             const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose,
+ +             gmx_bool bCompact, int nstglobalcomm,
+ +             ivec ddxyz,int dd_node_order,real rdd,real rconstr,
+ +             const char *dddlb_opt,real dlb_scale,
+ +             const char *ddcsx,const char *ddcsy,const char *ddcsz,
+ +             const char *nbpu_opt,
+ +             int nsteps_cmdline, int nstepout,int resetstep,
+ +             int nmultisim,int repl_ex_nst,int repl_ex_nex,
+ +             int repl_ex_seed, real pforce,real cpt_period,real max_hours,
+ +             const char *deviceOptions, unsigned long Flags)
+ +{
+ +    gmx_bool   bForceUseGPU,bTryUseGPU;
+ +    double     nodetime=0,realtime;
+ +    t_inputrec *inputrec;
+ +    t_state    *state=NULL;
+ +    matrix     box;
+ +    gmx_ddbox_t ddbox={0};
+ +    int        npme_major,npme_minor;
+ +    real       tmpr1,tmpr2;
+ +    t_nrnb     *nrnb;
+ +    gmx_mtop_t *mtop=NULL;
+ +    t_mdatoms  *mdatoms=NULL;
+ +    t_forcerec *fr=NULL;
+ +    t_fcdata   *fcd=NULL;
+ +    real       ewaldcoeff=0;
+ +    gmx_pme_t  *pmedata=NULL;
+ +    gmx_vsite_t *vsite=NULL;
+ +    gmx_constr_t constr;
+ +    int        i,m,nChargePerturbed=-1,status,nalloc;
+ +    char       *gro;
+ +    gmx_wallcycle_t wcycle;
+ +    gmx_bool       bReadRNG,bReadEkin;
+ +    int        list;
+ +    gmx_runtime_t runtime;
+ +    int        rc;
+ +    gmx_large_int_t reset_counters;
+ +    gmx_edsam_t ed=NULL;
+ +    t_commrec   *cr_old=cr; 
+ +    int         nthreads_pme=1;
+ +    int         nthreads_pp=1;
+ +    gmx_membed_t membed=NULL;
+ +    gmx_hw_info_t *hwinfo=NULL;
+ +    master_inf_t minf={-1,FALSE};
+ +
+ +    /* CAUTION: threads may be started later on in this function, so
+ +       cr doesn't reflect the final parallel state right now */
+ +    snew(inputrec,1);
+ +    snew(mtop,1);
+ +    
+ +    if (Flags & MD_APPENDFILES) 
+ +    {
+ +        fplog = NULL;
+ +    }
+ +
+ +    bForceUseGPU = (strncmp(nbpu_opt, "gpu", 3) == 0);
+ +    bTryUseGPU   = (strncmp(nbpu_opt, "auto", 4) == 0) || bForceUseGPU;
+ +
+ +    snew(state,1);
+ +    if (SIMMASTER(cr)) 
+ +    {
+ +        /* Read (nearly) all data required for the simulation */
+ +        read_tpx_state(ftp2fn(efTPX,nfile,fnm),inputrec,state,NULL,mtop);
+ +
+ +        if (inputrec->cutoff_scheme != ecutsVERLET &&
+ +            ((Flags & MD_TESTVERLET) || getenv("GMX_VERLET_SCHEME") != NULL))
+ +        {
+ +            convert_to_verlet_scheme(fplog,inputrec,mtop,det(state->box));
+ +        }
+ +
+ +        /* Detect hardware, gather information. With tMPI only thread 0 does it
+ +         * and after threads are started broadcasts hwinfo around. */
+ +        snew(hwinfo, 1);
+ +        gmx_detect_hardware(fplog, hwinfo, cr,
+ +                            bForceUseGPU, bTryUseGPU, hw_opt->gpu_id);
+ +
+ +        minf.cutoff_scheme = inputrec->cutoff_scheme;
+ +        minf.bUseGPU       = FALSE;
+ +
+ +        if (inputrec->cutoff_scheme == ecutsVERLET)
+ +        {
+ +            prepare_verlet_scheme(fplog,hwinfo,cr,hw_opt,nbpu_opt,
+ +                                  inputrec,mtop,state->box,
+ +                                  &minf.bUseGPU);
+ +        }
+ +        else if (hwinfo->bCanUseGPU)
+ +        {
+ +            md_print_warn(cr,fplog,
+ +                          "NOTE: GPU(s) found, but the current simulation can not use GPUs\n"
+ +                          "      To use a GPU, set the mdp option: cutoff-scheme = Verlet\n"
+ +                          "      (for quick performance testing you can use the -testverlet option)\n");
+ +
+ +            if (bForceUseGPU)
+ +            {
+ +                gmx_fatal(FARGS,"GPU requested, but can't be used without cutoff-scheme=Verlet");
+ +            }
+ +        }
+ +    }
+ +#ifndef GMX_THREAD_MPI
+ +    if (PAR(cr))
+ +    {
+ +        gmx_bcast_sim(sizeof(minf),&minf,cr);
+ +    }
+ +#endif
+ +    if (minf.bUseGPU && cr->npmenodes == -1)
+ +    {
+ +        /* Don't automatically use PME-only nodes with GPUs */
+ +        cr->npmenodes = 0;
+ +    }
+ +
+ +    /* Check for externally set OpenMP affinity and turn off internal
+ +     * pinning if any is found. We need to do this check early to tell
+ +     * thread-MPI whether it should do pinning when spawning threads.
++     * TODO: the above no longer holds, we should move these checks down
+ +     */
+ +    gmx_omp_check_thread_affinity(fplog, cr, hw_opt);
+ +
+ +#ifdef GMX_THREAD_MPI
+ +    /* With thread-MPI inputrec is only set here on the master thread */
+ +    if (SIMMASTER(cr))
+ +#endif
+ +    {
+ +        check_and_update_hw_opt(hw_opt,minf.cutoff_scheme,SIMMASTER(cr));
+ +
+ +#ifdef GMX_THREAD_MPI
+ +        /* Early check for externally set process affinity. Can't do over all
+ +         * MPI processes because hwinfo is not available everywhere, but with
+ +         * thread-MPI it's needed as pinning might get turned off which needs
+ +         * to be known before starting thread-MPI. */
-     check_cpu_affinity_set(fplog, cr,
-                            hw_opt, hwinfo->nthreads_hw_avail, FALSE);
++        gmx_check_thread_affinity_set(fplog,
++                                      NULL,
++                                      hw_opt, hwinfo->nthreads_hw_avail, FALSE);
+ +#endif
+ +
+ +#ifdef GMX_THREAD_MPI
+ +        if (cr->npmenodes > 0 && hw_opt->nthreads_tmpi <= 0)
+ +        {
+ +            gmx_fatal(FARGS,"You need to explicitly specify the number of MPI threads (-ntmpi) when using separate PME nodes");
+ +        }
+ +#endif
+ +
+ +        if (hw_opt->nthreads_omp_pme != hw_opt->nthreads_omp &&
+ +            cr->npmenodes <= 0)
+ +        {
+ +            gmx_fatal(FARGS,"You need to explicitly specify the number of PME nodes (-npme) when using different number of OpenMP threads for PP and PME nodes");
+ +        }
+ +    }
+ +
+ +#ifdef GMX_THREAD_MPI
+ +    if (SIMMASTER(cr))
+ +    {
+ +        /* NOW the threads will be started: */
+ +        hw_opt->nthreads_tmpi = get_nthreads_mpi(hwinfo,
+ +                                                 hw_opt,
+ +                                                 inputrec, mtop,
+ +                                                 cr, fplog);
+ +        if (hw_opt->nthreads_tot > 0 && hw_opt->nthreads_omp <= 0)
+ +        {
+ +            hw_opt->nthreads_omp = hw_opt->nthreads_tot/hw_opt->nthreads_tmpi;
+ +        }
+ +
+ +        if (hw_opt->nthreads_tmpi > 1)
+ +        {
+ +            /* now start the threads. */
+ +            cr=mdrunner_start_threads(hw_opt, fplog, cr_old, nfile, fnm, 
+ +                                      oenv, bVerbose, bCompact, nstglobalcomm, 
+ +                                      ddxyz, dd_node_order, rdd, rconstr, 
+ +                                      dddlb_opt, dlb_scale, ddcsx, ddcsy, ddcsz,
+ +                                      nbpu_opt,
+ +                                      nsteps_cmdline, nstepout, resetstep, nmultisim, 
+ +                                      repl_ex_nst, repl_ex_nex, repl_ex_seed, pforce,
+ +                                      cpt_period, max_hours, deviceOptions, 
+ +                                      Flags);
+ +            /* the main thread continues here with a new cr. We don't deallocate
+ +               the old cr because other threads may still be reading it. */
+ +            if (cr == NULL)
+ +            {
+ +                gmx_comm("Failed to spawn threads");
+ +            }
+ +        }
+ +    }
+ +#endif
+ +    /* END OF CAUTION: cr is now reliable */
+ +
+ +    /* g_membed initialisation *
+ +     * Because we change the mtop, init_membed is called before the init_parallel *
+ +     * (in case we ever want to make it run in parallel) */
+ +    if (opt2bSet("-membed",nfile,fnm))
+ +    {
+ +        if (MASTER(cr))
+ +        {
+ +            fprintf(stderr,"Initializing membed");
+ +        }
+ +        membed = init_membed(fplog,nfile,fnm,mtop,inputrec,state,cr,&cpt_period);
+ +    }
+ +
+ +    if (PAR(cr))
+ +    {
+ +        /* now broadcast everything to the non-master nodes/threads: */
+ +        init_parallel(fplog, cr, inputrec, mtop);
+ +
+ +        /* This check needs to happen after get_nthreads_mpi() */
+ +        if (inputrec->cutoff_scheme == ecutsVERLET && (Flags & MD_PARTDEC))
+ +        {
+ +            gmx_fatal_collective(FARGS,cr,NULL,
+ +                                 "The Verlet cut-off scheme is not supported with particle decomposition.\n"
+ +                                 "You can achieve the same effect as particle decomposition by running in parallel using only OpenMP threads.");
+ +        }
+ +    }
+ +    if (fplog != NULL)
+ +    {
+ +        pr_inputrec(fplog,0,"Input Parameters",inputrec,FALSE);
+ +    }
+ +
+ +#if defined GMX_THREAD_MPI
+ +    /* With tMPI we detected on thread 0 and we'll just pass the hwinfo pointer
+ +     * to the other threads  -- slightly uncool, but works fine, just need to
+ +     * make sure that the data doesn't get freed twice. */
+ +    if (cr->nnodes > 1)
+ +    {
+ +        if (!SIMMASTER(cr))
+ +        {
+ +            snew(hwinfo, 1);
+ +        }
+ +        gmx_bcast(sizeof(&hwinfo), &hwinfo, cr);
+ +    }
+ +#else
+ +    if (PAR(cr) && !SIMMASTER(cr))
+ +    {
+ +        /* now we have inputrec on all nodes, can run the detection */
+ +        /* TODO: perhaps it's better to propagate within a node instead? */
+ +        snew(hwinfo, 1);
+ +        gmx_detect_hardware(fplog, hwinfo, cr,
+ +                                 bForceUseGPU, bTryUseGPU, hw_opt->gpu_id);
+ +    }
+ +
+ +    /* Now do the affinity check with MPI/no-MPI (done earlier with thread-MPI). */
-     /* Before setting affinity, check whether the affinity has changed
-      * - which indicates that probably the OpenMP library has changed it since
-      * we first checked). */
-     check_cpu_affinity_set(fplog, cr, hw_opt, hwinfo->nthreads_hw_avail, TRUE);
++    gmx_check_thread_affinity_set(fplog, cr,
++                                  hw_opt, hwinfo->nthreads_hw_avail, FALSE);
+ +#endif
+ +
+ +    /* now make sure the state is initialized and propagated */
+ +    set_state_entries(state,inputrec,cr->nnodes);
+ +
+ +    /* A parallel command line option consistency check that we can
+ +       only do after any threads have started. */
+ +    if (!PAR(cr) &&
+ +        (ddxyz[XX] > 1 || ddxyz[YY] > 1 || ddxyz[ZZ] > 1 || cr->npmenodes > 0))
+ +    {
+ +        gmx_fatal(FARGS,
+ +                  "The -dd or -npme option request a parallel simulation, "
+ +#ifndef GMX_MPI
+ +                  "but %s was compiled without threads or MPI enabled"
+ +#else
+ +#ifdef GMX_THREAD_MPI
+ +                  "but the number of threads (option -nt) is 1"
+ +#else
+ +                  "but %s was not started through mpirun/mpiexec or only one process was requested through mpirun/mpiexec"
+ +#endif
+ +#endif
+ +                  , ShortProgram()
+ +            );
+ +    }
+ +
+ +    if ((Flags & MD_RERUN) &&
+ +        (EI_ENERGY_MINIMIZATION(inputrec->eI) || eiNM == inputrec->eI))
+ +    {
+ +        gmx_fatal(FARGS, "The .mdp file specified an energy mininization or normal mode algorithm, and these are not compatible with mdrun -rerun");
+ +    }
+ +
+ +    if (can_use_allvsall(inputrec,mtop,TRUE,cr,fplog) && PAR(cr))
+ +    {
+ +        /* All-vs-all loops do not work with domain decomposition */
+ +        Flags |= MD_PARTDEC;
+ +    }
+ +
+ +    if (!EEL_PME(inputrec->coulombtype) || (Flags & MD_PARTDEC))
+ +    {
+ +        if (cr->npmenodes > 0)
+ +        {
+ +            if (!EEL_PME(inputrec->coulombtype))
+ +            {
+ +                gmx_fatal_collective(FARGS,cr,NULL,
+ +                                     "PME nodes are requested, but the system does not use PME electrostatics");
+ +            }
+ +            if (Flags & MD_PARTDEC)
+ +            {
+ +                gmx_fatal_collective(FARGS,cr,NULL,
+ +                                     "PME nodes are requested, but particle decomposition does not support separate PME nodes");
+ +            }
+ +        }
+ +
+ +        cr->npmenodes = 0;
+ +    }
+ +
+ +#ifdef GMX_FAHCORE
+ +    fcRegisterSteps(inputrec->nsteps,inputrec->init_step);
+ +#endif
+ +
+ +    /* NMR restraints must be initialized before load_checkpoint,
+ +     * since with time averaging the history is added to t_state.
+ +     * For proper consistency check we therefore need to extend
+ +     * t_state here.
+ +     * So the PME-only nodes (if present) will also initialize
+ +     * the distance restraints.
+ +     */
+ +    snew(fcd,1);
+ +
+ +    /* This needs to be called before read_checkpoint to extend the state */
+ +    init_disres(fplog,mtop,inputrec,cr,Flags & MD_PARTDEC,fcd,state, repl_ex_nst > 0);
+ +
+ +    if (gmx_mtop_ftype_count(mtop,F_ORIRES) > 0)
+ +    {
+ +        if (PAR(cr) && !(Flags & MD_PARTDEC))
+ +        {
+ +            gmx_fatal(FARGS,"Orientation restraints do not work (yet) with domain decomposition, use particle decomposition (mdrun option -pd)");
+ +        }
+ +        /* Orientation restraints */
+ +        if (MASTER(cr))
+ +        {
+ +            init_orires(fplog,mtop,state->x,inputrec,cr->ms,&(fcd->orires),
+ +                        state);
+ +        }
+ +    }
+ +
+ +    if (DEFORM(*inputrec))
+ +    {
+ +        /* Store the deform reference box before reading the checkpoint */
+ +        if (SIMMASTER(cr))
+ +        {
+ +            copy_mat(state->box,box);
+ +        }
+ +        if (PAR(cr))
+ +        {
+ +            gmx_bcast(sizeof(box),box,cr);
+ +        }
+ +        /* Because we do not have the update struct available yet
+ +         * in which the reference values should be stored,
+ +         * we store them temporarily in static variables.
+ +         * This should be thread safe, since they are only written once
+ +         * and with identical values.
+ +         */
+ +#ifdef GMX_THREAD_MPI
+ +        tMPI_Thread_mutex_lock(&deform_init_box_mutex);
+ +#endif
+ +        deform_init_init_step_tpx = inputrec->init_step;
+ +        copy_mat(box,deform_init_box_tpx);
+ +#ifdef GMX_THREAD_MPI
+ +        tMPI_Thread_mutex_unlock(&deform_init_box_mutex);
+ +#endif
+ +    }
+ +
+ +    if (opt2bSet("-cpi",nfile,fnm)) 
+ +    {
+ +        /* Check if checkpoint file exists before doing continuation.
+ +         * This way we can use identical input options for the first and subsequent runs...
+ +         */
+ +        if( gmx_fexist_master(opt2fn_master("-cpi",nfile,fnm,cr),cr) )
+ +        {
+ +            load_checkpoint(opt2fn_master("-cpi",nfile,fnm,cr),&fplog,
+ +                            cr,Flags & MD_PARTDEC,ddxyz,
+ +                            inputrec,state,&bReadRNG,&bReadEkin,
+ +                            (Flags & MD_APPENDFILES),
+ +                            (Flags & MD_APPENDFILESSET));
+ +            
+ +            if (bReadRNG)
+ +            {
+ +                Flags |= MD_READ_RNG;
+ +            }
+ +            if (bReadEkin)
+ +            {
+ +                Flags |= MD_READ_EKIN;
+ +            }
+ +        }
+ +    }
+ +
+ +    if (((MASTER(cr) || (Flags & MD_SEPPOT)) && (Flags & MD_APPENDFILES))
+ +#ifdef GMX_THREAD_MPI
+ +        /* With thread MPI only the master node/thread exists in mdrun.c,
+ +         * therefore non-master nodes need to open the "seppot" log file here.
+ +         */
+ +        || (!MASTER(cr) && (Flags & MD_SEPPOT))
+ +#endif
+ +        )
+ +    {
+ +        gmx_log_open(ftp2fn(efLOG,nfile,fnm),cr,!(Flags & MD_SEPPOT),
+ +                             Flags,&fplog);
+ +    }
+ +
+ +    /* override nsteps with value from cmdline */
+ +    override_nsteps_cmdline(fplog, nsteps_cmdline, inputrec, cr);
+ +
+ +    if (SIMMASTER(cr)) 
+ +    {
+ +        copy_mat(state->box,box);
+ +    }
+ +
+ +    if (PAR(cr)) 
+ +    {
+ +        gmx_bcast(sizeof(box),box,cr);
+ +    }
+ +
+ +    /* Essential dynamics */
+ +    if (opt2bSet("-ei",nfile,fnm))
+ +    {
+ +        /* Open input and output files, allocate space for ED data structure */
+ +        ed = ed_open(mtop->natoms,&state->edsamstate,nfile,fnm,Flags,oenv,cr);
+ +    }
+ +
+ +    if (PAR(cr) && !((Flags & MD_PARTDEC) ||
+ +                     EI_TPI(inputrec->eI) ||
+ +                     inputrec->eI == eiNM))
+ +    {
+ +        cr->dd = init_domain_decomposition(fplog,cr,Flags,ddxyz,rdd,rconstr,
+ +                                           dddlb_opt,dlb_scale,
+ +                                           ddcsx,ddcsy,ddcsz,
+ +                                           mtop,inputrec,
+ +                                           box,state->x,
+ +                                           &ddbox,&npme_major,&npme_minor);
+ +
+ +        make_dd_communicators(fplog,cr,dd_node_order);
+ +
+ +        /* Set overallocation to avoid frequent reallocation of arrays */
+ +        set_over_alloc_dd(TRUE);
+ +    }
+ +    else
+ +    {
+ +        /* PME, if used, is done on all nodes with 1D decomposition */
+ +        cr->npmenodes = 0;
+ +        cr->duty = (DUTY_PP | DUTY_PME);
+ +        npme_major = 1;
+ +        npme_minor = 1;
+ +        if (!EI_TPI(inputrec->eI))
+ +        {
+ +            npme_major = cr->nnodes;
+ +        }
+ +        
+ +        if (inputrec->ePBC == epbcSCREW)
+ +        {
+ +            gmx_fatal(FARGS,
+ +                      "pbc=%s is only implemented with domain decomposition",
+ +                      epbc_names[inputrec->ePBC]);
+ +        }
+ +    }
+ +
+ +    if (PAR(cr))
+ +    {
+ +        /* After possible communicator splitting in make_dd_communicators.
+ +         * we can set up the intra/inter node communication.
+ +         */
+ +        gmx_setup_nodecomm(fplog,cr);
+ +    }
+ +
+ +    /* Initialize per-physical-node MPI process/thread ID and counters. */
+ +    gmx_init_intranode_counters(cr);
+ +
+ +#ifdef GMX_MPI
+ +    md_print_info(cr,fplog,"Using %d MPI %s\n",
+ +                  cr->nnodes,
+ +#ifdef GMX_THREAD_MPI
+ +                  cr->nnodes==1 ? "thread" : "threads"
+ +#else
+ +                  cr->nnodes==1 ? "process" : "processes"
+ +#endif
+ +                  );
+ +    fflush(stderr);
+ +#endif
+ +
+ +    gmx_omp_nthreads_init(fplog, cr,
+ +                          hwinfo->nthreads_hw_avail,
+ +                          hw_opt->nthreads_omp,
+ +                          hw_opt->nthreads_omp_pme,
+ +                          (cr->duty & DUTY_PP) == 0,
+ +                          inputrec->cutoff_scheme == ecutsVERLET);
+ +
+ +    gmx_check_hw_runconf_consistency(fplog, hwinfo, cr, hw_opt->nthreads_tmpi, minf.bUseGPU);
+ +
+ +    /* getting number of PP/PME threads
+ +       PME: env variable should be read only on one node to make sure it is 
+ +       identical everywhere;
+ +     */
+ +    /* TODO nthreads_pp is only used for pinning threads.
+ +     * This is a temporary solution until we have a hw topology library.
+ +     */
+ +    nthreads_pp  = gmx_omp_nthreads_get(emntNonbonded);
+ +    nthreads_pme = gmx_omp_nthreads_get(emntPME);
+ +
+ +    wcycle = wallcycle_init(fplog,resetstep,cr,nthreads_pp,nthreads_pme);
+ +
+ +    if (PAR(cr))
+ +    {
+ +        /* Master synchronizes its value of reset_counters with all nodes 
+ +         * including PME only nodes */
+ +        reset_counters = wcycle_get_reset_counters(wcycle);
+ +        gmx_bcast_sim(sizeof(reset_counters),&reset_counters,cr);
+ +        wcycle_set_reset_counters(wcycle, reset_counters);
+ +    }
+ +
+ +    snew(nrnb,1);
+ +    if (cr->duty & DUTY_PP)
+ +    {
+ +        /* For domain decomposition we allocate dynamically
+ +         * in dd_partition_system.
+ +         */
+ +        if (DOMAINDECOMP(cr))
+ +        {
+ +            bcast_state_setup(cr,state);
+ +        }
+ +        else
+ +        {
+ +            if (PAR(cr))
+ +            {
+ +                bcast_state(cr,state,TRUE);
+ +            }
+ +        }
+ +
+ +        /* Initiate forcerecord */
+ +        fr = mk_forcerec();
+ +        fr->hwinfo = hwinfo;
+ +        init_forcerec(fplog,oenv,fr,fcd,inputrec,mtop,cr,box,FALSE,
+ +                      opt2fn("-table",nfile,fnm),
+ +                      opt2fn("-tabletf",nfile,fnm),
+ +                      opt2fn("-tablep",nfile,fnm),
+ +                      opt2fn("-tableb",nfile,fnm),
+ +                      nbpu_opt,
+ +                      FALSE,pforce);
+ +
+ +        /* version for PCA_NOT_READ_NODE (see md.c) */
+ +        /*init_forcerec(fplog,fr,fcd,inputrec,mtop,cr,box,FALSE,
+ +          "nofile","nofile","nofile","nofile",FALSE,pforce);
+ +          */        
+ +        fr->bSepDVDL = ((Flags & MD_SEPPOT) == MD_SEPPOT);
+ +
+ +        /* Initialize QM-MM */
+ +        if(fr->bQMMM)
+ +        {
+ +            init_QMMMrec(cr,box,mtop,inputrec,fr);
+ +        }
+ +
+ +        /* Initialize the mdatoms structure.
+ +         * mdatoms is not filled with atom data,
+ +         * as this can not be done now with domain decomposition.
+ +         */
+ +        mdatoms = init_mdatoms(fplog,mtop,inputrec->efep!=efepNO);
+ +
+ +        /* Initialize the virtual site communication */
+ +        vsite = init_vsite(mtop,cr,FALSE);
+ +
+ +        calc_shifts(box,fr->shift_vec);
+ +
+ +        /* With periodic molecules the charge groups should be whole at start up
+ +         * and the virtual sites should not be far from their proper positions.
+ +         */
+ +        if (!inputrec->bContinuation && MASTER(cr) &&
+ +            !(inputrec->ePBC != epbcNONE && inputrec->bPeriodicMols))
+ +        {
+ +            /* Make molecules whole at start of run */
+ +            if (fr->ePBC != epbcNONE)
+ +            {
+ +                do_pbc_first_mtop(fplog,inputrec->ePBC,box,mtop,state->x);
+ +            }
+ +            if (vsite)
+ +            {
+ +                /* Correct initial vsite positions are required
+ +                 * for the initial distribution in the domain decomposition
+ +                 * and for the initial shell prediction.
+ +                 */
+ +                construct_vsites_mtop(fplog,vsite,mtop,state->x);
+ +            }
+ +        }
+ +
+ +        if (EEL_PME(fr->eeltype))
+ +        {
+ +            ewaldcoeff = fr->ewaldcoeff;
+ +            pmedata = &fr->pmedata;
+ +        }
+ +        else
+ +        {
+ +            pmedata = NULL;
+ +        }
+ +    }
+ +    else
+ +    {
+ +        /* This is a PME only node */
+ +
+ +        /* We don't need the state */
+ +        done_state(state);
+ +
+ +        ewaldcoeff = calc_ewaldcoeff(inputrec->rcoulomb, inputrec->ewald_rtol);
+ +        snew(pmedata,1);
+ +    }
+ +
-     /* Set the CPU affinity */
-     set_cpu_affinity(fplog,cr,hw_opt,nthreads_pme,hwinfo,inputrec);
++    if (hw_opt->thread_affinity != threadaffOFF)
++    {
++        /* Before setting affinity, check whether the affinity has changed
++         * - which indicates that probably the OpenMP library has changed it
++         * since we first checked).
++         */
++        gmx_check_thread_affinity_set(fplog, cr,
++                                      hw_opt, hwinfo->nthreads_hw_avail, TRUE);
+ +
++        /* Set the CPU affinity */
++        gmx_set_thread_affinity(fplog,cr,hw_opt,nthreads_pme,hwinfo,inputrec);
++    }
+ +
+ +    /* Initiate PME if necessary,
+ +     * either on all nodes or on dedicated PME nodes only. */
+ +    if (EEL_PME(inputrec->coulombtype))
+ +    {
+ +        if (mdatoms)
+ +        {
+ +            nChargePerturbed = mdatoms->nChargePerturbed;
+ +        }
+ +        if (cr->npmenodes > 0)
+ +        {
+ +            /* The PME only nodes need to know nChargePerturbed */
+ +            gmx_bcast_sim(sizeof(nChargePerturbed),&nChargePerturbed,cr);
+ +        }
+ +
+ +        if (cr->duty & DUTY_PME)
+ +        {
+ +            status = gmx_pme_init(pmedata,cr,npme_major,npme_minor,inputrec,
+ +                                  mtop ? mtop->natoms : 0,nChargePerturbed,
+ +                                  (Flags & MD_REPRODUCIBLE),nthreads_pme);
+ +            if (status != 0) 
+ +            {
+ +                gmx_fatal(FARGS,"Error %d initializing PME",status);
+ +            }
+ +        }
+ +    }
+ +
+ +
+ +    if (integrator[inputrec->eI].func == do_md)
+ +    {
+ +        /* Turn on signal handling on all nodes */
+ +        /*
+ +         * (A user signal from the PME nodes (if any)
+ +         * is communicated to the PP nodes.
+ +         */
+ +        signal_handler_install();
+ +    }
+ +
+ +    if (cr->duty & DUTY_PP)
+ +    {
+ +        if (inputrec->ePull != epullNO)
+ +        {
+ +            /* Initialize pull code */
+ +            init_pull(fplog,inputrec,nfile,fnm,mtop,cr,oenv, inputrec->fepvals->init_lambda,
+ +                      EI_DYNAMICS(inputrec->eI) && MASTER(cr),Flags);
+ +        }
+ +        
+ +        if (inputrec->bRot)
+ +        {
+ +           /* Initialize enforced rotation code */
+ +           init_rot(fplog,inputrec,nfile,fnm,cr,state->x,box,mtop,oenv,
+ +                    bVerbose,Flags);
+ +        }
+ +
+ +        constr = init_constraints(fplog,mtop,inputrec,ed,state,cr);
+ +
+ +        if (DOMAINDECOMP(cr))
+ +        {
+ +            dd_init_bondeds(fplog,cr->dd,mtop,vsite,constr,inputrec,
+ +                            Flags & MD_DDBONDCHECK,fr->cginfo_mb);
+ +
+ +            set_dd_parameters(fplog,cr->dd,dlb_scale,inputrec,fr,&ddbox);
+ +
+ +            setup_dd_grid(fplog,cr->dd);
+ +        }
+ +
+ +        /* Now do whatever the user wants us to do (how flexible...) */
+ +        integrator[inputrec->eI].func(fplog,cr,nfile,fnm,
+ +                                      oenv,bVerbose,bCompact,
+ +                                      nstglobalcomm,
+ +                                      vsite,constr,
+ +                                      nstepout,inputrec,mtop,
+ +                                      fcd,state,
+ +                                      mdatoms,nrnb,wcycle,ed,fr,
+ +                                      repl_ex_nst,repl_ex_nex,repl_ex_seed,
+ +                                      membed,
+ +                                      cpt_period,max_hours,
+ +                                      deviceOptions,
+ +                                      Flags,
+ +                                      &runtime);
+ +
+ +        if (inputrec->ePull != epullNO)
+ +        {
+ +            finish_pull(fplog,inputrec->pull);
+ +        }
+ +        
+ +        if (inputrec->bRot)
+ +        {
+ +            finish_rot(fplog,inputrec->rot);
+ +        }
+ +
+ +    } 
+ +    else 
+ +    {
+ +        /* do PME only */
+ +        gmx_pmeonly(*pmedata,cr,nrnb,wcycle,ewaldcoeff,FALSE,inputrec);
+ +    }
+ +
+ +    if (EI_DYNAMICS(inputrec->eI) || EI_TPI(inputrec->eI))
+ +    {
+ +        /* Some timing stats */  
+ +        if (SIMMASTER(cr))
+ +        {
+ +            if (runtime.proc == 0)
+ +            {
+ +                runtime.proc = runtime.real;
+ +            }
+ +        }
+ +        else
+ +        {
+ +            runtime.real = 0;
+ +        }
+ +    }
+ +
+ +    wallcycle_stop(wcycle,ewcRUN);
+ +
+ +    /* Finish up, write some stuff
+ +     * if rerunMD, don't write last frame again 
+ +     */
+ +    finish_run(fplog,cr,ftp2fn(efSTO,nfile,fnm),
+ +               inputrec,nrnb,wcycle,&runtime,
+ +               fr != NULL && fr->nbv != NULL && fr->nbv->bUseGPU ?
+ +                 nbnxn_cuda_get_timings(fr->nbv->cu_nbv) : NULL,
+ +               nthreads_pp, 
+ +               EI_DYNAMICS(inputrec->eI) && !MULTISIM(cr));
+ +
+ +    if ((cr->duty & DUTY_PP) && fr->nbv != NULL && fr->nbv->bUseGPU)
+ +    {
+ +        char gpu_err_str[STRLEN];
+ +
+ +        /* free GPU memory and uninitialize GPU (by destroying the context) */
+ +        nbnxn_cuda_free(fplog, fr->nbv->cu_nbv);
+ +
+ +        if (!free_gpu(gpu_err_str))
+ +        {
+ +            gmx_warning("On node %d failed to free GPU #%d: %s",
+ +                        cr->nodeid, get_current_gpu_device_id(), gpu_err_str);
+ +        }
+ +    }
+ +
+ +    if (opt2bSet("-membed",nfile,fnm))
+ +    {
+ +        sfree(membed);
+ +    }
+ +
+ +#ifdef GMX_THREAD_MPI
+ +    if (PAR(cr) && SIMMASTER(cr))
+ +#endif
+ +    {
+ +        gmx_hardware_info_free(hwinfo);
+ +    }
+ +
+ +    /* Does what it says */  
+ +    print_date_and_time(fplog,cr->nodeid,"Finished mdrun",&runtime);
+ +
+ +    /* Close logfile already here if we were appending to it */
+ +    if (MASTER(cr) && (Flags & MD_APPENDFILES))
+ +    {
+ +        gmx_log_close(fplog);
+ +    } 
+ +
+ +    rc=(int)gmx_get_stop_condition();
+ +
+ +#ifdef GMX_THREAD_MPI
+ +    /* we need to join all threads. The sub-threads join when they
+ +       exit this function, but the master thread needs to be told to 
+ +       wait for that. */
+ +    if (PAR(cr) && MASTER(cr))
+ +    {
+ +        tMPI_Finalize();
+ +    }
+ +#endif
+ +
+ +    return rc;
+ +}
diff --cc src/tools/levenmar.c

index 63ff58fac794e04f97f14c09bac7f7e7d9b87b8b,1be1cc52624cdbe9d43a0a02fe97cba1ca15f3ee..0d96233b89dc60b920668c2879f470323be974f0
--- 1/src/tools/levenmar.c
--- 2/src/tools/levenmar.c
+++ b/src/tools/levenmar.c
@@@ -206,65 -254,106 +206,65 @@@ static void dump_mat(int n,real **a
   
   gmx_bool gaussj(real **a, int n, real **b, int m)
   {
- -    int *indxc, *indxr, *ipiv;
- -    int  i, icol = 0, irow = 0, j, k, l, ll;
- -    real big, dum, pivinv;
- -
- -    indxc = ivector(1, n);
- -    indxr = ivector(1, n);
- -    ipiv  = ivector(1, n);
- -    for (j = 1; j <= n; j++)
- -    {
- -        ipiv[j] = 0;
- -    }
- -    for (i = 1; i <= n; i++)
- -    {
- -        big = 0.0;
- -        for (j = 1; j <= n; j++)
- -        {
- -            if (ipiv[j] != 1)
- -            {
- -                for (k = 1; k <= n; k++)
- -                {
- -                    if (ipiv[k] == 0)
- -                    {
- -                        if (fabs(a[j][k]) >= big)
- -                        {
- -                            big  = fabs(a[j][k]);
- -                            irow = j;
- -                            icol = k;
- -                        }
- -                    }
- -                    else if (ipiv[k] > 1)
- -                    {
- -                        nrerror("GAUSSJ: Singular Matrix-1", FALSE);
- -                        return FALSE;
- -                    }
- -                }
- -            }
- -        }
- -        ++(ipiv[icol]);
- -        if (irow != icol)
- -        {
- -            for (l = 1; l <= n; l++)
- -            {
- -                SWAP(a[irow][l], a[icol][l]);
- -            }
- -            for (l = 1; l <= m; l++)
- -            {
- -                SWAP(b[irow][l], b[icol][l]);
- -            }
- -        }
- -        indxr[i] = irow;
- -        indxc[i] = icol;
- -        if (a[icol][icol] == 0.0)
- -        {
- -            fprintf(stderr, "irow = %d, icol = %d\n", irow, icol);
- -            dump_mat(n, a);
- -            nrerror("GAUSSJ: Singular Matrix-2", FALSE);
- -            return FALSE;
- -        }
- -        pivinv        = 1.0/a[icol][icol];
- -        a[icol][icol] = 1.0;
- -        for (l = 1; l <= n; l++)
- -        {
- -            a[icol][l] *= pivinv;
- -        }
- -        for (l = 1; l <= m; l++)
- -        {
- -            b[icol][l] *= pivinv;
- -        }
- -        for (ll = 1; ll <= n; ll++)
- -        {
- -            if (ll != icol)
- -            {
- -                dum         = a[ll][icol];
- -                a[ll][icol] = 0.0;
- -                for (l = 1; l <= n; l++)
- -                {
- -                    a[ll][l] -= a[icol][l]*dum;
- -                }
- -                for (l = 1; l <= m; l++)
- -                {
- -                    b[ll][l] -= b[icol][l]*dum;
- -                }
- -            }
- -        }
- -    }
- -    for (l = n; l >= 1; l--)
- -    {
- -        if (indxr[l] != indxc[l])
- -        {
- -            for (k = 1; k <= n; k++)
- -            {
- -                SWAP(a[k][indxr[l]], a[k][indxc[l]]);
- -            }
- -        }
- -    }
- -    free_ivector(ipiv, 1);
- -    free_ivector(indxr, 1);
- -    free_ivector(indxc, 1);
- -
- -    return TRUE;
+ +  int *indxc,*indxr,*ipiv;
+ +  int i,icol=0,irow=0,j,k,l,ll;
+ +  real big,dum,pivinv;
+ +  
+ +  indxc=ivector(1,n);
+ +  indxr=ivector(1,n);
+ +  ipiv=ivector(1,n);
+ +  for (j=1;j<=n;j++) ipiv[j]=0;
+ +  for (i=1;i<=n;i++) {
+ +    big=0.0;
+ +    for (j=1;j<=n;j++)
+ +      if (ipiv[j] != 1)
+ +      for (k=1;k<=n;k++) {
+ +        if (ipiv[k] == 0) {
+ +          if (fabs(a[j][k]) >= big) {
+ +            big=fabs(a[j][k]);
+ +            irow=j;
+ +            icol=k;
+ +          }
+ +        } else if (ipiv[k] > 1) {
+ +          nrerror("GAUSSJ: Singular Matrix-1", FALSE);
+ +          return FALSE;
+ +        }
+ +      }
+ +    ++(ipiv[icol]);
+ +    if (irow != icol) {
-       for (l=1;l<=n;l++) SWAP(a[irow][l],a[icol][l])
-       for (l=1;l<=m;l++) SWAP(b[irow][l],b[icol][l])
++        for (l=1;l<=n;l++) SWAP(a[irow][l],a[icol][l]);
++        for (l=1;l<=m;l++) SWAP(b[irow][l],b[icol][l]);
+ +    }
+ +    indxr[i]=irow;
+ +    indxc[i]=icol;
+ +    if (a[icol][icol] == 0.0) {
+ +      fprintf(stderr,"irow = %d, icol = %d\n",irow,icol);
+ +      dump_mat(n,a);
+ +      nrerror("GAUSSJ: Singular Matrix-2", FALSE);
+ +      return FALSE;
+ +    }
+ +    pivinv=1.0/a[icol][icol];
+ +    a[icol][icol]=1.0;
+ +    for (l=1;l<=n;l++) a[icol][l] *= pivinv;
+ +    for (l=1;l<=m;l++) b[icol][l] *= pivinv;
+ +    for (ll=1;ll<=n;ll++)
+ +      if (ll != icol) {
+ +      dum=a[ll][icol];
+ +      a[ll][icol]=0.0;
+ +      for (l=1;l<=n;l++) a[ll][l] -= a[icol][l]*dum;
+ +      for (l=1;l<=m;l++) b[ll][l] -= b[icol][l]*dum;
+ +      }
+ +  }
+ +  for (l=n;l>=1;l--) {
+ +    if (indxr[l] != indxc[l])
+ +      for (k=1;k<=n;k++)
+ +      SWAP(a[k][indxr[l]],a[k][indxc[l]]);
+ +  }
+ +  free_ivector(ipiv,1);
+ +  free_ivector(indxr,1);
+ +  free_ivector(indxc,1);
+ +  
+ +  return TRUE;
   }
   
   #undef SWAP
@@@ -272,152 -361,231 +272,152 @@@
   
   static void covsrt(real **covar, int ma, int lista[], int mfit)
   {
- -    int  i, j;
- -    real swap;
- -
- -    for (j = 1; j < ma; j++)
- -    {
- -        for (i = j+1; i <= ma; i++)
- -        {
- -            covar[i][j] = 0.0;
- -        }
- -    }
- -    for (i = 1; i < mfit; i++)
- -    {
- -        for (j = i+1; j <= mfit; j++)
- -        {
- -            if (lista[j] > lista[i])
- -            {
- -                covar[lista[j]][lista[i]] = covar[i][j];
- -            }
- -            else
- -            {
- -                covar[lista[i]][lista[j]] = covar[i][j];
- -            }
- -        }
- -    }
- -    swap = covar[1][1];
- -    for (j = 1; j <= ma; j++)
- -    {
- -        covar[1][j] = covar[j][j];
- -        covar[j][j] = 0.0;
- -    }
- -    covar[lista[1]][lista[1]] = swap;
- -    for (j = 2; j <= mfit; j++)
- -    {
- -        covar[lista[j]][lista[j]] = covar[1][j];
- -    }
- -    for (j = 2; j <= ma; j++)
- -    {
- -        for (i = 1; i <= j-1; i++)
- -        {
- -            covar[i][j] = covar[j][i];
- -        }
- -    }
+ +  int i,j;
+ +  real swap;
+ +  
+ +  for (j=1;j<ma;j++)
+ +    for (i=j+1;i<=ma;i++) covar[i][j]=0.0;
+ +  for (i=1;i<mfit;i++)
+ +    for (j=i+1;j<=mfit;j++) {
+ +      if (lista[j] > lista[i])
+ +      covar[lista[j]][lista[i]]=covar[i][j];
+ +      else
+ +      covar[lista[i]][lista[j]]=covar[i][j];
+ +    }
+ +  swap=covar[1][1];
+ +  for (j=1;j<=ma;j++) {
+ +    covar[1][j]=covar[j][j];
+ +    covar[j][j]=0.0;
+ +  }
+ +  covar[lista[1]][lista[1]]=swap;
+ +  for (j=2;j<=mfit;j++) covar[lista[j]][lista[j]]=covar[1][j];
+ +  for (j=2;j<=ma;j++)
+ +    for (i=1;i<=j-1;i++) covar[i][j]=covar[j][i];
   }
   
- -#define SWAP(a, b) {swap = (a); (a) = (b); (b) = swap; }
+ +#define SWAP(a,b) {swap=(a);(a)=(b);(b)=swap;}
   
- -static void covsrt_new(real **covar, int ma, int ia[], int mfit)
- -/* Expand in storage the covariance matrix covar, so as to take
- - * into account parameters that are being held fixed. (For the
- - * latter, return zero covariances.)
- - */
+ +static void covsrt_new(real **covar,int ma, int ia[], int mfit)
+ +     /* Expand in storage the covariance matrix covar, so as to take 
+ +      * into account parameters that are being held fixed. (For the
+ +      * latter, return zero covariances.)
+ +      */
   {
- -    int  i, j, k;
- -    real swap;
- -    for (i = mfit+1; i <= ma; i++)
- -    {
- -        for (j = 1; j <= i; j++)
- -        {
- -            covar[i][j] = covar[j][i] = 0.0;
- -        }
- -    }
- -    k = mfit;
- -    for (j = ma; j >= 1; j--)
- -    {
- -        if (ia[j])
- -        {
- -            for (i = 1; i <= ma; i++)
- -            {
- -                SWAP(covar[i][k], covar[i][j]);
- -            }
- -            for (i = 1; i <= ma; i++)
- -            {
- -                SWAP(covar[k][i], covar[j][i]);
- -            }
- -            k--;
- -        }
- -    }
+ +  int i,j,k;
+ +  real swap;
+ +  for (i=mfit+1;i<=ma;i++)
+ +    for (j=1;j<=i;j++) covar[i][j]=covar[j][i]=0.0;
+ +  k=mfit;
+ +  for (j=ma;j>=1;j--) {
+ +    if (ia[j]) {
-       for (i=1;i<=ma;i++) SWAP(covar[i][k],covar[i][j])
-       for (i=1;i<=ma;i++) SWAP(covar[k][i],covar[j][i])
++        for (i=1;i<=ma;i++) SWAP(covar[i][k],covar[i][j]);
++        for (i=1;i<=ma;i++) SWAP(covar[k][i],covar[j][i]);
+ +      k--;
+ +    }
+ +  }
   }
   #undef SWAP
- -
- -static void mrqcof(real x[], real y[], real sig[], int ndata, real a[],
- -                   int ma, int lista[], int mfit,
- -                   real **alpha, real beta[], real *chisq,
- -                   void (*funcs)(real, real *, real *, real *))
+ +      
+ +static void mrqcof(real x[], real y[], real sig[], int ndata, real a[], 
+ +                 int ma, int lista[], int mfit, 
+ +                 real **alpha, real beta[], real *chisq,
+ +                 void (*funcs)(real,real *,real *,real *)) 
   {
- -    int  k, j, i;
- -    real ymod, wt, sig2i, dy, *dyda;
- -
- -    dyda = rvector(1, ma);
- -    for (j = 1; j <= mfit; j++)
- -    {
- -        for (k = 1; k <= j; k++)
- -        {
- -            alpha[j][k] = 0.0;
- -        }
- -        beta[j] = 0.0;
- -    }
- -    *chisq = 0.0;
- -    for (i = 1; i <= ndata; i++)
- -    {
- -        (*funcs)(x[i], a, &ymod, dyda);
- -        sig2i = 1.0/(sig[i]*sig[i]);
- -        dy    = y[i]-ymod;
- -        for (j = 1; j <= mfit; j++)
- -        {
- -            wt = dyda[lista[j]]*sig2i;
- -            for (k = 1; k <= j; k++)
- -            {
- -                alpha[j][k] += wt*dyda[lista[k]];
- -            }
- -            beta[j] += dy*wt;
- -        }
- -        (*chisq) += dy*dy*sig2i;
- -    }
- -    for (j = 2; j <= mfit; j++)
- -    {
- -        for (k = 1; k <= j-1; k++)
- -        {
- -            alpha[k][j] = alpha[j][k];
- -        }
- -    }
- -    free_vector(dyda, 1);
+ +  int k,j,i;
+ +  real ymod,wt,sig2i,dy,*dyda;
+ +  
+ +  dyda=rvector(1,ma);
+ +  for (j=1;j<=mfit;j++) {
+ +    for (k=1;k<=j;k++) alpha[j][k]=0.0;
+ +    beta[j]=0.0;
+ +  }
+ +  *chisq=0.0;
+ +  for (i=1;i<=ndata;i++) {
+ +    (*funcs)(x[i],a,&ymod,dyda);
+ +    sig2i=1.0/(sig[i]*sig[i]);
+ +    dy=y[i]-ymod;
+ +    for (j=1;j<=mfit;j++) {
+ +      wt=dyda[lista[j]]*sig2i;
+ +      for (k=1;k<=j;k++)
+ +      alpha[j][k] += wt*dyda[lista[k]];
+ +      beta[j] += dy*wt;
+ +    }
+ +    (*chisq) += dy*dy*sig2i;
+ +  }
+ +  for (j=2;j<=mfit;j++)
+ +    for (k=1;k<=j-1;k++) alpha[k][j]=alpha[j][k];
+ +  free_vector(dyda,1);
   }
   
- -
- -gmx_bool mrqmin(real x[], real y[], real sig[], int ndata, real a[],
- -                int ma, int lista[], int mfit,
- -                real **covar, real **alpha, real *chisq,
- -                void (*funcs)(real, real *, real *, real *),
- -                real *alamda)
+ +      
+ +gmx_bool mrqmin(real x[], real y[], real sig[], int ndata, real a[], 
+ +          int ma, int lista[], int mfit, 
+ +          real **covar, real **alpha, real *chisq,
+ +          void (*funcs)(real,real *,real *,real *),
+ +          real *alamda) 
   {
- -    int          k, kk, j, ihit;
- -    static real *da, *atry, **oneda, *beta, ochisq;
- -
- -    if (*alamda < 0.0)
- -    {
- -        oneda = matrix1(1, mfit, 1, 1);
- -        atry  = rvector(1, ma);
- -        da    = rvector(1, ma);
- -        beta  = rvector(1, ma);
- -        kk    = mfit+1;
- -        for (j = 1; j <= ma; j++)
- -        {
- -            ihit = 0;
- -            for (k = 1; k <= mfit; k++)
- -            {
- -                if (lista[k] == j)
- -                {
- -                    ihit++;
- -                }
- -            }
- -            if (ihit == 0)
- -            {
- -                lista[kk++] = j;
- -            }
- -            else if (ihit > 1)
- -            {
- -                nrerror("Bad LISTA permutation in MRQMIN-1", FALSE);
- -                return FALSE;
- -            }
- -        }
- -        if (kk != ma+1)
- -        {
- -            nrerror("Bad LISTA permutation in MRQMIN-2", FALSE);
- -            return FALSE;
- -        }
- -        *alamda = 0.001;
- -        mrqcof(x, y, sig, ndata, a, ma, lista, mfit, alpha, beta, chisq, funcs);
- -        ochisq = (*chisq);
- -    }
- -    for (j = 1; j <= mfit; j++)
- -    {
- -        for (k = 1; k <= mfit; k++)
- -        {
- -            covar[j][k] = alpha[j][k];
- -        }
- -        covar[j][j] = alpha[j][j]*(1.0+(*alamda));
- -        oneda[j][1] = beta[j];
- -    }
- -    if (!gaussj(covar, mfit, oneda, 1))
- -    {
- -        return FALSE;
- -    }
- -    for (j = 1; j <= mfit; j++)
- -    {
- -        da[j] = oneda[j][1];
- -    }
- -    if (*alamda == 0.0)
- -    {
- -        covsrt(covar, ma, lista, mfit);
- -        free_vector(beta, 1);
- -        free_vector(da, 1);
- -        free_vector(atry, 1);
- -        free_matrix(oneda, 1, mfit, 1);
- -        return TRUE;
- -    }
- -    for (j = 1; j <= ma; j++)
- -    {
- -        atry[j] = a[j];
- -    }
- -    for (j = 1; j <= mfit; j++)
- -    {
- -        atry[lista[j]] = a[lista[j]]+da[j];
- -    }
- -    mrqcof(x, y, sig, ndata, atry, ma, lista, mfit, covar, da, chisq, funcs);
- -    if (*chisq < ochisq)
- -    {
- -        *alamda *= 0.1;
- -        ochisq   = (*chisq);
- -        for (j = 1; j <= mfit; j++)
- -        {
- -            for (k = 1; k <= mfit; k++)
- -            {
- -                alpha[j][k] = covar[j][k];
- -            }
- -            beta[j]     = da[j];
- -            a[lista[j]] = atry[lista[j]];
- -        }
- -    }
- -    else
- -    {
- -        *alamda *= 10.0;
- -        *chisq   = ochisq;
- -    }
+ +  int k,kk,j,ihit;
+ +  static real *da,*atry,**oneda,*beta,ochisq;
+ +  
+ +  if (*alamda < 0.0) {
+ +    oneda=matrix1(1,mfit,1,1);
+ +    atry=rvector(1,ma);
+ +    da=rvector(1,ma);
+ +    beta=rvector(1,ma);
+ +    kk=mfit+1;
+ +    for (j=1;j<=ma;j++) {
+ +      ihit=0;
+ +      for (k=1;k<=mfit;k++)
+ +      if (lista[k] == j) ihit++;
+ +      if (ihit == 0)
+ +      lista[kk++]=j;
+ +      else if (ihit > 1) {
+ +      nrerror("Bad LISTA permutation in MRQMIN-1", FALSE);
+ +      return FALSE;
+ +      }
+ +    }
+ +    if (kk != ma+1) {
+ +      nrerror("Bad LISTA permutation in MRQMIN-2", FALSE);
+ +      return FALSE;
+ +    }
+ +    *alamda=0.001;
+ +    mrqcof(x,y,sig,ndata,a,ma,lista,mfit,alpha,beta,chisq,funcs);
+ +    ochisq=(*chisq);
+ +  }
+ +  for (j=1;j<=mfit;j++) {
+ +    for (k=1;k<=mfit;k++) covar[j][k]=alpha[j][k];
+ +    covar[j][j]=alpha[j][j]*(1.0+(*alamda));
+ +    oneda[j][1]=beta[j];
+ +  }
+ +  if (!gaussj(covar,mfit,oneda,1))
+ +    return FALSE;
+ +  for (j=1;j<=mfit;j++)
+ +    da[j]=oneda[j][1];
+ +  if (*alamda == 0.0) {
+ +    covsrt(covar,ma,lista,mfit);
+ +    free_vector(beta,1);
+ +    free_vector(da,1);
+ +    free_vector(atry,1);
+ +    free_matrix(oneda,1,mfit,1);
       return TRUE;
+ +  }
+ +  for (j=1;j<=ma;j++) atry[j]=a[j];
+ +  for (j=1;j<=mfit;j++)
+ +    atry[lista[j]] = a[lista[j]]+da[j];
+ +  mrqcof(x,y,sig,ndata,atry,ma,lista,mfit,covar,da,chisq,funcs);
+ +  if (*chisq < ochisq) {
+ +    *alamda *= 0.1;
+ +    ochisq=(*chisq);
+ +    for (j=1;j<=mfit;j++) {
+ +      for (k=1;k<=mfit;k++) alpha[j][k]=covar[j][k];
+ +      beta[j]=da[j];
+ +      a[lista[j]]=atry[lista[j]];
+ +    }
+ +  } else {
+ +    *alamda *= 10.0;
+ +    *chisq=ochisq;
+ +  }
+ +  return TRUE;
   }
author	Roland Schulz <roland@utk.edu>
	Mon, 21 Jan 2013 01:12:10 +0000 (20:12 -0500)
committer	Roland Schulz <roland@utk.edu>
	Mon, 21 Jan 2013 01:30:53 +0000 (20:30 -0500)
		1	2
cmake/gmxGetCompilerInfo.cmake	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/gmxlib/confio.c	patch \|	diff1 \|	\|	blob \| history
src/gromacs/gmxlib/gmx_omp.c	patch \|	diff1 \|	\|	blob \| history
src/gromacs/gmxlib/gmx_thread_affinity.c	patch \|	\|	\|	blob
src/gromacs/legacyheaders/gmx_thread_affinity.h	patch \|	\|	\|	blob
src/gromacs/legacyheaders/mdrun.h	patch \|	diff1 \|	\|	blob \| history
src/programs/mdrun/mdrun.c	patch \|	diff1 \|	\|	blob \| history
src/programs/mdrun/runner.c	patch \|	diff1 \|	\|	blob \| history
src/tools/levenmar.c	patch \|	diff1 \|	diff2 \|	blob \| history