--- /dev/null
- fr.title = title;
+/*
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * VERSION 3.2.0
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * GROningen Mixture of Alchemy and Childrens' Stories
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+#include "sysstuff.h"
+#include "typedefs.h"
+#include "smalloc.h"
+#include "sysstuff.h"
+#include <errno.h>
+#include "macros.h"
+#include "string2.h"
+#include "confio.h"
+#include "vec.h"
+#include "symtab.h"
+#include "futil.h"
+#include "xdrf.h"
+#include "filenm.h"
+#include "pdbio.h"
+#include "tpxio.h"
+#include "gmx_fatal.h"
+#include "copyrite.h"
+#include "filenm.h"
+#include "statutil.h"
+#include "pbc.h"
+#include "mtop_util.h"
+#include "gmxfio.h"
+
+#define CHAR_SHIFT 24
+
+static int read_g96_pos(char line[],t_symtab *symtab,
+ FILE *fp,const char *infile,
+ t_trxframe *fr)
+{
+ t_atoms *atoms;
+ gmx_bool bEnd;
+ int nwanted,natoms,atnr,resnr=0,oldres,newres,shift;
+ char anm[STRLEN],resnm[STRLEN];
+ char c1,c2;
+ double db1,db2,db3;
+
+ nwanted = fr->natoms;
+
+ atoms = fr->atoms;
+
+ natoms = 0;
+
+ if (fr->bX) {
+ if (fr->bAtoms)
+ shift = CHAR_SHIFT;
+ else
+ shift = 0;
+ newres = -1;
+ oldres = -666; /* Unlikely number for the first residue! */
+ bEnd = FALSE;
+ while (!bEnd && fgets2(line,STRLEN,fp)) {
+ bEnd = (strncmp(line,"END",3) == 0);
+ if (!bEnd && (line[0] != '#')) {
+ if (sscanf(line+shift,"%15lf%15lf%15lf",&db1,&db2,&db3) != 3)
+ gmx_fatal(FARGS,"Did not find 3 coordinates for atom %d in %s\n",
+ natoms+1,infile);
+ if ((nwanted != -1) && (natoms >= nwanted))
+ gmx_fatal(FARGS,
+ "Found more coordinates (%d) in %s than expected %d\n",
+ natoms,infile,nwanted);
+ if (atoms) {
+ if (fr->bAtoms &&
+ (sscanf(line,"%5d%c%5s%c%5s%7d",&resnr,&c1,resnm,&c2,anm,&atnr)
+ != 6)) {
+ if (oldres>=0)
+ resnr = oldres;
+ else {
+ resnr = 1;
+ strncpy(resnm,"???",sizeof(resnm)-1);
+ }
+ strncpy(anm,"???",sizeof(anm)-1);
+ }
+ atoms->atomname[natoms]=put_symtab(symtab,anm);
+ if (resnr != oldres) {
+ oldres = resnr;
+ newres++;
+ if (newres >= atoms->nr)
+ gmx_fatal(FARGS,"More residues than atoms in %s (natoms = %d)",
+ infile,atoms->nr);
+ atoms->atom[natoms].resind = newres;
+ if (newres+1 > atoms->nres) {
+ atoms->nres = newres+1;
+ }
+ t_atoms_set_resinfo(atoms,natoms,symtab,resnm,resnr,' ',0,' ');
+ } else {
+ atoms->atom[natoms].resind = newres;
+ }
+ }
+ if (fr->x) {
+ fr->x[natoms][0] = db1;
+ fr->x[natoms][1] = db2;
+ fr->x[natoms][2] = db3;
+ }
+ natoms++;
+ }
+ }
+ if ((nwanted != -1) && natoms != nwanted)
+ fprintf(stderr,
+ "Warning: found less coordinates (%d) in %s than expected %d\n",
+ natoms,infile,nwanted);
+ }
+
+ fr->natoms = natoms;
+
+ return natoms;
+}
+
+static int read_g96_vel(char line[],FILE *fp,const char *infile,
+ t_trxframe *fr)
+{
+ gmx_bool bEnd;
+ int nwanted,natoms=-1,shift;
+ double db1,db2,db3;
+
+ nwanted = fr->natoms;
+
+ if (fr->v && fr->bV) {
+ if (strcmp(line,"VELOCITYRED") == 0)
+ shift = 0;
+ else
+ shift = CHAR_SHIFT;
+ natoms = 0;
+ bEnd = FALSE;
+ while (!bEnd && fgets2(line,STRLEN,fp)) {
+ bEnd = (strncmp(line,"END",3) == 0);
+ if (!bEnd && (line[0] != '#')) {
+ if (sscanf(line+shift,"%15lf%15lf%15lf",&db1,&db2,&db3) != 3)
+ gmx_fatal(FARGS,"Did not find 3 velocities for atom %d in %s",
+ natoms+1,infile);
+ if ((nwanted != -1) && (natoms >= nwanted))
+ gmx_fatal(FARGS,"Found more velocities (%d) in %s than expected %d\n",
+ natoms,infile,nwanted);
+ if (fr->v) {
+ fr->v[natoms][0] = db1;
+ fr->v[natoms][1] = db2;
+ fr->v[natoms][2] = db3;
+ }
+ natoms++;
+ }
+ }
+ if ((nwanted != -1) && (natoms != nwanted))
+ fprintf(stderr,
+ "Warning: found less velocities (%d) in %s than expected %d\n",
+ natoms,infile,nwanted);
+ }
+
+ return natoms;
+}
+
+int read_g96_conf(FILE *fp,const char *infile,t_trxframe *fr, char *line)
+{
+ t_symtab *symtab=NULL;
+ gmx_bool bAtStart,bTime,bAtoms,bPos,bVel,bBox,bEnd,bFinished;
+ int natoms,nbp;
+ double db1,db2,db3,db4,db5,db6,db7,db8,db9;
+
+ bAtStart = (ftell(fp) == 0);
+
+ clear_trxframe(fr,FALSE);
+
+ if (!symtab) {
+ snew(symtab,1);
+ open_symtab(symtab);
+ }
+
+ natoms=0;
+
+ if (bAtStart) {
+ while ( !fr->bTitle && fgets2(line,STRLEN,fp))
+ fr->bTitle = (strcmp(line,"TITLE") == 0);
+ if (fr->title == NULL) {
+ fgets2(line,STRLEN,fp);
+ fr->title = strdup(line);
+ }
+ bEnd = FALSE;
+ while (!bEnd && fgets2(line,STRLEN,fp))
+ bEnd = (strcmp(line,"END") == 0);
+ fgets2(line,STRLEN,fp);
+ }
+
+ /* Do not get a line if we are not at the start of the file, *
+ * because without a parameter file we don't know what is in *
+ * the trajectory and we have already read the line in the *
+ * previous call (VERY DIRTY). */
+ bFinished = FALSE;
+ do {
+ bTime = (strcmp(line,"TIMESTEP") == 0);
+ bAtoms = (strcmp(line,"POSITION") == 0);
+ bPos = (bAtoms || (strcmp(line,"POSITIONRED") == 0));
+ bVel = (strncmp(line,"VELOCITY",8) == 0);
+ bBox = (strcmp(line,"BOX") == 0);
+ if (bTime) {
+ if (!fr->bTime && !fr->bX) {
+ fr->bStep = bTime;
+ fr->bTime = bTime;
+ do
+ bFinished = (fgets2(line,STRLEN,fp) == NULL);
+ while (!bFinished && (line[0] == '#'));
+ sscanf(line,"%15d%15lf",&(fr->step),&db1);
+ fr->time = db1;
+ } else
+ bFinished = TRUE;
+ }
+ if (bPos) {
+ if (!fr->bX) {
+ fr->bAtoms = bAtoms;
+ fr->bX = bPos;
+ natoms = read_g96_pos(line,symtab,fp,infile,fr);
+ } else
+ bFinished = TRUE;
+ }
+ if (fr->v && bVel) {
+ fr->bV = bVel;
+ natoms = read_g96_vel(line,fp,infile,fr);
+ }
+ if (bBox) {
+ fr->bBox = bBox;
+ clear_mat(fr->box);
+ bEnd = FALSE;
+ while (!bEnd && fgets2(line,STRLEN,fp)) {
+ bEnd = (strncmp(line,"END",3) == 0);
+ if (!bEnd && (line[0] != '#')) {
+ nbp = sscanf(line,"%15lf%15lf%15lf%15lf%15lf%15lf%15lf%15lf%15lf",
+ &db1,&db2,&db3,&db4,&db5,&db6,&db7,&db8,&db9);
+ if (nbp < 3)
+ gmx_fatal(FARGS,"Found a BOX line, but no box in %s",infile);
+ fr->box[XX][XX] = db1;
+ fr->box[YY][YY] = db2;
+ fr->box[ZZ][ZZ] = db3;
+ if (nbp == 9) {
+ fr->box[XX][YY] = db4;
+ fr->box[XX][ZZ] = db5;
+ fr->box[YY][XX] = db6;
+ fr->box[YY][ZZ] = db7;
+ fr->box[ZZ][XX] = db8;
+ fr->box[ZZ][YY] = db9;
+ }
+ }
+ }
+ bFinished = TRUE;
+ }
+ } while (!bFinished && fgets2(line,STRLEN,fp));
+
+ free_symtab(symtab);
+
+ fr->natoms = natoms;
+
+ return natoms;
+}
+
+void write_g96_conf(FILE *out,t_trxframe *fr,
+ int nindex,const atom_id *index)
+{
+ t_atoms *atoms;
+ int nout,i,a;
+
+ atoms = fr->atoms;
+
+ if (index)
+ nout = nindex;
+ else
+ nout = fr->natoms;
+
+ if (fr->bTitle)
+ fprintf(out,"TITLE\n%s\nEND\n",fr->title);
+ if (fr->bStep || fr->bTime)
+ /* Officially the time format is %15.9, which is not enough for 10 ns */
+ fprintf(out,"TIMESTEP\n%15d%15.6f\nEND\n",fr->step,fr->time);
+ if (fr->bX) {
+ if (fr->bAtoms) {
+ fprintf(out,"POSITION\n");
+ for(i=0; i<nout; i++) {
+ if (index) a = index[i]; else a = i;
+ fprintf(out,"%5d %-5s %-5s%7d%15.9f%15.9f%15.9f\n",
+ (atoms->resinfo[atoms->atom[a].resind].nr) % 100000,
+ *atoms->resinfo[atoms->atom[a].resind].name,
+ *atoms->atomname[a],(i+1) % 10000000,
+ fr->x[a][XX],fr->x[a][YY],fr->x[a][ZZ]);
+ }
+ } else {
+ fprintf(out,"POSITIONRED\n");
+ for(i=0; i<nout; i++) {
+ if (index) a = index[i]; else a = i;
+ fprintf(out,"%15.9f%15.9f%15.9f\n",
+ fr->x[a][XX],fr->x[a][YY],fr->x[a][ZZ]);
+ }
+ }
+ fprintf(out,"END\n");
+ }
+ if (fr->bV) {
+ if (fr->bAtoms) {
+ fprintf(out,"VELOCITY\n");
+ for(i=0; i<nout; i++) {
+ if (index) a = index[i]; else a = i;
+ fprintf(out,"%5d %-5s %-5s%7d%15.9f%15.9f%15.9f\n",
+ (atoms->resinfo[atoms->atom[a].resind].nr) % 100000,
+ *atoms->resinfo[atoms->atom[a].resind].name,
+ *atoms->atomname[a],(i+1) % 10000000,
+ fr->v[a][XX],fr->v[a][YY],fr->v[a][ZZ]);
+ }
+ } else {
+ fprintf(out,"VELOCITYRED\n");
+ for(i=0; i<nout; i++) {
+ if (index) a = index[i]; else a = i;
+ fprintf(out,"%15.9f%15.9f%15.9f\n",
+ fr->v[a][XX],fr->v[a][YY],fr->v[a][ZZ]);
+ }
+ }
+ fprintf(out,"END\n");
+ }
+ if (fr->bBox) {
+ fprintf(out,"BOX\n");
+ fprintf(out,"%15.9f%15.9f%15.9f",
+ fr->box[XX][XX],fr->box[YY][YY],fr->box[ZZ][ZZ]);
+ if (fr->box[XX][YY] || fr->box[XX][ZZ] || fr->box[YY][XX] ||
+ fr->box[YY][ZZ] || fr->box[ZZ][XX] ||fr->box[ZZ][YY])
+ fprintf(out,"%15.9f%15.9f%15.9f%15.9f%15.9f%15.9f",
+ fr->box[XX][YY],fr->box[XX][ZZ],fr->box[YY][XX],
+ fr->box[YY][ZZ],fr->box[ZZ][XX],fr->box[ZZ][YY]);
+ fprintf(out,"\n");
+ fprintf(out,"END\n");
+ }
+}
+
+static int get_espresso_word(FILE *fp,char word[])
+{
+ int ret,nc,i;
+
+ ret = 0;
+ nc = 0;
+
+ do {
+ i = fgetc(fp);
+ if (i != EOF) {
+ if (i == ' ' || i == '\n' || i == '\t') {
+ if (nc > 0)
+ ret = 1;
+ } else if (i == '{') {
+ if (nc == 0)
+ word[nc++] = '{';
+ ret = 2;
+ } else if (i == '}') {
+ if (nc == 0)
+ word[nc++] = '}';
+ ret = 3;
+ } else {
+ word[nc++] = (char)i;
+ }
+ }
+ } while (i != EOF && ret == 0);
+
+ word[nc] = '\0';
+
+ /* printf("'%s'\n",word); */
+
+ return ret;
+}
+
+static int check_open_parenthesis(FILE *fp,int r,
+ const char *infile,const char *keyword)
+{
+ int level_inc;
+ char word[STRLEN];
+
+ level_inc = 0;
+ if (r == 2) {
+ level_inc++;
+ } else {
+ r = get_espresso_word(fp,word);
+ if (r == 2)
+ level_inc++;
+ else
+ gmx_fatal(FARGS,"Expected '{' after '%s' in file '%s'",
+ keyword,infile);
+ }
+
+ return level_inc;
+}
+
+static int check_close_parenthesis(FILE *fp,int r,
+ const char *infile,const char *keyword)
+{
+ int level_inc;
+ char word[STRLEN];
+
+ level_inc = 0;
+ if (r == 3) {
+ level_inc--;
+ } else {
+ r = get_espresso_word(fp,word);
+ if (r == 3)
+ level_inc--;
+ else
+ gmx_fatal(FARGS,"Expected '}' after section '%s' in file '%s'",
+ keyword,infile);
+ }
+
+ return level_inc;
+}
+
+enum { espID, espPOS, espTYPE, espQ, espV, espF, espMOLECULE, espNR };
+const char *esp_prop[espNR] = { "id", "pos", "type", "q", "v", "f",
+ "molecule" };
+
+static void read_espresso_conf(const char *infile,
+ t_atoms *atoms,rvec x[],rvec *v,matrix box)
+{
+ t_symtab *symtab=NULL;
+ FILE *fp;
+ char word[STRLEN],buf[STRLEN];
+ int natoms,level,npar,r,nprop,p,i,m,molnr;
+ int prop[32];
+ double d;
+ gmx_bool bFoundParticles,bFoundProp,bFoundVariable,bMol;
+
+ if (!symtab) {
+ snew(symtab,1);
+ open_symtab(symtab);
+ }
+
+ clear_mat(box);
+
+ fp = gmx_fio_fopen(infile,"r");
+
+ bFoundParticles = FALSE;
+ bFoundVariable = FALSE;
+ bMol = FALSE;
+ level = 0;
+ while ((r=get_espresso_word(fp,word))) {
+ if (level==1 && strcmp(word,"particles")==0 && !bFoundParticles) {
+ bFoundParticles = TRUE;
+ level += check_open_parenthesis(fp,r,infile,"particles");
+ nprop = 0;
+ while (level == 2 && (r=get_espresso_word(fp,word))) {
+ bFoundProp = FALSE;
+ for(p=0; p<espNR; p++) {
+ if (strcmp(word,esp_prop[p]) == 0) {
+ bFoundProp = TRUE;
+ prop[nprop++] = p;
+ /* printf(" prop[%d] = %s\n",nprop-1,esp_prop[prop[nprop-1]]); */
+ }
+ }
+ if (!bFoundProp && word[0] != '}') {
+ gmx_fatal(FARGS,"Can not read Espresso files with particle property '%s'",word);
+ }
+ if (bFoundProp && p == espMOLECULE)
+ bMol = TRUE;
+ if (r == 3)
+ level--;
+ }
+
+ i = 0;
+ while (level > 0 && (r=get_espresso_word(fp,word))) {
+ if (r == 2) {
+ level++;
+ } else if (r == 3) {
+ level--;
+ }
+ if (level == 2) {
+ for(p=0; p<nprop; p++) {
+ switch (prop[p]) {
+ case espID:
+ r = get_espresso_word(fp,word);
+ /* Not used */
+ break;
+ case espPOS:
+ for(m=0; m<3; m++) {
+ r = get_espresso_word(fp,word);
+ sscanf(word,"%lf",&d);
+ x[i][m] = d;
+ }
+ break;
+ case espTYPE:
+ r = get_espresso_word(fp,word);
+ atoms->atom[i].type = strtol(word, NULL, 10);
+ break;
+ case espQ:
+ r = get_espresso_word(fp,word);
+ sscanf(word,"%lf",&d);
+ atoms->atom[i].q = d;
+ break;
+ case espV:
+ for(m=0; m<3; m++) {
+ r = get_espresso_word(fp,word);
+ sscanf(word,"%lf",&d);
+ v[i][m] = d;
+ }
+ break;
+ case espF:
+ for(m=0; m<3; m++) {
+ r = get_espresso_word(fp,word);
+ /* not used */
+ }
+ break;
+ case espMOLECULE:
+ r = get_espresso_word(fp,word);
+ molnr = strtol(word, NULL, 10);
+ if (i == 0 ||
+ atoms->resinfo[atoms->atom[i-1].resind].nr != molnr) {
+ atoms->atom[i].resind =
+ (i == 0 ? 0 : atoms->atom[i-1].resind+1);
+ atoms->resinfo[atoms->atom[i].resind].nr = molnr;
+ atoms->resinfo[atoms->atom[i].resind].ic = ' ';
+ atoms->resinfo[atoms->atom[i].resind].chainid = ' ';
+ atoms->resinfo[atoms->atom[i].resind].chainnum = molnr; /* Not sure if this is right? */
+ } else {
+ atoms->atom[i].resind = atoms->atom[i-1].resind;
+ }
+ break;
+ }
+ }
+ /* Generate an atom name from the particle type */
+ sprintf(buf,"T%d",atoms->atom[i].type);
+ atoms->atomname[i] = put_symtab(symtab,buf);
+ if (bMol) {
+ if (i == 0 || atoms->atom[i].resind != atoms->atom[i-1].resind) {
+ atoms->resinfo[atoms->atom[i].resind].name =
+ put_symtab(symtab,"MOL");
+ }
+ } else {
+ /* Residue number is the atom number */
+ atoms->atom[i].resind = i;
+ /* Generate an residue name from the particle type */
+ if (atoms->atom[i].type < 26) {
+ sprintf(buf,"T%c",'A'+atoms->atom[i].type);
+ } else {
+ sprintf(buf,"T%c%c",
+ 'A'+atoms->atom[i].type/26,'A'+atoms->atom[i].type%26);
+ }
+ t_atoms_set_resinfo(atoms,i,symtab,buf,i,' ',0,' ');
+ }
+
+ if (r == 3)
+ level--;
+ i++;
+ }
+ }
+ atoms->nres = atoms->nr;
+
+ if (i != atoms->nr) {
+ gmx_fatal(FARGS,"Internal inconsistency in Espresso routines, read %d atoms, expected %d atoms",i,atoms->nr);
+ }
+ } else if (level==1 && strcmp(word,"variable")==0 && !bFoundVariable) {
+ bFoundVariable = TRUE;
+ level += check_open_parenthesis(fp,r,infile,"variable");
+ while (level==2 && (r=get_espresso_word(fp,word))) {
+ if (level==2 && strcmp(word,"box_l") == 0) {
+ for(m=0; m<3; m++) {
+ r = get_espresso_word(fp,word);
+ sscanf(word,"%lf",&d);
+ box[m][m] = d;
+ }
+ level += check_close_parenthesis(fp,r,infile,"box_l");
+ }
+ }
+ } else if (r == 2) {
+ level++;
+ } else if (r == 3) {
+ level--;
+ }
+ }
+
+ if (!bFoundParticles) {
+ fprintf(stderr,"Did not find a particles section in Espresso file '%s'\n",
+ infile);
+ }
+
+ gmx_fio_fclose(fp);
+}
+
+static int get_espresso_coordnum(const char *infile)
+{
+ FILE *fp;
+ char word[STRLEN];
+ int natoms,level,r;
+ gmx_bool bFoundParticles;
+
+ natoms = 0;
+
+ fp = gmx_fio_fopen(infile,"r");
+
+ bFoundParticles = FALSE;
+ level = 0;
+ while ((r=get_espresso_word(fp,word)) && !bFoundParticles) {
+ if (level==1 && strcmp(word,"particles")==0 && !bFoundParticles) {
+ bFoundParticles = TRUE;
+ level += check_open_parenthesis(fp,r,infile,"particles");
+ while (level > 0 && (r=get_espresso_word(fp,word))) {
+ if (r == 2) {
+ level++;
+ if (level == 2)
+ natoms++;
+ } else if (r == 3) {
+ level--;
+ }
+ }
+ } else if (r == 2) {
+ level++;
+ } else if (r == 3) {
+ level--;
+ }
+ }
+ if (!bFoundParticles) {
+ fprintf(stderr,"Did not find a particles section in Espresso file '%s'\n",
+ infile);
+ }
+
+ gmx_fio_fclose(fp);
+
+ return natoms;
+}
+
+static void write_espresso_conf_indexed(FILE *out,const char *title,
+ t_atoms *atoms,int nx,atom_id *index,
+ rvec *x,rvec *v,matrix box)
+{
+ int i,j;
+
+ fprintf(out,"# %s\n",title);
+ if (TRICLINIC(box)) {
+ gmx_warning("The Espresso format does not support triclinic unit-cells");
+ }
+ fprintf(out,"{variable {box_l %f %f %f}}\n",box[0][0],box[1][1],box[2][2]);
+
+ fprintf(out,"{particles {id pos type q%s}\n",v ? " v" : "");
+ for(i=0; i<nx; i++) {
+ if (index)
+ j = index[i];
+ else
+ j = i;
+ fprintf(out,"\t{%d %f %f %f %d %g",
+ j,x[j][XX],x[j][YY],x[j][ZZ],
+ atoms->atom[j].type,atoms->atom[j].q);
+ if (v)
+ fprintf(out," %f %f %f",v[j][XX],v[j][YY],v[j][ZZ]);
+ fprintf(out,"}\n");
+ }
+ fprintf(out,"}\n");
+}
+
+static void get_coordnum_fp (FILE *in, char *title, int *natoms)
+{
+ char line[STRLEN+1];
+
+ fgets2 (title,STRLEN,in);
+ fgets2 (line,STRLEN,in);
+ if (sscanf (line,"%d",natoms) != 1) {
+ gmx_fatal(FARGS,"gro file does not have the number of atoms on the second line");
+ }
+}
+
+static void get_coordnum (const char *infile,int *natoms)
+{
+ FILE *in;
+ char title[STRLEN];
+
+ in=gmx_fio_fopen(infile,"r");
+ get_coordnum_fp(in,title,natoms);
+ gmx_fio_fclose (in);
+}
+
+static gmx_bool get_w_conf(FILE *in, const char *infile, char *title,
+ t_symtab *symtab, t_atoms *atoms, int *ndec,
+ rvec x[], rvec *v, matrix box)
+{
+ char name[6];
+ char line[STRLEN+1],*ptr;
+ char buf[256];
+ double x1,y1,z1,x2,y2,z2;
+ rvec xmin,xmax;
+ int natoms,i,m,resnr,newres,oldres,ddist,c;
+ gmx_bool bFirst,bVel;
+ char *p1,*p2,*p3;
+
+ newres = -1;
+ oldres = NOTSET; /* Unlikely number for the first residue! */
+ ddist = 0;
+
+ /* Read the title and number of atoms */
+ get_coordnum_fp(in,title,&natoms);
+
+ if (natoms > atoms->nr)
+ gmx_fatal(FARGS,"gro file contains more atoms (%d) than expected (%d)",
+ natoms,atoms->nr);
+ else if (natoms < atoms->nr)
+ fprintf(stderr,"Warning: gro file contains less atoms (%d) than expected"
+ " (%d)\n",natoms,atoms->nr);
+
+ bFirst=TRUE;
+
+ bVel = FALSE;
+
+ /* just pray the arrays are big enough */
+ for (i=0; (i < natoms) ; i++) {
+ if ((fgets2 (line,STRLEN,in)) == NULL) {
+ unexpected_eof(infile,i+2);
+ }
+ if (strlen(line) < 39)
+ gmx_fatal(FARGS,"Invalid line in %s for atom %d:\n%s",infile,i+1,line);
+
+ /* determine read precision from distance between periods
+ (decimal points) */
+ if (bFirst) {
+ bFirst=FALSE;
+ p1=strchr(line,'.');
+ if (p1 == NULL)
+ gmx_fatal(FARGS,"A coordinate in file %s does not contain a '.'",infile);
+ p2=strchr(&p1[1],'.');
+ if (p2 == NULL)
+ gmx_fatal(FARGS,"A coordinate in file %s does not contain a '.'",infile);
+ ddist = p2 - p1;
+ *ndec = ddist - 5;
+
+ p3=strchr(&p2[1],'.');
+ if (p3 == NULL)
+ gmx_fatal(FARGS,"A coordinate in file %s does not contain a '.'",infile);
+
+ if (p3 - p2 != ddist)
+ gmx_fatal(FARGS,"The spacing of the decimal points in file %s is not consistent for x, y and z",infile);
+ }
+
+ /* residue number*/
+ memcpy(name,line,5);
+ name[5]='\0';
+ sscanf(name,"%d",&resnr);
+ memcpy(name,line+5,5);
+ name[5]='\0';
+ if (resnr != oldres) {
+ oldres = resnr;
+ newres++;
+ if (newres >= natoms)
+ gmx_fatal(FARGS,"More residues than atoms in %s (natoms = %d)",
+ infile,natoms);
+ atoms->atom[i].resind = newres;
+ t_atoms_set_resinfo(atoms,i,symtab,name,resnr,' ',0,' ');
+ } else {
+ atoms->atom[i].resind = newres;
+ }
+
+ /* atomname */
+ memcpy(name,line+10,5);
+ atoms->atomname[i]=put_symtab(symtab,name);
+
+ /* eventueel controle atomnumber met i+1 */
+
+ /* coordinates (start after residue shit) */
+ ptr = line + 20;
+ /* Read fixed format */
+ for(m=0; m<DIM; m++) {
+ for(c=0; (c<ddist && ptr[0]); c++) {
+ buf[c] = ptr[0];
+ ptr++;
+ }
+ buf[c] = '\0';
+ if (sscanf (buf,"%lf %lf",&x1,&x2) != 1) {
+ gmx_fatal(FARGS,"Something is wrong in the coordinate formatting of file %s. Note that gro is fixed format (see the manual)",infile);
+ } else {
+ x[i][m] = x1;
+ }
+ }
+
+ /* velocities (start after residues and coordinates) */
+ if (v) {
+ /* Read fixed format */
+ for(m=0; m<DIM; m++) {
+ for(c=0; (c<ddist && ptr[0]); c++) {
+ buf[c] = ptr[0];
+ ptr++;
+ }
+ buf[c] = '\0';
+ if (sscanf (buf,"%lf",&x1) != 1) {
+ v[i][m] = 0;
+ } else {
+ v[i][m] = x1;
+ bVel = TRUE;
+ }
+ }
+ }
+ }
+ atoms->nres = newres + 1;
+
+ /* box */
+ fgets2 (line,STRLEN,in);
+ if (sscanf (line,"%lf%lf%lf",&x1,&y1,&z1) != 3) {
+ gmx_warning("Bad box in file %s",infile);
+
+ /* Generate a cubic box */
+ for(m=0; (m<DIM); m++)
+ xmin[m]=xmax[m]=x[0][m];
+ for(i=1; (i<atoms->nr); i++)
+ for(m=0; (m<DIM); m++) {
+ xmin[m]=min(xmin[m],x[i][m]);
+ xmax[m]=max(xmax[m],x[i][m]);
+ }
+ for (i=0; i<DIM; i++) for (m=0; m<DIM; m++) box[i][m]=0.0;
+ for(m=0; (m<DIM); m++)
+ box[m][m]=(xmax[m]-xmin[m]);
+ fprintf(stderr,"Generated a cubic box %8.3f x %8.3f x %8.3f\n",
+ box[XX][XX],box[YY][YY],box[ZZ][ZZ]);
+ }
+ else {
+ /* We found the first three values, the diagonal elements */
+ box[XX][XX]=x1;
+ box[YY][YY]=y1;
+ box[ZZ][ZZ]=z1;
+ if (sscanf (line,"%*f%*f%*f%lf%lf%lf%lf%lf%lf",
+ &x1,&y1,&z1,&x2,&y2,&z2) != 6)
+ x1=y1=z1=x2=y2=z2=0.0;
+ box[XX][YY] = x1;
+ box[XX][ZZ] = y1;
+ box[YY][XX] = z1;
+ box[YY][ZZ] = x2;
+ box[ZZ][XX] = y2;
+ box[ZZ][YY] = z2;
+ }
+
+ return bVel;
+}
+
+static void read_whole_conf(const char *infile,char *title,
+ t_atoms *atoms, rvec x[],rvec *v, matrix box)
+{
+ FILE *in;
+ int ndec;
+ t_symtab symtab;
+
+ /* open file */
+ in=gmx_fio_fopen(infile,"r");
+
+ open_symtab(&symtab);
+ get_w_conf(in, infile, title, &symtab, atoms, &ndec, x, v, box);
+ /* We can't free the symbols, as they are still used in atoms, so
+ * the only choice is to leak them. */
+ free_symtab(&symtab);
+
+ gmx_fio_fclose(in);
+}
+
+gmx_bool gro_next_x_or_v(FILE *status,t_trxframe *fr)
+{
+ t_atoms atoms;
+ t_symtab symtab;
+ char title[STRLEN],*p;
+ double tt;
+ int ndec=0,i;
+
+ if (gmx_eof(status))
+ return FALSE;
+
+ open_symtab(&symtab);
+ atoms.nr=fr->natoms;
+ snew(atoms.atom,fr->natoms);
+ atoms.nres=fr->natoms;
+ snew(atoms.resinfo,fr->natoms);
+ snew(atoms.atomname,fr->natoms);
+
+ fr->bV = get_w_conf(status,title,title,&symtab,&atoms,&ndec,fr->x,fr->v,fr->box);
+ fr->bPrec = TRUE;
+ fr->prec = 1;
+ /* prec = 10^ndec: */
+ for(i=0; i<ndec; i++)
+ fr->prec *= 10;
+ fr->title = title;
+ fr->bTitle = TRUE;
+ fr->bX = TRUE;
+ fr->bBox = TRUE;
+
+ sfree(atoms.atom);
+ sfree(atoms.resinfo);
+ sfree(atoms.atomname);
+ done_symtab(&symtab);
+
+ if ((p=strstr(title,"t=")) != NULL) {
+ p+=2;
+ if (sscanf(p,"%lf",&tt)==1) {
+ fr->time = tt;
+ fr->bTime = TRUE;
+ } else {
+ fr->time = 0;
+ fr->bTime = FALSE;
+ }
+ }
+
+ if (atoms.nr != fr->natoms)
+ gmx_fatal(FARGS,"Number of atoms in gro frame (%d) doesn't match the number in the previous frame (%d)",atoms.nr,fr->natoms);
+
+ return TRUE;
+}
+
+int gro_first_x_or_v(FILE *status,t_trxframe *fr)
+{
+ char title[STRLEN];
+
+ frewind(status);
+ fprintf(stderr,"Reading frames from gro file");
+ get_coordnum_fp(status, title, &fr->natoms);
+ frewind(status);
+ fprintf(stderr," '%s', %d atoms.\n",title, fr->natoms);
+ fr->bTitle = TRUE;
+ fr->title = title;
+ if (fr->natoms==0)
+ gmx_file("No coordinates in gro file");
+
+ snew(fr->x,fr->natoms);
+ snew(fr->v,fr->natoms);
+ gro_next_x_or_v(status, fr);
+
+ return fr->natoms;
+}
+
+static void make_hconf_format(int pr,gmx_bool bVel,char format[])
+{
+ int l,vpr;
+
+ /* build format string for printing,
+ something like "%8.3f" for x and "%8.4f" for v */
+ if (pr<0)
+ pr=0;
+ if (pr>30)
+ pr=30;
+ l=pr+5;
+ vpr=pr+1;
+ if (bVel)
+ sprintf(format,"%%%d.%df%%%d.%df%%%d.%df%%%d.%df%%%d.%df%%%d.%df\n",
+ l,pr,l,pr,l,pr,l,vpr,l,vpr,l,vpr);
+ else
+ sprintf(format,"%%%d.%df%%%d.%df%%%d.%df\n",l,pr,l,pr,l,pr);
+
+}
+
+static void write_hconf_box(FILE *out,int pr,matrix box)
+{
+ char format[100];
+ int l;
+
+ if (pr<5)
+ pr=5;
+ l=pr+5;
+
+ if (box[XX][YY] || box[XX][ZZ] || box[YY][XX] || box[YY][ZZ] ||
+ box[ZZ][XX] || box[ZZ][YY]) {
+ sprintf(format,"%%%d.%df%%%d.%df%%%d.%df"
+ "%%%d.%df%%%d.%df%%%d.%df%%%d.%df%%%d.%df%%%d.%df\n",
+ l,pr,l,pr,l,pr,l,pr,l,pr,l,pr,l,pr,l,pr,l,pr);
+ fprintf(out,format,
+ box[XX][XX],box[YY][YY],box[ZZ][ZZ],
+ box[XX][YY],box[XX][ZZ],box[YY][XX],
+ box[YY][ZZ],box[ZZ][XX],box[ZZ][YY]);
+ } else {
+ sprintf(format,"%%%d.%df%%%d.%df%%%d.%df\n",l,pr,l,pr,l,pr);
+ fprintf(out,format,
+ box[XX][XX],box[YY][YY],box[ZZ][ZZ]);
+ }
+}
+
+void write_hconf_indexed_p(FILE *out,const char *title,t_atoms *atoms,
+ int nx,const atom_id index[], int pr,
+ rvec *x,rvec *v,matrix box)
+{
+ char resnm[6],nm[6],format[100];
+ int ai,i,resind,resnr;
+
+ bromacs(format,99);
+ fprintf (out,"%s\n",(title && title[0])?title:format);
+ fprintf (out,"%5d\n",nx);
+
+ make_hconf_format(pr,v!=NULL,format);
+
+ for (i=0; (i<nx); i++) {
+ ai=index[i];
+
+ resind = atoms->atom[ai].resind;
+ strncpy(resnm," ??? ",sizeof(resnm)-1);
+ if (resind < atoms->nres) {
+ strncpy(resnm,*atoms->resinfo[resind].name,sizeof(resnm)-1);
+ resnr = atoms->resinfo[resind].nr;
+ } else {
+ strncpy(resnm," ??? ",sizeof(resnm)-1);
+ resnr = resind + 1;
+ }
+
+ if (atoms->atom)
+ strncpy(nm,*atoms->atomname[ai],sizeof(nm)-1);
+ else
+ strncpy(nm," ??? ",sizeof(nm)-1);
+
+ fprintf(out,"%5d%-5.5s%5.5s%5d",resnr%100000,resnm,nm,(ai+1)%100000);
+ /* next fprintf uses built format string */
+ if (v)
+ fprintf(out,format,
+ x[ai][XX], x[ai][YY], x[ai][ZZ], v[ai][XX],v[ai][YY],v[ai][ZZ]);
+ else
+ fprintf(out,format,
+ x[ai][XX], x[ai][YY], x[ai][ZZ]);
+ }
+
+ write_hconf_box(out,pr,box);
+
+ fflush(out);
+}
+
+static void write_hconf_mtop(FILE *out,const char *title,gmx_mtop_t *mtop,
+ int pr,
+ rvec *x,rvec *v,matrix box)
+{
+ char format[100];
+ int i,resnr;
+ gmx_mtop_atomloop_all_t aloop;
+ t_atom *atom;
+ char *atomname,*resname;
+
+ bromacs(format,99);
+ fprintf (out,"%s\n",(title && title[0])?title:format);
+ fprintf (out,"%5d\n",mtop->natoms);
+
+ make_hconf_format(pr,v!=NULL,format);
+
+ aloop = gmx_mtop_atomloop_all_init(mtop);
+ while (gmx_mtop_atomloop_all_next(aloop,&i,&atom)) {
+ gmx_mtop_atomloop_all_names(aloop,&atomname,&resnr,&resname);
+
+ fprintf(out,"%5d%-5.5s%5.5s%5d",
+ resnr%100000,resname,atomname,(i+1)%100000);
+ /* next fprintf uses built format string */
+ if (v)
+ fprintf(out,format,
+ x[i][XX], x[i][YY], x[i][ZZ], v[i][XX],v[i][YY],v[i][ZZ]);
+ else
+ fprintf(out,format,
+ x[i][XX], x[i][YY], x[i][ZZ]);
+ }
+
+ write_hconf_box(out,pr,box);
+
+ fflush(out);
+}
+
+void write_hconf_p(FILE *out,const char *title,t_atoms *atoms, int pr,
+ rvec *x,rvec *v,matrix box)
+{
+ atom_id *aa;
+ int i;
+
+ snew(aa,atoms->nr);
+ for(i=0; (i<atoms->nr); i++)
+ aa[i]=i;
+ write_hconf_indexed_p(out,title,atoms,atoms->nr,aa,pr,x,v,box);
+ sfree(aa);
+}
+
+void write_conf_p(const char *outfile, const char *title,
+ t_atoms *atoms, int pr,
+ rvec *x, rvec *v,matrix box)
+{
+ FILE *out;
+
+ out=gmx_fio_fopen(outfile,"w");
+ write_hconf_p(out,title,atoms,pr,x,v,box);
+
+ gmx_fio_fclose (out);
+}
+
+static void write_conf(const char *outfile, const char *title, t_atoms *atoms,
+ rvec *x, rvec *v,matrix box)
+{
+ write_conf_p(outfile, title, atoms, 3, x, v, box);
+}
+
+void write_sto_conf_indexed(const char *outfile,const char *title,
+ t_atoms *atoms,
+ rvec x[],rvec *v,int ePBC,matrix box,
+ atom_id nindex,atom_id index[])
+{
+ FILE *out;
+ int ftp;
+ t_trxframe fr;
+
+ ftp=fn2ftp(outfile);
+ switch (ftp) {
+ case efGRO:
+ out=gmx_fio_fopen(outfile,"w");
+ write_hconf_indexed_p(out, title, atoms, nindex, index, 3, x, v, box);
+ gmx_fio_fclose(out);
+ break;
+ case efG96:
+ clear_trxframe(&fr,TRUE);
+ fr.bTitle = TRUE;
+ fr.title = title;
+ fr.natoms = atoms->nr;
+ fr.bAtoms = TRUE;
+ fr.atoms = atoms;
+ fr.bX = TRUE;
+ fr.x = x;
+ if (v) {
+ fr.bV = TRUE;
+ fr.v = v;
+ }
+ fr.bBox = TRUE;
+ copy_mat(box,fr.box);
+ out=gmx_fio_fopen(outfile,"w");
+ write_g96_conf(out, &fr, nindex, index);
+ gmx_fio_fclose(out);
+ break;
+ case efPDB:
+ case efBRK:
+ case efENT:
+ case efPQR:
+ out=gmx_fio_fopen(outfile,"w");
+ write_pdbfile_indexed(out,title,atoms,x,ePBC,box,' ',-1,nindex,index,NULL,TRUE);
+ gmx_fio_fclose(out);
+ break;
+ case efESP:
+ out=gmx_fio_fopen(outfile,"w");
+ write_espresso_conf_indexed(out, title, atoms, nindex, index, x, v, box);
+ gmx_fio_fclose(out);
+ break;
+ case efTPR:
+ case efTPB:
+ case efTPA:
+ gmx_fatal(FARGS,"Sorry, can not write a topology to %s",outfile);
+ break;
+ default:
+ gmx_incons("Not supported in write_sto_conf_indexed");
+ }
+}
+
+static void write_xyz_conf(const char *outfile,const char *title,
+ t_atoms *atoms,rvec *x)
+{
+ FILE *fp;
+ int i,anr;
+ real value;
+ char *ptr,*name;
+ gmx_atomprop_t aps=gmx_atomprop_init();
+
+ fp = gmx_fio_fopen(outfile,"w");
+ fprintf(fp,"%3d\n",atoms->nr);
+ fprintf(fp,"%s\n",title);
+ for(i=0; (i<atoms->nr); i++) {
+ anr = atoms->atom[i].atomnumber;
+ name = *atoms->atomname[i];
+ if (anr == NOTSET) {
+ if (gmx_atomprop_query(aps,epropElement,"???",name,&value))
+ anr = gmx_nint(value);
+ }
+ if ((ptr = gmx_atomprop_element(aps,anr)) == NULL)
+ ptr = name;
+ fprintf(fp,"%3s%10.5f%10.5f%10.5f\n",ptr,
+ 10*x[i][XX],10*x[i][YY],10*x[i][ZZ]);
+ }
+ gmx_fio_fclose(fp);
+ gmx_atomprop_destroy(aps);
+}
+
+void write_sto_conf(const char *outfile,const char *title,t_atoms *atoms,
+ rvec x[],rvec *v,int ePBC,matrix box)
+{
+ FILE *out;
+ int ftp;
+ t_trxframe fr;
+
+ ftp=fn2ftp(outfile);
+ switch (ftp) {
+ case efGRO:
+ write_conf(outfile, title, atoms, x, v, box);
+ break;
+ case efG96:
+ clear_trxframe(&fr,TRUE);
+ fr.bTitle = TRUE;
+ fr.title = title;
+ fr.natoms = atoms->nr;
+ fr.bAtoms = TRUE;
+ fr.atoms = atoms;
+ fr.bX = TRUE;
+ fr.x = x;
+ if (v) {
+ fr.bV = TRUE;
+ fr.v = v;
+ }
+ fr.bBox = TRUE;
+ copy_mat(box,fr.box);
+ out=gmx_fio_fopen(outfile,"w");
+ write_g96_conf(out, &fr, -1, NULL);
+ gmx_fio_fclose(out);
+ break;
+ case efXYZ:
+ write_xyz_conf(outfile,(strlen(title) > 0) ? title : outfile,atoms,x);
+ break;
+ case efPDB:
+ case efBRK:
+ case efENT:
+ out=gmx_fio_fopen(outfile,"w");
+ write_pdbfile(out, title, atoms, x, ePBC, box, ' ', -1,NULL,TRUE);
+ gmx_fio_fclose(out);
+ break;
+ case efESP:
+ out=gmx_fio_fopen(outfile,"w");
+ write_espresso_conf_indexed(out, title, atoms, atoms->nr, NULL, x, v, box);
+ gmx_fio_fclose(out);
+ break;
+ case efTPR:
+ case efTPB:
+ case efTPA:
+ gmx_fatal(FARGS,"Sorry, can not write a topology to %s",outfile);
+ break;
+ default:
+ gmx_incons("Not supported in write_sto_conf");
+ }
+}
+
+void write_sto_conf_mtop(const char *outfile,const char *title,
+ gmx_mtop_t *mtop,
+ rvec x[],rvec *v,int ePBC,matrix box)
+{
+ int ftp;
+ FILE *out;
+ t_atoms atoms;
+
+ ftp=fn2ftp(outfile);
+ switch (ftp) {
+ case efGRO:
+ out = gmx_fio_fopen(outfile,"w");
+ write_hconf_mtop(out,title,mtop,3,x,v,box);
+ gmx_fio_fclose(out);
+ break;
+ default:
+ /* This is a brute force approach which requires a lot of memory.
+ * We should implement mtop versions of all writing routines.
+ */
+ atoms = gmx_mtop_global_atoms(mtop);
+
+ write_sto_conf(outfile,title,&atoms,x,v,ePBC,box);
+
+ done_atom(&atoms);
+ break;
+ }
+}
+
+static int get_xyz_coordnum(const char *infile)
+{
+ FILE *fp;
+ int n;
+
+ fp = gmx_fio_fopen(infile,"r");
+ if (fscanf(fp,"%d",&n) != 1)
+ gmx_fatal(FARGS,"Can not read number of atoms from %s",infile);
+ gmx_fio_fclose(fp);
+
+ return n;
+}
+
+static void read_xyz_conf(const char *infile,char *title,
+ t_atoms *atoms,rvec *x)
+{
+ FILE *fp;
+ int i,n;
+ double xx,yy,zz;
+ t_symtab *tab;
+ char atomnm[32],buf[STRLEN];
+
+ snew(tab,1);
+ fp = gmx_fio_fopen(infile,"r");
+ fgets2(buf,STRLEN-1,fp);
+ if (sscanf(buf,"%d",&n) != 1)
+ gmx_fatal(FARGS,"Can not read number of atoms from %s",infile);
+ fgets2(buf,STRLEN-1,fp);
+ strcpy(title,buf);
+ for(i=0; (i<n); i++) {
+ fgets2(buf,STRLEN-1,fp);
+ if (sscanf(buf,"%s%lf%lf%lf",atomnm,&xx,&yy,&zz) != 4)
+ gmx_fatal(FARGS,"Can not read coordinates from %s",infile);
+ atoms->atomname[i] = put_symtab(tab,atomnm);
+ x[i][XX] = xx*0.1;
+ x[i][YY] = yy*0.1;
+ x[i][ZZ] = zz*0.1;
+ }
+ gmx_fio_fclose(fp);
+}
+
+void get_stx_coordnum(const char *infile,int *natoms)
+{
+ FILE *in;
+ int ftp,tpxver,tpxgen;
+ t_trxframe fr;
+ char g96_line[STRLEN+1];
+
+ ftp=fn2ftp(infile);
+ range_check(ftp,0,efNR);
+ switch (ftp) {
+ case efGRO:
+ get_coordnum(infile, natoms);
+ break;
+ case efG96:
+ in=gmx_fio_fopen(infile,"r");
+ fr.title = NULL;
+ fr.natoms = -1;
+ fr.atoms = NULL;
+ fr.x = NULL;
+ fr.v = NULL;
+ fr.f = NULL;
+ *natoms=read_g96_conf(in,infile,&fr,g96_line);
+ gmx_fio_fclose(in);
+ break;
+ case efXYZ:
+ *natoms = get_xyz_coordnum(infile);
+ break;
+ case efPDB:
+ case efBRK:
+ case efENT:
+ in=gmx_fio_fopen(infile,"r");
+ get_pdb_coordnum(in, natoms);
+ gmx_fio_fclose(in);
+ break;
+ case efESP:
+ *natoms = get_espresso_coordnum(infile);
+ break;
+ case efTPA:
+ case efTPB:
+ case efTPR: {
+ t_tpxheader tpx;
+
+ read_tpxheader(infile,&tpx,TRUE,&tpxver,&tpxgen);
+ *natoms = tpx.natoms;
+ break;
+ }
+ default:
+ gmx_fatal(FARGS,"File type %s not supported in get_stx_coordnum",
+ ftp2ext(ftp));
+ }
+}
+
+void read_stx_conf(const char *infile,char *title,t_atoms *atoms,
+ rvec x[],rvec *v,int *ePBC,matrix box)
+{
+ FILE *in;
+ char buf[256];
+ gmx_mtop_t *mtop;
+ t_topology top;
+ t_trxframe fr;
+ int i,ftp,natoms;
+ real d;
+ char g96_line[STRLEN+1];
+
+ if (atoms->nr == 0)
+ fprintf(stderr,"Warning: Number of atoms in %s is 0\n",infile);
+ else if (atoms->atom == NULL)
+ gmx_mem("Uninitialized array atom");
+
+ if (ePBC)
+ *ePBC = -1;
+
+ ftp=fn2ftp(infile);
+ switch (ftp) {
+ case efGRO:
+ read_whole_conf(infile, title, atoms, x, v, box);
+ break;
+ case efXYZ:
+ read_xyz_conf(infile,title,atoms,x);
+ break;
+ case efG96:
++ fr.title = NULL;
+ fr.natoms = atoms->nr;
+ fr.atoms = atoms;
+ fr.x = x;
+ fr.v = v;
+ fr.f = NULL;
+ in = gmx_fio_fopen(infile,"r");
+ read_g96_conf(in, infile, &fr, g96_line);
+ gmx_fio_fclose(in);
+ copy_mat(fr.box,box);
++ strncpy(title, fr.title, STRLEN);
+ break;
+ case efPDB:
+ case efBRK:
+ case efENT:
+ read_pdb_conf(infile, title, atoms, x, ePBC, box, TRUE, NULL);
+ break;
+ case efESP:
+ read_espresso_conf(infile,atoms,x,v,box);
+ break;
+ case efTPR:
+ case efTPB:
+ case efTPA:
+ snew(mtop,1);
+ i = read_tpx(infile,NULL,box,&natoms,x,v,NULL,mtop);
+ if (ePBC)
+ *ePBC = i;
+
+ strcpy(title,*(mtop->name));
+
+ /* Free possibly allocated memory */
+ done_atom(atoms);
+
+ *atoms = gmx_mtop_global_atoms(mtop);
+ top = gmx_mtop_t_to_t_topology(mtop);
+ tpx_make_chain_identifiers(atoms,&top.mols);
+
+ sfree(mtop);
+ /* The strings in the symtab are still in use in the returned t_atoms
+ * structure, so we should not free them. But there is no place to put the
+ * symbols; the only choice is to leak the memory...
+ * So we clear the symbol table before freeing the topology structure. */
+ free_symtab(&top.symtab);
+ done_top(&top);
+
+ break;
+ default:
+ gmx_incons("Not supported in read_stx_conf");
+ }
+}
+
--- /dev/null
- if (!hw_opt->bThreadPinning)
+/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ *
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * Written by the Gromacs development team under coordination of
+ * David van der Spoel, Berk Hess, and Erik Lindahl.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org
+ *
+ * And Hey:
+ * Gnomes, ROck Monsters And Chili Sauce
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef GMX_OPENMP
+#include <omp.h>
+#endif
+
+#include <stdio.h>
+
+#include "md_logging.h"
+#include "gmx_fatal.h"
+#include "statutil.h"
+#include "string2.h"
+#include "gmx_omp.h"
+
+int gmx_omp_get_max_threads(void)
+{
+#ifdef GMX_OPENMP
+ return omp_get_max_threads();
+#else
+ return 1;
+#endif
+}
+
+int gmx_omp_get_num_procs(void)
+{
+#ifdef GMX_OPENMP
+ return omp_get_num_procs();
+#else
+ return 1;
+#endif
+}
+
+int gmx_omp_get_thread_num(void)
+{
+#ifdef GMX_OPENMP
+ return omp_get_thread_num();
+#else
+ return 0;
+#endif
+}
+
+void gmx_omp_set_num_threads(int num_threads)
+{
+#ifdef GMX_OPENMP
+ omp_set_num_threads(num_threads);
+#else
+ return;
+#endif
+}
+
+/*!
+ * Thread affinity set by the OpenMP library can conflict with the GROMACS
+ * internal affinity setting.
+ *
+ * While GNU OpenMP does not set affinity by default, the Intel OpenMP library
+ * does. This conflicts with the internal affinity (especially thread-MPI)
+ * setting, results in incorrectly locked threads, and causes dreadful performance.
+ *
+ * The KMP_AFFINITY environment variable is used by Intel, GOMP_CPU_AFFINITY
+ * by the GNU compilers (Intel also honors it well). If any of the variables
+ * is set, we honor it, disable the internal pinning, and warn the user.
+ * When using Intel OpenMP, we will disable affinity if the user did not set it
+ * anually through one of the aforementioned environment variables.
+ *
+ * Note that the Intel OpenMP affinity disabling iwll only take effect if this
+ * function is called before the OpenMP library gets initialized which happens
+ * when the first call is made into a compilation unit that contains OpenMP
+ * pragmas.
+ */
+void gmx_omp_check_thread_affinity(FILE *fplog, const t_commrec *cr,
+ gmx_hw_opt_t *hw_opt)
+{
+ gmx_bool bKmpAffinitySet, bGompCpuAffinitySet;
+ char *kmp_env, *gomp_env;
+
+ /* no need to worry if internal thread pinning is turned off */
- md_print_warn(cr, fplog, "WARNING: KMP_AFFINITY set, will turn off %s internal affinity\n"
- " setting as the two can conflict and cause performance degradation.\n"
- " To keep using the %s internal affinity setting, set the\n"
- " KMP_AFFINITY=disabled environment variable.",
++ if (hw_opt->thread_affinity == threadaffOFF)
+ {
+ return;
+ }
+
+#if defined(GMX_OPENMP)
+
+ /* We assume that the affinity setting is available on all platforms
+ * gcc supports. Even if this is not the case (e.g. Mac OS) the user
+ * will only get a warning.*/
+ bGompCpuAffinitySet = FALSE;
+ gomp_env = NULL;
+#if defined(__GNUC__)
+ gomp_env = getenv("GOMP_CPU_AFFINITY");
+ bGompCpuAffinitySet = (gomp_env != NULL);
+#endif /* __GNUC__ */
+
+ bKmpAffinitySet = FALSE;
+#if defined(__INTEL_COMPILER)
+ kmp_env = getenv("KMP_AFFINITY");
+ bKmpAffinitySet = (kmp_env != NULL);
+
+ /* disable Intel OpenMP affinity if neither KMP_AFFINITY nor
+ * GOMP_CPU_AFFINITY is set (Intel uses the GNU env. var as well) */
+ if (!bKmpAffinitySet && !bGompCpuAffinitySet)
+ {
+ int retval;
+
+#ifdef _MSC_VER
+ /* Windows not POSIX */
+ retval = _putenv_s("KMP_AFFINITY", "disabled");
+#else
+ /* POSIX */
+ retval = setenv("KMP_AFFINITY", "disabled", 0);
+#endif /* _MSC_VER */
+
+ if (debug)
+ {
+ fprintf(debug, "Disabling Intel OpenMP affinity by setting the KMP_AFFINITY=disabled env. var.\n");
+ }
+
+ if (retval != 0)
+ {
+ gmx_warning("Disabling Intel OpenMp affinity setting failed!");
+ }
+ }
+
+ /* turn off internal pinning KMP_AFFINITY != "disabled" */
+ if (bKmpAffinitySet && (gmx_strncasecmp(kmp_env, "disabled", 8) != 0))
+ {
- hw_opt->bThreadPinning = FALSE;
++ /* TODO: with -pin auto we should only warn when using all cores */
++ md_print_warn(cr, fplog,
++ "NOTE: KMP_AFFINITY set, will turn off %s internal affinity\n"
++ " setting as the two can conflict and cause performance degradation.\n"
++ " To keep using the %s internal affinity setting, set the\n"
++ " KMP_AFFINITY=disabled environment variable.",
+ ShortProgram(), ShortProgram());
+
- "WARNING: GOMP_CPU_AFFINITY set, will turn off %s internal affinity\n"
- " setting as the two can conflict and cause performance degradation.\n"
- " To keep using the %s internal affinity setting, unset the\n"
- " GOMP_CPU_AFFINITY environment variable.",
++ hw_opt->thread_affinity = threadaffOFF;
+ }
+#endif /* __INTEL_COMPILER */
+
+#if defined(__INTEL_COMPILER) || defined(__GNUC__)
+ /* turn off internal pinning f GOMP_CPU_AFFINITY is set & non-empty */
+ if (bGompCpuAffinitySet && gomp_env != NULL && gomp_env != '\0')
+ {
++ /* TODO: with -pin auto we should only warn when using all cores */
+ md_print_warn(cr, fplog,
- hw_opt->bThreadPinning = FALSE;
++ "NOTE: GOMP_CPU_AFFINITY set, will turn off %s internal affinity\n"
++ " setting as the two can conflict and cause performance degradation.\n"
++ " To keep using the %s internal affinity setting, unset the\n"
++ " GOMP_CPU_AFFINITY environment variable.",
+ ShortProgram(), ShortProgram());
+
++ hw_opt->thread_affinity = threadaffOFF;
+ }
+#endif /* __INTEL_COMPILER || __GNUC__ */
+
+#endif /* GMX_OPENMP */
+}
--- /dev/null
--- /dev/null
++/*
++ * This file is part of the GROMACS molecular simulation package.
++ *
++ * Copyright (c) 2012, by the GROMACS development team, led by
++ * David van der Spoel, Berk Hess, Erik Lindahl, and including many
++ * others, as listed in the AUTHORS file in the top-level source
++ * directory and at http://www.gromacs.org.
++ *
++ * GROMACS is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public License
++ * as published by the Free Software Foundation; either version 2.1
++ * of the License, or (at your option) any later version.
++ *
++ * GROMACS is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with GROMACS; if not, see
++ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
++ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * If you want to redistribute modifications to GROMACS, please
++ * consider that scientific software is very special. Version
++ * control is crucial - bugs must be traceable. We will be happy to
++ * consider code for inclusion in the official distribution, but
++ * derived work must not be called official GROMACS. Details are found
++ * in the README & COPYING files - if they are missing, get the
++ * official version at http://www.gromacs.org.
++ *
++ * To help us fund GROMACS development, we humbly ask that you cite
++ * the research papers on the package. Check out http://www.gromacs.org.
++ */
++#ifdef HAVE_CONFIG_H
++#include <config.h>
++#endif
++#if defined(HAVE_SCHED_H) && defined(HAVE_SCHED_GETAFFINITY)
++#define _GNU_SOURCE
++#include <sched.h>
++#include <sys/syscall.h>
++#endif
++#include <assert.h>
++#include <stdio.h>
++#include "typedefs.h"
++#include "types/commrec.h"
++#include "types/hw_info.h"
++#include "gmx_cpuid.h"
++#include "gmx_omp.h"
++#include "gmx_omp_nthreads.h"
++#include "mdrun.h"
++#include "md_logging.h"
++#include "statutil.h"
++#include "gmx_thread_affinity.h"
++
++#include "thread_mpi/threads.h"
++
++
++static int
++get_thread_affinity_layout(FILE *fplog,
++ const t_commrec *cr,
++ const gmx_hw_info_t * hwinfo,
++ int nthreads,
++ int pin_offset, int * pin_stride,
++ const int **locality_order)
++{
++ int nhwthreads,npkg,ncores,nhwthreads_per_core,rc;
++ const int * pkg_id;
++ const int * core_id;
++ const int * hwthread_id;
++
++ if (pin_offset < 0)
++ {
++ gmx_fatal(FARGS,"Negative thread pinning offset requested");
++ }
++ if (*pin_stride < 0)
++ {
++ gmx_fatal(FARGS,"Negative thread pinning stride requested");
++ }
++
++ rc = gmx_cpuid_topology(hwinfo->cpuid_info, &nhwthreads, &npkg, &ncores,
++ &nhwthreads_per_core,
++ &pkg_id, &core_id, &hwthread_id, locality_order);
++
++ if (rc != 0)
++ {
++ nhwthreads = hwinfo->nthreads_hw_avail;
++ *locality_order = NULL;
++
++ if (nhwthreads <= 0)
++ {
++ /* We don't know anything about the hardware, don't pin */
++ md_print_warn(cr, fplog,
++ "We don't know how many logical cores we have, will not pin threads");
++
++ return -1;
++ }
++ }
++
++ if (pin_offset + nthreads > nhwthreads)
++ {
++ /* We are oversubscribing, don't pin */
++ md_print_warn(NULL, fplog,
++ "More threads requested than available logical cores, will not pin threads");
++
++ return -1;
++ }
++
++ /* Check if we need to choose the pinning stride */
++ if (*pin_stride == 0)
++ {
++ if (rc == 0 && pin_offset + nthreads*nhwthreads_per_core <= nhwthreads)
++ {
++ /* Put one thread on each physical core */
++ *pin_stride = nhwthreads_per_core;
++ }
++ else
++ {
++ /* We don't know if we have SMT, and if we do, we don't know
++ * if hw threads in the same physical core are consecutive.
++ * Without SMT the pinning layout should not matter too much.
++ * so we assume a consecutive layout and maximally spread out"
++ * the threads at equal threads per core.
++ * Note that IBM is the major non-x86 case with cpuid support
++ * and probably threads are already pinned by the queuing system,
++ * so we wouldn't end up here in the first place.
++ */
++ *pin_stride = (nhwthreads - pin_offset)/nthreads;
++ }
++
++ if (fplog != NULL)
++ {
++ fprintf(fplog,"Pinning threads with a logical core stride of %d\n",
++ *pin_stride);
++ }
++ }
++ else
++ {
++ if (pin_offset + nthreads*(*pin_stride) > nhwthreads)
++ {
++ /* We are oversubscribing, don't pin */
++ md_print_warn(NULL, fplog,
++ "The requested pinning stride is too large for the available logical cores, will not pin threads");
++
++ return -1;
++ }
++ }
++
++ return 0;
++}
++
++/* Set CPU affinity. Can be important for performance.
++ On some systems (e.g. Cray) CPU Affinity is set by default.
++ But default assigning doesn't work (well) with only some ranks
++ having threads. This causes very low performance.
++ External tools have cumbersome syntax for setting affinity
++ in the case that only some ranks have threads.
++ Thus it is important that GROMACS sets the affinity internally
++ if only PME is using threads.
++*/
++void
++gmx_set_thread_affinity(FILE *fplog,
++ const t_commrec *cr,
++ gmx_hw_opt_t *hw_opt,
++ int nthreads_pme,
++ const gmx_hw_info_t *hwinfo,
++ const t_inputrec *inputrec)
++{
++ int nth_affinity_set, thread_id_node, thread_id,
++ nthread_local, nthread_node, nthread_hw_max, nphyscore;
++ int offset;
++ const int *locality_order;
++ int rc;
++
++ if (hw_opt->thread_affinity == threadaffOFF)
++ {
++ /* Nothing to do */
++ return;
++ }
++
++#ifndef __APPLE__
++ /* If the tMPI thread affinity setting is not supported encourage the user
++ * to report it as it's either a bug or an exotic platform which we might
++ * want to support. */
++ if (tMPI_Thread_setaffinity_support() != TMPI_SETAFFINITY_SUPPORT_YES)
++ {
++ md_print_warn(NULL, fplog,
++ "Can not set thread affinities on the current platform. On NUMA systems this\n"
++ "can cause performance degradation. If you think your platform should support\n"
++ "setting affinities, contact the GROMACS developers.");
++ return;
++ }
++#endif /* __APPLE__ */
++
++ /* threads on this MPI process or TMPI thread */
++ if (cr->duty & DUTY_PP)
++ {
++ nthread_local = gmx_omp_nthreads_get(emntNonbonded);
++ }
++ else
++ {
++ nthread_local = gmx_omp_nthreads_get(emntPME);
++ }
++
++ /* map the current process to cores */
++ thread_id_node = 0;
++ nthread_node = nthread_local;
++#ifdef GMX_MPI
++ if (PAR(cr) || MULTISIM(cr))
++ {
++ /* We need to determine a scan of the thread counts in this
++ * compute node.
++ */
++ MPI_Comm comm_intra;
++
++ MPI_Comm_split(MPI_COMM_WORLD,gmx_hostname_num(),cr->rank_intranode,
++ &comm_intra);
++ MPI_Scan(&nthread_local,&thread_id_node,1,MPI_INT,MPI_SUM,comm_intra);
++ /* MPI_Scan is inclusive, but here we need exclusive */
++ thread_id_node -= nthread_local;
++ /* Get the total number of threads on this physical node */
++ MPI_Allreduce(&nthread_local,&nthread_node,1,MPI_INT,MPI_SUM,comm_intra);
++ MPI_Comm_free(&comm_intra);
++ }
++#endif
++
++ if (hw_opt->thread_affinity == threadaffAUTO &&
++ nthread_node != hwinfo->nthreads_hw_avail)
++ {
++ if (nthread_node > 1 && nthread_node < hwinfo->nthreads_hw_avail)
++ {
++ md_print_warn(cr, fplog,
++ "NOTE: The number of threads is not equal to the number of (logical) cores\n"
++ " and the -pin option is set to auto: will not pin thread to cores.\n"
++ " This can lead to significant performance degradation.\n"
++ " Consider using -pin on (and -pinoffset in case you run multiple jobs).\n");
++ }
++
++ return;
++ }
++
++ offset = 0;
++ if (hw_opt->core_pinning_offset != 0)
++ {
++ offset = hw_opt->core_pinning_offset;
++ md_print_info(cr,fplog,"Applying core pinning offset %d\n", offset);
++ }
++
++ rc = get_thread_affinity_layout(fplog, cr, hwinfo,
++ nthread_node,
++ offset, &hw_opt->core_pinning_stride,
++ &locality_order);
++ if (rc != 0)
++ {
++ /* Incompatible layout, don't pin, warning was already issued */
++ return;
++ }
++
++ /* Set the per-thread affinity. In order to be able to check the success
++ * of affinity settings, we will set nth_affinity_set to 1 on threads
++ * where the affinity setting succeded and to 0 where it failed.
++ * Reducing these 0/1 values over the threads will give the total number
++ * of threads on which we succeeded.
++ */
++ nth_affinity_set = 0;
++#pragma omp parallel firstprivate(thread_id_node) num_threads(nthread_local) \
++ reduction(+:nth_affinity_set)
++ {
++ int index,core;
++ gmx_bool setaffinity_ret;
++
++ thread_id = gmx_omp_get_thread_num();
++ thread_id_node += thread_id;
++ index = offset + thread_id_node*hw_opt->core_pinning_stride;
++ if (locality_order != NULL)
++ {
++ core = locality_order[index];
++ }
++ else
++ {
++ core = index;
++ }
++
++ setaffinity_ret = tMPI_Thread_setaffinity_single(tMPI_Thread_self(), core);
++
++ /* store the per-thread success-values of the setaffinity */
++ nth_affinity_set = (setaffinity_ret == 0);
++
++ if (debug)
++ {
++ fprintf(debug, "On rank %2d, thread %2d, core %2d the affinity setting returned %d\n",
++ cr->nodeid, gmx_omp_get_thread_num(), core, setaffinity_ret);
++ }
++ }
++
++ if (nth_affinity_set > nthread_local)
++ {
++ char msg[STRLEN];
++
++ sprintf(msg, "Looks like we have set affinity for more threads than "
++ "we have (%d > %d)!\n", nth_affinity_set, nthread_local);
++ gmx_incons(msg);
++ }
++ else
++ {
++ /* check & warn if some threads failed to set their affinities */
++ if (nth_affinity_set != nthread_local)
++ {
++ char sbuf1[STRLEN], sbuf2[STRLEN];
++
++ /* sbuf1 contains rank info, while sbuf2 OpenMP thread info */
++ sbuf1[0] = sbuf2[0] = '\0';
++#ifdef GMX_MPI
++#ifdef GMX_THREAD_MPI
++ sprintf(sbuf1, "In thread-MPI thread #%d: ", cr->nodeid);
++#else /* GMX_LIB_MPI */
++ sprintf(sbuf1, "In MPI process #%d: ", cr->nodeid);
++#endif
++#endif /* GMX_MPI */
++
++ if (nthread_local > 1)
++ {
++ sprintf(sbuf2, "of %d/%d thread%s ",
++ nthread_local - nth_affinity_set, nthread_local,
++ (nthread_local - nth_affinity_set) > 1 ? "s" : "");
++ }
++
++ md_print_warn(NULL, fplog,
++ "NOTE: %sAffinity setting %sfailed.\n"
++ " This can cause performance degradation!",
++ sbuf1, sbuf2);
++ }
++ }
++}
++
++/* Check the process affinity mask and if it is found to be non-zero,
++ * will honor it and disable mdrun internal affinity setting.
++ * Note that this will only work on Linux as we use a GNU feature.
++ */
++void
++gmx_check_thread_affinity_set(FILE *fplog, const t_commrec *cr,
++ gmx_hw_opt_t *hw_opt, int ncpus,
++ gmx_bool bAfterOpenmpInit)
++{
++#ifdef HAVE_SCHED_GETAFFINITY
++ cpu_set_t mask_current;
++ int i, ret, cpu_count, cpu_set;
++ gmx_bool bAllSet;
++
++ assert(hw_opt);
++ if (hw_opt->thread_affinity == threadaffOFF)
++ {
++ /* internal affinity setting is off, don't bother checking process affinity */
++ return;
++ }
++
++ CPU_ZERO(&mask_current);
++ if ((ret = sched_getaffinity(0, sizeof(cpu_set_t), &mask_current)) != 0)
++ {
++ /* failed to query affinity mask, will just return */
++ if (debug)
++ {
++ fprintf(debug, "Failed to query affinity mask (error %d)", ret);
++ }
++ return;
++ }
++
++ /* Before proceeding with the actual check, make sure that the number of
++ * detected CPUs is >= the CPUs in the current set.
++ * We need to check for CPU_COUNT as it was added only in glibc 2.6. */
++#ifdef CPU_COUNT
++ if (ncpus < CPU_COUNT(&mask_current))
++ {
++ if (debug)
++ {
++ fprintf(debug, "%d CPUs detected, but %d was returned by CPU_COUNT",
++ ncpus, CPU_COUNT(&mask_current));
++ }
++ return;
++ }
++#endif /* CPU_COUNT */
++
++ bAllSet = TRUE;
++ for (i = 0; (i < ncpus && i < CPU_SETSIZE); i++)
++ {
++ bAllSet = bAllSet && (CPU_ISSET(i, &mask_current) != 0);
++ }
++
++ if (!bAllSet)
++ {
++ if (hw_opt->thread_affinity == threadaffAUTO)
++ {
++ if (!bAfterOpenmpInit)
++ {
++ md_print_warn(cr, fplog,
++ "Non-default thread affinity set, disabling internal thread affinity");
++ }
++ else
++ {
++ md_print_warn(cr, fplog,
++ "Non-default thread affinity set probably by the OpenMP library,\n"
++ "disabling internal thread affinity");
++ }
++ hw_opt->thread_affinity = threadaffOFF;
++ }
++ else
++ {
++ /* Only warn once, at the last check (bAfterOpenmpInit==TRUE) */
++ if (bAfterOpenmpInit)
++ {
++ md_print_warn(cr, fplog,
++ "Overriding thread affinity set outside %s\n",
++ ShortProgram());
++ }
++ }
++
++ if (debug)
++ {
++ fprintf(debug, "Non-default affinity mask found\n");
++ }
++ }
++ else
++ {
++ if (debug)
++ {
++ fprintf(debug, "Default affinity mask found\n");
++ }
++ }
++#endif /* HAVE_SCHED_GETAFFINITY */
++}
--- /dev/null
--- /dev/null
++/*
++ * This file is part of the GROMACS molecular simulation package.
++ *
++ * Copyright (c) 2012, by the GROMACS development team, led by
++ * David van der Spoel, Berk Hess, Erik Lindahl, and including many
++ * others, as listed in the AUTHORS file in the top-level source
++ * directory and at http://www.gromacs.org.
++ *
++ * GROMACS is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public License
++ * as published by the Free Software Foundation; either version 2.1
++ * of the License, or (at your option) any later version.
++ *
++ * GROMACS is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with GROMACS; if not, see
++ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
++ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * If you want to redistribute modifications to GROMACS, please
++ * consider that scientific software is very special. Version
++ * control is crucial - bugs must be traceable. We will be happy to
++ * consider code for inclusion in the official distribution, but
++ * derived work must not be called official GROMACS. Details are found
++ * in the README & COPYING files - if they are missing, get the
++ * official version at http://www.gromacs.org.
++ *
++ * To help us fund GROMACS development, we humbly ask that you cite
++ * the research papers on the package. Check out http://www.gromacs.org.
++ */
++#ifndef GMX_THREAD_AFFINITY_H_
++#define GMX_THREAD_AFFINITY_H_
++#include "types/commrec.h"
++#include "typedefs.h"
++
++#ifdef __cplusplus
++extern "C" {
++#endif
++#if 0
++} /* fixes auto-indentation problems */
++#endif
++
++/* Sets the thread affinity using the requested setting stored in hw_opt.
++ * The hardware topologu is requested from hwinfo, when present.
++ */
++void
++gmx_set_thread_affinity(FILE *fplog,
++ const t_commrec *cr,
++ gmx_hw_opt_t *hw_opt,
++ int nthreads_pme,
++ const gmx_hw_info_t *hwinfo,
++ const t_inputrec *inputrec);
++
++/* Check the process affinity mask and if it is found to be non-zero,
++ * will honor it and disable mdrun internal affinity setting.
++ * This function should be called first before the OpenMP library gets
++ * initialized with the last argument FALSE (which will detect affinity
++ * set by external tools like taskset), and later, after the OpenMP
++ * initialization, with the last argument TRUE to detect affinity changes
++ * made by the OpenMP library.
++ *
++ * Note that this will only work on Linux as we use a GNU feature.
++ */
++void
++gmx_check_thread_affinity_set(FILE *fplog, const t_commrec *cr,
++ gmx_hw_opt_t *hw_opt, int ncpus,
++ gmx_bool bAfterOpenmpInit);
++
++#ifdef __cplusplus
++}
++#endif
++
++#endif /* GMX_THREAD_AFFINITY_H_ */
--- /dev/null
- gmx_bool bThreadPinning; /* Pin OpenMP threads to cores? */
- gmx_bool bPinHyperthreading; /* Pin pairs of threads to physical cores */
- int core_pinning_offset; /* Physical core pinning offset */
+/*
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * VERSION 3.2.0
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * Gromacs Runs On Most of All Computer Systems
+ */
+
+#ifndef _mdrun_h
+#define _mdrun_h
+
+#include <stdio.h>
+#include <time.h>
+#include "typedefs.h"
+#include "network.h"
+#include "sim_util.h"
+#include "tgroup.h"
+#include "filenm.h"
+#include "mshift.h"
+#include "force.h"
+#include "edsam.h"
+#include "mdebin.h"
+#include "vcm.h"
+#include "vsite.h"
+#include "pull.h"
+#include "update.h"
+#include "types/membedt.h"
+#include "types/globsig.h"
+
+
+#ifdef GMX_THREAD_MPI
+#include "thread_mpi/threads.h"
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MD_POLARISE (1<<2)
+#define MD_IONIZE (1<<3)
+#define MD_RERUN (1<<4)
+#define MD_RERUN_VSITE (1<<5)
+#define MD_FFSCAN (1<<6)
+#define MD_SEPPOT (1<<7)
+#define MD_PARTDEC (1<<9)
+#define MD_DDBONDCHECK (1<<10)
+#define MD_DDBONDCOMM (1<<11)
+#define MD_CONFOUT (1<<12)
+#define MD_REPRODUCIBLE (1<<13)
+#define MD_READ_RNG (1<<14)
+#define MD_APPENDFILES (1<<15)
+#define MD_APPENDFILESSET (1<<21)
+#define MD_KEEPANDNUMCPT (1<<16)
+#define MD_READ_EKIN (1<<17)
+#define MD_STARTFROMCPT (1<<18)
+#define MD_RESETCOUNTERSHALFWAY (1<<19)
+#define MD_TUNEPME (1<<20)
+#define MD_TESTVERLET (1<<22)
+
++/* The options for the domain decomposition MPI task ordering */
+enum {
+ ddnoSEL, ddnoINTERLEAVE, ddnoPP_PME, ddnoCARTESIAN, ddnoNR
+};
+
++/* The options for the thread affinity setting, default: auto */
++enum {
++ threadaffSEL, threadaffAUTO, threadaffON, threadaffOFF, threadaffNR
++};
++
+typedef struct {
+ int nthreads_tot; /* Total number of threads requested (TMPI) */
+ int nthreads_tmpi; /* Number of TMPI threads requested */
+ int nthreads_omp; /* Number of OpenMP threads requested */
+ int nthreads_omp_pme; /* As nthreads_omp, but for PME only nodes */
++ int thread_affinity; /* Thread affinity switch, see enum above */
++ int core_pinning_stride; /* Logical core pinning stride */
++ int core_pinning_offset; /* Logical core pinning offset */
+ char *gpu_id; /* GPU id's to use, each specified as chars */
+} gmx_hw_opt_t;
+
+/* Variables for temporary use with the deform option,
+ * used in runner.c and md.c.
+ * (These variables should be stored in the tpx file.)
+ */
+extern gmx_large_int_t deform_init_init_step_tpx;
+extern matrix deform_init_box_tpx;
+#ifdef GMX_THREAD_MPI
+extern tMPI_Thread_mutex_t deform_init_box_mutex;
+
+/* The minimum number of atoms per tMPI thread. With fewer atoms than this,
+ * the number of threads will get lowered.
+ */
+#define MIN_ATOMS_PER_MPI_THREAD 90
+#define MIN_ATOMS_PER_GPU 900
+#endif
+
+
+typedef double gmx_integrator_t(FILE *log,t_commrec *cr,
+ int nfile,const t_filenm fnm[],
+ const output_env_t oenv, gmx_bool bVerbose,
+ gmx_bool bCompact, int nstglobalcomm,
+ gmx_vsite_t *vsite,gmx_constr_t constr,
+ int stepout,
+ t_inputrec *inputrec,
+ gmx_mtop_t *mtop,t_fcdata *fcd,
+ t_state *state,
+ t_mdatoms *mdatoms,
+ t_nrnb *nrnb,gmx_wallcycle_t wcycle,
+ gmx_edsam_t ed,
+ t_forcerec *fr,
+ int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
+ gmx_membed_t membed,
+ real cpt_period,real max_hours,
+ const char *deviceOptions,
+ unsigned long Flags,
+ gmx_runtime_t *runtime);
+
+/* ROUTINES from md.c */
+
+gmx_integrator_t do_md;
+
+
+/* ROUTINES from minimize.c */
+
+gmx_integrator_t do_steep;
+/* Do steepest descents EM */
+
+gmx_integrator_t do_cg;
+/* Do conjugate gradient EM */
+
+gmx_integrator_t do_lbfgs;
+/* Do conjugate gradient L-BFGS */
+
+gmx_integrator_t do_nm;
+/* Do normal mode analysis */
+
+/* ROUTINES from tpi.c */
+
+gmx_integrator_t do_tpi;
+/* Do test particle insertion */
+
+void init_npt_masses(t_inputrec *ir, t_state *state, t_extmass *MassQ, gmx_bool bInit);
+
+int ExpandedEnsembleDynamics(FILE *log,t_inputrec *ir, gmx_enerdata_t *enerd,
+ t_state *state, t_extmass *MassQ, df_history_t *dfhist,
+ gmx_large_int_t step, gmx_rng_t mcrng,
+ rvec *v, t_mdatoms *mdatoms);
+
+void PrintFreeEnergyInfoToFile(FILE *outfile, t_lambda *fep, t_expanded *expand, t_simtemp *simtemp, df_history_t *dfhist,
+ int nlam, int frequency, gmx_large_int_t step);
+
+void get_mc_state(gmx_rng_t rng,t_state *state);
+
+void set_mc_state(gmx_rng_t rng,t_state *state);
+
+/* check the version */
+void check_ir_old_tpx_versions(t_commrec *cr,FILE *fplog,
+ t_inputrec *ir,gmx_mtop_t *mtop);
+
+/* Allocate and initialize node-local state entries. */
+void set_state_entries(t_state *state,const t_inputrec *ir,int nnodes);
+
+/* Broadcast the data for a simulation, and allocate node-specific settings
+ such as rng generators. */
+void init_parallel(FILE *log, t_commrec *cr, t_inputrec *inputrec,
+ gmx_mtop_t *mtop);
+
+int mdrunner(gmx_hw_opt_t *hw_opt,
+ FILE *fplog,t_commrec *cr,int nfile,
+ const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose,
+ gmx_bool bCompact, int nstglobalcomm, ivec ddxyz,int dd_node_order,
+ real rdd, real rconstr, const char *dddlb_opt,real dlb_scale,
+ const char *ddcsx,const char *ddcsy,const char *ddcsz,
+ const char *nbpu_opt,
+ int nsteps_cmdline, int nstepout, int resetstep,
+ int nmultisim, int repl_ex_nst, int repl_ex_nex,
+ int repl_ex_seed, real pforce,real cpt_period,real max_hours,
+ const char *deviceOptions, unsigned long Flags);
+/* Driver routine, that calls the different methods */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _mdrun_h */
--- /dev/null
- "When compiled with OpenMP on Linux, [TT]mdrun[tt] pins threads to cores,",
+/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ *
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * VERSION 3.2.0
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "typedefs.h"
+#include "macros.h"
+#include "copyrite.h"
+#include "main.h"
+#include "statutil.h"
+#include "smalloc.h"
+#include "futil.h"
+#include "smalloc.h"
+#include "edsam.h"
+#include "mdrun.h"
+#include "xmdrun.h"
+#include "checkpoint.h"
+#ifdef GMX_THREAD_MPI
+#include "thread_mpi.h"
+#endif
+
+/* afm stuf */
+#include "pull.h"
+
+int cmain(int argc,char *argv[])
+{
+ const char *desc[] = {
+ "The [TT]mdrun[tt] program is the main computational chemistry engine",
+ "within GROMACS. Obviously, it performs Molecular Dynamics simulations,",
+ "but it can also perform Stochastic Dynamics, Energy Minimization,",
+ "test particle insertion or (re)calculation of energies.",
+ "Normal mode analysis is another option. In this case [TT]mdrun[tt]",
+ "builds a Hessian matrix from single conformation.",
+ "For usual Normal Modes-like calculations, make sure that",
+ "the structure provided is properly energy-minimized.",
+ "The generated matrix can be diagonalized by [TT]g_nmeig[tt].[PAR]",
+ "The [TT]mdrun[tt] program reads the run input file ([TT]-s[tt])",
+ "and distributes the topology over nodes if needed.",
+ "[TT]mdrun[tt] produces at least four output files.",
+ "A single log file ([TT]-g[tt]) is written, unless the option",
+ "[TT]-seppot[tt] is used, in which case each node writes a log file.",
+ "The trajectory file ([TT]-o[tt]), contains coordinates, velocities and",
+ "optionally forces.",
+ "The structure file ([TT]-c[tt]) contains the coordinates and",
+ "velocities of the last step.",
+ "The energy file ([TT]-e[tt]) contains energies, the temperature,",
+ "pressure, etc, a lot of these things are also printed in the log file.",
+ "Optionally coordinates can be written to a compressed trajectory file",
+ "([TT]-x[tt]).[PAR]",
+ "The option [TT]-dhdl[tt] is only used when free energy calculation is",
+ "turned on.[PAR]",
+ "A simulation can be run in parallel using two different parallelization",
+ "schemes: MPI parallelization and/or OpenMP thread parallelization.",
+ "The MPI parallelization uses multiple processes when [TT]mdrun[tt] is",
+ "compiled with a normal MPI library or threads when [TT]mdrun[tt] is",
+ "compiled with the GROMACS built-in thread-MPI library. OpenMP threads",
+ "are supported when mdrun is compiled with OpenMP. Full OpenMP support",
+ "is only available with the Verlet cut-off scheme, with the (older)",
+ "group scheme only PME-only processes can use OpenMP parallelization.",
+ "In all cases [TT]mdrun[tt] will by default try to use all the available",
+ "hardware resources. With a normal MPI library only the options",
+ "[TT]-ntomp[tt] (with the Verlet cut-off scheme) and [TT]-ntomp_pme[tt],",
+ "for PME-only processes, can be used to control the number of threads.",
+ "With thread-MPI there are additional options [TT]-nt[tt], which sets",
+ "the total number of threads, and [TT]-ntmpi[tt], which sets the number",
+ "of thread-MPI threads.",
+ "Note that using combined MPI+OpenMP parallelization is almost always",
+ "slower than single parallelization, except at the scaling limit, where",
+ "especially OpenMP parallelization of PME reduces the communication cost.",
+ "OpenMP-only parallelization is much faster than MPI-only parallelization",
+ "on a single CPU(-die). Since we currently don't have proper hardware",
+ "topology detection, [TT]mdrun[tt] compiled with thread-MPI will only",
+ "automatically use OpenMP-only parallelization when you use up to 4",
+ "threads, up to 12 threads with Intel Nehalem/Westmere, or up to 16",
+ "threads with Intel Sandy Bridge or newer CPUs. Otherwise MPI-only",
+ "parallelization is used (except with GPUs, see below).",
+ "[PAR]",
+ "To quickly test the performance of the new Verlet cut-off scheme",
+ "with old [TT].tpr[tt] files, either on CPUs or CPUs+GPUs, you can use",
+ "the [TT]-testverlet[tt] option. This should not be used for production,",
+ "since it can slightly modify potentials and it will remove charge groups",
+ "making analysis difficult, as the [TT].tpr[tt] file will still contain",
+ "charge groups. For production simulations it is highly recommended",
+ "to specify [TT]cutoff-scheme = Verlet[tt] in the [TT].mdp[tt] file.",
+ "[PAR]",
+ "With GPUs (only supported with the Verlet cut-off scheme), the number",
+ "of GPUs should match the number of MPI processes or MPI threads,",
+ "excluding PME-only processes/threads. With thread-MPI the number",
+ "of MPI threads will automatically be set to the number of GPUs detected.",
+ "When you want to use a subset of the available GPUs, you can use",
+ "the [TT]-gpu_id[tt] option, where GPU id's are passed as a string,",
+ "e.g. 02 for using GPUs 0 and 2. When you want different GPU id's",
+ "on different nodes of a compute cluster, use the GMX_GPU_ID environment",
+ "variable instead. The format for GMX_GPU_ID is identical to ",
+ "[TT]-gpu_id[tt], but an environment variable can have different values",
+ "on different nodes of a cluster.",
+ "[PAR]",
+ "When using PME with separate PME nodes or with a GPU, the two major",
+ "compute tasks, the non-bonded force calculation and the PME calculation",
+ "run on different compute resources. If this load is not balanced,",
+ "some of the resources will be idle part of time. With the Verlet",
+ "cut-off scheme this load is automatically balanced when the PME load",
+ "is too high (but not when it is too low). This is done by scaling",
+ "the Coulomb cut-off and PME grid spacing by the same amount. In the first",
+ "few hundred steps different settings are tried and the fastest is chosen",
+ "for the rest of the simulation. This does not affect the accuracy of",
+ "the results, but it does affect the decomposition of the Coulomb energy",
+ "into particle and mesh contributions. The auto-tuning can be turned off",
+ "with the option [TT]-notunepme[tt].",
+ "[PAR]",
- "If you don't want this, use [TT]-nopin[tt].",
- "With Intel CPUs with hyper-threading enabled, you should pin",
- "consecutive threads to the same physical core for optimal",
- "performance when you use virtual cores. This is done automatically",
- "when you use more than half of the virtual cores. It can also be set",
- "manually with [TT]-pinht[tt], e.g. for running multiple simulations",
- "on one compute node.",
++ "[TT]mdrun[tt] pins (sets affinity of) threads to specific cores,",
++ "when all (logical) cores on a compute node are used by [TT]mdrun[tt],",
++ "even when no multi-threading is used,",
+ "as this usually results in significantly better performance.",
- "the offset in (physical) cores for pinning.",
++ "If the queuing systems or the OpenMP library pinned threads, we honor",
++ "this and don't pin again, even though the layout may be sub-optimal.",
++ "If you want to have [TT]mdrun[tt] override an already set thread affinity",
++ "or pin threads when using less cores, use [TT]-pin on[tt].",
++ "With SMT (simultaneous multithreading), e.g. Intel Hyper-Threading,",
++ "there are multiple logical cores per physical core.",
++ "The option [TT]-pinstride[tt] sets the stride in logical cores for",
++ "pinning consecutive threads. Without SMT, 1 is usually the best choice.",
++ "With Intel Hyper-Threading 2 is best when using half or less of the",
++ "logical cores, 1 otherwise. The default value of 0 do exactly that:",
++ "it minimizes the threads per logical core, to optimize performance.",
++ "If you want to run multiple mdrun jobs on the same physical node,"
++ "you should set [TT]-pinstride[tt] to 1 when using all logical cores.",
+ "When running multiple mdrun (or other) simulations on the same physical",
+ "node, some simulations need to start pinning from a non-zero core",
+ "to avoid overloading cores; with [TT]-pinoffset[tt] you can specify",
- gmx_hw_opt_t hw_opt={0,0,0,0,TRUE,FALSE,0,NULL};
++ "the offset in logical cores for pinning.",
+ "[PAR]",
+ "When [TT]mdrun[tt] is started using MPI with more than 1 process",
+ "or with thread-MPI with more than 1 thread, MPI parallelization is used.",
+ "By default domain decomposition is used, unless the [TT]-pd[tt]",
+ "option is set, which selects particle decomposition.",
+ "[PAR]",
+ "With domain decomposition, the spatial decomposition can be set",
+ "with option [TT]-dd[tt]. By default [TT]mdrun[tt] selects a good decomposition.",
+ "The user only needs to change this when the system is very inhomogeneous.",
+ "Dynamic load balancing is set with the option [TT]-dlb[tt],",
+ "which can give a significant performance improvement,",
+ "especially for inhomogeneous systems. The only disadvantage of",
+ "dynamic load balancing is that runs are no longer binary reproducible,",
+ "but in most cases this is not important.",
+ "By default the dynamic load balancing is automatically turned on",
+ "when the measured performance loss due to load imbalance is 5% or more.",
+ "At low parallelization these are the only important options",
+ "for domain decomposition.",
+ "At high parallelization the options in the next two sections",
+ "could be important for increasing the performace.",
+ "[PAR]",
+ "When PME is used with domain decomposition, separate nodes can",
+ "be assigned to do only the PME mesh calculation;",
+ "this is computationally more efficient starting at about 12 nodes.",
+ "The number of PME nodes is set with option [TT]-npme[tt],",
+ "this can not be more than half of the nodes.",
+ "By default [TT]mdrun[tt] makes a guess for the number of PME",
+ "nodes when the number of nodes is larger than 11 or performance wise",
+ "not compatible with the PME grid x dimension.",
+ "But the user should optimize npme. Performance statistics on this issue",
+ "are written at the end of the log file.",
+ "For good load balancing at high parallelization, the PME grid x and y",
+ "dimensions should be divisible by the number of PME nodes",
+ "(the simulation will run correctly also when this is not the case).",
+ "[PAR]",
+ "This section lists all options that affect the domain decomposition.",
+ "[PAR]",
+ "Option [TT]-rdd[tt] can be used to set the required maximum distance",
+ "for inter charge-group bonded interactions.",
+ "Communication for two-body bonded interactions below the non-bonded",
+ "cut-off distance always comes for free with the non-bonded communication.",
+ "Atoms beyond the non-bonded cut-off are only communicated when they have",
+ "missing bonded interactions; this means that the extra cost is minor",
+ "and nearly indepedent of the value of [TT]-rdd[tt].",
+ "With dynamic load balancing option [TT]-rdd[tt] also sets",
+ "the lower limit for the domain decomposition cell sizes.",
+ "By default [TT]-rdd[tt] is determined by [TT]mdrun[tt] based on",
+ "the initial coordinates. The chosen value will be a balance",
+ "between interaction range and communication cost.",
+ "[PAR]",
+ "When inter charge-group bonded interactions are beyond",
+ "the bonded cut-off distance, [TT]mdrun[tt] terminates with an error message.",
+ "For pair interactions and tabulated bonds",
+ "that do not generate exclusions, this check can be turned off",
+ "with the option [TT]-noddcheck[tt].",
+ "[PAR]",
+ "When constraints are present, option [TT]-rcon[tt] influences",
+ "the cell size limit as well.",
+ "Atoms connected by NC constraints, where NC is the LINCS order plus 1,",
+ "should not be beyond the smallest cell size. A error message is",
+ "generated when this happens and the user should change the decomposition",
+ "or decrease the LINCS order and increase the number of LINCS iterations.",
+ "By default [TT]mdrun[tt] estimates the minimum cell size required for P-LINCS",
+ "in a conservative fashion. For high parallelization it can be useful",
+ "to set the distance required for P-LINCS with the option [TT]-rcon[tt].",
+ "[PAR]",
+ "The [TT]-dds[tt] option sets the minimum allowed x, y and/or z scaling",
+ "of the cells with dynamic load balancing. [TT]mdrun[tt] will ensure that",
+ "the cells can scale down by at least this factor. This option is used",
+ "for the automated spatial decomposition (when not using [TT]-dd[tt])",
+ "as well as for determining the number of grid pulses, which in turn",
+ "sets the minimum allowed cell size. Under certain circumstances",
+ "the value of [TT]-dds[tt] might need to be adjusted to account for",
+ "high or low spatial inhomogeneity of the system.",
+ "[PAR]",
+ "The option [TT]-gcom[tt] can be used to only do global communication",
+ "every n steps.",
+ "This can improve performance for highly parallel simulations",
+ "where this global communication step becomes the bottleneck.",
+ "For a global thermostat and/or barostat the temperature",
+ "and/or pressure will also only be updated every [TT]-gcom[tt] steps.",
+ "By default it is set to the minimum of nstcalcenergy and nstlist.[PAR]",
+ "With [TT]-rerun[tt] an input trajectory can be given for which ",
+ "forces and energies will be (re)calculated. Neighbor searching will be",
+ "performed for every frame, unless [TT]nstlist[tt] is zero",
+ "(see the [TT].mdp[tt] file).[PAR]",
+ "ED (essential dynamics) sampling and/or additional flooding potentials",
+ "are switched on by using the [TT]-ei[tt] flag followed by an [TT].edi[tt]",
+ "file. The [TT].edi[tt] file can be produced with the [TT]make_edi[tt] tool",
+ "or by using options in the essdyn menu of the WHAT IF program.",
+ "[TT]mdrun[tt] produces a [TT].xvg[tt] output file that",
+ "contains projections of positions, velocities and forces onto selected",
+ "eigenvectors.[PAR]",
+ "When user-defined potential functions have been selected in the",
+ "[TT].mdp[tt] file the [TT]-table[tt] option is used to pass [TT]mdrun[tt]",
+ "a formatted table with potential functions. The file is read from",
+ "either the current directory or from the [TT]GMXLIB[tt] directory.",
+ "A number of pre-formatted tables are presented in the [TT]GMXLIB[tt] dir,",
+ "for 6-8, 6-9, 6-10, 6-11, 6-12 Lennard-Jones potentials with",
+ "normal Coulomb.",
+ "When pair interactions are present, a separate table for pair interaction",
+ "functions is read using the [TT]-tablep[tt] option.[PAR]",
+ "When tabulated bonded functions are present in the topology,",
+ "interaction functions are read using the [TT]-tableb[tt] option.",
+ "For each different tabulated interaction type the table file name is",
+ "modified in a different way: before the file extension an underscore is",
+ "appended, then a 'b' for bonds, an 'a' for angles or a 'd' for dihedrals",
+ "and finally the table number of the interaction type.[PAR]",
+ "The options [TT]-px[tt] and [TT]-pf[tt] are used for writing pull COM",
+ "coordinates and forces when pulling is selected",
+ "in the [TT].mdp[tt] file.[PAR]",
+ "With [TT]-multi[tt] or [TT]-multidir[tt], multiple systems can be ",
+ "simulated in parallel.",
+ "As many input files/directories are required as the number of systems. ",
+ "The [TT]-multidir[tt] option takes a list of directories (one for each ",
+ "system) and runs in each of them, using the input/output file names, ",
+ "such as specified by e.g. the [TT]-s[tt] option, relative to these ",
+ "directories.",
+ "With [TT]-multi[tt], the system number is appended to the run input ",
+ "and each output filename, for instance [TT]topol.tpr[tt] becomes",
+ "[TT]topol0.tpr[tt], [TT]topol1.tpr[tt] etc.",
+ "The number of nodes per system is the total number of nodes",
+ "divided by the number of systems.",
+ "One use of this option is for NMR refinement: when distance",
+ "or orientation restraints are present these can be ensemble averaged",
+ "over all the systems.[PAR]",
+ "With [TT]-replex[tt] replica exchange is attempted every given number",
+ "of steps. The number of replicas is set with the [TT]-multi[tt] or ",
+ "[TT]-multidir[tt] option, described above.",
+ "All run input files should use a different coupling temperature,",
+ "the order of the files is not important. The random seed is set with",
+ "[TT]-reseed[tt]. The velocities are scaled and neighbor searching",
+ "is performed after every exchange.[PAR]",
+ "Finally some experimental algorithms can be tested when the",
+ "appropriate options have been given. Currently under",
+ "investigation are: polarizability and X-ray bombardments.",
+ "[PAR]",
+ "The option [TT]-membed[tt] does what used to be g_membed, i.e. embed",
+ "a protein into a membrane. The data file should contain the options",
+ "that where passed to g_membed before. The [TT]-mn[tt] and [TT]-mp[tt]",
+ "both apply to this as well.",
+ "[PAR]",
+ "The option [TT]-pforce[tt] is useful when you suspect a simulation",
+ "crashes due to too large forces. With this option coordinates and",
+ "forces of atoms with a force larger than a certain value will",
+ "be printed to stderr.",
+ "[PAR]",
+ "Checkpoints containing the complete state of the system are written",
+ "at regular intervals (option [TT]-cpt[tt]) to the file [TT]-cpo[tt],",
+ "unless option [TT]-cpt[tt] is set to -1.",
+ "The previous checkpoint is backed up to [TT]state_prev.cpt[tt] to",
+ "make sure that a recent state of the system is always available,",
+ "even when the simulation is terminated while writing a checkpoint.",
+ "With [TT]-cpnum[tt] all checkpoint files are kept and appended",
+ "with the step number.",
+ "A simulation can be continued by reading the full state from file",
+ "with option [TT]-cpi[tt]. This option is intelligent in the way that",
+ "if no checkpoint file is found, Gromacs just assumes a normal run and",
+ "starts from the first step of the [TT].tpr[tt] file. By default the output",
+ "will be appending to the existing output files. The checkpoint file",
+ "contains checksums of all output files, such that you will never",
+ "loose data when some output files are modified, corrupt or removed.",
+ "There are three scenarios with [TT]-cpi[tt]:[PAR]",
+ "[TT]*[tt] no files with matching names are present: new output files are written[PAR]",
+ "[TT]*[tt] all files are present with names and checksums matching those stored",
+ "in the checkpoint file: files are appended[PAR]",
+ "[TT]*[tt] otherwise no files are modified and a fatal error is generated[PAR]",
+ "With [TT]-noappend[tt] new output files are opened and the simulation",
+ "part number is added to all output file names.",
+ "Note that in all cases the checkpoint file itself is not renamed",
+ "and will be overwritten, unless its name does not match",
+ "the [TT]-cpo[tt] option.",
+ "[PAR]",
+ "With checkpointing the output is appended to previously written",
+ "output files, unless [TT]-noappend[tt] is used or none of the previous",
+ "output files are present (except for the checkpoint file).",
+ "The integrity of the files to be appended is verified using checksums",
+ "which are stored in the checkpoint file. This ensures that output can",
+ "not be mixed up or corrupted due to file appending. When only some",
+ "of the previous output files are present, a fatal error is generated",
+ "and no old output files are modified and no new output files are opened.",
+ "The result with appending will be the same as from a single run.",
+ "The contents will be binary identical, unless you use a different number",
+ "of nodes or dynamic load balancing or the FFT library uses optimizations",
+ "through timing.",
+ "[PAR]",
+ "With option [TT]-maxh[tt] a simulation is terminated and a checkpoint",
+ "file is written at the first neighbor search step where the run time",
+ "exceeds [TT]-maxh[tt]*0.99 hours.",
+ "[PAR]",
+ "When [TT]mdrun[tt] receives a TERM signal, it will set nsteps to the current",
+ "step plus one. When [TT]mdrun[tt] receives an INT signal (e.g. when ctrl+C is",
+ "pressed), it will stop after the next neighbor search step ",
+ "(with nstlist=0 at the next step).",
+ "In both cases all the usual output will be written to file.",
+ "When running with MPI, a signal to one of the [TT]mdrun[tt] processes",
+ "is sufficient, this signal should not be sent to mpirun or",
+ "the [TT]mdrun[tt] process that is the parent of the others.",
+ "[PAR]",
+ "When [TT]mdrun[tt] is started with MPI, it does not run niced by default."
+ };
+ t_commrec *cr;
+ t_filenm fnm[] = {
+ { efTPX, NULL, NULL, ffREAD },
+ { efTRN, "-o", NULL, ffWRITE },
+ { efXTC, "-x", NULL, ffOPTWR },
+ { efCPT, "-cpi", NULL, ffOPTRD },
+ { efCPT, "-cpo", NULL, ffOPTWR },
+ { efSTO, "-c", "confout", ffWRITE },
+ { efEDR, "-e", "ener", ffWRITE },
+ { efLOG, "-g", "md", ffWRITE },
+ { efXVG, "-dhdl", "dhdl", ffOPTWR },
+ { efXVG, "-field", "field", ffOPTWR },
+ { efXVG, "-table", "table", ffOPTRD },
+ { efXVG, "-tabletf", "tabletf", ffOPTRD },
+ { efXVG, "-tablep", "tablep", ffOPTRD },
+ { efXVG, "-tableb", "table", ffOPTRD },
+ { efTRX, "-rerun", "rerun", ffOPTRD },
+ { efXVG, "-tpi", "tpi", ffOPTWR },
+ { efXVG, "-tpid", "tpidist", ffOPTWR },
+ { efEDI, "-ei", "sam", ffOPTRD },
+ { efXVG, "-eo", "edsam", ffOPTWR },
+ { efGCT, "-j", "wham", ffOPTRD },
+ { efGCT, "-jo", "bam", ffOPTWR },
+ { efXVG, "-ffout", "gct", ffOPTWR },
+ { efXVG, "-devout", "deviatie", ffOPTWR },
+ { efXVG, "-runav", "runaver", ffOPTWR },
+ { efXVG, "-px", "pullx", ffOPTWR },
+ { efXVG, "-pf", "pullf", ffOPTWR },
+ { efXVG, "-ro", "rotation", ffOPTWR },
+ { efLOG, "-ra", "rotangles",ffOPTWR },
+ { efLOG, "-rs", "rotslabs", ffOPTWR },
+ { efLOG, "-rt", "rottorque",ffOPTWR },
+ { efMTX, "-mtx", "nm", ffOPTWR },
+ { efNDX, "-dn", "dipole", ffOPTWR },
+ { efRND, "-multidir",NULL, ffOPTRDMULT},
+ { efDAT, "-membed", "membed", ffOPTRD },
+ { efTOP, "-mp", "membed", ffOPTRD },
+ { efNDX, "-mn", "membed", ffOPTRD }
+ };
+#define NFILE asize(fnm)
+
+ /* Command line options ! */
+ gmx_bool bCart = FALSE;
+ gmx_bool bPPPME = FALSE;
+ gmx_bool bPartDec = FALSE;
+ gmx_bool bDDBondCheck = TRUE;
+ gmx_bool bDDBondComm = TRUE;
+ gmx_bool bTunePME = TRUE;
+ gmx_bool bTestVerlet = FALSE;
+ gmx_bool bVerbose = FALSE;
+ gmx_bool bCompact = TRUE;
+ gmx_bool bSepPot = FALSE;
+ gmx_bool bRerunVSite = FALSE;
+ gmx_bool bIonize = FALSE;
+ gmx_bool bConfout = TRUE;
+ gmx_bool bReproducible = FALSE;
+
+ int npme=-1;
+ int nmultisim=0;
+ int nstglobalcomm=-1;
+ int repl_ex_nst=0;
+ int repl_ex_seed=-1;
+ int repl_ex_nex=0;
+ int nstepout=100;
+ int resetstep=-1;
+ int nsteps=-2; /* the value -2 means that the mdp option will be used */
+
+ rvec realddxyz={0,0,0};
+ const char *ddno_opt[ddnoNR+1] =
+ { NULL, "interleave", "pp_pme", "cartesian", NULL };
+ const char *dddlb_opt[] =
+ { NULL, "auto", "no", "yes", NULL };
++ const char *thread_aff_opt[threadaffNR+1] =
++ { NULL, "auto", "on", "off", NULL };
+ const char *nbpu_opt[] =
+ { NULL, "auto", "cpu", "gpu", "gpu_cpu", NULL };
+ real rdd=0.0,rconstr=0.0,dlb_scale=0.8,pforce=-1;
+ char *ddcsx=NULL,*ddcsy=NULL,*ddcsz=NULL;
+ real cpt_period=15.0,max_hours=-1;
+ gmx_bool bAppendFiles=TRUE;
+ gmx_bool bKeepAndNumCPT=FALSE;
+ gmx_bool bResetCountersHalfWay=FALSE;
+ output_env_t oenv=NULL;
+ const char *deviceOptions = "";
+
- { "-pin", FALSE, etBOOL, {&hw_opt.bThreadPinning},
- "Pin OpenMP threads to cores" },
- { "-pinht", FALSE, etBOOL, {&hw_opt.bPinHyperthreading},
- "Always pin threads to Hyper-Threading cores" },
++ gmx_hw_opt_t hw_opt={0,0,0,0,threadaffSEL,0,0,NULL};
+
+ t_pargs pa[] = {
+
+ { "-pd", FALSE, etBOOL,{&bPartDec},
+ "Use particle decompostion" },
+ { "-dd", FALSE, etRVEC,{&realddxyz},
+ "Domain decomposition grid, 0 is optimize" },
+ { "-ddorder", FALSE, etENUM, {ddno_opt},
+ "DD node order" },
+ { "-npme", FALSE, etINT, {&npme},
+ "Number of separate nodes to be used for PME, -1 is guess" },
+ { "-nt", FALSE, etINT, {&hw_opt.nthreads_tot},
+ "Total number of threads to start (0 is guess)" },
+ { "-ntmpi", FALSE, etINT, {&hw_opt.nthreads_tmpi},
+ "Number of thread-MPI threads to start (0 is guess)" },
+ { "-ntomp", FALSE, etINT, {&hw_opt.nthreads_omp},
+ "Number of OpenMP threads per MPI process/thread to start (0 is guess)" },
+ { "-ntomp_pme", FALSE, etINT, {&hw_opt.nthreads_omp_pme},
+ "Number of OpenMP threads per MPI process/thread to start (0 is -ntomp)" },
- "Core offset for pinning (for running multiple mdrun processes on a single physical node)" },
++ { "-pin", FALSE, etENUM, {thread_aff_opt},
++ "Fix threads (or processes) to specific cores" },
+ { "-pinoffset", FALSE, etINT, {&hw_opt.core_pinning_offset},
-
++ "The starting logical core number for pinning to cores; used to avoid pinning threads from different mdrun instances to the same core" },
++ { "-pinstride", FALSE, etINT, {&hw_opt.core_pinning_stride},
++ "Pinning distance in logical cores for threads, use 0 to minimize the number of threads per physical core" },
+ { "-gpu_id", FALSE, etSTR, {&hw_opt.gpu_id},
+ "List of GPU id's to use" },
+ { "-ddcheck", FALSE, etBOOL, {&bDDBondCheck},
+ "Check for all bonded interactions with DD" },
+ { "-ddbondcomm", FALSE, etBOOL, {&bDDBondComm},
+ "HIDDENUse special bonded atom communication when [TT]-rdd[tt] > cut-off" },
+ { "-rdd", FALSE, etREAL, {&rdd},
+ "The maximum distance for bonded interactions with DD (nm), 0 is determine from initial coordinates" },
+ { "-rcon", FALSE, etREAL, {&rconstr},
+ "Maximum distance for P-LINCS (nm), 0 is estimate" },
+ { "-dlb", FALSE, etENUM, {dddlb_opt},
+ "Dynamic load balancing (with DD)" },
+ { "-dds", FALSE, etREAL, {&dlb_scale},
+ "Minimum allowed dlb scaling of the DD cell size" },
+ { "-ddcsx", FALSE, etSTR, {&ddcsx},
+ "HIDDENThe DD cell sizes in x" },
+ { "-ddcsy", FALSE, etSTR, {&ddcsy},
+ "HIDDENThe DD cell sizes in y" },
+ { "-ddcsz", FALSE, etSTR, {&ddcsz},
+ "HIDDENThe DD cell sizes in z" },
+ { "-gcom", FALSE, etINT,{&nstglobalcomm},
+ "Global communication frequency" },
+ { "-nb", FALSE, etENUM, {&nbpu_opt},
+ "Calculate non-bonded interactions on" },
+ { "-tunepme", FALSE, etBOOL, {&bTunePME},
+ "Optimize PME load between PP/PME nodes or GPU/CPU" },
+ { "-testverlet", FALSE, etBOOL, {&bTestVerlet},
+ "Test the Verlet non-bonded scheme" },
+ { "-v", FALSE, etBOOL,{&bVerbose},
+ "Be loud and noisy" },
+ { "-compact", FALSE, etBOOL,{&bCompact},
+ "Write a compact log file" },
+ { "-seppot", FALSE, etBOOL, {&bSepPot},
+ "Write separate V and dVdl terms for each interaction type and node to the log file(s)" },
+ { "-pforce", FALSE, etREAL, {&pforce},
+ "Print all forces larger than this (kJ/mol nm)" },
+ { "-reprod", FALSE, etBOOL,{&bReproducible},
+ "Try to avoid optimizations that affect binary reproducibility" },
+ { "-cpt", FALSE, etREAL, {&cpt_period},
+ "Checkpoint interval (minutes)" },
+ { "-cpnum", FALSE, etBOOL, {&bKeepAndNumCPT},
+ "Keep and number checkpoint files" },
+ { "-append", FALSE, etBOOL, {&bAppendFiles},
+ "Append to previous output files when continuing from checkpoint instead of adding the simulation part number to all file names" },
+ { "-nsteps", FALSE, etINT, {&nsteps},
+ "Run this number of steps, overrides .mdp file option" },
+ { "-maxh", FALSE, etREAL, {&max_hours},
+ "Terminate after 0.99 times this time (hours)" },
+ { "-multi", FALSE, etINT,{&nmultisim},
+ "Do multiple simulations in parallel" },
+ { "-replex", FALSE, etINT, {&repl_ex_nst},
+ "Attempt replica exchange periodically with this period (steps)" },
+ { "-nex", FALSE, etINT, {&repl_ex_nex},
+ "Number of random exchanges to carry out each exchange interval (N^3 is one suggestion). -nex zero or not specified gives neighbor replica exchange." },
+ { "-reseed", FALSE, etINT, {&repl_ex_seed},
+ "Seed for replica exchange, -1 is generate a seed" },
+ { "-rerunvsite", FALSE, etBOOL, {&bRerunVSite},
+ "HIDDENRecalculate virtual site coordinates with [TT]-rerun[tt]" },
+ { "-ionize", FALSE, etBOOL,{&bIonize},
+ "Do a simulation including the effect of an X-Ray bombardment on your system" },
+ { "-confout", FALSE, etBOOL, {&bConfout},
+ "HIDDENWrite the last configuration with [TT]-c[tt] and force checkpointing at the last step" },
+ { "-stepout", FALSE, etINT, {&nstepout},
+ "HIDDENFrequency of writing the remaining runtime" },
+ { "-resetstep", FALSE, etINT, {&resetstep},
+ "HIDDENReset cycle counters after these many time steps" },
+ { "-resethway", FALSE, etBOOL, {&bResetCountersHalfWay},
+ "HIDDENReset the cycle counters after half the number of steps or halfway [TT]-maxh[tt]" }
+ };
+ gmx_edsam_t ed;
+ unsigned long Flags, PCA_Flags;
+ ivec ddxyz;
+ int dd_node_order;
+ gmx_bool bAddPart;
+ FILE *fplog,*fpmulti;
+ int sim_part,sim_part_fn;
+ const char *part_suffix=".part";
+ char suffix[STRLEN];
+ int rc;
+ char **multidir=NULL;
+
+
+ cr = init_par(&argc,&argv);
+
+ if (MASTER(cr))
+ CopyRight(stderr, argv[0]);
+
+ PCA_Flags = (PCA_CAN_SET_DEFFNM | (MASTER(cr) ? 0 : PCA_QUIET));
+
+ /* Comment this in to do fexist calls only on master
+ * works not with rerun or tables at the moment
+ * also comment out the version of init_forcerec in md.c
+ * with NULL instead of opt2fn
+ */
+ /*
+ if (!MASTER(cr))
+ {
+ PCA_Flags |= PCA_NOT_READ_NODE;
+ }
+ */
+
+ parse_common_args(&argc,argv,PCA_Flags, NFILE,fnm,asize(pa),pa,
+ asize(desc),desc,0,NULL, &oenv);
+
+
+ /* we set these early because they might be used in init_multisystem()
+ Note that there is the potential for npme>nnodes until the number of
+ threads is set later on, if there's thread parallelization. That shouldn't
+ lead to problems. */
+ dd_node_order = nenum(ddno_opt);
+ cr->npmenodes = npme;
+
++ hw_opt.thread_affinity = nenum(thread_aff_opt);
++
+ /* now check the -multi and -multidir option */
+ if (opt2bSet("-multidir", NFILE, fnm))
+ {
+ int i;
+ if (nmultisim > 0)
+ {
+ gmx_fatal(FARGS, "mdrun -multi and -multidir options are mutually exclusive.");
+ }
+ nmultisim = opt2fns(&multidir, "-multidir", NFILE, fnm);
+ }
+
+
+ if (repl_ex_nst != 0 && nmultisim < 2)
+ gmx_fatal(FARGS,"Need at least two replicas for replica exchange (option -multi)");
+
+ if (repl_ex_nex < 0)
+ gmx_fatal(FARGS,"Replica exchange number of exchanges needs to be positive");
+
+ if (nmultisim > 1) {
+#ifndef GMX_THREAD_MPI
+ gmx_bool bParFn = (multidir == NULL);
+ init_multisystem(cr, nmultisim, multidir, NFILE, fnm, bParFn);
+#else
+ gmx_fatal(FARGS,"mdrun -multi is not supported with the thread library.Please compile GROMACS with MPI support");
+#endif
+ }
+
+ bAddPart = !bAppendFiles;
+
+ /* Check if there is ANY checkpoint file available */
+ sim_part = 1;
+ sim_part_fn = sim_part;
+ if (opt2bSet("-cpi",NFILE,fnm))
+ {
+ if (bSepPot && bAppendFiles)
+ {
+ gmx_fatal(FARGS,"Output file appending is not supported with -seppot");
+ }
+
+ bAppendFiles =
+ read_checkpoint_simulation_part(opt2fn_master("-cpi", NFILE,
+ fnm,cr),
+ &sim_part_fn,NULL,cr,
+ bAppendFiles,NFILE,fnm,
+ part_suffix,&bAddPart);
+ if (sim_part_fn==0 && MULTIMASTER(cr))
+ {
+ fprintf(stdout,"No previous checkpoint file present, assuming this is a new run.\n");
+ }
+ else
+ {
+ sim_part = sim_part_fn + 1;
+ }
+
+ if (MULTISIM(cr) && MASTER(cr))
+ {
+ if (MULTIMASTER(cr))
+ {
+ /* Log file is not yet available, so if there's a
+ * problem we can only write to stderr. */
+ fpmulti = stderr;
+ }
+ else
+ {
+ fpmulti = NULL;
+ }
+ check_multi_int(fpmulti,cr->ms,sim_part,"simulation part",TRUE);
+ }
+ }
+ else
+ {
+ bAppendFiles = FALSE;
+ }
+
+ if (!bAppendFiles)
+ {
+ sim_part_fn = sim_part;
+ }
+
+ if (bAddPart)
+ {
+ /* Rename all output files (except checkpoint files) */
+ /* create new part name first (zero-filled) */
+ sprintf(suffix,"%s%04d",part_suffix,sim_part_fn);
+
+ add_suffix_to_output_names(fnm,NFILE,suffix);
+ if (MULTIMASTER(cr))
+ {
+ fprintf(stdout,"Checkpoint file is from part %d, new output files will be suffixed '%s'.\n",sim_part-1,suffix);
+ }
+ }
+
+ Flags = opt2bSet("-rerun",NFILE,fnm) ? MD_RERUN : 0;
+ Flags = Flags | (bSepPot ? MD_SEPPOT : 0);
+ Flags = Flags | (bIonize ? MD_IONIZE : 0);
+ Flags = Flags | (bPartDec ? MD_PARTDEC : 0);
+ Flags = Flags | (bDDBondCheck ? MD_DDBONDCHECK : 0);
+ Flags = Flags | (bDDBondComm ? MD_DDBONDCOMM : 0);
+ Flags = Flags | (bTunePME ? MD_TUNEPME : 0);
+ Flags = Flags | (bTestVerlet ? MD_TESTVERLET : 0);
+ Flags = Flags | (bConfout ? MD_CONFOUT : 0);
+ Flags = Flags | (bRerunVSite ? MD_RERUN_VSITE : 0);
+ Flags = Flags | (bReproducible ? MD_REPRODUCIBLE : 0);
+ Flags = Flags | (bAppendFiles ? MD_APPENDFILES : 0);
+ Flags = Flags | (opt2parg_bSet("-append", asize(pa),pa) ? MD_APPENDFILESSET : 0);
+ Flags = Flags | (bKeepAndNumCPT ? MD_KEEPANDNUMCPT : 0);
+ Flags = Flags | (sim_part>1 ? MD_STARTFROMCPT : 0);
+ Flags = Flags | (bResetCountersHalfWay ? MD_RESETCOUNTERSHALFWAY : 0);
+
+
+ /* We postpone opening the log file if we are appending, so we can
+ first truncate the old log file and append to the correct position
+ there instead. */
+ if ((MASTER(cr) || bSepPot) && !bAppendFiles)
+ {
+ gmx_log_open(ftp2fn(efLOG,NFILE,fnm),cr,
+ !bSepPot,Flags & MD_APPENDFILES,&fplog);
+ CopyRight(fplog,argv[0]);
+ please_cite(fplog,"Hess2008b");
+ please_cite(fplog,"Spoel2005a");
+ please_cite(fplog,"Lindahl2001a");
+ please_cite(fplog,"Berendsen95a");
+ }
+ else if (!MASTER(cr) && bSepPot)
+ {
+ gmx_log_open(ftp2fn(efLOG,NFILE,fnm),cr,!bSepPot,Flags,&fplog);
+ }
+ else
+ {
+ fplog = NULL;
+ }
+
+ ddxyz[XX] = (int)(realddxyz[XX] + 0.5);
+ ddxyz[YY] = (int)(realddxyz[YY] + 0.5);
+ ddxyz[ZZ] = (int)(realddxyz[ZZ] + 0.5);
+
+ rc = mdrunner(&hw_opt, fplog,cr,NFILE,fnm,oenv,bVerbose,bCompact,
+ nstglobalcomm, ddxyz,dd_node_order,rdd,rconstr,
+ dddlb_opt[0],dlb_scale,ddcsx,ddcsy,ddcsz,
+ nbpu_opt[0],
+ nsteps,nstepout,resetstep,
+ nmultisim,repl_ex_nst,repl_ex_nex,repl_ex_seed,
+ pforce, cpt_period,max_hours,deviceOptions,Flags);
+
+ gmx_finalize_par();
+
+ if (MULTIMASTER(cr)) {
+ thanx(stderr);
+ }
+
+ /* Log file has to be closed in mdrunner if we are appending to it
+ (fplog not set here) */
+ if (MASTER(cr) && !bAppendFiles)
+ {
+ gmx_log_close(fplog);
+ }
+
+ return rc;
+}
+
--- /dev/null
- #if defined(HAVE_SCHED_H) && defined(HAVE_SCHED_GETAFFINITY)
- #define _GNU_SOURCE
- #include <sched.h>
- #include <sys/syscall.h>
- #endif
+/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+ *
+ *
+ * This source code is part of
+ *
+ * G R O M A C S
+ *
+ * GROningen MAchine for Chemical Simulations
+ *
+ * VERSION 3.2.0
+ * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
+ * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
+ * Copyright (c) 2001-2004, The GROMACS development team,
+ * check out http://www.gromacs.org for more information.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * If you want to redistribute modifications, please consider that
+ * scientific software is very special. Version control is crucial -
+ * bugs must be traceable. We will be happy to consider code for
+ * inclusion in the official distribution, but derived work must not
+ * be called official GROMACS. Details are found in the README & COPYING
+ * files - if they are missing, get the official version at www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the papers on the package - you can find them in the top README file.
+ *
+ * For more info, check our website at http://www.gromacs.org
+ *
+ * And Hey:
+ * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
-
- #include "thread_mpi/threads.h"
+#include <signal.h>
+#include <stdlib.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include <string.h>
+#include <assert.h>
+
+#include "typedefs.h"
+#include "smalloc.h"
+#include "sysstuff.h"
+#include "statutil.h"
+#include "mdrun.h"
+#include "md_logging.h"
+#include "md_support.h"
+#include "network.h"
+#include "pull.h"
+#include "pull_rotation.h"
+#include "names.h"
+#include "disre.h"
+#include "orires.h"
+#include "pme.h"
+#include "mdatoms.h"
+#include "repl_ex.h"
+#include "qmmm.h"
+#include "domdec.h"
+#include "partdec.h"
+#include "coulomb.h"
+#include "constr.h"
+#include "mvdata.h"
+#include "checkpoint.h"
+#include "mtop_util.h"
+#include "sighandler.h"
+#include "tpxio.h"
+#include "txtdump.h"
+#include "gmx_detect_hardware.h"
+#include "gmx_omp_nthreads.h"
+#include "pull_rotation.h"
+#include "calc_verletbuf.h"
+#include "../mdlib/nbnxn_search.h"
+#include "../mdlib/nbnxn_consts.h"
+#include "gmx_fatal_collective.h"
+#include "membed.h"
+#include "macros.h"
+#include "gmx_omp.h"
- the main thread returns */
- ret=tMPI_Init_fn(TRUE, hw_opt->nthreads_tmpi,
- (hw_opt->bThreadPinning ? TMPI_AFFINITY_ALL_CORES : TMPI_AFFINITY_NONE),
++#include "gmx_thread_affinity.h"
+
+#ifdef GMX_LIB_MPI
+#include <mpi.h>
+#endif
+#ifdef GMX_THREAD_MPI
+#include "tmpi.h"
+#endif
+
+#ifdef GMX_FAHCORE
+#include "corewrap.h"
+#endif
+
+#include "gpu_utils.h"
+#include "nbnxn_cuda_data_mgmt.h"
+
+typedef struct {
+ gmx_integrator_t *func;
+} gmx_intp_t;
+
+/* The array should match the eI array in include/types/enums.h */
+const gmx_intp_t integrator[eiNR] = { {do_md}, {do_steep}, {do_cg}, {do_md}, {do_md}, {do_nm}, {do_lbfgs}, {do_tpi}, {do_tpi}, {do_md}, {do_md},{do_md}};
+
+gmx_large_int_t deform_init_init_step_tpx;
+matrix deform_init_box_tpx;
+#ifdef GMX_THREAD_MPI
+tMPI_Thread_mutex_t deform_init_box_mutex=TMPI_THREAD_MUTEX_INITIALIZER;
+#endif
+
+
+#ifdef GMX_THREAD_MPI
+struct mdrunner_arglist
+{
+ gmx_hw_opt_t *hw_opt;
+ FILE *fplog;
+ t_commrec *cr;
+ int nfile;
+ const t_filenm *fnm;
+ output_env_t oenv;
+ gmx_bool bVerbose;
+ gmx_bool bCompact;
+ int nstglobalcomm;
+ ivec ddxyz;
+ int dd_node_order;
+ real rdd;
+ real rconstr;
+ const char *dddlb_opt;
+ real dlb_scale;
+ const char *ddcsx;
+ const char *ddcsy;
+ const char *ddcsz;
+ const char *nbpu_opt;
+ int nsteps_cmdline;
+ int nstepout;
+ int resetstep;
+ int nmultisim;
+ int repl_ex_nst;
+ int repl_ex_nex;
+ int repl_ex_seed;
+ real pforce;
+ real cpt_period;
+ real max_hours;
+ const char *deviceOptions;
+ unsigned long Flags;
+ int ret; /* return value */
+};
+
+
+/* The function used for spawning threads. Extracts the mdrunner()
+ arguments from its one argument and calls mdrunner(), after making
+ a commrec. */
+static void mdrunner_start_fn(void *arg)
+{
+ struct mdrunner_arglist *mda=(struct mdrunner_arglist*)arg;
+ struct mdrunner_arglist mc=*mda; /* copy the arg list to make sure
+ that it's thread-local. This doesn't
+ copy pointed-to items, of course,
+ but those are all const. */
+ t_commrec *cr; /* we need a local version of this */
+ FILE *fplog=NULL;
+ t_filenm *fnm;
+
+ fnm = dup_tfn(mc.nfile, mc.fnm);
+
+ cr = init_par_threads(mc.cr);
+
+ if (MASTER(cr))
+ {
+ fplog=mc.fplog;
+ }
+
+ mda->ret=mdrunner(mc.hw_opt, fplog, cr, mc.nfile, fnm, mc.oenv,
+ mc.bVerbose, mc.bCompact, mc.nstglobalcomm,
+ mc.ddxyz, mc.dd_node_order, mc.rdd,
+ mc.rconstr, mc.dddlb_opt, mc.dlb_scale,
+ mc.ddcsx, mc.ddcsy, mc.ddcsz,
+ mc.nbpu_opt,
+ mc.nsteps_cmdline, mc.nstepout, mc.resetstep,
+ mc.nmultisim, mc.repl_ex_nst, mc.repl_ex_nex, mc.repl_ex_seed, mc.pforce,
+ mc.cpt_period, mc.max_hours, mc.deviceOptions, mc.Flags);
+}
+
+/* called by mdrunner() to start a specific number of threads (including
+ the main thread) for thread-parallel runs. This in turn calls mdrunner()
+ for each thread.
+ All options besides nthreads are the same as for mdrunner(). */
+static t_commrec *mdrunner_start_threads(gmx_hw_opt_t *hw_opt,
+ FILE *fplog,t_commrec *cr,int nfile,
+ const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose,
+ gmx_bool bCompact, int nstglobalcomm,
+ ivec ddxyz,int dd_node_order,real rdd,real rconstr,
+ const char *dddlb_opt,real dlb_scale,
+ const char *ddcsx,const char *ddcsy,const char *ddcsz,
+ const char *nbpu_opt,
+ int nsteps_cmdline, int nstepout,int resetstep,
+ int nmultisim,int repl_ex_nst,int repl_ex_nex, int repl_ex_seed,
+ real pforce,real cpt_period, real max_hours,
+ const char *deviceOptions, unsigned long Flags)
+{
+ int ret;
+ struct mdrunner_arglist *mda;
+ t_commrec *crn; /* the new commrec */
+ t_filenm *fnmn;
+
+ /* first check whether we even need to start tMPI */
+ if (hw_opt->nthreads_tmpi < 2)
+ {
+ return cr;
+ }
+
+ /* a few small, one-time, almost unavoidable memory leaks: */
+ snew(mda,1);
+ fnmn=dup_tfn(nfile, fnm);
+
+ /* fill the data structure to pass as void pointer to thread start fn */
+ mda->hw_opt=hw_opt;
+ mda->fplog=fplog;
+ mda->cr=cr;
+ mda->nfile=nfile;
+ mda->fnm=fnmn;
+ mda->oenv=oenv;
+ mda->bVerbose=bVerbose;
+ mda->bCompact=bCompact;
+ mda->nstglobalcomm=nstglobalcomm;
+ mda->ddxyz[XX]=ddxyz[XX];
+ mda->ddxyz[YY]=ddxyz[YY];
+ mda->ddxyz[ZZ]=ddxyz[ZZ];
+ mda->dd_node_order=dd_node_order;
+ mda->rdd=rdd;
+ mda->rconstr=rconstr;
+ mda->dddlb_opt=dddlb_opt;
+ mda->dlb_scale=dlb_scale;
+ mda->ddcsx=ddcsx;
+ mda->ddcsy=ddcsy;
+ mda->ddcsz=ddcsz;
+ mda->nbpu_opt=nbpu_opt;
+ mda->nsteps_cmdline=nsteps_cmdline;
+ mda->nstepout=nstepout;
+ mda->resetstep=resetstep;
+ mda->nmultisim=nmultisim;
+ mda->repl_ex_nst=repl_ex_nst;
+ mda->repl_ex_nex=repl_ex_nex;
+ mda->repl_ex_seed=repl_ex_seed;
+ mda->pforce=pforce;
+ mda->cpt_period=cpt_period;
+ mda->max_hours=max_hours;
+ mda->deviceOptions=deviceOptions;
+ mda->Flags=Flags;
+
+ /* now spawn new threads that start mdrunner_start_fn(), while
- /* Check the process affinity mask. If it is non-zero, something
- * else has set the affinity, and mdrun should honor that and
- * not attempt to do its own thread pinning.
- *
- * This function should be called twice. Once before the OpenMP
- * library gets initialized with bAfterOpenMPInit=FALSE (which will
- * detect affinity set by external tools like taskset), and again
- * later, after the OpenMP initialization, with bAfterOpenMPInit=TRUE
- * (which will detect affinity changes made by the OpenMP library).
- *
- * Note that this will only work on Linux, because we use a GNU
- * feature. */
- static void check_cpu_affinity_set(FILE *fplog, const t_commrec *cr,
- gmx_hw_opt_t *hw_opt, int ncpus,
- gmx_bool bAfterOpenmpInit)
- {
- #ifdef HAVE_SCHED_GETAFFINITY
- cpu_set_t mask_current;
- int i, ret, cpu_count, cpu_set;
- gmx_bool bAllSet;
-
- assert(hw_opt);
- if (!hw_opt->bThreadPinning)
- {
- /* internal affinity setting is off, don't bother checking process affinity */
- return;
- }
-
- CPU_ZERO(&mask_current);
- if ((ret = sched_getaffinity(0, sizeof(cpu_set_t), &mask_current)) != 0)
- {
- /* failed to query affinity mask, will just return */
- if (debug)
- {
- fprintf(debug, "Failed to query affinity mask (error %d)", ret);
- }
- return;
- }
-
- /* Before proceeding with the actual check, make sure that the number of
- * detected CPUs is >= the CPUs in the current set.
- * We need to check for CPU_COUNT as it was added only in glibc 2.6. */
- #ifdef CPU_COUNT
- if (ncpus < CPU_COUNT(&mask_current))
- {
- if (debug)
- {
- fprintf(debug, "%d CPUs detected, but %d was returned by CPU_COUNT",
- ncpus, CPU_COUNT(&mask_current));
- }
- return;
- }
- #endif /* CPU_COUNT */
-
- bAllSet = TRUE;
- for (i = 0; (i < ncpus && i < CPU_SETSIZE); i++)
- {
- bAllSet = bAllSet && (CPU_ISSET(i, &mask_current) != 0);
- }
-
- if (!bAllSet)
- {
- if (!bAfterOpenmpInit)
- {
- md_print_warn(cr, fplog,
- "%s detected a non-default process affinity, "
- "so it will not attempt to pin its threads", ShortProgram());
- }
- else
- {
- md_print_warn(cr, fplog,
- "%s detected a non-default process affinity, "
- "probably set by the OpenMP library, "
- "so it will not attempt to pin its threads", ShortProgram());
- }
- hw_opt->bThreadPinning = FALSE;
-
- if (debug)
- {
- fprintf(debug, "Non-default affinity mask found, mdrun will not pin threads\n");
- }
- }
- else
- {
- if (debug)
- {
- fprintf(debug, "Default affinity mask found\n");
- }
- }
- #endif /* HAVE_SCHED_GETAFFINITY */
- }
-
- /* Set CPU affinity. Can be important for performance.
- On some systems (e.g. Cray) CPU Affinity is set by default.
- But default assigning doesn't work (well) with only some ranks
- having threads. This causes very low performance.
- External tools have cumbersome syntax for setting affinity
- in the case that only some ranks have threads.
- Thus it is important that GROMACS sets the affinity internally
- if only PME is using threads.
- */
- static void set_cpu_affinity(FILE *fplog,
- const t_commrec *cr,
- gmx_hw_opt_t *hw_opt,
- int nthreads_pme,
- const gmx_hw_info_t *hwinfo,
- const t_inputrec *inputrec)
- {
- #if defined GMX_THREAD_MPI
- /* With the number of TMPI threads equal to the number of cores
- * we already pinned in thread-MPI, so don't pin again here.
- */
- if (hw_opt->nthreads_tmpi == tMPI_Thread_get_hw_number())
- {
- return;
- }
- #endif
-
- #ifndef __APPLE__
- /* If the tMPI thread affinity setting is not supported encourage the user
- * to report it as it's either a bug or an exotic platform which we might
- * want to support. */
- if (tMPI_Thread_setaffinity_support() != TMPI_SETAFFINITY_SUPPORT_YES)
- {
- md_print_warn(NULL, fplog,
- "Can not set thread affinities on the current plarform. On NUMA systems this\n"
- "can cause performance degradation. If you think your platform should support\n"
- "setting affinities, contact the GROMACS developers.");
- return;
- }
- #endif /* __APPLE__ */
-
- if (hw_opt->bThreadPinning)
- {
- int nth_affinity_set, thread_id_node, thread_id,
- nthread_local, nthread_node, nthread_hw_max, nphyscore;
- int offset;
- char *env;
-
- /* threads on this MPI process or TMPI thread */
- if (cr->duty & DUTY_PP)
- {
- nthread_local = gmx_omp_nthreads_get(emntNonbonded);
- }
- else
- {
- nthread_local = gmx_omp_nthreads_get(emntPME);
- }
-
- /* map the current process to cores */
- thread_id_node = 0;
- nthread_node = nthread_local;
- #ifdef GMX_MPI
- if (PAR(cr) || MULTISIM(cr))
- {
- /* We need to determine a scan of the thread counts in this
- * compute node.
- */
- MPI_Comm comm_intra;
-
- MPI_Comm_split(MPI_COMM_WORLD,gmx_hostname_num(),cr->rank_intranode,
- &comm_intra);
- MPI_Scan(&nthread_local,&thread_id_node,1,MPI_INT,MPI_SUM,comm_intra);
- /* MPI_Scan is inclusive, but here we need exclusive */
- thread_id_node -= nthread_local;
- /* Get the total number of threads on this physical node */
- MPI_Allreduce(&nthread_local,&nthread_node,1,MPI_INT,MPI_SUM,comm_intra);
- MPI_Comm_free(&comm_intra);
- }
- #endif
-
- offset = 0;
- if (hw_opt->core_pinning_offset > 0)
- {
- offset = hw_opt->core_pinning_offset;
- if (SIMMASTER(cr))
- {
- fprintf(stderr, "Applying core pinning offset %d\n", offset);
- }
- if (fplog)
- {
- fprintf(fplog, "Applying core pinning offset %d\n", offset);
- }
- }
-
- /* With Intel Hyper-Threading enabled, we want to pin consecutive
- * threads to physical cores when using more threads than physical
- * cores or when the user requests so.
- */
- nthread_hw_max = hwinfo->nthreads_hw_avail;
- nphyscore = -1;
- if (hw_opt->bPinHyperthreading ||
- (gmx_cpuid_x86_smt(hwinfo->cpuid_info) == GMX_CPUID_X86_SMT_ENABLED &&
- nthread_node > nthread_hw_max/2 && getenv("GMX_DISABLE_PINHT") == NULL))
- {
- if (gmx_cpuid_x86_smt(hwinfo->cpuid_info) != GMX_CPUID_X86_SMT_ENABLED)
- {
- /* We print to stderr on all processes, as we might have
- * different settings on different physical nodes.
- */
- if (gmx_cpuid_vendor(hwinfo->cpuid_info) != GMX_CPUID_VENDOR_INTEL)
- {
- md_print_warn(NULL, fplog, "Pinning for Hyper-Threading layout requested, "
- "but non-Intel CPU detected (vendor: %s)\n",
- gmx_cpuid_vendor_string[gmx_cpuid_vendor(hwinfo->cpuid_info)]);
- }
- else
- {
- md_print_warn(NULL, fplog, "Pinning for Hyper-Threading layout requested, "
- "but the CPU detected does not have Intel Hyper-Threading support "
- "(or it is turned off)\n");
- }
- }
- nphyscore = nthread_hw_max/2;
-
- if (SIMMASTER(cr))
- {
- fprintf(stderr, "Pinning to Hyper-Threading cores with %d physical cores in a compute node\n",
- nphyscore);
- }
- if (fplog)
- {
- fprintf(fplog, "Pinning to Hyper-Threading cores with %d physical cores in a compute node\n",
- nphyscore);
- }
- }
-
- /* Set the per-thread affinity. In order to be able to check the success
- * of affinity settings, we will set nth_affinity_set to 1 on threads
- * where the affinity setting succeded and to 0 where it failed.
- * Reducing these 0/1 values over the threads will give the total number
- * of threads on which we succeeded.
- */
- nth_affinity_set = 0;
- #pragma omp parallel firstprivate(thread_id_node) num_threads(nthread_local) \
- reduction(+:nth_affinity_set)
- {
- int core;
- gmx_bool setaffinity_ret;
-
- thread_id = gmx_omp_get_thread_num();
- thread_id_node += thread_id;
- if (nphyscore <= 0)
- {
- core = offset + thread_id_node;
- }
- else
- {
- /* Lock pairs of threads to the same hyperthreaded core */
- core = offset + thread_id_node/2 + (thread_id_node % 2)*nphyscore;
- }
-
- setaffinity_ret = tMPI_Thread_setaffinity_single(tMPI_Thread_self(), core);
-
- /* store the per-thread success-values of the setaffinity */
- nth_affinity_set = (setaffinity_ret == 0);
-
- if (debug)
- {
- fprintf(debug, "On rank %2d, thread %2d, core %2d the affinity setting returned %d\n",
- cr->nodeid, gmx_omp_get_thread_num(), core, setaffinity_ret);
- }
- }
-
- if (nth_affinity_set > nthread_local)
- {
- char msg[STRLEN];
-
- sprintf(msg, "Looks like we have set affinity for more threads than "
- "we have (%d > %d)!\n", nth_affinity_set, nthread_local);
- gmx_incons(msg);
- }
- else
- {
- /* check & warn if some threads failed to set their affinities */
- if (nth_affinity_set != nthread_local)
- {
- char sbuf1[STRLEN], sbuf2[STRLEN];
-
- /* sbuf1 contains rank info, while sbuf2 OpenMP thread info */
- sbuf1[0] = sbuf2[0] = '\0';
- #ifdef GMX_MPI
- #ifdef GMX_THREAD_MPI
- sprintf(sbuf1, "In thread-MPI thread #%d: ", cr->nodeid);
- #else /* GMX_LIB_MPI */
- sprintf(sbuf1, "In MPI process #%d: ", cr->nodeid);
- #endif
- #endif /* GMX_MPI */
-
- if (nthread_local > 1)
- {
- sprintf(sbuf2, "of %d/%d thread%s ",
- nthread_local - nth_affinity_set, nthread_local,
- (nthread_local - nth_affinity_set) > 1 ? "s" : "");
- }
-
- md_print_warn(NULL, fplog,
- "NOTE: %sAffinity setting %sfailed.\n"
- " This can cause performance degradation!",
- sbuf1, sbuf2);
- }
- }
- }
- }
-
-
++ the main thread returns, we set thread affinity later */
++ ret=tMPI_Init_fn(TRUE, hw_opt->nthreads_tmpi, TMPI_AFFINITY_NONE,
+ mdrunner_start_fn, (void*)(mda) );
+ if (ret!=TMPI_SUCCESS)
+ return NULL;
+
+ /* make a new comm_rec to reflect the new situation */
+ crn=init_par_threads(cr);
+ return crn;
+}
+
+
+static int get_tmpi_omp_thread_division(const gmx_hw_info_t *hwinfo,
+ const gmx_hw_opt_t *hw_opt,
+ int nthreads_tot,
+ int ngpu)
+{
+ int nthreads_tmpi;
+
+ /* There are no separate PME nodes here, as we ensured in
+ * check_and_update_hw_opt that nthreads_tmpi>0 with PME nodes
+ * and a conditional ensures we would not have ended up here.
+ * Note that separate PME nodes might be switched on later.
+ */
+ if (ngpu > 0)
+ {
+ nthreads_tmpi = ngpu;
+ if (nthreads_tot > 0 && nthreads_tot < nthreads_tmpi)
+ {
+ nthreads_tmpi = nthreads_tot;
+ }
+ }
+ else if (hw_opt->nthreads_omp > 0)
+ {
+ /* Here we could oversubscribe, when we do, we issue a warning later */
+ nthreads_tmpi = max(1,nthreads_tot/hw_opt->nthreads_omp);
+ }
+ else
+ {
+ /* TODO choose nthreads_omp based on hardware topology
+ when we have a hardware topology detection library */
+ /* In general, when running up to 4 threads, OpenMP should be faster.
+ * Note: on AMD Bulldozer we should avoid running OpenMP over two dies.
+ * On Intel>=Nehalem running OpenMP on a single CPU is always faster,
+ * even on two CPUs it's usually faster (but with many OpenMP threads
+ * it could be faster not to use HT, currently we always use HT).
+ * On Nehalem/Westmere we want to avoid running 16 threads over
+ * two CPUs with HT, so we need a limit<16; thus we use 12.
+ * A reasonable limit for Intel Sandy and Ivy bridge,
+ * not knowing the topology, is 16 threads.
+ */
+ const int nthreads_omp_always_faster = 4;
+ const int nthreads_omp_always_faster_Nehalem = 12;
+ const int nthreads_omp_always_faster_SandyBridge = 16;
+ const int first_model_Nehalem = 0x1A;
+ const int first_model_SandyBridge = 0x2A;
+ gmx_bool bIntel_Family6;
+
+ bIntel_Family6 =
+ (gmx_cpuid_vendor(hwinfo->cpuid_info) == GMX_CPUID_VENDOR_INTEL &&
+ gmx_cpuid_family(hwinfo->cpuid_info) == 6);
+
+ if (nthreads_tot <= nthreads_omp_always_faster ||
+ (bIntel_Family6 &&
+ ((gmx_cpuid_model(hwinfo->cpuid_info) >= nthreads_omp_always_faster_Nehalem && nthreads_tot <= nthreads_omp_always_faster_Nehalem) ||
+ (gmx_cpuid_model(hwinfo->cpuid_info) >= nthreads_omp_always_faster_SandyBridge && nthreads_tot <= nthreads_omp_always_faster_SandyBridge))))
+ {
+ /* Use pure OpenMP parallelization */
+ nthreads_tmpi = 1;
+ }
+ else
+ {
+ /* Don't use OpenMP parallelization */
+ nthreads_tmpi = nthreads_tot;
+ }
+ }
+
+ return nthreads_tmpi;
+}
+
+
+/* Get the number of threads to use for thread-MPI based on how many
+ * were requested, which algorithms we're using,
+ * and how many particles there are.
+ * At the point we have already called check_and_update_hw_opt.
+ * Thus all options should be internally consistent and consistent
+ * with the hardware, except that ntmpi could be larger than #GPU.
+ */
+static int get_nthreads_mpi(gmx_hw_info_t *hwinfo,
+ gmx_hw_opt_t *hw_opt,
+ t_inputrec *inputrec, gmx_mtop_t *mtop,
+ const t_commrec *cr,
+ FILE *fplog)
+{
+ int nthreads_hw,nthreads_tot_max,nthreads_tmpi,nthreads_new,ngpu;
+ int min_atoms_per_mpi_thread;
+ char *env;
+ char sbuf[STRLEN];
+ gmx_bool bCanUseGPU;
+
+ if (hw_opt->nthreads_tmpi > 0)
+ {
+ /* Trivial, return right away */
+ return hw_opt->nthreads_tmpi;
+ }
+
+ nthreads_hw = hwinfo->nthreads_hw_avail;
+
+ /* How many total (#tMPI*#OpenMP) threads can we start? */
+ if (hw_opt->nthreads_tot > 0)
+ {
+ nthreads_tot_max = hw_opt->nthreads_tot;
+ }
+ else
+ {
+ nthreads_tot_max = nthreads_hw;
+ }
+
+ bCanUseGPU = (inputrec->cutoff_scheme == ecutsVERLET && hwinfo->bCanUseGPU);
+ if (bCanUseGPU)
+ {
+ ngpu = hwinfo->gpu_info.ncuda_dev_use;
+ }
+ else
+ {
+ ngpu = 0;
+ }
+
+ nthreads_tmpi =
+ get_tmpi_omp_thread_division(hwinfo,hw_opt,nthreads_tot_max,ngpu);
+
+ if (inputrec->eI == eiNM || EI_TPI(inputrec->eI))
+ {
+ /* Steps are divided over the nodes iso splitting the atoms */
+ min_atoms_per_mpi_thread = 0;
+ }
+ else
+ {
+ if (bCanUseGPU)
+ {
+ min_atoms_per_mpi_thread = MIN_ATOMS_PER_GPU;
+ }
+ else
+ {
+ min_atoms_per_mpi_thread = MIN_ATOMS_PER_MPI_THREAD;
+ }
+ }
+
+ /* Check if an algorithm does not support parallel simulation. */
+ if (nthreads_tmpi != 1 &&
+ ( inputrec->eI == eiLBFGS ||
+ inputrec->coulombtype == eelEWALD ) )
+ {
+ nthreads_tmpi = 1;
+
+ md_print_warn(cr,fplog,"The integration or electrostatics algorithm doesn't support parallel runs. Using a single thread-MPI thread.\n");
+ if (hw_opt->nthreads_tmpi > nthreads_tmpi)
+ {
+ gmx_fatal(FARGS,"You asked for more than 1 thread-MPI thread, but an algorithm doesn't support that");
+ }
+ }
+ else if (mtop->natoms/nthreads_tmpi < min_atoms_per_mpi_thread)
+ {
+ /* the thread number was chosen automatically, but there are too many
+ threads (too few atoms per thread) */
+ nthreads_new = max(1,mtop->natoms/min_atoms_per_mpi_thread);
+
+ /* Avoid partial use of Hyper-Threading */
+ if (gmx_cpuid_x86_smt(hwinfo->cpuid_info) == GMX_CPUID_X86_SMT_ENABLED &&
+ nthreads_new > nthreads_hw/2 && nthreads_new < nthreads_hw)
+ {
+ nthreads_new = nthreads_hw/2;
+ }
+
+ /* Avoid large prime numbers in the thread count */
+ if (nthreads_new >= 6)
+ {
+ /* Use only 6,8,10 with additional factors of 2 */
+ int fac;
+
+ fac = 2;
+ while (3*fac*2 <= nthreads_new)
+ {
+ fac *= 2;
+ }
+
+ nthreads_new = (nthreads_new/fac)*fac;
+ }
+ else
+ {
+ /* Avoid 5 */
+ if (nthreads_new == 5)
+ {
+ nthreads_new = 4;
+ }
+ }
+
+ nthreads_tmpi = nthreads_new;
+
+ fprintf(stderr,"\n");
+ fprintf(stderr,"NOTE: Parallelization is limited by the small number of atoms,\n");
+ fprintf(stderr," only starting %d thread-MPI threads.\n",nthreads_tmpi);
+ fprintf(stderr," You can use the -nt and/or -ntmpi option to optimize the number of threads.\n\n");
+ }
+
+ return nthreads_tmpi;
+}
+#endif /* GMX_THREAD_MPI */
+
+
+/* Environment variable for setting nstlist */
+static const char* NSTLIST_ENVVAR = "GMX_NSTLIST";
+/* Try to increase nstlist when using a GPU with nstlist less than this */
+static const int NSTLIST_GPU_ENOUGH = 20;
+/* Increase nstlist until the non-bonded cost increases more than this factor */
+static const float NBNXN_GPU_LIST_OK_FAC = 1.25;
+/* Don't increase nstlist beyond a non-bonded cost increases of this factor */
+static const float NBNXN_GPU_LIST_MAX_FAC = 1.40;
+
+/* Try to increase nstlist when running on a GPU */
+static void increase_nstlist(FILE *fp,t_commrec *cr,
+ t_inputrec *ir,const gmx_mtop_t *mtop,matrix box)
+{
+ char *env;
+ int nstlist_orig,nstlist_prev;
+ verletbuf_list_setup_t ls;
+ real rlist_inc,rlist_ok,rlist_max,rlist_new,rlist_prev;
+ int i;
+ t_state state_tmp;
+ gmx_bool bBox,bDD,bCont;
+ const char *nstl_fmt="\nFor optimal performance with a GPU nstlist (now %d) should be larger.\nThe optimum depends on your CPU and GPU resources.\nYou might want to try several nstlist values.\n";
+ const char *vbd_err="Can not increase nstlist for GPU run because verlet-buffer-drift is not set or used";
+ const char *box_err="Can not increase nstlist for GPU run because the box is too small";
+ const char *dd_err ="Can not increase nstlist for GPU run because of domain decomposition limitations";
+ char buf[STRLEN];
+
+ /* Number of + nstlist alternative values to try when switching */
+ const int nstl[]={ 20, 25, 40, 50 };
+#define NNSTL sizeof(nstl)/sizeof(nstl[0])
+
+ env = getenv(NSTLIST_ENVVAR);
+ if (env == NULL)
+ {
+ if (fp != NULL)
+ {
+ fprintf(fp,nstl_fmt,ir->nstlist);
+ }
+ }
+
+ if (ir->verletbuf_drift == 0)
+ {
+ gmx_fatal(FARGS,"You are using an old tpr file with a GPU, please generate a new tpr file with an up to date version of grompp");
+ }
+
+ if (ir->verletbuf_drift < 0)
+ {
+ if (MASTER(cr))
+ {
+ fprintf(stderr,"%s\n",vbd_err);
+ }
+ if (fp != NULL)
+ {
+ fprintf(fp,"%s\n",vbd_err);
+ }
+
+ return;
+ }
+
+ nstlist_orig = ir->nstlist;
+ if (env != NULL)
+ {
+ sprintf(buf,"Getting nstlist from environment variable GMX_NSTLIST=%s",env);
+ if (MASTER(cr))
+ {
+ fprintf(stderr,"%s\n",buf);
+ }
+ if (fp != NULL)
+ {
+ fprintf(fp,"%s\n",buf);
+ }
+ sscanf(env,"%d",&ir->nstlist);
+ }
+
+ verletbuf_get_list_setup(TRUE,&ls);
+
+ /* Allow rlist to make the list double the size of the cut-off sphere */
+ rlist_inc = nbnxn_get_rlist_effective_inc(NBNXN_GPU_CLUSTER_SIZE,mtop->natoms/det(box));
+ rlist_ok = (max(ir->rvdw,ir->rcoulomb) + rlist_inc)*pow(NBNXN_GPU_LIST_OK_FAC,1.0/3.0) - rlist_inc;
+ rlist_max = (max(ir->rvdw,ir->rcoulomb) + rlist_inc)*pow(NBNXN_GPU_LIST_MAX_FAC,1.0/3.0) - rlist_inc;
+ if (debug)
+ {
+ fprintf(debug,"GPU nstlist tuning: rlist_inc %.3f rlist_max %.3f\n",
+ rlist_inc,rlist_max);
+ }
+
+ i = 0;
+ nstlist_prev = nstlist_orig;
+ rlist_prev = ir->rlist;
+ do
+ {
+ if (env == NULL)
+ {
+ ir->nstlist = nstl[i];
+ }
+
+ /* Set the pair-list buffer size in ir */
+ calc_verlet_buffer_size(mtop,det(box),ir,ir->verletbuf_drift,&ls,
+ NULL,&rlist_new);
+
+ /* Does rlist fit in the box? */
+ bBox = (sqr(rlist_new) < max_cutoff2(ir->ePBC,box));
+ bDD = TRUE;
+ if (bBox && DOMAINDECOMP(cr))
+ {
+ /* Check if rlist fits in the domain decomposition */
+ if (inputrec2nboundeddim(ir) < DIM)
+ {
+ gmx_incons("Changing nstlist with domain decomposition and unbounded dimensions is not implemented yet");
+ }
+ copy_mat(box,state_tmp.box);
+ bDD = change_dd_cutoff(cr,&state_tmp,ir,rlist_new);
+ }
+
+ bCont = FALSE;
+
+ if (env == NULL)
+ {
+ if (bBox && bDD && rlist_new <= rlist_max)
+ {
+ /* Increase nstlist */
+ nstlist_prev = ir->nstlist;
+ rlist_prev = rlist_new;
+ bCont = (i+1 < NNSTL && rlist_new < rlist_ok);
+ }
+ else
+ {
+ /* Stick with the previous nstlist */
+ ir->nstlist = nstlist_prev;
+ rlist_new = rlist_prev;
+ bBox = TRUE;
+ bDD = TRUE;
+ }
+ }
+
+ i++;
+ }
+ while (bCont);
+
+ if (!bBox || !bDD)
+ {
+ gmx_warning(!bBox ? box_err : dd_err);
+ if (fp != NULL)
+ {
+ fprintf(fp,"\n%s\n",bBox ? box_err : dd_err);
+ }
+ ir->nstlist = nstlist_orig;
+ }
+ else if (ir->nstlist != nstlist_orig || rlist_new != ir->rlist)
+ {
+ sprintf(buf,"Changing nstlist from %d to %d, rlist from %g to %g",
+ nstlist_orig,ir->nstlist,
+ ir->rlist,rlist_new);
+ if (MASTER(cr))
+ {
+ fprintf(stderr,"%s\n\n",buf);
+ }
+ if (fp != NULL)
+ {
+ fprintf(fp,"%s\n\n",buf);
+ }
+ ir->rlist = rlist_new;
+ ir->rlistlong = rlist_new;
+ }
+}
+
+static void prepare_verlet_scheme(FILE *fplog,
+ gmx_hw_info_t *hwinfo,
+ t_commrec *cr,
+ gmx_hw_opt_t *hw_opt,
+ const char *nbpu_opt,
+ t_inputrec *ir,
+ const gmx_mtop_t *mtop,
+ matrix box,
+ gmx_bool *bUseGPU)
+{
+ /* Here we only check for GPU usage on the MPI master process,
+ * as here we don't know how many GPUs we will use yet.
+ * We check for a GPU on all processes later.
+ */
+ *bUseGPU = hwinfo->bCanUseGPU || (getenv("GMX_EMULATE_GPU") != NULL);
+
+ if (ir->verletbuf_drift > 0)
+ {
+ /* Update the Verlet buffer size for the current run setup */
+ verletbuf_list_setup_t ls;
+ real rlist_new;
+
+ /* Here we assume CPU acceleration is on. But as currently
+ * calc_verlet_buffer_size gives the same results for 4x8 and 4x4
+ * and 4x2 gives a larger buffer than 4x4, this is ok.
+ */
+ verletbuf_get_list_setup(*bUseGPU,&ls);
+
+ calc_verlet_buffer_size(mtop,det(box),ir,
+ ir->verletbuf_drift,&ls,
+ NULL,&rlist_new);
+ if (rlist_new != ir->rlist)
+ {
+ if (fplog != NULL)
+ {
+ fprintf(fplog,"\nChanging rlist from %g to %g for non-bonded %dx%d atom kernels\n\n",
+ ir->rlist,rlist_new,
+ ls.cluster_size_i,ls.cluster_size_j);
+ }
+ ir->rlist = rlist_new;
+ ir->rlistlong = rlist_new;
+ }
+ }
+
+ /* With GPU or emulation we should check nstlist for performance */
+ if ((EI_DYNAMICS(ir->eI) &&
+ *bUseGPU &&
+ ir->nstlist < NSTLIST_GPU_ENOUGH) ||
+ getenv(NSTLIST_ENVVAR) != NULL)
+ {
+ /* Choose a better nstlist */
+ increase_nstlist(fplog,cr,ir,mtop,box);
+ }
+}
+
+static void convert_to_verlet_scheme(FILE *fplog,
+ t_inputrec *ir,
+ gmx_mtop_t *mtop,real box_vol)
+{
+ char *conv_mesg="Converting input file with group cut-off scheme to the Verlet cut-off scheme";
+
+ md_print_warn(NULL,fplog,"%s\n",conv_mesg);
+
+ ir->cutoff_scheme = ecutsVERLET;
+ ir->verletbuf_drift = 0.005;
+
+ if (ir->rcoulomb != ir->rvdw)
+ {
+ gmx_fatal(FARGS,"The VdW and Coulomb cut-offs are different, whereas the Verlet scheme only supports equal cut-offs");
+ }
+
+ if (ir->vdwtype == evdwUSER || EEL_USER(ir->coulombtype))
+ {
+ gmx_fatal(FARGS,"User non-bonded potentials are not (yet) supported with the Verlet scheme");
+ }
+ else if (EVDW_SWITCHED(ir->vdwtype) || EEL_SWITCHED(ir->coulombtype))
+ {
+ md_print_warn(NULL,fplog,"Converting switched or shifted interactions to a shifted potential (without force shift), this will lead to slightly different interaction potentials");
+
+ if (EVDW_SWITCHED(ir->vdwtype))
+ {
+ ir->vdwtype = evdwCUT;
+ }
+ if (EEL_SWITCHED(ir->coulombtype))
+ {
+ if (EEL_FULL(ir->coulombtype))
+ {
+ /* With full electrostatic only PME can be switched */
+ ir->coulombtype = eelPME;
+ }
+ else
+ {
+ md_print_warn(NULL,fplog,"NOTE: Replacing %s electrostatics with reaction-field with epsilon-rf=inf\n",eel_names[ir->coulombtype]);
+ ir->coulombtype = eelRF;
+ ir->epsilon_rf = 0.0;
+ }
+ }
+
+ /* We set the target energy drift to a small number.
+ * Note that this is only for testing. For production the user
+ * should think about this and set the mdp options.
+ */
+ ir->verletbuf_drift = 1e-4;
+ }
+
+ if (inputrec2nboundeddim(ir) != 3)
+ {
+ gmx_fatal(FARGS,"Can only convert old tpr files to the Verlet cut-off scheme with 3D pbc");
+ }
+
+ if (ir->efep != efepNO || ir->implicit_solvent != eisNO)
+ {
+ gmx_fatal(FARGS,"Will not convert old tpr files to the Verlet cut-off scheme with free-energy calculations or implicit solvent");
+ }
+
+ if (EI_DYNAMICS(ir->eI) && !(EI_MD(ir->eI) && ir->etc == etcNO))
+ {
+ verletbuf_list_setup_t ls;
+
+ verletbuf_get_list_setup(FALSE,&ls);
+ calc_verlet_buffer_size(mtop,box_vol,ir,ir->verletbuf_drift,&ls,
+ NULL,&ir->rlist);
+ }
+ else
+ {
+ ir->verletbuf_drift = -1;
+ ir->rlist = 1.05*max(ir->rvdw,ir->rcoulomb);
+ }
+
+ gmx_mtop_remove_chargegroups(mtop);
+}
+
- check_cpu_affinity_set(fplog,
- NULL,
- hw_opt, hwinfo->nthreads_hw_avail, FALSE);
+static void check_and_update_hw_opt(gmx_hw_opt_t *hw_opt,
+ int cutoff_scheme,
+ gmx_bool bIsSimMaster)
+{
+ gmx_omp_nthreads_read_env(&hw_opt->nthreads_omp, bIsSimMaster);
+
+#ifndef GMX_THREAD_MPI
+ if (hw_opt->nthreads_tot > 0)
+ {
+ gmx_fatal(FARGS,"Setting the total number of threads is only supported with thread-MPI and Gromacs was compiled without thread-MPI");
+ }
+ if (hw_opt->nthreads_tmpi > 0)
+ {
+ gmx_fatal(FARGS,"Setting the number of thread-MPI threads is only supported with thread-MPI and Gromacs was compiled without thread-MPI");
+ }
+#endif
+
+ if (hw_opt->nthreads_tot > 0 && hw_opt->nthreads_omp_pme <= 0)
+ {
+ /* We have the same number of OpenMP threads for PP and PME processes,
+ * thus we can perform several consistency checks.
+ */
+ if (hw_opt->nthreads_tmpi > 0 &&
+ hw_opt->nthreads_omp > 0 &&
+ hw_opt->nthreads_tot != hw_opt->nthreads_tmpi*hw_opt->nthreads_omp)
+ {
+ gmx_fatal(FARGS,"The total number of threads requested (%d) does not match the thread-MPI threads (%d) times the OpenMP threads (%d) requested",
+ hw_opt->nthreads_tot,hw_opt->nthreads_tmpi,hw_opt->nthreads_omp);
+ }
+
+ if (hw_opt->nthreads_tmpi > 0 &&
+ hw_opt->nthreads_tot % hw_opt->nthreads_tmpi != 0)
+ {
+ gmx_fatal(FARGS,"The total number of threads requested (%d) is not divisible by the number of thread-MPI threads requested (%d)",
+ hw_opt->nthreads_tot,hw_opt->nthreads_tmpi);
+ }
+
+ if (hw_opt->nthreads_omp > 0 &&
+ hw_opt->nthreads_tot % hw_opt->nthreads_omp != 0)
+ {
+ gmx_fatal(FARGS,"The total number of threads requested (%d) is not divisible by the number of OpenMP threads requested (%d)",
+ hw_opt->nthreads_tot,hw_opt->nthreads_omp);
+ }
+
+ if (hw_opt->nthreads_tmpi > 0 &&
+ hw_opt->nthreads_omp <= 0)
+ {
+ hw_opt->nthreads_omp = hw_opt->nthreads_tot/hw_opt->nthreads_tmpi;
+ }
+ }
+
+#ifndef GMX_OPENMP
+ if (hw_opt->nthreads_omp > 1)
+ {
+ gmx_fatal(FARGS,"OpenMP threads are requested, but Gromacs was compiled without OpenMP support");
+ }
+#endif
+
+ if (cutoff_scheme == ecutsGROUP)
+ {
+ /* We only have OpenMP support for PME only nodes */
+ if (hw_opt->nthreads_omp > 1)
+ {
+ gmx_fatal(FARGS,"OpenMP threads have been requested with cut-off scheme %s, but these are only supported with cut-off scheme %s",
+ ecutscheme_names[cutoff_scheme],
+ ecutscheme_names[ecutsVERLET]);
+ }
+ hw_opt->nthreads_omp = 1;
+ }
+
+ if (hw_opt->nthreads_omp_pme > 0 && hw_opt->nthreads_omp <= 0)
+ {
+ gmx_fatal(FARGS,"You need to specify -ntomp in addition to -ntomp_pme");
+ }
+
+ if (hw_opt->nthreads_tot == 1)
+ {
+ hw_opt->nthreads_tmpi = 1;
+
+ if (hw_opt->nthreads_omp > 1)
+ {
+ gmx_fatal(FARGS,"You requested %d OpenMP threads with %d total threads",
+ hw_opt->nthreads_tmpi,hw_opt->nthreads_tot);
+ }
+ hw_opt->nthreads_omp = 1;
+ }
+
+ if (hw_opt->nthreads_omp_pme <= 0 && hw_opt->nthreads_omp > 0)
+ {
+ hw_opt->nthreads_omp_pme = hw_opt->nthreads_omp;
+ }
+
+ if (debug)
+ {
+ fprintf(debug,"hw_opt: nt %d ntmpi %d ntomp %d ntomp_pme %d gpu_id '%s'\n",
+ hw_opt->nthreads_tot,
+ hw_opt->nthreads_tmpi,
+ hw_opt->nthreads_omp,
+ hw_opt->nthreads_omp_pme,
+ hw_opt->gpu_id!=NULL ? hw_opt->gpu_id : "");
+
+ }
+}
+
+
+/* Override the value in inputrec with value passed on the command line (if any) */
+static void override_nsteps_cmdline(FILE *fplog,
+ int nsteps_cmdline,
+ t_inputrec *ir,
+ const t_commrec *cr)
+{
+ assert(ir);
+ assert(cr);
+
+ /* override with anything else than the default -2 */
+ if (nsteps_cmdline > -2)
+ {
+ char stmp[STRLEN];
+
+ ir->nsteps = nsteps_cmdline;
+ if (EI_DYNAMICS(ir->eI))
+ {
+ sprintf(stmp, "Overriding nsteps with value passed on the command line: %d steps, %.3f ps",
+ nsteps_cmdline, nsteps_cmdline*ir->delta_t);
+ }
+ else
+ {
+ sprintf(stmp, "Overriding nsteps with value passed on the command line: %d steps",
+ nsteps_cmdline);
+ }
+
+ md_print_warn(cr, fplog, "%s\n", stmp);
+ }
+}
+
+/* Data structure set by SIMMASTER which needs to be passed to all nodes
+ * before the other nodes have read the tpx file and called gmx_detect_hardware.
+ */
+typedef struct {
+ int cutoff_scheme; /* The cutoff scheme from inputrec_t */
+ gmx_bool bUseGPU; /* Use GPU or GPU emulation */
+} master_inf_t;
+
+int mdrunner(gmx_hw_opt_t *hw_opt,
+ FILE *fplog,t_commrec *cr,int nfile,
+ const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose,
+ gmx_bool bCompact, int nstglobalcomm,
+ ivec ddxyz,int dd_node_order,real rdd,real rconstr,
+ const char *dddlb_opt,real dlb_scale,
+ const char *ddcsx,const char *ddcsy,const char *ddcsz,
+ const char *nbpu_opt,
+ int nsteps_cmdline, int nstepout,int resetstep,
+ int nmultisim,int repl_ex_nst,int repl_ex_nex,
+ int repl_ex_seed, real pforce,real cpt_period,real max_hours,
+ const char *deviceOptions, unsigned long Flags)
+{
+ gmx_bool bForceUseGPU,bTryUseGPU;
+ double nodetime=0,realtime;
+ t_inputrec *inputrec;
+ t_state *state=NULL;
+ matrix box;
+ gmx_ddbox_t ddbox={0};
+ int npme_major,npme_minor;
+ real tmpr1,tmpr2;
+ t_nrnb *nrnb;
+ gmx_mtop_t *mtop=NULL;
+ t_mdatoms *mdatoms=NULL;
+ t_forcerec *fr=NULL;
+ t_fcdata *fcd=NULL;
+ real ewaldcoeff=0;
+ gmx_pme_t *pmedata=NULL;
+ gmx_vsite_t *vsite=NULL;
+ gmx_constr_t constr;
+ int i,m,nChargePerturbed=-1,status,nalloc;
+ char *gro;
+ gmx_wallcycle_t wcycle;
+ gmx_bool bReadRNG,bReadEkin;
+ int list;
+ gmx_runtime_t runtime;
+ int rc;
+ gmx_large_int_t reset_counters;
+ gmx_edsam_t ed=NULL;
+ t_commrec *cr_old=cr;
+ int nthreads_pme=1;
+ int nthreads_pp=1;
+ gmx_membed_t membed=NULL;
+ gmx_hw_info_t *hwinfo=NULL;
+ master_inf_t minf={-1,FALSE};
+
+ /* CAUTION: threads may be started later on in this function, so
+ cr doesn't reflect the final parallel state right now */
+ snew(inputrec,1);
+ snew(mtop,1);
+
+ if (Flags & MD_APPENDFILES)
+ {
+ fplog = NULL;
+ }
+
+ bForceUseGPU = (strncmp(nbpu_opt, "gpu", 3) == 0);
+ bTryUseGPU = (strncmp(nbpu_opt, "auto", 4) == 0) || bForceUseGPU;
+
+ snew(state,1);
+ if (SIMMASTER(cr))
+ {
+ /* Read (nearly) all data required for the simulation */
+ read_tpx_state(ftp2fn(efTPX,nfile,fnm),inputrec,state,NULL,mtop);
+
+ if (inputrec->cutoff_scheme != ecutsVERLET &&
+ ((Flags & MD_TESTVERLET) || getenv("GMX_VERLET_SCHEME") != NULL))
+ {
+ convert_to_verlet_scheme(fplog,inputrec,mtop,det(state->box));
+ }
+
+ /* Detect hardware, gather information. With tMPI only thread 0 does it
+ * and after threads are started broadcasts hwinfo around. */
+ snew(hwinfo, 1);
+ gmx_detect_hardware(fplog, hwinfo, cr,
+ bForceUseGPU, bTryUseGPU, hw_opt->gpu_id);
+
+ minf.cutoff_scheme = inputrec->cutoff_scheme;
+ minf.bUseGPU = FALSE;
+
+ if (inputrec->cutoff_scheme == ecutsVERLET)
+ {
+ prepare_verlet_scheme(fplog,hwinfo,cr,hw_opt,nbpu_opt,
+ inputrec,mtop,state->box,
+ &minf.bUseGPU);
+ }
+ else if (hwinfo->bCanUseGPU)
+ {
+ md_print_warn(cr,fplog,
+ "NOTE: GPU(s) found, but the current simulation can not use GPUs\n"
+ " To use a GPU, set the mdp option: cutoff-scheme = Verlet\n"
+ " (for quick performance testing you can use the -testverlet option)\n");
+
+ if (bForceUseGPU)
+ {
+ gmx_fatal(FARGS,"GPU requested, but can't be used without cutoff-scheme=Verlet");
+ }
+ }
+ }
+#ifndef GMX_THREAD_MPI
+ if (PAR(cr))
+ {
+ gmx_bcast_sim(sizeof(minf),&minf,cr);
+ }
+#endif
+ if (minf.bUseGPU && cr->npmenodes == -1)
+ {
+ /* Don't automatically use PME-only nodes with GPUs */
+ cr->npmenodes = 0;
+ }
+
+ /* Check for externally set OpenMP affinity and turn off internal
+ * pinning if any is found. We need to do this check early to tell
+ * thread-MPI whether it should do pinning when spawning threads.
++ * TODO: the above no longer holds, we should move these checks down
+ */
+ gmx_omp_check_thread_affinity(fplog, cr, hw_opt);
+
+#ifdef GMX_THREAD_MPI
+ /* With thread-MPI inputrec is only set here on the master thread */
+ if (SIMMASTER(cr))
+#endif
+ {
+ check_and_update_hw_opt(hw_opt,minf.cutoff_scheme,SIMMASTER(cr));
+
+#ifdef GMX_THREAD_MPI
+ /* Early check for externally set process affinity. Can't do over all
+ * MPI processes because hwinfo is not available everywhere, but with
+ * thread-MPI it's needed as pinning might get turned off which needs
+ * to be known before starting thread-MPI. */
- check_cpu_affinity_set(fplog, cr,
- hw_opt, hwinfo->nthreads_hw_avail, FALSE);
++ gmx_check_thread_affinity_set(fplog,
++ NULL,
++ hw_opt, hwinfo->nthreads_hw_avail, FALSE);
+#endif
+
+#ifdef GMX_THREAD_MPI
+ if (cr->npmenodes > 0 && hw_opt->nthreads_tmpi <= 0)
+ {
+ gmx_fatal(FARGS,"You need to explicitly specify the number of MPI threads (-ntmpi) when using separate PME nodes");
+ }
+#endif
+
+ if (hw_opt->nthreads_omp_pme != hw_opt->nthreads_omp &&
+ cr->npmenodes <= 0)
+ {
+ gmx_fatal(FARGS,"You need to explicitly specify the number of PME nodes (-npme) when using different number of OpenMP threads for PP and PME nodes");
+ }
+ }
+
+#ifdef GMX_THREAD_MPI
+ if (SIMMASTER(cr))
+ {
+ /* NOW the threads will be started: */
+ hw_opt->nthreads_tmpi = get_nthreads_mpi(hwinfo,
+ hw_opt,
+ inputrec, mtop,
+ cr, fplog);
+ if (hw_opt->nthreads_tot > 0 && hw_opt->nthreads_omp <= 0)
+ {
+ hw_opt->nthreads_omp = hw_opt->nthreads_tot/hw_opt->nthreads_tmpi;
+ }
+
+ if (hw_opt->nthreads_tmpi > 1)
+ {
+ /* now start the threads. */
+ cr=mdrunner_start_threads(hw_opt, fplog, cr_old, nfile, fnm,
+ oenv, bVerbose, bCompact, nstglobalcomm,
+ ddxyz, dd_node_order, rdd, rconstr,
+ dddlb_opt, dlb_scale, ddcsx, ddcsy, ddcsz,
+ nbpu_opt,
+ nsteps_cmdline, nstepout, resetstep, nmultisim,
+ repl_ex_nst, repl_ex_nex, repl_ex_seed, pforce,
+ cpt_period, max_hours, deviceOptions,
+ Flags);
+ /* the main thread continues here with a new cr. We don't deallocate
+ the old cr because other threads may still be reading it. */
+ if (cr == NULL)
+ {
+ gmx_comm("Failed to spawn threads");
+ }
+ }
+ }
+#endif
+ /* END OF CAUTION: cr is now reliable */
+
+ /* g_membed initialisation *
+ * Because we change the mtop, init_membed is called before the init_parallel *
+ * (in case we ever want to make it run in parallel) */
+ if (opt2bSet("-membed",nfile,fnm))
+ {
+ if (MASTER(cr))
+ {
+ fprintf(stderr,"Initializing membed");
+ }
+ membed = init_membed(fplog,nfile,fnm,mtop,inputrec,state,cr,&cpt_period);
+ }
+
+ if (PAR(cr))
+ {
+ /* now broadcast everything to the non-master nodes/threads: */
+ init_parallel(fplog, cr, inputrec, mtop);
+
+ /* This check needs to happen after get_nthreads_mpi() */
+ if (inputrec->cutoff_scheme == ecutsVERLET && (Flags & MD_PARTDEC))
+ {
+ gmx_fatal_collective(FARGS,cr,NULL,
+ "The Verlet cut-off scheme is not supported with particle decomposition.\n"
+ "You can achieve the same effect as particle decomposition by running in parallel using only OpenMP threads.");
+ }
+ }
+ if (fplog != NULL)
+ {
+ pr_inputrec(fplog,0,"Input Parameters",inputrec,FALSE);
+ }
+
+#if defined GMX_THREAD_MPI
+ /* With tMPI we detected on thread 0 and we'll just pass the hwinfo pointer
+ * to the other threads -- slightly uncool, but works fine, just need to
+ * make sure that the data doesn't get freed twice. */
+ if (cr->nnodes > 1)
+ {
+ if (!SIMMASTER(cr))
+ {
+ snew(hwinfo, 1);
+ }
+ gmx_bcast(sizeof(&hwinfo), &hwinfo, cr);
+ }
+#else
+ if (PAR(cr) && !SIMMASTER(cr))
+ {
+ /* now we have inputrec on all nodes, can run the detection */
+ /* TODO: perhaps it's better to propagate within a node instead? */
+ snew(hwinfo, 1);
+ gmx_detect_hardware(fplog, hwinfo, cr,
+ bForceUseGPU, bTryUseGPU, hw_opt->gpu_id);
+ }
+
+ /* Now do the affinity check with MPI/no-MPI (done earlier with thread-MPI). */
- /* Before setting affinity, check whether the affinity has changed
- * - which indicates that probably the OpenMP library has changed it since
- * we first checked). */
- check_cpu_affinity_set(fplog, cr, hw_opt, hwinfo->nthreads_hw_avail, TRUE);
++ gmx_check_thread_affinity_set(fplog, cr,
++ hw_opt, hwinfo->nthreads_hw_avail, FALSE);
+#endif
+
+ /* now make sure the state is initialized and propagated */
+ set_state_entries(state,inputrec,cr->nnodes);
+
+ /* A parallel command line option consistency check that we can
+ only do after any threads have started. */
+ if (!PAR(cr) &&
+ (ddxyz[XX] > 1 || ddxyz[YY] > 1 || ddxyz[ZZ] > 1 || cr->npmenodes > 0))
+ {
+ gmx_fatal(FARGS,
+ "The -dd or -npme option request a parallel simulation, "
+#ifndef GMX_MPI
+ "but %s was compiled without threads or MPI enabled"
+#else
+#ifdef GMX_THREAD_MPI
+ "but the number of threads (option -nt) is 1"
+#else
+ "but %s was not started through mpirun/mpiexec or only one process was requested through mpirun/mpiexec"
+#endif
+#endif
+ , ShortProgram()
+ );
+ }
+
+ if ((Flags & MD_RERUN) &&
+ (EI_ENERGY_MINIMIZATION(inputrec->eI) || eiNM == inputrec->eI))
+ {
+ gmx_fatal(FARGS, "The .mdp file specified an energy mininization or normal mode algorithm, and these are not compatible with mdrun -rerun");
+ }
+
+ if (can_use_allvsall(inputrec,mtop,TRUE,cr,fplog) && PAR(cr))
+ {
+ /* All-vs-all loops do not work with domain decomposition */
+ Flags |= MD_PARTDEC;
+ }
+
+ if (!EEL_PME(inputrec->coulombtype) || (Flags & MD_PARTDEC))
+ {
+ if (cr->npmenodes > 0)
+ {
+ if (!EEL_PME(inputrec->coulombtype))
+ {
+ gmx_fatal_collective(FARGS,cr,NULL,
+ "PME nodes are requested, but the system does not use PME electrostatics");
+ }
+ if (Flags & MD_PARTDEC)
+ {
+ gmx_fatal_collective(FARGS,cr,NULL,
+ "PME nodes are requested, but particle decomposition does not support separate PME nodes");
+ }
+ }
+
+ cr->npmenodes = 0;
+ }
+
+#ifdef GMX_FAHCORE
+ fcRegisterSteps(inputrec->nsteps,inputrec->init_step);
+#endif
+
+ /* NMR restraints must be initialized before load_checkpoint,
+ * since with time averaging the history is added to t_state.
+ * For proper consistency check we therefore need to extend
+ * t_state here.
+ * So the PME-only nodes (if present) will also initialize
+ * the distance restraints.
+ */
+ snew(fcd,1);
+
+ /* This needs to be called before read_checkpoint to extend the state */
+ init_disres(fplog,mtop,inputrec,cr,Flags & MD_PARTDEC,fcd,state, repl_ex_nst > 0);
+
+ if (gmx_mtop_ftype_count(mtop,F_ORIRES) > 0)
+ {
+ if (PAR(cr) && !(Flags & MD_PARTDEC))
+ {
+ gmx_fatal(FARGS,"Orientation restraints do not work (yet) with domain decomposition, use particle decomposition (mdrun option -pd)");
+ }
+ /* Orientation restraints */
+ if (MASTER(cr))
+ {
+ init_orires(fplog,mtop,state->x,inputrec,cr->ms,&(fcd->orires),
+ state);
+ }
+ }
+
+ if (DEFORM(*inputrec))
+ {
+ /* Store the deform reference box before reading the checkpoint */
+ if (SIMMASTER(cr))
+ {
+ copy_mat(state->box,box);
+ }
+ if (PAR(cr))
+ {
+ gmx_bcast(sizeof(box),box,cr);
+ }
+ /* Because we do not have the update struct available yet
+ * in which the reference values should be stored,
+ * we store them temporarily in static variables.
+ * This should be thread safe, since they are only written once
+ * and with identical values.
+ */
+#ifdef GMX_THREAD_MPI
+ tMPI_Thread_mutex_lock(&deform_init_box_mutex);
+#endif
+ deform_init_init_step_tpx = inputrec->init_step;
+ copy_mat(box,deform_init_box_tpx);
+#ifdef GMX_THREAD_MPI
+ tMPI_Thread_mutex_unlock(&deform_init_box_mutex);
+#endif
+ }
+
+ if (opt2bSet("-cpi",nfile,fnm))
+ {
+ /* Check if checkpoint file exists before doing continuation.
+ * This way we can use identical input options for the first and subsequent runs...
+ */
+ if( gmx_fexist_master(opt2fn_master("-cpi",nfile,fnm,cr),cr) )
+ {
+ load_checkpoint(opt2fn_master("-cpi",nfile,fnm,cr),&fplog,
+ cr,Flags & MD_PARTDEC,ddxyz,
+ inputrec,state,&bReadRNG,&bReadEkin,
+ (Flags & MD_APPENDFILES),
+ (Flags & MD_APPENDFILESSET));
+
+ if (bReadRNG)
+ {
+ Flags |= MD_READ_RNG;
+ }
+ if (bReadEkin)
+ {
+ Flags |= MD_READ_EKIN;
+ }
+ }
+ }
+
+ if (((MASTER(cr) || (Flags & MD_SEPPOT)) && (Flags & MD_APPENDFILES))
+#ifdef GMX_THREAD_MPI
+ /* With thread MPI only the master node/thread exists in mdrun.c,
+ * therefore non-master nodes need to open the "seppot" log file here.
+ */
+ || (!MASTER(cr) && (Flags & MD_SEPPOT))
+#endif
+ )
+ {
+ gmx_log_open(ftp2fn(efLOG,nfile,fnm),cr,!(Flags & MD_SEPPOT),
+ Flags,&fplog);
+ }
+
+ /* override nsteps with value from cmdline */
+ override_nsteps_cmdline(fplog, nsteps_cmdline, inputrec, cr);
+
+ if (SIMMASTER(cr))
+ {
+ copy_mat(state->box,box);
+ }
+
+ if (PAR(cr))
+ {
+ gmx_bcast(sizeof(box),box,cr);
+ }
+
+ /* Essential dynamics */
+ if (opt2bSet("-ei",nfile,fnm))
+ {
+ /* Open input and output files, allocate space for ED data structure */
+ ed = ed_open(mtop->natoms,&state->edsamstate,nfile,fnm,Flags,oenv,cr);
+ }
+
+ if (PAR(cr) && !((Flags & MD_PARTDEC) ||
+ EI_TPI(inputrec->eI) ||
+ inputrec->eI == eiNM))
+ {
+ cr->dd = init_domain_decomposition(fplog,cr,Flags,ddxyz,rdd,rconstr,
+ dddlb_opt,dlb_scale,
+ ddcsx,ddcsy,ddcsz,
+ mtop,inputrec,
+ box,state->x,
+ &ddbox,&npme_major,&npme_minor);
+
+ make_dd_communicators(fplog,cr,dd_node_order);
+
+ /* Set overallocation to avoid frequent reallocation of arrays */
+ set_over_alloc_dd(TRUE);
+ }
+ else
+ {
+ /* PME, if used, is done on all nodes with 1D decomposition */
+ cr->npmenodes = 0;
+ cr->duty = (DUTY_PP | DUTY_PME);
+ npme_major = 1;
+ npme_minor = 1;
+ if (!EI_TPI(inputrec->eI))
+ {
+ npme_major = cr->nnodes;
+ }
+
+ if (inputrec->ePBC == epbcSCREW)
+ {
+ gmx_fatal(FARGS,
+ "pbc=%s is only implemented with domain decomposition",
+ epbc_names[inputrec->ePBC]);
+ }
+ }
+
+ if (PAR(cr))
+ {
+ /* After possible communicator splitting in make_dd_communicators.
+ * we can set up the intra/inter node communication.
+ */
+ gmx_setup_nodecomm(fplog,cr);
+ }
+
+ /* Initialize per-physical-node MPI process/thread ID and counters. */
+ gmx_init_intranode_counters(cr);
+
+#ifdef GMX_MPI
+ md_print_info(cr,fplog,"Using %d MPI %s\n",
+ cr->nnodes,
+#ifdef GMX_THREAD_MPI
+ cr->nnodes==1 ? "thread" : "threads"
+#else
+ cr->nnodes==1 ? "process" : "processes"
+#endif
+ );
+ fflush(stderr);
+#endif
+
+ gmx_omp_nthreads_init(fplog, cr,
+ hwinfo->nthreads_hw_avail,
+ hw_opt->nthreads_omp,
+ hw_opt->nthreads_omp_pme,
+ (cr->duty & DUTY_PP) == 0,
+ inputrec->cutoff_scheme == ecutsVERLET);
+
+ gmx_check_hw_runconf_consistency(fplog, hwinfo, cr, hw_opt->nthreads_tmpi, minf.bUseGPU);
+
+ /* getting number of PP/PME threads
+ PME: env variable should be read only on one node to make sure it is
+ identical everywhere;
+ */
+ /* TODO nthreads_pp is only used for pinning threads.
+ * This is a temporary solution until we have a hw topology library.
+ */
+ nthreads_pp = gmx_omp_nthreads_get(emntNonbonded);
+ nthreads_pme = gmx_omp_nthreads_get(emntPME);
+
+ wcycle = wallcycle_init(fplog,resetstep,cr,nthreads_pp,nthreads_pme);
+
+ if (PAR(cr))
+ {
+ /* Master synchronizes its value of reset_counters with all nodes
+ * including PME only nodes */
+ reset_counters = wcycle_get_reset_counters(wcycle);
+ gmx_bcast_sim(sizeof(reset_counters),&reset_counters,cr);
+ wcycle_set_reset_counters(wcycle, reset_counters);
+ }
+
+ snew(nrnb,1);
+ if (cr->duty & DUTY_PP)
+ {
+ /* For domain decomposition we allocate dynamically
+ * in dd_partition_system.
+ */
+ if (DOMAINDECOMP(cr))
+ {
+ bcast_state_setup(cr,state);
+ }
+ else
+ {
+ if (PAR(cr))
+ {
+ bcast_state(cr,state,TRUE);
+ }
+ }
+
+ /* Initiate forcerecord */
+ fr = mk_forcerec();
+ fr->hwinfo = hwinfo;
+ init_forcerec(fplog,oenv,fr,fcd,inputrec,mtop,cr,box,FALSE,
+ opt2fn("-table",nfile,fnm),
+ opt2fn("-tabletf",nfile,fnm),
+ opt2fn("-tablep",nfile,fnm),
+ opt2fn("-tableb",nfile,fnm),
+ nbpu_opt,
+ FALSE,pforce);
+
+ /* version for PCA_NOT_READ_NODE (see md.c) */
+ /*init_forcerec(fplog,fr,fcd,inputrec,mtop,cr,box,FALSE,
+ "nofile","nofile","nofile","nofile",FALSE,pforce);
+ */
+ fr->bSepDVDL = ((Flags & MD_SEPPOT) == MD_SEPPOT);
+
+ /* Initialize QM-MM */
+ if(fr->bQMMM)
+ {
+ init_QMMMrec(cr,box,mtop,inputrec,fr);
+ }
+
+ /* Initialize the mdatoms structure.
+ * mdatoms is not filled with atom data,
+ * as this can not be done now with domain decomposition.
+ */
+ mdatoms = init_mdatoms(fplog,mtop,inputrec->efep!=efepNO);
+
+ /* Initialize the virtual site communication */
+ vsite = init_vsite(mtop,cr,FALSE);
+
+ calc_shifts(box,fr->shift_vec);
+
+ /* With periodic molecules the charge groups should be whole at start up
+ * and the virtual sites should not be far from their proper positions.
+ */
+ if (!inputrec->bContinuation && MASTER(cr) &&
+ !(inputrec->ePBC != epbcNONE && inputrec->bPeriodicMols))
+ {
+ /* Make molecules whole at start of run */
+ if (fr->ePBC != epbcNONE)
+ {
+ do_pbc_first_mtop(fplog,inputrec->ePBC,box,mtop,state->x);
+ }
+ if (vsite)
+ {
+ /* Correct initial vsite positions are required
+ * for the initial distribution in the domain decomposition
+ * and for the initial shell prediction.
+ */
+ construct_vsites_mtop(fplog,vsite,mtop,state->x);
+ }
+ }
+
+ if (EEL_PME(fr->eeltype))
+ {
+ ewaldcoeff = fr->ewaldcoeff;
+ pmedata = &fr->pmedata;
+ }
+ else
+ {
+ pmedata = NULL;
+ }
+ }
+ else
+ {
+ /* This is a PME only node */
+
+ /* We don't need the state */
+ done_state(state);
+
+ ewaldcoeff = calc_ewaldcoeff(inputrec->rcoulomb, inputrec->ewald_rtol);
+ snew(pmedata,1);
+ }
+
- /* Set the CPU affinity */
- set_cpu_affinity(fplog,cr,hw_opt,nthreads_pme,hwinfo,inputrec);
++ if (hw_opt->thread_affinity != threadaffOFF)
++ {
++ /* Before setting affinity, check whether the affinity has changed
++ * - which indicates that probably the OpenMP library has changed it
++ * since we first checked).
++ */
++ gmx_check_thread_affinity_set(fplog, cr,
++ hw_opt, hwinfo->nthreads_hw_avail, TRUE);
+
++ /* Set the CPU affinity */
++ gmx_set_thread_affinity(fplog,cr,hw_opt,nthreads_pme,hwinfo,inputrec);
++ }
+
+ /* Initiate PME if necessary,
+ * either on all nodes or on dedicated PME nodes only. */
+ if (EEL_PME(inputrec->coulombtype))
+ {
+ if (mdatoms)
+ {
+ nChargePerturbed = mdatoms->nChargePerturbed;
+ }
+ if (cr->npmenodes > 0)
+ {
+ /* The PME only nodes need to know nChargePerturbed */
+ gmx_bcast_sim(sizeof(nChargePerturbed),&nChargePerturbed,cr);
+ }
+
+ if (cr->duty & DUTY_PME)
+ {
+ status = gmx_pme_init(pmedata,cr,npme_major,npme_minor,inputrec,
+ mtop ? mtop->natoms : 0,nChargePerturbed,
+ (Flags & MD_REPRODUCIBLE),nthreads_pme);
+ if (status != 0)
+ {
+ gmx_fatal(FARGS,"Error %d initializing PME",status);
+ }
+ }
+ }
+
+
+ if (integrator[inputrec->eI].func == do_md)
+ {
+ /* Turn on signal handling on all nodes */
+ /*
+ * (A user signal from the PME nodes (if any)
+ * is communicated to the PP nodes.
+ */
+ signal_handler_install();
+ }
+
+ if (cr->duty & DUTY_PP)
+ {
+ if (inputrec->ePull != epullNO)
+ {
+ /* Initialize pull code */
+ init_pull(fplog,inputrec,nfile,fnm,mtop,cr,oenv, inputrec->fepvals->init_lambda,
+ EI_DYNAMICS(inputrec->eI) && MASTER(cr),Flags);
+ }
+
+ if (inputrec->bRot)
+ {
+ /* Initialize enforced rotation code */
+ init_rot(fplog,inputrec,nfile,fnm,cr,state->x,box,mtop,oenv,
+ bVerbose,Flags);
+ }
+
+ constr = init_constraints(fplog,mtop,inputrec,ed,state,cr);
+
+ if (DOMAINDECOMP(cr))
+ {
+ dd_init_bondeds(fplog,cr->dd,mtop,vsite,constr,inputrec,
+ Flags & MD_DDBONDCHECK,fr->cginfo_mb);
+
+ set_dd_parameters(fplog,cr->dd,dlb_scale,inputrec,fr,&ddbox);
+
+ setup_dd_grid(fplog,cr->dd);
+ }
+
+ /* Now do whatever the user wants us to do (how flexible...) */
+ integrator[inputrec->eI].func(fplog,cr,nfile,fnm,
+ oenv,bVerbose,bCompact,
+ nstglobalcomm,
+ vsite,constr,
+ nstepout,inputrec,mtop,
+ fcd,state,
+ mdatoms,nrnb,wcycle,ed,fr,
+ repl_ex_nst,repl_ex_nex,repl_ex_seed,
+ membed,
+ cpt_period,max_hours,
+ deviceOptions,
+ Flags,
+ &runtime);
+
+ if (inputrec->ePull != epullNO)
+ {
+ finish_pull(fplog,inputrec->pull);
+ }
+
+ if (inputrec->bRot)
+ {
+ finish_rot(fplog,inputrec->rot);
+ }
+
+ }
+ else
+ {
+ /* do PME only */
+ gmx_pmeonly(*pmedata,cr,nrnb,wcycle,ewaldcoeff,FALSE,inputrec);
+ }
+
+ if (EI_DYNAMICS(inputrec->eI) || EI_TPI(inputrec->eI))
+ {
+ /* Some timing stats */
+ if (SIMMASTER(cr))
+ {
+ if (runtime.proc == 0)
+ {
+ runtime.proc = runtime.real;
+ }
+ }
+ else
+ {
+ runtime.real = 0;
+ }
+ }
+
+ wallcycle_stop(wcycle,ewcRUN);
+
+ /* Finish up, write some stuff
+ * if rerunMD, don't write last frame again
+ */
+ finish_run(fplog,cr,ftp2fn(efSTO,nfile,fnm),
+ inputrec,nrnb,wcycle,&runtime,
+ fr != NULL && fr->nbv != NULL && fr->nbv->bUseGPU ?
+ nbnxn_cuda_get_timings(fr->nbv->cu_nbv) : NULL,
+ nthreads_pp,
+ EI_DYNAMICS(inputrec->eI) && !MULTISIM(cr));
+
+ if ((cr->duty & DUTY_PP) && fr->nbv != NULL && fr->nbv->bUseGPU)
+ {
+ char gpu_err_str[STRLEN];
+
+ /* free GPU memory and uninitialize GPU (by destroying the context) */
+ nbnxn_cuda_free(fplog, fr->nbv->cu_nbv);
+
+ if (!free_gpu(gpu_err_str))
+ {
+ gmx_warning("On node %d failed to free GPU #%d: %s",
+ cr->nodeid, get_current_gpu_device_id(), gpu_err_str);
+ }
+ }
+
+ if (opt2bSet("-membed",nfile,fnm))
+ {
+ sfree(membed);
+ }
+
+#ifdef GMX_THREAD_MPI
+ if (PAR(cr) && SIMMASTER(cr))
+#endif
+ {
+ gmx_hardware_info_free(hwinfo);
+ }
+
+ /* Does what it says */
+ print_date_and_time(fplog,cr->nodeid,"Finished mdrun",&runtime);
+
+ /* Close logfile already here if we were appending to it */
+ if (MASTER(cr) && (Flags & MD_APPENDFILES))
+ {
+ gmx_log_close(fplog);
+ }
+
+ rc=(int)gmx_get_stop_condition();
+
+#ifdef GMX_THREAD_MPI
+ /* we need to join all threads. The sub-threads join when they
+ exit this function, but the master thread needs to be told to
+ wait for that. */
+ if (PAR(cr) && MASTER(cr))
+ {
+ tMPI_Finalize();
+ }
+#endif
+
+ return rc;
+}
gmx_bool gaussj(real **a, int n, real **b, int m)
{
- int *indxc, *indxr, *ipiv;
- int i, icol = 0, irow = 0, j, k, l, ll;
- real big, dum, pivinv;
-
- indxc = ivector(1, n);
- indxr = ivector(1, n);
- ipiv = ivector(1, n);
- for (j = 1; j <= n; j++)
- {
- ipiv[j] = 0;
- }
- for (i = 1; i <= n; i++)
- {
- big = 0.0;
- for (j = 1; j <= n; j++)
- {
- if (ipiv[j] != 1)
- {
- for (k = 1; k <= n; k++)
- {
- if (ipiv[k] == 0)
- {
- if (fabs(a[j][k]) >= big)
- {
- big = fabs(a[j][k]);
- irow = j;
- icol = k;
- }
- }
- else if (ipiv[k] > 1)
- {
- nrerror("GAUSSJ: Singular Matrix-1", FALSE);
- return FALSE;
- }
- }
- }
- }
- ++(ipiv[icol]);
- if (irow != icol)
- {
- for (l = 1; l <= n; l++)
- {
- SWAP(a[irow][l], a[icol][l]);
- }
- for (l = 1; l <= m; l++)
- {
- SWAP(b[irow][l], b[icol][l]);
- }
- }
- indxr[i] = irow;
- indxc[i] = icol;
- if (a[icol][icol] == 0.0)
- {
- fprintf(stderr, "irow = %d, icol = %d\n", irow, icol);
- dump_mat(n, a);
- nrerror("GAUSSJ: Singular Matrix-2", FALSE);
- return FALSE;
- }
- pivinv = 1.0/a[icol][icol];
- a[icol][icol] = 1.0;
- for (l = 1; l <= n; l++)
- {
- a[icol][l] *= pivinv;
- }
- for (l = 1; l <= m; l++)
- {
- b[icol][l] *= pivinv;
- }
- for (ll = 1; ll <= n; ll++)
- {
- if (ll != icol)
- {
- dum = a[ll][icol];
- a[ll][icol] = 0.0;
- for (l = 1; l <= n; l++)
- {
- a[ll][l] -= a[icol][l]*dum;
- }
- for (l = 1; l <= m; l++)
- {
- b[ll][l] -= b[icol][l]*dum;
- }
- }
- }
- }
- for (l = n; l >= 1; l--)
- {
- if (indxr[l] != indxc[l])
- {
- for (k = 1; k <= n; k++)
- {
- SWAP(a[k][indxr[l]], a[k][indxc[l]]);
- }
- }
- }
- free_ivector(ipiv, 1);
- free_ivector(indxr, 1);
- free_ivector(indxc, 1);
-
- return TRUE;
+ int *indxc,*indxr,*ipiv;
+ int i,icol=0,irow=0,j,k,l,ll;
+ real big,dum,pivinv;
+
+ indxc=ivector(1,n);
+ indxr=ivector(1,n);
+ ipiv=ivector(1,n);
+ for (j=1;j<=n;j++) ipiv[j]=0;
+ for (i=1;i<=n;i++) {
+ big=0.0;
+ for (j=1;j<=n;j++)
+ if (ipiv[j] != 1)
+ for (k=1;k<=n;k++) {
+ if (ipiv[k] == 0) {
+ if (fabs(a[j][k]) >= big) {
+ big=fabs(a[j][k]);
+ irow=j;
+ icol=k;
+ }
+ } else if (ipiv[k] > 1) {
+ nrerror("GAUSSJ: Singular Matrix-1", FALSE);
+ return FALSE;
+ }
+ }
+ ++(ipiv[icol]);
+ if (irow != icol) {
- for (l=1;l<=n;l++) SWAP(a[irow][l],a[icol][l])
- for (l=1;l<=m;l++) SWAP(b[irow][l],b[icol][l])
++ for (l=1;l<=n;l++) SWAP(a[irow][l],a[icol][l]);
++ for (l=1;l<=m;l++) SWAP(b[irow][l],b[icol][l]);
+ }
+ indxr[i]=irow;
+ indxc[i]=icol;
+ if (a[icol][icol] == 0.0) {
+ fprintf(stderr,"irow = %d, icol = %d\n",irow,icol);
+ dump_mat(n,a);
+ nrerror("GAUSSJ: Singular Matrix-2", FALSE);
+ return FALSE;
+ }
+ pivinv=1.0/a[icol][icol];
+ a[icol][icol]=1.0;
+ for (l=1;l<=n;l++) a[icol][l] *= pivinv;
+ for (l=1;l<=m;l++) b[icol][l] *= pivinv;
+ for (ll=1;ll<=n;ll++)
+ if (ll != icol) {
+ dum=a[ll][icol];
+ a[ll][icol]=0.0;
+ for (l=1;l<=n;l++) a[ll][l] -= a[icol][l]*dum;
+ for (l=1;l<=m;l++) b[ll][l] -= b[icol][l]*dum;
+ }
+ }
+ for (l=n;l>=1;l--) {
+ if (indxr[l] != indxc[l])
+ for (k=1;k<=n;k++)
+ SWAP(a[k][indxr[l]],a[k][indxc[l]]);
+ }
+ free_ivector(ipiv,1);
+ free_ivector(indxr,1);
+ free_ivector(indxc,1);
+
+ return TRUE;
}
#undef SWAP
static void covsrt(real **covar, int ma, int lista[], int mfit)
{
- int i, j;
- real swap;
-
- for (j = 1; j < ma; j++)
- {
- for (i = j+1; i <= ma; i++)
- {
- covar[i][j] = 0.0;
- }
- }
- for (i = 1; i < mfit; i++)
- {
- for (j = i+1; j <= mfit; j++)
- {
- if (lista[j] > lista[i])
- {
- covar[lista[j]][lista[i]] = covar[i][j];
- }
- else
- {
- covar[lista[i]][lista[j]] = covar[i][j];
- }
- }
- }
- swap = covar[1][1];
- for (j = 1; j <= ma; j++)
- {
- covar[1][j] = covar[j][j];
- covar[j][j] = 0.0;
- }
- covar[lista[1]][lista[1]] = swap;
- for (j = 2; j <= mfit; j++)
- {
- covar[lista[j]][lista[j]] = covar[1][j];
- }
- for (j = 2; j <= ma; j++)
- {
- for (i = 1; i <= j-1; i++)
- {
- covar[i][j] = covar[j][i];
- }
- }
+ int i,j;
+ real swap;
+
+ for (j=1;j<ma;j++)
+ for (i=j+1;i<=ma;i++) covar[i][j]=0.0;
+ for (i=1;i<mfit;i++)
+ for (j=i+1;j<=mfit;j++) {
+ if (lista[j] > lista[i])
+ covar[lista[j]][lista[i]]=covar[i][j];
+ else
+ covar[lista[i]][lista[j]]=covar[i][j];
+ }
+ swap=covar[1][1];
+ for (j=1;j<=ma;j++) {
+ covar[1][j]=covar[j][j];
+ covar[j][j]=0.0;
+ }
+ covar[lista[1]][lista[1]]=swap;
+ for (j=2;j<=mfit;j++) covar[lista[j]][lista[j]]=covar[1][j];
+ for (j=2;j<=ma;j++)
+ for (i=1;i<=j-1;i++) covar[i][j]=covar[j][i];
}
-#define SWAP(a, b) {swap = (a); (a) = (b); (b) = swap; }
+#define SWAP(a,b) {swap=(a);(a)=(b);(b)=swap;}
-static void covsrt_new(real **covar, int ma, int ia[], int mfit)
-/* Expand in storage the covariance matrix covar, so as to take
- * into account parameters that are being held fixed. (For the
- * latter, return zero covariances.)
- */
+static void covsrt_new(real **covar,int ma, int ia[], int mfit)
+ /* Expand in storage the covariance matrix covar, so as to take
+ * into account parameters that are being held fixed. (For the
+ * latter, return zero covariances.)
+ */
{
- int i, j, k;
- real swap;
- for (i = mfit+1; i <= ma; i++)
- {
- for (j = 1; j <= i; j++)
- {
- covar[i][j] = covar[j][i] = 0.0;
- }
- }
- k = mfit;
- for (j = ma; j >= 1; j--)
- {
- if (ia[j])
- {
- for (i = 1; i <= ma; i++)
- {
- SWAP(covar[i][k], covar[i][j]);
- }
- for (i = 1; i <= ma; i++)
- {
- SWAP(covar[k][i], covar[j][i]);
- }
- k--;
- }
- }
+ int i,j,k;
+ real swap;
+ for (i=mfit+1;i<=ma;i++)
+ for (j=1;j<=i;j++) covar[i][j]=covar[j][i]=0.0;
+ k=mfit;
+ for (j=ma;j>=1;j--) {
+ if (ia[j]) {
- for (i=1;i<=ma;i++) SWAP(covar[i][k],covar[i][j])
- for (i=1;i<=ma;i++) SWAP(covar[k][i],covar[j][i])
++ for (i=1;i<=ma;i++) SWAP(covar[i][k],covar[i][j]);
++ for (i=1;i<=ma;i++) SWAP(covar[k][i],covar[j][i]);
+ k--;
+ }
+ }
}
#undef SWAP
-
-static void mrqcof(real x[], real y[], real sig[], int ndata, real a[],
- int ma, int lista[], int mfit,
- real **alpha, real beta[], real *chisq,
- void (*funcs)(real, real *, real *, real *))
+
+static void mrqcof(real x[], real y[], real sig[], int ndata, real a[],
+ int ma, int lista[], int mfit,
+ real **alpha, real beta[], real *chisq,
+ void (*funcs)(real,real *,real *,real *))
{
- int k, j, i;
- real ymod, wt, sig2i, dy, *dyda;
-
- dyda = rvector(1, ma);
- for (j = 1; j <= mfit; j++)
- {
- for (k = 1; k <= j; k++)
- {
- alpha[j][k] = 0.0;
- }
- beta[j] = 0.0;
- }
- *chisq = 0.0;
- for (i = 1; i <= ndata; i++)
- {
- (*funcs)(x[i], a, &ymod, dyda);
- sig2i = 1.0/(sig[i]*sig[i]);
- dy = y[i]-ymod;
- for (j = 1; j <= mfit; j++)
- {
- wt = dyda[lista[j]]*sig2i;
- for (k = 1; k <= j; k++)
- {
- alpha[j][k] += wt*dyda[lista[k]];
- }
- beta[j] += dy*wt;
- }
- (*chisq) += dy*dy*sig2i;
- }
- for (j = 2; j <= mfit; j++)
- {
- for (k = 1; k <= j-1; k++)
- {
- alpha[k][j] = alpha[j][k];
- }
- }
- free_vector(dyda, 1);
+ int k,j,i;
+ real ymod,wt,sig2i,dy,*dyda;
+
+ dyda=rvector(1,ma);
+ for (j=1;j<=mfit;j++) {
+ for (k=1;k<=j;k++) alpha[j][k]=0.0;
+ beta[j]=0.0;
+ }
+ *chisq=0.0;
+ for (i=1;i<=ndata;i++) {
+ (*funcs)(x[i],a,&ymod,dyda);
+ sig2i=1.0/(sig[i]*sig[i]);
+ dy=y[i]-ymod;
+ for (j=1;j<=mfit;j++) {
+ wt=dyda[lista[j]]*sig2i;
+ for (k=1;k<=j;k++)
+ alpha[j][k] += wt*dyda[lista[k]];
+ beta[j] += dy*wt;
+ }
+ (*chisq) += dy*dy*sig2i;
+ }
+ for (j=2;j<=mfit;j++)
+ for (k=1;k<=j-1;k++) alpha[k][j]=alpha[j][k];
+ free_vector(dyda,1);
}
-
-gmx_bool mrqmin(real x[], real y[], real sig[], int ndata, real a[],
- int ma, int lista[], int mfit,
- real **covar, real **alpha, real *chisq,
- void (*funcs)(real, real *, real *, real *),
- real *alamda)
+
+gmx_bool mrqmin(real x[], real y[], real sig[], int ndata, real a[],
+ int ma, int lista[], int mfit,
+ real **covar, real **alpha, real *chisq,
+ void (*funcs)(real,real *,real *,real *),
+ real *alamda)
{
- int k, kk, j, ihit;
- static real *da, *atry, **oneda, *beta, ochisq;
-
- if (*alamda < 0.0)
- {
- oneda = matrix1(1, mfit, 1, 1);
- atry = rvector(1, ma);
- da = rvector(1, ma);
- beta = rvector(1, ma);
- kk = mfit+1;
- for (j = 1; j <= ma; j++)
- {
- ihit = 0;
- for (k = 1; k <= mfit; k++)
- {
- if (lista[k] == j)
- {
- ihit++;
- }
- }
- if (ihit == 0)
- {
- lista[kk++] = j;
- }
- else if (ihit > 1)
- {
- nrerror("Bad LISTA permutation in MRQMIN-1", FALSE);
- return FALSE;
- }
- }
- if (kk != ma+1)
- {
- nrerror("Bad LISTA permutation in MRQMIN-2", FALSE);
- return FALSE;
- }
- *alamda = 0.001;
- mrqcof(x, y, sig, ndata, a, ma, lista, mfit, alpha, beta, chisq, funcs);
- ochisq = (*chisq);
- }
- for (j = 1; j <= mfit; j++)
- {
- for (k = 1; k <= mfit; k++)
- {
- covar[j][k] = alpha[j][k];
- }
- covar[j][j] = alpha[j][j]*(1.0+(*alamda));
- oneda[j][1] = beta[j];
- }
- if (!gaussj(covar, mfit, oneda, 1))
- {
- return FALSE;
- }
- for (j = 1; j <= mfit; j++)
- {
- da[j] = oneda[j][1];
- }
- if (*alamda == 0.0)
- {
- covsrt(covar, ma, lista, mfit);
- free_vector(beta, 1);
- free_vector(da, 1);
- free_vector(atry, 1);
- free_matrix(oneda, 1, mfit, 1);
- return TRUE;
- }
- for (j = 1; j <= ma; j++)
- {
- atry[j] = a[j];
- }
- for (j = 1; j <= mfit; j++)
- {
- atry[lista[j]] = a[lista[j]]+da[j];
- }
- mrqcof(x, y, sig, ndata, atry, ma, lista, mfit, covar, da, chisq, funcs);
- if (*chisq < ochisq)
- {
- *alamda *= 0.1;
- ochisq = (*chisq);
- for (j = 1; j <= mfit; j++)
- {
- for (k = 1; k <= mfit; k++)
- {
- alpha[j][k] = covar[j][k];
- }
- beta[j] = da[j];
- a[lista[j]] = atry[lista[j]];
- }
- }
- else
- {
- *alamda *= 10.0;
- *chisq = ochisq;
- }
+ int k,kk,j,ihit;
+ static real *da,*atry,**oneda,*beta,ochisq;
+
+ if (*alamda < 0.0) {
+ oneda=matrix1(1,mfit,1,1);
+ atry=rvector(1,ma);
+ da=rvector(1,ma);
+ beta=rvector(1,ma);
+ kk=mfit+1;
+ for (j=1;j<=ma;j++) {
+ ihit=0;
+ for (k=1;k<=mfit;k++)
+ if (lista[k] == j) ihit++;
+ if (ihit == 0)
+ lista[kk++]=j;
+ else if (ihit > 1) {
+ nrerror("Bad LISTA permutation in MRQMIN-1", FALSE);
+ return FALSE;
+ }
+ }
+ if (kk != ma+1) {
+ nrerror("Bad LISTA permutation in MRQMIN-2", FALSE);
+ return FALSE;
+ }
+ *alamda=0.001;
+ mrqcof(x,y,sig,ndata,a,ma,lista,mfit,alpha,beta,chisq,funcs);
+ ochisq=(*chisq);
+ }
+ for (j=1;j<=mfit;j++) {
+ for (k=1;k<=mfit;k++) covar[j][k]=alpha[j][k];
+ covar[j][j]=alpha[j][j]*(1.0+(*alamda));
+ oneda[j][1]=beta[j];
+ }
+ if (!gaussj(covar,mfit,oneda,1))
+ return FALSE;
+ for (j=1;j<=mfit;j++)
+ da[j]=oneda[j][1];
+ if (*alamda == 0.0) {
+ covsrt(covar,ma,lista,mfit);
+ free_vector(beta,1);
+ free_vector(da,1);
+ free_vector(atry,1);
+ free_matrix(oneda,1,mfit,1);
return TRUE;
+ }
+ for (j=1;j<=ma;j++) atry[j]=a[j];
+ for (j=1;j<=mfit;j++)
+ atry[lista[j]] = a[lista[j]]+da[j];
+ mrqcof(x,y,sig,ndata,atry,ma,lista,mfit,covar,da,chisq,funcs);
+ if (*chisq < ochisq) {
+ *alamda *= 0.1;
+ ochisq=(*chisq);
+ for (j=1;j<=mfit;j++) {
+ for (k=1;k<=mfit;k++) alpha[j][k]=covar[j][k];
+ beta[j]=da[j];
+ a[lista[j]]=atry[lista[j]];
+ }
+ } else {
+ *alamda *= 10.0;
+ *chisq=ochisq;
+ }
+ return TRUE;
}