From: Roland Schulz Date: Thu, 27 Jun 2013 03:34:35 +0000 (-0400) Subject: Merge branch 'release-4-6' into master X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=commitdiff_plain;h=2398ab23cc07aad54eb1dc601c93d5ef5f221ad1;p=alexxy%2Fgromacs.git Merge branch 'release-4-6' into master Mostly easy. selhelp.cpp got the fix from selhelp.c Omitted the content of 34a402e7 (clang-AMD-FMA work-around) in favour of reworking it for C++ in a child patch Conflicts: CMakeLists.txt - kept master version number! src/gmxlib/selection/selhelp.c - deleted src/gromacs/gmxana/pp2shift.h - deleted src/gromacs/legacyheaders/pull_rotation.h Change-Id: Ibf0c9af136e39dfcbef0a85eb7d314740706cb60 --- 2398ab23cc07aad54eb1dc601c93d5ef5f221ad1 diff --cc src/gromacs/gmxana/dlist.c index ef3c45f515,0000000000..d2e7373be5 mode 100644,000000..100644 --- a/src/gromacs/gmxana/dlist.c +++ b/src/gromacs/gmxana/dlist.c @@@ -1,439 -1,0 +1,441 @@@ +/* + * + * This source code is part of + * + * G R O M A C S + * + * GROningen MAchine for Chemical Simulations + * + * VERSION 3.2.0 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others. + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team, + * check out http://www.gromacs.org for more information. + + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * If you want to redistribute modifications, please consider that + * scientific software is very special. Version control is crucial - + * bugs must be traceable. We will be happy to consider code for + * inclusion in the official distribution, but derived work must not + * be called official GROMACS. 
Details are found in the README & COPYING + * files - if they are missing, get the official version at www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the papers on the package - you can find them in the top README file. + * + * For more info, check our website at http://www.gromacs.org + * + * And Hey: + * Green Red Orange Magenta Azure Cyan Skyblue + */ +#ifdef HAVE_CONFIG_H +#include +#endif + +#include + +#include "string2.h" +#include "smalloc.h" +#include "gstat.h" +#include "gmx_fatal.h" +#include "index.h" + +t_dlist *mk_dlist(FILE *log, + t_atoms *atoms, int *nlist, + gmx_bool bPhi, gmx_bool bPsi, gmx_bool bChi, gmx_bool bHChi, + int maxchi, int r0, gmx_residuetype_t rt) +{ + int ires, i, j, k, ii; + t_dihatms atm, prev; + int nl = 0, nc[edMax]; + char *thisres; + t_dlist *dl; + + snew(dl, atoms->nres+1); - prev.C = prev.O = -1; ++ prev.C = prev.Cn[1] = -1; /* Keep the compiler quiet */ + for (i = 0; (i < edMax); i++) + { + nc[i] = 0; + } + ires = -1; + i = 0; + while (i < atoms->nr) + { + ires = atoms->atom[i].resind; + + /* Initiate all atom numbers to -1 */ - atm.minC = atm.H = atm.N = atm.C = atm.O = atm.minO = -1; ++ atm.minC = atm.H = atm.N = atm.C = atm.O = atm.minCalpha = -1; + for (j = 0; (j < MAXCHI+3); j++) + { + atm.Cn[j] = -1; + } + + /* Look for atoms in this residue */ + /* maybe should allow for chis to hydrogens? 
*/ + while ((i < atoms->nr) && (atoms->atom[i].resind == ires)) + { + if ((strcmp(*(atoms->atomname[i]), "H") == 0) || - (strcmp(*(atoms->atomname[i]), "H1") == 0) ) ++ (strcmp(*(atoms->atomname[i]), "H1") == 0) || ++ (strcmp(*(atoms->atomname[i]), "HN") == 0) ) + { + atm.H = i; + } + else if (strcmp(*(atoms->atomname[i]), "N") == 0) + { + atm.N = i; + } + else if (strcmp(*(atoms->atomname[i]), "C") == 0) + { + atm.C = i; + } + else if ((strcmp(*(atoms->atomname[i]), "O") == 0) || + (strcmp(*(atoms->atomname[i]), "O1") == 0)) + { + atm.O = i; + } + else if (strcmp(*(atoms->atomname[i]), "CA") == 0) + { + atm.Cn[1] = i; + } + else if (strcmp(*(atoms->atomname[i]), "CB") == 0) + { + atm.Cn[2] = i; + } + else if ((strcmp(*(atoms->atomname[i]), "CG") == 0) || + (strcmp(*(atoms->atomname[i]), "CG1") == 0) || + (strcmp(*(atoms->atomname[i]), "OG") == 0) || + (strcmp(*(atoms->atomname[i]), "OG1") == 0) || + (strcmp(*(atoms->atomname[i]), "SG") == 0)) + { + atm.Cn[3] = i; + } + else if ((strcmp(*(atoms->atomname[i]), "CD") == 0) || + (strcmp(*(atoms->atomname[i]), "CD1") == 0) || + (strcmp(*(atoms->atomname[i]), "SD") == 0) || + (strcmp(*(atoms->atomname[i]), "OD1") == 0) || + (strcmp(*(atoms->atomname[i]), "ND1") == 0)) + { + atm.Cn[4] = i; + } + /* by grs - split the Cn[4] into 2 bits to check allowing dih to H */ + else if (bHChi && ((strcmp(*(atoms->atomname[i]), "HG") == 0) || + (strcmp(*(atoms->atomname[i]), "HG1") == 0)) ) + { + atm.Cn[4] = i; + } + else if ((strcmp(*(atoms->atomname[i]), "CE") == 0) || + (strcmp(*(atoms->atomname[i]), "CE1") == 0) || + (strcmp(*(atoms->atomname[i]), "OE1") == 0) || + (strcmp(*(atoms->atomname[i]), "NE") == 0)) + { + atm.Cn[5] = i; + } + else if ((strcmp(*(atoms->atomname[i]), "CZ") == 0) || + (strcmp(*(atoms->atomname[i]), "NZ") == 0)) + { + atm.Cn[6] = i; + } + /* HChi flag here too */ + else if (bHChi && (strcmp(*(atoms->atomname[i]), "NH1") == 0)) + { + atm.Cn[7] = i; + } + i++; + } + + thisres = *(atoms->resinfo[ires].name); + 
+ /* added by grs - special case for aromatics, whose chis above 2 are + not real and produce rubbish output - so set back to -1 */ + if (strcmp(thisres, "PHE") == 0 || + strcmp(thisres, "TYR") == 0 || + strcmp(thisres, "PTR") == 0 || + strcmp(thisres, "TRP") == 0 || + strcmp(thisres, "HIS") == 0 || + strcmp(thisres, "HISA") == 0 || + strcmp(thisres, "HISB") == 0) + { + for (ii = 5; ii <= 7; ii++) + { + atm.Cn[ii] = -1; + } + } + /* end fixing aromatics */ + + /* Special case for Pro, has no H */ + if (strcmp(thisres, "PRO") == 0) + { + atm.H = atm.Cn[4]; + } + /* Carbon from previous residue */ + if (prev.C != -1) + { + atm.minC = prev.C; + } - if (prev.O != -1) ++ /* Alpha-carbon from previous residue */ ++ if (prev.Cn[1] != -1) + { - atm.minO = prev.O; ++ atm.minCalpha = prev.Cn[1]; + } + prev = atm; + + /* Check how many dihedrals we have */ + if ((atm.N != -1) && (atm.Cn[1] != -1) && (atm.C != -1) && + (atm.O != -1) && ((atm.H != -1) || (atm.minC != -1))) + { + dl[nl].resnr = ires+1; + dl[nl].atm = atm; + dl[nl].atm.Cn[0] = atm.N; + if ((atm.Cn[3] != -1) && (atm.Cn[2] != -1) && (atm.Cn[1] != -1)) + { + nc[0]++; + if (atm.Cn[4] != -1) + { + nc[1]++; + if (atm.Cn[5] != -1) + { + nc[2]++; + if (atm.Cn[6] != -1) + { + nc[3]++; + if (atm.Cn[7] != -1) + { + nc[4]++; + if (atm.Cn[8] != -1) + { + nc[5]++; + } + } + } + } + } + } - if ((atm.minC != -1) && (atm.minO != -1)) ++ if ((atm.minC != -1) && (atm.minCalpha != -1)) + { + nc[6]++; + } + dl[nl].index = gmx_residuetype_get_index(rt, thisres); + + sprintf(dl[nl].name, "%s%d", thisres, ires+r0); + nl++; + } + else if (debug) + { + fprintf(debug, "Could not find N atom but could find other atoms" + " in residue %s%d\n", thisres, ires+r0); + } + } + fprintf(stderr, "\n"); + fprintf(log, "\n"); + fprintf(log, "There are %d residues with dihedrals\n", nl); + j = 0; + if (bPhi) + { + j += nl; + } + if (bPsi) + { + j += nl; + } + if (bChi) + { + for (i = 0; (i < maxchi); i++) + { + j += nc[i]; + } + } + fprintf(log, "There 
are %d dihedrals\n", j); + fprintf(log, "Dihedral: "); + if (bPhi) + { + fprintf(log, " Phi "); + } + if (bPsi) + { + fprintf(log, " Psi "); + } + if (bChi) + { + for (i = 0; (i < maxchi); i++) + { + fprintf(log, "Chi%d ", i+1); + } + } + fprintf(log, "\nNumber: "); + if (bPhi) + { + fprintf(log, "%4d ", nl); + } + if (bPsi) + { + fprintf(log, "%4d ", nl); + } + if (bChi) + { + for (i = 0; (i < maxchi); i++) + { + fprintf(log, "%4d ", nc[i]); + } + } + fprintf(log, "\n"); + + *nlist = nl; + + return dl; +} + +gmx_bool has_dihedral(int Dih, t_dlist *dl) +{ + gmx_bool b = FALSE; + int ddd; + + switch (Dih) + { + case edPhi: + b = ((dl->atm.H != -1) && (dl->atm.N != -1) && (dl->atm.Cn[1] != -1) && (dl->atm.C != -1)); + break; + case edPsi: + b = ((dl->atm.N != -1) && (dl->atm.Cn[1] != -1) && (dl->atm.C != -1) && (dl->atm.O != -1)); + break; + case edOmega: - b = ((dl->atm.minO != -1) && (dl->atm.minC != -1) && (dl->atm.N != -1) && (dl->atm.Cn[1] != -1)); ++ b = ((dl->atm.minCalpha != -1) && (dl->atm.minC != -1) && (dl->atm.N != -1) && (dl->atm.Cn[1] != -1)); + break; + case edChi1: + case edChi2: + case edChi3: + case edChi4: + case edChi5: + case edChi6: + ddd = Dih - edChi1; + b = ((dl->atm.Cn[ddd] != -1) && (dl->atm.Cn[ddd+1] != -1) && + (dl->atm.Cn[ddd+2] != -1) && (dl->atm.Cn[ddd+3] != -1)); + break; + default: + pr_dlist(stdout, 1, dl, 1, 0, TRUE, TRUE, TRUE, TRUE, MAXCHI); + gmx_fatal(FARGS, "Non existant dihedral %d in file %s, line %d", + Dih, __FILE__, __LINE__); + } + return b; +} + +static void pr_one_ro(FILE *fp, t_dlist *dl, int nDih, real dt) +{ + int k; + for (k = 0; k < NROT; k++) + { + fprintf(fp, " %6.2f", dl->rot_occ[nDih][k]); + } + fprintf(fp, "\n"); +} + +static void pr_ntr_s2(FILE *fp, t_dlist *dl, int nDih, real dt) +{ + fprintf(fp, " %6.2f %6.2f\n", (dt == 0) ? 
0 : dl->ntr[nDih]/dt, dl->S2[nDih]); +} + +void pr_dlist(FILE *fp, int nl, t_dlist dl[], real dt, int printtype, + gmx_bool bPhi, gmx_bool bPsi, gmx_bool bChi, gmx_bool bOmega, int maxchi) +{ + int i, Xi; + + void (*pr_props)(FILE *, t_dlist *, int, real); + + /* Analysis of dihedral transitions etc */ + + if (printtype == edPrintST) + { + pr_props = pr_ntr_s2; + fprintf(stderr, "Now printing out transitions and OPs...\n"); + } + else + { + pr_props = pr_one_ro; + fprintf(stderr, "Now printing out rotamer occupancies...\n"); + fprintf(fp, "\nXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\n\n"); + } + + /* change atom numbers from 0 based to 1 based */ + for (i = 0; (i < nl); i++) + { + fprintf(fp, "Residue %s\n", dl[i].name); + if (printtype == edPrintST) + { + fprintf(fp, " Angle [ AI, AJ, AK, AL] #tr/ns S^2D \n" + "--------------------------------------------\n"); + } + else + { + fprintf(fp, " Angle [ AI, AJ, AK, AL] rotamers 0 g(-) t g(+)\n" + "--------------------------------------------\n"); + } + if (bPhi) + { + fprintf(fp, " Phi [%5d,%5d,%5d,%5d]", + (dl[i].atm.H == -1) ? 
1+dl[i].atm.minC : 1+dl[i].atm.H, + 1+dl[i].atm.N, 1+dl[i].atm.Cn[1], 1+dl[i].atm.C); + pr_props(fp, &dl[i], edPhi, dt); + } + if (bPsi) + { + fprintf(fp, " Psi [%5d,%5d,%5d,%5d]", 1+dl[i].atm.N, 1+dl[i].atm.Cn[1], + 1+dl[i].atm.C, 1+dl[i].atm.O); + pr_props(fp, &dl[i], edPsi, dt); + } + if (bOmega && has_dihedral(edOmega, &(dl[i]))) + { - fprintf(fp, " Omega [%5d,%5d,%5d,%5d]", 1+dl[i].atm.minO, 1+dl[i].atm.minC, ++ fprintf(fp, " Omega [%5d,%5d,%5d,%5d]", 1+dl[i].atm.minCalpha, 1+dl[i].atm.minC, + 1+dl[i].atm.N, 1+dl[i].atm.Cn[1]); + pr_props(fp, &dl[i], edOmega, dt); + } + for (Xi = 0; Xi < MAXCHI; Xi++) + { + if (bChi && (Xi < maxchi) && (dl[i].atm.Cn[Xi+3] != -1) ) + { + fprintf(fp, " Chi%d[%5d,%5d,%5d,%5d]", Xi+1, 1+dl[i].atm.Cn[Xi], + 1+dl[i].atm.Cn[Xi+1], 1+dl[i].atm.Cn[Xi+2], + 1+dl[i].atm.Cn[Xi+3]); + pr_props(fp, &dl[i], Xi+edChi1, dt); /* Xi+2 was wrong here */ + } + } + fprintf(fp, "\n"); + } +} + + + +int pr_trans(FILE *fp, int nl, t_dlist dl[], real dt, int Xi) +{ + /* never called at the moment */ + + int i, nn, nz; + + nz = 0; + fprintf(fp, "\\begin{table}[h]\n"); + fprintf(fp, "\\caption{Number of dihedral transitions per nanosecond}\n"); + fprintf(fp, "\\begin{tabular}{|l|l|}\n"); + fprintf(fp, "\\hline\n"); + fprintf(fp, "Residue\t&$\\chi_%d$\t\\\\\n", Xi+1); + for (i = 0; (i < nl); i++) + { + nn = dl[i].ntr[Xi]/dt; + + if (nn == 0) + { + fprintf(fp, "%s\t&\\HL{%d}\t\\\\\n", dl[i].name, nn); + nz++; + } + else if (nn > 0) + { + fprintf(fp, "%s\t&\\%d\t\\\\\n", dl[i].name, nn); + } + } + fprintf(fp, "\\hline\n"); + fprintf(fp, "\\end{tabular}\n"); + fprintf(fp, "\\end{table}\n\n"); + + return nz; +} diff --cc src/gromacs/gmxana/gmx_chi.c index d3fcb8b6d8,0000000000..0c37cd8e5b mode 100644,000000..100644 --- a/src/gromacs/gmxana/gmx_chi.c +++ b/src/gromacs/gmxana/gmx_chi.c @@@ -1,1582 -1,0 +1,1582 @@@ +/* + * + * This source code is part of + * + * G R O M A C S + * + * GROningen MAchine for Chemical Simulations + * + * VERSION 3.2.0 + * Written by 
David van der Spoel, Erik Lindahl, Berk Hess, and others. + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team, + * check out http://www.gromacs.org for more information. + + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * If you want to redistribute modifications, please consider that + * scientific software is very special. Version control is crucial - + * bugs must be traceable. We will be happy to consider code for + * inclusion in the official distribution, but derived work must not + * be called official GROMACS. Details are found in the README & COPYING + * files - if they are missing, get the official version at www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the papers on the package - you can find them in the top README file. 
+ * + * For more info, check our website at http://www.gromacs.org + * + * And Hey: + * Green Red Orange Magenta Azure Cyan Skyblue + */ +#ifdef HAVE_CONFIG_H +#include +#endif +#include +#include + +#include "confio.h" +#include "pdbio.h" +#include "copyrite.h" +#include "gmx_fatal.h" +#include "futil.h" +#include "gstat.h" +#include "macros.h" +#include "maths.h" +#include "physics.h" +#include "index.h" +#include "smalloc.h" +#include "statutil.h" +#include "tpxio.h" +#include +#include "sysstuff.h" +#include "txtdump.h" +#include "typedefs.h" +#include "vec.h" +#include "strdb.h" +#include "xvgr.h" +#include "matio.h" +#include "gmx_ana.h" + +static gmx_bool bAllowed(real phi, real psi) +{ + static const char *map[] = { + "1100000000000000001111111000000000001111111111111111111111111", + "1100000000000000001111110000000000011111111111111111111111111", + "1100000000000000001111110000000000011111111111111111111111111", + "1100000000000000001111100000000000111111111111111111111111111", + "1100000000000000001111100000000000111111111111111111111111111", + "1100000000000000001111100000000001111111111111111111111111111", + "1100000000000000001111100000000001111111111111111111111111111", + "1100000000000000001111100000000011111111111111111111111111111", + "1110000000000000001111110000000111111111111111111111111111111", + "1110000000000000001111110000001111111111111111111111111111111", + "1110000000000000001111111000011111111111111111111111111111111", + "1110000000000000001111111100111111111111111111111111111111111", + "1110000000000000001111111111111111111111111111111111111111111", + "1110000000000000001111111111111111111111111111111111111111111", + "1110000000000000001111111111111111111111111111111111111111111", + "1110000000000000001111111111111111111111111111111111111111111", + "1110000000000000001111111111111110011111111111111111111111111", + "1110000000000000001111111111111100000111111111111111111111111", + 
"1110000000000000001111111111111000000000001111111111111111111", + "1100000000000000001111111111110000000000000011111111111111111", + "1100000000000000001111111111100000000000000011111111111111111", + "1000000000000000001111111111000000000000000001111111111111110", + "0000000000000000001111111110000000000000000000111111111111100", + "0000000000000000000000000000000000000000000000000000000000000", + "0000000000000000000000000000000000000000000000000000000000000", + "0000000000000000000000000000000000000000000000000000000000000", + "0000000000000000000000000000000000000000000000000000000000000", + "0000000000000000000000000000000000000000000000000000000000000", + "0000000000000000000000000000000000000000000000000000000000000", + "0000000000000000000000000000000000000000000000000000000000000", + "0000000000000000000000000000000000000000000000000000000000000", + "0000000000000000000000000000000000000000000000000000000000000", + "0000000000000000000000000000000000000000000000000000000000000", + "0000000000000000000000000000000000000000000000000000000000000", + "0000000000000000000000000000000000000000000000000000000000000", + "0000000000000000000000000000000000000000000000000000000000000", + "0000000000000000000000000000000000000000000000000000000000000", + "0000000000000000000000000000000000000000000000000000000000000", + "0000000000000000000000000000000000111111111111000000000000000", + "1100000000000000000000000000000001111111111111100000000000111", + "1100000000000000000000000000000001111111111111110000000000111", + "0000000000000000000000000000000000000000000000000000000000000", + "0000000000000000000000000000000000000000000000000000000000000", + "0000000000000000000000000000000000000000000000000000000000000", + "0000000000000000000000000000000000000000000000000000000000000", + "0000000000000000000000000000000000000000000000000000000000000", + "0000000000000000000000000000000000000000000000000000000000000", + 
"0000000000000000000000000000000000000000000000000000000000000", + "0000000000000000000000000000000000000000000000000000000000000", + "0000000000000000000000000000000000000000000000000000000000000", + "0000000000000000000000000000000000000000000000000000000000000", + "0000000000000000000000000000000000000000000000000000000000000", + "0000000000000000000000000000000000000000000000000000000000000", + "0000000000000000000000000000000000000000000000000000000000000", + "0000000000000000000000000000000000000000000000000000000000000", + "0000000000000000000000000000000000000000000000000000000000000", + "0000000000000000000000000000000000000000000000000000000000000", + "0000000000000000000000000000000000000000000000000000000000000", + "0000000000000000000000000000000000000000000000000000000000000", + "0000000000000000000000000000000000000000000000000000000000000", + "0000000000000000000000000000000000000000000000000000000000000" + }; +#define NPP asize(map) + int x, y; + +#define INDEX(ppp) ((((int) (360+ppp*RAD2DEG)) % 360)/6) + x = INDEX(phi); + y = INDEX(psi); +#undef INDEX + return (gmx_bool) map[x][y]; +} + +atom_id *make_chi_ind(int nl, t_dlist dl[], int *ndih) +{ + atom_id *id; + int i, Xi, n; + + /* There are nl residues with max edMax dihedrals with 4 atoms each */ + snew(id, nl*edMax*4); + + n = 0; + for (i = 0; (i < nl); i++) + { + /* Phi, fake the first one */ + dl[i].j0[edPhi] = n/4; + if (dl[i].atm.minC >= 0) + { + id[n++] = dl[i].atm.minC; + } + else + { + id[n++] = dl[i].atm.H; + } + id[n++] = dl[i].atm.N; + id[n++] = dl[i].atm.Cn[1]; + id[n++] = dl[i].atm.C; + } + for (i = 0; (i < nl); i++) + { + /* Psi, fake the last one */ + dl[i].j0[edPsi] = n/4; + id[n++] = dl[i].atm.N; + id[n++] = dl[i].atm.Cn[1]; + id[n++] = dl[i].atm.C; + if (i < (nl-1) ) + { + id[n++] = dl[i+1].atm.N; + } + else + { + id[n++] = dl[i].atm.O; + } + } - for (i = 0; (i < nl); i++) ++ for (i = 1; (i < nl); i++) + { + /* Omega */ + if (has_dihedral(edOmega, &(dl[i]))) + { + 
dl[i].j0[edOmega] = n/4; - id[n++] = dl[i].atm.minO; ++ id[n++] = dl[i].atm.minCalpha; + id[n++] = dl[i].atm.minC; + id[n++] = dl[i].atm.N; - id[n++] = dl[i].atm.H; ++ id[n++] = dl[i].atm.Cn[1]; + } + } + for (Xi = 0; (Xi < MAXCHI); Xi++) + { + /* Chi# */ + for (i = 0; (i < nl); i++) + { + if (dl[i].atm.Cn[Xi+3] != -1) + { + dl[i].j0[edChi1+Xi] = n/4; + id[n++] = dl[i].atm.Cn[Xi]; + id[n++] = dl[i].atm.Cn[Xi+1]; + id[n++] = dl[i].atm.Cn[Xi+2]; + id[n++] = dl[i].atm.Cn[Xi+3]; + } + } + } + *ndih = n/4; + + return id; +} + +int bin(real chi, int mult) +{ + mult = 3; + + return (int) (chi*mult/360.0); +} + + +static void do_dihcorr(const char *fn, int nf, int ndih, real **dih, real dt, + int nlist, t_dlist dlist[], real time[], int maxchi, + gmx_bool bPhi, gmx_bool bPsi, gmx_bool bChi, gmx_bool bOmega, + const output_env_t oenv) +{ + char name1[256], name2[256]; + int i, j, Xi; + + do_autocorr(fn, oenv, "Dihedral Autocorrelation Function", + nf, ndih, dih, dt, eacCos, FALSE); + /* Dump em all */ + j = 0; + for (i = 0; (i < nlist); i++) + { + if (bPhi) + { + print_one(oenv, "corrphi", dlist[i].name, "Phi ACF for", "C(t)", nf/2, time, + dih[j]); + } + j++; + } + for (i = 0; (i < nlist); i++) + { + if (bPsi) + { + print_one(oenv, "corrpsi", dlist[i].name, "Psi ACF for", "C(t)", nf/2, time, + dih[j]); + } + j++; + } + for (i = 0; (i < nlist); i++) + { + if (has_dihedral(edOmega, &dlist[i])) + { + if (bOmega) + { + print_one(oenv, "corromega", dlist[i].name, "Omega ACF for", "C(t)", + nf/2, time, dih[j]); + } + j++; + } + } + for (Xi = 0; (Xi < maxchi); Xi++) + { + sprintf(name1, "corrchi%d", Xi+1); + sprintf(name2, "Chi%d ACF for", Xi+1); + for (i = 0; (i < nlist); i++) + { + if (dlist[i].atm.Cn[Xi+3] != -1) + { + if (bChi) + { + print_one(oenv, name1, dlist[i].name, name2, "C(t)", nf/2, time, dih[j]); + } + j++; + } + } + } + fprintf(stderr, "\n"); +} + +static void copy_dih_data(real in[], real out[], int nf, gmx_bool bLEAVE) +{ + /* if bLEAVE, do nothing to data in 
copying to out + * otherwise multiply by 180/pi to convert rad to deg */ + int i; + real mult; + if (bLEAVE) + { + mult = 1; + } + else + { + mult = (180.0/M_PI); + } + for (i = 0; (i < nf); i++) + { + out[i] = in[i]*mult; + } +} + +static void dump_em_all(int nlist, t_dlist dlist[], int nf, real time[], + real **dih, int maxchi, + gmx_bool bPhi, gmx_bool bPsi, gmx_bool bChi, gmx_bool bOmega, gmx_bool bRAD, + const output_env_t oenv) +{ + char name[256], titlestr[256], ystr[256]; + real *data; + int i, j, Xi; + + snew(data, nf); + if (bRAD) + { + strcpy(ystr, "Angle (rad)"); + } + else + { + strcpy(ystr, "Angle (degrees)"); + } + + /* Dump em all */ + j = 0; + for (i = 0; (i < nlist); i++) + { + /* grs debug printf("OK i %d j %d\n", i, j) ; */ + if (bPhi) + { + copy_dih_data(dih[j], data, nf, bRAD); + print_one(oenv, "phi", dlist[i].name, "\\xf\\f{}", ystr, nf, time, data); + } + j++; + } + for (i = 0; (i < nlist); i++) + { + if (bPsi) + { + copy_dih_data(dih[j], data, nf, bRAD); + print_one(oenv, "psi", dlist[i].name, "\\xy\\f{}", ystr, nf, time, data); + } + j++; + } + for (i = 0; (i < nlist); i++) + { + if (has_dihedral(edOmega, &(dlist[i]))) + { + if (bOmega) + { + copy_dih_data(dih[j], data, nf, bRAD); + print_one(oenv, "omega", dlist[i].name, "\\xw\\f{}", ystr, nf, time, data); + } + j++; + } + } + + for (Xi = 0; (Xi < maxchi); Xi++) + { + for (i = 0; (i < nlist); i++) + { + if (dlist[i].atm.Cn[Xi+3] != -1) + { + if (bChi) + { + sprintf(name, "chi%d", Xi+1); + sprintf(titlestr, "\\xc\\f{}\\s%d\\N", Xi+1); + copy_dih_data(dih[j], data, nf, bRAD); + print_one(oenv, name, dlist[i].name, titlestr, ystr, nf, time, data); + } + j++; + } + } + } + fprintf(stderr, "\n"); +} + +static void reset_one(real dih[], int nf, real phase) +{ + int j; + + for (j = 0; (j < nf); j++) + { + dih[j] += phase; + while (dih[j] < -M_PI) + { + dih[j] += 2*M_PI; + } + while (dih[j] >= M_PI) + { + dih[j] -= 2*M_PI; + } + } +} + +static int reset_em_all(int nlist, t_dlist dlist[], int nf, 
+ real **dih, int maxchi) +{ + int i, j, Xi; + + /* Reset em all */ + j = 0; + /* Phi */ + for (i = 0; (i < nlist); i++) + { + if (dlist[i].atm.minC == -1) + { + reset_one(dih[j++], nf, M_PI); + } + else + { + reset_one(dih[j++], nf, 0); + } + } + /* Psi */ + for (i = 0; (i < nlist-1); i++) + { + reset_one(dih[j++], nf, 0); + } + /* last Psi is faked from O */ + reset_one(dih[j++], nf, M_PI); + + /* Omega */ + for (i = 0; (i < nlist); i++) + { + if (has_dihedral(edOmega, &dlist[i])) + { + reset_one(dih[j++], nf, 0); + } + } + /* Chi 1 thru maxchi */ + for (Xi = 0; (Xi < maxchi); Xi++) + { + for (i = 0; (i < nlist); i++) + { + if (dlist[i].atm.Cn[Xi+3] != -1) + { + reset_one(dih[j], nf, 0); + j++; + } + } + } + fprintf(stderr, "j after resetting (nr. active dihedrals) = %d\n", j); + return j; +} + +static void histogramming(FILE *log, int nbin, gmx_residuetype_t rt, + int nf, int maxchi, real **dih, + int nlist, t_dlist dlist[], + atom_id index[], + gmx_bool bPhi, gmx_bool bPsi, gmx_bool bOmega, gmx_bool bChi, + gmx_bool bNormalize, gmx_bool bSSHisto, const char *ssdump, + real bfac_max, t_atoms *atoms, + gmx_bool bDo_jc, const char *fn, + const output_env_t oenv) +{ + /* also gets 3J couplings and order parameters S2 */ + t_karplus kkkphi[] = { + { "J_NHa1", 6.51, -1.76, 1.6, -M_PI/3, 0.0, 0.0 }, + { "J_NHa2", 6.51, -1.76, 1.6, M_PI/3, 0.0, 0.0 }, + { "J_HaC'", 4.0, 1.1, 0.1, 0.0, 0.0, 0.0 }, + { "J_NHCb", 4.7, -1.5, -0.2, M_PI/3, 0.0, 0.0 }, + { "J_Ci-1Hai", 4.5, -1.3, -1.2, 2*M_PI/3, 0.0, 0.0 } + }; + t_karplus kkkpsi[] = { + { "J_HaN", -0.88, -0.61, -0.27, M_PI/3, 0.0, 0.0 } + }; + t_karplus kkkchi1[] = { + { "JHaHb2", 9.5, -1.6, 1.8, -M_PI/3, 0, 0.0 }, + { "JHaHb3", 9.5, -1.6, 1.8, 0, 0, 0.0 } + }; +#define NKKKPHI asize(kkkphi) +#define NKKKPSI asize(kkkpsi) +#define NKKKCHI asize(kkkchi1) +#define NJC (NKKKPHI+NKKKPSI+NKKKCHI) + + FILE *fp, *ssfp[3] = {NULL, NULL, NULL}; + const char *sss[3] = { "sheet", "helix", "coil" }; + real S2; + real *normhisto; + real 
**Jc, **Jcsig; + int ****his_aa_ss = NULL; + int ***his_aa, **his_aa1, *histmp; + int i, j, k, m, n, nn, Dih, nres, hindex, angle; + gmx_bool bBfac, bOccup; + char hisfile[256], hhisfile[256], sshisfile[256], title[256], *ss_str = NULL; + char **leg; + const char *residue_name; + int rt_size; + + rt_size = gmx_residuetype_get_size(rt); + if (bSSHisto) + { + fp = ffopen(ssdump, "r"); + if (1 != fscanf(fp, "%d", &nres)) + { + gmx_fatal(FARGS, "Error reading from file %s", ssdump); + } + + snew(ss_str, nres+1); + if (1 != fscanf(fp, "%s", ss_str)) + { + gmx_fatal(FARGS, "Error reading from file %s", ssdump); + } + + ffclose(fp); + /* Four dimensional array... Very cool */ + snew(his_aa_ss, 3); + for (i = 0; (i < 3); i++) + { + snew(his_aa_ss[i], rt_size+1); + for (j = 0; (j <= rt_size); j++) + { + snew(his_aa_ss[i][j], edMax); + for (Dih = 0; (Dih < edMax); Dih++) + { + snew(his_aa_ss[i][j][Dih], nbin+1); + } + } + } + } + snew(his_aa, edMax); + for (Dih = 0; (Dih < edMax); Dih++) + { + snew(his_aa[Dih], rt_size+1); + for (i = 0; (i <= rt_size); i++) + { + snew(his_aa[Dih][i], nbin+1); + } + } + snew(histmp, nbin); + + snew(Jc, nlist); + snew(Jcsig, nlist); + for (i = 0; (i < nlist); i++) + { + snew(Jc[i], NJC); + snew(Jcsig[i], NJC); + } + + j = 0; + n = 0; + for (Dih = 0; (Dih < NONCHI+maxchi); Dih++) + { + for (i = 0; (i < nlist); i++) + { + if (((Dih < edOmega) ) || + ((Dih == edOmega) && (has_dihedral(edOmega, &(dlist[i])))) || + ((Dih > edOmega) && (dlist[i].atm.Cn[Dih-NONCHI+3] != -1))) + { + make_histo(log, nf, dih[j], nbin, histmp, -M_PI, M_PI); + + if (bSSHisto) + { + /* Assume there is only one structure, the first. + * Compute index in histogram. + */ + /* Check the atoms to see whether their B-factors are low enough + * Check atoms to see their occupancy is 1. 
+ */ + bBfac = bOccup = TRUE; + for (nn = 0; (nn < 4); nn++, n++) + { + bBfac = bBfac && (atoms->pdbinfo[index[n]].bfac <= bfac_max); + bOccup = bOccup && (atoms->pdbinfo[index[n]].occup == 1); + } + if (bOccup && ((bfac_max <= 0) || ((bfac_max > 0) && bBfac))) + { + hindex = ((dih[j][0]+M_PI)*nbin)/(2*M_PI); + range_check(hindex, 0, nbin); + + /* Assign dihedral to either of the structure determined + * histograms + */ + switch (ss_str[dlist[i].resnr]) + { + case 'E': + his_aa_ss[0][dlist[i].index][Dih][hindex]++; + break; + case 'H': + his_aa_ss[1][dlist[i].index][Dih][hindex]++; + break; + default: + his_aa_ss[2][dlist[i].index][Dih][hindex]++; + break; + } + } + else if (debug) + { + fprintf(debug, "Res. %d has imcomplete occupancy or bfacs > %g\n", + dlist[i].resnr, bfac_max); + } + } + else + { + n += 4; + } + + switch (Dih) + { + case edPhi: + calc_distribution_props(nbin, histmp, -M_PI, NKKKPHI, kkkphi, &S2); + + for (m = 0; (m < NKKKPHI); m++) + { + Jc[i][m] = kkkphi[m].Jc; + Jcsig[i][m] = kkkphi[m].Jcsig; + } + break; + case edPsi: + calc_distribution_props(nbin, histmp, -M_PI, NKKKPSI, kkkpsi, &S2); + + for (m = 0; (m < NKKKPSI); m++) + { + Jc[i][NKKKPHI+m] = kkkpsi[m].Jc; + Jcsig[i][NKKKPHI+m] = kkkpsi[m].Jcsig; + } + break; + case edChi1: + calc_distribution_props(nbin, histmp, -M_PI, NKKKCHI, kkkchi1, &S2); + for (m = 0; (m < NKKKCHI); m++) + { + Jc[i][NKKKPHI+NKKKPSI+m] = kkkchi1[m].Jc; + Jcsig[i][NKKKPHI+NKKKPSI+m] = kkkchi1[m].Jcsig; + } + break; + default: /* covers edOmega and higher Chis than Chi1 */ + calc_distribution_props(nbin, histmp, -M_PI, 0, NULL, &S2); + break; + } + dlist[i].S2[Dih] = S2; + + /* Sum distribution per amino acid type as well */ + for (k = 0; (k < nbin); k++) + { + his_aa[Dih][dlist[i].index][k] += histmp[k]; + histmp[k] = 0; + } + j++; + } + else /* dihed not defined */ + { + dlist[i].S2[Dih] = 0.0; + } + } + } + sfree(histmp); + + /* Print out Jcouplings */ + fprintf(log, "\n *** J-Couplings from simulation (plus std. 
dev.) ***\n\n"); + fprintf(log, "Residue "); + for (i = 0; (i < NKKKPHI); i++) + { + fprintf(log, "%7s SD", kkkphi[i].name); + } + for (i = 0; (i < NKKKPSI); i++) + { + fprintf(log, "%7s SD", kkkpsi[i].name); + } + for (i = 0; (i < NKKKCHI); i++) + { + fprintf(log, "%7s SD", kkkchi1[i].name); + } + fprintf(log, "\n"); + for (i = 0; (i < NJC+1); i++) + { + fprintf(log, "------------"); + } + fprintf(log, "\n"); + for (i = 0; (i < nlist); i++) + { + fprintf(log, "%-10s", dlist[i].name); + for (j = 0; (j < NJC); j++) + { + fprintf(log, " %5.2f %4.2f", Jc[i][j], Jcsig[i][j]); + } + fprintf(log, "\n"); + } + fprintf(log, "\n"); + + /* and to -jc file... */ + if (bDo_jc) + { + fp = xvgropen(fn, "\\S3\\NJ-Couplings from Karplus Equation", "Residue", + "Coupling", oenv); + snew(leg, NJC); + for (i = 0; (i < NKKKPHI); i++) + { + leg[i] = strdup(kkkphi[i].name); + } + for (i = 0; (i < NKKKPSI); i++) + { + leg[i+NKKKPHI] = strdup(kkkpsi[i].name); + } + for (i = 0; (i < NKKKCHI); i++) + { + leg[i+NKKKPHI+NKKKPSI] = strdup(kkkchi1[i].name); + } + xvgr_legend(fp, NJC, (const char**)leg, oenv); + fprintf(fp, "%5s ", "#Res."); + for (i = 0; (i < NJC); i++) + { + fprintf(fp, "%10s ", leg[i]); + } + fprintf(fp, "\n"); + for (i = 0; (i < nlist); i++) + { + fprintf(fp, "%5d ", dlist[i].resnr); + for (j = 0; (j < NJC); j++) + { + fprintf(fp, " %8.3f", Jc[i][j]); + } + fprintf(fp, "\n"); + } + ffclose(fp); + for (i = 0; (i < NJC); i++) + { + sfree(leg[i]); + } + } + /* finished -jc stuff */ + + snew(normhisto, nbin); + for (i = 0; (i < rt_size); i++) + { + for (Dih = 0; (Dih < edMax); Dih++) + { + /* First check whether something is in there */ + for (j = 0; (j < nbin); j++) + { + if (his_aa[Dih][i][j] != 0) + { + break; + } + } + if ((j < nbin) && + ((bPhi && (Dih == edPhi)) || + (bPsi && (Dih == edPsi)) || + (bOmega && (Dih == edOmega)) || + (bChi && (Dih >= edChi1)))) + { + if (bNormalize) + { + normalize_histo(nbin, his_aa[Dih][i], (360.0/nbin), normhisto); + } + + residue_name = 
gmx_residuetype_get_name(rt, i); + switch (Dih) + { + case edPhi: + sprintf(hisfile, "histo-phi%s", residue_name); + sprintf(title, "\\xf\\f{} Distribution for %s", residue_name); + break; + case edPsi: + sprintf(hisfile, "histo-psi%s", residue_name); + sprintf(title, "\\xy\\f{} Distribution for %s", residue_name); + break; + case edOmega: + sprintf(hisfile, "histo-omega%s", residue_name); + sprintf(title, "\\xw\\f{} Distribution for %s", residue_name); + break; + default: + sprintf(hisfile, "histo-chi%d%s", Dih-NONCHI+1, residue_name); + sprintf(title, "\\xc\\f{}\\s%d\\N Distribution for %s", + Dih-NONCHI+1, residue_name); + } + strcpy(hhisfile, hisfile); + strcat(hhisfile, ".xvg"); + fp = xvgropen(hhisfile, title, "Degrees", "", oenv); + fprintf(fp, "@ with g0\n"); + xvgr_world(fp, -180, 0, 180, 0.1, oenv); + fprintf(fp, "# this effort to set graph size fails unless you run with -autoscale none or -autoscale y flags\n"); + fprintf(fp, "@ xaxis tick on\n"); + fprintf(fp, "@ xaxis tick major 90\n"); + fprintf(fp, "@ xaxis tick minor 30\n"); + fprintf(fp, "@ xaxis ticklabel prec 0\n"); + fprintf(fp, "@ yaxis tick off\n"); + fprintf(fp, "@ yaxis ticklabel off\n"); + fprintf(fp, "@ type xy\n"); + if (bSSHisto) + { + for (k = 0; (k < 3); k++) + { + sprintf(sshisfile, "%s-%s.xvg", hisfile, sss[k]); + ssfp[k] = ffopen(sshisfile, "w"); + } + } + for (j = 0; (j < nbin); j++) + { + angle = -180 + (360/nbin)*j; + if (bNormalize) + { + fprintf(fp, "%5d %10g\n", angle, normhisto[j]); + } + else + { + fprintf(fp, "%5d %10d\n", angle, his_aa[Dih][i][j]); + } + if (bSSHisto) + { + for (k = 0; (k < 3); k++) + { + fprintf(ssfp[k], "%5d %10d\n", angle, + his_aa_ss[k][i][Dih][j]); + } + } + } + fprintf(fp, "&\n"); + ffclose(fp); + if (bSSHisto) + { + for (k = 0; (k < 3); k++) + { + fprintf(ssfp[k], "&\n"); + ffclose(ssfp[k]); + } + } + } + } + } + sfree(normhisto); + + if (bSSHisto) + { + /* Four dimensional array... 
Very cool */ + for (i = 0; (i < 3); i++) + { + for (j = 0; (j <= rt_size); j++) + { + for (Dih = 0; (Dih < edMax); Dih++) + { + sfree(his_aa_ss[i][j][Dih]); + } + sfree(his_aa_ss[i][j]); + } + sfree(his_aa_ss[i]); + } + sfree(his_aa_ss); + sfree(ss_str); + } +} + +static FILE *rama_file(const char *fn, const char *title, const char *xaxis, + const char *yaxis, const output_env_t oenv) +{ + FILE *fp; + + fp = xvgropen(fn, title, xaxis, yaxis, oenv); + fprintf(fp, "@ with g0\n"); + xvgr_world(fp, -180, -180, 180, 180, oenv); + fprintf(fp, "@ xaxis tick on\n"); + fprintf(fp, "@ xaxis tick major 90\n"); + fprintf(fp, "@ xaxis tick minor 30\n"); + fprintf(fp, "@ xaxis ticklabel prec 0\n"); + fprintf(fp, "@ yaxis tick on\n"); + fprintf(fp, "@ yaxis tick major 90\n"); + fprintf(fp, "@ yaxis tick minor 30\n"); + fprintf(fp, "@ yaxis ticklabel prec 0\n"); + fprintf(fp, "@ s0 type xy\n"); + fprintf(fp, "@ s0 symbol 2\n"); + fprintf(fp, "@ s0 symbol size 0.410000\n"); + fprintf(fp, "@ s0 symbol fill 1\n"); + fprintf(fp, "@ s0 symbol color 1\n"); + fprintf(fp, "@ s0 symbol linewidth 1\n"); + fprintf(fp, "@ s0 symbol linestyle 1\n"); + fprintf(fp, "@ s0 symbol center false\n"); + fprintf(fp, "@ s0 symbol char 0\n"); + fprintf(fp, "@ s0 skip 0\n"); + fprintf(fp, "@ s0 linestyle 0\n"); + fprintf(fp, "@ s0 linewidth 1\n"); + fprintf(fp, "@ type xy\n"); + + return fp; +} + +static void do_rama(int nf, int nlist, t_dlist dlist[], real **dih, + gmx_bool bViol, gmx_bool bRamOmega, const output_env_t oenv) +{ + FILE *fp, *gp = NULL; + gmx_bool bOm; + char fn[256]; + int i, j, k, Xi1, Xi2, Phi, Psi, Om = 0, nlevels; +#define NMAT 120 + real **mat = NULL, phi, psi, omega, axis[NMAT], lo, hi; + t_rgb rlo = { 1.0, 0.0, 0.0 }; + t_rgb rmid = { 1.0, 1.0, 1.0 }; + t_rgb rhi = { 0.0, 0.0, 1.0 }; + + for (i = 0; (i < nlist); i++) + { + if ((has_dihedral(edPhi, &(dlist[i]))) && + (has_dihedral(edPsi, &(dlist[i])))) + { + sprintf(fn, "ramaPhiPsi%s.xvg", dlist[i].name); + fp = rama_file(fn, 
"Ramachandran Plot", + "\\8f\\4 (deg)", "\\8y\\4 (deg)", oenv); + bOm = bRamOmega && has_dihedral(edOmega, &(dlist[i])); + if (bOm) + { + Om = dlist[i].j0[edOmega]; + snew(mat, NMAT); + for (j = 0; (j < NMAT); j++) + { + snew(mat[j], NMAT); + axis[j] = -180+(360*j)/NMAT; + } + } + if (bViol) + { + sprintf(fn, "violPhiPsi%s.xvg", dlist[i].name); + gp = ffopen(fn, "w"); + } + Phi = dlist[i].j0[edPhi]; + Psi = dlist[i].j0[edPsi]; + for (j = 0; (j < nf); j++) + { + phi = RAD2DEG*dih[Phi][j]; + psi = RAD2DEG*dih[Psi][j]; + fprintf(fp, "%10g %10g\n", phi, psi); + if (bViol) + { + fprintf(gp, "%d\n", !bAllowed(dih[Phi][j], RAD2DEG*dih[Psi][j])); + } + if (bOm) + { + omega = RAD2DEG*dih[Om][j]; + mat[(int)((phi*NMAT)/360)+NMAT/2][(int)((psi*NMAT)/360)+NMAT/2] + += omega; + } + } + if (bViol) + { + ffclose(gp); + } + ffclose(fp); + if (bOm) + { + sprintf(fn, "ramomega%s.xpm", dlist[i].name); + fp = ffopen(fn, "w"); + lo = hi = 0; + for (j = 0; (j < NMAT); j++) + { + for (k = 0; (k < NMAT); k++) + { + mat[j][k] /= nf; + lo = min(mat[j][k], lo); + hi = max(mat[j][k], hi); + } + } + /* Symmetrise */ + if (fabs(lo) > fabs(hi)) + { + hi = -lo; + } + else + { + lo = -hi; + } + /* Add 180 */ + for (j = 0; (j < NMAT); j++) + { + for (k = 0; (k < NMAT); k++) + { + mat[j][k] += 180; + } + } + lo += 180; + hi += 180; + nlevels = 20; + write_xpm3(fp, 0, "Omega/Ramachandran Plot", "Deg", "Phi", "Psi", + NMAT, NMAT, axis, axis, mat, lo, 180.0, hi, rlo, rmid, rhi, &nlevels); + ffclose(fp); + for (j = 0; (j < NMAT); j++) + { + sfree(mat[j]); + } + sfree(mat); + } + } + if ((has_dihedral(edChi1, &(dlist[i]))) && + (has_dihedral(edChi2, &(dlist[i])))) + { + sprintf(fn, "ramaX1X2%s.xvg", dlist[i].name); + fp = rama_file(fn, "\\8c\\4\\s1\\N-\\8c\\4\\s2\\N Ramachandran Plot", + "\\8c\\4\\s1\\N (deg)", "\\8c\\4\\s2\\N (deg)", oenv); + Xi1 = dlist[i].j0[edChi1]; + Xi2 = dlist[i].j0[edChi2]; + for (j = 0; (j < nf); j++) + { + fprintf(fp, "%10g %10g\n", RAD2DEG*dih[Xi1][j], RAD2DEG*dih[Xi2][j]); + 
} + ffclose(fp); + } + else + { + fprintf(stderr, "No chi1 & chi2 angle for %s\n", dlist[i].name); + } + } +} + + +static void print_transitions(const char *fn, int maxchi, int nlist, + t_dlist dlist[], t_atoms *atoms, rvec x[], + matrix box, gmx_bool bPhi, gmx_bool bPsi, gmx_bool bChi, real dt, + const output_env_t oenv) +{ + /* based on order_params below */ + FILE *fp; + int nh[edMax]; + int i, Dih, Xi; + + /* must correspond with enum in pp2shift.h:38 */ + char *leg[edMax]; +#define NLEG asize(leg) + + leg[0] = strdup("Phi"); + leg[1] = strdup("Psi"); + leg[2] = strdup("Omega"); + leg[3] = strdup("Chi1"); + leg[4] = strdup("Chi2"); + leg[5] = strdup("Chi3"); + leg[6] = strdup("Chi4"); + leg[7] = strdup("Chi5"); + leg[8] = strdup("Chi6"); + + /* Print order parameters */ + fp = xvgropen(fn, "Dihedral Rotamer Transitions", "Residue", "Transitions/ns", + oenv); + xvgr_legend(fp, NONCHI+maxchi, (const char**)leg, oenv); + + for (Dih = 0; (Dih < edMax); Dih++) + { + nh[Dih] = 0; + } + + fprintf(fp, "%5s ", "#Res."); + fprintf(fp, "%10s %10s %10s ", leg[edPhi], leg[edPsi], leg[edOmega]); + for (Xi = 0; Xi < maxchi; Xi++) + { + fprintf(fp, "%10s ", leg[NONCHI+Xi]); + } + fprintf(fp, "\n"); + + for (i = 0; (i < nlist); i++) + { + fprintf(fp, "%5d ", dlist[i].resnr); + for (Dih = 0; (Dih < NONCHI+maxchi); Dih++) + { + fprintf(fp, "%10.3f ", dlist[i].ntr[Dih]/dt); + } + /* fprintf(fp,"%12s\n",dlist[i].name); this confuses xmgrace */ + fprintf(fp, "\n"); + } + ffclose(fp); +} + +static void order_params(FILE *log, + const char *fn, int maxchi, int nlist, t_dlist dlist[], + const char *pdbfn, real bfac_init, + t_atoms *atoms, rvec x[], int ePBC, matrix box, + gmx_bool bPhi, gmx_bool bPsi, gmx_bool bChi, const output_env_t oenv) +{ + FILE *fp; + int nh[edMax]; + char buf[STRLEN]; + int i, Dih, Xi; + real S2Max, S2Min; + + /* except for S2Min/Max, must correspond with enum in pp2shift.h:38 */ + const char *const_leg[2+edMax] = { + "S2Min", "S2Max", "Phi", "Psi", "Omega", + 
"Chi1", "Chi2", "Chi3", "Chi4", "Chi5", + "Chi6" + }; +#define NLEG asize(leg) + + char *leg[2+edMax]; + + for (i = 0; i < NLEG; i++) + { + leg[i] = strdup(const_leg[i]); + } + + /* Print order parameters */ + fp = xvgropen(fn, "Dihedral Order Parameters", "Residue", "S2", oenv); + xvgr_legend(fp, 2+NONCHI+maxchi, const_leg, oenv); + + for (Dih = 0; (Dih < edMax); Dih++) + { + nh[Dih] = 0; + } + + fprintf(fp, "%5s ", "#Res."); + fprintf(fp, "%10s %10s ", leg[0], leg[1]); + fprintf(fp, "%10s %10s %10s ", leg[2+edPhi], leg[2+edPsi], leg[2+edOmega]); + for (Xi = 0; Xi < maxchi; Xi++) + { + fprintf(fp, "%10s ", leg[2+NONCHI+Xi]); + } + fprintf(fp, "\n"); + + for (i = 0; (i < nlist); i++) + { + S2Max = -10; + S2Min = 10; + for (Dih = 0; (Dih < NONCHI+maxchi); Dih++) + { + if (dlist[i].S2[Dih] != 0) + { + if (dlist[i].S2[Dih] > S2Max) + { + S2Max = dlist[i].S2[Dih]; + } + if (dlist[i].S2[Dih] < S2Min) + { + S2Min = dlist[i].S2[Dih]; + } + } + if (dlist[i].S2[Dih] > 0.8) + { + nh[Dih]++; + } + } + fprintf(fp, "%5d ", dlist[i].resnr); + fprintf(fp, "%10.3f %10.3f ", S2Min, S2Max); + for (Dih = 0; (Dih < NONCHI+maxchi); Dih++) + { + fprintf(fp, "%10.3f ", dlist[i].S2[Dih]); + } + fprintf(fp, "\n"); + /* fprintf(fp,"%12s\n",dlist[i].name); this confuses xmgrace */ + } + ffclose(fp); + + if (NULL != pdbfn) + { + real x0, y0, z0; + + if (NULL == atoms->pdbinfo) + { + snew(atoms->pdbinfo, atoms->nr); + } + for (i = 0; (i < atoms->nr); i++) + { + atoms->pdbinfo[i].bfac = bfac_init; + } + + for (i = 0; (i < nlist); i++) + { + atoms->pdbinfo[dlist[i].atm.N].bfac = -dlist[i].S2[0]; /* Phi */ + atoms->pdbinfo[dlist[i].atm.H].bfac = -dlist[i].S2[0]; /* Phi */ + atoms->pdbinfo[dlist[i].atm.C].bfac = -dlist[i].S2[1]; /* Psi */ + atoms->pdbinfo[dlist[i].atm.O].bfac = -dlist[i].S2[1]; /* Psi */ + for (Xi = 0; (Xi < maxchi); Xi++) /* Chi's */ + { + if (dlist[i].atm.Cn[Xi+3] != -1) + { + atoms->pdbinfo[dlist[i].atm.Cn[Xi+1]].bfac = -dlist[i].S2[NONCHI+Xi]; + } + } + } + + fp = 
ffopen(pdbfn, "w"); + fprintf(fp, "REMARK generated by g_chi\n"); + fprintf(fp, "REMARK " + "B-factor field contains negative of dihedral order parameters\n"); + write_pdbfile(fp, NULL, atoms, x, ePBC, box, ' ', 0, NULL, TRUE); + x0 = y0 = z0 = 1000.0; + for (i = 0; (i < atoms->nr); i++) + { + x0 = min(x0, x[i][XX]); + y0 = min(y0, x[i][YY]); + z0 = min(z0, x[i][ZZ]); + } + x0 *= 10.0; /* nm -> angstrom */ + y0 *= 10.0; /* nm -> angstrom */ + z0 *= 10.0; /* nm -> angstrom */ + sprintf(buf, "%s%%6.f%%6.2f\n", get_pdbformat()); + for (i = 0; (i < 10); i++) + { + fprintf(fp, buf, "ATOM ", atoms->nr+1+i, "CA", "LEG", ' ', + atoms->nres+1, ' ', x0, y0, z0+(1.2*i), 0.0, -0.1*i); + } + ffclose(fp); + } + + fprintf(log, "Dihedrals with S2 > 0.8\n"); + fprintf(log, "Dihedral: "); + if (bPhi) + { + fprintf(log, " Phi "); + } + if (bPsi) + { + fprintf(log, " Psi "); + } + if (bChi) + { + for (Xi = 0; (Xi < maxchi); Xi++) + { + fprintf(log, " %s ", leg[2+NONCHI+Xi]); + } + } + fprintf(log, "\nNumber: "); + if (bPhi) + { + fprintf(log, "%4d ", nh[0]); + } + if (bPsi) + { + fprintf(log, "%4d ", nh[1]); + } + if (bChi) + { + for (Xi = 0; (Xi < maxchi); Xi++) + { + fprintf(log, "%4d ", nh[NONCHI+Xi]); + } + } + fprintf(log, "\n"); + + for (i = 0; i < NLEG; i++) + { + sfree(leg[i]); + } + +} + +int gmx_chi(int argc, char *argv[]) +{ + const char *desc[] = { + "[TT]g_chi[tt] computes [GRK]phi[grk], [GRK]psi[grk], [GRK]omega[grk], and [GRK]chi[grk] dihedrals for all your ", + "amino acid backbone and sidechains.", + "It can compute dihedral angle as a function of time, and as", + "histogram distributions.", + "The distributions [TT](histo-(dihedral)(RESIDUE).xvg[tt]) are cumulative over all residues of each type.[PAR]", + "If option [TT]-corr[tt] is given, the program will", + "calculate dihedral autocorrelation functions. The function used", + "is C(t) = [CHEVRON][COS][GRK]chi[grk]([GRK]tau[grk])[cos] [COS][GRK]chi[grk]([GRK]tau[grk]+t)[cos][chevron]. 
The use of cosines", + "rather than angles themselves, resolves the problem of periodicity.", + "(Van der Spoel & Berendsen (1997), Biophys. J. 72, 2032-2041).", + "Separate files for each dihedral of each residue", + "[TT](corr(dihedral)(RESIDUE)(nresnr).xvg[tt]) are output, as well as a", + "file containing the information for all residues (argument of [TT]-corr[tt]).[PAR]", + "With option [TT]-all[tt], the angles themselves as a function of time for", + "each residue are printed to separate files [TT](dihedral)(RESIDUE)(nresnr).xvg[tt].", + "These can be in radians or degrees.[PAR]", + "A log file (argument [TT]-g[tt]) is also written. This contains [BR]", + "(a) information about the number of residues of each type.[BR]", + "(b) The NMR ^3J coupling constants from the Karplus equation.[BR]", + "(c) a table for each residue of the number of transitions between ", + "rotamers per nanosecond, and the order parameter S^2 of each dihedral.[BR]", + "(d) a table for each residue of the rotamer occupancy.[PAR]", + "All rotamers are taken as 3-fold, except for [GRK]omega[grk] and [GRK]chi[grk] dihedrals", + "to planar groups (i.e. [GRK]chi[grk][SUB]2[sub] of aromatics, Asp and Asn; [GRK]chi[grk][SUB]3[sub] of Glu", + "and Gln; and [GRK]chi[grk][SUB]4[sub] of Arg), which are 2-fold. \"rotamer 0\" means ", + "that the dihedral was not in the core region of each rotamer. ", + "The width of the core region can be set with [TT]-core_rotamer[tt][PAR]", + + "The S^2 order parameters are also output to an [TT].xvg[tt] file", + "(argument [TT]-o[tt] ) and optionally as a [TT].pdb[tt] file with", + "the S^2 values as B-factor (argument [TT]-p[tt]). ", + "The total number of rotamer transitions per timestep", + "(argument [TT]-ot[tt]), the number of transitions per rotamer", + "(argument [TT]-rt[tt]), and the ^3J couplings (argument [TT]-jc[tt]), ", + "can also be written to [TT].xvg[tt] files. 
Note that the analysis", + "of rotamer transitions assumes that the supplied trajectory frames", + "are equally spaced in time.[PAR]", + + "If [TT]-chi_prod[tt] is set (and [TT]-maxchi[tt] > 0), cumulative rotamers, e.g.", + "1+9([GRK]chi[grk][SUB]1[sub]-1)+3([GRK]chi[grk][SUB]2[sub]-1)+([GRK]chi[grk][SUB]3[sub]-1) (if the residue has three 3-fold ", + "dihedrals and [TT]-maxchi[tt] >= 3)", + "are calculated. As before, if any dihedral is not in the core region,", + "the rotamer is taken to be 0. The occupancies of these cumulative ", + "rotamers (starting with rotamer 0) are written to the file", + "that is the argument of [TT]-cp[tt], and if the [TT]-all[tt] flag", + "is given, the rotamers as functions of time", + "are written to [TT]chiproduct(RESIDUE)(nresnr).xvg[tt] ", + "and their occupancies to [TT]histo-chiproduct(RESIDUE)(nresnr).xvg[tt].[PAR]", + + "The option [TT]-r[tt] generates a contour plot of the average [GRK]omega[grk] angle", + "as a function of the [GRK]phi[grk] and [GRK]psi[grk] angles, that is, in a Ramachandran plot", + "the average [GRK]omega[grk] angle is plotted using color coding.", + + }; + + const char *bugs[] = { + "Produces MANY output files (up to about 4 times the number of residues in the protein, twice that if autocorrelation functions are calculated). Typically several hundred files are output.", + "[GRK]phi[grk] and [GRK]psi[grk] dihedrals are calculated in a non-standard way, using H-N-CA-C for [GRK]phi[grk] instead of C(-)-N-CA-C, and N-CA-C-O for [GRK]psi[grk] instead of N-CA-C-N(+). 
This causes (usually small) discrepancies with the output of other tools like [TT]g_rama[tt].", + "[TT]-r0[tt] option does not work properly", + "Rotamers with multiplicity 2 are printed in [TT]chi.log[tt] as if they had multiplicity 3, with the 3rd (g(+)) always having probability 0" + }; + + /* defaults */ + static int r0 = 1, ndeg = 1, maxchi = 2; + static gmx_bool bAll = FALSE; + static gmx_bool bPhi = FALSE, bPsi = FALSE, bOmega = FALSE; + static real bfac_init = -1.0, bfac_max = 0; + static const char *maxchistr[] = { NULL, "0", "1", "2", "3", "4", "5", "6", NULL }; + static gmx_bool bRama = FALSE, bShift = FALSE, bViol = FALSE, bRamOmega = FALSE; + static gmx_bool bNormHisto = TRUE, bChiProduct = FALSE, bHChi = FALSE, bRAD = FALSE, bPBC = TRUE; + static real core_frac = 0.5; + t_pargs pa[] = { + { "-r0", FALSE, etINT, {&r0}, + "starting residue" }, + { "-phi", FALSE, etBOOL, {&bPhi}, + "Output for [GRK]phi[grk] dihedral angles" }, + { "-psi", FALSE, etBOOL, {&bPsi}, + "Output for [GRK]psi[grk] dihedral angles" }, + { "-omega", FALSE, etBOOL, {&bOmega}, + "Output for [GRK]omega[grk] dihedrals (peptide bonds)" }, + { "-rama", FALSE, etBOOL, {&bRama}, + "Generate [GRK]phi[grk]/[GRK]psi[grk] and [GRK]chi[grk][SUB]1[sub]/[GRK]chi[grk][SUB]2[sub] Ramachandran plots" }, + { "-viol", FALSE, etBOOL, {&bViol}, + "Write a file that gives 0 or 1 for violated Ramachandran angles" }, + { "-periodic", FALSE, etBOOL, {&bPBC}, + "Print dihedral angles modulo 360 degrees" }, + { "-all", FALSE, etBOOL, {&bAll}, + "Output separate files for every dihedral." 
}, + { "-rad", FALSE, etBOOL, {&bRAD}, + "in angle vs time files, use radians rather than degrees."}, + { "-shift", FALSE, etBOOL, {&bShift}, + "Compute chemical shifts from [GRK]phi[grk]/[GRK]psi[grk] angles" }, + { "-binwidth", FALSE, etINT, {&ndeg}, + "bin width for histograms (degrees)" }, + { "-core_rotamer", FALSE, etREAL, {&core_frac}, + "only the central [TT]-core_rotamer[tt]*(360/multiplicity) belongs to each rotamer (the rest is assigned to rotamer 0)" }, + { "-maxchi", FALSE, etENUM, {maxchistr}, + "calculate first ndih [GRK]chi[grk] dihedrals" }, + { "-normhisto", FALSE, etBOOL, {&bNormHisto}, + "Normalize histograms" }, + { "-ramomega", FALSE, etBOOL, {&bRamOmega}, + "compute average omega as a function of [GRK]phi[grk]/[GRK]psi[grk] and plot it in an [TT].xpm[tt] plot" }, + { "-bfact", FALSE, etREAL, {&bfac_init}, + "B-factor value for [TT].pdb[tt] file for atoms with no calculated dihedral order parameter"}, + { "-chi_prod", FALSE, etBOOL, {&bChiProduct}, + "compute a single cumulative rotamer for each residue"}, + { "-HChi", FALSE, etBOOL, {&bHChi}, + "Include dihedrals to sidechain hydrogens"}, + { "-bmax", FALSE, etREAL, {&bfac_max}, + "Maximum B-factor on any of the atoms that make up a dihedral, for the dihedral angle to be considere in the statistics. Applies to database work where a number of X-Ray structures is analyzed. [TT]-bmax[tt] <= 0 means no limit." 
} + }; + + FILE *log; + int natoms, nlist, idum, nbin; + t_atoms atoms; + rvec *x; + int ePBC; + matrix box; + char title[256], grpname[256]; + t_dlist *dlist; + gmx_bool bChi, bCorr, bSSHisto; + gmx_bool bDo_rt, bDo_oh, bDo_ot, bDo_jc; + real dt = 0, traj_t_ns; + output_env_t oenv; + gmx_residuetype_t rt; + + atom_id isize, *index; + int ndih, nactdih, nf; + real **dih, *trans_frac, *aver_angle, *time; + int i, j, **chi_lookup, *multiplicity; + + t_filenm fnm[] = { + { efSTX, "-s", NULL, ffREAD }, + { efTRX, "-f", NULL, ffREAD }, + { efXVG, "-o", "order", ffWRITE }, + { efPDB, "-p", "order", ffOPTWR }, + { efDAT, "-ss", "ssdump", ffOPTRD }, + { efXVG, "-jc", "Jcoupling", ffWRITE }, + { efXVG, "-corr", "dihcorr", ffOPTWR }, + { efLOG, "-g", "chi", ffWRITE }, + /* add two more arguments copying from g_angle */ + { efXVG, "-ot", "dihtrans", ffOPTWR }, + { efXVG, "-oh", "trhisto", ffOPTWR }, + { efXVG, "-rt", "restrans", ffOPTWR }, + { efXVG, "-cp", "chiprodhisto", ffOPTWR } + }; +#define NFILE asize(fnm) + int npargs; + t_pargs *ppa; + + npargs = asize(pa); + ppa = add_acf_pargs(&npargs, pa); + parse_common_args(&argc, argv, PCA_CAN_VIEW | PCA_CAN_TIME | PCA_BE_NICE, + NFILE, fnm, npargs, ppa, asize(desc), desc, asize(bugs), bugs, + &oenv); + + /* Handle result from enumerated type */ + sscanf(maxchistr[0], "%d", &maxchi); + bChi = (maxchi > 0); + + log = ffopen(ftp2fn(efLOG, NFILE, fnm), "w"); + + if (bRamOmega) + { + bOmega = TRUE; + bPhi = TRUE; + bPsi = TRUE; + } + + /* set some options */ + bDo_rt = (opt2bSet("-rt", NFILE, fnm)); + bDo_oh = (opt2bSet("-oh", NFILE, fnm)); + bDo_ot = (opt2bSet("-ot", NFILE, fnm)); + bDo_jc = (opt2bSet("-jc", NFILE, fnm)); + bCorr = (opt2bSet("-corr", NFILE, fnm)); + if (bCorr) + { + fprintf(stderr, "Will calculate autocorrelation\n"); + } + + if (core_frac > 1.0) + { + fprintf(stderr, "core_rotamer fraction > 1.0 ; will use 1.0\n"); + core_frac = 1.0; + } + if (core_frac < 0.0) + { + fprintf(stderr, "core_rotamer fraction < 0.0 ; 
will use 0.0\n"); + core_frac = 0.0; + } + + if (maxchi > MAXCHI) + { + fprintf(stderr, + "Will only calculate first %d Chi dihedrals in stead of %d.\n", + MAXCHI, maxchi); + maxchi = MAXCHI; + } + bSSHisto = ftp2bSet(efDAT, NFILE, fnm); + nbin = 360/ndeg; + + /* Find the chi angles using atoms struct and a list of amino acids */ + get_stx_coordnum(ftp2fn(efSTX, NFILE, fnm), &natoms); + init_t_atoms(&atoms, natoms, TRUE); + snew(x, natoms); + read_stx_conf(ftp2fn(efSTX, NFILE, fnm), title, &atoms, x, NULL, &ePBC, box); + fprintf(log, "Title: %s\n", title); + + gmx_residuetype_init(&rt); + dlist = mk_dlist(log, &atoms, &nlist, bPhi, bPsi, bChi, bHChi, maxchi, r0, rt); + fprintf(stderr, "%d residues with dihedrals found\n", nlist); + + if (nlist == 0) + { + gmx_fatal(FARGS, "No dihedrals in your structure!\n"); + } + + /* Make a linear index for reading all. */ + index = make_chi_ind(nlist, dlist, &ndih); + isize = 4*ndih; + fprintf(stderr, "%d dihedrals found\n", ndih); + + snew(dih, ndih); + + /* COMPUTE ALL DIHEDRALS! */ + read_ang_dih(ftp2fn(efTRX, NFILE, fnm), FALSE, TRUE, FALSE, bPBC, 1, &idum, + &nf, &time, isize, index, &trans_frac, &aver_angle, dih, oenv); + + dt = (time[nf-1]-time[0])/(nf-1); /* might want this for corr or n. transit*/ + if (bCorr) + { + if (nf < 2) + { + gmx_fatal(FARGS, "Need at least 2 frames for correlation"); + } + } + + /* put angles in -M_PI to M_PI ! and correct phase factor for phi and psi + * pass nactdih instead of ndih to low_ana_dih_trans and get_chi_product_traj + * to prevent accessing off end of arrays when maxchi < 5 or 6. 
*/ + nactdih = reset_em_all(nlist, dlist, nf, dih, maxchi); + + if (bAll) + { + dump_em_all(nlist, dlist, nf, time, dih, maxchi, bPhi, bPsi, bChi, bOmega, bRAD, oenv); + } + + /* Histogramming & J coupling constants & calc of S2 order params */ + histogramming(log, nbin, rt, nf, maxchi, dih, nlist, dlist, index, + bPhi, bPsi, bOmega, bChi, + bNormHisto, bSSHisto, ftp2fn(efDAT, NFILE, fnm), bfac_max, &atoms, + bDo_jc, opt2fn("-jc", NFILE, fnm), oenv); + + /* transitions + * + * added multiplicity */ + + snew(multiplicity, ndih); + mk_multiplicity_lookup(multiplicity, maxchi, dih, nlist, dlist, ndih); + + strcpy(grpname, "All residues, "); + if (bPhi) + { + strcat(grpname, "Phi "); + } + if (bPsi) + { + strcat(grpname, "Psi "); + } + if (bOmega) + { + strcat(grpname, "Omega "); + } + if (bChi) + { + strcat(grpname, "Chi 1-"); + sprintf(grpname + strlen(grpname), "%i", maxchi); + } + + + low_ana_dih_trans(bDo_ot, opt2fn("-ot", NFILE, fnm), + bDo_oh, opt2fn("-oh", NFILE, fnm), maxchi, + dih, nlist, dlist, nf, nactdih, grpname, multiplicity, + time, FALSE, core_frac, oenv); + + /* Order parameters */ + order_params(log, opt2fn("-o", NFILE, fnm), maxchi, nlist, dlist, + ftp2fn_null(efPDB, NFILE, fnm), bfac_init, + &atoms, x, ePBC, box, bPhi, bPsi, bChi, oenv); + + /* Print ramachandran maps! 
*/ + if (bRama) + { + do_rama(nf, nlist, dlist, dih, bViol, bRamOmega, oenv); + } + + if (bShift) + { + do_pp2shifts(log, nf, nlist, dlist, dih); + } + + /* rprint S^2, transitions, and rotamer occupancies to log */ + traj_t_ns = 0.001 * (time[nf-1]-time[0]); + pr_dlist(log, nlist, dlist, traj_t_ns, edPrintST, bPhi, bPsi, bChi, bOmega, maxchi); + pr_dlist(log, nlist, dlist, traj_t_ns, edPrintRO, bPhi, bPsi, bChi, bOmega, maxchi); + ffclose(log); + /* transitions to xvg */ + if (bDo_rt) + { + print_transitions(opt2fn("-rt", NFILE, fnm), maxchi, nlist, dlist, + &atoms, x, box, bPhi, bPsi, bChi, traj_t_ns, oenv); + } + + /* chi_product trajectories (ie one "rotamer number" for each residue) */ + if (bChiProduct && bChi) + { + snew(chi_lookup, nlist); + for (i = 0; i < nlist; i++) + { + snew(chi_lookup[i], maxchi); + } + mk_chi_lookup(chi_lookup, maxchi, dih, nlist, dlist); + + get_chi_product_traj(dih, nf, nactdih, nlist, + maxchi, dlist, time, chi_lookup, multiplicity, + FALSE, bNormHisto, core_frac, bAll, + opt2fn("-cp", NFILE, fnm), oenv); + + for (i = 0; i < nlist; i++) + { + sfree(chi_lookup[i]); + } + } + + /* Correlation comes last because it fucks up the angles */ + if (bCorr) + { + do_dihcorr(opt2fn("-corr", NFILE, fnm), nf, ndih, dih, dt, nlist, dlist, time, + maxchi, bPhi, bPsi, bChi, bOmega, oenv); + } + + + do_view(oenv, opt2fn("-o", NFILE, fnm), "-nxy"); + do_view(oenv, opt2fn("-jc", NFILE, fnm), "-nxy"); + if (bCorr) + { + do_view(oenv, opt2fn("-corr", NFILE, fnm), "-nxy"); + } + + gmx_residuetype_destroy(rt); + + thanx(stderr); + + return 0; +} diff --cc src/gromacs/gmxana/gmx_gyrate.c index 1111e899ff,0000000000..66bf4d406c mode 100644,000000..100644 --- a/src/gromacs/gmxana/gmx_gyrate.c +++ b/src/gromacs/gmxana/gmx_gyrate.c @@@ -1,395 -1,0 +1,395 @@@ +/* + * + * This source code is part of + * + * G R O M A C S + * + * GROningen MAchine for Chemical Simulations + * + * VERSION 3.2.0 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and 
others. + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team, + * check out http://www.gromacs.org for more information. + + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * If you want to redistribute modifications, please consider that + * scientific software is very special. Version control is crucial - + * bugs must be traceable. We will be happy to consider code for + * inclusion in the official distribution, but derived work must not + * be called official GROMACS. Details are found in the README & COPYING + * files - if they are missing, get the official version at www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the papers on the package - you can find them in the top README file. 
+ * + * For more info, check our website at http://www.gromacs.org + * + * And Hey: + * Green Red Orange Magenta Azure Cyan Skyblue + */ +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include + +#include "statutil.h" +#include "sysstuff.h" +#include "typedefs.h" +#include "smalloc.h" +#include "macros.h" +#include "vec.h" +#include "pbc.h" +#include "copyrite.h" +#include "futil.h" +#include "statutil.h" +#include "index.h" +#include "mshift.h" +#include "xvgr.h" +#include "princ.h" +#include "rmpbc.h" +#include "txtdump.h" +#include "tpxio.h" +#include "gstat.h" +#include "gmx_ana.h" + + +real calc_gyro(rvec x[], int gnx, atom_id index[], t_atom atom[], real tm, + rvec gvec, rvec d, gmx_bool bQ, gmx_bool bRot, gmx_bool bMOI, matrix trans) +{ + int i, ii, m; + real gyro, dx2, m0, Itot; + rvec comp; + + if (bRot) + { + principal_comp(gnx, index, atom, x, trans, d); + Itot = norm(d); + if (bMOI) + { + return Itot; + } + for (m = 0; (m < DIM); m++) + { + d[m] = sqrt(d[m]/tm); + } +#ifdef DEBUG + pr_rvecs(stderr, 0, "trans", trans, DIM); +#endif + /* rotate_atoms(gnx,index,x,trans); */ + } + clear_rvec(comp); + for (i = 0; (i < gnx); i++) + { + ii = index[i]; + if (bQ) + { + m0 = fabs(atom[ii].q); + } + else + { + m0 = atom[ii].m; + } + for (m = 0; (m < DIM); m++) + { + dx2 = x[ii][m]*x[ii][m]; + comp[m] += dx2*m0; + } + } + gyro = comp[XX]+comp[YY]+comp[ZZ]; + + for (m = 0; (m < DIM); m++) + { + gvec[m] = sqrt((gyro-comp[m])/tm); + } + + return sqrt(gyro/tm); +} + +void calc_gyro_z(rvec x[], matrix box, + int gnx, atom_id index[], t_atom atom[], + int nz, real time, FILE *out) +{ + static dvec *inertia = NULL; + static double *tm = NULL; + int i, ii, j, zi; + real zf, w, sdet, e1, e2; + + if (inertia == NULL) + { + snew(inertia, nz); + snew(tm, nz); + } + + for (i = 0; i < nz; i++) + { + clear_dvec(inertia[i]); + tm[i] = 0; + } + + for (i = 0; (i < gnx); i++) + { + ii = index[i]; + zf = nz*x[ii][ZZ]/box[ZZ][ZZ]; + if (zf >= nz) + { + zf -= nz; + } + if (zf < 0) 
+ { + zf += nz; + } + for (j = 0; j < 2; j++) + { + zi = zf + j; + if (zi == nz) + { + zi = 0; + } + w = atom[ii].m*(1 + cos(M_PI*(zf - zi))); + inertia[zi][0] += w*sqr(x[ii][YY]); + inertia[zi][1] += w*sqr(x[ii][XX]); + inertia[zi][2] -= w*x[ii][XX]*x[ii][YY]; + tm[zi] += w; + } + } + fprintf(out, "%10g", time); + for (j = 0; j < nz; j++) + { + for (i = 0; i < 3; i++) + { + inertia[j][i] /= tm[j]; + } + sdet = sqrt(sqr(inertia[j][0] - inertia[j][1]) + 4*sqr(inertia[j][2])); + e1 = sqrt(0.5*(inertia[j][0] + inertia[j][1] + sdet)); + e2 = sqrt(0.5*(inertia[j][0] + inertia[j][1] - sdet)); + fprintf(out, " %5.3f %5.3f", e1, e2); + } + fprintf(out, "\n"); +} + +int gmx_gyrate(int argc, char *argv[]) +{ + const char *desc[] = { - "[TT]g_gyrate[tt] computes the radius of gyration of a group of atoms", ++ "[TT]g_gyrate[tt] computes the radius of gyration of a molecule", + "and the radii of gyration about the [IT]x[it]-, [IT]y[it]- and [IT]z[it]-axes,", + "as a function of time. The atoms are explicitly mass weighted.[PAR]", + "With the [TT]-nmol[tt] option the radius of gyration will be calculated", + "for multiple molecules by splitting the analysis group in equally", + "sized parts.[PAR]", + "With the option [TT]-nz[tt] 2D radii of gyration in the [IT]x-y[it] plane", + "of slices along the [IT]z[it]-axis are calculated." + }; + static int nmol = 1, nz = 0; + static gmx_bool bQ = FALSE, bRot = FALSE, bMOI = FALSE; + t_pargs pa[] = { + { "-nmol", FALSE, etINT, {&nmol}, + "The number of molecules to analyze" }, + { "-q", FALSE, etBOOL, {&bQ}, + "Use absolute value of the charge of an atom as weighting factor instead of mass" }, + { "-p", FALSE, etBOOL, {&bRot}, + "Calculate the radii of gyration about the principal axes." }, + { "-moi", FALSE, etBOOL, {&bMOI}, + "Calculate the moments of inertia (defined by the principal axes)." 
}, + { "-nz", FALSE, etINT, {&nz}, + "Calculate the 2D radii of gyration of this number of slices along the z-axis" }, + }; + FILE *out; + t_trxstatus *status; + t_topology top; + int ePBC; + rvec *x, *x_s; + rvec xcm, gvec, gvec1; + matrix box, trans; + gmx_bool bACF; + real **moi_trans = NULL; + int max_moi = 0, delta_moi = 100; + rvec d, d1; /* eigenvalues of inertia tensor */ + real t, t0, tm, gyro; + int natoms; + char *grpname, title[256]; + int i, j, m, gnx, nam, mol; + atom_id *index; + output_env_t oenv; + gmx_rmpbc_t gpbc = NULL; + const char *leg[] = { "Rg", "RgX", "RgY", "RgZ" }; + const char *legI[] = { "Itot", "I1", "I2", "I3" }; +#define NLEG asize(leg) + t_filenm fnm[] = { + { efTRX, "-f", NULL, ffREAD }, + { efTPS, NULL, NULL, ffREAD }, + { efNDX, NULL, NULL, ffOPTRD }, + { efXVG, NULL, "gyrate", ffWRITE }, + { efXVG, "-acf", "moi-acf", ffOPTWR }, + }; +#define NFILE asize(fnm) + int npargs; + t_pargs *ppa; + + npargs = asize(pa); + ppa = add_acf_pargs(&npargs, pa); + + parse_common_args(&argc, argv, PCA_CAN_TIME | PCA_CAN_VIEW | PCA_BE_NICE, + NFILE, fnm, npargs, ppa, asize(desc), desc, 0, NULL, &oenv); + bACF = opt2bSet("-acf", NFILE, fnm); + if (bACF && nmol != 1) + { + gmx_fatal(FARGS, "Can only do acf with nmol=1"); + } + bRot = bRot || bMOI || bACF; + /* + if (nz > 0) + bMOI = TRUE; + */ + if (bRot) + { + printf("Will rotate system along principal axes\n"); + snew(moi_trans, DIM); + } + if (bMOI) + { + printf("Will print moments of inertia\n"); + bQ = FALSE; + } + if (bQ) + { + printf("Will print radius normalised by charge\n"); + } + + read_tps_conf(ftp2fn(efTPS, NFILE, fnm), title, &top, &ePBC, &x, NULL, box, TRUE); + get_index(&top.atoms, ftp2fn_null(efNDX, NFILE, fnm), 1, &gnx, &index, &grpname); + + if (nmol > gnx || gnx % nmol != 0) + { + gmx_fatal(FARGS, "The number of atoms in the group (%d) is not a multiple of nmol (%d)", gnx, nmol); + } + nam = gnx/nmol; + + natoms = read_first_x(oenv, &status, ftp2fn(efTRX, NFILE, fnm), &t, &x, 
box); + snew(x_s, natoms); + + j = 0; + t0 = t; + if (bQ) + { + out = xvgropen(ftp2fn(efXVG, NFILE, fnm), + "Radius of Charge", "Time (ps)", "Rg (nm)", oenv); + } + else if (bMOI) + { + out = xvgropen(ftp2fn(efXVG, NFILE, fnm), + "Moments of inertia", "Time (ps)", "I (a.m.u. nm\\S2\\N)", oenv); + } + else + { + out = xvgropen(ftp2fn(efXVG, NFILE, fnm), + "Radius of gyration", "Time (ps)", "Rg (nm)", oenv); + } + if (bMOI) + { + xvgr_legend(out, NLEG, legI, oenv); + } + else + { + if (bRot) + { + if (output_env_get_print_xvgr_codes(oenv)) + { + fprintf(out, "@ subtitle \"Axes are principal component axes\"\n"); + } + } + xvgr_legend(out, NLEG, leg, oenv); + } + if (nz == 0) + { + gpbc = gmx_rmpbc_init(&top.idef, ePBC, natoms, box); + } + do + { + if (nz == 0) + { + gmx_rmpbc_copy(gpbc, natoms, box, x, x_s); + } + gyro = 0; + clear_rvec(gvec); + clear_rvec(d); + for (mol = 0; mol < nmol; mol++) + { + tm = sub_xcm(nz == 0 ? x_s : x, nam, index+mol*nam, top.atoms.atom, xcm, bQ); + if (nz == 0) + { + gyro += calc_gyro(x_s, nam, index+mol*nam, top.atoms.atom, + tm, gvec1, d1, bQ, bRot, bMOI, trans); + } + else + { + calc_gyro_z(x, box, nam, index+mol*nam, top.atoms.atom, nz, t, out); + } + rvec_inc(gvec, gvec1); + rvec_inc(d, d1); + } + if (nmol > 0) + { + gyro /= nmol; + svmul(1.0/nmol, gvec, gvec); + svmul(1.0/nmol, d, d); + } + + if (nz == 0) + { + if (bRot) + { + if (j >= max_moi) + { + max_moi += delta_moi; + for (m = 0; (m < DIM); m++) + { + srenew(moi_trans[m], max_moi*DIM); + } + } + for (m = 0; (m < DIM); m++) + { + copy_rvec(trans[m], moi_trans[m]+DIM*j); + } + fprintf(out, "%10g %10g %10g %10g %10g\n", + t, gyro, d[XX], d[YY], d[ZZ]); + } + else + { + fprintf(out, "%10g %10g %10g %10g %10g\n", + t, gyro, gvec[XX], gvec[YY], gvec[ZZ]); + } + } + j++; + } + while (read_next_x(oenv, status, &t, natoms, x, box)); + close_trj(status); + if (nz == 0) + { + gmx_rmpbc_done(gpbc); + } + + ffclose(out); + + if (bACF) + { + int mode = eacVector; + + 
do_autocorr(opt2fn("-acf", NFILE, fnm), oenv, + "Moment of inertia vector ACF", + j, 3, moi_trans, (t-t0)/j, mode, FALSE); + do_view(oenv, opt2fn("-acf", NFILE, fnm), "-nxy"); + } + + do_view(oenv, ftp2fn(efXVG, NFILE, fnm), "-nxy"); + + thanx(stderr); + + return 0; +} diff --cc src/gromacs/gmxlib/nonbonded/nonbonded.c index ac5fe893d4,0000000000..d866862453 mode 100644,000000..100644 --- a/src/gromacs/gmxlib/nonbonded/nonbonded.c +++ b/src/gromacs/gmxlib/nonbonded/nonbonded.c @@@ -1,685 -1,0 +1,686 @@@ +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*- + * + * + * This source code is part of + * + * G R O M A C S + * + * GROningen MAchine for Chemical Simulations + * + * VERSION 3.2.0 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others. + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team, + * check out http://www.gromacs.org for more information. + + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * If you want to redistribute modifications, please consider that + * scientific software is very special. Version control is crucial - + * bugs must be traceable. We will be happy to consider code for + * inclusion in the official distribution, but derived work must not + * be called official GROMACS. Details are found in the README & COPYING + * files - if they are missing, get the official version at www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the papers on the package - you can find them in the top README file. 
+ * + * For more info, check our website at http://www.gromacs.org + * + * And Hey: + * GROningen Mixture of Alchemy and Childrens' Stories + */ +#ifdef HAVE_CONFIG_H +#include +#endif + +#ifdef GMX_THREAD_MPI +#include +#endif + +#include +#include +#include "typedefs.h" +#include "txtdump.h" +#include "smalloc.h" +#include "ns.h" +#include "vec.h" +#include "maths.h" +#include "macros.h" +#include "string2.h" +#include "force.h" +#include "names.h" +#include "main.h" +#include "xvgr.h" +#include "gmx_fatal.h" +#include "physics.h" +#include "force.h" +#include "bondf.h" +#include "nrnb.h" +#include "smalloc.h" +#include "nonbonded.h" + +#include "nb_kernel.h" +#include "nb_free_energy.h" +#include "nb_generic.h" +#include "nb_generic_cg.h" +#include "nb_generic_adress.h" + +/* Different default (c) and accelerated interaction-specific kernels */ +#include "nb_kernel_c/nb_kernel_c.h" + +#if (defined GMX_CPU_ACCELERATION_X86_SSE2) && !(defined GMX_DOUBLE) +# include "nb_kernel_sse2_single/nb_kernel_sse2_single.h" +#endif +#if (defined GMX_CPU_ACCELERATION_X86_SSE4_1) && !(defined GMX_DOUBLE) +# include "nb_kernel_sse4_1_single/nb_kernel_sse4_1_single.h" +#endif +#if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA) && !(defined GMX_DOUBLE) +# include "nb_kernel_avx_128_fma_single/nb_kernel_avx_128_fma_single.h" +#endif +#if (defined GMX_CPU_ACCELERATION_X86_AVX_256) && !(defined GMX_DOUBLE) +# include "nb_kernel_avx_256_single/nb_kernel_avx_256_single.h" +#endif +#if (defined GMX_CPU_ACCELERATION_X86_SSE2 && defined GMX_DOUBLE) +# include "nb_kernel_sse2_double/nb_kernel_sse2_double.h" +#endif +#if (defined GMX_CPU_ACCELERATION_X86_SSE4_1 && defined GMX_DOUBLE) +# include "nb_kernel_sse4_1_double/nb_kernel_sse4_1_double.h" +#endif +#if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA && defined GMX_DOUBLE) +# include "nb_kernel_avx_128_fma_double/nb_kernel_avx_128_fma_double.h" +#endif +#if (defined GMX_CPU_ACCELERATION_X86_AVX_256 && defined GMX_DOUBLE) +# include 
"nb_kernel_avx_256_double/nb_kernel_avx_256_double.h" +#endif +#if (defined GMX_CPU_ACCELERATION_SPARC64_HPC_ACE && defined GMX_DOUBLE) +# include "nb_kernel_sparc64_hpc_ace_double/nb_kernel_sparc64_hpc_ace_double.h" +#endif + + +#ifdef GMX_THREAD_MPI +static tMPI_Thread_mutex_t nonbonded_setup_mutex = TMPI_THREAD_MUTEX_INITIALIZER; +#endif +static gmx_bool nonbonded_setup_done = FALSE; + + +void +gmx_nonbonded_setup(FILE * fplog, + t_forcerec * fr, + gmx_bool bGenericKernelOnly) +{ +#ifdef GMX_THREAD_MPI + tMPI_Thread_mutex_lock(&nonbonded_setup_mutex); +#endif + /* Here we are guaranteed only one thread made it. */ + if (nonbonded_setup_done == FALSE) + { + if (bGenericKernelOnly == FALSE) + { + /* Add the generic kernels to the structure stored statically in nb_kernel.c */ + nb_kernel_list_add_kernels(kernellist_c, kernellist_c_size); + + if (!(fr != NULL && fr->use_cpu_acceleration == FALSE)) + { + /* Add interaction-specific kernels for different architectures */ + /* Single precision */ +#if (defined GMX_CPU_ACCELERATION_X86_SSE2) && !(defined GMX_DOUBLE) + nb_kernel_list_add_kernels(kernellist_sse2_single, kernellist_sse2_single_size); +#endif +#if (defined GMX_CPU_ACCELERATION_X86_SSE4_1) && !(defined GMX_DOUBLE) + nb_kernel_list_add_kernels(kernellist_sse4_1_single, kernellist_sse4_1_single_size); +#endif +#if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA) && !(defined GMX_DOUBLE) + nb_kernel_list_add_kernels(kernellist_avx_128_fma_single, kernellist_avx_128_fma_single_size); +#endif +#if (defined GMX_CPU_ACCELERATION_X86_AVX_256) && !(defined GMX_DOUBLE) + nb_kernel_list_add_kernels(kernellist_avx_256_single, kernellist_avx_256_single_size); +#endif + /* Double precision */ +#if (defined GMX_CPU_ACCELERATION_X86_SSE2 && defined GMX_DOUBLE) + nb_kernel_list_add_kernels(kernellist_sse2_double, kernellist_sse2_double_size); +#endif +#if (defined GMX_CPU_ACCELERATION_X86_SSE4_1 && defined GMX_DOUBLE) + nb_kernel_list_add_kernels(kernellist_sse4_1_double, 
kernellist_sse4_1_double_size); +#endif +#if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA && defined GMX_DOUBLE) + nb_kernel_list_add_kernels(kernellist_avx_128_fma_double, kernellist_avx_128_fma_double_size); +#endif +#if (defined GMX_CPU_ACCELERATION_X86_AVX_256 && defined GMX_DOUBLE) + nb_kernel_list_add_kernels(kernellist_avx_256_double, kernellist_avx_256_double_size); +#endif +#if (defined GMX_CPU_ACCELERATION_SPARC64_HPC_ACE && defined GMX_DOUBLE) + nb_kernel_list_add_kernels(kernellist_sparc64_hpc_ace_double,kernellist_sparc64_hpc_ace_double_size); +#endif + ; /* empty statement to avoid a completely empty block */ + } + } + /* Create a hash for faster lookups */ + nb_kernel_list_hash_init(); + + nonbonded_setup_done = TRUE; + } +#ifdef GMX_THREAD_MPI + tMPI_Thread_mutex_unlock(&nonbonded_setup_mutex); +#endif +} + + + +void +gmx_nonbonded_set_kernel_pointers(FILE *log, t_nblist *nl) +{ + const char * elec; + const char * elec_mod; + const char * vdw; + const char * vdw_mod; + const char * geom; + const char * other; + const char * vf; + + struct + { + const char * arch; + int simd_padding_width; + } + arch_and_padding[] = + { + /* Single precision */ +#if (defined GMX_CPU_ACCELERATION_X86_AVX_256) && !(defined GMX_DOUBLE) + { "avx_256_single", 8 }, +#endif +#if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA) && !(defined GMX_DOUBLE) + { "avx_128_fma_single", 4 }, +#endif +#if (defined GMX_CPU_ACCELERATION_X86_SSE4_1) && !(defined GMX_DOUBLE) + { "sse4_1_single", 4 }, +#endif +#if (defined GMX_CPU_ACCELERATION_X86_SSE2) && !(defined GMX_DOUBLE) + { "sse2_single", 4 }, +#endif + /* Double precision */ +#if (defined GMX_CPU_ACCELERATION_X86_AVX_256 && defined GMX_DOUBLE) + { "avx_256_double", 4 }, +#endif +#if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA && defined GMX_DOUBLE) + /* Sic. 
Double precision 2-way SIMD does not require neighbor list padding, + * since the kernels execute a loop unrolled a factor 2, followed by + * a possible single odd-element epilogue. + */ + { "avx_128_fma_double", 1 }, +#endif +#if (defined GMX_CPU_ACCELERATION_X86_SSE2 && defined GMX_DOUBLE) + /* No padding - see comment above */ + { "sse2_double", 1 }, +#endif +#if (defined GMX_CPU_ACCELERATION_X86_SSE4_1 && defined GMX_DOUBLE) + /* No padding - see comment above */ + { "sse4_1_double", 1 }, +#endif +#if (defined GMX_CPU_ACCELERATION_SPARC64_HPC_ACE && defined GMX_DOUBLE) + /* No padding - see comment above */ + { "sparc64_hpc_ace_double", 1 }, +#endif + { "c", 1 }, + }; + int narch = asize(arch_and_padding); + int i; + + if (nonbonded_setup_done == FALSE) + { + /* We typically call this setup routine before starting timers, + * but if that has not been done for whatever reason we do it now. + */ + gmx_nonbonded_setup(NULL, NULL, FALSE); + } + + /* Not used yet */ + other = ""; + + nl->kernelptr_vf = NULL; + nl->kernelptr_v = NULL; + nl->kernelptr_f = NULL; + + elec = gmx_nbkernel_elec_names[nl->ielec]; + elec_mod = eintmod_names[nl->ielecmod]; + vdw = gmx_nbkernel_vdw_names[nl->ivdw]; + vdw_mod = eintmod_names[nl->ivdwmod]; + geom = gmx_nblist_geometry_names[nl->igeometry]; + + if (nl->type == GMX_NBLIST_INTERACTION_ADRESS) + { + nl->kernelptr_vf = (void *) gmx_nb_generic_adress_kernel; + nl->kernelptr_f = (void *) gmx_nb_generic_adress_kernel; + nl->simd_padding_width = 1; + return; + } + + if (nl->type == GMX_NBLIST_INTERACTION_FREE_ENERGY) + { + nl->kernelptr_vf = (void *) gmx_nb_free_energy_kernel; + nl->kernelptr_f = (void *) gmx_nb_free_energy_kernel; + nl->simd_padding_width = 1; + } + else if (!gmx_strcasecmp_min(geom, "CG-CG")) + { + nl->kernelptr_vf = (void *) gmx_nb_generic_cg_kernel; + nl->kernelptr_f = (void *) gmx_nb_generic_cg_kernel; + nl->simd_padding_width = 1; + } + else + { + /* Try to find a specific kernel first */ + + for (i = 0; i < narch 
&& nl->kernelptr_vf == NULL; i++) + { + nl->kernelptr_vf = (void *) nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "PotentialAndForce"); + nl->simd_padding_width = arch_and_padding[i].simd_padding_width; + } + for (i = 0; i < narch && nl->kernelptr_f == NULL; i++) + { + nl->kernelptr_f = (void *) nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "Force"); + nl->simd_padding_width = arch_and_padding[i].simd_padding_width; + + /* If there is not force-only optimized kernel, is there a potential & force one? */ + if (nl->kernelptr_f == NULL) + { + nl->kernelptr_f = (void *) nb_kernel_list_findkernel(NULL, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "PotentialAndForce"); + nl->simd_padding_width = arch_and_padding[i].simd_padding_width; + } + } + + /* Give up, pick a generic one instead */ + if (nl->kernelptr_vf == NULL) + { + nl->kernelptr_vf = (void *) gmx_nb_generic_kernel; + nl->kernelptr_f = (void *) gmx_nb_generic_kernel; + nl->simd_padding_width = 1; + if (debug) + { + fprintf(debug, + "WARNING - Slow generic NB kernel used for neighborlist with\n" + " Elec: '%s', Modifier: '%s'\n" + " Vdw: '%s', Modifier: '%s'\n" + " Geom: '%s', Other: '%s'\n\n", + elec, elec_mod, vdw, vdw_mod, geom, other); + } + } + } + + return; +} + +void do_nonbonded(t_commrec *cr, t_forcerec *fr, + rvec x[], rvec f_shortrange[], rvec f_longrange[], t_mdatoms *mdatoms, t_blocka *excl, + gmx_grppairener_t *grppener, rvec box_size, + t_nrnb *nrnb, real *lambda, real *dvdl, + int nls, int eNL, int flags) +{ + t_nblist * nlist; + int n, n0, n1, i, i0, i1, sz, range; + t_nblists * nblists; + nb_kernel_data_t kernel_data; + nb_kernel_t * kernelptr = NULL; + rvec * f; + + kernel_data.flags = flags; + kernel_data.exclusions = excl; + kernel_data.lambda = lambda; + kernel_data.dvdl = dvdl; + + if (fr->bAllvsAll) + { ++ gmx_incons("All-vs-all kernels have not been 
implemented in version 4.6"); + return; + } + + if (eNL >= 0) + { + i0 = eNL; + i1 = i0+1; + } + else + { + i0 = 0; + i1 = eNL_NR; + } + + if (nls >= 0) + { + n0 = nls; + n1 = nls+1; + } + else + { + n0 = 0; + n1 = fr->nnblists; + } + + for (n = n0; (n < n1); n++) + { + nblists = &fr->nblists[n]; + + kernel_data.table_elec = &nblists->table_elec; + kernel_data.table_vdw = &nblists->table_vdw; + kernel_data.table_elec_vdw = &nblists->table_elec_vdw; + + for (range = 0; range < 2; range++) + { + /* Are we doing short/long-range? */ + if (range == 0) + { + /* Short-range */ + if (!(flags & GMX_NONBONDED_DO_SR)) + { + continue; + } + kernel_data.energygrp_elec = grppener->ener[egCOULSR]; + kernel_data.energygrp_vdw = grppener->ener[fr->bBHAM ? egBHAMSR : egLJSR]; + kernel_data.energygrp_polarization = grppener->ener[egGB]; + nlist = nblists->nlist_sr; + f = f_shortrange; + } + else if (range == 1) + { + /* Long-range */ + if (!(flags & GMX_NONBONDED_DO_LR)) + { + continue; + } + kernel_data.energygrp_elec = grppener->ener[egCOULLR]; + kernel_data.energygrp_vdw = grppener->ener[fr->bBHAM ? 
egBHAMLR : egLJLR]; + kernel_data.energygrp_polarization = grppener->ener[egGB]; + nlist = nblists->nlist_lr; + f = f_longrange; + } + + for (i = i0; (i < i1); i++) + { + if (nlist[i].nri > 0) + { + if (flags & GMX_NONBONDED_DO_POTENTIAL) + { + /* Potential and force */ + kernelptr = (nb_kernel_t *)nlist[i].kernelptr_vf; + } + else + { + /* Force only, no potential */ + kernelptr = (nb_kernel_t *)nlist[i].kernelptr_f; + } + + if (nlist[i].type != GMX_NBLIST_INTERACTION_FREE_ENERGY && (flags & GMX_NONBONDED_DO_FOREIGNLAMBDA)) + { + /* We don't need the non-perturbed interactions */ + continue; + } + (*kernelptr)(&(nlist[i]), x, f, fr, mdatoms, &kernel_data, nrnb); + } + } + } + } +} + +static void +nb_listed_warning_rlimit(const rvec *x, int ai, int aj, int * global_atom_index, real r, real rlimit) +{ + gmx_warning("Listed nonbonded interaction between particles %d and %d\n" + "at distance %.3f which is larger than the table limit %.3f nm.\n\n" + "This is likely either a 1,4 interaction, or a listed interaction inside\n" + "a smaller molecule you are decoupling during a free energy calculation.\n" + "Since interactions at distances beyond the table cannot be computed,\n" + "they are skipped until they are inside the table limit again. You will\n" + "only see this message once, even if it occurs for several interactions.\n\n" + "IMPORTANT: This should not happen in a stable simulation, so there is\n" + "probably something wrong with your system. Only change the table-extension\n" + "distance in the mdp file if you are really sure that is the reason.\n", + glatnr(global_atom_index, ai), glatnr(global_atom_index, aj), r, rlimit); + + if (debug) + { + fprintf(debug, + "%8f %8f %8f\n%8f %8f %8f\n1-4 (%d,%d) interaction not within cut-off! r=%g. 
Ignored\n",
+                x[ai][XX], x[ai][YY], x[ai][ZZ], x[aj][XX], x[aj][YY], x[aj][ZZ],
+                glatnr(global_atom_index, ai), glatnr(global_atom_index, aj), r);
+    }
+}
+
+
+
+/* This might logically belong better in the nb_generic.c module, but it is only
+ * used in do_nonbonded_listed(), and we want it to be inlined there to avoid an
+ * extra function call for every single pair listed in the topology.
+ *
+ * Evaluates one pair interaction by table lookup.
+ *
+ * r2        squared pair distance; r is recovered as r2*gmx_invsqrt(r2)
+ * tabscale  factor that converts r into a table coordinate (rtab = r*tabscale)
+ * vftab     table holding 12 reals per table point: four coefficients
+ *           (Y, F, G, H) each for electrostatics, dispersion and repulsion,
+ *           stored in that order
+ * qq        prefactor applied to the electrostatics terms
+ * c6, c12   prefactors applied to the dispersion and repulsion terms
+ * velec     out: qq*VVe
+ * vvdw      out: c6*VVd + c12*VVr
+ *
+ * With eps the fractional part of rtab, every term is evaluated as
+ *   VV = Y + eps*(F + G*eps + H*eps^2)
+ *   FF = F + 2*G*eps + 3*H*eps^2
+ * and the return value is -(qq*FFe + c6*FFd + c12*FFr)*tabscale*rinv.
+ *
+ * There is no range check on the table index in this function;
+ * do_nonbonded_listed() skips pairs with r2 >= fr->tab14.r squared
+ * before it calls this routine.
+ */
+static real
+nb_evaluate_single(real r2, real tabscale, real *vftab,
+                   real qq, real c6, real c12, real *velec, real *vvdw)
+{
+    real       rinv, r, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VVe, FFe, VVd, FFd, VVr, FFr, fscal;
+    int        ntab;
+
+    /* Do the tabulated interactions - first table lookup */
+    rinv             = gmx_invsqrt(r2); /* presumably 1/sqrt(r2) - confirm in the maths headers */
+    r                = r2*rinv;
+    rtab             = r*tabscale;
+    ntab             = rtab;            /* implicit real to int conversion: index of the table point at or below rtab */
+    eps              = rtab-ntab;       /* fractional offset from that table point */
+    eps2             = eps*eps;
+    ntab             = 12*ntab;         /* 12 reals are stored per table point */
+    /* Electrostatics */
+    Y                = vftab[ntab];
+    F                = vftab[ntab+1];
+    Geps             = eps*vftab[ntab+2];
+    Heps2            = eps2*vftab[ntab+3];
+    Fp               = F+Geps+Heps2;
+    VVe              = Y+eps*Fp;
+    FFe              = Fp+Geps+2.0*Heps2;
+    /* Dispersion */
+    Y                = vftab[ntab+4];
+    F                = vftab[ntab+5];
+    Geps             = eps*vftab[ntab+6];
+    Heps2            = eps2*vftab[ntab+7];
+    Fp               = F+Geps+Heps2;
+    VVd              = Y+eps*Fp;
+    FFd              = Fp+Geps+2.0*Heps2;
+    /* Repulsion */
+    Y                = vftab[ntab+8];
+    F                = vftab[ntab+9];
+    Geps             = eps*vftab[ntab+10];
+    Heps2            = eps2*vftab[ntab+11];
+    Fp               = F+Geps+Heps2;
+    VVr              = Y+eps*Fp;
+    FFr              = Fp+Geps+2.0*Heps2;
+
+    *velec           = qq*VVe;
+    *vvdw            = c6*VVd+c12*VVr;
+
+    fscal            = -(qq*FFe+c6*FFd+c12*FFr)*tabscale*rinv; /* the caller multiplies the distance vector by this */
+
+    return fscal;
+}
+
+
+real
+do_nonbonded_listed(int ftype, int nbonds,
+                    const t_iatom iatoms[], const t_iparams iparams[],
+                    const rvec x[], rvec f[], rvec fshift[],
+                    const t_pbc *pbc, const t_graph *g,
+                    real *lambda, real *dvdl,
+                    const t_mdatoms *md,
+                    const t_forcerec *fr, gmx_grppairener_t *grppener,
+                    int *global_atom_index)
+{
+    int              ielec, ivdw;
+    real             qq, c6, c12;
+    rvec             dx;
+    ivec             dt;
+    int              i, j, itype, ai, aj, gid;
+    int              fshift_index;
+    real             r2, rinv;
+    real             fscal, velec, vvdw;
+    real *
energygrp_elec; + real * energygrp_vdw; + static gmx_bool warned_rlimit = FALSE; + /* Free energy stuff */ + gmx_bool bFreeEnergy; + real LFC[2], LFV[2], DLF[2], lfac_coul[2], lfac_vdw[2], dlfac_coul[2], dlfac_vdw[2]; + real qqB, c6B, c12B, sigma2_def, sigma2_min; + + + switch (ftype) + { + case F_LJ14: + case F_LJC14_Q: + energygrp_elec = grppener->ener[egCOUL14]; + energygrp_vdw = grppener->ener[egLJ14]; + break; + case F_LJC_PAIRS_NB: + energygrp_elec = grppener->ener[egCOULSR]; + energygrp_vdw = grppener->ener[egLJSR]; + break; + default: + energygrp_elec = NULL; /* Keep compiler happy */ + energygrp_vdw = NULL; /* Keep compiler happy */ + gmx_fatal(FARGS, "Unknown function type %d in do_nonbonded14", ftype); + break; + } + + if (fr->efep != efepNO) + { + /* Lambda factor for state A=1-lambda and B=lambda */ + LFC[0] = 1.0 - lambda[efptCOUL]; + LFV[0] = 1.0 - lambda[efptVDW]; + LFC[1] = lambda[efptCOUL]; + LFV[1] = lambda[efptVDW]; + + /*derivative of the lambda factor for state A and B */ + DLF[0] = -1; + DLF[1] = 1; + + /* precalculate */ + sigma2_def = pow(fr->sc_sigma6_def, 1.0/3.0); + sigma2_min = pow(fr->sc_sigma6_min, 1.0/3.0); + + for (i = 0; i < 2; i++) + { + lfac_coul[i] = (fr->sc_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i])); + dlfac_coul[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power == 2 ? (1-LFC[i]) : 1); + lfac_vdw[i] = (fr->sc_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i])); + dlfac_vdw[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power == 2 ? 
(1-LFV[i]) : 1); + } + } + else + { + sigma2_min = sigma2_def = 0; + } + + bFreeEnergy = FALSE; + for (i = 0; (i < nbonds); ) + { + itype = iatoms[i++]; + ai = iatoms[i++]; + aj = iatoms[i++]; + gid = GID(md->cENER[ai], md->cENER[aj], md->nenergrp); + + /* Get parameters */ + switch (ftype) + { + case F_LJ14: + bFreeEnergy = + (fr->efep != efepNO && + ((md->nPerturbed && (md->bPerturbed[ai] || md->bPerturbed[aj])) || + iparams[itype].lj14.c6A != iparams[itype].lj14.c6B || + iparams[itype].lj14.c12A != iparams[itype].lj14.c12B)); + qq = md->chargeA[ai]*md->chargeA[aj]*fr->epsfac*fr->fudgeQQ; + c6 = iparams[itype].lj14.c6A; + c12 = iparams[itype].lj14.c12A; + break; + case F_LJC14_Q: + qq = iparams[itype].ljc14.qi*iparams[itype].ljc14.qj*fr->epsfac*iparams[itype].ljc14.fqq; + c6 = iparams[itype].ljc14.c6; + c12 = iparams[itype].ljc14.c12; + break; + case F_LJC_PAIRS_NB: + qq = iparams[itype].ljcnb.qi*iparams[itype].ljcnb.qj*fr->epsfac; + c6 = iparams[itype].ljcnb.c6; + c12 = iparams[itype].ljcnb.c12; + break; + default: + /* Cannot happen since we called gmx_fatal() above in this case */ + qq = c6 = c12 = 0; /* Keep compiler happy */ + break; + } + + /* To save flops in the optimized kernels, c6/c12 have 6.0/12.0 derivative prefactors + * included in the general nfbp array now. This means the tables are scaled down by the + * same factor, so when we use the original c6/c12 parameters from iparams[] they must + * be scaled up. + */ + c6 *= 6.0; + c12 *= 12.0; + + /* Do we need to apply full periodic boundary conditions? 
*/ + if (fr->bMolPBC == TRUE) + { + fshift_index = pbc_dx_aiuc(pbc, x[ai], x[aj], dx); + } + else + { + fshift_index = CENTRAL; + rvec_sub(x[ai], x[aj], dx); + } + r2 = norm2(dx); + + if (r2 >= fr->tab14.r*fr->tab14.r) + { + if (warned_rlimit == FALSE) + { + nb_listed_warning_rlimit(x, ai, aj, global_atom_index, sqrt(r2), fr->tab14.r); + warned_rlimit = TRUE; + } + continue; + } + + if (bFreeEnergy) + { + /* Currently free energy is only supported for F_LJ14, so no need to check for that if we got here */ + qqB = md->chargeB[ai]*md->chargeB[aj]*fr->epsfac*fr->fudgeQQ; + c6B = iparams[itype].lj14.c6B*6.0; + c12B = iparams[itype].lj14.c12B*12.0; + + fscal = nb_free_energy_evaluate_single(r2, fr->sc_r_power, fr->sc_alphacoul, fr->sc_alphavdw, + fr->tab14.scale, fr->tab14.data, qq, c6, c12, qqB, c6B, c12B, + LFC, LFV, DLF, lfac_coul, lfac_vdw, dlfac_coul, dlfac_vdw, + fr->sc_sigma6_def, fr->sc_sigma6_min, sigma2_def, sigma2_min, &velec, &vvdw, dvdl); + } + else + { + /* Evaluate tabulated interaction without free energy */ + fscal = nb_evaluate_single(r2, fr->tab14.scale, fr->tab14.data, qq, c6, c12, &velec, &vvdw); + } + + energygrp_elec[gid] += velec; + energygrp_vdw[gid] += vvdw; + svmul(fscal, dx, dx); + + /* Add the forces */ + rvec_inc(f[ai], dx); + rvec_dec(f[aj], dx); + + if (g) + { + /* Correct the shift forces using the graph */ + ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt); + fshift_index = IVEC2IS(dt); + } + if (fshift_index != CENTRAL) + { + rvec_inc(fshift[fshift_index], dx); + rvec_dec(fshift[CENTRAL], dx); + } + } + return 0.0; +} diff --cc src/gromacs/gmxlib/tpxio.c index 80dac300ba,0000000000..e8d5d2c73b mode 100644,000000..100644 --- a/src/gromacs/gmxlib/tpxio.c +++ b/src/gromacs/gmxlib/tpxio.c @@@ -1,3538 -1,0 +1,3538 @@@ +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*- + * + * + * This source code is part of + * + * G R O M A C S + * + * GROningen MAchine for Chemical Simulations + * + 
* VERSION 3.2.0 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others. + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team, + * check out http://www.gromacs.org for more information. + + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * If you want to redistribute modifications, please consider that + * scientific software is very special. Version control is crucial - + * bugs must be traceable. We will be happy to consider code for + * inclusion in the official distribution, but derived work must not + * be called official GROMACS. Details are found in the README & COPYING + * files - if they are missing, get the official version at www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the papers on the package - you can find them in the top README file. + * + * For more info, check our website at http://www.gromacs.org + * + * And Hey: + * GROningen Mixture of Alchemy and Childrens' Stories + */ +#ifdef HAVE_CONFIG_H +#include +#endif + +/* This file is completely threadsafe - keep it that way! */ +#ifdef GMX_THREAD_MPI +#include +#endif + + +#include +#include "sysstuff.h" +#include "smalloc.h" +#include "string2.h" +#include "gmx_fatal.h" +#include "macros.h" +#include "names.h" +#include "symtab.h" +#include "futil.h" +#include "filenm.h" +#include "gmxfio.h" +#include "topsort.h" +#include "tpxio.h" +#include "txtdump.h" +#include "confio.h" +#include "atomprop.h" +#include "copyrite.h" +#include "vec.h" +#include "mtop_util.h" + +#define TPX_TAG_RELEASE "release" + +/* This is the tag string which is stored in the tpx file. + * Change this if you want to change the tpx format in a feature branch. 
+ * This ensures that there will not be different tpx formats around which + * can not be distinguished. + */ +static const char *tpx_tag = TPX_TAG_RELEASE; + +/* This number should be increased whenever the file format changes! */ +static const int tpx_version = 92; + +/* This number should only be increased when you edit the TOPOLOGY section + * or the HEADER of the tpx format. + * This way we can maintain forward compatibility too for all analysis tools + * and/or external programs that only need to know the atom/residue names, + * charges, and bond connectivity. + * + * It first appeared in tpx version 26, when I also moved the inputrecord + * to the end of the tpx file, so we can just skip it if we only + * want the topology. + */ +static const int tpx_generation = 25; + +/* This number should be the most recent backwards incompatible version + * I.e., if this number is 9, we cannot read tpx version 9 with this code. + */ +static const int tpx_incompatible_version = 9; + + + +/* Struct used to maintain tpx compatibility when function types are added */ +typedef struct { + int fvnr; /* file version number in which the function type first appeared */ + int ftype; /* function type */ +} t_ftupd; + +/* + * The entries should be ordered in: + * 1. ascending file version number + * 2. ascending function type number + */ +/*static const t_ftupd ftupd[] = { + { 20, F_CUBICBONDS }, + { 20, F_CONNBONDS }, + { 20, F_HARMONIC }, + { 20, F_EQM, }, + { 22, F_DISRESVIOL }, + { 22, F_ORIRES }, + { 22, F_ORIRESDEV }, + { 26, F_FOURDIHS }, + { 26, F_PIDIHS }, + { 26, F_DIHRES }, + { 26, F_DIHRESVIOL }, + { 30, F_CROSS_BOND_BONDS }, + { 30, F_CROSS_BOND_ANGLES }, + { 30, F_UREY_BRADLEY }, + { 30, F_POLARIZATION }, + { 54, F_DHDL_CON }, + };*/ +/* + * The entries should be ordered in: + * 1. ascending function type number + * 2. ascending file version number + */ +/* question; what is the purpose of the commented code above? 
*/ +static const t_ftupd ftupd[] = { + { 20, F_CUBICBONDS }, + { 20, F_CONNBONDS }, + { 20, F_HARMONIC }, + { 34, F_FENEBONDS }, + { 43, F_TABBONDS }, + { 43, F_TABBONDSNC }, + { 70, F_RESTRBONDS }, + { 76, F_LINEAR_ANGLES }, + { 30, F_CROSS_BOND_BONDS }, + { 30, F_CROSS_BOND_ANGLES }, + { 30, F_UREY_BRADLEY }, + { 34, F_QUARTIC_ANGLES }, + { 43, F_TABANGLES }, + { 26, F_FOURDIHS }, + { 26, F_PIDIHS }, + { 43, F_TABDIHS }, + { 65, F_CMAP }, + { 60, F_GB12 }, + { 61, F_GB13 }, + { 61, F_GB14 }, + { 72, F_GBPOL }, + { 72, F_NPSOLVATION }, + { 41, F_LJC14_Q }, + { 41, F_LJC_PAIRS_NB }, + { 32, F_BHAM_LR }, + { 32, F_RF_EXCL }, + { 32, F_COUL_RECIP }, + { 46, F_DPD }, + { 30, F_POLARIZATION }, + { 36, F_THOLE_POL }, + { 90, F_FBPOSRES }, + { 22, F_DISRESVIOL }, + { 22, F_ORIRES }, + { 22, F_ORIRESDEV }, + { 26, F_DIHRES }, + { 26, F_DIHRESVIOL }, + { 49, F_VSITE4FDN }, + { 50, F_VSITEN }, + { 46, F_COM_PULL }, + { 20, F_EQM }, + { 46, F_ECONSERVED }, + { 69, F_VTEMP_NOLONGERUSED}, + { 66, F_PDISPCORR }, + { 54, F_DVDL_CONSTR }, + { 76, F_ANHARM_POL }, + { 79, F_DVDL_COUL }, + { 79, F_DVDL_VDW, }, + { 79, F_DVDL_BONDED, }, + { 79, F_DVDL_RESTRAINT }, + { 79, F_DVDL_TEMPERATURE }, +}; +#define NFTUPD asize(ftupd) + +/* Needed for backward compatibility */ +#define MAXNODES 256 + +static void _do_section(t_fileio *fio, int key, gmx_bool bRead, const char *src, + int line) +{ + char buf[STRLEN]; + gmx_bool bDbg; + + if (gmx_fio_getftp(fio) == efTPA) + { + if (!bRead) + { + gmx_fio_write_string(fio, itemstr[key]); + bDbg = gmx_fio_getdebug(fio); + gmx_fio_setdebug(fio, FALSE); + gmx_fio_write_string(fio, comment_str[key]); + gmx_fio_setdebug(fio, bDbg); + } + else + { + if (gmx_fio_getdebug(fio)) + { + fprintf(stderr, "Looking for section %s (%s, %d)", + itemstr[key], src, line); + } + + do + { + gmx_fio_do_string(fio, buf); + } + while ((gmx_strcasecmp(buf, itemstr[key]) != 0)); + + if (gmx_strcasecmp(buf, itemstr[key]) != 0) + { + gmx_fatal(FARGS, "\nCould not find 
section heading %s", itemstr[key]); + } + else if (gmx_fio_getdebug(fio)) + { + fprintf(stderr, " and found it\n");
+            }
+        }
+    }
+}
+
+#define do_section(fio, key, bRead) _do_section(fio, key, bRead, __FILE__, __LINE__)
+
+/**************************************************************
+ *
+ * Now the higher level routines that do io of the structures and arrays
+ *
+ **************************************************************/
+static void do_pullgrp(t_fileio *fio, t_pullgrp *pgrp, gmx_bool bRead,
+                       int file_version)
+{   /* Transfers one pull group through fio, field by field, in this order:
+     * nat, ind[nat], nweight, weight[nweight], pbcatom, vec, init, rate, k
+     * and, only for file_version >= 56, kB.
+     *
+     * fio           file handle handed to every gmx_fio_* call
+     * pgrp          pull group whose fields are transferred
+     * bRead         when set, ind and weight are allocated with snew before
+     *               their contents are transferred
+     * file_version  tpx version of the file; below 56 no kB is stored and
+     *               kB is set to k instead
+     *
+     * NOTE(review): whether the gmx_fio_* calls read or write is presumably
+     * decided by the state of fio - confirm in gmxfio.h.
+     */
+    gmx_bool bDum = TRUE; /* receives the results of the array transfers; never checked */
+    int      i;           /* not referenced by name here; do_expandedvals notes that the ndo macros use i - confirm before removing */
+
+    gmx_fio_do_int(fio, pgrp->nat);
+    if (bRead)
+    {
+        snew(pgrp->ind, pgrp->nat);
+    }
+    bDum = gmx_fio_ndo_int(fio, pgrp->ind, pgrp->nat);
+    gmx_fio_do_int(fio, pgrp->nweight);
+    if (bRead)
+    {
+        snew(pgrp->weight, pgrp->nweight);
+    }
+    bDum = gmx_fio_ndo_real(fio, pgrp->weight, pgrp->nweight);
+    gmx_fio_do_int(fio, pgrp->pbcatom);
+    gmx_fio_do_rvec(fio, pgrp->vec);
+    gmx_fio_do_rvec(fio, pgrp->init);
+    gmx_fio_do_real(fio, pgrp->rate);
+    gmx_fio_do_real(fio, pgrp->k);
+    if (file_version >= 56)
+    {
+        gmx_fio_do_real(fio, pgrp->kB);
+    }
+    else
+    {
+        pgrp->kB = pgrp->k; /* executed regardless of bRead */
+    }
+}
+
+static void do_expandedvals(t_fileio *fio, t_expanded *expand, t_lambda *fepvals, gmx_bool bRead, int file_version)
+{
+    /* i is used in the ndo_double macro*/
+    int      i;
+    real     fv;
+    gmx_bool bDum = TRUE;
+    real     rdum;
+    int      n_lambda = fepvals->n_lambda;
+
+    /* reset the lambda calculation window */
+    fepvals->lambda_start_n = 0;
+    fepvals->lambda_stop_n  = n_lambda;
+    if (file_version >= 79)
+    {
+        if (n_lambda > 0)
+        {
+            if (bRead)
+            {
+                snew(expand->init_lambda_weights, n_lambda);
+            }
+            bDum = gmx_fio_ndo_real(fio, expand->init_lambda_weights, n_lambda);
+            gmx_fio_do_gmx_bool(fio, expand->bInit_weights);
+        }
+
+        gmx_fio_do_int(fio, expand->nstexpanded);
+        gmx_fio_do_int(fio, expand->elmcmove);
+        gmx_fio_do_int(fio, expand->elamstats);
+        gmx_fio_do_int(fio, expand->lmc_repeats);
+        gmx_fio_do_int(fio, expand->gibbsdeltalam);
+
gmx_fio_do_int(fio, expand->lmc_forced_nstart);
+        gmx_fio_do_int(fio, expand->lmc_seed);
+        gmx_fio_do_real(fio, expand->mc_temp);
+        gmx_fio_do_int(fio, expand->bSymmetrizedTMatrix);
+        gmx_fio_do_int(fio, expand->nstTij);
+        gmx_fio_do_int(fio, expand->minvarmin);
+        gmx_fio_do_int(fio, expand->c_range);
+        gmx_fio_do_real(fio, expand->wl_scale);
+        gmx_fio_do_real(fio, expand->wl_ratio);
+        gmx_fio_do_real(fio, expand->init_wl_delta);
+        gmx_fio_do_gmx_bool(fio, expand->bWLoneovert);
+        gmx_fio_do_int(fio, expand->elmceq);
+        gmx_fio_do_int(fio, expand->equil_steps);
+        gmx_fio_do_int(fio, expand->equil_samples);
+        gmx_fio_do_int(fio, expand->equil_n_at_lam);
+        gmx_fio_do_real(fio, expand->equil_wl_delta);
+        gmx_fio_do_real(fio, expand->equil_ratio);
+    }
+}
+/* Transfers the simulated tempering settings through fio.
+ *
+ * For file_version >= 79 the fields go in this order: eSimTempScale,
+ * simtemp_high, simtemp_low and, only when n_lambda > 0, the temperatures
+ * array of n_lambda reals. For older file versions nothing is transferred
+ * and simtemp is left untouched.
+ *
+ * fio           file handle handed to every gmx_fio_* call
+ * simtemp       structure whose fields are transferred
+ * n_lambda      length of the temperatures array
+ * bRead         when set, temperatures is allocated with snew before its
+ *               contents are transferred
+ * file_version  tpx version of the file
+ *
+ * The result stored in bDum is never checked.
+ */
+static void do_simtempvals(t_fileio *fio, t_simtemp *simtemp, int n_lambda, gmx_bool bRead,
+                           int file_version)
+{
+    gmx_bool bDum = TRUE;
+
+    if (file_version >= 79)
+    {
+        gmx_fio_do_int(fio, simtemp->eSimTempScale);
+        gmx_fio_do_real(fio, simtemp->simtemp_high);
+        gmx_fio_do_real(fio, simtemp->simtemp_low);
+        if (n_lambda > 0)
+        {
+            if (bRead)
+            {
+                snew(simtemp->temperatures, n_lambda);
+            }
+            bDum = gmx_fio_ndo_real(fio, simtemp->temperatures, n_lambda);
+        }
+    }
+}
+
+static void do_fepvals(t_fileio *fio, t_lambda *fepvals, gmx_bool bRead, int file_version)
+{
+    /* i is defined in the ndo_double macro; use g to iterate.
*/ + int i, g; + real fv; + gmx_bool bDum = TRUE; + real rdum; + + /* free energy values */ + + if (file_version >= 79) + { + gmx_fio_do_int(fio, fepvals->init_fep_state); + gmx_fio_do_double(fio, fepvals->init_lambda); + gmx_fio_do_double(fio, fepvals->delta_lambda); + } + else if (file_version >= 59) + { + gmx_fio_do_double(fio, fepvals->init_lambda); + gmx_fio_do_double(fio, fepvals->delta_lambda); + } + else + { /* before version 59 these two values go through a real temporary and are then assigned to the struct fields */ + gmx_fio_do_real(fio, rdum); + fepvals->init_lambda = rdum; + gmx_fio_do_real(fio, rdum); + fepvals->delta_lambda = rdum; + } + if (file_version >= 79) + { + gmx_fio_do_int(fio, fepvals->n_lambda); + if (bRead) + { + snew(fepvals->all_lambda, efptNR); + } + for (g = 0; g < efptNR; g++) + { + if (fepvals->n_lambda > 0) + { + if (bRead) + { + snew(fepvals->all_lambda[g], fepvals->n_lambda); + } + bDum = gmx_fio_ndo_double(fio, fepvals->all_lambda[g], fepvals->n_lambda); + bDum = gmx_fio_ndo_int(fio, fepvals->separate_dvdl, efptNR); /* NOTE(review): this separate_dvdl transfer sits inside the loop over g, so it is issued once per lambda component whenever n_lambda > 0 - confirm against the stored layout before moving it */ + } + else if (fepvals->init_lambda >= 0) + { + fepvals->separate_dvdl[efptFEP] = TRUE; + } + } + } + else if (file_version >= 64) + { + gmx_fio_do_int(fio, fepvals->n_lambda); + if (bRead) + { + int g; /* shadows the function-level g declared above */ + + snew(fepvals->all_lambda, efptNR); + /* still allocate the all_lambda array's contents. 
*/ + for (g = 0; g < efptNR; g++) + { + if (fepvals->n_lambda > 0) + { + snew(fepvals->all_lambda[g], fepvals->n_lambda); + } + } + } + bDum = gmx_fio_ndo_double(fio, fepvals->all_lambda[efptFEP], + fepvals->n_lambda); + if (fepvals->init_lambda >= 0) + { + int g, h; /* g again shadows the function-level g */ + + fepvals->separate_dvdl[efptFEP] = TRUE; + + if (bRead) + { + /* copy the contents of the efptFEP lambda component to all + the other components */ + for (g = 0; g < efptNR; g++) + { + for (h = 0; h < fepvals->n_lambda; h++) + { + if (g != efptFEP) + { + fepvals->all_lambda[g][h] = + fepvals->all_lambda[efptFEP][h]; + } + } + } + } + } + } + else + { /* file_version < 64: no lambda array is transferred */ + fepvals->n_lambda = 0; + fepvals->all_lambda = NULL; + if (fepvals->init_lambda >= 0) + { + fepvals->separate_dvdl[efptFEP] = TRUE; + } + } + if (file_version >= 13) + { + gmx_fio_do_real(fio, fepvals->sc_alpha); + } + else + { + fepvals->sc_alpha = 0; + } + if (file_version >= 38) + { + gmx_fio_do_int(fio, fepvals->sc_power); + } + else + { + fepvals->sc_power = 2; + } + if (file_version >= 79) + { + gmx_fio_do_real(fio, fepvals->sc_r_power); + } + else + { + fepvals->sc_r_power = 6.0; + } + if (file_version >= 15) + { + gmx_fio_do_real(fio, fepvals->sc_sigma); + } + else + { + fepvals->sc_sigma = 0.3; + } + if (bRead) + { /* sc_sigma_min is never transferred itself; on read it is derived from sc_sigma (version >= 71) or set to 0 */ + if (file_version >= 71) + { + fepvals->sc_sigma_min = fepvals->sc_sigma; + } + else + { + fepvals->sc_sigma_min = 0; + } + } + if (file_version >= 79) + { + gmx_fio_do_int(fio, fepvals->bScCoul); + } + else + { + fepvals->bScCoul = TRUE; + } + if (file_version >= 64) + { + gmx_fio_do_int(fio, fepvals->nstdhdl); + } + else + { + fepvals->nstdhdl = 1; + } + + if (file_version >= 73) + { + gmx_fio_do_int(fio, fepvals->separate_dhdl_file); + gmx_fio_do_int(fio, fepvals->dhdl_derivatives); + } + else + { + fepvals->separate_dhdl_file = esepdhdlfileYES; + fepvals->dhdl_derivatives = edhdlderivativesYES; + } + if (file_version >= 71) + { + gmx_fio_do_int(fio, fepvals->dh_hist_size); + gmx_fio_do_double(fio, fepvals->dh_hist_spacing); + 
} + else + { + fepvals->dh_hist_size = 0; + fepvals->dh_hist_spacing = 0.1; + } + if (file_version >= 79) + { + gmx_fio_do_int(fio, fepvals->bPrintEnergy); + } + else + { + fepvals->bPrintEnergy = FALSE; + } + + /* handle lambda_neighbors */ + if ((file_version >= 83 && file_version < 90) || file_version >= 92) + { + gmx_fio_do_int(fio, fepvals->lambda_neighbors); + if ( (fepvals->lambda_neighbors >= 0) && (fepvals->init_fep_state >= 0) && + (fepvals->init_lambda < 0) ) + { /* window of lambda_neighbors states on either side of init_fep_state, clamped below to the range [0, n_lambda] */ + fepvals->lambda_start_n = (fepvals->init_fep_state - + fepvals->lambda_neighbors); + fepvals->lambda_stop_n = (fepvals->init_fep_state + + fepvals->lambda_neighbors + 1); + if (fepvals->lambda_start_n < 0) + { + fepvals->lambda_start_n = 0;; /* NOTE(review): the doubled ';' adds an empty statement */ + } + if (fepvals->lambda_stop_n >= fepvals->n_lambda) + { + fepvals->lambda_stop_n = fepvals->n_lambda; + } + } + else + { + fepvals->lambda_start_n = 0; + fepvals->lambda_stop_n = fepvals->n_lambda; + } + } + else + { /* lambda_neighbors is not transferred for this file_version; the window spans all n_lambda states */ + fepvals->lambda_start_n = 0; + fepvals->lambda_stop_n = fepvals->n_lambda; + } +} + /* do_pull: passes the pull settings (ngrp, eGeom, dim, cyl_r1, cyl_r0, constr_tol, nstxout, nstfout) through the gmx_fio_do_* calls, then hands each of the ngrp+1 entries of pull->grp to do_pullgrp. The grp array is allocated with ngrp+1 entries when bRead is set. */ +static void do_pull(t_fileio *fio, t_pull *pull, gmx_bool bRead, int file_version) +{ + int g; + + gmx_fio_do_int(fio, pull->ngrp); + gmx_fio_do_int(fio, pull->eGeom); + gmx_fio_do_ivec(fio, pull->dim); + gmx_fio_do_real(fio, pull->cyl_r1); + gmx_fio_do_real(fio, pull->cyl_r0); + gmx_fio_do_real(fio, pull->constr_tol); + gmx_fio_do_int(fio, pull->nstxout); + gmx_fio_do_int(fio, pull->nstfout); + if (bRead) + { + snew(pull->grp, pull->ngrp+1); + } + for (g = 0; g < pull->ngrp+1; g++) + { + do_pullgrp(fio, &pull->grp[g], bRead, file_version); + } +} + + /* do_rotgrp: passes one rotation group through the gmx_fio_do_* calls: eType, bMassW, nat, the ind and x_ref arrays of nat entries (each allocated with snew when bRead is set), then the remaining per-group parameters. */ +static void do_rotgrp(t_fileio *fio, t_rotgrp *rotg, gmx_bool bRead, int file_version) +{ + gmx_bool bDum = TRUE; + int i; + + gmx_fio_do_int(fio, rotg->eType); + gmx_fio_do_int(fio, rotg->bMassW); + gmx_fio_do_int(fio, rotg->nat); + if (bRead) + { + snew(rotg->ind, rotg->nat); + } + gmx_fio_ndo_int(fio, rotg->ind, rotg->nat); + if (bRead) + { + snew(rotg->x_ref, rotg->nat); + } + gmx_fio_ndo_rvec(fio, 
rotg->x_ref, rotg->nat); + gmx_fio_do_rvec(fio, rotg->vec); + gmx_fio_do_rvec(fio, rotg->pivot); + gmx_fio_do_real(fio, rotg->rate); + gmx_fio_do_real(fio, rotg->k); + gmx_fio_do_real(fio, rotg->slab_dist); + gmx_fio_do_real(fio, rotg->min_gaussian); + gmx_fio_do_real(fio, rotg->eps); + gmx_fio_do_int(fio, rotg->eFittype); + gmx_fio_do_int(fio, rotg->PotAngle_nstep); + gmx_fio_do_real(fio, rotg->PotAngle_step); +} + /* do_rot: passes the enforced-rotation settings (ngrp, nstrout, nstsout) through the gmx_fio_do_* calls, then hands each of the ngrp entries of rot->grp to do_rotgrp. The grp array is allocated with ngrp entries when bRead is set. */ +static void do_rot(t_fileio *fio, t_rot *rot, gmx_bool bRead, int file_version) +{ + int g; + + gmx_fio_do_int(fio, rot->ngrp); + gmx_fio_do_int(fio, rot->nstrout); + gmx_fio_do_int(fio, rot->nstsout); + if (bRead) + { + snew(rot->grp, rot->ngrp); + } + for (g = 0; g < rot->ngrp; g++) + { + do_rotgrp(fio, &rot->grp[g], bRead, file_version); + } +} + + /* do_inputrec: passes the run parameters in ir through the gmx_fio_do_* calls in a fixed order. Most fields are gated on file_version; a field that the given version does not carry is assigned a fallback value, or consumed into a dummy variable when an old version stored something no longer kept. A note is printed to stderr when file_version differs from tpx_version. When bRead is set, ir is first reset with init_inputrec. Nothing further happens when file_version is 0. *fudgeQQ is transferred here only for file_version < 54. */ +static void do_inputrec(t_fileio *fio, t_inputrec *ir, gmx_bool bRead, + int file_version, real *fudgeQQ) +{ + int i, j, k, *tmp, idum = 0; + gmx_bool bDum = TRUE; + real rdum, bd_temp; + rvec vdum; + gmx_bool bSimAnn; + real zerotemptime, finish_t, init_temp, finish_temp; + + if (file_version != tpx_version) + { + /* Give a warning about features that are not accessible */ + fprintf(stderr, "Note: file tpx version %d, software tpx version %d\n", + file_version, tpx_version); + } + + if (bRead) + { + init_inputrec(ir); + } + + if (file_version == 0) + { + return; + } + + /* Basic inputrec stuff */ + gmx_fio_do_int(fio, ir->eI); + if (file_version >= 62) + { + gmx_fio_do_gmx_large_int(fio, ir->nsteps); + } + else + { /* before version 62 the step count goes through the int idum */ + gmx_fio_do_int(fio, idum); + ir->nsteps = idum; + } + if (file_version > 25) + { + if (file_version >= 62) + { + gmx_fio_do_gmx_large_int(fio, ir->init_step); + } + else + { + gmx_fio_do_int(fio, idum); + ir->init_step = idum; + } + } + else + { + ir->init_step = 0; + } + + if (file_version >= 58) + { + gmx_fio_do_int(fio, ir->simulation_part); + } + else + { + ir->simulation_part = 1; + } + + if (file_version >= 67) + { + gmx_fio_do_int(fio, ir->nstcalcenergy); + } + else + { + 
ir->nstcalcenergy = 1; + } + if (file_version < 53) + { + /* The pbc info has been moved out of do_inputrec, + * since we always want it, also without reading the inputrec. + */ + gmx_fio_do_int(fio, ir->ePBC); + if ((file_version <= 15) && (ir->ePBC == 2)) + { + ir->ePBC = epbcNONE; + } + if (file_version >= 45) + { + gmx_fio_do_int(fio, ir->bPeriodicMols); + } + else + { + if (ir->ePBC == 2) + { + ir->ePBC = epbcXYZ; + ir->bPeriodicMols = TRUE; + } + else + { + ir->bPeriodicMols = FALSE; + } + } + } + if (file_version >= 81) + { + gmx_fio_do_int(fio, ir->cutoff_scheme); + } + else + { + ir->cutoff_scheme = ecutsGROUP; + } + gmx_fio_do_int(fio, ir->ns_type); + gmx_fio_do_int(fio, ir->nstlist); + gmx_fio_do_int(fio, ir->ndelta); + if (file_version < 41) + { + gmx_fio_do_int(fio, idum); + gmx_fio_do_int(fio, idum); + } + if (file_version >= 45) + { + gmx_fio_do_real(fio, ir->rtpi); + } + else + { + ir->rtpi = 0.05; + } + gmx_fio_do_int(fio, ir->nstcomm); + if (file_version > 34) + { + gmx_fio_do_int(fio, ir->comm_mode); + } + else if (ir->nstcomm < 0) + { + ir->comm_mode = ecmANGULAR; + } + else + { + ir->comm_mode = ecmLINEAR; + } + ir->nstcomm = abs(ir->nstcomm); /* for file_version <= 34 the sign of nstcomm selected the mode in the branch above; the value kept is always non-negative */ + + if (file_version > 25) + { + gmx_fio_do_int(fio, ir->nstcheckpoint); + } + else + { + ir->nstcheckpoint = 0; + } + + gmx_fio_do_int(fio, ir->nstcgsteep); + + if (file_version >= 30) + { + gmx_fio_do_int(fio, ir->nbfgscorr); + } + else if (bRead) + { + ir->nbfgscorr = 10; + } + + gmx_fio_do_int(fio, ir->nstlog); + gmx_fio_do_int(fio, ir->nstxout); + gmx_fio_do_int(fio, ir->nstvout); + gmx_fio_do_int(fio, ir->nstfout); + gmx_fio_do_int(fio, ir->nstenergy); + gmx_fio_do_int(fio, ir->nstxtcout); + if (file_version >= 59) + { + gmx_fio_do_double(fio, ir->init_t); + gmx_fio_do_double(fio, ir->delta_t); + } + else + { + gmx_fio_do_real(fio, rdum); + ir->init_t = rdum; + gmx_fio_do_real(fio, rdum); + ir->delta_t = rdum; + } + gmx_fio_do_real(fio, ir->xtcprec); + if (file_version < 19) + { + 
gmx_fio_do_int(fio, idum); + gmx_fio_do_int(fio, idum); + } + if (file_version < 18) + { + gmx_fio_do_int(fio, idum); + } + if (file_version >= 81) + { + gmx_fio_do_real(fio, ir->verletbuf_drift); + } + else + { + ir->verletbuf_drift = 0; + } + gmx_fio_do_real(fio, ir->rlist); + if (file_version >= 67) + { + gmx_fio_do_real(fio, ir->rlistlong); + } + if (file_version >= 82 && file_version != 90) + { + gmx_fio_do_int(fio, ir->nstcalclr); + } + else + { + /* Calculate at NS steps */ + ir->nstcalclr = ir->nstlist; + } + gmx_fio_do_int(fio, ir->coulombtype); + if (file_version < 32 && ir->coulombtype == eelRF) + { + ir->coulombtype = eelRF_NEC; + } + if (file_version >= 81) + { + gmx_fio_do_int(fio, ir->coulomb_modifier); + } + else + { + ir->coulomb_modifier = (ir->cutoff_scheme == ecutsVERLET ? eintmodPOTSHIFT : eintmodNONE); + } + gmx_fio_do_real(fio, ir->rcoulomb_switch); + gmx_fio_do_real(fio, ir->rcoulomb); + gmx_fio_do_int(fio, ir->vdwtype); + if (file_version >= 81) + { + gmx_fio_do_int(fio, ir->vdw_modifier); + } + else + { + ir->vdw_modifier = (ir->cutoff_scheme == ecutsVERLET ? 
eintmodPOTSHIFT : eintmodNONE); + } + gmx_fio_do_real(fio, ir->rvdw_switch); + gmx_fio_do_real(fio, ir->rvdw); + if (file_version < 67) + { /* rlistlong was not transferred above for these versions; use the largest of rlist, rvdw and rcoulomb as computed by max_cutoff */ + ir->rlistlong = max_cutoff(ir->rlist, max_cutoff(ir->rvdw, ir->rcoulomb)); + } + gmx_fio_do_int(fio, ir->eDispCorr); + gmx_fio_do_real(fio, ir->epsilon_r); + if (file_version >= 37) + { + gmx_fio_do_real(fio, ir->epsilon_rf); + } + else + { + if (EEL_RF(ir->coulombtype)) + { + ir->epsilon_rf = ir->epsilon_r; + ir->epsilon_r = 1.0; + } + else + { + ir->epsilon_rf = 1.0; + } + } + if (file_version >= 29) + { + gmx_fio_do_real(fio, ir->tabext); + } + else + { + ir->tabext = 1.0; + } + + if (file_version > 25) + { + gmx_fio_do_int(fio, ir->gb_algorithm); + gmx_fio_do_int(fio, ir->nstgbradii); + gmx_fio_do_real(fio, ir->rgbradii); + gmx_fio_do_real(fio, ir->gb_saltconc); + gmx_fio_do_int(fio, ir->implicit_solvent); + } + else + { + ir->gb_algorithm = egbSTILL; + ir->nstgbradii = 1; + ir->rgbradii = 1.0; + ir->gb_saltconc = 0; + ir->implicit_solvent = eisNO; + } + if (file_version >= 55) + { + gmx_fio_do_real(fio, ir->gb_epsilon_solvent); + gmx_fio_do_real(fio, ir->gb_obc_alpha); + gmx_fio_do_real(fio, ir->gb_obc_beta); + gmx_fio_do_real(fio, ir->gb_obc_gamma); + if (file_version >= 60) + { + gmx_fio_do_real(fio, ir->gb_dielectric_offset); + gmx_fio_do_int(fio, ir->sa_algorithm); + } + else + { + ir->gb_dielectric_offset = 0.009; + ir->sa_algorithm = esaAPPROX; + } + gmx_fio_do_real(fio, ir->sa_surface_tension); + + /* Override sa_surface_tension if it is not changed in the mdp-file */ + if (ir->sa_surface_tension < 0) + { + if (ir->gb_algorithm == egbSTILL) + { + ir->sa_surface_tension = 0.0049 * 100 * CAL2JOULE; + } + else if (ir->gb_algorithm == egbHCT || ir->gb_algorithm == egbOBC) + { + ir->sa_surface_tension = 0.0054 * 100 * CAL2JOULE; + } + } + + } + else + { + /* Better use sensible values than insane (0.0) ones... 
*/ + ir->gb_epsilon_solvent = 80; + ir->gb_obc_alpha = 1.0; + ir->gb_obc_beta = 0.8; + ir->gb_obc_gamma = 4.85; + ir->sa_surface_tension = 2.092; + } + + + if (file_version >= 81) + { + gmx_fio_do_real(fio, ir->fourier_spacing); + } + else + { + ir->fourier_spacing = 0.0; + } + gmx_fio_do_int(fio, ir->nkx); + gmx_fio_do_int(fio, ir->nky); + gmx_fio_do_int(fio, ir->nkz); + gmx_fio_do_int(fio, ir->pme_order); + gmx_fio_do_real(fio, ir->ewald_rtol); + + if (file_version >= 24) + { + gmx_fio_do_int(fio, ir->ewald_geometry); + } + else + { + ir->ewald_geometry = eewg3D; + } + + if (file_version <= 17) + { + ir->epsilon_surface = 0; + if (file_version == 17) + { + gmx_fio_do_int(fio, idum); + } + } + else + { + gmx_fio_do_real(fio, ir->epsilon_surface); + } + + gmx_fio_do_gmx_bool(fio, ir->bOptFFT); + + gmx_fio_do_gmx_bool(fio, ir->bContinuation); + gmx_fio_do_int(fio, ir->etc); + /* before version 18, ir->etc was a gmx_bool (ir->btc), + * but the values 0 and 1 still mean no and + * berendsen temperature coupling, respectively. 
+ */ + if (file_version >= 79) + { + gmx_fio_do_gmx_bool(fio, ir->bPrintNHChains); + } + if (file_version >= 71) + { + gmx_fio_do_int(fio, ir->nsttcouple); + } + else + { + ir->nsttcouple = ir->nstcalcenergy; + } + if (file_version <= 15) + { + gmx_fio_do_int(fio, idum); + } + if (file_version <= 17) + { + gmx_fio_do_int(fio, ir->epct); + if (file_version <= 15) + { + if (ir->epct == 5) + { + ir->epct = epctSURFACETENSION; + } + gmx_fio_do_int(fio, idum); + } + ir->epct -= 1; + /* we have removed the NO alternative at the beginning */ + if (ir->epct == -1) + { + ir->epc = epcNO; + ir->epct = epctISOTROPIC; + } + else + { + ir->epc = epcBERENDSEN; + } + } + else + { + gmx_fio_do_int(fio, ir->epc); + gmx_fio_do_int(fio, ir->epct); + } + if (file_version >= 71) + { + gmx_fio_do_int(fio, ir->nstpcouple); + } + else + { + ir->nstpcouple = ir->nstcalcenergy; + } + gmx_fio_do_real(fio, ir->tau_p); + if (file_version <= 15) + { /* up to version 15 only three values are transferred; they become the diagonal of ref_p */ + gmx_fio_do_rvec(fio, vdum); + clear_mat(ir->ref_p); + for (i = 0; i < DIM; i++) + { + ir->ref_p[i][i] = vdum[i]; + } + } + else + { + gmx_fio_do_rvec(fio, ir->ref_p[XX]); + gmx_fio_do_rvec(fio, ir->ref_p[YY]); + gmx_fio_do_rvec(fio, ir->ref_p[ZZ]); + } + if (file_version <= 15) + { + gmx_fio_do_rvec(fio, vdum); + clear_mat(ir->compress); + for (i = 0; i < DIM; i++) + { + ir->compress[i][i] = vdum[i]; + } + } + else + { + gmx_fio_do_rvec(fio, ir->compress[XX]); + gmx_fio_do_rvec(fio, ir->compress[YY]); + gmx_fio_do_rvec(fio, ir->compress[ZZ]); + } + if (file_version >= 47) + { + gmx_fio_do_int(fio, ir->refcoord_scaling); + gmx_fio_do_rvec(fio, ir->posres_com); + gmx_fio_do_rvec(fio, ir->posres_comB); + } + else + { + ir->refcoord_scaling = erscNO; + clear_rvec(ir->posres_com); + clear_rvec(ir->posres_comB); + } + if ((file_version > 25) && (file_version < 79)) + { + gmx_fio_do_int(fio, ir->andersen_seed); + } + else + { + ir->andersen_seed = 0; + } + if (file_version < 26) + { /* bSimAnn and zerotemptime are only used further down, in the bRead && file_version < 26 annealing conversion */ + gmx_fio_do_gmx_bool(fio, bSimAnn); + gmx_fio_do_real(fio, 
zerotemptime); + } + + if (file_version < 37) + { + gmx_fio_do_real(fio, rdum); + } + + gmx_fio_do_real(fio, ir->shake_tol); + if (file_version < 54) + { + gmx_fio_do_real(fio, *fudgeQQ); + } + + gmx_fio_do_int(fio, ir->efep); + if (file_version <= 14 && ir->efep != efepNO) + { + ir->efep = efepYES; + } + do_fepvals(fio, ir->fepvals, bRead, file_version); + + if (file_version >= 79) + { + gmx_fio_do_gmx_bool(fio, ir->bSimTemp); + if (ir->bSimTemp) + { /* maps any non-zero value to TRUE */ + ir->bSimTemp = TRUE; + } + } + else + { + ir->bSimTemp = FALSE; + } + if (ir->bSimTemp) + { + do_simtempvals(fio, ir->simtempvals, ir->fepvals->n_lambda, bRead, file_version); + } + + if (file_version >= 79) + { + gmx_fio_do_gmx_bool(fio, ir->bExpanded); + if (ir->bExpanded) + { + ir->bExpanded = TRUE; + } + else + { + ir->bExpanded = FALSE; + } + } /* NOTE(review): unlike bSimTemp there is no else branch here, so for file_version < 79 bExpanded is not assigned in this function - presumably init_inputrec leaves it cleared on read; confirm */ + if (ir->bExpanded) + { + do_expandedvals(fio, ir->expandedvals, ir->fepvals, bRead, file_version); + } + if (file_version >= 57) + { + gmx_fio_do_int(fio, ir->eDisre); + } + gmx_fio_do_int(fio, ir->eDisreWeighting); + if (file_version < 22) + { + if (ir->eDisreWeighting == 0) + { + ir->eDisreWeighting = edrwEqual; + } + else + { + ir->eDisreWeighting = edrwConservative; + } + } + gmx_fio_do_gmx_bool(fio, ir->bDisreMixed); + gmx_fio_do_real(fio, ir->dr_fc); + gmx_fio_do_real(fio, ir->dr_tau); + gmx_fio_do_int(fio, ir->nstdisreout); + if (file_version >= 22) + { + gmx_fio_do_real(fio, ir->orires_fc); + gmx_fio_do_real(fio, ir->orires_tau); + gmx_fio_do_int(fio, ir->nstorireout); + } + else + { + ir->orires_fc = 0; + ir->orires_tau = 0; + ir->nstorireout = 0; + } + if (file_version >= 26 && file_version < 79) + { + gmx_fio_do_real(fio, ir->dihre_fc); + if (file_version < 56) + { + gmx_fio_do_real(fio, rdum); + gmx_fio_do_int(fio, idum); + } + } + else + { + ir->dihre_fc = 0; + } + + gmx_fio_do_real(fio, ir->em_stepsize); + gmx_fio_do_real(fio, ir->em_tol); + if (file_version >= 22) + { + gmx_fio_do_gmx_bool(fio, ir->bShakeSOR); + } + else if (bRead) + { + ir->bShakeSOR = 
TRUE; + } + if (file_version >= 11) + { + gmx_fio_do_int(fio, ir->niter); + } + else if (bRead) + { + ir->niter = 25; + fprintf(stderr, "Note: niter not in run input file, setting it to %d\n", + ir->niter); + } + if (file_version >= 21) + { + gmx_fio_do_real(fio, ir->fc_stepsize); + } + else + { + ir->fc_stepsize = 0; + } + gmx_fio_do_int(fio, ir->eConstrAlg); + gmx_fio_do_int(fio, ir->nProjOrder); + gmx_fio_do_real(fio, ir->LincsWarnAngle); + if (file_version <= 14) + { + gmx_fio_do_int(fio, idum); + } + if (file_version >= 26) + { + gmx_fio_do_int(fio, ir->nLincsIter); + } + else if (bRead) + { + ir->nLincsIter = 1; + fprintf(stderr, "Note: nLincsIter not in run input file, setting it to %d\n", + ir->nLincsIter); + } + if (file_version < 33) + { /* bd_temp is only used further down to fill tau_t when file_version < 33 and eI == eiBD */ + gmx_fio_do_real(fio, bd_temp); + } + gmx_fio_do_real(fio, ir->bd_fric); + gmx_fio_do_int(fio, ir->ld_seed); + if (file_version >= 33) + { + for (i = 0; i < DIM; i++) + { + gmx_fio_do_rvec(fio, ir->deform[i]); + } + } + else + { + for (i = 0; i < DIM; i++) + { + clear_rvec(ir->deform[i]); + } + } + if (file_version >= 14) + { + gmx_fio_do_real(fio, ir->cos_accel); + } + else if (bRead) + { + ir->cos_accel = 0; + } + gmx_fio_do_int(fio, ir->userint1); + gmx_fio_do_int(fio, ir->userint2); + gmx_fio_do_int(fio, ir->userint3); + gmx_fio_do_int(fio, ir->userint4); + gmx_fio_do_real(fio, ir->userreal1); + gmx_fio_do_real(fio, ir->userreal2); + gmx_fio_do_real(fio, ir->userreal3); + gmx_fio_do_real(fio, ir->userreal4); + + /* AdResS stuff */ + if (file_version >= 77) + { + gmx_fio_do_gmx_bool(fio, ir->bAdress); + if (ir->bAdress) + { + if (bRead) + { + snew(ir->adress, 1); + } + gmx_fio_do_int(fio, ir->adress->type); + gmx_fio_do_real(fio, ir->adress->const_wf); + gmx_fio_do_real(fio, ir->adress->ex_width); + gmx_fio_do_real(fio, ir->adress->hy_width); + gmx_fio_do_int(fio, ir->adress->icor); + gmx_fio_do_int(fio, ir->adress->site); + gmx_fio_do_rvec(fio, ir->adress->refs); + gmx_fio_do_int(fio, ir->adress->n_tf_grps); + 
gmx_fio_do_real(fio, ir->adress->ex_forcecap); + gmx_fio_do_int(fio, ir->adress->n_energy_grps); + gmx_fio_do_int(fio, ir->adress->do_hybridpairs); + + if (bRead) + { + snew(ir->adress->tf_table_index, ir->adress->n_tf_grps); + } + if (ir->adress->n_tf_grps > 0) + { + bDum = gmx_fio_ndo_int(fio, ir->adress->tf_table_index, ir->adress->n_tf_grps); + } + if (bRead) + { + snew(ir->adress->group_explicit, ir->adress->n_energy_grps); + } + if (ir->adress->n_energy_grps > 0) + { + bDum = gmx_fio_ndo_int(fio, ir->adress->group_explicit, ir->adress->n_energy_grps); + } + } + } + else + { + ir->bAdress = FALSE; + } + + /* pull stuff */ + if (file_version >= 48) + { + gmx_fio_do_int(fio, ir->ePull); + if (ir->ePull != epullNO) + { + if (bRead) + { + snew(ir->pull, 1); + } + do_pull(fio, ir->pull, bRead, file_version); + } + } + else + { + ir->ePull = epullNO; + } + + /* Enforced rotation */ + if (file_version >= 74) + { + gmx_fio_do_int(fio, ir->bRot); + if (ir->bRot == TRUE) + { + if (bRead) + { + snew(ir->rot, 1); + } + do_rot(fio, ir->rot, bRead, file_version); + } + } + else + { + ir->bRot = FALSE; + } + + /* grpopts stuff */ + gmx_fio_do_int(fio, ir->opts.ngtc); + if (file_version >= 69) + { + gmx_fio_do_int(fio, ir->opts.nhchainlength); + } + else + { + ir->opts.nhchainlength = 1; + } + gmx_fio_do_int(fio, ir->opts.ngacc); + gmx_fio_do_int(fio, ir->opts.ngfrz); + gmx_fio_do_int(fio, ir->opts.ngener); + + if (bRead) + { + snew(ir->opts.nrdf, ir->opts.ngtc); + snew(ir->opts.ref_t, ir->opts.ngtc); + snew(ir->opts.annealing, ir->opts.ngtc); + snew(ir->opts.anneal_npoints, ir->opts.ngtc); + snew(ir->opts.anneal_time, ir->opts.ngtc); + snew(ir->opts.anneal_temp, ir->opts.ngtc); + snew(ir->opts.tau_t, ir->opts.ngtc); + snew(ir->opts.nFreeze, ir->opts.ngfrz); + snew(ir->opts.acc, ir->opts.ngacc); + snew(ir->opts.egp_flags, ir->opts.ngener*ir->opts.ngener); + } + if (ir->opts.ngtc > 0) + { + if (bRead && file_version < 13) + { /* before version 13 nrdf is transferred as ints; read into tmp and copy element-wise */ + snew(tmp, ir->opts.ngtc); + bDum = 
gmx_fio_ndo_int(fio, tmp, ir->opts.ngtc); + for (i = 0; i < ir->opts.ngtc; i++) + { + ir->opts.nrdf[i] = tmp[i]; + } + sfree(tmp); + } + else + { + bDum = gmx_fio_ndo_real(fio, ir->opts.nrdf, ir->opts.ngtc); + } + bDum = gmx_fio_ndo_real(fio, ir->opts.ref_t, ir->opts.ngtc); + bDum = gmx_fio_ndo_real(fio, ir->opts.tau_t, ir->opts.ngtc); + if (file_version < 33 && ir->eI == eiBD) + { + for (i = 0; i < ir->opts.ngtc; i++) + { + ir->opts.tau_t[i] = bd_temp; /* bd_temp was transferred earlier under the same file_version < 33 condition */ + } + } + } + if (ir->opts.ngfrz > 0) + { + bDum = gmx_fio_ndo_ivec(fio, ir->opts.nFreeze, ir->opts.ngfrz); + } + if (ir->opts.ngacc > 0) + { + gmx_fio_ndo_rvec(fio, ir->opts.acc, ir->opts.ngacc); + } + if (file_version >= 12) + { + bDum = gmx_fio_ndo_int(fio, ir->opts.egp_flags, + ir->opts.ngener*ir->opts.ngener); + } + + if (bRead && file_version < 26) + { /* build a two-point annealing schedule per group from bSimAnn/zerotemptime */ + for (i = 0; i < ir->opts.ngtc; i++) + { + if (bSimAnn) + { + ir->opts.annealing[i] = eannSINGLE; + ir->opts.anneal_npoints[i] = 2; + snew(ir->opts.anneal_time[i], 2); + snew(ir->opts.anneal_temp[i], 2); + /* calculate the starting/ending temperatures from reft, zerotemptime, and nsteps */ + finish_t = ir->init_t + ir->nsteps * ir->delta_t; + init_temp = ir->opts.ref_t[i]*(1-ir->init_t/zerotemptime); + finish_temp = ir->opts.ref_t[i]*(1-finish_t/zerotemptime); + ir->opts.anneal_time[i][0] = ir->init_t; + ir->opts.anneal_time[i][1] = finish_t; + ir->opts.anneal_temp[i][0] = init_temp; + ir->opts.anneal_temp[i][1] = finish_temp; + } + else + { + ir->opts.annealing[i] = eannNO; + ir->opts.anneal_npoints[i] = 0; + } + } + } + else + { + /* file version 26 or later */ /* NOTE(review): this branch is also taken for file_version < 26 when bRead is not set */ + /* First read the lists with annealing and npoints for each group */ + bDum = gmx_fio_ndo_int(fio, ir->opts.annealing, ir->opts.ngtc); + bDum = gmx_fio_ndo_int(fio, ir->opts.anneal_npoints, ir->opts.ngtc); + for (j = 0; j < (ir->opts.ngtc); j++) + { + k = ir->opts.anneal_npoints[j]; + if (bRead) + { + snew(ir->opts.anneal_time[j], k); + snew(ir->opts.anneal_temp[j], k); + } + bDum = 
gmx_fio_ndo_real(fio, ir->opts.anneal_time[j], k); + bDum = gmx_fio_ndo_real(fio, ir->opts.anneal_temp[j], k); + } + } + /* Walls */ + if (file_version >= 45) + { + gmx_fio_do_int(fio, ir->nwall); + gmx_fio_do_int(fio, ir->wall_type); + if (file_version >= 50) + { + gmx_fio_do_real(fio, ir->wall_r_linpot); + } + else + { + ir->wall_r_linpot = -1; + } + gmx_fio_do_int(fio, ir->wall_atomtype[0]); + gmx_fio_do_int(fio, ir->wall_atomtype[1]); + gmx_fio_do_real(fio, ir->wall_density[0]); + gmx_fio_do_real(fio, ir->wall_density[1]); + gmx_fio_do_real(fio, ir->wall_ewald_zfac); + } + else + { + ir->nwall = 0; + ir->wall_type = 0; + ir->wall_atomtype[0] = -1; + ir->wall_atomtype[1] = -1; + ir->wall_density[0] = 0; + ir->wall_density[1] = 0; + ir->wall_ewald_zfac = 3; + } + /* Cosine stuff for electric fields */ + for (j = 0; (j < DIM); j++) + { + gmx_fio_do_int(fio, ir->ex[j].n); + gmx_fio_do_int(fio, ir->et[j].n); + if (bRead) + { + snew(ir->ex[j].a, ir->ex[j].n); + snew(ir->ex[j].phi, ir->ex[j].n); + snew(ir->et[j].a, ir->et[j].n); + snew(ir->et[j].phi, ir->et[j].n); + } + bDum = gmx_fio_ndo_real(fio, ir->ex[j].a, ir->ex[j].n); + bDum = gmx_fio_ndo_real(fio, ir->ex[j].phi, ir->ex[j].n); + bDum = gmx_fio_ndo_real(fio, ir->et[j].a, ir->et[j].n); + bDum = gmx_fio_ndo_real(fio, ir->et[j].phi, ir->et[j].n); + } + + /* QMMM stuff */ + if (file_version >= 39) + { + gmx_fio_do_gmx_bool(fio, ir->bQMMM); + gmx_fio_do_int(fio, ir->QMMMscheme); + gmx_fio_do_real(fio, ir->scalefactor); + gmx_fio_do_int(fio, ir->opts.ngQM); + if (bRead) + { + snew(ir->opts.QMmethod, ir->opts.ngQM); + snew(ir->opts.QMbasis, ir->opts.ngQM); + snew(ir->opts.QMcharge, ir->opts.ngQM); + snew(ir->opts.QMmult, ir->opts.ngQM); + snew(ir->opts.bSH, ir->opts.ngQM); + snew(ir->opts.CASorbitals, ir->opts.ngQM); + snew(ir->opts.CASelectrons, ir->opts.ngQM); + snew(ir->opts.SAon, ir->opts.ngQM); + snew(ir->opts.SAoff, ir->opts.ngQM); + snew(ir->opts.SAsteps, ir->opts.ngQM); + snew(ir->opts.bOPT, ir->opts.ngQM); + 
snew(ir->opts.bTS, ir->opts.ngQM); + } + if (ir->opts.ngQM > 0) + { + bDum = gmx_fio_ndo_int(fio, ir->opts.QMmethod, ir->opts.ngQM); + bDum = gmx_fio_ndo_int(fio, ir->opts.QMbasis, ir->opts.ngQM); + bDum = gmx_fio_ndo_int(fio, ir->opts.QMcharge, ir->opts.ngQM); + bDum = gmx_fio_ndo_int(fio, ir->opts.QMmult, ir->opts.ngQM); + bDum = gmx_fio_ndo_gmx_bool(fio, ir->opts.bSH, ir->opts.ngQM); + bDum = gmx_fio_ndo_int(fio, ir->opts.CASorbitals, ir->opts.ngQM); + bDum = gmx_fio_ndo_int(fio, ir->opts.CASelectrons, ir->opts.ngQM); + bDum = gmx_fio_ndo_real(fio, ir->opts.SAon, ir->opts.ngQM); + bDum = gmx_fio_ndo_real(fio, ir->opts.SAoff, ir->opts.ngQM); + bDum = gmx_fio_ndo_int(fio, ir->opts.SAsteps, ir->opts.ngQM); + bDum = gmx_fio_ndo_gmx_bool(fio, ir->opts.bOPT, ir->opts.ngQM); + bDum = gmx_fio_ndo_gmx_bool(fio, ir->opts.bTS, ir->opts.ngQM); + } + /* end of QMMM stuff */ + } +} + + /* do_harm: passes the four harmonic parameters rA, krA, rB, krB (in that order) through gmx_fio_do_real. bRead is accepted but not used. */ +static void do_harm(t_fileio *fio, t_iparams *iparams, gmx_bool bRead) +{ + gmx_fio_do_real(fio, iparams->harmonic.rA); + gmx_fio_do_real(fio, iparams->harmonic.krA); + gmx_fio_do_real(fio, iparams->harmonic.rB); + gmx_fio_do_real(fio, iparams->harmonic.krB); +} + /* do_iparams: passes the parameters of one interaction through the gmx_fio_do_* calls; ftype selects which member of the iparams union is used and in which order its fields go. Some cases depend on file_version and fill in B-state values from the A-state when the file does not carry them. When bRead is not set, the interaction name is set as comment on fio before the transfer and removed afterwards. An ftype without a case ends in gmx_fatal. */ +void do_iparams(t_fileio *fio, t_functype ftype, t_iparams *iparams, + gmx_bool bRead, int file_version) +{ + int idum; + gmx_bool bDum; + real rdum; + + if (!bRead) + { + gmx_fio_set_comment(fio, interaction_function[ftype].name); + } + switch (ftype) + { + case F_ANGLES: + case F_G96ANGLES: + case F_BONDS: + case F_G96BONDS: + case F_HARMONIC: + case F_IDIHS: + do_harm(fio, iparams, bRead); /* NOTE(review): F_ANGRES and F_ANGRESZ are not among the case labels of this group (they are handled with F_PDIHS further down), so the condition below can never be true here */ + if ((ftype == F_ANGRES || ftype == F_ANGRESZ) && bRead) + { + /* Correct incorrect storage of parameters */ + iparams->pdihs.phiB = iparams->pdihs.phiA; + iparams->pdihs.cpB = iparams->pdihs.cpA; + } + break; + case F_LINEAR_ANGLES: + gmx_fio_do_real(fio, iparams->linangle.klinA); + gmx_fio_do_real(fio, iparams->linangle.aA); + gmx_fio_do_real(fio, iparams->linangle.klinB); + gmx_fio_do_real(fio, iparams->linangle.aB); + break; + case 
F_FENEBONDS: + gmx_fio_do_real(fio, iparams->fene.bm); + gmx_fio_do_real(fio, iparams->fene.kb); + break; + case F_RESTRBONDS: + gmx_fio_do_real(fio, iparams->restraint.lowA); + gmx_fio_do_real(fio, iparams->restraint.up1A); + gmx_fio_do_real(fio, iparams->restraint.up2A); + gmx_fio_do_real(fio, iparams->restraint.kA); + gmx_fio_do_real(fio, iparams->restraint.lowB); + gmx_fio_do_real(fio, iparams->restraint.up1B); + gmx_fio_do_real(fio, iparams->restraint.up2B); + gmx_fio_do_real(fio, iparams->restraint.kB); + break; + case F_TABBONDS: + case F_TABBONDSNC: + case F_TABANGLES: + case F_TABDIHS: + gmx_fio_do_real(fio, iparams->tab.kA); + gmx_fio_do_int(fio, iparams->tab.table); + gmx_fio_do_real(fio, iparams->tab.kB); + break; + case F_CROSS_BOND_BONDS: + gmx_fio_do_real(fio, iparams->cross_bb.r1e); + gmx_fio_do_real(fio, iparams->cross_bb.r2e); + gmx_fio_do_real(fio, iparams->cross_bb.krr); + break; + case F_CROSS_BOND_ANGLES: + gmx_fio_do_real(fio, iparams->cross_ba.r1e); + gmx_fio_do_real(fio, iparams->cross_ba.r2e); + gmx_fio_do_real(fio, iparams->cross_ba.r3e); + gmx_fio_do_real(fio, iparams->cross_ba.krt); + break; + case F_UREY_BRADLEY: + gmx_fio_do_real(fio, iparams->u_b.thetaA); + gmx_fio_do_real(fio, iparams->u_b.kthetaA); + gmx_fio_do_real(fio, iparams->u_b.r13A); + gmx_fio_do_real(fio, iparams->u_b.kUBA); + if (file_version >= 79) + { + gmx_fio_do_real(fio, iparams->u_b.thetaB); + gmx_fio_do_real(fio, iparams->u_b.kthetaB); + gmx_fio_do_real(fio, iparams->u_b.r13B); + gmx_fio_do_real(fio, iparams->u_b.kUBB); + } + else + { /* B-state values are not transferred before version 79; copy the A-state values */ + iparams->u_b.thetaB = iparams->u_b.thetaA; + iparams->u_b.kthetaB = iparams->u_b.kthetaA; + iparams->u_b.r13B = iparams->u_b.r13A; + iparams->u_b.kUBB = iparams->u_b.kUBA; + } + break; + case F_QUARTIC_ANGLES: + gmx_fio_do_real(fio, iparams->qangle.theta); + bDum = gmx_fio_ndo_real(fio, iparams->qangle.c, 5); + break; + case F_BHAM: + gmx_fio_do_real(fio, iparams->bham.a); + gmx_fio_do_real(fio, iparams->bham.b); + 
gmx_fio_do_real(fio, iparams->bham.c); + break; + case F_MORSE: + gmx_fio_do_real(fio, iparams->morse.b0A); + gmx_fio_do_real(fio, iparams->morse.cbA); + gmx_fio_do_real(fio, iparams->morse.betaA); + if (file_version >= 79) + { + gmx_fio_do_real(fio, iparams->morse.b0B); + gmx_fio_do_real(fio, iparams->morse.cbB); + gmx_fio_do_real(fio, iparams->morse.betaB); + } + else + { /* B-state values are not transferred before version 79; copy the A-state values */ + iparams->morse.b0B = iparams->morse.b0A; + iparams->morse.cbB = iparams->morse.cbA; + iparams->morse.betaB = iparams->morse.betaA; + } + break; + case F_CUBICBONDS: + gmx_fio_do_real(fio, iparams->cubic.b0); + gmx_fio_do_real(fio, iparams->cubic.kb); + gmx_fio_do_real(fio, iparams->cubic.kcub); + break; + case F_CONNBONDS: + break; + case F_POLARIZATION: + gmx_fio_do_real(fio, iparams->polarize.alpha); + break; + case F_ANHARM_POL: + gmx_fio_do_real(fio, iparams->anharm_polarize.alpha); + gmx_fio_do_real(fio, iparams->anharm_polarize.drcut); + gmx_fio_do_real(fio, iparams->anharm_polarize.khyp); + break; + case F_WATER_POL: + if (file_version < 31) + { + gmx_fatal(FARGS, "Old tpr files with water_polarization not supported. 
Make a new."); + } + gmx_fio_do_real(fio, iparams->wpol.al_x); + gmx_fio_do_real(fio, iparams->wpol.al_y); + gmx_fio_do_real(fio, iparams->wpol.al_z); + gmx_fio_do_real(fio, iparams->wpol.rOH); + gmx_fio_do_real(fio, iparams->wpol.rHH); + gmx_fio_do_real(fio, iparams->wpol.rOD); + break; + case F_THOLE_POL: + gmx_fio_do_real(fio, iparams->thole.a); + gmx_fio_do_real(fio, iparams->thole.alpha1); + gmx_fio_do_real(fio, iparams->thole.alpha2); + gmx_fio_do_real(fio, iparams->thole.rfac); + break; + case F_LJ: + gmx_fio_do_real(fio, iparams->lj.c6); + gmx_fio_do_real(fio, iparams->lj.c12); + break; + case F_LJ14: + gmx_fio_do_real(fio, iparams->lj14.c6A); + gmx_fio_do_real(fio, iparams->lj14.c12A); + gmx_fio_do_real(fio, iparams->lj14.c6B); + gmx_fio_do_real(fio, iparams->lj14.c12B); + break; + case F_LJC14_Q: + gmx_fio_do_real(fio, iparams->ljc14.fqq); + gmx_fio_do_real(fio, iparams->ljc14.qi); + gmx_fio_do_real(fio, iparams->ljc14.qj); + gmx_fio_do_real(fio, iparams->ljc14.c6); + gmx_fio_do_real(fio, iparams->ljc14.c12); + break; + case F_LJC_PAIRS_NB: + gmx_fio_do_real(fio, iparams->ljcnb.qi); + gmx_fio_do_real(fio, iparams->ljcnb.qj); + gmx_fio_do_real(fio, iparams->ljcnb.c6); + gmx_fio_do_real(fio, iparams->ljcnb.c12); + break; + case F_PDIHS: + case F_PIDIHS: + case F_ANGRES: + case F_ANGRESZ: + gmx_fio_do_real(fio, iparams->pdihs.phiA); + gmx_fio_do_real(fio, iparams->pdihs.cpA); + if ((ftype == F_ANGRES || ftype == F_ANGRESZ) && file_version < 42) + { + /* Read the incorrectly stored multiplicity */ + gmx_fio_do_real(fio, iparams->harmonic.rB); + gmx_fio_do_real(fio, iparams->harmonic.krB); + iparams->pdihs.phiB = iparams->pdihs.phiA; + iparams->pdihs.cpB = iparams->pdihs.cpA; + } + else + { + gmx_fio_do_real(fio, iparams->pdihs.phiB); + gmx_fio_do_real(fio, iparams->pdihs.cpB); + gmx_fio_do_int(fio, iparams->pdihs.mult); + } + break; + case F_DISRES: + gmx_fio_do_int(fio, iparams->disres.label); + gmx_fio_do_int(fio, iparams->disres.type); + 
gmx_fio_do_real(fio, iparams->disres.low); + gmx_fio_do_real(fio, iparams->disres.up1); + gmx_fio_do_real(fio, iparams->disres.up2); + gmx_fio_do_real(fio, iparams->disres.kfac); + break; + case F_ORIRES: + gmx_fio_do_int(fio, iparams->orires.ex); + gmx_fio_do_int(fio, iparams->orires.label); + gmx_fio_do_int(fio, iparams->orires.power); + gmx_fio_do_real(fio, iparams->orires.c); + gmx_fio_do_real(fio, iparams->orires.obs); + gmx_fio_do_real(fio, iparams->orires.kfac); + break; + case F_DIHRES: - if (file_version < 72) ++ if (file_version < 82) + { + gmx_fio_do_int(fio, idum); + gmx_fio_do_int(fio, idum); + } + gmx_fio_do_real(fio, iparams->dihres.phiA); + gmx_fio_do_real(fio, iparams->dihres.dphiA); + gmx_fio_do_real(fio, iparams->dihres.kfacA); - if (file_version >= 72) ++ if (file_version >= 82) + { + gmx_fio_do_real(fio, iparams->dihres.phiB); + gmx_fio_do_real(fio, iparams->dihres.dphiB); + gmx_fio_do_real(fio, iparams->dihres.kfacB); + } + else + { /* no B-state values in the file for this version; copy the A-state values */ + iparams->dihres.phiB = iparams->dihres.phiA; + iparams->dihres.dphiB = iparams->dihres.dphiA; + iparams->dihres.kfacB = iparams->dihres.kfacA; + } + break; + case F_POSRES: + gmx_fio_do_rvec(fio, iparams->posres.pos0A); + gmx_fio_do_rvec(fio, iparams->posres.fcA); + if (bRead && file_version < 27) + { + copy_rvec(iparams->posres.pos0A, iparams->posres.pos0B); + copy_rvec(iparams->posres.fcA, iparams->posres.fcB); + } + else + { + gmx_fio_do_rvec(fio, iparams->posres.pos0B); + gmx_fio_do_rvec(fio, iparams->posres.fcB); + } + break; + case F_FBPOSRES: + gmx_fio_do_int(fio, iparams->fbposres.geom); + gmx_fio_do_rvec(fio, iparams->fbposres.pos0); + gmx_fio_do_real(fio, iparams->fbposres.r); + gmx_fio_do_real(fio, iparams->fbposres.k); + break; + case F_RBDIHS: + bDum = gmx_fio_ndo_real(fio, iparams->rbdihs.rbcA, NR_RBDIHS); + if (file_version >= 25) + { + bDum = gmx_fio_ndo_real(fio, iparams->rbdihs.rbcB, NR_RBDIHS); + } + break; + case F_FOURDIHS: + /* Fourier dihedrals are internally represented + * as 
Ryckaert-Bellemans since those are faster to compute. + */ + bDum = gmx_fio_ndo_real(fio, iparams->rbdihs.rbcA, NR_RBDIHS); + bDum = gmx_fio_ndo_real(fio, iparams->rbdihs.rbcB, NR_RBDIHS); + break; + case F_CONSTR: + case F_CONSTRNC: + gmx_fio_do_real(fio, iparams->constr.dA); + gmx_fio_do_real(fio, iparams->constr.dB); + break; + case F_SETTLE: + gmx_fio_do_real(fio, iparams->settle.doh); + gmx_fio_do_real(fio, iparams->settle.dhh); + break; + case F_VSITE2: + gmx_fio_do_real(fio, iparams->vsite.a); + break; + case F_VSITE3: + case F_VSITE3FD: + case F_VSITE3FAD: + gmx_fio_do_real(fio, iparams->vsite.a); + gmx_fio_do_real(fio, iparams->vsite.b); + break; + case F_VSITE3OUT: + case F_VSITE4FD: + case F_VSITE4FDN: + gmx_fio_do_real(fio, iparams->vsite.a); + gmx_fio_do_real(fio, iparams->vsite.b); + gmx_fio_do_real(fio, iparams->vsite.c); + break; + case F_VSITEN: + gmx_fio_do_int(fio, iparams->vsiten.n); + gmx_fio_do_real(fio, iparams->vsiten.a); + break; + case F_GB12: + case F_GB13: + case F_GB14: + /* We got rid of some parameters in version 68 */ + if (bRead && file_version < 68) + { + gmx_fio_do_real(fio, rdum); + gmx_fio_do_real(fio, rdum); + gmx_fio_do_real(fio, rdum); + gmx_fio_do_real(fio, rdum); + } + gmx_fio_do_real(fio, iparams->gb.sar); + gmx_fio_do_real(fio, iparams->gb.st); + gmx_fio_do_real(fio, iparams->gb.pi); + gmx_fio_do_real(fio, iparams->gb.gbr); + gmx_fio_do_real(fio, iparams->gb.bmlt); + break; + case F_CMAP: + gmx_fio_do_int(fio, iparams->cmap.cmapA); + gmx_fio_do_int(fio, iparams->cmap.cmapB); + break; + default: + gmx_fatal(FARGS, "unknown function type %d (%s) in %s line %d", + ftype, interaction_function[ftype].name, __FILE__, __LINE__); + } + if (!bRead) + { + gmx_fio_unset_comment(fio); + } +} + +static void do_ilist(t_fileio *fio, t_ilist *ilist, gmx_bool bRead, int file_version, + int ftype) +{ + int i, k, idum; + gmx_bool bDum = TRUE; + + if (!bRead) + { + gmx_fio_set_comment(fio, interaction_function[ftype].name); + } + if 
(file_version < 44) + { + for (i = 0; i < MAXNODES; i++) + { + gmx_fio_do_int(fio, idum); + } + } + gmx_fio_do_int(fio, ilist->nr); + if (bRead) + { + snew(ilist->iatoms, ilist->nr); + } + bDum = gmx_fio_ndo_int(fio, ilist->iatoms, ilist->nr); + if (!bRead) + { + gmx_fio_unset_comment(fio); + } +} + +static void do_ffparams(t_fileio *fio, gmx_ffparams_t *ffparams, + gmx_bool bRead, int file_version) +{ + int idum, i, j; + gmx_bool bDum = TRUE; + unsigned int k; + + gmx_fio_do_int(fio, ffparams->atnr); + if (file_version < 57) + { + gmx_fio_do_int(fio, idum); + } + gmx_fio_do_int(fio, ffparams->ntypes); + if (bRead && debug) + { + fprintf(debug, "ffparams->atnr = %d, ntypes = %d\n", + ffparams->atnr, ffparams->ntypes); + } + if (bRead) + { + snew(ffparams->functype, ffparams->ntypes); + snew(ffparams->iparams, ffparams->ntypes); + } + /* Read/write all the function types */ + bDum = gmx_fio_ndo_int(fio, ffparams->functype, ffparams->ntypes); + if (bRead && debug) + { + pr_ivec(debug, 0, "functype", ffparams->functype, ffparams->ntypes, TRUE); + } + + if (file_version >= 66) + { + gmx_fio_do_double(fio, ffparams->reppow); + } + else + { + ffparams->reppow = 12.0; + } + + if (file_version >= 57) + { + gmx_fio_do_real(fio, ffparams->fudgeQQ); + } + + /* Check whether all these function types are supported by the code. 
+ * In practice the code is backwards compatible, which means that the + * numbering may have to be altered from old numbering to new numbering + */ + for (i = 0; (i < ffparams->ntypes); i++) + { + if (bRead) + { + /* Loop over file versions */ + for (k = 0; (k < NFTUPD); k++) + { + /* Compare the read file_version to the update table */ + if ((file_version < ftupd[k].fvnr) && + (ffparams->functype[i] >= ftupd[k].ftype)) + { + ffparams->functype[i] += 1; + if (debug) + { + fprintf(debug, "Incrementing function type %d to %d (due to %s)\n", + i, ffparams->functype[i], + interaction_function[ftupd[k].ftype].longname); + fflush(debug); + } + } + } + } + + do_iparams(fio, ffparams->functype[i], &ffparams->iparams[i], bRead, + file_version); + if (bRead && debug) + { + pr_iparams(debug, ffparams->functype[i], &ffparams->iparams[i]); + } + } +} + +static void add_settle_atoms(t_ilist *ilist) +{ + int i; + + /* Settle used to only store the first atom: add the other two */ + srenew(ilist->iatoms, 2*ilist->nr); + for (i = ilist->nr/2-1; i >= 0; i--) + { + ilist->iatoms[4*i+0] = ilist->iatoms[2*i+0]; + ilist->iatoms[4*i+1] = ilist->iatoms[2*i+1]; + ilist->iatoms[4*i+2] = ilist->iatoms[2*i+1] + 1; + ilist->iatoms[4*i+3] = ilist->iatoms[2*i+1] + 2; + } + ilist->nr = 2*ilist->nr; +} + +static void do_ilists(t_fileio *fio, t_ilist *ilist, gmx_bool bRead, + int file_version) +{ + int i, j, renum[F_NRE]; + gmx_bool bDum = TRUE, bClear; + unsigned int k; + + for (j = 0; (j < F_NRE); j++) + { + bClear = FALSE; + if (bRead) + { + for (k = 0; k < NFTUPD; k++) + { + if ((file_version < ftupd[k].fvnr) && (j == ftupd[k].ftype)) + { + bClear = TRUE; + } + } + } + if (bClear) + { + ilist[j].nr = 0; + ilist[j].iatoms = NULL; + } + else + { + do_ilist(fio, &ilist[j], bRead, file_version, j); + if (file_version < 78 && j == F_SETTLE && ilist[j].nr > 0) + { + add_settle_atoms(&ilist[j]); + } + } + /* + if (bRead && gmx_debug_at) + pr_ilist(debug,0,interaction_function[j].longname, + 
functype,&ilist[j],TRUE); + */ + } +} + +static void do_idef(t_fileio *fio, gmx_ffparams_t *ffparams, gmx_moltype_t *molt, + gmx_bool bRead, int file_version) +{ + do_ffparams(fio, ffparams, bRead, file_version); + + if (file_version >= 54) + { + gmx_fio_do_real(fio, ffparams->fudgeQQ); + } + + do_ilists(fio, molt->ilist, bRead, file_version); +} + +static void do_block(t_fileio *fio, t_block *block, gmx_bool bRead, int file_version) +{ + int i, idum, dum_nra, *dum_a; + gmx_bool bDum = TRUE; + + if (file_version < 44) + { + for (i = 0; i < MAXNODES; i++) + { + gmx_fio_do_int(fio, idum); + } + } + gmx_fio_do_int(fio, block->nr); + if (file_version < 51) + { + gmx_fio_do_int(fio, dum_nra); + } + if (bRead) + { + if ((block->nalloc_index > 0) && (NULL != block->index)) + { + sfree(block->index); + } + block->nalloc_index = block->nr+1; + snew(block->index, block->nalloc_index); + } + bDum = gmx_fio_ndo_int(fio, block->index, block->nr+1); + + if (file_version < 51 && dum_nra > 0) + { + snew(dum_a, dum_nra); + bDum = gmx_fio_ndo_int(fio, dum_a, dum_nra); + sfree(dum_a); + } +} + +static void do_blocka(t_fileio *fio, t_blocka *block, gmx_bool bRead, + int file_version) +{ + int i, idum; + gmx_bool bDum = TRUE; + + if (file_version < 44) + { + for (i = 0; i < MAXNODES; i++) + { + gmx_fio_do_int(fio, idum); + } + } + gmx_fio_do_int(fio, block->nr); + gmx_fio_do_int(fio, block->nra); + if (bRead) + { + block->nalloc_index = block->nr+1; + snew(block->index, block->nalloc_index); + block->nalloc_a = block->nra; + snew(block->a, block->nalloc_a); + } + bDum = gmx_fio_ndo_int(fio, block->index, block->nr+1); + bDum = gmx_fio_ndo_int(fio, block->a, block->nra); +} + +static void do_atom(t_fileio *fio, t_atom *atom, int ngrp, gmx_bool bRead, + int file_version, gmx_groups_t *groups, int atnr) +{ + int i, myngrp; + + gmx_fio_do_real(fio, atom->m); + gmx_fio_do_real(fio, atom->q); + gmx_fio_do_real(fio, atom->mB); + gmx_fio_do_real(fio, atom->qB); + gmx_fio_do_ushort(fio, 
atom->type); + gmx_fio_do_ushort(fio, atom->typeB); + gmx_fio_do_int(fio, atom->ptype); + gmx_fio_do_int(fio, atom->resind); + if (file_version >= 52) + { + gmx_fio_do_int(fio, atom->atomnumber); + } + else if (bRead) + { + atom->atomnumber = NOTSET; + } + if (file_version < 23) + { + myngrp = 8; + } + else if (file_version < 39) + { + myngrp = 9; + } + else + { + myngrp = ngrp; + } + + if (file_version < 57) + { + unsigned char uchar[egcNR]; + gmx_fio_ndo_uchar(fio, uchar, myngrp); + for (i = myngrp; (i < ngrp); i++) + { + uchar[i] = 0; + } + /* Copy the old data format to the groups struct */ + for (i = 0; i < ngrp; i++) + { + groups->grpnr[i][atnr] = uchar[i]; + } + } +} + +static void do_grps(t_fileio *fio, int ngrp, t_grps grps[], gmx_bool bRead, + int file_version) +{ + int i, j, myngrp; + gmx_bool bDum = TRUE; + + if (file_version < 23) + { + myngrp = 8; + } + else if (file_version < 39) + { + myngrp = 9; + } + else + { + myngrp = ngrp; + } + + for (j = 0; (j < ngrp); j++) + { + if (j < myngrp) + { + gmx_fio_do_int(fio, grps[j].nr); + if (bRead) + { + snew(grps[j].nm_ind, grps[j].nr); + } + bDum = gmx_fio_ndo_int(fio, grps[j].nm_ind, grps[j].nr); + } + else + { + grps[j].nr = 1; + snew(grps[j].nm_ind, grps[j].nr); + } + } +} + +static void do_symstr(t_fileio *fio, char ***nm, gmx_bool bRead, t_symtab *symtab) +{ + int ls; + + if (bRead) + { + gmx_fio_do_int(fio, ls); + *nm = get_symtab_handle(symtab, ls); + } + else + { + ls = lookup_symtab(symtab, *nm); + gmx_fio_do_int(fio, ls); + } +} + +static void do_strstr(t_fileio *fio, int nstr, char ***nm, gmx_bool bRead, + t_symtab *symtab) +{ + int j; + + for (j = 0; (j < nstr); j++) + { + do_symstr(fio, &(nm[j]), bRead, symtab); + } +} + +static void do_resinfo(t_fileio *fio, int n, t_resinfo *ri, gmx_bool bRead, + t_symtab *symtab, int file_version) +{ + int j; + + for (j = 0; (j < n); j++) + { + do_symstr(fio, &(ri[j].name), bRead, symtab); + if (file_version >= 63) + { + gmx_fio_do_int(fio, ri[j].nr); + 
gmx_fio_do_uchar(fio, ri[j].ic); + } + else + { + ri[j].nr = j + 1; + ri[j].ic = ' '; + } + } +} + +static void do_atoms(t_fileio *fio, t_atoms *atoms, gmx_bool bRead, t_symtab *symtab, + int file_version, + gmx_groups_t *groups) +{ + int i; + + gmx_fio_do_int(fio, atoms->nr); + gmx_fio_do_int(fio, atoms->nres); + if (file_version < 57) + { + gmx_fio_do_int(fio, groups->ngrpname); + for (i = 0; i < egcNR; i++) + { + groups->ngrpnr[i] = atoms->nr; + snew(groups->grpnr[i], groups->ngrpnr[i]); + } + } + if (bRead) + { + snew(atoms->atom, atoms->nr); + snew(atoms->atomname, atoms->nr); + snew(atoms->atomtype, atoms->nr); + snew(atoms->atomtypeB, atoms->nr); + snew(atoms->resinfo, atoms->nres); + if (file_version < 57) + { + snew(groups->grpname, groups->ngrpname); + } + atoms->pdbinfo = NULL; + } + for (i = 0; (i < atoms->nr); i++) + { + do_atom(fio, &atoms->atom[i], egcNR, bRead, file_version, groups, i); + } + do_strstr(fio, atoms->nr, atoms->atomname, bRead, symtab); + if (bRead && (file_version <= 20)) + { + for (i = 0; i < atoms->nr; i++) + { + atoms->atomtype[i] = put_symtab(symtab, "?"); + atoms->atomtypeB[i] = put_symtab(symtab, "?"); + } + } + else + { + do_strstr(fio, atoms->nr, atoms->atomtype, bRead, symtab); + do_strstr(fio, atoms->nr, atoms->atomtypeB, bRead, symtab); + } + do_resinfo(fio, atoms->nres, atoms->resinfo, bRead, symtab, file_version); + + if (file_version < 57) + { + do_strstr(fio, groups->ngrpname, groups->grpname, bRead, symtab); + + do_grps(fio, egcNR, groups->grps, bRead, file_version); + } +} + +static void do_groups(t_fileio *fio, gmx_groups_t *groups, + gmx_bool bRead, t_symtab *symtab, + int file_version) +{ + int g, n, i; + gmx_bool bDum = TRUE; + + do_grps(fio, egcNR, groups->grps, bRead, file_version); + gmx_fio_do_int(fio, groups->ngrpname); + if (bRead) + { + snew(groups->grpname, groups->ngrpname); + } + do_strstr(fio, groups->ngrpname, groups->grpname, bRead, symtab); + for (g = 0; g < egcNR; g++) + { + gmx_fio_do_int(fio, 
groups->ngrpnr[g]); + if (groups->ngrpnr[g] == 0) + { + if (bRead) + { + groups->grpnr[g] = NULL; + } + } + else + { + if (bRead) + { + snew(groups->grpnr[g], groups->ngrpnr[g]); + } + bDum = gmx_fio_ndo_uchar(fio, groups->grpnr[g], groups->ngrpnr[g]); + } + } +} + +static void do_atomtypes(t_fileio *fio, t_atomtypes *atomtypes, gmx_bool bRead, + t_symtab *symtab, int file_version) +{ + int i, j; + gmx_bool bDum = TRUE; + + if (file_version > 25) + { + gmx_fio_do_int(fio, atomtypes->nr); + j = atomtypes->nr; + if (bRead) + { + snew(atomtypes->radius, j); + snew(atomtypes->vol, j); + snew(atomtypes->surftens, j); + snew(atomtypes->atomnumber, j); + snew(atomtypes->gb_radius, j); + snew(atomtypes->S_hct, j); + } + bDum = gmx_fio_ndo_real(fio, atomtypes->radius, j); + bDum = gmx_fio_ndo_real(fio, atomtypes->vol, j); + bDum = gmx_fio_ndo_real(fio, atomtypes->surftens, j); + if (file_version >= 40) + { + bDum = gmx_fio_ndo_int(fio, atomtypes->atomnumber, j); + } + if (file_version >= 60) + { + bDum = gmx_fio_ndo_real(fio, atomtypes->gb_radius, j); + bDum = gmx_fio_ndo_real(fio, atomtypes->S_hct, j); + } + } + else + { + /* File versions prior to 26 cannot do GBSA, + * so they dont use this structure + */ + atomtypes->nr = 0; + atomtypes->radius = NULL; + atomtypes->vol = NULL; + atomtypes->surftens = NULL; + atomtypes->atomnumber = NULL; + atomtypes->gb_radius = NULL; + atomtypes->S_hct = NULL; + } +} + +static void do_symtab(t_fileio *fio, t_symtab *symtab, gmx_bool bRead) +{ + int i, nr; + t_symbuf *symbuf; + char buf[STRLEN]; + + gmx_fio_do_int(fio, symtab->nr); + nr = symtab->nr; + if (bRead) + { + snew(symtab->symbuf, 1); + symbuf = symtab->symbuf; + symbuf->bufsize = nr; + snew(symbuf->buf, nr); + for (i = 0; (i < nr); i++) + { + gmx_fio_do_string(fio, buf); + symbuf->buf[i] = strdup(buf); + } + } + else + { + symbuf = symtab->symbuf; + while (symbuf != NULL) + { + for (i = 0; (i < symbuf->bufsize) && (i < nr); i++) + { + gmx_fio_do_string(fio, symbuf->buf[i]); + 
} + nr -= i; + symbuf = symbuf->next; + } + if (nr != 0) + { + gmx_fatal(FARGS, "nr of symtab strings left: %d", nr); + } + } +} + +static void do_cmap(t_fileio *fio, gmx_cmap_t *cmap_grid, gmx_bool bRead) +{ + int i, j, ngrid, gs, nelem; + + gmx_fio_do_int(fio, cmap_grid->ngrid); + gmx_fio_do_int(fio, cmap_grid->grid_spacing); + + ngrid = cmap_grid->ngrid; + gs = cmap_grid->grid_spacing; + nelem = gs * gs; + + if (bRead) + { + snew(cmap_grid->cmapdata, ngrid); + + for (i = 0; i < cmap_grid->ngrid; i++) + { + snew(cmap_grid->cmapdata[i].cmap, 4*nelem); + } + } + + for (i = 0; i < cmap_grid->ngrid; i++) + { + for (j = 0; j < nelem; j++) + { + gmx_fio_do_real(fio, cmap_grid->cmapdata[i].cmap[j*4]); + gmx_fio_do_real(fio, cmap_grid->cmapdata[i].cmap[j*4+1]); + gmx_fio_do_real(fio, cmap_grid->cmapdata[i].cmap[j*4+2]); + gmx_fio_do_real(fio, cmap_grid->cmapdata[i].cmap[j*4+3]); + } + } +} + + +void tpx_make_chain_identifiers(t_atoms *atoms, t_block *mols) +{ + int m, a, a0, a1, r; + char c, chainid; + int chainnum; + + /* We always assign a new chain number, but save the chain id characters + * for larger molecules. 
+ */ +#define CHAIN_MIN_ATOMS 15 + + chainnum = 0; + chainid = 'A'; + for (m = 0; m < mols->nr; m++) + { + a0 = mols->index[m]; + a1 = mols->index[m+1]; + if ((a1-a0 >= CHAIN_MIN_ATOMS) && (chainid <= 'Z')) + { + c = chainid; + chainid++; + } + else + { + c = ' '; + } + for (a = a0; a < a1; a++) + { + atoms->resinfo[atoms->atom[a].resind].chainnum = chainnum; + atoms->resinfo[atoms->atom[a].resind].chainid = c; + } + chainnum++; + } + + /* Blank out the chain id if there was only one chain */ + if (chainid == 'B') + { + for (r = 0; r < atoms->nres; r++) + { + atoms->resinfo[r].chainid = ' '; + } + } +} + +static void do_moltype(t_fileio *fio, gmx_moltype_t *molt, gmx_bool bRead, + t_symtab *symtab, int file_version, + gmx_groups_t *groups) +{ + int i; + + if (file_version >= 57) + { + do_symstr(fio, &(molt->name), bRead, symtab); + } + + do_atoms(fio, &molt->atoms, bRead, symtab, file_version, groups); + + if (bRead && gmx_debug_at) + { + pr_atoms(debug, 0, "atoms", &molt->atoms, TRUE); + } + + if (file_version >= 57) + { + do_ilists(fio, molt->ilist, bRead, file_version); + + do_block(fio, &molt->cgs, bRead, file_version); + if (bRead && gmx_debug_at) + { + pr_block(debug, 0, "cgs", &molt->cgs, TRUE); + } + } + + /* This used to be in the atoms struct */ + do_blocka(fio, &molt->excls, bRead, file_version); +} + +static void do_molblock(t_fileio *fio, gmx_molblock_t *molb, gmx_bool bRead, + int file_version) +{ + int i; + + gmx_fio_do_int(fio, molb->type); + gmx_fio_do_int(fio, molb->nmol); + gmx_fio_do_int(fio, molb->natoms_mol); + /* Position restraint coordinates */ + gmx_fio_do_int(fio, molb->nposres_xA); + if (molb->nposres_xA > 0) + { + if (bRead) + { + snew(molb->posres_xA, molb->nposres_xA); + } + gmx_fio_ndo_rvec(fio, molb->posres_xA, molb->nposres_xA); + } + gmx_fio_do_int(fio, molb->nposres_xB); + if (molb->nposres_xB > 0) + { + if (bRead) + { + snew(molb->posres_xB, molb->nposres_xB); + } + gmx_fio_ndo_rvec(fio, molb->posres_xB, molb->nposres_xB); + } + 
+} + +static t_block mtop_mols(gmx_mtop_t *mtop) +{ + int mb, m, a, mol; + t_block mols; + + mols.nr = 0; + for (mb = 0; mb < mtop->nmolblock; mb++) + { + mols.nr += mtop->molblock[mb].nmol; + } + mols.nalloc_index = mols.nr + 1; + snew(mols.index, mols.nalloc_index); + + a = 0; + m = 0; + mols.index[m] = a; + for (mb = 0; mb < mtop->nmolblock; mb++) + { + for (mol = 0; mol < mtop->molblock[mb].nmol; mol++) + { + a += mtop->molblock[mb].natoms_mol; + m++; + mols.index[m] = a; + } + } + + return mols; +} + +static void add_posres_molblock(gmx_mtop_t *mtop) +{ + t_ilist *il, *ilfb; + int am, i, mol, a; + gmx_bool bFE; + gmx_molblock_t *molb; + t_iparams *ip; + + /* posres reference positions are stored in ip->posres (if present) and + in ip->fbposres (if present). If normal and flat-bottomed posres are present, + posres.pos0A are identical to fbposres.pos0. */ + il = &mtop->moltype[0].ilist[F_POSRES]; + ilfb = &mtop->moltype[0].ilist[F_FBPOSRES]; + if (il->nr == 0 && ilfb->nr == 0) + { + return; + } + am = 0; + bFE = FALSE; + for (i = 0; i < il->nr; i += 2) + { + ip = &mtop->ffparams.iparams[il->iatoms[i]]; + am = max(am, il->iatoms[i+1]); + if (ip->posres.pos0B[XX] != ip->posres.pos0A[XX] || + ip->posres.pos0B[YY] != ip->posres.pos0A[YY] || + ip->posres.pos0B[ZZ] != ip->posres.pos0A[ZZ]) + { + bFE = TRUE; + } + } + /* This loop is required if we have only flat-bottomed posres: + - set am + - bFE == FALSE (no B-state for flat-bottomed posres) */ + if (il->nr == 0) + { + for (i = 0; i < ilfb->nr; i += 2) + { + ip = &mtop->ffparams.iparams[ilfb->iatoms[i]]; + am = max(am, ilfb->iatoms[i+1]); + } + } + /* Make the posres coordinate block end at a molecule end */ + mol = 0; + while (am >= mtop->mols.index[mol+1]) + { + mol++; + } + molb = &mtop->molblock[0]; + molb->nposres_xA = mtop->mols.index[mol+1]; + snew(molb->posres_xA, molb->nposres_xA); + if (bFE) + { + molb->nposres_xB = molb->nposres_xA; + snew(molb->posres_xB, molb->nposres_xB); + } + else + { + 
molb->nposres_xB = 0; + } + for (i = 0; i < il->nr; i += 2) + { + ip = &mtop->ffparams.iparams[il->iatoms[i]]; + a = il->iatoms[i+1]; + molb->posres_xA[a][XX] = ip->posres.pos0A[XX]; + molb->posres_xA[a][YY] = ip->posres.pos0A[YY]; + molb->posres_xA[a][ZZ] = ip->posres.pos0A[ZZ]; + if (bFE) + { + molb->posres_xB[a][XX] = ip->posres.pos0B[XX]; + molb->posres_xB[a][YY] = ip->posres.pos0B[YY]; + molb->posres_xB[a][ZZ] = ip->posres.pos0B[ZZ]; + } + } + if (il->nr == 0) + { + /* If only flat-bottomed posres are present, take reference pos from them. + Here: bFE == FALSE */ + for (i = 0; i < ilfb->nr; i += 2) + { + ip = &mtop->ffparams.iparams[ilfb->iatoms[i]]; + a = ilfb->iatoms[i+1]; + molb->posres_xA[a][XX] = ip->fbposres.pos0[XX]; + molb->posres_xA[a][YY] = ip->fbposres.pos0[YY]; + molb->posres_xA[a][ZZ] = ip->fbposres.pos0[ZZ]; + } + } +} + +static void set_disres_npair(gmx_mtop_t *mtop) +{ + int mt, i, npair; + t_iparams *ip; + t_ilist *il; + t_iatom *a; + + ip = mtop->ffparams.iparams; + + for (mt = 0; mt < mtop->nmoltype; mt++) + { + il = &mtop->moltype[mt].ilist[F_DISRES]; + if (il->nr > 0) + { + a = il->iatoms; + npair = 0; + for (i = 0; i < il->nr; i += 3) + { + npair++; + if (i+3 == il->nr || ip[a[i]].disres.label != ip[a[i+3]].disres.label) + { + ip[a[i]].disres.npair = npair; + npair = 0; + } + } + } + } +} + +static void do_mtop(t_fileio *fio, gmx_mtop_t *mtop, gmx_bool bRead, + int file_version) +{ + int mt, mb, i; + t_blocka dumb; + + if (bRead) + { + init_mtop(mtop); + } + do_symtab(fio, &(mtop->symtab), bRead); + if (bRead && debug) + { + pr_symtab(debug, 0, "symtab", &mtop->symtab); + } + + do_symstr(fio, &(mtop->name), bRead, &(mtop->symtab)); + + if (file_version >= 57) + { + do_ffparams(fio, &mtop->ffparams, bRead, file_version); + + gmx_fio_do_int(fio, mtop->nmoltype); + } + else + { + mtop->nmoltype = 1; + } + if (bRead) + { + snew(mtop->moltype, mtop->nmoltype); + if (file_version < 57) + { + mtop->moltype[0].name = mtop->name; + } + } + for (mt 
= 0; mt < mtop->nmoltype; mt++) + { + do_moltype(fio, &mtop->moltype[mt], bRead, &mtop->symtab, file_version, + &mtop->groups); + } + + if (file_version >= 57) + { + gmx_fio_do_int(fio, mtop->nmolblock); + } + else + { + mtop->nmolblock = 1; + } + if (bRead) + { + snew(mtop->molblock, mtop->nmolblock); + } + if (file_version >= 57) + { + for (mb = 0; mb < mtop->nmolblock; mb++) + { + do_molblock(fio, &mtop->molblock[mb], bRead, file_version); + } + gmx_fio_do_int(fio, mtop->natoms); + } + else + { + mtop->molblock[0].type = 0; + mtop->molblock[0].nmol = 1; + mtop->molblock[0].natoms_mol = mtop->moltype[0].atoms.nr; + mtop->molblock[0].nposres_xA = 0; + mtop->molblock[0].nposres_xB = 0; + } + + do_atomtypes (fio, &(mtop->atomtypes), bRead, &(mtop->symtab), file_version); + if (bRead && debug) + { + pr_atomtypes(debug, 0, "atomtypes", &mtop->atomtypes, TRUE); + } + + if (file_version < 57) + { + /* Debug statements are inside do_idef */ + do_idef (fio, &mtop->ffparams, &mtop->moltype[0], bRead, file_version); + mtop->natoms = mtop->moltype[0].atoms.nr; + } + + if (file_version >= 65) + { + do_cmap(fio, &mtop->ffparams.cmap_grid, bRead); + } + else + { + mtop->ffparams.cmap_grid.ngrid = 0; + mtop->ffparams.cmap_grid.grid_spacing = 0; + mtop->ffparams.cmap_grid.cmapdata = NULL; + } + + if (file_version >= 57) + { + do_groups(fio, &mtop->groups, bRead, &(mtop->symtab), file_version); + } + + if (file_version < 57) + { + do_block(fio, &mtop->moltype[0].cgs, bRead, file_version); + if (bRead && gmx_debug_at) + { + pr_block(debug, 0, "cgs", &mtop->moltype[0].cgs, TRUE); + } + do_block(fio, &mtop->mols, bRead, file_version); + /* Add the posres coordinates to the molblock */ + add_posres_molblock(mtop); + } + if (bRead) + { + if (file_version >= 57) + { + done_block(&mtop->mols); + mtop->mols = mtop_mols(mtop); + } + if (gmx_debug_at) + { + pr_block(debug, 0, "mols", &mtop->mols, TRUE); + } + } + + if (file_version < 51) + { + /* Here used to be the shake blocks */ + 
do_blocka(fio, &dumb, bRead, file_version); + if (dumb.nr > 0) + { + sfree(dumb.index); + } + if (dumb.nra > 0) + { + sfree(dumb.a); + } + } + + if (bRead) + { + close_symtab(&(mtop->symtab)); + } +} + +/* If TopOnlyOK is TRUE then we can read even future versions + * of tpx files, provided the file_generation hasn't changed. + * If it is FALSE, we need the inputrecord too, and bail out + * if the file is newer than the program. + * + * The version and generation if the topology (see top of this file) + * are returned in the two last arguments. + * + * If possible, we will read the inputrec even when TopOnlyOK is TRUE. + */ +static void do_tpxheader(t_fileio *fio, gmx_bool bRead, t_tpxheader *tpx, + gmx_bool TopOnlyOK, int *file_version, + int *file_generation) +{ + char buf[STRLEN]; + char file_tag[STRLEN]; + gmx_bool bDouble; + int precision; + int fver, fgen; + int idum = 0; + real rdum = 0; + + gmx_fio_checktype(fio); + gmx_fio_setdebug(fio, bDebugMode()); + + /* NEW! XDR tpb file */ + precision = sizeof(real); + if (bRead) + { + gmx_fio_do_string(fio, buf); + if (strncmp(buf, "VERSION", 7)) + { + gmx_fatal(FARGS, "Can not read file %s,\n" + " this file is from a Gromacs version which is older than 2.0\n" + " Make a new one with grompp or use a gro or pdb file, if possible", + gmx_fio_getname(fio)); + } + gmx_fio_do_int(fio, precision); + bDouble = (precision == sizeof(double)); + if ((precision != sizeof(float)) && !bDouble) + { + gmx_fatal(FARGS, "Unknown precision in file %s: real is %d bytes " + "instead of %d or %d", + gmx_fio_getname(fio), precision, sizeof(float), sizeof(double)); + } + gmx_fio_setprecision(fio, bDouble); + fprintf(stderr, "Reading file %s, %s (%s precision)\n", + gmx_fio_getname(fio), buf, bDouble ? 
"double" : "single"); + } + else + { + gmx_fio_write_string(fio, GromacsVersion()); + bDouble = (precision == sizeof(double)); + gmx_fio_setprecision(fio, bDouble); + gmx_fio_do_int(fio, precision); + fver = tpx_version; + sprintf(file_tag, "%s", tpx_tag); + fgen = tpx_generation; + } + + /* Check versions! */ + gmx_fio_do_int(fio, fver); + + /* This is for backward compatibility with development versions 77-79 + * where the tag was, mistakenly, placed before the generation, + * which would cause a segv instead of a proper error message + * when reading the topology only from tpx with <77 code. + */ + if (fver >= 77 && fver <= 79) + { + gmx_fio_do_string(fio, file_tag); + } + + if (fver >= 26) + { + gmx_fio_do_int(fio, fgen); + } + else + { + fgen = 0; + } + + if (fver >= 81) + { + gmx_fio_do_string(fio, file_tag); + } + if (bRead) + { + if (fver < 77) + { + /* Versions before 77 don't have the tag, set it to release */ + sprintf(file_tag, "%s", TPX_TAG_RELEASE); + } + + if (strcmp(file_tag, tpx_tag) != 0) + { + fprintf(stderr, "Note: file tpx tag '%s', software tpx tag '%s'\n", + file_tag, tpx_tag); + + /* We only support reading tpx files with the same tag as the code + * or tpx files with the release tag and with lower version number. 
+ */ + if (!strcmp(file_tag, TPX_TAG_RELEASE) == 0 && fver < tpx_version) + { + gmx_fatal(FARGS, "tpx tag/version mismatch: reading tpx file (%s) version %d, tag '%s' with program for tpx version %d, tag '%s'", + gmx_fio_getname(fio), fver, file_tag, + tpx_version, tpx_tag); + } + } + } + + if (file_version != NULL) + { + *file_version = fver; + } + if (file_generation != NULL) + { + *file_generation = fgen; + } + + + if ((fver <= tpx_incompatible_version) || + ((fver > tpx_version) && !TopOnlyOK) || + (fgen > tpx_generation) || + tpx_version == 80) /*80 was used by both 5.0-dev and 4.6-dev*/ + { + gmx_fatal(FARGS, "reading tpx file (%s) version %d with version %d program", + gmx_fio_getname(fio), fver, tpx_version); + } + + do_section(fio, eitemHEADER, bRead); + gmx_fio_do_int(fio, tpx->natoms); + if (fver >= 28) + { + gmx_fio_do_int(fio, tpx->ngtc); + } + else + { + tpx->ngtc = 0; + } + if (fver < 62) + { + gmx_fio_do_int(fio, idum); + gmx_fio_do_real(fio, rdum); + } + /*a better decision will eventually (5.0 or later) need to be made + on how to treat the alchemical state of the system, which can now + vary through a simulation, and cannot be completely described + though a single lambda variable, or even a single state + index. Eventually, should probably be a vector. 
MRS*/ + if (fver >= 79) + { + gmx_fio_do_int(fio, tpx->fep_state); + } + gmx_fio_do_real(fio, tpx->lambda); + gmx_fio_do_int(fio, tpx->bIr); + gmx_fio_do_int(fio, tpx->bTop); + gmx_fio_do_int(fio, tpx->bX); + gmx_fio_do_int(fio, tpx->bV); + gmx_fio_do_int(fio, tpx->bF); + gmx_fio_do_int(fio, tpx->bBox); + + if ((fgen > tpx_generation)) + { + /* This can only happen if TopOnlyOK=TRUE */ + tpx->bIr = FALSE; + } +} + +static int do_tpx(t_fileio *fio, gmx_bool bRead, + t_inputrec *ir, t_state *state, rvec *f, gmx_mtop_t *mtop, + gmx_bool bXVallocated) +{ + t_tpxheader tpx; + t_inputrec dum_ir; + gmx_mtop_t dum_top; + gmx_bool TopOnlyOK, bDum = TRUE; + int file_version, file_generation; + int i; + rvec *xptr, *vptr; + int ePBC; + gmx_bool bPeriodicMols; + + if (!bRead) + { + tpx.natoms = state->natoms; + tpx.ngtc = state->ngtc; /* need to add nnhpres here? */ + tpx.fep_state = state->fep_state; + tpx.lambda = state->lambda[efptFEP]; + tpx.bIr = (ir != NULL); + tpx.bTop = (mtop != NULL); + tpx.bX = (state->x != NULL); + tpx.bV = (state->v != NULL); + tpx.bF = (f != NULL); + tpx.bBox = TRUE; + } + + TopOnlyOK = (ir == NULL); + + do_tpxheader(fio, bRead, &tpx, TopOnlyOK, &file_version, &file_generation); + + if (bRead) + { + state->flags = 0; + /* state->lambda = tpx.lambda;*/ /*remove this eventually? */ + /* The init_state calls initialize the Nose-Hoover xi integrals to zero */ + if (bXVallocated) + { + xptr = state->x; + vptr = state->v; + init_state(state, 0, tpx.ngtc, 0, 0, 0); /* nose-hoover chains */ /* eventually, need to add nnhpres here? 
*/ + state->natoms = tpx.natoms; + state->nalloc = tpx.natoms; + state->x = xptr; + state->v = vptr; + } + else + { + init_state(state, tpx.natoms, tpx.ngtc, 0, 0, 0); /* nose-hoover chains */ + } + } + +#define do_test(fio, b, p) if (bRead && (p != NULL) && !b) gmx_fatal(FARGS, "No %s in %s",#p, gmx_fio_getname(fio)) + + do_test(fio, tpx.bBox, state->box); + do_section(fio, eitemBOX, bRead); + if (tpx.bBox) + { + gmx_fio_ndo_rvec(fio, state->box, DIM); + if (file_version >= 51) + { + gmx_fio_ndo_rvec(fio, state->box_rel, DIM); + } + else + { + /* We initialize box_rel after reading the inputrec */ + clear_mat(state->box_rel); + } + if (file_version >= 28) + { + gmx_fio_ndo_rvec(fio, state->boxv, DIM); + if (file_version < 56) + { + matrix mdum; + gmx_fio_ndo_rvec(fio, mdum, DIM); + } + } + } + + if (state->ngtc > 0 && file_version >= 28) + { + real *dumv; + /*ndo_double(state->nosehoover_xi,state->ngtc,bDum);*/ + /*ndo_double(state->nosehoover_vxi,state->ngtc,bDum);*/ + /*ndo_double(state->therm_integral,state->ngtc,bDum);*/ + snew(dumv, state->ngtc); + if (file_version < 69) + { + bDum = gmx_fio_ndo_real(fio, dumv, state->ngtc); + } + /* These used to be the Berendsen tcoupl_lambda's */ + bDum = gmx_fio_ndo_real(fio, dumv, state->ngtc); + sfree(dumv); + } + + /* Prior to tpx version 26, the inputrec was here. + * I moved it to enable partial forward-compatibility + * for analysis/viewer programs. + */ + if (file_version < 26) + { + do_test(fio, tpx.bIr, ir); + do_section(fio, eitemIR, bRead); + if (tpx.bIr) + { + if (ir) + { + do_inputrec(fio, ir, bRead, file_version, + mtop ? &mtop->ffparams.fudgeQQ : NULL); + if (bRead && debug) + { + pr_inputrec(debug, 0, "inputrec", ir, FALSE); + } + } + else + { + do_inputrec(fio, &dum_ir, bRead, file_version, + mtop ? 
&mtop->ffparams.fudgeQQ : NULL); + if (bRead && debug) + { + pr_inputrec(debug, 0, "inputrec", &dum_ir, FALSE); + } + done_inputrec(&dum_ir); + } + + } + } + + do_test(fio, tpx.bTop, mtop); + do_section(fio, eitemTOP, bRead); + if (tpx.bTop) + { + if (mtop) + { + do_mtop(fio, mtop, bRead, file_version); + } + else + { + do_mtop(fio, &dum_top, bRead, file_version); + done_mtop(&dum_top, TRUE); + } + } + do_test(fio, tpx.bX, state->x); + do_section(fio, eitemX, bRead); + if (tpx.bX) + { + if (bRead) + { + state->flags |= (1<x, state->natoms); + } + + do_test(fio, tpx.bV, state->v); + do_section(fio, eitemV, bRead); + if (tpx.bV) + { + if (bRead) + { + state->flags |= (1<v, state->natoms); + } + + do_test(fio, tpx.bF, f); + do_section(fio, eitemF, bRead); + if (tpx.bF) + { + gmx_fio_ndo_rvec(fio, f, state->natoms); + } + + /* Starting with tpx version 26, we have the inputrec + * at the end of the file, so we can ignore it + * if the file is never than the software (but still the + * same generation - see comments at the top of this file. + * + * + */ + ePBC = -1; + bPeriodicMols = FALSE; + if (file_version >= 26) + { + do_test(fio, tpx.bIr, ir); + do_section(fio, eitemIR, bRead); + if (tpx.bIr) + { + if (file_version >= 53) + { + /* Removed the pbc info from do_inputrec, since we always want it */ + if (!bRead) + { + ePBC = ir->ePBC; + bPeriodicMols = ir->bPeriodicMols; + } + gmx_fio_do_int(fio, ePBC); + gmx_fio_do_gmx_bool(fio, bPeriodicMols); + } + if (file_generation <= tpx_generation && ir) + { + do_inputrec(fio, ir, bRead, file_version, mtop ? 
&mtop->ffparams.fudgeQQ : NULL); + if (bRead && debug) + { + pr_inputrec(debug, 0, "inputrec", ir, FALSE); + } + if (file_version < 51) + { + set_box_rel(ir, state); + } + if (file_version < 53) + { + ePBC = ir->ePBC; + bPeriodicMols = ir->bPeriodicMols; + } + } + if (bRead && ir && file_version >= 53) + { + /* We need to do this after do_inputrec, since that initializes ir */ + ir->ePBC = ePBC; + ir->bPeriodicMols = bPeriodicMols; + } + } + } + + if (bRead) + { + if (tpx.bIr && ir) + { + if (state->ngtc == 0) + { + /* Reading old version without tcoupl state data: set it */ + init_gtc_state(state, ir->opts.ngtc, 0, ir->opts.nhchainlength); + } + if (tpx.bTop && mtop) + { + if (file_version < 57) + { + if (mtop->moltype[0].ilist[F_DISRES].nr > 0) + { + ir->eDisre = edrSimple; + } + else + { + ir->eDisre = edrNone; + } + } + set_disres_npair(mtop); + } + } + + if (tpx.bTop && mtop) + { + gmx_mtop_finalize(mtop); + } + + if (file_version >= 57) + { + char *env; + int ienv; + env = getenv("GMX_NOCHARGEGROUPS"); + if (env != NULL) + { + sscanf(env, "%d", &ienv); + fprintf(stderr, "\nFound env.var. GMX_NOCHARGEGROUPS = %d\n", + ienv); + if (ienv > 0) + { + fprintf(stderr, + "Will make single atomic charge groups in non-solvent%s\n", + ienv > 1 ? 
" and solvent" : ""); + gmx_mtop_make_atomic_charge_groups(mtop, ienv == 1); + } + fprintf(stderr, "\n"); + } + } + } + + return ePBC; +} + +/************************************************************ + * + * The following routines are the exported ones + * + ************************************************************/ + +t_fileio *open_tpx(const char *fn, const char *mode) +{ + return gmx_fio_open(fn, mode); +} + +void close_tpx(t_fileio *fio) +{ + gmx_fio_close(fio); +} + +void read_tpxheader(const char *fn, t_tpxheader *tpx, gmx_bool TopOnlyOK, + int *file_version, int *file_generation) +{ + t_fileio *fio; + + fio = open_tpx(fn, "r"); + do_tpxheader(fio, TRUE, tpx, TopOnlyOK, file_version, file_generation); + close_tpx(fio); +} + +void write_tpx_state(const char *fn, + t_inputrec *ir, t_state *state, gmx_mtop_t *mtop) +{ + t_fileio *fio; + + fio = open_tpx(fn, "w"); + do_tpx(fio, FALSE, ir, state, NULL, mtop, FALSE); + close_tpx(fio); +} + +void read_tpx_state(const char *fn, + t_inputrec *ir, t_state *state, rvec *f, gmx_mtop_t *mtop) +{ + t_fileio *fio; + + fio = open_tpx(fn, "r"); + do_tpx(fio, TRUE, ir, state, f, mtop, FALSE); + close_tpx(fio); +} + +int read_tpx(const char *fn, + t_inputrec *ir, matrix box, int *natoms, + rvec *x, rvec *v, rvec *f, gmx_mtop_t *mtop) +{ + t_fileio *fio; + t_state state; + int ePBC; + + state.x = x; + state.v = v; + fio = open_tpx(fn, "r"); + ePBC = do_tpx(fio, TRUE, ir, &state, f, mtop, TRUE); + close_tpx(fio); + *natoms = state.natoms; + if (box) + { + copy_mat(state.box, box); + } + state.x = NULL; + state.v = NULL; + done_state(&state); + + return ePBC; +} + +int read_tpx_top(const char *fn, + t_inputrec *ir, matrix box, int *natoms, + rvec *x, rvec *v, rvec *f, t_topology *top) +{ + gmx_mtop_t mtop; + t_topology *ltop; + int ePBC; + + ePBC = read_tpx(fn, ir, box, natoms, x, v, f, &mtop); + + *top = gmx_mtop_t_to_t_topology(&mtop); + + return ePBC; +} + +gmx_bool fn2bTPX(const char *file) +{ + switch (fn2ftp(file)) + { 
+ case efTPR: + case efTPB: + case efTPA: + return TRUE; + default: + return FALSE; + } +} +/* Release the arrays owned by a gmx_groups_t: for each of the egcNR group types free grps[i].nm_ind and grpnr[i] (when non-NULL) and reset that pointer to NULL, then free the grpname array itself. */ +static void done_gmx_groups_t(gmx_groups_t *g) +{ + int i; + + for (i = 0; (i < egcNR); i++) + { + if (NULL != g->grps[i].nm_ind) + { + sfree(g->grps[i].nm_ind); + g->grps[i].nm_ind = NULL; + } + if (NULL != g->grpnr[i]) + { + sfree(g->grpnr[i]); + g->grpnr[i] = NULL; + } + } + /* The contents of this array is in symtab, don't free it here */ + sfree(g->grpname); +} +/* Read a configuration from infile into top, *x, *v and box. Returns TRUE when fn2bTPX() classifies infile as a tpx file (read_tpx() path) and FALSE when the structure-file path (get_stx_coordnum()/read_stx_conf()) is taken. *ePBC is preset to -1 and then filled in by whichever reader runs. x and v may be NULL; non-NULL ones receive arrays allocated here with snew(). bMass is only consulted on the non-tpx path. NOTE(review): title is written with strcpy() on the tpx path and no buffer size is available here, so the caller must supply a large enough buffer. */ +gmx_bool read_tps_conf(const char *infile, char *title, t_topology *top, int *ePBC, + rvec **x, rvec **v, matrix box, gmx_bool bMass) +{ + t_tpxheader header; + int natoms, i, version, generation; + gmx_bool bTop, bXNULL = FALSE; + gmx_mtop_t *mtop; + t_topology *topconv; + gmx_atomprop_t aps; + + bTop = fn2bTPX(infile); + *ePBC = -1; + if (bTop) + { + read_tpxheader(infile, &header, TRUE, &version, &generation); /* header.natoms sizes the x/v allocations below */ + if (x) + { + snew(*x, header.natoms); + } + if (v) + { + snew(*v, header.natoms); + } + snew(mtop, 1); + *ePBC = read_tpx(infile, NULL, box, &natoms, + (x == NULL) ? NULL : *x, (v == NULL) ? NULL : *v, NULL, mtop); + *top = gmx_mtop_t_to_t_topology(mtop); + /* In this case we need to throw away the group data too */ + done_gmx_groups_t(&mtop->groups); + sfree(mtop); + strcpy(title, *top->name); + tpx_make_chain_identifiers(&top->atoms, &top->mols); + } + else + { + get_stx_coordnum(infile, &natoms); + init_t_atoms(&top->atoms, natoms, (fn2ftp(infile) == efPDB)); + if (x == NULL) /* no coordinates requested: allocate a scratch holder so read_stx_conf() still gets an array; bXNULL marks it for release afterwards */ + { + snew(x, 1); + bXNULL = TRUE; + } + snew(*x, natoms); + if (v) + { + snew(*v, natoms); + } + read_stx_conf(infile, title, &top->atoms, *x, (v == NULL) ? 
NULL : *v, ePBC, box); + if (bXNULL) + { + sfree(*x); + sfree(x); + } + if (bMass) + { + aps = gmx_atomprop_init(); + for (i = 0; (i < natoms); i++) + { + if (!gmx_atomprop_query(aps, epropMass, + *top->atoms.resinfo[top->atoms.atom[i].resind].name, + *top->atoms.atomname[i], + &(top->atoms.atom[i].m))) + { + if (debug) + { + fprintf(debug, "Can not find mass for atom %s %d %s, setting to 1\n", + *top->atoms.resinfo[top->atoms.atom[i].resind].name, + top->atoms.resinfo[top->atoms.atom[i].resind].nr, + *top->atoms.atomname[i]); + } + } + } + gmx_atomprop_destroy(aps); + } + top->idef.ntypes = -1; + } + + return bTop; +} diff --cc src/gromacs/legacyheaders/gstat.h index 15c25622a9,0000000000..aa991441b6 mode 100644,000000..100644 --- a/src/gromacs/legacyheaders/gstat.h +++ b/src/gromacs/legacyheaders/gstat.h @@@ -1,436 -1,0 +1,436 @@@ +/* + * + * This source code is part of + * + * G R O M A C S + * + * GROningen MAchine for Chemical Simulations + * + * VERSION 3.2.0 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others. + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team, + * check out http://www.gromacs.org for more information. + + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * If you want to redistribute modifications, please consider that + * scientific software is very special. Version control is crucial - + * bugs must be traceable. We will be happy to consider code for + * inclusion in the official distribution, but derived work must not + * be called official GROMACS. Details are found in the README & COPYING + * files - if they are missing, get the official version at www.gromacs.org. 
+ * + * To help us fund GROMACS development, we humbly ask that you cite + * the papers on the package - you can find them in the top README file. + * + * For more info, check our website at http://www.gromacs.org + * + * And Hey: + * Gromacs Runs On Most of All Computer Systems + */ + +#ifndef _gstat_h +#define _gstat_h + +#include "typedefs.h" +#include "statutil.h" +#include "mshift.h" +#include "rmpbc.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/*********************************************** + * + * A U T O C O R R E L A T I O N + * + ***********************************************/ + +real LegendreP(real x, unsigned long m); + +#define eacNormal (1<<0) +#define eacCos (1<<1) +#define eacVector (1<<2) +#define eacRcross (1<<3 | eacVector) +#define eacP0 (1<<4 | eacVector) +#define eacP1 (1<<5 | eacVector) +#define eacP2 (1<<6 | eacVector) +#define eacP3 (1<<7 | eacVector) +#define eacP4 (1<<8 | eacVector) +#define eacIden (1<<9) + +enum { + effnNONE, effnEXP1, effnEXP2, effnEXP3, effnVAC, + effnEXP5, effnEXP7, effnEXP9, effnERF, effnERREST, effnNR +}; + +/* must correspond with 'leg' g_chi.c:727 */ +enum { + edPhi = 0, edPsi, edOmega, edChi1, edChi2, edChi3, edChi4, edChi5, edChi6, edMax +}; + +enum { + edPrintST = 0, edPrintRO +}; + +#define NHISTO 360 +#define NONCHI 3 /* number of non-chi entries (edPhi, edPsi, edOmega) that precede edChi1 in the enum above */ +#define MAXCHI (edMax-NONCHI) /* number of chi entries; parenthesized so the macro expands safely inside larger expressions */ +#define NROT 4 /* number of rotamers: 1=g(-), 2=t, 3=g(+), 0=other */ + +typedef struct { - int minO, minC, H, N, C, O, Cn[MAXCHI+3]; ++ int minCalpha, minC, H, N, C, O, Cn[MAXCHI+3]; +} t_dihatms; /* Cn[0]=N, Cn[1]=Ca, Cn[2]=Cb etc. */ + +typedef struct { + char name[12]; + int resnr; + int index; /* Index for amino acids (histograms) */ + int j0[edMax]; /* Index in dih array (phi angle is first...) 
*/ + t_dihatms atm; + int b[edMax]; + int ntr[edMax]; + real S2[edMax]; + real rot_occ[edMax][NROT]; + +} t_dlist; + +extern const int nfp_ffn[effnNR]; + +extern const char *s_ffn[effnNR+2]; + +extern const char *longs_ffn[effnNR]; + +int sffn2effn(const char **sffn); +/* Returns the ffn enum corresponding to the selected enum option in sffn */ + +t_pargs *add_acf_pargs(int *npargs, t_pargs *pa); +/* Add options for autocorr to the current set of options. + * *npargs must be initialised to the number of elements in pa, + * it will be incremented appropriately. + */ + +void cross_corr(int n, real f[], real g[], real corr[]); +/* Simple minded cross correlation algorithm */ + +real fit_acf(int ncorr, int fitfn, const output_env_t oenv, gmx_bool bVerbose, + real tbeginfit, real tendfit, real dt, real c1[], real *fit); +/* Fit an ACF to a given function */ + +void do_autocorr(const char *fn, const output_env_t oenv, + const char *title, + int nframes, int nitem, real **c1, + real dt, unsigned long mode, gmx_bool bAver); +/* Calls low_do_autocorr (see below). After calling add_acf_pargs */ + +void low_do_autocorr(const char *fn, const output_env_t oenv, + const char *title, int nframes, int nitem, + int nout, real **c1, real dt, unsigned long mode, + int nrestart, gmx_bool bAver, gmx_bool bNormalize, + gmx_bool bVerbose, real tbeginfit, real tendfit, + int nfitparm, int nskip); +/* + * do_autocorr calculates autocorrelation functions for many things. + * It takes a 2 d array containing nitem arrays of length nframes + * for each item the ACF is calculated. 
+ * + * A number of "modes" exist for computation of the ACF + * + * if (mode == eacNormal) { + * C(t) = < X (tau) * X (tau+t) > + * } + * else if (mode == eacCos) { + * C(t) = < cos (X(tau) - X(tau+t)) > + * } + * else if (mode == eacIden) { **not fully supported yet** + * C(t) = < (X(tau) == X(tau+t)) > + * } + * else if (mode == eacVector) { + * C(t) = < X(tau) * X(tau+t) > + * } + * else if (mode == eacP1) { + * C(t) = < cos (X(tau) * X(tau+t)) > + * } + * else if (mode == eacP2) { + * C(t) = 1/2 * < 3 cos (X(tau) * X(tau+t)) - 1 > + * } + * else if (mode == eacRcross) { + * C(t) = < ( X(tau) * X(tau+t) )^2 > + * } + * + * For modes eacVector, eacP1, eacP2 and eacRcross the input should be + * 3 x nframes long, where each triplet is taken as a 3D vector + * + * For mode eacCos input data must be in radians, not degrees! + * + * Other parameters are: + * + * fn is output filename (.xvg) where the correlation function(s) are printed + * title is the title in the output file + * nframes is the number of frames in the time series + * nitem is the number of items + * c1 is an array of dimension [ 0 .. nitem-1 ] [ 0 .. nframes-1 ] + * on output, this array is filled with the correlation function + * to reduce storage + * nrestart is the number of steps between restarts for direct ACFs + * (i.e. without FFT). When set to 1 all points are used as + * time origin for averaging + * dt is the time between frames + * bAver If set, all ndih C(t) functions are averaged into a single + * C(t) + * (bFour If set, will use fast Fourier transform (FFT) for evaluating + * the ACF: removed option, now on the command line only) + * bNormalize If set, all ACFs will be normalized to start at 0 + * nskip Determines whether steps are skipped in the output + */ + +typedef struct { + const char *name; /* Description of the J coupling constant */ + real A, B, C; /* Karplus coefficients */ + real offset; /* Offset for dihedral angle in histogram (e.g. 
-M_PI/3) */ + real Jc; /* Resulting Jcoupling */ + real Jcsig; /* Standard deviation in Jc */ +} t_karplus; + +void calc_distribution_props(int nh, int histo[], + real start, int nkkk, t_karplus kkk[], + real *S2); +/* This routine takes a dihedral distribution and calculates + * coupling constants and dihedral order parameters of it. + * + * nh is the number of points + * histo is the array of datapoints which is assumed to span + * 2 M_PI radians + * start is the starting angle of the histogram, this can be either 0 + * or -M_PI + * nkkk is the number of karplus sets (multiple coupling constants may be + * derived from a single angle) + * kkk are the constants for calculating J coupling constants using a + * Karplus equation according to + * + * 2 + * J = A cos theta + B cos theta + C + * + * where theta is phi - offset (phi is the angle in the histogram) + * offset is subtracted from phi before substitution in the Karplus + * equation + * S2 is the resulting dihedral order parameter + * + */ + + +/*********************************************** + * + * F I T R O U T I N E S + * + ***********************************************/ +void do_expfit(int ndata, real c1[], real dt, + real begintimefit, real endtimefit); + +void expfit(int n, real x[], real y[], real Dy[], + real *a, real *sa, + real *b, real *sb); +/* This procedure fits y=exp(a+bx) for n (x,y) pairs to determine a and b. + * The uncertainties in the y values must be in the vector Dy. + * The standard deviations of a and b, sa and sb, are also calculated. + * + * Routine from Computers in physics, 7(3) (1993), p. 280-285. + */ + +void ana_dih_trans(const char *fn_trans, const char *fn_histo, + real **dih, int nframes, int nangles, + const char *grpname, real *time, gmx_bool bRb, + const output_env_t oenv); +/* + * Analyse dihedral transitions, by counting transitions per dihedral + * and per frame. The total number of transitions is printed to + * stderr, as well as the average time between transitions. 
+ * + * This is a wrapper around low_ana_dih_trans, which also passes in and out the + number of transitions per dihedral per residue. That routine uses t_dlist, + which is declared earlier in this header. + + * Dihedrals are supposed to be in one of three minima, + * (trans, gauche+, gauche-) + * + * fn_trans output file name for #transitions per timeframe + * fn_histo output file name for transition time histogram + * dih the actual dihedral angles + * nframes number of time frames + * nangles number of angles + * grpname a string for the header of plots + * time array (size nframes) of times of trajectory frames + * bRb determines whether the polymer convention is used + * (trans = 0) + */ + +void low_ana_dih_trans(gmx_bool bTrans, const char *fn_trans, + gmx_bool bHisto, const char *fn_histo, int maxchi, + real **dih, int nlist, t_dlist dlist[], + int nframes, int nangles, const char *grpname, + int multiplicity[], real *time, gmx_bool bRb, + real core_frac, const output_env_t oenv); +/* as above but passes dlist so can copy occupancies into it, and multiplicity[] + * (1..nangles, corresponding to dih[this][]), so can have non-3 multiplicity of + * rotamers. Also production of xvg output files is conditional + * and the fractional width of each rotamer can be set, i.e. for a 3 fold + * dihedral with core_frac = 0.5 only the central 60 degrees is assigned + * to each rotamer, the rest goes to rotamer zero */ + + + +void read_ang_dih(const char *trj_fn, + gmx_bool bAngles, gmx_bool bSaveAll, gmx_bool bRb, gmx_bool bPBC, + int maxangstat, int angstat[], + int *nframes, real **time, + int isize, atom_id index[], + real **trans_frac, + real **aver_angle, + real *dih[], + const output_env_t oenv); +/* + * Read a trajectory and calculate angles and dihedrals. 
+ * + * trj_fn file name of trajectory + * tpb_fn file name of tpb file + * bAngles do we have to read angles or dihedrals + * bSaveAll do we have to store all in the dih array + * bRb do we have Ryckaert-Bellemans dihedrals (trans = 0) + * bPBC compute angles modulo 2 Pi + * maxangstat number of entries in distribution array + * angstat angle distribution + * *nframes number of frames read + * time simulation time at each time frame + * isize number of entries in the index, when angles 3*number of angles + * else 4*number of angles + * index atom numbers that define the angles or dihedrals + * (i,j,k) resp (i,j,k,l) + * trans_frac number of dihedrals in trans + * aver_angle average angle at each time frame + * dih all angles at each time frame + */ + +void make_histo(FILE *log, + int ndata, real data[], int npoints, int histo[], + real minx, real maxx); +/* + * Make a histogram from data. The min and max of the data array can + * be determined (if minx == 0 and maxx == 0) + * and the index in the histogram is computed from + * ind = npoints/(max(data) - min(data)) + * + * log write error output to this file + * ndata number of points in data + * data data points + * npoints number of points in histogram + * histo histogram array. 
This is NOT set to zero, to allow you + * to add multiple histograms + * minx start of the histogram + * maxx end of the histogram + * if both are 0, these values are computed by the routine itself + */ + +void normalize_histo(int npoints, int histo[], real dx, real normhisto[]); +/* + * Normalize a histogram so that the integral over the histo is 1 + * + * npoints number of points in the histo array + * histo input histogram + * dx distance between points on the X-axis + * normhisto normalized output histogram + */ + +real fit_function(int eFitFn, real *parm, real x); +/* Returns the value of fit function eFitFn at x */ + +/* Use Levenberg-Marquardt method to fit to a nfitparm parameter exponential */ +/* or to a transverse current autocorrelation function */ +/* Or: "There is no KILL like OVERKILL", Dr. Ir. D. van der Spoel */ +real do_lmfit(int ndata, real c1[], real sig[], real dt, real *x, + real begintimefit, real endtimefit, const output_env_t oenv, + gmx_bool bVerbose, int eFitFn, real fitparms[], int fix); +/* Returns integral. + * If x == NULL, the timestep dt will be used to create a time axis. + * fix fixes fit parameter i at its starting value, when the i'th bit + * of fix is set. + */ + +real evaluate_integral(int n, real x[], real y[], real dy[], + real aver_start, real *stddev); +/* Integrate data in y, and, if given, use dy as weighting + * aver_start should be set to a value where the function has + * converged to 0. + */ + +real print_and_integrate(FILE *fp, int n, real dt, + real c[], real *fit, int nskip); +/* Integrate the data in c[] from 0 to n using trapezium rule. + * If fp != NULL output is written to it + * nskip determines whether all elements are written to the output file + * (written when i % nskip == 0) + * If fit != NULL the fit is also written. + */ + +int get_acfnout(void); +/* Return the output length for the correlation function + * Works only AFTER do_autocorr has been called! 
+ */ + +int get_acffitfn(void); +/* Return the fit function type. + * Works only AFTER do_autocorr has been called! + */ + +/* Routines from pp2shift (anadih.c etc.) */ + +void do_pp2shifts(FILE *fp, int nframes, + int nlist, t_dlist dlist[], real **dih); + +gmx_bool has_dihedral(int Dih, t_dlist *dl); + +t_dlist *mk_dlist(FILE *log, + t_atoms *atoms, int *nlist, + gmx_bool bPhi, gmx_bool bPsi, gmx_bool bChi, gmx_bool bHChi, + int maxchi, int r0, gmx_residuetype_t rt); + +void pr_dlist(FILE *fp, int nl, t_dlist dl[], real dt, int printtype, + gmx_bool bPhi, gmx_bool bPsi, gmx_bool bChi, gmx_bool bOmega, int maxchi); + +int pr_trans(FILE *fp, int nl, t_dlist dl[], real dt, int Xi); + +void mk_chi_lookup (int **lookup, int maxchi, real **dih, + int nlist, t_dlist dlist[]); + +void mk_multiplicity_lookup (int *multiplicity, int maxchi, real **dih, + int nlist, t_dlist dlist[], int nangle); + +void get_chi_product_traj (real **dih, int nframes, int nangles, + int nlist, int maxchi, t_dlist dlist[], + real time[], int **lookup, int *multiplicity, + gmx_bool bRb, gmx_bool bNormalize, + real core_frac, gmx_bool bAll, const char *fnall, + const output_env_t oenv); + +void print_one (const output_env_t oenv, const char *base, + const char *name, + const char *title, const char *ylabel, int nf, + real time[], real data[]); + +/* Routines from g_hbond */ +void analyse_corr(int n, real t[], real ct[], real nt[], real kt[], + real sigma_ct[], real sigma_nt[], real sigma_kt[], + real fit_start, real temp, real smooth_tail_start, + const output_env_t oenv); + +void compute_derivative(int nn, real x[], real y[], real dydx[]); + +#ifdef __cplusplus +} +#endif + +#endif diff --cc src/gromacs/legacyheaders/nbnxn_cuda_data_mgmt.h index e8e43073bf,0000000000..ed3bae4ef2 mode 100644,000000..100644 --- a/src/gromacs/legacyheaders/nbnxn_cuda_data_mgmt.h +++ b/src/gromacs/legacyheaders/nbnxn_cuda_data_mgmt.h @@@ -1,135 -1,0 +1,135 @@@ +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; 
c-basic-offset: 4; c-file-style: "stroustrup"; -*- + * + * + * This source code is part of + * + * G R O M A C S + * + * GROningen MAchine for Chemical Simulations + * + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others. + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2012, The GROMACS development team, + * check out http://www.gromacs.org for more information. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * If you want to redistribute modifications, please consider that + * scientific software is very special. Version control is crucial - + * bugs must be traceable. We will be happy to consider code for + * inclusion in the official distribution, but derived work must not + * be called official GROMACS. Details are found in the README & COPYING + * files - if they are missing, get the official version at www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the papers on the package - you can find them in the top README file. + * + * For more info, check our website at http://www.gromacs.org + * + * And Hey: + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon + */ + +#ifndef NBNXN_CUDA_DATA_MGMT_H +#define NBNXN_CUDA_DATA_MGMT_H + +#include "types/simple.h" +#include "types/interaction_const.h" +#include "types/nbnxn_cuda_types_ext.h" +#include "types/hw_info.h" + +#ifdef GMX_GPU +#define FUNC_TERM ; +#define FUNC_QUALIFIER +#else +#define FUNC_TERM {} +#define FUNC_QUALIFIER static +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/*! Initializes the data structures related to CUDA nonbonded calculations. 
*/ +FUNC_QUALIFIER +void nbnxn_cuda_init(FILE *fplog, + nbnxn_cuda_ptr_t *p_cu_nb, + gmx_gpu_info_t *gpu_info, int my_gpu_index, + /* true if both local and non-local are done on GPU */ + gmx_bool bLocalAndNonlocal) FUNC_TERM + +/*! Initializes simulation constant data. */ +FUNC_QUALIFIER - void nbnxn_cuda_init_const(nbnxn_cuda_ptr_t p_cu_nb, - const interaction_const_t *ic, - const nonbonded_verlet_t *nbv) FUNC_TERM ++void nbnxn_cuda_init_const(nbnxn_cuda_ptr_t cu_nb, ++ const interaction_const_t *ic, ++ const nonbonded_verlet_group_t *nbv_group) FUNC_TERM + +/*! Initializes pair-list data for GPU, called at every pair search step. */ +FUNC_QUALIFIER +void nbnxn_cuda_init_pairlist(nbnxn_cuda_ptr_t cu_nb, + const nbnxn_pairlist_t *h_nblist, + int iloc) FUNC_TERM + +/*! Initializes atom-data on the GPU, called at every pair search step. */ +FUNC_QUALIFIER +void nbnxn_cuda_init_atomdata(nbnxn_cuda_ptr_t cu_nb, + const nbnxn_atomdata_t *atomdata) FUNC_TERM + +/*! \brief Update parameters during PP-PME load balancing. */ +FUNC_QUALIFIER +void nbnxn_cuda_pme_loadbal_update_param(nbnxn_cuda_ptr_t cu_nb, + const interaction_const_t *ic) FUNC_TERM + +/*! Uploads shift vector to the GPU if the box is dynamic (otherwise just returns). */ +FUNC_QUALIFIER +void nbnxn_cuda_upload_shiftvec(nbnxn_cuda_ptr_t cu_nb, + const nbnxn_atomdata_t *nbatom) FUNC_TERM + +/*! Clears GPU outputs: nonbonded force, shift force and energy. */ +FUNC_QUALIFIER +void nbnxn_cuda_clear_outputs(nbnxn_cuda_ptr_t cu_nb, + int flags) FUNC_TERM + +/*! Frees all GPU resources used for the nonbonded calculations. */ +FUNC_QUALIFIER +void nbnxn_cuda_free(FILE *fplog, + nbnxn_cuda_ptr_t cu_nb) FUNC_TERM + +/*! Returns the GPU timings structure or NULL if GPU is not used or timing is off. */ +FUNC_QUALIFIER +wallclock_gpu_t * nbnxn_cuda_get_timings(nbnxn_cuda_ptr_t cu_nb) +#ifdef GMX_GPU +; +#else +{ + return NULL; +} +#endif + +/*! Resets nonbonded GPU timings. 
*/ +FUNC_QUALIFIER +void nbnxn_cuda_reset_timings(nbnxn_cuda_ptr_t cu_nb) FUNC_TERM + +/*! Calculates the minimum size of proximity lists to improve SM load balance + with CUDA non-bonded kernels. */ +FUNC_QUALIFIER +int nbnxn_cuda_min_ci_balanced(nbnxn_cuda_ptr_t cu_nb) +#ifdef GMX_GPU +; +#else +{ + return -1; +} +#endif + +#ifdef __cplusplus +} +#endif + +#undef FUNC_TERM +#undef FUNC_QUALIFIER + +#endif /* NBNXN_CUDA_DATA_MGMT_H */ diff --cc src/gromacs/legacyheaders/pull_rotation.h index 93c07ff5b9,0000000000..a24cd1528d mode 100644,000000..100644 --- a/src/gromacs/legacyheaders/pull_rotation.h +++ b/src/gromacs/legacyheaders/pull_rotation.h @@@ -1,145 -1,0 +1,144 @@@ +/* + * + * This source code is part of + * + * G R O M A C S + * + * GROningen MAchine for Chemical Simulations + * + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others. + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2008, The GROMACS development team, + * check out http://www.gromacs.org for more information. + + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * If you want to redistribute modifications, please consider that + * scientific software is very special. Version control is crucial - + * bugs must be traceable. We will be happy to consider code for + * inclusion in the official distribution, but derived work must not + * be called official GROMACS. Details are found in the README & COPYING + * files - if they are missing, get the official version at www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the papers on the package - you can find them in the top README file. 
+ * + * For more info, check our website at http://www.gromacs.org + * + * And Hey: + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon + */ + +/*! \file pull_rotation.h + * + * @brief Enforced rotation of protein parts or other groups of particles. + * + * This file contains routines that are used to enforce rotational motion + * upon a subgroup of particles. + * + */ + +#ifndef _pull_rotation_h +#define _pull_rotation_h + +#include "vec.h" +#include "typedefs.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +/*! \brief Initialize the enforced rotation groups. + * + * This routine does the memory allocation for various helper arrays, opens + * the output files etc. + * + * \param fplog General output file, normally md.log. + * \param ir Struct containing MD input parameters, among those + * also the enforced rotation parameters. + * \param nfile Number of entries in the fnm structure. + * \param fnm The filenames struct containing also the names + * of the rotation output files. + * \param cr Pointer to MPI communication data. + * \param x The positions of all MD particles. - * \param box Simulation box, needed to make group whole. ++ * \param box The simulation box. + * \param mtop Molecular topology. + * \param oenv Needed to open the rotation output xvgr file. + * \param bVerbose Whether to print extra status information. + * \param Flags Flags passed over from main, used to determine + * whether or not we are doing a rerun. + */ +extern void init_rot(FILE *fplog, t_inputrec *ir, int nfile, const t_filenm fnm[], + t_commrec *cr, rvec *x, matrix box, gmx_mtop_t *mtop, const output_env_t oenv, + gmx_bool bVerbose, unsigned long Flags); + + +/*! \brief Make a selection of the home atoms for all enforced rotation groups. + * + * This routine is similar to dd_make_local_pull_groups, but works only with + * domain decomposition. It should be called at every domain decomposition. + * + * \param dd Structure containing domain decomposition data. 
+ * \param rot Pointer to all the enforced rotation data. + */ +extern void dd_make_local_rotation_groups(gmx_domdec_t *dd, t_rot *rot); + + +/*! \brief Calculation of the enforced rotation potential. + * + * This is the main enforced rotation module which is called during every time + * step. Here the rotation potential as well as the resulting forces are + * calculated. + * + * \param cr Pointer to MPI communication data. + * \param ir Struct containing MD input parameters, among those also the enforced rotation parameters. + * \param box Simulation box, needed to make group whole. + * \param x The positions of all the local particles. + * \param t Time. + * \param step The time step. + * \param wcycle During the potential calculation the wallcycles are + * counted. Later they enter the dynamic load balancing. + * \param bNS After domain decomposition / neighborsearching several + * local arrays have to be updated (masses, shifts) + */ +extern void do_rotation(t_commrec *cr, t_inputrec *ir, matrix box, rvec x[], real t, + gmx_large_int_t step, gmx_wallcycle_t wcycle, gmx_bool bNS); + + +/*! \brief Add the enforced rotation forces to the official force array. + * + * Adds the forces from enforced rotation potential to the local forces and + * sums up the contributions to the rotation potential from all the nodes. Since + * this needs communication, this routine should be called after the SR forces + * have been evaluated (in order not to spoil cycle counts). + * This routine also outputs data to the various rotation output files (e.g. + * the potential, the angle of the group, torques and more). + * + * \param rot Pointer to all the enforced rotation data. + * \param f The local forces to which the rotational forces have + * to be added. + * \param cr Pointer to MPI communication data. + * \param step The time step, used for output. + * \param t Time, used for output. + */ +extern real add_rot_forces(t_rot *rot, rvec f[], t_commrec *cr, gmx_large_int_t step, real t); + + +/*! 
\brief Close the enforced rotation output files. + * - * \param fplog General output file, normally md.log. + * \param rot Pointer to all the enforced rotation data. + */ - extern void finish_rot(FILE *fplog, t_rot *rot); ++extern void finish_rot(t_rot *rot); + + +#ifdef __cplusplus +} +#endif + + +#endif diff --cc src/gromacs/mdlib/expanded.c index 617647fdb5,0000000000..6140392c78 mode 100644,000000..100644 --- a/src/gromacs/mdlib/expanded.c +++ b/src/gromacs/mdlib/expanded.c @@@ -1,1431 -1,0 +1,1432 @@@ +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*- + * + * + * This source code is part of + * + * G R O M A C S + * + * GROningen MAchine for Chemical Simulations + * + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others. + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2012, The GROMACS development team, + * check out http://www.gromacs.org for more information. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * If you want to redistribute modifications, please consider that + * scientific software is very special. Version control is crucial - + * bugs must be traceable. We will be happy to consider code for + * inclusion in the official distribution, but derived work must not + * be called official GROMACS. Details are found in the README & COPYING + * files - if they are missing, get the official version at www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the papers on the package - you can find them in the top README file. 
+ * + * For more info, check our website at http://www.gromacs.org + * + * And Hey: + * GROwing Monsters And Cloning Shrimps + */ +#ifdef HAVE_CONFIG_H +#include +#endif + +#ifdef GMX_CRAY_XT3 +#include +#endif + + +#include +#include +#ifdef HAVE_SYS_TIME_H +#include +#endif +#include +#include "typedefs.h" +#include "string2.h" +#include "gmxfio.h" +#include "smalloc.h" +#include "names.h" +#include "confio.h" +#include "mvdata.h" +#include "txtdump.h" +#include "pbc.h" +#include "chargegroup.h" +#include "vec.h" +#include "nrnb.h" +#include "mshift.h" +#include "mdrun.h" +#include "update.h" +#include "physics.h" +#include "main.h" +#include "mdatoms.h" +#include "force.h" +#include "bondf.h" +#include "pme.h" +#include "disre.h" +#include "orires.h" +#include "network.h" +#include "calcmu.h" +#include "constr.h" +#include "xvgr.h" +#include "trnio.h" +#include "xtcio.h" +#include "copyrite.h" +#include "gmx_random.h" +#include "domdec.h" +#include "partdec.h" +#include "gmx_wallcycle.h" +#include "macros.h" + +#include "gromacs/utility/gmxmpi.h" + +void GenerateGibbsProbabilities(real *ene, real *p_k, real *pks, int minfep, int maxfep) +{ + + int i; + real maxene; + + *pks = 0.0; + maxene = ene[minfep]; + /* find the maximum value */ + for (i = minfep; i <= maxfep; i++) + { + if (ene[i] > maxene) + { + maxene = ene[i]; + } + } + /* find the denominator */ + for (i = minfep; i <= maxfep; i++) + { + *pks += exp(ene[i]-maxene); + } + /*numerators*/ + for (i = minfep; i <= maxfep; i++) + { + p_k[i] = exp(ene[i]-maxene) / *pks; + } +} + +void GenerateWeightedGibbsProbabilities(real *ene, real *p_k, real *pks, int nlim, real *nvals, real delta) +{ + + int i; + real maxene; + real *nene; + *pks = 0.0; + + snew(nene, nlim); + for (i = 0; i < nlim; i++) + { + if (nvals[i] == 0) + { + /* add the delta, since we need to make sure it's greater than zero, and + we need a non-arbitrary number? 
*/ + nene[i] = ene[i] + log(nvals[i]+delta); + } + else + { + nene[i] = ene[i] + log(nvals[i]); + } + } + + /* find the maximum value */ + maxene = nene[0]; + for (i = 0; i < nlim; i++) + { + if (nene[i] > maxene) + { + maxene = nene[i]; + } + } + + /* subtract off the maximum, avoiding overflow */ + for (i = 0; i < nlim; i++) + { + nene[i] -= maxene; + } + + /* find the denominator */ + for (i = 0; i < nlim; i++) + { + *pks += exp(nene[i]); + } + + /*numerators*/ + for (i = 0; i < nlim; i++) + { + p_k[i] = exp(nene[i]) / *pks; + } + sfree(nene); +} + +real do_logsum(int N, real *a_n) +{ + + /* RETURN VALUE */ + /* log(\sum_{i=0}^(N-1) exp[a_n]) */ + real maxarg; + real sum; + int i; + real logsum; + /* compute maximum argument to exp(.) */ + + maxarg = a_n[0]; + for (i = 1; i < N; i++) + { + maxarg = max(maxarg, a_n[i]); + } + + /* compute sum of exp(a_n - maxarg) */ + sum = 0.0; + for (i = 0; i < N; i++) + { + sum = sum + exp(a_n[i] - maxarg); + } + + /* compute log sum */ + logsum = log(sum) + maxarg; + return logsum; +} + +int FindMinimum(real *min_metric, int N) +{ + + real min_val; + int min_nval, nval; + + min_nval = 0; + min_val = min_metric[0]; + + for (nval = 0; nval < N; nval++) + { + if (min_metric[nval] < min_val) + { + min_val = min_metric[nval]; + min_nval = nval; + } + } + return min_nval; +} + +static gmx_bool CheckHistogramRatios(int nhisto, real *histo, real ratio) +{ + + int i; + real nmean; + gmx_bool bIfFlat; + + nmean = 0; + for (i = 0; i < nhisto; i++) + { + nmean += histo[i]; + } + + if (nmean == 0) + { + /* no samples! 
is bad!*/ + bIfFlat = FALSE; + return bIfFlat; + } + nmean /= (real)nhisto; + + bIfFlat = TRUE; + for (i = 0; i < nhisto; i++) + { + /* make sure that all points are in the ratio < x < 1/ratio range */ + if (!((histo[i]/nmean < 1.0/ratio) && (histo[i]/nmean > ratio))) + { + bIfFlat = FALSE; + break; + } + } + return bIfFlat; +} + +/* Decide whether the lambda weights count as equilibrated, using the criterion selected by expand->elmceq. + Independently of that criterion, returns FALSE while any state has fewer than expand->lmc_forced_nstart visits (when that is > 0). */ +static gmx_bool CheckIfDoneEquilibrating(int nlim, t_expanded *expand, df_history_t *dfhist, gmx_large_int_t step) +{ + + int i, totalsamples; + gmx_bool bDoneEquilibrating = TRUE; + gmx_bool bIfFlat; + + /* assume we have equilibrated the weights, then check to see if any of the conditions are not met */ + + /* calculate the total number of samples */ + switch (expand->elmceq) + { + case elmceqNO: + /* We have not equilibrated, and won't, ever. */ + return FALSE; + case elmceqYES: + /* we have equilibrated -- we're done */ + return TRUE; + case elmceqSTEPS: + /* first, check if we are equilibrating by steps, if we're still under */ + if (step < expand->equil_steps) + { + bDoneEquilibrating = FALSE; + } + break; + case elmceqSAMPLES: + totalsamples = 0; + for (i = 0; i < nlim; i++) + { + totalsamples += dfhist->n_at_lam[i]; + } + if (totalsamples < expand->equil_samples) + { + bDoneEquilibrating = FALSE; + } + break; + case elmceqNUMATLAM: + for (i = 0; i < nlim; i++) + { + if (dfhist->n_at_lam[i] < expand->equil_n_at_lam) /* we are still doing the initial sweep, so we're definitely not + done equilibrating*/ + { + bDoneEquilibrating = FALSE; + break; + } + } + break; + case elmceqWLDELTA: + if (EWL(expand->elamstats)) /* This check is in readir as well, but + just to be sure */ + { + if (dfhist->wl_delta > expand->equil_wl_delta) + { + bDoneEquilibrating = FALSE; + } + } + break; + case elmceqRATIO: + /* we can use the flatness as a judge of good weights, as long as + we're not doing minvar, or Wang-Landau. + But turn off for now until we figure out exactly how we do this.
+ */ + + if (!(EWL(expand->elamstats) || expand->elamstats == elamstatsMINVAR)) + { + /* we want to use flatness -avoiding- the forced-through samples. Plus, we need to convert to + floats for this histogram function. */ + + real *modhisto; + snew(modhisto, nlim); + for (i = 0; i < nlim; i++) + { + modhisto[i] = 1.0*(dfhist->n_at_lam[i]-expand->lmc_forced_nstart); + } + bIfFlat = CheckHistogramRatios(nlim, modhisto, expand->equil_ratio); + sfree(modhisto); + if (!bIfFlat) + { + bDoneEquilibrating = FALSE; + } + } + /* without this break the result of the flatness test is overwritten by the default case below */ + break; + default: + bDoneEquilibrating = TRUE; + } + /* one last case to go though, if we are doing slow growth to get initial values, we haven't finished equilibrating */ + + if (expand->lmc_forced_nstart > 0) + { + for (i = 0; i < nlim; i++) + { + if (dfhist->n_at_lam[i] < expand->lmc_forced_nstart) /* we are still doing the initial sweep, so we're definitely not + done equilibrating*/ + { + bDoneEquilibrating = FALSE; + break; + } + } + } + return bDoneEquilibrating; +} + +static gmx_bool UpdateWeights(int nlim, t_expanded *expand, df_history_t *dfhist, + int fep_state, real *scaled_lamee, real *weighted_lamee, gmx_large_int_t step) +{ + real maxdiff = 0.000000001; + gmx_bool bSufficientSamples; + int i, k, n, nz, indexi, indexk, min_n, max_n, nlam, totali; + int n0, np1, nm1, nval, min_nvalm, min_nvalp, maxc; + real omega_m1_0, omega_p1_m1, omega_m1_p1, omega_p1_0, clam_osum; + real de, de_function, dr, denom, maxdr, pks = 0; + real min_val, cnval, zero_sum_weights; + real *omegam_array, *weightsm_array, *omegap_array, *weightsp_array, *varm_array, *varp_array, *dwp_array, *dwm_array; + real clam_varm, clam_varp, clam_weightsm, clam_weightsp, clam_minvar; + real *lam_weights, *lam_minvar_corr, *lam_variance, *lam_dg, *p_k; + real *numweighted_lamee, *logfrac; + int *nonzero; + real chi_m1_0, chi_p1_0, chi_m2_0, chi_p2_0, chi_p1_m1, chi_p2_m1, chi_m1_p1, chi_m2_p1; + + /* if we have equilibrated the weights, exit now */ + if (dfhist->bEquil) + { + return
FALSE; + } + + if (CheckIfDoneEquilibrating(nlim, expand, dfhist, step)) + { + dfhist->bEquil = TRUE; + /* zero out the visited states so we know how many equilibrated states we have + from here on out.*/ + for (i = 0; i < nlim; i++) + { + dfhist->n_at_lam[i] = 0; + } + return TRUE; + } + + /* If we reached this far, we have not equilibrated yet, keep on + going resetting the weights */ + + if (EWL(expand->elamstats)) + { + if (expand->elamstats == elamstatsWL) /* Standard Wang-Landau */ + { + dfhist->sum_weights[fep_state] -= dfhist->wl_delta; + dfhist->wl_histo[fep_state] += 1.0; + } + else if (expand->elamstats == elamstatsWWL) /* Weighted Wang-Landau */ + { + snew(p_k, nlim); + + /* first increment count */ + GenerateGibbsProbabilities(weighted_lamee, p_k, &pks, 0, nlim-1); + for (i = 0; i < nlim; i++) + { + dfhist->wl_histo[i] += p_k[i]; + } + + /* then increment weights (uses count) */ + pks = 0.0; + GenerateWeightedGibbsProbabilities(weighted_lamee, p_k, &pks, nlim, dfhist->wl_histo, dfhist->wl_delta); + + for (i = 0; i < nlim; i++) + { + dfhist->sum_weights[i] -= dfhist->wl_delta*p_k[i]; + } + /* Alternate definition, using logarithms. Shouldn't make very much difference! 
*/ + /* + real di; + for (i=0;iwl_delta*p_k[i]; + dfhist->sum_weights[i] -= log(di); + } + */ + sfree(p_k); + } + + zero_sum_weights = dfhist->sum_weights[0]; + for (i = 0; i < nlim; i++) + { + dfhist->sum_weights[i] -= zero_sum_weights; + } + } + + if (expand->elamstats == elamstatsBARKER || expand->elamstats == elamstatsMETROPOLIS || expand->elamstats == elamstatsMINVAR) + { + + de_function = 0; /* to get rid of warnings, but this value will not be used because of the logic */ + maxc = 2*expand->c_range+1; + + snew(lam_dg, nlim); + snew(lam_variance, nlim); + + snew(omegap_array, maxc); + snew(weightsp_array, maxc); + snew(varp_array, maxc); + snew(dwp_array, maxc); + + snew(omegam_array, maxc); + snew(weightsm_array, maxc); + snew(varm_array, maxc); + snew(dwm_array, maxc); + + /* unpack the current lambdas -- we will only update 2 of these */ + + for (i = 0; i < nlim-1; i++) + { /* only through the second to last */ + lam_dg[i] = dfhist->sum_dg[i+1] - dfhist->sum_dg[i]; + lam_variance[i] = pow(dfhist->sum_variance[i+1], 2) - pow(dfhist->sum_variance[i], 2); + } + + /* accumulate running averages */ + for (nval = 0; nval < maxc; nval++) + { + /* constants for later use */ + cnval = (real)(nval-expand->c_range); + /* actually, should be able to rewrite it w/o exponential, for better numerical stability */ + if (fep_state > 0) + { + de = exp(cnval - (scaled_lamee[fep_state]-scaled_lamee[fep_state-1])); + if (expand->elamstats == elamstatsBARKER || expand->elamstats == elamstatsMINVAR) + { + de_function = 1.0/(1.0+de); + } + else if (expand->elamstats == elamstatsMETROPOLIS) + { + if (de < 1.0) + { + de_function = 1.0; + } + else + { + de_function = 1.0/de; + } + } + dfhist->accum_m[fep_state][nval] += de_function; + dfhist->accum_m2[fep_state][nval] += de_function*de_function; + } + + if (fep_state < nlim-1) + { + de = exp(-cnval + (scaled_lamee[fep_state+1]-scaled_lamee[fep_state])); + if (expand->elamstats == elamstatsBARKER || expand->elamstats == 
elamstatsMINVAR) + { + de_function = 1.0/(1.0+de); + } + else if (expand->elamstats == elamstatsMETROPOLIS) + { + if (de < 1.0) + { + de_function = 1.0; + } + else + { + de_function = 1.0/de; + } + } + dfhist->accum_p[fep_state][nval] += de_function; + dfhist->accum_p2[fep_state][nval] += de_function*de_function; + } + + /* Metropolis transition and Barker transition (unoptimized Bennett) acceptance weight determination */ + + n0 = dfhist->n_at_lam[fep_state]; + if (fep_state > 0) + { + nm1 = dfhist->n_at_lam[fep_state-1]; + } + else + { + nm1 = 0; + } + if (fep_state < nlim-1) + { + np1 = dfhist->n_at_lam[fep_state+1]; + } + else + { + np1 = 0; + } + + /* logic SHOULD keep these all set correctly whatever the logic, but apparently it can't figure it out. */ + chi_m1_0 = chi_p1_0 = chi_m2_0 = chi_p2_0 = chi_p1_m1 = chi_p2_m1 = chi_m1_p1 = chi_m2_p1 = 0; + + if (n0 > 0) + { + chi_m1_0 = dfhist->accum_m[fep_state][nval]/n0; + chi_p1_0 = dfhist->accum_p[fep_state][nval]/n0; + chi_m2_0 = dfhist->accum_m2[fep_state][nval]/n0; + chi_p2_0 = dfhist->accum_p2[fep_state][nval]/n0; + } + + if ((fep_state > 0 ) && (nm1 > 0)) + { + chi_p1_m1 = dfhist->accum_p[fep_state-1][nval]/nm1; + chi_p2_m1 = dfhist->accum_p2[fep_state-1][nval]/nm1; + } + + if ((fep_state < nlim-1) && (np1 > 0)) + { + chi_m1_p1 = dfhist->accum_m[fep_state+1][nval]/np1; + chi_m2_p1 = dfhist->accum_m2[fep_state+1][nval]/np1; + } + + omega_m1_0 = 0; + omega_p1_0 = 0; + clam_weightsm = 0; + clam_weightsp = 0; + clam_varm = 0; + clam_varp = 0; + + if (fep_state > 0) + { + if (n0 > 0) + { + omega_m1_0 = chi_m2_0/(chi_m1_0*chi_m1_0) - 1.0; + } + if (nm1 > 0) + { + omega_p1_m1 = chi_p2_m1/(chi_p1_m1*chi_p1_m1) - 1.0; + } + if ((n0 > 0) && (nm1 > 0)) + { + clam_weightsm = (log(chi_m1_0) - log(chi_p1_m1)) + cnval; + clam_varm = (1.0/n0)*(omega_m1_0) + (1.0/nm1)*(omega_p1_m1); + } + } + + if (fep_state < nlim-1) + { + if (n0 > 0) + { + omega_p1_0 = chi_p2_0/(chi_p1_0*chi_p1_0) - 1.0; + } + if (np1 > 0) + { + 
omega_m1_p1 = chi_m2_p1/(chi_m1_p1*chi_m1_p1) - 1.0; + } + if ((n0 > 0) && (np1 > 0)) + { + clam_weightsp = (log(chi_m1_p1) - log(chi_p1_0)) + cnval; + clam_varp = (1.0/np1)*(omega_m1_p1) + (1.0/n0)*(omega_p1_0); + } + } + + if (n0 > 0) + { + omegam_array[nval] = omega_m1_0; + } + else + { + omegam_array[nval] = 0; + } + weightsm_array[nval] = clam_weightsm; + varm_array[nval] = clam_varm; + if (nm1 > 0) + { + dwm_array[nval] = fabs( (cnval + log((1.0*n0)/nm1)) - lam_dg[fep_state-1] ); + } + else if (fep_state > 0) + { + dwm_array[nval] = fabs( cnval - lam_dg[fep_state-1] ); + } + else + { + /* fep_state == 0 has no lower neighbour, so lam_dg[fep_state-1] would be read out of bounds. + The minus-side omega/weight/variance entries are all zero in this case, so any finite distance works here. */ + dwm_array[nval] = fabs( cnval ); + } + + if (n0 > 0) + { + omegap_array[nval] = omega_p1_0; + } + else + { + omegap_array[nval] = 0; + } + weightsp_array[nval] = clam_weightsp; + varp_array[nval] = clam_varp; + if ((np1 > 0) && (n0 > 0)) + { + dwp_array[nval] = fabs( (cnval + log((1.0*np1)/n0)) - lam_dg[fep_state] ); + } + else + { + dwp_array[nval] = fabs( cnval - lam_dg[fep_state] ); + } + + } + + /* find the C's closest to the old weights value */ + + min_nvalm = FindMinimum(dwm_array, maxc); + omega_m1_0 = omegam_array[min_nvalm]; + clam_weightsm = weightsm_array[min_nvalm]; + clam_varm = varm_array[min_nvalm]; + + min_nvalp = FindMinimum(dwp_array, maxc); + omega_p1_0 = omegap_array[min_nvalp]; + clam_weightsp = weightsp_array[min_nvalp]; + clam_varp = varp_array[min_nvalp]; + + clam_osum = omega_m1_0 + omega_p1_0; + clam_minvar = 0; + if (clam_osum > 0) + { + clam_minvar = 0.5*log(clam_osum); + } + + if (fep_state > 0) + { + lam_dg[fep_state-1] = clam_weightsm; + lam_variance[fep_state-1] = clam_varm; + } + + if (fep_state < nlim-1) + { + lam_dg[fep_state] = clam_weightsp; + lam_variance[fep_state] = clam_varp; + } + + if (expand->elamstats == elamstatsMINVAR) + { + bSufficientSamples = TRUE; + /* make sure they are all past a threshold */ + for (i = 0; i < nlim; i++) + { + if (dfhist->n_at_lam[i] < expand->minvarmin) + { + bSufficientSamples = FALSE; + } + } + if (bSufficientSamples) + { + dfhist->sum_minvar[fep_state] = clam_minvar; + if
(fep_state == 0) + { + for (i = 0; i < nlim; i++) + { + dfhist->sum_minvar[i] += (expand->minvar_const-clam_minvar); + } + expand->minvar_const = clam_minvar; + dfhist->sum_minvar[fep_state] = 0.0; + } + else + { + dfhist->sum_minvar[fep_state] -= expand->minvar_const; + } + } + } + + /* we need to rezero minvar now, since it could change at fep_state = 0 */ + dfhist->sum_dg[0] = 0.0; + dfhist->sum_variance[0] = 0.0; + dfhist->sum_weights[0] = dfhist->sum_dg[0] + dfhist->sum_minvar[0]; /* should be zero */ + + for (i = 1; i < nlim; i++) + { + dfhist->sum_dg[i] = lam_dg[i-1] + dfhist->sum_dg[i-1]; + dfhist->sum_variance[i] = sqrt(lam_variance[i-1] + pow(dfhist->sum_variance[i-1], 2)); + dfhist->sum_weights[i] = dfhist->sum_dg[i] + dfhist->sum_minvar[i]; + } + + sfree(lam_dg); + sfree(lam_variance); + + sfree(omegam_array); + sfree(weightsm_array); + sfree(varm_array); + sfree(dwm_array); + + sfree(omegap_array); + sfree(weightsp_array); + sfree(varp_array); + sfree(dwp_array); + } + return FALSE; +} + +static int ChooseNewLambda(FILE *log, int nlim, t_expanded *expand, df_history_t *dfhist, int fep_state, real *weighted_lamee, real *p_k, gmx_rng_t rng) +{ + /* Choose new lambda value, and update transition matrix */ + + int i, ifep, jfep, minfep, maxfep, lamnew, lamtrial, starting_fep_state; + real r1, r2, pks, de_old, de_new, de, trialprob, tprob = 0; + real **Tij; + real *propose, *accept, *remainder; + real sum, pnorm; + gmx_bool bRestricted; + + starting_fep_state = fep_state; + lamnew = fep_state; /* so that there is a default setting -- stays the same */ + + if (!EWL(expand->elamstats)) /* ignore equilibrating the weights if using WL */ + { + if ((expand->lmc_forced_nstart > 0) && (dfhist->n_at_lam[nlim-1] <= expand->lmc_forced_nstart)) + { + /* Use a marching method to run through the lambdas and get preliminary free energy data, + before starting 'free' sampling. 
We start free sampling when we have enough at each lambda */ + + /* if we have enough at this lambda, move on to the next one */ + + if (dfhist->n_at_lam[fep_state] == expand->lmc_forced_nstart) + { + lamnew = fep_state+1; + if (lamnew == nlim) /* whoops, stepped too far! */ + { + lamnew -= 1; + } + } + else + { + lamnew = fep_state; + } + return lamnew; + } + } + + snew(propose, nlim); + snew(accept, nlim); + snew(remainder, nlim); + + for (i = 0; i < expand->lmc_repeats; i++) + { + + for (ifep = 0; ifep < nlim; ifep++) + { + propose[ifep] = 0; + accept[ifep] = 0; + } + + if ((expand->elmcmove == elmcmoveGIBBS) || (expand->elmcmove == elmcmoveMETGIBBS)) + { + bRestricted = TRUE; + /* use the Gibbs sampler, with restricted range */ + if (expand->gibbsdeltalam < 0) + { + minfep = 0; + maxfep = nlim-1; + bRestricted = FALSE; + } + else + { + minfep = fep_state - expand->gibbsdeltalam; + maxfep = fep_state + expand->gibbsdeltalam; + if (minfep < 0) + { + minfep = 0; + } + if (maxfep > nlim-1) + { + maxfep = nlim-1; + } + } + + GenerateGibbsProbabilities(weighted_lamee, p_k, &pks, minfep, maxfep); + + if (expand->elmcmove == elmcmoveGIBBS) + { + for (ifep = minfep; ifep <= maxfep; ifep++) + { + propose[ifep] = p_k[ifep]; + accept[ifep] = 1.0; + } + /* Gibbs sampling */ + r1 = gmx_rng_uniform_real(rng); + for (lamnew = minfep; lamnew <= maxfep; lamnew++) + { + if (r1 <= p_k[lamnew]) + { + break; + } + r1 -= p_k[lamnew]; + } + } + else if (expand->elmcmove == elmcmoveMETGIBBS) + { + + /* Metropolized Gibbs sampling */ + for (ifep = minfep; ifep <= maxfep; ifep++) + { + remainder[ifep] = 1 - p_k[ifep]; + } + + /* find the proposal probabilities */ + + if (remainder[fep_state] == 0) + { + /* only the current state has any probability */ + /* we have to stay at the current state */ + lamnew = fep_state; + } + else + { + for (ifep = minfep; ifep <= maxfep; ifep++) + { + if (ifep != fep_state) + { + propose[ifep] = p_k[ifep]/remainder[fep_state]; + } + else + { + 
propose[ifep] = 0; + } + } + + r1 = gmx_rng_uniform_real(rng); + for (lamtrial = minfep; lamtrial <= maxfep; lamtrial++) + { + pnorm = p_k[lamtrial]/remainder[fep_state]; + if (lamtrial != fep_state) + { + if (r1 <= pnorm) + { + break; + } + r1 -= pnorm; + } + } + + /* we have now selected lamtrial according to p(lamtrial)/1-p(fep_state) */ + tprob = 1.0; + /* trial probability is min{1,\frac{1 - p(old)}{1-p(new)} MRS 1/8/2008 */ + trialprob = (remainder[fep_state])/(remainder[lamtrial]); + if (trialprob < tprob) + { + tprob = trialprob; + } + r2 = gmx_rng_uniform_real(rng); + if (r2 < tprob) + { + lamnew = lamtrial; + } + else + { + lamnew = fep_state; + } + } + + /* now figure out the acceptance probability for each */ + for (ifep = minfep; ifep <= maxfep; ifep++) + { + tprob = 1.0; + if (remainder[ifep] != 0) + { + trialprob = (remainder[fep_state])/(remainder[ifep]); + } + else + { + trialprob = 1.0; /* this state is the only choice! */ + } + if (trialprob < tprob) + { + tprob = trialprob; + } + /* probability for fep_state=0, but that's fine, it's never proposed! */ + accept[ifep] = tprob; + } + } + + if (lamnew > maxfep) + { + /* it's possible some rounding is failing */ + if (remainder[fep_state] < 2.0e-15) + { + /* probably numerical rounding error -- no state other than the original has weight */ + lamnew = fep_state; + } + else + { + /* probably not a numerical issue */ + int loc = 0; + int nerror = 200+(maxfep-minfep+1)*60; + char *errorstr; + snew(errorstr, nerror); + /* if its greater than maxfep, then something went wrong -- probably underflow in the calculation + of sum weights. 
Generated detailed info for failure */ + loc += sprintf(errorstr, "Something wrong in choosing new lambda state with a Gibbs move -- probably underflow in weight determination.\nDenominator is: %3d%17.10e\n i dE numerator weights\n", 0, pks); + for (ifep = minfep; ifep <= maxfep; ifep++) + { + loc += sprintf(&errorstr[loc], "%3d %17.10e%17.10e%17.10e\n", ifep, weighted_lamee[ifep], p_k[ifep], dfhist->sum_weights[ifep]); + } + /* pass the assembled text as an argument, never as the format string itself */ + gmx_fatal(FARGS, "%s", errorstr); + } + } + } + else if ((expand->elmcmove == elmcmoveMETROPOLIS) || (expand->elmcmove == elmcmoveBARKER)) + { + /* use the metropolis sampler with trial +/- 1 */ + r1 = gmx_rng_uniform_real(rng); + if (r1 < 0.5) + { + if (fep_state == 0) + { + lamtrial = fep_state; + } + else + { + lamtrial = fep_state-1; + } + } + else + { + if (fep_state == nlim-1) + { + lamtrial = fep_state; + } + else + { + lamtrial = fep_state+1; + } + } + + de = weighted_lamee[lamtrial] - weighted_lamee[fep_state]; + if (expand->elmcmove == elmcmoveMETROPOLIS) + { + tprob = 1.0; + trialprob = exp(de); + if (trialprob < tprob) + { + tprob = trialprob; + } + propose[fep_state] = 0; + propose[lamtrial] = 1.0; /* note that this overwrites the above line if fep_state = ntrial, which only occurs at the ends */ + accept[fep_state] = 1.0; /* doesn't actually matter, never proposed unless fep_state = ntrial, in which case it's 1.0 anyway */ + accept[lamtrial] = tprob; + + } + else if (expand->elmcmove == elmcmoveBARKER) + { + tprob = 1.0/(1.0+exp(-de)); + + propose[fep_state] = (1-tprob); + propose[lamtrial] += tprob; /* we add, to account for the fact that at the end, they might be the same point */ + accept[fep_state] = 1.0; + accept[lamtrial] = 1.0; + } + + r2 = gmx_rng_uniform_real(rng); + if (r2 < tprob) + { + lamnew = lamtrial; + } + else + { + lamnew = fep_state; + } + } + + for (ifep = 0; ifep < nlim; ifep++) + { + dfhist->Tij[fep_state][ifep] += propose[ifep]*accept[ifep]; + dfhist->Tij[fep_state][fep_state] += propose[ifep]*(1.0-accept[ifep]);
+ } + fep_state = lamnew; + } + + dfhist->Tij_empirical[starting_fep_state][lamnew] += 1.0; + + sfree(propose); + sfree(accept); + sfree(remainder); + + return lamnew; +} + +/* print out the weights to the log, along with current state */ +extern void PrintFreeEnergyInfoToFile(FILE *outfile, t_lambda *fep, t_expanded *expand, t_simtemp *simtemp, df_history_t *dfhist, + int nlam, int frequency, gmx_large_int_t step) +{ + int nlim, i, ifep, jfep; + real dw, dg, dv, dm, Tprint; + real *temps; + const char *print_names[efptNR] = {" FEPL", "MassL", "CoulL", " VdwL", "BondL", "RestT", "Temp.(K)"}; + gmx_bool bSimTemp = FALSE; + + nlim = fep->n_lambda; + if (simtemp != NULL) + { + bSimTemp = TRUE; + } + + if (mod(step, frequency) == 0) + { + fprintf(outfile, " MC-lambda information\n"); + if (EWL(expand->elamstats) && (!(dfhist->bEquil))) + { + fprintf(outfile, " Wang-Landau incrementor is: %11.5g\n", dfhist->wl_delta); + } + fprintf(outfile, " N"); + for (i = 0; i < efptNR; i++) + { + if (fep->separate_dvdl[i]) + { + fprintf(outfile, "%7s", print_names[i]); + } + else if ((i == efptTEMPERATURE) && bSimTemp) + { + fprintf(outfile, "%10s", print_names[i]); /* more space for temperature formats */ + } + } + fprintf(outfile, " Count "); + if (expand->elamstats == elamstatsMINVAR) + { + fprintf(outfile, "W(in kT) G(in kT) dG(in kT) dV(in kT)\n"); + } + else + { + fprintf(outfile, "G(in kT) dG(in kT)\n"); + } + for (ifep = 0; ifep < nlim; ifep++) + { + if (ifep == nlim-1) + { + dw = 0.0; + dg = 0.0; + dv = 0.0; + dm = 0.0; + } + else + { + dw = dfhist->sum_weights[ifep+1] - dfhist->sum_weights[ifep]; + dg = dfhist->sum_dg[ifep+1] - dfhist->sum_dg[ifep]; + dv = sqrt(pow(dfhist->sum_variance[ifep+1], 2) - pow(dfhist->sum_variance[ifep], 2)); + dm = dfhist->sum_minvar[ifep+1] - dfhist->sum_minvar[ifep]; + + } + fprintf(outfile, "%3d", (ifep+1)); + for (i = 0; i < efptNR; i++) + { + if (fep->separate_dvdl[i]) + { + fprintf(outfile, "%7.3f", fep->all_lambda[i][ifep]); + } + else if 
(i == efptTEMPERATURE && bSimTemp) + { + fprintf(outfile, "%9.3f", simtemp->temperatures[ifep]); + } + } + if (EWL(expand->elamstats) && (!(dfhist->bEquil))) /* if performing WL and still haven't equilibrated */ + { + if (expand->elamstats == elamstatsWL) + { + fprintf(outfile, " %8d", (int)dfhist->wl_histo[ifep]); + } + else + { + fprintf(outfile, " %8.3f", dfhist->wl_histo[ifep]); + } + } + else /* we have equilibrated weights */ + { + fprintf(outfile, " %8d", dfhist->n_at_lam[ifep]); + } + if (expand->elamstats == elamstatsMINVAR) + { + fprintf(outfile, " %10.5f %10.5f %10.5f %10.5f", dfhist->sum_weights[ifep], dfhist->sum_dg[ifep], dg, dv); + } + else + { + fprintf(outfile, " %10.5f %10.5f", dfhist->sum_weights[ifep], dw); + } + if (ifep == nlam) + { + fprintf(outfile, " <<\n"); + } + else + { + fprintf(outfile, " \n"); + } + } + fprintf(outfile, "\n"); + + if ((mod(step, expand->nstTij) == 0) && (expand->nstTij > 0) && (step > 0)) + { + fprintf(outfile, " Transition Matrix\n"); + for (ifep = 0; ifep < nlim; ifep++) + { + fprintf(outfile, "%12d", (ifep+1)); + } + fprintf(outfile, "\n"); + for (ifep = 0; ifep < nlim; ifep++) + { + for (jfep = 0; jfep < nlim; jfep++) + { + if (dfhist->n_at_lam[ifep] > 0) + { + if (expand->bSymmetrizedTMatrix) + { + Tprint = (dfhist->Tij[ifep][jfep]+dfhist->Tij[jfep][ifep])/(dfhist->n_at_lam[ifep]+dfhist->n_at_lam[jfep]); + } + else + { + Tprint = (dfhist->Tij[ifep][jfep])/(dfhist->n_at_lam[ifep]); + } + } + else + { + Tprint = 0.0; + } + fprintf(outfile, "%12.8f", Tprint); + } + fprintf(outfile, "%3d\n", (ifep+1)); + } + + fprintf(outfile, " Empirical Transition Matrix\n"); + for (ifep = 0; ifep < nlim; ifep++) + { + fprintf(outfile, "%12d", (ifep+1)); + } + fprintf(outfile, "\n"); + for (ifep = 0; ifep < nlim; ifep++) + { + for (jfep = 0; jfep < nlim; jfep++) + { + if (dfhist->n_at_lam[ifep] > 0) + { + if (expand->bSymmetrizedTMatrix) + { + Tprint = 
(dfhist->Tij_empirical[ifep][jfep]+dfhist->Tij_empirical[jfep][ifep])/(dfhist->n_at_lam[ifep]+dfhist->n_at_lam[jfep]); + } + else + { + Tprint = dfhist->Tij_empirical[ifep][jfep]/(dfhist->n_at_lam[ifep]); + } + } + else + { + Tprint = 0.0; + } + fprintf(outfile, "%12.8f", Tprint); + } + fprintf(outfile, "%3d\n", (ifep+1)); + } + } + } +} + +extern void get_mc_state(gmx_rng_t rng, t_state *state) +{ + gmx_rng_get_state(rng, state->mc_rng, state->mc_rngi); +} + +extern void set_mc_state(gmx_rng_t rng, t_state *state) +{ + gmx_rng_set_state(rng, state->mc_rng, state->mc_rngi[0]); +} + +extern int ExpandedEnsembleDynamics(FILE *log, t_inputrec *ir, gmx_enerdata_t *enerd, + t_state *state, t_extmass *MassQ, df_history_t *dfhist, + gmx_large_int_t step, gmx_rng_t mcrng, + rvec *v, t_mdatoms *mdatoms) +{ + real *pfep_lamee, *p_k, *scaled_lamee, *weighted_lamee; + int i, nlam, nlim, lamnew, totalsamples; + real oneovert, maxscaled = 0, maxweighted = 0; + t_expanded *expand; + t_simtemp *simtemp; + double *temperature_lambdas; + gmx_bool bIfReset, bSwitchtoOneOverT, bDoneEquilibrating = FALSE; + + expand = ir->expandedvals; + simtemp = ir->simtempvals; + nlim = ir->fepvals->n_lambda; + nlam = state->fep_state; + + snew(scaled_lamee, nlim); + snew(weighted_lamee, nlim); + snew(pfep_lamee, nlim); + snew(p_k, nlim); + + if (expand->bInit_weights) /* if initialized weights, we need to fill them in */ + { + dfhist->wl_delta = expand->init_wl_delta; /* MRS -- this would fit better somewhere else? */ + for (i = 0; i < nlim; i++) + { + dfhist->sum_weights[i] = expand->init_lambda_weights[i]; + dfhist->sum_dg[i] = expand->init_lambda_weights[i]; + } + expand->bInit_weights = FALSE; + } + + /* update the count at the current lambda*/ + dfhist->n_at_lam[nlam]++; + + /* need to calculate the PV term somewhere, but not needed here? Not until there's a lambda state that's + pressure controlled.*/ + /* + pVTerm = 0; + where does this PV term go? 
+ for (i=0;iefep != efepNO) + { + for (i = 0; i < nlim; i++) + { + if (ir->bSimTemp) + { + /* Note -- this assumes no mass changes, since kinetic energy is not added . . . */ + scaled_lamee[i] = (enerd->enerpart_lambda[i+1]-enerd->enerpart_lambda[0])/(simtemp->temperatures[i]*BOLTZ) + + enerd->term[F_EPOT]*(1.0/(simtemp->temperatures[i])- 1.0/(simtemp->temperatures[nlam]))/BOLTZ; + } + else + { + scaled_lamee[i] = (enerd->enerpart_lambda[i+1]-enerd->enerpart_lambda[0])/(expand->mc_temp*BOLTZ); + /* mc_temp is currently set to the system reft unless otherwise defined */ + } + + /* save these energies for printing, so they don't get overwritten by the next step */ + /* they aren't overwritten in the non-free energy case, but we always print with these + for simplicity */ + } + } + else + { + if (ir->bSimTemp) + { + for (i = 0; i < nlim; i++) + { + scaled_lamee[i] = enerd->term[F_EPOT]*(1.0/simtemp->temperatures[i] - 1.0/simtemp->temperatures[nlam])/BOLTZ; + } + } + } + + for (i = 0; i < nlim; i++) + { + pfep_lamee[i] = scaled_lamee[i]; + + weighted_lamee[i] = dfhist->sum_weights[i] - scaled_lamee[i]; + if (i == 0) + { + maxscaled = scaled_lamee[i]; + maxweighted = weighted_lamee[i]; + } + else + { + if (scaled_lamee[i] > maxscaled) + { + maxscaled = scaled_lamee[i]; + } + if (weighted_lamee[i] > maxweighted) + { + maxweighted = weighted_lamee[i]; + } + } + } + + for (i = 0; i < nlim; i++) + { + scaled_lamee[i] -= maxscaled; + weighted_lamee[i] -= maxweighted; + } + + /* update weights - we decide whether or not to actually do this inside */ + + bDoneEquilibrating = UpdateWeights(nlim, expand, dfhist, nlam, scaled_lamee, weighted_lamee, step); + if (bDoneEquilibrating) + { + if (log) + { + fprintf(log, "\nStep %d: Weights have equilibrated, using criteria: %s\n", (int)step, elmceq_names[expand->elmceq]); + } + } + + lamnew = ChooseNewLambda(log, nlim, expand, dfhist, nlam, weighted_lamee, p_k, mcrng); + /* if using simulated tempering, we need to adjust the 
temperatures */ + if (ir->bSimTemp && (lamnew != nlam)) /* only need to change the temperatures if we change the state */ + { + int i, j, n, d; + real *buf_ngtc; + real told; + int nstart, nend, gt; + + snew(buf_ngtc, ir->opts.ngtc); + + for (i = 0; i < ir->opts.ngtc; i++) + { + if (ir->opts.ref_t[i] > 0) + { + told = ir->opts.ref_t[i]; + ir->opts.ref_t[i] = simtemp->temperatures[lamnew]; + buf_ngtc[i] = sqrt(ir->opts.ref_t[i]/told); /* using the buffer as temperature scaling */ + } + } + + /* we don't need to manipulate the ekind information, as it isn't due to be reset until the next step anyway */ + + nstart = mdatoms->start; + nend = nstart + mdatoms->homenr; + for (n = nstart; n < nend; n++) + { + gt = 0; + if (mdatoms->cTC) + { + gt = mdatoms->cTC[n]; + } + for (d = 0; d < DIM; d++) + { + v[n][d] *= buf_ngtc[gt]; + } + } + + if (IR_NPT_TROTTER(ir) || IR_NPH_TROTTER(ir) || IR_NVT_TROTTER(ir)) + { + /* we need to recalculate the masses if the temperature has changed */ + init_npt_masses(ir, state, MassQ, FALSE); + for (i = 0; i < state->nnhpres; i++) + { + for (j = 0; j < ir->opts.nhchainlength; j++) + { + state->nhpres_vxi[i+j] *= buf_ngtc[i]; + } + } + for (i = 0; i < ir->opts.ngtc; i++) + { + for (j = 0; j < ir->opts.nhchainlength; j++) + { + state->nosehoover_vxi[i+j] *= buf_ngtc[i]; + } + } + } + sfree(buf_ngtc); + } + + /* now check on the Wang-Landau updating critera */ + + if (EWL(expand->elamstats)) + { + bSwitchtoOneOverT = FALSE; + if (expand->bWLoneovert) + { + totalsamples = 0; + for (i = 0; i < nlim; i++) + { + totalsamples += dfhist->n_at_lam[i]; + } + oneovert = (1.0*nlim)/totalsamples; + /* oneovert has decreasd by a bit since last time, so we actually make sure its within one of this number */ + /* switch to 1/t incrementing when wl_delta has decreased at least once, and wl_delta is now less than 1/t */ + if ((dfhist->wl_delta <= ((totalsamples)/(totalsamples-1.00001))*oneovert) && + (dfhist->wl_delta < expand->init_wl_delta)) + { + 
bSwitchtoOneOverT = TRUE; + } + } + if (bSwitchtoOneOverT) + { + dfhist->wl_delta = oneovert; /* now we reduce by this each time, instead of only at flatness */ + } + else + { + bIfReset = CheckHistogramRatios(nlim, dfhist->wl_histo, expand->wl_ratio); + if (bIfReset) + { + for (i = 0; i < nlim; i++) + { + dfhist->wl_histo[i] = 0; + } + dfhist->wl_delta *= expand->wl_scale; + if (log) + { + fprintf(log, "\nStep %d: weights are now:", (int)step); + for (i = 0; i < nlim; i++) + { + fprintf(log, " %.5f", dfhist->sum_weights[i]); + } + fprintf(log, "\n"); + } + } + } + } ++ sfree(pfep_lamee); + sfree(scaled_lamee); + sfree(weighted_lamee); + sfree(p_k); + + return lamnew; +} diff --cc src/gromacs/mdlib/force.c index 278c8de8f3,0000000000..7f0ae3ba2f mode 100644,000000..100644 --- a/src/gromacs/mdlib/force.c +++ b/src/gromacs/mdlib/force.c @@@ -1,956 -1,0 +1,963 @@@ +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*- + * + * + * This source code is part of + * + * G R O M A C S + * + * GROningen MAchine for Chemical Simulations + * + * VERSION 3.2.0 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others. + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team, + * check out http://www.gromacs.org for more information. + + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * If you want to redistribute modifications, please consider that + * scientific software is very special. Version control is crucial - + * bugs must be traceable. We will be happy to consider code for + * inclusion in the official distribution, but derived work must not + * be called official GROMACS. 
Details are found in the README & COPYING + * files - if they are missing, get the official version at www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the papers on the package - you can find them in the top README file. + * + * For more info, check our website at http://www.gromacs.org + * + * And Hey: + * GROwing Monsters And Cloning Shrimps + */ +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include +#include "sysstuff.h" +#include "typedefs.h" +#include "macros.h" +#include "smalloc.h" +#include "macros.h" +#include "physics.h" +#include "force.h" +#include "nonbonded.h" +#include "names.h" +#include "network.h" +#include "pbc.h" +#include "ns.h" +#include "nrnb.h" +#include "bondf.h" +#include "mshift.h" +#include "txtdump.h" +#include "coulomb.h" +#include "pme.h" +#include "mdrun.h" +#include "domdec.h" +#include "partdec.h" +#include "qmmm.h" +#include "gmx_omp_nthreads.h" + + +void ns(FILE *fp, + t_forcerec *fr, + rvec x[], + matrix box, + gmx_groups_t *groups, + t_grpopts *opts, + gmx_localtop_t *top, + t_mdatoms *md, + t_commrec *cr, + t_nrnb *nrnb, + real *lambda, + real *dvdlambda, + gmx_grppairener_t *grppener, + gmx_bool bFillGrid, + gmx_bool bDoLongRangeNS) +{ + char *ptr; + int nsearch; + + + if (!fr->ns.nblist_initialized) + { + init_neighbor_list(fp, fr, md->homenr); + } + + if (fr->bTwinRange) + { + fr->nlr = 0; + } + + nsearch = search_neighbours(fp, fr, x, box, top, groups, cr, nrnb, md, + lambda, dvdlambda, grppener, + bFillGrid, bDoLongRangeNS, TRUE); + if (debug) + { + fprintf(debug, "nsearch = %d\n", nsearch); + } + + /* Check whether we have to do dynamic load balancing */ + /*if ((nsb->nstDlb > 0) && (mod(step,nsb->nstDlb) == 0)) + count_nb(cr,nsb,&(top->blocks[ebCGS]),nns,fr->nlr, + &(top->idef),opts->ngener); + */ + if (fr->ns.dump_nl > 0) + { + dump_nblist(fp, cr, fr, fr->ns.dump_nl); + } +} + +static void reduce_thread_forces(int n, rvec *f, + tensor vir, + real *Vcorr, + 
int efpt_ind, real *dvdl, + int nthreads, f_thread_t *f_t) +{ + int t, i; + + /* This reduction can run over any number of threads */ +#pragma omp parallel for num_threads(gmx_omp_nthreads_get(emntBonded)) private(t) schedule(static) + for (i = 0; i < n; i++) + { + for (t = 1; t < nthreads; t++) + { + rvec_inc(f[i], f_t[t].f[i]); + } + } + for (t = 1; t < nthreads; t++) + { + *Vcorr += f_t[t].Vcorr; + *dvdl += f_t[t].dvdl[efpt_ind]; + m_add(vir, f_t[t].vir, vir); + } +} + +void do_force_lowlevel(FILE *fplog, gmx_large_int_t step, + t_forcerec *fr, t_inputrec *ir, + t_idef *idef, t_commrec *cr, + t_nrnb *nrnb, gmx_wallcycle_t wcycle, + t_mdatoms *md, + t_grpopts *opts, + rvec x[], history_t *hist, + rvec f[], + rvec f_longrange[], + gmx_enerdata_t *enerd, + t_fcdata *fcd, + gmx_mtop_t *mtop, + gmx_localtop_t *top, + gmx_genborn_t *born, + t_atomtypes *atype, + gmx_bool bBornRadii, + matrix box, + t_lambda *fepvals, + real *lambda, + t_graph *graph, + t_blocka *excl, + rvec mu_tot[], + int flags, + float *cycles_pme) +{ + int i, j, status; + int donb_flags; + gmx_bool bDoEpot, bSepDVDL, bSB; + int pme_flags; + matrix boxs; + rvec box_size; + real Vsr, Vlr, Vcorr = 0; + t_pbc pbc; + real dvdgb; + char buf[22]; + double clam_i, vlam_i; + real dvdl_dum[efptNR], dvdl, dvdl_nb[efptNR], lam_i[efptNR]; + real dvdlsum; + +#ifdef GMX_MPI + double t0 = 0.0, t1, t2, t3; /* time measurement for coarse load balancing */ +#endif + +#define PRINT_SEPDVDL(s, v, dvdlambda) if (bSepDVDL) {fprintf(fplog, sepdvdlformat, s, v, dvdlambda); } + + + set_pbc(&pbc, fr->ePBC, box); + + /* reset free energy components */ + for (i = 0; i < efptNR; i++) + { + dvdl_nb[i] = 0; + dvdl_dum[i] = 0; + } + + /* Reset box */ + for (i = 0; (i < DIM); i++) + { + box_size[i] = box[i][i]; + } + + bSepDVDL = (fr->bSepDVDL && do_per_step(step, ir->nstlog)); + debug_gmx(); + + /* do QMMM first if requested */ + if (fr->bQMMM) + { + enerd->term[F_EQM] = calculate_QMMM(cr, x, f, fr, md); + } + + if (bSepDVDL) + { 
+ fprintf(fplog, "Step %s: non-bonded V and dVdl for node %d:\n", + gmx_step_str(step, buf), cr->nodeid); + } + + /* Call the short range functions all in one go. */ + +#ifdef GMX_MPI + /*#define TAKETIME ((cr->npmenodes) && (fr->timesteps < 12))*/ +#define TAKETIME FALSE + if (TAKETIME) + { + MPI_Barrier(cr->mpi_comm_mygroup); + t0 = MPI_Wtime(); + } +#endif + + if (ir->nwall) + { + /* foreign lambda component for walls */ + dvdl = do_walls(ir, fr, box, md, x, f, lambda[efptVDW], + enerd->grpp.ener[egLJSR], nrnb); + PRINT_SEPDVDL("Walls", 0.0, dvdl); + enerd->dvdl_lin[efptVDW] += dvdl; + } + + /* If doing GB, reset dvda and calculate the Born radii */ + if (ir->implicit_solvent) + { + wallcycle_sub_start(wcycle, ewcsNONBONDED); + + for (i = 0; i < born->nr; i++) + { + fr->dvda[i] = 0; + } + + if (bBornRadii) + { + calc_gb_rad(cr, fr, ir, top, atype, x, &(fr->gblist), born, md, nrnb); + } + + wallcycle_sub_stop(wcycle, ewcsNONBONDED); + } + + where(); + /* We only do non-bonded calculation with group scheme here, the verlet + * calls are done from do_force_cutsVERLET(). */ + if (fr->cutoff_scheme == ecutsGROUP && (flags & GMX_FORCE_NONBONDED)) + { + donb_flags = 0; + /* Add short-range interactions */ + donb_flags |= GMX_NONBONDED_DO_SR; + + if (flags & GMX_FORCE_FORCES) + { + donb_flags |= GMX_NONBONDED_DO_FORCE; + } + if (flags & GMX_FORCE_ENERGY) + { + donb_flags |= GMX_NONBONDED_DO_POTENTIAL; + } + if (flags & GMX_FORCE_DO_LR) + { + donb_flags |= GMX_NONBONDED_DO_LR; + } + + wallcycle_sub_start(wcycle, ewcsNONBONDED); + do_nonbonded(cr, fr, x, f, f_longrange, md, excl, + &enerd->grpp, box_size, nrnb, + lambda, dvdl_nb, -1, -1, donb_flags); + + /* If we do foreign lambda and we have soft-core interactions + * we have to recalculate the (non-linear) energies contributions. + */ + if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && fepvals->sc_alpha != 0) + { + for (i = 0; i < enerd->n_lambda; i++) + { + for (j = 0; j < efptNR; j++) + { + lam_i[j] = (i == 0 ? 
lambda[j] : fepvals->all_lambda[j][i-1]); + } + reset_foreign_enerdata(enerd); + do_nonbonded(cr, fr, x, f, f_longrange, md, excl, + &(enerd->foreign_grpp), box_size, nrnb, + lam_i, dvdl_dum, -1, -1, + (donb_flags & ~GMX_NONBONDED_DO_FORCE) | GMX_NONBONDED_DO_FOREIGNLAMBDA); + sum_epot(&ir->opts, &(enerd->foreign_grpp), enerd->foreign_term); + enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT]; + } + } + wallcycle_sub_stop(wcycle, ewcsNONBONDED); + where(); + } + + /* If we are doing GB, calculate bonded forces and apply corrections + * to the solvation forces */ + /* MRS: Eventually, many need to include free energy contribution here! */ + if (ir->implicit_solvent) + { + wallcycle_sub_start(wcycle, ewcsBONDED); + calc_gb_forces(cr, md, born, top, atype, x, f, fr, idef, + ir->gb_algorithm, ir->sa_algorithm, nrnb, bBornRadii, &pbc, graph, enerd); + wallcycle_sub_stop(wcycle, ewcsBONDED); + } + +#ifdef GMX_MPI + if (TAKETIME) + { + t1 = MPI_Wtime(); + fr->t_fnbf += t1-t0; + } +#endif + + if (fepvals->sc_alpha != 0) + { + enerd->dvdl_nonlin[efptVDW] += dvdl_nb[efptVDW]; + } + else + { + enerd->dvdl_lin[efptVDW] += dvdl_nb[efptVDW]; + } + + if (fepvals->sc_alpha != 0) + + /* even though coulomb part is linear, we already added it, beacuse we + need to go through the vdw calculation anyway */ + { + enerd->dvdl_nonlin[efptCOUL] += dvdl_nb[efptCOUL]; + } + else + { + enerd->dvdl_lin[efptCOUL] += dvdl_nb[efptCOUL]; + } + + Vsr = 0; + if (bSepDVDL) + { + for (i = 0; i < enerd->grpp.nener; i++) + { + Vsr += + (fr->bBHAM ? + enerd->grpp.ener[egBHAMSR][i] : + enerd->grpp.ener[egLJSR][i]) + + enerd->grpp.ener[egCOULSR][i] + enerd->grpp.ener[egGB][i]; + } + dvdlsum = dvdl_nb[efptVDW] + dvdl_nb[efptCOUL]; + PRINT_SEPDVDL("VdW and Coulomb SR particle-p.", Vsr, dvdlsum); + } + debug_gmx(); + + + if (debug) + { + pr_rvecs(debug, 0, "fshift after SR", fr->fshift, SHIFTS); + } + + /* Shift the coordinates. 
Must be done before bonded forces and PPPM, + * but is also necessary for SHAKE and update, therefore it can NOT + * go when no bonded forces have to be evaluated. + */ + + /* Here sometimes we would not need to shift with NBFonly, + * but we do so anyhow for consistency of the returned coordinates. + */ + if (graph) + { + shift_self(graph, box, x); + if (TRICLINIC(box)) + { + inc_nrnb(nrnb, eNR_SHIFTX, 2*graph->nnodes); + } + else + { + inc_nrnb(nrnb, eNR_SHIFTX, graph->nnodes); + } + } + /* Check whether we need to do bondeds or correct for exclusions */ + if (fr->bMolPBC && + ((flags & GMX_FORCE_BONDED) + || EEL_RF(fr->eeltype) || EEL_FULL(fr->eeltype))) + { + /* Since all atoms are in the rectangular or triclinic unit-cell, + * only single box vector shifts (2 in x) are required. + */ + set_pbc_dd(&pbc, fr->ePBC, cr->dd, TRUE, box); + } + debug_gmx(); + + if (flags & GMX_FORCE_BONDED) + { + wallcycle_sub_start(wcycle, ewcsBONDED); + calc_bonds(fplog, cr->ms, + idef, x, hist, f, fr, &pbc, graph, enerd, nrnb, lambda, md, fcd, + DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL, atype, born, + flags, + fr->bSepDVDL && do_per_step(step, ir->nstlog), step); + + /* Check if we have to determine energy differences + * at foreign lambda's. + */ + if (fepvals->n_lambda > 0 && (flags & GMX_FORCE_DHDL) && + idef->ilsort != ilsortNO_FE) + { + if (idef->ilsort != ilsortFE_SORTED) + { + gmx_incons("The bonded interactions are not sorted for free energy"); + } + for (i = 0; i < enerd->n_lambda; i++) + { + reset_foreign_enerdata(enerd); + for (j = 0; j < efptNR; j++) + { + lam_i[j] = (i == 0 ? lambda[j] : fepvals->all_lambda[j][i-1]); + } + calc_bonds_lambda(fplog, idef, x, fr, &pbc, graph, &(enerd->foreign_grpp), enerd->foreign_term, nrnb, lam_i, md, + fcd, DOMAINDECOMP(cr) ? 
cr->dd->gatindex : NULL); + sum_epot(&ir->opts, &(enerd->foreign_grpp), enerd->foreign_term); + enerd->enerpart_lambda[i] += enerd->foreign_term[F_EPOT]; + } + } + debug_gmx(); + + wallcycle_sub_stop(wcycle, ewcsBONDED); + } + + where(); + + *cycles_pme = 0; + if (EEL_FULL(fr->eeltype)) + { + bSB = (ir->nwall == 2); + if (bSB) + { + copy_mat(box, boxs); + svmul(ir->wall_ewald_zfac, boxs[ZZ], boxs[ZZ]); + box_size[ZZ] *= ir->wall_ewald_zfac; + } + + clear_mat(fr->vir_el_recip); + + if (fr->bEwald) + { + Vcorr = 0; + dvdl = 0; + + /* With the Verlet scheme exclusion forces are calculated + * in the non-bonded kernel. + */ + /* The TPI molecule does not have exclusions with the rest + * of the system and no intra-molecular PME grid contributions + * will be calculated in gmx_pme_calc_energy. + */ + if ((ir->cutoff_scheme == ecutsGROUP && fr->n_tpi == 0) || + ir->ewald_geometry != eewg3D || + ir->epsilon_surface != 0) + { + int nthreads, t; + + wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION); + + if (fr->n_tpi > 0) + { + gmx_fatal(FARGS, "TPI with PME currently only works in a 3D geometry with tin-foil boundary conditions"); + } + + nthreads = gmx_omp_nthreads_get(emntBonded); +#pragma omp parallel for num_threads(nthreads) schedule(static) + for (t = 0; t < nthreads; t++) + { + int s, e, i; + rvec *fnv; + tensor *vir; + real *Vcorrt, *dvdlt; + if (t == 0) + { + fnv = fr->f_novirsum; + vir = &fr->vir_el_recip; + Vcorrt = &Vcorr; + dvdlt = &dvdl; + } + else + { + fnv = fr->f_t[t].f; + vir = &fr->f_t[t].vir; + Vcorrt = &fr->f_t[t].Vcorr; + dvdlt = &fr->f_t[t].dvdl[efptCOUL]; + for (i = 0; i < fr->natoms_force; i++) + { + clear_rvec(fnv[i]); + } + clear_mat(*vir); + } + *dvdlt = 0; + *Vcorrt = + ewald_LRcorrection(fplog, + fr->excl_load[t], fr->excl_load[t+1], + cr, t, fr, + md->chargeA, + md->nChargePerturbed ? md->chargeB : NULL, + ir->cutoff_scheme != ecutsVERLET, + excl, x, bSB ? 
boxs : box, mu_tot, + ir->ewald_geometry, + ir->epsilon_surface, + fnv, *vir, + lambda[efptCOUL], dvdlt); + } + if (nthreads > 1) + { + reduce_thread_forces(fr->natoms_force, fr->f_novirsum, + fr->vir_el_recip, + &Vcorr, efptCOUL, &dvdl, + nthreads, fr->f_t); + } + + wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION); + } + + if (fr->n_tpi == 0) + { + Vcorr += ewald_charge_correction(cr, fr, lambda[efptCOUL], box, + &dvdl, fr->vir_el_recip); + } + + PRINT_SEPDVDL("Ewald excl./charge/dip. corr.", Vcorr, dvdl); + enerd->dvdl_lin[efptCOUL] += dvdl; + } + + status = 0; + Vlr = 0; + dvdl = 0; + switch (fr->eeltype) + { + case eelPME: + case eelPMESWITCH: + case eelPMEUSER: + case eelPMEUSERSWITCH: + case eelP3M_AD: + if (cr->duty & DUTY_PME) + { + assert(fr->n_tpi >= 0); + if (fr->n_tpi == 0 || (flags & GMX_FORCE_STATECHANGED)) + { + pme_flags = GMX_PME_SPREAD_Q | GMX_PME_SOLVE; + if (flags & GMX_FORCE_FORCES) + { + pme_flags |= GMX_PME_CALC_F; + } + if (flags & (GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY)) + { + pme_flags |= GMX_PME_CALC_ENER_VIR; + } + if (fr->n_tpi > 0) + { + /* We don't calculate f, but we do want the potential */ + pme_flags |= GMX_PME_CALC_POT; + } + wallcycle_start(wcycle, ewcPMEMESH); + status = gmx_pme_do(fr->pmedata, + md->start, md->homenr - fr->n_tpi, + x, fr->f_novirsum, + md->chargeA, md->chargeB, + bSB ? boxs : box, cr, + DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0, + DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0, + nrnb, wcycle, + fr->vir_el_recip, fr->ewaldcoeff, + &Vlr, lambda[efptCOUL], &dvdl, + pme_flags); + *cycles_pme = wallcycle_stop(wcycle, ewcPMEMESH); + + /* We should try to do as little computation after + * this as possible, because parallel PME synchronizes + * the nodes, so we want all load imbalance of the rest + * of the force calculation to be before the PME call. + * DD load balancing is done on the whole time of + * the force call (without PME). 
+ */ + } + if (fr->n_tpi > 0) + { + /* Determine the PME grid energy of the test molecule + * with the PME grid potential of the other charges. + */ + gmx_pme_calc_energy(fr->pmedata, fr->n_tpi, + x + md->homenr - fr->n_tpi, + md->chargeA + md->homenr - fr->n_tpi, + &Vlr); + } + PRINT_SEPDVDL("PME mesh", Vlr, dvdl); + } + break; + case eelEWALD: + Vlr = do_ewald(fplog, FALSE, ir, x, fr->f_novirsum, + md->chargeA, md->chargeB, + box_size, cr, md->homenr, + fr->vir_el_recip, fr->ewaldcoeff, + lambda[efptCOUL], &dvdl, fr->ewald_table); + PRINT_SEPDVDL("Ewald long-range", Vlr, dvdl); + break; + default: + gmx_fatal(FARGS, "No such electrostatics method implemented %s", + eel_names[fr->eeltype]); + } + if (status != 0) + { + gmx_fatal(FARGS, "Error %d in long range electrostatics routine %s", + status, EELTYPE(fr->eeltype)); + } + /* Note that with separate PME nodes we get the real energies later */ + enerd->dvdl_lin[efptCOUL] += dvdl; + enerd->term[F_COUL_RECIP] = Vlr + Vcorr; + if (debug) + { + fprintf(debug, "Vlr = %g, Vcorr = %g, Vlr_corr = %g\n", + Vlr, Vcorr, enerd->term[F_COUL_RECIP]); + pr_rvecs(debug, 0, "vir_el_recip after corr", fr->vir_el_recip, DIM); + pr_rvecs(debug, 0, "fshift after LR Corrections", fr->fshift, SHIFTS); + } + } + else + { + if (EEL_RF(fr->eeltype)) + { + /* With the Verlet scheme exclusion forces are calculated + * in the non-bonded kernel. 
+ */ + if (ir->cutoff_scheme != ecutsVERLET && fr->eeltype != eelRF_NEC) + { + dvdl = 0; + enerd->term[F_RF_EXCL] = + RF_excl_correction(fplog, fr, graph, md, excl, x, f, + fr->fshift, &pbc, lambda[efptCOUL], &dvdl); + } + + enerd->dvdl_lin[efptCOUL] += dvdl; + PRINT_SEPDVDL("RF exclusion correction", + enerd->term[F_RF_EXCL], dvdl); + } + } + where(); + debug_gmx(); + + if (debug) + { + print_nrnb(debug, nrnb); + } + debug_gmx(); + +#ifdef GMX_MPI + if (TAKETIME) + { + t2 = MPI_Wtime(); + MPI_Barrier(cr->mpi_comm_mygroup); + t3 = MPI_Wtime(); + fr->t_wait += t3-t2; + if (fr->timesteps == 11) + { + fprintf(stderr, "* PP load balancing info: node %d, step %s, rel wait time=%3.0f%% , load string value: %7.2f\n", + cr->nodeid, gmx_step_str(fr->timesteps, buf), + 100*fr->t_wait/(fr->t_wait+fr->t_fnbf), + (fr->t_fnbf+fr->t_wait)/fr->t_fnbf); + } + fr->timesteps++; + } +#endif + + if (debug) + { + pr_rvecs(debug, 0, "fshift after bondeds", fr->fshift, SHIFTS); + } + +} + +void init_enerdata(int ngener, int n_lambda, gmx_enerdata_t *enerd) +{ + int i, n2; + + for (i = 0; i < F_NRE; i++) + { + enerd->term[i] = 0; + enerd->foreign_term[i] = 0; + } + + + for (i = 0; i < efptNR; i++) + { + enerd->dvdl_lin[i] = 0; + enerd->dvdl_nonlin[i] = 0; + } + + n2 = ngener*ngener; + if (debug) + { + fprintf(debug, "Creating %d sized group matrix for energies\n", n2); + } + enerd->grpp.nener = n2; + enerd->foreign_grpp.nener = n2; + for (i = 0; (i < egNR); i++) + { + snew(enerd->grpp.ener[i], n2); + snew(enerd->foreign_grpp.ener[i], n2); + } + + if (n_lambda) + { + enerd->n_lambda = 1 + n_lambda; + snew(enerd->enerpart_lambda, enerd->n_lambda); + } + else + { + enerd->n_lambda = 0; + } +} + +void destroy_enerdata(gmx_enerdata_t *enerd) +{ + int i; + + for (i = 0; (i < egNR); i++) + { + sfree(enerd->grpp.ener[i]); + } + + for (i = 0; (i < egNR); i++) + { + sfree(enerd->foreign_grpp.ener[i]); + } + + if (enerd->n_lambda) + { + sfree(enerd->enerpart_lambda); + } +} + +static real sum_v(int 
n, real v[]) +{ + real t; + int i; + + t = 0.0; + for (i = 0; (i < n); i++) + { + t = t + v[i]; + } + + return t; +} + +void sum_epot(t_grpopts *opts, gmx_grppairener_t *grpp, real *epot) +{ + int i; + + /* Accumulate energies */ + epot[F_COUL_SR] = sum_v(grpp->nener, grpp->ener[egCOULSR]); + epot[F_LJ] = sum_v(grpp->nener, grpp->ener[egLJSR]); + epot[F_LJ14] = sum_v(grpp->nener, grpp->ener[egLJ14]); + epot[F_COUL14] = sum_v(grpp->nener, grpp->ener[egCOUL14]); + epot[F_COUL_LR] = sum_v(grpp->nener, grpp->ener[egCOULLR]); + epot[F_LJ_LR] = sum_v(grpp->nener, grpp->ener[egLJLR]); + /* We have already added 1-2,1-3, and 1-4 terms to F_GBPOL */ + epot[F_GBPOL] += sum_v(grpp->nener, grpp->ener[egGB]); + +/* lattice part of LR doesnt belong to any group + * and has been added earlier + */ + epot[F_BHAM] = sum_v(grpp->nener, grpp->ener[egBHAMSR]); + epot[F_BHAM_LR] = sum_v(grpp->nener, grpp->ener[egBHAMLR]); + + epot[F_EPOT] = 0; + for (i = 0; (i < F_EPOT); i++) + { + if (i != F_DISRESVIOL && i != F_ORIRESDEV) + { + epot[F_EPOT] += epot[i]; + } + } +} + +void sum_dhdl(gmx_enerdata_t *enerd, real *lambda, t_lambda *fepvals) +{ + int i, j, index; + double dlam; + + enerd->dvdl_lin[efptVDW] += enerd->term[F_DVDL_VDW]; /* include dispersion correction */ + enerd->term[F_DVDL] = 0.0; + for (i = 0; i < efptNR; i++) + { + if (fepvals->separate_dvdl[i]) + { + /* could this be done more readably/compactly? 
*/ + switch (i) + { + case (efptMASS): + index = F_DKDL; + break; + case (efptCOUL): + index = F_DVDL_COUL; + break; + case (efptVDW): + index = F_DVDL_VDW; + break; + case (efptBONDED): + index = F_DVDL_BONDED; + break; + case (efptRESTRAINT): + index = F_DVDL_RESTRAINT; + break; + default: + index = F_DVDL; + break; + } + enerd->term[index] = enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i]; + if (debug) + { + fprintf(debug, "dvdl-%s[%2d]: %f: non-linear %f + linear %f\n", + efpt_names[i], i, enerd->term[index], enerd->dvdl_nonlin[i], enerd->dvdl_lin[i]); + } + } + else + { + enerd->term[F_DVDL] += enerd->dvdl_lin[i] + enerd->dvdl_nonlin[i]; + if (debug) + { + fprintf(debug, "dvd-%sl[%2d]: %f: non-linear %f + linear %f\n", + efpt_names[0], i, enerd->term[F_DVDL], enerd->dvdl_nonlin[i], enerd->dvdl_lin[i]); + } + } + } + + /* Notes on the foreign lambda free energy difference evaluation: + * Adding the potential and ekin terms that depend linearly on lambda + * as delta lam * dvdl to the energy differences is exact. + * For the constraints this is not exact, but we have no other option + * without literally changing the lengths and reevaluating the energies at each step. + * (try to remedy this post 4.6 - MRS) + * For the non-bonded LR term we assume that the soft-core (if present) + * no longer affects the energy beyond the short-range cut-off, + * which is a very good approximation (except for exotic settings). + * (investigate how to overcome this post 4.6 - MRS) + */ - enerd->term[F_DVDL_BONDED] += enerd->term[F_DVDL_CONSTR]; ++ if (fepvals->separate_dvdl[efptBONDED]) ++ { ++ enerd->term[F_DVDL_BONDED] += enerd->term[F_DVDL_CONSTR]; ++ } ++ else ++ { ++ enerd->term[F_DVDL] += enerd->term[F_DVDL_CONSTR]; ++ } + enerd->term[F_DVDL_CONSTR] = 0; + + for (i = 0; i < fepvals->n_lambda; i++) + { /* note we are iterating over fepvals here! 
+ For the current lam, dlam = 0 automatically, + so we don't need to add anything to the + enerd->enerpart_lambda[0] */ + + /* we don't need to worry about dvdl_lin contributions to dE at + current lambda, because the contributions to the current + lambda are automatically zeroed */ + + for (j = 0; j < efptNR; j++) + { + /* Note that this loop is over all dhdl components, not just the separated ones */ + dlam = (fepvals->all_lambda[j][i]-lambda[j]); + enerd->enerpart_lambda[i+1] += dlam*enerd->dvdl_lin[j]; + if (debug) + { + fprintf(debug, "enerdiff lam %g: (%15s), non-linear %f linear %f*%f\n", + fepvals->all_lambda[j][i], efpt_names[j], + (enerd->enerpart_lambda[i+1] - enerd->enerpart_lambda[0]), + dlam, enerd->dvdl_lin[j]); + } + } + } +} + + +void reset_foreign_enerdata(gmx_enerdata_t *enerd) +{ + int i, j; + + /* First reset all foreign energy components. Foreign energies always called on + neighbor search steps */ + for (i = 0; (i < egNR); i++) + { + for (j = 0; (j < enerd->grpp.nener); j++) + { + enerd->foreign_grpp.ener[i][j] = 0.0; + } + } + + /* potential energy components */ + for (i = 0; (i <= F_EPOT); i++) + { + enerd->foreign_term[i] = 0.0; + } +} + +void reset_enerdata(t_grpopts *opts, + t_forcerec *fr, gmx_bool bNS, + gmx_enerdata_t *enerd, + gmx_bool bMaster) +{ + gmx_bool bKeepLR; + int i, j; + + /* First reset all energy components, except for the long range terms + * on the master at non neighbor search steps, since the long range + * terms have already been summed at the last neighbor search step. 
+ */ + bKeepLR = (fr->bTwinRange && !bNS); + for (i = 0; (i < egNR); i++) + { + if (!(bKeepLR && bMaster && (i == egCOULLR || i == egLJLR))) + { + for (j = 0; (j < enerd->grpp.nener); j++) + { + enerd->grpp.ener[i][j] = 0.0; + } + } + } + for (i = 0; i < efptNR; i++) + { + enerd->dvdl_lin[i] = 0.0; + enerd->dvdl_nonlin[i] = 0.0; + } + + /* Normal potential energy components */ + for (i = 0; (i <= F_EPOT); i++) + { + enerd->term[i] = 0.0; + } + /* Initialize the dVdlambda term with the long range contribution */ + /* Initialize the dvdl term with the long range contribution */ + enerd->term[F_DVDL] = 0.0; + enerd->term[F_DVDL_COUL] = 0.0; + enerd->term[F_DVDL_VDW] = 0.0; + enerd->term[F_DVDL_BONDED] = 0.0; + enerd->term[F_DVDL_RESTRAINT] = 0.0; + enerd->term[F_DKDL] = 0.0; + if (enerd->n_lambda > 0) + { + for (i = 0; i < enerd->n_lambda; i++) + { + enerd->enerpart_lambda[i] = 0.0; + } + } + /* reset foreign energy data - separate function since we also call it elsewhere */ + reset_foreign_enerdata(enerd); +} diff --cc src/gromacs/mdlib/forcerec.c index 794841cac9,0000000000..0751b57db4 mode 100644,000000..100644 --- a/src/gromacs/mdlib/forcerec.c +++ b/src/gromacs/mdlib/forcerec.c @@@ -1,2958 -1,0 +1,2977 @@@ +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*- + * + * + * This source code is part of + * + * G R O M A C S + * + * GROningen MAchine for Chemical Simulations + * + * VERSION 3.2.0 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others. + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team, + * check out http://www.gromacs.org for more information. + + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. 
+ * + * If you want to redistribute modifications, please consider that + * scientific software is very special. Version control is crucial - + * bugs must be traceable. We will be happy to consider code for + * inclusion in the official distribution, but derived work must not + * be called official GROMACS. Details are found in the README & COPYING + * files - if they are missing, get the official version at www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the papers on the package - you can find them in the top README file. + * + * For more info, check our website at http://www.gromacs.org + * + * And Hey: + * GROwing Monsters And Cloning Shrimps + */ +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include +#include "sysstuff.h" +#include "typedefs.h" +#include "vec.h" +#include "maths.h" +#include "macros.h" +#include "smalloc.h" +#include "macros.h" +#include "gmx_fatal.h" +#include "gmx_fatal_collective.h" +#include "physics.h" +#include "force.h" +#include "tables.h" +#include "nonbonded.h" +#include "invblock.h" +#include "names.h" +#include "network.h" +#include "pbc.h" +#include "ns.h" +#include "mshift.h" +#include "txtdump.h" +#include "coulomb.h" +#include "md_support.h" +#include "md_logging.h" +#include "domdec.h" +#include "partdec.h" +#include "qmmm.h" +#include "copyrite.h" +#include "mtop_util.h" +#include "nbnxn_search.h" +#include "nbnxn_atomdata.h" +#include "nbnxn_consts.h" +#include "statutil.h" +#include "gmx_omp_nthreads.h" +#include "gmx_detect_hardware.h" + +#ifdef _MSC_VER +/* MSVC definition for __cpuid() */ +#include +#endif + +#include "types/nbnxn_cuda_types_ext.h" +#include "gpu_utils.h" +#include "nbnxn_cuda_data_mgmt.h" +#include "pmalloc_cuda.h" + +t_forcerec *mk_forcerec(void) +{ + t_forcerec *fr; + + snew(fr, 1); + + return fr; +} + +#ifdef DEBUG +static void pr_nbfp(FILE *fp, real *nbfp, gmx_bool bBHAM, int atnr) +{ + int i, j; + + for (i = 0; (i < atnr); i++) + { + 
for (j = 0; (j < atnr); j++) + { + fprintf(fp, "%2d - %2d", i, j); + if (bBHAM) + { + fprintf(fp, " a=%10g, b=%10g, c=%10g\n", BHAMA(nbfp, atnr, i, j), + BHAMB(nbfp, atnr, i, j), BHAMC(nbfp, atnr, i, j)/6.0); + } + else + { + fprintf(fp, " c6=%10g, c12=%10g\n", C6(nbfp, atnr, i, j)/6.0, + C12(nbfp, atnr, i, j)/12.0); + } + } + } +} +#endif + +static real *mk_nbfp(const gmx_ffparams_t *idef, gmx_bool bBHAM) +{ + real *nbfp; + int i, j, k, atnr; + + atnr = idef->atnr; + if (bBHAM) + { + snew(nbfp, 3*atnr*atnr); + for (i = k = 0; (i < atnr); i++) + { + for (j = 0; (j < atnr); j++, k++) + { + BHAMA(nbfp, atnr, i, j) = idef->iparams[k].bham.a; + BHAMB(nbfp, atnr, i, j) = idef->iparams[k].bham.b; + /* nbfp now includes the 6.0 derivative prefactor */ + BHAMC(nbfp, atnr, i, j) = idef->iparams[k].bham.c*6.0; + } + } + } + else + { + snew(nbfp, 2*atnr*atnr); + for (i = k = 0; (i < atnr); i++) + { + for (j = 0; (j < atnr); j++, k++) + { + /* nbfp now includes the 6.0/12.0 derivative prefactors */ + C6(nbfp, atnr, i, j) = idef->iparams[k].lj.c6*6.0; + C12(nbfp, atnr, i, j) = idef->iparams[k].lj.c12*12.0; + } + } + } + + return nbfp; +} + +/* This routine sets fr->solvent_opt to the most common solvent in the + * system, e.g. esolSPC or esolTIP4P. It will also mark each charge group in + * the fr->solvent_type array with the correct type (or esolNO). + * + * Charge groups that fulfill the conditions but are not identical to the + * most common one will be marked as esolNO in the solvent_type array. + * + * TIP3p is identical to SPC for these purposes, so we call it + * SPC in the arrays (Apologies to Bill Jorgensen ;-) + * + * NOTE: QM particle should not + * become an optimized solvent. 
Not even if there is only one charge + * group in the Qm + */ + +typedef struct +{ + int model; + int count; + int vdwtype[4]; + real charge[4]; +} solvent_parameters_t; + +static void +check_solvent_cg(const gmx_moltype_t *molt, + int cg0, + int nmol, + const unsigned char *qm_grpnr, + const t_grps *qm_grps, + t_forcerec * fr, + int *n_solvent_parameters, + solvent_parameters_t **solvent_parameters_p, + int cginfo, + int *cg_sp) +{ + const t_blocka * excl; + t_atom *atom; + int j, k; + int j0, j1, nj; + gmx_bool perturbed; + gmx_bool has_vdw[4]; + gmx_bool match; + real tmp_charge[4]; + int tmp_vdwtype[4]; + int tjA; + gmx_bool qm; + solvent_parameters_t *solvent_parameters; + + /* We use a list with parameters for each solvent type. + * Every time we discover a new molecule that fulfills the basic + * conditions for a solvent we compare with the previous entries + * in these lists. If the parameters are the same we just increment + * the counter for that type, and otherwise we create a new type + * based on the current molecule. + * + * Once we've finished going through all molecules we check which + * solvent is most common, and mark all those molecules while we + * clear the flag on all others. + */ + + solvent_parameters = *solvent_parameters_p; + + /* Mark the cg first as non optimized */ + *cg_sp = -1; + + /* Check if this cg has no exclusions with atoms in other charge groups + * and all atoms inside the charge group excluded. + * We only have 3 or 4 atom solvent loops. + */ + if (GET_CGINFO_EXCL_INTER(cginfo) || + !GET_CGINFO_EXCL_INTRA(cginfo)) + { + return; + } + + /* Get the indices of the first atom in this charge group */ + j0 = molt->cgs.index[cg0]; + j1 = molt->cgs.index[cg0+1]; + + /* Number of atoms in our molecule */ + nj = j1 - j0; + + if (debug) + { + fprintf(debug, + "Moltype '%s': there are %d atoms in this charge group\n", + *molt->name, nj); + } + + /* Check if it could be an SPC (3 atoms) or TIP4p (4) water, + * otherwise skip it. 
+ */ + if (nj < 3 || nj > 4) + { + return; + } + + /* Check if we are doing QM on this group */ + qm = FALSE; + if (qm_grpnr != NULL) + { + for (j = j0; j < j1 && !qm; j++) + { + qm = (qm_grpnr[j] < qm_grps->nr - 1); + } + } + /* Cannot use solvent optimization with QM */ + if (qm) + { + return; + } + + atom = molt->atoms.atom; + + /* Still looks like a solvent, time to check parameters */ + + /* If it is perturbed (free energy) we can't use the solvent loops, + * so then we just skip to the next molecule. + */ + perturbed = FALSE; + + for (j = j0; j < j1 && !perturbed; j++) + { + perturbed = PERTURBED(atom[j]); + } + + if (perturbed) + { + return; + } + + /* Now it's only a question if the VdW and charge parameters + * are OK. Before doing the check we compare and see if they are + * identical to a possible previous solvent type. + * First we assign the current types and charges. + */ + for (j = 0; j < nj; j++) + { + tmp_vdwtype[j] = atom[j0+j].type; + tmp_charge[j] = atom[j0+j].q; + } + + /* Does it match any previous solvent type? */ + for (k = 0; k < *n_solvent_parameters; k++) + { + match = TRUE; + + + /* We can only match SPC with 3 atoms and TIP4p with 4 atoms */ + if ( (solvent_parameters[k].model == esolSPC && nj != 3) || + (solvent_parameters[k].model == esolTIP4P && nj != 4) ) + { + match = FALSE; + } + + /* Check that types & charges match for all atoms in molecule */ + for (j = 0; j < nj && match == TRUE; j++) + { + if (tmp_vdwtype[j] != solvent_parameters[k].vdwtype[j]) + { + match = FALSE; + } + if (tmp_charge[j] != solvent_parameters[k].charge[j]) + { + match = FALSE; + } + } + if (match == TRUE) + { + /* Congratulations! We have a matched solvent. + * Flag it with this type for later processing. + */ + *cg_sp = k; + solvent_parameters[k].count += nmol; + + /* We are done with this charge group */ + return; + } + } + + /* If we get here, we have a tentative new solvent type. 
+ * Before we add it we must check that it fulfills the requirements + * of the solvent optimized loops. First determine which atoms have + * VdW interactions. + */ + for (j = 0; j < nj; j++) + { + has_vdw[j] = FALSE; + tjA = tmp_vdwtype[j]; + + /* Go through all other tpes and see if any have non-zero + * VdW parameters when combined with this one. + */ + for (k = 0; k < fr->ntype && (has_vdw[j] == FALSE); k++) + { + /* We already checked that the atoms weren't perturbed, + * so we only need to check state A now. + */ + if (fr->bBHAM) + { + has_vdw[j] = (has_vdw[j] || + (BHAMA(fr->nbfp, fr->ntype, tjA, k) != 0.0) || + (BHAMB(fr->nbfp, fr->ntype, tjA, k) != 0.0) || + (BHAMC(fr->nbfp, fr->ntype, tjA, k) != 0.0)); + } + else + { + /* Standard LJ */ + has_vdw[j] = (has_vdw[j] || + (C6(fr->nbfp, fr->ntype, tjA, k) != 0.0) || + (C12(fr->nbfp, fr->ntype, tjA, k) != 0.0)); + } + } + } + + /* Now we know all we need to make the final check and assignment. */ + if (nj == 3) + { + /* So, is it an SPC? + * For this we require thatn all atoms have charge, + * the charges on atom 2 & 3 should be the same, and only + * atom 1 might have VdW. + */ + if (has_vdw[1] == FALSE && + has_vdw[2] == FALSE && + tmp_charge[0] != 0 && + tmp_charge[1] != 0 && + tmp_charge[2] == tmp_charge[1]) + { + srenew(solvent_parameters, *n_solvent_parameters+1); + solvent_parameters[*n_solvent_parameters].model = esolSPC; + solvent_parameters[*n_solvent_parameters].count = nmol; + for (k = 0; k < 3; k++) + { + solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k]; + solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k]; + } + + *cg_sp = *n_solvent_parameters; + (*n_solvent_parameters)++; + } + } + else if (nj == 4) + { + /* Or could it be a TIP4P? + * For this we require thatn atoms 2,3,4 have charge, but not atom 1. + * Only atom 1 mght have VdW. 
+ */ + if (has_vdw[1] == FALSE && + has_vdw[2] == FALSE && + has_vdw[3] == FALSE && + tmp_charge[0] == 0 && + tmp_charge[1] != 0 && + tmp_charge[2] == tmp_charge[1] && + tmp_charge[3] != 0) + { + srenew(solvent_parameters, *n_solvent_parameters+1); + solvent_parameters[*n_solvent_parameters].model = esolTIP4P; + solvent_parameters[*n_solvent_parameters].count = nmol; + for (k = 0; k < 4; k++) + { + solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k]; + solvent_parameters[*n_solvent_parameters].charge[k] = tmp_charge[k]; + } + + *cg_sp = *n_solvent_parameters; + (*n_solvent_parameters)++; + } + } + + *solvent_parameters_p = solvent_parameters; +} + +static void +check_solvent(FILE * fp, + const gmx_mtop_t * mtop, + t_forcerec * fr, + cginfo_mb_t *cginfo_mb) +{ + const t_block * cgs; + const t_block * mols; + const gmx_moltype_t *molt; + int mb, mol, cg_mol, at_offset, cg_offset, am, cgm, i, nmol_ch, nmol; + int n_solvent_parameters; + solvent_parameters_t *solvent_parameters; + int **cg_sp; + int bestsp, bestsol; + + if (debug) + { + fprintf(debug, "Going to determine what solvent types we have.\n"); + } + + mols = &mtop->mols; + + n_solvent_parameters = 0; + solvent_parameters = NULL; + /* Allocate temporary array for solvent type */ + snew(cg_sp, mtop->nmolblock); + + cg_offset = 0; + at_offset = 0; + for (mb = 0; mb < mtop->nmolblock; mb++) + { + molt = &mtop->moltype[mtop->molblock[mb].type]; + cgs = &molt->cgs; + /* Here we have to loop over all individual molecules + * because we need to check for QMMM particles. + */ + snew(cg_sp[mb], cginfo_mb[mb].cg_mod); + nmol_ch = cginfo_mb[mb].cg_mod/cgs->nr; + nmol = mtop->molblock[mb].nmol/nmol_ch; + for (mol = 0; mol < nmol_ch; mol++) + { + cgm = mol*cgs->nr; + am = mol*cgs->index[cgs->nr]; + for (cg_mol = 0; cg_mol < cgs->nr; cg_mol++) + { + check_solvent_cg(molt, cg_mol, nmol, + mtop->groups.grpnr[egcQMMM] ? 
+ mtop->groups.grpnr[egcQMMM]+at_offset+am : 0, + &mtop->groups.grps[egcQMMM], + fr, + &n_solvent_parameters, &solvent_parameters, + cginfo_mb[mb].cginfo[cgm+cg_mol], + &cg_sp[mb][cgm+cg_mol]); + } + } + cg_offset += cgs->nr; + at_offset += cgs->index[cgs->nr]; + } + + /* Puh! We finished going through all charge groups. + * Now find the most common solvent model. + */ + + /* Most common solvent this far */ + bestsp = -2; + for (i = 0; i < n_solvent_parameters; i++) + { + if (bestsp == -2 || + solvent_parameters[i].count > solvent_parameters[bestsp].count) + { + bestsp = i; + } + } + + if (bestsp >= 0) + { + bestsol = solvent_parameters[bestsp].model; + } + else + { + bestsol = esolNO; + } + +#ifdef DISABLE_WATER_NLIST + bestsol = esolNO; +#endif + + fr->nWatMol = 0; + for (mb = 0; mb < mtop->nmolblock; mb++) + { + cgs = &mtop->moltype[mtop->molblock[mb].type].cgs; + nmol = (mtop->molblock[mb].nmol*cgs->nr)/cginfo_mb[mb].cg_mod; + for (i = 0; i < cginfo_mb[mb].cg_mod; i++) + { + if (cg_sp[mb][i] == bestsp) + { + SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], bestsol); + fr->nWatMol += nmol; + } + else + { + SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], esolNO); + } + } + sfree(cg_sp[mb]); + } + sfree(cg_sp); + + if (bestsol != esolNO && fp != NULL) + { + fprintf(fp, "\nEnabling %s-like water optimization for %d molecules.\n\n", + esol_names[bestsol], + solvent_parameters[bestsp].count); + } + + sfree(solvent_parameters); + fr->solvent_opt = bestsol; +} + +enum { + acNONE = 0, acCONSTRAINT, acSETTLE +}; + +static cginfo_mb_t *init_cginfo_mb(FILE *fplog, const gmx_mtop_t *mtop, + t_forcerec *fr, gmx_bool bNoSolvOpt, + gmx_bool *bExcl_IntraCGAll_InterCGNone) +{ + const t_block *cgs; + const t_blocka *excl; + const gmx_moltype_t *molt; + const gmx_molblock_t *molb; + cginfo_mb_t *cginfo_mb; + gmx_bool *type_VDW; + int *cginfo; + int cg_offset, a_offset, cgm, am; + int mb, m, ncg_tot, cg, a0, a1, gid, ai, j, aj, excl_nalloc; + int *a_con; + int ftype; + int ia; + gmx_bool bId, 
*bExcl, bExclIntraAll, bExclInter, bHaveVDW, bHaveQ; + + ncg_tot = ncg_mtop(mtop); + snew(cginfo_mb, mtop->nmolblock); + + snew(type_VDW, fr->ntype); + for (ai = 0; ai < fr->ntype; ai++) + { + type_VDW[ai] = FALSE; + for (j = 0; j < fr->ntype; j++) + { + type_VDW[ai] = type_VDW[ai] || + fr->bBHAM || + C6(fr->nbfp, fr->ntype, ai, j) != 0 || + C12(fr->nbfp, fr->ntype, ai, j) != 0; + } + } + + *bExcl_IntraCGAll_InterCGNone = TRUE; + + excl_nalloc = 10; + snew(bExcl, excl_nalloc); + cg_offset = 0; + a_offset = 0; + for (mb = 0; mb < mtop->nmolblock; mb++) + { + molb = &mtop->molblock[mb]; + molt = &mtop->moltype[molb->type]; + cgs = &molt->cgs; + excl = &molt->excls; + + /* Check if the cginfo is identical for all molecules in this block. + * If so, we only need an array of the size of one molecule. + * Otherwise we make an array of #mol times #cgs per molecule. + */ + bId = TRUE; + am = 0; + for (m = 0; m < molb->nmol; m++) + { + am = m*cgs->index[cgs->nr]; + for (cg = 0; cg < cgs->nr; cg++) + { + a0 = cgs->index[cg]; + a1 = cgs->index[cg+1]; + if (ggrpnr(&mtop->groups, egcENER, a_offset+am+a0) != + ggrpnr(&mtop->groups, egcENER, a_offset +a0)) + { + bId = FALSE; + } + if (mtop->groups.grpnr[egcQMMM] != NULL) + { + for (ai = a0; ai < a1; ai++) + { + if (mtop->groups.grpnr[egcQMMM][a_offset+am+ai] != + mtop->groups.grpnr[egcQMMM][a_offset +ai]) + { + bId = FALSE; + } + } + } + } + } + + cginfo_mb[mb].cg_start = cg_offset; + cginfo_mb[mb].cg_end = cg_offset + molb->nmol*cgs->nr; + cginfo_mb[mb].cg_mod = (bId ? 
1 : molb->nmol)*cgs->nr; + snew(cginfo_mb[mb].cginfo, cginfo_mb[mb].cg_mod); + cginfo = cginfo_mb[mb].cginfo; + + /* Set constraints flags for constrained atoms */ + snew(a_con, molt->atoms.nr); + for (ftype = 0; ftype < F_NRE; ftype++) + { + if (interaction_function[ftype].flags & IF_CONSTRAINT) + { + int nral; + + nral = NRAL(ftype); + for (ia = 0; ia < molt->ilist[ftype].nr; ia += 1+nral) + { + int a; + + for (a = 0; a < nral; a++) + { + a_con[molt->ilist[ftype].iatoms[ia+1+a]] = + (ftype == F_SETTLE ? acSETTLE : acCONSTRAINT); + } + } + } + } + + for (m = 0; m < (bId ? 1 : molb->nmol); m++) + { + cgm = m*cgs->nr; + am = m*cgs->index[cgs->nr]; + for (cg = 0; cg < cgs->nr; cg++) + { + a0 = cgs->index[cg]; + a1 = cgs->index[cg+1]; + + /* Store the energy group in cginfo */ + gid = ggrpnr(&mtop->groups, egcENER, a_offset+am+a0); + SET_CGINFO_GID(cginfo[cgm+cg], gid); + + /* Check the intra/inter charge group exclusions */ + if (a1-a0 > excl_nalloc) + { + excl_nalloc = a1 - a0; + srenew(bExcl, excl_nalloc); + } + /* bExclIntraAll: all intra cg interactions excluded + * bExclInter: any inter cg interactions excluded + */ + bExclIntraAll = TRUE; + bExclInter = FALSE; + bHaveVDW = FALSE; + bHaveQ = FALSE; + for (ai = a0; ai < a1; ai++) + { + /* Check VDW and electrostatic interactions */ + bHaveVDW = bHaveVDW || (type_VDW[molt->atoms.atom[ai].type] || + type_VDW[molt->atoms.atom[ai].typeB]); + bHaveQ = bHaveQ || (molt->atoms.atom[ai].q != 0 || + molt->atoms.atom[ai].qB != 0); + + /* Clear the exclusion list for atom ai */ + for (aj = a0; aj < a1; aj++) + { + bExcl[aj-a0] = FALSE; + } + /* Loop over all the exclusions of atom ai */ + for (j = excl->index[ai]; j < excl->index[ai+1]; j++) + { + aj = excl->a[j]; + if (aj < a0 || aj >= a1) + { + bExclInter = TRUE; + } + else + { + bExcl[aj-a0] = TRUE; + } + } + /* Check if ai excludes a0 to a1 */ + for (aj = a0; aj < a1; aj++) + { + if (!bExcl[aj-a0]) + { + bExclIntraAll = FALSE; + } + } + + switch (a_con[ai]) + { + case 
acCONSTRAINT: + SET_CGINFO_CONSTR(cginfo[cgm+cg]); + break; + case acSETTLE: + SET_CGINFO_SETTLE(cginfo[cgm+cg]); + break; + default: + break; + } + } + if (bExclIntraAll) + { + SET_CGINFO_EXCL_INTRA(cginfo[cgm+cg]); + } + if (bExclInter) + { + SET_CGINFO_EXCL_INTER(cginfo[cgm+cg]); + } + if (a1 - a0 > MAX_CHARGEGROUP_SIZE) + { + /* The size in cginfo is currently only read with DD */ + gmx_fatal(FARGS, "A charge group has size %d which is larger than the limit of %d atoms", a1-a0, MAX_CHARGEGROUP_SIZE); + } + if (bHaveVDW) + { + SET_CGINFO_HAS_VDW(cginfo[cgm+cg]); + } + if (bHaveQ) + { + SET_CGINFO_HAS_Q(cginfo[cgm+cg]); + } + /* Store the charge group size */ + SET_CGINFO_NATOMS(cginfo[cgm+cg], a1-a0); + + if (!bExclIntraAll || bExclInter) + { + *bExcl_IntraCGAll_InterCGNone = FALSE; + } + } + } + + sfree(a_con); + + cg_offset += molb->nmol*cgs->nr; + a_offset += molb->nmol*cgs->index[cgs->nr]; + } + sfree(bExcl); + + /* the solvent optimizer is called after the QM is initialized, + * because we don't want to have the QM subsystemto become an + * optimized solvent + */ + + check_solvent(fplog, mtop, fr, cginfo_mb); + + if (getenv("GMX_NO_SOLV_OPT")) + { + if (fplog) + { + fprintf(fplog, "Found environment variable GMX_NO_SOLV_OPT.\n" + "Disabling all solvent optimization\n"); + } + fr->solvent_opt = esolNO; + } + if (bNoSolvOpt) + { + fr->solvent_opt = esolNO; + } + if (!fr->solvent_opt) + { + for (mb = 0; mb < mtop->nmolblock; mb++) + { + for (cg = 0; cg < cginfo_mb[mb].cg_mod; cg++) + { + SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[cg], esolNO); + } + } + } + + return cginfo_mb; +} + +static int *cginfo_expand(int nmb, cginfo_mb_t *cgi_mb) +{ + int ncg, mb, cg; + int *cginfo; + + ncg = cgi_mb[nmb-1].cg_end; + snew(cginfo, ncg); + mb = 0; + for (cg = 0; cg < ncg; cg++) + { + while (cg >= cgi_mb[mb].cg_end) + { + mb++; + } + cginfo[cg] = + cgi_mb[mb].cginfo[(cg - cgi_mb[mb].cg_start) % cgi_mb[mb].cg_mod]; + } + + return cginfo; +} + +static void set_chargesum(FILE 
*log, t_forcerec *fr, const gmx_mtop_t *mtop) +{ + double qsum, q2sum, q; + int mb, nmol, i; + const t_atoms *atoms; + + qsum = 0; + q2sum = 0; + for (mb = 0; mb < mtop->nmolblock; mb++) + { + nmol = mtop->molblock[mb].nmol; + atoms = &mtop->moltype[mtop->molblock[mb].type].atoms; + for (i = 0; i < atoms->nr; i++) + { + q = atoms->atom[i].q; + qsum += nmol*q; + q2sum += nmol*q*q; + } + } + fr->qsum[0] = qsum; + fr->q2sum[0] = q2sum; + if (fr->efep != efepNO) + { + qsum = 0; + q2sum = 0; + for (mb = 0; mb < mtop->nmolblock; mb++) + { + nmol = mtop->molblock[mb].nmol; + atoms = &mtop->moltype[mtop->molblock[mb].type].atoms; + for (i = 0; i < atoms->nr; i++) + { + q = atoms->atom[i].qB; + qsum += nmol*q; + q2sum += nmol*q*q; + } + fr->qsum[1] = qsum; + fr->q2sum[1] = q2sum; + } + } + else + { + fr->qsum[1] = fr->qsum[0]; + fr->q2sum[1] = fr->q2sum[0]; + } + if (log) + { + if (fr->efep == efepNO) + { + fprintf(log, "System total charge: %.3f\n", fr->qsum[0]); + } + else + { + fprintf(log, "System total charge, top. A: %.3f top. B: %.3f\n", + fr->qsum[0], fr->qsum[1]); + } + } +} + +void update_forcerec(FILE *log, t_forcerec *fr, matrix box) +{ + if (fr->eeltype == eelGRF) + { + calc_rffac(NULL, fr->eeltype, fr->epsilon_r, fr->epsilon_rf, + fr->rcoulomb, fr->temp, fr->zsquare, box, + &fr->kappa, &fr->k_rf, &fr->c_rf); + } +} + +void set_avcsixtwelve(FILE *fplog, t_forcerec *fr, const gmx_mtop_t *mtop) +{ + const t_atoms *atoms, *atoms_tpi; + const t_blocka *excl; + int mb, nmol, nmolc, i, j, tpi, tpj, j1, j2, k, n, nexcl, q; +#if (defined SIZEOF_LONG_LONG_INT) && (SIZEOF_LONG_LONG_INT >= 8) + long long int npair, npair_ij, tmpi, tmpj; +#else + double npair, npair_ij, tmpi, tmpj; +#endif + double csix, ctwelve; + int ntp, *typecount; + gmx_bool bBHAM; + real *nbfp; + + ntp = fr->ntype; + bBHAM = fr->bBHAM; + nbfp = fr->nbfp; + + for (q = 0; q < (fr->efep == efepNO ? 
1 : 2); q++) + { + csix = 0; + ctwelve = 0; + npair = 0; + nexcl = 0; + if (!fr->n_tpi) + { + /* Count the types so we avoid natoms^2 operations */ + snew(typecount, ntp); + for (mb = 0; mb < mtop->nmolblock; mb++) + { + nmol = mtop->molblock[mb].nmol; + atoms = &mtop->moltype[mtop->molblock[mb].type].atoms; + for (i = 0; i < atoms->nr; i++) + { + if (q == 0) + { + tpi = atoms->atom[i].type; + } + else + { + tpi = atoms->atom[i].typeB; + } + typecount[tpi] += nmol; + } + } + for (tpi = 0; tpi < ntp; tpi++) + { + for (tpj = tpi; tpj < ntp; tpj++) + { + tmpi = typecount[tpi]; + tmpj = typecount[tpj]; + if (tpi != tpj) + { + npair_ij = tmpi*tmpj; + } + else + { + npair_ij = tmpi*(tmpi - 1)/2; + } + if (bBHAM) + { + /* nbfp now includes the 6.0 derivative prefactor */ + csix += npair_ij*BHAMC(nbfp, ntp, tpi, tpj)/6.0; + } + else + { + /* nbfp now includes the 6.0/12.0 derivative prefactors */ + csix += npair_ij* C6(nbfp, ntp, tpi, tpj)/6.0; + ctwelve += npair_ij* C12(nbfp, ntp, tpi, tpj)/12.0; + } + npair += npair_ij; + } + } + sfree(typecount); + /* Subtract the excluded pairs. + * The main reason for substracting exclusions is that in some cases + * some combinations might never occur and the parameters could have + * any value. These unused values should not influence the dispersion + * correction. 
+ */ + for (mb = 0; mb < mtop->nmolblock; mb++) + { + nmol = mtop->molblock[mb].nmol; + atoms = &mtop->moltype[mtop->molblock[mb].type].atoms; + excl = &mtop->moltype[mtop->molblock[mb].type].excls; + for (i = 0; (i < atoms->nr); i++) + { + if (q == 0) + { + tpi = atoms->atom[i].type; + } + else + { + tpi = atoms->atom[i].typeB; + } + j1 = excl->index[i]; + j2 = excl->index[i+1]; + for (j = j1; j < j2; j++) + { + k = excl->a[j]; + if (k > i) + { + if (q == 0) + { + tpj = atoms->atom[k].type; + } + else + { + tpj = atoms->atom[k].typeB; + } + if (bBHAM) + { + /* nbfp now includes the 6.0 derivative prefactor */ + csix -= nmol*BHAMC(nbfp, ntp, tpi, tpj)/6.0; + } + else + { + /* nbfp now includes the 6.0/12.0 derivative prefactors */ + csix -= nmol*C6 (nbfp, ntp, tpi, tpj)/6.0; + ctwelve -= nmol*C12(nbfp, ntp, tpi, tpj)/12.0; + } + nexcl += nmol; + } + } + } + } + } + else + { + /* Only correct for the interaction of the test particle + * with the rest of the system. + */ + atoms_tpi = + &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].atoms; + + npair = 0; + for (mb = 0; mb < mtop->nmolblock; mb++) + { + nmol = mtop->molblock[mb].nmol; + atoms = &mtop->moltype[mtop->molblock[mb].type].atoms; + for (j = 0; j < atoms->nr; j++) + { + nmolc = nmol; + /* Remove the interaction of the test charge group + * with itself. 
+ */ + if (mb == mtop->nmolblock-1) + { + nmolc--; + + if (mb == 0 && nmol == 1) + { + gmx_fatal(FARGS, "Old format tpr with TPI, please generate a new tpr file"); + } + } + if (q == 0) + { + tpj = atoms->atom[j].type; + } + else + { + tpj = atoms->atom[j].typeB; + } + for (i = 0; i < fr->n_tpi; i++) + { + if (q == 0) + { + tpi = atoms_tpi->atom[i].type; + } + else + { + tpi = atoms_tpi->atom[i].typeB; + } + if (bBHAM) + { + /* nbfp now includes the 6.0 derivative prefactor */ + csix += nmolc*BHAMC(nbfp, ntp, tpi, tpj)/6.0; + } + else + { + /* nbfp now includes the 6.0/12.0 derivative prefactors */ + csix += nmolc*C6 (nbfp, ntp, tpi, tpj)/6.0; + ctwelve += nmolc*C12(nbfp, ntp, tpi, tpj)/12.0; + } + npair += nmolc; + } + } + } + } + if (npair - nexcl <= 0 && fplog) + { + fprintf(fplog, "\nWARNING: There are no atom pairs for dispersion correction\n\n"); + csix = 0; + ctwelve = 0; + } + else + { + csix /= npair - nexcl; + ctwelve /= npair - nexcl; + } + if (debug) + { + fprintf(debug, "Counted %d exclusions\n", nexcl); + fprintf(debug, "Average C6 parameter is: %10g\n", (double)csix); + fprintf(debug, "Average C12 parameter is: %10g\n", (double)ctwelve); + } + fr->avcsix[q] = csix; + fr->avctwelve[q] = ctwelve; + } + if (fplog != NULL) + { + if (fr->eDispCorr == edispcAllEner || + fr->eDispCorr == edispcAllEnerPres) + { + fprintf(fplog, "Long Range LJ corr.: %10.4e, %10.4e\n", + fr->avcsix[0], fr->avctwelve[0]); + } + else + { + fprintf(fplog, "Long Range LJ corr.: %10.4e\n", fr->avcsix[0]); + } + } +} + + +static void set_bham_b_max(FILE *fplog, t_forcerec *fr, + const gmx_mtop_t *mtop) +{ + const t_atoms *at1, *at2; + int mt1, mt2, i, j, tpi, tpj, ntypes; + real b, bmin; + real *nbfp; + + if (fplog) + { + fprintf(fplog, "Determining largest Buckingham b parameter for table\n"); + } + nbfp = fr->nbfp; + ntypes = fr->ntype; + + bmin = -1; + fr->bham_b_max = 0; + for (mt1 = 0; mt1 < mtop->nmoltype; mt1++) + { + at1 = &mtop->moltype[mt1].atoms; + for (i = 0; (i < 
at1->nr); i++) + { + tpi = at1->atom[i].type; + if (tpi >= ntypes) + { + gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", i, tpi, ntypes); + } + + for (mt2 = mt1; mt2 < mtop->nmoltype; mt2++) + { + at2 = &mtop->moltype[mt2].atoms; + for (j = 0; (j < at2->nr); j++) + { + tpj = at2->atom[j].type; + if (tpj >= ntypes) + { + gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", j, tpj, ntypes); + } + b = BHAMB(nbfp, ntypes, tpi, tpj); + if (b > fr->bham_b_max) + { + fr->bham_b_max = b; + } + if ((b < bmin) || (bmin == -1)) + { + bmin = b; + } + } + } + } + } + if (fplog) + { + fprintf(fplog, "Buckingham b parameters, min: %g, max: %g\n", + bmin, fr->bham_b_max); + } +} + +static void make_nbf_tables(FILE *fp, const output_env_t oenv, + t_forcerec *fr, real rtab, + const t_commrec *cr, + const char *tabfn, char *eg1, char *eg2, + t_nblists *nbl) +{ + char buf[STRLEN]; + int i, j; + + if (tabfn == NULL) + { + if (debug) + { + fprintf(debug, "No table file name passed, can not read table, can not do non-bonded interactions\n"); + } + return; + } + + sprintf(buf, "%s", tabfn); + if (eg1 && eg2) + { + /* Append the two energy group names */ + sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "_%s_%s.%s", + eg1, eg2, ftp2ext(efXVG)); + } + nbl->table_elec_vdw = make_tables(fp, oenv, fr, MASTER(cr), buf, rtab, 0); + /* Copy the contents of the table to separate coulomb and LJ tables too, + * to improve cache performance. + */ + /* For performance reasons we want + * the table data to be aligned to 16-byte. The pointers could be freed + * but currently aren't. 
+ */ + nbl->table_elec.interaction = GMX_TABLE_INTERACTION_ELEC; + nbl->table_elec.format = nbl->table_elec_vdw.format; + nbl->table_elec.r = nbl->table_elec_vdw.r; + nbl->table_elec.n = nbl->table_elec_vdw.n; + nbl->table_elec.scale = nbl->table_elec_vdw.scale; + nbl->table_elec.scale_exp = nbl->table_elec_vdw.scale_exp; + nbl->table_elec.formatsize = nbl->table_elec_vdw.formatsize; + nbl->table_elec.ninteractions = 1; + nbl->table_elec.stride = nbl->table_elec.formatsize * nbl->table_elec.ninteractions; + snew_aligned(nbl->table_elec.data, nbl->table_elec.stride*(nbl->table_elec.n+1), 32); + + nbl->table_vdw.interaction = GMX_TABLE_INTERACTION_VDWREP_VDWDISP; + nbl->table_vdw.format = nbl->table_elec_vdw.format; + nbl->table_vdw.r = nbl->table_elec_vdw.r; + nbl->table_vdw.n = nbl->table_elec_vdw.n; + nbl->table_vdw.scale = nbl->table_elec_vdw.scale; + nbl->table_vdw.scale_exp = nbl->table_elec_vdw.scale_exp; + nbl->table_vdw.formatsize = nbl->table_elec_vdw.formatsize; + nbl->table_vdw.ninteractions = 2; + nbl->table_vdw.stride = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions; + snew_aligned(nbl->table_vdw.data, nbl->table_vdw.stride*(nbl->table_vdw.n+1), 32); + + for (i = 0; i <= nbl->table_elec_vdw.n; i++) + { + for (j = 0; j < 4; j++) + { + nbl->table_elec.data[4*i+j] = nbl->table_elec_vdw.data[12*i+j]; + } + for (j = 0; j < 8; j++) + { + nbl->table_vdw.data[8*i+j] = nbl->table_elec_vdw.data[12*i+4+j]; + } + } +} + +static void count_tables(int ftype1, int ftype2, const gmx_mtop_t *mtop, + int *ncount, int **count) +{ + const gmx_moltype_t *molt; + const t_ilist *il; + int mt, ftype, stride, i, j, tabnr; + + for (mt = 0; mt < mtop->nmoltype; mt++) + { + molt = &mtop->moltype[mt]; + for (ftype = 0; ftype < F_NRE; ftype++) + { + if (ftype == ftype1 || ftype == ftype2) + { + il = &molt->ilist[ftype]; + stride = 1 + NRAL(ftype); + for (i = 0; i < il->nr; i += stride) + { + tabnr = mtop->ffparams.iparams[il->iatoms[i]].tab.table; + if (tabnr < 0) + { + 
gmx_fatal(FARGS, "A bonded table number is smaller than 0: %d\n", tabnr); + } + if (tabnr >= *ncount) + { + srenew(*count, tabnr+1); + for (j = *ncount; j < tabnr+1; j++) + { + (*count)[j] = 0; + } + *ncount = tabnr+1; + } + (*count)[tabnr]++; + } + } + } + } +} + +static bondedtable_t *make_bonded_tables(FILE *fplog, + int ftype1, int ftype2, + const gmx_mtop_t *mtop, + const char *basefn, const char *tabext) +{ + int i, ncount, *count; + char tabfn[STRLEN]; + bondedtable_t *tab; + + tab = NULL; + + ncount = 0; + count = NULL; + count_tables(ftype1, ftype2, mtop, &ncount, &count); + + if (ncount > 0) + { + snew(tab, ncount); + for (i = 0; i < ncount; i++) + { + if (count[i] > 0) + { + sprintf(tabfn, "%s", basefn); + sprintf(tabfn + strlen(basefn) - strlen(ftp2ext(efXVG)) - 1, "_%s%d.%s", + tabext, i, ftp2ext(efXVG)); + tab[i] = make_bonded_table(fplog, tabfn, NRAL(ftype1)-2); + } + } + sfree(count); + } + + return tab; +} + +void forcerec_set_ranges(t_forcerec *fr, + int ncg_home, int ncg_force, + int natoms_force, + int natoms_force_constr, int natoms_f_novirsum) +{ + fr->cg0 = 0; + fr->hcg = ncg_home; + + /* fr->ncg_force is unused in the standard code, + * but it can be useful for modified code dealing with charge groups. 
+ */ + fr->ncg_force = ncg_force; + fr->natoms_force = natoms_force; + fr->natoms_force_constr = natoms_force_constr; + + if (fr->natoms_force_constr > fr->nalloc_force) + { + fr->nalloc_force = over_alloc_dd(fr->natoms_force_constr); + + if (fr->bTwinRange) + { + srenew(fr->f_twin, fr->nalloc_force); + } + } + + if (fr->bF_NoVirSum) + { + fr->f_novirsum_n = natoms_f_novirsum; + if (fr->f_novirsum_n > fr->f_novirsum_nalloc) + { + fr->f_novirsum_nalloc = over_alloc_dd(fr->f_novirsum_n); + srenew(fr->f_novirsum_alloc, fr->f_novirsum_nalloc); + } + } + else + { + fr->f_novirsum_n = 0; + } +} + +static real cutoff_inf(real cutoff) +{ + if (cutoff == 0) + { + cutoff = GMX_CUTOFF_INF; + } + + return cutoff; +} + +static void make_adress_tf_tables(FILE *fp, const output_env_t oenv, + t_forcerec *fr, const t_inputrec *ir, + const char *tabfn, const gmx_mtop_t *mtop, + matrix box) +{ + char buf[STRLEN]; + int i, j; + + if (tabfn == NULL) + { + gmx_fatal(FARGS, "No thermoforce table file given. Use -tabletf to specify a file\n"); + return; + } + + snew(fr->atf_tabs, ir->adress->n_tf_grps); + + sprintf(buf, "%s", tabfn); + for (i = 0; i < ir->adress->n_tf_grps; i++) + { + j = ir->adress->tf_table_index[i]; /* get energy group index */ + sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "tf_%s.%s", + *(mtop->groups.grpname[mtop->groups.grps[egcENER].nm_ind[j]]), ftp2ext(efXVG)); + if (fp) + { + fprintf(fp, "loading tf table for energygrp index %d from %s\n", ir->adress->tf_table_index[i], buf); + } + fr->atf_tabs[i] = make_atf_table(fp, oenv, fr, buf, box); + } + +} + +gmx_bool can_use_allvsall(const t_inputrec *ir, const gmx_mtop_t *mtop, + gmx_bool bPrintNote, t_commrec *cr, FILE *fp) +{ + gmx_bool bAllvsAll; + + bAllvsAll = + ( + ir->rlist == 0 && + ir->rcoulomb == 0 && + ir->rvdw == 0 && + ir->ePBC == epbcNONE && + ir->vdwtype == evdwCUT && + ir->coulombtype == eelCUT && + ir->efep == efepNO && + (ir->implicit_solvent == eisNO || + (ir->implicit_solvent == eisGBSA 
&& (ir->gb_algorithm == egbSTILL || + ir->gb_algorithm == egbHCT || + ir->gb_algorithm == egbOBC))) && + getenv("GMX_NO_ALLVSALL") == NULL + ); + + if (bAllvsAll && ir->opts.ngener > 1) + { + const char *note = "NOTE: Can not use all-vs-all force loops, because there are multiple energy monitor groups; you might get significantly higher performance when using only a single energy monitor group.\n"; + + if (bPrintNote) + { + if (MASTER(cr)) + { + fprintf(stderr, "\n%s\n", note); + } + if (fp != NULL) + { + fprintf(fp, "\n%s\n", note); + } + } + bAllvsAll = FALSE; + } + + if (bAllvsAll && fp && MASTER(cr)) + { + fprintf(fp, "\nUsing accelerated all-vs-all kernels.\n\n"); + } + + return bAllvsAll; +} + + +static void init_forcerec_f_threads(t_forcerec *fr, int nenergrp) +{ + int t, i; + + /* These thread local data structures are used for bondeds only */ + fr->nthreads = gmx_omp_nthreads_get(emntBonded); + + if (fr->nthreads > 1) + { + snew(fr->f_t, fr->nthreads); + /* Thread 0 uses the global force and energy arrays */ + for (t = 1; t < fr->nthreads; t++) + { + fr->f_t[t].f = NULL; + fr->f_t[t].f_nalloc = 0; + snew(fr->f_t[t].fshift, SHIFTS); + fr->f_t[t].grpp.nener = nenergrp*nenergrp; + for (i = 0; i < egNR; i++) + { + snew(fr->f_t[t].grpp.ener[i], fr->f_t[t].grpp.nener); + } + } + } +} + + +static void pick_nbnxn_kernel_cpu(FILE *fp, + const t_commrec *cr, + const gmx_cpuid_t cpuid_info, + const t_inputrec *ir, + int *kernel_type, + int *ewald_excl) +{ + *kernel_type = nbnxnk4x4_PlainC; + *ewald_excl = ewaldexclTable; + +#ifdef GMX_NBNXN_SIMD + { +#ifdef GMX_NBNXN_SIMD_4XN + *kernel_type = nbnxnk4xN_SIMD_4xN; +#endif +#ifdef GMX_NBNXN_SIMD_2XNN + /* We expect the 2xNN kernels to be faster in most cases */ + *kernel_type = nbnxnk4xN_SIMD_2xNN; +#endif + +#if defined GMX_NBNXN_SIMD_4XN && defined GMX_X86_AVX_256 + if (EEL_RF(ir->coulombtype) || ir->coulombtype == eelCUT) + { + /* The raw pair rate of the 4x8 kernel is higher than 2x(4+4), + * 10% with HT, 50% 
without HT, but extra zeros interactions + * can compensate. As we currently don't detect the actual use + * of HT, switch to 4x8 to avoid a potential performance hit. + */ + *kernel_type = nbnxnk4xN_SIMD_4xN; + } +#endif + if (getenv("GMX_NBNXN_SIMD_4XN") != NULL) + { +#ifdef GMX_NBNXN_SIMD_4XN + *kernel_type = nbnxnk4xN_SIMD_4xN; +#else + gmx_fatal(FARGS, "SIMD 4xN kernels requested, but Gromacs has been compiled without support for these kernels"); +#endif + } + if (getenv("GMX_NBNXN_SIMD_2XNN") != NULL) + { +#ifdef GMX_NBNXN_SIMD_2XNN + *kernel_type = nbnxnk4xN_SIMD_2xNN; +#else + gmx_fatal(FARGS, "SIMD 2x(N+N) kernels requested, but Gromacs has been compiled without support for these kernels"); +#endif + } + + /* Analytical Ewald exclusion correction is only an option in the + * x86 SIMD kernel. This is faster in single precision + * on Bulldozer and slightly faster on Sandy Bridge. + */ +#if (defined GMX_X86_AVX_128_FMA || defined GMX_X86_AVX_256) && !defined GMX_DOUBLE + *ewald_excl = ewaldexclAnalytical; +#endif + if (getenv("GMX_NBNXN_EWALD_TABLE") != NULL) + { + *ewald_excl = ewaldexclTable; + } + if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != NULL) + { + *ewald_excl = ewaldexclAnalytical; + } + + } +#endif /* GMX_X86_SSE2 */ +} + + +const char *lookup_nbnxn_kernel_name(int kernel_type) +{ + const char *returnvalue = NULL; + switch (kernel_type) + { + case nbnxnkNotSet: returnvalue = "not set"; break; + case nbnxnk4x4_PlainC: returnvalue = "plain C"; break; +#ifndef GMX_NBNXN_SIMD + case nbnxnk4xN_SIMD_4xN: returnvalue = "not available"; break; + case nbnxnk4xN_SIMD_2xNN: returnvalue = "not available"; break; +#else +#ifdef GMX_X86_SSE2 +#if GMX_NBNXN_SIMD_BITWIDTH == 128 + /* x86 SIMD intrinsics can be converted to either SSE or AVX depending + * on compiler flags. As we use nearly identical intrinsics, using an AVX + * compiler flag without an AVX macro effectively results in AVX kernels. 
+ * For gcc we check for __AVX__ + * At least a check for icc should be added (if there is a macro) + */ +#if !(defined GMX_X86_AVX_128_FMA || defined __AVX__) +#ifndef GMX_X86_SSE4_1 + case nbnxnk4xN_SIMD_4xN: returnvalue = "SSE2"; break; + case nbnxnk4xN_SIMD_2xNN: returnvalue = "SSE2"; break; +#else + case nbnxnk4xN_SIMD_4xN: returnvalue = "SSE4.1"; break; + case nbnxnk4xN_SIMD_2xNN: returnvalue = "SSE4.1"; break; +#endif +#else + case nbnxnk4xN_SIMD_4xN: returnvalue = "AVX-128"; break; + case nbnxnk4xN_SIMD_2xNN: returnvalue = "AVX-128"; break; +#endif +#endif +#if GMX_NBNXN_SIMD_BITWIDTH == 256 + case nbnxnk4xN_SIMD_4xN: returnvalue = "AVX-256"; break; + case nbnxnk4xN_SIMD_2xNN: returnvalue = "AVX-256"; break; +#endif +#else /* not GMX_X86_SSE2 */ + case nbnxnk4xN_SIMD_4xN: returnvalue = "SIMD"; break; + case nbnxnk4xN_SIMD_2xNN: returnvalue = "SIMD"; break; +#endif +#endif + case nbnxnk8x8x8_CUDA: returnvalue = "CUDA"; break; + case nbnxnk8x8x8_PlainC: returnvalue = "plain C"; break; + + case nbnxnkNR: + default: + gmx_fatal(FARGS, "Illegal kernel type selected"); + returnvalue = NULL; + break; + } + return returnvalue; +}; + +static void pick_nbnxn_kernel(FILE *fp, + const t_commrec *cr, + const gmx_hw_info_t *hwinfo, + gmx_bool use_cpu_acceleration, + gmx_bool bUseGPU, + gmx_bool bEmulateGPU, + const t_inputrec *ir, + int *kernel_type, + int *ewald_excl, + gmx_bool bDoNonbonded) +{ + assert(kernel_type); + + *kernel_type = nbnxnkNotSet; + *ewald_excl = ewaldexclTable; + + if (bEmulateGPU) + { + *kernel_type = nbnxnk8x8x8_PlainC; + + if (bDoNonbonded) + { + md_print_warn(cr, fp, "Emulating a GPU run on the CPU (slow)"); + } + } + else if (bUseGPU) + { + *kernel_type = nbnxnk8x8x8_CUDA; + } + + if (*kernel_type == nbnxnkNotSet) + { + if (use_cpu_acceleration) + { + pick_nbnxn_kernel_cpu(fp, cr, hwinfo->cpuid_info, ir, + kernel_type, ewald_excl); + } + else + { + *kernel_type = nbnxnk4x4_PlainC; + } + } + + if (bDoNonbonded && fp != NULL) + { + fprintf(fp, 
"\nUsing %s %dx%d non-bonded kernels\n\n", + lookup_nbnxn_kernel_name(*kernel_type), + nbnxn_kernel_pairlist_simple(*kernel_type) ? NBNXN_CPU_CLUSTER_I_SIZE : NBNXN_GPU_CLUSTER_SIZE, + nbnxn_kernel_to_cj_size(*kernel_type)); + } +} + +static void pick_nbnxn_resources(FILE *fp, + const t_commrec *cr, + const gmx_hw_info_t *hwinfo, + gmx_bool bDoNonbonded, + gmx_bool *bUseGPU, + gmx_bool *bEmulateGPU) +{ + gmx_bool bEmulateGPUEnvVarSet; + char gpu_err_str[STRLEN]; + + *bUseGPU = FALSE; + + bEmulateGPUEnvVarSet = (getenv("GMX_EMULATE_GPU") != NULL); + + /* Run GPU emulation mode if GMX_EMULATE_GPU is defined. Because + * GPUs (currently) only handle non-bonded calculations, we will + * automatically switch to emulation if non-bonded calculations are + * turned off via GMX_NO_NONBONDED - this is the simple and elegant + * way to turn off GPU initialization, data movement, and cleanup. + * + * GPU emulation can be useful to assess the performance one can expect by + * adding GPU(s) to the machine. The conditional below allows this even + * if mdrun is compiled without GPU acceleration support. + * Note that you should freezing the system as otherwise it will explode. + */ + *bEmulateGPU = (bEmulateGPUEnvVarSet || + (!bDoNonbonded && hwinfo->bCanUseGPU)); + + /* Enable GPU mode when GPUs are available or no GPU emulation is requested. + */ + if (hwinfo->bCanUseGPU && !(*bEmulateGPU)) + { + /* Each PP node will use the intra-node id-th device from the + * list of detected/selected GPUs. */ + if (!init_gpu(cr->rank_pp_intranode, gpu_err_str, &hwinfo->gpu_info)) + { + /* At this point the init should never fail as we made sure that + * we have all the GPUs we need. If it still does, we'll bail. 
*/ + gmx_fatal(FARGS, "On node %d failed to initialize GPU #%d: %s", + cr->nodeid, + get_gpu_device_id(&hwinfo->gpu_info, cr->rank_pp_intranode), + gpu_err_str); + } + + /* Here we actually turn on hardware GPU acceleration */ + *bUseGPU = TRUE; + } +} + +gmx_bool uses_simple_tables(int cutoff_scheme, + nonbonded_verlet_t *nbv, + int group) +{ + gmx_bool bUsesSimpleTables = TRUE; + int grp_index; + + switch (cutoff_scheme) + { + case ecutsGROUP: + bUsesSimpleTables = TRUE; + break; + case ecutsVERLET: + assert(NULL != nbv && NULL != nbv->grp); + grp_index = (group < 0) ? 0 : (nbv->ngrp - 1); + bUsesSimpleTables = nbnxn_kernel_pairlist_simple(nbv->grp[grp_index].kernel_type); + break; + default: + gmx_incons("unimplemented"); + } + return bUsesSimpleTables; +} + +static void init_ewald_f_table(interaction_const_t *ic, + gmx_bool bUsesSimpleTables, + real rtab) +{ + real maxr; + + if (bUsesSimpleTables) + { + /* With a spacing of 0.0005 we are at the force summation accuracy + * for the SSE kernels for "normal" atomistic simulations. + */ + ic->tabq_scale = ewald_spline3_table_scale(ic->ewaldcoeff, + ic->rcoulomb); + + maxr = (rtab > ic->rcoulomb) ? 
rtab : ic->rcoulomb; + ic->tabq_size = (int)(maxr*ic->tabq_scale) + 2; + } + else + { + ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE; + /* Subtract 2 iso 1 to avoid access out of range due to rounding */ + ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb; + } + + sfree_aligned(ic->tabq_coul_FDV0); + sfree_aligned(ic->tabq_coul_F); + sfree_aligned(ic->tabq_coul_V); + + /* Create the original table data in FDV0 */ + snew_aligned(ic->tabq_coul_FDV0, ic->tabq_size*4, 32); + snew_aligned(ic->tabq_coul_F, ic->tabq_size, 32); + snew_aligned(ic->tabq_coul_V, ic->tabq_size, 32); + table_spline3_fill_ewald_lr(ic->tabq_coul_F, ic->tabq_coul_V, ic->tabq_coul_FDV0, + ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff); +} + +void init_interaction_const_tables(FILE *fp, + interaction_const_t *ic, + gmx_bool bUsesSimpleTables, + real rtab) +{ + real spacing; + + if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype)) + { + init_ewald_f_table(ic, bUsesSimpleTables, rtab); + + if (fp != NULL) + { + fprintf(fp, "Initialized non-bonded Ewald correction tables, spacing: %.2e size: %d\n\n", + 1/ic->tabq_scale, ic->tabq_size); + } + } +} + +void init_interaction_const(FILE *fp, + interaction_const_t **interaction_const, + const t_forcerec *fr, + real rtab) +{ + interaction_const_t *ic; + gmx_bool bUsesSimpleTables = TRUE; + + snew(ic, 1); + + /* Just allocate something so we can free it */ + snew_aligned(ic->tabq_coul_FDV0, 16, 32); + snew_aligned(ic->tabq_coul_F, 16, 32); + snew_aligned(ic->tabq_coul_V, 16, 32); + + ic->rlist = fr->rlist; + ic->rlistlong = fr->rlistlong; + + /* Lennard-Jones */ + ic->rvdw = fr->rvdw; + if (fr->vdw_modifier == eintmodPOTSHIFT) + { + ic->sh_invrc6 = pow(ic->rvdw, -6.0); + } + else + { + ic->sh_invrc6 = 0; + } + + /* Electrostatics */ + ic->eeltype = fr->eeltype; + ic->rcoulomb = fr->rcoulomb; + ic->epsilon_r = fr->epsilon_r; + ic->epsfac = fr->epsfac; + + /* Ewald */ + ic->ewaldcoeff = fr->ewaldcoeff; + if (fr->coulomb_modifier == eintmodPOTSHIFT) + { + 
ic->sh_ewald = gmx_erfc(ic->ewaldcoeff*ic->rcoulomb); + } + else + { + ic->sh_ewald = 0; + } + + /* Reaction-field */ + if (EEL_RF(ic->eeltype)) + { + ic->epsilon_rf = fr->epsilon_rf; + ic->k_rf = fr->k_rf; + ic->c_rf = fr->c_rf; + } + else + { + /* For plain cut-off we might use the reaction-field kernels */ + ic->epsilon_rf = ic->epsilon_r; + ic->k_rf = 0; + if (fr->coulomb_modifier == eintmodPOTSHIFT) + { + ic->c_rf = 1/ic->rcoulomb; + } + else + { + ic->c_rf = 0; + } + } + + if (fp != NULL) + { + fprintf(fp, "Potential shift: LJ r^-12: %.3f r^-6 %.3f", + sqr(ic->sh_invrc6), ic->sh_invrc6); + if (ic->eeltype == eelCUT) + { + fprintf(fp, ", Coulomb %.3f", ic->c_rf); + } + else if (EEL_PME(ic->eeltype)) + { + fprintf(fp, ", Ewald %.3e", ic->sh_ewald); + } + fprintf(fp, "\n"); + } + + *interaction_const = ic; + + if (fr->nbv != NULL && fr->nbv->bUseGPU) + { - nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv); ++ nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv->grp); + } + + bUsesSimpleTables = uses_simple_tables(fr->cutoff_scheme, fr->nbv, -1); + init_interaction_const_tables(fp, ic, bUsesSimpleTables, rtab); +} + +static void init_nb_verlet(FILE *fp, + nonbonded_verlet_t **nb_verlet, + const t_inputrec *ir, + const t_forcerec *fr, + const t_commrec *cr, + const char *nbpu_opt) +{ + nonbonded_verlet_t *nbv; + int i; + char *env; + gmx_bool bEmulateGPU, bHybridGPURun = FALSE; + + nbnxn_alloc_t *nb_alloc; + nbnxn_free_t *nb_free; + + snew(nbv, 1); + + pick_nbnxn_resources(fp, cr, fr->hwinfo, + fr->bNonbonded, + &nbv->bUseGPU, + &bEmulateGPU); + + nbv->nbs = NULL; + + nbv->ngrp = (DOMAINDECOMP(cr) ? 
2 : 1); + for (i = 0; i < nbv->ngrp; i++) + { + nbv->grp[i].nbl_lists.nnbl = 0; + nbv->grp[i].nbat = NULL; + nbv->grp[i].kernel_type = nbnxnkNotSet; + + if (i == 0) /* local */ + { + pick_nbnxn_kernel(fp, cr, fr->hwinfo, fr->use_cpu_acceleration, + nbv->bUseGPU, bEmulateGPU, + ir, + &nbv->grp[i].kernel_type, + &nbv->grp[i].ewald_excl, + fr->bNonbonded); + } + else /* non-local */ + { + if (nbpu_opt != NULL && strcmp(nbpu_opt, "gpu_cpu") == 0) + { + /* Use GPU for local, select a CPU kernel for non-local */ + pick_nbnxn_kernel(fp, cr, fr->hwinfo, fr->use_cpu_acceleration, + FALSE, FALSE, + ir, + &nbv->grp[i].kernel_type, + &nbv->grp[i].ewald_excl, + fr->bNonbonded); + + bHybridGPURun = TRUE; + } + else + { + /* Use the same kernel for local and non-local interactions */ + nbv->grp[i].kernel_type = nbv->grp[0].kernel_type; + nbv->grp[i].ewald_excl = nbv->grp[0].ewald_excl; + } + } + } + + if (nbv->bUseGPU) + { + /* init the NxN GPU data; the last argument tells whether we'll have + * both local and non-local NB calculation on GPU */ + nbnxn_cuda_init(fp, &nbv->cu_nbv, + &fr->hwinfo->gpu_info, cr->rank_pp_intranode, + (nbv->ngrp > 1) && !bHybridGPURun); + + if ((env = getenv("GMX_NB_MIN_CI")) != NULL) + { + char *end; + + nbv->min_ci_balanced = strtol(env, &end, 10); + if (!end || (*end != 0) || nbv->min_ci_balanced <= 0) + { + gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, positive integer required", env); + } + + if (debug) + { + fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n", + nbv->min_ci_balanced); + } + } + else + { + nbv->min_ci_balanced = nbnxn_cuda_min_ci_balanced(nbv->cu_nbv); + if (debug) + { + fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n", + nbv->min_ci_balanced); + } + } + } + else + { + nbv->min_ci_balanced = 0; + } + + *nb_verlet = nbv; + + nbnxn_init_search(&nbv->nbs, + DOMAINDECOMP(cr) ? &cr->dd->nc : NULL, + DOMAINDECOMP(cr) ? 
domdec_zones(cr->dd) : NULL, + gmx_omp_nthreads_get(emntNonbonded)); + + for (i = 0; i < nbv->ngrp; i++) + { + if (nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA) + { + nb_alloc = &pmalloc; + nb_free = &pfree; + } + else + { + nb_alloc = NULL; + nb_free = NULL; + } + + nbnxn_init_pairlist_set(&nbv->grp[i].nbl_lists, + nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type), + /* 8x8x8 "non-simple" lists are ATM always combined */ + !nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type), + nb_alloc, nb_free); + + if (i == 0 || + nbv->grp[0].kernel_type != nbv->grp[i].kernel_type) + { + snew(nbv->grp[i].nbat, 1); + nbnxn_atomdata_init(fp, + nbv->grp[i].nbat, + nbv->grp[i].kernel_type, + fr->ntype, fr->nbfp, + ir->opts.ngener, + nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type) ? gmx_omp_nthreads_get(emntNonbonded) : 1, + nb_alloc, nb_free); + } + else + { + nbv->grp[i].nbat = nbv->grp[0].nbat; + } + } +} + +void init_forcerec(FILE *fp, + const output_env_t oenv, + t_forcerec *fr, + t_fcdata *fcd, + const t_inputrec *ir, + const gmx_mtop_t *mtop, + const t_commrec *cr, + matrix box, + gmx_bool bMolEpot, + const char *tabfn, + const char *tabafn, + const char *tabpfn, + const char *tabbfn, + const char *nbpu_opt, + gmx_bool bNoSolvOpt, + real print_force) +{ + int i, j, m, natoms, ngrp, negp_pp, negptable, egi, egj; + real rtab; + char *env; + double dbl; + rvec box_size; + const t_block *cgs; + gmx_bool bGenericKernelOnly; + gmx_bool bTab, bSep14tab, bNormalnblists; + t_nblists *nbl; + int *nm_ind, egp_flags; + + if (fr->hwinfo == NULL) + { + /* Detect hardware, gather information. + * In mdrun, hwinfo has already been set before calling init_forcerec. + * Here we ignore GPUs, as tools will not use them anyhow. + */ + snew(fr->hwinfo, 1); + gmx_detect_hardware(fp, fr->hwinfo, cr, + FALSE, FALSE, NULL); + } + + /* By default we turn acceleration on, but it might be turned off further down... 
*/ + fr->use_cpu_acceleration = TRUE; + + fr->bDomDec = DOMAINDECOMP(cr); + + natoms = mtop->natoms; + + if (check_box(ir->ePBC, box)) + { + gmx_fatal(FARGS, check_box(ir->ePBC, box)); + } + + /* Test particle insertion ? */ + if (EI_TPI(ir->eI)) + { + /* Set to the size of the molecule to be inserted (the last one) */ + /* Because of old style topologies, we have to use the last cg + * instead of the last molecule type. + */ + cgs = &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].cgs; + fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1]; + if (fr->n_tpi != mtop->mols.index[mtop->mols.nr] - mtop->mols.index[mtop->mols.nr-1]) + { + gmx_fatal(FARGS, "The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group."); + } + } + else + { + fr->n_tpi = 0; + } + + /* Copy AdResS parameters */ + if (ir->bAdress) + { + fr->adress_type = ir->adress->type; + fr->adress_const_wf = ir->adress->const_wf; + fr->adress_ex_width = ir->adress->ex_width; + fr->adress_hy_width = ir->adress->hy_width; + fr->adress_icor = ir->adress->icor; + fr->adress_site = ir->adress->site; + fr->adress_ex_forcecap = ir->adress->ex_forcecap; + fr->adress_do_hybridpairs = ir->adress->do_hybridpairs; + + + snew(fr->adress_group_explicit, ir->adress->n_energy_grps); + for (i = 0; i < ir->adress->n_energy_grps; i++) + { + fr->adress_group_explicit[i] = ir->adress->group_explicit[i]; + } + + fr->n_adress_tf_grps = ir->adress->n_tf_grps; + snew(fr->adress_tf_table_index, fr->n_adress_tf_grps); + for (i = 0; i < fr->n_adress_tf_grps; i++) + { + fr->adress_tf_table_index[i] = ir->adress->tf_table_index[i]; + } + copy_rvec(ir->adress->refs, fr->adress_refs); + } + else + { + fr->adress_type = eAdressOff; + fr->adress_do_hybridpairs = FALSE; + } + + /* Copy the user determined parameters */ + fr->userint1 = ir->userint1; + fr->userint2 = ir->userint2; + fr->userint3 = ir->userint3; + fr->userint4 = ir->userint4; + fr->userreal1 = ir->userreal1; + fr->userreal2 = 
ir->userreal2; + fr->userreal3 = ir->userreal3; + fr->userreal4 = ir->userreal4; + + /* Shell stuff */ + fr->fc_stepsize = ir->fc_stepsize; + + /* Free energy */ + fr->efep = ir->efep; + fr->sc_alphavdw = ir->fepvals->sc_alpha; + if (ir->fepvals->bScCoul) + { + fr->sc_alphacoul = ir->fepvals->sc_alpha; + fr->sc_sigma6_min = pow(ir->fepvals->sc_sigma_min, 6); + } + else + { + fr->sc_alphacoul = 0; + fr->sc_sigma6_min = 0; /* only needed when bScCoul is on */ + } + fr->sc_power = ir->fepvals->sc_power; + fr->sc_r_power = ir->fepvals->sc_r_power; + fr->sc_sigma6_def = pow(ir->fepvals->sc_sigma, 6); + + env = getenv("GMX_SCSIGMA_MIN"); + if (env != NULL) + { + dbl = 0; + sscanf(env, "%lf", &dbl); + fr->sc_sigma6_min = pow(dbl, 6); + if (fp) + { + fprintf(fp, "Setting the minimum soft core sigma to %g nm\n", dbl); + } + } + + fr->bNonbonded = TRUE; + if (getenv("GMX_NO_NONBONDED") != NULL) + { + /* turn off non-bonded calculations */ + fr->bNonbonded = FALSE; + md_print_warn(cr, fp, + "Found environment variable GMX_NO_NONBONDED.\n" + "Disabling nonbonded calculations.\n"); + } + + bGenericKernelOnly = FALSE; + + /* We now check in the NS code whether a particular combination of interactions + * can be used with water optimization, and disable it if that is not the case. + */ + + if (getenv("GMX_NB_GENERIC") != NULL) + { + if (fp != NULL) + { + fprintf(fp, + "Found environment variable GMX_NB_GENERIC.\n" + "Disabling all interaction-specific nonbonded kernels, will only\n" + "use the slow generic ones in src/gmxlib/nonbonded/nb_generic.c\n\n"); + } + bGenericKernelOnly = TRUE; + } + + if (bGenericKernelOnly == TRUE) + { + bNoSolvOpt = TRUE; + } + + if ( (getenv("GMX_DISABLE_CPU_ACCELERATION") != NULL) || (getenv("GMX_NOOPTIMIZEDKERNELS") != NULL) ) + { + fr->use_cpu_acceleration = FALSE; + if (fp != NULL) + { + fprintf(fp, + "\nFound environment variable GMX_DISABLE_CPU_ACCELERATION.\n" + "Disabling all CPU architecture-specific (e.g. 
SSE2/SSE4/AVX) routines.\n\n"); + } + } + + fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM); + + /* Check if we can/should do all-vs-all kernels */ + fr->bAllvsAll = can_use_allvsall(ir, mtop, FALSE, NULL, NULL); + fr->AllvsAll_work = NULL; + fr->AllvsAll_workgb = NULL; + ++ /* All-vs-all kernels have not been implemented in 4.6, and ++ * the SIMD group kernels are also buggy in this case. Non-accelerated ++ * group kernels are OK. See Redmine #1249. */ ++ if (fr->bAllvsAll) ++ { ++ fr->bAllvsAll = FALSE; ++ fr->use_cpu_acceleration = FALSE; ++ if (fp != NULL) ++ { ++ fprintf(fp, ++ "\nYour simulation settings would have triggered the efficient all-vs-all\n" ++ "kernels in GROMACS 4.5, but these have not been implemented in GROMACS\n" ++ "4.6. Also, we can't use the accelerated SIMD kernels here because\n" ++ "of an unfixed bug. The reference C kernels are correct, though, so\n" ++ "we are proceeding by disabling all CPU architecture-specific\n" ++ "(e.g. SSE2/SSE4/AVX) routines. If performance is important, please\n" ++ "use GROMACS 4.5.7 or try cutoff-scheme = Verlet.\n\n"); ++ } ++ } + + /* Neighbour searching stuff */ + fr->cutoff_scheme = ir->cutoff_scheme; + fr->bGrid = (ir->ns_type == ensGRID); + fr->ePBC = ir->ePBC; + + /* Determine if we will do PBC for distances in bonded interactions */ + if (fr->ePBC == epbcNONE) + { + fr->bMolPBC = FALSE; + } + else + { + if (!DOMAINDECOMP(cr)) + { + /* The group cut-off scheme and SHAKE assume charge groups + * are whole, but not using molpbc is faster in most cases. 
+ */ + if (fr->cutoff_scheme == ecutsGROUP || + (ir->eConstrAlg == econtSHAKE && + (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 || + gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0))) + { + fr->bMolPBC = ir->bPeriodicMols; + } + else + { + fr->bMolPBC = TRUE; + if (getenv("GMX_USE_GRAPH") != NULL) + { + fr->bMolPBC = FALSE; + if (fp) + { + fprintf(fp, "\nGMX_MOLPBC is set, using the graph for bonded interactions\n\n"); + } + } + } + } + else + { + fr->bMolPBC = dd_bonded_molpbc(cr->dd, fr->ePBC); + } + } + fr->bGB = (ir->implicit_solvent == eisGBSA); + + fr->rc_scaling = ir->refcoord_scaling; + copy_rvec(ir->posres_com, fr->posres_com); + copy_rvec(ir->posres_comB, fr->posres_comB); + fr->rlist = cutoff_inf(ir->rlist); + fr->rlistlong = cutoff_inf(ir->rlistlong); + fr->eeltype = ir->coulombtype; + fr->vdwtype = ir->vdwtype; + + fr->coulomb_modifier = ir->coulomb_modifier; + fr->vdw_modifier = ir->vdw_modifier; + + /* Electrostatics: Translate from interaction-setting-in-mdp-file to kernel interaction format */ + switch (fr->eeltype) + { + case eelCUT: + fr->nbkernel_elec_interaction = (fr->bGB) ? 
GMX_NBKERNEL_ELEC_GENERALIZEDBORN : GMX_NBKERNEL_ELEC_COULOMB; + break; + + case eelRF: + case eelGRF: + case eelRF_NEC: + fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD; + break; + + case eelRF_ZERO: + fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD; + fr->coulomb_modifier = eintmodEXACTCUTOFF; + break; + + case eelSWITCH: + case eelSHIFT: + case eelUSER: + case eelENCADSHIFT: + case eelPMESWITCH: + case eelPMEUSER: + case eelPMEUSERSWITCH: + fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE; + break; + + case eelPME: + case eelEWALD: + fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_EWALD; + break; + + default: + gmx_fatal(FARGS, "Unsupported electrostatic interaction: %s", eel_names[fr->eeltype]); + break; + } + + /* Vdw: Translate from mdp settings to kernel format */ + switch (fr->vdwtype) + { + case evdwCUT: + if (fr->bBHAM) + { + fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_BUCKINGHAM; + } + else + { + fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LENNARDJONES; + } + break; + + case evdwSWITCH: + case evdwSHIFT: + case evdwUSER: + case evdwENCADSHIFT: + fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE; + break; + + default: + gmx_fatal(FARGS, "Unsupported vdw interaction: %s", evdw_names[fr->vdwtype]); + break; + } + + /* These start out identical to ir, but might be altered if we e.g. 
tabulate the interaction in the kernel */ + fr->nbkernel_elec_modifier = fr->coulomb_modifier; + fr->nbkernel_vdw_modifier = fr->vdw_modifier; + + fr->bTwinRange = fr->rlistlong > fr->rlist; + fr->bEwald = (EEL_PME(fr->eeltype) || fr->eeltype == eelEWALD); + + fr->reppow = mtop->ffparams.reppow; + + if (ir->cutoff_scheme == ecutsGROUP) + { + fr->bvdwtab = (fr->vdwtype != evdwCUT || + !gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS)); + /* We have special kernels for standard Ewald and PME, but the pme-switch ones are tabulated above */ + fr->bcoultab = !(fr->eeltype == eelCUT || + fr->eeltype == eelEWALD || + fr->eeltype == eelPME || + fr->eeltype == eelRF || + fr->eeltype == eelRF_ZERO); + + /* If the user absolutely wants different switch/shift settings for coul/vdw, it is likely + * going to be faster to tabulate the interaction than calling the generic kernel. + */ + if (fr->nbkernel_elec_modifier == eintmodPOTSWITCH && fr->nbkernel_vdw_modifier == eintmodPOTSWITCH) + { + if ((fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw)) + { + fr->bcoultab = TRUE; + } + } + else if ((fr->nbkernel_elec_modifier == eintmodPOTSHIFT && fr->nbkernel_vdw_modifier == eintmodPOTSHIFT) || + ((fr->nbkernel_elec_interaction == GMX_NBKERNEL_ELEC_REACTIONFIELD && + fr->nbkernel_elec_modifier == eintmodEXACTCUTOFF && + (fr->nbkernel_vdw_modifier == eintmodPOTSWITCH || fr->nbkernel_vdw_modifier == eintmodPOTSHIFT)))) + { + if (fr->rcoulomb != fr->rvdw) + { + fr->bcoultab = TRUE; + } + } + + if (getenv("GMX_REQUIRE_TABLES")) + { + fr->bvdwtab = TRUE; + fr->bcoultab = TRUE; + } + + if (fp) + { + fprintf(fp, "Table routines are used for coulomb: %s\n", bool_names[fr->bcoultab]); + fprintf(fp, "Table routines are used for vdw: %s\n", bool_names[fr->bvdwtab ]); + } + + if (fr->bvdwtab == TRUE) + { + fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE; + fr->nbkernel_vdw_modifier = eintmodNONE; + } + if (fr->bcoultab == TRUE) + { + 
fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE; + fr->nbkernel_elec_modifier = eintmodNONE; + } + } + + if (ir->cutoff_scheme == ecutsVERLET) + { + if (!gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS)) + { + gmx_fatal(FARGS, "Cut-off scheme %S only supports LJ repulsion power 12", ecutscheme_names[ir->cutoff_scheme]); + } + fr->bvdwtab = FALSE; + fr->bcoultab = FALSE; + } + + /* Tables are used for direct ewald sum */ + if (fr->bEwald) + { + if (EEL_PME(ir->coulombtype)) + { + if (fp) + { + fprintf(fp, "Will do PME sum in reciprocal space.\n"); + } + if (ir->coulombtype == eelP3M_AD) + { + please_cite(fp, "Hockney1988"); + please_cite(fp, "Ballenegger2012"); + } + else + { + please_cite(fp, "Essmann95a"); + } + + if (ir->ewald_geometry == eewg3DC) + { + if (fp) + { + fprintf(fp, "Using the Ewald3DC correction for systems with a slab geometry.\n"); + } + please_cite(fp, "In-Chul99a"); + } + } + fr->ewaldcoeff = calc_ewaldcoeff(ir->rcoulomb, ir->ewald_rtol); + init_ewald_tab(&(fr->ewald_table), cr, ir, fp); + if (fp) + { + fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for Ewald\n", + 1/fr->ewaldcoeff); + } + } + + /* Electrostatics */ + fr->epsilon_r = ir->epsilon_r; + fr->epsilon_rf = ir->epsilon_rf; + fr->fudgeQQ = mtop->ffparams.fudgeQQ; + fr->rcoulomb_switch = ir->rcoulomb_switch; + fr->rcoulomb = cutoff_inf(ir->rcoulomb); + + /* Parameters for generalized RF */ + fr->zsquare = 0.0; + fr->temp = 0.0; + + if (fr->eeltype == eelGRF) + { + init_generalized_rf(fp, mtop, ir, fr); + } + else if (fr->eeltype == eelSHIFT) + { + for (m = 0; (m < DIM); m++) + { + box_size[m] = box[m][m]; + } + + if ((fr->eeltype == eelSHIFT && fr->rcoulomb > fr->rcoulomb_switch)) + { + set_shift_consts(fp, fr->rcoulomb_switch, fr->rcoulomb, box_size, fr); + } + } + + fr->bF_NoVirSum = (EEL_FULL(fr->eeltype) || + gmx_mtop_ftype_count(mtop, F_POSRES) > 0 || + gmx_mtop_ftype_count(mtop, F_FBPOSRES) > 0 || + IR_ELEC_FIELD(*ir) || + (fr->adress_icor != 
eAdressICOff) + ); + + if (fr->cutoff_scheme == ecutsGROUP && + ncg_mtop(mtop) > fr->cg_nalloc && !DOMAINDECOMP(cr)) + { + /* Count the total number of charge groups */ + fr->cg_nalloc = ncg_mtop(mtop); + srenew(fr->cg_cm, fr->cg_nalloc); + } + if (fr->shift_vec == NULL) + { + snew(fr->shift_vec, SHIFTS); + } + + if (fr->fshift == NULL) + { + snew(fr->fshift, SHIFTS); + } + + if (fr->nbfp == NULL) + { + fr->ntype = mtop->ffparams.atnr; + fr->nbfp = mk_nbfp(&mtop->ffparams, fr->bBHAM); + } + + /* Copy the energy group exclusions */ + fr->egp_flags = ir->opts.egp_flags; + + /* Van der Waals stuff */ + fr->rvdw = cutoff_inf(ir->rvdw); + fr->rvdw_switch = ir->rvdw_switch; + if ((fr->vdwtype != evdwCUT) && (fr->vdwtype != evdwUSER) && !fr->bBHAM) + { + if (fr->rvdw_switch >= fr->rvdw) + { + gmx_fatal(FARGS, "rvdw_switch (%f) must be < rvdw (%f)", + fr->rvdw_switch, fr->rvdw); + } + if (fp) + { + fprintf(fp, "Using %s Lennard-Jones, switch between %g and %g nm\n", + (fr->eeltype == eelSWITCH) ? "switched" : "shifted", + fr->rvdw_switch, fr->rvdw); + } + } + + if (fr->bBHAM && (fr->vdwtype == evdwSHIFT || fr->vdwtype == evdwSWITCH)) + { + gmx_fatal(FARGS, "Switch/shift interaction not supported with Buckingham"); + } + + if (fp) + { + fprintf(fp, "Cut-off's: NS: %g Coulomb: %g %s: %g\n", + fr->rlist, fr->rcoulomb, fr->bBHAM ? "BHAM" : "LJ", fr->rvdw); + } + + fr->eDispCorr = ir->eDispCorr; + if (ir->eDispCorr != edispcNO) + { + set_avcsixtwelve(fp, fr, mtop); + } + + if (fr->bBHAM) + { + set_bham_b_max(fp, fr, mtop); + } + + fr->gb_epsilon_solvent = ir->gb_epsilon_solvent; + + /* Copy the GBSA data (radius, volume and surftens for each + * atomtype) from the topology atomtype section to forcerec. 
+ */ + snew(fr->atype_radius, fr->ntype); + snew(fr->atype_vol, fr->ntype); + snew(fr->atype_surftens, fr->ntype); + snew(fr->atype_gb_radius, fr->ntype); + snew(fr->atype_S_hct, fr->ntype); + + if (mtop->atomtypes.nr > 0) + { + for (i = 0; i < fr->ntype; i++) + { + fr->atype_radius[i] = mtop->atomtypes.radius[i]; + } + for (i = 0; i < fr->ntype; i++) + { + fr->atype_vol[i] = mtop->atomtypes.vol[i]; + } + for (i = 0; i < fr->ntype; i++) + { + fr->atype_surftens[i] = mtop->atomtypes.surftens[i]; + } + for (i = 0; i < fr->ntype; i++) + { + fr->atype_gb_radius[i] = mtop->atomtypes.gb_radius[i]; + } + for (i = 0; i < fr->ntype; i++) + { + fr->atype_S_hct[i] = mtop->atomtypes.S_hct[i]; + } + } + + /* Generate the GB table if needed */ + if (fr->bGB) + { +#ifdef GMX_DOUBLE + fr->gbtabscale = 2000; +#else + fr->gbtabscale = 500; +#endif + + fr->gbtabr = 100; + fr->gbtab = make_gb_table(fp, oenv, fr, tabpfn, fr->gbtabscale); + + init_gb(&fr->born, cr, fr, ir, mtop, ir->rgbradii, ir->gb_algorithm); + + /* Copy local gb data (for dd, this is done in dd_partition_system) */ + if (!DOMAINDECOMP(cr)) + { + make_local_gb(cr, fr->born, ir->gb_algorithm); + } + } + + /* Set the charge scaling */ + if (fr->epsilon_r != 0) + { + fr->epsfac = ONE_4PI_EPS0/fr->epsilon_r; + } + else + { + /* eps = 0 is infinite dieletric: no coulomb interactions */ + fr->epsfac = 0; + } + + /* Reaction field constants */ + if (EEL_RF(fr->eeltype)) + { + calc_rffac(fp, fr->eeltype, fr->epsilon_r, fr->epsilon_rf, + fr->rcoulomb, fr->temp, fr->zsquare, box, + &fr->kappa, &fr->k_rf, &fr->c_rf); + } + + set_chargesum(fp, fr, mtop); + + /* if we are using LR electrostatics, and they are tabulated, + * the tables will contain modified coulomb interactions. + * Since we want to use the non-shifted ones for 1-4 + * coulombic interactions, we must have an extra set of tables. + */ + + /* Construct tables. + * A little unnecessary to make both vdw and coul tables sometimes, + * but what the heck... 
*/ + + bTab = fr->bcoultab || fr->bvdwtab || fr->bEwald; + + bSep14tab = ((!bTab || fr->eeltype != eelCUT || fr->vdwtype != evdwCUT || + fr->bBHAM || fr->bEwald) && + (gmx_mtop_ftype_count(mtop, F_LJ14) > 0 || + gmx_mtop_ftype_count(mtop, F_LJC14_Q) > 0 || + gmx_mtop_ftype_count(mtop, F_LJC_PAIRS_NB) > 0)); + + negp_pp = ir->opts.ngener - ir->nwall; + negptable = 0; + if (!bTab) + { + bNormalnblists = TRUE; + fr->nnblists = 1; + } + else + { + bNormalnblists = (ir->eDispCorr != edispcNO); + for (egi = 0; egi < negp_pp; egi++) + { + for (egj = egi; egj < negp_pp; egj++) + { + egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)]; + if (!(egp_flags & EGP_EXCL)) + { + if (egp_flags & EGP_TABLE) + { + negptable++; + } + else + { + bNormalnblists = TRUE; + } + } + } + } + if (bNormalnblists) + { + fr->nnblists = negptable + 1; + } + else + { + fr->nnblists = negptable; + } + if (fr->nnblists > 1) + { + snew(fr->gid2nblists, ir->opts.ngener*ir->opts.ngener); + } + } + + if (ir->adress) + { + fr->nnblists *= 2; + } + + snew(fr->nblists, fr->nnblists); + + /* This code automatically gives table length tabext without cut-off's, + * in that case grompp should already have checked that we do not need + * normal tables and we only generate tables for 1-4 interactions. 
+ */ + rtab = ir->rlistlong + ir->tabext; + + if (bTab) + { + /* make tables for ordinary interactions */ + if (bNormalnblists) + { + make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]); + if (ir->adress) + { + make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[fr->nnblists/2]); + } + if (!bSep14tab) + { + fr->tab14 = fr->nblists[0].table_elec_vdw; + } + m = 1; + } + else + { + m = 0; + } + if (negptable > 0) + { + /* Read the special tables for certain energy group pairs */ + nm_ind = mtop->groups.grps[egcENER].nm_ind; + for (egi = 0; egi < negp_pp; egi++) + { + for (egj = egi; egj < negp_pp; egj++) + { + egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)]; + if ((egp_flags & EGP_TABLE) && !(egp_flags & EGP_EXCL)) + { + nbl = &(fr->nblists[m]); + if (fr->nnblists > 1) + { + fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = m; + } + /* Read the table file with the two energy groups names appended */ + make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, + *mtop->groups.grpname[nm_ind[egi]], + *mtop->groups.grpname[nm_ind[egj]], + &fr->nblists[m]); + if (ir->adress) + { + make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, + *mtop->groups.grpname[nm_ind[egi]], + *mtop->groups.grpname[nm_ind[egj]], + &fr->nblists[fr->nnblists/2+m]); + } + m++; + } + else if (fr->nnblists > 1) + { + fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = 0; + } + } + } + } + } + if (bSep14tab) + { + /* generate extra tables with plain Coulomb for 1-4 interactions only */ + fr->tab14 = make_tables(fp, oenv, fr, MASTER(cr), tabpfn, rtab, + GMX_MAKETABLES_14ONLY); + } + + /* Read AdResS Thermo Force table if needed */ + if (fr->adress_icor == eAdressICThermoForce) + { + /* old todo replace */ + + if (ir->adress->n_tf_grps > 0) + { + make_adress_tf_tables(fp, oenv, fr, ir, tabfn, mtop, box); + + } + else + { + /* load the default table */ + snew(fr->atf_tabs, 1); + fr->atf_tabs[DEFAULT_TF_TABLE] = make_atf_table(fp, oenv, fr, tabafn, box); + } + } + 
+ /* Wall stuff */ + fr->nwall = ir->nwall; + if (ir->nwall && ir->wall_type == ewtTABLE) + { + make_wall_tables(fp, oenv, ir, tabfn, &mtop->groups, fr); + } + + if (fcd && tabbfn) + { + fcd->bondtab = make_bonded_tables(fp, + F_TABBONDS, F_TABBONDSNC, + mtop, tabbfn, "b"); + fcd->angletab = make_bonded_tables(fp, + F_TABANGLES, -1, + mtop, tabbfn, "a"); + fcd->dihtab = make_bonded_tables(fp, + F_TABDIHS, -1, + mtop, tabbfn, "d"); + } + else + { + if (debug) + { + fprintf(debug, "No fcdata or table file name passed, can not read table, can not do bonded interactions\n"); + } + } + + /* QM/MM initialization if requested + */ + if (ir->bQMMM) + { + fprintf(stderr, "QM/MM calculation requested.\n"); + } + + fr->bQMMM = ir->bQMMM; + fr->qr = mk_QMMMrec(); + + /* Set all the static charge group info */ + fr->cginfo_mb = init_cginfo_mb(fp, mtop, fr, bNoSolvOpt, + &fr->bExcl_IntraCGAll_InterCGNone); + if (DOMAINDECOMP(cr)) + { + fr->cginfo = NULL; + } + else + { + fr->cginfo = cginfo_expand(mtop->nmolblock, fr->cginfo_mb); + } + + if (!DOMAINDECOMP(cr)) + { + /* When using particle decomposition, the effect of the second argument, + * which sets fr->hcg, is corrected later in do_md and init_em. 
+ */ + forcerec_set_ranges(fr, ncg_mtop(mtop), ncg_mtop(mtop), + mtop->natoms, mtop->natoms, mtop->natoms); + } + + fr->print_force = print_force; + + + /* coarse load balancing vars */ + fr->t_fnbf = 0.; + fr->t_wait = 0.; + fr->timesteps = 0; + + /* Initialize neighbor search */ + init_ns(fp, cr, &fr->ns, fr, mtop, box); + + if (cr->duty & DUTY_PP) + { + gmx_nonbonded_setup(fp, fr, bGenericKernelOnly); + /* + if (ir->bAdress) + { + gmx_setup_adress_kernels(fp,bGenericKernelOnly); + } + */ + } + + /* Initialize the thread working data for bonded interactions */ + init_forcerec_f_threads(fr, mtop->groups.grps[egcENER].nr); + + snew(fr->excl_load, fr->nthreads+1); + + if (fr->cutoff_scheme == ecutsVERLET) + { + if (ir->rcoulomb != ir->rvdw) + { + gmx_fatal(FARGS, "With Verlet lists rcoulomb and rvdw should be identical"); + } + + init_nb_verlet(fp, &fr->nbv, ir, fr, cr, nbpu_opt); + } + + /* fr->ic is used both by verlet and group kernels (to some extent) now */ + init_interaction_const(fp, &fr->ic, fr, rtab); + if (ir->eDispCorr != edispcNO) + { + calc_enervirdiff(fp, ir->eDispCorr, fr); + } +} + +#define pr_real(fp, r) fprintf(fp, "%s: %e\n",#r, r) +#define pr_int(fp, i) fprintf((fp), "%s: %d\n",#i, i) +#define pr_bool(fp, b) fprintf((fp), "%s: %s\n",#b, bool_names[b]) + +void pr_forcerec(FILE *fp, t_forcerec *fr, t_commrec *cr) +{ + int i; + + pr_real(fp, fr->rlist); + pr_real(fp, fr->rcoulomb); + pr_real(fp, fr->fudgeQQ); + pr_bool(fp, fr->bGrid); + pr_bool(fp, fr->bTwinRange); + /*pr_int(fp,fr->cg0); + pr_int(fp,fr->hcg);*/ + for (i = 0; i < fr->nnblists; i++) + { + pr_int(fp, fr->nblists[i].table_elec_vdw.n); + } + pr_real(fp, fr->rcoulomb_switch); + pr_real(fp, fr->rcoulomb); + + fflush(fp); +} + +void forcerec_set_excl_load(t_forcerec *fr, + const gmx_localtop_t *top, const t_commrec *cr) +{ + const int *ind, *a; + int t, i, j, ntot, n, ntarget; + + if (cr != NULL && PARTDECOMP(cr)) + { + /* No OpenMP with particle decomposition */ + pd_at_range(cr, + 
&fr->excl_load[0], + &fr->excl_load[1]); + + return; + } + + ind = top->excls.index; + a = top->excls.a; + + ntot = 0; + for (i = 0; i < top->excls.nr; i++) + { + for (j = ind[i]; j < ind[i+1]; j++) + { + if (a[j] > i) + { + ntot++; + } + } + } + + fr->excl_load[0] = 0; + n = 0; + i = 0; + for (t = 1; t <= fr->nthreads; t++) + { + ntarget = (ntot*t)/fr->nthreads; + while (i < top->excls.nr && n < ntarget) + { + for (j = ind[i]; j < ind[i+1]; j++) + { + if (a[j] > i) + { + n++; + } + } + i++; + } + fr->excl_load[t] = i; + } +} diff --cc src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu index 19c0b2d4f4,0000000000..dc089c0bed mode 100644,000000..100644 --- a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu +++ b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu @@@ -1,961 -1,0 +1,963 @@@ +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*- + * + * + * This source code is part of + * + * G R O M A C S + * + * GROningen MAchine for Chemical Simulations + * + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others. + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2012, The GROMACS development team, + * check out http://www.gromacs.org for more information. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * If you want to redistribute modifications, please consider that + * scientific software is very special. Version control is crucial - + * bugs must be traceable. We will be happy to consider code for + * inclusion in the official distribution, but derived work must not + * be called official GROMACS. Details are found in the README & COPYING + * files - if they are missing, get the official version at www.gromacs.org. 
+ * + * To help us fund GROMACS development, we humbly ask that you cite + * the papers on the package - you can find them in the top README file. + * + * For more info, check our website at http://www.gromacs.org + * + * And Hey: + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon + */ +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include + +#include + +#include "gmx_fatal.h" +#include "smalloc.h" +#include "tables.h" +#include "typedefs.h" +#include "types/nb_verlet.h" +#include "types/interaction_const.h" +#include "types/force_flags.h" +#include "../nbnxn_consts.h" + +#include "nbnxn_cuda_types.h" +#include "../../gmxlib/cuda_tools/cudautils.cuh" +#include "nbnxn_cuda_data_mgmt.h" +#include "pmalloc_cuda.h" +#include "gpu_utils.h" + +static bool bUseCudaEventBlockingSync = false; /* makes the CPU thread block */ + +/* This is a heuristically determined parameter for the Fermi architecture for + * the minimum size of ci lists by multiplying this constant with the # of + * multiprocessors on the current device. + */ +static unsigned int gpu_min_ci_balanced_factor = 40; + +/* Functions from nbnxn_cuda.cu */ +extern void nbnxn_cuda_set_cacheconfig(cuda_dev_info_t *devinfo); +extern const struct texture& nbnxn_cuda_get_nbfp_texref(); +extern const struct texture& nbnxn_cuda_get_coulomb_tab_texref(); + +/* We should actually be using md_print_warn in md_logging.c, + * but we can't include mpi.h in CUDA code. + */ +static void md_print_warn(FILE *fplog, + const char *fmt, ...) +{ + va_list ap; + + if (fplog != NULL) + { + /* We should only print to stderr on the master node, + * in most cases fplog is only set on the master node, so this works. + */ + va_start(ap, fmt); + fprintf(stderr, "\n"); + vfprintf(stderr, fmt, ap); + fprintf(stderr, "\n"); + va_end(ap); + + va_start(ap, fmt); + fprintf(fplog, "\n"); + vfprintf(fplog, fmt, ap); + fprintf(fplog, "\n"); + va_end(ap); + } +} + + +/* Fw. decl. 
*/ +static void nbnxn_cuda_clear_e_fshift(nbnxn_cuda_ptr_t cu_nb); + + +/*! Tabulates the Ewald Coulomb force and initializes the size/scale + and the table GPU array. If called with an already allocated table, + it just re-uploads the table. + */ +static void init_ewald_coulomb_force_table(cu_nbparam_t *nbp) +{ + float *ftmp, *coul_tab; + int tabsize; + double tabscale; + cudaError_t stat; + + tabsize = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE; + /* Subtract 2 iso 1 to avoid access out of range due to rounding */ + tabscale = (tabsize - 2) / sqrt(nbp->rcoulomb_sq); + + pmalloc((void**)&ftmp, tabsize*sizeof(*ftmp)); + + table_spline3_fill_ewald_lr(ftmp, NULL, NULL, tabsize, + 1/tabscale, nbp->ewald_beta); + + /* If the table pointer == NULL the table is generated the first time => + the array pointer will be saved to nbparam and the texture is bound. + */ + coul_tab = nbp->coulomb_tab; + if (coul_tab == NULL) + { + stat = cudaMalloc((void **)&coul_tab, tabsize*sizeof(*coul_tab)); + CU_RET_ERR(stat, "cudaMalloc failed on coul_tab"); + + nbp->coulomb_tab = coul_tab; + + cudaChannelFormatDesc cd = cudaCreateChannelDesc(); + stat = cudaBindTexture(NULL, &nbnxn_cuda_get_coulomb_tab_texref(), + coul_tab, &cd, tabsize*sizeof(*coul_tab)); + CU_RET_ERR(stat, "cudaBindTexture on coul_tab failed"); + } + + cu_copy_H2D(coul_tab, ftmp, tabsize*sizeof(*coul_tab)); + + nbp->coulomb_tab_size = tabsize; + nbp->coulomb_tab_scale = tabscale; + + pfree(ftmp); +} + + +/*! Initializes the atomdata structure first time, it only gets filled at + pair-search. 
*/ +static void init_atomdata_first(cu_atomdata_t *ad, int ntypes) +{ + cudaError_t stat; + + ad->ntypes = ntypes; + stat = cudaMalloc((void**)&ad->shift_vec, SHIFTS*sizeof(*ad->shift_vec)); + CU_RET_ERR(stat, "cudaMalloc failed on ad->shift_vec"); + ad->bShiftVecUploaded = false; + + stat = cudaMalloc((void**)&ad->fshift, SHIFTS*sizeof(*ad->fshift)); + CU_RET_ERR(stat, "cudaMalloc failed on ad->fshift"); + + stat = cudaMalloc((void**)&ad->e_lj, sizeof(*ad->e_lj)); + CU_RET_ERR(stat, "cudaMalloc failed on ad->e_lj"); + stat = cudaMalloc((void**)&ad->e_el, sizeof(*ad->e_el)); + CU_RET_ERR(stat, "cudaMalloc failed on ad->e_el"); + + /* initialize to NULL the pointers to data that is not allocated here and will + need reallocation in nbnxn_cuda_init_atomdata */ + ad->xq = NULL; + ad->f = NULL; + + /* size -1 indicates that the respective array hasn't been initialized yet */ + ad->natoms = -1; + ad->nalloc = -1; +} + +/*! Selects the Ewald kernel type, analytical on SM 3.0 and later, tabulated on + earlier GPUs, single or twin cut-off. */ +static int pick_ewald_kernel_type(bool bTwinCut, + const cuda_dev_info_t *dev_info) +{ + bool bUseAnalyticalEwald, bForceAnalyticalEwald, bForceTabulatedEwald; + int kernel_type; + + /* Benchmarking/development environment variables to force the use of + analytical or tabulated Ewald kernel. */ + bForceAnalyticalEwald = (getenv("GMX_CUDA_NB_ANA_EWALD") != NULL); + bForceTabulatedEwald = (getenv("GMX_CUDA_NB_TAB_EWALD") != NULL); + + if (bForceAnalyticalEwald && bForceTabulatedEwald) + { + gmx_incons("Both analytical and tabulated Ewald CUDA non-bonded kernels " + "requested through environment variables."); + } + + /* By default, on SM 3.0 and later use analytical Ewald, on earlier tabulated. 
*/ + if ((dev_info->prop.major >= 3 || bForceAnalyticalEwald) && !bForceTabulatedEwald) + { + bUseAnalyticalEwald = true; + + if (debug) + { + fprintf(debug, "Using analytical Ewald CUDA kernels\n"); + } + } + else + { + bUseAnalyticalEwald = false; + + if (debug) + { + fprintf(debug, "Using tabulated Ewald CUDA kernels\n"); + } + } + + /* Use twin cut-off kernels if requested by bTwinCut or the env. var. + forces it (use it for debugging/benchmarking only). */ + if (!bTwinCut && (getenv("GMX_CUDA_NB_EWALD_TWINCUT") == NULL)) + { + kernel_type = bUseAnalyticalEwald ? eelCuEWALD_ANA : eelCuEWALD_TAB; + } + else + { + kernel_type = bUseAnalyticalEwald ? eelCuEWALD_ANA_TWIN : eelCuEWALD_TAB_TWIN; + } + + return kernel_type; +} + + +/*! Initializes the nonbonded parameter data structure: copies the interaction constants from ic, selects the electrostatics kernel flavor, generates the Ewald table when a tabulated kernel is selected, and uploads the nbfp parameter table to the GPU. */ +static void init_nbparam(cu_nbparam_t *nbp, + const interaction_const_t *ic, - const nonbonded_verlet_t *nbv, ++ const nbnxn_atomdata_t *nbat, + const cuda_dev_info_t *dev_info) +{ + cudaError_t stat; + int ntypes, nnbfp; + - ntypes = nbv->grp[0].nbat->ntype; ++ ntypes = nbat->ntype; + + nbp->ewald_beta = ic->ewaldcoeff; + nbp->sh_ewald = ic->sh_ewald; + nbp->epsfac = ic->epsfac; + nbp->two_k_rf = 2.0 * ic->k_rf; + nbp->c_rf = ic->c_rf; + nbp->rvdw_sq = ic->rvdw * ic->rvdw; + nbp->rcoulomb_sq= ic->rcoulomb * ic->rcoulomb; + nbp->rlist_sq = ic->rlist * ic->rlist; + nbp->sh_invrc6 = ic->sh_invrc6; + + if (ic->eeltype == eelCUT) + { + nbp->eeltype = eelCuCUT; + } + else if (EEL_RF(ic->eeltype)) + { + nbp->eeltype = eelCuRF; + } + else if ((EEL_PME(ic->eeltype) || ic->eeltype==eelEWALD)) + { + /* Initially rcoulomb == rvdw, so it's surely not twin cut-off. 
*/ + nbp->eeltype = pick_ewald_kernel_type(false, dev_info); + } + else + { + /* Shouldn't happen, as this is checked when choosing Verlet-scheme */ + gmx_incons("The requested electrostatics type is not implemented in the CUDA GPU accelerated kernels!"); + } + + /* generate table for PME */ + nbp->coulomb_tab = NULL; + if (nbp->eeltype == eelCuEWALD_TAB || nbp->eeltype == eelCuEWALD_TAB_TWIN) + { + init_ewald_coulomb_force_table(nbp); + } + + nnbfp = 2*ntypes*ntypes; + stat = cudaMalloc((void **)&nbp->nbfp, nnbfp*sizeof(*nbp->nbfp)); + CU_RET_ERR(stat, "cudaMalloc failed on nbp->nbfp"); - cu_copy_H2D(nbp->nbfp, nbv->grp[0].nbat->nbfp, nnbfp*sizeof(*nbp->nbfp)); ++ cu_copy_H2D(nbp->nbfp, nbat->nbfp, nnbfp*sizeof(*nbp->nbfp)); + + /* NOTE(review): channel described as float, assuming nbp->nbfp is a float array - confirm against cu_nbparam_t */ + cudaChannelFormatDesc cd = cudaCreateChannelDesc<float>(); + stat = cudaBindTexture(NULL, &nbnxn_cuda_get_nbfp_texref(), + nbp->nbfp, &cd, nnbfp*sizeof(*nbp->nbfp)); + CU_RET_ERR(stat, "cudaBindTexture on nbfp failed"); +} + +/*! Re-generates the GPU Ewald force table, resets rlist, and updates the + * electrostatic type, switching to twin cut-off (or back) if needed. */ +void nbnxn_cuda_pme_loadbal_update_param(nbnxn_cuda_ptr_t cu_nb, + const interaction_const_t *ic) +{ + cu_nbparam_t *nbp = cu_nb->nbparam; + + nbp->rlist_sq = ic->rlist * ic->rlist; + nbp->rcoulomb_sq = ic->rcoulomb * ic->rcoulomb; + nbp->ewald_beta = ic->ewaldcoeff; + + nbp->eeltype = pick_ewald_kernel_type(ic->rcoulomb != ic->rvdw, + cu_nb->dev_info); + + init_ewald_coulomb_force_table(cu_nb->nbparam); +} + +/*! Initializes the pair list data structure. 
*/ +static void init_plist(cu_plist_t *pl) +{ + /* initialize to NULL pointers to data that is not allocated here and will + need reallocation in nbnxn_cuda_init_pairlist */ + pl->sci = NULL; + pl->cj4 = NULL; + pl->excl = NULL; + + /* size -1 indicates that the respective array hasn't been initialized yet */ + pl->na_c = -1; + pl->nsci = -1; + pl->sci_nalloc = -1; + pl->ncj4 = -1; + pl->cj4_nalloc = -1; + pl->nexcl = -1; + pl->excl_nalloc = -1; + pl->bDoPrune = false; +} + +/*! Initializes the timer data structure: creates the CUDA events for the atom-data transfer and, per stream, for the non-bonded kernel, pair-list H2D, non-bonded H2D and non-bonded D2H. */ +static void init_timers(cu_timers_t *t, bool bUseTwoStreams) +{ + cudaError_t stat; + int eventflags = ( bUseCudaEventBlockingSync ? cudaEventBlockingSync: cudaEventDefault ); + + stat = cudaEventCreateWithFlags(&(t->start_atdat), eventflags); + CU_RET_ERR(stat, "cudaEventCreate on start_atdat failed"); + stat = cudaEventCreateWithFlags(&(t->stop_atdat), eventflags); + CU_RET_ERR(stat, "cudaEventCreate on stop_atdat failed"); + + /* The non-local counters/stream (second in the array) are needed only with DD. */ + for (int i = 0; i <= (bUseTwoStreams ? 
1 : 0); i++) + { + stat = cudaEventCreateWithFlags(&(t->start_nb_k[i]), eventflags); + CU_RET_ERR(stat, "cudaEventCreate on start_nb_k failed"); + stat = cudaEventCreateWithFlags(&(t->stop_nb_k[i]), eventflags); + CU_RET_ERR(stat, "cudaEventCreate on stop_nb_k failed"); + + + stat = cudaEventCreateWithFlags(&(t->start_pl_h2d[i]), eventflags); + CU_RET_ERR(stat, "cudaEventCreate on start_pl_h2d failed"); + stat = cudaEventCreateWithFlags(&(t->stop_pl_h2d[i]), eventflags); + CU_RET_ERR(stat, "cudaEventCreate on stop_pl_h2d failed"); + + stat = cudaEventCreateWithFlags(&(t->start_nb_h2d[i]), eventflags); + CU_RET_ERR(stat, "cudaEventCreate on start_nb_h2d failed"); + stat = cudaEventCreateWithFlags(&(t->stop_nb_h2d[i]), eventflags); + CU_RET_ERR(stat, "cudaEventCreate on stop_nb_h2d failed"); + + stat = cudaEventCreateWithFlags(&(t->start_nb_d2h[i]), eventflags); + CU_RET_ERR(stat, "cudaEventCreate on start_nb_d2h failed"); + stat = cudaEventCreateWithFlags(&(t->stop_nb_d2h[i]), eventflags); + CU_RET_ERR(stat, "cudaEventCreate on stop_nb_d2h failed"); + } +} + +/*! Initializes the timings data structure by zeroing all accumulated times and counters. */ +static void init_timings(wallclock_gpu_t *t) +{ + int i, j; + + t->nb_h2d_t = 0.0; + t->nb_d2h_t = 0.0; + t->nb_c = 0; + t->pl_h2d_t = 0.0; + t->pl_h2d_c = 0; + for (i = 0; i < 2; i++) + { + for(j = 0; j < 2; j++) + { + t->ktime[i][j].t = 0.0; + t->ktime[i][j].c = 0; + } + } +} + +/* Decide which kernel version to use (default or legacy) based on: + * - CUDA version used for compilation + * - non-bonded kernel selector environment variables + * - GPU architecture version + */ +static int pick_nbnxn_kernel_version(FILE *fplog, + cuda_dev_info_t *devinfo) +{ + bool bForceLegacyKernel, bForceDefaultKernel, bCUDA40, bCUDA32; + char sbuf[STRLEN]; + int kver; + + /* Legacy kernel (former k2), kept for backward compatibility as it is + faster than the default with CUDA 3.2/4.0 on Fermi (not on Kepler). 
*/ + bForceLegacyKernel = (getenv("GMX_CUDA_NB_LEGACY") != NULL); + /* default kernel (former k3). */ + bForceDefaultKernel = (getenv("GMX_CUDA_NB_DEFAULT") != NULL); + + if ((unsigned)(bForceLegacyKernel + bForceDefaultKernel) > 1) + { + gmx_fatal(FARGS, "Multiple CUDA non-bonded kernels requested; to manually pick a kernel set only one \n" + "of the following environment variables: \n" + "GMX_CUDA_NB_DEFAULT, GMX_CUDA_NB_LEGACY"); + } + + bCUDA32 = bCUDA40 = false; +#if CUDA_VERSION == 3200 + bCUDA32 = true; + sprintf(sbuf, "3.2"); +#elif CUDA_VERSION == 4000 + bCUDA40 = true; + sprintf(sbuf, "4.0"); +#endif + + /* default is default ;) */ + kver = eNbnxnCuKDefault; + + /* Consider switching to legacy kernels only on Fermi */ + if (devinfo->prop.major < 3 && (bCUDA32 || bCUDA40)) + { + /* use legacy kernel unless something else is forced by an env. var; + sbuf is only read here, where one of the CUDA_VERSION branches above has set it */ + if (bForceDefaultKernel) + { + md_print_warn(fplog, + "NOTE: CUDA %s compilation detected; with this compiler version the legacy\n" + " non-bonded kernels perform best. However, the default kernels were\n" + " selected by the GMX_CUDA_NB_DEFAULT environment variable.\n" + " For best performance upgrade your CUDA toolkit.\n", + sbuf); + } + else + { + kver = eNbnxnCuKLegacy; + } + } + else + { + /* issue note if the non-default kernel is forced by an env. var */ + if (bForceLegacyKernel) + { + md_print_warn(fplog, + "NOTE: Legacy non-bonded CUDA kernels selected by the GMX_CUDA_NB_LEGACY\n" + " env. var. 
Consider using the default kernels which should be faster!\n"); + + kver = eNbnxnCuKLegacy; + } + } + + return kver; +} + +void nbnxn_cuda_init(FILE *fplog, + nbnxn_cuda_ptr_t *p_cu_nb, + gmx_gpu_info_t *gpu_info, int my_gpu_index, + gmx_bool bLocalAndNonlocal) +{ + cudaError_t stat; + nbnxn_cuda_ptr_t nb; + char sbuf[STRLEN]; + bool bStreamSync, bNoStreamSync, bTMPIAtomics, bX86, bOldDriver; + int cuda_drv_ver; + + assert(gpu_info); + + if (p_cu_nb == NULL) return; + + snew(nb, 1); + snew(nb->atdat, 1); + snew(nb->nbparam, 1); + snew(nb->plist[eintLocal], 1); + if (bLocalAndNonlocal) + { + snew(nb->plist[eintNonlocal], 1); + } + + nb->bUseTwoStreams = bLocalAndNonlocal; + + snew(nb->timers, 1); + snew(nb->timings, 1); + + /* init nbst */ + pmalloc((void**)&nb->nbst.e_lj, sizeof(*nb->nbst.e_lj)); + pmalloc((void**)&nb->nbst.e_el, sizeof(*nb->nbst.e_el)); + pmalloc((void**)&nb->nbst.fshift, SHIFTS * sizeof(*nb->nbst.fshift)); + + init_plist(nb->plist[eintLocal]); + + /* local/non-local GPU streams */ + stat = cudaStreamCreate(&nb->stream[eintLocal]); + CU_RET_ERR(stat, "cudaStreamCreate on stream[eintLocal] failed"); + if (nb->bUseTwoStreams) + { + init_plist(nb->plist[eintNonlocal]); + stat = cudaStreamCreate(&nb->stream[eintNonlocal]); + CU_RET_ERR(stat, "cudaStreamCreate on stream[eintNonlocal] failed"); + } + + /* init events for synchronization (timing disabled for performance reasons!) 
*/ + stat = cudaEventCreateWithFlags(&nb->nonlocal_done, cudaEventDisableTiming); + CU_RET_ERR(stat, "cudaEventCreate on nonlocal_done failed"); + stat = cudaEventCreateWithFlags(&nb->misc_ops_done, cudaEventDisableTiming); + CU_RET_ERR(stat, "cudaEventCreate on misc_ops_done failed"); + + /* set device info, just point it to the right GPU among the detected ones */ + nb->dev_info = &gpu_info->cuda_dev[get_gpu_device_id(gpu_info, my_gpu_index)]; + + /* On GPUs with ECC enabled, cudaStreamSynchronize shows a large overhead + * (which increases with shorter time/step) caused by a known CUDA driver bug. + * To work around the issue we'll use an (admittedly fragile) memory polling + * waiting to preserve performance. This requires support for atomic + * operations and only works on x86/x86_64. + * With polling wait event-timing also needs to be disabled. + * + * The overhead is greatly reduced in API v5.0 drivers and the improvement + * is independent of runtime version. Hence, with API v5.0 drivers and later + * we won't switch to polling. + * + * NOTE: Unfortunately, this is known to fail when GPUs are shared by (t)MPI + * ranks, so we will also disable it in that case. 
+ */ + + bStreamSync = getenv("GMX_CUDA_STREAMSYNC") != NULL; + bNoStreamSync = getenv("GMX_NO_CUDA_STREAMSYNC") != NULL; + +#ifdef TMPI_ATOMICS + bTMPIAtomics = true; +#else + bTMPIAtomics = false; +#endif + +#if defined(i386) || defined(__x86_64__) + bX86 = true; +#else + bX86 = false; +#endif + + if (bStreamSync && bNoStreamSync) + { + gmx_fatal(FARGS, "Conflicting environment variables: both GMX_CUDA_STREAMSYNC and GMX_NO_CUDA_STREAMSYNC defined"); + } + + stat = cudaDriverGetVersion(&cuda_drv_ver); + CU_RET_ERR(stat, "cudaDriverGetVersion failed"); ++ + bOldDriver = (cuda_drv_ver < 5000); + - if (nb->dev_info->prop.ECCEnabled == 1) ++ if ((nb->dev_info->prop.ECCEnabled == 1) && bOldDriver) + { ++ /* Polling wait should be used instead of cudaStreamSynchronize only if: ++ * - ECC is ON & driver is old (checked above), ++ * - we're on x86/x86_64, ++ * - atomics are available, and ++ * - GPUs are not being shared. ++ */ ++ bool bShouldUsePollSync = (bX86 && bTMPIAtomics && !gpu_info->bDevShare); ++ + if (bStreamSync) + { + nb->bUseStreamSync = true; + + /* only warn if polling should be used */ - if (bOldDriver && !gpu_info->bDevShare) ++ if (bShouldUsePollSync) + { + md_print_warn(fplog, + "NOTE: Using a GPU with ECC enabled and CUDA driver API version <5.0, but\n" + " cudaStreamSynchronize waiting is forced by the GMX_CUDA_STREAMSYNC env. var.\n"); + } + } + else + { - /* Can/should turn of cudaStreamSynchronize wait only if - * - we're on x86/x86_64 - * - atomics are available - * - GPUs are not being shared - * - and driver is old. */ - nb->bUseStreamSync = - (bX86 && bTMPIAtomics && !gpu_info->bDevShare && bOldDriver) ? - true : false; - - if (nb->bUseStreamSync) ++ nb->bUseStreamSync = !bShouldUsePollSync; ++ ++ if (bShouldUsePollSync) + { + md_print_warn(fplog, + "NOTE: Using a GPU with ECC enabled and CUDA driver API version <5.0, known to\n" - " cause performance loss. Switching to the alternative polling GPU waiting.\n" ++ " cause performance loss. 
Switching to the alternative polling GPU wait.\n" + " If you encounter issues, switch back to standard GPU waiting by setting\n" + " the GMX_CUDA_STREAMSYNC environment variable.\n"); + } - else if (bOldDriver) ++ else + { + /* Tell the user that the ECC+old driver combination can be bad */ + sprintf(sbuf, - "NOTE: Using a GPU with ECC enabled and CUDA driver API version <5.0. A bug in this\n" - " driver can cause performance loss.\n" - " However, the polling waiting workaround can not be used because\n%s\n" ++ "NOTE: Using a GPU with ECC enabled and CUDA driver API version <5.0.\n" ++ " A known bug in this driver version can cause performance loss.\n" ++ " However, the polling wait workaround can not be used because\n%s\n" + " Consider updating the driver or turning ECC off.", - (!bX86 || !bTMPIAtomics) ? - " atomic operations are not supported by the platform/CPU+compiler." : - " GPU(s) are being oversubscribed."); ++ (bX86 && bTMPIAtomics) ? ++ " GPU(s) are being oversubscribed." : ++ " atomic operations are not supported by the platform/CPU+compiler."); + /* pass the prepared text as an argument, never as the format string */ + md_print_warn(fplog, "%s", sbuf); + } + } + } + else + { + if (bNoStreamSync) + { + nb->bUseStreamSync = false; + + md_print_warn(fplog, + "NOTE: Polling wait for GPU synchronization requested by GMX_NO_CUDA_STREAMSYNC\n"); + } + else + { + /* no/off ECC, cudaStreamSynchronize not turned off by env. var. */ + nb->bUseStreamSync = true; + } + } + + /* CUDA timing disabled as event timers don't work: + - with multiple streams = domain-decomposition; + - with the polling waiting hack (without cudaStreamSynchronize); + - when turned off by GMX_DISABLE_CUDA_TIMING. 
+ */ + nb->bDoTime = (!nb->bUseTwoStreams && nb->bUseStreamSync && + (getenv("GMX_DISABLE_CUDA_TIMING") == NULL)); + + if (nb->bDoTime) + { + init_timers(nb->timers, nb->bUseTwoStreams); + init_timings(nb->timings); + } + + /* set the kernel type for the current GPU */ + nb->kernel_ver = pick_nbnxn_kernel_version(fplog, nb->dev_info); + /* pick L1 cache configuration */ + nbnxn_cuda_set_cacheconfig(nb->dev_info); + + *p_cu_nb = nb; + + if (debug) + { + fprintf(debug, "Initialized CUDA data structures.\n"); + } +} + - void nbnxn_cuda_init_const(nbnxn_cuda_ptr_t cu_nb, - const interaction_const_t *ic, - const nonbonded_verlet_t *nbv) ++void nbnxn_cuda_init_const(nbnxn_cuda_ptr_t cu_nb, ++ const interaction_const_t *ic, ++ const nonbonded_verlet_group_t *nbv_group) +{ - init_atomdata_first(cu_nb->atdat, nbv->grp[0].nbat->ntype); - init_nbparam(cu_nb->nbparam, ic, nbv, cu_nb->dev_info); ++ init_atomdata_first(cu_nb->atdat, nbv_group[0].nbat->ntype); ++ init_nbparam(cu_nb->nbparam, ic, nbv_group[0].nbat, cu_nb->dev_info); + + /* clear energy and shift force outputs */ + nbnxn_cuda_clear_e_fshift(cu_nb); +} + +void nbnxn_cuda_init_pairlist(nbnxn_cuda_ptr_t cu_nb, + const nbnxn_pairlist_t *h_plist, + int iloc) +{ + char sbuf[STRLEN]; + cudaError_t stat; + bool bDoTime = cu_nb->bDoTime; + cudaStream_t stream = cu_nb->stream[iloc]; + cu_plist_t *d_plist = cu_nb->plist[iloc]; + + if (d_plist->na_c < 0) + { + d_plist->na_c = h_plist->na_ci; + } + else + { + if (d_plist->na_c != h_plist->na_ci) + { + sprintf(sbuf, "In cu_init_plist: the #atoms per cell has changed (from %d to %d)", + d_plist->na_c, h_plist->na_ci); + gmx_incons(sbuf); + } + } + + if (bDoTime) + { + stat = cudaEventRecord(cu_nb->timers->start_pl_h2d[iloc], stream); + CU_RET_ERR(stat, "cudaEventRecord failed"); + } + + cu_realloc_buffered((void **)&d_plist->sci, h_plist->sci, sizeof(*d_plist->sci), + &d_plist->nsci, &d_plist->sci_nalloc, + h_plist->nsci, + stream, true); + + cu_realloc_buffered((void 
**)&d_plist->cj4, h_plist->cj4, sizeof(*d_plist->cj4), + &d_plist->ncj4, &d_plist->cj4_nalloc, + h_plist->ncj4, + stream, true); + + cu_realloc_buffered((void **)&d_plist->excl, h_plist->excl, sizeof(*d_plist->excl), + &d_plist->nexcl, &d_plist->excl_nalloc, + h_plist->nexcl, + stream, true); + + if (bDoTime) + { + stat = cudaEventRecord(cu_nb->timers->stop_pl_h2d[iloc], stream); + CU_RET_ERR(stat, "cudaEventRecord failed"); + } + + /* need to prune the pair list during the next step */ + d_plist->bDoPrune = true; +} + +void nbnxn_cuda_upload_shiftvec(nbnxn_cuda_ptr_t cu_nb, + const nbnxn_atomdata_t *nbatom) +{ + cu_atomdata_t *adat = cu_nb->atdat; + cudaStream_t ls = cu_nb->stream[eintLocal]; + + /* upload only if the box is dynamic or the shift vectors have not been uploaded yet */ + if (nbatom->bDynamicBox || !adat->bShiftVecUploaded) + { + cu_copy_H2D_async(adat->shift_vec, nbatom->shift_vec, + SHIFTS * sizeof(*adat->shift_vec), ls); + adat->bShiftVecUploaded = true; + } +} + +/*! Clears the first natoms_clear elements of the GPU nonbonded force output array. */ +static void nbnxn_cuda_clear_f(nbnxn_cuda_ptr_t cu_nb, int natoms_clear) +{ + cudaError_t stat; + cu_atomdata_t *adat = cu_nb->atdat; + cudaStream_t ls = cu_nb->stream[eintLocal]; + + stat = cudaMemsetAsync(adat->f, 0, natoms_clear * sizeof(*adat->f), ls); + CU_RET_ERR(stat, "cudaMemsetAsync on f falied"); +} + +/*! Clears nonbonded shift force output array and energy outputs on the GPU. 
*/ +static void nbnxn_cuda_clear_e_fshift(nbnxn_cuda_ptr_t cu_nb) +{ + cudaError_t stat; + cu_atomdata_t *adat = cu_nb->atdat; + cudaStream_t ls = cu_nb->stream[eintLocal]; + + stat = cudaMemsetAsync(adat->fshift, 0, SHIFTS * sizeof(*adat->fshift), ls); + CU_RET_ERR(stat, "cudaMemsetAsync on fshift falied"); + stat = cudaMemsetAsync(adat->e_lj, 0, sizeof(*adat->e_lj), ls); + CU_RET_ERR(stat, "cudaMemsetAsync on e_lj falied"); + stat = cudaMemsetAsync(adat->e_el, 0, sizeof(*adat->e_el), ls); + CU_RET_ERR(stat, "cudaMemsetAsync on e_el falied"); +} + +void nbnxn_cuda_clear_outputs(nbnxn_cuda_ptr_t cu_nb, int flags) +{ + nbnxn_cuda_clear_f(cu_nb, cu_nb->atdat->natoms); + /* clear shift force array and energies if the outputs were + used in the current step */ + if (flags & GMX_FORCE_VIRIAL) + { + nbnxn_cuda_clear_e_fshift(cu_nb); + } +} + +void nbnxn_cuda_init_atomdata(nbnxn_cuda_ptr_t cu_nb, + const nbnxn_atomdata_t *nbat) +{ + cudaError_t stat; + int nalloc, natoms; + bool realloced; + bool bDoTime = cu_nb->bDoTime; + cu_timers_t *timers = cu_nb->timers; + cu_atomdata_t *d_atdat = cu_nb->atdat; + cudaStream_t ls = cu_nb->stream[eintLocal]; + + natoms = nbat->natoms; + realloced = false; + + if (bDoTime) + { + /* time async copy */ + stat = cudaEventRecord(timers->start_atdat, ls); + CU_RET_ERR(stat, "cudaEventRecord failed"); + } + + /* need to reallocate if we have to copy more atoms than the amount of space + available; this also covers the first call, when d_atdat->nalloc == -1 and nothing has been allocated yet */ + if (natoms > d_atdat->nalloc) + { + nalloc = over_alloc_small(natoms); + + /* free up first if the arrays have already been initialized */ + if (d_atdat->nalloc != -1) + { + cu_free_buffered(d_atdat->f, &d_atdat->natoms, &d_atdat->nalloc); + cu_free_buffered(d_atdat->xq); + cu_free_buffered(d_atdat->atom_types); + } + + stat = cudaMalloc((void **)&d_atdat->f, nalloc*sizeof(*d_atdat->f)); + CU_RET_ERR(stat, "cudaMalloc failed on d_atdat->f"); + stat = cudaMalloc((void 
**)&d_atdat->xq, nalloc*sizeof(*d_atdat->xq)); + CU_RET_ERR(stat, "cudaMalloc failed on d_atdat->xq"); + + stat = cudaMalloc((void **)&d_atdat->atom_types, nalloc*sizeof(*d_atdat->atom_types)); + CU_RET_ERR(stat, "cudaMalloc failed on d_atdat->atom_types"); + + d_atdat->nalloc = nalloc; + realloced = true; + } + + d_atdat->natoms = natoms; + d_atdat->natoms_local = nbat->natoms_local; + + /* need to clear GPU f output if realloc happened */ + if (realloced) + { + nbnxn_cuda_clear_f(cu_nb, nalloc); + } + + cu_copy_H2D_async(d_atdat->atom_types, nbat->type, + natoms*sizeof(*d_atdat->atom_types), ls); + + if (bDoTime) + { + stat = cudaEventRecord(timers->stop_atdat, ls); + CU_RET_ERR(stat, "cudaEventRecord failed"); + } +} + +/*! Releases the GPU and host resources set up by nbnxn_cuda_init and the init_* helpers; does nothing when cu_nb is NULL. */ +void nbnxn_cuda_free(FILE *fplog, nbnxn_cuda_ptr_t cu_nb) +{ + cudaError_t stat; + cu_atomdata_t *atdat; + cu_nbparam_t *nbparam; + cu_plist_t *plist, *plist_nl; + cu_timers_t *timers; + + if (cu_nb == NULL) return; + + atdat = cu_nb->atdat; + nbparam = cu_nb->nbparam; + plist = cu_nb->plist[eintLocal]; + plist_nl = cu_nb->plist[eintNonlocal]; + timers = cu_nb->timers; + + if (nbparam->eeltype == eelCuEWALD_TAB || nbparam->eeltype == eelCuEWALD_TAB_TWIN) + { + stat = cudaUnbindTexture(nbnxn_cuda_get_coulomb_tab_texref()); + CU_RET_ERR(stat, "cudaUnbindTexture on coulomb_tab failed"); + cu_free_buffered(nbparam->coulomb_tab, &nbparam->coulomb_tab_size); + } + + stat = cudaEventDestroy(cu_nb->nonlocal_done); + CU_RET_ERR(stat, "cudaEventDestroy failed on timers->nonlocal_done"); + stat = cudaEventDestroy(cu_nb->misc_ops_done); + CU_RET_ERR(stat, "cudaEventDestroy failed on timers->misc_ops_done"); + + if (cu_nb->bDoTime) + { + stat = cudaEventDestroy(timers->start_atdat); + CU_RET_ERR(stat, "cudaEventDestroy failed on timers->start_atdat"); + stat = cudaEventDestroy(timers->stop_atdat); + CU_RET_ERR(stat, "cudaEventDestroy failed on timers->stop_atdat"); + + /* The non-local counters (second in the array) are needed only with DD. 
*/ + for (int i = 0; i <= (cu_nb->bUseTwoStreams ? 1 : 0); i++) + { + stat = cudaEventDestroy(timers->start_nb_k[i]); + CU_RET_ERR(stat, "cudaEventDestroy failed on timers->start_nb_k"); + stat = cudaEventDestroy(timers->stop_nb_k[i]); + CU_RET_ERR(stat, "cudaEventDestroy failed on timers->stop_nb_k"); + + stat = cudaEventDestroy(timers->start_pl_h2d[i]); + CU_RET_ERR(stat, "cudaEventDestroy failed on timers->start_pl_h2d"); + stat = cudaEventDestroy(timers->stop_pl_h2d[i]); + CU_RET_ERR(stat, "cudaEventDestroy failed on timers->stop_pl_h2d"); + + stat = cudaEventDestroy(timers->start_nb_h2d[i]); + CU_RET_ERR(stat, "cudaEventDestroy failed on timers->start_nb_h2d"); + stat = cudaEventDestroy(timers->stop_nb_h2d[i]); + CU_RET_ERR(stat, "cudaEventDestroy failed on timers->stop_nb_h2d"); + + stat = cudaEventDestroy(timers->start_nb_d2h[i]); + CU_RET_ERR(stat, "cudaEventDestroy failed on timers->start_nb_d2h"); + stat = cudaEventDestroy(timers->stop_nb_d2h[i]); + CU_RET_ERR(stat, "cudaEventDestroy failed on timers->stop_nb_d2h"); + } + } + + /* The streams are created in nbnxn_cuda_init regardless of timing, so + destroy them here rather than inside the timing-only block above. */ + for (int i = 0; i <= (cu_nb->bUseTwoStreams ? 1 : 0); i++) + { + stat = cudaStreamDestroy(cu_nb->stream[i]); + CU_RET_ERR(stat, "cudaStreamDestroy failed on stream"); + } + + stat = cudaUnbindTexture(nbnxn_cuda_get_nbfp_texref()); + CU_RET_ERR(stat, "cudaUnbindTexture on nbfp failed"); + cu_free_buffered(nbparam->nbfp); + + stat = cudaFree(atdat->shift_vec); + CU_RET_ERR(stat, "cudaFree failed on atdat->shift_vec"); + stat = cudaFree(atdat->fshift); + CU_RET_ERR(stat, "cudaFree failed on atdat->fshift"); + + stat = cudaFree(atdat->e_lj); + CU_RET_ERR(stat, "cudaFree failed on atdat->e_lj"); + stat = cudaFree(atdat->e_el); + CU_RET_ERR(stat, "cudaFree failed on atdat->e_el"); + + cu_free_buffered(atdat->f, &atdat->natoms, &atdat->nalloc); + cu_free_buffered(atdat->xq); + cu_free_buffered(atdat->atom_types, &atdat->ntypes); + + cu_free_buffered(plist->sci, &plist->nsci, &plist->sci_nalloc); + cu_free_buffered(plist->cj4, &plist->ncj4, &plist->cj4_nalloc); + cu_free_buffered(plist->excl, 
&plist->nexcl, &plist->excl_nalloc); + if (cu_nb->bUseTwoStreams) + { + cu_free_buffered(plist_nl->sci, &plist_nl->nsci, &plist_nl->sci_nalloc); + cu_free_buffered(plist_nl->cj4, &plist_nl->ncj4, &plist_nl->cj4_nalloc); + cu_free_buffered(plist_nl->excl, &plist_nl->nexcl, &plist_nl->excl_nalloc); + } + + /* release the host staging buffers obtained with pmalloc in nbnxn_cuda_init */ + pfree(cu_nb->nbst.e_lj); + pfree(cu_nb->nbst.e_el); + pfree(cu_nb->nbst.fshift); + + sfree(atdat); + sfree(nbparam); + sfree(plist); + if (cu_nb->bUseTwoStreams) + { + sfree(plist_nl); + } + sfree(timers); + sfree(cu_nb->timings); + sfree(cu_nb); + + if (debug) + { + fprintf(debug, "Cleaned up CUDA data structures.\n"); + } +} + +void cu_synchstream_atdat(nbnxn_cuda_ptr_t cu_nb, int iloc) +{ + cudaError_t stat; + cudaStream_t stream = cu_nb->stream[iloc]; + + stat = cudaStreamWaitEvent(stream, cu_nb->timers->stop_atdat, 0); + CU_RET_ERR(stat, "cudaStreamWaitEvent failed"); +} + +wallclock_gpu_t * nbnxn_cuda_get_timings(nbnxn_cuda_ptr_t cu_nb) +{ + return (cu_nb != NULL && cu_nb->bDoTime) ? cu_nb->timings : NULL; +} + +void nbnxn_cuda_reset_timings(nbnxn_cuda_ptr_t cu_nb) +{ + if (cu_nb->bDoTime) + { + init_timings(cu_nb->timings); + } +} + +int nbnxn_cuda_min_ci_balanced(nbnxn_cuda_ptr_t cu_nb) +{ + return cu_nb != NULL ? + gpu_min_ci_balanced_factor*cu_nb->dev_info->prop.multiProcessorCount : 0; + +} diff --cc src/gromacs/mdlib/pull_rotation.c index fb6c276416,0000000000..9fc97282be mode 100644,000000..100644 --- a/src/gromacs/mdlib/pull_rotation.c +++ b/src/gromacs/mdlib/pull_rotation.c @@@ -1,4092 -1,0 +1,4098 @@@ +/* + * + * This source code is part of + * + * G R O M A C S + * + * GROningen MAchine for Chemical Simulations + * + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others. + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2008, The GROMACS development team, + * check out http://www.gromacs.org for more information. 
+ + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * If you want to redistribute modifications, please consider that + * scientific software is very special. Version control is crucial - + * bugs must be traceable. We will be happy to consider code for + * inclusion in the official distribution, but derived work must not + * be called official GROMACS. Details are found in the README & COPYING + * files - if they are missing, get the official version at www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the papers on the package - you can find them in the top README file. + * + * For more info, check our website at http://www.gromacs.org + * + * And Hey: + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon + */ +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include +#include "domdec.h" +#include "gmx_wallcycle.h" +#include "gmx_cyclecounter.h" +#include "trnio.h" +#include "smalloc.h" +#include "network.h" +#include "pbc.h" +#include "futil.h" +#include "mdrun.h" +#include "txtdump.h" +#include "names.h" +#include "mtop_util.h" +#include "names.h" +#include "nrjac.h" +#include "vec.h" +#include "gmx_ga2la.h" +#include "xvgr.h" +#include "gmxfio.h" +#include "groupcoord.h" +#include "pull_rotation.h" +#include "gmx_sort.h" +#include "copyrite.h" +#include "macros.h" + + +static char *RotStr = {"Enforced rotation:"}; + + +/* Set the minimum weight for the determination of the slab centers */ +#define WEIGHT_MIN (10*GMX_FLOAT_MIN) + +/* Helper structure for sorting positions along rotation vector */ +typedef struct { + real xcproj; /* Projection of xc on the rotation vector */ + int ind; /* Index of xc */ + real m; /* Mass */ + rvec x; /* Position */ + rvec x_ref; /* Reference position */ +} 
sort_along_vec_t; + + +/* Enforced rotation / flexible: determine the angle of each slab */ +typedef struct gmx_slabdata +{ + int nat; /* Number of atoms belonging to this slab */ + rvec *x; /* The positions belonging to this slab. In + general, this should be all positions of the + whole rotation group, but we leave those away + that have a small enough weight */ + rvec *ref; /* Same for reference */ + real *weight; /* The weight for each atom */ +} t_gmx_slabdata; + + +/* Helper structure for potential fitting */ +typedef struct gmx_potfit +{ + real *degangle; /* Set of angles for which the potential is + calculated. The optimum fit is determined as + the angle for which the potential is minimal */ + real *V; /* Potential for the different angles */ + matrix *rotmat; /* Rotation matrix corresponding to the angles */ +} t_gmx_potfit; + + +/* Enforced rotation data for all groups */ +typedef struct gmx_enfrot +{ + FILE *out_rot; /* Output file for rotation data */ + FILE *out_torque; /* Output file for torque data */ + FILE *out_angles; /* Output file for slab angles for flexible type */ + FILE *out_slabs; /* Output file for slab centers */ + int bufsize; /* Allocation size of buf */ + rvec *xbuf; /* Coordinate buffer variable for sorting */ + real *mbuf; /* Masses buffer variable for sorting */ + sort_along_vec_t *data; /* Buffer variable needed for position sorting */ + real *mpi_inbuf; /* MPI buffer holding the local values to be reduced */ + real *mpi_outbuf; /* MPI buffer receiving the reduced values */ + int mpi_bufsize; /* Allocation size of in & outbuf */ + unsigned long Flags; /* mdrun flags */ + gmx_bool bOut; /* Used to skip first output when appending to + * avoid duplicate entries in rotation outfiles */ +} t_gmx_enfrot; + + +/* Global enforced rotation data for a single rotation group */ +typedef struct gmx_enfrotgrp +{ + real degangle; /* Rotation angle in degrees */ + matrix rotmat; /* Rotation matrix */ + atom_id *ind_loc; /* Local rotation indices */ + int nat_loc; /* Number of local group atoms */ + int 
nalloc_loc; /* Allocation size for ind_loc and weight_loc */ + + real V; /* Rotation potential for this rotation group */ + rvec *f_rot_loc; /* Array to store the forces on the local atoms + resulting from enforced rotation potential */ + + /* Collective coordinates for the whole rotation group */ + real *xc_ref_length; /* Length of each x_rotref vector after x_rotref + has been put into origin */ + int *xc_ref_ind; /* Position of each local atom in the collective + array */ + rvec xc_center; /* Center of the rotation group positions, may + be mass weighted */ + rvec xc_ref_center; /* ditto, for the reference positions */ + rvec *xc; /* Current (collective) positions */ + ivec *xc_shifts; /* Current (collective) shifts */ + ivec *xc_eshifts; /* Extra shifts since last DD step */ + rvec *xc_old; /* Old (collective) positions */ + rvec *xc_norm; /* Normalized form of the current positions */ + rvec *xc_ref_sorted; /* Reference positions (sorted in the same order + as xc when sorted) */ + int *xc_sortind; /* Where is a position found after sorting? */ + real *mc; /* Collective masses */ + real *mc_sorted; + real invmass; /* one over the total mass of the rotation group */ + + real torque_v; /* Torque in the direction of rotation vector */ + real angle_v; /* Actual angle of the whole rotation group */ + /* Fixed rotation only */ + real weight_v; /* Weights for angle determination */ + rvec *xr_loc; /* Local reference coords, correctly rotated */ + rvec *x_loc_pbc; /* Local current coords, correct PBC image */ + real *m_loc; /* Masses of the current local atoms */ + + /* Flexible rotation only */ + int nslabs_alloc; /* For this many slabs memory is allocated */ + int slab_first; /* Lowermost slab for which the calculation needs + to be performed at a given time step */ + int slab_last; /* Uppermost slab ... */ + int slab_first_ref; /* First slab for which ref. center is stored */ + int slab_last_ref; /* Last ... 
*/ + int slab_buffer; /* Slab buffer region around reference slabs */ + int *firstatom; /* First relevant atom for a slab */ + int *lastatom; /* Last relevant atom for a slab */ + rvec *slab_center; /* Gaussian-weighted slab center */ + rvec *slab_center_ref; /* Gaussian-weighted slab center for the + reference positions */ + real *slab_weights; /* Sum of gaussian weights in a slab */ + real *slab_torque_v; /* Torque T = r x f for each slab. */ + /* torque_v = m.v = angular momentum in the + direction of v */ + real max_beta; /* min_gaussian from inputrec->rotgrp is the + minimum value the gaussian must have so that + the force is actually evaluated; max_beta is + just another way to put it */ + real *gn_atom; /* Precalculated gaussians for a single atom */ + int *gn_slabind; /* Tells to which slab each precalculated gaussian + belongs */ + rvec *slab_innersumvec; /* Inner sum of the flexible2 potential per slab; + this is precalculated for optimization reasons */ + t_gmx_slabdata *slab_data; /* Holds atom positions and gaussian weights + of atoms belonging to a slab */ + + /* For potential fits with varying angle: */ + t_gmx_potfit *PotAngleFit; /* Used for fit type 'potential' */ +} t_gmx_enfrotgrp; + + +/* Activate output of forces for correctness checks */ +/* #define PRINT_FORCES */ +#ifdef PRINT_FORCES +#define PRINT_FORCE_J fprintf(stderr, "f%d = %15.8f %15.8f %15.8f\n", erg->xc_ref_ind[j], erg->f_rot_loc[j][XX], erg->f_rot_loc[j][YY], erg->f_rot_loc[j][ZZ]); +#define PRINT_POT_TAU if (MASTER(cr)) { \ + fprintf(stderr, "potential = %15.8f\n" "torque = %15.8f\n", erg->V, erg->torque_v); \ +} +#else +#define PRINT_FORCE_J +#define PRINT_POT_TAU +#endif + +/* Shortcuts for often used queries; the argument is parenthesized so that any pointer expression can be passed */ +#define ISFLEX(rg) ( ((rg)->eType == erotgFLEX) || ((rg)->eType == erotgFLEXT) || ((rg)->eType == erotgFLEX2) || ((rg)->eType == erotgFLEX2T) ) +#define ISCOLL(rg) ( ((rg)->eType == erotgFLEX) || ((rg)->eType == erotgFLEXT) || ((rg)->eType == erotgFLEX2) || ((rg)->eType == erotgFLEX2T) 
|| ((rg)->eType == erotgRMPF) || ((rg)->eType == erotgRM2PF) ) + + +/* Does any of the rotation groups use slab decomposition? */ +static gmx_bool HaveFlexibleGroups(t_rot *rot) +{ + int g; + t_rotgrp *rotg; + + + for (g = 0; g < rot->ngrp; g++) + { + rotg = &rot->grp[g]; + if (ISFLEX(rotg)) + { + return TRUE; + } + } + + return FALSE; +} + + +/* Is for any group the fit angle determined by finding the minimum of the + * rotation potential? */ +static gmx_bool HavePotFitGroups(t_rot *rot) +{ + int g; + t_rotgrp *rotg; + + + for (g = 0; g < rot->ngrp; g++) + { + rotg = &rot->grp[g]; + if (erotgFitPOT == rotg->eFittype) + { + return TRUE; + } + } + + return FALSE; +} + + +/* Allocate a dim x dim matrix of doubles as an array of separately allocated rows */ +static double** allocate_square_matrix(int dim) +{ + int i; + double** mat = NULL; + + + snew(mat, dim); + for (i = 0; i < dim; i++) + { + snew(mat[i], dim); + } + + return mat; +} + + +/* Free the dim rows of mat and then mat itself (counterpart of allocate_square_matrix) */ +static void free_square_matrix(double** mat, int dim) +{ + int i; + + + for (i = 0; i < dim; i++) + { + sfree(mat[i]); + } + sfree(mat); +} + + +/* Return the angle for which the potential is minimal; the placeholder -999.9 + * is returned if no tabulated potential is below GMX_FLOAT_MAX */ +static real get_fitangle(t_rotgrp *rotg, gmx_enfrotgrp_t erg) +{ + int i; + real fitangle = -999.9; + real pot_min = GMX_FLOAT_MAX; + t_gmx_potfit *fit; + + + fit = erg->PotAngleFit; + + for (i = 0; i < rotg->PotAngle_nstep; i++) + { + if (fit->V[i] < pot_min) + { + pot_min = fit->V[i]; + fitangle = fit->degangle[i]; + } + } + + return fitangle; +} + + +/* Reduce potential angle fit data for this group at this time step? */ +static gmx_inline gmx_bool bPotAngle(t_rot *rot, t_rotgrp *rotg, gmx_large_int_t step) +{ + return ( (erotgFitPOT == rotg->eFittype) && (do_per_step(step, rot->nstsout) || do_per_step(step, rot->nstrout)) ); +} + +/* Reduce slab torque data for this group at this time step? */ +static gmx_inline gmx_bool bSlabTau(t_rot *rot, t_rotgrp *rotg, gmx_large_int_t step) +{ + return ( (ISFLEX(rotg)) && do_per_step(step, rot->nstsout) ); +} + +/* Output rotation energy, torques, etc. 
for each rotation group */ +static void reduce_output(t_commrec *cr, t_rot *rot, real t, gmx_large_int_t step) +{ + int g, i, islab, nslabs = 0; + int count; /* MPI element counter */ + t_rotgrp *rotg; + gmx_enfrot_t er; /* Pointer to the enforced rotation buffer variables */ + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + real fitangle; + gmx_bool bFlex; + + + er = rot->enfrot; + + /* Fill the MPI buffer with stuff to reduce. If items are added for reduction + * here, the MPI buffer size has to be enlarged also in calc_mpi_bufsize() */ + if (PAR(cr)) + { + count = 0; + for (g = 0; g < rot->ngrp; g++) + { + rotg = &rot->grp[g]; + erg = rotg->enfrotgrp; + nslabs = erg->slab_last - erg->slab_first + 1; + er->mpi_inbuf[count++] = erg->V; + er->mpi_inbuf[count++] = erg->torque_v; + er->mpi_inbuf[count++] = erg->angle_v; + er->mpi_inbuf[count++] = erg->weight_v; /* weights are not needed for flex types, but this is just a single value */ + + if (bPotAngle(rot, rotg, step)) + { + for (i = 0; i < rotg->PotAngle_nstep; i++) + { + er->mpi_inbuf[count++] = erg->PotAngleFit->V[i]; + } + } + if (bSlabTau(rot, rotg, step)) + { + for (i = 0; i < nslabs; i++) + { + er->mpi_inbuf[count++] = erg->slab_torque_v[i]; + } + } + } + if (count > er->mpi_bufsize) + { + gmx_fatal(FARGS, "%s MPI buffer overflow, please report this error.", RotStr); + } + +#ifdef GMX_MPI + MPI_Reduce(er->mpi_inbuf, er->mpi_outbuf, count, GMX_MPI_REAL, MPI_SUM, MASTERRANK(cr), cr->mpi_comm_mygroup); +#endif + + /* Copy back the reduced data from the buffer on the master */ + if (MASTER(cr)) + { + count = 0; + for (g = 0; g < rot->ngrp; g++) + { + rotg = &rot->grp[g]; + erg = rotg->enfrotgrp; + nslabs = erg->slab_last - erg->slab_first + 1; + erg->V = er->mpi_outbuf[count++]; + erg->torque_v = er->mpi_outbuf[count++]; + erg->angle_v = er->mpi_outbuf[count++]; + erg->weight_v = er->mpi_outbuf[count++]; + + if (bPotAngle(rot, rotg, step)) + { + for (i = 0; i < rotg->PotAngle_nstep; i++) + 
{ + erg->PotAngleFit->V[i] = er->mpi_outbuf[count++]; + } + } + if (bSlabTau(rot, rotg, step)) + { + for (i = 0; i < nslabs; i++) + { + erg->slab_torque_v[i] = er->mpi_outbuf[count++]; + } + } + } + } + } + + /* Output */ + if (MASTER(cr)) + { + /* Angle and torque for each rotation group */ + for (g = 0; g < rot->ngrp; g++) + { + rotg = &rot->grp[g]; + bFlex = ISFLEX(rotg); + + erg = rotg->enfrotgrp; + + /* Output to main rotation output file: */ + if (do_per_step(step, rot->nstrout) ) + { + if (erotgFitPOT == rotg->eFittype) + { + fitangle = get_fitangle(rotg, erg); + } + else + { + if (bFlex) + { + fitangle = erg->angle_v; /* RMSD fit angle */ + } + else + { + fitangle = (erg->angle_v/erg->weight_v)*180.0*M_1_PI; + } + } + fprintf(er->out_rot, "%12.4f", fitangle); + fprintf(er->out_rot, "%12.3e", erg->torque_v); + fprintf(er->out_rot, "%12.3e", erg->V); + } + + if (do_per_step(step, rot->nstsout) ) + { + /* Output to torque log file: */ + if (bFlex) + { + fprintf(er->out_torque, "%12.3e%6d", t, g); + for (i = erg->slab_first; i <= erg->slab_last; i++) + { + islab = i - erg->slab_first; /* slab index */ + /* Only output if enough weight is in slab */ + if (erg->slab_weights[islab] > rotg->min_gaussian) + { + fprintf(er->out_torque, "%6d%12.3e", i, erg->slab_torque_v[islab]); + } + } + fprintf(er->out_torque, "\n"); + } + + /* Output to angles log file: */ + if (erotgFitPOT == rotg->eFittype) + { + fprintf(er->out_angles, "%12.3e%6d%12.4f", t, g, erg->degangle); + /* Output energies at a set of angles around the reference angle */ + for (i = 0; i < rotg->PotAngle_nstep; i++) + { + fprintf(er->out_angles, "%12.3e", erg->PotAngleFit->V[i]); + } + fprintf(er->out_angles, "\n"); + } + } + } + if (do_per_step(step, rot->nstrout) ) + { + fprintf(er->out_rot, "\n"); + } + } +} + + +/* Add the forces from enforced rotation potential to the local forces. 
+ * Should be called after the SR forces have been evaluated */ +extern real add_rot_forces(t_rot *rot, rvec f[], t_commrec *cr, gmx_large_int_t step, real t) +{ + int g, l, ii; + t_rotgrp *rotg; + gmx_enfrot_t er; /* Pointer to the enforced rotation buffer variables */ + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + real Vrot = 0.0; /* If more than one rotation group is present, Vrot + assembles the local parts from all groups */ + + + er = rot->enfrot; + + /* Loop over enforced rotation groups (usually 1, though) + * Apply the forces from rotation potentials */ + for (g = 0; g < rot->ngrp; g++) + { + rotg = &rot->grp[g]; + erg = rotg->enfrotgrp; + Vrot += erg->V; /* add the local parts from the nodes */ + for (l = 0; l < erg->nat_loc; l++) + { + /* Get the right index of the local force */ + ii = erg->ind_loc[l]; + /* Add */ + rvec_inc(f[ii], erg->f_rot_loc[l]); + } + } + + /* Reduce energy,torque, angles etc. to get the sum values (per rotation group) + * on the master and output these values to file. */ + if ( (do_per_step(step, rot->nstrout) || do_per_step(step, rot->nstsout)) && er->bOut) + { + reduce_output(cr, rot, t, step); + } + + /* When appending, er->bOut is FALSE the first time to avoid duplicate entries */ + er->bOut = TRUE; + + PRINT_POT_TAU + + return Vrot; +} + + +/* The Gaussian norm is chosen such that the sum of the gaussian functions + * over the slabs is approximately 1.0 everywhere */ +#define GAUSS_NORM 0.569917543430618 + + +/* Calculate the maximum beta that leads to a gaussian larger min_gaussian, + * also does some checks + */ +static double calc_beta_max(real min_gaussian, real slab_dist) +{ + double sigma; + double arg; + + + /* Actually the next two checks are already made in grompp */ + if (slab_dist <= 0) + { + gmx_fatal(FARGS, "Slab distance of flexible rotation groups must be >=0 !"); + } + if (min_gaussian <= 0) + { + gmx_fatal(FARGS, "Cutoff value for Gaussian must be > 0. 
(You requested %f)"); + } + + /* Define the sigma value */ + sigma = 0.7*slab_dist; + + /* Calculate the argument for the logarithm and check that the log() result is negative or 0 */ + arg = min_gaussian/GAUSS_NORM; + if (arg > 1.0) + { + gmx_fatal(FARGS, "min_gaussian of flexible rotation groups must be <%g", GAUSS_NORM); + } + + return sqrt(-2.0*sigma*sigma*log(min_gaussian/GAUSS_NORM)); +} + + +static gmx_inline real calc_beta(rvec curr_x, t_rotgrp *rotg, int n) +{ + return iprod(curr_x, rotg->vec) - rotg->slab_dist * n; +} + + +static gmx_inline real gaussian_weight(rvec curr_x, t_rotgrp *rotg, int n) +{ + const real norm = GAUSS_NORM; + real sigma; + + + /* Define the sigma value */ + sigma = 0.7*rotg->slab_dist; + /* Calculate the Gaussian value of slab n for position curr_x */ + return norm * exp( -0.5 * sqr( calc_beta(curr_x, rotg, n)/sigma ) ); +} + + +/* Returns the weight in a single slab, also calculates the Gaussian- and mass- + * weighted sum of positions for that slab */ +static real get_slab_weight(int j, t_rotgrp *rotg, rvec xc[], real mc[], rvec *x_weighted_sum) +{ + rvec curr_x; /* The position of an atom */ + rvec curr_x_weighted; /* The gaussian-weighted position */ + real gaussian; /* A single gaussian weight */ + real wgauss; /* gaussian times current mass */ + real slabweight = 0.0; /* The sum of weights in the slab */ + int i, islab; + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + + + erg = rotg->enfrotgrp; + clear_rvec(*x_weighted_sum); + + /* Slab index */ + islab = j - erg->slab_first; + + /* Loop over all atoms in the rotation group */ + for (i = 0; i < rotg->nat; i++) + { + copy_rvec(xc[i], curr_x); + gaussian = gaussian_weight(curr_x, rotg, j); + wgauss = gaussian * mc[i]; + svmul(wgauss, curr_x, curr_x_weighted); + rvec_add(*x_weighted_sum, curr_x_weighted, *x_weighted_sum); + slabweight += wgauss; + } /* END of loop over rotation group atoms */ + + return slabweight; +} + + +static void get_slab_centers( + 
t_rotgrp *rotg, /* The rotation group information */ + rvec *xc, /* The rotation group positions; will + typically be enfrotgrp->xc, but at first call + it is enfrotgrp->xc_ref */ + real *mc, /* The masses of the rotation group atoms */ + int g, /* The number of the rotation group */ + real time, /* Used for output only */ + FILE *out_slabs, /* For outputting center per slab information */ + gmx_bool bOutStep, /* Is this an output step? */ + gmx_bool bReference) /* If this routine is called from + init_rot_group we need to store + the reference slab centers */ +{ + int j, islab; + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + + + erg = rotg->enfrotgrp; + + /* Loop over slabs */ + for (j = erg->slab_first; j <= erg->slab_last; j++) + { + islab = j - erg->slab_first; + erg->slab_weights[islab] = get_slab_weight(j, rotg, xc, mc, &erg->slab_center[islab]); + + /* We can do the calculations ONLY if there is weight in the slab! */ + if (erg->slab_weights[islab] > WEIGHT_MIN) + { + svmul(1.0/erg->slab_weights[islab], erg->slab_center[islab], erg->slab_center[islab]); + } + else + { + /* We need to check this here, since we divide through slab_weights + * in the flexible low-level routines! */ + gmx_fatal(FARGS, "Not enough weight in slab %d. 
Slab center cannot be determined!", j); + } + + /* At first time step: save the centers of the reference structure */ + if (bReference) + { + copy_rvec(erg->slab_center[islab], erg->slab_center_ref[islab]); + } + } /* END of loop over slabs */ + + /* Output on the master */ + if ( (NULL != out_slabs) && bOutStep) + { + fprintf(out_slabs, "%12.3e%6d", time, g); + for (j = erg->slab_first; j <= erg->slab_last; j++) + { + islab = j - erg->slab_first; + fprintf(out_slabs, "%6d%12.3e%12.3e%12.3e", + j, erg->slab_center[islab][XX], erg->slab_center[islab][YY], erg->slab_center[islab][ZZ]); + } + fprintf(out_slabs, "\n"); + } +} + + +static void calc_rotmat( + rvec vec, + real degangle, /* Angle alpha of rotation at time t in degrees */ + matrix rotmat) /* Rotation matrix */ +{ + real radangle; /* Rotation angle in radians */ + real cosa; /* cosine alpha */ + real sina; /* sine alpha */ + real OMcosa; /* 1 - cos(alpha) */ + real dumxy, dumxz, dumyz; /* save computations */ + rvec rot_vec; /* Rotate around rot_vec ... 
*/ + + + radangle = degangle * M_PI/180.0; + copy_rvec(vec, rot_vec ); + + /* Precompute some variables: */ + cosa = cos(radangle); + sina = sin(radangle); + OMcosa = 1.0 - cosa; + dumxy = rot_vec[XX]*rot_vec[YY]*OMcosa; + dumxz = rot_vec[XX]*rot_vec[ZZ]*OMcosa; + dumyz = rot_vec[YY]*rot_vec[ZZ]*OMcosa; + + /* Construct the rotation matrix for this rotation group: */ + /* 1st column: */ + rotmat[XX][XX] = cosa + rot_vec[XX]*rot_vec[XX]*OMcosa; + rotmat[YY][XX] = dumxy + rot_vec[ZZ]*sina; + rotmat[ZZ][XX] = dumxz - rot_vec[YY]*sina; + /* 2nd column: */ + rotmat[XX][YY] = dumxy - rot_vec[ZZ]*sina; + rotmat[YY][YY] = cosa + rot_vec[YY]*rot_vec[YY]*OMcosa; + rotmat[ZZ][YY] = dumyz + rot_vec[XX]*sina; + /* 3rd column: */ + rotmat[XX][ZZ] = dumxz + rot_vec[YY]*sina; + rotmat[YY][ZZ] = dumyz - rot_vec[XX]*sina; + rotmat[ZZ][ZZ] = cosa + rot_vec[ZZ]*rot_vec[ZZ]*OMcosa; + +#ifdef PRINTMATRIX + int iii, jjj; + + for (iii = 0; iii < 3; iii++) + { + for (jjj = 0; jjj < 3; jjj++) + { + fprintf(stderr, " %10.8f ", rotmat[iii][jjj]); + } + fprintf(stderr, "\n"); + } +#endif +} + + +/* Calculates torque on the rotation axis tau = position x force */ +static gmx_inline real torque( + rvec rotvec, /* rotation vector; MUST be normalized! 
*/ + rvec force, /* force */ + rvec x, /* position of atom on which the force acts */ + rvec pivot) /* pivot point of rotation axis */ +{ + rvec vectmp, tau; + + + /* Subtract offset */ + rvec_sub(x, pivot, vectmp); + + /* position x force */ + cprod(vectmp, force, tau); + + /* Return the part of the torque which is parallel to the rotation vector */ + return iprod(tau, rotvec); +} + + +/* Right-aligned output of value with standard width */ +static void print_aligned(FILE *fp, char *str) +{ + fprintf(fp, "%12s", str); +} + + +/* Right-aligned output of value with standard short width */ +static void print_aligned_short(FILE *fp, char *str) +{ + fprintf(fp, "%6s", str); +} + + +static FILE *open_output_file(const char *fn, int steps, const char what[]) +{ + FILE *fp; + + + fp = ffopen(fn, "w"); + + fprintf(fp, "# Output of %s is written in intervals of %d time step%s.\n#\n", + what, steps, steps > 1 ? "s" : ""); + + return fp; +} + + +/* Open output file for slab center data. Call on master only */ - static FILE *open_slab_out(const char *fn, t_rot *rot, const output_env_t oenv) ++static FILE *open_slab_out(const char *fn, t_rot *rot) +{ + FILE *fp; + int g, i; + t_rotgrp *rotg; + + + if (rot->enfrot->Flags & MD_APPENDFILES) + { + fp = gmx_fio_fopen(fn, "a"); + } + else + { + fp = open_output_file(fn, rot->nstsout, "gaussian weighted slab centers"); + + for (g = 0; g < rot->ngrp; g++) + { + rotg = &rot->grp[g]; + if (ISFLEX(rotg)) + { + fprintf(fp, "# Rotation group %d (%s), slab distance %f nm, %s.\n", + g, erotg_names[rotg->eType], rotg->slab_dist, + rotg->bMassW ? "centers of mass" : "geometrical centers"); + } + } + + fprintf(fp, "# Reference centers are listed first (t=-1).\n"); + fprintf(fp, "# The following columns have the syntax:\n"); + fprintf(fp, "# "); + print_aligned_short(fp, "t"); + print_aligned_short(fp, "grp"); + /* Print legend for the first two entries only ... 
*/ + for (i = 0; i < 2; i++) + { + print_aligned_short(fp, "slab"); + print_aligned(fp, "X center"); + print_aligned(fp, "Y center"); + print_aligned(fp, "Z center"); + } + fprintf(fp, " ...\n"); + fflush(fp); + } + + return fp; +} + + +/* Adds 'buf' to 'str' */ +static void add_to_string(char **str, char *buf) +{ + int len; + + + len = strlen(*str) + strlen(buf) + 1; + srenew(*str, len); + strcat(*str, buf); +} + + +static void add_to_string_aligned(char **str, char *buf) +{ + char buf_aligned[STRLEN]; + + sprintf(buf_aligned, "%12s", buf); + add_to_string(str, buf_aligned); +} + + +/* Open output file and print some general information about the rotation groups. + * Call on master only */ +static FILE *open_rot_out(const char *fn, t_rot *rot, const output_env_t oenv) +{ + FILE *fp; + int g, nsets; + t_rotgrp *rotg; + const char **setname; + char buf[50], buf2[75]; + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + gmx_bool bFlex; + char *LegendStr = NULL; + + + if (rot->enfrot->Flags & MD_APPENDFILES) + { + fp = gmx_fio_fopen(fn, "a"); + } + else + { + fp = xvgropen(fn, "Rotation angles and energy", "Time (ps)", "angles (degrees) and energies (kJ/mol)", oenv); + fprintf(fp, "# Output of enforced rotation data is written in intervals of %d time step%s.\n#\n", rot->nstrout, rot->nstrout > 1 ? 
"s" : ""); + fprintf(fp, "# The scalar tau is the torque (kJ/mol) in the direction of the rotation vector v.\n"); + fprintf(fp, "# To obtain the vectorial torque, multiply tau with the group's rot_vec.\n"); + fprintf(fp, "# For flexible groups, tau(t,n) from all slabs n have been summed in a single value tau(t) here.\n"); + fprintf(fp, "# The torques tau(t,n) are found in the rottorque.log (-rt) output file\n"); + + for (g = 0; g < rot->ngrp; g++) + { + rotg = &rot->grp[g]; + erg = rotg->enfrotgrp; + bFlex = ISFLEX(rotg); + + fprintf(fp, "#\n"); + fprintf(fp, "# ROTATION GROUP %d, potential type '%s':\n", g, erotg_names[rotg->eType]); + fprintf(fp, "# rot_massw%d %s\n", g, yesno_names[rotg->bMassW]); + fprintf(fp, "# rot_vec%d %12.5e %12.5e %12.5e\n", g, rotg->vec[XX], rotg->vec[YY], rotg->vec[ZZ]); + fprintf(fp, "# rot_rate%d %12.5e degrees/ps\n", g, rotg->rate); + fprintf(fp, "# rot_k%d %12.5e kJ/(mol*nm^2)\n", g, rotg->k); + if (rotg->eType == erotgISO || rotg->eType == erotgPM || rotg->eType == erotgRM || rotg->eType == erotgRM2) + { + fprintf(fp, "# rot_pivot%d %12.5e %12.5e %12.5e nm\n", g, rotg->pivot[XX], rotg->pivot[YY], rotg->pivot[ZZ]); + } + + if (bFlex) + { + fprintf(fp, "# rot_slab_distance%d %f nm\n", g, rotg->slab_dist); + fprintf(fp, "# rot_min_gaussian%d %12.5e\n", g, rotg->min_gaussian); + } + + /* Output the centers of the rotation groups for the pivot-free potentials */ + if ((rotg->eType == erotgISOPF) || (rotg->eType == erotgPMPF) || (rotg->eType == erotgRMPF) || (rotg->eType == erotgRM2PF + || (rotg->eType == erotgFLEXT) || (rotg->eType == erotgFLEX2T)) ) + { + fprintf(fp, "# ref. grp. %d center %12.5e %12.5e %12.5e\n", g, + erg->xc_ref_center[XX], erg->xc_ref_center[YY], erg->xc_ref_center[ZZ]); + + fprintf(fp, "# grp. 
%d init.center %12.5e %12.5e %12.5e\n", g, + erg->xc_center[XX], erg->xc_center[YY], erg->xc_center[ZZ]); + } + + if ( (rotg->eType == erotgRM2) || (rotg->eType == erotgFLEX2) || (rotg->eType == erotgFLEX2T) ) + { + fprintf(fp, "# rot_eps%d %12.5e nm^2\n", g, rotg->eps); + } + if (erotgFitPOT == rotg->eFittype) + { + fprintf(fp, "#\n"); + fprintf(fp, "# theta_fit%d is determined by first evaluating the potential for %d angles around theta_ref%d.\n", + g, rotg->PotAngle_nstep, g); + fprintf(fp, "# The fit angle is the one with the smallest potential. It is given as the deviation\n"); + fprintf(fp, "# from the reference angle, i.e. if theta_ref=X and theta_fit=Y, then the angle with\n"); + fprintf(fp, "# minimal value of the potential is X+Y. Angular resolution is %g degrees.\n", rotg->PotAngle_step); + } + } + + /* Print a nice legend */ + snew(LegendStr, 1); + LegendStr[0] = '\0'; + sprintf(buf, "# %6s", "time"); + add_to_string_aligned(&LegendStr, buf); + + nsets = 0; + snew(setname, 4*rot->ngrp); + + for (g = 0; g < rot->ngrp; g++) + { + rotg = &rot->grp[g]; + sprintf(buf, "theta_ref%d", g); + add_to_string_aligned(&LegendStr, buf); + + sprintf(buf2, "%s (degrees)", buf); + setname[nsets] = strdup(buf2); + nsets++; + } + for (g = 0; g < rot->ngrp; g++) + { + rotg = &rot->grp[g]; + bFlex = ISFLEX(rotg); + + /* For flexible axis rotation we use RMSD fitting to determine the + * actual angle of the rotation group */ + if (bFlex || erotgFitPOT == rotg->eFittype) + { + sprintf(buf, "theta_fit%d", g); + } + else + { + sprintf(buf, "theta_av%d", g); + } + add_to_string_aligned(&LegendStr, buf); + sprintf(buf2, "%s (degrees)", buf); + setname[nsets] = strdup(buf2); + nsets++; + + sprintf(buf, "tau%d", g); + add_to_string_aligned(&LegendStr, buf); + sprintf(buf2, "%s (kJ/mol)", buf); + setname[nsets] = strdup(buf2); + nsets++; + + sprintf(buf, "energy%d", g); + add_to_string_aligned(&LegendStr, buf); + sprintf(buf2, "%s (kJ/mol)", buf); + setname[nsets] = strdup(buf2); + 
nsets++; + } + fprintf(fp, "#\n"); + + if (nsets > 1) + { + xvgr_legend(fp, nsets, setname, oenv); + } + sfree(setname); + + fprintf(fp, "#\n# Legend for the following data columns:\n"); + fprintf(fp, "%s\n", LegendStr); + sfree(LegendStr); + + fflush(fp); + } + + return fp; +} + + +/* Call on master only */ - static FILE *open_angles_out(const char *fn, t_rot *rot, const output_env_t oenv) ++static FILE *open_angles_out(const char *fn, t_rot *rot) +{ + int g, i; + FILE *fp; + t_rotgrp *rotg; + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + char buf[100]; + + + if (rot->enfrot->Flags & MD_APPENDFILES) + { + fp = gmx_fio_fopen(fn, "a"); + } + else + { + /* Open output file and write some information about it's structure: */ + fp = open_output_file(fn, rot->nstsout, "rotation group angles"); + fprintf(fp, "# All angles given in degrees, time in ps.\n"); + for (g = 0; g < rot->ngrp; g++) + { + rotg = &rot->grp[g]; + erg = rotg->enfrotgrp; + + /* Output for this group happens only if potential type is flexible or + * if fit type is potential! */ + if (ISFLEX(rotg) || (erotgFitPOT == rotg->eFittype) ) + { + if (ISFLEX(rotg)) + { + sprintf(buf, " slab distance %f nm, ", rotg->slab_dist); + } + else + { + buf[0] = '\0'; + } + + fprintf(fp, "#\n# ROTATION GROUP %d '%s',%s fit type '%s'.\n", + g, erotg_names[rotg->eType], buf, erotg_fitnames[rotg->eFittype]); + + /* Special type of fitting using the potential minimum. This is + * done for the whole group only, not for the individual slabs. 
*/ + if (erotgFitPOT == rotg->eFittype) + { + fprintf(fp, "# To obtain theta_fit%d, the potential is evaluated for %d angles around theta_ref%d\n", g, rotg->PotAngle_nstep, g); + fprintf(fp, "# The fit angle in the rotation standard outfile is the one with minimal energy E(theta_fit) [kJ/mol].\n"); + fprintf(fp, "#\n"); + } + + fprintf(fp, "# Legend for the group %d data columns:\n", g); + fprintf(fp, "# "); + print_aligned_short(fp, "time"); + print_aligned_short(fp, "grp"); + print_aligned(fp, "theta_ref"); + + if (erotgFitPOT == rotg->eFittype) + { + /* Output the set of angles around the reference angle */ + for (i = 0; i < rotg->PotAngle_nstep; i++) + { + sprintf(buf, "E(%g)", erg->PotAngleFit->degangle[i]); + print_aligned(fp, buf); + } + } + else + { + /* Output fit angle for each slab */ + print_aligned_short(fp, "slab"); + print_aligned_short(fp, "atoms"); + print_aligned(fp, "theta_fit"); + print_aligned_short(fp, "slab"); + print_aligned_short(fp, "atoms"); + print_aligned(fp, "theta_fit"); + fprintf(fp, " ..."); + } + fprintf(fp, "\n"); + } + } + fflush(fp); + } + + return fp; +} + + +/* Open torque output file and write some information about it's structure. 
+ * Call on master only */ - static FILE *open_torque_out(const char *fn, t_rot *rot, const output_env_t oenv) ++static FILE *open_torque_out(const char *fn, t_rot *rot) +{ + FILE *fp; + int g; + t_rotgrp *rotg; + + + if (rot->enfrot->Flags & MD_APPENDFILES) + { + fp = gmx_fio_fopen(fn, "a"); + } + else + { + fp = open_output_file(fn, rot->nstsout, "torques"); + + for (g = 0; g < rot->ngrp; g++) + { + rotg = &rot->grp[g]; + if (ISFLEX(rotg)) + { + fprintf(fp, "# Rotation group %d (%s), slab distance %f nm.\n", g, erotg_names[rotg->eType], rotg->slab_dist); + fprintf(fp, "# The scalar tau is the torque (kJ/mol) in the direction of the rotation vector.\n"); + fprintf(fp, "# To obtain the vectorial torque, multiply tau with\n"); + fprintf(fp, "# rot_vec%d %10.3e %10.3e %10.3e\n", g, rotg->vec[XX], rotg->vec[YY], rotg->vec[ZZ]); + fprintf(fp, "#\n"); + } + } + fprintf(fp, "# Legend for the following data columns: (tau=torque for that slab):\n"); + fprintf(fp, "# "); + print_aligned_short(fp, "t"); + print_aligned_short(fp, "grp"); + print_aligned_short(fp, "slab"); + print_aligned(fp, "tau"); + print_aligned_short(fp, "slab"); + print_aligned(fp, "tau"); + fprintf(fp, " ...\n"); + fflush(fp); + } + + return fp; +} + + +static void swap_val(double* vec, int i, int j) +{ + double tmp = vec[j]; + + + vec[j] = vec[i]; + vec[i] = tmp; +} + + +static void swap_col(double **mat, int i, int j) +{ + double tmp[3] = {mat[0][j], mat[1][j], mat[2][j]}; + + + mat[0][j] = mat[0][i]; + mat[1][j] = mat[1][i]; + mat[2][j] = mat[2][i]; + + mat[0][i] = tmp[0]; + mat[1][i] = tmp[1]; + mat[2][i] = tmp[2]; +} + + +/* Eigenvectors are stored in columns of eigen_vec */ +static void diagonalize_symmetric( + double **matrix, + double **eigen_vec, + double eigenval[3]) +{ + int n_rot; + + + jacobi(matrix, 3, eigenval, eigen_vec, &n_rot); + + /* sort in ascending order */ + if (eigenval[0] > eigenval[1]) + { + swap_val(eigenval, 0, 1); + swap_col(eigen_vec, 0, 1); + } + if (eigenval[1] > 
eigenval[2]) + { + swap_val(eigenval, 1, 2); + swap_col(eigen_vec, 1, 2); + } + if (eigenval[0] > eigenval[1]) + { + swap_val(eigenval, 0, 1); + swap_col(eigen_vec, 0, 1); + } +} + + +static void align_with_z( + rvec* s, /* Structure to align */ + int natoms, + rvec axis) +{ + int i, j, k; + rvec zet = {0.0, 0.0, 1.0}; + rvec rot_axis = {0.0, 0.0, 0.0}; + rvec *rotated_str = NULL; + real ooanorm; + real angle; + matrix rotmat; + + + snew(rotated_str, natoms); + + /* Normalize the axis */ + ooanorm = 1.0/norm(axis); + svmul(ooanorm, axis, axis); + + /* Calculate the angle for the fitting procedure */ + cprod(axis, zet, rot_axis); + angle = acos(axis[2]); + if (angle < 0.0) + { + angle += M_PI; + } + + /* Calculate the rotation matrix */ + calc_rotmat(rot_axis, angle*180.0/M_PI, rotmat); + + /* Apply the rotation matrix to s */ + for (i = 0; i < natoms; i++) + { + for (j = 0; j < 3; j++) + { + for (k = 0; k < 3; k++) + { + rotated_str[i][j] += rotmat[j][k]*s[i][k]; + } + } + } + + /* Rewrite the rotated structure to s */ + for (i = 0; i < natoms; i++) + { + for (j = 0; j < 3; j++) + { + s[i][j] = rotated_str[i][j]; + } + } + + sfree(rotated_str); +} + + +static void calc_correl_matrix(rvec* Xstr, rvec* Ystr, double** Rmat, int natoms) +{ + int i, j, k; + + + for (i = 0; i < 3; i++) + { + for (j = 0; j < 3; j++) + { + Rmat[i][j] = 0.0; + } + } + + for (i = 0; i < 3; i++) + { + for (j = 0; j < 3; j++) + { + for (k = 0; k < natoms; k++) + { + Rmat[i][j] += Ystr[k][i] * Xstr[k][j]; + } + } + } +} + + +static void weigh_coords(rvec* str, real* weight, int natoms) +{ + int i, j; + + + for (i = 0; i < natoms; i++) + { + for (j = 0; j < 3; j++) + { + str[i][j] *= sqrt(weight[i]); + } + } +} + + +static real opt_angle_analytic( + rvec* ref_s, + rvec* act_s, + real* weight, + int natoms, + rvec ref_com, + rvec act_com, + rvec axis) +{ + int i, j, k; + rvec *ref_s_1 = NULL; + rvec *act_s_1 = NULL; + rvec shift; + double **Rmat, **RtR, **eigvec; + double eigval[3]; + double 
V[3][3], WS[3][3]; + double rot_matrix[3][3]; + double opt_angle; + + + /* Do not change the original coordinates */ + snew(ref_s_1, natoms); + snew(act_s_1, natoms); + for (i = 0; i < natoms; i++) + { + copy_rvec(ref_s[i], ref_s_1[i]); + copy_rvec(act_s[i], act_s_1[i]); + } + + /* Translate the structures to the origin */ + shift[XX] = -ref_com[XX]; + shift[YY] = -ref_com[YY]; + shift[ZZ] = -ref_com[ZZ]; + translate_x(ref_s_1, natoms, shift); + + shift[XX] = -act_com[XX]; + shift[YY] = -act_com[YY]; + shift[ZZ] = -act_com[ZZ]; + translate_x(act_s_1, natoms, shift); + + /* Align rotation axis with z */ + align_with_z(ref_s_1, natoms, axis); + align_with_z(act_s_1, natoms, axis); + + /* Correlation matrix */ + Rmat = allocate_square_matrix(3); + + for (i = 0; i < natoms; i++) + { + ref_s_1[i][2] = 0.0; + act_s_1[i][2] = 0.0; + } + + /* Weight positions with sqrt(weight) */ + if (NULL != weight) + { + weigh_coords(ref_s_1, weight, natoms); + weigh_coords(act_s_1, weight, natoms); + } + + /* Calculate correlation matrices R=YXt (X=ref_s; Y=act_s) */ + calc_correl_matrix(ref_s_1, act_s_1, Rmat, natoms); + + /* Calculate RtR */ + RtR = allocate_square_matrix(3); + for (i = 0; i < 3; i++) + { + for (j = 0; j < 3; j++) + { + for (k = 0; k < 3; k++) + { + RtR[i][j] += Rmat[k][i] * Rmat[k][j]; + } + } + } + /* Diagonalize RtR */ + snew(eigvec, 3); + for (i = 0; i < 3; i++) + { + snew(eigvec[i], 3); + } + + diagonalize_symmetric(RtR, eigvec, eigval); + swap_col(eigvec, 0, 1); + swap_col(eigvec, 1, 2); + swap_val(eigval, 0, 1); + swap_val(eigval, 1, 2); + + /* Calculate V */ + for (i = 0; i < 3; i++) + { + for (j = 0; j < 3; j++) + { + V[i][j] = 0.0; + WS[i][j] = 0.0; + } + } + + for (i = 0; i < 2; i++) + { + for (j = 0; j < 2; j++) + { + WS[i][j] = eigvec[i][j] / sqrt(eigval[j]); + } + } + + for (i = 0; i < 3; i++) + { + for (j = 0; j < 3; j++) + { + for (k = 0; k < 3; k++) + { + V[i][j] += Rmat[i][k]*WS[k][j]; + } + } + } + free_square_matrix(Rmat, 3); + + /* Calculate 
optimal rotation matrix */ + for (i = 0; i < 3; i++) + { + for (j = 0; j < 3; j++) + { + rot_matrix[i][j] = 0.0; + } + } + + for (i = 0; i < 3; i++) + { + for (j = 0; j < 3; j++) + { + for (k = 0; k < 3; k++) + { + rot_matrix[i][j] += eigvec[i][k]*V[j][k]; + } + } + } + rot_matrix[2][2] = 1.0; + + /* In some cases abs(rot_matrix[0][0]) can be slighly larger + * than unity due to numerical inacurracies. To be able to calculate + * the acos function, we put these values back in range. */ + if (rot_matrix[0][0] > 1.0) + { + rot_matrix[0][0] = 1.0; + } + else if (rot_matrix[0][0] < -1.0) + { + rot_matrix[0][0] = -1.0; + } + + /* Determine the optimal rotation angle: */ + opt_angle = (-1.0)*acos(rot_matrix[0][0])*180.0/M_PI; + if (rot_matrix[0][1] < 0.0) + { + opt_angle = (-1.0)*opt_angle; + } + + /* Give back some memory */ + free_square_matrix(RtR, 3); + sfree(ref_s_1); + sfree(act_s_1); + for (i = 0; i < 3; i++) + { + sfree(eigvec[i]); + } + sfree(eigvec); + + return (real) opt_angle; +} + + +/* Determine angle of the group by RMSD fit to the reference */ +/* Not parallelized, call this routine only on the master */ +static real flex_fit_angle(t_rotgrp *rotg) +{ + int i; + rvec *fitcoords = NULL; + rvec center; /* Center of positions passed to the fit routine */ + real fitangle; /* Angle of the rotation group derived by fitting */ + rvec coord; + real scal; + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + + + erg = rotg->enfrotgrp; + + /* Get the center of the rotation group. 
+ * Note, again, erg->xc has been sorted in do_flexible */ + get_center(erg->xc, erg->mc_sorted, rotg->nat, center); + + /* === Determine the optimal fit angle for the rotation group === */ + if (rotg->eFittype == erotgFitNORM) + { + /* Normalize every position to it's reference length */ + for (i = 0; i < rotg->nat; i++) + { + /* Put the center of the positions into the origin */ + rvec_sub(erg->xc[i], center, coord); + /* Determine the scaling factor for the length: */ + scal = erg->xc_ref_length[erg->xc_sortind[i]] / norm(coord); + /* Get position, multiply with the scaling factor and save */ + svmul(scal, coord, erg->xc_norm[i]); + } + fitcoords = erg->xc_norm; + } + else + { + fitcoords = erg->xc; + } + /* From the point of view of the current positions, the reference has rotated + * backwards. Since we output the angle relative to the fixed reference, + * we need the minus sign. */ + fitangle = -opt_angle_analytic(erg->xc_ref_sorted, fitcoords, erg->mc_sorted, + rotg->nat, erg->xc_ref_center, center, rotg->vec); + + return fitangle; +} + + +/* Determine actual angle of each slab by RMSD fit to the reference */ +/* Not parallelized, call this routine only on the master */ +static void flex_fit_angle_perslab( + int g, + t_rotgrp *rotg, + double t, + real degangle, + FILE *fp) +{ + int i, l, n, islab, ind; + rvec curr_x, ref_x; + rvec act_center; /* Center of actual positions that are passed to the fit routine */ + rvec ref_center; /* Same for the reference positions */ + real fitangle; /* Angle of a slab derived from an RMSD fit to + * the reference structure at t=0 */ + t_gmx_slabdata *sd; + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + real OOm_av; /* 1/average_mass of a rotation group atom */ + real m_rel; /* Relative mass of a rotation group atom */ + + + erg = rotg->enfrotgrp; + + /* Average mass of a rotation group atom: */ + OOm_av = erg->invmass*rotg->nat; + + /**********************************/ + /* First collect the data we 
need */ + /**********************************/ + + /* Collect the data for the individual slabs */ + for (n = erg->slab_first; n <= erg->slab_last; n++) + { + islab = n - erg->slab_first; /* slab index */ + sd = &(rotg->enfrotgrp->slab_data[islab]); + sd->nat = erg->lastatom[islab]-erg->firstatom[islab]+1; + ind = 0; + + /* Loop over the relevant atoms in the slab */ + for (l = erg->firstatom[islab]; l <= erg->lastatom[islab]; l++) + { + /* Current position of this atom: x[ii][XX/YY/ZZ] */ + copy_rvec(erg->xc[l], curr_x); + + /* The (unrotated) reference position of this atom is copied to ref_x. + * Beware, the xc coords have been sorted in do_flexible */ + copy_rvec(erg->xc_ref_sorted[l], ref_x); + + /* Save data for doing angular RMSD fit later */ + /* Save the current atom position */ + copy_rvec(curr_x, sd->x[ind]); + /* Save the corresponding reference position */ + copy_rvec(ref_x, sd->ref[ind]); + + /* Maybe also mass-weighting was requested. If yes, additionally + * multiply the weights with the relative mass of the atom. If not, + * multiply with unity. */ + m_rel = erg->mc_sorted[l]*OOm_av; + + /* Save the weight for this atom in this slab */ + sd->weight[ind] = gaussian_weight(curr_x, rotg, n) * m_rel; + + /* Next atom in this slab */ + ind++; + } + } + + /******************************/ + /* Now do the fit calculation */ + /******************************/ + + fprintf(fp, "%12.3e%6d%12.3f", t, g, degangle); + + /* === Now do RMSD fitting for each slab === */ + /* We require at least SLAB_MIN_ATOMS in a slab, such that the fit makes sense. 
*/ +#define SLAB_MIN_ATOMS 4 + + for (n = erg->slab_first; n <= erg->slab_last; n++) + { + islab = n - erg->slab_first; /* slab index */ + sd = &(rotg->enfrotgrp->slab_data[islab]); + if (sd->nat >= SLAB_MIN_ATOMS) + { + /* Get the center of the slabs reference and current positions */ + get_center(sd->ref, sd->weight, sd->nat, ref_center); + get_center(sd->x, sd->weight, sd->nat, act_center); + if (rotg->eFittype == erotgFitNORM) + { + /* Normalize every position to it's reference length + * prior to performing the fit */ + for (i = 0; i < sd->nat; i++) /* Center */ + { + rvec_dec(sd->ref[i], ref_center); + rvec_dec(sd->x[i], act_center); + /* Normalize x_i such that it gets the same length as ref_i */ + svmul( norm(sd->ref[i])/norm(sd->x[i]), sd->x[i], sd->x[i] ); + } + /* We already subtracted the centers */ + clear_rvec(ref_center); + clear_rvec(act_center); + } + fitangle = -opt_angle_analytic(sd->ref, sd->x, sd->weight, sd->nat, + ref_center, act_center, rotg->vec); + fprintf(fp, "%6d%6d%12.3f", n, sd->nat, fitangle); + } + } + fprintf(fp, "\n"); + +#undef SLAB_MIN_ATOMS +} + + +/* Shift x with is */ +static gmx_inline void shift_single_coord(matrix box, rvec x, const ivec is) +{ + int tx, ty, tz; + + + tx = is[XX]; + ty = is[YY]; + tz = is[ZZ]; + + if (TRICLINIC(box)) + { + x[XX] += tx*box[XX][XX]+ty*box[YY][XX]+tz*box[ZZ][XX]; + x[YY] += ty*box[YY][YY]+tz*box[ZZ][YY]; + x[ZZ] += tz*box[ZZ][ZZ]; + } + else + { + x[XX] += tx*box[XX][XX]; + x[YY] += ty*box[YY][YY]; + x[ZZ] += tz*box[ZZ][ZZ]; + } +} + + +/* Determine the 'home' slab of this atom which is the + * slab with the highest Gaussian weight of all */ +#define round(a) (int)(a+0.5) +static gmx_inline int get_homeslab( + rvec curr_x, /* The position for which the home slab shall be determined */ + rvec rotvec, /* The rotation vector */ + real slabdist) /* The slab distance */ +{ + real dist; + + + /* The distance of the atom to the coordinate center (where the + * slab with index 0) is */ + dist = 
iprod(rotvec, curr_x); + + return round(dist / slabdist); +} + + +/* For a local atom determine the relevant slabs, i.e. slabs in + * which the gaussian is larger than min_gaussian + */ +static int get_single_atom_gaussians( + rvec curr_x, + t_rotgrp *rotg) +{ + int slab, homeslab; + real g; + int count = 0; + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + + + erg = rotg->enfrotgrp; + + /* Determine the 'home' slab of this atom: */ + homeslab = get_homeslab(curr_x, rotg->vec, rotg->slab_dist); + + /* First determine the weight in the atoms home slab: */ + g = gaussian_weight(curr_x, rotg, homeslab); + + erg->gn_atom[count] = g; + erg->gn_slabind[count] = homeslab; + count++; + + + /* Determine the max slab */ + slab = homeslab; + while (g > rotg->min_gaussian) + { + slab++; + g = gaussian_weight(curr_x, rotg, slab); + erg->gn_slabind[count] = slab; + erg->gn_atom[count] = g; + count++; + } + count--; + - /* Determine the max slab */ ++ /* Determine the min slab */ + slab = homeslab; + do + { + slab--; + g = gaussian_weight(curr_x, rotg, slab); + erg->gn_slabind[count] = slab; + erg->gn_atom[count] = g; + count++; + } + while (g > rotg->min_gaussian); + count--; + + return count; +} + + +static void flex2_precalc_inner_sum(t_rotgrp *rotg) +{ + int i, n, islab; + rvec xi; /* positions in the i-sum */ + rvec xcn, ycn; /* the current and the reference slab centers */ + real gaussian_xi; + rvec yi0; + rvec rin; /* Helper variables */ + real fac, fac2; + rvec innersumvec; + real OOpsii, OOpsiistar; + real sin_rin; /* s_ii.r_ii */ + rvec s_in, tmpvec, tmpvec2; + real mi, wi; /* Mass-weighting of the positions */ + real N_M; /* N/M */ + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + + + erg = rotg->enfrotgrp; + N_M = rotg->nat * erg->invmass; + + /* Loop over all slabs that contain something */ + for (n = erg->slab_first; n <= erg->slab_last; n++) + { + islab = n - erg->slab_first; /* slab index */ + + /* The current center of 
this slab is saved in xcn: */ + copy_rvec(erg->slab_center[islab], xcn); + /* ... and the reference center in ycn: */ + copy_rvec(erg->slab_center_ref[islab+erg->slab_buffer], ycn); + + /*** D. Calculate the whole inner sum used for second and third sum */ + /* For slab n, we need to loop over all atoms i again. Since we sorted + * the atoms with respect to the rotation vector, we know that it is sufficient + * to calculate from firstatom to lastatom only. All other contributions will + * be very small. */ + clear_rvec(innersumvec); + for (i = erg->firstatom[islab]; i <= erg->lastatom[islab]; i++) + { + /* Coordinate xi of this atom */ + copy_rvec(erg->xc[i], xi); + + /* The i-weights */ + gaussian_xi = gaussian_weight(xi, rotg, n); + mi = erg->mc_sorted[i]; /* need the sorted mass here */ + wi = N_M*mi; + + /* Calculate rin */ + copy_rvec(erg->xc_ref_sorted[i], yi0); /* Reference position yi0 */ + rvec_sub(yi0, ycn, tmpvec2); /* tmpvec2 = yi0 - ycn */ + mvmul(erg->rotmat, tmpvec2, rin); /* rin = Omega.(yi0 - ycn) */ + + /* Calculate psi_i* and sin */ + rvec_sub(xi, xcn, tmpvec2); /* tmpvec2 = xi - xcn */ + cprod(rotg->vec, tmpvec2, tmpvec); /* tmpvec = v x (xi - xcn) */ + OOpsiistar = norm2(tmpvec)+rotg->eps; /* OOpsii* = 1/psii* = |v x (xi-xcn)|^2 + eps */ + OOpsii = norm(tmpvec); /* OOpsii = 1 / psii = |v x (xi - xcn)| */ + + /* * v x (xi - xcn) */ + unitv(tmpvec, s_in); /* sin = ---------------- */ + /* |v x (xi - xcn)| */ + + sin_rin = iprod(s_in, rin); /* sin_rin = sin . 
rin */ + + /* Now the whole sum */ + fac = OOpsii/OOpsiistar; + svmul(fac, rin, tmpvec); + fac2 = fac*fac*OOpsii; + svmul(fac2*sin_rin, s_in, tmpvec2); + rvec_dec(tmpvec, tmpvec2); + + svmul(wi*gaussian_xi*sin_rin, tmpvec, tmpvec2); + + rvec_inc(innersumvec, tmpvec2); + } /* now we have the inner sum, used both for sum2 and sum3 */ + + /* Save it to be used in do_flex2_lowlevel */ + copy_rvec(innersumvec, erg->slab_innersumvec[islab]); + } /* END of loop over slabs */ +} + + +static void flex_precalc_inner_sum(t_rotgrp *rotg) +{ + int i, n, islab; + rvec xi; /* position */ + rvec xcn, ycn; /* the current and the reference slab centers */ + rvec qin, rin; /* q_i^n and r_i^n */ + real bin; + rvec tmpvec; + rvec innersumvec; /* Inner part of sum_n2 */ + real gaussian_xi; /* Gaussian weight gn(xi) */ + real mi, wi; /* Mass-weighting of the positions */ + real N_M; /* N/M */ + + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + + + erg = rotg->enfrotgrp; + N_M = rotg->nat * erg->invmass; + + /* Loop over all slabs that contain something */ + for (n = erg->slab_first; n <= erg->slab_last; n++) + { + islab = n - erg->slab_first; /* slab index */ + + /* The current center of this slab is saved in xcn: */ + copy_rvec(erg->slab_center[islab], xcn); + /* ... and the reference center in ycn: */ + copy_rvec(erg->slab_center_ref[islab+erg->slab_buffer], ycn); + + /* For slab n, we need to loop over all atoms i again. Since we sorted + * the atoms with respect to the rotation vector, we know that it is sufficient + * to calculate from firstatom to lastatom only. All other contributions will + * be very small. 
*/ + clear_rvec(innersumvec); + for (i = erg->firstatom[islab]; i <= erg->lastatom[islab]; i++) + { + /* Coordinate xi of this atom */ + copy_rvec(erg->xc[i], xi); + + /* The i-weights */ + gaussian_xi = gaussian_weight(xi, rotg, n); + mi = erg->mc_sorted[i]; /* need the sorted mass here */ + wi = N_M*mi; + + /* Calculate rin and qin */ + rvec_sub(erg->xc_ref_sorted[i], ycn, tmpvec); /* tmpvec = yi0-ycn */ + mvmul(erg->rotmat, tmpvec, rin); /* rin = Omega.(yi0 - ycn) */ + cprod(rotg->vec, rin, tmpvec); /* tmpvec = v x Omega*(yi0-ycn) */ + + /* * v x Omega*(yi0-ycn) */ + unitv(tmpvec, qin); /* qin = --------------------- */ + /* |v x Omega*(yi0-ycn)| */ + + /* Calculate bin */ + rvec_sub(xi, xcn, tmpvec); /* tmpvec = xi-xcn */ + bin = iprod(qin, tmpvec); /* bin = qin*(xi-xcn) */ + + svmul(wi*gaussian_xi*bin, qin, tmpvec); + + /* Add this contribution to the inner sum: */ + rvec_add(innersumvec, tmpvec, innersumvec); + } /* now we have the inner sum vector S^n for this slab */ + /* Save it to be used in do_flex_lowlevel */ + copy_rvec(innersumvec, erg->slab_innersumvec[islab]); + } +} + + +static real do_flex2_lowlevel( + t_rotgrp *rotg, + real sigma, /* The Gaussian width sigma */ + rvec x[], + gmx_bool bOutstepRot, + gmx_bool bOutstepSlab, + matrix box) +{ + int count, ic, ii, j, m, n, islab, iigrp, ifit; + rvec xj; /* position in the i-sum */ + rvec yj0; /* the reference position in the j-sum */ + rvec xcn, ycn; /* the current and the reference slab centers */ + real V; /* This node's part of the rotation pot. 
energy */ + real gaussian_xj; /* Gaussian weight */ + real beta; + + real numerator, fit_numerator; + rvec rjn, fit_rjn; /* Helper variables */ + real fac, fac2; + + real OOpsij, OOpsijstar; + real OOsigma2; /* 1/(sigma^2) */ + real sjn_rjn; + real betasigpsi; + rvec sjn, tmpvec, tmpvec2, yj0_ycn; + rvec sum1vec_part, sum1vec, sum2vec_part, sum2vec, sum3vec, sum4vec, innersumvec; + real sum3, sum4; + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + real mj, wj; /* Mass-weighting of the positions */ + real N_M; /* N/M */ + real Wjn; /* g_n(x_j) m_j / Mjn */ + gmx_bool bCalcPotFit; + + /* To calculate the torque per slab */ + rvec slab_force; /* Single force from slab n on one atom */ + rvec slab_sum1vec_part; + real slab_sum3part, slab_sum4part; + rvec slab_sum1vec, slab_sum2vec, slab_sum3vec, slab_sum4vec; + + + erg = rotg->enfrotgrp; + + /* Pre-calculate the inner sums, so that we do not have to calculate + * them again for every atom */ + flex2_precalc_inner_sum(rotg); + + bCalcPotFit = (bOutstepRot || bOutstepSlab) && (erotgFitPOT == rotg->eFittype); + + /********************************************************/ + /* Main loop over all local atoms of the rotation group */ + /********************************************************/ + N_M = rotg->nat * erg->invmass; + V = 0.0; + OOsigma2 = 1.0 / (sigma*sigma); + for (j = 0; j < erg->nat_loc; j++) + { + /* Local index of a rotation group atom */ + ii = erg->ind_loc[j]; + /* Position of this atom in the collective array */ + iigrp = erg->xc_ref_ind[j]; + /* Mass-weighting */ + mj = erg->mc[iigrp]; /* need the unsorted mass here */ + wj = N_M*mj; + + /* Current position of this atom: x[ii][XX/YY/ZZ] + * Note that erg->xc_center contains the center of mass in case the flex2-t + * potential was chosen. For the flex2 potential erg->xc_center must be + * zero. 
*/ + rvec_sub(x[ii], erg->xc_center, xj); + + /* Shift this atom such that it is near its reference */ + shift_single_coord(box, xj, erg->xc_shifts[iigrp]); + + /* Determine the slabs to loop over, i.e. the ones with contributions + * larger than min_gaussian */ + count = get_single_atom_gaussians(xj, rotg); + + clear_rvec(sum1vec_part); + clear_rvec(sum2vec_part); + sum3 = 0.0; + sum4 = 0.0; + /* Loop over the relevant slabs for this atom */ + for (ic = 0; ic < count; ic++) + { + n = erg->gn_slabind[ic]; + + /* Get the precomputed Gaussian value of curr_slab for curr_x */ + gaussian_xj = erg->gn_atom[ic]; + + islab = n - erg->slab_first; /* slab index */ + + /* The (unrotated) reference position of this atom is copied to yj0: */ + copy_rvec(rotg->x_ref[iigrp], yj0); + + beta = calc_beta(xj, rotg, n); + + /* The current center of this slab is saved in xcn: */ + copy_rvec(erg->slab_center[islab], xcn); + /* ... and the reference center in ycn: */ + copy_rvec(erg->slab_center_ref[islab+erg->slab_buffer], ycn); + + rvec_sub(yj0, ycn, yj0_ycn); /* yj0_ycn = yj0 - ycn */ + + /* Rotate: */ + mvmul(erg->rotmat, yj0_ycn, rjn); /* rjn = Omega.(yj0 - ycn) */ + + /* Subtract the slab center from xj */ + rvec_sub(xj, xcn, tmpvec2); /* tmpvec2 = xj - xcn */ ++ ++ /* In rare cases, when an atom position coincides with a slab center ++ * (tmpvec2 == 0) we cannot compute the vector product for sjn. ++ * However, since the atom is located directly on the pivot, this ++ * slab's contribution to the force on that atom will be zero ++ * anyway. Therefore, we directly move on to the next slab. 
*/ ++ if ( 0 == norm(tmpvec2) ) ++ { ++ continue; ++ } + + /* Calculate sjn */ + cprod(rotg->vec, tmpvec2, tmpvec); /* tmpvec = v x (xj - xcn) */ + + OOpsijstar = norm2(tmpvec)+rotg->eps; /* OOpsij* = 1/psij* = |v x (xj-xcn)|^2 + eps */ + + numerator = sqr(iprod(tmpvec, rjn)); + + /*********************************/ + /* Add to the rotation potential */ + /*********************************/ + V += 0.5*rotg->k*wj*gaussian_xj*numerator/OOpsijstar; + + /* If requested, also calculate the potential for a set of angles + * near the current reference angle */ + if (bCalcPotFit) + { + for (ifit = 0; ifit < rotg->PotAngle_nstep; ifit++) + { + mvmul(erg->PotAngleFit->rotmat[ifit], yj0_ycn, fit_rjn); + fit_numerator = sqr(iprod(tmpvec, fit_rjn)); + erg->PotAngleFit->V[ifit] += 0.5*rotg->k*wj*gaussian_xj*fit_numerator/OOpsijstar; + } + } + + /*************************************/ + /* Now calculate the force on atom j */ + /*************************************/ + + OOpsij = norm(tmpvec); /* OOpsij = 1 / psij = |v x (xj - xcn)| */ + + /* * v x (xj - xcn) */ + unitv(tmpvec, sjn); /* sjn = ---------------- */ + /* |v x (xj - xcn)| */ + + sjn_rjn = iprod(sjn, rjn); /* sjn_rjn = sjn . rjn */ + + + /*** A. Calculate the first of the four sum terms: ****************/ + fac = OOpsij/OOpsijstar; + svmul(fac, rjn, tmpvec); + fac2 = fac*fac*OOpsij; + svmul(fac2*sjn_rjn, sjn, tmpvec2); + rvec_dec(tmpvec, tmpvec2); + fac2 = wj*gaussian_xj; /* also needed for sum4 */ + svmul(fac2*sjn_rjn, tmpvec, slab_sum1vec_part); + /********************/ + /*** Add to sum1: ***/ + /********************/ + rvec_inc(sum1vec_part, slab_sum1vec_part); /* sum1 still needs to vector multiplied with v */ + + /*** B. 
Calculate the forth of the four sum terms: ****************/ + betasigpsi = beta*OOsigma2*OOpsij; /* this is also needed for sum3 */ + /********************/ + /*** Add to sum4: ***/ + /********************/ + slab_sum4part = fac2*betasigpsi*fac*sjn_rjn*sjn_rjn; /* Note that fac is still valid from above */ + sum4 += slab_sum4part; + + /*** C. Calculate Wjn for second and third sum */ + /* Note that we can safely divide by slab_weights since we check in + * get_slab_centers that it is non-zero. */ + Wjn = gaussian_xj*mj/erg->slab_weights[islab]; + + /* We already have precalculated the inner sum for slab n */ + copy_rvec(erg->slab_innersumvec[islab], innersumvec); + + /* Weigh the inner sum vector with Wjn */ + svmul(Wjn, innersumvec, innersumvec); + + /*** E. Calculate the second of the four sum terms: */ + /********************/ + /*** Add to sum2: ***/ + /********************/ + rvec_inc(sum2vec_part, innersumvec); /* sum2 still needs to be vector crossproduct'ed with v */ + + /*** F. Calculate the third of the four sum terms: */ + slab_sum3part = betasigpsi * iprod(sjn, innersumvec); + sum3 += slab_sum3part; /* still needs to be multiplied with v */ + + /*** G. 
Calculate the torque on the local slab's axis: */ + if (bOutstepRot) + { + /* Sum1 */ + cprod(slab_sum1vec_part, rotg->vec, slab_sum1vec); + /* Sum2 */ + cprod(innersumvec, rotg->vec, slab_sum2vec); + /* Sum3 */ + svmul(slab_sum3part, rotg->vec, slab_sum3vec); + /* Sum4 */ + svmul(slab_sum4part, rotg->vec, slab_sum4vec); + + /* The force on atom ii from slab n only: */ + for (m = 0; m < DIM; m++) + { + slab_force[m] = rotg->k * (-slab_sum1vec[m] + slab_sum2vec[m] - slab_sum3vec[m] + 0.5*slab_sum4vec[m]); + } + + erg->slab_torque_v[islab] += torque(rotg->vec, slab_force, xj, xcn); + } + } /* END of loop over slabs */ + + /* Construct the four individual parts of the vector sum: */ + cprod(sum1vec_part, rotg->vec, sum1vec); /* sum1vec = { } x v */ + cprod(sum2vec_part, rotg->vec, sum2vec); /* sum2vec = { } x v */ + svmul(sum3, rotg->vec, sum3vec); /* sum3vec = { } . v */ + svmul(sum4, rotg->vec, sum4vec); /* sum4vec = { } . v */ + + /* Store the additional force so that it can be added to the force + * array after the normal forces have been evaluated */ + for (m = 0; m < DIM; m++) + { + erg->f_rot_loc[j][m] = rotg->k * (-sum1vec[m] + sum2vec[m] - sum3vec[m] + 0.5*sum4vec[m]); + } + +#ifdef SUM_PARTS + fprintf(stderr, "sum1: %15.8f %15.8f %15.8f\n", -rotg->k*sum1vec[XX], -rotg->k*sum1vec[YY], -rotg->k*sum1vec[ZZ]); + fprintf(stderr, "sum2: %15.8f %15.8f %15.8f\n", rotg->k*sum2vec[XX], rotg->k*sum2vec[YY], rotg->k*sum2vec[ZZ]); + fprintf(stderr, "sum3: %15.8f %15.8f %15.8f\n", -rotg->k*sum3vec[XX], -rotg->k*sum3vec[YY], -rotg->k*sum3vec[ZZ]); + fprintf(stderr, "sum4: %15.8f %15.8f %15.8f\n", 0.5*rotg->k*sum4vec[XX], 0.5*rotg->k*sum4vec[YY], 0.5*rotg->k*sum4vec[ZZ]); +#endif + + PRINT_FORCE_J + + } /* END of loop over local atoms */ + + return V; +} + + +static real do_flex_lowlevel( + t_rotgrp *rotg, + real sigma, /* The Gaussian width sigma */ + rvec x[], + gmx_bool bOutstepRot, + gmx_bool bOutstepSlab, + matrix box) +{ + int count, ic, ifit, ii, j, m, n, islab, 
iigrp; + rvec xj, yj0; /* current and reference position */ + rvec xcn, ycn; /* the current and the reference slab centers */ + rvec yj0_ycn; /* yj0 - ycn */ + rvec xj_xcn; /* xj - xcn */ + rvec qjn, fit_qjn; /* q_i^n */ + rvec sum_n1, sum_n2; /* Two contributions to the rotation force */ + rvec innersumvec; /* Inner part of sum_n2 */ + rvec s_n; + rvec force_n; /* Single force from slab n on one atom */ + rvec force_n1, force_n2; /* First and second part of force_n */ + rvec tmpvec, tmpvec2, tmp_f; /* Helper variables */ + real V; /* The rotation potential energy */ + real OOsigma2; /* 1/(sigma^2) */ + real beta; /* beta_n(xj) */ + real bjn, fit_bjn; /* b_j^n */ + real gaussian_xj; /* Gaussian weight gn(xj) */ + real betan_xj_sigma2; + real mj, wj; /* Mass-weighting of the positions */ + real N_M; /* N/M */ + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + gmx_bool bCalcPotFit; + + + erg = rotg->enfrotgrp; + + /* Pre-calculate the inner sums, so that we do not have to calculate + * them again for every atom */ + flex_precalc_inner_sum(rotg); + + bCalcPotFit = (bOutstepRot || bOutstepSlab) && (erotgFitPOT == rotg->eFittype); + + /********************************************************/ + /* Main loop over all local atoms of the rotation group */ + /********************************************************/ + OOsigma2 = 1.0/(sigma*sigma); + N_M = rotg->nat * erg->invmass; + V = 0.0; + for (j = 0; j < erg->nat_loc; j++) + { + /* Local index of a rotation group atom */ + ii = erg->ind_loc[j]; + /* Position of this atom in the collective array */ + iigrp = erg->xc_ref_ind[j]; + /* Mass-weighting */ + mj = erg->mc[iigrp]; /* need the unsorted mass here */ + wj = N_M*mj; + + /* Current position of this atom: x[ii][XX/YY/ZZ] + * Note that erg->xc_center contains the center of mass in case the flex-t + * potential was chosen. For the flex potential erg->xc_center must be + * zero. 
*/ + rvec_sub(x[ii], erg->xc_center, xj); + + /* Shift this atom such that it is near its reference */ + shift_single_coord(box, xj, erg->xc_shifts[iigrp]); + + /* Determine the slabs to loop over, i.e. the ones with contributions + * larger than min_gaussian */ + count = get_single_atom_gaussians(xj, rotg); + + clear_rvec(sum_n1); + clear_rvec(sum_n2); + + /* Loop over the relevant slabs for this atom */ + for (ic = 0; ic < count; ic++) + { + n = erg->gn_slabind[ic]; + + /* Get the precomputed Gaussian for xj in slab n */ + gaussian_xj = erg->gn_atom[ic]; + + islab = n - erg->slab_first; /* slab index */ + + /* The (unrotated) reference position of this atom is saved in yj0: */ + copy_rvec(rotg->x_ref[iigrp], yj0); + + beta = calc_beta(xj, rotg, n); + + /* The current center of this slab is saved in xcn: */ + copy_rvec(erg->slab_center[islab], xcn); + /* ... and the reference center in ycn: */ + copy_rvec(erg->slab_center_ref[islab+erg->slab_buffer], ycn); + + rvec_sub(yj0, ycn, yj0_ycn); /* yj0_ycn = yj0 - ycn */ + + /* Rotate: */ + mvmul(erg->rotmat, yj0_ycn, tmpvec2); /* tmpvec2= Omega.(yj0-ycn) */ + + /* Subtract the slab center from xj */ + rvec_sub(xj, xcn, xj_xcn); /* xj_xcn = xj - xcn */ + ++ /* In rare cases, when an atom position coincides with a slab center ++ * (xj_xcn == 0) we cannot compute the vector product for qjn. ++ * However, since the atom is located directly on the pivot, this ++ * slab's contribution to the force on that atom will be zero ++ * anyway. Therefore, we directly move on to the next slab. 
*/ ++ if ( 0 == norm(xj_xcn) ) ++ { ++ continue; ++ } ++ + /* Calculate qjn */ + cprod(rotg->vec, tmpvec2, tmpvec); /* tmpvec= v x Omega.(yj0-ycn) */ + + /* * v x Omega.(yj0-ycn) */ + unitv(tmpvec, qjn); /* qjn = --------------------- */ + /* |v x Omega.(yj0-ycn)| */ + + bjn = iprod(qjn, xj_xcn); /* bjn = qjn * (xj - xcn) */ + + /*********************************/ + /* Add to the rotation potential */ + /*********************************/ + V += 0.5*rotg->k*wj*gaussian_xj*sqr(bjn); + + /* If requested, also calculate the potential for a set of angles + * near the current reference angle */ + if (bCalcPotFit) + { + for (ifit = 0; ifit < rotg->PotAngle_nstep; ifit++) + { + /* As above calculate Omega.(yj0-ycn), now for the other angles */ + mvmul(erg->PotAngleFit->rotmat[ifit], yj0_ycn, tmpvec2); /* tmpvec2= Omega.(yj0-ycn) */ + /* As above calculate qjn */ + cprod(rotg->vec, tmpvec2, tmpvec); /* tmpvec= v x Omega.(yj0-ycn) */ + /* * v x Omega.(yj0-ycn) */ + unitv(tmpvec, fit_qjn); /* fit_qjn = --------------------- */ + /* |v x Omega.(yj0-ycn)| */ + fit_bjn = iprod(fit_qjn, xj_xcn); /* fit_bjn = fit_qjn * (xj - xcn) */ + /* Add to the rotation potential for this angle */ + erg->PotAngleFit->V[ifit] += 0.5*rotg->k*wj*gaussian_xj*sqr(fit_bjn); + } + } + + /****************************************************************/ + /* sum_n1 will typically be the main contribution to the force: */ + /****************************************************************/ + betan_xj_sigma2 = beta*OOsigma2; /* beta_n(xj)/sigma^2 */ + + /* The next lines calculate + * qjn - (bjn*beta(xj)/(2sigma^2))v */ + svmul(bjn*0.5*betan_xj_sigma2, rotg->vec, tmpvec2); + rvec_sub(qjn, tmpvec2, tmpvec); + + /* Multiply with gn(xj)*bjn: */ + svmul(gaussian_xj*bjn, tmpvec, tmpvec2); + + /* Sum over n: */ + rvec_inc(sum_n1, tmpvec2); + + /* We already have precalculated the Sn term for slab n */ + copy_rvec(erg->slab_innersumvec[islab], s_n); + /* * beta_n(xj) */ + svmul(betan_xj_sigma2*iprod(s_n, 
xj_xcn), rotg->vec, tmpvec); /* tmpvec = ---------- s_n (xj-xcn) */ + /* sigma^2 */ + + rvec_sub(s_n, tmpvec, innersumvec); + + /* We can safely divide by slab_weights since we check in get_slab_centers + * that it is non-zero. */ + svmul(gaussian_xj/erg->slab_weights[islab], innersumvec, innersumvec); + + rvec_add(sum_n2, innersumvec, sum_n2); + + /* Calculate the torque: */ + if (bOutstepRot) + { + /* The force on atom ii from slab n only: */ + svmul(-rotg->k*wj, tmpvec2, force_n1); /* part 1 */ + svmul( rotg->k*mj, innersumvec, force_n2); /* part 2 */ + rvec_add(force_n1, force_n2, force_n); + erg->slab_torque_v[islab] += torque(rotg->vec, force_n, xj, xcn); + } + } /* END of loop over slabs */ + + /* Put both contributions together: */ + svmul(wj, sum_n1, sum_n1); + svmul(mj, sum_n2, sum_n2); + rvec_sub(sum_n2, sum_n1, tmp_f); /* F = -grad V */ + + /* Store the additional force so that it can be added to the force + * array after the normal forces have been evaluated */ + for (m = 0; m < DIM; m++) + { + erg->f_rot_loc[j][m] = rotg->k*tmp_f[m]; + } + + PRINT_FORCE_J + + } /* END of loop over local atoms */ + + return V; +} + +#ifdef PRINT_COORDS +static void print_coordinates(t_rotgrp *rotg, rvec x[], matrix box, int step) +{ + int i; + static FILE *fp; + static char buf[STRLEN]; + static gmx_bool bFirst = 1; + + + if (bFirst) + { + sprintf(buf, "coords%d.txt", cr->nodeid); + fp = fopen(buf, "w"); + bFirst = 0; + } + + fprintf(fp, "\nStep %d\n", step); + fprintf(fp, "box: %f %f %f %f %f %f %f %f %f\n", + box[XX][XX], box[XX][YY], box[XX][ZZ], + box[YY][XX], box[YY][YY], box[YY][ZZ], + box[ZZ][XX], box[ZZ][ZZ], box[ZZ][ZZ]); + for (i = 0; i < rotg->nat; i++) + { + fprintf(fp, "%4d %f %f %f\n", i, + erg->xc[i][XX], erg->xc[i][YY], erg->xc[i][ZZ]); + } + fflush(fp); + +} +#endif + + +static int projection_compare(const void *a, const void *b) +{ + sort_along_vec_t *xca, *xcb; + + + xca = (sort_along_vec_t *)a; + xcb = (sort_along_vec_t *)b; + + if (xca->xcproj < 
xcb->xcproj) + { + return -1; + } + else if (xca->xcproj > xcb->xcproj) + { + return 1; + } + else + { + return 0; + } +} + + +static void sort_collective_coordinates( + t_rotgrp *rotg, /* Rotation group */ + sort_along_vec_t *data) /* Buffer for sorting the positions */ +{ + int i; + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + + + erg = rotg->enfrotgrp; + + /* The projection of the position vector on the rotation vector is + * the relevant value for sorting. Fill the 'data' structure */ + for (i = 0; i < rotg->nat; i++) + { + data[i].xcproj = iprod(erg->xc[i], rotg->vec); /* sort criterium */ + data[i].m = erg->mc[i]; + data[i].ind = i; + copy_rvec(erg->xc[i], data[i].x ); + copy_rvec(rotg->x_ref[i], data[i].x_ref); + } + /* Sort the 'data' structure */ + gmx_qsort(data, rotg->nat, sizeof(sort_along_vec_t), projection_compare); + + /* Copy back the sorted values */ + for (i = 0; i < rotg->nat; i++) + { + copy_rvec(data[i].x, erg->xc[i] ); + copy_rvec(data[i].x_ref, erg->xc_ref_sorted[i]); + erg->mc_sorted[i] = data[i].m; + erg->xc_sortind[i] = data[i].ind; + } +} + + +/* For each slab, get the first and the last index of the sorted atom + * indices */ +static void get_firstlast_atom_per_slab(t_rotgrp *rotg) +{ + int i, islab, n; + real beta; + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + + + erg = rotg->enfrotgrp; + + /* Find the first atom that needs to enter the calculation for each slab */ + n = erg->slab_first; /* slab */ + i = 0; /* start with the first atom */ + do + { + /* Find the first atom that significantly contributes to this slab */ + do /* move forward in position until a large enough beta is found */ + { + beta = calc_beta(erg->xc[i], rotg, n); + i++; + } + while ((beta < -erg->max_beta) && (i < rotg->nat)); + i--; + islab = n - erg->slab_first; /* slab index */ + erg->firstatom[islab] = i; + /* Proceed to the next slab */ + n++; + } + while (n <= erg->slab_last); + + /* Find the last atom for each 
slab */ + n = erg->slab_last; /* start with last slab */ + i = rotg->nat-1; /* start with the last atom */ + do + { + do /* move backward in position until a large enough beta is found */ + { + beta = calc_beta(erg->xc[i], rotg, n); + i--; + } + while ((beta > erg->max_beta) && (i > -1)); + i++; + islab = n - erg->slab_first; /* slab index */ + erg->lastatom[islab] = i; + /* Proceed to the next slab */ + n--; + } + while (n >= erg->slab_first); +} + + +/* Determine the very first and very last slab that needs to be considered + * For the first slab that needs to be considered, we have to find the smallest + * n that obeys: + * + * x_first * v - n*Delta_x <= beta_max + * + * slab index n, slab distance Delta_x, rotation vector v. For the last slab we + * have to find the largest n that obeys + * + * x_last * v - n*Delta_x >= -beta_max + * + */ +static gmx_inline int get_first_slab( + t_rotgrp *rotg, /* The rotation group (inputrec data) */ + real max_beta, /* The max_beta value, instead of min_gaussian */ + rvec firstatom) /* First atom after sorting along the rotation vector v */ +{ + /* Find the first slab for the first atom */ + return ceil((iprod(firstatom, rotg->vec) - max_beta)/rotg->slab_dist); +} + + +static gmx_inline int get_last_slab( + t_rotgrp *rotg, /* The rotation group (inputrec data) */ + real max_beta, /* The max_beta value, instead of min_gaussian */ + rvec lastatom) /* Last atom along v */ +{ + /* Find the last slab for the last atom */ + return floor((iprod(lastatom, rotg->vec) + max_beta)/rotg->slab_dist); +} + + +static void get_firstlast_slab_check( + t_rotgrp *rotg, /* The rotation group (inputrec data) */ + t_gmx_enfrotgrp *erg, /* The rotation group (data only accessible in this file) */ + rvec firstatom, /* First atom after sorting along the rotation vector v */ - rvec lastatom, /* Last atom along v */ - int g) /* The rotation group number */ ++ rvec lastatom) /* Last atom along v */ +{ + erg->slab_first = get_first_slab(rotg, 
erg->max_beta, firstatom); + erg->slab_last = get_last_slab(rotg, erg->max_beta, lastatom); + ++ /* Calculate the slab buffer size, which changes when slab_first changes */ ++ erg->slab_buffer = erg->slab_first - erg->slab_first_ref; ++ + /* Check whether we have reference data to compare against */ + if (erg->slab_first < erg->slab_first_ref) + { + gmx_fatal(FARGS, "%s No reference data for first slab (n=%d), unable to proceed.", + RotStr, erg->slab_first); + } + + /* Check whether we have reference data to compare against */ + if (erg->slab_last > erg->slab_last_ref) + { + gmx_fatal(FARGS, "%s No reference data for last slab (n=%d), unable to proceed.", + RotStr, erg->slab_last); + } +} + + +/* Enforced rotation with a flexible axis */ +static void do_flexible( + gmx_bool bMaster, + gmx_enfrot_t enfrot, /* Other rotation data */ + t_rotgrp *rotg, /* The rotation group */ + int g, /* Group number */ + rvec x[], /* The local positions */ + matrix box, + double t, /* Time in picoseconds */ - gmx_large_int_t step, /* The time step */ + gmx_bool bOutstepRot, /* Output to main rotation output file */ + gmx_bool bOutstepSlab) /* Output per-slab data */ +{ + int l, nslabs; + real sigma; /* The Gaussian width sigma */ + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + + + erg = rotg->enfrotgrp; + + /* Define the sigma value */ + sigma = 0.7*rotg->slab_dist; + + /* Sort the collective coordinates erg->xc along the rotation vector. This is + * an optimization for the inner loop. 
*/ + sort_collective_coordinates(rotg, enfrot->data); + + /* Determine the first relevant slab for the first atom and the last + * relevant slab for the last atom */ - get_firstlast_slab_check(rotg, erg, erg->xc[0], erg->xc[rotg->nat-1], g); ++ get_firstlast_slab_check(rotg, erg, erg->xc[0], erg->xc[rotg->nat-1]); + + /* Determine for each slab depending on the min_gaussian cutoff criterium, + * a first and a last atom index inbetween stuff needs to be calculated */ + get_firstlast_atom_per_slab(rotg); + + /* Determine the gaussian-weighted center of positions for all slabs */ + get_slab_centers(rotg, erg->xc, erg->mc_sorted, g, t, enfrot->out_slabs, bOutstepSlab, FALSE); + + /* Clear the torque per slab from last time step: */ + nslabs = erg->slab_last - erg->slab_first + 1; + for (l = 0; l < nslabs; l++) + { + erg->slab_torque_v[l] = 0.0; + } + + /* Call the rotational forces kernel */ + if (rotg->eType == erotgFLEX || rotg->eType == erotgFLEXT) + { + erg->V = do_flex_lowlevel(rotg, sigma, x, bOutstepRot, bOutstepSlab, box); + } + else if (rotg->eType == erotgFLEX2 || rotg->eType == erotgFLEX2T) + { + erg->V = do_flex2_lowlevel(rotg, sigma, x, bOutstepRot, bOutstepSlab, box); + } + else + { + gmx_fatal(FARGS, "Unknown flexible rotation type"); + } + + /* Determine angle by RMSD fit to the reference - Let's hope this */ + /* only happens once in a while, since this is not parallelized! 
*/ + if (bMaster && (erotgFitPOT != rotg->eFittype) ) + { + if (bOutstepRot) + { + /* Fit angle of the whole rotation group */ + erg->angle_v = flex_fit_angle(rotg); + } + if (bOutstepSlab) + { + /* Fit angle of each slab */ + flex_fit_angle_perslab(g, rotg, t, erg->degangle, enfrot->out_angles); + } + } + + /* Lump together the torques from all slabs: */ + erg->torque_v = 0.0; + for (l = 0; l < nslabs; l++) + { + erg->torque_v += erg->slab_torque_v[l]; + } +} + + +/* Calculate the angle between reference and actual rotation group atom, + * both projected into a plane perpendicular to the rotation vector: */ +static void angle(t_rotgrp *rotg, + rvec x_act, + rvec x_ref, + real *alpha, + real *weight) /* atoms near the rotation axis should count less than atoms far away */ +{ + rvec xp, xrp; /* current and reference positions projected on a plane perpendicular to pg->vec */ + rvec dum; + + + /* Project x_ref and x into a plane through the origin perpendicular to rot_vec: */ + /* Project x_ref: xrp = x_ref - (vec * x_ref) * vec */ + svmul(iprod(rotg->vec, x_ref), rotg->vec, dum); + rvec_sub(x_ref, dum, xrp); + /* Project x_act: */ + svmul(iprod(rotg->vec, x_act), rotg->vec, dum); + rvec_sub(x_act, dum, xp); + + /* Retrieve information about which vector precedes. gmx_angle always + * returns a positive angle. */ + cprod(xp, xrp, dum); /* if reference precedes, this is pointing into the same direction as vec */ + + if (iprod(rotg->vec, dum) >= 0) + { + *alpha = -gmx_angle(xrp, xp); + } + else + { + *alpha = +gmx_angle(xrp, xp); + } + + /* Also return the weight */ + *weight = norm(xp); +} + + +/* Project first vector onto a plane perpendicular to the second vector + * dr = dr - (dr.v)v + * Note that v must be of unit length. 
+ */ +static gmx_inline void project_onto_plane(rvec dr, const rvec v) +{ + rvec tmp; + + + svmul(iprod(dr, v), v, tmp); /* tmp = (dr.v)v */ + rvec_dec(dr, tmp); /* dr = dr - (dr.v)v */ +} + + +/* Fixed rotation: The rotation reference group rotates around the v axis. */ +/* The atoms of the actual rotation group are attached with imaginary */ +/* springs to the reference atoms. */ +static void do_fixed( + t_rotgrp *rotg, /* The rotation group */ - rvec x[], /* The positions */ - matrix box, /* The simulation box */ - double t, /* Time in picoseconds */ - gmx_large_int_t step, /* The time step */ + gmx_bool bOutstepRot, /* Output to main rotation output file */ + gmx_bool bOutstepSlab) /* Output per-slab data */ +{ + int ifit, j, jj, m; + rvec dr; + rvec tmp_f; /* Force */ + real alpha; /* a single angle between an actual and a reference position */ + real weight; /* single weight for a single angle */ + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + rvec xi_xc; /* xi - xc */ + gmx_bool bCalcPotFit; + rvec fit_xr_loc; + + /* for mass weighting: */ + real wi; /* Mass-weighting of the positions */ + real N_M; /* N/M */ + real k_wi; /* k times wi */ + + gmx_bool bProject; + + + erg = rotg->enfrotgrp; + bProject = (rotg->eType == erotgPM) || (rotg->eType == erotgPMPF); + bCalcPotFit = (bOutstepRot || bOutstepSlab) && (erotgFitPOT == rotg->eFittype); + + N_M = rotg->nat * erg->invmass; + + /* Each process calculates the forces on its local atoms */ + for (j = 0; j < erg->nat_loc; j++) + { + /* Calculate (x_i-x_c) resp. 
(x_i-u) */ + rvec_sub(erg->x_loc_pbc[j], erg->xc_center, xi_xc); + + /* Calculate Omega*(y_i-y_c)-(x_i-x_c) */ + rvec_sub(erg->xr_loc[j], xi_xc, dr); + + if (bProject) + { + project_onto_plane(dr, rotg->vec); + } + + /* Mass-weighting */ + wi = N_M*erg->m_loc[j]; + + /* Store the additional force so that it can be added to the force + * array after the normal forces have been evaluated */ + k_wi = rotg->k*wi; + for (m = 0; m < DIM; m++) + { + tmp_f[m] = k_wi*dr[m]; + erg->f_rot_loc[j][m] = tmp_f[m]; + erg->V += 0.5*k_wi*sqr(dr[m]); + } + + /* If requested, also calculate the potential for a set of angles + * near the current reference angle */ + if (bCalcPotFit) + { + for (ifit = 0; ifit < rotg->PotAngle_nstep; ifit++) + { + /* Index of this rotation group atom with respect to the whole rotation group */ + jj = erg->xc_ref_ind[j]; + + /* Rotate with the alternative angle. Like rotate_local_reference(), + * just for a single local atom */ + mvmul(erg->PotAngleFit->rotmat[ifit], rotg->x_ref[jj], fit_xr_loc); /* fit_xr_loc = Omega*(y_i-y_c) */ + + /* Calculate Omega*(y_i-y_c)-(x_i-x_c) */ + rvec_sub(fit_xr_loc, xi_xc, dr); + + if (bProject) + { + project_onto_plane(dr, rotg->vec); + } + + /* Add to the rotation potential for this angle: */ + erg->PotAngleFit->V[ifit] += 0.5*k_wi*norm2(dr); + } + } + + if (bOutstepRot) + { + /* Add to the torque of this rotation group */ + erg->torque_v += torque(rotg->vec, tmp_f, erg->x_loc_pbc[j], erg->xc_center); + + /* Calculate the angle between reference and actual rotation group atom. 
*/ + angle(rotg, xi_xc, erg->xr_loc[j], &alpha, &weight); /* angle in rad, weighted */ + erg->angle_v += alpha * weight; + erg->weight_v += weight; + } + /* If you want enforced rotation to contribute to the virial, + * activate the following lines: + if (MASTER(cr)) + { + Add the rotation contribution to the virial + for(j=0; jenfrotgrp; + bCalcPotFit = (bOutstepRot || bOutstepSlab) && (erotgFitPOT == rotg->eFittype); + + N_M = rotg->nat * erg->invmass; + + /* Each process calculates the forces on its local atoms */ + for (j = 0; j < erg->nat_loc; j++) + { + /* Calculate (xj-u) */ + rvec_sub(erg->x_loc_pbc[j], erg->xc_center, xj_u); /* xj_u = xj-u */ + + /* Calculate Omega.(yj0-u) */ + cprod(rotg->vec, erg->xr_loc[j], tmpvec); /* tmpvec = v x Omega.(yj0-u) */ + + /* * v x Omega.(yj0-u) */ + unitv(tmpvec, pj); /* pj = --------------------- */ + /* | v x Omega.(yj0-u) | */ + + fac = iprod(pj, xj_u); /* fac = pj.(xj-u) */ + fac2 = fac*fac; + + /* Mass-weighting */ + wj = N_M*erg->m_loc[j]; + + /* Store the additional force so that it can be added to the force + * array after the normal forces have been evaluated */ + svmul(-rotg->k*wj*fac, pj, tmp_f); + copy_rvec(tmp_f, erg->f_rot_loc[j]); + sum += wj*fac2; + + /* If requested, also calculate the potential for a set of angles + * near the current reference angle */ + if (bCalcPotFit) + { + for (ifit = 0; ifit < rotg->PotAngle_nstep; ifit++) + { + /* Index of this rotation group atom with respect to the whole rotation group */ + jj = erg->xc_ref_ind[j]; + + /* Rotate with the alternative angle. 
Like rotate_local_reference(), + * just for a single local atom */ + mvmul(erg->PotAngleFit->rotmat[ifit], rotg->x_ref[jj], fit_tmpvec); /* fit_tmpvec = Omega*(yj0-u) */ + + /* Calculate Omega.(yj0-u) */ + cprod(rotg->vec, fit_tmpvec, tmpvec); /* tmpvec = v x Omega.(yj0-u) */ + /* * v x Omega.(yj0-u) */ + unitv(tmpvec, pj); /* pj = --------------------- */ + /* | v x Omega.(yj0-u) | */ + + fac = iprod(pj, xj_u); /* fac = pj.(xj-u) */ + fac2 = fac*fac; + + /* Add to the rotation potential for this angle: */ + erg->PotAngleFit->V[ifit] += 0.5*rotg->k*wj*fac2; + } + } + + if (bOutstepRot) + { + /* Add to the torque of this rotation group */ + erg->torque_v += torque(rotg->vec, tmp_f, erg->x_loc_pbc[j], erg->xc_center); + + /* Calculate the angle between reference and actual rotation group atom. */ + angle(rotg, xj_u, erg->xr_loc[j], &alpha, &weight); /* angle in rad, weighted */ + erg->angle_v += alpha * weight; + erg->weight_v += weight; + } + + PRINT_FORCE_J + + } /* end of loop over local rotation group atoms */ + erg->V = 0.5*rotg->k*sum; +} + + +/* Calculate the radial motion pivot-free potential and forces */ +static void do_radial_motion_pf( + t_rotgrp *rotg, /* The rotation group */ + rvec x[], /* The positions */ + matrix box, /* The simulation box */ - double t, /* Time in picoseconds */ - gmx_large_int_t step, /* The time step */ + gmx_bool bOutstepRot, /* Output to main rotation output file */ + gmx_bool bOutstepSlab) /* Output per-slab data */ +{ + int i, ii, iigrp, ifit, j; + rvec xj; /* Current position */ + rvec xj_xc; /* xj - xc */ + rvec yj0_yc0; /* yj0 - yc0 */ + rvec tmp_f; /* Force */ + real alpha; /* a single angle between an actual and a reference position */ + real weight; /* single weight for a single angle */ + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + rvec tmpvec, tmpvec2; + rvec innersumvec; /* Precalculation of the inner sum */ + rvec innersumveckM; + real fac, fac2, V = 0.0; + rvec qi, qj; + gmx_bool 
bCalcPotFit; + + /* For mass weighting: */ + real mj, wi, wj; /* Mass-weighting of the positions */ + real N_M; /* N/M */ + + + erg = rotg->enfrotgrp; + bCalcPotFit = (bOutstepRot || bOutstepSlab) && (erotgFitPOT == rotg->eFittype); + + N_M = rotg->nat * erg->invmass; + + /* Get the current center of the rotation group: */ + get_center(erg->xc, erg->mc, rotg->nat, erg->xc_center); + + /* Precalculate Sum_i [ wi qi.(xi-xc) qi ] which is needed for every single j */ + clear_rvec(innersumvec); + for (i = 0; i < rotg->nat; i++) + { + /* Mass-weighting */ + wi = N_M*erg->mc[i]; + + /* Calculate qi. Note that xc_ref_center has already been subtracted from + * x_ref in init_rot_group.*/ + mvmul(erg->rotmat, rotg->x_ref[i], tmpvec); /* tmpvec = Omega.(yi0-yc0) */ + + cprod(rotg->vec, tmpvec, tmpvec2); /* tmpvec2 = v x Omega.(yi0-yc0) */ + + /* * v x Omega.(yi0-yc0) */ + unitv(tmpvec2, qi); /* qi = ----------------------- */ + /* | v x Omega.(yi0-yc0) | */ + + rvec_sub(erg->xc[i], erg->xc_center, tmpvec); /* tmpvec = xi-xc */ + + svmul(wi*iprod(qi, tmpvec), qi, tmpvec2); + + rvec_inc(innersumvec, tmpvec2); + } + svmul(rotg->k*erg->invmass, innersumvec, innersumveckM); + + /* Each process calculates the forces on its local atoms */ + for (j = 0; j < erg->nat_loc; j++) + { + /* Local index of a rotation group atom */ + ii = erg->ind_loc[j]; + /* Position of this atom in the collective array */ + iigrp = erg->xc_ref_ind[j]; + /* Mass-weighting */ + mj = erg->mc[iigrp]; /* need the unsorted mass here */ + wj = N_M*mj; + + /* Current position of this atom: x[ii][XX/YY/ZZ] */ + copy_rvec(x[ii], xj); + + /* Shift this atom such that it is near its reference */ + shift_single_coord(box, xj, erg->xc_shifts[iigrp]); + + /* The (unrotated) reference position is yj0. 
yc0 has already + * been subtracted in init_rot_group */ + copy_rvec(rotg->x_ref[iigrp], yj0_yc0); /* yj0_yc0 = yj0 - yc0 */ + + /* Calculate Omega.(yj0-yc0) */ + mvmul(erg->rotmat, yj0_yc0, tmpvec2); /* tmpvec2 = Omega.(yj0 - yc0) */ + + cprod(rotg->vec, tmpvec2, tmpvec); /* tmpvec = v x Omega.(yj0-yc0) */ + + /* * v x Omega.(yj0-yc0) */ + unitv(tmpvec, qj); /* qj = ----------------------- */ + /* | v x Omega.(yj0-yc0) | */ + + /* Calculate (xj-xc) */ + rvec_sub(xj, erg->xc_center, xj_xc); /* xj_xc = xj-xc */ + + fac = iprod(qj, xj_xc); /* fac = qj.(xj-xc) */ + fac2 = fac*fac; + + /* Store the additional force so that it can be added to the force + * array after the normal forces have been evaluated */ + svmul(-rotg->k*wj*fac, qj, tmp_f); /* part 1 of force */ + svmul(mj, innersumveckM, tmpvec); /* part 2 of force */ + rvec_inc(tmp_f, tmpvec); + copy_rvec(tmp_f, erg->f_rot_loc[j]); + V += wj*fac2; + + /* If requested, also calculate the potential for a set of angles + * near the current reference angle */ + if (bCalcPotFit) + { + for (ifit = 0; ifit < rotg->PotAngle_nstep; ifit++) + { + /* Rotate with the alternative angle. Like rotate_local_reference(), + * just for a single local atom */ + mvmul(erg->PotAngleFit->rotmat[ifit], yj0_yc0, tmpvec2); /* tmpvec2 = Omega*(yj0-yc0) */ + + /* Calculate Omega.(yj0-u) */ + cprod(rotg->vec, tmpvec2, tmpvec); /* tmpvec = v x Omega.(yj0-yc0) */ + /* * v x Omega.(yj0-yc0) */ + unitv(tmpvec, qj); /* qj = ----------------------- */ + /* | v x Omega.(yj0-yc0) | */ + + fac = iprod(qj, xj_xc); /* fac = qj.(xj-xc) */ + fac2 = fac*fac; + + /* Add to the rotation potential for this angle: */ + erg->PotAngleFit->V[ifit] += 0.5*rotg->k*wj*fac2; + } + } + + if (bOutstepRot) + { + /* Add to the torque of this rotation group */ + erg->torque_v += torque(rotg->vec, tmp_f, xj, erg->xc_center); + + /* Calculate the angle between reference and actual rotation group atom. 
*/ + angle(rotg, xj_xc, yj0_yc0, &alpha, &weight); /* angle in rad, weighted */ + erg->angle_v += alpha * weight; + erg->weight_v += weight; + } + + PRINT_FORCE_J + + } /* end of loop over local rotation group atoms */ + erg->V = 0.5*rotg->k*V; +} + + +/* Precalculate the inner sum for the radial motion 2 forces */ +static void radial_motion2_precalc_inner_sum(t_rotgrp *rotg, rvec innersumvec) +{ + int i; + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + rvec xi_xc; /* xj - xc */ + rvec tmpvec, tmpvec2; + real fac, fac2; + rvec ri, si; + real siri; + rvec v_xi_xc; /* v x (xj - u) */ + real psii, psiistar; + real wi; /* Mass-weighting of the positions */ + real N_M; /* N/M */ + rvec sumvec; + + erg = rotg->enfrotgrp; + N_M = rotg->nat * erg->invmass; + + /* Loop over the collective set of positions */ + clear_rvec(sumvec); + for (i = 0; i < rotg->nat; i++) + { + /* Mass-weighting */ + wi = N_M*erg->mc[i]; + + rvec_sub(erg->xc[i], erg->xc_center, xi_xc); /* xi_xc = xi-xc */ + + /* Calculate ri. 
Note that xc_ref_center has already been subtracted from + * x_ref in init_rot_group.*/ + mvmul(erg->rotmat, rotg->x_ref[i], ri); /* ri = Omega.(yi0-yc0) */ + + cprod(rotg->vec, xi_xc, v_xi_xc); /* v_xi_xc = v x (xi-u) */ + + fac = norm2(v_xi_xc); + /* * 1 */ + psiistar = 1.0/(fac + rotg->eps); /* psiistar = --------------------- */ + /* |v x (xi-xc)|^2 + eps */ + + psii = gmx_invsqrt(fac); /* 1 */ + /* psii = ------------- */ + /* |v x (xi-xc)| */ + + svmul(psii, v_xi_xc, si); /* si = psii * (v x (xi-xc) ) */ + + fac = iprod(v_xi_xc, ri); /* fac = (v x (xi-xc)).ri */ + fac2 = fac*fac; + + siri = iprod(si, ri); /* siri = si.ri */ + + svmul(psiistar/psii, ri, tmpvec); + svmul(psiistar*psiistar/(psii*psii*psii) * siri, si, tmpvec2); + rvec_dec(tmpvec, tmpvec2); + cprod(tmpvec, rotg->vec, tmpvec2); + + svmul(wi*siri, tmpvec2, tmpvec); + + rvec_inc(sumvec, tmpvec); + } + svmul(rotg->k*erg->invmass, sumvec, innersumvec); +} + + +/* Calculate the radial motion 2 potential and forces */ +static void do_radial_motion2( + t_rotgrp *rotg, /* The rotation group */ + rvec x[], /* The positions */ + matrix box, /* The simulation box */ - double t, /* Time in picoseconds */ - gmx_large_int_t step, /* The time step */ + gmx_bool bOutstepRot, /* Output to main rotation output file */ + gmx_bool bOutstepSlab) /* Output per-slab data */ +{ + int ii, iigrp, ifit, j; + rvec xj; /* Position */ + real alpha; /* a single angle between an actual and a reference position */ + real weight; /* single weight for a single angle */ + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + rvec xj_u; /* xj - u */ + rvec yj0_yc0; /* yj0 -yc0 */ + rvec tmpvec, tmpvec2; + real fac, fit_fac, fac2, Vpart = 0.0; + rvec rj, fit_rj, sj; + real sjrj; + rvec v_xj_u; /* v x (xj - u) */ + real psij, psijstar; + real mj, wj; /* For mass-weighting of the positions */ + real N_M; /* N/M */ + gmx_bool bPF; + rvec innersumvec; + gmx_bool bCalcPotFit; + + + erg = rotg->enfrotgrp; + + bPF = 
rotg->eType == erotgRM2PF; + bCalcPotFit = (bOutstepRot || bOutstepSlab) && (erotgFitPOT == rotg->eFittype); + + + clear_rvec(yj0_yc0); /* Make the compiler happy */ + + clear_rvec(innersumvec); + if (bPF) + { + /* For the pivot-free variant we have to use the current center of + * mass of the rotation group instead of the pivot u */ + get_center(erg->xc, erg->mc, rotg->nat, erg->xc_center); + + /* Also, we precalculate the second term of the forces that is identical + * (up to the weight factor mj) for all forces */ + radial_motion2_precalc_inner_sum(rotg, innersumvec); + } + + N_M = rotg->nat * erg->invmass; + + /* Each process calculates the forces on its local atoms */ + for (j = 0; j < erg->nat_loc; j++) + { + if (bPF) + { + /* Local index of a rotation group atom */ + ii = erg->ind_loc[j]; + /* Position of this atom in the collective array */ + iigrp = erg->xc_ref_ind[j]; + /* Mass-weighting */ + mj = erg->mc[iigrp]; + + /* Current position of this atom: x[ii] */ + copy_rvec(x[ii], xj); + + /* Shift this atom such that it is near its reference */ + shift_single_coord(box, xj, erg->xc_shifts[iigrp]); + + /* The (unrotated) reference position is yj0. yc0 has already + * been subtracted in init_rot_group */ + copy_rvec(rotg->x_ref[iigrp], yj0_yc0); /* yj0_yc0 = yj0 - yc0 */ + + /* Calculate Omega.(yj0-yc0) */ + mvmul(erg->rotmat, yj0_yc0, rj); /* rj = Omega.(yj0-yc0) */ + } + else + { + mj = erg->m_loc[j]; + copy_rvec(erg->x_loc_pbc[j], xj); + copy_rvec(erg->xr_loc[j], rj); /* rj = Omega.(yj0-u) */ + } + /* Mass-weighting */ + wj = N_M*mj; + + /* Calculate (xj-u) resp. 
(xj-xc) */ + rvec_sub(xj, erg->xc_center, xj_u); /* xj_u = xj-u */ + + cprod(rotg->vec, xj_u, v_xj_u); /* v_xj_u = v x (xj-u) */ + + fac = norm2(v_xj_u); + /* * 1 */ + psijstar = 1.0/(fac + rotg->eps); /* psistar = -------------------- */ + /* |v x (xj-u)|^2 + eps */ + + psij = gmx_invsqrt(fac); /* 1 */ + /* psij = ------------ */ + /* |v x (xj-u)| */ + + svmul(psij, v_xj_u, sj); /* sj = psij * (v x (xj-u) ) */ + + fac = iprod(v_xj_u, rj); /* fac = (v x (xj-u)).rj */ + fac2 = fac*fac; + + sjrj = iprod(sj, rj); /* sjrj = sj.rj */ + + svmul(psijstar/psij, rj, tmpvec); + svmul(psijstar*psijstar/(psij*psij*psij) * sjrj, sj, tmpvec2); + rvec_dec(tmpvec, tmpvec2); + cprod(tmpvec, rotg->vec, tmpvec2); + + /* Store the additional force so that it can be added to the force + * array after the normal forces have been evaluated */ + svmul(-rotg->k*wj*sjrj, tmpvec2, tmpvec); + svmul(mj, innersumvec, tmpvec2); /* This is != 0 only for the pivot-free variant */ + + rvec_add(tmpvec2, tmpvec, erg->f_rot_loc[j]); + Vpart += wj*psijstar*fac2; + + /* If requested, also calculate the potential for a set of angles + * near the current reference angle */ + if (bCalcPotFit) + { + for (ifit = 0; ifit < rotg->PotAngle_nstep; ifit++) + { + if (bPF) + { + mvmul(erg->PotAngleFit->rotmat[ifit], yj0_yc0, fit_rj); /* fit_rj = Omega.(yj0-yc0) */ + } + else + { + /* Position of this atom in the collective array */ + iigrp = erg->xc_ref_ind[j]; + /* Rotate with the alternative angle. 
Like rotate_local_reference(), + * just for a single local atom */ + mvmul(erg->PotAngleFit->rotmat[ifit], rotg->x_ref[iigrp], fit_rj); /* fit_rj = Omega*(yj0-u) */ + } + fit_fac = iprod(v_xj_u, fit_rj); /* fac = (v x (xj-u)).fit_rj */ + /* Add to the rotation potential for this angle: */ + erg->PotAngleFit->V[ifit] += 0.5*rotg->k*wj*psijstar*fit_fac*fit_fac; + } + } + + if (bOutstepRot) + { + /* Add to the torque of this rotation group */ + erg->torque_v += torque(rotg->vec, erg->f_rot_loc[j], xj, erg->xc_center); + + /* Calculate the angle between reference and actual rotation group atom. */ + angle(rotg, xj_u, rj, &alpha, &weight); /* angle in rad, weighted */ + erg->angle_v += alpha * weight; + erg->weight_v += weight; + } + + PRINT_FORCE_J + + } /* end of loop over local rotation group atoms */ + erg->V = 0.5*rotg->k*Vpart; +} + + +/* Determine the smallest and largest position vector (with respect to the + * rotation vector) for the reference group */ +static void get_firstlast_atom_ref( + t_rotgrp *rotg, + int *firstindex, + int *lastindex) +{ + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + int i; + real xcproj; /* The projection of a reference position on the + rotation vector */ + real minproj, maxproj; /* Smallest and largest projection on v */ + + + + erg = rotg->enfrotgrp; + + /* Start with some value */ + minproj = iprod(rotg->x_ref[0], rotg->vec); + maxproj = minproj; + + /* This is just to ensure that it still works if all the atoms of the + * reference structure are situated in a plane perpendicular to the rotation + * vector */ + *firstindex = 0; + *lastindex = rotg->nat-1; + + /* Loop over all atoms of the reference group, + * project them on the rotation vector to find the extremes */ + for (i = 0; i < rotg->nat; i++) + { + xcproj = iprod(rotg->x_ref[i], rotg->vec); + if (xcproj < minproj) + { + minproj = xcproj; + *firstindex = i; + } + if (xcproj > maxproj) + { + maxproj = xcproj; + *lastindex = i; + } + } +} + + +/* 
Allocate memory for the slabs */ +static void allocate_slabs( + t_rotgrp *rotg, + FILE *fplog, + int g, + gmx_bool bVerbose) +{ + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + int i, nslabs; + + + erg = rotg->enfrotgrp; + + /* More slabs than are defined for the reference are never needed */ + nslabs = erg->slab_last_ref - erg->slab_first_ref + 1; + + /* Remember how many we allocated */ + erg->nslabs_alloc = nslabs; + + if ( (NULL != fplog) && bVerbose) + { + fprintf(fplog, "%s allocating memory to store data for %d slabs (rotation group %d).\n", + RotStr, nslabs, g); + } + snew(erg->slab_center, nslabs); + snew(erg->slab_center_ref, nslabs); + snew(erg->slab_weights, nslabs); + snew(erg->slab_torque_v, nslabs); + snew(erg->slab_data, nslabs); + snew(erg->gn_atom, nslabs); + snew(erg->gn_slabind, nslabs); + snew(erg->slab_innersumvec, nslabs); + for (i = 0; i < nslabs; i++) + { + snew(erg->slab_data[i].x, rotg->nat); + snew(erg->slab_data[i].ref, rotg->nat); + snew(erg->slab_data[i].weight, rotg->nat); + } + snew(erg->xc_ref_sorted, rotg->nat); + snew(erg->xc_sortind, rotg->nat); + snew(erg->firstatom, nslabs); + snew(erg->lastatom, nslabs); +} + + - /* From the extreme coordinates of the reference group, determine the first ++/* From the extreme positions of the reference group, determine the first + * and last slab of the reference. We can never have more slabs in the real + * simulation than calculated here for the reference. 
+ */ +static void get_firstlast_slab_ref(t_rotgrp *rotg, real mc[], int ref_firstindex, int ref_lastindex) +{ + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ - int first, last, firststart; ++ int first, last; + rvec dummy; + + + erg = rotg->enfrotgrp; + first = get_first_slab(rotg, erg->max_beta, rotg->x_ref[ref_firstindex]); + last = get_last_slab( rotg, erg->max_beta, rotg->x_ref[ref_lastindex ]); - firststart = first; + + while (get_slab_weight(first, rotg, rotg->x_ref, mc, &dummy) > WEIGHT_MIN) + { + first--; + } + erg->slab_first_ref = first+1; + while (get_slab_weight(last, rotg, rotg->x_ref, mc, &dummy) > WEIGHT_MIN) + { + last++; + } + erg->slab_last_ref = last-1; - - erg->slab_buffer = firststart - erg->slab_first_ref; +} + + +/* Special version of copy_rvec: + * During the copy procedure of xcurr to b, the correct PBC image is chosen + * such that the copied vector ends up near its reference position xref */ +static inline void copy_correct_pbc_image( + const rvec xcurr, /* copy vector xcurr ... */ + rvec b, /* ... to b ... 
*/ + const rvec xref, /* choosing the PBC image such that b ends up near xref */ + matrix box, + int npbcdim) +{ + rvec dx; + int d, m; + ivec shift; + + + /* Shortest PBC distance between the atom and its reference */ + rvec_sub(xcurr, xref, dx); + + /* Determine the shift for this atom */ + clear_ivec(shift); + for (m = npbcdim-1; m >= 0; m--) + { + while (dx[m] < -0.5*box[m][m]) + { + for (d = 0; d < DIM; d++) + { + dx[d] += box[m][d]; + } + shift[m]++; + } + while (dx[m] >= 0.5*box[m][m]) + { + for (d = 0; d < DIM; d++) + { + dx[d] -= box[m][d]; + } + shift[m]--; + } + } + + /* Apply the shift to the position */ + copy_rvec(xcurr, b); + shift_single_coord(box, b, shift); +} + + +static void init_rot_group(FILE *fplog, t_commrec *cr, int g, t_rotgrp *rotg, + rvec *x, gmx_mtop_t *mtop, gmx_bool bVerbose, FILE *out_slabs, matrix box, + t_inputrec *ir, gmx_bool bOutputCenters) +{ + int i, ii; + rvec coord, xref, *xdum; + gmx_bool bFlex, bColl; + t_atom *atom; + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + int ref_firstindex, ref_lastindex; + gmx_mtop_atomlookup_t alook = NULL; + real mass, totalmass; + real start = 0.0; + double t_start; + + + /* Do we have a flexible axis? */ + bFlex = ISFLEX(rotg); + /* Do we use a global set of coordinates? 
*/ + bColl = ISCOLL(rotg); + + erg = rotg->enfrotgrp; + + /* Allocate space for collective coordinates if needed */ + if (bColl) + { + snew(erg->xc, rotg->nat); + snew(erg->xc_shifts, rotg->nat); + snew(erg->xc_eshifts, rotg->nat); + snew(erg->xc_old, rotg->nat); + + if (rotg->eFittype == erotgFitNORM) + { + snew(erg->xc_ref_length, rotg->nat); /* in case fit type NORM is chosen */ + snew(erg->xc_norm, rotg->nat); + } + } + else + { + snew(erg->xr_loc, rotg->nat); + snew(erg->x_loc_pbc, rotg->nat); + } + + snew(erg->f_rot_loc, rotg->nat); + snew(erg->xc_ref_ind, rotg->nat); + + /* Make space for the calculation of the potential at other angles (used + * for fitting only) */ + if (erotgFitPOT == rotg->eFittype) + { + snew(erg->PotAngleFit, 1); + snew(erg->PotAngleFit->degangle, rotg->PotAngle_nstep); + snew(erg->PotAngleFit->V, rotg->PotAngle_nstep); + snew(erg->PotAngleFit->rotmat, rotg->PotAngle_nstep); + + /* Get the set of angles around the reference angle */ + start = -0.5 * (rotg->PotAngle_nstep - 1)*rotg->PotAngle_step; + for (i = 0; i < rotg->PotAngle_nstep; i++) + { + erg->PotAngleFit->degangle[i] = start + i*rotg->PotAngle_step; + } + } + else + { + erg->PotAngleFit = NULL; + } + + /* xc_ref_ind needs to be set to identity in the serial case */ + if (!PAR(cr)) + { + for (i = 0; i < rotg->nat; i++) + { + erg->xc_ref_ind[i] = i; + } + } + + /* Copy the masses so that the center can be determined. For all types of + * enforced rotation, we store the masses in the erg->mc array. 
*/ + if (rotg->bMassW) + { + alook = gmx_mtop_atomlookup_init(mtop); + } + snew(erg->mc, rotg->nat); + if (bFlex) + { + snew(erg->mc_sorted, rotg->nat); + } + if (!bColl) + { + snew(erg->m_loc, rotg->nat); + } + totalmass = 0.0; + for (i = 0; i < rotg->nat; i++) + { + if (rotg->bMassW) + { + gmx_mtop_atomnr_to_atom(alook, rotg->ind[i], &atom); + mass = atom->m; + } + else + { + mass = 1.0; + } + erg->mc[i] = mass; + totalmass += mass; + } + erg->invmass = 1.0/totalmass; + + if (rotg->bMassW) + { + gmx_mtop_atomlookup_destroy(alook); + } + + /* Set xc_ref_center for any rotation potential */ + if ((rotg->eType == erotgISO) || (rotg->eType == erotgPM) || (rotg->eType == erotgRM) || (rotg->eType == erotgRM2)) + { + /* Set the pivot point for the fixed, stationary-axis potentials. This + * won't change during the simulation */ + copy_rvec(rotg->pivot, erg->xc_ref_center); + copy_rvec(rotg->pivot, erg->xc_center ); + } + else + { + /* Center of the reference positions */ + get_center(rotg->x_ref, erg->mc, rotg->nat, erg->xc_ref_center); + + /* Center of the actual positions */ + if (MASTER(cr)) + { + snew(xdum, rotg->nat); + for (i = 0; i < rotg->nat; i++) + { + ii = rotg->ind[i]; + copy_rvec(x[ii], xdum[i]); + } + get_center(xdum, erg->mc, rotg->nat, erg->xc_center); + sfree(xdum); + } +#ifdef GMX_MPI + if (PAR(cr)) + { + gmx_bcast(sizeof(erg->xc_center), erg->xc_center, cr); + } +#endif + } + + if (bColl) + { + /* Save the original (whole) set of positions in xc_old such that at later + * steps the rotation group can always be made whole again. 
If the simulation is + * restarted, we compute the starting reference positions (given the time) + * and assume that the correct PBC image of each position is the one nearest + * to the current reference */ + if (MASTER(cr)) + { + /* Calculate the rotation matrix for this angle: */ + t_start = ir->init_t + ir->init_step*ir->delta_t; + erg->degangle = rotg->rate * t_start; + calc_rotmat(rotg->vec, erg->degangle, erg->rotmat); + + for (i = 0; i < rotg->nat; i++) + { + ii = rotg->ind[i]; + + /* Subtract pivot, rotate, and add pivot again. This will yield the + * reference position for time t */ + rvec_sub(rotg->x_ref[i], erg->xc_ref_center, coord); + mvmul(erg->rotmat, coord, xref); + rvec_inc(xref, erg->xc_ref_center); + + copy_correct_pbc_image(x[ii], erg->xc_old[i], xref, box, 3); + } + } +#ifdef GMX_MPI + if (PAR(cr)) + { + gmx_bcast(rotg->nat*sizeof(erg->xc_old[0]), erg->xc_old, cr); + } +#endif + } + + if ( (rotg->eType != erotgFLEX) && (rotg->eType != erotgFLEX2) ) + { + /* Put the reference positions into origin: */ + for (i = 0; i < rotg->nat; i++) + { + rvec_dec(rotg->x_ref[i], erg->xc_ref_center); + } + } + + /* Enforced rotation with flexible axis */ + if (bFlex) + { + /* Calculate maximum beta value from minimum gaussian (performance opt.) */ + erg->max_beta = calc_beta_max(rotg->min_gaussian, rotg->slab_dist); + + /* Determine the smallest and largest coordinate with respect to the rotation vector */ + get_firstlast_atom_ref(rotg, &ref_firstindex, &ref_lastindex); + - /* From the extreme coordinates of the reference group, determine the first ++ /* From the extreme positions of the reference group, determine the first + * and last slab of the reference. 
*/ + get_firstlast_slab_ref(rotg, erg->mc, ref_firstindex, ref_lastindex); + + /* Allocate memory for the slabs */ + allocate_slabs(rotg, fplog, g, bVerbose); + + /* Flexible rotation: determine the reference centers for the rest of the simulation */ + erg->slab_first = erg->slab_first_ref; + erg->slab_last = erg->slab_last_ref; + get_slab_centers(rotg, rotg->x_ref, erg->mc, g, -1, out_slabs, bOutputCenters, TRUE); + + /* Length of each x_rotref vector from center (needed if fit routine NORM is chosen): */ + if (rotg->eFittype == erotgFitNORM) + { + for (i = 0; i < rotg->nat; i++) + { + rvec_sub(rotg->x_ref[i], erg->xc_ref_center, coord); + erg->xc_ref_length[i] = norm(coord); + } + } + } +} + + +extern void dd_make_local_rotation_groups(gmx_domdec_t *dd, t_rot *rot) +{ + gmx_ga2la_t ga2la; + int g; + t_rotgrp *rotg; + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + + ga2la = dd->ga2la; + + for (g = 0; g < rot->ngrp; g++) + { + rotg = &rot->grp[g]; + erg = rotg->enfrotgrp; + + + dd_make_local_group_indices(ga2la, rotg->nat, rotg->ind, + &erg->nat_loc, &erg->ind_loc, &erg->nalloc_loc, erg->xc_ref_ind); + } +} + + +/* Calculate the size of the MPI buffer needed in reduce_output() */ +static int calc_mpi_bufsize(t_rot *rot) +{ + int g; + int count_group, count_total; + t_rotgrp *rotg; + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + + + count_total = 0; + for (g = 0; g < rot->ngrp; g++) + { + rotg = &rot->grp[g]; + erg = rotg->enfrotgrp; + + /* Count the items that are transferred for this group: */ + count_group = 4; /* V, torque, angle, weight */ + + /* Add the maximum number of slabs for flexible groups */ + if (ISFLEX(rotg)) + { + count_group += erg->slab_last_ref - erg->slab_first_ref + 1; + } + + /* Add space for the potentials at different angles: */ + if (erotgFitPOT == rotg->eFittype) + { + count_group += rotg->PotAngle_nstep; + } + + /* Add to the total number: */ + count_total += count_group; + } + + return 
count_total; +} + + +extern void init_rot(FILE *fplog, t_inputrec *ir, int nfile, const t_filenm fnm[], + t_commrec *cr, rvec *x, matrix box, gmx_mtop_t *mtop, const output_env_t oenv, + gmx_bool bVerbose, unsigned long Flags) +{ + t_rot *rot; + t_rotgrp *rotg; + int g; + int nat_max = 0; /* Size of biggest rotation group */ + gmx_enfrot_t er; /* Pointer to the enforced rotation buffer variables */ + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + rvec *x_pbc = NULL; /* Space for the pbc-correct atom positions */ + + + if ( (PAR(cr)) && !DOMAINDECOMP(cr) ) + { + gmx_fatal(FARGS, "Enforced rotation is only implemented for domain decomposition!"); + } + + if (MASTER(cr) && bVerbose) + { + fprintf(stdout, "%s Initializing ...\n", RotStr); + } + + rot = ir->rot; + snew(rot->enfrot, 1); + er = rot->enfrot; + er->Flags = Flags; + + /* When appending, skip first output to avoid duplicate entries in the data files */ + if (er->Flags & MD_APPENDFILES) + { + er->bOut = FALSE; + } + else + { + er->bOut = TRUE; + } + + if (MASTER(cr) && er->bOut) + { + please_cite(fplog, "Kutzner2011"); + } + + /* Output every step for reruns */ + if (er->Flags & MD_RERUN) + { + if (NULL != fplog) + { + fprintf(fplog, "%s rerun - will write rotation output every available step.\n", RotStr); + } + rot->nstrout = 1; + rot->nstsout = 1; + } + + er->out_slabs = NULL; + if (MASTER(cr) && HaveFlexibleGroups(rot) ) + { - er->out_slabs = open_slab_out(opt2fn("-rs", nfile, fnm), rot, oenv); ++ er->out_slabs = open_slab_out(opt2fn("-rs", nfile, fnm), rot); + } + + if (MASTER(cr)) + { + /* Remove pbc, make molecule whole. + * When ir->bContinuation=TRUE this has already been done, but ok. */ + snew(x_pbc, mtop->natoms); + m_rveccopy(mtop->natoms, x, x_pbc); + do_pbc_first_mtop(NULL, ir->ePBC, box, mtop, x_pbc); + /* All molecules will be whole now, but not necessarily in the home box. + * Additionally, if a rotation group consists of more than one molecule + * (e.g. 
two strands of DNA), each one of them can end up in a different + * periodic box. This is taken care of in init_rot_group. */ + } + + for (g = 0; g < rot->ngrp; g++) + { + rotg = &rot->grp[g]; + + if (NULL != fplog) + { + fprintf(fplog, "%s group %d type '%s'\n", RotStr, g, erotg_names[rotg->eType]); + } + + if (rotg->nat > 0) + { + /* Allocate space for the rotation group's data: */ + snew(rotg->enfrotgrp, 1); + erg = rotg->enfrotgrp; + + nat_max = max(nat_max, rotg->nat); + + if (PAR(cr)) + { + erg->nat_loc = 0; + erg->nalloc_loc = 0; + erg->ind_loc = NULL; + } + else + { + erg->nat_loc = rotg->nat; + erg->ind_loc = rotg->ind; + } + init_rot_group(fplog, cr, g, rotg, x_pbc, mtop, bVerbose, er->out_slabs, box, ir, + !(er->Flags & MD_APPENDFILES) ); /* Do not output the reference centers + * again if we are appending */ + } + } + + /* Allocate space for enforced rotation buffer variables */ + er->bufsize = nat_max; + snew(er->data, nat_max); + snew(er->xbuf, nat_max); + snew(er->mbuf, nat_max); + + /* Buffers for MPI reducing torques, angles, weights (for each group), and V */ + if (PAR(cr)) + { + er->mpi_bufsize = calc_mpi_bufsize(rot) + 100; /* larger to catch errors */ + snew(er->mpi_inbuf, er->mpi_bufsize); + snew(er->mpi_outbuf, er->mpi_bufsize); + } + else + { + er->mpi_bufsize = 0; + er->mpi_inbuf = NULL; + er->mpi_outbuf = NULL; + } + + /* Only do I/O on the MASTER */ + er->out_angles = NULL; + er->out_rot = NULL; + er->out_torque = NULL; + if (MASTER(cr)) + { + er->out_rot = open_rot_out(opt2fn("-ro", nfile, fnm), rot, oenv); + + if (rot->nstsout > 0) + { + if (HaveFlexibleGroups(rot) || HavePotFitGroups(rot) ) + { - er->out_angles = open_angles_out(opt2fn("-ra", nfile, fnm), rot, oenv); ++ er->out_angles = open_angles_out(opt2fn("-ra", nfile, fnm), rot); + } + if (HaveFlexibleGroups(rot) ) + { - er->out_torque = open_torque_out(opt2fn("-rt", nfile, fnm), rot, oenv); ++ er->out_torque = open_torque_out(opt2fn("-rt", nfile, fnm), rot); + } + } + + 
sfree(x_pbc); + } +} + + - extern void finish_rot(FILE *fplog, t_rot *rot) ++extern void finish_rot(t_rot *rot) +{ + gmx_enfrot_t er; /* Pointer to the enforced rotation buffer variables */ + + + er = rot->enfrot; + if (er->out_rot) + { + gmx_fio_fclose(er->out_rot); + } + if (er->out_slabs) + { + gmx_fio_fclose(er->out_slabs); + } + if (er->out_angles) + { + gmx_fio_fclose(er->out_angles); + } + if (er->out_torque) + { + gmx_fio_fclose(er->out_torque); + } +} + + +/* Rotate the local reference positions and store them in + * erg->xr_loc[0...(nat_loc-1)] + * + * Note that we already subtracted u or y_c from the reference positions + * in init_rot_group(). + */ +static void rotate_local_reference(t_rotgrp *rotg) +{ + gmx_enfrotgrp_t erg; + int i, ii; + + + erg = rotg->enfrotgrp; + + for (i = 0; i < erg->nat_loc; i++) + { + /* Index of this rotation group atom with respect to the whole rotation group */ + ii = erg->xc_ref_ind[i]; + /* Rotate */ + mvmul(erg->rotmat, rotg->x_ref[ii], erg->xr_loc[i]); + } +} + + +/* Select the PBC representation for each local x position and store that + * for later usage. We assume the right PBC image of an x is the one nearest to + * its rotated reference */ +static void choose_pbc_image(rvec x[], t_rotgrp *rotg, matrix box, int npbcdim) +{ + int i, ii; + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + rvec xref; + + + erg = rotg->enfrotgrp; + + for (i = 0; i < erg->nat_loc; i++) + { + /* Index of a rotation group atom */ + ii = erg->ind_loc[i]; + + /* Get the correctly rotated reference position. The pivot was already + * subtracted in init_rot_group() from the reference positions. Also, + * the reference positions have already been rotated in + * rotate_local_reference(). For the current reference position we thus + * only need to add the pivot again. 
*/ + copy_rvec(erg->xr_loc[i], xref); + rvec_inc(xref, erg->xc_ref_center); + + copy_correct_pbc_image(x[ii], erg->x_loc_pbc[i], xref, box, npbcdim); + } +} + + +extern void do_rotation( + t_commrec *cr, + t_inputrec *ir, + matrix box, + rvec x[], + real t, + gmx_large_int_t step, + gmx_wallcycle_t wcycle, + gmx_bool bNS) +{ + int g, i, ii; + t_rot *rot; + t_rotgrp *rotg; + gmx_bool outstep_slab, outstep_rot; + gmx_bool bFlex, bColl; + gmx_enfrot_t er; /* Pointer to the enforced rotation buffer variables */ + gmx_enfrotgrp_t erg; /* Pointer to enforced rotation group data */ + rvec transvec; + t_gmx_potfit *fit = NULL; /* For fit type 'potential' determine the fit + angle via the potential minimum */ + + /* Enforced rotation cycle counting: */ + gmx_cycles_t cycles_comp; /* Cycles for the enf. rotation computation + only, does not count communication. This + counter is used for load-balancing */ + +#ifdef TAKETIME + double t0; +#endif + + rot = ir->rot; + er = rot->enfrot; + + /* When to output in main rotation output file */ + outstep_rot = do_per_step(step, rot->nstrout) && er->bOut; + /* When to output per-slab data */ + outstep_slab = do_per_step(step, rot->nstsout) && er->bOut; + + /* Output time into rotation output file */ + if (outstep_rot && MASTER(cr)) + { + fprintf(er->out_rot, "%12.3e", t); + } + + /**************************************************************************/ + /* First do ALL the communication! */ + for (g = 0; g < rot->ngrp; g++) + { + rotg = &rot->grp[g]; + erg = rotg->enfrotgrp; + + /* Do we have a flexible axis? */ + bFlex = ISFLEX(rotg); + /* Do we use a collective (global) set of coordinates? */ + bColl = ISCOLL(rotg); + + /* Calculate the rotation matrix for this angle: */ + erg->degangle = rotg->rate * t; + calc_rotmat(rotg->vec, erg->degangle, erg->rotmat); + + if (bColl) + { + /* Transfer the rotation group's positions such that every node has + * all of them. 
Every node contributes its local positions x and stores + * it in the collective erg->xc array. */ + communicate_group_positions(cr, erg->xc, erg->xc_shifts, erg->xc_eshifts, bNS, + x, rotg->nat, erg->nat_loc, erg->ind_loc, erg->xc_ref_ind, erg->xc_old, box); + } + else + { + /* Fill the local masses array; + * this array changes in DD/neighborsearching steps */ + if (bNS) + { + for (i = 0; i < erg->nat_loc; i++) + { + /* Index of local atom w.r.t. the collective rotation group */ + ii = erg->xc_ref_ind[i]; + erg->m_loc[i] = erg->mc[ii]; + } + } + + /* Calculate Omega*(y_i-y_c) for the local positions */ + rotate_local_reference(rotg); + + /* Choose the nearest PBC images of the group atoms with respect + * to the rotated reference positions */ + choose_pbc_image(x, rotg, box, 3); + + /* Get the center of the rotation group */ + if ( (rotg->eType == erotgISOPF) || (rotg->eType == erotgPMPF) ) + { + get_center_comm(cr, erg->x_loc_pbc, erg->m_loc, erg->nat_loc, rotg->nat, erg->xc_center); + } + } + + } /* End of loop over rotation groups */ + + /**************************************************************************/ + /* Done communicating, we can start to count cycles for the load balancing now ... 
*/ + cycles_comp = gmx_cycles_read(); + + +#ifdef TAKETIME + t0 = MPI_Wtime(); +#endif + + for (g = 0; g < rot->ngrp; g++) + { + rotg = &rot->grp[g]; + erg = rotg->enfrotgrp; + + bFlex = ISFLEX(rotg); + bColl = ISCOLL(rotg); + + if (outstep_rot && MASTER(cr)) + { + fprintf(er->out_rot, "%12.4f", erg->degangle); + } + + /* Calculate angles and rotation matrices for potential fitting: */ + if ( (outstep_rot || outstep_slab) && (erotgFitPOT == rotg->eFittype) ) + { + fit = erg->PotAngleFit; + for (i = 0; i < rotg->PotAngle_nstep; i++) + { + calc_rotmat(rotg->vec, erg->degangle + fit->degangle[i], fit->rotmat[i]); + + /* Clear value from last step */ + erg->PotAngleFit->V[i] = 0.0; + } + } + + /* Clear values from last time step */ + erg->V = 0.0; + erg->torque_v = 0.0; + erg->angle_v = 0.0; + erg->weight_v = 0.0; + + switch (rotg->eType) + { + case erotgISO: + case erotgISOPF: + case erotgPM: + case erotgPMPF: - do_fixed(rotg, x, box, t, step, outstep_rot, outstep_slab); ++ do_fixed(rotg, outstep_rot, outstep_slab); + break; + case erotgRM: - do_radial_motion(rotg, x, box, t, step, outstep_rot, outstep_slab); ++ do_radial_motion(rotg, outstep_rot, outstep_slab); + break; + case erotgRMPF: - do_radial_motion_pf(rotg, x, box, t, step, outstep_rot, outstep_slab); ++ do_radial_motion_pf(rotg, x, box, outstep_rot, outstep_slab); + break; + case erotgRM2: + case erotgRM2PF: - do_radial_motion2(rotg, x, box, t, step, outstep_rot, outstep_slab); ++ do_radial_motion2(rotg, x, box, outstep_rot, outstep_slab); + break; + case erotgFLEXT: + case erotgFLEX2T: + /* Subtract the center of the rotation group from the collective positions array + * Also store the center in erg->xc_center since it needs to be subtracted + * in the low level routines from the local coordinates as well */ + get_center(erg->xc, erg->mc, rotg->nat, erg->xc_center); + svmul(-1.0, erg->xc_center, transvec); + translate_x(erg->xc, rotg->nat, transvec); - do_flexible(MASTER(cr), er, rotg, g, x, box, t, step, 
outstep_rot, outstep_slab); ++ do_flexible(MASTER(cr), er, rotg, g, x, box, t, outstep_rot, outstep_slab); + break; + case erotgFLEX: + case erotgFLEX2: + /* Do NOT subtract the center of mass in the low level routines! */ + clear_rvec(erg->xc_center); - do_flexible(MASTER(cr), er, rotg, g, x, box, t, step, outstep_rot, outstep_slab); ++ do_flexible(MASTER(cr), er, rotg, g, x, box, t, outstep_rot, outstep_slab); + break; + default: + gmx_fatal(FARGS, "No such rotation potential."); + break; + } + } + +#ifdef TAKETIME + if (MASTER(cr)) + { + fprintf(stderr, "%s calculation (step %d) took %g seconds.\n", RotStr, step, MPI_Wtime()-t0); + } +#endif + + /* Stop the enforced rotation cycle counter and add the computation-only + * cycles to the force cycles for load balancing */ + cycles_comp = gmx_cycles_read() - cycles_comp; + + if (DOMAINDECOMP(cr) && wcycle) + { + dd_cycles_add(cr->dd, cycles_comp, ddCyclF); + } +} diff --cc src/gromacs/mdlib/shakef.c index 57a6a8d386,0000000000..659fc32889 mode 100644,000000..100644 --- a/src/gromacs/mdlib/shakef.c +++ b/src/gromacs/mdlib/shakef.c @@@ -1,561 -1,0 +1,576 @@@ +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*- + * + * + * This source code is part of + * + * G R O M A C S + * + * GROningen MAchine for Chemical Simulations + * + * VERSION 3.2.0 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others. + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team, + * check out http://www.gromacs.org for more information. + + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * If you want to redistribute modifications, please consider that + * scientific software is very special. 
Version control is crucial - + * bugs must be traceable. We will be happy to consider code for + * inclusion in the official distribution, but derived work must not + * be called official GROMACS. Details are found in the README & COPYING + * files - if they are missing, get the official version at www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the papers on the package - you can find them in the top README file. + * + * For more info, check our website at http://www.gromacs.org + * + * And Hey: + * GROwing Monsters And Cloning Shrimps + */ +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include "sysstuff.h" +#include "typedefs.h" +#include "smalloc.h" +#include "pbc.h" +#include "txtdump.h" +#include "vec.h" +#include "nrnb.h" +#include "constr.h" + +typedef struct gmx_shakedata +{ + rvec *rij; + real *M2; + real *tt; + real *dist2; + int nalloc; + /* SOR stuff */ + real delta; + real omega; + real gamma; +} t_gmx_shakedata; + +gmx_shakedata_t shake_init() +{ + gmx_shakedata_t d; + + snew(d, 1); + + d->nalloc = 0; + d->rij = NULL; + d->M2 = NULL; + d->tt = NULL; + d->dist2 = NULL; + + /* SOR initialization */ + d->delta = 0.1; + d->omega = 1.0; + d->gamma = 1000000; + + return d; +} + +static void pv(FILE *log, char *s, rvec x) +{ + int m; + + fprintf(log, "%5s:", s); + for (m = 0; (m < DIM); m++) + { + fprintf(log, " %10.3f", x[m]); + } + fprintf(log, "\n"); + fflush(log); +} + +void cshake(atom_id iatom[], int ncon, int *nnit, int maxnit, + real dist2[], real xp[], real rij[], real m2[], real omega, + real invmass[], real tt[], real lagr[], int *nerror) +{ + /* + * r.c. van schaik and w.f. van gunsteren + * eth zuerich + * june 1992 + * Adapted for use with Gromacs by David van der Spoel november 92 and later. + */ + /* default should be increased! 
MRS 8/4/2009 */ + const real mytol = 1e-10; + + int ll, i, j, i3, j3, l3; + int ix, iy, iz, jx, jy, jz; + real toler, rpij2, rrpr, tx, ty, tz, diff, acor, im, jm; + real xh, yh, zh, rijx, rijy, rijz; + real tix, tiy, tiz; + real tjx, tjy, tjz; + int nit, error, nconv; + real iconvf; + + error = 0; + nconv = 1; + for (nit = 0; (nit < maxnit) && (nconv != 0) && (error == 0); nit++) + { + nconv = 0; + for (ll = 0; (ll < ncon) && (error == 0); ll++) + { + l3 = 3*ll; + rijx = rij[l3+XX]; + rijy = rij[l3+YY]; + rijz = rij[l3+ZZ]; + i = iatom[l3+1]; + j = iatom[l3+2]; + i3 = 3*i; + j3 = 3*j; + ix = i3+XX; + iy = i3+YY; + iz = i3+ZZ; + jx = j3+XX; + jy = j3+YY; + jz = j3+ZZ; + + tx = xp[ix]-xp[jx]; + ty = xp[iy]-xp[jy]; + tz = xp[iz]-xp[jz]; + rpij2 = tx*tx+ty*ty+tz*tz; + toler = dist2[ll]; + diff = toler-rpij2; + + /* iconvf is less than 1 when the error is smaller than a bound */ + /* But if tt is too big, then it will result in looping in iconv */ + + iconvf = fabs(diff)*tt[ll]; + + if (iconvf > 1) + { + nconv = iconvf; + rrpr = rijx*tx+rijy*ty+rijz*tz; + + if (rrpr < toler*mytol) + { + error = ll+1; + } + else + { + acor = omega*diff*m2[ll]/rrpr; + lagr[ll] += acor; + xh = rijx*acor; + yh = rijy*acor; + zh = rijz*acor; + im = invmass[i]; + jm = invmass[j]; + xp[ix] += xh*im; + xp[iy] += yh*im; + xp[iz] += zh*im; + xp[jx] -= xh*jm; + xp[jy] -= yh*jm; + xp[jz] -= zh*jm; + } + } + } + } + *nnit = nit; + *nerror = error; +} + +int vec_shakef(FILE *fplog, gmx_shakedata_t shaked, + int natoms, real invmass[], int ncon, + t_iparams ip[], t_iatom *iatom, + real tol, rvec x[], rvec prime[], real omega, + gmx_bool bFEP, real lambda, real lagr[], + real invdt, rvec *v, + gmx_bool bCalcVir, tensor vir_r_m_dr, int econq, + t_vetavars *vetavar) +{ + rvec *rij; + real *M2, *tt, *dist2; + int maxnit = 1000; + int nit = 0, ll, i, j, type; + t_iatom *ia; + real L1, tol2, toler; + real mm = 0., tmp; + int error = 0; + real g, vscale, rscale, rvscale; + + if (ncon > shaked->nalloc) + { + 
shaked->nalloc = over_alloc_dd(ncon); + srenew(shaked->rij, shaked->nalloc); + srenew(shaked->M2, shaked->nalloc); + srenew(shaked->tt, shaked->nalloc); + srenew(shaked->dist2, shaked->nalloc); + } + rij = shaked->rij; + M2 = shaked->M2; + tt = shaked->tt; + dist2 = shaked->dist2; + + L1 = 1.0-lambda; + tol2 = 2.0*tol; + ia = iatom; + for (ll = 0; (ll < ncon); ll++, ia += 3) + { + type = ia[0]; + i = ia[1]; + j = ia[2]; + + mm = 2*(invmass[i]+invmass[j]); + rij[ll][XX] = x[i][XX]-x[j][XX]; + rij[ll][YY] = x[i][YY]-x[j][YY]; + rij[ll][ZZ] = x[i][ZZ]-x[j][ZZ]; + M2[ll] = 1.0/mm; + if (bFEP) + { + toler = sqr(L1*ip[type].constr.dA + lambda*ip[type].constr.dB); + } + else + { + toler = sqr(ip[type].constr.dA); + } + dist2[ll] = toler; + tt[ll] = 1.0/(toler*tol2); + } + + switch (econq) + { + case econqCoord: + cshake(iatom, ncon, &nit, maxnit, dist2, prime[0], rij[0], M2, omega, invmass, tt, lagr, &error); + break; + case econqVeloc: + crattle(iatom, ncon, &nit, maxnit, dist2, prime[0], rij[0], M2, omega, invmass, tt, lagr, &error, invdt, vetavar); + break; + } + + if (nit >= maxnit) + { + if (fplog) + { + fprintf(fplog, "Shake did not converge in %d steps\n", maxnit); + } + fprintf(stderr, "Shake did not converge in %d steps\n", maxnit); + nit = 0; + } + else if (error != 0) + { + if (fplog) + { + fprintf(fplog, "Inner product between old and new vector <= 0.0!\n" + "constraint #%d atoms %u and %u\n", + error-1, iatom[3*(error-1)+1]+1, iatom[3*(error-1)+2]+1); + } + fprintf(stderr, "Inner product between old and new vector <= 0.0!\n" + "constraint #%d atoms %u and %u\n", + error-1, iatom[3*(error-1)+1]+1, iatom[3*(error-1)+2]+1); + nit = 0; + } + + /* Constraint virial and correct the lagrange multipliers for the length */ + + ia = iatom; + + for (ll = 0; (ll < ncon); ll++, ia += 3) + { + + if ((econq == econqCoord) && v != NULL) + { + /* Correct the velocities */ + mm = lagr[ll]*invmass[ia[1]]*invdt/vetavar->rscale; + for (i = 0; i < DIM; i++) + { + v[ia[1]][i] += 
mm*rij[ll][i]; + } + mm = lagr[ll]*invmass[ia[2]]*invdt/vetavar->rscale; + for (i = 0; i < DIM; i++) + { + v[ia[2]][i] -= mm*rij[ll][i]; + } + /* 16 flops */ + } + + /* constraint virial */ + if (bCalcVir) + { + if (econq == econqCoord) + { + mm = lagr[ll]/vetavar->rvscale; + } + if (econq == econqVeloc) + { + mm = lagr[ll]/(vetavar->vscale*vetavar->vscale_nhc[0]); + } + for (i = 0; i < DIM; i++) + { + tmp = mm*rij[ll][i]; + for (j = 0; j < DIM; j++) + { + vir_r_m_dr[i][j] -= tmp*rij[ll][j]; + } + } + /* 21 flops */ + } + + /* Correct the lagrange multipliers for the length */ + /* (more details would be useful here . . . )*/ + + type = ia[0]; + if (bFEP) + { + toler = L1*ip[type].constr.dA + lambda*ip[type].constr.dB; + } + else + { + toler = ip[type].constr.dA; + lagr[ll] *= toler; + } + } + + return nit; +} + +static void check_cons(FILE *log, int nc, rvec x[], rvec prime[], rvec v[], + t_iparams ip[], t_iatom *iatom, + real invmass[], int econq) +{ + t_iatom *ia; + int ai, aj; + int i; + real d, dp; + rvec dx, dv; + + fprintf(log, + " i mi j mj before after should be\n"); + ia = iatom; + for (i = 0; (i < nc); i++, ia += 3) + { + ai = ia[1]; + aj = ia[2]; + rvec_sub(x[ai], x[aj], dx); + d = norm(dx); + + switch (econq) + { + case econqCoord: + rvec_sub(prime[ai], prime[aj], dx); + dp = norm(dx); + fprintf(log, "%5d %5.2f %5d %5.2f %10.5f %10.5f %10.5f\n", + ai+1, 1.0/invmass[ai], + aj+1, 1.0/invmass[aj], d, dp, ip[ia[0]].constr.dA); + break; + case econqVeloc: + rvec_sub(v[ai], v[aj], dv); + d = iprod(dx, dv); + rvec_sub(prime[ai], prime[aj], dv); + dp = iprod(dx, dv); + fprintf(log, "%5d %5.2f %5d %5.2f %10.5f %10.5f %10.5f\n", + ai+1, 1.0/invmass[ai], + aj+1, 1.0/invmass[aj], d, dp, 0.); + break; + } + } +} + +gmx_bool bshakef(FILE *log, gmx_shakedata_t shaked, + int natoms, real invmass[], int nblocks, int sblock[], + t_idef *idef, t_inputrec *ir, rvec x_s[], rvec prime[], + t_nrnb *nrnb, real *lagr, real lambda, real *dvdlambda, + real invdt, rvec *v, 
gmx_bool bCalcVir, tensor vir_r_m_dr, + gmx_bool bDumpOnError, int econq, t_vetavars *vetavar) +{ + t_iatom *iatoms; + real *lam, dt_2, dvdl; + int i, n0, ncons, blen, type; + int tnit = 0, trij = 0; + +#ifdef DEBUG + fprintf(log, "nblocks=%d, sblock[0]=%d\n", nblocks, sblock[0]); +#endif + + ncons = idef->il[F_CONSTR].nr/3; + + for (i = 0; i < ncons; i++) + { + lagr[i] = 0; + } + + iatoms = &(idef->il[F_CONSTR].iatoms[sblock[0]]); + lam = lagr; + for (i = 0; (i < nblocks); ) + { + blen = (sblock[i+1]-sblock[i]); + blen /= 3; + n0 = vec_shakef(log, shaked, natoms, invmass, blen, idef->iparams, + iatoms, ir->shake_tol, x_s, prime, shaked->omega, + ir->efep != efepNO, lambda, lam, invdt, v, bCalcVir, vir_r_m_dr, + econq, vetavar); + +#ifdef DEBUGSHAKE + check_cons(log, blen, x_s, prime, v, idef->iparams, iatoms, invmass, econq); +#endif + + if (n0 == 0) + { + if (bDumpOnError && log) + { + { + check_cons(log, blen, x_s, prime, v, idef->iparams, iatoms, invmass, econq); + } + } + return FALSE; + } + tnit += n0*blen; + trij += blen; + iatoms += 3*blen; /* Increment pointer! */ + lam += blen; + i++; + } + /* only for position part? */ + if (econq == econqCoord) + { + if (ir->efep != efepNO) + { ++ real bondA,bondB; + dt_2 = 1/sqr(ir->delta_t); + dvdl = 0; + for (i = 0; i < ncons; i++) + { + type = idef->il[F_CONSTR].iatoms[3*i]; - dvdl += lagr[i]*dt_2* - (idef->iparams[type].constr.dB-idef->iparams[type].constr.dA); ++ ++ /* dh/dl contribution from constraint force is dh/dr (constraint force) dot dr/dl */ ++ /* constraint force is -\sum_i lagr_i* d(constraint)/dr, with constrant = r^2-d^2 */ ++ /* constraint force is -\sum_i lagr_i* 2 r */ ++ /* so dh/dl = -\sum_i lagr_i* 2 r * dr/dl */ ++ /* However, by comparison with lincs and with ++ comparison with a full thermodynamics cycle (see ++ redmine issue #1255), this is off by a factor of ++ two -- the 2r should apparently just be r. 
Further ++ investigation should be done at some point to ++ understand why and see if there is something deeper ++ we are missing */ ++ ++ bondA = idef->iparams[type].constr.dA; ++ bondB = idef->iparams[type].constr.dB; ++ dvdl += lagr[i] * dt_2 * ((1.0-lambda)*bondA + lambda*bondB) * (bondB-bondA); + } + *dvdlambda += dvdl; + } + } +#ifdef DEBUG + fprintf(log, "tnit: %5d omega: %10.5f\n", tnit, omega); +#endif + if (ir->bShakeSOR) + { + if (tnit > shaked->gamma) + { + shaked->delta *= -0.5; + } + shaked->omega += shaked->delta; + shaked->gamma = tnit; + } + inc_nrnb(nrnb, eNR_SHAKE, tnit); + inc_nrnb(nrnb, eNR_SHAKE_RIJ, trij); + if (v) + { + inc_nrnb(nrnb, eNR_CONSTR_V, trij*2); + } + if (bCalcVir) + { + inc_nrnb(nrnb, eNR_CONSTR_VIR, trij); + } + + return TRUE; +} + +void crattle(atom_id iatom[], int ncon, int *nnit, int maxnit, + real dist2[], real vp[], real rij[], real m2[], real omega, + real invmass[], real tt[], real lagr[], int *nerror, real invdt, t_vetavars *vetavar) +{ + /* + * r.c. van schaik and w.f. van gunsteren + * eth zuerich + * june 1992 + * Adapted for use with Gromacs by David van der Spoel november 92 and later. + * rattle added by M.R. 
Shirts, April 2004, from code written by Jay Ponder in TINKER + * second part of rattle algorithm + */ + + const real mytol = 1e-10; + + int ll, i, j, i3, j3, l3, ii; + int ix, iy, iz, jx, jy, jz; + real toler, rijd, vpijd, vx, vy, vz, diff, acor, xdotd, fac, im, jm, imdt, jmdt; + real xh, yh, zh, rijx, rijy, rijz; + real tix, tiy, tiz; + real tjx, tjy, tjz; + int nit, error, nconv; + real veta, vscale_nhc, iconvf; + + veta = vetavar->veta; + vscale_nhc = vetavar->vscale_nhc[0]; /* for now, just use the first state */ + + error = 0; + nconv = 1; + for (nit = 0; (nit < maxnit) && (nconv != 0) && (error == 0); nit++) + { + nconv = 0; + for (ll = 0; (ll < ncon) && (error == 0); ll++) + { + l3 = 3*ll; + rijx = rij[l3+XX]; + rijy = rij[l3+YY]; + rijz = rij[l3+ZZ]; + i = iatom[l3+1]; + j = iatom[l3+2]; + i3 = 3*i; + j3 = 3*j; + ix = i3+XX; + iy = i3+YY; + iz = i3+ZZ; + jx = j3+XX; + jy = j3+YY; + jz = j3+ZZ; + vx = vp[ix]-vp[jx]; + vy = vp[iy]-vp[jy]; + vz = vp[iz]-vp[jz]; + + vpijd = vx*rijx+vy*rijy+vz*rijz; + toler = dist2[ll]; + /* this is r(t+dt) \dotproduct \dot{r}(t+dt) */ + xdotd = vpijd*vscale_nhc + veta*toler; + + /* iconv is zero when the error is smaller than a bound */ + iconvf = fabs(xdotd)*(tt[ll]/invdt); + + if (iconvf > 1) + { + nconv = iconvf; + fac = omega*2.0*m2[ll]/toler; + acor = -fac*xdotd; + lagr[ll] += acor; + + xh = rijx*acor; + yh = rijy*acor; + zh = rijz*acor; + + im = invmass[i]/vscale_nhc; + jm = invmass[j]/vscale_nhc; + + vp[ix] += xh*im; + vp[iy] += yh*im; + vp[iz] += zh*im; + vp[jx] -= xh*jm; + vp[jy] -= yh*jm; + vp[jz] -= zh*jm; + } + } + } + *nnit = nit; + *nerror = error; +} diff --cc src/gromacs/selection/selhelp.cpp index 5cd674a639,0000000000..8d8defbff6 mode 100644,000000..100644 --- a/src/gromacs/selection/selhelp.cpp +++ b/src/gromacs/selection/selhelp.cpp @@@ -1,634 -1,0 +1,634 @@@ +/* + * This file is part of the GROMACS molecular simulation package. 
+ * + * Copyright (c) 2009,2010,2011,2012, by the GROMACS development team, led by + * David van der Spoel, Berk Hess, Erik Lindahl, and including many + * others, as listed in the AUTHORS file in the top-level source + * directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +/*! \internal \file + * \brief + * Implements functions in selhelp.h. 
+ * + * \author Teemu Murtola + * \ingroup module_selection + */ +#include +#include +#include + +#include + +#include "gromacs/onlinehelp/helptopic.h" +#include "gromacs/onlinehelp/helpwritercontext.h" +#include "gromacs/utility/exceptions.h" +#include "gromacs/utility/file.h" +#include "gromacs/utility/stringutil.h" + +#include "selhelp.h" +#include "selmethod.h" +#include "symrec.h" + +namespace +{ + +struct CommonHelpText +{ + static const char name[]; + static const char title[]; + static const char *const text[]; +}; + +const char CommonHelpText::name[] = "selections"; +const char CommonHelpText::title[] = + "Selection syntax and usage"; +const char *const CommonHelpText::text[] = { + "Selections are used to select atoms/molecules/residues for analysis.", + "In contrast to traditional index files, selections can be dynamic, i.e.,", + "select different atoms for different trajectory frames.[PAR]", + + "Each analysis tool requires a different number of selections and the", + "selections are interpreted differently. The general idea is still the", + "same: each selection evaluates to a set of positions, where a position", + "can be an atom position or center-of-mass or center-of-geometry of", + "a set of atoms. The tool then uses these positions for its analysis to", + "allow very flexible processing. 
Some analysis tools may have limitations", + "on the types of selections allowed.[PAR]", + + "To get started with selections, run, e.g., [TT][PROGRAM] select[tt]", + "without specifying selections on the command-line and use the interactive", + "prompt to try out different selections.", + "This tool provides output options that allow one to see what is actually", + "selected by the given selections, and the interactive prompt reports", + "syntax errors immediately, allowing one to try again.", + "The subtopics listed below give more details on different aspects of", + "selections.", +}; + +struct ArithmeticHelpText +{ + static const char name[]; + static const char title[]; + static const char *const text[]; +}; + +const char ArithmeticHelpText::name[] = "arithmetic"; +const char ArithmeticHelpText::title[] = + "Arithmetic expressions in selections"; +const char *const ArithmeticHelpText::text[] = { + "Basic arithmetic evaluation is supported for numeric expressions.", + "Supported operations are addition, subtraction, negation, multiplication,", + "division, and exponentiation (using ^).", + "Result of a division by zero or other illegal operations is undefined.", +}; + +struct CmdLineHelpText +{ + static const char name[]; + static const char title[]; + static const char *const text[]; +}; + +const char CmdLineHelpText::name[] = "cmdline"; +const char CmdLineHelpText::title[] = + "Specifying selections from command line"; +const char *const CmdLineHelpText::text[] = { + "If no selections are provided on the command line, you are prompted to", + "type the selections interactively (a pipe can also be used to provide", + "the selections in this case for most tools). 
While this works well for", + "testing, it is easier to provide the selections from the command line", + "if they are complex or for scripting.[PAR]", + + "Each tool has different command-line arguments for specifying selections", + "(listed by [TT][PROGRAM] help [tt]).", + "You can either pass a single string containing all selections (separated", + "by semicolons), or multiple strings, each containing one selection.", + "Note that you need to quote the selections to protect them from the", + "shell.[PAR]", + + "If you set a selection command-line argument, but do not provide any", + "selections, you are prompted to type the selections for that argument", + "interactively. This is useful if that selection argument is optional,", + "in which case it is not normally prompted for.[PAR]", + + "To provide selections from a file, use [TT]-sf file.dat[tt] in the place", + "of the selection for a selection argument (e.g.,", + "[TT]-select -sf file.dat[tt]). In general, the [TT]-sf[tt] argument reads", + "selections from the provided file and assigns them to selection arguments", + "that have been specified up to that point, but for which no selections", + "have been provided.", + "As a special case, [TT]-sf[tt] provided on its own, without preceding", + "selection arguments, assigns the selections to all (yet unset) required", + "selections (i.e., those that would be promted interactively if no", + "selections are provided on the command line).[PAR]", + + "To use groups from a traditional index file, use argument [TT]-n[tt]", + "to provide a file. See the \"syntax\" subtopic for how to use them.", + "If this option is not provided, default groups are generated.", + "The default groups are generated by reading selections from a file", + "[TT]defselection.dat[tt]. 
If such a file is found in the current", + "directory, it is used instead of the one provided by default.[PAR]", + + "Depending on the tool, two additional command-line arguments may be", + "available to control the behavior:[BR]", + "1. [TT]-seltype[tt] can be used to specify the default type of", + "positions to calculate for each selection.[BR]", + "2. [TT]-selrpos[tt] can be used to specify the default type of", + "positions used in selecting atoms by coordinates.[BR]", + "See the \"positions\" subtopic for more information on these options.", +}; + +struct EvaluationHelpText +{ + static const char name[]; + static const char title[]; + static const char *const text[]; +}; + +const char EvaluationHelpText::name[] = "evaluation"; +const char EvaluationHelpText::title[] = + "Selection evaluation and optimization"; +const char *const EvaluationHelpText::text[] = { + "Boolean evaluation proceeds from left to right and is short-circuiting", + "i.e., as soon as it is known whether an atom will be selected, the", + "remaining expressions are not evaluated at all.", + "This can be used to optimize the selections: you should write the", + "most restrictive and/or the most inexpensive expressions first in", + "boolean expressions.", + "The relative ordering between dynamic and static expressions does not", + "matter: all static expressions are evaluated only once, before the first", + "frame, and the result becomes the leftmost expression.[PAR]", + + "Another point for optimization is in common subexpressions: they are not", + "automatically recognized, but can be manually optimized by the use of", + "variables. 
This can have a big impact on the performance of complex", + "selections, in particular if you define several index groups like this:", + " [TT]rdist = distance from com of resnr 1 to 5;[tt][BR]", + " [TT]resname RES and rdist < 2;[tt][BR]", + " [TT]resname RES and rdist < 4;[tt][BR]", + " [TT]resname RES and rdist < 6;[tt][BR]", + "Without the variable assignment, the distances would be evaluated three", + "times, although they are exactly the same within each selection.", + "Anything assigned into a variable becomes a common subexpression that", + "is evaluated only once during a frame.", + "Currently, in some cases the use of variables can actually lead to a small", + "performance loss because of the checks necessary to determine for which", + "atoms the expression has already been evaluated, but this should not be", + "a major problem.", +}; + +struct ExamplesHelpText +{ + static const char name[]; + static const char title[]; + static const char *const text[]; +}; + +const char ExamplesHelpText::name[] = "examples"; +const char ExamplesHelpText::title[] = + "Selection examples"; +const char *const ExamplesHelpText::text[] = { + // TODO: Once there are more tools available, use examples that invoke + // tools and explain what the selections do in those tools. 
+ "Below, examples of increasingly complex selections are given.[PAR]", + + "Selection of all water oxygens:[BR]", + " resname SOL and name OW", + "[PAR]", + + "Centers of mass of residues 1 to 5 and 10:[BR]", + " res_com of resnr 1 to 5 10", + "[PAR]", + + "All atoms farther than 1 nm of a fixed position:[BR]", - " not within 1 of (1.2, 3.1, 2.4)", ++ " not within 1 of [1.2, 3.1, 2.4]", + "[PAR]", + + "All atoms of a residue LIG within 0.5 nm of a protein (with a custom name):[BR]", + " \"Close to protein\" resname LIG and within 0.5 of group \"Protein\"", + "[PAR]", + + "All protein residues that have at least one atom within 0.5 nm of a residue LIG:[BR]", + " group \"Protein\" and same residue as within 0.5 of resname LIG", + "[PAR]", + + "All RES residues whose COM is between 2 and 4 nm from the COM of all of them:[BR]", + " rdist = res_com distance from com of resname RES[BR]", + " resname RES and rdist >= 2 and rdist <= 4", + "[PAR]", + + "Selection like C1 C2 C2 C3 C3 C4 ... C8 C9 (e.g., for g_bond):[BR]", + " name \"C[1-8]\" merge name \"C[2-9]\"", +}; + +struct KeywordsHelpText +{ + static const char name[]; + static const char title[]; + static const char *const text[]; +}; + +const char KeywordsHelpText::name[] = "keywords"; +const char KeywordsHelpText::title[] = + "Selection keywords"; +const char *const KeywordsHelpText::text[] = { + "The following selection keywords are currently available.", + "For keywords marked with a star, additional help is available through", + "a subtopic KEYWORD, where KEYWORD is the name of the keyword.", +}; + +struct LimitationsHelpText +{ + static const char name[]; + static const char title[]; + static const char *const text[]; +}; + +const char LimitationsHelpText::name[] = "limitations"; +const char LimitationsHelpText::title[] = + "Selection limitations"; +const char *const LimitationsHelpText::text[] = { + "Some analysis programs may require a special structure for the input", + "selections (e.g., [TT]g_angle[tt] 
requires the index group to be made", + "of groups of three or four atoms).", + "For such programs, it is up to the user to provide a proper selection", + "expression that always returns such positions.", + "[PAR]", + + "Due to technical reasons, having a negative value as the first value in", + "expressions like[BR]", + "[TT]charge -1 to -0.7[tt][BR]", + "result in a syntax error. A workaround is to write[BR]", + "[TT]charge {-1 to -0.7}[tt][BR]", + "instead.[PAR]", + + "When [TT]name[tt] selection keyword is used together with PDB input", + "files, the behavior may be unintuitive. When Gromacs reads in a PDB", + "file, 4 character atom names that start with a digit are transformed", + "such that, e.g., 1HG2 becomes HG21, and the latter is what is matched", + "by the [TT]name[tt] keyword. Use [TT]pdbname[tt] to match the atom name", + "as it appears in the input PDB file.", +}; + +struct PositionsHelpText +{ + static const char name[]; + static const char title[]; + static const char *const text[]; +}; + +const char PositionsHelpText::name[] = "positions"; +const char PositionsHelpText::title[] = + "Specifying positions in selections"; +const char *const PositionsHelpText::text[] = { + "Possible ways of specifying positions in selections are:[PAR]", + + "1. A constant position can be defined as [TT][XX, YY, ZZ][tt], where", + "[TT]XX[tt], [TT]YY[tt] and [TT]ZZ[tt] are real numbers.[PAR]", + + "2. [TT]com of ATOM_EXPR [pbc][tt] or [TT]cog of ATOM_EXPR [pbc][tt]", + "calculate the center of mass/geometry of [TT]ATOM_EXPR[tt]. If", + "[TT]pbc[tt] is specified, the center is calculated iteratively to try", + "to deal with cases where [TT]ATOM_EXPR[tt] wraps around periodic", + "boundary conditions.[PAR]", + + "3. 
[TT]POSTYPE of ATOM_EXPR[tt] calculates the specified positions for", + "the atoms in [TT]ATOM_EXPR[tt].", + "[TT]POSTYPE[tt] can be [TT]atom[tt], [TT]res_com[tt], [TT]res_cog[tt],", + "[TT]mol_com[tt] or [TT]mol_cog[tt], with an optional prefix [TT]whole_[tt]", + "[TT]part_[tt] or [TT]dyn_[tt].", + "[TT]whole_[tt] calculates the centers for the whole residue/molecule,", + "even if only part of it is selected.", + "[TT]part_[tt] prefix calculates the centers for the selected atoms, but", + "uses always the same atoms for the same residue/molecule. The used atoms", + "are determined from the the largest group allowed by the selection.", + "[TT]dyn_[tt] calculates the centers strictly only for the selected atoms.", + "If no prefix is specified, whole selections default to [TT]part_[tt] and", + "other places default to [TT]whole_[tt].", + "The latter is often desirable to select the same molecules in different", + "tools, while the first is a compromise between speed ([TT]dyn_[tt]", + "positions can be slower to evaluate than [TT]part_[tt]) and intuitive", + "behavior.[PAR]", + + "4. [TT]ATOM_EXPR[tt], when given for whole selections, is handled as 3.", + "above, using the position type from the command-line argument", + "[TT]-seltype[tt].[PAR]", + + "Selection keywords that select atoms based on their positions, such as", + "[TT]dist from[tt], use by default the positions defined by the", + "[TT]-selrpos[tt] command-line option.", + "This can be overridden by prepending a [TT]POSTYPE[tt] specifier to the", + "keyword. For example, [TT]res_com dist from POS[tt] evaluates the", + "residue center of mass distances. 
In the example, all atoms of a residue", + "are either selected or not, based on the single distance calculated.", +}; + +struct SyntaxHelpText +{ + static const char name[]; + static const char title[]; + static const char *const text[]; +}; + +const char SyntaxHelpText::name[] = "syntax"; +const char SyntaxHelpText::title[] = + "Selection syntax"; +const char *const SyntaxHelpText::text[] = { + "A set of selections consists of one or more selections, separated by", + "semicolons. Each selection defines a set of positions for the analysis.", + "Each selection can also be preceded by a string that gives a name for", + "the selection for use in, e.g., graph legends.", + "If no name is provided, the string used for the selection is used", + "automatically as the name.[PAR]", + + "For interactive input, the syntax is slightly altered: line breaks can", + "also be used to separate selections. \\ followed by a line break can", + "be used to continue a line if necessary.", + "Notice that the above only applies to real interactive input,", + "not if you provide the selections, e.g., from a pipe.[PAR]", + + "It is possible to use variables to store selection expressions.", + "A variable is defined with the following syntax:[BR]", + "[TT]VARNAME = EXPR ;[tt][BR]", + "where [TT]EXPR[tt] is any valid selection expression.", + "After this, [TT]VARNAME[tt] can be used anywhere where [TT]EXPR[tt]", + "would be valid.[PAR]", + + "Selections are composed of three main types of expressions, those that", + "define atoms ([TT]ATOM_EXPR[tt]s), those that define positions", + "([TT]POS_EXPR[tt]s), and those that evaluate to numeric values", + "([TT]NUM_EXPR[tt]s). Each selection should be a [TT]POS_EXPR[tt]", + "or a [TT]ATOM_EXPR[tt] (the latter is automatically converted to", + "positions). The basic rules are as follows:[BR]", + "1. 
An expression like [TT]NUM_EXPR1 < NUM_EXPR2[tt] evaluates to an", + "[TT]ATOM_EXPR[tt] that selects all the atoms for which the comparison", + "is true.[BR]", + "2. Atom expressions can be combined with boolean operations such as", + "[TT]not ATOM_EXPR[tt], [TT]ATOM_EXPR and ATOM_EXPR[tt], or", + "[TT]ATOM_EXPR or ATOM_EXPR[tt]. Parentheses can be used to alter the", + "evaluation order.[BR]", + "3. [TT]ATOM_EXPR[tt] expressions can be converted into [TT]POS_EXPR[tt]", + "expressions in various ways, see the \"positions\" subtopic for more", + "details.[PAR]", + + "Some keywords select atoms based on string values such as the atom name.", + "For these keywords, it is possible to use wildcards ([TT]name \"C*\"[tt])", + "or regular expressions (e.g., [TT]resname \"R[AB]\"[tt]).", + "The match type is automatically guessed from the string: if it contains", + "other characters than letters, numbers, '*', or '?', it is interpreted", + "as a regular expression.", + "To force the matching to use literal string matching, use", + "[TT]name = \"C*\"[tt] to match a literal C*.", + "To force other type of matching, use '?' or '~' in place of '=' to force", + "wildcard or regular expression matching, respectively.[PAR]", + + "Strings that contain non-alphanumeric characters should be enclosed in", + "double quotes as in the examples. For other strings, the quotes are", + "optional, but if the value conflicts with a reserved keyword, a syntax", + "error will occur. 
If your strings contain uppercase letters, this should", + "not happen.[PAR]", + + "Index groups provided with the [TT]-n[tt] command-line option or", + "generated by default can be accessed with [TT]group NR[tt] or", + "[TT]group NAME[tt], where [TT]NR[tt] is a zero-based index of the group", + "and [TT]NAME[tt] is part of the name of the desired group.", + "The keyword [TT]group[tt] is optional if the whole selection is", + "provided from an index group.", + "To see a list of available groups in the interactive mode, press enter", + "in the beginning of a line.", +}; + +} // namespace + +namespace gmx +{ + +namespace +{ + +/*! \internal \brief + * Help topic implementation for an individual selection method. + * + * \ingroup module_selection + */ +class KeywordDetailsHelpTopic : public AbstractSimpleHelpTopic +{ + public: + //! Initialize help topic for the given selection method. + KeywordDetailsHelpTopic(const std::string &name, + const gmx_ana_selmethod_t &method) + : name_(name), method_(method) + { + } + + virtual const char *name() const + { + return name_.c_str(); + } + virtual const char *title() const + { + return NULL; + } + + protected: + virtual std::string helpText() const + { + return concatenateStrings(method_.help.help, method_.help.nlhelp); + } + + private: + std::string name_; + const gmx_ana_selmethod_t &method_; + + GMX_DISALLOW_COPY_AND_ASSIGN(KeywordDetailsHelpTopic); +}; + +/*! \internal \brief + * Custom help topic for printing a list of selection keywords. + * + * \ingroup module_selection + */ +class KeywordsHelpTopic : public CompositeHelpTopic +{ + public: + KeywordsHelpTopic(); + + virtual void writeHelp(const HelpWriterContext &context) const; + + private: + /*! \brief + * Container for known selection methods. + * + * The first item in the pair is the name of the selection method, and + * the second points to the static data structure that describes the + * method. 
+ * The name in the first item may differ from the name of the static + * data structure if an alias is defined for that method. + */ + typedef std::vector > + MethodList; + + /*! \brief + * Prints a brief list of keywords (selection methods) available. + * + * \param[in] context Context for printing the help. + * \param[in] type Only methods that return this type are printed. + * \param[in] bModifiers If false, \ref SMETH_MODIFIER methods are + * excluded, otherwise only them are printed. + */ + void printKeywordList(const HelpWriterContext &context, + e_selvalue_t type, bool bModifiers) const; + + MethodList methods_; +}; + +KeywordsHelpTopic::KeywordsHelpTopic() +{ + // TODO: This is not a very elegant way of getting the list of selection + // methods, but this needs to be rewritten in any case if/when #652 is + // implemented. + boost::scoped_ptr symtab( + new SelectionParserSymbolTable); + gmx_ana_selmethod_register_defaults(symtab.get()); + + SelectionParserSymbolIterator symbol + = symtab->beginIterator(SelectionParserSymbol::MethodSymbol); + while (symbol != symtab->endIterator()) + { + const std::string &symname = symbol->name(); + const gmx_ana_selmethod_t *method = symbol->methodValue(); + methods_.push_back(std::make_pair(std::string(symname), method)); + if (method->help.nlhelp > 0 && method->help.help != NULL) + { + addSubTopic(HelpTopicPointer( + new KeywordDetailsHelpTopic(symname, *method))); + } + ++symbol; + } +} + +void KeywordsHelpTopic::writeHelp(const HelpWriterContext &context) const +{ + if (context.outputFormat() != eHelpOutputFormat_Console) + { + GMX_THROW(NotImplementedError( + "Selection help is not implemented for this output format")); + } + // TODO: The markup here is not really appropriate, and printKeywordList() + // still prints raw text, but these are waiting for discussion of the + // markup format in #969. 
+ writeBasicHelpTopic(context, *this, helpText()); + context.writeTextBlock("[BR]"); + + // Print the list of keywords + context.writeTextBlock( + "Keywords that select atoms by an integer property:[BR]" + "(use in expressions or like \"atomnr 1 to 5 7 9\")[BR]"); + printKeywordList(context, INT_VALUE, false); + context.writeTextBlock("[BR]"); + + context.writeTextBlock( + "Keywords that select atoms by a numeric property:[BR]" + "(use in expressions or like \"occupancy 0.5 to 1\")[BR]"); + printKeywordList(context, REAL_VALUE, false); + context.writeTextBlock("[BR]"); + + context.writeTextBlock( + "Keywords that select atoms by a string property:[BR]" + "(use like \"name PATTERN [PATTERN] ...\")[BR]"); + printKeywordList(context, STR_VALUE, false); + context.writeTextBlock("[BR]"); + + context.writeTextBlock( + "Additional keywords that directly select atoms:[BR]"); + printKeywordList(context, GROUP_VALUE, false); + context.writeTextBlock("[BR]"); + + context.writeTextBlock( + "Keywords that directly evaluate to positions:[BR]" + "(see also \"positions\" subtopic)[BR]"); + printKeywordList(context, POS_VALUE, false); + context.writeTextBlock("[BR]"); + + context.writeTextBlock("Additional keywords:[BR]"); + printKeywordList(context, POS_VALUE, true); + printKeywordList(context, NO_VALUE, true); +} + +void KeywordsHelpTopic::printKeywordList(const HelpWriterContext &context, + e_selvalue_t type, + bool bModifiers) const +{ + File &file = context.outputFile(); + MethodList::const_iterator iter; + for (iter = methods_.begin(); iter != methods_.end(); ++iter) + { + const gmx_ana_selmethod_t &method = *iter->second; + bool bIsModifier = (method.flags & SMETH_MODIFIER) != 0; + if (method.type == type && bModifiers == bIsModifier) + { + bool bHasHelp = (method.help.nlhelp > 0 && method.help.help != NULL); + file.writeString(formatString(" %c ", bHasHelp ? 
'*' : ' ')); + if (method.help.syntax != NULL) + { + file.writeLine(method.help.syntax); + } + else + { + std::string symname = iter->first; + if (symname != method.name) + { + symname.append(formatString(" (synonym for %s)", method.name)); + } + file.writeLine(symname); + } + } + } +} + +} // namespace + +/*! \cond internal */ +HelpTopicPointer createSelectionHelpTopic() +{ + CompositeHelpTopicPointer root(new CompositeHelpTopic); + root->registerSubTopic >(); + root->registerSubTopic >(); + root->registerSubTopic >(); + root->registerSubTopic >(); + root->registerSubTopic(); + root->registerSubTopic >(); + root->registerSubTopic >(); + root->registerSubTopic >(); + return move(root); +} +//! \endcond + +} // namespace gmx diff --cc src/programs/mdrun/md.c index f972179326,0000000000..99683e8df2 mode 100644,000000..100644 --- a/src/programs/mdrun/md.c +++ b/src/programs/mdrun/md.c @@@ -1,2217 -1,0 +1,2236 @@@ +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*- + * + * + * This source code is part of + * + * G R O M A C S + * + * GROningen MAchine for Chemical Simulations + * + * VERSION 3.2.0 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others. + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team, + * check out http://www.gromacs.org for more information. + + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * If you want to redistribute modifications, please consider that + * scientific software is very special. Version control is crucial - + * bugs must be traceable. We will be happy to consider code for + * inclusion in the official distribution, but derived work must not + * be called official GROMACS. 
Details are found in the README & COPYING + * files - if they are missing, get the official version at www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the papers on the package - you can find them in the top README file. + * + * For more info, check our website at http://www.gromacs.org + * + * And Hey: + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon + */ +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "typedefs.h" +#include "smalloc.h" +#include "sysstuff.h" +#include "vec.h" +#include "statutil.h" +#include "vcm.h" +#include "mdebin.h" +#include "nrnb.h" +#include "calcmu.h" +#include "index.h" +#include "vsite.h" +#include "update.h" +#include "ns.h" +#include "trnio.h" +#include "xtcio.h" +#include "mdrun.h" +#include "md_support.h" +#include "md_logging.h" +#include "confio.h" +#include "network.h" +#include "pull.h" +#include "xvgr.h" +#include "physics.h" +#include "names.h" +#include "xmdrun.h" +#include "ionize.h" +#include "disre.h" +#include "orires.h" +#include "pme.h" +#include "mdatoms.h" +#include "repl_ex.h" +#include "qmmm.h" +#include "domdec.h" +#include "domdec_network.h" +#include "partdec.h" +#include "topsort.h" +#include "coulomb.h" +#include "constr.h" +#include "shellfc.h" +#include "compute_io.h" +#include "mvdata.h" +#include "checkpoint.h" +#include "mtop_util.h" +#include "sighandler.h" +#include "txtdump.h" +#include "string2.h" +#include "pme_loadbal.h" +#include "bondf.h" +#include "membed.h" +#include "types/nlistheuristics.h" +#include "types/iteratedconstraints.h" +#include "nbnxn_cuda_data_mgmt.h" + +#include "gromacs/utility/gmxmpi.h" + +#ifdef GMX_FAHCORE +#include "corewrap.h" +#endif + +static void reset_all_counters(FILE *fplog, t_commrec *cr, + gmx_large_int_t step, + gmx_large_int_t *step_rel, t_inputrec *ir, + gmx_wallcycle_t wcycle, t_nrnb *nrnb, + gmx_runtime_t *runtime, + nbnxn_cuda_ptr_t cu_nbv) +{ + char sbuf[STEPSTRSIZE]; + + /* Reset all the counters 
related to performance over the run */ + md_print_warn(cr, fplog, "step %s: resetting all time and cycle counters\n", + gmx_step_str(step, sbuf)); + + if (cu_nbv) + { + nbnxn_cuda_reset_timings(cu_nbv); + } + + wallcycle_stop(wcycle, ewcRUN); + wallcycle_reset_all(wcycle); + if (DOMAINDECOMP(cr)) + { + reset_dd_statistics_counters(cr->dd); + } + init_nrnb(nrnb); + ir->init_step += *step_rel; + ir->nsteps -= *step_rel; + *step_rel = 0; + wallcycle_start(wcycle, ewcRUN); + runtime_start(runtime); + print_date_and_time(fplog, cr->nodeid, "Restarted time", runtime); +} + +double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], + const output_env_t oenv, gmx_bool bVerbose, gmx_bool bCompact, + int nstglobalcomm, + gmx_vsite_t *vsite, gmx_constr_t constr, + int stepout, t_inputrec *ir, + gmx_mtop_t *top_global, + t_fcdata *fcd, + t_state *state_global, + t_mdatoms *mdatoms, + t_nrnb *nrnb, gmx_wallcycle_t wcycle, + gmx_edsam_t ed, t_forcerec *fr, + int repl_ex_nst, int repl_ex_nex, int repl_ex_seed, gmx_membed_t membed, + real cpt_period, real max_hours, + const char *deviceOptions, + unsigned long Flags, + gmx_runtime_t *runtime) +{ + gmx_mdoutf_t *outf; + gmx_large_int_t step, step_rel; + double run_time; + double t, t0, lam0[efptNR]; + gmx_bool bGStatEveryStep, bGStat, bCalcVir, bCalcEner; + gmx_bool bNS, bNStList, bSimAnn, bStopCM, bRerunMD, bNotLastFrame = FALSE, + bFirstStep, bStateFromCP, bStateFromTPX, bInitStep, bLastStep, + bBornRadii, bStartingFromCpt; + gmx_bool bDoDHDL = FALSE, bDoFEP = FALSE, bDoExpanded = FALSE; + gmx_bool do_ene, do_log, do_verbose, bRerunWarnNoV = TRUE, + bForceUpdate = FALSE, bCPT; + int mdof_flags; + gmx_bool bMasterState; + int force_flags, cglo_flags; + tensor force_vir, shake_vir, total_vir, tmp_vir, pres; + int i, m; + t_trxstatus *status; + rvec mu_tot; + t_vcm *vcm; + t_state *bufstate = NULL; + matrix *scale_tot, pcoupl_mu, M, ebox; + gmx_nlheur_t nlh; + t_trxframe rerun_fr; + gmx_repl_ex_t repl_ex = NULL; + 
int nchkpt = 1; + gmx_localtop_t *top; + t_mdebin *mdebin = NULL; + df_history_t df_history; + t_state *state = NULL; + rvec *f_global = NULL; + int n_xtc = -1; + rvec *x_xtc = NULL; + gmx_enerdata_t *enerd; + rvec *f = NULL; + gmx_global_stat_t gstat; + gmx_update_t upd = NULL; + t_graph *graph = NULL; + globsig_t gs; + gmx_rng_t mcrng = NULL; + gmx_bool bFFscan; + gmx_groups_t *groups; + gmx_ekindata_t *ekind, *ekind_save; + gmx_shellfc_t shellfc; + int count, nconverged = 0; + real timestep = 0; + double tcount = 0; + gmx_bool bIonize = FALSE; + gmx_bool bTCR = FALSE, bConverged = TRUE, bOK, bSumEkinhOld, bExchanged; + gmx_bool bAppend; + gmx_bool bResetCountersHalfMaxH = FALSE; + gmx_bool bVV, bIterativeCase, bFirstIterate, bTemp, bPres, bTrotter; + gmx_bool bUpdateDoLR; + real mu_aver = 0, dvdl_constr; + int a0, a1, gnx = 0, ii; + atom_id *grpindex = NULL; + char *grpname; + t_coupl_rec *tcr = NULL; + rvec *xcopy = NULL, *vcopy = NULL, *cbuf = NULL; + matrix boxcopy = {{0}}, lastbox; + tensor tmpvir; + real fom, oldfom, veta_save, pcurr, scalevir, tracevir; + real vetanew = 0; + int lamnew = 0; + /* for FEP */ + int nstfep; + real rate; + double cycles; + real saved_conserved_quantity = 0; + real last_ekin = 0; + int iter_i; + t_extmass MassQ; + int **trotter_seq; + char sbuf[STEPSTRSIZE], sbuf2[STEPSTRSIZE]; + int handled_stop_condition = gmx_stop_cond_none; /* compare to get_stop_condition*/ + gmx_iterate_t iterate; + gmx_large_int_t multisim_nsteps = -1; /* number of steps to do before first multisim + simulation stops. 
If equal to zero, don't + communicate any more between multisims.*/ + /* PME load balancing data for GPU kernels */ + pme_load_balancing_t pme_loadbal = NULL; + double cycles_pmes; + gmx_bool bPMETuneTry = FALSE, bPMETuneRunning = FALSE; + +#ifdef GMX_FAHCORE + /* Temporary addition for FAHCORE checkpointing */ + int chkpt_ret; +#endif + + /* Check for special mdrun options */ + bRerunMD = (Flags & MD_RERUN); + bIonize = (Flags & MD_IONIZE); + bFFscan = (Flags & MD_FFSCAN); + bAppend = (Flags & MD_APPENDFILES); + if (Flags & MD_RESETCOUNTERSHALFWAY) + { + if (ir->nsteps > 0) + { + /* Signal to reset the counters half the simulation steps. */ + wcycle_set_reset_counters(wcycle, ir->nsteps/2); + } + /* Signal to reset the counters halfway the simulation time. */ + bResetCountersHalfMaxH = (max_hours > 0); + } + + /* md-vv uses averaged full step velocities for T-control + md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control) + md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */ + bVV = EI_VV(ir->eI); + if (bVV) /* to store the initial velocities while computing virial */ + { + snew(cbuf, top_global->natoms); + } + /* all the iteratative cases - only if there are constraints */ + bIterativeCase = ((IR_NPH_TROTTER(ir) || IR_NPT_TROTTER(ir)) && (constr) && (!bRerunMD)); + gmx_iterate_init(&iterate, FALSE); /* The default value of iterate->bIterationActive is set to + false in this step. The correct value, true or false, + is set at each step, as it depends on the frequency of temperature + and pressure control.*/ + bTrotter = (bVV && (IR_NPT_TROTTER(ir) || IR_NPH_TROTTER(ir) || IR_NVT_TROTTER(ir))); + + if (bRerunMD) + { + /* Since we don't know if the frames read are related in any way, + * rebuild the neighborlist at every step. 
+ */ + ir->nstlist = 1; + ir->nstcalcenergy = 1; + nstglobalcomm = 1; + } + + check_ir_old_tpx_versions(cr, fplog, ir, top_global); + + nstglobalcomm = check_nstglobalcomm(fplog, cr, nstglobalcomm, ir); + bGStatEveryStep = (nstglobalcomm == 1); + + if (!bGStatEveryStep && ir->nstlist == -1 && fplog != NULL) + { + fprintf(fplog, + "To reduce the energy communication with nstlist = -1\n" + "the neighbor list validity should not be checked at every step,\n" + "this means that exact integration is not guaranteed.\n" + "The neighbor list validity is checked after:\n" + " - 2*std.dev.(n.list life time) steps.\n" + "In most cases this will result in exact integration.\n" + "This reduces the energy communication by a factor of 2 to 3.\n" + "If you want less energy communication, set nstlist > 3.\n\n"); + } + + if (bRerunMD || bFFscan) + { + ir->nstxtcout = 0; + } + groups = &top_global->groups; + + /* Initial values */ + init_md(fplog, cr, ir, oenv, &t, &t0, state_global->lambda, + &(state_global->fep_state), lam0, + nrnb, top_global, &upd, + nfile, fnm, &outf, &mdebin, + force_vir, shake_vir, mu_tot, &bSimAnn, &vcm, state_global, Flags); + + clear_mat(total_vir); + clear_mat(pres); + /* Energy terms and groups */ + snew(enerd, 1); + init_enerdata(top_global->groups.grps[egcENER].nr, ir->fepvals->n_lambda, + enerd); + if (DOMAINDECOMP(cr)) + { + f = NULL; + } + else + { + snew(f, top_global->natoms); + } + + /* lambda Monte carlo random number generator */ + if (ir->bExpanded) + { + mcrng = gmx_rng_init(ir->expandedvals->lmc_seed); + } + /* copy the state into df_history */ + copy_df_history(&df_history, &state_global->dfhist); + + /* Kinetic energy data */ + snew(ekind, 1); + init_ekindata(fplog, top_global, &(ir->opts), ekind); + /* needed for iteration of constraints */ + snew(ekind_save, 1); + init_ekindata(fplog, top_global, &(ir->opts), ekind_save); + /* Copy the cos acceleration to the groups struct */ + ekind->cosacc.cos_accel = ir->cos_accel; + + gstat = 
global_stat_init(ir); + debug_gmx(); + + /* Check for polarizable models and flexible constraints */ + shellfc = init_shell_flexcon(fplog, + top_global, n_flexible_constraints(constr), + (ir->bContinuation || + (DOMAINDECOMP(cr) && !MASTER(cr))) ? + NULL : state_global->x); + + if (DEFORM(*ir)) + { +#ifdef GMX_THREAD_MPI + tMPI_Thread_mutex_lock(&deform_init_box_mutex); +#endif + set_deform_reference_box(upd, + deform_init_init_step_tpx, + deform_init_box_tpx); +#ifdef GMX_THREAD_MPI + tMPI_Thread_mutex_unlock(&deform_init_box_mutex); +#endif + } + + { + double io = compute_io(ir, top_global->natoms, groups, mdebin->ebin->nener, 1); + if ((io > 2000) && MASTER(cr)) + { + fprintf(stderr, + "\nWARNING: This run will generate roughly %.0f Mb of data\n\n", + io); + } + } + + if (DOMAINDECOMP(cr)) + { + top = dd_init_local_top(top_global); + + snew(state, 1); + dd_init_local_state(cr->dd, state_global, state); + + if (DDMASTER(cr->dd) && ir->nstfout) + { + snew(f_global, state_global->natoms); + } + } + else + { + if (PAR(cr)) + { + /* Initialize the particle decomposition and split the topology */ + top = split_system(fplog, top_global, ir, cr); + + pd_cg_range(cr, &fr->cg0, &fr->hcg); + pd_at_range(cr, &a0, &a1); + } + else + { + top = gmx_mtop_generate_local_top(top_global, ir); + + a0 = 0; + a1 = top_global->natoms; + } + + forcerec_set_excl_load(fr, top, cr); + + state = partdec_init_local_state(cr, state_global); + f_global = f; + + atoms2md(top_global, ir, 0, NULL, a0, a1-a0, mdatoms); + + if (vsite) + { + set_vsite_top(vsite, top, mdatoms, cr); + } + + if (ir->ePBC != epbcNONE && !fr->bMolPBC) + { + graph = mk_graph(fplog, &(top->idef), 0, top_global->natoms, FALSE, FALSE); + } + + if (shellfc) + { + make_local_shells(cr, mdatoms, shellfc); + } + + init_bonded_thread_force_reduction(fr, &top->idef); + + if (ir->pull && PAR(cr)) + { + dd_make_local_pull_groups(NULL, ir->pull, mdatoms); + } + } + + if (DOMAINDECOMP(cr)) + { + /* Distribute the charge groups over 
the nodes from the master node */ + dd_partition_system(fplog, ir->init_step, cr, TRUE, 1, + state_global, top_global, ir, + state, &f, mdatoms, top, fr, + vsite, shellfc, constr, + nrnb, wcycle, FALSE); + + } + + update_mdatoms(mdatoms, state->lambda[efptMASS]); + + if (opt2bSet("-cpi", nfile, fnm)) + { + bStateFromCP = gmx_fexist_master(opt2fn_master("-cpi", nfile, fnm, cr), cr); + } + else + { + bStateFromCP = FALSE; + } + + if (MASTER(cr)) + { + if (bStateFromCP) + { + /* Update mdebin with energy history if appending to output files */ + if (Flags & MD_APPENDFILES) + { + restore_energyhistory_from_state(mdebin, &state_global->enerhist); + } + else + { + /* We might have read an energy history from checkpoint, + * free the allocated memory and reset the counts. + */ + done_energyhistory(&state_global->enerhist); + init_energyhistory(&state_global->enerhist); + } + } + /* Set the initial energy history in state by updating once */ + update_energyhistory(&state_global->enerhist, mdebin); + } + + if ((state->flags & (1<flags & (1<mols.nr; + snew(grpindex, gnx); + for (i = 0; (i < gnx); i++) + { + grpindex[i] = i; + } + } + + if (repl_ex_nst > 0) + { + /* We need to be sure replica exchange can only occur + * when the energies are current */ + check_nst_param(fplog, cr, "nstcalcenergy", ir->nstcalcenergy, + "repl_ex_nst", &repl_ex_nst); + /* This check needs to happen before inter-simulation + * signals are initialized, too */ + } + if (repl_ex_nst > 0 && MASTER(cr)) + { + repl_ex = init_replica_exchange(fplog, cr->ms, state_global, ir, + repl_ex_nst, repl_ex_nex, repl_ex_seed); + } + + /* PME tuning is only supported with GPUs or PME nodes and not with rerun. + * With perturbed charges with soft-core we should not change the cut-off. 
+ */ + if ((Flags & MD_TUNEPME) && + EEL_PME(fr->eeltype) && + ( (fr->cutoff_scheme == ecutsVERLET && fr->nbv->bUseGPU) || !(cr->duty & DUTY_PME)) && + !(ir->efep != efepNO && mdatoms->nChargePerturbed > 0 && ir->fepvals->bScCoul) && + !bRerunMD) + { + pme_loadbal_init(&pme_loadbal, ir, state->box, fr->ic, fr->pmedata); + cycles_pmes = 0; + if (cr->duty & DUTY_PME) + { + /* Start tuning right away, as we can't measure the load */ + bPMETuneRunning = TRUE; + } + else + { + /* Separate PME nodes, we can measure the PP/PME load balance */ + bPMETuneTry = TRUE; + } + } + + if (!ir->bContinuation && !bRerunMD) + { + if (mdatoms->cFREEZE && (state->flags & (1<start; i < mdatoms->start+mdatoms->homenr; i++) + { + for (m = 0; m < DIM; m++) + { + if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m]) + { + state->v[i][m] = 0; + } + } + } + } + + if (constr) + { + /* Constrain the initial coordinates and velocities */ + do_constrain_first(fplog, constr, ir, mdatoms, state, f, + graph, cr, nrnb, fr, top, shake_vir); + } + if (vsite) + { + /* Construct the virtual sites for the initial configuration */ + construct_vsites(fplog, vsite, state->x, nrnb, ir->delta_t, NULL, + top->idef.iparams, top->idef.il, + fr->ePBC, fr->bMolPBC, graph, cr, state->box); + } + } + + debug_gmx(); + + /* set free energy calculation frequency as the minimum of nstdhdl, nstexpanded, and nstrepl_ex_nst*/ + nstfep = ir->fepvals->nstdhdl; + if (ir->bExpanded && (nstfep > ir->expandedvals->nstexpanded)) + { + nstfep = ir->expandedvals->nstexpanded; + } + if (repl_ex_nst > 0 && nstfep > repl_ex_nst) + { + nstfep = repl_ex_nst; + } + + /* I'm assuming we need global communication the first time! MRS */ + cglo_flags = (CGLO_TEMPERATURE | CGLO_GSTAT + | ((ir->comm_mode != ecmNO) ? CGLO_STOPCM : 0) + | (bVV ? CGLO_PRESSURE : 0) + | (bVV ? CGLO_CONSTRAINT : 0) + | (bRerunMD ? CGLO_RERUNMD : 0) + | ((Flags & MD_READ_EKIN) ? 
CGLO_READEKIN : 0)); + + bSumEkinhOld = FALSE; + compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm, + NULL, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, + constr, NULL, FALSE, state->box, + top_global, &pcurr, top_global->natoms, &bSumEkinhOld, cglo_flags); + if (ir->eI == eiVVAK) + { + /* a second call to get the half step temperature initialized as well */ + /* we do the same call as above, but turn the pressure off -- internally to + compute_globals, this is recognized as a velocity verlet half-step + kinetic energy calculation. This minimized excess variables, but + perhaps loses some logic?*/ + + compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm, + NULL, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, + constr, NULL, FALSE, state->box, + top_global, &pcurr, top_global->natoms, &bSumEkinhOld, + cglo_flags &~(CGLO_STOPCM | CGLO_PRESSURE)); + } + + /* Calculate the initial half step temperature, and save the ekinh_old */ + if (!(Flags & MD_STARTFROMCPT)) + { + for (i = 0; (i < ir->opts.ngtc); i++) + { + copy_mat(ekind->tcstat[i].ekinh, ekind->tcstat[i].ekinh_old); + } + } + if (ir->eI != eiVV) + { + enerd->term[F_TEMP] *= 2; /* result of averages being done over previous and current step, + and there is no previous step */ + } + + /* if using an iterative algorithm, we need to create a working directory for the state. 
*/ + if (bIterativeCase) + { + bufstate = init_bufstate(state); + } + if (bFFscan) + { + snew(xcopy, state->natoms); + snew(vcopy, state->natoms); + copy_rvecn(state->x, xcopy, 0, state->natoms); + copy_rvecn(state->v, vcopy, 0, state->natoms); + copy_mat(state->box, boxcopy); + } + + /* need to make an initiation call to get the Trotter variables set, as well as other constants for non-trotter + temperature control */ + trotter_seq = init_npt_vars(ir, state, &MassQ, bTrotter); + + if (MASTER(cr)) + { + if (constr && !ir->bContinuation && ir->eConstrAlg == econtLINCS) + { + fprintf(fplog, + "RMS relative constraint deviation after constraining: %.2e\n", + constr_rmsd(constr, FALSE)); + } + if (EI_STATE_VELOCITY(ir->eI)) + { + fprintf(fplog, "Initial temperature: %g K\n", enerd->term[F_TEMP]); + } + if (bRerunMD) + { + fprintf(stderr, "starting md rerun '%s', reading coordinates from" + " input trajectory '%s'\n\n", + *(top_global->name), opt2fn("-rerun", nfile, fnm)); + if (bVerbose) + { + fprintf(stderr, "Calculated time to finish depends on nsteps from " + "run input file,\nwhich may not correspond to the time " + "needed to process input trajectory.\n\n"); + } + } + else + { + char tbuf[20]; + fprintf(stderr, "starting mdrun '%s'\n", + *(top_global->name)); + if (ir->nsteps >= 0) + { + sprintf(tbuf, "%8.1f", (ir->init_step+ir->nsteps)*ir->delta_t); + } + else + { + sprintf(tbuf, "%s", "infinite"); + } + if (ir->init_step > 0) + { + fprintf(stderr, "%s steps, %s ps (continuing from step %s, %8.1f ps).\n", + gmx_step_str(ir->init_step+ir->nsteps, sbuf), tbuf, + gmx_step_str(ir->init_step, sbuf2), + ir->init_step*ir->delta_t); + } + else + { + fprintf(stderr, "%s steps, %s ps.\n", + gmx_step_str(ir->nsteps, sbuf), tbuf); + } + } + fprintf(fplog, "\n"); + } + + /* Set and write start time */ + runtime_start(runtime); + print_date_and_time(fplog, cr->nodeid, "Started mdrun", runtime); + wallcycle_start(wcycle, ewcRUN); + if (fplog) + { + fprintf(fplog, "\n"); + } + + 
/* safest point to do file checkpointing is here. More general point would be immediately before integrator call */ +#ifdef GMX_FAHCORE + chkpt_ret = fcCheckPointParallel( cr->nodeid, + NULL, 0); + if (chkpt_ret == 0) + { + gmx_fatal( 3, __FILE__, __LINE__, "Checkpoint error on step %d\n", 0 ); + } +#endif + + debug_gmx(); + /*********************************************************** + * + * Loop over MD steps + * + ************************************************************/ + + /* if rerunMD then read coordinates and velocities from input trajectory */ + if (bRerunMD) + { + if (getenv("GMX_FORCE_UPDATE")) + { + bForceUpdate = TRUE; + } + + rerun_fr.natoms = 0; + if (MASTER(cr)) + { + bNotLastFrame = read_first_frame(oenv, &status, + opt2fn("-rerun", nfile, fnm), + &rerun_fr, TRX_NEED_X | TRX_READ_V); + if (rerun_fr.natoms != top_global->natoms) + { + gmx_fatal(FARGS, + "Number of atoms in trajectory (%d) does not match the " + "run input file (%d)\n", + rerun_fr.natoms, top_global->natoms); + } + if (ir->ePBC != epbcNONE) + { + if (!rerun_fr.bBox) + { + gmx_fatal(FARGS, "Rerun trajectory frame step %d time %f does not contain a box, while pbc is used", rerun_fr.step, rerun_fr.time); + } + if (max_cutoff2(ir->ePBC, rerun_fr.box) < sqr(fr->rlistlong)) + { + gmx_fatal(FARGS, "Rerun trajectory frame step %d time %f has too small box dimensions", rerun_fr.step, rerun_fr.time); + } + } + } + + if (PAR(cr)) + { + rerun_parallel_comm(cr, &rerun_fr, &bNotLastFrame); + } + + if (ir->ePBC != epbcNONE) + { + /* Set the shift vectors. + * Necessary here when have a static box different from the tpr box. 
+ */ + calc_shifts(rerun_fr.box, fr->shift_vec); + } + } + + /* loop over MD steps or if rerunMD to end of input trajectory */ + bFirstStep = TRUE; + /* Skip the first Nose-Hoover integration when we get the state from tpx */ + bStateFromTPX = !bStateFromCP; + bInitStep = bFirstStep && (bStateFromTPX || bVV); + bStartingFromCpt = (Flags & MD_STARTFROMCPT) && bInitStep; + bLastStep = FALSE; + bSumEkinhOld = FALSE; + bExchanged = FALSE; + + init_global_signals(&gs, cr, ir, repl_ex_nst); + + step = ir->init_step; + step_rel = 0; + + if (ir->nstlist == -1) + { + init_nlistheuristics(&nlh, bGStatEveryStep, step); + } + + if (MULTISIM(cr) && (repl_ex_nst <= 0 )) + { + /* check how many steps are left in other sims */ + multisim_nsteps = get_multisim_nsteps(cr, ir->nsteps); + } + + + /* and stop now if we should */ + bLastStep = (bRerunMD || (ir->nsteps >= 0 && step_rel > ir->nsteps) || + ((multisim_nsteps >= 0) && (step_rel >= multisim_nsteps ))); + while (!bLastStep || (bRerunMD && bNotLastFrame)) + { + + wallcycle_start(wcycle, ewcSTEP); + + if (bRerunMD) + { + if (rerun_fr.bStep) + { + step = rerun_fr.step; + step_rel = step - ir->init_step; + } + if (rerun_fr.bTime) + { + t = rerun_fr.time; + } + else + { + t = step; + } + } + else + { + bLastStep = (step_rel == ir->nsteps); + t = t0 + step*ir->delta_t; + } + + if (ir->efep != efepNO || ir->bSimTemp) + { + /* find and set the current lambdas. If rerunning, we either read in a state, or a lambda value, + requiring different logic. 
*/ + + set_current_lambdas(step, ir->fepvals, bRerunMD, &rerun_fr, state_global, state, lam0); + bDoDHDL = do_per_step(step, ir->fepvals->nstdhdl); + bDoFEP = (do_per_step(step, nstfep) && (ir->efep != efepNO)); + bDoExpanded = (do_per_step(step, ir->expandedvals->nstexpanded) && (ir->bExpanded) && (step > 0)); + } + + if (bSimAnn) + { + update_annealing_target_temp(&(ir->opts), t); + } + + if (bRerunMD) + { + if (!(DOMAINDECOMP(cr) && !MASTER(cr))) + { + for (i = 0; i < state_global->natoms; i++) + { + copy_rvec(rerun_fr.x[i], state_global->x[i]); + } + if (rerun_fr.bV) + { + for (i = 0; i < state_global->natoms; i++) + { + copy_rvec(rerun_fr.v[i], state_global->v[i]); + } + } + else + { + for (i = 0; i < state_global->natoms; i++) + { + clear_rvec(state_global->v[i]); + } + if (bRerunWarnNoV) + { + fprintf(stderr, "\nWARNING: Some frames do not contain velocities.\n" + " Ekin, temperature and pressure are incorrect,\n" + " the virial will be incorrect when constraints are present.\n" + "\n"); + bRerunWarnNoV = FALSE; + } + } + } + copy_mat(rerun_fr.box, state_global->box); + copy_mat(state_global->box, state->box); + + if (vsite && (Flags & MD_RERUN_VSITE)) + { + if (DOMAINDECOMP(cr)) + { + gmx_fatal(FARGS, "Vsite recalculation with -rerun is not implemented for domain decomposition, use particle decomposition"); + } + if (graph) + { + /* Following is necessary because the graph may get out of sync + * with the coordinates if we only have every N'th coordinate set + */ + mk_mshift(fplog, graph, fr->ePBC, state->box, state->x); + shift_self(graph, state->box, state->x); + } + construct_vsites(fplog, vsite, state->x, nrnb, ir->delta_t, state->v, + top->idef.iparams, top->idef.il, + fr->ePBC, fr->bMolPBC, graph, cr, state->box); + if (graph) + { + unshift_self(graph, state->box, state->x); + } + } + } + + /* Stop Center of Mass motion */ + bStopCM = (ir->comm_mode != ecmNO && do_per_step(step, ir->nstcomm)); + + /* Copy back starting coordinates in case we're doing 
a forcefield scan */ + if (bFFscan) + { + for (ii = 0; (ii < state->natoms); ii++) + { + copy_rvec(xcopy[ii], state->x[ii]); + copy_rvec(vcopy[ii], state->v[ii]); + } + copy_mat(boxcopy, state->box); + } + + if (bRerunMD) + { + /* for rerun MD always do Neighbour Searching */ + bNS = (bFirstStep || ir->nstlist != 0); + bNStList = bNS; + } + else + { + /* Determine whether or not to do Neighbour Searching and LR */ + bNStList = (ir->nstlist > 0 && step % ir->nstlist == 0); + + bNS = (bFirstStep || bExchanged || bNStList || bDoFEP || + (ir->nstlist == -1 && nlh.nabnsb > 0)); + + if (bNS && ir->nstlist == -1) + { + set_nlistheuristics(&nlh, bFirstStep || bExchanged || bDoFEP, step); + } + } + + /* check whether we should stop because another simulation has + stopped. */ + if (MULTISIM(cr)) + { + if ( (multisim_nsteps >= 0) && (step_rel >= multisim_nsteps) && + (multisim_nsteps != ir->nsteps) ) + { + if (bNS) + { + if (MASTER(cr)) + { + fprintf(stderr, + "Stopping simulation %d because another one has finished\n", + cr->ms->sim); + } + bLastStep = TRUE; + gs.sig[eglsCHKPT] = 1; + } + } + } + + /* < 0 means stop at next step, > 0 means stop at next NS step */ + if ( (gs.set[eglsSTOPCOND] < 0 ) || + ( (gs.set[eglsSTOPCOND] > 0 ) && ( bNS || ir->nstlist == 0)) ) + { + bLastStep = TRUE; + } + + /* Determine whether or not to update the Born radii if doing GB */ + bBornRadii = bFirstStep; + if (ir->implicit_solvent && (step % ir->nstgbradii == 0)) + { + bBornRadii = TRUE; + } + + do_log = do_per_step(step, ir->nstlog) || bFirstStep || bLastStep; + do_verbose = bVerbose && + (step % stepout == 0 || bFirstStep || bLastStep); + + if (bNS && !(bFirstStep && ir->bContinuation && !bRerunMD)) + { + if (bRerunMD) + { + bMasterState = TRUE; + } + else + { + bMasterState = FALSE; + /* Correct the new box if it is too skewed */ + if (DYNAMIC_BOX(*ir)) + { + if (correct_box(fplog, step, state->box, graph)) + { + bMasterState = TRUE; + } + } + if (DOMAINDECOMP(cr) && bMasterState) + { + 
dd_collect_state(cr->dd, state, state_global); + } + } + + if (DOMAINDECOMP(cr)) + { + /* Repartition the domain decomposition */ + wallcycle_start(wcycle, ewcDOMDEC); + dd_partition_system(fplog, step, cr, + bMasterState, nstglobalcomm, + state_global, top_global, ir, + state, &f, mdatoms, top, fr, + vsite, shellfc, constr, + nrnb, wcycle, + do_verbose && !bPMETuneRunning); + wallcycle_stop(wcycle, ewcDOMDEC); + /* If using an iterative integrator, reallocate space to match the decomposition */ + } + } + + if (MASTER(cr) && do_log && !bFFscan) + { + print_ebin_header(fplog, step, t, state->lambda[efptFEP]); /* can we improve the information printed here? */ + } + + if (ir->efep != efepNO) + { + update_mdatoms(mdatoms, state->lambda[efptMASS]); + } + + if ((bRerunMD && rerun_fr.bV) || bExchanged) + { + + /* We need the kinetic energy at minus the half step for determining + * the full step kinetic energy and possibly for T-coupling.*/ + /* This may not be quite working correctly yet . . . . */ + compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm, + wcycle, enerd, NULL, NULL, NULL, NULL, mu_tot, + constr, NULL, FALSE, state->box, + top_global, &pcurr, top_global->natoms, &bSumEkinhOld, + CGLO_RERUNMD | CGLO_GSTAT | CGLO_TEMPERATURE); + } + clear_mat(force_vir); + + /* Ionize the atoms if necessary */ + if (bIonize) + { + ionize(fplog, oenv, mdatoms, top_global, t, ir, state->x, state->v, + mdatoms->start, mdatoms->start+mdatoms->homenr, state->box, cr); + } + + /* Update force field in ffscan program */ + if (bFFscan) + { + if (update_forcefield(fplog, + nfile, fnm, fr, + mdatoms->nr, state->x, state->box)) + { + gmx_finalize_par(); + + exit(0); + } + } + + /* We write a checkpoint at this MD step when: + * either at an NS step when we signalled through gs, + * or at the last step (but not when we do not want confout), + * but never at the first step or with rerun. 
+ */ + bCPT = (((gs.set[eglsCHKPT] && (bNS || ir->nstlist == 0)) || + (bLastStep && (Flags & MD_CONFOUT))) && + step > ir->init_step && !bRerunMD); + if (bCPT) + { + gs.set[eglsCHKPT] = 0; + } + + /* Determine the energy and pressure: + * at nstcalcenergy steps and at energy output steps (set below). + */ + if (EI_VV(ir->eI) && (!bInitStep)) + { + /* for vv, the first half of the integration actually corresponds + to the previous step. bCalcEner is only required to be evaluated on the 'next' step, + but the virial needs to be calculated on both the current step and the 'next' step. Future + reorganization may be able to get rid of one of the bCalcVir=TRUE steps. */ + + bCalcEner = do_per_step(step-1, ir->nstcalcenergy); + bCalcVir = bCalcEner || + (ir->epc != epcNO && (do_per_step(step, ir->nstpcouple) || do_per_step(step-1, ir->nstpcouple))); + } + else + { + bCalcEner = do_per_step(step, ir->nstcalcenergy); + bCalcVir = bCalcEner || + (ir->epc != epcNO && do_per_step(step, ir->nstpcouple)); + } + + /* Do we need global communication ? */ + bGStat = (bCalcVir || bCalcEner || bStopCM || + do_per_step(step, nstglobalcomm) || (bVV && IR_NVT_TROTTER(ir) && do_per_step(step-1, nstglobalcomm)) || + (ir->nstlist == -1 && !bRerunMD && step >= nlh.step_nscheck)); + + do_ene = (do_per_step(step, ir->nstenergy) || bLastStep); + + if (do_ene || do_log) + { + bCalcVir = TRUE; + bCalcEner = TRUE; + bGStat = TRUE; + } + + /* these CGLO_ options remain the same throughout the iteration */ + cglo_flags = ((bRerunMD ? CGLO_RERUNMD : 0) | + (bGStat ? CGLO_GSTAT : 0) + ); + + force_flags = (GMX_FORCE_STATECHANGED | + ((DYNAMIC_BOX(*ir) || bRerunMD) ? GMX_FORCE_DYNAMICBOX : 0) | + GMX_FORCE_ALLFORCES | + GMX_FORCE_SEPLRF | + (bCalcVir ? GMX_FORCE_VIRIAL : 0) | + (bCalcEner ? GMX_FORCE_ENERGY : 0) | + (bDoFEP ? 
GMX_FORCE_DHDL : 0) + ); + + if (fr->bTwinRange) + { + if (do_per_step(step, ir->nstcalclr)) + { + force_flags |= GMX_FORCE_DO_LR; + } + } + + if (shellfc) + { + /* Now is the time to relax the shells */ + count = relax_shell_flexcon(fplog, cr, bVerbose, bFFscan ? step+1 : step, + ir, bNS, force_flags, + bStopCM, top, top_global, + constr, enerd, fcd, + state, f, force_vir, mdatoms, + nrnb, wcycle, graph, groups, + shellfc, fr, bBornRadii, t, mu_tot, + state->natoms, &bConverged, vsite, + outf->fp_field); + tcount += count; + + if (bConverged) + { + nconverged++; + } + } + else + { + /* The coordinates (x) are shifted (to get whole molecules) + * in do_force. + * This is parallellized as well, and does communication too. + * Check comments in sim_util.c + */ + do_force(fplog, cr, ir, step, nrnb, wcycle, top, top_global, groups, + state->box, state->x, &state->hist, + f, force_vir, mdatoms, enerd, fcd, + state->lambda, graph, + fr, vsite, mu_tot, t, outf->fp_field, ed, bBornRadii, + (bNS ? GMX_FORCE_NS : 0) | force_flags); + } + + if (bTCR) + { + mu_aver = calc_mu_aver(cr, state->x, mdatoms->chargeA, + mu_tot, &top_global->mols, mdatoms, gnx, grpindex); + } + + if (bTCR && bFirstStep) + { + tcr = init_coupling(fplog, nfile, fnm, cr, fr, mdatoms, &(top->idef)); + fprintf(fplog, "Done init_coupling\n"); + fflush(fplog); + } + + if (bVV && !bStartingFromCpt && !bRerunMD) + /* ############### START FIRST UPDATE HALF-STEP FOR VV METHODS############### */ + { + if (ir->eI == eiVV && bInitStep) + { + /* if using velocity verlet with full time step Ekin, + * take the first half step only to compute the + * virial for the first step. From there, + * revert back to the initial coordinates + * so that the input is actually the initial step. + */ + copy_rvecn(state->v, cbuf, 0, state->natoms); /* should make this better for parallelizing? 
*/ + } + else + { + /* this is for NHC in the Ekin(t+dt/2) version of vv */ + trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ1); + } + + /* If we are using twin-range interactions where the long-range component + * is only evaluated every nstcalclr>1 steps, we should do a special update + * step to combine the long-range forces on these steps. + * For nstcalclr=1 this is not done, since the forces would have been added + * directly to the short-range forces already. + */ + bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr)); + + update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC, + f, bUpdateDoLR, fr->f_twin, fcd, + ekind, M, wcycle, upd, bInitStep, etrtVELOCITY1, + cr, nrnb, constr, &top->idef); + + if (bIterativeCase && do_per_step(step-1, ir->nstpcouple) && !bInitStep) + { + gmx_iterate_init(&iterate, TRUE); + } + /* for iterations, we save these vectors, as we will be self-consistently iterating + the calculations */ + + /*#### UPDATE EXTENDED VARIABLES IN TROTTER FORMULATION */ + + /* save the state */ + if (iterate.bIterationActive) + { + copy_coupling_state(state, bufstate, ekind, ekind_save, &(ir->opts)); + } + + bFirstIterate = TRUE; + while (bFirstIterate || iterate.bIterationActive) + { + if (iterate.bIterationActive) + { + copy_coupling_state(bufstate, state, ekind_save, ekind, &(ir->opts)); + if (bFirstIterate && bTrotter) + { + /* The first time through, we need a decent first estimate + of veta(t+dt) to compute the constraints. Do + this by computing the box volume part of the + trotter integration at this time. Nothing else + should be changed by this routine here. If + !(first time), we start with the previous value + of veta. 
*/ + + veta_save = state->veta; + trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ0); + vetanew = state->veta; + state->veta = veta_save; + } + } + + bOK = TRUE; + if (!bRerunMD || rerun_fr.bV || bForceUpdate) /* Why is rerun_fr.bV here? Unclear. */ + { + update_constraints(fplog, step, NULL, ir, ekind, mdatoms, + state, fr->bMolPBC, graph, f, + &top->idef, shake_vir, NULL, + cr, nrnb, wcycle, upd, constr, + bInitStep, TRUE, bCalcVir, vetanew); + + if (!bOK && !bFFscan) + { + gmx_fatal(FARGS, "Constraint error: Shake, Lincs or Settle could not solve the constrains"); + } + + } + else if (graph) + { + /* Need to unshift here if a do_force has been + called in the previous step */ + unshift_self(graph, state->box, state->x); + } + + /* if VV, compute the pressure and constraints */ + /* For VV2, we strictly only need this if using pressure + * control, but we really would like to have accurate pressures + * printed out. + * Think about ways around this in the future? + * For now, keep this choice in comments. + */ + /*bPres = (ir->eI==eiVV || IR_NPT_TROTTER(ir)); */ + /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && IR_NPT_TROTTER(ir)));*/ + bPres = TRUE; + bTemp = ((ir->eI == eiVV && (!bInitStep)) || (ir->eI == eiVVAK)); + if (bCalcEner && ir->eI == eiVVAK) /*MRS: 7/9/2010 -- this still doesn't fix it?*/ + { + bSumEkinhOld = TRUE; + } + /* for vv, the first half of the integration actually corresponds to the previous step. + So we need information from the last step in the first half of the integration */ + if (bGStat || do_per_step(step-1, nstglobalcomm)) + { + compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm, + wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, + constr, NULL, FALSE, state->box, + top_global, &pcurr, top_global->natoms, &bSumEkinhOld, + cglo_flags + | CGLO_ENERGY + | (bTemp ? CGLO_TEMPERATURE : 0) + | (bPres ? 
CGLO_PRESSURE : 0) + | (bPres ? CGLO_CONSTRAINT : 0) + | ((iterate.bIterationActive) ? CGLO_ITERATE : 0) + | (bFirstIterate ? CGLO_FIRSTITERATE : 0) + | CGLO_SCALEEKIN + ); + /* explanation of above: + a) We compute Ekin at the full time step + if 1) we are using the AveVel Ekin, and it's not the + initial step, or 2) if we are using AveEkin, but need the full + time step kinetic energy for the pressure (always true now, since we want accurate statistics). + b) If we are using EkinAveEkin for the kinetic energy for the temperature control, we still feed in + EkinAveVel because it's needed for the pressure */ + } + /* temperature scaling and pressure scaling to produce the extended variables at t+dt */ + if (!bInitStep) + { + if (bTrotter) + { + m_add(force_vir, shake_vir, total_vir); /* we need the un-dispersion corrected total vir here */ + trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ2); + } + else + { + if (bExchanged) + { + + /* We need the kinetic energy at minus the half step for determining + * the full step kinetic energy and possibly for T-coupling.*/ + /* This may not be quite working correctly yet . . . . 
*/ + compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm, + wcycle, enerd, NULL, NULL, NULL, NULL, mu_tot, + constr, NULL, FALSE, state->box, + top_global, &pcurr, top_global->natoms, &bSumEkinhOld, + CGLO_RERUNMD | CGLO_GSTAT | CGLO_TEMPERATURE); + } + } + } + + if (iterate.bIterationActive && + done_iterating(cr, fplog, step, &iterate, bFirstIterate, + state->veta, &vetanew)) + { + break; + } + bFirstIterate = FALSE; + } + + if (bTrotter && !bInitStep) + { + copy_mat(shake_vir, state->svir_prev); + copy_mat(force_vir, state->fvir_prev); + if (IR_NVT_TROTTER(ir) && ir->eI == eiVV) + { + /* update temperature and kinetic energy now that step is over - this is the v(t+dt) point */ + enerd->term[F_TEMP] = sum_ekin(&(ir->opts), ekind, NULL, (ir->eI == eiVV), FALSE, FALSE); + enerd->term[F_EKIN] = trace(ekind->ekin); + } + } + /* if it's the initial step, we performed this first step just to get the constraint virial */ + if (bInitStep && ir->eI == eiVV) + { + copy_rvecn(cbuf, state->v, 0, state->natoms); + } + } + + /* MRS -- now done iterating -- compute the conserved quantity */ + if (bVV) + { + saved_conserved_quantity = compute_conserved_from_auxiliary(ir, state, &MassQ); + if (ir->eI == eiVV) + { + last_ekin = enerd->term[F_EKIN]; + } + if ((ir->eDispCorr != edispcEnerPres) && (ir->eDispCorr != edispcAllEnerPres)) + { + saved_conserved_quantity -= enerd->term[F_DISPCORR]; + } + /* sum up the foreign energy and dhdl terms for vv. currently done every step so that dhdl is correct in the .edr */ + if (!bRerunMD) + { + sum_dhdl(enerd, state->lambda, ir->fepvals); + } + } + + /* ######## END FIRST UPDATE STEP ############## */ + /* ######## If doing VV, we now have v(dt) ###### */ + if (bDoExpanded) + { + /* perform extended ensemble sampling in lambda - we don't + actually move to the new state before outputting + statistics, but if performing simulated tempering, we + do update the velocities and the tau_t. 
*/ + + lamnew = ExpandedEnsembleDynamics(fplog, ir, enerd, state, &MassQ, &df_history, step, mcrng, state->v, mdatoms); + } + /* ################## START TRAJECTORY OUTPUT ################# */ + + /* Now we have the energies and forces corresponding to the + * coordinates at time t. We must output all of this before + * the update. + * for RerunMD t is read from input trajectory + */ + mdof_flags = 0; + if (do_per_step(step, ir->nstxout)) + { + mdof_flags |= MDOF_X; + } + if (do_per_step(step, ir->nstvout)) + { + mdof_flags |= MDOF_V; + } + if (do_per_step(step, ir->nstfout)) + { + mdof_flags |= MDOF_F; + } + if (do_per_step(step, ir->nstxtcout)) + { + mdof_flags |= MDOF_XTC; + } + if (bCPT) + { + mdof_flags |= MDOF_CPT; + } + ; + +#if defined(GMX_FAHCORE) || defined(GMX_WRITELASTSTEP) + if (bLastStep) + { + /* Enforce writing positions and velocities at end of run */ + mdof_flags |= (MDOF_X | MDOF_V); + } +#endif +#ifdef GMX_FAHCORE + if (MASTER(cr)) + { + fcReportProgress( ir->nsteps, step ); + } + + /* sync bCPT and fc record-keeping */ + if (bCPT && MASTER(cr)) + { + fcRequestCheckPoint(); + } +#endif + + if (mdof_flags != 0) + { + wallcycle_start(wcycle, ewcTRAJ); + if (bCPT) + { + if (state->flags & (1<flags & (1<ekinstate.bUpToDate = FALSE; + } + else + { + update_ekinstate(&state_global->ekinstate, ekind); + state_global->ekinstate.bUpToDate = TRUE; + } + update_energyhistory(&state_global->enerhist, mdebin); + if (ir->efep != efepNO || ir->bSimTemp) + { + state_global->fep_state = state->fep_state; /* MRS: seems kludgy. The code should be + structured so this isn't necessary. 
+ Note this reassignment is only necessary + for single threads.*/ + copy_df_history(&state_global->dfhist, &df_history); + } + } + } + write_traj(fplog, cr, outf, mdof_flags, top_global, + step, t, state, state_global, f, f_global, &n_xtc, &x_xtc); + if (bCPT) + { + nchkpt++; + bCPT = FALSE; + } + debug_gmx(); + if (bLastStep && step_rel == ir->nsteps && + (Flags & MD_CONFOUT) && MASTER(cr) && + !bRerunMD && !bFFscan) + { + /* x and v have been collected in write_traj, + * because a checkpoint file will always be written + * at the last step. + */ + fprintf(stderr, "\nWriting final coordinates.\n"); + if (fr->bMolPBC) + { + /* Make molecules whole only for confout writing */ + do_pbc_mtop(fplog, ir->ePBC, state->box, top_global, state_global->x); + } + write_sto_conf_mtop(ftp2fn(efSTO, nfile, fnm), + *top_global->name, top_global, + state_global->x, state_global->v, + ir->ePBC, state->box); + debug_gmx(); + } + wallcycle_stop(wcycle, ewcTRAJ); + } + + /* kludge -- virial is lost with restart for NPT control. 
Must restart */ + if (bStartingFromCpt && bVV) + { + copy_mat(state->svir_prev, shake_vir); + copy_mat(state->fvir_prev, force_vir); + } + /* ################## END TRAJECTORY OUTPUT ################ */ + + /* Determine the wallclock run time up till now */ + run_time = gmx_gettime() - (double)runtime->real; + + /* Check whether everything is still allright */ + if (((int)gmx_get_stop_condition() > handled_stop_condition) +#ifdef GMX_THREAD_MPI + && MASTER(cr) +#endif + ) + { + /* this is just make gs.sig compatible with the hack + of sending signals around by MPI_Reduce with together with + other floats */ + if (gmx_get_stop_condition() == gmx_stop_cond_next_ns) + { + gs.sig[eglsSTOPCOND] = 1; + } + if (gmx_get_stop_condition() == gmx_stop_cond_next) + { + gs.sig[eglsSTOPCOND] = -1; + } + /* < 0 means stop at next step, > 0 means stop at next NS step */ + if (fplog) + { + fprintf(fplog, + "\n\nReceived the %s signal, stopping at the next %sstep\n\n", + gmx_get_signal_name(), + gs.sig[eglsSTOPCOND] == 1 ? "NS " : ""); + fflush(fplog); + } + fprintf(stderr, + "\n\nReceived the %s signal, stopping at the next %sstep\n\n", + gmx_get_signal_name(), + gs.sig[eglsSTOPCOND] == 1 ? 
"NS " : ""); + fflush(stderr); + handled_stop_condition = (int)gmx_get_stop_condition(); + } + else if (MASTER(cr) && (bNS || ir->nstlist <= 0) && + (max_hours > 0 && run_time > max_hours*60.0*60.0*0.99) && + gs.sig[eglsSTOPCOND] == 0 && gs.set[eglsSTOPCOND] == 0) + { + /* Signal to terminate the run */ + gs.sig[eglsSTOPCOND] = 1; + if (fplog) + { + fprintf(fplog, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n", gmx_step_str(step, sbuf), max_hours*0.99); + } + fprintf(stderr, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n", gmx_step_str(step, sbuf), max_hours*0.99); + } + + if (bResetCountersHalfMaxH && MASTER(cr) && + run_time > max_hours*60.0*60.0*0.495) + { + gs.sig[eglsRESETCOUNTERS] = 1; + } + + if (ir->nstlist == -1 && !bRerunMD) + { + /* When bGStatEveryStep=FALSE, global_stat is only called + * when we check the atom displacements, not at NS steps. + * This means that also the bonded interaction count check is not + * performed immediately after NS. Therefore a few MD steps could + * be performed with missing interactions. + * But wrong energies are never written to file, + * since energies are only written after global_stat + * has been called. + */ + if (step >= nlh.step_nscheck) + { + nlh.nabnsb = natoms_beyond_ns_buffer(ir, fr, &top->cgs, + nlh.scale_tot, state->x); + } + else + { + /* This is not necessarily true, + * but step_nscheck is determined quite conservatively. + */ + nlh.nabnsb = 0; + } + } + + /* In parallel we only have to check for checkpointing in steps + * where we do global communication, + * otherwise the other nodes don't know. 
+ */ + if (MASTER(cr) && ((bGStat || !PAR(cr)) && + cpt_period >= 0 && + (cpt_period == 0 || + run_time >= nchkpt*cpt_period*60.0)) && + gs.set[eglsCHKPT] == 0) + { + gs.sig[eglsCHKPT] = 1; + } + + /* at the start of step, randomize or scale the velocities (trotter done elsewhere) */ + if (EI_VV(ir->eI)) + { + if (!bInitStep) + { + update_tcouple(fplog, step, ir, state, ekind, wcycle, upd, &MassQ, mdatoms); + } + if (ETC_ANDERSEN(ir->etc)) /* keep this outside of update_tcouple because of the extra info required to pass */ + { + gmx_bool bIfRandomize; + bIfRandomize = update_randomize_velocities(ir, step, mdatoms, state, upd, &top->idef, constr); + /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */ + if (constr && bIfRandomize) + { + update_constraints(fplog, step, NULL, ir, ekind, mdatoms, + state, fr->bMolPBC, graph, f, + &top->idef, tmp_vir, NULL, + cr, nrnb, wcycle, upd, constr, + bInitStep, TRUE, bCalcVir, vetanew); + } + } + } + + if (bIterativeCase && do_per_step(step, ir->nstpcouple)) + { + gmx_iterate_init(&iterate, TRUE); + /* for iterations, we save these vectors, as we will be redoing the calculations */ + copy_coupling_state(state, bufstate, ekind, ekind_save, &(ir->opts)); + } + + bFirstIterate = TRUE; + while (bFirstIterate || iterate.bIterationActive) + { + /* We now restore these vectors to redo the calculation with improved extended variables */ + if (iterate.bIterationActive) + { + copy_coupling_state(bufstate, state, ekind_save, ekind, &(ir->opts)); + } + + /* We make the decision to break or not -after- the calculation of Ekin and Pressure, + so scroll down for that logic */ + + /* ######### START SECOND UPDATE STEP ################# */ + /* Box is changed in update() when we do pressure coupling, + * but we should still use the old box for energy corrections and when + * writing it to the energy file, so it matches the trajectory files for + * the same timestep above. Make a copy in a separate array. 
+ */ + copy_mat(state->box, lastbox); + + bOK = TRUE; + dvdl_constr = 0; + + if (!(bRerunMD && !rerun_fr.bV && !bForceUpdate)) + { + wallcycle_start(wcycle, ewcUPDATE); + /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */ + if (bTrotter) + { + if (iterate.bIterationActive) + { + if (bFirstIterate) + { + scalevir = 1; + } + else + { + /* we use a new value of scalevir to converge the iterations faster */ + scalevir = tracevir/trace(shake_vir); + } + msmul(shake_vir, scalevir, shake_vir); + m_add(force_vir, shake_vir, total_vir); + clear_mat(shake_vir); + } + trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ3); + /* We can only do Berendsen coupling after we have summed + * the kinetic energy or virial. Since the happens + * in global_state after update, we should only do it at + * step % nstlist = 1 with bGStatEveryStep=FALSE. + */ + } + else + { + update_tcouple(fplog, step, ir, state, ekind, wcycle, upd, &MassQ, mdatoms); + update_pcouple(fplog, step, ir, state, pcoupl_mu, M, wcycle, + upd, bInitStep); + } + + if (bVV) + { + bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr)); + + /* velocity half-step update */ + update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC, f, + bUpdateDoLR, fr->f_twin, fcd, + ekind, M, wcycle, upd, FALSE, etrtVELOCITY2, + cr, nrnb, constr, &top->idef); + } + + /* Above, initialize just copies ekinh into ekin, + * it doesn't copy position (for VV), + * and entire integrator for MD. 
+ */ + + if (ir->eI == eiVVAK) + { + copy_rvecn(state->x, cbuf, 0, state->natoms); + } + bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr)); + + update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC, f, + bUpdateDoLR, fr->f_twin, fcd, + ekind, M, wcycle, upd, bInitStep, etrtPOSITION, cr, nrnb, constr, &top->idef); + wallcycle_stop(wcycle, ewcUPDATE); + + update_constraints(fplog, step, &dvdl_constr, ir, ekind, mdatoms, state, + fr->bMolPBC, graph, f, + &top->idef, shake_vir, force_vir, + cr, nrnb, wcycle, upd, constr, + bInitStep, FALSE, bCalcVir, state->veta); + + if (ir->eI == eiVVAK) + { + /* erase F_EKIN and F_TEMP here? */ + /* just compute the kinetic energy at the half step to perform a trotter step */ + compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm, + wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, + constr, NULL, FALSE, lastbox, + top_global, &pcurr, top_global->natoms, &bSumEkinhOld, + cglo_flags | CGLO_TEMPERATURE + ); + wallcycle_start(wcycle, ewcUPDATE); + trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ4); + /* now we know the scaling, we can compute the positions again again */ + copy_rvecn(cbuf, state->x, 0, state->natoms); + + bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr)); + + update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC, f, + bUpdateDoLR, fr->f_twin, fcd, + ekind, M, wcycle, upd, bInitStep, etrtPOSITION, cr, nrnb, constr, &top->idef); + wallcycle_stop(wcycle, ewcUPDATE); + + /* do we need an extra constraint here? just need to copy out of state->v to upd->xp? */ + /* are the small terms in the shake_vir here due + * to numerical errors, or are they important + * physically? I'm thinking they are just errors, but not completely sure. 
+ * For now, will call without actually constraining, constr=NULL*/ + update_constraints(fplog, step, NULL, ir, ekind, mdatoms, + state, fr->bMolPBC, graph, f, + &top->idef, tmp_vir, force_vir, + cr, nrnb, wcycle, upd, NULL, + bInitStep, FALSE, bCalcVir, + state->veta); + } + if (!bOK && !bFFscan) + { + gmx_fatal(FARGS, "Constraint error: Shake, Lincs or Settle could not solve the constrains"); + } + + if (fr->bSepDVDL && fplog && do_log) + { + fprintf(fplog, sepdvdlformat, "Constraint dV/dl", 0.0, dvdl_constr); + } - enerd->term[F_DVDL_CONSTR] += dvdl_constr; ++ if (bVV) ++ { ++ /* this factor or 2 correction is necessary ++ because half of the constraint force is removed ++ in the vv step, so we have to double it. See ++ the Redmine issue #1255. It is not yet clear ++ if the factor of 2 is exact, or just a very ++ good approximation, and this will be ++ investigated. The next step is to see if this ++ can be done adding a dhdl contribution from the ++ rattle step, but this is somewhat more ++ complicated with the current code. Will be ++ investigated, hopefully for 4.6.3. However, ++ this current solution is much better than ++ having it completely wrong. 
++ */ ++ enerd->term[F_DVDL_CONSTR] += 2*dvdl_constr; ++ } ++ else ++ { ++ enerd->term[F_DVDL_CONSTR] += dvdl_constr; ++ } + } + else if (graph) + { + /* Need to unshift here */ + unshift_self(graph, state->box, state->x); + } + + if (vsite != NULL) + { + wallcycle_start(wcycle, ewcVSITECONSTR); + if (graph != NULL) + { + shift_self(graph, state->box, state->x); + } + construct_vsites(fplog, vsite, state->x, nrnb, ir->delta_t, state->v, + top->idef.iparams, top->idef.il, + fr->ePBC, fr->bMolPBC, graph, cr, state->box); + + if (graph != NULL) + { + unshift_self(graph, state->box, state->x); + } + wallcycle_stop(wcycle, ewcVSITECONSTR); + } + + /* ############## IF NOT VV, Calculate globals HERE, also iterate constraints ############ */ + /* With Leap-Frog we can skip compute_globals at + * non-communication steps, but we need to calculate + * the kinetic energy one step before communication. + */ + if (bGStat || (!EI_VV(ir->eI) && do_per_step(step+1, nstglobalcomm))) + { + if (ir->nstlist == -1 && bFirstIterate) + { + gs.sig[eglsNABNSB] = nlh.nabnsb; + } + compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm, + wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, + constr, + bFirstIterate ? &gs : NULL, + (step_rel % gs.nstms == 0) && + (multisim_nsteps < 0 || (step_rel < multisim_nsteps)), + lastbox, + top_global, &pcurr, top_global->natoms, &bSumEkinhOld, + cglo_flags + | (!EI_VV(ir->eI) || bRerunMD ? CGLO_ENERGY : 0) + | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0) + | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0) + | (!EI_VV(ir->eI) || bRerunMD ? CGLO_PRESSURE : 0) + | (iterate.bIterationActive ? CGLO_ITERATE : 0) + | (bFirstIterate ? 
CGLO_FIRSTITERATE : 0) + | CGLO_CONSTRAINT + ); + if (ir->nstlist == -1 && bFirstIterate) + { + nlh.nabnsb = gs.set[eglsNABNSB]; + gs.set[eglsNABNSB] = 0; + } + } + /* bIterate is set to keep it from eliminating the old ekin kinetic energy terms */ + /* ############# END CALC EKIN AND PRESSURE ################# */ + + /* Note: this is OK, but there are some numerical precision issues with using the convergence of + the virial that should probably be addressed eventually. state->veta has better properies, + but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could + generate the new shake_vir, but test the veta value for convergence. This will take some thought. */ + + if (iterate.bIterationActive && + done_iterating(cr, fplog, step, &iterate, bFirstIterate, + trace(shake_vir), &tracevir)) + { + break; + } + bFirstIterate = FALSE; + } + - /* only add constraint dvdl after constraints */ - enerd->term[F_DVDL_CONSTR] += dvdl_constr; + if (!bVV || bRerunMD) + { + /* sum up the foreign energy and dhdl terms for md and sd. currently done every step so that dhdl is correct in the .edr */ + sum_dhdl(enerd, state->lambda, ir->fepvals); + } + update_box(fplog, step, ir, mdatoms, state, graph, f, + ir->nstlist == -1 ? &nlh.scale_tot : NULL, pcoupl_mu, nrnb, wcycle, upd, bInitStep, FALSE); + + /* ################# END UPDATE STEP 2 ################# */ + /* #### We now have r(t+dt) and v(t+dt/2) ############# */ + + /* The coordinates (x) were unshifted in update */ + if (bFFscan && (shellfc == NULL || bConverged)) + { + if (print_forcefield(fplog, enerd->term, mdatoms->homenr, + f, NULL, xcopy, + &(top_global->mols), mdatoms->massT, pres)) + { + gmx_finalize_par(); + + fprintf(stderr, "\n"); + exit(0); + } + } + if (!bGStat) + { + /* We will not sum ekinh_old, + * so signal that we still have to do it. 
+ */ + bSumEkinhOld = TRUE; + } + + if (bTCR) + { + /* Only do GCT when the relaxation of shells (minimization) has converged, + * otherwise we might be coupling to bogus energies. + * In parallel we must always do this, because the other sims might + * update the FF. + */ + + /* Since this is called with the new coordinates state->x, I assume + * we want the new box state->box too. / EL 20040121 + */ + do_coupling(fplog, oenv, nfile, fnm, tcr, t, step, enerd->term, fr, + ir, MASTER(cr), + mdatoms, &(top->idef), mu_aver, + top_global->mols.nr, cr, + state->box, total_vir, pres, + mu_tot, state->x, f, bConverged); + debug_gmx(); + } + + /* ######### BEGIN PREPARING EDR OUTPUT ########### */ + + /* use the directly determined last velocity, not actually the averaged half steps */ + if (bTrotter && ir->eI == eiVV) + { + enerd->term[F_EKIN] = last_ekin; + } + enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN]; + + if (bVV) + { + enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity; + } + else + { + enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + compute_conserved_from_auxiliary(ir, state, &MassQ); + } + /* Check for excessively large energies */ + if (bIonize) + { +#ifdef GMX_DOUBLE + real etot_max = 1e200; +#else + real etot_max = 1e30; +#endif + if (fabs(enerd->term[F_ETOT]) > etot_max) + { + fprintf(stderr, "Energy too large (%g), giving up\n", + enerd->term[F_ETOT]); + } + } + /* ######### END PREPARING EDR OUTPUT ########### */ + + /* Time for performance */ + if (((step % stepout) == 0) || bLastStep) + { + runtime_upd_proc(runtime); + } + + /* Output stuff */ + if (MASTER(cr)) + { + gmx_bool do_dr, do_or; + + if (fplog && do_log && bDoExpanded) + { + /* only needed if doing expanded ensemble */ + PrintFreeEnergyInfoToFile(fplog, ir->fepvals, ir->expandedvals, ir->bSimTemp ? 
ir->simtempvals : NULL, + &df_history, state->fep_state, ir->nstlog, step); + } + if (!(bStartingFromCpt && (EI_VV(ir->eI)))) + { + if (bCalcEner) + { + upd_mdebin(mdebin, bDoDHDL, TRUE, + t, mdatoms->tmass, enerd, state, + ir->fepvals, ir->expandedvals, lastbox, + shake_vir, force_vir, total_vir, pres, + ekind, mu_tot, constr); + } + else + { + upd_mdebin_step(mdebin); + } + + do_dr = do_per_step(step, ir->nstdisreout); + do_or = do_per_step(step, ir->nstorireout); + + print_ebin(outf->fp_ene, do_ene, do_dr, do_or, do_log ? fplog : NULL, + step, t, + eprNORMAL, bCompact, mdebin, fcd, groups, &(ir->opts)); + } + if (ir->ePull != epullNO) + { + pull_print_output(ir->pull, step, t); + } + + if (do_per_step(step, ir->nstlog)) + { + if (fflush(fplog) != 0) + { + gmx_fatal(FARGS, "Cannot flush logfile - maybe you are out of disk space?"); + } + } + } + if (bDoExpanded) + { + /* Have to do this part after outputting the logfile and the edr file */ + state->fep_state = lamnew; + for (i = 0; i < efptNR; i++) + { + state_global->lambda[i] = ir->fepvals->all_lambda[i][lamnew]; + } + } + /* Remaining runtime */ + if (MULTIMASTER(cr) && (do_verbose || gmx_got_usr_signal()) && !bPMETuneRunning) + { + if (shellfc) + { + fprintf(stderr, "\n"); + } + print_time(stderr, runtime, step, ir, cr); + } + + /* Replica exchange */ + bExchanged = FALSE; + if ((repl_ex_nst > 0) && (step > 0) && !bLastStep && + do_per_step(step, repl_ex_nst)) + { + bExchanged = replica_exchange(fplog, cr, repl_ex, + state_global, enerd, + state, step, t); + + if (bExchanged && DOMAINDECOMP(cr)) + { + dd_partition_system(fplog, step, cr, TRUE, 1, + state_global, top_global, ir, + state, &f, mdatoms, top, fr, + vsite, shellfc, constr, + nrnb, wcycle, FALSE); + } + } + + bFirstStep = FALSE; + bInitStep = FALSE; + bStartingFromCpt = FALSE; + + /* ####### SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */ + /* With all integrators, except VV, we need to retain the pressure + * at the current step 
for coupling at the next step. + */ + if ((state->flags & (1<nstpcouple > 0 && step % ir->nstpcouple == 0))) + { + /* Store the pressure in t_state for pressure coupling + * at the next MD step. + */ + copy_mat(pres, state->pres_prev); + } + + /* ####### END SET VARIABLES FOR NEXT ITERATION ###### */ + + if ( (membed != NULL) && (!bLastStep) ) + { + rescale_membed(step_rel, membed, state_global->x); + } + + if (bRerunMD) + { + if (MASTER(cr)) + { + /* read next frame from input trajectory */ + bNotLastFrame = read_next_frame(oenv, status, &rerun_fr); + } + + if (PAR(cr)) + { + rerun_parallel_comm(cr, &rerun_fr, &bNotLastFrame); + } + } + + if (!bRerunMD || !rerun_fr.bStep) + { + /* increase the MD step number */ + step++; + step_rel++; + } + + cycles = wallcycle_stop(wcycle, ewcSTEP); + if (DOMAINDECOMP(cr) && wcycle) + { + dd_cycles_add(cr->dd, cycles, ddCyclStep); + } + + if (bPMETuneRunning || bPMETuneTry) + { + /* PME grid + cut-off optimization with GPUs or PME nodes */ + + /* Count the total cycles over the last steps */ + cycles_pmes += cycles; + + /* We can only switch cut-off at NS steps */ + if (step % ir->nstlist == 0) + { + /* PME grid + cut-off optimization with GPUs or PME nodes */ + if (bPMETuneTry) + { + if (DDMASTER(cr->dd)) + { + /* PME node load is too high, start tuning */ + bPMETuneRunning = (dd_pme_f_ratio(cr->dd) >= 1.05); + } + dd_bcast(cr->dd, sizeof(gmx_bool), &bPMETuneRunning); + + if (bPMETuneRunning || step_rel > ir->nstlist*50) + { + bPMETuneTry = FALSE; + } + } + if (bPMETuneRunning) + { + /* init_step might not be a multiple of nstlist, + * but the first cycle is always skipped anyhow. + */ + bPMETuneRunning = + pme_load_balance(pme_loadbal, cr, + (bVerbose && MASTER(cr)) ? 
stderr : NULL, + fplog, + ir, state, cycles_pmes, + fr->ic, fr->nbv, &fr->pmedata, + step); + + /* Update constants in forcerec/inputrec to keep them in sync with fr->ic */ + fr->ewaldcoeff = fr->ic->ewaldcoeff; + fr->rlist = fr->ic->rlist; + fr->rlistlong = fr->ic->rlistlong; + fr->rcoulomb = fr->ic->rcoulomb; + fr->rvdw = fr->ic->rvdw; + } + cycles_pmes = 0; + } + } + + if (step_rel == wcycle_get_reset_counters(wcycle) || + gs.set[eglsRESETCOUNTERS] != 0) + { + /* Reset all the counters related to performance over the run */ + reset_all_counters(fplog, cr, step, &step_rel, ir, wcycle, nrnb, runtime, + fr->nbv != NULL && fr->nbv->bUseGPU ? fr->nbv->cu_nbv : NULL); + wcycle_set_reset_counters(wcycle, -1); + if (!(cr->duty & DUTY_PME)) + { + /* Tell our PME node to reset its counters */ + gmx_pme_send_resetcounters(cr, step); + } + /* Correct max_hours for the elapsed time */ + max_hours -= run_time/(60.0*60.0); + bResetCountersHalfMaxH = FALSE; + gs.set[eglsRESETCOUNTERS] = 0; + } + + } + /* End of main MD loop */ + debug_gmx(); + + /* Stop the time */ + runtime_end(runtime); + + if (bRerunMD && MASTER(cr)) + { + close_trj(status); + } + + if (!(cr->duty & DUTY_PME)) + { + /* Tell the PME only node to finish */ + gmx_pme_send_finish(cr); + } + + if (MASTER(cr)) + { + if (ir->nstcalcenergy > 0 && !bRerunMD) + { + print_ebin(outf->fp_ene, FALSE, FALSE, FALSE, fplog, step, t, + eprAVER, FALSE, mdebin, fcd, groups, &(ir->opts)); + } + } + + done_mdoutf(outf); + + debug_gmx(); + + if (ir->nstlist == -1 && nlh.nns > 0 && fplog) + { + fprintf(fplog, "Average neighborlist lifetime: %.1f steps, std.dev.: %.1f steps\n", nlh.s1/nlh.nns, sqrt(nlh.s2/nlh.nns - sqr(nlh.s1/nlh.nns))); + fprintf(fplog, "Average number of atoms that crossed the half buffer length: %.1f\n\n", nlh.ab/nlh.nns); + } + + if (pme_loadbal != NULL) + { + pme_loadbal_done(pme_loadbal, cr, fplog, + fr->nbv != NULL && fr->nbv->bUseGPU); + } + + if (shellfc && fplog) + { + fprintf(fplog, "Fraction of 
iterations that converged: %.2f %%\n", + (nconverged*100.0)/step_rel); + fprintf(fplog, "Average number of force evaluations per MD step: %.2f\n\n", + tcount/step_rel); + } + + if (repl_ex_nst > 0 && MASTER(cr)) + { + print_replica_exchange_statistics(fplog, repl_ex); + } + + runtime->nsteps_done = step_rel; + + return 0; +} diff --cc src/programs/mdrun/runner.c index 8ff74dbdd5,0000000000..c820938cf6 mode 100644,000000..100644 --- a/src/programs/mdrun/runner.c +++ b/src/programs/mdrun/runner.c @@@ -1,1689 -1,0 +1,1689 @@@ +/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*- + * + * + * This source code is part of + * + * G R O M A C S + * + * GROningen MAchine for Chemical Simulations + * + * VERSION 3.2.0 + * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others. + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team, + * check out http://www.gromacs.org for more information. + + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * If you want to redistribute modifications, please consider that + * scientific software is very special. Version control is crucial - + * bugs must be traceable. We will be happy to consider code for + * inclusion in the official distribution, but derived work must not + * be called official GROMACS. Details are found in the README & COPYING + * files - if they are missing, get the official version at www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the papers on the package - you can find them in the top README file. 
+ * + * For more info, check our website at http://www.gromacs.org + * + * And Hey: + * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon + */ +#ifdef HAVE_CONFIG_H +#include +#endif +#include +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#include +#include + +#include "typedefs.h" +#include "smalloc.h" +#include "sysstuff.h" +#include "statutil.h" +#include "mdrun.h" +#include "md_logging.h" +#include "md_support.h" +#include "network.h" +#include "pull.h" +#include "pull_rotation.h" +#include "names.h" +#include "disre.h" +#include "orires.h" +#include "pme.h" +#include "mdatoms.h" +#include "repl_ex.h" +#include "qmmm.h" +#include "domdec.h" +#include "partdec.h" +#include "coulomb.h" +#include "constr.h" +#include "mvdata.h" +#include "checkpoint.h" +#include "mtop_util.h" +#include "sighandler.h" +#include "tpxio.h" +#include "txtdump.h" +#include "gmx_detect_hardware.h" +#include "gmx_omp_nthreads.h" +#include "pull_rotation.h" +#include "calc_verletbuf.h" +#include "../mdlib/nbnxn_search.h" +#include "../mdlib/nbnxn_consts.h" +#include "gmx_fatal_collective.h" +#include "membed.h" +#include "macros.h" +#include "gmx_omp.h" +#include "gmx_thread_affinity.h" + +#include "gromacs/utility/gmxmpi.h" + +#ifdef GMX_FAHCORE +#include "corewrap.h" +#endif + +#include "gpu_utils.h" +#include "nbnxn_cuda_data_mgmt.h" + +typedef struct { + gmx_integrator_t *func; +} gmx_intp_t; + +/* The array should match the eI array in include/types/enums.h */ +const gmx_intp_t integrator[eiNR] = { {do_md}, {do_steep}, {do_cg}, {do_md}, {do_md}, {do_nm}, {do_lbfgs}, {do_tpi}, {do_tpi}, {do_md}, {do_md}, {do_md}}; + +gmx_large_int_t deform_init_init_step_tpx; +matrix deform_init_box_tpx; +#ifdef GMX_THREAD_MPI +tMPI_Thread_mutex_t deform_init_box_mutex = TMPI_THREAD_MUTEX_INITIALIZER; +#endif + + +#ifdef GMX_THREAD_MPI +struct mdrunner_arglist +{ + gmx_hw_opt_t *hw_opt; + FILE *fplog; + t_commrec *cr; + int nfile; + const t_filenm *fnm; + output_env_t oenv; + gmx_bool 
bVerbose; + gmx_bool bCompact; + int nstglobalcomm; + ivec ddxyz; + int dd_node_order; + real rdd; + real rconstr; + const char *dddlb_opt; + real dlb_scale; + const char *ddcsx; + const char *ddcsy; + const char *ddcsz; + const char *nbpu_opt; + gmx_large_int_t nsteps_cmdline; + int nstepout; + int resetstep; + int nmultisim; + int repl_ex_nst; + int repl_ex_nex; + int repl_ex_seed; + real pforce; + real cpt_period; + real max_hours; + const char *deviceOptions; + unsigned long Flags; + int ret; /* return value */ +}; + + +/* The function used for spawning threads. Extracts the mdrunner() + arguments from its one argument and calls mdrunner(), after making + a commrec. */ +static void mdrunner_start_fn(void *arg) +{ + struct mdrunner_arglist *mda = (struct mdrunner_arglist*)arg; + struct mdrunner_arglist mc = *mda; /* copy the arg list to make sure + that it's thread-local. This doesn't + copy pointed-to items, of course, + but those are all const. */ + t_commrec *cr; /* we need a local version of this */ + FILE *fplog = NULL; + t_filenm *fnm; + + fnm = dup_tfn(mc.nfile, mc.fnm); + + cr = init_par_threads(mc.cr); + + if (MASTER(cr)) + { + fplog = mc.fplog; + } + + mda->ret = mdrunner(mc.hw_opt, fplog, cr, mc.nfile, fnm, mc.oenv, + mc.bVerbose, mc.bCompact, mc.nstglobalcomm, + mc.ddxyz, mc.dd_node_order, mc.rdd, + mc.rconstr, mc.dddlb_opt, mc.dlb_scale, + mc.ddcsx, mc.ddcsy, mc.ddcsz, + mc.nbpu_opt, + mc.nsteps_cmdline, mc.nstepout, mc.resetstep, + mc.nmultisim, mc.repl_ex_nst, mc.repl_ex_nex, mc.repl_ex_seed, mc.pforce, + mc.cpt_period, mc.max_hours, mc.deviceOptions, mc.Flags); +} + +/* called by mdrunner() to start a specific number of threads (including + the main thread) for thread-parallel runs. This in turn calls mdrunner() + for each thread. + All options besides nthreads are the same as for mdrunner(). 
*/ +static t_commrec *mdrunner_start_threads(gmx_hw_opt_t *hw_opt, + FILE *fplog, t_commrec *cr, int nfile, + const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose, + gmx_bool bCompact, int nstglobalcomm, + ivec ddxyz, int dd_node_order, real rdd, real rconstr, + const char *dddlb_opt, real dlb_scale, + const char *ddcsx, const char *ddcsy, const char *ddcsz, + const char *nbpu_opt, + gmx_large_int_t nsteps_cmdline, + int nstepout, int resetstep, + int nmultisim, int repl_ex_nst, int repl_ex_nex, int repl_ex_seed, + real pforce, real cpt_period, real max_hours, + const char *deviceOptions, unsigned long Flags) +{ + int ret; + struct mdrunner_arglist *mda; + t_commrec *crn; /* the new commrec */ + t_filenm *fnmn; + + /* first check whether we even need to start tMPI */ + if (hw_opt->nthreads_tmpi < 2) + { + return cr; + } + + /* a few small, one-time, almost unavoidable memory leaks: */ + snew(mda, 1); + fnmn = dup_tfn(nfile, fnm); + + /* fill the data structure to pass as void pointer to thread start fn */ + mda->hw_opt = hw_opt; + mda->fplog = fplog; + mda->cr = cr; + mda->nfile = nfile; + mda->fnm = fnmn; + mda->oenv = oenv; + mda->bVerbose = bVerbose; + mda->bCompact = bCompact; + mda->nstglobalcomm = nstglobalcomm; + mda->ddxyz[XX] = ddxyz[XX]; + mda->ddxyz[YY] = ddxyz[YY]; + mda->ddxyz[ZZ] = ddxyz[ZZ]; + mda->dd_node_order = dd_node_order; + mda->rdd = rdd; + mda->rconstr = rconstr; + mda->dddlb_opt = dddlb_opt; + mda->dlb_scale = dlb_scale; + mda->ddcsx = ddcsx; + mda->ddcsy = ddcsy; + mda->ddcsz = ddcsz; + mda->nbpu_opt = nbpu_opt; + mda->nsteps_cmdline = nsteps_cmdline; + mda->nstepout = nstepout; + mda->resetstep = resetstep; + mda->nmultisim = nmultisim; + mda->repl_ex_nst = repl_ex_nst; + mda->repl_ex_nex = repl_ex_nex; + mda->repl_ex_seed = repl_ex_seed; + mda->pforce = pforce; + mda->cpt_period = cpt_period; + mda->max_hours = max_hours; + mda->deviceOptions = deviceOptions; + mda->Flags = Flags; + + /* now spawn new threads that start 
mdrunner_start_fn(), while + the main thread returns, we set thread affinity later */ + ret = tMPI_Init_fn(TRUE, hw_opt->nthreads_tmpi, TMPI_AFFINITY_NONE, + mdrunner_start_fn, (void*)(mda) ); + if (ret != TMPI_SUCCESS) + { + return NULL; + } + + /* make a new comm_rec to reflect the new situation */ + crn = init_par_threads(cr); + return crn; +} + + +static int get_tmpi_omp_thread_division(const gmx_hw_info_t *hwinfo, + const gmx_hw_opt_t *hw_opt, + int nthreads_tot, + int ngpu) +{ + int nthreads_tmpi; + + /* There are no separate PME nodes here, as we ensured in + * check_and_update_hw_opt that nthreads_tmpi>0 with PME nodes + * and a conditional ensures we would not have ended up here. + * Note that separate PME nodes might be switched on later. + */ + if (ngpu > 0) + { + nthreads_tmpi = ngpu; + if (nthreads_tot > 0 && nthreads_tot < nthreads_tmpi) + { + nthreads_tmpi = nthreads_tot; + } + } + else if (hw_opt->nthreads_omp > 0) + { + /* Here we could oversubscribe, when we do, we issue a warning later */ + nthreads_tmpi = max(1, nthreads_tot/hw_opt->nthreads_omp); + } + else + { + /* TODO choose nthreads_omp based on hardware topology + when we have a hardware topology detection library */ + /* In general, when running up to 4 threads, OpenMP should be faster. + * Note: on AMD Bulldozer we should avoid running OpenMP over two dies. + * On Intel>=Nehalem running OpenMP on a single CPU is always faster, + * even on two CPUs it's usually faster (but with many OpenMP threads + * it could be faster not to use HT, currently we always use HT). + * On Nehalem/Westmere we want to avoid running 16 threads over + * two CPUs with HT, so we need a limit<16; thus we use 12. + * A reasonable limit for Intel Sandy and Ivy bridge, + * not knowing the topology, is 16 threads. 
+ */ + const int nthreads_omp_always_faster = 4; + const int nthreads_omp_always_faster_Nehalem = 12; + const int nthreads_omp_always_faster_SandyBridge = 16; + const int first_model_Nehalem = 0x1A; + const int first_model_SandyBridge = 0x2A; + gmx_bool bIntel_Family6; + + bIntel_Family6 = + (gmx_cpuid_vendor(hwinfo->cpuid_info) == GMX_CPUID_VENDOR_INTEL && + gmx_cpuid_family(hwinfo->cpuid_info) == 6); + + if (nthreads_tot <= nthreads_omp_always_faster || + (bIntel_Family6 && + ((gmx_cpuid_model(hwinfo->cpuid_info) >= nthreads_omp_always_faster_Nehalem && nthreads_tot <= nthreads_omp_always_faster_Nehalem) || + (gmx_cpuid_model(hwinfo->cpuid_info) >= nthreads_omp_always_faster_SandyBridge && nthreads_tot <= nthreads_omp_always_faster_SandyBridge)))) + { + /* Use pure OpenMP parallelization */ + nthreads_tmpi = 1; + } + else + { + /* Don't use OpenMP parallelization */ + nthreads_tmpi = nthreads_tot; + } + } + + return nthreads_tmpi; +} + + +/* Get the number of threads to use for thread-MPI based on how many + * were requested, which algorithms we're using, + * and how many particles there are. + * At the point we have already called check_and_update_hw_opt. + * Thus all options should be internally consistent and consistent + * with the hardware, except that ntmpi could be larger than #GPU. + */ +static int get_nthreads_mpi(gmx_hw_info_t *hwinfo, + gmx_hw_opt_t *hw_opt, + t_inputrec *inputrec, gmx_mtop_t *mtop, + const t_commrec *cr, + FILE *fplog) +{ + int nthreads_hw, nthreads_tot_max, nthreads_tmpi, nthreads_new, ngpu; + int min_atoms_per_mpi_thread; + char *env; + char sbuf[STRLEN]; + gmx_bool bCanUseGPU; + + if (hw_opt->nthreads_tmpi > 0) + { + /* Trivial, return right away */ + return hw_opt->nthreads_tmpi; + } + + nthreads_hw = hwinfo->nthreads_hw_avail; + + /* How many total (#tMPI*#OpenMP) threads can we start? 
*/ + if (hw_opt->nthreads_tot > 0) + { + nthreads_tot_max = hw_opt->nthreads_tot; + } + else + { + nthreads_tot_max = nthreads_hw; + } + + bCanUseGPU = (inputrec->cutoff_scheme == ecutsVERLET && hwinfo->bCanUseGPU); + if (bCanUseGPU) + { + ngpu = hwinfo->gpu_info.ncuda_dev_use; + } + else + { + ngpu = 0; + } + + nthreads_tmpi = + get_tmpi_omp_thread_division(hwinfo, hw_opt, nthreads_tot_max, ngpu); + + if (inputrec->eI == eiNM || EI_TPI(inputrec->eI)) + { + /* Steps are divided over the nodes iso splitting the atoms */ + min_atoms_per_mpi_thread = 0; + } + else + { + if (bCanUseGPU) + { + min_atoms_per_mpi_thread = MIN_ATOMS_PER_GPU; + } + else + { + min_atoms_per_mpi_thread = MIN_ATOMS_PER_MPI_THREAD; + } + } + + /* Check if an algorithm does not support parallel simulation. */ + if (nthreads_tmpi != 1 && + ( inputrec->eI == eiLBFGS || + inputrec->coulombtype == eelEWALD ) ) + { + nthreads_tmpi = 1; + + md_print_warn(cr, fplog, "The integration or electrostatics algorithm doesn't support parallel runs. 
Using a single thread-MPI thread.\n"); + if (hw_opt->nthreads_tmpi > nthreads_tmpi) + { + gmx_fatal(FARGS, "You asked for more than 1 thread-MPI thread, but an algorithm doesn't support that"); + } + } + else if (mtop->natoms/nthreads_tmpi < min_atoms_per_mpi_thread) + { + /* the thread number was chosen automatically, but there are too many + threads (too few atoms per thread) */ + nthreads_new = max(1, mtop->natoms/min_atoms_per_mpi_thread); + + /* Avoid partial use of Hyper-Threading */ + if (gmx_cpuid_x86_smt(hwinfo->cpuid_info) == GMX_CPUID_X86_SMT_ENABLED && + nthreads_new > nthreads_hw/2 && nthreads_new < nthreads_hw) + { + nthreads_new = nthreads_hw/2; + } + + /* Avoid large prime numbers in the thread count */ + if (nthreads_new >= 6) + { + /* Use only 6,8,10 with additional factors of 2 */ + int fac; + + fac = 2; + while (3*fac*2 <= nthreads_new) + { + fac *= 2; + } + + nthreads_new = (nthreads_new/fac)*fac; + } + else + { + /* Avoid 5 */ + if (nthreads_new == 5) + { + nthreads_new = 4; + } + } + + nthreads_tmpi = nthreads_new; + + fprintf(stderr, "\n"); + fprintf(stderr, "NOTE: Parallelization is limited by the small number of atoms,\n"); + fprintf(stderr, " only starting %d thread-MPI threads.\n", nthreads_tmpi); + fprintf(stderr, " You can use the -nt and/or -ntmpi option to optimize the number of threads.\n\n"); + } + + return nthreads_tmpi; +} +#endif /* GMX_THREAD_MPI */ + + +/* Environment variable for setting nstlist */ +static const char* NSTLIST_ENVVAR = "GMX_NSTLIST"; +/* Try to increase nstlist when using a GPU with nstlist less than this */ +static const int NSTLIST_GPU_ENOUGH = 20; +/* Increase nstlist until the non-bonded cost increases more than this factor */ +static const float NBNXN_GPU_LIST_OK_FAC = 1.25; +/* Don't increase nstlist beyond a non-bonded cost increases of this factor */ +static const float NBNXN_GPU_LIST_MAX_FAC = 1.40; + +/* Try to increase nstlist when running on a GPU */ +static void increase_nstlist(FILE *fp, 
t_commrec *cr, + t_inputrec *ir, const gmx_mtop_t *mtop, matrix box) +{ + char *env; + int nstlist_orig, nstlist_prev; + verletbuf_list_setup_t ls; + real rlist_inc, rlist_ok, rlist_max, rlist_new, rlist_prev; + int i; + t_state state_tmp; + gmx_bool bBox, bDD, bCont; + const char *nstl_fmt = "\nFor optimal performance with a GPU nstlist (now %d) should be larger.\nThe optimum depends on your CPU and GPU resources.\nYou might want to try several nstlist values.\n"; + const char *vbd_err = "Can not increase nstlist for GPU run because verlet-buffer-drift is not set or used"; + const char *box_err = "Can not increase nstlist for GPU run because the box is too small"; + const char *dd_err = "Can not increase nstlist for GPU run because of domain decomposition limitations"; + char buf[STRLEN]; + + /* Number of + nstlist alternative values to try when switching */ + const int nstl[] = { 20, 25, 40, 50 }; +#define NNSTL sizeof(nstl)/sizeof(nstl[0]) + + env = getenv(NSTLIST_ENVVAR); + if (env == NULL) + { + if (fp != NULL) + { + fprintf(fp, nstl_fmt, ir->nstlist); + } + } + + if (ir->verletbuf_drift == 0) + { + gmx_fatal(FARGS, "You are using an old tpr file with a GPU, please generate a new tpr file with an up to date version of grompp"); + } + + if (ir->verletbuf_drift < 0) + { + if (MASTER(cr)) + { + fprintf(stderr, "%s\n", vbd_err); + } + if (fp != NULL) + { + fprintf(fp, "%s\n", vbd_err); + } + + return; + } + + nstlist_orig = ir->nstlist; + if (env != NULL) + { + sprintf(buf, "Getting nstlist from environment variable GMX_NSTLIST=%s", env); + if (MASTER(cr)) + { + fprintf(stderr, "%s\n", buf); + } + if (fp != NULL) + { + fprintf(fp, "%s\n", buf); + } + sscanf(env, "%d", &ir->nstlist); + } + + verletbuf_get_list_setup(TRUE, &ls); + + /* Allow rlist to make the list double the size of the cut-off sphere */ + rlist_inc = nbnxn_get_rlist_effective_inc(NBNXN_GPU_CLUSTER_SIZE, mtop->natoms/det(box)); + rlist_ok = (max(ir->rvdw, ir->rcoulomb) + 
rlist_inc)*pow(NBNXN_GPU_LIST_OK_FAC, 1.0/3.0) - rlist_inc; + rlist_max = (max(ir->rvdw, ir->rcoulomb) + rlist_inc)*pow(NBNXN_GPU_LIST_MAX_FAC, 1.0/3.0) - rlist_inc; + if (debug) + { + fprintf(debug, "GPU nstlist tuning: rlist_inc %.3f rlist_max %.3f\n", + rlist_inc, rlist_max); + } + + i = 0; + nstlist_prev = nstlist_orig; + rlist_prev = ir->rlist; + do + { + if (env == NULL) + { + ir->nstlist = nstl[i]; + } + + /* Set the pair-list buffer size in ir */ + calc_verlet_buffer_size(mtop, det(box), ir, ir->verletbuf_drift, &ls, + NULL, &rlist_new); + + /* Does rlist fit in the box? */ + bBox = (sqr(rlist_new) < max_cutoff2(ir->ePBC, box)); + bDD = TRUE; + if (bBox && DOMAINDECOMP(cr)) + { + /* Check if rlist fits in the domain decomposition */ + if (inputrec2nboundeddim(ir) < DIM) + { + gmx_incons("Changing nstlist with domain decomposition and unbounded dimensions is not implemented yet"); + } + copy_mat(box, state_tmp.box); + bDD = change_dd_cutoff(cr, &state_tmp, ir, rlist_new); + } + + bCont = FALSE; + + if (env == NULL) + { + if (bBox && bDD && rlist_new <= rlist_max) + { + /* Increase nstlist */ + nstlist_prev = ir->nstlist; + rlist_prev = rlist_new; + bCont = (i+1 < NNSTL && rlist_new < rlist_ok); + } + else + { + /* Stick with the previous nstlist */ + ir->nstlist = nstlist_prev; + rlist_new = rlist_prev; + bBox = TRUE; + bDD = TRUE; + } + } + + i++; + } + while (bCont); + + if (!bBox || !bDD) + { + gmx_warning(!bBox ? box_err : dd_err); + if (fp != NULL) + { + fprintf(fp, "\n%s\n", bBox ? 
box_err : dd_err); + } + ir->nstlist = nstlist_orig; + } + else if (ir->nstlist != nstlist_orig || rlist_new != ir->rlist) + { + sprintf(buf, "Changing nstlist from %d to %d, rlist from %g to %g", + nstlist_orig, ir->nstlist, + ir->rlist, rlist_new); + if (MASTER(cr)) + { + fprintf(stderr, "%s\n\n", buf); + } + if (fp != NULL) + { + fprintf(fp, "%s\n\n", buf); + } + ir->rlist = rlist_new; + ir->rlistlong = rlist_new; + } +} + +static void prepare_verlet_scheme(FILE *fplog, + gmx_hw_info_t *hwinfo, + t_commrec *cr, - gmx_hw_opt_t *hw_opt, + const char *nbpu_opt, + t_inputrec *ir, + const gmx_mtop_t *mtop, + matrix box, + gmx_bool *bUseGPU) +{ + /* Here we only check for GPU usage on the MPI master process, + * as here we don't know how many GPUs we will use yet. + * We check for a GPU on all processes later. + */ + *bUseGPU = hwinfo->bCanUseGPU || (getenv("GMX_EMULATE_GPU") != NULL); + + if (ir->verletbuf_drift > 0) + { + /* Update the Verlet buffer size for the current run setup */ + verletbuf_list_setup_t ls; + real rlist_new; + + /* Here we assume CPU acceleration is on. But as currently + * calc_verlet_buffer_size gives the same results for 4x8 and 4x4 + * and 4x2 gives a larger buffer than 4x4, this is ok. 
+ */ + verletbuf_get_list_setup(*bUseGPU, &ls); + + calc_verlet_buffer_size(mtop, det(box), ir, + ir->verletbuf_drift, &ls, + NULL, &rlist_new); + if (rlist_new != ir->rlist) + { + if (fplog != NULL) + { + fprintf(fplog, "\nChanging rlist from %g to %g for non-bonded %dx%d atom kernels\n\n", + ir->rlist, rlist_new, + ls.cluster_size_i, ls.cluster_size_j); + } + ir->rlist = rlist_new; + ir->rlistlong = rlist_new; + } + } + + /* With GPU or emulation we should check nstlist for performance */ + if ((EI_DYNAMICS(ir->eI) && + *bUseGPU && + ir->nstlist < NSTLIST_GPU_ENOUGH) || + getenv(NSTLIST_ENVVAR) != NULL) + { + /* Choose a better nstlist */ + increase_nstlist(fplog, cr, ir, mtop, box); + } +} + +static void convert_to_verlet_scheme(FILE *fplog, + t_inputrec *ir, + gmx_mtop_t *mtop, real box_vol) +{ + char *conv_mesg = "Converting input file with group cut-off scheme to the Verlet cut-off scheme"; + + md_print_warn(NULL, fplog, "%s\n", conv_mesg); + + ir->cutoff_scheme = ecutsVERLET; + ir->verletbuf_drift = 0.005; + + if (ir->rcoulomb != ir->rvdw) + { + gmx_fatal(FARGS, "The VdW and Coulomb cut-offs are different, whereas the Verlet scheme only supports equal cut-offs"); + } + + if (ir->vdwtype == evdwUSER || EEL_USER(ir->coulombtype)) + { + gmx_fatal(FARGS, "User non-bonded potentials are not (yet) supported with the Verlet scheme"); + } + else if (EVDW_SWITCHED(ir->vdwtype) || EEL_SWITCHED(ir->coulombtype)) + { + md_print_warn(NULL, fplog, "Converting switched or shifted interactions to a shifted potential (without force shift), this will lead to slightly different interaction potentials"); + + if (EVDW_SWITCHED(ir->vdwtype)) + { + ir->vdwtype = evdwCUT; + } + if (EEL_SWITCHED(ir->coulombtype)) + { + if (EEL_FULL(ir->coulombtype)) + { + /* With full electrostatic only PME can be switched */ + ir->coulombtype = eelPME; + } + else + { + md_print_warn(NULL, fplog, "NOTE: Replacing %s electrostatics with reaction-field with epsilon-rf=inf\n", 
eel_names[ir->coulombtype]); + ir->coulombtype = eelRF; + ir->epsilon_rf = 0.0; + } + } + + /* We set the target energy drift to a small number. + * Note that this is only for testing. For production the user + * should think about this and set the mdp options. + */ + ir->verletbuf_drift = 1e-4; + } + + if (inputrec2nboundeddim(ir) != 3) + { + gmx_fatal(FARGS, "Can only convert old tpr files to the Verlet cut-off scheme with 3D pbc"); + } + + if (ir->efep != efepNO || ir->implicit_solvent != eisNO) + { + gmx_fatal(FARGS, "Will not convert old tpr files to the Verlet cut-off scheme with free-energy calculations or implicit solvent"); + } + + if (EI_DYNAMICS(ir->eI) && !(EI_MD(ir->eI) && ir->etc == etcNO)) + { + verletbuf_list_setup_t ls; + + verletbuf_get_list_setup(FALSE, &ls); + calc_verlet_buffer_size(mtop, box_vol, ir, ir->verletbuf_drift, &ls, + NULL, &ir->rlist); + } + else + { + ir->verletbuf_drift = -1; + ir->rlist = 1.05*max(ir->rvdw, ir->rcoulomb); + } + + gmx_mtop_remove_chargegroups(mtop); +} + +static void check_and_update_hw_opt(gmx_hw_opt_t *hw_opt, + int cutoff_scheme, + gmx_bool bIsSimMaster) +{ + gmx_omp_nthreads_read_env(&hw_opt->nthreads_omp, bIsSimMaster); + +#ifndef GMX_THREAD_MPI + if (hw_opt->nthreads_tot > 0) + { + gmx_fatal(FARGS, "Setting the total number of threads is only supported with thread-MPI and Gromacs was compiled without thread-MPI"); + } + if (hw_opt->nthreads_tmpi > 0) + { + gmx_fatal(FARGS, "Setting the number of thread-MPI threads is only supported with thread-MPI and Gromacs was compiled without thread-MPI"); + } +#endif + + if (hw_opt->nthreads_tot > 0 && hw_opt->nthreads_omp_pme <= 0) + { + /* We have the same number of OpenMP threads for PP and PME processes, + * thus we can perform several consistency checks. 
+ */ + if (hw_opt->nthreads_tmpi > 0 && + hw_opt->nthreads_omp > 0 && + hw_opt->nthreads_tot != hw_opt->nthreads_tmpi*hw_opt->nthreads_omp) + { + gmx_fatal(FARGS, "The total number of threads requested (%d) does not match the thread-MPI threads (%d) times the OpenMP threads (%d) requested", + hw_opt->nthreads_tot, hw_opt->nthreads_tmpi, hw_opt->nthreads_omp); + } + + if (hw_opt->nthreads_tmpi > 0 && + hw_opt->nthreads_tot % hw_opt->nthreads_tmpi != 0) + { + gmx_fatal(FARGS, "The total number of threads requested (%d) is not divisible by the number of thread-MPI threads requested (%d)", + hw_opt->nthreads_tot, hw_opt->nthreads_tmpi); + } + + if (hw_opt->nthreads_omp > 0 && + hw_opt->nthreads_tot % hw_opt->nthreads_omp != 0) + { + gmx_fatal(FARGS, "The total number of threads requested (%d) is not divisible by the number of OpenMP threads requested (%d)", + hw_opt->nthreads_tot, hw_opt->nthreads_omp); + } + + if (hw_opt->nthreads_tmpi > 0 && + hw_opt->nthreads_omp <= 0) + { + hw_opt->nthreads_omp = hw_opt->nthreads_tot/hw_opt->nthreads_tmpi; + } + } + +#ifndef GMX_OPENMP + if (hw_opt->nthreads_omp > 1) + { + gmx_fatal(FARGS, "OpenMP threads are requested, but Gromacs was compiled without OpenMP support"); + } +#endif + + if (cutoff_scheme == ecutsGROUP) + { + /* We only have OpenMP support for PME only nodes */ + if (hw_opt->nthreads_omp > 1) + { + gmx_fatal(FARGS, "OpenMP threads have been requested with cut-off scheme %s, but these are only supported with cut-off scheme %s", + ecutscheme_names[cutoff_scheme], + ecutscheme_names[ecutsVERLET]); + } + hw_opt->nthreads_omp = 1; + } + + if (hw_opt->nthreads_omp_pme > 0 && hw_opt->nthreads_omp <= 0) + { + gmx_fatal(FARGS, "You need to specify -ntomp in addition to -ntomp_pme"); + } + + if (hw_opt->nthreads_tot == 1) + { + hw_opt->nthreads_tmpi = 1; + + if (hw_opt->nthreads_omp > 1) + { + gmx_fatal(FARGS, "You requested %d OpenMP threads with %d total threads", + hw_opt->nthreads_tmpi, hw_opt->nthreads_tot); + } + 
hw_opt->nthreads_omp = 1; + } + + if (hw_opt->nthreads_omp_pme <= 0 && hw_opt->nthreads_omp > 0) + { + hw_opt->nthreads_omp_pme = hw_opt->nthreads_omp; + } + + if (debug) + { + fprintf(debug, "hw_opt: nt %d ntmpi %d ntomp %d ntomp_pme %d gpu_id '%s'\n", + hw_opt->nthreads_tot, + hw_opt->nthreads_tmpi, + hw_opt->nthreads_omp, + hw_opt->nthreads_omp_pme, + hw_opt->gpu_id != NULL ? hw_opt->gpu_id : ""); + + } +} + + +/* Override the value in inputrec with value passed on the command line (if any) */ +static void override_nsteps_cmdline(FILE *fplog, + gmx_large_int_t nsteps_cmdline, + t_inputrec *ir, + const t_commrec *cr) +{ + char sbuf[STEPSTRSIZE]; + + assert(ir); + assert(cr); + + /* override with anything else than the default -2 */ + if (nsteps_cmdline > -2) + { + char stmp[STRLEN]; + + ir->nsteps = nsteps_cmdline; + if (EI_DYNAMICS(ir->eI)) + { + sprintf(stmp, "Overriding nsteps with value passed on the command line: %s steps, %.3f ps", + gmx_step_str(nsteps_cmdline, sbuf), + nsteps_cmdline*ir->delta_t); + } + else + { + sprintf(stmp, "Overriding nsteps with value passed on the command line: %s steps", + gmx_step_str(nsteps_cmdline, sbuf)); + } + + md_print_warn(cr, fplog, "%s\n", stmp); + } +} + +/* Data structure set by SIMMASTER which needs to be passed to all nodes + * before the other nodes have read the tpx file and called gmx_detect_hardware. 
+ */ +typedef struct { + int cutoff_scheme; /* The cutoff scheme from inputrec_t */ + gmx_bool bUseGPU; /* Use GPU or GPU emulation */ +} master_inf_t; + +int mdrunner(gmx_hw_opt_t *hw_opt, + FILE *fplog, t_commrec *cr, int nfile, + const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose, + gmx_bool bCompact, int nstglobalcomm, + ivec ddxyz, int dd_node_order, real rdd, real rconstr, + const char *dddlb_opt, real dlb_scale, + const char *ddcsx, const char *ddcsy, const char *ddcsz, + const char *nbpu_opt, + gmx_large_int_t nsteps_cmdline, int nstepout, int resetstep, + int nmultisim, int repl_ex_nst, int repl_ex_nex, + int repl_ex_seed, real pforce, real cpt_period, real max_hours, + const char *deviceOptions, unsigned long Flags) +{ + gmx_bool bForceUseGPU, bTryUseGPU; + double nodetime = 0, realtime; + t_inputrec *inputrec; + t_state *state = NULL; + matrix box; + gmx_ddbox_t ddbox = {0}; + int npme_major, npme_minor; + real tmpr1, tmpr2; + t_nrnb *nrnb; + gmx_mtop_t *mtop = NULL; + t_mdatoms *mdatoms = NULL; + t_forcerec *fr = NULL; + t_fcdata *fcd = NULL; + real ewaldcoeff = 0; + gmx_pme_t *pmedata = NULL; + gmx_vsite_t *vsite = NULL; + gmx_constr_t constr; + int i, m, nChargePerturbed = -1, status, nalloc; + char *gro; + gmx_wallcycle_t wcycle; + gmx_bool bReadRNG, bReadEkin; + int list; + gmx_runtime_t runtime; + int rc; + gmx_large_int_t reset_counters; + gmx_edsam_t ed = NULL; + t_commrec *cr_old = cr; + int nthreads_pme = 1; + int nthreads_pp = 1; + gmx_membed_t membed = NULL; + gmx_hw_info_t *hwinfo = NULL; + master_inf_t minf = {-1, FALSE}; + + /* CAUTION: threads may be started later on in this function, so + cr doesn't reflect the final parallel state right now */ + snew(inputrec, 1); + snew(mtop, 1); + + if (Flags & MD_APPENDFILES) + { + fplog = NULL; + } + + bForceUseGPU = (strncmp(nbpu_opt, "gpu", 3) == 0); + bTryUseGPU = (strncmp(nbpu_opt, "auto", 4) == 0) || bForceUseGPU; + + snew(state, 1); + if (SIMMASTER(cr)) + { + /* Read (nearly) all 
data required for the simulation */ + read_tpx_state(ftp2fn(efTPX, nfile, fnm), inputrec, state, NULL, mtop); + + if (inputrec->cutoff_scheme != ecutsVERLET && + ((Flags & MD_TESTVERLET) || getenv("GMX_VERLET_SCHEME") != NULL)) + { + convert_to_verlet_scheme(fplog, inputrec, mtop, det(state->box)); + } + + /* Detect hardware, gather information. With tMPI only thread 0 does it + * and after threads are started broadcasts hwinfo around. */ + snew(hwinfo, 1); + gmx_detect_hardware(fplog, hwinfo, cr, + bForceUseGPU, bTryUseGPU, hw_opt->gpu_id); + + minf.cutoff_scheme = inputrec->cutoff_scheme; + minf.bUseGPU = FALSE; + + if (inputrec->cutoff_scheme == ecutsVERLET) + { - prepare_verlet_scheme(fplog, hwinfo, cr, hw_opt, nbpu_opt, ++ prepare_verlet_scheme(fplog, hwinfo, cr, nbpu_opt, + inputrec, mtop, state->box, + &minf.bUseGPU); + } + else if (hwinfo->bCanUseGPU) + { + md_print_warn(cr, fplog, + "NOTE: GPU(s) found, but the current simulation can not use GPUs\n" + " To use a GPU, set the mdp option: cutoff-scheme = Verlet\n" + " (for quick performance testing you can use the -testverlet option)\n"); + + if (bForceUseGPU) + { + gmx_fatal(FARGS, "GPU requested, but can't be used without cutoff-scheme=Verlet"); + } + } + } +#ifndef GMX_THREAD_MPI + if (PAR(cr)) + { + gmx_bcast_sim(sizeof(minf), &minf, cr); + } +#endif + if (minf.bUseGPU && cr->npmenodes == -1) + { + /* Don't automatically use PME-only nodes with GPUs */ + cr->npmenodes = 0; + } + + /* Check for externally set OpenMP affinity and turn off internal + * pinning if any is found. We need to do this check early to tell + * thread-MPI whether it should do pinning when spawning threads. 
+ * TODO: the above no longer holds, we should move these checks down + */ + gmx_omp_check_thread_affinity(fplog, cr, hw_opt); + +#ifdef GMX_THREAD_MPI + /* With thread-MPI inputrec is only set here on the master thread */ + if (SIMMASTER(cr)) +#endif + { + check_and_update_hw_opt(hw_opt, minf.cutoff_scheme, SIMMASTER(cr)); + +#ifdef GMX_THREAD_MPI + /* Early check for externally set process affinity. Can't do over all + * MPI processes because hwinfo is not available everywhere, but with + * thread-MPI it's needed as pinning might get turned off which needs + * to be known before starting thread-MPI. */ + gmx_check_thread_affinity_set(fplog, + NULL, + hw_opt, hwinfo->nthreads_hw_avail, FALSE); +#endif + +#ifdef GMX_THREAD_MPI + if (cr->npmenodes > 0 && hw_opt->nthreads_tmpi <= 0) + { + gmx_fatal(FARGS, "You need to explicitly specify the number of MPI threads (-ntmpi) when using separate PME nodes"); + } +#endif + + if (hw_opt->nthreads_omp_pme != hw_opt->nthreads_omp && + cr->npmenodes <= 0) + { + gmx_fatal(FARGS, "You need to explicitly specify the number of PME nodes (-npme) when using different number of OpenMP threads for PP and PME nodes"); + } + } + +#ifdef GMX_THREAD_MPI + if (SIMMASTER(cr)) + { + /* NOW the threads will be started: */ + hw_opt->nthreads_tmpi = get_nthreads_mpi(hwinfo, + hw_opt, + inputrec, mtop, + cr, fplog); + if (hw_opt->nthreads_tot > 0 && hw_opt->nthreads_omp <= 0) + { + hw_opt->nthreads_omp = hw_opt->nthreads_tot/hw_opt->nthreads_tmpi; + } + + if (hw_opt->nthreads_tmpi > 1) + { + /* now start the threads. */ + cr = mdrunner_start_threads(hw_opt, fplog, cr_old, nfile, fnm, + oenv, bVerbose, bCompact, nstglobalcomm, + ddxyz, dd_node_order, rdd, rconstr, + dddlb_opt, dlb_scale, ddcsx, ddcsy, ddcsz, + nbpu_opt, + nsteps_cmdline, nstepout, resetstep, nmultisim, + repl_ex_nst, repl_ex_nex, repl_ex_seed, pforce, + cpt_period, max_hours, deviceOptions, + Flags); + /* the main thread continues here with a new cr. 
We don't deallocate + the old cr because other threads may still be reading it. */ + if (cr == NULL) + { + gmx_comm("Failed to spawn threads"); + } + } + } +#endif + /* END OF CAUTION: cr is now reliable */ + + /* g_membed initialisation * + * Because we change the mtop, init_membed is called before the init_parallel * + * (in case we ever want to make it run in parallel) */ + if (opt2bSet("-membed", nfile, fnm)) + { + if (MASTER(cr)) + { + fprintf(stderr, "Initializing membed"); + } + membed = init_membed(fplog, nfile, fnm, mtop, inputrec, state, cr, &cpt_period); + } + + if (PAR(cr)) + { + /* now broadcast everything to the non-master nodes/threads: */ + init_parallel(fplog, cr, inputrec, mtop); + + /* This check needs to happen after get_nthreads_mpi() */ + if (inputrec->cutoff_scheme == ecutsVERLET && (Flags & MD_PARTDEC)) + { + gmx_fatal_collective(FARGS, cr, NULL, + "The Verlet cut-off scheme is not supported with particle decomposition.\n" + "You can achieve the same effect as particle decomposition by running in parallel using only OpenMP threads."); + } + } + if (fplog != NULL) + { + pr_inputrec(fplog, 0, "Input Parameters", inputrec, FALSE); + } + +#if defined GMX_THREAD_MPI + /* With tMPI we detected on thread 0 and we'll just pass the hwinfo pointer + * to the other threads -- slightly uncool, but works fine, just need to + * make sure that the data doesn't get freed twice. */ + if (cr->nnodes > 1) + { + if (!SIMMASTER(cr)) + { + snew(hwinfo, 1); + } + gmx_bcast(sizeof(&hwinfo), &hwinfo, cr); + } +#else + if (PAR(cr) && !SIMMASTER(cr)) + { + /* now we have inputrec on all nodes, can run the detection */ + /* TODO: perhaps it's better to propagate within a node instead? */ + snew(hwinfo, 1); + gmx_detect_hardware(fplog, hwinfo, cr, + bForceUseGPU, bTryUseGPU, hw_opt->gpu_id); + } + + /* Now do the affinity check with MPI/no-MPI (done earlier with thread-MPI). 
*/ + gmx_check_thread_affinity_set(fplog, cr, + hw_opt, hwinfo->nthreads_hw_avail, FALSE); +#endif + + /* now make sure the state is initialized and propagated */ + set_state_entries(state, inputrec, cr->nnodes); + + /* A parallel command line option consistency check that we can + only do after any threads have started. */ + if (!PAR(cr) && + (ddxyz[XX] > 1 || ddxyz[YY] > 1 || ddxyz[ZZ] > 1 || cr->npmenodes > 0)) + { + gmx_fatal(FARGS, + "The -dd or -npme option request a parallel simulation, " +#ifndef GMX_MPI + "but %s was compiled without threads or MPI enabled" +#else +#ifdef GMX_THREAD_MPI + "but the number of threads (option -nt) is 1" +#else + "but %s was not started through mpirun/mpiexec or only one process was requested through mpirun/mpiexec" +#endif +#endif + , ShortProgram() + ); + } + + if ((Flags & MD_RERUN) && + (EI_ENERGY_MINIMIZATION(inputrec->eI) || eiNM == inputrec->eI)) + { + gmx_fatal(FARGS, "The .mdp file specified an energy mininization or normal mode algorithm, and these are not compatible with mdrun -rerun"); + } + + if (can_use_allvsall(inputrec, mtop, TRUE, cr, fplog) && PAR(cr)) + { - /* All-vs-all loops do not work with domain decomposition */ ++ /* Simple neighbour searching and (also?) all-vs-all loops ++ * do not work with domain decomposition. 
*/ + Flags |= MD_PARTDEC; + } + + if (!EEL_PME(inputrec->coulombtype) || (Flags & MD_PARTDEC)) + { + if (cr->npmenodes > 0) + { + if (!EEL_PME(inputrec->coulombtype)) + { + gmx_fatal_collective(FARGS, cr, NULL, + "PME nodes are requested, but the system does not use PME electrostatics"); + } + if (Flags & MD_PARTDEC) + { + gmx_fatal_collective(FARGS, cr, NULL, + "PME nodes are requested, but particle decomposition does not support separate PME nodes"); + } + } + + cr->npmenodes = 0; + } + +#ifdef GMX_FAHCORE + fcRegisterSteps(inputrec->nsteps, inputrec->init_step); +#endif + + /* NMR restraints must be initialized before load_checkpoint, + * since with time averaging the history is added to t_state. + * For proper consistency check we therefore need to extend + * t_state here. + * So the PME-only nodes (if present) will also initialize + * the distance restraints. + */ + snew(fcd, 1); + + /* This needs to be called before read_checkpoint to extend the state */ + init_disres(fplog, mtop, inputrec, cr, Flags & MD_PARTDEC, fcd, state, repl_ex_nst > 0); + + if (gmx_mtop_ftype_count(mtop, F_ORIRES) > 0) + { + if (PAR(cr) && !(Flags & MD_PARTDEC)) + { + gmx_fatal(FARGS, "Orientation restraints do not work (yet) with domain decomposition, use particle decomposition (mdrun option -pd)"); + } + /* Orientation restraints */ + if (MASTER(cr)) + { + init_orires(fplog, mtop, state->x, inputrec, cr->ms, &(fcd->orires), + state); + } + } + + if (DEFORM(*inputrec)) + { + /* Store the deform reference box before reading the checkpoint */ + if (SIMMASTER(cr)) + { + copy_mat(state->box, box); + } + if (PAR(cr)) + { + gmx_bcast(sizeof(box), box, cr); + } + /* Because we do not have the update struct available yet + * in which the reference values should be stored, + * we store them temporarily in static variables. + * This should be thread safe, since they are only written once + * and with identical values. 
+ */ +#ifdef GMX_THREAD_MPI + tMPI_Thread_mutex_lock(&deform_init_box_mutex); +#endif + deform_init_init_step_tpx = inputrec->init_step; + copy_mat(box, deform_init_box_tpx); +#ifdef GMX_THREAD_MPI + tMPI_Thread_mutex_unlock(&deform_init_box_mutex); +#endif + } + + if (opt2bSet("-cpi", nfile, fnm)) + { + /* Check if checkpoint file exists before doing continuation. + * This way we can use identical input options for the first and subsequent runs... + */ + if (gmx_fexist_master(opt2fn_master("-cpi", nfile, fnm, cr), cr) ) + { + load_checkpoint(opt2fn_master("-cpi", nfile, fnm, cr), &fplog, + cr, Flags & MD_PARTDEC, ddxyz, + inputrec, state, &bReadRNG, &bReadEkin, + (Flags & MD_APPENDFILES), + (Flags & MD_APPENDFILESSET)); + + if (bReadRNG) + { + Flags |= MD_READ_RNG; + } + if (bReadEkin) + { + Flags |= MD_READ_EKIN; + } + } + } + + if (((MASTER(cr) || (Flags & MD_SEPPOT)) && (Flags & MD_APPENDFILES)) +#ifdef GMX_THREAD_MPI + /* With thread MPI only the master node/thread exists in mdrun.c, + * therefore non-master nodes need to open the "seppot" log file here. 
+ */ + || (!MASTER(cr) && (Flags & MD_SEPPOT)) +#endif + ) + { + gmx_log_open(ftp2fn(efLOG, nfile, fnm), cr, !(Flags & MD_SEPPOT), + Flags, &fplog); + } + + /* override nsteps with value from cmdline */ + override_nsteps_cmdline(fplog, nsteps_cmdline, inputrec, cr); + + if (SIMMASTER(cr)) + { + copy_mat(state->box, box); + } + + if (PAR(cr)) + { + gmx_bcast(sizeof(box), box, cr); + } + + /* Essential dynamics */ + if (opt2bSet("-ei", nfile, fnm)) + { + /* Open input and output files, allocate space for ED data structure */ + ed = ed_open(mtop->natoms, &state->edsamstate, nfile, fnm, Flags, oenv, cr); + } + + if (PAR(cr) && !((Flags & MD_PARTDEC) || + EI_TPI(inputrec->eI) || + inputrec->eI == eiNM)) + { + cr->dd = init_domain_decomposition(fplog, cr, Flags, ddxyz, rdd, rconstr, + dddlb_opt, dlb_scale, + ddcsx, ddcsy, ddcsz, + mtop, inputrec, + box, state->x, + &ddbox, &npme_major, &npme_minor); + + make_dd_communicators(fplog, cr, dd_node_order); + + /* Set overallocation to avoid frequent reallocation of arrays */ + set_over_alloc_dd(TRUE); + } + else + { + /* PME, if used, is done on all nodes with 1D decomposition */ + cr->npmenodes = 0; + cr->duty = (DUTY_PP | DUTY_PME); + npme_major = 1; + npme_minor = 1; + if (!EI_TPI(inputrec->eI)) + { + npme_major = cr->nnodes; + } + + if (inputrec->ePBC == epbcSCREW) + { + gmx_fatal(FARGS, + "pbc=%s is only implemented with domain decomposition", + epbc_names[inputrec->ePBC]); + } + } + + if (PAR(cr)) + { + /* After possible communicator splitting in make_dd_communicators. + * we can set up the intra/inter node communication. + */ + gmx_setup_nodecomm(fplog, cr); + } + + /* Initialize per-physical-node MPI process/thread ID and counters. */ + gmx_init_intranode_counters(cr); + +#ifdef GMX_MPI + md_print_info(cr, fplog, "Using %d MPI %s\n", + cr->nnodes, +#ifdef GMX_THREAD_MPI + cr->nnodes == 1 ? "thread" : "threads" +#else + cr->nnodes == 1 ? 
"process" : "processes" +#endif + ); + fflush(stderr); +#endif + + gmx_omp_nthreads_init(fplog, cr, + hwinfo->nthreads_hw_avail, + hw_opt->nthreads_omp, + hw_opt->nthreads_omp_pme, + (cr->duty & DUTY_PP) == 0, + inputrec->cutoff_scheme == ecutsVERLET); + + gmx_check_hw_runconf_consistency(fplog, hwinfo, cr, hw_opt->nthreads_tmpi, minf.bUseGPU); + + /* getting number of PP/PME threads + PME: env variable should be read only on one node to make sure it is + identical everywhere; + */ + /* TODO nthreads_pp is only used for pinning threads. + * This is a temporary solution until we have a hw topology library. + */ + nthreads_pp = gmx_omp_nthreads_get(emntNonbonded); + nthreads_pme = gmx_omp_nthreads_get(emntPME); + + wcycle = wallcycle_init(fplog, resetstep, cr, nthreads_pp, nthreads_pme); + + if (PAR(cr)) + { + /* Master synchronizes its value of reset_counters with all nodes + * including PME only nodes */ + reset_counters = wcycle_get_reset_counters(wcycle); + gmx_bcast_sim(sizeof(reset_counters), &reset_counters, cr); + wcycle_set_reset_counters(wcycle, reset_counters); + } + + snew(nrnb, 1); + if (cr->duty & DUTY_PP) + { + /* For domain decomposition we allocate dynamically + * in dd_partition_system. 
+ */ + if (DOMAINDECOMP(cr)) + { + bcast_state_setup(cr, state); + } + else + { + if (PAR(cr)) + { + bcast_state(cr, state, TRUE); + } + } + + /* Initiate forcerecord */ + fr = mk_forcerec(); + fr->hwinfo = hwinfo; + init_forcerec(fplog, oenv, fr, fcd, inputrec, mtop, cr, box, FALSE, + opt2fn("-table", nfile, fnm), + opt2fn("-tabletf", nfile, fnm), + opt2fn("-tablep", nfile, fnm), + opt2fn("-tableb", nfile, fnm), + nbpu_opt, + FALSE, pforce); + + /* version for PCA_NOT_READ_NODE (see md.c) */ + /*init_forcerec(fplog,fr,fcd,inputrec,mtop,cr,box,FALSE, + "nofile","nofile","nofile","nofile",FALSE,pforce); + */ + fr->bSepDVDL = ((Flags & MD_SEPPOT) == MD_SEPPOT); + + /* Initialize QM-MM */ + if (fr->bQMMM) + { + init_QMMMrec(cr, box, mtop, inputrec, fr); + } + + /* Initialize the mdatoms structure. + * mdatoms is not filled with atom data, + * as this can not be done now with domain decomposition. + */ + mdatoms = init_mdatoms(fplog, mtop, inputrec->efep != efepNO); + + if (mdatoms->nPerturbed > 0 && inputrec->cutoff_scheme == ecutsVERLET) + { + gmx_fatal(FARGS, "The Verlet cut-off scheme does not (yet) support free-energy calculations with perturbed atoms, only perturbed interactions. This will be implemented soon. Use the group scheme for now."); + } + + /* Initialize the virtual site communication */ + vsite = init_vsite(mtop, cr, FALSE); + + calc_shifts(box, fr->shift_vec); + + /* With periodic molecules the charge groups should be whole at start up + * and the virtual sites should not be far from their proper positions. + */ + if (!inputrec->bContinuation && MASTER(cr) && + !(inputrec->ePBC != epbcNONE && inputrec->bPeriodicMols)) + { + /* Make molecules whole at start of run */ + if (fr->ePBC != epbcNONE) + { + do_pbc_first_mtop(fplog, inputrec->ePBC, box, mtop, state->x); + } + if (vsite) + { + /* Correct initial vsite positions are required + * for the initial distribution in the domain decomposition + * and for the initial shell prediction. 
+ */ + construct_vsites_mtop(fplog, vsite, mtop, state->x); + } + } + + if (EEL_PME(fr->eeltype)) + { + ewaldcoeff = fr->ewaldcoeff; + pmedata = &fr->pmedata; + } + else + { + pmedata = NULL; + } + } + else + { + /* This is a PME only node */ + + /* We don't need the state */ + done_state(state); + + ewaldcoeff = calc_ewaldcoeff(inputrec->rcoulomb, inputrec->ewald_rtol); + snew(pmedata, 1); + } + + if (hw_opt->thread_affinity != threadaffOFF) + { + /* Before setting affinity, check whether the affinity has changed + * - which indicates that probably the OpenMP library has changed it + * since we first checked). + */ + gmx_check_thread_affinity_set(fplog, cr, + hw_opt, hwinfo->nthreads_hw_avail, TRUE); + + /* Set the CPU affinity */ + gmx_set_thread_affinity(fplog, cr, hw_opt, nthreads_pme, hwinfo, inputrec); + } + + /* Initiate PME if necessary, + * either on all nodes or on dedicated PME nodes only. */ + if (EEL_PME(inputrec->coulombtype)) + { + if (mdatoms) + { + nChargePerturbed = mdatoms->nChargePerturbed; + } + if (cr->npmenodes > 0) + { + /* The PME only nodes need to know nChargePerturbed */ + gmx_bcast_sim(sizeof(nChargePerturbed), &nChargePerturbed, cr); + } + + if (cr->duty & DUTY_PME) + { + status = gmx_pme_init(pmedata, cr, npme_major, npme_minor, inputrec, + mtop ? mtop->natoms : 0, nChargePerturbed, + (Flags & MD_REPRODUCIBLE), nthreads_pme); + if (status != 0) + { + gmx_fatal(FARGS, "Error %d initializing PME", status); + } + } + } + + + if (integrator[inputrec->eI].func == do_md) + { + /* Turn on signal handling on all nodes */ + /* + * (A user signal from the PME nodes (if any) + * is communicated to the PP nodes. 
+ */ + signal_handler_install(); + } + + if (cr->duty & DUTY_PP) + { + if (inputrec->ePull != epullNO) + { + /* Initialize pull code */ + init_pull(fplog, inputrec, nfile, fnm, mtop, cr, oenv, inputrec->fepvals->init_lambda, + EI_DYNAMICS(inputrec->eI) && MASTER(cr), Flags); + } + + if (inputrec->bRot) + { + /* Initialize enforced rotation code */ + init_rot(fplog, inputrec, nfile, fnm, cr, state->x, box, mtop, oenv, + bVerbose, Flags); + } + + constr = init_constraints(fplog, mtop, inputrec, ed, state, cr); + + if (DOMAINDECOMP(cr)) + { + dd_init_bondeds(fplog, cr->dd, mtop, vsite, constr, inputrec, + Flags & MD_DDBONDCHECK, fr->cginfo_mb); + + set_dd_parameters(fplog, cr->dd, dlb_scale, inputrec, fr, &ddbox); + + setup_dd_grid(fplog, cr->dd); + } + + /* Now do whatever the user wants us to do (how flexible...) */ + integrator[inputrec->eI].func(fplog, cr, nfile, fnm, + oenv, bVerbose, bCompact, + nstglobalcomm, + vsite, constr, + nstepout, inputrec, mtop, + fcd, state, + mdatoms, nrnb, wcycle, ed, fr, + repl_ex_nst, repl_ex_nex, repl_ex_seed, + membed, + cpt_period, max_hours, + deviceOptions, + Flags, + &runtime); + + if (inputrec->ePull != epullNO) + { + finish_pull(fplog, inputrec->pull); + } + + if (inputrec->bRot) + { - finish_rot(fplog, inputrec->rot); ++ finish_rot(inputrec->rot); + } + + } + else + { + /* do PME only */ + gmx_pmeonly(*pmedata, cr, nrnb, wcycle, ewaldcoeff, FALSE, inputrec); + } + + if (EI_DYNAMICS(inputrec->eI) || EI_TPI(inputrec->eI)) + { + /* Some timing stats */ + if (SIMMASTER(cr)) + { + if (runtime.proc == 0) + { + runtime.proc = runtime.real; + } + } + else + { + runtime.real = 0; + } + } + + wallcycle_stop(wcycle, ewcRUN); + + /* Finish up, write some stuff + * if rerunMD, don't write last frame again + */ + finish_run(fplog, cr, ftp2fn(efSTO, nfile, fnm), + inputrec, nrnb, wcycle, &runtime, + fr != NULL && fr->nbv != NULL && fr->nbv->bUseGPU ? 
+ nbnxn_cuda_get_timings(fr->nbv->cu_nbv) : NULL, + nthreads_pp, + EI_DYNAMICS(inputrec->eI) && !MULTISIM(cr)); + + if ((cr->duty & DUTY_PP) && fr->nbv != NULL && fr->nbv->bUseGPU) + { + char gpu_err_str[STRLEN]; + + /* free GPU memory and uninitialize GPU (by destroying the context) */ + nbnxn_cuda_free(fplog, fr->nbv->cu_nbv); + + if (!free_gpu(gpu_err_str)) + { + gmx_warning("On node %d failed to free GPU #%d: %s", + cr->nodeid, get_current_gpu_device_id(), gpu_err_str); + } + } + + if (opt2bSet("-membed", nfile, fnm)) + { + sfree(membed); + } + +#ifdef GMX_THREAD_MPI + if (PAR(cr) && SIMMASTER(cr)) +#endif + { + gmx_hardware_info_free(hwinfo); + } + + /* Does what it says */ + print_date_and_time(fplog, cr->nodeid, "Finished mdrun", &runtime); + + /* Close logfile already here if we were appending to it */ + if (MASTER(cr) && (Flags & MD_APPENDFILES)) + { + gmx_log_close(fplog); + } + + rc = (int)gmx_get_stop_condition(); + +#ifdef GMX_THREAD_MPI + /* we need to join all threads. The sub-threads join when they + exit this function, but the master thread needs to be told to + wait for that. */ + if (PAR(cr) && MASTER(cr)) + { + tMPI_Finalize(); + } +#endif + + return rc; +}