From: Roland Schulz Date: Fri, 21 Feb 2014 00:53:08 +0000 (-0500) Subject: Merge release-4-6 into master X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=commitdiff_plain;h=ba6bc1cfca9f5dc013a82eb5806faa6c16406f3f;p=alexxy%2Fgromacs.git Merge release-4-6 into master Changed BUILD_CPU_ACCELERATION to GMX_SIMD in CMakeLists.txt. Reverts change to src/mdlib/pme_simd4.h. Corresponding fix for master is under review as I5e08b0a09. Removed reference to PD in message in shellfc.c. Conflicts (all trivial): CMakeLists.txt src/contrib/fftw/CMakeLists.txt src/gromacs/gmxlib/gmx_detect_hardware.c src/gromacs/legacyheaders/shellfc.h src/gromacs/gmxlib/copyrite.cpp Change-Id: I2d43520ae8833055eff51d431e47cd8b90d3a687 --- ba6bc1cfca9f5dc013a82eb5806faa6c16406f3f diff --cc CMakeLists.txt index e2dab23cf5,535ec6d3a7..f30bcd6e3d --- a/CMakeLists.txt +++ b/CMakeLists.txt @@@ -525,12 -610,29 +525,28 @@@ gmx_test_inline_asm_gcc_x86(GMX_X86_GCC include(gmxSetBuildInformation) gmx_set_build_information() - if(BUILD_CPU_FEATURES MATCHES "rdtscp" AND NOT GMX_DISTRIBUTABLE_BUILD) + # Turn on RDTSCP if: + # - the build system's CPU supports it + # - the acceleration is set to AVX, as all AVX-capable CPUs support RDTSCP (which + # at this point means that the user set it). -# Note: it's better to not use the later set value of GMX_CPU_ACCELERATION because ++# Note: it's better to not use the later set value of GMX_SIMD because + # it reflects the system's capability of both compiling and running AVX code. + # TODO: After merge with 5.0 one could implement a cache variable dependency + # such that GMX_USE_RDTSCP can change if GMX_CPU_ACCELERATION is changed to AVX + # after the first cmake pass. -if (BUILD_CPU_FEATURES MATCHES "rdtscp" OR GMX_CPU_ACCELERATION MATCHES "AVX") ++if (BUILD_CPU_FEATURES MATCHES "rdtscp" OR GMX_SIMD MATCHES "AVX") + set(GMX_USE_RDTSCP_DEFAULT_VALUE ON) + else() + set(GMX_USE_RDTSCP_DEFAULT_VALUE OFF) + endif() + option(GMX_USE_RDTSCP "Use RDTSCP for better CPU-based timers (available on recent x86 CPUs; might need to be off when compiling for heterogeneous environments)" ${GMX_USE_RDTSCP_DEFAULT_VALUE}) + mark_as_advanced(GMX_USE_RDTSCP) + if(GMX_USE_RDTSCP) - # The timestep counter headers do not include config.h - add_definitions(-DHAVE_RDTSCP) + set(HAVE_RDTSCP 1) - endif(BUILD_CPU_FEATURES MATCHES "rdtscp" AND NOT GMX_DISTRIBUTABLE_BUILD) + endif() include(gmxTestFloatFormat) -gmx_test_float_format(GMX_FLOAT_FORMAT_IEEE754 +gmx_test_float_format(GMX_FLOAT_FORMAT_IEEE754 GMX_IEEE754_BIG_ENDIAN_BYTE_ORDER GMX_IEEE754_BIG_ENDIAN_WORD_ORDER) diff --cc src/contrib/fftw/CMakeLists.txt index 1365dda4a7,a0e603394a..549b5766d4 --- a/src/contrib/fftw/CMakeLists.txt +++ b/src/contrib/fftw/CMakeLists.txt @@@ -66,11 -32,10 +66,12 @@@ if (TARGET_HOST endif() # Machinery for running the external project -set(EXTERNAL_FFTW_VERSION 3.3.2) +set(EXTERNAL_FFTW_VERSION 3.3.3) - set(GMX_BUILD_OWN_FFTW_URL "http://www.fftw.org/fftw-${EXTERNAL_FFTW_VERSION}.tar.gz" CACHE PATH "URL from where to download fftw, (use an absolute path when offline)") + # cmake make eats slashes //// -> // + set(GMX_BUILD_OWN_FFTW_URL "http:////www.fftw.org/fftw-${EXTERNAL_FFTW_VERSION}.tar.gz" CACHE PATH "URL from where to download fftw, (use an absolute path when offline)") mark_as_advanced(GMX_BUILD_OWN_FFTW_URL) +set(EXTERNAL_FFTW_MD5SUM 0a05ca9c7b3bfddc8278e7c40791a1c2) +set (EXTERNAL_FFTW_BUILD_TARGET fftwBuild) include(ExternalProject) # TODO in master branch - show this warning only on the first run # by using
gmx_check_if_changed_result from I21b791ab8e4f3 when diff --cc src/gromacs/gmxana/gmx_anaeig.c index eef2ef1b99,0000000000..6ccc8f8541 mode 100644,000000..100644 --- a/src/gromacs/gmxana/gmx_anaeig.c +++ b/src/gromacs/gmxana/gmx_anaeig.c @@@ -1,1481 -1,0 +1,1486 @@@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. + * Copyright (c) 2013,2014, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. 
+ */ +#ifdef HAVE_CONFIG_H +#include +#endif +#include +#include +#include "gromacs/commandline/pargs.h" +#include "sysstuff.h" +#include "typedefs.h" +#include "smalloc.h" +#include "macros.h" +#include "gmx_fatal.h" +#include "vec.h" +#include "pbc.h" +#include "gromacs/fileio/futil.h" +#include "index.h" +#include "gromacs/fileio/pdbio.h" +#include "gromacs/fileio/confio.h" +#include "gromacs/fileio/tpxio.h" +#include "gromacs/fileio/trxio.h" +#include "gromacs/fileio/matio.h" +#include "mshift.h" +#include "xvgr.h" +#include "do_fit.h" +#include "rmpbc.h" +#include "txtdump.h" +#include "eigio.h" +#include "physics.h" +#include "gmx_ana.h" + +static void calc_entropy_qh(FILE *fp, int n, real eigval[], real temp, int nskip) +{ + int i; + double hwkT, w, dS, S = 0; + double hbar, lambda; + + hbar = PLANCK1/(2*M_PI); + for (i = 0; (i < n-nskip); i++) + { + if (eigval[i] > 0) + { + lambda = eigval[i]*AMU; + w = sqrt(BOLTZMANN*temp/lambda)/NANO; + hwkT = (hbar*w)/(BOLTZMANN*temp); + dS = (hwkT/(exp(hwkT) - 1) - log(1-exp(-hwkT))); + S += dS; + if (debug) + { + fprintf(debug, "i = %5d w = %10g lam = %10g hwkT = %10g dS = %10g\n", + i, w, lambda, hwkT, dS); + } + } + else + { + fprintf(stderr, "eigval[%d] = %g\n", i, eigval[i]); + w = 0; + } + } + fprintf(fp, "The Entropy due to the Quasi Harmonic approximation is %g J/mol K\n", + S*RGAS); +} + +static void calc_entropy_schlitter(FILE *fp, int n, int nskip, + real *eigval, real temp) +{ + double dd, deter; + int *indx; + int i, j, k, m; + char buf[256]; + double hbar, kt, kteh, S; + + hbar = PLANCK1/(2*M_PI); + kt = BOLTZMANN*temp; + kteh = kt*exp(2.0)/(hbar*hbar)*AMU*(NANO*NANO); + if (debug) + { + fprintf(debug, "n = %d, nskip = %d kteh = %g\n", n, nskip, kteh); + } + + deter = 0; + for (i = 0; (i < n-nskip); i++) + { + dd = 1+kteh*eigval[i]; + deter += log(dd); + } + S = 0.5*RGAS*deter; + + fprintf(fp, "The Entropy due to the Schlitter formula is %g J/mol K\n", S); +} + +const char *proj_unit; + +static real tick_spacing(real range, int minticks) +{ + real sp; + + if (range <= 0) + { + return 1.0; + } + + sp = 0.2*exp(log(10)*ceil(log(range)/log(10))); + while (range/sp < minticks-1) + { + sp = sp/2; + } + + return sp; +} + +static void write_xvgr_graphs(const char *file, int ngraphs, int nsetspergraph, + const char *title, const char *subtitle, + const char *xlabel, const char **ylabel, + int n, real *x, real **y, real ***sy, + real scale_x, gmx_bool bZero, gmx_bool bSplit, + const output_env_t oenv) +{ + FILE *out; + int g, s, i; + real min, max, xsp, ysp; + + out = gmx_ffopen(file, "w"); + if (output_env_get_xvg_format(oenv) == exvgXMGRACE) + { + fprintf(out, "@ autoscale onread none\n"); + } + for (g = 0; g < ngraphs; g++) + { + if (y) + { + min = y[g][0]; + max = y[g][0]; + for (i = 0; i < n; i++) + { + if (y[g][i] < min) + { + min = y[g][i]; + } + if (y[g][i] > max) + { + max = y[g][i]; + } + } + } + else + { + min = sy[g][0][0]; + max = sy[g][0][0]; + for (s = 0; s < nsetspergraph; s++) + { + for (i = 0; i < n; i++) + { + if (sy[g][s][i] < min) + { + min = sy[g][s][i]; + } + if (sy[g][s][i] > max) + { + max = sy[g][s][i]; + } + } + } + } + if (bZero) + { + min = 0; + } + else + { + min = min-0.1*(max-min); + } + max = max+0.1*(max-min); + xsp = tick_spacing((x[n-1]-x[0])*scale_x, 4); + ysp = tick_spacing(max-min, 3); + if (output_env_get_print_xvgr_codes(oenv)) + { + fprintf(out, "@ with g%d\n@ g%d on\n", g, g); + if (g == 0) + { + fprintf(out, "@ title \"%s\"\n", title); + if (subtitle) + { + fprintf(out, "@ subtitle \"%s\"\n", 
subtitle); + } + } + if (g == ngraphs-1) + { + fprintf(out, "@ xaxis label \"%s\"\n", xlabel); + } + else + { + fprintf(out, "@ xaxis ticklabel off\n"); + } + if (n > 1) + { + fprintf(out, "@ world xmin %g\n", x[0]*scale_x); + fprintf(out, "@ world xmax %g\n", x[n-1]*scale_x); + fprintf(out, "@ world ymin %g\n", min); + fprintf(out, "@ world ymax %g\n", max); + } + fprintf(out, "@ view xmin 0.15\n"); + fprintf(out, "@ view xmax 0.85\n"); + fprintf(out, "@ view ymin %g\n", 0.15+(ngraphs-1-g)*0.7/ngraphs); + fprintf(out, "@ view ymax %g\n", 0.15+(ngraphs-g)*0.7/ngraphs); + fprintf(out, "@ yaxis label \"%s\"\n", ylabel[g]); + fprintf(out, "@ xaxis tick major %g\n", xsp); + fprintf(out, "@ xaxis tick minor %g\n", xsp/2); + fprintf(out, "@ xaxis ticklabel start type spec\n"); + fprintf(out, "@ xaxis ticklabel start %g\n", ceil(min/xsp)*xsp); + fprintf(out, "@ yaxis tick major %g\n", ysp); + fprintf(out, "@ yaxis tick minor %g\n", ysp/2); + fprintf(out, "@ yaxis ticklabel start type spec\n"); + fprintf(out, "@ yaxis ticklabel start %g\n", ceil(min/ysp)*ysp); + if ((min < 0) && (max > 0)) + { + fprintf(out, "@ zeroxaxis bar on\n"); + fprintf(out, "@ zeroxaxis bar linestyle 3\n"); + } + } + for (s = 0; s < nsetspergraph; s++) + { + for (i = 0; i < n; i++) + { + if (bSplit && i > 0 && abs(x[i]) < 1e-5) + { + if (output_env_get_print_xvgr_codes(oenv)) + { + fprintf(out, "&\n"); + } + } + fprintf(out, "%10.4f %10.5f\n", + x[i]*scale_x, y ? y[g][i] : sy[g][s][i]); + } + if (output_env_get_print_xvgr_codes(oenv)) + { + fprintf(out, "&\n"); + } + } + } + gmx_ffclose(out); +} + +static void +compare(int natoms, int n1, rvec **eigvec1, int n2, rvec **eigvec2, + real *eigval1, int neig1, real *eigval2, int neig2) +{ + int n, nrow; + int i, j, k; + double sum1, sum2, trace1, trace2, sab, samsb2, tmp, ip; + + n = min(n1, n2); + + n = min(n, min(neig1, neig2)); + fprintf(stdout, "Will compare the covariance matrices using %d dimensions\n", n); + + sum1 = 0; + for (i = 0; i < n; i++) + { + if (eigval1[i] < 0) + { + eigval1[i] = 0; + } + sum1 += eigval1[i]; + eigval1[i] = sqrt(eigval1[i]); + } + trace1 = sum1; + for (i = n; i < neig1; i++) + { + trace1 += eigval1[i]; + } + sum2 = 0; + for (i = 0; i < n; i++) + { + if (eigval2[i] < 0) + { + eigval2[i] = 0; + } + sum2 += eigval2[i]; + eigval2[i] = sqrt(eigval2[i]); + } + trace2 = sum2; + for (i = n; i < neig2; i++) + { + trace2 += eigval2[i]; + } + + fprintf(stdout, "Trace of the two matrices: %g and %g\n", sum1, sum2); + if (neig1 != n || neig2 != n) + { + fprintf(stdout, "this is %d%% and %d%% of the total trace\n", + (int)(100*sum1/trace1+0.5), (int)(100*sum2/trace2+0.5)); + } + fprintf(stdout, "Square root of the traces: %g and %g\n", + sqrt(sum1), sqrt(sum2)); + + sab = 0; + for (i = 0; i < n; i++) + { + tmp = 0; + for (j = 0; j < n; j++) + { + ip = 0; + for (k = 0; k < natoms; k++) + { + ip += iprod(eigvec1[i][k], eigvec2[j][k]); + } + tmp += eigval2[j]*ip*ip; + } + sab += eigval1[i]*tmp; + } + + samsb2 = sum1+sum2-2*sab; + if (samsb2 < 0) + { + samsb2 = 0; + } + + fprintf(stdout, "The overlap of the covariance matrices:\n"); + fprintf(stdout, " normalized: %.3f\n", 1-sqrt(samsb2/(sum1+sum2))); + tmp = 1-sab/sqrt(sum1*sum2); + if (tmp < 0) + { + tmp = 0; + } + fprintf(stdout, " shape: %.3f\n", 1-sqrt(tmp)); +} + + +static void inprod_matrix(const char *matfile, int natoms, + int nvec1, int *eignr1, rvec **eigvec1, + int nvec2, int *eignr2, rvec **eigvec2, + gmx_bool bSelect, int noutvec, int *outvec) +{ + FILE *out; + real **mat; + int i, x1, y1, x, y, 
nlevels; + int nx, ny; + real inp, *t_x, *t_y, max; + t_rgb rlo, rhi; + + snew(t_y, nvec2); + if (bSelect) + { + nx = noutvec; + ny = 0; + for (y1 = 0; y1 < nx; y1++) + { + if (outvec[y1] < nvec2) + { + t_y[ny] = eignr2[outvec[y1]]+1; + ny++; + } + } + } + else + { + nx = nvec1; + ny = nvec2; + for (y = 0; y < ny; y++) + { + t_y[y] = eignr2[y]+1; + } + } + + fprintf(stderr, "Calculating inner-product matrix of %dx%d eigenvectors\n", + nx, nvec2); + + snew(mat, nx); + snew(t_x, nx); + max = 0; + for (x1 = 0; x1 < nx; x1++) + { + snew(mat[x1], ny); + if (bSelect) + { + x = outvec[x1]; + } + else + { + x = x1; + } + t_x[x1] = eignr1[x]+1; + fprintf(stderr, " %d", eignr1[x]+1); + for (y1 = 0; y1 < ny; y1++) + { + inp = 0; + if (bSelect) + { + while (outvec[y1] >= nvec2) + { + y1++; + } + y = outvec[y1]; + } + else + { + y = y1; + } + for (i = 0; i < natoms; i++) + { + inp += iprod(eigvec1[x][i], eigvec2[y][i]); + } + mat[x1][y1] = fabs(inp); + if (mat[x1][y1] > max) + { + max = mat[x1][y1]; + } + } + } + fprintf(stderr, "\n"); + rlo.r = 1; rlo.g = 1; rlo.b = 1; + rhi.r = 0; rhi.g = 0; rhi.b = 0; + nlevels = 41; + out = gmx_ffopen(matfile, "w"); + write_xpm(out, 0, "Eigenvector inner-products", "in.prod.", "run 1", "run 2", + nx, ny, t_x, t_y, mat, 0.0, max, rlo, rhi, &nlevels); + gmx_ffclose(out); +} + +static void overlap(const char *outfile, int natoms, + rvec **eigvec1, + int nvec2, int *eignr2, rvec **eigvec2, + int noutvec, int *outvec, + const output_env_t oenv) +{ + FILE *out; + int i, v, vec, x; + real overlap, inp; + + fprintf(stderr, "Calculating overlap between eigenvectors of set 2 with eigenvectors\n"); + for (i = 0; i < noutvec; i++) + { + fprintf(stderr, "%d ", outvec[i]+1); + } + fprintf(stderr, "\n"); + + out = xvgropen(outfile, "Subspace overlap", + "Eigenvectors of trajectory 2", "Overlap", oenv); + fprintf(out, "@ subtitle \"using %d eigenvectors of trajectory 1\"\n", + noutvec); + overlap = 0; + for (x = 0; x < nvec2; x++) + { + for (v = 0; v < noutvec; v++) + { + vec = outvec[v]; + inp = 0; + for (i = 0; i < natoms; i++) + { + inp += iprod(eigvec1[vec][i], eigvec2[x][i]); + } + overlap += sqr(inp); + } + fprintf(out, "%5d %5.3f\n", eignr2[x]+1, overlap/noutvec); + } + + gmx_ffclose(out); +} + +static void project(const char *trajfile, t_topology *top, int ePBC, matrix topbox, + const char *projfile, const char *twodplotfile, + const char *threedplotfile, const char *filterfile, int skip, + const char *extremefile, gmx_bool bExtrAll, real extreme, + int nextr, t_atoms *atoms, int natoms, atom_id *index, + gmx_bool bFit, rvec *xref, int nfit, atom_id *ifit, real *w_rls, + real *sqrtm, rvec *xav, + int *eignr, rvec **eigvec, + int noutvec, int *outvec, gmx_bool bSplit, + const output_env_t oenv) +{ + FILE *xvgrout = NULL; + int nat, i, j, d, v, vec, nfr, nframes = 0, snew_size, frame; + t_trxstatus *out = NULL; + t_trxstatus *status; + int noutvec_extr, imin, imax; + real *pmin, *pmax; + atom_id *all_at; + matrix box; + rvec *xread, *x; + real t, inp, **inprod = NULL, min = 0, max = 0; + char str[STRLEN], str2[STRLEN], **ylabel, *c; + real fact; + gmx_rmpbc_t gpbc = NULL; + + snew(x, natoms); + + if (bExtrAll) + { + noutvec_extr = noutvec; + } + else + { + noutvec_extr = 1; + } + + + if (trajfile) + { + snew(inprod, noutvec+1); + + if (filterfile) + { + fprintf(stderr, "Writing a filtered trajectory to %s using eigenvectors\n", + filterfile); + for (i = 0; i < noutvec; i++) + { + fprintf(stderr, "%d ", outvec[i]+1); + } + fprintf(stderr, "\n"); + out = open_trx(filterfile, 
"w"); + } + snew_size = 0; + nfr = 0; + nframes = 0; + nat = read_first_x(oenv, &status, trajfile, &t, &xread, box); + if (nat > atoms->nr) + { + gmx_fatal(FARGS, "the number of atoms in your trajectory (%d) is larger than the number of atoms in your structure file (%d)", nat, atoms->nr); + } + snew(all_at, nat); + + if (top) + { + gpbc = gmx_rmpbc_init(&top->idef, ePBC, nat); + } + + for (i = 0; i < nat; i++) + { + all_at[i] = i; + } + do + { + if (nfr % skip == 0) + { + if (top) + { + gmx_rmpbc(gpbc, nat, box, xread); + } + if (nframes >= snew_size) + { + snew_size += 100; + for (i = 0; i < noutvec+1; i++) + { + srenew(inprod[i], snew_size); + } + } + inprod[noutvec][nframes] = t; + /* calculate x: a fitted struture of the selected atoms */ + if (bFit) + { + reset_x(nfit, ifit, nat, NULL, xread, w_rls); + do_fit(nat, w_rls, xref, xread); + } + for (i = 0; i < natoms; i++) + { + copy_rvec(xread[index[i]], x[i]); + } + + for (v = 0; v < noutvec; v++) + { + vec = outvec[v]; + /* calculate (mass-weighted) projection */ + inp = 0; + for (i = 0; i < natoms; i++) + { + inp += (eigvec[vec][i][0]*(x[i][0]-xav[i][0])+ + eigvec[vec][i][1]*(x[i][1]-xav[i][1])+ + eigvec[vec][i][2]*(x[i][2]-xav[i][2]))*sqrtm[i]; + } + inprod[v][nframes] = inp; + } + if (filterfile) + { + for (i = 0; i < natoms; i++) + { + for (d = 0; d < DIM; d++) + { + /* misuse xread for output */ + xread[index[i]][d] = xav[i][d]; + for (v = 0; v < noutvec; v++) + { + xread[index[i]][d] += + inprod[v][nframes]*eigvec[outvec[v]][i][d]/sqrtm[i]; + } + } + } + write_trx(out, natoms, index, atoms, 0, t, box, xread, NULL, NULL); + } + nframes++; + } + nfr++; + } + while (read_next_x(oenv, status, &t, xread, box)); + close_trx(status); + sfree(x); + if (filterfile) + { + close_trx(out); + } + } + else + { + snew(xread, atoms->nr); + } + + if (top) + { + gmx_rmpbc_done(gpbc); + } + + + if (projfile) + { + snew(ylabel, noutvec); + for (v = 0; v < noutvec; v++) + { + sprintf(str, "vec %d", eignr[outvec[v]]+1); + ylabel[v] = strdup(str); + } + sprintf(str, "projection on eigenvectors (%s)", proj_unit); + write_xvgr_graphs(projfile, noutvec, 1, str, NULL, output_env_get_xvgr_tlabel(oenv), + (const char **)ylabel, + nframes, inprod[noutvec], inprod, NULL, + output_env_get_time_factor(oenv), FALSE, bSplit, oenv); + } + + if (twodplotfile) + { + sprintf(str, "projection on eigenvector %d (%s)", + eignr[outvec[0]]+1, proj_unit); + sprintf(str2, "projection on eigenvector %d (%s)", + eignr[outvec[noutvec-1]]+1, proj_unit); + xvgrout = xvgropen(twodplotfile, "2D projection of trajectory", str, str2, + oenv); + for (i = 0; i < nframes; i++) + { + if (bSplit && i > 0 && abs(inprod[noutvec][i]) < 1e-5) + { + fprintf(xvgrout, "&\n"); + } + fprintf(xvgrout, "%10.5f %10.5f\n", inprod[0][i], inprod[noutvec-1][i]); + } + gmx_ffclose(xvgrout); + } + + if (threedplotfile) + { + t_atoms atoms; + rvec *x; + real *b = NULL; + matrix box; + char *resnm, *atnm, pdbform[STRLEN]; + gmx_bool bPDB, b4D; + FILE *out; + + if (noutvec < 3) + { + gmx_fatal(FARGS, "You have selected less than 3 eigenvectors"); + } + + /* initialize */ + bPDB = fn2ftp(threedplotfile) == efPDB; + clear_mat(box); + box[XX][XX] = box[YY][YY] = box[ZZ][ZZ] = 1; + + b4D = bPDB && (noutvec >= 4); + if (b4D) + { + fprintf(stderr, "You have selected four or more eigenvectors:\n" + "fourth eigenvector will be plotted " + "in bfactor field of pdb file\n"); + sprintf(str, "4D proj. of traj. on eigenv. 
%d, %d, %d and %d", + eignr[outvec[0]]+1, eignr[outvec[1]]+1, + eignr[outvec[2]]+1, eignr[outvec[3]]+1); + } + else + { + sprintf(str, "3D proj. of traj. on eigenv. %d, %d and %d", + eignr[outvec[0]]+1, eignr[outvec[1]]+1, eignr[outvec[2]]+1); + } + init_t_atoms(&atoms, nframes, FALSE); + snew(x, nframes); + snew(b, nframes); + atnm = strdup("C"); + resnm = strdup("PRJ"); + + if (nframes > 10000) + { + fact = 10000.0/nframes; + } + else + { + fact = 1.0; + } + + for (i = 0; i < nframes; i++) + { + atoms.atomname[i] = &atnm; + atoms.atom[i].resind = i; + atoms.resinfo[i].name = &resnm; + atoms.resinfo[i].nr = ceil(i*fact); + atoms.resinfo[i].ic = ' '; + x[i][XX] = inprod[0][i]; + x[i][YY] = inprod[1][i]; + x[i][ZZ] = inprod[2][i]; + if (b4D) + { + b[i] = inprod[3][i]; + } + } + if ( ( b4D || bSplit ) && bPDB) + { + strcpy(pdbform, get_pdbformat()); + strcat(pdbform, "%8.4f%8.4f\n"); + + out = gmx_ffopen(threedplotfile, "w"); + fprintf(out, "HEADER %s\n", str); + if (b4D) + { + fprintf(out, "REMARK %s\n", "fourth dimension plotted as B-factor"); + } + j = 0; + for (i = 0; i < atoms.nr; i++) + { + if (j > 0 && bSplit && abs(inprod[noutvec][i]) < 1e-5) + { + fprintf(out, "TER\n"); + j = 0; + } + fprintf(out, pdbform, "ATOM", i+1, "C", "PRJ", ' ', j+1, + 10*x[i][XX], 10*x[i][YY], 10*x[i][ZZ], 1.0, 10*b[i]); + if (j > 0) + { + fprintf(out, "CONECT%5d%5d\n", i, i+1); + } + j++; + } + fprintf(out, "TER\n"); + gmx_ffclose(out); + } + else + { + write_sto_conf(threedplotfile, str, &atoms, x, NULL, ePBC, box); + } + free_t_atoms(&atoms, FALSE); + } + + if (extremefile) + { + snew(pmin, noutvec_extr); + snew(pmax, noutvec_extr); + if (extreme == 0) + { + fprintf(stderr, "%11s %17s %17s\n", "eigenvector", "Minimum", "Maximum"); + fprintf(stderr, + "%11s %10s %10s %10s %10s\n", "", "value", "frame", "value", "frame"); + imin = 0; + imax = 0; + for (v = 0; v < noutvec_extr; v++) + { + for (i = 0; i < nframes; i++) + { + if (inprod[v][i] < inprod[v][imin]) + { + imin = i; + } + if (inprod[v][i] > inprod[v][imax]) + { + imax = i; + } + } + pmin[v] = inprod[v][imin]; + pmax[v] = inprod[v][imax]; + fprintf(stderr, "%7d %10.6f %10d %10.6f %10d\n", + eignr[outvec[v]]+1, + pmin[v], imin, pmax[v], imax); + } + } + else + { + pmin[0] = -extreme; + pmax[0] = extreme; + } + /* build format string for filename: */ + strcpy(str, extremefile); /* copy filename */ + c = strrchr(str, '.'); /* find where extention begins */ + strcpy(str2, c); /* get extention */ + sprintf(c, "%%d%s", str2); /* append '%s' and extention to filename */ + for (v = 0; v < noutvec_extr; v++) + { + /* make filename using format string */ + if (noutvec_extr == 1) + { + strcpy(str2, extremefile); + } + else + { + sprintf(str2, str, eignr[outvec[v]]+1); + } + fprintf(stderr, "Writing %d frames along eigenvector %d to %s\n", + nextr, outvec[v]+1, str2); + out = open_trx(str2, "w"); + for (frame = 0; frame < nextr; frame++) + { + if ((extreme == 0) && (nextr <= 3)) + { + for (i = 0; i < natoms; i++) + { + atoms->resinfo[atoms->atom[index[i]].resind].chainid = 'A' + frame; + } + } + for (i = 0; i < natoms; i++) + { + for (d = 0; d < DIM; d++) + { + xread[index[i]][d] = + (xav[i][d] + (pmin[v]*(nextr-frame-1)+pmax[v]*frame)/(nextr-1) + *eigvec[outvec[v]][i][d]/sqrtm[i]); + } + } + write_trx(out, natoms, index, atoms, 0, frame, topbox, xread, NULL, NULL); + } + close_trx(out); + } + sfree(pmin); + sfree(pmax); + } + fprintf(stderr, "\n"); +} + +static void components(const char *outfile, int natoms, + int *eignr, rvec **eigvec, + int noutvec, int 
*outvec, + const output_env_t oenv) +{ + int g, s, v, i; + real *x, ***y; + char str[STRLEN], **ylabel; + + fprintf(stderr, "Writing eigenvector components to %s\n", outfile); + + snew(ylabel, noutvec); + snew(y, noutvec); + snew(x, natoms); + for (i = 0; i < natoms; i++) + { + x[i] = i+1; + } + for (g = 0; g < noutvec; g++) + { + v = outvec[g]; + sprintf(str, "vec %d", eignr[v]+1); + ylabel[g] = strdup(str); + snew(y[g], 4); + for (s = 0; s < 4; s++) + { + snew(y[g][s], natoms); + } + for (i = 0; i < natoms; i++) + { + y[g][0][i] = norm(eigvec[v][i]); + for (s = 0; s < 3; s++) + { + y[g][s+1][i] = eigvec[v][i][s]; + } + } + } + write_xvgr_graphs(outfile, noutvec, 4, "Eigenvector components", + "black: total, red: x, green: y, blue: z", + "Atom number", (const char **)ylabel, + natoms, x, NULL, y, 1, FALSE, FALSE, oenv); + fprintf(stderr, "\n"); +} + +static void rmsf(const char *outfile, int natoms, real *sqrtm, + int *eignr, rvec **eigvec, + int noutvec, int *outvec, + real *eigval, int neig, + const output_env_t oenv) +{ + int nrow, g, v, i; + real *x, **y; + char str[STRLEN], **ylabel; + + for (i = 0; i < neig; i++) + { + if (eigval[i] < 0) + { + eigval[i] = 0; + } + } + + fprintf(stderr, "Writing rmsf to %s\n", outfile); + + snew(ylabel, noutvec); + snew(y, noutvec); + snew(x, natoms); + for (i = 0; i < natoms; i++) + { + x[i] = i+1; + } + for (g = 0; g < noutvec; g++) + { + v = outvec[g]; + if (eignr[v] >= neig) + { + gmx_fatal(FARGS, "Selected vector %d is larger than the number of eigenvalues (%d)", eignr[v]+1, neig); + } + sprintf(str, "vec %d", eignr[v]+1); + ylabel[g] = strdup(str); + snew(y[g], natoms); + for (i = 0; i < natoms; i++) + { + y[g][i] = sqrt(eigval[eignr[v]]*norm2(eigvec[v][i]))/sqrtm[i]; + } + } + write_xvgr_graphs(outfile, noutvec, 1, "RMS fluctuation (nm) ", NULL, + "Atom number", (const char **)ylabel, + natoms, x, y, NULL, 1, TRUE, FALSE, oenv); + fprintf(stderr, "\n"); +} + +int gmx_anaeig(int argc, char *argv[]) +{ + static const char *desc[] = { + "[THISMODULE] analyzes eigenvectors. The eigenvectors can be of a", + "covariance matrix ([gmx-covar]) or of a Normal Modes analysis", + "([gmx-nmeig]).[PAR]", + + "When a trajectory is projected on eigenvectors, all structures are", + "fitted to the structure in the eigenvector file, if present, otherwise", + "to the structure in the structure file. When no run input file is", + "supplied, periodicity will not be taken into account. 
Most analyses", + "are performed on eigenvectors [TT]-first[tt] to [TT]-last[tt], but when", + "[TT]-first[tt] is set to -1 you will be prompted for a selection.[PAR]", + + "[TT]-comp[tt]: plot the vector components per atom of eigenvectors", + "[TT]-first[tt] to [TT]-last[tt].[PAR]", + + "[TT]-rmsf[tt]: plot the RMS fluctuation per atom of eigenvectors", + "[TT]-first[tt] to [TT]-last[tt] (requires [TT]-eig[tt]).[PAR]", + + "[TT]-proj[tt]: calculate projections of a trajectory on eigenvectors", + "[TT]-first[tt] to [TT]-last[tt].", + "The projections of a trajectory on the eigenvectors of its", + "covariance matrix are called principal components (pc's).", + "It is often useful to check the cosine content of the pc's,", + "since the pc's of random diffusion are cosines with the number", + "of periods equal to half the pc index.", + "The cosine content of the pc's can be calculated with the program", + "[gmx-analyze].[PAR]", + + "[TT]-2d[tt]: calculate a 2d projection of a trajectory on eigenvectors", + "[TT]-first[tt] and [TT]-last[tt].[PAR]", + + "[TT]-3d[tt]: calculate a 3d projection of a trajectory on the first", + "three selected eigenvectors.[PAR]", + + "[TT]-filt[tt]: filter the trajectory to show only the motion along", + "eigenvectors [TT]-first[tt] to [TT]-last[tt].[PAR]", + + "[TT]-extr[tt]: calculate the two extreme projections along a trajectory", + "on the average structure and interpolate [TT]-nframes[tt] frames", + "between them, or set your own extremes with [TT]-max[tt]. The", + "eigenvector [TT]-first[tt] will be written unless [TT]-first[tt] and", + "[TT]-last[tt] have been set explicitly, in which case all eigenvectors", + "will be written to separate files. Chain identifiers will be added", + "when writing a [TT].pdb[tt] file with two or three structures (you", + "can use [TT]rasmol -nmrpdb[tt] to view such a [TT].pdb[tt] file).[PAR]", + + " Overlap calculations between covariance analysis:[BR]", + " [BB]Note:[bb] the analysis should use the same fitting structure[PAR]", + + "[TT]-over[tt]: calculate the subspace overlap of the eigenvectors in", + "file [TT]-v2[tt] with eigenvectors [TT]-first[tt] to [TT]-last[tt]", + "in file [TT]-v[tt].[PAR]", + + "[TT]-inpr[tt]: calculate a matrix of inner-products between", + "eigenvectors in files [TT]-v[tt] and [TT]-v2[tt]. All eigenvectors", + "of both files will be used unless [TT]-first[tt] and [TT]-last[tt]", + "have been set explicitly.[PAR]", + + "When [TT]-v[tt], [TT]-eig[tt], [TT]-v2[tt] and [TT]-eig2[tt] are given,", + "a single number for the overlap between the covariance matrices is", + "generated. The formulas are:[BR]", + " difference = sqrt(tr((sqrt(M1) - sqrt(M2))^2))[BR]", + "normalized overlap = 1 - difference/sqrt(tr(M1) + tr(M2))[BR]", + " shape overlap = 1 - sqrt(tr((sqrt(M1/tr(M1)) - sqrt(M2/tr(M2)))^2))[BR]", + "where M1 and M2 are the two covariance matrices and tr is the trace", + "of a matrix. The numbers are proportional to the overlap of the square", + "root of the fluctuations. The normalized overlap is the most useful", + "number, it is 1 for identical matrices and 0 when the sampled", + "subspaces are orthogonal.[PAR]", + "When the [TT]-entropy[tt] flag is given an entropy estimate will be", + "computed based on the Quasiharmonic approach and based on", + "Schlitter's formula." 
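+    /* Explanatory aside (not part of the original change): the quantities named
+     * in the help text above, restated in plain notation, where M1 and M2 are
+     * the covariance matrices read via -v and -v2 and tr() is the matrix trace,
+     * mirroring what compare() computes:
+     *   difference          d = sqrt( tr( (sqrt(M1) - sqrt(M2))^2 ) )
+     *   normalized overlap    = 1 - d / sqrt( tr(M1) + tr(M2) )
+     *   shape overlap         = 1 - sqrt( tr( (sqrt(M1/tr(M1)) - sqrt(M2/tr(M2)))^2 ) )
+     * The -entropy option uses calc_entropy_qh() and calc_entropy_schlitter()
+     * defined near the top of this file, which (up to unit conversions) evaluate
+     *   S_qh        = R * sum_i [ x_i/(exp(x_i)-1) - ln(1 - exp(-x_i)) ],
+     *                 with x_i = hbar*w_i/(kT) and w_i = sqrt(kT/lambda_i),
+     *   S_schlitter = (R/2) * sum_i ln( 1 + (kT*e^2/hbar^2)*lambda_i ),
+     * where lambda_i are the covariance eigenvalues not skipped via -nevskip.
+     */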
+ }; + static int first = 1, last = -1, skip = 1, nextr = 2, nskip = 6; + static real max = 0.0, temp = 298.15; + static gmx_bool bSplit = FALSE, bEntropy = FALSE; + t_pargs pa[] = { + { "-first", FALSE, etINT, {&first}, + "First eigenvector for analysis (-1 is select)" }, + { "-last", FALSE, etINT, {&last}, + "Last eigenvector for analysis (-1 is till the last)" }, + { "-skip", FALSE, etINT, {&skip}, + "Only analyse every nr-th frame" }, + { "-max", FALSE, etREAL, {&max}, + "Maximum for projection of the eigenvector on the average structure, " + "max=0 gives the extremes" }, + { "-nframes", FALSE, etINT, {&nextr}, + "Number of frames for the extremes output" }, + { "-split", FALSE, etBOOL, {&bSplit}, + "Split eigenvector projections where time is zero" }, + { "-entropy", FALSE, etBOOL, {&bEntropy}, + "Compute entropy according to the Quasiharmonic formula or Schlitter's method." }, + { "-temp", FALSE, etREAL, {&temp}, + "Temperature for entropy calculations" }, + { "-nevskip", FALSE, etINT, {&nskip}, + "Number of eigenvalues to skip when computing the entropy due to the quasi harmonic approximation. When you do a rotational and/or translational fit prior to the covariance analysis, you get 3 or 6 eigenvalues that are very close to zero, and which should not be taken into account when computing the entropy." } + }; +#define NPA asize(pa) + + FILE *out; + int status, trjout; + t_topology top; + int ePBC = -1; + t_atoms *atoms = NULL; + rvec *xtop, *xref1, *xref2, *xrefp = NULL; + gmx_bool bDMR1, bDMA1, bDMR2, bDMA2; + int nvec1, nvec2, *eignr1 = NULL, *eignr2 = NULL; + rvec *x, *xread, *xav1, *xav2, **eigvec1 = NULL, **eigvec2 = NULL; + matrix topbox; + real xid, totmass, *sqrtm, *w_rls, t, lambda; + int natoms, step; + char *grpname; + const char *indexfile; + char title[STRLEN]; + int i, j, d; + int nout, *iout, noutvec, *outvec, nfit; + atom_id *index, *ifit; + const char *VecFile, *Vec2File, *topfile; + const char *EigFile, *Eig2File; + const char *CompFile, *RmsfFile, *ProjOnVecFile; + const char *TwoDPlotFile, *ThreeDPlotFile; + const char *FilterFile, *ExtremeFile; + const char *OverlapFile, *InpMatFile; + gmx_bool bFit1, bFit2, bM, bIndex, bTPS, bTop, bVec2, bProj; + gmx_bool bFirstToLast, bFirstLastSet, bTraj, bCompare, bPDB3D; + real *eigval1 = NULL, *eigval2 = NULL; + int neig1, neig2; + double **xvgdata; + output_env_t oenv; + gmx_rmpbc_t gpbc; + + t_filenm fnm[] = { + { efTRN, "-v", "eigenvec", ffREAD }, + { efTRN, "-v2", "eigenvec2", ffOPTRD }, + { efTRX, "-f", NULL, ffOPTRD }, + { efTPS, NULL, NULL, ffOPTRD }, + { efNDX, NULL, NULL, ffOPTRD }, + { efXVG, "-eig", "eigenval", ffOPTRD }, + { efXVG, "-eig2", "eigenval2", ffOPTRD }, + { efXVG, "-comp", "eigcomp", ffOPTWR }, + { efXVG, "-rmsf", "eigrmsf", ffOPTWR }, + { efXVG, "-proj", "proj", ffOPTWR }, + { efXVG, "-2d", "2dproj", ffOPTWR }, + { efSTO, "-3d", "3dproj.pdb", ffOPTWR }, + { efTRX, "-filt", "filtered", ffOPTWR }, + { efTRX, "-extr", "extreme.pdb", ffOPTWR }, + { efXVG, "-over", "overlap", ffOPTWR }, + { efXPM, "-inpr", "inprod", ffOPTWR } + }; +#define NFILE asize(fnm) + + if (!parse_common_args(&argc, argv, + PCA_CAN_TIME | PCA_TIME_UNIT | PCA_CAN_VIEW | PCA_BE_NICE, + NFILE, fnm, NPA, pa, asize(desc), desc, 0, NULL, &oenv)) + { + return 0; + } + + indexfile = ftp2fn_null(efNDX, NFILE, fnm); + + VecFile = opt2fn("-v", NFILE, fnm); + Vec2File = opt2fn_null("-v2", NFILE, fnm); + topfile = ftp2fn(efTPS, NFILE, fnm); + EigFile = opt2fn_null("-eig", NFILE, fnm); + Eig2File = opt2fn_null("-eig2", NFILE, fnm); + CompFile = 
opt2fn_null("-comp", NFILE, fnm); + RmsfFile = opt2fn_null("-rmsf", NFILE, fnm); + ProjOnVecFile = opt2fn_null("-proj", NFILE, fnm); + TwoDPlotFile = opt2fn_null("-2d", NFILE, fnm); + ThreeDPlotFile = opt2fn_null("-3d", NFILE, fnm); + FilterFile = opt2fn_null("-filt", NFILE, fnm); + ExtremeFile = opt2fn_null("-extr", NFILE, fnm); + OverlapFile = opt2fn_null("-over", NFILE, fnm); + InpMatFile = ftp2fn_null(efXPM, NFILE, fnm); + + bTop = fn2bTPX(topfile); + bProj = ProjOnVecFile || TwoDPlotFile || ThreeDPlotFile + || FilterFile || ExtremeFile; + bFirstLastSet = + opt2parg_bSet("-first", NPA, pa) && opt2parg_bSet("-last", NPA, pa); + bFirstToLast = CompFile || RmsfFile || ProjOnVecFile || FilterFile || + OverlapFile || ((ExtremeFile || InpMatFile) && bFirstLastSet); + bVec2 = Vec2File || OverlapFile || InpMatFile; + bM = RmsfFile || bProj; + bTraj = ProjOnVecFile || FilterFile || (ExtremeFile && (max == 0)) + || TwoDPlotFile || ThreeDPlotFile; + bIndex = bM || bProj; + bTPS = ftp2bSet(efTPS, NFILE, fnm) || bM || bTraj || + FilterFile || (bIndex && indexfile); + bCompare = Vec2File || Eig2File; + bPDB3D = fn2ftp(ThreeDPlotFile) == efPDB; + + read_eigenvectors(VecFile, &natoms, &bFit1, + &xref1, &bDMR1, &xav1, &bDMA1, + &nvec1, &eignr1, &eigvec1, &eigval1); + neig1 = DIM*natoms; + + /* Overwrite eigenvalues from separate files if the user provides them */ + if (EigFile != NULL) + { + int neig_tmp = read_xvg(EigFile, &xvgdata, &i); + if (neig_tmp != neig1) + { + fprintf(stderr, "Warning: number of eigenvalues in xvg file (%d) does not mtch trr file (%d)\n", neig1, natoms); + } + neig1 = neig_tmp; + srenew(eigval1, neig1); + for (j = 0; j < neig1; j++) + { + real tmp = eigval1[j]; + eigval1[j] = xvgdata[1][j]; + if (debug && (eigval1[j] != tmp)) + { + fprintf(debug, "Replacing eigenvalue %d. 
From trr: %10g, from xvg: %10g\n", + j, tmp, eigval1[j]); + } + } + for (j = 0; j < i; j++) + { + sfree(xvgdata[j]); + } + sfree(xvgdata); + fprintf(stderr, "Read %d eigenvalues from %s\n", neig1, EigFile); + } + + if (bEntropy) + { + if (bDMA1) + { + gmx_fatal(FARGS, "Can not calculate entropies from mass-weighted eigenvalues, redo the analysis without mass-weighting"); + } + calc_entropy_qh(stdout, neig1, eigval1, temp, nskip); + calc_entropy_schlitter(stdout, neig1, nskip, eigval1, temp); + } + + if (bVec2) + { + if (!Vec2File) + { + gmx_fatal(FARGS, "Need a second eigenvector file to do this analysis."); + } + read_eigenvectors(Vec2File, &neig2, &bFit2, + &xref2, &bDMR2, &xav2, &bDMA2, &nvec2, &eignr2, &eigvec2, &eigval2); + + neig2 = DIM*neig2; + if (neig2 != neig1) + { + gmx_fatal(FARGS, "Dimensions in the eigenvector files don't match"); + } + } + + if (Eig2File != NULL) + { + neig2 = read_xvg(Eig2File, &xvgdata, &i); + srenew(eigval2, neig2); + for (j = 0; j < neig2; j++) + { + eigval2[j] = xvgdata[1][j]; + } + for (j = 0; j < i; j++) + { + sfree(xvgdata[j]); + } + sfree(xvgdata); + fprintf(stderr, "Read %d eigenvalues from %s\n", neig2, Eig2File); + } + + + if ((!bFit1 || xref1) && !bDMR1 && !bDMA1) + { + bM = FALSE; + } + if ((xref1 == NULL) && (bM || bTraj)) + { + bTPS = TRUE; + } + + xtop = NULL; + nfit = 0; + ifit = NULL; + w_rls = NULL; + + if (!bTPS) + { + bTop = FALSE; + } + else + { + bTop = read_tps_conf(ftp2fn(efTPS, NFILE, fnm), + title, &top, &ePBC, &xtop, NULL, topbox, bM); + atoms = &top.atoms; + gpbc = gmx_rmpbc_init(&top.idef, ePBC, atoms->nr); + gmx_rmpbc(gpbc, atoms->nr, topbox, xtop); + /* Fitting is only required for the projection */ + if (bProj && bFit1) + { + if (xref1 == NULL) + { + printf("\nNote: the structure in %s should be the same\n" + " as the one used for the fit in g_covar\n", topfile); + } + printf("\nSelect the index group that was used for the least squares fit in g_covar\n"); + get_index(atoms, indexfile, 1, &nfit, &ifit, &grpname); + + snew(w_rls, atoms->nr); + for (i = 0; (i < nfit); i++) + { + if (bDMR1) + { + w_rls[ifit[i]] = atoms->atom[ifit[i]].m; + } + else + { + w_rls[ifit[i]] = 1.0; + } + } + + snew(xrefp, atoms->nr); + if (xref1 != NULL) + { ++ /* Safety check between selected fit-group and reference structure read from the eigenvector file */ ++ if (natoms != nfit) ++ { ++ gmx_fatal(FARGS, "you selected a group with %d elements instead of %d, your selection does not fit the reference structure in the eigenvector file.", nfit, natoms); ++ } + for (i = 0; (i < nfit); i++) + { + copy_rvec(xref1[i], xrefp[ifit[i]]); + } + } + else + { + /* The top coordinates are the fitting reference */ + for (i = 0; (i < nfit); i++) + { + copy_rvec(xtop[ifit[i]], xrefp[ifit[i]]); + } + reset_x(nfit, ifit, atoms->nr, NULL, xrefp, w_rls); + } + } + gmx_rmpbc_done(gpbc); + } + + if (bIndex) + { + printf("\nSelect an index group of %d elements that corresponds to the eigenvectors\n", natoms); + get_index(atoms, indexfile, 1, &i, &index, &grpname); + if (i != natoms) + { + gmx_fatal(FARGS, "you selected a group with %d elements instead of %d", i, natoms); + } + printf("\n"); + } + + snew(sqrtm, natoms); + if (bM && bDMA1) + { + proj_unit = "u\\S1/2\\Nnm"; + for (i = 0; (i < natoms); i++) + { + sqrtm[i] = sqrt(atoms->atom[index[i]].m); + } + } + else + { + proj_unit = "nm"; + for (i = 0; (i < natoms); i++) + { + sqrtm[i] = 1.0; + } + } + + if (bVec2) + { + t = 0; + totmass = 0; + for (i = 0; (i < natoms); i++) + { + for (d = 0; (d < DIM); d++) + { + t += 
sqr((xav1[i][d]-xav2[i][d])*sqrtm[i]); + totmass += sqr(sqrtm[i]); + } + } + fprintf(stdout, "RMSD (without fit) between the two average structures:" + " %.3f (nm)\n\n", sqrt(t/totmass)); + } + + if (last == -1) + { + last = natoms*DIM; + } + if (first > -1) + { + if (bFirstToLast) + { + /* make an index from first to last */ + nout = last-first+1; + snew(iout, nout); + for (i = 0; i < nout; i++) + { + iout[i] = first-1+i; + } + } + else if (ThreeDPlotFile) + { + /* make an index of first+(0,1,2) and last */ + nout = bPDB3D ? 4 : 3; + nout = min(last-first+1, nout); + snew(iout, nout); + iout[0] = first-1; + iout[1] = first; + if (nout > 3) + { + iout[2] = first+1; + } + iout[nout-1] = last-1; + } + else + { + /* make an index of first and last */ + nout = 2; + snew(iout, nout); + iout[0] = first-1; + iout[1] = last-1; + } + } + else + { + printf("Select eigenvectors for output, end your selection with 0\n"); + nout = -1; + iout = NULL; + + do + { + nout++; + srenew(iout, nout+1); + if (1 != scanf("%d", &iout[nout])) + { + gmx_fatal(FARGS, "Error reading user input"); + } + iout[nout]--; + } + while (iout[nout] >= 0); + + printf("\n"); + } + /* make an index of the eigenvectors which are present */ + snew(outvec, nout); + noutvec = 0; + for (i = 0; i < nout; i++) + { + j = 0; + while ((j < nvec1) && (eignr1[j] != iout[i])) + { + j++; + } + if ((j < nvec1) && (eignr1[j] == iout[i])) + { + outvec[noutvec] = j; + noutvec++; + } + } + fprintf(stderr, "%d eigenvectors selected for output", noutvec); + if (noutvec <= 100) + { + fprintf(stderr, ":"); + for (j = 0; j < noutvec; j++) + { + fprintf(stderr, " %d", eignr1[outvec[j]]+1); + } + } + fprintf(stderr, "\n"); + + if (CompFile) + { + components(CompFile, natoms, eignr1, eigvec1, noutvec, outvec, oenv); + } + + if (RmsfFile) + { + rmsf(RmsfFile, natoms, sqrtm, eignr1, eigvec1, noutvec, outvec, eigval1, + neig1, oenv); + } + + if (bProj) + { + project(bTraj ? opt2fn("-f", NFILE, fnm) : NULL, + bTop ? &top : NULL, ePBC, topbox, + ProjOnVecFile, TwoDPlotFile, ThreeDPlotFile, FilterFile, skip, + ExtremeFile, bFirstLastSet, max, nextr, atoms, natoms, index, + bFit1, xrefp, nfit, ifit, w_rls, + sqrtm, xav1, eignr1, eigvec1, noutvec, outvec, bSplit, + oenv); + } + + if (OverlapFile) + { + overlap(OverlapFile, natoms, + eigvec1, nvec2, eignr2, eigvec2, noutvec, outvec, oenv); + } + + if (InpMatFile) + { + inprod_matrix(InpMatFile, natoms, + nvec1, eignr1, eigvec1, nvec2, eignr2, eigvec2, + bFirstLastSet, noutvec, outvec); + } + + if (bCompare) + { + compare(natoms, nvec1, eigvec1, nvec2, eigvec2, eigval1, neig1, eigval2, neig2); + } + + + if (!CompFile && !bProj && !OverlapFile && !InpMatFile && + !bCompare && !bEntropy) + { + fprintf(stderr, "\nIf you want some output," + " set one (or two or ...) of the output file options\n"); + } + + + view_all(oenv, NFILE, fnm); + + return 0; +} diff --cc src/gromacs/gmxlib/copyrite.cpp index e0cab08de3,0000000000..249810e6ab mode 100644,000000..100644 --- a/src/gromacs/gmxlib/copyrite.cpp +++ b/src/gromacs/gmxlib/copyrite.cpp @@@ -1,804 -1,0 +1,816 @@@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. 
+ * Copyright (c) 2013,2014, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +#include "copyrite.h" + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include + +#ifdef HAVE_LIBMKL +#include +#endif + +#include + +/* This file is completely threadsafe - keep it that way! */ + +#include "gromacs/legacyheaders/macros.h" +#include "gromacs/random/random.h" +#include "gromacs/legacyheaders/smalloc.h" +#include "gromacs/legacyheaders/string2.h" +#include "gromacs/legacyheaders/vec.h" + +#include "gromacs/fft/fft.h" +#include "gromacs/fileio/futil.h" +#include "gromacs/fileio/strdb.h" +#include "gromacs/utility/exceptions.h" +#include "gromacs/utility/gmxassert.h" +#include "gromacs/utility/programcontext.h" + +#include "buildinfo.h" + +static gmx_bool be_cool(void) +{ + /* Yes, it is bad to check the environment variable every call, + * but we dont call this routine often, and it avoids using + * a mutex for locking the variable... 
+ */ +#ifdef GMX_COOL_QUOTES + return (getenv("GMX_NO_QUOTES") == NULL); +#else + /*be uncool*/ + return FALSE; +#endif +} + +static void pukeit(const char *db, const char *defstring, char *retstring, + int retsize, int *cqnum) +{ + FILE *fp; + char **help; + int i, nhlp; + gmx_rng_t rng; + + if (be_cool() && ((fp = low_libopen(db, FALSE)) != NULL)) + { + nhlp = fget_lines(fp, &help); + /* for libraries we can use the low-level close routines */ + gmx_ffclose(fp); + rng = gmx_rng_init(gmx_rng_make_seed()); + *cqnum = static_cast(nhlp*gmx_rng_uniform_real(rng)); + gmx_rng_destroy(rng); + if (strlen(help[*cqnum]) >= STRLEN) + { + help[*cqnum][STRLEN-1] = '\0'; + } + strncpy(retstring, help[*cqnum], retsize); + for (i = 0; (i < nhlp); i++) + { + sfree(help[i]); + } + sfree(help); + } + else + { + *cqnum = -1; + strncpy(retstring, defstring, retsize); + } +} + +void bromacs(char *retstring, int retsize) +{ + int dum; + + pukeit("bromacs.dat", + "Groningen Machine for Chemical Simulation", + retstring, retsize, &dum); +} + +void cool_quote(char *retstring, int retsize, int *cqnum) +{ + char *tmpstr; + char *ptr; + int tmpcq, *p; + + if (cqnum != NULL) + { + p = cqnum; + } + else + { + p = &tmpcq; + } + + /* protect audience from explicit lyrics */ + snew(tmpstr, retsize+1); + pukeit("gurgle.dat", "Thanx for Using GROMACS - Have a Nice Day", + tmpstr, retsize-2, p); + + if ((ptr = strchr(tmpstr, '_')) != NULL) + { + *ptr = '\0'; + ptr++; + sprintf(retstring, "\"%s\" %s", tmpstr, ptr); + } + else + { + strcpy(retstring, tmpstr); + } + sfree(tmpstr); +} + +static void printCopyright(FILE *fp) +{ + static const char * const Contributors[] = { + "Emile Apol", + "Rossen Apostolov", + "Herman J.C. Berendsen", + "Par Bjelkmar", + "Aldert van Buuren", + "Rudi van Drunen", + "Anton Feenstra", + "Sebastian Fritsch", + "Gerrit Groenhof", + "Christoph Junghans", + "Peter Kasson", + "Carsten Kutzner", + "Per Larsson", + "Justin A. Lemkul", + "Magnus Lundborg", + "Pieter Meulenhoff", + "Erik Marklund", + "Teemu Murtola", + "Szilard Pall", + "Sander Pronk", + "Roland Schulz", + "Alexey Shvetsov", + "Michael Shirts", + "Alfons Sijbers", + "Peter Tieleman", + "Christian Wennberg", + "Maarten Wolf" + }; + static const char * const CopyrightText[] = { + "Copyright (c) 1991-2000, University of Groningen, The Netherlands.", + "Copyright (c) 2001-2014, The GROMACS development team at", + "Uppsala University, Stockholm University and", + "the Royal Institute of Technology, Sweden.", + "check out http://www.gromacs.org for more information." + }; + static const char * const LicenseText[] = { + "GROMACS is free software; you can redistribute it and/or modify it", + "under the terms of the GNU Lesser General Public License", + "as published by the Free Software Foundation; either version 2.1", + "of the License, or (at your option) any later version." + }; + +#define NCONTRIBUTORS (int)asize(Contributors) +#define NCR (int)asize(CopyrightText) + +// FAH has an exception permission from LGPL to allow digital signatures in Gromacs. 
+#ifdef GMX_FAHCORE +#define NLICENSE 0 +#else +#define NLICENSE (int)asize(LicenseText) +#endif + + fprintf(fp, "GROMACS is written by:\n"); + for (int i = 0; i < NCONTRIBUTORS; ) + { + for (int j = 0; j < 4 && i < NCONTRIBUTORS; ++j, ++i) + { + fprintf(fp, "%-18s ", Contributors[i]); + } + fprintf(fp, "\n"); + } + fprintf(fp, "and the project leaders:\n"); + fprintf(fp, "Mark Abraham, Berk Hess, Erik Lindahl, and David van der Spoel\n"); + fprintf(fp, "\n"); + for (int i = 0; i < NCR; ++i) + { + fprintf(fp, "%s\n", CopyrightText[i]); + } + fprintf(fp, "\n"); + for (int i = 0; i < NLICENSE; ++i) + { + fprintf(fp, "%s\n", LicenseText[i]); + } +} + + +void gmx_thanx(FILE *fp) +{ + char cq[1024]; + int cqnum = -1; + + /* protect the audience from suggestive discussions */ + cool_quote(cq, 1023, &cqnum); + + if (cqnum >= 0) + { + fprintf(fp, "\ngcq#%d: %s\n\n", cqnum, cq); + } + else + { + fprintf(fp, "\n%s\n\n", cq); + } +} + +typedef struct { + const char *key; + const char *author; + const char *title; + const char *journal; + int volume, year; + const char *pages; +} t_citerec; + +void please_cite(FILE *fp, const char *key) +{ + static const t_citerec citedb[] = { + { "Allen1987a", + "M. P. Allen and D. J. Tildesley", + "Computer simulation of liquids", + "Oxford Science Publications", + 1, 1987, "1" }, + { "Berendsen95a", + "H. J. C. Berendsen, D. van der Spoel and R. van Drunen", + "GROMACS: A message-passing parallel molecular dynamics implementation", + "Comp. Phys. Comm.", + 91, 1995, "43-56" }, + { "Berendsen84a", + "H. J. C. Berendsen, J. P. M. Postma, A. DiNola and J. R. Haak", + "Molecular dynamics with coupling to an external bath", + "J. Chem. Phys.", + 81, 1984, "3684-3690" }, + { "Ryckaert77a", + "J. P. Ryckaert and G. Ciccotti and H. J. C. Berendsen", + "Numerical Integration of the Cartesian Equations of Motion of a System with Constraints; Molecular Dynamics of n-Alkanes", + "J. Comp. Phys.", + 23, 1977, "327-341" }, + { "Miyamoto92a", + "S. Miyamoto and P. A. Kollman", + "SETTLE: An Analytical Version of the SHAKE and RATTLE Algorithms for Rigid Water Models", + "J. Comp. Chem.", + 13, 1992, "952-962" }, + { "Cromer1968a", + "D. T. Cromer & J. B. Mann", + "X-ray scattering factors computed from numerical Hartree-Fock wave functions", + "Acta Cryst. A", + 24, 1968, "321" }, + { "Barth95a", + "E. Barth and K. Kuczera and B. Leimkuhler and R. D. Skeel", + "Algorithms for Constrained Molecular Dynamics", + "J. Comp. Chem.", + 16, 1995, "1192-1209" }, + { "Essmann95a", + "U. Essmann, L. Perera, M. L. Berkowitz, T. Darden, H. Lee and L. G. Pedersen ", + "A smooth particle mesh Ewald method", + "J. Chem. Phys.", + 103, 1995, "8577-8592" }, + { "Torda89a", + "A. E. Torda and R. M. Scheek and W. F. van Gunsteren", + "Time-dependent distance restraints in molecular dynamics simulations", + "Chem. Phys. Lett.", + 157, 1989, "289-294" }, + { "Tironi95a", + "I. G. Tironi and R. Sperb and P. E. Smith and W. F. van Gunsteren", + "Generalized reaction field method for molecular dynamics simulations", + "J. Chem. Phys", + 102, 1995, "5451-5459" }, + { "Hess97a", + "B. Hess and H. Bekker and H. J. C. Berendsen and J. G. E. M. Fraaije", + "LINCS: A Linear Constraint Solver for molecular simulations", + "J. Comp. Chem.", + 18, 1997, "1463-1472" }, + { "Hess2008a", + "B. Hess", + "P-LINCS: A Parallel Linear Constraint Solver for molecular simulation", + "J. Chem. Theory Comput.", + 4, 2008, "116-122" }, + { "Hess2008b", + "B. Hess and C. Kutzner and D. van der Spoel and E. 
Lindahl", + "GROMACS 4: Algorithms for highly efficient, load-balanced, and scalable molecular simulation", + "J. Chem. Theory Comput.", + 4, 2008, "435-447" }, + { "Hub2010", + "J. S. Hub, B. L. de Groot and D. van der Spoel", + "g_wham - A free weighted histogram analysis implementation including robust error and autocorrelation estimates", + "J. Chem. Theory Comput.", + 6, 2010, "3713-3720"}, + { "In-Chul99a", + "Y. In-Chul and M. L. Berkowitz", + "Ewald summation for systems with slab geometry", + "J. Chem. Phys.", + 111, 1999, "3155-3162" }, + { "DeGroot97a", + "B. L. de Groot and D. M. F. van Aalten and R. M. Scheek and A. Amadei and G. Vriend and H. J. C. Berendsen", + "Prediction of Protein Conformational Freedom From Distance Constrains", + "Proteins", + 29, 1997, "240-251" }, + { "Spoel98a", + "D. van der Spoel and P. J. van Maaren and H. J. C. Berendsen", + "A systematic study of water models for molecular simulation. Derivation of models optimized for use with a reaction-field.", + "J. Chem. Phys.", + 108, 1998, "10220-10230" }, + { "Wishart98a", + "D. S. Wishart and A. M. Nip", + "Protein Chemical Shift Analysis: A Practical Guide", + "Biochem. Cell Biol.", + 76, 1998, "153-163" }, + { "Maiorov95", + "V. N. Maiorov and G. M. Crippen", + "Size-Independent Comparison of Protein Three-Dimensional Structures", + "PROTEINS: Struct. Funct. Gen.", + 22, 1995, "273-283" }, + { "Feenstra99", + "K. A. Feenstra and B. Hess and H. J. C. Berendsen", + "Improving Efficiency of Large Time-scale Molecular Dynamics Simulations of Hydrogen-rich Systems", + "J. Comput. Chem.", + 20, 1999, "786-798" }, + { "Lourenco2013a", + "Tuanan C. Lourenco and Mariny F. C. Coelho and Teodorico C. Ramalho and David van der Spoel and Luciano T. Costa", + "Insights on the Solubility of CO2 in 1-Ethyl-3-methylimidazolium Bis(trifluoromethylsulfonyl)imide from the Microscopic Point of View", + "Environ. Sci. Technol.", + 47, 2013, "7421-7429" }, + { "Timneanu2004a", + "N. Timneanu and C. Caleman and J. Hajdu and D. van der Spoel", + "Auger Electron Cascades in Water and Ice", + "Chem. Phys.", + 299, 2004, "277-283" }, + { "Pascal2011a", + "T. A. Pascal and S. T. Lin and W. A. Goddard III", + "Thermodynamics of liquids: standard molar entropies and heat capacities of common solvents from 2PT molecular dynamics", + "Phys. Chem. Chem. Phys.", + 13, 2011, "169-181" }, + { "Caleman2011b", + "C. Caleman and P. J. van Maaren and M. Hong and J. S. Hub and L. T. da Costa and D. van der Spoel", + "Force Field Benchmark of Organic Liquids: Density, Enthalpy of Vaporization, Heat Capacities, Surface Tension, Isothermal Compressibility, Volumetric Expansion Coefficient, and Dielectric Constant", + "J. Chem. Theo. Comp.", + 8, 2012, "61" }, + { "Lindahl2001a", + "E. Lindahl and B. Hess and D. van der Spoel", + "GROMACS 3.0: A package for molecular simulation and trajectory analysis", + "J. Mol. Mod.", + 7, 2001, "306-317" }, + { "Wang2001a", + "J. Wang and W. Wang and S. Huo and M. Lee and P. A. Kollman", + "Solvation model based on weighted solvent accessible surface area", + "J. Phys. Chem. B", + 105, 2001, "5055-5067" }, + { "Eisenberg86a", + "D. Eisenberg and A. D. McLachlan", + "Solvation energy in protein folding and binding", + "Nature", + 319, 1986, "199-203" }, + { "Bondi1964a", + "A. Bondi", + "van der Waals Volumes and Radii", + "J. Phys. 
Chem.", + 68, 1964, "441-451" }, + { "Eisenhaber95", + "Frank Eisenhaber and Philip Lijnzaad and Patrick Argos and Chris Sander and Michael Scharf", + "The Double Cube Lattice Method: Efficient Approaches to Numerical Integration of Surface Area and Volume and to Dot Surface Contouring of Molecular Assemblies", + "J. Comp. Chem.", + 16, 1995, "273-284" }, + { "Hess2002", + "B. Hess, H. Saint-Martin and H.J.C. Berendsen", + "Flexible constraints: an adiabatic treatment of quantum degrees of freedom, with application to the flexible and polarizable MCDHO model for water", + "J. Chem. Phys.", + 116, 2002, "9602-9610" }, + { "Hetenyi2002b", + "Csaba Hetenyi and David van der Spoel", + "Efficient docking of peptides to proteins without prior knowledge of the binding site.", + "Prot. Sci.", + 11, 2002, "1729-1737" }, + { "Hess2003", + "B. Hess and R.M. Scheek", + "Orientation restraints in molecular dynamics simulations using time and ensemble averaging", + "J. Magn. Res.", + 164, 2003, "19-27" }, + { "Rappe1991a", + "A. K. Rappe and W. A. Goddard III", + "Charge Equillibration for Molecular Dynamics Simulations", + "J. Phys. Chem.", + 95, 1991, "3358-3363" }, + { "Mu2005a", + "Y. Mu, P. H. Nguyen and G. Stock", + "Energy landscape of a small peptide revelaed by dihedral angle principal component analysis", + "Prot. Struct. Funct. Bioinf.", + 58, 2005, "45-52" }, + { "Okabe2001a", + "T. Okabe and M. Kawata and Y. Okamoto and M. Mikami", + "Replica-exchange {M}onte {C}arlo method for the isobaric-isothermal ensemble", + "Chem. Phys. Lett.", + 335, 2001, "435-439" }, + { "Hukushima96a", + "K. Hukushima and K. Nemoto", + "Exchange Monte Carlo Method and Application to Spin Glass Simulations", + "J. Phys. Soc. Jpn.", + 65, 1996, "1604-1608" }, + { "Tropp80a", + "J. Tropp", + "Dipolar Relaxation and Nuclear Overhauser effects in nonrigid molecules: The effect of fluctuating internuclear distances", + "J. Chem. Phys.", + 72, 1980, "6035-6043" }, + { "Bultinck2002a", + "P. Bultinck and W. Langenaeker and P. Lahorte and F. De Proft and P. Geerlings and M. Waroquier and J. P. Tollenaere", + "The electronegativity equalization method I: Parametrization and validation for atomic charge calculations", + "J. Phys. Chem. A", + 106, 2002, "7887-7894" }, + { "Yang2006b", + "Q. Y. Yang and K. A. Sharp", + "Atomic charge parameters for the finite difference Poisson-Boltzmann method using electronegativity neutralization", + "J. Chem. Theory Comput.", + 2, 2006, "1152-1167" }, + { "Spoel2005a", + "D. van der Spoel, E. Lindahl, B. Hess, G. Groenhof, A. E. Mark and H. J. C. Berendsen", + "GROMACS: Fast, Flexible and Free", + "J. Comp. Chem.", + 26, 2005, "1701-1719" }, + { "Spoel2006b", + "D. van der Spoel, P. J. van Maaren, P. Larsson and N. Timneanu", + "Thermodynamics of hydrogen bonding in hydrophilic and hydrophobic media", + "J. Phys. Chem. B", + 110, 2006, "4393-4398" }, + { "Spoel2006d", + "D. van der Spoel and M. M. Seibert", + "Protein folding kinetics and thermodynamics from atomistic simulations", + "Phys. Rev. Letters", + 96, 2006, "238102" }, + { "Palmer94a", + "B. J. Palmer", + "Transverse-current autocorrelation-function calculations of the shear viscosity for molecular liquids", + "Phys. Rev. E", + 49, 1994, "359-366" }, + { "Bussi2007a", + "G. Bussi, D. Donadio and M. Parrinello", + "Canonical sampling through velocity rescaling", + "J. Chem. Phys.", + 126, 2007, "014101" }, + { "Hub2006", + "J. S. Hub and B. L. de Groot", + "Does CO2 permeate through Aquaporin-1?", + "Biophys. 
J.", + 91, 2006, "842-848" }, + { "Hub2008", + "J. S. Hub and B. L. de Groot", + "Mechanism of selectivity in aquaporins and aquaglyceroporins", + "PNAS", + 105, 2008, "1198-1203" }, + { "Friedrich2009", + "M. S. Friedrichs, P. Eastman, V. Vaidyanathan, M. Houston, S. LeGrand, A. L. Beberg, D. L. Ensign, C. M. Bruns, and V. S. Pande", + "Accelerating Molecular Dynamic Simulation on Graphics Processing Units", + "J. Comp. Chem.", + 30, 2009, "864-872" }, + { "Engin2010", + "O. Engin, A. Villa, M. Sayar and B. Hess", + "Driving Forces for Adsorption of Amphiphilic Peptides to Air-Water Interface", + "J. Phys. Chem. B", + 114, 2010, "11093" }, + { "Fritsch12", + "S. Fritsch, C. Junghans and K. Kremer", + "Adaptive molecular simulation study on structure formation of toluene around C60 using Gromacs", + "J. Chem. Theo. Comp.", + 8, 2012, "398" }, + { "Junghans10", + "C. Junghans and S. Poblete", + "A reference implementation of the adaptive resolution scheme in ESPResSo", + "Comp. Phys. Comm.", + 181, 2010, "1449" }, + { "Wang2010", + "H. Wang, F. Dommert, C.Holm", + "Optimizing working parameters of the smooth particle mesh Ewald algorithm in terms of accuracy and efficiency", + "J. Chem. Phys. B", + 133, 2010, "034117" }, + { "Sugita1999a", + "Y. Sugita, Y. Okamoto", + "Replica-exchange molecular dynamics method for protein folding", + "Chem. Phys. Lett.", + 314, 1999, "141-151" }, + { "Kutzner2011", + "C. Kutzner and J. Czub and H. Grubmuller", + "Keep it Flexible: Driving Macromolecular Rotary Motions in Atomistic Simulations with GROMACS", + "J. Chem. Theory Comput.", + 7, 2011, "1381-1393" }, + { "Hoefling2011", + "M. Hoefling, N. Lima, D. Haenni, C.A.M. Seidel, B. Schuler, H. Grubmuller", + "Structural Heterogeneity and Quantitative FRET Efficiency Distributions of Polyprolines through a Hybrid Atomistic Simulation and Monte Carlo Approach", + "PLoS ONE", + 6, 2011, "e19791" }, + { "Hockney1988", + "R. W. Hockney and J. W. Eastwood", + "Computer simulation using particles", + "IOP, Bristol", + 1, 1988, "1" }, + { "Ballenegger2012", + "V. Ballenegger, J.J. Cerda, and C. Holm", + "How to Convert SPME to P3M: Influence Functions and Error Estimates", + "J. Chem. Theory Comput.", + 8, 2012, "936-947" }, + { "Garmay2012", + "Garmay Yu, Shvetsov A, Karelov D, Lebedev D, Radulescu A, Petukhov M, Isaev-Ivanov V", + "Correlated motion of protein subdomains and large-scale conformational flexibility of RecA protein filament", + "Journal of Physics: Conference Series", + 340, 2012, "012094" }, + { "Kutzner2011b", + "C. Kutzner, H. Grubmuller, B. L. de Groot, and U. Zachariae", + "Computational Electrophysiology: The Molecular Dynamics of Ion Channel Permeation and Selectivity in Atomistic Detail", + "Biophys. J.", + 101, 2011, "809-817"}, + { "Lundborg2014", + "M. Lundborg, R. Apostolov, D. Spangberg, A. Gardenas, D. van der Spoel and E. Lindahl", + "An efficient and extensible format, library, and API for binary trajectory data from molecular simulations", + "J. Comput. 
Chem.", + 35, 2014, "260-269"} + }; +#define NSTR (int)asize(citedb) + + int index; + char *author; + char *title; +#define LINE_WIDTH 79 + + if (fp == NULL) + { + return; + } + + for (index = 0; (index < NSTR) && (strcmp(citedb[index].key, key) != 0); index++) + { + ; + } + + fprintf(fp, "\n++++ PLEASE READ AND CITE THE FOLLOWING REFERENCE ++++\n"); + if (index < NSTR) + { + /* Insert newlines */ + author = wrap_lines(citedb[index].author, LINE_WIDTH, 0, FALSE); + title = wrap_lines(citedb[index].title, LINE_WIDTH, 0, FALSE); + fprintf(fp, "%s\n%s\n%s %d (%d) pp. %s\n", + author, title, citedb[index].journal, + citedb[index].volume, citedb[index].year, + citedb[index].pages); + sfree(author); + sfree(title); + } + else + { + fprintf(fp, "Entry %s not found in citation database\n", key); + } + fprintf(fp, "-------- -------- --- Thank You --- -------- --------\n\n"); + fflush(fp); +} + +#ifdef GMX_GIT_VERSION_INFO +/* Version information generated at compile time. */ +#include "gromacs/utility/gitversion.h" +#else +/* Fall back to statically defined version. */ +static const char _gmx_ver_string[] = "VERSION " VERSION; +#endif + +const char *GromacsVersion() +{ + return _gmx_ver_string; +} + +const char *ShortProgram(void) +{ + try + { + // TODO: Use the display name once it doesn't break anything. + return gmx::getProgramContext().programName(); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; +} + +const char *Program(void) +{ + try + { + return gmx::getProgramContext().fullBinaryPath(); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; +} + + +extern void gmx_print_version_info_gpu(FILE *fp); + +static void gmx_print_version_info(FILE *fp) +{ + fprintf(fp, "Gromacs version: %s\n", _gmx_ver_string); +#ifdef GMX_GIT_VERSION_INFO + fprintf(fp, "GIT SHA1 hash: %s\n", _gmx_full_git_hash); + /* Only print out the branch information if present. + * The generating script checks whether the branch point actually + * coincides with the hash reported above, and produces an empty string + * in such cases. 
*/ + if (_gmx_central_base_hash[0] != 0) + { + fprintf(fp, "Branched from: %s\n", _gmx_central_base_hash); + } +#endif + +#ifdef GMX_DOUBLE + fprintf(fp, "Precision: double\n"); +#else + fprintf(fp, "Precision: single\n"); +#endif + fprintf(fp, "Memory model: %u bit\n", (unsigned)(8*sizeof(void *))); + +#ifdef GMX_THREAD_MPI + fprintf(fp, "MPI library: thread_mpi\n"); +#elif defined(GMX_MPI) + fprintf(fp, "MPI library: MPI\n"); +#else + fprintf(fp, "MPI library: none\n"); +#endif +#ifdef GMX_OPENMP + fprintf(fp, "OpenMP support: enabled\n"); +#else + fprintf(fp, "OpenMP support: disabled\n"); +#endif +#ifdef GMX_GPU + fprintf(fp, "GPU support: enabled\n"); +#else + fprintf(fp, "GPU support: disabled\n"); +#endif + /* A preprocessor trick to avoid duplicating logic from vec.h */ +#define gmx_stringify2(x) #x +#define gmx_stringify(x) gmx_stringify2(x) + fprintf(fp, "invsqrt routine: %s\n", gmx_stringify(gmx_invsqrt(x))); + fprintf(fp, "SIMD instructions: %s\n", GMX_SIMD_STRING); + + fprintf(fp, "FFT library: %s\n", gmx_fft_get_version_info()); +#ifdef HAVE_RDTSCP + fprintf(fp, "RDTSCP usage: enabled\n"); +#else + fprintf(fp, "RDTSCP usage: disabled\n"); +#endif +#ifdef GMX_CXX11 + fprintf(fp, "C++11 compilation: enabled\n"); +#else + fprintf(fp, "C++11 compilation: disabled\n"); +#endif +#ifdef GMX_USE_TNG + fprintf(fp, "TNG support: enabled\n"); +#else + fprintf(fp, "TNG support: disabled\n"); +#endif + + fprintf(fp, "Built on: %s\n", BUILD_TIME); + fprintf(fp, "Built by: %s\n", BUILD_USER); + fprintf(fp, "Build OS/arch: %s\n", BUILD_HOST); + fprintf(fp, "Build CPU vendor: %s\n", BUILD_CPU_VENDOR); + fprintf(fp, "Build CPU brand: %s\n", BUILD_CPU_BRAND); + fprintf(fp, "Build CPU family: %d Model: %d Stepping: %d\n", + BUILD_CPU_FAMILY, BUILD_CPU_MODEL, BUILD_CPU_STEPPING); + /* TODO: The below strings can be quite long, so it would be nice to wrap + * them. Can wait for later, as the master branch has ready code to do all + * that. */ + fprintf(fp, "Build CPU features: %s\n", BUILD_CPU_FEATURES); + fprintf(fp, "C compiler: %s\n", BUILD_C_COMPILER); + fprintf(fp, "C compiler flags: %s\n", BUILD_CFLAGS); + fprintf(fp, "C++ compiler: %s\n", BUILD_CXX_COMPILER); + fprintf(fp, "C++ compiler flags: %s\n", BUILD_CXXFLAGS); +#ifdef HAVE_LIBMKL + /* MKL might be used for LAPACK/BLAS even if FFTs use FFTW, so keep it separate */ + fprintf(fp, "Linked with Intel MKL version %d.%d.%d.\n", + __INTEL_MKL__, __INTEL_MKL_MINOR__, __INTEL_MKL_UPDATE__); +#endif +#ifdef GMX_EXTERNAL_BOOST + const bool bExternalBoost = true; +#else + const bool bExternalBoost = false; +#endif + fprintf(fp, "Boost version: %d.%d.%d%s\n", BOOST_VERSION / 100000, + BOOST_VERSION / 100 % 1000, BOOST_VERSION % 100, + bExternalBoost ? 
" (external)" : " (internal)"); +#ifdef GMX_GPU + gmx_print_version_info_gpu(fp); +#endif +} + ++#ifdef GMX_DOUBLE ++void gmx_is_double_precision() ++{ ++ /* allow precision detection */ ++} ++#else ++void gmx_is_single_precision() ++{ ++ /* allow precision detection */ ++} ++#endif ++ +namespace gmx +{ + +BinaryInformationSettings::BinaryInformationSettings() + : bExtendedInfo_(false), bCopyright_(false), + bGeneratedByHeader_(false), prefix_(""), suffix_("") +{ +} + +void printBinaryInformation(FILE *fp, + const ProgramContextInterface &programContext) +{ + printBinaryInformation(fp, programContext, BinaryInformationSettings()); +} + +void printBinaryInformation(FILE *fp, + const ProgramContextInterface &programContext, + const BinaryInformationSettings &settings) +{ + const char *prefix = settings.prefix_; + const char *suffix = settings.suffix_; + const char *precisionString = ""; +#ifdef GMX_DOUBLE + precisionString = " (double precision)"; +#endif + const char *const name = programContext.displayName(); + if (settings.bGeneratedByHeader_) + { + fprintf(fp, "%sCreated by:%s\n", prefix, suffix); + } + if (settings.bCopyright_) + { + GMX_RELEASE_ASSERT(prefix[0] == '\0' && suffix[0] == '\0', + "Prefix/suffix not supported with copyright"); + // This line is printed again after the copyright notice to make it + // appear together with all the other information, so that it is not + // necessary to read stuff above the copyright notice. + // The line above the copyright notice puts the copyright notice is + // context, though. + // TODO: It would be nice to know here whether we are really running a + // Gromacs binary or some other binary that is calling Gromacs; we + // could then print "%s is part of GROMACS" or some alternative text. + fprintf(fp, "%sGROMACS: %s, %s%s%s\n", prefix, name, + GromacsVersion(), precisionString, suffix); + fprintf(fp, "\n"); + printCopyright(fp); + fprintf(fp, "\n"); + } + fprintf(fp, "%sGROMACS: %s, %s%s%s\n", prefix, name, + GromacsVersion(), precisionString, suffix); + const char *const binaryPath = programContext.fullBinaryPath(); + if (binaryPath != NULL && binaryPath[0] != '\0') + { + fprintf(fp, "%sExecutable: %s%s\n", prefix, binaryPath, suffix); + } + const char *const commandLine = programContext.commandLine(); + if (commandLine != NULL && commandLine[0] != '\0') + { + fprintf(fp, "%sCommand line:%s\n%s %s%s\n", + prefix, suffix, prefix, commandLine, suffix); + } + if (settings.bExtendedInfo_) + { + GMX_RELEASE_ASSERT(prefix[0] == '\0' && suffix[0] == '\0', + "Prefix/suffix not supported with extended info"); + fprintf(fp, "\n"); + gmx_print_version_info(fp); + } +} + +} // namespace gmx diff --cc src/gromacs/gmxlib/gmx_detect_hardware.c index 2ba16d4ce4,0000000000..986f9ccb05 mode 100644,000000..100644 --- a/src/gromacs/gmxlib/gmx_detect_hardware.c +++ b/src/gromacs/gmxlib/gmx_detect_hardware.c @@@ -1,836 -1,0 +1,874 @@@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. 
+ * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include + +#ifdef HAVE_UNISTD_H +/* For sysconf */ +#include +#endif + +#include "types/enums.h" +#include "types/hw_info.h" +#include "types/commrec.h" +#include "gmx_fatal.h" +#include "gmx_fatal_collective.h" ++#include "md_logging.h" ++#include "gmx_cpuid.h" +#include "smalloc.h" +#include "gpu_utils.h" +#include "copyrite.h" +#include "gmx_detect_hardware.h" +#include "main.h" +#include "md_logging.h" +#include "gromacs/utility/gmxomp.h" + +#include "thread_mpi/threads.h" + +#ifdef GMX_NATIVE_WINDOWS +#include +#endif + +#ifdef GMX_GPU +const gmx_bool bGPUBinary = TRUE; +#else +const gmx_bool bGPUBinary = FALSE; +#endif + +static const char * invalid_gpuid_hint = + "A delimiter-free sequence of valid numeric IDs of available GPUs is expected."; + +/* The globally shared hwinfo structure. */ +static gmx_hw_info_t *hwinfo_g; +/* A reference counter for the hwinfo structure */ +static int n_hwinfo = 0; +/* A lock to protect the hwinfo structure */ +static tMPI_Thread_mutex_t hw_info_lock = TMPI_THREAD_MUTEX_INITIALIZER; + + +/* FW decl. */ +static void limit_num_gpus_used(gmx_gpu_opt_t *gpu_opt, int count); +static int gmx_count_gpu_dev_unique(const gmx_gpu_info_t *gpu_info, + const gmx_gpu_opt_t *gpu_opt); + +static void sprint_gpus(char *sbuf, const gmx_gpu_info_t *gpu_info) +{ + int i, ndev; + char stmp[STRLEN]; + + ndev = gpu_info->ncuda_dev; + + sbuf[0] = '\0'; + for (i = 0; i < ndev; i++) + { + get_gpu_device_info_string(stmp, gpu_info, i); + strcat(sbuf, " "); + strcat(sbuf, stmp); + if (i < ndev - 1) + { + strcat(sbuf, "\n"); + } + } +} + +static void print_gpu_detection_stats(FILE *fplog, + const gmx_gpu_info_t *gpu_info, + const t_commrec *cr) +{ + char onhost[266], stmp[STRLEN]; + int ngpu; + + if (!gpu_info->bDetectGPUs) + { + /* We skipped the detection, so don't print detection stats */ + return; + } + + ngpu = gpu_info->ncuda_dev; + +#if defined GMX_MPI && !defined GMX_THREAD_MPI + /* We only print the detection on one, of possibly multiple, nodes */ + strncpy(onhost, " on host ", 10); + gmx_gethostname(onhost+9, 256); +#else + /* We detect all relevant GPUs */ + strncpy(onhost, "", 1); +#endif + + if (ngpu > 0) + { + sprint_gpus(stmp, gpu_info); + md_print_warn(cr, fplog, "%d GPU%s detected%s:\n%s\n", + ngpu, (ngpu > 1) ? 
"s" : "", onhost, stmp); + } + else + { + md_print_warn(cr, fplog, "No GPUs detected%s\n", onhost); + } +} + +static void print_gpu_use_stats(FILE *fplog, + const gmx_gpu_info_t *gpu_info, + const gmx_gpu_opt_t *gpu_opt, + const t_commrec *cr) +{ + char sbuf[STRLEN], stmp[STRLEN]; + int i, ngpu_comp, ngpu_use; + + ngpu_comp = gpu_info->ncuda_dev_compatible; + ngpu_use = gpu_opt->ncuda_dev_use; + + /* Issue a note if GPUs are available but not used */ + if (ngpu_comp > 0 && ngpu_use < 1) + { + sprintf(sbuf, + "%d compatible GPU%s detected in the system, but none will be used.\n" + "Consider trying GPU acceleration with the Verlet scheme!", + ngpu_comp, (ngpu_comp > 1) ? "s" : ""); + } + else + { + int ngpu_use_uniq; + + ngpu_use_uniq = gmx_count_gpu_dev_unique(gpu_info, gpu_opt); + + sprintf(sbuf, "%d GPU%s %sselected for this run.\n" + "Mapping of GPU%s to the %d PP rank%s in this node: ", + ngpu_use_uniq, (ngpu_use_uniq > 1) ? "s" : "", + gpu_opt->bUserSet ? "user-" : "auto-", + (ngpu_use > 1) ? "s" : "", + cr->nrank_pp_intranode, + (cr->nrank_pp_intranode > 1) ? "s" : ""); + + for (i = 0; i < ngpu_use; i++) + { + sprintf(stmp, "#%d", get_gpu_device_id(gpu_info, gpu_opt, i)); + if (i < ngpu_use - 1) + { + strcat(stmp, ", "); + } + strcat(sbuf, stmp); + } + } + md_print_info(cr, fplog, "%s\n\n", sbuf); +} + +/* Parse a "plain" GPU ID string which contains a sequence of digits corresponding + * to GPU IDs; the order will indicate the process/tMPI thread - GPU assignment. */ +static void parse_gpu_id_plain_string(const char *idstr, int *nid, int **idlist) +{ + int i; + + *nid = strlen(idstr); + + snew(*idlist, *nid); + + for (i = 0; i < *nid; i++) + { + if (idstr[i] < '0' || idstr[i] > '9') + { + gmx_fatal(FARGS, "Invalid character in GPU ID string: '%c'\n%s\n", + idstr[i], invalid_gpuid_hint); + } + (*idlist)[i] = idstr[i] - '0'; + } +} + +static void parse_gpu_id_csv_string(const char gmx_unused *idstr, int gmx_unused *nid, int gmx_unused *idlist) +{ + /* XXX implement cvs format to support more than 10 different GPUs in a box. */ + gmx_incons("Not implemented yet"); +} + ++/* Give a suitable fatal error or warning if the build configuration ++ and runtime CPU do not match. */ ++static void ++check_use_of_rdtscp_on_this_cpu(FILE *fplog, ++ const t_commrec *cr, ++ const gmx_hw_info_t *hwinfo) ++{ ++ gmx_bool bCpuHasRdtscp, bBinaryUsesRdtscp; ++#ifdef HAVE_RDTSCP ++ bBinaryUsesRdtscp = TRUE; ++#else ++ bBinaryUsesRdtscp = FALSE; ++#endif ++ ++ bCpuHasRdtscp = gmx_cpuid_feature(hwinfo->cpuid_info, GMX_CPUID_FEATURE_X86_RDTSCP); ++ ++ if (!bCpuHasRdtscp && bBinaryUsesRdtscp) ++ { ++ gmx_fatal(FARGS, "The %s executable was compiled to use the rdtscp CPU instruction. " ++ "However, this is not supported by the current hardware and continuing would lead to a crash. " ++ "Please rebuild GROMACS with the GMX_USE_RDTSCP=OFF CMake option.", ++ ShortProgram()); ++ } ++ ++ if (bCpuHasRdtscp && !bBinaryUsesRdtscp) ++ { ++ md_print_warn(cr, fplog, "The current CPU can measure timings more accurately than the code in\n" ++ "%s was configured to use. 
This might affect your simulation\n" ++ "speed as accurate timings are needed for load-balancing.\n" ++ "Please consider rebuilding %s with the GMX_USE_RDTSCP=OFF CMake option.\n", ++ ShortProgram(), ShortProgram()); ++ } ++} ++ +void gmx_check_hw_runconf_consistency(FILE *fplog, + const gmx_hw_info_t *hwinfo, + const t_commrec *cr, + const gmx_hw_opt_t *hw_opt, + gmx_bool bUseGPU) +{ + int npppn, ntmpi_pp; + char sbuf[STRLEN], th_or_proc[STRLEN], th_or_proc_plural[STRLEN], pernode[STRLEN]; + gmx_bool btMPI, bMPI, bMaxMpiThreadsSet, bNthreadsAuto, bEmulateGPU; + + assert(hwinfo); + assert(cr); + + /* Below we only do consistency checks for PP and GPUs, + * this is irrelevant for PME only nodes, so in that case we return + * here. + */ + if (!(cr->duty & DUTY_PP)) + { + return; + } + + btMPI = bMPI = FALSE; + bNthreadsAuto = FALSE; +#if defined(GMX_THREAD_MPI) + btMPI = TRUE; + bNthreadsAuto = (hw_opt->nthreads_tmpi < 1); +#elif defined(GMX_LIB_MPI) + bMPI = TRUE; +#endif + + /* GPU emulation detection is done later, but we need it here as well + * -- uncool, but there's no elegant workaround */ + bEmulateGPU = (getenv("GMX_EMULATE_GPU") != NULL); + bMaxMpiThreadsSet = (getenv("GMX_MAX_MPI_THREADS") != NULL); + + /* check the SIMD level mdrun is compiled with against hardware + capabilities */ + /* TODO: Here we assume homogeneous hardware which is not necessarily + the case! Might not hurt to add an extra check over MPI. */ + gmx_cpuid_simd_check(hwinfo->cpuid_info, fplog, SIMMASTER(cr)); + ++ check_use_of_rdtscp_on_this_cpu(fplog, cr, hwinfo); ++ + /* NOTE: this print is only for and on one physical node */ + print_gpu_detection_stats(fplog, &hwinfo->gpu_info, cr); + + if (hwinfo->gpu_info.ncuda_dev_compatible > 0) + { + /* NOTE: this print is only for and on one physical node */ + print_gpu_use_stats(fplog, &hwinfo->gpu_info, &hw_opt->gpu_opt, cr); + } + + /* Need to ensure that we have enough GPUs: + * - need one GPU per PP node + * - no GPU oversubscription with tMPI + * */ + /* number of PP processes per node */ + npppn = cr->nrank_pp_intranode; + + pernode[0] = '\0'; + th_or_proc_plural[0] = '\0'; + if (btMPI) + { + sprintf(th_or_proc, "thread-MPI thread"); + if (npppn > 1) + { + sprintf(th_or_proc_plural, "s"); + } + } + else if (bMPI) + { + sprintf(th_or_proc, "MPI process"); + if (npppn > 1) + { + sprintf(th_or_proc_plural, "es"); + } + sprintf(pernode, " per node"); + } + else + { + /* neither MPI nor tMPI */ + sprintf(th_or_proc, "process"); + } + + if (bUseGPU && hwinfo->gpu_info.ncuda_dev_compatible > 0 && + !bEmulateGPU) + { + int ngpu_comp, ngpu_use; + char gpu_comp_plural[2], gpu_use_plural[2]; + + ngpu_comp = hwinfo->gpu_info.ncuda_dev_compatible; + ngpu_use = hw_opt->gpu_opt.ncuda_dev_use; + + sprintf(gpu_comp_plural, "%s", (ngpu_comp > 1) ? "s" : ""); + sprintf(gpu_use_plural, "%s", (ngpu_use > 1) ? "s" : ""); + + /* number of tMPI threads auto-adjusted */ + if (btMPI && bNthreadsAuto) + { + if (hw_opt->gpu_opt.bUserSet && npppn < ngpu_use) + { + /* The user manually provided more GPUs than threads we + could automatically start. */ + gmx_fatal(FARGS, + "%d GPU%s provided, but only %d PP thread-MPI thread%s could be started.\n" + "%s requires one PP thread-MPI thread per GPU; use fewer GPUs%s.", + ngpu_use, gpu_use_plural, + npppn, th_or_proc_plural, + ShortProgram(), bMaxMpiThreadsSet ? "\nor allow more threads to be used" : ""); + } + + if (!hw_opt->gpu_opt.bUserSet && npppn < ngpu_comp) + { + /* There are more GPUs than tMPI threads; we have + limited the number of GPUs used. 
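+ (A hypothetical example: 4 compatible GPUs detected on the node but only + 2 PP thread-MPI threads started means only 2 GPUs end up in use; the + note below reports exactly that.)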
*/ + md_print_warn(cr, fplog, + "NOTE: %d GPU%s were detected, but only %d PP thread-MPI thread%s can be started.\n" + " %s can use one GPU per PP thread-MPI thread, so only %d GPU%s will be used.%s\n", + ngpu_comp, gpu_comp_plural, + npppn, th_or_proc_plural, + ShortProgram(), npppn, + npppn > 1 ? "s" : "", + bMaxMpiThreadsSet ? "\n Also, you can allow more threads to be used by increasing GMX_MAX_MPI_THREADS" : ""); + } + } + + if (hw_opt->gpu_opt.bUserSet) + { + if (ngpu_use != npppn) + { + gmx_fatal(FARGS, + "Incorrect launch configuration: mismatching number of PP %s%s and GPUs%s.\n" + "%s was started with %d PP %s%s%s, but you provided %d GPU%s.", + th_or_proc, btMPI ? "s" : "es", pernode, + ShortProgram(), npppn, th_or_proc, + th_or_proc_plural, pernode, + ngpu_use, gpu_use_plural); + } + } + else + { + if (ngpu_comp > npppn) + { + md_print_warn(cr, fplog, + "NOTE: potentially sub-optimal launch configuration, %s started with fewer\n" + " PP %s%s%s than GPU%s available.\n" + " Each PP %s can use only one GPU, %d GPU%s%s will be used.\n", + ShortProgram(), th_or_proc, + th_or_proc_plural, pernode, gpu_comp_plural, + th_or_proc, npppn, gpu_use_plural, pernode); + } + + if (ngpu_use != npppn) + { + /* Avoid duplicate error messages. + * Unfortunately we can only do this at the physical node + * level, since the hardware setup and MPI process count + * might differ between physical nodes. + */ + if (cr->rank_pp_intranode == 0) + { + gmx_fatal(FARGS, + "Incorrect launch configuration: mismatching number of PP %s%s and GPUs%s.\n" + "%s was started with %d PP %s%s%s, but only %d GPU%s were detected.", + th_or_proc, btMPI ? "s" : "es", pernode, + ShortProgram(), npppn, th_or_proc, + th_or_proc_plural, pernode, + ngpu_use, gpu_use_plural); + } + } + } + + { + int same_count; + + same_count = gmx_count_gpu_dev_shared(&hw_opt->gpu_opt); + + if (same_count > 0) + { + md_print_info(cr, fplog, + "NOTE: You assigned %s to multiple %s%s.\n", + same_count > 1 ? "GPUs" : "a GPU", th_or_proc, btMPI ? "s" : "es"); + } + } + } + +#ifdef GMX_MPI + if (PAR(cr)) + { + /* Prevent other ranks from continuing after an + inconsistency */ + MPI_Barrier(cr->mpi_comm_mygroup); + } +#endif + +} + +/* Return 0 if none of the GPUs (per node) are shared among PP ranks. + * + * Sharing GPUs among multiple PP ranks is possible when the user passes + * GPU IDs. Here we check for sharing and return a non-zero value when + * this is detected. Note that the return value represents the number of + * PP rank pairs that share a device. + */ +int gmx_count_gpu_dev_shared(const gmx_gpu_opt_t *gpu_opt) +{ + int same_count = 0; + int ngpu = gpu_opt->ncuda_dev_use; + + if (gpu_opt->bUserSet) + { + int i, j; + + for (i = 0; i < ngpu - 1; i++) + { + for (j = i + 1; j < ngpu; j++) + { + same_count += (gpu_opt->cuda_dev_use[i] == + gpu_opt->cuda_dev_use[j]); + } + } + } + + return same_count; +} + +/* Count and return the number of unique GPUs (per node) selected. + * + * As sharing GPUs among multiple PP ranks is possible when the user passes + * GPU IDs, the number of GPUs used (per node) can be different from the + * number of GPU IDs selected. + */ +static int gmx_count_gpu_dev_unique(const gmx_gpu_info_t *gpu_info, + const gmx_gpu_opt_t *gpu_opt) +{ + int i, uniq_count, ngpu; + int *uniq_ids; + + assert(gpu_info); + assert(gpu_opt); + + ngpu = gpu_info->ncuda_dev; + uniq_count = 0; + + snew(uniq_ids, ngpu); + + /* Each element in uniq_ids will be set to 0 or 1. 
The n-th element set + * to 1 indicates that the respective GPU was selected to be used. */ + for (i = 0; i < gpu_opt->ncuda_dev_use; i++) + { + uniq_ids[get_gpu_device_id(gpu_info, gpu_opt, i)] = 1; + } + /* Count the devices used. */ + for (i = 0; i < ngpu; i++) + { + uniq_count += uniq_ids[i]; + } + + sfree(uniq_ids); + + return uniq_count; +} + + +/* Return the number of hardware threads supported by the current CPU. + * We assume that this is equal to the number of CPUs reported to be + * online by the OS at the time of the call. + */ +static int get_nthreads_hw_avail(FILE gmx_unused *fplog, const t_commrec gmx_unused *cr) +{ + int ret = 0; + +#if ((defined(WIN32) || defined( _WIN32 ) || defined(WIN64) || defined( _WIN64 )) && !(defined (__CYGWIN__) || defined (__CYGWIN32__))) + /* Windows */ + SYSTEM_INFO sysinfo; + GetSystemInfo( &sysinfo ); + ret = sysinfo.dwNumberOfProcessors; +#elif defined HAVE_SYSCONF + /* We are probably on Unix. + * Now check if we have the argument to use before executing the call + */ +#if defined(_SC_NPROCESSORS_ONLN) + ret = sysconf(_SC_NPROCESSORS_ONLN); +#elif defined(_SC_NPROC_ONLN) + ret = sysconf(_SC_NPROC_ONLN); +#elif defined(_SC_NPROCESSORS_CONF) + ret = sysconf(_SC_NPROCESSORS_CONF); +#elif defined(_SC_NPROC_CONF) + ret = sysconf(_SC_NPROC_CONF); +#else +#warning "No valid sysconf argument value found. Executables will not be able to determine the number of hardware threads: mdrun will use 1 thread by default!" +#endif /* End of check for sysconf argument values */ + +#else + /* Neither Windows nor Unix. No fscking idea how many CPUs we have! */ + ret = -1; +#endif + + if (debug) + { + fprintf(debug, "Detected %d processors, will use this as the number " + "of supported hardware threads.\n", ret); + } + +#ifdef GMX_OPENMP + if (ret != gmx_omp_get_num_procs()) + { + md_print_warn(cr, fplog, + "Number of CPUs detected (%d) does not match the number reported by OpenMP (%d).\n" + "Consider setting the launch configuration manually!", + ret, gmx_omp_get_num_procs()); + } +#endif + + return ret; +} + +static void gmx_detect_gpus(FILE *fplog, const t_commrec *cr) +{ +#ifdef GMX_LIB_MPI + int rank_world; + MPI_Comm physicalnode_comm; +#endif + int rank_local; + + /* Under certain circumstances MPI ranks on the same physical node + * can not simultaneously access the same GPU(s). Therefore we run + * the detection only on one MPI rank per node and broadcast the info. + * Note that with thread-MPI only a single thread runs this code. + * + * TODO: We should also do CPU hardware detection only once on each + * physical node and broadcast it, instead of doing it on every MPI rank. + */ +#ifdef GMX_LIB_MPI + /* A split of MPI_COMM_WORLD over physical nodes is only required here, + * so we create and destroy it locally. 
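+ * The physical node id hash below acts as the split color, so all ranks + * on one physical node end up in the same communicator, and only its + * local rank 0 performs the GPU detection.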
+ */ + MPI_Comm_rank(MPI_COMM_WORLD, &rank_world); + MPI_Comm_split(MPI_COMM_WORLD, gmx_physicalnode_id_hash(), + rank_world, &physicalnode_comm); + MPI_Comm_rank(physicalnode_comm, &rank_local); +#else + /* Here there should be only one process, check this */ + assert(cr->nnodes == 1 && cr->sim_nodeid == 0); + + rank_local = 0; +#endif + + if (rank_local == 0) + { + char detection_error[STRLEN] = "", sbuf[STRLEN]; + + if (detect_cuda_gpus(&hwinfo_g->gpu_info, detection_error) != 0) + { + if (detection_error != NULL && detection_error[0] != '\0') + { + sprintf(sbuf, ":\n %s\n", detection_error); + } + else + { + sprintf(sbuf, "."); + } + md_print_warn(cr, fplog, + "NOTE: Error occurred during GPU detection%s" + " Can not use GPU acceleration, will fall back to CPU kernels.\n", + sbuf); + } + } + +#ifdef GMX_LIB_MPI + /* Broadcast the GPU info to the other ranks within this node */ + MPI_Bcast(&hwinfo_g->gpu_info.ncuda_dev, 1, MPI_INT, 0, physicalnode_comm); + + if (hwinfo_g->gpu_info.ncuda_dev > 0) + { + int cuda_dev_size; + + cuda_dev_size = hwinfo_g->gpu_info.ncuda_dev*sizeof_cuda_dev_info(); + + if (rank_local > 0) + { + hwinfo_g->gpu_info.cuda_dev = + (cuda_dev_info_ptr_t)malloc(cuda_dev_size); + } + MPI_Bcast(hwinfo_g->gpu_info.cuda_dev, cuda_dev_size, MPI_BYTE, + 0, physicalnode_comm); + MPI_Bcast(&hwinfo_g->gpu_info.ncuda_dev_compatible, 1, MPI_INT, + 0, physicalnode_comm); + } + + MPI_Comm_free(&physicalnode_comm); +#endif +} + +gmx_hw_info_t *gmx_detect_hardware(FILE *fplog, const t_commrec *cr, + gmx_bool bDetectGPUs) +{ + gmx_hw_info_t *hw; + int ret; + + /* make sure no one else is doing the same thing */ + ret = tMPI_Thread_mutex_lock(&hw_info_lock); + if (ret != 0) + { + gmx_fatal(FARGS, "Error locking hwinfo mutex: %s", strerror(errno)); + } + + /* only initialize the hwinfo structure if it is not already initialized */ + if (n_hwinfo == 0) + { + snew(hwinfo_g, 1); + + /* detect CPUID info; no fuss, we don't detect system-wide + * -- sloppy, but that's it for now */ + if (gmx_cpuid_init(&hwinfo_g->cpuid_info) != 0) + { + gmx_fatal_collective(FARGS, cr, NULL, "CPUID detection failed!"); + } + + /* detect number of hardware threads */ + hwinfo_g->nthreads_hw_avail = get_nthreads_hw_avail(fplog, cr); + + /* detect GPUs */ + hwinfo_g->gpu_info.ncuda_dev = 0; + hwinfo_g->gpu_info.cuda_dev = NULL; + hwinfo_g->gpu_info.ncuda_dev_compatible = 0; + + /* Run the detection if the binary was compiled with GPU support + * and we requested detection. 
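+ * Setting the GMX_DISABLE_GPU_DETECTION environment variable skips the + * detection as well, as the condition below shows.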
+ */ + hwinfo_g->gpu_info.bDetectGPUs = + (bGPUBinary && bDetectGPUs && + getenv("GMX_DISABLE_GPU_DETECTION") == NULL); + if (hwinfo_g->gpu_info.bDetectGPUs) + { + gmx_detect_gpus(fplog, cr); + } + } + /* increase the reference counter */ + n_hwinfo++; + + ret = tMPI_Thread_mutex_unlock(&hw_info_lock); + if (ret != 0) + { + gmx_fatal(FARGS, "Error unlocking hwinfo mutex: %s", strerror(errno)); + } + + return hwinfo_g; +} + +void gmx_parse_gpu_ids(gmx_gpu_opt_t *gpu_opt) +{ + char *env; + + if (gpu_opt->gpu_id != NULL && !bGPUBinary) + { + gmx_fatal(FARGS, "GPU ID string set, but %s was compiled without GPU support!", ShortProgram()); + } + + env = getenv("GMX_GPU_ID"); + if (env != NULL && gpu_opt->gpu_id != NULL) + { + gmx_fatal(FARGS, "GMX_GPU_ID and -gpu_id can not be used at the same time"); + } + if (env == NULL) + { + env = gpu_opt->gpu_id; + } + + /* parse GPU IDs if the user passed any */ + if (env != NULL) + { + parse_gpu_id_plain_string(env, + &gpu_opt->ncuda_dev_use, + &gpu_opt->cuda_dev_use); + + if (gpu_opt->ncuda_dev_use == 0) + { + gmx_fatal(FARGS, "Empty GPU ID string encountered.\n%s\n", + invalid_gpuid_hint); + } + + gpu_opt->bUserSet = TRUE; + } +} + +void gmx_select_gpu_ids(FILE *fplog, const t_commrec *cr, + const gmx_gpu_info_t *gpu_info, + gmx_bool bForceUseGPU, + gmx_gpu_opt_t *gpu_opt) +{ + int i; + const char *env; + char sbuf[STRLEN], stmp[STRLEN]; + + /* Bail if binary is not compiled with GPU acceleration, but this is either + * explicitly (-nb gpu) or implicitly (gpu ID passed) requested. */ + if (bForceUseGPU && !bGPUBinary) + { + gmx_fatal(FARGS, "GPU acceleration requested, but %s was compiled without GPU support!", ShortProgram()); + } + + if (gpu_opt->bUserSet) + { + /* Check the GPU IDs passed by the user. + * (GPU IDs have been parsed by gmx_parse_gpu_ids before) + */ + int *checkres; + int res; + + snew(checkres, gpu_opt->ncuda_dev_use); + + res = check_selected_cuda_gpus(checkres, gpu_info, gpu_opt); + + if (!res) + { + print_gpu_detection_stats(fplog, gpu_info, cr); + + sprintf(sbuf, "Some of the requested GPUs do not exist, behave strangely, or are not compatible:\n"); + for (i = 0; i < gpu_opt->ncuda_dev_use; i++) + { + if (checkres[i] != egpuCompatible) + { + sprintf(stmp, " GPU #%d: %s\n", + gpu_opt->cuda_dev_use[i], + gpu_detect_res_str[checkres[i]]); + strcat(sbuf, stmp); + } + } + gmx_fatal(FARGS, "%s", sbuf); + } + + sfree(checkres); + } + else + { + pick_compatible_gpus(&hwinfo_g->gpu_info, gpu_opt); + + if (gpu_opt->ncuda_dev_use > cr->nrank_pp_intranode) + { + /* We picked more GPUs than we can use: limit the number. + * We print detailed messages about this later in + * gmx_check_hw_runconf_consistency. 
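+ * (Illustrative example: 4 compatible GPUs auto-picked on a node that + * runs only 2 PP ranks means ncuda_dev_use is reduced to 2 below.)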
+ */ + limit_num_gpus_used(gpu_opt, cr->nrank_pp_intranode); + } + + gpu_opt->bUserSet = FALSE; + } + + /* If the user asked for a GPU, check whether we have a GPU */ + if (bForceUseGPU && gpu_info->ncuda_dev_compatible == 0) + { + gmx_fatal(FARGS, "GPU acceleration requested, but no compatible GPUs were detected."); + } +} + +static void limit_num_gpus_used(gmx_gpu_opt_t *gpu_opt, int count) +{ + int ndev_use; + + assert(gpu_opt); + + ndev_use = gpu_opt->ncuda_dev_use; + + if (count > ndev_use) + { + /* won't increase the # of GPUs */ + return; + } + + if (count < 1) + { + char sbuf[STRLEN]; + sprintf(sbuf, "Limiting the number of GPUs to <1 doesn't make sense (detected %d, %d requested)!", + ndev_use, count); + gmx_incons(sbuf); + } + + /* TODO: improve this implementation: either sort GPUs or remove the weakest here */ + gpu_opt->ncuda_dev_use = count; +} + +void gmx_hardware_info_free(gmx_hw_info_t *hwinfo) +{ + int ret; + + ret = tMPI_Thread_mutex_lock(&hw_info_lock); + if (ret != 0) + { + gmx_fatal(FARGS, "Error locking hwinfo mutex: %s", strerror(errno)); + } + + /* decrease the reference counter */ + n_hwinfo--; + + + if (hwinfo != hwinfo_g) + { + gmx_incons("hwinfo < hwinfo_g"); + } + + if (n_hwinfo < 0) + { + gmx_incons("n_hwinfo < 0"); + } + + if (n_hwinfo == 0) + { + gmx_cpuid_done(hwinfo_g->cpuid_info); + free_gpu_info(&hwinfo_g->gpu_info); + sfree(hwinfo_g); + } + + ret = tMPI_Thread_mutex_unlock(&hw_info_lock); + if (ret != 0) + { + gmx_fatal(FARGS, "Error unlocking hwinfo mutex: %s", strerror(errno)); + } +} diff --cc src/gromacs/legacyheaders/shellfc.h index 96dcaec0cf,0000000000..22cd489e4b mode 100644,000000..100644 --- a/src/gromacs/legacyheaders/shellfc.h +++ b/src/gromacs/legacyheaders/shellfc.h @@@ -1,80 -1,0 +1,81 @@@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2008, The GROMACS development team. - * Copyright (c) 2013, by the GROMACS development team, led by ++ * Copyright (c) 2013,2014, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. 
+ * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. + */ + +#include "typedefs.h" +#include "vsite.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* Initialization function, also predicts the initial shell positions. + * If x!=NULL, the shells are predicted for the global coordinates x. + */ - gmx_shellfc_t init_shell_flexcon(FILE *log, ++gmx_shellfc_t init_shell_flexcon(FILE *fplog, ++ gmx_bool bCutoffSchemeIsVerlet, + gmx_mtop_t *mtop, int nflexcon, + rvec *x); + +/* Get the local shells with domain decomposition */ +void make_local_shells(t_commrec *cr, t_mdatoms *md, + gmx_shellfc_t shfc); + +/* Optimize shell positions */ +int relax_shell_flexcon(FILE *log, t_commrec *cr, gmx_bool bVerbose, + gmx_int64_t mdstep, t_inputrec *inputrec, + gmx_bool bDoNS, int force_flags, + gmx_localtop_t *top, + gmx_constr_t constr, + gmx_enerdata_t *enerd, t_fcdata *fcd, + t_state *state, rvec f[], + tensor force_vir, + t_mdatoms *md, + t_nrnb *nrnb, gmx_wallcycle_t wcycle, + t_graph *graph, + gmx_groups_t *groups, + gmx_shellfc_t shfc, + t_forcerec *fr, + gmx_bool bBornRadii, + double t, rvec mu_tot, + gmx_bool *bConverged, + gmx_vsite_t *vsite, + FILE *fp_field); + + +#ifdef __cplusplus +} +#endif diff --cc src/gromacs/mdlib/shellfc.c index 7a6a2b0a62,0000000000..1a4e4b90bd mode 100644,000000..100644 --- a/src/gromacs/mdlib/shellfc.c +++ b/src/gromacs/mdlib/shellfc.c @@@ -1,1251 -1,0 +1,1258 @@@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2008, The GROMACS development team. + * Copyright (c) 2013,2014, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. 
+ */ +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include "typedefs.h" +#include "smalloc.h" +#include "gmx_fatal.h" +#include "vec.h" +#include "txtdump.h" +#include "force.h" +#include "mdrun.h" +#include "mdatoms.h" +#include "vsite.h" +#include "network.h" +#include "names.h" +#include "constr.h" +#include "domdec.h" +#include "physics.h" +#include "shellfc.h" +#include "mtop_util.h" +#include "chargegroup.h" +#include "macros.h" + + +typedef struct { + int nnucl; + atom_id shell; /* The shell id */ + atom_id nucl1, nucl2, nucl3; /* The nuclei connected to the shell */ + /* gmx_bool bInterCG; */ /* Coupled to nuclei outside cg? */ + real k; /* force constant */ + real k_1; /* 1 over force constant */ + rvec xold; + rvec fold; + rvec step; +} t_shell; + +typedef struct gmx_shellfc { + int nshell_gl; /* The number of shells in the system */ + t_shell *shell_gl; /* All the shells (for DD only) */ + int *shell_index_gl; /* Global shell index (for DD only) */ + gmx_bool bInterCG; /* Are there inter charge-group shells? */ + int nshell; /* The number of local shells */ + t_shell *shell; /* The local shells */ + int shell_nalloc; /* The allocation size of shell */ + gmx_bool bPredict; /* Predict shell positions */ + gmx_bool bRequireInit; /* Require initialization of shell positions */ + int nflexcon; /* The number of flexible constraints */ + rvec *x[2]; /* Array for iterative minimization */ + rvec *f[2]; /* Array for iterative minimization */ + int x_nalloc; /* The allocation size of x and f */ + rvec *acc_dir; /* Acceleration direction for flexcon */ + rvec *x_old; /* Old coordinates for flexcon */ + int flex_nalloc; /* The allocation size of acc_dir and x_old */ + rvec *adir_xnold; /* Work space for init_adir */ + rvec *adir_xnew; /* Work space for init_adir */ + int adir_nalloc; /* Work space for init_adir */ +} t_gmx_shellfc; + + +static void pr_shell(FILE *fplog, int ns, t_shell s[]) +{ + int i; + + fprintf(fplog, "SHELL DATA\n"); + fprintf(fplog, "%5s %8s %5s %5s %5s\n", + "Shell", "Force k", "Nucl1", "Nucl2", "Nucl3"); + for (i = 0; (i < ns); i++) + { + fprintf(fplog, "%5d %8.3f %5d", s[i].shell, 1.0/s[i].k_1, s[i].nucl1); + if (s[i].nnucl == 2) + { + fprintf(fplog, " %5d\n", s[i].nucl2); + } + else if (s[i].nnucl == 3) + { + fprintf(fplog, " %5d %5d\n", s[i].nucl2, s[i].nucl3); + } + else + { + fprintf(fplog, "\n"); + } + } +} + +static void predict_shells(FILE *fplog, rvec x[], rvec v[], real dt, + int ns, t_shell s[], + real mass[], gmx_mtop_t *mtop, gmx_bool bInit) +{ + int i, m, s1, n1, n2, n3; + real dt_1, dt_2, dt_3, fudge, tm, m1, m2, m3; + rvec *ptr; + gmx_mtop_atomlookup_t alook = NULL; + t_atom *atom; + + if (mass == NULL) + { + alook = gmx_mtop_atomlookup_init(mtop); + } + + /* We introduce a fudge factor for performance reasons: with this choice + * the initial force on the shells is about a factor of two lower than + * without + */ + fudge = 1.0; + + if (bInit) + { + if (fplog) + { + fprintf(fplog, "RELAX: Using prediction for initial shell placement\n"); + } + ptr = x; + dt_1 = 1; + } + else + { + ptr = v; + dt_1 = fudge*dt; + } + + for (i = 0; (i < ns); i++) + { + s1 = s[i].shell; + if (bInit) + { + clear_rvec(x[s1]); + } + switch (s[i].nnucl) + { + case 1: + n1 = s[i].nucl1; + for (m = 0; (m < DIM); m++) + { + x[s1][m] += ptr[n1][m]*dt_1; + } + break; + case 2: + n1 = s[i].nucl1; + n2 = s[i].nucl2; + if (mass) + { + m1 = mass[n1]; + m2 = mass[n2]; + } + else + { + /* Not the correct masses with FE, but it is just a prediction... 
*/ + m1 = atom[n1].m; + m2 = atom[n2].m; + } + tm = dt_1/(m1+m2); + for (m = 0; (m < DIM); m++) + { + x[s1][m] += (m1*ptr[n1][m]+m2*ptr[n2][m])*tm; + } + break; + case 3: + n1 = s[i].nucl1; + n2 = s[i].nucl2; + n3 = s[i].nucl3; + if (mass) + { + m1 = mass[n1]; + m2 = mass[n2]; + m3 = mass[n3]; + } + else + { + /* Not the correct masses with FE, but it is just a prediction... */ + gmx_mtop_atomnr_to_atom(alook, n1, &atom); + m1 = atom->m; + gmx_mtop_atomnr_to_atom(alook, n2, &atom); + m2 = atom->m; + gmx_mtop_atomnr_to_atom(alook, n3, &atom); + m3 = atom->m; + } + tm = dt_1/(m1+m2+m3); + for (m = 0; (m < DIM); m++) + { + x[s1][m] += (m1*ptr[n1][m]+m2*ptr[n2][m]+m3*ptr[n3][m])*tm; + } + break; + default: + gmx_fatal(FARGS, "Shell %d has %d nuclei!", i, s[i].nnucl); + } + } + + if (mass == NULL) + { + gmx_mtop_atomlookup_destroy(alook); + } +} + +gmx_shellfc_t init_shell_flexcon(FILE *fplog, ++ gmx_bool bCutoffSchemeIsVerlet, + gmx_mtop_t *mtop, int nflexcon, + rvec *x) +{ + struct gmx_shellfc *shfc; + t_shell *shell; + int *shell_index = NULL, *at2cg; + t_atom *atom; + int n[eptNR], ns, nshell, nsi; + int i, j, nmol, type, mb, mt, a_offset, cg, mol, ftype, nra; + real qS, alpha; + int aS, aN = 0; /* Shell and nucleus */ + int bondtypes[] = { F_BONDS, F_HARMONIC, F_CUBICBONDS, F_POLARIZATION, F_ANHARM_POL, F_WATER_POL }; +#define NBT asize(bondtypes) + t_iatom *ia; + gmx_mtop_atomloop_block_t aloopb; + gmx_mtop_atomloop_all_t aloop; + gmx_ffparams_t *ffparams; + gmx_molblock_t *molb; + gmx_moltype_t *molt; + t_block *cgs; + + /* Count number of shells, and find their indices */ + for (i = 0; (i < eptNR); i++) + { + n[i] = 0; + } + + aloopb = gmx_mtop_atomloop_block_init(mtop); + while (gmx_mtop_atomloop_block_next(aloopb, &atom, &nmol)) + { + n[atom->ptype] += nmol; + } + + if (fplog) + { + /* Print the number of each particle type */ + for (i = 0; (i < eptNR); i++) + { + if (n[i] != 0) + { + fprintf(fplog, "There are: %d %ss\n", n[i], ptype_str[i]); + } + } + } + + nshell = n[eptShell]; + + if (nshell == 0 && nflexcon == 0) + { ++ /* We're not doing shells or flexible constraints */ + return NULL; + } + ++ if (bCutoffSchemeIsVerlet) ++ { ++ gmx_fatal(FARGS, "The shell code does not work with the Verlet cut-off scheme.\n"); ++ } ++ + snew(shfc, 1); + shfc->nflexcon = nflexcon; + + if (nshell == 0) + { + return shfc; + } + + /* We have shells: fill the shell data structure */ + + /* Global system sized array, this should be avoided */ + snew(shell_index, mtop->natoms); + + aloop = gmx_mtop_atomloop_all_init(mtop); + nshell = 0; + while (gmx_mtop_atomloop_all_next(aloop, &i, &atom)) + { + if (atom->ptype == eptShell) + { + shell_index[i] = nshell++; + } + } + + snew(shell, nshell); + + /* Initiate the shell structures */ + for (i = 0; (i < nshell); i++) + { + shell[i].shell = NO_ATID; + shell[i].nnucl = 0; + shell[i].nucl1 = NO_ATID; + shell[i].nucl2 = NO_ATID; + shell[i].nucl3 = NO_ATID; + /* shell[i].bInterCG=FALSE; */ + shell[i].k_1 = 0; + shell[i].k = 0; + } + + ffparams = &mtop->ffparams; + + /* Now fill the structures */ + shfc->bInterCG = FALSE; + ns = 0; + a_offset = 0; + for (mb = 0; mb < mtop->nmolblock; mb++) + { + molb = &mtop->molblock[mb]; + molt = &mtop->moltype[molb->type]; + + cgs = &molt->cgs; + snew(at2cg, molt->atoms.nr); + for (cg = 0; cg < cgs->nr; cg++) + { + for (i = cgs->index[cg]; i < cgs->index[cg+1]; i++) + { + at2cg[i] = cg; + } + } + + atom = molt->atoms.atom; + for (mol = 0; mol < molb->nmol; mol++) + { + for (j = 0; (j < NBT); j++) + { + ia = 
molt->ilist[bondtypes[j]].iatoms; + for (i = 0; (i < molt->ilist[bondtypes[j]].nr); ) + { + type = ia[0]; + ftype = ffparams->functype[type]; + nra = interaction_function[ftype].nratoms; + + /* Check whether we have a bond with a shell */ + aS = NO_ATID; + + switch (bondtypes[j]) + { + case F_BONDS: + case F_HARMONIC: + case F_CUBICBONDS: + case F_POLARIZATION: + case F_ANHARM_POL: + if (atom[ia[1]].ptype == eptShell) + { + aS = ia[1]; + aN = ia[2]; + } + else if (atom[ia[2]].ptype == eptShell) + { + aS = ia[2]; + aN = ia[1]; + } + break; + case F_WATER_POL: + aN = ia[4]; /* Dummy */ + aS = ia[5]; /* Shell */ + break; + default: + gmx_fatal(FARGS, "Death Horror: %s, %d", __FILE__, __LINE__); + } + + if (aS != NO_ATID) + { + qS = atom[aS].q; + + /* Check whether one of the particles is a shell... */ + nsi = shell_index[a_offset+aS]; + if ((nsi < 0) || (nsi >= nshell)) + { + gmx_fatal(FARGS, "nsi is %d should be within 0 - %d. aS = %d", + nsi, nshell, aS); + } + if (shell[nsi].shell == NO_ATID) + { + shell[nsi].shell = a_offset + aS; + ns++; + } + else if (shell[nsi].shell != a_offset+aS) + { + gmx_fatal(FARGS, "Weird stuff in %s, %d", __FILE__, __LINE__); + } + + if (shell[nsi].nucl1 == NO_ATID) + { + shell[nsi].nucl1 = a_offset + aN; + } + else if (shell[nsi].nucl2 == NO_ATID) + { + shell[nsi].nucl2 = a_offset + aN; + } + else if (shell[nsi].nucl3 == NO_ATID) + { + shell[nsi].nucl3 = a_offset + aN; + } + else + { + if (fplog) + { + pr_shell(fplog, ns, shell); + } + gmx_fatal(FARGS, "Can not handle more than three bonds per shell\n"); + } + if (at2cg[aS] != at2cg[aN]) + { + /* shell[nsi].bInterCG = TRUE; */ + shfc->bInterCG = TRUE; + } + + switch (bondtypes[j]) + { + case F_BONDS: + case F_HARMONIC: + shell[nsi].k += ffparams->iparams[type].harmonic.krA; + break; + case F_CUBICBONDS: + shell[nsi].k += ffparams->iparams[type].cubic.kb; + break; + case F_POLARIZATION: + case F_ANHARM_POL: + if (!gmx_within_tol(qS, atom[aS].qB, GMX_REAL_EPS*10)) + { + gmx_fatal(FARGS, "polarize can not be used with qA(%e) != qB(%e) for atom %d of molecule block %d", qS, atom[aS].qB, aS+1, mb+1); + } + shell[nsi].k += sqr(qS)*ONE_4PI_EPS0/ + ffparams->iparams[type].polarize.alpha; + break; + case F_WATER_POL: + if (!gmx_within_tol(qS, atom[aS].qB, GMX_REAL_EPS*10)) + { + gmx_fatal(FARGS, "water_pol can not be used with qA(%e) != qB(%e) for atom %d of molecule block %d", qS, atom[aS].qB, aS+1, mb+1); + } + alpha = (ffparams->iparams[type].wpol.al_x+ + ffparams->iparams[type].wpol.al_y+ + ffparams->iparams[type].wpol.al_z)/3.0; + shell[nsi].k += sqr(qS)*ONE_4PI_EPS0/alpha; + break; + default: + gmx_fatal(FARGS, "Death Horror: %s, %d", __FILE__, __LINE__); + } + shell[nsi].nnucl++; + } + ia += nra+1; + i += nra+1; + } + } + a_offset += molt->atoms.nr; + } + /* Done with this molecule type */ + sfree(at2cg); + } + + /* Verify whether it's all correct */ + if (ns != nshell) + { + gmx_fatal(FARGS, "Something weird with shells. 
They may not be bonded to anything"); + } + + for (i = 0; (i < ns); i++) + { + shell[i].k_1 = 1.0/shell[i].k; + } + + if (debug) + { + pr_shell(debug, ns, shell); + } + + + shfc->nshell_gl = ns; + shfc->shell_gl = shell; + shfc->shell_index_gl = shell_index; + + shfc->bPredict = (getenv("GMX_NOPREDICT") == NULL); + shfc->bRequireInit = FALSE; + if (!shfc->bPredict) + { + if (fplog) + { + fprintf(fplog, "\nWill never predict shell positions\n"); + } + } + else + { + shfc->bRequireInit = (getenv("GMX_REQUIRE_SHELL_INIT") != NULL); + if (shfc->bRequireInit && fplog) + { + fprintf(fplog, "\nWill always initiate shell positions\n"); + } + } + + if (shfc->bPredict) + { + if (x) + { + predict_shells(fplog, x, NULL, 0, shfc->nshell_gl, shfc->shell_gl, + NULL, mtop, TRUE); + } + + if (shfc->bInterCG) + { + if (fplog) + { + fprintf(fplog, "\nNOTE: there are shells connected to particles outside their own charge group; shell positions will not be predicted during the run\n\n"); + } + shfc->bPredict = FALSE; + } + } + + return shfc; +} + +void make_local_shells(t_commrec *cr, t_mdatoms *md, + struct gmx_shellfc *shfc) +{ + t_shell *shell; + int a0, a1, *ind, nshell, i; + gmx_domdec_t *dd = NULL; + + if (DOMAINDECOMP(cr)) + { + dd = cr->dd; + a0 = 0; + a1 = dd->nat_home; + } + else + { + /* Single node: we need all shells, just copy the pointer */ + shfc->nshell = shfc->nshell_gl; + shfc->shell = shfc->shell_gl; + + return; + } + + ind = shfc->shell_index_gl; + + nshell = 0; + shell = shfc->shell; + for (i = a0; i < a1; i++) + { + if (md->ptype[i] == eptShell) + { + if (nshell+1 > shfc->shell_nalloc) + { + shfc->shell_nalloc = over_alloc_dd(nshell+1); + srenew(shell, shfc->shell_nalloc); + } + if (dd) + { + shell[nshell] = shfc->shell_gl[ind[dd->gatindex[i]]]; + } + else + { + shell[nshell] = shfc->shell_gl[ind[i]]; + } + + /* With inter-cg shells we can not do shell prediction, + * so we do not need the nuclei numbers. 
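+ * (Without inter-cg shells the global nucleus indices are shifted below + * to local indices, relative to the local index i of the shell itself.)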
+ */ + if (!shfc->bInterCG) + { + shell[nshell].nucl1 = i + shell[nshell].nucl1 - shell[nshell].shell; + if (shell[nshell].nnucl > 1) + { + shell[nshell].nucl2 = i + shell[nshell].nucl2 - shell[nshell].shell; + } + if (shell[nshell].nnucl > 2) + { + shell[nshell].nucl3 = i + shell[nshell].nucl3 - shell[nshell].shell; + } + } + shell[nshell].shell = i; + nshell++; + } + } + + shfc->nshell = nshell; + shfc->shell = shell; +} + +static void do_1pos(rvec xnew, rvec xold, rvec f, real step) +{ + real xo, yo, zo; + real dx, dy, dz; + + xo = xold[XX]; + yo = xold[YY]; + zo = xold[ZZ]; + + dx = f[XX]*step; + dy = f[YY]*step; + dz = f[ZZ]*step; + + xnew[XX] = xo+dx; + xnew[YY] = yo+dy; + xnew[ZZ] = zo+dz; +} + +static void do_1pos3(rvec xnew, rvec xold, rvec f, rvec step) +{ + real xo, yo, zo; + real dx, dy, dz; + + xo = xold[XX]; + yo = xold[YY]; + zo = xold[ZZ]; + + dx = f[XX]*step[XX]; + dy = f[YY]*step[YY]; + dz = f[ZZ]*step[ZZ]; + + xnew[XX] = xo+dx; + xnew[YY] = yo+dy; + xnew[ZZ] = zo+dz; +} + +static void directional_sd(rvec xold[], rvec xnew[], rvec acc_dir[], + int start, int homenr, real step) +{ + int i; + + for (i = start; i < homenr; i++) + { + do_1pos(xnew[i], xold[i], acc_dir[i], step); + } +} + +static void shell_pos_sd(rvec xcur[], rvec xnew[], rvec f[], + int ns, t_shell s[], int count) +{ + const real step_scale_min = 0.8, + step_scale_increment = 0.2, + step_scale_max = 1.2, + step_scale_multiple = (step_scale_max - step_scale_min) / step_scale_increment; + int i, shell, d; + real dx, df, k_est; +#ifdef PRINT_STEP + real step_min, step_max; + + step_min = 1e30; + step_max = 0; +#endif + for (i = 0; (i < ns); i++) + { + shell = s[i].shell; + if (count == 1) + { + for (d = 0; d < DIM; d++) + { + s[i].step[d] = s[i].k_1; +#ifdef PRINT_STEP + step_min = min(step_min, s[i].step[d]); + step_max = max(step_max, s[i].step[d]); +#endif + } + } + else + { + for (d = 0; d < DIM; d++) + { + dx = xcur[shell][d] - s[i].xold[d]; + df = f[shell][d] - s[i].fold[d]; + /* -dx/df gets used to generate an interpolated value, but would + * cause a NaN if df were binary-equal to zero. Values close to + * zero won't cause problems (because of the min() and max()), so + * just testing for binary inequality is OK. */ + if (0.0 != df) + { + k_est = -dx/df; + /* Scale the step size by a factor interpolated from + * step_scale_min to step_scale_max, as k_est goes from 0 to + * step_scale_multiple * s[i].step[d] */ + s[i].step[d] = + step_scale_min * s[i].step[d] + + step_scale_increment * min(step_scale_multiple * s[i].step[d], max(k_est, 0)); + } + else + { + /* Here 0 == df */ + if (gmx_numzero(dx)) /* 0 == dx */ + { + /* Likely this will never happen, but if it does just + * don't scale the step. 
*/ + } + else /* 0 != dx */ + { + s[i].step[d] *= step_scale_max; + } + } +#ifdef PRINT_STEP + step_min = min(step_min, s[i].step[d]); + step_max = max(step_max, s[i].step[d]); +#endif + } + } + copy_rvec(xcur[shell], s[i].xold); + copy_rvec(f[shell], s[i].fold); + + do_1pos3(xnew[shell], xcur[shell], f[shell], s[i].step); + + if (gmx_debug_at) + { + fprintf(debug, "shell[%d] = %d\n", i, shell); + pr_rvec(debug, 0, "fshell", f[shell], DIM, TRUE); + pr_rvec(debug, 0, "xold", xcur[shell], DIM, TRUE); + pr_rvec(debug, 0, "step", s[i].step, DIM, TRUE); + pr_rvec(debug, 0, "xnew", xnew[shell], DIM, TRUE); + } + } +#ifdef PRINT_STEP + printf("step %.3e %.3e\n", step_min, step_max); +#endif +} + +static void decrease_step_size(int nshell, t_shell s[]) +{ + int i; + + for (i = 0; i < nshell; i++) + { + svmul(0.8, s[i].step, s[i].step); + } +} + +static void print_epot(FILE *fp, gmx_int64_t mdstep, int count, real epot, real df, + int ndir, real sf_dir) +{ + char buf[22]; + + fprintf(fp, "MDStep=%5s/%2d EPot: %12.8e, rmsF: %6.2e", + gmx_step_str(mdstep, buf), count, epot, df); + if (ndir) + { + fprintf(fp, ", dir. rmsF: %6.2e\n", sqrt(sf_dir/ndir)); + } + else + { + fprintf(fp, "\n"); + } +} + + +static real rms_force(t_commrec *cr, rvec f[], int ns, t_shell s[], + int ndir, real *sf_dir, real *Epot) +{ + int i, shell, ntot; + double buf[4]; + + buf[0] = *sf_dir; + for (i = 0; i < ns; i++) + { + shell = s[i].shell; + buf[0] += norm2(f[shell]); + } + ntot = ns; + + if (PAR(cr)) + { + buf[1] = ntot; + buf[2] = *sf_dir; + buf[3] = *Epot; + gmx_sumd(4, buf, cr); + ntot = (int)(buf[1] + 0.5); + *sf_dir = buf[2]; + *Epot = buf[3]; + } + ntot += ndir; + + return (ntot ? sqrt(buf[0]/ntot) : 0); +} + +static void check_pbc(FILE *fp, rvec x[], int shell) +{ + int m, now; + + now = shell-4; + for (m = 0; (m < DIM); m++) + { + if (fabs(x[shell][m]-x[now][m]) > 0.3) + { + pr_rvecs(fp, 0, "SHELL-X", x+now, 5); + break; + } + } +} + +static void dump_shells(FILE *fp, rvec x[], rvec f[], real ftol, int ns, t_shell s[]) +{ + int i, shell; + real ft2, ff2; + + ft2 = sqr(ftol); + + for (i = 0; (i < ns); i++) + { + shell = s[i].shell; + ff2 = iprod(f[shell], f[shell]); + if (ff2 > ft2) + { + fprintf(fp, "SHELL %5d, force %10.5f %10.5f %10.5f, |f| %10.5f\n", + shell, f[shell][XX], f[shell][YY], f[shell][ZZ], sqrt(ff2)); + } + check_pbc(fp, x, shell); + } +} + +static void init_adir(FILE *log, gmx_shellfc_t shfc, + gmx_constr_t constr, t_idef *idef, t_inputrec *ir, + t_commrec *cr, int dd_ac1, + gmx_int64_t step, t_mdatoms *md, int start, int end, + rvec *x_old, rvec *x_init, rvec *x, + rvec *f, rvec *acc_dir, + gmx_bool bMolPBC, matrix box, + real *lambda, real *dvdlambda, t_nrnb *nrnb) +{ + rvec *xnold, *xnew; + double w_dt; + int gf, ga, gt; + real dt, scale; + int n, d; + unsigned short *ptype; + rvec p, dx; + + if (DOMAINDECOMP(cr)) + { + n = dd_ac1; + } + else + { + n = end - start; + } + if (n > shfc->adir_nalloc) + { + shfc->adir_nalloc = over_alloc_dd(n); + srenew(shfc->adir_xnold, shfc->adir_nalloc); + srenew(shfc->adir_xnew, shfc->adir_nalloc); + } + xnold = shfc->adir_xnold; + xnew = shfc->adir_xnew; + + ptype = md->ptype; + + dt = ir->delta_t; + + /* Does NOT work with freeze or acceleration groups (yet) */ + for (n = start; n < end; n++) + { + w_dt = md->invmass[n]*dt; + + for (d = 0; d < DIM; d++) + { + if ((ptype[n] != eptVSite) && (ptype[n] != eptShell)) + { + xnold[n-start][d] = x[n][d] - (x_init[n][d] - x_old[n][d]); + xnew[n-start][d] = 2*x[n][d] - x_old[n][d] + f[n][d]*w_dt*dt; + } + else + { + 
xnold[n-start][d] = x[n][d]; + xnew[n-start][d] = x[n][d]; + } + } + } + constrain(log, FALSE, FALSE, constr, idef, ir, NULL, cr, step, 0, md, + x, xnold-start, NULL, bMolPBC, box, + lambda[efptBONDED], &(dvdlambda[efptBONDED]), + NULL, NULL, nrnb, econqCoord, FALSE, 0, 0); + constrain(log, FALSE, FALSE, constr, idef, ir, NULL, cr, step, 0, md, + x, xnew-start, NULL, bMolPBC, box, + lambda[efptBONDED], &(dvdlambda[efptBONDED]), + NULL, NULL, nrnb, econqCoord, FALSE, 0, 0); + + for (n = start; n < end; n++) + { + for (d = 0; d < DIM; d++) + { + xnew[n-start][d] = + -(2*x[n][d]-xnold[n-start][d]-xnew[n-start][d])/sqr(dt) + - f[n][d]*md->invmass[n]; + } + clear_rvec(acc_dir[n]); + } + + /* Project the acceleration on the old bond directions */ + constrain(log, FALSE, FALSE, constr, idef, ir, NULL, cr, step, 0, md, + x_old, xnew-start, acc_dir, bMolPBC, box, + lambda[efptBONDED], &(dvdlambda[efptBONDED]), + NULL, NULL, nrnb, econqDeriv_FlexCon, FALSE, 0, 0); +} + +int relax_shell_flexcon(FILE *fplog, t_commrec *cr, gmx_bool bVerbose, + gmx_int64_t mdstep, t_inputrec *inputrec, + gmx_bool bDoNS, int force_flags, + gmx_localtop_t *top, + gmx_constr_t constr, + gmx_enerdata_t *enerd, t_fcdata *fcd, + t_state *state, rvec f[], + tensor force_vir, + t_mdatoms *md, + t_nrnb *nrnb, gmx_wallcycle_t wcycle, + t_graph *graph, + gmx_groups_t *groups, + struct gmx_shellfc *shfc, + t_forcerec *fr, + gmx_bool bBornRadii, + double t, rvec mu_tot, + gmx_bool *bConverged, + gmx_vsite_t *vsite, + FILE *fp_field) +{ + int nshell; + t_shell *shell; + t_idef *idef; + rvec *pos[2], *force[2], *acc_dir = NULL, *x_old = NULL; + real Epot[2], df[2]; + rvec dx; + real sf_dir, invdt; + real ftol, xiH, xiS, dum = 0; + char sbuf[22]; + gmx_bool bCont, bInit; + int nat, dd_ac0, dd_ac1 = 0, i; + int start = 0, homenr = md->homenr, end = start+homenr, cg0, cg1; + int nflexcon, g, number_steps, d, Min = 0, count = 0; +#define Try (1-Min) /* At start Try = 1 */ + + bCont = (mdstep == inputrec->init_step) && inputrec->bContinuation; + bInit = (mdstep == inputrec->init_step) || shfc->bRequireInit; + ftol = inputrec->em_tol; + number_steps = inputrec->niter; + nshell = shfc->nshell; + shell = shfc->shell; + nflexcon = shfc->nflexcon; + + idef = &top->idef; + + if (DOMAINDECOMP(cr)) + { + nat = dd_natoms_vsite(cr->dd); + if (nflexcon > 0) + { + dd_get_constraint_range(cr->dd, &dd_ac0, &dd_ac1); + nat = max(nat, dd_ac1); + } + } + else + { + nat = state->natoms; + } + + if (nat > shfc->x_nalloc) + { + /* Allocate local arrays */ + shfc->x_nalloc = over_alloc_dd(nat); + for (i = 0; (i < 2); i++) + { + srenew(shfc->x[i], shfc->x_nalloc); + srenew(shfc->f[i], shfc->x_nalloc); + } + } + for (i = 0; (i < 2); i++) + { + pos[i] = shfc->x[i]; + force[i] = shfc->f[i]; + } + + /* When we had particle decomposition, this code only worked with + * PD when all particles involved with each shell were in the same + * charge group. Not sure if this is still relevant. */ + if (bDoNS && inputrec->ePBC != epbcNONE && !DOMAINDECOMP(cr)) + { + /* This is the only time where the coordinates are used + * before do_force is called, which normally puts all + * charge groups in the box. 
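+     * Without domain decomposition they are therefore put in the box
+     * explicitly here with put_charge_groups_in_box(), and mk_mshift()
+     * is rerun so that the shift graph matches the wrapped coordinates.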
+ */ + cg0 = 0; + cg1 = top->cgs.nr; + put_charge_groups_in_box(fplog, cg0, cg1, fr->ePBC, state->box, + &(top->cgs), state->x, fr->cg_cm); + if (graph) + { + mk_mshift(fplog, graph, fr->ePBC, state->box, state->x); + } + } + + /* After this all coordinate arrays will contain whole molecules */ + if (graph) + { + shift_self(graph, state->box, state->x); + } + + if (nflexcon) + { + if (nat > shfc->flex_nalloc) + { + shfc->flex_nalloc = over_alloc_dd(nat); + srenew(shfc->acc_dir, shfc->flex_nalloc); + srenew(shfc->x_old, shfc->flex_nalloc); + } + acc_dir = shfc->acc_dir; + x_old = shfc->x_old; + for (i = 0; i < homenr; i++) + { + for (d = 0; d < DIM; d++) + { + shfc->x_old[i][d] = + state->x[start+i][d] - state->v[start+i][d]*inputrec->delta_t; + } + } + } + + /* Do a prediction of the shell positions */ + if (shfc->bPredict && !bCont) + { + predict_shells(fplog, state->x, state->v, inputrec->delta_t, nshell, shell, + md->massT, NULL, bInit); + } + + /* do_force expected the charge groups to be in the box */ + if (graph) + { + unshift_self(graph, state->box, state->x); + } + + /* Calculate the forces first time around */ + if (gmx_debug_at) + { + pr_rvecs(debug, 0, "x b4 do_force", state->x + start, homenr); + } + do_force(fplog, cr, inputrec, mdstep, nrnb, wcycle, top, groups, + state->box, state->x, &state->hist, + force[Min], force_vir, md, enerd, fcd, + state->lambda, graph, + fr, vsite, mu_tot, t, fp_field, NULL, bBornRadii, + (bDoNS ? GMX_FORCE_NS : 0) | force_flags); + + sf_dir = 0; + if (nflexcon) + { + init_adir(fplog, shfc, + constr, idef, inputrec, cr, dd_ac1, mdstep, md, start, end, + shfc->x_old-start, state->x, state->x, force[Min], + shfc->acc_dir-start, + fr->bMolPBC, state->box, state->lambda, &dum, nrnb); + + for (i = start; i < end; i++) + { + sf_dir += md->massT[i]*norm2(shfc->acc_dir[i-start]); + } + } + + Epot[Min] = enerd->term[F_EPOT]; + + df[Min] = rms_force(cr, shfc->f[Min], nshell, shell, nflexcon, &sf_dir, &Epot[Min]); + df[Try] = 0; + if (debug) + { + fprintf(debug, "df = %g %g\n", df[Min], df[Try]); + } + + if (gmx_debug_at) + { + pr_rvecs(debug, 0, "force0", force[Min], md->nr); + } + + if (nshell+nflexcon > 0) + { + /* Copy x to pos[Min] & pos[Try]: during minimization only the + * shell positions are updated, therefore the other particles must + * be set here. + */ + memcpy(pos[Min], state->x, nat*sizeof(state->x[0])); + memcpy(pos[Try], state->x, nat*sizeof(state->x[0])); + } + + if (bVerbose && MASTER(cr)) + { + print_epot(stdout, mdstep, 0, Epot[Min], df[Min], nflexcon, sf_dir); + } + + if (debug) + { + fprintf(debug, "%17s: %14.10e\n", + interaction_function[F_EKIN].longname, enerd->term[F_EKIN]); + fprintf(debug, "%17s: %14.10e\n", + interaction_function[F_EPOT].longname, enerd->term[F_EPOT]); + fprintf(debug, "%17s: %14.10e\n", + interaction_function[F_ETOT].longname, enerd->term[F_ETOT]); + fprintf(debug, "SHELLSTEP %s\n", gmx_step_str(mdstep, sbuf)); + } + + /* First check whether we should do shells, or whether the force is + * low enough even without minimization. 
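+     * Convergence is judged on the RMS force over the shell and
+     * flexible-constraint degrees of freedom, as computed by rms_force():
+     *
+     *   df = sqrt( (sum_shells |f_shell|^2 + sf_dir) / (nshell + nflexcon) )
+     *
+     * and the loop below iterates until df < inputrec->em_tol or
+     * inputrec->niter steps have been taken.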
+ */ + *bConverged = (df[Min] < ftol); + + for (count = 1; (!(*bConverged) && (count < number_steps)); count++) + { + if (vsite) + { + construct_vsites(vsite, pos[Min], inputrec->delta_t, state->v, + idef->iparams, idef->il, + fr->ePBC, fr->bMolPBC, cr, state->box); + } + + if (nflexcon) + { + init_adir(fplog, shfc, + constr, idef, inputrec, cr, dd_ac1, mdstep, md, start, end, + x_old-start, state->x, pos[Min], force[Min], acc_dir-start, + fr->bMolPBC, state->box, state->lambda, &dum, nrnb); + + directional_sd(pos[Min], pos[Try], acc_dir-start, start, end, + fr->fc_stepsize); + } + + /* New positions, Steepest descent */ + shell_pos_sd(pos[Min], pos[Try], force[Min], nshell, shell, count); + + /* do_force expected the charge groups to be in the box */ + if (graph) + { + unshift_self(graph, state->box, pos[Try]); + } + + if (gmx_debug_at) + { + pr_rvecs(debug, 0, "RELAX: pos[Min] ", pos[Min] + start, homenr); + pr_rvecs(debug, 0, "RELAX: pos[Try] ", pos[Try] + start, homenr); + } + /* Try the new positions */ + do_force(fplog, cr, inputrec, 1, nrnb, wcycle, + top, groups, state->box, pos[Try], &state->hist, + force[Try], force_vir, + md, enerd, fcd, state->lambda, graph, + fr, vsite, mu_tot, t, fp_field, NULL, bBornRadii, + force_flags); + + if (gmx_debug_at) + { + pr_rvecs(debug, 0, "RELAX: force[Min]", force[Min] + start, homenr); + pr_rvecs(debug, 0, "RELAX: force[Try]", force[Try] + start, homenr); + } + sf_dir = 0; + if (nflexcon) + { + init_adir(fplog, shfc, + constr, idef, inputrec, cr, dd_ac1, mdstep, md, start, end, + x_old-start, state->x, pos[Try], force[Try], acc_dir-start, + fr->bMolPBC, state->box, state->lambda, &dum, nrnb); + + for (i = start; i < end; i++) + { + sf_dir += md->massT[i]*norm2(acc_dir[i-start]); + } + } + + Epot[Try] = enerd->term[F_EPOT]; + + df[Try] = rms_force(cr, force[Try], nshell, shell, nflexcon, &sf_dir, &Epot[Try]); + + if (debug) + { + fprintf(debug, "df = %g %g\n", df[Min], df[Try]); + } + + if (debug) + { + if (gmx_debug_at) + { + pr_rvecs(debug, 0, "F na do_force", force[Try] + start, homenr); + } + if (gmx_debug_at) + { + fprintf(debug, "SHELL ITER %d\n", count); + dump_shells(debug, pos[Try], force[Try], ftol, nshell, shell); + } + } + + if (bVerbose && MASTER(cr)) + { + print_epot(stdout, mdstep, count, Epot[Try], df[Try], nflexcon, sf_dir); + } + + *bConverged = (df[Try] < ftol); + + if ((df[Try] < df[Min])) + { + if (debug) + { + fprintf(debug, "Swapping Min and Try\n"); + } + if (nflexcon) + { + /* Correct the velocities for the flexible constraints */ + invdt = 1/inputrec->delta_t; + for (i = start; i < end; i++) + { + for (d = 0; d < DIM; d++) + { + state->v[i][d] += (pos[Try][i][d] - pos[Min][i][d])*invdt; + } + } + } + Min = Try; + } + else + { + decrease_step_size(nshell, shell); + } + } + if (MASTER(cr) && !(*bConverged)) + { + /* Note that the energies and virial are incorrect when not converged */ + if (fplog) + { + fprintf(fplog, + "step %s: EM did not converge in %d iterations, RMS force %.3f\n", + gmx_step_str(mdstep, sbuf), number_steps, df[Min]); + } + fprintf(stderr, + "step %s: EM did not converge in %d iterations, RMS force %.3f\n", + gmx_step_str(mdstep, sbuf), number_steps, df[Min]); + } + + /* Copy back the coordinates and the forces */ + memcpy(state->x, pos[Min], nat*sizeof(state->x[0])); + memcpy(f, force[Min], nat*sizeof(f[0])); + + return count; +} diff --cc src/programs/mdrun/md.c index 23b44867fa,0000000000..04a4f7bb63 mode 100644,000000..100644 --- a/src/programs/mdrun/md.c +++ b/src/programs/mdrun/md.c @@@ -1,1982 
-1,0 +1,1982 @@@ +/* + * This file is part of the GROMACS molecular simulation package. + * + * Copyright (c) 1991-2000, University of Groningen, The Netherlands. + * Copyright (c) 2001-2004, The GROMACS development team. + * Copyright (c) 2011,2012,2013,2014, by the GROMACS development team, led by + * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, + * and including many others, as listed in the AUTHORS file in the + * top-level source directory and at http://www.gromacs.org. + * + * GROMACS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the License, or (at your option) any later version. + * + * GROMACS is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GROMACS; if not, see + * http://www.gnu.org/licenses, or write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * If you want to redistribute modifications to GROMACS, please + * consider that scientific software is very special. Version + * control is crucial - bugs must be traceable. We will be happy to + * consider code for inclusion in the official distribution, but + * derived work must not be called official GROMACS. Details are found + * in the README & COPYING files - if they are missing, get the + * official version at http://www.gromacs.org. + * + * To help us fund GROMACS development, we humbly ask that you cite + * the research papers on the package. Check out http://www.gromacs.org. 
+ */ +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "typedefs.h" +#include "smalloc.h" +#include "sysstuff.h" +#include "vec.h" +#include "vcm.h" +#include "mdebin.h" +#include "nrnb.h" +#include "calcmu.h" +#include "index.h" +#include "vsite.h" +#include "update.h" +#include "ns.h" +#include "mdrun.h" +#include "md_support.h" +#include "md_logging.h" +#include "network.h" +#include "xvgr.h" +#include "physics.h" +#include "names.h" +#include "force.h" +#include "disre.h" +#include "orires.h" +#include "pme.h" +#include "mdatoms.h" +#include "repl_ex.h" +#include "qmmm.h" +#include "domdec.h" +#include "domdec_network.h" +#include "gromacs/gmxlib/topsort.h" +#include "coulomb.h" +#include "constr.h" +#include "shellfc.h" +#include "compute_io.h" +#include "checkpoint.h" +#include "mtop_util.h" +#include "sighandler.h" +#include "txtdump.h" +#include "string2.h" +#include "pme_loadbal.h" +#include "bondf.h" +#include "membed.h" +#include "types/nlistheuristics.h" +#include "types/iteratedconstraints.h" +#include "nbnxn_cuda_data_mgmt.h" + +#include "gromacs/utility/gmxmpi.h" +#include "gromacs/fileio/confio.h" +#include "gromacs/fileio/trajectory_writing.h" +#include "gromacs/fileio/trnio.h" +#include "gromacs/fileio/trxio.h" +#include "gromacs/fileio/xtcio.h" +#include "gromacs/timing/wallcycle.h" +#include "gromacs/timing/walltime_accounting.h" +#include "gromacs/pulling/pull.h" +#include "gromacs/swap/swapcoords.h" + +#ifdef GMX_FAHCORE +#include "corewrap.h" +#endif + +static void reset_all_counters(FILE *fplog, t_commrec *cr, + gmx_int64_t step, + gmx_int64_t *step_rel, t_inputrec *ir, + gmx_wallcycle_t wcycle, t_nrnb *nrnb, + gmx_walltime_accounting_t walltime_accounting, + nbnxn_cuda_ptr_t cu_nbv) +{ + char sbuf[STEPSTRSIZE]; + + /* Reset all the counters related to performance over the run */ + md_print_warn(cr, fplog, "step %s: resetting all time and cycle counters\n", + gmx_step_str(step, sbuf)); + + if (cu_nbv) + { + nbnxn_cuda_reset_timings(cu_nbv); + } + + wallcycle_stop(wcycle, ewcRUN); + wallcycle_reset_all(wcycle); + if (DOMAINDECOMP(cr)) + { + reset_dd_statistics_counters(cr->dd); + } + init_nrnb(nrnb); + ir->init_step += *step_rel; + ir->nsteps -= *step_rel; + *step_rel = 0; + wallcycle_start(wcycle, ewcRUN); + walltime_accounting_start(walltime_accounting); + print_date_and_time(fplog, cr->nodeid, "Restarted time", walltime_accounting); +} + +double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[], + const output_env_t oenv, gmx_bool bVerbose, gmx_bool bCompact, + int nstglobalcomm, + gmx_vsite_t *vsite, gmx_constr_t constr, + int stepout, t_inputrec *ir, + gmx_mtop_t *top_global, + t_fcdata *fcd, + t_state *state_global, + t_mdatoms *mdatoms, + t_nrnb *nrnb, gmx_wallcycle_t wcycle, + gmx_edsam_t ed, t_forcerec *fr, + int repl_ex_nst, int repl_ex_nex, int repl_ex_seed, gmx_membed_t membed, + real cpt_period, real max_hours, + const char gmx_unused *deviceOptions, + unsigned long Flags, + gmx_walltime_accounting_t walltime_accounting) +{ + gmx_mdoutf_t outf = NULL; + gmx_int64_t step, step_rel; + double elapsed_time; + double t, t0, lam0[efptNR]; + gmx_bool bGStatEveryStep, bGStat, bCalcVir, bCalcEner; + gmx_bool bNS, bNStList, bSimAnn, bStopCM, bRerunMD, bNotLastFrame = FALSE, + bFirstStep, bStateFromCP, bStateFromTPX, bInitStep, bLastStep, + bBornRadii, bStartingFromCpt; + gmx_bool bDoDHDL = FALSE, bDoFEP = FALSE, bDoExpanded = FALSE; + gmx_bool do_ene, do_log, do_verbose, bRerunWarnNoV = TRUE, + bForceUpdate = FALSE, bCPT; + gmx_bool bMasterState; 
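+    /* force_flags collects the GMX_FORCE_* bits passed to do_force() each
+     * step (energy, virial, dH/dl, long-range update, ...); cglo_flags
+     * collects the CGLO_* bits that tell compute_globals() which global
+     * reductions to perform. Both are rebuilt inside the MD loop below. */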
+ int force_flags, cglo_flags; + tensor force_vir, shake_vir, total_vir, tmp_vir, pres; + int i, m; + t_trxstatus *status; + rvec mu_tot; + t_vcm *vcm; + t_state *bufstate = NULL; + matrix *scale_tot, pcoupl_mu, M, ebox; + gmx_nlheur_t nlh; + t_trxframe rerun_fr; + gmx_repl_ex_t repl_ex = NULL; + int nchkpt = 1; + gmx_localtop_t *top; + t_mdebin *mdebin = NULL; + t_state *state = NULL; + rvec *f_global = NULL; + gmx_enerdata_t *enerd; + rvec *f = NULL; + gmx_global_stat_t gstat; + gmx_update_t upd = NULL; + t_graph *graph = NULL; + globsig_t gs; + gmx_rng_t mcrng = NULL; + gmx_groups_t *groups; + gmx_ekindata_t *ekind, *ekind_save; + gmx_shellfc_t shellfc; + int count, nconverged = 0; + real timestep = 0; + double tcount = 0; + gmx_bool bConverged = TRUE, bOK, bSumEkinhOld, bExchanged, bNeedRepartition; + gmx_bool bAppend; + gmx_bool bResetCountersHalfMaxH = FALSE; + gmx_bool bVV, bIterativeCase, bFirstIterate, bTemp, bPres, bTrotter; + gmx_bool bUpdateDoLR; + real dvdl_constr; + rvec *cbuf = NULL; + matrix lastbox; + real veta_save, scalevir, tracevir; + real vetanew = 0; + int lamnew = 0; + /* for FEP */ + int nstfep; + double cycles; + real saved_conserved_quantity = 0; + real last_ekin = 0; + int iter_i; + t_extmass MassQ; + int **trotter_seq; + char sbuf[STEPSTRSIZE], sbuf2[STEPSTRSIZE]; + int handled_stop_condition = gmx_stop_cond_none; /* compare to get_stop_condition*/ + gmx_iterate_t iterate; + gmx_int64_t multisim_nsteps = -1; /* number of steps to do before first multisim + simulation stops. If equal to zero, don't + communicate any more between multisims.*/ + /* PME load balancing data for GPU kernels */ + pme_load_balancing_t pme_loadbal = NULL; + double cycles_pmes; + gmx_bool bPMETuneTry = FALSE, bPMETuneRunning = FALSE; + +#ifdef GMX_FAHCORE + /* Temporary addition for FAHCORE checkpointing */ + int chkpt_ret; +#endif + + /* Check for special mdrun options */ + bRerunMD = (Flags & MD_RERUN); + bAppend = (Flags & MD_APPENDFILES); + if (Flags & MD_RESETCOUNTERSHALFWAY) + { + if (ir->nsteps > 0) + { + /* Signal to reset the counters half the simulation steps. */ + wcycle_set_reset_counters(wcycle, ir->nsteps/2); + } + /* Signal to reset the counters halfway the simulation time. */ + bResetCountersHalfMaxH = (max_hours > 0); + } + + /* md-vv uses averaged full step velocities for T-control + md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control) + md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */ + bVV = EI_VV(ir->eI); + if (bVV) /* to store the initial velocities while computing virial */ + { + snew(cbuf, top_global->natoms); + } + /* all the iteratative cases - only if there are constraints */ + bIterativeCase = ((IR_NPH_TROTTER(ir) || IR_NPT_TROTTER(ir)) && (constr) && (!bRerunMD)); + gmx_iterate_init(&iterate, FALSE); /* The default value of iterate->bIterationActive is set to + false in this step. The correct value, true or false, + is set at each step, as it depends on the frequency of temperature + and pressure control.*/ + bTrotter = (bVV && (IR_NPT_TROTTER(ir) || IR_NPH_TROTTER(ir) || IR_NVT_TROTTER(ir))); + + if (bRerunMD) + { + /* Since we don't know if the frames read are related in any way, + * rebuild the neighborlist at every step. 
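+         * The energy evaluation and the global communication are forced to
+         * every step for the same reason (nstlist, nstcalcenergy and
+         * nstglobalcomm are all set to 1 below).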
+ */ + ir->nstlist = 1; + ir->nstcalcenergy = 1; + nstglobalcomm = 1; + } + + check_ir_old_tpx_versions(cr, fplog, ir, top_global); + + nstglobalcomm = check_nstglobalcomm(fplog, cr, nstglobalcomm, ir); + bGStatEveryStep = (nstglobalcomm == 1); + + if (!bGStatEveryStep && ir->nstlist == -1 && fplog != NULL) + { + fprintf(fplog, + "To reduce the energy communication with nstlist = -1\n" + "the neighbor list validity should not be checked at every step,\n" + "this means that exact integration is not guaranteed.\n" + "The neighbor list validity is checked after:\n" + " - 2*std.dev.(n.list life time) steps.\n" + "In most cases this will result in exact integration.\n" + "This reduces the energy communication by a factor of 2 to 3.\n" + "If you want less energy communication, set nstlist > 3.\n\n"); + } + + if (bRerunMD) + { + ir->nstxout_compressed = 0; + } + groups = &top_global->groups; + + /* Initial values */ + init_md(fplog, cr, ir, oenv, &t, &t0, state_global->lambda, + &(state_global->fep_state), lam0, + nrnb, top_global, &upd, + nfile, fnm, &outf, &mdebin, + force_vir, shake_vir, mu_tot, &bSimAnn, &vcm, Flags); + + clear_mat(total_vir); + clear_mat(pres); + /* Energy terms and groups */ + snew(enerd, 1); + init_enerdata(top_global->groups.grps[egcENER].nr, ir->fepvals->n_lambda, + enerd); + if (DOMAINDECOMP(cr)) + { + f = NULL; + } + else + { + snew(f, top_global->natoms); + } + + /* Kinetic energy data */ + snew(ekind, 1); + init_ekindata(fplog, top_global, &(ir->opts), ekind); + /* needed for iteration of constraints */ + snew(ekind_save, 1); + init_ekindata(fplog, top_global, &(ir->opts), ekind_save); + /* Copy the cos acceleration to the groups struct */ + ekind->cosacc.cos_accel = ir->cos_accel; + + gstat = global_stat_init(ir); + debug_gmx(); + + /* Check for polarizable models and flexible constraints */ - shellfc = init_shell_flexcon(fplog, ++ shellfc = init_shell_flexcon(fplog, fr->cutoff_scheme == ecutsVERLET, + top_global, n_flexible_constraints(constr), + (ir->bContinuation || + (DOMAINDECOMP(cr) && !MASTER(cr))) ? 
+ NULL : state_global->x); + + if (DEFORM(*ir)) + { + tMPI_Thread_mutex_lock(&deform_init_box_mutex); + set_deform_reference_box(upd, + deform_init_init_step_tpx, + deform_init_box_tpx); + tMPI_Thread_mutex_unlock(&deform_init_box_mutex); + } + + { + double io = compute_io(ir, top_global->natoms, groups, mdebin->ebin->nener, 1); + if ((io > 2000) && MASTER(cr)) + { + fprintf(stderr, + "\nWARNING: This run will generate roughly %.0f Mb of data\n\n", + io); + } + } + + if (DOMAINDECOMP(cr)) + { + top = dd_init_local_top(top_global); + + snew(state, 1); + dd_init_local_state(cr->dd, state_global, state); + + if (DDMASTER(cr->dd) && ir->nstfout) + { + snew(f_global, state_global->natoms); + } + } + else + { + top = gmx_mtop_generate_local_top(top_global, ir); + + forcerec_set_excl_load(fr, top); + + state = serial_init_local_state(state_global); + f_global = f; + + atoms2md(top_global, ir, 0, NULL, top_global->natoms, mdatoms); + + if (vsite) + { + set_vsite_top(vsite, top, mdatoms, cr); + } + + if (ir->ePBC != epbcNONE && !fr->bMolPBC) + { + graph = mk_graph(fplog, &(top->idef), 0, top_global->natoms, FALSE, FALSE); + } + + if (shellfc) + { + make_local_shells(cr, mdatoms, shellfc); + } + + setup_bonded_threading(fr, &top->idef); + } + + if (DOMAINDECOMP(cr)) + { + /* Distribute the charge groups over the nodes from the master node */ + dd_partition_system(fplog, ir->init_step, cr, TRUE, 1, + state_global, top_global, ir, + state, &f, mdatoms, top, fr, + vsite, shellfc, constr, + nrnb, wcycle, FALSE); + + } + + update_mdatoms(mdatoms, state->lambda[efptMASS]); + + if (opt2bSet("-cpi", nfile, fnm)) + { + bStateFromCP = gmx_fexist_master(opt2fn_master("-cpi", nfile, fnm, cr), cr); + } + else + { + bStateFromCP = FALSE; + } + + if (ir->bExpanded) + { + init_expanded_ensemble(bStateFromCP, ir, &mcrng, &state->dfhist); + } + + if (MASTER(cr)) + { + if (bStateFromCP) + { + /* Update mdebin with energy history if appending to output files */ + if (Flags & MD_APPENDFILES) + { + restore_energyhistory_from_state(mdebin, &state_global->enerhist); + } + else + { + /* We might have read an energy history from checkpoint, + * free the allocated memory and reset the counts. + */ + done_energyhistory(&state_global->enerhist); + init_energyhistory(&state_global->enerhist); + } + } + /* Set the initial energy history in state by updating once */ + update_energyhistory(&state_global->enerhist, mdebin); + } + + if ((state->flags & (1<flags & (1< 0) + { + /* We need to be sure replica exchange can only occur + * when the energies are current */ + check_nst_param(fplog, cr, "nstcalcenergy", ir->nstcalcenergy, + "repl_ex_nst", &repl_ex_nst); + /* This check needs to happen before inter-simulation + * signals are initialized, too */ + } + if (repl_ex_nst > 0 && MASTER(cr)) + { + repl_ex = init_replica_exchange(fplog, cr->ms, state_global, ir, + repl_ex_nst, repl_ex_nex, repl_ex_seed); + } + + /* PME tuning is only supported with GPUs or PME nodes and not with rerun or LJ-PME. + * With perturbed charges with soft-core we should not change the cut-off. 
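+     * When this rank also does its own PME (no separate PME ranks) there is
+     * no PP/PME load balance to measure, so tuning starts immediately
+     * (bPMETuneRunning); with separate PME ranks the load is measured first
+     * (bPMETuneTry).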
+ */ + if ((Flags & MD_TUNEPME) && + EEL_PME(fr->eeltype) && + ( (fr->cutoff_scheme == ecutsVERLET && fr->nbv->bUseGPU) || !(cr->duty & DUTY_PME)) && + !(ir->efep != efepNO && mdatoms->nChargePerturbed > 0 && ir->fepvals->bScCoul) && + !bRerunMD && !EVDW_PME(fr->vdwtype)) + { + pme_loadbal_init(&pme_loadbal, ir, state->box, fr->ic, fr->pmedata); + cycles_pmes = 0; + if (cr->duty & DUTY_PME) + { + /* Start tuning right away, as we can't measure the load */ + bPMETuneRunning = TRUE; + } + else + { + /* Separate PME nodes, we can measure the PP/PME load balance */ + bPMETuneTry = TRUE; + } + } + + if (!ir->bContinuation && !bRerunMD) + { + if (mdatoms->cFREEZE && (state->flags & (1<homenr; i++) + { + for (m = 0; m < DIM; m++) + { + if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m]) + { + state->v[i][m] = 0; + } + } + } + } + + if (constr) + { + /* Constrain the initial coordinates and velocities */ + do_constrain_first(fplog, constr, ir, mdatoms, state, + cr, nrnb, fr, top); + } + if (vsite) + { + /* Construct the virtual sites for the initial configuration */ + construct_vsites(vsite, state->x, ir->delta_t, NULL, + top->idef.iparams, top->idef.il, + fr->ePBC, fr->bMolPBC, cr, state->box); + } + } + + debug_gmx(); + + /* set free energy calculation frequency as the minimum + greatest common denominator of nstdhdl, nstexpanded, and repl_ex_nst*/ + nstfep = ir->fepvals->nstdhdl; + if (ir->bExpanded) + { + nstfep = gmx_greatest_common_divisor(ir->fepvals->nstdhdl, nstfep); + } + if (repl_ex_nst > 0) + { + nstfep = gmx_greatest_common_divisor(repl_ex_nst, nstfep); + } + + /* I'm assuming we need global communication the first time! MRS */ + cglo_flags = (CGLO_TEMPERATURE | CGLO_GSTAT + | ((ir->comm_mode != ecmNO) ? CGLO_STOPCM : 0) + | (bVV ? CGLO_PRESSURE : 0) + | (bVV ? CGLO_CONSTRAINT : 0) + | (bRerunMD ? CGLO_RERUNMD : 0) + | ((Flags & MD_READ_EKIN) ? CGLO_READEKIN : 0)); + + bSumEkinhOld = FALSE; + compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm, + NULL, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, + constr, NULL, FALSE, state->box, + top_global, &bSumEkinhOld, cglo_flags); + if (ir->eI == eiVVAK) + { + /* a second call to get the half step temperature initialized as well */ + /* we do the same call as above, but turn the pressure off -- internally to + compute_globals, this is recognized as a velocity verlet half-step + kinetic energy calculation. This minimized excess variables, but + perhaps loses some logic?*/ + + compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm, + NULL, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, + constr, NULL, FALSE, state->box, + top_global, &bSumEkinhOld, + cglo_flags &~(CGLO_STOPCM | CGLO_PRESSURE)); + } + + /* Calculate the initial half step temperature, and save the ekinh_old */ + if (!(Flags & MD_STARTFROMCPT)) + { + for (i = 0; (i < ir->opts.ngtc); i++) + { + copy_mat(ekind->tcstat[i].ekinh, ekind->tcstat[i].ekinh_old); + } + } + if (ir->eI != eiVV) + { + enerd->term[F_TEMP] *= 2; /* result of averages being done over previous and current step, + and there is no previous step */ + } + + /* if using an iterative algorithm, we need to create a working directory for the state. 
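+       The "working directory" is simply bufstate, a buffered copy of the
+       state: each coupling iteration is restored from it with
+       copy_coupling_state() before being redone with improved extended
+       variables.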
*/ + if (bIterativeCase) + { + bufstate = init_bufstate(state); + } + + /* need to make an initiation call to get the Trotter variables set, as well as other constants for non-trotter + temperature control */ + trotter_seq = init_npt_vars(ir, state, &MassQ, bTrotter); + + if (MASTER(cr)) + { + if (constr && !ir->bContinuation && ir->eConstrAlg == econtLINCS) + { + fprintf(fplog, + "RMS relative constraint deviation after constraining: %.2e\n", + constr_rmsd(constr, FALSE)); + } + if (EI_STATE_VELOCITY(ir->eI)) + { + fprintf(fplog, "Initial temperature: %g K\n", enerd->term[F_TEMP]); + } + if (bRerunMD) + { + fprintf(stderr, "starting md rerun '%s', reading coordinates from" + " input trajectory '%s'\n\n", + *(top_global->name), opt2fn("-rerun", nfile, fnm)); + if (bVerbose) + { + fprintf(stderr, "Calculated time to finish depends on nsteps from " + "run input file,\nwhich may not correspond to the time " + "needed to process input trajectory.\n\n"); + } + } + else + { + char tbuf[20]; + fprintf(stderr, "starting mdrun '%s'\n", + *(top_global->name)); + if (ir->nsteps >= 0) + { + sprintf(tbuf, "%8.1f", (ir->init_step+ir->nsteps)*ir->delta_t); + } + else + { + sprintf(tbuf, "%s", "infinite"); + } + if (ir->init_step > 0) + { + fprintf(stderr, "%s steps, %s ps (continuing from step %s, %8.1f ps).\n", + gmx_step_str(ir->init_step+ir->nsteps, sbuf), tbuf, + gmx_step_str(ir->init_step, sbuf2), + ir->init_step*ir->delta_t); + } + else + { + fprintf(stderr, "%s steps, %s ps.\n", + gmx_step_str(ir->nsteps, sbuf), tbuf); + } + } + fprintf(fplog, "\n"); + } + + walltime_accounting_start(walltime_accounting); + wallcycle_start(wcycle, ewcRUN); + print_start(fplog, cr, walltime_accounting, "mdrun"); + + /* safest point to do file checkpointing is here. More general point would be immediately before integrator call */ +#ifdef GMX_FAHCORE + chkpt_ret = fcCheckPointParallel( cr->nodeid, + NULL, 0); + if (chkpt_ret == 0) + { + gmx_fatal( 3, __FILE__, __LINE__, "Checkpoint error on step %d\n", 0 ); + } +#endif + + debug_gmx(); + /*********************************************************** + * + * Loop over MD steps + * + ************************************************************/ + + /* if rerunMD then read coordinates and velocities from input trajectory */ + if (bRerunMD) + { + if (getenv("GMX_FORCE_UPDATE")) + { + bForceUpdate = TRUE; + } + + rerun_fr.natoms = 0; + if (MASTER(cr)) + { + bNotLastFrame = read_first_frame(oenv, &status, + opt2fn("-rerun", nfile, fnm), + &rerun_fr, TRX_NEED_X | TRX_READ_V); + if (rerun_fr.natoms != top_global->natoms) + { + gmx_fatal(FARGS, + "Number of atoms in trajectory (%d) does not match the " + "run input file (%d)\n", + rerun_fr.natoms, top_global->natoms); + } + if (ir->ePBC != epbcNONE) + { + if (!rerun_fr.bBox) + { + gmx_fatal(FARGS, "Rerun trajectory frame step %d time %f does not contain a box, while pbc is used", rerun_fr.step, rerun_fr.time); + } + if (max_cutoff2(ir->ePBC, rerun_fr.box) < sqr(fr->rlistlong)) + { + gmx_fatal(FARGS, "Rerun trajectory frame step %d time %f has too small box dimensions", rerun_fr.step, rerun_fr.time); + } + } + } + + if (PAR(cr)) + { + rerun_parallel_comm(cr, &rerun_fr, &bNotLastFrame); + } + + if (ir->ePBC != epbcNONE) + { + /* Set the shift vectors. + * Necessary here when have a static box different from the tpr box. 
+ */ + calc_shifts(rerun_fr.box, fr->shift_vec); + } + } + + /* loop over MD steps or if rerunMD to end of input trajectory */ + bFirstStep = TRUE; + /* Skip the first Nose-Hoover integration when we get the state from tpx */ + bStateFromTPX = !bStateFromCP; + bInitStep = bFirstStep && (bStateFromTPX || bVV); + bStartingFromCpt = (Flags & MD_STARTFROMCPT) && bInitStep; + bLastStep = FALSE; + bSumEkinhOld = FALSE; + bExchanged = FALSE; + bNeedRepartition = FALSE; + + init_global_signals(&gs, cr, ir, repl_ex_nst); + + step = ir->init_step; + step_rel = 0; + + if (ir->nstlist == -1) + { + init_nlistheuristics(&nlh, bGStatEveryStep, step); + } + + if (MULTISIM(cr) && (repl_ex_nst <= 0 )) + { + /* check how many steps are left in other sims */ + multisim_nsteps = get_multisim_nsteps(cr, ir->nsteps); + } + + + /* and stop now if we should */ + bLastStep = (bRerunMD || (ir->nsteps >= 0 && step_rel > ir->nsteps) || + ((multisim_nsteps >= 0) && (step_rel >= multisim_nsteps ))); + while (!bLastStep || (bRerunMD && bNotLastFrame)) + { + + wallcycle_start(wcycle, ewcSTEP); + + if (bRerunMD) + { + if (rerun_fr.bStep) + { + step = rerun_fr.step; + step_rel = step - ir->init_step; + } + if (rerun_fr.bTime) + { + t = rerun_fr.time; + } + else + { + t = step; + } + } + else + { + bLastStep = (step_rel == ir->nsteps); + t = t0 + step*ir->delta_t; + } + + if (ir->efep != efepNO || ir->bSimTemp) + { + /* find and set the current lambdas. If rerunning, we either read in a state, or a lambda value, + requiring different logic. */ + + set_current_lambdas(step, ir->fepvals, bRerunMD, &rerun_fr, state_global, state, lam0); + bDoDHDL = do_per_step(step, ir->fepvals->nstdhdl); + bDoFEP = (do_per_step(step, nstfep) && (ir->efep != efepNO)); + bDoExpanded = (do_per_step(step, ir->expandedvals->nstexpanded) + && (ir->bExpanded) && (step > 0) && (!bStartingFromCpt)); + } + + if (bSimAnn) + { + update_annealing_target_temp(&(ir->opts), t); + } + + if (bRerunMD) + { + if (!DOMAINDECOMP(cr) || MASTER(cr)) + { + for (i = 0; i < state_global->natoms; i++) + { + copy_rvec(rerun_fr.x[i], state_global->x[i]); + } + if (rerun_fr.bV) + { + for (i = 0; i < state_global->natoms; i++) + { + copy_rvec(rerun_fr.v[i], state_global->v[i]); + } + } + else + { + for (i = 0; i < state_global->natoms; i++) + { + clear_rvec(state_global->v[i]); + } + if (bRerunWarnNoV) + { + fprintf(stderr, "\nWARNING: Some frames do not contain velocities.\n" + " Ekin, temperature and pressure are incorrect,\n" + " the virial will be incorrect when constraints are present.\n" + "\n"); + bRerunWarnNoV = FALSE; + } + } + } + copy_mat(rerun_fr.box, state_global->box); + copy_mat(state_global->box, state->box); + + if (vsite && (Flags & MD_RERUN_VSITE)) + { + if (DOMAINDECOMP(cr)) + { + gmx_fatal(FARGS, "Vsite recalculation with -rerun is not implemented with domain decomposition, use a single rank"); + } + if (graph) + { + /* Following is necessary because the graph may get out of sync + * with the coordinates if we only have every N'th coordinate set + */ + mk_mshift(fplog, graph, fr->ePBC, state->box, state->x); + shift_self(graph, state->box, state->x); + } + construct_vsites(vsite, state->x, ir->delta_t, state->v, + top->idef.iparams, top->idef.il, + fr->ePBC, fr->bMolPBC, cr, state->box); + if (graph) + { + unshift_self(graph, state->box, state->x); + } + } + } + + /* Stop Center of Mass motion */ + bStopCM = (ir->comm_mode != ecmNO && do_per_step(step, ir->nstcomm)); + + if (bRerunMD) + { + /* for rerun MD always do Neighbour Searching */ + bNS = 
(bFirstStep || ir->nstlist != 0); + bNStList = bNS; + } + else + { + /* Determine whether or not to do Neighbour Searching and LR */ + bNStList = (ir->nstlist > 0 && step % ir->nstlist == 0); + + bNS = (bFirstStep || bExchanged || bNeedRepartition || bNStList || bDoFEP || + (ir->nstlist == -1 && nlh.nabnsb > 0)); + + if (bNS && ir->nstlist == -1) + { + set_nlistheuristics(&nlh, bFirstStep || bExchanged || bNeedRepartition || bDoFEP, step); + } + } + + /* check whether we should stop because another simulation has + stopped. */ + if (MULTISIM(cr)) + { + if ( (multisim_nsteps >= 0) && (step_rel >= multisim_nsteps) && + (multisim_nsteps != ir->nsteps) ) + { + if (bNS) + { + if (MASTER(cr)) + { + fprintf(stderr, + "Stopping simulation %d because another one has finished\n", + cr->ms->sim); + } + bLastStep = TRUE; + gs.sig[eglsCHKPT] = 1; + } + } + } + + /* < 0 means stop at next step, > 0 means stop at next NS step */ + if ( (gs.set[eglsSTOPCOND] < 0) || + ( (gs.set[eglsSTOPCOND] > 0) && (bNStList || ir->nstlist == 0) ) ) + { + bLastStep = TRUE; + } + + /* Determine whether or not to update the Born radii if doing GB */ + bBornRadii = bFirstStep; + if (ir->implicit_solvent && (step % ir->nstgbradii == 0)) + { + bBornRadii = TRUE; + } + + do_log = do_per_step(step, ir->nstlog) || bFirstStep || bLastStep; + do_verbose = bVerbose && + (step % stepout == 0 || bFirstStep || bLastStep); + + if (bNS && !(bFirstStep && ir->bContinuation && !bRerunMD)) + { + if (bRerunMD) + { + bMasterState = TRUE; + } + else + { + bMasterState = FALSE; + /* Correct the new box if it is too skewed */ + if (DYNAMIC_BOX(*ir)) + { + if (correct_box(fplog, step, state->box, graph)) + { + bMasterState = TRUE; + } + } + if (DOMAINDECOMP(cr) && bMasterState) + { + dd_collect_state(cr->dd, state, state_global); + } + } + + if (DOMAINDECOMP(cr)) + { + /* Repartition the domain decomposition */ + wallcycle_start(wcycle, ewcDOMDEC); + dd_partition_system(fplog, step, cr, + bMasterState, nstglobalcomm, + state_global, top_global, ir, + state, &f, mdatoms, top, fr, + vsite, shellfc, constr, + nrnb, wcycle, + do_verbose && !bPMETuneRunning); + wallcycle_stop(wcycle, ewcDOMDEC); + /* If using an iterative integrator, reallocate space to match the decomposition */ + } + } + + if (MASTER(cr) && do_log) + { + print_ebin_header(fplog, step, t, state->lambda[efptFEP]); /* can we improve the information printed here? */ + } + + if (ir->efep != efepNO) + { + update_mdatoms(mdatoms, state->lambda[efptMASS]); + } + + if ((bRerunMD && rerun_fr.bV) || bExchanged) + { + + /* We need the kinetic energy at minus the half step for determining + * the full step kinetic energy and possibly for T-coupling.*/ + /* This may not be quite working correctly yet . . . . */ + compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm, + wcycle, enerd, NULL, NULL, NULL, NULL, mu_tot, + constr, NULL, FALSE, state->box, + top_global, &bSumEkinhOld, + CGLO_RERUNMD | CGLO_GSTAT | CGLO_TEMPERATURE); + } + clear_mat(force_vir); + + /* We write a checkpoint at this MD step when: + * either at an NS step when we signalled through gs, + * or at the last step (but not when we do not want confout), + * but never at the first step or with rerun. + */ + bCPT = (((gs.set[eglsCHKPT] && (bNS || ir->nstlist == 0)) || + (bLastStep && (Flags & MD_CONFOUT))) && + step > ir->init_step && !bRerunMD); + if (bCPT) + { + gs.set[eglsCHKPT] = 0; + } + + /* Determine the energy and pressure: + * at nstcalcenergy steps and at energy output steps (set below). 
+ */ + if (EI_VV(ir->eI) && (!bInitStep)) + { + /* for vv, the first half of the integration actually corresponds + to the previous step. bCalcEner is only required to be evaluated on the 'next' step, + but the virial needs to be calculated on both the current step and the 'next' step. Future + reorganization may be able to get rid of one of the bCalcVir=TRUE steps. */ + + bCalcEner = do_per_step(step-1, ir->nstcalcenergy); + bCalcVir = bCalcEner || + (ir->epc != epcNO && (do_per_step(step, ir->nstpcouple) || do_per_step(step-1, ir->nstpcouple))); + } + else + { + bCalcEner = do_per_step(step, ir->nstcalcenergy); + bCalcVir = bCalcEner || + (ir->epc != epcNO && do_per_step(step, ir->nstpcouple)); + } + + /* Do we need global communication ? */ + bGStat = (bCalcVir || bCalcEner || bStopCM || + do_per_step(step, nstglobalcomm) || (bVV && IR_NVT_TROTTER(ir) && do_per_step(step-1, nstglobalcomm)) || + (ir->nstlist == -1 && !bRerunMD && step >= nlh.step_nscheck)); + + do_ene = (do_per_step(step, ir->nstenergy) || bLastStep); + + if (do_ene || do_log) + { + bCalcVir = TRUE; + bCalcEner = TRUE; + bGStat = TRUE; + } + + /* these CGLO_ options remain the same throughout the iteration */ + cglo_flags = ((bRerunMD ? CGLO_RERUNMD : 0) | + (bGStat ? CGLO_GSTAT : 0) + ); + + force_flags = (GMX_FORCE_STATECHANGED | + ((DYNAMIC_BOX(*ir) || bRerunMD) ? GMX_FORCE_DYNAMICBOX : 0) | + GMX_FORCE_ALLFORCES | + GMX_FORCE_SEPLRF | + (bCalcVir ? GMX_FORCE_VIRIAL : 0) | + (bCalcEner ? GMX_FORCE_ENERGY : 0) | + (bDoFEP ? GMX_FORCE_DHDL : 0) + ); + + if (fr->bTwinRange) + { + if (do_per_step(step, ir->nstcalclr)) + { + force_flags |= GMX_FORCE_DO_LR; + } + } + + if (shellfc) + { + /* Now is the time to relax the shells */ + count = relax_shell_flexcon(fplog, cr, bVerbose, step, + ir, bNS, force_flags, + top, + constr, enerd, fcd, + state, f, force_vir, mdatoms, + nrnb, wcycle, graph, groups, + shellfc, fr, bBornRadii, t, mu_tot, + &bConverged, vsite, + mdoutf_get_fp_field(outf)); + tcount += count; + + if (bConverged) + { + nconverged++; + } + } + else + { + /* The coordinates (x) are shifted (to get whole molecules) + * in do_force. + * This is parallellized as well, and does communication too. + * Check comments in sim_util.c + */ + do_force(fplog, cr, ir, step, nrnb, wcycle, top, groups, + state->box, state->x, &state->hist, + f, force_vir, mdatoms, enerd, fcd, + state->lambda, graph, + fr, vsite, mu_tot, t, mdoutf_get_fp_field(outf), ed, bBornRadii, + (bNS ? GMX_FORCE_NS : 0) | force_flags); + } + + if (bVV && !bStartingFromCpt && !bRerunMD) + /* ############### START FIRST UPDATE HALF-STEP FOR VV METHODS############### */ + { + if (ir->eI == eiVV && bInitStep) + { + /* if using velocity verlet with full time step Ekin, + * take the first half step only to compute the + * virial for the first step. From there, + * revert back to the initial coordinates + * so that the input is actually the initial step. + */ + copy_rvecn(state->v, cbuf, 0, state->natoms); /* should make this better for parallelizing? */ + } + else + { + /* this is for NHC in the Ekin(t+dt/2) version of vv */ + trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ1); + } + + /* If we are using twin-range interactions where the long-range component + * is only evaluated every nstcalclr>1 steps, we should do a special update + * step to combine the long-range forces on these steps. + * For nstcalclr=1 this is not done, since the forces would have been added + * directly to the short-range forces already. 
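+             * bUpdateDoLR below is thus only TRUE on the steps where the
+             * long-range component has just been recomputed; on those steps
+             * update_coords() also folds the stored fr->f_twin into the
+             * update, on all other steps only the short-range force is used.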
+ */ + bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr)); + + update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC, + f, bUpdateDoLR, fr->f_twin, fcd, + ekind, M, upd, bInitStep, etrtVELOCITY1, + cr, nrnb, constr, &top->idef); + + if (bIterativeCase && do_per_step(step-1, ir->nstpcouple) && !bInitStep) + { + gmx_iterate_init(&iterate, TRUE); + } + /* for iterations, we save these vectors, as we will be self-consistently iterating + the calculations */ + + /*#### UPDATE EXTENDED VARIABLES IN TROTTER FORMULATION */ + + /* save the state */ + if (iterate.bIterationActive) + { + copy_coupling_state(state, bufstate, ekind, ekind_save, &(ir->opts)); + } + + bFirstIterate = TRUE; + while (bFirstIterate || iterate.bIterationActive) + { + if (iterate.bIterationActive) + { + copy_coupling_state(bufstate, state, ekind_save, ekind, &(ir->opts)); + if (bFirstIterate && bTrotter) + { + /* The first time through, we need a decent first estimate + of veta(t+dt) to compute the constraints. Do + this by computing the box volume part of the + trotter integration at this time. Nothing else + should be changed by this routine here. If + !(first time), we start with the previous value + of veta. */ + + veta_save = state->veta; + trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ0); + vetanew = state->veta; + state->veta = veta_save; + } + } + + bOK = TRUE; + if (!bRerunMD || rerun_fr.bV || bForceUpdate) /* Why is rerun_fr.bV here? Unclear. */ + { + update_constraints(fplog, step, NULL, ir, ekind, mdatoms, + state, fr->bMolPBC, graph, f, + &top->idef, shake_vir, + cr, nrnb, wcycle, upd, constr, + TRUE, bCalcVir, vetanew); + + if (!bOK) + { + gmx_fatal(FARGS, "Constraint error: Shake, Lincs or Settle could not solve the constrains"); + } + + } + else if (graph) + { + /* Need to unshift here if a do_force has been + called in the previous step */ + unshift_self(graph, state->box, state->x); + } + + /* if VV, compute the pressure and constraints */ + /* For VV2, we strictly only need this if using pressure + * control, but we really would like to have accurate pressures + * printed out. + * Think about ways around this in the future? + * For now, keep this choice in comments. + */ + /*bPres = (ir->eI==eiVV || IR_NPT_TROTTER(ir)); */ + /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && IR_NPT_TROTTER(ir)));*/ + bPres = TRUE; + bTemp = ((ir->eI == eiVV && (!bInitStep)) || (ir->eI == eiVVAK)); + if (bCalcEner && ir->eI == eiVVAK) /*MRS: 7/9/2010 -- this still doesn't fix it?*/ + { + bSumEkinhOld = TRUE; + } + /* for vv, the first half of the integration actually corresponds to the previous step. + So we need information from the last step in the first half of the integration */ + if (bGStat || do_per_step(step-1, nstglobalcomm)) + { + compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm, + wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, + constr, NULL, FALSE, state->box, + top_global, &bSumEkinhOld, + cglo_flags + | CGLO_ENERGY + | (bTemp ? CGLO_TEMPERATURE : 0) + | (bPres ? CGLO_PRESSURE : 0) + | (bPres ? CGLO_CONSTRAINT : 0) + | ((iterate.bIterationActive) ? CGLO_ITERATE : 0) + | (bFirstIterate ? 
CGLO_FIRSTITERATE : 0) + | CGLO_SCALEEKIN + ); + /* explanation of above: + a) We compute Ekin at the full time step + if 1) we are using the AveVel Ekin, and it's not the + initial step, or 2) if we are using AveEkin, but need the full + time step kinetic energy for the pressure (always true now, since we want accurate statistics). + b) If we are using EkinAveEkin for the kinetic energy for the temperature control, we still feed in + EkinAveVel because it's needed for the pressure */ + } + /* temperature scaling and pressure scaling to produce the extended variables at t+dt */ + if (!bInitStep) + { + if (bTrotter) + { + m_add(force_vir, shake_vir, total_vir); /* we need the un-dispersion corrected total vir here */ + trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ2); + } + else + { + if (bExchanged) + { + + /* We need the kinetic energy at minus the half step for determining + * the full step kinetic energy and possibly for T-coupling.*/ + /* This may not be quite working correctly yet . . . . */ + compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm, + wcycle, enerd, NULL, NULL, NULL, NULL, mu_tot, + constr, NULL, FALSE, state->box, + top_global, &bSumEkinhOld, + CGLO_RERUNMD | CGLO_GSTAT | CGLO_TEMPERATURE); + } + } + } + + if (iterate.bIterationActive && + done_iterating(cr, fplog, step, &iterate, bFirstIterate, + state->veta, &vetanew)) + { + break; + } + bFirstIterate = FALSE; + } + + if (bTrotter && !bInitStep) + { + copy_mat(shake_vir, state->svir_prev); + copy_mat(force_vir, state->fvir_prev); + if (IR_NVT_TROTTER(ir) && ir->eI == eiVV) + { + /* update temperature and kinetic energy now that step is over - this is the v(t+dt) point */ + enerd->term[F_TEMP] = sum_ekin(&(ir->opts), ekind, NULL, (ir->eI == eiVV), FALSE); + enerd->term[F_EKIN] = trace(ekind->ekin); + } + } + /* if it's the initial step, we performed this first step just to get the constraint virial */ + if (bInitStep && ir->eI == eiVV) + { + copy_rvecn(cbuf, state->v, 0, state->natoms); + } + } + + /* MRS -- now done iterating -- compute the conserved quantity */ + if (bVV) + { + saved_conserved_quantity = compute_conserved_from_auxiliary(ir, state, &MassQ); + if (ir->eI == eiVV) + { + last_ekin = enerd->term[F_EKIN]; + } + if ((ir->eDispCorr != edispcEnerPres) && (ir->eDispCorr != edispcAllEnerPres)) + { + saved_conserved_quantity -= enerd->term[F_DISPCORR]; + } + /* sum up the foreign energy and dhdl terms for vv. currently done every step so that dhdl is correct in the .edr */ + if (!bRerunMD) + { + sum_dhdl(enerd, state->lambda, ir->fepvals); + } + } + + /* ######## END FIRST UPDATE STEP ############## */ + /* ######## If doing VV, we now have v(dt) ###### */ + if (bDoExpanded) + { + /* perform extended ensemble sampling in lambda - we don't + actually move to the new state before outputting + statistics, but if performing simulated tempering, we + do update the velocities and the tau_t. */ + + lamnew = ExpandedEnsembleDynamics(fplog, ir, enerd, state, &MassQ, state->fep_state, &state->dfhist, step, mcrng, state->v, mdatoms); + /* history is maintained in state->dfhist, but state_global is what is sent to trajectory and log output */ + copy_df_history(&state_global->dfhist, &state->dfhist); + } + + /* Now we have the energies and forces corresponding to the + * coordinates at time t. We must output all of this before + * the update. 
+ */ + do_md_trajectory_writing(fplog, cr, nfile, fnm, step, step_rel, t, + ir, state, state_global, top_global, fr, upd, + outf, mdebin, ekind, f, f_global, + wcycle, mcrng, &nchkpt, + bCPT, bRerunMD, bLastStep, (Flags & MD_CONFOUT), + bSumEkinhOld); + + /* kludge -- virial is lost with restart for NPT control. Must restart */ + if (bStartingFromCpt && bVV) + { + copy_mat(state->svir_prev, shake_vir); + copy_mat(state->fvir_prev, force_vir); + } + + elapsed_time = walltime_accounting_get_current_elapsed_time(walltime_accounting); + + /* Check whether everything is still allright */ + if (((int)gmx_get_stop_condition() > handled_stop_condition) +#ifdef GMX_THREAD_MPI + && MASTER(cr) +#endif + ) + { + /* this is just make gs.sig compatible with the hack + of sending signals around by MPI_Reduce with together with + other floats */ + if (gmx_get_stop_condition() == gmx_stop_cond_next_ns) + { + gs.sig[eglsSTOPCOND] = 1; + } + if (gmx_get_stop_condition() == gmx_stop_cond_next) + { + gs.sig[eglsSTOPCOND] = -1; + } + /* < 0 means stop at next step, > 0 means stop at next NS step */ + if (fplog) + { + fprintf(fplog, + "\n\nReceived the %s signal, stopping at the next %sstep\n\n", + gmx_get_signal_name(), + gs.sig[eglsSTOPCOND] == 1 ? "NS " : ""); + fflush(fplog); + } + fprintf(stderr, + "\n\nReceived the %s signal, stopping at the next %sstep\n\n", + gmx_get_signal_name(), + gs.sig[eglsSTOPCOND] == 1 ? "NS " : ""); + fflush(stderr); + handled_stop_condition = (int)gmx_get_stop_condition(); + } + else if (MASTER(cr) && (bNS || ir->nstlist <= 0) && + (max_hours > 0 && elapsed_time > max_hours*60.0*60.0*0.99) && + gs.sig[eglsSTOPCOND] == 0 && gs.set[eglsSTOPCOND] == 0) + { + /* Signal to terminate the run */ + gs.sig[eglsSTOPCOND] = 1; + if (fplog) + { + fprintf(fplog, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n", gmx_step_str(step, sbuf), max_hours*0.99); + } + fprintf(stderr, "\nStep %s: Run time exceeded %.3f hours, will terminate the run\n", gmx_step_str(step, sbuf), max_hours*0.99); + } + + if (bResetCountersHalfMaxH && MASTER(cr) && + elapsed_time > max_hours*60.0*60.0*0.495) + { + gs.sig[eglsRESETCOUNTERS] = 1; + } + + if (ir->nstlist == -1 && !bRerunMD) + { + /* When bGStatEveryStep=FALSE, global_stat is only called + * when we check the atom displacements, not at NS steps. + * This means that also the bonded interaction count check is not + * performed immediately after NS. Therefore a few MD steps could + * be performed with missing interactions. + * But wrong energies are never written to file, + * since energies are only written after global_stat + * has been called. + */ + if (step >= nlh.step_nscheck) + { + nlh.nabnsb = natoms_beyond_ns_buffer(ir, fr, &top->cgs, + nlh.scale_tot, state->x); + } + else + { + /* This is not necessarily true, + * but step_nscheck is determined quite conservatively. + */ + nlh.nabnsb = 0; + } + } + + /* In parallel we only have to check for checkpointing in steps + * where we do global communication, + * otherwise the other nodes don't know. 
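+         * The master only raises the signal (gs.sig[eglsCHKPT]) here; it
+         * reaches the other ranks through the global summation performed in
+         * compute_globals(), and the checkpoint itself is written at the
+         * next NS step once gs.set[eglsCHKPT] is non-zero (see bCPT above).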
+ */ + if (MASTER(cr) && ((bGStat || !PAR(cr)) && + cpt_period >= 0 && + (cpt_period == 0 || + elapsed_time >= nchkpt*cpt_period*60.0)) && + gs.set[eglsCHKPT] == 0) + { + gs.sig[eglsCHKPT] = 1; + } + + /* at the start of step, randomize or scale the velocities (trotter done elsewhere) */ + if (EI_VV(ir->eI)) + { + if (!bInitStep) + { + update_tcouple(step, ir, state, ekind, upd, &MassQ, mdatoms); + } + if (ETC_ANDERSEN(ir->etc)) /* keep this outside of update_tcouple because of the extra info required to pass */ + { + gmx_bool bIfRandomize; + bIfRandomize = update_randomize_velocities(ir, step, mdatoms, state, upd, &top->idef, constr); + /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */ + if (constr && bIfRandomize) + { + update_constraints(fplog, step, NULL, ir, ekind, mdatoms, + state, fr->bMolPBC, graph, f, + &top->idef, tmp_vir, + cr, nrnb, wcycle, upd, constr, + TRUE, bCalcVir, vetanew); + } + } + } + + if (bIterativeCase && do_per_step(step, ir->nstpcouple)) + { + gmx_iterate_init(&iterate, TRUE); + /* for iterations, we save these vectors, as we will be redoing the calculations */ + copy_coupling_state(state, bufstate, ekind, ekind_save, &(ir->opts)); + } + + bFirstIterate = TRUE; + while (bFirstIterate || iterate.bIterationActive) + { + /* We now restore these vectors to redo the calculation with improved extended variables */ + if (iterate.bIterationActive) + { + copy_coupling_state(bufstate, state, ekind_save, ekind, &(ir->opts)); + } + + /* We make the decision to break or not -after- the calculation of Ekin and Pressure, + so scroll down for that logic */ + + /* ######### START SECOND UPDATE STEP ################# */ + /* Box is changed in update() when we do pressure coupling, + * but we should still use the old box for energy corrections and when + * writing it to the energy file, so it matches the trajectory files for + * the same timestep above. Make a copy in a separate array. + */ + copy_mat(state->box, lastbox); + + bOK = TRUE; + dvdl_constr = 0; + + if (!(bRerunMD && !rerun_fr.bV && !bForceUpdate)) + { + wallcycle_start(wcycle, ewcUPDATE); + /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */ + if (bTrotter) + { + if (iterate.bIterationActive) + { + if (bFirstIterate) + { + scalevir = 1; + } + else + { + /* we use a new value of scalevir to converge the iterations faster */ + scalevir = tracevir/trace(shake_vir); + } + msmul(shake_vir, scalevir, shake_vir); + m_add(force_vir, shake_vir, total_vir); + clear_mat(shake_vir); + } + trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ3); + /* We can only do Berendsen coupling after we have summed + * the kinetic energy or virial. Since the happens + * in global_state after update, we should only do it at + * step % nstlist = 1 with bGStatEveryStep=FALSE. + */ + } + else + { + update_tcouple(step, ir, state, ekind, upd, &MassQ, mdatoms); + update_pcouple(fplog, step, ir, state, pcoupl_mu, M, bInitStep); + } + + if (bVV) + { + bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr)); + + /* velocity half-step update */ + update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC, f, + bUpdateDoLR, fr->f_twin, fcd, + ekind, M, upd, FALSE, etrtVELOCITY2, + cr, nrnb, constr, &top->idef); + } + + /* Above, initialize just copies ekinh into ekin, + * it doesn't copy position (for VV), + * and entire integrator for MD. 
+ */ + + if (ir->eI == eiVVAK) + { + copy_rvecn(state->x, cbuf, 0, state->natoms); + } + bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr)); + + update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC, f, + bUpdateDoLR, fr->f_twin, fcd, + ekind, M, upd, bInitStep, etrtPOSITION, cr, nrnb, constr, &top->idef); + wallcycle_stop(wcycle, ewcUPDATE); + + update_constraints(fplog, step, &dvdl_constr, ir, ekind, mdatoms, state, + fr->bMolPBC, graph, f, + &top->idef, shake_vir, + cr, nrnb, wcycle, upd, constr, + FALSE, bCalcVir, state->veta); + + if (ir->eI == eiVVAK) + { + /* erase F_EKIN and F_TEMP here? */ + /* just compute the kinetic energy at the half step to perform a trotter step */ + compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm, + wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot, + constr, NULL, FALSE, lastbox, + top_global, &bSumEkinhOld, + cglo_flags | CGLO_TEMPERATURE + ); + wallcycle_start(wcycle, ewcUPDATE); + trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ4); + /* now we know the scaling, we can compute the positions again again */ + copy_rvecn(cbuf, state->x, 0, state->natoms); + + bUpdateDoLR = (fr->bTwinRange && do_per_step(step, ir->nstcalclr)); + + update_coords(fplog, step, ir, mdatoms, state, fr->bMolPBC, f, + bUpdateDoLR, fr->f_twin, fcd, + ekind, M, upd, bInitStep, etrtPOSITION, cr, nrnb, constr, &top->idef); + wallcycle_stop(wcycle, ewcUPDATE); + + /* do we need an extra constraint here? just need to copy out of state->v to upd->xp? */ + /* are the small terms in the shake_vir here due + * to numerical errors, or are they important + * physically? I'm thinking they are just errors, but not completely sure. + * For now, will call without actually constraining, constr=NULL*/ + update_constraints(fplog, step, NULL, ir, ekind, mdatoms, + state, fr->bMolPBC, graph, f, + &top->idef, tmp_vir, + cr, nrnb, wcycle, upd, NULL, + FALSE, bCalcVir, + state->veta); + } + if (!bOK) + { + gmx_fatal(FARGS, "Constraint error: Shake, Lincs or Settle could not solve the constrains"); + } + + if (fr->bSepDVDL && fplog && do_log) + { + gmx_print_sepdvdl(fplog, "Constraint dV/dl", 0.0, dvdl_constr); + } + if (bVV) + { + /* this factor or 2 correction is necessary + because half of the constraint force is removed + in the vv step, so we have to double it. See + the Redmine issue #1255. It is not yet clear + if the factor of 2 is exact, or just a very + good approximation, and this will be + investigated. The next step is to see if this + can be done adding a dhdl contribution from the + rattle step, but this is somewhat more + complicated with the current code. Will be + investigated, hopefully for 4.6.3. However, + this current solution is much better than + having it completely wrong. 
+                       */
+                    enerd->term[F_DVDL_CONSTR] += 2*dvdl_constr;
+                }
+                else
+                {
+                    enerd->term[F_DVDL_CONSTR] += dvdl_constr;
+                }
+            }
+            else if (graph)
+            {
+                /* Need to unshift here */
+                unshift_self(graph, state->box, state->x);
+            }
+
+            if (vsite != NULL)
+            {
+                wallcycle_start(wcycle, ewcVSITECONSTR);
+                if (graph != NULL)
+                {
+                    shift_self(graph, state->box, state->x);
+                }
+                construct_vsites(vsite, state->x, ir->delta_t, state->v,
+                                 top->idef.iparams, top->idef.il,
+                                 fr->ePBC, fr->bMolPBC, cr, state->box);
+
+                if (graph != NULL)
+                {
+                    unshift_self(graph, state->box, state->x);
+                }
+                wallcycle_stop(wcycle, ewcVSITECONSTR);
+            }
+
+            /* ############## IF NOT VV, Calculate globals HERE, also iterate constraints ############ */
+            /* With Leap-Frog we can skip compute_globals at
+             * non-communication steps, but we need to calculate
+             * the kinetic energy one step before communication.
+             */
+            if (bGStat || (!EI_VV(ir->eI) && do_per_step(step+1, nstglobalcomm)))
+            {
+                if (ir->nstlist == -1 && bFirstIterate)
+                {
+                    gs.sig[eglsNABNSB] = nlh.nabnsb;
+                }
+                compute_globals(fplog, gstat, cr, ir, fr, ekind, state, state_global, mdatoms, nrnb, vcm,
+                                wcycle, enerd, force_vir, shake_vir, total_vir, pres, mu_tot,
+                                constr,
+                                bFirstIterate ? &gs : NULL,
+                                (step_rel % gs.nstms == 0) &&
+                                (multisim_nsteps < 0 || (step_rel < multisim_nsteps)),
+                                lastbox,
+                                top_global, &bSumEkinhOld,
+                                cglo_flags
+                                | (!EI_VV(ir->eI) || bRerunMD ? CGLO_ENERGY : 0)
+                                | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0)
+                                | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0)
+                                | (!EI_VV(ir->eI) || bRerunMD ? CGLO_PRESSURE : 0)
+                                | (iterate.bIterationActive ? CGLO_ITERATE : 0)
+                                | (bFirstIterate ? CGLO_FIRSTITERATE : 0)
+                                | CGLO_CONSTRAINT
+                                );
+                if (ir->nstlist == -1 && bFirstIterate)
+                {
+                    nlh.nabnsb = gs.set[eglsNABNSB];
+                    gs.set[eglsNABNSB] = 0;
+                }
+            }
+            /* bIterate is set to keep it from eliminating the old ekin kinetic energy terms */
+            /* ############# END CALC EKIN AND PRESSURE ################# */
+
+            /* Note: this is OK, but there are some numerical precision issues with using the convergence of
+               the virial that should probably be addressed eventually. state->veta has better properties,
+               but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could
+               generate the new shake_vir, but test the veta value for convergence. This will take some thought. */
+
+            if (iterate.bIterationActive &&
+                done_iterating(cr, fplog, step, &iterate, bFirstIterate,
+                               trace(shake_vir), &tracevir))
+            {
+                break;
+            }
+            bFirstIterate = FALSE;
+        }
+
+        if (!bVV || bRerunMD)
+        {
+            /* sum up the foreign energy and dhdl terms for md and sd. currently done every step so that dhdl is correct in the .edr */
+            sum_dhdl(enerd, state->lambda, ir->fepvals);
+        }
+        update_box(fplog, step, ir, mdatoms, state, f,
+                   ir->nstlist == -1 ? &nlh.scale_tot : NULL, pcoupl_mu, nrnb, upd);
+
+        /* ################# END UPDATE STEP 2 ################# */
+        /* #### We now have r(t+dt) and v(t+dt/2) ############# */
+
+        /* The coordinates (x) were unshifted in update */
+        if (!bGStat)
+        {
+            /* We will not sum ekinh_old,
+             * so signal that we still have to do it.
+ */ + bSumEkinhOld = TRUE; + } + + /* ######### BEGIN PREPARING EDR OUTPUT ########### */ + + /* use the directly determined last velocity, not actually the averaged half steps */ + if (bTrotter && ir->eI == eiVV) + { + enerd->term[F_EKIN] = last_ekin; + } + enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN]; + + if (bVV) + { + enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity; + } + else + { + enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + compute_conserved_from_auxiliary(ir, state, &MassQ); + } + /* ######### END PREPARING EDR OUTPUT ########### */ + + /* Output stuff */ + if (MASTER(cr)) + { + gmx_bool do_dr, do_or; + + if (fplog && do_log && bDoExpanded) + { + /* only needed if doing expanded ensemble */ + PrintFreeEnergyInfoToFile(fplog, ir->fepvals, ir->expandedvals, ir->bSimTemp ? ir->simtempvals : NULL, + &state_global->dfhist, state->fep_state, ir->nstlog, step); + } + if (!(bStartingFromCpt && (EI_VV(ir->eI)))) + { + if (bCalcEner) + { + upd_mdebin(mdebin, bDoDHDL, TRUE, + t, mdatoms->tmass, enerd, state, + ir->fepvals, ir->expandedvals, lastbox, + shake_vir, force_vir, total_vir, pres, + ekind, mu_tot, constr); + } + else + { + upd_mdebin_step(mdebin); + } + + do_dr = do_per_step(step, ir->nstdisreout); + do_or = do_per_step(step, ir->nstorireout); + + print_ebin(mdoutf_get_fp_ene(outf), do_ene, do_dr, do_or, do_log ? fplog : NULL, + step, t, + eprNORMAL, bCompact, mdebin, fcd, groups, &(ir->opts)); + } + if (ir->ePull != epullNO) + { + pull_print_output(ir->pull, step, t); + } + + if (do_per_step(step, ir->nstlog)) + { + if (fflush(fplog) != 0) + { + gmx_fatal(FARGS, "Cannot flush logfile - maybe you are out of disk space?"); + } + } + } + if (bDoExpanded) + { + /* Have to do this part _after_ outputting the logfile and the edr file */ + /* Gets written into the state at the beginning of next loop*/ + state->fep_state = lamnew; + } + /* Print the remaining wall clock time for the run */ + if (MULTIMASTER(cr) && (do_verbose || gmx_got_usr_signal()) && !bPMETuneRunning) + { + if (shellfc) + { + fprintf(stderr, "\n"); + } + print_time(stderr, walltime_accounting, step, ir, cr); + } + + /* Ion/water position swapping. + * Not done in last step since trajectory writing happens before this call + * in the MD loop and exchanges would be lost anyway. */ + bNeedRepartition = FALSE; + if ((ir->eSwapCoords != eswapNO) && (step > 0) && !bLastStep && + do_per_step(step, ir->swap->nstswap)) + { + bNeedRepartition = do_swapcoords(cr, step, t, ir, wcycle, + bRerunMD ? rerun_fr.x : state->x, + bRerunMD ? rerun_fr.box : state->box, + top_global, MASTER(cr) && bVerbose, bRerunMD); + + if (bNeedRepartition && DOMAINDECOMP(cr)) + { + dd_collect_state(cr->dd, state, state_global); + } + } + + /* Replica exchange */ + bExchanged = FALSE; + if ((repl_ex_nst > 0) && (step > 0) && !bLastStep && + do_per_step(step, repl_ex_nst)) + { + bExchanged = replica_exchange(fplog, cr, repl_ex, + state_global, enerd, + state, step, t); + } + + if ( (bExchanged || bNeedRepartition) && DOMAINDECOMP(cr) ) + { + dd_partition_system(fplog, step, cr, TRUE, 1, + state_global, top_global, ir, + state, &f, mdatoms, top, fr, + vsite, shellfc, constr, + nrnb, wcycle, FALSE); + } + + bFirstStep = FALSE; + bInitStep = FALSE; + bStartingFromCpt = FALSE; + + /* ####### SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */ + /* With all integrators, except VV, we need to retain the pressure + * at the current step for coupling at the next step. 
+         */
+        if ((state->flags & (1<<estPRES_PREV)) &&
+            (bGStatEveryStep ||
+             (ir->nstpcouple > 0 && step % ir->nstpcouple == 0)))
+        {
+            /* Store the pressure in t_state for pressure coupling
+             * at the next MD step.
+             */
+            copy_mat(pres, state->pres_prev);
+        }
+
+        /* ####### END SET VARIABLES FOR NEXT ITERATION ###### */
+
+        if ( (membed != NULL) && (!bLastStep) )
+        {
+            rescale_membed(step_rel, membed, state_global->x);
+        }
+
+        if (bRerunMD)
+        {
+            if (MASTER(cr))
+            {
+                /* read next frame from input trajectory */
+                bNotLastFrame = read_next_frame(oenv, status, &rerun_fr);
+            }
+
+            if (PAR(cr))
+            {
+                rerun_parallel_comm(cr, &rerun_fr, &bNotLastFrame);
+            }
+        }
+
+        if (!bRerunMD || !rerun_fr.bStep)
+        {
+            /* increase the MD step number */
+            step++;
+            step_rel++;
+        }
+
+        cycles = wallcycle_stop(wcycle, ewcSTEP);
+        if (DOMAINDECOMP(cr) && wcycle)
+        {
+            dd_cycles_add(cr->dd, cycles, ddCyclStep);
+        }
+
+        if (bPMETuneRunning || bPMETuneTry)
+        {
+            /* PME grid + cut-off optimization with GPUs or PME nodes */
+
+            /* Count the total cycles over the last steps */
+            cycles_pmes += cycles;
+
+            /* We can only switch cut-off at NS steps */
+            if (step % ir->nstlist == 0)
+            {
+                /* PME grid + cut-off optimization with GPUs or PME nodes */
+                if (bPMETuneTry)
+                {
+                    if (DDMASTER(cr->dd))
+                    {
+                        /* PME node load is too high, start tuning */
+                        bPMETuneRunning = (dd_pme_f_ratio(cr->dd) >= 1.05);
+                    }
+                    dd_bcast(cr->dd, sizeof(gmx_bool), &bPMETuneRunning);
+
+                    if (bPMETuneRunning || step_rel > ir->nstlist*50)
+                    {
+                        bPMETuneTry = FALSE;
+                    }
+                }
+                if (bPMETuneRunning)
+                {
+                    /* init_step might not be a multiple of nstlist,
+                     * but the first cycle is always skipped anyhow.
+                     */
+                    bPMETuneRunning =
+                        pme_load_balance(pme_loadbal, cr,
+                                         (bVerbose && MASTER(cr)) ? stderr : NULL,
+                                         fplog,
+                                         ir, state, cycles_pmes,
+                                         fr->ic, fr->nbv, &fr->pmedata,
+                                         step);
+
+                    /* Update constants in forcerec/inputrec to keep them in sync with fr->ic */
+                    fr->ewaldcoeff_q = fr->ic->ewaldcoeff_q;
+                    fr->rlist = fr->ic->rlist;
+                    fr->rlistlong = fr->ic->rlistlong;
+                    fr->rcoulomb = fr->ic->rcoulomb;
+                    fr->rvdw = fr->ic->rvdw;
+                }
+                cycles_pmes = 0;
+            }
+        }
+
+        if (step_rel == wcycle_get_reset_counters(wcycle) ||
+            gs.set[eglsRESETCOUNTERS] != 0)
+        {
+            /* Reset all the counters related to performance over the run */
+            reset_all_counters(fplog, cr, step, &step_rel, ir, wcycle, nrnb, walltime_accounting,
+                               fr->nbv != NULL && fr->nbv->bUseGPU ?
fr->nbv->cu_nbv : NULL); + wcycle_set_reset_counters(wcycle, -1); + if (!(cr->duty & DUTY_PME)) + { + /* Tell our PME node to reset its counters */ + gmx_pme_send_resetcounters(cr, step); + } + /* Correct max_hours for the elapsed time */ + max_hours -= elapsed_time/(60.0*60.0); + bResetCountersHalfMaxH = FALSE; + gs.set[eglsRESETCOUNTERS] = 0; + } + + } + /* End of main MD loop */ + debug_gmx(); + + /* Stop measuring walltime */ + walltime_accounting_end(walltime_accounting); + + if (bRerunMD && MASTER(cr)) + { + close_trj(status); + } + + if (!(cr->duty & DUTY_PME)) + { + /* Tell the PME only node to finish */ + gmx_pme_send_finish(cr); + } + + if (MASTER(cr)) + { + if (ir->nstcalcenergy > 0 && !bRerunMD) + { + print_ebin(mdoutf_get_fp_ene(outf), FALSE, FALSE, FALSE, fplog, step, t, + eprAVER, FALSE, mdebin, fcd, groups, &(ir->opts)); + } + } + + done_mdoutf(outf); + debug_gmx(); + + if (ir->nstlist == -1 && nlh.nns > 0 && fplog) + { + fprintf(fplog, "Average neighborlist lifetime: %.1f steps, std.dev.: %.1f steps\n", nlh.s1/nlh.nns, sqrt(nlh.s2/nlh.nns - sqr(nlh.s1/nlh.nns))); + fprintf(fplog, "Average number of atoms that crossed the half buffer length: %.1f\n\n", nlh.ab/nlh.nns); + } + + if (pme_loadbal != NULL) + { + pme_loadbal_done(pme_loadbal, cr, fplog, + fr->nbv != NULL && fr->nbv->bUseGPU); + } + + if (shellfc && fplog) + { + fprintf(fplog, "Fraction of iterations that converged: %.2f %%\n", + (nconverged*100.0)/step_rel); + fprintf(fplog, "Average number of force evaluations per MD step: %.2f\n\n", + tcount/step_rel); + } + + if (repl_ex_nst > 0 && MASTER(cr)) + { + print_replica_exchange_statistics(fplog, repl_ex); + } + + walltime_accounting_set_nsteps_done(walltime_accounting, step_rel); + + return 0; +}
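
As an aside on the neighborlist statistics printed just before the return above: by the
form of the expressions, nlh.s1 and nlh.s2 are running sums of the lifetime and of its
square over nlh.nns samples, from which the mean and standard deviation are recovered.
A minimal, self-contained sketch of that arithmetic follows; the names s1, s2 and nns
mirror the nlh fields used in the fprintf() call, while the sample data and everything
else are purely illustrative.

    #include <math.h>
    #include <stdio.h>

    int main(void)
    {
        /* Running sums: s1 accumulates the samples, s2 their squares,
         * nns counts how many samples were added. */
        double s1 = 0, s2 = 0, nns = 0;
        double lifetimes[] = { 10.0, 12.0, 9.0, 11.0 }; /* illustrative values, in steps */
        int    i;

        for (i = 0; i < 4; i++)
        {
            s1  += lifetimes[i];
            s2  += lifetimes[i]*lifetimes[i];
            nns += 1;
        }

        /* Mean and standard deviation from the running sums, the same
         * expressions as in the "Average neighborlist lifetime" output:
         * mean = s1/nns, std.dev. = sqrt(s2/nns - (s1/nns)^2). */
        printf("mean %.1f steps, std.dev. %.1f steps\n",
               s1/nns, sqrt(s2/nns - (s1/nns)*(s1/nns)));

        return 0;
    }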