2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7 * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
43 #include "gromacs/commandline/pargs.h"
44 #include "gromacs/fileio/confio.h"
45 #include "gromacs/fileio/matio.h"
46 #include "gromacs/fileio/trxio.h"
47 #include "gromacs/fileio/xvgr.h"
48 #include "gromacs/gmxana/eigio.h"
49 #include "gromacs/gmxana/gmx_ana.h"
50 #include "gromacs/linearalgebra/eigensolver.h"
51 #include "gromacs/math/do_fit.h"
52 #include "gromacs/math/vec.h"
53 #include "gromacs/pbcutil/pbc.h"
54 #include "gromacs/pbcutil/rmpbc.h"
55 #include "gromacs/topology/index.h"
56 #include "gromacs/topology/topology.h"
57 #include "gromacs/utility/arrayref.h"
58 #include "gromacs/utility/arraysize.h"
59 #include "gromacs/utility/cstringutil.h"
60 #include "gromacs/utility/exceptions.h"
61 #include "gromacs/utility/fatalerror.h"
62 #include "gromacs/utility/futil.h"
63 #include "gromacs/utility/smalloc.h"
64 #include "gromacs/utility/sysinfo.h"
73 /*! \brief Throw an error if any element in index exceeds a given number.
75 * \param[in] indices to be acessed
76 * \param[in] largestOkayIndex to be accessed
77 * \param[in] indexUsagePurpose to be more explicit in the error message
79 * \throws RangeError if largestOkayIndex is larger than any element in indices
82 void throwErrorIfIndexOutOfBounds(ArrayRef<const int> indices,
83 const int largestOkayIndex,
84 const std::string& indexUsagePurpose)
86 // do nothing if index is empty
87 if (indices.ssize() == 0)
91 const int largestIndex = *std::max_element(indices.begin(), indices.end());
92 if (largestIndex < largestOkayIndex)
94 GMX_THROW(RangeError("The provided structure file only contains "
95 + std::to_string(largestOkayIndex) + " coordinates, but coordinate index "
96 + std::to_string(largestIndex) + " was requested for " + indexUsagePurpose
97 + ". Make sure to update structure files "
98 "and index files if you store only a part of your system."));
106 int gmx_covar(int argc, char* argv[])
108 const char* desc[] = {
109 "[THISMODULE] calculates and diagonalizes the (mass-weighted)",
110 "covariance matrix.",
111 "All structures are fitted to the structure in the structure file.",
112 "When this is not a run input file periodicity will not be taken into",
113 "account. When the fit and analysis groups are identical and the analysis",
114 "is non mass-weighted, the fit will also be non mass-weighted.",
116 "The eigenvectors are written to a trajectory file ([TT]-v[tt]).",
117 "When the same atoms are used for the fit and the covariance analysis,",
118 "the reference structure for the fit is written first with t=-1.",
119 "The average (or reference when [TT]-ref[tt] is used) structure is",
120 "written with t=0, the eigenvectors",
121 "are written as frames with the eigenvector number and eigenvalue",
122 "as step number and timestamp, respectively.",
124 "The eigenvectors can be analyzed with [gmx-anaeig].",
126 "Option [TT]-ascii[tt] writes the whole covariance matrix to",
127 "an ASCII file. The order of the elements is: x1x1, x1y1, x1z1, x1x2, ...",
129 "Option [TT]-xpm[tt] writes the whole covariance matrix to an [REF].xpm[ref] file.",
131 "Option [TT]-xpma[tt] writes the atomic covariance matrix to an [REF].xpm[ref] file,",
132 "i.e. for each atom pair the sum of the xx, yy and zz covariances is",
135 "Note that the diagonalization of a matrix requires memory and time",
136 "that will increase at least as fast as than the square of the number",
137 "of atoms involved. It is easy to run out of memory, in which",
138 "case this tool will probably exit with a 'Segmentation fault'. You",
139 "should consider carefully whether a reduced set of atoms will meet",
140 "your needs for lower costs."
142 static gmx_bool bFit = TRUE, bRef = FALSE, bM = FALSE, bPBC = TRUE;
145 { "-fit", FALSE, etBOOL, { &bFit }, "Fit to a reference structure" },
150 "Use the deviation from the conformation in the structure file instead of from the "
152 { "-mwa", FALSE, etBOOL, { &bM }, "Mass-weighted covariance analysis" },
153 { "-last", FALSE, etINT, { &end }, "Last eigenvector to write away (-1 is till the last)" },
154 { "-pbc", FALSE, etBOOL, { &bPBC }, "Apply corrections for periodic boundary conditions" }
156 FILE* out = nullptr; /* initialization makes all compilers happy */
161 rvec * x, *xread, *xref, *xav, *xproj;
163 real * sqrtm, *mat, *eigenvalues, sum, trace, inv_nframes;
164 real t, tstart, tend, **mat2;
165 real xj, *w_rls = nullptr;
166 real min, max, *axis;
167 int natoms, nat, nframes0, nframes, nlevels;
168 int64_t ndim, i, j, k, l;
170 const char * fitfile, *trxfile, *ndxfile;
171 const char * eigvalfile, *eigvecfile, *averfile, *logfile;
172 const char * asciifile, *xpmfile, *xpmafile;
173 char str[STRLEN], *fitname, *ananame;
176 gmx_bool bDiffMass1, bDiffMass2;
179 gmx_output_env_t* oenv;
180 gmx_rmpbc_t gpbc = nullptr;
183 { efTRX, "-f", nullptr, ffREAD }, { efTPS, nullptr, nullptr, ffREAD },
184 { efNDX, nullptr, nullptr, ffOPTRD }, { efXVG, nullptr, "eigenval", ffWRITE },
185 { efTRN, "-v", "eigenvec", ffWRITE }, { efSTO, "-av", "average.pdb", ffWRITE },
186 { efLOG, nullptr, "covar", ffWRITE }, { efDAT, "-ascii", "covar", ffOPTWR },
187 { efXPM, "-xpm", "covar", ffOPTWR }, { efXPM, "-xpma", "covara", ffOPTWR }
189 #define NFILE asize(fnm)
191 if (!parse_common_args(
192 &argc, argv, PCA_CAN_TIME | PCA_TIME_UNIT, NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, nullptr, &oenv))
199 fitfile = ftp2fn(efTPS, NFILE, fnm);
200 trxfile = ftp2fn(efTRX, NFILE, fnm);
201 ndxfile = ftp2fn_null(efNDX, NFILE, fnm);
202 eigvalfile = ftp2fn(efXVG, NFILE, fnm);
203 eigvecfile = ftp2fn(efTRN, NFILE, fnm);
204 averfile = ftp2fn(efSTO, NFILE, fnm);
205 logfile = ftp2fn(efLOG, NFILE, fnm);
206 asciifile = opt2fn_null("-ascii", NFILE, fnm);
207 xpmfile = opt2fn_null("-xpm", NFILE, fnm);
208 xpmafile = opt2fn_null("-xpma", NFILE, fnm);
210 read_tps_conf(fitfile, &top, &pbcType, &xref, nullptr, box, TRUE);
215 printf("\nChoose a group for the least squares fit\n");
216 get_index(atoms, ndxfile, 1, &nfit, &ifit, &fitname);
217 // Make sure that we never attempt to access a coordinate out of range
218 gmx::throwErrorIfIndexOutOfBounds({ ifit, ifit + nfit }, atoms->nr, "fitting");
221 gmx_fatal(FARGS, "Need >= 3 points to fit!\n");
228 printf("\nChoose a group for the covariance analysis\n");
229 get_index(atoms, ndxfile, 1, &natoms, &index, &ananame);
230 gmx::throwErrorIfIndexOutOfBounds({ index, index + natoms }, atoms->nr, "analysis");
235 snew(w_rls, atoms->nr);
236 for (i = 0; (i < nfit); i++)
238 w_rls[ifit[i]] = atoms->atom[ifit[i]].m;
241 bDiffMass1 = bDiffMass1 || (w_rls[ifit[i]] != w_rls[ifit[i - 1]]);
247 for (i = 0; (i < natoms); i++)
251 sqrtm[i] = std::sqrt(atoms->atom[index[i]].m);
254 bDiffMass2 = bDiffMass2 || (sqrtm[i] != sqrtm[i - 1]);
263 if (bFit && bDiffMass1 && !bDiffMass2)
265 bDiffMass1 = natoms != nfit;
266 for (i = 0; (i < natoms) && !bDiffMass1; i++)
268 bDiffMass1 = index[i] != ifit[i];
274 "Note: the fit and analysis group are identical,\n"
275 " while the fit is mass weighted and the analysis is not.\n"
276 " Making the fit non mass weighted.\n\n");
277 for (i = 0; (i < nfit); i++)
279 w_rls[ifit[i]] = 1.0;
284 /* Prepare reference frame */
287 gpbc = gmx_rmpbc_init(&top.idef, pbcType, atoms->nr);
288 gmx_rmpbc(gpbc, atoms->nr, box, xref);
292 reset_x(nfit, ifit, atoms->nr, nullptr, xref, w_rls);
298 if (std::sqrt(static_cast<real>(INT64_MAX)) < static_cast<real>(ndim))
300 gmx_fatal(FARGS, "Number of degrees of freedoms to large for matrix.\n");
302 snew(mat, ndim * ndim);
304 fprintf(stderr, "Calculating the average structure ...\n");
306 nat = read_first_x(oenv, &status, trxfile, &t, &xread, box);
307 if (nat != atoms->nr)
310 "\nWARNING: number of atoms in structure file (%d) and trajectory (%d) do not "
315 gmx::throwErrorIfIndexOutOfBounds({ ifit, ifit + nfit }, nat, "fitting");
316 gmx::throwErrorIfIndexOutOfBounds({ index, index + natoms }, nat, "analysis");
321 /* calculate x: a fitted struture of the selected atoms */
324 gmx_rmpbc(gpbc, nat, box, xread);
328 reset_x(nfit, ifit, nat, nullptr, xread, w_rls);
329 do_fit(nat, w_rls, xref, xread);
331 for (i = 0; i < natoms; i++)
333 rvec_inc(xav[i], xread[index[i]]);
335 } while (read_next_x(oenv, status, &t, xread, box));
338 inv_nframes = 1.0 / nframes0;
339 for (i = 0; i < natoms; i++)
341 for (d = 0; d < DIM; d++)
343 xav[i][d] *= inv_nframes;
344 xread[index[i]][d] = xav[i][d];
347 write_sto_conf_indexed(
348 opt2fn("-av", NFILE, fnm), "Average structure", atoms, xread, nullptr, PbcType::No, zerobox, natoms, index);
352 "Constructing covariance matrix (%dx%d) ...\n",
353 static_cast<int>(ndim),
354 static_cast<int>(ndim));
356 nat = read_first_x(oenv, &status, trxfile, &t, &xread, box);
362 /* calculate x: a (fitted) structure of the selected atoms */
365 gmx_rmpbc(gpbc, nat, box, xread);
369 reset_x(nfit, ifit, nat, nullptr, xread, w_rls);
370 do_fit(nat, w_rls, xref, xread);
374 for (i = 0; i < natoms; i++)
376 rvec_sub(xread[index[i]], xref[index[i]], x[i]);
381 for (i = 0; i < natoms; i++)
383 rvec_sub(xread[index[i]], xav[i], x[i]);
387 for (j = 0; j < natoms; j++)
389 for (dj = 0; dj < DIM; dj++)
391 k = ndim * (DIM * j + dj);
393 for (i = j; i < natoms; i++)
396 for (d = 0; d < DIM; d++)
398 mat[l + d] += x[i][d] * xj;
403 } while (read_next_x(oenv, status, &t, xread, box) && (bRef || nframes < nframes0));
405 gmx_rmpbc_done(gpbc);
407 fprintf(stderr, "Read %d frames\n", nframes);
411 /* copy the reference structure to the ouput array x */
413 for (i = 0; i < natoms; i++)
415 copy_rvec(xref[index[i]], xproj[i]);
423 /* correct the covariance matrix for the mass */
424 inv_nframes = 1.0 / nframes;
425 for (j = 0; j < natoms; j++)
427 for (dj = 0; dj < DIM; dj++)
429 for (i = j; i < natoms; i++)
431 k = ndim * (DIM * j + dj) + DIM * i;
432 for (d = 0; d < DIM; d++)
434 mat[k + d] = mat[k + d] * inv_nframes * sqrtm[i] * sqrtm[j];
440 /* symmetrize the matrix */
441 for (j = 0; j < ndim; j++)
443 for (i = j; i < ndim; i++)
445 mat[ndim * i + j] = mat[ndim * j + i];
450 for (i = 0; i < ndim; i++)
452 trace += mat[i * ndim + i];
454 fprintf(stderr, "\nTrace of the covariance matrix: %g (%snm^2)\n", trace, bM ? "u " : "");
458 out = gmx_ffopen(asciifile, "w");
459 for (j = 0; j < ndim; j++)
461 for (i = 0; i < ndim; i += 3)
463 fprintf(out, "%g %g %g\n", mat[ndim * j + i], mat[ndim * j + i + 1], mat[ndim * j + i + 2]);
474 for (j = 0; j < ndim; j++)
476 mat2[j] = &(mat[ndim * j]);
477 for (i = 0; i <= j; i++)
479 if (mat2[j][i] < min)
483 if (mat2[j][j] > max)
490 for (i = 0; i < ndim; i++)
503 out = gmx_ffopen(xpmfile, "w");
508 bM ? "u nm^2" : "nm^2",
532 snew(mat2, ndim / DIM);
533 for (i = 0; i < ndim / DIM; i++)
535 snew(mat2[i], ndim / DIM);
537 for (j = 0; j < ndim / DIM; j++)
539 for (i = 0; i <= j; i++)
542 for (d = 0; d < DIM; d++)
544 mat2[j][i] += mat[ndim * (DIM * j + d) + DIM * i + d];
546 if (mat2[j][i] < min)
550 if (mat2[j][j] > max)
554 mat2[i][j] = mat2[j][i];
557 snew(axis, ndim / DIM);
558 for (i = 0; i < ndim / DIM; i++)
571 out = gmx_ffopen(xpmafile, "w");
576 bM ? "u nm^2" : "nm^2",
593 for (i = 0; i < ndim / DIM; i++)
601 /* call diagonalization routine */
603 snew(eigenvalues, ndim);
604 snew(eigenvectors, ndim * ndim);
606 std::memcpy(eigenvectors, mat, ndim * ndim * sizeof(real));
607 fprintf(stderr, "\nDiagonalizing ...\n");
609 eigensolver(eigenvectors, ndim, 0, ndim, eigenvalues, mat);
612 /* now write the output */
615 for (i = 0; i < ndim; i++)
617 sum += eigenvalues[i];
619 fprintf(stderr, "\nSum of the eigenvalues: %g (%snm^2)\n", sum, bM ? "u " : "");
620 if (std::abs(trace - sum) > 0.01 * trace)
623 "\nWARNING: eigenvalue sum deviates from the trace of the covariance matrix\n");
626 /* Set 'end', the maximum eigenvector and -value index used for output */
629 if (nframes - 1 < ndim)
632 fprintf(stderr, "\nWARNING: there are fewer frames in your trajectory than there are\n");
633 fprintf(stderr, "degrees of freedom in your system. Only generating the first\n");
634 fprintf(stderr, "%d out of %d eigenvectors and eigenvalues.\n", end, static_cast<int>(ndim));
642 fprintf(stderr, "\nWriting eigenvalues to %s\n", eigvalfile);
644 sprintf(str, "(%snm\\S2\\N)", bM ? "u " : "");
645 out = xvgropen(eigvalfile, "Eigenvalues of the covariance matrix", "Eigenvector index", str, oenv);
646 for (i = 0; (i < end); i++)
648 fprintf(out, "%10d %g\n", static_cast<int>(i + 1), eigenvalues[ndim - 1 - i]);
654 /* misuse lambda: 0/1 mass weighted analysis no/yes */
657 WriteXref = eWXR_YES;
658 for (i = 0; i < nfit; i++)
660 copy_rvec(xref[ifit[i]], x[i]);
670 /* misuse lambda: -1 for no fit */
671 WriteXref = eWXR_NOFIT;
675 eigvecfile, natoms, mat, TRUE, 1, end, WriteXref, x, bDiffMass1, xproj, bM, eigenvalues);
677 out = gmx_ffopen(logfile, "w");
679 fprintf(out, "Covariance analysis log, written %s\n", gmx_format_current_time().c_str());
681 fprintf(out, "Program: %s\n", argv[0]);
682 gmx_getcwd(str, STRLEN);
684 fprintf(out, "Working directory: %s\n\n", str);
687 "Read %d frames from %s (time %g to %g %s)\n",
690 output_env_conv_time(oenv, tstart),
691 output_env_conv_time(oenv, tend),
692 output_env_get_time_unit(oenv).c_str());
695 fprintf(out, "Read reference structure for fit from %s\n", fitfile);
699 fprintf(out, "Read index groups from %s\n", ndxfile);
703 fprintf(out, "Analysis group is '%s' (%d atoms)\n", ananame, natoms);
706 fprintf(out, "Fit group is '%s' (%d atoms)\n", fitname, nfit);
710 fprintf(out, "No fit was used\n");
712 fprintf(out, "Analysis is %smass weighted\n", bDiffMass2 ? "" : "non-");
715 fprintf(out, "Fit is %smass weighted\n", bDiffMass1 ? "" : "non-");
717 fprintf(out, "Diagonalized the %dx%d covariance matrix\n", static_cast<int>(ndim), static_cast<int>(ndim));
718 fprintf(out, "Trace of the covariance matrix before diagonalizing: %g\n", trace);
719 fprintf(out, "Trace of the covariance matrix after diagonalizing: %g\n\n", sum);
721 fprintf(out, "Wrote %d eigenvalues to %s\n", static_cast<int>(end), eigvalfile);
722 if (WriteXref == eWXR_YES)
724 fprintf(out, "Wrote reference structure to %s\n", eigvecfile);
726 fprintf(out, "Wrote average structure to %s and %s\n", averfile, eigvecfile);
727 fprintf(out, "Wrote eigenvectors %d to %d to %s\n", 1, end, eigvecfile);
731 fprintf(stderr, "Wrote the log to %s\n", logfile);