2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7 * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
43 #include "gromacs/commandline/pargs.h"
44 #include "gromacs/fileio/confio.h"
45 #include "gromacs/fileio/matio.h"
46 #include "gromacs/fileio/trxio.h"
47 #include "gromacs/fileio/xvgr.h"
48 #include "gromacs/gmxana/eigio.h"
49 #include "gromacs/gmxana/gmx_ana.h"
50 #include "gromacs/linearalgebra/eigensolver.h"
51 #include "gromacs/math/do_fit.h"
52 #include "gromacs/math/vec.h"
53 #include "gromacs/pbcutil/pbc.h"
54 #include "gromacs/pbcutil/rmpbc.h"
55 #include "gromacs/topology/index.h"
56 #include "gromacs/topology/topology.h"
57 #include "gromacs/utility/arrayref.h"
58 #include "gromacs/utility/arraysize.h"
59 #include "gromacs/utility/cstringutil.h"
60 #include "gromacs/utility/exceptions.h"
61 #include "gromacs/utility/fatalerror.h"
62 #include "gromacs/utility/futil.h"
63 #include "gromacs/utility/smalloc.h"
64 #include "gromacs/utility/sysinfo.h"
73 /*! \brief Throw an error if any element in index exceeds a given number.
75 * \param[in] indices to be acessed
76 * \param[in] numAtoms to be accessed
77 * \param[in] indexUsagePurpose to be more explicit in the error message
79 * \throws RangeError if any element in indices is larger than or equal numAtoms
82 void throwErrorIfIndexOutOfBounds(ArrayRef<const int> indices, const int numAtoms, const std::string& indexUsagePurpose)
84 // do nothing if index is empty
89 const int largestIndex = *std::max_element(indices.begin(), indices.end());
90 if (largestIndex >= numAtoms)
92 GMX_THROW(RangeError("The provided structure file only contains " + std::to_string(numAtoms)
93 + " coordinates, but coordinate index " + std::to_string(largestIndex + 1)
94 + " was requested for " + indexUsagePurpose
95 + ". Make sure to update structure files "
96 "and index files if you store only a part of your system."));
104 int gmx_covar(int argc, char* argv[])
106 const char* desc[] = {
107 "[THISMODULE] calculates and diagonalizes the (mass-weighted)",
108 "covariance matrix.",
109 "All structures are fitted to the structure in the structure file.",
110 "When this is not a run input file periodicity will not be taken into",
111 "account. When the fit and analysis groups are identical and the analysis",
112 "is non mass-weighted, the fit will also be non mass-weighted.",
114 "The eigenvectors are written to a trajectory file ([TT]-v[tt]).",
115 "When the same atoms are used for the fit and the covariance analysis,",
116 "the reference structure for the fit is written first with t=-1.",
117 "The average (or reference when [TT]-ref[tt] is used) structure is",
118 "written with t=0, the eigenvectors",
119 "are written as frames with the eigenvector number and eigenvalue",
120 "as step number and timestamp, respectively.",
122 "The eigenvectors can be analyzed with [gmx-anaeig].",
124 "Option [TT]-ascii[tt] writes the whole covariance matrix to",
125 "an ASCII file. The order of the elements is: x1x1, x1y1, x1z1, x1x2, ...",
127 "Option [TT]-xpm[tt] writes the whole covariance matrix to an [REF].xpm[ref] file.",
129 "Option [TT]-xpma[tt] writes the atomic covariance matrix to an [REF].xpm[ref] file,",
130 "i.e. for each atom pair the sum of the xx, yy and zz covariances is",
133 "Note that the diagonalization of a matrix requires memory and time",
134 "that will increase at least as fast as than the square of the number",
135 "of atoms involved. It is easy to run out of memory, in which",
136 "case this tool will probably exit with a 'Segmentation fault'. You",
137 "should consider carefully whether a reduced set of atoms will meet",
138 "your needs for lower costs."
140 static gmx_bool bFit = TRUE, bRef = FALSE, bM = FALSE, bPBC = TRUE;
143 { "-fit", FALSE, etBOOL, { &bFit }, "Fit to a reference structure" },
148 "Use the deviation from the conformation in the structure file instead of from the "
150 { "-mwa", FALSE, etBOOL, { &bM }, "Mass-weighted covariance analysis" },
151 { "-last", FALSE, etINT, { &end }, "Last eigenvector to write away (-1 is till the last)" },
152 { "-pbc", FALSE, etBOOL, { &bPBC }, "Apply corrections for periodic boundary conditions" }
154 FILE* out = nullptr; /* initialization makes all compilers happy */
159 rvec * x, *xread, *xref, *xav, *xproj;
161 real * sqrtm, *mat, *eigenvalues, sum, trace, inv_nframes;
162 real t, tstart, tend, **mat2;
163 real xj, *w_rls = nullptr;
164 real min, max, *axis;
165 int natoms, nat, nframes0, nframes, nlevels;
166 int64_t ndim, i, j, k, l;
168 const char * fitfile, *trxfile, *ndxfile;
169 const char * eigvalfile, *eigvecfile, *averfile, *logfile;
170 const char * asciifile, *xpmfile, *xpmafile;
171 char str[STRLEN], *fitname, *ananame;
174 gmx_bool bDiffMass1, bDiffMass2;
177 gmx_output_env_t* oenv;
178 gmx_rmpbc_t gpbc = nullptr;
181 { efTRX, "-f", nullptr, ffREAD }, { efTPS, nullptr, nullptr, ffREAD },
182 { efNDX, nullptr, nullptr, ffOPTRD }, { efXVG, nullptr, "eigenval", ffWRITE },
183 { efTRN, "-v", "eigenvec", ffWRITE }, { efSTO, "-av", "average.pdb", ffWRITE },
184 { efLOG, nullptr, "covar", ffWRITE }, { efDAT, "-ascii", "covar", ffOPTWR },
185 { efXPM, "-xpm", "covar", ffOPTWR }, { efXPM, "-xpma", "covara", ffOPTWR }
187 #define NFILE asize(fnm)
189 if (!parse_common_args(&argc, argv, PCA_CAN_TIME | PCA_TIME_UNIT, NFILE, fnm, asize(pa), pa,
190 asize(desc), desc, 0, nullptr, &oenv))
197 fitfile = ftp2fn(efTPS, NFILE, fnm);
198 trxfile = ftp2fn(efTRX, NFILE, fnm);
199 ndxfile = ftp2fn_null(efNDX, NFILE, fnm);
200 eigvalfile = ftp2fn(efXVG, NFILE, fnm);
201 eigvecfile = ftp2fn(efTRN, NFILE, fnm);
202 averfile = ftp2fn(efSTO, NFILE, fnm);
203 logfile = ftp2fn(efLOG, NFILE, fnm);
204 asciifile = opt2fn_null("-ascii", NFILE, fnm);
205 xpmfile = opt2fn_null("-xpm", NFILE, fnm);
206 xpmafile = opt2fn_null("-xpma", NFILE, fnm);
208 read_tps_conf(fitfile, &top, &pbcType, &xref, nullptr, box, TRUE);
213 printf("\nChoose a group for the least squares fit\n");
214 get_index(atoms, ndxfile, 1, &nfit, &ifit, &fitname);
215 // Make sure that we never attempt to access a coordinate out of range
216 gmx::throwErrorIfIndexOutOfBounds({ ifit, ifit + nfit }, atoms->nr, "fitting");
219 gmx_fatal(FARGS, "Need >= 3 points to fit!\n");
226 printf("\nChoose a group for the covariance analysis\n");
227 get_index(atoms, ndxfile, 1, &natoms, &index, &ananame);
228 gmx::throwErrorIfIndexOutOfBounds({ index, index + natoms }, atoms->nr, "analysis");
233 snew(w_rls, atoms->nr);
234 for (i = 0; (i < nfit); i++)
236 w_rls[ifit[i]] = atoms->atom[ifit[i]].m;
239 bDiffMass1 = bDiffMass1 || (w_rls[ifit[i]] != w_rls[ifit[i - 1]]);
245 for (i = 0; (i < natoms); i++)
249 sqrtm[i] = std::sqrt(atoms->atom[index[i]].m);
252 bDiffMass2 = bDiffMass2 || (sqrtm[i] != sqrtm[i - 1]);
261 if (bFit && bDiffMass1 && !bDiffMass2)
263 bDiffMass1 = natoms != nfit;
264 for (i = 0; (i < natoms) && !bDiffMass1; i++)
266 bDiffMass1 = index[i] != ifit[i];
272 "Note: the fit and analysis group are identical,\n"
273 " while the fit is mass weighted and the analysis is not.\n"
274 " Making the fit non mass weighted.\n\n");
275 for (i = 0; (i < nfit); i++)
277 w_rls[ifit[i]] = 1.0;
282 /* Prepare reference frame */
285 gpbc = gmx_rmpbc_init(&top.idef, pbcType, atoms->nr);
286 gmx_rmpbc(gpbc, atoms->nr, box, xref);
290 reset_x(nfit, ifit, atoms->nr, nullptr, xref, w_rls);
296 if (std::sqrt(static_cast<real>(INT64_MAX)) < static_cast<real>(ndim))
298 gmx_fatal(FARGS, "Number of degrees of freedoms to large for matrix.\n");
300 snew(mat, ndim * ndim);
302 fprintf(stderr, "Calculating the average structure ...\n");
304 nat = read_first_x(oenv, &status, trxfile, &t, &xread, box);
305 if (nat != atoms->nr)
308 "\nWARNING: number of atoms in structure file (%d) and trajectory (%d) do not "
312 gmx::throwErrorIfIndexOutOfBounds({ ifit, ifit + nfit }, nat, "fitting");
313 gmx::throwErrorIfIndexOutOfBounds({ index, index + natoms }, nat, "analysis");
318 /* calculate x: a fitted struture of the selected atoms */
321 gmx_rmpbc(gpbc, nat, box, xread);
325 reset_x(nfit, ifit, nat, nullptr, xread, w_rls);
326 do_fit(nat, w_rls, xref, xread);
328 for (i = 0; i < natoms; i++)
330 rvec_inc(xav[i], xread[index[i]]);
332 } while (read_next_x(oenv, status, &t, xread, box));
335 inv_nframes = 1.0 / nframes0;
336 for (i = 0; i < natoms; i++)
338 for (d = 0; d < DIM; d++)
340 xav[i][d] *= inv_nframes;
341 xread[index[i]][d] = xav[i][d];
344 write_sto_conf_indexed(opt2fn("-av", NFILE, fnm), "Average structure", atoms, xread, nullptr,
345 PbcType::No, zerobox, natoms, index);
348 fprintf(stderr, "Constructing covariance matrix (%dx%d) ...\n", static_cast<int>(ndim),
349 static_cast<int>(ndim));
351 nat = read_first_x(oenv, &status, trxfile, &t, &xread, box);
357 /* calculate x: a (fitted) structure of the selected atoms */
360 gmx_rmpbc(gpbc, nat, box, xread);
364 reset_x(nfit, ifit, nat, nullptr, xread, w_rls);
365 do_fit(nat, w_rls, xref, xread);
369 for (i = 0; i < natoms; i++)
371 rvec_sub(xread[index[i]], xref[index[i]], x[i]);
376 for (i = 0; i < natoms; i++)
378 rvec_sub(xread[index[i]], xav[i], x[i]);
382 for (j = 0; j < natoms; j++)
384 for (dj = 0; dj < DIM; dj++)
386 k = ndim * (DIM * j + dj);
388 for (i = j; i < natoms; i++)
391 for (d = 0; d < DIM; d++)
393 mat[l + d] += x[i][d] * xj;
398 } while (read_next_x(oenv, status, &t, xread, box) && (bRef || nframes < nframes0));
400 gmx_rmpbc_done(gpbc);
402 fprintf(stderr, "Read %d frames\n", nframes);
406 /* copy the reference structure to the ouput array x */
408 for (i = 0; i < natoms; i++)
410 copy_rvec(xref[index[i]], xproj[i]);
418 /* correct the covariance matrix for the mass */
419 inv_nframes = 1.0 / nframes;
420 for (j = 0; j < natoms; j++)
422 for (dj = 0; dj < DIM; dj++)
424 for (i = j; i < natoms; i++)
426 k = ndim * (DIM * j + dj) + DIM * i;
427 for (d = 0; d < DIM; d++)
429 mat[k + d] = mat[k + d] * inv_nframes * sqrtm[i] * sqrtm[j];
435 /* symmetrize the matrix */
436 for (j = 0; j < ndim; j++)
438 for (i = j; i < ndim; i++)
440 mat[ndim * i + j] = mat[ndim * j + i];
445 for (i = 0; i < ndim; i++)
447 trace += mat[i * ndim + i];
449 fprintf(stderr, "\nTrace of the covariance matrix: %g (%snm^2)\n", trace, bM ? "u " : "");
453 out = gmx_ffopen(asciifile, "w");
454 for (j = 0; j < ndim; j++)
456 for (i = 0; i < ndim; i += 3)
458 fprintf(out, "%g %g %g\n", mat[ndim * j + i], mat[ndim * j + i + 1],
459 mat[ndim * j + i + 2]);
470 for (j = 0; j < ndim; j++)
472 mat2[j] = &(mat[ndim * j]);
473 for (i = 0; i <= j; i++)
475 if (mat2[j][i] < min)
479 if (mat2[j][j] > max)
486 for (i = 0; i < ndim; i++)
499 out = gmx_ffopen(xpmfile, "w");
501 write_xpm3(out, 0, "Covariance", bM ? "u nm^2" : "nm^2", "dim", "dim", ndim, ndim, axis,
502 axis, mat2, min, 0.0, max, rlo, rmi, rhi, &nlevels);
512 snew(mat2, ndim / DIM);
513 for (i = 0; i < ndim / DIM; i++)
515 snew(mat2[i], ndim / DIM);
517 for (j = 0; j < ndim / DIM; j++)
519 for (i = 0; i <= j; i++)
522 for (d = 0; d < DIM; d++)
524 mat2[j][i] += mat[ndim * (DIM * j + d) + DIM * i + d];
526 if (mat2[j][i] < min)
530 if (mat2[j][j] > max)
534 mat2[i][j] = mat2[j][i];
537 snew(axis, ndim / DIM);
538 for (i = 0; i < ndim / DIM; i++)
551 out = gmx_ffopen(xpmafile, "w");
553 write_xpm3(out, 0, "Covariance", bM ? "u nm^2" : "nm^2", "atom", "atom", ndim / DIM,
554 ndim / DIM, axis, axis, mat2, min, 0.0, max, rlo, rmi, rhi, &nlevels);
557 for (i = 0; i < ndim / DIM; i++)
565 /* call diagonalization routine */
567 snew(eigenvalues, ndim);
568 snew(eigenvectors, ndim * ndim);
570 std::memcpy(eigenvectors, mat, ndim * ndim * sizeof(real));
571 fprintf(stderr, "\nDiagonalizing ...\n");
573 eigensolver(eigenvectors, ndim, 0, ndim, eigenvalues, mat);
576 /* now write the output */
579 for (i = 0; i < ndim; i++)
581 sum += eigenvalues[i];
583 fprintf(stderr, "\nSum of the eigenvalues: %g (%snm^2)\n", sum, bM ? "u " : "");
584 if (std::abs(trace - sum) > 0.01 * trace)
587 "\nWARNING: eigenvalue sum deviates from the trace of the covariance matrix\n");
590 /* Set 'end', the maximum eigenvector and -value index used for output */
593 if (nframes - 1 < ndim)
597 "\nWARNING: there are fewer frames in your trajectory than there are\n");
598 fprintf(stderr, "degrees of freedom in your system. Only generating the first\n");
599 fprintf(stderr, "%d out of %d eigenvectors and eigenvalues.\n", end, static_cast<int>(ndim));
607 fprintf(stderr, "\nWriting eigenvalues to %s\n", eigvalfile);
609 sprintf(str, "(%snm\\S2\\N)", bM ? "u " : "");
610 out = xvgropen(eigvalfile, "Eigenvalues of the covariance matrix", "Eigenvector index", str, oenv);
611 for (i = 0; (i < end); i++)
613 fprintf(out, "%10d %g\n", static_cast<int>(i + 1), eigenvalues[ndim - 1 - i]);
619 /* misuse lambda: 0/1 mass weighted analysis no/yes */
622 WriteXref = eWXR_YES;
623 for (i = 0; i < nfit; i++)
625 copy_rvec(xref[ifit[i]], x[i]);
635 /* misuse lambda: -1 for no fit */
636 WriteXref = eWXR_NOFIT;
639 write_eigenvectors(eigvecfile, natoms, mat, TRUE, 1, end, WriteXref, x, bDiffMass1, xproj, bM,
642 out = gmx_ffopen(logfile, "w");
644 fprintf(out, "Covariance analysis log, written %s\n", gmx_format_current_time().c_str());
646 fprintf(out, "Program: %s\n", argv[0]);
647 gmx_getcwd(str, STRLEN);
649 fprintf(out, "Working directory: %s\n\n", str);
651 fprintf(out, "Read %d frames from %s (time %g to %g %s)\n", nframes, trxfile,
652 output_env_conv_time(oenv, tstart), output_env_conv_time(oenv, tend),
653 output_env_get_time_unit(oenv).c_str());
656 fprintf(out, "Read reference structure for fit from %s\n", fitfile);
660 fprintf(out, "Read index groups from %s\n", ndxfile);
664 fprintf(out, "Analysis group is '%s' (%d atoms)\n", ananame, natoms);
667 fprintf(out, "Fit group is '%s' (%d atoms)\n", fitname, nfit);
671 fprintf(out, "No fit was used\n");
673 fprintf(out, "Analysis is %smass weighted\n", bDiffMass2 ? "" : "non-");
676 fprintf(out, "Fit is %smass weighted\n", bDiffMass1 ? "" : "non-");
678 fprintf(out, "Diagonalized the %dx%d covariance matrix\n", static_cast<int>(ndim),
679 static_cast<int>(ndim));
680 fprintf(out, "Trace of the covariance matrix before diagonalizing: %g\n", trace);
681 fprintf(out, "Trace of the covariance matrix after diagonalizing: %g\n\n", sum);
683 fprintf(out, "Wrote %d eigenvalues to %s\n", static_cast<int>(end), eigvalfile);
684 if (WriteXref == eWXR_YES)
686 fprintf(out, "Wrote reference structure to %s\n", eigvecfile);
688 fprintf(out, "Wrote average structure to %s and %s\n", averfile, eigvecfile);
689 fprintf(out, "Wrote eigenvectors %d to %d to %s\n", 1, end, eigvecfile);
693 fprintf(stderr, "Wrote the log to %s\n", logfile);