2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7 * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
43 #include "gromacs/commandline/pargs.h"
44 #include "gromacs/fileio/confio.h"
45 #include "gromacs/fileio/matio.h"
46 #include "gromacs/fileio/trxio.h"
47 #include "gromacs/fileio/xvgr.h"
48 #include "gromacs/gmxana/eigio.h"
49 #include "gromacs/gmxana/gmx_ana.h"
50 #include "gromacs/linearalgebra/eigensolver.h"
51 #include "gromacs/math/do_fit.h"
52 #include "gromacs/math/vec.h"
53 #include "gromacs/pbcutil/pbc.h"
54 #include "gromacs/pbcutil/rmpbc.h"
55 #include "gromacs/topology/index.h"
56 #include "gromacs/topology/topology.h"
57 #include "gromacs/utility/arrayref.h"
58 #include "gromacs/utility/arraysize.h"
59 #include "gromacs/utility/cstringutil.h"
60 #include "gromacs/utility/exceptions.h"
61 #include "gromacs/utility/fatalerror.h"
62 #include "gromacs/utility/futil.h"
63 #include "gromacs/utility/smalloc.h"
64 #include "gromacs/utility/sysinfo.h"
73 /*! \brief Throw an error if any element in index exceeds a given number.
75 * \param[in] indices to be acessed
76 * \param[in] numAtoms to be accessed
77 * \param[in] indexUsagePurpose to be more explicit in the error message
79 * \throws RangeError if any element in indices is larger than or equal numAtoms
82 void throwErrorIfIndexOutOfBounds(ArrayRef<const int> indices, const int numAtoms, const std::string& indexUsagePurpose)
84 // do nothing if index is empty
89 const int largestIndex = *std::max_element(indices.begin(), indices.end());
90 if (largestIndex >= numAtoms)
92 GMX_THROW(RangeError("The provided structure file only contains " + std::to_string(numAtoms)
93 + " coordinates, but coordinate index " + std::to_string(largestIndex + 1)
94 + " was requested for " + indexUsagePurpose
95 + ". Make sure to update structure files "
96 "and index files if you store only a part of your system."));
104 int gmx_covar(int argc, char* argv[])
106 const char* desc[] = {
107 "[THISMODULE] calculates and diagonalizes the (mass-weighted)",
108 "covariance matrix.",
109 "All structures are fitted to the structure in the structure file.",
110 "When this is not a run input file periodicity will not be taken into",
111 "account. When the fit and analysis groups are identical and the analysis",
112 "is non mass-weighted, the fit will also be non mass-weighted.",
114 "The eigenvectors are written to a trajectory file ([TT]-v[tt]).",
115 "When the same atoms are used for the fit and the covariance analysis,",
116 "the reference structure for the fit is written first with t=-1.",
117 "The average (or reference when [TT]-ref[tt] is used) structure is",
118 "written with t=0, the eigenvectors",
119 "are written as frames with the eigenvector number and eigenvalue",
120 "as step number and timestamp, respectively.",
122 "The eigenvectors can be analyzed with [gmx-anaeig].",
124 "Option [TT]-ascii[tt] writes the whole covariance matrix to",
125 "an ASCII file. The order of the elements is: x1x1, x1y1, x1z1, x1x2, ...",
127 "Option [TT]-xpm[tt] writes the whole covariance matrix to an [REF].xpm[ref] file.",
129 "Option [TT]-xpma[tt] writes the atomic covariance matrix to an [REF].xpm[ref] file,",
130 "i.e. for each atom pair the sum of the xx, yy and zz covariances is",
133 "Note that the diagonalization of a matrix requires memory and time",
134 "that will increase at least as fast as than the square of the number",
135 "of atoms involved. It is easy to run out of memory, in which",
136 "case this tool will probably exit with a 'Segmentation fault'. You",
137 "should consider carefully whether a reduced set of atoms will meet",
138 "your needs for lower costs."
140 static gmx_bool bFit = TRUE, bRef = FALSE, bM = FALSE, bPBC = TRUE;
143 { "-fit", FALSE, etBOOL, { &bFit }, "Fit to a reference structure" },
148 "Use the deviation from the conformation in the structure file instead of from the "
150 { "-mwa", FALSE, etBOOL, { &bM }, "Mass-weighted covariance analysis" },
151 { "-last", FALSE, etINT, { &end }, "Last eigenvector to write away (-1 is till the last)" },
152 { "-pbc", FALSE, etBOOL, { &bPBC }, "Apply corrections for periodic boundary conditions" }
154 FILE* out = nullptr; /* initialization makes all compilers happy */
159 rvec * x, *xread, *xref, *xav, *xproj;
161 real * sqrtm, *mat, *eigenvalues, sum, trace, inv_nframes;
162 real t, tstart, tend, **mat2;
163 real xj, *w_rls = nullptr;
164 real min, max, *axis;
165 int natoms, nat, nframes0, nframes, nlevels;
166 int64_t ndim, i, j, k, l;
168 const char * fitfile, *trxfile, *ndxfile;
169 const char * eigvalfile, *eigvecfile, *averfile, *logfile;
170 const char * asciifile, *xpmfile, *xpmafile;
171 char str[STRLEN], *fitname, *ananame;
174 gmx_bool bDiffMass1, bDiffMass2;
177 gmx_output_env_t* oenv;
178 gmx_rmpbc_t gpbc = nullptr;
181 { efTRX, "-f", nullptr, ffREAD }, { efTPS, nullptr, nullptr, ffREAD },
182 { efNDX, nullptr, nullptr, ffOPTRD }, { efXVG, nullptr, "eigenval", ffWRITE },
183 { efTRN, "-v", "eigenvec", ffWRITE }, { efSTO, "-av", "average.pdb", ffWRITE },
184 { efLOG, nullptr, "covar", ffWRITE }, { efDAT, "-ascii", "covar", ffOPTWR },
185 { efXPM, "-xpm", "covar", ffOPTWR }, { efXPM, "-xpma", "covara", ffOPTWR }
187 #define NFILE asize(fnm)
189 if (!parse_common_args(
190 &argc, argv, PCA_CAN_TIME | PCA_TIME_UNIT, NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, nullptr, &oenv))
197 fitfile = ftp2fn(efTPS, NFILE, fnm);
198 trxfile = ftp2fn(efTRX, NFILE, fnm);
199 ndxfile = ftp2fn_null(efNDX, NFILE, fnm);
200 eigvalfile = ftp2fn(efXVG, NFILE, fnm);
201 eigvecfile = ftp2fn(efTRN, NFILE, fnm);
202 averfile = ftp2fn(efSTO, NFILE, fnm);
203 logfile = ftp2fn(efLOG, NFILE, fnm);
204 asciifile = opt2fn_null("-ascii", NFILE, fnm);
205 xpmfile = opt2fn_null("-xpm", NFILE, fnm);
206 xpmafile = opt2fn_null("-xpma", NFILE, fnm);
208 read_tps_conf(fitfile, &top, &pbcType, &xref, nullptr, box, TRUE);
213 printf("\nChoose a group for the least squares fit\n");
214 get_index(atoms, ndxfile, 1, &nfit, &ifit, &fitname);
215 // Make sure that we never attempt to access a coordinate out of range
216 gmx::throwErrorIfIndexOutOfBounds({ ifit, ifit + nfit }, atoms->nr, "fitting");
219 gmx_fatal(FARGS, "Need >= 3 points to fit!\n");
226 printf("\nChoose a group for the covariance analysis\n");
227 get_index(atoms, ndxfile, 1, &natoms, &index, &ananame);
228 gmx::throwErrorIfIndexOutOfBounds({ index, index + natoms }, atoms->nr, "analysis");
233 snew(w_rls, atoms->nr);
234 for (i = 0; (i < nfit); i++)
236 w_rls[ifit[i]] = atoms->atom[ifit[i]].m;
239 bDiffMass1 = bDiffMass1 || (w_rls[ifit[i]] != w_rls[ifit[i - 1]]);
245 for (i = 0; (i < natoms); i++)
249 sqrtm[i] = std::sqrt(atoms->atom[index[i]].m);
252 bDiffMass2 = bDiffMass2 || (sqrtm[i] != sqrtm[i - 1]);
261 if (bFit && bDiffMass1 && !bDiffMass2)
263 bDiffMass1 = natoms != nfit;
264 for (i = 0; (i < natoms) && !bDiffMass1; i++)
266 bDiffMass1 = index[i] != ifit[i];
272 "Note: the fit and analysis group are identical,\n"
273 " while the fit is mass weighted and the analysis is not.\n"
274 " Making the fit non mass weighted.\n\n");
275 for (i = 0; (i < nfit); i++)
277 w_rls[ifit[i]] = 1.0;
282 /* Prepare reference frame */
285 gpbc = gmx_rmpbc_init(&top.idef, pbcType, atoms->nr);
286 gmx_rmpbc(gpbc, atoms->nr, box, xref);
290 reset_x(nfit, ifit, atoms->nr, nullptr, xref, w_rls);
296 if (std::sqrt(static_cast<real>(INT64_MAX)) < static_cast<real>(ndim))
298 gmx_fatal(FARGS, "Number of degrees of freedoms to large for matrix.\n");
300 snew(mat, ndim * ndim);
302 fprintf(stderr, "Calculating the average structure ...\n");
304 nat = read_first_x(oenv, &status, trxfile, &t, &xread, box);
305 if (nat != atoms->nr)
308 "\nWARNING: number of atoms in structure file (%d) and trajectory (%d) do not "
313 gmx::throwErrorIfIndexOutOfBounds({ ifit, ifit + nfit }, nat, "fitting");
314 gmx::throwErrorIfIndexOutOfBounds({ index, index + natoms }, nat, "analysis");
319 /* calculate x: a fitted struture of the selected atoms */
322 gmx_rmpbc(gpbc, nat, box, xread);
326 reset_x(nfit, ifit, nat, nullptr, xread, w_rls);
327 do_fit(nat, w_rls, xref, xread);
329 for (i = 0; i < natoms; i++)
331 rvec_inc(xav[i], xread[index[i]]);
333 } while (read_next_x(oenv, status, &t, xread, box));
336 inv_nframes = 1.0 / nframes0;
337 for (i = 0; i < natoms; i++)
339 for (d = 0; d < DIM; d++)
341 xav[i][d] *= inv_nframes;
342 xread[index[i]][d] = xav[i][d];
345 write_sto_conf_indexed(
346 opt2fn("-av", NFILE, fnm), "Average structure", atoms, xread, nullptr, PbcType::No, zerobox, natoms, index);
350 "Constructing covariance matrix (%dx%d) ...\n",
351 static_cast<int>(ndim),
352 static_cast<int>(ndim));
354 nat = read_first_x(oenv, &status, trxfile, &t, &xread, box);
360 /* calculate x: a (fitted) structure of the selected atoms */
363 gmx_rmpbc(gpbc, nat, box, xread);
367 reset_x(nfit, ifit, nat, nullptr, xread, w_rls);
368 do_fit(nat, w_rls, xref, xread);
372 for (i = 0; i < natoms; i++)
374 rvec_sub(xread[index[i]], xref[index[i]], x[i]);
379 for (i = 0; i < natoms; i++)
381 rvec_sub(xread[index[i]], xav[i], x[i]);
385 for (j = 0; j < natoms; j++)
387 for (dj = 0; dj < DIM; dj++)
389 k = ndim * (DIM * j + dj);
391 for (i = j; i < natoms; i++)
394 for (d = 0; d < DIM; d++)
396 mat[l + d] += x[i][d] * xj;
401 } while (read_next_x(oenv, status, &t, xread, box) && (bRef || nframes < nframes0));
403 gmx_rmpbc_done(gpbc);
405 fprintf(stderr, "Read %d frames\n", nframes);
409 /* copy the reference structure to the ouput array x */
411 for (i = 0; i < natoms; i++)
413 copy_rvec(xref[index[i]], xproj[i]);
421 /* correct the covariance matrix for the mass */
422 inv_nframes = 1.0 / nframes;
423 for (j = 0; j < natoms; j++)
425 for (dj = 0; dj < DIM; dj++)
427 for (i = j; i < natoms; i++)
429 k = ndim * (DIM * j + dj) + DIM * i;
430 for (d = 0; d < DIM; d++)
432 mat[k + d] = mat[k + d] * inv_nframes * sqrtm[i] * sqrtm[j];
438 /* symmetrize the matrix */
439 for (j = 0; j < ndim; j++)
441 for (i = j; i < ndim; i++)
443 mat[ndim * i + j] = mat[ndim * j + i];
448 for (i = 0; i < ndim; i++)
450 trace += mat[i * ndim + i];
452 fprintf(stderr, "\nTrace of the covariance matrix: %g (%snm^2)\n", trace, bM ? "u " : "");
456 out = gmx_ffopen(asciifile, "w");
457 for (j = 0; j < ndim; j++)
459 for (i = 0; i < ndim; i += 3)
461 fprintf(out, "%g %g %g\n", mat[ndim * j + i], mat[ndim * j + i + 1], mat[ndim * j + i + 2]);
472 for (j = 0; j < ndim; j++)
474 mat2[j] = &(mat[ndim * j]);
475 for (i = 0; i <= j; i++)
477 if (mat2[j][i] < min)
481 if (mat2[j][j] > max)
488 for (i = 0; i < ndim; i++)
501 out = gmx_ffopen(xpmfile, "w");
506 bM ? "u nm^2" : "nm^2",
530 snew(mat2, ndim / DIM);
531 for (i = 0; i < ndim / DIM; i++)
533 snew(mat2[i], ndim / DIM);
535 for (j = 0; j < ndim / DIM; j++)
537 for (i = 0; i <= j; i++)
540 for (d = 0; d < DIM; d++)
542 mat2[j][i] += mat[ndim * (DIM * j + d) + DIM * i + d];
544 if (mat2[j][i] < min)
548 if (mat2[j][j] > max)
552 mat2[i][j] = mat2[j][i];
555 snew(axis, ndim / DIM);
556 for (i = 0; i < ndim / DIM; i++)
569 out = gmx_ffopen(xpmafile, "w");
574 bM ? "u nm^2" : "nm^2",
591 for (i = 0; i < ndim / DIM; i++)
599 /* call diagonalization routine */
601 snew(eigenvalues, ndim);
602 snew(eigenvectors, ndim * ndim);
604 std::memcpy(eigenvectors, mat, ndim * ndim * sizeof(real));
605 fprintf(stderr, "\nDiagonalizing ...\n");
607 eigensolver(eigenvectors, ndim, 0, ndim, eigenvalues, mat);
610 /* now write the output */
613 for (i = 0; i < ndim; i++)
615 sum += eigenvalues[i];
617 fprintf(stderr, "\nSum of the eigenvalues: %g (%snm^2)\n", sum, bM ? "u " : "");
618 if (std::abs(trace - sum) > 0.01 * trace)
621 "\nWARNING: eigenvalue sum deviates from the trace of the covariance matrix\n");
624 /* Set 'end', the maximum eigenvector and -value index used for output */
627 if (nframes - 1 < ndim)
630 fprintf(stderr, "\nWARNING: there are fewer frames in your trajectory than there are\n");
631 fprintf(stderr, "degrees of freedom in your system. Only generating the first\n");
632 fprintf(stderr, "%d out of %d eigenvectors and eigenvalues.\n", end, static_cast<int>(ndim));
640 fprintf(stderr, "\nWriting eigenvalues to %s\n", eigvalfile);
642 sprintf(str, "(%snm\\S2\\N)", bM ? "u " : "");
643 out = xvgropen(eigvalfile, "Eigenvalues of the covariance matrix", "Eigenvector index", str, oenv);
644 for (i = 0; (i < end); i++)
646 fprintf(out, "%10d %g\n", static_cast<int>(i + 1), eigenvalues[ndim - 1 - i]);
652 /* misuse lambda: 0/1 mass weighted analysis no/yes */
655 WriteXref = eWXR_YES;
656 for (i = 0; i < nfit; i++)
658 copy_rvec(xref[ifit[i]], x[i]);
668 /* misuse lambda: -1 for no fit */
669 WriteXref = eWXR_NOFIT;
673 eigvecfile, natoms, mat, TRUE, 1, end, WriteXref, x, bDiffMass1, xproj, bM, eigenvalues);
675 out = gmx_ffopen(logfile, "w");
677 fprintf(out, "Covariance analysis log, written %s\n", gmx_format_current_time().c_str());
679 fprintf(out, "Program: %s\n", argv[0]);
680 gmx_getcwd(str, STRLEN);
682 fprintf(out, "Working directory: %s\n\n", str);
685 "Read %d frames from %s (time %g to %g %s)\n",
688 output_env_conv_time(oenv, tstart),
689 output_env_conv_time(oenv, tend),
690 output_env_get_time_unit(oenv).c_str());
693 fprintf(out, "Read reference structure for fit from %s\n", fitfile);
697 fprintf(out, "Read index groups from %s\n", ndxfile);
701 fprintf(out, "Analysis group is '%s' (%d atoms)\n", ananame, natoms);
704 fprintf(out, "Fit group is '%s' (%d atoms)\n", fitname, nfit);
708 fprintf(out, "No fit was used\n");
710 fprintf(out, "Analysis is %smass weighted\n", bDiffMass2 ? "" : "non-");
713 fprintf(out, "Fit is %smass weighted\n", bDiffMass1 ? "" : "non-");
715 fprintf(out, "Diagonalized the %dx%d covariance matrix\n", static_cast<int>(ndim), static_cast<int>(ndim));
716 fprintf(out, "Trace of the covariance matrix before diagonalizing: %g\n", trace);
717 fprintf(out, "Trace of the covariance matrix after diagonalizing: %g\n\n", sum);
719 fprintf(out, "Wrote %d eigenvalues to %s\n", static_cast<int>(end), eigvalfile);
720 if (WriteXref == eWXR_YES)
722 fprintf(out, "Wrote reference structure to %s\n", eigvecfile);
724 fprintf(out, "Wrote average structure to %s and %s\n", averfile, eigvecfile);
725 fprintf(out, "Wrote eigenvectors %d to %d to %s\n", 1, end, eigvecfile);
729 fprintf(stderr, "Wrote the log to %s\n", logfile);