2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7 * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
43 #include "gromacs/commandline/pargs.h"
44 #include "gromacs/fileio/confio.h"
45 #include "gromacs/fileio/matio.h"
46 #include "gromacs/fileio/trxio.h"
47 #include "gromacs/fileio/xvgr.h"
48 #include "gromacs/gmxana/eigio.h"
49 #include "gromacs/gmxana/gmx_ana.h"
50 #include "gromacs/linearalgebra/eigensolver.h"
51 #include "gromacs/math/do_fit.h"
52 #include "gromacs/math/vec.h"
53 #include "gromacs/pbcutil/pbc.h"
54 #include "gromacs/pbcutil/rmpbc.h"
55 #include "gromacs/topology/index.h"
56 #include "gromacs/topology/topology.h"
57 #include "gromacs/utility/arraysize.h"
58 #include "gromacs/utility/cstringutil.h"
59 #include "gromacs/utility/fatalerror.h"
60 #include "gromacs/utility/futil.h"
61 #include "gromacs/utility/smalloc.h"
62 #include "gromacs/utility/sysinfo.h"
64 int gmx_covar(int argc, char* argv[])
66 const char* desc[] = {
67 "[THISMODULE] calculates and diagonalizes the (mass-weighted)",
69 "All structures are fitted to the structure in the structure file.",
70 "When this is not a run input file periodicity will not be taken into",
71 "account. When the fit and analysis groups are identical and the analysis",
72 "is non mass-weighted, the fit will also be non mass-weighted.",
74 "The eigenvectors are written to a trajectory file ([TT]-v[tt]).",
75 "When the same atoms are used for the fit and the covariance analysis,",
76 "the reference structure for the fit is written first with t=-1.",
77 "The average (or reference when [TT]-ref[tt] is used) structure is",
78 "written with t=0, the eigenvectors",
79 "are written as frames with the eigenvector number and eigenvalue",
80 "as step number and timestamp, respectively.",
82 "The eigenvectors can be analyzed with [gmx-anaeig].",
84 "Option [TT]-ascii[tt] writes the whole covariance matrix to",
85 "an ASCII file. The order of the elements is: x1x1, x1y1, x1z1, x1x2, ...",
87 "Option [TT]-xpm[tt] writes the whole covariance matrix to an [REF].xpm[ref] file.",
89 "Option [TT]-xpma[tt] writes the atomic covariance matrix to an [REF].xpm[ref] file,",
90 "i.e. for each atom pair the sum of the xx, yy and zz covariances is",
93 "Note that the diagonalization of a matrix requires memory and time",
94 "that will increase at least as fast as than the square of the number",
95 "of atoms involved. It is easy to run out of memory, in which",
96 "case this tool will probably exit with a 'Segmentation fault'. You",
97 "should consider carefully whether a reduced set of atoms will meet",
98 "your needs for lower costs."
100 static gmx_bool bFit = TRUE, bRef = FALSE, bM = FALSE, bPBC = TRUE;
103 { "-fit", FALSE, etBOOL, { &bFit }, "Fit to a reference structure" },
108 "Use the deviation from the conformation in the structure file instead of from the "
110 { "-mwa", FALSE, etBOOL, { &bM }, "Mass-weighted covariance analysis" },
111 { "-last", FALSE, etINT, { &end }, "Last eigenvector to write away (-1 is till the last)" },
112 { "-pbc", FALSE, etBOOL, { &bPBC }, "Apply corrections for periodic boundary conditions" }
114 FILE* out = nullptr; /* initialization makes all compilers happy */
119 rvec * x, *xread, *xref, *xav, *xproj;
121 real * sqrtm, *mat, *eigenvalues, sum, trace, inv_nframes;
122 real t, tstart, tend, **mat2;
123 real xj, *w_rls = nullptr;
124 real min, max, *axis;
125 int natoms, nat, nframes0, nframes, nlevels;
126 int64_t ndim, i, j, k, l;
128 const char * fitfile, *trxfile, *ndxfile;
129 const char * eigvalfile, *eigvecfile, *averfile, *logfile;
130 const char * asciifile, *xpmfile, *xpmafile;
131 char str[STRLEN], *fitname, *ananame;
134 gmx_bool bDiffMass1, bDiffMass2;
137 gmx_output_env_t* oenv;
138 gmx_rmpbc_t gpbc = nullptr;
141 { efTRX, "-f", nullptr, ffREAD }, { efTPS, nullptr, nullptr, ffREAD },
142 { efNDX, nullptr, nullptr, ffOPTRD }, { efXVG, nullptr, "eigenval", ffWRITE },
143 { efTRN, "-v", "eigenvec", ffWRITE }, { efSTO, "-av", "average.pdb", ffWRITE },
144 { efLOG, nullptr, "covar", ffWRITE }, { efDAT, "-ascii", "covar", ffOPTWR },
145 { efXPM, "-xpm", "covar", ffOPTWR }, { efXPM, "-xpma", "covara", ffOPTWR }
147 #define NFILE asize(fnm)
149 if (!parse_common_args(&argc, argv, PCA_CAN_TIME | PCA_TIME_UNIT, NFILE, fnm, asize(pa), pa,
150 asize(desc), desc, 0, nullptr, &oenv))
157 fitfile = ftp2fn(efTPS, NFILE, fnm);
158 trxfile = ftp2fn(efTRX, NFILE, fnm);
159 ndxfile = ftp2fn_null(efNDX, NFILE, fnm);
160 eigvalfile = ftp2fn(efXVG, NFILE, fnm);
161 eigvecfile = ftp2fn(efTRN, NFILE, fnm);
162 averfile = ftp2fn(efSTO, NFILE, fnm);
163 logfile = ftp2fn(efLOG, NFILE, fnm);
164 asciifile = opt2fn_null("-ascii", NFILE, fnm);
165 xpmfile = opt2fn_null("-xpm", NFILE, fnm);
166 xpmafile = opt2fn_null("-xpma", NFILE, fnm);
168 read_tps_conf(fitfile, &top, &ePBC, &xref, nullptr, box, TRUE);
173 printf("\nChoose a group for the least squares fit\n");
174 get_index(atoms, ndxfile, 1, &nfit, &ifit, &fitname);
177 gmx_fatal(FARGS, "Need >= 3 points to fit!\n");
184 printf("\nChoose a group for the covariance analysis\n");
185 get_index(atoms, ndxfile, 1, &natoms, &index, &ananame);
190 snew(w_rls, atoms->nr);
191 for (i = 0; (i < nfit); i++)
193 w_rls[ifit[i]] = atoms->atom[ifit[i]].m;
196 bDiffMass1 = bDiffMass1 || (w_rls[ifit[i]] != w_rls[ifit[i - 1]]);
202 for (i = 0; (i < natoms); i++)
206 sqrtm[i] = std::sqrt(atoms->atom[index[i]].m);
209 bDiffMass2 = bDiffMass2 || (sqrtm[i] != sqrtm[i - 1]);
218 if (bFit && bDiffMass1 && !bDiffMass2)
220 bDiffMass1 = natoms != nfit;
221 for (i = 0; (i < natoms) && !bDiffMass1; i++)
223 bDiffMass1 = index[i] != ifit[i];
229 "Note: the fit and analysis group are identical,\n"
230 " while the fit is mass weighted and the analysis is not.\n"
231 " Making the fit non mass weighted.\n\n");
232 for (i = 0; (i < nfit); i++)
234 w_rls[ifit[i]] = 1.0;
239 /* Prepare reference frame */
242 gpbc = gmx_rmpbc_init(&top.idef, ePBC, atoms->nr);
243 gmx_rmpbc(gpbc, atoms->nr, box, xref);
247 reset_x(nfit, ifit, atoms->nr, nullptr, xref, w_rls);
253 if (std::sqrt(static_cast<real>(INT64_MAX)) < static_cast<real>(ndim))
255 gmx_fatal(FARGS, "Number of degrees of freedoms to large for matrix.\n");
257 snew(mat, ndim * ndim);
259 fprintf(stderr, "Calculating the average structure ...\n");
261 nat = read_first_x(oenv, &status, trxfile, &t, &xread, box);
262 if (nat != atoms->nr)
264 fprintf(stderr, "\nWARNING: number of atoms in tpx (%d) and trajectory (%d) do not match\n",
270 /* calculate x: a fitted struture of the selected atoms */
273 gmx_rmpbc(gpbc, nat, box, xread);
277 reset_x(nfit, ifit, nat, nullptr, xread, w_rls);
278 do_fit(nat, w_rls, xref, xread);
280 for (i = 0; i < natoms; i++)
282 rvec_inc(xav[i], xread[index[i]]);
284 } while (read_next_x(oenv, status, &t, xread, box));
287 inv_nframes = 1.0 / nframes0;
288 for (i = 0; i < natoms; i++)
290 for (d = 0; d < DIM; d++)
292 xav[i][d] *= inv_nframes;
293 xread[index[i]][d] = xav[i][d];
296 write_sto_conf_indexed(opt2fn("-av", NFILE, fnm), "Average structure", atoms, xread, nullptr,
297 epbcNONE, zerobox, natoms, index);
300 fprintf(stderr, "Constructing covariance matrix (%dx%d) ...\n", static_cast<int>(ndim),
301 static_cast<int>(ndim));
303 nat = read_first_x(oenv, &status, trxfile, &t, &xread, box);
309 /* calculate x: a (fitted) structure of the selected atoms */
312 gmx_rmpbc(gpbc, nat, box, xread);
316 reset_x(nfit, ifit, nat, nullptr, xread, w_rls);
317 do_fit(nat, w_rls, xref, xread);
321 for (i = 0; i < natoms; i++)
323 rvec_sub(xread[index[i]], xref[index[i]], x[i]);
328 for (i = 0; i < natoms; i++)
330 rvec_sub(xread[index[i]], xav[i], x[i]);
334 for (j = 0; j < natoms; j++)
336 for (dj = 0; dj < DIM; dj++)
338 k = ndim * (DIM * j + dj);
340 for (i = j; i < natoms; i++)
343 for (d = 0; d < DIM; d++)
345 mat[l + d] += x[i][d] * xj;
350 } while (read_next_x(oenv, status, &t, xread, box) && (bRef || nframes < nframes0));
352 gmx_rmpbc_done(gpbc);
354 fprintf(stderr, "Read %d frames\n", nframes);
358 /* copy the reference structure to the ouput array x */
360 for (i = 0; i < natoms; i++)
362 copy_rvec(xref[index[i]], xproj[i]);
370 /* correct the covariance matrix for the mass */
371 inv_nframes = 1.0 / nframes;
372 for (j = 0; j < natoms; j++)
374 for (dj = 0; dj < DIM; dj++)
376 for (i = j; i < natoms; i++)
378 k = ndim * (DIM * j + dj) + DIM * i;
379 for (d = 0; d < DIM; d++)
381 mat[k + d] = mat[k + d] * inv_nframes * sqrtm[i] * sqrtm[j];
387 /* symmetrize the matrix */
388 for (j = 0; j < ndim; j++)
390 for (i = j; i < ndim; i++)
392 mat[ndim * i + j] = mat[ndim * j + i];
397 for (i = 0; i < ndim; i++)
399 trace += mat[i * ndim + i];
401 fprintf(stderr, "\nTrace of the covariance matrix: %g (%snm^2)\n", trace, bM ? "u " : "");
405 out = gmx_ffopen(asciifile, "w");
406 for (j = 0; j < ndim; j++)
408 for (i = 0; i < ndim; i += 3)
410 fprintf(out, "%g %g %g\n", mat[ndim * j + i], mat[ndim * j + i + 1],
411 mat[ndim * j + i + 2]);
422 for (j = 0; j < ndim; j++)
424 mat2[j] = &(mat[ndim * j]);
425 for (i = 0; i <= j; i++)
427 if (mat2[j][i] < min)
431 if (mat2[j][j] > max)
438 for (i = 0; i < ndim; i++)
451 out = gmx_ffopen(xpmfile, "w");
453 write_xpm3(out, 0, "Covariance", bM ? "u nm^2" : "nm^2", "dim", "dim", ndim, ndim, axis,
454 axis, mat2, min, 0.0, max, rlo, rmi, rhi, &nlevels);
464 snew(mat2, ndim / DIM);
465 for (i = 0; i < ndim / DIM; i++)
467 snew(mat2[i], ndim / DIM);
469 for (j = 0; j < ndim / DIM; j++)
471 for (i = 0; i <= j; i++)
474 for (d = 0; d < DIM; d++)
476 mat2[j][i] += mat[ndim * (DIM * j + d) + DIM * i + d];
478 if (mat2[j][i] < min)
482 if (mat2[j][j] > max)
486 mat2[i][j] = mat2[j][i];
489 snew(axis, ndim / DIM);
490 for (i = 0; i < ndim / DIM; i++)
503 out = gmx_ffopen(xpmafile, "w");
505 write_xpm3(out, 0, "Covariance", bM ? "u nm^2" : "nm^2", "atom", "atom", ndim / DIM,
506 ndim / DIM, axis, axis, mat2, min, 0.0, max, rlo, rmi, rhi, &nlevels);
509 for (i = 0; i < ndim / DIM; i++)
517 /* call diagonalization routine */
519 snew(eigenvalues, ndim);
520 snew(eigenvectors, ndim * ndim);
522 std::memcpy(eigenvectors, mat, ndim * ndim * sizeof(real));
523 fprintf(stderr, "\nDiagonalizing ...\n");
525 eigensolver(eigenvectors, ndim, 0, ndim, eigenvalues, mat);
528 /* now write the output */
531 for (i = 0; i < ndim; i++)
533 sum += eigenvalues[i];
535 fprintf(stderr, "\nSum of the eigenvalues: %g (%snm^2)\n", sum, bM ? "u " : "");
536 if (std::abs(trace - sum) > 0.01 * trace)
539 "\nWARNING: eigenvalue sum deviates from the trace of the covariance matrix\n");
542 /* Set 'end', the maximum eigenvector and -value index used for output */
545 if (nframes - 1 < ndim)
549 "\nWARNING: there are fewer frames in your trajectory than there are\n");
550 fprintf(stderr, "degrees of freedom in your system. Only generating the first\n");
551 fprintf(stderr, "%d out of %d eigenvectors and eigenvalues.\n", end, static_cast<int>(ndim));
559 fprintf(stderr, "\nWriting eigenvalues to %s\n", eigvalfile);
561 sprintf(str, "(%snm\\S2\\N)", bM ? "u " : "");
562 out = xvgropen(eigvalfile, "Eigenvalues of the covariance matrix", "Eigenvector index", str, oenv);
563 for (i = 0; (i < end); i++)
565 fprintf(out, "%10d %g\n", static_cast<int>(i + 1), eigenvalues[ndim - 1 - i]);
571 /* misuse lambda: 0/1 mass weighted analysis no/yes */
574 WriteXref = eWXR_YES;
575 for (i = 0; i < nfit; i++)
577 copy_rvec(xref[ifit[i]], x[i]);
587 /* misuse lambda: -1 for no fit */
588 WriteXref = eWXR_NOFIT;
591 write_eigenvectors(eigvecfile, natoms, mat, TRUE, 1, end, WriteXref, x, bDiffMass1, xproj, bM,
594 out = gmx_ffopen(logfile, "w");
596 fprintf(out, "Covariance analysis log, written %s\n", gmx_format_current_time().c_str());
598 fprintf(out, "Program: %s\n", argv[0]);
599 gmx_getcwd(str, STRLEN);
601 fprintf(out, "Working directory: %s\n\n", str);
603 fprintf(out, "Read %d frames from %s (time %g to %g %s)\n", nframes, trxfile,
604 output_env_conv_time(oenv, tstart), output_env_conv_time(oenv, tend),
605 output_env_get_time_unit(oenv).c_str());
608 fprintf(out, "Read reference structure for fit from %s\n", fitfile);
612 fprintf(out, "Read index groups from %s\n", ndxfile);
616 fprintf(out, "Analysis group is '%s' (%d atoms)\n", ananame, natoms);
619 fprintf(out, "Fit group is '%s' (%d atoms)\n", fitname, nfit);
623 fprintf(out, "No fit was used\n");
625 fprintf(out, "Analysis is %smass weighted\n", bDiffMass2 ? "" : "non-");
628 fprintf(out, "Fit is %smass weighted\n", bDiffMass1 ? "" : "non-");
630 fprintf(out, "Diagonalized the %dx%d covariance matrix\n", static_cast<int>(ndim),
631 static_cast<int>(ndim));
632 fprintf(out, "Trace of the covariance matrix before diagonalizing: %g\n", trace);
633 fprintf(out, "Trace of the covariance matrix after diagonalizing: %g\n\n", sum);
635 fprintf(out, "Wrote %d eigenvalues to %s\n", static_cast<int>(end), eigvalfile);
636 if (WriteXref == eWXR_YES)
638 fprintf(out, "Wrote reference structure to %s\n", eigvecfile);
640 fprintf(out, "Wrote average structure to %s and %s\n", averfile, eigvecfile);
641 fprintf(out, "Wrote eigenvectors %d to %d to %s\n", 1, end, eigvecfile);
645 fprintf(stderr, "Wrote the log to %s\n", logfile);