2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
42 #include "gromacs/commandline/pargs.h"
43 #include "gromacs/fileio/confio.h"
44 #include "gromacs/fileio/matio.h"
45 #include "gromacs/fileio/trxio.h"
46 #include "gromacs/fileio/xvgr.h"
47 #include "gromacs/gmxana/eigio.h"
48 #include "gromacs/gmxana/gmx_ana.h"
49 #include "gromacs/linearalgebra/eigensolver.h"
50 #include "gromacs/math/do_fit.h"
51 #include "gromacs/math/vec.h"
52 #include "gromacs/pbcutil/pbc.h"
53 #include "gromacs/pbcutil/rmpbc.h"
54 #include "gromacs/topology/index.h"
55 #include "gromacs/topology/topology.h"
56 #include "gromacs/utility/arraysize.h"
57 #include "gromacs/utility/cstringutil.h"
58 #include "gromacs/utility/fatalerror.h"
59 #include "gromacs/utility/futil.h"
60 #include "gromacs/utility/smalloc.h"
61 #include "gromacs/utility/sysinfo.h"
63 int gmx_covar(int argc, char* argv[])
65 const char* desc[] = {
66 "[THISMODULE] calculates and diagonalizes the (mass-weighted)",
68 "All structures are fitted to the structure in the structure file.",
69 "When this is not a run input file periodicity will not be taken into",
70 "account. When the fit and analysis groups are identical and the analysis",
71 "is non mass-weighted, the fit will also be non mass-weighted.",
73 "The eigenvectors are written to a trajectory file ([TT]-v[tt]).",
74 "When the same atoms are used for the fit and the covariance analysis,",
75 "the reference structure for the fit is written first with t=-1.",
76 "The average (or reference when [TT]-ref[tt] is used) structure is",
77 "written with t=0, the eigenvectors",
78 "are written as frames with the eigenvector number and eigenvalue",
79 "as step number and timestamp, respectively.",
81 "The eigenvectors can be analyzed with [gmx-anaeig].",
83 "Option [TT]-ascii[tt] writes the whole covariance matrix to",
84 "an ASCII file. The order of the elements is: x1x1, x1y1, x1z1, x1x2, ...",
86 "Option [TT]-xpm[tt] writes the whole covariance matrix to an [REF].xpm[ref] file.",
88 "Option [TT]-xpma[tt] writes the atomic covariance matrix to an [REF].xpm[ref] file,",
89 "i.e. for each atom pair the sum of the xx, yy and zz covariances is",
92 "Note that the diagonalization of a matrix requires memory and time",
93 "that will increase at least as fast as than the square of the number",
94 "of atoms involved. It is easy to run out of memory, in which",
95 "case this tool will probably exit with a 'Segmentation fault'. You",
96 "should consider carefully whether a reduced set of atoms will meet",
97 "your needs for lower costs."
99 static gmx_bool bFit = TRUE, bRef = FALSE, bM = FALSE, bPBC = TRUE;
102 { "-fit", FALSE, etBOOL, { &bFit }, "Fit to a reference structure" },
107 "Use the deviation from the conformation in the structure file instead of from the "
109 { "-mwa", FALSE, etBOOL, { &bM }, "Mass-weighted covariance analysis" },
110 { "-last", FALSE, etINT, { &end }, "Last eigenvector to write away (-1 is till the last)" },
111 { "-pbc", FALSE, etBOOL, { &bPBC }, "Apply corrections for periodic boundary conditions" }
113 FILE* out = nullptr; /* initialization makes all compilers happy */
118 rvec * x, *xread, *xref, *xav, *xproj;
120 real * sqrtm, *mat, *eigenvalues, sum, trace, inv_nframes;
121 real t, tstart, tend, **mat2;
122 real xj, *w_rls = nullptr;
123 real min, max, *axis;
124 int natoms, nat, nframes0, nframes, nlevels;
125 int64_t ndim, i, j, k, l;
127 const char * fitfile, *trxfile, *ndxfile;
128 const char * eigvalfile, *eigvecfile, *averfile, *logfile;
129 const char * asciifile, *xpmfile, *xpmafile;
130 char str[STRLEN], *fitname, *ananame;
133 gmx_bool bDiffMass1, bDiffMass2;
136 gmx_output_env_t* oenv;
137 gmx_rmpbc_t gpbc = nullptr;
140 { efTRX, "-f", nullptr, ffREAD }, { efTPS, nullptr, nullptr, ffREAD },
141 { efNDX, nullptr, nullptr, ffOPTRD }, { efXVG, nullptr, "eigenval", ffWRITE },
142 { efTRN, "-v", "eigenvec", ffWRITE }, { efSTO, "-av", "average.pdb", ffWRITE },
143 { efLOG, nullptr, "covar", ffWRITE }, { efDAT, "-ascii", "covar", ffOPTWR },
144 { efXPM, "-xpm", "covar", ffOPTWR }, { efXPM, "-xpma", "covara", ffOPTWR }
146 #define NFILE asize(fnm)
148 if (!parse_common_args(&argc, argv, PCA_CAN_TIME | PCA_TIME_UNIT, NFILE, fnm, asize(pa), pa,
149 asize(desc), desc, 0, nullptr, &oenv))
156 fitfile = ftp2fn(efTPS, NFILE, fnm);
157 trxfile = ftp2fn(efTRX, NFILE, fnm);
158 ndxfile = ftp2fn_null(efNDX, NFILE, fnm);
159 eigvalfile = ftp2fn(efXVG, NFILE, fnm);
160 eigvecfile = ftp2fn(efTRN, NFILE, fnm);
161 averfile = ftp2fn(efSTO, NFILE, fnm);
162 logfile = ftp2fn(efLOG, NFILE, fnm);
163 asciifile = opt2fn_null("-ascii", NFILE, fnm);
164 xpmfile = opt2fn_null("-xpm", NFILE, fnm);
165 xpmafile = opt2fn_null("-xpma", NFILE, fnm);
167 read_tps_conf(fitfile, &top, &ePBC, &xref, nullptr, box, TRUE);
172 printf("\nChoose a group for the least squares fit\n");
173 get_index(atoms, ndxfile, 1, &nfit, &ifit, &fitname);
176 gmx_fatal(FARGS, "Need >= 3 points to fit!\n");
183 printf("\nChoose a group for the covariance analysis\n");
184 get_index(atoms, ndxfile, 1, &natoms, &index, &ananame);
189 snew(w_rls, atoms->nr);
190 for (i = 0; (i < nfit); i++)
192 w_rls[ifit[i]] = atoms->atom[ifit[i]].m;
195 bDiffMass1 = bDiffMass1 || (w_rls[ifit[i]] != w_rls[ifit[i - 1]]);
201 for (i = 0; (i < natoms); i++)
205 sqrtm[i] = std::sqrt(atoms->atom[index[i]].m);
208 bDiffMass2 = bDiffMass2 || (sqrtm[i] != sqrtm[i - 1]);
217 if (bFit && bDiffMass1 && !bDiffMass2)
219 bDiffMass1 = natoms != nfit;
220 for (i = 0; (i < natoms) && !bDiffMass1; i++)
222 bDiffMass1 = index[i] != ifit[i];
228 "Note: the fit and analysis group are identical,\n"
229 " while the fit is mass weighted and the analysis is not.\n"
230 " Making the fit non mass weighted.\n\n");
231 for (i = 0; (i < nfit); i++)
233 w_rls[ifit[i]] = 1.0;
238 /* Prepare reference frame */
241 gpbc = gmx_rmpbc_init(&top.idef, ePBC, atoms->nr);
242 gmx_rmpbc(gpbc, atoms->nr, box, xref);
246 reset_x(nfit, ifit, atoms->nr, nullptr, xref, w_rls);
252 if (std::sqrt(static_cast<real>(INT64_MAX)) < static_cast<real>(ndim))
254 gmx_fatal(FARGS, "Number of degrees of freedoms to large for matrix.\n");
256 snew(mat, ndim * ndim);
258 fprintf(stderr, "Calculating the average structure ...\n");
260 nat = read_first_x(oenv, &status, trxfile, &t, &xread, box);
261 if (nat != atoms->nr)
263 fprintf(stderr, "\nWARNING: number of atoms in tpx (%d) and trajectory (%d) do not match\n",
269 /* calculate x: a fitted struture of the selected atoms */
272 gmx_rmpbc(gpbc, nat, box, xread);
276 reset_x(nfit, ifit, nat, nullptr, xread, w_rls);
277 do_fit(nat, w_rls, xref, xread);
279 for (i = 0; i < natoms; i++)
281 rvec_inc(xav[i], xread[index[i]]);
283 } while (read_next_x(oenv, status, &t, xread, box));
286 inv_nframes = 1.0 / nframes0;
287 for (i = 0; i < natoms; i++)
289 for (d = 0; d < DIM; d++)
291 xav[i][d] *= inv_nframes;
292 xread[index[i]][d] = xav[i][d];
295 write_sto_conf_indexed(opt2fn("-av", NFILE, fnm), "Average structure", atoms, xread, nullptr,
296 epbcNONE, zerobox, natoms, index);
299 fprintf(stderr, "Constructing covariance matrix (%dx%d) ...\n", static_cast<int>(ndim),
300 static_cast<int>(ndim));
302 nat = read_first_x(oenv, &status, trxfile, &t, &xread, box);
308 /* calculate x: a (fitted) structure of the selected atoms */
311 gmx_rmpbc(gpbc, nat, box, xread);
315 reset_x(nfit, ifit, nat, nullptr, xread, w_rls);
316 do_fit(nat, w_rls, xref, xread);
320 for (i = 0; i < natoms; i++)
322 rvec_sub(xread[index[i]], xref[index[i]], x[i]);
327 for (i = 0; i < natoms; i++)
329 rvec_sub(xread[index[i]], xav[i], x[i]);
333 for (j = 0; j < natoms; j++)
335 for (dj = 0; dj < DIM; dj++)
337 k = ndim * (DIM * j + dj);
339 for (i = j; i < natoms; i++)
342 for (d = 0; d < DIM; d++)
344 mat[l + d] += x[i][d] * xj;
349 } while (read_next_x(oenv, status, &t, xread, box) && (bRef || nframes < nframes0));
351 gmx_rmpbc_done(gpbc);
353 fprintf(stderr, "Read %d frames\n", nframes);
357 /* copy the reference structure to the ouput array x */
359 for (i = 0; i < natoms; i++)
361 copy_rvec(xref[index[i]], xproj[i]);
369 /* correct the covariance matrix for the mass */
370 inv_nframes = 1.0 / nframes;
371 for (j = 0; j < natoms; j++)
373 for (dj = 0; dj < DIM; dj++)
375 for (i = j; i < natoms; i++)
377 k = ndim * (DIM * j + dj) + DIM * i;
378 for (d = 0; d < DIM; d++)
380 mat[k + d] = mat[k + d] * inv_nframes * sqrtm[i] * sqrtm[j];
386 /* symmetrize the matrix */
387 for (j = 0; j < ndim; j++)
389 for (i = j; i < ndim; i++)
391 mat[ndim * i + j] = mat[ndim * j + i];
396 for (i = 0; i < ndim; i++)
398 trace += mat[i * ndim + i];
400 fprintf(stderr, "\nTrace of the covariance matrix: %g (%snm^2)\n", trace, bM ? "u " : "");
404 out = gmx_ffopen(asciifile, "w");
405 for (j = 0; j < ndim; j++)
407 for (i = 0; i < ndim; i += 3)
409 fprintf(out, "%g %g %g\n", mat[ndim * j + i], mat[ndim * j + i + 1],
410 mat[ndim * j + i + 2]);
421 for (j = 0; j < ndim; j++)
423 mat2[j] = &(mat[ndim * j]);
424 for (i = 0; i <= j; i++)
426 if (mat2[j][i] < min)
430 if (mat2[j][j] > max)
437 for (i = 0; i < ndim; i++)
450 out = gmx_ffopen(xpmfile, "w");
452 write_xpm3(out, 0, "Covariance", bM ? "u nm^2" : "nm^2", "dim", "dim", ndim, ndim, axis,
453 axis, mat2, min, 0.0, max, rlo, rmi, rhi, &nlevels);
463 snew(mat2, ndim / DIM);
464 for (i = 0; i < ndim / DIM; i++)
466 snew(mat2[i], ndim / DIM);
468 for (j = 0; j < ndim / DIM; j++)
470 for (i = 0; i <= j; i++)
473 for (d = 0; d < DIM; d++)
475 mat2[j][i] += mat[ndim * (DIM * j + d) + DIM * i + d];
477 if (mat2[j][i] < min)
481 if (mat2[j][j] > max)
485 mat2[i][j] = mat2[j][i];
488 snew(axis, ndim / DIM);
489 for (i = 0; i < ndim / DIM; i++)
502 out = gmx_ffopen(xpmafile, "w");
504 write_xpm3(out, 0, "Covariance", bM ? "u nm^2" : "nm^2", "atom", "atom", ndim / DIM,
505 ndim / DIM, axis, axis, mat2, min, 0.0, max, rlo, rmi, rhi, &nlevels);
508 for (i = 0; i < ndim / DIM; i++)
516 /* call diagonalization routine */
518 snew(eigenvalues, ndim);
519 snew(eigenvectors, ndim * ndim);
521 std::memcpy(eigenvectors, mat, ndim * ndim * sizeof(real));
522 fprintf(stderr, "\nDiagonalizing ...\n");
524 eigensolver(eigenvectors, ndim, 0, ndim, eigenvalues, mat);
527 /* now write the output */
530 for (i = 0; i < ndim; i++)
532 sum += eigenvalues[i];
534 fprintf(stderr, "\nSum of the eigenvalues: %g (%snm^2)\n", sum, bM ? "u " : "");
535 if (std::abs(trace - sum) > 0.01 * trace)
538 "\nWARNING: eigenvalue sum deviates from the trace of the covariance matrix\n");
541 /* Set 'end', the maximum eigenvector and -value index used for output */
544 if (nframes - 1 < ndim)
548 "\nWARNING: there are fewer frames in your trajectory than there are\n");
549 fprintf(stderr, "degrees of freedom in your system. Only generating the first\n");
550 fprintf(stderr, "%d out of %d eigenvectors and eigenvalues.\n", end, static_cast<int>(ndim));
558 fprintf(stderr, "\nWriting eigenvalues to %s\n", eigvalfile);
560 sprintf(str, "(%snm\\S2\\N)", bM ? "u " : "");
561 out = xvgropen(eigvalfile, "Eigenvalues of the covariance matrix", "Eigenvector index", str, oenv);
562 for (i = 0; (i < end); i++)
564 fprintf(out, "%10d %g\n", static_cast<int>(i + 1), eigenvalues[ndim - 1 - i]);
570 /* misuse lambda: 0/1 mass weighted analysis no/yes */
573 WriteXref = eWXR_YES;
574 for (i = 0; i < nfit; i++)
576 copy_rvec(xref[ifit[i]], x[i]);
586 /* misuse lambda: -1 for no fit */
587 WriteXref = eWXR_NOFIT;
590 write_eigenvectors(eigvecfile, natoms, mat, TRUE, 1, end, WriteXref, x, bDiffMass1, xproj, bM,
593 out = gmx_ffopen(logfile, "w");
595 fprintf(out, "Covariance analysis log, written %s\n", gmx_format_current_time().c_str());
597 fprintf(out, "Program: %s\n", argv[0]);
598 gmx_getcwd(str, STRLEN);
600 fprintf(out, "Working directory: %s\n\n", str);
602 fprintf(out, "Read %d frames from %s (time %g to %g %s)\n", nframes, trxfile,
603 output_env_conv_time(oenv, tstart), output_env_conv_time(oenv, tend),
604 output_env_get_time_unit(oenv).c_str());
607 fprintf(out, "Read reference structure for fit from %s\n", fitfile);
611 fprintf(out, "Read index groups from %s\n", ndxfile);
615 fprintf(out, "Analysis group is '%s' (%d atoms)\n", ananame, natoms);
618 fprintf(out, "Fit group is '%s' (%d atoms)\n", fitname, nfit);
622 fprintf(out, "No fit was used\n");
624 fprintf(out, "Analysis is %smass weighted\n", bDiffMass2 ? "" : "non-");
627 fprintf(out, "Fit is %smass weighted\n", bDiffMass1 ? "" : "non-");
629 fprintf(out, "Diagonalized the %dx%d covariance matrix\n", static_cast<int>(ndim),
630 static_cast<int>(ndim));
631 fprintf(out, "Trace of the covariance matrix before diagonalizing: %g\n", trace);
632 fprintf(out, "Trace of the covariance matrix after diagonalizing: %g\n\n", sum);
634 fprintf(out, "Wrote %d eigenvalues to %s\n", static_cast<int>(end), eigvalfile);
635 if (WriteXref == eWXR_YES)
637 fprintf(out, "Wrote reference structure to %s\n", eigvecfile);
639 fprintf(out, "Wrote average structure to %s and %s\n", averfile, eigvecfile);
640 fprintf(out, "Wrote eigenvectors %d to %d to %s\n", 1, end, eigvecfile);
644 fprintf(stderr, "Wrote the log to %s\n", logfile);