2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
42 #include "gromacs/commandline/pargs.h"
43 #include "gromacs/fileio/confio.h"
44 #include "gromacs/fileio/matio.h"
45 #include "gromacs/fileio/trxio.h"
46 #include "gromacs/fileio/xvgr.h"
47 #include "gromacs/gmxana/eigio.h"
48 #include "gromacs/gmxana/gmx_ana.h"
49 #include "gromacs/legacyheaders/macros.h"
50 #include "gromacs/legacyheaders/txtdump.h"
51 #include "gromacs/legacyheaders/typedefs.h"
52 #include "gromacs/linearalgebra/eigensolver.h"
53 #include "gromacs/math/do_fit.h"
54 #include "gromacs/math/vec.h"
55 #include "gromacs/pbcutil/rmpbc.h"
56 #include "gromacs/topology/index.h"
57 #include "gromacs/utility/cstringutil.h"
58 #include "gromacs/utility/fatalerror.h"
59 #include "gromacs/utility/futil.h"
60 #include "gromacs/utility/smalloc.h"
61 #include "gromacs/utility/sysinfo.h"
63 int gmx_covar(int argc, char *argv[])
65 const char *desc[] = {
66 "[THISMODULE] calculates and diagonalizes the (mass-weighted)",
68 "All structures are fitted to the structure in the structure file.",
69 "When this is not a run input file periodicity will not be taken into",
70 "account. When the fit and analysis groups are identical and the analysis",
71 "is non mass-weighted, the fit will also be non mass-weighted.",
73 "The eigenvectors are written to a trajectory file ([TT]-v[tt]).",
74 "When the same atoms are used for the fit and the covariance analysis,",
75 "the reference structure for the fit is written first with t=-1.",
76 "The average (or reference when [TT]-ref[tt] is used) structure is",
77 "written with t=0, the eigenvectors",
78 "are written as frames with the eigenvector number as timestamp.",
80 "The eigenvectors can be analyzed with [gmx-anaeig].",
82 "Option [TT]-ascii[tt] writes the whole covariance matrix to",
83 "an ASCII file. The order of the elements is: x1x1, x1y1, x1z1, x1x2, ...",
85 "Option [TT]-xpm[tt] writes the whole covariance matrix to an [REF].xpm[ref] file.",
87 "Option [TT]-xpma[tt] writes the atomic covariance matrix to an [REF].xpm[ref] file,",
88 "i.e. for each atom pair the sum of the xx, yy and zz covariances is",
91 "Note that the diagonalization of a matrix requires memory and time",
92 "that will increase at least as fast as than the square of the number",
93 "of atoms involved. It is easy to run out of memory, in which",
94 "case this tool will probably exit with a 'Segmentation fault'. You",
95 "should consider carefully whether a reduced set of atoms will meet",
96 "your needs for lower costs."
98 static gmx_bool bFit = TRUE, bRef = FALSE, bM = FALSE, bPBC = TRUE;
101 { "-fit", FALSE, etBOOL, {&bFit},
102 "Fit to a reference structure"},
103 { "-ref", FALSE, etBOOL, {&bRef},
104 "Use the deviation from the conformation in the structure file instead of from the average" },
105 { "-mwa", FALSE, etBOOL, {&bM},
106 "Mass-weighted covariance analysis"},
107 { "-last", FALSE, etINT, {&end},
108 "Last eigenvector to write away (-1 is till the last)" },
109 { "-pbc", FALSE, etBOOL, {&bPBC},
110 "Apply corrections for periodic boundary conditions" }
112 FILE *out = NULL; /* initialization makes all compilers happy */
118 rvec *x, *xread, *xref, *xav, *xproj;
120 real *sqrtm, *mat, *eigenvalues, sum, trace, inv_nframes;
121 real t, tstart, tend, **mat2;
122 real xj, *w_rls = NULL;
123 real min, max, *axis;
125 int natoms, nat, count, nframes0, nframes, nlevels;
126 gmx_int64_t ndim, i, j, k, l;
128 const char *fitfile, *trxfile, *ndxfile;
129 const char *eigvalfile, *eigvecfile, *averfile, *logfile;
130 const char *asciifile, *xpmfile, *xpmafile;
131 char str[STRLEN], *fitname, *ananame, *pcwd;
133 atom_id *index, *ifit;
134 gmx_bool bDiffMass1, bDiffMass2;
135 char timebuf[STRLEN];
139 gmx_rmpbc_t gpbc = NULL;
142 { efTRX, "-f", NULL, ffREAD },
143 { efTPS, NULL, NULL, ffREAD },
144 { efNDX, NULL, NULL, ffOPTRD },
145 { efXVG, NULL, "eigenval", ffWRITE },
146 { efTRN, "-v", "eigenvec", ffWRITE },
147 { efSTO, "-av", "average.pdb", ffWRITE },
148 { efLOG, NULL, "covar", ffWRITE },
149 { efDAT, "-ascii", "covar", ffOPTWR },
150 { efXPM, "-xpm", "covar", ffOPTWR },
151 { efXPM, "-xpma", "covara", ffOPTWR }
153 #define NFILE asize(fnm)
155 if (!parse_common_args(&argc, argv, PCA_CAN_TIME | PCA_TIME_UNIT,
156 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &oenv))
163 fitfile = ftp2fn(efTPS, NFILE, fnm);
164 trxfile = ftp2fn(efTRX, NFILE, fnm);
165 ndxfile = ftp2fn_null(efNDX, NFILE, fnm);
166 eigvalfile = ftp2fn(efXVG, NFILE, fnm);
167 eigvecfile = ftp2fn(efTRN, NFILE, fnm);
168 averfile = ftp2fn(efSTO, NFILE, fnm);
169 logfile = ftp2fn(efLOG, NFILE, fnm);
170 asciifile = opt2fn_null("-ascii", NFILE, fnm);
171 xpmfile = opt2fn_null("-xpm", NFILE, fnm);
172 xpmafile = opt2fn_null("-xpma", NFILE, fnm);
174 read_tps_conf(fitfile, str, &top, &ePBC, &xref, NULL, box, TRUE);
179 printf("\nChoose a group for the least squares fit\n");
180 get_index(atoms, ndxfile, 1, &nfit, &ifit, &fitname);
183 gmx_fatal(FARGS, "Need >= 3 points to fit!\n");
190 printf("\nChoose a group for the covariance analysis\n");
191 get_index(atoms, ndxfile, 1, &natoms, &index, &ananame);
196 snew(w_rls, atoms->nr);
197 for (i = 0; (i < nfit); i++)
199 w_rls[ifit[i]] = atoms->atom[ifit[i]].m;
202 bDiffMass1 = bDiffMass1 || (w_rls[ifit[i]] != w_rls[ifit[i-1]]);
208 for (i = 0; (i < natoms); i++)
212 sqrtm[i] = sqrt(atoms->atom[index[i]].m);
215 bDiffMass2 = bDiffMass2 || (sqrtm[i] != sqrtm[i-1]);
224 if (bFit && bDiffMass1 && !bDiffMass2)
226 bDiffMass1 = natoms != nfit;
228 for (i = 0; (i < natoms) && !bDiffMass1; i++)
230 bDiffMass1 = index[i] != ifit[i];
235 "Note: the fit and analysis group are identical,\n"
236 " while the fit is mass weighted and the analysis is not.\n"
237 " Making the fit non mass weighted.\n\n");
238 for (i = 0; (i < nfit); i++)
240 w_rls[ifit[i]] = 1.0;
245 /* Prepare reference frame */
248 gpbc = gmx_rmpbc_init(&top.idef, ePBC, atoms->nr);
249 gmx_rmpbc(gpbc, atoms->nr, box, xref);
253 reset_x(nfit, ifit, atoms->nr, NULL, xref, w_rls);
259 if (sqrt(GMX_INT64_MAX) < ndim)
261 gmx_fatal(FARGS, "Number of degrees of freedoms to large for matrix.\n");
263 snew(mat, ndim*ndim);
265 fprintf(stderr, "Calculating the average structure ...\n");
267 nat = read_first_x(oenv, &status, trxfile, &t, &xread, box);
268 if (nat != atoms->nr)
270 fprintf(stderr, "\nWARNING: number of atoms in tpx (%d) and trajectory (%d) do not match\n", natoms, nat);
275 /* calculate x: a fitted struture of the selected atoms */
278 gmx_rmpbc(gpbc, nat, box, xread);
282 reset_x(nfit, ifit, nat, NULL, xread, w_rls);
283 do_fit(nat, w_rls, xref, xread);
285 for (i = 0; i < natoms; i++)
287 rvec_inc(xav[i], xread[index[i]]);
290 while (read_next_x(oenv, status, &t, xread, box));
293 inv_nframes = 1.0/nframes0;
294 for (i = 0; i < natoms; i++)
296 for (d = 0; d < DIM; d++)
298 xav[i][d] *= inv_nframes;
299 xread[index[i]][d] = xav[i][d];
302 write_sto_conf_indexed(opt2fn("-av", NFILE, fnm), "Average structure",
303 atoms, xread, NULL, epbcNONE, zerobox, natoms, index);
306 fprintf(stderr, "Constructing covariance matrix (%dx%d) ...\n", (int)ndim, (int)ndim);
308 nat = read_first_x(oenv, &status, trxfile, &t, &xread, box);
314 /* calculate x: a (fitted) structure of the selected atoms */
317 gmx_rmpbc(gpbc, nat, box, xread);
321 reset_x(nfit, ifit, nat, NULL, xread, w_rls);
322 do_fit(nat, w_rls, xref, xread);
326 for (i = 0; i < natoms; i++)
328 rvec_sub(xread[index[i]], xref[index[i]], x[i]);
333 for (i = 0; i < natoms; i++)
335 rvec_sub(xread[index[i]], xav[i], x[i]);
339 for (j = 0; j < natoms; j++)
341 for (dj = 0; dj < DIM; dj++)
345 for (i = j; i < natoms; i++)
348 for (d = 0; d < DIM; d++)
350 mat[l+d] += x[i][d]*xj;
356 while (read_next_x(oenv, status, &t, xread, box) &&
357 (bRef || nframes < nframes0));
359 gmx_rmpbc_done(gpbc);
361 fprintf(stderr, "Read %d frames\n", nframes);
365 /* copy the reference structure to the ouput array x */
367 for (i = 0; i < natoms; i++)
369 copy_rvec(xref[index[i]], xproj[i]);
377 /* correct the covariance matrix for the mass */
378 inv_nframes = 1.0/nframes;
379 for (j = 0; j < natoms; j++)
381 for (dj = 0; dj < DIM; dj++)
383 for (i = j; i < natoms; i++)
385 k = ndim*(DIM*j+dj)+DIM*i;
386 for (d = 0; d < DIM; d++)
388 mat[k+d] = mat[k+d]*inv_nframes*sqrtm[i]*sqrtm[j];
394 /* symmetrize the matrix */
395 for (j = 0; j < ndim; j++)
397 for (i = j; i < ndim; i++)
399 mat[ndim*i+j] = mat[ndim*j+i];
404 for (i = 0; i < ndim; i++)
406 trace += mat[i*ndim+i];
408 fprintf(stderr, "\nTrace of the covariance matrix: %g (%snm^2)\n",
409 trace, bM ? "u " : "");
413 out = gmx_ffopen(asciifile, "w");
414 for (j = 0; j < ndim; j++)
416 for (i = 0; i < ndim; i += 3)
418 fprintf(out, "%g %g %g\n",
419 mat[ndim*j+i], mat[ndim*j+i+1], mat[ndim*j+i+2]);
430 for (j = 0; j < ndim; j++)
432 mat2[j] = &(mat[ndim*j]);
433 for (i = 0; i <= j; i++)
435 if (mat2[j][i] < min)
439 if (mat2[j][j] > max)
446 for (i = 0; i < ndim; i++)
450 rlo.r = 0; rlo.g = 0; rlo.b = 1;
451 rmi.r = 1; rmi.g = 1; rmi.b = 1;
452 rhi.r = 1; rhi.g = 0; rhi.b = 0;
453 out = gmx_ffopen(xpmfile, "w");
455 write_xpm3(out, 0, "Covariance", bM ? "u nm^2" : "nm^2",
456 "dim", "dim", ndim, ndim, axis, axis,
457 mat2, min, 0.0, max, rlo, rmi, rhi, &nlevels);
467 snew(mat2, ndim/DIM);
468 for (i = 0; i < ndim/DIM; i++)
470 snew(mat2[i], ndim/DIM);
472 for (j = 0; j < ndim/DIM; j++)
474 for (i = 0; i <= j; i++)
477 for (d = 0; d < DIM; d++)
479 mat2[j][i] += mat[ndim*(DIM*j+d)+DIM*i+d];
481 if (mat2[j][i] < min)
485 if (mat2[j][j] > max)
489 mat2[i][j] = mat2[j][i];
492 snew(axis, ndim/DIM);
493 for (i = 0; i < ndim/DIM; i++)
497 rlo.r = 0; rlo.g = 0; rlo.b = 1;
498 rmi.r = 1; rmi.g = 1; rmi.b = 1;
499 rhi.r = 1; rhi.g = 0; rhi.b = 0;
500 out = gmx_ffopen(xpmafile, "w");
502 write_xpm3(out, 0, "Covariance", bM ? "u nm^2" : "nm^2",
503 "atom", "atom", ndim/DIM, ndim/DIM, axis, axis,
504 mat2, min, 0.0, max, rlo, rmi, rhi, &nlevels);
507 for (i = 0; i < ndim/DIM; i++)
515 /* call diagonalization routine */
517 snew(eigenvalues, ndim);
518 snew(eigenvectors, ndim*ndim);
520 memcpy(eigenvectors, mat, ndim*ndim*sizeof(real));
521 fprintf(stderr, "\nDiagonalizing ...\n");
523 eigensolver(eigenvectors, ndim, 0, ndim, eigenvalues, mat);
526 /* now write the output */
529 for (i = 0; i < ndim; i++)
531 sum += eigenvalues[i];
533 fprintf(stderr, "\nSum of the eigenvalues: %g (%snm^2)\n",
534 sum, bM ? "u " : "");
535 if (fabs(trace-sum) > 0.01*trace)
537 fprintf(stderr, "\nWARNING: eigenvalue sum deviates from the trace of the covariance matrix\n");
540 /* Set 'end', the maximum eigenvector and -value index used for output */
543 if (nframes-1 < ndim)
546 fprintf(stderr, "\nWARNING: there are fewer frames in your trajectory than there are\n");
547 fprintf(stderr, "degrees of freedom in your system. Only generating the first\n");
548 fprintf(stderr, "%d out of %d eigenvectors and eigenvalues.\n", end, (int)ndim);
556 fprintf(stderr, "\nWriting eigenvalues to %s\n", eigvalfile);
558 sprintf(str, "(%snm\\S2\\N)", bM ? "u " : "");
559 out = xvgropen(eigvalfile,
560 "Eigenvalues of the covariance matrix",
561 "Eigenvector index", str, oenv);
562 for (i = 0; (i < end); i++)
564 fprintf (out, "%10d %g\n", (int)i+1, eigenvalues[ndim-1-i]);
570 /* misuse lambda: 0/1 mass weighted analysis no/yes */
573 WriteXref = eWXR_YES;
574 for (i = 0; i < nfit; i++)
576 copy_rvec(xref[ifit[i]], x[i]);
586 /* misuse lambda: -1 for no fit */
587 WriteXref = eWXR_NOFIT;
590 write_eigenvectors(eigvecfile, natoms, mat, TRUE, 1, end,
591 WriteXref, x, bDiffMass1, xproj, bM, eigenvalues);
593 out = gmx_ffopen(logfile, "w");
595 gmx_format_current_time(timebuf, STRLEN);
596 fprintf(out, "Covariance analysis log, written %s\n", timebuf);
598 fprintf(out, "Program: %s\n", argv[0]);
599 gmx_getcwd(str, STRLEN);
601 fprintf(out, "Working directory: %s\n\n", str);
603 fprintf(out, "Read %d frames from %s (time %g to %g %s)\n", nframes, trxfile,
604 output_env_conv_time(oenv, tstart), output_env_conv_time(oenv, tend), output_env_get_time_unit(oenv));
607 fprintf(out, "Read reference structure for fit from %s\n", fitfile);
611 fprintf(out, "Read index groups from %s\n", ndxfile);
615 fprintf(out, "Analysis group is '%s' (%d atoms)\n", ananame, natoms);
618 fprintf(out, "Fit group is '%s' (%d atoms)\n", fitname, nfit);
622 fprintf(out, "No fit was used\n");
624 fprintf(out, "Analysis is %smass weighted\n", bDiffMass2 ? "" : "non-");
627 fprintf(out, "Fit is %smass weighted\n", bDiffMass1 ? "" : "non-");
629 fprintf(out, "Diagonalized the %dx%d covariance matrix\n", (int)ndim, (int)ndim);
630 fprintf(out, "Trace of the covariance matrix before diagonalizing: %g\n",
632 fprintf(out, "Trace of the covariance matrix after diagonalizing: %g\n\n",
635 fprintf(out, "Wrote %d eigenvalues to %s\n", (int)end, eigvalfile);
636 if (WriteXref == eWXR_YES)
638 fprintf(out, "Wrote reference structure to %s\n", eigvecfile);
640 fprintf(out, "Wrote average structure to %s and %s\n", averfile, eigvecfile);
641 fprintf(out, "Wrote eigenvectors %d to %d to %s\n", 1, end, eigvecfile);
645 fprintf(stderr, "Wrote the log to %s\n", logfile);