2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
45 #ifdef HAVE_SYS_TIME_H
49 #include "gromacs/commandline/pargs.h"
51 #include "gromacs/utility/smalloc.h"
53 #include "gromacs/math/vec.h"
54 #include "gromacs/utility/futil.h"
55 #include "gromacs/topology/index.h"
56 #include "gromacs/fileio/confio.h"
57 #include "gromacs/fileio/trnio.h"
58 #include "gromacs/fileio/xvgr.h"
59 #include "gromacs/pbcutil/rmpbc.h"
61 #include "gromacs/fileio/matio.h"
64 #include "gromacs/utility/cstringutil.h"
65 #include "gromacs/fileio/trxio.h"
67 #include "gromacs/linearalgebra/eigensolver.h"
68 #include "gromacs/math/do_fit.h"
69 #include "gromacs/utility/fatalerror.h"
71 int gmx_covar(int argc, char *argv[])
73 const char *desc[] = {
74 "[THISMODULE] calculates and diagonalizes the (mass-weighted)",
76 "All structures are fitted to the structure in the structure file.",
77 "When this is not a run input file periodicity will not be taken into",
78 "account. When the fit and analysis groups are identical and the analysis",
79 "is non mass-weighted, the fit will also be non mass-weighted.",
81 "The eigenvectors are written to a trajectory file ([TT]-v[tt]).",
82 "When the same atoms are used for the fit and the covariance analysis,",
83 "the reference structure for the fit is written first with t=-1.",
84 "The average (or reference when [TT]-ref[tt] is used) structure is",
85 "written with t=0, the eigenvectors",
86 "are written as frames with the eigenvector number as timestamp.",
88 "The eigenvectors can be analyzed with [gmx-anaeig].",
90 "Option [TT]-ascii[tt] writes the whole covariance matrix to",
91 "an ASCII file. The order of the elements is: x1x1, x1y1, x1z1, x1x2, ...",
93 "Option [TT]-xpm[tt] writes the whole covariance matrix to an [TT].xpm[tt] file.",
95 "Option [TT]-xpma[tt] writes the atomic covariance matrix to an [TT].xpm[tt] file,",
96 "i.e. for each atom pair the sum of the xx, yy and zz covariances is",
99 "Note that the diagonalization of a matrix requires memory and time",
100 "that will increase at least as fast as than the square of the number",
101 "of atoms involved. It is easy to run out of memory, in which",
102 "case this tool will probably exit with a 'Segmentation fault'. You",
103 "should consider carefully whether a reduced set of atoms will meet",
104 "your needs for lower costs."
106 static gmx_bool bFit = TRUE, bRef = FALSE, bM = FALSE, bPBC = TRUE;
109 { "-fit", FALSE, etBOOL, {&bFit},
110 "Fit to a reference structure"},
111 { "-ref", FALSE, etBOOL, {&bRef},
112 "Use the deviation from the conformation in the structure file instead of from the average" },
113 { "-mwa", FALSE, etBOOL, {&bM},
114 "Mass-weighted covariance analysis"},
115 { "-last", FALSE, etINT, {&end},
116 "Last eigenvector to write away (-1 is till the last)" },
117 { "-pbc", FALSE, etBOOL, {&bPBC},
118 "Apply corrections for periodic boundary conditions" }
126 rvec *x, *xread, *xref, *xav, *xproj;
128 real *sqrtm, *mat, *eigenvalues, sum, trace, inv_nframes;
129 real t, tstart, tend, **mat2;
130 real xj, *w_rls = NULL;
131 real min, max, *axis;
133 int natoms, nat, count, nframes0, nframes, nlevels;
134 gmx_int64_t ndim, i, j, k, l;
136 const char *fitfile, *trxfile, *ndxfile;
137 const char *eigvalfile, *eigvecfile, *averfile, *logfile;
138 const char *asciifile, *xpmfile, *xpmafile;
139 char str[STRLEN], *fitname, *ananame, *pcwd;
141 atom_id *index, *ifit;
142 gmx_bool bDiffMass1, bDiffMass2;
144 char timebuf[STRLEN];
148 gmx_rmpbc_t gpbc = NULL;
151 { efTRX, "-f", NULL, ffREAD },
152 { efTPS, NULL, NULL, ffREAD },
153 { efNDX, NULL, NULL, ffOPTRD },
154 { efXVG, NULL, "eigenval", ffWRITE },
155 { efTRN, "-v", "eigenvec", ffWRITE },
156 { efSTO, "-av", "average.pdb", ffWRITE },
157 { efLOG, NULL, "covar", ffWRITE },
158 { efDAT, "-ascii", "covar", ffOPTWR },
159 { efXPM, "-xpm", "covar", ffOPTWR },
160 { efXPM, "-xpma", "covara", ffOPTWR }
162 #define NFILE asize(fnm)
164 if (!parse_common_args(&argc, argv, PCA_CAN_TIME | PCA_TIME_UNIT | PCA_BE_NICE,
165 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &oenv))
172 fitfile = ftp2fn(efTPS, NFILE, fnm);
173 trxfile = ftp2fn(efTRX, NFILE, fnm);
174 ndxfile = ftp2fn_null(efNDX, NFILE, fnm);
175 eigvalfile = ftp2fn(efXVG, NFILE, fnm);
176 eigvecfile = ftp2fn(efTRN, NFILE, fnm);
177 averfile = ftp2fn(efSTO, NFILE, fnm);
178 logfile = ftp2fn(efLOG, NFILE, fnm);
179 asciifile = opt2fn_null("-ascii", NFILE, fnm);
180 xpmfile = opt2fn_null("-xpm", NFILE, fnm);
181 xpmafile = opt2fn_null("-xpma", NFILE, fnm);
183 read_tps_conf(fitfile, str, &top, &ePBC, &xref, NULL, box, TRUE);
188 printf("\nChoose a group for the least squares fit\n");
189 get_index(atoms, ndxfile, 1, &nfit, &ifit, &fitname);
192 gmx_fatal(FARGS, "Need >= 3 points to fit!\n");
199 printf("\nChoose a group for the covariance analysis\n");
200 get_index(atoms, ndxfile, 1, &natoms, &index, &ananame);
205 snew(w_rls, atoms->nr);
206 for (i = 0; (i < nfit); i++)
208 w_rls[ifit[i]] = atoms->atom[ifit[i]].m;
211 bDiffMass1 = bDiffMass1 || (w_rls[ifit[i]] != w_rls[ifit[i-1]]);
217 for (i = 0; (i < natoms); i++)
221 sqrtm[i] = sqrt(atoms->atom[index[i]].m);
224 bDiffMass2 = bDiffMass2 || (sqrtm[i] != sqrtm[i-1]);
233 if (bFit && bDiffMass1 && !bDiffMass2)
235 bDiffMass1 = natoms != nfit;
237 for (i = 0; (i < natoms) && !bDiffMass1; i++)
239 bDiffMass1 = index[i] != ifit[i];
244 "Note: the fit and analysis group are identical,\n"
245 " while the fit is mass weighted and the analysis is not.\n"
246 " Making the fit non mass weighted.\n\n");
247 for (i = 0; (i < nfit); i++)
249 w_rls[ifit[i]] = 1.0;
254 /* Prepare reference frame */
257 gpbc = gmx_rmpbc_init(&top.idef, ePBC, atoms->nr);
258 gmx_rmpbc(gpbc, atoms->nr, box, xref);
262 reset_x(nfit, ifit, atoms->nr, NULL, xref, w_rls);
268 if (sqrt(GMX_INT64_MAX) < ndim)
270 gmx_fatal(FARGS, "Number of degrees of freedoms to large for matrix.\n");
272 snew(mat, ndim*ndim);
274 fprintf(stderr, "Calculating the average structure ...\n");
276 nat = read_first_x(oenv, &status, trxfile, &t, &xread, box);
277 if (nat != atoms->nr)
279 fprintf(stderr, "\nWARNING: number of atoms in tpx (%d) and trajectory (%d) do not match\n", natoms, nat);
284 /* calculate x: a fitted struture of the selected atoms */
287 gmx_rmpbc(gpbc, nat, box, xread);
291 reset_x(nfit, ifit, nat, NULL, xread, w_rls);
292 do_fit(nat, w_rls, xref, xread);
294 for (i = 0; i < natoms; i++)
296 rvec_inc(xav[i], xread[index[i]]);
299 while (read_next_x(oenv, status, &t, xread, box));
302 inv_nframes = 1.0/nframes0;
303 for (i = 0; i < natoms; i++)
305 for (d = 0; d < DIM; d++)
307 xav[i][d] *= inv_nframes;
308 xread[index[i]][d] = xav[i][d];
311 write_sto_conf_indexed(opt2fn("-av", NFILE, fnm), "Average structure",
312 atoms, xread, NULL, epbcNONE, zerobox, natoms, index);
315 fprintf(stderr, "Constructing covariance matrix (%dx%d) ...\n", (int)ndim, (int)ndim);
317 nat = read_first_x(oenv, &status, trxfile, &t, &xread, box);
323 /* calculate x: a (fitted) structure of the selected atoms */
326 gmx_rmpbc(gpbc, nat, box, xread);
330 reset_x(nfit, ifit, nat, NULL, xread, w_rls);
331 do_fit(nat, w_rls, xref, xread);
335 for (i = 0; i < natoms; i++)
337 rvec_sub(xread[index[i]], xref[index[i]], x[i]);
342 for (i = 0; i < natoms; i++)
344 rvec_sub(xread[index[i]], xav[i], x[i]);
348 for (j = 0; j < natoms; j++)
350 for (dj = 0; dj < DIM; dj++)
354 for (i = j; i < natoms; i++)
357 for (d = 0; d < DIM; d++)
359 mat[l+d] += x[i][d]*xj;
365 while (read_next_x(oenv, status, &t, xread, box) &&
366 (bRef || nframes < nframes0));
368 gmx_rmpbc_done(gpbc);
370 fprintf(stderr, "Read %d frames\n", nframes);
374 /* copy the reference structure to the ouput array x */
376 for (i = 0; i < natoms; i++)
378 copy_rvec(xref[index[i]], xproj[i]);
386 /* correct the covariance matrix for the mass */
387 inv_nframes = 1.0/nframes;
388 for (j = 0; j < natoms; j++)
390 for (dj = 0; dj < DIM; dj++)
392 for (i = j; i < natoms; i++)
394 k = ndim*(DIM*j+dj)+DIM*i;
395 for (d = 0; d < DIM; d++)
397 mat[k+d] = mat[k+d]*inv_nframes*sqrtm[i]*sqrtm[j];
403 /* symmetrize the matrix */
404 for (j = 0; j < ndim; j++)
406 for (i = j; i < ndim; i++)
408 mat[ndim*i+j] = mat[ndim*j+i];
413 for (i = 0; i < ndim; i++)
415 trace += mat[i*ndim+i];
417 fprintf(stderr, "\nTrace of the covariance matrix: %g (%snm^2)\n",
418 trace, bM ? "u " : "");
422 out = gmx_ffopen(asciifile, "w");
423 for (j = 0; j < ndim; j++)
425 for (i = 0; i < ndim; i += 3)
427 fprintf(out, "%g %g %g\n",
428 mat[ndim*j+i], mat[ndim*j+i+1], mat[ndim*j+i+2]);
439 for (j = 0; j < ndim; j++)
441 mat2[j] = &(mat[ndim*j]);
442 for (i = 0; i <= j; i++)
444 if (mat2[j][i] < min)
448 if (mat2[j][j] > max)
455 for (i = 0; i < ndim; i++)
459 rlo.r = 0; rlo.g = 0; rlo.b = 1;
460 rmi.r = 1; rmi.g = 1; rmi.b = 1;
461 rhi.r = 1; rhi.g = 0; rhi.b = 0;
462 out = gmx_ffopen(xpmfile, "w");
464 write_xpm3(out, 0, "Covariance", bM ? "u nm^2" : "nm^2",
465 "dim", "dim", ndim, ndim, axis, axis,
466 mat2, min, 0.0, max, rlo, rmi, rhi, &nlevels);
476 snew(mat2, ndim/DIM);
477 for (i = 0; i < ndim/DIM; i++)
479 snew(mat2[i], ndim/DIM);
481 for (j = 0; j < ndim/DIM; j++)
483 for (i = 0; i <= j; i++)
486 for (d = 0; d < DIM; d++)
488 mat2[j][i] += mat[ndim*(DIM*j+d)+DIM*i+d];
490 if (mat2[j][i] < min)
494 if (mat2[j][j] > max)
498 mat2[i][j] = mat2[j][i];
501 snew(axis, ndim/DIM);
502 for (i = 0; i < ndim/DIM; i++)
506 rlo.r = 0; rlo.g = 0; rlo.b = 1;
507 rmi.r = 1; rmi.g = 1; rmi.b = 1;
508 rhi.r = 1; rhi.g = 0; rhi.b = 0;
509 out = gmx_ffopen(xpmafile, "w");
511 write_xpm3(out, 0, "Covariance", bM ? "u nm^2" : "nm^2",
512 "atom", "atom", ndim/DIM, ndim/DIM, axis, axis,
513 mat2, min, 0.0, max, rlo, rmi, rhi, &nlevels);
516 for (i = 0; i < ndim/DIM; i++)
524 /* call diagonalization routine */
526 snew(eigenvalues, ndim);
527 snew(eigenvectors, ndim*ndim);
529 memcpy(eigenvectors, mat, ndim*ndim*sizeof(real));
530 fprintf(stderr, "\nDiagonalizing ...\n");
532 eigensolver(eigenvectors, ndim, 0, ndim, eigenvalues, mat);
535 /* now write the output */
538 for (i = 0; i < ndim; i++)
540 sum += eigenvalues[i];
542 fprintf(stderr, "\nSum of the eigenvalues: %g (%snm^2)\n",
543 sum, bM ? "u " : "");
544 if (fabs(trace-sum) > 0.01*trace)
546 fprintf(stderr, "\nWARNING: eigenvalue sum deviates from the trace of the covariance matrix\n");
549 fprintf(stderr, "\nWriting eigenvalues to %s\n", eigvalfile);
551 sprintf(str, "(%snm\\S2\\N)", bM ? "u " : "");
552 out = xvgropen(eigvalfile,
553 "Eigenvalues of the covariance matrix",
554 "Eigenvector index", str, oenv);
555 for (i = 0; (i < end); i++)
557 fprintf (out, "%10d %g\n", (int)i+1, eigenvalues[ndim-1-i]);
563 if (nframes-1 < ndim)
566 fprintf(out, "WARNING: there are fewer frames in your trajectory than there are\n");
567 fprintf(out, "degrees of freedom in your system. Only generating the first\n");
568 fprintf(out, "%d out of %d eigenvectors and eigenvalues.\n", end, (int)ndim);
577 /* misuse lambda: 0/1 mass weighted analysis no/yes */
580 WriteXref = eWXR_YES;
581 for (i = 0; i < nfit; i++)
583 copy_rvec(xref[ifit[i]], x[i]);
593 /* misuse lambda: -1 for no fit */
594 WriteXref = eWXR_NOFIT;
597 write_eigenvectors(eigvecfile, natoms, mat, TRUE, 1, end,
598 WriteXref, x, bDiffMass1, xproj, bM, eigenvalues);
600 out = gmx_ffopen(logfile, "w");
603 gmx_ctime_r(&now, timebuf, STRLEN);
604 fprintf(out, "Covariance analysis log, written %s\n", timebuf);
606 fprintf(out, "Program: %s\n", argv[0]);
607 gmx_getcwd(str, STRLEN);
609 fprintf(out, "Working directory: %s\n\n", str);
611 fprintf(out, "Read %d frames from %s (time %g to %g %s)\n", nframes, trxfile,
612 output_env_conv_time(oenv, tstart), output_env_conv_time(oenv, tend), output_env_get_time_unit(oenv));
615 fprintf(out, "Read reference structure for fit from %s\n", fitfile);
619 fprintf(out, "Read index groups from %s\n", ndxfile);
623 fprintf(out, "Analysis group is '%s' (%d atoms)\n", ananame, natoms);
626 fprintf(out, "Fit group is '%s' (%d atoms)\n", fitname, nfit);
630 fprintf(out, "No fit was used\n");
632 fprintf(out, "Analysis is %smass weighted\n", bDiffMass2 ? "" : "non-");
635 fprintf(out, "Fit is %smass weighted\n", bDiffMass1 ? "" : "non-");
637 fprintf(out, "Diagonalized the %dx%d covariance matrix\n", (int)ndim, (int)ndim);
638 fprintf(out, "Trace of the covariance matrix before diagonalizing: %g\n",
640 fprintf(out, "Trace of the covariance matrix after diagonalizing: %g\n\n",
643 fprintf(out, "Wrote %d eigenvalues to %s\n", (int)end, eigvalfile);
644 if (WriteXref == eWXR_YES)
646 fprintf(out, "Wrote reference structure to %s\n", eigvecfile);
648 fprintf(out, "Wrote average structure to %s and %s\n", averfile, eigvecfile);
649 fprintf(out, "Wrote eigenvectors %d to %d to %s\n", 1, end, eigvecfile);
653 fprintf(stderr, "Wrote the log to %s\n", logfile);