2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
43 #ifdef HAVE_SYS_TIME_H
47 #include "gromacs/commandline/pargs.h"
49 #include "gromacs/utility/smalloc.h"
51 #include "gromacs/math/vec.h"
52 #include "gromacs/utility/futil.h"
53 #include "gromacs/topology/index.h"
54 #include "gromacs/fileio/confio.h"
55 #include "gromacs/fileio/trnio.h"
56 #include "gromacs/fileio/xvgr.h"
57 #include "gromacs/pbcutil/rmpbc.h"
59 #include "gromacs/fileio/matio.h"
62 #include "gromacs/utility/cstringutil.h"
63 #include "gromacs/fileio/trxio.h"
65 #include "gromacs/linearalgebra/eigensolver.h"
66 #include "gromacs/math/do_fit.h"
67 #include "gromacs/utility/fatalerror.h"
69 int gmx_covar(int argc, char *argv[])
71 const char *desc[] = {
72 "[THISMODULE] calculates and diagonalizes the (mass-weighted)",
74 "All structures are fitted to the structure in the structure file.",
75 "When this is not a run input file periodicity will not be taken into",
76 "account. When the fit and analysis groups are identical and the analysis",
77 "is non mass-weighted, the fit will also be non mass-weighted.",
79 "The eigenvectors are written to a trajectory file ([TT]-v[tt]).",
80 "When the same atoms are used for the fit and the covariance analysis,",
81 "the reference structure for the fit is written first with t=-1.",
82 "The average (or reference when [TT]-ref[tt] is used) structure is",
83 "written with t=0, the eigenvectors",
84 "are written as frames with the eigenvector number as timestamp.",
86 "The eigenvectors can be analyzed with [gmx-anaeig].",
88 "Option [TT]-ascii[tt] writes the whole covariance matrix to",
89 "an ASCII file. The order of the elements is: x1x1, x1y1, x1z1, x1x2, ...",
91 "Option [TT]-xpm[tt] writes the whole covariance matrix to an [TT].xpm[tt] file.",
93 "Option [TT]-xpma[tt] writes the atomic covariance matrix to an [TT].xpm[tt] file,",
94 "i.e. for each atom pair the sum of the xx, yy and zz covariances is",
97 "Note that the diagonalization of a matrix requires memory and time",
98 "that will increase at least as fast as than the square of the number",
99 "of atoms involved. It is easy to run out of memory, in which",
100 "case this tool will probably exit with a 'Segmentation fault'. You",
101 "should consider carefully whether a reduced set of atoms will meet",
102 "your needs for lower costs."
104 static gmx_bool bFit = TRUE, bRef = FALSE, bM = FALSE, bPBC = TRUE;
107 { "-fit", FALSE, etBOOL, {&bFit},
108 "Fit to a reference structure"},
109 { "-ref", FALSE, etBOOL, {&bRef},
110 "Use the deviation from the conformation in the structure file instead of from the average" },
111 { "-mwa", FALSE, etBOOL, {&bM},
112 "Mass-weighted covariance analysis"},
113 { "-last", FALSE, etINT, {&end},
114 "Last eigenvector to write away (-1 is till the last)" },
115 { "-pbc", FALSE, etBOOL, {&bPBC},
116 "Apply corrections for periodic boundary conditions" }
124 rvec *x, *xread, *xref, *xav, *xproj;
126 real *sqrtm, *mat, *eigenvalues, sum, trace, inv_nframes;
127 real t, tstart, tend, **mat2;
128 real xj, *w_rls = NULL;
129 real min, max, *axis;
131 int natoms, nat, count, nframes0, nframes, nlevels;
132 gmx_int64_t ndim, i, j, k, l;
134 const char *fitfile, *trxfile, *ndxfile;
135 const char *eigvalfile, *eigvecfile, *averfile, *logfile;
136 const char *asciifile, *xpmfile, *xpmafile;
137 char str[STRLEN], *fitname, *ananame, *pcwd;
139 atom_id *index, *ifit;
140 gmx_bool bDiffMass1, bDiffMass2;
142 char timebuf[STRLEN];
146 gmx_rmpbc_t gpbc = NULL;
149 { efTRX, "-f", NULL, ffREAD },
150 { efTPS, NULL, NULL, ffREAD },
151 { efNDX, NULL, NULL, ffOPTRD },
152 { efXVG, NULL, "eigenval", ffWRITE },
153 { efTRN, "-v", "eigenvec", ffWRITE },
154 { efSTO, "-av", "average.pdb", ffWRITE },
155 { efLOG, NULL, "covar", ffWRITE },
156 { efDAT, "-ascii", "covar", ffOPTWR },
157 { efXPM, "-xpm", "covar", ffOPTWR },
158 { efXPM, "-xpma", "covara", ffOPTWR }
160 #define NFILE asize(fnm)
162 if (!parse_common_args(&argc, argv, PCA_CAN_TIME | PCA_TIME_UNIT | PCA_BE_NICE,
163 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &oenv))
170 fitfile = ftp2fn(efTPS, NFILE, fnm);
171 trxfile = ftp2fn(efTRX, NFILE, fnm);
172 ndxfile = ftp2fn_null(efNDX, NFILE, fnm);
173 eigvalfile = ftp2fn(efXVG, NFILE, fnm);
174 eigvecfile = ftp2fn(efTRN, NFILE, fnm);
175 averfile = ftp2fn(efSTO, NFILE, fnm);
176 logfile = ftp2fn(efLOG, NFILE, fnm);
177 asciifile = opt2fn_null("-ascii", NFILE, fnm);
178 xpmfile = opt2fn_null("-xpm", NFILE, fnm);
179 xpmafile = opt2fn_null("-xpma", NFILE, fnm);
181 read_tps_conf(fitfile, str, &top, &ePBC, &xref, NULL, box, TRUE);
186 printf("\nChoose a group for the least squares fit\n");
187 get_index(atoms, ndxfile, 1, &nfit, &ifit, &fitname);
190 gmx_fatal(FARGS, "Need >= 3 points to fit!\n");
197 printf("\nChoose a group for the covariance analysis\n");
198 get_index(atoms, ndxfile, 1, &natoms, &index, &ananame);
203 snew(w_rls, atoms->nr);
204 for (i = 0; (i < nfit); i++)
206 w_rls[ifit[i]] = atoms->atom[ifit[i]].m;
209 bDiffMass1 = bDiffMass1 || (w_rls[ifit[i]] != w_rls[ifit[i-1]]);
215 for (i = 0; (i < natoms); i++)
219 sqrtm[i] = sqrt(atoms->atom[index[i]].m);
222 bDiffMass2 = bDiffMass2 || (sqrtm[i] != sqrtm[i-1]);
231 if (bFit && bDiffMass1 && !bDiffMass2)
233 bDiffMass1 = natoms != nfit;
235 for (i = 0; (i < natoms) && !bDiffMass1; i++)
237 bDiffMass1 = index[i] != ifit[i];
242 "Note: the fit and analysis group are identical,\n"
243 " while the fit is mass weighted and the analysis is not.\n"
244 " Making the fit non mass weighted.\n\n");
245 for (i = 0; (i < nfit); i++)
247 w_rls[ifit[i]] = 1.0;
252 /* Prepare reference frame */
255 gpbc = gmx_rmpbc_init(&top.idef, ePBC, atoms->nr);
256 gmx_rmpbc(gpbc, atoms->nr, box, xref);
260 reset_x(nfit, ifit, atoms->nr, NULL, xref, w_rls);
266 if (sqrt(GMX_INT64_MAX) < ndim)
268 gmx_fatal(FARGS, "Number of degrees of freedoms to large for matrix.\n");
270 snew(mat, ndim*ndim);
272 fprintf(stderr, "Calculating the average structure ...\n");
274 nat = read_first_x(oenv, &status, trxfile, &t, &xread, box);
275 if (nat != atoms->nr)
277 fprintf(stderr, "\nWARNING: number of atoms in tpx (%d) and trajectory (%d) do not match\n", natoms, nat);
282 /* calculate x: a fitted struture of the selected atoms */
285 gmx_rmpbc(gpbc, nat, box, xread);
289 reset_x(nfit, ifit, nat, NULL, xread, w_rls);
290 do_fit(nat, w_rls, xref, xread);
292 for (i = 0; i < natoms; i++)
294 rvec_inc(xav[i], xread[index[i]]);
297 while (read_next_x(oenv, status, &t, xread, box));
300 inv_nframes = 1.0/nframes0;
301 for (i = 0; i < natoms; i++)
303 for (d = 0; d < DIM; d++)
305 xav[i][d] *= inv_nframes;
306 xread[index[i]][d] = xav[i][d];
309 write_sto_conf_indexed(opt2fn("-av", NFILE, fnm), "Average structure",
310 atoms, xread, NULL, epbcNONE, zerobox, natoms, index);
313 fprintf(stderr, "Constructing covariance matrix (%dx%d) ...\n", (int)ndim, (int)ndim);
315 nat = read_first_x(oenv, &status, trxfile, &t, &xread, box);
321 /* calculate x: a (fitted) structure of the selected atoms */
324 gmx_rmpbc(gpbc, nat, box, xread);
328 reset_x(nfit, ifit, nat, NULL, xread, w_rls);
329 do_fit(nat, w_rls, xref, xread);
333 for (i = 0; i < natoms; i++)
335 rvec_sub(xread[index[i]], xref[index[i]], x[i]);
340 for (i = 0; i < natoms; i++)
342 rvec_sub(xread[index[i]], xav[i], x[i]);
346 for (j = 0; j < natoms; j++)
348 for (dj = 0; dj < DIM; dj++)
352 for (i = j; i < natoms; i++)
355 for (d = 0; d < DIM; d++)
357 mat[l+d] += x[i][d]*xj;
363 while (read_next_x(oenv, status, &t, xread, box) &&
364 (bRef || nframes < nframes0));
366 gmx_rmpbc_done(gpbc);
368 fprintf(stderr, "Read %d frames\n", nframes);
372 /* copy the reference structure to the ouput array x */
374 for (i = 0; i < natoms; i++)
376 copy_rvec(xref[index[i]], xproj[i]);
384 /* correct the covariance matrix for the mass */
385 inv_nframes = 1.0/nframes;
386 for (j = 0; j < natoms; j++)
388 for (dj = 0; dj < DIM; dj++)
390 for (i = j; i < natoms; i++)
392 k = ndim*(DIM*j+dj)+DIM*i;
393 for (d = 0; d < DIM; d++)
395 mat[k+d] = mat[k+d]*inv_nframes*sqrtm[i]*sqrtm[j];
401 /* symmetrize the matrix */
402 for (j = 0; j < ndim; j++)
404 for (i = j; i < ndim; i++)
406 mat[ndim*i+j] = mat[ndim*j+i];
411 for (i = 0; i < ndim; i++)
413 trace += mat[i*ndim+i];
415 fprintf(stderr, "\nTrace of the covariance matrix: %g (%snm^2)\n",
416 trace, bM ? "u " : "");
420 out = gmx_ffopen(asciifile, "w");
421 for (j = 0; j < ndim; j++)
423 for (i = 0; i < ndim; i += 3)
425 fprintf(out, "%g %g %g\n",
426 mat[ndim*j+i], mat[ndim*j+i+1], mat[ndim*j+i+2]);
437 for (j = 0; j < ndim; j++)
439 mat2[j] = &(mat[ndim*j]);
440 for (i = 0; i <= j; i++)
442 if (mat2[j][i] < min)
446 if (mat2[j][j] > max)
453 for (i = 0; i < ndim; i++)
457 rlo.r = 0; rlo.g = 0; rlo.b = 1;
458 rmi.r = 1; rmi.g = 1; rmi.b = 1;
459 rhi.r = 1; rhi.g = 0; rhi.b = 0;
460 out = gmx_ffopen(xpmfile, "w");
462 write_xpm3(out, 0, "Covariance", bM ? "u nm^2" : "nm^2",
463 "dim", "dim", ndim, ndim, axis, axis,
464 mat2, min, 0.0, max, rlo, rmi, rhi, &nlevels);
474 snew(mat2, ndim/DIM);
475 for (i = 0; i < ndim/DIM; i++)
477 snew(mat2[i], ndim/DIM);
479 for (j = 0; j < ndim/DIM; j++)
481 for (i = 0; i <= j; i++)
484 for (d = 0; d < DIM; d++)
486 mat2[j][i] += mat[ndim*(DIM*j+d)+DIM*i+d];
488 if (mat2[j][i] < min)
492 if (mat2[j][j] > max)
496 mat2[i][j] = mat2[j][i];
499 snew(axis, ndim/DIM);
500 for (i = 0; i < ndim/DIM; i++)
504 rlo.r = 0; rlo.g = 0; rlo.b = 1;
505 rmi.r = 1; rmi.g = 1; rmi.b = 1;
506 rhi.r = 1; rhi.g = 0; rhi.b = 0;
507 out = gmx_ffopen(xpmafile, "w");
509 write_xpm3(out, 0, "Covariance", bM ? "u nm^2" : "nm^2",
510 "atom", "atom", ndim/DIM, ndim/DIM, axis, axis,
511 mat2, min, 0.0, max, rlo, rmi, rhi, &nlevels);
514 for (i = 0; i < ndim/DIM; i++)
522 /* call diagonalization routine */
524 snew(eigenvalues, ndim);
525 snew(eigenvectors, ndim*ndim);
527 memcpy(eigenvectors, mat, ndim*ndim*sizeof(real));
528 fprintf(stderr, "\nDiagonalizing ...\n");
530 eigensolver(eigenvectors, ndim, 0, ndim, eigenvalues, mat);
533 /* now write the output */
536 for (i = 0; i < ndim; i++)
538 sum += eigenvalues[i];
540 fprintf(stderr, "\nSum of the eigenvalues: %g (%snm^2)\n",
541 sum, bM ? "u " : "");
542 if (fabs(trace-sum) > 0.01*trace)
544 fprintf(stderr, "\nWARNING: eigenvalue sum deviates from the trace of the covariance matrix\n");
547 fprintf(stderr, "\nWriting eigenvalues to %s\n", eigvalfile);
549 sprintf(str, "(%snm\\S2\\N)", bM ? "u " : "");
550 out = xvgropen(eigvalfile,
551 "Eigenvalues of the covariance matrix",
552 "Eigenvector index", str, oenv);
553 for (i = 0; (i < end); i++)
555 fprintf (out, "%10d %g\n", (int)i+1, eigenvalues[ndim-1-i]);
561 if (nframes-1 < ndim)
564 fprintf(out, "WARNING: there are fewer frames in your trajectory than there are\n");
565 fprintf(out, "degrees of freedom in your system. Only generating the first\n");
566 fprintf(out, "%d out of %d eigenvectors and eigenvalues.\n", end, (int)ndim);
575 /* misuse lambda: 0/1 mass weighted analysis no/yes */
578 WriteXref = eWXR_YES;
579 for (i = 0; i < nfit; i++)
581 copy_rvec(xref[ifit[i]], x[i]);
591 /* misuse lambda: -1 for no fit */
592 WriteXref = eWXR_NOFIT;
595 write_eigenvectors(eigvecfile, natoms, mat, TRUE, 1, end,
596 WriteXref, x, bDiffMass1, xproj, bM, eigenvalues);
598 out = gmx_ffopen(logfile, "w");
601 gmx_ctime_r(&now, timebuf, STRLEN);
602 fprintf(out, "Covariance analysis log, written %s\n", timebuf);
604 fprintf(out, "Program: %s\n", argv[0]);
605 gmx_getcwd(str, STRLEN);
607 fprintf(out, "Working directory: %s\n\n", str);
609 fprintf(out, "Read %d frames from %s (time %g to %g %s)\n", nframes, trxfile,
610 output_env_conv_time(oenv, tstart), output_env_conv_time(oenv, tend), output_env_get_time_unit(oenv));
613 fprintf(out, "Read reference structure for fit from %s\n", fitfile);
617 fprintf(out, "Read index groups from %s\n", ndxfile);
621 fprintf(out, "Analysis group is '%s' (%d atoms)\n", ananame, natoms);
624 fprintf(out, "Fit group is '%s' (%d atoms)\n", fitname, nfit);
628 fprintf(out, "No fit was used\n");
630 fprintf(out, "Analysis is %smass weighted\n", bDiffMass2 ? "" : "non-");
633 fprintf(out, "Fit is %smass weighted\n", bDiffMass1 ? "" : "non-");
635 fprintf(out, "Diagonalized the %dx%d covariance matrix\n", (int)ndim, (int)ndim);
636 fprintf(out, "Trace of the covariance matrix before diagonalizing: %g\n",
638 fprintf(out, "Trace of the covariance matrix after diagonalizing: %g\n\n",
641 fprintf(out, "Wrote %d eigenvalues to %s\n", (int)end, eigvalfile);
642 if (WriteXref == eWXR_YES)
644 fprintf(out, "Wrote reference structure to %s\n", eigvecfile);
646 fprintf(out, "Wrote average structure to %s and %s\n", averfile, eigvecfile);
647 fprintf(out, "Wrote eigenvectors %d to %d to %s\n", 1, end, eigvecfile);
651 fprintf(stderr, "Wrote the log to %s\n", logfile);