2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
45 #ifdef HAVE_SYS_TIME_H
49 #include "gromacs/commandline/pargs.h"
52 #include "gromacs/utility/smalloc.h"
56 #include "gromacs/fileio/futil.h"
58 #include "gromacs/fileio/confio.h"
59 #include "gromacs/fileio/trnio.h"
64 #include "gromacs/fileio/matio.h"
68 #include "gromacs/utility/cstringutil.h"
69 #include "gromacs/fileio/trxio.h"
71 #include "gromacs/linearalgebra/eigensolver.h"
72 #include "gromacs/math/do_fit.h"
73 #include "gromacs/legacyheaders/gmx_fatal.h"
75 int gmx_covar(int argc, char *argv[])
77 const char *desc[] = {
78 "[THISMODULE] calculates and diagonalizes the (mass-weighted)",
80 "All structures are fitted to the structure in the structure file.",
81 "When this is not a run input file periodicity will not be taken into",
82 "account. When the fit and analysis groups are identical and the analysis",
83 "is non mass-weighted, the fit will also be non mass-weighted.",
85 "The eigenvectors are written to a trajectory file ([TT]-v[tt]).",
86 "When the same atoms are used for the fit and the covariance analysis,",
87 "the reference structure for the fit is written first with t=-1.",
88 "The average (or reference when [TT]-ref[tt] is used) structure is",
89 "written with t=0, the eigenvectors",
90 "are written as frames with the eigenvector number as timestamp.",
92 "The eigenvectors can be analyzed with [gmx-anaeig].",
94 "Option [TT]-ascii[tt] writes the whole covariance matrix to",
95 "an ASCII file. The order of the elements is: x1x1, x1y1, x1z1, x1x2, ...",
97 "Option [TT]-xpm[tt] writes the whole covariance matrix to an [TT].xpm[tt] file.",
99 "Option [TT]-xpma[tt] writes the atomic covariance matrix to an [TT].xpm[tt] file,",
100 "i.e. for each atom pair the sum of the xx, yy and zz covariances is",
103 "Note that the diagonalization of a matrix requires memory and time",
104 "that will increase at least as fast as than the square of the number",
105 "of atoms involved. It is easy to run out of memory, in which",
106 "case this tool will probably exit with a 'Segmentation fault'. You",
107 "should consider carefully whether a reduced set of atoms will meet",
108 "your needs for lower costs."
110 static gmx_bool bFit = TRUE, bRef = FALSE, bM = FALSE, bPBC = TRUE;
113 { "-fit", FALSE, etBOOL, {&bFit},
114 "Fit to a reference structure"},
115 { "-ref", FALSE, etBOOL, {&bRef},
116 "Use the deviation from the conformation in the structure file instead of from the average" },
117 { "-mwa", FALSE, etBOOL, {&bM},
118 "Mass-weighted covariance analysis"},
119 { "-last", FALSE, etINT, {&end},
120 "Last eigenvector to write away (-1 is till the last)" },
121 { "-pbc", FALSE, etBOOL, {&bPBC},
122 "Apply corrections for periodic boundary conditions" }
130 rvec *x, *xread, *xref, *xav, *xproj;
132 real *sqrtm, *mat, *eigenvalues, sum, trace, inv_nframes;
133 real t, tstart, tend, **mat2;
134 real xj, *w_rls = NULL;
135 real min, max, *axis;
137 int natoms, nat, count, nframes0, nframes, nlevels;
138 gmx_int64_t ndim, i, j, k, l;
140 const char *fitfile, *trxfile, *ndxfile;
141 const char *eigvalfile, *eigvecfile, *averfile, *logfile;
142 const char *asciifile, *xpmfile, *xpmafile;
143 char str[STRLEN], *fitname, *ananame, *pcwd;
145 atom_id *index, *ifit;
146 gmx_bool bDiffMass1, bDiffMass2;
148 char timebuf[STRLEN];
152 gmx_rmpbc_t gpbc = NULL;
155 { efTRX, "-f", NULL, ffREAD },
156 { efTPS, NULL, NULL, ffREAD },
157 { efNDX, NULL, NULL, ffOPTRD },
158 { efXVG, NULL, "eigenval", ffWRITE },
159 { efTRN, "-v", "eigenvec", ffWRITE },
160 { efSTO, "-av", "average.pdb", ffWRITE },
161 { efLOG, NULL, "covar", ffWRITE },
162 { efDAT, "-ascii", "covar", ffOPTWR },
163 { efXPM, "-xpm", "covar", ffOPTWR },
164 { efXPM, "-xpma", "covara", ffOPTWR }
166 #define NFILE asize(fnm)
168 if (!parse_common_args(&argc, argv, PCA_CAN_TIME | PCA_TIME_UNIT | PCA_BE_NICE,
169 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &oenv))
176 fitfile = ftp2fn(efTPS, NFILE, fnm);
177 trxfile = ftp2fn(efTRX, NFILE, fnm);
178 ndxfile = ftp2fn_null(efNDX, NFILE, fnm);
179 eigvalfile = ftp2fn(efXVG, NFILE, fnm);
180 eigvecfile = ftp2fn(efTRN, NFILE, fnm);
181 averfile = ftp2fn(efSTO, NFILE, fnm);
182 logfile = ftp2fn(efLOG, NFILE, fnm);
183 asciifile = opt2fn_null("-ascii", NFILE, fnm);
184 xpmfile = opt2fn_null("-xpm", NFILE, fnm);
185 xpmafile = opt2fn_null("-xpma", NFILE, fnm);
187 read_tps_conf(fitfile, str, &top, &ePBC, &xref, NULL, box, TRUE);
192 printf("\nChoose a group for the least squares fit\n");
193 get_index(atoms, ndxfile, 1, &nfit, &ifit, &fitname);
196 gmx_fatal(FARGS, "Need >= 3 points to fit!\n");
203 printf("\nChoose a group for the covariance analysis\n");
204 get_index(atoms, ndxfile, 1, &natoms, &index, &ananame);
209 snew(w_rls, atoms->nr);
210 for (i = 0; (i < nfit); i++)
212 w_rls[ifit[i]] = atoms->atom[ifit[i]].m;
215 bDiffMass1 = bDiffMass1 || (w_rls[ifit[i]] != w_rls[ifit[i-1]]);
221 for (i = 0; (i < natoms); i++)
225 sqrtm[i] = sqrt(atoms->atom[index[i]].m);
228 bDiffMass2 = bDiffMass2 || (sqrtm[i] != sqrtm[i-1]);
237 if (bFit && bDiffMass1 && !bDiffMass2)
239 bDiffMass1 = natoms != nfit;
241 for (i = 0; (i < natoms) && !bDiffMass1; i++)
243 bDiffMass1 = index[i] != ifit[i];
248 "Note: the fit and analysis group are identical,\n"
249 " while the fit is mass weighted and the analysis is not.\n"
250 " Making the fit non mass weighted.\n\n");
251 for (i = 0; (i < nfit); i++)
253 w_rls[ifit[i]] = 1.0;
258 /* Prepare reference frame */
261 gpbc = gmx_rmpbc_init(&top.idef, ePBC, atoms->nr);
262 gmx_rmpbc(gpbc, atoms->nr, box, xref);
266 reset_x(nfit, ifit, atoms->nr, NULL, xref, w_rls);
272 if (sqrt(GMX_INT64_MAX) < ndim)
274 gmx_fatal(FARGS, "Number of degrees of freedoms to large for matrix.\n");
276 snew(mat, ndim*ndim);
278 fprintf(stderr, "Calculating the average structure ...\n");
280 nat = read_first_x(oenv, &status, trxfile, &t, &xread, box);
281 if (nat != atoms->nr)
283 fprintf(stderr, "\nWARNING: number of atoms in tpx (%d) and trajectory (%d) do not match\n", natoms, nat);
288 /* calculate x: a fitted struture of the selected atoms */
291 gmx_rmpbc(gpbc, nat, box, xread);
295 reset_x(nfit, ifit, nat, NULL, xread, w_rls);
296 do_fit(nat, w_rls, xref, xread);
298 for (i = 0; i < natoms; i++)
300 rvec_inc(xav[i], xread[index[i]]);
303 while (read_next_x(oenv, status, &t, xread, box));
306 inv_nframes = 1.0/nframes0;
307 for (i = 0; i < natoms; i++)
309 for (d = 0; d < DIM; d++)
311 xav[i][d] *= inv_nframes;
312 xread[index[i]][d] = xav[i][d];
315 write_sto_conf_indexed(opt2fn("-av", NFILE, fnm), "Average structure",
316 atoms, xread, NULL, epbcNONE, zerobox, natoms, index);
319 fprintf(stderr, "Constructing covariance matrix (%dx%d) ...\n", (int)ndim, (int)ndim);
321 nat = read_first_x(oenv, &status, trxfile, &t, &xread, box);
327 /* calculate x: a (fitted) structure of the selected atoms */
330 gmx_rmpbc(gpbc, nat, box, xread);
334 reset_x(nfit, ifit, nat, NULL, xread, w_rls);
335 do_fit(nat, w_rls, xref, xread);
339 for (i = 0; i < natoms; i++)
341 rvec_sub(xread[index[i]], xref[index[i]], x[i]);
346 for (i = 0; i < natoms; i++)
348 rvec_sub(xread[index[i]], xav[i], x[i]);
352 for (j = 0; j < natoms; j++)
354 for (dj = 0; dj < DIM; dj++)
358 for (i = j; i < natoms; i++)
361 for (d = 0; d < DIM; d++)
363 mat[l+d] += x[i][d]*xj;
369 while (read_next_x(oenv, status, &t, xread, box) &&
370 (bRef || nframes < nframes0));
372 gmx_rmpbc_done(gpbc);
374 fprintf(stderr, "Read %d frames\n", nframes);
378 /* copy the reference structure to the ouput array x */
380 for (i = 0; i < natoms; i++)
382 copy_rvec(xref[index[i]], xproj[i]);
390 /* correct the covariance matrix for the mass */
391 inv_nframes = 1.0/nframes;
392 for (j = 0; j < natoms; j++)
394 for (dj = 0; dj < DIM; dj++)
396 for (i = j; i < natoms; i++)
398 k = ndim*(DIM*j+dj)+DIM*i;
399 for (d = 0; d < DIM; d++)
401 mat[k+d] = mat[k+d]*inv_nframes*sqrtm[i]*sqrtm[j];
407 /* symmetrize the matrix */
408 for (j = 0; j < ndim; j++)
410 for (i = j; i < ndim; i++)
412 mat[ndim*i+j] = mat[ndim*j+i];
417 for (i = 0; i < ndim; i++)
419 trace += mat[i*ndim+i];
421 fprintf(stderr, "\nTrace of the covariance matrix: %g (%snm^2)\n",
422 trace, bM ? "u " : "");
426 out = gmx_ffopen(asciifile, "w");
427 for (j = 0; j < ndim; j++)
429 for (i = 0; i < ndim; i += 3)
431 fprintf(out, "%g %g %g\n",
432 mat[ndim*j+i], mat[ndim*j+i+1], mat[ndim*j+i+2]);
443 for (j = 0; j < ndim; j++)
445 mat2[j] = &(mat[ndim*j]);
446 for (i = 0; i <= j; i++)
448 if (mat2[j][i] < min)
452 if (mat2[j][j] > max)
459 for (i = 0; i < ndim; i++)
463 rlo.r = 0; rlo.g = 0; rlo.b = 1;
464 rmi.r = 1; rmi.g = 1; rmi.b = 1;
465 rhi.r = 1; rhi.g = 0; rhi.b = 0;
466 out = gmx_ffopen(xpmfile, "w");
468 write_xpm3(out, 0, "Covariance", bM ? "u nm^2" : "nm^2",
469 "dim", "dim", ndim, ndim, axis, axis,
470 mat2, min, 0.0, max, rlo, rmi, rhi, &nlevels);
480 snew(mat2, ndim/DIM);
481 for (i = 0; i < ndim/DIM; i++)
483 snew(mat2[i], ndim/DIM);
485 for (j = 0; j < ndim/DIM; j++)
487 for (i = 0; i <= j; i++)
490 for (d = 0; d < DIM; d++)
492 mat2[j][i] += mat[ndim*(DIM*j+d)+DIM*i+d];
494 if (mat2[j][i] < min)
498 if (mat2[j][j] > max)
502 mat2[i][j] = mat2[j][i];
505 snew(axis, ndim/DIM);
506 for (i = 0; i < ndim/DIM; i++)
510 rlo.r = 0; rlo.g = 0; rlo.b = 1;
511 rmi.r = 1; rmi.g = 1; rmi.b = 1;
512 rhi.r = 1; rhi.g = 0; rhi.b = 0;
513 out = gmx_ffopen(xpmafile, "w");
515 write_xpm3(out, 0, "Covariance", bM ? "u nm^2" : "nm^2",
516 "atom", "atom", ndim/DIM, ndim/DIM, axis, axis,
517 mat2, min, 0.0, max, rlo, rmi, rhi, &nlevels);
520 for (i = 0; i < ndim/DIM; i++)
528 /* call diagonalization routine */
530 snew(eigenvalues, ndim);
531 snew(eigenvectors, ndim*ndim);
533 memcpy(eigenvectors, mat, ndim*ndim*sizeof(real));
534 fprintf(stderr, "\nDiagonalizing ...\n");
536 eigensolver(eigenvectors, ndim, 0, ndim, eigenvalues, mat);
539 /* now write the output */
542 for (i = 0; i < ndim; i++)
544 sum += eigenvalues[i];
546 fprintf(stderr, "\nSum of the eigenvalues: %g (%snm^2)\n",
547 sum, bM ? "u " : "");
548 if (fabs(trace-sum) > 0.01*trace)
550 fprintf(stderr, "\nWARNING: eigenvalue sum deviates from the trace of the covariance matrix\n");
553 fprintf(stderr, "\nWriting eigenvalues to %s\n", eigvalfile);
555 sprintf(str, "(%snm\\S2\\N)", bM ? "u " : "");
556 out = xvgropen(eigvalfile,
557 "Eigenvalues of the covariance matrix",
558 "Eigenvector index", str, oenv);
559 for (i = 0; (i < end); i++)
561 fprintf (out, "%10d %g\n", (int)i+1, eigenvalues[ndim-1-i]);
567 if (nframes-1 < ndim)
570 fprintf(out, "WARNING: there are fewer frames in your trajectory than there are\n");
571 fprintf(out, "degrees of freedom in your system. Only generating the first\n");
572 fprintf(out, "%d out of %d eigenvectors and eigenvalues.\n", end, (int)ndim);
581 /* misuse lambda: 0/1 mass weighted analysis no/yes */
584 WriteXref = eWXR_YES;
585 for (i = 0; i < nfit; i++)
587 copy_rvec(xref[ifit[i]], x[i]);
597 /* misuse lambda: -1 for no fit */
598 WriteXref = eWXR_NOFIT;
601 write_eigenvectors(eigvecfile, natoms, mat, TRUE, 1, end,
602 WriteXref, x, bDiffMass1, xproj, bM, eigenvalues);
604 out = gmx_ffopen(logfile, "w");
607 gmx_ctime_r(&now, timebuf, STRLEN);
608 fprintf(out, "Covariance analysis log, written %s\n", timebuf);
610 fprintf(out, "Program: %s\n", argv[0]);
611 gmx_getcwd(str, STRLEN);
613 fprintf(out, "Working directory: %s\n\n", str);
615 fprintf(out, "Read %d frames from %s (time %g to %g %s)\n", nframes, trxfile,
616 output_env_conv_time(oenv, tstart), output_env_conv_time(oenv, tend), output_env_get_time_unit(oenv));
619 fprintf(out, "Read reference structure for fit from %s\n", fitfile);
623 fprintf(out, "Read index groups from %s\n", ndxfile);
627 fprintf(out, "Analysis group is '%s' (%d atoms)\n", ananame, natoms);
630 fprintf(out, "Fit group is '%s' (%d atoms)\n", fitname, nfit);
634 fprintf(out, "No fit was used\n");
636 fprintf(out, "Analysis is %smass weighted\n", bDiffMass2 ? "" : "non-");
639 fprintf(out, "Fit is %smass weighted\n", bDiffMass1 ? "" : "non-");
641 fprintf(out, "Diagonalized the %dx%d covariance matrix\n", (int)ndim, (int)ndim);
642 fprintf(out, "Trace of the covariance matrix before diagonalizing: %g\n",
644 fprintf(out, "Trace of the covariance matrix after diagonalizing: %g\n\n",
647 fprintf(out, "Wrote %d eigenvalues to %s\n", (int)end, eigvalfile);
648 if (WriteXref == eWXR_YES)
650 fprintf(out, "Wrote reference structure to %s\n", eigvecfile);
652 fprintf(out, "Wrote average structure to %s and %s\n", averfile, eigvecfile);
653 fprintf(out, "Wrote eigenvectors %d to %d to %s\n", 1, end, eigvecfile);
657 fprintf(stderr, "Wrote the log to %s\n", logfile);