2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
45 #ifdef HAVE_SYS_TIME_H
49 #include "gromacs/commandline/pargs.h"
56 #include "gromacs/fileio/futil.h"
58 #include "gromacs/fileio/confio.h"
59 #include "gromacs/fileio/trnio.h"
65 #include "gromacs/fileio/matio.h"
70 #include "gromacs/fileio/trxio.h"
72 #include "gromacs/linearalgebra/eigensolver.h"
74 int gmx_covar(int argc, char *argv[])
76 const char *desc[] = {
77 "[THISMODULE] calculates and diagonalizes the (mass-weighted)",
79 "All structures are fitted to the structure in the structure file.",
80 "When this is not a run input file periodicity will not be taken into",
81 "account. When the fit and analysis groups are identical and the analysis",
82 "is non mass-weighted, the fit will also be non mass-weighted.",
84 "The eigenvectors are written to a trajectory file ([TT]-v[tt]).",
85 "When the same atoms are used for the fit and the covariance analysis,",
86 "the reference structure for the fit is written first with t=-1.",
87 "The average (or reference when [TT]-ref[tt] is used) structure is",
88 "written with t=0, the eigenvectors",
89 "are written as frames with the eigenvector number as timestamp.",
91 "The eigenvectors can be analyzed with [gmx-anaeig].",
93 "Option [TT]-ascii[tt] writes the whole covariance matrix to",
94 "an ASCII file. The order of the elements is: x1x1, x1y1, x1z1, x1x2, ...",
96 "Option [TT]-xpm[tt] writes the whole covariance matrix to an [TT].xpm[tt] file.",
98 "Option [TT]-xpma[tt] writes the atomic covariance matrix to an [TT].xpm[tt] file,",
99 "i.e. for each atom pair the sum of the xx, yy and zz covariances is",
102 "Note that the diagonalization of a matrix requires memory and time",
103 "that will increase at least as fast as than the square of the number",
104 "of atoms involved. It is easy to run out of memory, in which",
105 "case this tool will probably exit with a 'Segmentation fault'. You",
106 "should consider carefully whether a reduced set of atoms will meet",
107 "your needs for lower costs."
109 static gmx_bool bFit = TRUE, bRef = FALSE, bM = FALSE, bPBC = TRUE;
112 { "-fit", FALSE, etBOOL, {&bFit},
113 "Fit to a reference structure"},
114 { "-ref", FALSE, etBOOL, {&bRef},
115 "Use the deviation from the conformation in the structure file instead of from the average" },
116 { "-mwa", FALSE, etBOOL, {&bM},
117 "Mass-weighted covariance analysis"},
118 { "-last", FALSE, etINT, {&end},
119 "Last eigenvector to write away (-1 is till the last)" },
120 { "-pbc", FALSE, etBOOL, {&bPBC},
121 "Apply corrections for periodic boundary conditions" }
129 rvec *x, *xread, *xref, *xav, *xproj;
131 real *sqrtm, *mat, *eigenvalues, sum, trace, inv_nframes;
132 real t, tstart, tend, **mat2;
133 real xj, *w_rls = NULL;
134 real min, max, *axis;
136 int natoms, nat, count, nframes0, nframes, nlevels;
137 gmx_int64_t ndim, i, j, k, l;
139 const char *fitfile, *trxfile, *ndxfile;
140 const char *eigvalfile, *eigvecfile, *averfile, *logfile;
141 const char *asciifile, *xpmfile, *xpmafile;
142 char str[STRLEN], *fitname, *ananame, *pcwd;
144 atom_id *index, *ifit;
145 gmx_bool bDiffMass1, bDiffMass2;
147 char timebuf[STRLEN];
151 gmx_rmpbc_t gpbc = NULL;
154 { efTRX, "-f", NULL, ffREAD },
155 { efTPS, NULL, NULL, ffREAD },
156 { efNDX, NULL, NULL, ffOPTRD },
157 { efXVG, NULL, "eigenval", ffWRITE },
158 { efTRN, "-v", "eigenvec", ffWRITE },
159 { efSTO, "-av", "average.pdb", ffWRITE },
160 { efLOG, NULL, "covar", ffWRITE },
161 { efDAT, "-ascii", "covar", ffOPTWR },
162 { efXPM, "-xpm", "covar", ffOPTWR },
163 { efXPM, "-xpma", "covara", ffOPTWR }
165 #define NFILE asize(fnm)
167 if (!parse_common_args(&argc, argv, PCA_CAN_TIME | PCA_TIME_UNIT | PCA_BE_NICE,
168 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &oenv))
175 fitfile = ftp2fn(efTPS, NFILE, fnm);
176 trxfile = ftp2fn(efTRX, NFILE, fnm);
177 ndxfile = ftp2fn_null(efNDX, NFILE, fnm);
178 eigvalfile = ftp2fn(efXVG, NFILE, fnm);
179 eigvecfile = ftp2fn(efTRN, NFILE, fnm);
180 averfile = ftp2fn(efSTO, NFILE, fnm);
181 logfile = ftp2fn(efLOG, NFILE, fnm);
182 asciifile = opt2fn_null("-ascii", NFILE, fnm);
183 xpmfile = opt2fn_null("-xpm", NFILE, fnm);
184 xpmafile = opt2fn_null("-xpma", NFILE, fnm);
186 read_tps_conf(fitfile, str, &top, &ePBC, &xref, NULL, box, TRUE);
191 printf("\nChoose a group for the least squares fit\n");
192 get_index(atoms, ndxfile, 1, &nfit, &ifit, &fitname);
195 gmx_fatal(FARGS, "Need >= 3 points to fit!\n");
202 printf("\nChoose a group for the covariance analysis\n");
203 get_index(atoms, ndxfile, 1, &natoms, &index, &ananame);
208 snew(w_rls, atoms->nr);
209 for (i = 0; (i < nfit); i++)
211 w_rls[ifit[i]] = atoms->atom[ifit[i]].m;
214 bDiffMass1 = bDiffMass1 || (w_rls[ifit[i]] != w_rls[ifit[i-1]]);
220 for (i = 0; (i < natoms); i++)
224 sqrtm[i] = sqrt(atoms->atom[index[i]].m);
227 bDiffMass2 = bDiffMass2 || (sqrtm[i] != sqrtm[i-1]);
236 if (bFit && bDiffMass1 && !bDiffMass2)
238 bDiffMass1 = natoms != nfit;
240 for (i = 0; (i < natoms) && !bDiffMass1; i++)
242 bDiffMass1 = index[i] != ifit[i];
247 "Note: the fit and analysis group are identical,\n"
248 " while the fit is mass weighted and the analysis is not.\n"
249 " Making the fit non mass weighted.\n\n");
250 for (i = 0; (i < nfit); i++)
252 w_rls[ifit[i]] = 1.0;
257 /* Prepare reference frame */
260 gpbc = gmx_rmpbc_init(&top.idef, ePBC, atoms->nr);
261 gmx_rmpbc(gpbc, atoms->nr, box, xref);
265 reset_x(nfit, ifit, atoms->nr, NULL, xref, w_rls);
271 if (sqrt(GMX_INT64_MAX) < ndim)
273 gmx_fatal(FARGS, "Number of degrees of freedoms to large for matrix.\n");
275 snew(mat, ndim*ndim);
277 fprintf(stderr, "Calculating the average structure ...\n");
279 nat = read_first_x(oenv, &status, trxfile, &t, &xread, box);
280 if (nat != atoms->nr)
282 fprintf(stderr, "\nWARNING: number of atoms in tpx (%d) and trajectory (%d) do not match\n", natoms, nat);
287 /* calculate x: a fitted struture of the selected atoms */
290 gmx_rmpbc(gpbc, nat, box, xread);
294 reset_x(nfit, ifit, nat, NULL, xread, w_rls);
295 do_fit(nat, w_rls, xref, xread);
297 for (i = 0; i < natoms; i++)
299 rvec_inc(xav[i], xread[index[i]]);
302 while (read_next_x(oenv, status, &t, xread, box));
305 inv_nframes = 1.0/nframes0;
306 for (i = 0; i < natoms; i++)
308 for (d = 0; d < DIM; d++)
310 xav[i][d] *= inv_nframes;
311 xread[index[i]][d] = xav[i][d];
314 write_sto_conf_indexed(opt2fn("-av", NFILE, fnm), "Average structure",
315 atoms, xread, NULL, epbcNONE, zerobox, natoms, index);
318 fprintf(stderr, "Constructing covariance matrix (%dx%d) ...\n", (int)ndim, (int)ndim);
320 nat = read_first_x(oenv, &status, trxfile, &t, &xread, box);
326 /* calculate x: a (fitted) structure of the selected atoms */
329 gmx_rmpbc(gpbc, nat, box, xread);
333 reset_x(nfit, ifit, nat, NULL, xread, w_rls);
334 do_fit(nat, w_rls, xref, xread);
338 for (i = 0; i < natoms; i++)
340 rvec_sub(xread[index[i]], xref[index[i]], x[i]);
345 for (i = 0; i < natoms; i++)
347 rvec_sub(xread[index[i]], xav[i], x[i]);
351 for (j = 0; j < natoms; j++)
353 for (dj = 0; dj < DIM; dj++)
357 for (i = j; i < natoms; i++)
360 for (d = 0; d < DIM; d++)
362 mat[l+d] += x[i][d]*xj;
368 while (read_next_x(oenv, status, &t, xread, box) &&
369 (bRef || nframes < nframes0));
371 gmx_rmpbc_done(gpbc);
373 fprintf(stderr, "Read %d frames\n", nframes);
377 /* copy the reference structure to the ouput array x */
379 for (i = 0; i < natoms; i++)
381 copy_rvec(xref[index[i]], xproj[i]);
389 /* correct the covariance matrix for the mass */
390 inv_nframes = 1.0/nframes;
391 for (j = 0; j < natoms; j++)
393 for (dj = 0; dj < DIM; dj++)
395 for (i = j; i < natoms; i++)
397 k = ndim*(DIM*j+dj)+DIM*i;
398 for (d = 0; d < DIM; d++)
400 mat[k+d] = mat[k+d]*inv_nframes*sqrtm[i]*sqrtm[j];
406 /* symmetrize the matrix */
407 for (j = 0; j < ndim; j++)
409 for (i = j; i < ndim; i++)
411 mat[ndim*i+j] = mat[ndim*j+i];
416 for (i = 0; i < ndim; i++)
418 trace += mat[i*ndim+i];
420 fprintf(stderr, "\nTrace of the covariance matrix: %g (%snm^2)\n",
421 trace, bM ? "u " : "");
425 out = gmx_ffopen(asciifile, "w");
426 for (j = 0; j < ndim; j++)
428 for (i = 0; i < ndim; i += 3)
430 fprintf(out, "%g %g %g\n",
431 mat[ndim*j+i], mat[ndim*j+i+1], mat[ndim*j+i+2]);
442 for (j = 0; j < ndim; j++)
444 mat2[j] = &(mat[ndim*j]);
445 for (i = 0; i <= j; i++)
447 if (mat2[j][i] < min)
451 if (mat2[j][j] > max)
458 for (i = 0; i < ndim; i++)
462 rlo.r = 0; rlo.g = 0; rlo.b = 1;
463 rmi.r = 1; rmi.g = 1; rmi.b = 1;
464 rhi.r = 1; rhi.g = 0; rhi.b = 0;
465 out = gmx_ffopen(xpmfile, "w");
467 write_xpm3(out, 0, "Covariance", bM ? "u nm^2" : "nm^2",
468 "dim", "dim", ndim, ndim, axis, axis,
469 mat2, min, 0.0, max, rlo, rmi, rhi, &nlevels);
479 snew(mat2, ndim/DIM);
480 for (i = 0; i < ndim/DIM; i++)
482 snew(mat2[i], ndim/DIM);
484 for (j = 0; j < ndim/DIM; j++)
486 for (i = 0; i <= j; i++)
489 for (d = 0; d < DIM; d++)
491 mat2[j][i] += mat[ndim*(DIM*j+d)+DIM*i+d];
493 if (mat2[j][i] < min)
497 if (mat2[j][j] > max)
501 mat2[i][j] = mat2[j][i];
504 snew(axis, ndim/DIM);
505 for (i = 0; i < ndim/DIM; i++)
509 rlo.r = 0; rlo.g = 0; rlo.b = 1;
510 rmi.r = 1; rmi.g = 1; rmi.b = 1;
511 rhi.r = 1; rhi.g = 0; rhi.b = 0;
512 out = gmx_ffopen(xpmafile, "w");
514 write_xpm3(out, 0, "Covariance", bM ? "u nm^2" : "nm^2",
515 "atom", "atom", ndim/DIM, ndim/DIM, axis, axis,
516 mat2, min, 0.0, max, rlo, rmi, rhi, &nlevels);
519 for (i = 0; i < ndim/DIM; i++)
527 /* call diagonalization routine */
529 snew(eigenvalues, ndim);
530 snew(eigenvectors, ndim*ndim);
532 memcpy(eigenvectors, mat, ndim*ndim*sizeof(real));
533 fprintf(stderr, "\nDiagonalizing ...\n");
535 eigensolver(eigenvectors, ndim, 0, ndim, eigenvalues, mat);
538 /* now write the output */
541 for (i = 0; i < ndim; i++)
543 sum += eigenvalues[i];
545 fprintf(stderr, "\nSum of the eigenvalues: %g (%snm^2)\n",
546 sum, bM ? "u " : "");
547 if (fabs(trace-sum) > 0.01*trace)
549 fprintf(stderr, "\nWARNING: eigenvalue sum deviates from the trace of the covariance matrix\n");
552 fprintf(stderr, "\nWriting eigenvalues to %s\n", eigvalfile);
554 sprintf(str, "(%snm\\S2\\N)", bM ? "u " : "");
555 out = xvgropen(eigvalfile,
556 "Eigenvalues of the covariance matrix",
557 "Eigenvector index", str, oenv);
558 for (i = 0; (i < ndim); i++)
560 fprintf (out, "%10d %g\n", (int)i+1, eigenvalues[ndim-1-i]);
566 if (nframes-1 < ndim)
577 /* misuse lambda: 0/1 mass weighted analysis no/yes */
580 WriteXref = eWXR_YES;
581 for (i = 0; i < nfit; i++)
583 copy_rvec(xref[ifit[i]], x[i]);
593 /* misuse lambda: -1 for no fit */
594 WriteXref = eWXR_NOFIT;
597 write_eigenvectors(eigvecfile, natoms, mat, TRUE, 1, end,
598 WriteXref, x, bDiffMass1, xproj, bM, eigenvalues);
600 out = gmx_ffopen(logfile, "w");
603 gmx_ctime_r(&now, timebuf, STRLEN);
604 fprintf(out, "Covariance analysis log, written %s\n", timebuf);
606 fprintf(out, "Program: %s\n", argv[0]);
607 gmx_getcwd(str, STRLEN);
609 fprintf(out, "Working directory: %s\n\n", str);
611 fprintf(out, "Read %d frames from %s (time %g to %g %s)\n", nframes, trxfile,
612 output_env_conv_time(oenv, tstart), output_env_conv_time(oenv, tend), output_env_get_time_unit(oenv));
615 fprintf(out, "Read reference structure for fit from %s\n", fitfile);
619 fprintf(out, "Read index groups from %s\n", ndxfile);
623 fprintf(out, "Analysis group is '%s' (%d atoms)\n", ananame, natoms);
626 fprintf(out, "Fit group is '%s' (%d atoms)\n", fitname, nfit);
630 fprintf(out, "No fit was used\n");
632 fprintf(out, "Analysis is %smass weighted\n", bDiffMass2 ? "" : "non-");
635 fprintf(out, "Fit is %smass weighted\n", bDiffMass1 ? "" : "non-");
637 fprintf(out, "Diagonalized the %dx%d covariance matrix\n", (int)ndim, (int)ndim);
638 fprintf(out, "Trace of the covariance matrix before diagonalizing: %g\n",
640 fprintf(out, "Trace of the covariance matrix after diagonalizing: %g\n\n",
643 fprintf(out, "Wrote %d eigenvalues to %s\n", (int)ndim, eigvalfile);
644 if (WriteXref == eWXR_YES)
646 fprintf(out, "Wrote reference structure to %s\n", eigvecfile);
648 fprintf(out, "Wrote average structure to %s and %s\n", averfile, eigvecfile);
649 fprintf(out, "Wrote eigenvectors %d to %d to %s\n", 1, end, eigvecfile);
653 fprintf(stderr, "Wrote the log to %s\n", logfile);