2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
45 #ifdef HAVE_SYS_TIME_H
49 #include "gromacs/commandline/pargs.h"
50 #include "gromacs/fileio/confio.h"
51 #include "gromacs/fileio/matio.h"
52 #include "gromacs/fileio/trnio.h"
53 #include "gromacs/fileio/trxio.h"
54 #include "gromacs/fileio/xvgr.h"
55 #include "gromacs/gmxana/eigio.h"
56 #include "gromacs/gmxana/gmx_ana.h"
57 #include "gromacs/legacyheaders/macros.h"
58 #include "gromacs/legacyheaders/txtdump.h"
59 #include "gromacs/legacyheaders/typedefs.h"
60 #include "gromacs/linearalgebra/eigensolver.h"
61 #include "gromacs/math/do_fit.h"
62 #include "gromacs/math/vec.h"
63 #include "gromacs/pbcutil/rmpbc.h"
64 #include "gromacs/topology/index.h"
65 #include "gromacs/utility/cstringutil.h"
66 #include "gromacs/utility/fatalerror.h"
67 #include "gromacs/utility/futil.h"
68 #include "gromacs/utility/smalloc.h"
70 int gmx_covar(int argc, char *argv[])
72 const char *desc[] = {
73 "[THISMODULE] calculates and diagonalizes the (mass-weighted)",
75 "All structures are fitted to the structure in the structure file.",
76 "When this is not a run input file periodicity will not be taken into",
77 "account. When the fit and analysis groups are identical and the analysis",
78 "is non mass-weighted, the fit will also be non mass-weighted.",
80 "The eigenvectors are written to a trajectory file ([TT]-v[tt]).",
81 "When the same atoms are used for the fit and the covariance analysis,",
82 "the reference structure for the fit is written first with t=-1.",
83 "The average (or reference when [TT]-ref[tt] is used) structure is",
84 "written with t=0, the eigenvectors",
85 "are written as frames with the eigenvector number as timestamp.",
87 "The eigenvectors can be analyzed with [gmx-anaeig].",
89 "Option [TT]-ascii[tt] writes the whole covariance matrix to",
90 "an ASCII file. The order of the elements is: x1x1, x1y1, x1z1, x1x2, ...",
92 "Option [TT]-xpm[tt] writes the whole covariance matrix to an [TT].xpm[tt] file.",
94 "Option [TT]-xpma[tt] writes the atomic covariance matrix to an [TT].xpm[tt] file,",
95 "i.e. for each atom pair the sum of the xx, yy and zz covariances is",
98 "Note that the diagonalization of a matrix requires memory and time",
99 "that will increase at least as fast as than the square of the number",
100 "of atoms involved. It is easy to run out of memory, in which",
101 "case this tool will probably exit with a 'Segmentation fault'. You",
102 "should consider carefully whether a reduced set of atoms will meet",
103 "your needs for lower costs."
105 static gmx_bool bFit = TRUE, bRef = FALSE, bM = FALSE, bPBC = TRUE;
108 { "-fit", FALSE, etBOOL, {&bFit},
109 "Fit to a reference structure"},
110 { "-ref", FALSE, etBOOL, {&bRef},
111 "Use the deviation from the conformation in the structure file instead of from the average" },
112 { "-mwa", FALSE, etBOOL, {&bM},
113 "Mass-weighted covariance analysis"},
114 { "-last", FALSE, etINT, {&end},
115 "Last eigenvector to write away (-1 is till the last)" },
116 { "-pbc", FALSE, etBOOL, {&bPBC},
117 "Apply corrections for periodic boundary conditions" }
119 FILE *out = NULL; /* initialization makes all compilers happy */
125 rvec *x, *xread, *xref, *xav, *xproj;
127 real *sqrtm, *mat, *eigenvalues, sum, trace, inv_nframes;
128 real t, tstart, tend, **mat2;
129 real xj, *w_rls = NULL;
130 real min, max, *axis;
132 int natoms, nat, count, nframes0, nframes, nlevels;
133 gmx_int64_t ndim, i, j, k, l;
135 const char *fitfile, *trxfile, *ndxfile;
136 const char *eigvalfile, *eigvecfile, *averfile, *logfile;
137 const char *asciifile, *xpmfile, *xpmafile;
138 char str[STRLEN], *fitname, *ananame, *pcwd;
140 atom_id *index, *ifit;
141 gmx_bool bDiffMass1, bDiffMass2;
143 char timebuf[STRLEN];
147 gmx_rmpbc_t gpbc = NULL;
150 { efTRX, "-f", NULL, ffREAD },
151 { efTPS, NULL, NULL, ffREAD },
152 { efNDX, NULL, NULL, ffOPTRD },
153 { efXVG, NULL, "eigenval", ffWRITE },
154 { efTRN, "-v", "eigenvec", ffWRITE },
155 { efSTO, "-av", "average.pdb", ffWRITE },
156 { efLOG, NULL, "covar", ffWRITE },
157 { efDAT, "-ascii", "covar", ffOPTWR },
158 { efXPM, "-xpm", "covar", ffOPTWR },
159 { efXPM, "-xpma", "covara", ffOPTWR }
161 #define NFILE asize(fnm)
163 if (!parse_common_args(&argc, argv, PCA_CAN_TIME | PCA_TIME_UNIT,
164 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &oenv))
171 fitfile = ftp2fn(efTPS, NFILE, fnm);
172 trxfile = ftp2fn(efTRX, NFILE, fnm);
173 ndxfile = ftp2fn_null(efNDX, NFILE, fnm);
174 eigvalfile = ftp2fn(efXVG, NFILE, fnm);
175 eigvecfile = ftp2fn(efTRN, NFILE, fnm);
176 averfile = ftp2fn(efSTO, NFILE, fnm);
177 logfile = ftp2fn(efLOG, NFILE, fnm);
178 asciifile = opt2fn_null("-ascii", NFILE, fnm);
179 xpmfile = opt2fn_null("-xpm", NFILE, fnm);
180 xpmafile = opt2fn_null("-xpma", NFILE, fnm);
182 read_tps_conf(fitfile, str, &top, &ePBC, &xref, NULL, box, TRUE);
187 printf("\nChoose a group for the least squares fit\n");
188 get_index(atoms, ndxfile, 1, &nfit, &ifit, &fitname);
191 gmx_fatal(FARGS, "Need >= 3 points to fit!\n");
198 printf("\nChoose a group for the covariance analysis\n");
199 get_index(atoms, ndxfile, 1, &natoms, &index, &ananame);
204 snew(w_rls, atoms->nr);
205 for (i = 0; (i < nfit); i++)
207 w_rls[ifit[i]] = atoms->atom[ifit[i]].m;
210 bDiffMass1 = bDiffMass1 || (w_rls[ifit[i]] != w_rls[ifit[i-1]]);
216 for (i = 0; (i < natoms); i++)
220 sqrtm[i] = sqrt(atoms->atom[index[i]].m);
223 bDiffMass2 = bDiffMass2 || (sqrtm[i] != sqrtm[i-1]);
232 if (bFit && bDiffMass1 && !bDiffMass2)
234 bDiffMass1 = natoms != nfit;
236 for (i = 0; (i < natoms) && !bDiffMass1; i++)
238 bDiffMass1 = index[i] != ifit[i];
243 "Note: the fit and analysis group are identical,\n"
244 " while the fit is mass weighted and the analysis is not.\n"
245 " Making the fit non mass weighted.\n\n");
246 for (i = 0; (i < nfit); i++)
248 w_rls[ifit[i]] = 1.0;
253 /* Prepare reference frame */
256 gpbc = gmx_rmpbc_init(&top.idef, ePBC, atoms->nr);
257 gmx_rmpbc(gpbc, atoms->nr, box, xref);
261 reset_x(nfit, ifit, atoms->nr, NULL, xref, w_rls);
267 if (sqrt(GMX_INT64_MAX) < ndim)
269 gmx_fatal(FARGS, "Number of degrees of freedoms to large for matrix.\n");
271 snew(mat, ndim*ndim);
273 fprintf(stderr, "Calculating the average structure ...\n");
275 nat = read_first_x(oenv, &status, trxfile, &t, &xread, box);
276 if (nat != atoms->nr)
278 fprintf(stderr, "\nWARNING: number of atoms in tpx (%d) and trajectory (%d) do not match\n", natoms, nat);
283 /* calculate x: a fitted struture of the selected atoms */
286 gmx_rmpbc(gpbc, nat, box, xread);
290 reset_x(nfit, ifit, nat, NULL, xread, w_rls);
291 do_fit(nat, w_rls, xref, xread);
293 for (i = 0; i < natoms; i++)
295 rvec_inc(xav[i], xread[index[i]]);
298 while (read_next_x(oenv, status, &t, xread, box));
301 inv_nframes = 1.0/nframes0;
302 for (i = 0; i < natoms; i++)
304 for (d = 0; d < DIM; d++)
306 xav[i][d] *= inv_nframes;
307 xread[index[i]][d] = xav[i][d];
310 write_sto_conf_indexed(opt2fn("-av", NFILE, fnm), "Average structure",
311 atoms, xread, NULL, epbcNONE, zerobox, natoms, index);
314 fprintf(stderr, "Constructing covariance matrix (%dx%d) ...\n", (int)ndim, (int)ndim);
316 nat = read_first_x(oenv, &status, trxfile, &t, &xread, box);
322 /* calculate x: a (fitted) structure of the selected atoms */
325 gmx_rmpbc(gpbc, nat, box, xread);
329 reset_x(nfit, ifit, nat, NULL, xread, w_rls);
330 do_fit(nat, w_rls, xref, xread);
334 for (i = 0; i < natoms; i++)
336 rvec_sub(xread[index[i]], xref[index[i]], x[i]);
341 for (i = 0; i < natoms; i++)
343 rvec_sub(xread[index[i]], xav[i], x[i]);
347 for (j = 0; j < natoms; j++)
349 for (dj = 0; dj < DIM; dj++)
353 for (i = j; i < natoms; i++)
356 for (d = 0; d < DIM; d++)
358 mat[l+d] += x[i][d]*xj;
364 while (read_next_x(oenv, status, &t, xread, box) &&
365 (bRef || nframes < nframes0));
367 gmx_rmpbc_done(gpbc);
369 fprintf(stderr, "Read %d frames\n", nframes);
373 /* copy the reference structure to the ouput array x */
375 for (i = 0; i < natoms; i++)
377 copy_rvec(xref[index[i]], xproj[i]);
385 /* correct the covariance matrix for the mass */
386 inv_nframes = 1.0/nframes;
387 for (j = 0; j < natoms; j++)
389 for (dj = 0; dj < DIM; dj++)
391 for (i = j; i < natoms; i++)
393 k = ndim*(DIM*j+dj)+DIM*i;
394 for (d = 0; d < DIM; d++)
396 mat[k+d] = mat[k+d]*inv_nframes*sqrtm[i]*sqrtm[j];
402 /* symmetrize the matrix */
403 for (j = 0; j < ndim; j++)
405 for (i = j; i < ndim; i++)
407 mat[ndim*i+j] = mat[ndim*j+i];
412 for (i = 0; i < ndim; i++)
414 trace += mat[i*ndim+i];
416 fprintf(stderr, "\nTrace of the covariance matrix: %g (%snm^2)\n",
417 trace, bM ? "u " : "");
421 out = gmx_ffopen(asciifile, "w");
422 for (j = 0; j < ndim; j++)
424 for (i = 0; i < ndim; i += 3)
426 fprintf(out, "%g %g %g\n",
427 mat[ndim*j+i], mat[ndim*j+i+1], mat[ndim*j+i+2]);
438 for (j = 0; j < ndim; j++)
440 mat2[j] = &(mat[ndim*j]);
441 for (i = 0; i <= j; i++)
443 if (mat2[j][i] < min)
447 if (mat2[j][j] > max)
454 for (i = 0; i < ndim; i++)
458 rlo.r = 0; rlo.g = 0; rlo.b = 1;
459 rmi.r = 1; rmi.g = 1; rmi.b = 1;
460 rhi.r = 1; rhi.g = 0; rhi.b = 0;
461 out = gmx_ffopen(xpmfile, "w");
463 write_xpm3(out, 0, "Covariance", bM ? "u nm^2" : "nm^2",
464 "dim", "dim", ndim, ndim, axis, axis,
465 mat2, min, 0.0, max, rlo, rmi, rhi, &nlevels);
475 snew(mat2, ndim/DIM);
476 for (i = 0; i < ndim/DIM; i++)
478 snew(mat2[i], ndim/DIM);
480 for (j = 0; j < ndim/DIM; j++)
482 for (i = 0; i <= j; i++)
485 for (d = 0; d < DIM; d++)
487 mat2[j][i] += mat[ndim*(DIM*j+d)+DIM*i+d];
489 if (mat2[j][i] < min)
493 if (mat2[j][j] > max)
497 mat2[i][j] = mat2[j][i];
500 snew(axis, ndim/DIM);
501 for (i = 0; i < ndim/DIM; i++)
505 rlo.r = 0; rlo.g = 0; rlo.b = 1;
506 rmi.r = 1; rmi.g = 1; rmi.b = 1;
507 rhi.r = 1; rhi.g = 0; rhi.b = 0;
508 out = gmx_ffopen(xpmafile, "w");
510 write_xpm3(out, 0, "Covariance", bM ? "u nm^2" : "nm^2",
511 "atom", "atom", ndim/DIM, ndim/DIM, axis, axis,
512 mat2, min, 0.0, max, rlo, rmi, rhi, &nlevels);
515 for (i = 0; i < ndim/DIM; i++)
523 /* call diagonalization routine */
525 snew(eigenvalues, ndim);
526 snew(eigenvectors, ndim*ndim);
528 memcpy(eigenvectors, mat, ndim*ndim*sizeof(real));
529 fprintf(stderr, "\nDiagonalizing ...\n");
531 eigensolver(eigenvectors, ndim, 0, ndim, eigenvalues, mat);
534 /* now write the output */
537 for (i = 0; i < ndim; i++)
539 sum += eigenvalues[i];
541 fprintf(stderr, "\nSum of the eigenvalues: %g (%snm^2)\n",
542 sum, bM ? "u " : "");
543 if (fabs(trace-sum) > 0.01*trace)
545 fprintf(stderr, "\nWARNING: eigenvalue sum deviates from the trace of the covariance matrix\n");
548 /* Set 'end', the maximum eigenvector and -value index used for output */
551 if (nframes-1 < ndim)
554 fprintf(out, "WARNING: there are fewer frames in your trajectory than there are\n");
555 fprintf(out, "degrees of freedom in your system. Only generating the first\n");
556 fprintf(out, "%d out of %d eigenvectors and eigenvalues.\n", end, (int)ndim);
564 fprintf(stderr, "\nWriting eigenvalues to %s\n", eigvalfile);
566 sprintf(str, "(%snm\\S2\\N)", bM ? "u " : "");
567 out = xvgropen(eigvalfile,
568 "Eigenvalues of the covariance matrix",
569 "Eigenvector index", str, oenv);
570 for (i = 0; (i < end); i++)
572 fprintf (out, "%10d %g\n", (int)i+1, eigenvalues[ndim-1-i]);
578 /* misuse lambda: 0/1 mass weighted analysis no/yes */
581 WriteXref = eWXR_YES;
582 for (i = 0; i < nfit; i++)
584 copy_rvec(xref[ifit[i]], x[i]);
594 /* misuse lambda: -1 for no fit */
595 WriteXref = eWXR_NOFIT;
598 write_eigenvectors(eigvecfile, natoms, mat, TRUE, 1, end,
599 WriteXref, x, bDiffMass1, xproj, bM, eigenvalues);
601 out = gmx_ffopen(logfile, "w");
604 gmx_ctime_r(&now, timebuf, STRLEN);
605 fprintf(out, "Covariance analysis log, written %s\n", timebuf);
607 fprintf(out, "Program: %s\n", argv[0]);
608 gmx_getcwd(str, STRLEN);
610 fprintf(out, "Working directory: %s\n\n", str);
612 fprintf(out, "Read %d frames from %s (time %g to %g %s)\n", nframes, trxfile,
613 output_env_conv_time(oenv, tstart), output_env_conv_time(oenv, tend), output_env_get_time_unit(oenv));
616 fprintf(out, "Read reference structure for fit from %s\n", fitfile);
620 fprintf(out, "Read index groups from %s\n", ndxfile);
624 fprintf(out, "Analysis group is '%s' (%d atoms)\n", ananame, natoms);
627 fprintf(out, "Fit group is '%s' (%d atoms)\n", fitname, nfit);
631 fprintf(out, "No fit was used\n");
633 fprintf(out, "Analysis is %smass weighted\n", bDiffMass2 ? "" : "non-");
636 fprintf(out, "Fit is %smass weighted\n", bDiffMass1 ? "" : "non-");
638 fprintf(out, "Diagonalized the %dx%d covariance matrix\n", (int)ndim, (int)ndim);
639 fprintf(out, "Trace of the covariance matrix before diagonalizing: %g\n",
641 fprintf(out, "Trace of the covariance matrix after diagonalizing: %g\n\n",
644 fprintf(out, "Wrote %d eigenvalues to %s\n", (int)end, eigvalfile);
645 if (WriteXref == eWXR_YES)
647 fprintf(out, "Wrote reference structure to %s\n", eigvecfile);
649 fprintf(out, "Wrote average structure to %s and %s\n", averfile, eigvecfile);
650 fprintf(out, "Wrote eigenvectors %d to %d to %s\n", 1, end, eigvecfile);
654 fprintf(stderr, "Wrote the log to %s\n", logfile);