2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
42 #include "gromacs/commandline/pargs.h"
43 #include "gromacs/fileio/confio.h"
44 #include "gromacs/fileio/matio.h"
45 #include "gromacs/fileio/trnio.h"
46 #include "gromacs/fileio/trxio.h"
47 #include "gromacs/fileio/xvgr.h"
48 #include "gromacs/gmxana/eigio.h"
49 #include "gromacs/gmxana/gmx_ana.h"
50 #include "gromacs/legacyheaders/macros.h"
51 #include "gromacs/legacyheaders/txtdump.h"
52 #include "gromacs/legacyheaders/typedefs.h"
53 #include "gromacs/linearalgebra/eigensolver.h"
54 #include "gromacs/math/do_fit.h"
55 #include "gromacs/math/vec.h"
56 #include "gromacs/pbcutil/rmpbc.h"
57 #include "gromacs/topology/index.h"
58 #include "gromacs/utility/cstringutil.h"
59 #include "gromacs/utility/fatalerror.h"
60 #include "gromacs/utility/futil.h"
61 #include "gromacs/utility/smalloc.h"
62 #include "gromacs/utility/sysinfo.h"
64 int gmx_covar(int argc, char *argv[])
66 const char *desc[] = {
67 "[THISMODULE] calculates and diagonalizes the (mass-weighted)",
69 "All structures are fitted to the structure in the structure file.",
70 "When this is not a run input file periodicity will not be taken into",
71 "account. When the fit and analysis groups are identical and the analysis",
72 "is non mass-weighted, the fit will also be non mass-weighted.",
74 "The eigenvectors are written to a trajectory file ([TT]-v[tt]).",
75 "When the same atoms are used for the fit and the covariance analysis,",
76 "the reference structure for the fit is written first with t=-1.",
77 "The average (or reference when [TT]-ref[tt] is used) structure is",
78 "written with t=0, the eigenvectors",
79 "are written as frames with the eigenvector number as timestamp.",
81 "The eigenvectors can be analyzed with [gmx-anaeig].",
83 "Option [TT]-ascii[tt] writes the whole covariance matrix to",
84 "an ASCII file. The order of the elements is: x1x1, x1y1, x1z1, x1x2, ...",
86 "Option [TT]-xpm[tt] writes the whole covariance matrix to an [REF].xpm[ref] file.",
88 "Option [TT]-xpma[tt] writes the atomic covariance matrix to an [REF].xpm[ref] file,",
89 "i.e. for each atom pair the sum of the xx, yy and zz covariances is",
92 "Note that the diagonalization of a matrix requires memory and time",
93 "that will increase at least as fast as than the square of the number",
94 "of atoms involved. It is easy to run out of memory, in which",
95 "case this tool will probably exit with a 'Segmentation fault'. You",
96 "should consider carefully whether a reduced set of atoms will meet",
97 "your needs for lower costs."
99 static gmx_bool bFit = TRUE, bRef = FALSE, bM = FALSE, bPBC = TRUE;
102 { "-fit", FALSE, etBOOL, {&bFit},
103 "Fit to a reference structure"},
104 { "-ref", FALSE, etBOOL, {&bRef},
105 "Use the deviation from the conformation in the structure file instead of from the average" },
106 { "-mwa", FALSE, etBOOL, {&bM},
107 "Mass-weighted covariance analysis"},
108 { "-last", FALSE, etINT, {&end},
109 "Last eigenvector to write away (-1 is till the last)" },
110 { "-pbc", FALSE, etBOOL, {&bPBC},
111 "Apply corrections for periodic boundary conditions" }
113 FILE *out = NULL; /* initialization makes all compilers happy */
119 rvec *x, *xread, *xref, *xav, *xproj;
121 real *sqrtm, *mat, *eigenvalues, sum, trace, inv_nframes;
122 real t, tstart, tend, **mat2;
123 real xj, *w_rls = NULL;
124 real min, max, *axis;
126 int natoms, nat, count, nframes0, nframes, nlevels;
127 gmx_int64_t ndim, i, j, k, l;
129 const char *fitfile, *trxfile, *ndxfile;
130 const char *eigvalfile, *eigvecfile, *averfile, *logfile;
131 const char *asciifile, *xpmfile, *xpmafile;
132 char str[STRLEN], *fitname, *ananame, *pcwd;
134 atom_id *index, *ifit;
135 gmx_bool bDiffMass1, bDiffMass2;
136 char timebuf[STRLEN];
140 gmx_rmpbc_t gpbc = NULL;
143 { efTRX, "-f", NULL, ffREAD },
144 { efTPS, NULL, NULL, ffREAD },
145 { efNDX, NULL, NULL, ffOPTRD },
146 { efXVG, NULL, "eigenval", ffWRITE },
147 { efTRN, "-v", "eigenvec", ffWRITE },
148 { efSTO, "-av", "average.pdb", ffWRITE },
149 { efLOG, NULL, "covar", ffWRITE },
150 { efDAT, "-ascii", "covar", ffOPTWR },
151 { efXPM, "-xpm", "covar", ffOPTWR },
152 { efXPM, "-xpma", "covara", ffOPTWR }
154 #define NFILE asize(fnm)
156 if (!parse_common_args(&argc, argv, PCA_CAN_TIME | PCA_TIME_UNIT,
157 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &oenv))
164 fitfile = ftp2fn(efTPS, NFILE, fnm);
165 trxfile = ftp2fn(efTRX, NFILE, fnm);
166 ndxfile = ftp2fn_null(efNDX, NFILE, fnm);
167 eigvalfile = ftp2fn(efXVG, NFILE, fnm);
168 eigvecfile = ftp2fn(efTRN, NFILE, fnm);
169 averfile = ftp2fn(efSTO, NFILE, fnm);
170 logfile = ftp2fn(efLOG, NFILE, fnm);
171 asciifile = opt2fn_null("-ascii", NFILE, fnm);
172 xpmfile = opt2fn_null("-xpm", NFILE, fnm);
173 xpmafile = opt2fn_null("-xpma", NFILE, fnm);
175 read_tps_conf(fitfile, str, &top, &ePBC, &xref, NULL, box, TRUE);
180 printf("\nChoose a group for the least squares fit\n");
181 get_index(atoms, ndxfile, 1, &nfit, &ifit, &fitname);
184 gmx_fatal(FARGS, "Need >= 3 points to fit!\n");
191 printf("\nChoose a group for the covariance analysis\n");
192 get_index(atoms, ndxfile, 1, &natoms, &index, &ananame);
197 snew(w_rls, atoms->nr);
198 for (i = 0; (i < nfit); i++)
200 w_rls[ifit[i]] = atoms->atom[ifit[i]].m;
203 bDiffMass1 = bDiffMass1 || (w_rls[ifit[i]] != w_rls[ifit[i-1]]);
209 for (i = 0; (i < natoms); i++)
213 sqrtm[i] = sqrt(atoms->atom[index[i]].m);
216 bDiffMass2 = bDiffMass2 || (sqrtm[i] != sqrtm[i-1]);
225 if (bFit && bDiffMass1 && !bDiffMass2)
227 bDiffMass1 = natoms != nfit;
229 for (i = 0; (i < natoms) && !bDiffMass1; i++)
231 bDiffMass1 = index[i] != ifit[i];
236 "Note: the fit and analysis group are identical,\n"
237 " while the fit is mass weighted and the analysis is not.\n"
238 " Making the fit non mass weighted.\n\n");
239 for (i = 0; (i < nfit); i++)
241 w_rls[ifit[i]] = 1.0;
246 /* Prepare reference frame */
249 gpbc = gmx_rmpbc_init(&top.idef, ePBC, atoms->nr);
250 gmx_rmpbc(gpbc, atoms->nr, box, xref);
254 reset_x(nfit, ifit, atoms->nr, NULL, xref, w_rls);
260 if (sqrt(GMX_INT64_MAX) < ndim)
262 gmx_fatal(FARGS, "Number of degrees of freedoms to large for matrix.\n");
264 snew(mat, ndim*ndim);
266 fprintf(stderr, "Calculating the average structure ...\n");
268 nat = read_first_x(oenv, &status, trxfile, &t, &xread, box);
269 if (nat != atoms->nr)
271 fprintf(stderr, "\nWARNING: number of atoms in tpx (%d) and trajectory (%d) do not match\n", natoms, nat);
276 /* calculate x: a fitted struture of the selected atoms */
279 gmx_rmpbc(gpbc, nat, box, xread);
283 reset_x(nfit, ifit, nat, NULL, xread, w_rls);
284 do_fit(nat, w_rls, xref, xread);
286 for (i = 0; i < natoms; i++)
288 rvec_inc(xav[i], xread[index[i]]);
291 while (read_next_x(oenv, status, &t, xread, box));
294 inv_nframes = 1.0/nframes0;
295 for (i = 0; i < natoms; i++)
297 for (d = 0; d < DIM; d++)
299 xav[i][d] *= inv_nframes;
300 xread[index[i]][d] = xav[i][d];
303 write_sto_conf_indexed(opt2fn("-av", NFILE, fnm), "Average structure",
304 atoms, xread, NULL, epbcNONE, zerobox, natoms, index);
307 fprintf(stderr, "Constructing covariance matrix (%dx%d) ...\n", (int)ndim, (int)ndim);
309 nat = read_first_x(oenv, &status, trxfile, &t, &xread, box);
315 /* calculate x: a (fitted) structure of the selected atoms */
318 gmx_rmpbc(gpbc, nat, box, xread);
322 reset_x(nfit, ifit, nat, NULL, xread, w_rls);
323 do_fit(nat, w_rls, xref, xread);
327 for (i = 0; i < natoms; i++)
329 rvec_sub(xread[index[i]], xref[index[i]], x[i]);
334 for (i = 0; i < natoms; i++)
336 rvec_sub(xread[index[i]], xav[i], x[i]);
340 for (j = 0; j < natoms; j++)
342 for (dj = 0; dj < DIM; dj++)
346 for (i = j; i < natoms; i++)
349 for (d = 0; d < DIM; d++)
351 mat[l+d] += x[i][d]*xj;
357 while (read_next_x(oenv, status, &t, xread, box) &&
358 (bRef || nframes < nframes0));
360 gmx_rmpbc_done(gpbc);
362 fprintf(stderr, "Read %d frames\n", nframes);
366 /* copy the reference structure to the ouput array x */
368 for (i = 0; i < natoms; i++)
370 copy_rvec(xref[index[i]], xproj[i]);
378 /* correct the covariance matrix for the mass */
379 inv_nframes = 1.0/nframes;
380 for (j = 0; j < natoms; j++)
382 for (dj = 0; dj < DIM; dj++)
384 for (i = j; i < natoms; i++)
386 k = ndim*(DIM*j+dj)+DIM*i;
387 for (d = 0; d < DIM; d++)
389 mat[k+d] = mat[k+d]*inv_nframes*sqrtm[i]*sqrtm[j];
395 /* symmetrize the matrix */
396 for (j = 0; j < ndim; j++)
398 for (i = j; i < ndim; i++)
400 mat[ndim*i+j] = mat[ndim*j+i];
405 for (i = 0; i < ndim; i++)
407 trace += mat[i*ndim+i];
409 fprintf(stderr, "\nTrace of the covariance matrix: %g (%snm^2)\n",
410 trace, bM ? "u " : "");
414 out = gmx_ffopen(asciifile, "w");
415 for (j = 0; j < ndim; j++)
417 for (i = 0; i < ndim; i += 3)
419 fprintf(out, "%g %g %g\n",
420 mat[ndim*j+i], mat[ndim*j+i+1], mat[ndim*j+i+2]);
431 for (j = 0; j < ndim; j++)
433 mat2[j] = &(mat[ndim*j]);
434 for (i = 0; i <= j; i++)
436 if (mat2[j][i] < min)
440 if (mat2[j][j] > max)
447 for (i = 0; i < ndim; i++)
451 rlo.r = 0; rlo.g = 0; rlo.b = 1;
452 rmi.r = 1; rmi.g = 1; rmi.b = 1;
453 rhi.r = 1; rhi.g = 0; rhi.b = 0;
454 out = gmx_ffopen(xpmfile, "w");
456 write_xpm3(out, 0, "Covariance", bM ? "u nm^2" : "nm^2",
457 "dim", "dim", ndim, ndim, axis, axis,
458 mat2, min, 0.0, max, rlo, rmi, rhi, &nlevels);
468 snew(mat2, ndim/DIM);
469 for (i = 0; i < ndim/DIM; i++)
471 snew(mat2[i], ndim/DIM);
473 for (j = 0; j < ndim/DIM; j++)
475 for (i = 0; i <= j; i++)
478 for (d = 0; d < DIM; d++)
480 mat2[j][i] += mat[ndim*(DIM*j+d)+DIM*i+d];
482 if (mat2[j][i] < min)
486 if (mat2[j][j] > max)
490 mat2[i][j] = mat2[j][i];
493 snew(axis, ndim/DIM);
494 for (i = 0; i < ndim/DIM; i++)
498 rlo.r = 0; rlo.g = 0; rlo.b = 1;
499 rmi.r = 1; rmi.g = 1; rmi.b = 1;
500 rhi.r = 1; rhi.g = 0; rhi.b = 0;
501 out = gmx_ffopen(xpmafile, "w");
503 write_xpm3(out, 0, "Covariance", bM ? "u nm^2" : "nm^2",
504 "atom", "atom", ndim/DIM, ndim/DIM, axis, axis,
505 mat2, min, 0.0, max, rlo, rmi, rhi, &nlevels);
508 for (i = 0; i < ndim/DIM; i++)
516 /* call diagonalization routine */
518 snew(eigenvalues, ndim);
519 snew(eigenvectors, ndim*ndim);
521 memcpy(eigenvectors, mat, ndim*ndim*sizeof(real));
522 fprintf(stderr, "\nDiagonalizing ...\n");
524 eigensolver(eigenvectors, ndim, 0, ndim, eigenvalues, mat);
527 /* now write the output */
530 for (i = 0; i < ndim; i++)
532 sum += eigenvalues[i];
534 fprintf(stderr, "\nSum of the eigenvalues: %g (%snm^2)\n",
535 sum, bM ? "u " : "");
536 if (fabs(trace-sum) > 0.01*trace)
538 fprintf(stderr, "\nWARNING: eigenvalue sum deviates from the trace of the covariance matrix\n");
541 /* Set 'end', the maximum eigenvector and -value index used for output */
544 if (nframes-1 < ndim)
547 fprintf(stderr, "\nWARNING: there are fewer frames in your trajectory than there are\n");
548 fprintf(stderr, "degrees of freedom in your system. Only generating the first\n");
549 fprintf(stderr, "%d out of %d eigenvectors and eigenvalues.\n", end, (int)ndim);
557 fprintf(stderr, "\nWriting eigenvalues to %s\n", eigvalfile);
559 sprintf(str, "(%snm\\S2\\N)", bM ? "u " : "");
560 out = xvgropen(eigvalfile,
561 "Eigenvalues of the covariance matrix",
562 "Eigenvector index", str, oenv);
563 for (i = 0; (i < end); i++)
565 fprintf (out, "%10d %g\n", (int)i+1, eigenvalues[ndim-1-i]);
571 /* misuse lambda: 0/1 mass weighted analysis no/yes */
574 WriteXref = eWXR_YES;
575 for (i = 0; i < nfit; i++)
577 copy_rvec(xref[ifit[i]], x[i]);
587 /* misuse lambda: -1 for no fit */
588 WriteXref = eWXR_NOFIT;
591 write_eigenvectors(eigvecfile, natoms, mat, TRUE, 1, end,
592 WriteXref, x, bDiffMass1, xproj, bM, eigenvalues);
594 out = gmx_ffopen(logfile, "w");
596 gmx_format_current_time(timebuf, STRLEN);
597 fprintf(out, "Covariance analysis log, written %s\n", timebuf);
599 fprintf(out, "Program: %s\n", argv[0]);
600 gmx_getcwd(str, STRLEN);
602 fprintf(out, "Working directory: %s\n\n", str);
604 fprintf(out, "Read %d frames from %s (time %g to %g %s)\n", nframes, trxfile,
605 output_env_conv_time(oenv, tstart), output_env_conv_time(oenv, tend), output_env_get_time_unit(oenv));
608 fprintf(out, "Read reference structure for fit from %s\n", fitfile);
612 fprintf(out, "Read index groups from %s\n", ndxfile);
616 fprintf(out, "Analysis group is '%s' (%d atoms)\n", ananame, natoms);
619 fprintf(out, "Fit group is '%s' (%d atoms)\n", fitname, nfit);
623 fprintf(out, "No fit was used\n");
625 fprintf(out, "Analysis is %smass weighted\n", bDiffMass2 ? "" : "non-");
628 fprintf(out, "Fit is %smass weighted\n", bDiffMass1 ? "" : "non-");
630 fprintf(out, "Diagonalized the %dx%d covariance matrix\n", (int)ndim, (int)ndim);
631 fprintf(out, "Trace of the covariance matrix before diagonalizing: %g\n",
633 fprintf(out, "Trace of the covariance matrix after diagonalizing: %g\n\n",
636 fprintf(out, "Wrote %d eigenvalues to %s\n", (int)end, eigvalfile);
637 if (WriteXref == eWXR_YES)
639 fprintf(out, "Wrote reference structure to %s\n", eigvecfile);
641 fprintf(out, "Wrote average structure to %s and %s\n", averfile, eigvecfile);
642 fprintf(out, "Wrote eigenvectors %d to %d to %s\n", 1, end, eigvecfile);
646 fprintf(stderr, "Wrote the log to %s\n", logfile);