2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
42 #include "gromacs/fileio/filenm.h"
43 #include "gromacs/utility/smalloc.h"
44 #include "gromacs/legacyheaders/macros.h"
45 #include "gromacs/legacyheaders/typedefs.h"
46 #include "gromacs/commandline/pargs.h"
47 #include "gromacs/fileio/tpxio.h"
48 #include "gromacs/math/vec.h"
49 #include "gromacs/topology/index.h"
50 #include "gromacs/utility/fatalerror.h"
51 #include "gromacs/utility/futil.h"
52 #include "gromacs/fileio/confio.h"
53 #include "gromacs/fileio/pdbio.h"
54 #include "gromacs/legacyheaders/txtdump.h"
55 #include "gromacs/legacyheaders/viewit.h"
56 #include "gromacs/pbcutil/rmpbc.h"
59 #include "gromacs/math/do_fit.h"
61 void calc_rm_cm(int isize, atom_id index[], t_atoms *atoms, rvec x[], rvec xcm)
66 /* calculate and remove center of mass of reference structure */
69 for (i = 0; i < isize; i++)
71 m = atoms->atom[index[i]].m;
72 for (d = 0; d < DIM; d++)
74 xcm[d] += m*x[index[i]][d];
78 svmul(1/tm, xcm, xcm);
79 for (i = 0; i < atoms->nr; i++)
85 int build_res_index(int isize, atom_id index[], t_atom atom[], int rindex[])
90 rindex[r] = atom[index[0]].resind;
92 for (i = 1; i < isize; i++)
94 if (atom[index[i]].resind != rindex[r-1])
96 rindex[r] = atom[index[i]].resind;
104 int find_res_end(int i, int isize, atom_id index[], t_atoms *atoms)
108 rnr = atoms->atom[index[i]].resind;
109 while (i < isize && atoms->atom[index[i]].resind == rnr)
116 int debug_strcmp(char s1[], char s2[])
120 fprintf(debug, " %s-%s", s1, s2);
122 return strcmp(s1, s2);
125 int find_next_match_atoms_in_res(int *i1, atom_id index1[],
126 int m1, char **atnms1[],
127 int *i2, atom_id index2[],
128 int m2, char **atnms2[])
130 int dx, dy, dmax, cmp;
131 gmx_bool bFW = FALSE;
135 dmax = max(m1-*i1, m2-*i2);
136 for (dx = 0; dx < dmax && cmp != 0; dx++)
138 for (dy = dx; dy < dmax && cmp != 0; dy++)
147 if (*i1+dx < m1 && *i2+dy < m2)
150 cmp = debug_strcmp(*atnms1[index1[*i1+dx]], *atnms2[index2[*i2+dy]]);
153 fprintf(debug, "(%d %d)", *i1+dx, *i2+dy);
156 if (cmp != 0 && *i1+dy < m1 && *i2+dx < m2)
159 cmp = debug_strcmp(*atnms1[index1[*i1+dy]], *atnms2[index2[*i2+dx]]);
162 fprintf(debug, "(%d %d)", *i1+dy, *i2+dx);
168 /* apparently, dx and dy are incremented one more time
169 as the loops terminate: we correct this here */
176 fprintf(debug, "{%d %d}", *i1 + (bFW ? dx : dy), *i2 + (bFW ? dy : dx) );
193 static int find_next_match_res(int *rnr1, int isize1,
194 int index1[], t_resinfo *resinfo1,
195 int *rnr2, int isize2,
196 int index2[], t_resinfo *resinfo2)
198 int dx, dy, dmax, cmp, rr1, rr2;
199 gmx_bool bFW = FALSE, bFF = FALSE;
203 while (rr1 < isize1 && *rnr1 != index1[rr1])
208 while (rr2 < isize2 && *rnr2 != index2[rr2])
214 dmax = max(isize1-rr1, isize2-rr2);
217 fprintf(debug, " R:%d-%d:%d-%d:%d ",
218 rr1, isize1, rr2, isize2, dmax);
220 for (dx = 0; dx < dmax && cmp != 0; dx++)
222 for (dy = 0; dy <= dx && cmp != 0; dy++)
227 if (rr1+dx < isize1 && rr2+dy < isize2)
230 cmp = debug_strcmp(*resinfo1[index1[rr1+dx]].name,
231 *resinfo2[index2[rr2+dy]].name);
234 fprintf(debug, "(%d %d)", rr1+dx, rr2+dy);
237 if (cmp != 0 && rr1+dy < isize1 && rr2+dx < isize2)
240 cmp = debug_strcmp(*resinfo1[index1[rr1+dy]].name,
241 *resinfo2[index2[rr2+dx]].name);
244 fprintf(debug, "(%d %d)", rr1+dy, rr2+dx);
247 if (dx != 0 && cmp != 0 && rr1+dx < isize1 && rr2+dx < isize2)
250 cmp = debug_strcmp(*resinfo1[index1[rr1+dx]].name,
251 *resinfo2[index2[rr2+dx]].name);
254 fprintf(debug, "(%d %d)", rr1+dx, rr2+dx);
264 /* apparently, dx and dy are incremented one more time
265 as the loops terminate: we correct this here */
268 /* if we skipped equal on both sides, only skip one residue
269 to allow for single mutations of residues... */
274 fprintf(debug, "%d.%d.%dX%sX%s", dx, rr1, rr2,
275 *resinfo1[index1[rr1+1]].name,
276 *resinfo2[index2[rr2+1]].name);
309 int find_first_atom_in_res(int rnr, int isize, atom_id index[], t_atom atom[])
314 while (i < isize && atom[index[i]].resind != rnr)
319 if (atom[index[i]].resind == rnr)
329 void find_matching_names(int *isize1, atom_id index1[], t_atoms *atoms1,
330 int *isize2, atom_id index2[], t_atoms *atoms2)
332 int i, i1, i2, ii1, ii2, m1, m2;
334 int r, rnr1, rnr2, prnr1, prnr2;
336 int *rindex1, *rindex2;
337 char *resnm1, *resnm2, *atnm1, *atnm2;
338 char ***atnms1, ***atnms2;
339 t_resinfo *resinfo1, *resinfo2;
341 /* set some handy shortcuts */
342 resinfo1 = atoms1->resinfo;
343 atnms1 = atoms1->atomname;
344 resinfo2 = atoms2->resinfo;
345 atnms2 = atoms2->atomname;
347 /* build indexes of selected residues */
348 snew(rindex1, atoms1->nres);
349 rsize1 = build_res_index(*isize1, index1, atoms1->atom, rindex1);
350 snew(rindex2, atoms2->nres);
351 rsize2 = build_res_index(*isize2, index2, atoms2->atom, rindex2);
356 prnr1 = prnr2 = NOTSET;
359 fprintf(debug, "Find matching names: %d, %d\n", *isize1, *isize2);
361 while (atcmp == 0 && i1 < *isize1 && i2 < *isize2)
364 rnr1 = atoms1->atom[index1[i1]].resind;
365 rnr2 = atoms2->atom[index2[i2]].resind;
366 if (rnr1 != prnr1 || rnr2 != prnr2)
370 fprintf(debug, "R: %s%d %s%d\n",
371 *resinfo1[rnr1].name, rnr1, *resinfo2[rnr2].name, rnr2);
373 rescmp = strcmp(*resinfo1[rnr1].name, *resinfo2[rnr2].name);
377 fprintf(debug, "comparing %d %d", i1, i2);
379 atcmp = debug_strcmp(*atnms1[index1[i1]], *atnms2[index2[i2]]);
382 if (atcmp != 0) /* no match -> find match within residues */
384 m1 = find_res_end(i1, *isize1, index1, atoms1);
385 m2 = find_res_end(i2, *isize2, index2, atoms2);
388 fprintf(debug, " [%d<%d %d<%d]", i1, m1, i2, m2);
390 atcmp = find_next_match_atoms_in_res(&i1, index1, m1, atnms1,
391 &i2, index2, m2, atnms2);
394 fprintf(debug, " -> %d %d %s-%s", i1, i2,
395 *atnms1[index1[i1]], *atnms2[index2[i2]]);
399 if (atcmp != 0) /* still no match -> next residue(s) */
403 rescmp = find_next_match_res(&rnr1, rsize1, rindex1, resinfo1,
404 &rnr2, rsize2, rindex2, resinfo2);
407 i1 = find_first_atom_in_res(rnr1, *isize1, index1, atoms1->atom);
411 i2 = find_first_atom_in_res(rnr2, *isize2, index2, atoms2->atom);
415 fprintf(debug, " -> %s%d-%s%d %s%d-%s%d",
416 *resinfo1[rnr1].name, rnr1,
417 *atnms1[index1[i1]], index1[i1],
418 *resinfo2[rnr2].name, rnr2,
419 *atnms2[index2[i2]], index2[i2]);
421 m1 = find_res_end(i1, *isize1, index1, atoms1);
422 m2 = find_res_end(i2, *isize2, index2, atoms2);
425 fprintf(debug, " [%d<%d %d<%d]", i1, m1, i2, m2);
427 atcmp = find_next_match_atoms_in_res(&i1, index1, m1, atnms1,
428 &i2, index2, m2, atnms2);
431 fprintf(debug, " -> %d %d %s-%s", i1, i2,
432 *atnms1[index1[i1]], *atnms2[index2[i2]]);
437 fprintf(debug, "(%d %d): %d %d\n", ii1, ii2, atcmp, rescmp);
439 if (atcmp == 0) /* if match -> store indices */
441 index1[ii1++] = index1[i1];
442 index2[ii2++] = index2[i2];
454 gmx_fatal(FARGS, "DEATH HORROR: non-equal number of matching atoms!\n");
456 if (ii1 == i1 && ii2 == i2)
458 printf("All atoms in index groups 1 and 2 match\n");
462 if (i1 == i2 && ii1 == ii2)
464 printf("Both index groups modified from %d to %d atoms\n", i1, ii1);
470 printf("Index group 1 modified from %d to %d atoms\n", i1, ii1);
474 printf("Index group 2 modified from %d to %d atoms\n", i2, ii2);
483 int gmx_confrms(int argc, char *argv[])
485 const char *desc[] = {
486 "[THISMODULE] computes the root mean square deviation (RMSD) of two",
487 "structures after least-squares fitting the second structure on the first one.",
488 "The two structures do NOT need to have the same number of atoms,",
489 "only the two index groups used for the fit need to be identical.",
490 "With [TT]-name[tt] only matching atom names from the selected groups",
491 "will be used for the fit and RMSD calculation. This can be useful ",
492 "when comparing mutants of a protein.",
494 "The superimposed structures are written to file. In a [TT].pdb[tt] file",
495 "the two structures will be written as separate models",
496 "(use [TT]rasmol -nmrpdb[tt]). Also in a [TT].pdb[tt] file, B-factors",
497 "calculated from the atomic MSD values can be written with [TT]-bfac[tt].",
499 static gmx_bool bOne = FALSE, bRmpbc = FALSE, bMW = TRUE, bName = FALSE,
500 bBfac = FALSE, bFit = TRUE, bLabel = FALSE;
503 { "-one", FALSE, etBOOL, {&bOne}, "Only write the fitted structure to file" },
504 { "-mw", FALSE, etBOOL, {&bMW}, "Mass-weighted fitting and RMSD" },
505 { "-pbc", FALSE, etBOOL, {&bRmpbc}, "Try to make molecules whole again" },
506 { "-fit", FALSE, etBOOL, {&bFit},
507 "Do least squares superposition of the target structure to the reference" },
508 { "-name", FALSE, etBOOL, {&bName},
509 "Only compare matching atom names" },
510 { "-label", FALSE, etBOOL, {&bLabel},
511 "Added chain labels A for first and B for second structure"},
512 { "-bfac", FALSE, etBOOL, {&bBfac},
513 "Output B-factors from atomic MSD values" }
516 { efTPS, "-f1", "conf1.gro", ffREAD },
517 { efSTX, "-f2", "conf2", ffREAD },
518 { efSTO, "-o", "fit.pdb", ffWRITE },
519 { efNDX, "-n1", "fit1", ffOPTRD },
520 { efNDX, "-n2", "fit2", ffOPTRD },
521 { efNDX, "-no", "match", ffOPTWR }
523 #define NFILE asize(fnm)
525 /* the two structure files */
526 const char *conf1file, *conf2file, *matchndxfile, *outfile;
528 char title1[STRLEN], title2[STRLEN], *name1, *name2;
529 t_topology *top1, *top2;
531 t_atoms *atoms1, *atoms2;
534 real *w_rls, mass, totmass;
535 rvec *x1, *v1, *x2, *v2, *fit_x;
543 /* center of mass calculation */
547 /* variables for fit */
548 char *groupnames1, *groupnames2;
550 atom_id *index1, *index2;
551 real rms, msd, minmsd, maxmsd;
555 if (!parse_common_args(&argc, argv, PCA_CAN_VIEW,
556 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &oenv))
560 matchndxfile = opt2fn_null("-no", NFILE, fnm);
561 conf1file = ftp2fn(efTPS, NFILE, fnm);
562 conf2file = ftp2fn(efSTX, NFILE, fnm);
564 /* reading reference structure from first structure file */
565 fprintf(stderr, "\nReading first structure file\n");
567 read_tps_conf(conf1file, title1, top1, &ePBC1, &x1, &v1, box1, TRUE);
568 atoms1 = &(top1->atoms);
569 fprintf(stderr, "%s\nContaining %d atoms in %d residues\n",
570 title1, atoms1->nr, atoms1->nres);
574 rm_gropbc(atoms1, x1, box1);
577 fprintf(stderr, "Select group from first structure\n");
578 get_index(atoms1, opt2fn_null("-n1", NFILE, fnm),
579 1, &isize1, &index1, &groupnames1);
582 if (bFit && (isize1 < 3))
584 gmx_fatal(FARGS, "Need >= 3 points to fit!\n");
587 /* reading second structure file */
588 fprintf(stderr, "\nReading second structure file\n");
590 read_tps_conf(conf2file, title2, top2, &ePBC2, &x2, &v2, box2, TRUE);
591 atoms2 = &(top2->atoms);
592 fprintf(stderr, "%s\nContaining %d atoms in %d residues\n",
593 title2, atoms2->nr, atoms2->nres);
597 rm_gropbc(atoms2, x2, box2);
600 fprintf(stderr, "Select group from second structure\n");
601 get_index(atoms2, opt2fn_null("-n2", NFILE, fnm),
602 1, &isize2, &index2, &groupnames2);
606 find_matching_names(&isize1, index1, atoms1, &isize2, index2, atoms2);
609 fp = gmx_ffopen(matchndxfile, "w");
610 fprintf(fp, "; Matching atoms between %s from %s and %s from %s\n",
611 groupnames1, conf1file, groupnames2, conf2file);
612 fprintf(fp, "[ Match_%s_%s ]\n", conf1file, groupnames1);
613 for (i = 0; i < isize1; i++)
615 fprintf(fp, "%4u%s", index1[i]+1, (i%15 == 14 || i == isize1-1) ? "\n" : " ");
617 fprintf(fp, "[ Match_%s_%s ]\n", conf2file, groupnames2);
618 for (i = 0; i < isize2; i++)
620 fprintf(fp, "%4u%s", index2[i]+1, (i%15 == 14 || i == isize2-1) ? "\n" : " ");
625 /* check isizes, must be equal */
626 if (isize2 != isize1)
628 gmx_fatal(FARGS, "You selected groups with differen number of atoms.\n");
631 for (i = 0; i < isize1; i++)
633 name1 = *atoms1->atomname[index1[i]];
634 name2 = *atoms2->atomname[index2[i]];
635 if (strcmp(name1, name2))
640 "Warning: atomnames at index %d don't match: %u %s, %u %s\n",
641 i+1, index1[i]+1, name1, index2[i]+1, name2);
647 atoms1->atom[index1[i]].m = 1;
648 atoms2->atom[index2[i]].m = 1;
653 fprintf(stderr, "%d atomname%s did not match\n", warn, (warn == 1) ? "" : "s");
658 /* calculate and remove center of mass of structures */
659 calc_rm_cm(isize1, index1, atoms1, x1, xcm1);
660 calc_rm_cm(isize2, index2, atoms2, x2, xcm2);
662 snew(w_rls, atoms2->nr);
663 snew(fit_x, atoms2->nr);
664 for (at = 0; (at < isize1); at++)
666 w_rls[index2[at]] = atoms1->atom[index1[at]].m;
667 copy_rvec(x1[index1[at]], fit_x[index2[at]]);
670 /* do the least squares fit to the reference structure */
671 do_fit(atoms2->nr, w_rls, fit_x, x2);
684 /* calculate the rms deviation */
690 for (at = 0; at < isize1; at++)
692 mass = atoms1->atom[index1[at]].m;
693 for (m = 0; m < DIM; m++)
695 msd = sqr(x1[index1[at]][m] - x2[index2[at]][m]);
699 maxmsd = max(maxmsd, msds[at]);
700 minmsd = min(minmsd, msds[at]);
703 rms = sqrt(rms/totmass);
705 printf("Root mean square deviation after lsq fit = %g nm\n", rms);
708 printf("Atomic MSD's range from %g to %g nm^2\n", minmsd, maxmsd);
713 /* reset coordinates of reference and fitted structure */
714 for (i = 0; i < atoms1->nr; i++)
716 for (m = 0; m < DIM; m++)
721 for (i = 0; i < atoms2->nr; i++)
723 for (m = 0; m < DIM; m++)
730 outfile = ftp2fn(efSTO, NFILE, fnm);
731 switch (fn2ftp(outfile))
738 srenew(atoms1->pdbinfo, atoms1->nr);
739 srenew(atoms1->atom, atoms1->nr); /* Why renew atom? */
741 /* Avoid segfaults when writing the pdb-file */
742 for (i = 0; i < atoms1->nr; i++)
744 atoms1->pdbinfo[i].type = eptAtom;
745 atoms1->pdbinfo[i].occup = 1.00;
746 atoms1->pdbinfo[i].bAnisotropic = FALSE;
749 atoms1->pdbinfo[i].bfac = 0;
753 atoms1->resinfo[atoms1->atom[i].resind].chainid = 'A';
757 for (i = 0; i < isize1; i++)
759 /* atoms1->pdbinfo[index1[i]].type = eptAtom; */
760 /* atoms1->pdbinfo[index1[i]].bAnisotropic = FALSE; */
763 atoms1->pdbinfo[index1[i]].bfac = (800*M_PI*M_PI/3.0)*msds[i];
766 /* atoms1->resinfo[atoms1->atom[index1[i]].resind].chain = 'A'; */
768 srenew(atoms2->pdbinfo, atoms2->nr);
769 srenew(atoms2->atom, atoms2->nr); /* Why renew atom? */
771 for (i = 0; i < atoms2->nr; i++)
773 atoms2->pdbinfo[i].type = eptAtom;
774 atoms2->pdbinfo[i].occup = 1.00;
775 atoms2->pdbinfo[i].bAnisotropic = FALSE;
778 atoms2->pdbinfo[i].bfac = 0;
782 atoms2->resinfo[atoms1->atom[i].resind].chainid = 'B';
786 for (i = 0; i < isize2; i++)
788 /* atoms2->pdbinfo[index2[i]].type = eptAtom; */
789 /* atoms2->pdbinfo[index2[i]].bAnisotropic = FALSE; */
792 atoms2->pdbinfo[index2[i]].bfac = (800*M_PI*M_PI/3.0)*msds[i];
795 /* atoms2->resinfo[atoms2->atom[index2[i]].resind].chain = 'B'; */
798 fp = gmx_ffopen(outfile, "w");
801 write_pdbfile(fp, title1, atoms1, x1, ePBC1, box1, ' ', 1, NULL, TRUE);
803 write_pdbfile(fp, title2, atoms2, x2, ePBC2, box2, ' ', bOne ? -1 : 2, NULL, TRUE);
809 fprintf(stderr, "WARNING: cannot write B-factor values to gro file\n");
811 fp = gmx_ffopen(outfile, "w");
814 write_hconf_p(fp, title1, atoms1, 3, x1, v1, box1);
816 write_hconf_p(fp, title2, atoms2, 3, x2, v2, box2);
822 fprintf(stderr, "WARNING: cannot write B-factor values to %s file\n",
823 ftp2ext(fn2ftp(outfile)));
828 "WARNING: cannot write the reference structure to %s file\n",
829 ftp2ext(fn2ftp(outfile)));
831 write_sto_conf(outfile, title2, atoms2, x2, v2, ePBC2, box2);
835 view_all(oenv, NFILE, fnm);