2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
42 #include "gromacs/commandline/pargs.h"
43 #include "gromacs/fileio/confio.h"
44 #include "gromacs/fileio/filenm.h"
45 #include "gromacs/fileio/pdbio.h"
46 #include "gromacs/fileio/tpxio.h"
47 #include "gromacs/gmxana/gmx_ana.h"
48 #include "gromacs/legacyheaders/macros.h"
49 #include "gromacs/legacyheaders/txtdump.h"
50 #include "gromacs/legacyheaders/typedefs.h"
51 #include "gromacs/legacyheaders/viewit.h"
52 #include "gromacs/math/do_fit.h"
53 #include "gromacs/math/vec.h"
54 #include "gromacs/pbcutil/rmpbc.h"
55 #include "gromacs/topology/index.h"
56 #include "gromacs/utility/fatalerror.h"
57 #include "gromacs/utility/futil.h"
58 #include "gromacs/utility/smalloc.h"
60 void calc_rm_cm(int isize, atom_id index[], t_atoms *atoms, rvec x[], rvec xcm)
65 /* calculate and remove center of mass of reference structure */
68 for (i = 0; i < isize; i++)
70 m = atoms->atom[index[i]].m;
71 for (d = 0; d < DIM; d++)
73 xcm[d] += m*x[index[i]][d];
77 svmul(1/tm, xcm, xcm);
78 for (i = 0; i < atoms->nr; i++)
84 int build_res_index(int isize, atom_id index[], t_atom atom[], int rindex[])
89 rindex[r] = atom[index[0]].resind;
91 for (i = 1; i < isize; i++)
93 if (atom[index[i]].resind != rindex[r-1])
95 rindex[r] = atom[index[i]].resind;
103 int find_res_end(int i, int isize, atom_id index[], t_atoms *atoms)
107 rnr = atoms->atom[index[i]].resind;
108 while (i < isize && atoms->atom[index[i]].resind == rnr)
115 int debug_strcmp(char s1[], char s2[])
119 fprintf(debug, " %s-%s", s1, s2);
121 return strcmp(s1, s2);
124 int find_next_match_atoms_in_res(int *i1, atom_id index1[],
125 int m1, char **atnms1[],
126 int *i2, atom_id index2[],
127 int m2, char **atnms2[])
129 int dx, dy, dmax, cmp;
130 gmx_bool bFW = FALSE;
134 dmax = max(m1-*i1, m2-*i2);
135 for (dx = 0; dx < dmax && cmp != 0; dx++)
137 for (dy = dx; dy < dmax && cmp != 0; dy++)
146 if (*i1+dx < m1 && *i2+dy < m2)
149 cmp = debug_strcmp(*atnms1[index1[*i1+dx]], *atnms2[index2[*i2+dy]]);
152 fprintf(debug, "(%d %d)", *i1+dx, *i2+dy);
155 if (cmp != 0 && *i1+dy < m1 && *i2+dx < m2)
158 cmp = debug_strcmp(*atnms1[index1[*i1+dy]], *atnms2[index2[*i2+dx]]);
161 fprintf(debug, "(%d %d)", *i1+dy, *i2+dx);
167 /* apparently, dx and dy are incremented one more time
168 as the loops terminate: we correct this here */
175 fprintf(debug, "{%d %d}", *i1 + (bFW ? dx : dy), *i2 + (bFW ? dy : dx) );
192 static int find_next_match_res(int *rnr1, int isize1,
193 int index1[], t_resinfo *resinfo1,
194 int *rnr2, int isize2,
195 int index2[], t_resinfo *resinfo2)
197 int dx, dy, dmax, cmp, rr1, rr2;
198 gmx_bool bFW = FALSE, bFF = FALSE;
202 while (rr1 < isize1 && *rnr1 != index1[rr1])
207 while (rr2 < isize2 && *rnr2 != index2[rr2])
213 dmax = max(isize1-rr1, isize2-rr2);
216 fprintf(debug, " R:%d-%d:%d-%d:%d ",
217 rr1, isize1, rr2, isize2, dmax);
219 for (dx = 0; dx < dmax && cmp != 0; dx++)
221 for (dy = 0; dy <= dx && cmp != 0; dy++)
226 if (rr1+dx < isize1 && rr2+dy < isize2)
229 cmp = debug_strcmp(*resinfo1[index1[rr1+dx]].name,
230 *resinfo2[index2[rr2+dy]].name);
233 fprintf(debug, "(%d %d)", rr1+dx, rr2+dy);
236 if (cmp != 0 && rr1+dy < isize1 && rr2+dx < isize2)
239 cmp = debug_strcmp(*resinfo1[index1[rr1+dy]].name,
240 *resinfo2[index2[rr2+dx]].name);
243 fprintf(debug, "(%d %d)", rr1+dy, rr2+dx);
246 if (dx != 0 && cmp != 0 && rr1+dx < isize1 && rr2+dx < isize2)
249 cmp = debug_strcmp(*resinfo1[index1[rr1+dx]].name,
250 *resinfo2[index2[rr2+dx]].name);
253 fprintf(debug, "(%d %d)", rr1+dx, rr2+dx);
263 /* apparently, dx and dy are incremented one more time
264 as the loops terminate: we correct this here */
267 /* if we skipped equal on both sides, only skip one residue
268 to allow for single mutations of residues... */
273 fprintf(debug, "%d.%d.%dX%sX%s", dx, rr1, rr2,
274 *resinfo1[index1[rr1+1]].name,
275 *resinfo2[index2[rr2+1]].name);
308 int find_first_atom_in_res(int rnr, int isize, atom_id index[], t_atom atom[])
313 while (i < isize && atom[index[i]].resind != rnr)
318 if (atom[index[i]].resind == rnr)
328 void find_matching_names(int *isize1, atom_id index1[], t_atoms *atoms1,
329 int *isize2, atom_id index2[], t_atoms *atoms2)
331 int i, i1, i2, ii1, ii2, m1, m2;
333 int r, rnr1, rnr2, prnr1, prnr2;
335 int *rindex1, *rindex2;
336 char *resnm1, *resnm2, *atnm1, *atnm2;
337 char ***atnms1, ***atnms2;
338 t_resinfo *resinfo1, *resinfo2;
340 /* set some handy shortcuts */
341 resinfo1 = atoms1->resinfo;
342 atnms1 = atoms1->atomname;
343 resinfo2 = atoms2->resinfo;
344 atnms2 = atoms2->atomname;
346 /* build indexes of selected residues */
347 snew(rindex1, atoms1->nres);
348 rsize1 = build_res_index(*isize1, index1, atoms1->atom, rindex1);
349 snew(rindex2, atoms2->nres);
350 rsize2 = build_res_index(*isize2, index2, atoms2->atom, rindex2);
355 prnr1 = prnr2 = NOTSET;
358 fprintf(debug, "Find matching names: %d, %d\n", *isize1, *isize2);
360 while (atcmp == 0 && i1 < *isize1 && i2 < *isize2)
363 rnr1 = atoms1->atom[index1[i1]].resind;
364 rnr2 = atoms2->atom[index2[i2]].resind;
365 if (rnr1 != prnr1 || rnr2 != prnr2)
369 fprintf(debug, "R: %s%d %s%d\n",
370 *resinfo1[rnr1].name, rnr1, *resinfo2[rnr2].name, rnr2);
372 rescmp = strcmp(*resinfo1[rnr1].name, *resinfo2[rnr2].name);
376 fprintf(debug, "comparing %d %d", i1, i2);
378 atcmp = debug_strcmp(*atnms1[index1[i1]], *atnms2[index2[i2]]);
381 if (atcmp != 0) /* no match -> find match within residues */
383 m1 = find_res_end(i1, *isize1, index1, atoms1);
384 m2 = find_res_end(i2, *isize2, index2, atoms2);
387 fprintf(debug, " [%d<%d %d<%d]", i1, m1, i2, m2);
389 atcmp = find_next_match_atoms_in_res(&i1, index1, m1, atnms1,
390 &i2, index2, m2, atnms2);
393 fprintf(debug, " -> %d %d %s-%s", i1, i2,
394 *atnms1[index1[i1]], *atnms2[index2[i2]]);
398 if (atcmp != 0) /* still no match -> next residue(s) */
402 rescmp = find_next_match_res(&rnr1, rsize1, rindex1, resinfo1,
403 &rnr2, rsize2, rindex2, resinfo2);
406 i1 = find_first_atom_in_res(rnr1, *isize1, index1, atoms1->atom);
410 i2 = find_first_atom_in_res(rnr2, *isize2, index2, atoms2->atom);
414 fprintf(debug, " -> %s%d-%s%d %s%d-%s%d",
415 *resinfo1[rnr1].name, rnr1,
416 *atnms1[index1[i1]], index1[i1],
417 *resinfo2[rnr2].name, rnr2,
418 *atnms2[index2[i2]], index2[i2]);
420 m1 = find_res_end(i1, *isize1, index1, atoms1);
421 m2 = find_res_end(i2, *isize2, index2, atoms2);
424 fprintf(debug, " [%d<%d %d<%d]", i1, m1, i2, m2);
426 atcmp = find_next_match_atoms_in_res(&i1, index1, m1, atnms1,
427 &i2, index2, m2, atnms2);
430 fprintf(debug, " -> %d %d %s-%s", i1, i2,
431 *atnms1[index1[i1]], *atnms2[index2[i2]]);
436 fprintf(debug, "(%d %d): %d %d\n", ii1, ii2, atcmp, rescmp);
438 if (atcmp == 0) /* if match -> store indices */
440 index1[ii1++] = index1[i1];
441 index2[ii2++] = index2[i2];
453 gmx_fatal(FARGS, "DEATH HORROR: non-equal number of matching atoms!\n");
455 if (ii1 == i1 && ii2 == i2)
457 printf("All atoms in index groups 1 and 2 match\n");
461 if (i1 == i2 && ii1 == ii2)
463 printf("Both index groups modified from %d to %d atoms\n", i1, ii1);
469 printf("Index group 1 modified from %d to %d atoms\n", i1, ii1);
473 printf("Index group 2 modified from %d to %d atoms\n", i2, ii2);
482 int gmx_confrms(int argc, char *argv[])
484 const char *desc[] = {
485 "[THISMODULE] computes the root mean square deviation (RMSD) of two",
486 "structures after least-squares fitting the second structure on the first one.",
487 "The two structures do NOT need to have the same number of atoms,",
488 "only the two index groups used for the fit need to be identical.",
489 "With [TT]-name[tt] only matching atom names from the selected groups",
490 "will be used for the fit and RMSD calculation. This can be useful ",
491 "when comparing mutants of a protein.",
493 "The superimposed structures are written to file. In a [TT].pdb[tt] file",
494 "the two structures will be written as separate models",
495 "(use [TT]rasmol -nmrpdb[tt]). Also in a [TT].pdb[tt] file, B-factors",
496 "calculated from the atomic MSD values can be written with [TT]-bfac[tt].",
498 static gmx_bool bOne = FALSE, bRmpbc = FALSE, bMW = TRUE, bName = FALSE,
499 bBfac = FALSE, bFit = TRUE, bLabel = FALSE;
502 { "-one", FALSE, etBOOL, {&bOne}, "Only write the fitted structure to file" },
503 { "-mw", FALSE, etBOOL, {&bMW}, "Mass-weighted fitting and RMSD" },
504 { "-pbc", FALSE, etBOOL, {&bRmpbc}, "Try to make molecules whole again" },
505 { "-fit", FALSE, etBOOL, {&bFit},
506 "Do least squares superposition of the target structure to the reference" },
507 { "-name", FALSE, etBOOL, {&bName},
508 "Only compare matching atom names" },
509 { "-label", FALSE, etBOOL, {&bLabel},
510 "Added chain labels A for first and B for second structure"},
511 { "-bfac", FALSE, etBOOL, {&bBfac},
512 "Output B-factors from atomic MSD values" }
515 { efTPS, "-f1", "conf1.gro", ffREAD },
516 { efSTX, "-f2", "conf2", ffREAD },
517 { efSTO, "-o", "fit.pdb", ffWRITE },
518 { efNDX, "-n1", "fit1", ffOPTRD },
519 { efNDX, "-n2", "fit2", ffOPTRD },
520 { efNDX, "-no", "match", ffOPTWR }
522 #define NFILE asize(fnm)
524 /* the two structure files */
525 const char *conf1file, *conf2file, *matchndxfile, *outfile;
527 char title1[STRLEN], title2[STRLEN], *name1, *name2;
528 t_topology *top1, *top2;
530 t_atoms *atoms1, *atoms2;
533 real *w_rls, mass, totmass;
534 rvec *x1, *v1, *x2, *v2, *fit_x;
542 /* center of mass calculation */
546 /* variables for fit */
547 char *groupnames1, *groupnames2;
549 atom_id *index1, *index2;
550 real rms, msd, minmsd, maxmsd;
554 if (!parse_common_args(&argc, argv, PCA_CAN_VIEW,
555 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &oenv))
559 matchndxfile = opt2fn_null("-no", NFILE, fnm);
560 conf1file = ftp2fn(efTPS, NFILE, fnm);
561 conf2file = ftp2fn(efSTX, NFILE, fnm);
563 /* reading reference structure from first structure file */
564 fprintf(stderr, "\nReading first structure file\n");
566 read_tps_conf(conf1file, title1, top1, &ePBC1, &x1, &v1, box1, TRUE);
567 atoms1 = &(top1->atoms);
568 fprintf(stderr, "%s\nContaining %d atoms in %d residues\n",
569 title1, atoms1->nr, atoms1->nres);
573 rm_gropbc(atoms1, x1, box1);
576 fprintf(stderr, "Select group from first structure\n");
577 get_index(atoms1, opt2fn_null("-n1", NFILE, fnm),
578 1, &isize1, &index1, &groupnames1);
581 if (bFit && (isize1 < 3))
583 gmx_fatal(FARGS, "Need >= 3 points to fit!\n");
586 /* reading second structure file */
587 fprintf(stderr, "\nReading second structure file\n");
589 read_tps_conf(conf2file, title2, top2, &ePBC2, &x2, &v2, box2, TRUE);
590 atoms2 = &(top2->atoms);
591 fprintf(stderr, "%s\nContaining %d atoms in %d residues\n",
592 title2, atoms2->nr, atoms2->nres);
596 rm_gropbc(atoms2, x2, box2);
599 fprintf(stderr, "Select group from second structure\n");
600 get_index(atoms2, opt2fn_null("-n2", NFILE, fnm),
601 1, &isize2, &index2, &groupnames2);
605 find_matching_names(&isize1, index1, atoms1, &isize2, index2, atoms2);
608 fp = gmx_ffopen(matchndxfile, "w");
609 fprintf(fp, "; Matching atoms between %s from %s and %s from %s\n",
610 groupnames1, conf1file, groupnames2, conf2file);
611 fprintf(fp, "[ Match_%s_%s ]\n", conf1file, groupnames1);
612 for (i = 0; i < isize1; i++)
614 fprintf(fp, "%4u%s", index1[i]+1, (i%15 == 14 || i == isize1-1) ? "\n" : " ");
616 fprintf(fp, "[ Match_%s_%s ]\n", conf2file, groupnames2);
617 for (i = 0; i < isize2; i++)
619 fprintf(fp, "%4u%s", index2[i]+1, (i%15 == 14 || i == isize2-1) ? "\n" : " ");
624 /* check isizes, must be equal */
625 if (isize2 != isize1)
627 gmx_fatal(FARGS, "You selected groups with differen number of atoms.\n");
630 for (i = 0; i < isize1; i++)
632 name1 = *atoms1->atomname[index1[i]];
633 name2 = *atoms2->atomname[index2[i]];
634 if (strcmp(name1, name2))
639 "Warning: atomnames at index %d don't match: %u %s, %u %s\n",
640 i+1, index1[i]+1, name1, index2[i]+1, name2);
646 atoms1->atom[index1[i]].m = 1;
647 atoms2->atom[index2[i]].m = 1;
652 fprintf(stderr, "%d atomname%s did not match\n", warn, (warn == 1) ? "" : "s");
657 /* calculate and remove center of mass of structures */
658 calc_rm_cm(isize1, index1, atoms1, x1, xcm1);
659 calc_rm_cm(isize2, index2, atoms2, x2, xcm2);
661 snew(w_rls, atoms2->nr);
662 snew(fit_x, atoms2->nr);
663 for (at = 0; (at < isize1); at++)
665 w_rls[index2[at]] = atoms1->atom[index1[at]].m;
666 copy_rvec(x1[index1[at]], fit_x[index2[at]]);
669 /* do the least squares fit to the reference structure */
670 do_fit(atoms2->nr, w_rls, fit_x, x2);
683 /* calculate the rms deviation */
689 for (at = 0; at < isize1; at++)
691 mass = atoms1->atom[index1[at]].m;
692 for (m = 0; m < DIM; m++)
694 msd = sqr(x1[index1[at]][m] - x2[index2[at]][m]);
698 maxmsd = max(maxmsd, msds[at]);
699 minmsd = min(minmsd, msds[at]);
702 rms = sqrt(rms/totmass);
704 printf("Root mean square deviation after lsq fit = %g nm\n", rms);
707 printf("Atomic MSD's range from %g to %g nm^2\n", minmsd, maxmsd);
712 /* reset coordinates of reference and fitted structure */
713 for (i = 0; i < atoms1->nr; i++)
715 for (m = 0; m < DIM; m++)
720 for (i = 0; i < atoms2->nr; i++)
722 for (m = 0; m < DIM; m++)
729 outfile = ftp2fn(efSTO, NFILE, fnm);
730 switch (fn2ftp(outfile))
737 srenew(atoms1->pdbinfo, atoms1->nr);
738 srenew(atoms1->atom, atoms1->nr); /* Why renew atom? */
740 /* Avoid segfaults when writing the pdb-file */
741 for (i = 0; i < atoms1->nr; i++)
743 atoms1->pdbinfo[i].type = eptAtom;
744 atoms1->pdbinfo[i].occup = 1.00;
745 atoms1->pdbinfo[i].bAnisotropic = FALSE;
748 atoms1->pdbinfo[i].bfac = 0;
752 atoms1->resinfo[atoms1->atom[i].resind].chainid = 'A';
756 for (i = 0; i < isize1; i++)
758 /* atoms1->pdbinfo[index1[i]].type = eptAtom; */
759 /* atoms1->pdbinfo[index1[i]].bAnisotropic = FALSE; */
762 atoms1->pdbinfo[index1[i]].bfac = (800*M_PI*M_PI/3.0)*msds[i];
765 /* atoms1->resinfo[atoms1->atom[index1[i]].resind].chain = 'A'; */
767 srenew(atoms2->pdbinfo, atoms2->nr);
768 srenew(atoms2->atom, atoms2->nr); /* Why renew atom? */
770 for (i = 0; i < atoms2->nr; i++)
772 atoms2->pdbinfo[i].type = eptAtom;
773 atoms2->pdbinfo[i].occup = 1.00;
774 atoms2->pdbinfo[i].bAnisotropic = FALSE;
777 atoms2->pdbinfo[i].bfac = 0;
781 atoms2->resinfo[atoms1->atom[i].resind].chainid = 'B';
785 for (i = 0; i < isize2; i++)
787 /* atoms2->pdbinfo[index2[i]].type = eptAtom; */
788 /* atoms2->pdbinfo[index2[i]].bAnisotropic = FALSE; */
791 atoms2->pdbinfo[index2[i]].bfac = (800*M_PI*M_PI/3.0)*msds[i];
794 /* atoms2->resinfo[atoms2->atom[index2[i]].resind].chain = 'B'; */
797 fp = gmx_ffopen(outfile, "w");
800 write_pdbfile(fp, title1, atoms1, x1, ePBC1, box1, ' ', 1, NULL, TRUE);
802 write_pdbfile(fp, title2, atoms2, x2, ePBC2, box2, ' ', bOne ? -1 : 2, NULL, TRUE);
808 fprintf(stderr, "WARNING: cannot write B-factor values to gro file\n");
810 fp = gmx_ffopen(outfile, "w");
813 write_hconf_p(fp, title1, atoms1, 3, x1, v1, box1);
815 write_hconf_p(fp, title2, atoms2, 3, x2, v2, box2);
821 fprintf(stderr, "WARNING: cannot write B-factor values to %s file\n",
822 ftp2ext(fn2ftp(outfile)));
827 "WARNING: cannot write the reference structure to %s file\n",
828 ftp2ext(fn2ftp(outfile)));
830 write_sto_conf(outfile, title2, atoms2, x2, v2, ePBC2, box2);
834 view_all(oenv, NFILE, fnm);