2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
44 #include "gromacs/fileio/filenm.h"
45 #include "gromacs/utility/smalloc.h"
49 #include "gromacs/commandline/pargs.h"
50 #include "gromacs/fileio/tpxio.h"
54 #include "gmx_fatal.h"
55 #include "gromacs/fileio/futil.h"
56 #include "gromacs/fileio/confio.h"
57 #include "gromacs/fileio/pdbio.h"
63 #include "gromacs/math/do_fit.h"
65 void calc_rm_cm(int isize, atom_id index[], t_atoms *atoms, rvec x[], rvec xcm)
70 /* calculate and remove center of mass of reference structure */
73 for (i = 0; i < isize; i++)
75 m = atoms->atom[index[i]].m;
76 for (d = 0; d < DIM; d++)
78 xcm[d] += m*x[index[i]][d];
82 svmul(1/tm, xcm, xcm);
83 for (i = 0; i < atoms->nr; i++)
89 int build_res_index(int isize, atom_id index[], t_atom atom[], int rindex[])
94 rindex[r] = atom[index[0]].resind;
96 for (i = 1; i < isize; i++)
98 if (atom[index[i]].resind != rindex[r-1])
100 rindex[r] = atom[index[i]].resind;
108 int find_res_end(int i, int isize, atom_id index[], t_atoms *atoms)
112 rnr = atoms->atom[index[i]].resind;
113 while (i < isize && atoms->atom[index[i]].resind == rnr)
120 int debug_strcmp(char s1[], char s2[])
124 fprintf(debug, " %s-%s", s1, s2);
126 return strcmp(s1, s2);
129 int find_next_match_atoms_in_res(int *i1, atom_id index1[],
130 int m1, char **atnms1[],
131 int *i2, atom_id index2[],
132 int m2, char **atnms2[])
134 int dx, dy, dmax, cmp;
135 gmx_bool bFW = FALSE;
139 dmax = max(m1-*i1, m2-*i2);
140 for (dx = 0; dx < dmax && cmp != 0; dx++)
142 for (dy = dx; dy < dmax && cmp != 0; dy++)
151 if (*i1+dx < m1 && *i2+dy < m2)
154 cmp = debug_strcmp(*atnms1[index1[*i1+dx]], *atnms2[index2[*i2+dy]]);
157 fprintf(debug, "(%d %d)", *i1+dx, *i2+dy);
160 if (cmp != 0 && *i1+dy < m1 && *i2+dx < m2)
163 cmp = debug_strcmp(*atnms1[index1[*i1+dy]], *atnms2[index2[*i2+dx]]);
166 fprintf(debug, "(%d %d)", *i1+dy, *i2+dx);
172 /* apparently, dx and dy are incremented one more time
173 as the loops terminate: we correct this here */
180 fprintf(debug, "{%d %d}", *i1+bFW ? dx : dy, *i2+bFW ? dy : dx );
197 static int find_next_match_res(int *rnr1, int isize1,
198 int index1[], t_resinfo *resinfo1,
199 int *rnr2, int isize2,
200 int index2[], t_resinfo *resinfo2)
202 int dx, dy, dmax, cmp, rr1, rr2;
203 gmx_bool bFW = FALSE, bFF = FALSE;
207 while (rr1 < isize1 && *rnr1 != index1[rr1])
212 while (rr2 < isize2 && *rnr2 != index2[rr2])
218 dmax = max(isize1-rr1, isize2-rr2);
221 fprintf(debug, " R:%d-%d:%d-%d:%d ",
222 rr1, isize1, rr2, isize2, dmax);
224 for (dx = 0; dx < dmax && cmp != 0; dx++)
226 for (dy = 0; dy <= dx && cmp != 0; dy++)
231 if (rr1+dx < isize1 && rr2+dy < isize2)
234 cmp = debug_strcmp(*resinfo1[index1[rr1+dx]].name,
235 *resinfo2[index2[rr2+dy]].name);
238 fprintf(debug, "(%d %d)", rr1+dx, rr2+dy);
241 if (cmp != 0 && rr1+dy < isize1 && rr2+dx < isize2)
244 cmp = debug_strcmp(*resinfo1[index1[rr1+dy]].name,
245 *resinfo2[index2[rr2+dx]].name);
248 fprintf(debug, "(%d %d)", rr1+dy, rr2+dx);
251 if (dx != 0 && cmp != 0 && rr1+dx < isize1 && rr2+dx < isize2)
254 cmp = debug_strcmp(*resinfo1[index1[rr1+dx]].name,
255 *resinfo2[index2[rr2+dx]].name);
258 fprintf(debug, "(%d %d)", rr1+dx, rr2+dx);
268 /* apparently, dx and dy are incremented one more time
269 as the loops terminate: we correct this here */
272 /* if we skipped equal on both sides, only skip one residue
273 to allow for single mutations of residues... */
278 fprintf(debug, "%d.%d.%dX%sX%s", dx, rr1, rr2,
279 *resinfo1[index1[rr1+1]].name,
280 *resinfo2[index2[rr2+1]].name);
313 int find_first_atom_in_res(int rnr, int isize, atom_id index[], t_atom atom[])
318 while (i < isize && atom[index[i]].resind != rnr)
323 if (atom[index[i]].resind == rnr)
333 void find_matching_names(int *isize1, atom_id index1[], t_atoms *atoms1,
334 int *isize2, atom_id index2[], t_atoms *atoms2)
336 int i, i1, i2, ii1, ii2, m1, m2;
338 int r, rnr1, rnr2, prnr1, prnr2;
340 int *rindex1, *rindex2;
341 char *resnm1, *resnm2, *atnm1, *atnm2;
342 char ***atnms1, ***atnms2;
343 t_resinfo *resinfo1, *resinfo2;
345 /* set some handy shortcuts */
346 resinfo1 = atoms1->resinfo;
347 atnms1 = atoms1->atomname;
348 resinfo2 = atoms2->resinfo;
349 atnms2 = atoms2->atomname;
351 /* build indexes of selected residues */
352 snew(rindex1, atoms1->nres);
353 rsize1 = build_res_index(*isize1, index1, atoms1->atom, rindex1);
354 snew(rindex2, atoms2->nres);
355 rsize2 = build_res_index(*isize2, index2, atoms2->atom, rindex2);
360 prnr1 = prnr2 = NOTSET;
363 fprintf(debug, "Find matching names: %d, %d\n", *isize1, *isize2);
365 while (atcmp == 0 && i1 < *isize1 && i2 < *isize2)
368 rnr1 = atoms1->atom[index1[i1]].resind;
369 rnr2 = atoms2->atom[index2[i2]].resind;
370 if (rnr1 != prnr1 || rnr2 != prnr2)
374 fprintf(debug, "R: %s%d %s%d\n",
375 *resinfo1[rnr1].name, rnr1, *resinfo2[rnr2].name, rnr2);
377 rescmp = strcmp(*resinfo1[rnr1].name, *resinfo2[rnr2].name);
381 fprintf(debug, "comparing %d %d", i1, i2);
383 atcmp = debug_strcmp(*atnms1[index1[i1]], *atnms2[index2[i2]]);
386 if (atcmp != 0) /* no match -> find match within residues */
388 m1 = find_res_end(i1, *isize1, index1, atoms1);
389 m2 = find_res_end(i2, *isize2, index2, atoms2);
392 fprintf(debug, " [%d<%d %d<%d]", i1, m1, i2, m2);
394 atcmp = find_next_match_atoms_in_res(&i1, index1, m1, atnms1,
395 &i2, index2, m2, atnms2);
398 fprintf(debug, " -> %d %d %s-%s", i1, i2,
399 *atnms1[index1[i1]], *atnms2[index2[i2]]);
403 if (atcmp != 0) /* still no match -> next residue(s) */
407 rescmp = find_next_match_res(&rnr1, rsize1, rindex1, resinfo1,
408 &rnr2, rsize2, rindex2, resinfo2);
411 i1 = find_first_atom_in_res(rnr1, *isize1, index1, atoms1->atom);
415 i2 = find_first_atom_in_res(rnr2, *isize2, index2, atoms2->atom);
419 fprintf(debug, " -> %s%d-%s%d %s%d-%s%d",
420 *resinfo1[rnr1].name, rnr1,
421 *atnms1[index1[i1]], index1[i1],
422 *resinfo2[rnr2].name, rnr2,
423 *atnms2[index2[i2]], index2[i2]);
425 m1 = find_res_end(i1, *isize1, index1, atoms1);
426 m2 = find_res_end(i2, *isize2, index2, atoms2);
429 fprintf(debug, " [%d<%d %d<%d]", i1, m1, i2, m2);
431 atcmp = find_next_match_atoms_in_res(&i1, index1, m1, atnms1,
432 &i2, index2, m2, atnms2);
435 fprintf(debug, " -> %d %d %s-%s", i1, i2,
436 *atnms1[index1[i1]], *atnms2[index2[i2]]);
441 fprintf(debug, "(%d %d): %d %d\n", ii1, ii2, atcmp, rescmp);
443 if (atcmp == 0) /* if match -> store indices */
445 index1[ii1++] = index1[i1];
446 index2[ii2++] = index2[i2];
458 gmx_fatal(FARGS, "DEATH HORROR: non-equal number of matching atoms!\n");
460 if (ii1 == i1 && ii2 == i2)
462 printf("All atoms in index groups 1 and 2 match\n");
466 if (i1 == i2 && ii1 == ii2)
468 printf("Both index groups modified from %d to %d atoms\n", i1, ii1);
474 printf("Index group 1 modified from %d to %d atoms\n", i1, ii1);
478 printf("Index group 2 modified from %d to %d atoms\n", i2, ii2);
487 int gmx_confrms(int argc, char *argv[])
489 const char *desc[] = {
490 "[THISMODULE] computes the root mean square deviation (RMSD) of two",
491 "structures after least-squares fitting the second structure on the first one.",
492 "The two structures do NOT need to have the same number of atoms,",
493 "only the two index groups used for the fit need to be identical.",
494 "With [TT]-name[tt] only matching atom names from the selected groups",
495 "will be used for the fit and RMSD calculation. This can be useful ",
496 "when comparing mutants of a protein.",
498 "The superimposed structures are written to file. In a [TT].pdb[tt] file",
499 "the two structures will be written as separate models",
500 "(use [TT]rasmol -nmrpdb[tt]). Also in a [TT].pdb[tt] file, B-factors",
501 "calculated from the atomic MSD values can be written with [TT]-bfac[tt].",
503 static gmx_bool bOne = FALSE, bRmpbc = FALSE, bMW = TRUE, bName = FALSE,
504 bBfac = FALSE, bFit = TRUE, bLabel = FALSE;
507 { "-one", FALSE, etBOOL, {&bOne}, "Only write the fitted structure to file" },
508 { "-mw", FALSE, etBOOL, {&bMW}, "Mass-weighted fitting and RMSD" },
509 { "-pbc", FALSE, etBOOL, {&bRmpbc}, "Try to make molecules whole again" },
510 { "-fit", FALSE, etBOOL, {&bFit},
511 "Do least squares superposition of the target structure to the reference" },
512 { "-name", FALSE, etBOOL, {&bName},
513 "Only compare matching atom names" },
514 { "-label", FALSE, etBOOL, {&bLabel},
515 "Added chain labels A for first and B for second structure"},
516 { "-bfac", FALSE, etBOOL, {&bBfac},
517 "Output B-factors from atomic MSD values" }
520 { efTPS, "-f1", "conf1.gro", ffREAD },
521 { efSTX, "-f2", "conf2", ffREAD },
522 { efSTO, "-o", "fit.pdb", ffWRITE },
523 { efNDX, "-n1", "fit1.ndx", ffOPTRD },
524 { efNDX, "-n2", "fit2.ndx", ffOPTRD },
525 { efNDX, "-no", "match.ndx", ffOPTWR }
527 #define NFILE asize(fnm)
529 /* the two structure files */
530 const char *conf1file, *conf2file, *matchndxfile, *outfile;
532 char title1[STRLEN], title2[STRLEN], *name1, *name2;
533 t_topology *top1, *top2;
535 t_atoms *atoms1, *atoms2;
538 real *w_rls, mass, totmass;
539 rvec *x1, *v1, *x2, *v2, *fit_x;
547 /* center of mass calculation */
551 /* variables for fit */
552 char *groupnames1, *groupnames2;
554 atom_id *index1, *index2;
555 real rms, msd, minmsd, maxmsd;
559 if (!parse_common_args(&argc, argv, PCA_BE_NICE | PCA_CAN_VIEW,
560 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &oenv))
564 matchndxfile = opt2fn_null("-no", NFILE, fnm);
565 conf1file = ftp2fn(efTPS, NFILE, fnm);
566 conf2file = ftp2fn(efSTX, NFILE, fnm);
568 /* reading reference structure from first structure file */
569 fprintf(stderr, "\nReading first structure file\n");
571 read_tps_conf(conf1file, title1, top1, &ePBC1, &x1, &v1, box1, TRUE);
572 atoms1 = &(top1->atoms);
573 fprintf(stderr, "%s\nContaining %d atoms in %d residues\n",
574 title1, atoms1->nr, atoms1->nres);
578 rm_gropbc(atoms1, x1, box1);
581 fprintf(stderr, "Select group from first structure\n");
582 get_index(atoms1, opt2fn_null("-n1", NFILE, fnm),
583 1, &isize1, &index1, &groupnames1);
586 if (bFit && (isize1 < 3))
588 gmx_fatal(FARGS, "Need >= 3 points to fit!\n");
591 /* reading second structure file */
592 fprintf(stderr, "\nReading second structure file\n");
594 read_tps_conf(conf2file, title2, top2, &ePBC2, &x2, &v2, box2, TRUE);
595 atoms2 = &(top2->atoms);
596 fprintf(stderr, "%s\nContaining %d atoms in %d residues\n",
597 title2, atoms2->nr, atoms2->nres);
601 rm_gropbc(atoms2, x2, box2);
604 fprintf(stderr, "Select group from second structure\n");
605 get_index(atoms2, opt2fn_null("-n2", NFILE, fnm),
606 1, &isize2, &index2, &groupnames2);
610 find_matching_names(&isize1, index1, atoms1, &isize2, index2, atoms2);
613 fp = gmx_ffopen(matchndxfile, "w");
614 fprintf(fp, "; Matching atoms between %s from %s and %s from %s\n",
615 groupnames1, conf1file, groupnames2, conf2file);
616 fprintf(fp, "[ Match_%s_%s ]\n", conf1file, groupnames1);
617 for (i = 0; i < isize1; i++)
619 fprintf(fp, "%4u%s", index1[i]+1, (i%15 == 14 || i == isize1-1) ? "\n" : " ");
621 fprintf(fp, "[ Match_%s_%s ]\n", conf2file, groupnames2);
622 for (i = 0; i < isize2; i++)
624 fprintf(fp, "%4u%s", index2[i]+1, (i%15 == 14 || i == isize2-1) ? "\n" : " ");
629 /* check isizes, must be equal */
630 if (isize2 != isize1)
632 gmx_fatal(FARGS, "You selected groups with differen number of atoms.\n");
635 for (i = 0; i < isize1; i++)
637 name1 = *atoms1->atomname[index1[i]];
638 name2 = *atoms2->atomname[index2[i]];
639 if (strcmp(name1, name2))
644 "Warning: atomnames at index %d don't match: %u %s, %u %s\n",
645 i+1, index1[i]+1, name1, index2[i]+1, name2);
651 atoms1->atom[index1[i]].m = 1;
652 atoms2->atom[index2[i]].m = 1;
657 fprintf(stderr, "%d atomname%s did not match\n", warn, (warn == 1) ? "" : "s");
662 /* calculate and remove center of mass of structures */
663 calc_rm_cm(isize1, index1, atoms1, x1, xcm1);
664 calc_rm_cm(isize2, index2, atoms2, x2, xcm2);
666 snew(w_rls, atoms2->nr);
667 snew(fit_x, atoms2->nr);
668 for (at = 0; (at < isize1); at++)
670 w_rls[index2[at]] = atoms1->atom[index1[at]].m;
671 copy_rvec(x1[index1[at]], fit_x[index2[at]]);
674 /* do the least squares fit to the reference structure */
675 do_fit(atoms2->nr, w_rls, fit_x, x2);
688 /* calculate the rms deviation */
694 for (at = 0; at < isize1; at++)
696 mass = atoms1->atom[index1[at]].m;
697 for (m = 0; m < DIM; m++)
699 msd = sqr(x1[index1[at]][m] - x2[index2[at]][m]);
703 maxmsd = max(maxmsd, msds[at]);
704 minmsd = min(minmsd, msds[at]);
707 rms = sqrt(rms/totmass);
709 printf("Root mean square deviation after lsq fit = %g nm\n", rms);
712 printf("Atomic MSD's range from %g to %g nm^2\n", minmsd, maxmsd);
717 /* reset coordinates of reference and fitted structure */
718 for (i = 0; i < atoms1->nr; i++)
720 for (m = 0; m < DIM; m++)
725 for (i = 0; i < atoms2->nr; i++)
727 for (m = 0; m < DIM; m++)
734 outfile = ftp2fn(efSTO, NFILE, fnm);
735 switch (fn2ftp(outfile))
742 srenew(atoms1->pdbinfo, atoms1->nr);
743 srenew(atoms1->atom, atoms1->nr); /* Why renew atom? */
745 /* Avoid segfaults when writing the pdb-file */
746 for (i = 0; i < atoms1->nr; i++)
748 atoms1->pdbinfo[i].type = eptAtom;
749 atoms1->pdbinfo[i].occup = 1.00;
750 atoms1->pdbinfo[i].bAnisotropic = FALSE;
753 atoms1->pdbinfo[i].bfac = 0;
757 atoms1->resinfo[atoms1->atom[i].resind].chainid = 'A';
761 for (i = 0; i < isize1; i++)
763 /* atoms1->pdbinfo[index1[i]].type = eptAtom; */
764 /* atoms1->pdbinfo[index1[i]].bAnisotropic = FALSE; */
767 atoms1->pdbinfo[index1[i]].bfac = (800*M_PI*M_PI/3.0)*msds[i];
770 /* atoms1->resinfo[atoms1->atom[index1[i]].resind].chain = 'A'; */
772 srenew(atoms2->pdbinfo, atoms2->nr);
773 srenew(atoms2->atom, atoms2->nr); /* Why renew atom? */
775 for (i = 0; i < atoms2->nr; i++)
777 atoms2->pdbinfo[i].type = eptAtom;
778 atoms2->pdbinfo[i].occup = 1.00;
779 atoms2->pdbinfo[i].bAnisotropic = FALSE;
782 atoms2->pdbinfo[i].bfac = 0;
786 atoms2->resinfo[atoms1->atom[i].resind].chainid = 'B';
790 for (i = 0; i < isize2; i++)
792 /* atoms2->pdbinfo[index2[i]].type = eptAtom; */
793 /* atoms2->pdbinfo[index2[i]].bAnisotropic = FALSE; */
796 atoms2->pdbinfo[index2[i]].bfac = (800*M_PI*M_PI/3.0)*msds[i];
799 /* atoms2->resinfo[atoms2->atom[index2[i]].resind].chain = 'B'; */
802 fp = gmx_ffopen(outfile, "w");
805 write_pdbfile(fp, title1, atoms1, x1, ePBC1, box1, ' ', 1, NULL, TRUE);
807 write_pdbfile(fp, title2, atoms2, x2, ePBC2, box2, ' ', bOne ? -1 : 2, NULL, TRUE);
813 fprintf(stderr, "WARNING: cannot write B-factor values to gro file\n");
815 fp = gmx_ffopen(outfile, "w");
818 write_hconf_p(fp, title1, atoms1, 3, x1, v1, box1);
820 write_hconf_p(fp, title2, atoms2, 3, x2, v2, box2);
826 fprintf(stderr, "WARNING: cannot write B-factor values to %s file\n",
827 ftp2ext(fn2ftp(outfile)));
832 "WARNING: cannot write the reference structure to %s file\n",
833 ftp2ext(fn2ftp(outfile)));
835 write_sto_conf(outfile, title2, atoms2, x2, v2, ePBC2, box2);
839 view_all(oenv, NFILE, fnm);