2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
41 #include "gromacs/fileio/filenm.h"
47 #include "gromacs/commandline/pargs.h"
48 #include "gromacs/fileio/tpxio.h"
53 #include "gmx_fatal.h"
54 #include "gromacs/fileio/futil.h"
55 #include "gromacs/fileio/confio.h"
56 #include "gromacs/fileio/pdbio.h"
64 void calc_rm_cm(int isize, atom_id index[], t_atoms *atoms, rvec x[], rvec xcm)
69 /* calculate and remove center of mass of reference structure */
72 for (i = 0; i < isize; i++)
74 m = atoms->atom[index[i]].m;
75 for (d = 0; d < DIM; d++)
77 xcm[d] += m*x[index[i]][d];
81 svmul(1/tm, xcm, xcm);
82 for (i = 0; i < atoms->nr; i++)
88 int build_res_index(int isize, atom_id index[], t_atom atom[], int rindex[])
93 rindex[r] = atom[index[0]].resind;
95 for (i = 1; i < isize; i++)
97 if (atom[index[i]].resind != rindex[r-1])
99 rindex[r] = atom[index[i]].resind;
107 int find_res_end(int i, int isize, atom_id index[], t_atoms *atoms)
111 rnr = atoms->atom[index[i]].resind;
112 while (i < isize && atoms->atom[index[i]].resind == rnr)
119 int debug_strcmp(char s1[], char s2[])
123 fprintf(debug, " %s-%s", s1, s2);
125 return strcmp(s1, s2);
128 int find_next_match_atoms_in_res(int *i1, atom_id index1[],
129 int m1, char **atnms1[],
130 int *i2, atom_id index2[],
131 int m2, char **atnms2[])
133 int dx, dy, dmax, cmp;
134 gmx_bool bFW = FALSE;
138 dmax = max(m1-*i1, m2-*i2);
139 for (dx = 0; dx < dmax && cmp != 0; dx++)
141 for (dy = dx; dy < dmax && cmp != 0; dy++)
150 if (*i1+dx < m1 && *i2+dy < m2)
153 cmp = debug_strcmp(*atnms1[index1[*i1+dx]], *atnms2[index2[*i2+dy]]);
156 fprintf(debug, "(%d %d)", *i1+dx, *i2+dy);
159 if (cmp != 0 && *i1+dy < m1 && *i2+dx < m2)
162 cmp = debug_strcmp(*atnms1[index1[*i1+dy]], *atnms2[index2[*i2+dx]]);
165 fprintf(debug, "(%d %d)", *i1+dy, *i2+dx);
171 /* apparently, dx and dy are incremented one more time
172 as the loops terminate: we correct this here */
179 fprintf(debug, "{%d %d}", *i1+bFW ? dx : dy, *i2+bFW ? dy : dx );
196 static int find_next_match_res(int *rnr1, int isize1,
197 int index1[], t_resinfo *resinfo1,
198 int *rnr2, int isize2,
199 int index2[], t_resinfo *resinfo2)
201 int dx, dy, dmax, cmp, rr1, rr2;
202 gmx_bool bFW = FALSE, bFF = FALSE;
206 while (rr1 < isize1 && *rnr1 != index1[rr1])
211 while (rr2 < isize2 && *rnr2 != index2[rr2])
217 dmax = max(isize1-rr1, isize2-rr2);
220 fprintf(debug, " R:%d-%d:%d-%d:%d ",
221 rr1, isize1, rr2, isize2, dmax);
223 for (dx = 0; dx < dmax && cmp != 0; dx++)
225 for (dy = 0; dy <= dx && cmp != 0; dy++)
230 if (rr1+dx < isize1 && rr2+dy < isize2)
233 cmp = debug_strcmp(*resinfo1[index1[rr1+dx]].name,
234 *resinfo2[index2[rr2+dy]].name);
237 fprintf(debug, "(%d %d)", rr1+dx, rr2+dy);
240 if (cmp != 0 && rr1+dy < isize1 && rr2+dx < isize2)
243 cmp = debug_strcmp(*resinfo1[index1[rr1+dy]].name,
244 *resinfo2[index2[rr2+dx]].name);
247 fprintf(debug, "(%d %d)", rr1+dy, rr2+dx);
250 if (dx != 0 && cmp != 0 && rr1+dx < isize1 && rr2+dx < isize2)
253 cmp = debug_strcmp(*resinfo1[index1[rr1+dx]].name,
254 *resinfo2[index2[rr2+dx]].name);
257 fprintf(debug, "(%d %d)", rr1+dx, rr2+dx);
267 /* apparently, dx and dy are incremented one more time
268 as the loops terminate: we correct this here */
271 /* if we skipped equal on both sides, only skip one residue
272 to allow for single mutations of residues... */
277 fprintf(debug, "%d.%d.%dX%sX%s", dx, rr1, rr2,
278 *resinfo1[index1[rr1+1]].name,
279 *resinfo2[index2[rr2+1]].name);
312 int find_first_atom_in_res(int rnr, int isize, atom_id index[], t_atom atom[])
317 while (i < isize && atom[index[i]].resind != rnr)
322 if (atom[index[i]].resind == rnr)
332 void find_matching_names(int *isize1, atom_id index1[], t_atoms *atoms1,
333 int *isize2, atom_id index2[], t_atoms *atoms2)
335 int i, i1, i2, ii1, ii2, m1, m2;
337 int r, rnr1, rnr2, prnr1, prnr2;
339 int *rindex1, *rindex2;
340 char *resnm1, *resnm2, *atnm1, *atnm2;
341 char ***atnms1, ***atnms2;
342 t_resinfo *resinfo1, *resinfo2;
344 /* set some handy shortcuts */
345 resinfo1 = atoms1->resinfo;
346 atnms1 = atoms1->atomname;
347 resinfo2 = atoms2->resinfo;
348 atnms2 = atoms2->atomname;
350 /* build indexes of selected residues */
351 snew(rindex1, atoms1->nres);
352 rsize1 = build_res_index(*isize1, index1, atoms1->atom, rindex1);
353 snew(rindex2, atoms2->nres);
354 rsize2 = build_res_index(*isize2, index2, atoms2->atom, rindex2);
359 prnr1 = prnr2 = NOTSET;
362 fprintf(debug, "Find matching names: %d, %d\n", *isize1, *isize2);
364 while (atcmp == 0 && i1 < *isize1 && i2 < *isize2)
367 rnr1 = atoms1->atom[index1[i1]].resind;
368 rnr2 = atoms2->atom[index2[i2]].resind;
369 if (rnr1 != prnr1 || rnr2 != prnr2)
373 fprintf(debug, "R: %s%d %s%d\n",
374 *resinfo1[rnr1].name, rnr1, *resinfo2[rnr2].name, rnr2);
376 rescmp = strcmp(*resinfo1[rnr1].name, *resinfo2[rnr2].name);
380 fprintf(debug, "comparing %d %d", i1, i2);
382 atcmp = debug_strcmp(*atnms1[index1[i1]], *atnms2[index2[i2]]);
385 if (atcmp != 0) /* no match -> find match within residues */
387 m1 = find_res_end(i1, *isize1, index1, atoms1);
388 m2 = find_res_end(i2, *isize2, index2, atoms2);
391 fprintf(debug, " [%d<%d %d<%d]", i1, m1, i2, m2);
393 atcmp = find_next_match_atoms_in_res(&i1, index1, m1, atnms1,
394 &i2, index2, m2, atnms2);
397 fprintf(debug, " -> %d %d %s-%s", i1, i2,
398 *atnms1[index1[i1]], *atnms2[index2[i2]]);
402 if (atcmp != 0) /* still no match -> next residue(s) */
406 rescmp = find_next_match_res(&rnr1, rsize1, rindex1, resinfo1,
407 &rnr2, rsize2, rindex2, resinfo2);
410 i1 = find_first_atom_in_res(rnr1, *isize1, index1, atoms1->atom);
414 i2 = find_first_atom_in_res(rnr2, *isize2, index2, atoms2->atom);
418 fprintf(debug, " -> %s%d-%s%d %s%d-%s%d",
419 *resinfo1[rnr1].name, rnr1,
420 *atnms1[index1[i1]], index1[i1],
421 *resinfo2[rnr2].name, rnr2,
422 *atnms2[index2[i2]], index2[i2]);
424 m1 = find_res_end(i1, *isize1, index1, atoms1);
425 m2 = find_res_end(i2, *isize2, index2, atoms2);
428 fprintf(debug, " [%d<%d %d<%d]", i1, m1, i2, m2);
430 atcmp = find_next_match_atoms_in_res(&i1, index1, m1, atnms1,
431 &i2, index2, m2, atnms2);
434 fprintf(debug, " -> %d %d %s-%s", i1, i2,
435 *atnms1[index1[i1]], *atnms2[index2[i2]]);
440 fprintf(debug, "(%d %d): %d %d\n", ii1, ii2, atcmp, rescmp);
442 if (atcmp == 0) /* if match -> store indices */
444 index1[ii1++] = index1[i1];
445 index2[ii2++] = index2[i2];
457 gmx_fatal(FARGS, "DEATH HORROR: non-equal number of matching atoms!\n");
459 if (ii1 == i1 && ii2 == i2)
461 printf("All atoms in index groups 1 and 2 match\n");
465 if (i1 == i2 && ii1 == ii2)
467 printf("Both index groups modified from %d to %d atoms\n", i1, ii1);
473 printf("Index group 1 modified from %d to %d atoms\n", i1, ii1);
477 printf("Index group 2 modified from %d to %d atoms\n", i2, ii2);
486 int gmx_confrms(int argc, char *argv[])
488 const char *desc[] = {
489 "[THISMODULE] computes the root mean square deviation (RMSD) of two",
490 "structures after least-squares fitting the second structure on the first one.",
491 "The two structures do NOT need to have the same number of atoms,",
492 "only the two index groups used for the fit need to be identical.",
493 "With [TT]-name[tt] only matching atom names from the selected groups",
494 "will be used for the fit and RMSD calculation. This can be useful ",
495 "when comparing mutants of a protein.",
497 "The superimposed structures are written to file. In a [TT].pdb[tt] file",
498 "the two structures will be written as separate models",
499 "(use [TT]rasmol -nmrpdb[tt]). Also in a [TT].pdb[tt] file, B-factors",
500 "calculated from the atomic MSD values can be written with [TT]-bfac[tt].",
502 static gmx_bool bOne = FALSE, bRmpbc = FALSE, bMW = TRUE, bName = FALSE,
503 bBfac = FALSE, bFit = TRUE, bLabel = FALSE;
506 { "-one", FALSE, etBOOL, {&bOne}, "Only write the fitted structure to file" },
507 { "-mw", FALSE, etBOOL, {&bMW}, "Mass-weighted fitting and RMSD" },
508 { "-pbc", FALSE, etBOOL, {&bRmpbc}, "Try to make molecules whole again" },
509 { "-fit", FALSE, etBOOL, {&bFit},
510 "Do least squares superposition of the target structure to the reference" },
511 { "-name", FALSE, etBOOL, {&bName},
512 "Only compare matching atom names" },
513 { "-label", FALSE, etBOOL, {&bLabel},
514 "Added chain labels A for first and B for second structure"},
515 { "-bfac", FALSE, etBOOL, {&bBfac},
516 "Output B-factors from atomic MSD values" }
519 { efTPS, "-f1", "conf1.gro", ffREAD },
520 { efSTX, "-f2", "conf2", ffREAD },
521 { efSTO, "-o", "fit.pdb", ffWRITE },
522 { efNDX, "-n1", "fit1.ndx", ffOPTRD },
523 { efNDX, "-n2", "fit2.ndx", ffOPTRD },
524 { efNDX, "-no", "match.ndx", ffOPTWR }
526 #define NFILE asize(fnm)
528 /* the two structure files */
529 const char *conf1file, *conf2file, *matchndxfile, *outfile;
531 char title1[STRLEN], title2[STRLEN], *name1, *name2;
532 t_topology *top1, *top2;
534 t_atoms *atoms1, *atoms2;
537 real *w_rls, mass, totmass;
538 rvec *x1, *v1, *x2, *v2, *fit_x;
546 /* center of mass calculation */
550 /* variables for fit */
551 char *groupnames1, *groupnames2;
553 atom_id *index1, *index2;
554 real rms, msd, minmsd, maxmsd;
558 if (!parse_common_args(&argc, argv, PCA_BE_NICE | PCA_CAN_VIEW,
559 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &oenv))
563 matchndxfile = opt2fn_null("-no", NFILE, fnm);
564 conf1file = ftp2fn(efTPS, NFILE, fnm);
565 conf2file = ftp2fn(efSTX, NFILE, fnm);
567 /* reading reference structure from first structure file */
568 fprintf(stderr, "\nReading first structure file\n");
570 read_tps_conf(conf1file, title1, top1, &ePBC1, &x1, &v1, box1, TRUE);
571 atoms1 = &(top1->atoms);
572 fprintf(stderr, "%s\nContaining %d atoms in %d residues\n",
573 title1, atoms1->nr, atoms1->nres);
577 rm_gropbc(atoms1, x1, box1);
580 fprintf(stderr, "Select group from first structure\n");
581 get_index(atoms1, opt2fn_null("-n1", NFILE, fnm),
582 1, &isize1, &index1, &groupnames1);
585 if (bFit && (isize1 < 3))
587 gmx_fatal(FARGS, "Need >= 3 points to fit!\n");
590 /* reading second structure file */
591 fprintf(stderr, "\nReading second structure file\n");
593 read_tps_conf(conf2file, title2, top2, &ePBC2, &x2, &v2, box2, TRUE);
594 atoms2 = &(top2->atoms);
595 fprintf(stderr, "%s\nContaining %d atoms in %d residues\n",
596 title2, atoms2->nr, atoms2->nres);
600 rm_gropbc(atoms2, x2, box2);
603 fprintf(stderr, "Select group from second structure\n");
604 get_index(atoms2, opt2fn_null("-n2", NFILE, fnm),
605 1, &isize2, &index2, &groupnames2);
609 find_matching_names(&isize1, index1, atoms1, &isize2, index2, atoms2);
612 fp = gmx_ffopen(matchndxfile, "w");
613 fprintf(fp, "; Matching atoms between %s from %s and %s from %s\n",
614 groupnames1, conf1file, groupnames2, conf2file);
615 fprintf(fp, "[ Match_%s_%s ]\n", conf1file, groupnames1);
616 for (i = 0; i < isize1; i++)
618 fprintf(fp, "%4u%s", index1[i]+1, (i%15 == 14 || i == isize1-1) ? "\n" : " ");
620 fprintf(fp, "[ Match_%s_%s ]\n", conf2file, groupnames2);
621 for (i = 0; i < isize2; i++)
623 fprintf(fp, "%4u%s", index2[i]+1, (i%15 == 14 || i == isize2-1) ? "\n" : " ");
628 /* check isizes, must be equal */
629 if (isize2 != isize1)
631 gmx_fatal(FARGS, "You selected groups with differen number of atoms.\n");
634 for (i = 0; i < isize1; i++)
636 name1 = *atoms1->atomname[index1[i]];
637 name2 = *atoms2->atomname[index2[i]];
638 if (strcmp(name1, name2))
643 "Warning: atomnames at index %d don't match: %u %s, %u %s\n",
644 i+1, index1[i]+1, name1, index2[i]+1, name2);
650 atoms1->atom[index1[i]].m = 1;
651 atoms2->atom[index2[i]].m = 1;
656 fprintf(stderr, "%d atomname%s did not match\n", warn, (warn == 1) ? "" : "s");
661 /* calculate and remove center of mass of structures */
662 calc_rm_cm(isize1, index1, atoms1, x1, xcm1);
663 calc_rm_cm(isize2, index2, atoms2, x2, xcm2);
665 snew(w_rls, atoms2->nr);
666 snew(fit_x, atoms2->nr);
667 for (at = 0; (at < isize1); at++)
669 w_rls[index2[at]] = atoms1->atom[index1[at]].m;
670 copy_rvec(x1[index1[at]], fit_x[index2[at]]);
673 /* do the least squares fit to the reference structure */
674 do_fit(atoms2->nr, w_rls, fit_x, x2);
687 /* calculate the rms deviation */
693 for (at = 0; at < isize1; at++)
695 mass = atoms1->atom[index1[at]].m;
696 for (m = 0; m < DIM; m++)
698 msd = sqr(x1[index1[at]][m] - x2[index2[at]][m]);
702 maxmsd = max(maxmsd, msds[at]);
703 minmsd = min(minmsd, msds[at]);
706 rms = sqrt(rms/totmass);
708 printf("Root mean square deviation after lsq fit = %g nm\n", rms);
711 printf("Atomic MSD's range from %g to %g nm^2\n", minmsd, maxmsd);
716 /* reset coordinates of reference and fitted structure */
717 for (i = 0; i < atoms1->nr; i++)
719 for (m = 0; m < DIM; m++)
724 for (i = 0; i < atoms2->nr; i++)
726 for (m = 0; m < DIM; m++)
733 outfile = ftp2fn(efSTO, NFILE, fnm);
734 switch (fn2ftp(outfile))
741 srenew(atoms1->pdbinfo, atoms1->nr);
742 srenew(atoms1->atom, atoms1->nr); /* Why renew atom? */
744 /* Avoid segfaults when writing the pdb-file */
745 for (i = 0; i < atoms1->nr; i++)
747 atoms1->pdbinfo[i].type = eptAtom;
748 atoms1->pdbinfo[i].occup = 1.00;
749 atoms1->pdbinfo[i].bAnisotropic = FALSE;
752 atoms1->pdbinfo[i].bfac = 0;
756 atoms1->resinfo[atoms1->atom[i].resind].chainid = 'A';
760 for (i = 0; i < isize1; i++)
762 /* atoms1->pdbinfo[index1[i]].type = eptAtom; */
763 /* atoms1->pdbinfo[index1[i]].bAnisotropic = FALSE; */
766 atoms1->pdbinfo[index1[i]].bfac = (800*M_PI*M_PI/3.0)*msds[i];
769 /* atoms1->resinfo[atoms1->atom[index1[i]].resind].chain = 'A'; */
771 srenew(atoms2->pdbinfo, atoms2->nr);
772 srenew(atoms2->atom, atoms2->nr); /* Why renew atom? */
774 for (i = 0; i < atoms2->nr; i++)
776 atoms2->pdbinfo[i].type = eptAtom;
777 atoms2->pdbinfo[i].occup = 1.00;
778 atoms2->pdbinfo[i].bAnisotropic = FALSE;
781 atoms2->pdbinfo[i].bfac = 0;
785 atoms2->resinfo[atoms1->atom[i].resind].chainid = 'B';
789 for (i = 0; i < isize2; i++)
791 /* atoms2->pdbinfo[index2[i]].type = eptAtom; */
792 /* atoms2->pdbinfo[index2[i]].bAnisotropic = FALSE; */
795 atoms2->pdbinfo[index2[i]].bfac = (800*M_PI*M_PI/3.0)*msds[i];
798 /* atoms2->resinfo[atoms2->atom[index2[i]].resind].chain = 'B'; */
801 fp = gmx_ffopen(outfile, "w");
804 write_pdbfile(fp, title1, atoms1, x1, ePBC1, box1, ' ', 1, NULL, TRUE);
806 write_pdbfile(fp, title2, atoms2, x2, ePBC2, box2, ' ', bOne ? -1 : 2, NULL, TRUE);
812 fprintf(stderr, "WARNING: cannot write B-factor values to gro file\n");
814 fp = gmx_ffopen(outfile, "w");
817 write_hconf_p(fp, title1, atoms1, 3, x1, v1, box1);
819 write_hconf_p(fp, title2, atoms2, 3, x2, v2, box2);
825 fprintf(stderr, "WARNING: cannot write B-factor values to %s file\n",
826 ftp2ext(fn2ftp(outfile)));
831 "WARNING: cannot write the reference structure to %s file\n",
832 ftp2ext(fn2ftp(outfile)));
834 write_sto_conf(outfile, title2, atoms2, x2, v2, ePBC2, box2);
838 view_all(oenv, NFILE, fnm);