2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
44 #include "gromacs/fileio/filenm.h"
45 #include "gromacs/utility/smalloc.h"
48 #include "gromacs/commandline/pargs.h"
49 #include "gromacs/fileio/tpxio.h"
50 #include "gromacs/math/vec.h"
51 #include "gromacs/topology/index.h"
52 #include "gromacs/utility/fatalerror.h"
53 #include "gromacs/utility/futil.h"
54 #include "gromacs/fileio/confio.h"
55 #include "gromacs/fileio/pdbio.h"
58 #include "gromacs/pbcutil/rmpbc.h"
61 #include "gromacs/math/do_fit.h"
63 void calc_rm_cm(int isize, atom_id index[], t_atoms *atoms, rvec x[], rvec xcm)
68 /* calculate and remove center of mass of reference structure */
71 for (i = 0; i < isize; i++)
73 m = atoms->atom[index[i]].m;
74 for (d = 0; d < DIM; d++)
76 xcm[d] += m*x[index[i]][d];
80 svmul(1/tm, xcm, xcm);
81 for (i = 0; i < atoms->nr; i++)
87 int build_res_index(int isize, atom_id index[], t_atom atom[], int rindex[])
92 rindex[r] = atom[index[0]].resind;
94 for (i = 1; i < isize; i++)
96 if (atom[index[i]].resind != rindex[r-1])
98 rindex[r] = atom[index[i]].resind;
106 int find_res_end(int i, int isize, atom_id index[], t_atoms *atoms)
110 rnr = atoms->atom[index[i]].resind;
111 while (i < isize && atoms->atom[index[i]].resind == rnr)
118 int debug_strcmp(char s1[], char s2[])
122 fprintf(debug, " %s-%s", s1, s2);
124 return strcmp(s1, s2);
127 int find_next_match_atoms_in_res(int *i1, atom_id index1[],
128 int m1, char **atnms1[],
129 int *i2, atom_id index2[],
130 int m2, char **atnms2[])
132 int dx, dy, dmax, cmp;
133 gmx_bool bFW = FALSE;
137 dmax = max(m1-*i1, m2-*i2);
138 for (dx = 0; dx < dmax && cmp != 0; dx++)
140 for (dy = dx; dy < dmax && cmp != 0; dy++)
149 if (*i1+dx < m1 && *i2+dy < m2)
152 cmp = debug_strcmp(*atnms1[index1[*i1+dx]], *atnms2[index2[*i2+dy]]);
155 fprintf(debug, "(%d %d)", *i1+dx, *i2+dy);
158 if (cmp != 0 && *i1+dy < m1 && *i2+dx < m2)
161 cmp = debug_strcmp(*atnms1[index1[*i1+dy]], *atnms2[index2[*i2+dx]]);
164 fprintf(debug, "(%d %d)", *i1+dy, *i2+dx);
170 /* apparently, dx and dy are incremented one more time
171 as the loops terminate: we correct this here */
178 fprintf(debug, "{%d %d}", *i1 + (bFW ? dx : dy), *i2 + (bFW ? dy : dx) );
195 static int find_next_match_res(int *rnr1, int isize1,
196 int index1[], t_resinfo *resinfo1,
197 int *rnr2, int isize2,
198 int index2[], t_resinfo *resinfo2)
200 int dx, dy, dmax, cmp, rr1, rr2;
201 gmx_bool bFW = FALSE, bFF = FALSE;
205 while (rr1 < isize1 && *rnr1 != index1[rr1])
210 while (rr2 < isize2 && *rnr2 != index2[rr2])
216 dmax = max(isize1-rr1, isize2-rr2);
219 fprintf(debug, " R:%d-%d:%d-%d:%d ",
220 rr1, isize1, rr2, isize2, dmax);
222 for (dx = 0; dx < dmax && cmp != 0; dx++)
224 for (dy = 0; dy <= dx && cmp != 0; dy++)
229 if (rr1+dx < isize1 && rr2+dy < isize2)
232 cmp = debug_strcmp(*resinfo1[index1[rr1+dx]].name,
233 *resinfo2[index2[rr2+dy]].name);
236 fprintf(debug, "(%d %d)", rr1+dx, rr2+dy);
239 if (cmp != 0 && rr1+dy < isize1 && rr2+dx < isize2)
242 cmp = debug_strcmp(*resinfo1[index1[rr1+dy]].name,
243 *resinfo2[index2[rr2+dx]].name);
246 fprintf(debug, "(%d %d)", rr1+dy, rr2+dx);
249 if (dx != 0 && cmp != 0 && rr1+dx < isize1 && rr2+dx < isize2)
252 cmp = debug_strcmp(*resinfo1[index1[rr1+dx]].name,
253 *resinfo2[index2[rr2+dx]].name);
256 fprintf(debug, "(%d %d)", rr1+dx, rr2+dx);
266 /* apparently, dx and dy are incremented one more time
267 as the loops terminate: we correct this here */
270 /* if we skipped equal on both sides, only skip one residue
271 to allow for single mutations of residues... */
276 fprintf(debug, "%d.%d.%dX%sX%s", dx, rr1, rr2,
277 *resinfo1[index1[rr1+1]].name,
278 *resinfo2[index2[rr2+1]].name);
311 int find_first_atom_in_res(int rnr, int isize, atom_id index[], t_atom atom[])
316 while (i < isize && atom[index[i]].resind != rnr)
321 if (atom[index[i]].resind == rnr)
331 void find_matching_names(int *isize1, atom_id index1[], t_atoms *atoms1,
332 int *isize2, atom_id index2[], t_atoms *atoms2)
334 int i, i1, i2, ii1, ii2, m1, m2;
336 int r, rnr1, rnr2, prnr1, prnr2;
338 int *rindex1, *rindex2;
339 char *resnm1, *resnm2, *atnm1, *atnm2;
340 char ***atnms1, ***atnms2;
341 t_resinfo *resinfo1, *resinfo2;
343 /* set some handy shortcuts */
344 resinfo1 = atoms1->resinfo;
345 atnms1 = atoms1->atomname;
346 resinfo2 = atoms2->resinfo;
347 atnms2 = atoms2->atomname;
349 /* build indexes of selected residues */
350 snew(rindex1, atoms1->nres);
351 rsize1 = build_res_index(*isize1, index1, atoms1->atom, rindex1);
352 snew(rindex2, atoms2->nres);
353 rsize2 = build_res_index(*isize2, index2, atoms2->atom, rindex2);
358 prnr1 = prnr2 = NOTSET;
361 fprintf(debug, "Find matching names: %d, %d\n", *isize1, *isize2);
363 while (atcmp == 0 && i1 < *isize1 && i2 < *isize2)
366 rnr1 = atoms1->atom[index1[i1]].resind;
367 rnr2 = atoms2->atom[index2[i2]].resind;
368 if (rnr1 != prnr1 || rnr2 != prnr2)
372 fprintf(debug, "R: %s%d %s%d\n",
373 *resinfo1[rnr1].name, rnr1, *resinfo2[rnr2].name, rnr2);
375 rescmp = strcmp(*resinfo1[rnr1].name, *resinfo2[rnr2].name);
379 fprintf(debug, "comparing %d %d", i1, i2);
381 atcmp = debug_strcmp(*atnms1[index1[i1]], *atnms2[index2[i2]]);
384 if (atcmp != 0) /* no match -> find match within residues */
386 m1 = find_res_end(i1, *isize1, index1, atoms1);
387 m2 = find_res_end(i2, *isize2, index2, atoms2);
390 fprintf(debug, " [%d<%d %d<%d]", i1, m1, i2, m2);
392 atcmp = find_next_match_atoms_in_res(&i1, index1, m1, atnms1,
393 &i2, index2, m2, atnms2);
396 fprintf(debug, " -> %d %d %s-%s", i1, i2,
397 *atnms1[index1[i1]], *atnms2[index2[i2]]);
401 if (atcmp != 0) /* still no match -> next residue(s) */
405 rescmp = find_next_match_res(&rnr1, rsize1, rindex1, resinfo1,
406 &rnr2, rsize2, rindex2, resinfo2);
409 i1 = find_first_atom_in_res(rnr1, *isize1, index1, atoms1->atom);
413 i2 = find_first_atom_in_res(rnr2, *isize2, index2, atoms2->atom);
417 fprintf(debug, " -> %s%d-%s%d %s%d-%s%d",
418 *resinfo1[rnr1].name, rnr1,
419 *atnms1[index1[i1]], index1[i1],
420 *resinfo2[rnr2].name, rnr2,
421 *atnms2[index2[i2]], index2[i2]);
423 m1 = find_res_end(i1, *isize1, index1, atoms1);
424 m2 = find_res_end(i2, *isize2, index2, atoms2);
427 fprintf(debug, " [%d<%d %d<%d]", i1, m1, i2, m2);
429 atcmp = find_next_match_atoms_in_res(&i1, index1, m1, atnms1,
430 &i2, index2, m2, atnms2);
433 fprintf(debug, " -> %d %d %s-%s", i1, i2,
434 *atnms1[index1[i1]], *atnms2[index2[i2]]);
439 fprintf(debug, "(%d %d): %d %d\n", ii1, ii2, atcmp, rescmp);
441 if (atcmp == 0) /* if match -> store indices */
443 index1[ii1++] = index1[i1];
444 index2[ii2++] = index2[i2];
456 gmx_fatal(FARGS, "DEATH HORROR: non-equal number of matching atoms!\n");
458 if (ii1 == i1 && ii2 == i2)
460 printf("All atoms in index groups 1 and 2 match\n");
464 if (i1 == i2 && ii1 == ii2)
466 printf("Both index groups modified from %d to %d atoms\n", i1, ii1);
472 printf("Index group 1 modified from %d to %d atoms\n", i1, ii1);
476 printf("Index group 2 modified from %d to %d atoms\n", i2, ii2);
485 int gmx_confrms(int argc, char *argv[])
487 const char *desc[] = {
488 "[THISMODULE] computes the root mean square deviation (RMSD) of two",
489 "structures after least-squares fitting the second structure on the first one.",
490 "The two structures do NOT need to have the same number of atoms,",
491 "only the two index groups used for the fit need to be identical.",
492 "With [TT]-name[tt] only matching atom names from the selected groups",
493 "will be used for the fit and RMSD calculation. This can be useful ",
494 "when comparing mutants of a protein.",
496 "The superimposed structures are written to file. In a [TT].pdb[tt] file",
497 "the two structures will be written as separate models",
498 "(use [TT]rasmol -nmrpdb[tt]). Also in a [TT].pdb[tt] file, B-factors",
499 "calculated from the atomic MSD values can be written with [TT]-bfac[tt].",
501 static gmx_bool bOne = FALSE, bRmpbc = FALSE, bMW = TRUE, bName = FALSE,
502 bBfac = FALSE, bFit = TRUE, bLabel = FALSE;
505 { "-one", FALSE, etBOOL, {&bOne}, "Only write the fitted structure to file" },
506 { "-mw", FALSE, etBOOL, {&bMW}, "Mass-weighted fitting and RMSD" },
507 { "-pbc", FALSE, etBOOL, {&bRmpbc}, "Try to make molecules whole again" },
508 { "-fit", FALSE, etBOOL, {&bFit},
509 "Do least squares superposition of the target structure to the reference" },
510 { "-name", FALSE, etBOOL, {&bName},
511 "Only compare matching atom names" },
512 { "-label", FALSE, etBOOL, {&bLabel},
513 "Added chain labels A for first and B for second structure"},
514 { "-bfac", FALSE, etBOOL, {&bBfac},
515 "Output B-factors from atomic MSD values" }
518 { efTPS, "-f1", "conf1.gro", ffREAD },
519 { efSTX, "-f2", "conf2", ffREAD },
520 { efSTO, "-o", "fit.pdb", ffWRITE },
521 { efNDX, "-n1", "fit1.ndx", ffOPTRD },
522 { efNDX, "-n2", "fit2.ndx", ffOPTRD },
523 { efNDX, "-no", "match.ndx", ffOPTWR }
525 #define NFILE asize(fnm)
527 /* the two structure files */
528 const char *conf1file, *conf2file, *matchndxfile, *outfile;
530 char title1[STRLEN], title2[STRLEN], *name1, *name2;
531 t_topology *top1, *top2;
533 t_atoms *atoms1, *atoms2;
536 real *w_rls, mass, totmass;
537 rvec *x1, *v1, *x2, *v2, *fit_x;
545 /* center of mass calculation */
549 /* variables for fit */
550 char *groupnames1, *groupnames2;
552 atom_id *index1, *index2;
553 real rms, msd, minmsd, maxmsd;
557 if (!parse_common_args(&argc, argv, PCA_BE_NICE | PCA_CAN_VIEW,
558 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &oenv))
562 matchndxfile = opt2fn_null("-no", NFILE, fnm);
563 conf1file = ftp2fn(efTPS, NFILE, fnm);
564 conf2file = ftp2fn(efSTX, NFILE, fnm);
566 /* reading reference structure from first structure file */
567 fprintf(stderr, "\nReading first structure file\n");
569 read_tps_conf(conf1file, title1, top1, &ePBC1, &x1, &v1, box1, TRUE);
570 atoms1 = &(top1->atoms);
571 fprintf(stderr, "%s\nContaining %d atoms in %d residues\n",
572 title1, atoms1->nr, atoms1->nres);
576 rm_gropbc(atoms1, x1, box1);
579 fprintf(stderr, "Select group from first structure\n");
580 get_index(atoms1, opt2fn_null("-n1", NFILE, fnm),
581 1, &isize1, &index1, &groupnames1);
584 if (bFit && (isize1 < 3))
586 gmx_fatal(FARGS, "Need >= 3 points to fit!\n");
589 /* reading second structure file */
590 fprintf(stderr, "\nReading second structure file\n");
592 read_tps_conf(conf2file, title2, top2, &ePBC2, &x2, &v2, box2, TRUE);
593 atoms2 = &(top2->atoms);
594 fprintf(stderr, "%s\nContaining %d atoms in %d residues\n",
595 title2, atoms2->nr, atoms2->nres);
599 rm_gropbc(atoms2, x2, box2);
602 fprintf(stderr, "Select group from second structure\n");
603 get_index(atoms2, opt2fn_null("-n2", NFILE, fnm),
604 1, &isize2, &index2, &groupnames2);
608 find_matching_names(&isize1, index1, atoms1, &isize2, index2, atoms2);
611 fp = gmx_ffopen(matchndxfile, "w");
612 fprintf(fp, "; Matching atoms between %s from %s and %s from %s\n",
613 groupnames1, conf1file, groupnames2, conf2file);
614 fprintf(fp, "[ Match_%s_%s ]\n", conf1file, groupnames1);
615 for (i = 0; i < isize1; i++)
617 fprintf(fp, "%4u%s", index1[i]+1, (i%15 == 14 || i == isize1-1) ? "\n" : " ");
619 fprintf(fp, "[ Match_%s_%s ]\n", conf2file, groupnames2);
620 for (i = 0; i < isize2; i++)
622 fprintf(fp, "%4u%s", index2[i]+1, (i%15 == 14 || i == isize2-1) ? "\n" : " ");
627 /* check isizes, must be equal */
628 if (isize2 != isize1)
630 gmx_fatal(FARGS, "You selected groups with differen number of atoms.\n");
633 for (i = 0; i < isize1; i++)
635 name1 = *atoms1->atomname[index1[i]];
636 name2 = *atoms2->atomname[index2[i]];
637 if (strcmp(name1, name2))
642 "Warning: atomnames at index %d don't match: %u %s, %u %s\n",
643 i+1, index1[i]+1, name1, index2[i]+1, name2);
649 atoms1->atom[index1[i]].m = 1;
650 atoms2->atom[index2[i]].m = 1;
655 fprintf(stderr, "%d atomname%s did not match\n", warn, (warn == 1) ? "" : "s");
660 /* calculate and remove center of mass of structures */
661 calc_rm_cm(isize1, index1, atoms1, x1, xcm1);
662 calc_rm_cm(isize2, index2, atoms2, x2, xcm2);
664 snew(w_rls, atoms2->nr);
665 snew(fit_x, atoms2->nr);
666 for (at = 0; (at < isize1); at++)
668 w_rls[index2[at]] = atoms1->atom[index1[at]].m;
669 copy_rvec(x1[index1[at]], fit_x[index2[at]]);
672 /* do the least squares fit to the reference structure */
673 do_fit(atoms2->nr, w_rls, fit_x, x2);
686 /* calculate the rms deviation */
692 for (at = 0; at < isize1; at++)
694 mass = atoms1->atom[index1[at]].m;
695 for (m = 0; m < DIM; m++)
697 msd = sqr(x1[index1[at]][m] - x2[index2[at]][m]);
701 maxmsd = max(maxmsd, msds[at]);
702 minmsd = min(minmsd, msds[at]);
705 rms = sqrt(rms/totmass);
707 printf("Root mean square deviation after lsq fit = %g nm\n", rms);
710 printf("Atomic MSD's range from %g to %g nm^2\n", minmsd, maxmsd);
715 /* reset coordinates of reference and fitted structure */
716 for (i = 0; i < atoms1->nr; i++)
718 for (m = 0; m < DIM; m++)
723 for (i = 0; i < atoms2->nr; i++)
725 for (m = 0; m < DIM; m++)
732 outfile = ftp2fn(efSTO, NFILE, fnm);
733 switch (fn2ftp(outfile))
740 srenew(atoms1->pdbinfo, atoms1->nr);
741 srenew(atoms1->atom, atoms1->nr); /* Why renew atom? */
743 /* Avoid segfaults when writing the pdb-file */
744 for (i = 0; i < atoms1->nr; i++)
746 atoms1->pdbinfo[i].type = eptAtom;
747 atoms1->pdbinfo[i].occup = 1.00;
748 atoms1->pdbinfo[i].bAnisotropic = FALSE;
751 atoms1->pdbinfo[i].bfac = 0;
755 atoms1->resinfo[atoms1->atom[i].resind].chainid = 'A';
759 for (i = 0; i < isize1; i++)
761 /* atoms1->pdbinfo[index1[i]].type = eptAtom; */
762 /* atoms1->pdbinfo[index1[i]].bAnisotropic = FALSE; */
765 atoms1->pdbinfo[index1[i]].bfac = (800*M_PI*M_PI/3.0)*msds[i];
768 /* atoms1->resinfo[atoms1->atom[index1[i]].resind].chain = 'A'; */
770 srenew(atoms2->pdbinfo, atoms2->nr);
771 srenew(atoms2->atom, atoms2->nr); /* Why renew atom? */
773 for (i = 0; i < atoms2->nr; i++)
775 atoms2->pdbinfo[i].type = eptAtom;
776 atoms2->pdbinfo[i].occup = 1.00;
777 atoms2->pdbinfo[i].bAnisotropic = FALSE;
780 atoms2->pdbinfo[i].bfac = 0;
784 atoms2->resinfo[atoms1->atom[i].resind].chainid = 'B';
788 for (i = 0; i < isize2; i++)
790 /* atoms2->pdbinfo[index2[i]].type = eptAtom; */
791 /* atoms2->pdbinfo[index2[i]].bAnisotropic = FALSE; */
794 atoms2->pdbinfo[index2[i]].bfac = (800*M_PI*M_PI/3.0)*msds[i];
797 /* atoms2->resinfo[atoms2->atom[index2[i]].resind].chain = 'B'; */
800 fp = gmx_ffopen(outfile, "w");
803 write_pdbfile(fp, title1, atoms1, x1, ePBC1, box1, ' ', 1, NULL, TRUE);
805 write_pdbfile(fp, title2, atoms2, x2, ePBC2, box2, ' ', bOne ? -1 : 2, NULL, TRUE);
811 fprintf(stderr, "WARNING: cannot write B-factor values to gro file\n");
813 fp = gmx_ffopen(outfile, "w");
816 write_hconf_p(fp, title1, atoms1, 3, x1, v1, box1);
818 write_hconf_p(fp, title2, atoms2, 3, x2, v2, box2);
824 fprintf(stderr, "WARNING: cannot write B-factor values to %s file\n",
825 ftp2ext(fn2ftp(outfile)));
830 "WARNING: cannot write the reference structure to %s file\n",
831 ftp2ext(fn2ftp(outfile)));
833 write_sto_conf(outfile, title2, atoms2, x2, v2, ePBC2, box2);
837 view_all(oenv, NFILE, fnm);