3 * This source code is part of
7 * GROningen MAchine for Chemical Simulations
10 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
11 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
12 * Copyright (c) 2001-2004, The GROMACS development team,
13 * check out http://www.gromacs.org for more information.
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
20 * If you want to redistribute modifications, please consider that
21 * scientific software is very special. Version control is crucial -
22 * bugs must be traceable. We will be happy to consider code for
23 * inclusion in the official distribution, but derived work must not
24 * be called official GROMACS. Details are found in the README & COPYING
25 * files - if they are missing, get the official version at www.gromacs.org.
27 * To help us fund GROMACS development, we humbly ask that you cite
28 * the papers on the package - you can find them in the top README file.
30 * For more info, check our website at http://www.gromacs.org
33 * Green Red Orange Magenta Azure Cyan Skyblue
39 #include "gromacs/fileio/filenm.h"
46 #include "gromacs/fileio/tpxio.h"
51 #include "gmx_fatal.h"
52 #include "gromacs/fileio/futil.h"
53 #include "gromacs/fileio/confio.h"
54 #include "gromacs/fileio/pdbio.h"
62 void calc_rm_cm(int isize, atom_id index[], t_atoms *atoms, rvec x[], rvec xcm)
67 /* calculate and remove center of mass of reference structure */
70 for (i = 0; i < isize; i++)
72 m = atoms->atom[index[i]].m;
73 for (d = 0; d < DIM; d++)
75 xcm[d] += m*x[index[i]][d];
79 svmul(1/tm, xcm, xcm);
80 for (i = 0; i < atoms->nr; i++)
86 int build_res_index(int isize, atom_id index[], t_atom atom[], int rindex[])
91 rindex[r] = atom[index[0]].resind;
93 for (i = 1; i < isize; i++)
95 if (atom[index[i]].resind != rindex[r-1])
97 rindex[r] = atom[index[i]].resind;
105 int find_res_end(int i, int isize, atom_id index[], t_atoms *atoms)
109 rnr = atoms->atom[index[i]].resind;
110 while (i < isize && atoms->atom[index[i]].resind == rnr)
117 int debug_strcmp(char s1[], char s2[])
121 fprintf(debug, " %s-%s", s1, s2);
123 return strcmp(s1, s2);
126 int find_next_match_atoms_in_res(int *i1, atom_id index1[],
127 int m1, char **atnms1[],
128 int *i2, atom_id index2[],
129 int m2, char **atnms2[])
131 int dx, dy, dmax, cmp;
132 gmx_bool bFW = FALSE;
136 dmax = max(m1-*i1, m2-*i2);
137 for (dx = 0; dx < dmax && cmp != 0; dx++)
139 for (dy = dx; dy < dmax && cmp != 0; dy++)
148 if (*i1+dx < m1 && *i2+dy < m2)
151 cmp = debug_strcmp(*atnms1[index1[*i1+dx]], *atnms2[index2[*i2+dy]]);
154 fprintf(debug, "(%d %d)", *i1+dx, *i2+dy);
157 if (cmp != 0 && *i1+dy < m1 && *i2+dx < m2)
160 cmp = debug_strcmp(*atnms1[index1[*i1+dy]], *atnms2[index2[*i2+dx]]);
163 fprintf(debug, "(%d %d)", *i1+dy, *i2+dx);
169 /* apparently, dx and dy are incremented one more time
170 as the loops terminate: we correct this here */
177 fprintf(debug, "{%d %d}", *i1+bFW ? dx : dy, *i2+bFW ? dy : dx );
194 static int find_next_match_res(int *rnr1, int isize1,
195 int index1[], t_resinfo *resinfo1,
196 int *rnr2, int isize2,
197 int index2[], t_resinfo *resinfo2)
199 int dx, dy, dmax, cmp, rr1, rr2;
200 gmx_bool bFW = FALSE, bFF = FALSE;
204 while (rr1 < isize1 && *rnr1 != index1[rr1])
209 while (rr2 < isize2 && *rnr2 != index2[rr2])
215 dmax = max(isize1-rr1, isize2-rr2);
218 fprintf(debug, " R:%d-%d:%d-%d:%d ",
219 rr1, isize1, rr2, isize2, dmax);
221 for (dx = 0; dx < dmax && cmp != 0; dx++)
223 for (dy = 0; dy <= dx && cmp != 0; dy++)
228 if (rr1+dx < isize1 && rr2+dy < isize2)
231 cmp = debug_strcmp(*resinfo1[index1[rr1+dx]].name,
232 *resinfo2[index2[rr2+dy]].name);
235 fprintf(debug, "(%d %d)", rr1+dx, rr2+dy);
238 if (cmp != 0 && rr1+dy < isize1 && rr2+dx < isize2)
241 cmp = debug_strcmp(*resinfo1[index1[rr1+dy]].name,
242 *resinfo2[index2[rr2+dx]].name);
245 fprintf(debug, "(%d %d)", rr1+dy, rr2+dx);
248 if (dx != 0 && cmp != 0 && rr1+dx < isize1 && rr2+dx < isize2)
251 cmp = debug_strcmp(*resinfo1[index1[rr1+dx]].name,
252 *resinfo2[index2[rr2+dx]].name);
255 fprintf(debug, "(%d %d)", rr1+dx, rr2+dx);
265 /* apparently, dx and dy are incremented one more time
266 as the loops terminate: we correct this here */
269 /* if we skipped equal on both sides, only skip one residue
270 to allow for single mutations of residues... */
275 fprintf(debug, "%d.%d.%dX%sX%s", dx, rr1, rr2,
276 *resinfo1[index1[rr1+1]].name,
277 *resinfo2[index2[rr2+1]].name);
310 int find_first_atom_in_res(int rnr, int isize, atom_id index[], t_atom atom[])
315 while (i < isize && atom[index[i]].resind != rnr)
320 if (atom[index[i]].resind == rnr)
330 void find_matching_names(int *isize1, atom_id index1[], t_atoms *atoms1,
331 int *isize2, atom_id index2[], t_atoms *atoms2)
333 int i, i1, i2, ii1, ii2, m1, m2;
335 int r, rnr1, rnr2, prnr1, prnr2;
337 int *rindex1, *rindex2;
338 char *resnm1, *resnm2, *atnm1, *atnm2;
339 char ***atnms1, ***atnms2;
340 t_resinfo *resinfo1, *resinfo2;
342 /* set some handy shortcuts */
343 resinfo1 = atoms1->resinfo;
344 atnms1 = atoms1->atomname;
345 resinfo2 = atoms2->resinfo;
346 atnms2 = atoms2->atomname;
348 /* build indexes of selected residues */
349 snew(rindex1, atoms1->nres);
350 rsize1 = build_res_index(*isize1, index1, atoms1->atom, rindex1);
351 snew(rindex2, atoms2->nres);
352 rsize2 = build_res_index(*isize2, index2, atoms2->atom, rindex2);
357 prnr1 = prnr2 = NOTSET;
360 fprintf(debug, "Find matching names: %d, %d\n", *isize1, *isize2);
362 while (atcmp == 0 && i1 < *isize1 && i2 < *isize2)
365 rnr1 = atoms1->atom[index1[i1]].resind;
366 rnr2 = atoms2->atom[index2[i2]].resind;
367 if (rnr1 != prnr1 || rnr2 != prnr2)
371 fprintf(debug, "R: %s%d %s%d\n",
372 *resinfo1[rnr1].name, rnr1, *resinfo2[rnr2].name, rnr2);
374 rescmp = strcmp(*resinfo1[rnr1].name, *resinfo2[rnr2].name);
378 fprintf(debug, "comparing %d %d", i1, i2);
380 atcmp = debug_strcmp(*atnms1[index1[i1]], *atnms2[index2[i2]]);
383 if (atcmp != 0) /* no match -> find match within residues */
385 m1 = find_res_end(i1, *isize1, index1, atoms1);
386 m2 = find_res_end(i2, *isize2, index2, atoms2);
389 fprintf(debug, " [%d<%d %d<%d]", i1, m1, i2, m2);
391 atcmp = find_next_match_atoms_in_res(&i1, index1, m1, atnms1,
392 &i2, index2, m2, atnms2);
395 fprintf(debug, " -> %d %d %s-%s", i1, i2,
396 *atnms1[index1[i1]], *atnms2[index2[i2]]);
400 if (atcmp != 0) /* still no match -> next residue(s) */
404 rescmp = find_next_match_res(&rnr1, rsize1, rindex1, resinfo1,
405 &rnr2, rsize2, rindex2, resinfo2);
408 i1 = find_first_atom_in_res(rnr1, *isize1, index1, atoms1->atom);
412 i2 = find_first_atom_in_res(rnr2, *isize2, index2, atoms2->atom);
416 fprintf(debug, " -> %s%d-%s%d %s%d-%s%d",
417 *resinfo1[rnr1].name, rnr1,
418 *atnms1[index1[i1]], index1[i1],
419 *resinfo2[rnr2].name, rnr2,
420 *atnms2[index2[i2]], index2[i2]);
422 m1 = find_res_end(i1, *isize1, index1, atoms1);
423 m2 = find_res_end(i2, *isize2, index2, atoms2);
426 fprintf(debug, " [%d<%d %d<%d]", i1, m1, i2, m2);
428 atcmp = find_next_match_atoms_in_res(&i1, index1, m1, atnms1,
429 &i2, index2, m2, atnms2);
432 fprintf(debug, " -> %d %d %s-%s", i1, i2,
433 *atnms1[index1[i1]], *atnms2[index2[i2]]);
438 fprintf(debug, "(%d %d): %d %d\n", ii1, ii2, atcmp, rescmp);
440 if (atcmp == 0) /* if match -> store indices */
442 index1[ii1++] = index1[i1];
443 index2[ii2++] = index2[i2];
455 gmx_fatal(FARGS, "DEATH HORROR: non-equal number of matching atoms!\n");
457 if (ii1 == i1 && ii2 == i2)
459 printf("All atoms in index groups 1 and 2 match\n");
463 if (i1 == i2 && ii1 == ii2)
465 printf("Both index groups modified from %d to %d atoms\n", i1, ii1);
471 printf("Index group 1 modified from %d to %d atoms\n", i1, ii1);
475 printf("Index group 2 modified from %d to %d atoms\n", i2, ii2);
484 int gmx_confrms(int argc, char *argv[])
486 const char *desc[] = {
487 "[TT]g_confrms[tt] computes the root mean square deviation (RMSD) of two",
488 "structures after least-squares fitting the second structure on the first one.",
489 "The two structures do NOT need to have the same number of atoms,",
490 "only the two index groups used for the fit need to be identical.",
491 "With [TT]-name[tt] only matching atom names from the selected groups",
492 "will be used for the fit and RMSD calculation. This can be useful ",
493 "when comparing mutants of a protein.",
495 "The superimposed structures are written to file. In a [TT].pdb[tt] file",
496 "the two structures will be written as separate models",
497 "(use [TT]rasmol -nmrpdb[tt]). Also in a [TT].pdb[tt] file, B-factors",
498 "calculated from the atomic MSD values can be written with [TT]-bfac[tt].",
500 static gmx_bool bOne = FALSE, bRmpbc = FALSE, bMW = TRUE, bName = FALSE,
501 bBfac = FALSE, bFit = TRUE, bLabel = FALSE;
504 { "-one", FALSE, etBOOL, {&bOne}, "Only write the fitted structure to file" },
505 { "-mw", FALSE, etBOOL, {&bMW}, "Mass-weighted fitting and RMSD" },
506 { "-pbc", FALSE, etBOOL, {&bRmpbc}, "Try to make molecules whole again" },
507 { "-fit", FALSE, etBOOL, {&bFit},
508 "Do least squares superposition of the target structure to the reference" },
509 { "-name", FALSE, etBOOL, {&bName},
510 "Only compare matching atom names" },
511 { "-label", FALSE, etBOOL, {&bLabel},
512 "Added chain labels A for first and B for second structure"},
513 { "-bfac", FALSE, etBOOL, {&bBfac},
514 "Output B-factors from atomic MSD values" }
517 { efTPS, "-f1", "conf1.gro", ffREAD },
518 { efSTX, "-f2", "conf2", ffREAD },
519 { efSTO, "-o", "fit.pdb", ffWRITE },
520 { efNDX, "-n1", "fit1.ndx", ffOPTRD },
521 { efNDX, "-n2", "fit2.ndx", ffOPTRD },
522 { efNDX, "-no", "match.ndx", ffOPTWR }
524 #define NFILE asize(fnm)
526 /* the two structure files */
527 const char *conf1file, *conf2file, *matchndxfile, *outfile;
529 char title1[STRLEN], title2[STRLEN], *name1, *name2;
530 t_topology *top1, *top2;
532 t_atoms *atoms1, *atoms2;
535 real *w_rls, mass, totmass;
536 rvec *x1, *v1, *x2, *v2, *fit_x;
544 /* center of mass calculation */
548 /* variables for fit */
549 char *groupnames1, *groupnames2;
551 atom_id *index1, *index2;
552 real rms, msd, minmsd, maxmsd;
556 if (!parse_common_args(&argc, argv, PCA_BE_NICE | PCA_CAN_VIEW,
557 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &oenv))
561 matchndxfile = opt2fn_null("-no", NFILE, fnm);
562 conf1file = ftp2fn(efTPS, NFILE, fnm);
563 conf2file = ftp2fn(efSTX, NFILE, fnm);
565 /* reading reference structure from first structure file */
566 fprintf(stderr, "\nReading first structure file\n");
568 read_tps_conf(conf1file, title1, top1, &ePBC1, &x1, &v1, box1, TRUE);
569 atoms1 = &(top1->atoms);
570 fprintf(stderr, "%s\nContaining %d atoms in %d residues\n",
571 title1, atoms1->nr, atoms1->nres);
575 rm_gropbc(atoms1, x1, box1);
578 fprintf(stderr, "Select group from first structure\n");
579 get_index(atoms1, opt2fn_null("-n1", NFILE, fnm),
580 1, &isize1, &index1, &groupnames1);
583 if (bFit && (isize1 < 3))
585 gmx_fatal(FARGS, "Need >= 3 points to fit!\n");
588 /* reading second structure file */
589 fprintf(stderr, "\nReading second structure file\n");
591 read_tps_conf(conf2file, title2, top2, &ePBC2, &x2, &v2, box2, TRUE);
592 atoms2 = &(top2->atoms);
593 fprintf(stderr, "%s\nContaining %d atoms in %d residues\n",
594 title2, atoms2->nr, atoms2->nres);
598 rm_gropbc(atoms2, x2, box2);
601 fprintf(stderr, "Select group from second structure\n");
602 get_index(atoms2, opt2fn_null("-n2", NFILE, fnm),
603 1, &isize2, &index2, &groupnames2);
607 find_matching_names(&isize1, index1, atoms1, &isize2, index2, atoms2);
610 fp = ffopen(matchndxfile, "w");
611 fprintf(fp, "; Matching atoms between %s from %s and %s from %s\n",
612 groupnames1, conf1file, groupnames2, conf2file);
613 fprintf(fp, "[ Match_%s_%s ]\n", conf1file, groupnames1);
614 for (i = 0; i < isize1; i++)
616 fprintf(fp, "%4u%s", index1[i]+1, (i%15 == 14 || i == isize1-1) ? "\n" : " ");
618 fprintf(fp, "[ Match_%s_%s ]\n", conf2file, groupnames2);
619 for (i = 0; i < isize2; i++)
621 fprintf(fp, "%4u%s", index2[i]+1, (i%15 == 14 || i == isize2-1) ? "\n" : " ");
626 /* check isizes, must be equal */
627 if (isize2 != isize1)
629 gmx_fatal(FARGS, "You selected groups with differen number of atoms.\n");
632 for (i = 0; i < isize1; i++)
634 name1 = *atoms1->atomname[index1[i]];
635 name2 = *atoms2->atomname[index2[i]];
636 if (strcmp(name1, name2))
641 "Warning: atomnames at index %d don't match: %u %s, %u %s\n",
642 i+1, index1[i]+1, name1, index2[i]+1, name2);
648 atoms1->atom[index1[i]].m = 1;
649 atoms2->atom[index2[i]].m = 1;
654 fprintf(stderr, "%d atomname%s did not match\n", warn, (warn == 1) ? "" : "s");
659 /* calculate and remove center of mass of structures */
660 calc_rm_cm(isize1, index1, atoms1, x1, xcm1);
661 calc_rm_cm(isize2, index2, atoms2, x2, xcm2);
663 snew(w_rls, atoms2->nr);
664 snew(fit_x, atoms2->nr);
665 for (at = 0; (at < isize1); at++)
667 w_rls[index2[at]] = atoms1->atom[index1[at]].m;
668 copy_rvec(x1[index1[at]], fit_x[index2[at]]);
671 /* do the least squares fit to the reference structure */
672 do_fit(atoms2->nr, w_rls, fit_x, x2);
685 /* calculate the rms deviation */
691 for (at = 0; at < isize1; at++)
693 mass = atoms1->atom[index1[at]].m;
694 for (m = 0; m < DIM; m++)
696 msd = sqr(x1[index1[at]][m] - x2[index2[at]][m]);
700 maxmsd = max(maxmsd, msds[at]);
701 minmsd = min(minmsd, msds[at]);
704 rms = sqrt(rms/totmass);
706 printf("Root mean square deviation after lsq fit = %g nm\n", rms);
709 printf("Atomic MSD's range from %g to %g nm^2\n", minmsd, maxmsd);
714 /* reset coordinates of reference and fitted structure */
715 for (i = 0; i < atoms1->nr; i++)
717 for (m = 0; m < DIM; m++)
722 for (i = 0; i < atoms2->nr; i++)
724 for (m = 0; m < DIM; m++)
731 outfile = ftp2fn(efSTO, NFILE, fnm);
732 switch (fn2ftp(outfile))
739 srenew(atoms1->pdbinfo, atoms1->nr);
740 srenew(atoms1->atom, atoms1->nr); /* Why renew atom? */
742 /* Avoid segfaults when writing the pdb-file */
743 for (i = 0; i < atoms1->nr; i++)
745 atoms1->pdbinfo[i].type = eptAtom;
746 atoms1->pdbinfo[i].occup = 1.00;
747 atoms1->pdbinfo[i].bAnisotropic = FALSE;
750 atoms1->pdbinfo[i].bfac = 0;
754 atoms1->resinfo[atoms1->atom[i].resind].chainid = 'A';
758 for (i = 0; i < isize1; i++)
760 /* atoms1->pdbinfo[index1[i]].type = eptAtom; */
761 /* atoms1->pdbinfo[index1[i]].bAnisotropic = FALSE; */
764 atoms1->pdbinfo[index1[i]].bfac = (800*M_PI*M_PI/3.0)*msds[i];
767 /* atoms1->resinfo[atoms1->atom[index1[i]].resind].chain = 'A'; */
769 srenew(atoms2->pdbinfo, atoms2->nr);
770 srenew(atoms2->atom, atoms2->nr); /* Why renew atom? */
772 for (i = 0; i < atoms2->nr; i++)
774 atoms2->pdbinfo[i].type = eptAtom;
775 atoms2->pdbinfo[i].occup = 1.00;
776 atoms2->pdbinfo[i].bAnisotropic = FALSE;
779 atoms2->pdbinfo[i].bfac = 0;
783 atoms2->resinfo[atoms1->atom[i].resind].chainid = 'B';
787 for (i = 0; i < isize2; i++)
789 /* atoms2->pdbinfo[index2[i]].type = eptAtom; */
790 /* atoms2->pdbinfo[index2[i]].bAnisotropic = FALSE; */
793 atoms2->pdbinfo[index2[i]].bfac = (800*M_PI*M_PI/3.0)*msds[i];
796 /* atoms2->resinfo[atoms2->atom[index2[i]].resind].chain = 'B'; */
799 fp = ffopen(outfile, "w");
802 write_pdbfile(fp, title1, atoms1, x1, ePBC1, box1, ' ', 1, NULL, TRUE);
804 write_pdbfile(fp, title2, atoms2, x2, ePBC2, box2, ' ', bOne ? -1 : 2, NULL, TRUE);
810 fprintf(stderr, "WARNING: cannot write B-factor values to gro file\n");
812 fp = ffopen(outfile, "w");
815 write_hconf_p(fp, title1, atoms1, 3, x1, v1, box1);
817 write_hconf_p(fp, title2, atoms2, 3, x2, v2, box2);
823 fprintf(stderr, "WARNING: cannot write B-factor values to %s file\n",
824 ftp2ext(fn2ftp(outfile)));
829 "WARNING: cannot write the reference structure to %s file\n",
830 ftp2ext(fn2ftp(outfile)));
832 write_sto_conf(outfile, title2, atoms2, x2, v2, ePBC2, box2);
836 view_all(oenv, NFILE, fnm);