3 * This source code is part of
7 * GROningen MAchine for Chemical Simulations
10 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
11 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
12 * Copyright (c) 2001-2004, The GROMACS development team,
13 * check out http://www.gromacs.org for more information.
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
20 * If you want to redistribute modifications, please consider that
21 * scientific software is very special. Version control is crucial -
22 * bugs must be traceable. We will be happy to consider code for
23 * inclusion in the official distribution, but derived work must not
24 * be called official GROMACS. Details are found in the README & COPYING
25 * files - if they are missing, get the official version at www.gromacs.org.
27 * To help us fund GROMACS development, we humbly ask that you cite
28 * the papers on the package - you can find them in the top README file.
30 * For more info, check our website at http://www.gromacs.org
33 * Green Red Orange Magenta Azure Cyan Skyblue
51 #include "gmx_fatal.h"
62 void calc_rm_cm(int isize, atom_id index[], t_atoms *atoms, rvec x[], rvec xcm)
67 /* calculate and remove center of mass of reference structure */
70 for (i = 0; i < isize; i++)
72 m = atoms->atom[index[i]].m;
73 for (d = 0; d < DIM; d++)
75 xcm[d] += m*x[index[i]][d];
79 svmul(1/tm, xcm, xcm);
80 for (i = 0; i < atoms->nr; i++)
86 int build_res_index(int isize, atom_id index[], t_atom atom[], int rindex[])
91 rindex[r] = atom[index[0]].resind;
93 for (i = 1; i < isize; i++)
95 if (atom[index[i]].resind != rindex[r-1])
97 rindex[r] = atom[index[i]].resind;
105 int find_res_end(int i, int isize, atom_id index[], t_atoms *atoms)
109 rnr = atoms->atom[index[i]].resind;
110 while (i < isize && atoms->atom[index[i]].resind == rnr)
117 int debug_strcmp(char s1[], char s2[])
121 fprintf(debug, " %s-%s", s1, s2);
123 return strcmp(s1, s2);
126 int find_next_match_atoms_in_res(int *i1, atom_id index1[],
127 int m1, char **atnms1[],
128 int *i2, atom_id index2[],
129 int m2, char **atnms2[])
131 int dx, dy, dmax, cmp;
132 gmx_bool bFW = FALSE;
136 dmax = max(m1-*i1, m2-*i2);
137 for (dx = 0; dx < dmax && cmp != 0; dx++)
139 for (dy = dx; dy < dmax && cmp != 0; dy++)
148 if (*i1+dx < m1 && *i2+dy < m2)
151 cmp = debug_strcmp(*atnms1[index1[*i1+dx]], *atnms2[index2[*i2+dy]]);
154 fprintf(debug, "(%d %d)", *i1+dx, *i2+dy);
157 if (cmp != 0 && *i1+dy < m1 && *i2+dx < m2)
160 cmp = debug_strcmp(*atnms1[index1[*i1+dy]], *atnms2[index2[*i2+dx]]);
163 fprintf(debug, "(%d %d)", *i1+dy, *i2+dx);
169 /* apparently, dx and dy are incremented one more time
170 as the loops terminate: we correct this here */
177 fprintf(debug, "{%d %d}", *i1+bFW ? dx : dy, *i2+bFW ? dy : dx );
194 static int find_next_match_res(int *rnr1, int isize1,
195 int index1[], t_resinfo *resinfo1,
196 int *rnr2, int isize2,
197 int index2[], t_resinfo *resinfo2)
199 int dx, dy, dmax, cmp, rr1, rr2;
200 gmx_bool bFW = FALSE, bFF = FALSE;
204 while (rr1 < isize1 && *rnr1 != index1[rr1])
209 while (rr2 < isize2 && *rnr2 != index2[rr2])
215 dmax = max(isize1-rr1, isize2-rr2);
218 fprintf(debug, " R:%d-%d:%d-%d:%d ",
219 rr1, isize1, rr2, isize2, dmax);
221 for (dx = 0; dx < dmax && cmp != 0; dx++)
223 for (dy = 0; dy <= dx && cmp != 0; dy++)
228 if (rr1+dx < isize1 && rr2+dy < isize2)
231 cmp = debug_strcmp(*resinfo1[index1[rr1+dx]].name,
232 *resinfo2[index2[rr2+dy]].name);
235 fprintf(debug, "(%d %d)", rr1+dx, rr2+dy);
238 if (cmp != 0 && rr1+dy < isize1 && rr2+dx < isize2)
241 cmp = debug_strcmp(*resinfo1[index1[rr1+dy]].name,
242 *resinfo2[index2[rr2+dx]].name);
245 fprintf(debug, "(%d %d)", rr1+dy, rr2+dx);
248 if (dx != 0 && cmp != 0 && rr1+dx < isize1 && rr2+dx < isize2)
251 cmp = debug_strcmp(*resinfo1[index1[rr1+dx]].name,
252 *resinfo2[index2[rr2+dx]].name);
255 fprintf(debug, "(%d %d)", rr1+dx, rr2+dx);
265 /* apparently, dx and dy are incremented one more time
266 as the loops terminate: we correct this here */
269 /* if we skipped equal on both sides, only skip one residue
270 to allow for single mutations of residues... */
275 fprintf(debug, "%d.%d.%dX%sX%s", dx, rr1, rr2,
276 *resinfo1[index1[rr1+1]].name,
277 *resinfo2[index2[rr2+1]].name);
310 int find_first_atom_in_res(int rnr, int isize, atom_id index[], t_atom atom[])
315 while (i < isize && atom[index[i]].resind != rnr)
320 if (atom[index[i]].resind == rnr)
330 void find_matching_names(int *isize1, atom_id index1[], t_atoms *atoms1,
331 int *isize2, atom_id index2[], t_atoms *atoms2)
333 int i, i1, i2, ii1, ii2, m1, m2;
335 int r, rnr1, rnr2, prnr1, prnr2;
337 int *rindex1, *rindex2;
338 char *resnm1, *resnm2, *atnm1, *atnm2;
339 char ***atnms1, ***atnms2;
340 t_resinfo *resinfo1, *resinfo2;
342 /* set some handy shortcuts */
343 resinfo1 = atoms1->resinfo;
344 atnms1 = atoms1->atomname;
345 resinfo2 = atoms2->resinfo;
346 atnms2 = atoms2->atomname;
348 /* build indexes of selected residues */
349 snew(rindex1, atoms1->nres);
350 rsize1 = build_res_index(*isize1, index1, atoms1->atom, rindex1);
351 snew(rindex2, atoms2->nres);
352 rsize2 = build_res_index(*isize2, index2, atoms2->atom, rindex2);
357 prnr1 = prnr2 = NOTSET;
360 fprintf(debug, "Find matching names: %d, %d\n", *isize1, *isize2);
362 while (atcmp == 0 && i1 < *isize1 && i2 < *isize2)
365 rnr1 = atoms1->atom[index1[i1]].resind;
366 rnr2 = atoms2->atom[index2[i2]].resind;
367 if (rnr1 != prnr1 || rnr2 != prnr2)
371 fprintf(debug, "R: %s%d %s%d\n",
372 *resinfo1[rnr1].name, rnr1, *resinfo2[rnr2].name, rnr2);
374 rescmp = strcmp(*resinfo1[rnr1].name, *resinfo2[rnr2].name);
378 fprintf(debug, "comparing %d %d", i1, i2);
380 atcmp = debug_strcmp(*atnms1[index1[i1]], *atnms2[index2[i2]]);
383 if (atcmp != 0) /* no match -> find match within residues */
385 m1 = find_res_end(i1, *isize1, index1, atoms1);
386 m2 = find_res_end(i2, *isize2, index2, atoms2);
389 fprintf(debug, " [%d<%d %d<%d]", i1, m1, i2, m2);
391 atcmp = find_next_match_atoms_in_res(&i1, index1, m1, atnms1,
392 &i2, index2, m2, atnms2);
395 fprintf(debug, " -> %d %d %s-%s", i1, i2,
396 *atnms1[index1[i1]], *atnms2[index2[i2]]);
400 if (atcmp != 0) /* still no match -> next residue(s) */
404 rescmp = find_next_match_res(&rnr1, rsize1, rindex1, resinfo1,
405 &rnr2, rsize2, rindex2, resinfo2);
408 i1 = find_first_atom_in_res(rnr1, *isize1, index1, atoms1->atom);
412 i2 = find_first_atom_in_res(rnr2, *isize2, index2, atoms2->atom);
416 fprintf(debug, " -> %s%d-%s%d %s%d-%s%d",
417 *resinfo1[rnr1].name, rnr1,
418 *atnms1[index1[i1]], index1[i1],
419 *resinfo2[rnr2].name, rnr2,
420 *atnms2[index2[i2]], index2[i2]);
422 m1 = find_res_end(i1, *isize1, index1, atoms1);
423 m2 = find_res_end(i2, *isize2, index2, atoms2);
426 fprintf(debug, " [%d<%d %d<%d]", i1, m1, i2, m2);
428 atcmp = find_next_match_atoms_in_res(&i1, index1, m1, atnms1,
429 &i2, index2, m2, atnms2);
432 fprintf(debug, " -> %d %d %s-%s", i1, i2,
433 *atnms1[index1[i1]], *atnms2[index2[i2]]);
438 fprintf(debug, "(%d %d): %d %d\n", ii1, ii2, atcmp, rescmp);
440 if (atcmp == 0) /* if match -> store indices */
442 index1[ii1++] = index1[i1];
443 index2[ii2++] = index2[i2];
455 gmx_fatal(FARGS, "DEATH HORROR: non-equal number of matching atoms!\n");
457 if (ii1 == i1 && ii2 == i2)
459 printf("All atoms in index groups 1 and 2 match\n");
463 if (i1 == i2 && ii1 == ii2)
465 printf("Both index groups modified from %d to %d atoms\n", i1, ii1);
471 printf("Index group 1 modified from %d to %d atoms\n", i1, ii1);
475 printf("Index group 2 modified from %d to %d atoms\n", i2, ii2);
484 int gmx_confrms(int argc, char *argv[])
486 const char *desc[] = {
487 "[TT]g_confrms[tt] computes the root mean square deviation (RMSD) of two",
488 "structures after least-squares fitting the second structure on the first one.",
489 "The two structures do NOT need to have the same number of atoms,",
490 "only the two index groups used for the fit need to be identical.",
491 "With [TT]-name[tt] only matching atom names from the selected groups",
492 "will be used for the fit and RMSD calculation. This can be useful ",
493 "when comparing mutants of a protein.",
495 "The superimposed structures are written to file. In a [TT].pdb[tt] file",
496 "the two structures will be written as separate models",
497 "(use [TT]rasmol -nmrpdb[tt]). Also in a [TT].pdb[tt] file, B-factors",
498 "calculated from the atomic MSD values can be written with [TT]-bfac[tt].",
500 static gmx_bool bOne = FALSE, bRmpbc = FALSE, bMW = TRUE, bName = FALSE,
501 bBfac = FALSE, bFit = TRUE, bLabel = FALSE;
504 { "-one", FALSE, etBOOL, {&bOne}, "Only write the fitted structure to file" },
505 { "-mw", FALSE, etBOOL, {&bMW}, "Mass-weighted fitting and RMSD" },
506 { "-pbc", FALSE, etBOOL, {&bRmpbc}, "Try to make molecules whole again" },
507 { "-fit", FALSE, etBOOL, {&bFit},
508 "Do least squares superposition of the target structure to the reference" },
509 { "-name", FALSE, etBOOL, {&bName},
510 "Only compare matching atom names" },
511 { "-label", FALSE, etBOOL, {&bLabel},
512 "Added chain labels A for first and B for second structure"},
513 { "-bfac", FALSE, etBOOL, {&bBfac},
514 "Output B-factors from atomic MSD values" }
517 { efTPS, "-f1", "conf1.gro", ffREAD },
518 { efSTX, "-f2", "conf2", ffREAD },
519 { efSTO, "-o", "fit.pdb", ffWRITE },
520 { efNDX, "-n1", "fit1.ndx", ffOPTRD },
521 { efNDX, "-n2", "fit2.ndx", ffOPTRD },
522 { efNDX, "-no", "match.ndx", ffOPTWR }
524 #define NFILE asize(fnm)
526 /* the two structure files */
527 const char *conf1file, *conf2file, *matchndxfile, *outfile;
529 char title1[STRLEN], title2[STRLEN], *name1, *name2;
530 t_topology *top1, *top2;
532 t_atoms *atoms1, *atoms2;
535 real *w_rls, mass, totmass;
536 rvec *x1, *v1, *x2, *v2, *fit_x;
544 /* center of mass calculation */
548 /* variables for fit */
549 char *groupnames1, *groupnames2;
551 atom_id *index1, *index2;
552 real rms, msd, minmsd, maxmsd;
556 parse_common_args(&argc, argv, PCA_BE_NICE | PCA_CAN_VIEW,
557 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &oenv);
558 matchndxfile = opt2fn_null("-no", NFILE, fnm);
559 conf1file = ftp2fn(efTPS, NFILE, fnm);
560 conf2file = ftp2fn(efSTX, NFILE, fnm);
562 /* reading reference structure from first structure file */
563 fprintf(stderr, "\nReading first structure file\n");
565 read_tps_conf(conf1file, title1, top1, &ePBC1, &x1, &v1, box1, TRUE);
566 atoms1 = &(top1->atoms);
567 fprintf(stderr, "%s\nContaining %d atoms in %d residues\n",
568 title1, atoms1->nr, atoms1->nres);
572 rm_gropbc(atoms1, x1, box1);
575 fprintf(stderr, "Select group from first structure\n");
576 get_index(atoms1, opt2fn_null("-n1", NFILE, fnm),
577 1, &isize1, &index1, &groupnames1);
580 if (bFit && (isize1 < 3))
582 gmx_fatal(FARGS, "Need >= 3 points to fit!\n");
585 /* reading second structure file */
586 fprintf(stderr, "\nReading second structure file\n");
588 read_tps_conf(conf2file, title2, top2, &ePBC2, &x2, &v2, box2, TRUE);
589 atoms2 = &(top2->atoms);
590 fprintf(stderr, "%s\nContaining %d atoms in %d residues\n",
591 title2, atoms2->nr, atoms2->nres);
595 rm_gropbc(atoms2, x2, box2);
598 fprintf(stderr, "Select group from second structure\n");
599 get_index(atoms2, opt2fn_null("-n2", NFILE, fnm),
600 1, &isize2, &index2, &groupnames2);
604 find_matching_names(&isize1, index1, atoms1, &isize2, index2, atoms2);
607 fp = ffopen(matchndxfile, "w");
608 fprintf(fp, "; Matching atoms between %s from %s and %s from %s\n",
609 groupnames1, conf1file, groupnames2, conf2file);
610 fprintf(fp, "[ Match_%s_%s ]\n", conf1file, groupnames1);
611 for (i = 0; i < isize1; i++)
613 fprintf(fp, "%4u%s", index1[i]+1, (i%15 == 14 || i == isize1-1) ? "\n" : " ");
615 fprintf(fp, "[ Match_%s_%s ]\n", conf2file, groupnames2);
616 for (i = 0; i < isize2; i++)
618 fprintf(fp, "%4u%s", index2[i]+1, (i%15 == 14 || i == isize2-1) ? "\n" : " ");
623 /* check isizes, must be equal */
624 if (isize2 != isize1)
626 gmx_fatal(FARGS, "You selected groups with differen number of atoms.\n");
629 for (i = 0; i < isize1; i++)
631 name1 = *atoms1->atomname[index1[i]];
632 name2 = *atoms2->atomname[index2[i]];
633 if (strcmp(name1, name2))
638 "Warning: atomnames at index %d don't match: %u %s, %u %s\n",
639 i+1, index1[i]+1, name1, index2[i]+1, name2);
645 atoms1->atom[index1[i]].m = 1;
646 atoms2->atom[index2[i]].m = 1;
651 fprintf(stderr, "%d atomname%s did not match\n", warn, (warn == 1) ? "" : "s");
656 /* calculate and remove center of mass of structures */
657 calc_rm_cm(isize1, index1, atoms1, x1, xcm1);
658 calc_rm_cm(isize2, index2, atoms2, x2, xcm2);
660 snew(w_rls, atoms2->nr);
661 snew(fit_x, atoms2->nr);
662 for (at = 0; (at < isize1); at++)
664 w_rls[index2[at]] = atoms1->atom[index1[at]].m;
665 copy_rvec(x1[index1[at]], fit_x[index2[at]]);
668 /* do the least squares fit to the reference structure */
669 do_fit(atoms2->nr, w_rls, fit_x, x2);
682 /* calculate the rms deviation */
688 for (at = 0; at < isize1; at++)
690 mass = atoms1->atom[index1[at]].m;
691 for (m = 0; m < DIM; m++)
693 msd = sqr(x1[index1[at]][m] - x2[index2[at]][m]);
697 maxmsd = max(maxmsd, msds[at]);
698 minmsd = min(minmsd, msds[at]);
701 rms = sqrt(rms/totmass);
703 printf("Root mean square deviation after lsq fit = %g nm\n", rms);
706 printf("Atomic MSD's range from %g to %g nm^2\n", minmsd, maxmsd);
711 /* reset coordinates of reference and fitted structure */
712 for (i = 0; i < atoms1->nr; i++)
714 for (m = 0; m < DIM; m++)
719 for (i = 0; i < atoms2->nr; i++)
721 for (m = 0; m < DIM; m++)
728 outfile = ftp2fn(efSTO, NFILE, fnm);
729 switch (fn2ftp(outfile))
736 srenew(atoms1->pdbinfo, atoms1->nr);
737 srenew(atoms1->atom, atoms1->nr); /* Why renew atom? */
739 /* Avoid segfaults when writing the pdb-file */
740 for (i = 0; i < atoms1->nr; i++)
742 atoms1->pdbinfo[i].type = eptAtom;
743 atoms1->pdbinfo[i].occup = 1.00;
744 atoms1->pdbinfo[i].bAnisotropic = FALSE;
747 atoms1->pdbinfo[i].bfac = 0;
751 atoms1->resinfo[atoms1->atom[i].resind].chainid = 'A';
755 for (i = 0; i < isize1; i++)
757 /* atoms1->pdbinfo[index1[i]].type = eptAtom; */
758 /* atoms1->pdbinfo[index1[i]].bAnisotropic = FALSE; */
761 atoms1->pdbinfo[index1[i]].bfac = (800*M_PI*M_PI/3.0)*msds[i];
764 /* atoms1->resinfo[atoms1->atom[index1[i]].resind].chain = 'A'; */
766 srenew(atoms2->pdbinfo, atoms2->nr);
767 srenew(atoms2->atom, atoms2->nr); /* Why renew atom? */
769 for (i = 0; i < atoms2->nr; i++)
771 atoms2->pdbinfo[i].type = eptAtom;
772 atoms2->pdbinfo[i].occup = 1.00;
773 atoms2->pdbinfo[i].bAnisotropic = FALSE;
776 atoms2->pdbinfo[i].bfac = 0;
780 atoms2->resinfo[atoms1->atom[i].resind].chainid = 'B';
784 for (i = 0; i < isize2; i++)
786 /* atoms2->pdbinfo[index2[i]].type = eptAtom; */
787 /* atoms2->pdbinfo[index2[i]].bAnisotropic = FALSE; */
790 atoms2->pdbinfo[index2[i]].bfac = (800*M_PI*M_PI/3.0)*msds[i];
793 /* atoms2->resinfo[atoms2->atom[index2[i]].resind].chain = 'B'; */
796 fp = ffopen(outfile, "w");
799 write_pdbfile(fp, title1, atoms1, x1, ePBC1, box1, ' ', 1, NULL, TRUE);
801 write_pdbfile(fp, title2, atoms2, x2, ePBC2, box2, ' ', bOne ? -1 : 2, NULL, TRUE);
807 fprintf(stderr, "WARNING: cannot write B-factor values to gro file\n");
809 fp = ffopen(outfile, "w");
812 write_hconf_p(fp, title1, atoms1, 3, x1, v1, box1);
814 write_hconf_p(fp, title2, atoms2, 3, x2, v2, box2);
820 fprintf(stderr, "WARNING: cannot write B-factor values to %s file\n",
821 ftp2ext(fn2ftp(outfile)));
826 "WARNING: cannot write the reference structure to %s file\n",
827 ftp2ext(fn2ftp(outfile)));
829 write_sto_conf(outfile, title2, atoms2, x2, v2, ePBC2, box2);
833 view_all(oenv, NFILE, fnm);