2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
44 #include "gromacs/commandline/filenm.h"
45 #include "gromacs/commandline/pargs.h"
46 #include "gromacs/commandline/viewit.h"
47 #include "gromacs/fileio/confio.h"
48 #include "gromacs/fileio/groio.h"
49 #include "gromacs/fileio/pdbio.h"
50 #include "gromacs/gmxana/gmx_ana.h"
51 #include "gromacs/math/do_fit.h"
52 #include "gromacs/math/functions.h"
53 #include "gromacs/math/utilities.h"
54 #include "gromacs/math/vec.h"
55 #include "gromacs/pbcutil/rmpbc.h"
56 #include "gromacs/topology/index.h"
57 #include "gromacs/topology/topology.h"
58 #include "gromacs/utility/arraysize.h"
59 #include "gromacs/utility/cstringutil.h"
60 #include "gromacs/utility/fatalerror.h"
61 #include "gromacs/utility/futil.h"
62 #include "gromacs/utility/smalloc.h"
64 static const int NOTSET = -9368163;
66 static void calc_rm_cm(int isize, const int index[], const t_atoms* atoms, rvec x[], rvec xcm)
71 /* calculate and remove center of mass of reference structure */
74 for (i = 0; i < isize; i++)
76 m = atoms->atom[index[i]].m;
77 for (d = 0; d < DIM; d++)
79 xcm[d] += m * x[index[i]][d];
83 svmul(1 / tm, xcm, xcm);
84 for (i = 0; i < atoms->nr; i++)
90 static int build_res_index(int isize, const int index[], t_atom atom[], int rindex[])
95 rindex[r] = atom[index[0]].resind;
97 for (i = 1; i < isize; i++)
99 if (atom[index[i]].resind != rindex[r - 1])
101 rindex[r] = atom[index[i]].resind;
109 static int find_res_end(int i, int isize, const int index[], const t_atoms* atoms)
113 rnr = atoms->atom[index[i]].resind;
114 while (i < isize && atoms->atom[index[i]].resind == rnr)
121 static int debug_strcmp(char s1[], char s2[])
125 fprintf(debug, " %s-%s", s1, s2);
127 return std::strcmp(s1, s2);
130 static int find_next_match_atoms_in_res(int* i1,
139 int dx, dy, dmax, cmp;
140 gmx_bool bFW = FALSE;
143 dmax = std::max(m1 - *i1, m2 - *i2);
144 for (dx = 0, dy = 0; dx < dmax && cmp != 0; dx++)
146 for (dy = dx; dy < dmax && cmp != 0; dy++)
155 if (*i1 + dx < m1 && *i2 + dy < m2)
158 cmp = debug_strcmp(*atnms1[index1[*i1 + dx]], *atnms2[index2[*i2 + dy]]);
161 fprintf(debug, "(%d %d)", *i1 + dx, *i2 + dy);
164 if (cmp != 0 && *i1 + dy < m1 && *i2 + dx < m2)
167 cmp = debug_strcmp(*atnms1[index1[*i1 + dy]], *atnms2[index2[*i2 + dx]]);
170 fprintf(debug, "(%d %d)", *i1 + dy, *i2 + dx);
176 /* apparently, dx and dy are incremented one more time
177 as the loops terminate: we correct this here */
184 fprintf(debug, "{%d %d}", *i1 + (bFW ? dx : dy), *i2 + (bFW ? dy : dx));
201 static int find_next_match_res(int* rnr1,
210 int dmax, cmp, rr1, rr2;
211 gmx_bool bFW = FALSE, bFF = FALSE;
214 while (rr1 < isize1 && *rnr1 != index1[rr1])
219 while (rr2 < isize2 && *rnr2 != index2[rr2])
225 dmax = std::max(isize1 - rr1, isize2 - rr2);
228 fprintf(debug, " R:%d-%d:%d-%d:%d ", rr1, isize1, rr2, isize2, dmax);
231 for (; dx < dmax && cmp != 0; dx++)
233 for (dy = 0; dy <= dx && cmp != 0; dy++)
238 if (rr1 + dx < isize1 && rr2 + dy < isize2)
241 cmp = debug_strcmp(*resinfo1[index1[rr1 + dx]].name, *resinfo2[index2[rr2 + dy]].name);
244 fprintf(debug, "(%d %d)", rr1 + dx, rr2 + dy);
247 if (cmp != 0 && rr1 + dy < isize1 && rr2 + dx < isize2)
250 cmp = debug_strcmp(*resinfo1[index1[rr1 + dy]].name, *resinfo2[index2[rr2 + dx]].name);
253 fprintf(debug, "(%d %d)", rr1 + dy, rr2 + dx);
256 if (dx != 0 && cmp != 0 && rr1 + dx < isize1 && rr2 + dx < isize2)
259 cmp = debug_strcmp(*resinfo1[index1[rr1 + dx]].name, *resinfo2[index2[rr2 + dx]].name);
262 fprintf(debug, "(%d %d)", rr1 + dx, rr2 + dx);
271 /* apparently, dx and dy are incremented one more time
272 as the loops terminate: we correct this here */
276 /* if we skipped equal on both sides, only skip one residue
277 to allow for single mutations of residues... */
282 fprintf(debug, "%d.%d.%dX%sX%s", dx, rr1, rr2, *resinfo1[index1[rr1 + 1]].name,
283 *resinfo2[index2[rr2 + 1]].name);
315 static int find_first_atom_in_res(int rnr, int isize, const int index[], t_atom atom[])
320 while (i < isize && atom[index[i]].resind != rnr)
325 if (atom[index[i]].resind == rnr)
335 static void find_matching_names(int* isize1,
337 const t_atoms* atoms1,
340 const t_atoms* atoms2)
342 int i1, i2, ii1, ii2, m1, m2;
344 int rnr1, rnr2, prnr1, prnr2;
346 int * rindex1, *rindex2;
347 char *** atnms1, ***atnms2;
348 t_resinfo *resinfo1, *resinfo2;
350 /* set some handy shortcuts */
351 resinfo1 = atoms1->resinfo;
352 atnms1 = atoms1->atomname;
353 resinfo2 = atoms2->resinfo;
354 atnms2 = atoms2->atomname;
356 /* build indexes of selected residues */
357 snew(rindex1, atoms1->nres);
358 rsize1 = build_res_index(*isize1, index1, atoms1->atom, rindex1);
359 snew(rindex2, atoms2->nres);
360 rsize2 = build_res_index(*isize2, index2, atoms2->atom, rindex2);
365 prnr1 = prnr2 = NOTSET;
368 fprintf(debug, "Find matching names: %d, %d\n", *isize1, *isize2);
370 while (atcmp == 0 && i1 < *isize1 && i2 < *isize2)
373 rnr1 = atoms1->atom[index1[i1]].resind;
374 rnr2 = atoms2->atom[index2[i2]].resind;
375 if (rnr1 != prnr1 || rnr2 != prnr2)
379 fprintf(debug, "R: %s%d %s%d\n", *resinfo1[rnr1].name, rnr1, *resinfo2[rnr2].name, rnr2);
381 rescmp = std::strcmp(*resinfo1[rnr1].name, *resinfo2[rnr2].name);
385 fprintf(debug, "comparing %d %d", i1, i2);
387 atcmp = debug_strcmp(*atnms1[index1[i1]], *atnms2[index2[i2]]);
390 if (atcmp != 0) /* no match -> find match within residues */
392 m1 = find_res_end(i1, *isize1, index1, atoms1);
393 m2 = find_res_end(i2, *isize2, index2, atoms2);
396 fprintf(debug, " [%d<%d %d<%d]", i1, m1, i2, m2);
398 atcmp = find_next_match_atoms_in_res(&i1, index1, m1, atnms1, &i2, index2, m2, atnms2);
401 fprintf(debug, " -> %d %d %s-%s", i1, i2, *atnms1[index1[i1]], *atnms2[index2[i2]]);
404 if (atcmp != 0) /* still no match -> next residue(s) */
408 rescmp = find_next_match_res(&rnr1, rsize1, rindex1, resinfo1, &rnr2, rsize2, rindex2, resinfo2);
411 i1 = find_first_atom_in_res(rnr1, *isize1, index1, atoms1->atom);
415 i2 = find_first_atom_in_res(rnr2, *isize2, index2, atoms2->atom);
419 fprintf(debug, " -> %s%d-%s%d %s%d-%s%d", *resinfo1[rnr1].name, rnr1, *atnms1[index1[i1]],
420 index1[i1], *resinfo2[rnr2].name, rnr2, *atnms2[index2[i2]], index2[i2]);
422 m1 = find_res_end(i1, *isize1, index1, atoms1);
423 m2 = find_res_end(i2, *isize2, index2, atoms2);
426 fprintf(debug, " [%d<%d %d<%d]", i1, m1, i2, m2);
428 atcmp = find_next_match_atoms_in_res(&i1, index1, m1, atnms1, &i2, index2, m2, atnms2);
431 fprintf(debug, " -> %d %d %s-%s", i1, i2, *atnms1[index1[i1]], *atnms2[index2[i2]]);
436 fprintf(debug, "(%d %d): %d %d\n", ii1, ii2, atcmp, rescmp);
438 if (atcmp == 0) /* if match -> store indices */
440 index1[ii1++] = index1[i1];
441 index2[ii2++] = index2[i2];
453 gmx_fatal(FARGS, "DEATH HORROR: non-equal number of matching atoms!\n");
455 if (ii1 == i1 && ii2 == i2)
457 printf("All atoms in index groups 1 and 2 match\n");
461 if (i1 == i2 && ii1 == ii2)
463 printf("Both index groups modified from %d to %d atoms\n", i1, ii1);
469 printf("Index group 1 modified from %d to %d atoms\n", i1, ii1);
473 printf("Index group 2 modified from %d to %d atoms\n", i2, ii2);
482 int gmx_confrms(int argc, char* argv[])
484 const char* desc[] = {
485 "[THISMODULE] computes the root mean square deviation (RMSD) of two",
486 "structures after least-squares fitting the second structure on the first one.",
487 "The two structures do NOT need to have the same number of atoms,",
488 "only the two index groups used for the fit need to be identical.",
489 "With [TT]-name[tt] only matching atom names from the selected groups",
490 "will be used for the fit and RMSD calculation. This can be useful ",
491 "when comparing mutants of a protein.",
493 "The superimposed structures are written to file. In a [REF].pdb[ref] file",
494 "the two structures will be written as separate models",
495 "(use [TT]rasmol -nmrpdb[tt]). Also in a [REF].pdb[ref] file, B-factors",
496 "calculated from the atomic MSD values can be written with [TT]-bfac[tt].",
498 static gmx_bool bOne = FALSE, bRmpbc = FALSE, bMW = TRUE, bName = FALSE, bBfac = FALSE,
499 bFit = TRUE, bLabel = FALSE;
502 { "-one", FALSE, etBOOL, { &bOne }, "Only write the fitted structure to file" },
503 { "-mw", FALSE, etBOOL, { &bMW }, "Mass-weighted fitting and RMSD" },
504 { "-pbc", FALSE, etBOOL, { &bRmpbc }, "Try to make molecules whole again" },
509 "Do least squares superposition of the target structure to the reference" },
510 { "-name", FALSE, etBOOL, { &bName }, "Only compare matching atom names" },
515 "Added chain labels A for first and B for second structure" },
516 { "-bfac", FALSE, etBOOL, { &bBfac }, "Output B-factors from atomic MSD values" }
518 t_filenm fnm[] = { { efTPS, "-f1", "conf1.gro", ffREAD }, { efSTX, "-f2", "conf2", ffREAD },
519 { efSTO, "-o", "fit.pdb", ffWRITE }, { efNDX, "-n1", "fit1", ffOPTRD },
520 { efNDX, "-n2", "fit2", ffOPTRD }, { efNDX, "-no", "match", ffOPTWR } };
521 #define NFILE asize(fnm)
523 /* the two structure files */
524 const char *conf1file, *conf2file, *matchndxfile, *outfile;
526 char * name1, *name2;
527 t_topology *top1, *top2;
529 t_atoms * atoms1, *atoms2;
532 real * w_rls, mass, totmass;
533 rvec * x1, *v1, *x2, *v2, *fit_x;
536 gmx_output_env_t* oenv;
541 /* center of mass calculation */
544 /* variables for fit */
545 char *groupnames1, *groupnames2;
547 int * index1, *index2;
548 real rms, msd, minmsd, maxmsd;
552 if (!parse_common_args(&argc, argv, PCA_CAN_VIEW, NFILE, fnm, asize(pa), pa, asize(desc), desc,
557 matchndxfile = opt2fn_null("-no", NFILE, fnm);
558 conf1file = ftp2fn(efTPS, NFILE, fnm);
559 conf2file = ftp2fn(efSTX, NFILE, fnm);
561 /* reading reference structure from first structure file */
562 fprintf(stderr, "\nReading first structure file\n");
564 read_tps_conf(conf1file, top1, &ePBC1, &x1, &v1, box1, TRUE);
565 atoms1 = &(top1->atoms);
566 fprintf(stderr, "%s\nContaining %d atoms in %d residues\n", *top1->name, atoms1->nr, atoms1->nres);
570 rm_gropbc(atoms1, x1, box1);
573 fprintf(stderr, "Select group from first structure\n");
574 get_index(atoms1, opt2fn_null("-n1", NFILE, fnm), 1, &isize1, &index1, &groupnames1);
577 if (bFit && (isize1 < 3))
579 gmx_fatal(FARGS, "Need >= 3 points to fit!\n");
582 /* reading second structure file */
583 fprintf(stderr, "\nReading second structure file\n");
585 read_tps_conf(conf2file, top2, &ePBC2, &x2, &v2, box2, TRUE);
586 atoms2 = &(top2->atoms);
587 fprintf(stderr, "%s\nContaining %d atoms in %d residues\n", *top2->name, atoms2->nr, atoms2->nres);
591 rm_gropbc(atoms2, x2, box2);
594 fprintf(stderr, "Select group from second structure\n");
595 get_index(atoms2, opt2fn_null("-n2", NFILE, fnm), 1, &isize2, &index2, &groupnames2);
599 find_matching_names(&isize1, index1, atoms1, &isize2, index2, atoms2);
602 fp = gmx_ffopen(matchndxfile, "w");
603 fprintf(fp, "; Matching atoms between %s from %s and %s from %s\n", groupnames1,
604 conf1file, groupnames2, conf2file);
605 fprintf(fp, "[ Match_%s_%s ]\n", conf1file, groupnames1);
606 for (i = 0; i < isize1; i++)
608 fprintf(fp, "%4d%s", index1[i] + 1, (i % 15 == 14 || i == isize1 - 1) ? "\n" : " ");
610 fprintf(fp, "[ Match_%s_%s ]\n", conf2file, groupnames2);
611 for (i = 0; i < isize2; i++)
613 fprintf(fp, "%4d%s", index2[i] + 1, (i % 15 == 14 || i == isize2 - 1) ? "\n" : " ");
618 /* check isizes, must be equal */
619 if (isize2 != isize1)
621 gmx_fatal(FARGS, "You selected groups with differen number of atoms.\n");
624 for (i = 0; i < isize1; i++)
626 name1 = *atoms1->atomname[index1[i]];
627 name2 = *atoms2->atomname[index2[i]];
628 if (std::strcmp(name1, name2) != 0)
632 fprintf(stderr, "Warning: atomnames at index %d don't match: %d %s, %d %s\n", i + 1,
633 index1[i] + 1, name1, index2[i] + 1, name2);
639 atoms1->atom[index1[i]].m = 1;
640 atoms2->atom[index2[i]].m = 1;
645 fprintf(stderr, "%d atomname%s did not match\n", warn, (warn == 1) ? "" : "s");
650 /* calculate and remove center of mass of structures */
651 calc_rm_cm(isize1, index1, atoms1, x1, xcm1);
652 calc_rm_cm(isize2, index2, atoms2, x2, xcm2);
654 snew(w_rls, atoms2->nr);
655 snew(fit_x, atoms2->nr);
656 for (at = 0; (at < isize1); at++)
658 w_rls[index2[at]] = atoms1->atom[index1[at]].m;
659 copy_rvec(x1[index1[at]], fit_x[index2[at]]);
662 /* do the least squares fit to the reference structure */
663 do_fit(atoms2->nr, w_rls, fit_x, x2);
676 /* calculate the rms deviation */
682 for (at = 0; at < isize1; at++)
684 mass = atoms1->atom[index1[at]].m;
685 for (m = 0; m < DIM; m++)
687 msd = gmx::square(x1[index1[at]][m] - x2[index2[at]][m]);
691 maxmsd = std::max(maxmsd, msds[at]);
692 minmsd = std::min(minmsd, msds[at]);
695 rms = std::sqrt(rms / totmass);
697 printf("Root mean square deviation after lsq fit = %g nm\n", rms);
700 printf("Atomic MSD's range from %g to %g nm^2\n", minmsd, maxmsd);
705 /* reset coordinates of reference and fitted structure */
706 for (i = 0; i < atoms1->nr; i++)
708 for (m = 0; m < DIM; m++)
713 for (i = 0; i < atoms2->nr; i++)
715 for (m = 0; m < DIM; m++)
722 outfile = ftp2fn(efSTO, NFILE, fnm);
723 switch (fn2ftp(outfile))
730 srenew(atoms1->pdbinfo, atoms1->nr);
731 srenew(atoms1->atom, atoms1->nr); /* Why renew atom? */
733 atoms1->havePdbInfo = TRUE;
735 /* Avoid segfaults when writing the pdb-file */
736 for (i = 0; i < atoms1->nr; i++)
738 atoms1->pdbinfo[i].type = eptAtom;
739 atoms1->pdbinfo[i].occup = 1.00;
740 atoms1->pdbinfo[i].bAnisotropic = FALSE;
743 atoms1->pdbinfo[i].bfac = 0;
747 atoms1->resinfo[atoms1->atom[i].resind].chainid = 'A';
751 for (i = 0; i < isize1; i++)
753 /* atoms1->pdbinfo[index1[i]].type = eptAtom; */
754 /* atoms1->pdbinfo[index1[i]].bAnisotropic = FALSE; */
757 atoms1->pdbinfo[index1[i]].bfac = (800 * M_PI * M_PI / 3.0) * msds[i];
760 /* atoms1->resinfo[atoms1->atom[index1[i]].resind].chain = 'A'; */
762 srenew(atoms2->pdbinfo, atoms2->nr);
763 srenew(atoms2->atom, atoms2->nr); /* Why renew atom? */
765 for (i = 0; i < atoms2->nr; i++)
767 atoms2->pdbinfo[i].type = eptAtom;
768 atoms2->pdbinfo[i].occup = 1.00;
769 atoms2->pdbinfo[i].bAnisotropic = FALSE;
772 atoms2->pdbinfo[i].bfac = 0;
776 atoms2->resinfo[atoms1->atom[i].resind].chainid = 'B';
780 for (i = 0; i < isize2; i++)
782 /* atoms2->pdbinfo[index2[i]].type = eptAtom; */
783 /* atoms2->pdbinfo[index2[i]].bAnisotropic = FALSE; */
786 atoms2->pdbinfo[index2[i]].bfac = (800 * M_PI * M_PI / 3.0) * msds[i];
789 /* atoms2->resinfo[atoms2->atom[index2[i]].resind].chain = 'B'; */
792 fp = gmx_ffopen(outfile, "w");
795 write_pdbfile(fp, *top1->name, atoms1, x1, ePBC1, box1, ' ', 1, nullptr);
797 write_pdbfile(fp, *top2->name, atoms2, x2, ePBC2, box2, ' ', bOne ? -1 : 2, nullptr);
803 fprintf(stderr, "WARNING: cannot write B-factor values to gro file\n");
805 fp = gmx_ffopen(outfile, "w");
808 write_hconf_p(fp, *top1->name, atoms1, x1, v1, box1);
810 write_hconf_p(fp, *top2->name, atoms2, x2, v2, box2);
816 fprintf(stderr, "WARNING: cannot write B-factor values to %s file\n",
817 ftp2ext(fn2ftp(outfile)));
821 fprintf(stderr, "WARNING: cannot write the reference structure to %s file\n",
822 ftp2ext(fn2ftp(outfile)));
824 write_sto_conf(outfile, *top2->name, atoms2, x2, v2, ePBC2, box2);
828 view_all(oenv, NFILE, fnm);