2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7 * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
45 #include "gromacs/commandline/filenm.h"
46 #include "gromacs/commandline/pargs.h"
47 #include "gromacs/commandline/viewit.h"
48 #include "gromacs/fileio/confio.h"
49 #include "gromacs/fileio/groio.h"
50 #include "gromacs/fileio/pdbio.h"
51 #include "gromacs/gmxana/gmx_ana.h"
52 #include "gromacs/math/do_fit.h"
53 #include "gromacs/math/functions.h"
54 #include "gromacs/math/utilities.h"
55 #include "gromacs/math/vec.h"
56 #include "gromacs/pbcutil/rmpbc.h"
57 #include "gromacs/topology/index.h"
58 #include "gromacs/topology/topology.h"
59 #include "gromacs/utility/arraysize.h"
60 #include "gromacs/utility/cstringutil.h"
61 #include "gromacs/utility/fatalerror.h"
62 #include "gromacs/utility/futil.h"
63 #include "gromacs/utility/smalloc.h"
65 static const int NOTSET = -9368163;
67 static void calc_rm_cm(int isize, const int index[], const t_atoms* atoms, rvec x[], rvec xcm)
72 /* calculate and remove center of mass of reference structure */
75 for (i = 0; i < isize; i++)
77 m = atoms->atom[index[i]].m;
78 for (d = 0; d < DIM; d++)
80 xcm[d] += m * x[index[i]][d];
84 svmul(1 / tm, xcm, xcm);
85 for (i = 0; i < atoms->nr; i++)
91 static int build_res_index(int isize, const int index[], t_atom atom[], int rindex[])
96 rindex[r] = atom[index[0]].resind;
98 for (i = 1; i < isize; i++)
100 if (atom[index[i]].resind != rindex[r - 1])
102 rindex[r] = atom[index[i]].resind;
110 static int find_res_end(int i, int isize, const int index[], const t_atoms* atoms)
114 rnr = atoms->atom[index[i]].resind;
115 while (i < isize && atoms->atom[index[i]].resind == rnr)
122 static int debug_strcmp(char s1[], char s2[])
126 fprintf(debug, " %s-%s", s1, s2);
128 return std::strcmp(s1, s2);
131 static int find_next_match_atoms_in_res(int* i1,
140 int dx, dy, dmax, cmp;
141 gmx_bool bFW = FALSE;
144 dmax = std::max(m1 - *i1, m2 - *i2);
145 for (dx = 0, dy = 0; dx < dmax && cmp != 0; dx++)
147 for (dy = dx; dy < dmax && cmp != 0; dy++)
156 if (*i1 + dx < m1 && *i2 + dy < m2)
159 cmp = debug_strcmp(*atnms1[index1[*i1 + dx]], *atnms2[index2[*i2 + dy]]);
162 fprintf(debug, "(%d %d)", *i1 + dx, *i2 + dy);
165 if (cmp != 0 && *i1 + dy < m1 && *i2 + dx < m2)
168 cmp = debug_strcmp(*atnms1[index1[*i1 + dy]], *atnms2[index2[*i2 + dx]]);
171 fprintf(debug, "(%d %d)", *i1 + dy, *i2 + dx);
177 /* apparently, dx and dy are incremented one more time
178 as the loops terminate: we correct this here */
185 fprintf(debug, "{%d %d}", *i1 + (bFW ? dx : dy), *i2 + (bFW ? dy : dx));
202 static int find_next_match_res(int* rnr1,
211 int dmax, cmp, rr1, rr2;
212 gmx_bool bFW = FALSE, bFF = FALSE;
215 while (rr1 < isize1 && *rnr1 != index1[rr1])
220 while (rr2 < isize2 && *rnr2 != index2[rr2])
226 dmax = std::max(isize1 - rr1, isize2 - rr2);
229 fprintf(debug, " R:%d-%d:%d-%d:%d ", rr1, isize1, rr2, isize2, dmax);
232 for (; dx < dmax && cmp != 0; dx++)
234 for (dy = 0; dy <= dx && cmp != 0; dy++)
239 if (rr1 + dx < isize1 && rr2 + dy < isize2)
242 cmp = debug_strcmp(*resinfo1[index1[rr1 + dx]].name, *resinfo2[index2[rr2 + dy]].name);
245 fprintf(debug, "(%d %d)", rr1 + dx, rr2 + dy);
248 if (cmp != 0 && rr1 + dy < isize1 && rr2 + dx < isize2)
251 cmp = debug_strcmp(*resinfo1[index1[rr1 + dy]].name, *resinfo2[index2[rr2 + dx]].name);
254 fprintf(debug, "(%d %d)", rr1 + dy, rr2 + dx);
257 if (dx != 0 && cmp != 0 && rr1 + dx < isize1 && rr2 + dx < isize2)
260 cmp = debug_strcmp(*resinfo1[index1[rr1 + dx]].name, *resinfo2[index2[rr2 + dx]].name);
263 fprintf(debug, "(%d %d)", rr1 + dx, rr2 + dx);
272 /* apparently, dx and dy are incremented one more time
273 as the loops terminate: we correct this here */
277 /* if we skipped equal on both sides, only skip one residue
278 to allow for single mutations of residues... */
288 *resinfo1[index1[rr1 + 1]].name,
289 *resinfo2[index2[rr2 + 1]].name);
321 static int find_first_atom_in_res(int rnr, int isize, const int index[], t_atom atom[])
326 while (i < isize && atom[index[i]].resind != rnr)
331 if (atom[index[i]].resind == rnr)
341 static void find_matching_names(int* isize1,
343 const t_atoms* atoms1,
346 const t_atoms* atoms2)
348 int i1, i2, ii1, ii2, m1, m2;
350 int rnr1, rnr2, prnr1, prnr2;
352 int * rindex1, *rindex2;
353 char *** atnms1, ***atnms2;
354 t_resinfo *resinfo1, *resinfo2;
356 /* set some handy shortcuts */
357 resinfo1 = atoms1->resinfo;
358 atnms1 = atoms1->atomname;
359 resinfo2 = atoms2->resinfo;
360 atnms2 = atoms2->atomname;
362 /* build indexes of selected residues */
363 snew(rindex1, atoms1->nres);
364 rsize1 = build_res_index(*isize1, index1, atoms1->atom, rindex1);
365 snew(rindex2, atoms2->nres);
366 rsize2 = build_res_index(*isize2, index2, atoms2->atom, rindex2);
371 prnr1 = prnr2 = NOTSET;
374 fprintf(debug, "Find matching names: %d, %d\n", *isize1, *isize2);
376 while (atcmp == 0 && i1 < *isize1 && i2 < *isize2)
379 rnr1 = atoms1->atom[index1[i1]].resind;
380 rnr2 = atoms2->atom[index2[i2]].resind;
381 if (rnr1 != prnr1 || rnr2 != prnr2)
385 fprintf(debug, "R: %s%d %s%d\n", *resinfo1[rnr1].name, rnr1, *resinfo2[rnr2].name, rnr2);
387 rescmp = std::strcmp(*resinfo1[rnr1].name, *resinfo2[rnr2].name);
391 fprintf(debug, "comparing %d %d", i1, i2);
393 atcmp = debug_strcmp(*atnms1[index1[i1]], *atnms2[index2[i2]]);
396 if (atcmp != 0) /* no match -> find match within residues */
398 m1 = find_res_end(i1, *isize1, index1, atoms1);
399 m2 = find_res_end(i2, *isize2, index2, atoms2);
402 fprintf(debug, " [%d<%d %d<%d]", i1, m1, i2, m2);
404 atcmp = find_next_match_atoms_in_res(&i1, index1, m1, atnms1, &i2, index2, m2, atnms2);
407 fprintf(debug, " -> %d %d %s-%s", i1, i2, *atnms1[index1[i1]], *atnms2[index2[i2]]);
410 if (atcmp != 0) /* still no match -> next residue(s) */
414 rescmp = find_next_match_res(&rnr1, rsize1, rindex1, resinfo1, &rnr2, rsize2, rindex2, resinfo2);
417 i1 = find_first_atom_in_res(rnr1, *isize1, index1, atoms1->atom);
421 i2 = find_first_atom_in_res(rnr2, *isize2, index2, atoms2->atom);
426 " -> %s%d-%s%d %s%d-%s%d",
427 *resinfo1[rnr1].name,
431 *resinfo2[rnr2].name,
436 m1 = find_res_end(i1, *isize1, index1, atoms1);
437 m2 = find_res_end(i2, *isize2, index2, atoms2);
440 fprintf(debug, " [%d<%d %d<%d]", i1, m1, i2, m2);
442 atcmp = find_next_match_atoms_in_res(&i1, index1, m1, atnms1, &i2, index2, m2, atnms2);
445 fprintf(debug, " -> %d %d %s-%s", i1, i2, *atnms1[index1[i1]], *atnms2[index2[i2]]);
450 fprintf(debug, "(%d %d): %d %d\n", ii1, ii2, atcmp, rescmp);
452 if (atcmp == 0) /* if match -> store indices */
454 index1[ii1++] = index1[i1];
455 index2[ii2++] = index2[i2];
467 gmx_fatal(FARGS, "DEATH HORROR: non-equal number of matching atoms!\n");
469 if (ii1 == i1 && ii2 == i2)
471 printf("All atoms in index groups 1 and 2 match\n");
475 if (i1 == i2 && ii1 == ii2)
477 printf("Both index groups modified from %d to %d atoms\n", i1, ii1);
483 printf("Index group 1 modified from %d to %d atoms\n", i1, ii1);
487 printf("Index group 2 modified from %d to %d atoms\n", i2, ii2);
496 int gmx_confrms(int argc, char* argv[])
498 const char* desc[] = {
499 "[THISMODULE] computes the root mean square deviation (RMSD) of two",
500 "structures after least-squares fitting the second structure on the first one.",
501 "The two structures do NOT need to have the same number of atoms,",
502 "only the two index groups used for the fit need to be identical.",
503 "With [TT]-name[tt] only matching atom names from the selected groups",
504 "will be used for the fit and RMSD calculation. This can be useful ",
505 "when comparing mutants of a protein.",
507 "The superimposed structures are written to file. In a [REF].pdb[ref] file",
508 "the two structures will be written as separate models",
509 "(use [TT]rasmol -nmrpdb[tt]). Also in a [REF].pdb[ref] file, B-factors",
510 "calculated from the atomic MSD values can be written with [TT]-bfac[tt].",
512 static gmx_bool bOne = FALSE, bRmpbc = FALSE, bMW = TRUE, bName = FALSE, bBfac = FALSE,
513 bFit = TRUE, bLabel = FALSE;
516 { "-one", FALSE, etBOOL, { &bOne }, "Only write the fitted structure to file" },
517 { "-mw", FALSE, etBOOL, { &bMW }, "Mass-weighted fitting and RMSD" },
518 { "-pbc", FALSE, etBOOL, { &bRmpbc }, "Try to make molecules whole again" },
523 "Do least squares superposition of the target structure to the reference" },
524 { "-name", FALSE, etBOOL, { &bName }, "Only compare matching atom names" },
529 "Added chain labels A for first and B for second structure" },
530 { "-bfac", FALSE, etBOOL, { &bBfac }, "Output B-factors from atomic MSD values" }
532 t_filenm fnm[] = { { efTPS, "-f1", "conf1.gro", ffREAD }, { efSTX, "-f2", "conf2", ffREAD },
533 { efSTO, "-o", "fit.pdb", ffWRITE }, { efNDX, "-n1", "fit1", ffOPTRD },
534 { efNDX, "-n2", "fit2", ffOPTRD }, { efNDX, "-no", "match", ffOPTWR } };
535 #define NFILE asize(fnm)
537 /* the two structure files */
538 const char *conf1file, *conf2file, *matchndxfile, *outfile;
540 char * name1, *name2;
541 t_topology *top1, *top2;
542 PbcType pbcType1, pbcType2;
543 t_atoms * atoms1, *atoms2;
546 real * w_rls, mass, totmass;
547 rvec * x1, *v1, *x2, *v2, *fit_x;
550 gmx_output_env_t* oenv;
555 /* center of mass calculation */
558 /* variables for fit */
559 char *groupnames1, *groupnames2;
561 int * index1, *index2;
562 real rms, msd, minmsd, maxmsd;
566 if (!parse_common_args(
567 &argc, argv, PCA_CAN_VIEW, NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, nullptr, &oenv))
571 matchndxfile = opt2fn_null("-no", NFILE, fnm);
572 conf1file = ftp2fn(efTPS, NFILE, fnm);
573 conf2file = ftp2fn(efSTX, NFILE, fnm);
575 /* reading reference structure from first structure file */
576 fprintf(stderr, "\nReading first structure file\n");
578 read_tps_conf(conf1file, top1, &pbcType1, &x1, &v1, box1, TRUE);
579 atoms1 = &(top1->atoms);
580 fprintf(stderr, "%s\nContaining %d atoms in %d residues\n", *top1->name, atoms1->nr, atoms1->nres);
584 rm_gropbc(atoms1, x1, box1);
587 fprintf(stderr, "Select group from first structure\n");
588 get_index(atoms1, opt2fn_null("-n1", NFILE, fnm), 1, &isize1, &index1, &groupnames1);
591 if (bFit && (isize1 < 3))
593 gmx_fatal(FARGS, "Need >= 3 points to fit!\n");
596 /* reading second structure file */
597 fprintf(stderr, "\nReading second structure file\n");
599 read_tps_conf(conf2file, top2, &pbcType2, &x2, &v2, box2, TRUE);
600 atoms2 = &(top2->atoms);
601 fprintf(stderr, "%s\nContaining %d atoms in %d residues\n", *top2->name, atoms2->nr, atoms2->nres);
605 rm_gropbc(atoms2, x2, box2);
608 fprintf(stderr, "Select group from second structure\n");
609 get_index(atoms2, opt2fn_null("-n2", NFILE, fnm), 1, &isize2, &index2, &groupnames2);
613 find_matching_names(&isize1, index1, atoms1, &isize2, index2, atoms2);
616 fp = gmx_ffopen(matchndxfile, "w");
618 "; Matching atoms between %s from %s and %s from %s\n",
623 fprintf(fp, "[ Match_%s_%s ]\n", conf1file, groupnames1);
624 for (i = 0; i < isize1; i++)
626 fprintf(fp, "%4d%s", index1[i] + 1, (i % 15 == 14 || i == isize1 - 1) ? "\n" : " ");
628 fprintf(fp, "[ Match_%s_%s ]\n", conf2file, groupnames2);
629 for (i = 0; i < isize2; i++)
631 fprintf(fp, "%4d%s", index2[i] + 1, (i % 15 == 14 || i == isize2 - 1) ? "\n" : " ");
636 /* check isizes, must be equal */
637 if (isize2 != isize1)
639 gmx_fatal(FARGS, "You selected groups with differen number of atoms.\n");
642 for (i = 0; i < isize1; i++)
644 name1 = *atoms1->atomname[index1[i]];
645 name2 = *atoms2->atomname[index2[i]];
646 if (std::strcmp(name1, name2) != 0)
651 "Warning: atomnames at index %d don't match: %d %s, %d %s\n",
662 atoms1->atom[index1[i]].m = 1;
663 atoms2->atom[index2[i]].m = 1;
668 fprintf(stderr, "%d atomname%s did not match\n", warn, (warn == 1) ? "" : "s");
673 /* calculate and remove center of mass of structures */
674 calc_rm_cm(isize1, index1, atoms1, x1, xcm1);
675 calc_rm_cm(isize2, index2, atoms2, x2, xcm2);
677 snew(w_rls, atoms2->nr);
678 snew(fit_x, atoms2->nr);
679 for (at = 0; (at < isize1); at++)
681 w_rls[index2[at]] = atoms1->atom[index1[at]].m;
682 copy_rvec(x1[index1[at]], fit_x[index2[at]]);
685 /* do the least squares fit to the reference structure */
686 do_fit(atoms2->nr, w_rls, fit_x, x2);
699 /* calculate the rms deviation */
705 for (at = 0; at < isize1; at++)
707 mass = atoms1->atom[index1[at]].m;
708 for (m = 0; m < DIM; m++)
710 msd = gmx::square(x1[index1[at]][m] - x2[index2[at]][m]);
714 maxmsd = std::max(maxmsd, msds[at]);
715 minmsd = std::min(minmsd, msds[at]);
718 rms = std::sqrt(rms / totmass);
720 printf("Root mean square deviation after lsq fit = %g nm\n", rms);
723 printf("Atomic MSD's range from %g to %g nm^2\n", minmsd, maxmsd);
728 /* reset coordinates of reference and fitted structure */
729 for (i = 0; i < atoms1->nr; i++)
731 for (m = 0; m < DIM; m++)
736 for (i = 0; i < atoms2->nr; i++)
738 for (m = 0; m < DIM; m++)
745 outfile = ftp2fn(efSTO, NFILE, fnm);
746 switch (fn2ftp(outfile))
753 srenew(atoms1->pdbinfo, atoms1->nr);
754 srenew(atoms1->atom, atoms1->nr); /* Why renew atom? */
756 atoms1->havePdbInfo = TRUE;
758 /* Avoid segfaults when writing the pdb-file */
759 for (i = 0; i < atoms1->nr; i++)
761 atoms1->pdbinfo[i].type = eptAtom;
762 atoms1->pdbinfo[i].occup = 1.00;
763 atoms1->pdbinfo[i].bAnisotropic = FALSE;
766 atoms1->pdbinfo[i].bfac = 0;
770 atoms1->resinfo[atoms1->atom[i].resind].chainid = 'A';
774 for (i = 0; i < isize1; i++)
776 /* atoms1->pdbinfo[index1[i]].type = eptAtom; */
777 /* atoms1->pdbinfo[index1[i]].bAnisotropic = FALSE; */
780 atoms1->pdbinfo[index1[i]].bfac = (800 * M_PI * M_PI / 3.0) * msds[i];
783 /* atoms1->resinfo[atoms1->atom[index1[i]].resind].chain = 'A'; */
785 srenew(atoms2->pdbinfo, atoms2->nr);
786 srenew(atoms2->atom, atoms2->nr); /* Why renew atom? */
788 for (i = 0; i < atoms2->nr; i++)
790 atoms2->pdbinfo[i].type = eptAtom;
791 atoms2->pdbinfo[i].occup = 1.00;
792 atoms2->pdbinfo[i].bAnisotropic = FALSE;
795 atoms2->pdbinfo[i].bfac = 0;
799 atoms2->resinfo[atoms1->atom[i].resind].chainid = 'B';
803 for (i = 0; i < isize2; i++)
805 /* atoms2->pdbinfo[index2[i]].type = eptAtom; */
806 /* atoms2->pdbinfo[index2[i]].bAnisotropic = FALSE; */
809 atoms2->pdbinfo[index2[i]].bfac = (800 * M_PI * M_PI / 3.0) * msds[i];
812 /* atoms2->resinfo[atoms2->atom[index2[i]].resind].chain = 'B'; */
815 fp = gmx_ffopen(outfile, "w");
818 write_pdbfile(fp, *top1->name, atoms1, x1, pbcType1, box1, ' ', 1, nullptr);
820 write_pdbfile(fp, *top2->name, atoms2, x2, pbcType2, box2, ' ', bOne ? -1 : 2, nullptr);
826 fprintf(stderr, "WARNING: cannot write B-factor values to gro file\n");
828 fp = gmx_ffopen(outfile, "w");
831 write_hconf_p(fp, *top1->name, atoms1, x1, v1, box1);
833 write_hconf_p(fp, *top2->name, atoms2, x2, v2, box2);
840 "WARNING: cannot write B-factor values to %s file\n",
841 ftp2ext(fn2ftp(outfile)));
846 "WARNING: cannot write the reference structure to %s file\n",
847 ftp2ext(fn2ftp(outfile)));
849 write_sto_conf(outfile, *top2->name, atoms2, x2, v2, pbcType2, box2);
853 view_all(oenv, NFILE, fnm);