2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7 * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
45 #include "gromacs/commandline/filenm.h"
46 #include "gromacs/commandline/pargs.h"
47 #include "gromacs/commandline/viewit.h"
48 #include "gromacs/fileio/confio.h"
49 #include "gromacs/fileio/groio.h"
50 #include "gromacs/fileio/pdbio.h"
51 #include "gromacs/gmxana/gmx_ana.h"
52 #include "gromacs/math/do_fit.h"
53 #include "gromacs/math/functions.h"
54 #include "gromacs/math/utilities.h"
55 #include "gromacs/math/vec.h"
56 #include "gromacs/pbcutil/rmpbc.h"
57 #include "gromacs/topology/index.h"
58 #include "gromacs/topology/topology.h"
59 #include "gromacs/utility/arraysize.h"
60 #include "gromacs/utility/cstringutil.h"
61 #include "gromacs/utility/fatalerror.h"
62 #include "gromacs/utility/futil.h"
63 #include "gromacs/utility/smalloc.h"
65 static const int NOTSET = -9368163;
67 static void calc_rm_cm(int isize, const int index[], const t_atoms* atoms, rvec x[], rvec xcm)
72 /* calculate and remove center of mass of reference structure */
75 for (i = 0; i < isize; i++)
77 m = atoms->atom[index[i]].m;
78 for (d = 0; d < DIM; d++)
80 xcm[d] += m * x[index[i]][d];
84 svmul(1 / tm, xcm, xcm);
85 for (i = 0; i < atoms->nr; i++)
91 static int build_res_index(int isize, const int index[], t_atom atom[], int rindex[])
96 rindex[r] = atom[index[0]].resind;
98 for (i = 1; i < isize; i++)
100 if (atom[index[i]].resind != rindex[r - 1])
102 rindex[r] = atom[index[i]].resind;
110 static int find_res_end(int i, int isize, const int index[], const t_atoms* atoms)
114 rnr = atoms->atom[index[i]].resind;
115 while (i < isize && atoms->atom[index[i]].resind == rnr)
122 static int debug_strcmp(char s1[], char s2[])
126 fprintf(debug, " %s-%s", s1, s2);
128 return std::strcmp(s1, s2);
131 static int find_next_match_atoms_in_res(int* i1,
140 int dx, dy, dmax, cmp;
141 gmx_bool bFW = FALSE;
144 dmax = std::max(m1 - *i1, m2 - *i2);
145 for (dx = 0, dy = 0; dx < dmax && cmp != 0; dx++)
147 for (dy = dx; dy < dmax && cmp != 0; dy++)
156 if (*i1 + dx < m1 && *i2 + dy < m2)
159 cmp = debug_strcmp(*atnms1[index1[*i1 + dx]], *atnms2[index2[*i2 + dy]]);
162 fprintf(debug, "(%d %d)", *i1 + dx, *i2 + dy);
165 if (cmp != 0 && *i1 + dy < m1 && *i2 + dx < m2)
168 cmp = debug_strcmp(*atnms1[index1[*i1 + dy]], *atnms2[index2[*i2 + dx]]);
171 fprintf(debug, "(%d %d)", *i1 + dy, *i2 + dx);
177 /* apparently, dx and dy are incremented one more time
178 as the loops terminate: we correct this here */
185 fprintf(debug, "{%d %d}", *i1 + (bFW ? dx : dy), *i2 + (bFW ? dy : dx));
202 static int find_next_match_res(int* rnr1,
211 int dmax, cmp, rr1, rr2;
212 gmx_bool bFW = FALSE, bFF = FALSE;
215 while (rr1 < isize1 && *rnr1 != index1[rr1])
220 while (rr2 < isize2 && *rnr2 != index2[rr2])
226 dmax = std::max(isize1 - rr1, isize2 - rr2);
229 fprintf(debug, " R:%d-%d:%d-%d:%d ", rr1, isize1, rr2, isize2, dmax);
232 for (; dx < dmax && cmp != 0; dx++)
234 for (dy = 0; dy <= dx && cmp != 0; dy++)
239 if (rr1 + dx < isize1 && rr2 + dy < isize2)
242 cmp = debug_strcmp(*resinfo1[index1[rr1 + dx]].name, *resinfo2[index2[rr2 + dy]].name);
245 fprintf(debug, "(%d %d)", rr1 + dx, rr2 + dy);
248 if (cmp != 0 && rr1 + dy < isize1 && rr2 + dx < isize2)
251 cmp = debug_strcmp(*resinfo1[index1[rr1 + dy]].name, *resinfo2[index2[rr2 + dx]].name);
254 fprintf(debug, "(%d %d)", rr1 + dy, rr2 + dx);
257 if (dx != 0 && cmp != 0 && rr1 + dx < isize1 && rr2 + dx < isize2)
260 cmp = debug_strcmp(*resinfo1[index1[rr1 + dx]].name, *resinfo2[index2[rr2 + dx]].name);
263 fprintf(debug, "(%d %d)", rr1 + dx, rr2 + dx);
272 /* apparently, dx and dy are incremented one more time
273 as the loops terminate: we correct this here */
277 /* if we skipped equal on both sides, only skip one residue
278 to allow for single mutations of residues... */
283 fprintf(debug, "%d.%d.%dX%sX%s", dx, rr1, rr2, *resinfo1[index1[rr1 + 1]].name,
284 *resinfo2[index2[rr2 + 1]].name);
316 static int find_first_atom_in_res(int rnr, int isize, const int index[], t_atom atom[])
321 while (i < isize && atom[index[i]].resind != rnr)
326 if (atom[index[i]].resind == rnr)
336 static void find_matching_names(int* isize1,
338 const t_atoms* atoms1,
341 const t_atoms* atoms2)
343 int i1, i2, ii1, ii2, m1, m2;
345 int rnr1, rnr2, prnr1, prnr2;
347 int * rindex1, *rindex2;
348 char *** atnms1, ***atnms2;
349 t_resinfo *resinfo1, *resinfo2;
351 /* set some handy shortcuts */
352 resinfo1 = atoms1->resinfo;
353 atnms1 = atoms1->atomname;
354 resinfo2 = atoms2->resinfo;
355 atnms2 = atoms2->atomname;
357 /* build indexes of selected residues */
358 snew(rindex1, atoms1->nres);
359 rsize1 = build_res_index(*isize1, index1, atoms1->atom, rindex1);
360 snew(rindex2, atoms2->nres);
361 rsize2 = build_res_index(*isize2, index2, atoms2->atom, rindex2);
366 prnr1 = prnr2 = NOTSET;
369 fprintf(debug, "Find matching names: %d, %d\n", *isize1, *isize2);
371 while (atcmp == 0 && i1 < *isize1 && i2 < *isize2)
374 rnr1 = atoms1->atom[index1[i1]].resind;
375 rnr2 = atoms2->atom[index2[i2]].resind;
376 if (rnr1 != prnr1 || rnr2 != prnr2)
380 fprintf(debug, "R: %s%d %s%d\n", *resinfo1[rnr1].name, rnr1, *resinfo2[rnr2].name, rnr2);
382 rescmp = std::strcmp(*resinfo1[rnr1].name, *resinfo2[rnr2].name);
386 fprintf(debug, "comparing %d %d", i1, i2);
388 atcmp = debug_strcmp(*atnms1[index1[i1]], *atnms2[index2[i2]]);
391 if (atcmp != 0) /* no match -> find match within residues */
393 m1 = find_res_end(i1, *isize1, index1, atoms1);
394 m2 = find_res_end(i2, *isize2, index2, atoms2);
397 fprintf(debug, " [%d<%d %d<%d]", i1, m1, i2, m2);
399 atcmp = find_next_match_atoms_in_res(&i1, index1, m1, atnms1, &i2, index2, m2, atnms2);
402 fprintf(debug, " -> %d %d %s-%s", i1, i2, *atnms1[index1[i1]], *atnms2[index2[i2]]);
405 if (atcmp != 0) /* still no match -> next residue(s) */
409 rescmp = find_next_match_res(&rnr1, rsize1, rindex1, resinfo1, &rnr2, rsize2, rindex2, resinfo2);
412 i1 = find_first_atom_in_res(rnr1, *isize1, index1, atoms1->atom);
416 i2 = find_first_atom_in_res(rnr2, *isize2, index2, atoms2->atom);
420 fprintf(debug, " -> %s%d-%s%d %s%d-%s%d", *resinfo1[rnr1].name, rnr1, *atnms1[index1[i1]],
421 index1[i1], *resinfo2[rnr2].name, rnr2, *atnms2[index2[i2]], index2[i2]);
423 m1 = find_res_end(i1, *isize1, index1, atoms1);
424 m2 = find_res_end(i2, *isize2, index2, atoms2);
427 fprintf(debug, " [%d<%d %d<%d]", i1, m1, i2, m2);
429 atcmp = find_next_match_atoms_in_res(&i1, index1, m1, atnms1, &i2, index2, m2, atnms2);
432 fprintf(debug, " -> %d %d %s-%s", i1, i2, *atnms1[index1[i1]], *atnms2[index2[i2]]);
437 fprintf(debug, "(%d %d): %d %d\n", ii1, ii2, atcmp, rescmp);
439 if (atcmp == 0) /* if match -> store indices */
441 index1[ii1++] = index1[i1];
442 index2[ii2++] = index2[i2];
454 gmx_fatal(FARGS, "DEATH HORROR: non-equal number of matching atoms!\n");
456 if (ii1 == i1 && ii2 == i2)
458 printf("All atoms in index groups 1 and 2 match\n");
462 if (i1 == i2 && ii1 == ii2)
464 printf("Both index groups modified from %d to %d atoms\n", i1, ii1);
470 printf("Index group 1 modified from %d to %d atoms\n", i1, ii1);
474 printf("Index group 2 modified from %d to %d atoms\n", i2, ii2);
483 int gmx_confrms(int argc, char* argv[])
485 const char* desc[] = {
486 "[THISMODULE] computes the root mean square deviation (RMSD) of two",
487 "structures after least-squares fitting the second structure on the first one.",
488 "The two structures do NOT need to have the same number of atoms,",
489 "only the two index groups used for the fit need to be identical.",
490 "With [TT]-name[tt] only matching atom names from the selected groups",
491 "will be used for the fit and RMSD calculation. This can be useful ",
492 "when comparing mutants of a protein.",
494 "The superimposed structures are written to file. In a [REF].pdb[ref] file",
495 "the two structures will be written as separate models",
496 "(use [TT]rasmol -nmrpdb[tt]). Also in a [REF].pdb[ref] file, B-factors",
497 "calculated from the atomic MSD values can be written with [TT]-bfac[tt].",
499 static gmx_bool bOne = FALSE, bRmpbc = FALSE, bMW = TRUE, bName = FALSE, bBfac = FALSE,
500 bFit = TRUE, bLabel = FALSE;
503 { "-one", FALSE, etBOOL, { &bOne }, "Only write the fitted structure to file" },
504 { "-mw", FALSE, etBOOL, { &bMW }, "Mass-weighted fitting and RMSD" },
505 { "-pbc", FALSE, etBOOL, { &bRmpbc }, "Try to make molecules whole again" },
510 "Do least squares superposition of the target structure to the reference" },
511 { "-name", FALSE, etBOOL, { &bName }, "Only compare matching atom names" },
516 "Added chain labels A for first and B for second structure" },
517 { "-bfac", FALSE, etBOOL, { &bBfac }, "Output B-factors from atomic MSD values" }
519 t_filenm fnm[] = { { efTPS, "-f1", "conf1.gro", ffREAD }, { efSTX, "-f2", "conf2", ffREAD },
520 { efSTO, "-o", "fit.pdb", ffWRITE }, { efNDX, "-n1", "fit1", ffOPTRD },
521 { efNDX, "-n2", "fit2", ffOPTRD }, { efNDX, "-no", "match", ffOPTWR } };
522 #define NFILE asize(fnm)
524 /* the two structure files */
525 const char *conf1file, *conf2file, *matchndxfile, *outfile;
527 char * name1, *name2;
528 t_topology *top1, *top2;
529 PbcType pbcType1, pbcType2;
530 t_atoms * atoms1, *atoms2;
533 real * w_rls, mass, totmass;
534 rvec * x1, *v1, *x2, *v2, *fit_x;
537 gmx_output_env_t* oenv;
542 /* center of mass calculation */
545 /* variables for fit */
546 char *groupnames1, *groupnames2;
548 int * index1, *index2;
549 real rms, msd, minmsd, maxmsd;
553 if (!parse_common_args(&argc, argv, PCA_CAN_VIEW, NFILE, fnm, asize(pa), pa, asize(desc), desc,
558 matchndxfile = opt2fn_null("-no", NFILE, fnm);
559 conf1file = ftp2fn(efTPS, NFILE, fnm);
560 conf2file = ftp2fn(efSTX, NFILE, fnm);
562 /* reading reference structure from first structure file */
563 fprintf(stderr, "\nReading first structure file\n");
565 read_tps_conf(conf1file, top1, &pbcType1, &x1, &v1, box1, TRUE);
566 atoms1 = &(top1->atoms);
567 fprintf(stderr, "%s\nContaining %d atoms in %d residues\n", *top1->name, atoms1->nr, atoms1->nres);
571 rm_gropbc(atoms1, x1, box1);
574 fprintf(stderr, "Select group from first structure\n");
575 get_index(atoms1, opt2fn_null("-n1", NFILE, fnm), 1, &isize1, &index1, &groupnames1);
578 if (bFit && (isize1 < 3))
580 gmx_fatal(FARGS, "Need >= 3 points to fit!\n");
583 /* reading second structure file */
584 fprintf(stderr, "\nReading second structure file\n");
586 read_tps_conf(conf2file, top2, &pbcType2, &x2, &v2, box2, TRUE);
587 atoms2 = &(top2->atoms);
588 fprintf(stderr, "%s\nContaining %d atoms in %d residues\n", *top2->name, atoms2->nr, atoms2->nres);
592 rm_gropbc(atoms2, x2, box2);
595 fprintf(stderr, "Select group from second structure\n");
596 get_index(atoms2, opt2fn_null("-n2", NFILE, fnm), 1, &isize2, &index2, &groupnames2);
600 find_matching_names(&isize1, index1, atoms1, &isize2, index2, atoms2);
603 fp = gmx_ffopen(matchndxfile, "w");
604 fprintf(fp, "; Matching atoms between %s from %s and %s from %s\n", groupnames1,
605 conf1file, groupnames2, conf2file);
606 fprintf(fp, "[ Match_%s_%s ]\n", conf1file, groupnames1);
607 for (i = 0; i < isize1; i++)
609 fprintf(fp, "%4d%s", index1[i] + 1, (i % 15 == 14 || i == isize1 - 1) ? "\n" : " ");
611 fprintf(fp, "[ Match_%s_%s ]\n", conf2file, groupnames2);
612 for (i = 0; i < isize2; i++)
614 fprintf(fp, "%4d%s", index2[i] + 1, (i % 15 == 14 || i == isize2 - 1) ? "\n" : " ");
619 /* check isizes, must be equal */
620 if (isize2 != isize1)
622 gmx_fatal(FARGS, "You selected groups with differen number of atoms.\n");
625 for (i = 0; i < isize1; i++)
627 name1 = *atoms1->atomname[index1[i]];
628 name2 = *atoms2->atomname[index2[i]];
629 if (std::strcmp(name1, name2) != 0)
633 fprintf(stderr, "Warning: atomnames at index %d don't match: %d %s, %d %s\n", i + 1,
634 index1[i] + 1, name1, index2[i] + 1, name2);
640 atoms1->atom[index1[i]].m = 1;
641 atoms2->atom[index2[i]].m = 1;
646 fprintf(stderr, "%d atomname%s did not match\n", warn, (warn == 1) ? "" : "s");
651 /* calculate and remove center of mass of structures */
652 calc_rm_cm(isize1, index1, atoms1, x1, xcm1);
653 calc_rm_cm(isize2, index2, atoms2, x2, xcm2);
655 snew(w_rls, atoms2->nr);
656 snew(fit_x, atoms2->nr);
657 for (at = 0; (at < isize1); at++)
659 w_rls[index2[at]] = atoms1->atom[index1[at]].m;
660 copy_rvec(x1[index1[at]], fit_x[index2[at]]);
663 /* do the least squares fit to the reference structure */
664 do_fit(atoms2->nr, w_rls, fit_x, x2);
677 /* calculate the rms deviation */
683 for (at = 0; at < isize1; at++)
685 mass = atoms1->atom[index1[at]].m;
686 for (m = 0; m < DIM; m++)
688 msd = gmx::square(x1[index1[at]][m] - x2[index2[at]][m]);
692 maxmsd = std::max(maxmsd, msds[at]);
693 minmsd = std::min(minmsd, msds[at]);
696 rms = std::sqrt(rms / totmass);
698 printf("Root mean square deviation after lsq fit = %g nm\n", rms);
701 printf("Atomic MSD's range from %g to %g nm^2\n", minmsd, maxmsd);
706 /* reset coordinates of reference and fitted structure */
707 for (i = 0; i < atoms1->nr; i++)
709 for (m = 0; m < DIM; m++)
714 for (i = 0; i < atoms2->nr; i++)
716 for (m = 0; m < DIM; m++)
723 outfile = ftp2fn(efSTO, NFILE, fnm);
724 switch (fn2ftp(outfile))
731 srenew(atoms1->pdbinfo, atoms1->nr);
732 srenew(atoms1->atom, atoms1->nr); /* Why renew atom? */
734 atoms1->havePdbInfo = TRUE;
736 /* Avoid segfaults when writing the pdb-file */
737 for (i = 0; i < atoms1->nr; i++)
739 atoms1->pdbinfo[i].type = eptAtom;
740 atoms1->pdbinfo[i].occup = 1.00;
741 atoms1->pdbinfo[i].bAnisotropic = FALSE;
744 atoms1->pdbinfo[i].bfac = 0;
748 atoms1->resinfo[atoms1->atom[i].resind].chainid = 'A';
752 for (i = 0; i < isize1; i++)
754 /* atoms1->pdbinfo[index1[i]].type = eptAtom; */
755 /* atoms1->pdbinfo[index1[i]].bAnisotropic = FALSE; */
758 atoms1->pdbinfo[index1[i]].bfac = (800 * M_PI * M_PI / 3.0) * msds[i];
761 /* atoms1->resinfo[atoms1->atom[index1[i]].resind].chain = 'A'; */
763 srenew(atoms2->pdbinfo, atoms2->nr);
764 srenew(atoms2->atom, atoms2->nr); /* Why renew atom? */
766 for (i = 0; i < atoms2->nr; i++)
768 atoms2->pdbinfo[i].type = eptAtom;
769 atoms2->pdbinfo[i].occup = 1.00;
770 atoms2->pdbinfo[i].bAnisotropic = FALSE;
773 atoms2->pdbinfo[i].bfac = 0;
777 atoms2->resinfo[atoms1->atom[i].resind].chainid = 'B';
781 for (i = 0; i < isize2; i++)
783 /* atoms2->pdbinfo[index2[i]].type = eptAtom; */
784 /* atoms2->pdbinfo[index2[i]].bAnisotropic = FALSE; */
787 atoms2->pdbinfo[index2[i]].bfac = (800 * M_PI * M_PI / 3.0) * msds[i];
790 /* atoms2->resinfo[atoms2->atom[index2[i]].resind].chain = 'B'; */
793 fp = gmx_ffopen(outfile, "w");
796 write_pdbfile(fp, *top1->name, atoms1, x1, pbcType1, box1, ' ', 1, nullptr);
798 write_pdbfile(fp, *top2->name, atoms2, x2, pbcType2, box2, ' ', bOne ? -1 : 2, nullptr);
804 fprintf(stderr, "WARNING: cannot write B-factor values to gro file\n");
806 fp = gmx_ffopen(outfile, "w");
809 write_hconf_p(fp, *top1->name, atoms1, x1, v1, box1);
811 write_hconf_p(fp, *top2->name, atoms2, x2, v2, box2);
817 fprintf(stderr, "WARNING: cannot write B-factor values to %s file\n",
818 ftp2ext(fn2ftp(outfile)));
822 fprintf(stderr, "WARNING: cannot write the reference structure to %s file\n",
823 ftp2ext(fn2ftp(outfile)));
825 write_sto_conf(outfile, *top2->name, atoms2, x2, v2, pbcType2, box2);
829 view_all(oenv, NFILE, fnm);